From 5c2ae01e8dadfd8f22346afb31ceb974759051c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 24 Sep 2025 11:00:41 +0000 Subject: [PATCH 001/334] chore: Add codeowners file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- CODEOWNERS | 63 +----------------------------------------------------- 1 file changed, 1 insertion(+), 62 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index 6f59d98afb6..48513c28d32 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,64 +1,7 @@ # Core -[Core-ADLR] @mcore-reviewers/core-adlr +[Core] @mcore-reviewers/dev-core megatron/core/ -[Core-NeMo] @mcore-reviewers/core-nemo -megatron/core/ - -^[Core-MLPerf] @mcore-reviewers/mlperf -megatron/core/ - -[GPT] @mcore-reviewers/gpt -megatron/core/models/gpt/ - -[Multimodal] @mcore-reviewers/multi-modal -megatron/core/models/multimodal/ - -[Hybrid-mamba] @mcore-reviewers/hybrid-mamba -megatron/core/models/mamba/ - -# Distributed Checkpointing -[Distributed Checkpointing] @mcore-reviewers/dist-checkpointing -megatron/core/dist_checkpointing/ - -# Distributed Optimizer -[Distributed Optimizer] @mcore-reviewers/dist-optimizer -megatron/core/optimizer/distrib_optimizer/ - -# Quantization and Inference (QAT) -[Quantization and Inference (QAT)] @mcore-reviewers/quantization-and-inference -megatron/core/inference/modelopt_support - -# Datasets -[Datasets] @mcore-reviewers/datasets -megatron/core/datasets/ - -# Parallelism -[Pipeline Parallelism] @mcore-reviewers/pipeline-parallelism -megatron/core/pipeline_parallel/ - -# Transformer -[Transformer] @mcore-reviewers/core-adlr @mcore-reviewers/core-nemo -megatron/core/transformer/ - -[MoE-ADLR] @mcore-reviewers/moe-adlr -megatron/core/transformer/moe/ - -[MoE-Moe] @mcore-reviewers/moe-moe -megatron/core/transformer/moe/ - -# Inference -[Inference] @mcore-reviewers/inference -megatron/core/inference/ - -# Parallel State -[ParallelState] @mcore-reviewers/core-adlr 
@mcore-reviewers/core-nemo -megatron/core/parallel_state.py - -[Post-Training] @mcore-reviewers/post-training -megatron/core/post_training/ -megatron/post_training - [CI][1] @mcore-reviewers/ci .gitlab/ .github/ @@ -68,7 +11,3 @@ Dockerfile.ci.dev tests/ megatron/core/transformer/transformer_block.py megatron/core/transformer/transformer_layer.py - -[RL] @mcore-reviewers/rl -megatron/rl/ -examples/rl/ \ No newline at end of file From 454e7b5ecfb7e19e2d06dce153e90690587cce70 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Wed, 24 Sep 2025 15:18:22 -0700 Subject: [PATCH 002/334] ADLR/megatron-lm!4065 - ci: Add main/dev branching to queuemanager --- .gitlab/stages/02.test.yml | 2 +- .../python_scripts/wait_for_resources.py | 29 +++++++++++++++---- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index ed050e19864..8abdf310156 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -36,7 +36,7 @@ wait_for_resources: - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE} - export GITLAB_ENDPOINT - export NUM_CONCURRENT_JOBS - - python tests/test_utils/python_scripts/wait_for_resources.py --pipeline-id $CI_PIPELINE_ID + - python tests/test_utils/python_scripts/wait_for_resources.py --pipeline-id $CI_PIPELINE_ID --target-branch $CI_MERGE_REQUEST_TARGET_BRANCH_NAME rules: - if: $CI_MERGE_REQUEST_LABELS =~ /fast-track/ when: never diff --git a/tests/test_utils/python_scripts/wait_for_resources.py b/tests/test_utils/python_scripts/wait_for_resources.py index 6b20fc55c96..c653567c0f6 100644 --- a/tests/test_utils/python_scripts/wait_for_resources.py +++ b/tests/test_utils/python_scripts/wait_for_resources.py @@ -2,7 +2,9 @@ import logging import os +import re import time +from typing import Literal import click import gitlab @@ -11,7 +13,7 @@ PROJECT_ID = int(os.getenv("CI_PROJECT_ID", 19378)) GITLAB_ENDPOINT = os.getenv("GITLAB_ENDPOINT") RO_API_TOKEN = os.getenv("RO_API_TOKEN") 
-NUM_CONCURRENT_JOBS = int(os.getenv("NUM_CONCURRENT_JOBS", 2)) +NUM_CONCURRENT_JOBS = int(os.getenv("NUM_CONCURRENT_JOBS", 2)) // 2 # for main and dev branch logging.basicConfig() logger = logging.getLogger(__name__) @@ -22,12 +24,14 @@ def get_gitlab_handle(): return gitlab.Gitlab(f"https://{GITLAB_ENDPOINT}", private_token=os.getenv("RO_API_TOKEN")) -def ci_is_busy(pipeline): +def ci_is_busy(pipeline, target_branch: str): """List all merge request pipelines created before the given pipeline that are still pending or running.""" mr_pipelines = ( get_gitlab_handle() .projects.get(PROJECT_ID) - .pipelines.list(source="merge_request_event", get_all=True) + .pipelines.list( + source="merge_request_event", per_page=100, page=1, order_by="id", sort="desc" + ) ) pipeline_time = pipeline.attributes["created_at"] @@ -36,22 +40,32 @@ def ci_is_busy(pipeline): p for p in mr_pipelines if p.attributes["created_at"] < pipeline_time + if ( + get_gitlab_handle() + .projects.get(PROJECT_ID) + .mergerequests.get( + int(re.search(r'merge-requests/(\d+)', p.attributes["ref"]).group(1)) + ) + .target_branch + == target_branch + ) and p.attributes["status"] in ("pending", "running") ] ) - logger.info(f"In queue: {in_queue}. Waiting for resources...") + logger.info(f"Position in queue: {in_queue+1}. Waiting for resources...") return in_queue > NUM_CONCURRENT_JOBS @click.command() @click.option("--pipeline-id", required=True, type=int, help="CI pipeline ID to check") -def main(pipeline_id): +@click.option("--target-branch", required=True, type=str, help="Target branch to check") +def main(pipeline_id, target_branch): pipeline = get_gitlab_handle().projects.get(PROJECT_ID).pipelines.get(pipeline_id) logger.info(f"Job concurrency: {NUM_CONCURRENT_JOBS}") while True: try: - is_busy = ci_is_busy(pipeline) + is_busy = ci_is_busy(pipeline, target_branch) if not is_busy: break time.sleep(60) @@ -60,6 +74,9 @@ def main(pipeline_id): logger.info(f"Network error. Retrying... 
{e}") time.sleep(15) continue + except Exception as e: + logger.error(f"Error: {e}") + break if __name__ == "__main__": From c0188dc2aa94e68cd3521176dbc549970ab686cb Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Mon, 29 Sep 2025 09:34:04 -0700 Subject: [PATCH 003/334] ADLR/megatron-lm!4090 - cp: `!4084 - ci: Send dev alerts to separate channel` --- .gitlab/scripts/build.sh | 1 - .gitlab/stages/02.test.yml | 9 +++++++-- .gitlab/stages/04.functional-tests.yml | 8 ++++++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.gitlab/scripts/build.sh b/.gitlab/scripts/build.sh index edb774e72bd..960af104628 100644 --- a/.gitlab/scripts/build.sh +++ b/.gitlab/scripts/build.sh @@ -44,7 +44,6 @@ JET_API_VERSION=$(curl -s -u "$ARTIFACTORY_USER:$ARTIFACTORY_TOKEN" "https://sc- DOCKER_BUILDKIT=1 docker build \ --secret id=JET_INDEX_URLS \ --secret id=LOGGER_INDEX_URL \ - --secret id=EXPERIMENTAL_FLASH_ATTN \ --target $STAGE \ -f docker/$FILE \ -t ${IMAGE}:${CI_PIPELINE_ID} \ diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 8abdf310156..72f1491b07c 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -216,7 +216,12 @@ test:unit_tests_notify: - team/megatron script: - env - - export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK} + - | + if [[ "$CI_COMMIT_BRANCH" == "dev" ]]; then + export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK_DEV} + else + export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK} + fi - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE} - export GITLAB_ENDPOINT - export TAG_TEAM=$([[ "$CI_COMMIT_BRANCH" == "main" ]] && echo "1" || "0") @@ -232,7 +237,7 @@ test:unit_tests_notify: paths: - scripts rules: - - if: $CI_PIPELINE_SOURCE == "schedule" && $CI_COMMIT_BRANCH == "ci-unit-test-extended" + - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == "ci-unit-test-extended" || "ci-dev-unit-test-extended") when: always - when: never diff --git a/.gitlab/stages/04.functional-tests.yml 
b/.gitlab/stages/04.functional-tests.yml index a8575e921ee..084787e8ec3 100644 --- a/.gitlab/stages/04.functional-tests.yml +++ b/.gitlab/stages/04.functional-tests.yml @@ -202,12 +202,16 @@ functional:x_notify: - purpose/utility - team/megatron variables: - WEBHOOK_URL: ${MCORE_NOTIFICATION_HOOK} RO_API_TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE} CONTEXT: $FUNCTIONAL_TEST_SCOPE script: - env - - export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK} + - | + if [[ "$CI_COMMIT_BRANCH" == "dev" ]]; then + export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK_DEV} + else + export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK} + fi - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE} - export GITLAB_ENDPOINT - export CONTEXT=$FUNCTIONAL_TEST_SCOPE From 4808e33c6052fcfd2da66f82c35b3957ddf3c2d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 30 Sep 2025 08:48:15 +0000 Subject: [PATCH 004/334] ci(hotfix): Nightly runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 72f1491b07c..6eb60d03ec7 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -237,7 +237,7 @@ test:unit_tests_notify: paths: - scripts rules: - - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == "ci-unit-test-extended" || "ci-dev-unit-test-extended") + - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == "ci-unit-test-extended" || $CI_COMMIT_BRANCH == "ci-dev-unit-test-extended") when: always - when: never From a43c0483c8f472e7954ecca5c919868400a3d951 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Fri, 3 Oct 2025 08:40:37 -0700 Subject: [PATCH 005/334] ADLR/megatron-lm!4127 - ADLR/megatron-lm!4084 - ci: Send dev alerts to separate channel --- .gitlab/stages/02.test.yml | 6 +++++- .gitlab/stages/04.functional-tests.yml | 2 +- 2 files changed, 
6 insertions(+), 2 deletions(-) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 6eb60d03ec7..49135bda6af 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -163,6 +163,8 @@ test:unit_tests_pyt(DEV)_mcore(legacy): ENVIRONMENT: dev TAG: legacy rules: + - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'dev' + when: never - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /^core_r/ when: never - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'main' @@ -179,6 +181,8 @@ test:unit_tests_pyt(LTS)_mcore(legacy): ENVIRONMENT: lts TAG: legacy rules: + - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'dev' + when: never - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /^core_r/ when: never - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'main' @@ -217,7 +221,7 @@ test:unit_tests_notify: script: - env - | - if [[ "$CI_COMMIT_BRANCH" == "dev" ]]; then + if [[ "$CI_COMMIT_BRANCH" == "*dev*" ]]; then export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK_DEV} else export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK} diff --git a/.gitlab/stages/04.functional-tests.yml b/.gitlab/stages/04.functional-tests.yml index 084787e8ec3..4b7c17668fe 100644 --- a/.gitlab/stages/04.functional-tests.yml +++ b/.gitlab/stages/04.functional-tests.yml @@ -207,7 +207,7 @@ functional:x_notify: script: - env - | - if [[ "$CI_COMMIT_BRANCH" == "dev" ]]; then + if [[ "$CI_COMMIT_BRANCH" == "*dev*" ]]; then export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK_DEV} else export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK} From c862095921ad876628bc27f72505dfc6ad407e8f Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Fri, 3 Oct 2025 09:16:40 -0700 Subject: [PATCH 006/334] ADLR/megatron-lm!4128 - ci: Auto-cherrypick MR into main --- .gitlab/stages/00.pre.yml | 63 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/.gitlab/stages/00.pre.yml b/.gitlab/stages/00.pre.yml index 
5e209e62548..c91ffc80995 100644 --- a/.gitlab/stages/00.pre.yml +++ b/.gitlab/stages/00.pre.yml @@ -141,6 +141,69 @@ pre:label_merge_request: source labels curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}" --data-urlencode "add_labels=$LABELS" -X PUT +pre:maybe_cherry_pick_to_main: + rules: + - if: "$CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'dev' && $CI_MERGE_REQUEST_LABELS =~ /mirror-to-main/" + - when: never + tags: + - arch/amd64 + - env/prod + - origin/jet-fleet + - owner/jet-core + - purpose/utility + - team/megatron + stage: .pre + image: nentangso/alpine-git-curl-jq + variables: + GIT_STRATEGY: "clone" + script: + - | + set -x + MR_ID=$CI_MERGE_REQUEST_IID + TARGET_BRANCH="cp/$MR_ID-into-main" + TARGET_BRANCH_EXISTS_OK=$([[ "$(git ls-remote --heads origin refs/heads/$TARGET_BRANCH)" != "" ]] && echo true || echo false) + + if [[ "$TARGET_BRANCH_EXISTS_OK" == "true" ]]; then + echo Target branch already exists, will not cherry-pick again. 
+ exit 0 + fi + + MR=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${MR_ID}") + + LABELS=$(echo -E $MR | jq '.labels | join(",")' | tr -d '"') + AUTHOR_ID=$(echo -E $MR | jq '.author.id' | tr -d '"') + AUTHOR_NAME=$(echo -E $MR | jq '.author.username' | tr -d '"') + TITLE=$(echo -E $MR | jq '.title' | tr -d '"') + MILESTONE_ID=$(echo -E $MR | jq '.milestone.id' | tr -d '"') + + git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/$CI_PROJECT_NAMESPACE/megatron-lm.git" + git config --global user.email "mcore-bot@nvidia.com" + git config --global user.name "Mcore Bot" + + git fetch origin dev + git fetch origin $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME + git checkout $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME + START_COMMIT=$(git merge-base origin/dev origin/$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME) + END_COMMIT=$(git rev-parse HEAD) + + git fetch origin main + git checkout main + git checkout -b $TARGET_BRANCH + + git cherry-pick $START_COMMIT..$END_COMMIT + git push -u origin $TARGET_BRANCH + + curl \ + --header "PRIVATE-TOKEN: $PAT" \ + --url https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests \ + -d "source_branch=$TARGET_BRANCH" \ + -d "target_branch=main" \ + -d "title=cp MR !$MR_ID from dev: \`$TITLE\`" \ + -d "labels=cherry-picked-from-dev" \ + -d "reviewer_ids=$AUTHOR_ID" \ + -d "milestone_id=$MILESTONE_ID" \ + -d "description=[🤖]: Hi @$AUTHOR_NAME 👋,

we've cherry picked \`$TITLE (!$MR_ID)\` into \`main\` for you! 🚀

Please review and approve this cherry pick by your convenience\!" + pre:maybe_cherry_pick_commit: rules: - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push"' From f9bb58c87e5e78fa031259cfe48bffc4ad12da0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 10 Oct 2025 09:16:49 +0000 Subject: [PATCH 007/334] ci: Re-add safe-imports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 49135bda6af..b271f72b3bd 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -410,7 +410,7 @@ test:safe_imports: - python -m pip install --no-cache-dir click - python .gitlab/scripts/check_imports.py --package-name megatron.core rules: - - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'main' + - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'main' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'dev' when: never - if: $UNIT_TEST == 'yes' && $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true" allow_failure: true From 2a6ca17db30d0e0daf501a0838720c417a88894c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 10 Oct 2025 09:20:03 +0000 Subject: [PATCH 008/334] ci: No legacy for unit test extended MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index b271f72b3bd..e3ea9fdd68c 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -165,6 +165,8 @@ test:unit_tests_pyt(DEV)_mcore(legacy): rules: - if: 
$CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'dev' when: never + - if: $CI_COMMIT_BRANCH == 'ci-dev-unit-test-extended' + when: never - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /^core_r/ when: never - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'main' @@ -183,6 +185,8 @@ test:unit_tests_pyt(LTS)_mcore(legacy): rules: - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'dev' when: never + - if: $CI_COMMIT_BRANCH == 'ci-dev-unit-test-extended' + when: never - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /^core_r/ when: never - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'main' From 54825abc134efe545dff8669039f0f3fe74f6999 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 10 Oct 2025 09:22:58 +0000 Subject: [PATCH 009/334] ci: Reduce number of repeats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index be4b658f2d6..6b46d92aacb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,7 +6,7 @@ INTEGRATION_TEST_SCOPE: mr FUNCTIONAL_TEST: "yes" FUNCTIONAL_TEST_SCOPE: mr-slim - FUNCTIONAL_TEST_REPEAT: 5 + FUNCTIONAL_TEST_REPEAT: 1 FUNCTIONAL_TEST_TIME_LIMIT: 2700 CLUSTER_A100: "" CLUSTER_H100: "" @@ -72,7 +72,7 @@ workflow: INTEGRATION_TEST_SCOPE: mr FUNCTIONAL_TEST: "no" FUNCTIONAL_TEST_SCOPE: mr-slim - FUNCTIONAL_TEST_REPEAT: 5 + FUNCTIONAL_TEST_REPEAT: 1 FUNCTIONAL_TEST_TIME_LIMIT: 2700 CLUSTER_A100: "" CLUSTER_H100: "" @@ -119,7 +119,7 @@ workflow: INTEGRATION_TEST: "no" FUNCTIONAL_TEST: "yes" FUNCTIONAL_TEST_SCOPE: mr - FUNCTIONAL_TEST_REPEAT: 5 + FUNCTIONAL_TEST_REPEAT: 1 FUNCTIONAL_TEST_TIME_LIMIT: 2700 CLUSTER_A100: "" CLUSTER_H100: "" From 15819b664c52c5426a6110d088fab9e121de5f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 10 Oct 
2025 14:34:30 +0000 Subject: [PATCH 010/334] ci: Fix notification channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 2 +- .gitlab/stages/04.functional-tests.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index e3ea9fdd68c..71f49f55055 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -225,7 +225,7 @@ test:unit_tests_notify: script: - env - | - if [[ "$CI_COMMIT_BRANCH" == "*dev*" ]]; then + if [[ "$CI_COMMIT_BRANCH" == *dev* ]]; then export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK_DEV} else export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK} diff --git a/.gitlab/stages/04.functional-tests.yml b/.gitlab/stages/04.functional-tests.yml index 4b7c17668fe..7fe8aad0771 100644 --- a/.gitlab/stages/04.functional-tests.yml +++ b/.gitlab/stages/04.functional-tests.yml @@ -207,7 +207,7 @@ functional:x_notify: script: - env - | - if [[ "$CI_COMMIT_BRANCH" == "*dev*" ]]; then + if [[ "$CI_COMMIT_BRANCH" == *dev* ]]; then export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK_DEV} else export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK} From 879a7a1e33cddf88523a587ffb4b9f1c7e163591 Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Fri, 10 Oct 2025 07:34:34 -0700 Subject: [PATCH 011/334] ADLR/megatron-lm!4106 - [DEV] Add muon and layer-wise distributed optimizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Zijie Yan Co-authored-by: Hao Wu Co-authored-by: oliver könig Co-authored-by: Boxiang Wang Co-authored-by: mikail --- docker/Dockerfile.ci.dev | 2 +- .../core/optimizer/layer_wise_optimizer.py | 158 +++++++++ megatron/core/optimizer/muon.py | 307 ++++++++++++++++++ megatron/core/optimizer/optimizer_config.py | 25 +- megatron/core/tensor_parallel/layers.py | 1 + megatron/training/arguments.py | 28 +- megatron/training/checkpointing.py | 
14 +- megatron/training/training.py | 36 +- pyproject.toml | 3 + tests/unit_tests/test_muon_optimizer.py | 245 ++++++++++++++ uv.lock | 14 + 11 files changed, 818 insertions(+), 15 deletions(-) create mode 100644 megatron/core/optimizer/layer_wise_optimizer.py create mode 100644 megatron/core/optimizer/muon.py create mode 100644 tests/unit_tests/test_muon_optimizer.py diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index 45b0cba871c..b3295697f31 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -32,7 +32,7 @@ COPY megatron/core/package_info.py /workspace/megatron/core/ RUN --mount=type=cache,target=/root/.cache/uv \ bash -ex <<"EOF" uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages - uv sync --extra dev --extra mlm --link-mode copy --locked \ + uv sync --extra dev --extra mlm --link-mode copy --locked --all-groups \ --no-install-package torch \ --no-install-package torchvision \ --no-install-package triton \ diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py new file mode 100644 index 00000000000..b398a645ce3 --- /dev/null +++ b/megatron/core/optimizer/layer_wise_optimizer.py @@ -0,0 +1,158 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import torch + +from .optimizer import ChainedOptimizer, MegatronOptimizer, Float16OptimizerWithFloat16Params +from .optimizer_config import OptimizerConfig +from .clip_grads import clip_grad_by_total_norm_fp32, count_zeros_fp32, get_grad_norm_fp32 + +from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.utils import get_pg_rank, get_pg_size + + +class LayerWiseDistributedOptimizer(ChainedOptimizer): + """Layer-wise distributed optimizer for Megatron-core models. + + This is a experimental distributed optimizer wrapper that distributes weight to DP ranks by full layer. 
+ Implemented as ChainedOptimizer to support different weights use different optimizers (e.g. muon+adam) + When using, keep all megatron distributed optimizer related options OFF. + + How LayerWiseDistributedOptimizer work: + 1. weights are splited into lists and each rank only keep its shard in its optimizer + 2. Megatron DDP handle allreduce grad for all params, note that each rank have full model and grad + 3. optimizer is already modified so only param belong to this DP rank is updated + 3. grad_norm and zero counting will reduce metrics globally in step function + 4. Do regular update with chained optimizers, optimizer is already modified so partial update happens + 5. allgather updated params to every rank(currently through broadcast loop) + """ + def __init__( + self, + optimizers: List[MegatronOptimizer], + config: OptimizerConfig, + pg_collection: Optional[ProcessGroupCollection] = None, + ) -> None: + self.pg_collection = pg_collection + self.shard_params(optimizers) + # wrap optimizer after sharding to avoid unnecessary master weight creation + # TODO(deyuf): check if underlying optimizer.config need to fixed and if so can use that instead of passing + if config.bf16: + if isinstance(optimizers[0], Float16OptimizerWithFloat16Params): + raise TypeError('LayerWiseDistributedOptimizer received Float16 optimizer already.') + optimizers = [Float16OptimizerWithFloat16Params(optim, config, None, None) for optim in optimizers] + super().__init__(optimizers) + + # TODO(kunlun, deyuf): potential future perf optimization + # since allreduce is unchanged and handled by megatron DDP, they're already in contiguous gbuf + # so instead of shard param by layer randomly, we can still shard by buf range but keep some "extras" + # to keep boundary weight not sharded. 
This way each rank do some duplicated work but we can call + # single allgather later and all current distopt optimization can be applied + + def shard_params(self, optimizers): + """Shard all params into lists by rank. """ + # We'll optimize sharding later if there is perf issue. should be ok since linear are grouped already + # Key is to create separate sharding for dp/expt parallel, saved in dp_cp_params_list, expt_dp_params_list + # example of 4 dp rank and 10 non-expert parameters p0-p9, then dp_cp_params_list will look like + # [[p0, p4, p8], [p1, p5, p9], [p2, p6], [p3, p7]] + + # simplify when dp_cp group size is 1 + if get_pg_size(self.pg_collection.dp_cp) == 1: + self.dp_cp_params_list = None + self.expt_dp_params_list = None + return + + dp_cp_idx, expt_dp_idx = 0, 0 + dp_cp_size = get_pg_size(self.pg_collection.dp_cp) + expt_dp_size = get_pg_size(self.pg_collection.expt_dp) + self.dp_cp_params_list = [[] for _ in range(dp_cp_size)] + self.expt_dp_params_list = [[] for _ in range(expt_dp_size)] + # get all param groups, this is called before init so cannot rely on Chained optimizer method + param_groups = [] + for optimizer in optimizers: + param_groups += optimizer.param_groups + for group in param_groups: + params_this_rank = [] + if group["is_expert_parallel"]: + for p in group["params"]: + if expt_dp_idx == get_pg_rank(self.pg_collection.expt_dp): + params_this_rank.append(p) + self.expt_dp_params_list[expt_dp_idx].append(p) + expt_dp_idx = (expt_dp_idx + 1) % expt_dp_size + else: + for p in group["params"]: + if dp_cp_idx == get_pg_rank(self.pg_collection.dp_cp): + params_this_rank.append(p) + self.dp_cp_params_list[dp_cp_idx].append(p) + dp_cp_idx = (dp_cp_idx + 1) % dp_cp_size + # now we modify the group to only handle local params + group["params"] = params_this_rank + + # simplify when expt_dp group size is 1 or expert parallel is off + if expt_dp_size == 1 or len(self.expt_dp_params_list[0]) == 0: + self.expt_dp_params_list = None + + 
@torch.no_grad() + def broadcast_params(self): + """All rank broadcast updated local params(allgatherv). """ + # Broadcast linear layer weights to all other ranks. + # This may not be slower than PyTorch allgatherv which calls broadcast internally. + # TODO(skyw): Profile and implement more efficient version. + if self.dp_cp_params_list is None: + return + for i, params in enumerate(self.dp_cp_params_list): + src_global_rank = torch.distributed.get_global_rank(self.pg_collection.dp_cp, i) + for p in params: + torch.distributed.broadcast(p, src_global_rank, self.pg_collection.dp_cp) + if self.expt_dp_params_list is None: + return + for i, params in enumerate(self.expt_dp_params_list): + src_global_rank = torch.distributed.get_global_rank(self.pg_collection.expt_dp, i) + for p in params: + torch.distributed.broadcast(p, src_global_rank, self.pg_collection.expt_dp) + + @torch.no_grad() + def get_grad_norm(self): + # similar to dist opt, always aggregate globally + grads_for_norm = [] + for optimizer in self.chained_optimizers: + grads_for_norm += optimizer.get_main_grads_for_grad_norm() + grad_norm = get_grad_norm_fp32( + grads_for_norm, grad_stats_parallel_group=None + ) + return grad_norm + + @torch.no_grad() + def count_zeros(self): + params = [] + for optimizer in self.chained_optimizers: + params += optimizer.get_parameters() + return count_zeros_fp32( + params, + grad_stats_parallel_group=None, + use_decoupled_grad=self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8, + ) + + @torch.no_grad() + def step(self): # type: ignore[no-untyped-def] + """step function for layer-wise optimizer.""" + update_successful, grad_norm, num_zeros_in_grad = super().step() + + # All gather updated params. + self.broadcast_params() + + return update_successful, grad_norm, num_zeros_in_grad + + def save_state_dict_to_file(self, filename: str) -> None: + """Save the parameter state of the optimizer. + + Args: + filename: The filename to save the parameter state. 
+ """ + torch.save(super().state_dict(), filename) + + def load_state_dict_from_file(self, filename: str) -> None: + """Load the parameter state of the optimizer.""" + super().load_state_dict(torch.load(filename)) + + diff --git a/megatron/core/optimizer/muon.py b/megatron/core/optimizer/muon.py new file mode 100644 index 00000000000..d2dc7533bf9 --- /dev/null +++ b/megatron/core/optimizer/muon.py @@ -0,0 +1,307 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +"""Megatron muon optimizer wrapper to handle tensor-parallel.""" + +import logging +from functools import partial +from typing import Callable, List, Literal, Optional + +import torch +from torch.optim.optimizer import ParamsT + +from megatron.core import parallel_state +from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.transformer.module import MegatronModule +from megatron.core.utils import get_pg_size, log_single_rank + +from . import _get_param_groups, get_megatron_optimizer +from .layer_wise_optimizer import LayerWiseDistributedOptimizer +from .optimizer import ( + ChainedOptimizer, + Float16OptimizerWithFloat16Params, + FP32Optimizer, + MegatronOptimizer, +) +from .optimizer_config import OptimizerConfig + +try: + from emerging_optimizers.orthogonalized_optimizers import ( + OrthogonalizedOptimizer, + get_muon_scale_factor, + ) + from emerging_optimizers.orthogonalized_optimizers.muon_utils import newton_schulz_tp + + HAVE_EMERGING_OPTIMIZERS = True +except ImportError: + HAVE_EMERGING_OPTIMIZERS = False + OrthogonalizedOptimizer = object + + +logger = logging.getLogger(__name__) + + +class TensorParallelMuon(OrthogonalizedOptimizer): + """Tensor Parallel Muon optimizer.""" + + def __init__( + self, + params: ParamsT, + lr: float = 3e-4, + momentum_beta: float = 0.95, + use_nesterov: bool = True, + weight_decay: float = 0.01, + use_decoupled_weight_decay: bool = True, + split_qkv: bool = False, + is_qkv_fn: Callable[[torch.Tensor], bool] | 
None = None, + qkv_split_shapes: tuple[int, int, int] | None = None, + fp32_matmul_prec: str = "medium", + coefficient_type: str = "quintic", + num_ns_steps: int = 5, + scale_mode: str = "spectral", + extra_scale_factor: float = 1.0, + pg_collection: Optional[ProcessGroupCollection] = None, + mode: Literal["blockwise", "duplicated", "distributed"] = "duplicated", + ) -> None: + if num_ns_steps < 1: + raise ValueError(f"num_ns_steps must be at least 1, got {num_ns_steps}") + + orthogonalize_fn = partial( + newton_schulz_tp, + steps=num_ns_steps, + coefficient_type=coefficient_type, + mode="duplicated" if mode == "blockwise" else mode, + ) + scale_factor_fn = partial( + get_muon_scale_factor, mode=scale_mode, extra_scale_factor=extra_scale_factor + ) + + def orthogonalize_fn_tp( + x: torch.Tensor, + tp_group: torch.distributed.ProcessGroup, + partition_dim: int | None = None, + ) -> torch.Tensor: + return orthogonalize_fn(x, tp_group=tp_group, partition_dim=partition_dim) + + def scale_factor_fn_tp( + size_out: int, size_in: int, partition_dim: int | None = None + ) -> float: + if partition_dim is None: + return scale_factor_fn(size_out, size_in) + + size = [size_out, size_in] + size[partition_dim] *= get_pg_size(pg_collection.tp) if pg_collection else 1 + return scale_factor_fn(*size) + + self.pg_collection = pg_collection + self.mode = mode + + super().__init__( + params, + lr, + momentum_beta, + use_nesterov, + weight_decay, + use_decoupled_weight_decay, + split_qkv, + is_qkv_fn, + qkv_split_shapes, + fp32_matmul_prec, + orthogonalize_fn_tp, + scale_factor_fn_tp, + ) + + def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor) -> torch.Tensor: + """Orthogonalize the momentum. + + Args: + p: The parameter tensor. i is necessary to pass param tensor in addition to momentum + because a lot of information is only available in the param tensor, + attributes for example. + grad: The momentum tensor. + + Returns: + The orthogonalized gradient tensor. 
+ """ + if self.pg_collection: + tp_group = ( + self.pg_collection.expt_tp + if getattr(p, 'expert_tp', False) + else self.pg_collection.tp + ) + else: + tp_group = None + partition_dim = None if self.mode == "blockwise" else getattr(p, "partition_dim", None) + if partition_dim == -1: + # llm-shower use different default value for partition_dim than TE. + # Because -1 is a valid index for ndarray, we decided to not overload it. + partition_dim = None + if self.split_qkv and self.is_qkv_fn(p): # type: ignore[misc] + # split grouped attention parameters (e.g., QKV, GQA, etc.) + qkv_grads = torch.split(grad, self.qkv_split_shapes, dim=0) + + # Apply Newton-Schulz to each component + qkv_whitened = [ + self.orthogonalize_fn(g, tp_group=tp_group, partition_dim=partition_dim) + for g in qkv_grads + ] + qkv_scales = [ + self.scale_factor_fn(g.size(0), g.size(1), partition_dim) for g in qkv_grads + ] + + # Apply individual scales to each component and concatenate + grad = torch.cat( + [whitened * scale for whitened, scale in zip(qkv_whitened, qkv_scales)] + ) + else: + grad = self.orthogonalize_fn( + grad, tp_group=tp_group, partition_dim=partition_dim + ) * self.scale_factor_fn(grad.size(0), grad.size(1), partition_dim) + return grad + + +def get_megatron_muon_optimizer( + config: OptimizerConfig, + model_chunks: List[MegatronModule], + no_weight_decay_cond: Optional[Callable] = None, + scale_lr_cond: Optional[Callable] = None, + lr_mult: float = 1.0, + use_gloo_process_groups: bool = True, + layer_wise_distributed_optimizer: bool = False, + pg_collection: Optional[ProcessGroupCollection] = None, +) -> MegatronOptimizer: + """This function is used to get the muon optimizer for the model chunks. + It is used to get the muon optimizer for the model chunks. + + Args: + config (OptimizerConfig): optimizer configuration object. + model_chunks (List[MegatronModule]): model chunks to get optimizer for. 
+ no_weight_decay_cond (func, optional): function to determine whether a parameter + should not perform weight decay. Defaults to None. + scale_lr_cond (func, optional): function to determine whether a parameter + should have a scaled learning rate. Defaults to None. + lr_mult (float, optional): learning rate multiplier for parameters that + satisfy scale_lr_cond. Defaults to 1.0. + use_gloo_process_groups (bool): if false, disable use of Gloo process groups + in underlying Megatron optimizers. + layer_wise_distributed_optimizer (bool): if true, use layer-wise distributed optimizer. + Defaults to False. + """ + assert HAVE_EMERGING_OPTIMIZERS, "Emerging Optimizers is not installed." + + # dist-optim is not supported due to strong coupling with how DDP init grad buffer + # in theory we can put some weight to use non-dist-muon and rest to dist-adam + # but there are strong dependencies and assumptions in DDP that prevent it + if config.use_distributed_optimizer: + raise Exception('muon with dist optimizer is not supported.') + + # before this function receives a properly created collection + if pg_collection is None: + pg_collection = ProcessGroupCollection.use_mpu_process_groups() + pg_collection.dp_cp = parallel_state.get_data_parallel_group(with_context_parallel=True) + pg_collection.expt_dp = parallel_state.get_expert_data_parallel_group() + + log_single_rank(logger, logging.INFO, f'Setting up emerging optimizer with config {config}') + + optimizers = [] + # record list of non/linear params + linear_params = [] + nonlinear_params = [] + for model_chunk in model_chunks: + for name, param in model_chunk.named_parameters(): + if not param.requires_grad: + continue + # add flag for expert weight so optimizer can figure out which tp group it uses + # alternatively, create new param group and save tp_group.
this require more + # change in optimizer + if 'experts' in name and 'shared' not in name: + param.expert_tp = True + # TODO(deyuf): might not be sufficient for future algorithm. revisit this conditioning + if not getattr(param, 'is_embedding_or_output_parameter', False) and not ( + len(param.shape) == 1 + ): + linear_params.append(param) + else: + nonlinear_params.append(param) + + # freezing nonlinear params and get param groups for muon + for param in nonlinear_params: + param.requires_grad = False + + linear_param_groups = _get_param_groups( + model_chunks, + no_weight_decay_cond, + scale_lr_cond, + lr_mult, + lr=config.lr, + min_lr=config.min_lr, + decoupled_lr=config.decoupled_lr, + decoupled_min_lr=config.decoupled_min_lr, + ) + + # TODO(deyuf): support qkv split + optimizer = TensorParallelMuon( + linear_param_groups, + lr=config.lr, + momentum_beta=config.muon_momentum, + use_nesterov=config.muon_use_nesterov, + weight_decay=config.weight_decay, + fp32_matmul_prec=config.muon_fp32_matmul_prec, + num_ns_steps=config.muon_num_ns_steps, + scale_mode=config.muon_scale_mode, + split_qkv=False, + qkv_split_shapes=None, + extra_scale_factor=config.muon_extra_scale_factor, + pg_collection=pg_collection, + mode=config.muon_tp_mode, + ) + + # set config here to: + # 1. get adam for rest of layer + # 2. avoid ChainedOptimizer check fail that assert all optimizers are same kind + # side effect is muon optimizer will have wrong name str, i.e. config.optimizer == 'adam' + # TODO(deyuf): allow user to select optimizer mix and relax ChainedOptimizer design + config.optimizer = 'adam' + + # need to wrap into megatron mix precision optimizer. 
(only support bf16 w/o loss scale now) + if config.fp16: + raise Exception('muon with fp16 is not supported.') + reset_config_bf16 = False + if config.bf16: + if layer_wise_distributed_optimizer: + # creating master weight before layerwise sharding will lead to unnecessary master + # weight so here we delay master weight creation into layer_wise unset config.bf16 + # will also result in all optimizers below(adam) to also not be wrapped + config.bf16 = False + reset_config_bf16 = True + else: + # if not using layer_wise wrapper, just create master weight here is fine + optimizer = Float16OptimizerWithFloat16Params(optimizer, config, None, None) + else: + optimizer = FP32Optimizer(optimizer, config, None) + + optimizers.append(optimizer) + + # done with muon, unfreeze nonlinear and freeze linear + for param in nonlinear_params: + param.requires_grad = True + for param in linear_params: + param.requires_grad = False + + # call original get. linear params will be skipped since they're freezed + chained_adam = get_megatron_optimizer( + config, model_chunks, no_weight_decay_cond, scale_lr_cond, lr_mult, use_gloo_process_groups + ) + + # unfreeze everything + for param in linear_params: + param.requires_grad = True + + # chain everything together + optimizers += chained_adam.chained_optimizers + + if layer_wise_distributed_optimizer: + log_single_rank(logger, logging.INFO, 'Using LayerWiseDistributedOptimizer for Muon') + if reset_config_bf16: + config.bf16 = True + return LayerWiseDistributedOptimizer(optimizers, config, pg_collection) + return ChainedOptimizer(optimizers) diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index 8151d5e9de1..65e1fd6a71f 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -16,7 +16,7 @@ class OptimizerConfig: # General ############## optimizer: str = 'adam' - """Optimizer to use (one of Adam or SGD).""" + """Optimizer to use (one 
of Adam, SGD, or Muon).""" lr: Optional[float] = None """Initial learning rate. Depending on decay style and initial warmup, the learning rate at each @@ -124,6 +124,29 @@ class OptimizerConfig: sgd_momentum: float = 0.9 """Momentum factor for SGD optimizer.""" + # Muon + muon_momentum: float = 0.95 + """The momentum used by the internal SGD.""" + + muon_use_nesterov: bool = True + """Whether to use Nesterov-style momentum in the internal SGD.""" + + muon_scale_mode: str = "spectral" + """The mode to use for the scale factor. Defaults to "spectral".""" + + muon_fp32_matmul_prec: str = "medium" + """The precision to use for the fp32 matmul. Defaults to "medium".""" + + muon_num_ns_steps: int = 5 + """The number of iteration steps to use in the Newton-Schulz iteration.""" + + muon_tp_mode: str = "blockwise" + """How to perform NS calculation for tensor parallel weights. Defaults to "blockwise".""" + + muon_extra_scale_factor: float = 1.0 + """Additional scale factor for the muon update.""" + + ####################### # Distributed optimizer ####################### diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index e6e65425b23..773c61597bc 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -56,6 +56,7 @@ HAVE_TE = False _MODEL_PARALLEL_ATTRIBUTE_DEFAULTS = { + "expert_tp": False, "tensor_model_parallel": False, "partition_dim": -1, "partition_stride": 1, diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 11fa9ad2d58..dc33a639e8d 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1121,6 +1121,13 @@ def validate_args(args, defaults={}): args.no_load_rng = True print('Warning: disabling --no-load-rng for upcycling.') + # Muon optimizer check + if 'muon' in args.optimizer: + assert not args.use_distributed_optimizer, "Muon optimizer does not support distributed optimizer for now."
+ assert not args.use_torch_fsdp2, "Muon optimizer does not support Torch-FSDP2 for now." + assert not args.use_megatron_fsdp, "Muon optimizer does not support Megatron-FSDP for now." + assert args.ckpt_format == "torch", "Muon optimizer only supports torch checkpoint format for now." + # Optimizer CPU offload check if args.optimizer_cpu_offload: assert args.use_precision_aware_optimizer, ( @@ -1866,6 +1873,25 @@ def _add_regularization_args(parser): 'numerical stability') group.add_argument('--sgd-momentum', type=float, default=0.9, help='Momentum factor for sgd') + group.add_argument('--muon-momentum', type=float, default=0.95, + help='Momentum factor for Muon optimizer') + group.add_argument('--muon-no-use-nesterov', action='store_false', default=True, + dest='muon_use_nesterov', + help='Whether to use Nesterov-style momentum in the internal SGD') + group.add_argument('--muon-scale-mode', type=str, default='spectral', + choices=['spectral', 'unit_rms_norm', 'shape_scaling'], + help='Scale mode for Muon optimizer') + group.add_argument('--muon-fp32-matmul-prec', type=str, default='medium', + choices=['low', 'medium', 'high'], + help='FP32 matmul precision for Newton-Schulz iteration') + group.add_argument('--muon-num-ns-steps', type=int, default=5, + help='Number of Newton-Schulz steps for Muon optimizer') + group.add_argument('--muon-tp-mode', type=str, default='blockwise', + choices=['blockwise', 'duplicated', 'distributed'], + help='How to perform NS calculation for tensor model parallel weights') + group.add_argument('--muon-extra-scale-factor', type=float, default=1.0, + help='Additional scale factor for the muon update') + return parser @@ -2152,7 +2178,7 @@ def _add_training_args(parser): help='Enable bias only in the QKV linear layers', dest='add_qkv_bias') group.add_argument('--optimizer', type=str, default='adam', - choices=['adam', 'sgd'], + choices=['adam', 'sgd', 'muon', 'dist_muon'], help='Optimizer function') 
group.add_argument('--optimizer-cpu-offload', action='store_true', help='Offload optimizer state to CPU') diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index 4302b3fa8fd..deff728aa23 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -486,6 +486,14 @@ def save_checkpoint(iteration, model, optimizer, opt_param_scheduler, num_floati if not optimizer.is_stub_optimizer: optimizer.save_parameter_state(optim_checkpoint_name) + # LayerWiseDistributedOptimizer save + if getattr(args, "optimizer", "adam").startswith("dist_"): + dp_rank = mpu.get_data_parallel_rank() + optim_checkpoint_name = os.path.join(os.path.dirname(checkpoint_name), f"layer_wise_optimizer_{dp_rank}.pt") + ensure_directory_exists(optim_checkpoint_name) + if not optimizer.is_stub_optimizer: + optimizer.save_state_dict_to_file(optim_checkpoint_name) + async_save_request = None if args.async_save: if ckpt_type == CheckpointType.LEGACY: @@ -1655,7 +1663,11 @@ def load_model_state_dict(module, state_dict, strict: bool): if not release and not args.finetune and not args.no_load_optim: try: # Load state dict. - if not skip_load_to_model_and_opt and optimizer is not None and not optimizer.is_stub_optimizer: + if getattr(args, "optimizer", "adam").startswith("dist_"): + dp_rank = mpu.get_data_parallel_rank() + optim_checkpoint_name = os.path.join(os.path.dirname(checkpoint_name), f"layer_wise_optimizer_{dp_rank}.pt") + optimizer.load_state_dict_from_file(optim_checkpoint_name) + elif not skip_load_to_model_and_opt and optimizer is not None and not optimizer.is_stub_optimizer: optimizer.load_state_dict(state_dict['optimizer']) # Load distributed optimizer's custom parameter state. 
diff --git a/megatron/training/training.py b/megatron/training/training.py index 23a6ba6170f..bc5fefa86ba 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -75,6 +75,7 @@ from megatron.core.distributed import finalize_model_grads from megatron.core.enums import ModelType from megatron.core.optimizer import get_megatron_optimizer, OptimizerConfig +from megatron.core.optimizer.muon import get_megatron_muon_optimizer from megatron.core.rerun_state_machine import ( get_rerun_state_machine, destroy_rerun_state_machine, @@ -1090,17 +1091,30 @@ def setup_model_and_optimizer( kwargs[f.name] = getattr(args, f.name) config = OptimizerConfig(**kwargs) config.timers = timers - optimizer = get_megatron_optimizer( - config, - model, - no_wd_decay_cond, - scale_lr_cond, - lr_mult, - use_gloo_process_groups=args.enable_gloo_process_groups, - # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings - # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903 - default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, - ) + + if 'muon' not in config.optimizer: + optimizer = get_megatron_optimizer( + config, + model, + no_wd_decay_cond, + scale_lr_cond, + lr_mult, + use_gloo_process_groups=args.enable_gloo_process_groups, + # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings + # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903 + default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, + ) + else: + optimizer = get_megatron_muon_optimizer( + config, + model, + no_wd_decay_cond, + scale_lr_cond, + lr_mult, + use_gloo_process_groups=args.enable_gloo_process_groups, + layer_wise_distributed_optimizer='dist' in config.optimizer, + ) + opt_param_scheduler = get_optimizer_param_scheduler(optimizer) one_logger and 
one_logger.log_metrics({"app_build_optimzer_finish_time": one_logger_utils.get_timestamp_in_ms()}) diff --git a/pyproject.toml b/pyproject.toml index 71e87bc8b83..3362a0181c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -139,6 +139,7 @@ linting = [ ] ci = ["python-gitlab", "slack-sdk", "pandas"] flash_mla = ["flash_mla"] +emerging_optimizers = ["emerging_optimizers"] [tool.uv] default-groups = ["linting", "build", "test"] @@ -165,7 +166,9 @@ override-dependencies = [ flash_mla = [ { git = "https://github.com/deepseek-ai/FlashMLA", rev = "9edee0c022cd0938148a18e334203b0aab43aa19" }, ] + # transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "0289e76380088358a584d809faf69effab1a7cda" } # on `release_v2.7 +emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev= "fb1add873e7851ec34b48581ea1b15761b73d189"} [tool.isort] profile = "black" # black-compatible diff --git a/tests/unit_tests/test_muon_optimizer.py b/tests/unit_tests/test_muon_optimizer.py new file mode 100644 index 00000000000..d5dffcd0e19 --- /dev/null +++ b/tests/unit_tests/test_muon_optimizer.py @@ -0,0 +1,245 @@ +import os +import pytest + +from packaging.version import Version + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig +from megatron.core.optimizer import OptimizerConfig +from megatron.core.optimizer.muon import get_megatron_muon_optimizer, TensorParallelMuon +from megatron.core.transformer import TransformerConfig +from tests.unit_tests.test_utilities import Utils +from tests.unit_tests.test_utils import _deinit_distributed, _init_distributed + + +class Net(nn.Module): + def __init__(self): + super().__init__() + self.fc1 = nn.Linear(80, 48) + self.fc2 = nn.Linear(48, 10) + + def forward(self, x): + x = F.relu(self.fc1(x)) + x = self.fc2(x) + return x + + +@pytest.mark.skipif( + 
Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"), + reason="Skip muon optimizer for LTS test", +) +def test_muon_optimizer_smoke(): + """Smoke test for TensorParallelMuon optimizer.""" + # Create a simple linear model for testing + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + # Create TensorParallelMuon optimizer + optimizer = TensorParallelMuon( + params=[model.weight], + lr=0.01, + momentum_beta=0.95, + use_nesterov=True, + weight_decay=0.01, + use_decoupled_weight_decay=True, + split_qkv=False, + fp32_matmul_prec="medium", + num_ns_steps=5, + scale_mode="spectral", + extra_scale_factor=1.0, + pg_collection=None, + mode="duplicated", + ) + + # Test basic properties + assert optimizer is not None, "Optimizer should not be None" + assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups" + assert len(optimizer.param_groups) > 0, "Optimizer should have at least one parameter group" + + # Test forward and backward pass + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + # Store original weight + original_weight = model.weight.data.clone() + + # Test optimizer step + optimizer.step() + + # Verify weight was updated + assert not torch.equal( + model.weight.data, original_weight + ), "Weight should be updated after optimizer step" + + # Test zero_grad + optimizer.zero_grad() + assert model.weight.grad is None or torch.all( + model.weight.grad == 0 + ), "Gradients should be zeroed" + + # Test state_dict and load_state_dict + state_dict = optimizer.state_dict() + assert 'state' in state_dict, "State dict should contain state" + assert 'param_groups' in state_dict, "State dict should contain param_groups" + + # Load state dict should not raise error + optimizer.load_state_dict(state_dict) + + +@pytest.mark.skipif( + 
Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"), + reason="Skip muon optimizer for LTS test", +) +def test_get_megatron_muon_optimizer_smoke(): + """Smoke test for get_megatron_muon_optimizer function.""" + world = int(os.getenv('WORLD_SIZE', '1')) + rank = int(os.getenv('RANK', '0')) + + # Setup: distributed, model + _init_distributed(world, rank) + Utils.initialize_model_parallel() + + # Create a model with both linear and non-linear parameters + model = Net().bfloat16().cuda() + model.requires_grad_(True) + + # Wrap in DDP (required for Megatron optimizer) + ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False) + model = DistributedDataParallel( + TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + ) + + # Ensure all parameters require gradients + for param in model.parameters(): + assert param.requires_grad, "All parameters should require gradients" + + # Create optimizer config for Muon + optimizer_config = OptimizerConfig( + optimizer='muon', # This will be changed internally to 'adam' for non-linear params + lr=0.01, + weight_decay=0.01, + bf16=True, + use_distributed_optimizer=False, # Muon doesn't support distributed optimizer + muon_momentum=0.95, + muon_use_nesterov=True, + muon_fp32_matmul_prec="medium", + muon_num_ns_steps=5, + muon_scale_mode="spectral", + muon_tp_mode="duplicated", + ) + + # Test creating the optimizer + optimizer = get_megatron_muon_optimizer( + config=optimizer_config, + model_chunks=[model], + use_gloo_process_groups=True, + layer_wise_distributed_optimizer=False, + ) + + # Test basic properties + assert optimizer is not None, "Optimizer should not be None" + assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups" + assert hasattr(optimizer, 'chained_optimizers'), "Should be a ChainedOptimizer" + assert len(optimizer.chained_optimizers) >= 1, "Should have at least one chained optimizer" + + # Test forward and backward pass + 
input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + # Store original parameters + original_params = {} + for name, param in model.named_parameters(): + original_params[name] = param.data.clone() + + # Test optimizer step + optimizer.step() + + # Verify at least some parameters were updated + params_updated = 0 + for name, param in model.named_parameters(): + if not torch.equal(param.data, original_params[name]): + params_updated += 1 + + assert params_updated > 0, "At least some parameters should be updated after optimizer step" + + # Test zero_grad + optimizer.zero_grad() + for param in model.parameters(): + assert param.grad is None or torch.all( + param.grad == 0 + ), f"Gradients should be zeroed for all parameters" + + # Test state_dict and load_state_dict + state_dict = optimizer.state_dict() + assert isinstance(state_dict, list), "State dict should be a list" + + # Load state dict should not raise error + optimizer.load_state_dict(state_dict) + + _deinit_distributed() + + +@pytest.mark.skipif( + Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"), + reason="Skip muon optimizer for LTS test", +) +def test_get_megatron_muon_optimizer_validation(): + """Test validation logic for get_megatron_muon_optimizer.""" + world = int(os.getenv('WORLD_SIZE', '1')) + rank = int(os.getenv('RANK', '0')) + + # Setup: distributed, model + _init_distributed(world, rank) + Utils.initialize_model_parallel() + + # Create a simple model + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.bfloat16, device='cuda') + model.requires_grad_(True) + ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False) + model = DistributedDataParallel( + TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + ) + + # Test 1: Distributed optimizer should raise exception + optimizer_config_dist = OptimizerConfig( + optimizer='muon', + lr=0.01, 
+ bf16=True, + use_distributed_optimizer=True, # This should cause an exception + ) + + with pytest.raises(Exception, match='muon with dist optimizer is not supported'): + get_megatron_muon_optimizer(config=optimizer_config_dist, model_chunks=[model]) + + # Test 2: FP16 should raise exception + optimizer_config_fp16 = OptimizerConfig( + optimizer='muon', + lr=0.01, + fp16=True, # This should cause an exception + use_distributed_optimizer=False, + ) + + with pytest.raises(Exception, match='muon with fp16 is not supported'): + get_megatron_muon_optimizer(config=optimizer_config_fp16, model_chunks=[model]) + + # Test 3: Invalid num_ns_steps should raise exception + optimizer_config_invalid_ns = OptimizerConfig( + optimizer='muon', + lr=0.01, + bf16=True, + use_distributed_optimizer=False, + muon_num_ns_steps=0, # This should cause an exception + ) + + with pytest.raises(ValueError, match='num_ns_steps must be at least 1'): + get_megatron_muon_optimizer(config=optimizer_config_invalid_ns, model_chunks=[model]) + + _deinit_distributed() diff --git a/uv.lock b/uv.lock index 6a674513f11..84da2bd685a 100644 --- a/uv.lock +++ b/uv.lock @@ -1181,6 +1181,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/87/62/9773de14fe6c45c23649e98b83231fffd7b9892b6cf863251dc2afa73643/einops-0.8.1-py3-none-any.whl", hash = "sha256:919387eb55330f5757c6bea9165c5ff5cfe63a642682ea788a6d472576d81737", size = 64359, upload-time = "2025-02-09T03:17:01.998Z" }, ] +[[package]] +name = "emerging-optimizers" +version = "0.1.0" +source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=fb1add873e7851ec34b48581ea1b15761b73d189#fb1add873e7851ec34b48581ea1b15761b73d189" } +dependencies = [ + { name = "absl-py" }, + { name = "torch", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions" }, +] + [[package]] name = "exceptiongroup" version = "1.3.0" @@ -2227,6 +2237,9 @@ docs 
= [ { name = "sphinx-autodoc2" }, { name = "sphinx-copybutton" }, ] +emerging-optimizers = [ + { name = "emerging-optimizers" }, +] flash-mla = [ { name = "flash-mla" }, ] @@ -2314,6 +2327,7 @@ docs = [ { name = "sphinx-autodoc2" }, { name = "sphinx-copybutton" }, ] +emerging-optimizers = [{ name = "emerging-optimizers", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=fb1add873e7851ec34b48581ea1b15761b73d189" }] flash-mla = [{ name = "flash-mla", git = "https://github.com/deepseek-ai/FlashMLA?rev=9edee0c022cd0938148a18e334203b0aab43aa19" }] linting = [ { name = "black", specifier = "==24.4.2" }, From 4bdc4e279c43b58bbbb61cdcbe533d4f5d7c4b45 Mon Sep 17 00:00:00 2001 From: Santosh Bhavani Date: Sat, 11 Oct 2025 07:21:56 -0700 Subject: [PATCH 012/334] ADLR/megatron-lm!4060 - Update dev branch README Co-authored-by: Santosh Bhavani --- README.md | 460 ++++-------------------------------------------------- 1 file changed, 32 insertions(+), 428 deletions(-) diff --git a/README.md b/README.md index 85f21a4322e..6765569370b 100644 --- a/README.md +++ b/README.md @@ -10,461 +10,65 @@ Megatron-LM & Megatron Core
-## ⚡ Quick Start +> ## 🚨 **DEVELOPMENT BRANCH** +> ⚠️ **EXPERIMENTAL FEATURES** - This is the **dev branch** with experimental features. +> +> **→ For releases and comprehensive documentation, visit the [main branch](https://github.com/NVIDIA/Megatron-LM)** -```bash -# 1. Install Megatron Core with required dependencies -pip install megatron-core -pip install --no-build-isolation transformer-engine[pytorch] +## ⚡ Quickstart -# 2. Clone repository for examples -git clone https://github.com/NVIDIA/Megatron-LM.git +```bash +# Clone the dev branch +git clone -b dev https://github.com/NVIDIA/Megatron-LM.git cd Megatron-LM -``` - -**→ [Complete Installation Guide](#installation)** - Docker, pip variants (dev,lts,etc.), source installation, and system requirements - -# Latest News - -- 🔄 NEW! **[Megatron Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge)** - Bidirectional converter for interoperability between Hugging Face and Megatron checkpoints, featuring production-ready recipes for popular models. -- 🗺️ **[MoE Q3-Q4 2025 Roadmap](https://github.com/NVIDIA/Megatron-LM/issues/1729)** - Comprehensive roadmap for MoE features including DeepSeek-V3, Qwen3, advanced parallelism strategies, FP8 optimizations, and Blackwell performance enhancements. -- 🚀 **[GPT-OSS Implementation](https://github.com/NVIDIA/Megatron-LM/issues/1739)** - Advanced features including YaRN RoPE scaling, attention sinks, and custom activation functions are being integrated into Megatron Core. -- **[2025/06]** **[Megatron MoE Model Zoo](https://github.com/yanring/Megatron-MoE-ModelZoo)** - Best practices and optimized configurations for training DeepSeek-V3, Mixtral, and Qwen3 MoE models with performance benchmarking and checkpoint conversion tools. -- **[2025/05]** Megatron Core v0.11.0 brings new capabilities for multi-data center LLM training ([blog](https://developer.nvidia.com/blog/turbocharge-llm-training-across-long-haul-data-center-networks-with-nvidia-nemo-framework/)). - -
-Previous News - -- **[2024/07]** Megatron Core v0.7 improves scalability and training resiliency and adds support for multimodal training ([blog](https://developer.nvidia.com/blog/train-generative-ai-models-more-efficiently-with-new-nvidia-Megatron-Core-functionalities/)). -- **[2024/06]** Megatron Core added supports for Mamba-based models. Check out our paper [An Empirical Study of Mamba-based Language Models](https://arxiv.org/pdf/2406.07887) and [code example](https://github.com/NVIDIA/Megatron-LM/tree/ssm/examples/mamba). -- **[2024/01 Announcement]** NVIDIA has released the core capabilities in **Megatron-LM** into [**Megatron Core**](https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core) in this repository. Megatron Core expands upon Megatron-LM's GPU-optimized techniques with more cutting-edge innovations on system-level optimizations, featuring composable and modular APIs. Explore the [Megatron Core intro](#Megatron Core) for more details. -
+# Install from source with dev dependencies (includes transformer_engine) +pip install -e .[mlm,dev] +```
Table of Contents **Getting Started** -- [Quick Start](#-quick-start) -- [Latest News](#latest-news) -- [Megatron Overview](#megatron-overview) - - [Project Structure](#project-structure) - - [Megatron-LM: Reference Implementation](#megatron-lm-reference-implementation) - - [Megatron Core: Production Library](#megatron-core-production-library) -- [Installation](#installation) - - [Docker (Recommended)](#-docker-recommended) - - [Pip Installation](#-pip-installation) - - [Source Installation](#-source-installation) - - [System Requirements](#system-requirements) - -**Core Features** -- [Performance Benchmarking](#performance-benchmarking) - - [Weak Scaling Results](#weak-scaling-results) - - [Strong Scaling Results](#strong-scaling-results) -- [Ecosystem Libraries](#ecosystem-libraries) - -**Training** -- [Training](#training) - - [Getting Started](#getting-started) - - [Data Preparation](#data-preparation) -- [Parallelism Strategies](#parallelism-strategies) - - [Data Parallelism (DP)](#data-parallelism-dp) - - [Tensor Parallelism (TP)](#tensor-parallelism-tp) - - [Pipeline Parallelism (PP)](#pipeline-parallelism-pp) - - [Context Parallelism (CP)](#context-parallelism-cp) - - [Expert Parallelism (EP)](#expert-parallelism-ep) - - [Parallelism Selection Guide](#parallelism-selection-guide) -- [Performance Optimizations](#performance-optimizations) +- [⚡ Quick Start](#-quick-start) +- [🧠 Dev Branch Philosophy](#-dev-branch-philosophy) +- [📊 Performance & Benchmarking](#-performance--benchmarking) +- [👥 Community & Support](#-community--support) -**Resources** -- [Examples](./examples/) - Training scripts and tutorials -- [Documentation](https://docs.nvidia.com/Megatron-Core/) - Official docs -- [Roadmaps](#roadmaps) - Development roadmaps and feature tracking -- [Community & Support](#-community--support) - Get help and contribute - - [Getting Help](#getting-help) - - [Contributing](#contributing) - - [Citation](#citation) +**For Complete Documentation** → [Main 
Branch](https://github.com/NVIDIA/Megatron-LM) | [Official Docs](https://docs.nvidia.com/Megatron-Core/)
-# Megatron Overview - -## Project Structure -``` -Megatron-LM/ -├── megatron/ -│ ├── core/ # Megatron Core (kernels, parallelism, building blocks) -│ │ ├── models/ # Transformer models -│ │ ├── transformer/ # Transformer building blocks -│ │ ├── tensor_parallel/ # Tensor parallelism -│ │ ├── pipeline_parallel/ # Pipeline parallelism -│ │ ├── distributed/ # Distributed training (FSDP, DDP) -│ │ ├── optimizer/ # Optimizers -│ │ ├── datasets/ # Dataset loaders -│ │ ├── inference/ # Inference engines -│ │ └── export/ # Model export (e.g. TensorRT-LLM) -│ ├── training/ # Training scripts -│ ├── inference/ # Inference server -│ ├── legacy/ # Legacy components -│ └── post_training/ # Post-training (RLHF, etc.) -├── examples/ # Ready-to-use training examples -├── tools/ # Utility tools -├── tests/ # Comprehensive test suite -└── docs/ # Documentation -``` - -### Megatron-LM: Reference Implementation -**Reference implementation** that includes Megatron Core plus everything needed to train models. - -**Best for:** -- **Training state-of-the-art foundation models** at scale with cutting-edge performance on latest NVIDIA hardware -- **Research teams** exploring new architectures and training techniques -- **Learning distributed training** concepts and best practices -- **Quick experimentation** with proven model configurations - -**What you get:** -- Pre-configured training scripts for GPT, LLama, DeepSeek, Qwen, and more. -- End-to-end examples from data prep to evaluation -- Research-focused tools and utilities - -### Megatron Core: Composable Library -**Composable library** with GPU-optimized building blocks for custom training frameworks. - -**Best for:** -- **Framework developers** building on top of modular and optimized components -- **Research teams** needing custom training loops, optimizers, or data pipelines -- **ML engineers** requiring fault-tolerant training pipelines - -**What you get:** -- Composable transformer building blocks (attention, MLP, etc.) 
-- Advanced parallelism strategies (TP, PP, DP, EP, CP) -- Pipeline schedules and distributed optimizers -- Mixed precision support (FP16, BF16, FP8) -- GPU-optimized kernels and memory management -- High-performance dataloaders and dataset utilities -- Model architectures (LLaMA, Qwen, GPT, Mixtral, Mamba, etc.) - -## Ecosystem Libraries - -**Libraries used by Megatron Core:** - -- **[Megatron Energon](https://github.com/NVIDIA/Megatron-Energon)** 📣 **NEW!** - Multi-modal data loader (text, images, video, audio) with distributed loading and dataset blending -- **[Transformer Engine](https://github.com/NVIDIA/TransformerEngine)** - Optimized kernels and FP8 mixed precision support -- **[Resiliency Extension (NVRx)](https://github.com/NVIDIA/nvidia-resiliency-ext)** - Fault tolerant training with failure detection and recovery - -**Libraries using Megatron Core:** - -- **[Megatron Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge)** - Training library with bidirectional Hugging Face ↔ Megatron checkpoint conversion, flexible training loops, and production-ready recipes -- **[NeMo RL](https://github.com/NVIDIA-NeMo/RL)** - Scalable toolkit for efficient reinforcement learning with RLHF, DPO, and other post-training methods -- **[NeMo Framework](https://docs.nvidia.com/nemo-framework/user-guide/latest/overview.html)** - Enterprise framework with cloud-native support and end-to-end examples -- **[TensorRT Model Optimizer (ModelOpt)](https://github.com/NVIDIA/TensorRT-Model-Optimizer)** - Model optimization toolkit for quantization, pruning, and distillation - -**Compatible with:** [Hugging Face Accelerate](https://github.com/huggingface/accelerate), [Colossal-AI](https://github.com/hpcaitech/ColossalAI), [DeepSpeed](https://github.com/microsoft/DeepSpeed) - -# Installation - -## 🐳 Docker (Recommended) - -We strongly recommend using the previous releases of [PyTorch NGC Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) rather than the latest 
one for optimal compatibility with Megatron Core release and testing. Our releases are always based on the previous month's NGC container, so this ensures compatibility and stability. - -This container comes with all dependencies pre-installed with compatible versions and optimized configurations for NVIDIA GPUs: - -- PyTorch (latest stable version) -- CUDA, cuDNN, NCCL (latest stable versions) -- Support for FP8 on NVIDIA Hopper, Ada, and Blackwell GPUs -- For best performance, use NVIDIA Turing GPU architecture generations and later - -```bash -# Run container with mounted directories -docker run --runtime --nvidia --gpus all -it --rm \ - -v /path/to/megatron:/workspace/megatron \ - -v /path/to/dataset:/workspace/dataset \ - -v /path/to/checkpoints:/workspace/checkpoints \ - nvcr.io/nvidia/pytorch:25.04-py3 -``` - -## Pip Installation - -Megatron Core offers support for two NGC PyTorch containers: - -- `dev`: Moving head that supports the most recent upstream dependencies -- `lts`: Long-term support of NGC PyTorch 24.01 - -Both containers can be combined with `mlm` which adds package dependencies for Megatron-LM on top of Megatron Core. - -```bash -# Install the latest release with minimal dependencies (no Transformer Engine) -pip install megatron-core[dev] -``` - -```bash -# Install packages for LTS support NGC PyTorch 24.01 -pip install megatron-core[lts] -``` - -For a version of Megatron Core with only torch, run: - -```bash -pip install megatron-core -``` - -For dependencies required by Megatron-LM, please run: - -```bash -pip install megatron-core[mlm] -``` - -## Source Installation - -For development or latest features: - -For Hybrid models, Megatron Core requires [mamba](https://github.com/state-spaces/mamba). If the pre-built wheel in PyPI does not fit your environment, you can fall back to an install script Megatron Core uses in its CI system. 
For this, please install `uv` first: - -```bash -export UV_VERSION=0.7.2 -export PATH="$HOME/.local/bin:$PATH" -curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh -export UV_PROJECT_ENVIRONMENT=./venv -export PATH="$UV_PROJECT_ENVIRONMENT/bin:$PATH" -export UV_LINK_MODE=copy -``` - -Run the following command to build upstream dependencies from source: - -```bash -# Clone and install -git clone https://github.com/NVIDIA/Megatron-LM.git -cd Megatron-LM - -# Optional: checkout specific release -git checkout core_r0.13.0 - -bash docker/common/install.sh --environment {dev,lts} -``` - -## System Requirements -### Hardware Requirements -- **FP8 Support**: NVIDIA Hopper, Ada, Blackwell GPUs -- **Recommended**: NVIDIA Turing architecture or later -### Software Requirements -- **CUDA/cuDNN/NCCL**: Latest stable versions -- **PyTorch**: Latest stable version -- **Transformer Engine**: Latest stable version -- **Python**: 3.12 recommended -# Performance Benchmarking -For our latest performance benchmarking results, please refer to [NVIDIA NeMo Framework Performance Summary](https://docs.nvidia.com/nemo-framework/user-guide/latest/performance/performance_summary.html). -Our codebase efficiently trains models from 2B to 462B parameters across thousands of GPUs, achieving up to **47% Model FLOP Utilization (MFU)** on H100 clusters. 
+## Dev Branch Philosophy -![Model table](images/model_table.png) - -**Benchmark Configuration:** -- **Vocabulary size**: 131,072 tokens -- **Sequence length**: 4096 tokens -- **Model scaling**: Varied hidden size, attention heads, and layers to achieve target parameter counts -- **Communication optimizations**: Fine-grained overlapping with DP (`--overlap-grad-reduce`, `--overlap-param-gather`), TP (`--tp-comm-overlap`), and PP (enabled by default) - -**Key Results:** -- **6144 H100 GPUs**: Successfully benchmarked 462B parameter model training -- **Superlinear scaling**: MFU increases from 41% to 47-48% with model size -- **End-to-end measurement**: Throughputs include all operations (data loading, optimizer steps, communication, logging) -- **Production ready**: Full training pipeline with checkpointing and fault tolerance -- *Note: Performance results measured without training to convergence* - -## Weak Scaling Results -Our weak scaled results show superlinear scaling (MFU increases from 41% for the smallest model considered to 47-48% for the largest models); this is because larger GEMMs have higher arithmetic intensity and are consequently more efficient to execute. - -![Weak scaling](images/weak_scaling.png) - -## Strong Scaling Results -We also strong scaled the standard GPT-3 model (our version has slightly more than 175 billion parameters due to larger vocabulary size) from 96 H100 GPUs to 4608 GPUs, using the same batch size of 1152 sequences throughout. Communication becomes more exposed at larger scale, leading to a reduction in MFU from 47% to 42%. 
- -![Strong scaling](images/strong_scaling.png) - -# Training - -## Getting Started - -### Simple Training Example -```bash -# Distributed training example (2 GPUs, mock data) -torchrun --nproc_per_node=2 examples/run_simple_mcore_train_loop.py -``` - -### LLama-3 Training Example -```bash -# 8 GPUs, FP8 precision, mock data -./examples/llama/train_llama3_8b_fp8.sh -``` - -## Data Preparation - -### JSONL Data Format -```json -{"text": "Your training text here..."} -{"text": "Another training sample..."} -``` - -### Basic Preprocessing -```bash -python tools/preprocess_data.py \ - --input data.jsonl \ - --output-prefix processed_data \ - --tokenizer-type HuggingFaceTokenizer \ - --tokenizer-model /path/to/tokenizer.model \ - --workers 8 \ - --append-eod -``` - -### Key Arguments -- `--input`: Path to input JSON/JSONL file -- `--output-prefix`: Prefix for output binary files (.bin and .idx) -- `--tokenizer-type`: Tokenizer type (`HuggingFaceTokenizer`, `GPT2BPETokenizer`, etc.) -- `--tokenizer-model`: Path to tokenizer model file -- `--workers`: Number of parallel workers for processing -- `--append-eod`: Add end-of-document token - - - -# Parallelism Strategies - -## Data Parallelism (DP) - -### Standard Data Parallel -```bash -# Standard DDP - replicate model on each GPU -torchrun --nproc_per_node=8 pretrain_gpt.py \ - --data-parallel-sharding-strategy no_shard -``` - -### Fully Sharded Data Parallel (FSDP) -```bash -# Megatron's optimized FSDP (~15% faster than PyTorch FSDP2) ---use-custom-fsdp - -# PyTorch FSDP2 ---use-torch-fsdp2 - -# Sharding strategies ---data-parallel-sharding-strategy optim # Shard optimizer states (ZeRO-1) ---data-parallel-sharding-strategy optim_grads # Shard gradients + optimizer (ZeRO-2) ---data-parallel-sharding-strategy optim_grads_params # Shard parameters + gradients + optimizer (ZeRO-3) -``` - -## Tensor Parallelism (TP) -Split individual model layers across GPUs: -```bash ---tensor-model-parallel-size 4 # 4-way tensor parallelism 
---sequence-parallel # Enable sequence parallelism (recommended with TP) -``` - -## Pipeline Parallelism (PP) -Split model depth across GPUs: -```bash ---pipeline-model-parallel-size 8 # 8 pipeline stages ---virtual-pipeline-model-parallel-size 4 # Virtual pipeline for better load balancing -``` - -## Context Parallelism (CP) -Split long sequences across GPUs for handling long contexts: -```bash ---context-parallel-size 2 # 2-way context parallelism ---cp-comm-type p2p # Communication: p2p, a2a, allgather, a2a+p2p ---hierarchical-context-parallel-sizes 2 4 # Hierarchical context parallelism -``` - -## Expert Parallelism (EP) -For Mixture of Experts (MoE) models: -```bash ---expert-model-parallel-size 4 # 4-way expert parallelism ---num-experts 8 # 8 experts per MoE layer ---moe-grouped-gemm # Optimize expert computation -``` - -## Combining Parallelism Strategies - -### Parallelism Selection Guide - -Based on [NVIDIA NeMo production configurations](https://github.com/NVIDIA/NeMo/tree/main/scripts/performance/recommended_model_configs): - -| Model | Size | GPUs | TP | PP | CP | EP | Notes | -|-------|------|------|----|----|----|----|-------| -| **LLama-3** | 8B | 8 | 1 | 1 | 2 | 1 | CP for long seqlen (8K) | -| **LLama-3** | 70B | 64 | 4 | 4 | 2 | 1 | TP+PP | -| **LLama-3.1** | 405B | 1024 | 8 | 8 | 2 | 1 | 3D parallelism for scale | -| **GPT-3** | 175B | 128-512 | 4 | 8 | 1 | 1 | Large model config | -| **Mixtral** | 8x7B | 64 | 1 | 4 | 1 | 8 | EP for MoE | -| **Mixtral** | 8x22B | 256 | 4 | 4 | 8 | 8 | Combined TP+EP for large MoE | -| **DeepSeek-V3** | 671B | 1024 | 2 | 16 | 1 | 64 | Large MoE config | - -### MoE-Specific Requirements - -**Important**: When combining Expert Parallelism (EP) with Tensor Parallelism (TP), **Sequence Parallelism (SP) must be enabled**. 
- -## Performance Optimizations - -| Feature | Flag | Benefit | -|---------|------|---------| -| **FlashAttention** | `--attention-backend` | Faster attention and lower memory usage | -| **FP8 Training** | `--fp8-hybrid` | Faster training | -| **Activation Checkpointing** | `--recompute-activations` | Reduced memory usage | -| **Data Parallelism Communication Overlap** | `--overlap-grad-reduce` | Faster distributed training | -| **Distributed Optimizer** | `--use-distributed-optimizer` | Reduced checkpointing time | - -**→ [NVIDIA NeMo Framework Performance Tuning Guide](https://docs.nvidia.com/nemo-framework/user-guide/latest/performance/performance-guide.html#performance-tuning-guide)** - Comprehensive performance optimization guide covering advanced tuning techniques, communication overlaps, memory optimizations, and profiling options. - -### FlashAttention -[FlashAttention](https://github.com/Dao-AILab/flash-attention) is a fast and memory-efficient attention algorithm. We recommend the default usage, which uses cuDNN for attention via Transformer Engine and provides up to 50% speedups on forward and 84% on backward propagation with FP8 kernels. The `flash-attn` package is also supported via `--use-flash-attn`. 
- -### Mixed Precision Training -```bash ---fp16 # Standard FP16 ---bf16 # BFloat16 (recommended for large models) ---fp8-hybrid # FP8 training (Hopper, Ada, and Blackwell GPUs) -``` - -### Activation Checkpointing and Recomputation -```bash -# For limited memory ---recompute-activations - -# For extreme memory constraints ---recompute-granularity full \ ---recompute-method uniform -``` - -### Data Parallelism Communication Overlap - -```bash ---overlap-grad-reduce ---overlap-param-gather -``` - -### Distributed Optimizer -```bash ---use-distributed-optimizer -``` +### Fast Iteration +- **Streamlined Review**: 1 code owner + 1 dev approver (can delegate review) + CI/CD -# Roadmaps +### Feature Lifecycle (Coming Soon) +- **6-Month Timeline**: Experimental features must graduate to stable or be deprecated +- **Migration Support**: Assistance provided for feature transitions -Stay up-to-date with our development roadmaps and planned features: +### Stability Expectations +- **Experimental Nature**: Features may change or be removed as development progresses +- **Testing**: All features will pass convergence and performance validation before inclusion +- **Support**: Dev branch issues should include `[DEV]` prefix -- **[MoE Q3-Q4 2025 Roadmap](https://github.com/NVIDIA/Megatron-LM/issues/1729)** - Comprehensive MoE feature development including DeepSeek-V3, Qwen3, advanced parallelism, FP8 optimizations, and Blackwell enhancements -- **[GPT-OSS Implementation Tracker](https://github.com/NVIDIA/Megatron-LM/issues/1739)** - Advanced features including YaRN RoPE scaling, attention sinks, and custom activation functions +## Performance & Benchmarking -*More roadmap trackers will be added soon.* +🚧 **Coming Soon** - We will update this section with performance benchmarks of experimental features as they become available. 
-# Community & Support +## Community & Support -## Getting Help +### Getting Help - 📖 **[Documentation](https://docs.nvidia.com/Megatron-Core/)** - Official documentation - 🐛 **[Issues](https://github.com/NVIDIA/Megatron-LM/issues)** - Bug reports and feature requests -## Contributing +### Contributing We ❤️ contributions! Ways to contribute: - 🐛 **Report bugs** - Help us improve reliability - 💡 **Suggest features** - Shape the future of Megatron Core @@ -473,7 +77,7 @@ We ❤️ contributions! Ways to contribute: **→ [Contributing Guide](./CONTRIBUTING.md)** -## Citation +### Citation ```bibtex @article{megatron-lm, title={Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism}, From eff3f6ab9f074a2f8882c3f222539e2d16912d60 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Sun, 12 Oct 2025 10:27:14 -0700 Subject: [PATCH 013/334] ADLR/megatron-lm!4223 - Ko3n1g/cp/4213 to dev Co-authored-by: Mcore Bot --- .../core/optimizer/layer_wise_optimizer.py | 23 +- megatron/core/optimizer/optimizer_config.py | 1 - .../python_test_utils/common.py | 39 +- .../get_test_results_from_tensorboard_logs.py | 1 - .../shell_test_utils/run_ci_test.sh | 5 +- .../golden_values_dev_dgx_h100.json | 227 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ 
.../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ ...olden_values_dev_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_dev_dgxa100_dracooci.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ ...olden_values_dev_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_dev_dgxa100_dracooci.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 
+++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_coreweave.json | 162 +++++ .../golden_values_dev_dgxh100_eos.json | 162 +++++ .../golden_values_dev_dgxh100_coreweave.json | 162 +++++ .../golden_values_dev_dgxh100_eos.json | 162 +++++ .../golden_values_dev_dgx_h100.json | 288 ++++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 538 +++++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ 
...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 288 ++++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 288 ++++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 538 +++++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ 
.../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 538 +++++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 538 +++++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 288 ++++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 288 ++++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 288 ++++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ 
.../golden_values_dev_dgx_h100.json | 609 +++++++++++++++--- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 271 ++++++-- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 271 ++++++-- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 247 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 288 ++++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 263 +++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 271 ++++++-- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 281 ++++++-- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ 
.../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 285 ++++++-- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 243 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 521 +++++++++++++-- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 481 ++++++++++++-- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 538 +++++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 538 +++++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 
+++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 487 ++++++++++++-- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 288 ++++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 538 +++++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_lts_dgx_a100.json | 
538 +++++++++++++++- ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 538 +++++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_lts_dgx_a100.json | 538 +++++++++++++++- ...olden_values_lts_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_lts_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_coreweave.json | 1 + .../golden_values_dev_dgxh100_eos.json | 1 + .../golden_values_dev_dgxh100_coreweave.json | 1 + .../golden_values_dev_dgxh100_eos.json | 1 + .../golden_values_dev_dgxh100_coreweave.json | 1 + .../golden_values_dev_dgxh100_eos.json | 1 + .../golden_values_dev_dgxh100_coreweave.json | 1 + .../golden_values_dev_dgxh100_eos.json | 1 + .../golden_values_dev_dgx_h100.json | 288 ++++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 265 +++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 269 ++++++-- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgxh100_coreweave.json | 1 + .../golden_values_dev_dgxh100_eos.json | 1 + .../golden_values_dev_dgxh100_coreweave.json | 1 + .../golden_values_dev_dgxh100_eos.json | 1 + .../golden_values_dev_dgx_h100.json | 297 +++++++-- 
...olden_values_dev_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_dev_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 301 +++++++-- ...olden_values_dev_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_dev_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_lts_dgx_a100.json | 311 +++++++-- ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_dev_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_dev_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_dev_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_dev_dgxa100_dracooci.json | 287 +++++++++ 
.../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_dev_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgxh100_coreweave.json | 344 ++++++++++ .../golden_values_dev_dgxh100_eos.json | 344 ++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 597 ++++++++++++++--- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 227 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ 
.../golden_values_dev_dgx_h100.json | 227 ++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ ...olden_values_lts_dgxa100_dracooci-ord.json | 287 +++++++++ .../golden_values_lts_dgxa100_dracooci.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 271 ++++++-- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgxh100_coreweave.json | 160 +++++ .../golden_values_dev_dgxh100_eos.json | 160 +++++ .../golden_values_dev_dgxh100_coreweave.json | 1 + .../golden_values_dev_dgxh100_eos.json | 1 + .../golden_values_dev_dgxh100_coreweave.json | 1 + .../golden_values_dev_dgxh100_eos.json | 1 + .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 288 ++++++++- .../golden_values_dev_dgxh100_coreweave.json | 287 +++++++++ .../golden_values_dev_dgxh100_eos.json | 287 +++++++++ .../golden_values_dev_dgx_h100.json | 163 ++++- .../golden_values_dev_dgxh100_coreweave.json | 162 +++++ .../golden_values_dev_dgxh100_eos.json | 162 +++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ 
.../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_dev_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_dev_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_dev_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_dev_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_dev_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ .../golden_values_dev_dgx_h100.json | 447 ++++++++++++- ...olden_values_dev_dgxa100_dracooci-ord.json | 537 +++++++++++++++ .../golden_values_dev_dgxa100_dracooci.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_coreweave.json | 537 +++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 537 +++++++++++++++ 
.../python_scripts/download_golden_values.py | 40 +- .../python_scripts/launch_jet_workload.py | 7 +- tests/test_utils/recipes/bert.yaml | 2 +- ...pt-dynamic-inference-with-coordinator.yaml | 3 +- .../recipes/gpt-dynamic-inference.yaml | 3 +- tests/test_utils/recipes/gpt-grads.yaml | 2 +- tests/test_utils/recipes/gpt-nemo.yaml | 2 +- .../recipes/gpt-static-inference.yaml | 4 +- tests/test_utils/recipes/gpt.yaml | 2 +- .../recipes/mamba-static-inference.yaml | 4 +- tests/test_utils/recipes/mamba.yaml | 2 +- tests/test_utils/recipes/mimo.yaml | 2 +- .../recipes/moe-dynamic-inference.yaml | 6 +- .../recipes/moe-static-inference.yaml | 6 +- tests/test_utils/recipes/moe.yaml | 14 +- .../test_utils/recipes/multimodal-llava.yaml | 2 +- tests/test_utils/recipes/t5.yaml | 2 +- tests/unit_tests/test_muon_optimizer.py | 7 +- 433 files changed, 158359 insertions(+), 2068 deletions(-) create mode 100644 tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 
100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 
100644 tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json 
create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json create mode 100644 
tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json create mode 100644 
tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json create mode 
100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py index b398a645ce3..6c77be48e30 100644 --- a/megatron/core/optimizer/layer_wise_optimizer.py +++ b/megatron/core/optimizer/layer_wise_optimizer.py @@ -4,13 +4,13 @@ import torch -from .optimizer import ChainedOptimizer, MegatronOptimizer, Float16OptimizerWithFloat16Params -from .optimizer_config import OptimizerConfig -from .clip_grads import clip_grad_by_total_norm_fp32, count_zeros_fp32, get_grad_norm_fp32 - from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.utils import get_pg_rank, get_pg_size +from .clip_grads import clip_grad_by_total_norm_fp32, count_zeros_fp32, get_grad_norm_fp32 +from .optimizer import ChainedOptimizer, Float16OptimizerWithFloat16Params, MegatronOptimizer +from .optimizer_config import OptimizerConfig + class LayerWiseDistributedOptimizer(ChainedOptimizer): """Layer-wise distributed optimizer for Megatron-core models. @@ -27,6 +27,7 @@ class LayerWiseDistributedOptimizer(ChainedOptimizer): 4. Do regular update with chained optimizers, optimizer is already modified so partial update happens 5. 
allgather updated params to every rank(currently through broadcast loop) """ + def __init__( self, optimizers: List[MegatronOptimizer], @@ -40,7 +41,9 @@ def __init__( if config.bf16: if isinstance(optimizers[0], Float16OptimizerWithFloat16Params): raise TypeError('LayerWiseDistributedOptimizer received Float16 optimizer already.') - optimizers = [Float16OptimizerWithFloat16Params(optim, config, None, None) for optim in optimizers] + optimizers = [ + Float16OptimizerWithFloat16Params(optim, config, None, None) for optim in optimizers + ] super().__init__(optimizers) # TODO(kunlun, deyuf): potential future perf optimization @@ -50,7 +53,7 @@ def __init__( # single allgather later and all current distopt optimization can be applied def shard_params(self, optimizers): - """Shard all params into lists by rank. """ + """Shard all params into lists by rank.""" # We'll optimize sharding later if there is perf issue. should be ok since linear are grouped already # Key is to create separate sharding for dp/expt parallel, saved in dp_cp_params_list, expt_dp_params_list # example of 4 dp rank and 10 non-expert parameters p0-p9, then dp_cp_params_list will look like @@ -94,7 +97,7 @@ def shard_params(self, optimizers): @torch.no_grad() def broadcast_params(self): - """All rank broadcast updated local params(allgatherv). """ + """All rank broadcast updated local params(allgatherv).""" # Broadcast linear layer weights to all other ranks. # This may not be slower than PyTorch allgatherv which calls broadcast internally. # TODO(skyw): Profile and implement more efficient version. 
@@ -117,9 +120,7 @@ def get_grad_norm(self): grads_for_norm = [] for optimizer in self.chained_optimizers: grads_for_norm += optimizer.get_main_grads_for_grad_norm() - grad_norm = get_grad_norm_fp32( - grads_for_norm, grad_stats_parallel_group=None - ) + grad_norm = get_grad_norm_fp32(grads_for_norm, grad_stats_parallel_group=None) return grad_norm @torch.no_grad() @@ -154,5 +155,3 @@ def save_state_dict_to_file(self, filename: str) -> None: def load_state_dict_from_file(self, filename: str) -> None: """Load the parameter state of the optimizer.""" super().load_state_dict(torch.load(filename)) - - diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index 65e1fd6a71f..ced3845804f 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -146,7 +146,6 @@ class OptimizerConfig: muon_extra_scale_factor: float = 1.0 """Additional scale factor for the muon update.""" - ####################### # Distributed optimizer ####################### diff --git a/tests/functional_tests/python_test_utils/common.py b/tests/functional_tests/python_test_utils/common.py index 23d512f1125..4af4bd36167 100644 --- a/tests/functional_tests/python_test_utils/common.py +++ b/tests/functional_tests/python_test_utils/common.py @@ -218,25 +218,18 @@ def pipeline( ] if metric_name == "iteration-time": - if len(actual_value_list) >= 10: - actual_value_list = actual_value_list[3:-3] - golden_value_list = golden_value_list[3:-3] - total_steps_evaluated = ( - golden_value.end_step / golden_value.step_interval + 1 - 3 - 3 - ) - else: - actual_value_list = actual_value_list[3:-1] - golden_value_list = golden_value_list[3:-1] - total_steps_evaluated = ( - golden_value.end_step / golden_value.step_interval + 1 - 3 - 1 - ) - logger.info( - "For metric `%s`, the first and last 3 scalars are removed from the list to reduce noise.", - metric_name, - ) - - actual_value_list = [np.inf if type(v) is str else v 
for v in actual_value_list] - golden_value_list = [np.inf if type(v) is str else v for v in golden_value_list] + actual_value_list = [ + np.median([np.inf if type(v) is str else v for v in actual_value_list]) + ] + golden_value_list = [ + np.median([np.inf if type(v) is str else v for v in golden_value_list]) + ] + total_steps_evaluated = 1 + else: + total_steps_evaluated = golden_value.end_step / golden_value.step_interval + 1 + + actual_value_list = [np.inf if type(v) is str else v for v in actual_value_list] + golden_value_list = [np.inf if type(v) is str else v for v in golden_value_list] actual = np.array(actual_value_list) golden = np.array(golden_value_list) @@ -248,8 +241,12 @@ def pipeline( passing = np.mean(is_close) >= (num_failing_steps_allowed / total_steps_evaluated) if not passing: - logger.info("Actual values: %s", ", ".join([str(v) for v in actual_value_list])) - logger.info("Golden values: %s", ", ".join([str(v) for v in golden_value_list])) + logger.info( + "Actual values: %s", ", ".join([str(v) for v in (*actual_value_list,)]) + ) + logger.info( + "Golden values: %s", ", ".join([str(v) for v in (*golden_value_list,)]) + ) raise test.error_message(metric_name) result = f"{test.type_of_test_result.name} test for metric {metric_name}: PASSED" diff --git a/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py b/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py index 50e7e03b0c2..7b74a6879ad 100644 --- a/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py +++ b/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py @@ -29,7 +29,6 @@ default=False, ) @click.option("--step-size", required=False, default=5, type=int, help="Step size of sampling") -@click.option("--step-size", required=False, default=5, type=int, help="Step size of sampling") def collect_train_test_metrics( logs_dir: str, train_iters: str, diff --git 
a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh index 872053a8d3f..b24423773e5 100644 --- a/tests/functional_tests/shell_test_utils/run_ci_test.sh +++ b/tests/functional_tests/shell_test_utils/run_ci_test.sh @@ -231,7 +231,7 @@ for i in $(seq 1 $N_REPEAT); do if [[ "$TEST_TYPE" == "release" ]]; then EXTRACT_ARGS=("--is-convergence-test") else - EXTRACT_ARGS=("--is-normal-test") + EXTRACT_ARGS=("--is-normal-test" "--step-size" "1") fi # Read test values from Tensorboard for non-inference tests. @@ -285,7 +285,8 @@ for i in $(seq 1 $N_REPEAT); do --logs-dir $TENSORBOARD_PATH \ --train-iters $TRAIN_ITERS \ --output-path "${OUTPUT_PATH}/$(basename $GOLDEN_VALUES_PATH .json)_2nd.json" \ - --is-second-run + --is-second-run \ + "${EXTRACT_ARGS[@]}" echo "Running pytest 1st vs 2nd run comparison" uv run --no-sync pytest -s -o log_cli=true --log-cli-level=info $ROOT_DIR/tests/functional_tests/python_test_utils/test_pretraining_resume_checkpoint_pipeline.py \ diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index c9c84707301..a7cfd87bc71 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.48367, + "2": 10.48426, + "3": 10.48254, + "4": 10.48311, "5": 10.4764, + "6": 10.4844, + "7": 10.48458, + "8": 10.48829, + "9": 10.49008, "10": 10.47268, + "11": 10.47256, + "12": 10.48259, + "13": 10.47857, + "14": 10.45154, "15": 10.47925, + "16": 10.45346, + "17": 10.45145, + "18": 10.46238, + "19": 10.44113, "20": 10.45448, + "21": 
10.43454, + "22": 10.40592, + "23": 10.39961, + "24": 10.37579, "25": 10.38182, + "26": 10.35147, + "27": 10.35388, + "28": 10.34937, + "29": 10.28711, "30": 10.21159, + "31": 10.1726, + "32": 10.13421, + "33": 10.14744, + "34": 10.10737, "35": 10.10581, + "36": 10.08735, + "37": 10.08157, + "38": 10.07233, + "39": 10.00094, "40": 9.98143, + "41": 9.92541, + "42": 9.87527, + "43": 9.88711, + "44": 9.80642, "45": 9.82325, + "46": 9.73785, + "47": 9.74817, + "48": 9.71609, + "49": 9.74484, "50": 9.72982 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2570.0, + "2": 1923.0, + "3": 1512.0, + "4": 2322.0, "5": 2033.0, + "6": 1774.0, + "7": 2781.0, + "8": 2460.0, + "9": 2308.0, "10": 2635.0, + "11": 2397.0, + "12": 1817.0, + "13": 2348.0, + "14": 2749.0, "15": 2027.0, + "16": 2719.0, + "17": 2487.0, + "18": 2533.0, + "19": 2547.0, "20": 2850.0, + "21": 1990.0, + "22": 2884.0, + "23": 2857.0, + "24": 2685.0, "25": 2514.0, + "26": 2958.0, + "27": 2673.0, + "28": 2723.0, + "29": 2571.0, "30": 2858.0, + "31": 2157.0, + "32": 2357.0, + "33": 2242.0, + "34": 2464.0, "35": 2544.0, + "36": 2933.0, + "37": 3293.0, + "38": 2730.0, + "39": 2795.0, "40": 3310.0, + "41": 1816.0, + "42": 1467.0, + "43": 1817.0, + "44": 2633.0, "45": 3576.0, + "46": 3015.0, + "47": 2805.0, + "48": 3071.0, + "49": 2974.0, "50": 2267.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1784014336.0, + "2": 1784014336.0, + "3": 1784014336.0, + "4": 1784014336.0, "5": 1784014336.0, + "6": 1784014336.0, + "7": 1784014336.0, + "8": 1784014336.0, + "9": 1784014336.0, "10": 1784014336.0, + "11": 1784014336.0, + "12": 1784014336.0, + "13": 1784014336.0, + "14": 1784014336.0, "15": 1784014336.0, + "16": 1784014336.0, + "17": 1784014336.0, + "18": 1784014336.0, + "19": 1784014336.0, "20": 1784014336.0, + "21": 1784014336.0, + "22": 1784014336.0, + "23": 1784014336.0, + "24": 
1784014336.0, "25": 1784014336.0, + "26": 1784014336.0, + "27": 1784014336.0, + "28": 1784014336.0, + "29": 1784014336.0, "30": 1784014336.0, + "31": 1784014336.0, + "32": 1784014336.0, + "33": 1784014336.0, + "34": 1784014336.0, "35": 1784014336.0, + "36": 1784014336.0, + "37": 1784014336.0, + "38": 1784014336.0, + "39": 1784014336.0, "40": 1784014336.0, + "41": 1784014336.0, + "42": 1784014336.0, + "43": 1784014336.0, + "44": 1784014336.0, "45": 1784014336.0, + "46": 1784014336.0, + "47": 1784014336.0, + "48": 1784014336.0, + "49": 1784014336.0, "50": 1784014336.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2365860864.0, + "2": 3108323328.0, + "3": 3108323328.0, + "4": 3108323328.0, "5": 3108323328.0, + "6": 3108323328.0, + "7": 3108323328.0, + "8": 3108323328.0, + "9": 3108323328.0, "10": 3108323328.0, + "11": 3108323328.0, + "12": 3108323328.0, + "13": 3108323328.0, + "14": 3108323328.0, "15": 3108323328.0, + "16": 3108323328.0, + "17": 3108323328.0, + "18": 3108323328.0, + "19": 3108323328.0, "20": 3108323328.0, + "21": 3108323328.0, + "22": 3108323328.0, + "23": 3108323328.0, + "24": 3108323328.0, "25": 3108323328.0, + "26": 3108323328.0, + "27": 3108323328.0, + "28": 3108323328.0, + "29": 3108323328.0, "30": 3108323328.0, + "31": 3108323328.0, + "32": 3108323328.0, + "33": 3108323328.0, + "34": 3108323328.0, "35": 3108323328.0, + "36": 3108323328.0, + "37": 3108323328.0, + "38": 3108323328.0, + "39": 3108323328.0, "40": 3108323328.0, + "41": 3108323328.0, + "42": 3108323328.0, + "43": 3108323328.0, + "44": 3108323328.0, "45": 3108323328.0, + "46": 3108323328.0, + "47": 3108323328.0, + "48": 3108323328.0, + "49": 3108323328.0, "50": 3108323328.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 12.77355, - "5": 0.85924, - "10": 0.86109, - "15": 0.87427, - "20": 1.11915, - "25": 0.87738, - "30": 0.86647, - 
"35": 0.84584, - "40": 0.86114, - "45": 1.15934, - "50": 0.84601 + "1": 11.95325, + "2": 1.03495, + "3": 1.01983, + "4": 1.02247, + "5": 1.02376, + "6": 1.01057, + "7": 1.00305, + "8": 1.00511, + "9": 1.01164, + "10": 1.00809, + "11": 1.00401, + "12": 1.01195, + "13": 1.00522, + "14": 1.01037, + "15": 1.01016, + "16": 1.00481, + "17": 1.00787, + "18": 1.00866, + "19": 1.0117, + "20": 1.43302, + "21": 1.37362, + "22": 1.11681, + "23": 1.05672, + "24": 1.00983, + "25": 1.01065, + "26": 1.00572, + "27": 1.00992, + "28": 1.00576, + "29": 1.00599, + "30": 1.00468, + "31": 1.00657, + "32": 1.00207, + "33": 1.00815, + "34": 1.01333, + "35": 1.00888, + "36": 1.01481, + "37": 1.32861, + "38": 1.01215, + "39": 1.00755, + "40": 1.00235, + "41": 1.00954, + "42": 1.00544, + "43": 1.0136, + "44": 1.34075, + "45": 1.00937, + "46": 1.0108, + "47": 1.01217, + "48": 1.11889, + "49": 1.34225, + "50": 1.09191 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..2e4f3c6e211 --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.48367, + "2": 10.48426, + "3": 10.48254, + "4": 10.48311, + "5": 10.4764, + "6": 10.4844, + "7": 10.48458, + "8": 10.48829, + "9": 10.49008, + "10": 10.47268, + "11": 10.47256, + "12": 10.48259, + "13": 10.47857, + "14": 10.45154, + "15": 10.47925, + "16": 10.45346, + "17": 10.45145, + "18": 10.46238, + "19": 10.44113, + "20": 10.45448, + "21": 10.43454, + "22": 10.40592, + "23": 10.39961, + "24": 10.37579, + "25": 10.38182, + "26": 10.35147, + "27": 10.35388, + "28": 10.34937, + "29": 10.28711, + "30": 
10.21159, + "31": 10.1726, + "32": 10.13421, + "33": 10.14744, + "34": 10.10737, + "35": 10.10581, + "36": 10.08735, + "37": 10.08157, + "38": 10.07233, + "39": 10.00094, + "40": 9.98143, + "41": 9.92541, + "42": 9.87527, + "43": 9.88711, + "44": 9.80642, + "45": 9.82325, + "46": 9.73785, + "47": 9.74817, + "48": 9.71609, + "49": 9.74484, + "50": 9.72982 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2570.0, + "2": 1923.0, + "3": 1512.0, + "4": 2322.0, + "5": 2033.0, + "6": 1774.0, + "7": 2781.0, + "8": 2460.0, + "9": 2308.0, + "10": 2635.0, + "11": 2397.0, + "12": 1817.0, + "13": 2348.0, + "14": 2749.0, + "15": 2027.0, + "16": 2719.0, + "17": 2487.0, + "18": 2533.0, + "19": 2547.0, + "20": 2850.0, + "21": 1990.0, + "22": 2884.0, + "23": 2857.0, + "24": 2685.0, + "25": 2514.0, + "26": 2958.0, + "27": 2673.0, + "28": 2723.0, + "29": 2571.0, + "30": 2858.0, + "31": 2157.0, + "32": 2357.0, + "33": 2242.0, + "34": 2464.0, + "35": 2544.0, + "36": 2933.0, + "37": 3293.0, + "38": 2730.0, + "39": 2795.0, + "40": 3310.0, + "41": 1816.0, + "42": 1467.0, + "43": 1817.0, + "44": 2633.0, + "45": 3576.0, + "46": 3015.0, + "47": 2805.0, + "48": 3071.0, + "49": 2974.0, + "50": 2267.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1784014336.0, + "2": 1784014336.0, + "3": 1784014336.0, + "4": 1784014336.0, + "5": 1784014336.0, + "6": 1784014336.0, + "7": 1784014336.0, + "8": 1784014336.0, + "9": 1784014336.0, + "10": 1784014336.0, + "11": 1784014336.0, + "12": 1784014336.0, + "13": 1784014336.0, + "14": 1784014336.0, + "15": 1784014336.0, + "16": 1784014336.0, + "17": 1784014336.0, + "18": 1784014336.0, + "19": 1784014336.0, + "20": 1784014336.0, + "21": 1784014336.0, + "22": 1784014336.0, + "23": 1784014336.0, + "24": 1784014336.0, + "25": 1784014336.0, + "26": 1784014336.0, + "27": 1784014336.0, + "28": 1784014336.0, + "29": 1784014336.0, + "30": 
1784014336.0, + "31": 1784014336.0, + "32": 1784014336.0, + "33": 1784014336.0, + "34": 1784014336.0, + "35": 1784014336.0, + "36": 1784014336.0, + "37": 1784014336.0, + "38": 1784014336.0, + "39": 1784014336.0, + "40": 1784014336.0, + "41": 1784014336.0, + "42": 1784014336.0, + "43": 1784014336.0, + "44": 1784014336.0, + "45": 1784014336.0, + "46": 1784014336.0, + "47": 1784014336.0, + "48": 1784014336.0, + "49": 1784014336.0, + "50": 1784014336.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2365860864.0, + "2": 3108323328.0, + "3": 3108323328.0, + "4": 3108323328.0, + "5": 3108323328.0, + "6": 3108323328.0, + "7": 3108323328.0, + "8": 3108847104.0, + "9": 3108847104.0, + "10": 3108847104.0, + "11": 3108847104.0, + "12": 3108847104.0, + "13": 3108847104.0, + "14": 3108847104.0, + "15": 3108847104.0, + "16": 3108847104.0, + "17": 3108847104.0, + "18": 3108847104.0, + "19": 3108847104.0, + "20": 3108847104.0, + "21": 3108847104.0, + "22": 3108847104.0, + "23": 3108847104.0, + "24": 3108847104.0, + "25": 3108847104.0, + "26": 3108847104.0, + "27": 3108847104.0, + "28": 3108847104.0, + "29": 3108847104.0, + "30": 3108847104.0, + "31": 3108847104.0, + "32": 3108847104.0, + "33": 3108847104.0, + "34": 3108847104.0, + "35": 3108847104.0, + "36": 3108847104.0, + "37": 3108847104.0, + "38": 3108847104.0, + "39": 3108847104.0, + "40": 3108847104.0, + "41": 3108847104.0, + "42": 3108847104.0, + "43": 3108847104.0, + "44": 3108847104.0, + "45": 3108847104.0, + "46": 3108847104.0, + "47": 3108847104.0, + "48": 3108847104.0, + "49": 3108847104.0, + "50": 3108847104.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.28863, + "2": 1.02215, + "3": 0.91269, + "4": 0.90798, + "5": 0.9095, + "6": 0.89623, + "7": 0.91406, + "8": 0.93659, + "9": 0.98867, + "10": 0.97926, + "11": 0.92244, + "12": 0.93168, + "13": 0.91684, + "14": 0.92151, + "15": 
0.90545, + "16": 0.92975, + "17": 0.9771, + "18": 0.91421, + "19": 0.91325, + "20": 1.37492, + "21": 1.35582, + "22": 0.90471, + "23": 0.90119, + "24": 0.9066, + "25": 0.89745, + "26": 0.90071, + "27": 0.90705, + "28": 0.91467, + "29": 0.90066, + "30": 0.94983, + "31": 0.9257, + "32": 0.92349, + "33": 0.92172, + "34": 0.93247, + "35": 0.91594, + "36": 0.9259, + "37": 0.91518, + "38": 0.91714, + "39": 0.91191, + "40": 0.91531, + "41": 0.91413, + "42": 0.92876, + "43": 0.95961, + "44": 0.90524, + "45": 0.89573, + "46": 0.90239, + "47": 0.89546, + "48": 1.05878, + "49": 1.18954, + "50": 1.15643 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..1352649be85 --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.48367, + "2": 10.48426, + "3": 10.48254, + "4": 10.48311, + "5": 10.4764, + "6": 10.4844, + "7": 10.48458, + "8": 10.48829, + "9": 10.49008, + "10": 10.47268, + "11": 10.47256, + "12": 10.48259, + "13": 10.47857, + "14": 10.45154, + "15": 10.47925, + "16": 10.45346, + "17": 10.45145, + "18": 10.46238, + "19": 10.44113, + "20": 10.45448, + "21": 10.43454, + "22": 10.40592, + "23": 10.39961, + "24": 10.37579, + "25": 10.38182, + "26": 10.35147, + "27": 10.35388, + "28": 10.34937, + "29": 10.28711, + "30": 10.21159, + "31": 10.1726, + "32": 10.13421, + "33": 10.14744, + "34": 10.10737, + "35": 10.10581, + "36": 10.08735, + "37": 10.08157, + "38": 10.07233, + "39": 10.00094, + "40": 9.98143, + "41": 9.92541, + "42": 9.87527, + "43": 9.88711, + "44": 9.80642, + "45": 9.82325, + "46": 9.73785, + "47": 9.74817, + "48": 
9.71609, + "49": 9.74484, + "50": 9.72982 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2570.0, + "2": 1923.0, + "3": 1512.0, + "4": 2322.0, + "5": 2033.0, + "6": 1774.0, + "7": 2781.0, + "8": 2460.0, + "9": 2308.0, + "10": 2635.0, + "11": 2397.0, + "12": 1817.0, + "13": 2348.0, + "14": 2749.0, + "15": 2027.0, + "16": 2719.0, + "17": 2487.0, + "18": 2533.0, + "19": 2547.0, + "20": 2850.0, + "21": 1990.0, + "22": 2884.0, + "23": 2857.0, + "24": 2685.0, + "25": 2514.0, + "26": 2958.0, + "27": 2673.0, + "28": 2723.0, + "29": 2571.0, + "30": 2858.0, + "31": 2157.0, + "32": 2357.0, + "33": 2242.0, + "34": 2464.0, + "35": 2544.0, + "36": 2933.0, + "37": 3293.0, + "38": 2730.0, + "39": 2795.0, + "40": 3310.0, + "41": 1816.0, + "42": 1467.0, + "43": 1817.0, + "44": 2633.0, + "45": 3576.0, + "46": 3015.0, + "47": 2805.0, + "48": 3071.0, + "49": 2974.0, + "50": 2267.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1784014336.0, + "2": 1784014336.0, + "3": 1784014336.0, + "4": 1784014336.0, + "5": 1784014336.0, + "6": 1784014336.0, + "7": 1784014336.0, + "8": 1784014336.0, + "9": 1784014336.0, + "10": 1784014336.0, + "11": 1784014336.0, + "12": 1784014336.0, + "13": 1784014336.0, + "14": 1784014336.0, + "15": 1784014336.0, + "16": 1784014336.0, + "17": 1784014336.0, + "18": 1784014336.0, + "19": 1784014336.0, + "20": 1784014336.0, + "21": 1784014336.0, + "22": 1784014336.0, + "23": 1784014336.0, + "24": 1784014336.0, + "25": 1784014336.0, + "26": 1784014336.0, + "27": 1784014336.0, + "28": 1784014336.0, + "29": 1784014336.0, + "30": 1784014336.0, + "31": 1784014336.0, + "32": 1784014336.0, + "33": 1784014336.0, + "34": 1784014336.0, + "35": 1784014336.0, + "36": 1784014336.0, + "37": 1784014336.0, + "38": 1784014336.0, + "39": 1784014336.0, + "40": 1784014336.0, + "41": 1784014336.0, + "42": 1784014336.0, + "43": 1784014336.0, + "44": 1784014336.0, 
+ "45": 1784014336.0, + "46": 1784014336.0, + "47": 1784014336.0, + "48": 1784014336.0, + "49": 1784014336.0, + "50": 1784014336.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2365860864.0, + "2": 3108323328.0, + "3": 3108323328.0, + "4": 3108323328.0, + "5": 3108323328.0, + "6": 3108842496.0, + "7": 3108842496.0, + "8": 3108842496.0, + "9": 3108842496.0, + "10": 3108842496.0, + "11": 3108842496.0, + "12": 3108842496.0, + "13": 3108842496.0, + "14": 3108842496.0, + "15": 3108842496.0, + "16": 3108842496.0, + "17": 3108842496.0, + "18": 3108842496.0, + "19": 3108842496.0, + "20": 3108842496.0, + "21": 3108842496.0, + "22": 3108842496.0, + "23": 3108842496.0, + "24": 3108842496.0, + "25": 3108842496.0, + "26": 3108842496.0, + "27": 3108842496.0, + "28": 3108842496.0, + "29": 3108842496.0, + "30": 3108842496.0, + "31": 3108842496.0, + "32": 3108842496.0, + "33": 3108842496.0, + "34": 3108842496.0, + "35": 3108842496.0, + "36": 3108842496.0, + "37": 3108842496.0, + "38": 3108842496.0, + "39": 3108842496.0, + "40": 3108842496.0, + "41": 3108842496.0, + "42": 3108842496.0, + "43": 3108842496.0, + "44": 3108842496.0, + "45": 3108842496.0, + "46": 3108842496.0, + "47": 3108842496.0, + "48": 3108842496.0, + "49": 3108842496.0, + "50": 3108842496.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.98661, + "2": 1.05916, + "3": 1.01721, + "4": 1.02611, + "5": 1.02779, + "6": 1.11252, + "7": 1.0176, + "8": 1.02427, + "9": 1.02561, + "10": 1.01845, + "11": 1.02419, + "12": 1.01745, + "13": 1.01224, + "14": 1.02388, + "15": 1.03687, + "16": 1.01886, + "17": 1.01708, + "18": 1.01143, + "19": 1.01902, + "20": 1.49878, + "21": 1.47537, + "22": 1.01801, + "23": 1.05158, + "24": 1.03481, + "25": 1.01773, + "26": 1.01186, + "27": 1.02203, + "28": 1.01824, + "29": 1.01865, + "30": 1.02165, + "31": 1.0184, + "32": 1.02106, + "33": 1.04655, + "34": 
1.03129, + "35": 1.01893, + "36": 1.02153, + "37": 1.02154, + "38": 1.0213, + "39": 1.14846, + "40": 1.02149, + "41": 1.01905, + "42": 1.02038, + "43": 1.03126, + "44": 1.04155, + "45": 1.01649, + "46": 1.01742, + "47": 1.02406, + "48": 1.27122, + "49": 1.15085, + "50": 1.10861 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index f38e38fdb9c..fb44f049ad6 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.4837, + "2": 10.48435, + "3": 10.48251, + "4": 10.48303, "5": 10.47647, + "6": 10.48423, + "7": 10.48457, + "8": 10.48837, + "9": 10.49003, "10": 10.47255, + "11": 10.47245, + "12": 10.4828, + "13": 10.47855, + "14": 10.45162, "15": 10.47936, + "16": 10.45364, + "17": 10.45143, + "18": 10.46239, + "19": 10.44136, "20": 10.45438, + "21": 10.43469, + "22": 10.40587, + "23": 10.39982, + "24": 10.37585, "25": 10.38173, + "26": 10.35154, + "27": 10.35401, + "28": 10.3497, + "29": 10.28714, "30": 10.21194, + "31": 10.17274, + "32": 10.13439, + "33": 10.14753, + "34": 10.10759, "35": 10.10592, + "36": 10.08756, + "37": 10.08177, + "38": 10.07257, + "39": 10.0013, "40": 9.9816, + "41": 9.92549, + "42": 9.87529, + "43": 9.88742, + "44": 9.80641, "45": 9.82342, + "46": 9.73815, + "47": 9.74831, + "48": 9.71619, + "49": 9.74504, "50": 9.73004 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2554.0, + "2": 1919.0, + "3": 1521.0, + "4": 2330.0, "5": 
2010.0, + "6": 1725.0, + "7": 2803.0, + "8": 2435.0, + "9": 2286.0, "10": 2570.0, + "11": 2438.0, + "12": 1829.0, + "13": 2332.0, + "14": 2832.0, "15": 2008.0, + "16": 2659.0, + "17": 2454.0, + "18": 2500.0, + "19": 2588.0, "20": 2834.0, + "21": 2042.0, + "22": 3037.0, + "23": 2702.0, + "24": 2700.0, "25": 2568.0, + "26": 2896.0, + "27": 2735.0, + "28": 2699.0, + "29": 2548.0, "30": 2843.0, + "31": 2160.0, + "32": 2458.0, + "33": 2130.0, + "34": 2517.0, "35": 2597.0, + "36": 3001.0, + "37": 3305.0, + "38": 2682.0, + "39": 2805.0, "40": 3425.0, + "41": 1812.0, + "42": 1481.0, + "43": 1726.0, + "44": 2575.0, "45": 3438.0, + "46": 2960.0, + "47": 2792.0, + "48": 3107.0, + "49": 2854.0, "50": 2145.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1767237120.0, + "2": 1767237120.0, + "3": 1767237120.0, + "4": 1767237120.0, "5": 1767237120.0, + "6": 1767237120.0, + "7": 1767237120.0, + "8": 1767237120.0, + "9": 1767237120.0, "10": 1767237120.0, + "11": 1767237120.0, + "12": 1767237120.0, + "13": 1767237120.0, + "14": 1767237120.0, "15": 1767237120.0, + "16": 1767237120.0, + "17": 1767237120.0, + "18": 1767237120.0, + "19": 1767237120.0, "20": 1767237120.0, + "21": 1767237120.0, + "22": 1767237120.0, + "23": 1767237120.0, + "24": 1767237120.0, "25": 1767237120.0, + "26": 1767237120.0, + "27": 1767237120.0, + "28": 1767237120.0, + "29": 1767237120.0, "30": 1767237120.0, + "31": 1767237120.0, + "32": 1767237120.0, + "33": 1767237120.0, + "34": 1767237120.0, "35": 1767237120.0, + "36": 1767237120.0, + "37": 1767237120.0, + "38": 1767237120.0, + "39": 1767237120.0, "40": 1767237120.0, + "41": 1767237120.0, + "42": 1767237120.0, + "43": 1767237120.0, + "44": 1767237120.0, "45": 1767237120.0, + "46": 1767237120.0, + "47": 1767237120.0, + "48": 1767237120.0, + "49": 1767237120.0, "50": 1767237120.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 
1, "values": { "1": 2336500736.0, + "2": 3079487488.0, + "3": 3079487488.0, + "4": 3079487488.0, "5": 3079487488.0, + "6": 3079487488.0, + "7": 3079487488.0, + "8": 3079487488.0, + "9": 3079487488.0, "10": 3079487488.0, + "11": 3079487488.0, + "12": 3079487488.0, + "13": 3079487488.0, + "14": 3079487488.0, "15": 3079487488.0, + "16": 3079487488.0, + "17": 3079487488.0, + "18": 3079487488.0, + "19": 3079487488.0, "20": 3079487488.0, + "21": 3079487488.0, + "22": 3079487488.0, + "23": 3079487488.0, + "24": 3079487488.0, "25": 3079487488.0, + "26": 3079487488.0, + "27": 3079487488.0, + "28": 3079487488.0, + "29": 3079487488.0, "30": 3079487488.0, + "31": 3079487488.0, + "32": 3079487488.0, + "33": 3079487488.0, + "34": 3079487488.0, "35": 3079487488.0, + "36": 3079487488.0, + "37": 3079487488.0, + "38": 3079487488.0, + "39": 3079487488.0, "40": 3079487488.0, + "41": 3079487488.0, + "42": 3079487488.0, + "43": 3079487488.0, + "44": 3079487488.0, "45": 3079487488.0, + "46": 3079487488.0, + "47": 3079487488.0, + "48": 3079487488.0, + "49": 3079487488.0, "50": 3079487488.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 9.51607, - "5": 0.70637, - "10": 0.74903, - "15": 0.69218, - "20": 0.94021, - "25": 0.69, - "30": 0.69576, - "35": 0.69538, - "40": 0.69122, - "45": 1.04545, - "50": 0.69215 + "1": 11.5674, + "2": 0.87925, + "3": 0.84214, + "4": 0.85037, + "5": 0.85134, + "6": 0.84821, + "7": 0.84955, + "8": 0.84912, + "9": 0.85227, + "10": 0.84641, + "11": 0.84805, + "12": 0.84791, + "13": 0.86059, + "14": 0.86196, + "15": 1.10537, + "16": 1.03739, + "17": 0.8309, + "18": 0.82806, + "19": 1.30044, + "20": 0.83029, + "21": 0.82677, + "22": 1.30745, + "23": 0.85382, + "24": 0.83942, + "25": 0.83871, + "26": 0.8337, + "27": 0.83434, + "28": 0.8309, + "29": 0.83936, + "30": 0.83788, + "31": 0.83476, + "32": 0.83236, + "33": 0.83163, + "34": 0.84328, + "35": 0.83702, + "36": 0.83877, + "37": 0.83834, + 
"38": 0.83145, + "39": 0.83941, + "40": 0.84432, + "41": 1.16619, + "42": 1.1534, + "43": 1.08513, + "44": 0.84537, + "45": 0.99113, + "46": 0.84419, + "47": 0.89066, + "48": 0.83549, + "49": 1.01154, + "50": 0.96557 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..0ff198806cb --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.4837, + "2": 10.48435, + "3": 10.48251, + "4": 10.48303, + "5": 10.47647, + "6": 10.48423, + "7": 10.48457, + "8": 10.48837, + "9": 10.49003, + "10": 10.47255, + "11": 10.47245, + "12": 10.4828, + "13": 10.47855, + "14": 10.45162, + "15": 10.47936, + "16": 10.45364, + "17": 10.45143, + "18": 10.46239, + "19": 10.44136, + "20": 10.45438, + "21": 10.43469, + "22": 10.40587, + "23": 10.39982, + "24": 10.37585, + "25": 10.38173, + "26": 10.35154, + "27": 10.35401, + "28": 10.3497, + "29": 10.28714, + "30": 10.21194, + "31": 10.17274, + "32": 10.13439, + "33": 10.14753, + "34": 10.10759, + "35": 10.10592, + "36": 10.08756, + "37": 10.08177, + "38": 10.07257, + "39": 10.0013, + "40": 9.9816, + "41": 9.92549, + "42": 9.87529, + "43": 9.88742, + "44": 9.80641, + "45": 9.82342, + "46": 9.73815, + "47": 9.74831, + "48": 9.71619, + "49": 9.74504, + "50": 9.73004 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2554.0, + "2": 1919.0, + "3": 1521.0, + "4": 2330.0, + "5": 2010.0, + "6": 1725.0, + "7": 2803.0, + "8": 2435.0, + "9": 2286.0, + "10": 2570.0, + "11": 2438.0, + "12": 1829.0, + "13": 2332.0, 
+ "14": 2832.0, + "15": 2008.0, + "16": 2659.0, + "17": 2454.0, + "18": 2500.0, + "19": 2588.0, + "20": 2834.0, + "21": 2042.0, + "22": 3037.0, + "23": 2702.0, + "24": 2700.0, + "25": 2568.0, + "26": 2896.0, + "27": 2735.0, + "28": 2699.0, + "29": 2548.0, + "30": 2843.0, + "31": 2160.0, + "32": 2458.0, + "33": 2130.0, + "34": 2517.0, + "35": 2597.0, + "36": 3001.0, + "37": 3305.0, + "38": 2682.0, + "39": 2805.0, + "40": 3425.0, + "41": 1812.0, + "42": 1481.0, + "43": 1726.0, + "44": 2575.0, + "45": 3438.0, + "46": 2960.0, + "47": 2792.0, + "48": 3107.0, + "49": 2854.0, + "50": 2145.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1767237120.0, + "2": 1767237120.0, + "3": 1767237120.0, + "4": 1767237120.0, + "5": 1767237120.0, + "6": 1767237120.0, + "7": 1767237120.0, + "8": 1767237120.0, + "9": 1767237120.0, + "10": 1767237120.0, + "11": 1767237120.0, + "12": 1767237120.0, + "13": 1767237120.0, + "14": 1767237120.0, + "15": 1767237120.0, + "16": 1767237120.0, + "17": 1767237120.0, + "18": 1767237120.0, + "19": 1767237120.0, + "20": 1767237120.0, + "21": 1767237120.0, + "22": 1767237120.0, + "23": 1767237120.0, + "24": 1767237120.0, + "25": 1767237120.0, + "26": 1767237120.0, + "27": 1767237120.0, + "28": 1767237120.0, + "29": 1767237120.0, + "30": 1767237120.0, + "31": 1767237120.0, + "32": 1767237120.0, + "33": 1767237120.0, + "34": 1767237120.0, + "35": 1767237120.0, + "36": 1767237120.0, + "37": 1767237120.0, + "38": 1767237120.0, + "39": 1767237120.0, + "40": 1767237120.0, + "41": 1767237120.0, + "42": 1767237120.0, + "43": 1767237120.0, + "44": 1767237120.0, + "45": 1767237120.0, + "46": 1767237120.0, + "47": 1767237120.0, + "48": 1767237120.0, + "49": 1767237120.0, + "50": 1767237120.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2336500736.0, + "2": 3079487488.0, + "3": 3079487488.0, + "4": 3079487488.0, + "5": 
3079487488.0, + "6": 3079487488.0, + "7": 3079487488.0, + "8": 3079487488.0, + "9": 3079487488.0, + "10": 3079487488.0, + "11": 3079487488.0, + "12": 3079487488.0, + "13": 3079487488.0, + "14": 3079487488.0, + "15": 3079487488.0, + "16": 3079487488.0, + "17": 3079487488.0, + "18": 3079487488.0, + "19": 3079487488.0, + "20": 3079487488.0, + "21": 3079487488.0, + "22": 3079487488.0, + "23": 3079487488.0, + "24": 3079487488.0, + "25": 3079487488.0, + "26": 3079487488.0, + "27": 3079487488.0, + "28": 3079487488.0, + "29": 3079487488.0, + "30": 3079487488.0, + "31": 3079487488.0, + "32": 3079487488.0, + "33": 3079487488.0, + "34": 3079487488.0, + "35": 3079487488.0, + "36": 3079487488.0, + "37": 3079487488.0, + "38": 3079487488.0, + "39": 3079487488.0, + "40": 3079487488.0, + "41": 3079487488.0, + "42": 3079487488.0, + "43": 3079487488.0, + "44": 3079487488.0, + "45": 3079487488.0, + "46": 3079487488.0, + "47": 3079487488.0, + "48": 3079487488.0, + "49": 3079487488.0, + "50": 3079487488.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.70758, + "2": 0.8354, + "3": 0.78875, + "4": 0.77893, + "5": 0.81797, + "6": 0.77299, + "7": 0.76726, + "8": 0.77744, + "9": 0.77036, + "10": 0.76808, + "11": 0.77009, + "12": 0.77543, + "13": 0.78463, + "14": 0.77498, + "15": 0.76065, + "16": 1.28888, + "17": 0.78476, + "18": 0.77415, + "19": 0.77341, + "20": 1.04994, + "21": 1.25413, + "22": 0.7709, + "23": 0.85615, + "24": 0.76186, + "25": 0.75903, + "26": 0.75431, + "27": 0.76868, + "28": 0.7776, + "29": 0.74989, + "30": 0.75136, + "31": 0.7956, + "32": 0.74247, + "33": 0.73237, + "34": 0.73066, + "35": 0.74241, + "36": 0.74361, + "37": 0.77983, + "38": 0.77753, + "39": 0.75036, + "40": 0.75188, + "41": 0.75332, + "42": 0.89635, + "43": 0.73883, + "44": 0.92932, + "45": 0.73444, + "46": 0.73103, + "47": 1.01543, + "48": 1.06091, + "49": 0.92342, + "50": 1.25669 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..bf20b2b00e3 --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.4837, + "2": 10.48435, + "3": 10.48251, + "4": 10.48303, + "5": 10.47647, + "6": 10.48423, + "7": 10.48457, + "8": 10.48837, + "9": 10.49003, + "10": 10.47255, + "11": 10.47245, + "12": 10.4828, + "13": 10.47855, + "14": 10.45162, + "15": 10.47936, + "16": 10.45364, + "17": 10.45143, + "18": 10.46239, + "19": 10.44136, + "20": 10.45438, + "21": 10.43469, + "22": 10.40587, + "23": 10.39982, + "24": 10.37585, + "25": 10.38173, + "26": 10.35154, + "27": 10.35401, + "28": 10.3497, + "29": 10.28714, + "30": 10.21194, + "31": 10.17274, + "32": 10.13439, + "33": 10.14753, + "34": 10.10759, + "35": 10.10592, + "36": 10.08756, + "37": 10.08177, + "38": 10.07257, + "39": 10.0013, + "40": 9.9816, + "41": 9.92549, + "42": 9.87529, + "43": 9.88742, + "44": 9.80641, + "45": 9.82342, + "46": 9.73815, + "47": 9.74831, + "48": 9.71619, + "49": 9.74504, + "50": 9.73004 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2554.0, + "2": 1919.0, + "3": 1521.0, + "4": 2330.0, + "5": 2010.0, + "6": 1725.0, + "7": 2803.0, + "8": 2435.0, + "9": 2286.0, + "10": 2570.0, + "11": 2438.0, + "12": 1829.0, + "13": 2332.0, + "14": 2832.0, + "15": 2008.0, + "16": 2659.0, + "17": 2454.0, + "18": 2500.0, + "19": 2588.0, + "20": 2834.0, + "21": 2042.0, + "22": 3037.0, + "23": 2702.0, + "24": 2700.0, + "25": 2568.0, + "26": 2896.0, + "27": 2735.0, + "28": 2699.0, + "29": 2548.0, + "30": 2843.0, + "31": 
2160.0, + "32": 2458.0, + "33": 2130.0, + "34": 2517.0, + "35": 2597.0, + "36": 3001.0, + "37": 3305.0, + "38": 2682.0, + "39": 2805.0, + "40": 3425.0, + "41": 1812.0, + "42": 1481.0, + "43": 1726.0, + "44": 2575.0, + "45": 3438.0, + "46": 2960.0, + "47": 2792.0, + "48": 3107.0, + "49": 2854.0, + "50": 2145.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1767237120.0, + "2": 1767237120.0, + "3": 1767237120.0, + "4": 1767237120.0, + "5": 1767237120.0, + "6": 1767237120.0, + "7": 1767237120.0, + "8": 1767237120.0, + "9": 1767237120.0, + "10": 1767237120.0, + "11": 1767237120.0, + "12": 1767237120.0, + "13": 1767237120.0, + "14": 1767237120.0, + "15": 1767237120.0, + "16": 1767237120.0, + "17": 1767237120.0, + "18": 1767237120.0, + "19": 1767237120.0, + "20": 1767237120.0, + "21": 1767237120.0, + "22": 1767237120.0, + "23": 1767237120.0, + "24": 1767237120.0, + "25": 1767237120.0, + "26": 1767237120.0, + "27": 1767237120.0, + "28": 1767237120.0, + "29": 1767237120.0, + "30": 1767237120.0, + "31": 1767237120.0, + "32": 1767237120.0, + "33": 1767237120.0, + "34": 1767237120.0, + "35": 1767237120.0, + "36": 1767237120.0, + "37": 1767237120.0, + "38": 1767237120.0, + "39": 1767237120.0, + "40": 1767237120.0, + "41": 1767237120.0, + "42": 1767237120.0, + "43": 1767237120.0, + "44": 1767237120.0, + "45": 1767237120.0, + "46": 1767237120.0, + "47": 1767237120.0, + "48": 1767237120.0, + "49": 1767237120.0, + "50": 1767237120.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2336500736.0, + "2": 3079487488.0, + "3": 3079487488.0, + "4": 3079487488.0, + "5": 3079487488.0, + "6": 3079487488.0, + "7": 3079487488.0, + "8": 3079487488.0, + "9": 3079487488.0, + "10": 3079487488.0, + "11": 3079487488.0, + "12": 3079487488.0, + "13": 3079487488.0, + "14": 3079487488.0, + "15": 3079487488.0, + "16": 3079487488.0, + "17": 3079487488.0, + "18": 
3079487488.0, + "19": 3079487488.0, + "20": 3079487488.0, + "21": 3079487488.0, + "22": 3079487488.0, + "23": 3079487488.0, + "24": 3079487488.0, + "25": 3079487488.0, + "26": 3079487488.0, + "27": 3079487488.0, + "28": 3079487488.0, + "29": 3079487488.0, + "30": 3079487488.0, + "31": 3079487488.0, + "32": 3079487488.0, + "33": 3079487488.0, + "34": 3079487488.0, + "35": 3079487488.0, + "36": 3079487488.0, + "37": 3079487488.0, + "38": 3079487488.0, + "39": 3079487488.0, + "40": 3079487488.0, + "41": 3079487488.0, + "42": 3079487488.0, + "43": 3079487488.0, + "44": 3079487488.0, + "45": 3079487488.0, + "46": 3079487488.0, + "47": 3079487488.0, + "48": 3079487488.0, + "49": 3079487488.0, + "50": 3079487488.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.68301, + "2": 0.87796, + "3": 0.84756, + "4": 0.85513, + "5": 0.85643, + "6": 0.85366, + "7": 0.8468, + "8": 0.84974, + "9": 0.84989, + "10": 0.8464, + "11": 0.84369, + "12": 0.84972, + "13": 0.84311, + "14": 0.85648, + "15": 1.1084, + "16": 0.8827, + "17": 0.87952, + "18": 0.88554, + "19": 0.82673, + "20": 0.82222, + "21": 1.06414, + "22": 1.09134, + "23": 1.02591, + "24": 0.82601, + "25": 0.82277, + "26": 0.81844, + "27": 0.82627, + "28": 0.82854, + "29": 0.82653, + "30": 0.82247, + "31": 0.82906, + "32": 0.82363, + "33": 0.82944, + "34": 0.82401, + "35": 0.82902, + "36": 0.83537, + "37": 0.8265, + "38": 0.82728, + "39": 0.82087, + "40": 0.82525, + "41": 0.82691, + "42": 1.14473, + "43": 0.97566, + "44": 0.82343, + "45": 0.82956, + "46": 0.82572, + "47": 0.83635, + "48": 0.94255, + "49": 0.99753, + "50": 1.10127 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 325bd59c44d..8063c892338 100644 
--- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.48367, + "2": 10.48426, + "3": 10.48254, + "4": 10.48311, "5": 10.4764, + "6": 10.4844, + "7": 10.48458, + "8": 10.48829, + "9": 10.49008, "10": 10.47268, + "11": 10.47256, + "12": 10.48259, + "13": 10.47857, + "14": 10.45154, "15": 10.47925, + "16": 10.45346, + "17": 10.45145, + "18": 10.46238, + "19": 10.44113, "20": 10.45448, + "21": 10.43454, + "22": 10.40592, + "23": 10.39961, + "24": 10.37579, "25": 10.38182, + "26": 10.35147, + "27": 10.35388, + "28": 10.34937, + "29": 10.28711, "30": 10.21159, + "31": 10.1726, + "32": 10.13421, + "33": 10.14744, + "34": 10.10737, "35": 10.10581, + "36": 10.08735, + "37": 10.08157, + "38": 10.07233, + "39": 10.00094, "40": 9.98143, + "41": 9.92541, + "42": 9.87527, + "43": 9.88711, + "44": 9.80642, "45": 9.82325, + "46": 9.73785, + "47": 9.74817, + "48": 9.71609, + "49": 9.74484, "50": 9.72982, + "51": 9.71485, + "52": 9.66475, + "53": 9.60919, + "54": 9.62705, "55": 9.61012, + "56": 9.617, + "57": 9.56786, + "58": 9.52731, + "59": 9.51668, "60": 9.51865, + "61": 9.53132, + "62": 9.45016, + "63": 9.45725, + "64": 9.43435, "65": 9.45801, + "66": 9.4368, + "67": 9.3968, + "68": 9.36474, + "69": 9.4095, "70": 9.376, + "71": 9.41716, + "72": 9.42574, + "73": 9.37581, + "74": 9.41547, "75": 9.37891, + "76": 9.28017, + "77": 9.32205, + "78": 9.35754, + "79": 9.32162, "80": 9.31486, + "81": 9.2678, + "82": 9.34178, + "83": 9.32145, + "84": 9.24785, "85": 9.35023, + "86": 9.22392, + "87": 9.3062, + "88": 9.29891, + "89": 9.22716, "90": 9.28483, + "91": 9.23109, + "92": 9.27463, + "93": 9.19241, + "94": 9.23984, "95": 9.28006, + "96": 9.17526, + 
"97": 9.21894, + "98": 9.17192, + "99": 9.16446, "100": 9.14816 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2570.0, + "2": 1923.0, + "3": 1512.0, + "4": 2322.0, "5": 2033.0, + "6": 1774.0, + "7": 2781.0, + "8": 2460.0, + "9": 2308.0, "10": 2635.0, + "11": 2397.0, + "12": 1817.0, + "13": 2348.0, + "14": 2749.0, "15": 2027.0, + "16": 2719.0, + "17": 2487.0, + "18": 2533.0, + "19": 2547.0, "20": 2850.0, + "21": 1990.0, + "22": 2884.0, + "23": 2857.0, + "24": 2685.0, "25": 2514.0, + "26": 2958.0, + "27": 2673.0, + "28": 2723.0, + "29": 2571.0, "30": 2858.0, + "31": 2157.0, + "32": 2357.0, + "33": 2242.0, + "34": 2464.0, "35": 2544.0, + "36": 2933.0, + "37": 3293.0, + "38": 2730.0, + "39": 2795.0, "40": 3310.0, + "41": 1816.0, + "42": 1467.0, + "43": 1817.0, + "44": 2633.0, "45": 3576.0, + "46": 3015.0, + "47": 2805.0, + "48": 3071.0, + "49": 2974.0, "50": 2267.0, + "51": 1923.0, + "52": 2515.0, + "53": 3615.0, + "54": 3426.0, "55": 3436.0, + "56": 4411.0, + "57": 4095.0, + "58": 4308.0, + "59": 1687.0, "60": 2431.0, + "61": 2151.0, + "62": 3986.0, + "63": 3558.0, + "64": 4286.0, "65": 3052.0, + "66": 1720.0, + "67": 1910.0, + "68": 4193.0, + "69": 4347.0, "70": 4596.0, + "71": 2078.0, + "72": 4406.0, + "73": 4062.0, + "74": 3358.0, "75": 4606.0, + "76": 2187.0, + "77": 4854.0, + "78": 4098.0, + "79": 2652.0, "80": 3776.0, + "81": 3550.0, + "82": 3031.0, + "83": 5345.0, + "84": 4396.0, "85": 4354.0, + "86": 3332.0, + "87": 4815.0, + "88": 3303.0, + "89": 4611.0, "90": 4346.0, + "91": 4361.0, + "92": 3502.0, + "93": 5624.0, + "94": 3733.0, "95": 4728.0, + "96": 3534.0, + "97": 3873.0, + "98": 4525.0, + "99": 4329.0, "100": 3365.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1784014336.0, + "2": 1784014336.0, + "3": 1784014336.0, + "4": 1784014336.0, "5": 1784014336.0, + "6": 1784014336.0, + "7": 1784014336.0, + "8": 
1784014336.0, + "9": 1784014336.0, "10": 1784014336.0, + "11": 1784014336.0, + "12": 1784014336.0, + "13": 1784014336.0, + "14": 1784014336.0, "15": 1784014336.0, + "16": 1784014336.0, + "17": 1784014336.0, + "18": 1784014336.0, + "19": 1784014336.0, "20": 1784014336.0, + "21": 1784014336.0, + "22": 1784014336.0, + "23": 1784014336.0, + "24": 1784014336.0, "25": 1784014336.0, + "26": 1784014336.0, + "27": 1784014336.0, + "28": 1784014336.0, + "29": 1784014336.0, "30": 1784014336.0, + "31": 1784014336.0, + "32": 1784014336.0, + "33": 1784014336.0, + "34": 1784014336.0, "35": 1784014336.0, + "36": 1784014336.0, + "37": 1784014336.0, + "38": 1784014336.0, + "39": 1784014336.0, "40": 1784014336.0, + "41": 1784014336.0, + "42": 1784014336.0, + "43": 1784014336.0, + "44": 1784014336.0, "45": 1784014336.0, + "46": 1784014336.0, + "47": 1784014336.0, + "48": 1784014336.0, + "49": 1784014336.0, "50": 1784014336.0, + "51": 1784014336.0, + "52": 1784014336.0, + "53": 1784014336.0, + "54": 1784014336.0, "55": 1784014336.0, + "56": 1784014336.0, + "57": 1784014336.0, + "58": 1784014336.0, + "59": 1784014336.0, "60": 1784014336.0, + "61": 1784014336.0, + "62": 1784014336.0, + "63": 1784014336.0, + "64": 1784014336.0, "65": 1784014336.0, + "66": 1784014336.0, + "67": 1784014336.0, + "68": 1784014336.0, + "69": 1784014336.0, "70": 1784014336.0, + "71": 1784014336.0, + "72": 1784014336.0, + "73": 1784014336.0, + "74": 1784014336.0, "75": 1784014336.0, + "76": 1784014336.0, + "77": 1784014336.0, + "78": 1784014336.0, + "79": 1784014336.0, "80": 1784014336.0, + "81": 1784014336.0, + "82": 1784014336.0, + "83": 1784014336.0, + "84": 1784014336.0, "85": 1784014336.0, + "86": 1784014336.0, + "87": 1784014336.0, + "88": 1784014336.0, + "89": 1784014336.0, "90": 1784014336.0, + "91": 1784014336.0, + "92": 1784014336.0, + "93": 1784014336.0, + "94": 1784014336.0, "95": 1784014336.0, + "96": 1784014336.0, + "97": 1784014336.0, + "98": 1784014336.0, + "99": 1784014336.0, "100": 1784014336.0 
} }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2365860864.0, + "2": 3108323328.0, + "3": 3108323328.0, + "4": 3108323328.0, "5": 3108323328.0, + "6": 3108323328.0, + "7": 3108323328.0, + "8": 3108323328.0, + "9": 3108323328.0, "10": 3108323328.0, + "11": 3108323328.0, + "12": 3108323328.0, + "13": 3108323328.0, + "14": 3108323328.0, "15": 3108323328.0, + "16": 3108323328.0, + "17": 3108323328.0, + "18": 3108323328.0, + "19": 3108323328.0, "20": 3108323328.0, + "21": 3108323328.0, + "22": 3108323328.0, + "23": 3108323328.0, + "24": 3108323328.0, "25": 3108323328.0, + "26": 3108323328.0, + "27": 3108323328.0, + "28": 3108323328.0, + "29": 3108323328.0, "30": 3108323328.0, + "31": 3108323328.0, + "32": 3108323328.0, + "33": 3108323328.0, + "34": 3108323328.0, "35": 3108323328.0, + "36": 3108323328.0, + "37": 3108323328.0, + "38": 3108323328.0, + "39": 3108323328.0, "40": 3108323328.0, + "41": 3108323328.0, + "42": 3108323328.0, + "43": 3108323328.0, + "44": 3108323328.0, "45": 3108323328.0, + "46": 3108323328.0, + "47": 3108323328.0, + "48": 3108323328.0, + "49": 3108323328.0, "50": 3108323328.0, + "51": 3108323328.0, + "52": 3108323328.0, + "53": 3108323328.0, + "54": 3108323328.0, "55": 3108323328.0, + "56": 3108323328.0, + "57": 3108323328.0, + "58": 3108323328.0, + "59": 3108323328.0, "60": 3108323328.0, + "61": 3108323328.0, + "62": 3108323328.0, + "63": 3108323328.0, + "64": 3108323328.0, "65": 3108323328.0, + "66": 3108323328.0, + "67": 3108323328.0, + "68": 3108323328.0, + "69": 3108323328.0, "70": 3108323328.0, + "71": 3108323328.0, + "72": 3108323328.0, + "73": 3108323328.0, + "74": 3108323328.0, "75": 3108323328.0, + "76": 3108323328.0, + "77": 3108323328.0, + "78": 3108323328.0, + "79": 3108323328.0, "80": 3108323328.0, + "81": 3108323328.0, + "82": 3108323328.0, + "83": 3108323328.0, + "84": 3108323328.0, "85": 3108323328.0, + "86": 3108323328.0, + "87": 3108323328.0, + 
"88": 3108323328.0, + "89": 3108323328.0, "90": 3108323328.0, + "91": 3108323328.0, + "92": 3108323328.0, + "93": 3108323328.0, + "94": 3108323328.0, "95": 3108323328.0, + "96": 3108323328.0, + "97": 3108323328.0, + "98": 3108323328.0, + "99": 3108323328.0, "100": 3108323328.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 11.15622, - "5": 0.89876, - "10": 0.89356, - "15": 0.87954, - "20": 0.86205, - "25": 0.856, - "30": 0.88843, - "35": 0.85722, - "40": 0.87142, - "45": 1.00082, - "50": 1.22422, - "55": 1.51231, - "60": 0.8651, - "65": 0.85577, - "70": 0.86627, - "75": 0.94057, - "80": 0.86318, - "85": 1.18974, - "90": 0.85756, - "95": 0.85398, - "100": 0.85745 + "1": 12.25998, + "2": 1.04599, + "3": 1.00983, + "4": 1.01193, + "5": 1.01326, + "6": 1.01181, + "7": 1.01264, + "8": 1.01822, + "9": 1.02424, + "10": 1.0191, + "11": 1.01303, + "12": 1.00485, + "13": 1.0025, + "14": 1.00999, + "15": 1.00956, + "16": 1.00094, + "17": 1.00769, + "18": 1.01014, + "19": 1.01639, + "20": 1.22304, + "21": 1.4851, + "22": 1.19412, + "23": 1.01165, + "24": 1.0106, + "25": 1.01512, + "26": 1.00595, + "27": 1.01769, + "28": 1.01182, + "29": 1.00676, + "30": 1.00481, + "31": 1.1042, + "32": 1.00908, + "33": 1.01083, + "34": 1.00353, + "35": 1.00454, + "36": 1.00641, + "37": 1.00279, + "38": 1.00471, + "39": 1.00143, + "40": 1.00802, + "41": 1.00755, + "42": 1.00913, + "43": 1.00814, + "44": 1.00935, + "45": 1.00635, + "46": 1.01076, + "47": 1.01077, + "48": 1.14065, + "49": 1.24856, + "50": 1.09012, + "51": 1.03825, + "52": 1.44742, + "53": 1.3184, + "54": 1.01374, + "55": 1.01506, + "56": 1.01099, + "57": 1.04106, + "58": 1.02232, + "59": 1.01748, + "60": 1.00992, + "61": 1.02073, + "62": 1.02809, + "63": 1.34383, + "64": 1.38941, + "65": 1.10673, + "66": 1.01505, + "67": 1.00839, + "68": 1.00645, + "69": 1.01066, + "70": 1.01137, + "71": 1.35475, + "72": 1.02215, + "73": 1.0187, + "74": 1.01939, + "75": 
1.10218, + "76": 1.12059, + "77": 1.12057, + "78": 1.03631, + "79": 1.12601, + "80": 1.33494, + "81": 1.09935, + "82": 1.06264, + "83": 1.31187, + "84": 1.0139, + "85": 1.00708, + "86": 1.02816, + "87": 1.02033, + "88": 1.01728, + "89": 1.2628, + "90": 1.01941, + "91": 1.01944, + "92": 1.0295, + "93": 1.01897, + "94": 1.01663, + "95": 1.02386, + "96": 1.00901, + "97": 1.00751, + "98": 1.0074, + "99": 1.00366, + "100": 1.00628 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..137f195264d --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.48367, + "2": 10.48426, + "3": 10.48254, + "4": 10.48311, + "5": 10.4764, + "6": 10.4844, + "7": 10.48458, + "8": 10.48829, + "9": 10.49008, + "10": 10.47268, + "11": 10.47256, + "12": 10.48259, + "13": 10.47857, + "14": 10.45154, + "15": 10.47925, + "16": 10.45346, + "17": 10.45145, + "18": 10.46238, + "19": 10.44113, + "20": 10.45448, + "21": 10.43454, + "22": 10.40592, + "23": 10.39961, + "24": 10.37579, + "25": 10.38182, + "26": 10.35147, + "27": 10.35388, + "28": 10.34937, + "29": 10.28711, + "30": 10.21159, + "31": 10.1726, + "32": 10.13421, + "33": 10.14744, + "34": 10.10737, + "35": 10.10581, + "36": 10.08735, + "37": 10.08157, + "38": 10.07233, + "39": 10.00094, + "40": 9.98143, + "41": 9.92541, + "42": 9.87527, + "43": 9.88711, + "44": 9.80642, + "45": 9.82325, + "46": 9.73785, + "47": 9.74817, + "48": 9.71609, + "49": 9.74484, + "50": 9.72982, + "51": 9.71485, + "52": 9.66475, + "53": 9.60919, + "54": 
9.62705, + "55": 9.61012, + "56": 9.617, + "57": 9.56786, + "58": 9.52731, + "59": 9.51668, + "60": 9.51865, + "61": 9.53132, + "62": 9.45016, + "63": 9.45725, + "64": 9.43435, + "65": 9.45801, + "66": 9.4368, + "67": 9.3968, + "68": 9.36474, + "69": 9.4095, + "70": 9.376, + "71": 9.41716, + "72": 9.42574, + "73": 9.37581, + "74": 9.41547, + "75": 9.37891, + "76": 9.28017, + "77": 9.32205, + "78": 9.35754, + "79": 9.32162, + "80": 9.31486, + "81": 9.2678, + "82": 9.34178, + "83": 9.32145, + "84": 9.24785, + "85": 9.35023, + "86": 9.22392, + "87": 9.3062, + "88": 9.29891, + "89": 9.22716, + "90": 9.28483, + "91": 9.23109, + "92": 9.27463, + "93": 9.19241, + "94": 9.23984, + "95": 9.28006, + "96": 9.17526, + "97": 9.21894, + "98": 9.17192, + "99": 9.16446, + "100": 9.14816 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2570.0, + "2": 1923.0, + "3": 1512.0, + "4": 2322.0, + "5": 2033.0, + "6": 1774.0, + "7": 2781.0, + "8": 2460.0, + "9": 2308.0, + "10": 2635.0, + "11": 2397.0, + "12": 1817.0, + "13": 2348.0, + "14": 2749.0, + "15": 2027.0, + "16": 2719.0, + "17": 2487.0, + "18": 2533.0, + "19": 2547.0, + "20": 2850.0, + "21": 1990.0, + "22": 2884.0, + "23": 2857.0, + "24": 2685.0, + "25": 2514.0, + "26": 2958.0, + "27": 2673.0, + "28": 2723.0, + "29": 2571.0, + "30": 2858.0, + "31": 2157.0, + "32": 2357.0, + "33": 2242.0, + "34": 2464.0, + "35": 2544.0, + "36": 2933.0, + "37": 3293.0, + "38": 2730.0, + "39": 2795.0, + "40": 3310.0, + "41": 1816.0, + "42": 1467.0, + "43": 1817.0, + "44": 2633.0, + "45": 3576.0, + "46": 3015.0, + "47": 2805.0, + "48": 3071.0, + "49": 2974.0, + "50": 2267.0, + "51": 1923.0, + "52": 2515.0, + "53": 3615.0, + "54": 3426.0, + "55": 3436.0, + "56": 4411.0, + "57": 4095.0, + "58": 4308.0, + "59": 1687.0, + "60": 2431.0, + "61": 2151.0, + "62": 3986.0, + "63": 3558.0, + "64": 4286.0, + "65": 3052.0, + "66": 1720.0, + "67": 1910.0, + "68": 4193.0, + "69": 4347.0, + "70": 4596.0, + 
"71": 2078.0, + "72": 4406.0, + "73": 4062.0, + "74": 3358.0, + "75": 4606.0, + "76": 2187.0, + "77": 4854.0, + "78": 4098.0, + "79": 2652.0, + "80": 3776.0, + "81": 3550.0, + "82": 3031.0, + "83": 5345.0, + "84": 4396.0, + "85": 4354.0, + "86": 3332.0, + "87": 4815.0, + "88": 3303.0, + "89": 4611.0, + "90": 4346.0, + "91": 4361.0, + "92": 3502.0, + "93": 5624.0, + "94": 3733.0, + "95": 4728.0, + "96": 3534.0, + "97": 3873.0, + "98": 4525.0, + "99": 4329.0, + "100": 3365.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1784014336.0, + "2": 1784014336.0, + "3": 1784014336.0, + "4": 1784014336.0, + "5": 1784014336.0, + "6": 1784014336.0, + "7": 1784014336.0, + "8": 1784014336.0, + "9": 1784014336.0, + "10": 1784014336.0, + "11": 1784014336.0, + "12": 1784014336.0, + "13": 1784014336.0, + "14": 1784014336.0, + "15": 1784014336.0, + "16": 1784014336.0, + "17": 1784014336.0, + "18": 1784014336.0, + "19": 1784014336.0, + "20": 1784014336.0, + "21": 1784014336.0, + "22": 1784014336.0, + "23": 1784014336.0, + "24": 1784014336.0, + "25": 1784014336.0, + "26": 1784014336.0, + "27": 1784014336.0, + "28": 1784014336.0, + "29": 1784014336.0, + "30": 1784014336.0, + "31": 1784014336.0, + "32": 1784014336.0, + "33": 1784014336.0, + "34": 1784014336.0, + "35": 1784014336.0, + "36": 1784014336.0, + "37": 1784014336.0, + "38": 1784014336.0, + "39": 1784014336.0, + "40": 1784014336.0, + "41": 1784014336.0, + "42": 1784014336.0, + "43": 1784014336.0, + "44": 1784014336.0, + "45": 1784014336.0, + "46": 1784014336.0, + "47": 1784014336.0, + "48": 1784014336.0, + "49": 1784014336.0, + "50": 1784014336.0, + "51": 1784014336.0, + "52": 1784014336.0, + "53": 1784014336.0, + "54": 1784014336.0, + "55": 1784014336.0, + "56": 1784014336.0, + "57": 1784014336.0, + "58": 1784014336.0, + "59": 1784014336.0, + "60": 1784014336.0, + "61": 1784014336.0, + "62": 1784014336.0, + "63": 1784014336.0, + "64": 1784014336.0, + "65": 
1784014336.0, + "66": 1784014336.0, + "67": 1784014336.0, + "68": 1784014336.0, + "69": 1784014336.0, + "70": 1784014336.0, + "71": 1784014336.0, + "72": 1784014336.0, + "73": 1784014336.0, + "74": 1784014336.0, + "75": 1784014336.0, + "76": 1784014336.0, + "77": 1784014336.0, + "78": 1784014336.0, + "79": 1784014336.0, + "80": 1784014336.0, + "81": 1784014336.0, + "82": 1784014336.0, + "83": 1784014336.0, + "84": 1784014336.0, + "85": 1784014336.0, + "86": 1784014336.0, + "87": 1784014336.0, + "88": 1784014336.0, + "89": 1784014336.0, + "90": 1784014336.0, + "91": 1784014336.0, + "92": 1784014336.0, + "93": 1784014336.0, + "94": 1784014336.0, + "95": 1784014336.0, + "96": 1784014336.0, + "97": 1784014336.0, + "98": 1784014336.0, + "99": 1784014336.0, + "100": 1784014336.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2365860864.0, + "2": 3108323328.0, + "3": 3108323328.0, + "4": 3108323328.0, + "5": 3108323328.0, + "6": 3108323328.0, + "7": 3108323328.0, + "8": 3108323328.0, + "9": 3108323328.0, + "10": 3108845568.0, + "11": 3108845568.0, + "12": 3108845568.0, + "13": 3108845568.0, + "14": 3108845568.0, + "15": 3108845568.0, + "16": 3108845568.0, + "17": 3108845568.0, + "18": 3108845568.0, + "19": 3108845568.0, + "20": 3108845568.0, + "21": 3108845568.0, + "22": 3108845568.0, + "23": 3108845568.0, + "24": 3108845568.0, + "25": 3108845568.0, + "26": 3108845568.0, + "27": 3108845568.0, + "28": 3108845568.0, + "29": 3108845568.0, + "30": 3108845568.0, + "31": 3108845568.0, + "32": 3108845568.0, + "33": 3108845568.0, + "34": 3108845568.0, + "35": 3108845568.0, + "36": 3108845568.0, + "37": 3108846080.0, + "38": 3108846080.0, + "39": 3108846080.0, + "40": 3108846080.0, + "41": 3108846080.0, + "42": 3108846080.0, + "43": 3108846080.0, + "44": 3108846080.0, + "45": 3108846080.0, + "46": 3108846080.0, + "47": 3108846080.0, + "48": 3108846080.0, + "49": 3108846080.0, + "50": 3108846080.0, + "51": 
3108846080.0, + "52": 3108846080.0, + "53": 3108846080.0, + "54": 3108846080.0, + "55": 3108846080.0, + "56": 3108846080.0, + "57": 3108846080.0, + "58": 3108846080.0, + "59": 3108846080.0, + "60": 3108846080.0, + "61": 3108846080.0, + "62": 3108847616.0, + "63": 3108847616.0, + "64": 3108847616.0, + "65": 3108847616.0, + "66": 3108847616.0, + "67": 3108847616.0, + "68": 3108847616.0, + "69": 3108847616.0, + "70": 3108847616.0, + "71": 3108847616.0, + "72": 3108847616.0, + "73": 3108847616.0, + "74": 3108847616.0, + "75": 3108847616.0, + "76": 3108847616.0, + "77": 3108847616.0, + "78": 3108847616.0, + "79": 3108847616.0, + "80": 3108847616.0, + "81": 3108847616.0, + "82": 3108847616.0, + "83": 3108847616.0, + "84": 3108847616.0, + "85": 3108847616.0, + "86": 3108847616.0, + "87": 3108847616.0, + "88": 3108847616.0, + "89": 3108847616.0, + "90": 3108847616.0, + "91": 3108847616.0, + "92": 3108847616.0, + "93": 3108847616.0, + "94": 3108847616.0, + "95": 3108847616.0, + "96": 3108847616.0, + "97": 3108847616.0, + "98": 3108847616.0, + "99": 3108847616.0, + "100": 3108847616.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 13.09913, + "2": 1.02984, + "3": 0.9509, + "4": 0.92961, + "5": 0.88057, + "6": 0.86499, + "7": 0.87435, + "8": 0.87748, + "9": 0.88481, + "10": 0.87813, + "11": 0.88937, + "12": 0.91092, + "13": 0.85441, + "14": 0.87519, + "15": 0.89434, + "16": 1.08771, + "17": 0.87461, + "18": 0.8785, + "19": 1.08419, + "20": 1.00138, + "21": 0.98051, + "22": 1.32806, + "23": 0.85982, + "24": 0.88387, + "25": 0.88245, + "26": 0.87335, + "27": 0.88317, + "28": 0.88985, + "29": 0.895, + "30": 0.87281, + "31": 0.88109, + "32": 0.87358, + "33": 0.89681, + "34": 0.91049, + "35": 0.89763, + "36": 0.89169, + "37": 0.89357, + "38": 0.89732, + "39": 0.88241, + "40": 0.90292, + "41": 0.88715, + "42": 0.90721, + "43": 1.00024, + "44": 1.05261, + "45": 0.88589, + "46": 0.89065, + "47": 1.19824, + "48": 
1.03763, + "49": 0.88362, + "50": 2.54681, + "51": 0.88554, + "52": 1.29624, + "53": 0.90469, + "54": 1.25859, + "55": 0.8959, + "56": 0.89223, + "57": 0.91307, + "58": 0.9046, + "59": 0.90217, + "60": 1.19764, + "61": 0.96385, + "62": 1.26273, + "63": 1.00365, + "64": 0.95065, + "65": 0.87723, + "66": 0.87675, + "67": 0.8752, + "68": 1.1677, + "69": 0.87584, + "70": 0.88581, + "71": 1.19607, + "72": 0.88789, + "73": 1.11276, + "74": 0.89256, + "75": 0.8887, + "76": 1.28091, + "77": 0.93746, + "78": 0.87892, + "79": 1.07934, + "80": 0.88837, + "81": 0.87726, + "82": 0.87655, + "83": 0.89632, + "84": 0.90579, + "85": 0.88535, + "86": 0.8924, + "87": 0.8763, + "88": 0.8769, + "89": 0.87952, + "90": 0.89745, + "91": 0.8736, + "92": 0.8825, + "93": 0.8845, + "94": 0.87495, + "95": 0.88075, + "96": 0.94076, + "97": 0.87753, + "98": 0.88407, + "99": 0.89106, + "100": 0.88092 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..dc5d31f8f8b --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.48367, + "2": 10.48426, + "3": 10.48254, + "4": 10.48311, + "5": 10.4764, + "6": 10.4844, + "7": 10.48458, + "8": 10.48829, + "9": 10.49008, + "10": 10.47268, + "11": 10.47256, + "12": 10.48259, + "13": 10.47857, + "14": 10.45154, + "15": 10.47925, + "16": 10.45346, + "17": 10.45145, + "18": 10.46238, + "19": 10.44113, + "20": 10.45448, + "21": 10.43454, + "22": 10.40592, + "23": 10.39961, + "24": 10.37579, + "25": 10.38182, + "26": 10.35147, + "27": 10.35388, + "28": 10.34937, + 
"29": 10.28711, + "30": 10.21159, + "31": 10.1726, + "32": 10.13421, + "33": 10.14744, + "34": 10.10737, + "35": 10.10581, + "36": 10.08735, + "37": 10.08157, + "38": 10.07233, + "39": 10.00094, + "40": 9.98143, + "41": 9.92541, + "42": 9.87527, + "43": 9.88711, + "44": 9.80642, + "45": 9.82325, + "46": 9.73785, + "47": 9.74817, + "48": 9.71609, + "49": 9.74484, + "50": 9.72982, + "51": 9.71485, + "52": 9.66475, + "53": 9.60919, + "54": 9.62705, + "55": 9.61012, + "56": 9.617, + "57": 9.56786, + "58": 9.52731, + "59": 9.51668, + "60": 9.51865, + "61": 9.53132, + "62": 9.45016, + "63": 9.45725, + "64": 9.43435, + "65": 9.45801, + "66": 9.4368, + "67": 9.3968, + "68": 9.36474, + "69": 9.4095, + "70": 9.376, + "71": 9.41716, + "72": 9.42574, + "73": 9.37581, + "74": 9.41547, + "75": 9.37891, + "76": 9.28017, + "77": 9.32205, + "78": 9.35754, + "79": 9.32162, + "80": 9.31486, + "81": 9.2678, + "82": 9.34178, + "83": 9.32145, + "84": 9.24785, + "85": 9.35023, + "86": 9.22392, + "87": 9.3062, + "88": 9.29891, + "89": 9.22716, + "90": 9.28483, + "91": 9.23109, + "92": 9.27463, + "93": 9.19241, + "94": 9.23984, + "95": 9.28006, + "96": 9.17526, + "97": 9.21894, + "98": 9.17192, + "99": 9.16446, + "100": 9.14816 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2570.0, + "2": 1923.0, + "3": 1512.0, + "4": 2322.0, + "5": 2033.0, + "6": 1774.0, + "7": 2781.0, + "8": 2460.0, + "9": 2308.0, + "10": 2635.0, + "11": 2397.0, + "12": 1817.0, + "13": 2348.0, + "14": 2749.0, + "15": 2027.0, + "16": 2719.0, + "17": 2487.0, + "18": 2533.0, + "19": 2547.0, + "20": 2850.0, + "21": 1990.0, + "22": 2884.0, + "23": 2857.0, + "24": 2685.0, + "25": 2514.0, + "26": 2958.0, + "27": 2673.0, + "28": 2723.0, + "29": 2571.0, + "30": 2858.0, + "31": 2157.0, + "32": 2357.0, + "33": 2242.0, + "34": 2464.0, + "35": 2544.0, + "36": 2933.0, + "37": 3293.0, + "38": 2730.0, + "39": 2795.0, + "40": 3310.0, + "41": 1816.0, + "42": 1467.0, + "43": 
1817.0, + "44": 2633.0, + "45": 3576.0, + "46": 3015.0, + "47": 2805.0, + "48": 3071.0, + "49": 2974.0, + "50": 2267.0, + "51": 1923.0, + "52": 2515.0, + "53": 3615.0, + "54": 3426.0, + "55": 3436.0, + "56": 4411.0, + "57": 4095.0, + "58": 4308.0, + "59": 1687.0, + "60": 2431.0, + "61": 2151.0, + "62": 3986.0, + "63": 3558.0, + "64": 4286.0, + "65": 3052.0, + "66": 1720.0, + "67": 1910.0, + "68": 4193.0, + "69": 4347.0, + "70": 4596.0, + "71": 2078.0, + "72": 4406.0, + "73": 4062.0, + "74": 3358.0, + "75": 4606.0, + "76": 2187.0, + "77": 4854.0, + "78": 4098.0, + "79": 2652.0, + "80": 3776.0, + "81": 3550.0, + "82": 3031.0, + "83": 5345.0, + "84": 4396.0, + "85": 4354.0, + "86": 3332.0, + "87": 4815.0, + "88": 3303.0, + "89": 4611.0, + "90": 4346.0, + "91": 4361.0, + "92": 3502.0, + "93": 5624.0, + "94": 3733.0, + "95": 4728.0, + "96": 3534.0, + "97": 3873.0, + "98": 4525.0, + "99": 4329.0, + "100": 3365.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1784014336.0, + "2": 1784014336.0, + "3": 1784014336.0, + "4": 1784014336.0, + "5": 1784014336.0, + "6": 1784014336.0, + "7": 1784014336.0, + "8": 1784014336.0, + "9": 1784014336.0, + "10": 1784014336.0, + "11": 1784014336.0, + "12": 1784014336.0, + "13": 1784014336.0, + "14": 1784014336.0, + "15": 1784014336.0, + "16": 1784014336.0, + "17": 1784014336.0, + "18": 1784014336.0, + "19": 1784014336.0, + "20": 1784014336.0, + "21": 1784014336.0, + "22": 1784014336.0, + "23": 1784014336.0, + "24": 1784014336.0, + "25": 1784014336.0, + "26": 1784014336.0, + "27": 1784014336.0, + "28": 1784014336.0, + "29": 1784014336.0, + "30": 1784014336.0, + "31": 1784014336.0, + "32": 1784014336.0, + "33": 1784014336.0, + "34": 1784014336.0, + "35": 1784014336.0, + "36": 1784014336.0, + "37": 1784014336.0, + "38": 1784014336.0, + "39": 1784014336.0, + "40": 1784014336.0, + "41": 1784014336.0, + "42": 1784014336.0, + "43": 1784014336.0, + "44": 1784014336.0, + "45": 
1784014336.0, + "46": 1784014336.0, + "47": 1784014336.0, + "48": 1784014336.0, + "49": 1784014336.0, + "50": 1784014336.0, + "51": 1784014336.0, + "52": 1784014336.0, + "53": 1784014336.0, + "54": 1784014336.0, + "55": 1784014336.0, + "56": 1784014336.0, + "57": 1784014336.0, + "58": 1784014336.0, + "59": 1784014336.0, + "60": 1784014336.0, + "61": 1784014336.0, + "62": 1784014336.0, + "63": 1784014336.0, + "64": 1784014336.0, + "65": 1784014336.0, + "66": 1784014336.0, + "67": 1784014336.0, + "68": 1784014336.0, + "69": 1784014336.0, + "70": 1784014336.0, + "71": 1784014336.0, + "72": 1784014336.0, + "73": 1784014336.0, + "74": 1784014336.0, + "75": 1784014336.0, + "76": 1784014336.0, + "77": 1784014336.0, + "78": 1784014336.0, + "79": 1784014336.0, + "80": 1784014336.0, + "81": 1784014336.0, + "82": 1784014336.0, + "83": 1784014336.0, + "84": 1784014336.0, + "85": 1784014336.0, + "86": 1784014336.0, + "87": 1784014336.0, + "88": 1784014336.0, + "89": 1784014336.0, + "90": 1784014336.0, + "91": 1784014336.0, + "92": 1784014336.0, + "93": 1784014336.0, + "94": 1784014336.0, + "95": 1784014336.0, + "96": 1784014336.0, + "97": 1784014336.0, + "98": 1784014336.0, + "99": 1784014336.0, + "100": 1784014336.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2365860864.0, + "2": 3108323328.0, + "3": 3108323328.0, + "4": 3108323328.0, + "5": 3108323328.0, + "6": 3108323328.0, + "7": 3108323328.0, + "8": 3108323328.0, + "9": 3108323328.0, + "10": 3108323328.0, + "11": 3108323328.0, + "12": 3108323328.0, + "13": 3108323328.0, + "14": 3108323328.0, + "15": 3108323328.0, + "16": 3108323328.0, + "17": 3108323328.0, + "18": 3108323328.0, + "19": 3108323328.0, + "20": 3108323328.0, + "21": 3108323328.0, + "22": 3108323328.0, + "23": 3108323328.0, + "24": 3108323328.0, + "25": 3108323328.0, + "26": 3108323328.0, + "27": 3108323328.0, + "28": 3108323328.0, + "29": 3108323328.0, + "30": 3108323328.0, + "31": 
3108323328.0, + "32": 3108323328.0, + "33": 3108323328.0, + "34": 3108323328.0, + "35": 3108323328.0, + "36": 3108323328.0, + "37": 3108323328.0, + "38": 3108323328.0, + "39": 3108323328.0, + "40": 3108323328.0, + "41": 3108323328.0, + "42": 3108323328.0, + "43": 3108323328.0, + "44": 3108323328.0, + "45": 3108323328.0, + "46": 3108323328.0, + "47": 3108323328.0, + "48": 3108323328.0, + "49": 3108323328.0, + "50": 3108323328.0, + "51": 3108323328.0, + "52": 3108323328.0, + "53": 3108323328.0, + "54": 3108323328.0, + "55": 3108323328.0, + "56": 3108323328.0, + "57": 3108842496.0, + "58": 3108842496.0, + "59": 3108842496.0, + "60": 3108842496.0, + "61": 3108842496.0, + "62": 3108842496.0, + "63": 3108842496.0, + "64": 3108842496.0, + "65": 3108842496.0, + "66": 3108842496.0, + "67": 3108842496.0, + "68": 3108842496.0, + "69": 3108842496.0, + "70": 3108842496.0, + "71": 3108842496.0, + "72": 3108842496.0, + "73": 3108842496.0, + "74": 3108842496.0, + "75": 3108844544.0, + "76": 3108844544.0, + "77": 3108844544.0, + "78": 3108844544.0, + "79": 3108844544.0, + "80": 3108844544.0, + "81": 3108844544.0, + "82": 3108844544.0, + "83": 3108844544.0, + "84": 3108844544.0, + "85": 3108844544.0, + "86": 3108844544.0, + "87": 3108844544.0, + "88": 3108844544.0, + "89": 3108844544.0, + "90": 3108844544.0, + "91": 3108844544.0, + "92": 3108844544.0, + "93": 3108844544.0, + "94": 3108844544.0, + "95": 3108844544.0, + "96": 3108844544.0, + "97": 3108844544.0, + "98": 3108844544.0, + "99": 3108844544.0, + "100": 3108844544.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.84806, + "2": 1.03522, + "3": 1.00793, + "4": 1.00939, + "5": 1.00929, + "6": 1.01517, + "7": 1.01009, + "8": 1.01561, + "9": 1.02131, + "10": 1.01787, + "11": 1.01149, + "12": 1.0128, + "13": 1.01358, + "14": 1.01768, + "15": 1.23565, + "16": 1.01096, + "17": 1.19479, + "18": 1.01674, + "19": 1.01808, + "20": 1.23016, + "21": 1.01908, + "22": 
1.11536, + "23": 1.0888, + "24": 1.02965, + "25": 1.03972, + "26": 1.00766, + "27": 1.00981, + "28": 1.01339, + "29": 1.01801, + "30": 1.01655, + "31": 1.01796, + "32": 1.01286, + "33": 1.01823, + "34": 1.00604, + "35": 1.01493, + "36": 1.01106, + "37": 1.00783, + "38": 1.01573, + "39": 1.01525, + "40": 1.09842, + "41": 1.39919, + "42": 1.22658, + "43": 1.00841, + "44": 0.99932, + "45": 1.00156, + "46": 1.18473, + "47": 1.01528, + "48": 1.00768, + "49": 1.00498, + "50": 0.9957, + "51": 1.29149, + "52": 1.10051, + "53": 1.00264, + "54": 1.00531, + "55": 1.30558, + "56": 0.99836, + "57": 1.00645, + "58": 1.00413, + "59": 1.00106, + "60": 1.00076, + "61": 1.32205, + "62": 1.00795, + "63": 1.2523, + "64": 1.01369, + "65": 1.01151, + "66": 1.01484, + "67": 1.00831, + "68": 1.01849, + "69": 1.01821, + "70": 1.01316, + "71": 1.01068, + "72": 1.01792, + "73": 1.47417, + "74": 1.01143, + "75": 1.14077, + "76": 1.01286, + "77": 1.08819, + "78": 1.01005, + "79": 1.0069, + "80": 1.01196, + "81": 1.0882, + "82": 1.00417, + "83": 1.29479, + "84": 1.0044, + "85": 1.0103, + "86": 1.00862, + "87": 1.01863, + "88": 1.2549, + "89": 1.0075, + "90": 1.00874, + "91": 1.0111, + "92": 1.01049, + "93": 1.01084, + "94": 1.01043, + "95": 1.01246, + "96": 1.01317, + "97": 1.09821, + "98": 1.01406, + "99": 1.00578, + "100": 1.09442 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 0019ac97573..b5f4b597886 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm 
loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.4837, + "2": 10.48435, + "3": 10.48251, + "4": 10.48303, "5": 10.47647, + "6": 10.48423, + "7": 10.48457, + "8": 10.48837, + "9": 10.49003, "10": 10.47255, + "11": 10.47245, + "12": 10.4828, + "13": 10.47855, + "14": 10.45162, "15": 10.47936, + "16": 10.45364, + "17": 10.45143, + "18": 10.46239, + "19": 10.44136, "20": 10.45438, + "21": 10.43469, + "22": 10.40587, + "23": 10.39982, + "24": 10.37585, "25": 10.38173, + "26": 10.35154, + "27": 10.35401, + "28": 10.3497, + "29": 10.28714, "30": 10.21194, + "31": 10.17274, + "32": 10.13439, + "33": 10.14753, + "34": 10.10759, "35": 10.10592, + "36": 10.08756, + "37": 10.08177, + "38": 10.07257, + "39": 10.0013, "40": 9.9816, + "41": 9.92549, + "42": 9.87529, + "43": 9.88742, + "44": 9.80641, "45": 9.82342, + "46": 9.73815, + "47": 9.74831, + "48": 9.71619, + "49": 9.74504, "50": 9.73004, + "51": 9.71503, + "52": 9.66484, + "53": 9.60935, + "54": 9.62735, "55": 9.61036, + "56": 9.61745, + "57": 9.56794, + "58": 9.52742, + "59": 9.51685, "60": 9.51873, + "61": 9.53147, + "62": 9.45024, + "63": 9.45733, + "64": 9.43455, "65": 9.4582, + "66": 9.43694, + "67": 9.39693, + "68": 9.36491, + "69": 9.40957, "70": 9.37605, + "71": 9.41735, + "72": 9.42581, + "73": 9.37614, + "74": 9.41544, "75": 9.37897, + "76": 9.28015, + "77": 9.32215, + "78": 9.35752, + "79": 9.32154, "80": 9.31496, + "81": 9.26776, + "82": 9.34189, + "83": 9.32163, + "84": 9.24791, "85": 9.35021, + "86": 9.22383, + "87": 9.30627, + "88": 9.29884, + "89": 9.22708, "90": 9.28475, + "91": 9.23116, + "92": 9.27477, + "93": 9.1922, + "94": 9.23984, "95": 9.27996, + "96": 9.17534, + "97": 9.21892, + "98": 9.1719, + "99": 9.1646, "100": 9.14809 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2554.0, + "2": 1919.0, + "3": 1521.0, + "4": 2330.0, "5": 2010.0, + "6": 1725.0, + "7": 2803.0, + "8": 
2435.0, + "9": 2286.0, "10": 2570.0, + "11": 2438.0, + "12": 1829.0, + "13": 2332.0, + "14": 2832.0, "15": 2008.0, + "16": 2659.0, + "17": 2454.0, + "18": 2500.0, + "19": 2588.0, "20": 2834.0, + "21": 2042.0, + "22": 3037.0, + "23": 2702.0, + "24": 2700.0, "25": 2568.0, + "26": 2896.0, + "27": 2735.0, + "28": 2699.0, + "29": 2548.0, "30": 2843.0, + "31": 2160.0, + "32": 2458.0, + "33": 2130.0, + "34": 2517.0, "35": 2597.0, + "36": 3001.0, + "37": 3305.0, + "38": 2682.0, + "39": 2805.0, "40": 3425.0, + "41": 1812.0, + "42": 1481.0, + "43": 1726.0, + "44": 2575.0, "45": 3438.0, + "46": 2960.0, + "47": 2792.0, + "48": 3107.0, + "49": 2854.0, "50": 2145.0, + "51": 1964.0, + "52": 2437.0, + "53": 3823.0, + "54": 3427.0, "55": 3392.0, + "56": 4421.0, + "57": 4003.0, + "58": 4224.0, + "59": 1816.0, "60": 2520.0, + "61": 2106.0, + "62": 4011.0, + "63": 3637.0, + "64": 4375.0, "65": 3080.0, + "66": 1753.0, + "67": 1913.0, + "68": 4407.0, + "69": 4475.0, "70": 4419.0, + "71": 2152.0, + "72": 4399.0, + "73": 4134.0, + "74": 3315.0, "75": 4815.0, + "76": 2322.0, + "77": 5019.0, + "78": 4171.0, + "79": 2788.0, "80": 3831.0, + "81": 3411.0, + "82": 3004.0, + "83": 5145.0, + "84": 4399.0, "85": 4295.0, + "86": 3410.0, + "87": 4880.0, + "88": 3350.0, + "89": 4659.0, "90": 4370.0, + "91": 4273.0, + "92": 3325.0, + "93": 5509.0, + "94": 3804.0, "95": 4711.0, + "96": 3631.0, + "97": 3774.0, + "98": 4477.0, + "99": 4459.0, "100": 3220.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1767237120.0, + "2": 1767237120.0, + "3": 1767237120.0, + "4": 1767237120.0, "5": 1767237120.0, + "6": 1767237120.0, + "7": 1767237120.0, + "8": 1767237120.0, + "9": 1767237120.0, "10": 1767237120.0, + "11": 1767237120.0, + "12": 1767237120.0, + "13": 1767237120.0, + "14": 1767237120.0, "15": 1767237120.0, + "16": 1767237120.0, + "17": 1767237120.0, + "18": 1767237120.0, + "19": 1767237120.0, "20": 1767237120.0, + "21": 
1767237120.0, + "22": 1767237120.0, + "23": 1767237120.0, + "24": 1767237120.0, "25": 1767237120.0, + "26": 1767237120.0, + "27": 1767237120.0, + "28": 1767237120.0, + "29": 1767237120.0, "30": 1767237120.0, + "31": 1767237120.0, + "32": 1767237120.0, + "33": 1767237120.0, + "34": 1767237120.0, "35": 1767237120.0, + "36": 1767237120.0, + "37": 1767237120.0, + "38": 1767237120.0, + "39": 1767237120.0, "40": 1767237120.0, + "41": 1767237120.0, + "42": 1767237120.0, + "43": 1767237120.0, + "44": 1767237120.0, "45": 1767237120.0, + "46": 1767237120.0, + "47": 1767237120.0, + "48": 1767237120.0, + "49": 1767237120.0, "50": 1767237120.0, + "51": 1767237120.0, + "52": 1767237120.0, + "53": 1767237120.0, + "54": 1767237120.0, "55": 1767237120.0, + "56": 1767237120.0, + "57": 1767237120.0, + "58": 1767237120.0, + "59": 1767237120.0, "60": 1767237120.0, + "61": 1767237120.0, + "62": 1767237120.0, + "63": 1767237120.0, + "64": 1767237120.0, "65": 1767237120.0, + "66": 1767237120.0, + "67": 1767237120.0, + "68": 1767237120.0, + "69": 1767237120.0, "70": 1767237120.0, + "71": 1767237120.0, + "72": 1767237120.0, + "73": 1767237120.0, + "74": 1767237120.0, "75": 1767237120.0, + "76": 1767237120.0, + "77": 1767237120.0, + "78": 1767237120.0, + "79": 1767237120.0, "80": 1767237120.0, + "81": 1767237120.0, + "82": 1767237120.0, + "83": 1767237120.0, + "84": 1767237120.0, "85": 1767237120.0, + "86": 1767237120.0, + "87": 1767237120.0, + "88": 1767237120.0, + "89": 1767237120.0, "90": 1767237120.0, + "91": 1767237120.0, + "92": 1767237120.0, + "93": 1767237120.0, + "94": 1767237120.0, "95": 1767237120.0, + "96": 1767237120.0, + "97": 1767237120.0, + "98": 1767237120.0, + "99": 1767237120.0, "100": 1767237120.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2336500736.0, + "2": 3079487488.0, + "3": 3079487488.0, + "4": 3079487488.0, "5": 3079487488.0, + "6": 3079487488.0, + "7": 3079487488.0, + "8": 
3079487488.0, + "9": 3079487488.0, "10": 3079487488.0, + "11": 3079487488.0, + "12": 3079487488.0, + "13": 3079487488.0, + "14": 3079487488.0, "15": 3079487488.0, + "16": 3079487488.0, + "17": 3079487488.0, + "18": 3079487488.0, + "19": 3079487488.0, "20": 3079487488.0, + "21": 3079487488.0, + "22": 3079487488.0, + "23": 3079487488.0, + "24": 3079487488.0, "25": 3079487488.0, + "26": 3079487488.0, + "27": 3079487488.0, + "28": 3079487488.0, + "29": 3079487488.0, "30": 3079487488.0, + "31": 3079487488.0, + "32": 3079487488.0, + "33": 3079487488.0, + "34": 3079487488.0, "35": 3079487488.0, + "36": 3079487488.0, + "37": 3079487488.0, + "38": 3079487488.0, + "39": 3079487488.0, "40": 3079487488.0, + "41": 3079487488.0, + "42": 3079487488.0, + "43": 3079487488.0, + "44": 3079487488.0, "45": 3079487488.0, + "46": 3079487488.0, + "47": 3079487488.0, + "48": 3079487488.0, + "49": 3079487488.0, "50": 3079487488.0, + "51": 3079487488.0, + "52": 3079487488.0, + "53": 3079487488.0, + "54": 3079487488.0, "55": 3079487488.0, + "56": 3079487488.0, + "57": 3079487488.0, + "58": 3079487488.0, + "59": 3079487488.0, "60": 3079487488.0, + "61": 3079487488.0, + "62": 3079487488.0, + "63": 3079487488.0, + "64": 3079487488.0, "65": 3079487488.0, + "66": 3079487488.0, + "67": 3079487488.0, + "68": 3079487488.0, + "69": 3079487488.0, "70": 3079487488.0, + "71": 3079487488.0, + "72": 3079487488.0, + "73": 3079487488.0, + "74": 3079487488.0, "75": 3079487488.0, + "76": 3079487488.0, + "77": 3079487488.0, + "78": 3079487488.0, + "79": 3079487488.0, "80": 3079487488.0, + "81": 3079487488.0, + "82": 3079487488.0, + "83": 3079487488.0, + "84": 3079487488.0, "85": 3079487488.0, + "86": 3079487488.0, + "87": 3079487488.0, + "88": 3079487488.0, + "89": 3079487488.0, "90": 3079487488.0, + "91": 3079487488.0, + "92": 3079487488.0, + "93": 3079487488.0, + "94": 3079487488.0, "95": 3079487488.0, + "96": 3079487488.0, + "97": 3079487488.0, + "98": 3079487488.0, + "99": 3079487488.0, "100": 3079487488.0 
} }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 12.8928, - "5": 0.79082, - "10": 0.75815, - "15": 0.75209, - "20": 0.9959, - "25": 0.75483, - "30": 0.74868, - "35": 0.75419, - "40": 0.75497, - "45": 0.9028, - "50": 0.80341, - "55": 1.06556, - "60": 0.72403, - "65": 0.72429, - "70": 1.04312, - "75": 1.09577, - "80": 0.77413, - "85": 0.72501, - "90": 0.72387, - "95": 0.72312, - "100": 0.80268 + "1": 11.88602, + "2": 0.95024, + "3": 0.88873, + "4": 0.84081, + "5": 0.8407, + "6": 0.841, + "7": 0.83666, + "8": 0.83819, + "9": 0.83577, + "10": 0.83982, + "11": 0.83346, + "12": 0.8683, + "13": 0.84255, + "14": 0.83676, + "15": 1.08071, + "16": 1.25785, + "17": 0.83186, + "18": 0.8423, + "19": 0.84907, + "20": 0.84641, + "21": 0.84182, + "22": 1.26058, + "23": 0.86142, + "24": 0.84798, + "25": 0.84097, + "26": 0.84232, + "27": 0.85483, + "28": 0.85596, + "29": 0.85197, + "30": 0.85702, + "31": 0.85002, + "32": 0.85132, + "33": 0.85438, + "34": 0.86588, + "35": 0.87207, + "36": 0.85768, + "37": 0.87379, + "38": 0.85134, + "39": 0.8537, + "40": 0.84912, + "41": 0.85397, + "42": 0.9623, + "43": 1.06611, + "44": 0.98659, + "45": 1.18823, + "46": 0.86085, + "47": 0.85574, + "48": 0.8596, + "49": 0.97573, + "50": 0.95882, + "51": 0.86517, + "52": 0.85872, + "53": 0.86263, + "54": 0.86436, + "55": 0.89018, + "56": 0.8674, + "57": 0.86176, + "58": 0.85395, + "59": 1.16789, + "60": 0.85822, + "61": 1.20441, + "62": 0.85426, + "63": 0.85652, + "64": 0.85392, + "65": 0.86218, + "66": 0.88112, + "67": 1.16257, + "68": 0.85308, + "69": 1.00689, + "70": 0.86168, + "71": 1.01898, + "72": 1.007, + "73": 1.32547, + "74": 0.87953, + "75": 0.86331, + "76": 1.21865, + "77": 0.97064, + "78": 0.86068, + "79": 0.97841, + "80": 0.87282, + "81": 0.87319, + "82": 0.86404, + "83": 0.85854, + "84": 0.86686, + "85": 1.10394, + "86": 0.88271, + "87": 0.88117, + "88": 0.86213, + "89": 0.86328, + "90": 0.86472, + "91": 0.86372, + 
"92": 0.86414, + "93": 0.86268, + "94": 0.86412, + "95": 0.86343, + "96": 0.86012, + "97": 1.00046, + "98": 1.16876, + "99": 0.86021, + "100": 0.86224 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..1c7c359e92d --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.4837, + "2": 10.48435, + "3": 10.48251, + "4": 10.48303, + "5": 10.47647, + "6": 10.48423, + "7": 10.48457, + "8": 10.48837, + "9": 10.49003, + "10": 10.47255, + "11": 10.47245, + "12": 10.4828, + "13": 10.47855, + "14": 10.45162, + "15": 10.47936, + "16": 10.45364, + "17": 10.45143, + "18": 10.46239, + "19": 10.44136, + "20": 10.45438, + "21": 10.43469, + "22": 10.40587, + "23": 10.39982, + "24": 10.37585, + "25": 10.38173, + "26": 10.35154, + "27": 10.35401, + "28": 10.3497, + "29": 10.28714, + "30": 10.21194, + "31": 10.17274, + "32": 10.13439, + "33": 10.14753, + "34": 10.10759, + "35": 10.10592, + "36": 10.08756, + "37": 10.08177, + "38": 10.07257, + "39": 10.0013, + "40": 9.9816, + "41": 9.92549, + "42": 9.87529, + "43": 9.88742, + "44": 9.80641, + "45": 9.82342, + "46": 9.73815, + "47": 9.74831, + "48": 9.71619, + "49": 9.74504, + "50": 9.73004, + "51": 9.71503, + "52": 9.66484, + "53": 9.60935, + "54": 9.62735, + "55": 9.61036, + "56": 9.61745, + "57": 9.56794, + "58": 9.52742, + "59": 9.51685, + "60": 9.51873, + "61": 9.53147, + "62": 9.45024, + "63": 9.45733, + "64": 9.43455, + "65": 9.4582, + "66": 9.43694, + "67": 9.39693, + "68": 9.36491, + 
"69": 9.40957, + "70": 9.37605, + "71": 9.41735, + "72": 9.42581, + "73": 9.37614, + "74": 9.41544, + "75": 9.37897, + "76": 9.28015, + "77": 9.32215, + "78": 9.35752, + "79": 9.32154, + "80": 9.31496, + "81": 9.26776, + "82": 9.34189, + "83": 9.32163, + "84": 9.24791, + "85": 9.35021, + "86": 9.22383, + "87": 9.30627, + "88": 9.29884, + "89": 9.22708, + "90": 9.28475, + "91": 9.23116, + "92": 9.27477, + "93": 9.1922, + "94": 9.23984, + "95": 9.27996, + "96": 9.17534, + "97": 9.21892, + "98": 9.1719, + "99": 9.1646, + "100": 9.14809 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2554.0, + "2": 1919.0, + "3": 1521.0, + "4": 2330.0, + "5": 2010.0, + "6": 1725.0, + "7": 2803.0, + "8": 2435.0, + "9": 2286.0, + "10": 2570.0, + "11": 2438.0, + "12": 1829.0, + "13": 2332.0, + "14": 2832.0, + "15": 2008.0, + "16": 2659.0, + "17": 2454.0, + "18": 2500.0, + "19": 2588.0, + "20": 2834.0, + "21": 2042.0, + "22": 3037.0, + "23": 2702.0, + "24": 2700.0, + "25": 2568.0, + "26": 2896.0, + "27": 2735.0, + "28": 2699.0, + "29": 2548.0, + "30": 2843.0, + "31": 2160.0, + "32": 2458.0, + "33": 2130.0, + "34": 2517.0, + "35": 2597.0, + "36": 3001.0, + "37": 3305.0, + "38": 2682.0, + "39": 2805.0, + "40": 3425.0, + "41": 1812.0, + "42": 1481.0, + "43": 1726.0, + "44": 2575.0, + "45": 3438.0, + "46": 2960.0, + "47": 2792.0, + "48": 3107.0, + "49": 2854.0, + "50": 2145.0, + "51": 1964.0, + "52": 2437.0, + "53": 3823.0, + "54": 3427.0, + "55": 3392.0, + "56": 4421.0, + "57": 4003.0, + "58": 4224.0, + "59": 1816.0, + "60": 2520.0, + "61": 2106.0, + "62": 4011.0, + "63": 3637.0, + "64": 4375.0, + "65": 3080.0, + "66": 1753.0, + "67": 1913.0, + "68": 4407.0, + "69": 4475.0, + "70": 4419.0, + "71": 2152.0, + "72": 4399.0, + "73": 4134.0, + "74": 3315.0, + "75": 4815.0, + "76": 2322.0, + "77": 5019.0, + "78": 4171.0, + "79": 2788.0, + "80": 3831.0, + "81": 3411.0, + "82": 3004.0, + "83": 5145.0, + "84": 4399.0, + "85": 4295.0, + "86": 
3410.0, + "87": 4880.0, + "88": 3350.0, + "89": 4659.0, + "90": 4370.0, + "91": 4273.0, + "92": 3325.0, + "93": 5509.0, + "94": 3804.0, + "95": 4711.0, + "96": 3631.0, + "97": 3774.0, + "98": 4477.0, + "99": 4459.0, + "100": 3220.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1767237120.0, + "2": 1767237120.0, + "3": 1767237120.0, + "4": 1767237120.0, + "5": 1767237120.0, + "6": 1767237120.0, + "7": 1767237120.0, + "8": 1767237120.0, + "9": 1767237120.0, + "10": 1767237120.0, + "11": 1767237120.0, + "12": 1767237120.0, + "13": 1767237120.0, + "14": 1767237120.0, + "15": 1767237120.0, + "16": 1767237120.0, + "17": 1767237120.0, + "18": 1767237120.0, + "19": 1767237120.0, + "20": 1767237120.0, + "21": 1767237120.0, + "22": 1767237120.0, + "23": 1767237120.0, + "24": 1767237120.0, + "25": 1767237120.0, + "26": 1767237120.0, + "27": 1767237120.0, + "28": 1767237120.0, + "29": 1767237120.0, + "30": 1767237120.0, + "31": 1767237120.0, + "32": 1767237120.0, + "33": 1767237120.0, + "34": 1767237120.0, + "35": 1767237120.0, + "36": 1767237120.0, + "37": 1767237120.0, + "38": 1767237120.0, + "39": 1767237120.0, + "40": 1767237120.0, + "41": 1767237120.0, + "42": 1767237120.0, + "43": 1767237120.0, + "44": 1767237120.0, + "45": 1767237120.0, + "46": 1767237120.0, + "47": 1767237120.0, + "48": 1767237120.0, + "49": 1767237120.0, + "50": 1767237120.0, + "51": 1767237120.0, + "52": 1767237120.0, + "53": 1767237120.0, + "54": 1767237120.0, + "55": 1767237120.0, + "56": 1767237120.0, + "57": 1767237120.0, + "58": 1767237120.0, + "59": 1767237120.0, + "60": 1767237120.0, + "61": 1767237120.0, + "62": 1767237120.0, + "63": 1767237120.0, + "64": 1767237120.0, + "65": 1767237120.0, + "66": 1767237120.0, + "67": 1767237120.0, + "68": 1767237120.0, + "69": 1767237120.0, + "70": 1767237120.0, + "71": 1767237120.0, + "72": 1767237120.0, + "73": 1767237120.0, + "74": 1767237120.0, + "75": 1767237120.0, + "76": 
1767237120.0, + "77": 1767237120.0, + "78": 1767237120.0, + "79": 1767237120.0, + "80": 1767237120.0, + "81": 1767237120.0, + "82": 1767237120.0, + "83": 1767237120.0, + "84": 1767237120.0, + "85": 1767237120.0, + "86": 1767237120.0, + "87": 1767237120.0, + "88": 1767237120.0, + "89": 1767237120.0, + "90": 1767237120.0, + "91": 1767237120.0, + "92": 1767237120.0, + "93": 1767237120.0, + "94": 1767237120.0, + "95": 1767237120.0, + "96": 1767237120.0, + "97": 1767237120.0, + "98": 1767237120.0, + "99": 1767237120.0, + "100": 1767237120.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2336500736.0, + "2": 3079487488.0, + "3": 3079487488.0, + "4": 3079487488.0, + "5": 3079487488.0, + "6": 3079487488.0, + "7": 3079487488.0, + "8": 3079487488.0, + "9": 3079487488.0, + "10": 3079487488.0, + "11": 3079487488.0, + "12": 3079487488.0, + "13": 3079487488.0, + "14": 3079487488.0, + "15": 3079487488.0, + "16": 3079487488.0, + "17": 3079487488.0, + "18": 3079487488.0, + "19": 3079487488.0, + "20": 3079487488.0, + "21": 3079487488.0, + "22": 3079487488.0, + "23": 3079487488.0, + "24": 3079487488.0, + "25": 3079487488.0, + "26": 3079487488.0, + "27": 3079487488.0, + "28": 3079487488.0, + "29": 3079487488.0, + "30": 3079487488.0, + "31": 3079487488.0, + "32": 3079487488.0, + "33": 3079487488.0, + "34": 3079487488.0, + "35": 3079487488.0, + "36": 3079487488.0, + "37": 3079487488.0, + "38": 3079487488.0, + "39": 3079487488.0, + "40": 3079487488.0, + "41": 3079487488.0, + "42": 3079487488.0, + "43": 3079487488.0, + "44": 3079487488.0, + "45": 3079487488.0, + "46": 3079487488.0, + "47": 3079487488.0, + "48": 3079487488.0, + "49": 3079487488.0, + "50": 3079487488.0, + "51": 3079487488.0, + "52": 3079487488.0, + "53": 3079487488.0, + "54": 3079487488.0, + "55": 3079487488.0, + "56": 3079487488.0, + "57": 3079487488.0, + "58": 3079487488.0, + "59": 3079487488.0, + "60": 3079487488.0, + "61": 3079487488.0, + "62": 
3079487488.0, + "63": 3079487488.0, + "64": 3079487488.0, + "65": 3079487488.0, + "66": 3079487488.0, + "67": 3079487488.0, + "68": 3079487488.0, + "69": 3079487488.0, + "70": 3079487488.0, + "71": 3079487488.0, + "72": 3079487488.0, + "73": 3079487488.0, + "74": 3079487488.0, + "75": 3079487488.0, + "76": 3079487488.0, + "77": 3079487488.0, + "78": 3079487488.0, + "79": 3079487488.0, + "80": 3079487488.0, + "81": 3079487488.0, + "82": 3079487488.0, + "83": 3079487488.0, + "84": 3079487488.0, + "85": 3079487488.0, + "86": 3079487488.0, + "87": 3079487488.0, + "88": 3079487488.0, + "89": 3079487488.0, + "90": 3079487488.0, + "91": 3079487488.0, + "92": 3079487488.0, + "93": 3079487488.0, + "94": 3079487488.0, + "95": 3079487488.0, + "96": 3079487488.0, + "97": 3079487488.0, + "98": 3079487488.0, + "99": 3079487488.0, + "100": 3079487488.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.43441, + "2": 0.78136, + "3": 0.7462, + "4": 0.7121, + "5": 0.71539, + "6": 0.71675, + "7": 0.71163, + "8": 0.71648, + "9": 0.72398, + "10": 0.71927, + "11": 0.80592, + "12": 0.70909, + "13": 0.71547, + "14": 0.71572, + "15": 0.70839, + "16": 0.71281, + "17": 0.71709, + "18": 0.70875, + "19": 0.71455, + "20": 0.989, + "21": 0.98319, + "22": 0.95078, + "23": 0.94171, + "24": 0.71144, + "25": 0.70971, + "26": 0.71131, + "27": 0.70864, + "28": 0.72406, + "29": 0.71861, + "30": 0.71986, + "31": 0.71003, + "32": 0.70772, + "33": 0.71322, + "34": 0.70935, + "35": 0.71103, + "36": 0.70629, + "37": 0.71354, + "38": 0.71466, + "39": 0.71799, + "40": 0.71635, + "41": 0.72804, + "42": 0.71281, + "43": 0.7097, + "44": 0.71324, + "45": 0.70979, + "46": 0.7111, + "47": 0.71491, + "48": 1.05833, + "49": 0.89093, + "50": 0.8836, + "51": 0.72864, + "52": 0.72146, + "53": 0.72243, + "54": 0.71938, + "55": 0.71917, + "56": 0.71867, + "57": 0.72048, + "58": 0.72484, + "59": 0.72197, + "60": 0.7218, + "61": 0.728, + "62": 0.71944, + "63": 
0.73343, + "64": 5.90055, + "65": 5.53828, + "66": 0.91077, + "67": 1.09715, + "68": 0.70698, + "69": 0.70556, + "70": 1.00845, + "71": 0.71076, + "72": 0.71777, + "73": 0.71659, + "74": 0.71156, + "75": 0.8128, + "76": 0.7115, + "77": 0.97488, + "78": 0.89177, + "79": 0.87098, + "80": 1.01456, + "81": 0.81896, + "82": 0.71793, + "83": 1.04586, + "84": 0.72118, + "85": 1.02779, + "86": 0.72077, + "87": 0.71418, + "88": 0.71356, + "89": 0.74602, + "90": 0.77996, + "91": 1.05945, + "92": 0.72043, + "93": 0.72396, + "94": 0.72365, + "95": 0.72843, + "96": 0.71516, + "97": 0.71321, + "98": 0.72468, + "99": 0.72441, + "100": 0.71951 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..27a34e32198 --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.4837, + "2": 10.48435, + "3": 10.48251, + "4": 10.48303, + "5": 10.47647, + "6": 10.48423, + "7": 10.48457, + "8": 10.48837, + "9": 10.49003, + "10": 10.47255, + "11": 10.47245, + "12": 10.4828, + "13": 10.47855, + "14": 10.45162, + "15": 10.47936, + "16": 10.45364, + "17": 10.45143, + "18": 10.46239, + "19": 10.44136, + "20": 10.45438, + "21": 10.43469, + "22": 10.40587, + "23": 10.39982, + "24": 10.37585, + "25": 10.38173, + "26": 10.35154, + "27": 10.35401, + "28": 10.3497, + "29": 10.28714, + "30": 10.21194, + "31": 10.17274, + "32": 10.13439, + "33": 10.14753, + "34": 10.10759, + "35": 10.10592, + "36": 10.08756, + "37": 10.08177, + "38": 10.07257, + "39": 10.0013, + "40": 9.9816, + 
"41": 9.92549, + "42": 9.87529, + "43": 9.88742, + "44": 9.80641, + "45": 9.82342, + "46": 9.73815, + "47": 9.74831, + "48": 9.71619, + "49": 9.74504, + "50": 9.73004, + "51": 9.71503, + "52": 9.66484, + "53": 9.60935, + "54": 9.62735, + "55": 9.61036, + "56": 9.61745, + "57": 9.56794, + "58": 9.52742, + "59": 9.51685, + "60": 9.51873, + "61": 9.53147, + "62": 9.45024, + "63": 9.45733, + "64": 9.43455, + "65": 9.4582, + "66": 9.43694, + "67": 9.39693, + "68": 9.36491, + "69": 9.40957, + "70": 9.37605, + "71": 9.41735, + "72": 9.42581, + "73": 9.37614, + "74": 9.41544, + "75": 9.37897, + "76": 9.28015, + "77": 9.32215, + "78": 9.35752, + "79": 9.32154, + "80": 9.31496, + "81": 9.26776, + "82": 9.34189, + "83": 9.32163, + "84": 9.24791, + "85": 9.35021, + "86": 9.22383, + "87": 9.30627, + "88": 9.29884, + "89": 9.22708, + "90": 9.28475, + "91": 9.23116, + "92": 9.27477, + "93": 9.1922, + "94": 9.23984, + "95": 9.27996, + "96": 9.17534, + "97": 9.21892, + "98": 9.1719, + "99": 9.1646, + "100": 9.14809 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2554.0, + "2": 1919.0, + "3": 1521.0, + "4": 2330.0, + "5": 2010.0, + "6": 1725.0, + "7": 2803.0, + "8": 2435.0, + "9": 2286.0, + "10": 2570.0, + "11": 2438.0, + "12": 1829.0, + "13": 2332.0, + "14": 2832.0, + "15": 2008.0, + "16": 2659.0, + "17": 2454.0, + "18": 2500.0, + "19": 2588.0, + "20": 2834.0, + "21": 2042.0, + "22": 3037.0, + "23": 2702.0, + "24": 2700.0, + "25": 2568.0, + "26": 2896.0, + "27": 2735.0, + "28": 2699.0, + "29": 2548.0, + "30": 2843.0, + "31": 2160.0, + "32": 2458.0, + "33": 2130.0, + "34": 2517.0, + "35": 2597.0, + "36": 3001.0, + "37": 3305.0, + "38": 2682.0, + "39": 2805.0, + "40": 3425.0, + "41": 1812.0, + "42": 1481.0, + "43": 1726.0, + "44": 2575.0, + "45": 3438.0, + "46": 2960.0, + "47": 2792.0, + "48": 3107.0, + "49": 2854.0, + "50": 2145.0, + "51": 1964.0, + "52": 2437.0, + "53": 3823.0, + "54": 3427.0, + "55": 3392.0, + "56": 
4421.0, + "57": 4003.0, + "58": 4224.0, + "59": 1816.0, + "60": 2520.0, + "61": 2106.0, + "62": 4011.0, + "63": 3637.0, + "64": 4375.0, + "65": 3080.0, + "66": 1753.0, + "67": 1913.0, + "68": 4407.0, + "69": 4475.0, + "70": 4419.0, + "71": 2152.0, + "72": 4399.0, + "73": 4134.0, + "74": 3315.0, + "75": 4815.0, + "76": 2322.0, + "77": 5019.0, + "78": 4171.0, + "79": 2788.0, + "80": 3831.0, + "81": 3411.0, + "82": 3004.0, + "83": 5145.0, + "84": 4399.0, + "85": 4295.0, + "86": 3410.0, + "87": 4880.0, + "88": 3350.0, + "89": 4659.0, + "90": 4370.0, + "91": 4273.0, + "92": 3325.0, + "93": 5509.0, + "94": 3804.0, + "95": 4711.0, + "96": 3631.0, + "97": 3774.0, + "98": 4477.0, + "99": 4459.0, + "100": 3220.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1767237120.0, + "2": 1767237120.0, + "3": 1767237120.0, + "4": 1767237120.0, + "5": 1767237120.0, + "6": 1767237120.0, + "7": 1767237120.0, + "8": 1767237120.0, + "9": 1767237120.0, + "10": 1767237120.0, + "11": 1767237120.0, + "12": 1767237120.0, + "13": 1767237120.0, + "14": 1767237120.0, + "15": 1767237120.0, + "16": 1767237120.0, + "17": 1767237120.0, + "18": 1767237120.0, + "19": 1767237120.0, + "20": 1767237120.0, + "21": 1767237120.0, + "22": 1767237120.0, + "23": 1767237120.0, + "24": 1767237120.0, + "25": 1767237120.0, + "26": 1767237120.0, + "27": 1767237120.0, + "28": 1767237120.0, + "29": 1767237120.0, + "30": 1767237120.0, + "31": 1767237120.0, + "32": 1767237120.0, + "33": 1767237120.0, + "34": 1767237120.0, + "35": 1767237120.0, + "36": 1767237120.0, + "37": 1767237120.0, + "38": 1767237120.0, + "39": 1767237120.0, + "40": 1767237120.0, + "41": 1767237120.0, + "42": 1767237120.0, + "43": 1767237120.0, + "44": 1767237120.0, + "45": 1767237120.0, + "46": 1767237120.0, + "47": 1767237120.0, + "48": 1767237120.0, + "49": 1767237120.0, + "50": 1767237120.0, + "51": 1767237120.0, + "52": 1767237120.0, + "53": 1767237120.0, + "54": 
1767237120.0, + "55": 1767237120.0, + "56": 1767237120.0, + "57": 1767237120.0, + "58": 1767237120.0, + "59": 1767237120.0, + "60": 1767237120.0, + "61": 1767237120.0, + "62": 1767237120.0, + "63": 1767237120.0, + "64": 1767237120.0, + "65": 1767237120.0, + "66": 1767237120.0, + "67": 1767237120.0, + "68": 1767237120.0, + "69": 1767237120.0, + "70": 1767237120.0, + "71": 1767237120.0, + "72": 1767237120.0, + "73": 1767237120.0, + "74": 1767237120.0, + "75": 1767237120.0, + "76": 1767237120.0, + "77": 1767237120.0, + "78": 1767237120.0, + "79": 1767237120.0, + "80": 1767237120.0, + "81": 1767237120.0, + "82": 1767237120.0, + "83": 1767237120.0, + "84": 1767237120.0, + "85": 1767237120.0, + "86": 1767237120.0, + "87": 1767237120.0, + "88": 1767237120.0, + "89": 1767237120.0, + "90": 1767237120.0, + "91": 1767237120.0, + "92": 1767237120.0, + "93": 1767237120.0, + "94": 1767237120.0, + "95": 1767237120.0, + "96": 1767237120.0, + "97": 1767237120.0, + "98": 1767237120.0, + "99": 1767237120.0, + "100": 1767237120.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2336500736.0, + "2": 3079487488.0, + "3": 3079487488.0, + "4": 3079487488.0, + "5": 3079487488.0, + "6": 3079487488.0, + "7": 3079487488.0, + "8": 3079487488.0, + "9": 3079487488.0, + "10": 3079487488.0, + "11": 3079487488.0, + "12": 3079487488.0, + "13": 3079487488.0, + "14": 3079487488.0, + "15": 3079487488.0, + "16": 3079487488.0, + "17": 3079487488.0, + "18": 3079487488.0, + "19": 3079487488.0, + "20": 3079487488.0, + "21": 3079487488.0, + "22": 3079487488.0, + "23": 3079487488.0, + "24": 3079487488.0, + "25": 3079487488.0, + "26": 3079487488.0, + "27": 3079487488.0, + "28": 3079487488.0, + "29": 3079487488.0, + "30": 3079487488.0, + "31": 3079487488.0, + "32": 3079487488.0, + "33": 3079487488.0, + "34": 3079487488.0, + "35": 3079487488.0, + "36": 3079487488.0, + "37": 3079487488.0, + "38": 3079487488.0, + "39": 3079487488.0, + "40": 
3079487488.0, + "41": 3079487488.0, + "42": 3079487488.0, + "43": 3079487488.0, + "44": 3079487488.0, + "45": 3079487488.0, + "46": 3079487488.0, + "47": 3079487488.0, + "48": 3079487488.0, + "49": 3079487488.0, + "50": 3079487488.0, + "51": 3079487488.0, + "52": 3079487488.0, + "53": 3079487488.0, + "54": 3079487488.0, + "55": 3079487488.0, + "56": 3079487488.0, + "57": 3079487488.0, + "58": 3079487488.0, + "59": 3079487488.0, + "60": 3079487488.0, + "61": 3079487488.0, + "62": 3079487488.0, + "63": 3079487488.0, + "64": 3079487488.0, + "65": 3079487488.0, + "66": 3079487488.0, + "67": 3079487488.0, + "68": 3079487488.0, + "69": 3079487488.0, + "70": 3079487488.0, + "71": 3079487488.0, + "72": 3079487488.0, + "73": 3079487488.0, + "74": 3079487488.0, + "75": 3079487488.0, + "76": 3079487488.0, + "77": 3079487488.0, + "78": 3079487488.0, + "79": 3079487488.0, + "80": 3079487488.0, + "81": 3079487488.0, + "82": 3079487488.0, + "83": 3079487488.0, + "84": 3079487488.0, + "85": 3079487488.0, + "86": 3079487488.0, + "87": 3079487488.0, + "88": 3079487488.0, + "89": 3079487488.0, + "90": 3079487488.0, + "91": 3079487488.0, + "92": 3079487488.0, + "93": 3079487488.0, + "94": 3079487488.0, + "95": 3079487488.0, + "96": 3079487488.0, + "97": 3079487488.0, + "98": 3079487488.0, + "99": 3079487488.0, + "100": 3079487488.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.74907, + "2": 0.85881, + "3": 0.84325, + "4": 0.84358, + "5": 0.84379, + "6": 0.84251, + "7": 0.84123, + "8": 0.8499, + "9": 0.8999, + "10": 0.92522, + "11": 0.94116, + "12": 0.85793, + "13": 0.84568, + "14": 0.84264, + "15": 0.84084, + "16": 0.84084, + "17": 0.83843, + "18": 0.8412, + "19": 0.84178, + "20": 1.1044, + "21": 1.21871, + "22": 1.25946, + "23": 0.85008, + "24": 0.91404, + "25": 0.84787, + "26": 0.84792, + "27": 0.85174, + "28": 0.84996, + "29": 0.84337, + "30": 0.84498, + "31": 0.8486, + "32": 0.84203, + "33": 0.84451, + "34": 
0.85648, + "35": 0.83537, + "36": 0.84205, + "37": 0.83563, + "38": 0.84541, + "39": 0.84231, + "40": 0.84639, + "41": 0.84365, + "42": 0.84512, + "43": 0.84437, + "44": 0.84299, + "45": 0.85866, + "46": 0.84237, + "47": 0.84617, + "48": 1.18328, + "49": 0.88875, + "50": 0.96388, + "51": 0.98149, + "52": 0.89905, + "53": 0.84382, + "54": 0.85382, + "55": 0.84338, + "56": 0.84282, + "57": 0.92404, + "58": 0.84627, + "59": 0.83811, + "60": 0.83802, + "61": 0.85109, + "62": 0.83231, + "63": 0.83505, + "64": 1.15842, + "65": 1.1324, + "66": 0.83972, + "67": 0.82896, + "68": 0.82596, + "69": 0.83118, + "70": 0.84229, + "71": 0.8328, + "72": 0.82924, + "73": 0.83555, + "74": 0.83422, + "75": 0.90796, + "76": 0.85077, + "77": 1.07568, + "78": 1.30938, + "79": 1.12037, + "80": 0.82751, + "81": 0.83544, + "82": 0.88688, + "83": 1.16362, + "84": 0.83207, + "85": 0.83917, + "86": 1.14681, + "87": 1.17025, + "88": 0.82985, + "89": 0.82492, + "90": 0.90586, + "91": 0.83299, + "92": 0.83139, + "93": 0.83405, + "94": 0.83756, + "95": 0.83351, + "96": 0.83063, + "97": 0.83499, + "98": 0.84617, + "99": 0.83623, + "100": 0.84014 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json index 7a7d567ec46..2219c242a8b 100644 --- a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.47723, + "2": 10.47576, + "3": 10.46809, + "4": 10.47326, "5": 10.47148, + "6": 10.46049, + "7": 10.46357, + "8": 10.47334, + "9": 10.48063, "10": 10.46319, + "11": 10.47102, + "12": 
10.45502, + "13": 10.44665, + "14": 10.451, "15": 10.48846, + "16": 10.4509, + "17": 10.44648, + "18": 10.44272, + "19": 10.43057, "20": 10.44534, + "21": 10.41778, + "22": 10.38667, + "23": 10.39322, + "24": 10.37847, "25": 10.35474, + "26": 10.35955, + "27": 10.34527, + "28": 10.33539, + "29": 10.25416, "30": 10.23011, + "31": 10.14092, + "32": 10.13601, + "33": 10.13944, + "34": 10.11377, "35": 10.0888, + "36": 10.09247, + "37": 10.06836, + "38": 10.04664, + "39": 9.97584, "40": 9.93781, + "41": 9.90867, + "42": 9.84873, + "43": 9.8577, + "44": 9.79259, "45": 9.8035, + "46": 9.7029, + "47": 9.73432, + "48": 9.70106, + "49": 9.69981, "50": 9.70258 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2137.0, + "2": 1618.0, + "3": 1561.0, + "4": 1871.0, "5": 1983.0, + "6": 1565.0, + "7": 2779.0, + "8": 2108.0, + "9": 2008.0, "10": 2086.0, + "11": 2534.0, + "12": 1686.0, + "13": 2120.0, + "14": 2814.0, "15": 1735.0, + "16": 2535.0, + "17": 2409.0, + "18": 2345.0, + "19": 2374.0, "20": 2739.0, + "21": 2030.0, + "22": 2819.0, + "23": 2763.0, + "24": 2731.0, "25": 2429.0, + "26": 2817.0, + "27": 2944.0, + "28": 2741.0, + "29": 2639.0, "30": 2723.0, + "31": 2158.0, + "32": 2242.0, + "33": 2046.0, + "34": 2139.0, "35": 2492.0, + "36": 2641.0, + "37": 2853.0, + "38": 2705.0, + "39": 2807.0, "40": 3333.0, + "41": 1762.0, + "42": 1410.0, + "43": 1558.0, + "44": 2384.0, "45": 3170.0, + "46": 2664.0, + "47": 2641.0, + "48": 3490.0, + "49": 2928.0, "50": 2487.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 3404871168.0, + "2": 3404871168.0, + "3": 3404871168.0, + "4": 3404871168.0, "5": 3404871168.0, + "6": 3404871168.0, + "7": 3404871168.0, + "8": 3404871168.0, + "9": 3404871168.0, "10": 3404871168.0, + "11": 3404871168.0, + "12": 3404871168.0, + "13": 3404871168.0, + "14": 3404871168.0, "15": 3404871168.0, + "16": 3404871168.0, + "17": 
3404871168.0, + "18": 3404871168.0, + "19": 3404871168.0, "20": 3404871168.0, + "21": 3404871168.0, + "22": 3404871168.0, + "23": 3404871168.0, + "24": 3404871168.0, "25": 3404871168.0, + "26": 3404871168.0, + "27": 3404871168.0, + "28": 3404871168.0, + "29": 3404871168.0, "30": 3404871168.0, + "31": 3404871168.0, + "32": 3404871168.0, + "33": 3404871168.0, + "34": 3404871168.0, "35": 3404871168.0, + "36": 3404871168.0, + "37": 3404871168.0, + "38": 3404871168.0, + "39": 3404871168.0, "40": 3404871168.0, + "41": 3404871168.0, + "42": 3404871168.0, + "43": 3404871168.0, + "44": 3404871168.0, "45": 3404871168.0, + "46": 3404871168.0, + "47": 3404871168.0, + "48": 3404871168.0, + "49": 3404871168.0, "50": 3404871168.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 4194526208.0, + "2": 5660965888.0, + "3": 5660965888.0, + "4": 5660965888.0, "5": 5660965888.0, + "6": 5660965888.0, + "7": 5660965888.0, + "8": 5660965888.0, + "9": 5660965888.0, "10": 5660965888.0, + "11": 5660965888.0, + "12": 5660965888.0, + "13": 5660965888.0, + "14": 5660965888.0, "15": 5660965888.0, + "16": 5660965888.0, + "17": 5660965888.0, + "18": 5660965888.0, + "19": 5660965888.0, "20": 5660965888.0, + "21": 5660965888.0, + "22": 5660965888.0, + "23": 5660965888.0, + "24": 5660965888.0, "25": 5660965888.0, + "26": 5660965888.0, + "27": 5660965888.0, + "28": 5660965888.0, + "29": 5660965888.0, "30": 5660965888.0, + "31": 5660965888.0, + "32": 5660965888.0, + "33": 5660965888.0, + "34": 5660965888.0, "35": 5660965888.0, + "36": 5660965888.0, + "37": 5660965888.0, + "38": 5660965888.0, + "39": 5660965888.0, "40": 5660965888.0, + "41": 5660965888.0, + "42": 5660965888.0, + "43": 5660965888.0, + "44": 5660965888.0, "45": 5660965888.0, + "46": 5660965888.0, + "47": 5660965888.0, + "48": 5660965888.0, + "49": 5660965888.0, "50": 5660965888.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + 
"step_interval": 1, "values": { - "1": 10.04018, - "5": 0.49888, - "10": 0.45046, - "15": 0.45352, - "20": 0.46632, - "25": 0.44805, - "30": 0.58321, - "35": 0.60604, - "40": 0.44629, - "45": 0.75157, - "50": 0.44163 + "1": 10.41177, + "2": 0.63219, + "3": 0.53615, + "4": 0.53244, + "5": 0.53041, + "6": 0.53364, + "7": 0.53797, + "8": 0.52807, + "9": 0.53172, + "10": 0.53116, + "11": 0.52906, + "12": 0.53113, + "13": 0.52796, + "14": 0.52974, + "15": 0.52875, + "16": 0.52005, + "17": 0.51948, + "18": 0.52008, + "19": 0.52456, + "20": 0.52593, + "21": 0.52988, + "22": 0.52281, + "23": 0.51971, + "24": 0.52235, + "25": 0.54145, + "26": 0.52876, + "27": 0.51926, + "28": 0.51381, + "29": 0.51526, + "30": 0.51632, + "31": 0.52532, + "32": 0.61496, + "33": 0.59949, + "34": 0.52069, + "35": 0.52649, + "36": 0.66485, + "37": 0.52497, + "38": 0.52464, + "39": 0.76801, + "40": 0.52465, + "41": 0.69091, + "42": 0.74369, + "43": 0.5242, + "44": 0.75825, + "45": 0.68331, + "46": 0.75831, + "47": 0.51724, + "48": 0.51305, + "49": 0.51686, + "50": 0.52176 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..8ff12f47d08 --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.47723, + "2": 10.47576, + "3": 10.46809, + "4": 10.47326, + "5": 10.47148, + "6": 10.46049, + "7": 10.46357, + "8": 10.47334, + "9": 10.48063, + "10": 10.46319, + "11": 10.47102, + "12": 10.45502, + "13": 10.44665, + "14": 10.451, + "15": 10.48846, + "16": 10.4509, + "17": 10.44648, + "18": 10.44272, + "19": 10.43057, + "20": 10.44534, + 
"21": 10.41778, + "22": 10.38667, + "23": 10.39322, + "24": 10.37847, + "25": 10.35474, + "26": 10.35955, + "27": 10.34527, + "28": 10.33539, + "29": 10.25416, + "30": 10.23011, + "31": 10.14092, + "32": 10.13601, + "33": 10.13944, + "34": 10.11377, + "35": 10.0888, + "36": 10.09247, + "37": 10.06836, + "38": 10.04664, + "39": 9.97584, + "40": 9.93781, + "41": 9.90867, + "42": 9.84873, + "43": 9.8577, + "44": 9.79259, + "45": 9.8035, + "46": 9.7029, + "47": 9.73432, + "48": 9.70106, + "49": 9.69981, + "50": 9.70258 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2137.0, + "2": 1618.0, + "3": 1561.0, + "4": 1871.0, + "5": 1983.0, + "6": 1565.0, + "7": 2779.0, + "8": 2108.0, + "9": 2008.0, + "10": 2086.0, + "11": 2534.0, + "12": 1686.0, + "13": 2120.0, + "14": 2814.0, + "15": 1735.0, + "16": 2535.0, + "17": 2409.0, + "18": 2345.0, + "19": 2374.0, + "20": 2739.0, + "21": 2030.0, + "22": 2819.0, + "23": 2763.0, + "24": 2731.0, + "25": 2429.0, + "26": 2817.0, + "27": 2944.0, + "28": 2741.0, + "29": 2639.0, + "30": 2723.0, + "31": 2158.0, + "32": 2242.0, + "33": 2046.0, + "34": 2139.0, + "35": 2492.0, + "36": 2641.0, + "37": 2853.0, + "38": 2705.0, + "39": 2807.0, + "40": 3333.0, + "41": 1762.0, + "42": 1410.0, + "43": 1558.0, + "44": 2384.0, + "45": 3170.0, + "46": 2664.0, + "47": 2641.0, + "48": 3490.0, + "49": 2928.0, + "50": 2487.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3404871168.0, + "2": 3404871168.0, + "3": 3404871168.0, + "4": 3404871168.0, + "5": 3404871168.0, + "6": 3404871168.0, + "7": 3404871168.0, + "8": 3404871168.0, + "9": 3404871168.0, + "10": 3404871168.0, + "11": 3404871168.0, + "12": 3404871168.0, + "13": 3404871168.0, + "14": 3404871168.0, + "15": 3404871168.0, + "16": 3404871168.0, + "17": 3404871168.0, + "18": 3404871168.0, + "19": 3404871168.0, + "20": 3404871168.0, + "21": 3404871168.0, + "22": 3404871168.0, + 
"23": 3404871168.0, + "24": 3404871168.0, + "25": 3404871168.0, + "26": 3404871168.0, + "27": 3404871168.0, + "28": 3404871168.0, + "29": 3404871168.0, + "30": 3404871168.0, + "31": 3404871168.0, + "32": 3404871168.0, + "33": 3404871168.0, + "34": 3404871168.0, + "35": 3404871168.0, + "36": 3404871168.0, + "37": 3404871168.0, + "38": 3404871168.0, + "39": 3404871168.0, + "40": 3404871168.0, + "41": 3404871168.0, + "42": 3404871168.0, + "43": 3404871168.0, + "44": 3404871168.0, + "45": 3404871168.0, + "46": 3404871168.0, + "47": 3404871168.0, + "48": 3404871168.0, + "49": 3404871168.0, + "50": 3404871168.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4194526208.0, + "2": 5660965888.0, + "3": 5660965888.0, + "4": 5660965888.0, + "5": 5660965888.0, + "6": 5660965888.0, + "7": 5660965888.0, + "8": 5660965888.0, + "9": 5660965888.0, + "10": 5660965888.0, + "11": 5660965888.0, + "12": 5660965888.0, + "13": 5660965888.0, + "14": 5660965888.0, + "15": 5660965888.0, + "16": 5660965888.0, + "17": 5660965888.0, + "18": 5660965888.0, + "19": 5660965888.0, + "20": 5660965888.0, + "21": 5660965888.0, + "22": 5660965888.0, + "23": 5660965888.0, + "24": 5660965888.0, + "25": 5660965888.0, + "26": 5660965888.0, + "27": 5660965888.0, + "28": 5660965888.0, + "29": 5660965888.0, + "30": 5660965888.0, + "31": 5660965888.0, + "32": 5660965888.0, + "33": 5660965888.0, + "34": 5660965888.0, + "35": 5660965888.0, + "36": 5660965888.0, + "37": 5660965888.0, + "38": 5660965888.0, + "39": 5660965888.0, + "40": 5660965888.0, + "41": 5660965888.0, + "42": 5660965888.0, + "43": 5660965888.0, + "44": 5660965888.0, + "45": 5660965888.0, + "46": 5660965888.0, + "47": 5660965888.0, + "48": 5660965888.0, + "49": 5660965888.0, + "50": 5660965888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.13654, + "2": 0.5493, + "3": 0.46515, + "4": 0.45431, + "5": 0.46032, 
+ "6": 0.45814, + "7": 0.45793, + "8": 0.46137, + "9": 0.46682, + "10": 0.46519, + "11": 0.46206, + "12": 0.46526, + "13": 0.46309, + "14": 0.46231, + "15": 0.47151, + "16": 0.4581, + "17": 0.4833, + "18": 0.47393, + "19": 0.48513, + "20": 0.47017, + "21": 0.47471, + "22": 0.46394, + "23": 0.46475, + "24": 0.46879, + "25": 0.46294, + "26": 0.46242, + "27": 0.4645, + "28": 0.4715, + "29": 0.46842, + "30": 0.46401, + "31": 0.96127, + "32": 0.4785, + "33": 0.62004, + "34": 0.4827, + "35": 0.47953, + "36": 0.48459, + "37": 0.48738, + "38": 0.49573, + "39": 0.58967, + "40": 0.79369, + "41": 0.46618, + "42": 0.72243, + "43": 0.63291, + "44": 0.62301, + "45": 0.68335, + "46": 0.48579, + "47": 0.46817, + "48": 0.46582, + "49": 0.46457, + "50": 0.46777 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..df02cb774f4 --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.47723, + "2": 10.47576, + "3": 10.46809, + "4": 10.47326, + "5": 10.47148, + "6": 10.46049, + "7": 10.46357, + "8": 10.47334, + "9": 10.48063, + "10": 10.46319, + "11": 10.47102, + "12": 10.45502, + "13": 10.44665, + "14": 10.451, + "15": 10.48846, + "16": 10.4509, + "17": 10.44648, + "18": 10.44272, + "19": 10.43057, + "20": 10.44534, + "21": 10.41778, + "22": 10.38667, + "23": 10.39322, + "24": 10.37847, + "25": 10.35474, + "26": 10.35955, + "27": 10.34527, + "28": 10.33539, + "29": 10.25416, + "30": 10.23011, + "31": 10.14092, + "32": 10.13601, + "33": 10.13944, + "34": 10.11377, + "35": 10.0888, + "36": 10.09247, + "37": 10.06836, + "38": 
10.04664, + "39": 9.97584, + "40": 9.93781, + "41": 9.90867, + "42": 9.84873, + "43": 9.8577, + "44": 9.79259, + "45": 9.8035, + "46": 9.7029, + "47": 9.73432, + "48": 9.70106, + "49": 9.69981, + "50": 9.70258 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2137.0, + "2": 1618.0, + "3": 1561.0, + "4": 1871.0, + "5": 1983.0, + "6": 1565.0, + "7": 2779.0, + "8": 2108.0, + "9": 2008.0, + "10": 2086.0, + "11": 2534.0, + "12": 1686.0, + "13": 2120.0, + "14": 2814.0, + "15": 1735.0, + "16": 2535.0, + "17": 2409.0, + "18": 2345.0, + "19": 2374.0, + "20": 2739.0, + "21": 2030.0, + "22": 2819.0, + "23": 2763.0, + "24": 2731.0, + "25": 2429.0, + "26": 2817.0, + "27": 2944.0, + "28": 2741.0, + "29": 2639.0, + "30": 2723.0, + "31": 2158.0, + "32": 2242.0, + "33": 2046.0, + "34": 2139.0, + "35": 2492.0, + "36": 2641.0, + "37": 2853.0, + "38": 2705.0, + "39": 2807.0, + "40": 3333.0, + "41": 1762.0, + "42": 1410.0, + "43": 1558.0, + "44": 2384.0, + "45": 3170.0, + "46": 2664.0, + "47": 2641.0, + "48": 3490.0, + "49": 2928.0, + "50": 2487.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3404871168.0, + "2": 3404871168.0, + "3": 3404871168.0, + "4": 3404871168.0, + "5": 3404871168.0, + "6": 3404871168.0, + "7": 3404871168.0, + "8": 3404871168.0, + "9": 3404871168.0, + "10": 3404871168.0, + "11": 3404871168.0, + "12": 3404871168.0, + "13": 3404871168.0, + "14": 3404871168.0, + "15": 3404871168.0, + "16": 3404871168.0, + "17": 3404871168.0, + "18": 3404871168.0, + "19": 3404871168.0, + "20": 3404871168.0, + "21": 3404871168.0, + "22": 3404871168.0, + "23": 3404871168.0, + "24": 3404871168.0, + "25": 3404871168.0, + "26": 3404871168.0, + "27": 3404871168.0, + "28": 3404871168.0, + "29": 3404871168.0, + "30": 3404871168.0, + "31": 3404871168.0, + "32": 3404871168.0, + "33": 3404871168.0, + "34": 3404871168.0, + "35": 3404871168.0, + "36": 3404871168.0, + "37": 
3404871168.0, + "38": 3404871168.0, + "39": 3404871168.0, + "40": 3404871168.0, + "41": 3404871168.0, + "42": 3404871168.0, + "43": 3404871168.0, + "44": 3404871168.0, + "45": 3404871168.0, + "46": 3404871168.0, + "47": 3404871168.0, + "48": 3404871168.0, + "49": 3404871168.0, + "50": 3404871168.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4194526208.0, + "2": 5660965888.0, + "3": 5660965888.0, + "4": 5660965888.0, + "5": 5660965888.0, + "6": 5660965888.0, + "7": 5660965888.0, + "8": 5660965888.0, + "9": 5660965888.0, + "10": 5660965888.0, + "11": 5660965888.0, + "12": 5660965888.0, + "13": 5660965888.0, + "14": 5660965888.0, + "15": 5660965888.0, + "16": 5660965888.0, + "17": 5660965888.0, + "18": 5660965888.0, + "19": 5660965888.0, + "20": 5660965888.0, + "21": 5660965888.0, + "22": 5660965888.0, + "23": 5660965888.0, + "24": 5660965888.0, + "25": 5660965888.0, + "26": 5660965888.0, + "27": 5660965888.0, + "28": 5660965888.0, + "29": 5660965888.0, + "30": 5660965888.0, + "31": 5660965888.0, + "32": 5660965888.0, + "33": 5660965888.0, + "34": 5660965888.0, + "35": 5660965888.0, + "36": 5660965888.0, + "37": 5660965888.0, + "38": 5660965888.0, + "39": 5660965888.0, + "40": 5660965888.0, + "41": 5660965888.0, + "42": 5660965888.0, + "43": 5660965888.0, + "44": 5660965888.0, + "45": 5660965888.0, + "46": 5660965888.0, + "47": 5660965888.0, + "48": 5660965888.0, + "49": 5660965888.0, + "50": 5660965888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.44279, + "2": 0.55345, + "3": 0.53909, + "4": 0.52187, + "5": 0.52958, + "6": 0.5241, + "7": 0.5353, + "8": 0.51946, + "9": 0.52732, + "10": 0.52759, + "11": 0.51849, + "12": 0.52326, + "13": 0.52472, + "14": 0.52577, + "15": 0.51817, + "16": 0.51922, + "17": 0.51686, + "18": 0.5248, + "19": 0.51945, + "20": 0.74697, + "21": 0.51544, + "22": 0.52412, + "23": 0.66206, + "24": 
0.51781, + "25": 0.52429, + "26": 0.52068, + "27": 0.62432, + "28": 0.52016, + "29": 0.52217, + "30": 0.51949, + "31": 0.69033, + "32": 0.52127, + "33": 0.52602, + "34": 0.6403, + "35": 0.51723, + "36": 0.52445, + "37": 0.51746, + "38": 0.52296, + "39": 0.52159, + "40": 0.6718, + "41": 0.58171, + "42": 0.7393, + "43": 0.54277, + "44": 0.81615, + "45": 0.52284, + "46": 0.71947, + "47": 0.52219, + "48": 0.51866, + "49": 0.51764, + "50": 0.51841 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json index 8101027dc18..edd42f32479 100644 --- a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.55236, + "2": 10.52891, + "3": 10.55085, + "4": 10.55035, "5": 10.52311, + "6": 10.53328, + "7": 10.53097, + "8": 10.54323, + "9": 10.54514, "10": 10.53676, + "11": 10.53791, + "12": 10.54319, + "13": 10.5263, + "14": 10.5316, "15": 10.52714, + "16": 10.50594, + "17": 10.5009, + "18": 10.51023, + "19": 10.493, "20": 10.48862, + "21": 10.47473, + "22": 10.42799, + "23": 10.42684, + "24": 10.4036, "25": 10.39991, + "26": 10.38461, + "27": 10.38216, + "28": 10.36877, + "29": 10.32192, "30": 10.2204, + "31": 10.17094, + "32": 10.12605, + "33": 10.10628, + "34": 10.09438, "35": 10.07042, + "36": 10.07481, + "37": 10.03644, + "38": 10.01812, + "39": 9.96852, "40": 9.93082, + "41": 9.87316, + "42": 9.81842, + "43": 9.8156, + "44": 9.73841, "45": 9.7628, + "46": 9.67691, + "47": 9.68688, + "48": 9.66292, + "49": 9.67587, "50": 9.67446 } }, 
"num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2320.0, + "2": 2645.0, + "3": 2441.0, + "4": 2417.0, "5": 2730.0, + "6": 2332.0, + "7": 1661.0, + "8": 2386.0, + "9": 2256.0, "10": 2428.0, + "11": 2152.0, + "12": 2337.0, + "13": 2643.0, + "14": 2209.0, "15": 2607.0, + "16": 2411.0, + "17": 2529.0, + "18": 2418.0, + "19": 2363.0, "20": 2323.0, + "21": 2401.0, + "22": 2588.0, + "23": 2338.0, + "24": 2305.0, "25": 2702.0, + "26": 2370.0, + "27": 2462.0, + "28": 2407.0, + "29": 2240.0, "30": 2850.0, + "31": 2882.0, + "32": 2837.0, + "33": 2645.0, + "34": 2874.0, "35": 2913.0, + "36": 3000.0, + "37": 3122.0, + "38": 2680.0, + "39": 2216.0, "40": 2211.0, + "41": 3456.0, + "42": 3624.0, + "43": 3364.0, + "44": 4026.0, "45": 4145.0, + "46": 2924.0, + "47": 1942.0, + "48": 3363.0, + "49": 3532.0, "50": 3710.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2061524480.0, + "2": 2061524480.0, + "3": 2061524480.0, + "4": 2061524480.0, "5": 2061524480.0, + "6": 2061524480.0, + "7": 2061524480.0, + "8": 2061524480.0, + "9": 2061524480.0, "10": 2061524480.0, + "11": 2061524480.0, + "12": 2061524480.0, + "13": 2061524480.0, + "14": 2061524480.0, "15": 2061524480.0, + "16": 2061524480.0, + "17": 2061524480.0, + "18": 2061524480.0, + "19": 2061524480.0, "20": 2061524480.0, + "21": 2061524480.0, + "22": 2061524480.0, + "23": 2061524480.0, + "24": 2061524480.0, "25": 2061524480.0, + "26": 2061524480.0, + "27": 2061524480.0, + "28": 2061524480.0, + "29": 2061524480.0, "30": 2061524480.0, + "31": 2061524480.0, + "32": 2061524480.0, + "33": 2061524480.0, + "34": 2061524480.0, "35": 2061524480.0, + "36": 2061524480.0, + "37": 2061524480.0, + "38": 2061524480.0, + "39": 2061524480.0, "40": 2061524480.0, + "41": 2061524480.0, + "42": 2061524480.0, + "43": 2061524480.0, + "44": 2061524480.0, "45": 2061524480.0, + "46": 2061524480.0, + "47": 2061524480.0, + 
"48": 2061524480.0, + "49": 2061524480.0, "50": 2061524480.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 4385424896.0, + "2": 5245672960.0, + "3": 5245672960.0, + "4": 5245672960.0, "5": 5245672960.0, + "6": 5245672960.0, + "7": 5245672960.0, + "8": 5245672960.0, + "9": 5245672960.0, "10": 5245672960.0, + "11": 5245672960.0, + "12": 5245672960.0, + "13": 5245672960.0, + "14": 5245672960.0, "15": 5245672960.0, + "16": 5245672960.0, + "17": 5245672960.0, + "18": 5245672960.0, + "19": 5245672960.0, "20": 5245672960.0, + "21": 5245672960.0, + "22": 5245672960.0, + "23": 5245672960.0, + "24": 5245672960.0, "25": 5245672960.0, + "26": 5245672960.0, + "27": 5245672960.0, + "28": 5245672960.0, + "29": 5245672960.0, "30": 5245672960.0, + "31": 5245672960.0, + "32": 5245672960.0, + "33": 5245672960.0, + "34": 5245672960.0, "35": 5245672960.0, + "36": 5245672960.0, + "37": 5245672960.0, + "38": 5245672960.0, + "39": 5245672960.0, "40": 5245672960.0, + "41": 5245672960.0, + "42": 5245672960.0, + "43": 5245672960.0, + "44": 5245672960.0, "45": 5245672960.0, + "46": 5245672960.0, + "47": 5245672960.0, + "48": 5245672960.0, + "49": 5245672960.0, "50": 5245672960.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 13.96724, - "5": 0.61599, - "10": 0.61805, - "15": 0.63435, - "20": 1.30403, - "25": 0.62544, - "30": 0.59341, - "35": 0.60604, - "40": 0.61527, - "45": 1.34256, - "50": 0.59871 + "1": 14.52125, + "2": 0.80201, + "3": 0.7469, + "4": 0.73694, + "5": 0.7315, + "6": 0.74178, + "7": 0.74868, + "8": 0.76041, + "9": 0.73349, + "10": 0.73103, + "11": 0.72627, + "12": 1.24485, + "13": 0.92369, + "14": 0.9992, + "15": 0.71522, + "16": 0.72059, + "17": 0.70821, + "18": 0.72513, + "19": 0.92847, + "20": 1.55552, + "21": 1.65501, + "22": 1.61714, + "23": 1.01208, + "24": 0.97003, + "25": 0.73922, + "26": 0.76213, + "27": 0.71228, 
+ "28": 0.74068, + "29": 0.70429, + "30": 0.73547, + "31": 0.73693, + "32": 0.72401, + "33": 0.73688, + "34": 0.73718, + "35": 0.70434, + "36": 0.71346, + "37": 0.71973, + "38": 0.70358, + "39": 1.01971, + "40": 0.72495, + "41": 1.04905, + "42": 0.71671, + "43": 0.89934, + "44": 0.71242, + "45": 0.70583, + "46": 0.69596, + "47": 1.2374, + "48": 1.16, + "49": 1.08122, + "50": 1.48874 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..b825cf8964e --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.55236, + "2": 10.52891, + "3": 10.55085, + "4": 10.55035, + "5": 10.52311, + "6": 10.53328, + "7": 10.53097, + "8": 10.54323, + "9": 10.54514, + "10": 10.53676, + "11": 10.53791, + "12": 10.54319, + "13": 10.5263, + "14": 10.5316, + "15": 10.52714, + "16": 10.50594, + "17": 10.5009, + "18": 10.51023, + "19": 10.493, + "20": 10.48862, + "21": 10.47473, + "22": 10.42799, + "23": 10.42684, + "24": 10.4036, + "25": 10.39991, + "26": 10.38461, + "27": 10.38216, + "28": 10.36877, + "29": 10.32192, + "30": 10.2204, + "31": 10.17094, + "32": 10.12605, + "33": 10.10628, + "34": 10.09438, + "35": 10.07042, + "36": 10.07481, + "37": 10.03644, + "38": 10.01812, + "39": 9.96852, + "40": 9.93082, + "41": 9.87316, + "42": 9.81842, + "43": 9.8156, + "44": 9.73841, + "45": 9.7628, + "46": 9.67691, + "47": 9.68688, + "48": 9.66292, + "49": 9.67587, + "50": 9.67446 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2320.0, + "2": 2645.0, + "3": 
2441.0, + "4": 2417.0, + "5": 2730.0, + "6": 2332.0, + "7": 1661.0, + "8": 2386.0, + "9": 2256.0, + "10": 2428.0, + "11": 2152.0, + "12": 2337.0, + "13": 2643.0, + "14": 2209.0, + "15": 2607.0, + "16": 2411.0, + "17": 2529.0, + "18": 2418.0, + "19": 2363.0, + "20": 2323.0, + "21": 2401.0, + "22": 2588.0, + "23": 2338.0, + "24": 2305.0, + "25": 2702.0, + "26": 2370.0, + "27": 2462.0, + "28": 2407.0, + "29": 2240.0, + "30": 2850.0, + "31": 2882.0, + "32": 2837.0, + "33": 2645.0, + "34": 2874.0, + "35": 2913.0, + "36": 3000.0, + "37": 3122.0, + "38": 2680.0, + "39": 2216.0, + "40": 2211.0, + "41": 3456.0, + "42": 3624.0, + "43": 3364.0, + "44": 4026.0, + "45": 4145.0, + "46": 2924.0, + "47": 1942.0, + "48": 3363.0, + "49": 3532.0, + "50": 3710.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2061524480.0, + "2": 2061524480.0, + "3": 2061524480.0, + "4": 2061524480.0, + "5": 2061524480.0, + "6": 2061524480.0, + "7": 2061524480.0, + "8": 2061524480.0, + "9": 2061524480.0, + "10": 2061524480.0, + "11": 2061524480.0, + "12": 2061524480.0, + "13": 2061524480.0, + "14": 2061524480.0, + "15": 2061524480.0, + "16": 2061524480.0, + "17": 2061524480.0, + "18": 2061524480.0, + "19": 2061524480.0, + "20": 2061524480.0, + "21": 2061524480.0, + "22": 2061524480.0, + "23": 2061524480.0, + "24": 2061524480.0, + "25": 2061524480.0, + "26": 2061524480.0, + "27": 2061524480.0, + "28": 2061524480.0, + "29": 2061524480.0, + "30": 2061524480.0, + "31": 2061524480.0, + "32": 2061524480.0, + "33": 2061524480.0, + "34": 2061524480.0, + "35": 2061524480.0, + "36": 2061524480.0, + "37": 2061524480.0, + "38": 2061524480.0, + "39": 2061524480.0, + "40": 2061524480.0, + "41": 2061524480.0, + "42": 2061524480.0, + "43": 2061524480.0, + "44": 2061524480.0, + "45": 2061524480.0, + "46": 2061524480.0, + "47": 2061524480.0, + "48": 2061524480.0, + "49": 2061524480.0, + "50": 2061524480.0 + } + }, + "mem-max-allocated-bytes": { + 
"start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4385424896.0, + "2": 5245672960.0, + "3": 5245672960.0, + "4": 5245672960.0, + "5": 5245672960.0, + "6": 5245672960.0, + "7": 5245672960.0, + "8": 5245672960.0, + "9": 5245672960.0, + "10": 5245672960.0, + "11": 5245672960.0, + "12": 5245672960.0, + "13": 5245672960.0, + "14": 5245672960.0, + "15": 5245672960.0, + "16": 5245672960.0, + "17": 5245672960.0, + "18": 5245672960.0, + "19": 5245672960.0, + "20": 5245672960.0, + "21": 5245672960.0, + "22": 5245672960.0, + "23": 5245672960.0, + "24": 5245672960.0, + "25": 5245672960.0, + "26": 5245672960.0, + "27": 5245672960.0, + "28": 5245672960.0, + "29": 5245672960.0, + "30": 5245672960.0, + "31": 5245672960.0, + "32": 5245672960.0, + "33": 5245672960.0, + "34": 5245672960.0, + "35": 5245672960.0, + "36": 5245672960.0, + "37": 5245672960.0, + "38": 5245672960.0, + "39": 5245672960.0, + "40": 5245672960.0, + "41": 5245672960.0, + "42": 5245672960.0, + "43": 5245672960.0, + "44": 5245672960.0, + "45": 5245672960.0, + "46": 5245672960.0, + "47": 5245672960.0, + "48": 5245672960.0, + "49": 5245672960.0, + "50": 5245672960.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.04066, + "2": 0.7032, + "3": 0.64317, + "4": 0.64902, + "5": 0.64969, + "6": 0.63112, + "7": 0.65022, + "8": 0.64825, + "9": 0.6561, + "10": 0.65389, + "11": 0.63629, + "12": 0.61059, + "13": 0.61378, + "14": 0.63387, + "15": 0.63512, + "16": 0.67245, + "17": 1.84585, + "18": 0.92074, + "19": 0.88511, + "20": 1.52328, + "21": 1.57421, + "22": 1.42349, + "23": 0.90417, + "24": 0.62214, + "25": 0.61751, + "26": 0.62328, + "27": 0.63404, + "28": 0.64274, + "29": 0.61224, + "30": 0.6522, + "31": 0.65622, + "32": 0.64451, + "33": 0.65916, + "34": 0.67975, + "35": 0.63318, + "36": 0.63519, + "37": 0.62099, + "38": 0.63824, + "39": 0.65345, + "40": 0.63256, + "41": 0.64564, + "42": 0.61807, + "43": 0.84645, + "44": 
0.85427, + "45": 0.85855, + "46": 0.97022, + "47": 1.2994, + "48": 1.26968, + "49": 1.21118, + "50": 1.43722 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..0d85e13b23b --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.55236, + "2": 10.52891, + "3": 10.55085, + "4": 10.55035, + "5": 10.52311, + "6": 10.53328, + "7": 10.53097, + "8": 10.54323, + "9": 10.54514, + "10": 10.53676, + "11": 10.53791, + "12": 10.54319, + "13": 10.5263, + "14": 10.5316, + "15": 10.52714, + "16": 10.50594, + "17": 10.5009, + "18": 10.51023, + "19": 10.493, + "20": 10.48862, + "21": 10.47473, + "22": 10.42799, + "23": 10.42684, + "24": 10.4036, + "25": 10.39991, + "26": 10.38461, + "27": 10.38216, + "28": 10.36877, + "29": 10.32192, + "30": 10.2204, + "31": 10.17094, + "32": 10.12605, + "33": 10.10628, + "34": 10.09438, + "35": 10.07042, + "36": 10.07481, + "37": 10.03644, + "38": 10.01812, + "39": 9.96852, + "40": 9.93082, + "41": 9.87316, + "42": 9.81842, + "43": 9.8156, + "44": 9.73841, + "45": 9.7628, + "46": 9.67691, + "47": 9.68688, + "48": 9.66292, + "49": 9.67587, + "50": 9.67446 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2320.0, + "2": 2645.0, + "3": 2441.0, + "4": 2417.0, + "5": 2730.0, + "6": 2332.0, + "7": 1661.0, + "8": 2386.0, + "9": 2256.0, + "10": 2428.0, + "11": 2152.0, + "12": 2337.0, + "13": 2643.0, + "14": 2209.0, + "15": 2607.0, + "16": 2411.0, + "17": 2529.0, + "18": 2418.0, + "19": 2363.0, + "20": 2323.0, + "21": 2401.0, + 
"22": 2588.0, + "23": 2338.0, + "24": 2305.0, + "25": 2702.0, + "26": 2370.0, + "27": 2462.0, + "28": 2407.0, + "29": 2240.0, + "30": 2850.0, + "31": 2882.0, + "32": 2837.0, + "33": 2645.0, + "34": 2874.0, + "35": 2913.0, + "36": 3000.0, + "37": 3122.0, + "38": 2680.0, + "39": 2216.0, + "40": 2211.0, + "41": 3456.0, + "42": 3624.0, + "43": 3364.0, + "44": 4026.0, + "45": 4145.0, + "46": 2924.0, + "47": 1942.0, + "48": 3363.0, + "49": 3532.0, + "50": 3710.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2061524480.0, + "2": 2061524480.0, + "3": 2061524480.0, + "4": 2061524480.0, + "5": 2061524480.0, + "6": 2061524480.0, + "7": 2061524480.0, + "8": 2061524480.0, + "9": 2061524480.0, + "10": 2061524480.0, + "11": 2061524480.0, + "12": 2061524480.0, + "13": 2061524480.0, + "14": 2061524480.0, + "15": 2061524480.0, + "16": 2061524480.0, + "17": 2061524480.0, + "18": 2061524480.0, + "19": 2061524480.0, + "20": 2061524480.0, + "21": 2061524480.0, + "22": 2061524480.0, + "23": 2061524480.0, + "24": 2061524480.0, + "25": 2061524480.0, + "26": 2061524480.0, + "27": 2061524480.0, + "28": 2061524480.0, + "29": 2061524480.0, + "30": 2061524480.0, + "31": 2061524480.0, + "32": 2061524480.0, + "33": 2061524480.0, + "34": 2061524480.0, + "35": 2061524480.0, + "36": 2061524480.0, + "37": 2061524480.0, + "38": 2061524480.0, + "39": 2061524480.0, + "40": 2061524480.0, + "41": 2061524480.0, + "42": 2061524480.0, + "43": 2061524480.0, + "44": 2061524480.0, + "45": 2061524480.0, + "46": 2061524480.0, + "47": 2061524480.0, + "48": 2061524480.0, + "49": 2061524480.0, + "50": 2061524480.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4385424896.0, + "2": 5245672960.0, + "3": 5245672960.0, + "4": 5245672960.0, + "5": 5245672960.0, + "6": 5245672960.0, + "7": 5245672960.0, + "8": 5245672960.0, + "9": 5245672960.0, + "10": 5245672960.0, + "11": 
5245672960.0, + "12": 5245672960.0, + "13": 5245672960.0, + "14": 5245672960.0, + "15": 5245672960.0, + "16": 5245672960.0, + "17": 5245672960.0, + "18": 5245672960.0, + "19": 5245672960.0, + "20": 5245672960.0, + "21": 5245672960.0, + "22": 5245672960.0, + "23": 5245672960.0, + "24": 5245672960.0, + "25": 5245672960.0, + "26": 5245672960.0, + "27": 5245672960.0, + "28": 5245672960.0, + "29": 5245672960.0, + "30": 5245672960.0, + "31": 5245672960.0, + "32": 5245672960.0, + "33": 5245672960.0, + "34": 5245672960.0, + "35": 5245672960.0, + "36": 5245672960.0, + "37": 5245672960.0, + "38": 5245672960.0, + "39": 5245672960.0, + "40": 5245672960.0, + "41": 5245672960.0, + "42": 5245672960.0, + "43": 5245672960.0, + "44": 5245672960.0, + "45": 5245672960.0, + "46": 5245672960.0, + "47": 5245672960.0, + "48": 5245672960.0, + "49": 5245672960.0, + "50": 5245672960.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.48983, + "2": 0.782, + "3": 0.71913, + "4": 0.71541, + "5": 0.71528, + "6": 0.7219, + "7": 0.72729, + "8": 0.72714, + "9": 0.7634, + "10": 0.71523, + "11": 0.72303, + "12": 1.34179, + "13": 0.93338, + "14": 0.72484, + "15": 0.70784, + "16": 0.72443, + "17": 0.72151, + "18": 0.71102, + "19": 1.13624, + "20": 1.56469, + "21": 1.66622, + "22": 0.9574, + "23": 0.69921, + "24": 0.70477, + "25": 0.73932, + "26": 0.74798, + "27": 0.72633, + "28": 0.72782, + "29": 0.73646, + "30": 0.73665, + "31": 0.74301, + "32": 0.73363, + "33": 0.71952, + "34": 0.7406, + "35": 0.71103, + "36": 0.70026, + "37": 0.71087, + "38": 0.88272, + "39": 0.71279, + "40": 0.92123, + "41": 1.20193, + "42": 0.72924, + "43": 0.70749, + "44": 0.72158, + "45": 0.71169, + "46": 1.23637, + "47": 1.13432, + "48": 1.26896, + "49": 1.13682, + "50": 1.21366 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_h100.json index 6ca48489088..36ea57771ea 100644 --- a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.42626, + "2": 10.41171, + "3": 10.41885, + "4": 10.42153, "5": 10.42192, + "6": 10.41563, + "7": 10.42859, + "8": 10.42079, + "9": 10.43014, "10": 10.40859, + "11": 10.43501, + "12": 10.4025, + "13": 10.42274, + "14": 10.41249, "15": 10.40948, + "16": 10.40806, + "17": 10.3892, + "18": 10.38857, + "19": 10.37147, "20": 10.40453, + "21": 10.36615, + "22": 10.34963, + "23": 10.35388, + "24": 10.30136, "25": 10.31117, + "26": 10.30241, + "27": 10.2821, + "28": 10.27928, + "29": 10.23928, "30": 10.14742, + "31": 10.10532, + "32": 10.09426, + "33": 10.09032, + "34": 10.06437, "35": 10.04643, + "36": 10.03306, + "37": 10.00505, + "38": 10.00274, + "39": 9.91418, "40": 9.91103, + "41": 9.86562, + "42": 9.78095, + "43": 9.79496, + "44": 9.73077, "45": 9.7428, + "46": 9.63829, + "47": 9.6868, + "48": 9.637, + "49": 9.6554, "50": 9.65776 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 3452.0, + "2": 2890.0, + "3": 1856.0, + "4": 3256.0, "5": 3333.0, + "6": 2985.0, + "7": 3208.0, + "8": 3314.0, + "9": 3134.0, "10": 3124.0, + "11": 3913.0, + "12": 3008.0, + "13": 3108.0, + "14": 3652.0, "15": 3267.0, + "16": 3662.0, + "17": 3680.0, + "18": 3708.0, + "19": 3375.0, "20": 3449.0, + "21": 3115.0, + "22": 3545.0, + "23": 3516.0, + "24": 3789.0, "25": 3570.0, + "26": 3719.0, + "27": 2808.0, + "28": 3823.0, + "29": 3626.0, "30": 4136.0, + "31": 2541.0, + "32": 3945.0, + "33": 3501.0, + "34": 3795.0, "35": 
3652.0, + "36": 4269.0, + "37": 4152.0, + "38": 3787.0, + "39": 3873.0, "40": 4661.0, + "41": 2846.0, + "42": 1556.0, + "43": 2809.0, + "44": 4030.0, "45": 4724.0, + "46": 4587.0, + "47": 3120.0, + "48": 4366.0, + "49": 3839.0, "50": 3146.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1661765632.0, + "2": 1661765632.0, + "3": 1661765632.0, + "4": 1661765632.0, "5": 1661765632.0, + "6": 1661765632.0, + "7": 1661765632.0, + "8": 1661765632.0, + "9": 1661765632.0, "10": 1661765632.0, + "11": 1661765632.0, + "12": 1661765632.0, + "13": 1661765632.0, + "14": 1661765632.0, "15": 1661765632.0, + "16": 1661765632.0, + "17": 1661765632.0, + "18": 1661765632.0, + "19": 1661765632.0, "20": 1661765632.0, + "21": 1661765632.0, + "22": 1661765632.0, + "23": 1661765632.0, + "24": 1661765632.0, "25": 1661765632.0, + "26": 1661765632.0, + "27": 1661765632.0, + "28": 1661765632.0, + "29": 1661765632.0, "30": 1661765632.0, + "31": 1661765632.0, + "32": 1661765632.0, + "33": 1661765632.0, + "34": 1661765632.0, "35": 1661765632.0, + "36": 1661765632.0, + "37": 1661765632.0, + "38": 1661765632.0, + "39": 1661765632.0, "40": 1661765632.0, + "41": 1661765632.0, + "42": 1661765632.0, + "43": 1661765632.0, + "44": 1661765632.0, "45": 1661765632.0, + "46": 1661765632.0, + "47": 1661765632.0, + "48": 1661765632.0, + "49": 1661765632.0, "50": 1661765632.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2506479104.0, + "2": 3205449216.0, + "3": 3205449216.0, + "4": 3205449216.0, "5": 3205449216.0, + "6": 3205449216.0, + "7": 3205449216.0, + "8": 3205449216.0, + "9": 3205449216.0, "10": 3205449216.0, + "11": 3205449216.0, + "12": 3205449216.0, + "13": 3205449216.0, + "14": 3205449216.0, "15": 3205449216.0, + "16": 3205449216.0, + "17": 3205449216.0, + "18": 3205449216.0, + "19": 3205449216.0, "20": 3205449216.0, + "21": 3205449216.0, + 
"22": 3205449216.0, + "23": 3205449216.0, + "24": 3205449216.0, "25": 3205449216.0, + "26": 3205449216.0, + "27": 3205449216.0, + "28": 3205449216.0, + "29": 3205449216.0, "30": 3205449216.0, + "31": 3205449216.0, + "32": 3205449216.0, + "33": 3205449216.0, + "34": 3205449216.0, "35": 3205449216.0, + "36": 3205449216.0, + "37": 3205449216.0, + "38": 3205449216.0, + "39": 3205449216.0, "40": 3205449216.0, + "41": 3205449216.0, + "42": 3205449216.0, + "43": 3205449216.0, + "44": 3205449216.0, "45": 3205449216.0, + "46": 3205449216.0, + "47": 3205449216.0, + "48": 3205449216.0, + "49": 3205449216.0, "50": 3205449216.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 9.57532, - "5": 1.46202, - "10": 1.45865, - "15": 1.46969, - "20": 1.46895, - "25": 1.45633, - "30": 1.74568, - "35": 1.47151, - "40": 1.4582, - "45": 1.45697, - "50": 1.45728 + "1": 10.20165, + "2": 1.76894, + "3": 1.75257, + "4": 1.76371, + "5": 1.76165, + "6": 1.76697, + "7": 1.7566, + "8": 1.76422, + "9": 1.76493, + "10": 1.76085, + "11": 1.75557, + "12": 1.7612, + "13": 1.84209, + "14": 1.7609, + "15": 1.75819, + "16": 1.76084, + "17": 2.14365, + "18": 1.77031, + "19": 1.77623, + "20": 1.81462, + "21": 2.1764, + "22": 1.76578, + "23": 1.75799, + "24": 2.18418, + "25": 1.76236, + "26": 2.12149, + "27": 2.09277, + "28": 1.77853, + "29": 1.83529, + "30": 1.77362, + "31": 1.77704, + "32": 1.78154, + "33": 1.76732, + "34": 1.77318, + "35": 1.77963, + "36": 1.77541, + "37": 1.77626, + "38": 1.77185, + "39": 1.78486, + "40": 1.78003, + "41": 1.78092, + "42": 1.77118, + "43": 1.77626, + "44": 1.78384, + "45": 1.78376, + "46": 1.84893, + "47": 1.78761, + "48": 1.79814, + "49": 1.79323, + "50": 1.77941 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..73cbc43b7f2 --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.42626, + "2": 10.41171, + "3": 10.41885, + "4": 10.42153, + "5": 10.42192, + "6": 10.41563, + "7": 10.42859, + "8": 10.42079, + "9": 10.43014, + "10": 10.40859, + "11": 10.43501, + "12": 10.4025, + "13": 10.42274, + "14": 10.41249, + "15": 10.40948, + "16": 10.40806, + "17": 10.3892, + "18": 10.38857, + "19": 10.37147, + "20": 10.40453, + "21": 10.36615, + "22": 10.34963, + "23": 10.35388, + "24": 10.30136, + "25": 10.31117, + "26": 10.30241, + "27": 10.2821, + "28": 10.27928, + "29": 10.23928, + "30": 10.14742, + "31": 10.10532, + "32": 10.09426, + "33": 10.09032, + "34": 10.06437, + "35": 10.04643, + "36": 10.03306, + "37": 10.00505, + "38": 10.00274, + "39": 9.91418, + "40": 9.91103, + "41": 9.86562, + "42": 9.78095, + "43": 9.79496, + "44": 9.73077, + "45": 9.7428, + "46": 9.63829, + "47": 9.6868, + "48": 9.637, + "49": 9.6554, + "50": 9.65776 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3452.0, + "2": 2890.0, + "3": 1856.0, + "4": 3256.0, + "5": 3333.0, + "6": 2985.0, + "7": 3208.0, + "8": 3314.0, + "9": 3134.0, + "10": 3124.0, + "11": 3913.0, + "12": 3008.0, + "13": 3108.0, + "14": 3652.0, + "15": 3267.0, + "16": 3662.0, + "17": 3680.0, + "18": 3708.0, + "19": 3375.0, + "20": 3449.0, + "21": 3115.0, + "22": 3545.0, + "23": 3516.0, + "24": 3789.0, + "25": 3570.0, + "26": 3719.0, + "27": 2808.0, + "28": 3823.0, + "29": 3626.0, + "30": 4136.0, + "31": 2541.0, + "32": 3945.0, + "33": 3501.0, + "34": 3795.0, + "35": 3652.0, + "36": 4269.0, + "37": 4152.0, + "38": 3787.0, + 
"39": 3873.0, + "40": 4661.0, + "41": 2846.0, + "42": 1556.0, + "43": 2809.0, + "44": 4030.0, + "45": 4724.0, + "46": 4587.0, + "47": 3120.0, + "48": 4366.0, + "49": 3839.0, + "50": 3146.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1661765632.0, + "2": 1661765632.0, + "3": 1661765632.0, + "4": 1661765632.0, + "5": 1661765632.0, + "6": 1661765632.0, + "7": 1661765632.0, + "8": 1661765632.0, + "9": 1661765632.0, + "10": 1661765632.0, + "11": 1661765632.0, + "12": 1661765632.0, + "13": 1661765632.0, + "14": 1661765632.0, + "15": 1661765632.0, + "16": 1661765632.0, + "17": 1661765632.0, + "18": 1661765632.0, + "19": 1661765632.0, + "20": 1661765632.0, + "21": 1661765632.0, + "22": 1661765632.0, + "23": 1661765632.0, + "24": 1661765632.0, + "25": 1661765632.0, + "26": 1661765632.0, + "27": 1661765632.0, + "28": 1661765632.0, + "29": 1661765632.0, + "30": 1661765632.0, + "31": 1661765632.0, + "32": 1661765632.0, + "33": 1661765632.0, + "34": 1661765632.0, + "35": 1661765632.0, + "36": 1661765632.0, + "37": 1661765632.0, + "38": 1661765632.0, + "39": 1661765632.0, + "40": 1661765632.0, + "41": 1661765632.0, + "42": 1661765632.0, + "43": 1661765632.0, + "44": 1661765632.0, + "45": 1661765632.0, + "46": 1661765632.0, + "47": 1661765632.0, + "48": 1661765632.0, + "49": 1661765632.0, + "50": 1661765632.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2506479104.0, + "2": 3205449216.0, + "3": 3205449216.0, + "4": 3205449216.0, + "5": 3205449216.0, + "6": 3205449216.0, + "7": 3205449216.0, + "8": 3205449216.0, + "9": 3205449216.0, + "10": 3205449216.0, + "11": 3205449216.0, + "12": 3205449216.0, + "13": 3205449216.0, + "14": 3205449216.0, + "15": 3205449216.0, + "16": 3205449216.0, + "17": 3205449216.0, + "18": 3205449216.0, + "19": 3205449216.0, + "20": 3205449216.0, + "21": 3205449216.0, + "22": 3205449216.0, + "23": 3205449216.0, + 
"24": 3205449216.0, + "25": 3205449216.0, + "26": 3205449216.0, + "27": 3205449216.0, + "28": 3205449216.0, + "29": 3205449216.0, + "30": 3205449216.0, + "31": 3205449216.0, + "32": 3205449216.0, + "33": 3205449216.0, + "34": 3205449216.0, + "35": 3205449216.0, + "36": 3205449216.0, + "37": 3205449216.0, + "38": 3205449216.0, + "39": 3205449216.0, + "40": 3205449216.0, + "41": 3205449216.0, + "42": 3205449216.0, + "43": 3205449216.0, + "44": 3205449216.0, + "45": 3205449216.0, + "46": 3205449216.0, + "47": 3205449216.0, + "48": 3205449216.0, + "49": 3205449216.0, + "50": 3205449216.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.60443, + "2": 1.59144, + "3": 1.53882, + "4": 1.56784, + "5": 1.52207, + "6": 1.53885, + "7": 1.52214, + "8": 1.52095, + "9": 1.51957, + "10": 1.51224, + "11": 1.49689, + "12": 1.5078, + "13": 1.50118, + "14": 1.4917, + "15": 1.60359, + "16": 1.55447, + "17": 1.55262, + "18": 1.84594, + "19": 1.55841, + "20": 1.7545, + "21": 1.48478, + "22": 1.49549, + "23": 1.81525, + "24": 1.79126, + "25": 2.12023, + "26": 1.49775, + "27": 1.80406, + "28": 1.49411, + "29": 1.96966, + "30": 1.48009, + "31": 1.47915, + "32": 1.48757, + "33": 1.47812, + "34": 1.4701, + "35": 1.47099, + "36": 1.47773, + "37": 1.48414, + "38": 1.51352, + "39": 1.48595, + "40": 1.49001, + "41": 1.48545, + "42": 1.50863, + "43": 1.47565, + "44": 1.48135, + "45": 1.48123, + "46": 1.48152, + "47": 1.48884, + "48": 1.56195, + "49": 1.55628, + "50": 1.48725 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..88adf60a26e --- /dev/null +++ 
b/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.42626, + "2": 10.41171, + "3": 10.41885, + "4": 10.42153, + "5": 10.42192, + "6": 10.41563, + "7": 10.42859, + "8": 10.42079, + "9": 10.43014, + "10": 10.40859, + "11": 10.43501, + "12": 10.4025, + "13": 10.42274, + "14": 10.41249, + "15": 10.40948, + "16": 10.40806, + "17": 10.3892, + "18": 10.38857, + "19": 10.37147, + "20": 10.40453, + "21": 10.36615, + "22": 10.34963, + "23": 10.35388, + "24": 10.30136, + "25": 10.31117, + "26": 10.30241, + "27": 10.2821, + "28": 10.27928, + "29": 10.23928, + "30": 10.14742, + "31": 10.10532, + "32": 10.09426, + "33": 10.09032, + "34": 10.06437, + "35": 10.04643, + "36": 10.03306, + "37": 10.00505, + "38": 10.00274, + "39": 9.91418, + "40": 9.91103, + "41": 9.86562, + "42": 9.78095, + "43": 9.79496, + "44": 9.73077, + "45": 9.7428, + "46": 9.63829, + "47": 9.6868, + "48": 9.637, + "49": 9.6554, + "50": 9.65776 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3452.0, + "2": 2890.0, + "3": 1856.0, + "4": 3256.0, + "5": 3333.0, + "6": 2985.0, + "7": 3208.0, + "8": 3314.0, + "9": 3134.0, + "10": 3124.0, + "11": 3913.0, + "12": 3008.0, + "13": 3108.0, + "14": 3652.0, + "15": 3267.0, + "16": 3662.0, + "17": 3680.0, + "18": 3708.0, + "19": 3375.0, + "20": 3449.0, + "21": 3115.0, + "22": 3545.0, + "23": 3516.0, + "24": 3789.0, + "25": 3570.0, + "26": 3719.0, + "27": 2808.0, + "28": 3823.0, + "29": 3626.0, + "30": 4136.0, + "31": 2541.0, + "32": 3945.0, + "33": 3501.0, + "34": 3795.0, + "35": 3652.0, + "36": 4269.0, + "37": 4152.0, + "38": 3787.0, + "39": 3873.0, + "40": 4661.0, + "41": 2846.0, + "42": 1556.0, + "43": 2809.0, + "44": 4030.0, + "45": 4724.0, + "46": 4587.0, + "47": 3120.0, + "48": 4366.0, + "49": 3839.0, + "50": 3146.0 + } + }, + 
"mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1661765632.0, + "2": 1661765632.0, + "3": 1661765632.0, + "4": 1661765632.0, + "5": 1661765632.0, + "6": 1661765632.0, + "7": 1661765632.0, + "8": 1661765632.0, + "9": 1661765632.0, + "10": 1661765632.0, + "11": 1661765632.0, + "12": 1661765632.0, + "13": 1661765632.0, + "14": 1661765632.0, + "15": 1661765632.0, + "16": 1661765632.0, + "17": 1661765632.0, + "18": 1661765632.0, + "19": 1661765632.0, + "20": 1661765632.0, + "21": 1661765632.0, + "22": 1661765632.0, + "23": 1661765632.0, + "24": 1661765632.0, + "25": 1661765632.0, + "26": 1661765632.0, + "27": 1661765632.0, + "28": 1661765632.0, + "29": 1661765632.0, + "30": 1661765632.0, + "31": 1661765632.0, + "32": 1661765632.0, + "33": 1661765632.0, + "34": 1661765632.0, + "35": 1661765632.0, + "36": 1661765632.0, + "37": 1661765632.0, + "38": 1661765632.0, + "39": 1661765632.0, + "40": 1661765632.0, + "41": 1661765632.0, + "42": 1661765632.0, + "43": 1661765632.0, + "44": 1661765632.0, + "45": 1661765632.0, + "46": 1661765632.0, + "47": 1661765632.0, + "48": 1661765632.0, + "49": 1661765632.0, + "50": 1661765632.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2506479104.0, + "2": 3205449216.0, + "3": 3205449216.0, + "4": 3205449216.0, + "5": 3205449216.0, + "6": 3205449216.0, + "7": 3205449216.0, + "8": 3205449216.0, + "9": 3205449216.0, + "10": 3205449216.0, + "11": 3205449216.0, + "12": 3205449216.0, + "13": 3205449216.0, + "14": 3205449216.0, + "15": 3205449216.0, + "16": 3205449216.0, + "17": 3205449216.0, + "18": 3205449216.0, + "19": 3205449216.0, + "20": 3205449216.0, + "21": 3205449216.0, + "22": 3205449216.0, + "23": 3205449216.0, + "24": 3205449216.0, + "25": 3205449216.0, + "26": 3205449216.0, + "27": 3205449216.0, + "28": 3205449216.0, + "29": 3205449216.0, + "30": 3205449216.0, + "31": 3205449216.0, + "32": 3205449216.0, + 
"33": 3205449216.0, + "34": 3205449216.0, + "35": 3205449216.0, + "36": 3205449216.0, + "37": 3205449216.0, + "38": 3205449216.0, + "39": 3205449216.0, + "40": 3205449216.0, + "41": 3205449216.0, + "42": 3205449216.0, + "43": 3205449216.0, + "44": 3205449216.0, + "45": 3205449216.0, + "46": 3205449216.0, + "47": 3205449216.0, + "48": 3205449216.0, + "49": 3205449216.0, + "50": 3205449216.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.29331, + "2": 1.82828, + "3": 1.75745, + "4": 1.75149, + "5": 1.76912, + "6": 1.75888, + "7": 1.75313, + "8": 1.75423, + "9": 1.74482, + "10": 1.84387, + "11": 2.01499, + "12": 1.74448, + "13": 1.75425, + "14": 2.09351, + "15": 1.77765, + "16": 1.76841, + "17": 1.75495, + "18": 2.05727, + "19": 1.77481, + "20": 2.11285, + "21": 1.77659, + "22": 1.75669, + "23": 1.75872, + "24": 2.1065, + "25": 2.02543, + "26": 1.84773, + "27": 1.76632, + "28": 1.76482, + "29": 1.75732, + "30": 1.75335, + "31": 1.75453, + "32": 1.80627, + "33": 1.757, + "34": 1.75719, + "35": 1.75478, + "36": 1.76009, + "37": 1.75602, + "38": 1.75806, + "39": 1.75609, + "40": 1.75247, + "41": 1.75179, + "42": 1.75873, + "43": 1.77534, + "44": 1.80833, + "45": 1.74663, + "46": 1.75048, + "47": 1.7473, + "48": 1.75253, + "49": 1.76783, + "50": 1.75365 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json index a8c99cdd960..fbdb62b88ac 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json @@ -2,140 +2,535 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.89618, + "2": 10.89538, + "3": 10.88915, + "4": 10.89094, "5": 10.8927, + "6": 10.90148, + "7": 10.89392, + "8": 10.90369, + "9": 10.90794, "10": 10.89108, + "11": 10.88762, + "12": 10.9076, + "13": 10.91429, + "14": 10.90654, "15": 10.90227, + "16": 10.91042, + "17": 10.89896, + "18": 10.90666, + "19": 10.89908, "20": 10.90133, + "21": 10.91713, + "22": 10.89139, + "23": 10.90085, + "24": 10.89366, "25": 10.89372, + "26": 10.87372, + "27": 10.87917, + "28": 10.88756, + "29": 10.85461, "30": 10.83891, + "31": 10.75166, + "32": 10.8278, + "33": 10.80306, + "34": 10.73559, "35": 10.7301, + "36": 10.69318, + "37": 10.72854, + "38": 10.65364, + "39": 10.71672, "40": 10.56996, + "41": 10.58467, + "42": 10.59853, + "43": 10.3948, + "44": 10.44431, "45": 10.3452, + "46": 10.31919, + "47": 10.49671, + "48": 10.31281, + "49": 10.09084, "50": 10.31089, + "51": 10.25547, + "52": 10.15856, + "53": 10.38114, + "54": 10.2992, "55": 10.23806, + "56": 10.00726, + "57": 9.87765, + "58": 10.15279, + "59": 9.94207, "60": 9.8666, + "61": 10.00032, + "62": 10.23443, + "63": 9.71917, + "64": 10.04209, "65": 9.30009, + "66": 9.95537, + "67": 9.6499, + "68": 10.00402, + "69": 9.99988, "70": 9.96383, + "71": 9.84259, + "72": 9.81258, + "73": 9.70921, + "74": 9.19832, "75": 9.61686, + "76": 9.28859, + "77": 10.20416, + "78": 9.88378, + "79": 9.54296, "80": 9.57095, + "81": 9.64006, + "82": 9.83648, + "83": 9.47691, + "84": 9.54866, "85": 9.75198, + "86": 9.21427, + "87": 9.70607, + "88": 9.87307, + "89": 9.72876, "90": 9.92353, + "91": 9.48236, + "92": 9.47671, + "93": 9.20895, + "94": 8.9625, "95": 9.62369, + "96": 9.64228, + "97": 9.41575, + "98": 9.77515, + "99": 9.00692, "100": 
9.51305 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 454770688.0, + "2": 454770688.0, + "3": 454770688.0, + "4": 454770688.0, "5": 454770688.0, + "6": 454770688.0, + "7": 454770688.0, + "8": 454770688.0, + "9": 454770688.0, "10": 454770688.0, + "11": 454770688.0, + "12": 454770688.0, + "13": 454770688.0, + "14": 454770688.0, "15": 454770688.0, + "16": 454770688.0, + "17": 454770688.0, + "18": 518880768.0, + "19": 518880768.0, "20": 518880768.0, + "21": 518880768.0, + "22": 518880768.0, + "23": 518880768.0, + "24": 518880768.0, "25": 518880768.0, + "26": 518880768.0, + "27": 518880768.0, + "28": 518880768.0, + "29": 518880768.0, "30": 518880768.0, + "31": 518880768.0, + "32": 518880768.0, + "33": 518880768.0, + "34": 518880768.0, "35": 518880768.0, + "36": 518880768.0, + "37": 518880768.0, + "38": 518880768.0, + "39": 518880768.0, "40": 518880768.0, + "41": 518880768.0, + "42": 518880768.0, + "43": 518880768.0, + "44": 518880768.0, "45": 518880768.0, + "46": 518880768.0, + "47": 518880768.0, + "48": 518880768.0, + "49": 518880768.0, "50": 518880768.0, + "51": 518880768.0, + "52": 518880768.0, + "53": 518880768.0, + "54": 518880768.0, "55": 518880768.0, + "56": 518880768.0, + "57": 518880768.0, + "58": 518880768.0, + "59": 518880768.0, "60": 518880768.0, + "61": 518880768.0, + "62": 518880768.0, + "63": 518880768.0, + "64": 518880768.0, "65": 518880768.0, + "66": 518880768.0, + "67": 518880768.0, + "68": 518880768.0, + "69": 518880768.0, "70": 518880768.0, + "71": 518880768.0, + "72": 518880768.0, + "73": 518880768.0, + "74": 518880768.0, "75": 518880768.0, + "76": 518880768.0, + "77": 518880768.0, + "78": 518880768.0, + "79": 518880768.0, "80": 518880768.0, + "81": 518880768.0, + "82": 518880768.0, + "83": 518880768.0, + "84": 518880768.0, "85": 518880768.0, + "86": 518880768.0, + "87": 518880768.0, + "88": 518880768.0, + "89": 518880768.0, "90": 518880768.0, + "91": 518880768.0, + "92": 
518880768.0, + "93": 518880768.0, + "94": 518880768.0, "95": 518880768.0, + "96": 518880768.0, + "97": 518880768.0, + "98": 518880768.0, + "99": 518880768.0, "100": 518880768.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 4511150592.0, + "2": 4544705536.0, + "3": 4544705536.0, + "4": 4544705536.0, "5": 4544705536.0, + "6": 4544705536.0, + "7": 4544705536.0, + "8": 4544705536.0, + "9": 4544705536.0, "10": 4544705536.0, + "11": 4544705536.0, + "12": 4544705536.0, + "13": 4544705536.0, + "14": 4544705536.0, "15": 4544705536.0, + "16": 4544705536.0, + "17": 4544705536.0, + "18": 4544705536.0, + "19": 4607767040.0, "20": 4607767040.0, + "21": 4607767040.0, + "22": 4607767040.0, + "23": 4607767040.0, + "24": 4607767040.0, "25": 4607767040.0, + "26": 4607767040.0, + "27": 4607767040.0, + "28": 4607767040.0, + "29": 4607767040.0, "30": 4607767040.0, + "31": 4607767040.0, + "32": 4607767040.0, + "33": 4607767040.0, + "34": 4607767040.0, "35": 4607767040.0, + "36": 4607767040.0, + "37": 4607767040.0, + "38": 4607767040.0, + "39": 4607767040.0, "40": 4607767040.0, + "41": 4607767040.0, + "42": 4607767040.0, + "43": 4607767040.0, + "44": 4607767040.0, "45": 4607767040.0, + "46": 4607767040.0, + "47": 4607767040.0, + "48": 4607767040.0, + "49": 4607767040.0, "50": 4607767040.0, + "51": 4607767040.0, + "52": 4607767040.0, + "53": 4607767040.0, + "54": 4607767040.0, "55": 4607767040.0, + "56": 4607767040.0, + "57": 4607767040.0, + "58": 4607767040.0, + "59": 4607767040.0, "60": 4607767040.0, + "61": 4607767040.0, + "62": 4607767040.0, + "63": 4607767040.0, + "64": 4607767040.0, "65": 4607767040.0, + "66": 4607767040.0, + "67": 4607767040.0, + "68": 4607767040.0, + "69": 4607767040.0, "70": 4607767040.0, + "71": 4607767040.0, + "72": 4607767040.0, + "73": 4607767040.0, + "74": 4607767040.0, "75": 4607767040.0, + "76": 4607767040.0, + "77": 4607767040.0, + "78": 4607767040.0, + "79": 4607767040.0, 
"80": 4607767040.0, + "81": 4607767040.0, + "82": 4607767040.0, + "83": 4607767040.0, + "84": 4607767040.0, "85": 4607767040.0, + "86": 4607767040.0, + "87": 4607767040.0, + "88": 4607767040.0, + "89": 4607767040.0, "90": 4607767040.0, + "91": 4607767040.0, + "92": 4607767040.0, + "93": 4607767040.0, + "94": 4607767040.0, "95": 4607767040.0, + "96": 4607767040.0, + "97": 4607767040.0, + "98": 4607767040.0, + "99": 4607767040.0, "100": 4607767040.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 6.03441, - "5": 0.05457, - "10": 0.0555, - "15": 0.05442, - "20": 0.05936, - "25": 0.06165, - "30": 0.05917, - "35": 0.06761, - "40": 0.06021, - "45": 0.06061, - "50": 0.05916, - "55": 0.06279, - "60": 0.05959, - "65": 0.05975, - "70": 0.05984, - "75": 0.05968, - "80": 0.06032, - "85": 0.05993, - "90": 0.06577, - "95": 0.0595, - "100": 0.06114 + "1": 6.44783, + "2": 0.09007, + "3": 0.06737, + "4": 0.06577, + "5": 0.06617, + "6": 0.06499, + "7": 0.06848, + "8": 0.06519, + "9": 0.06616, + "10": 0.06552, + "11": 0.06475, + "12": 0.06425, + "13": 0.06448, + "14": 0.0646, + "15": 0.06511, + "16": 0.06475, + "17": 0.06554, + "18": 0.11461, + "19": 0.07217, + "20": 0.07186, + "21": 0.07086, + "22": 0.06865, + "23": 0.07004, + "24": 0.07096, + "25": 0.071, + "26": 0.07082, + "27": 0.07253, + "28": 0.07103, + "29": 0.07101, + "30": 0.07144, + "31": 0.07157, + "32": 0.07144, + "33": 0.07102, + "34": 0.0715, + "35": 0.07197, + "36": 0.07104, + "37": 0.07183, + "38": 0.07076, + "39": 0.07174, + "40": 0.07198, + "41": 0.0728, + "42": 0.07014, + "43": 0.07139, + "44": 0.07151, + "45": 0.0731, + "46": 0.07262, + "47": 0.07101, + "48": 0.07085, + "49": 0.07236, + "50": 0.07208, + "51": 0.10876, + "52": 0.07904, + "53": 0.07811, + "54": 0.07594, + "55": 0.07858, + "56": 0.08222, + "57": 0.08161, + "58": 0.0804, + "59": 0.07879, + "60": 0.07013, + "61": 0.06958, + "62": 0.07024, + "63": 0.06986, + "64": 0.07068, + "65": 
0.07096, + "66": 0.07033, + "67": 0.07005, + "68": 0.07023, + "69": 0.07133, + "70": 0.07104, + "71": 0.0717, + "72": 0.07141, + "73": 0.07155, + "74": 0.07093, + "75": 0.07044, + "76": 0.06976, + "77": 0.07009, + "78": 0.07092, + "79": 0.07151, + "80": 0.07062, + "81": 0.07312, + "82": 0.07117, + "83": 0.07287, + "84": 0.07054, + "85": 0.07186, + "86": 0.0698, + "87": 0.07076, + "88": 0.0702, + "89": 0.07128, + "90": 0.07039, + "91": 0.07054, + "92": 0.07169, + "93": 0.07155, + "94": 0.07057, + "95": 0.07134, + "96": 0.07134, + "97": 0.07146, + "98": 0.07223, + "99": 0.07189, + "100": 0.07136 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", "15": "nan", + "16": "nan", + "17": "nan", + "18": 1155.0, + "19": 1454.0, "20": 1095.0, + "21": 1230.0, + "22": "nan", + "23": 1357.0, + "24": 1150.0, "25": 1228.0, + "26": 1202.0, + "27": 1326.0, + "28": 1466.0, + "29": 1438.0, "30": 1238.0, + "31": 1008.0, + "32": 1160.0, + "33": 1371.0, + "34": 1154.0, "35": 1295.0, + "36": 1156.0, + "37": 1403.0, + "38": 1487.0, + "39": 1429.0, "40": 1412.0, + "41": 1458.0, + "42": 1316.0, + "43": 1193.0, + "44": 1323.0, "45": 1297.0, + "46": 1276.0, + "47": 1868.0, + "48": 1251.0, + "49": 1272.0, "50": 1524.0, + "51": 1367.0, + "52": 1372.0, + "53": 1715.0, + "54": 1485.0, "55": 1482.0, + "56": 1473.0, + "57": 1539.0, + "58": 1736.0, + "59": 1661.0, "60": 1586.0, + "61": 1691.0, + "62": 1865.0, + "63": 1395.0, + "64": 1846.0, "65": 1428.0, + "66": 1717.0, + "67": 1700.0, + "68": 1750.0, + "69": 1681.0, "70": 1861.0, + "71": 2048.0, + "72": 1552.0, + "73": 2010.0, + "74": 1344.0, "75": 1840.0, + "76": 1846.0, + "77": 2034.0, + "78": 2170.0, + "79": 1949.0, "80": 2077.0, + "81": 2381.0, + "82": 2390.0, + "83": 1843.0, + "84": 2060.0, "85": 
2317.0, + "86": 1958.0, + "87": 2829.0, + "88": 2046.0, + "89": 2260.0, "90": 2545.0, + "91": 1801.0, + "92": 2505.0, + "93": 2064.0, + "94": 2223.0, "95": 2379.0, + "96": 2579.0, + "97": 2411.0, + "98": 2500.0, + "99": 2124.0, "100": 2119.0 } } diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..68de1078bf3 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.89618, + "2": 10.89538, + "3": 10.88915, + "4": 10.89094, + "5": 10.8927, + "6": 10.90148, + "7": 10.89392, + "8": 10.90369, + "9": 10.90794, + "10": 10.89108, + "11": 10.88762, + "12": 10.9076, + "13": 10.91429, + "14": 10.90654, + "15": 10.90227, + "16": 10.91042, + "17": 10.89896, + "18": 10.90666, + "19": 10.89908, + "20": 10.90133, + "21": 10.91713, + "22": 10.89139, + "23": 10.90085, + "24": 10.89366, + "25": 10.89372, + "26": 10.87372, + "27": 10.87917, + "28": 10.88756, + "29": 10.85461, + "30": 10.83891, + "31": 10.75166, + "32": 10.8278, + "33": 10.80306, + "34": 10.73559, + "35": 10.7301, + "36": 10.69318, + "37": 10.72854, + "38": 10.65364, + "39": 10.71672, + "40": 10.56996, + "41": 10.58467, + "42": 10.59853, + "43": 10.3948, + "44": 10.44431, + "45": 10.3452, + "46": 10.31919, + "47": 10.49671, + "48": 10.31281, + "49": 10.09084, + "50": 10.31089, + "51": 10.25547, + "52": 10.15856, + "53": 10.38114, + "54": 10.2992, + "55": 10.23806, + "56": 
10.00726, + "57": 9.87765, + "58": 10.15279, + "59": 9.94207, + "60": 9.8666, + "61": 10.00032, + "62": 10.23443, + "63": 9.71917, + "64": 10.04209, + "65": 9.30009, + "66": 9.95537, + "67": 9.6499, + "68": 10.00402, + "69": 9.99988, + "70": 9.96383, + "71": 9.84259, + "72": 9.81258, + "73": 9.70921, + "74": 9.19832, + "75": 9.61686, + "76": 9.28859, + "77": 10.20416, + "78": 9.88378, + "79": 9.54296, + "80": 9.57095, + "81": 9.64006, + "82": 9.83648, + "83": 9.47691, + "84": 9.54866, + "85": 9.75198, + "86": 9.21427, + "87": 9.70607, + "88": 9.87307, + "89": 9.72876, + "90": 9.92353, + "91": 9.48236, + "92": 9.47671, + "93": 9.20895, + "94": 8.9625, + "95": 9.62369, + "96": 9.64228, + "97": 9.41575, + "98": 9.77515, + "99": 9.00692, + "100": 9.51305 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 454770688.0, + "2": 454770688.0, + "3": 454770688.0, + "4": 454770688.0, + "5": 454770688.0, + "6": 454770688.0, + "7": 454770688.0, + "8": 454770688.0, + "9": 454770688.0, + "10": 454770688.0, + "11": 454770688.0, + "12": 454770688.0, + "13": 454770688.0, + "14": 454770688.0, + "15": 454770688.0, + "16": 454770688.0, + "17": 454770688.0, + "18": 518880768.0, + "19": 518880768.0, + "20": 518880768.0, + "21": 518880768.0, + "22": 518880768.0, + "23": 518880768.0, + "24": 518880768.0, + "25": 518880768.0, + "26": 518880768.0, + "27": 518880768.0, + "28": 518880768.0, + "29": 518880768.0, + "30": 518880768.0, + "31": 518880768.0, + "32": 518880768.0, + "33": 518880768.0, + "34": 518880768.0, + "35": 518880768.0, + "36": 518880768.0, + "37": 518880768.0, + "38": 518880768.0, + "39": 518880768.0, + "40": 518880768.0, + "41": 518880768.0, + "42": 518880768.0, + "43": 518880768.0, + "44": 518880768.0, + "45": 518880768.0, + "46": 518880768.0, + "47": 518880768.0, + "48": 518880768.0, + "49": 518880768.0, + "50": 518880768.0, + "51": 518880768.0, + "52": 518880768.0, + "53": 518880768.0, + "54": 518880768.0, + 
"55": 518880768.0, + "56": 518880768.0, + "57": 518880768.0, + "58": 518880768.0, + "59": 518880768.0, + "60": 518880768.0, + "61": 518880768.0, + "62": 518880768.0, + "63": 518880768.0, + "64": 518880768.0, + "65": 518880768.0, + "66": 518880768.0, + "67": 518880768.0, + "68": 518880768.0, + "69": 518880768.0, + "70": 518880768.0, + "71": 518880768.0, + "72": 518880768.0, + "73": 518880768.0, + "74": 518880768.0, + "75": 518880768.0, + "76": 518880768.0, + "77": 518880768.0, + "78": 518880768.0, + "79": 518880768.0, + "80": 518880768.0, + "81": 518880768.0, + "82": 518880768.0, + "83": 518880768.0, + "84": 518880768.0, + "85": 518880768.0, + "86": 518880768.0, + "87": 518880768.0, + "88": 518880768.0, + "89": 518880768.0, + "90": 518880768.0, + "91": 518880768.0, + "92": 518880768.0, + "93": 518880768.0, + "94": 518880768.0, + "95": 518880768.0, + "96": 518880768.0, + "97": 518880768.0, + "98": 518880768.0, + "99": 518880768.0, + "100": 518880768.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4511150592.0, + "2": 4544705536.0, + "3": 4544705536.0, + "4": 4544705536.0, + "5": 4544705536.0, + "6": 4544705536.0, + "7": 4544705536.0, + "8": 4544705536.0, + "9": 4544705536.0, + "10": 4544705536.0, + "11": 4544705536.0, + "12": 4544705536.0, + "13": 4544705536.0, + "14": 4544705536.0, + "15": 4544705536.0, + "16": 4544705536.0, + "17": 4544705536.0, + "18": 4544705536.0, + "19": 4607767040.0, + "20": 4607767040.0, + "21": 4607767040.0, + "22": 4607767040.0, + "23": 4607767040.0, + "24": 4607767040.0, + "25": 4607767040.0, + "26": 4607767040.0, + "27": 4607767040.0, + "28": 4607767040.0, + "29": 4607767040.0, + "30": 4607767040.0, + "31": 4607767040.0, + "32": 4607767040.0, + "33": 4607767040.0, + "34": 4607767040.0, + "35": 4607767040.0, + "36": 4607767040.0, + "37": 4607767040.0, + "38": 4607767040.0, + "39": 4607767040.0, + "40": 4607767040.0, + "41": 4607767040.0, + "42": 4607767040.0, + 
"43": 4607767040.0, + "44": 4607767040.0, + "45": 4607767040.0, + "46": 4607767040.0, + "47": 4607767040.0, + "48": 4607767040.0, + "49": 4607767040.0, + "50": 4607767040.0, + "51": 4607767040.0, + "52": 4607767040.0, + "53": 4607767040.0, + "54": 4607767040.0, + "55": 4607767040.0, + "56": 4607767040.0, + "57": 4607767040.0, + "58": 4607767040.0, + "59": 4607767040.0, + "60": 4607767040.0, + "61": 4607767040.0, + "62": 4607767040.0, + "63": 4607767040.0, + "64": 4607767040.0, + "65": 4607767040.0, + "66": 4607767040.0, + "67": 4607767040.0, + "68": 4607767040.0, + "69": 4607767040.0, + "70": 4607767040.0, + "71": 4607767040.0, + "72": 4607767040.0, + "73": 4607767040.0, + "74": 4607767040.0, + "75": 4607767040.0, + "76": 4607767040.0, + "77": 4607767040.0, + "78": 4607767040.0, + "79": 4607767040.0, + "80": 4607767040.0, + "81": 4607767040.0, + "82": 4607767040.0, + "83": 4607767040.0, + "84": 4607767040.0, + "85": 4607767040.0, + "86": 4607767040.0, + "87": 4607767040.0, + "88": 4607767040.0, + "89": 4607767040.0, + "90": 4607767040.0, + "91": 4607767040.0, + "92": 4607767040.0, + "93": 4607767040.0, + "94": 4607767040.0, + "95": 4607767040.0, + "96": 4607767040.0, + "97": 4607767040.0, + "98": 4607767040.0, + "99": 4607767040.0, + "100": 4607767040.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.06687, + "2": 0.09744, + "3": 0.05659, + "4": 0.05607, + "5": 0.05508, + "6": 0.05545, + "7": 0.06728, + "8": 0.06907, + "9": 0.06794, + "10": 0.05561, + "11": 0.05366, + "12": 0.05478, + "13": 0.05682, + "14": 0.0602, + "15": 0.05987, + "16": 0.05524, + "17": 0.05387, + "18": 0.0976, + "19": 0.06103, + "20": 0.06125, + "21": 0.06399, + "22": 0.06406, + "23": 0.05846, + "24": 0.0595, + "25": 0.05948, + "26": 0.05947, + "27": 0.05843, + "28": 0.06573, + "29": 0.06497, + "30": 0.05987, + "31": 0.05899, + "32": 0.05983, + "33": 0.05828, + "34": 0.06034, + "35": 0.06568, + "36": 0.0606, + "37": 0.05892, + 
"38": 0.05998, + "39": 0.06244, + "40": 0.06557, + "41": 0.05845, + "42": 0.06012, + "43": 0.05942, + "44": 0.05983, + "45": 0.06123, + "46": 0.06648, + "47": 0.06513, + "48": 0.0599, + "49": 0.05866, + "50": 0.06093, + "51": 0.06536, + "52": 0.06086, + "53": 0.05831, + "54": 0.06064, + "55": 0.05976, + "56": 0.06762, + "57": 0.06301, + "58": 0.05996, + "59": 0.05844, + "60": 0.06016, + "61": 0.05903, + "62": 0.05975, + "63": 0.06658, + "64": 0.06396, + "65": 0.05913, + "66": 0.06025, + "67": 0.0595, + "68": 0.06002, + "69": 0.05954, + "70": 0.06032, + "71": 0.06012, + "72": 0.06048, + "73": 0.05933, + "74": 0.05958, + "75": 0.06007, + "76": 0.06034, + "77": 0.05974, + "78": 0.06035, + "79": 0.06014, + "80": 0.06072, + "81": 0.06083, + "82": 0.062, + "83": 0.05964, + "84": 0.06048, + "85": 0.0602, + "86": 0.0607, + "87": 0.05907, + "88": 0.0636, + "89": 0.06003, + "90": 0.06002, + "91": 0.05858, + "92": 0.06008, + "93": 0.05932, + "94": 0.05884, + "95": 0.05815, + "96": 0.05789, + "97": 0.05853, + "98": 0.05852, + "99": 0.05895, + "100": 0.0617 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": 1155.0, + "19": 1454.0, + "20": 1095.0, + "21": 1230.0, + "22": "nan", + "23": 1357.0, + "24": 1150.0, + "25": 1228.0, + "26": 1202.0, + "27": 1326.0, + "28": 1466.0, + "29": 1438.0, + "30": 1238.0, + "31": 1008.0, + "32": 1160.0, + "33": 1371.0, + "34": 1154.0, + "35": 1295.0, + "36": 1156.0, + "37": 1403.0, + "38": 1487.0, + "39": 1429.0, + "40": 1412.0, + "41": 1458.0, + "42": 1316.0, + "43": 1193.0, + "44": 1323.0, + "45": 1297.0, + "46": 1276.0, + "47": 1868.0, + "48": 1251.0, + "49": 1272.0, + "50": 1524.0, + "51": 1367.0, + "52": 1372.0, + "53": 1715.0, + "54": 1485.0, + 
"55": 1482.0, + "56": 1473.0, + "57": 1539.0, + "58": 1736.0, + "59": 1661.0, + "60": 1586.0, + "61": 1691.0, + "62": 1865.0, + "63": 1395.0, + "64": 1846.0, + "65": 1428.0, + "66": 1717.0, + "67": 1700.0, + "68": 1750.0, + "69": 1681.0, + "70": 1861.0, + "71": 2048.0, + "72": 1552.0, + "73": 2010.0, + "74": 1344.0, + "75": 1840.0, + "76": 1846.0, + "77": 2034.0, + "78": 2170.0, + "79": 1949.0, + "80": 2077.0, + "81": 2381.0, + "82": 2390.0, + "83": 1843.0, + "84": 2060.0, + "85": 2317.0, + "86": 1958.0, + "87": 2829.0, + "88": 2046.0, + "89": 2260.0, + "90": 2545.0, + "91": 1801.0, + "92": 2505.0, + "93": 2064.0, + "94": 2223.0, + "95": 2379.0, + "96": 2579.0, + "97": 2411.0, + "98": 2500.0, + "99": 2124.0, + "100": 2119.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..8828025e4b4 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.89618, + "2": 10.89538, + "3": 10.88915, + "4": 10.89094, + "5": 10.8927, + "6": 10.90148, + "7": 10.89392, + "8": 10.90369, + "9": 10.90794, + "10": 10.89108, + "11": 10.88762, + "12": 10.9076, + "13": 10.91429, + "14": 10.90654, + "15": 10.90227, + "16": 10.91042, + "17": 10.89896, + "18": 10.90666, + "19": 10.89908, + "20": 10.90133, + "21": 10.91713, + "22": 10.89139, + "23": 10.90085, + "24": 10.89366, + "25": 10.89372, + "26": 10.87372, + "27": 10.87917, 
+ "28": 10.88756, + "29": 10.85461, + "30": 10.83891, + "31": 10.75166, + "32": 10.8278, + "33": 10.80306, + "34": 10.73559, + "35": 10.7301, + "36": 10.69318, + "37": 10.72854, + "38": 10.65364, + "39": 10.71672, + "40": 10.56996, + "41": 10.58467, + "42": 10.59853, + "43": 10.3948, + "44": 10.44431, + "45": 10.3452, + "46": 10.31919, + "47": 10.49671, + "48": 10.31281, + "49": 10.09084, + "50": 10.31089, + "51": 10.25547, + "52": 10.15856, + "53": 10.38114, + "54": 10.2992, + "55": 10.23806, + "56": 10.00726, + "57": 9.87765, + "58": 10.15279, + "59": 9.94207, + "60": 9.8666, + "61": 10.00032, + "62": 10.23443, + "63": 9.71917, + "64": 10.04209, + "65": 9.30009, + "66": 9.95537, + "67": 9.6499, + "68": 10.00402, + "69": 9.99988, + "70": 9.96383, + "71": 9.84259, + "72": 9.81258, + "73": 9.70921, + "74": 9.19832, + "75": 9.61686, + "76": 9.28859, + "77": 10.20416, + "78": 9.88378, + "79": 9.54296, + "80": 9.57095, + "81": 9.64006, + "82": 9.83648, + "83": 9.47691, + "84": 9.54866, + "85": 9.75198, + "86": 9.21427, + "87": 9.70607, + "88": 9.87307, + "89": 9.72876, + "90": 9.92353, + "91": 9.48236, + "92": 9.47671, + "93": 9.20895, + "94": 8.9625, + "95": 9.62369, + "96": 9.64228, + "97": 9.41575, + "98": 9.77515, + "99": 9.00692, + "100": 9.51305 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 454770688.0, + "2": 454770688.0, + "3": 454770688.0, + "4": 454770688.0, + "5": 454770688.0, + "6": 454770688.0, + "7": 454770688.0, + "8": 454770688.0, + "9": 454770688.0, + "10": 454770688.0, + "11": 454770688.0, + "12": 454770688.0, + "13": 454770688.0, + "14": 454770688.0, + "15": 454770688.0, + "16": 454770688.0, + "17": 454770688.0, + "18": 518880768.0, + "19": 518880768.0, + "20": 518880768.0, + "21": 518880768.0, + "22": 518880768.0, + "23": 518880768.0, + "24": 518880768.0, + "25": 518880768.0, + "26": 518880768.0, + "27": 518880768.0, + "28": 518880768.0, + "29": 518880768.0, + "30": 518880768.0, 
+ "31": 518880768.0, + "32": 518880768.0, + "33": 518880768.0, + "34": 518880768.0, + "35": 518880768.0, + "36": 518880768.0, + "37": 518880768.0, + "38": 518880768.0, + "39": 518880768.0, + "40": 518880768.0, + "41": 518880768.0, + "42": 518880768.0, + "43": 518880768.0, + "44": 518880768.0, + "45": 518880768.0, + "46": 518880768.0, + "47": 518880768.0, + "48": 518880768.0, + "49": 518880768.0, + "50": 518880768.0, + "51": 518880768.0, + "52": 518880768.0, + "53": 518880768.0, + "54": 518880768.0, + "55": 518880768.0, + "56": 518880768.0, + "57": 518880768.0, + "58": 518880768.0, + "59": 518880768.0, + "60": 518880768.0, + "61": 518880768.0, + "62": 518880768.0, + "63": 518880768.0, + "64": 518880768.0, + "65": 518880768.0, + "66": 518880768.0, + "67": 518880768.0, + "68": 518880768.0, + "69": 518880768.0, + "70": 518880768.0, + "71": 518880768.0, + "72": 518880768.0, + "73": 518880768.0, + "74": 518880768.0, + "75": 518880768.0, + "76": 518880768.0, + "77": 518880768.0, + "78": 518880768.0, + "79": 518880768.0, + "80": 518880768.0, + "81": 518880768.0, + "82": 518880768.0, + "83": 518880768.0, + "84": 518880768.0, + "85": 518880768.0, + "86": 518880768.0, + "87": 518880768.0, + "88": 518880768.0, + "89": 518880768.0, + "90": 518880768.0, + "91": 518880768.0, + "92": 518880768.0, + "93": 518880768.0, + "94": 518880768.0, + "95": 518880768.0, + "96": 518880768.0, + "97": 518880768.0, + "98": 518880768.0, + "99": 518880768.0, + "100": 518880768.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4511150592.0, + "2": 4544705536.0, + "3": 4544705536.0, + "4": 4544705536.0, + "5": 4544705536.0, + "6": 4544705536.0, + "7": 4544705536.0, + "8": 4544705536.0, + "9": 4544705536.0, + "10": 4544705536.0, + "11": 4544705536.0, + "12": 4544705536.0, + "13": 4544705536.0, + "14": 4544705536.0, + "15": 4544705536.0, + "16": 4544705536.0, + "17": 4544705536.0, + "18": 4544705536.0, + "19": 4607767040.0, + 
"20": 4607767040.0, + "21": 4607767040.0, + "22": 4607767040.0, + "23": 4607767040.0, + "24": 4607767040.0, + "25": 4607767040.0, + "26": 4607767040.0, + "27": 4607767040.0, + "28": 4607767040.0, + "29": 4607767040.0, + "30": 4607767040.0, + "31": 4607767040.0, + "32": 4607767040.0, + "33": 4607767040.0, + "34": 4607767040.0, + "35": 4607767040.0, + "36": 4607767040.0, + "37": 4607767040.0, + "38": 4607767040.0, + "39": 4607767040.0, + "40": 4607767040.0, + "41": 4607767040.0, + "42": 4607767040.0, + "43": 4607767040.0, + "44": 4607767040.0, + "45": 4607767040.0, + "46": 4607767040.0, + "47": 4607767040.0, + "48": 4607767040.0, + "49": 4607767040.0, + "50": 4607767040.0, + "51": 4607767040.0, + "52": 4607767040.0, + "53": 4607767040.0, + "54": 4607767040.0, + "55": 4607767040.0, + "56": 4607767040.0, + "57": 4607767040.0, + "58": 4607767040.0, + "59": 4607767040.0, + "60": 4607767040.0, + "61": 4607767040.0, + "62": 4607767040.0, + "63": 4607767040.0, + "64": 4607767040.0, + "65": 4607767040.0, + "66": 4607767040.0, + "67": 4607767040.0, + "68": 4607767040.0, + "69": 4607767040.0, + "70": 4607767040.0, + "71": 4607767040.0, + "72": 4607767040.0, + "73": 4607767040.0, + "74": 4607767040.0, + "75": 4607767040.0, + "76": 4607767040.0, + "77": 4607767040.0, + "78": 4607767040.0, + "79": 4607767040.0, + "80": 4607767040.0, + "81": 4607767040.0, + "82": 4607767040.0, + "83": 4607767040.0, + "84": 4607767040.0, + "85": 4607767040.0, + "86": 4607767040.0, + "87": 4607767040.0, + "88": 4607767040.0, + "89": 4607767040.0, + "90": 4607767040.0, + "91": 4607767040.0, + "92": 4607767040.0, + "93": 4607767040.0, + "94": 4607767040.0, + "95": 4607767040.0, + "96": 4607767040.0, + "97": 4607767040.0, + "98": 4607767040.0, + "99": 4607767040.0, + "100": 4607767040.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.49307, + "2": 0.10356, + "3": 0.08062, + "4": 0.0772, + "5": 0.07555, + "6": 0.06677, + "7": 0.06434, + 
"8": 0.06228, + "9": 0.0624, + "10": 0.06213, + "11": 0.06353, + "12": 0.0622, + "13": 0.06377, + "14": 0.06323, + "15": 0.06296, + "16": 0.06251, + "17": 0.06382, + "18": 0.11433, + "19": 0.07262, + "20": 0.07222, + "21": 0.07613, + "22": 0.06977, + "23": 0.06664, + "24": 0.07256, + "25": 0.07344, + "26": 0.0723, + "27": 0.07264, + "28": 0.0697, + "29": 0.06998, + "30": 0.06785, + "31": 0.07022, + "32": 0.06834, + "33": 0.06679, + "34": 0.0678, + "35": 0.0679, + "36": 0.0679, + "37": 0.06826, + "38": 0.06821, + "39": 0.0665, + "40": 0.06798, + "41": 0.06816, + "42": 0.06816, + "43": 0.06901, + "44": 0.06772, + "45": 0.06849, + "46": 0.06843, + "47": 0.06773, + "48": 0.06705, + "49": 0.06755, + "50": 0.06844, + "51": 0.0971, + "52": 0.06968, + "53": 0.06915, + "54": 0.06982, + "55": 0.0703, + "56": 0.07014, + "57": 0.07047, + "58": 0.06835, + "59": 0.07077, + "60": 0.06886, + "61": 0.06929, + "62": 0.06887, + "63": 0.06946, + "64": 0.06924, + "65": 0.06987, + "66": 0.06898, + "67": 0.06873, + "68": 0.0695, + "69": 0.0712, + "70": 0.06928, + "71": 0.0692, + "72": 0.07014, + "73": 0.06964, + "74": 0.06884, + "75": 0.06897, + "76": 0.07036, + "77": 0.0693, + "78": 0.06905, + "79": 0.0698, + "80": 0.06831, + "81": 0.06969, + "82": 0.06871, + "83": 0.07059, + "84": 0.06905, + "85": 0.06955, + "86": 0.06926, + "87": 0.06905, + "88": 0.06912, + "89": 0.07039, + "90": 0.06895, + "91": 0.069, + "92": 0.0698, + "93": 0.06946, + "94": 0.06825, + "95": 0.06933, + "96": 0.06851, + "97": 0.06883, + "98": 0.07421, + "99": 0.06926, + "100": 0.07018 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": 1155.0, + "19": 1454.0, + "20": 1095.0, + "21": 1230.0, + "22": "nan", + "23": 1357.0, 
+ "24": 1150.0, + "25": 1228.0, + "26": 1202.0, + "27": 1326.0, + "28": 1466.0, + "29": 1438.0, + "30": 1238.0, + "31": 1008.0, + "32": 1160.0, + "33": 1371.0, + "34": 1154.0, + "35": 1295.0, + "36": 1156.0, + "37": 1403.0, + "38": 1487.0, + "39": 1429.0, + "40": 1412.0, + "41": 1458.0, + "42": 1316.0, + "43": 1193.0, + "44": 1323.0, + "45": 1297.0, + "46": 1276.0, + "47": 1868.0, + "48": 1251.0, + "49": 1272.0, + "50": 1524.0, + "51": 1367.0, + "52": 1372.0, + "53": 1715.0, + "54": 1485.0, + "55": 1482.0, + "56": 1473.0, + "57": 1539.0, + "58": 1736.0, + "59": 1661.0, + "60": 1586.0, + "61": 1691.0, + "62": 1865.0, + "63": 1395.0, + "64": 1846.0, + "65": 1428.0, + "66": 1717.0, + "67": 1700.0, + "68": 1750.0, + "69": 1681.0, + "70": 1861.0, + "71": 2048.0, + "72": 1552.0, + "73": 2010.0, + "74": 1344.0, + "75": 1840.0, + "76": 1846.0, + "77": 2034.0, + "78": 2170.0, + "79": 1949.0, + "80": 2077.0, + "81": 2381.0, + "82": 2390.0, + "83": 1843.0, + "84": 2060.0, + "85": 2317.0, + "86": 1958.0, + "87": 2829.0, + "88": 2046.0, + "89": 2260.0, + "90": 2545.0, + "91": 1801.0, + "92": 2505.0, + "93": 2064.0, + "94": 2223.0, + "95": 2379.0, + "96": 2579.0, + "97": 2411.0, + "98": 2500.0, + "99": 2124.0, + "100": 2119.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..2dcf90e989f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + 
"end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84012, + "2": 10.83568, + "3": 10.83117, + "4": 10.81867, + "5": 10.84147, + "6": 10.87385, + "7": 10.83678, + "8": 10.84423, + "9": 10.84878, + "10": 10.82107, + "11": 10.85906, + "12": 10.85708, + "13": 10.88186, + "14": 10.87751, + "15": 10.85423, + "16": 10.85071, + "17": 10.84178, + "18": 10.86599, + "19": 10.86171, + "20": 10.85111, + "21": 10.85522, + "22": 10.82384, + "23": 10.86861, + "24": 10.82812, + "25": 10.82229, + "26": 10.83266, + "27": 10.82346, + "28": 10.84367, + "29": 10.83315, + "30": 10.75584, + "31": 10.66438, + "32": 10.78744, + "33": 10.76542, + "34": 10.67705, + "35": 10.68389, + "36": 10.63442, + "37": 10.68265, + "38": 10.6013, + "39": 10.69422, + "40": 10.52756, + "41": 10.54166, + "42": 10.56471, + "43": 10.34495, + "44": 10.38785, + "45": 10.3119, + "46": 10.3021, + "47": 10.479, + "48": 10.28168, + "49": 10.05783, + "50": 10.29392, + "51": 10.2381, + "52": 10.15425, + "53": 10.35958, + "54": 10.26866, + "55": 10.21882, + "56": 9.9963, + "57": 9.87322, + "58": 10.14154, + "59": 9.93616, + "60": 9.8477, + "61": 9.98627, + "62": 10.21642, + "63": 9.69005, + "64": 10.01919, + "65": 9.30027, + "66": 9.9353, + "67": 9.63074, + "68": 9.99036, + "69": 9.98369, + "70": 9.92473, + "71": 9.81441, + "72": 9.79281, + "73": 9.67937, + "74": 9.19331, + "75": 9.60615, + "76": 9.28477, + "77": 10.18543, + "78": 9.86681, + "79": 9.52304, + "80": 9.55867, + "81": 9.62718, + "82": 9.81491, + "83": 9.45803, + "84": 9.53679, + "85": 9.7331, + "86": 9.20021, + "87": 9.69537, + "88": 9.85367, + "89": 9.7164, + "90": 9.91024, + "91": 9.46125, + "92": 9.46592, + "93": 9.19252, + "94": 8.94116, + "95": 9.60586, + "96": 9.62228, + "97": 9.39813, + "98": 9.76041, + "99": 8.9914, + "100": 9.49453 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 454770688.0, + "2": 454770688.0, + "3": 454770688.0, + "4": 454770688.0, + "5": 
454770688.0, + "6": 454770688.0, + "7": 454770688.0, + "8": 454770688.0, + "9": 454770688.0, + "10": 454770688.0, + "11": 454770688.0, + "12": 454770688.0, + "13": 454770688.0, + "14": 454770688.0, + "15": 454770688.0, + "16": 454770688.0, + "17": 454770688.0, + "18": 518880768.0, + "19": 518880768.0, + "20": 518880768.0, + "21": 518880768.0, + "22": 518880768.0, + "23": 518880768.0, + "24": 518880768.0, + "25": 518880768.0, + "26": 518880768.0, + "27": 518880768.0, + "28": 518880768.0, + "29": 518880768.0, + "30": 518880768.0, + "31": 518880768.0, + "32": 518880768.0, + "33": 518880768.0, + "34": 518880768.0, + "35": 518880768.0, + "36": 518880768.0, + "37": 518880768.0, + "38": 518880768.0, + "39": 518880768.0, + "40": 518880768.0, + "41": 518880768.0, + "42": 518880768.0, + "43": 518880768.0, + "44": 518880768.0, + "45": 518880768.0, + "46": 518880768.0, + "47": 518880768.0, + "48": 518880768.0, + "49": 518880768.0, + "50": 518880768.0, + "51": 518880768.0, + "52": 518880768.0, + "53": 518880768.0, + "54": 518880768.0, + "55": 518880768.0, + "56": 518880768.0, + "57": 518880768.0, + "58": 518880768.0, + "59": 518880768.0, + "60": 518880768.0, + "61": 518880768.0, + "62": 518880768.0, + "63": 518880768.0, + "64": 518880768.0, + "65": 518880768.0, + "66": 518880768.0, + "67": 518880768.0, + "68": 518880768.0, + "69": 518880768.0, + "70": 518880768.0, + "71": 518880768.0, + "72": 518880768.0, + "73": 518880768.0, + "74": 518880768.0, + "75": 518880768.0, + "76": 518880768.0, + "77": 518880768.0, + "78": 518880768.0, + "79": 518880768.0, + "80": 518880768.0, + "81": 518880768.0, + "82": 518880768.0, + "83": 518880768.0, + "84": 518880768.0, + "85": 518880768.0, + "86": 518880768.0, + "87": 518880768.0, + "88": 518880768.0, + "89": 518880768.0, + "90": 518880768.0, + "91": 518880768.0, + "92": 518880768.0, + "93": 518880768.0, + "94": 518880768.0, + "95": 518880768.0, + "96": 518880768.0, + "97": 518880768.0, + "98": 518880768.0, + "99": 518880768.0, + "100": 
518880768.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4511150592.0, + "2": 4544705536.0, + "3": 4544705536.0, + "4": 4544705536.0, + "5": 4544705536.0, + "6": 4544705536.0, + "7": 4544705536.0, + "8": 4544705536.0, + "9": 4544705536.0, + "10": 4544705536.0, + "11": 4544705536.0, + "12": 4544705536.0, + "13": 4544705536.0, + "14": 4544705536.0, + "15": 4544705536.0, + "16": 4544705536.0, + "17": 4544705536.0, + "18": 4544705536.0, + "19": 4607767040.0, + "20": 4607767040.0, + "21": 4607767040.0, + "22": 4607767040.0, + "23": 4607767040.0, + "24": 4607767040.0, + "25": 4607767040.0, + "26": 4607767040.0, + "27": 4607767040.0, + "28": 4607767040.0, + "29": 4607767040.0, + "30": 4607767040.0, + "31": 4607767040.0, + "32": 4607767040.0, + "33": 4607767040.0, + "34": 4607767040.0, + "35": 4607767040.0, + "36": 4607767040.0, + "37": 4607767040.0, + "38": 4607767040.0, + "39": 4607767040.0, + "40": 4607767040.0, + "41": 4607767040.0, + "42": 4607767040.0, + "43": 4607767040.0, + "44": 4607767040.0, + "45": 4607767040.0, + "46": 4607767040.0, + "47": 4607767040.0, + "48": 4607767040.0, + "49": 4607767040.0, + "50": 4607767040.0, + "51": 4607767040.0, + "52": 4607767040.0, + "53": 4607767040.0, + "54": 4607767040.0, + "55": 4607767040.0, + "56": 4607767040.0, + "57": 4607767040.0, + "58": 4607767040.0, + "59": 4607767040.0, + "60": 4607767040.0, + "61": 4607767040.0, + "62": 4607767040.0, + "63": 4607767040.0, + "64": 4607767040.0, + "65": 4607767040.0, + "66": 4607767040.0, + "67": 4607767040.0, + "68": 4607767040.0, + "69": 4607767040.0, + "70": 4607767040.0, + "71": 4607767040.0, + "72": 4607767040.0, + "73": 4607767040.0, + "74": 4607767040.0, + "75": 4607767040.0, + "76": 4607767040.0, + "77": 4607767040.0, + "78": 4607767040.0, + "79": 4607767040.0, + "80": 4607767040.0, + "81": 4607767040.0, + "82": 4607767040.0, + "83": 4607767040.0, + "84": 4607767040.0, + "85": 4607767040.0, + "86": 
4607767040.0, + "87": 4607767040.0, + "88": 4607767040.0, + "89": 4607767040.0, + "90": 4607767040.0, + "91": 4607767040.0, + "92": 4607767040.0, + "93": 4607767040.0, + "94": 4607767040.0, + "95": 4607767040.0, + "96": 4607767040.0, + "97": 4607767040.0, + "98": 4607767040.0, + "99": 4607767040.0, + "100": 4607767040.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.15163, + "2": 0.14001, + "3": 0.09738, + "4": 0.09666, + "5": 0.09591, + "6": 0.09502, + "7": 0.30332, + "8": 0.09429, + "9": 0.09574, + "10": 0.32414, + "11": 0.10077, + "12": 0.09969, + "13": 0.10068, + "14": 0.09948, + "15": 0.09294, + "16": 0.09255, + "17": 0.09477, + "18": 0.14327, + "19": 0.10341, + "20": 0.10247, + "21": 0.11373, + "22": 0.09883, + "23": 0.1005, + "24": 0.10247, + "25": 0.10217, + "26": 0.10239, + "27": 0.36118, + "28": 0.10234, + "29": 0.1012, + "30": 0.10299, + "31": 0.1015, + "32": 0.10188, + "33": 0.32101, + "34": 0.10218, + "35": 0.10166, + "36": 0.10235, + "37": 0.10172, + "38": 0.10247, + "39": 0.10164, + "40": 0.10267, + "41": 0.1028, + "42": 0.10313, + "43": 0.1019, + "44": 0.10268, + "45": 0.10251, + "46": 0.10335, + "47": 0.10126, + "48": 0.10332, + "49": 0.10228, + "50": 0.10227, + "51": 0.10617, + "52": 0.10408, + "53": 0.10202, + "54": 0.10229, + "55": 0.10292, + "56": 0.10208, + "57": 0.10265, + "58": 0.10167, + "59": 0.1041, + "60": 0.10412, + "61": 0.10262, + "62": 0.10173, + "63": 0.10364, + "64": 0.10282, + "65": 0.10402, + "66": 0.10211, + "67": 0.10345, + "68": 0.10307, + "69": 0.10364, + "70": 0.10244, + "71": 0.10307, + "72": 0.10282, + "73": 0.10422, + "74": 0.1031, + "75": 0.10272, + "76": 0.10576, + "77": 0.10322, + "78": 0.10398, + "79": 0.10274, + "80": 0.10278, + "81": 0.10314, + "82": 0.10329, + "83": 0.10412, + "84": 0.10207, + "85": 0.10239, + "86": 0.10321, + "87": 0.10221, + "88": 0.10195, + "89": 0.10399, + "90": 0.10279, + "91": 0.10252, + "92": 0.10385, + "93": 0.10387, + 
"94": 0.10226, + "95": 0.10105, + "96": 0.10245, + "97": 0.10298, + "98": 0.1036, + "99": 0.10248, + "100": 0.10187 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": 1199.0, + "19": 1499.0, + "20": 1143.0, + "21": 1307.0, + "22": "nan", + "23": 1326.0, + "24": 1091.0, + "25": 1185.0, + "26": 1131.0, + "27": 1294.0, + "28": 1528.0, + "29": 1487.0, + "30": 1375.0, + "31": 1058.0, + "32": 1170.0, + "33": 1406.0, + "34": 1265.0, + "35": 1207.0, + "36": 1197.0, + "37": 1581.0, + "38": 1477.0, + "39": 1542.0, + "40": 1423.0, + "41": 1538.0, + "42": 1460.0, + "43": 1153.0, + "44": 1282.0, + "45": 1344.0, + "46": 1162.0, + "47": 1831.0, + "48": 1308.0, + "49": 1218.0, + "50": 1559.0, + "51": 1515.0, + "52": 1569.0, + "53": 1758.0, + "54": 1439.0, + "55": 1573.0, + "56": 1418.0, + "57": 1514.0, + "58": 1624.0, + "59": 1622.0, + "60": 1564.0, + "61": 1714.0, + "62": 1854.0, + "63": 1577.0, + "64": 1773.0, + "65": 1496.0, + "66": 1668.0, + "67": 1597.0, + "68": 1804.0, + "69": 1804.0, + "70": 1898.0, + "71": 1957.0, + "72": 1568.0, + "73": 2020.0, + "74": 1322.0, + "75": 1893.0, + "76": 1826.0, + "77": 2136.0, + "78": 2137.0, + "79": 1990.0, + "80": 2134.0, + "81": 2465.0, + "82": 2240.0, + "83": 1883.0, + "84": 2128.0, + "85": 2231.0, + "86": 1998.0, + "87": 2747.0, + "88": 2122.0, + "89": 2331.0, + "90": 2378.0, + "91": 1880.0, + "92": 2563.0, + "93": 2065.0, + "94": 2127.0, + "95": 2285.0, + "96": 2665.0, + "97": 2514.0, + "98": 2516.0, + "99": 2265.0, + "100": 2233.0 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..ad019904f52 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84012, + "2": 10.83568, + "3": 10.83117, + "4": 10.81867, + "5": 10.84147, + "6": 10.87385, + "7": 10.83678, + "8": 10.84423, + "9": 10.84878, + "10": 10.82107, + "11": 10.85906, + "12": 10.85708, + "13": 10.88186, + "14": 10.87751, + "15": 10.85423, + "16": 10.85071, + "17": 10.84178, + "18": 10.86599, + "19": 10.86171, + "20": 10.85111, + "21": 10.85522, + "22": 10.82384, + "23": 10.86861, + "24": 10.82812, + "25": 10.82229, + "26": 10.83266, + "27": 10.82346, + "28": 10.84367, + "29": 10.83315, + "30": 10.75584, + "31": 10.66438, + "32": 10.78744, + "33": 10.76542, + "34": 10.67705, + "35": 10.68389, + "36": 10.63442, + "37": 10.68265, + "38": 10.6013, + "39": 10.69422, + "40": 10.52756, + "41": 10.54166, + "42": 10.56471, + "43": 10.34495, + "44": 10.38785, + "45": 10.3119, + "46": 10.3021, + "47": 10.479, + "48": 10.28168, + "49": 10.05783, + "50": 10.29392, + "51": 10.2381, + "52": 10.15425, + "53": 10.35958, + "54": 10.26866, + "55": 10.21882, + "56": 9.9963, + "57": 9.87322, + "58": 10.14154, + "59": 9.93616, + "60": 9.8477, + "61": 9.98627, + "62": 10.21642, + "63": 9.69005, + "64": 10.01919, + "65": 9.30027, + "66": 9.9353, + "67": 9.63074, + "68": 9.99036, + "69": 9.98369, + "70": 9.92473, + "71": 
9.81441, + "72": 9.79281, + "73": 9.67937, + "74": 9.19331, + "75": 9.60615, + "76": 9.28477, + "77": 10.18543, + "78": 9.86681, + "79": 9.52304, + "80": 9.55867, + "81": 9.62718, + "82": 9.81491, + "83": 9.45803, + "84": 9.53679, + "85": 9.7331, + "86": 9.20021, + "87": 9.69537, + "88": 9.85367, + "89": 9.7164, + "90": 9.91024, + "91": 9.46125, + "92": 9.46592, + "93": 9.19252, + "94": 8.94116, + "95": 9.60586, + "96": 9.62228, + "97": 9.39813, + "98": 9.76041, + "99": 8.9914, + "100": 9.49453 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 454770688.0, + "2": 454770688.0, + "3": 454770688.0, + "4": 454770688.0, + "5": 454770688.0, + "6": 454770688.0, + "7": 454770688.0, + "8": 454770688.0, + "9": 454770688.0, + "10": 454770688.0, + "11": 454770688.0, + "12": 454770688.0, + "13": 454770688.0, + "14": 454770688.0, + "15": 454770688.0, + "16": 454770688.0, + "17": 454770688.0, + "18": 518880768.0, + "19": 518880768.0, + "20": 518880768.0, + "21": 518880768.0, + "22": 518880768.0, + "23": 518880768.0, + "24": 518880768.0, + "25": 518880768.0, + "26": 518880768.0, + "27": 518880768.0, + "28": 518880768.0, + "29": 518880768.0, + "30": 518880768.0, + "31": 518880768.0, + "32": 518880768.0, + "33": 518880768.0, + "34": 518880768.0, + "35": 518880768.0, + "36": 518880768.0, + "37": 518880768.0, + "38": 518880768.0, + "39": 518880768.0, + "40": 518880768.0, + "41": 518880768.0, + "42": 518880768.0, + "43": 518880768.0, + "44": 518880768.0, + "45": 518880768.0, + "46": 518880768.0, + "47": 518880768.0, + "48": 518880768.0, + "49": 518880768.0, + "50": 518880768.0, + "51": 518880768.0, + "52": 518880768.0, + "53": 518880768.0, + "54": 518880768.0, + "55": 518880768.0, + "56": 518880768.0, + "57": 518880768.0, + "58": 518880768.0, + "59": 518880768.0, + "60": 518880768.0, + "61": 518880768.0, + "62": 518880768.0, + "63": 518880768.0, + "64": 518880768.0, + "65": 518880768.0, + "66": 518880768.0, + "67": 
518880768.0, + "68": 518880768.0, + "69": 518880768.0, + "70": 518880768.0, + "71": 518880768.0, + "72": 518880768.0, + "73": 518880768.0, + "74": 518880768.0, + "75": 518880768.0, + "76": 518880768.0, + "77": 518880768.0, + "78": 518880768.0, + "79": 518880768.0, + "80": 518880768.0, + "81": 518880768.0, + "82": 518880768.0, + "83": 518880768.0, + "84": 518880768.0, + "85": 518880768.0, + "86": 518880768.0, + "87": 518880768.0, + "88": 518880768.0, + "89": 518880768.0, + "90": 518880768.0, + "91": 518880768.0, + "92": 518880768.0, + "93": 518880768.0, + "94": 518880768.0, + "95": 518880768.0, + "96": 518880768.0, + "97": 518880768.0, + "98": 518880768.0, + "99": 518880768.0, + "100": 518880768.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4511150592.0, + "2": 4544705536.0, + "3": 4544705536.0, + "4": 4544705536.0, + "5": 4544705536.0, + "6": 4544705536.0, + "7": 4544705536.0, + "8": 4544705536.0, + "9": 4544705536.0, + "10": 4544705536.0, + "11": 4544705536.0, + "12": 4544705536.0, + "13": 4544705536.0, + "14": 4544705536.0, + "15": 4544705536.0, + "16": 4544705536.0, + "17": 4544705536.0, + "18": 4544705536.0, + "19": 4607767040.0, + "20": 4607767040.0, + "21": 4607767040.0, + "22": 4607767040.0, + "23": 4607767040.0, + "24": 4607767040.0, + "25": 4607767040.0, + "26": 4607767040.0, + "27": 4607767040.0, + "28": 4607767040.0, + "29": 4607767040.0, + "30": 4607767040.0, + "31": 4607767040.0, + "32": 4607767040.0, + "33": 4607767040.0, + "34": 4607767040.0, + "35": 4607767040.0, + "36": 4607767040.0, + "37": 4607767040.0, + "38": 4607767040.0, + "39": 4607767040.0, + "40": 4607767040.0, + "41": 4607767040.0, + "42": 4607767040.0, + "43": 4607767040.0, + "44": 4607767040.0, + "45": 4607767040.0, + "46": 4607767040.0, + "47": 4607767040.0, + "48": 4607767040.0, + "49": 4607767040.0, + "50": 4607767040.0, + "51": 4607767040.0, + "52": 4607767040.0, + "53": 4607767040.0, + "54": 4607767040.0, 
+ "55": 4607767040.0, + "56": 4607767040.0, + "57": 4607767040.0, + "58": 4607767040.0, + "59": 4607767040.0, + "60": 4607767040.0, + "61": 4607767040.0, + "62": 4607767040.0, + "63": 4607767040.0, + "64": 4607767040.0, + "65": 4607767040.0, + "66": 4607767040.0, + "67": 4607767040.0, + "68": 4607767040.0, + "69": 4607767040.0, + "70": 4607767040.0, + "71": 4607767040.0, + "72": 4607767040.0, + "73": 4607767040.0, + "74": 4607767040.0, + "75": 4607767040.0, + "76": 4607767040.0, + "77": 4607767040.0, + "78": 4607767040.0, + "79": 4607767040.0, + "80": 4607767040.0, + "81": 4607767040.0, + "82": 4607767040.0, + "83": 4607767040.0, + "84": 4607767040.0, + "85": 4607767040.0, + "86": 4607767040.0, + "87": 4607767040.0, + "88": 4607767040.0, + "89": 4607767040.0, + "90": 4607767040.0, + "91": 4607767040.0, + "92": 4607767040.0, + "93": 4607767040.0, + "94": 4607767040.0, + "95": 4607767040.0, + "96": 4607767040.0, + "97": 4607767040.0, + "98": 4607767040.0, + "99": 4607767040.0, + "100": 4607767040.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.67451, + "2": 0.15078, + "3": 0.09855, + "4": 0.09629, + "5": 0.09742, + "6": 0.09583, + "7": 0.09793, + "8": 0.09606, + "9": 0.10504, + "10": 0.09835, + "11": 0.0952, + "12": 0.09441, + "13": 0.0944, + "14": 0.0943, + "15": 0.09542, + "16": 0.09535, + "17": 0.0966, + "18": 0.13822, + "19": 0.10314, + "20": 0.10196, + "21": 0.10307, + "22": 0.09787, + "23": 0.11254, + "24": 0.10384, + "25": 0.10311, + "26": 0.10301, + "27": 0.10387, + "28": 0.10266, + "29": 0.10411, + "30": 0.11398, + "31": 0.32837, + "32": 0.10305, + "33": 0.10287, + "34": 0.10161, + "35": 0.10254, + "36": 0.10257, + "37": 0.10309, + "38": 0.10366, + "39": 0.1025, + "40": 0.1018, + "41": 0.10351, + "42": 0.10149, + "43": 0.10316, + "44": 0.10083, + "45": 0.10239, + "46": 0.34508, + "47": 0.10287, + "48": 0.36063, + "49": 0.10328, + "50": 0.10084, + "51": 0.10526, + "52": 0.10046, + "53": 
0.09909, + "54": 0.09965, + "55": 0.09957, + "56": 0.09996, + "57": 0.09902, + "58": 0.1004, + "59": 0.10194, + "60": 0.101, + "61": 0.09902, + "62": 0.10015, + "63": 0.09937, + "64": 0.1003, + "65": 0.09988, + "66": 0.10055, + "67": 0.09976, + "68": 0.10001, + "69": 0.10157, + "70": 0.10136, + "71": 0.09951, + "72": 0.10026, + "73": 0.09946, + "74": 0.10113, + "75": 0.09881, + "76": 0.1007, + "77": 0.09917, + "78": 0.09983, + "79": 0.10051, + "80": 0.10101, + "81": 0.09942, + "82": 0.09995, + "83": 0.09932, + "84": 0.10088, + "85": 0.0992, + "86": 0.10084, + "87": 0.099, + "88": 0.0997, + "89": 0.10146, + "90": 0.10228, + "91": 0.09992, + "92": 0.09981, + "93": 0.09937, + "94": 0.10022, + "95": 0.09934, + "96": 0.10011, + "97": 0.09912, + "98": 0.09963, + "99": 0.10098, + "100": 0.10322 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": 1199.0, + "19": 1499.0, + "20": 1143.0, + "21": 1307.0, + "22": "nan", + "23": 1326.0, + "24": 1091.0, + "25": 1185.0, + "26": 1131.0, + "27": 1294.0, + "28": 1528.0, + "29": 1487.0, + "30": 1375.0, + "31": 1058.0, + "32": 1170.0, + "33": 1406.0, + "34": 1265.0, + "35": 1207.0, + "36": 1197.0, + "37": 1581.0, + "38": 1477.0, + "39": 1542.0, + "40": 1423.0, + "41": 1538.0, + "42": 1460.0, + "43": 1153.0, + "44": 1282.0, + "45": 1344.0, + "46": 1162.0, + "47": 1831.0, + "48": 1308.0, + "49": 1218.0, + "50": 1559.0, + "51": 1515.0, + "52": 1569.0, + "53": 1758.0, + "54": 1439.0, + "55": 1573.0, + "56": 1418.0, + "57": 1514.0, + "58": 1624.0, + "59": 1622.0, + "60": 1564.0, + "61": 1714.0, + "62": 1854.0, + "63": 1577.0, + "64": 1773.0, + "65": 1496.0, + "66": 1668.0, + "67": 1597.0, + "68": 1804.0, + "69": 1804.0, + "70": 1898.0, + "71": 
1957.0, + "72": 1568.0, + "73": 2020.0, + "74": 1322.0, + "75": 1893.0, + "76": 1826.0, + "77": 2136.0, + "78": 2137.0, + "79": 1990.0, + "80": 2134.0, + "81": 2465.0, + "82": 2240.0, + "83": 1883.0, + "84": 2128.0, + "85": 2231.0, + "86": 1998.0, + "87": 2747.0, + "88": 2122.0, + "89": 2331.0, + "90": 2378.0, + "91": 1880.0, + "92": 2563.0, + "93": 2065.0, + "94": 2127.0, + "95": 2285.0, + "96": 2665.0, + "97": 2514.0, + "98": 2516.0, + "99": 2265.0, + "100": 2233.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json index 2e7b0f25d33..f558db5b4f0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.86114, + "2": 10.86847, + "3": 10.86465, + "4": 10.86473, "5": 10.87296, + "6": 10.88615, + "7": 10.8645, + "8": 10.87335, + "9": 10.87481, "10": 10.83903, + "11": 10.86614, + "12": 10.86169, + "13": 10.87354, + "14": 10.87593, "15": 10.8216, + "16": 10.83071, + "17": 10.79411, + "18": 10.81433, + "19": 10.80011, "20": 10.71697, + "21": 10.70154, + "22": 10.57235, + "23": 10.70749, + "24": 10.6006, "25": 10.5566, + "26": 10.60138, + "27": 10.60955, + "28": 10.55626, + "29": 10.57268, "30": 10.36032, + "31": 10.11454, + "32": 10.45937, + "33": 10.45389, + "34": 10.21168, "35": 10.26583, + "36": 10.21483, + "37": 10.34814, + "38": 10.19787, + "39": 10.39713, "40": 10.08719, + "41": 10.13539, + "42": 10.20638, + "43": 9.82769, + "44": 9.95444, "45": 9.82374, + "46": 9.79864, + "47": 10.12579, + "48": 9.83547, + "49": 
9.51888, "50": 9.90498 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1696.0, + "2": 1671.0, + "3": 1537.0, + "4": 1705.0, "5": 1776.0, + "6": 1735.0, + "7": 1767.0, + "8": 1569.0, + "9": 1750.0, "10": 1413.0, + "11": 1746.0, + "12": 1681.0, + "13": 1828.0, + "14": 1739.0, "15": 1801.0, + "16": 1895.0, + "17": 1781.0, + "18": 1693.0, + "19": 1705.0, "20": 1624.0, + "21": 1838.0, + "22": 1792.0, + "23": 2005.0, + "24": 1601.0, "25": 1483.0, + "26": 1615.0, + "27": 1844.0, + "28": 1961.0, + "29": 2012.0, "30": 1856.0, + "31": 1502.0, + "32": 1794.0, + "33": 2118.0, + "34": 1742.0, "35": 1953.0, + "36": 1940.0, + "37": 2324.0, + "38": 2109.0, + "39": 2369.0, "40": 2183.0, + "41": 2063.0, + "42": 2232.0, + "43": 1917.0, + "44": 2084.0, "45": 2058.0, + "46": 2144.0, + "47": 2488.0, + "48": 2407.0, + "49": 2125.0, "50": 2134.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 952847360.0, + "2": 952847360.0, + "3": 952847360.0, + "4": 952847360.0, "5": 952847360.0, + "6": 952847360.0, + "7": 952847360.0, + "8": 952847360.0, + "9": 952847360.0, "10": 952847360.0, + "11": 952847360.0, + "12": 952847360.0, + "13": 952847360.0, + "14": 952847360.0, "15": 952847360.0, + "16": 952847360.0, + "17": 952847360.0, + "18": 952847360.0, + "19": 952847360.0, "20": 952847360.0, + "21": 952847360.0, + "22": 952847360.0, + "23": 952847360.0, + "24": 952847360.0, "25": 952847360.0, + "26": 952847360.0, + "27": 952847360.0, + "28": 952847360.0, + "29": 952847360.0, "30": 952847360.0, + "31": 952847360.0, + "32": 952847360.0, + "33": 952847360.0, + "34": 952847360.0, "35": 952847360.0, + "36": 952847360.0, + "37": 952847360.0, + "38": 952847360.0, + "39": 952847360.0, "40": 952847360.0, + "41": 952847360.0, + "42": 952847360.0, + "43": 952847360.0, + "44": 952847360.0, "45": 952847360.0, + "46": 952847360.0, + "47": 952847360.0, + "48": 952847360.0, + 
"49": 952847360.0, "50": 952847360.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 3275808768.0, + "2": 3637371904.0, + "3": 3637371904.0, + "4": 3637371904.0, "5": 3637371904.0, + "6": 3637371904.0, + "7": 3637371904.0, + "8": 3637371904.0, + "9": 3637371904.0, "10": 3637371904.0, + "11": 3637371904.0, + "12": 3637371904.0, + "13": 3637371904.0, + "14": 3637371904.0, "15": 3637371904.0, + "16": 3637371904.0, + "17": 3637371904.0, + "18": 3637371904.0, + "19": 3637371904.0, "20": 3637371904.0, + "21": 3637371904.0, + "22": 3637371904.0, + "23": 3637371904.0, + "24": 3637371904.0, "25": 3637371904.0, + "26": 3637371904.0, + "27": 3637371904.0, + "28": 3637371904.0, + "29": 3637371904.0, "30": 3637371904.0, + "31": 3637371904.0, + "32": 3637371904.0, + "33": 3637371904.0, + "34": 3637371904.0, "35": 3637371904.0, + "36": 3637371904.0, + "37": 3637371904.0, + "38": 3637371904.0, + "39": 3637371904.0, "40": 3637371904.0, + "41": 3637371904.0, + "42": 3637371904.0, + "43": 3637371904.0, + "44": 3637371904.0, "45": 3637371904.0, + "46": 3637371904.0, + "47": 3637371904.0, + "48": 3637371904.0, + "49": 3637371904.0, "50": 3637371904.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 7.61967, - "5": 0.10355, - "10": 0.08878, - "15": 0.08692, - "20": 0.08664, - "25": 0.0863, - "30": 0.08732, - "35": 0.08763, - "40": 0.08674, - "45": 0.087, - "50": 0.08652 + "1": 8.69225, + "2": 0.11422, + "3": 0.10425, + "4": 0.10234, + "5": 0.10569, + "6": 0.10564, + "7": 0.1017, + "8": 0.10104, + "9": 0.10184, + "10": 0.10389, + "11": 0.10239, + "12": 0.10308, + "13": 0.10366, + "14": 0.10282, + "15": 0.10527, + "16": 0.10468, + "17": 0.10379, + "18": 0.10311, + "19": 0.10589, + "20": 0.1039, + "21": 0.10317, + "22": 0.10318, + "23": 0.10407, + "24": 0.1045, + "25": 0.10518, + "26": 0.10372, + "27": 0.10299, + "28": 0.1034, + "29": 
0.1018, + "30": 0.10184, + "31": 0.10197, + "32": 0.10201, + "33": 0.10166, + "34": 0.1031, + "35": 0.1016, + "36": 0.10083, + "37": 0.09963, + "38": 0.10028, + "39": 0.10032, + "40": 0.10016, + "41": 0.09952, + "42": 0.09904, + "43": 0.09972, + "44": 0.10089, + "45": 0.10162, + "46": 0.10079, + "47": 0.09922, + "48": 0.10128, + "49": 0.09992, + "50": 0.0985 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..64d215b77ba --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86114, + "2": 10.86847, + "3": 10.86465, + "4": 10.86473, + "5": 10.87296, + "6": 10.88615, + "7": 10.8645, + "8": 10.87335, + "9": 10.87481, + "10": 10.83903, + "11": 10.86614, + "12": 10.86169, + "13": 10.87354, + "14": 10.87593, + "15": 10.8216, + "16": 10.83071, + "17": 10.79411, + "18": 10.81433, + "19": 10.80011, + "20": 10.71697, + "21": 10.70154, + "22": 10.57235, + "23": 10.70749, + "24": 10.6006, + "25": 10.5566, + "26": 10.60138, + "27": 10.60955, + "28": 10.55626, + "29": 10.57268, + "30": 10.36032, + "31": 10.11454, + "32": 10.45937, + "33": 10.45389, + "34": 10.21168, + "35": 10.26583, + "36": 10.21483, + "37": 10.34814, + "38": 10.19787, + "39": 10.39713, + "40": 10.08719, + "41": 10.13539, + "42": 10.20638, + "43": 9.82769, + "44": 9.95444, + "45": 9.82374, + "46": 9.79864, + "47": 10.12579, + "48": 9.83547, + "49": 9.51888, + "50": 9.90498 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1696.0, + "2": 1671.0, + "3": 1537.0, + "4": 
1705.0, + "5": 1776.0, + "6": 1735.0, + "7": 1767.0, + "8": 1569.0, + "9": 1750.0, + "10": 1413.0, + "11": 1746.0, + "12": 1681.0, + "13": 1828.0, + "14": 1739.0, + "15": 1801.0, + "16": 1895.0, + "17": 1781.0, + "18": 1693.0, + "19": 1705.0, + "20": 1624.0, + "21": 1838.0, + "22": 1792.0, + "23": 2005.0, + "24": 1601.0, + "25": 1483.0, + "26": 1615.0, + "27": 1844.0, + "28": 1961.0, + "29": 2012.0, + "30": 1856.0, + "31": 1502.0, + "32": 1794.0, + "33": 2118.0, + "34": 1742.0, + "35": 1953.0, + "36": 1940.0, + "37": 2324.0, + "38": 2109.0, + "39": 2369.0, + "40": 2183.0, + "41": 2063.0, + "42": 2232.0, + "43": 1917.0, + "44": 2084.0, + "45": 2058.0, + "46": 2144.0, + "47": 2488.0, + "48": 2407.0, + "49": 2125.0, + "50": 2134.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 952847360.0, + "2": 952847360.0, + "3": 952847360.0, + "4": 952847360.0, + "5": 952847360.0, + "6": 952847360.0, + "7": 952847360.0, + "8": 952847360.0, + "9": 952847360.0, + "10": 952847360.0, + "11": 952847360.0, + "12": 952847360.0, + "13": 952847360.0, + "14": 952847360.0, + "15": 952847360.0, + "16": 952847360.0, + "17": 952847360.0, + "18": 952847360.0, + "19": 952847360.0, + "20": 952847360.0, + "21": 952847360.0, + "22": 952847360.0, + "23": 952847360.0, + "24": 952847360.0, + "25": 952847360.0, + "26": 952847360.0, + "27": 952847360.0, + "28": 952847360.0, + "29": 952847360.0, + "30": 952847360.0, + "31": 952847360.0, + "32": 952847360.0, + "33": 952847360.0, + "34": 952847360.0, + "35": 952847360.0, + "36": 952847360.0, + "37": 952847360.0, + "38": 952847360.0, + "39": 952847360.0, + "40": 952847360.0, + "41": 952847360.0, + "42": 952847360.0, + "43": 952847360.0, + "44": 952847360.0, + "45": 952847360.0, + "46": 952847360.0, + "47": 952847360.0, + "48": 952847360.0, + "49": 952847360.0, + "50": 952847360.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { 
+ "1": 3275808768.0, + "2": 3637371904.0, + "3": 3637371904.0, + "4": 3637371904.0, + "5": 3637371904.0, + "6": 3637371904.0, + "7": 3637371904.0, + "8": 3637371904.0, + "9": 3637371904.0, + "10": 3637371904.0, + "11": 3637371904.0, + "12": 3637371904.0, + "13": 3637371904.0, + "14": 3637371904.0, + "15": 3637371904.0, + "16": 3637371904.0, + "17": 3637371904.0, + "18": 3637371904.0, + "19": 3637371904.0, + "20": 3637371904.0, + "21": 3637371904.0, + "22": 3637371904.0, + "23": 3637371904.0, + "24": 3637371904.0, + "25": 3637371904.0, + "26": 3637371904.0, + "27": 3637371904.0, + "28": 3637371904.0, + "29": 3637371904.0, + "30": 3637371904.0, + "31": 3637371904.0, + "32": 3637371904.0, + "33": 3637371904.0, + "34": 3637371904.0, + "35": 3637371904.0, + "36": 3637371904.0, + "37": 3637371904.0, + "38": 3637371904.0, + "39": 3637371904.0, + "40": 3637371904.0, + "41": 3637371904.0, + "42": 3637371904.0, + "43": 3637371904.0, + "44": 3637371904.0, + "45": 3637371904.0, + "46": 3637371904.0, + "47": 3637371904.0, + "48": 3637371904.0, + "49": 3637371904.0, + "50": 3637371904.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 7.73281, + "2": 0.12339, + "3": 0.09356, + "4": 0.09244, + "5": 0.0876, + "6": 0.08746, + "7": 0.08714, + "8": 0.08631, + "9": 0.08986, + "10": 0.09011, + "11": 0.09237, + "12": 0.09085, + "13": 0.09077, + "14": 0.09007, + "15": 0.0931, + "16": 0.09275, + "17": 0.08996, + "18": 0.0933, + "19": 0.09008, + "20": 0.0898, + "21": 0.08974, + "22": 0.09148, + "23": 0.09027, + "24": 0.09097, + "25": 0.08936, + "26": 0.08932, + "27": 0.09046, + "28": 0.09053, + "29": 0.08937, + "30": 0.08941, + "31": 0.09008, + "32": 0.08969, + "33": 0.08975, + "34": 0.09039, + "35": 0.08967, + "36": 0.08981, + "37": 0.09109, + "38": 0.08894, + "39": 0.09029, + "40": 0.09, + "41": 0.0901, + "42": 0.08944, + "43": 0.09026, + "44": 0.09008, + "45": 0.09096, + "46": 0.08999, + "47": 0.08974, + "48": 0.08959, + 
"49": 0.09001, + "50": 0.08972 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..6660a5e446e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86114, + "2": 10.86847, + "3": 10.86465, + "4": 10.86473, + "5": 10.87296, + "6": 10.88615, + "7": 10.8645, + "8": 10.87335, + "9": 10.87481, + "10": 10.83903, + "11": 10.86614, + "12": 10.86169, + "13": 10.87354, + "14": 10.87593, + "15": 10.8216, + "16": 10.83071, + "17": 10.79411, + "18": 10.81433, + "19": 10.80011, + "20": 10.71697, + "21": 10.70154, + "22": 10.57235, + "23": 10.70749, + "24": 10.6006, + "25": 10.5566, + "26": 10.60138, + "27": 10.60955, + "28": 10.55626, + "29": 10.57268, + "30": 10.36032, + "31": 10.11454, + "32": 10.45937, + "33": 10.45389, + "34": 10.21168, + "35": 10.26583, + "36": 10.21483, + "37": 10.34814, + "38": 10.19787, + "39": 10.39713, + "40": 10.08719, + "41": 10.13539, + "42": 10.20638, + "43": 9.82769, + "44": 9.95444, + "45": 9.82374, + "46": 9.79864, + "47": 10.12579, + "48": 9.83547, + "49": 9.51888, + "50": 9.90498 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1696.0, + "2": 1671.0, + "3": 1537.0, + "4": 1705.0, + "5": 1776.0, + "6": 1735.0, + "7": 1767.0, + "8": 1569.0, + "9": 1750.0, + "10": 1413.0, + "11": 1746.0, + "12": 1681.0, + "13": 1828.0, + "14": 1739.0, + "15": 1801.0, + "16": 1895.0, + "17": 1781.0, + "18": 1693.0, + "19": 1705.0, + "20": 1624.0, + "21": 1838.0, + "22": 1792.0, + "23": 2005.0, + "24": 1601.0, + "25": 1483.0, + "26": 
1615.0, + "27": 1844.0, + "28": 1961.0, + "29": 2012.0, + "30": 1856.0, + "31": 1502.0, + "32": 1794.0, + "33": 2118.0, + "34": 1742.0, + "35": 1953.0, + "36": 1940.0, + "37": 2324.0, + "38": 2109.0, + "39": 2369.0, + "40": 2183.0, + "41": 2063.0, + "42": 2232.0, + "43": 1917.0, + "44": 2084.0, + "45": 2058.0, + "46": 2144.0, + "47": 2488.0, + "48": 2407.0, + "49": 2125.0, + "50": 2134.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 952847360.0, + "2": 952847360.0, + "3": 952847360.0, + "4": 952847360.0, + "5": 952847360.0, + "6": 952847360.0, + "7": 952847360.0, + "8": 952847360.0, + "9": 952847360.0, + "10": 952847360.0, + "11": 952847360.0, + "12": 952847360.0, + "13": 952847360.0, + "14": 952847360.0, + "15": 952847360.0, + "16": 952847360.0, + "17": 952847360.0, + "18": 952847360.0, + "19": 952847360.0, + "20": 952847360.0, + "21": 952847360.0, + "22": 952847360.0, + "23": 952847360.0, + "24": 952847360.0, + "25": 952847360.0, + "26": 952847360.0, + "27": 952847360.0, + "28": 952847360.0, + "29": 952847360.0, + "30": 952847360.0, + "31": 952847360.0, + "32": 952847360.0, + "33": 952847360.0, + "34": 952847360.0, + "35": 952847360.0, + "36": 952847360.0, + "37": 952847360.0, + "38": 952847360.0, + "39": 952847360.0, + "40": 952847360.0, + "41": 952847360.0, + "42": 952847360.0, + "43": 952847360.0, + "44": 952847360.0, + "45": 952847360.0, + "46": 952847360.0, + "47": 952847360.0, + "48": 952847360.0, + "49": 952847360.0, + "50": 952847360.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3275808768.0, + "2": 3637371904.0, + "3": 3637371904.0, + "4": 3637371904.0, + "5": 3637371904.0, + "6": 3637371904.0, + "7": 3637371904.0, + "8": 3637371904.0, + "9": 3637371904.0, + "10": 3637371904.0, + "11": 3637371904.0, + "12": 3637371904.0, + "13": 3637371904.0, + "14": 3637371904.0, + "15": 3637371904.0, + "16": 3637371904.0, + 
"17": 3637371904.0, + "18": 3637371904.0, + "19": 3637371904.0, + "20": 3637371904.0, + "21": 3637371904.0, + "22": 3637371904.0, + "23": 3637371904.0, + "24": 3637371904.0, + "25": 3637371904.0, + "26": 3637371904.0, + "27": 3637371904.0, + "28": 3637371904.0, + "29": 3637371904.0, + "30": 3637371904.0, + "31": 3637371904.0, + "32": 3637371904.0, + "33": 3637371904.0, + "34": 3637371904.0, + "35": 3637371904.0, + "36": 3637371904.0, + "37": 3637371904.0, + "38": 3637371904.0, + "39": 3637371904.0, + "40": 3637371904.0, + "41": 3637371904.0, + "42": 3637371904.0, + "43": 3637371904.0, + "44": 3637371904.0, + "45": 3637371904.0, + "46": 3637371904.0, + "47": 3637371904.0, + "48": 3637371904.0, + "49": 3637371904.0, + "50": 3637371904.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 8.92875, + "2": 0.12034, + "3": 0.10184, + "4": 0.10215, + "5": 0.10291, + "6": 0.10167, + "7": 0.09936, + "8": 0.10097, + "9": 0.10127, + "10": 0.10171, + "11": 0.10013, + "12": 0.09898, + "13": 0.10085, + "14": 0.10081, + "15": 0.10088, + "16": 0.10002, + "17": 0.0999, + "18": 0.10168, + "19": 0.10032, + "20": 0.09815, + "21": 0.10018, + "22": 0.09914, + "23": 0.1005, + "24": 0.10106, + "25": 0.10086, + "26": 0.10152, + "27": 0.1, + "28": 0.10161, + "29": 0.10038, + "30": 0.10045, + "31": 0.10187, + "32": 0.10055, + "33": 0.11357, + "34": 0.10266, + "35": 0.10298, + "36": 0.10061, + "37": 0.10166, + "38": 0.10185, + "39": 0.09925, + "40": 0.10087, + "41": 0.10001, + "42": 0.1, + "43": 0.10286, + "44": 0.10227, + "45": 0.10327, + "46": 0.10041, + "47": 0.10091, + "48": 0.10215, + "49": 0.10017, + "50": 0.10055 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json new file 
mode 100644 index 00000000000..1306e400ed7 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8337, + "2": 10.83217, + "3": 10.83141, + "4": 10.80035, + "5": 10.85677, + "6": 10.86685, + "7": 10.84597, + "8": 10.84289, + "9": 10.8558, + "10": 10.80851, + "11": 10.89022, + "12": 10.87084, + "13": 10.87527, + "14": 10.8902, + "15": 10.79856, + "16": 10.81047, + "17": 10.78972, + "18": 10.824, + "19": 10.80709, + "20": 10.71089, + "21": 10.68461, + "22": 10.54244, + "23": 10.71826, + "24": 10.58552, + "25": 10.5436, + "26": 10.60978, + "27": 10.61027, + "28": 10.57094, + "29": 10.5905, + "30": 10.35069, + "31": 10.08989, + "32": 10.47124, + "33": 10.45479, + "34": 10.19985, + "35": 10.26074, + "36": 10.21478, + "37": 10.33663, + "38": 10.17509, + "39": 10.39333, + "40": 10.07155, + "41": 10.14016, + "42": 10.19706, + "43": 9.81234, + "44": 9.93566, + "45": 9.81507, + "46": 9.80601, + "47": 10.12818, + "48": 9.82423, + "49": 9.50741, + "50": 9.88952 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563.0, + "2": 1726.0, + "3": 1587.0, + "4": 1729.0, + "5": 1808.0, + "6": 1766.0, + "7": 1701.0, + "8": 1761.0, + "9": 1852.0, + "10": 1377.0, + "11": 1784.0, + "12": 1773.0, + "13": 1887.0, + "14": 1869.0, + "15": 1872.0, + "16": 1819.0, + "17": 1779.0, + "18": 1669.0, + "19": 1838.0, + "20": 1675.0, + "21": 1847.0, + "22": 1671.0, + "23": 1931.0, + "24": 1672.0, + "25": 1549.0, + "26": 1756.0, + "27": 1756.0, + "28": 1977.0, + "29": 1963.0, + "30": 2043.0, + "31": 1615.0, + "32": 1875.0, + "33": 2095.0, + "34": 1910.0, + "35": 2002.0, + "36": 1897.0, + "37": 2269.0, + "38": 2215.0, + "39": 2342.0, + "40": 2311.0, + "41": 2338.0, + "42": 2189.0, + "43": 1957.0, + "44": 2119.0, + "45": 2149.0, + "46": 
2258.0, + "47": 2617.0, + "48": 2367.0, + "49": 2311.0, + "50": 2368.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 952847360.0, + "2": 952847360.0, + "3": 952847360.0, + "4": 952847360.0, + "5": 952847360.0, + "6": 952847360.0, + "7": 952847360.0, + "8": 952847360.0, + "9": 952847360.0, + "10": 952847360.0, + "11": 952847360.0, + "12": 952847360.0, + "13": 952847360.0, + "14": 952847360.0, + "15": 952847360.0, + "16": 952847360.0, + "17": 952847360.0, + "18": 952847360.0, + "19": 952847360.0, + "20": 952847360.0, + "21": 952847360.0, + "22": 952847360.0, + "23": 952847360.0, + "24": 952847360.0, + "25": 952847360.0, + "26": 952847360.0, + "27": 952847360.0, + "28": 952847360.0, + "29": 952847360.0, + "30": 952847360.0, + "31": 952847360.0, + "32": 952847360.0, + "33": 952847360.0, + "34": 952847360.0, + "35": 952847360.0, + "36": 952847360.0, + "37": 952847360.0, + "38": 952847360.0, + "39": 952847360.0, + "40": 952847360.0, + "41": 952847360.0, + "42": 952847360.0, + "43": 952847360.0, + "44": 952847360.0, + "45": 952847360.0, + "46": 952847360.0, + "47": 952847360.0, + "48": 952847360.0, + "49": 952847360.0, + "50": 952847360.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3275808768.0, + "2": 3637371904.0, + "3": 3637371904.0, + "4": 3637371904.0, + "5": 3637371904.0, + "6": 3637371904.0, + "7": 3637371904.0, + "8": 3637371904.0, + "9": 3637371904.0, + "10": 3637371904.0, + "11": 3637371904.0, + "12": 3637371904.0, + "13": 3637371904.0, + "14": 3637371904.0, + "15": 3637371904.0, + "16": 3637371904.0, + "17": 3637371904.0, + "18": 3637371904.0, + "19": 3637371904.0, + "20": 3637371904.0, + "21": 3637371904.0, + "22": 3637371904.0, + "23": 3637371904.0, + "24": 3637371904.0, + "25": 3637371904.0, + "26": 3637371904.0, + "27": 3637371904.0, + "28": 3637371904.0, + "29": 3637371904.0, + "30": 3637371904.0, + "31": 
3637371904.0, + "32": 3637371904.0, + "33": 3637371904.0, + "34": 3637371904.0, + "35": 3637371904.0, + "36": 3637371904.0, + "37": 3637371904.0, + "38": 3637371904.0, + "39": 3637371904.0, + "40": 3637371904.0, + "41": 3637371904.0, + "42": 3637371904.0, + "43": 3637371904.0, + "44": 3637371904.0, + "45": 3637371904.0, + "46": 3637371904.0, + "47": 3637371904.0, + "48": 3637371904.0, + "49": 3637371904.0, + "50": 3637371904.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.97292, + "2": 0.1992, + "3": 0.16312, + "4": 0.15734, + "5": 0.40689, + "6": 0.36557, + "7": 0.15246, + "8": 0.14808, + "9": 0.14741, + "10": 0.14777, + "11": 0.14712, + "12": 0.1483, + "13": 0.14786, + "14": 0.14918, + "15": 0.1483, + "16": 0.14751, + "17": 0.14865, + "18": 0.14757, + "19": 0.14736, + "20": 0.14811, + "21": 0.14912, + "22": 0.14808, + "23": 0.14726, + "24": 0.14827, + "25": 0.14733, + "26": 0.14693, + "27": 0.14758, + "28": 0.14719, + "29": 0.14607, + "30": 0.14763, + "31": 0.14698, + "32": 0.14682, + "33": 0.14766, + "34": 0.14759, + "35": 0.14762, + "36": 0.14523, + "37": 0.14552, + "38": 0.14636, + "39": 0.14736, + "40": 0.14684, + "41": 0.14843, + "42": 0.14643, + "43": 0.1472, + "44": 0.34866, + "45": 0.14782, + "46": 0.14753, + "47": 0.14656, + "48": 0.14734, + "49": 0.14632, + "50": 0.14628 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..d92033a2e8a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8337, + 
"2": 10.83217, + "3": 10.83141, + "4": 10.80035, + "5": 10.85677, + "6": 10.86685, + "7": 10.84597, + "8": 10.84289, + "9": 10.8558, + "10": 10.80851, + "11": 10.89022, + "12": 10.87084, + "13": 10.87527, + "14": 10.8902, + "15": 10.79856, + "16": 10.81047, + "17": 10.78972, + "18": 10.824, + "19": 10.80709, + "20": 10.71089, + "21": 10.68461, + "22": 10.54244, + "23": 10.71826, + "24": 10.58552, + "25": 10.5436, + "26": 10.60978, + "27": 10.61027, + "28": 10.57094, + "29": 10.5905, + "30": 10.35069, + "31": 10.08989, + "32": 10.47124, + "33": 10.45479, + "34": 10.19985, + "35": 10.26074, + "36": 10.21478, + "37": 10.33663, + "38": 10.17509, + "39": 10.39333, + "40": 10.07155, + "41": 10.14016, + "42": 10.19706, + "43": 9.81234, + "44": 9.93566, + "45": 9.81507, + "46": 9.80601, + "47": 10.12818, + "48": 9.82423, + "49": 9.50741, + "50": 9.88952 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563.0, + "2": 1726.0, + "3": 1587.0, + "4": 1729.0, + "5": 1808.0, + "6": 1766.0, + "7": 1701.0, + "8": 1761.0, + "9": 1852.0, + "10": 1377.0, + "11": 1784.0, + "12": 1773.0, + "13": 1887.0, + "14": 1869.0, + "15": 1872.0, + "16": 1819.0, + "17": 1779.0, + "18": 1669.0, + "19": 1838.0, + "20": 1675.0, + "21": 1847.0, + "22": 1671.0, + "23": 1931.0, + "24": 1672.0, + "25": 1549.0, + "26": 1756.0, + "27": 1756.0, + "28": 1977.0, + "29": 1963.0, + "30": 2043.0, + "31": 1615.0, + "32": 1875.0, + "33": 2095.0, + "34": 1910.0, + "35": 2002.0, + "36": 1897.0, + "37": 2269.0, + "38": 2215.0, + "39": 2342.0, + "40": 2311.0, + "41": 2338.0, + "42": 2189.0, + "43": 1957.0, + "44": 2119.0, + "45": 2149.0, + "46": 2258.0, + "47": 2617.0, + "48": 2367.0, + "49": 2311.0, + "50": 2368.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 952847360.0, + "2": 952847360.0, + "3": 952847360.0, + "4": 952847360.0, + "5": 952847360.0, + "6": 952847360.0, + "7": 952847360.0, + 
"8": 952847360.0, + "9": 952847360.0, + "10": 952847360.0, + "11": 952847360.0, + "12": 952847360.0, + "13": 952847360.0, + "14": 952847360.0, + "15": 952847360.0, + "16": 952847360.0, + "17": 952847360.0, + "18": 952847360.0, + "19": 952847360.0, + "20": 952847360.0, + "21": 952847360.0, + "22": 952847360.0, + "23": 952847360.0, + "24": 952847360.0, + "25": 952847360.0, + "26": 952847360.0, + "27": 952847360.0, + "28": 952847360.0, + "29": 952847360.0, + "30": 952847360.0, + "31": 952847360.0, + "32": 952847360.0, + "33": 952847360.0, + "34": 952847360.0, + "35": 952847360.0, + "36": 952847360.0, + "37": 952847360.0, + "38": 952847360.0, + "39": 952847360.0, + "40": 952847360.0, + "41": 952847360.0, + "42": 952847360.0, + "43": 952847360.0, + "44": 952847360.0, + "45": 952847360.0, + "46": 952847360.0, + "47": 952847360.0, + "48": 952847360.0, + "49": 952847360.0, + "50": 952847360.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3275808768.0, + "2": 3637371904.0, + "3": 3637371904.0, + "4": 3637371904.0, + "5": 3637371904.0, + "6": 3637371904.0, + "7": 3637371904.0, + "8": 3637371904.0, + "9": 3637371904.0, + "10": 3637371904.0, + "11": 3637371904.0, + "12": 3637371904.0, + "13": 3637371904.0, + "14": 3637371904.0, + "15": 3637371904.0, + "16": 3637371904.0, + "17": 3637371904.0, + "18": 3637371904.0, + "19": 3637371904.0, + "20": 3637371904.0, + "21": 3637371904.0, + "22": 3637371904.0, + "23": 3637371904.0, + "24": 3637371904.0, + "25": 3637371904.0, + "26": 3637371904.0, + "27": 3637371904.0, + "28": 3637371904.0, + "29": 3637371904.0, + "30": 3637371904.0, + "31": 3637371904.0, + "32": 3637371904.0, + "33": 3637371904.0, + "34": 3637371904.0, + "35": 3637371904.0, + "36": 3637371904.0, + "37": 3637371904.0, + "38": 3637371904.0, + "39": 3637371904.0, + "40": 3637371904.0, + "41": 3637371904.0, + "42": 3637371904.0, + "43": 3637371904.0, + "44": 3637371904.0, + "45": 3637371904.0, + "46": 
3637371904.0, + "47": 3637371904.0, + "48": 3637371904.0, + "49": 3637371904.0, + "50": 3637371904.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.79244, + "2": 0.18866, + "3": 0.15434, + "4": 0.15761, + "5": 0.15724, + "6": 0.15378, + "7": 0.15381, + "8": 0.15636, + "9": 0.15341, + "10": 0.15408, + "11": 0.15704, + "12": 0.15148, + "13": 0.14733, + "14": 0.14655, + "15": 0.15415, + "16": 0.15103, + "17": 0.1512, + "18": 0.15478, + "19": 0.15325, + "20": 0.14874, + "21": 0.14873, + "22": 0.15363, + "23": 0.14741, + "24": 0.14761, + "25": 0.14905, + "26": 0.14826, + "27": 0.14811, + "28": 0.14877, + "29": 0.15462, + "30": 0.15391, + "31": 0.15501, + "32": 0.15366, + "33": 0.15348, + "34": 0.15427, + "35": 0.15377, + "36": 0.15502, + "37": 0.15312, + "38": 0.15305, + "39": 0.15313, + "40": 0.15265, + "41": 0.15294, + "42": 0.15318, + "43": 0.15372, + "44": 0.1524, + "45": 0.15283, + "46": 0.15215, + "47": 0.15253, + "48": 0.15208, + "49": 0.15253, + "50": 0.15255 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..9669534a70b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.83568, + "2": 10.83266, + "3": 10.83151, + "4": 10.80343, + "5": 10.8567, + "6": 10.86778, + "7": 10.84836, + "8": 10.84624, + "9": 10.85924, + "10": 10.81478, + "11": 10.89821, + "12": 10.88433, + "13": 10.88963, + "14": 10.90075, + "15": 10.85098, + "16": 10.86603, + 
"17": 10.85455, + "18": 10.88507, + "19": 10.8773, + "20": 10.85865, + "21": 10.85654, + "22": 10.79685, + "23": 10.88724, + "24": 10.82649, + "25": 10.81343, + "26": 10.82705, + "27": 10.84612, + "28": 10.84227, + "29": 10.85329, + "30": 10.74969, + "31": 10.63041, + "32": 10.79004, + "33": 10.77234, + "34": 10.65722, + "35": 10.65857, + "36": 10.61583, + "37": 10.67536, + "38": 10.58101, + "39": 10.69083, + "40": 10.50359, + "41": 10.52777, + "42": 10.55371, + "43": 10.28636, + "44": 10.36369, + "45": 10.2738, + "46": 10.24567, + "47": 10.45103, + "48": 10.23707, + "49": 9.99555, + "50": 10.25588, + "51": 10.20129, + "52": 10.10855, + "53": 10.34609, + "54": 10.24857, + "55": 10.18782, + "56": 9.95521, + "57": 9.81221, + "58": 10.10875, + "59": 9.8863, + "60": 9.80901, + "61": 9.94824, + "62": 10.1999, + "63": 9.64431, + "64": 9.9951, + "65": 9.24475, + "66": 9.90917, + "67": 9.59735, + "68": 9.97285, + "69": 9.96332, + "70": 9.91039, + "71": 9.78596, + "72": 9.77263, + "73": 9.6618, + "74": 9.16289, + "75": 9.5812, + "76": 9.26137, + "77": 10.17615, + "78": 9.85644, + "79": 9.50644, + "80": 9.54102, + "81": 9.61313, + "82": 9.80669, + "83": 9.44696, + "84": 9.52782, + "85": 9.72633, + "86": 9.19099, + "87": 9.68736, + "88": 9.85216, + "89": 9.71335, + "90": 9.90316, + "91": 9.46064, + "92": 9.46059, + "93": 9.19418, + "94": 8.93434, + "95": 9.60258, + "96": 9.61852, + "97": 9.39594, + "98": 9.76012, + "99": 8.98668, + "100": 9.49405 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 684610560.0, + "2": 684610560.0, + "3": 684610560.0, + "4": 684610560.0, + "5": 684610560.0, + "6": 684610560.0, + "7": 684610560.0, + "8": 684610560.0, + "9": 684610560.0, + "10": 684610560.0, + "11": 684610560.0, + "12": 684610560.0, + "13": 684610560.0, + "14": 684610560.0, + "15": 684610560.0, + "16": 684610560.0, + "17": 1043027456.0, + "18": 1043027456.0, + "19": 1043027456.0, + "20": 1043027456.0, + "21": 
1043027456.0, + "22": 1043027456.0, + "23": 1043027456.0, + "24": 1043027456.0, + "25": 1043027456.0, + "26": 1043027456.0, + "27": 1043027456.0, + "28": 1043027456.0, + "29": 1043027456.0, + "30": 1043027456.0, + "31": 1043027456.0, + "32": 1043027456.0, + "33": 1043027456.0, + "34": 1043027456.0, + "35": 1043027456.0, + "36": 1043027456.0, + "37": 1043027456.0, + "38": 1043027456.0, + "39": 1043027456.0, + "40": 1043027456.0, + "41": 1043027456.0, + "42": 1043027456.0, + "43": 1043027456.0, + "44": 1043027456.0, + "45": 1043027456.0, + "46": 1043027456.0, + "47": 1043027456.0, + "48": 1043027456.0, + "49": 1043027456.0, + "50": 1043027456.0, + "51": 1043027456.0, + "52": 1043027456.0, + "53": 1043027456.0, + "54": 1043027456.0, + "55": 1043027456.0, + "56": 1043027456.0, + "57": 1043027456.0, + "58": 1043027456.0, + "59": 1043027456.0, + "60": 1043027456.0, + "61": 1043027456.0, + "62": 1043027456.0, + "63": 1043027456.0, + "64": 1043027456.0, + "65": 1043027456.0, + "66": 1043027456.0, + "67": 1043027456.0, + "68": 1043027456.0, + "69": 1043027456.0, + "70": 1043027456.0, + "71": 1043027456.0, + "72": 1043027456.0, + "73": 1043027456.0, + "74": 1043027456.0, + "75": 1043027456.0, + "76": 1043027456.0, + "77": 1043027456.0, + "78": 1043027456.0, + "79": 1043027456.0, + "80": 1043027456.0, + "81": 1043027456.0, + "82": 1043027456.0, + "83": 1043027456.0, + "84": 1043027456.0, + "85": 1043027456.0, + "86": 1043027456.0, + "87": 1043027456.0, + "88": 1043027456.0, + "89": 1043027456.0, + "90": 1043027456.0, + "91": 1043027456.0, + "92": 1043027456.0, + "93": 1043027456.0, + "94": 1043027456.0, + "95": 1043027456.0, + "96": 1043027456.0, + "97": 1043027456.0, + "98": 1043027456.0, + "99": 1043027456.0, + "100": 1043027456.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3187304960.0, + "2": 3187305472.0, + "3": 3187305472.0, + "4": 3187305472.0, + "5": 3187305472.0, + "6": 3187305472.0, + "7": 
3187305472.0, + "8": 3187305472.0, + "9": 3187305472.0, + "10": 3187305472.0, + "11": 3187305472.0, + "12": 3187305472.0, + "13": 3187305472.0, + "14": 3187305472.0, + "15": 3187305472.0, + "16": 3187305472.0, + "17": 3187305472.0, + "18": 3547033088.0, + "19": 3547033088.0, + "20": 3547033088.0, + "21": 3547033088.0, + "22": 3547033088.0, + "23": 3547033088.0, + "24": 3547033088.0, + "25": 3547033088.0, + "26": 3547033088.0, + "27": 3547033088.0, + "28": 3547033088.0, + "29": 3547033088.0, + "30": 3547033088.0, + "31": 3547033088.0, + "32": 3547033088.0, + "33": 3547033088.0, + "34": 3547033088.0, + "35": 3547033088.0, + "36": 3547033088.0, + "37": 3547033088.0, + "38": 3547033088.0, + "39": 3547033088.0, + "40": 3547033088.0, + "41": 3547033088.0, + "42": 3547033088.0, + "43": 3547033088.0, + "44": 3547033088.0, + "45": 3547033088.0, + "46": 3547033088.0, + "47": 3547033088.0, + "48": 3547033088.0, + "49": 3547033088.0, + "50": 3547033088.0, + "51": 3547033088.0, + "52": 3547033088.0, + "53": 3547033088.0, + "54": 3547033088.0, + "55": 3547033088.0, + "56": 3547033088.0, + "57": 3547033088.0, + "58": 3547033088.0, + "59": 3547033088.0, + "60": 3547033088.0, + "61": 3547033088.0, + "62": 3547033088.0, + "63": 3547033088.0, + "64": 3547033088.0, + "65": 3547033088.0, + "66": 3547033088.0, + "67": 3547033088.0, + "68": 3547033088.0, + "69": 3547033088.0, + "70": 3547033088.0, + "71": 3547033088.0, + "72": 3547033088.0, + "73": 3547033088.0, + "74": 3547033088.0, + "75": 3547033088.0, + "76": 3547033088.0, + "77": 3547033088.0, + "78": 3547033088.0, + "79": 3547033088.0, + "80": 3547033088.0, + "81": 3547033088.0, + "82": 3547033088.0, + "83": 3547033088.0, + "84": 3547033088.0, + "85": 3547033088.0, + "86": 3547033088.0, + "87": 3547033088.0, + "88": 3547033088.0, + "89": 3547033088.0, + "90": 3547033088.0, + "91": 3547033088.0, + "92": 3547033088.0, + "93": 3547033088.0, + "94": 3547033088.0, + "95": 3547033088.0, + "96": 3547033088.0, + "97": 3547033088.0, + "98": 
3547033088.0, + "99": 3547033088.0, + "100": 3547033088.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.2136, + "2": 0.17385, + "3": 0.1375, + "4": 0.14124, + "5": 0.13525, + "6": 0.13546, + "7": 0.13534, + "8": 0.13459, + "9": 0.13505, + "10": 0.1463, + "11": 0.13547, + "12": 0.14518, + "13": 0.13738, + "14": 0.13687, + "15": 0.14389, + "16": 0.13574, + "17": 0.2165, + "18": 0.15319, + "19": 0.14548, + "20": 0.15335, + "21": 0.14926, + "22": 0.13834, + "23": 0.14513, + "24": 0.14572, + "25": 0.14607, + "26": 0.14645, + "27": 0.14591, + "28": 0.14675, + "29": 0.14668, + "30": 0.1468, + "31": 0.14701, + "32": 0.14635, + "33": 0.14655, + "34": 0.14999, + "35": 0.14702, + "36": 0.14559, + "37": 0.14632, + "38": 0.15055, + "39": 0.1456, + "40": 0.15293, + "41": 0.14613, + "42": 0.14562, + "43": 0.15546, + "44": 0.14537, + "45": 0.14571, + "46": 0.14754, + "47": 0.14944, + "48": 0.14875, + "49": 0.14515, + "50": 0.14462, + "51": 0.15106, + "52": 0.1468, + "53": 0.14697, + "54": 0.14607, + "55": 0.14673, + "56": 0.1478, + "57": 0.14729, + "58": 0.14787, + "59": 0.14686, + "60": 0.14664, + "61": 0.14613, + "62": 0.14473, + "63": 0.14534, + "64": 0.14576, + "65": 0.14698, + "66": 0.14626, + "67": 0.14642, + "68": 0.14692, + "69": 0.14497, + "70": 0.14585, + "71": 0.14658, + "72": 0.14646, + "73": 0.14784, + "74": 0.14641, + "75": 0.14604, + "76": 0.14649, + "77": 0.14675, + "78": 0.14677, + "79": 0.14639, + "80": 0.14873, + "81": 0.14632, + "82": 0.14642, + "83": 0.14666, + "84": 0.14579, + "85": 0.14675, + "86": 0.14449, + "87": 0.14611, + "88": 0.1466, + "89": 0.14651, + "90": 0.14511, + "91": 0.14613, + "92": 0.14552, + "93": 0.14658, + "94": 0.14599, + "95": 0.14588, + "96": 0.14535, + "97": 0.14603, + "98": 0.14551, + "99": 0.14681, + "100": 0.14606 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": 
"nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2249.0, + "18": 2165.0, + "19": 2362.0, + "20": 1953.0, + "21": 1898.0, + "22": "nan", + "23": 2371.0, + "24": 1984.0, + "25": 1818.0, + "26": 1980.0, + "27": 2078.0, + "28": 2467.0, + "29": 2395.0, + "30": 2298.0, + "31": 1682.0, + "32": 2236.0, + "33": 2192.0, + "34": 1800.0, + "35": 2083.0, + "36": 2139.0, + "37": 2498.0, + "38": 2218.0, + "39": 2642.0, + "40": 2287.0, + "41": 2344.0, + "42": 2304.0, + "43": 2098.0, + "44": 2107.0, + "45": 2243.0, + "46": 1960.0, + "47": 2729.0, + "48": 2418.0, + "49": 1910.0, + "50": 2426.0, + "51": 2335.0, + "52": 2407.0, + "53": 2888.0, + "54": 2477.0, + "55": 2440.0, + "56": 2286.0, + "57": 2340.0, + "58": 2652.0, + "59": 2321.0, + "60": 2493.0, + "61": 2812.0, + "62": 2711.0, + "63": 2367.0, + "64": 2802.0, + "65": 2411.0, + "66": 2869.0, + "67": 2577.0, + "68": 2859.0, + "69": 2524.0, + "70": 3119.0, + "71": 2926.0, + "72": 2251.0, + "73": 2929.0, + "74": 2110.0, + "75": 2884.0, + "76": 2992.0, + "77": 3380.0, + "78": 3484.0, + "79": 3533.0, + "80": 3549.0, + "81": 3616.0, + "82": 3347.0, + "83": 3124.0, + "84": 3276.0, + "85": 3721.0, + "86": 3207.0, + "87": 3941.0, + "88": 3250.0, + "89": 3863.0, + "90": 3452.0, + "91": 2630.0, + "92": 3431.0, + "93": 3123.0, + "94": 3671.0, + "95": 3340.0, + "96": 3874.0, + "97": 3519.0, + "98": 3727.0, + "99": 3447.0, + "100": 3338.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..fbf4935d854 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.83568, + "2": 10.83266, + "3": 10.83151, + "4": 10.80343, + "5": 10.8567, + "6": 10.86778, + "7": 10.84836, + "8": 10.84624, + "9": 10.85924, + "10": 10.81478, + "11": 10.89821, + "12": 10.88433, + "13": 10.88963, + "14": 10.90075, + "15": 10.85098, + "16": 10.86603, + "17": 10.85455, + "18": 10.88507, + "19": 10.8773, + "20": 10.85865, + "21": 10.85654, + "22": 10.79685, + "23": 10.88724, + "24": 10.82649, + "25": 10.81343, + "26": 10.82705, + "27": 10.84612, + "28": 10.84227, + "29": 10.85329, + "30": 10.74969, + "31": 10.63041, + "32": 10.79004, + "33": 10.77234, + "34": 10.65722, + "35": 10.65857, + "36": 10.61583, + "37": 10.67536, + "38": 10.58101, + "39": 10.69083, + "40": 10.50359, + "41": 10.52777, + "42": 10.55371, + "43": 10.28636, + "44": 10.36369, + "45": 10.2738, + "46": 10.24567, + "47": 10.45103, + "48": 10.23707, + "49": 9.99555, + "50": 10.25588, + "51": 10.20129, + "52": 10.10855, + "53": 10.34609, + "54": 10.24857, + "55": 10.18782, + "56": 9.95521, + "57": 9.81221, + "58": 10.10875, + "59": 9.8863, + "60": 9.80901, + "61": 9.94824, + "62": 10.1999, + "63": 9.64431, + "64": 9.9951, + "65": 9.24475, + "66": 9.90917, + "67": 9.59735, + "68": 9.97285, + "69": 9.96332, + "70": 9.91039, + "71": 9.78596, + "72": 9.77263, + "73": 9.6618, + "74": 9.16289, + "75": 9.5812, + "76": 9.26137, + "77": 10.17615, + "78": 9.85644, + "79": 9.50644, + "80": 9.54102, + "81": 9.61313, + "82": 9.80669, + "83": 9.44696, + "84": 9.52782, + "85": 9.72633, + "86": 9.19099, + "87": 9.68736, + "88": 9.85216, + "89": 9.71335, + "90": 9.90316, + "91": 9.46064, + "92": 9.46059, + "93": 9.19418, + "94": 8.93434, + "95": 9.60258, + "96": 9.61852, + "97": 9.39594, + "98": 9.76012, + "99": 8.98668, + "100": 9.49405 + } + 
}, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 684610560.0, + "2": 684610560.0, + "3": 684610560.0, + "4": 684610560.0, + "5": 684610560.0, + "6": 684610560.0, + "7": 684610560.0, + "8": 684610560.0, + "9": 684610560.0, + "10": 684610560.0, + "11": 684610560.0, + "12": 684610560.0, + "13": 684610560.0, + "14": 684610560.0, + "15": 684610560.0, + "16": 684610560.0, + "17": 1043027456.0, + "18": 1043027456.0, + "19": 1043027456.0, + "20": 1043027456.0, + "21": 1043027456.0, + "22": 1043027456.0, + "23": 1043027456.0, + "24": 1043027456.0, + "25": 1043027456.0, + "26": 1043027456.0, + "27": 1043027456.0, + "28": 1043027456.0, + "29": 1043027456.0, + "30": 1043027456.0, + "31": 1043027456.0, + "32": 1043027456.0, + "33": 1043027456.0, + "34": 1043027456.0, + "35": 1043027456.0, + "36": 1043027456.0, + "37": 1043027456.0, + "38": 1043027456.0, + "39": 1043027456.0, + "40": 1043027456.0, + "41": 1043027456.0, + "42": 1043027456.0, + "43": 1043027456.0, + "44": 1043027456.0, + "45": 1043027456.0, + "46": 1043027456.0, + "47": 1043027456.0, + "48": 1043027456.0, + "49": 1043027456.0, + "50": 1043027456.0, + "51": 1043027456.0, + "52": 1043027456.0, + "53": 1043027456.0, + "54": 1043027456.0, + "55": 1043027456.0, + "56": 1043027456.0, + "57": 1043027456.0, + "58": 1043027456.0, + "59": 1043027456.0, + "60": 1043027456.0, + "61": 1043027456.0, + "62": 1043027456.0, + "63": 1043027456.0, + "64": 1043027456.0, + "65": 1043027456.0, + "66": 1043027456.0, + "67": 1043027456.0, + "68": 1043027456.0, + "69": 1043027456.0, + "70": 1043027456.0, + "71": 1043027456.0, + "72": 1043027456.0, + "73": 1043027456.0, + "74": 1043027456.0, + "75": 1043027456.0, + "76": 1043027456.0, + "77": 1043027456.0, + "78": 1043027456.0, + "79": 1043027456.0, + "80": 1043027456.0, + "81": 1043027456.0, + "82": 1043027456.0, + "83": 1043027456.0, + "84": 1043027456.0, + "85": 1043027456.0, + "86": 1043027456.0, + "87": 1043027456.0, + 
"88": 1043027456.0, + "89": 1043027456.0, + "90": 1043027456.0, + "91": 1043027456.0, + "92": 1043027456.0, + "93": 1043027456.0, + "94": 1043027456.0, + "95": 1043027456.0, + "96": 1043027456.0, + "97": 1043027456.0, + "98": 1043027456.0, + "99": 1043027456.0, + "100": 1043027456.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3187304960.0, + "2": 3187305472.0, + "3": 3187305472.0, + "4": 3187305472.0, + "5": 3187305472.0, + "6": 3187305472.0, + "7": 3187305472.0, + "8": 3187305472.0, + "9": 3187305472.0, + "10": 3187305472.0, + "11": 3187305472.0, + "12": 3187305472.0, + "13": 3187305472.0, + "14": 3187305472.0, + "15": 3187305472.0, + "16": 3187305472.0, + "17": 3187305472.0, + "18": 3547033088.0, + "19": 3547033088.0, + "20": 3547033088.0, + "21": 3547033088.0, + "22": 3547033088.0, + "23": 3547033088.0, + "24": 3547033088.0, + "25": 3547033088.0, + "26": 3547033088.0, + "27": 3547033088.0, + "28": 3547033088.0, + "29": 3547033088.0, + "30": 3547033088.0, + "31": 3547033088.0, + "32": 3547033088.0, + "33": 3547033088.0, + "34": 3547033088.0, + "35": 3547033088.0, + "36": 3547033088.0, + "37": 3547033088.0, + "38": 3547033088.0, + "39": 3547033088.0, + "40": 3547033088.0, + "41": 3547033088.0, + "42": 3547033088.0, + "43": 3547033088.0, + "44": 3547033088.0, + "45": 3547033088.0, + "46": 3547033088.0, + "47": 3547033088.0, + "48": 3547033088.0, + "49": 3547033088.0, + "50": 3547033088.0, + "51": 3547033088.0, + "52": 3547033088.0, + "53": 3547033088.0, + "54": 3547033088.0, + "55": 3547033088.0, + "56": 3547033088.0, + "57": 3547033088.0, + "58": 3547033088.0, + "59": 3547033088.0, + "60": 3547033088.0, + "61": 3547033088.0, + "62": 3547033088.0, + "63": 3547033088.0, + "64": 3547033088.0, + "65": 3547033088.0, + "66": 3547033088.0, + "67": 3547033088.0, + "68": 3547033088.0, + "69": 3547033088.0, + "70": 3547033088.0, + "71": 3547033088.0, + "72": 3547033088.0, + "73": 3547033088.0, + 
"74": 3547033088.0, + "75": 3547033088.0, + "76": 3547033088.0, + "77": 3547033088.0, + "78": 3547033088.0, + "79": 3547033088.0, + "80": 3547033088.0, + "81": 3547033088.0, + "82": 3547033088.0, + "83": 3547033088.0, + "84": 3547033088.0, + "85": 3547033088.0, + "86": 3547033088.0, + "87": 3547033088.0, + "88": 3547033088.0, + "89": 3547033088.0, + "90": 3547033088.0, + "91": 3547033088.0, + "92": 3547033088.0, + "93": 3547033088.0, + "94": 3547033088.0, + "95": 3547033088.0, + "96": 3547033088.0, + "97": 3547033088.0, + "98": 3547033088.0, + "99": 3547033088.0, + "100": 3547033088.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5.93467, + "2": 0.17161, + "3": 0.14039, + "4": 0.13829, + "5": 0.13749, + "6": 0.13944, + "7": 0.13824, + "8": 0.13871, + "9": 0.13838, + "10": 0.13737, + "11": 0.13776, + "12": 0.13721, + "13": 0.13753, + "14": 0.13754, + "15": 0.13872, + "16": 0.13797, + "17": 0.20803, + "18": 0.15259, + "19": 0.14464, + "20": 0.14422, + "21": 0.14345, + "22": 0.13549, + "23": 0.14245, + "24": 0.14329, + "25": 0.14394, + "26": 0.14405, + "27": 0.14342, + "28": 0.14331, + "29": 0.14487, + "30": 0.14483, + "31": 0.14485, + "32": 0.14456, + "33": 0.14289, + "34": 0.14297, + "35": 0.14395, + "36": 0.14402, + "37": 0.14382, + "38": 0.13994, + "39": 0.14081, + "40": 0.14133, + "41": 0.14193, + "42": 0.14096, + "43": 0.14276, + "44": 0.14166, + "45": 0.13978, + "46": 0.1416, + "47": 0.14022, + "48": 0.14002, + "49": 0.14073, + "50": 0.14162, + "51": 0.14791, + "52": 0.14124, + "53": 0.14062, + "54": 0.14018, + "55": 0.14011, + "56": 0.13945, + "57": 0.14062, + "58": 0.14119, + "59": 0.14089, + "60": 0.14102, + "61": 0.13963, + "62": 0.14092, + "63": 0.14055, + "64": 0.14084, + "65": 0.14007, + "66": 0.13972, + "67": 0.14119, + "68": 0.13979, + "69": 0.14005, + "70": 0.14035, + "71": 0.14023, + "72": 0.14046, + "73": 0.1403, + "74": 0.13974, + "75": 0.14059, + "76": 0.1405, + "77": 0.14012, + 
"78": 0.14025, + "79": 0.13985, + "80": 0.1396, + "81": 0.1399, + "82": 0.14103, + "83": 0.13999, + "84": 0.13938, + "85": 0.13986, + "86": 0.14082, + "87": 0.13988, + "88": 0.13941, + "89": 0.13979, + "90": 0.13994, + "91": 0.14044, + "92": 0.13957, + "93": 0.14067, + "94": 0.13918, + "95": 0.14088, + "96": 0.14093, + "97": 0.13871, + "98": 0.13964, + "99": 0.13894, + "100": 0.13923 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2249.0, + "18": 2165.0, + "19": 2362.0, + "20": 1953.0, + "21": 1898.0, + "22": "nan", + "23": 2371.0, + "24": 1984.0, + "25": 1818.0, + "26": 1980.0, + "27": 2078.0, + "28": 2467.0, + "29": 2395.0, + "30": 2298.0, + "31": 1682.0, + "32": 2236.0, + "33": 2192.0, + "34": 1800.0, + "35": 2083.0, + "36": 2139.0, + "37": 2498.0, + "38": 2218.0, + "39": 2642.0, + "40": 2287.0, + "41": 2344.0, + "42": 2304.0, + "43": 2098.0, + "44": 2107.0, + "45": 2243.0, + "46": 1960.0, + "47": 2729.0, + "48": 2418.0, + "49": 1910.0, + "50": 2426.0, + "51": 2335.0, + "52": 2407.0, + "53": 2888.0, + "54": 2477.0, + "55": 2440.0, + "56": 2286.0, + "57": 2340.0, + "58": 2652.0, + "59": 2321.0, + "60": 2493.0, + "61": 2812.0, + "62": 2711.0, + "63": 2367.0, + "64": 2802.0, + "65": 2411.0, + "66": 2869.0, + "67": 2577.0, + "68": 2859.0, + "69": 2524.0, + "70": 3119.0, + "71": 2926.0, + "72": 2251.0, + "73": 2929.0, + "74": 2110.0, + "75": 2884.0, + "76": 2992.0, + "77": 3380.0, + "78": 3484.0, + "79": 3533.0, + "80": 3549.0, + "81": 3616.0, + "82": 3347.0, + "83": 3124.0, + "84": 3276.0, + "85": 3721.0, + "86": 3207.0, + "87": 3941.0, + "88": 3250.0, + "89": 3863.0, + "90": 3452.0, + "91": 2630.0, + "92": 3431.0, + "93": 3123.0, + "94": 3671.0, + "95": 3340.0, + "96": 3874.0, + 
"97": 3519.0, + "98": 3727.0, + "99": 3447.0, + "100": 3338.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..809ba358612 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.8337, + "2": 10.83216, + "3": 10.83262, + "4": 10.80149, + "5": 10.85789, + "6": 10.86796, + "7": 10.84795, + "8": 10.84663, + "9": 10.86076, + "10": 10.81578, + "11": 10.89921, + "12": 10.88475, + "13": 10.89093, + "14": 10.9047, + "15": 10.84971, + "16": 10.86517, + "17": 10.85475, + "18": 10.8881, + "19": 10.87622, + "20": 10.85686, + "21": 10.85506, + "22": 10.79694, + "23": 10.88579, + "24": 10.8279, + "25": 10.81326, + "26": 10.82693, + "27": 10.846, + "28": 10.84147, + "29": 10.8522, + "30": 10.74663, + "31": 10.62679, + "32": 10.79112, + "33": 10.77171, + "34": 10.65521, + "35": 10.65647, + "36": 10.61755, + "37": 10.67472, + "38": 10.58181, + "39": 10.69126, + "40": 10.50351, + "41": 10.53015, + "42": 10.55529, + "43": 10.28638, + "44": 10.36341, + "45": 10.27258, + "46": 10.24593, + "47": 10.45076, + "48": 10.23738, + "49": 9.99756, + "50": 10.25445, + "51": 10.20109, + "52": 10.10787, + "53": 10.34615, + "54": 10.24765, + "55": 10.18699, + "56": 9.95445, + "57": 9.81113, + "58": 10.10718, + "59": 9.88656, + "60": 9.8098, + "61": 9.95021, + "62": 10.20123, + "63": 9.64325, + "64": 9.99571, + "65": 9.24409, + "66": 9.90919, + "67": 9.59742, + "68": 9.97199, + "69": 9.96262, + "70": 9.91024, + "71": 9.78581, + "72": 9.77311, 
+ "73": 9.66157, + "74": 9.16191, + "75": 9.58173, + "76": 9.26165, + "77": 10.17527, + "78": 9.85663, + "79": 9.50663, + "80": 9.54167, + "81": 9.61305, + "82": 9.80599, + "83": 9.44744, + "84": 9.52725, + "85": 9.7262, + "86": 9.1912, + "87": 9.68768, + "88": 9.85199, + "89": 9.71342, + "90": 9.90242, + "91": 9.4603, + "92": 9.46187, + "93": 9.19485, + "94": 8.93416, + "95": 9.60208, + "96": 9.61859, + "97": 9.39629, + "98": 9.76032, + "99": 8.98677, + "100": 9.49424 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 684610560.0, + "2": 684610560.0, + "3": 684610560.0, + "4": 684610560.0, + "5": 684610560.0, + "6": 684610560.0, + "7": 684610560.0, + "8": 684610560.0, + "9": 684610560.0, + "10": 684610560.0, + "11": 684610560.0, + "12": 684610560.0, + "13": 684610560.0, + "14": 684610560.0, + "15": 684610560.0, + "16": 684610560.0, + "17": 1043027456.0, + "18": 1043027456.0, + "19": 1043027456.0, + "20": 1043027456.0, + "21": 1043027456.0, + "22": 1043027456.0, + "23": 1043027456.0, + "24": 1043027456.0, + "25": 1043027456.0, + "26": 1043027456.0, + "27": 1043027456.0, + "28": 1043027456.0, + "29": 1043027456.0, + "30": 1043027456.0, + "31": 1043027456.0, + "32": 1043027456.0, + "33": 1043027456.0, + "34": 1043027456.0, + "35": 1043027456.0, + "36": 1043027456.0, + "37": 1043027456.0, + "38": 1043027456.0, + "39": 1043027456.0, + "40": 1043027456.0, + "41": 1043027456.0, + "42": 1043027456.0, + "43": 1043027456.0, + "44": 1043027456.0, + "45": 1043027456.0, + "46": 1043027456.0, + "47": 1043027456.0, + "48": 1043027456.0, + "49": 1043027456.0, + "50": 1043027456.0, + "51": 1043027456.0, + "52": 1043027456.0, + "53": 1043027456.0, + "54": 1043027456.0, + "55": 1043027456.0, + "56": 1043027456.0, + "57": 1043027456.0, + "58": 1043027456.0, + "59": 1043027456.0, + "60": 1043027456.0, + "61": 1043027456.0, + "62": 1043027456.0, + "63": 1043027456.0, + "64": 1043027456.0, + "65": 1043027456.0, + "66": 
1043027456.0, + "67": 1043027456.0, + "68": 1043027456.0, + "69": 1043027456.0, + "70": 1043027456.0, + "71": 1043027456.0, + "72": 1043027456.0, + "73": 1043027456.0, + "74": 1043027456.0, + "75": 1043027456.0, + "76": 1043027456.0, + "77": 1043027456.0, + "78": 1043027456.0, + "79": 1043027456.0, + "80": 1043027456.0, + "81": 1043027456.0, + "82": 1043027456.0, + "83": 1043027456.0, + "84": 1043027456.0, + "85": 1043027456.0, + "86": 1043027456.0, + "87": 1043027456.0, + "88": 1043027456.0, + "89": 1043027456.0, + "90": 1043027456.0, + "91": 1043027456.0, + "92": 1043027456.0, + "93": 1043027456.0, + "94": 1043027456.0, + "95": 1043027456.0, + "96": 1043027456.0, + "97": 1043027456.0, + "98": 1043027456.0, + "99": 1043027456.0, + "100": 1043027456.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3187304960.0, + "2": 3187305472.0, + "3": 3187305472.0, + "4": 3187305472.0, + "5": 3187305472.0, + "6": 3187305472.0, + "7": 3187305472.0, + "8": 3187305472.0, + "9": 3187305472.0, + "10": 3187305472.0, + "11": 3187305472.0, + "12": 3187305472.0, + "13": 3187305472.0, + "14": 3187305472.0, + "15": 3187305472.0, + "16": 3187305472.0, + "17": 3187305472.0, + "18": 3547033088.0, + "19": 3547033088.0, + "20": 3547033088.0, + "21": 3547033088.0, + "22": 3547033088.0, + "23": 3547033088.0, + "24": 3547033088.0, + "25": 3547033088.0, + "26": 3547033088.0, + "27": 3547033088.0, + "28": 3547033088.0, + "29": 3547033088.0, + "30": 3547033088.0, + "31": 3547033088.0, + "32": 3547033088.0, + "33": 3547033088.0, + "34": 3547033088.0, + "35": 3547033088.0, + "36": 3547033088.0, + "37": 3547033088.0, + "38": 3547033088.0, + "39": 3547033088.0, + "40": 3547033088.0, + "41": 3547033088.0, + "42": 3547033088.0, + "43": 3547033088.0, + "44": 3547033088.0, + "45": 3547033088.0, + "46": 3547033088.0, + "47": 3547033088.0, + "48": 3547033088.0, + "49": 3547033088.0, + "50": 3547033088.0, + "51": 3547033088.0, + "52": 
3547033088.0, + "53": 3547033088.0, + "54": 3547033088.0, + "55": 3547033088.0, + "56": 3547033088.0, + "57": 3547033088.0, + "58": 3547033088.0, + "59": 3547033088.0, + "60": 3547033088.0, + "61": 3547033088.0, + "62": 3547033088.0, + "63": 3547033088.0, + "64": 3547033088.0, + "65": 3547033088.0, + "66": 3547033088.0, + "67": 3547033088.0, + "68": 3547033088.0, + "69": 3547033088.0, + "70": 3547033088.0, + "71": 3547033088.0, + "72": 3547033088.0, + "73": 3547033088.0, + "74": 3547033088.0, + "75": 3547033088.0, + "76": 3547033088.0, + "77": 3547033088.0, + "78": 3547033088.0, + "79": 3547033088.0, + "80": 3547033088.0, + "81": 3547033088.0, + "82": 3547033088.0, + "83": 3547033088.0, + "84": 3547033088.0, + "85": 3547033088.0, + "86": 3547033088.0, + "87": 3547033088.0, + "88": 3547033088.0, + "89": 3547033088.0, + "90": 3547033088.0, + "91": 3547033088.0, + "92": 3547033088.0, + "93": 3547033088.0, + "94": 3547033088.0, + "95": 3547033088.0, + "96": 3547033088.0, + "97": 3547033088.0, + "98": 3547033088.0, + "99": 3547033088.0, + "100": 3547033088.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.37801, + "2": 0.17868, + "3": 0.15737, + "4": 0.15064, + "5": 0.14295, + "6": 0.14401, + "7": 0.14335, + "8": 0.14238, + "9": 0.14059, + "10": 0.14021, + "11": 0.14214, + "12": 0.14087, + "13": 0.13924, + "14": 0.13916, + "15": 0.13973, + "16": 0.13895, + "17": 0.19936, + "18": 0.22469, + "19": 0.1492, + "20": 0.1494, + "21": 0.14972, + "22": 0.1406, + "23": 0.14885, + "24": 0.15067, + "25": 0.14941, + "26": 0.14905, + "27": 0.14895, + "28": 0.1478, + "29": 0.14932, + "30": 0.14921, + "31": 0.15043, + "32": 0.15028, + "33": 0.14795, + "34": 0.14864, + "35": 0.14904, + "36": 0.1491, + "37": 0.14886, + "38": 0.14931, + "39": 0.1489, + "40": 0.14851, + "41": 0.14847, + "42": 0.14829, + "43": 0.15254, + "44": 0.1485, + "45": 0.14926, + "46": 0.1481, + "47": 0.14794, + "48": 0.14884, + "49": 0.1478, + "50": 
0.14737, + "51": 0.15947, + "52": 0.15469, + "53": 0.15082, + "54": 0.15106, + "55": 0.15266, + "56": 0.15055, + "57": 0.15141, + "58": 0.15117, + "59": 0.15229, + "60": 0.15163, + "61": 0.1511, + "62": 0.15177, + "63": 0.1513, + "64": 0.15114, + "65": 0.1506, + "66": 0.15109, + "67": 0.15009, + "68": 0.1507, + "69": 0.15042, + "70": 0.15201, + "71": 0.15105, + "72": 0.1509, + "73": 0.1504, + "74": 0.15078, + "75": 0.15053, + "76": 0.14994, + "77": 0.14987, + "78": 0.15076, + "79": 0.15058, + "80": 0.1508, + "81": 0.15114, + "82": 0.15016, + "83": 0.15085, + "84": 0.15149, + "85": 0.15054, + "86": 0.15154, + "87": 0.15001, + "88": 0.14995, + "89": 0.15097, + "90": 0.15063, + "91": 0.15144, + "92": 0.15033, + "93": 0.14991, + "94": 0.15161, + "95": 0.15125, + "96": 0.1519, + "97": 0.15146, + "98": 0.15186, + "99": 0.153, + "100": 0.15275 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2245.0, + "18": 2160.0, + "19": 2344.0, + "20": 1969.0, + "21": 1966.0, + "22": "nan", + "23": 2369.0, + "24": 1914.0, + "25": 1863.0, + "26": 1931.0, + "27": 2040.0, + "28": 2378.0, + "29": 2411.0, + "30": 2312.0, + "31": 1759.0, + "32": 2303.0, + "33": 2170.0, + "34": 1860.0, + "35": 2063.0, + "36": 2040.0, + "37": 2464.0, + "38": 2129.0, + "39": 2616.0, + "40": 2212.0, + "41": 2402.0, + "42": 2290.0, + "43": 2083.0, + "44": 2083.0, + "45": 2333.0, + "46": 1979.0, + "47": 2653.0, + "48": 2424.0, + "49": 1878.0, + "50": 2369.0, + "51": 2318.0, + "52": 2456.0, + "53": 2905.0, + "54": 2495.0, + "55": 2357.0, + "56": 2295.0, + "57": 2256.0, + "58": 2752.0, + "59": 2319.0, + "60": 2500.0, + "61": 2883.0, + "62": 2791.0, + "63": 2396.0, + "64": 2838.0, + "65": 2438.0, + "66": 2880.0, + "67": 2596.0, + 
"68": 2940.0, + "69": 2730.0, + "70": 3075.0, + "71": 2957.0, + "72": 2334.0, + "73": 2995.0, + "74": 2178.0, + "75": 2803.0, + "76": 3073.0, + "77": 3411.0, + "78": 3517.0, + "79": 3430.0, + "80": 3568.0, + "81": 3657.0, + "82": 3328.0, + "83": 3188.0, + "84": 3296.0, + "85": 3675.0, + "86": 3300.0, + "87": 3966.0, + "88": 3275.0, + "89": 3995.0, + "90": 3397.0, + "91": 2658.0, + "92": 3409.0, + "93": 3067.0, + "94": 3727.0, + "95": 3468.0, + "96": 3802.0, + "97": 3448.0, + "98": 3735.0, + "99": 3426.0, + "100": 3267.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..de5bb1034d5 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.8337, + "2": 10.83216, + "3": 10.83262, + "4": 10.80149, + "5": 10.85789, + "6": 10.86796, + "7": 10.84795, + "8": 10.84663, + "9": 10.86076, + "10": 10.81578, + "11": 10.89921, + "12": 10.88475, + "13": 10.89093, + "14": 10.9047, + "15": 10.84971, + "16": 10.86517, + "17": 10.85475, + "18": 10.8881, + "19": 10.87622, + "20": 10.85686, + "21": 10.85506, + "22": 10.79694, + "23": 10.88579, + "24": 10.8279, + "25": 10.81326, + "26": 10.82693, + "27": 10.846, + "28": 10.84147, + "29": 10.8522, + "30": 10.74663, + "31": 10.62679, + "32": 10.79112, + "33": 10.77171, + "34": 10.65521, + "35": 10.65647, + "36": 10.61755, + "37": 10.67472, + "38": 10.58181, + "39": 10.69126, + "40": 10.50351, + "41": 10.53015, + "42": 10.55529, + "43": 10.28638, + "44": 10.36341, + "45": 10.27258, + "46": 10.24593, 
+ "47": 10.45076, + "48": 10.23738, + "49": 9.99756, + "50": 10.25445, + "51": 10.20109, + "52": 10.10787, + "53": 10.34615, + "54": 10.24765, + "55": 10.18699, + "56": 9.95445, + "57": 9.81113, + "58": 10.10718, + "59": 9.88656, + "60": 9.8098, + "61": 9.95021, + "62": 10.20123, + "63": 9.64325, + "64": 9.99571, + "65": 9.24409, + "66": 9.90919, + "67": 9.59742, + "68": 9.97199, + "69": 9.96262, + "70": 9.91024, + "71": 9.78581, + "72": 9.77311, + "73": 9.66157, + "74": 9.16191, + "75": 9.58173, + "76": 9.26165, + "77": 10.17527, + "78": 9.85663, + "79": 9.50663, + "80": 9.54167, + "81": 9.61305, + "82": 9.80599, + "83": 9.44744, + "84": 9.52725, + "85": 9.7262, + "86": 9.1912, + "87": 9.68768, + "88": 9.85199, + "89": 9.71342, + "90": 9.90242, + "91": 9.4603, + "92": 9.46187, + "93": 9.19485, + "94": 8.93416, + "95": 9.60208, + "96": 9.61859, + "97": 9.39629, + "98": 9.76032, + "99": 8.98677, + "100": 9.49424 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 684610560.0, + "2": 684610560.0, + "3": 684610560.0, + "4": 684610560.0, + "5": 684610560.0, + "6": 684610560.0, + "7": 684610560.0, + "8": 684610560.0, + "9": 684610560.0, + "10": 684610560.0, + "11": 684610560.0, + "12": 684610560.0, + "13": 684610560.0, + "14": 684610560.0, + "15": 684610560.0, + "16": 684610560.0, + "17": 1043027456.0, + "18": 1043027456.0, + "19": 1043027456.0, + "20": 1043027456.0, + "21": 1043027456.0, + "22": 1043027456.0, + "23": 1043027456.0, + "24": 1043027456.0, + "25": 1043027456.0, + "26": 1043027456.0, + "27": 1043027456.0, + "28": 1043027456.0, + "29": 1043027456.0, + "30": 1043027456.0, + "31": 1043027456.0, + "32": 1043027456.0, + "33": 1043027456.0, + "34": 1043027456.0, + "35": 1043027456.0, + "36": 1043027456.0, + "37": 1043027456.0, + "38": 1043027456.0, + "39": 1043027456.0, + "40": 1043027456.0, + "41": 1043027456.0, + "42": 1043027456.0, + "43": 1043027456.0, + "44": 1043027456.0, + "45": 1043027456.0, 
+ "46": 1043027456.0, + "47": 1043027456.0, + "48": 1043027456.0, + "49": 1043027456.0, + "50": 1043027456.0, + "51": 1043027456.0, + "52": 1043027456.0, + "53": 1043027456.0, + "54": 1043027456.0, + "55": 1043027456.0, + "56": 1043027456.0, + "57": 1043027456.0, + "58": 1043027456.0, + "59": 1043027456.0, + "60": 1043027456.0, + "61": 1043027456.0, + "62": 1043027456.0, + "63": 1043027456.0, + "64": 1043027456.0, + "65": 1043027456.0, + "66": 1043027456.0, + "67": 1043027456.0, + "68": 1043027456.0, + "69": 1043027456.0, + "70": 1043027456.0, + "71": 1043027456.0, + "72": 1043027456.0, + "73": 1043027456.0, + "74": 1043027456.0, + "75": 1043027456.0, + "76": 1043027456.0, + "77": 1043027456.0, + "78": 1043027456.0, + "79": 1043027456.0, + "80": 1043027456.0, + "81": 1043027456.0, + "82": 1043027456.0, + "83": 1043027456.0, + "84": 1043027456.0, + "85": 1043027456.0, + "86": 1043027456.0, + "87": 1043027456.0, + "88": 1043027456.0, + "89": 1043027456.0, + "90": 1043027456.0, + "91": 1043027456.0, + "92": 1043027456.0, + "93": 1043027456.0, + "94": 1043027456.0, + "95": 1043027456.0, + "96": 1043027456.0, + "97": 1043027456.0, + "98": 1043027456.0, + "99": 1043027456.0, + "100": 1043027456.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3187304960.0, + "2": 3187305472.0, + "3": 3187305472.0, + "4": 3187305472.0, + "5": 3187305472.0, + "6": 3187305472.0, + "7": 3187305472.0, + "8": 3187305472.0, + "9": 3187305472.0, + "10": 3187305472.0, + "11": 3187305472.0, + "12": 3187305472.0, + "13": 3187305472.0, + "14": 3187305472.0, + "15": 3187305472.0, + "16": 3187305472.0, + "17": 3187305472.0, + "18": 3547033088.0, + "19": 3547033088.0, + "20": 3547033088.0, + "21": 3547033088.0, + "22": 3547033088.0, + "23": 3547033088.0, + "24": 3547033088.0, + "25": 3547033088.0, + "26": 3547033088.0, + "27": 3547033088.0, + "28": 3547033088.0, + "29": 3547033088.0, + "30": 3547033088.0, + "31": 3547033088.0, + 
"32": 3547033088.0, + "33": 3547033088.0, + "34": 3547033088.0, + "35": 3547033088.0, + "36": 3547033088.0, + "37": 3547033088.0, + "38": 3547033088.0, + "39": 3547033088.0, + "40": 3547033088.0, + "41": 3547033088.0, + "42": 3547033088.0, + "43": 3547033088.0, + "44": 3547033088.0, + "45": 3547033088.0, + "46": 3547033088.0, + "47": 3547033088.0, + "48": 3547033088.0, + "49": 3547033088.0, + "50": 3547033088.0, + "51": 3547033088.0, + "52": 3547033088.0, + "53": 3547033088.0, + "54": 3547033088.0, + "55": 3547033088.0, + "56": 3547033088.0, + "57": 3547033088.0, + "58": 3547033088.0, + "59": 3547033088.0, + "60": 3547033088.0, + "61": 3547033088.0, + "62": 3547033088.0, + "63": 3547033088.0, + "64": 3547033088.0, + "65": 3547033088.0, + "66": 3547033088.0, + "67": 3547033088.0, + "68": 3547033088.0, + "69": 3547033088.0, + "70": 3547033088.0, + "71": 3547033088.0, + "72": 3547033088.0, + "73": 3547033088.0, + "74": 3547033088.0, + "75": 3547033088.0, + "76": 3547033088.0, + "77": 3547033088.0, + "78": 3547033088.0, + "79": 3547033088.0, + "80": 3547033088.0, + "81": 3547033088.0, + "82": 3547033088.0, + "83": 3547033088.0, + "84": 3547033088.0, + "85": 3547033088.0, + "86": 3547033088.0, + "87": 3547033088.0, + "88": 3547033088.0, + "89": 3547033088.0, + "90": 3547033088.0, + "91": 3547033088.0, + "92": 3547033088.0, + "93": 3547033088.0, + "94": 3547033088.0, + "95": 3547033088.0, + "96": 3547033088.0, + "97": 3547033088.0, + "98": 3547033088.0, + "99": 3547033088.0, + "100": 3547033088.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 13.07277, + "2": 0.17981, + "3": 0.14386, + "4": 0.1435, + "5": 0.14361, + "6": 0.14398, + "7": 0.14414, + "8": 0.14134, + "9": 0.14066, + "10": 0.14194, + "11": 0.14352, + "12": 0.14166, + "13": 0.14151, + "14": 0.1412, + "15": 0.14002, + "16": 0.13993, + "17": 0.30867, + "18": 0.15579, + "19": 0.15102, + "20": 0.15133, + "21": 0.14959, + "22": 0.14048, + "23": 
0.14802, + "24": 0.14897, + "25": 0.14939, + "26": 0.14898, + "27": 0.14842, + "28": 0.14823, + "29": 0.14857, + "30": 0.14925, + "31": 0.15012, + "32": 0.14855, + "33": 0.14814, + "34": 0.14919, + "35": 0.14741, + "36": 0.14744, + "37": 0.14683, + "38": 0.14765, + "39": 0.14761, + "40": 0.14793, + "41": 0.1474, + "42": 0.14696, + "43": 0.1474, + "44": 0.14654, + "45": 0.14791, + "46": 0.14781, + "47": 0.14668, + "48": 0.14704, + "49": 0.14651, + "50": 0.14572, + "51": 0.15362, + "52": 0.14601, + "53": 0.14563, + "54": 0.14741, + "55": 0.14637, + "56": 0.14559, + "57": 0.14652, + "58": 0.14699, + "59": 0.14779, + "60": 0.1462, + "61": 0.14772, + "62": 0.14661, + "63": 0.14845, + "64": 0.14671, + "65": 0.1482, + "66": 0.14822, + "67": 0.14825, + "68": 0.14639, + "69": 0.15372, + "70": 0.14987, + "71": 0.15493, + "72": 0.1481, + "73": 0.15538, + "74": 0.14975, + "75": 0.15142, + "76": 0.15038, + "77": 0.15289, + "78": 0.14615, + "79": 0.14637, + "80": 0.14753, + "81": 0.14757, + "82": 0.14613, + "83": 0.14695, + "84": 0.14643, + "85": 0.14587, + "86": 0.15058, + "87": 0.14782, + "88": 0.1457, + "89": 0.14638, + "90": 0.14656, + "91": 0.14569, + "92": 0.14658, + "93": 0.14636, + "94": 0.14616, + "95": 0.14633, + "96": 0.14546, + "97": 0.14634, + "98": 0.14579, + "99": 0.14537, + "100": 0.14711 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2245.0, + "18": 2160.0, + "19": 2344.0, + "20": 1969.0, + "21": 1966.0, + "22": "nan", + "23": 2369.0, + "24": 1914.0, + "25": 1863.0, + "26": 1931.0, + "27": 2040.0, + "28": 2378.0, + "29": 2411.0, + "30": 2312.0, + "31": 1759.0, + "32": 2303.0, + "33": 2170.0, + "34": 1860.0, + "35": 2063.0, + "36": 2040.0, + "37": 2464.0, + "38": 2129.0, + 
"39": 2616.0, + "40": 2212.0, + "41": 2402.0, + "42": 2290.0, + "43": 2083.0, + "44": 2083.0, + "45": 2333.0, + "46": 1979.0, + "47": 2653.0, + "48": 2424.0, + "49": 1878.0, + "50": 2369.0, + "51": 2318.0, + "52": 2456.0, + "53": 2905.0, + "54": 2495.0, + "55": 2357.0, + "56": 2295.0, + "57": 2256.0, + "58": 2752.0, + "59": 2319.0, + "60": 2500.0, + "61": 2883.0, + "62": 2791.0, + "63": 2396.0, + "64": 2838.0, + "65": 2438.0, + "66": 2880.0, + "67": 2596.0, + "68": 2940.0, + "69": 2730.0, + "70": 3075.0, + "71": 2957.0, + "72": 2334.0, + "73": 2995.0, + "74": 2178.0, + "75": 2803.0, + "76": 3073.0, + "77": 3411.0, + "78": 3517.0, + "79": 3430.0, + "80": 3568.0, + "81": 3657.0, + "82": 3328.0, + "83": 3188.0, + "84": 3296.0, + "85": 3675.0, + "86": 3300.0, + "87": 3966.0, + "88": 3275.0, + "89": 3995.0, + "90": 3397.0, + "91": 2658.0, + "92": 3409.0, + "93": 3067.0, + "94": 3727.0, + "95": 3468.0, + "96": 3802.0, + "97": 3448.0, + "98": 3735.0, + "99": 3426.0, + "100": 3267.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_h100.json index 66d41feb78a..6a5be6c0d9c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.85678, + "2": 10.86405, + "3": 10.86854, + "4": 10.85128, "5": 10.88398, + "6": 10.89024, + "7": 10.86645, + "8": 10.86924, + "9": 10.87305, "10": 10.84079, + "11": 10.87928, + "12": 10.8729, + "13": 10.87791, + "14": 10.8901, "15": 10.82504, + "16": 10.8296, + "17": 10.80874, + "18": 10.8116, + "19": 10.81543, 
"20": 10.71912, + "21": 10.70404, + "22": 10.56645, + "23": 10.71858, + "24": 10.60989, "25": 10.55479, + "26": 10.60874, + "27": 10.62302, + "28": 10.56954, + "29": 10.57966, "30": 10.35998, + "31": 10.11311, + "32": 10.46587, + "33": 10.45154, + "34": 10.20826, "35": 10.26937, + "36": 10.21924, + "37": 10.33852, + "38": 10.186, + "39": 10.3997, "40": 10.08396, + "41": 10.13418, + "42": 10.20887, + "43": 9.82537, + "44": 9.95906, "45": 9.82563, + "46": 9.80623, + "47": 10.13499, + "48": 9.84002, + "49": 9.52482, "50": 9.90725 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1654.0, + "2": 1646.0, + "3": 1565.0, + "4": 1759.0, "5": 1860.0, + "6": 1741.0, + "7": 1752.0, + "8": 1579.0, + "9": 1849.0, "10": 1317.0, + "11": 1901.0, + "12": 1702.0, + "13": 1872.0, + "14": 1781.0, "15": 1759.0, + "16": 1820.0, + "17": 1819.0, + "18": 1721.0, + "19": 1828.0, "20": 1730.0, + "21": 1935.0, + "22": 1764.0, + "23": 1962.0, + "24": 1564.0, "25": 1552.0, + "26": 1668.0, + "27": 1803.0, + "28": 1988.0, + "29": 1966.0, "30": 1895.0, + "31": 1532.0, + "32": 1866.0, + "33": 2026.0, + "34": 1906.0, "35": 1987.0, + "36": 1863.0, + "37": 2231.0, + "38": 2109.0, + "39": 2277.0, "40": 2099.0, + "41": 2209.0, + "42": 2227.0, + "43": 1913.0, + "44": 2129.0, "45": 1993.0, + "46": 2288.0, + "47": 2458.0, + "48": 2418.0, + "49": 2155.0, "50": 2085.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 777900032.0, + "2": 777900032.0, + "3": 777900032.0, + "4": 777900032.0, "5": 777900032.0, + "6": 777900032.0, + "7": 777900032.0, + "8": 777900032.0, + "9": 777900032.0, "10": 777900032.0, + "11": 777900032.0, + "12": 777900032.0, + "13": 777900032.0, + "14": 777900032.0, "15": 777900032.0, + "16": 777900032.0, + "17": 777900032.0, + "18": 777900032.0, + "19": 777900032.0, "20": 777900032.0, + "21": 777900032.0, + "22": 777900032.0, + "23": 777900032.0, + 
"24": 777900032.0, "25": 777900032.0, + "26": 777900032.0, + "27": 777900032.0, + "28": 777900032.0, + "29": 777900032.0, "30": 777900032.0, + "31": 777900032.0, + "32": 777900032.0, + "33": 777900032.0, + "34": 777900032.0, "35": 777900032.0, + "36": 777900032.0, + "37": 777900032.0, + "38": 777900032.0, + "39": 777900032.0, "40": 777900032.0, + "41": 777900032.0, + "42": 777900032.0, + "43": 777900032.0, + "44": 777900032.0, "45": 777900032.0, + "46": 777900032.0, + "47": 777900032.0, + "48": 777900032.0, + "49": 777900032.0, "50": 777900032.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2463815680.0, + "2": 2744478720.0, + "3": 2744478720.0, + "4": 2744478720.0, "5": 2744478720.0, + "6": 2744478720.0, + "7": 2744478720.0, + "8": 2744478720.0, + "9": 2744478720.0, "10": 2744478720.0, + "11": 2744478720.0, + "12": 2744478720.0, + "13": 2744478720.0, + "14": 2744478720.0, "15": 2744478720.0, + "16": 2744478720.0, + "17": 2744478720.0, + "18": 2744478720.0, + "19": 2744478720.0, "20": 2744478720.0, + "21": 2744478720.0, + "22": 2744478720.0, + "23": 2744478720.0, + "24": 2744478720.0, "25": 2744478720.0, + "26": 2744478720.0, + "27": 2744478720.0, + "28": 2744478720.0, + "29": 2744478720.0, "30": 2744478720.0, + "31": 2744478720.0, + "32": 2744478720.0, + "33": 2744478720.0, + "34": 2744478720.0, "35": 2744478720.0, + "36": 2744478720.0, + "37": 2744478720.0, + "38": 2744478720.0, + "39": 2744478720.0, "40": 2744478720.0, + "41": 2744478720.0, + "42": 2744478720.0, + "43": 2744478720.0, + "44": 2744478720.0, "45": 2744478720.0, + "46": 2744478720.0, + "47": 2744478720.0, + "48": 2744478720.0, + "49": 2744478720.0, "50": 2744478720.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 12.50471, - "5": 0.10661, - "10": 0.10734, - "15": 0.1053, - "20": 0.10696, - "25": 0.10794, - "30": 0.10635, - "35": 0.10713, - "40": 
0.10333, - "45": 0.10618, - "50": 0.10738 + "1": 11.05472, + "2": 0.1429, + "3": 0.12828, + "4": 0.12976, + "5": 0.12969, + "6": 0.12181, + "7": 0.12512, + "8": 0.12267, + "9": 0.12362, + "10": 0.12382, + "11": 0.1219, + "12": 0.12295, + "13": 0.12406, + "14": 0.12396, + "15": 0.12483, + "16": 0.12596, + "17": 0.12252, + "18": 0.12284, + "19": 0.12465, + "20": 0.12674, + "21": 0.12398, + "22": 0.12376, + "23": 0.12244, + "24": 0.12641, + "25": 0.1234, + "26": 0.12355, + "27": 0.12183, + "28": 0.12355, + "29": 0.12372, + "30": 0.12258, + "31": 0.1231, + "32": 0.12444, + "33": 0.12266, + "34": 0.12208, + "35": 0.12181, + "36": 0.12028, + "37": 0.12298, + "38": 0.1214, + "39": 0.12242, + "40": 0.12058, + "41": 0.12169, + "42": 0.1223, + "43": 0.1221, + "44": 0.12176, + "45": 0.12039, + "46": 0.12206, + "47": 0.12138, + "48": 0.12715, + "49": 0.12339, + "50": 0.12175 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..e8f7325e5f3 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.85678, + "2": 10.86405, + "3": 10.86854, + "4": 10.85128, + "5": 10.88398, + "6": 10.89024, + "7": 10.86645, + "8": 10.86924, + "9": 10.87305, + "10": 10.84079, + "11": 10.87928, + "12": 10.8729, + "13": 10.87791, + "14": 10.8901, + "15": 10.82504, + "16": 10.8296, + "17": 10.80874, + "18": 10.8116, + "19": 10.81543, + "20": 10.71912, + "21": 10.70404, + "22": 10.56645, + "23": 10.71858, + "24": 10.60989, + "25": 10.55479, + "26": 10.60874, + "27": 10.62302, + "28": 10.56954, + "29": 10.57966, + "30": 
10.35998, + "31": 10.11311, + "32": 10.46587, + "33": 10.45154, + "34": 10.20826, + "35": 10.26937, + "36": 10.21924, + "37": 10.33852, + "38": 10.186, + "39": 10.3997, + "40": 10.08396, + "41": 10.13418, + "42": 10.20887, + "43": 9.82537, + "44": 9.95906, + "45": 9.82563, + "46": 9.80623, + "47": 10.13499, + "48": 9.84002, + "49": 9.52482, + "50": 9.90725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1654.0, + "2": 1646.0, + "3": 1565.0, + "4": 1759.0, + "5": 1860.0, + "6": 1741.0, + "7": 1752.0, + "8": 1579.0, + "9": 1849.0, + "10": 1317.0, + "11": 1901.0, + "12": 1702.0, + "13": 1872.0, + "14": 1781.0, + "15": 1759.0, + "16": 1820.0, + "17": 1819.0, + "18": 1721.0, + "19": 1828.0, + "20": 1730.0, + "21": 1935.0, + "22": 1764.0, + "23": 1962.0, + "24": 1564.0, + "25": 1552.0, + "26": 1668.0, + "27": 1803.0, + "28": 1988.0, + "29": 1966.0, + "30": 1895.0, + "31": 1532.0, + "32": 1866.0, + "33": 2026.0, + "34": 1906.0, + "35": 1987.0, + "36": 1863.0, + "37": 2231.0, + "38": 2109.0, + "39": 2277.0, + "40": 2099.0, + "41": 2209.0, + "42": 2227.0, + "43": 1913.0, + "44": 2129.0, + "45": 1993.0, + "46": 2288.0, + "47": 2458.0, + "48": 2418.0, + "49": 2155.0, + "50": 2085.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 777900032.0, + "2": 777900032.0, + "3": 777900032.0, + "4": 777900032.0, + "5": 777900032.0, + "6": 777900032.0, + "7": 777900032.0, + "8": 777900032.0, + "9": 777900032.0, + "10": 777900032.0, + "11": 777900032.0, + "12": 777900032.0, + "13": 777900032.0, + "14": 777900032.0, + "15": 777900032.0, + "16": 777900032.0, + "17": 777900032.0, + "18": 777900032.0, + "19": 777900032.0, + "20": 777900032.0, + "21": 777900032.0, + "22": 777900032.0, + "23": 777900032.0, + "24": 777900032.0, + "25": 777900032.0, + "26": 777900032.0, + "27": 777900032.0, + "28": 777900032.0, + "29": 777900032.0, + "30": 777900032.0, + "31": 777900032.0, 
+ "32": 777900032.0, + "33": 777900032.0, + "34": 777900032.0, + "35": 777900032.0, + "36": 777900032.0, + "37": 777900032.0, + "38": 777900032.0, + "39": 777900032.0, + "40": 777900032.0, + "41": 777900032.0, + "42": 777900032.0, + "43": 777900032.0, + "44": 777900032.0, + "45": 777900032.0, + "46": 777900032.0, + "47": 777900032.0, + "48": 777900032.0, + "49": 777900032.0, + "50": 777900032.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2463815680.0, + "2": 2744478720.0, + "3": 2744478720.0, + "4": 2744478720.0, + "5": 2744478720.0, + "6": 2744478720.0, + "7": 2744478720.0, + "8": 2744478720.0, + "9": 2744478720.0, + "10": 2744478720.0, + "11": 2744478720.0, + "12": 2744478720.0, + "13": 2744478720.0, + "14": 2744478720.0, + "15": 2744478720.0, + "16": 2744478720.0, + "17": 2744478720.0, + "18": 2744478720.0, + "19": 2744478720.0, + "20": 2744478720.0, + "21": 2744478720.0, + "22": 2744478720.0, + "23": 2744478720.0, + "24": 2744478720.0, + "25": 2744478720.0, + "26": 2744478720.0, + "27": 2744478720.0, + "28": 2744478720.0, + "29": 2744478720.0, + "30": 2744478720.0, + "31": 2744478720.0, + "32": 2744478720.0, + "33": 2744478720.0, + "34": 2744478720.0, + "35": 2744478720.0, + "36": 2744478720.0, + "37": 2744478720.0, + "38": 2744478720.0, + "39": 2744478720.0, + "40": 2744478720.0, + "41": 2744478720.0, + "42": 2744478720.0, + "43": 2744478720.0, + "44": 2744478720.0, + "45": 2744478720.0, + "46": 2744478720.0, + "47": 2744478720.0, + "48": 2744478720.0, + "49": 2744478720.0, + "50": 2744478720.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.35419, + "2": 0.13991, + "3": 0.10767, + "4": 0.10938, + "5": 0.10724, + "6": 0.10478, + "7": 0.10552, + "8": 0.10656, + "9": 0.10556, + "10": 0.10532, + "11": 0.10534, + "12": 0.10534, + "13": 0.10527, + "14": 0.10709, + "15": 0.10495, + "16": 0.10604, + "17": 0.10965, + "18": 
0.1088, + "19": 0.1041, + "20": 0.10506, + "21": 0.1048, + "22": 0.10602, + "23": 0.10565, + "24": 0.1054, + "25": 0.10522, + "26": 0.10463, + "27": 0.10589, + "28": 0.10459, + "29": 0.10668, + "30": 0.10356, + "31": 0.10981, + "32": 0.10384, + "33": 0.1044, + "34": 0.10384, + "35": 0.10498, + "36": 0.10335, + "37": 0.10417, + "38": 0.10399, + "39": 0.10546, + "40": 0.10397, + "41": 0.10485, + "42": 0.104, + "43": 0.10561, + "44": 0.10556, + "45": 0.10548, + "46": 0.10502, + "47": 0.10566, + "48": 0.10496, + "49": 0.1064, + "50": 0.10702 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..5517997e6c1 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.85678, + "2": 10.86405, + "3": 10.86854, + "4": 10.85128, + "5": 10.88398, + "6": 10.89024, + "7": 10.86645, + "8": 10.86924, + "9": 10.87305, + "10": 10.84079, + "11": 10.87928, + "12": 10.8729, + "13": 10.87791, + "14": 10.8901, + "15": 10.82504, + "16": 10.8296, + "17": 10.80874, + "18": 10.8116, + "19": 10.81543, + "20": 10.71912, + "21": 10.70404, + "22": 10.56645, + "23": 10.71858, + "24": 10.60989, + "25": 10.55479, + "26": 10.60874, + "27": 10.62302, + "28": 10.56954, + "29": 10.57966, + "30": 10.35998, + "31": 10.11311, + "32": 10.46587, + "33": 10.45154, + "34": 10.20826, + "35": 10.26937, + "36": 10.21924, + "37": 10.33852, + "38": 10.186, + "39": 10.3997, + "40": 10.08396, + "41": 10.13418, + "42": 10.20887, + "43": 9.82537, + "44": 9.95906, + "45": 9.82563, + "46": 9.80623, + "47": 10.13499, + "48": 9.84002, + "49": 9.52482, + 
"50": 9.90725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1654.0, + "2": 1646.0, + "3": 1565.0, + "4": 1759.0, + "5": 1860.0, + "6": 1741.0, + "7": 1752.0, + "8": 1579.0, + "9": 1849.0, + "10": 1317.0, + "11": 1901.0, + "12": 1702.0, + "13": 1872.0, + "14": 1781.0, + "15": 1759.0, + "16": 1820.0, + "17": 1819.0, + "18": 1721.0, + "19": 1828.0, + "20": 1730.0, + "21": 1935.0, + "22": 1764.0, + "23": 1962.0, + "24": 1564.0, + "25": 1552.0, + "26": 1668.0, + "27": 1803.0, + "28": 1988.0, + "29": 1966.0, + "30": 1895.0, + "31": 1532.0, + "32": 1866.0, + "33": 2026.0, + "34": 1906.0, + "35": 1987.0, + "36": 1863.0, + "37": 2231.0, + "38": 2109.0, + "39": 2277.0, + "40": 2099.0, + "41": 2209.0, + "42": 2227.0, + "43": 1913.0, + "44": 2129.0, + "45": 1993.0, + "46": 2288.0, + "47": 2458.0, + "48": 2418.0, + "49": 2155.0, + "50": 2085.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 777900032.0, + "2": 777900032.0, + "3": 777900032.0, + "4": 777900032.0, + "5": 777900032.0, + "6": 777900032.0, + "7": 777900032.0, + "8": 777900032.0, + "9": 777900032.0, + "10": 777900032.0, + "11": 777900032.0, + "12": 777900032.0, + "13": 777900032.0, + "14": 777900032.0, + "15": 777900032.0, + "16": 777900032.0, + "17": 777900032.0, + "18": 777900032.0, + "19": 777900032.0, + "20": 777900032.0, + "21": 777900032.0, + "22": 777900032.0, + "23": 777900032.0, + "24": 777900032.0, + "25": 777900032.0, + "26": 777900032.0, + "27": 777900032.0, + "28": 777900032.0, + "29": 777900032.0, + "30": 777900032.0, + "31": 777900032.0, + "32": 777900032.0, + "33": 777900032.0, + "34": 777900032.0, + "35": 777900032.0, + "36": 777900032.0, + "37": 777900032.0, + "38": 777900032.0, + "39": 777900032.0, + "40": 777900032.0, + "41": 777900032.0, + "42": 777900032.0, + "43": 777900032.0, + "44": 777900032.0, + "45": 777900032.0, + "46": 777900032.0, + "47": 777900032.0, + "48": 
777900032.0, + "49": 777900032.0, + "50": 777900032.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2463815680.0, + "2": 2744478720.0, + "3": 2744478720.0, + "4": 2744478720.0, + "5": 2744478720.0, + "6": 2744478720.0, + "7": 2744478720.0, + "8": 2744478720.0, + "9": 2744478720.0, + "10": 2744478720.0, + "11": 2744478720.0, + "12": 2744478720.0, + "13": 2744478720.0, + "14": 2744478720.0, + "15": 2744478720.0, + "16": 2744478720.0, + "17": 2744478720.0, + "18": 2744478720.0, + "19": 2744478720.0, + "20": 2744478720.0, + "21": 2744478720.0, + "22": 2744478720.0, + "23": 2744478720.0, + "24": 2744478720.0, + "25": 2744478720.0, + "26": 2744478720.0, + "27": 2744478720.0, + "28": 2744478720.0, + "29": 2744478720.0, + "30": 2744478720.0, + "31": 2744478720.0, + "32": 2744478720.0, + "33": 2744478720.0, + "34": 2744478720.0, + "35": 2744478720.0, + "36": 2744478720.0, + "37": 2744478720.0, + "38": 2744478720.0, + "39": 2744478720.0, + "40": 2744478720.0, + "41": 2744478720.0, + "42": 2744478720.0, + "43": 2744478720.0, + "44": 2744478720.0, + "45": 2744478720.0, + "46": 2744478720.0, + "47": 2744478720.0, + "48": 2744478720.0, + "49": 2744478720.0, + "50": 2744478720.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.34716, + "2": 0.14227, + "3": 0.12689, + "4": 0.13008, + "5": 0.12281, + "6": 0.12008, + "7": 0.11926, + "8": 0.11756, + "9": 0.11844, + "10": 0.11959, + "11": 0.11763, + "12": 0.11828, + "13": 0.11955, + "14": 0.11929, + "15": 0.11867, + "16": 0.11859, + "17": 0.12095, + "18": 0.11695, + "19": 0.11774, + "20": 0.11863, + "21": 0.11942, + "22": 0.12117, + "23": 0.11884, + "24": 0.12003, + "25": 0.11915, + "26": 0.11977, + "27": 0.11816, + "28": 0.12705, + "29": 0.11815, + "30": 0.12166, + "31": 0.12023, + "32": 0.12154, + "33": 0.12781, + "34": 0.12209, + "35": 0.12372, + "36": 0.12109, + "37": 0.11897, + "38": 0.12385, 
+ "39": 0.11961, + "40": 0.11846, + "41": 0.11902, + "42": 0.11915, + "43": 0.12286, + "44": 0.11759, + "45": 0.11912, + "46": 0.1204, + "47": 0.12027, + "48": 0.12073, + "49": 0.1164, + "50": 0.11734 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..e6214f74d31 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79203, + "2": 10.80273, + "3": 10.80585, + "4": 10.77187, + "5": 10.84593, + "6": 10.86693, + "7": 10.82551, + "8": 10.81215, + "9": 10.83332, + "10": 10.76951, + "11": 10.89062, + "12": 10.84504, + "13": 10.85859, + "14": 10.8801, + "15": 10.78971, + "16": 10.78188, + "17": 10.75787, + "18": 10.79172, + "19": 10.79529, + "20": 10.67886, + "21": 10.65973, + "22": 10.50045, + "23": 10.71219, + "24": 10.55058, + "25": 10.50431, + "26": 10.5802, + "27": 10.58378, + "28": 10.55688, + "29": 10.55907, + "30": 10.33089, + "31": 10.08209, + "32": 10.44504, + "33": 10.44161, + "34": 10.19769, + "35": 10.25278, + "36": 10.19158, + "37": 10.31839, + "38": 10.16293, + "39": 10.37474, + "40": 10.05241, + "41": 10.13501, + "42": 10.18884, + "43": 9.8066, + "44": 9.92658, + "45": 9.80259, + "46": 9.81165, + "47": 10.12682, + "48": 9.8236, + "49": 9.51061, + "50": 9.88804 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1649.0, + "2": 1710.0, + "3": 1754.0, + "4": 1850.0, + "5": 1890.0, + "6": 1767.0, + "7": 1830.0, + "8": 1723.0, + "9": 1758.0, + "10": 1397.0, + "11": 1890.0, + "12": 1657.0, + "13": 1761.0, + 
"14": 1813.0, + "15": 1928.0, + "16": 1828.0, + "17": 1933.0, + "18": 1633.0, + "19": 1777.0, + "20": 1565.0, + "21": 1807.0, + "22": 1678.0, + "23": 2014.0, + "24": 1766.0, + "25": 1699.0, + "26": 1741.0, + "27": 1800.0, + "28": 1937.0, + "29": 1921.0, + "30": 1943.0, + "31": 1527.0, + "32": 1848.0, + "33": 2144.0, + "34": 1925.0, + "35": 2018.0, + "36": 1937.0, + "37": 2297.0, + "38": 2214.0, + "39": 2374.0, + "40": 2191.0, + "41": 2369.0, + "42": 2299.0, + "43": 1963.0, + "44": 2146.0, + "45": 2207.0, + "46": 2332.0, + "47": 2590.0, + "48": 2428.0, + "49": 2255.0, + "50": 2362.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 782094336.0, + "2": 782094336.0, + "3": 782094336.0, + "4": 782094336.0, + "5": 782094336.0, + "6": 782094336.0, + "7": 782094336.0, + "8": 782094336.0, + "9": 782094336.0, + "10": 782094336.0, + "11": 782094336.0, + "12": 782094336.0, + "13": 782094336.0, + "14": 782094336.0, + "15": 782094336.0, + "16": 782094336.0, + "17": 782094336.0, + "18": 782094336.0, + "19": 782094336.0, + "20": 782094336.0, + "21": 782094336.0, + "22": 782094336.0, + "23": 782094336.0, + "24": 782094336.0, + "25": 782094336.0, + "26": 782094336.0, + "27": 782094336.0, + "28": 782094336.0, + "29": 782094336.0, + "30": 782094336.0, + "31": 782094336.0, + "32": 782094336.0, + "33": 782094336.0, + "34": 782094336.0, + "35": 782094336.0, + "36": 782094336.0, + "37": 782094336.0, + "38": 782094336.0, + "39": 782094336.0, + "40": 782094336.0, + "41": 782094336.0, + "42": 782094336.0, + "43": 782094336.0, + "44": 782094336.0, + "45": 782094336.0, + "46": 782094336.0, + "47": 782094336.0, + "48": 782094336.0, + "49": 782094336.0, + "50": 782094336.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2462767104.0, + "2": 2748673024.0, + "3": 2748673024.0, + "4": 2748673024.0, + "5": 2748673024.0, + "6": 2748673024.0, + "7": 2748673024.0, + 
"8": 2748673024.0, + "9": 2748673024.0, + "10": 2748673024.0, + "11": 2748673024.0, + "12": 2748673024.0, + "13": 2748673024.0, + "14": 2748673024.0, + "15": 2748673024.0, + "16": 2748673024.0, + "17": 2748673024.0, + "18": 2748673024.0, + "19": 2748673024.0, + "20": 2748673024.0, + "21": 2748673024.0, + "22": 2748673024.0, + "23": 2748673024.0, + "24": 2748673024.0, + "25": 2748673024.0, + "26": 2748673024.0, + "27": 2748673024.0, + "28": 2748673024.0, + "29": 2748673024.0, + "30": 2748673024.0, + "31": 2748673024.0, + "32": 2748673024.0, + "33": 2748673024.0, + "34": 2748673024.0, + "35": 2748673024.0, + "36": 2748673024.0, + "37": 2748673024.0, + "38": 2748673024.0, + "39": 2748673024.0, + "40": 2748673024.0, + "41": 2748673024.0, + "42": 2748673024.0, + "43": 2748673024.0, + "44": 2748673024.0, + "45": 2748673024.0, + "46": 2748673024.0, + "47": 2748673024.0, + "48": 2748673024.0, + "49": 2748673024.0, + "50": 2748673024.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.75952, + "2": 0.21448, + "3": 0.18235, + "4": 0.18003, + "5": 0.17893, + "6": 0.17927, + "7": 0.1794, + "8": 0.17993, + "9": 0.17782, + "10": 0.17913, + "11": 0.18107, + "12": 0.18068, + "13": 0.18061, + "14": 0.17963, + "15": 0.17853, + "16": 0.17955, + "17": 0.17969, + "18": 0.17916, + "19": 0.18341, + "20": 0.18099, + "21": 0.18071, + "22": 0.17995, + "23": 0.17926, + "24": 0.17948, + "25": 0.18014, + "26": 0.17924, + "27": 0.1802, + "28": 0.17909, + "29": 0.18091, + "30": 0.18001, + "31": 0.17868, + "32": 0.17758, + "33": 0.1779, + "34": 0.17881, + "35": 0.17826, + "36": 0.1779, + "37": 0.17715, + "38": 0.17751, + "39": 0.17819, + "40": 0.17892, + "41": 0.17948, + "42": 0.45058, + "43": 0.18152, + "44": 0.17768, + "45": 0.17817, + "46": 0.17937, + "47": 0.17662, + "48": 0.17804, + "49": 0.17764, + "50": 0.17626 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..e0e25d127f8 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79203, + "2": 10.80273, + "3": 10.80585, + "4": 10.77187, + "5": 10.84593, + "6": 10.86693, + "7": 10.82551, + "8": 10.81215, + "9": 10.83332, + "10": 10.76951, + "11": 10.89062, + "12": 10.84504, + "13": 10.85859, + "14": 10.8801, + "15": 10.78971, + "16": 10.78188, + "17": 10.75787, + "18": 10.79172, + "19": 10.79529, + "20": 10.67886, + "21": 10.65973, + "22": 10.50045, + "23": 10.71219, + "24": 10.55058, + "25": 10.50431, + "26": 10.5802, + "27": 10.58378, + "28": 10.55688, + "29": 10.55907, + "30": 10.33089, + "31": 10.08209, + "32": 10.44504, + "33": 10.44161, + "34": 10.19769, + "35": 10.25278, + "36": 10.19158, + "37": 10.31839, + "38": 10.16293, + "39": 10.37474, + "40": 10.05241, + "41": 10.13501, + "42": 10.18884, + "43": 9.8066, + "44": 9.92658, + "45": 9.80259, + "46": 9.81165, + "47": 10.12682, + "48": 9.8236, + "49": 9.51061, + "50": 9.88804 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1649.0, + "2": 1710.0, + "3": 1754.0, + "4": 1850.0, + "5": 1890.0, + "6": 1767.0, + "7": 1830.0, + "8": 1723.0, + "9": 1758.0, + "10": 1397.0, + "11": 1890.0, + "12": 1657.0, + "13": 1761.0, + "14": 1813.0, + "15": 1928.0, + "16": 1828.0, + "17": 1933.0, + "18": 1633.0, + "19": 1777.0, + "20": 1565.0, + "21": 1807.0, + "22": 1678.0, + "23": 2014.0, + "24": 1766.0, + "25": 1699.0, + "26": 1741.0, + "27": 1800.0, + "28": 1937.0, + "29": 1921.0, + "30": 
1943.0, + "31": 1527.0, + "32": 1848.0, + "33": 2144.0, + "34": 1925.0, + "35": 2018.0, + "36": 1937.0, + "37": 2297.0, + "38": 2214.0, + "39": 2374.0, + "40": 2191.0, + "41": 2369.0, + "42": 2299.0, + "43": 1963.0, + "44": 2146.0, + "45": 2207.0, + "46": 2332.0, + "47": 2590.0, + "48": 2428.0, + "49": 2255.0, + "50": 2362.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 782094336.0, + "2": 782094336.0, + "3": 782094336.0, + "4": 782094336.0, + "5": 782094336.0, + "6": 782094336.0, + "7": 782094336.0, + "8": 782094336.0, + "9": 782094336.0, + "10": 782094336.0, + "11": 782094336.0, + "12": 782094336.0, + "13": 782094336.0, + "14": 782094336.0, + "15": 782094336.0, + "16": 782094336.0, + "17": 782094336.0, + "18": 782094336.0, + "19": 782094336.0, + "20": 782094336.0, + "21": 782094336.0, + "22": 782094336.0, + "23": 782094336.0, + "24": 782094336.0, + "25": 782094336.0, + "26": 782094336.0, + "27": 782094336.0, + "28": 782094336.0, + "29": 782094336.0, + "30": 782094336.0, + "31": 782094336.0, + "32": 782094336.0, + "33": 782094336.0, + "34": 782094336.0, + "35": 782094336.0, + "36": 782094336.0, + "37": 782094336.0, + "38": 782094336.0, + "39": 782094336.0, + "40": 782094336.0, + "41": 782094336.0, + "42": 782094336.0, + "43": 782094336.0, + "44": 782094336.0, + "45": 782094336.0, + "46": 782094336.0, + "47": 782094336.0, + "48": 782094336.0, + "49": 782094336.0, + "50": 782094336.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2462767104.0, + "2": 2748673024.0, + "3": 2748673024.0, + "4": 2748673024.0, + "5": 2748673024.0, + "6": 2748673024.0, + "7": 2748673024.0, + "8": 2748673024.0, + "9": 2748673024.0, + "10": 2748673024.0, + "11": 2748673024.0, + "12": 2748673024.0, + "13": 2748673024.0, + "14": 2748673024.0, + "15": 2748673024.0, + "16": 2748673024.0, + "17": 2748673024.0, + "18": 2748673024.0, + "19": 2748673024.0, + 
"20": 2748673024.0, + "21": 2748673024.0, + "22": 2748673024.0, + "23": 2748673024.0, + "24": 2748673024.0, + "25": 2748673024.0, + "26": 2748673024.0, + "27": 2748673024.0, + "28": 2748673024.0, + "29": 2748673024.0, + "30": 2748673024.0, + "31": 2748673024.0, + "32": 2748673024.0, + "33": 2748673024.0, + "34": 2748673024.0, + "35": 2748673024.0, + "36": 2748673024.0, + "37": 2748673024.0, + "38": 2748673024.0, + "39": 2748673024.0, + "40": 2748673024.0, + "41": 2748673024.0, + "42": 2748673024.0, + "43": 2748673024.0, + "44": 2748673024.0, + "45": 2748673024.0, + "46": 2748673024.0, + "47": 2748673024.0, + "48": 2748673024.0, + "49": 2748673024.0, + "50": 2748673024.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.23168, + "2": 0.20941, + "3": 0.18259, + "4": 0.18034, + "5": 0.18066, + "6": 0.17945, + "7": 0.17976, + "8": 0.18065, + "9": 0.18143, + "10": 0.18186, + "11": 0.18118, + "12": 0.17934, + "13": 0.17804, + "14": 0.17863, + "15": 0.17803, + "16": 0.35778, + "17": 0.17914, + "18": 0.17741, + "19": 0.17754, + "20": 0.17681, + "21": 0.17586, + "22": 0.17817, + "23": 0.17672, + "24": 0.17747, + "25": 0.17716, + "26": 0.17607, + "27": 0.17666, + "28": 0.17643, + "29": 0.17611, + "30": 0.17755, + "31": 0.17964, + "32": 0.17651, + "33": 0.18061, + "34": 0.17677, + "35": 0.179, + "36": 0.17888, + "37": 0.17609, + "38": 0.17685, + "39": 0.17655, + "40": 0.37865, + "41": 0.17694, + "42": 0.17631, + "43": 0.17661, + "44": 0.17607, + "45": 0.17551, + "46": 0.1785, + "47": 0.17532, + "48": 0.17603, + "49": 0.17585, + "50": 0.17631 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 
00000000000..987f9cc4371 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.79449, + "2": 10.80656, + "3": 10.80727, + "4": 10.77389, + "5": 10.84829, + "6": 10.86736, + "7": 10.82922, + "8": 10.81537, + "9": 10.83956, + "10": 10.77652, + "11": 10.90107, + "12": 10.85927, + "13": 10.87396, + "14": 10.89723, + "15": 10.83961, + "16": 10.83508, + "17": 10.82101, + "18": 10.86029, + "19": 10.86558, + "20": 10.82896, + "21": 10.83275, + "22": 10.75286, + "23": 10.88062, + "24": 10.78219, + "25": 10.76607, + "26": 10.79522, + "27": 10.79866, + "28": 10.81697, + "29": 10.82169, + "30": 10.69891, + "31": 10.55698, + "32": 10.75759, + "33": 10.74362, + "34": 10.59976, + "35": 10.61772, + "36": 10.56389, + "37": 10.63614, + "38": 10.53029, + "39": 10.65358, + "40": 10.44072, + "41": 10.49636, + "42": 10.50954, + "43": 10.22362, + "44": 10.30902, + "45": 10.21065, + "46": 10.19943, + "47": 10.41641, + "48": 10.18128, + "49": 9.94311, + "50": 10.21224, + "51": 10.16759, + "52": 10.06895, + "53": 10.30707, + "54": 10.20911, + "55": 10.15688, + "56": 9.91474, + "57": 9.77696, + "58": 10.07417, + "59": 9.86333, + "60": 9.77328, + "61": 9.9292, + "62": 10.17156, + "63": 9.62041, + "64": 9.97113, + "65": 9.21979, + "66": 9.88693, + "67": 9.58363, + "68": 9.94922, + "69": 9.95271, + "70": 9.89312, + "71": 9.77658, + "72": 9.75435, + "73": 9.6497, + "74": 9.1439, + "75": 9.56121, + "76": 9.25111, + "77": 10.17063, + "78": 9.85402, + "79": 9.49965, + "80": 9.53086, + "81": 9.60555, + "82": 9.80179, + "83": 9.43744, + "84": 9.51987, + "85": 9.7196, + "86": 9.18595, + "87": 9.68687, + "88": 9.8443, + "89": 9.70586, + "90": 9.89977, + "91": 9.45029, + "92": 9.45356, + "93": 9.18554, + "94": 8.92968, + "95": 9.59767, + "96": 9.61491, + "97": 9.39084, + 
"98": 9.75667, + "99": 8.97921, + "100": 9.49001 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 570640384.0, + "2": 570640384.0, + "3": 570640384.0, + "4": 570640384.0, + "5": 570640384.0, + "6": 570640384.0, + "7": 570640384.0, + "8": 570640384.0, + "9": 570640384.0, + "10": 570640384.0, + "11": 570640384.0, + "12": 570640384.0, + "13": 570640384.0, + "14": 570640384.0, + "15": 570640384.0, + "16": 570640384.0, + "17": 852351488.0, + "18": 852351488.0, + "19": 852351488.0, + "20": 852351488.0, + "21": 852351488.0, + "22": 852351488.0, + "23": 852351488.0, + "24": 852351488.0, + "25": 852351488.0, + "26": 852351488.0, + "27": 852351488.0, + "28": 852351488.0, + "29": 852351488.0, + "30": 852351488.0, + "31": 852351488.0, + "32": 852351488.0, + "33": 852351488.0, + "34": 852351488.0, + "35": 852351488.0, + "36": 852351488.0, + "37": 852351488.0, + "38": 852351488.0, + "39": 852351488.0, + "40": 852351488.0, + "41": 852351488.0, + "42": 852351488.0, + "43": 852351488.0, + "44": 852351488.0, + "45": 852351488.0, + "46": 852351488.0, + "47": 852351488.0, + "48": 852351488.0, + "49": 852351488.0, + "50": 852351488.0, + "51": 852351488.0, + "52": 852351488.0, + "53": 852351488.0, + "54": 852351488.0, + "55": 852351488.0, + "56": 852351488.0, + "57": 852351488.0, + "58": 852351488.0, + "59": 852351488.0, + "60": 852351488.0, + "61": 852351488.0, + "62": 852351488.0, + "63": 852351488.0, + "64": 852351488.0, + "65": 852351488.0, + "66": 852351488.0, + "67": 852351488.0, + "68": 852351488.0, + "69": 852351488.0, + "70": 852351488.0, + "71": 852351488.0, + "72": 852351488.0, + "73": 852351488.0, + "74": 852351488.0, + "75": 852351488.0, + "76": 852351488.0, + "77": 852351488.0, + "78": 852351488.0, + "79": 852351488.0, + "80": 852351488.0, + "81": 852351488.0, + "82": 852351488.0, + "83": 852351488.0, + "84": 852351488.0, + "85": 852351488.0, + "86": 852351488.0, + "87": 852351488.0, + "88": 852351488.0, + 
"89": 852351488.0, + "90": 852351488.0, + "91": 852351488.0, + "92": 852351488.0, + "93": 852351488.0, + "94": 852351488.0, + "95": 852351488.0, + "96": 852351488.0, + "97": 852351488.0, + "98": 852351488.0, + "99": 852351488.0, + "100": 852351488.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2393217536.0, + "2": 2393218048.0, + "3": 2393218048.0, + "4": 2393218048.0, + "5": 2393218048.0, + "6": 2393218048.0, + "7": 2393218048.0, + "8": 2393218048.0, + "9": 2393218048.0, + "10": 2393218048.0, + "11": 2393218048.0, + "12": 2393218048.0, + "13": 2393218048.0, + "14": 2393218048.0, + "15": 2393218048.0, + "16": 2393218048.0, + "17": 2393218048.0, + "18": 2675191296.0, + "19": 2675191296.0, + "20": 2675191296.0, + "21": 2675191296.0, + "22": 2675191296.0, + "23": 2675191296.0, + "24": 2675191296.0, + "25": 2675191296.0, + "26": 2675191296.0, + "27": 2675191296.0, + "28": 2675191296.0, + "29": 2675191296.0, + "30": 2675191296.0, + "31": 2675191296.0, + "32": 2675191296.0, + "33": 2675191296.0, + "34": 2675191296.0, + "35": 2675191296.0, + "36": 2675191296.0, + "37": 2675191296.0, + "38": 2675191296.0, + "39": 2675191296.0, + "40": 2675191296.0, + "41": 2675191296.0, + "42": 2675191296.0, + "43": 2675191296.0, + "44": 2675191296.0, + "45": 2675191296.0, + "46": 2675191296.0, + "47": 2675191296.0, + "48": 2675191296.0, + "49": 2675191296.0, + "50": 2675191296.0, + "51": 2675191296.0, + "52": 2675191296.0, + "53": 2675191296.0, + "54": 2675191296.0, + "55": 2675191296.0, + "56": 2675191296.0, + "57": 2675191296.0, + "58": 2675191296.0, + "59": 2675191296.0, + "60": 2675191296.0, + "61": 2675191296.0, + "62": 2675191296.0, + "63": 2675191296.0, + "64": 2675191296.0, + "65": 2675191296.0, + "66": 2675191296.0, + "67": 2675191296.0, + "68": 2675191296.0, + "69": 2675191296.0, + "70": 2675191296.0, + "71": 2675191296.0, + "72": 2675191296.0, + "73": 2675191296.0, + "74": 2675191296.0, + "75": 
2675191296.0, + "76": 2675191296.0, + "77": 2675191296.0, + "78": 2675191296.0, + "79": 2675191296.0, + "80": 2675191296.0, + "81": 2675191296.0, + "82": 2675191296.0, + "83": 2675191296.0, + "84": 2675191296.0, + "85": 2675191296.0, + "86": 2675191296.0, + "87": 2675191296.0, + "88": 2675191296.0, + "89": 2675191296.0, + "90": 2675191296.0, + "91": 2675191296.0, + "92": 2675191296.0, + "93": 2675191296.0, + "94": 2675191296.0, + "95": 2675191296.0, + "96": 2675191296.0, + "97": 2675191296.0, + "98": 2675191296.0, + "99": 2675191296.0, + "100": 2675191296.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.25711, + "2": 0.20442, + "3": 0.31053, + "4": 0.17506, + "5": 0.17361, + "6": 0.16764, + "7": 0.16815, + "8": 0.16765, + "9": 0.16758, + "10": 0.17113, + "11": 0.16809, + "12": 0.17003, + "13": 0.16677, + "14": 0.16938, + "15": 0.16824, + "16": 0.16835, + "17": 0.24523, + "18": 0.17988, + "19": 0.17563, + "20": 0.17432, + "21": 0.17506, + "22": 0.17636, + "23": 0.17595, + "24": 0.17331, + "25": 0.17442, + "26": 0.17591, + "27": 0.17526, + "28": 0.17471, + "29": 0.17521, + "30": 0.17559, + "31": 0.17578, + "32": 0.17405, + "33": 0.17441, + "34": 0.17455, + "35": 0.17668, + "36": 0.17388, + "37": 0.17292, + "38": 0.17248, + "39": 0.17218, + "40": 0.17206, + "41": 0.17379, + "42": 0.17175, + "43": 0.17411, + "44": 0.17163, + "45": 0.17284, + "46": 0.17334, + "47": 0.17308, + "48": 0.17237, + "49": 0.17279, + "50": 0.17287, + "51": 0.18182, + "52": 0.17476, + "53": 0.17364, + "54": 0.17347, + "55": 0.1738, + "56": 0.17294, + "57": 0.17424, + "58": 0.17414, + "59": 0.17308, + "60": 0.17396, + "61": 0.17298, + "62": 0.17287, + "63": 0.17296, + "64": 0.17278, + "65": 0.17319, + "66": 0.17283, + "67": 0.17327, + "68": 0.17328, + "69": 0.17196, + "70": 0.17288, + "71": 0.1729, + "72": 0.1733, + "73": 0.17323, + "74": 0.17351, + "75": 0.17316, + "76": 0.17296, + "77": 0.17287, + "78": 0.17254, + "79": 0.17342, 
+ "80": 0.17324, + "81": 0.17326, + "82": 0.17333, + "83": 0.17397, + "84": 0.17448, + "85": 0.17529, + "86": 0.17422, + "87": 0.17326, + "88": 0.17393, + "89": 0.17292, + "90": 0.17379, + "91": 0.17366, + "92": 0.17324, + "93": 0.17397, + "94": 0.17409, + "95": 0.17371, + "96": 0.17366, + "97": 0.17346, + "98": 0.17343, + "99": 0.17375, + "100": 0.17351 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2437.0, + "18": 2405.0, + "19": 2950.0, + "20": 1827.0, + "21": 2154.0, + "22": 2731.0, + "23": 2609.0, + "24": 2290.0, + "25": 2325.0, + "26": 2079.0, + "27": 2138.0, + "28": 2702.0, + "29": 2576.0, + "30": 2528.0, + "31": 1895.0, + "32": 2628.0, + "33": 2325.0, + "34": 1928.0, + "35": 2061.0, + "36": 2153.0, + "37": 2600.0, + "38": 2350.0, + "39": 2997.0, + "40": 2053.0, + "41": 3352.0, + "42": 2497.0, + "43": 2867.0, + "44": 2109.0, + "45": 2490.0, + "46": 2279.0, + "47": 3051.0, + "48": 2527.0, + "49": 1973.0, + "50": 2887.0, + "51": 2310.0, + "52": 2526.0, + "53": 3705.0, + "54": 2888.0, + "55": 2440.0, + "56": 2496.0, + "57": 2338.0, + "58": 3283.0, + "59": 2849.0, + "60": 2893.0, + "61": 2956.0, + "62": 3134.0, + "63": 3275.0, + "64": 3176.0, + "65": 2318.0, + "66": 3857.0, + "67": 2606.0, + "68": 3313.0, + "69": 2826.0, + "70": 3665.0, + "71": 3011.0, + "72": 2693.0, + "73": 3357.0, + "74": 2271.0, + "75": 2955.0, + "76": 3617.0, + "77": 3936.0, + "78": 3951.0, + "79": 4065.0, + "80": 3665.0, + "81": 5191.0, + "82": 3511.0, + "83": 3263.0, + "84": 3876.0, + "85": 4048.0, + "86": 3414.0, + "87": 3980.0, + "88": 3617.0, + "89": 4400.0, + "90": 3695.0, + "91": 2857.0, + "92": 4432.0, + "93": 3494.0, + "94": 4438.0, + "95": 4076.0, + "96": 3948.0, + "97": 4242.0, + "98": 4943.0, 
+ "99": 3861.0, + "100": 3631.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..2bcd6d2eaf1 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.79449, + "2": 10.80656, + "3": 10.80727, + "4": 10.77389, + "5": 10.84829, + "6": 10.86736, + "7": 10.82922, + "8": 10.81537, + "9": 10.83956, + "10": 10.77652, + "11": 10.90107, + "12": 10.85927, + "13": 10.87396, + "14": 10.89723, + "15": 10.83961, + "16": 10.83508, + "17": 10.82101, + "18": 10.86029, + "19": 10.86558, + "20": 10.82896, + "21": 10.83275, + "22": 10.75286, + "23": 10.88062, + "24": 10.78219, + "25": 10.76607, + "26": 10.79522, + "27": 10.79866, + "28": 10.81697, + "29": 10.82169, + "30": 10.69891, + "31": 10.55698, + "32": 10.75759, + "33": 10.74362, + "34": 10.59976, + "35": 10.61772, + "36": 10.56389, + "37": 10.63614, + "38": 10.53029, + "39": 10.65358, + "40": 10.44072, + "41": 10.49636, + "42": 10.50954, + "43": 10.22362, + "44": 10.30902, + "45": 10.21065, + "46": 10.19943, + "47": 10.41641, + "48": 10.18128, + "49": 9.94311, + "50": 10.21224, + "51": 10.16759, + "52": 10.06895, + "53": 10.30707, + "54": 10.20911, + "55": 10.15688, + "56": 9.91474, + "57": 9.77696, + "58": 10.07417, + "59": 9.86333, + "60": 9.77328, + "61": 9.9292, + "62": 10.17156, + "63": 9.62041, + "64": 9.97113, + "65": 9.21979, + "66": 9.88693, + "67": 9.58363, + "68": 9.94922, + "69": 9.95271, + "70": 9.89312, + "71": 9.77658, + "72": 9.75435, + "73": 9.6497, + "74": 9.1439, + 
"75": 9.56121, + "76": 9.25111, + "77": 10.17063, + "78": 9.85402, + "79": 9.49965, + "80": 9.53086, + "81": 9.60555, + "82": 9.80179, + "83": 9.43744, + "84": 9.51987, + "85": 9.7196, + "86": 9.18595, + "87": 9.68687, + "88": 9.8443, + "89": 9.70586, + "90": 9.89977, + "91": 9.45029, + "92": 9.45356, + "93": 9.18554, + "94": 8.92968, + "95": 9.59767, + "96": 9.61491, + "97": 9.39084, + "98": 9.75667, + "99": 8.97921, + "100": 9.49001 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 570640384.0, + "2": 570640384.0, + "3": 570640384.0, + "4": 570640384.0, + "5": 570640384.0, + "6": 570640384.0, + "7": 570640384.0, + "8": 570640384.0, + "9": 570640384.0, + "10": 570640384.0, + "11": 570640384.0, + "12": 570640384.0, + "13": 570640384.0, + "14": 570640384.0, + "15": 570640384.0, + "16": 570640384.0, + "17": 852351488.0, + "18": 852351488.0, + "19": 852351488.0, + "20": 852351488.0, + "21": 852351488.0, + "22": 852351488.0, + "23": 852351488.0, + "24": 852351488.0, + "25": 852351488.0, + "26": 852351488.0, + "27": 852351488.0, + "28": 852351488.0, + "29": 852351488.0, + "30": 852351488.0, + "31": 852351488.0, + "32": 852351488.0, + "33": 852351488.0, + "34": 852351488.0, + "35": 852351488.0, + "36": 852351488.0, + "37": 852351488.0, + "38": 852351488.0, + "39": 852351488.0, + "40": 852351488.0, + "41": 852351488.0, + "42": 852351488.0, + "43": 852351488.0, + "44": 852351488.0, + "45": 852351488.0, + "46": 852351488.0, + "47": 852351488.0, + "48": 852351488.0, + "49": 852351488.0, + "50": 852351488.0, + "51": 852351488.0, + "52": 852351488.0, + "53": 852351488.0, + "54": 852351488.0, + "55": 852351488.0, + "56": 852351488.0, + "57": 852351488.0, + "58": 852351488.0, + "59": 852351488.0, + "60": 852351488.0, + "61": 852351488.0, + "62": 852351488.0, + "63": 852351488.0, + "64": 852351488.0, + "65": 852351488.0, + "66": 852351488.0, + "67": 852351488.0, + "68": 852351488.0, + "69": 852351488.0, + "70": 
852351488.0, + "71": 852351488.0, + "72": 852351488.0, + "73": 852351488.0, + "74": 852351488.0, + "75": 852351488.0, + "76": 852351488.0, + "77": 852351488.0, + "78": 852351488.0, + "79": 852351488.0, + "80": 852351488.0, + "81": 852351488.0, + "82": 852351488.0, + "83": 852351488.0, + "84": 852351488.0, + "85": 852351488.0, + "86": 852351488.0, + "87": 852351488.0, + "88": 852351488.0, + "89": 852351488.0, + "90": 852351488.0, + "91": 852351488.0, + "92": 852351488.0, + "93": 852351488.0, + "94": 852351488.0, + "95": 852351488.0, + "96": 852351488.0, + "97": 852351488.0, + "98": 852351488.0, + "99": 852351488.0, + "100": 852351488.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2393217536.0, + "2": 2393218048.0, + "3": 2393218048.0, + "4": 2393218048.0, + "5": 2393218048.0, + "6": 2393218048.0, + "7": 2393218048.0, + "8": 2393218048.0, + "9": 2393218048.0, + "10": 2393218048.0, + "11": 2393218048.0, + "12": 2393218048.0, + "13": 2393218048.0, + "14": 2393218048.0, + "15": 2393218048.0, + "16": 2393218048.0, + "17": 2393218048.0, + "18": 2675191296.0, + "19": 2675191296.0, + "20": 2675191296.0, + "21": 2675191296.0, + "22": 2675191296.0, + "23": 2675191296.0, + "24": 2675191296.0, + "25": 2675191296.0, + "26": 2675191296.0, + "27": 2675191296.0, + "28": 2675191296.0, + "29": 2675191296.0, + "30": 2675191296.0, + "31": 2675191296.0, + "32": 2675191296.0, + "33": 2675191296.0, + "34": 2675191296.0, + "35": 2675191296.0, + "36": 2675191296.0, + "37": 2675191296.0, + "38": 2675191296.0, + "39": 2675191296.0, + "40": 2675191296.0, + "41": 2675191296.0, + "42": 2675191296.0, + "43": 2675191296.0, + "44": 2675191296.0, + "45": 2675191296.0, + "46": 2675191296.0, + "47": 2675191296.0, + "48": 2675191296.0, + "49": 2675191296.0, + "50": 2675191296.0, + "51": 2675191296.0, + "52": 2675191296.0, + "53": 2675191296.0, + "54": 2675191296.0, + "55": 2675191296.0, + "56": 2675191296.0, + "57": 
2675191296.0, + "58": 2675191296.0, + "59": 2675191296.0, + "60": 2675191296.0, + "61": 2675191296.0, + "62": 2675191296.0, + "63": 2675191296.0, + "64": 2675191296.0, + "65": 2675191296.0, + "66": 2675191296.0, + "67": 2675191296.0, + "68": 2675191296.0, + "69": 2675191296.0, + "70": 2675191296.0, + "71": 2675191296.0, + "72": 2675191296.0, + "73": 2675191296.0, + "74": 2675191296.0, + "75": 2675191296.0, + "76": 2675191296.0, + "77": 2675191296.0, + "78": 2675191296.0, + "79": 2675191296.0, + "80": 2675191296.0, + "81": 2675191296.0, + "82": 2675191296.0, + "83": 2675191296.0, + "84": 2675191296.0, + "85": 2675191296.0, + "86": 2675191296.0, + "87": 2675191296.0, + "88": 2675191296.0, + "89": 2675191296.0, + "90": 2675191296.0, + "91": 2675191296.0, + "92": 2675191296.0, + "93": 2675191296.0, + "94": 2675191296.0, + "95": 2675191296.0, + "96": 2675191296.0, + "97": 2675191296.0, + "98": 2675191296.0, + "99": 2675191296.0, + "100": 2675191296.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.71736, + "2": 0.20733, + "3": 0.16848, + "4": 0.16524, + "5": 0.16238, + "6": 0.16187, + "7": 0.16222, + "8": 0.16966, + "9": 0.16728, + "10": 0.16645, + "11": 0.16656, + "12": 0.16608, + "13": 0.16573, + "14": 0.16701, + "15": 0.16496, + "16": 0.16669, + "17": 0.23079, + "18": 0.1849, + "19": 0.17171, + "20": 0.17096, + "21": 0.17174, + "22": 0.17119, + "23": 0.17277, + "24": 0.17201, + "25": 0.17439, + "26": 0.17169, + "27": 0.17161, + "28": 0.17192, + "29": 0.17194, + "30": 0.17228, + "31": 0.17292, + "32": 0.17122, + "33": 0.17157, + "34": 0.1724, + "35": 0.17452, + "36": 0.17212, + "37": 0.17181, + "38": 0.17195, + "39": 0.17197, + "40": 0.17277, + "41": 0.17339, + "42": 0.17111, + "43": 0.17212, + "44": 0.17128, + "45": 0.17186, + "46": 0.17214, + "47": 0.17062, + "48": 0.17161, + "49": 0.17218, + "50": 0.17161, + "51": 0.17752, + "52": 0.17189, + "53": 0.17103, + "54": 0.17149, + "55": 0.1719, + "56": 
0.17107, + "57": 0.17148, + "58": 0.17125, + "59": 0.17359, + "60": 0.172, + "61": 0.17008, + "62": 0.17062, + "63": 0.17153, + "64": 0.17237, + "65": 0.1724, + "66": 0.17702, + "67": 0.17451, + "68": 0.17335, + "69": 0.17257, + "70": 0.17296, + "71": 0.17324, + "72": 0.17308, + "73": 0.1733, + "74": 0.17393, + "75": 0.17307, + "76": 0.17314, + "77": 0.17235, + "78": 0.17169, + "79": 0.17051, + "80": 0.17076, + "81": 0.17091, + "82": 0.1698, + "83": 0.16956, + "84": 0.16892, + "85": 0.17014, + "86": 0.16969, + "87": 0.16994, + "88": 0.17052, + "89": 0.1722, + "90": 0.16945, + "91": 0.17051, + "92": 0.16932, + "93": 0.17024, + "94": 0.1701, + "95": 0.16924, + "96": 0.16933, + "97": 0.17042, + "98": 0.16973, + "99": 0.17021, + "100": 0.17096 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2437.0, + "18": 2405.0, + "19": 2950.0, + "20": 1827.0, + "21": 2154.0, + "22": 2731.0, + "23": 2609.0, + "24": 2290.0, + "25": 2325.0, + "26": 2079.0, + "27": 2138.0, + "28": 2702.0, + "29": 2576.0, + "30": 2528.0, + "31": 1895.0, + "32": 2628.0, + "33": 2325.0, + "34": 1928.0, + "35": 2061.0, + "36": 2153.0, + "37": 2600.0, + "38": 2350.0, + "39": 2997.0, + "40": 2053.0, + "41": 3352.0, + "42": 2497.0, + "43": 2867.0, + "44": 2109.0, + "45": 2490.0, + "46": 2279.0, + "47": 3051.0, + "48": 2527.0, + "49": 1973.0, + "50": 2887.0, + "51": 2310.0, + "52": 2526.0, + "53": 3705.0, + "54": 2888.0, + "55": 2440.0, + "56": 2496.0, + "57": 2338.0, + "58": 3283.0, + "59": 2849.0, + "60": 2893.0, + "61": 2956.0, + "62": 3134.0, + "63": 3275.0, + "64": 3176.0, + "65": 2318.0, + "66": 3857.0, + "67": 2606.0, + "68": 3313.0, + "69": 2826.0, + "70": 3665.0, + "71": 3011.0, + "72": 2693.0, + "73": 3357.0, + 
"74": 2271.0, + "75": 2955.0, + "76": 3617.0, + "77": 3936.0, + "78": 3951.0, + "79": 4065.0, + "80": 3665.0, + "81": 5191.0, + "82": 3511.0, + "83": 3263.0, + "84": 3876.0, + "85": 4048.0, + "86": 3414.0, + "87": 3980.0, + "88": 3617.0, + "89": 4400.0, + "90": 3695.0, + "91": 2857.0, + "92": 4432.0, + "93": 3494.0, + "94": 4438.0, + "95": 4076.0, + "96": 3948.0, + "97": 4242.0, + "98": 4943.0, + "99": 3861.0, + "100": 3631.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..66f5a69ba1b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.79219, + "2": 10.80294, + "3": 10.80725, + "4": 10.77342, + "5": 10.84727, + "6": 10.8682, + "7": 10.8278, + "8": 10.81626, + "9": 10.83861, + "10": 10.77729, + "11": 10.90005, + "12": 10.85954, + "13": 10.87494, + "14": 10.8953, + "15": 10.84106, + "16": 10.83779, + "17": 10.82436, + "18": 10.85906, + "19": 10.86597, + "20": 10.82889, + "21": 10.83382, + "22": 10.75171, + "23": 10.8822, + "24": 10.78198, + "25": 10.7666, + "26": 10.79421, + "27": 10.79973, + "28": 10.81809, + "29": 10.81973, + "30": 10.69961, + "31": 10.55541, + "32": 10.75748, + "33": 10.7417, + "34": 10.59849, + "35": 10.61845, + "36": 10.56439, + "37": 10.63758, + "38": 10.53033, + "39": 10.65378, + "40": 10.44051, + "41": 10.49785, + "42": 10.50842, + "43": 10.22237, + "44": 10.30681, + "45": 10.20859, + "46": 10.20077, + "47": 10.41716, + "48": 10.18042, + "49": 9.94398, + "50": 10.21168, + "51": 
10.16603, + "52": 10.06842, + "53": 10.30736, + "54": 10.20998, + "55": 10.15675, + "56": 9.91528, + "57": 9.77636, + "58": 10.07274, + "59": 9.86327, + "60": 9.77265, + "61": 9.92815, + "62": 10.17249, + "63": 9.62223, + "64": 9.97162, + "65": 9.22128, + "66": 9.88606, + "67": 9.5836, + "68": 9.95061, + "69": 9.95306, + "70": 9.89371, + "71": 9.77681, + "72": 9.75545, + "73": 9.64983, + "74": 9.14359, + "75": 9.56098, + "76": 9.25119, + "77": 10.16981, + "78": 9.854, + "79": 9.49956, + "80": 9.5311, + "81": 9.60482, + "82": 9.80129, + "83": 9.43763, + "84": 9.51982, + "85": 9.71911, + "86": 9.18564, + "87": 9.68731, + "88": 9.84403, + "89": 9.7063, + "90": 9.89983, + "91": 9.45059, + "92": 9.45364, + "93": 9.18519, + "94": 8.92953, + "95": 9.59785, + "96": 9.61472, + "97": 9.39069, + "98": 9.75698, + "99": 8.9803, + "100": 9.49009 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 570640384.0, + "2": 570640384.0, + "3": 570640384.0, + "4": 570640384.0, + "5": 570640384.0, + "6": 570640384.0, + "7": 570640384.0, + "8": 570640384.0, + "9": 570640384.0, + "10": 570640384.0, + "11": 570640384.0, + "12": 570640384.0, + "13": 570640384.0, + "14": 570640384.0, + "15": 570640384.0, + "16": 570640384.0, + "17": 852351488.0, + "18": 852351488.0, + "19": 852351488.0, + "20": 852351488.0, + "21": 852351488.0, + "22": 852351488.0, + "23": 852351488.0, + "24": 852351488.0, + "25": 852351488.0, + "26": 852351488.0, + "27": 852351488.0, + "28": 852351488.0, + "29": 852351488.0, + "30": 852351488.0, + "31": 852351488.0, + "32": 852351488.0, + "33": 852351488.0, + "34": 852351488.0, + "35": 852351488.0, + "36": 852351488.0, + "37": 852351488.0, + "38": 852351488.0, + "39": 852351488.0, + "40": 852351488.0, + "41": 852351488.0, + "42": 852351488.0, + "43": 852351488.0, + "44": 852351488.0, + "45": 852351488.0, + "46": 852351488.0, + "47": 852351488.0, + "48": 852351488.0, + "49": 852351488.0, + "50": 852351488.0, + 
"51": 852351488.0, + "52": 852351488.0, + "53": 852351488.0, + "54": 852351488.0, + "55": 852351488.0, + "56": 852351488.0, + "57": 852351488.0, + "58": 852351488.0, + "59": 852351488.0, + "60": 852351488.0, + "61": 852351488.0, + "62": 852351488.0, + "63": 852351488.0, + "64": 852351488.0, + "65": 852351488.0, + "66": 852351488.0, + "67": 852351488.0, + "68": 852351488.0, + "69": 852351488.0, + "70": 852351488.0, + "71": 852351488.0, + "72": 852351488.0, + "73": 852351488.0, + "74": 852351488.0, + "75": 852351488.0, + "76": 852351488.0, + "77": 852351488.0, + "78": 852351488.0, + "79": 852351488.0, + "80": 852351488.0, + "81": 852351488.0, + "82": 852351488.0, + "83": 852351488.0, + "84": 852351488.0, + "85": 852351488.0, + "86": 852351488.0, + "87": 852351488.0, + "88": 852351488.0, + "89": 852351488.0, + "90": 852351488.0, + "91": 852351488.0, + "92": 852351488.0, + "93": 852351488.0, + "94": 852351488.0, + "95": 852351488.0, + "96": 852351488.0, + "97": 852351488.0, + "98": 852351488.0, + "99": 852351488.0, + "100": 852351488.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2393217536.0, + "2": 2393218048.0, + "3": 2393218048.0, + "4": 2393218048.0, + "5": 2393218048.0, + "6": 2393218048.0, + "7": 2393218048.0, + "8": 2393218048.0, + "9": 2393218048.0, + "10": 2393218048.0, + "11": 2393218048.0, + "12": 2393218048.0, + "13": 2393218048.0, + "14": 2393218048.0, + "15": 2393218048.0, + "16": 2393218048.0, + "17": 2393218048.0, + "18": 2675191296.0, + "19": 2675191296.0, + "20": 2675191296.0, + "21": 2675191296.0, + "22": 2675191296.0, + "23": 2675191296.0, + "24": 2675191296.0, + "25": 2675191296.0, + "26": 2675191296.0, + "27": 2675191296.0, + "28": 2675191296.0, + "29": 2675191296.0, + "30": 2675191296.0, + "31": 2675191296.0, + "32": 2675191296.0, + "33": 2675191296.0, + "34": 2675191296.0, + "35": 2675191296.0, + "36": 2675191296.0, + "37": 2675191296.0, + "38": 2675191296.0, + "39": 
2675191296.0, + "40": 2675191296.0, + "41": 2675191296.0, + "42": 2675191296.0, + "43": 2675191296.0, + "44": 2675191296.0, + "45": 2675191296.0, + "46": 2675191296.0, + "47": 2675191296.0, + "48": 2675191296.0, + "49": 2675191296.0, + "50": 2675191296.0, + "51": 2675191296.0, + "52": 2675191296.0, + "53": 2675191296.0, + "54": 2675191296.0, + "55": 2675191296.0, + "56": 2675191296.0, + "57": 2675191296.0, + "58": 2675191296.0, + "59": 2675191296.0, + "60": 2675191296.0, + "61": 2675191296.0, + "62": 2675191296.0, + "63": 2675191296.0, + "64": 2675191296.0, + "65": 2675191296.0, + "66": 2675191296.0, + "67": 2675191296.0, + "68": 2675191296.0, + "69": 2675191296.0, + "70": 2675191296.0, + "71": 2675191296.0, + "72": 2675191296.0, + "73": 2675191296.0, + "74": 2675191296.0, + "75": 2675191296.0, + "76": 2675191296.0, + "77": 2675191296.0, + "78": 2675191296.0, + "79": 2675191296.0, + "80": 2675191296.0, + "81": 2675191296.0, + "82": 2675191296.0, + "83": 2675191296.0, + "84": 2675191296.0, + "85": 2675191296.0, + "86": 2675191296.0, + "87": 2675191296.0, + "88": 2675191296.0, + "89": 2675191296.0, + "90": 2675191296.0, + "91": 2675191296.0, + "92": 2675191296.0, + "93": 2675191296.0, + "94": 2675191296.0, + "95": 2675191296.0, + "96": 2675191296.0, + "97": 2675191296.0, + "98": 2675191296.0, + "99": 2675191296.0, + "100": 2675191296.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 19.89272, + "2": 0.22107, + "3": 0.18275, + "4": 0.18107, + "5": 0.17886, + "6": 0.18018, + "7": 0.17948, + "8": 0.18069, + "9": 0.17962, + "10": 0.17963, + "11": 0.17947, + "12": 0.17823, + "13": 0.17865, + "14": 0.17837, + "15": 0.17763, + "16": 0.1799, + "17": 0.22816, + "18": 0.19169, + "19": 0.18609, + "20": 0.18543, + "21": 0.18512, + "22": 0.1854, + "23": 0.18528, + "24": 0.18513, + "25": 0.18379, + "26": 0.18616, + "27": 0.18415, + "28": 0.18391, + "29": 0.18338, + "30": 0.18284, + "31": 0.18419, + "32": 0.18271, + 
"33": 0.18342, + "34": 0.18309, + "35": 0.18499, + "36": 0.18314, + "37": 0.18313, + "38": 0.18318, + "39": 0.18257, + "40": 0.18362, + "41": 0.18408, + "42": 0.18593, + "43": 0.18429, + "44": 0.18306, + "45": 0.18258, + "46": 0.18357, + "47": 0.18345, + "48": 0.18361, + "49": 0.18333, + "50": 0.18415, + "51": 0.19311, + "52": 0.18608, + "53": 0.18549, + "54": 0.18334, + "55": 0.38073, + "56": 0.18342, + "57": 0.18432, + "58": 0.18626, + "59": 0.18513, + "60": 0.18344, + "61": 0.18248, + "62": 0.18332, + "63": 0.18441, + "64": 0.18566, + "65": 0.18351, + "66": 0.1834, + "67": 0.18454, + "68": 0.18312, + "69": 0.18334, + "70": 0.18273, + "71": 0.18529, + "72": 0.18793, + "73": 0.18357, + "74": 0.18295, + "75": 0.18311, + "76": 0.18315, + "77": 0.18309, + "78": 0.1831, + "79": 0.18331, + "80": 0.18243, + "81": 0.1841, + "82": 0.18426, + "83": 0.18296, + "84": 0.18393, + "85": 0.18305, + "86": 0.18319, + "87": 0.18267, + "88": 0.18256, + "89": 0.18287, + "90": 0.18205, + "91": 0.18594, + "92": 0.18287, + "93": 0.18383, + "94": 0.18383, + "95": 0.183, + "96": 0.18259, + "97": 0.18302, + "98": 0.18382, + "99": 0.18264, + "100": 0.18713 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2468.0, + "18": 2395.0, + "19": 3001.0, + "20": 1788.0, + "21": 2162.0, + "22": 2852.0, + "23": 2510.0, + "24": 2287.0, + "25": 2300.0, + "26": 2108.0, + "27": 2155.0, + "28": 2751.0, + "29": 2604.0, + "30": 2419.0, + "31": 1842.0, + "32": 2598.0, + "33": 2277.0, + "34": 1897.0, + "35": 2097.0, + "36": 2176.0, + "37": 2715.0, + "38": 2423.0, + "39": 3095.0, + "40": 2126.0, + "41": 3441.0, + "42": 2505.0, + "43": 2679.0, + "44": 2086.0, + "45": 2520.0, + "46": 2259.0, + "47": 3003.0, + "48": 2604.0, + "49": 
1956.0, + "50": 2929.0, + "51": 2283.0, + "52": 2458.0, + "53": 3770.0, + "54": 2965.0, + "55": 2457.0, + "56": 2411.0, + "57": 2342.0, + "58": 3450.0, + "59": 2845.0, + "60": 2961.0, + "61": 2897.0, + "62": 3092.0, + "63": 3200.0, + "64": 3129.0, + "65": 2359.0, + "66": 3857.0, + "67": 2591.0, + "68": 3272.0, + "69": 2823.0, + "70": 3633.0, + "71": 3058.0, + "72": 2755.0, + "73": 3353.0, + "74": 2201.0, + "75": 2932.0, + "76": 3649.0, + "77": 4022.0, + "78": 3953.0, + "79": 4091.0, + "80": 3595.0, + "81": 5179.0, + "82": 3499.0, + "83": 3262.0, + "84": 3902.0, + "85": 3959.0, + "86": 3288.0, + "87": 4032.0, + "88": 3628.0, + "89": 4405.0, + "90": 3785.0, + "91": 2856.0, + "92": 4187.0, + "93": 3564.0, + "94": 4347.0, + "95": 4072.0, + "96": 3833.0, + "97": 4121.0, + "98": 4897.0, + "99": 4120.0, + "100": 3581.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..77c8aa6317e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.79219, + "2": 10.80294, + "3": 10.80725, + "4": 10.77342, + "5": 10.84727, + "6": 10.8682, + "7": 10.8278, + "8": 10.81626, + "9": 10.83861, + "10": 10.77729, + "11": 10.90005, + "12": 10.85954, + "13": 10.87494, + "14": 10.8953, + "15": 10.84106, + "16": 10.83779, + "17": 10.82436, + "18": 10.85906, + "19": 10.86597, + "20": 10.82889, + "21": 10.83382, + "22": 10.75171, + "23": 10.8822, + "24": 10.78198, + "25": 10.7666, + "26": 10.79421, + "27": 10.79973, + "28": 10.81809, + "29": 10.81973, + 
"30": 10.69961, + "31": 10.55541, + "32": 10.75748, + "33": 10.7417, + "34": 10.59849, + "35": 10.61845, + "36": 10.56439, + "37": 10.63758, + "38": 10.53033, + "39": 10.65378, + "40": 10.44051, + "41": 10.49785, + "42": 10.50842, + "43": 10.22237, + "44": 10.30681, + "45": 10.20859, + "46": 10.20077, + "47": 10.41716, + "48": 10.18042, + "49": 9.94398, + "50": 10.21168, + "51": 10.16603, + "52": 10.06842, + "53": 10.30736, + "54": 10.20998, + "55": 10.15675, + "56": 9.91528, + "57": 9.77636, + "58": 10.07274, + "59": 9.86327, + "60": 9.77265, + "61": 9.92815, + "62": 10.17249, + "63": 9.62223, + "64": 9.97162, + "65": 9.22128, + "66": 9.88606, + "67": 9.5836, + "68": 9.95061, + "69": 9.95306, + "70": 9.89371, + "71": 9.77681, + "72": 9.75545, + "73": 9.64983, + "74": 9.14359, + "75": 9.56098, + "76": 9.25119, + "77": 10.16981, + "78": 9.854, + "79": 9.49956, + "80": 9.5311, + "81": 9.60482, + "82": 9.80129, + "83": 9.43763, + "84": 9.51982, + "85": 9.71911, + "86": 9.18564, + "87": 9.68731, + "88": 9.84403, + "89": 9.7063, + "90": 9.89983, + "91": 9.45059, + "92": 9.45364, + "93": 9.18519, + "94": 8.92953, + "95": 9.59785, + "96": 9.61472, + "97": 9.39069, + "98": 9.75698, + "99": 8.9803, + "100": 9.49009 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 570640384.0, + "2": 570640384.0, + "3": 570640384.0, + "4": 570640384.0, + "5": 570640384.0, + "6": 570640384.0, + "7": 570640384.0, + "8": 570640384.0, + "9": 570640384.0, + "10": 570640384.0, + "11": 570640384.0, + "12": 570640384.0, + "13": 570640384.0, + "14": 570640384.0, + "15": 570640384.0, + "16": 570640384.0, + "17": 852351488.0, + "18": 852351488.0, + "19": 852351488.0, + "20": 852351488.0, + "21": 852351488.0, + "22": 852351488.0, + "23": 852351488.0, + "24": 852351488.0, + "25": 852351488.0, + "26": 852351488.0, + "27": 852351488.0, + "28": 852351488.0, + "29": 852351488.0, + "30": 852351488.0, + "31": 852351488.0, + "32": 852351488.0, 
+ "33": 852351488.0, + "34": 852351488.0, + "35": 852351488.0, + "36": 852351488.0, + "37": 852351488.0, + "38": 852351488.0, + "39": 852351488.0, + "40": 852351488.0, + "41": 852351488.0, + "42": 852351488.0, + "43": 852351488.0, + "44": 852351488.0, + "45": 852351488.0, + "46": 852351488.0, + "47": 852351488.0, + "48": 852351488.0, + "49": 852351488.0, + "50": 852351488.0, + "51": 852351488.0, + "52": 852351488.0, + "53": 852351488.0, + "54": 852351488.0, + "55": 852351488.0, + "56": 852351488.0, + "57": 852351488.0, + "58": 852351488.0, + "59": 852351488.0, + "60": 852351488.0, + "61": 852351488.0, + "62": 852351488.0, + "63": 852351488.0, + "64": 852351488.0, + "65": 852351488.0, + "66": 852351488.0, + "67": 852351488.0, + "68": 852351488.0, + "69": 852351488.0, + "70": 852351488.0, + "71": 852351488.0, + "72": 852351488.0, + "73": 852351488.0, + "74": 852351488.0, + "75": 852351488.0, + "76": 852351488.0, + "77": 852351488.0, + "78": 852351488.0, + "79": 852351488.0, + "80": 852351488.0, + "81": 852351488.0, + "82": 852351488.0, + "83": 852351488.0, + "84": 852351488.0, + "85": 852351488.0, + "86": 852351488.0, + "87": 852351488.0, + "88": 852351488.0, + "89": 852351488.0, + "90": 852351488.0, + "91": 852351488.0, + "92": 852351488.0, + "93": 852351488.0, + "94": 852351488.0, + "95": 852351488.0, + "96": 852351488.0, + "97": 852351488.0, + "98": 852351488.0, + "99": 852351488.0, + "100": 852351488.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2393217536.0, + "2": 2393218048.0, + "3": 2393218048.0, + "4": 2393218048.0, + "5": 2393218048.0, + "6": 2393218048.0, + "7": 2393218048.0, + "8": 2393218048.0, + "9": 2393218048.0, + "10": 2393218048.0, + "11": 2393218048.0, + "12": 2393218048.0, + "13": 2393218048.0, + "14": 2393218048.0, + "15": 2393218048.0, + "16": 2393218048.0, + "17": 2393218048.0, + "18": 2675191296.0, + "19": 2675191296.0, + "20": 2675191296.0, + "21": 2675191296.0, + 
"22": 2675191296.0, + "23": 2675191296.0, + "24": 2675191296.0, + "25": 2675191296.0, + "26": 2675191296.0, + "27": 2675191296.0, + "28": 2675191296.0, + "29": 2675191296.0, + "30": 2675191296.0, + "31": 2675191296.0, + "32": 2675191296.0, + "33": 2675191296.0, + "34": 2675191296.0, + "35": 2675191296.0, + "36": 2675191296.0, + "37": 2675191296.0, + "38": 2675191296.0, + "39": 2675191296.0, + "40": 2675191296.0, + "41": 2675191296.0, + "42": 2675191296.0, + "43": 2675191296.0, + "44": 2675191296.0, + "45": 2675191296.0, + "46": 2675191296.0, + "47": 2675191296.0, + "48": 2675191296.0, + "49": 2675191296.0, + "50": 2675191296.0, + "51": 2675191296.0, + "52": 2675191296.0, + "53": 2675191296.0, + "54": 2675191296.0, + "55": 2675191296.0, + "56": 2675191296.0, + "57": 2675191296.0, + "58": 2675191296.0, + "59": 2675191296.0, + "60": 2675191296.0, + "61": 2675191296.0, + "62": 2675191296.0, + "63": 2675191296.0, + "64": 2675191296.0, + "65": 2675191296.0, + "66": 2675191296.0, + "67": 2675191296.0, + "68": 2675191296.0, + "69": 2675191296.0, + "70": 2675191296.0, + "71": 2675191296.0, + "72": 2675191296.0, + "73": 2675191296.0, + "74": 2675191296.0, + "75": 2675191296.0, + "76": 2675191296.0, + "77": 2675191296.0, + "78": 2675191296.0, + "79": 2675191296.0, + "80": 2675191296.0, + "81": 2675191296.0, + "82": 2675191296.0, + "83": 2675191296.0, + "84": 2675191296.0, + "85": 2675191296.0, + "86": 2675191296.0, + "87": 2675191296.0, + "88": 2675191296.0, + "89": 2675191296.0, + "90": 2675191296.0, + "91": 2675191296.0, + "92": 2675191296.0, + "93": 2675191296.0, + "94": 2675191296.0, + "95": 2675191296.0, + "96": 2675191296.0, + "97": 2675191296.0, + "98": 2675191296.0, + "99": 2675191296.0, + "100": 2675191296.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 17.43358, + "2": 0.22524, + "3": 0.17789, + "4": 0.17624, + "5": 0.17537, + "6": 0.17509, + "7": 0.17504, + "8": 0.17597, + "9": 0.33529, + "10": 
0.1733, + "11": 0.17189, + "12": 0.36273, + "13": 0.33105, + "14": 0.17358, + "15": 0.17041, + "16": 0.17127, + "17": 0.22308, + "18": 0.18489, + "19": 0.17575, + "20": 0.17774, + "21": 0.17576, + "22": 0.17856, + "23": 0.17708, + "24": 0.17716, + "25": 0.17653, + "26": 0.17714, + "27": 0.17666, + "28": 0.17607, + "29": 0.17677, + "30": 0.17713, + "31": 0.17662, + "32": 0.17475, + "33": 0.17536, + "34": 0.17541, + "35": 0.17373, + "36": 0.17425, + "37": 0.17642, + "38": 0.17354, + "39": 0.1728, + "40": 0.17398, + "41": 0.17325, + "42": 0.17407, + "43": 0.17446, + "44": 0.17406, + "45": 0.17259, + "46": 0.17351, + "47": 0.17206, + "48": 0.17349, + "49": 0.17325, + "50": 0.17301, + "51": 0.1847, + "52": 0.17696, + "53": 0.17664, + "54": 0.17578, + "55": 0.17469, + "56": 0.1747, + "57": 0.17669, + "58": 0.46947, + "59": 0.17866, + "60": 0.18128, + "61": 0.1841, + "62": 0.18126, + "63": 0.18539, + "64": 0.18121, + "65": 0.18392, + "66": 0.18089, + "67": 0.18156, + "68": 0.18143, + "69": 0.18341, + "70": 0.18174, + "71": 0.18035, + "72": 0.18154, + "73": 0.18372, + "74": 0.18315, + "75": 0.18495, + "76": 0.18114, + "77": 0.18247, + "78": 0.18539, + "79": 0.18003, + "80": 0.18064, + "81": 0.18357, + "82": 0.18141, + "83": 0.18237, + "84": 0.1825, + "85": 0.1832, + "86": 0.18311, + "87": 0.18223, + "88": 0.18193, + "89": 0.18393, + "90": 0.18315, + "91": 0.18376, + "92": 0.1829, + "93": 0.18319, + "94": 0.18381, + "95": 0.18373, + "96": 0.18292, + "97": 0.18321, + "98": 0.18299, + "99": 0.1838, + "100": 0.18438 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2468.0, + "18": 2395.0, + "19": 3001.0, + "20": 1788.0, + "21": 2162.0, + "22": 2852.0, + "23": 2510.0, + "24": 2287.0, + "25": 
2300.0, + "26": 2108.0, + "27": 2155.0, + "28": 2751.0, + "29": 2604.0, + "30": 2419.0, + "31": 1842.0, + "32": 2598.0, + "33": 2277.0, + "34": 1897.0, + "35": 2097.0, + "36": 2176.0, + "37": 2715.0, + "38": 2423.0, + "39": 3095.0, + "40": 2126.0, + "41": 3441.0, + "42": 2505.0, + "43": 2679.0, + "44": 2086.0, + "45": 2520.0, + "46": 2259.0, + "47": 3003.0, + "48": 2604.0, + "49": 1956.0, + "50": 2929.0, + "51": 2283.0, + "52": 2458.0, + "53": 3770.0, + "54": 2965.0, + "55": 2457.0, + "56": 2411.0, + "57": 2342.0, + "58": 3450.0, + "59": 2845.0, + "60": 2961.0, + "61": 2897.0, + "62": 3092.0, + "63": 3200.0, + "64": 3129.0, + "65": 2359.0, + "66": 3857.0, + "67": 2591.0, + "68": 3272.0, + "69": 2823.0, + "70": 3633.0, + "71": 3058.0, + "72": 2755.0, + "73": 3353.0, + "74": 2201.0, + "75": 2932.0, + "76": 3649.0, + "77": 4022.0, + "78": 3953.0, + "79": 4091.0, + "80": 3595.0, + "81": 5179.0, + "82": 3499.0, + "83": 3262.0, + "84": 3902.0, + "85": 3959.0, + "86": 3288.0, + "87": 4032.0, + "88": 3628.0, + "89": 4405.0, + "90": 3785.0, + "91": 2856.0, + "92": 4187.0, + "93": 3564.0, + "94": 4347.0, + "95": 4072.0, + "96": 3833.0, + "97": 4121.0, + "98": 4897.0, + "99": 4120.0, + "100": 3581.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json index d7a8a24cd68..dabf1673e8e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json @@ -2,140 +2,535 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.84277, + "2": 10.85562, + "3": 10.84568, 
+ "4": 10.84364, "5": 10.85979, + "6": 10.86413, + "7": 10.85362, + "8": 10.85066, + "9": 10.8615, "10": 10.82586, + "11": 10.86811, + "12": 10.85685, + "13": 10.87827, + "14": 10.86894, "15": 10.85888, + "16": 10.8685, + "17": 10.85105, + "18": 10.85939, + "19": 10.85704, "20": 10.84526, + "21": 10.85808, + "22": 10.83215, + "23": 10.86717, + "24": 10.83773, "25": 10.82744, + "26": 10.83163, + "27": 10.83573, + "28": 10.82373, + "29": 10.81624, "30": 10.76486, + "31": 10.69044, + "32": 10.76257, + "33": 10.75455, + "34": 10.67733, "35": 10.66335, + "36": 10.63634, + "37": 10.66856, + "38": 10.5969, + "39": 10.67599, "40": 10.50898, + "41": 10.53945, + "42": 10.55263, + "43": 10.35003, + "44": 10.40418, "45": 10.32106, + "46": 10.27724, + "47": 10.45205, + "48": 10.28913, + "49": 10.05779, "50": 10.27777, + "51": 10.23471, + "52": 10.13764, + "53": 10.34797, + "54": 10.26738, "55": 10.20734, + "56": 9.99527, + "57": 9.89333, + "58": 10.13452, + "59": 9.92856, "60": 9.8551, + "61": 9.98264, + "62": 10.20686, + "63": 9.70842, + "64": 10.01687, "65": 9.30409, + "66": 9.93326, + "67": 9.62677, + "68": 9.98429, + "69": 9.9755, "70": 9.93956, + "71": 9.81005, + "72": 9.798, + "73": 9.68454, + "74": 9.19951, "75": 9.60518, + "76": 9.27791, + "77": 10.19437, + "78": 9.8671, + "79": 9.53341, "80": 9.56341, + "81": 9.63047, + "82": 9.82819, + "83": 9.46388, + "84": 9.53736, "85": 9.74561, + "86": 9.21332, + "87": 9.7014, + "88": 9.86621, + "89": 9.72242, "90": 9.92089, + "91": 9.47178, + "92": 9.46996, + "93": 9.20589, + "94": 8.94772, "95": 9.60815, + "96": 9.63635, + "97": 9.4138, + "98": 9.77274, + "99": 8.9958, "100": 9.50415 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, "10": 284527616.0, + "11": 284527616.0, + "12": 
284527616.0, + "13": 284527616.0, + "14": 284527616.0, "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, "100": 416513536.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1465368064.0, + "2": 1465368576.0, + "3": 
1465368576.0, + "4": 1465368576.0, "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, "15": 1465368576.0, + "16": 1465368576.0, + "17": 1597092352.0, + "18": 1597092352.0, + "19": 1597092352.0, "20": 1597092352.0, + "21": 1597092352.0, + "22": 1597092352.0, + "23": 1597092352.0, + "24": 1597092352.0, "25": 1597092352.0, + "26": 1597092352.0, + "27": 1597092352.0, + "28": 1597092352.0, + "29": 1597092352.0, "30": 1597092352.0, + "31": 1597092352.0, + "32": 1597092352.0, + "33": 1597092352.0, + "34": 1597092352.0, "35": 1597092352.0, + "36": 1597092352.0, + "37": 1597092352.0, + "38": 1597092352.0, + "39": 1597092352.0, "40": 1597092352.0, + "41": 1597092352.0, + "42": 1597092352.0, + "43": 1597092352.0, + "44": 1597092352.0, "45": 1597092352.0, + "46": 1597092352.0, + "47": 1597092352.0, + "48": 1597092352.0, + "49": 1597092352.0, "50": 1597092352.0, + "51": 1597092352.0, + "52": 1597092352.0, + "53": 1597092352.0, + "54": 1597092352.0, "55": 1597092352.0, + "56": 1597092352.0, + "57": 1597092352.0, + "58": 1597092352.0, + "59": 1597092352.0, "60": 1597092352.0, + "61": 1597092352.0, + "62": 1597092352.0, + "63": 1597092352.0, + "64": 1597092352.0, "65": 1597092352.0, + "66": 1597092352.0, + "67": 1597092352.0, + "68": 1597092352.0, + "69": 1597092352.0, "70": 1597092352.0, + "71": 1597092352.0, + "72": 1597092352.0, + "73": 1597092352.0, + "74": 1597092352.0, "75": 1597092352.0, + "76": 1597092352.0, + "77": 1597092352.0, + "78": 1597092352.0, + "79": 1597092352.0, "80": 1597092352.0, + "81": 1597092352.0, + "82": 1597092352.0, + "83": 1597092352.0, + "84": 1597092352.0, "85": 1597092352.0, + "86": 1597092352.0, + "87": 1597092352.0, + "88": 1597092352.0, + "89": 1597092352.0, "90": 1597092352.0, + "91": 1597092352.0, + "92": 1597092352.0, + "93": 1597092352.0, + "94": 1597092352.0, "95": 1597092352.0, + 
"96": 1597092352.0, + "97": 1597092352.0, + "98": 1597092352.0, + "99": 1597092352.0, "100": 1597092352.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 6.08145, - "5": 0.19699, - "10": 0.18649, - "15": 0.1857, - "20": 0.2021, - "25": 0.20057, - "30": 0.19804, - "35": 0.19848, - "40": 0.20241, - "45": 0.19796, - "50": 0.19684, - "55": 0.19872, - "60": 0.19694, - "65": 0.19755, - "70": 0.19889, - "75": 0.19755, - "80": 0.20241, - "85": 0.20082, - "90": 0.19963, - "95": 0.20089, - "100": 0.19724 + "1": 6.78518, + "2": 0.23744, + "3": 0.21193, + "4": 0.21211, + "5": 0.21234, + "6": 0.21714, + "7": 0.21381, + "8": 0.21678, + "9": 0.21057, + "10": 0.21454, + "11": 0.21268, + "12": 0.21347, + "13": 0.209, + "14": 0.20717, + "15": 0.20674, + "16": 0.28167, + "17": 0.21476, + "18": 0.22185, + "19": 0.22342, + "20": 0.21927, + "21": 0.21844, + "22": 0.20869, + "23": 0.21636, + "24": 0.22148, + "25": 0.21904, + "26": 0.21751, + "27": 0.21967, + "28": 0.21863, + "29": 0.21626, + "30": 0.22036, + "31": 0.21954, + "32": 0.22158, + "33": 0.22026, + "34": 0.21931, + "35": 0.21953, + "36": 0.22128, + "37": 0.22086, + "38": 0.22232, + "39": 0.22188, + "40": 0.22409, + "41": 0.22246, + "42": 0.22597, + "43": 0.22399, + "44": 0.22475, + "45": 0.22278, + "46": 0.22509, + "47": 0.2265, + "48": 0.22645, + "49": 0.22526, + "50": 0.22341, + "51": 0.22545, + "52": 0.22535, + "53": 0.22576, + "54": 0.2245, + "55": 0.22609, + "56": 0.2228, + "57": 0.22559, + "58": 0.22342, + "59": 0.22459, + "60": 0.2267, + "61": 0.22697, + "62": 0.22521, + "63": 0.22584, + "64": 0.22709, + "65": 0.22302, + "66": 0.22625, + "67": 0.22446, + "68": 0.22406, + "69": 0.22377, + "70": 0.22903, + "71": 0.2251, + "72": 0.22663, + "73": 0.2167, + "74": 0.21951, + "75": 0.22056, + "76": 0.22119, + "77": 0.21831, + "78": 0.21638, + "79": 0.22219, + "80": 0.21903, + "81": 0.21864, + "82": 0.22289, + "83": 0.21759, + "84": 0.21896, + "85": 
0.21769, + "86": 0.21796, + "87": 0.22137, + "88": 0.2181, + "89": 0.22173, + "90": 0.21854, + "91": 0.21692, + "92": 0.21712, + "93": 0.21996, + "94": 0.2158, + "95": 0.21804, + "96": 0.21776, + "97": 0.21778, + "98": 0.21975, + "99": 0.21815, + "100": 0.21699 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", "15": "nan", + "16": 2365.0, + "17": "nan", + "18": 2331.0, + "19": 2912.0, "20": 1664.0, + "21": 2009.0, + "22": "nan", + "23": 2483.0, + "24": 2192.0, "25": 2290.0, + "26": 1916.0, + "27": 2020.0, + "28": 2503.0, + "29": 2379.0, "30": 2400.0, + "31": 1759.0, + "32": 2522.0, + "33": 2145.0, + "34": 1791.0, "35": 1777.0, + "36": 2100.0, + "37": 2396.0, + "38": 2040.0, + "39": 2983.0, "40": 1805.0, + "41": 3097.0, + "42": 2421.0, + "43": 2566.0, + "44": 1858.0, "45": 2371.0, + "46": 2140.0, + "47": 2603.0, + "48": 2358.0, + "49": 1739.0, "50": 2686.0, + "51": 2041.0, + "52": 2226.0, + "53": 3222.0, + "54": 2784.0, "55": 2290.0, + "56": 2428.0, + "57": 2146.0, + "58": 3048.0, + "59": 2504.0, "60": 2612.0, + "61": 2623.0, + "62": 3003.0, + "63": 2762.0, + "64": 2917.0, "65": 2104.0, + "66": 3550.0, + "67": 2433.0, + "68": 3146.0, + "69": 2877.0, "70": 3528.0, + "71": 2983.0, + "72": 2640.0, + "73": 3199.0, + "74": 2084.0, "75": 2809.0, + "76": 3599.0, + "77": 3667.0, + "78": 3680.0, + "79": 3972.0, "80": 3365.0, + "81": 5042.0, + "82": 3291.0, + "83": 3016.0, + "84": 3592.0, "85": 3792.0, + "86": 3192.0, + "87": 4219.0, + "88": 3376.0, + "89": 4110.0, "90": 3939.0, + "91": 2912.0, + "92": 4114.0, + "93": 3499.0, + "94": 4339.0, "95": 3829.0, + "96": 3875.0, + "97": 4100.0, + "98": 4889.0, + "99": 3771.0, "100": 3390.0 } } diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..eaee6a60f26 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84277, + "2": 10.85562, + "3": 10.84568, + "4": 10.84364, + "5": 10.85979, + "6": 10.86413, + "7": 10.85362, + "8": 10.85066, + "9": 10.8615, + "10": 10.82586, + "11": 10.86811, + "12": 10.85685, + "13": 10.87827, + "14": 10.86894, + "15": 10.85888, + "16": 10.8685, + "17": 10.85105, + "18": 10.85939, + "19": 10.85704, + "20": 10.84526, + "21": 10.85808, + "22": 10.83215, + "23": 10.86717, + "24": 10.83773, + "25": 10.82744, + "26": 10.83163, + "27": 10.83573, + "28": 10.82373, + "29": 10.81624, + "30": 10.76486, + "31": 10.69044, + "32": 10.76257, + "33": 10.75455, + "34": 10.67733, + "35": 10.66335, + "36": 10.63634, + "37": 10.66856, + "38": 10.5969, + "39": 10.67599, + "40": 10.50898, + "41": 10.53945, + "42": 10.55263, + "43": 10.35003, + "44": 10.40418, + "45": 10.32106, + "46": 10.27724, + "47": 10.45205, + "48": 10.28913, + "49": 10.05779, + "50": 10.27777, + "51": 10.23471, + "52": 10.13764, + "53": 10.34797, + "54": 10.26738, + "55": 10.20734, + "56": 9.99527, + "57": 9.89333, + "58": 10.13452, + "59": 9.92856, + "60": 9.8551, + "61": 9.98264, + "62": 10.20686, + "63": 9.70842, + "64": 10.01687, + "65": 9.30409, + "66": 9.93326, + "67": 9.62677, + "68": 9.98429, + "69": 9.9755, + "70": 9.93956, + "71": 9.81005, + "72": 9.798, + "73": 9.68454, + "74": 9.19951, + "75": 9.60518, + "76": 9.27791, + "77": 10.19437, + "78": 9.8671, + "79": 9.53341, + "80": 
9.56341, + "81": 9.63047, + "82": 9.82819, + "83": 9.46388, + "84": 9.53736, + "85": 9.74561, + "86": 9.21332, + "87": 9.7014, + "88": 9.86621, + "89": 9.72242, + "90": 9.92089, + "91": 9.47178, + "92": 9.46996, + "93": 9.20589, + "94": 8.94772, + "95": 9.60815, + "96": 9.63635, + "97": 9.4138, + "98": 9.77274, + "99": 8.9958, + "100": 9.50415 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, + "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, + "10": 284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 284527616.0, + "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, + "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, + "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, + "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, + "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, + "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, + "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, + "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, + "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, + "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, + "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, + "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 
416513536.0, + "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, + "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, + "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, + "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, + "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, + "100": 416513536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1465368064.0, + "2": 1465368576.0, + "3": 1465368576.0, + "4": 1465368576.0, + "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, + "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, + "15": 1465368576.0, + "16": 1465368576.0, + "17": 1597092352.0, + "18": 1597092352.0, + "19": 1597092352.0, + "20": 1597092352.0, + "21": 1597092352.0, + "22": 1597092352.0, + "23": 1597092352.0, + "24": 1597092352.0, + "25": 1597092352.0, + "26": 1597092352.0, + "27": 1597092352.0, + "28": 1597092352.0, + "29": 1597092352.0, + "30": 1597092352.0, + "31": 1597092352.0, + "32": 1597092352.0, + "33": 1597092352.0, + "34": 1597092352.0, + "35": 1597092352.0, + "36": 1597092352.0, + "37": 1597092352.0, + "38": 1597092352.0, + "39": 1597092352.0, + "40": 1597092352.0, + "41": 1597092352.0, + "42": 1597092352.0, + "43": 1597092352.0, + "44": 1597092352.0, + "45": 1597092352.0, + "46": 1597092352.0, + "47": 1597092352.0, + "48": 1597092352.0, + "49": 1597092352.0, + "50": 1597092352.0, + "51": 1597092352.0, + "52": 1597092352.0, + "53": 1597092352.0, + "54": 1597092352.0, + "55": 1597092352.0, + "56": 1597092352.0, + "57": 1597092352.0, + "58": 1597092352.0, + "59": 1597092352.0, + "60": 1597092352.0, + "61": 
1597092352.0, + "62": 1597092352.0, + "63": 1597092352.0, + "64": 1597092352.0, + "65": 1597092352.0, + "66": 1597092352.0, + "67": 1597092352.0, + "68": 1597092352.0, + "69": 1597092352.0, + "70": 1597092352.0, + "71": 1597092352.0, + "72": 1597092352.0, + "73": 1597092352.0, + "74": 1597092352.0, + "75": 1597092352.0, + "76": 1597092352.0, + "77": 1597092352.0, + "78": 1597092352.0, + "79": 1597092352.0, + "80": 1597092352.0, + "81": 1597092352.0, + "82": 1597092352.0, + "83": 1597092352.0, + "84": 1597092352.0, + "85": 1597092352.0, + "86": 1597092352.0, + "87": 1597092352.0, + "88": 1597092352.0, + "89": 1597092352.0, + "90": 1597092352.0, + "91": 1597092352.0, + "92": 1597092352.0, + "93": 1597092352.0, + "94": 1597092352.0, + "95": 1597092352.0, + "96": 1597092352.0, + "97": 1597092352.0, + "98": 1597092352.0, + "99": 1597092352.0, + "100": 1597092352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.37179, + "2": 0.21537, + "3": 0.18911, + "4": 0.18458, + "5": 0.18487, + "6": 0.18754, + "7": 0.18665, + "8": 0.1878, + "9": 0.18553, + "10": 0.1849, + "11": 0.18796, + "12": 0.18834, + "13": 0.19005, + "14": 0.18356, + "15": 0.18558, + "16": 0.27381, + "17": 0.18936, + "18": 0.19528, + "19": 0.19364, + "20": 0.1953, + "21": 0.19158, + "22": 0.18527, + "23": 0.1891, + "24": 0.19114, + "25": 0.19216, + "26": 0.19001, + "27": 0.19218, + "28": 0.19054, + "29": 0.19151, + "30": 0.19191, + "31": 0.19643, + "32": 0.19421, + "33": 0.19414, + "34": 0.19615, + "35": 0.19402, + "36": 0.19651, + "37": 0.19212, + "38": 0.19469, + "39": 0.19904, + "40": 0.19924, + "41": 0.19587, + "42": 0.21217, + "43": 0.21187, + "44": 0.19529, + "45": 0.20033, + "46": 0.20271, + "47": 0.19543, + "48": 0.20218, + "49": 0.20489, + "50": 0.19921, + "51": 0.2115, + "52": 0.20718, + "53": 0.19391, + "54": 0.19638, + "55": 0.19472, + "56": 0.19481, + "57": 0.19264, + "58": 0.19802, + "59": 0.19862, + "60": 0.19826, + "61": 0.19634, 
+ "62": 0.19752, + "63": 0.19602, + "64": 0.19649, + "65": 0.19524, + "66": 0.19483, + "67": 0.19471, + "68": 0.19619, + "69": 0.19456, + "70": 0.1972, + "71": 0.19562, + "72": 0.1963, + "73": 0.19559, + "74": 0.1958, + "75": 0.2007, + "76": 0.19838, + "77": 0.1931, + "78": 0.19809, + "79": 0.19589, + "80": 0.19799, + "81": 0.19659, + "82": 0.19661, + "83": 0.20092, + "84": 0.19558, + "85": 0.19886, + "86": 0.20355, + "87": 0.19808, + "88": 0.19948, + "89": 0.19521, + "90": 0.19741, + "91": 0.19953, + "92": 0.19688, + "93": 0.19645, + "94": 0.19575, + "95": 0.19574, + "96": 0.19609, + "97": 0.19745, + "98": 0.19491, + "99": 0.19618, + "100": 0.19576 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 2365.0, + "17": "nan", + "18": 2331.0, + "19": 2912.0, + "20": 1664.0, + "21": 2009.0, + "22": "nan", + "23": 2483.0, + "24": 2192.0, + "25": 2290.0, + "26": 1916.0, + "27": 2020.0, + "28": 2503.0, + "29": 2379.0, + "30": 2400.0, + "31": 1759.0, + "32": 2522.0, + "33": 2145.0, + "34": 1791.0, + "35": 1777.0, + "36": 2100.0, + "37": 2396.0, + "38": 2040.0, + "39": 2983.0, + "40": 1805.0, + "41": 3097.0, + "42": 2421.0, + "43": 2566.0, + "44": 1858.0, + "45": 2371.0, + "46": 2140.0, + "47": 2603.0, + "48": 2358.0, + "49": 1739.0, + "50": 2686.0, + "51": 2041.0, + "52": 2226.0, + "53": 3222.0, + "54": 2784.0, + "55": 2290.0, + "56": 2428.0, + "57": 2146.0, + "58": 3048.0, + "59": 2504.0, + "60": 2612.0, + "61": 2623.0, + "62": 3003.0, + "63": 2762.0, + "64": 2917.0, + "65": 2104.0, + "66": 3550.0, + "67": 2433.0, + "68": 3146.0, + "69": 2877.0, + "70": 3528.0, + "71": 2983.0, + "72": 2640.0, + "73": 3199.0, + "74": 2084.0, + "75": 2809.0, + "76": 3599.0, + "77": 3667.0, + "78": 3680.0, + "79": 3972.0, + 
"80": 3365.0, + "81": 5042.0, + "82": 3291.0, + "83": 3016.0, + "84": 3592.0, + "85": 3792.0, + "86": 3192.0, + "87": 4219.0, + "88": 3376.0, + "89": 4110.0, + "90": 3939.0, + "91": 2912.0, + "92": 4114.0, + "93": 3499.0, + "94": 4339.0, + "95": 3829.0, + "96": 3875.0, + "97": 4100.0, + "98": 4889.0, + "99": 3771.0, + "100": 3390.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..47fa63fad72 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84277, + "2": 10.85562, + "3": 10.84568, + "4": 10.84364, + "5": 10.85979, + "6": 10.86413, + "7": 10.85362, + "8": 10.85066, + "9": 10.8615, + "10": 10.82586, + "11": 10.86811, + "12": 10.85685, + "13": 10.87827, + "14": 10.86894, + "15": 10.85888, + "16": 10.8685, + "17": 10.85105, + "18": 10.85939, + "19": 10.85704, + "20": 10.84526, + "21": 10.85808, + "22": 10.83215, + "23": 10.86717, + "24": 10.83773, + "25": 10.82744, + "26": 10.83163, + "27": 10.83573, + "28": 10.82373, + "29": 10.81624, + "30": 10.76486, + "31": 10.69044, + "32": 10.76257, + "33": 10.75455, + "34": 10.67733, + "35": 10.66335, + "36": 10.63634, + "37": 10.66856, + "38": 10.5969, + "39": 10.67599, + "40": 10.50898, + "41": 10.53945, + "42": 10.55263, + "43": 10.35003, + "44": 10.40418, + "45": 10.32106, + "46": 10.27724, + "47": 10.45205, + "48": 10.28913, + "49": 10.05779, + "50": 10.27777, + "51": 10.23471, + "52": 10.13764, + "53": 10.34797, + "54": 10.26738, + "55": 10.20734, + "56": 9.99527, + "57": 9.89333, + "58": 10.13452, + 
"59": 9.92856, + "60": 9.8551, + "61": 9.98264, + "62": 10.20686, + "63": 9.70842, + "64": 10.01687, + "65": 9.30409, + "66": 9.93326, + "67": 9.62677, + "68": 9.98429, + "69": 9.9755, + "70": 9.93956, + "71": 9.81005, + "72": 9.798, + "73": 9.68454, + "74": 9.19951, + "75": 9.60518, + "76": 9.27791, + "77": 10.19437, + "78": 9.8671, + "79": 9.53341, + "80": 9.56341, + "81": 9.63047, + "82": 9.82819, + "83": 9.46388, + "84": 9.53736, + "85": 9.74561, + "86": 9.21332, + "87": 9.7014, + "88": 9.86621, + "89": 9.72242, + "90": 9.92089, + "91": 9.47178, + "92": 9.46996, + "93": 9.20589, + "94": 8.94772, + "95": 9.60815, + "96": 9.63635, + "97": 9.4138, + "98": 9.77274, + "99": 8.9958, + "100": 9.50415 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, + "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, + "10": 284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 284527616.0, + "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, + "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, + "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, + "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, + "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, + "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, + "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, + "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, + "55": 416513536.0, + "56": 416513536.0, + "57": 
416513536.0, + "58": 416513536.0, + "59": 416513536.0, + "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, + "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, + "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, + "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, + "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, + "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, + "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, + "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, + "100": 416513536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1465368064.0, + "2": 1465368576.0, + "3": 1465368576.0, + "4": 1465368576.0, + "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, + "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, + "15": 1465368576.0, + "16": 1465368576.0, + "17": 1597092352.0, + "18": 1597092352.0, + "19": 1597092352.0, + "20": 1597092352.0, + "21": 1597092352.0, + "22": 1597092352.0, + "23": 1597092352.0, + "24": 1597092352.0, + "25": 1597092352.0, + "26": 1597092352.0, + "27": 1597092352.0, + "28": 1597092352.0, + "29": 1597092352.0, + "30": 1597092352.0, + "31": 1597092352.0, + "32": 1597092352.0, + "33": 1597092352.0, + "34": 1597092352.0, + "35": 1597092352.0, + "36": 1597092352.0, + "37": 1597092352.0, + "38": 1597092352.0, + "39": 1597092352.0, + "40": 1597092352.0, + "41": 1597092352.0, + "42": 1597092352.0, + "43": 1597092352.0, + "44": 1597092352.0, + "45": 
1597092352.0, + "46": 1597092352.0, + "47": 1597092352.0, + "48": 1597092352.0, + "49": 1597092352.0, + "50": 1597092352.0, + "51": 1597092352.0, + "52": 1597092352.0, + "53": 1597092352.0, + "54": 1597092352.0, + "55": 1597092352.0, + "56": 1597092352.0, + "57": 1597092352.0, + "58": 1597092352.0, + "59": 1597092352.0, + "60": 1597092352.0, + "61": 1597092352.0, + "62": 1597092352.0, + "63": 1597092352.0, + "64": 1597092352.0, + "65": 1597092352.0, + "66": 1597092352.0, + "67": 1597092352.0, + "68": 1597092352.0, + "69": 1597092352.0, + "70": 1597092352.0, + "71": 1597092352.0, + "72": 1597092352.0, + "73": 1597092352.0, + "74": 1597092352.0, + "75": 1597092352.0, + "76": 1597092352.0, + "77": 1597092352.0, + "78": 1597092352.0, + "79": 1597092352.0, + "80": 1597092352.0, + "81": 1597092352.0, + "82": 1597092352.0, + "83": 1597092352.0, + "84": 1597092352.0, + "85": 1597092352.0, + "86": 1597092352.0, + "87": 1597092352.0, + "88": 1597092352.0, + "89": 1597092352.0, + "90": 1597092352.0, + "91": 1597092352.0, + "92": 1597092352.0, + "93": 1597092352.0, + "94": 1597092352.0, + "95": 1597092352.0, + "96": 1597092352.0, + "97": 1597092352.0, + "98": 1597092352.0, + "99": 1597092352.0, + "100": 1597092352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.02035, + "2": 0.23195, + "3": 0.20851, + "4": 0.20697, + "5": 0.20737, + "6": 0.20888, + "7": 0.2126, + "8": 0.21169, + "9": 0.21057, + "10": 0.21255, + "11": 0.21108, + "12": 0.21506, + "13": 0.21085, + "14": 0.21072, + "15": 0.20967, + "16": 0.28325, + "17": 0.21485, + "18": 0.21984, + "19": 0.22277, + "20": 0.22004, + "21": 0.2242, + "22": 0.21349, + "23": 0.22346, + "24": 0.22444, + "25": 0.22521, + "26": 0.22267, + "27": 0.22592, + "28": 0.22136, + "29": 0.22802, + "30": 0.2227, + "31": 0.22084, + "32": 0.22099, + "33": 0.22019, + "34": 0.22336, + "35": 0.23024, + "36": 0.23188, + "37": 0.21929, + "38": 0.22277, + "39": 0.22303, + "40": 0.22269, + 
"41": 0.22539, + "42": 0.22835, + "43": 0.22379, + "44": 0.22103, + "45": 0.21919, + "46": 0.22653, + "47": 0.21996, + "48": 0.22399, + "49": 0.22202, + "50": 0.22099, + "51": 0.21773, + "52": 0.22165, + "53": 0.2208, + "54": 0.22241, + "55": 0.22007, + "56": 0.22113, + "57": 0.22282, + "58": 0.22209, + "59": 0.22153, + "60": 0.22251, + "61": 0.22383, + "62": 0.22477, + "63": 0.22389, + "64": 0.22518, + "65": 0.22491, + "66": 0.22204, + "67": 0.23149, + "68": 0.22301, + "69": 0.2298, + "70": 0.23059, + "71": 0.22412, + "72": 0.21788, + "73": 0.2209, + "74": 0.22227, + "75": 0.22603, + "76": 0.22022, + "77": 0.22045, + "78": 0.22051, + "79": 0.22157, + "80": 0.22544, + "81": 0.22703, + "82": 0.23226, + "83": 0.23535, + "84": 0.22503, + "85": 0.21869, + "86": 0.21989, + "87": 0.21782, + "88": 0.22296, + "89": 0.24294, + "90": 0.27356, + "91": 0.2182, + "92": 0.22138, + "93": 0.21695, + "94": 0.22172, + "95": 0.21947, + "96": 0.21792, + "97": 0.22243, + "98": 0.21902, + "99": 0.2202, + "100": 0.22043 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 2365.0, + "17": "nan", + "18": 2331.0, + "19": 2912.0, + "20": 1664.0, + "21": 2009.0, + "22": "nan", + "23": 2483.0, + "24": 2192.0, + "25": 2290.0, + "26": 1916.0, + "27": 2020.0, + "28": 2503.0, + "29": 2379.0, + "30": 2400.0, + "31": 1759.0, + "32": 2522.0, + "33": 2145.0, + "34": 1791.0, + "35": 1777.0, + "36": 2100.0, + "37": 2396.0, + "38": 2040.0, + "39": 2983.0, + "40": 1805.0, + "41": 3097.0, + "42": 2421.0, + "43": 2566.0, + "44": 1858.0, + "45": 2371.0, + "46": 2140.0, + "47": 2603.0, + "48": 2358.0, + "49": 1739.0, + "50": 2686.0, + "51": 2041.0, + "52": 2226.0, + "53": 3222.0, + "54": 2784.0, + "55": 2290.0, + "56": 2428.0, + "57": 2146.0, 
+ "58": 3048.0, + "59": 2504.0, + "60": 2612.0, + "61": 2623.0, + "62": 3003.0, + "63": 2762.0, + "64": 2917.0, + "65": 2104.0, + "66": 3550.0, + "67": 2433.0, + "68": 3146.0, + "69": 2877.0, + "70": 3528.0, + "71": 2983.0, + "72": 2640.0, + "73": 3199.0, + "74": 2084.0, + "75": 2809.0, + "76": 3599.0, + "77": 3667.0, + "78": 3680.0, + "79": 3972.0, + "80": 3365.0, + "81": 5042.0, + "82": 3291.0, + "83": 3016.0, + "84": 3592.0, + "85": 3792.0, + "86": 3192.0, + "87": 4219.0, + "88": 3376.0, + "89": 4110.0, + "90": 3939.0, + "91": 2912.0, + "92": 4114.0, + "93": 3499.0, + "94": 4339.0, + "95": 3829.0, + "96": 3875.0, + "97": 4100.0, + "98": 4889.0, + "99": 3771.0, + "100": 3390.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..9f83699719d --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86104, + "2": 10.85751, + "3": 10.86157, + "4": 10.84944, + "5": 10.88371, + "6": 10.88763, + "7": 10.86427, + "8": 10.87317, + "9": 10.86952, + "10": 10.84263, + "11": 10.88626, + "12": 10.88784, + "13": 10.89496, + "14": 10.90319, + "15": 10.87935, + "16": 10.88588, + "17": 10.86428, + "18": 10.88923, + "19": 10.88151, + "20": 10.87405, + "21": 10.88996, + "22": 10.83151, + "23": 10.89289, + "24": 10.85821, + "25": 10.82867, + "26": 10.82729, + "27": 10.85428, + "28": 10.84631, + "29": 10.85408, + "30": 10.77191, + "31": 10.67404, + "32": 10.78923, + "33": 10.7757, + "34": 10.67639, + "35": 10.67622, + "36": 10.63402, + "37": 
10.69312, + "38": 10.61026, + "39": 10.70232, + "40": 10.517, + "41": 10.54604, + "42": 10.57058, + "43": 10.32305, + "44": 10.39205, + "45": 10.28436, + "46": 10.27329, + "47": 10.4798, + "48": 10.25535, + "49": 10.01605, + "50": 10.27861, + "51": 10.21825, + "52": 10.1281, + "53": 10.35922, + "54": 10.25909, + "55": 10.20112, + "56": 9.9815, + "57": 9.84915, + "58": 10.12333, + "59": 9.90734, + "60": 9.83306, + "61": 9.97107, + "62": 10.22132, + "63": 9.6767, + "64": 10.01779, + "65": 9.26979, + "66": 9.9402, + "67": 9.62874, + "68": 9.9875, + "69": 9.98441, + "70": 9.92662, + "71": 9.80996, + "72": 9.79208, + "73": 9.68101, + "74": 9.18023, + "75": 9.61385, + "76": 9.28826, + "77": 10.19395, + "78": 9.87453, + "79": 9.52966, + "80": 9.56419, + "81": 9.63453, + "82": 9.82245, + "83": 9.47207, + "84": 9.54654, + "85": 9.74319, + "86": 9.2009, + "87": 9.70113, + "88": 9.86518, + "89": 9.7307, + "90": 9.92148, + "91": 9.4869, + "92": 9.47682, + "93": 9.2135, + "94": 8.94897, + "95": 9.6163, + "96": 9.63416, + "97": 9.41229, + "98": 9.77615, + "99": 9.00251, + "100": 9.5087 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, + "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, + "10": 284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 284527616.0, + "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, + "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, + "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, + "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, + "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 
416513536.0, + "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, + "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, + "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, + "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, + "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, + "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, + "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, + "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, + "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, + "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, + "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, + "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, + "100": 416513536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1464319488.0, + "2": 1465368576.0, + "3": 1465368576.0, + "4": 1465368576.0, + "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, + "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, + "15": 1465368576.0, + "16": 1465368576.0, + "17": 1595257344.0, + "18": 1595257344.0, + "19": 1595257344.0, + "20": 1595257344.0, + "21": 1595257344.0, + "22": 1595257344.0, + "23": 1596305920.0, + "24": 1596305920.0, + "25": 1596305920.0, + "26": 1596305920.0, + "27": 1596305920.0, + 
"28": 1596305920.0, + "29": 1596305920.0, + "30": 1596305920.0, + "31": 1596305920.0, + "32": 1596305920.0, + "33": 1596305920.0, + "34": 1596305920.0, + "35": 1596305920.0, + "36": 1596305920.0, + "37": 1596305920.0, + "38": 1596305920.0, + "39": 1596305920.0, + "40": 1596305920.0, + "41": 1596305920.0, + "42": 1596305920.0, + "43": 1596305920.0, + "44": 1596305920.0, + "45": 1596305920.0, + "46": 1596305920.0, + "47": 1596305920.0, + "48": 1596305920.0, + "49": 1596305920.0, + "50": 1596305920.0, + "51": 1596305920.0, + "52": 1596305920.0, + "53": 1596305920.0, + "54": 1596305920.0, + "55": 1596305920.0, + "56": 1596305920.0, + "57": 1596305920.0, + "58": 1596305920.0, + "59": 1596305920.0, + "60": 1596305920.0, + "61": 1596305920.0, + "62": 1596305920.0, + "63": 1596305920.0, + "64": 1596305920.0, + "65": 1596305920.0, + "66": 1596305920.0, + "67": 1596305920.0, + "68": 1596305920.0, + "69": 1596305920.0, + "70": 1596305920.0, + "71": 1596305920.0, + "72": 1596305920.0, + "73": 1596305920.0, + "74": 1596305920.0, + "75": 1596305920.0, + "76": 1596305920.0, + "77": 1596305920.0, + "78": 1596305920.0, + "79": 1596305920.0, + "80": 1596305920.0, + "81": 1596305920.0, + "82": 1596305920.0, + "83": 1596305920.0, + "84": 1596305920.0, + "85": 1596305920.0, + "86": 1596305920.0, + "87": 1596305920.0, + "88": 1596305920.0, + "89": 1596305920.0, + "90": 1596305920.0, + "91": 1596305920.0, + "92": 1596305920.0, + "93": 1596305920.0, + "94": 1596305920.0, + "95": 1596305920.0, + "96": 1596305920.0, + "97": 1596305920.0, + "98": 1596305920.0, + "99": 1596305920.0, + "100": 1596305920.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.02223, + "2": 0.38061, + "3": 0.32373, + "4": 0.32033, + "5": 0.31913, + "6": 0.32369, + "7": 0.32104, + "8": 0.55134, + "9": 0.31907, + "10": 0.31445, + "11": 0.31681, + "12": 0.32078, + "13": 0.31316, + "14": 0.31705, + "15": 0.32367, + "16": 0.51605, + "17": 0.32163, + "18": 
0.33141, + "19": 0.32965, + "20": 0.33483, + "21": 0.33262, + "22": 0.31555, + "23": 0.54296, + "24": 0.32628, + "25": 0.32494, + "26": 0.33072, + "27": 0.32494, + "28": 0.32501, + "29": 0.33418, + "30": 0.32445, + "31": 0.32469, + "32": 0.54347, + "33": 0.32433, + "34": 0.33133, + "35": 0.32861, + "36": 0.32508, + "37": 0.33059, + "38": 0.32933, + "39": 0.32486, + "40": 0.32922, + "41": 0.32822, + "42": 0.32589, + "43": 0.32604, + "44": 0.32857, + "45": 0.32472, + "46": 0.32696, + "47": 0.32915, + "48": 0.32449, + "49": 0.32476, + "50": 0.33417, + "51": 0.32622, + "52": 0.31932, + "53": 0.32288, + "54": 0.32664, + "55": 0.3199, + "56": 0.32098, + "57": 0.33106, + "58": 0.32428, + "59": 0.32012, + "60": 0.63225, + "61": 0.3217, + "62": 0.3235, + "63": 0.32372, + "64": 0.31863, + "65": 0.32545, + "66": 0.32518, + "67": 0.32024, + "68": 0.32648, + "69": 0.32388, + "70": 0.32115, + "71": 0.32798, + "72": 0.32445, + "73": 0.32219, + "74": 0.32407, + "75": 0.32414, + "76": 0.31907, + "77": 0.3226, + "78": 0.32339, + "79": 0.31992, + "80": 0.32293, + "81": 0.32579, + "82": 0.31876, + "83": 0.31946, + "84": 0.32957, + "85": 0.3196, + "86": 0.31988, + "87": 0.32978, + "88": 0.31888, + "89": 0.31848, + "90": 0.32475, + "91": 0.32291, + "92": 0.32112, + "93": 0.32728, + "94": 0.32274, + "95": 0.31869, + "96": 0.32364, + "97": 0.32247, + "98": 0.32012, + "99": 0.32377, + "100": 0.32291 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 2261.0, + "17": "nan", + "18": 2424.0, + "19": 2800.0, + "20": 1777.0, + "21": 2046.0, + "22": "nan", + "23": 2489.0, + "24": 2136.0, + "25": 2124.0, + "26": 1902.0, + "27": 2006.0, + "28": 2337.0, + "29": 2425.0, + "30": 2262.0, + "31": 1584.0, + "32": 2470.0, + "33": 2074.0, 
+ "34": 1679.0, + "35": 1763.0, + "36": 1918.0, + "37": 2542.0, + "38": 2195.0, + "39": 3045.0, + "40": 1875.0, + "41": 3199.0, + "42": 2508.0, + "43": 2563.0, + "44": 1898.0, + "45": 2434.0, + "46": 2065.0, + "47": 2739.0, + "48": 2291.0, + "49": 1821.0, + "50": 2634.0, + "51": 2172.0, + "52": 2278.0, + "53": 3531.0, + "54": 2662.0, + "55": 2383.0, + "56": 2480.0, + "57": 2136.0, + "58": 3305.0, + "59": 2485.0, + "60": 2832.0, + "61": 2847.0, + "62": 2841.0, + "63": 2867.0, + "64": 3107.0, + "65": 2223.0, + "66": 3682.0, + "67": 2533.0, + "68": 3137.0, + "69": 2650.0, + "70": 3836.0, + "71": 2945.0, + "72": 2727.0, + "73": 3322.0, + "74": 2186.0, + "75": 2913.0, + "76": 3553.0, + "77": 3629.0, + "78": 3871.0, + "79": 4097.0, + "80": 3398.0, + "81": 5006.0, + "82": 3345.0, + "83": 3174.0, + "84": 3718.0, + "85": 3618.0, + "86": 3181.0, + "87": 3995.0, + "88": 3634.0, + "89": 4250.0, + "90": 3676.0, + "91": 2926.0, + "92": 4446.0, + "93": 3780.0, + "94": 4430.0, + "95": 4082.0, + "96": 3952.0, + "97": 4117.0, + "98": 5049.0, + "99": 4122.0, + "100": 3502.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..dd9dc5f116a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86104, + "2": 10.85751, + "3": 10.86157, + "4": 10.84944, + "5": 10.88371, + "6": 10.88763, + "7": 10.86427, + "8": 10.87317, + "9": 10.86952, + "10": 10.84263, + "11": 10.88626, + "12": 10.88784, + "13": 10.89496, + "14": 10.90319, + "15": 10.87935, + "16": 10.88588, + 
"17": 10.86428, + "18": 10.88923, + "19": 10.88151, + "20": 10.87405, + "21": 10.88996, + "22": 10.83151, + "23": 10.89289, + "24": 10.85821, + "25": 10.82867, + "26": 10.82729, + "27": 10.85428, + "28": 10.84631, + "29": 10.85408, + "30": 10.77191, + "31": 10.67404, + "32": 10.78923, + "33": 10.7757, + "34": 10.67639, + "35": 10.67622, + "36": 10.63402, + "37": 10.69312, + "38": 10.61026, + "39": 10.70232, + "40": 10.517, + "41": 10.54604, + "42": 10.57058, + "43": 10.32305, + "44": 10.39205, + "45": 10.28436, + "46": 10.27329, + "47": 10.4798, + "48": 10.25535, + "49": 10.01605, + "50": 10.27861, + "51": 10.21825, + "52": 10.1281, + "53": 10.35922, + "54": 10.25909, + "55": 10.20112, + "56": 9.9815, + "57": 9.84915, + "58": 10.12333, + "59": 9.90734, + "60": 9.83306, + "61": 9.97107, + "62": 10.22132, + "63": 9.6767, + "64": 10.01779, + "65": 9.26979, + "66": 9.9402, + "67": 9.62874, + "68": 9.9875, + "69": 9.98441, + "70": 9.92662, + "71": 9.80996, + "72": 9.79208, + "73": 9.68101, + "74": 9.18023, + "75": 9.61385, + "76": 9.28826, + "77": 10.19395, + "78": 9.87453, + "79": 9.52966, + "80": 9.56419, + "81": 9.63453, + "82": 9.82245, + "83": 9.47207, + "84": 9.54654, + "85": 9.74319, + "86": 9.2009, + "87": 9.70113, + "88": 9.86518, + "89": 9.7307, + "90": 9.92148, + "91": 9.4869, + "92": 9.47682, + "93": 9.2135, + "94": 8.94897, + "95": 9.6163, + "96": 9.63416, + "97": 9.41229, + "98": 9.77615, + "99": 9.00251, + "100": 9.5087 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, + "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, + "10": 284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 284527616.0, + "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, + "20": 416513536.0, + "21": 416513536.0, + 
"22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, + "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, + "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, + "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, + "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, + "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, + "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, + "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, + "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, + "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, + "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, + "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, + "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, + "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, + "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, + "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, + "100": 416513536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1465367040.0, + "2": 1465367040.0, + "3": 1465367552.0, + "4": 1465367552.0, + "5": 1465367552.0, + "6": 1465367552.0, + "7": 1465367552.0, + "8": 1465367552.0, + "9": 1465367552.0, + "10": 1465367552.0, + "11": 
1465367552.0, + "12": 1465367552.0, + "13": 1465368064.0, + "14": 1465368064.0, + "15": 1465368064.0, + "16": 1465368064.0, + "17": 1597091328.0, + "18": 1597092352.0, + "19": 1597092352.0, + "20": 1597092352.0, + "21": 1597092352.0, + "22": 1597092352.0, + "23": 1597092352.0, + "24": 1597092352.0, + "25": 1597092352.0, + "26": 1597092352.0, + "27": 1597092352.0, + "28": 1597092352.0, + "29": 1597092352.0, + "30": 1597092352.0, + "31": 1597092352.0, + "32": 1597092352.0, + "33": 1597092352.0, + "34": 1597092352.0, + "35": 1597092352.0, + "36": 1597092352.0, + "37": 1597092352.0, + "38": 1597092352.0, + "39": 1597092352.0, + "40": 1597092352.0, + "41": 1597092352.0, + "42": 1597092352.0, + "43": 1597092352.0, + "44": 1597092352.0, + "45": 1597092352.0, + "46": 1597092352.0, + "47": 1597092352.0, + "48": 1597092352.0, + "49": 1597092352.0, + "50": 1597092352.0, + "51": 1597092352.0, + "52": 1597092352.0, + "53": 1597092352.0, + "54": 1597092352.0, + "55": 1597092352.0, + "56": 1597092352.0, + "57": 1597092352.0, + "58": 1597092352.0, + "59": 1597092352.0, + "60": 1597092352.0, + "61": 1597092352.0, + "62": 1597092352.0, + "63": 1597092352.0, + "64": 1597092352.0, + "65": 1597092352.0, + "66": 1597092352.0, + "67": 1597092352.0, + "68": 1597092352.0, + "69": 1597092352.0, + "70": 1597092352.0, + "71": 1597092352.0, + "72": 1597092352.0, + "73": 1597092352.0, + "74": 1597092352.0, + "75": 1597092352.0, + "76": 1597092352.0, + "77": 1597092352.0, + "78": 1597092352.0, + "79": 1597092352.0, + "80": 1597092352.0, + "81": 1597092352.0, + "82": 1597092352.0, + "83": 1597092352.0, + "84": 1597092352.0, + "85": 1597092352.0, + "86": 1597092352.0, + "87": 1597092352.0, + "88": 1597092352.0, + "89": 1597092352.0, + "90": 1597092352.0, + "91": 1597092352.0, + "92": 1597092352.0, + "93": 1597092352.0, + "94": 1597092352.0, + "95": 1597092352.0, + "96": 1597092352.0, + "97": 1597092352.0, + "98": 1597092352.0, + "99": 1597092352.0, + "100": 1597092352.0 + } + }, + 
"iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.2197, + "2": 0.38153, + "3": 0.31292, + "4": 0.31213, + "5": 0.30805, + "6": 0.31347, + "7": 0.30766, + "8": 0.30913, + "9": 0.31477, + "10": 0.311, + "11": 0.65045, + "12": 0.30686, + "13": 0.49089, + "14": 0.47587, + "15": 0.30732, + "16": 0.44089, + "17": 0.30846, + "18": 0.31946, + "19": 0.34232, + "20": 0.31703, + "21": 0.31667, + "22": 0.6731, + "23": 0.3162, + "24": 0.31788, + "25": 0.31492, + "26": 0.31699, + "27": 0.31509, + "28": 0.31634, + "29": 0.55951, + "30": 0.31931, + "31": 0.54064, + "32": 0.32022, + "33": 0.31532, + "34": 0.31678, + "35": 0.31737, + "36": 0.31871, + "37": 0.31753, + "38": 0.31664, + "39": 0.32082, + "40": 0.31603, + "41": 0.31831, + "42": 0.32238, + "43": 0.31648, + "44": 0.31713, + "45": 0.32324, + "46": 0.31647, + "47": 0.31877, + "48": 0.32192, + "49": 0.31644, + "50": 0.31704, + "51": 0.31935, + "52": 0.31622, + "53": 0.32109, + "54": 0.31685, + "55": 0.31646, + "56": 0.32045, + "57": 0.31644, + "58": 0.31787, + "59": 0.32038, + "60": 0.31946, + "61": 0.31938, + "62": 0.31564, + "63": 0.32119, + "64": 0.31817, + "65": 0.31991, + "66": 0.32324, + "67": 0.31621, + "68": 0.31739, + "69": 0.32315, + "70": 0.31648, + "71": 0.31985, + "72": 0.32121, + "73": 0.31529, + "74": 0.31685, + "75": 0.32032, + "76": 0.31549, + "77": 0.31631, + "78": 0.32153, + "79": 0.31574, + "80": 0.32036, + "81": 0.31981, + "82": 0.31914, + "83": 0.31869, + "84": 0.31666, + "85": 0.32462, + "86": 0.31593, + "87": 0.31737, + "88": 0.32152, + "89": 0.31605, + "90": 0.31771, + "91": 0.32722, + "92": 0.31534, + "93": 0.31963, + "94": 0.32198, + "95": 0.31603, + "96": 0.31693, + "97": 0.32705, + "98": 0.31586, + "99": 0.31749, + "100": 0.32114 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": 
"nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 2261.0, + "17": "nan", + "18": 2424.0, + "19": 2800.0, + "20": 1777.0, + "21": 2046.0, + "22": "nan", + "23": 2489.0, + "24": 2136.0, + "25": 2124.0, + "26": 1902.0, + "27": 2006.0, + "28": 2337.0, + "29": 2425.0, + "30": 2262.0, + "31": 1584.0, + "32": 2470.0, + "33": 2074.0, + "34": 1679.0, + "35": 1763.0, + "36": 1918.0, + "37": 2542.0, + "38": 2195.0, + "39": 3045.0, + "40": 1875.0, + "41": 3199.0, + "42": 2508.0, + "43": 2563.0, + "44": 1898.0, + "45": 2434.0, + "46": 2065.0, + "47": 2739.0, + "48": 2291.0, + "49": 1821.0, + "50": 2634.0, + "51": 2172.0, + "52": 2278.0, + "53": 3531.0, + "54": 2662.0, + "55": 2383.0, + "56": 2480.0, + "57": 2136.0, + "58": 3305.0, + "59": 2485.0, + "60": 2832.0, + "61": 2847.0, + "62": 2841.0, + "63": 2867.0, + "64": 3107.0, + "65": 2223.0, + "66": 3682.0, + "67": 2533.0, + "68": 3137.0, + "69": 2650.0, + "70": 3836.0, + "71": 2945.0, + "72": 2727.0, + "73": 3322.0, + "74": 2186.0, + "75": 2913.0, + "76": 3553.0, + "77": 3629.0, + "78": 3871.0, + "79": 4097.0, + "80": 3398.0, + "81": 5006.0, + "82": 3345.0, + "83": 3174.0, + "84": 3718.0, + "85": 3618.0, + "86": 3181.0, + "87": 3995.0, + "88": 3634.0, + "89": 4250.0, + "90": 3676.0, + "91": 2926.0, + "92": 4446.0, + "93": 3780.0, + "94": 4430.0, + "95": 4082.0, + "96": 3952.0, + "97": 4117.0, + "98": 5049.0, + "99": 4122.0, + "100": 3502.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json index ac72f0a511b..24b971e51f0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json @@ -2,140 +2,535 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.84277, + "2": 10.85562, + "3": 10.84568, + "4": 10.84364, "5": 10.85979, + "6": 10.86413, + "7": 10.85362, + "8": 10.85066, + "9": 10.8615, "10": 10.82586, + "11": 10.86811, + "12": 10.85685, + "13": 10.87827, + "14": 10.86894, "15": 10.85888, + "16": 10.8685, + "17": 10.85105, + "18": 10.85939, + "19": 10.85704, "20": 10.84526, + "21": 10.85808, + "22": 10.83215, + "23": 10.86717, + "24": 10.83773, "25": 10.82744, + "26": 10.83163, + "27": 10.83573, + "28": 10.82373, + "29": 10.81624, "30": 10.76486, + "31": 10.69044, + "32": 10.76257, + "33": 10.75455, + "34": 10.67733, "35": 10.66335, + "36": 10.63634, + "37": 10.66856, + "38": 10.5969, + "39": 10.67599, "40": 10.50898, + "41": 10.53945, + "42": 10.55263, + "43": 10.35003, + "44": 10.40418, "45": 10.32106, + "46": 10.27724, + "47": 10.45205, + "48": 10.28913, + "49": 10.05779, "50": 10.27777, + "51": 10.23471, + "52": 10.13764, + "53": 10.34797, + "54": 10.26738, "55": 10.20734, + "56": 9.99527, + "57": 9.89333, + "58": 10.13452, + "59": 9.92856, "60": 9.8551, + "61": 9.98264, + "62": 10.20686, + "63": 9.70842, + "64": 10.01687, "65": 9.30409, + "66": 9.93326, + "67": 9.62677, + "68": 9.98429, + "69": 9.9755, "70": 9.93956, + "71": 9.81005, + "72": 9.798, + "73": 9.68454, + "74": 9.19951, "75": 9.60518, + "76": 9.27791, + "77": 10.19437, + "78": 9.8671, + "79": 9.53341, "80": 9.56341, + "81": 9.63047, + "82": 9.82819, + "83": 9.46388, + "84": 9.53736, "85": 9.74561, + "86": 9.21332, + "87": 9.7014, + "88": 9.86621, + "89": 9.72242, "90": 9.92089, + "91": 9.47178, + "92": 9.46996, + "93": 9.20589, + "94": 8.94772, "95": 9.60815, + "96": 9.63635, + "97": 9.4138, + "98": 9.77274, + "99": 8.9958, "100": 9.50415 } }, "mem-allocated-bytes": { "start_step": 
1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, "10": 284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 284527616.0, "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 
416513536.0, "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, "100": 416513536.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1465368064.0, + "2": 1465368576.0, + "3": 1465368576.0, + "4": 1465368576.0, "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, "15": 1465368576.0, + "16": 1465368576.0, + "17": 1597092352.0, + "18": 1597092352.0, + "19": 1597092352.0, "20": 1597092352.0, + "21": 1597092352.0, + "22": 1597092352.0, + "23": 1597092352.0, + "24": 1597092352.0, "25": 1597092352.0, + "26": 1597092352.0, + "27": 1597092352.0, + "28": 1597092352.0, + "29": 1597092352.0, "30": 1597092352.0, + "31": 1597092352.0, + "32": 1597092352.0, + "33": 1597092352.0, + "34": 1597092352.0, "35": 1597092352.0, + "36": 1597092352.0, + "37": 1597092352.0, + "38": 1597092352.0, + "39": 1597092352.0, "40": 1597092352.0, + "41": 1597092352.0, + "42": 1597092352.0, + "43": 1597092352.0, + "44": 1597092352.0, "45": 1597092352.0, + "46": 1597092352.0, + "47": 1597092352.0, + "48": 1597092352.0, + "49": 1597092352.0, "50": 1597092352.0, + "51": 1597092352.0, + "52": 1597092352.0, + "53": 1597092352.0, + "54": 1597092352.0, "55": 1597092352.0, + "56": 1597092352.0, + "57": 1597092352.0, + "58": 1597092352.0, + "59": 1597092352.0, "60": 1597092352.0, + "61": 1597092352.0, + "62": 1597092352.0, + "63": 1597092352.0, + "64": 1597092352.0, "65": 1597092352.0, + "66": 1597092352.0, + "67": 1597092352.0, + "68": 1597092352.0, + "69": 1597092352.0, "70": 1597092352.0, + "71": 1597092352.0, + "72": 1597092352.0, + "73": 1597092352.0, + "74": 1597092352.0, "75": 1597092352.0, + "76": 1597092352.0, + "77": 1597092352.0, + "78": 1597092352.0, + "79": 1597092352.0, "80": 1597092352.0, + "81": 1597092352.0, + 
"82": 1597092352.0, + "83": 1597092352.0, + "84": 1597092352.0, "85": 1597092352.0, + "86": 1597092352.0, + "87": 1597092352.0, + "88": 1597092352.0, + "89": 1597092352.0, "90": 1597092352.0, + "91": 1597092352.0, + "92": 1597092352.0, + "93": 1597092352.0, + "94": 1597092352.0, "95": 1597092352.0, + "96": 1597092352.0, + "97": 1597092352.0, + "98": 1597092352.0, + "99": 1597092352.0, "100": 1597092352.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 6.8101, - "5": 0.18701, - "10": 0.18541, - "15": 0.18521, - "20": 0.19609, - "25": 0.1951, - "30": 0.19333, - "35": 0.19677, - "40": 0.19632, - "45": 0.1936, - "50": 0.1942, - "55": 0.19155, - "60": 0.19561, - "65": 0.19204, - "70": 0.2011, - "75": 0.19962, - "80": 0.19865, - "85": 0.20072, - "90": 0.19885, - "95": 0.20622, - "100": 0.20088 + "1": 6.81983, + "2": 0.2794, + "3": 0.23686, + "4": 0.21148, + "5": 0.21241, + "6": 0.21432, + "7": 0.21203, + "8": 0.21066, + "9": 0.20958, + "10": 0.21304, + "11": 0.2134, + "12": 0.21369, + "13": 0.2107, + "14": 0.21366, + "15": 0.20862, + "16": 0.28561, + "17": 0.2165, + "18": 0.21953, + "19": 0.22122, + "20": 0.22177, + "21": 0.2229, + "22": 0.21407, + "23": 0.22275, + "24": 0.22407, + "25": 0.22273, + "26": 0.22637, + "27": 0.22313, + "28": 0.22384, + "29": 0.22193, + "30": 0.22359, + "31": 0.2209, + "32": 0.22301, + "33": 0.22023, + "34": 0.22191, + "35": 0.22291, + "36": 0.22174, + "37": 0.22136, + "38": 0.22212, + "39": 0.22108, + "40": 0.22197, + "41": 0.22185, + "42": 0.22093, + "43": 0.22393, + "44": 0.22166, + "45": 0.2211, + "46": 0.22759, + "47": 0.22278, + "48": 0.22181, + "49": 0.2205, + "50": 0.2208, + "51": 0.22217, + "52": 0.22209, + "53": 0.21851, + "54": 0.21953, + "55": 0.22284, + "56": 0.21873, + "57": 0.21994, + "58": 0.21738, + "59": 0.22216, + "60": 0.22091, + "61": 0.21912, + "62": 0.21916, + "63": 0.21618, + "64": 0.22037, + "65": 0.22084, + "66": 0.21741, + "67": 0.2191, + 
"68": 0.21708, + "69": 0.21714, + "70": 0.22023, + "71": 0.21802, + "72": 0.216, + "73": 0.22116, + "74": 0.22062, + "75": 0.23228, + "76": 0.22254, + "77": 0.21552, + "78": 0.2206, + "79": 0.2158, + "80": 0.22212, + "81": 0.22066, + "82": 0.22199, + "83": 0.21697, + "84": 0.21798, + "85": 0.21804, + "86": 0.21874, + "87": 0.21943, + "88": 0.21826, + "89": 0.21793, + "90": 0.21769, + "91": 0.21994, + "92": 0.21792, + "93": 0.22021, + "94": 0.21851, + "95": 0.21939, + "96": 0.21921, + "97": 0.22073, + "98": 0.21992, + "99": 0.21794, + "100": 0.21873 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", "15": "nan", + "16": 2365.0, + "17": "nan", + "18": 2331.0, + "19": 2912.0, "20": 1664.0, + "21": 2009.0, + "22": "nan", + "23": 2483.0, + "24": 2192.0, "25": 2290.0, + "26": 1916.0, + "27": 2020.0, + "28": 2503.0, + "29": 2379.0, "30": 2400.0, + "31": 1759.0, + "32": 2522.0, + "33": 2145.0, + "34": 1791.0, "35": 1777.0, + "36": 2100.0, + "37": 2396.0, + "38": 2040.0, + "39": 2983.0, "40": 1805.0, + "41": 3097.0, + "42": 2421.0, + "43": 2566.0, + "44": 1858.0, "45": 2371.0, + "46": 2140.0, + "47": 2603.0, + "48": 2358.0, + "49": 1739.0, "50": 2686.0, + "51": 2041.0, + "52": 2226.0, + "53": 3222.0, + "54": 2784.0, "55": 2290.0, + "56": 2428.0, + "57": 2146.0, + "58": 3048.0, + "59": 2504.0, "60": 2612.0, + "61": 2623.0, + "62": 3003.0, + "63": 2762.0, + "64": 2917.0, "65": 2104.0, + "66": 3550.0, + "67": 2433.0, + "68": 3146.0, + "69": 2877.0, "70": 3528.0, + "71": 2983.0, + "72": 2640.0, + "73": 3199.0, + "74": 2084.0, "75": 2809.0, + "76": 3599.0, + "77": 3667.0, + "78": 3680.0, + "79": 3972.0, "80": 3365.0, + "81": 5042.0, + "82": 3291.0, + "83": 3016.0, + "84": 3592.0, "85": 3792.0, + "86": 3192.0, + "87": 4219.0, + 
"88": 3376.0, + "89": 4110.0, "90": 3939.0, + "91": 2912.0, + "92": 4114.0, + "93": 3499.0, + "94": 4339.0, "95": 3829.0, + "96": 3875.0, + "97": 4100.0, + "98": 4889.0, + "99": 3771.0, "100": 3390.0 } } diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..6d3fed6a4e1 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84277, + "2": 10.85562, + "3": 10.84568, + "4": 10.84364, + "5": 10.85979, + "6": 10.86413, + "7": 10.85362, + "8": 10.85066, + "9": 10.8615, + "10": 10.82586, + "11": 10.86811, + "12": 10.85685, + "13": 10.87827, + "14": 10.86894, + "15": 10.85888, + "16": 10.8685, + "17": 10.85105, + "18": 10.85939, + "19": 10.85704, + "20": 10.84526, + "21": 10.85808, + "22": 10.83215, + "23": 10.86717, + "24": 10.83773, + "25": 10.82744, + "26": 10.83163, + "27": 10.83573, + "28": 10.82373, + "29": 10.81624, + "30": 10.76486, + "31": 10.69044, + "32": 10.76257, + "33": 10.75455, + "34": 10.67733, + "35": 10.66335, + "36": 10.63634, + "37": 10.66856, + "38": 10.5969, + "39": 10.67599, + "40": 10.50898, + "41": 10.53945, + "42": 10.55263, + "43": 10.35003, + "44": 10.40418, + "45": 10.32106, + "46": 10.27724, + "47": 10.45205, + "48": 10.28913, + "49": 10.05779, + "50": 10.27777, + "51": 10.23471, + "52": 10.13764, + "53": 10.34797, + "54": 10.26738, + "55": 10.20734, + "56": 9.99527, + "57": 9.89333, + "58": 10.13452, + "59": 9.92856, + "60": 9.8551, + "61": 9.98264, + "62": 10.20686, + "63": 9.70842, + "64": 10.01687, + "65": 9.30409, + "66": 9.93326, + 
"67": 9.62677, + "68": 9.98429, + "69": 9.9755, + "70": 9.93956, + "71": 9.81005, + "72": 9.798, + "73": 9.68454, + "74": 9.19951, + "75": 9.60518, + "76": 9.27791, + "77": 10.19437, + "78": 9.8671, + "79": 9.53341, + "80": 9.56341, + "81": 9.63047, + "82": 9.82819, + "83": 9.46388, + "84": 9.53736, + "85": 9.74561, + "86": 9.21332, + "87": 9.7014, + "88": 9.86621, + "89": 9.72242, + "90": 9.92089, + "91": 9.47178, + "92": 9.46996, + "93": 9.20589, + "94": 8.94772, + "95": 9.60815, + "96": 9.63635, + "97": 9.4138, + "98": 9.77274, + "99": 8.9958, + "100": 9.50415 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, + "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, + "10": 284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 284527616.0, + "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, + "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, + "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, + "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, + "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, + "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, + "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, + "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, + "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, + "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + 
"64": 416513536.0, + "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, + "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, + "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, + "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, + "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, + "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, + "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, + "100": 416513536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1465368064.0, + "2": 1465368576.0, + "3": 1465368576.0, + "4": 1465368576.0, + "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, + "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, + "15": 1465368576.0, + "16": 1465368576.0, + "17": 1597092352.0, + "18": 1597092352.0, + "19": 1597092352.0, + "20": 1597092352.0, + "21": 1597092352.0, + "22": 1597092352.0, + "23": 1597092352.0, + "24": 1597092352.0, + "25": 1597092352.0, + "26": 1597092352.0, + "27": 1597092352.0, + "28": 1597092352.0, + "29": 1597092352.0, + "30": 1597092352.0, + "31": 1597092352.0, + "32": 1597092352.0, + "33": 1597092352.0, + "34": 1597092352.0, + "35": 1597092352.0, + "36": 1597092352.0, + "37": 1597092352.0, + "38": 1597092352.0, + "39": 1597092352.0, + "40": 1597092352.0, + "41": 1597092352.0, + "42": 1597092352.0, + "43": 1597092352.0, + "44": 1597092352.0, + "45": 1597092352.0, + "46": 1597092352.0, + "47": 1597092352.0, + "48": 1597092352.0, + "49": 1597092352.0, + "50": 1597092352.0, + "51": 
1597092352.0, + "52": 1597092352.0, + "53": 1597092352.0, + "54": 1597092352.0, + "55": 1597092352.0, + "56": 1597092352.0, + "57": 1597092352.0, + "58": 1597092352.0, + "59": 1597092352.0, + "60": 1597092352.0, + "61": 1597092352.0, + "62": 1597092352.0, + "63": 1597092352.0, + "64": 1597092352.0, + "65": 1597092352.0, + "66": 1597092352.0, + "67": 1597092352.0, + "68": 1597092352.0, + "69": 1597092352.0, + "70": 1597092352.0, + "71": 1597092352.0, + "72": 1597092352.0, + "73": 1597092352.0, + "74": 1597092352.0, + "75": 1597092352.0, + "76": 1597092352.0, + "77": 1597092352.0, + "78": 1597092352.0, + "79": 1597092352.0, + "80": 1597092352.0, + "81": 1597092352.0, + "82": 1597092352.0, + "83": 1597092352.0, + "84": 1597092352.0, + "85": 1597092352.0, + "86": 1597092352.0, + "87": 1597092352.0, + "88": 1597092352.0, + "89": 1597092352.0, + "90": 1597092352.0, + "91": 1597092352.0, + "92": 1597092352.0, + "93": 1597092352.0, + "94": 1597092352.0, + "95": 1597092352.0, + "96": 1597092352.0, + "97": 1597092352.0, + "98": 1597092352.0, + "99": 1597092352.0, + "100": 1597092352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.88808, + "2": 0.20981, + "3": 0.18464, + "4": 0.18146, + "5": 0.18139, + "6": 0.18232, + "7": 0.18139, + "8": 0.19305, + "9": 0.20922, + "10": 0.21649, + "11": 0.21725, + "12": 0.21609, + "13": 0.21598, + "14": 0.20547, + "15": 0.17989, + "16": 0.28174, + "17": 0.18387, + "18": 0.18953, + "19": 0.18846, + "20": 0.19189, + "21": 0.19314, + "22": 0.18064, + "23": 0.18755, + "24": 0.18827, + "25": 0.18887, + "26": 0.19031, + "27": 0.1885, + "28": 0.18793, + "29": 0.19305, + "30": 0.19416, + "31": 0.19643, + "32": 0.1951, + "33": 0.19776, + "34": 0.1938, + "35": 0.19081, + "36": 0.19042, + "37": 0.18859, + "38": 0.19216, + "39": 0.1926, + "40": 0.19911, + "41": 0.19456, + "42": 0.19355, + "43": 0.1903, + "44": 0.1948, + "45": 0.19482, + "46": 0.19503, + "47": 0.19164, + "48": 0.19046, + 
"49": 0.19133, + "50": 0.19304, + "51": 0.19406, + "52": 0.20215, + "53": 0.18888, + "54": 0.19054, + "55": 0.1901, + "56": 0.18974, + "57": 0.18817, + "58": 0.18992, + "59": 0.18977, + "60": 0.19074, + "61": 0.1885, + "62": 0.18892, + "63": 0.18809, + "64": 0.19043, + "65": 0.19082, + "66": 0.19034, + "67": 0.19393, + "68": 0.18998, + "69": 0.19445, + "70": 0.19067, + "71": 0.19176, + "72": 0.18979, + "73": 0.18866, + "74": 0.18912, + "75": 0.19329, + "76": 0.19148, + "77": 0.19217, + "78": 0.18942, + "79": 0.19141, + "80": 0.19297, + "81": 0.19247, + "82": 0.19228, + "83": 0.19275, + "84": 0.19196, + "85": 0.19648, + "86": 0.20088, + "87": 0.20172, + "88": 0.1985, + "89": 0.20262, + "90": 0.20618, + "91": 0.19394, + "92": 0.1911, + "93": 0.19148, + "94": 0.50543, + "95": 0.19162, + "96": 0.19339, + "97": 0.1931, + "98": 0.19152, + "99": 0.19182, + "100": 0.1939 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 2365.0, + "17": "nan", + "18": 2331.0, + "19": 2912.0, + "20": 1664.0, + "21": 2009.0, + "22": "nan", + "23": 2483.0, + "24": 2192.0, + "25": 2290.0, + "26": 1916.0, + "27": 2020.0, + "28": 2503.0, + "29": 2379.0, + "30": 2400.0, + "31": 1759.0, + "32": 2522.0, + "33": 2145.0, + "34": 1791.0, + "35": 1777.0, + "36": 2100.0, + "37": 2396.0, + "38": 2040.0, + "39": 2983.0, + "40": 1805.0, + "41": 3097.0, + "42": 2421.0, + "43": 2566.0, + "44": 1858.0, + "45": 2371.0, + "46": 2140.0, + "47": 2603.0, + "48": 2358.0, + "49": 1739.0, + "50": 2686.0, + "51": 2041.0, + "52": 2226.0, + "53": 3222.0, + "54": 2784.0, + "55": 2290.0, + "56": 2428.0, + "57": 2146.0, + "58": 3048.0, + "59": 2504.0, + "60": 2612.0, + "61": 2623.0, + "62": 3003.0, + "63": 2762.0, + "64": 2917.0, + "65": 2104.0, + "66": 
3550.0, + "67": 2433.0, + "68": 3146.0, + "69": 2877.0, + "70": 3528.0, + "71": 2983.0, + "72": 2640.0, + "73": 3199.0, + "74": 2084.0, + "75": 2809.0, + "76": 3599.0, + "77": 3667.0, + "78": 3680.0, + "79": 3972.0, + "80": 3365.0, + "81": 5042.0, + "82": 3291.0, + "83": 3016.0, + "84": 3592.0, + "85": 3792.0, + "86": 3192.0, + "87": 4219.0, + "88": 3376.0, + "89": 4110.0, + "90": 3939.0, + "91": 2912.0, + "92": 4114.0, + "93": 3499.0, + "94": 4339.0, + "95": 3829.0, + "96": 3875.0, + "97": 4100.0, + "98": 4889.0, + "99": 3771.0, + "100": 3390.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..cb0ad3fdb4b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84277, + "2": 10.85562, + "3": 10.84568, + "4": 10.84364, + "5": 10.85979, + "6": 10.86413, + "7": 10.85362, + "8": 10.85066, + "9": 10.8615, + "10": 10.82586, + "11": 10.86811, + "12": 10.85685, + "13": 10.87827, + "14": 10.86894, + "15": 10.85888, + "16": 10.8685, + "17": 10.85105, + "18": 10.85939, + "19": 10.85704, + "20": 10.84526, + "21": 10.85808, + "22": 10.83215, + "23": 10.86717, + "24": 10.83773, + "25": 10.82744, + "26": 10.83163, + "27": 10.83573, + "28": 10.82373, + "29": 10.81624, + "30": 10.76486, + "31": 10.69044, + "32": 10.76257, + "33": 10.75455, + "34": 10.67733, + "35": 10.66335, + "36": 10.63634, + "37": 10.66856, + "38": 10.5969, + "39": 10.67599, + "40": 10.50898, + "41": 10.53945, + "42": 10.55263, + "43": 10.35003, + "44": 10.40418, + "45": 10.32106, + 
"46": 10.27724, + "47": 10.45205, + "48": 10.28913, + "49": 10.05779, + "50": 10.27777, + "51": 10.23471, + "52": 10.13764, + "53": 10.34797, + "54": 10.26738, + "55": 10.20734, + "56": 9.99527, + "57": 9.89333, + "58": 10.13452, + "59": 9.92856, + "60": 9.8551, + "61": 9.98264, + "62": 10.20686, + "63": 9.70842, + "64": 10.01687, + "65": 9.30409, + "66": 9.93326, + "67": 9.62677, + "68": 9.98429, + "69": 9.9755, + "70": 9.93956, + "71": 9.81005, + "72": 9.798, + "73": 9.68454, + "74": 9.19951, + "75": 9.60518, + "76": 9.27791, + "77": 10.19437, + "78": 9.8671, + "79": 9.53341, + "80": 9.56341, + "81": 9.63047, + "82": 9.82819, + "83": 9.46388, + "84": 9.53736, + "85": 9.74561, + "86": 9.21332, + "87": 9.7014, + "88": 9.86621, + "89": 9.72242, + "90": 9.92089, + "91": 9.47178, + "92": 9.46996, + "93": 9.20589, + "94": 8.94772, + "95": 9.60815, + "96": 9.63635, + "97": 9.4138, + "98": 9.77274, + "99": 8.9958, + "100": 9.50415 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, + "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, + "10": 284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 284527616.0, + "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, + "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, + "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, + "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, + "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, + "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, + "45": 416513536.0, + "46": 
416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, + "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, + "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, + "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, + "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, + "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, + "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, + "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, + "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, + "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, + "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, + "100": 416513536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1465368064.0, + "2": 1465368576.0, + "3": 1465368576.0, + "4": 1465368576.0, + "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, + "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, + "15": 1465368576.0, + "16": 1465368576.0, + "17": 1597092352.0, + "18": 1597092352.0, + "19": 1597092352.0, + "20": 1597092352.0, + "21": 1597092352.0, + "22": 1597092352.0, + "23": 1597092352.0, + "24": 1597092352.0, + "25": 1597092352.0, + "26": 1597092352.0, + "27": 1597092352.0, + "28": 1597092352.0, + "29": 1597092352.0, + "30": 1597092352.0, + "31": 1597092352.0, + "32": 1597092352.0, + "33": 1597092352.0, + "34": 1597092352.0, 
+ "35": 1597092352.0, + "36": 1597092352.0, + "37": 1597092352.0, + "38": 1597092352.0, + "39": 1597092352.0, + "40": 1597092352.0, + "41": 1597092352.0, + "42": 1597092352.0, + "43": 1597092352.0, + "44": 1597092352.0, + "45": 1597092352.0, + "46": 1597092352.0, + "47": 1597092352.0, + "48": 1597092352.0, + "49": 1597092352.0, + "50": 1597092352.0, + "51": 1597092352.0, + "52": 1597092352.0, + "53": 1597092352.0, + "54": 1597092352.0, + "55": 1597092352.0, + "56": 1597092352.0, + "57": 1597092352.0, + "58": 1597092352.0, + "59": 1597092352.0, + "60": 1597092352.0, + "61": 1597092352.0, + "62": 1597092352.0, + "63": 1597092352.0, + "64": 1597092352.0, + "65": 1597092352.0, + "66": 1597092352.0, + "67": 1597092352.0, + "68": 1597092352.0, + "69": 1597092352.0, + "70": 1597092352.0, + "71": 1597092352.0, + "72": 1597092352.0, + "73": 1597092352.0, + "74": 1597092352.0, + "75": 1597092352.0, + "76": 1597092352.0, + "77": 1597092352.0, + "78": 1597092352.0, + "79": 1597092352.0, + "80": 1597092352.0, + "81": 1597092352.0, + "82": 1597092352.0, + "83": 1597092352.0, + "84": 1597092352.0, + "85": 1597092352.0, + "86": 1597092352.0, + "87": 1597092352.0, + "88": 1597092352.0, + "89": 1597092352.0, + "90": 1597092352.0, + "91": 1597092352.0, + "92": 1597092352.0, + "93": 1597092352.0, + "94": 1597092352.0, + "95": 1597092352.0, + "96": 1597092352.0, + "97": 1597092352.0, + "98": 1597092352.0, + "99": 1597092352.0, + "100": 1597092352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.78805, + "2": 0.23224, + "3": 0.20783, + "4": 0.21971, + "5": 0.22246, + "6": 0.23346, + "7": 0.21626, + "8": 0.20597, + "9": 0.2043, + "10": 0.20681, + "11": 0.20511, + "12": 0.20484, + "13": 0.21351, + "14": 0.20446, + "15": 0.21063, + "16": 0.28338, + "17": 0.21017, + "18": 0.21577, + "19": 0.21852, + "20": 0.23072, + "21": 0.25974, + "22": 0.21717, + "23": 0.22548, + "24": 0.21878, + "25": 0.21448, + "26": 0.21416, + "27": 
0.22357, + "28": 0.21645, + "29": 0.21325, + "30": 0.21465, + "31": 0.21452, + "32": 0.21608, + "33": 0.23531, + "34": 0.227, + "35": 0.2188, + "36": 0.21248, + "37": 0.21694, + "38": 0.21269, + "39": 0.22285, + "40": 0.21458, + "41": 0.2134, + "42": 0.21991, + "43": 0.21621, + "44": 0.21422, + "45": 0.21339, + "46": 0.21332, + "47": 0.21892, + "48": 0.21384, + "49": 0.21668, + "50": 0.21806, + "51": 0.21958, + "52": 0.2173, + "53": 0.21642, + "54": 0.22157, + "55": 0.21549, + "56": 0.21528, + "57": 0.21789, + "58": 0.21634, + "59": 0.21649, + "60": 0.2141, + "61": 0.21447, + "62": 0.21596, + "63": 0.21545, + "64": 0.22145, + "65": 0.21603, + "66": 0.21504, + "67": 0.21551, + "68": 0.21918, + "69": 0.21831, + "70": 0.21943, + "71": 0.21537, + "72": 0.21937, + "73": 0.21783, + "74": 0.2246, + "75": 0.22031, + "76": 0.23249, + "77": 0.21862, + "78": 0.21663, + "79": 0.21806, + "80": 0.21694, + "81": 0.21684, + "82": 0.21559, + "83": 0.21877, + "84": 0.2151, + "85": 0.21819, + "86": 0.2167, + "87": 0.21768, + "88": 0.21415, + "89": 0.21694, + "90": 0.21444, + "91": 0.21616, + "92": 0.21967, + "93": 0.21672, + "94": 0.21699, + "95": 0.21892, + "96": 0.21871, + "97": 0.21805, + "98": 0.21674, + "99": 0.21639, + "100": 0.21581 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 2365.0, + "17": "nan", + "18": 2331.0, + "19": 2912.0, + "20": 1664.0, + "21": 2009.0, + "22": "nan", + "23": 2483.0, + "24": 2192.0, + "25": 2290.0, + "26": 1916.0, + "27": 2020.0, + "28": 2503.0, + "29": 2379.0, + "30": 2400.0, + "31": 1759.0, + "32": 2522.0, + "33": 2145.0, + "34": 1791.0, + "35": 1777.0, + "36": 2100.0, + "37": 2396.0, + "38": 2040.0, + "39": 2983.0, + "40": 1805.0, + "41": 3097.0, + "42": 2421.0, + "43": 
2566.0, + "44": 1858.0, + "45": 2371.0, + "46": 2140.0, + "47": 2603.0, + "48": 2358.0, + "49": 1739.0, + "50": 2686.0, + "51": 2041.0, + "52": 2226.0, + "53": 3222.0, + "54": 2784.0, + "55": 2290.0, + "56": 2428.0, + "57": 2146.0, + "58": 3048.0, + "59": 2504.0, + "60": 2612.0, + "61": 2623.0, + "62": 3003.0, + "63": 2762.0, + "64": 2917.0, + "65": 2104.0, + "66": 3550.0, + "67": 2433.0, + "68": 3146.0, + "69": 2877.0, + "70": 3528.0, + "71": 2983.0, + "72": 2640.0, + "73": 3199.0, + "74": 2084.0, + "75": 2809.0, + "76": 3599.0, + "77": 3667.0, + "78": 3680.0, + "79": 3972.0, + "80": 3365.0, + "81": 5042.0, + "82": 3291.0, + "83": 3016.0, + "84": 3592.0, + "85": 3792.0, + "86": 3192.0, + "87": 4219.0, + "88": 3376.0, + "89": 4110.0, + "90": 3939.0, + "91": 2912.0, + "92": 4114.0, + "93": 3499.0, + "94": 4339.0, + "95": 3829.0, + "96": 3875.0, + "97": 4100.0, + "98": 4889.0, + "99": 3771.0, + "100": 3390.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..d7593924d14 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86104, + "2": 10.85751, + "3": 10.86157, + "4": 10.84944, + "5": 10.88371, + "6": 10.88763, + "7": 10.86427, + "8": 10.87317, + "9": 10.86952, + "10": 10.84263, + "11": 10.88626, + "12": 10.88784, + "13": 10.89496, + "14": 10.90319, + "15": 10.87935, + "16": 10.88588, + "17": 10.86428, + "18": 10.88923, + "19": 10.88151, + "20": 10.87405, + "21": 10.88996, + "22": 10.83151, + "23": 10.89289, 
+ "24": 10.85821, + "25": 10.82867, + "26": 10.82729, + "27": 10.85428, + "28": 10.84631, + "29": 10.85408, + "30": 10.77191, + "31": 10.67404, + "32": 10.78923, + "33": 10.7757, + "34": 10.67639, + "35": 10.67622, + "36": 10.63402, + "37": 10.69312, + "38": 10.61026, + "39": 10.70232, + "40": 10.517, + "41": 10.54604, + "42": 10.57058, + "43": 10.32305, + "44": 10.39205, + "45": 10.28436, + "46": 10.27329, + "47": 10.4798, + "48": 10.25535, + "49": 10.01605, + "50": 10.27861, + "51": 10.21825, + "52": 10.1281, + "53": 10.35922, + "54": 10.25909, + "55": 10.20112, + "56": 9.9815, + "57": 9.84915, + "58": 10.12333, + "59": 9.90734, + "60": 9.83306, + "61": 9.97107, + "62": 10.22132, + "63": 9.6767, + "64": 10.01779, + "65": 9.26979, + "66": 9.9402, + "67": 9.62874, + "68": 9.9875, + "69": 9.98441, + "70": 9.92662, + "71": 9.80996, + "72": 9.79208, + "73": 9.68101, + "74": 9.18023, + "75": 9.61385, + "76": 9.28826, + "77": 10.19395, + "78": 9.87453, + "79": 9.52966, + "80": 9.56419, + "81": 9.63453, + "82": 9.82245, + "83": 9.47207, + "84": 9.54654, + "85": 9.74319, + "86": 9.2009, + "87": 9.70113, + "88": 9.86518, + "89": 9.7307, + "90": 9.92148, + "91": 9.4869, + "92": 9.47682, + "93": 9.2135, + "94": 8.94897, + "95": 9.6163, + "96": 9.63416, + "97": 9.41229, + "98": 9.77615, + "99": 9.00251, + "100": 9.5087 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, + "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, + "10": 284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 284527616.0, + "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, + "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, + "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + 
"28": 416513536.0, + "29": 416513536.0, + "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, + "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, + "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, + "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, + "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, + "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, + "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, + "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, + "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, + "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, + "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, + "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, + "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, + "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, + "100": 416513536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1464319488.0, + "2": 1465368064.0, + "3": 1465368576.0, + "4": 1465368576.0, + "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, + "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, + "15": 1465368576.0, + "16": 1465368576.0, + "17": 
1597089792.0, + "18": 1597089792.0, + "19": 1597089792.0, + "20": 1597089792.0, + "21": 1597089792.0, + "22": 1597089792.0, + "23": 1597089792.0, + "24": 1597089792.0, + "25": 1597089792.0, + "26": 1597089792.0, + "27": 1597089792.0, + "28": 1597089792.0, + "29": 1597089792.0, + "30": 1597089792.0, + "31": 1597089792.0, + "32": 1597089792.0, + "33": 1597089792.0, + "34": 1597089792.0, + "35": 1597089792.0, + "36": 1597089792.0, + "37": 1597089792.0, + "38": 1597089792.0, + "39": 1597089792.0, + "40": 1597089792.0, + "41": 1597089792.0, + "42": 1597089792.0, + "43": 1597089792.0, + "44": 1597089792.0, + "45": 1597089792.0, + "46": 1597089792.0, + "47": 1597089792.0, + "48": 1597089792.0, + "49": 1597089792.0, + "50": 1597089792.0, + "51": 1597089792.0, + "52": 1597089792.0, + "53": 1597089792.0, + "54": 1597089792.0, + "55": 1597089792.0, + "56": 1597089792.0, + "57": 1597089792.0, + "58": 1597089792.0, + "59": 1597089792.0, + "60": 1597089792.0, + "61": 1597089792.0, + "62": 1597089792.0, + "63": 1597089792.0, + "64": 1597089792.0, + "65": 1597089792.0, + "66": 1597089792.0, + "67": 1597091328.0, + "68": 1597091328.0, + "69": 1597091328.0, + "70": 1597091328.0, + "71": 1597091328.0, + "72": 1597091328.0, + "73": 1597091328.0, + "74": 1597091328.0, + "75": 1597091328.0, + "76": 1597091328.0, + "77": 1597091328.0, + "78": 1597091328.0, + "79": 1597091328.0, + "80": 1597091328.0, + "81": 1597091328.0, + "82": 1597091328.0, + "83": 1597091328.0, + "84": 1597091328.0, + "85": 1597091328.0, + "86": 1597091328.0, + "87": 1597091328.0, + "88": 1597091328.0, + "89": 1597091840.0, + "90": 1597091840.0, + "91": 1597091840.0, + "92": 1597091840.0, + "93": 1597091840.0, + "94": 1597091840.0, + "95": 1597091840.0, + "96": 1597091840.0, + "97": 1597091840.0, + "98": 1597091840.0, + "99": 1597091840.0, + "100": 1597091840.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.86459, + "2": 0.35839, + "3": 0.5214, + "4": 
0.31404, + "5": 0.31247, + "6": 0.30997, + "7": 0.30873, + "8": 0.49835, + "9": 0.30592, + "10": 0.30506, + "11": 0.30662, + "12": 0.30928, + "13": 0.30537, + "14": 0.30594, + "15": 0.30802, + "16": 0.43126, + "17": 0.30967, + "18": 0.53614, + "19": 0.64808, + "20": 0.31719, + "21": 0.31628, + "22": 0.30781, + "23": 0.32412, + "24": 0.31672, + "25": 0.32015, + "26": 0.31659, + "27": 0.31615, + "28": 0.3194, + "29": 0.32624, + "30": 0.31611, + "31": 0.32028, + "32": 0.33615, + "33": 0.31587, + "34": 0.31903, + "35": 0.33274, + "36": 0.3171, + "37": 0.31597, + "38": 0.32394, + "39": 0.316, + "40": 0.31757, + "41": 0.32645, + "42": 0.32417, + "43": 0.31631, + "44": 0.32431, + "45": 0.31726, + "46": 0.31727, + "47": 0.32304, + "48": 0.32395, + "49": 0.31889, + "50": 0.31989, + "51": 0.32325, + "52": 0.31611, + "53": 0.31629, + "54": 0.32342, + "55": 0.31477, + "56": 0.31566, + "57": 0.32276, + "58": 0.31546, + "59": 0.31489, + "60": 0.31909, + "61": 0.32058, + "62": 0.31567, + "63": 0.31971, + "64": 0.32041, + "65": 0.31499, + "66": 0.3179, + "67": 0.32106, + "68": 0.31511, + "69": 0.31464, + "70": 0.32289, + "71": 0.31535, + "72": 0.3155, + "73": 0.32255, + "74": 0.31506, + "75": 0.3148, + "76": 0.32238, + "77": 0.31466, + "78": 0.31532, + "79": 0.32059, + "80": 0.31659, + "81": 0.31482, + "82": 0.31978, + "83": 0.31945, + "84": 0.31576, + "85": 0.31726, + "86": 0.32066, + "87": 0.31517, + "88": 0.31517, + "89": 0.32561, + "90": 0.3153, + "91": 0.31485, + "92": 0.32199, + "93": 0.31486, + "94": 0.31701, + "95": 0.32449, + "96": 0.3188, + "97": 0.31788, + "98": 0.32439, + "99": 0.31804, + "100": 0.31798 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 2261.0, + "17": "nan", + "18": 2424.0, + "19": 
2800.0, + "20": 1777.0, + "21": 2046.0, + "22": "nan", + "23": 2489.0, + "24": 2136.0, + "25": 2124.0, + "26": 1902.0, + "27": 2006.0, + "28": 2337.0, + "29": 2425.0, + "30": 2262.0, + "31": 1584.0, + "32": 2470.0, + "33": 2074.0, + "34": 1679.0, + "35": 1763.0, + "36": 1918.0, + "37": 2542.0, + "38": 2195.0, + "39": 3045.0, + "40": 1875.0, + "41": 3199.0, + "42": 2508.0, + "43": 2563.0, + "44": 1898.0, + "45": 2434.0, + "46": 2065.0, + "47": 2739.0, + "48": 2291.0, + "49": 1821.0, + "50": 2634.0, + "51": 2172.0, + "52": 2278.0, + "53": 3531.0, + "54": 2662.0, + "55": 2383.0, + "56": 2480.0, + "57": 2136.0, + "58": 3305.0, + "59": 2485.0, + "60": 2832.0, + "61": 2847.0, + "62": 2841.0, + "63": 2867.0, + "64": 3107.0, + "65": 2223.0, + "66": 3682.0, + "67": 2533.0, + "68": 3137.0, + "69": 2650.0, + "70": 3836.0, + "71": 2945.0, + "72": 2727.0, + "73": 3322.0, + "74": 2186.0, + "75": 2913.0, + "76": 3553.0, + "77": 3629.0, + "78": 3871.0, + "79": 4097.0, + "80": 3398.0, + "81": 5006.0, + "82": 3345.0, + "83": 3174.0, + "84": 3718.0, + "85": 3618.0, + "86": 3181.0, + "87": 3995.0, + "88": 3634.0, + "89": 4250.0, + "90": 3676.0, + "91": 2926.0, + "92": 4446.0, + "93": 3780.0, + "94": 4430.0, + "95": 4082.0, + "96": 3952.0, + "97": 4117.0, + "98": 5049.0, + "99": 4122.0, + "100": 3502.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..7a89171c0cd --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86104, + "2": 10.85751, 
+ "3": 10.86157, + "4": 10.84944, + "5": 10.88371, + "6": 10.88763, + "7": 10.86427, + "8": 10.87317, + "9": 10.86952, + "10": 10.84263, + "11": 10.88626, + "12": 10.88784, + "13": 10.89496, + "14": 10.90319, + "15": 10.87935, + "16": 10.88588, + "17": 10.86428, + "18": 10.88923, + "19": 10.88151, + "20": 10.87405, + "21": 10.88996, + "22": 10.83151, + "23": 10.89289, + "24": 10.85821, + "25": 10.82867, + "26": 10.82729, + "27": 10.85428, + "28": 10.84631, + "29": 10.85408, + "30": 10.77191, + "31": 10.67404, + "32": 10.78923, + "33": 10.7757, + "34": 10.67639, + "35": 10.67622, + "36": 10.63402, + "37": 10.69312, + "38": 10.61026, + "39": 10.70232, + "40": 10.517, + "41": 10.54604, + "42": 10.57058, + "43": 10.32305, + "44": 10.39205, + "45": 10.28436, + "46": 10.27329, + "47": 10.4798, + "48": 10.25535, + "49": 10.01605, + "50": 10.27861, + "51": 10.21825, + "52": 10.1281, + "53": 10.35922, + "54": 10.25909, + "55": 10.20112, + "56": 9.9815, + "57": 9.84915, + "58": 10.12333, + "59": 9.90734, + "60": 9.83306, + "61": 9.97107, + "62": 10.22132, + "63": 9.6767, + "64": 10.01779, + "65": 9.26979, + "66": 9.9402, + "67": 9.62874, + "68": 9.9875, + "69": 9.98441, + "70": 9.92662, + "71": 9.80996, + "72": 9.79208, + "73": 9.68101, + "74": 9.18023, + "75": 9.61385, + "76": 9.28826, + "77": 10.19395, + "78": 9.87453, + "79": 9.52966, + "80": 9.56419, + "81": 9.63453, + "82": 9.82245, + "83": 9.47207, + "84": 9.54654, + "85": 9.74319, + "86": 9.2009, + "87": 9.70113, + "88": 9.86518, + "89": 9.7307, + "90": 9.92148, + "91": 9.4869, + "92": 9.47682, + "93": 9.2135, + "94": 8.94897, + "95": 9.6163, + "96": 9.63416, + "97": 9.41229, + "98": 9.77615, + "99": 9.00251, + "100": 9.5087 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, + "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, + "10": 
284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 284527616.0, + "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, + "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, + "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, + "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, + "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, + "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, + "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, + "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, + "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, + "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, + "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, + "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, + "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, + "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, + "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, + "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, + "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, + "100": 416513536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 
1, + "values": { + "1": 1465368064.0, + "2": 1465368064.0, + "3": 1465368576.0, + "4": 1465368576.0, + "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, + "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, + "15": 1465368576.0, + "16": 1465368576.0, + "17": 1597092352.0, + "18": 1597092352.0, + "19": 1597092352.0, + "20": 1597092352.0, + "21": 1597092352.0, + "22": 1597092352.0, + "23": 1597092352.0, + "24": 1597092352.0, + "25": 1597092352.0, + "26": 1597092352.0, + "27": 1597092352.0, + "28": 1597092352.0, + "29": 1597092352.0, + "30": 1597092352.0, + "31": 1597092352.0, + "32": 1597092352.0, + "33": 1597092352.0, + "34": 1597092352.0, + "35": 1597092352.0, + "36": 1597092352.0, + "37": 1597092352.0, + "38": 1597092352.0, + "39": 1597092352.0, + "40": 1597092352.0, + "41": 1597092352.0, + "42": 1597092352.0, + "43": 1597092352.0, + "44": 1597092352.0, + "45": 1597092352.0, + "46": 1597092352.0, + "47": 1597092352.0, + "48": 1597092352.0, + "49": 1597092352.0, + "50": 1597092352.0, + "51": 1597092352.0, + "52": 1597092352.0, + "53": 1597092352.0, + "54": 1597092352.0, + "55": 1597092352.0, + "56": 1597092352.0, + "57": 1597092352.0, + "58": 1597092352.0, + "59": 1597092352.0, + "60": 1597092352.0, + "61": 1597092352.0, + "62": 1597092352.0, + "63": 1597092352.0, + "64": 1597092352.0, + "65": 1597092352.0, + "66": 1597092352.0, + "67": 1597092352.0, + "68": 1597092352.0, + "69": 1597092352.0, + "70": 1597092352.0, + "71": 1597092352.0, + "72": 1597092352.0, + "73": 1597092352.0, + "74": 1597092352.0, + "75": 1597092352.0, + "76": 1597092352.0, + "77": 1597092352.0, + "78": 1597092352.0, + "79": 1597092352.0, + "80": 1597092352.0, + "81": 1597092352.0, + "82": 1597092352.0, + "83": 1597092352.0, + "84": 1597092352.0, + "85": 1597092352.0, + "86": 1597092352.0, + "87": 1597092352.0, + "88": 1597092352.0, + "89": 1597092352.0, + "90": 1597092352.0, + "91": 
1597092352.0, + "92": 1597092352.0, + "93": 1597092352.0, + "94": 1597092352.0, + "95": 1597092352.0, + "96": 1597092352.0, + "97": 1597092352.0, + "98": 1597092352.0, + "99": 1597092352.0, + "100": 1597092352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.35929, + "2": 0.34184, + "3": 0.31203, + "4": 0.30787, + "5": 0.30852, + "6": 0.30384, + "7": 0.30155, + "8": 0.30427, + "9": 0.51091, + "10": 0.30553, + "11": 0.30575, + "12": 0.61502, + "13": 0.30643, + "14": 0.57901, + "15": 0.52637, + "16": 0.42283, + "17": 0.30589, + "18": 0.32011, + "19": 0.31661, + "20": 0.31932, + "21": 0.32147, + "22": 0.48024, + "23": 0.32123, + "24": 0.32199, + "25": 0.6542, + "26": 0.65941, + "27": 0.31987, + "28": 0.32071, + "29": 0.31705, + "30": 0.3217, + "31": 0.32104, + "32": 0.31733, + "33": 0.31859, + "34": 0.32143, + "35": 0.31823, + "36": 0.31764, + "37": 0.32459, + "38": 0.31791, + "39": 0.31709, + "40": 0.3224, + "41": 0.32157, + "42": 0.31769, + "43": 0.32161, + "44": 0.32202, + "45": 0.31808, + "46": 0.32115, + "47": 0.32215, + "48": 0.31811, + "49": 0.32081, + "50": 0.3219, + "51": 0.32586, + "52": 0.32097, + "53": 0.32086, + "54": 0.31965, + "55": 0.32299, + "56": 0.32057, + "57": 0.31894, + "58": 0.3227, + "59": 0.31818, + "60": 0.31815, + "61": 0.32331, + "62": 0.31818, + "63": 0.31777, + "64": 0.32493, + "65": 0.31806, + "66": 0.31829, + "67": 0.32281, + "68": 0.31721, + "69": 0.31771, + "70": 0.323, + "71": 0.31739, + "72": 0.31848, + "73": 0.31915, + "74": 0.3218, + "75": 0.31772, + "76": 0.31789, + "77": 0.32187, + "78": 0.31771, + "79": 0.3183, + "80": 0.32385, + "81": 0.31791, + "82": 0.31794, + "83": 0.32606, + "84": 0.31846, + "85": 0.31748, + "86": 0.32559, + "87": 0.31829, + "88": 0.31805, + "89": 0.32163, + "90": 0.31834, + "91": 0.31753, + "92": 0.32249, + "93": 0.3175, + "94": 0.31731, + "95": 0.31891, + "96": 0.31986, + "97": 0.31789, + "98": 0.31909, + "99": 0.32353, + "100": 0.31768 
+ } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 2261.0, + "17": "nan", + "18": 2424.0, + "19": 2800.0, + "20": 1777.0, + "21": 2046.0, + "22": "nan", + "23": 2489.0, + "24": 2136.0, + "25": 2124.0, + "26": 1902.0, + "27": 2006.0, + "28": 2337.0, + "29": 2425.0, + "30": 2262.0, + "31": 1584.0, + "32": 2470.0, + "33": 2074.0, + "34": 1679.0, + "35": 1763.0, + "36": 1918.0, + "37": 2542.0, + "38": 2195.0, + "39": 3045.0, + "40": 1875.0, + "41": 3199.0, + "42": 2508.0, + "43": 2563.0, + "44": 1898.0, + "45": 2434.0, + "46": 2065.0, + "47": 2739.0, + "48": 2291.0, + "49": 1821.0, + "50": 2634.0, + "51": 2172.0, + "52": 2278.0, + "53": 3531.0, + "54": 2662.0, + "55": 2383.0, + "56": 2480.0, + "57": 2136.0, + "58": 3305.0, + "59": 2485.0, + "60": 2832.0, + "61": 2847.0, + "62": 2841.0, + "63": 2867.0, + "64": 3107.0, + "65": 2223.0, + "66": 3682.0, + "67": 2533.0, + "68": 3137.0, + "69": 2650.0, + "70": 3836.0, + "71": 2945.0, + "72": 2727.0, + "73": 3322.0, + "74": 2186.0, + "75": 2913.0, + "76": 3553.0, + "77": 3629.0, + "78": 3871.0, + "79": 4097.0, + "80": 3398.0, + "81": 5006.0, + "82": 3345.0, + "83": 3174.0, + "84": 3718.0, + "85": 3618.0, + "86": 3181.0, + "87": 3995.0, + "88": 3634.0, + "89": 4250.0, + "90": 3676.0, + "91": 2926.0, + "92": 4446.0, + "93": 3780.0, + "94": 4430.0, + "95": 4082.0, + "96": 3952.0, + "97": 4117.0, + "98": 5049.0, + "99": 4122.0, + "100": 3502.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json new file mode 100644 
index 00000000000..3aad045fc8e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,162 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 12.58569, + "2": 12.58406, + "3": 12.58486, + "4": 12.58642, + "5": 12.58279, + "6": 12.57912, + "7": 12.56177, + "8": 12.52304, + "9": 12.4966, + "10": 12.4826, + "11": 12.31462, + "12": 12.272, + "13": 12.20924, + "14": 12.20094, + "15": 11.79651, + "16": 11.78035, + "17": 11.74188, + "18": 11.71656, + "19": 11.59074, + "20": 11.47672, + "21": 11.23784, + "22": 11.3586, + "23": 11.25768, + "24": 11.14081, + "25": 10.97989 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 521035392.0, + "2": 521662624.0, + "3": 520932992.0, + "4": 521225120.0, + "5": 520993600.0, + "6": 521369824.0, + "7": 521417344.0, + "8": 521054784.0, + "9": 521458592.0, + "10": 521175520.0, + "11": 522277376.0, + "12": 521435904.0, + "13": 521472640.0, + "14": 522442496.0, + "15": 521589568.0, + "16": 521414080.0, + "17": 521025696.0, + "18": 521279168.0, + "19": 521154400.0, + "20": 521132352.0, + "21": 522909696.0, + "22": 521591904.0, + "23": 521353504.0, + "24": 521426496.0, + "25": 523547008.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 24540168192.0, + "2": 24540168192.0, + "3": 24540168192.0, + "4": 24540168192.0, + "5": 24540168192.0, + "6": 24540168192.0, + "7": 24540168192.0, + "8": 24540168192.0, + "9": 24540168192.0, + "10": 24540168192.0, + "11": 24540168192.0, + "12": 24540168192.0, + "13": 24540168192.0, + "14": 24540168192.0, + "15": 24540168192.0, + "16": 24540168192.0, + "17": 24540168192.0, + "18": 24540168192.0, + "19": 24540168192.0, + "20": 24540168192.0, + "21": 24540168192.0, + "22": 24540168192.0, + "23": 24540168192.0, + "24": 24540168192.0, + "25": 
24540168192.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 52730810368.0, + "2": 60518309888.0, + "3": 60518309888.0, + "4": 60518309888.0, + "5": 60518309888.0, + "6": 60518309888.0, + "7": 60518309888.0, + "8": 60518309888.0, + "9": 60518309888.0, + "10": 60518309888.0, + "11": 60518309888.0, + "12": 60518309888.0, + "13": 60518309888.0, + "14": 60518309888.0, + "15": 60518309888.0, + "16": 60518309888.0, + "17": 60518309888.0, + "18": 60518309888.0, + "19": 60518309888.0, + "20": 60518309888.0, + "21": 60518309888.0, + "22": 60518309888.0, + "23": 60518309888.0, + "24": 60518309888.0, + "25": 60518309888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": "nan", + "2": 9.35391, + "3": "nan", + "4": 1.17482, + "5": "nan", + "6": 1.17131, + "7": "nan", + "8": 1.17328, + "9": "nan", + "10": 1.17214, + "11": "nan", + "12": 1.17467, + "13": "nan", + "14": 1.17439, + "15": "nan", + "16": 1.17582, + "17": "nan", + "18": 1.1764, + "19": "nan", + "20": 1.17744, + "21": "nan", + "22": 1.17439, + "23": "nan", + "24": 1.17461, + "25": "nan" + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..478f889b21c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,162 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 12.58569, + "2": 12.58406, + "3": 12.58486, + "4": 12.58642, + "5": 12.58279, + "6": 12.57912, + "7": 12.56177, + "8": 12.52304, + "9": 12.4966, + "10": 12.4826, + "11": 12.31462, + "12": 12.272, + "13": 12.20924, + 
"14": 12.20094, + "15": 11.79651, + "16": 11.78035, + "17": 11.74188, + "18": 11.71656, + "19": 11.59074, + "20": 11.47672, + "21": 11.23784, + "22": 11.3586, + "23": 11.25768, + "24": 11.14081, + "25": 10.97989 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 521035392.0, + "2": 521662624.0, + "3": 520932992.0, + "4": 521225120.0, + "5": 520993600.0, + "6": 521369824.0, + "7": 521417344.0, + "8": 521054784.0, + "9": 521458592.0, + "10": 521175520.0, + "11": 522277376.0, + "12": 521435904.0, + "13": 521472640.0, + "14": 522442496.0, + "15": 521589568.0, + "16": 521414080.0, + "17": 521025696.0, + "18": 521279168.0, + "19": 521154400.0, + "20": 521132352.0, + "21": 522909696.0, + "22": 521591904.0, + "23": 521353504.0, + "24": 521426496.0, + "25": 523547008.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 24540168192.0, + "2": 24540168192.0, + "3": 24540168192.0, + "4": 24540168192.0, + "5": 24540168192.0, + "6": 24540168192.0, + "7": 24540168192.0, + "8": 24540168192.0, + "9": 24540168192.0, + "10": 24540168192.0, + "11": 24540168192.0, + "12": 24540168192.0, + "13": 24540168192.0, + "14": 24540168192.0, + "15": 24540168192.0, + "16": 24540168192.0, + "17": 24540168192.0, + "18": 24540168192.0, + "19": 24540168192.0, + "20": 24540168192.0, + "21": 24540389376.0, + "22": 24540168192.0, + "23": 24540168192.0, + "24": 24540168192.0, + "25": 24540168192.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 52730810368.0, + "2": 60518424576.0, + "3": 60518424576.0, + "4": 60518424576.0, + "5": 60518424576.0, + "6": 60518424576.0, + "7": 60518424576.0, + "8": 60518424576.0, + "9": 60518424576.0, + "10": 60518424576.0, + "11": 60518424576.0, + "12": 60518424576.0, + "13": 60518424576.0, + "14": 60518424576.0, + "15": 60518424576.0, + "16": 60518424576.0, + "17": 60518424576.0, + 
"18": 60518424576.0, + "19": 60518424576.0, + "20": 60518424576.0, + "21": 60518424576.0, + "22": 60518424576.0, + "23": 60518424576.0, + "24": 60518424576.0, + "25": 60518424576.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": "nan", + "2": 10.03336, + "3": "nan", + "4": 1.18525, + "5": "nan", + "6": 1.18158, + "7": "nan", + "8": 1.18536, + "9": "nan", + "10": 1.18428, + "11": "nan", + "12": 1.18625, + "13": "nan", + "14": 1.18256, + "15": "nan", + "16": 1.18023, + "17": "nan", + "18": 1.18227, + "19": "nan", + "20": 1.18284, + "21": "nan", + "22": 1.18238, + "23": "nan", + "24": 1.18151, + "25": "nan" + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..a059e81b488 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,162 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 12.59715, + "2": 12.60067, + "3": 12.59727, + "4": 12.60021, + "5": 12.59013, + "6": 12.58834, + "7": 12.57605, + "8": 12.5362, + "9": 12.50745, + "10": 12.49091, + "11": 12.32614, + "12": 12.29366, + "13": 12.22589, + "14": 12.23023, + "15": 11.82108, + "16": 11.80586, + "17": 11.77001, + "18": 11.74946, + "19": 11.62189, + "20": 11.51704, + "21": 11.27121, + "22": 11.38966, + "23": 11.29559, + "24": 11.16591, + "25": 11.00354 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 523037536.0, + "2": 523668064.0, + "3": 522933056.0, + "4": 523228480.0, + "5": 523009792.0, + "6": 523364320.0, + "7": 523427840.0, + "8": 523074688.0, + "9": 
523459232.0, + "10": 523184992.0, + "11": 524288736.0, + "12": 523447712.0, + "13": 523490112.0, + "14": 524476096.0, + "15": 523630496.0, + "16": 523459232.0, + "17": 523075936.0, + "18": 523360192.0, + "19": 523206816.0, + "20": 523230848.0, + "21": 524941248.0, + "22": 523654464.0, + "23": 523420576.0, + "24": 523494720.0, + "25": 525638016.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 20663519232.0, + "2": 20663519232.0, + "3": 20663519232.0, + "4": 20663519232.0, + "5": 20663519232.0, + "6": 20663519232.0, + "7": 20663519232.0, + "8": 20663519232.0, + "9": 20663519232.0, + "10": 20663519232.0, + "11": 20663519232.0, + "12": 20663519232.0, + "13": 20663519232.0, + "14": 20663519232.0, + "15": 20663519232.0, + "16": 20663519232.0, + "17": 20663519232.0, + "18": 20663519232.0, + "19": 20663519232.0, + "20": 20663519232.0, + "21": 20663519232.0, + "22": 20663519232.0, + "23": 20663519232.0, + "24": 20663519232.0, + "25": 20663519232.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 50289545216.0, + "2": 57143791616.0, + "3": 57143791616.0, + "4": 57143791616.0, + "5": 57143791616.0, + "6": 57143791616.0, + "7": 57143791616.0, + "8": 57143791616.0, + "9": 57143791616.0, + "10": 57143791616.0, + "11": 57143791616.0, + "12": 57143791616.0, + "13": 57143791616.0, + "14": 57143791616.0, + "15": 57143791616.0, + "16": 57143791616.0, + "17": 57143791616.0, + "18": 57143791616.0, + "19": 57143791616.0, + "20": 57143791616.0, + "21": 57143791616.0, + "22": 57143791616.0, + "23": 57143791616.0, + "24": 57143791616.0, + "25": 57143791616.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": "nan", + "2": 6.55725, + "3": "nan", + "4": 1.12211, + "5": "nan", + "6": 1.11783, + "7": "nan", + "8": 1.11727, + "9": "nan", + "10": 1.1176, + "11": "nan", + "12": 1.11841, + "13": 
"nan", + "14": 1.11918, + "15": "nan", + "16": 1.12025, + "17": "nan", + "18": 1.11888, + "19": "nan", + "20": 1.12, + "21": "nan", + "22": 1.11939, + "23": "nan", + "24": 1.11949, + "25": "nan" + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..0847af86737 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,162 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 12.59715, + "2": 12.60067, + "3": 12.59727, + "4": 12.60021, + "5": 12.59013, + "6": 12.58834, + "7": 12.57605, + "8": 12.5362, + "9": 12.50745, + "10": 12.49091, + "11": 12.32614, + "12": 12.29366, + "13": 12.22589, + "14": 12.23023, + "15": 11.82108, + "16": 11.80586, + "17": 11.77001, + "18": 11.74946, + "19": 11.62189, + "20": 11.51704, + "21": 11.27121, + "22": 11.38966, + "23": 11.29559, + "24": 11.16591, + "25": 11.00354 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 523037536.0, + "2": 523668064.0, + "3": 522933056.0, + "4": 523228480.0, + "5": 523009792.0, + "6": 523364320.0, + "7": 523427840.0, + "8": 523074688.0, + "9": 523459232.0, + "10": 523184992.0, + "11": 524288736.0, + "12": 523447712.0, + "13": 523490112.0, + "14": 524476096.0, + "15": 523630496.0, + "16": 523459232.0, + "17": 523075936.0, + "18": 523360192.0, + "19": 523206816.0, + "20": 523230848.0, + "21": 524941248.0, + "22": 523654464.0, + "23": 523420576.0, + "24": 523494720.0, + "25": 525638016.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 20663519232.0, + "2": 20663519232.0, + "3": 
20663519232.0, + "4": 20663519232.0, + "5": 20663519232.0, + "6": 20663519232.0, + "7": 20663519232.0, + "8": 20663519232.0, + "9": 20663519232.0, + "10": 20663519232.0, + "11": 20663519232.0, + "12": 20663519232.0, + "13": 20663519232.0, + "14": 20663519232.0, + "15": 20663519232.0, + "16": 20663519232.0, + "17": 20663519232.0, + "18": 20663519232.0, + "19": 20663519232.0, + "20": 20663519232.0, + "21": 20663519232.0, + "22": 20663519232.0, + "23": 20663519232.0, + "24": 20663519232.0, + "25": 20663519232.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 50289545216.0, + "2": 57143791616.0, + "3": 57143791616.0, + "4": 57143791616.0, + "5": 57143791616.0, + "6": 57143791616.0, + "7": 57143791616.0, + "8": 57143791616.0, + "9": 57143791616.0, + "10": 57143791616.0, + "11": 57143791616.0, + "12": 57143791616.0, + "13": 57143791616.0, + "14": 57143791616.0, + "15": 57143791616.0, + "16": 57143791616.0, + "17": 57143791616.0, + "18": 57143791616.0, + "19": 57143791616.0, + "20": 57143791616.0, + "21": 57143791616.0, + "22": 57143791616.0, + "23": 57143791616.0, + "24": 57143791616.0, + "25": 57143791616.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": "nan", + "2": 6.11084, + "3": "nan", + "4": 1.11678, + "5": "nan", + "6": 1.11532, + "7": "nan", + "8": 1.11539, + "9": "nan", + "10": 1.1161, + "11": "nan", + "12": 1.11723, + "13": "nan", + "14": 1.11756, + "15": "nan", + "16": 1.11596, + "17": "nan", + "18": 1.11605, + "19": "nan", + "20": 1.11783, + "21": "nan", + "22": 1.11636, + "23": "nan", + "24": 1.11585, + "25": "nan" + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index d3e2bdcb541..22254614510 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.89592, "5": 10.89131, "10": 10.88299, "15": 10.84786, "20": 10.74925, "25": 10.59226, "30": 10.41136, "35": 10.28136, "40": 10.09306, "45": 9.84149, "50": 9.91285}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1581.0, "5": 1962.0, "10": 1435.0, "15": 1944.0, "20": 1679.0, "25": 1645.0, "30": 1912.0, "35": 2023.0, "40": 2270.0, "45": 2152.0, "50": 2580.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 581488640.0, "5": 581488640.0, "10": 581488640.0, "15": 581488640.0, "20": 581488640.0, "25": 581488640.0, "30": 581488640.0, "35": 581488640.0, "40": 581488640.0, "45": 581488640.0, "50": 581488640.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4605813248.0, "5": 4702429696.0, "10": 4702429696.0, "15": 4702429696.0, "20": 4702429696.0, "25": 4702429696.0, "30": 4702429696.0, "35": 4702429696.0, "40": 4702429696.0, "45": 4702429696.0, "50": 4702429696.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 6.75074, "5": 0.05633, "10": 0.05789, "15": 0.05558, "20": 0.05703, "25": 0.05856, "30": 0.06132, "35": 0.05777, "40": 0.05818, "45": 0.05736, "50": 0.05735}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.89592, + "2": 10.89514, + "3": 
10.88761, + "4": 10.88903, + "5": 10.89131, + "6": 10.90004, + "7": 10.89143, + "8": 10.89938, + "9": 10.90231, + "10": 10.88299, + "11": 10.87827, + "12": 10.89318, + "13": 10.89818, + "14": 10.89188, + "15": 10.84786, + "16": 10.85369, + "17": 10.831, + "18": 10.83994, + "19": 10.82779, + "20": 10.74925, + "21": 10.73558, + "22": 10.61567, + "23": 10.72599, + "24": 10.63027, + "25": 10.59226, + "26": 10.63312, + "27": 10.63277, + "28": 10.58231, + "29": 10.58547, + "30": 10.41136, + "31": 10.15833, + "32": 10.48326, + "33": 10.46651, + "34": 10.23801, + "35": 10.28136, + "36": 10.24029, + "37": 10.3617, + "38": 10.20342, + "39": 10.404, + "40": 10.09306, + "41": 10.15805, + "42": 10.21903, + "43": 9.84274, + "44": 9.97219, + "45": 9.84149, + "46": 9.82007, + "47": 10.14934, + "48": 9.85997, + "49": 9.54155, + "50": 9.91285 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1581.0, + "2": 1674.0, + "3": 1724.0, + "4": 1803.0, + "5": 1962.0, + "6": 1846.0, + "7": 1864.0, + "8": 1792.0, + "9": 1848.0, + "10": 1435.0, + "11": 1868.0, + "12": 1782.0, + "13": 1874.0, + "14": 1783.0, + "15": 1944.0, + "16": 1933.0, + "17": 1807.0, + "18": 1737.0, + "19": 1822.0, + "20": 1679.0, + "21": 1808.0, + "22": 1806.0, + "23": 2077.0, + "24": 1663.0, + "25": 1645.0, + "26": 1719.0, + "27": 1925.0, + "28": 2030.0, + "29": 2042.0, + "30": 1912.0, + "31": 1603.0, + "32": 1938.0, + "33": 2158.0, + "34": 1896.0, + "35": 2023.0, + "36": 1910.0, + "37": 2330.0, + "38": 2298.0, + "39": 2498.0, + "40": 2270.0, + "41": 2464.0, + "42": 2296.0, + "43": 2042.0, + "44": 2138.0, + "45": 2152.0, + "46": 2282.0, + "47": 2529.0, + "48": 2454.0, + "49": 2358.0, + "50": 2580.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 581488640.0, + "2": 581488640.0, + "3": 581488640.0, + "4": 581488640.0, + "5": 581488640.0, + "6": 581488640.0, + "7": 581488640.0, + "8": 581488640.0, + 
"9": 581488640.0, + "10": 581488640.0, + "11": 581488640.0, + "12": 581488640.0, + "13": 581488640.0, + "14": 581488640.0, + "15": 581488640.0, + "16": 581488640.0, + "17": 581488640.0, + "18": 581488640.0, + "19": 581488640.0, + "20": 581488640.0, + "21": 581488640.0, + "22": 581488640.0, + "23": 581488640.0, + "24": 581488640.0, + "25": 581488640.0, + "26": 581488640.0, + "27": 581488640.0, + "28": 581488640.0, + "29": 581488640.0, + "30": 581488640.0, + "31": 581488640.0, + "32": 581488640.0, + "33": 581488640.0, + "34": 581488640.0, + "35": 581488640.0, + "36": 581488640.0, + "37": 581488640.0, + "38": 581488640.0, + "39": 581488640.0, + "40": 581488640.0, + "41": 581488640.0, + "42": 581488640.0, + "43": 581488640.0, + "44": 581488640.0, + "45": 581488640.0, + "46": 581488640.0, + "47": 581488640.0, + "48": 581488640.0, + "49": 581488640.0, + "50": 581488640.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4605813248.0, + "2": 4702429696.0, + "3": 4702429696.0, + "4": 4702429696.0, + "5": 4702429696.0, + "6": 4702429696.0, + "7": 4702429696.0, + "8": 4702429696.0, + "9": 4702429696.0, + "10": 4702429696.0, + "11": 4702429696.0, + "12": 4702429696.0, + "13": 4702429696.0, + "14": 4702429696.0, + "15": 4702429696.0, + "16": 4702429696.0, + "17": 4702429696.0, + "18": 4702429696.0, + "19": 4702429696.0, + "20": 4702429696.0, + "21": 4702429696.0, + "22": 4702429696.0, + "23": 4702429696.0, + "24": 4702429696.0, + "25": 4702429696.0, + "26": 4702429696.0, + "27": 4702429696.0, + "28": 4702429696.0, + "29": 4702429696.0, + "30": 4702429696.0, + "31": 4702429696.0, + "32": 4702429696.0, + "33": 4702429696.0, + "34": 4702429696.0, + "35": 4702429696.0, + "36": 4702429696.0, + "37": 4702429696.0, + "38": 4702429696.0, + "39": 4702429696.0, + "40": 4702429696.0, + "41": 4702429696.0, + "42": 4702429696.0, + "43": 4702429696.0, + "44": 4702429696.0, + "45": 4702429696.0, + "46": 4702429696.0, + 
"47": 4702429696.0, + "48": 4702429696.0, + "49": 4702429696.0, + "50": 4702429696.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6.5684, + "2": 0.10503, + "3": 0.08759, + "4": 0.08854, + "5": 0.08902, + "6": 0.08493, + "7": 0.07755, + "8": 0.0738, + "9": 0.07491, + "10": 0.07437, + "11": 0.07546, + "12": 0.07621, + "13": 0.08298, + "14": 0.07518, + "15": 0.07632, + "16": 0.07439, + "17": 0.07556, + "18": 0.07572, + "19": 0.0773, + "20": 0.07632, + "21": 0.07507, + "22": 0.07379, + "23": 0.07514, + "24": 0.07634, + "25": 0.07537, + "26": 0.07376, + "27": 0.07568, + "28": 0.07436, + "29": 0.07588, + "30": 0.07446, + "31": 0.0821, + "32": 0.08812, + "33": 0.0891, + "34": 0.08234, + "35": 0.07539, + "36": 0.07468, + "37": 0.07649, + "38": 0.07542, + "39": 0.07476, + "40": 0.07444, + "41": 0.07481, + "42": 0.07343, + "43": 0.07666, + "44": 0.08426, + "45": 0.07584, + "46": 0.07674, + "47": 0.07463, + "48": 0.07387, + "49": 0.07347, + "50": 0.07545 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..8e0ed5db84f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.89592, + "2": 10.89514, + "3": 10.88761, + "4": 10.88903, + "5": 10.89131, + "6": 10.90004, + "7": 10.89143, + "8": 10.89938, + "9": 10.90231, + "10": 10.88299, + "11": 10.87827, + "12": 10.89318, + "13": 10.89818, + "14": 10.89188, + "15": 10.84786, + "16": 10.85369, + "17": 
10.831, + "18": 10.83994, + "19": 10.82779, + "20": 10.74925, + "21": 10.73558, + "22": 10.61567, + "23": 10.72599, + "24": 10.63027, + "25": 10.59226, + "26": 10.63312, + "27": 10.63277, + "28": 10.58231, + "29": 10.58547, + "30": 10.41136, + "31": 10.15833, + "32": 10.48326, + "33": 10.46651, + "34": 10.23801, + "35": 10.28136, + "36": 10.24029, + "37": 10.3617, + "38": 10.20342, + "39": 10.404, + "40": 10.09306, + "41": 10.15805, + "42": 10.21903, + "43": 9.84274, + "44": 9.97219, + "45": 9.84149, + "46": 9.82007, + "47": 10.14934, + "48": 9.85997, + "49": 9.54155, + "50": 9.91285 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1581.0, + "2": 1674.0, + "3": 1724.0, + "4": 1803.0, + "5": 1962.0, + "6": 1846.0, + "7": 1864.0, + "8": 1792.0, + "9": 1848.0, + "10": 1435.0, + "11": 1868.0, + "12": 1782.0, + "13": 1874.0, + "14": 1783.0, + "15": 1944.0, + "16": 1933.0, + "17": 1807.0, + "18": 1737.0, + "19": 1822.0, + "20": 1679.0, + "21": 1808.0, + "22": 1806.0, + "23": 2077.0, + "24": 1663.0, + "25": 1645.0, + "26": 1719.0, + "27": 1925.0, + "28": 2030.0, + "29": 2042.0, + "30": 1912.0, + "31": 1603.0, + "32": 1938.0, + "33": 2158.0, + "34": 1896.0, + "35": 2023.0, + "36": 1910.0, + "37": 2330.0, + "38": 2298.0, + "39": 2498.0, + "40": 2270.0, + "41": 2464.0, + "42": 2296.0, + "43": 2042.0, + "44": 2138.0, + "45": 2152.0, + "46": 2282.0, + "47": 2529.0, + "48": 2454.0, + "49": 2358.0, + "50": 2580.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 581488640.0, + "2": 581488640.0, + "3": 581488640.0, + "4": 581488640.0, + "5": 581488640.0, + "6": 581488640.0, + "7": 581488640.0, + "8": 581488640.0, + "9": 581488640.0, + "10": 581488640.0, + "11": 581488640.0, + "12": 581488640.0, + "13": 581488640.0, + "14": 581488640.0, + "15": 581488640.0, + "16": 581488640.0, + "17": 581488640.0, + "18": 581488640.0, + "19": 581488640.0, + "20": 581488640.0, 
+ "21": 581488640.0, + "22": 581488640.0, + "23": 581488640.0, + "24": 581488640.0, + "25": 581488640.0, + "26": 581488640.0, + "27": 581488640.0, + "28": 581488640.0, + "29": 581488640.0, + "30": 581488640.0, + "31": 581488640.0, + "32": 581488640.0, + "33": 581488640.0, + "34": 581488640.0, + "35": 581488640.0, + "36": 581488640.0, + "37": 581488640.0, + "38": 581488640.0, + "39": 581488640.0, + "40": 581488640.0, + "41": 581488640.0, + "42": 581488640.0, + "43": 581488640.0, + "44": 581488640.0, + "45": 581488640.0, + "46": 581488640.0, + "47": 581488640.0, + "48": 581488640.0, + "49": 581488640.0, + "50": 581488640.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4605813248.0, + "2": 4702429696.0, + "3": 4702429696.0, + "4": 4702429696.0, + "5": 4702429696.0, + "6": 4702429696.0, + "7": 4702429696.0, + "8": 4702429696.0, + "9": 4702429696.0, + "10": 4702429696.0, + "11": 4702429696.0, + "12": 4702429696.0, + "13": 4702429696.0, + "14": 4702429696.0, + "15": 4702429696.0, + "16": 4702429696.0, + "17": 4702429696.0, + "18": 4702429696.0, + "19": 4702429696.0, + "20": 4702429696.0, + "21": 4702429696.0, + "22": 4702429696.0, + "23": 4702429696.0, + "24": 4702429696.0, + "25": 4702429696.0, + "26": 4702429696.0, + "27": 4702429696.0, + "28": 4702429696.0, + "29": 4702429696.0, + "30": 4702429696.0, + "31": 4702429696.0, + "32": 4702429696.0, + "33": 4702429696.0, + "34": 4702429696.0, + "35": 4702429696.0, + "36": 4702429696.0, + "37": 4702429696.0, + "38": 4702429696.0, + "39": 4702429696.0, + "40": 4702429696.0, + "41": 4702429696.0, + "42": 4702429696.0, + "43": 4702429696.0, + "44": 4702429696.0, + "45": 4702429696.0, + "46": 4702429696.0, + "47": 4702429696.0, + "48": 4702429696.0, + "49": 4702429696.0, + "50": 4702429696.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6.83679, + "2": 0.10466, + "3": 0.07514, + "4": 0.07264, 
+ "5": 0.06334, + "6": 0.06416, + "7": 0.06155, + "8": 0.06516, + "9": 0.06439, + "10": 0.06295, + "11": 0.06245, + "12": 0.06307, + "13": 0.06464, + "14": 0.06342, + "15": 0.06273, + "16": 0.0658, + "17": 0.06138, + "18": 0.06379, + "19": 0.06329, + "20": 0.06616, + "21": 0.06117, + "22": 0.06327, + "23": 0.06081, + "24": 0.06339, + "25": 0.06116, + "26": 0.06459, + "27": 0.06165, + "28": 0.06346, + "29": 0.06054, + "30": 0.06342, + "31": 0.06119, + "32": 0.06267, + "33": 0.06074, + "34": 0.0635, + "35": 0.06057, + "36": 0.06382, + "37": 0.06202, + "38": 0.06345, + "39": 0.06229, + "40": 0.06422, + "41": 0.06182, + "42": 0.06246, + "43": 0.06164, + "44": 0.06299, + "45": 0.06869, + "46": 0.06388, + "47": 0.06106, + "48": 0.06243, + "49": 0.06122, + "50": 0.06339 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..db410897813 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.89592, + "2": 10.89514, + "3": 10.88761, + "4": 10.88903, + "5": 10.89131, + "6": 10.90004, + "7": 10.89143, + "8": 10.89938, + "9": 10.90231, + "10": 10.88299, + "11": 10.87827, + "12": 10.89318, + "13": 10.89818, + "14": 10.89188, + "15": 10.84786, + "16": 10.85369, + "17": 10.831, + "18": 10.83994, + "19": 10.82779, + "20": 10.74925, + "21": 10.73558, + "22": 10.61567, + "23": 10.72599, + "24": 10.63027, + "25": 10.59226, + "26": 10.63312, + "27": 10.63277, + "28": 10.58231, + "29": 10.58547, + "30": 10.41136, + "31": 10.15833, + "32": 
10.48326, + "33": 10.46651, + "34": 10.23801, + "35": 10.28136, + "36": 10.24029, + "37": 10.3617, + "38": 10.20342, + "39": 10.404, + "40": 10.09306, + "41": 10.15805, + "42": 10.21903, + "43": 9.84274, + "44": 9.97219, + "45": 9.84149, + "46": 9.82007, + "47": 10.14934, + "48": 9.85997, + "49": 9.54155, + "50": 9.91285 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1581.0, + "2": 1674.0, + "3": 1724.0, + "4": 1803.0, + "5": 1962.0, + "6": 1846.0, + "7": 1864.0, + "8": 1792.0, + "9": 1848.0, + "10": 1435.0, + "11": 1868.0, + "12": 1782.0, + "13": 1874.0, + "14": 1783.0, + "15": 1944.0, + "16": 1933.0, + "17": 1807.0, + "18": 1737.0, + "19": 1822.0, + "20": 1679.0, + "21": 1808.0, + "22": 1806.0, + "23": 2077.0, + "24": 1663.0, + "25": 1645.0, + "26": 1719.0, + "27": 1925.0, + "28": 2030.0, + "29": 2042.0, + "30": 1912.0, + "31": 1603.0, + "32": 1938.0, + "33": 2158.0, + "34": 1896.0, + "35": 2023.0, + "36": 1910.0, + "37": 2330.0, + "38": 2298.0, + "39": 2498.0, + "40": 2270.0, + "41": 2464.0, + "42": 2296.0, + "43": 2042.0, + "44": 2138.0, + "45": 2152.0, + "46": 2282.0, + "47": 2529.0, + "48": 2454.0, + "49": 2358.0, + "50": 2580.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 581488640.0, + "2": 581488640.0, + "3": 581488640.0, + "4": 581488640.0, + "5": 581488640.0, + "6": 581488640.0, + "7": 581488640.0, + "8": 581488640.0, + "9": 581488640.0, + "10": 581488640.0, + "11": 581488640.0, + "12": 581488640.0, + "13": 581488640.0, + "14": 581488640.0, + "15": 581488640.0, + "16": 581488640.0, + "17": 581488640.0, + "18": 581488640.0, + "19": 581488640.0, + "20": 581488640.0, + "21": 581488640.0, + "22": 581488640.0, + "23": 581488640.0, + "24": 581488640.0, + "25": 581488640.0, + "26": 581488640.0, + "27": 581488640.0, + "28": 581488640.0, + "29": 581488640.0, + "30": 581488640.0, + "31": 581488640.0, + "32": 581488640.0, + "33": 
581488640.0, + "34": 581488640.0, + "35": 581488640.0, + "36": 581488640.0, + "37": 581488640.0, + "38": 581488640.0, + "39": 581488640.0, + "40": 581488640.0, + "41": 581488640.0, + "42": 581488640.0, + "43": 581488640.0, + "44": 581488640.0, + "45": 581488640.0, + "46": 581488640.0, + "47": 581488640.0, + "48": 581488640.0, + "49": 581488640.0, + "50": 581488640.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4605813248.0, + "2": 4702429696.0, + "3": 4702429696.0, + "4": 4702429696.0, + "5": 4702429696.0, + "6": 4702429696.0, + "7": 4702429696.0, + "8": 4702429696.0, + "9": 4702429696.0, + "10": 4702429696.0, + "11": 4702429696.0, + "12": 4702429696.0, + "13": 4702429696.0, + "14": 4702429696.0, + "15": 4702429696.0, + "16": 4702429696.0, + "17": 4702429696.0, + "18": 4702429696.0, + "19": 4702429696.0, + "20": 4702429696.0, + "21": 4702429696.0, + "22": 4702429696.0, + "23": 4702429696.0, + "24": 4702429696.0, + "25": 4702429696.0, + "26": 4702429696.0, + "27": 4702429696.0, + "28": 4702429696.0, + "29": 4702429696.0, + "30": 4702429696.0, + "31": 4702429696.0, + "32": 4702429696.0, + "33": 4702429696.0, + "34": 4702429696.0, + "35": 4702429696.0, + "36": 4702429696.0, + "37": 4702429696.0, + "38": 4702429696.0, + "39": 4702429696.0, + "40": 4702429696.0, + "41": 4702429696.0, + "42": 4702429696.0, + "43": 4702429696.0, + "44": 4702429696.0, + "45": 4702429696.0, + "46": 4702429696.0, + "47": 4702429696.0, + "48": 4702429696.0, + "49": 4702429696.0, + "50": 4702429696.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6.7331, + "2": 0.09599, + "3": 0.08799, + "4": 0.08582, + "5": 0.08478, + "6": 0.08513, + "7": 0.07688, + "8": 0.07429, + "9": 0.07778, + "10": 0.07515, + "11": 0.07987, + "12": 0.07525, + "13": 0.07727, + "14": 0.07535, + "15": 0.07896, + "16": 0.07509, + "17": 0.07751, + "18": 0.076, + "19": 0.07647, + "20": 
0.07502, + "21": 0.07467, + "22": 0.07544, + "23": 0.0742, + "24": 0.07536, + "25": 0.07588, + "26": 0.07381, + "27": 0.07407, + "28": 0.075, + "29": 0.07424, + "30": 0.07454, + "31": 0.07482, + "32": 0.07526, + "33": 0.07493, + "34": 0.07437, + "35": 0.07447, + "36": 0.07482, + "37": 0.07454, + "38": 0.07501, + "39": 0.07495, + "40": 0.07481, + "41": 0.07433, + "42": 0.07467, + "43": 0.0754, + "44": 0.07543, + "45": 0.07498, + "46": 0.07457, + "47": 0.07378, + "48": 0.07477, + "49": 0.07465, + "50": 0.07444 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..f9dab22ab59 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8401, + "2": 10.83566, + "3": 10.82993, + "4": 10.8173, + "5": 10.84032, + "6": 10.87262, + "7": 10.83467, + "8": 10.8403, + "9": 10.84359, + "10": 10.8134, + "11": 10.85025, + "12": 10.84316, + "13": 10.86605, + "14": 10.86315, + "15": 10.80276, + "16": 10.79643, + "17": 10.7763, + "18": 10.8015, + "19": 10.7939, + "20": 10.705, + "21": 10.68148, + "22": 10.56313, + "23": 10.70136, + "24": 10.57939, + "25": 10.53849, + "26": 10.60617, + "27": 10.59211, + "28": 10.56156, + "29": 10.57666, + "30": 10.35521, + "31": 10.12773, + "32": 10.46367, + "33": 10.45444, + "34": 10.22451, + "35": 10.27148, + "36": 10.22184, + "37": 10.33945, + "38": 10.18637, + "39": 10.39329, + "40": 10.08049, + "41": 10.13789, + "42": 10.20012, + "43": 9.83791, + "44": 9.94327, + "45": 9.8229, 
+ "46": 9.82313, + "47": 10.13353, + "48": 9.8415, + "49": 9.52102, + "50": 9.90118 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1670.0, + "2": 1691.0, + "3": 1630.0, + "4": 1805.0, + "5": 1970.0, + "6": 1901.0, + "7": 1816.0, + "8": 1587.0, + "9": 1905.0, + "10": 1397.0, + "11": 1954.0, + "12": 1859.0, + "13": 1873.0, + "14": 1875.0, + "15": 1936.0, + "16": 1972.0, + "17": 1816.0, + "18": 1773.0, + "19": 1833.0, + "20": 1715.0, + "21": 1923.0, + "22": 1681.0, + "23": 2055.0, + "24": 1727.0, + "25": 1703.0, + "26": 1761.0, + "27": 1917.0, + "28": 1962.0, + "29": 2010.0, + "30": 1957.0, + "31": 1723.0, + "32": 1898.0, + "33": 2153.0, + "34": 1828.0, + "35": 1991.0, + "36": 1937.0, + "37": 2347.0, + "38": 2365.0, + "39": 2349.0, + "40": 2239.0, + "41": 2217.0, + "42": 2222.0, + "43": 2121.0, + "44": 2059.0, + "45": 2144.0, + "46": 2296.0, + "47": 2487.0, + "48": 2376.0, + "49": 2330.0, + "50": 2377.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 552238592.0, + "2": 552238592.0, + "3": 552238592.0, + "4": 552238592.0, + "5": 552238592.0, + "6": 552238592.0, + "7": 552238592.0, + "8": 552238592.0, + "9": 552238592.0, + "10": 552238592.0, + "11": 552238592.0, + "12": 552238592.0, + "13": 552238592.0, + "14": 552238592.0, + "15": 552238592.0, + "16": 552238592.0, + "17": 552238592.0, + "18": 552238592.0, + "19": 552238592.0, + "20": 552238592.0, + "21": 552238592.0, + "22": 552238592.0, + "23": 552238592.0, + "24": 552238592.0, + "25": 552238592.0, + "26": 552238592.0, + "27": 552238592.0, + "28": 552238592.0, + "29": 552238592.0, + "30": 552238592.0, + "31": 552238592.0, + "32": 552238592.0, + "33": 552238592.0, + "34": 552238592.0, + "35": 552238592.0, + "36": 552238592.0, + "37": 552238592.0, + "38": 552238592.0, + "39": 552238592.0, + "40": 552238592.0, + "41": 552238592.0, + "42": 552238592.0, + "43": 552238592.0, + "44": 552238592.0, + 
"45": 552238592.0, + "46": 552238592.0, + "47": 552238592.0, + "48": 552238592.0, + "49": 552238592.0, + "50": 552238592.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4576563200.0, + "2": 4673179648.0, + "3": 4673179648.0, + "4": 4673179648.0, + "5": 4673179648.0, + "6": 4673179648.0, + "7": 4673179648.0, + "8": 4673179648.0, + "9": 4673179648.0, + "10": 4673179648.0, + "11": 4673179648.0, + "12": 4673179648.0, + "13": 4673179648.0, + "14": 4673179648.0, + "15": 4673179648.0, + "16": 4673179648.0, + "17": 4673179648.0, + "18": 4673179648.0, + "19": 4673179648.0, + "20": 4673179648.0, + "21": 4673179648.0, + "22": 4673179648.0, + "23": 4673179648.0, + "24": 4673179648.0, + "25": 4673179648.0, + "26": 4673179648.0, + "27": 4673179648.0, + "28": 4673179648.0, + "29": 4673179648.0, + "30": 4673179648.0, + "31": 4673179648.0, + "32": 4673179648.0, + "33": 4673179648.0, + "34": 4673179648.0, + "35": 4673179648.0, + "36": 4673179648.0, + "37": 4673179648.0, + "38": 4673179648.0, + "39": 4673179648.0, + "40": 4673179648.0, + "41": 4673179648.0, + "42": 4673179648.0, + "43": 4673179648.0, + "44": 4673179648.0, + "45": 4673179648.0, + "46": 4673179648.0, + "47": 4673179648.0, + "48": 4673179648.0, + "49": 4673179648.0, + "50": 4673179648.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 8.45713, + "2": 0.13161, + "3": 0.11061, + "4": 0.12579, + "5": 0.13121, + "6": 0.13773, + "7": 0.13653, + "8": 0.46789, + "9": 0.12385, + "10": 0.12166, + "11": 0.1263, + "12": 0.13396, + "13": 0.12492, + "14": 0.12502, + "15": 0.11723, + "16": 0.15631, + "17": 0.3771, + "18": 0.12361, + "19": 0.11397, + "20": 0.11135, + "21": 0.10366, + "22": 0.10396, + "23": 0.10431, + "24": 0.10481, + "25": 0.10339, + "26": 0.1068, + "27": 0.10511, + "28": 0.36221, + "29": 0.1036, + "30": 0.10364, + "31": 0.10951, + "32": 0.11609, + "33": 0.11339, + "34": 0.1139, + 
"35": 0.11975, + "36": 0.11809, + "37": 0.10984, + "38": 0.10706, + "39": 0.10797, + "40": 0.11217, + "41": 0.11266, + "42": 0.10821, + "43": 0.1114, + "44": 0.10779, + "45": 0.1071, + "46": 0.11272, + "47": 0.1145, + "48": 0.10778, + "49": 0.10649, + "50": 0.10728 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..cc9bcd1b512 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8401, + "2": 10.83566, + "3": 10.82993, + "4": 10.8173, + "5": 10.84032, + "6": 10.87262, + "7": 10.83467, + "8": 10.8403, + "9": 10.84359, + "10": 10.8134, + "11": 10.85025, + "12": 10.84316, + "13": 10.86605, + "14": 10.86315, + "15": 10.80276, + "16": 10.79643, + "17": 10.7763, + "18": 10.8015, + "19": 10.7939, + "20": 10.705, + "21": 10.68148, + "22": 10.56313, + "23": 10.70136, + "24": 10.57939, + "25": 10.53849, + "26": 10.60617, + "27": 10.59211, + "28": 10.56156, + "29": 10.57666, + "30": 10.35521, + "31": 10.12773, + "32": 10.46367, + "33": 10.45444, + "34": 10.22451, + "35": 10.27148, + "36": 10.22184, + "37": 10.33945, + "38": 10.18637, + "39": 10.39329, + "40": 10.08049, + "41": 10.13789, + "42": 10.20012, + "43": 9.83791, + "44": 9.94327, + "45": 9.8229, + "46": 9.82313, + "47": 10.13353, + "48": 9.8415, + "49": 9.52102, + "50": 9.90118 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1670.0, + "2": 1691.0, + "3": 1630.0, + "4": 1805.0, + "5": 1970.0, + 
"6": 1901.0, + "7": 1816.0, + "8": 1587.0, + "9": 1905.0, + "10": 1397.0, + "11": 1954.0, + "12": 1859.0, + "13": 1873.0, + "14": 1875.0, + "15": 1936.0, + "16": 1972.0, + "17": 1816.0, + "18": 1773.0, + "19": 1833.0, + "20": 1715.0, + "21": 1923.0, + "22": 1681.0, + "23": 2055.0, + "24": 1727.0, + "25": 1703.0, + "26": 1761.0, + "27": 1917.0, + "28": 1962.0, + "29": 2010.0, + "30": 1957.0, + "31": 1723.0, + "32": 1898.0, + "33": 2153.0, + "34": 1828.0, + "35": 1991.0, + "36": 1937.0, + "37": 2347.0, + "38": 2365.0, + "39": 2349.0, + "40": 2239.0, + "41": 2217.0, + "42": 2222.0, + "43": 2121.0, + "44": 2059.0, + "45": 2144.0, + "46": 2296.0, + "47": 2487.0, + "48": 2376.0, + "49": 2330.0, + "50": 2377.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 552238592.0, + "2": 552238592.0, + "3": 552238592.0, + "4": 552238592.0, + "5": 552238592.0, + "6": 552238592.0, + "7": 552238592.0, + "8": 552238592.0, + "9": 552238592.0, + "10": 552238592.0, + "11": 552238592.0, + "12": 552238592.0, + "13": 552238592.0, + "14": 552238592.0, + "15": 552238592.0, + "16": 552238592.0, + "17": 552238592.0, + "18": 552238592.0, + "19": 552238592.0, + "20": 552238592.0, + "21": 552238592.0, + "22": 552238592.0, + "23": 552238592.0, + "24": 552238592.0, + "25": 552238592.0, + "26": 552238592.0, + "27": 552238592.0, + "28": 552238592.0, + "29": 552238592.0, + "30": 552238592.0, + "31": 552238592.0, + "32": 552238592.0, + "33": 552238592.0, + "34": 552238592.0, + "35": 552238592.0, + "36": 552238592.0, + "37": 552238592.0, + "38": 552238592.0, + "39": 552238592.0, + "40": 552238592.0, + "41": 552238592.0, + "42": 552238592.0, + "43": 552238592.0, + "44": 552238592.0, + "45": 552238592.0, + "46": 552238592.0, + "47": 552238592.0, + "48": 552238592.0, + "49": 552238592.0, + "50": 552238592.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4576563200.0, + 
"2": 4673179648.0, + "3": 4673179648.0, + "4": 4673179648.0, + "5": 4673179648.0, + "6": 4673179648.0, + "7": 4673179648.0, + "8": 4673179648.0, + "9": 4673179648.0, + "10": 4673179648.0, + "11": 4673179648.0, + "12": 4673179648.0, + "13": 4673179648.0, + "14": 4673179648.0, + "15": 4673179648.0, + "16": 4673179648.0, + "17": 4673179648.0, + "18": 4673179648.0, + "19": 4673179648.0, + "20": 4673179648.0, + "21": 4673179648.0, + "22": 4673179648.0, + "23": 4673179648.0, + "24": 4673179648.0, + "25": 4673179648.0, + "26": 4673179648.0, + "27": 4673179648.0, + "28": 4673179648.0, + "29": 4673179648.0, + "30": 4673179648.0, + "31": 4673179648.0, + "32": 4673179648.0, + "33": 4673179648.0, + "34": 4673179648.0, + "35": 4673179648.0, + "36": 4673179648.0, + "37": 4673179648.0, + "38": 4673179648.0, + "39": 4673179648.0, + "40": 4673179648.0, + "41": 4673179648.0, + "42": 4673179648.0, + "43": 4673179648.0, + "44": 4673179648.0, + "45": 4673179648.0, + "46": 4673179648.0, + "47": 4673179648.0, + "48": 4673179648.0, + "49": 4673179648.0, + "50": 4673179648.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.01978, + "2": 0.13386, + "3": 0.10421, + "4": 0.10575, + "5": 0.10347, + "6": 0.10366, + "7": 0.10198, + "8": 0.10204, + "9": 0.10153, + "10": 0.10361, + "11": 0.10226, + "12": 0.31034, + "13": 0.36244, + "14": 0.32183, + "15": 0.09858, + "16": 0.10098, + "17": 0.10218, + "18": 0.09859, + "19": 0.09858, + "20": 0.0985, + "21": 0.09758, + "22": 0.0984, + "23": 0.09686, + "24": 0.09763, + "25": 0.09689, + "26": 0.0979, + "27": 0.09858, + "28": 0.09763, + "29": 0.09678, + "30": 0.09714, + "31": 0.10001, + "32": 0.09705, + "33": 0.09776, + "34": 0.09662, + "35": 0.09763, + "36": 0.10137, + "37": 0.10113, + "38": 0.09825, + "39": 0.09976, + "40": 0.09925, + "41": 0.09738, + "42": 0.09904, + "43": 0.10108, + "44": 0.09921, + "45": 0.09873, + "46": 0.10018, + "47": 0.09927, + "48": 0.09914, + "49": 0.09907, + 
"50": 0.09879 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 1f0d2e2e9a1..ca95ad65b3d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.89631, + "2": 10.89416, + "3": 10.88786, + "4": 10.8914, "5": 10.89154, + "6": 10.90001, + "7": 10.89184, + "8": 10.89886, + "9": 10.90208, "10": 10.88361, + "11": 10.87816, + "12": 10.89332, + "13": 10.89816, + "14": 10.89241, "15": 10.84798, + "16": 10.854, + "17": 10.83093, + "18": 10.83991, + "19": 10.82802, "20": 10.74822, + "21": 10.73494, + "22": 10.61719, + "23": 10.72621, + "24": 10.63177, "25": 10.5931, + "26": 10.63365, + "27": 10.63304, + "28": 10.58259, + "29": 10.58595, "30": 10.41201, + "31": 10.15907, + "32": 10.48362, + "33": 10.46704, + "34": 10.23815, "35": 10.28193, + "36": 10.24052, + "37": 10.36227, + "38": 10.20306, + "39": 10.40456, "40": 10.09271, + "41": 10.15831, + "42": 10.21934, + "43": 9.8436, + "44": 9.97299, "45": 9.84189, + "46": 9.82017, + "47": 10.14968, + "48": 9.86021, + "49": 9.54238, "50": 9.91347, + "51": 9.85447, + "52": 9.73936, + "53": 10.07426, + "54": 9.96915, "55": 9.88574, + "56": 9.62437, + "57": 9.4823, + "58": 9.83483, + "59": 9.58732, "60": 9.50245, + "61": 9.69343, + "62": 9.98806, + "63": 9.39103, + "64": 9.78021, "65": 8.94515, + "66": 9.70494, + "67": 9.37251, + "68": 9.78329, + "69": 9.79058, "70": 9.74454, + 
"71": 9.62301, + "72": 9.58458, + "73": 9.50513, + "74": 8.94312, "75": 9.42524, + "76": 9.07601, + "77": 10.06353, + "78": 9.72308, + "79": 9.37502, "80": 9.40453, + "81": 9.47794, + "82": 9.69667, + "83": 9.3072, + "84": 9.41526, "85": 9.61293, + "86": 9.07195, + "87": 9.5884, + "88": 9.74762, + "89": 9.59982, "90": 9.81672, + "91": 9.3379, + "92": 9.35605, + "93": 9.07425, + "94": 8.8351, "95": 9.5184, + "96": 9.52391, + "97": 9.30923, + "98": 9.66743, + "99": 8.88419, "100": 9.39924 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1483.0, + "2": 1650.0, + "3": 1681.0, + "4": 1767.0, "5": 1903.0, + "6": 1952.0, + "7": 1967.0, + "8": 1651.0, + "9": 1886.0, "10": 1427.0, + "11": 1897.0, + "12": 1855.0, + "13": 1941.0, + "14": 1749.0, "15": 1901.0, + "16": 1813.0, + "17": 1710.0, + "18": 1707.0, + "19": 1819.0, "20": 1639.0, + "21": 1880.0, + "22": 1769.0, + "23": 2016.0, + "24": 1692.0, "25": 1672.0, + "26": 1778.0, + "27": 1861.0, + "28": 1964.0, + "29": 2021.0, "30": 1938.0, + "31": 1645.0, + "32": 1864.0, + "33": 2150.0, + "34": 1828.0, "35": 1982.0, + "36": 1864.0, + "37": 2355.0, + "38": 2358.0, + "39": 2385.0, "40": 2407.0, + "41": 2501.0, + "42": 2435.0, + "43": 2033.0, + "44": 2089.0, "45": 2210.0, + "46": 2351.0, + "47": 2502.0, + "48": 2444.0, + "49": 2302.0, "50": 2492.0, + "51": 2598.0, + "52": 2547.0, + "53": 2957.0, + "54": 2750.0, "55": 2372.0, + "56": 2569.0, + "57": 2395.0, + "58": 2901.0, + "59": 2741.0, "60": 2430.0, + "61": 2868.0, + "62": 2651.0, + "63": 2507.0, + "64": 3014.0, "65": 2683.0, + "66": 2935.0, + "67": 2783.0, + "68": 2725.0, + "69": 2788.0, "70": 3152.0, + "71": 3026.0, + "72": 2415.0, + "73": 3122.0, + "74": 1967.0, "75": 2581.0, + "76": 3010.0, + "77": 3294.0, + "78": 3166.0, + "79": 3150.0, "80": 3246.0, + "81": 3566.0, + "82": 3285.0, + "83": 2817.0, + "84": 3269.0, "85": 3425.0, + "86": 2819.0, + "87": 3577.0, + "88": 3004.0, + "89": 3323.0, "90": 3023.0, + 
"91": 2661.0, + "92": 3066.0, + "93": 2691.0, + "94": 3305.0, "95": 3403.0, + "96": 3377.0, + "97": 3242.0, + "98": 3697.0, + "99": 3112.0, "100": 3199.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 581488640.0, + "2": 581488640.0, + "3": 581488640.0, + "4": 581488640.0, "5": 581488640.0, + "6": 581488640.0, + "7": 581488640.0, + "8": 581488640.0, + "9": 581488640.0, "10": 581488640.0, + "11": 581488640.0, + "12": 581488640.0, + "13": 581488640.0, + "14": 581488640.0, "15": 581488640.0, + "16": 581488640.0, + "17": 581488640.0, + "18": 581488640.0, + "19": 581488640.0, "20": 581488640.0, + "21": 581488640.0, + "22": 581488640.0, + "23": 581488640.0, + "24": 581488640.0, "25": 581488640.0, + "26": 581488640.0, + "27": 581488640.0, + "28": 581488640.0, + "29": 581488640.0, "30": 581488640.0, + "31": 581488640.0, + "32": 581488640.0, + "33": 581488640.0, + "34": 581488640.0, "35": 581488640.0, + "36": 581488640.0, + "37": 581488640.0, + "38": 581488640.0, + "39": 581488640.0, "40": 581488640.0, + "41": 581488640.0, + "42": 581488640.0, + "43": 581488640.0, + "44": 581488640.0, "45": 581488640.0, + "46": 581488640.0, + "47": 581488640.0, + "48": 581488640.0, + "49": 581488640.0, "50": 581488640.0, + "51": 581488640.0, + "52": 581488640.0, + "53": 581488640.0, + "54": 581488640.0, "55": 581488640.0, + "56": 581488640.0, + "57": 581488640.0, + "58": 581488640.0, + "59": 581488640.0, "60": 581488640.0, + "61": 581488640.0, + "62": 581488640.0, + "63": 581488640.0, + "64": 581488640.0, "65": 581488640.0, + "66": 581488640.0, + "67": 581488640.0, + "68": 581488640.0, + "69": 581488640.0, "70": 581488640.0, + "71": 581488640.0, + "72": 581488640.0, + "73": 581488640.0, + "74": 581488640.0, "75": 581488640.0, + "76": 581488640.0, + "77": 581488640.0, + "78": 581488640.0, + "79": 581488640.0, "80": 581488640.0, + "81": 581488640.0, + "82": 581488640.0, + "83": 581488640.0, + "84": 581488640.0, 
"85": 581488640.0, + "86": 581488640.0, + "87": 581488640.0, + "88": 581488640.0, + "89": 581488640.0, "90": 581488640.0, + "91": 581488640.0, + "92": 581488640.0, + "93": 581488640.0, + "94": 581488640.0, "95": 581488640.0, + "96": 581488640.0, + "97": 581488640.0, + "98": 581488640.0, + "99": 581488640.0, "100": 581488640.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2594126336.0, + "2": 2690742784.0, + "3": 2690742784.0, + "4": 2690742784.0, "5": 2690742784.0, + "6": 2690742784.0, + "7": 2690742784.0, + "8": 2690742784.0, + "9": 2690742784.0, "10": 2690742784.0, + "11": 2690742784.0, + "12": 2690742784.0, + "13": 2690742784.0, + "14": 2690742784.0, "15": 2690742784.0, + "16": 2690742784.0, + "17": 2690742784.0, + "18": 2690742784.0, + "19": 2690742784.0, "20": 2690742784.0, + "21": 2690742784.0, + "22": 2690742784.0, + "23": 2690742784.0, + "24": 2690742784.0, "25": 2690742784.0, + "26": 2690742784.0, + "27": 2690742784.0, + "28": 2690742784.0, + "29": 2690742784.0, "30": 2690742784.0, + "31": 2690742784.0, + "32": 2690742784.0, + "33": 2690742784.0, + "34": 2690742784.0, "35": 2690742784.0, + "36": 2690742784.0, + "37": 2690742784.0, + "38": 2690742784.0, + "39": 2690742784.0, "40": 2690742784.0, + "41": 2690742784.0, + "42": 2690742784.0, + "43": 2690742784.0, + "44": 2690742784.0, "45": 2690742784.0, + "46": 2690742784.0, + "47": 2690742784.0, + "48": 2690742784.0, + "49": 2690742784.0, "50": 2690742784.0, + "51": 2690742784.0, + "52": 2690742784.0, + "53": 2690742784.0, + "54": 2690742784.0, "55": 2690742784.0, + "56": 2690742784.0, + "57": 2690742784.0, + "58": 2690742784.0, + "59": 2690742784.0, "60": 2690742784.0, + "61": 2690742784.0, + "62": 2690742784.0, + "63": 2690742784.0, + "64": 2690742784.0, "65": 2690742784.0, + "66": 2690742784.0, + "67": 2690742784.0, + "68": 2690742784.0, + "69": 2690742784.0, "70": 2690742784.0, + "71": 2690742784.0, + "72": 2690742784.0, + 
"73": 2690742784.0, + "74": 2690742784.0, "75": 2690742784.0, + "76": 2690742784.0, + "77": 2690742784.0, + "78": 2690742784.0, + "79": 2690742784.0, "80": 2690742784.0, + "81": 2690742784.0, + "82": 2690742784.0, + "83": 2690742784.0, + "84": 2690742784.0, "85": 2690742784.0, + "86": 2690742784.0, + "87": 2690742784.0, + "88": 2690742784.0, + "89": 2690742784.0, "90": 2690742784.0, + "91": 2690742784.0, + "92": 2690742784.0, + "93": 2690742784.0, + "94": 2690742784.0, "95": 2690742784.0, + "96": 2690742784.0, + "97": 2690742784.0, + "98": 2690742784.0, + "99": 2690742784.0, "100": 2690742784.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 8.28181, - "5": 0.05617, - "10": 0.05714, - "15": 0.05541, - "20": 0.05475, - "25": 0.05518, - "30": 0.0563, - "35": 0.05638, - "40": 0.05543, - "45": 0.05574, - "50": 0.05563, - "55": 0.07246, - "60": 0.05657, - "65": 0.05621, - "70": 0.05607, - "75": 0.05605, - "80": 0.05618, - "85": 0.05509, - "90": 0.05962, - "95": 0.05777, - "100": 0.06336 + "1": 7.50382, + "2": 0.09494, + "3": 0.08499, + "4": 0.08516, + "5": 0.08574, + "6": 0.07205, + "7": 0.0678, + "8": 0.06716, + "9": 0.06722, + "10": 0.06806, + "11": 0.06825, + "12": 0.06735, + "13": 0.06795, + "14": 0.06749, + "15": 0.06675, + "16": 0.06707, + "17": 0.06697, + "18": 0.06753, + "19": 0.06817, + "20": 0.06848, + "21": 0.06619, + "22": 0.06841, + "23": 0.06785, + "24": 0.06849, + "25": 0.06774, + "26": 0.06776, + "27": 0.06722, + "28": 0.06759, + "29": 0.06651, + "30": 0.06707, + "31": 0.06654, + "32": 0.06698, + "33": 0.06699, + "34": 0.06679, + "35": 0.06871, + "36": 0.06753, + "37": 0.06724, + "38": 0.06699, + "39": 0.06694, + "40": 0.06736, + "41": 0.06719, + "42": 0.06704, + "43": 0.06772, + "44": 0.06769, + "45": 0.06718, + "46": 0.06687, + "47": 0.0666, + "48": 0.06791, + "49": 0.06768, + "50": 0.06799, + "51": 0.08137, + "52": 0.07388, + "53": 0.07162, + "54": 0.06825, + "55": 0.09073, + 
"56": 0.06514, + "57": 0.06572, + "58": 0.066, + "59": 0.06584, + "60": 0.06564, + "61": 0.06432, + "62": 0.06646, + "63": 0.06643, + "64": 0.06637, + "65": 0.06605, + "66": 0.06606, + "67": 0.06661, + "68": 0.06602, + "69": 0.06559, + "70": 0.06607, + "71": 0.06417, + "72": 0.06658, + "73": 0.06562, + "74": 0.06641, + "75": 0.0655, + "76": 0.06656, + "77": 0.065, + "78": 0.06615, + "79": 0.06666, + "80": 0.06535, + "81": 0.06679, + "82": 0.06885, + "83": 0.06577, + "84": 0.06461, + "85": 0.06689, + "86": 0.06445, + "87": 0.06546, + "88": 0.06624, + "89": 0.06635, + "90": 0.0643, + "91": 0.06631, + "92": 0.0655, + "93": 0.06522, + "94": 0.06652, + "95": 0.06592, + "96": 0.0658, + "97": 0.06642, + "98": 0.06519, + "99": 0.06466, + "100": 0.06561 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..0f5131905ca --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.89631, + "2": 10.89416, + "3": 10.88786, + "4": 10.8914, + "5": 10.89154, + "6": 10.90001, + "7": 10.89184, + "8": 10.89886, + "9": 10.90208, + "10": 10.88361, + "11": 10.87816, + "12": 10.89332, + "13": 10.89816, + "14": 10.89241, + "15": 10.84798, + "16": 10.854, + "17": 10.83093, + "18": 10.83991, + "19": 10.82802, + "20": 10.74822, + "21": 10.73494, + "22": 10.61719, + "23": 10.72621, + "24": 10.63177, + "25": 10.5931, + "26": 10.63365, + "27": 10.63304, + "28": 10.58259, + "29": 10.58595, + "30": 10.41201, + "31": 10.15907, + "32": 
10.48362, + "33": 10.46704, + "34": 10.23815, + "35": 10.28193, + "36": 10.24052, + "37": 10.36227, + "38": 10.20306, + "39": 10.40456, + "40": 10.09271, + "41": 10.15831, + "42": 10.21934, + "43": 9.8436, + "44": 9.97299, + "45": 9.84189, + "46": 9.82017, + "47": 10.14968, + "48": 9.86021, + "49": 9.54238, + "50": 9.91347, + "51": 9.85447, + "52": 9.73936, + "53": 10.07426, + "54": 9.96915, + "55": 9.88574, + "56": 9.62437, + "57": 9.4823, + "58": 9.83483, + "59": 9.58732, + "60": 9.50245, + "61": 9.69343, + "62": 9.98806, + "63": 9.39103, + "64": 9.78021, + "65": 8.94515, + "66": 9.70494, + "67": 9.37251, + "68": 9.78329, + "69": 9.79058, + "70": 9.74454, + "71": 9.62301, + "72": 9.58458, + "73": 9.50513, + "74": 8.94312, + "75": 9.42524, + "76": 9.07601, + "77": 10.06353, + "78": 9.72308, + "79": 9.37502, + "80": 9.40453, + "81": 9.47794, + "82": 9.69667, + "83": 9.3072, + "84": 9.41526, + "85": 9.61293, + "86": 9.07195, + "87": 9.5884, + "88": 9.74762, + "89": 9.59982, + "90": 9.81672, + "91": 9.3379, + "92": 9.35605, + "93": 9.07425, + "94": 8.8351, + "95": 9.5184, + "96": 9.52391, + "97": 9.30923, + "98": 9.66743, + "99": 8.88419, + "100": 9.39924 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1483.0, + "2": 1650.0, + "3": 1681.0, + "4": 1767.0, + "5": 1903.0, + "6": 1952.0, + "7": 1967.0, + "8": 1651.0, + "9": 1886.0, + "10": 1427.0, + "11": 1897.0, + "12": 1855.0, + "13": 1941.0, + "14": 1749.0, + "15": 1901.0, + "16": 1813.0, + "17": 1710.0, + "18": 1707.0, + "19": 1819.0, + "20": 1639.0, + "21": 1880.0, + "22": 1769.0, + "23": 2016.0, + "24": 1692.0, + "25": 1672.0, + "26": 1778.0, + "27": 1861.0, + "28": 1964.0, + "29": 2021.0, + "30": 1938.0, + "31": 1645.0, + "32": 1864.0, + "33": 2150.0, + "34": 1828.0, + "35": 1982.0, + "36": 1864.0, + "37": 2355.0, + "38": 2358.0, + "39": 2385.0, + "40": 2407.0, + "41": 2501.0, + "42": 2435.0, + "43": 2033.0, + "44": 2089.0, + "45": 2210.0, + "46": 2351.0, 
+ "47": 2502.0, + "48": 2444.0, + "49": 2302.0, + "50": 2492.0, + "51": 2598.0, + "52": 2547.0, + "53": 2957.0, + "54": 2750.0, + "55": 2372.0, + "56": 2569.0, + "57": 2395.0, + "58": 2901.0, + "59": 2741.0, + "60": 2430.0, + "61": 2868.0, + "62": 2651.0, + "63": 2507.0, + "64": 3014.0, + "65": 2683.0, + "66": 2935.0, + "67": 2783.0, + "68": 2725.0, + "69": 2788.0, + "70": 3152.0, + "71": 3026.0, + "72": 2415.0, + "73": 3122.0, + "74": 1967.0, + "75": 2581.0, + "76": 3010.0, + "77": 3294.0, + "78": 3166.0, + "79": 3150.0, + "80": 3246.0, + "81": 3566.0, + "82": 3285.0, + "83": 2817.0, + "84": 3269.0, + "85": 3425.0, + "86": 2819.0, + "87": 3577.0, + "88": 3004.0, + "89": 3323.0, + "90": 3023.0, + "91": 2661.0, + "92": 3066.0, + "93": 2691.0, + "94": 3305.0, + "95": 3403.0, + "96": 3377.0, + "97": 3242.0, + "98": 3697.0, + "99": 3112.0, + "100": 3199.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 581488640.0, + "2": 581488640.0, + "3": 581488640.0, + "4": 581488640.0, + "5": 581488640.0, + "6": 581488640.0, + "7": 581488640.0, + "8": 581488640.0, + "9": 581488640.0, + "10": 581488640.0, + "11": 581488640.0, + "12": 581488640.0, + "13": 581488640.0, + "14": 581488640.0, + "15": 581488640.0, + "16": 581488640.0, + "17": 581488640.0, + "18": 581488640.0, + "19": 581488640.0, + "20": 581488640.0, + "21": 581488640.0, + "22": 581488640.0, + "23": 581488640.0, + "24": 581488640.0, + "25": 581488640.0, + "26": 581488640.0, + "27": 581488640.0, + "28": 581488640.0, + "29": 581488640.0, + "30": 581488640.0, + "31": 581488640.0, + "32": 581488640.0, + "33": 581488640.0, + "34": 581488640.0, + "35": 581488640.0, + "36": 581488640.0, + "37": 581488640.0, + "38": 581488640.0, + "39": 581488640.0, + "40": 581488640.0, + "41": 581488640.0, + "42": 581488640.0, + "43": 581488640.0, + "44": 581488640.0, + "45": 581488640.0, + "46": 581488640.0, + "47": 581488640.0, + "48": 581488640.0, + "49": 581488640.0, + 
"50": 581488640.0, + "51": 581488640.0, + "52": 581488640.0, + "53": 581488640.0, + "54": 581488640.0, + "55": 581488640.0, + "56": 581488640.0, + "57": 581488640.0, + "58": 581488640.0, + "59": 581488640.0, + "60": 581488640.0, + "61": 581488640.0, + "62": 581488640.0, + "63": 581488640.0, + "64": 581488640.0, + "65": 581488640.0, + "66": 581488640.0, + "67": 581488640.0, + "68": 581488640.0, + "69": 581488640.0, + "70": 581488640.0, + "71": 581488640.0, + "72": 581488640.0, + "73": 581488640.0, + "74": 581488640.0, + "75": 581488640.0, + "76": 581488640.0, + "77": 581488640.0, + "78": 581488640.0, + "79": 581488640.0, + "80": 581488640.0, + "81": 581488640.0, + "82": 581488640.0, + "83": 581488640.0, + "84": 581488640.0, + "85": 581488640.0, + "86": 581488640.0, + "87": 581488640.0, + "88": 581488640.0, + "89": 581488640.0, + "90": 581488640.0, + "91": 581488640.0, + "92": 581488640.0, + "93": 581488640.0, + "94": 581488640.0, + "95": 581488640.0, + "96": 581488640.0, + "97": 581488640.0, + "98": 581488640.0, + "99": 581488640.0, + "100": 581488640.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2594126336.0, + "2": 2690742784.0, + "3": 2690742784.0, + "4": 2690742784.0, + "5": 2690742784.0, + "6": 2690742784.0, + "7": 2690742784.0, + "8": 2690742784.0, + "9": 2690742784.0, + "10": 2690742784.0, + "11": 2690742784.0, + "12": 2690742784.0, + "13": 2690742784.0, + "14": 2690742784.0, + "15": 2690742784.0, + "16": 2690742784.0, + "17": 2690742784.0, + "18": 2690742784.0, + "19": 2690742784.0, + "20": 2690742784.0, + "21": 2690742784.0, + "22": 2690742784.0, + "23": 2690742784.0, + "24": 2690742784.0, + "25": 2690742784.0, + "26": 2690742784.0, + "27": 2690742784.0, + "28": 2690742784.0, + "29": 2690742784.0, + "30": 2690742784.0, + "31": 2690742784.0, + "32": 2690742784.0, + "33": 2690742784.0, + "34": 2690742784.0, + "35": 2690742784.0, + "36": 2690742784.0, + "37": 2690742784.0, + "38": 
2690742784.0, + "39": 2690742784.0, + "40": 2690742784.0, + "41": 2690742784.0, + "42": 2690742784.0, + "43": 2690742784.0, + "44": 2690742784.0, + "45": 2690742784.0, + "46": 2690742784.0, + "47": 2690742784.0, + "48": 2690742784.0, + "49": 2690742784.0, + "50": 2690742784.0, + "51": 2690742784.0, + "52": 2690742784.0, + "53": 2690742784.0, + "54": 2690742784.0, + "55": 2690742784.0, + "56": 2690742784.0, + "57": 2690742784.0, + "58": 2690742784.0, + "59": 2690742784.0, + "60": 2690742784.0, + "61": 2690742784.0, + "62": 2690742784.0, + "63": 2690742784.0, + "64": 2690742784.0, + "65": 2690742784.0, + "66": 2690742784.0, + "67": 2690742784.0, + "68": 2690742784.0, + "69": 2690742784.0, + "70": 2690742784.0, + "71": 2690742784.0, + "72": 2690742784.0, + "73": 2690742784.0, + "74": 2690742784.0, + "75": 2690742784.0, + "76": 2690742784.0, + "77": 2690742784.0, + "78": 2690742784.0, + "79": 2690742784.0, + "80": 2690742784.0, + "81": 2690742784.0, + "82": 2690742784.0, + "83": 2690742784.0, + "84": 2690742784.0, + "85": 2690742784.0, + "86": 2690742784.0, + "87": 2690742784.0, + "88": 2690742784.0, + "89": 2690742784.0, + "90": 2690742784.0, + "91": 2690742784.0, + "92": 2690742784.0, + "93": 2690742784.0, + "94": 2690742784.0, + "95": 2690742784.0, + "96": 2690742784.0, + "97": 2690742784.0, + "98": 2690742784.0, + "99": 2690742784.0, + "100": 2690742784.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.57521, + "2": 0.07593, + "3": 0.05387, + "4": 0.05352, + "5": 0.05602, + "6": 3.85308, + "7": 0.05787, + "8": 0.71621, + "9": 0.33662, + "10": 0.6136, + "11": 1.43071, + "12": 0.0585, + "13": 0.05762, + "14": 0.0573, + "15": 0.06754, + "16": 0.06151, + "17": 0.06798, + "18": 0.05523, + "19": 0.18762, + "20": 0.28771, + "21": 0.05854, + "22": 0.05692, + "23": 0.05871, + "24": 0.05788, + "25": 0.05853, + "26": 0.05723, + "27": 0.05911, + "28": 0.05718, + "29": 0.05914, + "30": 0.0562, + "31": 0.05914, + 
"32": 0.05683, + "33": 0.0585, + "34": 0.05641, + "35": 0.06095, + "36": 0.05706, + "37": 0.05915, + "38": 0.05666, + "39": 0.05887, + "40": 0.05689, + "41": 0.06354, + "42": 0.05728, + "43": 0.06056, + "44": 0.05698, + "45": 0.05866, + "46": 0.05782, + "47": 0.05864, + "48": 0.05766, + "49": 0.0593, + "50": 0.05709, + "51": 0.07764, + "52": 0.06534, + "53": 0.05923, + "54": 0.08052, + "55": 0.05743, + "56": 0.05803, + "57": 0.05961, + "58": 0.05679, + "59": 0.05691, + "60": 0.05989, + "61": 0.05604, + "62": 0.05739, + "63": 0.05673, + "64": 0.0572, + "65": 0.0573, + "66": 0.05797, + "67": 0.05694, + "68": 0.05763, + "69": 0.05765, + "70": 0.05718, + "71": 0.05666, + "72": 0.05782, + "73": 0.0577, + "74": 0.05704, + "75": 0.06457, + "76": 0.06526, + "77": 0.06461, + "78": 0.05996, + "79": 0.05701, + "80": 0.0582, + "81": 0.06253, + "82": 0.05976, + "83": 0.05924, + "84": 0.05851, + "85": 0.0593, + "86": 0.05994, + "87": 0.05913, + "88": 0.05723, + "89": 0.0581, + "90": 0.05828, + "91": 0.06035, + "92": 0.05762, + "93": 0.059, + "94": 0.05728, + "95": 0.05927, + "96": 0.05721, + "97": 0.05992, + "98": 0.05777, + "99": 0.05867, + "100": 0.0569 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..686e980d509 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.89631, + "2": 10.89416, + "3": 10.88786, + "4": 10.8914, + "5": 10.89154, + "6": 10.90001, + "7": 10.89184, + "8": 10.89886, + "9": 10.90208, + "10": 
10.88361, + "11": 10.87816, + "12": 10.89332, + "13": 10.89816, + "14": 10.89241, + "15": 10.84798, + "16": 10.854, + "17": 10.83093, + "18": 10.83991, + "19": 10.82802, + "20": 10.74822, + "21": 10.73494, + "22": 10.61719, + "23": 10.72621, + "24": 10.63177, + "25": 10.5931, + "26": 10.63365, + "27": 10.63304, + "28": 10.58259, + "29": 10.58595, + "30": 10.41201, + "31": 10.15907, + "32": 10.48362, + "33": 10.46704, + "34": 10.23815, + "35": 10.28193, + "36": 10.24052, + "37": 10.36227, + "38": 10.20306, + "39": 10.40456, + "40": 10.09271, + "41": 10.15831, + "42": 10.21934, + "43": 9.8436, + "44": 9.97299, + "45": 9.84189, + "46": 9.82017, + "47": 10.14968, + "48": 9.86021, + "49": 9.54238, + "50": 9.91347, + "51": 9.85447, + "52": 9.73936, + "53": 10.07426, + "54": 9.96915, + "55": 9.88574, + "56": 9.62437, + "57": 9.4823, + "58": 9.83483, + "59": 9.58732, + "60": 9.50245, + "61": 9.69343, + "62": 9.98806, + "63": 9.39103, + "64": 9.78021, + "65": 8.94515, + "66": 9.70494, + "67": 9.37251, + "68": 9.78329, + "69": 9.79058, + "70": 9.74454, + "71": 9.62301, + "72": 9.58458, + "73": 9.50513, + "74": 8.94312, + "75": 9.42524, + "76": 9.07601, + "77": 10.06353, + "78": 9.72308, + "79": 9.37502, + "80": 9.40453, + "81": 9.47794, + "82": 9.69667, + "83": 9.3072, + "84": 9.41526, + "85": 9.61293, + "86": 9.07195, + "87": 9.5884, + "88": 9.74762, + "89": 9.59982, + "90": 9.81672, + "91": 9.3379, + "92": 9.35605, + "93": 9.07425, + "94": 8.8351, + "95": 9.5184, + "96": 9.52391, + "97": 9.30923, + "98": 9.66743, + "99": 8.88419, + "100": 9.39924 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1483.0, + "2": 1650.0, + "3": 1681.0, + "4": 1767.0, + "5": 1903.0, + "6": 1952.0, + "7": 1967.0, + "8": 1651.0, + "9": 1886.0, + "10": 1427.0, + "11": 1897.0, + "12": 1855.0, + "13": 1941.0, + "14": 1749.0, + "15": 1901.0, + "16": 1813.0, + "17": 1710.0, + "18": 1707.0, + "19": 1819.0, + "20": 1639.0, + "21": 1880.0, + "22": 
1769.0, + "23": 2016.0, + "24": 1692.0, + "25": 1672.0, + "26": 1778.0, + "27": 1861.0, + "28": 1964.0, + "29": 2021.0, + "30": 1938.0, + "31": 1645.0, + "32": 1864.0, + "33": 2150.0, + "34": 1828.0, + "35": 1982.0, + "36": 1864.0, + "37": 2355.0, + "38": 2358.0, + "39": 2385.0, + "40": 2407.0, + "41": 2501.0, + "42": 2435.0, + "43": 2033.0, + "44": 2089.0, + "45": 2210.0, + "46": 2351.0, + "47": 2502.0, + "48": 2444.0, + "49": 2302.0, + "50": 2492.0, + "51": 2598.0, + "52": 2547.0, + "53": 2957.0, + "54": 2750.0, + "55": 2372.0, + "56": 2569.0, + "57": 2395.0, + "58": 2901.0, + "59": 2741.0, + "60": 2430.0, + "61": 2868.0, + "62": 2651.0, + "63": 2507.0, + "64": 3014.0, + "65": 2683.0, + "66": 2935.0, + "67": 2783.0, + "68": 2725.0, + "69": 2788.0, + "70": 3152.0, + "71": 3026.0, + "72": 2415.0, + "73": 3122.0, + "74": 1967.0, + "75": 2581.0, + "76": 3010.0, + "77": 3294.0, + "78": 3166.0, + "79": 3150.0, + "80": 3246.0, + "81": 3566.0, + "82": 3285.0, + "83": 2817.0, + "84": 3269.0, + "85": 3425.0, + "86": 2819.0, + "87": 3577.0, + "88": 3004.0, + "89": 3323.0, + "90": 3023.0, + "91": 2661.0, + "92": 3066.0, + "93": 2691.0, + "94": 3305.0, + "95": 3403.0, + "96": 3377.0, + "97": 3242.0, + "98": 3697.0, + "99": 3112.0, + "100": 3199.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 581488640.0, + "2": 581488640.0, + "3": 581488640.0, + "4": 581488640.0, + "5": 581488640.0, + "6": 581488640.0, + "7": 581488640.0, + "8": 581488640.0, + "9": 581488640.0, + "10": 581488640.0, + "11": 581488640.0, + "12": 581488640.0, + "13": 581488640.0, + "14": 581488640.0, + "15": 581488640.0, + "16": 581488640.0, + "17": 581488640.0, + "18": 581488640.0, + "19": 581488640.0, + "20": 581488640.0, + "21": 581488640.0, + "22": 581488640.0, + "23": 581488640.0, + "24": 581488640.0, + "25": 581488640.0, + "26": 581488640.0, + "27": 581488640.0, + "28": 581488640.0, + "29": 581488640.0, + "30": 581488640.0, + "31": 
581488640.0, + "32": 581488640.0, + "33": 581488640.0, + "34": 581488640.0, + "35": 581488640.0, + "36": 581488640.0, + "37": 581488640.0, + "38": 581488640.0, + "39": 581488640.0, + "40": 581488640.0, + "41": 581488640.0, + "42": 581488640.0, + "43": 581488640.0, + "44": 581488640.0, + "45": 581488640.0, + "46": 581488640.0, + "47": 581488640.0, + "48": 581488640.0, + "49": 581488640.0, + "50": 581488640.0, + "51": 581488640.0, + "52": 581488640.0, + "53": 581488640.0, + "54": 581488640.0, + "55": 581488640.0, + "56": 581488640.0, + "57": 581488640.0, + "58": 581488640.0, + "59": 581488640.0, + "60": 581488640.0, + "61": 581488640.0, + "62": 581488640.0, + "63": 581488640.0, + "64": 581488640.0, + "65": 581488640.0, + "66": 581488640.0, + "67": 581488640.0, + "68": 581488640.0, + "69": 581488640.0, + "70": 581488640.0, + "71": 581488640.0, + "72": 581488640.0, + "73": 581488640.0, + "74": 581488640.0, + "75": 581488640.0, + "76": 581488640.0, + "77": 581488640.0, + "78": 581488640.0, + "79": 581488640.0, + "80": 581488640.0, + "81": 581488640.0, + "82": 581488640.0, + "83": 581488640.0, + "84": 581488640.0, + "85": 581488640.0, + "86": 581488640.0, + "87": 581488640.0, + "88": 581488640.0, + "89": 581488640.0, + "90": 581488640.0, + "91": 581488640.0, + "92": 581488640.0, + "93": 581488640.0, + "94": 581488640.0, + "95": 581488640.0, + "96": 581488640.0, + "97": 581488640.0, + "98": 581488640.0, + "99": 581488640.0, + "100": 581488640.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2594126336.0, + "2": 2690742784.0, + "3": 2690742784.0, + "4": 2690742784.0, + "5": 2690742784.0, + "6": 2690742784.0, + "7": 2690742784.0, + "8": 2690742784.0, + "9": 2690742784.0, + "10": 2690742784.0, + "11": 2690742784.0, + "12": 2690742784.0, + "13": 2690742784.0, + "14": 2690742784.0, + "15": 2690742784.0, + "16": 2690742784.0, + "17": 2690742784.0, + "18": 2690742784.0, + "19": 2690742784.0, + "20": 
2690742784.0, + "21": 2690742784.0, + "22": 2690742784.0, + "23": 2690742784.0, + "24": 2690742784.0, + "25": 2690742784.0, + "26": 2690742784.0, + "27": 2690742784.0, + "28": 2690742784.0, + "29": 2690742784.0, + "30": 2690742784.0, + "31": 2690742784.0, + "32": 2690742784.0, + "33": 2690742784.0, + "34": 2690742784.0, + "35": 2690742784.0, + "36": 2690742784.0, + "37": 2690742784.0, + "38": 2690742784.0, + "39": 2690742784.0, + "40": 2690742784.0, + "41": 2690742784.0, + "42": 2690742784.0, + "43": 2690742784.0, + "44": 2690742784.0, + "45": 2690742784.0, + "46": 2690742784.0, + "47": 2690742784.0, + "48": 2690742784.0, + "49": 2690742784.0, + "50": 2690742784.0, + "51": 2690742784.0, + "52": 2690742784.0, + "53": 2690742784.0, + "54": 2690742784.0, + "55": 2690742784.0, + "56": 2690742784.0, + "57": 2690742784.0, + "58": 2690742784.0, + "59": 2690742784.0, + "60": 2690742784.0, + "61": 2690742784.0, + "62": 2690742784.0, + "63": 2690742784.0, + "64": 2690742784.0, + "65": 2690742784.0, + "66": 2690742784.0, + "67": 2690742784.0, + "68": 2690742784.0, + "69": 2690742784.0, + "70": 2690742784.0, + "71": 2690742784.0, + "72": 2690742784.0, + "73": 2690742784.0, + "74": 2690742784.0, + "75": 2690742784.0, + "76": 2690742784.0, + "77": 2690742784.0, + "78": 2690742784.0, + "79": 2690742784.0, + "80": 2690742784.0, + "81": 2690742784.0, + "82": 2690742784.0, + "83": 2690742784.0, + "84": 2690742784.0, + "85": 2690742784.0, + "86": 2690742784.0, + "87": 2690742784.0, + "88": 2690742784.0, + "89": 2690742784.0, + "90": 2690742784.0, + "91": 2690742784.0, + "92": 2690742784.0, + "93": 2690742784.0, + "94": 2690742784.0, + "95": 2690742784.0, + "96": 2690742784.0, + "97": 2690742784.0, + "98": 2690742784.0, + "99": 2690742784.0, + "100": 2690742784.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.46673, + "2": 0.07879, + "3": 0.06822, + "4": 0.06744, + "5": 0.06664, + "6": 0.06786, + "7": 0.06766, + "8": 
0.06659, + "9": 0.06797, + "10": 0.07184, + "11": 0.07288, + "12": 0.07188, + "13": 0.07026, + "14": 0.06821, + "15": 0.06667, + "16": 0.06656, + "17": 0.06764, + "18": 0.06816, + "19": 0.06695, + "20": 0.06832, + "21": 0.06808, + "22": 0.06822, + "23": 0.06838, + "24": 0.06731, + "25": 0.06857, + "26": 0.06706, + "27": 0.06819, + "28": 0.06784, + "29": 0.06785, + "30": 0.06735, + "31": 0.0685, + "32": 0.07005, + "33": 0.07122, + "34": 0.07241, + "35": 0.07067, + "36": 0.06981, + "37": 0.06934, + "38": 0.06771, + "39": 0.06805, + "40": 0.06824, + "41": 0.06831, + "42": 0.06733, + "43": 0.06819, + "44": 0.06816, + "45": 0.06847, + "46": 0.0674, + "47": 0.06856, + "48": 0.07158, + "49": 0.07079, + "50": 0.0717, + "51": 0.08179, + "52": 0.07272, + "53": 0.06939, + "54": 0.06631, + "55": 0.07046, + "56": 0.09852, + "57": 0.06464, + "58": 0.06466, + "59": 0.06537, + "60": 0.06301, + "61": 0.06361, + "62": 0.06551, + "63": 0.06563, + "64": 0.0749, + "65": 0.0748, + "66": 0.07507, + "67": 0.07552, + "68": 0.07573, + "69": 0.07066, + "70": 0.0658, + "71": 0.0647, + "72": 0.06444, + "73": 0.06462, + "74": 0.06543, + "75": 0.06609, + "76": 0.06503, + "77": 0.06499, + "78": 0.0644, + "79": 0.06439, + "80": 0.06417, + "81": 0.06401, + "82": 0.06575, + "83": 0.06494, + "84": 0.06442, + "85": 0.06396, + "86": 0.06422, + "87": 0.06484, + "88": 0.06512, + "89": 0.06426, + "90": 0.06481, + "91": 0.06476, + "92": 0.06383, + "93": 0.06456, + "94": 0.06292, + "95": 0.0638, + "96": 0.06392, + "97": 0.06356, + "98": 0.06355, + "99": 0.06439, + "100": 0.06428 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..42b005d7102 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.8401, + "2": 10.83566, + "3": 10.82993, + "4": 10.8173, + "5": 10.84032, + "6": 10.87262, + "7": 10.83467, + "8": 10.8403, + "9": 10.84359, + "10": 10.8134, + "11": 10.85025, + "12": 10.84316, + "13": 10.86605, + "14": 10.86315, + "15": 10.80276, + "16": 10.79643, + "17": 10.7763, + "18": 10.8015, + "19": 10.7939, + "20": 10.705, + "21": 10.68148, + "22": 10.56313, + "23": 10.70136, + "24": 10.57939, + "25": 10.53849, + "26": 10.60617, + "27": 10.59211, + "28": 10.56156, + "29": 10.57666, + "30": 10.35521, + "31": 10.12773, + "32": 10.46367, + "33": 10.45444, + "34": 10.22451, + "35": 10.27148, + "36": 10.22184, + "37": 10.33945, + "38": 10.18637, + "39": 10.39329, + "40": 10.08049, + "41": 10.13789, + "42": 10.20012, + "43": 9.83791, + "44": 9.94327, + "45": 9.8229, + "46": 9.82313, + "47": 10.13353, + "48": 9.8415, + "49": 9.52102, + "50": 9.90118, + "51": 9.83467, + "52": 9.73176, + "53": 10.04773, + "54": 9.93856, + "55": 9.86424, + "56": 9.61259, + "57": 9.46819, + "58": 9.81223, + "59": 9.57172, + "60": 9.4803, + "61": 9.67964, + "62": 9.96738, + "63": 9.35351, + "64": 9.7573, + "65": 8.93743, + "66": 9.68132, + "67": 9.35694, + "68": 9.7681, + "69": 9.77289, + "70": 9.71026, + "71": 9.60024, + "72": 9.56674, + "73": 9.47644, + "74": 8.93189, + "75": 9.4088, + "76": 9.06887, + "77": 10.04696, + "78": 9.70975, + "79": 9.35669, + "80": 9.39078, + "81": 9.46574, + "82": 9.68028, + "83": 9.29218, + "84": 9.40234, + "85": 9.59741, + "86": 9.06109, + "87": 9.57951, + "88": 9.73247, + "89": 9.58838, + "90": 9.80389, + "91": 9.32105, + "92": 9.35011, + "93": 9.06313, + "94": 8.82006, + "95": 9.50562, + "96": 9.51103, + "97": 9.29305, + "98": 9.65571, + "99": 8.87502, + "100": 9.38808 + } + }, + 
"num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1670.0, + "2": 1691.0, + "3": 1630.0, + "4": 1805.0, + "5": 1970.0, + "6": 1901.0, + "7": 1816.0, + "8": 1587.0, + "9": 1905.0, + "10": 1397.0, + "11": 1954.0, + "12": 1859.0, + "13": 1873.0, + "14": 1875.0, + "15": 1936.0, + "16": 1972.0, + "17": 1816.0, + "18": 1773.0, + "19": 1833.0, + "20": 1715.0, + "21": 1923.0, + "22": 1681.0, + "23": 2055.0, + "24": 1727.0, + "25": 1703.0, + "26": 1761.0, + "27": 1917.0, + "28": 1962.0, + "29": 2010.0, + "30": 1957.0, + "31": 1723.0, + "32": 1898.0, + "33": 2153.0, + "34": 1828.0, + "35": 1991.0, + "36": 1937.0, + "37": 2347.0, + "38": 2365.0, + "39": 2349.0, + "40": 2239.0, + "41": 2217.0, + "42": 2222.0, + "43": 2121.0, + "44": 2059.0, + "45": 2144.0, + "46": 2296.0, + "47": 2487.0, + "48": 2376.0, + "49": 2330.0, + "50": 2377.0, + "51": 2540.0, + "52": 2598.0, + "53": 2917.0, + "54": 2715.0, + "55": 2436.0, + "56": 2691.0, + "57": 2196.0, + "58": 2875.0, + "59": 2726.0, + "60": 2445.0, + "61": 3031.0, + "62": 2618.0, + "63": 2551.0, + "64": 2939.0, + "65": 2645.0, + "66": 3160.0, + "67": 2729.0, + "68": 2852.0, + "69": 2938.0, + "70": 3337.0, + "71": 3044.0, + "72": 2531.0, + "73": 2918.0, + "74": 1976.0, + "75": 2726.0, + "76": 3036.0, + "77": 3435.0, + "78": 3375.0, + "79": 3221.0, + "80": 3356.0, + "81": 3820.0, + "82": 3203.0, + "83": 2699.0, + "84": 3073.0, + "85": 3336.0, + "86": 2729.0, + "87": 3962.0, + "88": 3062.0, + "89": 3512.0, + "90": 3044.0, + "91": 2957.0, + "92": 3276.0, + "93": 2757.0, + "94": 3568.0, + "95": 3484.0, + "96": 3627.0, + "97": 3229.0, + "98": 3722.0, + "99": 3219.0, + "100": 3467.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 552238592.0, + "2": 552238592.0, + "3": 552238592.0, + "4": 552238592.0, + "5": 552238592.0, + "6": 552238592.0, + "7": 552238592.0, + "8": 552238592.0, + "9": 552238592.0, + "10": 552238592.0, + "11": 
552238592.0, + "12": 552238592.0, + "13": 552238592.0, + "14": 552238592.0, + "15": 552238592.0, + "16": 552238592.0, + "17": 552238592.0, + "18": 552238592.0, + "19": 552238592.0, + "20": 552238592.0, + "21": 552238592.0, + "22": 552238592.0, + "23": 552238592.0, + "24": 552238592.0, + "25": 552238592.0, + "26": 552238592.0, + "27": 552238592.0, + "28": 552238592.0, + "29": 552238592.0, + "30": 552238592.0, + "31": 552238592.0, + "32": 552238592.0, + "33": 552238592.0, + "34": 552238592.0, + "35": 552238592.0, + "36": 552238592.0, + "37": 552238592.0, + "38": 552238592.0, + "39": 552238592.0, + "40": 552238592.0, + "41": 552238592.0, + "42": 552238592.0, + "43": 552238592.0, + "44": 552238592.0, + "45": 552238592.0, + "46": 552238592.0, + "47": 552238592.0, + "48": 552238592.0, + "49": 552238592.0, + "50": 552238592.0, + "51": 552238592.0, + "52": 552238592.0, + "53": 552238592.0, + "54": 552238592.0, + "55": 552238592.0, + "56": 552238592.0, + "57": 552238592.0, + "58": 552238592.0, + "59": 552238592.0, + "60": 552238592.0, + "61": 552238592.0, + "62": 552238592.0, + "63": 552238592.0, + "64": 552238592.0, + "65": 552238592.0, + "66": 552238592.0, + "67": 552238592.0, + "68": 552238592.0, + "69": 552238592.0, + "70": 552238592.0, + "71": 552238592.0, + "72": 552238592.0, + "73": 552238592.0, + "74": 552238592.0, + "75": 552238592.0, + "76": 552238592.0, + "77": 552238592.0, + "78": 552238592.0, + "79": 552238592.0, + "80": 552238592.0, + "81": 552238592.0, + "82": 552238592.0, + "83": 552238592.0, + "84": 552238592.0, + "85": 552238592.0, + "86": 552238592.0, + "87": 552238592.0, + "88": 552238592.0, + "89": 552238592.0, + "90": 552238592.0, + "91": 552238592.0, + "92": 552238592.0, + "93": 552238592.0, + "94": 552238592.0, + "95": 552238592.0, + "96": 552238592.0, + "97": 552238592.0, + "98": 552238592.0, + "99": 552238592.0, + "100": 552238592.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + 
"1": 4576563200.0, + "2": 4673179648.0, + "3": 4673179648.0, + "4": 4673179648.0, + "5": 4673179648.0, + "6": 4673179648.0, + "7": 4673179648.0, + "8": 4673179648.0, + "9": 4673179648.0, + "10": 4673179648.0, + "11": 4673179648.0, + "12": 4673179648.0, + "13": 4673179648.0, + "14": 4673179648.0, + "15": 4673179648.0, + "16": 4673179648.0, + "17": 4673179648.0, + "18": 4673179648.0, + "19": 4673179648.0, + "20": 4673179648.0, + "21": 4673179648.0, + "22": 4673179648.0, + "23": 4673179648.0, + "24": 4673179648.0, + "25": 4673179648.0, + "26": 4673179648.0, + "27": 4673179648.0, + "28": 4673179648.0, + "29": 4673179648.0, + "30": 4673179648.0, + "31": 4673179648.0, + "32": 4673179648.0, + "33": 4673179648.0, + "34": 4673179648.0, + "35": 4673179648.0, + "36": 4673179648.0, + "37": 4673179648.0, + "38": 4673179648.0, + "39": 4673179648.0, + "40": 4673179648.0, + "41": 4673179648.0, + "42": 4673179648.0, + "43": 4673179648.0, + "44": 4673179648.0, + "45": 4673179648.0, + "46": 4673179648.0, + "47": 4673179648.0, + "48": 4673179648.0, + "49": 4673179648.0, + "50": 4673179648.0, + "51": 4673179648.0, + "52": 4673179648.0, + "53": 4673179648.0, + "54": 4673179648.0, + "55": 4673179648.0, + "56": 4673179648.0, + "57": 4673179648.0, + "58": 4673179648.0, + "59": 4673179648.0, + "60": 4673179648.0, + "61": 4673179648.0, + "62": 4673179648.0, + "63": 4673179648.0, + "64": 4673179648.0, + "65": 4673179648.0, + "66": 4673179648.0, + "67": 4673179648.0, + "68": 4673179648.0, + "69": 4673179648.0, + "70": 4673179648.0, + "71": 4673179648.0, + "72": 4673179648.0, + "73": 4673179648.0, + "74": 4673179648.0, + "75": 4673179648.0, + "76": 4673179648.0, + "77": 4673179648.0, + "78": 4673179648.0, + "79": 4673179648.0, + "80": 4673179648.0, + "81": 4673179648.0, + "82": 4673179648.0, + "83": 4673179648.0, + "84": 4673179648.0, + "85": 4673179648.0, + "86": 4673179648.0, + "87": 4673179648.0, + "88": 4673179648.0, + "89": 4673179648.0, + "90": 4673179648.0, + "91": 4673179648.0, + "92": 
4673179648.0, + "93": 4673179648.0, + "94": 4673179648.0, + "95": 4673179648.0, + "96": 4673179648.0, + "97": 4673179648.0, + "98": 4673179648.0, + "99": 4673179648.0, + "100": 4673179648.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.14508, + "2": 0.13504, + "3": 0.10484, + "4": 0.10489, + "5": 0.10473, + "6": 0.10497, + "7": 0.10413, + "8": 0.10536, + "9": 0.32726, + "10": 0.10707, + "11": 0.1004, + "12": 0.10131, + "13": 0.10126, + "14": 0.10152, + "15": 0.10011, + "16": 0.10055, + "17": 0.10006, + "18": 0.10008, + "19": 0.09902, + "20": 0.10043, + "21": 0.09943, + "22": 0.10108, + "23": 0.10016, + "24": 0.10055, + "25": 0.10767, + "26": 0.10062, + "27": 0.09965, + "28": 0.09956, + "29": 0.09902, + "30": 0.09994, + "31": 0.10043, + "32": 0.09913, + "33": 0.09934, + "34": 0.10116, + "35": 0.09881, + "36": 0.09921, + "37": 0.09882, + "38": 0.09871, + "39": 0.09864, + "40": 0.09965, + "41": 0.09923, + "42": 0.09939, + "43": 0.10071, + "44": 0.09983, + "45": 0.35882, + "46": 0.10188, + "47": 0.09992, + "48": 0.09983, + "49": 0.09848, + "50": 0.10049, + "51": 0.11806, + "52": 0.10549, + "53": 0.10158, + "54": 0.10548, + "55": 0.10224, + "56": 0.10244, + "57": 0.10391, + "58": 0.10383, + "59": 0.10417, + "60": 0.10737, + "61": 0.1029, + "62": 0.10202, + "63": 0.10011, + "64": 0.10594, + "65": 0.10093, + "66": 0.10168, + "67": 0.1008, + "68": 0.14562, + "69": 0.09913, + "70": 0.10262, + "71": 0.09958, + "72": 0.10173, + "73": 0.09928, + "74": 0.10376, + "75": 0.09944, + "76": 0.10143, + "77": 0.10005, + "78": 0.1033, + "79": 0.09996, + "80": 0.10114, + "81": 0.09988, + "82": 0.10093, + "83": 0.09908, + "84": 0.1014, + "85": 0.09925, + "86": 0.10175, + "87": 0.09965, + "88": 0.10189, + "89": 0.10015, + "90": 0.10099, + "91": 0.09925, + "92": 0.10123, + "93": 0.09879, + "94": 0.10599, + "95": 0.0991, + "96": 0.10147, + "97": 0.09941, + "98": 0.10245, + "99": 0.09902, + "100": 0.10071 + } + } +} \ No 
newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..2fd83504089 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.8401, + "2": 10.83566, + "3": 10.82993, + "4": 10.8173, + "5": 10.84032, + "6": 10.87262, + "7": 10.83467, + "8": 10.8403, + "9": 10.84359, + "10": 10.8134, + "11": 10.85025, + "12": 10.84316, + "13": 10.86605, + "14": 10.86315, + "15": 10.80276, + "16": 10.79643, + "17": 10.7763, + "18": 10.8015, + "19": 10.7939, + "20": 10.705, + "21": 10.68148, + "22": 10.56313, + "23": 10.70136, + "24": 10.57939, + "25": 10.53849, + "26": 10.60617, + "27": 10.59211, + "28": 10.56156, + "29": 10.57666, + "30": 10.35521, + "31": 10.12773, + "32": 10.46367, + "33": 10.45444, + "34": 10.22451, + "35": 10.27148, + "36": 10.22184, + "37": 10.33945, + "38": 10.18637, + "39": 10.39329, + "40": 10.08049, + "41": 10.13789, + "42": 10.20012, + "43": 9.83791, + "44": 9.94327, + "45": 9.8229, + "46": 9.82313, + "47": 10.13353, + "48": 9.8415, + "49": 9.52102, + "50": 9.90118, + "51": 9.83467, + "52": 9.73176, + "53": 10.04773, + "54": 9.93856, + "55": 9.86424, + "56": 9.61259, + "57": 9.46819, + "58": 9.81223, + "59": 9.57172, + "60": 9.4803, + "61": 9.67964, + "62": 9.96738, + "63": 9.35351, + "64": 9.7573, + "65": 8.93743, + "66": 9.68132, + "67": 9.35694, + "68": 9.7681, + "69": 9.77289, + "70": 9.71026, + "71": 9.60024, + "72": 9.56674, + "73": 9.47644, + "74": 8.93189, + "75": 9.4088, + "76": 9.06887, + "77": 10.04696, 
+ "78": 9.70975, + "79": 9.35669, + "80": 9.39078, + "81": 9.46574, + "82": 9.68028, + "83": 9.29218, + "84": 9.40234, + "85": 9.59741, + "86": 9.06109, + "87": 9.57951, + "88": 9.73247, + "89": 9.58838, + "90": 9.80389, + "91": 9.32105, + "92": 9.35011, + "93": 9.06313, + "94": 8.82006, + "95": 9.50562, + "96": 9.51103, + "97": 9.29305, + "98": 9.65571, + "99": 8.87502, + "100": 9.38808 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1670.0, + "2": 1691.0, + "3": 1630.0, + "4": 1805.0, + "5": 1970.0, + "6": 1901.0, + "7": 1816.0, + "8": 1587.0, + "9": 1905.0, + "10": 1397.0, + "11": 1954.0, + "12": 1859.0, + "13": 1873.0, + "14": 1875.0, + "15": 1936.0, + "16": 1972.0, + "17": 1816.0, + "18": 1773.0, + "19": 1833.0, + "20": 1715.0, + "21": 1923.0, + "22": 1681.0, + "23": 2055.0, + "24": 1727.0, + "25": 1703.0, + "26": 1761.0, + "27": 1917.0, + "28": 1962.0, + "29": 2010.0, + "30": 1957.0, + "31": 1723.0, + "32": 1898.0, + "33": 2153.0, + "34": 1828.0, + "35": 1991.0, + "36": 1937.0, + "37": 2347.0, + "38": 2365.0, + "39": 2349.0, + "40": 2239.0, + "41": 2217.0, + "42": 2222.0, + "43": 2121.0, + "44": 2059.0, + "45": 2144.0, + "46": 2296.0, + "47": 2487.0, + "48": 2376.0, + "49": 2330.0, + "50": 2377.0, + "51": 2540.0, + "52": 2598.0, + "53": 2917.0, + "54": 2715.0, + "55": 2436.0, + "56": 2691.0, + "57": 2196.0, + "58": 2875.0, + "59": 2726.0, + "60": 2445.0, + "61": 3031.0, + "62": 2618.0, + "63": 2551.0, + "64": 2939.0, + "65": 2645.0, + "66": 3160.0, + "67": 2729.0, + "68": 2852.0, + "69": 2938.0, + "70": 3337.0, + "71": 3044.0, + "72": 2531.0, + "73": 2918.0, + "74": 1976.0, + "75": 2726.0, + "76": 3036.0, + "77": 3435.0, + "78": 3375.0, + "79": 3221.0, + "80": 3356.0, + "81": 3820.0, + "82": 3203.0, + "83": 2699.0, + "84": 3073.0, + "85": 3336.0, + "86": 2729.0, + "87": 3962.0, + "88": 3062.0, + "89": 3512.0, + "90": 3044.0, + "91": 2957.0, + "92": 3276.0, + "93": 2757.0, + "94": 3568.0, + "95": 
3484.0, + "96": 3627.0, + "97": 3229.0, + "98": 3722.0, + "99": 3219.0, + "100": 3467.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 552238592.0, + "2": 552238592.0, + "3": 552238592.0, + "4": 552238592.0, + "5": 552238592.0, + "6": 552238592.0, + "7": 552238592.0, + "8": 552238592.0, + "9": 552238592.0, + "10": 552238592.0, + "11": 552238592.0, + "12": 552238592.0, + "13": 552238592.0, + "14": 552238592.0, + "15": 552238592.0, + "16": 552238592.0, + "17": 552238592.0, + "18": 552238592.0, + "19": 552238592.0, + "20": 552238592.0, + "21": 552238592.0, + "22": 552238592.0, + "23": 552238592.0, + "24": 552238592.0, + "25": 552238592.0, + "26": 552238592.0, + "27": 552238592.0, + "28": 552238592.0, + "29": 552238592.0, + "30": 552238592.0, + "31": 552238592.0, + "32": 552238592.0, + "33": 552238592.0, + "34": 552238592.0, + "35": 552238592.0, + "36": 552238592.0, + "37": 552238592.0, + "38": 552238592.0, + "39": 552238592.0, + "40": 552238592.0, + "41": 552238592.0, + "42": 552238592.0, + "43": 552238592.0, + "44": 552238592.0, + "45": 552238592.0, + "46": 552238592.0, + "47": 552238592.0, + "48": 552238592.0, + "49": 552238592.0, + "50": 552238592.0, + "51": 552238592.0, + "52": 552238592.0, + "53": 552238592.0, + "54": 552238592.0, + "55": 552238592.0, + "56": 552238592.0, + "57": 552238592.0, + "58": 552238592.0, + "59": 552238592.0, + "60": 552238592.0, + "61": 552238592.0, + "62": 552238592.0, + "63": 552238592.0, + "64": 552238592.0, + "65": 552238592.0, + "66": 552238592.0, + "67": 552238592.0, + "68": 552238592.0, + "69": 552238592.0, + "70": 552238592.0, + "71": 552238592.0, + "72": 552238592.0, + "73": 552238592.0, + "74": 552238592.0, + "75": 552238592.0, + "76": 552238592.0, + "77": 552238592.0, + "78": 552238592.0, + "79": 552238592.0, + "80": 552238592.0, + "81": 552238592.0, + "82": 552238592.0, + "83": 552238592.0, + "84": 552238592.0, + "85": 552238592.0, + "86": 552238592.0, + 
"87": 552238592.0, + "88": 552238592.0, + "89": 552238592.0, + "90": 552238592.0, + "91": 552238592.0, + "92": 552238592.0, + "93": 552238592.0, + "94": 552238592.0, + "95": 552238592.0, + "96": 552238592.0, + "97": 552238592.0, + "98": 552238592.0, + "99": 552238592.0, + "100": 552238592.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4576563200.0, + "2": 4673179648.0, + "3": 4673179648.0, + "4": 4673179648.0, + "5": 4673179648.0, + "6": 4673179648.0, + "7": 4673179648.0, + "8": 4673179648.0, + "9": 4673179648.0, + "10": 4673179648.0, + "11": 4673179648.0, + "12": 4673179648.0, + "13": 4673179648.0, + "14": 4673179648.0, + "15": 4673179648.0, + "16": 4673179648.0, + "17": 4673179648.0, + "18": 4673179648.0, + "19": 4673179648.0, + "20": 4673179648.0, + "21": 4673179648.0, + "22": 4673179648.0, + "23": 4673179648.0, + "24": 4673179648.0, + "25": 4673179648.0, + "26": 4673179648.0, + "27": 4673179648.0, + "28": 4673179648.0, + "29": 4673179648.0, + "30": 4673179648.0, + "31": 4673179648.0, + "32": 4673179648.0, + "33": 4673179648.0, + "34": 4673179648.0, + "35": 4673179648.0, + "36": 4673179648.0, + "37": 4673179648.0, + "38": 4673179648.0, + "39": 4673179648.0, + "40": 4673179648.0, + "41": 4673179648.0, + "42": 4673179648.0, + "43": 4673179648.0, + "44": 4673179648.0, + "45": 4673179648.0, + "46": 4673179648.0, + "47": 4673179648.0, + "48": 4673179648.0, + "49": 4673179648.0, + "50": 4673179648.0, + "51": 4673179648.0, + "52": 4673179648.0, + "53": 4673179648.0, + "54": 4673179648.0, + "55": 4673179648.0, + "56": 4673179648.0, + "57": 4673179648.0, + "58": 4673179648.0, + "59": 4673179648.0, + "60": 4673179648.0, + "61": 4673179648.0, + "62": 4673179648.0, + "63": 4673179648.0, + "64": 4673179648.0, + "65": 4673179648.0, + "66": 4673179648.0, + "67": 4673179648.0, + "68": 4673179648.0, + "69": 4673179648.0, + "70": 4673179648.0, + "71": 4673179648.0, + "72": 4673179648.0, + "73": 
4673179648.0, + "74": 4673179648.0, + "75": 4673179648.0, + "76": 4673179648.0, + "77": 4673179648.0, + "78": 4673179648.0, + "79": 4673179648.0, + "80": 4673179648.0, + "81": 4673179648.0, + "82": 4673179648.0, + "83": 4673179648.0, + "84": 4673179648.0, + "85": 4673179648.0, + "86": 4673179648.0, + "87": 4673179648.0, + "88": 4673179648.0, + "89": 4673179648.0, + "90": 4673179648.0, + "91": 4673179648.0, + "92": 4673179648.0, + "93": 4673179648.0, + "94": 4673179648.0, + "95": 4673179648.0, + "96": 4673179648.0, + "97": 4673179648.0, + "98": 4673179648.0, + "99": 4673179648.0, + "100": 4673179648.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.84608, + "2": 0.1383, + "3": 0.11074, + "4": 0.09988, + "5": 0.09832, + "6": 0.09852, + "7": 0.09942, + "8": 0.09887, + "9": 0.09982, + "10": 0.0999, + "11": 0.32507, + "12": 0.0997, + "13": 0.10073, + "14": 0.09862, + "15": 0.09903, + "16": 0.09813, + "17": 0.09854, + "18": 0.09827, + "19": 0.09818, + "20": 0.09782, + "21": 0.0976, + "22": 0.09763, + "23": 0.09742, + "24": 0.10007, + "25": 0.09709, + "26": 0.10028, + "27": 0.09967, + "28": 0.10005, + "29": 0.09819, + "30": 0.09782, + "31": 0.09728, + "32": 0.09707, + "33": 0.09712, + "34": 0.09768, + "35": 0.09779, + "36": 0.09761, + "37": 0.09958, + "38": 0.09866, + "39": 0.09784, + "40": 0.09877, + "41": 0.09772, + "42": 0.09833, + "43": 0.09811, + "44": 0.09781, + "45": 0.09781, + "46": 0.09827, + "47": 0.09771, + "48": 0.09763, + "49": 0.09768, + "50": 0.09899, + "51": 0.10947, + "52": 0.09886, + "53": 0.09597, + "54": 0.09838, + "55": 0.09729, + "56": 0.09695, + "57": 0.09961, + "58": 0.09847, + "59": 0.09888, + "60": 0.09635, + "61": 0.09692, + "62": 0.0979, + "63": 0.09738, + "64": 0.09561, + "65": 0.0984, + "66": 0.0969, + "67": 0.13611, + "68": 0.09631, + "69": 0.09564, + "70": 0.09538, + "71": 0.09557, + "72": 0.09548, + "73": 0.09581, + "74": 0.09593, + "75": 0.09489, + "76": 0.0959, + "77": 
0.09486, + "78": 0.09568, + "79": 0.09634, + "80": 0.09468, + "81": 0.09589, + "82": 0.09598, + "83": 0.09489, + "84": 0.0954, + "85": 0.09413, + "86": 0.09499, + "87": 0.09424, + "88": 0.09411, + "89": 0.09598, + "90": 0.09549, + "91": 0.09452, + "92": 0.09467, + "93": 0.09619, + "94": 0.09523, + "95": 0.09445, + "96": 0.09426, + "97": 0.09435, + "98": 0.09523, + "99": 0.09534, + "100": 0.09547 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 5d9f1423ab0..dc66396ad6b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.89631, "5": 10.89154, "10": 10.88361, "15": 10.84803, "20": 10.74824, "25": 10.59309, "30": 10.41204, "35": 10.28189, "40": 10.09271, "45": 9.84194, "50": 9.91343, "55": 9.88574, "60": 9.50243, "65": 8.94516, "70": 9.74451, "75": 9.42524, "80": 9.40454, "85": 9.61295, "90": 9.81672, "95": 9.51841, "100": 9.39923}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1483.0, "5": 1903.0, "10": 1427.0, "15": 1980.0, "20": 1588.0, "25": 1649.0, "30": 1984.0, "35": 1921.0, "40": 2367.0, "45": 2184.0, "50": 2444.0, "55": 2503.0, "60": 2367.0, "65": 2605.0, "70": 3135.0, "75": 2556.0, "80": 3301.0, "85": 3380.0, "90": 3198.0, "95": 3431.0, "100": 3089.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 
1261848064.0, "5": 1261848064.0, "10": 1261848064.0, "15": 1261848064.0, "20": 1261848064.0, "25": 1261848064.0, "30": 1261848064.0, "35": 1261848064.0, "40": 1261848064.0, "45": 1261848064.0, "50": 1261848064.0, "55": 1261848064.0, "60": 1261848064.0, "65": 1261848064.0, "70": 1261848064.0, "75": 1261848064.0, "80": 1261848064.0, "85": 1261848064.0, "90": 1261848064.0, "95": 1261848064.0, "100": 1261848064.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2013852672.0, "5": 2563430400.0, "10": 2563430400.0, "15": 2563430400.0, "20": 2563430400.0, "25": 2563430400.0, "30": 2563430400.0, "35": 2563430400.0, "40": 2563430400.0, "45": 2563430400.0, "50": 2563430400.0, "55": 2563430400.0, "60": 2563430400.0, "65": 2563430400.0, "70": 2563430400.0, "75": 2563430400.0, "80": 2563430400.0, "85": 2563430400.0, "90": 2563430400.0, "95": 2563430400.0, "100": 2563430400.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 6.02119, "5": 0.07164, "10": 0.07403, "15": 0.07196, "20": 0.07295, "25": 0.07132, "30": 0.07266, "35": 0.07052, "40": 0.08274, "45": 0.07025, "50": 0.07178, "55": 0.0715, "60": 0.07114, "65": 0.07081, "70": 0.07243, "75": 0.07071, "80": 0.07039, "85": 0.07108, "90": 0.07278, "95": 0.07197, "100": 0.07038}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.89631, + "2": 10.89416, + "3": 10.88786, + "4": 10.8914, + "5": 10.89154, + "6": 10.90001, + "7": 10.89184, + "8": 10.89886, + "9": 10.90208, + "10": 10.88361, + "11": 10.87817, + "12": 10.89334, + "13": 10.89814, + "14": 10.89242, + "15": 10.84803, + "16": 10.85398, + "17": 10.83097, + "18": 10.83991, + "19": 10.82801, + "20": 10.74824, + "21": 10.73496, + "22": 10.61719, + "23": 10.72621, + "24": 10.63178, + "25": 10.59309, + "26": 10.63369, + "27": 10.63304, + "28": 10.58264, + "29": 10.58594, + "30": 10.41204, + "31": 10.15899, + 
"32": 10.48366, + "33": 10.46706, + "34": 10.23811, + "35": 10.28189, + "36": 10.24056, + "37": 10.36219, + "38": 10.20309, + "39": 10.40454, + "40": 10.09271, + "41": 10.15835, + "42": 10.21933, + "43": 9.84358, + "44": 9.97303, + "45": 9.84194, + "46": 9.82017, + "47": 10.14969, + "48": 9.86023, + "49": 9.54235, + "50": 9.91343, + "51": 9.8545, + "52": 9.7393, + "53": 10.07426, + "54": 9.96913, + "55": 9.88574, + "56": 9.62438, + "57": 9.48229, + "58": 9.83484, + "59": 9.58731, + "60": 9.50243, + "61": 9.6934, + "62": 9.988, + "63": 9.39105, + "64": 9.78022, + "65": 8.94516, + "66": 9.70492, + "67": 9.37249, + "68": 9.78328, + "69": 9.79057, + "70": 9.74451, + "71": 9.62298, + "72": 9.58457, + "73": 9.50511, + "74": 8.94308, + "75": 9.42524, + "76": 9.07602, + "77": 10.06352, + "78": 9.72307, + "79": 9.37497, + "80": 9.40454, + "81": 9.4779, + "82": 9.69669, + "83": 9.30714, + "84": 9.41525, + "85": 9.61295, + "86": 9.07198, + "87": 9.58834, + "88": 9.7476, + "89": 9.59984, + "90": 9.81672, + "91": 9.33791, + "92": 9.35608, + "93": 9.07423, + "94": 8.83511, + "95": 9.51841, + "96": 9.52391, + "97": 9.30922, + "98": 9.66746, + "99": 8.88421, + "100": 9.39923 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1483.0, + "2": 1650.0, + "3": 1681.0, + "4": 1767.0, + "5": 1903.0, + "6": 1952.0, + "7": 1967.0, + "8": 1651.0, + "9": 1886.0, + "10": 1427.0, + "11": 1939.0, + "12": 1778.0, + "13": 1964.0, + "14": 1762.0, + "15": 1980.0, + "16": 1923.0, + "17": 1817.0, + "18": 1783.0, + "19": 1750.0, + "20": 1588.0, + "21": 1855.0, + "22": 1641.0, + "23": 2098.0, + "24": 1679.0, + "25": 1649.0, + "26": 1806.0, + "27": 1834.0, + "28": 2042.0, + "29": 2033.0, + "30": 1984.0, + "31": 1518.0, + "32": 1954.0, + "33": 2068.0, + "34": 1900.0, + "35": 1921.0, + "36": 1965.0, + "37": 2321.0, + "38": 2340.0, + "39": 2344.0, + "40": 2367.0, + "41": 2457.0, + "42": 2367.0, + "43": 2020.0, + "44": 2135.0, + "45": 2184.0, + "46": 
2310.0, + "47": 2463.0, + "48": 2450.0, + "49": 2259.0, + "50": 2444.0, + "51": 2543.0, + "52": 2613.0, + "53": 2945.0, + "54": 2713.0, + "55": 2503.0, + "56": 2692.0, + "57": 2338.0, + "58": 2961.0, + "59": 2620.0, + "60": 2367.0, + "61": 2909.0, + "62": 2728.0, + "63": 2399.0, + "64": 2909.0, + "65": 2605.0, + "66": 2983.0, + "67": 2793.0, + "68": 2663.0, + "69": 2833.0, + "70": 3135.0, + "71": 2997.0, + "72": 2464.0, + "73": 3088.0, + "74": 1970.0, + "75": 2556.0, + "76": 3064.0, + "77": 3231.0, + "78": 3097.0, + "79": 3035.0, + "80": 3301.0, + "81": 3599.0, + "82": 3215.0, + "83": 2757.0, + "84": 3130.0, + "85": 3380.0, + "86": 2742.0, + "87": 3723.0, + "88": 3066.0, + "89": 3264.0, + "90": 3198.0, + "91": 2718.0, + "92": 3070.0, + "93": 2624.0, + "94": 3301.0, + "95": 3431.0, + "96": 3358.0, + "97": 3142.0, + "98": 3704.0, + "99": 3107.0, + "100": 3089.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1261848064.0, + "2": 1261848064.0, + "3": 1261848064.0, + "4": 1261848064.0, + "5": 1261848064.0, + "6": 1261848064.0, + "7": 1261848064.0, + "8": 1261848064.0, + "9": 1261848064.0, + "10": 1261848064.0, + "11": 1261848064.0, + "12": 1261848064.0, + "13": 1261848064.0, + "14": 1261848064.0, + "15": 1261848064.0, + "16": 1261848064.0, + "17": 1261848064.0, + "18": 1261848064.0, + "19": 1261848064.0, + "20": 1261848064.0, + "21": 1261848064.0, + "22": 1261848064.0, + "23": 1261848064.0, + "24": 1261848064.0, + "25": 1261848064.0, + "26": 1261848064.0, + "27": 1261848064.0, + "28": 1261848064.0, + "29": 1261848064.0, + "30": 1261848064.0, + "31": 1261848064.0, + "32": 1261848064.0, + "33": 1261848064.0, + "34": 1261848064.0, + "35": 1261848064.0, + "36": 1261848064.0, + "37": 1261848064.0, + "38": 1261848064.0, + "39": 1261848064.0, + "40": 1261848064.0, + "41": 1261848064.0, + "42": 1261848064.0, + "43": 1261848064.0, + "44": 1261848064.0, + "45": 1261848064.0, + "46": 1261848064.0, + "47": 
1261848064.0, + "48": 1261848064.0, + "49": 1261848064.0, + "50": 1261848064.0, + "51": 1261848064.0, + "52": 1261848064.0, + "53": 1261848064.0, + "54": 1261848064.0, + "55": 1261848064.0, + "56": 1261848064.0, + "57": 1261848064.0, + "58": 1261848064.0, + "59": 1261848064.0, + "60": 1261848064.0, + "61": 1261848064.0, + "62": 1261848064.0, + "63": 1261848064.0, + "64": 1261848064.0, + "65": 1261848064.0, + "66": 1261848064.0, + "67": 1261848064.0, + "68": 1261848064.0, + "69": 1261848064.0, + "70": 1261848064.0, + "71": 1261848064.0, + "72": 1261848064.0, + "73": 1261848064.0, + "74": 1261848064.0, + "75": 1261848064.0, + "76": 1261848064.0, + "77": 1261848064.0, + "78": 1261848064.0, + "79": 1261848064.0, + "80": 1261848064.0, + "81": 1261848064.0, + "82": 1261848064.0, + "83": 1261848064.0, + "84": 1261848064.0, + "85": 1261848064.0, + "86": 1261848064.0, + "87": 1261848064.0, + "88": 1261848064.0, + "89": 1261848064.0, + "90": 1261848064.0, + "91": 1261848064.0, + "92": 1261848064.0, + "93": 1261848064.0, + "94": 1261848064.0, + "95": 1261848064.0, + "96": 1261848064.0, + "97": 1261848064.0, + "98": 1261848064.0, + "99": 1261848064.0, + "100": 1261848064.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2013852672.0, + "2": 2563430400.0, + "3": 2563430400.0, + "4": 2563430400.0, + "5": 2563430400.0, + "6": 2563430400.0, + "7": 2563430400.0, + "8": 2563430400.0, + "9": 2563430400.0, + "10": 2563430400.0, + "11": 2563430400.0, + "12": 2563430400.0, + "13": 2563430400.0, + "14": 2563430400.0, + "15": 2563430400.0, + "16": 2563430400.0, + "17": 2563430400.0, + "18": 2563430400.0, + "19": 2563430400.0, + "20": 2563430400.0, + "21": 2563430400.0, + "22": 2563430400.0, + "23": 2563430400.0, + "24": 2563430400.0, + "25": 2563430400.0, + "26": 2563430400.0, + "27": 2563430400.0, + "28": 2563430400.0, + "29": 2563430400.0, + "30": 2563430400.0, + "31": 2563430400.0, + "32": 2563430400.0, + "33": 
2563430400.0, + "34": 2563430400.0, + "35": 2563430400.0, + "36": 2563430400.0, + "37": 2563430400.0, + "38": 2563430400.0, + "39": 2563430400.0, + "40": 2563430400.0, + "41": 2563430400.0, + "42": 2563430400.0, + "43": 2563430400.0, + "44": 2563430400.0, + "45": 2563430400.0, + "46": 2563430400.0, + "47": 2563430400.0, + "48": 2563430400.0, + "49": 2563430400.0, + "50": 2563430400.0, + "51": 2563430400.0, + "52": 2563430400.0, + "53": 2563430400.0, + "54": 2563430400.0, + "55": 2563430400.0, + "56": 2563430400.0, + "57": 2563430400.0, + "58": 2563430400.0, + "59": 2563430400.0, + "60": 2563430400.0, + "61": 2563430400.0, + "62": 2563430400.0, + "63": 2563430400.0, + "64": 2563430400.0, + "65": 2563430400.0, + "66": 2563430400.0, + "67": 2563430400.0, + "68": 2563430400.0, + "69": 2563430400.0, + "70": 2563430400.0, + "71": 2563430400.0, + "72": 2563430400.0, + "73": 2563430400.0, + "74": 2563430400.0, + "75": 2563430400.0, + "76": 2563430400.0, + "77": 2563430400.0, + "78": 2563430400.0, + "79": 2563430400.0, + "80": 2563430400.0, + "81": 2563430400.0, + "82": 2563430400.0, + "83": 2563430400.0, + "84": 2563430400.0, + "85": 2563430400.0, + "86": 2563430400.0, + "87": 2563430400.0, + "88": 2563430400.0, + "89": 2563430400.0, + "90": 2563430400.0, + "91": 2563430400.0, + "92": 2563430400.0, + "93": 2563430400.0, + "94": 2563430400.0, + "95": 2563430400.0, + "96": 2563430400.0, + "97": 2563430400.0, + "98": 2563430400.0, + "99": 2563430400.0, + "100": 2563430400.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.44856, + "2": 0.10562, + "3": 0.09824, + "4": 0.09657, + "5": 0.10604, + "6": 0.09627, + "7": 0.09681, + "8": 0.09299, + "9": 0.09413, + "10": 0.09401, + "11": 0.09341, + "12": 0.09223, + "13": 0.09373, + "14": 0.0936, + "15": 0.09439, + "16": 0.09285, + "17": 0.09422, + "18": 0.09511, + "19": 0.09966, + "20": 0.10107, + "21": 0.09445, + "22": 0.09548, + "23": 0.09554, + "24": 0.09478, + "25": 
0.09465, + "26": 0.09292, + "27": 0.10339, + "28": 0.09562, + "29": 0.09593, + "30": 0.09298, + "31": 0.09573, + "32": 0.09264, + "33": 0.0942, + "34": 0.09203, + "35": 0.09537, + "36": 0.09222, + "37": 0.09501, + "38": 0.0938, + "39": 0.09662, + "40": 0.10355, + "41": 0.09832, + "42": 0.09636, + "43": 0.09409, + "44": 0.09306, + "45": 0.09367, + "46": 0.09321, + "47": 0.10415, + "48": 0.09382, + "49": 0.09322, + "50": 0.09238, + "51": 0.09596, + "52": 0.09089, + "53": 0.0918, + "54": 0.09088, + "55": 0.09144, + "56": 0.09049, + "57": 0.09241, + "58": 0.09222, + "59": 0.09415, + "60": 0.09271, + "61": 0.09208, + "62": 0.09152, + "63": 0.09266, + "64": 0.09085, + "65": 0.09196, + "66": 0.09181, + "67": 0.09397, + "68": 0.08963, + "69": 0.09222, + "70": 0.09229, + "71": 0.09614, + "72": 0.0904, + "73": 0.09323, + "74": 0.09152, + "75": 0.09189, + "76": 0.08973, + "77": 0.09202, + "78": 0.08991, + "79": 0.09241, + "80": 0.08986, + "81": 0.09353, + "82": 0.09206, + "83": 0.09177, + "84": 0.09067, + "85": 0.09271, + "86": 0.09133, + "87": 0.09239, + "88": 0.08972, + "89": 0.09242, + "90": 0.09005, + "91": 0.09389, + "92": 0.09396, + "93": 0.09776, + "94": 0.09824, + "95": 0.1008, + "96": 0.09732, + "97": 0.09819, + "98": 0.09221, + "99": 0.09502, + "100": 0.09143 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..b668a763f40 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 
10.89631, + "2": 10.89416, + "3": 10.88786, + "4": 10.8914, + "5": 10.89154, + "6": 10.90001, + "7": 10.89184, + "8": 10.89886, + "9": 10.90208, + "10": 10.88361, + "11": 10.87817, + "12": 10.89334, + "13": 10.89814, + "14": 10.89242, + "15": 10.84803, + "16": 10.85398, + "17": 10.83097, + "18": 10.83991, + "19": 10.82801, + "20": 10.74824, + "21": 10.73496, + "22": 10.61719, + "23": 10.72621, + "24": 10.63178, + "25": 10.59309, + "26": 10.63369, + "27": 10.63304, + "28": 10.58264, + "29": 10.58594, + "30": 10.41204, + "31": 10.15899, + "32": 10.48366, + "33": 10.46706, + "34": 10.23811, + "35": 10.28189, + "36": 10.24056, + "37": 10.36219, + "38": 10.20309, + "39": 10.40454, + "40": 10.09271, + "41": 10.15835, + "42": 10.21933, + "43": 9.84358, + "44": 9.97303, + "45": 9.84194, + "46": 9.82017, + "47": 10.14969, + "48": 9.86023, + "49": 9.54235, + "50": 9.91343, + "51": 9.8545, + "52": 9.7393, + "53": 10.07426, + "54": 9.96913, + "55": 9.88574, + "56": 9.62438, + "57": 9.48229, + "58": 9.83484, + "59": 9.58731, + "60": 9.50243, + "61": 9.6934, + "62": 9.988, + "63": 9.39105, + "64": 9.78022, + "65": 8.94516, + "66": 9.70492, + "67": 9.37249, + "68": 9.78328, + "69": 9.79057, + "70": 9.74451, + "71": 9.62298, + "72": 9.58457, + "73": 9.50511, + "74": 8.94308, + "75": 9.42524, + "76": 9.07602, + "77": 10.06352, + "78": 9.72307, + "79": 9.37497, + "80": 9.40454, + "81": 9.4779, + "82": 9.69669, + "83": 9.30714, + "84": 9.41525, + "85": 9.61295, + "86": 9.07198, + "87": 9.58834, + "88": 9.7476, + "89": 9.59984, + "90": 9.81672, + "91": 9.33791, + "92": 9.35608, + "93": 9.07423, + "94": 8.83511, + "95": 9.51841, + "96": 9.52391, + "97": 9.30922, + "98": 9.66746, + "99": 8.88421, + "100": 9.39923 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1483.0, + "2": 1650.0, + "3": 1681.0, + "4": 1767.0, + "5": 1903.0, + "6": 1952.0, + "7": 1967.0, + "8": 1651.0, + "9": 1886.0, + "10": 1427.0, + "11": 1939.0, + "12": 
1778.0, + "13": 1964.0, + "14": 1762.0, + "15": 1980.0, + "16": 1923.0, + "17": 1817.0, + "18": 1783.0, + "19": 1750.0, + "20": 1588.0, + "21": 1855.0, + "22": 1641.0, + "23": 2098.0, + "24": 1679.0, + "25": 1649.0, + "26": 1806.0, + "27": 1834.0, + "28": 2042.0, + "29": 2033.0, + "30": 1984.0, + "31": 1518.0, + "32": 1954.0, + "33": 2068.0, + "34": 1900.0, + "35": 1921.0, + "36": 1965.0, + "37": 2321.0, + "38": 2340.0, + "39": 2344.0, + "40": 2367.0, + "41": 2457.0, + "42": 2367.0, + "43": 2020.0, + "44": 2135.0, + "45": 2184.0, + "46": 2310.0, + "47": 2463.0, + "48": 2450.0, + "49": 2259.0, + "50": 2444.0, + "51": 2543.0, + "52": 2613.0, + "53": 2945.0, + "54": 2713.0, + "55": 2503.0, + "56": 2692.0, + "57": 2338.0, + "58": 2961.0, + "59": 2620.0, + "60": 2367.0, + "61": 2909.0, + "62": 2728.0, + "63": 2399.0, + "64": 2909.0, + "65": 2605.0, + "66": 2983.0, + "67": 2793.0, + "68": 2663.0, + "69": 2833.0, + "70": 3135.0, + "71": 2997.0, + "72": 2464.0, + "73": 3088.0, + "74": 1970.0, + "75": 2556.0, + "76": 3064.0, + "77": 3231.0, + "78": 3097.0, + "79": 3035.0, + "80": 3301.0, + "81": 3599.0, + "82": 3215.0, + "83": 2757.0, + "84": 3130.0, + "85": 3380.0, + "86": 2742.0, + "87": 3723.0, + "88": 3066.0, + "89": 3264.0, + "90": 3198.0, + "91": 2718.0, + "92": 3070.0, + "93": 2624.0, + "94": 3301.0, + "95": 3431.0, + "96": 3358.0, + "97": 3142.0, + "98": 3704.0, + "99": 3107.0, + "100": 3089.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1261848064.0, + "2": 1261848064.0, + "3": 1261848064.0, + "4": 1261848064.0, + "5": 1261848064.0, + "6": 1261848064.0, + "7": 1261848064.0, + "8": 1261848064.0, + "9": 1261848064.0, + "10": 1261848064.0, + "11": 1261848064.0, + "12": 1261848064.0, + "13": 1261848064.0, + "14": 1261848064.0, + "15": 1261848064.0, + "16": 1261848064.0, + "17": 1261848064.0, + "18": 1261848064.0, + "19": 1261848064.0, + "20": 1261848064.0, + "21": 1261848064.0, + "22": 
1261848064.0, + "23": 1261848064.0, + "24": 1261848064.0, + "25": 1261848064.0, + "26": 1261848064.0, + "27": 1261848064.0, + "28": 1261848064.0, + "29": 1261848064.0, + "30": 1261848064.0, + "31": 1261848064.0, + "32": 1261848064.0, + "33": 1261848064.0, + "34": 1261848064.0, + "35": 1261848064.0, + "36": 1261848064.0, + "37": 1261848064.0, + "38": 1261848064.0, + "39": 1261848064.0, + "40": 1261848064.0, + "41": 1261848064.0, + "42": 1261848064.0, + "43": 1261848064.0, + "44": 1261848064.0, + "45": 1261848064.0, + "46": 1261848064.0, + "47": 1261848064.0, + "48": 1261848064.0, + "49": 1261848064.0, + "50": 1261848064.0, + "51": 1261848064.0, + "52": 1261848064.0, + "53": 1261848064.0, + "54": 1261848064.0, + "55": 1261848064.0, + "56": 1261848064.0, + "57": 1261848064.0, + "58": 1261848064.0, + "59": 1261848064.0, + "60": 1261848064.0, + "61": 1261848064.0, + "62": 1261848064.0, + "63": 1261848064.0, + "64": 1261848064.0, + "65": 1261848064.0, + "66": 1261848064.0, + "67": 1261848064.0, + "68": 1261848064.0, + "69": 1261848064.0, + "70": 1261848064.0, + "71": 1261848064.0, + "72": 1261848064.0, + "73": 1261848064.0, + "74": 1261848064.0, + "75": 1261848064.0, + "76": 1261848064.0, + "77": 1261848064.0, + "78": 1261848064.0, + "79": 1261848064.0, + "80": 1261848064.0, + "81": 1261848064.0, + "82": 1261848064.0, + "83": 1261848064.0, + "84": 1261848064.0, + "85": 1261848064.0, + "86": 1261848064.0, + "87": 1261848064.0, + "88": 1261848064.0, + "89": 1261848064.0, + "90": 1261848064.0, + "91": 1261848064.0, + "92": 1261848064.0, + "93": 1261848064.0, + "94": 1261848064.0, + "95": 1261848064.0, + "96": 1261848064.0, + "97": 1261848064.0, + "98": 1261848064.0, + "99": 1261848064.0, + "100": 1261848064.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2013852672.0, + "2": 2563430400.0, + "3": 2563430400.0, + "4": 2563430400.0, + "5": 2563430400.0, + "6": 2563430400.0, + "7": 2563430400.0, + "8": 
2563430400.0, + "9": 2563430400.0, + "10": 2563430400.0, + "11": 2563430400.0, + "12": 2563430400.0, + "13": 2563430400.0, + "14": 2563430400.0, + "15": 2563430400.0, + "16": 2563430400.0, + "17": 2563430400.0, + "18": 2563430400.0, + "19": 2563430400.0, + "20": 2563430400.0, + "21": 2563430400.0, + "22": 2563430400.0, + "23": 2563430400.0, + "24": 2563430400.0, + "25": 2563430400.0, + "26": 2563430400.0, + "27": 2563430400.0, + "28": 2563430400.0, + "29": 2563430400.0, + "30": 2563430400.0, + "31": 2563430400.0, + "32": 2563430400.0, + "33": 2563430400.0, + "34": 2563430400.0, + "35": 2563430400.0, + "36": 2563430400.0, + "37": 2563430400.0, + "38": 2563430400.0, + "39": 2563430400.0, + "40": 2563430400.0, + "41": 2563430400.0, + "42": 2563430400.0, + "43": 2563430400.0, + "44": 2563430400.0, + "45": 2563430400.0, + "46": 2563430400.0, + "47": 2563430400.0, + "48": 2563430400.0, + "49": 2563430400.0, + "50": 2563430400.0, + "51": 2563430400.0, + "52": 2563430400.0, + "53": 2563430400.0, + "54": 2563430400.0, + "55": 2563430400.0, + "56": 2563430400.0, + "57": 2563430400.0, + "58": 2563430400.0, + "59": 2563430400.0, + "60": 2563430400.0, + "61": 2563430400.0, + "62": 2563430400.0, + "63": 2563430400.0, + "64": 2563430400.0, + "65": 2563430400.0, + "66": 2563430400.0, + "67": 2563430400.0, + "68": 2563430400.0, + "69": 2563430400.0, + "70": 2563430400.0, + "71": 2563430400.0, + "72": 2563430400.0, + "73": 2563430400.0, + "74": 2563430400.0, + "75": 2563430400.0, + "76": 2563430400.0, + "77": 2563430400.0, + "78": 2563430400.0, + "79": 2563430400.0, + "80": 2563430400.0, + "81": 2563430400.0, + "82": 2563430400.0, + "83": 2563430400.0, + "84": 2563430400.0, + "85": 2563430400.0, + "86": 2563430400.0, + "87": 2563430400.0, + "88": 2563430400.0, + "89": 2563430400.0, + "90": 2563430400.0, + "91": 2563430400.0, + "92": 2563430400.0, + "93": 2563430400.0, + "94": 2563430400.0, + "95": 2563430400.0, + "96": 2563430400.0, + "97": 2563430400.0, + "98": 2563430400.0, + 
"99": 2563430400.0, + "100": 2563430400.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5.78359, + "2": 0.10731, + "3": 0.08283, + "4": 0.07992, + "5": 0.08439, + "6": 0.07969, + "7": 0.08163, + "8": 0.08089, + "9": 0.08141, + "10": 0.07975, + "11": 0.08161, + "12": 0.0805, + "13": 0.0818, + "14": 0.07991, + "15": 0.08157, + "16": 0.07954, + "17": 0.08164, + "18": 0.07926, + "19": 0.08125, + "20": 0.07966, + "21": 0.08124, + "22": 0.08103, + "23": 0.08196, + "24": 0.08021, + "25": 0.08231, + "26": 0.07972, + "27": 0.08528, + "28": 0.07953, + "29": 0.08123, + "30": 0.08056, + "31": 0.08212, + "32": 0.08047, + "33": 0.08698, + "34": 0.07962, + "35": 0.08139, + "36": 0.0794, + "37": 0.08328, + "38": 0.07999, + "39": 0.08718, + "40": 0.08108, + "41": 0.08156, + "42": 0.07929, + "43": 0.08201, + "44": 0.07973, + "45": 0.08159, + "46": 0.08471, + "47": 0.08541, + "48": 0.07975, + "49": 0.08192, + "50": 0.08031, + "51": 0.08385, + "52": 0.08324, + "53": 0.08018, + "54": 0.08375, + "55": 0.08221, + "56": 0.08137, + "57": 0.08577, + "58": 0.08166, + "59": 0.08204, + "60": 0.08143, + "61": 0.08073, + "62": 0.08115, + "63": 0.08107, + "64": 0.08084, + "65": 0.08278, + "66": 0.08197, + "67": 0.08122, + "68": 0.08061, + "69": 0.08097, + "70": 0.08354, + "71": 0.08073, + "72": 0.08394, + "73": 0.08209, + "74": 0.0827, + "75": 0.08731, + "76": 0.08195, + "77": 0.08148, + "78": 0.08314, + "79": 0.08109, + "80": 0.0807, + "81": 0.08051, + "82": 0.08191, + "83": 0.08724, + "84": 0.08176, + "85": 0.0832, + "86": 0.08166, + "87": 0.08365, + "88": 0.0816, + "89": 0.0817, + "90": 0.08103, + "91": 0.08096, + "92": 0.08046, + "93": 0.08298, + "94": 0.08019, + "95": 0.08128, + "96": 0.08237, + "97": 0.08167, + "98": 0.0806, + "99": 0.08319, + "100": 0.08202 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..df5117f4d8f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.89631, + "2": 10.89416, + "3": 10.88786, + "4": 10.8914, + "5": 10.89154, + "6": 10.90001, + "7": 10.89184, + "8": 10.89886, + "9": 10.90208, + "10": 10.88361, + "11": 10.87817, + "12": 10.89334, + "13": 10.89814, + "14": 10.89242, + "15": 10.84803, + "16": 10.85398, + "17": 10.83097, + "18": 10.83991, + "19": 10.82801, + "20": 10.74824, + "21": 10.73496, + "22": 10.61719, + "23": 10.72621, + "24": 10.63178, + "25": 10.59309, + "26": 10.63369, + "27": 10.63304, + "28": 10.58264, + "29": 10.58594, + "30": 10.41204, + "31": 10.15899, + "32": 10.48366, + "33": 10.46706, + "34": 10.23811, + "35": 10.28189, + "36": 10.24056, + "37": 10.36219, + "38": 10.20309, + "39": 10.40454, + "40": 10.09271, + "41": 10.15835, + "42": 10.21933, + "43": 9.84358, + "44": 9.97303, + "45": 9.84194, + "46": 9.82017, + "47": 10.14969, + "48": 9.86023, + "49": 9.54235, + "50": 9.91343, + "51": 9.8545, + "52": 9.7393, + "53": 10.07426, + "54": 9.96913, + "55": 9.88574, + "56": 9.62438, + "57": 9.48229, + "58": 9.83484, + "59": 9.58731, + "60": 9.50243, + "61": 9.6934, + "62": 9.988, + "63": 9.39105, + "64": 9.78022, + "65": 8.94516, + "66": 9.70492, + "67": 9.37249, + "68": 9.78328, + "69": 9.79057, + "70": 9.74451, + "71": 9.62298, + "72": 9.58457, + "73": 9.50511, + "74": 8.94308, + "75": 9.42524, + "76": 9.07602, + "77": 10.06352, + "78": 
9.72307, + "79": 9.37497, + "80": 9.40454, + "81": 9.4779, + "82": 9.69669, + "83": 9.30714, + "84": 9.41525, + "85": 9.61295, + "86": 9.07198, + "87": 9.58834, + "88": 9.7476, + "89": 9.59984, + "90": 9.81672, + "91": 9.33791, + "92": 9.35608, + "93": 9.07423, + "94": 8.83511, + "95": 9.51841, + "96": 9.52391, + "97": 9.30922, + "98": 9.66746, + "99": 8.88421, + "100": 9.39923 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1483.0, + "2": 1650.0, + "3": 1681.0, + "4": 1767.0, + "5": 1903.0, + "6": 1952.0, + "7": 1967.0, + "8": 1651.0, + "9": 1886.0, + "10": 1427.0, + "11": 1939.0, + "12": 1778.0, + "13": 1964.0, + "14": 1762.0, + "15": 1980.0, + "16": 1923.0, + "17": 1817.0, + "18": 1783.0, + "19": 1750.0, + "20": 1588.0, + "21": 1855.0, + "22": 1641.0, + "23": 2098.0, + "24": 1679.0, + "25": 1649.0, + "26": 1806.0, + "27": 1834.0, + "28": 2042.0, + "29": 2033.0, + "30": 1984.0, + "31": 1518.0, + "32": 1954.0, + "33": 2068.0, + "34": 1900.0, + "35": 1921.0, + "36": 1965.0, + "37": 2321.0, + "38": 2340.0, + "39": 2344.0, + "40": 2367.0, + "41": 2457.0, + "42": 2367.0, + "43": 2020.0, + "44": 2135.0, + "45": 2184.0, + "46": 2310.0, + "47": 2463.0, + "48": 2450.0, + "49": 2259.0, + "50": 2444.0, + "51": 2543.0, + "52": 2613.0, + "53": 2945.0, + "54": 2713.0, + "55": 2503.0, + "56": 2692.0, + "57": 2338.0, + "58": 2961.0, + "59": 2620.0, + "60": 2367.0, + "61": 2909.0, + "62": 2728.0, + "63": 2399.0, + "64": 2909.0, + "65": 2605.0, + "66": 2983.0, + "67": 2793.0, + "68": 2663.0, + "69": 2833.0, + "70": 3135.0, + "71": 2997.0, + "72": 2464.0, + "73": 3088.0, + "74": 1970.0, + "75": 2556.0, + "76": 3064.0, + "77": 3231.0, + "78": 3097.0, + "79": 3035.0, + "80": 3301.0, + "81": 3599.0, + "82": 3215.0, + "83": 2757.0, + "84": 3130.0, + "85": 3380.0, + "86": 2742.0, + "87": 3723.0, + "88": 3066.0, + "89": 3264.0, + "90": 3198.0, + "91": 2718.0, + "92": 3070.0, + "93": 2624.0, + "94": 3301.0, + "95": 3431.0, + "96": 
3358.0, + "97": 3142.0, + "98": 3704.0, + "99": 3107.0, + "100": 3089.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1261848064.0, + "2": 1261848064.0, + "3": 1261848064.0, + "4": 1261848064.0, + "5": 1261848064.0, + "6": 1261848064.0, + "7": 1261848064.0, + "8": 1261848064.0, + "9": 1261848064.0, + "10": 1261848064.0, + "11": 1261848064.0, + "12": 1261848064.0, + "13": 1261848064.0, + "14": 1261848064.0, + "15": 1261848064.0, + "16": 1261848064.0, + "17": 1261848064.0, + "18": 1261848064.0, + "19": 1261848064.0, + "20": 1261848064.0, + "21": 1261848064.0, + "22": 1261848064.0, + "23": 1261848064.0, + "24": 1261848064.0, + "25": 1261848064.0, + "26": 1261848064.0, + "27": 1261848064.0, + "28": 1261848064.0, + "29": 1261848064.0, + "30": 1261848064.0, + "31": 1261848064.0, + "32": 1261848064.0, + "33": 1261848064.0, + "34": 1261848064.0, + "35": 1261848064.0, + "36": 1261848064.0, + "37": 1261848064.0, + "38": 1261848064.0, + "39": 1261848064.0, + "40": 1261848064.0, + "41": 1261848064.0, + "42": 1261848064.0, + "43": 1261848064.0, + "44": 1261848064.0, + "45": 1261848064.0, + "46": 1261848064.0, + "47": 1261848064.0, + "48": 1261848064.0, + "49": 1261848064.0, + "50": 1261848064.0, + "51": 1261848064.0, + "52": 1261848064.0, + "53": 1261848064.0, + "54": 1261848064.0, + "55": 1261848064.0, + "56": 1261848064.0, + "57": 1261848064.0, + "58": 1261848064.0, + "59": 1261848064.0, + "60": 1261848064.0, + "61": 1261848064.0, + "62": 1261848064.0, + "63": 1261848064.0, + "64": 1261848064.0, + "65": 1261848064.0, + "66": 1261848064.0, + "67": 1261848064.0, + "68": 1261848064.0, + "69": 1261848064.0, + "70": 1261848064.0, + "71": 1261848064.0, + "72": 1261848064.0, + "73": 1261848064.0, + "74": 1261848064.0, + "75": 1261848064.0, + "76": 1261848064.0, + "77": 1261848064.0, + "78": 1261848064.0, + "79": 1261848064.0, + "80": 1261848064.0, + "81": 1261848064.0, + "82": 1261848064.0, + "83": 1261848064.0, 
+ "84": 1261848064.0, + "85": 1261848064.0, + "86": 1261848064.0, + "87": 1261848064.0, + "88": 1261848064.0, + "89": 1261848064.0, + "90": 1261848064.0, + "91": 1261848064.0, + "92": 1261848064.0, + "93": 1261848064.0, + "94": 1261848064.0, + "95": 1261848064.0, + "96": 1261848064.0, + "97": 1261848064.0, + "98": 1261848064.0, + "99": 1261848064.0, + "100": 1261848064.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2013852672.0, + "2": 2563430400.0, + "3": 2563430400.0, + "4": 2563430400.0, + "5": 2563430400.0, + "6": 2563430400.0, + "7": 2563430400.0, + "8": 2563430400.0, + "9": 2563430400.0, + "10": 2563430400.0, + "11": 2563430400.0, + "12": 2563430400.0, + "13": 2563430400.0, + "14": 2563430400.0, + "15": 2563430400.0, + "16": 2563430400.0, + "17": 2563430400.0, + "18": 2563430400.0, + "19": 2563430400.0, + "20": 2563430400.0, + "21": 2563430400.0, + "22": 2563430400.0, + "23": 2563430400.0, + "24": 2563430400.0, + "25": 2563430400.0, + "26": 2563430400.0, + "27": 2563430400.0, + "28": 2563430400.0, + "29": 2563430400.0, + "30": 2563430400.0, + "31": 2563430400.0, + "32": 2563430400.0, + "33": 2563430400.0, + "34": 2563430400.0, + "35": 2563430400.0, + "36": 2563430400.0, + "37": 2563430400.0, + "38": 2563430400.0, + "39": 2563430400.0, + "40": 2563430400.0, + "41": 2563430400.0, + "42": 2563430400.0, + "43": 2563430400.0, + "44": 2563430400.0, + "45": 2563430400.0, + "46": 2563430400.0, + "47": 2563430400.0, + "48": 2563430400.0, + "49": 2563430400.0, + "50": 2563430400.0, + "51": 2563430400.0, + "52": 2563430400.0, + "53": 2563430400.0, + "54": 2563430400.0, + "55": 2563430400.0, + "56": 2563430400.0, + "57": 2563430400.0, + "58": 2563430400.0, + "59": 2563430400.0, + "60": 2563430400.0, + "61": 2563430400.0, + "62": 2563430400.0, + "63": 2563430400.0, + "64": 2563430400.0, + "65": 2563430400.0, + "66": 2563430400.0, + "67": 2563430400.0, + "68": 2563430400.0, + "69": 2563430400.0, + 
"70": 2563430400.0, + "71": 2563430400.0, + "72": 2563430400.0, + "73": 2563430400.0, + "74": 2563430400.0, + "75": 2563430400.0, + "76": 2563430400.0, + "77": 2563430400.0, + "78": 2563430400.0, + "79": 2563430400.0, + "80": 2563430400.0, + "81": 2563430400.0, + "82": 2563430400.0, + "83": 2563430400.0, + "84": 2563430400.0, + "85": 2563430400.0, + "86": 2563430400.0, + "87": 2563430400.0, + "88": 2563430400.0, + "89": 2563430400.0, + "90": 2563430400.0, + "91": 2563430400.0, + "92": 2563430400.0, + "93": 2563430400.0, + "94": 2563430400.0, + "95": 2563430400.0, + "96": 2563430400.0, + "97": 2563430400.0, + "98": 2563430400.0, + "99": 2563430400.0, + "100": 2563430400.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.36321, + "2": 0.1218, + "3": 0.11132, + "4": 0.10707, + "5": 0.0969, + "6": 0.09387, + "7": 0.09166, + "8": 0.09482, + "9": 0.09368, + "10": 0.09371, + "11": 0.0914, + "12": 0.09315, + "13": 0.09323, + "14": 0.09407, + "15": 0.09341, + "16": 0.09525, + "17": 0.09338, + "18": 0.09247, + "19": 0.09648, + "20": 0.09425, + "21": 0.09329, + "22": 0.09356, + "23": 0.09379, + "24": 0.09405, + "25": 0.0935, + "26": 0.09238, + "27": 0.09612, + "28": 0.09315, + "29": 0.09297, + "30": 0.09342, + "31": 0.09294, + "32": 0.09287, + "33": 0.09256, + "34": 0.09461, + "35": 0.09355, + "36": 0.09517, + "37": 0.09434, + "38": 0.0956, + "39": 0.09435, + "40": 0.09568, + "41": 0.09615, + "42": 0.09244, + "43": 0.09364, + "44": 0.09376, + "45": 0.09258, + "46": 0.09268, + "47": 0.09255, + "48": 0.09424, + "49": 0.09573, + "50": 0.09436, + "51": 0.0945, + "52": 0.09894, + "53": 0.09918, + "54": 0.09823, + "55": 0.09863, + "56": 0.09834, + "57": 0.09709, + "58": 0.09303, + "59": 0.09404, + "60": 0.09192, + "61": 0.09198, + "62": 0.09274, + "63": 0.09166, + "64": 0.09147, + "65": 0.09327, + "66": 0.11015, + "67": 0.09684, + "68": 0.09642, + "69": 0.09562, + "70": 0.0924, + "71": 0.09384, + "72": 0.09189, + 
"73": 0.09372, + "74": 0.09193, + "75": 0.09409, + "76": 0.09252, + "77": 0.09275, + "78": 0.09475, + "79": 0.0945, + "80": 0.10107, + "81": 0.09197, + "82": 0.09204, + "83": 0.09353, + "84": 0.09326, + "85": 0.09194, + "86": 0.1029, + "87": 0.09285, + "88": 0.09168, + "89": 0.09478, + "90": 0.09254, + "91": 0.0921, + "92": 0.09246, + "93": 0.09207, + "94": 0.09324, + "95": 0.09431, + "96": 0.09195, + "97": 0.09285, + "98": 0.09175, + "99": 0.09153, + "100": 0.11457 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..2fa4188369a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.8401, + "2": 10.83566, + "3": 10.82993, + "4": 10.8173, + "5": 10.84032, + "6": 10.87262, + "7": 10.83467, + "8": 10.84031, + "9": 10.84361, + "10": 10.81341, + "11": 10.85023, + "12": 10.84316, + "13": 10.86604, + "14": 10.86311, + "15": 10.80278, + "16": 10.79645, + "17": 10.77627, + "18": 10.80147, + "19": 10.79392, + "20": 10.70496, + "21": 10.68149, + "22": 10.56314, + "23": 10.70138, + "24": 10.57935, + "25": 10.53846, + "26": 10.60617, + "27": 10.5921, + "28": 10.56154, + "29": 10.57665, + "30": 10.35517, + "31": 10.1277, + "32": 10.46372, + "33": 10.45444, + "34": 10.22446, + "35": 10.27147, + "36": 10.22183, + "37": 10.33944, + "38": 10.18637, + "39": 10.39327, + "40": 10.08044, + "41": 10.13794, + "42": 10.20012, + "43": 9.8379, + "44": 9.9433, + "45": 9.82292, + "46": 
9.8231, + "47": 10.13356, + "48": 9.84151, + "49": 9.52105, + "50": 9.90113, + "51": 9.83465, + "52": 9.73175, + "53": 10.04772, + "54": 9.93858, + "55": 9.86422, + "56": 9.61259, + "57": 9.46816, + "58": 9.81221, + "59": 9.57171, + "60": 9.48029, + "61": 9.67964, + "62": 9.96739, + "63": 9.35353, + "64": 9.75732, + "65": 8.93749, + "66": 9.68132, + "67": 9.357, + "68": 9.76807, + "69": 9.77288, + "70": 9.71025, + "71": 9.60021, + "72": 9.56674, + "73": 9.47644, + "74": 8.93192, + "75": 9.40879, + "76": 9.06885, + "77": 10.04691, + "78": 9.70976, + "79": 9.35666, + "80": 9.39077, + "81": 9.46573, + "82": 9.6803, + "83": 9.29215, + "84": 9.40239, + "85": 9.59743, + "86": 9.06112, + "87": 9.57954, + "88": 9.73247, + "89": 9.58838, + "90": 9.80386, + "91": 9.32104, + "92": 9.35012, + "93": 9.06314, + "94": 8.82007, + "95": 9.50565, + "96": 9.51099, + "97": 9.29311, + "98": 9.65573, + "99": 8.87504, + "100": 9.38812 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1670.0, + "2": 1691.0, + "3": 1630.0, + "4": 1805.0, + "5": 1970.0, + "6": 1901.0, + "7": 1815.0, + "8": 1592.0, + "9": 1968.0, + "10": 1436.0, + "11": 1923.0, + "12": 1867.0, + "13": 1888.0, + "14": 1807.0, + "15": 1918.0, + "16": 1922.0, + "17": 1774.0, + "18": 1735.0, + "19": 1886.0, + "20": 1786.0, + "21": 2020.0, + "22": 1685.0, + "23": 2112.0, + "24": 1657.0, + "25": 1610.0, + "26": 1815.0, + "27": 1880.0, + "28": 2025.0, + "29": 1975.0, + "30": 2039.0, + "31": 1713.0, + "32": 1926.0, + "33": 2163.0, + "34": 1894.0, + "35": 2001.0, + "36": 1963.0, + "37": 2401.0, + "38": 2324.0, + "39": 2351.0, + "40": 2321.0, + "41": 2266.0, + "42": 2317.0, + "43": 1999.0, + "44": 2133.0, + "45": 2205.0, + "46": 2324.0, + "47": 2463.0, + "48": 2447.0, + "49": 2237.0, + "50": 2365.0, + "51": 2534.0, + "52": 2604.0, + "53": 2995.0, + "54": 2699.0, + "55": 2489.0, + "56": 2680.0, + "57": 2285.0, + "58": 2976.0, + "59": 2816.0, + "60": 2508.0, + "61": 3075.0, + 
"62": 2710.0, + "63": 2574.0, + "64": 3027.0, + "65": 2719.0, + "66": 3182.0, + "67": 2770.0, + "68": 2875.0, + "69": 2961.0, + "70": 3241.0, + "71": 2859.0, + "72": 2495.0, + "73": 2972.0, + "74": 1989.0, + "75": 2643.0, + "76": 3012.0, + "77": 3398.0, + "78": 3413.0, + "79": 3272.0, + "80": 3368.0, + "81": 3656.0, + "82": 3228.0, + "83": 2772.0, + "84": 3146.0, + "85": 3336.0, + "86": 2738.0, + "87": 3886.0, + "88": 3044.0, + "89": 3429.0, + "90": 2961.0, + "91": 2952.0, + "92": 3239.0, + "93": 2791.0, + "94": 3583.0, + "95": 3533.0, + "96": 3530.0, + "97": 3241.0, + "98": 3680.0, + "99": 3320.0, + "100": 3432.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1230390272.0, + "2": 1230390272.0, + "3": 1230390272.0, + "4": 1230390272.0, + "5": 1230390272.0, + "6": 1230390272.0, + "7": 1230390272.0, + "8": 1230390272.0, + "9": 1230390272.0, + "10": 1230390272.0, + "11": 1230390272.0, + "12": 1230390272.0, + "13": 1230390272.0, + "14": 1230390272.0, + "15": 1230390272.0, + "16": 1230390272.0, + "17": 1230390272.0, + "18": 1230390272.0, + "19": 1230390272.0, + "20": 1230390272.0, + "21": 1230390272.0, + "22": 1230390272.0, + "23": 1230390272.0, + "24": 1230390272.0, + "25": 1230390272.0, + "26": 1230390272.0, + "27": 1230390272.0, + "28": 1230390272.0, + "29": 1230390272.0, + "30": 1230390272.0, + "31": 1230390272.0, + "32": 1230390272.0, + "33": 1230390272.0, + "34": 1230390272.0, + "35": 1230390272.0, + "36": 1230390272.0, + "37": 1230390272.0, + "38": 1230390272.0, + "39": 1230390272.0, + "40": 1230390272.0, + "41": 1230390272.0, + "42": 1230390272.0, + "43": 1230390272.0, + "44": 1230390272.0, + "45": 1230390272.0, + "46": 1230390272.0, + "47": 1230390272.0, + "48": 1230390272.0, + "49": 1230390272.0, + "50": 1230390272.0, + "51": 1230390272.0, + "52": 1230390272.0, + "53": 1230390272.0, + "54": 1230390272.0, + "55": 1230390272.0, + "56": 1230390272.0, + "57": 1230390272.0, + "58": 1230390272.0, 
+ "59": 1230390272.0, + "60": 1230390272.0, + "61": 1230390272.0, + "62": 1230390272.0, + "63": 1230390272.0, + "64": 1230390272.0, + "65": 1230390272.0, + "66": 1230390272.0, + "67": 1230390272.0, + "68": 1230390272.0, + "69": 1230390272.0, + "70": 1230390272.0, + "71": 1230390272.0, + "72": 1230390272.0, + "73": 1230390272.0, + "74": 1230390272.0, + "75": 1230390272.0, + "76": 1230390272.0, + "77": 1230390272.0, + "78": 1230390272.0, + "79": 1230390272.0, + "80": 1230390272.0, + "81": 1230390272.0, + "82": 1230390272.0, + "83": 1230390272.0, + "84": 1230390272.0, + "85": 1230390272.0, + "86": 1230390272.0, + "87": 1230390272.0, + "88": 1230390272.0, + "89": 1230390272.0, + "90": 1230390272.0, + "91": 1230390272.0, + "92": 1230390272.0, + "93": 1230390272.0, + "94": 1230390272.0, + "95": 1230390272.0, + "96": 1230390272.0, + "97": 1230390272.0, + "98": 1230390272.0, + "99": 1230390272.0, + "100": 1230390272.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1984492032.0, + "2": 2531972608.0, + "3": 2531972608.0, + "4": 2531972608.0, + "5": 2531972608.0, + "6": 2531972608.0, + "7": 2531972608.0, + "8": 2531972608.0, + "9": 2531972608.0, + "10": 2531972608.0, + "11": 2531972608.0, + "12": 2531972608.0, + "13": 2531972608.0, + "14": 2531972608.0, + "15": 2531972608.0, + "16": 2531972608.0, + "17": 2531972608.0, + "18": 2531972608.0, + "19": 2531972608.0, + "20": 2531972608.0, + "21": 2531972608.0, + "22": 2531972608.0, + "23": 2531972608.0, + "24": 2531972608.0, + "25": 2531972608.0, + "26": 2531972608.0, + "27": 2531972608.0, + "28": 2531972608.0, + "29": 2531972608.0, + "30": 2531972608.0, + "31": 2531972608.0, + "32": 2531972608.0, + "33": 2531972608.0, + "34": 2531972608.0, + "35": 2531972608.0, + "36": 2531972608.0, + "37": 2531972608.0, + "38": 2531972608.0, + "39": 2531972608.0, + "40": 2531972608.0, + "41": 2531972608.0, + "42": 2531972608.0, + "43": 2531972608.0, + "44": 2531972608.0, + 
"45": 2531972608.0, + "46": 2531972608.0, + "47": 2531972608.0, + "48": 2531972608.0, + "49": 2531972608.0, + "50": 2531972608.0, + "51": 2531972608.0, + "52": 2531972608.0, + "53": 2531972608.0, + "54": 2531972608.0, + "55": 2531972608.0, + "56": 2531972608.0, + "57": 2531972608.0, + "58": 2531972608.0, + "59": 2531972608.0, + "60": 2531972608.0, + "61": 2531972608.0, + "62": 2531972608.0, + "63": 2531972608.0, + "64": 2531972608.0, + "65": 2531972608.0, + "66": 2531972608.0, + "67": 2531972608.0, + "68": 2531972608.0, + "69": 2531972608.0, + "70": 2531972608.0, + "71": 2531972608.0, + "72": 2531972608.0, + "73": 2531972608.0, + "74": 2531972608.0, + "75": 2531972608.0, + "76": 2531972608.0, + "77": 2531972608.0, + "78": 2531972608.0, + "79": 2531972608.0, + "80": 2531972608.0, + "81": 2531972608.0, + "82": 2531972608.0, + "83": 2531972608.0, + "84": 2531972608.0, + "85": 2531972608.0, + "86": 2531972608.0, + "87": 2531972608.0, + "88": 2531972608.0, + "89": 2531972608.0, + "90": 2531972608.0, + "91": 2531972608.0, + "92": 2531972608.0, + "93": 2531972608.0, + "94": 2531972608.0, + "95": 2531972608.0, + "96": 2531972608.0, + "97": 2531972608.0, + "98": 2531972608.0, + "99": 2531972608.0, + "100": 2531972608.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.66979, + "2": 0.15375, + "3": 0.13471, + "4": 0.1451, + "5": 0.13243, + "6": 0.13226, + "7": 0.14437, + "8": 0.13751, + "9": 0.1427, + "10": 0.14549, + "11": 0.14547, + "12": 0.14682, + "13": 0.40877, + "14": 0.1477, + "15": 0.15085, + "16": 0.14383, + "17": 0.15106, + "18": 0.14683, + "19": 0.14809, + "20": 0.1535, + "21": 0.14869, + "22": 0.14139, + "23": 0.16201, + "24": 0.15437, + "25": 0.14424, + "26": 0.15046, + "27": 0.14191, + "28": 0.14273, + "29": 0.14227, + "30": 0.14587, + "31": 0.14729, + "32": 0.14529, + "33": 0.14194, + "34": 0.14753, + "35": 0.14364, + "36": 0.15173, + "37": 0.15588, + "38": 0.17947, + "39": 0.16014, + "40": 
0.16333, + "41": 0.15457, + "42": 0.17017, + "43": 0.13231, + "44": 0.13057, + "45": 0.13024, + "46": 0.1296, + "47": 0.13068, + "48": 0.12962, + "49": 0.13029, + "50": 0.13004, + "51": 0.13664, + "52": 0.1321, + "53": 0.13024, + "54": 0.16102, + "55": 0.15998, + "56": 0.16599, + "57": 0.1739, + "58": 0.1617, + "59": 0.16149, + "60": 0.15536, + "61": 0.19483, + "62": 0.18185, + "63": 0.17713, + "64": 0.20241, + "65": 0.2339, + "66": 0.19396, + "67": 0.18469, + "68": 0.13408, + "69": 0.13102, + "70": 0.13245, + "71": 0.1302, + "72": 0.13294, + "73": 0.13181, + "74": 0.13273, + "75": 0.13082, + "76": 0.13319, + "77": 0.13089, + "78": 0.13266, + "79": 0.13146, + "80": 0.13271, + "81": 0.13064, + "82": 0.133, + "83": 0.1325, + "84": 0.13269, + "85": 0.13105, + "86": 0.13314, + "87": 0.13059, + "88": 0.13244, + "89": 0.13183, + "90": 0.13294, + "91": 0.13281, + "92": 0.13352, + "93": 0.13201, + "94": 0.1343, + "95": 0.13224, + "96": 0.13339, + "97": 0.13189, + "98": 0.1351, + "99": 0.13191, + "100": 0.13277 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..a6e28752239 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.8401, + "2": 10.83566, + "3": 10.82993, + "4": 10.8173, + "5": 10.84032, + "6": 10.87262, + "7": 10.83467, + "8": 10.84031, + "9": 10.84361, + "10": 10.81341, + "11": 10.85023, + "12": 10.84316, + "13": 10.86604, + "14": 10.86311, + "15": 10.80278, + "16": 
10.79645, + "17": 10.77627, + "18": 10.80147, + "19": 10.79392, + "20": 10.70496, + "21": 10.68149, + "22": 10.56314, + "23": 10.70138, + "24": 10.57935, + "25": 10.53846, + "26": 10.60617, + "27": 10.5921, + "28": 10.56154, + "29": 10.57665, + "30": 10.35517, + "31": 10.1277, + "32": 10.46372, + "33": 10.45444, + "34": 10.22446, + "35": 10.27147, + "36": 10.22183, + "37": 10.33944, + "38": 10.18637, + "39": 10.39327, + "40": 10.08044, + "41": 10.13794, + "42": 10.20012, + "43": 9.8379, + "44": 9.9433, + "45": 9.82292, + "46": 9.8231, + "47": 10.13356, + "48": 9.84151, + "49": 9.52105, + "50": 9.90113, + "51": 9.83465, + "52": 9.73175, + "53": 10.04772, + "54": 9.93858, + "55": 9.86422, + "56": 9.61259, + "57": 9.46816, + "58": 9.81221, + "59": 9.57171, + "60": 9.48029, + "61": 9.67964, + "62": 9.96739, + "63": 9.35353, + "64": 9.75732, + "65": 8.93749, + "66": 9.68132, + "67": 9.357, + "68": 9.76807, + "69": 9.77288, + "70": 9.71025, + "71": 9.60021, + "72": 9.56674, + "73": 9.47644, + "74": 8.93192, + "75": 9.40879, + "76": 9.06885, + "77": 10.04691, + "78": 9.70976, + "79": 9.35666, + "80": 9.39077, + "81": 9.46573, + "82": 9.6803, + "83": 9.29215, + "84": 9.40239, + "85": 9.59743, + "86": 9.06112, + "87": 9.57954, + "88": 9.73247, + "89": 9.58838, + "90": 9.80386, + "91": 9.32104, + "92": 9.35012, + "93": 9.06314, + "94": 8.82007, + "95": 9.50565, + "96": 9.51099, + "97": 9.29311, + "98": 9.65573, + "99": 8.87504, + "100": 9.38812 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1670.0, + "2": 1691.0, + "3": 1630.0, + "4": 1805.0, + "5": 1970.0, + "6": 1901.0, + "7": 1815.0, + "8": 1592.0, + "9": 1968.0, + "10": 1436.0, + "11": 1923.0, + "12": 1867.0, + "13": 1888.0, + "14": 1807.0, + "15": 1918.0, + "16": 1922.0, + "17": 1774.0, + "18": 1735.0, + "19": 1886.0, + "20": 1786.0, + "21": 2020.0, + "22": 1685.0, + "23": 2112.0, + "24": 1657.0, + "25": 1610.0, + "26": 1815.0, + "27": 1880.0, + "28": 2025.0, + 
"29": 1975.0, + "30": 2039.0, + "31": 1713.0, + "32": 1926.0, + "33": 2163.0, + "34": 1894.0, + "35": 2001.0, + "36": 1963.0, + "37": 2401.0, + "38": 2324.0, + "39": 2351.0, + "40": 2321.0, + "41": 2266.0, + "42": 2317.0, + "43": 1999.0, + "44": 2133.0, + "45": 2205.0, + "46": 2324.0, + "47": 2463.0, + "48": 2447.0, + "49": 2237.0, + "50": 2365.0, + "51": 2534.0, + "52": 2604.0, + "53": 2995.0, + "54": 2699.0, + "55": 2489.0, + "56": 2680.0, + "57": 2285.0, + "58": 2976.0, + "59": 2816.0, + "60": 2508.0, + "61": 3075.0, + "62": 2710.0, + "63": 2574.0, + "64": 3027.0, + "65": 2719.0, + "66": 3182.0, + "67": 2770.0, + "68": 2875.0, + "69": 2961.0, + "70": 3241.0, + "71": 2859.0, + "72": 2495.0, + "73": 2972.0, + "74": 1989.0, + "75": 2643.0, + "76": 3012.0, + "77": 3398.0, + "78": 3413.0, + "79": 3272.0, + "80": 3368.0, + "81": 3656.0, + "82": 3228.0, + "83": 2772.0, + "84": 3146.0, + "85": 3336.0, + "86": 2738.0, + "87": 3886.0, + "88": 3044.0, + "89": 3429.0, + "90": 2961.0, + "91": 2952.0, + "92": 3239.0, + "93": 2791.0, + "94": 3583.0, + "95": 3533.0, + "96": 3530.0, + "97": 3241.0, + "98": 3680.0, + "99": 3320.0, + "100": 3432.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1230390272.0, + "2": 1230390272.0, + "3": 1230390272.0, + "4": 1230390272.0, + "5": 1230390272.0, + "6": 1230390272.0, + "7": 1230390272.0, + "8": 1230390272.0, + "9": 1230390272.0, + "10": 1230390272.0, + "11": 1230390272.0, + "12": 1230390272.0, + "13": 1230390272.0, + "14": 1230390272.0, + "15": 1230390272.0, + "16": 1230390272.0, + "17": 1230390272.0, + "18": 1230390272.0, + "19": 1230390272.0, + "20": 1230390272.0, + "21": 1230390272.0, + "22": 1230390272.0, + "23": 1230390272.0, + "24": 1230390272.0, + "25": 1230390272.0, + "26": 1230390272.0, + "27": 1230390272.0, + "28": 1230390272.0, + "29": 1230390272.0, + "30": 1230390272.0, + "31": 1230390272.0, + "32": 1230390272.0, + "33": 1230390272.0, + "34": 1230390272.0, 
+ "35": 1230390272.0, + "36": 1230390272.0, + "37": 1230390272.0, + "38": 1230390272.0, + "39": 1230390272.0, + "40": 1230390272.0, + "41": 1230390272.0, + "42": 1230390272.0, + "43": 1230390272.0, + "44": 1230390272.0, + "45": 1230390272.0, + "46": 1230390272.0, + "47": 1230390272.0, + "48": 1230390272.0, + "49": 1230390272.0, + "50": 1230390272.0, + "51": 1230390272.0, + "52": 1230390272.0, + "53": 1230390272.0, + "54": 1230390272.0, + "55": 1230390272.0, + "56": 1230390272.0, + "57": 1230390272.0, + "58": 1230390272.0, + "59": 1230390272.0, + "60": 1230390272.0, + "61": 1230390272.0, + "62": 1230390272.0, + "63": 1230390272.0, + "64": 1230390272.0, + "65": 1230390272.0, + "66": 1230390272.0, + "67": 1230390272.0, + "68": 1230390272.0, + "69": 1230390272.0, + "70": 1230390272.0, + "71": 1230390272.0, + "72": 1230390272.0, + "73": 1230390272.0, + "74": 1230390272.0, + "75": 1230390272.0, + "76": 1230390272.0, + "77": 1230390272.0, + "78": 1230390272.0, + "79": 1230390272.0, + "80": 1230390272.0, + "81": 1230390272.0, + "82": 1230390272.0, + "83": 1230390272.0, + "84": 1230390272.0, + "85": 1230390272.0, + "86": 1230390272.0, + "87": 1230390272.0, + "88": 1230390272.0, + "89": 1230390272.0, + "90": 1230390272.0, + "91": 1230390272.0, + "92": 1230390272.0, + "93": 1230390272.0, + "94": 1230390272.0, + "95": 1230390272.0, + "96": 1230390272.0, + "97": 1230390272.0, + "98": 1230390272.0, + "99": 1230390272.0, + "100": 1230390272.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1984492032.0, + "2": 2531972608.0, + "3": 2531972608.0, + "4": 2531972608.0, + "5": 2531972608.0, + "6": 2531972608.0, + "7": 2531972608.0, + "8": 2531972608.0, + "9": 2531972608.0, + "10": 2531972608.0, + "11": 2531972608.0, + "12": 2531972608.0, + "13": 2531972608.0, + "14": 2531972608.0, + "15": 2531972608.0, + "16": 2531972608.0, + "17": 2531972608.0, + "18": 2531972608.0, + "19": 2531972608.0, + "20": 2531972608.0, + 
"21": 2531972608.0, + "22": 2531972608.0, + "23": 2531972608.0, + "24": 2531972608.0, + "25": 2531972608.0, + "26": 2531972608.0, + "27": 2531972608.0, + "28": 2531972608.0, + "29": 2531972608.0, + "30": 2531972608.0, + "31": 2531972608.0, + "32": 2531972608.0, + "33": 2531972608.0, + "34": 2531972608.0, + "35": 2531972608.0, + "36": 2531972608.0, + "37": 2531972608.0, + "38": 2531972608.0, + "39": 2531972608.0, + "40": 2531972608.0, + "41": 2531972608.0, + "42": 2531972608.0, + "43": 2531972608.0, + "44": 2531972608.0, + "45": 2531972608.0, + "46": 2531972608.0, + "47": 2531972608.0, + "48": 2531972608.0, + "49": 2531972608.0, + "50": 2531972608.0, + "51": 2531972608.0, + "52": 2531972608.0, + "53": 2531972608.0, + "54": 2531972608.0, + "55": 2531972608.0, + "56": 2531972608.0, + "57": 2531972608.0, + "58": 2531972608.0, + "59": 2531972608.0, + "60": 2531972608.0, + "61": 2531972608.0, + "62": 2531972608.0, + "63": 2531972608.0, + "64": 2531972608.0, + "65": 2531972608.0, + "66": 2531972608.0, + "67": 2531972608.0, + "68": 2531972608.0, + "69": 2531972608.0, + "70": 2531972608.0, + "71": 2531972608.0, + "72": 2531972608.0, + "73": 2531972608.0, + "74": 2531972608.0, + "75": 2531972608.0, + "76": 2531972608.0, + "77": 2531972608.0, + "78": 2531972608.0, + "79": 2531972608.0, + "80": 2531972608.0, + "81": 2531972608.0, + "82": 2531972608.0, + "83": 2531972608.0, + "84": 2531972608.0, + "85": 2531972608.0, + "86": 2531972608.0, + "87": 2531972608.0, + "88": 2531972608.0, + "89": 2531972608.0, + "90": 2531972608.0, + "91": 2531972608.0, + "92": 2531972608.0, + "93": 2531972608.0, + "94": 2531972608.0, + "95": 2531972608.0, + "96": 2531972608.0, + "97": 2531972608.0, + "98": 2531972608.0, + "99": 2531972608.0, + "100": 2531972608.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.69156, + "2": 0.15851, + "3": 0.15939, + "4": 0.14587, + "5": 0.13996, + "6": 0.14246, + "7": 0.14168, + "8": 0.13947, + "9": 
0.1406, + "10": 0.13629, + "11": 0.38438, + "12": 0.13502, + "13": 0.13606, + "14": 0.14033, + "15": 0.13443, + "16": 0.13179, + "17": 0.13378, + "18": 0.13167, + "19": 0.13416, + "20": 0.134, + "21": 0.13338, + "22": 0.13341, + "23": 0.13463, + "24": 0.13194, + "25": 0.13343, + "26": 0.13151, + "27": 0.13224, + "28": 0.13211, + "29": 0.13154, + "30": 0.13114, + "31": 0.13127, + "32": 0.13156, + "33": 0.13112, + "34": 0.13133, + "35": 0.13254, + "36": 0.1314, + "37": 0.13112, + "38": 0.13159, + "39": 0.13294, + "40": 0.1325, + "41": 0.1311, + "42": 0.13177, + "43": 0.13171, + "44": 0.13171, + "45": 0.1308, + "46": 0.13012, + "47": 0.13104, + "48": 0.13108, + "49": 0.13129, + "50": 0.13155, + "51": 0.15273, + "52": 0.1324, + "53": 0.13236, + "54": 0.13244, + "55": 0.13198, + "56": 0.1336, + "57": 0.13148, + "58": 0.13225, + "59": 0.13123, + "60": 0.13225, + "61": 0.13307, + "62": 0.13259, + "63": 0.13191, + "64": 0.13297, + "65": 0.13243, + "66": 0.13236, + "67": 0.1309, + "68": 0.13226, + "69": 0.13072, + "70": 0.13171, + "71": 0.13137, + "72": 0.13229, + "73": 0.13521, + "74": 0.13296, + "75": 0.13526, + "76": 0.13228, + "77": 0.13205, + "78": 0.13248, + "79": 0.13355, + "80": 0.13311, + "81": 0.13269, + "82": 0.13199, + "83": 0.13576, + "84": 0.13205, + "85": 0.13411, + "86": 0.13176, + "87": 0.13273, + "88": 0.13166, + "89": 0.13262, + "90": 0.13138, + "91": 0.13261, + "92": 0.13197, + "93": 0.13258, + "94": 0.13132, + "95": 0.13295, + "96": 0.1307, + "97": 0.13291, + "98": 0.13163, + "99": 0.13281, + "100": 0.13201 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 8f2ce322a3e..8056e7174f0 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.85787, + "2": 10.87336, + "3": 10.86821, + "4": 10.87255, "5": 10.87398, + "6": 10.89631, + "7": 10.86379, + "8": 10.87834, + "9": 10.87399, "10": 10.83714, + "11": 10.86988, + "12": 10.85947, + "13": 10.87777, + "14": 10.87924, "15": 10.81888, + "16": 10.83058, + "17": 10.78684, + "18": 10.80146, + "19": 10.79775, "20": 10.71155, + "21": 10.6865, + "22": 10.55277, + "23": 10.7014, + "24": 10.58527, "25": 10.52658, + "26": 10.58299, + "27": 10.59487, + "28": 10.54787, + "29": 10.55928, "30": 10.32818, + "31": 10.08272, + "32": 10.44699, + "33": 10.42755, + "34": 10.17932, "35": 10.24095, + "36": 10.18094, + "37": 10.32809, + "38": 10.16727, + "39": 10.37344, "40": 10.05079, + "41": 10.10728, + "42": 10.17799, + "43": 9.77846, + "44": 9.91207, "45": 9.77392, + "46": 9.75431, + "47": 10.09497, + "48": 9.79523, + "49": 9.46391, "50": 9.8673, + "51": 9.80381, + "52": 9.68202, + "53": 10.02345, + "54": 9.91634, "55": 9.82456, + "56": 9.56974, + "57": 9.42672, + "58": 9.78081, + "59": 9.53243, "60": 9.44593, + "61": 9.64254, + "62": 9.94293, + "63": 9.31764, + "64": 9.72548, "65": 8.88739, + "66": 9.65691, + "67": 9.31749, + "68": 9.73495, + "69": 9.74866, "70": 9.69625, + "71": 9.57689, + "72": 9.52422, + "73": 9.45595, + "74": 8.88269, "75": 9.37584, + "76": 9.01136, + "77": 10.02287, + "78": 9.67963, + "79": 9.33172, "80": 9.35826, + "81": 9.43394, + "82": 9.65054, + "83": 9.25503, + "84": 9.3714, "85": 9.5623, + "86": 9.03489, + "87": 9.54614, + "88": 9.69785, + "89": 9.54656, "90": 9.77624, + "91": 9.2884, + "92": 9.30662, + "93": 9.02647, + "94": 
8.78837, "95": 9.48027, + "96": 9.47974, + "97": 9.25611, + "98": 9.61949, + "99": 8.83824, "100": 9.35135 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1858.0, + "2": 1854.0, + "3": 1803.0, + "4": 1955.0, "5": 2000.0, + "6": 2036.0, + "7": 1932.0, + "8": 1791.0, + "9": 1935.0, "10": 1654.0, + "11": 2080.0, + "12": 1881.0, + "13": 1977.0, + "14": 2080.0, "15": 1957.0, + "16": 1910.0, + "17": 1974.0, + "18": 1896.0, + "19": 1955.0, "20": 1816.0, + "21": 1906.0, + "22": 1972.0, + "23": 2062.0, + "24": 1897.0, "25": 1830.0, + "26": 1788.0, + "27": 1849.0, + "28": 2008.0, + "29": 2128.0, "30": 1969.0, + "31": 1630.0, + "32": 2057.0, + "33": 2171.0, + "34": 1947.0, "35": 2097.0, + "36": 1972.0, + "37": 2348.0, + "38": 2186.0, + "39": 2378.0, "40": 2181.0, + "41": 2326.0, + "42": 2334.0, + "43": 2219.0, + "44": 2234.0, "45": 2231.0, + "46": 2229.0, + "47": 2449.0, + "48": 2439.0, + "49": 2159.0, "50": 2290.0, + "51": 2514.0, + "52": 2513.0, + "53": 2894.0, + "54": 2656.0, "55": 2348.0, + "56": 2506.0, + "57": 2501.0, + "58": 2770.0, + "59": 2681.0, "60": 2434.0, + "61": 2776.0, + "62": 2596.0, + "63": 2617.0, + "64": 3012.0, "65": 2657.0, + "66": 2947.0, + "67": 3089.0, + "68": 2818.0, + "69": 2909.0, "70": 3025.0, + "71": 2924.0, + "72": 2702.0, + "73": 2947.0, + "74": 2306.0, "75": 2791.0, + "76": 3093.0, + "77": 3107.0, + "78": 3134.0, + "79": 3205.0, "80": 3123.0, + "81": 3290.0, + "82": 3172.0, + "83": 2719.0, + "84": 3328.0, "85": 3255.0, + "86": 2546.0, + "87": 3472.0, + "88": 3068.0, + "89": 2953.0, "90": 3300.0, + "91": 3154.0, + "92": 3061.0, + "93": 2889.0, + "94": 3535.0, "95": 3078.0, + "96": 3181.0, + "97": 3135.0, + "98": 3569.0, + "99": 3319.0, "100": 3223.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 921653248.0, + "2": 921653248.0, + "3": 921653248.0, + "4": 921653248.0, "5": 921653248.0, + "6": 
921653248.0, + "7": 921653248.0, + "8": 921653248.0, + "9": 921653248.0, "10": 921653248.0, + "11": 921653248.0, + "12": 921653248.0, + "13": 921653248.0, + "14": 921653248.0, "15": 921653248.0, + "16": 921653248.0, + "17": 921653248.0, + "18": 921653248.0, + "19": 921653248.0, "20": 921653248.0, + "21": 921653248.0, + "22": 921653248.0, + "23": 921653248.0, + "24": 921653248.0, "25": 921653248.0, + "26": 921653248.0, + "27": 921653248.0, + "28": 921653248.0, + "29": 921653248.0, "30": 921653248.0, + "31": 921653248.0, + "32": 921653248.0, + "33": 921653248.0, + "34": 921653248.0, "35": 921653248.0, + "36": 921653248.0, + "37": 921653248.0, + "38": 921653248.0, + "39": 921653248.0, "40": 921653248.0, + "41": 921653248.0, + "42": 921653248.0, + "43": 921653248.0, + "44": 921653248.0, "45": 921653248.0, + "46": 921653248.0, + "47": 921653248.0, + "48": 921653248.0, + "49": 921653248.0, "50": 921653248.0, + "51": 921653248.0, + "52": 921653248.0, + "53": 921653248.0, + "54": 921653248.0, "55": 921653248.0, + "56": 921653248.0, + "57": 921653248.0, + "58": 921653248.0, + "59": 921653248.0, "60": 921653248.0, + "61": 921653248.0, + "62": 921653248.0, + "63": 921653248.0, + "64": 921653248.0, "65": 921653248.0, + "66": 921653248.0, + "67": 921653248.0, + "68": 921653248.0, + "69": 921653248.0, "70": 921653248.0, + "71": 921653248.0, + "72": 921653248.0, + "73": 921653248.0, + "74": 921653248.0, "75": 921653248.0, + "76": 921653248.0, + "77": 921653248.0, + "78": 921653248.0, + "79": 921653248.0, "80": 921653248.0, + "81": 921653248.0, + "82": 921653248.0, + "83": 921653248.0, + "84": 921653248.0, "85": 921653248.0, + "86": 921653248.0, + "87": 921653248.0, + "88": 921653248.0, + "89": 921653248.0, "90": 921653248.0, + "91": 921653248.0, + "92": 921653248.0, + "93": 921653248.0, + "94": 921653248.0, "95": 921653248.0, + "96": 921653248.0, + "97": 921653248.0, + "98": 921653248.0, + "99": 921653248.0, "100": 921653248.0 } }, "mem-max-allocated-bytes": { "start_step": 1, 
"end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2237722624.0, + "2": 2600334336.0, + "3": 2600334336.0, + "4": 2600334336.0, "5": 2600334336.0, + "6": 2600334336.0, + "7": 2600334336.0, + "8": 2600334336.0, + "9": 2600334336.0, "10": 2600334336.0, + "11": 2600334336.0, + "12": 2600334336.0, + "13": 2600334336.0, + "14": 2600334336.0, "15": 2600334336.0, + "16": 2600334336.0, + "17": 2600334336.0, + "18": 2600334336.0, + "19": 2600334336.0, "20": 2600334336.0, + "21": 2600334336.0, + "22": 2600334336.0, + "23": 2600334336.0, + "24": 2600334336.0, "25": 2600334336.0, + "26": 2600334336.0, + "27": 2600334336.0, + "28": 2600334336.0, + "29": 2600334336.0, "30": 2600334336.0, + "31": 2600334336.0, + "32": 2600334336.0, + "33": 2600334336.0, + "34": 2600334336.0, "35": 2600334336.0, + "36": 2600334336.0, + "37": 2600334336.0, + "38": 2600334336.0, + "39": 2600334336.0, "40": 2600334336.0, + "41": 2600334336.0, + "42": 2600334336.0, + "43": 2600334336.0, + "44": 2600334336.0, "45": 2600334336.0, + "46": 2600334336.0, + "47": 2600334336.0, + "48": 2600334336.0, + "49": 2600334336.0, "50": 2600334336.0, + "51": 2600334336.0, + "52": 2600334336.0, + "53": 2600334336.0, + "54": 2600334336.0, "55": 2600334336.0, + "56": 2600334336.0, + "57": 2600334336.0, + "58": 2600334336.0, + "59": 2600334336.0, "60": 2600334336.0, + "61": 2600334336.0, + "62": 2600334336.0, + "63": 2600334336.0, + "64": 2600334336.0, "65": 2600334336.0, + "66": 2600334336.0, + "67": 2600334336.0, + "68": 2600334336.0, + "69": 2600334336.0, "70": 2600334336.0, + "71": 2600334336.0, + "72": 2600334336.0, + "73": 2600334336.0, + "74": 2600334336.0, "75": 2600334336.0, + "76": 2600334336.0, + "77": 2600334336.0, + "78": 2600334336.0, + "79": 2600334336.0, "80": 2600334336.0, + "81": 2600334336.0, + "82": 2600334336.0, + "83": 2600334336.0, + "84": 2600334336.0, "85": 2600334336.0, + "86": 2600334336.0, + "87": 2600334336.0, + "88": 2600334336.0, + "89": 2600334336.0, "90": 
2600334336.0, + "91": 2600334336.0, + "92": 2600334336.0, + "93": 2600334336.0, + "94": 2600334336.0, "95": 2600334336.0, + "96": 2600334336.0, + "97": 2600334336.0, + "98": 2600334336.0, + "99": 2600334336.0, "100": 2600334336.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 10.274, - "5": 0.08611, - "10": 0.08856, - "15": 0.09559, - "20": 0.08781, - "25": 0.0877, - "30": 0.08743, - "35": 0.08748, - "40": 0.08658, - "45": 0.08701, - "50": 0.08685, - "55": 0.08673, - "60": 0.08608, - "65": 0.08606, - "70": 0.08638, - "75": 0.08694, - "80": 0.08748, - "85": 0.08937, - "90": 0.08844, - "95": 0.08722, - "100": 0.08813 + "1": 10.43555, + "2": 0.12658, + "3": 0.11069, + "4": 0.10147, + "5": 0.10118, + "6": 0.10108, + "7": 0.10059, + "8": 0.09885, + "9": 0.10197, + "10": 0.10148, + "11": 0.10092, + "12": 0.10046, + "13": 0.10111, + "14": 0.10211, + "15": 0.10226, + "16": 0.10138, + "17": 0.10161, + "18": 0.10294, + "19": 0.10161, + "20": 0.10231, + "21": 0.10295, + "22": 0.10337, + "23": 0.10219, + "24": 0.10301, + "25": 0.10137, + "26": 0.10266, + "27": 0.10223, + "28": 0.10298, + "29": 0.1033, + "30": 0.1033, + "31": 0.10269, + "32": 0.1022, + "33": 0.10279, + "34": 0.1017, + "35": 0.1017, + "36": 0.10155, + "37": 0.1018, + "38": 0.10278, + "39": 0.10226, + "40": 0.10208, + "41": 0.10264, + "42": 0.10119, + "43": 0.10372, + "44": 0.10116, + "45": 0.1015, + "46": 0.09996, + "47": 0.10089, + "48": 0.10148, + "49": 0.10042, + "50": 0.09948, + "51": 0.10234, + "52": 0.10011, + "53": 0.09939, + "54": 0.09905, + "55": 0.1003, + "56": 0.09964, + "57": 0.10028, + "58": 0.10099, + "59": 0.09982, + "60": 0.09923, + "61": 0.09876, + "62": 0.09945, + "63": 0.10026, + "64": 0.09913, + "65": 0.09908, + "66": 0.10039, + "67": 0.10115, + "68": 0.10055, + "69": 0.09942, + "70": 0.09949, + "71": 0.09986, + "72": 0.10015, + "73": 0.10084, + "74": 0.10077, + "75": 0.09933, + "76": 0.10121, + "77": 0.09959, + "78": 
0.09938, + "79": 0.0991, + "80": 0.09802, + "81": 0.10115, + "82": 0.09939, + "83": 0.09963, + "84": 0.0992, + "85": 0.09904, + "86": 0.1026, + "87": 0.09983, + "88": 0.10128, + "89": 0.09897, + "90": 0.09918, + "91": 0.10029, + "92": 0.09877, + "93": 0.09988, + "94": 0.09933, + "95": 0.10109, + "96": 0.10013, + "97": 0.10103, + "98": 0.10004, + "99": 0.09987, + "100": 0.09979 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..482e2d753b9 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85787, + "2": 10.87336, + "3": 10.86821, + "4": 10.87255, + "5": 10.87398, + "6": 10.89631, + "7": 10.86379, + "8": 10.87834, + "9": 10.87399, + "10": 10.83714, + "11": 10.86988, + "12": 10.85947, + "13": 10.87777, + "14": 10.87924, + "15": 10.81888, + "16": 10.83058, + "17": 10.78684, + "18": 10.80146, + "19": 10.79775, + "20": 10.71155, + "21": 10.6865, + "22": 10.55277, + "23": 10.7014, + "24": 10.58527, + "25": 10.52658, + "26": 10.58299, + "27": 10.59487, + "28": 10.54787, + "29": 10.55928, + "30": 10.32818, + "31": 10.08272, + "32": 10.44699, + "33": 10.42755, + "34": 10.17932, + "35": 10.24095, + "36": 10.18094, + "37": 10.32809, + "38": 10.16727, + "39": 10.37344, + "40": 10.05079, + "41": 10.10728, + "42": 10.17799, + "43": 9.77846, + "44": 9.91207, + "45": 9.77392, + "46": 9.75431, + "47": 10.09497, + "48": 9.79523, + "49": 9.46391, + "50": 9.8673, + "51": 9.80381, + "52": 9.68202, + "53": 10.02345, 
+ "54": 9.91634, + "55": 9.82456, + "56": 9.56974, + "57": 9.42672, + "58": 9.78081, + "59": 9.53243, + "60": 9.44593, + "61": 9.64254, + "62": 9.94293, + "63": 9.31764, + "64": 9.72548, + "65": 8.88739, + "66": 9.65691, + "67": 9.31749, + "68": 9.73495, + "69": 9.74866, + "70": 9.69625, + "71": 9.57689, + "72": 9.52422, + "73": 9.45595, + "74": 8.88269, + "75": 9.37584, + "76": 9.01136, + "77": 10.02287, + "78": 9.67963, + "79": 9.33172, + "80": 9.35826, + "81": 9.43394, + "82": 9.65054, + "83": 9.25503, + "84": 9.3714, + "85": 9.5623, + "86": 9.03489, + "87": 9.54614, + "88": 9.69785, + "89": 9.54656, + "90": 9.77624, + "91": 9.2884, + "92": 9.30662, + "93": 9.02647, + "94": 8.78837, + "95": 9.48027, + "96": 9.47974, + "97": 9.25611, + "98": 9.61949, + "99": 8.83824, + "100": 9.35135 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1858.0, + "2": 1854.0, + "3": 1803.0, + "4": 1955.0, + "5": 2000.0, + "6": 2036.0, + "7": 1932.0, + "8": 1791.0, + "9": 1935.0, + "10": 1654.0, + "11": 2080.0, + "12": 1881.0, + "13": 1977.0, + "14": 2080.0, + "15": 1957.0, + "16": 1910.0, + "17": 1974.0, + "18": 1896.0, + "19": 1955.0, + "20": 1816.0, + "21": 1906.0, + "22": 1972.0, + "23": 2062.0, + "24": 1897.0, + "25": 1830.0, + "26": 1788.0, + "27": 1849.0, + "28": 2008.0, + "29": 2128.0, + "30": 1969.0, + "31": 1630.0, + "32": 2057.0, + "33": 2171.0, + "34": 1947.0, + "35": 2097.0, + "36": 1972.0, + "37": 2348.0, + "38": 2186.0, + "39": 2378.0, + "40": 2181.0, + "41": 2326.0, + "42": 2334.0, + "43": 2219.0, + "44": 2234.0, + "45": 2231.0, + "46": 2229.0, + "47": 2449.0, + "48": 2439.0, + "49": 2159.0, + "50": 2290.0, + "51": 2514.0, + "52": 2513.0, + "53": 2894.0, + "54": 2656.0, + "55": 2348.0, + "56": 2506.0, + "57": 2501.0, + "58": 2770.0, + "59": 2681.0, + "60": 2434.0, + "61": 2776.0, + "62": 2596.0, + "63": 2617.0, + "64": 3012.0, + "65": 2657.0, + "66": 2947.0, + "67": 3089.0, + "68": 2818.0, + "69": 2909.0, + "70": 
3025.0, + "71": 2924.0, + "72": 2702.0, + "73": 2947.0, + "74": 2306.0, + "75": 2791.0, + "76": 3093.0, + "77": 3107.0, + "78": 3134.0, + "79": 3205.0, + "80": 3123.0, + "81": 3290.0, + "82": 3172.0, + "83": 2719.0, + "84": 3328.0, + "85": 3255.0, + "86": 2546.0, + "87": 3472.0, + "88": 3068.0, + "89": 2953.0, + "90": 3300.0, + "91": 3154.0, + "92": 3061.0, + "93": 2889.0, + "94": 3535.0, + "95": 3078.0, + "96": 3181.0, + "97": 3135.0, + "98": 3569.0, + "99": 3319.0, + "100": 3223.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 921653248.0, + "2": 921653248.0, + "3": 921653248.0, + "4": 921653248.0, + "5": 921653248.0, + "6": 921653248.0, + "7": 921653248.0, + "8": 921653248.0, + "9": 921653248.0, + "10": 921653248.0, + "11": 921653248.0, + "12": 921653248.0, + "13": 921653248.0, + "14": 921653248.0, + "15": 921653248.0, + "16": 921653248.0, + "17": 921653248.0, + "18": 921653248.0, + "19": 921653248.0, + "20": 921653248.0, + "21": 921653248.0, + "22": 921653248.0, + "23": 921653248.0, + "24": 921653248.0, + "25": 921653248.0, + "26": 921653248.0, + "27": 921653248.0, + "28": 921653248.0, + "29": 921653248.0, + "30": 921653248.0, + "31": 921653248.0, + "32": 921653248.0, + "33": 921653248.0, + "34": 921653248.0, + "35": 921653248.0, + "36": 921653248.0, + "37": 921653248.0, + "38": 921653248.0, + "39": 921653248.0, + "40": 921653248.0, + "41": 921653248.0, + "42": 921653248.0, + "43": 921653248.0, + "44": 921653248.0, + "45": 921653248.0, + "46": 921653248.0, + "47": 921653248.0, + "48": 921653248.0, + "49": 921653248.0, + "50": 921653248.0, + "51": 921653248.0, + "52": 921653248.0, + "53": 921653248.0, + "54": 921653248.0, + "55": 921653248.0, + "56": 921653248.0, + "57": 921653248.0, + "58": 921653248.0, + "59": 921653248.0, + "60": 921653248.0, + "61": 921653248.0, + "62": 921653248.0, + "63": 921653248.0, + "64": 921653248.0, + "65": 921653248.0, + "66": 921653248.0, + "67": 921653248.0, + 
"68": 921653248.0, + "69": 921653248.0, + "70": 921653248.0, + "71": 921653248.0, + "72": 921653248.0, + "73": 921653248.0, + "74": 921653248.0, + "75": 921653248.0, + "76": 921653248.0, + "77": 921653248.0, + "78": 921653248.0, + "79": 921653248.0, + "80": 921653248.0, + "81": 921653248.0, + "82": 921653248.0, + "83": 921653248.0, + "84": 921653248.0, + "85": 921653248.0, + "86": 921653248.0, + "87": 921653248.0, + "88": 921653248.0, + "89": 921653248.0, + "90": 921653248.0, + "91": 921653248.0, + "92": 921653248.0, + "93": 921653248.0, + "94": 921653248.0, + "95": 921653248.0, + "96": 921653248.0, + "97": 921653248.0, + "98": 921653248.0, + "99": 921653248.0, + "100": 921653248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2237722624.0, + "2": 2600334336.0, + "3": 2600334336.0, + "4": 2600334336.0, + "5": 2600334336.0, + "6": 2600334336.0, + "7": 2600334336.0, + "8": 2600334336.0, + "9": 2600334336.0, + "10": 2600334336.0, + "11": 2600334336.0, + "12": 2600334336.0, + "13": 2600334336.0, + "14": 2600334336.0, + "15": 2600334336.0, + "16": 2600334336.0, + "17": 2600334336.0, + "18": 2600334336.0, + "19": 2600334336.0, + "20": 2600334336.0, + "21": 2600334336.0, + "22": 2600334336.0, + "23": 2600334336.0, + "24": 2600334336.0, + "25": 2600334336.0, + "26": 2600334336.0, + "27": 2600334336.0, + "28": 2600334336.0, + "29": 2600334336.0, + "30": 2600334336.0, + "31": 2600334336.0, + "32": 2600334336.0, + "33": 2600334336.0, + "34": 2600334336.0, + "35": 2600334336.0, + "36": 2600334336.0, + "37": 2600334336.0, + "38": 2600334336.0, + "39": 2600334336.0, + "40": 2600334336.0, + "41": 2600334336.0, + "42": 2600334336.0, + "43": 2600334336.0, + "44": 2600334336.0, + "45": 2600334336.0, + "46": 2600334336.0, + "47": 2600334336.0, + "48": 2600334336.0, + "49": 2600334336.0, + "50": 2600334336.0, + "51": 2600334336.0, + "52": 2600334336.0, + "53": 2600334336.0, + "54": 2600334336.0, + "55": 
2600334336.0, + "56": 2600334336.0, + "57": 2600334336.0, + "58": 2600334336.0, + "59": 2600334336.0, + "60": 2600334336.0, + "61": 2600334336.0, + "62": 2600334336.0, + "63": 2600334336.0, + "64": 2600334336.0, + "65": 2600334336.0, + "66": 2600334336.0, + "67": 2600334336.0, + "68": 2600334336.0, + "69": 2600334336.0, + "70": 2600334336.0, + "71": 2600334336.0, + "72": 2600334336.0, + "73": 2600334336.0, + "74": 2600334336.0, + "75": 2600334336.0, + "76": 2600334336.0, + "77": 2600334336.0, + "78": 2600334336.0, + "79": 2600334336.0, + "80": 2600334336.0, + "81": 2600334336.0, + "82": 2600334336.0, + "83": 2600334336.0, + "84": 2600334336.0, + "85": 2600334336.0, + "86": 2600334336.0, + "87": 2600334336.0, + "88": 2600334336.0, + "89": 2600334336.0, + "90": 2600334336.0, + "91": 2600334336.0, + "92": 2600334336.0, + "93": 2600334336.0, + "94": 2600334336.0, + "95": 2600334336.0, + "96": 2600334336.0, + "97": 2600334336.0, + "98": 2600334336.0, + "99": 2600334336.0, + "100": 2600334336.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.95491, + "2": 0.12886, + "3": 0.09196, + "4": 0.09036, + "5": 0.0891, + "6": 0.08806, + "7": 0.08916, + "8": 0.08903, + "9": 0.08912, + "10": 0.08738, + "11": 0.08775, + "12": 0.08738, + "13": 0.08675, + "14": 0.08535, + "15": 0.08586, + "16": 0.0851, + "17": 0.08505, + "18": 0.08481, + "19": 0.08648, + "20": 0.08679, + "21": 0.08735, + "22": 0.08776, + "23": 0.0857, + "24": 0.0851, + "25": 0.08801, + "26": 0.08761, + "27": 0.08685, + "28": 0.08721, + "29": 0.08807, + "30": 0.08783, + "31": 0.08825, + "32": 0.08805, + "33": 0.08749, + "34": 0.08564, + "35": 0.085, + "36": 0.08606, + "37": 0.08494, + "38": 0.08477, + "39": 0.08603, + "40": 0.08627, + "41": 0.08694, + "42": 0.08578, + "43": 0.08584, + "44": 0.08577, + "45": 0.08596, + "46": 0.08538, + "47": 0.0862, + "48": 0.08574, + "49": 0.08854, + "50": 0.08527, + "51": 0.09439, + "52": 0.08466, + "53": 0.08545, + 
"54": 0.08497, + "55": 0.08493, + "56": 0.08787, + "57": 0.08631, + "58": 0.08602, + "59": 0.08587, + "60": 0.0854, + "61": 0.08742, + "62": 0.0911, + "63": 0.09274, + "64": 0.08551, + "65": 0.08568, + "66": 0.0853, + "67": 0.08594, + "68": 0.08625, + "69": 0.08637, + "70": 0.08573, + "71": 0.08555, + "72": 0.0872, + "73": 0.08585, + "74": 0.08614, + "75": 0.08597, + "76": 0.08636, + "77": 0.08583, + "78": 0.08519, + "79": 0.0856, + "80": 0.08653, + "81": 0.08552, + "82": 0.08602, + "83": 0.08556, + "84": 0.08482, + "85": 0.08554, + "86": 0.08706, + "87": 0.08629, + "88": 0.08512, + "89": 0.08574, + "90": 0.08568, + "91": 0.08531, + "92": 0.08556, + "93": 0.08519, + "94": 0.08579, + "95": 0.0868, + "96": 0.08804, + "97": 0.08724, + "98": 0.08666, + "99": 0.08515, + "100": 0.08511 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..b0474f2f8ec --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85787, + "2": 10.87336, + "3": 10.86821, + "4": 10.87255, + "5": 10.87398, + "6": 10.89631, + "7": 10.86379, + "8": 10.87834, + "9": 10.87399, + "10": 10.83714, + "11": 10.86988, + "12": 10.85947, + "13": 10.87777, + "14": 10.87924, + "15": 10.81888, + "16": 10.83058, + "17": 10.78684, + "18": 10.80146, + "19": 10.79775, + "20": 10.71155, + "21": 10.6865, + "22": 10.55277, + "23": 10.7014, + "24": 10.58527, + "25": 10.52658, + "26": 10.58299, + "27": 10.59487, + "28": 10.54787, + "29": 10.55928, + "30": 10.32818, + 
"31": 10.08272, + "32": 10.44699, + "33": 10.42755, + "34": 10.17932, + "35": 10.24095, + "36": 10.18094, + "37": 10.32809, + "38": 10.16727, + "39": 10.37344, + "40": 10.05079, + "41": 10.10728, + "42": 10.17799, + "43": 9.77846, + "44": 9.91207, + "45": 9.77392, + "46": 9.75431, + "47": 10.09497, + "48": 9.79523, + "49": 9.46391, + "50": 9.8673, + "51": 9.80381, + "52": 9.68202, + "53": 10.02345, + "54": 9.91634, + "55": 9.82456, + "56": 9.56974, + "57": 9.42672, + "58": 9.78081, + "59": 9.53243, + "60": 9.44593, + "61": 9.64254, + "62": 9.94293, + "63": 9.31764, + "64": 9.72548, + "65": 8.88739, + "66": 9.65691, + "67": 9.31749, + "68": 9.73495, + "69": 9.74866, + "70": 9.69625, + "71": 9.57689, + "72": 9.52422, + "73": 9.45595, + "74": 8.88269, + "75": 9.37584, + "76": 9.01136, + "77": 10.02287, + "78": 9.67963, + "79": 9.33172, + "80": 9.35826, + "81": 9.43394, + "82": 9.65054, + "83": 9.25503, + "84": 9.3714, + "85": 9.5623, + "86": 9.03489, + "87": 9.54614, + "88": 9.69785, + "89": 9.54656, + "90": 9.77624, + "91": 9.2884, + "92": 9.30662, + "93": 9.02647, + "94": 8.78837, + "95": 9.48027, + "96": 9.47974, + "97": 9.25611, + "98": 9.61949, + "99": 8.83824, + "100": 9.35135 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1858.0, + "2": 1854.0, + "3": 1803.0, + "4": 1955.0, + "5": 2000.0, + "6": 2036.0, + "7": 1932.0, + "8": 1791.0, + "9": 1935.0, + "10": 1654.0, + "11": 2080.0, + "12": 1881.0, + "13": 1977.0, + "14": 2080.0, + "15": 1957.0, + "16": 1910.0, + "17": 1974.0, + "18": 1896.0, + "19": 1955.0, + "20": 1816.0, + "21": 1906.0, + "22": 1972.0, + "23": 2062.0, + "24": 1897.0, + "25": 1830.0, + "26": 1788.0, + "27": 1849.0, + "28": 2008.0, + "29": 2128.0, + "30": 1969.0, + "31": 1630.0, + "32": 2057.0, + "33": 2171.0, + "34": 1947.0, + "35": 2097.0, + "36": 1972.0, + "37": 2348.0, + "38": 2186.0, + "39": 2378.0, + "40": 2181.0, + "41": 2326.0, + "42": 2334.0, + "43": 2219.0, + "44": 2234.0, + 
"45": 2231.0, + "46": 2229.0, + "47": 2449.0, + "48": 2439.0, + "49": 2159.0, + "50": 2290.0, + "51": 2514.0, + "52": 2513.0, + "53": 2894.0, + "54": 2656.0, + "55": 2348.0, + "56": 2506.0, + "57": 2501.0, + "58": 2770.0, + "59": 2681.0, + "60": 2434.0, + "61": 2776.0, + "62": 2596.0, + "63": 2617.0, + "64": 3012.0, + "65": 2657.0, + "66": 2947.0, + "67": 3089.0, + "68": 2818.0, + "69": 2909.0, + "70": 3025.0, + "71": 2924.0, + "72": 2702.0, + "73": 2947.0, + "74": 2306.0, + "75": 2791.0, + "76": 3093.0, + "77": 3107.0, + "78": 3134.0, + "79": 3205.0, + "80": 3123.0, + "81": 3290.0, + "82": 3172.0, + "83": 2719.0, + "84": 3328.0, + "85": 3255.0, + "86": 2546.0, + "87": 3472.0, + "88": 3068.0, + "89": 2953.0, + "90": 3300.0, + "91": 3154.0, + "92": 3061.0, + "93": 2889.0, + "94": 3535.0, + "95": 3078.0, + "96": 3181.0, + "97": 3135.0, + "98": 3569.0, + "99": 3319.0, + "100": 3223.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 921653248.0, + "2": 921653248.0, + "3": 921653248.0, + "4": 921653248.0, + "5": 921653248.0, + "6": 921653248.0, + "7": 921653248.0, + "8": 921653248.0, + "9": 921653248.0, + "10": 921653248.0, + "11": 921653248.0, + "12": 921653248.0, + "13": 921653248.0, + "14": 921653248.0, + "15": 921653248.0, + "16": 921653248.0, + "17": 921653248.0, + "18": 921653248.0, + "19": 921653248.0, + "20": 921653248.0, + "21": 921653248.0, + "22": 921653248.0, + "23": 921653248.0, + "24": 921653248.0, + "25": 921653248.0, + "26": 921653248.0, + "27": 921653248.0, + "28": 921653248.0, + "29": 921653248.0, + "30": 921653248.0, + "31": 921653248.0, + "32": 921653248.0, + "33": 921653248.0, + "34": 921653248.0, + "35": 921653248.0, + "36": 921653248.0, + "37": 921653248.0, + "38": 921653248.0, + "39": 921653248.0, + "40": 921653248.0, + "41": 921653248.0, + "42": 921653248.0, + "43": 921653248.0, + "44": 921653248.0, + "45": 921653248.0, + "46": 921653248.0, + "47": 921653248.0, + "48": 
921653248.0, + "49": 921653248.0, + "50": 921653248.0, + "51": 921653248.0, + "52": 921653248.0, + "53": 921653248.0, + "54": 921653248.0, + "55": 921653248.0, + "56": 921653248.0, + "57": 921653248.0, + "58": 921653248.0, + "59": 921653248.0, + "60": 921653248.0, + "61": 921653248.0, + "62": 921653248.0, + "63": 921653248.0, + "64": 921653248.0, + "65": 921653248.0, + "66": 921653248.0, + "67": 921653248.0, + "68": 921653248.0, + "69": 921653248.0, + "70": 921653248.0, + "71": 921653248.0, + "72": 921653248.0, + "73": 921653248.0, + "74": 921653248.0, + "75": 921653248.0, + "76": 921653248.0, + "77": 921653248.0, + "78": 921653248.0, + "79": 921653248.0, + "80": 921653248.0, + "81": 921653248.0, + "82": 921653248.0, + "83": 921653248.0, + "84": 921653248.0, + "85": 921653248.0, + "86": 921653248.0, + "87": 921653248.0, + "88": 921653248.0, + "89": 921653248.0, + "90": 921653248.0, + "91": 921653248.0, + "92": 921653248.0, + "93": 921653248.0, + "94": 921653248.0, + "95": 921653248.0, + "96": 921653248.0, + "97": 921653248.0, + "98": 921653248.0, + "99": 921653248.0, + "100": 921653248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2237722624.0, + "2": 2600334336.0, + "3": 2600334336.0, + "4": 2600334336.0, + "5": 2600334336.0, + "6": 2600334336.0, + "7": 2600334336.0, + "8": 2600334336.0, + "9": 2600334336.0, + "10": 2600334336.0, + "11": 2600334336.0, + "12": 2600334336.0, + "13": 2600334336.0, + "14": 2600334336.0, + "15": 2600334336.0, + "16": 2600334336.0, + "17": 2600334336.0, + "18": 2600334336.0, + "19": 2600334336.0, + "20": 2600334336.0, + "21": 2600334336.0, + "22": 2600334336.0, + "23": 2600334336.0, + "24": 2600334336.0, + "25": 2600334336.0, + "26": 2600334336.0, + "27": 2600334336.0, + "28": 2600334336.0, + "29": 2600334336.0, + "30": 2600334336.0, + "31": 2600334336.0, + "32": 2600334336.0, + "33": 2600334336.0, + "34": 2600334336.0, + "35": 2600334336.0, + "36": 
2600334336.0, + "37": 2600334336.0, + "38": 2600334336.0, + "39": 2600334336.0, + "40": 2600334336.0, + "41": 2600334336.0, + "42": 2600334336.0, + "43": 2600334336.0, + "44": 2600334336.0, + "45": 2600334336.0, + "46": 2600334336.0, + "47": 2600334336.0, + "48": 2600334336.0, + "49": 2600334336.0, + "50": 2600334336.0, + "51": 2600334336.0, + "52": 2600334336.0, + "53": 2600334336.0, + "54": 2600334336.0, + "55": 2600334336.0, + "56": 2600334336.0, + "57": 2600334336.0, + "58": 2600334336.0, + "59": 2600334336.0, + "60": 2600334336.0, + "61": 2600334336.0, + "62": 2600334336.0, + "63": 2600334336.0, + "64": 2600334336.0, + "65": 2600334336.0, + "66": 2600334336.0, + "67": 2600334336.0, + "68": 2600334336.0, + "69": 2600334336.0, + "70": 2600334336.0, + "71": 2600334336.0, + "72": 2600334336.0, + "73": 2600334336.0, + "74": 2600334336.0, + "75": 2600334336.0, + "76": 2600334336.0, + "77": 2600334336.0, + "78": 2600334336.0, + "79": 2600334336.0, + "80": 2600334336.0, + "81": 2600334336.0, + "82": 2600334336.0, + "83": 2600334336.0, + "84": 2600334336.0, + "85": 2600334336.0, + "86": 2600334336.0, + "87": 2600334336.0, + "88": 2600334336.0, + "89": 2600334336.0, + "90": 2600334336.0, + "91": 2600334336.0, + "92": 2600334336.0, + "93": 2600334336.0, + "94": 2600334336.0, + "95": 2600334336.0, + "96": 2600334336.0, + "97": 2600334336.0, + "98": 2600334336.0, + "99": 2600334336.0, + "100": 2600334336.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.39748, + "2": 0.11699, + "3": 0.10324, + "4": 0.10602, + "5": 0.10273, + "6": 0.10169, + "7": 0.10402, + "8": 0.10582, + "9": 0.10893, + "10": 0.10156, + "11": 0.10006, + "12": 0.10034, + "13": 0.10111, + "14": 0.10835, + "15": 0.10198, + "16": 0.10295, + "17": 0.10379, + "18": 0.10096, + "19": 0.10678, + "20": 0.10208, + "21": 0.10213, + "22": 0.10179, + "23": 0.10357, + "24": 0.10282, + "25": 0.09979, + "26": 0.10143, + "27": 0.10197, + "28": 0.10127, + 
"29": 0.10116, + "30": 0.10243, + "31": 0.10107, + "32": 0.10147, + "33": 0.10181, + "34": 0.1038, + "35": 0.10095, + "36": 0.09889, + "37": 0.09992, + "38": 0.10001, + "39": 0.10006, + "40": 0.10004, + "41": 0.09886, + "42": 0.09836, + "43": 0.09974, + "44": 0.10016, + "45": 0.10004, + "46": 0.09945, + "47": 0.0989, + "48": 0.09882, + "49": 0.09906, + "50": 0.09893, + "51": 0.10108, + "52": 0.10571, + "53": 0.10114, + "54": 0.09935, + "55": 0.09893, + "56": 0.09871, + "57": 0.10568, + "58": 0.09952, + "59": 0.10185, + "60": 0.09937, + "61": 0.09902, + "62": 0.10469, + "63": 0.10029, + "64": 0.09881, + "65": 0.09927, + "66": 0.09932, + "67": 0.10538, + "68": 0.09988, + "69": 0.10144, + "70": 0.09918, + "71": 0.10686, + "72": 0.09922, + "73": 0.09936, + "74": 0.09915, + "75": 0.09862, + "76": 0.1068, + "77": 0.09885, + "78": 0.09998, + "79": 0.1002, + "80": 0.09911, + "81": 0.10038, + "82": 0.09931, + "83": 0.09871, + "84": 0.09987, + "85": 0.09983, + "86": 0.10014, + "87": 0.0994, + "88": 0.09924, + "89": 0.10058, + "90": 0.10033, + "91": 0.10009, + "92": 0.10037, + "93": 0.09877, + "94": 0.09968, + "95": 0.10011, + "96": 0.09929, + "97": 0.09969, + "98": 0.09929, + "99": 0.10037, + "100": 0.10155 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 9cc113af90f..866cb310652 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 
@@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.85902, + "2": 10.87345, + "3": 10.86919, + "4": 10.87273, "5": 10.87389, + "6": 10.89658, + "7": 10.86387, + "8": 10.87869, + "9": 10.87439, "10": 10.83846, + "11": 10.87012, + "12": 10.86011, + "13": 10.87824, + "14": 10.87935, "15": 10.8191, + "16": 10.83109, + "17": 10.78722, + "18": 10.80215, + "19": 10.7983, "20": 10.71224, + "21": 10.68683, + "22": 10.55402, + "23": 10.70111, + "24": 10.58621, "25": 10.52673, + "26": 10.5837, + "27": 10.59499, + "28": 10.54816, + "29": 10.55965, "30": 10.32899, + "31": 10.08331, + "32": 10.44752, + "33": 10.4278, + "34": 10.1796, "35": 10.24121, + "36": 10.18155, + "37": 10.32827, + "38": 10.16792, + "39": 10.37357, "40": 10.05111, + "41": 10.10708, + "42": 10.17823, + "43": 9.77867, + "44": 9.91197, "45": 9.77404, + "46": 9.75415, + "47": 10.09501, + "48": 9.79531, + "49": 9.46422, "50": 9.86729, + "51": 9.80375, + "52": 9.68218, + "53": 10.02348, + "54": 9.91595, "55": 9.82442, + "56": 9.56994, + "57": 9.42628, + "58": 9.78075, + "59": 9.53254, "60": 9.44561, + "61": 9.64249, + "62": 9.94298, + "63": 9.31745, + "64": 9.7256, "65": 8.88735, + "66": 9.65711, + "67": 9.31747, + "68": 9.73506, + "69": 9.74863, "70": 9.69601, + "71": 9.57682, + "72": 9.52425, + "73": 9.4558, + "74": 8.8826, "75": 9.37563, + "76": 9.01106, + "77": 10.02278, + "78": 9.6796, + "79": 9.33171, "80": 9.35836, + "81": 9.43399, + "82": 9.65055, + "83": 9.2551, + "84": 9.37131, "85": 9.56237, + "86": 9.0351, + "87": 9.54617, + "88": 9.69806, + "89": 9.54657, "90": 9.77627, + "91": 9.28858, + "92": 9.30652, + "93": 9.02646, + "94": 8.7883, "95": 9.48041, + "96": 9.47962, + "97": 9.25545, + "98": 9.61947, + "99": 8.83854, "100": 9.35116 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1862.0, + "2": 1874.0, + "3": 1748.0, + "4": 1955.0, "5": 2050.0, + "6": 1997.0, + "7": 1967.0, 
+ "8": 1853.0, + "9": 1965.0, "10": 1652.0, + "11": 2042.0, + "12": 1877.0, + "13": 2076.0, + "14": 1956.0, "15": 1953.0, + "16": 1915.0, + "17": 2045.0, + "18": 1965.0, + "19": 1988.0, "20": 1785.0, + "21": 1941.0, + "22": 1928.0, + "23": 2112.0, + "24": 1802.0, "25": 1933.0, + "26": 1786.0, + "27": 1945.0, + "28": 2037.0, + "29": 2119.0, "30": 2022.0, + "31": 1699.0, + "32": 2130.0, + "33": 2187.0, + "34": 1929.0, "35": 2092.0, + "36": 2109.0, + "37": 2362.0, + "38": 2211.0, + "39": 2383.0, "40": 2203.0, + "41": 2288.0, + "42": 2224.0, + "43": 2150.0, + "44": 2206.0, "45": 2187.0, + "46": 2181.0, + "47": 2260.0, + "48": 2341.0, + "49": 2210.0, "50": 2219.0, + "51": 2508.0, + "52": 2483.0, + "53": 2959.0, + "54": 2554.0, "55": 2408.0, + "56": 2452.0, + "57": 2528.0, + "58": 2594.0, + "59": 2750.0, "60": 2563.0, + "61": 2794.0, + "62": 2495.0, + "63": 2493.0, + "64": 2965.0, "65": 2569.0, + "66": 2877.0, + "67": 2969.0, + "68": 2803.0, + "69": 2944.0, "70": 3001.0, + "71": 2867.0, + "72": 2714.0, + "73": 3017.0, + "74": 2281.0, "75": 2774.0, + "76": 2983.0, + "77": 2955.0, + "78": 3148.0, + "79": 3076.0, "80": 2992.0, + "81": 3255.0, + "82": 3212.0, + "83": 2809.0, + "84": 3266.0, "85": 3188.0, + "86": 2616.0, + "87": 3492.0, + "88": 3130.0, + "89": 3020.0, "90": 3238.0, + "91": 3106.0, + "92": 3183.0, + "93": 2960.0, + "94": 3492.0, "95": 3112.0, + "96": 3256.0, + "97": 3055.0, + "98": 3558.0, + "99": 3196.0, "100": 3109.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 921653248.0, + "2": 921653248.0, + "3": 921653248.0, + "4": 921653248.0, "5": 921653248.0, + "6": 921653248.0, + "7": 921653248.0, + "8": 921653248.0, + "9": 921653248.0, "10": 921653248.0, + "11": 921653248.0, + "12": 921653248.0, + "13": 921653248.0, + "14": 921653248.0, "15": 921653248.0, + "16": 921653248.0, + "17": 921653248.0, + "18": 921653248.0, + "19": 921653248.0, "20": 921653248.0, + "21": 921653248.0, + "22": 
921653248.0, + "23": 921653248.0, + "24": 921653248.0, "25": 921653248.0, + "26": 921653248.0, + "27": 921653248.0, + "28": 921653248.0, + "29": 921653248.0, "30": 921653248.0, + "31": 921653248.0, + "32": 921653248.0, + "33": 921653248.0, + "34": 921653248.0, "35": 921653248.0, + "36": 921653248.0, + "37": 921653248.0, + "38": 921653248.0, + "39": 921653248.0, "40": 921653248.0, + "41": 921653248.0, + "42": 921653248.0, + "43": 921653248.0, + "44": 921653248.0, "45": 921653248.0, + "46": 921653248.0, + "47": 921653248.0, + "48": 921653248.0, + "49": 921653248.0, "50": 921653248.0, + "51": 921653248.0, + "52": 921653248.0, + "53": 921653248.0, + "54": 921653248.0, "55": 921653248.0, + "56": 921653248.0, + "57": 921653248.0, + "58": 921653248.0, + "59": 921653248.0, "60": 921653248.0, + "61": 921653248.0, + "62": 921653248.0, + "63": 921653248.0, + "64": 921653248.0, "65": 921653248.0, + "66": 921653248.0, + "67": 921653248.0, + "68": 921653248.0, + "69": 921653248.0, "70": 921653248.0, + "71": 921653248.0, + "72": 921653248.0, + "73": 921653248.0, + "74": 921653248.0, "75": 921653248.0, + "76": 921653248.0, + "77": 921653248.0, + "78": 921653248.0, + "79": 921653248.0, "80": 921653248.0, + "81": 921653248.0, + "82": 921653248.0, + "83": 921653248.0, + "84": 921653248.0, "85": 921653248.0, + "86": 921653248.0, + "87": 921653248.0, + "88": 921653248.0, + "89": 921653248.0, "90": 921653248.0, + "91": 921653248.0, + "92": 921653248.0, + "93": 921653248.0, + "94": 921653248.0, "95": 921653248.0, + "96": 921653248.0, + "97": 921653248.0, + "98": 921653248.0, + "99": 921653248.0, "100": 921653248.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2240868352.0, + "2": 2603480064.0, + "3": 2603480064.0, + "4": 2603480064.0, "5": 2603480064.0, + "6": 2603480064.0, + "7": 2603480064.0, + "8": 2603480064.0, + "9": 2603480064.0, "10": 2603480064.0, + "11": 2603480064.0, + "12": 2603480064.0, + "13": 
2603480064.0, + "14": 2603480064.0, "15": 2603480064.0, + "16": 2603480064.0, + "17": 2603480064.0, + "18": 2603480064.0, + "19": 2603480064.0, "20": 2603480064.0, + "21": 2603480064.0, + "22": 2603480064.0, + "23": 2603480064.0, + "24": 2603480064.0, "25": 2603480064.0, + "26": 2603480064.0, + "27": 2603480064.0, + "28": 2603480064.0, + "29": 2603480064.0, "30": 2603480064.0, + "31": 2603480064.0, + "32": 2603480064.0, + "33": 2603480064.0, + "34": 2603480064.0, "35": 2603480064.0, + "36": 2603480064.0, + "37": 2603480064.0, + "38": 2603480064.0, + "39": 2603480064.0, "40": 2603480064.0, + "41": 2603480064.0, + "42": 2603480064.0, + "43": 2603480064.0, + "44": 2603480064.0, "45": 2603480064.0, + "46": 2603480064.0, + "47": 2603480064.0, + "48": 2603480064.0, + "49": 2603480064.0, "50": 2603480064.0, + "51": 2603480064.0, + "52": 2603480064.0, + "53": 2603480064.0, + "54": 2603480064.0, "55": 2603480064.0, + "56": 2603480064.0, + "57": 2603480064.0, + "58": 2603480064.0, + "59": 2603480064.0, "60": 2603480064.0, + "61": 2603480064.0, + "62": 2603480064.0, + "63": 2603480064.0, + "64": 2603480064.0, "65": 2603480064.0, + "66": 2603480064.0, + "67": 2603480064.0, + "68": 2603480064.0, + "69": 2603480064.0, "70": 2603480064.0, + "71": 2603480064.0, + "72": 2603480064.0, + "73": 2603480064.0, + "74": 2603480064.0, "75": 2603480064.0, + "76": 2603480064.0, + "77": 2603480064.0, + "78": 2603480064.0, + "79": 2603480064.0, "80": 2603480064.0, + "81": 2603480064.0, + "82": 2603480064.0, + "83": 2603480064.0, + "84": 2603480064.0, "85": 2603480064.0, + "86": 2603480064.0, + "87": 2603480064.0, + "88": 2603480064.0, + "89": 2603480064.0, "90": 2603480064.0, + "91": 2603480064.0, + "92": 2603480064.0, + "93": 2603480064.0, + "94": 2603480064.0, "95": 2603480064.0, + "96": 2603480064.0, + "97": 2603480064.0, + "98": 2603480064.0, + "99": 2603480064.0, "100": 2603480064.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, 
"values": { - "1": 7.78165, - "5": 0.09513, - "10": 0.10651, - "15": 0.10345, - "20": 0.10578, - "25": 0.10549, - "30": 0.09676, - "35": 0.09698, - "40": 0.10038, - "45": 0.09627, - "50": 0.09595, - "55": 0.0993, - "60": 0.09556, - "65": 0.09917, - "70": 0.09623, - "75": 0.09539, - "80": 0.09584, - "85": 0.09887, - "90": 0.09565, - "95": 0.09717, - "100": 0.09806 + "1": 10.22635, + "2": 0.13443, + "3": 0.11453, + "4": 0.11544, + "5": 0.11529, + "6": 0.1139, + "7": 0.11696, + "8": 0.11432, + "9": 0.11422, + "10": 0.11467, + "11": 0.1115, + "12": 0.11137, + "13": 0.11192, + "14": 0.1124, + "15": 0.11313, + "16": 0.11436, + "17": 0.11212, + "18": 0.11209, + "19": 0.11518, + "20": 0.11167, + "21": 0.11083, + "22": 0.11186, + "23": 0.11362, + "24": 0.11218, + "25": 0.1144, + "26": 0.11178, + "27": 0.11153, + "28": 0.11303, + "29": 0.11052, + "30": 0.11214, + "31": 0.1141, + "32": 0.1126, + "33": 0.11238, + "34": 0.1134, + "35": 0.11232, + "36": 0.11052, + "37": 0.11225, + "38": 0.1121, + "39": 0.113, + "40": 0.11315, + "41": 0.11169, + "42": 0.11263, + "43": 0.11419, + "44": 0.11234, + "45": 0.11091, + "46": 0.11336, + "47": 0.11328, + "48": 0.11388, + "49": 0.11279, + "50": 0.11198, + "51": 0.13191, + "52": 0.11591, + "53": 0.11273, + "54": 0.11461, + "55": 0.11358, + "56": 0.11259, + "57": 0.11325, + "58": 0.1162, + "59": 0.11491, + "60": 0.11726, + "61": 0.11465, + "62": 0.11311, + "63": 0.11801, + "64": 0.11752, + "65": 0.11546, + "66": 0.11225, + "67": 0.11448, + "68": 0.11548, + "69": 0.11397, + "70": 0.11275, + "71": 0.11441, + "72": 0.11487, + "73": 0.11522, + "74": 0.11426, + "75": 0.11345, + "76": 0.11269, + "77": 0.1157, + "78": 0.11597, + "79": 0.11379, + "80": 0.11587, + "81": 0.11486, + "82": 0.11305, + "83": 0.1127, + "84": 0.11361, + "85": 0.11384, + "86": 0.11703, + "87": 0.11426, + "88": 0.11283, + "89": 0.1146, + "90": 0.11235, + "91": 0.11207, + "92": 0.11217, + "93": 0.11286, + "94": 0.11446, + "95": 0.11504, + "96": 0.11469, + "97": 0.11241, + 
"98": 0.11333, + "99": 0.11104, + "100": 0.1126 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..444ff2cd262 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85902, + "2": 10.87345, + "3": 10.86919, + "4": 10.87273, + "5": 10.87389, + "6": 10.89658, + "7": 10.86387, + "8": 10.87869, + "9": 10.87439, + "10": 10.83846, + "11": 10.87012, + "12": 10.86011, + "13": 10.87824, + "14": 10.87935, + "15": 10.8191, + "16": 10.83109, + "17": 10.78722, + "18": 10.80215, + "19": 10.7983, + "20": 10.71224, + "21": 10.68683, + "22": 10.55402, + "23": 10.70111, + "24": 10.58621, + "25": 10.52673, + "26": 10.5837, + "27": 10.59499, + "28": 10.54816, + "29": 10.55965, + "30": 10.32899, + "31": 10.08331, + "32": 10.44752, + "33": 10.4278, + "34": 10.1796, + "35": 10.24121, + "36": 10.18155, + "37": 10.32827, + "38": 10.16792, + "39": 10.37357, + "40": 10.05111, + "41": 10.10708, + "42": 10.17823, + "43": 9.77867, + "44": 9.91197, + "45": 9.77404, + "46": 9.75415, + "47": 10.09501, + "48": 9.79531, + "49": 9.46422, + "50": 9.86729, + "51": 9.80375, + "52": 9.68218, + "53": 10.02348, + "54": 9.91595, + "55": 9.82442, + "56": 9.56994, + "57": 9.42628, + "58": 9.78075, + "59": 9.53254, + "60": 9.44561, + "61": 9.64249, + "62": 9.94298, + "63": 9.31745, + "64": 9.7256, + "65": 8.88735, + "66": 9.65711, + "67": 9.31747, + "68": 9.73506, + "69": 
9.74863, + "70": 9.69601, + "71": 9.57682, + "72": 9.52425, + "73": 9.4558, + "74": 8.8826, + "75": 9.37563, + "76": 9.01106, + "77": 10.02278, + "78": 9.6796, + "79": 9.33171, + "80": 9.35836, + "81": 9.43399, + "82": 9.65055, + "83": 9.2551, + "84": 9.37131, + "85": 9.56237, + "86": 9.0351, + "87": 9.54617, + "88": 9.69806, + "89": 9.54657, + "90": 9.77627, + "91": 9.28858, + "92": 9.30652, + "93": 9.02646, + "94": 8.7883, + "95": 9.48041, + "96": 9.47962, + "97": 9.25545, + "98": 9.61947, + "99": 8.83854, + "100": 9.35116 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1862.0, + "2": 1874.0, + "3": 1748.0, + "4": 1955.0, + "5": 2050.0, + "6": 1997.0, + "7": 1967.0, + "8": 1853.0, + "9": 1965.0, + "10": 1652.0, + "11": 2042.0, + "12": 1877.0, + "13": 2076.0, + "14": 1956.0, + "15": 1953.0, + "16": 1915.0, + "17": 2045.0, + "18": 1965.0, + "19": 1988.0, + "20": 1785.0, + "21": 1941.0, + "22": 1928.0, + "23": 2112.0, + "24": 1802.0, + "25": 1933.0, + "26": 1786.0, + "27": 1945.0, + "28": 2037.0, + "29": 2119.0, + "30": 2022.0, + "31": 1699.0, + "32": 2130.0, + "33": 2187.0, + "34": 1929.0, + "35": 2092.0, + "36": 2109.0, + "37": 2362.0, + "38": 2211.0, + "39": 2383.0, + "40": 2203.0, + "41": 2288.0, + "42": 2224.0, + "43": 2150.0, + "44": 2206.0, + "45": 2187.0, + "46": 2181.0, + "47": 2260.0, + "48": 2341.0, + "49": 2210.0, + "50": 2219.0, + "51": 2508.0, + "52": 2483.0, + "53": 2959.0, + "54": 2554.0, + "55": 2408.0, + "56": 2452.0, + "57": 2528.0, + "58": 2594.0, + "59": 2750.0, + "60": 2563.0, + "61": 2794.0, + "62": 2495.0, + "63": 2493.0, + "64": 2965.0, + "65": 2569.0, + "66": 2877.0, + "67": 2969.0, + "68": 2803.0, + "69": 2944.0, + "70": 3001.0, + "71": 2867.0, + "72": 2714.0, + "73": 3017.0, + "74": 2281.0, + "75": 2774.0, + "76": 2983.0, + "77": 2955.0, + "78": 3148.0, + "79": 3076.0, + "80": 2992.0, + "81": 3255.0, + "82": 3212.0, + "83": 2809.0, + "84": 3266.0, + "85": 3188.0, + "86": 2616.0, + 
"87": 3492.0, + "88": 3130.0, + "89": 3020.0, + "90": 3238.0, + "91": 3106.0, + "92": 3183.0, + "93": 2960.0, + "94": 3492.0, + "95": 3112.0, + "96": 3256.0, + "97": 3055.0, + "98": 3558.0, + "99": 3196.0, + "100": 3109.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 921653248.0, + "2": 921653248.0, + "3": 921653248.0, + "4": 921653248.0, + "5": 921653248.0, + "6": 921653248.0, + "7": 921653248.0, + "8": 921653248.0, + "9": 921653248.0, + "10": 921653248.0, + "11": 921653248.0, + "12": 921653248.0, + "13": 921653248.0, + "14": 921653248.0, + "15": 921653248.0, + "16": 921653248.0, + "17": 921653248.0, + "18": 921653248.0, + "19": 921653248.0, + "20": 921653248.0, + "21": 921653248.0, + "22": 921653248.0, + "23": 921653248.0, + "24": 921653248.0, + "25": 921653248.0, + "26": 921653248.0, + "27": 921653248.0, + "28": 921653248.0, + "29": 921653248.0, + "30": 921653248.0, + "31": 921653248.0, + "32": 921653248.0, + "33": 921653248.0, + "34": 921653248.0, + "35": 921653248.0, + "36": 921653248.0, + "37": 921653248.0, + "38": 921653248.0, + "39": 921653248.0, + "40": 921653248.0, + "41": 921653248.0, + "42": 921653248.0, + "43": 921653248.0, + "44": 921653248.0, + "45": 921653248.0, + "46": 921653248.0, + "47": 921653248.0, + "48": 921653248.0, + "49": 921653248.0, + "50": 921653248.0, + "51": 921653248.0, + "52": 921653248.0, + "53": 921653248.0, + "54": 921653248.0, + "55": 921653248.0, + "56": 921653248.0, + "57": 921653248.0, + "58": 921653248.0, + "59": 921653248.0, + "60": 921653248.0, + "61": 921653248.0, + "62": 921653248.0, + "63": 921653248.0, + "64": 921653248.0, + "65": 921653248.0, + "66": 921653248.0, + "67": 921653248.0, + "68": 921653248.0, + "69": 921653248.0, + "70": 921653248.0, + "71": 921653248.0, + "72": 921653248.0, + "73": 921653248.0, + "74": 921653248.0, + "75": 921653248.0, + "76": 921653248.0, + "77": 921653248.0, + "78": 921653248.0, + "79": 921653248.0, + "80": 
921653248.0, + "81": 921653248.0, + "82": 921653248.0, + "83": 921653248.0, + "84": 921653248.0, + "85": 921653248.0, + "86": 921653248.0, + "87": 921653248.0, + "88": 921653248.0, + "89": 921653248.0, + "90": 921653248.0, + "91": 921653248.0, + "92": 921653248.0, + "93": 921653248.0, + "94": 921653248.0, + "95": 921653248.0, + "96": 921653248.0, + "97": 921653248.0, + "98": 921653248.0, + "99": 921653248.0, + "100": 921653248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2240868352.0, + "2": 2603480064.0, + "3": 2603480064.0, + "4": 2603480064.0, + "5": 2603480064.0, + "6": 2603480064.0, + "7": 2603480064.0, + "8": 2603480064.0, + "9": 2603480064.0, + "10": 2603480064.0, + "11": 2603480064.0, + "12": 2603480064.0, + "13": 2603480064.0, + "14": 2603480064.0, + "15": 2603480064.0, + "16": 2603480064.0, + "17": 2603480064.0, + "18": 2603480064.0, + "19": 2603480064.0, + "20": 2603480064.0, + "21": 2603480064.0, + "22": 2603480064.0, + "23": 2603480064.0, + "24": 2603480064.0, + "25": 2603480064.0, + "26": 2603480064.0, + "27": 2603480064.0, + "28": 2603480064.0, + "29": 2603480064.0, + "30": 2603480064.0, + "31": 2603480064.0, + "32": 2603480064.0, + "33": 2603480064.0, + "34": 2603480064.0, + "35": 2603480064.0, + "36": 2603480064.0, + "37": 2603480064.0, + "38": 2603480064.0, + "39": 2603480064.0, + "40": 2603480064.0, + "41": 2603480064.0, + "42": 2603480064.0, + "43": 2603480064.0, + "44": 2603480064.0, + "45": 2603480064.0, + "46": 2603480064.0, + "47": 2603480064.0, + "48": 2603480064.0, + "49": 2603480064.0, + "50": 2603480064.0, + "51": 2603480064.0, + "52": 2603480064.0, + "53": 2603480064.0, + "54": 2603480064.0, + "55": 2603480064.0, + "56": 2603480064.0, + "57": 2603480064.0, + "58": 2603480064.0, + "59": 2603480064.0, + "60": 2603480064.0, + "61": 2603480064.0, + "62": 2603480064.0, + "63": 2603480064.0, + "64": 2603480064.0, + "65": 2603480064.0, + "66": 2603480064.0, + "67": 
2603480064.0, + "68": 2603480064.0, + "69": 2603480064.0, + "70": 2603480064.0, + "71": 2603480064.0, + "72": 2603480064.0, + "73": 2603480064.0, + "74": 2603480064.0, + "75": 2603480064.0, + "76": 2603480064.0, + "77": 2603480064.0, + "78": 2603480064.0, + "79": 2603480064.0, + "80": 2603480064.0, + "81": 2603480064.0, + "82": 2603480064.0, + "83": 2603480064.0, + "84": 2603480064.0, + "85": 2603480064.0, + "86": 2603480064.0, + "87": 2603480064.0, + "88": 2603480064.0, + "89": 2603480064.0, + "90": 2603480064.0, + "91": 2603480064.0, + "92": 2603480064.0, + "93": 2603480064.0, + "94": 2603480064.0, + "95": 2603480064.0, + "96": 2603480064.0, + "97": 2603480064.0, + "98": 2603480064.0, + "99": 2603480064.0, + "100": 2603480064.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.53967, + "2": 0.14008, + "3": 0.1043, + "4": 0.16652, + "5": 0.10343, + "6": 0.10275, + "7": 0.10316, + "8": 0.10367, + "9": 0.10405, + "10": 0.10359, + "11": 0.09939, + "12": 0.09913, + "13": 0.09947, + "14": 0.09988, + "15": 0.10308, + "16": 0.0992, + "17": 0.10106, + "18": 0.0992, + "19": 0.09921, + "20": 0.1056, + "21": 0.10004, + "22": 0.10135, + "23": 0.1021, + "24": 0.10492, + "25": 0.09982, + "26": 0.10268, + "27": 0.10169, + "28": 0.1028, + "29": 0.10458, + "30": 0.10225, + "31": 0.09971, + "32": 0.09988, + "33": 0.10453, + "34": 0.10059, + "35": 0.10094, + "36": 0.1008, + "37": 0.10217, + "38": 0.10611, + "39": 0.10301, + "40": 0.10034, + "41": 0.09987, + "42": 0.09958, + "43": 0.10624, + "44": 0.09987, + "45": 0.09978, + "46": 0.09969, + "47": 0.10044, + "48": 0.10951, + "49": 0.10288, + "50": 0.10274, + "51": 0.10908, + "52": 0.10956, + "53": 0.10353, + "54": 0.10291, + "55": 0.09986, + "56": 0.10048, + "57": 0.10053, + "58": 0.10032, + "59": 0.09989, + "60": 0.09972, + "61": 0.09968, + "62": 0.09979, + "63": 0.10038, + "64": 0.09948, + "65": 0.10028, + "66": 0.0996, + "67": 0.10025, + "68": 0.09985, + "69": 0.1, + 
"70": 0.10176, + "71": 0.10036, + "72": 0.09961, + "73": 0.09996, + "74": 0.10022, + "75": 0.10121, + "76": 0.1012, + "77": 0.10049, + "78": 0.10212, + "79": 0.10036, + "80": 0.10284, + "81": 0.10151, + "82": 0.10433, + "83": 0.10034, + "84": 0.09991, + "85": 0.10037, + "86": 0.10005, + "87": 0.10117, + "88": 0.10004, + "89": 0.10192, + "90": 0.09956, + "91": 0.09987, + "92": 0.0995, + "93": 0.10044, + "94": 0.10249, + "95": 0.10315, + "96": 0.10488, + "97": 0.10312, + "98": 0.10392, + "99": 0.10217, + "100": 0.10295 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..8655a61eb9b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85902, + "2": 10.87345, + "3": 10.86919, + "4": 10.87273, + "5": 10.87389, + "6": 10.89658, + "7": 10.86387, + "8": 10.87869, + "9": 10.87439, + "10": 10.83846, + "11": 10.87012, + "12": 10.86011, + "13": 10.87824, + "14": 10.87935, + "15": 10.8191, + "16": 10.83109, + "17": 10.78722, + "18": 10.80215, + "19": 10.7983, + "20": 10.71224, + "21": 10.68683, + "22": 10.55402, + "23": 10.70111, + "24": 10.58621, + "25": 10.52673, + "26": 10.5837, + "27": 10.59499, + "28": 10.54816, + "29": 10.55965, + "30": 10.32899, + "31": 10.08331, + "32": 10.44752, + "33": 10.4278, + "34": 10.1796, + "35": 10.24121, + "36": 10.18155, + "37": 10.32827, + "38": 10.16792, + "39": 10.37357, + "40": 10.05111, + "41": 10.10708, + "42": 
10.17823, + "43": 9.77867, + "44": 9.91197, + "45": 9.77404, + "46": 9.75415, + "47": 10.09501, + "48": 9.79531, + "49": 9.46422, + "50": 9.86729, + "51": 9.80375, + "52": 9.68218, + "53": 10.02348, + "54": 9.91595, + "55": 9.82442, + "56": 9.56994, + "57": 9.42628, + "58": 9.78075, + "59": 9.53254, + "60": 9.44561, + "61": 9.64249, + "62": 9.94298, + "63": 9.31745, + "64": 9.7256, + "65": 8.88735, + "66": 9.65711, + "67": 9.31747, + "68": 9.73506, + "69": 9.74863, + "70": 9.69601, + "71": 9.57682, + "72": 9.52425, + "73": 9.4558, + "74": 8.8826, + "75": 9.37563, + "76": 9.01106, + "77": 10.02278, + "78": 9.6796, + "79": 9.33171, + "80": 9.35836, + "81": 9.43399, + "82": 9.65055, + "83": 9.2551, + "84": 9.37131, + "85": 9.56237, + "86": 9.0351, + "87": 9.54617, + "88": 9.69806, + "89": 9.54657, + "90": 9.77627, + "91": 9.28858, + "92": 9.30652, + "93": 9.02646, + "94": 8.7883, + "95": 9.48041, + "96": 9.47962, + "97": 9.25545, + "98": 9.61947, + "99": 8.83854, + "100": 9.35116 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1862.0, + "2": 1874.0, + "3": 1748.0, + "4": 1955.0, + "5": 2050.0, + "6": 1997.0, + "7": 1967.0, + "8": 1853.0, + "9": 1965.0, + "10": 1652.0, + "11": 2042.0, + "12": 1877.0, + "13": 2076.0, + "14": 1956.0, + "15": 1953.0, + "16": 1915.0, + "17": 2045.0, + "18": 1965.0, + "19": 1988.0, + "20": 1785.0, + "21": 1941.0, + "22": 1928.0, + "23": 2112.0, + "24": 1802.0, + "25": 1933.0, + "26": 1786.0, + "27": 1945.0, + "28": 2037.0, + "29": 2119.0, + "30": 2022.0, + "31": 1699.0, + "32": 2130.0, + "33": 2187.0, + "34": 1929.0, + "35": 2092.0, + "36": 2109.0, + "37": 2362.0, + "38": 2211.0, + "39": 2383.0, + "40": 2203.0, + "41": 2288.0, + "42": 2224.0, + "43": 2150.0, + "44": 2206.0, + "45": 2187.0, + "46": 2181.0, + "47": 2260.0, + "48": 2341.0, + "49": 2210.0, + "50": 2219.0, + "51": 2508.0, + "52": 2483.0, + "53": 2959.0, + "54": 2554.0, + "55": 2408.0, + "56": 2452.0, + "57": 2528.0, + 
"58": 2594.0, + "59": 2750.0, + "60": 2563.0, + "61": 2794.0, + "62": 2495.0, + "63": 2493.0, + "64": 2965.0, + "65": 2569.0, + "66": 2877.0, + "67": 2969.0, + "68": 2803.0, + "69": 2944.0, + "70": 3001.0, + "71": 2867.0, + "72": 2714.0, + "73": 3017.0, + "74": 2281.0, + "75": 2774.0, + "76": 2983.0, + "77": 2955.0, + "78": 3148.0, + "79": 3076.0, + "80": 2992.0, + "81": 3255.0, + "82": 3212.0, + "83": 2809.0, + "84": 3266.0, + "85": 3188.0, + "86": 2616.0, + "87": 3492.0, + "88": 3130.0, + "89": 3020.0, + "90": 3238.0, + "91": 3106.0, + "92": 3183.0, + "93": 2960.0, + "94": 3492.0, + "95": 3112.0, + "96": 3256.0, + "97": 3055.0, + "98": 3558.0, + "99": 3196.0, + "100": 3109.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 921653248.0, + "2": 921653248.0, + "3": 921653248.0, + "4": 921653248.0, + "5": 921653248.0, + "6": 921653248.0, + "7": 921653248.0, + "8": 921653248.0, + "9": 921653248.0, + "10": 921653248.0, + "11": 921653248.0, + "12": 921653248.0, + "13": 921653248.0, + "14": 921653248.0, + "15": 921653248.0, + "16": 921653248.0, + "17": 921653248.0, + "18": 921653248.0, + "19": 921653248.0, + "20": 921653248.0, + "21": 921653248.0, + "22": 921653248.0, + "23": 921653248.0, + "24": 921653248.0, + "25": 921653248.0, + "26": 921653248.0, + "27": 921653248.0, + "28": 921653248.0, + "29": 921653248.0, + "30": 921653248.0, + "31": 921653248.0, + "32": 921653248.0, + "33": 921653248.0, + "34": 921653248.0, + "35": 921653248.0, + "36": 921653248.0, + "37": 921653248.0, + "38": 921653248.0, + "39": 921653248.0, + "40": 921653248.0, + "41": 921653248.0, + "42": 921653248.0, + "43": 921653248.0, + "44": 921653248.0, + "45": 921653248.0, + "46": 921653248.0, + "47": 921653248.0, + "48": 921653248.0, + "49": 921653248.0, + "50": 921653248.0, + "51": 921653248.0, + "52": 921653248.0, + "53": 921653248.0, + "54": 921653248.0, + "55": 921653248.0, + "56": 921653248.0, + "57": 921653248.0, + "58": 
921653248.0, + "59": 921653248.0, + "60": 921653248.0, + "61": 921653248.0, + "62": 921653248.0, + "63": 921653248.0, + "64": 921653248.0, + "65": 921653248.0, + "66": 921653248.0, + "67": 921653248.0, + "68": 921653248.0, + "69": 921653248.0, + "70": 921653248.0, + "71": 921653248.0, + "72": 921653248.0, + "73": 921653248.0, + "74": 921653248.0, + "75": 921653248.0, + "76": 921653248.0, + "77": 921653248.0, + "78": 921653248.0, + "79": 921653248.0, + "80": 921653248.0, + "81": 921653248.0, + "82": 921653248.0, + "83": 921653248.0, + "84": 921653248.0, + "85": 921653248.0, + "86": 921653248.0, + "87": 921653248.0, + "88": 921653248.0, + "89": 921653248.0, + "90": 921653248.0, + "91": 921653248.0, + "92": 921653248.0, + "93": 921653248.0, + "94": 921653248.0, + "95": 921653248.0, + "96": 921653248.0, + "97": 921653248.0, + "98": 921653248.0, + "99": 921653248.0, + "100": 921653248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2240868352.0, + "2": 2603480064.0, + "3": 2603480064.0, + "4": 2603480064.0, + "5": 2603480064.0, + "6": 2603480064.0, + "7": 2603480064.0, + "8": 2603480064.0, + "9": 2603480064.0, + "10": 2603480064.0, + "11": 2603480064.0, + "12": 2603480064.0, + "13": 2603480064.0, + "14": 2603480064.0, + "15": 2603480064.0, + "16": 2603480064.0, + "17": 2603480064.0, + "18": 2603480064.0, + "19": 2603480064.0, + "20": 2603480064.0, + "21": 2603480064.0, + "22": 2603480064.0, + "23": 2603480064.0, + "24": 2603480064.0, + "25": 2603480064.0, + "26": 2603480064.0, + "27": 2603480064.0, + "28": 2603480064.0, + "29": 2603480064.0, + "30": 2603480064.0, + "31": 2603480064.0, + "32": 2603480064.0, + "33": 2603480064.0, + "34": 2603480064.0, + "35": 2603480064.0, + "36": 2603480064.0, + "37": 2603480064.0, + "38": 2603480064.0, + "39": 2603480064.0, + "40": 2603480064.0, + "41": 2603480064.0, + "42": 2603480064.0, + "43": 2603480064.0, + "44": 2603480064.0, + "45": 2603480064.0, + "46": 
2603480064.0, + "47": 2603480064.0, + "48": 2603480064.0, + "49": 2603480064.0, + "50": 2603480064.0, + "51": 2603480064.0, + "52": 2603480064.0, + "53": 2603480064.0, + "54": 2603480064.0, + "55": 2603480064.0, + "56": 2603480064.0, + "57": 2603480064.0, + "58": 2603480064.0, + "59": 2603480064.0, + "60": 2603480064.0, + "61": 2603480064.0, + "62": 2603480064.0, + "63": 2603480064.0, + "64": 2603480064.0, + "65": 2603480064.0, + "66": 2603480064.0, + "67": 2603480064.0, + "68": 2603480064.0, + "69": 2603480064.0, + "70": 2603480064.0, + "71": 2603480064.0, + "72": 2603480064.0, + "73": 2603480064.0, + "74": 2603480064.0, + "75": 2603480064.0, + "76": 2603480064.0, + "77": 2603480064.0, + "78": 2603480064.0, + "79": 2603480064.0, + "80": 2603480064.0, + "81": 2603480064.0, + "82": 2603480064.0, + "83": 2603480064.0, + "84": 2603480064.0, + "85": 2603480064.0, + "86": 2603480064.0, + "87": 2603480064.0, + "88": 2603480064.0, + "89": 2603480064.0, + "90": 2603480064.0, + "91": 2603480064.0, + "92": 2603480064.0, + "93": 2603480064.0, + "94": 2603480064.0, + "95": 2603480064.0, + "96": 2603480064.0, + "97": 2603480064.0, + "98": 2603480064.0, + "99": 2603480064.0, + "100": 2603480064.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.33977, + "2": 0.14663, + "3": 0.12463, + "4": 0.11901, + "5": 0.118, + "6": 0.11842, + "7": 0.11849, + "8": 0.11649, + "9": 0.11703, + "10": 0.11655, + "11": 0.11646, + "12": 0.11802, + "13": 0.11742, + "14": 0.1167, + "15": 0.11429, + "16": 0.11654, + "17": 0.11533, + "18": 0.11853, + "19": 0.1171, + "20": 0.11735, + "21": 0.11515, + "22": 0.11632, + "23": 0.11865, + "24": 0.11706, + "25": 0.11644, + "26": 0.11684, + "27": 0.11688, + "28": 0.11839, + "29": 0.11706, + "30": 0.11761, + "31": 0.11696, + "32": 0.11567, + "33": 0.1149, + "34": 0.11395, + "35": 0.11367, + "36": 0.11567, + "37": 0.11646, + "38": 0.11392, + "39": 0.11516, + "40": 0.11529, + "41": 0.11559, + "42": 
0.11519, + "43": 0.11808, + "44": 0.11599, + "45": 0.11605, + "46": 0.11502, + "47": 0.11651, + "48": 0.11713, + "49": 0.11667, + "50": 0.11432, + "51": 0.12857, + "52": 0.12187, + "53": 0.11684, + "54": 0.11222, + "55": 0.11538, + "56": 0.11241, + "57": 0.11229, + "58": 0.11087, + "59": 0.11183, + "60": 0.11124, + "61": 0.11009, + "62": 0.11052, + "63": 0.11585, + "64": 0.11262, + "65": 0.11148, + "66": 0.11248, + "67": 0.11274, + "68": 0.11394, + "69": 0.11397, + "70": 0.11233, + "71": 0.11354, + "72": 0.11589, + "73": 0.11373, + "74": 0.11483, + "75": 0.11512, + "76": 0.11378, + "77": 0.11431, + "78": 0.11374, + "79": 0.11521, + "80": 0.11486, + "81": 0.11364, + "82": 0.11419, + "83": 0.11439, + "84": 0.11589, + "85": 0.11422, + "86": 0.11458, + "87": 0.11184, + "88": 0.11418, + "89": 0.11264, + "90": 0.11169, + "91": 0.11452, + "92": 0.11215, + "93": 0.11431, + "94": 0.11145, + "95": 0.11129, + "96": 0.11113, + "97": 0.11365, + "98": 0.11127, + "99": 0.11136, + "100": 0.11229 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..11db16901fd --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84474, + "2": 10.84714, + "3": 10.84155, + "4": 10.82474, + "5": 10.86418, + "6": 10.87687, + "7": 10.86881, + "8": 10.85782, + "9": 10.86927, + "10": 10.82155, + "11": 10.90254, + "12": 10.87935, + "13": 10.88455, + "14": 
10.89946, + "15": 10.81195, + "16": 10.81872, + "17": 10.8008, + "18": 10.82581, + "19": 10.82045, + "20": 10.71872, + "21": 10.67848, + "22": 10.5397, + "23": 10.71982, + "24": 10.57533, + "25": 10.53036, + "26": 10.60075, + "27": 10.61432, + "28": 10.57308, + "29": 10.58758, + "30": 10.3358, + "31": 10.06363, + "32": 10.46475, + "33": 10.43552, + "34": 10.17388, + "35": 10.24081, + "36": 10.19268, + "37": 10.3222, + "38": 10.15004, + "39": 10.37797, + "40": 10.05008, + "41": 10.11342, + "42": 10.17323, + "43": 9.76225, + "44": 9.89234, + "45": 9.76762, + "46": 9.75986, + "47": 10.09534, + "48": 9.78722, + "49": 9.45529, + "50": 9.85505, + "51": 9.79116, + "52": 9.68704, + "53": 10.02199, + "54": 9.90262, + "55": 9.82465, + "56": 9.56989, + "57": 9.40892, + "58": 9.77732, + "59": 9.52733, + "60": 9.44306, + "61": 9.64215, + "62": 9.94224, + "63": 9.31031, + "64": 9.72428, + "65": 8.89104, + "66": 9.65351, + "67": 9.31775, + "68": 9.73884, + "69": 9.7436, + "70": 9.67902, + "71": 9.56185, + "72": 9.53074, + "73": 9.44621, + "74": 8.88449, + "75": 9.36836, + "76": 9.02423, + "77": 10.0162, + "78": 9.68193, + "79": 9.327, + "80": 9.35799, + "81": 9.43376, + "82": 9.64749, + "83": 9.25646, + "84": 9.3666, + "85": 9.56032, + "86": 9.0356, + "87": 9.54626, + "88": 9.70003, + "89": 9.54986, + "90": 9.77055, + "91": 9.28744, + "92": 9.31156, + "93": 9.03212, + "94": 8.78135, + "95": 9.48101, + "96": 9.47679, + "97": 9.24913, + "98": 9.61711, + "99": 8.83684, + "100": 9.34997 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1776.0, + "2": 1837.0, + "3": 1749.0, + "4": 1902.0, + "5": 2128.0, + "6": 2161.0, + "7": 1990.0, + "8": 1860.0, + "9": 1953.0, + "10": 1615.0, + "11": 2052.0, + "12": 1809.0, + "13": 2136.0, + "14": 1966.0, + "15": 2021.0, + "16": 1892.0, + "17": 1945.0, + "18": 1826.0, + "19": 1858.0, + "20": 1775.0, + "21": 1971.0, + "22": 1818.0, + "23": 2137.0, + "24": 1842.0, + "25": 1916.0, + "26": 1946.0, 
+ "27": 1940.0, + "28": 2046.0, + "29": 2000.0, + "30": 2029.0, + "31": 1701.0, + "32": 2056.0, + "33": 2208.0, + "34": 2024.0, + "35": 2107.0, + "36": 1985.0, + "37": 2243.0, + "38": 2228.0, + "39": 2433.0, + "40": 2174.0, + "41": 2295.0, + "42": 2262.0, + "43": 2097.0, + "44": 2291.0, + "45": 2110.0, + "46": 2293.0, + "47": 2553.0, + "48": 2368.0, + "49": 2280.0, + "50": 2363.0, + "51": 2596.0, + "52": 2582.0, + "53": 2816.0, + "54": 2729.0, + "55": 2460.0, + "56": 2735.0, + "57": 2451.0, + "58": 2746.0, + "59": 2848.0, + "60": 2462.0, + "61": 2890.0, + "62": 2565.0, + "63": 2520.0, + "64": 2932.0, + "65": 2724.0, + "66": 3014.0, + "67": 2958.0, + "68": 2847.0, + "69": 2937.0, + "70": 2952.0, + "71": 2954.0, + "72": 2617.0, + "73": 3068.0, + "74": 2239.0, + "75": 2823.0, + "76": 3073.0, + "77": 3109.0, + "78": 3263.0, + "79": 3254.0, + "80": 3222.0, + "81": 3475.0, + "82": 3277.0, + "83": 2732.0, + "84": 3393.0, + "85": 3314.0, + "86": 2674.0, + "87": 3433.0, + "88": 3250.0, + "89": 3089.0, + "90": 3087.0, + "91": 3070.0, + "92": 3358.0, + "93": 2823.0, + "94": 3442.0, + "95": 3146.0, + "96": 3256.0, + "97": 3086.0, + "98": 3563.0, + "99": 3247.0, + "100": 3331.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 888098304.0, + "2": 888098304.0, + "3": 888098304.0, + "4": 888098304.0, + "5": 888098304.0, + "6": 888098304.0, + "7": 888098304.0, + "8": 888098304.0, + "9": 888098304.0, + "10": 888098304.0, + "11": 888098304.0, + "12": 888098304.0, + "13": 888098304.0, + "14": 888098304.0, + "15": 888098304.0, + "16": 888098304.0, + "17": 888098304.0, + "18": 888098304.0, + "19": 888098304.0, + "20": 888098304.0, + "21": 888098304.0, + "22": 888098304.0, + "23": 888098304.0, + "24": 888098304.0, + "25": 888098304.0, + "26": 888098304.0, + "27": 888098304.0, + "28": 888098304.0, + "29": 888098304.0, + "30": 888098304.0, + "31": 888098304.0, + "32": 888098304.0, + "33": 888098304.0, + "34": 888098304.0, 
+ "35": 888098304.0, + "36": 888098304.0, + "37": 888098304.0, + "38": 888098304.0, + "39": 888098304.0, + "40": 888098304.0, + "41": 888098304.0, + "42": 888098304.0, + "43": 888098304.0, + "44": 888098304.0, + "45": 888098304.0, + "46": 888098304.0, + "47": 888098304.0, + "48": 888098304.0, + "49": 888098304.0, + "50": 888098304.0, + "51": 888098304.0, + "52": 888098304.0, + "53": 888098304.0, + "54": 888098304.0, + "55": 888098304.0, + "56": 888098304.0, + "57": 888098304.0, + "58": 888098304.0, + "59": 888098304.0, + "60": 888098304.0, + "61": 888098304.0, + "62": 888098304.0, + "63": 888098304.0, + "64": 888098304.0, + "65": 888098304.0, + "66": 888098304.0, + "67": 888098304.0, + "68": 888098304.0, + "69": 888098304.0, + "70": 888098304.0, + "71": 888098304.0, + "72": 888098304.0, + "73": 888098304.0, + "74": 888098304.0, + "75": 888098304.0, + "76": 888098304.0, + "77": 888098304.0, + "78": 888098304.0, + "79": 888098304.0, + "80": 888098304.0, + "81": 888098304.0, + "82": 888098304.0, + "83": 888098304.0, + "84": 888098304.0, + "85": 888098304.0, + "86": 888098304.0, + "87": 888098304.0, + "88": 888098304.0, + "89": 888098304.0, + "90": 888098304.0, + "91": 888098304.0, + "92": 888098304.0, + "93": 888098304.0, + "94": 888098304.0, + "95": 888098304.0, + "96": 888098304.0, + "97": 888098304.0, + "98": 888098304.0, + "99": 888098304.0, + "100": 888098304.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3216302592.0, + "2": 3575768576.0, + "3": 3575768576.0, + "4": 3575768576.0, + "5": 3575768576.0, + "6": 3575768576.0, + "7": 3575768576.0, + "8": 3575768576.0, + "9": 3575768576.0, + "10": 3575768576.0, + "11": 3575768576.0, + "12": 3575768576.0, + "13": 3575768576.0, + "14": 3575768576.0, + "15": 3575768576.0, + "16": 3575768576.0, + "17": 3575768576.0, + "18": 3575768576.0, + "19": 3575768576.0, + "20": 3575768576.0, + "21": 3575768576.0, + "22": 3575768576.0, + "23": 3575768576.0, + 
"24": 3575768576.0, + "25": 3575768576.0, + "26": 3575768576.0, + "27": 3575768576.0, + "28": 3575768576.0, + "29": 3575768576.0, + "30": 3575768576.0, + "31": 3575768576.0, + "32": 3575768576.0, + "33": 3575768576.0, + "34": 3575768576.0, + "35": 3575768576.0, + "36": 3575768576.0, + "37": 3575768576.0, + "38": 3575768576.0, + "39": 3575768576.0, + "40": 3575768576.0, + "41": 3575768576.0, + "42": 3575768576.0, + "43": 3575768576.0, + "44": 3575768576.0, + "45": 3575768576.0, + "46": 3575768576.0, + "47": 3575768576.0, + "48": 3575768576.0, + "49": 3575768576.0, + "50": 3575768576.0, + "51": 3575768576.0, + "52": 3575768576.0, + "53": 3575768576.0, + "54": 3575768576.0, + "55": 3575768576.0, + "56": 3575768576.0, + "57": 3575768576.0, + "58": 3575768576.0, + "59": 3575768576.0, + "60": 3575768576.0, + "61": 3575768576.0, + "62": 3575768576.0, + "63": 3575768576.0, + "64": 3575768576.0, + "65": 3575768576.0, + "66": 3575768576.0, + "67": 3575768576.0, + "68": 3575768576.0, + "69": 3575768576.0, + "70": 3575768576.0, + "71": 3575768576.0, + "72": 3575768576.0, + "73": 3575768576.0, + "74": 3575768576.0, + "75": 3575768576.0, + "76": 3575768576.0, + "77": 3575768576.0, + "78": 3575768576.0, + "79": 3575768576.0, + "80": 3575768576.0, + "81": 3575768576.0, + "82": 3575768576.0, + "83": 3575768576.0, + "84": 3575768576.0, + "85": 3575768576.0, + "86": 3575768576.0, + "87": 3575768576.0, + "88": 3575768576.0, + "89": 3575768576.0, + "90": 3575768576.0, + "91": 3575768576.0, + "92": 3575768576.0, + "93": 3575768576.0, + "94": 3575768576.0, + "95": 3575768576.0, + "96": 3575768576.0, + "97": 3575768576.0, + "98": 3575768576.0, + "99": 3575768576.0, + "100": 3575768576.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.22961, + "2": 0.22748, + "3": 0.18391, + "4": 0.18331, + "5": 0.1874, + "6": 0.18206, + "7": 0.18807, + "8": 0.18736, + "9": 0.17626, + "10": 0.18332, + "11": 0.18368, + "12": 0.42125, + "13": 
0.18444, + "14": 0.18305, + "15": 0.1848, + "16": 0.18368, + "17": 0.18426, + "18": 0.18316, + "19": 0.18444, + "20": 0.18426, + "21": 0.18455, + "22": 0.18314, + "23": 0.18337, + "24": 0.18472, + "25": 0.18337, + "26": 0.18358, + "27": 0.18264, + "28": 0.18257, + "29": 0.18324, + "30": 0.18335, + "31": 0.18284, + "32": 0.18259, + "33": 0.18301, + "34": 0.18387, + "35": 0.1854, + "36": 0.18356, + "37": 0.18347, + "38": 0.18279, + "39": 0.18388, + "40": 0.18293, + "41": 0.1825, + "42": 0.17397, + "43": 0.17567, + "44": 0.17489, + "45": 0.17541, + "46": 0.17602, + "47": 0.38172, + "48": 0.1751, + "49": 0.1743, + "50": 0.17335, + "51": 0.17566, + "52": 0.1679, + "53": 0.16794, + "54": 0.16866, + "55": 0.16905, + "56": 0.16842, + "57": 0.16848, + "58": 0.16761, + "59": 0.16753, + "60": 0.16801, + "61": 0.16865, + "62": 0.16798, + "63": 0.16843, + "64": 0.16707, + "65": 0.16694, + "66": 0.16951, + "67": 0.16784, + "68": 0.16521, + "69": 0.16496, + "70": 0.16411, + "71": 0.16368, + "72": 0.16388, + "73": 0.16443, + "74": 0.16404, + "75": 0.16491, + "76": 0.16453, + "77": 0.16357, + "78": 0.1639, + "79": 0.16482, + "80": 0.1642, + "81": 0.17333, + "82": 0.17353, + "83": 0.17251, + "84": 0.17307, + "85": 0.17382, + "86": 0.17698, + "87": 0.18538, + "88": 0.18078, + "89": 0.17207, + "90": 0.17225, + "91": 0.17489, + "92": 0.17401, + "93": 0.17299, + "94": 0.17352, + "95": 0.17399, + "96": 0.1736, + "97": 0.17413, + "98": 0.17369, + "99": 0.17278, + "100": 0.17242 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..9af18296737 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84474, + "2": 10.84714, + "3": 10.84155, + "4": 10.82474, + "5": 10.86418, + "6": 10.87687, + "7": 10.86881, + "8": 10.85782, + "9": 10.86927, + "10": 10.82155, + "11": 10.90254, + "12": 10.87935, + "13": 10.88455, + "14": 10.89946, + "15": 10.81195, + "16": 10.81872, + "17": 10.8008, + "18": 10.82581, + "19": 10.82045, + "20": 10.71872, + "21": 10.67848, + "22": 10.5397, + "23": 10.71982, + "24": 10.57533, + "25": 10.53036, + "26": 10.60075, + "27": 10.61432, + "28": 10.57308, + "29": 10.58758, + "30": 10.3358, + "31": 10.06363, + "32": 10.46475, + "33": 10.43552, + "34": 10.17388, + "35": 10.24081, + "36": 10.19268, + "37": 10.3222, + "38": 10.15004, + "39": 10.37797, + "40": 10.05008, + "41": 10.11342, + "42": 10.17323, + "43": 9.76225, + "44": 9.89234, + "45": 9.76762, + "46": 9.75986, + "47": 10.09534, + "48": 9.78722, + "49": 9.45529, + "50": 9.85505, + "51": 9.79116, + "52": 9.68704, + "53": 10.02199, + "54": 9.90262, + "55": 9.82465, + "56": 9.56989, + "57": 9.40892, + "58": 9.77732, + "59": 9.52733, + "60": 9.44306, + "61": 9.64215, + "62": 9.94224, + "63": 9.31031, + "64": 9.72428, + "65": 8.89104, + "66": 9.65351, + "67": 9.31775, + "68": 9.73884, + "69": 9.7436, + "70": 9.67902, + "71": 9.56185, + "72": 9.53074, + "73": 9.44621, + "74": 8.88449, + "75": 9.36836, + "76": 9.02423, + "77": 10.0162, + "78": 9.68193, + "79": 9.327, + "80": 9.35799, + "81": 9.43376, + "82": 9.64749, + "83": 9.25646, + "84": 9.3666, + "85": 9.56032, + "86": 9.0356, + "87": 9.54626, + "88": 9.70003, + "89": 9.54986, + "90": 9.77055, + "91": 9.28744, + "92": 9.31156, + "93": 9.03212, + "94": 8.78135, + "95": 9.48101, + "96": 9.47679, + "97": 9.24913, + "98": 9.61711, + "99": 8.83684, + 
"100": 9.34997 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1776.0, + "2": 1837.0, + "3": 1749.0, + "4": 1902.0, + "5": 2128.0, + "6": 2161.0, + "7": 1990.0, + "8": 1860.0, + "9": 1953.0, + "10": 1615.0, + "11": 2052.0, + "12": 1809.0, + "13": 2136.0, + "14": 1966.0, + "15": 2021.0, + "16": 1892.0, + "17": 1945.0, + "18": 1826.0, + "19": 1858.0, + "20": 1775.0, + "21": 1971.0, + "22": 1818.0, + "23": 2137.0, + "24": 1842.0, + "25": 1916.0, + "26": 1946.0, + "27": 1940.0, + "28": 2046.0, + "29": 2000.0, + "30": 2029.0, + "31": 1701.0, + "32": 2056.0, + "33": 2208.0, + "34": 2024.0, + "35": 2107.0, + "36": 1985.0, + "37": 2243.0, + "38": 2228.0, + "39": 2433.0, + "40": 2174.0, + "41": 2295.0, + "42": 2262.0, + "43": 2097.0, + "44": 2291.0, + "45": 2110.0, + "46": 2293.0, + "47": 2553.0, + "48": 2368.0, + "49": 2280.0, + "50": 2363.0, + "51": 2596.0, + "52": 2582.0, + "53": 2816.0, + "54": 2729.0, + "55": 2460.0, + "56": 2735.0, + "57": 2451.0, + "58": 2746.0, + "59": 2848.0, + "60": 2462.0, + "61": 2890.0, + "62": 2565.0, + "63": 2520.0, + "64": 2932.0, + "65": 2724.0, + "66": 3014.0, + "67": 2958.0, + "68": 2847.0, + "69": 2937.0, + "70": 2952.0, + "71": 2954.0, + "72": 2617.0, + "73": 3068.0, + "74": 2239.0, + "75": 2823.0, + "76": 3073.0, + "77": 3109.0, + "78": 3263.0, + "79": 3254.0, + "80": 3222.0, + "81": 3475.0, + "82": 3277.0, + "83": 2732.0, + "84": 3393.0, + "85": 3314.0, + "86": 2674.0, + "87": 3433.0, + "88": 3250.0, + "89": 3089.0, + "90": 3087.0, + "91": 3070.0, + "92": 3358.0, + "93": 2823.0, + "94": 3442.0, + "95": 3146.0, + "96": 3256.0, + "97": 3086.0, + "98": 3563.0, + "99": 3247.0, + "100": 3331.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 888098304.0, + "2": 888098304.0, + "3": 888098304.0, + "4": 888098304.0, + "5": 888098304.0, + "6": 888098304.0, + "7": 888098304.0, + "8": 888098304.0, + "9": 888098304.0, + 
"10": 888098304.0, + "11": 888098304.0, + "12": 888098304.0, + "13": 888098304.0, + "14": 888098304.0, + "15": 888098304.0, + "16": 888098304.0, + "17": 888098304.0, + "18": 888098304.0, + "19": 888098304.0, + "20": 888098304.0, + "21": 888098304.0, + "22": 888098304.0, + "23": 888098304.0, + "24": 888098304.0, + "25": 888098304.0, + "26": 888098304.0, + "27": 888098304.0, + "28": 888098304.0, + "29": 888098304.0, + "30": 888098304.0, + "31": 888098304.0, + "32": 888098304.0, + "33": 888098304.0, + "34": 888098304.0, + "35": 888098304.0, + "36": 888098304.0, + "37": 888098304.0, + "38": 888098304.0, + "39": 888098304.0, + "40": 888098304.0, + "41": 888098304.0, + "42": 888098304.0, + "43": 888098304.0, + "44": 888098304.0, + "45": 888098304.0, + "46": 888098304.0, + "47": 888098304.0, + "48": 888098304.0, + "49": 888098304.0, + "50": 888098304.0, + "51": 888098304.0, + "52": 888098304.0, + "53": 888098304.0, + "54": 888098304.0, + "55": 888098304.0, + "56": 888098304.0, + "57": 888098304.0, + "58": 888098304.0, + "59": 888098304.0, + "60": 888098304.0, + "61": 888098304.0, + "62": 888098304.0, + "63": 888098304.0, + "64": 888098304.0, + "65": 888098304.0, + "66": 888098304.0, + "67": 888098304.0, + "68": 888098304.0, + "69": 888098304.0, + "70": 888098304.0, + "71": 888098304.0, + "72": 888098304.0, + "73": 888098304.0, + "74": 888098304.0, + "75": 888098304.0, + "76": 888098304.0, + "77": 888098304.0, + "78": 888098304.0, + "79": 888098304.0, + "80": 888098304.0, + "81": 888098304.0, + "82": 888098304.0, + "83": 888098304.0, + "84": 888098304.0, + "85": 888098304.0, + "86": 888098304.0, + "87": 888098304.0, + "88": 888098304.0, + "89": 888098304.0, + "90": 888098304.0, + "91": 888098304.0, + "92": 888098304.0, + "93": 888098304.0, + "94": 888098304.0, + "95": 888098304.0, + "96": 888098304.0, + "97": 888098304.0, + "98": 888098304.0, + "99": 888098304.0, + "100": 888098304.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 3216302592.0, + "2": 3575768576.0, + "3": 3575768576.0, + "4": 3575768576.0, + "5": 3575768576.0, + "6": 3575768576.0, + "7": 3575768576.0, + "8": 3575768576.0, + "9": 3575768576.0, + "10": 3575768576.0, + "11": 3575768576.0, + "12": 3575768576.0, + "13": 3575768576.0, + "14": 3575768576.0, + "15": 3575768576.0, + "16": 3575768576.0, + "17": 3575768576.0, + "18": 3575768576.0, + "19": 3575768576.0, + "20": 3575768576.0, + "21": 3575768576.0, + "22": 3575768576.0, + "23": 3575768576.0, + "24": 3575768576.0, + "25": 3575768576.0, + "26": 3575768576.0, + "27": 3575768576.0, + "28": 3575768576.0, + "29": 3575768576.0, + "30": 3575768576.0, + "31": 3575768576.0, + "32": 3575768576.0, + "33": 3575768576.0, + "34": 3575768576.0, + "35": 3575768576.0, + "36": 3575768576.0, + "37": 3575768576.0, + "38": 3575768576.0, + "39": 3575768576.0, + "40": 3575768576.0, + "41": 3575768576.0, + "42": 3575768576.0, + "43": 3575768576.0, + "44": 3575768576.0, + "45": 3575768576.0, + "46": 3575768576.0, + "47": 3575768576.0, + "48": 3575768576.0, + "49": 3575768576.0, + "50": 3575768576.0, + "51": 3575768576.0, + "52": 3575768576.0, + "53": 3575768576.0, + "54": 3575768576.0, + "55": 3575768576.0, + "56": 3575768576.0, + "57": 3575768576.0, + "58": 3575768576.0, + "59": 3575768576.0, + "60": 3575768576.0, + "61": 3575768576.0, + "62": 3575768576.0, + "63": 3575768576.0, + "64": 3575768576.0, + "65": 3575768576.0, + "66": 3575768576.0, + "67": 3575768576.0, + "68": 3575768576.0, + "69": 3575768576.0, + "70": 3575768576.0, + "71": 3575768576.0, + "72": 3575768576.0, + "73": 3575768576.0, + "74": 3575768576.0, + "75": 3575768576.0, + "76": 3575768576.0, + "77": 3575768576.0, + "78": 3575768576.0, + "79": 3575768576.0, + "80": 3575768576.0, + "81": 3575768576.0, + "82": 3575768576.0, + "83": 3575768576.0, + "84": 3575768576.0, + "85": 3575768576.0, + "86": 3575768576.0, + "87": 3575768576.0, + "88": 3575768576.0, + "89": 3575768576.0, + "90": 
3575768576.0, + "91": 3575768576.0, + "92": 3575768576.0, + "93": 3575768576.0, + "94": 3575768576.0, + "95": 3575768576.0, + "96": 3575768576.0, + "97": 3575768576.0, + "98": 3575768576.0, + "99": 3575768576.0, + "100": 3575768576.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.66914, + "2": 0.21684, + "3": 0.17892, + "4": 0.17346, + "5": 0.17105, + "6": 0.17127, + "7": 0.17098, + "8": 0.17217, + "9": 0.17182, + "10": 0.17103, + "11": 0.17137, + "12": 0.17055, + "13": 0.17065, + "14": 0.17142, + "15": 0.17038, + "16": 0.16903, + "17": 0.16848, + "18": 0.16975, + "19": 0.16977, + "20": 0.17019, + "21": 0.16985, + "22": 0.16955, + "23": 0.16804, + "24": 0.16891, + "25": 0.16902, + "26": 0.16957, + "27": 0.16863, + "28": 0.16926, + "29": 0.16921, + "30": 0.168, + "31": 0.16922, + "32": 0.16856, + "33": 0.17245, + "34": 0.16964, + "35": 0.16929, + "36": 0.16825, + "37": 0.16872, + "38": 0.16843, + "39": 0.16954, + "40": 0.16969, + "41": 0.16937, + "42": 0.1686, + "43": 0.34614, + "44": 0.16943, + "45": 0.16912, + "46": 0.16957, + "47": 0.16789, + "48": 0.16768, + "49": 0.16897, + "50": 0.16779, + "51": 0.3373, + "52": 0.17048, + "53": 0.16638, + "54": 0.16813, + "55": 0.16767, + "56": 0.16807, + "57": 0.16799, + "58": 0.16657, + "59": 0.16804, + "60": 0.16874, + "61": 0.1679, + "62": 0.16609, + "63": 0.16577, + "64": 0.16659, + "65": 0.16778, + "66": 0.16673, + "67": 0.16832, + "68": 0.16874, + "69": 0.16895, + "70": 0.16685, + "71": 0.16724, + "72": 0.1677, + "73": 0.16716, + "74": 0.16899, + "75": 0.1687, + "76": 0.16719, + "77": 0.16812, + "78": 0.1671, + "79": 0.1671, + "80": 0.16726, + "81": 0.16712, + "82": 0.16866, + "83": 0.16717, + "84": 0.16749, + "85": 0.16759, + "86": 0.16853, + "87": 0.16786, + "88": 0.16717, + "89": 0.16661, + "90": 0.16719, + "91": 0.17397, + "92": 0.17387, + "93": 0.17474, + "94": 0.17341, + "95": 0.17473, + "96": 0.17386, + "97": 0.17453, + "98": 0.17503, + "99": 
0.17293, + "100": 0.17243 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 17ee04cf0ae..63425028dd5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.85599, + "2": 10.8648, + "3": 10.87042, + "4": 10.85288, "5": 10.88397, + "6": 10.89184, + "7": 10.86732, + "8": 10.87057, + "9": 10.87432, "10": 10.84185, + "11": 10.87989, + "12": 10.87417, + "13": 10.87884, + "14": 10.89184, "15": 10.82659, + "16": 10.83027, + "17": 10.80933, + "18": 10.81431, + "19": 10.8167, "20": 10.72165, + "21": 10.70557, + "22": 10.56881, + "23": 10.72025, + "24": 10.61194, "25": 10.55765, + "26": 10.61149, + "27": 10.62635, + "28": 10.57155, + "29": 10.58212, "30": 10.36267, + "31": 10.11682, + "32": 10.4682, + "33": 10.45411, + "34": 10.21121, "35": 10.27207, + "36": 10.22246, + "37": 10.34079, + "38": 10.18964, + "39": 10.40228, "40": 10.08758, + "41": 10.13714, + "42": 10.21175, + "43": 9.82878, + "44": 9.96255, "45": 9.82846, + "46": 9.80952, + "47": 10.13734, + "48": 9.84349, + "49": 9.52888, "50": 9.91046, + "51": 9.85075, + "52": 9.73181, + "53": 10.06388, + "54": 9.95432, "55": 9.87204, + "56": 9.61823, + "57": 9.47467, + "58": 9.82802, + "59": 9.57962, "60": 9.49074, + "61": 9.68473, + "62": 9.99245, + "63": 9.38364, + "64": 9.77766, "65": 8.94008, + "66": 9.70099, + "67": 9.3605, + "68": 9.77766, 
+ "69": 9.78865, "70": 9.73813, + "71": 9.61811, + "72": 9.58068, + "73": 9.4964, + "74": 8.93812, "75": 9.42081, + "76": 9.07416, + "77": 10.06077, + "78": 9.71952, + "79": 9.37088, "80": 9.39874, + "81": 9.47802, + "82": 9.69299, + "83": 9.30276, + "84": 9.41548, "85": 9.60883, + "86": 9.07461, + "87": 9.58826, + "88": 9.74392, + "89": 9.5951, "90": 9.81217, + "91": 9.33796, + "92": 9.3534, + "93": 9.07315, + "94": 8.83127, "95": 9.51524, + "96": 9.52183, + "97": 9.31012, + "98": 9.66532, + "99": 8.88179, "100": 9.39375 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1640.0, + "2": 1738.0, + "3": 1638.0, + "4": 1810.0, "5": 1755.0, + "6": 1681.0, + "7": 1781.0, + "8": 1502.0, + "9": 1817.0, "10": 1394.0, + "11": 1927.0, + "12": 1691.0, + "13": 1901.0, + "14": 1631.0, "15": 1765.0, + "16": 1864.0, + "17": 1704.0, + "18": 1771.0, + "19": 1817.0, "20": 1831.0, + "21": 1813.0, + "22": 1673.0, + "23": 2005.0, + "24": 1553.0, "25": 1577.0, + "26": 1656.0, + "27": 1734.0, + "28": 1896.0, + "29": 2051.0, "30": 1897.0, + "31": 1452.0, + "32": 1785.0, + "33": 2061.0, + "34": 1857.0, "35": 1920.0, + "36": 1990.0, + "37": 2191.0, + "38": 2142.0, + "39": 2215.0, "40": 2166.0, + "41": 2154.0, + "42": 2148.0, + "43": 1881.0, + "44": 2066.0, "45": 1952.0, + "46": 2217.0, + "47": 2513.0, + "48": 2356.0, + "49": 2294.0, "50": 2140.0, + "51": 2509.0, + "52": 2528.0, + "53": 2851.0, + "54": 2747.0, "55": 2333.0, + "56": 2724.0, + "57": 2315.0, + "58": 2754.0, + "59": 2774.0, "60": 2336.0, + "61": 2912.0, + "62": 2415.0, + "63": 2341.0, + "64": 2837.0, "65": 2661.0, + "66": 3000.0, + "67": 2779.0, + "68": 2691.0, + "69": 2793.0, "70": 3183.0, + "71": 2962.0, + "72": 2393.0, + "73": 2997.0, + "74": 1935.0, "75": 2463.0, + "76": 3065.0, + "77": 3184.0, + "78": 3154.0, + "79": 3127.0, "80": 3286.0, + "81": 3386.0, + "82": 3128.0, + "83": 2608.0, + "84": 3079.0, "85": 3260.0, + "86": 2687.0, + "87": 3591.0, + "88": 
3035.0, + "89": 3165.0, "90": 3166.0, + "91": 2690.0, + "92": 2897.0, + "93": 2630.0, + "94": 3348.0, "95": 3349.0, + "96": 3288.0, + "97": 3055.0, + "98": 3516.0, + "99": 3035.0, "100": 3109.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 746194432.0, + "2": 746194432.0, + "3": 746194432.0, + "4": 746194432.0, "5": 746194432.0, + "6": 746194432.0, + "7": 746194432.0, + "8": 746194432.0, + "9": 746194432.0, "10": 746194432.0, + "11": 746194432.0, + "12": 746194432.0, + "13": 746194432.0, + "14": 746194432.0, "15": 746194432.0, + "16": 746194432.0, + "17": 746194432.0, + "18": 746194432.0, + "19": 746194432.0, "20": 746194432.0, + "21": 746194432.0, + "22": 746194432.0, + "23": 746194432.0, + "24": 746194432.0, "25": 746194432.0, + "26": 746194432.0, + "27": 746194432.0, + "28": 746194432.0, + "29": 746194432.0, "30": 746194432.0, + "31": 746194432.0, + "32": 746194432.0, + "33": 746194432.0, + "34": 746194432.0, "35": 746194432.0, + "36": 746194432.0, + "37": 746194432.0, + "38": 746194432.0, + "39": 746194432.0, "40": 746194432.0, + "41": 746194432.0, + "42": 746194432.0, + "43": 746194432.0, + "44": 746194432.0, "45": 746194432.0, + "46": 746194432.0, + "47": 746194432.0, + "48": 746194432.0, + "49": 746194432.0, "50": 746194432.0, + "51": 746194432.0, + "52": 746194432.0, + "53": 746194432.0, + "54": 746194432.0, "55": 746194432.0, + "56": 746194432.0, + "57": 746194432.0, + "58": 746194432.0, + "59": 746194432.0, "60": 746194432.0, + "61": 746194432.0, + "62": 746194432.0, + "63": 746194432.0, + "64": 746194432.0, "65": 746194432.0, + "66": 746194432.0, + "67": 746194432.0, + "68": 746194432.0, + "69": 746194432.0, "70": 746194432.0, + "71": 746194432.0, + "72": 746194432.0, + "73": 746194432.0, + "74": 746194432.0, "75": 746194432.0, + "76": 746194432.0, + "77": 746194432.0, + "78": 746194432.0, + "79": 746194432.0, "80": 746194432.0, + "81": 746194432.0, + "82": 746194432.0, + 
"83": 746194432.0, + "84": 746194432.0, "85": 746194432.0, + "86": 746194432.0, + "87": 746194432.0, + "88": 746194432.0, + "89": 746194432.0, "90": 746194432.0, + "91": 746194432.0, + "92": 746194432.0, + "93": 746194432.0, + "94": 746194432.0, "95": 746194432.0, + "96": 746194432.0, + "97": 746194432.0, + "98": 746194432.0, + "99": 746194432.0, "100": 746194432.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1926153216.0, + "2": 2209851392.0, + "3": 2209851392.0, + "4": 2209851392.0, "5": 2209851392.0, + "6": 2209851392.0, + "7": 2209851392.0, + "8": 2209851392.0, + "9": 2209851392.0, "10": 2209851392.0, + "11": 2209851392.0, + "12": 2209851392.0, + "13": 2209851392.0, + "14": 2209851392.0, "15": 2209851392.0, + "16": 2209851392.0, + "17": 2209851392.0, + "18": 2209851392.0, + "19": 2209851392.0, "20": 2209851392.0, + "21": 2209851392.0, + "22": 2209851392.0, + "23": 2209851392.0, + "24": 2209851392.0, "25": 2209851392.0, + "26": 2209851392.0, + "27": 2209851392.0, + "28": 2209851392.0, + "29": 2209851392.0, "30": 2209851392.0, + "31": 2209851392.0, + "32": 2209851392.0, + "33": 2209851392.0, + "34": 2209851392.0, "35": 2209851392.0, + "36": 2209851392.0, + "37": 2209851392.0, + "38": 2209851392.0, + "39": 2209851392.0, "40": 2209851392.0, + "41": 2209851392.0, + "42": 2209851392.0, + "43": 2209851392.0, + "44": 2209851392.0, "45": 2209851392.0, + "46": 2209851392.0, + "47": 2209851392.0, + "48": 2209851392.0, + "49": 2209851392.0, "50": 2209851392.0, + "51": 2209851392.0, + "52": 2209851392.0, + "53": 2209851392.0, + "54": 2209851392.0, "55": 2209851392.0, + "56": 2209851392.0, + "57": 2209851392.0, + "58": 2209851392.0, + "59": 2209851392.0, "60": 2209851392.0, + "61": 2209851392.0, + "62": 2209851392.0, + "63": 2209851392.0, + "64": 2209851392.0, "65": 2209851392.0, + "66": 2209851392.0, + "67": 2209851392.0, + "68": 2209851392.0, + "69": 2209851392.0, "70": 2209851392.0, + 
"71": 2209851392.0, + "72": 2209851392.0, + "73": 2209851392.0, + "74": 2209851392.0, "75": 2209851392.0, + "76": 2209851392.0, + "77": 2209851392.0, + "78": 2209851392.0, + "79": 2209851392.0, "80": 2209851392.0, + "81": 2209851392.0, + "82": 2209851392.0, + "83": 2209851392.0, + "84": 2209851392.0, "85": 2209851392.0, + "86": 2209851392.0, + "87": 2209851392.0, + "88": 2209851392.0, + "89": 2209851392.0, "90": 2209851392.0, + "91": 2209851392.0, + "92": 2209851392.0, + "93": 2209851392.0, + "94": 2209851392.0, "95": 2209851392.0, + "96": 2209851392.0, + "97": 2209851392.0, + "98": 2209851392.0, + "99": 2209851392.0, "100": 2209851392.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 10.15333, - "5": 0.09518, - "10": 0.09562, - "15": 0.09503, - "20": 0.09503, - "25": 0.09461, - "30": 0.09547, - "35": 0.09528, - "40": 0.0967, - "45": 0.09344, - "50": 0.09511, - "55": 0.09515, - "60": 0.09496, - "65": 0.09478, - "70": 0.09504, - "75": 0.09415, - "80": 0.09367, - "85": 0.09449, - "90": 0.09786, - "95": 0.09592, - "100": 0.09477 + "1": 12.78916, + "2": 0.129, + "3": 0.1167, + "4": 0.11497, + "5": 0.10818, + "6": 0.10473, + "7": 0.10532, + "8": 0.10616, + "9": 0.10723, + "10": 0.10865, + "11": 0.10729, + "12": 0.10632, + "13": 0.10608, + "14": 0.1066, + "15": 0.10589, + "16": 0.10567, + "17": 0.10574, + "18": 0.10663, + "19": 0.10656, + "20": 0.10767, + "21": 0.10522, + "22": 0.10601, + "23": 0.10475, + "24": 0.10392, + "25": 0.10556, + "26": 0.10438, + "27": 0.10635, + "28": 0.10742, + "29": 0.10795, + "30": 0.10745, + "31": 0.10836, + "32": 0.10639, + "33": 0.10597, + "34": 0.1064, + "35": 0.10496, + "36": 0.10549, + "37": 0.10538, + "38": 0.107, + "39": 0.10567, + "40": 0.10655, + "41": 0.10552, + "42": 0.10527, + "43": 0.10546, + "44": 0.10643, + "45": 0.10624, + "46": 0.10787, + "47": 0.1068, + "48": 0.1075, + "49": 0.10525, + "50": 0.10727, + "51": 0.126, + "52": 0.1146, + "53": 0.11042, + 
"54": 0.12389, + "55": 0.10643, + "56": 0.10676, + "57": 0.10677, + "58": 0.10573, + "59": 0.10709, + "60": 0.10515, + "61": 0.10668, + "62": 0.10599, + "63": 0.10616, + "64": 0.10462, + "65": 0.10742, + "66": 0.10693, + "67": 0.10628, + "68": 0.10748, + "69": 0.10707, + "70": 0.10621, + "71": 0.105, + "72": 0.10801, + "73": 0.10662, + "74": 0.10641, + "75": 0.10562, + "76": 0.10643, + "77": 0.10629, + "78": 0.10538, + "79": 0.1047, + "80": 0.10541, + "81": 0.10526, + "82": 0.10753, + "83": 0.10562, + "84": 0.10631, + "85": 0.10586, + "86": 0.10685, + "87": 0.1065, + "88": 0.10696, + "89": 0.10619, + "90": 0.10588, + "91": 0.10452, + "92": 0.10667, + "93": 0.10546, + "94": 0.1036, + "95": 0.10483, + "96": 0.10512, + "97": 0.10433, + "98": 0.10471, + "99": 0.10514, + "100": 0.10516 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..f5a45f2f146 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85599, + "2": 10.8648, + "3": 10.87042, + "4": 10.85288, + "5": 10.88397, + "6": 10.89184, + "7": 10.86732, + "8": 10.87057, + "9": 10.87432, + "10": 10.84185, + "11": 10.87989, + "12": 10.87417, + "13": 10.87884, + "14": 10.89184, + "15": 10.82659, + "16": 10.83027, + "17": 10.80933, + "18": 10.81431, + "19": 10.8167, + "20": 10.72165, + "21": 10.70557, + "22": 10.56881, + "23": 10.72025, + "24": 10.61194, + "25": 10.55765, + "26": 10.61149, + "27": 10.62635, + "28": 10.57155, + "29": 
10.58212, + "30": 10.36267, + "31": 10.11682, + "32": 10.4682, + "33": 10.45411, + "34": 10.21121, + "35": 10.27207, + "36": 10.22246, + "37": 10.34079, + "38": 10.18964, + "39": 10.40228, + "40": 10.08758, + "41": 10.13714, + "42": 10.21175, + "43": 9.82878, + "44": 9.96255, + "45": 9.82846, + "46": 9.80952, + "47": 10.13734, + "48": 9.84349, + "49": 9.52888, + "50": 9.91046, + "51": 9.85075, + "52": 9.73181, + "53": 10.06388, + "54": 9.95432, + "55": 9.87204, + "56": 9.61823, + "57": 9.47467, + "58": 9.82802, + "59": 9.57962, + "60": 9.49074, + "61": 9.68473, + "62": 9.99245, + "63": 9.38364, + "64": 9.77766, + "65": 8.94008, + "66": 9.70099, + "67": 9.3605, + "68": 9.77766, + "69": 9.78865, + "70": 9.73813, + "71": 9.61811, + "72": 9.58068, + "73": 9.4964, + "74": 8.93812, + "75": 9.42081, + "76": 9.07416, + "77": 10.06077, + "78": 9.71952, + "79": 9.37088, + "80": 9.39874, + "81": 9.47802, + "82": 9.69299, + "83": 9.30276, + "84": 9.41548, + "85": 9.60883, + "86": 9.07461, + "87": 9.58826, + "88": 9.74392, + "89": 9.5951, + "90": 9.81217, + "91": 9.33796, + "92": 9.3534, + "93": 9.07315, + "94": 8.83127, + "95": 9.51524, + "96": 9.52183, + "97": 9.31012, + "98": 9.66532, + "99": 8.88179, + "100": 9.39375 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1640.0, + "2": 1738.0, + "3": 1638.0, + "4": 1810.0, + "5": 1755.0, + "6": 1681.0, + "7": 1781.0, + "8": 1502.0, + "9": 1817.0, + "10": 1394.0, + "11": 1927.0, + "12": 1691.0, + "13": 1901.0, + "14": 1631.0, + "15": 1765.0, + "16": 1864.0, + "17": 1704.0, + "18": 1771.0, + "19": 1817.0, + "20": 1831.0, + "21": 1813.0, + "22": 1673.0, + "23": 2005.0, + "24": 1553.0, + "25": 1577.0, + "26": 1656.0, + "27": 1734.0, + "28": 1896.0, + "29": 2051.0, + "30": 1897.0, + "31": 1452.0, + "32": 1785.0, + "33": 2061.0, + "34": 1857.0, + "35": 1920.0, + "36": 1990.0, + "37": 2191.0, + "38": 2142.0, + "39": 2215.0, + "40": 2166.0, + "41": 2154.0, + "42": 2148.0, + "43": 
1881.0, + "44": 2066.0, + "45": 1952.0, + "46": 2217.0, + "47": 2513.0, + "48": 2356.0, + "49": 2294.0, + "50": 2140.0, + "51": 2509.0, + "52": 2528.0, + "53": 2851.0, + "54": 2747.0, + "55": 2333.0, + "56": 2724.0, + "57": 2315.0, + "58": 2754.0, + "59": 2774.0, + "60": 2336.0, + "61": 2912.0, + "62": 2415.0, + "63": 2341.0, + "64": 2837.0, + "65": 2661.0, + "66": 3000.0, + "67": 2779.0, + "68": 2691.0, + "69": 2793.0, + "70": 3183.0, + "71": 2962.0, + "72": 2393.0, + "73": 2997.0, + "74": 1935.0, + "75": 2463.0, + "76": 3065.0, + "77": 3184.0, + "78": 3154.0, + "79": 3127.0, + "80": 3286.0, + "81": 3386.0, + "82": 3128.0, + "83": 2608.0, + "84": 3079.0, + "85": 3260.0, + "86": 2687.0, + "87": 3591.0, + "88": 3035.0, + "89": 3165.0, + "90": 3166.0, + "91": 2690.0, + "92": 2897.0, + "93": 2630.0, + "94": 3348.0, + "95": 3349.0, + "96": 3288.0, + "97": 3055.0, + "98": 3516.0, + "99": 3035.0, + "100": 3109.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 746194432.0, + "2": 746194432.0, + "3": 746194432.0, + "4": 746194432.0, + "5": 746194432.0, + "6": 746194432.0, + "7": 746194432.0, + "8": 746194432.0, + "9": 746194432.0, + "10": 746194432.0, + "11": 746194432.0, + "12": 746194432.0, + "13": 746194432.0, + "14": 746194432.0, + "15": 746194432.0, + "16": 746194432.0, + "17": 746194432.0, + "18": 746194432.0, + "19": 746194432.0, + "20": 746194432.0, + "21": 746194432.0, + "22": 746194432.0, + "23": 746194432.0, + "24": 746194432.0, + "25": 746194432.0, + "26": 746194432.0, + "27": 746194432.0, + "28": 746194432.0, + "29": 746194432.0, + "30": 746194432.0, + "31": 746194432.0, + "32": 746194432.0, + "33": 746194432.0, + "34": 746194432.0, + "35": 746194432.0, + "36": 746194432.0, + "37": 746194432.0, + "38": 746194432.0, + "39": 746194432.0, + "40": 746194432.0, + "41": 746194432.0, + "42": 746194432.0, + "43": 746194432.0, + "44": 746194432.0, + "45": 746194432.0, + "46": 746194432.0, + "47": 
746194432.0, + "48": 746194432.0, + "49": 746194432.0, + "50": 746194432.0, + "51": 746194432.0, + "52": 746194432.0, + "53": 746194432.0, + "54": 746194432.0, + "55": 746194432.0, + "56": 746194432.0, + "57": 746194432.0, + "58": 746194432.0, + "59": 746194432.0, + "60": 746194432.0, + "61": 746194432.0, + "62": 746194432.0, + "63": 746194432.0, + "64": 746194432.0, + "65": 746194432.0, + "66": 746194432.0, + "67": 746194432.0, + "68": 746194432.0, + "69": 746194432.0, + "70": 746194432.0, + "71": 746194432.0, + "72": 746194432.0, + "73": 746194432.0, + "74": 746194432.0, + "75": 746194432.0, + "76": 746194432.0, + "77": 746194432.0, + "78": 746194432.0, + "79": 746194432.0, + "80": 746194432.0, + "81": 746194432.0, + "82": 746194432.0, + "83": 746194432.0, + "84": 746194432.0, + "85": 746194432.0, + "86": 746194432.0, + "87": 746194432.0, + "88": 746194432.0, + "89": 746194432.0, + "90": 746194432.0, + "91": 746194432.0, + "92": 746194432.0, + "93": 746194432.0, + "94": 746194432.0, + "95": 746194432.0, + "96": 746194432.0, + "97": 746194432.0, + "98": 746194432.0, + "99": 746194432.0, + "100": 746194432.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1926153216.0, + "2": 2209851392.0, + "3": 2209851392.0, + "4": 2209851392.0, + "5": 2209851392.0, + "6": 2209851392.0, + "7": 2209851392.0, + "8": 2209851392.0, + "9": 2209851392.0, + "10": 2209851392.0, + "11": 2209851392.0, + "12": 2209851392.0, + "13": 2209851392.0, + "14": 2209851392.0, + "15": 2209851392.0, + "16": 2209851392.0, + "17": 2209851392.0, + "18": 2209851392.0, + "19": 2209851392.0, + "20": 2209851392.0, + "21": 2209851392.0, + "22": 2209851392.0, + "23": 2209851392.0, + "24": 2209851392.0, + "25": 2209851392.0, + "26": 2209851392.0, + "27": 2209851392.0, + "28": 2209851392.0, + "29": 2209851392.0, + "30": 2209851392.0, + "31": 2209851392.0, + "32": 2209851392.0, + "33": 2209851392.0, + "34": 2209851392.0, + "35": 
2209851392.0, + "36": 2209851392.0, + "37": 2209851392.0, + "38": 2209851392.0, + "39": 2209851392.0, + "40": 2209851392.0, + "41": 2209851392.0, + "42": 2209851392.0, + "43": 2209851392.0, + "44": 2209851392.0, + "45": 2209851392.0, + "46": 2209851392.0, + "47": 2209851392.0, + "48": 2209851392.0, + "49": 2209851392.0, + "50": 2209851392.0, + "51": 2209851392.0, + "52": 2209851392.0, + "53": 2209851392.0, + "54": 2209851392.0, + "55": 2209851392.0, + "56": 2209851392.0, + "57": 2209851392.0, + "58": 2209851392.0, + "59": 2209851392.0, + "60": 2209851392.0, + "61": 2209851392.0, + "62": 2209851392.0, + "63": 2209851392.0, + "64": 2209851392.0, + "65": 2209851392.0, + "66": 2209851392.0, + "67": 2209851392.0, + "68": 2209851392.0, + "69": 2209851392.0, + "70": 2209851392.0, + "71": 2209851392.0, + "72": 2209851392.0, + "73": 2209851392.0, + "74": 2209851392.0, + "75": 2209851392.0, + "76": 2209851392.0, + "77": 2209851392.0, + "78": 2209851392.0, + "79": 2209851392.0, + "80": 2209851392.0, + "81": 2209851392.0, + "82": 2209851392.0, + "83": 2209851392.0, + "84": 2209851392.0, + "85": 2209851392.0, + "86": 2209851392.0, + "87": 2209851392.0, + "88": 2209851392.0, + "89": 2209851392.0, + "90": 2209851392.0, + "91": 2209851392.0, + "92": 2209851392.0, + "93": 2209851392.0, + "94": 2209851392.0, + "95": 2209851392.0, + "96": 2209851392.0, + "97": 2209851392.0, + "98": 2209851392.0, + "99": 2209851392.0, + "100": 2209851392.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.78981, + "2": 0.14641, + "3": 0.09823, + "4": 0.09626, + "5": 0.09543, + "6": 0.09563, + "7": 0.09569, + "8": 0.0947, + "9": 0.09571, + "10": 0.09565, + "11": 0.09526, + "12": 0.09451, + "13": 0.09577, + "14": 0.09578, + "15": 0.0954, + "16": 0.09495, + "17": 0.09576, + "18": 0.09506, + "19": 0.09526, + "20": 0.09508, + "21": 0.09525, + "22": 0.09601, + "23": 0.09712, + "24": 0.09956, + "25": 0.09858, + "26": 0.09859, + "27": 0.097, + 
"28": 0.0963, + "29": 0.09742, + "30": 0.09459, + "31": 0.09583, + "32": 0.09745, + "33": 0.09523, + "34": 0.09486, + "35": 0.09594, + "36": 0.09571, + "37": 0.09608, + "38": 0.09689, + "39": 0.09574, + "40": 0.09565, + "41": 0.0958, + "42": 0.09573, + "43": 0.0958, + "44": 0.09524, + "45": 0.09519, + "46": 0.0952, + "47": 0.09476, + "48": 0.09432, + "49": 0.09445, + "50": 0.09411, + "51": 0.11832, + "52": 0.10335, + "53": 0.10105, + "54": 0.11751, + "55": 0.09996, + "56": 0.09926, + "57": 0.1014, + "58": 0.10002, + "59": 0.10069, + "60": 0.09932, + "61": 0.09999, + "62": 0.10028, + "63": 0.09961, + "64": 0.09886, + "65": 0.10127, + "66": 0.09994, + "67": 0.09975, + "68": 0.10037, + "69": 0.09896, + "70": 0.09847, + "71": 0.09907, + "72": 0.09929, + "73": 0.09893, + "74": 0.09893, + "75": 0.09961, + "76": 0.09928, + "77": 0.0991, + "78": 0.10211, + "79": 0.09934, + "80": 0.10027, + "81": 0.0996, + "82": 0.09986, + "83": 0.09951, + "84": 0.09761, + "85": 0.09909, + "86": 0.099, + "87": 0.09903, + "88": 0.09905, + "89": 0.0999, + "90": 0.09942, + "91": 0.09983, + "92": 0.09886, + "93": 0.09982, + "94": 0.09894, + "95": 0.09946, + "96": 0.09983, + "97": 0.09904, + "98": 0.09902, + "99": 0.09961, + "100": 0.09808 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..72743900cff --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85599, + "2": 10.8648, + "3": 10.87042, + "4": 10.85288, + "5": 10.88397, 
+ "6": 10.89184, + "7": 10.86732, + "8": 10.87057, + "9": 10.87432, + "10": 10.84185, + "11": 10.87989, + "12": 10.87417, + "13": 10.87884, + "14": 10.89184, + "15": 10.82659, + "16": 10.83027, + "17": 10.80933, + "18": 10.81431, + "19": 10.8167, + "20": 10.72165, + "21": 10.70557, + "22": 10.56881, + "23": 10.72025, + "24": 10.61194, + "25": 10.55765, + "26": 10.61149, + "27": 10.62635, + "28": 10.57155, + "29": 10.58212, + "30": 10.36267, + "31": 10.11682, + "32": 10.4682, + "33": 10.45411, + "34": 10.21121, + "35": 10.27207, + "36": 10.22246, + "37": 10.34079, + "38": 10.18964, + "39": 10.40228, + "40": 10.08758, + "41": 10.13714, + "42": 10.21175, + "43": 9.82878, + "44": 9.96255, + "45": 9.82846, + "46": 9.80952, + "47": 10.13734, + "48": 9.84349, + "49": 9.52888, + "50": 9.91046, + "51": 9.85075, + "52": 9.73181, + "53": 10.06388, + "54": 9.95432, + "55": 9.87204, + "56": 9.61823, + "57": 9.47467, + "58": 9.82802, + "59": 9.57962, + "60": 9.49074, + "61": 9.68473, + "62": 9.99245, + "63": 9.38364, + "64": 9.77766, + "65": 8.94008, + "66": 9.70099, + "67": 9.3605, + "68": 9.77766, + "69": 9.78865, + "70": 9.73813, + "71": 9.61811, + "72": 9.58068, + "73": 9.4964, + "74": 8.93812, + "75": 9.42081, + "76": 9.07416, + "77": 10.06077, + "78": 9.71952, + "79": 9.37088, + "80": 9.39874, + "81": 9.47802, + "82": 9.69299, + "83": 9.30276, + "84": 9.41548, + "85": 9.60883, + "86": 9.07461, + "87": 9.58826, + "88": 9.74392, + "89": 9.5951, + "90": 9.81217, + "91": 9.33796, + "92": 9.3534, + "93": 9.07315, + "94": 8.83127, + "95": 9.51524, + "96": 9.52183, + "97": 9.31012, + "98": 9.66532, + "99": 8.88179, + "100": 9.39375 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1640.0, + "2": 1738.0, + "3": 1638.0, + "4": 1810.0, + "5": 1755.0, + "6": 1681.0, + "7": 1781.0, + "8": 1502.0, + "9": 1817.0, + "10": 1394.0, + "11": 1927.0, + "12": 1691.0, + "13": 1901.0, + "14": 1631.0, + "15": 1765.0, + "16": 1864.0, + "17": 
1704.0, + "18": 1771.0, + "19": 1817.0, + "20": 1831.0, + "21": 1813.0, + "22": 1673.0, + "23": 2005.0, + "24": 1553.0, + "25": 1577.0, + "26": 1656.0, + "27": 1734.0, + "28": 1896.0, + "29": 2051.0, + "30": 1897.0, + "31": 1452.0, + "32": 1785.0, + "33": 2061.0, + "34": 1857.0, + "35": 1920.0, + "36": 1990.0, + "37": 2191.0, + "38": 2142.0, + "39": 2215.0, + "40": 2166.0, + "41": 2154.0, + "42": 2148.0, + "43": 1881.0, + "44": 2066.0, + "45": 1952.0, + "46": 2217.0, + "47": 2513.0, + "48": 2356.0, + "49": 2294.0, + "50": 2140.0, + "51": 2509.0, + "52": 2528.0, + "53": 2851.0, + "54": 2747.0, + "55": 2333.0, + "56": 2724.0, + "57": 2315.0, + "58": 2754.0, + "59": 2774.0, + "60": 2336.0, + "61": 2912.0, + "62": 2415.0, + "63": 2341.0, + "64": 2837.0, + "65": 2661.0, + "66": 3000.0, + "67": 2779.0, + "68": 2691.0, + "69": 2793.0, + "70": 3183.0, + "71": 2962.0, + "72": 2393.0, + "73": 2997.0, + "74": 1935.0, + "75": 2463.0, + "76": 3065.0, + "77": 3184.0, + "78": 3154.0, + "79": 3127.0, + "80": 3286.0, + "81": 3386.0, + "82": 3128.0, + "83": 2608.0, + "84": 3079.0, + "85": 3260.0, + "86": 2687.0, + "87": 3591.0, + "88": 3035.0, + "89": 3165.0, + "90": 3166.0, + "91": 2690.0, + "92": 2897.0, + "93": 2630.0, + "94": 3348.0, + "95": 3349.0, + "96": 3288.0, + "97": 3055.0, + "98": 3516.0, + "99": 3035.0, + "100": 3109.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 746194432.0, + "2": 746194432.0, + "3": 746194432.0, + "4": 746194432.0, + "5": 746194432.0, + "6": 746194432.0, + "7": 746194432.0, + "8": 746194432.0, + "9": 746194432.0, + "10": 746194432.0, + "11": 746194432.0, + "12": 746194432.0, + "13": 746194432.0, + "14": 746194432.0, + "15": 746194432.0, + "16": 746194432.0, + "17": 746194432.0, + "18": 746194432.0, + "19": 746194432.0, + "20": 746194432.0, + "21": 746194432.0, + "22": 746194432.0, + "23": 746194432.0, + "24": 746194432.0, + "25": 746194432.0, + "26": 746194432.0, + "27": 
746194432.0, + "28": 746194432.0, + "29": 746194432.0, + "30": 746194432.0, + "31": 746194432.0, + "32": 746194432.0, + "33": 746194432.0, + "34": 746194432.0, + "35": 746194432.0, + "36": 746194432.0, + "37": 746194432.0, + "38": 746194432.0, + "39": 746194432.0, + "40": 746194432.0, + "41": 746194432.0, + "42": 746194432.0, + "43": 746194432.0, + "44": 746194432.0, + "45": 746194432.0, + "46": 746194432.0, + "47": 746194432.0, + "48": 746194432.0, + "49": 746194432.0, + "50": 746194432.0, + "51": 746194432.0, + "52": 746194432.0, + "53": 746194432.0, + "54": 746194432.0, + "55": 746194432.0, + "56": 746194432.0, + "57": 746194432.0, + "58": 746194432.0, + "59": 746194432.0, + "60": 746194432.0, + "61": 746194432.0, + "62": 746194432.0, + "63": 746194432.0, + "64": 746194432.0, + "65": 746194432.0, + "66": 746194432.0, + "67": 746194432.0, + "68": 746194432.0, + "69": 746194432.0, + "70": 746194432.0, + "71": 746194432.0, + "72": 746194432.0, + "73": 746194432.0, + "74": 746194432.0, + "75": 746194432.0, + "76": 746194432.0, + "77": 746194432.0, + "78": 746194432.0, + "79": 746194432.0, + "80": 746194432.0, + "81": 746194432.0, + "82": 746194432.0, + "83": 746194432.0, + "84": 746194432.0, + "85": 746194432.0, + "86": 746194432.0, + "87": 746194432.0, + "88": 746194432.0, + "89": 746194432.0, + "90": 746194432.0, + "91": 746194432.0, + "92": 746194432.0, + "93": 746194432.0, + "94": 746194432.0, + "95": 746194432.0, + "96": 746194432.0, + "97": 746194432.0, + "98": 746194432.0, + "99": 746194432.0, + "100": 746194432.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1926153216.0, + "2": 2209851392.0, + "3": 2209851392.0, + "4": 2209851392.0, + "5": 2209851392.0, + "6": 2209851392.0, + "7": 2209851392.0, + "8": 2209851392.0, + "9": 2209851392.0, + "10": 2209851392.0, + "11": 2209851392.0, + "12": 2209851392.0, + "13": 2209851392.0, + "14": 2209851392.0, + "15": 2209851392.0, + "16": 
2209851392.0, + "17": 2209851392.0, + "18": 2209851392.0, + "19": 2209851392.0, + "20": 2209851392.0, + "21": 2209851392.0, + "22": 2209851392.0, + "23": 2209851392.0, + "24": 2209851392.0, + "25": 2209851392.0, + "26": 2209851392.0, + "27": 2209851392.0, + "28": 2209851392.0, + "29": 2209851392.0, + "30": 2209851392.0, + "31": 2209851392.0, + "32": 2209851392.0, + "33": 2209851392.0, + "34": 2209851392.0, + "35": 2209851392.0, + "36": 2209851392.0, + "37": 2209851392.0, + "38": 2209851392.0, + "39": 2209851392.0, + "40": 2209851392.0, + "41": 2209851392.0, + "42": 2209851392.0, + "43": 2209851392.0, + "44": 2209851392.0, + "45": 2209851392.0, + "46": 2209851392.0, + "47": 2209851392.0, + "48": 2209851392.0, + "49": 2209851392.0, + "50": 2209851392.0, + "51": 2209851392.0, + "52": 2209851392.0, + "53": 2209851392.0, + "54": 2209851392.0, + "55": 2209851392.0, + "56": 2209851392.0, + "57": 2209851392.0, + "58": 2209851392.0, + "59": 2209851392.0, + "60": 2209851392.0, + "61": 2209851392.0, + "62": 2209851392.0, + "63": 2209851392.0, + "64": 2209851392.0, + "65": 2209851392.0, + "66": 2209851392.0, + "67": 2209851392.0, + "68": 2209851392.0, + "69": 2209851392.0, + "70": 2209851392.0, + "71": 2209851392.0, + "72": 2209851392.0, + "73": 2209851392.0, + "74": 2209851392.0, + "75": 2209851392.0, + "76": 2209851392.0, + "77": 2209851392.0, + "78": 2209851392.0, + "79": 2209851392.0, + "80": 2209851392.0, + "81": 2209851392.0, + "82": 2209851392.0, + "83": 2209851392.0, + "84": 2209851392.0, + "85": 2209851392.0, + "86": 2209851392.0, + "87": 2209851392.0, + "88": 2209851392.0, + "89": 2209851392.0, + "90": 2209851392.0, + "91": 2209851392.0, + "92": 2209851392.0, + "93": 2209851392.0, + "94": 2209851392.0, + "95": 2209851392.0, + "96": 2209851392.0, + "97": 2209851392.0, + "98": 2209851392.0, + "99": 2209851392.0, + "100": 2209851392.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.71973, + "2": 0.14026, 
+ "3": 0.11862, + "4": 0.10675, + "5": 0.10706, + "6": 0.10639, + "7": 0.10733, + "8": 0.10668, + "9": 0.10876, + "10": 0.10818, + "11": 0.10917, + "12": 0.1083, + "13": 0.10781, + "14": 0.10774, + "15": 0.10649, + "16": 0.10734, + "17": 0.10691, + "18": 0.10561, + "19": 0.10658, + "20": 0.10698, + "21": 0.10786, + "22": 0.10799, + "23": 0.10759, + "24": 0.10883, + "25": 0.10795, + "26": 0.10754, + "27": 0.10823, + "28": 0.10763, + "29": 0.10845, + "30": 0.10831, + "31": 0.10745, + "32": 0.10718, + "33": 0.10787, + "34": 0.10797, + "35": 0.1082, + "36": 0.10752, + "37": 0.10829, + "38": 0.10875, + "39": 0.10866, + "40": 0.1088, + "41": 0.10879, + "42": 0.10749, + "43": 0.10899, + "44": 0.10725, + "45": 0.10697, + "46": 0.10761, + "47": 0.10683, + "48": 0.10976, + "49": 0.10965, + "50": 0.10766, + "51": 0.123, + "52": 0.11396, + "53": 0.10816, + "54": 0.10864, + "55": 0.12449, + "56": 0.1076, + "57": 0.10895, + "58": 0.10793, + "59": 0.10902, + "60": 0.10551, + "61": 0.10575, + "62": 0.10761, + "63": 0.10614, + "64": 0.10584, + "65": 0.10699, + "66": 0.1077, + "67": 0.10786, + "68": 0.10744, + "69": 0.10671, + "70": 0.10786, + "71": 0.10765, + "72": 0.10586, + "73": 0.10669, + "74": 0.10611, + "75": 0.10692, + "76": 0.10782, + "77": 0.10601, + "78": 0.10616, + "79": 0.10555, + "80": 0.10728, + "81": 0.10656, + "82": 0.10848, + "83": 0.10786, + "84": 0.10935, + "85": 0.11246, + "86": 0.11271, + "87": 0.10885, + "88": 0.10616, + "89": 0.10731, + "90": 0.10705, + "91": 0.10547, + "92": 0.10622, + "93": 0.10619, + "94": 0.10678, + "95": 0.10769, + "96": 0.10574, + "97": 0.10691, + "98": 0.10682, + "99": 0.10685, + "100": 0.10542 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index b6823bec847..2125b88c754 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.85599, + "2": 10.8648, + "3": 10.87042, + "4": 10.85288, "5": 10.88397, + "6": 10.89184, + "7": 10.86732, + "8": 10.87057, + "9": 10.87432, "10": 10.84185, + "11": 10.87989, + "12": 10.87417, + "13": 10.87884, + "14": 10.89184, "15": 10.82659, + "16": 10.83027, + "17": 10.80933, + "18": 10.81431, + "19": 10.8167, "20": 10.72165, + "21": 10.70557, + "22": 10.56881, + "23": 10.72025, + "24": 10.61194, "25": 10.55765, + "26": 10.61149, + "27": 10.62635, + "28": 10.57155, + "29": 10.58212, "30": 10.36267, + "31": 10.11682, + "32": 10.4682, + "33": 10.45411, + "34": 10.21121, "35": 10.27207, + "36": 10.22246, + "37": 10.34079, + "38": 10.18964, + "39": 10.40228, "40": 10.08758, + "41": 10.13714, + "42": 10.21175, + "43": 9.82878, + "44": 9.96255, "45": 9.82846, + "46": 9.80952, + "47": 10.13734, + "48": 9.84349, + "49": 9.52888, "50": 9.91046, + "51": 9.85075, + "52": 9.73181, + "53": 10.06388, + "54": 9.95432, "55": 9.87204, + "56": 9.61823, + "57": 9.47467, + "58": 9.82802, + "59": 9.57962, "60": 9.49074, + "61": 9.68473, + "62": 9.99245, + "63": 9.38364, + "64": 9.77766, "65": 8.94008, + "66": 9.70099, + "67": 9.3605, + "68": 9.77766, + "69": 9.78865, "70": 9.73813, + "71": 9.61811, + "72": 9.58068, + "73": 9.4964, + "74": 8.93812, "75": 9.42081, + "76": 9.07416, + "77": 10.06077, + "78": 9.71952, + "79": 9.37088, "80": 
9.39874, + "81": 9.47802, + "82": 9.69299, + "83": 9.30276, + "84": 9.41548, "85": 9.60883, + "86": 9.07461, + "87": 9.58826, + "88": 9.74392, + "89": 9.5951, "90": 9.81217, + "91": 9.33796, + "92": 9.3534, + "93": 9.07315, + "94": 8.83127, "95": 9.51524, + "96": 9.52183, + "97": 9.31012, + "98": 9.66532, + "99": 8.88179, "100": 9.39375 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1640.0, + "2": 1738.0, + "3": 1638.0, + "4": 1810.0, "5": 1755.0, + "6": 1681.0, + "7": 1781.0, + "8": 1502.0, + "9": 1817.0, "10": 1394.0, + "11": 1927.0, + "12": 1691.0, + "13": 1901.0, + "14": 1631.0, "15": 1765.0, + "16": 1864.0, + "17": 1704.0, + "18": 1771.0, + "19": 1817.0, "20": 1831.0, + "21": 1813.0, + "22": 1673.0, + "23": 2005.0, + "24": 1553.0, "25": 1577.0, + "26": 1656.0, + "27": 1734.0, + "28": 1896.0, + "29": 2051.0, "30": 1897.0, + "31": 1452.0, + "32": 1785.0, + "33": 2061.0, + "34": 1857.0, "35": 1920.0, + "36": 1990.0, + "37": 2191.0, + "38": 2142.0, + "39": 2215.0, "40": 2166.0, + "41": 2154.0, + "42": 2148.0, + "43": 1881.0, + "44": 2066.0, "45": 1952.0, + "46": 2217.0, + "47": 2513.0, + "48": 2356.0, + "49": 2294.0, "50": 2140.0, + "51": 2509.0, + "52": 2528.0, + "53": 2851.0, + "54": 2747.0, "55": 2333.0, + "56": 2724.0, + "57": 2315.0, + "58": 2754.0, + "59": 2774.0, "60": 2336.0, + "61": 2912.0, + "62": 2415.0, + "63": 2341.0, + "64": 2837.0, "65": 2661.0, + "66": 3000.0, + "67": 2779.0, + "68": 2691.0, + "69": 2793.0, "70": 3183.0, + "71": 2962.0, + "72": 2393.0, + "73": 2997.0, + "74": 1935.0, "75": 2463.0, + "76": 3065.0, + "77": 3184.0, + "78": 3154.0, + "79": 3127.0, "80": 3286.0, + "81": 3386.0, + "82": 3128.0, + "83": 2608.0, + "84": 3079.0, "85": 3260.0, + "86": 2687.0, + "87": 3591.0, + "88": 3035.0, + "89": 3165.0, "90": 3166.0, + "91": 2690.0, + "92": 2897.0, + "93": 2630.0, + "94": 3348.0, "95": 3349.0, + "96": 3288.0, + "97": 3055.0, + "98": 3516.0, + "99": 3035.0, "100": 3109.0 } 
}, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 746194432.0, + "2": 746194432.0, + "3": 746194432.0, + "4": 746194432.0, "5": 746194432.0, + "6": 746194432.0, + "7": 746194432.0, + "8": 746194432.0, + "9": 746194432.0, "10": 746194432.0, + "11": 746194432.0, + "12": 746194432.0, + "13": 746194432.0, + "14": 746194432.0, "15": 746194432.0, + "16": 746194432.0, + "17": 746194432.0, + "18": 746194432.0, + "19": 746194432.0, "20": 746194432.0, + "21": 746194432.0, + "22": 746194432.0, + "23": 746194432.0, + "24": 746194432.0, "25": 746194432.0, + "26": 746194432.0, + "27": 746194432.0, + "28": 746194432.0, + "29": 746194432.0, "30": 746194432.0, + "31": 746194432.0, + "32": 746194432.0, + "33": 746194432.0, + "34": 746194432.0, "35": 746194432.0, + "36": 746194432.0, + "37": 746194432.0, + "38": 746194432.0, + "39": 746194432.0, "40": 746194432.0, + "41": 746194432.0, + "42": 746194432.0, + "43": 746194432.0, + "44": 746194432.0, "45": 746194432.0, + "46": 746194432.0, + "47": 746194432.0, + "48": 746194432.0, + "49": 746194432.0, "50": 746194432.0, + "51": 746194432.0, + "52": 746194432.0, + "53": 746194432.0, + "54": 746194432.0, "55": 746194432.0, + "56": 746194432.0, + "57": 746194432.0, + "58": 746194432.0, + "59": 746194432.0, "60": 746194432.0, + "61": 746194432.0, + "62": 746194432.0, + "63": 746194432.0, + "64": 746194432.0, "65": 746194432.0, + "66": 746194432.0, + "67": 746194432.0, + "68": 746194432.0, + "69": 746194432.0, "70": 746194432.0, + "71": 746194432.0, + "72": 746194432.0, + "73": 746194432.0, + "74": 746194432.0, "75": 746194432.0, + "76": 746194432.0, + "77": 746194432.0, + "78": 746194432.0, + "79": 746194432.0, "80": 746194432.0, + "81": 746194432.0, + "82": 746194432.0, + "83": 746194432.0, + "84": 746194432.0, "85": 746194432.0, + "86": 746194432.0, + "87": 746194432.0, + "88": 746194432.0, + "89": 746194432.0, "90": 746194432.0, + "91": 746194432.0, + "92": 
746194432.0, + "93": 746194432.0, + "94": 746194432.0, "95": 746194432.0, + "96": 746194432.0, + "97": 746194432.0, + "98": 746194432.0, + "99": 746194432.0, "100": 746194432.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1926153216.0, + "2": 2209851392.0, + "3": 2209851392.0, + "4": 2209851392.0, "5": 2209851392.0, + "6": 2209851392.0, + "7": 2209851392.0, + "8": 2209851392.0, + "9": 2209851392.0, "10": 2209851392.0, + "11": 2209851392.0, + "12": 2209851392.0, + "13": 2209851392.0, + "14": 2209851392.0, "15": 2209851392.0, + "16": 2209851392.0, + "17": 2209851392.0, + "18": 2209851392.0, + "19": 2209851392.0, "20": 2209851392.0, + "21": 2209851392.0, + "22": 2209851392.0, + "23": 2209851392.0, + "24": 2209851392.0, "25": 2209851392.0, + "26": 2209851392.0, + "27": 2209851392.0, + "28": 2209851392.0, + "29": 2209851392.0, "30": 2209851392.0, + "31": 2209851392.0, + "32": 2209851392.0, + "33": 2209851392.0, + "34": 2209851392.0, "35": 2209851392.0, + "36": 2209851392.0, + "37": 2209851392.0, + "38": 2209851392.0, + "39": 2209851392.0, "40": 2209851392.0, + "41": 2209851392.0, + "42": 2209851392.0, + "43": 2209851392.0, + "44": 2209851392.0, "45": 2209851392.0, + "46": 2209851392.0, + "47": 2209851392.0, + "48": 2209851392.0, + "49": 2209851392.0, "50": 2209851392.0, + "51": 2209851392.0, + "52": 2209851392.0, + "53": 2209851392.0, + "54": 2209851392.0, "55": 2209851392.0, + "56": 2209851392.0, + "57": 2209851392.0, + "58": 2209851392.0, + "59": 2209851392.0, "60": 2209851392.0, + "61": 2209851392.0, + "62": 2209851392.0, + "63": 2209851392.0, + "64": 2209851392.0, "65": 2209851392.0, + "66": 2209851392.0, + "67": 2209851392.0, + "68": 2209851392.0, + "69": 2209851392.0, "70": 2209851392.0, + "71": 2209851392.0, + "72": 2209851392.0, + "73": 2209851392.0, + "74": 2209851392.0, "75": 2209851392.0, + "76": 2209851392.0, + "77": 2209851392.0, + "78": 2209851392.0, + "79": 2209851392.0, 
"80": 2209851392.0, + "81": 2209851392.0, + "82": 2209851392.0, + "83": 2209851392.0, + "84": 2209851392.0, "85": 2209851392.0, + "86": 2209851392.0, + "87": 2209851392.0, + "88": 2209851392.0, + "89": 2209851392.0, "90": 2209851392.0, + "91": 2209851392.0, + "92": 2209851392.0, + "93": 2209851392.0, + "94": 2209851392.0, "95": 2209851392.0, + "96": 2209851392.0, + "97": 2209851392.0, + "98": 2209851392.0, + "99": 2209851392.0, "100": 2209851392.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 11.76041, - "5": 0.0928, - "10": 0.09401, - "15": 0.09246, - "20": 0.09284, - "25": 0.09344, - "30": 0.09267, - "35": 0.09314, - "40": 0.0926, - "45": 0.09244, - "50": 0.0925, - "55": 0.09481, - "60": 0.09314, - "65": 0.09243, - "70": 0.09297, - "75": 0.09278, - "80": 0.0928, - "85": 0.09198, - "90": 0.09259, - "95": 0.09244, - "100": 0.09223 + "1": 12.82981, + "2": 0.12202, + "3": 0.10747, + "4": 0.10702, + "5": 0.10713, + "6": 0.10667, + "7": 0.10627, + "8": 0.10699, + "9": 0.10657, + "10": 0.10715, + "11": 0.10642, + "12": 0.10705, + "13": 0.10495, + "14": 0.10784, + "15": 0.1107, + "16": 0.1105, + "17": 0.11162, + "18": 0.11128, + "19": 0.11269, + "20": 0.10842, + "21": 0.10915, + "22": 0.10863, + "23": 0.10818, + "24": 0.10975, + "25": 0.10577, + "26": 0.10559, + "27": 0.10659, + "28": 0.10616, + "29": 0.10712, + "30": 0.10735, + "31": 0.1064, + "32": 0.10562, + "33": 0.10538, + "34": 0.10678, + "35": 0.10507, + "36": 0.10502, + "37": 0.10532, + "38": 0.10636, + "39": 0.10511, + "40": 0.10497, + "41": 0.10557, + "42": 0.10413, + "43": 0.10684, + "44": 0.10567, + "45": 0.10719, + "46": 0.10887, + "47": 0.11215, + "48": 0.11102, + "49": 0.10907, + "50": 0.10761, + "51": 0.12141, + "52": 0.13372, + "53": 0.10585, + "54": 0.10595, + "55": 0.10712, + "56": 0.10573, + "57": 0.10825, + "58": 0.10991, + "59": 0.10753, + "60": 0.10565, + "61": 0.10639, + "62": 0.11, + "63": 0.10465, + "64": 0.10596, + 
"65": 0.10785, + "66": 0.11597, + "67": 0.10697, + "68": 0.10722, + "69": 0.10693, + "70": 0.1079, + "71": 0.10852, + "72": 0.10729, + "73": 0.10617, + "74": 0.1046, + "75": 0.10476, + "76": 0.11096, + "77": 0.10553, + "78": 0.10593, + "79": 0.1069, + "80": 0.10615, + "81": 0.11416, + "82": 0.10544, + "83": 0.10562, + "84": 0.10576, + "85": 0.10568, + "86": 0.10984, + "87": 0.10814, + "88": 0.10556, + "89": 0.10524, + "90": 0.1051, + "91": 0.11373, + "92": 0.10616, + "93": 0.10743, + "94": 0.10695, + "95": 0.11373, + "96": 0.10777, + "97": 0.10685, + "98": 0.10614, + "99": 0.10571, + "100": 0.10707 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..f5278baae82 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85599, + "2": 10.8648, + "3": 10.87042, + "4": 10.85288, + "5": 10.88397, + "6": 10.89184, + "7": 10.86732, + "8": 10.87057, + "9": 10.87432, + "10": 10.84185, + "11": 10.87989, + "12": 10.87417, + "13": 10.87884, + "14": 10.89184, + "15": 10.82659, + "16": 10.83027, + "17": 10.80933, + "18": 10.81431, + "19": 10.8167, + "20": 10.72165, + "21": 10.70557, + "22": 10.56881, + "23": 10.72025, + "24": 10.61194, + "25": 10.55765, + "26": 10.61149, + "27": 10.62635, + "28": 10.57155, + "29": 10.58212, + "30": 10.36267, + "31": 10.11682, + "32": 10.4682, + "33": 10.45411, + "34": 10.21121, + "35": 10.27207, + "36": 10.22246, + "37": 10.34079, + "38": 
10.18964, + "39": 10.40228, + "40": 10.08758, + "41": 10.13714, + "42": 10.21175, + "43": 9.82878, + "44": 9.96255, + "45": 9.82846, + "46": 9.80952, + "47": 10.13734, + "48": 9.84349, + "49": 9.52888, + "50": 9.91046, + "51": 9.85075, + "52": 9.73181, + "53": 10.06388, + "54": 9.95432, + "55": 9.87204, + "56": 9.61823, + "57": 9.47467, + "58": 9.82802, + "59": 9.57962, + "60": 9.49074, + "61": 9.68473, + "62": 9.99245, + "63": 9.38364, + "64": 9.77766, + "65": 8.94008, + "66": 9.70099, + "67": 9.3605, + "68": 9.77766, + "69": 9.78865, + "70": 9.73813, + "71": 9.61811, + "72": 9.58068, + "73": 9.4964, + "74": 8.93812, + "75": 9.42081, + "76": 9.07416, + "77": 10.06077, + "78": 9.71952, + "79": 9.37088, + "80": 9.39874, + "81": 9.47802, + "82": 9.69299, + "83": 9.30276, + "84": 9.41548, + "85": 9.60883, + "86": 9.07461, + "87": 9.58826, + "88": 9.74392, + "89": 9.5951, + "90": 9.81217, + "91": 9.33796, + "92": 9.3534, + "93": 9.07315, + "94": 8.83127, + "95": 9.51524, + "96": 9.52183, + "97": 9.31012, + "98": 9.66532, + "99": 8.88179, + "100": 9.39375 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1640.0, + "2": 1738.0, + "3": 1638.0, + "4": 1810.0, + "5": 1755.0, + "6": 1681.0, + "7": 1781.0, + "8": 1502.0, + "9": 1817.0, + "10": 1394.0, + "11": 1927.0, + "12": 1691.0, + "13": 1901.0, + "14": 1631.0, + "15": 1765.0, + "16": 1864.0, + "17": 1704.0, + "18": 1771.0, + "19": 1817.0, + "20": 1831.0, + "21": 1813.0, + "22": 1673.0, + "23": 2005.0, + "24": 1553.0, + "25": 1577.0, + "26": 1656.0, + "27": 1734.0, + "28": 1896.0, + "29": 2051.0, + "30": 1897.0, + "31": 1452.0, + "32": 1785.0, + "33": 2061.0, + "34": 1857.0, + "35": 1920.0, + "36": 1990.0, + "37": 2191.0, + "38": 2142.0, + "39": 2215.0, + "40": 2166.0, + "41": 2154.0, + "42": 2148.0, + "43": 1881.0, + "44": 2066.0, + "45": 1952.0, + "46": 2217.0, + "47": 2513.0, + "48": 2356.0, + "49": 2294.0, + "50": 2140.0, + "51": 2509.0, + "52": 2528.0, + "53": 
2851.0, + "54": 2747.0, + "55": 2333.0, + "56": 2724.0, + "57": 2315.0, + "58": 2754.0, + "59": 2774.0, + "60": 2336.0, + "61": 2912.0, + "62": 2415.0, + "63": 2341.0, + "64": 2837.0, + "65": 2661.0, + "66": 3000.0, + "67": 2779.0, + "68": 2691.0, + "69": 2793.0, + "70": 3183.0, + "71": 2962.0, + "72": 2393.0, + "73": 2997.0, + "74": 1935.0, + "75": 2463.0, + "76": 3065.0, + "77": 3184.0, + "78": 3154.0, + "79": 3127.0, + "80": 3286.0, + "81": 3386.0, + "82": 3128.0, + "83": 2608.0, + "84": 3079.0, + "85": 3260.0, + "86": 2687.0, + "87": 3591.0, + "88": 3035.0, + "89": 3165.0, + "90": 3166.0, + "91": 2690.0, + "92": 2897.0, + "93": 2630.0, + "94": 3348.0, + "95": 3349.0, + "96": 3288.0, + "97": 3055.0, + "98": 3516.0, + "99": 3035.0, + "100": 3109.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 746194432.0, + "2": 746194432.0, + "3": 746194432.0, + "4": 746194432.0, + "5": 746194432.0, + "6": 746194432.0, + "7": 746194432.0, + "8": 746194432.0, + "9": 746194432.0, + "10": 746194432.0, + "11": 746194432.0, + "12": 746194432.0, + "13": 746194432.0, + "14": 746194432.0, + "15": 746194432.0, + "16": 746194432.0, + "17": 746194432.0, + "18": 746194432.0, + "19": 746194432.0, + "20": 746194432.0, + "21": 746194432.0, + "22": 746194432.0, + "23": 746194432.0, + "24": 746194432.0, + "25": 746194432.0, + "26": 746194432.0, + "27": 746194432.0, + "28": 746194432.0, + "29": 746194432.0, + "30": 746194432.0, + "31": 746194432.0, + "32": 746194432.0, + "33": 746194432.0, + "34": 746194432.0, + "35": 746194432.0, + "36": 746194432.0, + "37": 746194432.0, + "38": 746194432.0, + "39": 746194432.0, + "40": 746194432.0, + "41": 746194432.0, + "42": 746194432.0, + "43": 746194432.0, + "44": 746194432.0, + "45": 746194432.0, + "46": 746194432.0, + "47": 746194432.0, + "48": 746194432.0, + "49": 746194432.0, + "50": 746194432.0, + "51": 746194432.0, + "52": 746194432.0, + "53": 746194432.0, + "54": 746194432.0, + 
"55": 746194432.0, + "56": 746194432.0, + "57": 746194432.0, + "58": 746194432.0, + "59": 746194432.0, + "60": 746194432.0, + "61": 746194432.0, + "62": 746194432.0, + "63": 746194432.0, + "64": 746194432.0, + "65": 746194432.0, + "66": 746194432.0, + "67": 746194432.0, + "68": 746194432.0, + "69": 746194432.0, + "70": 746194432.0, + "71": 746194432.0, + "72": 746194432.0, + "73": 746194432.0, + "74": 746194432.0, + "75": 746194432.0, + "76": 746194432.0, + "77": 746194432.0, + "78": 746194432.0, + "79": 746194432.0, + "80": 746194432.0, + "81": 746194432.0, + "82": 746194432.0, + "83": 746194432.0, + "84": 746194432.0, + "85": 746194432.0, + "86": 746194432.0, + "87": 746194432.0, + "88": 746194432.0, + "89": 746194432.0, + "90": 746194432.0, + "91": 746194432.0, + "92": 746194432.0, + "93": 746194432.0, + "94": 746194432.0, + "95": 746194432.0, + "96": 746194432.0, + "97": 746194432.0, + "98": 746194432.0, + "99": 746194432.0, + "100": 746194432.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1926153216.0, + "2": 2209851392.0, + "3": 2209851392.0, + "4": 2209851392.0, + "5": 2209851392.0, + "6": 2209851392.0, + "7": 2209851392.0, + "8": 2209851392.0, + "9": 2209851392.0, + "10": 2209851392.0, + "11": 2209851392.0, + "12": 2209851392.0, + "13": 2209851392.0, + "14": 2209851392.0, + "15": 2209851392.0, + "16": 2209851392.0, + "17": 2209851392.0, + "18": 2209851392.0, + "19": 2209851392.0, + "20": 2209851392.0, + "21": 2209851392.0, + "22": 2209851392.0, + "23": 2209851392.0, + "24": 2209851392.0, + "25": 2209851392.0, + "26": 2209851392.0, + "27": 2209851392.0, + "28": 2209851392.0, + "29": 2209851392.0, + "30": 2209851392.0, + "31": 2209851392.0, + "32": 2209851392.0, + "33": 2209851392.0, + "34": 2209851392.0, + "35": 2209851392.0, + "36": 2209851392.0, + "37": 2209851392.0, + "38": 2209851392.0, + "39": 2209851392.0, + "40": 2209851392.0, + "41": 2209851392.0, + "42": 2209851392.0, + 
"43": 2209851392.0, + "44": 2209851392.0, + "45": 2209851392.0, + "46": 2209851392.0, + "47": 2209851392.0, + "48": 2209851392.0, + "49": 2209851392.0, + "50": 2209851392.0, + "51": 2209851392.0, + "52": 2209851392.0, + "53": 2209851392.0, + "54": 2209851392.0, + "55": 2209851392.0, + "56": 2209851392.0, + "57": 2209851392.0, + "58": 2209851392.0, + "59": 2209851392.0, + "60": 2209851392.0, + "61": 2209851392.0, + "62": 2209851392.0, + "63": 2209851392.0, + "64": 2209851392.0, + "65": 2209851392.0, + "66": 2209851392.0, + "67": 2209851392.0, + "68": 2209851392.0, + "69": 2209851392.0, + "70": 2209851392.0, + "71": 2209851392.0, + "72": 2209851392.0, + "73": 2209851392.0, + "74": 2209851392.0, + "75": 2209851392.0, + "76": 2209851392.0, + "77": 2209851392.0, + "78": 2209851392.0, + "79": 2209851392.0, + "80": 2209851392.0, + "81": 2209851392.0, + "82": 2209851392.0, + "83": 2209851392.0, + "84": 2209851392.0, + "85": 2209851392.0, + "86": 2209851392.0, + "87": 2209851392.0, + "88": 2209851392.0, + "89": 2209851392.0, + "90": 2209851392.0, + "91": 2209851392.0, + "92": 2209851392.0, + "93": 2209851392.0, + "94": 2209851392.0, + "95": 2209851392.0, + "96": 2209851392.0, + "97": 2209851392.0, + "98": 2209851392.0, + "99": 2209851392.0, + "100": 2209851392.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.70442, + "2": 0.13019, + "3": 0.0979, + "4": 0.09686, + "5": 0.09768, + "6": 0.09685, + "7": 0.09593, + "8": 0.09527, + "9": 0.09564, + "10": 0.09666, + "11": 0.09434, + "12": 0.09507, + "13": 0.09515, + "14": 0.09479, + "15": 0.09471, + "16": 0.09457, + "17": 0.09471, + "18": 0.09471, + "19": 0.09425, + "20": 0.09404, + "21": 0.09478, + "22": 0.09431, + "23": 0.09582, + "24": 0.09629, + "25": 0.09606, + "26": 0.09601, + "27": 0.09669, + "28": 0.0955, + "29": 0.09877, + "30": 0.09681, + "31": 0.09783, + "32": 0.09679, + "33": 0.09636, + "34": 0.09497, + "35": 0.0955, + "36": 0.09533, + "37": 0.09488, + 
"38": 0.10172, + "39": 0.09491, + "40": 0.09435, + "41": 0.09527, + "42": 0.09493, + "43": 0.10246, + "44": 0.10248, + "45": 0.10163, + "46": 0.10184, + "47": 0.10193, + "48": 0.10237, + "49": 0.10206, + "50": 0.10141, + "51": 0.11047, + "52": 0.12328, + "53": 0.10274, + "54": 0.0969, + "55": 0.09666, + "56": 0.09655, + "57": 0.09837, + "58": 0.10123, + "59": 0.10037, + "60": 0.09607, + "61": 0.09522, + "62": 0.09645, + "63": 0.09756, + "64": 0.09502, + "65": 0.09541, + "66": 0.09681, + "67": 0.09707, + "68": 0.09483, + "69": 0.09531, + "70": 0.0962, + "71": 0.09572, + "72": 0.09677, + "73": 0.09704, + "74": 0.09624, + "75": 0.09474, + "76": 0.09532, + "77": 0.09678, + "78": 0.09534, + "79": 0.09817, + "80": 0.09669, + "81": 0.09724, + "82": 0.09754, + "83": 0.09837, + "84": 0.09528, + "85": 0.09597, + "86": 0.09653, + "87": 0.09565, + "88": 0.0961, + "89": 0.09685, + "90": 0.0967, + "91": 0.0944, + "92": 0.09565, + "93": 0.09526, + "94": 0.09573, + "95": 0.09396, + "96": 0.09557, + "97": 0.09618, + "98": 0.0957, + "99": 0.09558, + "100": 0.09514 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..50639a30816 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85599, + "2": 10.8648, + "3": 10.87042, + "4": 10.85288, + "5": 10.88397, + "6": 10.89184, + "7": 10.86732, + "8": 10.87057, + "9": 10.87432, + "10": 10.84185, + "11": 10.87989, + "12": 10.87417, + "13": 
10.87884, + "14": 10.89184, + "15": 10.82659, + "16": 10.83027, + "17": 10.80933, + "18": 10.81431, + "19": 10.8167, + "20": 10.72165, + "21": 10.70557, + "22": 10.56881, + "23": 10.72025, + "24": 10.61194, + "25": 10.55765, + "26": 10.61149, + "27": 10.62635, + "28": 10.57155, + "29": 10.58212, + "30": 10.36267, + "31": 10.11682, + "32": 10.4682, + "33": 10.45411, + "34": 10.21121, + "35": 10.27207, + "36": 10.22246, + "37": 10.34079, + "38": 10.18964, + "39": 10.40228, + "40": 10.08758, + "41": 10.13714, + "42": 10.21175, + "43": 9.82878, + "44": 9.96255, + "45": 9.82846, + "46": 9.80952, + "47": 10.13734, + "48": 9.84349, + "49": 9.52888, + "50": 9.91046, + "51": 9.85075, + "52": 9.73181, + "53": 10.06388, + "54": 9.95432, + "55": 9.87204, + "56": 9.61823, + "57": 9.47467, + "58": 9.82802, + "59": 9.57962, + "60": 9.49074, + "61": 9.68473, + "62": 9.99245, + "63": 9.38364, + "64": 9.77766, + "65": 8.94008, + "66": 9.70099, + "67": 9.3605, + "68": 9.77766, + "69": 9.78865, + "70": 9.73813, + "71": 9.61811, + "72": 9.58068, + "73": 9.4964, + "74": 8.93812, + "75": 9.42081, + "76": 9.07416, + "77": 10.06077, + "78": 9.71952, + "79": 9.37088, + "80": 9.39874, + "81": 9.47802, + "82": 9.69299, + "83": 9.30276, + "84": 9.41548, + "85": 9.60883, + "86": 9.07461, + "87": 9.58826, + "88": 9.74392, + "89": 9.5951, + "90": 9.81217, + "91": 9.33796, + "92": 9.3534, + "93": 9.07315, + "94": 8.83127, + "95": 9.51524, + "96": 9.52183, + "97": 9.31012, + "98": 9.66532, + "99": 8.88179, + "100": 9.39375 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1640.0, + "2": 1738.0, + "3": 1638.0, + "4": 1810.0, + "5": 1755.0, + "6": 1681.0, + "7": 1781.0, + "8": 1502.0, + "9": 1817.0, + "10": 1394.0, + "11": 1927.0, + "12": 1691.0, + "13": 1901.0, + "14": 1631.0, + "15": 1765.0, + "16": 1864.0, + "17": 1704.0, + "18": 1771.0, + "19": 1817.0, + "20": 1831.0, + "21": 1813.0, + "22": 1673.0, + "23": 2005.0, + "24": 1553.0, + "25": 
1577.0, + "26": 1656.0, + "27": 1734.0, + "28": 1896.0, + "29": 2051.0, + "30": 1897.0, + "31": 1452.0, + "32": 1785.0, + "33": 2061.0, + "34": 1857.0, + "35": 1920.0, + "36": 1990.0, + "37": 2191.0, + "38": 2142.0, + "39": 2215.0, + "40": 2166.0, + "41": 2154.0, + "42": 2148.0, + "43": 1881.0, + "44": 2066.0, + "45": 1952.0, + "46": 2217.0, + "47": 2513.0, + "48": 2356.0, + "49": 2294.0, + "50": 2140.0, + "51": 2509.0, + "52": 2528.0, + "53": 2851.0, + "54": 2747.0, + "55": 2333.0, + "56": 2724.0, + "57": 2315.0, + "58": 2754.0, + "59": 2774.0, + "60": 2336.0, + "61": 2912.0, + "62": 2415.0, + "63": 2341.0, + "64": 2837.0, + "65": 2661.0, + "66": 3000.0, + "67": 2779.0, + "68": 2691.0, + "69": 2793.0, + "70": 3183.0, + "71": 2962.0, + "72": 2393.0, + "73": 2997.0, + "74": 1935.0, + "75": 2463.0, + "76": 3065.0, + "77": 3184.0, + "78": 3154.0, + "79": 3127.0, + "80": 3286.0, + "81": 3386.0, + "82": 3128.0, + "83": 2608.0, + "84": 3079.0, + "85": 3260.0, + "86": 2687.0, + "87": 3591.0, + "88": 3035.0, + "89": 3165.0, + "90": 3166.0, + "91": 2690.0, + "92": 2897.0, + "93": 2630.0, + "94": 3348.0, + "95": 3349.0, + "96": 3288.0, + "97": 3055.0, + "98": 3516.0, + "99": 3035.0, + "100": 3109.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 746194432.0, + "2": 746194432.0, + "3": 746194432.0, + "4": 746194432.0, + "5": 746194432.0, + "6": 746194432.0, + "7": 746194432.0, + "8": 746194432.0, + "9": 746194432.0, + "10": 746194432.0, + "11": 746194432.0, + "12": 746194432.0, + "13": 746194432.0, + "14": 746194432.0, + "15": 746194432.0, + "16": 746194432.0, + "17": 746194432.0, + "18": 746194432.0, + "19": 746194432.0, + "20": 746194432.0, + "21": 746194432.0, + "22": 746194432.0, + "23": 746194432.0, + "24": 746194432.0, + "25": 746194432.0, + "26": 746194432.0, + "27": 746194432.0, + "28": 746194432.0, + "29": 746194432.0, + "30": 746194432.0, + "31": 746194432.0, + "32": 746194432.0, + "33": 
746194432.0, + "34": 746194432.0, + "35": 746194432.0, + "36": 746194432.0, + "37": 746194432.0, + "38": 746194432.0, + "39": 746194432.0, + "40": 746194432.0, + "41": 746194432.0, + "42": 746194432.0, + "43": 746194432.0, + "44": 746194432.0, + "45": 746194432.0, + "46": 746194432.0, + "47": 746194432.0, + "48": 746194432.0, + "49": 746194432.0, + "50": 746194432.0, + "51": 746194432.0, + "52": 746194432.0, + "53": 746194432.0, + "54": 746194432.0, + "55": 746194432.0, + "56": 746194432.0, + "57": 746194432.0, + "58": 746194432.0, + "59": 746194432.0, + "60": 746194432.0, + "61": 746194432.0, + "62": 746194432.0, + "63": 746194432.0, + "64": 746194432.0, + "65": 746194432.0, + "66": 746194432.0, + "67": 746194432.0, + "68": 746194432.0, + "69": 746194432.0, + "70": 746194432.0, + "71": 746194432.0, + "72": 746194432.0, + "73": 746194432.0, + "74": 746194432.0, + "75": 746194432.0, + "76": 746194432.0, + "77": 746194432.0, + "78": 746194432.0, + "79": 746194432.0, + "80": 746194432.0, + "81": 746194432.0, + "82": 746194432.0, + "83": 746194432.0, + "84": 746194432.0, + "85": 746194432.0, + "86": 746194432.0, + "87": 746194432.0, + "88": 746194432.0, + "89": 746194432.0, + "90": 746194432.0, + "91": 746194432.0, + "92": 746194432.0, + "93": 746194432.0, + "94": 746194432.0, + "95": 746194432.0, + "96": 746194432.0, + "97": 746194432.0, + "98": 746194432.0, + "99": 746194432.0, + "100": 746194432.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1926153216.0, + "2": 2209851392.0, + "3": 2209851392.0, + "4": 2209851392.0, + "5": 2209851392.0, + "6": 2209851392.0, + "7": 2209851392.0, + "8": 2209851392.0, + "9": 2209851392.0, + "10": 2209851392.0, + "11": 2209851392.0, + "12": 2209851392.0, + "13": 2209851392.0, + "14": 2209851392.0, + "15": 2209851392.0, + "16": 2209851392.0, + "17": 2209851392.0, + "18": 2209851392.0, + "19": 2209851392.0, + "20": 2209851392.0, + "21": 2209851392.0, + "22": 
2209851392.0, + "23": 2209851392.0, + "24": 2209851392.0, + "25": 2209851392.0, + "26": 2209851392.0, + "27": 2209851392.0, + "28": 2209851392.0, + "29": 2209851392.0, + "30": 2209851392.0, + "31": 2209851392.0, + "32": 2209851392.0, + "33": 2209851392.0, + "34": 2209851392.0, + "35": 2209851392.0, + "36": 2209851392.0, + "37": 2209851392.0, + "38": 2209851392.0, + "39": 2209851392.0, + "40": 2209851392.0, + "41": 2209851392.0, + "42": 2209851392.0, + "43": 2209851392.0, + "44": 2209851392.0, + "45": 2209851392.0, + "46": 2209851392.0, + "47": 2209851392.0, + "48": 2209851392.0, + "49": 2209851392.0, + "50": 2209851392.0, + "51": 2209851392.0, + "52": 2209851392.0, + "53": 2209851392.0, + "54": 2209851392.0, + "55": 2209851392.0, + "56": 2209851392.0, + "57": 2209851392.0, + "58": 2209851392.0, + "59": 2209851392.0, + "60": 2209851392.0, + "61": 2209851392.0, + "62": 2209851392.0, + "63": 2209851392.0, + "64": 2209851392.0, + "65": 2209851392.0, + "66": 2209851392.0, + "67": 2209851392.0, + "68": 2209851392.0, + "69": 2209851392.0, + "70": 2209851392.0, + "71": 2209851392.0, + "72": 2209851392.0, + "73": 2209851392.0, + "74": 2209851392.0, + "75": 2209851392.0, + "76": 2209851392.0, + "77": 2209851392.0, + "78": 2209851392.0, + "79": 2209851392.0, + "80": 2209851392.0, + "81": 2209851392.0, + "82": 2209851392.0, + "83": 2209851392.0, + "84": 2209851392.0, + "85": 2209851392.0, + "86": 2209851392.0, + "87": 2209851392.0, + "88": 2209851392.0, + "89": 2209851392.0, + "90": 2209851392.0, + "91": 2209851392.0, + "92": 2209851392.0, + "93": 2209851392.0, + "94": 2209851392.0, + "95": 2209851392.0, + "96": 2209851392.0, + "97": 2209851392.0, + "98": 2209851392.0, + "99": 2209851392.0, + "100": 2209851392.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.88983, + "2": 0.12288, + "3": 0.10944, + "4": 0.10822, + "5": 0.10919, + "6": 0.10835, + "7": 0.11035, + "8": 0.10879, + "9": 0.11001, + "10": 0.11009, + 
"11": 0.10945, + "12": 0.10868, + "13": 0.1086, + "14": 0.10899, + "15": 0.10852, + "16": 0.10822, + "17": 0.10818, + "18": 0.10877, + "19": 0.10888, + "20": 0.10828, + "21": 0.109, + "22": 0.108, + "23": 0.10722, + "24": 0.10731, + "25": 0.1075, + "26": 0.10744, + "27": 0.10843, + "28": 0.10831, + "29": 0.10841, + "30": 0.10718, + "31": 0.10837, + "32": 0.10773, + "33": 0.10792, + "34": 0.10698, + "35": 0.10976, + "36": 0.10758, + "37": 0.10825, + "38": 0.10781, + "39": 0.10912, + "40": 0.10847, + "41": 0.10786, + "42": 0.10767, + "43": 0.10761, + "44": 0.1076, + "45": 0.1078, + "46": 0.10992, + "47": 0.1061, + "48": 0.10654, + "49": 0.10566, + "50": 0.1066, + "51": 0.11234, + "52": 0.11065, + "53": 0.10795, + "54": 0.10668, + "55": 0.10678, + "56": 0.10889, + "57": 0.10802, + "58": 0.12482, + "59": 0.10666, + "60": 0.10637, + "61": 0.10776, + "62": 0.10743, + "63": 0.10782, + "64": 0.10634, + "65": 0.10744, + "66": 0.10859, + "67": 0.10949, + "68": 0.1075, + "69": 0.10803, + "70": 0.10688, + "71": 0.10797, + "72": 0.10752, + "73": 0.10816, + "74": 0.10734, + "75": 0.10832, + "76": 0.10815, + "77": 0.10868, + "78": 0.10839, + "79": 0.1074, + "80": 0.10866, + "81": 0.11122, + "82": 0.11035, + "83": 0.1101, + "84": 0.1122, + "85": 0.10866, + "86": 0.10915, + "87": 0.10842, + "88": 0.10723, + "89": 0.10849, + "90": 0.10814, + "91": 0.10833, + "92": 0.10719, + "93": 0.10725, + "94": 0.10754, + "95": 0.10758, + "96": 0.1082, + "97": 0.10768, + "98": 0.10708, + "99": 0.10785, + "100": 0.10841 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..5de8b526700 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.79205, + "2": 10.80272, + "3": 10.80707, + "4": 10.77315, + "5": 10.84695, + "6": 10.86789, + "7": 10.82655, + "8": 10.81333, + "9": 10.83441, + "10": 10.77106, + "11": 10.89149, + "12": 10.84617, + "13": 10.85969, + "14": 10.8812, + "15": 10.79093, + "16": 10.78328, + "17": 10.75926, + "18": 10.79337, + "19": 10.797, + "20": 10.68042, + "21": 10.66126, + "22": 10.50248, + "23": 10.71375, + "24": 10.55253, + "25": 10.50715, + "26": 10.58275, + "27": 10.58672, + "28": 10.55873, + "29": 10.56101, + "30": 10.33325, + "31": 10.08467, + "32": 10.44744, + "33": 10.44372, + "34": 10.2003, + "35": 10.25545, + "36": 10.19448, + "37": 10.32113, + "38": 10.1659, + "39": 10.37726, + "40": 10.05544, + "41": 10.13785, + "42": 10.19159, + "43": 9.80956, + "44": 9.92967, + "45": 9.80575, + "46": 9.81454, + "47": 10.12933, + "48": 9.82644, + "49": 9.51395, + "50": 9.89082, + "51": 9.8397, + "52": 9.73412, + "53": 10.05515, + "54": 9.94093, + "55": 9.87063, + "56": 9.61009, + "57": 9.46055, + "58": 9.81541, + "59": 9.57905, + "60": 9.48478, + "61": 9.68485, + "62": 9.97574, + "63": 9.36483, + "64": 9.76838, + "65": 8.94022, + "66": 9.68864, + "67": 9.36647, + "68": 9.77611, + "69": 9.78404, + "70": 9.72243, + "71": 9.6082, + "72": 9.57758, + "73": 9.48936, + "74": 8.9399, + "75": 9.40907, + "76": 9.08135, + "77": 10.05639, + "78": 9.72293, + "79": 9.36509, + "80": 9.3976, + "81": 9.47445, + "82": 9.68843, + "83": 9.30263, + "84": 9.4102, + "85": 9.60746, + "86": 9.07122, + "87": 9.58742, + "88": 9.74129, + "89": 9.59922, + "90": 9.81041, + "91": 9.33141, + "92": 9.35529, + "93": 9.07461, + "94": 8.82759, + "95": 9.5116, + "96": 9.51899, + "97": 9.30162, + "98": 9.66741, + "99": 8.88218, + "100": 
9.39722 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1580.0, + "2": 1686.0, + "3": 1726.0, + "4": 1795.0, + "5": 1901.0, + "6": 1778.0, + "7": 1963.0, + "8": 1704.0, + "9": 1811.0, + "10": 1346.0, + "11": 1849.0, + "12": 1683.0, + "13": 1888.0, + "14": 1711.0, + "15": 1926.0, + "16": 1841.0, + "17": 1931.0, + "18": 1716.0, + "19": 1765.0, + "20": 1643.0, + "21": 1884.0, + "22": 1626.0, + "23": 1954.0, + "24": 1715.0, + "25": 1683.0, + "26": 1679.0, + "27": 1817.0, + "28": 2019.0, + "29": 1946.0, + "30": 1867.0, + "31": 1544.0, + "32": 1832.0, + "33": 2119.0, + "34": 1921.0, + "35": 2020.0, + "36": 1953.0, + "37": 2350.0, + "38": 2210.0, + "39": 2319.0, + "40": 2252.0, + "41": 2449.0, + "42": 2364.0, + "43": 2089.0, + "44": 2094.0, + "45": 2243.0, + "46": 2335.0, + "47": 2406.0, + "48": 2410.0, + "49": 2341.0, + "50": 2459.0, + "51": 2611.0, + "52": 2427.0, + "53": 2838.0, + "54": 2632.0, + "55": 2291.0, + "56": 2663.0, + "57": 2276.0, + "58": 2777.0, + "59": 2601.0, + "60": 2404.0, + "61": 2985.0, + "62": 2595.0, + "63": 2454.0, + "64": 3101.0, + "65": 2474.0, + "66": 3006.0, + "67": 2671.0, + "68": 2874.0, + "69": 2956.0, + "70": 3102.0, + "71": 2891.0, + "72": 2543.0, + "73": 2860.0, + "74": 1888.0, + "75": 2603.0, + "76": 2813.0, + "77": 3361.0, + "78": 3252.0, + "79": 3007.0, + "80": 3420.0, + "81": 3624.0, + "82": 3184.0, + "83": 2708.0, + "84": 3138.0, + "85": 3388.0, + "86": 2619.0, + "87": 3682.0, + "88": 3074.0, + "89": 3260.0, + "90": 2904.0, + "91": 2634.0, + "92": 3097.0, + "93": 2745.0, + "94": 3484.0, + "95": 3333.0, + "96": 3292.0, + "97": 3141.0, + "98": 3550.0, + "99": 3170.0, + "100": 3347.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 714736640.0, + "2": 714736640.0, + "3": 714736640.0, + "4": 714736640.0, + "5": 714736640.0, + "6": 714736640.0, + "7": 714736640.0, + "8": 714736640.0, + "9": 714736640.0, + "10": 
714736640.0, + "11": 714736640.0, + "12": 714736640.0, + "13": 714736640.0, + "14": 714736640.0, + "15": 714736640.0, + "16": 714736640.0, + "17": 714736640.0, + "18": 714736640.0, + "19": 714736640.0, + "20": 714736640.0, + "21": 714736640.0, + "22": 714736640.0, + "23": 714736640.0, + "24": 714736640.0, + "25": 714736640.0, + "26": 714736640.0, + "27": 714736640.0, + "28": 714736640.0, + "29": 714736640.0, + "30": 714736640.0, + "31": 714736640.0, + "32": 714736640.0, + "33": 714736640.0, + "34": 714736640.0, + "35": 714736640.0, + "36": 714736640.0, + "37": 714736640.0, + "38": 714736640.0, + "39": 714736640.0, + "40": 714736640.0, + "41": 714736640.0, + "42": 714736640.0, + "43": 714736640.0, + "44": 714736640.0, + "45": 714736640.0, + "46": 714736640.0, + "47": 714736640.0, + "48": 714736640.0, + "49": 714736640.0, + "50": 714736640.0, + "51": 714736640.0, + "52": 714736640.0, + "53": 714736640.0, + "54": 714736640.0, + "55": 714736640.0, + "56": 714736640.0, + "57": 714736640.0, + "58": 714736640.0, + "59": 714736640.0, + "60": 714736640.0, + "61": 714736640.0, + "62": 714736640.0, + "63": 714736640.0, + "64": 714736640.0, + "65": 714736640.0, + "66": 714736640.0, + "67": 714736640.0, + "68": 714736640.0, + "69": 714736640.0, + "70": 714736640.0, + "71": 714736640.0, + "72": 714736640.0, + "73": 714736640.0, + "74": 714736640.0, + "75": 714736640.0, + "76": 714736640.0, + "77": 714736640.0, + "78": 714736640.0, + "79": 714736640.0, + "80": 714736640.0, + "81": 714736640.0, + "82": 714736640.0, + "83": 714736640.0, + "84": 714736640.0, + "85": 714736640.0, + "86": 714736640.0, + "87": 714736640.0, + "88": 714736640.0, + "89": 714736640.0, + "90": 714736640.0, + "91": 714736640.0, + "92": 714736640.0, + "93": 714736640.0, + "94": 714736640.0, + "95": 714736640.0, + "96": 714736640.0, + "97": 714736640.0, + "98": 714736640.0, + "99": 714736640.0, + "100": 714736640.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 
1, + "values": { + "1": 2399714304.0, + "2": 2681315328.0, + "3": 2681315328.0, + "4": 2681315328.0, + "5": 2681315328.0, + "6": 2681315328.0, + "7": 2681315328.0, + "8": 2681315328.0, + "9": 2681315328.0, + "10": 2681315328.0, + "11": 2681315328.0, + "12": 2681315328.0, + "13": 2681315328.0, + "14": 2681315328.0, + "15": 2681315328.0, + "16": 2681315328.0, + "17": 2681315328.0, + "18": 2681315328.0, + "19": 2681315328.0, + "20": 2681315328.0, + "21": 2681315328.0, + "22": 2681315328.0, + "23": 2681315328.0, + "24": 2681315328.0, + "25": 2681315328.0, + "26": 2681315328.0, + "27": 2681315328.0, + "28": 2681315328.0, + "29": 2681315328.0, + "30": 2681315328.0, + "31": 2681315328.0, + "32": 2681315328.0, + "33": 2681315328.0, + "34": 2681315328.0, + "35": 2681315328.0, + "36": 2681315328.0, + "37": 2681315328.0, + "38": 2681315328.0, + "39": 2681315328.0, + "40": 2681315328.0, + "41": 2681315328.0, + "42": 2681315328.0, + "43": 2681315328.0, + "44": 2681315328.0, + "45": 2681315328.0, + "46": 2681315328.0, + "47": 2681315328.0, + "48": 2681315328.0, + "49": 2681315328.0, + "50": 2681315328.0, + "51": 2681315328.0, + "52": 2681315328.0, + "53": 2681315328.0, + "54": 2681315328.0, + "55": 2681315328.0, + "56": 2681315328.0, + "57": 2681315328.0, + "58": 2681315328.0, + "59": 2681315328.0, + "60": 2681315328.0, + "61": 2681315328.0, + "62": 2681315328.0, + "63": 2681315328.0, + "64": 2681315328.0, + "65": 2681315328.0, + "66": 2681315328.0, + "67": 2681315328.0, + "68": 2681315328.0, + "69": 2681315328.0, + "70": 2681315328.0, + "71": 2681315328.0, + "72": 2681315328.0, + "73": 2681315328.0, + "74": 2681315328.0, + "75": 2681315328.0, + "76": 2681315328.0, + "77": 2681315328.0, + "78": 2681315328.0, + "79": 2681315328.0, + "80": 2681315328.0, + "81": 2681315328.0, + "82": 2681315328.0, + "83": 2681315328.0, + "84": 2681315328.0, + "85": 2681315328.0, + "86": 2681315328.0, + "87": 2681315328.0, + "88": 2681315328.0, + "89": 2681315328.0, + "90": 2681315328.0, + "91": 
2681315328.0, + "92": 2681315328.0, + "93": 2681315328.0, + "94": 2681315328.0, + "95": 2681315328.0, + "96": 2681315328.0, + "97": 2681315328.0, + "98": 2681315328.0, + "99": 2681315328.0, + "100": 2681315328.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.214, + "2": 0.2986, + "3": 0.17295, + "4": 0.16821, + "5": 0.16854, + "6": 0.16781, + "7": 0.16849, + "8": 0.16759, + "9": 0.16821, + "10": 0.16905, + "11": 0.16939, + "12": 0.16739, + "13": 0.16719, + "14": 0.16712, + "15": 0.16829, + "16": 0.1725, + "17": 0.16696, + "18": 0.16586, + "19": 0.16737, + "20": 0.16711, + "21": 0.16776, + "22": 0.16801, + "23": 0.16812, + "24": 0.16559, + "25": 0.16732, + "26": 0.16954, + "27": 0.16886, + "28": 0.1669, + "29": 0.16695, + "30": 0.16775, + "31": 0.16795, + "32": 0.16696, + "33": 0.16584, + "34": 0.16695, + "35": 0.16714, + "36": 0.16747, + "37": 0.16686, + "38": 0.16675, + "39": 0.16654, + "40": 0.18817, + "41": 0.16797, + "42": 0.16692, + "43": 0.16746, + "44": 0.16567, + "45": 0.1672, + "46": 0.1681, + "47": 0.16794, + "48": 0.17384, + "49": 0.17344, + "50": 0.17178, + "51": 0.17498, + "52": 0.16896, + "53": 0.2031, + "54": 0.16689, + "55": 0.16738, + "56": 0.1658, + "57": 0.16757, + "58": 0.16947, + "59": 0.16981, + "60": 0.16658, + "61": 0.16728, + "62": 0.16586, + "63": 0.16601, + "64": 0.16674, + "65": 0.16826, + "66": 0.16662, + "67": 0.16681, + "68": 0.1673, + "69": 0.16747, + "70": 0.16723, + "71": 0.16746, + "72": 0.16639, + "73": 0.16738, + "74": 0.16734, + "75": 0.16723, + "76": 0.16734, + "77": 0.16644, + "78": 0.16664, + "79": 0.16693, + "80": 0.16638, + "81": 0.16693, + "82": 0.16667, + "83": 0.1665, + "84": 0.16715, + "85": 0.16683, + "86": 0.16633, + "87": 0.16713, + "88": 0.16671, + "89": 0.16706, + "90": 0.16702, + "91": 0.16739, + "92": 0.16596, + "93": 0.1665, + "94": 0.16701, + "95": 0.16634, + "96": 0.16704, + "97": 0.16737, + "98": 0.16691, + "99": 0.16712, + "100": 0.16653 + 
} + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..fba68f73b6e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.79205, + "2": 10.80272, + "3": 10.80707, + "4": 10.77315, + "5": 10.84695, + "6": 10.86789, + "7": 10.82655, + "8": 10.81333, + "9": 10.83441, + "10": 10.77106, + "11": 10.89149, + "12": 10.84617, + "13": 10.85969, + "14": 10.8812, + "15": 10.79093, + "16": 10.78328, + "17": 10.75926, + "18": 10.79337, + "19": 10.797, + "20": 10.68042, + "21": 10.66126, + "22": 10.50248, + "23": 10.71375, + "24": 10.55253, + "25": 10.50715, + "26": 10.58275, + "27": 10.58672, + "28": 10.55873, + "29": 10.56101, + "30": 10.33325, + "31": 10.08467, + "32": 10.44744, + "33": 10.44372, + "34": 10.2003, + "35": 10.25545, + "36": 10.19448, + "37": 10.32113, + "38": 10.1659, + "39": 10.37726, + "40": 10.05544, + "41": 10.13785, + "42": 10.19159, + "43": 9.80956, + "44": 9.92967, + "45": 9.80575, + "46": 9.81454, + "47": 10.12933, + "48": 9.82644, + "49": 9.51395, + "50": 9.89082, + "51": 9.8397, + "52": 9.73412, + "53": 10.05515, + "54": 9.94093, + "55": 9.87063, + "56": 9.61009, + "57": 9.46055, + "58": 9.81541, + "59": 9.57905, + "60": 9.48478, + "61": 9.68485, + "62": 9.97574, + "63": 9.36483, + "64": 9.76838, + "65": 8.94022, + "66": 9.68864, + "67": 9.36647, + "68": 9.77611, + "69": 9.78404, + "70": 9.72243, + "71": 9.6082, + "72": 9.57758, + "73": 
9.48936, + "74": 8.9399, + "75": 9.40907, + "76": 9.08135, + "77": 10.05639, + "78": 9.72293, + "79": 9.36509, + "80": 9.3976, + "81": 9.47445, + "82": 9.68843, + "83": 9.30263, + "84": 9.4102, + "85": 9.60746, + "86": 9.07122, + "87": 9.58742, + "88": 9.74129, + "89": 9.59922, + "90": 9.81041, + "91": 9.33141, + "92": 9.35529, + "93": 9.07461, + "94": 8.82759, + "95": 9.5116, + "96": 9.51899, + "97": 9.30162, + "98": 9.66741, + "99": 8.88218, + "100": 9.39722 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1580.0, + "2": 1686.0, + "3": 1726.0, + "4": 1795.0, + "5": 1901.0, + "6": 1778.0, + "7": 1963.0, + "8": 1704.0, + "9": 1811.0, + "10": 1346.0, + "11": 1849.0, + "12": 1683.0, + "13": 1888.0, + "14": 1711.0, + "15": 1926.0, + "16": 1841.0, + "17": 1931.0, + "18": 1716.0, + "19": 1765.0, + "20": 1643.0, + "21": 1884.0, + "22": 1626.0, + "23": 1954.0, + "24": 1715.0, + "25": 1683.0, + "26": 1679.0, + "27": 1817.0, + "28": 2019.0, + "29": 1946.0, + "30": 1867.0, + "31": 1544.0, + "32": 1832.0, + "33": 2119.0, + "34": 1921.0, + "35": 2020.0, + "36": 1953.0, + "37": 2350.0, + "38": 2210.0, + "39": 2319.0, + "40": 2252.0, + "41": 2449.0, + "42": 2364.0, + "43": 2089.0, + "44": 2094.0, + "45": 2243.0, + "46": 2335.0, + "47": 2406.0, + "48": 2410.0, + "49": 2341.0, + "50": 2459.0, + "51": 2611.0, + "52": 2427.0, + "53": 2838.0, + "54": 2632.0, + "55": 2291.0, + "56": 2663.0, + "57": 2276.0, + "58": 2777.0, + "59": 2601.0, + "60": 2404.0, + "61": 2985.0, + "62": 2595.0, + "63": 2454.0, + "64": 3101.0, + "65": 2474.0, + "66": 3006.0, + "67": 2671.0, + "68": 2874.0, + "69": 2956.0, + "70": 3102.0, + "71": 2891.0, + "72": 2543.0, + "73": 2860.0, + "74": 1888.0, + "75": 2603.0, + "76": 2813.0, + "77": 3361.0, + "78": 3252.0, + "79": 3007.0, + "80": 3420.0, + "81": 3624.0, + "82": 3184.0, + "83": 2708.0, + "84": 3138.0, + "85": 3388.0, + "86": 2619.0, + "87": 3682.0, + "88": 3074.0, + "89": 3260.0, + "90": 2904.0, + 
"91": 2634.0, + "92": 3097.0, + "93": 2745.0, + "94": 3484.0, + "95": 3333.0, + "96": 3292.0, + "97": 3141.0, + "98": 3550.0, + "99": 3170.0, + "100": 3347.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 714736640.0, + "2": 714736640.0, + "3": 714736640.0, + "4": 714736640.0, + "5": 714736640.0, + "6": 714736640.0, + "7": 714736640.0, + "8": 714736640.0, + "9": 714736640.0, + "10": 714736640.0, + "11": 714736640.0, + "12": 714736640.0, + "13": 714736640.0, + "14": 714736640.0, + "15": 714736640.0, + "16": 714736640.0, + "17": 714736640.0, + "18": 714736640.0, + "19": 714736640.0, + "20": 714736640.0, + "21": 714736640.0, + "22": 714736640.0, + "23": 714736640.0, + "24": 714736640.0, + "25": 714736640.0, + "26": 714736640.0, + "27": 714736640.0, + "28": 714736640.0, + "29": 714736640.0, + "30": 714736640.0, + "31": 714736640.0, + "32": 714736640.0, + "33": 714736640.0, + "34": 714736640.0, + "35": 714736640.0, + "36": 714736640.0, + "37": 714736640.0, + "38": 714736640.0, + "39": 714736640.0, + "40": 714736640.0, + "41": 714736640.0, + "42": 714736640.0, + "43": 714736640.0, + "44": 714736640.0, + "45": 714736640.0, + "46": 714736640.0, + "47": 714736640.0, + "48": 714736640.0, + "49": 714736640.0, + "50": 714736640.0, + "51": 714736640.0, + "52": 714736640.0, + "53": 714736640.0, + "54": 714736640.0, + "55": 714736640.0, + "56": 714736640.0, + "57": 714736640.0, + "58": 714736640.0, + "59": 714736640.0, + "60": 714736640.0, + "61": 714736640.0, + "62": 714736640.0, + "63": 714736640.0, + "64": 714736640.0, + "65": 714736640.0, + "66": 714736640.0, + "67": 714736640.0, + "68": 714736640.0, + "69": 714736640.0, + "70": 714736640.0, + "71": 714736640.0, + "72": 714736640.0, + "73": 714736640.0, + "74": 714736640.0, + "75": 714736640.0, + "76": 714736640.0, + "77": 714736640.0, + "78": 714736640.0, + "79": 714736640.0, + "80": 714736640.0, + "81": 714736640.0, + "82": 714736640.0, + "83": 
714736640.0, + "84": 714736640.0, + "85": 714736640.0, + "86": 714736640.0, + "87": 714736640.0, + "88": 714736640.0, + "89": 714736640.0, + "90": 714736640.0, + "91": 714736640.0, + "92": 714736640.0, + "93": 714736640.0, + "94": 714736640.0, + "95": 714736640.0, + "96": 714736640.0, + "97": 714736640.0, + "98": 714736640.0, + "99": 714736640.0, + "100": 714736640.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2399714304.0, + "2": 2681315328.0, + "3": 2681315328.0, + "4": 2681315328.0, + "5": 2681315328.0, + "6": 2681315328.0, + "7": 2681315328.0, + "8": 2681315328.0, + "9": 2681315328.0, + "10": 2681315328.0, + "11": 2681315328.0, + "12": 2681315328.0, + "13": 2681315328.0, + "14": 2681315328.0, + "15": 2681315328.0, + "16": 2681315328.0, + "17": 2681315328.0, + "18": 2681315328.0, + "19": 2681315328.0, + "20": 2681315328.0, + "21": 2681315328.0, + "22": 2681315328.0, + "23": 2681315328.0, + "24": 2681315328.0, + "25": 2681315328.0, + "26": 2681315328.0, + "27": 2681315328.0, + "28": 2681315328.0, + "29": 2681315328.0, + "30": 2681315328.0, + "31": 2681315328.0, + "32": 2681315328.0, + "33": 2681315328.0, + "34": 2681315328.0, + "35": 2681315328.0, + "36": 2681315328.0, + "37": 2681315328.0, + "38": 2681315328.0, + "39": 2681315328.0, + "40": 2681315328.0, + "41": 2681315328.0, + "42": 2681315328.0, + "43": 2681315328.0, + "44": 2681315328.0, + "45": 2681315328.0, + "46": 2681315328.0, + "47": 2681315328.0, + "48": 2681315328.0, + "49": 2681315328.0, + "50": 2681315328.0, + "51": 2681315328.0, + "52": 2681315328.0, + "53": 2681315328.0, + "54": 2681315328.0, + "55": 2681315328.0, + "56": 2681315328.0, + "57": 2681315328.0, + "58": 2681315328.0, + "59": 2681315328.0, + "60": 2681315328.0, + "61": 2681315328.0, + "62": 2681315328.0, + "63": 2681315328.0, + "64": 2681315328.0, + "65": 2681315328.0, + "66": 2681315328.0, + "67": 2681315328.0, + "68": 2681315328.0, + "69": 2681315328.0, + 
"70": 2681315328.0, + "71": 2681315328.0, + "72": 2681315328.0, + "73": 2681315328.0, + "74": 2681315328.0, + "75": 2681315328.0, + "76": 2681315328.0, + "77": 2681315328.0, + "78": 2681315328.0, + "79": 2681315328.0, + "80": 2681315328.0, + "81": 2681315328.0, + "82": 2681315328.0, + "83": 2681315328.0, + "84": 2681315328.0, + "85": 2681315328.0, + "86": 2681315328.0, + "87": 2681315328.0, + "88": 2681315328.0, + "89": 2681315328.0, + "90": 2681315328.0, + "91": 2681315328.0, + "92": 2681315328.0, + "93": 2681315328.0, + "94": 2681315328.0, + "95": 2681315328.0, + "96": 2681315328.0, + "97": 2681315328.0, + "98": 2681315328.0, + "99": 2681315328.0, + "100": 2681315328.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.16871, + "2": 0.19825, + "3": 0.17764, + "4": 0.17796, + "5": 0.17192, + "6": 0.17224, + "7": 0.17188, + "8": 0.17172, + "9": 0.17327, + "10": 0.17337, + "11": 0.17262, + "12": 0.17206, + "13": 0.17211, + "14": 0.17318, + "15": 0.17218, + "16": 0.17375, + "17": 0.17267, + "18": 0.1736, + "19": 0.17211, + "20": 0.16903, + "21": 0.16941, + "22": 0.17049, + "23": 0.17119, + "24": 0.173, + "25": 0.16874, + "26": 0.16822, + "27": 0.16694, + "28": 0.16671, + "29": 0.16762, + "30": 0.16932, + "31": 0.17431, + "32": 0.16784, + "33": 0.16633, + "34": 0.16587, + "35": 0.16729, + "36": 0.16658, + "37": 0.16788, + "38": 0.1666, + "39": 0.16597, + "40": 0.16589, + "41": 0.16706, + "42": 0.16633, + "43": 0.16631, + "44": 0.16797, + "45": 0.16699, + "46": 0.16824, + "47": 0.167, + "48": 0.16653, + "49": 0.16587, + "50": 0.16635, + "51": 0.18233, + "52": 0.21141, + "53": 0.16986, + "54": 0.1702, + "55": 0.16952, + "56": 0.16978, + "57": 0.16872, + "58": 0.16891, + "59": 0.17005, + "60": 0.16948, + "61": 0.16922, + "62": 0.16913, + "63": 0.1694, + "64": 0.16954, + "65": 0.16972, + "66": 0.16677, + "67": 0.16621, + "68": 0.16658, + "69": 0.16617, + "70": 0.1656, + "71": 0.16718, + "72": 0.16666, + "73": 
0.16987, + "74": 0.17045, + "75": 0.16726, + "76": 0.1671, + "77": 0.16753, + "78": 0.17072, + "79": 0.16826, + "80": 0.16784, + "81": 0.16717, + "82": 0.16591, + "83": 0.16729, + "84": 0.16631, + "85": 0.16697, + "86": 0.1677, + "87": 0.16577, + "88": 0.1676, + "89": 0.16708, + "90": 0.16577, + "91": 0.16637, + "92": 0.16659, + "93": 0.16604, + "94": 0.16681, + "95": 0.16705, + "96": 0.16588, + "97": 0.16674, + "98": 0.16703, + "99": 0.16605, + "100": 0.16691 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 0e382b4ce7b..732eb3335b2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.76985, + "2": 10.81791, + "3": 10.784, + "4": 10.788, "5": 10.81927, + "6": 10.84306, + "7": 10.83464, + "8": 10.8066, + "9": 10.83359, "10": 10.73562, + "11": 10.86814, + "12": 10.85075, + "13": 10.84505, + "14": 10.87136, "15": 10.8218, + "16": 10.80433, + "17": 10.76124, + "18": 10.80363, + "19": 10.80599, "20": 10.74747, + "21": 10.7254, + "22": 10.60597, + "23": 10.74387, + "24": 10.65549, "25": 10.58002, + "26": 10.64496, + "27": 10.67191, + "28": 10.66903, + "29": 10.66652, "30": 10.46947, + "31": 10.26264, + "32": 10.56932, + "33": 10.54232, + "34": 10.36113, "35": 10.39558, + "36": 10.36866, + "37": 10.47523, + "38": 10.33715, + "39": 10.49947, "40": 10.23019, + "41": 10.30905, + "42": 10.33124, + "43": 9.99091, + "44": 10.09605, "45": 
10.00787, + "46": 9.96718, + "47": 10.27077, + "48": 10.01043, + "49": 9.73437, "50": 10.04737, + "51": 10.00084, + "52": 9.89672, + "53": 10.19876, + "54": 10.09066, "55": 10.00567, + "56": 9.77199, + "57": 9.64533, + "58": 9.98587, + "59": 9.72608, "60": 9.6777, + "61": 9.8157, + "62": 10.092, + "63": 9.54758, + "64": 9.90438, "65": 9.09492, + "66": 9.84068, + "67": 9.48471, + "68": 9.88996, + "69": 9.87691, "70": 9.85294, + "71": 9.73278, + "72": 9.72558, + "73": 9.63706, + "74": 9.12334, "75": 9.55335, + "76": 9.21765, + "77": 10.15202, + "78": 9.81465, + "79": 9.47558, "80": 9.52073, + "81": 9.5872, + "82": 9.79125, + "83": 9.44848, + "84": 9.49585, "85": 9.72189, + "86": 9.18037, + "87": 9.66127, + "88": 9.84359, + "89": 9.71651, "90": 9.88102, + "91": 9.48434, + "92": 9.4705, + "93": 9.20911, + "94": 8.95382, "95": 9.60554, + "96": 9.63976, + "97": 9.38762, + "98": 9.7573, + "99": 9.0159, "100": 9.49925 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2680.0, + "2": 2615.0, + "3": 2642.0, + "4": 2479.0, "5": 2971.0, + "6": 2822.0, + "7": 2833.0, + "8": 2508.0, + "9": 2922.0, "10": 2508.0, + "11": 2917.0, + "12": 2817.0, + "13": 2935.0, + "14": 2969.0, "15": 2679.0, + "16": 2976.0, + "17": 2609.0, + "18": 2868.0, + "19": 2790.0, "20": 2461.0, + "21": 2636.0, + "22": 2356.0, + "23": 2798.0, + "24": 2613.0, "25": 2640.0, + "26": 2701.0, + "27": 2761.0, + "28": 2801.0, + "29": 2971.0, "30": 2590.0, + "31": 2307.0, + "32": 2751.0, + "33": 2881.0, + "34": 2352.0, "35": 2480.0, + "36": 2443.0, + "37": 2748.0, + "38": 2692.0, + "39": 2709.0, "40": 2570.0, + "41": 2752.0, + "42": 2689.0, + "43": 2381.0, + "44": 2483.0, "45": 2397.0, + "46": 2281.0, + "47": 2684.0, + "48": 2330.0, + "49": 2293.0, "50": 2740.0, + "51": 2575.0, + "52": 2621.0, + "53": 2891.0, + "54": 2655.0, "55": 2559.0, + "56": 2566.0, + "57": 2471.0, + "58": 2767.0, + "59": 2529.0, "60": 2289.0, + "61": 2642.0, + "62": 2820.0, + "63": 
2654.0, + "64": 3020.0, "65": 2687.0, + "66": 2884.0, + "67": 2666.0, + "68": 2720.0, + "69": 2738.0, "70": 3004.0, + "71": 2816.0, + "72": 2537.0, + "73": 2826.0, + "74": 2192.0, "75": 2647.0, + "76": 3048.0, + "77": 3019.0, + "78": 3134.0, + "79": 3092.0, "80": 3054.0, + "81": 3298.0, + "82": 3350.0, + "83": 2597.0, + "84": 3436.0, "85": 3350.0, + "86": 2993.0, + "87": 3509.0, + "88": 3403.0, + "89": 3490.0, "90": 3368.0, + "91": 2461.0, + "92": 2803.0, + "93": 2933.0, + "94": 2888.0, "95": 3138.0, + "96": 3047.0, + "97": 3016.0, + "98": 3382.0, + "99": 2995.0, "100": 2490.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 745731584.0, + "2": 745731584.0, + "3": 745731584.0, + "4": 745731584.0, "5": 745731584.0, + "6": 745731584.0, + "7": 745731584.0, + "8": 745731584.0, + "9": 745731584.0, "10": 745731584.0, + "11": 745731584.0, + "12": 745731584.0, + "13": 745731584.0, + "14": 745731584.0, "15": 745731584.0, + "16": 745731584.0, + "17": 745731584.0, + "18": 745731584.0, + "19": 745731584.0, "20": 745731584.0, + "21": 745731584.0, + "22": 745731584.0, + "23": 745731584.0, + "24": 745731584.0, "25": 745731584.0, + "26": 745731584.0, + "27": 745731584.0, + "28": 745731584.0, + "29": 745731584.0, "30": 745731584.0, + "31": 745731584.0, + "32": 745731584.0, + "33": 745731584.0, + "34": 745731584.0, "35": 745731584.0, + "36": 745731584.0, + "37": 745731584.0, + "38": 745731584.0, + "39": 745731584.0, "40": 745731584.0, + "41": 745731584.0, + "42": 745731584.0, + "43": 745731584.0, + "44": 745731584.0, "45": 745731584.0, + "46": 745731584.0, + "47": 745731584.0, + "48": 745731584.0, + "49": 745731584.0, "50": 745731584.0, + "51": 745731584.0, + "52": 745731584.0, + "53": 745731584.0, + "54": 745731584.0, "55": 745731584.0, + "56": 745731584.0, + "57": 745731584.0, + "58": 745731584.0, + "59": 745731584.0, "60": 745731584.0, + "61": 745731584.0, + "62": 745731584.0, + "63": 745731584.0, + "64": 
745731584.0, "65": 745731584.0, + "66": 745731584.0, + "67": 745731584.0, + "68": 745731584.0, + "69": 745731584.0, "70": 745731584.0, + "71": 745731584.0, + "72": 745731584.0, + "73": 745731584.0, + "74": 745731584.0, "75": 745731584.0, + "76": 745731584.0, + "77": 745731584.0, + "78": 745731584.0, + "79": 745731584.0, "80": 745731584.0, + "81": 745731584.0, + "82": 745731584.0, + "83": 745731584.0, + "84": 745731584.0, "85": 745731584.0, + "86": 745731584.0, + "87": 745731584.0, + "88": 745731584.0, + "89": 745731584.0, "90": 745731584.0, + "91": 745731584.0, + "92": 745731584.0, + "93": 745731584.0, + "94": 745731584.0, "95": 745731584.0, + "96": 745731584.0, + "97": 745731584.0, + "98": 745731584.0, + "99": 745731584.0, "100": 745731584.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1928906752.0, + "2": 2210568192.0, + "3": 2210568192.0, + "4": 2210568192.0, "5": 2210568192.0, + "6": 2210568192.0, + "7": 2210568192.0, + "8": 2210568192.0, + "9": 2210568192.0, "10": 2210568192.0, + "11": 2210568192.0, + "12": 2210568192.0, + "13": 2210568192.0, + "14": 2210568192.0, "15": 2210568192.0, + "16": 2210568192.0, + "17": 2210568192.0, + "18": 2210568192.0, + "19": 2210568192.0, "20": 2210568192.0, + "21": 2210568192.0, + "22": 2210568192.0, + "23": 2210568192.0, + "24": 2210568192.0, "25": 2210568192.0, + "26": 2210568192.0, + "27": 2210568192.0, + "28": 2210568192.0, + "29": 2210568192.0, "30": 2210568192.0, + "31": 2210568192.0, + "32": 2210568192.0, + "33": 2210568192.0, + "34": 2210568192.0, "35": 2210568192.0, + "36": 2210568192.0, + "37": 2210568192.0, + "38": 2210568192.0, + "39": 2210568192.0, "40": 2210568192.0, + "41": 2210568192.0, + "42": 2210568192.0, + "43": 2210568192.0, + "44": 2210568192.0, "45": 2210568192.0, + "46": 2210568192.0, + "47": 2210568192.0, + "48": 2210568192.0, + "49": 2210568192.0, "50": 2210568192.0, + "51": 2210568192.0, + "52": 2210568192.0, + "53": 
2210568192.0, + "54": 2210568192.0, "55": 2210568192.0, + "56": 2210568192.0, + "57": 2210568192.0, + "58": 2210568192.0, + "59": 2210568192.0, "60": 2210568192.0, + "61": 2210568192.0, + "62": 2210568192.0, + "63": 2210568192.0, + "64": 2210568192.0, "65": 2210568192.0, + "66": 2210568192.0, + "67": 2210568192.0, + "68": 2210568192.0, + "69": 2210568192.0, "70": 2210568192.0, + "71": 2210568192.0, + "72": 2210568192.0, + "73": 2210568192.0, + "74": 2210568192.0, "75": 2210568192.0, + "76": 2210568192.0, + "77": 2210568192.0, + "78": 2210568192.0, + "79": 2210568192.0, "80": 2210568192.0, + "81": 2210568192.0, + "82": 2210568192.0, + "83": 2210568192.0, + "84": 2210568192.0, "85": 2210568192.0, + "86": 2210568192.0, + "87": 2210568192.0, + "88": 2210568192.0, + "89": 2210568192.0, "90": 2210568192.0, + "91": 2210568192.0, + "92": 2210568192.0, + "93": 2210568192.0, + "94": 2210568192.0, "95": 2210568192.0, + "96": 2210568192.0, + "97": 2210568192.0, + "98": 2210568192.0, + "99": 2210568192.0, "100": 2210568192.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 13.90495, - "5": 0.1093, - "10": 0.10381, - "15": 0.10282, - "20": 0.10222, - "25": 0.10608, - "30": 0.10579, - "35": 0.113, - "40": 0.10704, - "45": 0.10527, - "50": 0.1051, - "55": 0.10242, - "60": 0.10626, - "65": 0.10167, - "70": 0.10143, - "75": 0.10116, - "80": 0.10173, - "85": 0.10268, - "90": 0.10062, - "95": 0.10308, - "100": 0.10193 + "1": 15.52736, + "2": 0.14752, + "3": 0.12429, + "4": 0.12037, + "5": 0.12096, + "6": 0.11965, + "7": 0.1198, + "8": 0.12021, + "9": 0.12041, + "10": 0.12377, + "11": 0.11828, + "12": 0.11903, + "13": 0.12052, + "14": 0.11683, + "15": 0.1179, + "16": 0.1185, + "17": 0.1178, + "18": 0.12085, + "19": 0.11844, + "20": 0.11779, + "21": 0.11689, + "22": 0.11623, + "23": 0.11674, + "24": 0.11908, + "25": 0.11762, + "26": 0.11952, + "27": 0.11831, + "28": 0.11712, + "29": 0.11898, + "30": 0.11914, + 
"31": 0.11719, + "32": 0.11849, + "33": 0.1193, + "34": 0.11601, + "35": 0.1215, + "36": 0.11653, + "37": 0.11596, + "38": 0.11751, + "39": 0.1194, + "40": 0.11662, + "41": 0.11896, + "42": 0.11624, + "43": 0.11775, + "44": 0.11757, + "45": 0.11618, + "46": 0.1194, + "47": 0.11754, + "48": 0.11775, + "49": 0.11637, + "50": 0.11524, + "51": 0.14043, + "52": 0.12567, + "53": 0.12158, + "54": 0.1217, + "55": 0.15002, + "56": 0.11858, + "57": 0.11887, + "58": 0.11705, + "59": 0.11599, + "60": 0.11585, + "61": 0.11429, + "62": 0.11598, + "63": 0.116, + "64": 0.11878, + "65": 0.11921, + "66": 0.11734, + "67": 0.11708, + "68": 0.11543, + "69": 0.11703, + "70": 0.11514, + "71": 0.1178, + "72": 0.1154, + "73": 0.12116, + "74": 0.12077, + "75": 0.1166, + "76": 0.11599, + "77": 0.11628, + "78": 0.11749, + "79": 0.11828, + "80": 0.12013, + "81": 0.11887, + "82": 0.1195, + "83": 0.11685, + "84": 0.11603, + "85": 0.11434, + "86": 0.11762, + "87": 0.11821, + "88": 0.12276, + "89": 0.12384, + "90": 0.11892, + "91": 0.11831, + "92": 0.11619, + "93": 0.11613, + "94": 0.11455, + "95": 0.1172, + "96": 0.11583, + "97": 0.11939, + "98": 0.11877, + "99": 0.11703, + "100": 0.12143 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..5147f8fd670 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.76985, + "2": 10.81791, + "3": 10.784, + "4": 10.788, + "5": 10.81927, + "6": 10.84306, + "7": 10.83464, + "8": 10.8066, + "9": 10.83359, + "10": 
10.73562, + "11": 10.86814, + "12": 10.85075, + "13": 10.84505, + "14": 10.87136, + "15": 10.8218, + "16": 10.80433, + "17": 10.76124, + "18": 10.80363, + "19": 10.80599, + "20": 10.74747, + "21": 10.7254, + "22": 10.60597, + "23": 10.74387, + "24": 10.65549, + "25": 10.58002, + "26": 10.64496, + "27": 10.67191, + "28": 10.66903, + "29": 10.66652, + "30": 10.46947, + "31": 10.26264, + "32": 10.56932, + "33": 10.54232, + "34": 10.36113, + "35": 10.39558, + "36": 10.36866, + "37": 10.47523, + "38": 10.33715, + "39": 10.49947, + "40": 10.23019, + "41": 10.30905, + "42": 10.33124, + "43": 9.99091, + "44": 10.09605, + "45": 10.00787, + "46": 9.96718, + "47": 10.27077, + "48": 10.01043, + "49": 9.73437, + "50": 10.04737, + "51": 10.00084, + "52": 9.89672, + "53": 10.19876, + "54": 10.09066, + "55": 10.00567, + "56": 9.77199, + "57": 9.64533, + "58": 9.98587, + "59": 9.72608, + "60": 9.6777, + "61": 9.8157, + "62": 10.092, + "63": 9.54758, + "64": 9.90438, + "65": 9.09492, + "66": 9.84068, + "67": 9.48471, + "68": 9.88996, + "69": 9.87691, + "70": 9.85294, + "71": 9.73278, + "72": 9.72558, + "73": 9.63706, + "74": 9.12334, + "75": 9.55335, + "76": 9.21765, + "77": 10.15202, + "78": 9.81465, + "79": 9.47558, + "80": 9.52073, + "81": 9.5872, + "82": 9.79125, + "83": 9.44848, + "84": 9.49585, + "85": 9.72189, + "86": 9.18037, + "87": 9.66127, + "88": 9.84359, + "89": 9.71651, + "90": 9.88102, + "91": 9.48434, + "92": 9.4705, + "93": 9.20911, + "94": 8.95382, + "95": 9.60554, + "96": 9.63976, + "97": 9.38762, + "98": 9.7573, + "99": 9.0159, + "100": 9.49925 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2680.0, + "2": 2615.0, + "3": 2642.0, + "4": 2479.0, + "5": 2971.0, + "6": 2822.0, + "7": 2833.0, + "8": 2508.0, + "9": 2922.0, + "10": 2508.0, + "11": 2917.0, + "12": 2817.0, + "13": 2935.0, + "14": 2969.0, + "15": 2679.0, + "16": 2976.0, + "17": 2609.0, + "18": 2868.0, + "19": 2790.0, + "20": 2461.0, + "21": 2636.0, 
+ "22": 2356.0, + "23": 2798.0, + "24": 2613.0, + "25": 2640.0, + "26": 2701.0, + "27": 2761.0, + "28": 2801.0, + "29": 2971.0, + "30": 2590.0, + "31": 2307.0, + "32": 2751.0, + "33": 2881.0, + "34": 2352.0, + "35": 2480.0, + "36": 2443.0, + "37": 2748.0, + "38": 2692.0, + "39": 2709.0, + "40": 2570.0, + "41": 2752.0, + "42": 2689.0, + "43": 2381.0, + "44": 2483.0, + "45": 2397.0, + "46": 2281.0, + "47": 2684.0, + "48": 2330.0, + "49": 2293.0, + "50": 2740.0, + "51": 2575.0, + "52": 2621.0, + "53": 2891.0, + "54": 2655.0, + "55": 2559.0, + "56": 2566.0, + "57": 2471.0, + "58": 2767.0, + "59": 2529.0, + "60": 2289.0, + "61": 2642.0, + "62": 2820.0, + "63": 2654.0, + "64": 3020.0, + "65": 2687.0, + "66": 2884.0, + "67": 2666.0, + "68": 2720.0, + "69": 2738.0, + "70": 3004.0, + "71": 2816.0, + "72": 2537.0, + "73": 2826.0, + "74": 2192.0, + "75": 2647.0, + "76": 3048.0, + "77": 3019.0, + "78": 3134.0, + "79": 3092.0, + "80": 3054.0, + "81": 3298.0, + "82": 3350.0, + "83": 2597.0, + "84": 3436.0, + "85": 3350.0, + "86": 2993.0, + "87": 3509.0, + "88": 3403.0, + "89": 3490.0, + "90": 3368.0, + "91": 2461.0, + "92": 2803.0, + "93": 2933.0, + "94": 2888.0, + "95": 3138.0, + "96": 3047.0, + "97": 3016.0, + "98": 3382.0, + "99": 2995.0, + "100": 2490.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 745731584.0, + "2": 745731584.0, + "3": 745731584.0, + "4": 745731584.0, + "5": 745731584.0, + "6": 745731584.0, + "7": 745731584.0, + "8": 745731584.0, + "9": 745731584.0, + "10": 745731584.0, + "11": 745731584.0, + "12": 745731584.0, + "13": 745731584.0, + "14": 745731584.0, + "15": 745731584.0, + "16": 745731584.0, + "17": 745731584.0, + "18": 745731584.0, + "19": 745731584.0, + "20": 745731584.0, + "21": 745731584.0, + "22": 745731584.0, + "23": 745731584.0, + "24": 745731584.0, + "25": 745731584.0, + "26": 745731584.0, + "27": 745731584.0, + "28": 745731584.0, + "29": 745731584.0, + "30": 745731584.0, + 
"31": 745731584.0, + "32": 745731584.0, + "33": 745731584.0, + "34": 745731584.0, + "35": 745731584.0, + "36": 745731584.0, + "37": 745731584.0, + "38": 745731584.0, + "39": 745731584.0, + "40": 745731584.0, + "41": 745731584.0, + "42": 745731584.0, + "43": 745731584.0, + "44": 745731584.0, + "45": 745731584.0, + "46": 745731584.0, + "47": 745731584.0, + "48": 745731584.0, + "49": 745731584.0, + "50": 745731584.0, + "51": 745731584.0, + "52": 745731584.0, + "53": 745731584.0, + "54": 745731584.0, + "55": 745731584.0, + "56": 745731584.0, + "57": 745731584.0, + "58": 745731584.0, + "59": 745731584.0, + "60": 745731584.0, + "61": 745731584.0, + "62": 745731584.0, + "63": 745731584.0, + "64": 745731584.0, + "65": 745731584.0, + "66": 745731584.0, + "67": 745731584.0, + "68": 745731584.0, + "69": 745731584.0, + "70": 745731584.0, + "71": 745731584.0, + "72": 745731584.0, + "73": 745731584.0, + "74": 745731584.0, + "75": 745731584.0, + "76": 745731584.0, + "77": 745731584.0, + "78": 745731584.0, + "79": 745731584.0, + "80": 745731584.0, + "81": 745731584.0, + "82": 745731584.0, + "83": 745731584.0, + "84": 745731584.0, + "85": 745731584.0, + "86": 745731584.0, + "87": 745731584.0, + "88": 745731584.0, + "89": 745731584.0, + "90": 745731584.0, + "91": 745731584.0, + "92": 745731584.0, + "93": 745731584.0, + "94": 745731584.0, + "95": 745731584.0, + "96": 745731584.0, + "97": 745731584.0, + "98": 745731584.0, + "99": 745731584.0, + "100": 745731584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1928906752.0, + "2": 2210568192.0, + "3": 2210568192.0, + "4": 2210568192.0, + "5": 2210568192.0, + "6": 2210568192.0, + "7": 2210568192.0, + "8": 2210568192.0, + "9": 2210568192.0, + "10": 2210568192.0, + "11": 2210568192.0, + "12": 2210568192.0, + "13": 2210568192.0, + "14": 2210568192.0, + "15": 2210568192.0, + "16": 2210568192.0, + "17": 2210568192.0, + "18": 2210568192.0, + "19": 2210568192.0, + "20": 
2210568192.0, + "21": 2210568192.0, + "22": 2210568192.0, + "23": 2210568192.0, + "24": 2210568192.0, + "25": 2210568192.0, + "26": 2210568192.0, + "27": 2210568192.0, + "28": 2210568192.0, + "29": 2210568192.0, + "30": 2210568192.0, + "31": 2210568192.0, + "32": 2210568192.0, + "33": 2210568192.0, + "34": 2210568192.0, + "35": 2210568192.0, + "36": 2210568192.0, + "37": 2210568192.0, + "38": 2210568192.0, + "39": 2210568192.0, + "40": 2210568192.0, + "41": 2210568192.0, + "42": 2210568192.0, + "43": 2210568192.0, + "44": 2210568192.0, + "45": 2210568192.0, + "46": 2210568192.0, + "47": 2210568192.0, + "48": 2210568192.0, + "49": 2210568192.0, + "50": 2210568192.0, + "51": 2210568192.0, + "52": 2210568192.0, + "53": 2210568192.0, + "54": 2210568192.0, + "55": 2210568192.0, + "56": 2210568192.0, + "57": 2210568192.0, + "58": 2210568192.0, + "59": 2210568192.0, + "60": 2210568192.0, + "61": 2210568192.0, + "62": 2210568192.0, + "63": 2210568192.0, + "64": 2210568192.0, + "65": 2210568192.0, + "66": 2210568192.0, + "67": 2210568192.0, + "68": 2210568192.0, + "69": 2210568192.0, + "70": 2210568192.0, + "71": 2210568192.0, + "72": 2210568192.0, + "73": 2210568192.0, + "74": 2210568192.0, + "75": 2210568192.0, + "76": 2210568192.0, + "77": 2210568192.0, + "78": 2210568192.0, + "79": 2210568192.0, + "80": 2210568192.0, + "81": 2210568192.0, + "82": 2210568192.0, + "83": 2210568192.0, + "84": 2210568192.0, + "85": 2210568192.0, + "86": 2210568192.0, + "87": 2210568192.0, + "88": 2210568192.0, + "89": 2210568192.0, + "90": 2210568192.0, + "91": 2210568192.0, + "92": 2210568192.0, + "93": 2210568192.0, + "94": 2210568192.0, + "95": 2210568192.0, + "96": 2210568192.0, + "97": 2210568192.0, + "98": 2210568192.0, + "99": 2210568192.0, + "100": 2210568192.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.07236, + "2": 0.1439, + "3": 0.10617, + "4": 0.10423, + "5": 0.10661, + "6": 0.10547, + "7": 0.10337, + "8": 
0.10254, + "9": 0.10285, + "10": 0.10538, + "11": 0.10211, + "12": 0.10209, + "13": 0.10172, + "14": 0.10352, + "15": 0.10417, + "16": 0.10185, + "17": 0.10199, + "18": 0.10179, + "19": 0.10297, + "20": 0.1054, + "21": 0.1025, + "22": 0.10172, + "23": 0.10344, + "24": 0.10371, + "25": 0.10166, + "26": 0.10183, + "27": 0.10449, + "28": 0.10545, + "29": 0.10167, + "30": 0.10337, + "31": 0.10277, + "32": 0.10385, + "33": 0.10255, + "34": 0.10441, + "35": 0.10202, + "36": 0.10215, + "37": 0.10277, + "38": 0.10448, + "39": 0.10501, + "40": 0.10325, + "41": 0.1085, + "42": 0.10236, + "43": 0.10413, + "44": 0.106, + "45": 0.10424, + "46": 0.10394, + "47": 0.1034, + "48": 0.10504, + "49": 0.10449, + "50": 0.10267, + "51": 0.12806, + "52": 0.11548, + "53": 0.11073, + "54": 0.1334, + "55": 0.10772, + "56": 0.11009, + "57": 0.10972, + "58": 0.1102, + "59": 0.11446, + "60": 0.11073, + "61": 0.10863, + "62": 0.10838, + "63": 0.10921, + "64": 0.10822, + "65": 0.11173, + "66": 0.1072, + "67": 0.10938, + "68": 0.1065, + "69": 0.10824, + "70": 0.10675, + "71": 0.10695, + "72": 0.10752, + "73": 0.10679, + "74": 0.10848, + "75": 0.1071, + "76": 0.10649, + "77": 0.1042, + "78": 0.10173, + "79": 0.10326, + "80": 0.10215, + "81": 0.10267, + "82": 0.10344, + "83": 0.10345, + "84": 0.10379, + "85": 0.10264, + "86": 0.1045, + "87": 0.10535, + "88": 0.10336, + "89": 0.1083, + "90": 0.10383, + "91": 0.10217, + "92": 0.10152, + "93": 0.10202, + "94": 0.10212, + "95": 0.10185, + "96": 0.10273, + "97": 0.10301, + "98": 0.10313, + "99": 0.10255, + "100": 0.1027 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..245c396be68 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.76985, + "2": 10.81791, + "3": 10.784, + "4": 10.788, + "5": 10.81927, + "6": 10.84306, + "7": 10.83464, + "8": 10.8066, + "9": 10.83359, + "10": 10.73562, + "11": 10.86814, + "12": 10.85075, + "13": 10.84505, + "14": 10.87136, + "15": 10.8218, + "16": 10.80433, + "17": 10.76124, + "18": 10.80363, + "19": 10.80599, + "20": 10.74747, + "21": 10.7254, + "22": 10.60597, + "23": 10.74387, + "24": 10.65549, + "25": 10.58002, + "26": 10.64496, + "27": 10.67191, + "28": 10.66903, + "29": 10.66652, + "30": 10.46947, + "31": 10.26264, + "32": 10.56932, + "33": 10.54232, + "34": 10.36113, + "35": 10.39558, + "36": 10.36866, + "37": 10.47523, + "38": 10.33715, + "39": 10.49947, + "40": 10.23019, + "41": 10.30905, + "42": 10.33124, + "43": 9.99091, + "44": 10.09605, + "45": 10.00787, + "46": 9.96718, + "47": 10.27077, + "48": 10.01043, + "49": 9.73437, + "50": 10.04737, + "51": 10.00084, + "52": 9.89672, + "53": 10.19876, + "54": 10.09066, + "55": 10.00567, + "56": 9.77199, + "57": 9.64533, + "58": 9.98587, + "59": 9.72608, + "60": 9.6777, + "61": 9.8157, + "62": 10.092, + "63": 9.54758, + "64": 9.90438, + "65": 9.09492, + "66": 9.84068, + "67": 9.48471, + "68": 9.88996, + "69": 9.87691, + "70": 9.85294, + "71": 9.73278, + "72": 9.72558, + "73": 9.63706, + "74": 9.12334, + "75": 9.55335, + "76": 9.21765, + "77": 10.15202, + "78": 9.81465, + "79": 9.47558, + "80": 9.52073, + "81": 9.5872, + "82": 9.79125, + "83": 9.44848, + "84": 9.49585, + "85": 9.72189, + "86": 9.18037, + "87": 9.66127, + "88": 9.84359, + "89": 9.71651, + "90": 9.88102, + "91": 9.48434, + "92": 9.4705, + "93": 9.20911, + "94": 8.95382, + "95": 9.60554, + "96": 9.63976, + "97": 9.38762, + "98": 9.7573, + "99": 9.0159, + "100": 9.49925 + } + }, + 
"num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2680.0, + "2": 2615.0, + "3": 2642.0, + "4": 2479.0, + "5": 2971.0, + "6": 2822.0, + "7": 2833.0, + "8": 2508.0, + "9": 2922.0, + "10": 2508.0, + "11": 2917.0, + "12": 2817.0, + "13": 2935.0, + "14": 2969.0, + "15": 2679.0, + "16": 2976.0, + "17": 2609.0, + "18": 2868.0, + "19": 2790.0, + "20": 2461.0, + "21": 2636.0, + "22": 2356.0, + "23": 2798.0, + "24": 2613.0, + "25": 2640.0, + "26": 2701.0, + "27": 2761.0, + "28": 2801.0, + "29": 2971.0, + "30": 2590.0, + "31": 2307.0, + "32": 2751.0, + "33": 2881.0, + "34": 2352.0, + "35": 2480.0, + "36": 2443.0, + "37": 2748.0, + "38": 2692.0, + "39": 2709.0, + "40": 2570.0, + "41": 2752.0, + "42": 2689.0, + "43": 2381.0, + "44": 2483.0, + "45": 2397.0, + "46": 2281.0, + "47": 2684.0, + "48": 2330.0, + "49": 2293.0, + "50": 2740.0, + "51": 2575.0, + "52": 2621.0, + "53": 2891.0, + "54": 2655.0, + "55": 2559.0, + "56": 2566.0, + "57": 2471.0, + "58": 2767.0, + "59": 2529.0, + "60": 2289.0, + "61": 2642.0, + "62": 2820.0, + "63": 2654.0, + "64": 3020.0, + "65": 2687.0, + "66": 2884.0, + "67": 2666.0, + "68": 2720.0, + "69": 2738.0, + "70": 3004.0, + "71": 2816.0, + "72": 2537.0, + "73": 2826.0, + "74": 2192.0, + "75": 2647.0, + "76": 3048.0, + "77": 3019.0, + "78": 3134.0, + "79": 3092.0, + "80": 3054.0, + "81": 3298.0, + "82": 3350.0, + "83": 2597.0, + "84": 3436.0, + "85": 3350.0, + "86": 2993.0, + "87": 3509.0, + "88": 3403.0, + "89": 3490.0, + "90": 3368.0, + "91": 2461.0, + "92": 2803.0, + "93": 2933.0, + "94": 2888.0, + "95": 3138.0, + "96": 3047.0, + "97": 3016.0, + "98": 3382.0, + "99": 2995.0, + "100": 2490.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 745731584.0, + "2": 745731584.0, + "3": 745731584.0, + "4": 745731584.0, + "5": 745731584.0, + "6": 745731584.0, + "7": 745731584.0, + "8": 745731584.0, + "9": 745731584.0, + "10": 745731584.0, + "11": 
745731584.0, + "12": 745731584.0, + "13": 745731584.0, + "14": 745731584.0, + "15": 745731584.0, + "16": 745731584.0, + "17": 745731584.0, + "18": 745731584.0, + "19": 745731584.0, + "20": 745731584.0, + "21": 745731584.0, + "22": 745731584.0, + "23": 745731584.0, + "24": 745731584.0, + "25": 745731584.0, + "26": 745731584.0, + "27": 745731584.0, + "28": 745731584.0, + "29": 745731584.0, + "30": 745731584.0, + "31": 745731584.0, + "32": 745731584.0, + "33": 745731584.0, + "34": 745731584.0, + "35": 745731584.0, + "36": 745731584.0, + "37": 745731584.0, + "38": 745731584.0, + "39": 745731584.0, + "40": 745731584.0, + "41": 745731584.0, + "42": 745731584.0, + "43": 745731584.0, + "44": 745731584.0, + "45": 745731584.0, + "46": 745731584.0, + "47": 745731584.0, + "48": 745731584.0, + "49": 745731584.0, + "50": 745731584.0, + "51": 745731584.0, + "52": 745731584.0, + "53": 745731584.0, + "54": 745731584.0, + "55": 745731584.0, + "56": 745731584.0, + "57": 745731584.0, + "58": 745731584.0, + "59": 745731584.0, + "60": 745731584.0, + "61": 745731584.0, + "62": 745731584.0, + "63": 745731584.0, + "64": 745731584.0, + "65": 745731584.0, + "66": 745731584.0, + "67": 745731584.0, + "68": 745731584.0, + "69": 745731584.0, + "70": 745731584.0, + "71": 745731584.0, + "72": 745731584.0, + "73": 745731584.0, + "74": 745731584.0, + "75": 745731584.0, + "76": 745731584.0, + "77": 745731584.0, + "78": 745731584.0, + "79": 745731584.0, + "80": 745731584.0, + "81": 745731584.0, + "82": 745731584.0, + "83": 745731584.0, + "84": 745731584.0, + "85": 745731584.0, + "86": 745731584.0, + "87": 745731584.0, + "88": 745731584.0, + "89": 745731584.0, + "90": 745731584.0, + "91": 745731584.0, + "92": 745731584.0, + "93": 745731584.0, + "94": 745731584.0, + "95": 745731584.0, + "96": 745731584.0, + "97": 745731584.0, + "98": 745731584.0, + "99": 745731584.0, + "100": 745731584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + 
"1": 1928906752.0, + "2": 2210568192.0, + "3": 2210568192.0, + "4": 2210568192.0, + "5": 2210568192.0, + "6": 2210568192.0, + "7": 2210568192.0, + "8": 2210568192.0, + "9": 2210568192.0, + "10": 2210568192.0, + "11": 2210568192.0, + "12": 2210568192.0, + "13": 2210568192.0, + "14": 2210568192.0, + "15": 2210568192.0, + "16": 2210568192.0, + "17": 2210568192.0, + "18": 2210568192.0, + "19": 2210568192.0, + "20": 2210568192.0, + "21": 2210568192.0, + "22": 2210568192.0, + "23": 2210568192.0, + "24": 2210568192.0, + "25": 2210568192.0, + "26": 2210568192.0, + "27": 2210568192.0, + "28": 2210568192.0, + "29": 2210568192.0, + "30": 2210568192.0, + "31": 2210568192.0, + "32": 2210568192.0, + "33": 2210568192.0, + "34": 2210568192.0, + "35": 2210568192.0, + "36": 2210568192.0, + "37": 2210568192.0, + "38": 2210568192.0, + "39": 2210568192.0, + "40": 2210568192.0, + "41": 2210568192.0, + "42": 2210568192.0, + "43": 2210568192.0, + "44": 2210568192.0, + "45": 2210568192.0, + "46": 2210568192.0, + "47": 2210568192.0, + "48": 2210568192.0, + "49": 2210568192.0, + "50": 2210568192.0, + "51": 2210568192.0, + "52": 2210568192.0, + "53": 2210568192.0, + "54": 2210568192.0, + "55": 2210568192.0, + "56": 2210568192.0, + "57": 2210568192.0, + "58": 2210568192.0, + "59": 2210568192.0, + "60": 2210568192.0, + "61": 2210568192.0, + "62": 2210568192.0, + "63": 2210568192.0, + "64": 2210568192.0, + "65": 2210568192.0, + "66": 2210568192.0, + "67": 2210568192.0, + "68": 2210568192.0, + "69": 2210568192.0, + "70": 2210568192.0, + "71": 2210568192.0, + "72": 2210568192.0, + "73": 2210568192.0, + "74": 2210568192.0, + "75": 2210568192.0, + "76": 2210568192.0, + "77": 2210568192.0, + "78": 2210568192.0, + "79": 2210568192.0, + "80": 2210568192.0, + "81": 2210568192.0, + "82": 2210568192.0, + "83": 2210568192.0, + "84": 2210568192.0, + "85": 2210568192.0, + "86": 2210568192.0, + "87": 2210568192.0, + "88": 2210568192.0, + "89": 2210568192.0, + "90": 2210568192.0, + "91": 2210568192.0, + "92": 
2210568192.0, + "93": 2210568192.0, + "94": 2210568192.0, + "95": 2210568192.0, + "96": 2210568192.0, + "97": 2210568192.0, + "98": 2210568192.0, + "99": 2210568192.0, + "100": 2210568192.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.33061, + "2": 0.15156, + "3": 0.12174, + "4": 0.12197, + "5": 0.12023, + "6": 0.11997, + "7": 0.11882, + "8": 0.11859, + "9": 0.11967, + "10": 0.11724, + "11": 0.11735, + "12": 0.11593, + "13": 0.11661, + "14": 0.11794, + "15": 0.11649, + "16": 0.11682, + "17": 0.11623, + "18": 0.11719, + "19": 0.11753, + "20": 0.11581, + "21": 0.11757, + "22": 0.11628, + "23": 0.11692, + "24": 0.1163, + "25": 0.1167, + "26": 0.11646, + "27": 0.11803, + "28": 0.11984, + "29": 0.11941, + "30": 0.11857, + "31": 0.11687, + "32": 0.11515, + "33": 0.11754, + "34": 0.11591, + "35": 0.11819, + "36": 0.11754, + "37": 0.11694, + "38": 0.11726, + "39": 0.11761, + "40": 0.11745, + "41": 0.11768, + "42": 0.11775, + "43": 0.11661, + "44": 0.11724, + "45": 0.1189, + "46": 0.11964, + "47": 0.11985, + "48": 0.12086, + "49": 0.11855, + "50": 0.11941, + "51": 0.13155, + "52": 0.12627, + "53": 0.12132, + "54": 0.12027, + "55": 0.12076, + "56": 0.14178, + "57": 0.12294, + "58": 0.12155, + "59": 0.11843, + "60": 0.11687, + "61": 0.11827, + "62": 0.11957, + "63": 0.11945, + "64": 0.11781, + "65": 0.12041, + "66": 0.11949, + "67": 0.12059, + "68": 0.11821, + "69": 0.11858, + "70": 0.11799, + "71": 0.12009, + "72": 0.12095, + "73": 0.11845, + "74": 0.11834, + "75": 0.11893, + "76": 0.1214, + "77": 0.1195, + "78": 0.11933, + "79": 0.11885, + "80": 0.11948, + "81": 0.12097, + "82": 0.12, + "83": 0.11954, + "84": 0.11693, + "85": 0.1175, + "86": 0.11941, + "87": 0.11723, + "88": 0.11941, + "89": 0.11804, + "90": 0.11751, + "91": 0.11952, + "92": 0.11778, + "93": 0.11924, + "94": 0.11755, + "95": 0.11789, + "96": 0.11673, + "97": 0.11967, + "98": 0.11752, + "99": 0.11926, + "100": 0.11806 + } + } +} \ No 
newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index ac706ac960b..7b9a1722673 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.91349, + "2": 10.90719, + "3": 10.91328, + "4": 10.87838, "5": 10.91769, + "6": 10.93821, + "7": 10.90469, + "8": 10.90393, + "9": 10.90876, "10": 10.89645, + "11": 10.92562, + "12": 10.91891, + "13": 10.91537, + "14": 10.93343, "15": 10.86115, + "16": 10.85374, + "17": 10.82717, + "18": 10.86544, + "19": 10.86225, "20": 10.76737, + "21": 10.74634, + "22": 10.62228, + "23": 10.76122, + "24": 10.64732, "25": 10.59597, + "26": 10.66352, + "27": 10.6542, + "28": 10.6077, + "29": 10.62581, "30": 10.41591, + "31": 10.16855, + "32": 10.50267, + "33": 10.50304, + "34": 10.25481, "35": 10.31879, + "36": 10.27167, + "37": 10.37751, + "38": 10.22122, + "39": 10.44798, "40": 10.14166, + "41": 10.1771, + "42": 10.2426, + "43": 9.87148, + "44": 9.99875, "45": 9.88702, + "46": 9.86139, + "47": 10.18144, + "48": 9.87873, + "49": 9.58706, "50": 9.9542, + "51": 9.8866, + "52": 9.78429, + "53": 10.10842, + "54": 9.97368, "55": 9.89803, + "56": 9.65427, + "57": 9.52013, + "58": 9.87297, + "59": 9.6132, "60": 9.54967, + "61": 9.70681, + "62": 9.98533, + "63": 9.41357, + "64": 9.80966, "65": 8.97052, + "66": 9.72773, + "67": 9.39183, + "68": 9.8084, + "69": 
9.82052, "70": 9.76655, + "71": 9.63414, + "72": 9.60485, + "73": 9.52299, + "74": 8.9718, "75": 9.42321, + "76": 9.10113, + "77": 10.0716, + "78": 9.74266, + "79": 9.40343, "80": 9.41333, + "81": 9.49931, + "82": 9.70236, + "83": 9.33436, + "84": 9.43774, "85": 9.63924, + "86": 9.07931, + "87": 9.60447, + "88": 9.7824, + "89": 9.62386, "90": 9.84241, + "91": 9.35506, + "92": 9.38398, + "93": 9.09747, + "94": 8.8471, "95": 9.5314, + "96": 9.54263, + "97": 9.32886, + "98": 9.6926, + "99": 8.89976, "100": 9.43124 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 22727424.0, + "2": 22925204.0, + "3": 22596900.0, + "4": 23219556.0, "5": 22714624.0, + "6": 23021776.0, + "7": 22771632.0, + "8": 22926560.0, + "9": 22842156.0, "10": 22918168.0, + "11": 22500688.0, + "12": 22459470.0, + "13": 22917228.0, + "14": 22387988.0, "15": 22821732.0, + "16": 22830306.0, + "17": 22819520.0, + "18": 22582628.0, + "19": 22618028.0, "20": 22693852.0, + "21": 22739344.0, + "22": 22799596.0, + "23": 22539016.0, + "24": 22770946.0, "25": 22819324.0, + "26": 22547928.0, + "27": 22468716.0, + "28": 22453820.0, + "29": 22529898.0, "30": 22631220.0, + "31": 22955420.0, + "32": 22585276.0, + "33": 22558602.0, + "34": 22835792.0, "35": 22788208.0, + "36": 22589796.0, + "37": 22496928.0, + "38": 22896192.0, + "39": 22801858.0, "40": 22657640.0, + "41": 22658982.0, + "42": 22667052.0, + "43": 22975816.0, + "44": 22747688.0, "45": 22674846.0, + "46": 22884684.0, + "47": 22633708.0, + "48": 22928466.0, + "49": 22728092.0, "50": 22905080.0, + "51": 22791108.0, + "52": 22748190.0, + "53": 22924900.0, + "54": 22840164.0, "55": 22518344.0, + "56": 22877680.0, + "57": 23113944.0, + "58": 22846268.0, + "59": 22716084.0, "60": 22742984.0, + "61": 22724584.0, + "62": 22672944.0, + "63": 22846388.0, + "64": 22823650.0, "65": 23061058.0, + "66": 22729266.0, + "67": 22908888.0, + "68": 22610020.0, + "69": 22583826.0, "70": 22829374.0, + "71": 
22748240.0, + "72": 22654480.0, + "73": 22741180.0, + "74": 23047914.0, "75": 23054396.0, + "76": 22900788.0, + "77": 22271588.0, + "78": 22789024.0, + "79": 22743632.0, "80": 22706696.0, + "81": 22891372.0, + "82": 22777860.0, + "83": 22840532.0, + "84": 23010386.0, "85": 22711212.0, + "86": 23103006.0, + "87": 22734564.0, + "88": 22637848.0, + "89": 22497850.0, "90": 22972712.0, + "91": 22767188.0, + "92": 22808834.0, + "93": 22659304.0, + "94": 22911552.0, "95": 23047794.0, + "96": 22829386.0, + "97": 22608168.0, + "98": 22762756.0, + "99": 22905900.0, "100": 23015488.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 746443264.0, + "2": 746443264.0, + "3": 746443264.0, + "4": 746443264.0, "5": 746443264.0, + "6": 746443264.0, + "7": 746443264.0, + "8": 746443264.0, + "9": 746443264.0, "10": 746443264.0, + "11": 746443264.0, + "12": 746443264.0, + "13": 746443264.0, + "14": 746443264.0, "15": 746443264.0, + "16": 746443264.0, + "17": 746443264.0, + "18": 746443264.0, + "19": 746443264.0, "20": 746443264.0, + "21": 746443264.0, + "22": 746443264.0, + "23": 746443264.0, + "24": 746443264.0, "25": 746443264.0, + "26": 746443264.0, + "27": 746443264.0, + "28": 746443264.0, + "29": 746443264.0, "30": 746443264.0, + "31": 746443264.0, + "32": 746443264.0, + "33": 746443264.0, + "34": 746443264.0, "35": 746443264.0, + "36": 746443264.0, + "37": 746443264.0, + "38": 746443264.0, + "39": 746443264.0, "40": 746443264.0, + "41": 746443264.0, + "42": 746443264.0, + "43": 746443264.0, + "44": 746443264.0, "45": 746443264.0, + "46": 746443264.0, + "47": 746443264.0, + "48": 746443264.0, + "49": 746443264.0, "50": 746443264.0, + "51": 746443264.0, + "52": 746443264.0, + "53": 746443264.0, + "54": 746443264.0, "55": 746443264.0, + "56": 746443264.0, + "57": 746443264.0, + "58": 746443264.0, + "59": 746443264.0, "60": 746443264.0, + "61": 746443264.0, + "62": 746443264.0, + "63": 746443264.0, + "64": 
746443264.0, "65": 746443264.0, + "66": 746443264.0, + "67": 746443264.0, + "68": 746443264.0, + "69": 746443264.0, "70": 746443264.0, + "71": 746443264.0, + "72": 746443264.0, + "73": 746443264.0, + "74": 746443264.0, "75": 746443264.0, + "76": 746443264.0, + "77": 746443264.0, + "78": 746443264.0, + "79": 746443264.0, "80": 746443264.0, + "81": 746443264.0, + "82": 746443264.0, + "83": 746443264.0, + "84": 746443264.0, "85": 746443264.0, + "86": 746443264.0, + "87": 746443264.0, + "88": 746443264.0, + "89": 746443264.0, "90": 746443264.0, + "91": 746443264.0, + "92": 746443264.0, + "93": 746443264.0, + "94": 746443264.0, "95": 746443264.0, + "96": 746443264.0, + "97": 746443264.0, + "98": 746443264.0, + "99": 746443264.0, "100": 746443264.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1926291456.0, + "2": 2210100224.0, + "3": 2210100224.0, + "4": 2210100224.0, "5": 2210100224.0, + "6": 2210100224.0, + "7": 2210100224.0, + "8": 2210100224.0, + "9": 2210100224.0, "10": 2210100224.0, + "11": 2210100224.0, + "12": 2210100224.0, + "13": 2210100224.0, + "14": 2210100224.0, "15": 2210100224.0, + "16": 2210100224.0, + "17": 2210100224.0, + "18": 2210100224.0, + "19": 2210100224.0, "20": 2210100224.0, + "21": 2210100224.0, + "22": 2210100224.0, + "23": 2210100224.0, + "24": 2210100224.0, "25": 2210100224.0, + "26": 2210100224.0, + "27": 2210100224.0, + "28": 2210100224.0, + "29": 2210100224.0, "30": 2210100224.0, + "31": 2210100224.0, + "32": 2210100224.0, + "33": 2210100224.0, + "34": 2210100224.0, "35": 2210100224.0, + "36": 2210100224.0, + "37": 2210100224.0, + "38": 2210100224.0, + "39": 2210100224.0, "40": 2210100224.0, + "41": 2210100224.0, + "42": 2210100224.0, + "43": 2210100224.0, + "44": 2210100224.0, "45": 2210100224.0, + "46": 2210100224.0, + "47": 2210100224.0, + "48": 2210100224.0, + "49": 2210100224.0, "50": 2210100224.0, + "51": 2210100224.0, + "52": 2210100224.0, + "53": 
2210100224.0, + "54": 2210100224.0, "55": 2210100224.0, + "56": 2210100224.0, + "57": 2210100224.0, + "58": 2210100224.0, + "59": 2210100224.0, "60": 2210100224.0, + "61": 2210100224.0, + "62": 2210100224.0, + "63": 2210100224.0, + "64": 2210100224.0, "65": 2210100224.0, + "66": 2210100224.0, + "67": 2210100224.0, + "68": 2210100224.0, + "69": 2210100224.0, "70": 2210100224.0, + "71": 2210100224.0, + "72": 2210100224.0, + "73": 2210100224.0, + "74": 2210100224.0, "75": 2210100224.0, + "76": 2210100224.0, + "77": 2210100224.0, + "78": 2210100224.0, + "79": 2210100224.0, "80": 2210100224.0, + "81": 2210100224.0, + "82": 2210100224.0, + "83": 2210100224.0, + "84": 2210100224.0, "85": 2210100224.0, + "86": 2210100224.0, + "87": 2210100224.0, + "88": 2210100224.0, + "89": 2210100224.0, "90": 2210100224.0, + "91": 2210100224.0, + "92": 2210100224.0, + "93": 2210100224.0, + "94": 2210100224.0, "95": 2210100224.0, + "96": 2210100224.0, + "97": 2210100224.0, + "98": 2210100224.0, + "99": 2210100224.0, "100": 2210100224.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 12.51362, - "5": 0.10049, - "10": 0.10087, - "15": 0.09868, - "20": 0.09931, - "25": 0.09841, - "30": 0.09873, - "35": 0.09844, - "40": 0.09896, - "45": 0.09974, - "50": 0.09906, - "55": 0.10067, - "60": 0.09886, - "65": 0.0994, - "70": 0.09923, - "75": 0.09864, - "80": 0.09906, - "85": 0.09932, - "90": 0.09976, - "95": 0.09902, - "100": 0.09871 + "1": 14.52368, + "2": 0.12904, + "3": 0.11517, + "4": 0.11756, + "5": 0.11573, + "6": 0.11676, + "7": 0.11475, + "8": 0.11625, + "9": 0.11519, + "10": 0.12088, + "11": 0.11883, + "12": 0.11908, + "13": 0.11781, + "14": 0.11708, + "15": 0.11808, + "16": 0.11499, + "17": 0.11904, + "18": 0.11758, + "19": 0.11836, + "20": 0.11696, + "21": 0.11517, + "22": 0.11537, + "23": 0.11509, + "24": 0.11668, + "25": 0.11421, + "26": 0.11535, + "27": 0.1148, + "28": 0.11573, + "29": 0.11684, + "30": 0.11652, 
+ "31": 0.11749, + "32": 0.11508, + "33": 0.11651, + "34": 0.11541, + "35": 0.11609, + "36": 0.11722, + "37": 0.11735, + "38": 0.11849, + "39": 0.11931, + "40": 0.11381, + "41": 0.11418, + "42": 0.11682, + "43": 0.1172, + "44": 0.11595, + "45": 0.1149, + "46": 0.11591, + "47": 0.11441, + "48": 0.11991, + "49": 0.11482, + "50": 0.11551, + "51": 0.12066, + "52": 0.11485, + "53": 0.11554, + "54": 0.11513, + "55": 0.11749, + "56": 0.11612, + "57": 0.11313, + "58": 0.1131, + "59": 0.11488, + "60": 0.11602, + "61": 0.11343, + "62": 0.11313, + "63": 0.11487, + "64": 0.11581, + "65": 0.11438, + "66": 0.11344, + "67": 0.11567, + "68": 0.11465, + "69": 0.11374, + "70": 0.11452, + "71": 0.11431, + "72": 0.1157, + "73": 0.11626, + "74": 0.11498, + "75": 0.11329, + "76": 0.11264, + "77": 0.11291, + "78": 0.11343, + "79": 0.11536, + "80": 0.11515, + "81": 0.11726, + "82": 0.11537, + "83": 0.11363, + "84": 0.11591, + "85": 0.11747, + "86": 0.11816, + "87": 0.11504, + "88": 0.11547, + "89": 0.11463, + "90": 0.11598, + "91": 0.11209, + "92": 0.11386, + "93": 0.11296, + "94": 0.11351, + "95": 0.11409, + "96": 0.11256, + "97": 0.11707, + "98": 0.1149, + "99": 0.11577, + "100": 0.1143 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..6e9f643a273 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.91349, + "2": 10.90719, + "3": 10.91328, + "4": 10.87838, + "5": 10.91769, + 
"6": 10.93821, + "7": 10.90469, + "8": 10.90393, + "9": 10.90876, + "10": 10.89645, + "11": 10.92562, + "12": 10.91891, + "13": 10.91537, + "14": 10.93343, + "15": 10.86115, + "16": 10.85374, + "17": 10.82717, + "18": 10.86544, + "19": 10.86225, + "20": 10.76737, + "21": 10.74634, + "22": 10.62228, + "23": 10.76122, + "24": 10.64732, + "25": 10.59597, + "26": 10.66352, + "27": 10.6542, + "28": 10.6077, + "29": 10.62581, + "30": 10.41591, + "31": 10.16855, + "32": 10.50267, + "33": 10.50304, + "34": 10.25481, + "35": 10.31879, + "36": 10.27167, + "37": 10.37751, + "38": 10.22122, + "39": 10.44798, + "40": 10.14166, + "41": 10.1771, + "42": 10.2426, + "43": 9.87148, + "44": 9.99875, + "45": 9.88702, + "46": 9.86139, + "47": 10.18144, + "48": 9.87873, + "49": 9.58706, + "50": 9.9542, + "51": 9.8866, + "52": 9.78429, + "53": 10.10842, + "54": 9.97368, + "55": 9.89803, + "56": 9.65427, + "57": 9.52013, + "58": 9.87297, + "59": 9.6132, + "60": 9.54967, + "61": 9.70681, + "62": 9.98533, + "63": 9.41357, + "64": 9.80966, + "65": 8.97052, + "66": 9.72773, + "67": 9.39183, + "68": 9.8084, + "69": 9.82052, + "70": 9.76655, + "71": 9.63414, + "72": 9.60485, + "73": 9.52299, + "74": 8.9718, + "75": 9.42321, + "76": 9.10113, + "77": 10.0716, + "78": 9.74266, + "79": 9.40343, + "80": 9.41333, + "81": 9.49931, + "82": 9.70236, + "83": 9.33436, + "84": 9.43774, + "85": 9.63924, + "86": 9.07931, + "87": 9.60447, + "88": 9.7824, + "89": 9.62386, + "90": 9.84241, + "91": 9.35506, + "92": 9.38398, + "93": 9.09747, + "94": 8.8471, + "95": 9.5314, + "96": 9.54263, + "97": 9.32886, + "98": 9.6926, + "99": 8.89976, + "100": 9.43124 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 22727424.0, + "2": 22925204.0, + "3": 22596900.0, + "4": 23219556.0, + "5": 22714624.0, + "6": 23021776.0, + "7": 22771632.0, + "8": 22926560.0, + "9": 22842156.0, + "10": 22918168.0, + "11": 22500688.0, + "12": 22459470.0, + "13": 22917228.0, + "14": 
22387988.0, + "15": 22821732.0, + "16": 22830306.0, + "17": 22819520.0, + "18": 22582628.0, + "19": 22618028.0, + "20": 22693852.0, + "21": 22739344.0, + "22": 22799596.0, + "23": 22539016.0, + "24": 22770946.0, + "25": 22819324.0, + "26": 22547928.0, + "27": 22468716.0, + "28": 22453820.0, + "29": 22529898.0, + "30": 22631220.0, + "31": 22955420.0, + "32": 22585276.0, + "33": 22558602.0, + "34": 22835792.0, + "35": 22788208.0, + "36": 22589796.0, + "37": 22496928.0, + "38": 22896192.0, + "39": 22801858.0, + "40": 22657640.0, + "41": 22658982.0, + "42": 22667052.0, + "43": 22975816.0, + "44": 22747688.0, + "45": 22674846.0, + "46": 22884684.0, + "47": 22633708.0, + "48": 22928466.0, + "49": 22728092.0, + "50": 22905080.0, + "51": 22791108.0, + "52": 22748190.0, + "53": 22924900.0, + "54": 22840164.0, + "55": 22518344.0, + "56": 22877680.0, + "57": 23113944.0, + "58": 22846268.0, + "59": 22716084.0, + "60": 22742984.0, + "61": 22724584.0, + "62": 22672944.0, + "63": 22846388.0, + "64": 22823650.0, + "65": 23061058.0, + "66": 22729266.0, + "67": 22908888.0, + "68": 22610020.0, + "69": 22583826.0, + "70": 22829374.0, + "71": 22748240.0, + "72": 22654480.0, + "73": 22741180.0, + "74": 23047914.0, + "75": 23054396.0, + "76": 22900788.0, + "77": 22271588.0, + "78": 22789024.0, + "79": 22743632.0, + "80": 22706696.0, + "81": 22891372.0, + "82": 22777860.0, + "83": 22840532.0, + "84": 23010386.0, + "85": 22711212.0, + "86": 23103006.0, + "87": 22734564.0, + "88": 22637848.0, + "89": 22497850.0, + "90": 22972712.0, + "91": 22767188.0, + "92": 22808834.0, + "93": 22659304.0, + "94": 22911552.0, + "95": 23047794.0, + "96": 22829386.0, + "97": 22608168.0, + "98": 22762756.0, + "99": 22905900.0, + "100": 23015488.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 746443264.0, + "2": 746443264.0, + "3": 746443264.0, + "4": 746443264.0, + "5": 746443264.0, + "6": 746443264.0, + "7": 746443264.0, + "8": 
746443264.0, + "9": 746443264.0, + "10": 746443264.0, + "11": 746443264.0, + "12": 746443264.0, + "13": 746443264.0, + "14": 746443264.0, + "15": 746443264.0, + "16": 746443264.0, + "17": 746443264.0, + "18": 746443264.0, + "19": 746443264.0, + "20": 746443264.0, + "21": 746443264.0, + "22": 746443264.0, + "23": 746443264.0, + "24": 746443264.0, + "25": 746443264.0, + "26": 746443264.0, + "27": 746443264.0, + "28": 746443264.0, + "29": 746443264.0, + "30": 746443264.0, + "31": 746443264.0, + "32": 746443264.0, + "33": 746443264.0, + "34": 746443264.0, + "35": 746443264.0, + "36": 746443264.0, + "37": 746443264.0, + "38": 746443264.0, + "39": 746443264.0, + "40": 746443264.0, + "41": 746443264.0, + "42": 746443264.0, + "43": 746443264.0, + "44": 746443264.0, + "45": 746443264.0, + "46": 746443264.0, + "47": 746443264.0, + "48": 746443264.0, + "49": 746443264.0, + "50": 746443264.0, + "51": 746443264.0, + "52": 746443264.0, + "53": 746443264.0, + "54": 746443264.0, + "55": 746443264.0, + "56": 746443264.0, + "57": 746443264.0, + "58": 746443264.0, + "59": 746443264.0, + "60": 746443264.0, + "61": 746443264.0, + "62": 746443264.0, + "63": 746443264.0, + "64": 746443264.0, + "65": 746443264.0, + "66": 746443264.0, + "67": 746443264.0, + "68": 746443264.0, + "69": 746443264.0, + "70": 746443264.0, + "71": 746443264.0, + "72": 746443264.0, + "73": 746443264.0, + "74": 746443264.0, + "75": 746443264.0, + "76": 746443264.0, + "77": 746443264.0, + "78": 746443264.0, + "79": 746443264.0, + "80": 746443264.0, + "81": 746443264.0, + "82": 746443264.0, + "83": 746443264.0, + "84": 746443264.0, + "85": 746443264.0, + "86": 746443264.0, + "87": 746443264.0, + "88": 746443264.0, + "89": 746443264.0, + "90": 746443264.0, + "91": 746443264.0, + "92": 746443264.0, + "93": 746443264.0, + "94": 746443264.0, + "95": 746443264.0, + "96": 746443264.0, + "97": 746443264.0, + "98": 746443264.0, + "99": 746443264.0, + "100": 746443264.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 
1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1926291456.0, + "2": 2210100224.0, + "3": 2210100224.0, + "4": 2210100224.0, + "5": 2210100224.0, + "6": 2210100224.0, + "7": 2210100224.0, + "8": 2210100224.0, + "9": 2210100224.0, + "10": 2210100224.0, + "11": 2210100224.0, + "12": 2210100224.0, + "13": 2210100224.0, + "14": 2210100224.0, + "15": 2210100224.0, + "16": 2210100224.0, + "17": 2210100224.0, + "18": 2210100224.0, + "19": 2210100224.0, + "20": 2210100224.0, + "21": 2210100224.0, + "22": 2210100224.0, + "23": 2210100224.0, + "24": 2210100224.0, + "25": 2210100224.0, + "26": 2210100224.0, + "27": 2210100224.0, + "28": 2210100224.0, + "29": 2210100224.0, + "30": 2210100224.0, + "31": 2210100224.0, + "32": 2210100224.0, + "33": 2210100224.0, + "34": 2210100224.0, + "35": 2210100224.0, + "36": 2210100224.0, + "37": 2210100224.0, + "38": 2210100224.0, + "39": 2210100224.0, + "40": 2210100224.0, + "41": 2210100224.0, + "42": 2210100224.0, + "43": 2210100224.0, + "44": 2210100224.0, + "45": 2210100224.0, + "46": 2210100224.0, + "47": 2210100224.0, + "48": 2210100224.0, + "49": 2210100224.0, + "50": 2210100224.0, + "51": 2210100224.0, + "52": 2210100224.0, + "53": 2210100224.0, + "54": 2210100224.0, + "55": 2210100224.0, + "56": 2210100224.0, + "57": 2210100224.0, + "58": 2210100224.0, + "59": 2210100224.0, + "60": 2210100224.0, + "61": 2210100224.0, + "62": 2210100224.0, + "63": 2210100224.0, + "64": 2210100224.0, + "65": 2210100224.0, + "66": 2210100224.0, + "67": 2210100224.0, + "68": 2210100224.0, + "69": 2210100224.0, + "70": 2210100224.0, + "71": 2210100224.0, + "72": 2210100224.0, + "73": 2210100224.0, + "74": 2210100224.0, + "75": 2210100224.0, + "76": 2210100224.0, + "77": 2210100224.0, + "78": 2210100224.0, + "79": 2210100224.0, + "80": 2210100224.0, + "81": 2210100224.0, + "82": 2210100224.0, + "83": 2210100224.0, + "84": 2210100224.0, + "85": 2210100224.0, + "86": 2210100224.0, + "87": 2210100224.0, + "88": 2210100224.0, + "89": 
2210100224.0, + "90": 2210100224.0, + "91": 2210100224.0, + "92": 2210100224.0, + "93": 2210100224.0, + "94": 2210100224.0, + "95": 2210100224.0, + "96": 2210100224.0, + "97": 2210100224.0, + "98": 2210100224.0, + "99": 2210100224.0, + "100": 2210100224.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.93568, + "2": 0.13825, + "3": 0.10934, + "4": 0.10452, + "5": 0.10497, + "6": 0.104, + "7": 0.10328, + "8": 0.10258, + "9": 0.10234, + "10": 0.10351, + "11": 0.10272, + "12": 0.10199, + "13": 0.10258, + "14": 0.1027, + "15": 0.10293, + "16": 0.10182, + "17": 0.10316, + "18": 0.10197, + "19": 0.10305, + "20": 0.10272, + "21": 0.11174, + "22": 0.10459, + "23": 0.10481, + "24": 0.10575, + "25": 0.10937, + "26": 0.10268, + "27": 0.10583, + "28": 0.10249, + "29": 0.10137, + "30": 0.10307, + "31": 0.10524, + "32": 0.10586, + "33": 0.1041, + "34": 0.10278, + "35": 0.10412, + "36": 0.10185, + "37": 0.10244, + "38": 0.10111, + "39": 0.10231, + "40": 0.10346, + "41": 0.10527, + "42": 0.10187, + "43": 0.10283, + "44": 0.10242, + "45": 0.10465, + "46": 0.10208, + "47": 0.10316, + "48": 0.10189, + "49": 0.10524, + "50": 0.10242, + "51": 0.10733, + "52": 0.10211, + "53": 0.10215, + "54": 0.10143, + "55": 0.10092, + "56": 0.10225, + "57": 0.1029, + "58": 0.10504, + "59": 0.10464, + "60": 0.10364, + "61": 0.10221, + "62": 0.10154, + "63": 0.10225, + "64": 0.1013, + "65": 0.10347, + "66": 0.10142, + "67": 0.102, + "68": 0.10339, + "69": 0.10291, + "70": 0.10294, + "71": 0.10164, + "72": 0.1026, + "73": 0.10225, + "74": 0.10241, + "75": 0.10146, + "76": 0.10155, + "77": 0.10259, + "78": 0.10243, + "79": 0.10169, + "80": 0.10195, + "81": 0.10134, + "82": 0.10222, + "83": 0.10368, + "84": 0.10065, + "85": 0.10117, + "86": 0.10158, + "87": 0.10243, + "88": 0.10233, + "89": 0.10157, + "90": 0.10229, + "91": 0.10188, + "92": 0.10172, + "93": 0.1013, + "94": 0.1011, + "95": 0.10202, + "96": 0.10173, + "97": 0.10128, + "98": 
0.10222, + "99": 0.10127, + "100": 0.10148 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..d3d593b49c2 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.91349, + "2": 10.90719, + "3": 10.91328, + "4": 10.87838, + "5": 10.91769, + "6": 10.93821, + "7": 10.90469, + "8": 10.90393, + "9": 10.90876, + "10": 10.89645, + "11": 10.92562, + "12": 10.91891, + "13": 10.91537, + "14": 10.93343, + "15": 10.86115, + "16": 10.85374, + "17": 10.82717, + "18": 10.86544, + "19": 10.86225, + "20": 10.76737, + "21": 10.74634, + "22": 10.62228, + "23": 10.76122, + "24": 10.64732, + "25": 10.59597, + "26": 10.66352, + "27": 10.6542, + "28": 10.6077, + "29": 10.62581, + "30": 10.41591, + "31": 10.16855, + "32": 10.50267, + "33": 10.50304, + "34": 10.25481, + "35": 10.31879, + "36": 10.27167, + "37": 10.37751, + "38": 10.22122, + "39": 10.44798, + "40": 10.14166, + "41": 10.1771, + "42": 10.2426, + "43": 9.87148, + "44": 9.99875, + "45": 9.88702, + "46": 9.86139, + "47": 10.18144, + "48": 9.87873, + "49": 9.58706, + "50": 9.9542, + "51": 9.8866, + "52": 9.78429, + "53": 10.10842, + "54": 9.97368, + "55": 9.89803, + "56": 9.65427, + "57": 9.52013, + "58": 9.87297, + "59": 9.6132, + "60": 9.54967, + "61": 9.70681, + "62": 9.98533, + "63": 9.41357, + "64": 9.80966, + "65": 8.97052, + "66": 9.72773, + "67": 9.39183, + "68": 9.8084, + "69": 9.82052, + "70": 9.76655, + "71": 9.63414, + "72": 
9.60485, + "73": 9.52299, + "74": 8.9718, + "75": 9.42321, + "76": 9.10113, + "77": 10.0716, + "78": 9.74266, + "79": 9.40343, + "80": 9.41333, + "81": 9.49931, + "82": 9.70236, + "83": 9.33436, + "84": 9.43774, + "85": 9.63924, + "86": 9.07931, + "87": 9.60447, + "88": 9.7824, + "89": 9.62386, + "90": 9.84241, + "91": 9.35506, + "92": 9.38398, + "93": 9.09747, + "94": 8.8471, + "95": 9.5314, + "96": 9.54263, + "97": 9.32886, + "98": 9.6926, + "99": 8.89976, + "100": 9.43124 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 22727424.0, + "2": 22925204.0, + "3": 22596900.0, + "4": 23219556.0, + "5": 22714624.0, + "6": 23021776.0, + "7": 22771632.0, + "8": 22926560.0, + "9": 22842156.0, + "10": 22918168.0, + "11": 22500688.0, + "12": 22459470.0, + "13": 22917228.0, + "14": 22387988.0, + "15": 22821732.0, + "16": 22830306.0, + "17": 22819520.0, + "18": 22582628.0, + "19": 22618028.0, + "20": 22693852.0, + "21": 22739344.0, + "22": 22799596.0, + "23": 22539016.0, + "24": 22770946.0, + "25": 22819324.0, + "26": 22547928.0, + "27": 22468716.0, + "28": 22453820.0, + "29": 22529898.0, + "30": 22631220.0, + "31": 22955420.0, + "32": 22585276.0, + "33": 22558602.0, + "34": 22835792.0, + "35": 22788208.0, + "36": 22589796.0, + "37": 22496928.0, + "38": 22896192.0, + "39": 22801858.0, + "40": 22657640.0, + "41": 22658982.0, + "42": 22667052.0, + "43": 22975816.0, + "44": 22747688.0, + "45": 22674846.0, + "46": 22884684.0, + "47": 22633708.0, + "48": 22928466.0, + "49": 22728092.0, + "50": 22905080.0, + "51": 22791108.0, + "52": 22748190.0, + "53": 22924900.0, + "54": 22840164.0, + "55": 22518344.0, + "56": 22877680.0, + "57": 23113944.0, + "58": 22846268.0, + "59": 22716084.0, + "60": 22742984.0, + "61": 22724584.0, + "62": 22672944.0, + "63": 22846388.0, + "64": 22823650.0, + "65": 23061058.0, + "66": 22729266.0, + "67": 22908888.0, + "68": 22610020.0, + "69": 22583826.0, + "70": 22829374.0, + "71": 22748240.0, + "72": 
22654480.0, + "73": 22741180.0, + "74": 23047914.0, + "75": 23054396.0, + "76": 22900788.0, + "77": 22271588.0, + "78": 22789024.0, + "79": 22743632.0, + "80": 22706696.0, + "81": 22891372.0, + "82": 22777860.0, + "83": 22840532.0, + "84": 23010386.0, + "85": 22711212.0, + "86": 23103006.0, + "87": 22734564.0, + "88": 22637848.0, + "89": 22497850.0, + "90": 22972712.0, + "91": 22767188.0, + "92": 22808834.0, + "93": 22659304.0, + "94": 22911552.0, + "95": 23047794.0, + "96": 22829386.0, + "97": 22608168.0, + "98": 22762756.0, + "99": 22905900.0, + "100": 23015488.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 746443264.0, + "2": 746443264.0, + "3": 746443264.0, + "4": 746443264.0, + "5": 746443264.0, + "6": 746443264.0, + "7": 746443264.0, + "8": 746443264.0, + "9": 746443264.0, + "10": 746443264.0, + "11": 746443264.0, + "12": 746443264.0, + "13": 746443264.0, + "14": 746443264.0, + "15": 746443264.0, + "16": 746443264.0, + "17": 746443264.0, + "18": 746443264.0, + "19": 746443264.0, + "20": 746443264.0, + "21": 746443264.0, + "22": 746443264.0, + "23": 746443264.0, + "24": 746443264.0, + "25": 746443264.0, + "26": 746443264.0, + "27": 746443264.0, + "28": 746443264.0, + "29": 746443264.0, + "30": 746443264.0, + "31": 746443264.0, + "32": 746443264.0, + "33": 746443264.0, + "34": 746443264.0, + "35": 746443264.0, + "36": 746443264.0, + "37": 746443264.0, + "38": 746443264.0, + "39": 746443264.0, + "40": 746443264.0, + "41": 746443264.0, + "42": 746443264.0, + "43": 746443264.0, + "44": 746443264.0, + "45": 746443264.0, + "46": 746443264.0, + "47": 746443264.0, + "48": 746443264.0, + "49": 746443264.0, + "50": 746443264.0, + "51": 746443264.0, + "52": 746443264.0, + "53": 746443264.0, + "54": 746443264.0, + "55": 746443264.0, + "56": 746443264.0, + "57": 746443264.0, + "58": 746443264.0, + "59": 746443264.0, + "60": 746443264.0, + "61": 746443264.0, + "62": 746443264.0, + "63": 746443264.0, + 
"64": 746443264.0, + "65": 746443264.0, + "66": 746443264.0, + "67": 746443264.0, + "68": 746443264.0, + "69": 746443264.0, + "70": 746443264.0, + "71": 746443264.0, + "72": 746443264.0, + "73": 746443264.0, + "74": 746443264.0, + "75": 746443264.0, + "76": 746443264.0, + "77": 746443264.0, + "78": 746443264.0, + "79": 746443264.0, + "80": 746443264.0, + "81": 746443264.0, + "82": 746443264.0, + "83": 746443264.0, + "84": 746443264.0, + "85": 746443264.0, + "86": 746443264.0, + "87": 746443264.0, + "88": 746443264.0, + "89": 746443264.0, + "90": 746443264.0, + "91": 746443264.0, + "92": 746443264.0, + "93": 746443264.0, + "94": 746443264.0, + "95": 746443264.0, + "96": 746443264.0, + "97": 746443264.0, + "98": 746443264.0, + "99": 746443264.0, + "100": 746443264.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1926291456.0, + "2": 2210100224.0, + "3": 2210100224.0, + "4": 2210100224.0, + "5": 2210100224.0, + "6": 2210100224.0, + "7": 2210100224.0, + "8": 2210100224.0, + "9": 2210100224.0, + "10": 2210100224.0, + "11": 2210100224.0, + "12": 2210100224.0, + "13": 2210100224.0, + "14": 2210100224.0, + "15": 2210100224.0, + "16": 2210100224.0, + "17": 2210100224.0, + "18": 2210100224.0, + "19": 2210100224.0, + "20": 2210100224.0, + "21": 2210100224.0, + "22": 2210100224.0, + "23": 2210100224.0, + "24": 2210100224.0, + "25": 2210100224.0, + "26": 2210100224.0, + "27": 2210100224.0, + "28": 2210100224.0, + "29": 2210100224.0, + "30": 2210100224.0, + "31": 2210100224.0, + "32": 2210100224.0, + "33": 2210100224.0, + "34": 2210100224.0, + "35": 2210100224.0, + "36": 2210100224.0, + "37": 2210100224.0, + "38": 2210100224.0, + "39": 2210100224.0, + "40": 2210100224.0, + "41": 2210100224.0, + "42": 2210100224.0, + "43": 2210100224.0, + "44": 2210100224.0, + "45": 2210100224.0, + "46": 2210100224.0, + "47": 2210100224.0, + "48": 2210100224.0, + "49": 2210100224.0, + "50": 2210100224.0, + "51": 
2210100224.0, + "52": 2210100224.0, + "53": 2210100224.0, + "54": 2210100224.0, + "55": 2210100224.0, + "56": 2210100224.0, + "57": 2210100224.0, + "58": 2210100224.0, + "59": 2210100224.0, + "60": 2210100224.0, + "61": 2210100224.0, + "62": 2210100224.0, + "63": 2210100224.0, + "64": 2210100224.0, + "65": 2210100224.0, + "66": 2210100224.0, + "67": 2210100224.0, + "68": 2210100224.0, + "69": 2210100224.0, + "70": 2210100224.0, + "71": 2210100224.0, + "72": 2210100224.0, + "73": 2210100224.0, + "74": 2210100224.0, + "75": 2210100224.0, + "76": 2210100224.0, + "77": 2210100224.0, + "78": 2210100224.0, + "79": 2210100224.0, + "80": 2210100224.0, + "81": 2210100224.0, + "82": 2210100224.0, + "83": 2210100224.0, + "84": 2210100224.0, + "85": 2210100224.0, + "86": 2210100224.0, + "87": 2210100224.0, + "88": 2210100224.0, + "89": 2210100224.0, + "90": 2210100224.0, + "91": 2210100224.0, + "92": 2210100224.0, + "93": 2210100224.0, + "94": 2210100224.0, + "95": 2210100224.0, + "96": 2210100224.0, + "97": 2210100224.0, + "98": 2210100224.0, + "99": 2210100224.0, + "100": 2210100224.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.49723, + "2": 0.13917, + "3": 0.12323, + "4": 0.12243, + "5": 0.12247, + "6": 0.12126, + "7": 0.12098, + "8": 0.1227, + "9": 0.12232, + "10": 0.12216, + "11": 0.12203, + "12": 0.12472, + "13": 0.11919, + "14": 0.12363, + "15": 0.11934, + "16": 0.12078, + "17": 0.1214, + "18": 0.12382, + "19": 0.11938, + "20": 0.11818, + "21": 0.1195, + "22": 0.1193, + "23": 0.11729, + "24": 0.11671, + "25": 0.11812, + "26": 0.11788, + "27": 0.11835, + "28": 0.11687, + "29": 0.11683, + "30": 0.1185, + "31": 0.11738, + "32": 0.11696, + "33": 0.11541, + "34": 0.11482, + "35": 0.11307, + "36": 0.11445, + "37": 0.11503, + "38": 0.11448, + "39": 0.11562, + "40": 0.11468, + "41": 0.11341, + "42": 0.11368, + "43": 0.11604, + "44": 0.11649, + "45": 0.11581, + "46": 0.11637, + "47": 0.11699, + "48": 0.11661, 
+ "49": 0.11522, + "50": 0.11451, + "51": 0.12299, + "52": 0.11449, + "53": 0.11137, + "54": 0.11274, + "55": 0.1121, + "56": 0.11212, + "57": 0.11573, + "58": 0.11206, + "59": 0.11388, + "60": 0.11369, + "61": 0.11208, + "62": 0.11287, + "63": 0.11238, + "64": 0.11193, + "65": 0.11205, + "66": 0.11482, + "67": 0.1131, + "68": 0.11433, + "69": 0.11257, + "70": 0.1116, + "71": 0.11365, + "72": 0.11214, + "73": 0.11376, + "74": 0.11389, + "75": 0.11397, + "76": 0.11359, + "77": 0.11346, + "78": 0.11235, + "79": 0.11282, + "80": 0.11301, + "81": 0.11347, + "82": 0.11356, + "83": 0.11321, + "84": 0.11412, + "85": 0.11256, + "86": 0.11555, + "87": 0.11224, + "88": 0.11344, + "89": 0.11351, + "90": 0.11218, + "91": 0.11235, + "92": 0.11417, + "93": 0.11691, + "94": 0.11326, + "95": 0.11519, + "96": 0.11321, + "97": 0.11272, + "98": 0.11268, + "99": 0.11187, + "100": 0.11371 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..0c4a176491d --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.90105, + "2": 10.89262, + "3": 10.90042, + "4": 10.88139, + "5": 10.89686, + "6": 10.91104, + "7": 10.90071, + "8": 10.88372, + "9": 10.89705, + "10": 10.88269, + "11": 10.91638, + "12": 10.88862, + "13": 10.89506, + "14": 10.90397, + "15": 10.83975, + "16": 10.84821, + "17": 10.83519, + "18": 10.83782, + "19": 10.83204, + "20": 10.74037, + "21": 10.70726, + "22": 
10.5989, + "23": 10.72135, + "24": 10.60586, + "25": 10.57931, + "26": 10.63021, + "27": 10.62207, + "28": 10.57267, + "29": 10.60724, + "30": 10.37738, + "31": 10.15237, + "32": 10.47733, + "33": 10.48045, + "34": 10.24256, + "35": 10.29033, + "36": 10.26052, + "37": 10.36236, + "38": 10.2143, + "39": 10.44546, + "40": 10.1156, + "41": 10.15998, + "42": 10.23373, + "43": 9.85188, + "44": 9.97725, + "45": 9.85639, + "46": 9.83161, + "47": 10.17999, + "48": 9.85771, + "49": 9.54486, + "50": 9.93378, + "51": 9.86811, + "52": 9.76315, + "53": 10.10886, + "54": 9.95631, + "55": 9.87553, + "56": 9.64641, + "57": 9.49014, + "58": 9.85454, + "59": 9.59336, + "60": 9.528, + "61": 9.69542, + "62": 10.01688, + "63": 9.38936, + "64": 9.80315, + "65": 8.95041, + "66": 9.72761, + "67": 9.37481, + "68": 9.80513, + "69": 9.81015, + "70": 9.76634, + "71": 9.63164, + "72": 9.57894, + "73": 9.52071, + "74": 8.94946, + "75": 9.4304, + "76": 9.0845, + "77": 10.08945, + "78": 9.72783, + "79": 9.37638, + "80": 9.40916, + "81": 9.4973, + "82": 9.71293, + "83": 9.33328, + "84": 9.44016, + "85": 9.63365, + "86": 9.07079, + "87": 9.61271, + "88": 9.78341, + "89": 9.60939, + "90": 9.8516, + "91": 9.34566, + "92": 9.38259, + "93": 9.07364, + "94": 8.81745, + "95": 9.51874, + "96": 9.54064, + "97": 9.3403, + "98": 9.7014, + "99": 8.88889, + "100": 9.43257 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 22727086.0, + "2": 22925536.0, + "3": 22597166.0, + "4": 23219856.0, + "5": 22714736.0, + "6": 23021732.0, + "7": 22770914.0, + "8": 22927056.0, + "9": 22842296.0, + "10": 22918912.0, + "11": 22500920.0, + "12": 22460280.0, + "13": 22917408.0, + "14": 22388720.0, + "15": 22821334.0, + "16": 22830758.0, + "17": 22818604.0, + "18": 22581868.0, + "19": 22618000.0, + "20": 22694008.0, + "21": 22739396.0, + "22": 22800094.0, + "23": 22540104.0, + "24": 22771496.0, + "25": 22818912.0, + "26": 22547352.0, + "27": 22469568.0, + "28": 22453522.0, 
+ "29": 22530096.0, + "30": 22631266.0, + "31": 22955564.0, + "32": 22585980.0, + "33": 22558174.0, + "34": 22835734.0, + "35": 22787944.0, + "36": 22590020.0, + "37": 22497168.0, + "38": 22896692.0, + "39": 22801708.0, + "40": 22658196.0, + "41": 22659512.0, + "42": 22667920.0, + "43": 22975524.0, + "44": 22746310.0, + "45": 22675296.0, + "46": 22884630.0, + "47": 22633552.0, + "48": 22929508.0, + "49": 22727314.0, + "50": 22904808.0, + "51": 22791580.0, + "52": 22748196.0, + "53": 22926080.0, + "54": 22839468.0, + "55": 22518754.0, + "56": 22877424.0, + "57": 23112764.0, + "58": 22845208.0, + "59": 22716140.0, + "60": 22743504.0, + "61": 22724840.0, + "62": 22672332.0, + "63": 22846080.0, + "64": 22823362.0, + "65": 23060460.0, + "66": 22729572.0, + "67": 22907836.0, + "68": 22610520.0, + "69": 22584436.0, + "70": 22829772.0, + "71": 22749364.0, + "72": 22653792.0, + "73": 22740804.0, + "74": 23047852.0, + "75": 23054048.0, + "76": 22901336.0, + "77": 22271880.0, + "78": 22789702.0, + "79": 22743626.0, + "80": 22706308.0, + "81": 22891444.0, + "82": 22776950.0, + "83": 22839442.0, + "84": 23010112.0, + "85": 22712054.0, + "86": 23103248.0, + "87": 22735596.0, + "88": 22636964.0, + "89": 22499088.0, + "90": 22972128.0, + "91": 22767228.0, + "92": 22810212.0, + "93": 22659490.0, + "94": 22911654.0, + "95": 23048144.0, + "96": 22828752.0, + "97": 22608416.0, + "98": 22762932.0, + "99": 22906240.0, + "100": 23015824.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 717082624.0, + "2": 717082624.0, + "3": 717082624.0, + "4": 717082624.0, + "5": 717082624.0, + "6": 717082624.0, + "7": 717082624.0, + "8": 717082624.0, + "9": 717082624.0, + "10": 717082624.0, + "11": 717082624.0, + "12": 717082624.0, + "13": 717082624.0, + "14": 717082624.0, + "15": 717082624.0, + "16": 717082624.0, + "17": 717082624.0, + "18": 717082624.0, + "19": 717082624.0, + "20": 717082624.0, + "21": 717082624.0, + "22": 
717082624.0, + "23": 717082624.0, + "24": 717082624.0, + "25": 717082624.0, + "26": 717082624.0, + "27": 717082624.0, + "28": 717082624.0, + "29": 717082624.0, + "30": 717082624.0, + "31": 717082624.0, + "32": 717082624.0, + "33": 717082624.0, + "34": 717082624.0, + "35": 717082624.0, + "36": 717082624.0, + "37": 717082624.0, + "38": 717082624.0, + "39": 717082624.0, + "40": 717082624.0, + "41": 717082624.0, + "42": 717082624.0, + "43": 717082624.0, + "44": 717082624.0, + "45": 717082624.0, + "46": 717082624.0, + "47": 717082624.0, + "48": 717082624.0, + "49": 717082624.0, + "50": 717082624.0, + "51": 717082624.0, + "52": 717082624.0, + "53": 717082624.0, + "54": 717082624.0, + "55": 717082624.0, + "56": 717082624.0, + "57": 717082624.0, + "58": 717082624.0, + "59": 717082624.0, + "60": 717082624.0, + "61": 717082624.0, + "62": 717082624.0, + "63": 717082624.0, + "64": 717082624.0, + "65": 717082624.0, + "66": 717082624.0, + "67": 717082624.0, + "68": 717082624.0, + "69": 717082624.0, + "70": 717082624.0, + "71": 717082624.0, + "72": 717082624.0, + "73": 717082624.0, + "74": 717082624.0, + "75": 717082624.0, + "76": 717082624.0, + "77": 717082624.0, + "78": 717082624.0, + "79": 717082624.0, + "80": 717082624.0, + "81": 717082624.0, + "82": 717082624.0, + "83": 717082624.0, + "84": 717082624.0, + "85": 717082624.0, + "86": 717082624.0, + "87": 717082624.0, + "88": 717082624.0, + "89": 717082624.0, + "90": 717082624.0, + "91": 717082624.0, + "92": 717082624.0, + "93": 717082624.0, + "94": 717082624.0, + "95": 717082624.0, + "96": 717082624.0, + "97": 717082624.0, + "98": 717082624.0, + "99": 717082624.0, + "100": 717082624.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2399852544.0, + "2": 2683661312.0, + "3": 2683661312.0, + "4": 2683661312.0, + "5": 2683661312.0, + "6": 2683661312.0, + "7": 2683661312.0, + "8": 2683661312.0, + "9": 2683661312.0, + "10": 2683661312.0, + "11": 2683661312.0, + 
"12": 2683661312.0, + "13": 2683661312.0, + "14": 2683661312.0, + "15": 2683661312.0, + "16": 2683661312.0, + "17": 2683661312.0, + "18": 2683661312.0, + "19": 2683661312.0, + "20": 2683661312.0, + "21": 2683661312.0, + "22": 2683661312.0, + "23": 2683661312.0, + "24": 2683661312.0, + "25": 2683661312.0, + "26": 2683661312.0, + "27": 2683661312.0, + "28": 2683661312.0, + "29": 2683661312.0, + "30": 2683661312.0, + "31": 2683661312.0, + "32": 2683661312.0, + "33": 2683661312.0, + "34": 2683661312.0, + "35": 2683661312.0, + "36": 2683661312.0, + "37": 2683661312.0, + "38": 2683661312.0, + "39": 2683661312.0, + "40": 2683661312.0, + "41": 2683661312.0, + "42": 2683661312.0, + "43": 2683661312.0, + "44": 2683661312.0, + "45": 2683661312.0, + "46": 2683661312.0, + "47": 2683661312.0, + "48": 2683661312.0, + "49": 2683661312.0, + "50": 2683661312.0, + "51": 2683661312.0, + "52": 2683661312.0, + "53": 2683661312.0, + "54": 2683661312.0, + "55": 2683661312.0, + "56": 2683661312.0, + "57": 2683661312.0, + "58": 2683661312.0, + "59": 2683661312.0, + "60": 2683661312.0, + "61": 2683661312.0, + "62": 2683661312.0, + "63": 2683661312.0, + "64": 2683661312.0, + "65": 2683661312.0, + "66": 2683661312.0, + "67": 2683661312.0, + "68": 2683661312.0, + "69": 2683661312.0, + "70": 2683661312.0, + "71": 2683661312.0, + "72": 2683661312.0, + "73": 2683661312.0, + "74": 2683661312.0, + "75": 2683661312.0, + "76": 2683661312.0, + "77": 2683661312.0, + "78": 2683661312.0, + "79": 2683661312.0, + "80": 2683661312.0, + "81": 2683661312.0, + "82": 2683661312.0, + "83": 2683661312.0, + "84": 2683661312.0, + "85": 2683661312.0, + "86": 2683661312.0, + "87": 2683661312.0, + "88": 2683661312.0, + "89": 2683661312.0, + "90": 2683661312.0, + "91": 2683661312.0, + "92": 2683661312.0, + "93": 2683661312.0, + "94": 2683661312.0, + "95": 2683661312.0, + "96": 2683661312.0, + "97": 2683661312.0, + "98": 2683661312.0, + "99": 2683661312.0, + "100": 2683661312.0 + } + }, + "iteration-time": { + 
"start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 16.63764, + "2": 0.21125, + "3": 0.18805, + "4": 0.18329, + "5": 0.1823, + "6": 0.18232, + "7": 0.18144, + "8": 0.18027, + "9": 0.17969, + "10": 0.18238, + "11": 0.18028, + "12": 0.36174, + "13": 0.18167, + "14": 0.1837, + "15": 0.18267, + "16": 0.18257, + "17": 0.18024, + "18": 0.18275, + "19": 0.1832, + "20": 0.17831, + "21": 0.18017, + "22": 0.18109, + "23": 0.17885, + "24": 0.18267, + "25": 0.18058, + "26": 0.1773, + "27": 0.1794, + "28": 0.17907, + "29": 0.18081, + "30": 0.17905, + "31": 0.17854, + "32": 0.17894, + "33": 0.17849, + "34": 0.17658, + "35": 0.17776, + "36": 0.17727, + "37": 0.17642, + "38": 0.17777, + "39": 0.17803, + "40": 0.17642, + "41": 0.17693, + "42": 0.17625, + "43": 0.17866, + "44": 0.17762, + "45": 0.17754, + "46": 0.17702, + "47": 0.17711, + "48": 0.17758, + "49": 0.17715, + "50": 0.17757, + "51": 0.18445, + "52": 0.1799, + "53": 0.18208, + "54": 0.17612, + "55": 0.17944, + "56": 0.17873, + "57": 0.18258, + "58": 0.17483, + "59": 0.17477, + "60": 0.17433, + "61": 0.17366, + "62": 0.44447, + "63": 0.17665, + "64": 0.17466, + "65": 0.17524, + "66": 0.17467, + "67": 0.17584, + "68": 0.17461, + "69": 0.17423, + "70": 0.1742, + "71": 0.1735, + "72": 0.17461, + "73": 0.17526, + "74": 0.17447, + "75": 0.17297, + "76": 0.17355, + "77": 0.17305, + "78": 0.17366, + "79": 0.17341, + "80": 0.17382, + "81": 0.17396, + "82": 0.17489, + "83": 0.17464, + "84": 0.17401, + "85": 0.17498, + "86": 0.17379, + "87": 0.1725, + "88": 0.17312, + "89": 0.17427, + "90": 0.17333, + "91": 0.1738, + "92": 0.1743, + "93": 0.1732, + "94": 0.1739, + "95": 0.17949, + "96": 0.17499, + "97": 0.17375, + "98": 0.17377, + "99": 0.17343, + "100": 0.17383 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..0fb0b846d53 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.90105, + "2": 10.89262, + "3": 10.90042, + "4": 10.88139, + "5": 10.89686, + "6": 10.91104, + "7": 10.90071, + "8": 10.88372, + "9": 10.89705, + "10": 10.88269, + "11": 10.91638, + "12": 10.88862, + "13": 10.89506, + "14": 10.90397, + "15": 10.83975, + "16": 10.84821, + "17": 10.83519, + "18": 10.83782, + "19": 10.83204, + "20": 10.74037, + "21": 10.70726, + "22": 10.5989, + "23": 10.72135, + "24": 10.60586, + "25": 10.57931, + "26": 10.63021, + "27": 10.62207, + "28": 10.57267, + "29": 10.60724, + "30": 10.37738, + "31": 10.15237, + "32": 10.47733, + "33": 10.48045, + "34": 10.24256, + "35": 10.29033, + "36": 10.26052, + "37": 10.36236, + "38": 10.2143, + "39": 10.44546, + "40": 10.1156, + "41": 10.15998, + "42": 10.23373, + "43": 9.85188, + "44": 9.97725, + "45": 9.85639, + "46": 9.83161, + "47": 10.17999, + "48": 9.85771, + "49": 9.54486, + "50": 9.93378, + "51": 9.86811, + "52": 9.76315, + "53": 10.10886, + "54": 9.95631, + "55": 9.87553, + "56": 9.64641, + "57": 9.49014, + "58": 9.85454, + "59": 9.59336, + "60": 9.528, + "61": 9.69542, + "62": 10.01688, + "63": 9.38936, + "64": 9.80315, + "65": 8.95041, + "66": 9.72761, + "67": 9.37481, + "68": 9.80513, + "69": 9.81015, + "70": 9.76634, + "71": 9.63164, + "72": 9.57894, + "73": 9.52071, + "74": 8.94946, + "75": 9.4304, + "76": 9.0845, + "77": 10.08945, + "78": 9.72783, + "79": 9.37638, + "80": 9.40916, + "81": 9.4973, + "82": 9.71293, + "83": 9.33328, + "84": 9.44016, + "85": 9.63365, + "86": 
9.07079, + "87": 9.61271, + "88": 9.78341, + "89": 9.60939, + "90": 9.8516, + "91": 9.34566, + "92": 9.38259, + "93": 9.07364, + "94": 8.81745, + "95": 9.51874, + "96": 9.54064, + "97": 9.3403, + "98": 9.7014, + "99": 8.88889, + "100": 9.43257 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 22727086.0, + "2": 22925536.0, + "3": 22597166.0, + "4": 23219856.0, + "5": 22714736.0, + "6": 23021732.0, + "7": 22770914.0, + "8": 22927056.0, + "9": 22842296.0, + "10": 22918912.0, + "11": 22500920.0, + "12": 22460280.0, + "13": 22917408.0, + "14": 22388720.0, + "15": 22821334.0, + "16": 22830758.0, + "17": 22818604.0, + "18": 22581868.0, + "19": 22618000.0, + "20": 22694008.0, + "21": 22739396.0, + "22": 22800094.0, + "23": 22540104.0, + "24": 22771496.0, + "25": 22818912.0, + "26": 22547352.0, + "27": 22469568.0, + "28": 22453522.0, + "29": 22530096.0, + "30": 22631266.0, + "31": 22955564.0, + "32": 22585980.0, + "33": 22558174.0, + "34": 22835734.0, + "35": 22787944.0, + "36": 22590020.0, + "37": 22497168.0, + "38": 22896692.0, + "39": 22801708.0, + "40": 22658196.0, + "41": 22659512.0, + "42": 22667920.0, + "43": 22975524.0, + "44": 22746310.0, + "45": 22675296.0, + "46": 22884630.0, + "47": 22633552.0, + "48": 22929508.0, + "49": 22727314.0, + "50": 22904808.0, + "51": 22791580.0, + "52": 22748196.0, + "53": 22926080.0, + "54": 22839468.0, + "55": 22518754.0, + "56": 22877424.0, + "57": 23112764.0, + "58": 22845208.0, + "59": 22716140.0, + "60": 22743504.0, + "61": 22724840.0, + "62": 22672332.0, + "63": 22846080.0, + "64": 22823362.0, + "65": 23060460.0, + "66": 22729572.0, + "67": 22907836.0, + "68": 22610520.0, + "69": 22584436.0, + "70": 22829772.0, + "71": 22749364.0, + "72": 22653792.0, + "73": 22740804.0, + "74": 23047852.0, + "75": 23054048.0, + "76": 22901336.0, + "77": 22271880.0, + "78": 22789702.0, + "79": 22743626.0, + "80": 22706308.0, + "81": 22891444.0, + "82": 22776950.0, + "83": 22839442.0, + 
"84": 23010112.0, + "85": 22712054.0, + "86": 23103248.0, + "87": 22735596.0, + "88": 22636964.0, + "89": 22499088.0, + "90": 22972128.0, + "91": 22767228.0, + "92": 22810212.0, + "93": 22659490.0, + "94": 22911654.0, + "95": 23048144.0, + "96": 22828752.0, + "97": 22608416.0, + "98": 22762932.0, + "99": 22906240.0, + "100": 23015824.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 717082624.0, + "2": 717082624.0, + "3": 717082624.0, + "4": 717082624.0, + "5": 717082624.0, + "6": 717082624.0, + "7": 717082624.0, + "8": 717082624.0, + "9": 717082624.0, + "10": 717082624.0, + "11": 717082624.0, + "12": 717082624.0, + "13": 717082624.0, + "14": 717082624.0, + "15": 717082624.0, + "16": 717082624.0, + "17": 717082624.0, + "18": 717082624.0, + "19": 717082624.0, + "20": 717082624.0, + "21": 717082624.0, + "22": 717082624.0, + "23": 717082624.0, + "24": 717082624.0, + "25": 717082624.0, + "26": 717082624.0, + "27": 717082624.0, + "28": 717082624.0, + "29": 717082624.0, + "30": 717082624.0, + "31": 717082624.0, + "32": 717082624.0, + "33": 717082624.0, + "34": 717082624.0, + "35": 717082624.0, + "36": 717082624.0, + "37": 717082624.0, + "38": 717082624.0, + "39": 717082624.0, + "40": 717082624.0, + "41": 717082624.0, + "42": 717082624.0, + "43": 717082624.0, + "44": 717082624.0, + "45": 717082624.0, + "46": 717082624.0, + "47": 717082624.0, + "48": 717082624.0, + "49": 717082624.0, + "50": 717082624.0, + "51": 717082624.0, + "52": 717082624.0, + "53": 717082624.0, + "54": 717082624.0, + "55": 717082624.0, + "56": 717082624.0, + "57": 717082624.0, + "58": 717082624.0, + "59": 717082624.0, + "60": 717082624.0, + "61": 717082624.0, + "62": 717082624.0, + "63": 717082624.0, + "64": 717082624.0, + "65": 717082624.0, + "66": 717082624.0, + "67": 717082624.0, + "68": 717082624.0, + "69": 717082624.0, + "70": 717082624.0, + "71": 717082624.0, + "72": 717082624.0, + "73": 717082624.0, + "74": 717082624.0, + 
"75": 717082624.0, + "76": 717082624.0, + "77": 717082624.0, + "78": 717082624.0, + "79": 717082624.0, + "80": 717082624.0, + "81": 717082624.0, + "82": 717082624.0, + "83": 717082624.0, + "84": 717082624.0, + "85": 717082624.0, + "86": 717082624.0, + "87": 717082624.0, + "88": 717082624.0, + "89": 717082624.0, + "90": 717082624.0, + "91": 717082624.0, + "92": 717082624.0, + "93": 717082624.0, + "94": 717082624.0, + "95": 717082624.0, + "96": 717082624.0, + "97": 717082624.0, + "98": 717082624.0, + "99": 717082624.0, + "100": 717082624.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2399852544.0, + "2": 2683661312.0, + "3": 2683661312.0, + "4": 2683661312.0, + "5": 2683661312.0, + "6": 2683661312.0, + "7": 2683661312.0, + "8": 2683661312.0, + "9": 2683661312.0, + "10": 2683661312.0, + "11": 2683661312.0, + "12": 2683661312.0, + "13": 2683661312.0, + "14": 2683661312.0, + "15": 2683661312.0, + "16": 2683661312.0, + "17": 2683661312.0, + "18": 2683661312.0, + "19": 2683661312.0, + "20": 2683661312.0, + "21": 2683661312.0, + "22": 2683661312.0, + "23": 2683661312.0, + "24": 2683661312.0, + "25": 2683661312.0, + "26": 2683661312.0, + "27": 2683661312.0, + "28": 2683661312.0, + "29": 2683661312.0, + "30": 2683661312.0, + "31": 2683661312.0, + "32": 2683661312.0, + "33": 2683661312.0, + "34": 2683661312.0, + "35": 2683661312.0, + "36": 2683661312.0, + "37": 2683661312.0, + "38": 2683661312.0, + "39": 2683661312.0, + "40": 2683661312.0, + "41": 2683661312.0, + "42": 2683661312.0, + "43": 2683661312.0, + "44": 2683661312.0, + "45": 2683661312.0, + "46": 2683661312.0, + "47": 2683661312.0, + "48": 2683661312.0, + "49": 2683661312.0, + "50": 2683661312.0, + "51": 2683661312.0, + "52": 2683661312.0, + "53": 2683661312.0, + "54": 2683661312.0, + "55": 2683661312.0, + "56": 2683661312.0, + "57": 2683661312.0, + "58": 2683661312.0, + "59": 2683661312.0, + "60": 2683661312.0, + "61": 2683661312.0, + "62": 
2683661312.0, + "63": 2683661312.0, + "64": 2683661312.0, + "65": 2683661312.0, + "66": 2683661312.0, + "67": 2683661312.0, + "68": 2683661312.0, + "69": 2683661312.0, + "70": 2683661312.0, + "71": 2683661312.0, + "72": 2683661312.0, + "73": 2683661312.0, + "74": 2683661312.0, + "75": 2683661312.0, + "76": 2683661312.0, + "77": 2683661312.0, + "78": 2683661312.0, + "79": 2683661312.0, + "80": 2683661312.0, + "81": 2683661312.0, + "82": 2683661312.0, + "83": 2683661312.0, + "84": 2683661312.0, + "85": 2683661312.0, + "86": 2683661312.0, + "87": 2683661312.0, + "88": 2683661312.0, + "89": 2683661312.0, + "90": 2683661312.0, + "91": 2683661312.0, + "92": 2683661312.0, + "93": 2683661312.0, + "94": 2683661312.0, + "95": 2683661312.0, + "96": 2683661312.0, + "97": 2683661312.0, + "98": 2683661312.0, + "99": 2683661312.0, + "100": 2683661312.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 19.93377, + "2": 0.214, + "3": 0.18589, + "4": 0.17894, + "5": 0.1785, + "6": 0.17891, + "7": 0.18156, + "8": 0.18079, + "9": 0.17824, + "10": 0.17989, + "11": 0.17805, + "12": 0.17716, + "13": 0.17836, + "14": 0.17787, + "15": 0.17769, + "16": 0.17666, + "17": 0.17653, + "18": 0.1758, + "19": 0.17562, + "20": 0.1768, + "21": 0.1768, + "22": 0.17624, + "23": 0.17472, + "24": 0.17432, + "25": 0.1736, + "26": 0.1746, + "27": 0.17474, + "28": 0.17601, + "29": 0.17807, + "30": 0.17493, + "31": 0.17335, + "32": 0.17319, + "33": 0.17268, + "34": 0.17305, + "35": 0.17412, + "36": 0.17335, + "37": 0.17266, + "38": 0.17413, + "39": 0.17304, + "40": 0.17432, + "41": 0.17519, + "42": 0.17337, + "43": 0.17392, + "44": 0.17265, + "45": 0.17279, + "46": 0.17548, + "47": 0.17651, + "48": 0.17389, + "49": 0.17631, + "50": 0.17232, + "51": 0.18407, + "52": 0.17581, + "53": 0.37263, + "54": 0.17452, + "55": 0.17442, + "56": 0.1745, + "57": 0.17483, + "58": 0.17583, + "59": 0.17494, + "60": 0.17407, + "61": 0.17423, + "62": 0.17441, + "63": 
0.17659, + "64": 0.17537, + "65": 0.17556, + "66": 0.3524, + "67": 0.17531, + "68": 0.17588, + "69": 0.17592, + "70": 0.17431, + "71": 0.17395, + "72": 0.17604, + "73": 0.17728, + "74": 0.17752, + "75": 0.1758, + "76": 0.17612, + "77": 0.17411, + "78": 0.17662, + "79": 0.17605, + "80": 0.17671, + "81": 0.17596, + "82": 0.1766, + "83": 0.17666, + "84": 0.17679, + "85": 0.17653, + "86": 0.17635, + "87": 0.17598, + "88": 0.17546, + "89": 0.17602, + "90": 0.17567, + "91": 0.17695, + "92": 0.17831, + "93": 0.17683, + "94": 0.17578, + "95": 0.17724, + "96": 0.17805, + "97": 0.17524, + "98": 0.17706, + "99": 0.1768, + "100": 0.17633 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 0568628b7b7..9ec4370d823 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.84523, "5": 10.87428, "10": 10.82858, "15": 10.81926, "20": 10.72749, "25": 10.55195, "30": 10.36504, "35": 10.27845, "40": 10.09773, "45": 9.84203, "50": 9.91254}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1725.0, "5": 1834.0, "10": 1478.0, "15": 1891.0, "20": 1639.0, "25": 1623.0, "30": 1882.0, "35": 2043.0, "40": 2168.0, "45": 2159.0, "50": 2319.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 763220480.0, "5": 763220480.0, "10": 763220480.0, "15": 763220480.0, "20": 763220480.0, "25": 763220480.0, "30": 763220480.0, "35": 763220480.0, "40": 763220480.0, "45": 763220480.0, "50": 763220480.0}}, 
"mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3868255744.0, "5": 4152064512.0, "10": 4152064512.0, "15": 4152064512.0, "20": 4152064512.0, "25": 4152064512.0, "30": 4152064512.0, "35": 4152064512.0, "40": 4152064512.0, "45": 4152064512.0, "50": 4152064512.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 13.37152, "5": 0.10735, "10": 0.10615, "15": 0.10727, "20": 0.10475, "25": 0.10789, "30": 0.10639, "35": 0.1051, "40": 0.10657, "45": 0.10582, "50": 0.1069}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84523, + "2": 10.85412, + "3": 10.85365, + "4": 10.83867, + "5": 10.87428, + "6": 10.89334, + "7": 10.8541, + "8": 10.86232, + "9": 10.86355, + "10": 10.82858, + "11": 10.88772, + "12": 10.87148, + "13": 10.87939, + "14": 10.89122, + "15": 10.81926, + "16": 10.83064, + "17": 10.79873, + "18": 10.81769, + "19": 10.8196, + "20": 10.72749, + "21": 10.70555, + "22": 10.56395, + "23": 10.7282, + "24": 10.60841, + "25": 10.55195, + "26": 10.60869, + "27": 10.62878, + "28": 10.5827, + "29": 10.59984, + "30": 10.36504, + "31": 10.12095, + "32": 10.47626, + "33": 10.46908, + "34": 10.22325, + "35": 10.27845, + "36": 10.22879, + "37": 10.35946, + "38": 10.19333, + "39": 10.41585, + "40": 10.09773, + "41": 10.15714, + "42": 10.22441, + "43": 9.8328, + "44": 9.96934, + "45": 9.84203, + "46": 9.83023, + "47": 10.15603, + "48": 9.85506, + "49": 9.54051, + "50": 9.91254 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1725.0, + "2": 1664.0, + "3": 1710.0, + "4": 1712.0, + "5": 1834.0, + "6": 1743.0, + "7": 1803.0, + "8": 1744.0, + "9": 1770.0, + "10": 1478.0, + "11": 1879.0, + "12": 1696.0, + "13": 1952.0, + "14": 1732.0, + "15": 1891.0, + "16": 1872.0, + "17": 1737.0, + "18": 1744.0, + "19": 1843.0, + "20": 1639.0, + "21": 1817.0, + "22": 1615.0, + 
"23": 1960.0, + "24": 1646.0, + "25": 1623.0, + "26": 1671.0, + "27": 1841.0, + "28": 2009.0, + "29": 1956.0, + "30": 1882.0, + "31": 1597.0, + "32": 1921.0, + "33": 2114.0, + "34": 1828.0, + "35": 2043.0, + "36": 1947.0, + "37": 2338.0, + "38": 2227.0, + "39": 2346.0, + "40": 2168.0, + "41": 2204.0, + "42": 2247.0, + "43": 2078.0, + "44": 2064.0, + "45": 2159.0, + "46": 2489.0, + "47": 2497.0, + "48": 2305.0, + "49": 2272.0, + "50": 2319.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759681536.0, + "2": 759681536.0, + "3": 759681536.0, + "4": 759681536.0, + "5": 759681536.0, + "6": 759681536.0, + "7": 759681536.0, + "8": 759681536.0, + "9": 759681536.0, + "10": 759681536.0, + "11": 759681536.0, + "12": 759681536.0, + "13": 759681536.0, + "14": 759681536.0, + "15": 759681536.0, + "16": 759681536.0, + "17": 759681536.0, + "18": 759681536.0, + "19": 759681536.0, + "20": 759681536.0, + "21": 759681536.0, + "22": 759681536.0, + "23": 759681536.0, + "24": 759681536.0, + "25": 759681536.0, + "26": 759681536.0, + "27": 759681536.0, + "28": 759681536.0, + "29": 759681536.0, + "30": 759681536.0, + "31": 759681536.0, + "32": 759681536.0, + "33": 759681536.0, + "34": 759681536.0, + "35": 759681536.0, + "36": 759681536.0, + "37": 759681536.0, + "38": 759681536.0, + "39": 759681536.0, + "40": 759681536.0, + "41": 759681536.0, + "42": 759681536.0, + "43": 759681536.0, + "44": 759681536.0, + "45": 759681536.0, + "46": 759681536.0, + "47": 759681536.0, + "48": 759681536.0, + "49": 759681536.0, + "50": 759681536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3866813952.0, + "2": 4148525568.0, + "3": 4148525568.0, + "4": 4148525568.0, + "5": 4148525568.0, + "6": 4148525568.0, + "7": 4148525568.0, + "8": 4148525568.0, + "9": 4148525568.0, + "10": 4148525568.0, + "11": 4148525568.0, + "12": 4148525568.0, + "13": 4148525568.0, + "14": 
4148525568.0, + "15": 4148525568.0, + "16": 4148525568.0, + "17": 4148525568.0, + "18": 4148525568.0, + "19": 4148525568.0, + "20": 4148525568.0, + "21": 4148525568.0, + "22": 4148525568.0, + "23": 4148525568.0, + "24": 4148525568.0, + "25": 4148525568.0, + "26": 4148525568.0, + "27": 4148525568.0, + "28": 4148525568.0, + "29": 4148525568.0, + "30": 4148525568.0, + "31": 4148525568.0, + "32": 4148525568.0, + "33": 4148525568.0, + "34": 4148525568.0, + "35": 4148525568.0, + "36": 4148525568.0, + "37": 4148525568.0, + "38": 4148525568.0, + "39": 4148525568.0, + "40": 4148525568.0, + "41": 4148525568.0, + "42": 4148525568.0, + "43": 4148525568.0, + "44": 4148525568.0, + "45": 4148525568.0, + "46": 4148525568.0, + "47": 4148525568.0, + "48": 4148525568.0, + "49": 4148525568.0, + "50": 4148525568.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.73497, + "2": 0.13463, + "3": 0.12132, + "4": 0.12121, + "5": 0.12122, + "6": 0.11968, + "7": 0.12077, + "8": 0.12029, + "9": 0.12102, + "10": 0.12242, + "11": 0.12132, + "12": 0.11963, + "13": 0.11976, + "14": 0.12077, + "15": 0.12284, + "16": 0.12192, + "17": 0.12079, + "18": 0.12083, + "19": 0.12289, + "20": 0.12192, + "21": 0.12178, + "22": 0.1217, + "23": 0.1195, + "24": 0.12278, + "25": 0.12076, + "26": 0.11902, + "27": 0.12039, + "28": 0.12124, + "29": 0.12162, + "30": 0.12043, + "31": 0.12129, + "32": 0.11876, + "33": 0.12087, + "34": 0.12139, + "35": 0.11913, + "36": 0.12007, + "37": 0.11949, + "38": 0.12009, + "39": 0.12132, + "40": 0.1201, + "41": 0.12285, + "42": 0.12083, + "43": 0.12338, + "44": 0.12174, + "45": 0.12023, + "46": 0.11927, + "47": 0.11992, + "48": 0.12123, + "49": 0.12216, + "50": 0.11881 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..796e07451cc --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84523, + "2": 10.85412, + "3": 10.85365, + "4": 10.83867, + "5": 10.87428, + "6": 10.89334, + "7": 10.8541, + "8": 10.86232, + "9": 10.86355, + "10": 10.82858, + "11": 10.88772, + "12": 10.87148, + "13": 10.87939, + "14": 10.89122, + "15": 10.81926, + "16": 10.83064, + "17": 10.79873, + "18": 10.81769, + "19": 10.8196, + "20": 10.72749, + "21": 10.70555, + "22": 10.56395, + "23": 10.7282, + "24": 10.60841, + "25": 10.55195, + "26": 10.60869, + "27": 10.62878, + "28": 10.5827, + "29": 10.59984, + "30": 10.36504, + "31": 10.12095, + "32": 10.47626, + "33": 10.46908, + "34": 10.22325, + "35": 10.27845, + "36": 10.22879, + "37": 10.35946, + "38": 10.19333, + "39": 10.41585, + "40": 10.09773, + "41": 10.15714, + "42": 10.22441, + "43": 9.8328, + "44": 9.96934, + "45": 9.84203, + "46": 9.83023, + "47": 10.15603, + "48": 9.85506, + "49": 9.54051, + "50": 9.91254 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1725.0, + "2": 1664.0, + "3": 1710.0, + "4": 1712.0, + "5": 1834.0, + "6": 1743.0, + "7": 1803.0, + "8": 1744.0, + "9": 1770.0, + "10": 1478.0, + "11": 1879.0, + "12": 1696.0, + "13": 1952.0, + "14": 1732.0, + "15": 1891.0, + "16": 1872.0, + "17": 1737.0, + "18": 1744.0, + "19": 1843.0, + "20": 1639.0, + "21": 1817.0, + "22": 1615.0, + "23": 1960.0, + "24": 1646.0, + "25": 1623.0, + "26": 1671.0, + "27": 1841.0, + "28": 2009.0, + "29": 1956.0, + "30": 1882.0, + "31": 1597.0, + "32": 1921.0, + "33": 2114.0, + "34": 1828.0, + "35": 2043.0, + "36": 1947.0, + "37": 2338.0, + "38": 
2227.0, + "39": 2346.0, + "40": 2168.0, + "41": 2204.0, + "42": 2247.0, + "43": 2078.0, + "44": 2064.0, + "45": 2159.0, + "46": 2489.0, + "47": 2497.0, + "48": 2305.0, + "49": 2272.0, + "50": 2319.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759681536.0, + "2": 759681536.0, + "3": 759681536.0, + "4": 759681536.0, + "5": 759681536.0, + "6": 759681536.0, + "7": 759681536.0, + "8": 759681536.0, + "9": 759681536.0, + "10": 759681536.0, + "11": 759681536.0, + "12": 759681536.0, + "13": 759681536.0, + "14": 759681536.0, + "15": 759681536.0, + "16": 759681536.0, + "17": 759681536.0, + "18": 759681536.0, + "19": 759681536.0, + "20": 759681536.0, + "21": 759681536.0, + "22": 759681536.0, + "23": 759681536.0, + "24": 759681536.0, + "25": 759681536.0, + "26": 759681536.0, + "27": 759681536.0, + "28": 759681536.0, + "29": 759681536.0, + "30": 759681536.0, + "31": 759681536.0, + "32": 759681536.0, + "33": 759681536.0, + "34": 759681536.0, + "35": 759681536.0, + "36": 759681536.0, + "37": 759681536.0, + "38": 759681536.0, + "39": 759681536.0, + "40": 759681536.0, + "41": 759681536.0, + "42": 759681536.0, + "43": 759681536.0, + "44": 759681536.0, + "45": 759681536.0, + "46": 759681536.0, + "47": 759681536.0, + "48": 759681536.0, + "49": 759681536.0, + "50": 759681536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3866813952.0, + "2": 4148525568.0, + "3": 4148525568.0, + "4": 4148525568.0, + "5": 4148525568.0, + "6": 4148525568.0, + "7": 4148525568.0, + "8": 4148525568.0, + "9": 4148525568.0, + "10": 4148525568.0, + "11": 4148525568.0, + "12": 4148525568.0, + "13": 4148525568.0, + "14": 4148525568.0, + "15": 4148525568.0, + "16": 4148525568.0, + "17": 4148525568.0, + "18": 4148525568.0, + "19": 4148525568.0, + "20": 4148525568.0, + "21": 4148525568.0, + "22": 4148525568.0, + "23": 4148525568.0, + "24": 4148525568.0, + "25": 
4148525568.0, + "26": 4148525568.0, + "27": 4148525568.0, + "28": 4148525568.0, + "29": 4148525568.0, + "30": 4148525568.0, + "31": 4148525568.0, + "32": 4148525568.0, + "33": 4148525568.0, + "34": 4148525568.0, + "35": 4148525568.0, + "36": 4148525568.0, + "37": 4148525568.0, + "38": 4148525568.0, + "39": 4148525568.0, + "40": 4148525568.0, + "41": 4148525568.0, + "42": 4148525568.0, + "43": 4148525568.0, + "44": 4148525568.0, + "45": 4148525568.0, + "46": 4148525568.0, + "47": 4148525568.0, + "48": 4148525568.0, + "49": 4148525568.0, + "50": 4148525568.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.82235, + "2": 0.15582, + "3": 0.10905, + "4": 0.1073, + "5": 0.109, + "6": 0.10732, + "7": 0.10878, + "8": 0.11223, + "9": 0.10518, + "10": 0.10855, + "11": 0.11135, + "12": 0.10511, + "13": 0.1065, + "14": 0.10507, + "15": 0.10485, + "16": 0.10494, + "17": 0.10498, + "18": 0.10434, + "19": 0.10497, + "20": 0.10409, + "21": 0.10596, + "22": 0.10798, + "23": 0.10596, + "24": 0.10493, + "25": 0.10426, + "26": 0.10473, + "27": 0.10393, + "28": 0.10415, + "29": 0.10372, + "30": 0.10375, + "31": 0.10526, + "32": 0.10354, + "33": 0.10378, + "34": 0.10407, + "35": 0.10415, + "36": 0.10637, + "37": 0.10889, + "38": 0.10823, + "39": 0.10551, + "40": 0.10613, + "41": 0.10424, + "42": 0.10385, + "43": 0.10519, + "44": 0.1044, + "45": 0.10488, + "46": 0.10678, + "47": 0.10342, + "48": 0.10517, + "49": 0.10469, + "50": 0.10438 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..b5d55ac433c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + 
"lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84523, + "2": 10.85412, + "3": 10.85365, + "4": 10.83867, + "5": 10.87428, + "6": 10.89334, + "7": 10.8541, + "8": 10.86232, + "9": 10.86355, + "10": 10.82858, + "11": 10.88772, + "12": 10.87148, + "13": 10.87939, + "14": 10.89122, + "15": 10.81926, + "16": 10.83064, + "17": 10.79873, + "18": 10.81769, + "19": 10.8196, + "20": 10.72749, + "21": 10.70555, + "22": 10.56395, + "23": 10.7282, + "24": 10.60841, + "25": 10.55195, + "26": 10.60869, + "27": 10.62878, + "28": 10.5827, + "29": 10.59984, + "30": 10.36504, + "31": 10.12095, + "32": 10.47626, + "33": 10.46908, + "34": 10.22325, + "35": 10.27845, + "36": 10.22879, + "37": 10.35946, + "38": 10.19333, + "39": 10.41585, + "40": 10.09773, + "41": 10.15714, + "42": 10.22441, + "43": 9.8328, + "44": 9.96934, + "45": 9.84203, + "46": 9.83023, + "47": 10.15603, + "48": 9.85506, + "49": 9.54051, + "50": 9.91254 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1725.0, + "2": 1664.0, + "3": 1710.0, + "4": 1712.0, + "5": 1834.0, + "6": 1743.0, + "7": 1803.0, + "8": 1744.0, + "9": 1770.0, + "10": 1478.0, + "11": 1879.0, + "12": 1696.0, + "13": 1952.0, + "14": 1732.0, + "15": 1891.0, + "16": 1872.0, + "17": 1737.0, + "18": 1744.0, + "19": 1843.0, + "20": 1639.0, + "21": 1817.0, + "22": 1615.0, + "23": 1960.0, + "24": 1646.0, + "25": 1623.0, + "26": 1671.0, + "27": 1841.0, + "28": 2009.0, + "29": 1956.0, + "30": 1882.0, + "31": 1597.0, + "32": 1921.0, + "33": 2114.0, + "34": 1828.0, + "35": 2043.0, + "36": 1947.0, + "37": 2338.0, + "38": 2227.0, + "39": 2346.0, + "40": 2168.0, + "41": 2204.0, + "42": 2247.0, + "43": 2078.0, + "44": 2064.0, + "45": 2159.0, + "46": 2489.0, + "47": 2497.0, + "48": 2305.0, + "49": 2272.0, + "50": 2319.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759681536.0, + "2": 
759681536.0, + "3": 759681536.0, + "4": 759681536.0, + "5": 759681536.0, + "6": 759681536.0, + "7": 759681536.0, + "8": 759681536.0, + "9": 759681536.0, + "10": 759681536.0, + "11": 759681536.0, + "12": 759681536.0, + "13": 759681536.0, + "14": 759681536.0, + "15": 759681536.0, + "16": 759681536.0, + "17": 759681536.0, + "18": 759681536.0, + "19": 759681536.0, + "20": 759681536.0, + "21": 759681536.0, + "22": 759681536.0, + "23": 759681536.0, + "24": 759681536.0, + "25": 759681536.0, + "26": 759681536.0, + "27": 759681536.0, + "28": 759681536.0, + "29": 759681536.0, + "30": 759681536.0, + "31": 759681536.0, + "32": 759681536.0, + "33": 759681536.0, + "34": 759681536.0, + "35": 759681536.0, + "36": 759681536.0, + "37": 759681536.0, + "38": 759681536.0, + "39": 759681536.0, + "40": 759681536.0, + "41": 759681536.0, + "42": 759681536.0, + "43": 759681536.0, + "44": 759681536.0, + "45": 759681536.0, + "46": 759681536.0, + "47": 759681536.0, + "48": 759681536.0, + "49": 759681536.0, + "50": 759681536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3866813952.0, + "2": 4148525568.0, + "3": 4148525568.0, + "4": 4148525568.0, + "5": 4148525568.0, + "6": 4148525568.0, + "7": 4148525568.0, + "8": 4148525568.0, + "9": 4148525568.0, + "10": 4148525568.0, + "11": 4148525568.0, + "12": 4148525568.0, + "13": 4148525568.0, + "14": 4148525568.0, + "15": 4148525568.0, + "16": 4148525568.0, + "17": 4148525568.0, + "18": 4148525568.0, + "19": 4148525568.0, + "20": 4148525568.0, + "21": 4148525568.0, + "22": 4148525568.0, + "23": 4148525568.0, + "24": 4148525568.0, + "25": 4148525568.0, + "26": 4148525568.0, + "27": 4148525568.0, + "28": 4148525568.0, + "29": 4148525568.0, + "30": 4148525568.0, + "31": 4148525568.0, + "32": 4148525568.0, + "33": 4148525568.0, + "34": 4148525568.0, + "35": 4148525568.0, + "36": 4148525568.0, + "37": 4148525568.0, + "38": 4148525568.0, + "39": 4148525568.0, + "40": 4148525568.0, + 
"41": 4148525568.0, + "42": 4148525568.0, + "43": 4148525568.0, + "44": 4148525568.0, + "45": 4148525568.0, + "46": 4148525568.0, + "47": 4148525568.0, + "48": 4148525568.0, + "49": 4148525568.0, + "50": 4148525568.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.80183, + "2": 0.14507, + "3": 0.13423, + "4": 0.12539, + "5": 0.12233, + "6": 0.12325, + "7": 0.12437, + "8": 0.12453, + "9": 0.12348, + "10": 0.12305, + "11": 0.12491, + "12": 0.12346, + "13": 0.1234, + "14": 0.12145, + "15": 0.12227, + "16": 0.12254, + "17": 0.12422, + "18": 0.12237, + "19": 0.12342, + "20": 0.1219, + "21": 0.1212, + "22": 0.12243, + "23": 0.11962, + "24": 0.1224, + "25": 0.12155, + "26": 0.12253, + "27": 0.12095, + "28": 0.12035, + "29": 0.12115, + "30": 0.11898, + "31": 0.12063, + "32": 0.1189, + "33": 0.12106, + "34": 0.11766, + "35": 0.11962, + "36": 0.12112, + "37": 0.11847, + "38": 0.11727, + "39": 0.11905, + "40": 0.11887, + "41": 0.11948, + "42": 0.11832, + "43": 0.11858, + "44": 0.1186, + "45": 0.12057, + "46": 0.1186, + "47": 0.12097, + "48": 0.11934, + "49": 0.11972, + "50": 0.12006 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..ed32255e786 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81396, + "4": 10.78497, + "5": 10.85284, + "6": 10.87449, + "7": 10.83201, + "8": 10.83297, + "9": 10.83935, + "10": 10.78455, + "11": 10.87798, + "12": 10.86112, + "13": 10.86444, + "14": 
10.87605, + "15": 10.79229, + "16": 10.79509, + "17": 10.76768, + "18": 10.81005, + "19": 10.79719, + "20": 10.69211, + "21": 10.68164, + "22": 10.52085, + "23": 10.70893, + "24": 10.57599, + "25": 10.52412, + "26": 10.59517, + "27": 10.58426, + "28": 10.56233, + "29": 10.57013, + "30": 10.34552, + "31": 10.10049, + "32": 10.45378, + "33": 10.44627, + "34": 10.20606, + "35": 10.26239, + "36": 10.21239, + "37": 10.32522, + "38": 10.16777, + "39": 10.38334, + "40": 10.07241, + "41": 10.13863, + "42": 10.19814, + "43": 9.81073, + "44": 9.93244, + "45": 9.81101, + "46": 9.80877, + "47": 10.12608, + "48": 9.82108, + "49": 9.50625, + "50": 9.88422 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1727.0, + "4": 1835.0, + "5": 1840.0, + "6": 1719.0, + "7": 1740.0, + "8": 1591.0, + "9": 1839.0, + "10": 1380.0, + "11": 1856.0, + "12": 1693.0, + "13": 1906.0, + "14": 1757.0, + "15": 1848.0, + "16": 1791.0, + "17": 1752.0, + "18": 1669.0, + "19": 1722.0, + "20": 1601.0, + "21": 1900.0, + "22": 1662.0, + "23": 2006.0, + "24": 1597.0, + "25": 1635.0, + "26": 1709.0, + "27": 1931.0, + "28": 2043.0, + "29": 1888.0, + "30": 1936.0, + "31": 1550.0, + "32": 1913.0, + "33": 2135.0, + "34": 1703.0, + "35": 1908.0, + "36": 1953.0, + "37": 2291.0, + "38": 2210.0, + "39": 2334.0, + "40": 2100.0, + "41": 2300.0, + "42": 2236.0, + "43": 1897.0, + "44": 1993.0, + "45": 2098.0, + "46": 2298.0, + "47": 2504.0, + "48": 2356.0, + "49": 2268.0, + "50": 2333.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 730320896.0, + "2": 730320896.0, + "3": 730320896.0, + "4": 730320896.0, + "5": 730320896.0, + "6": 730320896.0, + "7": 730320896.0, + "8": 730320896.0, + "9": 730320896.0, + "10": 730320896.0, + "11": 730320896.0, + "12": 730320896.0, + "13": 730320896.0, + "14": 730320896.0, + "15": 730320896.0, + "16": 730320896.0, + "17": 730320896.0, + 
"18": 730320896.0, + "19": 730320896.0, + "20": 730320896.0, + "21": 730320896.0, + "22": 730320896.0, + "23": 730320896.0, + "24": 730320896.0, + "25": 730320896.0, + "26": 730320896.0, + "27": 730320896.0, + "28": 730320896.0, + "29": 730320896.0, + "30": 730320896.0, + "31": 730320896.0, + "32": 730320896.0, + "33": 730320896.0, + "34": 730320896.0, + "35": 730320896.0, + "36": 730320896.0, + "37": 730320896.0, + "38": 730320896.0, + "39": 730320896.0, + "40": 730320896.0, + "41": 730320896.0, + "42": 730320896.0, + "43": 730320896.0, + "44": 730320896.0, + "45": 730320896.0, + "46": 730320896.0, + "47": 730320896.0, + "48": 730320896.0, + "49": 730320896.0, + "50": 730320896.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3837453312.0, + "2": 4119164928.0, + "3": 4119164928.0, + "4": 4119164928.0, + "5": 4119164928.0, + "6": 4119164928.0, + "7": 4119164928.0, + "8": 4119164928.0, + "9": 4119164928.0, + "10": 4119164928.0, + "11": 4119164928.0, + "12": 4119164928.0, + "13": 4119164928.0, + "14": 4119164928.0, + "15": 4119164928.0, + "16": 4119164928.0, + "17": 4119164928.0, + "18": 4119164928.0, + "19": 4119164928.0, + "20": 4119164928.0, + "21": 4119164928.0, + "22": 4119164928.0, + "23": 4119164928.0, + "24": 4119164928.0, + "25": 4119164928.0, + "26": 4119164928.0, + "27": 4119164928.0, + "28": 4119164928.0, + "29": 4119164928.0, + "30": 4119164928.0, + "31": 4119164928.0, + "32": 4119164928.0, + "33": 4119164928.0, + "34": 4119164928.0, + "35": 4119164928.0, + "36": 4119164928.0, + "37": 4119164928.0, + "38": 4119164928.0, + "39": 4119164928.0, + "40": 4119164928.0, + "41": 4119164928.0, + "42": 4119164928.0, + "43": 4119164928.0, + "44": 4119164928.0, + "45": 4119164928.0, + "46": 4119164928.0, + "47": 4119164928.0, + "48": 4119164928.0, + "49": 4119164928.0, + "50": 4119164928.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + 
"1": 21.82644, + "2": 0.19908, + "3": 0.17208, + "4": 0.17348, + "5": 0.40692, + "6": 0.17348, + "7": 0.17221, + "8": 0.17282, + "9": 0.17343, + "10": 0.17259, + "11": 0.44574, + "12": 0.17197, + "13": 0.17235, + "14": 0.17135, + "15": 0.17217, + "16": 0.17214, + "17": 0.17346, + "18": 0.17055, + "19": 0.17076, + "20": 0.17071, + "21": 0.17349, + "22": 0.17417, + "23": 0.16998, + "24": 0.17303, + "25": 0.17019, + "26": 0.16905, + "27": 0.16967, + "28": 0.17087, + "29": 0.16779, + "30": 0.16786, + "31": 0.1689, + "32": 0.16672, + "33": 0.1672, + "34": 0.16926, + "35": 0.16914, + "36": 0.16747, + "37": 0.16765, + "38": 0.16682, + "39": 0.1667, + "40": 0.16914, + "41": 0.16662, + "42": 0.16688, + "43": 0.16639, + "44": 0.16515, + "45": 0.16517, + "46": 0.16701, + "47": 0.16705, + "48": 0.16627, + "49": 0.16652, + "50": 0.16472 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..13f8dfbd7e8 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81396, + "4": 10.78497, + "5": 10.85284, + "6": 10.87449, + "7": 10.83201, + "8": 10.83297, + "9": 10.83935, + "10": 10.78455, + "11": 10.87798, + "12": 10.86112, + "13": 10.86444, + "14": 10.87605, + "15": 10.79229, + "16": 10.79509, + "17": 10.76768, + "18": 10.81005, + "19": 10.79719, + "20": 10.69211, + "21": 10.68164, + "22": 10.52085, + "23": 10.70893, + "24": 10.57599, + "25": 10.52412, + "26": 10.59517, + "27": 10.58426, + "28": 10.56233, + "29": 10.57013, + "30": 10.34552, + "31": 10.10049, + "32": 
10.45378, + "33": 10.44627, + "34": 10.20606, + "35": 10.26239, + "36": 10.21239, + "37": 10.32522, + "38": 10.16777, + "39": 10.38334, + "40": 10.07241, + "41": 10.13863, + "42": 10.19814, + "43": 9.81073, + "44": 9.93244, + "45": 9.81101, + "46": 9.80877, + "47": 10.12608, + "48": 9.82108, + "49": 9.50625, + "50": 9.88422 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1727.0, + "4": 1835.0, + "5": 1840.0, + "6": 1719.0, + "7": 1740.0, + "8": 1591.0, + "9": 1839.0, + "10": 1380.0, + "11": 1856.0, + "12": 1693.0, + "13": 1906.0, + "14": 1757.0, + "15": 1848.0, + "16": 1791.0, + "17": 1752.0, + "18": 1669.0, + "19": 1722.0, + "20": 1601.0, + "21": 1900.0, + "22": 1662.0, + "23": 2006.0, + "24": 1597.0, + "25": 1635.0, + "26": 1709.0, + "27": 1931.0, + "28": 2043.0, + "29": 1888.0, + "30": 1936.0, + "31": 1550.0, + "32": 1913.0, + "33": 2135.0, + "34": 1703.0, + "35": 1908.0, + "36": 1953.0, + "37": 2291.0, + "38": 2210.0, + "39": 2334.0, + "40": 2100.0, + "41": 2300.0, + "42": 2236.0, + "43": 1897.0, + "44": 1993.0, + "45": 2098.0, + "46": 2298.0, + "47": 2504.0, + "48": 2356.0, + "49": 2268.0, + "50": 2333.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 730320896.0, + "2": 730320896.0, + "3": 730320896.0, + "4": 730320896.0, + "5": 730320896.0, + "6": 730320896.0, + "7": 730320896.0, + "8": 730320896.0, + "9": 730320896.0, + "10": 730320896.0, + "11": 730320896.0, + "12": 730320896.0, + "13": 730320896.0, + "14": 730320896.0, + "15": 730320896.0, + "16": 730320896.0, + "17": 730320896.0, + "18": 730320896.0, + "19": 730320896.0, + "20": 730320896.0, + "21": 730320896.0, + "22": 730320896.0, + "23": 730320896.0, + "24": 730320896.0, + "25": 730320896.0, + "26": 730320896.0, + "27": 730320896.0, + "28": 730320896.0, + "29": 730320896.0, + "30": 730320896.0, + "31": 730320896.0, + "32": 730320896.0, + "33": 
730320896.0, + "34": 730320896.0, + "35": 730320896.0, + "36": 730320896.0, + "37": 730320896.0, + "38": 730320896.0, + "39": 730320896.0, + "40": 730320896.0, + "41": 730320896.0, + "42": 730320896.0, + "43": 730320896.0, + "44": 730320896.0, + "45": 730320896.0, + "46": 730320896.0, + "47": 730320896.0, + "48": 730320896.0, + "49": 730320896.0, + "50": 730320896.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3837453312.0, + "2": 4119164928.0, + "3": 4119164928.0, + "4": 4119164928.0, + "5": 4119164928.0, + "6": 4119164928.0, + "7": 4119164928.0, + "8": 4119164928.0, + "9": 4119164928.0, + "10": 4119164928.0, + "11": 4119164928.0, + "12": 4119164928.0, + "13": 4119164928.0, + "14": 4119164928.0, + "15": 4119164928.0, + "16": 4119164928.0, + "17": 4119164928.0, + "18": 4119164928.0, + "19": 4119164928.0, + "20": 4119164928.0, + "21": 4119164928.0, + "22": 4119164928.0, + "23": 4119164928.0, + "24": 4119164928.0, + "25": 4119164928.0, + "26": 4119164928.0, + "27": 4119164928.0, + "28": 4119164928.0, + "29": 4119164928.0, + "30": 4119164928.0, + "31": 4119164928.0, + "32": 4119164928.0, + "33": 4119164928.0, + "34": 4119164928.0, + "35": 4119164928.0, + "36": 4119164928.0, + "37": 4119164928.0, + "38": 4119164928.0, + "39": 4119164928.0, + "40": 4119164928.0, + "41": 4119164928.0, + "42": 4119164928.0, + "43": 4119164928.0, + "44": 4119164928.0, + "45": 4119164928.0, + "46": 4119164928.0, + "47": 4119164928.0, + "48": 4119164928.0, + "49": 4119164928.0, + "50": 4119164928.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 19.01426, + "2": 0.19331, + "3": 0.17686, + "4": 0.17351, + "5": 0.17409, + "6": 0.39233, + "7": 0.17062, + "8": 0.17244, + "9": 0.1721, + "10": 0.1728, + "11": 0.16853, + "12": 0.16766, + "13": 0.45674, + "14": 0.17028, + "15": 0.16973, + "16": 0.16893, + "17": 0.16884, + "18": 0.17013, + "19": 0.16961, + "20": 
0.17167, + "21": 0.1673, + "22": 0.16984, + "23": 0.17183, + "24": 0.17023, + "25": 0.16914, + "26": 0.16981, + "27": 0.1674, + "28": 0.16751, + "29": 0.16693, + "30": 0.16857, + "31": 0.16737, + "32": 0.16785, + "33": 0.16718, + "34": 0.16686, + "35": 0.16592, + "36": 0.16924, + "37": 0.16753, + "38": 0.16813, + "39": 0.16663, + "40": 0.22514, + "41": 0.16853, + "42": 0.17036, + "43": 0.16917, + "44": 0.167, + "45": 0.16766, + "46": 0.167, + "47": 0.16654, + "48": 0.16869, + "49": 0.16681, + "50": 0.16794 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index a8768535dbb..f88bc4dbaad 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.84523, "5": 10.87428, "10": 10.82859, "15": 10.81927, "20": 10.72749, "25": 10.55198, "30": 10.36511, "35": 10.27848, "40": 10.09773, "45": 9.84205, "50": 9.91258}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1725.0, "5": 1834.0, "10": 1459.0, "15": 1886.0, "20": 1649.0, "25": 1647.0, "30": 1964.0, "35": 2017.0, "40": 2207.0, "45": 2164.0, "50": 2224.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 551155200.0, "5": 551155200.0, "10": 551155200.0, "15": 551155200.0, "20": 
551155200.0, "25": 551155200.0, "30": 551155200.0, "35": 551155200.0, "40": 551155200.0, "45": 551155200.0, "50": 551155200.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3798206976.0, "5": 3940916736.0, "10": 3940916736.0, "15": 3940916736.0, "20": 3940916736.0, "25": 3940916736.0, "30": 3940916736.0, "35": 3940916736.0, "40": 3940916736.0, "45": 3940916736.0, "50": 3940916736.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 17.08492, "5": 0.11359, "10": 0.11447, "15": 0.11042, "20": 0.1105, "25": 0.11485, "30": 0.11374, "35": 0.1115, "40": 0.10857, "45": 0.11114, "50": 0.10673}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84523, + "2": 10.85412, + "3": 10.85365, + "4": 10.83867, + "5": 10.87428, + "6": 10.89334, + "7": 10.8541, + "8": 10.86235, + "9": 10.86352, + "10": 10.82859, + "11": 10.88772, + "12": 10.87148, + "13": 10.87938, + "14": 10.89123, + "15": 10.81927, + "16": 10.83063, + "17": 10.79878, + "18": 10.81771, + "19": 10.81957, + "20": 10.72749, + "21": 10.70552, + "22": 10.56396, + "23": 10.72823, + "24": 10.60839, + "25": 10.55198, + "26": 10.60868, + "27": 10.62879, + "28": 10.58271, + "29": 10.59982, + "30": 10.36511, + "31": 10.12096, + "32": 10.47628, + "33": 10.46906, + "34": 10.22326, + "35": 10.27848, + "36": 10.22883, + "37": 10.35947, + "38": 10.19331, + "39": 10.41586, + "40": 10.09773, + "41": 10.15718, + "42": 10.22441, + "43": 9.83281, + "44": 9.96935, + "45": 9.84205, + "46": 9.83017, + "47": 10.15602, + "48": 9.85503, + "49": 9.54049, + "50": 9.91258 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1725.0, + "2": 1664.0, + "3": 1710.0, + "4": 1712.0, + "5": 1834.0, + "6": 1743.0, + "7": 1803.0, + "8": 1737.0, + "9": 1779.0, + "10": 1459.0, + "11": 1898.0, + "12": 1661.0, + "13": 1860.0, + "14": 
1764.0, + "15": 1886.0, + "16": 1916.0, + "17": 1773.0, + "18": 1702.0, + "19": 1742.0, + "20": 1649.0, + "21": 1899.0, + "22": 1631.0, + "23": 1960.0, + "24": 1570.0, + "25": 1647.0, + "26": 1649.0, + "27": 1811.0, + "28": 1930.0, + "29": 1910.0, + "30": 1964.0, + "31": 1536.0, + "32": 1873.0, + "33": 2191.0, + "34": 1838.0, + "35": 2017.0, + "36": 1916.0, + "37": 2345.0, + "38": 2247.0, + "39": 2374.0, + "40": 2207.0, + "41": 2246.0, + "42": 2291.0, + "43": 2027.0, + "44": 2147.0, + "45": 2164.0, + "46": 2300.0, + "47": 2418.0, + "48": 2467.0, + "49": 2255.0, + "50": 2224.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 552054272.0, + "2": 552054272.0, + "3": 552054272.0, + "4": 552054272.0, + "5": 552054272.0, + "6": 552054272.0, + "7": 552054272.0, + "8": 552054272.0, + "9": 552054272.0, + "10": 552054272.0, + "11": 552054272.0, + "12": 552054272.0, + "13": 552054272.0, + "14": 552054272.0, + "15": 552054272.0, + "16": 552054272.0, + "17": 552054272.0, + "18": 552054272.0, + "19": 552054272.0, + "20": 552054272.0, + "21": 552054272.0, + "22": 552054272.0, + "23": 552054272.0, + "24": 552054272.0, + "25": 552054272.0, + "26": 552054272.0, + "27": 552054272.0, + "28": 552054272.0, + "29": 552054272.0, + "30": 552054272.0, + "31": 552054272.0, + "32": 552054272.0, + "33": 552054272.0, + "34": 552054272.0, + "35": 552054272.0, + "36": 552054272.0, + "37": 552054272.0, + "38": 552054272.0, + "39": 552054272.0, + "40": 552054272.0, + "41": 552054272.0, + "42": 552054272.0, + "43": 552054272.0, + "44": 552054272.0, + "45": 552054272.0, + "46": 552054272.0, + "47": 552054272.0, + "48": 552054272.0, + "49": 552054272.0, + "50": 552054272.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3798206976.0, + "2": 3940899328.0, + "3": 3940899328.0, + "4": 3940899328.0, + "5": 3940899328.0, + "6": 3940899328.0, + "7": 3940899328.0, + "8": 
3940899328.0, + "9": 3940899328.0, + "10": 3940899328.0, + "11": 3940899328.0, + "12": 3940899328.0, + "13": 3940899328.0, + "14": 3940899328.0, + "15": 3940899328.0, + "16": 3940899328.0, + "17": 3940899328.0, + "18": 3940899328.0, + "19": 3940899328.0, + "20": 3940899328.0, + "21": 3940899328.0, + "22": 3940899328.0, + "23": 3940899328.0, + "24": 3940899328.0, + "25": 3940899328.0, + "26": 3940899328.0, + "27": 3940899328.0, + "28": 3940899328.0, + "29": 3940899328.0, + "30": 3940899328.0, + "31": 3940899328.0, + "32": 3940899328.0, + "33": 3940899328.0, + "34": 3940899328.0, + "35": 3940899328.0, + "36": 3940899328.0, + "37": 3940899328.0, + "38": 3940899328.0, + "39": 3940899328.0, + "40": 3940899328.0, + "41": 3940899328.0, + "42": 3940899328.0, + "43": 3940899328.0, + "44": 3940899328.0, + "45": 3940899328.0, + "46": 3940899328.0, + "47": 3940899328.0, + "48": 3940899328.0, + "49": 3940899328.0, + "50": 3940899328.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.77378, + "2": 0.15884, + "3": 0.14867, + "4": 0.12729, + "5": 0.12441, + "6": 0.12501, + "7": 0.12396, + "8": 0.12217, + "9": 0.12636, + "10": 0.12685, + "11": 0.28489, + "12": 0.1228, + "13": 0.12284, + "14": 0.12293, + "15": 0.12456, + "16": 0.12522, + "17": 0.12575, + "18": 0.12506, + "19": 0.12636, + "20": 0.12549, + "21": 0.28282, + "22": 0.12596, + "23": 0.12451, + "24": 0.12852, + "25": 0.12585, + "26": 0.1249, + "27": 0.12809, + "28": 0.12564, + "29": 0.12685, + "30": 0.12691, + "31": 0.29536, + "32": 0.12574, + "33": 0.12648, + "34": 0.12772, + "35": 0.12732, + "36": 0.12522, + "37": 0.12739, + "38": 0.12791, + "39": 0.12659, + "40": 0.12766, + "41": 0.28835, + "42": 0.12796, + "43": 0.12957, + "44": 0.12516, + "45": 0.12485, + "46": 0.12641, + "47": 0.12384, + "48": 0.12562, + "49": 0.12302, + "50": 0.12604 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..24a2e339e46 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84523, + "2": 10.85412, + "3": 10.85365, + "4": 10.83867, + "5": 10.87428, + "6": 10.89334, + "7": 10.8541, + "8": 10.86235, + "9": 10.86352, + "10": 10.82859, + "11": 10.88772, + "12": 10.87148, + "13": 10.87938, + "14": 10.89123, + "15": 10.81927, + "16": 10.83063, + "17": 10.79878, + "18": 10.81771, + "19": 10.81957, + "20": 10.72749, + "21": 10.70552, + "22": 10.56396, + "23": 10.72823, + "24": 10.60839, + "25": 10.55198, + "26": 10.60868, + "27": 10.62879, + "28": 10.58271, + "29": 10.59982, + "30": 10.36511, + "31": 10.12096, + "32": 10.47628, + "33": 10.46906, + "34": 10.22326, + "35": 10.27848, + "36": 10.22883, + "37": 10.35947, + "38": 10.19331, + "39": 10.41586, + "40": 10.09773, + "41": 10.15718, + "42": 10.22441, + "43": 9.83281, + "44": 9.96935, + "45": 9.84205, + "46": 9.83017, + "47": 10.15602, + "48": 9.85503, + "49": 9.54049, + "50": 9.91258 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1725.0, + "2": 1664.0, + "3": 1710.0, + "4": 1712.0, + "5": 1834.0, + "6": 1743.0, + "7": 1803.0, + "8": 1737.0, + "9": 1779.0, + "10": 1459.0, + "11": 1898.0, + "12": 1661.0, + "13": 1860.0, + "14": 1764.0, + "15": 1886.0, + "16": 1916.0, + "17": 1773.0, + 
"18": 1702.0, + "19": 1742.0, + "20": 1649.0, + "21": 1899.0, + "22": 1631.0, + "23": 1960.0, + "24": 1570.0, + "25": 1647.0, + "26": 1649.0, + "27": 1811.0, + "28": 1930.0, + "29": 1910.0, + "30": 1964.0, + "31": 1536.0, + "32": 1873.0, + "33": 2191.0, + "34": 1838.0, + "35": 2017.0, + "36": 1916.0, + "37": 2345.0, + "38": 2247.0, + "39": 2374.0, + "40": 2207.0, + "41": 2246.0, + "42": 2291.0, + "43": 2027.0, + "44": 2147.0, + "45": 2164.0, + "46": 2300.0, + "47": 2418.0, + "48": 2467.0, + "49": 2255.0, + "50": 2224.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 552054272.0, + "2": 552054272.0, + "3": 552054272.0, + "4": 552054272.0, + "5": 552054272.0, + "6": 552054272.0, + "7": 552054272.0, + "8": 552054272.0, + "9": 552054272.0, + "10": 552054272.0, + "11": 552054272.0, + "12": 552054272.0, + "13": 552054272.0, + "14": 552054272.0, + "15": 552054272.0, + "16": 552054272.0, + "17": 552054272.0, + "18": 552054272.0, + "19": 552054272.0, + "20": 552054272.0, + "21": 552054272.0, + "22": 552054272.0, + "23": 552054272.0, + "24": 552054272.0, + "25": 552054272.0, + "26": 552054272.0, + "27": 552054272.0, + "28": 552054272.0, + "29": 552054272.0, + "30": 552054272.0, + "31": 552054272.0, + "32": 552054272.0, + "33": 552054272.0, + "34": 552054272.0, + "35": 552054272.0, + "36": 552054272.0, + "37": 552054272.0, + "38": 552054272.0, + "39": 552054272.0, + "40": 552054272.0, + "41": 552054272.0, + "42": 552054272.0, + "43": 552054272.0, + "44": 552054272.0, + "45": 552054272.0, + "46": 552054272.0, + "47": 552054272.0, + "48": 552054272.0, + "49": 552054272.0, + "50": 552054272.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3798206976.0, + "2": 3940899328.0, + "3": 3940899328.0, + "4": 3940899328.0, + "5": 3940899328.0, + "6": 3940899328.0, + "7": 3940899328.0, + "8": 3940899328.0, + "9": 3940899328.0, + "10": 3940899328.0, + 
"11": 3940899328.0, + "12": 3940899328.0, + "13": 3940899328.0, + "14": 3940899328.0, + "15": 3940899328.0, + "16": 3940899328.0, + "17": 3940899328.0, + "18": 3940899328.0, + "19": 3940899328.0, + "20": 3940899328.0, + "21": 3940899328.0, + "22": 3940899328.0, + "23": 3940899328.0, + "24": 3940899328.0, + "25": 3940899328.0, + "26": 3940899328.0, + "27": 3940899328.0, + "28": 3940899328.0, + "29": 3940899328.0, + "30": 3940899328.0, + "31": 3940899328.0, + "32": 3940899328.0, + "33": 3940899328.0, + "34": 3940899328.0, + "35": 3940899328.0, + "36": 3940899328.0, + "37": 3940899328.0, + "38": 3940899328.0, + "39": 3940899328.0, + "40": 3940899328.0, + "41": 3940899328.0, + "42": 3940899328.0, + "43": 3940899328.0, + "44": 3940899328.0, + "45": 3940899328.0, + "46": 3940899328.0, + "47": 3940899328.0, + "48": 3940899328.0, + "49": 3940899328.0, + "50": 3940899328.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.59634, + "2": 0.14856, + "3": 0.11161, + "4": 0.11302, + "5": 0.11107, + "6": 0.1136, + "7": 0.11041, + "8": 0.10987, + "9": 0.10957, + "10": 0.11046, + "11": 0.24569, + "12": 0.11057, + "13": 0.11113, + "14": 0.10972, + "15": 0.10919, + "16": 0.10934, + "17": 0.11, + "18": 0.11335, + "19": 0.11254, + "20": 0.11141, + "21": 0.24662, + "22": 0.11244, + "23": 0.11141, + "24": 0.11252, + "25": 0.11118, + "26": 0.11137, + "27": 0.1105, + "28": 0.11086, + "29": 0.11045, + "30": 0.11129, + "31": 0.24072, + "32": 0.11093, + "33": 0.11087, + "34": 0.11452, + "35": 0.12015, + "36": 0.11133, + "37": 0.1109, + "38": 0.11245, + "39": 0.11262, + "40": 0.11211, + "41": 0.23988, + "42": 0.11163, + "43": 0.11285, + "44": 0.1115, + "45": 0.1137, + "46": 0.11213, + "47": 0.11057, + "48": 0.11163, + "49": 0.11229, + "50": 0.11164 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..5e069163f6c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84523, + "2": 10.85412, + "3": 10.85365, + "4": 10.83867, + "5": 10.87428, + "6": 10.89334, + "7": 10.8541, + "8": 10.86235, + "9": 10.86352, + "10": 10.82859, + "11": 10.88772, + "12": 10.87148, + "13": 10.87938, + "14": 10.89123, + "15": 10.81927, + "16": 10.83063, + "17": 10.79878, + "18": 10.81771, + "19": 10.81957, + "20": 10.72749, + "21": 10.70552, + "22": 10.56396, + "23": 10.72823, + "24": 10.60839, + "25": 10.55198, + "26": 10.60868, + "27": 10.62879, + "28": 10.58271, + "29": 10.59982, + "30": 10.36511, + "31": 10.12096, + "32": 10.47628, + "33": 10.46906, + "34": 10.22326, + "35": 10.27848, + "36": 10.22883, + "37": 10.35947, + "38": 10.19331, + "39": 10.41586, + "40": 10.09773, + "41": 10.15718, + "42": 10.22441, + "43": 9.83281, + "44": 9.96935, + "45": 9.84205, + "46": 9.83017, + "47": 10.15602, + "48": 9.85503, + "49": 9.54049, + "50": 9.91258 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1725.0, + "2": 1664.0, + "3": 1710.0, + "4": 1712.0, + "5": 1834.0, + "6": 1743.0, + "7": 1803.0, + "8": 1737.0, + "9": 1779.0, + "10": 1459.0, + "11": 1898.0, + "12": 1661.0, + "13": 1860.0, + "14": 1764.0, + "15": 1886.0, + "16": 1916.0, + "17": 1773.0, + "18": 1702.0, + "19": 
1742.0, + "20": 1649.0, + "21": 1899.0, + "22": 1631.0, + "23": 1960.0, + "24": 1570.0, + "25": 1647.0, + "26": 1649.0, + "27": 1811.0, + "28": 1930.0, + "29": 1910.0, + "30": 1964.0, + "31": 1536.0, + "32": 1873.0, + "33": 2191.0, + "34": 1838.0, + "35": 2017.0, + "36": 1916.0, + "37": 2345.0, + "38": 2247.0, + "39": 2374.0, + "40": 2207.0, + "41": 2246.0, + "42": 2291.0, + "43": 2027.0, + "44": 2147.0, + "45": 2164.0, + "46": 2300.0, + "47": 2418.0, + "48": 2467.0, + "49": 2255.0, + "50": 2224.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 552054272.0, + "2": 552054272.0, + "3": 552054272.0, + "4": 552054272.0, + "5": 552054272.0, + "6": 552054272.0, + "7": 552054272.0, + "8": 552054272.0, + "9": 552054272.0, + "10": 552054272.0, + "11": 552054272.0, + "12": 552054272.0, + "13": 552054272.0, + "14": 552054272.0, + "15": 552054272.0, + "16": 552054272.0, + "17": 552054272.0, + "18": 552054272.0, + "19": 552054272.0, + "20": 552054272.0, + "21": 552054272.0, + "22": 552054272.0, + "23": 552054272.0, + "24": 552054272.0, + "25": 552054272.0, + "26": 552054272.0, + "27": 552054272.0, + "28": 552054272.0, + "29": 552054272.0, + "30": 552054272.0, + "31": 552054272.0, + "32": 552054272.0, + "33": 552054272.0, + "34": 552054272.0, + "35": 552054272.0, + "36": 552054272.0, + "37": 552054272.0, + "38": 552054272.0, + "39": 552054272.0, + "40": 552054272.0, + "41": 552054272.0, + "42": 552054272.0, + "43": 552054272.0, + "44": 552054272.0, + "45": 552054272.0, + "46": 552054272.0, + "47": 552054272.0, + "48": 552054272.0, + "49": 552054272.0, + "50": 552054272.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3798206976.0, + "2": 3940899328.0, + "3": 3940899328.0, + "4": 3940899328.0, + "5": 3940899328.0, + "6": 3940899328.0, + "7": 3940899328.0, + "8": 3940899328.0, + "9": 3940899328.0, + "10": 3940899328.0, + "11": 3940899328.0, + 
"12": 3940899328.0, + "13": 3940899328.0, + "14": 3940899328.0, + "15": 3940899328.0, + "16": 3940899328.0, + "17": 3940899328.0, + "18": 3940899328.0, + "19": 3940899328.0, + "20": 3940899328.0, + "21": 3940899328.0, + "22": 3940899328.0, + "23": 3940899328.0, + "24": 3940899328.0, + "25": 3940899328.0, + "26": 3940899328.0, + "27": 3940899328.0, + "28": 3940899328.0, + "29": 3940899328.0, + "30": 3940899328.0, + "31": 3940899328.0, + "32": 3940899328.0, + "33": 3940899328.0, + "34": 3940899328.0, + "35": 3940899328.0, + "36": 3940899328.0, + "37": 3940899328.0, + "38": 3940899328.0, + "39": 3940899328.0, + "40": 3940899328.0, + "41": 3940899328.0, + "42": 3940899328.0, + "43": 3940899328.0, + "44": 3940899328.0, + "45": 3940899328.0, + "46": 3940899328.0, + "47": 3940899328.0, + "48": 3940899328.0, + "49": 3940899328.0, + "50": 3940899328.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 15.65845, + "2": 0.14332, + "3": 0.12833, + "4": 0.12525, + "5": 0.12451, + "6": 0.12488, + "7": 0.12455, + "8": 0.12623, + "9": 0.1249, + "10": 0.127, + "11": 0.29256, + "12": 0.12446, + "13": 0.12388, + "14": 0.12448, + "15": 0.12475, + "16": 0.12507, + "17": 0.12682, + "18": 0.12473, + "19": 0.12569, + "20": 0.12441, + "21": 0.28384, + "22": 0.12554, + "23": 0.12552, + "24": 0.12663, + "25": 0.12441, + "26": 0.12547, + "27": 0.12485, + "28": 0.12492, + "29": 0.12419, + "30": 0.12518, + "31": 0.28416, + "32": 0.12399, + "33": 0.12692, + "34": 0.12606, + "35": 0.12537, + "36": 0.12614, + "37": 0.12484, + "38": 0.12464, + "39": 0.12396, + "40": 0.1239, + "41": 0.28831, + "42": 0.12609, + "43": 0.12537, + "44": 0.12484, + "45": 0.12567, + "46": 0.12791, + "47": 0.12281, + "48": 0.124, + "49": 0.12486, + "50": 0.12585 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..62be0bafcf5 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81396, + "4": 10.78497, + "5": 10.85284, + "6": 10.87449, + "7": 10.83201, + "8": 10.83297, + "9": 10.83935, + "10": 10.78455, + "11": 10.87798, + "12": 10.86112, + "13": 10.86444, + "14": 10.87605, + "15": 10.7923, + "16": 10.7951, + "17": 10.76773, + "18": 10.81002, + "19": 10.79715, + "20": 10.69213, + "21": 10.68165, + "22": 10.52083, + "23": 10.70895, + "24": 10.57597, + "25": 10.5241, + "26": 10.59512, + "27": 10.58424, + "28": 10.56231, + "29": 10.57009, + "30": 10.34556, + "31": 10.10048, + "32": 10.45377, + "33": 10.44632, + "34": 10.20606, + "35": 10.26241, + "36": 10.21241, + "37": 10.32522, + "38": 10.16779, + "39": 10.38327, + "40": 10.07237, + "41": 10.13863, + "42": 10.19814, + "43": 9.81079, + "44": 9.93246, + "45": 9.811, + "46": 9.8088, + "47": 10.12607, + "48": 9.82111, + "49": 9.50627, + "50": 9.88419 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1727.0, + "4": 1835.0, + "5": 1840.0, + "6": 1719.0, + "7": 1740.0, + "8": 1591.0, + "9": 1839.0, + "10": 1380.0, + "11": 1856.0, + "12": 1693.0, + "13": 1906.0, + "14": 1757.0, + "15": 1850.0, + "16": 1754.0, + "17": 1768.0, + 
"18": 1671.0, + "19": 1715.0, + "20": 1699.0, + "21": 1891.0, + "22": 1794.0, + "23": 1970.0, + "24": 1751.0, + "25": 1614.0, + "26": 1805.0, + "27": 1821.0, + "28": 2042.0, + "29": 2014.0, + "30": 1905.0, + "31": 1658.0, + "32": 1848.0, + "33": 2113.0, + "34": 1678.0, + "35": 1933.0, + "36": 1922.0, + "37": 2309.0, + "38": 2120.0, + "39": 2469.0, + "40": 2169.0, + "41": 2241.0, + "42": 2276.0, + "43": 1937.0, + "44": 2090.0, + "45": 2101.0, + "46": 2282.0, + "47": 2493.0, + "48": 2309.0, + "49": 2250.0, + "50": 2421.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 522346496.0, + "2": 522346496.0, + "3": 522346496.0, + "4": 522346496.0, + "5": 522346496.0, + "6": 522346496.0, + "7": 522346496.0, + "8": 522346496.0, + "9": 522346496.0, + "10": 522346496.0, + "11": 522346496.0, + "12": 522346496.0, + "13": 522346496.0, + "14": 522346496.0, + "15": 522346496.0, + "16": 522346496.0, + "17": 522346496.0, + "18": 522346496.0, + "19": 522346496.0, + "20": 522346496.0, + "21": 522346496.0, + "22": 522346496.0, + "23": 522346496.0, + "24": 522346496.0, + "25": 522346496.0, + "26": 522346496.0, + "27": 522346496.0, + "28": 522346496.0, + "29": 522346496.0, + "30": 522346496.0, + "31": 522346496.0, + "32": 522346496.0, + "33": 522346496.0, + "34": 522346496.0, + "35": 522346496.0, + "36": 522346496.0, + "37": 522346496.0, + "38": 522346496.0, + "39": 522346496.0, + "40": 522346496.0, + "41": 522346496.0, + "42": 522346496.0, + "43": 522346496.0, + "44": 522346496.0, + "45": 522346496.0, + "46": 522346496.0, + "47": 522346496.0, + "48": 522346496.0, + "49": 522346496.0, + "50": 522346496.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3769791488.0, + "2": 3912108032.0, + "3": 3912108032.0, + "4": 3912108032.0, + "5": 3912108032.0, + "6": 3912108032.0, + "7": 3912108032.0, + "8": 3912108032.0, + "9": 3912108032.0, + "10": 3912108032.0, + 
"11": 3912108032.0, + "12": 3912108032.0, + "13": 3912108032.0, + "14": 3912108032.0, + "15": 3912108032.0, + "16": 3912108032.0, + "17": 3912108032.0, + "18": 3912108032.0, + "19": 3912108032.0, + "20": 3912108032.0, + "21": 3912108032.0, + "22": 3912108032.0, + "23": 3912108032.0, + "24": 3912108032.0, + "25": 3912108032.0, + "26": 3912108032.0, + "27": 3912108032.0, + "28": 3912108032.0, + "29": 3912108032.0, + "30": 3912108032.0, + "31": 3912108032.0, + "32": 3912108032.0, + "33": 3912108032.0, + "34": 3912108032.0, + "35": 3912108032.0, + "36": 3912108032.0, + "37": 3912108032.0, + "38": 3912108032.0, + "39": 3912108032.0, + "40": 3912108032.0, + "41": 3912108032.0, + "42": 3912108032.0, + "43": 3912108032.0, + "44": 3912108032.0, + "45": 3912108032.0, + "46": 3912108032.0, + "47": 3912108032.0, + "48": 3912108032.0, + "49": 3912108032.0, + "50": 3912108032.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22.86952, + "2": 0.20661, + "3": 0.18026, + "4": 0.17656, + "5": 0.17996, + "6": 0.17701, + "7": 0.17871, + "8": 0.17528, + "9": 0.17563, + "10": 0.17569, + "11": 0.74111, + "12": 0.17396, + "13": 0.17377, + "14": 0.1738, + "15": 0.17271, + "16": 0.17324, + "17": 0.17404, + "18": 0.17229, + "19": 0.17205, + "20": 0.17274, + "21": 0.30088, + "22": 0.17329, + "23": 0.17535, + "24": 0.17212, + "25": 0.17389, + "26": 0.19974, + "27": 0.19407, + "28": 0.17531, + "29": 0.17514, + "30": 0.17299, + "31": 0.30323, + "32": 0.17369, + "33": 0.17341, + "34": 0.1737, + "35": 0.17388, + "36": 0.17546, + "37": 0.17373, + "38": 0.17505, + "39": 0.17758, + "40": 0.17506, + "41": 0.3082, + "42": 0.17306, + "43": 0.17922, + "44": 0.17678, + "45": 0.17538, + "46": 0.17386, + "47": 0.17387, + "48": 0.17425, + "49": 0.1761, + "50": 0.17415 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..f7a81a7b3e4 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81396, + "4": 10.78497, + "5": 10.85284, + "6": 10.87449, + "7": 10.83201, + "8": 10.83297, + "9": 10.83935, + "10": 10.78455, + "11": 10.87798, + "12": 10.86112, + "13": 10.86444, + "14": 10.87605, + "15": 10.7923, + "16": 10.7951, + "17": 10.76773, + "18": 10.81002, + "19": 10.79715, + "20": 10.69213, + "21": 10.68165, + "22": 10.52083, + "23": 10.70895, + "24": 10.57597, + "25": 10.5241, + "26": 10.59512, + "27": 10.58424, + "28": 10.56231, + "29": 10.57009, + "30": 10.34556, + "31": 10.10048, + "32": 10.45377, + "33": 10.44632, + "34": 10.20606, + "35": 10.26241, + "36": 10.21241, + "37": 10.32522, + "38": 10.16779, + "39": 10.38327, + "40": 10.07237, + "41": 10.13863, + "42": 10.19814, + "43": 9.81079, + "44": 9.93246, + "45": 9.811, + "46": 9.8088, + "47": 10.12607, + "48": 9.82111, + "49": 9.50627, + "50": 9.88419 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1727.0, + "4": 1835.0, + "5": 1840.0, + "6": 1719.0, + "7": 1740.0, + "8": 1591.0, + "9": 1839.0, + "10": 1380.0, + "11": 1856.0, + "12": 1693.0, + "13": 1906.0, + "14": 1757.0, + "15": 1850.0, + "16": 1754.0, + "17": 1768.0, + "18": 
1671.0, + "19": 1715.0, + "20": 1699.0, + "21": 1891.0, + "22": 1794.0, + "23": 1970.0, + "24": 1751.0, + "25": 1614.0, + "26": 1805.0, + "27": 1821.0, + "28": 2042.0, + "29": 2014.0, + "30": 1905.0, + "31": 1658.0, + "32": 1848.0, + "33": 2113.0, + "34": 1678.0, + "35": 1933.0, + "36": 1922.0, + "37": 2309.0, + "38": 2120.0, + "39": 2469.0, + "40": 2169.0, + "41": 2241.0, + "42": 2276.0, + "43": 1937.0, + "44": 2090.0, + "45": 2101.0, + "46": 2282.0, + "47": 2493.0, + "48": 2309.0, + "49": 2250.0, + "50": 2421.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 522346496.0, + "2": 522346496.0, + "3": 522346496.0, + "4": 522346496.0, + "5": 522346496.0, + "6": 522346496.0, + "7": 522346496.0, + "8": 522346496.0, + "9": 522346496.0, + "10": 522346496.0, + "11": 522346496.0, + "12": 522346496.0, + "13": 522346496.0, + "14": 522346496.0, + "15": 522346496.0, + "16": 522346496.0, + "17": 522346496.0, + "18": 522346496.0, + "19": 522346496.0, + "20": 522346496.0, + "21": 522346496.0, + "22": 522346496.0, + "23": 522346496.0, + "24": 522346496.0, + "25": 522346496.0, + "26": 522346496.0, + "27": 522346496.0, + "28": 522346496.0, + "29": 522346496.0, + "30": 522346496.0, + "31": 522346496.0, + "32": 522346496.0, + "33": 522346496.0, + "34": 522346496.0, + "35": 522346496.0, + "36": 522346496.0, + "37": 522346496.0, + "38": 522346496.0, + "39": 522346496.0, + "40": 522346496.0, + "41": 522346496.0, + "42": 522346496.0, + "43": 522346496.0, + "44": 522346496.0, + "45": 522346496.0, + "46": 522346496.0, + "47": 522346496.0, + "48": 522346496.0, + "49": 522346496.0, + "50": 522346496.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3769791488.0, + "2": 3912108032.0, + "3": 3912108032.0, + "4": 3912108032.0, + "5": 3912108032.0, + "6": 3912108032.0, + "7": 3912108032.0, + "8": 3912108032.0, + "9": 3912108032.0, + "10": 3912108032.0, + "11": 
3912108032.0, + "12": 3912108032.0, + "13": 3912108032.0, + "14": 3912108032.0, + "15": 3912108032.0, + "16": 3912108032.0, + "17": 3912108032.0, + "18": 3912108032.0, + "19": 3912108032.0, + "20": 3912108032.0, + "21": 3912108032.0, + "22": 3912108032.0, + "23": 3912108032.0, + "24": 3912108032.0, + "25": 3912108032.0, + "26": 3912108032.0, + "27": 3912108032.0, + "28": 3912108032.0, + "29": 3912108032.0, + "30": 3912108032.0, + "31": 3912108032.0, + "32": 3912108032.0, + "33": 3912108032.0, + "34": 3912108032.0, + "35": 3912108032.0, + "36": 3912108032.0, + "37": 3912108032.0, + "38": 3912108032.0, + "39": 3912108032.0, + "40": 3912108032.0, + "41": 3912108032.0, + "42": 3912108032.0, + "43": 3912108032.0, + "44": 3912108032.0, + "45": 3912108032.0, + "46": 3912108032.0, + "47": 3912108032.0, + "48": 3912108032.0, + "49": 3912108032.0, + "50": 3912108032.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 26.03973, + "2": 0.20991, + "3": 0.18001, + "4": 0.17535, + "5": 0.37487, + "6": 0.17569, + "7": 0.17538, + "8": 0.17644, + "9": 0.17601, + "10": 0.17454, + "11": 0.32086, + "12": 0.17452, + "13": 0.17725, + "14": 0.17806, + "15": 0.17968, + "16": 0.17731, + "17": 0.18214, + "18": 0.17979, + "19": 0.18197, + "20": 0.18282, + "21": 0.31872, + "22": 0.17621, + "23": 0.18154, + "24": 0.17536, + "25": 0.17248, + "26": 0.3922, + "27": 0.17401, + "28": 0.17258, + "29": 0.17486, + "30": 0.17468, + "31": 0.31294, + "32": 0.17218, + "33": 0.17311, + "34": 0.17553, + "35": 0.17239, + "36": 0.17742, + "37": 0.17354, + "38": 0.17694, + "39": 0.17551, + "40": 0.38673, + "41": 0.31702, + "42": 0.17359, + "43": 0.17781, + "44": 0.17499, + "45": 0.17326, + "46": 0.17496, + "47": 0.17486, + "48": 0.17727, + "49": 0.17954, + "50": 0.17661 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 3f5bf549afb..0c1982c8b78 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.84517, "5": 10.87422, "10": 10.82907, "15": 10.81973, "20": 10.72685, "25": 10.55128, "30": 10.36566, "35": 10.2744, "40": 10.0956, "45": 9.83425, "50": 9.90532, "55": 9.87297, "60": 9.48861, "65": 8.93435, "70": 9.72364, "75": 9.40392, "80": 9.38215, "85": 9.5893, "90": 9.78202, "95": 9.47913, "100": 9.34982}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1655.0, "5": 1897.0, "10": 1441.0, "15": 1918.0, "20": 1610.0, "25": 1597.0, "30": 1875.0, "35": 2045.0, "40": 2184.0, "45": 2077.0, "50": 2196.0, "55": 2351.0, "60": 2359.0, "65": 2577.0, "70": 3151.0, "75": 2425.0, "80": 3254.0, "85": 3492.0, "90": 3160.0, "95": 3247.0, "100": 3076.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 763220480.0, "5": 763220480.0, "10": 763220480.0, "15": 763220480.0, "20": 763220480.0, "25": 763220480.0, "30": 763220480.0, "35": 763220480.0, "40": 763220480.0, "45": 763220480.0, "50": 763220480.0, "55": 763220480.0, "60": 763220480.0, "65": 763220480.0, "70": 763220480.0, "75": 763220480.0, "80": 763220480.0, "85": 763220480.0, "90": 763220480.0, "95": 763220480.0, "100": 763220480.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 
2359490560.0, "5": 2643299328.0, "10": 2643299328.0, "15": 2643299328.0, "20": 2643299328.0, "25": 2643299328.0, "30": 2643299328.0, "35": 2643299328.0, "40": 2643299328.0, "45": 2643299328.0, "50": 2643299328.0, "55": 2643299328.0, "60": 2643299328.0, "65": 2643299328.0, "70": 2643299328.0, "75": 2643299328.0, "80": 2643299328.0, "85": 2643299328.0, "90": 2643299328.0, "95": 2643299328.0, "100": 2643299328.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 12.65344, "5": 0.0984, "10": 0.10108, "15": 0.09929, "20": 0.10139, "25": 0.09855, "30": 0.10032, "35": 0.09726, "40": 0.09784, "45": 0.09917, "50": 0.09956, "55": 0.10014, "60": 0.10632, "65": 0.09944, "70": 0.09595, "75": 0.09574, "80": 0.09657, "85": 0.10004, "90": 0.0985, "95": 0.10078, "100": 0.09765}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84517, + "2": 10.85349, + "3": 10.8539, + "4": 10.83826, + "5": 10.87422, + "6": 10.89306, + "7": 10.85452, + "8": 10.8626, + "9": 10.86463, + "10": 10.82907, + "11": 10.88787, + "12": 10.87098, + "13": 10.87914, + "14": 10.89069, + "15": 10.81973, + "16": 10.83156, + "17": 10.79863, + "18": 10.81648, + "19": 10.8189, + "20": 10.72685, + "21": 10.70581, + "22": 10.56347, + "23": 10.72794, + "24": 10.60761, + "25": 10.55128, + "26": 10.60749, + "27": 10.6277, + "28": 10.58262, + "29": 10.59959, + "30": 10.36566, + "31": 10.11988, + "32": 10.4755, + "33": 10.46637, + "34": 10.22009, + "35": 10.2744, + "36": 10.22594, + "37": 10.35729, + "38": 10.19156, + "39": 10.41342, + "40": 10.0956, + "41": 10.15511, + "42": 10.22085, + "43": 9.82797, + "44": 9.96276, + "45": 9.83425, + "46": 9.82209, + "47": 10.14765, + "48": 9.84681, + "49": 9.53377, + "50": 9.90532, + "51": 9.85116, + "52": 9.73516, + "53": 10.05863, + "54": 9.94369, + "55": 9.87297, + "56": 9.61703, + "57": 9.4675, + "58": 9.82223, + "59": 9.57338, + "60": 9.48861, + "61": 9.67921, 
+ "62": 9.97513, + "63": 9.37045, + "64": 9.76643, + "65": 8.93435, + "66": 9.69463, + "67": 9.35357, + "68": 9.76826, + "69": 9.77682, + "70": 9.72364, + "71": 9.59895, + "72": 9.56454, + "73": 9.48327, + "74": 8.92062, + "75": 9.40392, + "76": 9.05301, + "77": 10.04175, + "78": 9.69879, + "79": 9.35128, + "80": 9.38215, + "81": 9.45866, + "82": 9.67518, + "83": 9.28411, + "84": 9.39313, + "85": 9.5893, + "86": 9.05182, + "87": 9.56419, + "88": 9.71756, + "89": 9.57129, + "90": 9.78202, + "91": 9.3061, + "92": 9.32048, + "93": 9.03942, + "94": 8.79522, + "95": 9.47913, + "96": 9.48454, + "97": 9.2699, + "98": 9.62563, + "99": 8.84255, + "100": 9.34982 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1655.0, + "2": 1697.0, + "3": 1773.0, + "4": 1782.0, + "5": 1897.0, + "6": 1802.0, + "7": 1874.0, + "8": 1653.0, + "9": 1814.0, + "10": 1441.0, + "11": 1909.0, + "12": 1645.0, + "13": 1931.0, + "14": 1678.0, + "15": 1918.0, + "16": 1961.0, + "17": 1711.0, + "18": 1658.0, + "19": 1791.0, + "20": 1610.0, + "21": 1815.0, + "22": 1677.0, + "23": 1952.0, + "24": 1612.0, + "25": 1597.0, + "26": 1657.0, + "27": 1850.0, + "28": 2013.0, + "29": 1966.0, + "30": 1875.0, + "31": 1585.0, + "32": 1941.0, + "33": 2085.0, + "34": 1837.0, + "35": 2045.0, + "36": 1898.0, + "37": 2333.0, + "38": 2247.0, + "39": 2266.0, + "40": 2184.0, + "41": 2209.0, + "42": 2164.0, + "43": 2076.0, + "44": 2169.0, + "45": 2077.0, + "46": 2325.0, + "47": 2505.0, + "48": 2442.0, + "49": 2205.0, + "50": 2196.0, + "51": 2500.0, + "52": 2572.0, + "53": 2905.0, + "54": 2794.0, + "55": 2351.0, + "56": 2606.0, + "57": 2388.0, + "58": 2864.0, + "59": 2726.0, + "60": 2359.0, + "61": 2915.0, + "62": 2610.0, + "63": 2397.0, + "64": 2886.0, + "65": 2577.0, + "66": 2913.0, + "67": 2715.0, + "68": 2646.0, + "69": 2805.0, + "70": 3151.0, + "71": 2917.0, + "72": 2403.0, + "73": 2948.0, + "74": 1994.0, + "75": 2425.0, + "76": 2898.0, + "77": 3085.0, + "78": 3228.0, 
+ "79": 2981.0, + "80": 3254.0, + "81": 3499.0, + "82": 3121.0, + "83": 2711.0, + "84": 3105.0, + "85": 3492.0, + "86": 2693.0, + "87": 3602.0, + "88": 3052.0, + "89": 3230.0, + "90": 3160.0, + "91": 2647.0, + "92": 3160.0, + "93": 2650.0, + "94": 3430.0, + "95": 3247.0, + "96": 3353.0, + "97": 3064.0, + "98": 3486.0, + "99": 3190.0, + "100": 3076.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 759681536.0, + "2": 759681536.0, + "3": 759681536.0, + "4": 759681536.0, + "5": 759681536.0, + "6": 759681536.0, + "7": 759681536.0, + "8": 759681536.0, + "9": 759681536.0, + "10": 759681536.0, + "11": 759681536.0, + "12": 759681536.0, + "13": 759681536.0, + "14": 759681536.0, + "15": 759681536.0, + "16": 759681536.0, + "17": 759681536.0, + "18": 759681536.0, + "19": 759681536.0, + "20": 759681536.0, + "21": 759681536.0, + "22": 759681536.0, + "23": 759681536.0, + "24": 759681536.0, + "25": 759681536.0, + "26": 759681536.0, + "27": 759681536.0, + "28": 759681536.0, + "29": 759681536.0, + "30": 759681536.0, + "31": 759681536.0, + "32": 759681536.0, + "33": 759681536.0, + "34": 759681536.0, + "35": 759681536.0, + "36": 759681536.0, + "37": 759681536.0, + "38": 759681536.0, + "39": 759681536.0, + "40": 759681536.0, + "41": 759681536.0, + "42": 759681536.0, + "43": 759681536.0, + "44": 759681536.0, + "45": 759681536.0, + "46": 759681536.0, + "47": 759681536.0, + "48": 759681536.0, + "49": 759681536.0, + "50": 759681536.0, + "51": 759681536.0, + "52": 759681536.0, + "53": 759681536.0, + "54": 759681536.0, + "55": 759681536.0, + "56": 759681536.0, + "57": 759681536.0, + "58": 759681536.0, + "59": 759681536.0, + "60": 759681536.0, + "61": 759681536.0, + "62": 759681536.0, + "63": 759681536.0, + "64": 759681536.0, + "65": 759681536.0, + "66": 759681536.0, + "67": 759681536.0, + "68": 759681536.0, + "69": 759681536.0, + "70": 759681536.0, + "71": 759681536.0, + "72": 759681536.0, + "73": 759681536.0, + "74": 
759681536.0, + "75": 759681536.0, + "76": 759681536.0, + "77": 759681536.0, + "78": 759681536.0, + "79": 759681536.0, + "80": 759681536.0, + "81": 759681536.0, + "82": 759681536.0, + "83": 759681536.0, + "84": 759681536.0, + "85": 759681536.0, + "86": 759681536.0, + "87": 759681536.0, + "88": 759681536.0, + "89": 759681536.0, + "90": 759681536.0, + "91": 759681536.0, + "92": 759681536.0, + "93": 759681536.0, + "94": 759681536.0, + "95": 759681536.0, + "96": 759681536.0, + "97": 759681536.0, + "98": 759681536.0, + "99": 759681536.0, + "100": 759681536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2358048768.0, + "2": 2639760384.0, + "3": 2639760384.0, + "4": 2639760384.0, + "5": 2639760384.0, + "6": 2639760384.0, + "7": 2639760384.0, + "8": 2639760384.0, + "9": 2639760384.0, + "10": 2639760384.0, + "11": 2639760384.0, + "12": 2639760384.0, + "13": 2639760384.0, + "14": 2639760384.0, + "15": 2639760384.0, + "16": 2639760384.0, + "17": 2639760384.0, + "18": 2639760384.0, + "19": 2639760384.0, + "20": 2639760384.0, + "21": 2639760384.0, + "22": 2639760384.0, + "23": 2639760384.0, + "24": 2639760384.0, + "25": 2639760384.0, + "26": 2639760384.0, + "27": 2639760384.0, + "28": 2639760384.0, + "29": 2639760384.0, + "30": 2639760384.0, + "31": 2639760384.0, + "32": 2639760384.0, + "33": 2639760384.0, + "34": 2639760384.0, + "35": 2639760384.0, + "36": 2639760384.0, + "37": 2639760384.0, + "38": 2639760384.0, + "39": 2639760384.0, + "40": 2639760384.0, + "41": 2639760384.0, + "42": 2639760384.0, + "43": 2639760384.0, + "44": 2639760384.0, + "45": 2639760384.0, + "46": 2639760384.0, + "47": 2639760384.0, + "48": 2639760384.0, + "49": 2639760384.0, + "50": 2639760384.0, + "51": 2639760384.0, + "52": 2639760384.0, + "53": 2639760384.0, + "54": 2639760384.0, + "55": 2639760384.0, + "56": 2639760384.0, + "57": 2639760384.0, + "58": 2639760384.0, + "59": 2639760384.0, + "60": 2639760384.0, + "61": 
2639760384.0, + "62": 2639760384.0, + "63": 2639760384.0, + "64": 2639760384.0, + "65": 2639760384.0, + "66": 2639760384.0, + "67": 2639760384.0, + "68": 2639760384.0, + "69": 2639760384.0, + "70": 2639760384.0, + "71": 2639760384.0, + "72": 2639760384.0, + "73": 2639760384.0, + "74": 2639760384.0, + "75": 2639760384.0, + "76": 2639760384.0, + "77": 2639760384.0, + "78": 2639760384.0, + "79": 2639760384.0, + "80": 2639760384.0, + "81": 2639760384.0, + "82": 2639760384.0, + "83": 2639760384.0, + "84": 2639760384.0, + "85": 2639760384.0, + "86": 2639760384.0, + "87": 2639760384.0, + "88": 2639760384.0, + "89": 2639760384.0, + "90": 2639760384.0, + "91": 2639760384.0, + "92": 2639760384.0, + "93": 2639760384.0, + "94": 2639760384.0, + "95": 2639760384.0, + "96": 2639760384.0, + "97": 2639760384.0, + "98": 2639760384.0, + "99": 2639760384.0, + "100": 2639760384.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.75462, + "2": 0.12782, + "3": 0.11297, + "4": 0.11221, + "5": 0.11226, + "6": 0.11209, + "7": 0.11157, + "8": 0.11109, + "9": 0.11159, + "10": 0.11411, + "11": 0.11336, + "12": 0.10975, + "13": 0.11129, + "14": 0.11016, + "15": 0.11082, + "16": 0.11173, + "17": 0.1107, + "18": 0.113, + "19": 0.11419, + "20": 0.11333, + "21": 0.11169, + "22": 0.11202, + "23": 0.11053, + "24": 0.1123, + "25": 0.11015, + "26": 0.11042, + "27": 0.11289, + "28": 0.11429, + "29": 0.11129, + "30": 0.11046, + "31": 0.11122, + "32": 0.1104, + "33": 0.11073, + "34": 0.11003, + "35": 0.1113, + "36": 0.11176, + "37": 0.11321, + "38": 0.10946, + "39": 0.10923, + "40": 0.10989, + "41": 0.11025, + "42": 0.11059, + "43": 0.11079, + "44": 0.11083, + "45": 0.1125, + "46": 0.11427, + "47": 0.10872, + "48": 0.11101, + "49": 0.10925, + "50": 0.10952, + "51": 0.11025, + "52": 0.11105, + "53": 0.11002, + "54": 0.10971, + "55": 0.11074, + "56": 0.11019, + "57": 0.11283, + "58": 0.11172, + "59": 0.1132, + "60": 0.11512, + "61": 0.11318, + 
"62": 0.11088, + "63": 0.11201, + "64": 0.10971, + "65": 0.11109, + "66": 0.11046, + "67": 0.1107, + "68": 0.11123, + "69": 0.1121, + "70": 0.11129, + "71": 0.1106, + "72": 0.11162, + "73": 0.11219, + "74": 0.11285, + "75": 0.11259, + "76": 0.11452, + "77": 0.11103, + "78": 0.11112, + "79": 0.11137, + "80": 0.11228, + "81": 0.11061, + "82": 0.11185, + "83": 0.111, + "84": 0.11067, + "85": 0.11266, + "86": 0.11269, + "87": 0.11295, + "88": 0.10971, + "89": 0.11137, + "90": 0.11022, + "91": 0.11153, + "92": 0.10828, + "93": 0.1125, + "94": 0.11279, + "95": 0.11157, + "96": 0.11174, + "97": 0.10966, + "98": 0.11031, + "99": 0.11036, + "100": 0.10984 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..73ffbc48219 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84517, + "2": 10.85349, + "3": 10.8539, + "4": 10.83826, + "5": 10.87422, + "6": 10.89306, + "7": 10.85452, + "8": 10.8626, + "9": 10.86463, + "10": 10.82907, + "11": 10.88787, + "12": 10.87098, + "13": 10.87914, + "14": 10.89069, + "15": 10.81973, + "16": 10.83156, + "17": 10.79863, + "18": 10.81648, + "19": 10.8189, + "20": 10.72685, + "21": 10.70581, + "22": 10.56347, + "23": 10.72794, + "24": 10.60761, + "25": 10.55128, + "26": 10.60749, + "27": 10.6277, + "28": 10.58262, + "29": 10.59959, + "30": 10.36566, + "31": 10.11988, + "32": 10.4755, + "33": 10.46637, + "34": 10.22009, + "35": 10.2744, + "36": 10.22594, + "37": 10.35729, + "38": 
10.19156, + "39": 10.41342, + "40": 10.0956, + "41": 10.15511, + "42": 10.22085, + "43": 9.82797, + "44": 9.96276, + "45": 9.83425, + "46": 9.82209, + "47": 10.14765, + "48": 9.84681, + "49": 9.53377, + "50": 9.90532, + "51": 9.85116, + "52": 9.73516, + "53": 10.05863, + "54": 9.94369, + "55": 9.87297, + "56": 9.61703, + "57": 9.4675, + "58": 9.82223, + "59": 9.57338, + "60": 9.48861, + "61": 9.67921, + "62": 9.97513, + "63": 9.37045, + "64": 9.76643, + "65": 8.93435, + "66": 9.69463, + "67": 9.35357, + "68": 9.76826, + "69": 9.77682, + "70": 9.72364, + "71": 9.59895, + "72": 9.56454, + "73": 9.48327, + "74": 8.92062, + "75": 9.40392, + "76": 9.05301, + "77": 10.04175, + "78": 9.69879, + "79": 9.35128, + "80": 9.38215, + "81": 9.45866, + "82": 9.67518, + "83": 9.28411, + "84": 9.39313, + "85": 9.5893, + "86": 9.05182, + "87": 9.56419, + "88": 9.71756, + "89": 9.57129, + "90": 9.78202, + "91": 9.3061, + "92": 9.32048, + "93": 9.03942, + "94": 8.79522, + "95": 9.47913, + "96": 9.48454, + "97": 9.2699, + "98": 9.62563, + "99": 8.84255, + "100": 9.34982 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1655.0, + "2": 1697.0, + "3": 1773.0, + "4": 1782.0, + "5": 1897.0, + "6": 1802.0, + "7": 1874.0, + "8": 1653.0, + "9": 1814.0, + "10": 1441.0, + "11": 1909.0, + "12": 1645.0, + "13": 1931.0, + "14": 1678.0, + "15": 1918.0, + "16": 1961.0, + "17": 1711.0, + "18": 1658.0, + "19": 1791.0, + "20": 1610.0, + "21": 1815.0, + "22": 1677.0, + "23": 1952.0, + "24": 1612.0, + "25": 1597.0, + "26": 1657.0, + "27": 1850.0, + "28": 2013.0, + "29": 1966.0, + "30": 1875.0, + "31": 1585.0, + "32": 1941.0, + "33": 2085.0, + "34": 1837.0, + "35": 2045.0, + "36": 1898.0, + "37": 2333.0, + "38": 2247.0, + "39": 2266.0, + "40": 2184.0, + "41": 2209.0, + "42": 2164.0, + "43": 2076.0, + "44": 2169.0, + "45": 2077.0, + "46": 2325.0, + "47": 2505.0, + "48": 2442.0, + "49": 2205.0, + "50": 2196.0, + "51": 2500.0, + "52": 2572.0, + "53": 
2905.0, + "54": 2794.0, + "55": 2351.0, + "56": 2606.0, + "57": 2388.0, + "58": 2864.0, + "59": 2726.0, + "60": 2359.0, + "61": 2915.0, + "62": 2610.0, + "63": 2397.0, + "64": 2886.0, + "65": 2577.0, + "66": 2913.0, + "67": 2715.0, + "68": 2646.0, + "69": 2805.0, + "70": 3151.0, + "71": 2917.0, + "72": 2403.0, + "73": 2948.0, + "74": 1994.0, + "75": 2425.0, + "76": 2898.0, + "77": 3085.0, + "78": 3228.0, + "79": 2981.0, + "80": 3254.0, + "81": 3499.0, + "82": 3121.0, + "83": 2711.0, + "84": 3105.0, + "85": 3492.0, + "86": 2693.0, + "87": 3602.0, + "88": 3052.0, + "89": 3230.0, + "90": 3160.0, + "91": 2647.0, + "92": 3160.0, + "93": 2650.0, + "94": 3430.0, + "95": 3247.0, + "96": 3353.0, + "97": 3064.0, + "98": 3486.0, + "99": 3190.0, + "100": 3076.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 759681536.0, + "2": 759681536.0, + "3": 759681536.0, + "4": 759681536.0, + "5": 759681536.0, + "6": 759681536.0, + "7": 759681536.0, + "8": 759681536.0, + "9": 759681536.0, + "10": 759681536.0, + "11": 759681536.0, + "12": 759681536.0, + "13": 759681536.0, + "14": 759681536.0, + "15": 759681536.0, + "16": 759681536.0, + "17": 759681536.0, + "18": 759681536.0, + "19": 759681536.0, + "20": 759681536.0, + "21": 759681536.0, + "22": 759681536.0, + "23": 759681536.0, + "24": 759681536.0, + "25": 759681536.0, + "26": 759681536.0, + "27": 759681536.0, + "28": 759681536.0, + "29": 759681536.0, + "30": 759681536.0, + "31": 759681536.0, + "32": 759681536.0, + "33": 759681536.0, + "34": 759681536.0, + "35": 759681536.0, + "36": 759681536.0, + "37": 759681536.0, + "38": 759681536.0, + "39": 759681536.0, + "40": 759681536.0, + "41": 759681536.0, + "42": 759681536.0, + "43": 759681536.0, + "44": 759681536.0, + "45": 759681536.0, + "46": 759681536.0, + "47": 759681536.0, + "48": 759681536.0, + "49": 759681536.0, + "50": 759681536.0, + "51": 759681536.0, + "52": 759681536.0, + "53": 759681536.0, + "54": 759681536.0, + 
"55": 759681536.0, + "56": 759681536.0, + "57": 759681536.0, + "58": 759681536.0, + "59": 759681536.0, + "60": 759681536.0, + "61": 759681536.0, + "62": 759681536.0, + "63": 759681536.0, + "64": 759681536.0, + "65": 759681536.0, + "66": 759681536.0, + "67": 759681536.0, + "68": 759681536.0, + "69": 759681536.0, + "70": 759681536.0, + "71": 759681536.0, + "72": 759681536.0, + "73": 759681536.0, + "74": 759681536.0, + "75": 759681536.0, + "76": 759681536.0, + "77": 759681536.0, + "78": 759681536.0, + "79": 759681536.0, + "80": 759681536.0, + "81": 759681536.0, + "82": 759681536.0, + "83": 759681536.0, + "84": 759681536.0, + "85": 759681536.0, + "86": 759681536.0, + "87": 759681536.0, + "88": 759681536.0, + "89": 759681536.0, + "90": 759681536.0, + "91": 759681536.0, + "92": 759681536.0, + "93": 759681536.0, + "94": 759681536.0, + "95": 759681536.0, + "96": 759681536.0, + "97": 759681536.0, + "98": 759681536.0, + "99": 759681536.0, + "100": 759681536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2358048768.0, + "2": 2639760384.0, + "3": 2639760384.0, + "4": 2639760384.0, + "5": 2639760384.0, + "6": 2639760384.0, + "7": 2639760384.0, + "8": 2639760384.0, + "9": 2639760384.0, + "10": 2639760384.0, + "11": 2639760384.0, + "12": 2639760384.0, + "13": 2639760384.0, + "14": 2639760384.0, + "15": 2639760384.0, + "16": 2639760384.0, + "17": 2639760384.0, + "18": 2639760384.0, + "19": 2639760384.0, + "20": 2639760384.0, + "21": 2639760384.0, + "22": 2639760384.0, + "23": 2639760384.0, + "24": 2639760384.0, + "25": 2639760384.0, + "26": 2639760384.0, + "27": 2639760384.0, + "28": 2639760384.0, + "29": 2639760384.0, + "30": 2639760384.0, + "31": 2639760384.0, + "32": 2639760384.0, + "33": 2639760384.0, + "34": 2639760384.0, + "35": 2639760384.0, + "36": 2639760384.0, + "37": 2639760384.0, + "38": 2639760384.0, + "39": 2639760384.0, + "40": 2639760384.0, + "41": 2639760384.0, + "42": 2639760384.0, + 
"43": 2639760384.0, + "44": 2639760384.0, + "45": 2639760384.0, + "46": 2639760384.0, + "47": 2639760384.0, + "48": 2639760384.0, + "49": 2639760384.0, + "50": 2639760384.0, + "51": 2639760384.0, + "52": 2639760384.0, + "53": 2639760384.0, + "54": 2639760384.0, + "55": 2639760384.0, + "56": 2639760384.0, + "57": 2639760384.0, + "58": 2639760384.0, + "59": 2639760384.0, + "60": 2639760384.0, + "61": 2639760384.0, + "62": 2639760384.0, + "63": 2639760384.0, + "64": 2639760384.0, + "65": 2639760384.0, + "66": 2639760384.0, + "67": 2639760384.0, + "68": 2639760384.0, + "69": 2639760384.0, + "70": 2639760384.0, + "71": 2639760384.0, + "72": 2639760384.0, + "73": 2639760384.0, + "74": 2639760384.0, + "75": 2639760384.0, + "76": 2639760384.0, + "77": 2639760384.0, + "78": 2639760384.0, + "79": 2639760384.0, + "80": 2639760384.0, + "81": 2639760384.0, + "82": 2639760384.0, + "83": 2639760384.0, + "84": 2639760384.0, + "85": 2639760384.0, + "86": 2639760384.0, + "87": 2639760384.0, + "88": 2639760384.0, + "89": 2639760384.0, + "90": 2639760384.0, + "91": 2639760384.0, + "92": 2639760384.0, + "93": 2639760384.0, + "94": 2639760384.0, + "95": 2639760384.0, + "96": 2639760384.0, + "97": 2639760384.0, + "98": 2639760384.0, + "99": 2639760384.0, + "100": 2639760384.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.25777, + "2": 0.13394, + "3": 0.09922, + "4": 0.09894, + "5": 0.09775, + "6": 0.09731, + "7": 0.09832, + "8": 0.09902, + "9": 0.0976, + "10": 0.09738, + "11": 0.09769, + "12": 0.09775, + "13": 0.0973, + "14": 0.09697, + "15": 0.09749, + "16": 0.09763, + "17": 0.09815, + "18": 0.09802, + "19": 0.09718, + "20": 0.09775, + "21": 0.09758, + "22": 0.09773, + "23": 0.09785, + "24": 0.09828, + "25": 0.09821, + "26": 0.09669, + "27": 0.09722, + "28": 0.09732, + "29": 0.09861, + "30": 0.09875, + "31": 0.09867, + "32": 0.09834, + "33": 0.0982, + "34": 0.09928, + "35": 0.09811, + "36": 0.09669, + "37": 0.09757, + 
"38": 0.09767, + "39": 0.09702, + "40": 0.09753, + "41": 0.09794, + "42": 0.09878, + "43": 0.09912, + "44": 0.09929, + "45": 0.09921, + "46": 0.09947, + "47": 0.10001, + "48": 0.09906, + "49": 0.09991, + "50": 0.0993, + "51": 0.10133, + "52": 0.09956, + "53": 0.09824, + "54": 0.09904, + "55": 0.09915, + "56": 0.09925, + "57": 0.09859, + "58": 0.09644, + "59": 0.09661, + "60": 0.09755, + "61": 0.09709, + "62": 0.09665, + "63": 0.09681, + "64": 0.09617, + "65": 0.09641, + "66": 0.09621, + "67": 0.09683, + "68": 0.09678, + "69": 0.09664, + "70": 0.09803, + "71": 0.09677, + "72": 0.09645, + "73": 0.09681, + "74": 0.09753, + "75": 0.09704, + "76": 0.09776, + "77": 0.09822, + "78": 0.09631, + "79": 0.09728, + "80": 0.09766, + "81": 0.09703, + "82": 0.0976, + "83": 0.09876, + "84": 0.09779, + "85": 0.0973, + "86": 0.09965, + "87": 0.09825, + "88": 0.09698, + "89": 0.09761, + "90": 0.09663, + "91": 0.09746, + "92": 0.09681, + "93": 0.09761, + "94": 0.09917, + "95": 0.09904, + "96": 0.09748, + "97": 0.09707, + "98": 0.09661, + "99": 0.09831, + "100": 0.09719 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..603dba4c2e5 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84517, + "2": 10.85349, + "3": 10.8539, + "4": 10.83826, + "5": 10.87422, + "6": 10.89306, + "7": 10.85452, + "8": 10.8626, + "9": 10.86463, + "10": 10.82907, + "11": 10.88787, + "12": 10.87098, + "13": 10.87914, + "14": 10.89069, + "15": 10.81973, + "16": 
10.83156, + "17": 10.79863, + "18": 10.81648, + "19": 10.8189, + "20": 10.72685, + "21": 10.70581, + "22": 10.56347, + "23": 10.72794, + "24": 10.60761, + "25": 10.55128, + "26": 10.60749, + "27": 10.6277, + "28": 10.58262, + "29": 10.59959, + "30": 10.36566, + "31": 10.11988, + "32": 10.4755, + "33": 10.46637, + "34": 10.22009, + "35": 10.2744, + "36": 10.22594, + "37": 10.35729, + "38": 10.19156, + "39": 10.41342, + "40": 10.0956, + "41": 10.15511, + "42": 10.22085, + "43": 9.82797, + "44": 9.96276, + "45": 9.83425, + "46": 9.82209, + "47": 10.14765, + "48": 9.84681, + "49": 9.53377, + "50": 9.90532, + "51": 9.85116, + "52": 9.73516, + "53": 10.05863, + "54": 9.94369, + "55": 9.87297, + "56": 9.61703, + "57": 9.4675, + "58": 9.82223, + "59": 9.57338, + "60": 9.48861, + "61": 9.67921, + "62": 9.97513, + "63": 9.37045, + "64": 9.76643, + "65": 8.93435, + "66": 9.69463, + "67": 9.35357, + "68": 9.76826, + "69": 9.77682, + "70": 9.72364, + "71": 9.59895, + "72": 9.56454, + "73": 9.48327, + "74": 8.92062, + "75": 9.40392, + "76": 9.05301, + "77": 10.04175, + "78": 9.69879, + "79": 9.35128, + "80": 9.38215, + "81": 9.45866, + "82": 9.67518, + "83": 9.28411, + "84": 9.39313, + "85": 9.5893, + "86": 9.05182, + "87": 9.56419, + "88": 9.71756, + "89": 9.57129, + "90": 9.78202, + "91": 9.3061, + "92": 9.32048, + "93": 9.03942, + "94": 8.79522, + "95": 9.47913, + "96": 9.48454, + "97": 9.2699, + "98": 9.62563, + "99": 8.84255, + "100": 9.34982 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1655.0, + "2": 1697.0, + "3": 1773.0, + "4": 1782.0, + "5": 1897.0, + "6": 1802.0, + "7": 1874.0, + "8": 1653.0, + "9": 1814.0, + "10": 1441.0, + "11": 1909.0, + "12": 1645.0, + "13": 1931.0, + "14": 1678.0, + "15": 1918.0, + "16": 1961.0, + "17": 1711.0, + "18": 1658.0, + "19": 1791.0, + "20": 1610.0, + "21": 1815.0, + "22": 1677.0, + "23": 1952.0, + "24": 1612.0, + "25": 1597.0, + "26": 1657.0, + "27": 1850.0, + "28": 2013.0, + 
"29": 1966.0, + "30": 1875.0, + "31": 1585.0, + "32": 1941.0, + "33": 2085.0, + "34": 1837.0, + "35": 2045.0, + "36": 1898.0, + "37": 2333.0, + "38": 2247.0, + "39": 2266.0, + "40": 2184.0, + "41": 2209.0, + "42": 2164.0, + "43": 2076.0, + "44": 2169.0, + "45": 2077.0, + "46": 2325.0, + "47": 2505.0, + "48": 2442.0, + "49": 2205.0, + "50": 2196.0, + "51": 2500.0, + "52": 2572.0, + "53": 2905.0, + "54": 2794.0, + "55": 2351.0, + "56": 2606.0, + "57": 2388.0, + "58": 2864.0, + "59": 2726.0, + "60": 2359.0, + "61": 2915.0, + "62": 2610.0, + "63": 2397.0, + "64": 2886.0, + "65": 2577.0, + "66": 2913.0, + "67": 2715.0, + "68": 2646.0, + "69": 2805.0, + "70": 3151.0, + "71": 2917.0, + "72": 2403.0, + "73": 2948.0, + "74": 1994.0, + "75": 2425.0, + "76": 2898.0, + "77": 3085.0, + "78": 3228.0, + "79": 2981.0, + "80": 3254.0, + "81": 3499.0, + "82": 3121.0, + "83": 2711.0, + "84": 3105.0, + "85": 3492.0, + "86": 2693.0, + "87": 3602.0, + "88": 3052.0, + "89": 3230.0, + "90": 3160.0, + "91": 2647.0, + "92": 3160.0, + "93": 2650.0, + "94": 3430.0, + "95": 3247.0, + "96": 3353.0, + "97": 3064.0, + "98": 3486.0, + "99": 3190.0, + "100": 3076.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 759681536.0, + "2": 759681536.0, + "3": 759681536.0, + "4": 759681536.0, + "5": 759681536.0, + "6": 759681536.0, + "7": 759681536.0, + "8": 759681536.0, + "9": 759681536.0, + "10": 759681536.0, + "11": 759681536.0, + "12": 759681536.0, + "13": 759681536.0, + "14": 759681536.0, + "15": 759681536.0, + "16": 759681536.0, + "17": 759681536.0, + "18": 759681536.0, + "19": 759681536.0, + "20": 759681536.0, + "21": 759681536.0, + "22": 759681536.0, + "23": 759681536.0, + "24": 759681536.0, + "25": 759681536.0, + "26": 759681536.0, + "27": 759681536.0, + "28": 759681536.0, + "29": 759681536.0, + "30": 759681536.0, + "31": 759681536.0, + "32": 759681536.0, + "33": 759681536.0, + "34": 759681536.0, + "35": 759681536.0, + "36": 
759681536.0, + "37": 759681536.0, + "38": 759681536.0, + "39": 759681536.0, + "40": 759681536.0, + "41": 759681536.0, + "42": 759681536.0, + "43": 759681536.0, + "44": 759681536.0, + "45": 759681536.0, + "46": 759681536.0, + "47": 759681536.0, + "48": 759681536.0, + "49": 759681536.0, + "50": 759681536.0, + "51": 759681536.0, + "52": 759681536.0, + "53": 759681536.0, + "54": 759681536.0, + "55": 759681536.0, + "56": 759681536.0, + "57": 759681536.0, + "58": 759681536.0, + "59": 759681536.0, + "60": 759681536.0, + "61": 759681536.0, + "62": 759681536.0, + "63": 759681536.0, + "64": 759681536.0, + "65": 759681536.0, + "66": 759681536.0, + "67": 759681536.0, + "68": 759681536.0, + "69": 759681536.0, + "70": 759681536.0, + "71": 759681536.0, + "72": 759681536.0, + "73": 759681536.0, + "74": 759681536.0, + "75": 759681536.0, + "76": 759681536.0, + "77": 759681536.0, + "78": 759681536.0, + "79": 759681536.0, + "80": 759681536.0, + "81": 759681536.0, + "82": 759681536.0, + "83": 759681536.0, + "84": 759681536.0, + "85": 759681536.0, + "86": 759681536.0, + "87": 759681536.0, + "88": 759681536.0, + "89": 759681536.0, + "90": 759681536.0, + "91": 759681536.0, + "92": 759681536.0, + "93": 759681536.0, + "94": 759681536.0, + "95": 759681536.0, + "96": 759681536.0, + "97": 759681536.0, + "98": 759681536.0, + "99": 759681536.0, + "100": 759681536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2358048768.0, + "2": 2639760384.0, + "3": 2639760384.0, + "4": 2639760384.0, + "5": 2639760384.0, + "6": 2639760384.0, + "7": 2639760384.0, + "8": 2639760384.0, + "9": 2639760384.0, + "10": 2639760384.0, + "11": 2639760384.0, + "12": 2639760384.0, + "13": 2639760384.0, + "14": 2639760384.0, + "15": 2639760384.0, + "16": 2639760384.0, + "17": 2639760384.0, + "18": 2639760384.0, + "19": 2639760384.0, + "20": 2639760384.0, + "21": 2639760384.0, + "22": 2639760384.0, + "23": 2639760384.0, + "24": 2639760384.0, + "25": 
2639760384.0, + "26": 2639760384.0, + "27": 2639760384.0, + "28": 2639760384.0, + "29": 2639760384.0, + "30": 2639760384.0, + "31": 2639760384.0, + "32": 2639760384.0, + "33": 2639760384.0, + "34": 2639760384.0, + "35": 2639760384.0, + "36": 2639760384.0, + "37": 2639760384.0, + "38": 2639760384.0, + "39": 2639760384.0, + "40": 2639760384.0, + "41": 2639760384.0, + "42": 2639760384.0, + "43": 2639760384.0, + "44": 2639760384.0, + "45": 2639760384.0, + "46": 2639760384.0, + "47": 2639760384.0, + "48": 2639760384.0, + "49": 2639760384.0, + "50": 2639760384.0, + "51": 2639760384.0, + "52": 2639760384.0, + "53": 2639760384.0, + "54": 2639760384.0, + "55": 2639760384.0, + "56": 2639760384.0, + "57": 2639760384.0, + "58": 2639760384.0, + "59": 2639760384.0, + "60": 2639760384.0, + "61": 2639760384.0, + "62": 2639760384.0, + "63": 2639760384.0, + "64": 2639760384.0, + "65": 2639760384.0, + "66": 2639760384.0, + "67": 2639760384.0, + "68": 2639760384.0, + "69": 2639760384.0, + "70": 2639760384.0, + "71": 2639760384.0, + "72": 2639760384.0, + "73": 2639760384.0, + "74": 2639760384.0, + "75": 2639760384.0, + "76": 2639760384.0, + "77": 2639760384.0, + "78": 2639760384.0, + "79": 2639760384.0, + "80": 2639760384.0, + "81": 2639760384.0, + "82": 2639760384.0, + "83": 2639760384.0, + "84": 2639760384.0, + "85": 2639760384.0, + "86": 2639760384.0, + "87": 2639760384.0, + "88": 2639760384.0, + "89": 2639760384.0, + "90": 2639760384.0, + "91": 2639760384.0, + "92": 2639760384.0, + "93": 2639760384.0, + "94": 2639760384.0, + "95": 2639760384.0, + "96": 2639760384.0, + "97": 2639760384.0, + "98": 2639760384.0, + "99": 2639760384.0, + "100": 2639760384.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 16.0335, + "2": 0.14377, + "3": 0.129, + "4": 0.12162, + "5": 0.11612, + "6": 0.11324, + "7": 0.11415, + "8": 0.11274, + "9": 0.11392, + "10": 0.11729, + "11": 0.11228, + "12": 0.11141, + "13": 0.11245, + "14": 0.11042, + 
"15": 0.11174, + "16": 0.1114, + "17": 0.11204, + "18": 0.11241, + "19": 0.11298, + "20": 0.11272, + "21": 0.11169, + "22": 0.11228, + "23": 0.11255, + "24": 0.11124, + "25": 0.11188, + "26": 0.11351, + "27": 0.11159, + "28": 0.11318, + "29": 0.11016, + "30": 0.11051, + "31": 0.11184, + "32": 0.11116, + "33": 0.1106, + "34": 0.11105, + "35": 0.113, + "36": 0.11198, + "37": 0.1117, + "38": 0.11109, + "39": 0.1099, + "40": 0.11097, + "41": 0.11159, + "42": 0.11191, + "43": 0.11283, + "44": 0.11266, + "45": 0.111, + "46": 0.11347, + "47": 0.1099, + "48": 0.10973, + "49": 0.11225, + "50": 0.11231, + "51": 0.1122, + "52": 0.10985, + "53": 0.11147, + "54": 0.11064, + "55": 0.11101, + "56": 0.11356, + "57": 0.11368, + "58": 0.11185, + "59": 0.11193, + "60": 0.11205, + "61": 0.11176, + "62": 0.11293, + "63": 0.1127, + "64": 0.11343, + "65": 0.11282, + "66": 0.11245, + "67": 0.11385, + "68": 0.11071, + "69": 0.11079, + "70": 0.112, + "71": 0.1108, + "72": 0.11299, + "73": 0.11305, + "74": 0.11343, + "75": 0.11155, + "76": 0.11323, + "77": 0.11174, + "78": 0.11138, + "79": 0.11246, + "80": 0.11252, + "81": 0.11217, + "82": 0.11269, + "83": 0.11312, + "84": 0.11075, + "85": 0.11227, + "86": 0.11159, + "87": 0.11227, + "88": 0.11227, + "89": 0.11277, + "90": 0.11219, + "91": 0.11067, + "92": 0.10961, + "93": 0.10907, + "94": 0.11584, + "95": 0.1087, + "96": 0.11107, + "97": 0.11046, + "98": 0.10986, + "99": 0.11249, + "100": 0.1095 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..cf2c7b97468 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81397, + "4": 10.78498, + "5": 10.85285, + "6": 10.87448, + "7": 10.83201, + "8": 10.83296, + "9": 10.83936, + "10": 10.78449, + "11": 10.87794, + "12": 10.86113, + "13": 10.86438, + "14": 10.87595, + "15": 10.79226, + "16": 10.79507, + "17": 10.76764, + "18": 10.80977, + "19": 10.79693, + "20": 10.69196, + "21": 10.68154, + "22": 10.52072, + "23": 10.70881, + "24": 10.5753, + "25": 10.52318, + "26": 10.59411, + "27": 10.58357, + "28": 10.56188, + "29": 10.5696, + "30": 10.34505, + "31": 10.09986, + "32": 10.45209, + "33": 10.44378, + "34": 10.20285, + "35": 10.25888, + "36": 10.20951, + "37": 10.32305, + "38": 10.1656, + "39": 10.38115, + "40": 10.07032, + "41": 10.1364, + "42": 10.19467, + "43": 9.80541, + "44": 9.92556, + "45": 9.803, + "46": 9.80008, + "47": 10.11716, + "48": 9.81309, + "49": 9.49911, + "50": 9.87675, + "51": 9.82883, + "52": 9.71745, + "53": 10.03867, + "54": 9.92195, + "55": 9.85523, + "56": 9.5922, + "57": 9.44053, + "58": 9.79679, + "59": 9.5545, + "60": 9.46634, + "61": 9.66578, + "62": 9.95346, + "63": 9.33681, + "64": 9.74137, + "65": 8.91657, + "66": 9.66586, + "67": 9.34349, + "68": 9.75312, + "69": 9.75728, + "70": 9.69276, + "71": 9.58799, + "72": 9.55054, + "73": 9.46306, + "74": 8.90575, + "75": 9.37813, + "76": 9.04954, + "77": 10.02987, + "78": 9.69223, + "79": 9.33487, + "80": 9.368, + "81": 9.44383, + "82": 9.66162, + "83": 9.27183, + "84": 9.38074, + "85": 9.57598, + "86": 9.0429, + "87": 9.55787, + "88": 9.70459, + "89": 9.56609, + "90": 9.77247, + "91": 9.29341, + "92": 9.31916, + "93": 9.03465, + "94": 8.78492, + "95": 9.46912, + "96": 9.47453, + "97": 9.25689, + "98": 9.61859, + "99": 8.83266, + "100": 9.34574 + } + }, + 
"num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1686.0, + "4": 1707.0, + "5": 1915.0, + "6": 1734.0, + "7": 1735.0, + "8": 1584.0, + "9": 1810.0, + "10": 1361.0, + "11": 1884.0, + "12": 1714.0, + "13": 1923.0, + "14": 1736.0, + "15": 1831.0, + "16": 1684.0, + "17": 1787.0, + "18": 1707.0, + "19": 1680.0, + "20": 1695.0, + "21": 1815.0, + "22": 1711.0, + "23": 2079.0, + "24": 1677.0, + "25": 1650.0, + "26": 1714.0, + "27": 1813.0, + "28": 1998.0, + "29": 1931.0, + "30": 1861.0, + "31": 1573.0, + "32": 1934.0, + "33": 2063.0, + "34": 1891.0, + "35": 1916.0, + "36": 1939.0, + "37": 2299.0, + "38": 2235.0, + "39": 2352.0, + "40": 2109.0, + "41": 2286.0, + "42": 2232.0, + "43": 1919.0, + "44": 2032.0, + "45": 2098.0, + "46": 2287.0, + "47": 2513.0, + "48": 2360.0, + "49": 2126.0, + "50": 2424.0, + "51": 2433.0, + "52": 2566.0, + "53": 2902.0, + "54": 2589.0, + "55": 2309.0, + "56": 2761.0, + "57": 2265.0, + "58": 2876.0, + "59": 2821.0, + "60": 2432.0, + "61": 3073.0, + "62": 2638.0, + "63": 2426.0, + "64": 2913.0, + "65": 2660.0, + "66": 2985.0, + "67": 2723.0, + "68": 2790.0, + "69": 2997.0, + "70": 3132.0, + "71": 2837.0, + "72": 2291.0, + "73": 2780.0, + "74": 1936.0, + "75": 2555.0, + "76": 3028.0, + "77": 3175.0, + "78": 3109.0, + "79": 2994.0, + "80": 3370.0, + "81": 3552.0, + "82": 3308.0, + "83": 2898.0, + "84": 3285.0, + "85": 3434.0, + "86": 2573.0, + "87": 3858.0, + "88": 2920.0, + "89": 3217.0, + "90": 2868.0, + "91": 2784.0, + "92": 3011.0, + "93": 2700.0, + "94": 3372.0, + "95": 3273.0, + "96": 3557.0, + "97": 3145.0, + "98": 3635.0, + "99": 3308.0, + "100": 3359.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 730320896.0, + "2": 730320896.0, + "3": 730320896.0, + "4": 730320896.0, + "5": 730320896.0, + "6": 730320896.0, + "7": 730320896.0, + "8": 730320896.0, + "9": 730320896.0, + "10": 730320896.0, + "11": 
730320896.0, + "12": 730320896.0, + "13": 730320896.0, + "14": 730320896.0, + "15": 730320896.0, + "16": 730320896.0, + "17": 730320896.0, + "18": 730320896.0, + "19": 730320896.0, + "20": 730320896.0, + "21": 730320896.0, + "22": 730320896.0, + "23": 730320896.0, + "24": 730320896.0, + "25": 730320896.0, + "26": 730320896.0, + "27": 730320896.0, + "28": 730320896.0, + "29": 730320896.0, + "30": 730320896.0, + "31": 730320896.0, + "32": 730320896.0, + "33": 730320896.0, + "34": 730320896.0, + "35": 730320896.0, + "36": 730320896.0, + "37": 730320896.0, + "38": 730320896.0, + "39": 730320896.0, + "40": 730320896.0, + "41": 730320896.0, + "42": 730320896.0, + "43": 730320896.0, + "44": 730320896.0, + "45": 730320896.0, + "46": 730320896.0, + "47": 730320896.0, + "48": 730320896.0, + "49": 730320896.0, + "50": 730320896.0, + "51": 730320896.0, + "52": 730320896.0, + "53": 730320896.0, + "54": 730320896.0, + "55": 730320896.0, + "56": 730320896.0, + "57": 730320896.0, + "58": 730320896.0, + "59": 730320896.0, + "60": 730320896.0, + "61": 730320896.0, + "62": 730320896.0, + "63": 730320896.0, + "64": 730320896.0, + "65": 730320896.0, + "66": 730320896.0, + "67": 730320896.0, + "68": 730320896.0, + "69": 730320896.0, + "70": 730320896.0, + "71": 730320896.0, + "72": 730320896.0, + "73": 730320896.0, + "74": 730320896.0, + "75": 730320896.0, + "76": 730320896.0, + "77": 730320896.0, + "78": 730320896.0, + "79": 730320896.0, + "80": 730320896.0, + "81": 730320896.0, + "82": 730320896.0, + "83": 730320896.0, + "84": 730320896.0, + "85": 730320896.0, + "86": 730320896.0, + "87": 730320896.0, + "88": 730320896.0, + "89": 730320896.0, + "90": 730320896.0, + "91": 730320896.0, + "92": 730320896.0, + "93": 730320896.0, + "94": 730320896.0, + "95": 730320896.0, + "96": 730320896.0, + "97": 730320896.0, + "98": 730320896.0, + "99": 730320896.0, + "100": 730320896.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + 
"1": 3837453312.0, + "2": 4119164928.0, + "3": 4119164928.0, + "4": 4119164928.0, + "5": 4119164928.0, + "6": 4119164928.0, + "7": 4119164928.0, + "8": 4119164928.0, + "9": 4119164928.0, + "10": 4119164928.0, + "11": 4119164928.0, + "12": 4119164928.0, + "13": 4119164928.0, + "14": 4119164928.0, + "15": 4119164928.0, + "16": 4119164928.0, + "17": 4119164928.0, + "18": 4119164928.0, + "19": 4119164928.0, + "20": 4119164928.0, + "21": 4119164928.0, + "22": 4119164928.0, + "23": 4119164928.0, + "24": 4119164928.0, + "25": 4119164928.0, + "26": 4119164928.0, + "27": 4119164928.0, + "28": 4119164928.0, + "29": 4119164928.0, + "30": 4119164928.0, + "31": 4119164928.0, + "32": 4119164928.0, + "33": 4119164928.0, + "34": 4119164928.0, + "35": 4119164928.0, + "36": 4119164928.0, + "37": 4119164928.0, + "38": 4119164928.0, + "39": 4119164928.0, + "40": 4119164928.0, + "41": 4119164928.0, + "42": 4119164928.0, + "43": 4119164928.0, + "44": 4119164928.0, + "45": 4119164928.0, + "46": 4119164928.0, + "47": 4119164928.0, + "48": 4119164928.0, + "49": 4119164928.0, + "50": 4119164928.0, + "51": 4119164928.0, + "52": 4119164928.0, + "53": 4119164928.0, + "54": 4119164928.0, + "55": 4119164928.0, + "56": 4119164928.0, + "57": 4119164928.0, + "58": 4119164928.0, + "59": 4119164928.0, + "60": 4119164928.0, + "61": 4119164928.0, + "62": 4119164928.0, + "63": 4119164928.0, + "64": 4119164928.0, + "65": 4119164928.0, + "66": 4119164928.0, + "67": 4119164928.0, + "68": 4119164928.0, + "69": 4119164928.0, + "70": 4119164928.0, + "71": 4119164928.0, + "72": 4119164928.0, + "73": 4119164928.0, + "74": 4119164928.0, + "75": 4119164928.0, + "76": 4119164928.0, + "77": 4119164928.0, + "78": 4119164928.0, + "79": 4119164928.0, + "80": 4119164928.0, + "81": 4119164928.0, + "82": 4119164928.0, + "83": 4119164928.0, + "84": 4119164928.0, + "85": 4119164928.0, + "86": 4119164928.0, + "87": 4119164928.0, + "88": 4119164928.0, + "89": 4119164928.0, + "90": 4119164928.0, + "91": 4119164928.0, + "92": 
4119164928.0, + "93": 4119164928.0, + "94": 4119164928.0, + "95": 4119164928.0, + "96": 4119164928.0, + "97": 4119164928.0, + "98": 4119164928.0, + "99": 4119164928.0, + "100": 4119164928.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 20.0062, + "2": 0.22515, + "3": 0.1977, + "4": 0.18911, + "5": 0.18615, + "6": 0.17034, + "7": 0.16978, + "8": 0.172, + "9": 0.17258, + "10": 0.17365, + "11": 0.17197, + "12": 0.17127, + "13": 0.16991, + "14": 0.16997, + "15": 0.16994, + "16": 0.17143, + "17": 0.17095, + "18": 0.17098, + "19": 0.16956, + "20": 0.1705, + "21": 0.17016, + "22": 0.1709, + "23": 0.18003, + "24": 0.1728, + "25": 0.17179, + "26": 0.17099, + "27": 0.1721, + "28": 0.17027, + "29": 0.17076, + "30": 0.17085, + "31": 0.17145, + "32": 0.17023, + "33": 0.17166, + "34": 0.17042, + "35": 0.17306, + "36": 0.17083, + "37": 0.17109, + "38": 0.17096, + "39": 0.17162, + "40": 0.1709, + "41": 0.17007, + "42": 0.17021, + "43": 0.1703, + "44": 0.1709, + "45": 0.17091, + "46": 0.1708, + "47": 0.17037, + "48": 0.17053, + "49": 0.17145, + "50": 0.17057, + "51": 0.17728, + "52": 0.17072, + "53": 0.17004, + "54": 0.17259, + "55": 0.17417, + "56": 0.17223, + "57": 0.1731, + "58": 0.172, + "59": 0.17128, + "60": 0.17384, + "61": 0.17393, + "62": 0.17367, + "63": 0.17427, + "64": 0.17235, + "65": 0.17484, + "66": 0.1728, + "67": 0.17351, + "68": 0.17401, + "69": 0.17395, + "70": 0.1725, + "71": 0.17219, + "72": 0.17187, + "73": 0.17393, + "74": 0.17345, + "75": 0.17421, + "76": 0.17406, + "77": 0.17155, + "78": 0.1728, + "79": 0.17462, + "80": 0.17582, + "81": 0.17113, + "82": 0.17105, + "83": 0.17061, + "84": 0.17127, + "85": 0.17361, + "86": 0.17294, + "87": 0.17183, + "88": 0.17162, + "89": 0.17105, + "90": 0.17179, + "91": 0.17278, + "92": 0.17216, + "93": 0.17178, + "94": 0.17267, + "95": 0.1706, + "96": 0.17363, + "97": 0.17455, + "98": 0.17149, + "99": 0.17187, + "100": 0.1711 + } + } +} \ No newline at end 
of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..f2fcc6e9139 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81397, + "4": 10.78498, + "5": 10.85285, + "6": 10.87448, + "7": 10.83201, + "8": 10.83296, + "9": 10.83936, + "10": 10.78449, + "11": 10.87794, + "12": 10.86113, + "13": 10.86438, + "14": 10.87595, + "15": 10.79226, + "16": 10.79507, + "17": 10.76764, + "18": 10.80977, + "19": 10.79693, + "20": 10.69196, + "21": 10.68154, + "22": 10.52072, + "23": 10.70881, + "24": 10.5753, + "25": 10.52318, + "26": 10.59411, + "27": 10.58357, + "28": 10.56188, + "29": 10.5696, + "30": 10.34505, + "31": 10.09986, + "32": 10.45209, + "33": 10.44378, + "34": 10.20285, + "35": 10.25888, + "36": 10.20951, + "37": 10.32305, + "38": 10.1656, + "39": 10.38115, + "40": 10.07032, + "41": 10.1364, + "42": 10.19467, + "43": 9.80541, + "44": 9.92556, + "45": 9.803, + "46": 9.80008, + "47": 10.11716, + "48": 9.81309, + "49": 9.49911, + "50": 9.87675, + "51": 9.82883, + "52": 9.71745, + "53": 10.03867, + "54": 9.92195, + "55": 9.85523, + "56": 9.5922, + "57": 9.44053, + "58": 9.79679, + "59": 9.5545, + "60": 9.46634, + "61": 9.66578, + "62": 9.95346, + "63": 9.33681, + "64": 9.74137, + "65": 8.91657, + "66": 9.66586, + "67": 9.34349, + "68": 9.75312, + "69": 9.75728, + "70": 9.69276, + "71": 9.58799, + "72": 9.55054, + "73": 9.46306, + "74": 8.90575, + "75": 9.37813, + "76": 9.04954, + "77": 10.02987, + "78": 9.69223, 
+ "79": 9.33487, + "80": 9.368, + "81": 9.44383, + "82": 9.66162, + "83": 9.27183, + "84": 9.38074, + "85": 9.57598, + "86": 9.0429, + "87": 9.55787, + "88": 9.70459, + "89": 9.56609, + "90": 9.77247, + "91": 9.29341, + "92": 9.31916, + "93": 9.03465, + "94": 8.78492, + "95": 9.46912, + "96": 9.47453, + "97": 9.25689, + "98": 9.61859, + "99": 8.83266, + "100": 9.34574 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1686.0, + "4": 1707.0, + "5": 1915.0, + "6": 1734.0, + "7": 1735.0, + "8": 1584.0, + "9": 1810.0, + "10": 1361.0, + "11": 1884.0, + "12": 1714.0, + "13": 1923.0, + "14": 1736.0, + "15": 1831.0, + "16": 1684.0, + "17": 1787.0, + "18": 1707.0, + "19": 1680.0, + "20": 1695.0, + "21": 1815.0, + "22": 1711.0, + "23": 2079.0, + "24": 1677.0, + "25": 1650.0, + "26": 1714.0, + "27": 1813.0, + "28": 1998.0, + "29": 1931.0, + "30": 1861.0, + "31": 1573.0, + "32": 1934.0, + "33": 2063.0, + "34": 1891.0, + "35": 1916.0, + "36": 1939.0, + "37": 2299.0, + "38": 2235.0, + "39": 2352.0, + "40": 2109.0, + "41": 2286.0, + "42": 2232.0, + "43": 1919.0, + "44": 2032.0, + "45": 2098.0, + "46": 2287.0, + "47": 2513.0, + "48": 2360.0, + "49": 2126.0, + "50": 2424.0, + "51": 2433.0, + "52": 2566.0, + "53": 2902.0, + "54": 2589.0, + "55": 2309.0, + "56": 2761.0, + "57": 2265.0, + "58": 2876.0, + "59": 2821.0, + "60": 2432.0, + "61": 3073.0, + "62": 2638.0, + "63": 2426.0, + "64": 2913.0, + "65": 2660.0, + "66": 2985.0, + "67": 2723.0, + "68": 2790.0, + "69": 2997.0, + "70": 3132.0, + "71": 2837.0, + "72": 2291.0, + "73": 2780.0, + "74": 1936.0, + "75": 2555.0, + "76": 3028.0, + "77": 3175.0, + "78": 3109.0, + "79": 2994.0, + "80": 3370.0, + "81": 3552.0, + "82": 3308.0, + "83": 2898.0, + "84": 3285.0, + "85": 3434.0, + "86": 2573.0, + "87": 3858.0, + "88": 2920.0, + "89": 3217.0, + "90": 2868.0, + "91": 2784.0, + "92": 3011.0, + "93": 2700.0, + "94": 3372.0, + "95": 3273.0, + "96": 3557.0, + 
"97": 3145.0, + "98": 3635.0, + "99": 3308.0, + "100": 3359.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 730320896.0, + "2": 730320896.0, + "3": 730320896.0, + "4": 730320896.0, + "5": 730320896.0, + "6": 730320896.0, + "7": 730320896.0, + "8": 730320896.0, + "9": 730320896.0, + "10": 730320896.0, + "11": 730320896.0, + "12": 730320896.0, + "13": 730320896.0, + "14": 730320896.0, + "15": 730320896.0, + "16": 730320896.0, + "17": 730320896.0, + "18": 730320896.0, + "19": 730320896.0, + "20": 730320896.0, + "21": 730320896.0, + "22": 730320896.0, + "23": 730320896.0, + "24": 730320896.0, + "25": 730320896.0, + "26": 730320896.0, + "27": 730320896.0, + "28": 730320896.0, + "29": 730320896.0, + "30": 730320896.0, + "31": 730320896.0, + "32": 730320896.0, + "33": 730320896.0, + "34": 730320896.0, + "35": 730320896.0, + "36": 730320896.0, + "37": 730320896.0, + "38": 730320896.0, + "39": 730320896.0, + "40": 730320896.0, + "41": 730320896.0, + "42": 730320896.0, + "43": 730320896.0, + "44": 730320896.0, + "45": 730320896.0, + "46": 730320896.0, + "47": 730320896.0, + "48": 730320896.0, + "49": 730320896.0, + "50": 730320896.0, + "51": 730320896.0, + "52": 730320896.0, + "53": 730320896.0, + "54": 730320896.0, + "55": 730320896.0, + "56": 730320896.0, + "57": 730320896.0, + "58": 730320896.0, + "59": 730320896.0, + "60": 730320896.0, + "61": 730320896.0, + "62": 730320896.0, + "63": 730320896.0, + "64": 730320896.0, + "65": 730320896.0, + "66": 730320896.0, + "67": 730320896.0, + "68": 730320896.0, + "69": 730320896.0, + "70": 730320896.0, + "71": 730320896.0, + "72": 730320896.0, + "73": 730320896.0, + "74": 730320896.0, + "75": 730320896.0, + "76": 730320896.0, + "77": 730320896.0, + "78": 730320896.0, + "79": 730320896.0, + "80": 730320896.0, + "81": 730320896.0, + "82": 730320896.0, + "83": 730320896.0, + "84": 730320896.0, + "85": 730320896.0, + "86": 730320896.0, + "87": 730320896.0, + "88": 
730320896.0, + "89": 730320896.0, + "90": 730320896.0, + "91": 730320896.0, + "92": 730320896.0, + "93": 730320896.0, + "94": 730320896.0, + "95": 730320896.0, + "96": 730320896.0, + "97": 730320896.0, + "98": 730320896.0, + "99": 730320896.0, + "100": 730320896.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3837453312.0, + "2": 4119164928.0, + "3": 4119164928.0, + "4": 4119164928.0, + "5": 4119164928.0, + "6": 4119164928.0, + "7": 4119164928.0, + "8": 4119164928.0, + "9": 4119164928.0, + "10": 4119164928.0, + "11": 4119164928.0, + "12": 4119164928.0, + "13": 4119164928.0, + "14": 4119164928.0, + "15": 4119164928.0, + "16": 4119164928.0, + "17": 4119164928.0, + "18": 4119164928.0, + "19": 4119164928.0, + "20": 4119164928.0, + "21": 4119164928.0, + "22": 4119164928.0, + "23": 4119164928.0, + "24": 4119164928.0, + "25": 4119164928.0, + "26": 4119164928.0, + "27": 4119164928.0, + "28": 4119164928.0, + "29": 4119164928.0, + "30": 4119164928.0, + "31": 4119164928.0, + "32": 4119164928.0, + "33": 4119164928.0, + "34": 4119164928.0, + "35": 4119164928.0, + "36": 4119164928.0, + "37": 4119164928.0, + "38": 4119164928.0, + "39": 4119164928.0, + "40": 4119164928.0, + "41": 4119164928.0, + "42": 4119164928.0, + "43": 4119164928.0, + "44": 4119164928.0, + "45": 4119164928.0, + "46": 4119164928.0, + "47": 4119164928.0, + "48": 4119164928.0, + "49": 4119164928.0, + "50": 4119164928.0, + "51": 4119164928.0, + "52": 4119164928.0, + "53": 4119164928.0, + "54": 4119164928.0, + "55": 4119164928.0, + "56": 4119164928.0, + "57": 4119164928.0, + "58": 4119164928.0, + "59": 4119164928.0, + "60": 4119164928.0, + "61": 4119164928.0, + "62": 4119164928.0, + "63": 4119164928.0, + "64": 4119164928.0, + "65": 4119164928.0, + "66": 4119164928.0, + "67": 4119164928.0, + "68": 4119164928.0, + "69": 4119164928.0, + "70": 4119164928.0, + "71": 4119164928.0, + "72": 4119164928.0, + "73": 4119164928.0, + "74": 4119164928.0, + 
"75": 4119164928.0, + "76": 4119164928.0, + "77": 4119164928.0, + "78": 4119164928.0, + "79": 4119164928.0, + "80": 4119164928.0, + "81": 4119164928.0, + "82": 4119164928.0, + "83": 4119164928.0, + "84": 4119164928.0, + "85": 4119164928.0, + "86": 4119164928.0, + "87": 4119164928.0, + "88": 4119164928.0, + "89": 4119164928.0, + "90": 4119164928.0, + "91": 4119164928.0, + "92": 4119164928.0, + "93": 4119164928.0, + "94": 4119164928.0, + "95": 4119164928.0, + "96": 4119164928.0, + "97": 4119164928.0, + "98": 4119164928.0, + "99": 4119164928.0, + "100": 4119164928.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 20.54847, + "2": 0.20654, + "3": 0.17899, + "4": 0.17609, + "5": 0.17607, + "6": 0.17545, + "7": 0.17582, + "8": 0.3981, + "9": 0.17427, + "10": 0.17111, + "11": 0.1706, + "12": 0.17427, + "13": 0.17652, + "14": 0.17107, + "15": 0.17191, + "16": 0.1696, + "17": 0.17104, + "18": 0.16925, + "19": 0.16894, + "20": 0.17181, + "21": 0.1703, + "22": 0.1722, + "23": 0.16959, + "24": 0.18369, + "25": 0.17058, + "26": 0.17105, + "27": 0.16942, + "28": 0.1691, + "29": 0.16894, + "30": 0.17, + "31": 0.17083, + "32": 0.17034, + "33": 0.16855, + "34": 0.16981, + "35": 0.1699, + "36": 0.16909, + "37": 0.16901, + "38": 0.16998, + "39": 0.16957, + "40": 0.17038, + "41": 0.16846, + "42": 0.16847, + "43": 0.16956, + "44": 0.16964, + "45": 0.16919, + "46": 0.16891, + "47": 0.16901, + "48": 0.16904, + "49": 0.16981, + "50": 0.17034, + "51": 0.17135, + "52": 0.16786, + "53": 0.1668, + "54": 0.1671, + "55": 0.16695, + "56": 0.16737, + "57": 0.1668, + "58": 0.16761, + "59": 0.16755, + "60": 0.16907, + "61": 0.16638, + "62": 0.16819, + "63": 0.16827, + "64": 0.17031, + "65": 0.167, + "66": 0.39277, + "67": 0.16989, + "68": 0.16709, + "69": 0.16761, + "70": 0.16602, + "71": 0.168, + "72": 0.16646, + "73": 0.16976, + "74": 0.16686, + "75": 0.16959, + "76": 0.16956, + "77": 0.1686, + "78": 0.16588, + "79": 0.16726, + "80": 
0.16802, + "81": 0.16806, + "82": 0.1664, + "83": 0.16817, + "84": 0.16729, + "85": 0.1687, + "86": 0.16736, + "87": 0.1677, + "88": 0.16777, + "89": 0.16794, + "90": 0.16675, + "91": 0.1685, + "92": 0.1679, + "93": 0.16927, + "94": 0.16945, + "95": 0.171, + "96": 0.1671, + "97": 0.38537, + "98": 0.16869, + "99": 0.1704, + "100": 0.16709 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 4a4be7c6755..c681b5bd1b4 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.84517, "5": 10.87427, "10": 10.82907, "15": 10.81974, "20": 10.727, "25": 10.55217, "30": 10.36614, "35": 10.2778, "40": 10.0976, "45": 9.84196, "50": 9.9125, "55": 9.88096, "60": 9.50125, "65": 8.94761, "70": 9.7424, "75": 9.42532, "80": 9.40396, "85": 9.61405, "90": 9.81418, "95": 9.5173, "100": 9.39541}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1655.0, "5": 1803.0, "10": 1448.0, "15": 1879.0, "20": 1657.0, "25": 1625.0, "30": 1882.0, "35": 1954.0, "40": 2191.0, "45": 2091.0, "50": 2189.0, "55": 2325.0, "60": 2361.0, "65": 2673.0, "70": 3139.0, "75": 2519.0, "80": 3205.0, "85": 3209.0, "90": 3168.0, "95": 3261.0, "100": 3135.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 763220480.0, "5": 763220480.0, "10": 763220480.0, 
"15": 763220480.0, "20": 763220480.0, "25": 763220480.0, "30": 763220480.0, "35": 763220480.0, "40": 763220480.0, "45": 763220480.0, "50": 763220480.0, "55": 763220480.0, "60": 763220480.0, "65": 763220480.0, "70": 763220480.0, "75": 763220480.0, "80": 763220480.0, "85": 763220480.0, "90": 763220480.0, "95": 763220480.0, "100": 763220480.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2359490560.0, "5": 2643299328.0, "10": 2643299328.0, "15": 2643299328.0, "20": 2643299328.0, "25": 2643299328.0, "30": 2643299328.0, "35": 2643299328.0, "40": 2643299328.0, "45": 2643299328.0, "50": 2643299328.0, "55": 2643299328.0, "60": 2643299328.0, "65": 2643299328.0, "70": 2643299328.0, "75": 2643299328.0, "80": 2643299328.0, "85": 2643299328.0, "90": 2643299328.0, "95": 2643299328.0, "100": 2643299328.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 16.90194, "5": 0.09713, "10": 0.1002, "15": 0.09686, "20": 0.0971, "25": 0.09785, "30": 0.10076, "35": 0.09808, "40": 0.10148, "45": 0.10005, "50": 0.09728, "55": 0.09621, "60": 0.09718, "65": 0.10047, "70": 0.09897, "75": 0.10302, "80": 0.10138, "85": 0.10032, "90": 0.097, "95": 0.09743, "100": 0.09586}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84517, + "2": 10.85349, + "3": 10.8539, + "4": 10.83825, + "5": 10.87427, + "6": 10.89307, + "7": 10.85454, + "8": 10.8626, + "9": 10.86468, + "10": 10.82907, + "11": 10.88789, + "12": 10.87095, + "13": 10.87916, + "14": 10.89079, + "15": 10.81974, + "16": 10.83162, + "17": 10.79863, + "18": 10.81667, + "19": 10.81919, + "20": 10.727, + "21": 10.70594, + "22": 10.56364, + "23": 10.72802, + "24": 10.60832, + "25": 10.55217, + "26": 10.60845, + "27": 10.62847, + "28": 10.5831, + "29": 10.60012, + "30": 10.36614, + "31": 10.12044, + "32": 10.47684, + "33": 10.46873, + "34": 10.22319, + "35": 10.2778, + "36": 
10.22892, + "37": 10.35949, + "38": 10.19371, + "39": 10.4155, + "40": 10.0976, + "41": 10.15737, + "42": 10.22396, + "43": 9.83286, + "44": 9.96916, + "45": 9.84196, + "46": 9.83045, + "47": 10.15628, + "48": 9.85484, + "49": 9.54086, + "50": 9.9125, + "51": 9.8587, + "52": 9.74287, + "53": 10.06647, + "54": 9.95168, + "55": 9.88096, + "56": 9.62625, + "57": 9.47766, + "58": 9.8335, + "59": 9.58522, + "60": 9.50125, + "61": 9.69186, + "62": 9.98858, + "63": 9.38478, + "64": 9.78027, + "65": 8.94761, + "66": 9.70857, + "67": 9.36847, + "68": 9.78438, + "69": 9.79407, + "70": 9.7424, + "71": 9.61808, + "72": 9.58427, + "73": 9.50347, + "74": 8.9422, + "75": 9.42532, + "76": 9.07407, + "77": 10.06351, + "78": 9.7208, + "79": 9.37296, + "80": 9.40396, + "81": 9.48168, + "82": 9.69778, + "83": 9.30711, + "84": 9.41712, + "85": 9.61405, + "86": 9.07618, + "87": 9.59088, + "88": 9.7464, + "89": 9.59987, + "90": 9.81418, + "91": 9.33775, + "92": 9.35372, + "93": 9.07397, + "94": 8.8317, + "95": 9.5173, + "96": 9.52412, + "97": 9.30995, + "98": 9.66807, + "99": 8.8859, + "100": 9.39541 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1655.0, + "2": 1697.0, + "3": 1724.0, + "4": 1720.0, + "5": 1803.0, + "6": 1772.0, + "7": 1811.0, + "8": 1678.0, + "9": 1828.0, + "10": 1448.0, + "11": 1890.0, + "12": 1657.0, + "13": 1852.0, + "14": 1717.0, + "15": 1879.0, + "16": 1921.0, + "17": 1666.0, + "18": 1729.0, + "19": 1767.0, + "20": 1657.0, + "21": 1827.0, + "22": 1594.0, + "23": 1918.0, + "24": 1622.0, + "25": 1625.0, + "26": 1649.0, + "27": 1788.0, + "28": 2030.0, + "29": 1980.0, + "30": 1882.0, + "31": 1564.0, + "32": 1918.0, + "33": 2045.0, + "34": 1884.0, + "35": 1954.0, + "36": 1910.0, + "37": 2267.0, + "38": 2195.0, + "39": 2346.0, + "40": 2191.0, + "41": 2171.0, + "42": 2246.0, + "43": 1997.0, + "44": 2156.0, + "45": 2091.0, + "46": 2439.0, + "47": 2539.0, + "48": 2418.0, + "49": 2207.0, + "50": 2189.0, + "51": 
2608.0, + "52": 2444.0, + "53": 2898.0, + "54": 2664.0, + "55": 2325.0, + "56": 2614.0, + "57": 2394.0, + "58": 2812.0, + "59": 2771.0, + "60": 2361.0, + "61": 2855.0, + "62": 2675.0, + "63": 2393.0, + "64": 3014.0, + "65": 2673.0, + "66": 3051.0, + "67": 2657.0, + "68": 2662.0, + "69": 2736.0, + "70": 3139.0, + "71": 2943.0, + "72": 2293.0, + "73": 2908.0, + "74": 1887.0, + "75": 2519.0, + "76": 3060.0, + "77": 3191.0, + "78": 3211.0, + "79": 3081.0, + "80": 3205.0, + "81": 3563.0, + "82": 3201.0, + "83": 2614.0, + "84": 3162.0, + "85": 3209.0, + "86": 2660.0, + "87": 3729.0, + "88": 3002.0, + "89": 3160.0, + "90": 3168.0, + "91": 2753.0, + "92": 3258.0, + "93": 2617.0, + "94": 3341.0, + "95": 3261.0, + "96": 3370.0, + "97": 3163.0, + "98": 3566.0, + "99": 3179.0, + "100": 3135.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 763220480.0, + "2": 763220480.0, + "3": 763220480.0, + "4": 763220480.0, + "5": 763220480.0, + "6": 763220480.0, + "7": 763220480.0, + "8": 763220480.0, + "9": 763220480.0, + "10": 763220480.0, + "11": 763220480.0, + "12": 763220480.0, + "13": 763220480.0, + "14": 763220480.0, + "15": 763220480.0, + "16": 763220480.0, + "17": 763220480.0, + "18": 763220480.0, + "19": 763220480.0, + "20": 763220480.0, + "21": 763220480.0, + "22": 763220480.0, + "23": 763220480.0, + "24": 763220480.0, + "25": 763220480.0, + "26": 763220480.0, + "27": 763220480.0, + "28": 763220480.0, + "29": 763220480.0, + "30": 763220480.0, + "31": 763220480.0, + "32": 763220480.0, + "33": 763220480.0, + "34": 763220480.0, + "35": 763220480.0, + "36": 763220480.0, + "37": 763220480.0, + "38": 763220480.0, + "39": 763220480.0, + "40": 763220480.0, + "41": 763220480.0, + "42": 763220480.0, + "43": 763220480.0, + "44": 763220480.0, + "45": 763220480.0, + "46": 763220480.0, + "47": 763220480.0, + "48": 763220480.0, + "49": 763220480.0, + "50": 763220480.0, + "51": 763220480.0, + "52": 763220480.0, + "53": 
763220480.0, + "54": 763220480.0, + "55": 763220480.0, + "56": 763220480.0, + "57": 763220480.0, + "58": 763220480.0, + "59": 763220480.0, + "60": 763220480.0, + "61": 763220480.0, + "62": 763220480.0, + "63": 763220480.0, + "64": 763220480.0, + "65": 763220480.0, + "66": 763220480.0, + "67": 763220480.0, + "68": 763220480.0, + "69": 763220480.0, + "70": 763220480.0, + "71": 763220480.0, + "72": 763220480.0, + "73": 763220480.0, + "74": 763220480.0, + "75": 763220480.0, + "76": 763220480.0, + "77": 763220480.0, + "78": 763220480.0, + "79": 763220480.0, + "80": 763220480.0, + "81": 763220480.0, + "82": 763220480.0, + "83": 763220480.0, + "84": 763220480.0, + "85": 763220480.0, + "86": 763220480.0, + "87": 763220480.0, + "88": 763220480.0, + "89": 763220480.0, + "90": 763220480.0, + "91": 763220480.0, + "92": 763220480.0, + "93": 763220480.0, + "94": 763220480.0, + "95": 763220480.0, + "96": 763220480.0, + "97": 763220480.0, + "98": 763220480.0, + "99": 763220480.0, + "100": 763220480.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2359490560.0, + "2": 2643299328.0, + "3": 2643299328.0, + "4": 2643299328.0, + "5": 2643299328.0, + "6": 2643299328.0, + "7": 2643299328.0, + "8": 2643299328.0, + "9": 2643299328.0, + "10": 2643299328.0, + "11": 2643299328.0, + "12": 2643299328.0, + "13": 2643299328.0, + "14": 2643299328.0, + "15": 2643299328.0, + "16": 2643299328.0, + "17": 2643299328.0, + "18": 2643299328.0, + "19": 2643299328.0, + "20": 2643299328.0, + "21": 2643299328.0, + "22": 2643299328.0, + "23": 2643299328.0, + "24": 2643299328.0, + "25": 2643299328.0, + "26": 2643299328.0, + "27": 2643299328.0, + "28": 2643299328.0, + "29": 2643299328.0, + "30": 2643299328.0, + "31": 2643299328.0, + "32": 2643299328.0, + "33": 2643299328.0, + "34": 2643299328.0, + "35": 2643299328.0, + "36": 2643299328.0, + "37": 2643299328.0, + "38": 2643299328.0, + "39": 2643299328.0, + "40": 2643299328.0, + "41": 
2643299328.0, + "42": 2643299328.0, + "43": 2643299328.0, + "44": 2643299328.0, + "45": 2643299328.0, + "46": 2643299328.0, + "47": 2643299328.0, + "48": 2643299328.0, + "49": 2643299328.0, + "50": 2643299328.0, + "51": 2643299328.0, + "52": 2643299328.0, + "53": 2643299328.0, + "54": 2643299328.0, + "55": 2643299328.0, + "56": 2643299328.0, + "57": 2643299328.0, + "58": 2643299328.0, + "59": 2643299328.0, + "60": 2643299328.0, + "61": 2643299328.0, + "62": 2643299328.0, + "63": 2643299328.0, + "64": 2643299328.0, + "65": 2643299328.0, + "66": 2643299328.0, + "67": 2643299328.0, + "68": 2643299328.0, + "69": 2643299328.0, + "70": 2643299328.0, + "71": 2643299328.0, + "72": 2643299328.0, + "73": 2643299328.0, + "74": 2643299328.0, + "75": 2643299328.0, + "76": 2643299328.0, + "77": 2643299328.0, + "78": 2643299328.0, + "79": 2643299328.0, + "80": 2643299328.0, + "81": 2643299328.0, + "82": 2643299328.0, + "83": 2643299328.0, + "84": 2643299328.0, + "85": 2643299328.0, + "86": 2643299328.0, + "87": 2643299328.0, + "88": 2643299328.0, + "89": 2643299328.0, + "90": 2643299328.0, + "91": 2643299328.0, + "92": 2643299328.0, + "93": 2643299328.0, + "94": 2643299328.0, + "95": 2643299328.0, + "96": 2643299328.0, + "97": 2643299328.0, + "98": 2643299328.0, + "99": 2643299328.0, + "100": 2643299328.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 17.55882, + "2": 0.13655, + "3": 0.11858, + "4": 0.11941, + "5": 0.11739, + "6": 0.11681, + "7": 0.11862, + "8": 0.11921, + "9": 0.11665, + "10": 0.11215, + "11": 0.11312, + "12": 0.1133, + "13": 0.11518, + "14": 0.11608, + "15": 0.11464, + "16": 0.11376, + "17": 0.11276, + "18": 0.11015, + "19": 0.11044, + "20": 0.11079, + "21": 0.11474, + "22": 0.11541, + "23": 0.11297, + "24": 0.11166, + "25": 0.11284, + "26": 0.11199, + "27": 0.11465, + "28": 0.11372, + "29": 0.10904, + "30": 0.10993, + "31": 0.1098, + "32": 0.10938, + "33": 0.10814, + "34": 0.11037, + "35": 
0.11052, + "36": 0.1106, + "37": 0.11033, + "38": 0.10993, + "39": 0.11259, + "40": 0.11019, + "41": 0.11104, + "42": 0.10843, + "43": 0.10994, + "44": 0.10984, + "45": 0.11066, + "46": 0.11026, + "47": 0.11119, + "48": 0.11328, + "49": 0.11122, + "50": 0.11048, + "51": 0.11634, + "52": 0.10989, + "53": 0.10877, + "54": 0.10843, + "55": 0.1103, + "56": 0.11044, + "57": 0.11032, + "58": 0.10904, + "59": 0.1093, + "60": 0.10814, + "61": 0.10768, + "62": 0.10827, + "63": 0.11047, + "64": 0.10921, + "65": 0.11011, + "66": 0.11245, + "67": 0.10798, + "68": 0.11072, + "69": 0.10966, + "70": 0.10787, + "71": 0.10889, + "72": 0.10915, + "73": 0.10943, + "74": 0.11136, + "75": 0.11012, + "76": 0.11056, + "77": 0.1092, + "78": 0.11055, + "79": 0.11067, + "80": 0.11178, + "81": 0.11295, + "82": 0.11012, + "83": 0.11251, + "84": 0.11453, + "85": 0.11392, + "86": 0.1136, + "87": 0.10936, + "88": 0.10748, + "89": 0.109, + "90": 0.10971, + "91": 0.10877, + "92": 0.1101, + "93": 0.11367, + "94": 0.11157, + "95": 0.11149, + "96": 0.10884, + "97": 0.10884, + "98": 0.10766, + "99": 0.10924, + "100": 0.10913 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..14b95ca2ef5 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84517, + "2": 10.85349, + "3": 10.8539, + "4": 10.83825, + "5": 10.87427, + "6": 10.89307, + "7": 10.85454, + "8": 10.8626, + "9": 10.86468, + "10": 
10.82907, + "11": 10.88789, + "12": 10.87095, + "13": 10.87916, + "14": 10.89079, + "15": 10.81974, + "16": 10.83162, + "17": 10.79863, + "18": 10.81667, + "19": 10.81919, + "20": 10.727, + "21": 10.70594, + "22": 10.56364, + "23": 10.72802, + "24": 10.60832, + "25": 10.55217, + "26": 10.60845, + "27": 10.62847, + "28": 10.5831, + "29": 10.60012, + "30": 10.36614, + "31": 10.12044, + "32": 10.47684, + "33": 10.46873, + "34": 10.22319, + "35": 10.2778, + "36": 10.22892, + "37": 10.35949, + "38": 10.19371, + "39": 10.4155, + "40": 10.0976, + "41": 10.15737, + "42": 10.22396, + "43": 9.83286, + "44": 9.96916, + "45": 9.84196, + "46": 9.83045, + "47": 10.15628, + "48": 9.85484, + "49": 9.54086, + "50": 9.9125, + "51": 9.8587, + "52": 9.74287, + "53": 10.06647, + "54": 9.95168, + "55": 9.88096, + "56": 9.62625, + "57": 9.47766, + "58": 9.8335, + "59": 9.58522, + "60": 9.50125, + "61": 9.69186, + "62": 9.98858, + "63": 9.38478, + "64": 9.78027, + "65": 8.94761, + "66": 9.70857, + "67": 9.36847, + "68": 9.78438, + "69": 9.79407, + "70": 9.7424, + "71": 9.61808, + "72": 9.58427, + "73": 9.50347, + "74": 8.9422, + "75": 9.42532, + "76": 9.07407, + "77": 10.06351, + "78": 9.7208, + "79": 9.37296, + "80": 9.40396, + "81": 9.48168, + "82": 9.69778, + "83": 9.30711, + "84": 9.41712, + "85": 9.61405, + "86": 9.07618, + "87": 9.59088, + "88": 9.7464, + "89": 9.59987, + "90": 9.81418, + "91": 9.33775, + "92": 9.35372, + "93": 9.07397, + "94": 8.8317, + "95": 9.5173, + "96": 9.52412, + "97": 9.30995, + "98": 9.66807, + "99": 8.8859, + "100": 9.39541 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1655.0, + "2": 1697.0, + "3": 1724.0, + "4": 1720.0, + "5": 1803.0, + "6": 1772.0, + "7": 1811.0, + "8": 1678.0, + "9": 1828.0, + "10": 1448.0, + "11": 1890.0, + "12": 1657.0, + "13": 1852.0, + "14": 1717.0, + "15": 1879.0, + "16": 1921.0, + "17": 1666.0, + "18": 1729.0, + "19": 1767.0, + "20": 1657.0, + "21": 1827.0, + "22": 
1594.0, + "23": 1918.0, + "24": 1622.0, + "25": 1625.0, + "26": 1649.0, + "27": 1788.0, + "28": 2030.0, + "29": 1980.0, + "30": 1882.0, + "31": 1564.0, + "32": 1918.0, + "33": 2045.0, + "34": 1884.0, + "35": 1954.0, + "36": 1910.0, + "37": 2267.0, + "38": 2195.0, + "39": 2346.0, + "40": 2191.0, + "41": 2171.0, + "42": 2246.0, + "43": 1997.0, + "44": 2156.0, + "45": 2091.0, + "46": 2439.0, + "47": 2539.0, + "48": 2418.0, + "49": 2207.0, + "50": 2189.0, + "51": 2608.0, + "52": 2444.0, + "53": 2898.0, + "54": 2664.0, + "55": 2325.0, + "56": 2614.0, + "57": 2394.0, + "58": 2812.0, + "59": 2771.0, + "60": 2361.0, + "61": 2855.0, + "62": 2675.0, + "63": 2393.0, + "64": 3014.0, + "65": 2673.0, + "66": 3051.0, + "67": 2657.0, + "68": 2662.0, + "69": 2736.0, + "70": 3139.0, + "71": 2943.0, + "72": 2293.0, + "73": 2908.0, + "74": 1887.0, + "75": 2519.0, + "76": 3060.0, + "77": 3191.0, + "78": 3211.0, + "79": 3081.0, + "80": 3205.0, + "81": 3563.0, + "82": 3201.0, + "83": 2614.0, + "84": 3162.0, + "85": 3209.0, + "86": 2660.0, + "87": 3729.0, + "88": 3002.0, + "89": 3160.0, + "90": 3168.0, + "91": 2753.0, + "92": 3258.0, + "93": 2617.0, + "94": 3341.0, + "95": 3261.0, + "96": 3370.0, + "97": 3163.0, + "98": 3566.0, + "99": 3179.0, + "100": 3135.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 763220480.0, + "2": 763220480.0, + "3": 763220480.0, + "4": 763220480.0, + "5": 763220480.0, + "6": 763220480.0, + "7": 763220480.0, + "8": 763220480.0, + "9": 763220480.0, + "10": 763220480.0, + "11": 763220480.0, + "12": 763220480.0, + "13": 763220480.0, + "14": 763220480.0, + "15": 763220480.0, + "16": 763220480.0, + "17": 763220480.0, + "18": 763220480.0, + "19": 763220480.0, + "20": 763220480.0, + "21": 763220480.0, + "22": 763220480.0, + "23": 763220480.0, + "24": 763220480.0, + "25": 763220480.0, + "26": 763220480.0, + "27": 763220480.0, + "28": 763220480.0, + "29": 763220480.0, + "30": 763220480.0, + "31": 
763220480.0, + "32": 763220480.0, + "33": 763220480.0, + "34": 763220480.0, + "35": 763220480.0, + "36": 763220480.0, + "37": 763220480.0, + "38": 763220480.0, + "39": 763220480.0, + "40": 763220480.0, + "41": 763220480.0, + "42": 763220480.0, + "43": 763220480.0, + "44": 763220480.0, + "45": 763220480.0, + "46": 763220480.0, + "47": 763220480.0, + "48": 763220480.0, + "49": 763220480.0, + "50": 763220480.0, + "51": 763220480.0, + "52": 763220480.0, + "53": 763220480.0, + "54": 763220480.0, + "55": 763220480.0, + "56": 763220480.0, + "57": 763220480.0, + "58": 763220480.0, + "59": 763220480.0, + "60": 763220480.0, + "61": 763220480.0, + "62": 763220480.0, + "63": 763220480.0, + "64": 763220480.0, + "65": 763220480.0, + "66": 763220480.0, + "67": 763220480.0, + "68": 763220480.0, + "69": 763220480.0, + "70": 763220480.0, + "71": 763220480.0, + "72": 763220480.0, + "73": 763220480.0, + "74": 763220480.0, + "75": 763220480.0, + "76": 763220480.0, + "77": 763220480.0, + "78": 763220480.0, + "79": 763220480.0, + "80": 763220480.0, + "81": 763220480.0, + "82": 763220480.0, + "83": 763220480.0, + "84": 763220480.0, + "85": 763220480.0, + "86": 763220480.0, + "87": 763220480.0, + "88": 763220480.0, + "89": 763220480.0, + "90": 763220480.0, + "91": 763220480.0, + "92": 763220480.0, + "93": 763220480.0, + "94": 763220480.0, + "95": 763220480.0, + "96": 763220480.0, + "97": 763220480.0, + "98": 763220480.0, + "99": 763220480.0, + "100": 763220480.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2359490560.0, + "2": 2643299328.0, + "3": 2643299328.0, + "4": 2643299328.0, + "5": 2643299328.0, + "6": 2643299328.0, + "7": 2643299328.0, + "8": 2643299328.0, + "9": 2643299328.0, + "10": 2643299328.0, + "11": 2643299328.0, + "12": 2643299328.0, + "13": 2643299328.0, + "14": 2643299328.0, + "15": 2643299328.0, + "16": 2643299328.0, + "17": 2643299328.0, + "18": 2643299328.0, + "19": 2643299328.0, + "20": 
2643299328.0, + "21": 2643299328.0, + "22": 2643299328.0, + "23": 2643299328.0, + "24": 2643299328.0, + "25": 2643299328.0, + "26": 2643299328.0, + "27": 2643299328.0, + "28": 2643299328.0, + "29": 2643299328.0, + "30": 2643299328.0, + "31": 2643299328.0, + "32": 2643299328.0, + "33": 2643299328.0, + "34": 2643299328.0, + "35": 2643299328.0, + "36": 2643299328.0, + "37": 2643299328.0, + "38": 2643299328.0, + "39": 2643299328.0, + "40": 2643299328.0, + "41": 2643299328.0, + "42": 2643299328.0, + "43": 2643299328.0, + "44": 2643299328.0, + "45": 2643299328.0, + "46": 2643299328.0, + "47": 2643299328.0, + "48": 2643299328.0, + "49": 2643299328.0, + "50": 2643299328.0, + "51": 2643299328.0, + "52": 2643299328.0, + "53": 2643299328.0, + "54": 2643299328.0, + "55": 2643299328.0, + "56": 2643299328.0, + "57": 2643299328.0, + "58": 2643299328.0, + "59": 2643299328.0, + "60": 2643299328.0, + "61": 2643299328.0, + "62": 2643299328.0, + "63": 2643299328.0, + "64": 2643299328.0, + "65": 2643299328.0, + "66": 2643299328.0, + "67": 2643299328.0, + "68": 2643299328.0, + "69": 2643299328.0, + "70": 2643299328.0, + "71": 2643299328.0, + "72": 2643299328.0, + "73": 2643299328.0, + "74": 2643299328.0, + "75": 2643299328.0, + "76": 2643299328.0, + "77": 2643299328.0, + "78": 2643299328.0, + "79": 2643299328.0, + "80": 2643299328.0, + "81": 2643299328.0, + "82": 2643299328.0, + "83": 2643299328.0, + "84": 2643299328.0, + "85": 2643299328.0, + "86": 2643299328.0, + "87": 2643299328.0, + "88": 2643299328.0, + "89": 2643299328.0, + "90": 2643299328.0, + "91": 2643299328.0, + "92": 2643299328.0, + "93": 2643299328.0, + "94": 2643299328.0, + "95": 2643299328.0, + "96": 2643299328.0, + "97": 2643299328.0, + "98": 2643299328.0, + "99": 2643299328.0, + "100": 2643299328.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 16.57994, + "2": 0.13128, + "3": 0.10309, + "4": 0.10229, + "5": 0.10072, + "6": 0.09862, + "7": 0.10136, + "8": 
0.10155, + "9": 0.10115, + "10": 0.09973, + "11": 0.10272, + "12": 0.10529, + "13": 0.10516, + "14": 0.10397, + "15": 0.10407, + "16": 0.10362, + "17": 0.10333, + "18": 0.10307, + "19": 0.10283, + "20": 0.09949, + "21": 0.09817, + "22": 0.1027, + "23": 0.10231, + "24": 0.10218, + "25": 0.10307, + "26": 0.10424, + "27": 0.10183, + "28": 0.10321, + "29": 0.10228, + "30": 0.10178, + "31": 0.10491, + "32": 0.10267, + "33": 0.10205, + "34": 0.10154, + "35": 0.10239, + "36": 0.10188, + "37": 0.10547, + "38": 0.10217, + "39": 0.10273, + "40": 0.09793, + "41": 0.09773, + "42": 0.09752, + "43": 0.09866, + "44": 0.0975, + "45": 0.09867, + "46": 0.09876, + "47": 0.09929, + "48": 0.09909, + "49": 0.101, + "50": 0.0978, + "51": 0.10715, + "52": 0.10113, + "53": 0.10133, + "54": 0.10021, + "55": 0.10053, + "56": 0.10041, + "57": 0.10033, + "58": 0.10121, + "59": 0.09846, + "60": 0.09725, + "61": 0.09803, + "62": 0.09772, + "63": 0.09712, + "64": 0.10005, + "65": 0.09924, + "66": 0.09828, + "67": 0.09806, + "68": 0.09771, + "69": 0.103, + "70": 0.10104, + "71": 0.10088, + "72": 0.1012, + "73": 0.10067, + "74": 0.1036, + "75": 0.09878, + "76": 0.10012, + "77": 0.09887, + "78": 0.09891, + "79": 0.09932, + "80": 0.09828, + "81": 0.1, + "82": 0.10177, + "83": 0.09881, + "84": 0.09963, + "85": 0.09854, + "86": 0.09886, + "87": 0.10179, + "88": 0.10085, + "89": 0.10134, + "90": 0.1035, + "91": 0.10105, + "92": 0.10027, + "93": 0.10157, + "94": 0.10164, + "95": 0.10203, + "96": 0.09929, + "97": 0.10135, + "98": 0.10191, + "99": 0.10128, + "100": 0.1009 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..f0d9be9be9d --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84517, + "2": 10.85349, + "3": 10.8539, + "4": 10.83825, + "5": 10.87427, + "6": 10.89307, + "7": 10.85454, + "8": 10.8626, + "9": 10.86468, + "10": 10.82907, + "11": 10.88789, + "12": 10.87095, + "13": 10.87916, + "14": 10.89079, + "15": 10.81974, + "16": 10.83162, + "17": 10.79863, + "18": 10.81667, + "19": 10.81919, + "20": 10.727, + "21": 10.70594, + "22": 10.56364, + "23": 10.72802, + "24": 10.60832, + "25": 10.55217, + "26": 10.60845, + "27": 10.62847, + "28": 10.5831, + "29": 10.60012, + "30": 10.36614, + "31": 10.12044, + "32": 10.47684, + "33": 10.46873, + "34": 10.22319, + "35": 10.2778, + "36": 10.22892, + "37": 10.35949, + "38": 10.19371, + "39": 10.4155, + "40": 10.0976, + "41": 10.15737, + "42": 10.22396, + "43": 9.83286, + "44": 9.96916, + "45": 9.84196, + "46": 9.83045, + "47": 10.15628, + "48": 9.85484, + "49": 9.54086, + "50": 9.9125, + "51": 9.8587, + "52": 9.74287, + "53": 10.06647, + "54": 9.95168, + "55": 9.88096, + "56": 9.62625, + "57": 9.47766, + "58": 9.8335, + "59": 9.58522, + "60": 9.50125, + "61": 9.69186, + "62": 9.98858, + "63": 9.38478, + "64": 9.78027, + "65": 8.94761, + "66": 9.70857, + "67": 9.36847, + "68": 9.78438, + "69": 9.79407, + "70": 9.7424, + "71": 9.61808, + "72": 9.58427, + "73": 9.50347, + "74": 8.9422, + "75": 9.42532, + "76": 9.07407, + "77": 10.06351, + "78": 9.7208, + "79": 9.37296, + "80": 9.40396, + "81": 9.48168, + "82": 9.69778, + "83": 9.30711, + "84": 9.41712, + "85": 9.61405, + "86": 9.07618, + "87": 9.59088, + "88": 9.7464, + "89": 9.59987, + "90": 9.81418, + "91": 9.33775, + "92": 9.35372, + "93": 9.07397, + "94": 8.8317, + "95": 9.5173, + "96": 9.52412, + "97": 9.30995, + "98": 9.66807, + "99": 8.8859, + "100": 9.39541 + } + }, + 
"num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1655.0, + "2": 1697.0, + "3": 1724.0, + "4": 1720.0, + "5": 1803.0, + "6": 1772.0, + "7": 1811.0, + "8": 1678.0, + "9": 1828.0, + "10": 1448.0, + "11": 1890.0, + "12": 1657.0, + "13": 1852.0, + "14": 1717.0, + "15": 1879.0, + "16": 1921.0, + "17": 1666.0, + "18": 1729.0, + "19": 1767.0, + "20": 1657.0, + "21": 1827.0, + "22": 1594.0, + "23": 1918.0, + "24": 1622.0, + "25": 1625.0, + "26": 1649.0, + "27": 1788.0, + "28": 2030.0, + "29": 1980.0, + "30": 1882.0, + "31": 1564.0, + "32": 1918.0, + "33": 2045.0, + "34": 1884.0, + "35": 1954.0, + "36": 1910.0, + "37": 2267.0, + "38": 2195.0, + "39": 2346.0, + "40": 2191.0, + "41": 2171.0, + "42": 2246.0, + "43": 1997.0, + "44": 2156.0, + "45": 2091.0, + "46": 2439.0, + "47": 2539.0, + "48": 2418.0, + "49": 2207.0, + "50": 2189.0, + "51": 2608.0, + "52": 2444.0, + "53": 2898.0, + "54": 2664.0, + "55": 2325.0, + "56": 2614.0, + "57": 2394.0, + "58": 2812.0, + "59": 2771.0, + "60": 2361.0, + "61": 2855.0, + "62": 2675.0, + "63": 2393.0, + "64": 3014.0, + "65": 2673.0, + "66": 3051.0, + "67": 2657.0, + "68": 2662.0, + "69": 2736.0, + "70": 3139.0, + "71": 2943.0, + "72": 2293.0, + "73": 2908.0, + "74": 1887.0, + "75": 2519.0, + "76": 3060.0, + "77": 3191.0, + "78": 3211.0, + "79": 3081.0, + "80": 3205.0, + "81": 3563.0, + "82": 3201.0, + "83": 2614.0, + "84": 3162.0, + "85": 3209.0, + "86": 2660.0, + "87": 3729.0, + "88": 3002.0, + "89": 3160.0, + "90": 3168.0, + "91": 2753.0, + "92": 3258.0, + "93": 2617.0, + "94": 3341.0, + "95": 3261.0, + "96": 3370.0, + "97": 3163.0, + "98": 3566.0, + "99": 3179.0, + "100": 3135.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 763220480.0, + "2": 763220480.0, + "3": 763220480.0, + "4": 763220480.0, + "5": 763220480.0, + "6": 763220480.0, + "7": 763220480.0, + "8": 763220480.0, + "9": 763220480.0, + "10": 763220480.0, + "11": 
763220480.0, + "12": 763220480.0, + "13": 763220480.0, + "14": 763220480.0, + "15": 763220480.0, + "16": 763220480.0, + "17": 763220480.0, + "18": 763220480.0, + "19": 763220480.0, + "20": 763220480.0, + "21": 763220480.0, + "22": 763220480.0, + "23": 763220480.0, + "24": 763220480.0, + "25": 763220480.0, + "26": 763220480.0, + "27": 763220480.0, + "28": 763220480.0, + "29": 763220480.0, + "30": 763220480.0, + "31": 763220480.0, + "32": 763220480.0, + "33": 763220480.0, + "34": 763220480.0, + "35": 763220480.0, + "36": 763220480.0, + "37": 763220480.0, + "38": 763220480.0, + "39": 763220480.0, + "40": 763220480.0, + "41": 763220480.0, + "42": 763220480.0, + "43": 763220480.0, + "44": 763220480.0, + "45": 763220480.0, + "46": 763220480.0, + "47": 763220480.0, + "48": 763220480.0, + "49": 763220480.0, + "50": 763220480.0, + "51": 763220480.0, + "52": 763220480.0, + "53": 763220480.0, + "54": 763220480.0, + "55": 763220480.0, + "56": 763220480.0, + "57": 763220480.0, + "58": 763220480.0, + "59": 763220480.0, + "60": 763220480.0, + "61": 763220480.0, + "62": 763220480.0, + "63": 763220480.0, + "64": 763220480.0, + "65": 763220480.0, + "66": 763220480.0, + "67": 763220480.0, + "68": 763220480.0, + "69": 763220480.0, + "70": 763220480.0, + "71": 763220480.0, + "72": 763220480.0, + "73": 763220480.0, + "74": 763220480.0, + "75": 763220480.0, + "76": 763220480.0, + "77": 763220480.0, + "78": 763220480.0, + "79": 763220480.0, + "80": 763220480.0, + "81": 763220480.0, + "82": 763220480.0, + "83": 763220480.0, + "84": 763220480.0, + "85": 763220480.0, + "86": 763220480.0, + "87": 763220480.0, + "88": 763220480.0, + "89": 763220480.0, + "90": 763220480.0, + "91": 763220480.0, + "92": 763220480.0, + "93": 763220480.0, + "94": 763220480.0, + "95": 763220480.0, + "96": 763220480.0, + "97": 763220480.0, + "98": 763220480.0, + "99": 763220480.0, + "100": 763220480.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + 
"1": 2359490560.0, + "2": 2643299328.0, + "3": 2643299328.0, + "4": 2643299328.0, + "5": 2643299328.0, + "6": 2643299328.0, + "7": 2643299328.0, + "8": 2643299328.0, + "9": 2643299328.0, + "10": 2643299328.0, + "11": 2643299328.0, + "12": 2643299328.0, + "13": 2643299328.0, + "14": 2643299328.0, + "15": 2643299328.0, + "16": 2643299328.0, + "17": 2643299328.0, + "18": 2643299328.0, + "19": 2643299328.0, + "20": 2643299328.0, + "21": 2643299328.0, + "22": 2643299328.0, + "23": 2643299328.0, + "24": 2643299328.0, + "25": 2643299328.0, + "26": 2643299328.0, + "27": 2643299328.0, + "28": 2643299328.0, + "29": 2643299328.0, + "30": 2643299328.0, + "31": 2643299328.0, + "32": 2643299328.0, + "33": 2643299328.0, + "34": 2643299328.0, + "35": 2643299328.0, + "36": 2643299328.0, + "37": 2643299328.0, + "38": 2643299328.0, + "39": 2643299328.0, + "40": 2643299328.0, + "41": 2643299328.0, + "42": 2643299328.0, + "43": 2643299328.0, + "44": 2643299328.0, + "45": 2643299328.0, + "46": 2643299328.0, + "47": 2643299328.0, + "48": 2643299328.0, + "49": 2643299328.0, + "50": 2643299328.0, + "51": 2643299328.0, + "52": 2643299328.0, + "53": 2643299328.0, + "54": 2643299328.0, + "55": 2643299328.0, + "56": 2643299328.0, + "57": 2643299328.0, + "58": 2643299328.0, + "59": 2643299328.0, + "60": 2643299328.0, + "61": 2643299328.0, + "62": 2643299328.0, + "63": 2643299328.0, + "64": 2643299328.0, + "65": 2643299328.0, + "66": 2643299328.0, + "67": 2643299328.0, + "68": 2643299328.0, + "69": 2643299328.0, + "70": 2643299328.0, + "71": 2643299328.0, + "72": 2643299328.0, + "73": 2643299328.0, + "74": 2643299328.0, + "75": 2643299328.0, + "76": 2643299328.0, + "77": 2643299328.0, + "78": 2643299328.0, + "79": 2643299328.0, + "80": 2643299328.0, + "81": 2643299328.0, + "82": 2643299328.0, + "83": 2643299328.0, + "84": 2643299328.0, + "85": 2643299328.0, + "86": 2643299328.0, + "87": 2643299328.0, + "88": 2643299328.0, + "89": 2643299328.0, + "90": 2643299328.0, + "91": 2643299328.0, + "92": 
2643299328.0, + "93": 2643299328.0, + "94": 2643299328.0, + "95": 2643299328.0, + "96": 2643299328.0, + "97": 2643299328.0, + "98": 2643299328.0, + "99": 2643299328.0, + "100": 2643299328.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 17.57509, + "2": 0.1453, + "3": 0.11184, + "4": 0.11457, + "5": 0.12345, + "6": 0.12167, + "7": 0.12451, + "8": 0.11003, + "9": 0.11229, + "10": 0.11078, + "11": 0.11178, + "12": 0.11071, + "13": 0.11183, + "14": 0.1131, + "15": 0.11195, + "16": 0.11109, + "17": 0.11155, + "18": 0.11436, + "19": 0.11335, + "20": 0.11235, + "21": 0.11323, + "22": 0.11234, + "23": 0.1131, + "24": 0.11154, + "25": 0.11274, + "26": 0.11525, + "27": 0.11435, + "28": 0.11247, + "29": 0.11318, + "30": 0.11126, + "31": 0.11489, + "32": 0.11045, + "33": 0.1114, + "34": 0.11253, + "35": 0.11114, + "36": 0.114, + "37": 0.11201, + "38": 0.10979, + "39": 0.11069, + "40": 0.11078, + "41": 0.11142, + "42": 0.11091, + "43": 0.11324, + "44": 0.11151, + "45": 0.11295, + "46": 0.11174, + "47": 0.10954, + "48": 0.11083, + "49": 0.11195, + "50": 0.11251, + "51": 0.11627, + "52": 0.11199, + "53": 0.11127, + "54": 0.11464, + "55": 0.11072, + "56": 0.1136, + "57": 0.11119, + "58": 0.11025, + "59": 0.11083, + "60": 0.11126, + "61": 0.10968, + "62": 0.11104, + "63": 0.11515, + "64": 0.11136, + "65": 0.11454, + "66": 0.10994, + "67": 0.11003, + "68": 0.10997, + "69": 0.11155, + "70": 0.11002, + "71": 0.1121, + "72": 0.11334, + "73": 0.11221, + "74": 0.11542, + "75": 0.11082, + "76": 0.10997, + "77": 0.11087, + "78": 0.11222, + "79": 0.11343, + "80": 0.11462, + "81": 0.11272, + "82": 0.11293, + "83": 0.113, + "84": 0.11134, + "85": 0.11308, + "86": 0.11357, + "87": 0.11341, + "88": 0.11349, + "89": 0.11342, + "90": 0.11212, + "91": 0.11377, + "92": 0.11421, + "93": 0.1115, + "94": 0.11293, + "95": 0.11334, + "96": 0.11303, + "97": 0.11198, + "98": 0.11326, + "99": 0.11128, + "100": 0.1117 + } + } +} \ No newline 
at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..9bafb7796c5 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81396, + "4": 10.78497, + "5": 10.85284, + "6": 10.87449, + "7": 10.83201, + "8": 10.83297, + "9": 10.83935, + "10": 10.78455, + "11": 10.87798, + "12": 10.86112, + "13": 10.86444, + "14": 10.87605, + "15": 10.79229, + "16": 10.79509, + "17": 10.76768, + "18": 10.81005, + "19": 10.79719, + "20": 10.69211, + "21": 10.68164, + "22": 10.52085, + "23": 10.70893, + "24": 10.57599, + "25": 10.52412, + "26": 10.59517, + "27": 10.58426, + "28": 10.56233, + "29": 10.57013, + "30": 10.34552, + "31": 10.10049, + "32": 10.45378, + "33": 10.44627, + "34": 10.20606, + "35": 10.26239, + "36": 10.21239, + "37": 10.32522, + "38": 10.16777, + "39": 10.38334, + "40": 10.07241, + "41": 10.13863, + "42": 10.19814, + "43": 9.81073, + "44": 9.93244, + "45": 9.81101, + "46": 9.80877, + "47": 10.12608, + "48": 9.82108, + "49": 9.50625, + "50": 9.88422, + "51": 9.83655, + "52": 9.72542, + "53": 10.04681, + "54": 9.93029, + "55": 9.86374, + "56": 9.60187, + "57": 9.4509, + "58": 9.80848, + "59": 9.56669, + "60": 9.47965, + "61": 9.67901, + "62": 9.96739, + "63": 9.35162, + "64": 9.75606, + "65": 8.93063, + "66": 9.68053, + "67": 9.35888, + "68": 9.76985, + "69": 9.77496, + "70": 9.71215, + "71": 9.60754, + "72": 9.57085, + "73": 9.48404, + "74": 
8.92823, + "75": 9.40048, + "76": 9.07196, + "77": 10.05227, + "78": 9.71519, + "79": 9.35769, + "80": 9.39077, + "81": 9.46749, + "82": 9.68504, + "83": 9.29553, + "84": 9.40532, + "85": 9.60141, + "86": 9.06774, + "87": 9.585, + "88": 9.73363, + "89": 9.59519, + "90": 9.80501, + "91": 9.3255, + "92": 9.35331, + "93": 9.06981, + "94": 8.82231, + "95": 9.50816, + "96": 9.51534, + "97": 9.29772, + "98": 9.66202, + "99": 8.87692, + "100": 9.3924 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1727.0, + "4": 1835.0, + "5": 1840.0, + "6": 1719.0, + "7": 1740.0, + "8": 1591.0, + "9": 1839.0, + "10": 1380.0, + "11": 1856.0, + "12": 1693.0, + "13": 1906.0, + "14": 1757.0, + "15": 1848.0, + "16": 1791.0, + "17": 1752.0, + "18": 1669.0, + "19": 1722.0, + "20": 1601.0, + "21": 1900.0, + "22": 1662.0, + "23": 2006.0, + "24": 1597.0, + "25": 1635.0, + "26": 1709.0, + "27": 1931.0, + "28": 2043.0, + "29": 1888.0, + "30": 1936.0, + "31": 1550.0, + "32": 1913.0, + "33": 2135.0, + "34": 1703.0, + "35": 1908.0, + "36": 1953.0, + "37": 2291.0, + "38": 2210.0, + "39": 2334.0, + "40": 2100.0, + "41": 2300.0, + "42": 2236.0, + "43": 1897.0, + "44": 1993.0, + "45": 2098.0, + "46": 2298.0, + "47": 2504.0, + "48": 2356.0, + "49": 2268.0, + "50": 2333.0, + "51": 2487.0, + "52": 2422.0, + "53": 2969.0, + "54": 2698.0, + "55": 2260.0, + "56": 2773.0, + "57": 2153.0, + "58": 2903.0, + "59": 2750.0, + "60": 2399.0, + "61": 2943.0, + "62": 2646.0, + "63": 2470.0, + "64": 2952.0, + "65": 2656.0, + "66": 3077.0, + "67": 2683.0, + "68": 2841.0, + "69": 3047.0, + "70": 3077.0, + "71": 2947.0, + "72": 2446.0, + "73": 2719.0, + "74": 1886.0, + "75": 2547.0, + "76": 2983.0, + "77": 3150.0, + "78": 3223.0, + "79": 3085.0, + "80": 3315.0, + "81": 3695.0, + "82": 3285.0, + "83": 2818.0, + "84": 3328.0, + "85": 3371.0, + "86": 2574.0, + "87": 3733.0, + "88": 3046.0, + "89": 3195.0, + "90": 2943.0, + "91": 2825.0, + 
"92": 3086.0, + "93": 2711.0, + "94": 3416.0, + "95": 3457.0, + "96": 3408.0, + "97": 3161.0, + "98": 3616.0, + "99": 3374.0, + "100": 3292.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 733859840.0, + "2": 733859840.0, + "3": 733859840.0, + "4": 733859840.0, + "5": 733859840.0, + "6": 733859840.0, + "7": 733859840.0, + "8": 733859840.0, + "9": 733859840.0, + "10": 733859840.0, + "11": 733859840.0, + "12": 733859840.0, + "13": 733859840.0, + "14": 733859840.0, + "15": 733859840.0, + "16": 733859840.0, + "17": 733859840.0, + "18": 733859840.0, + "19": 733859840.0, + "20": 733859840.0, + "21": 733859840.0, + "22": 733859840.0, + "23": 733859840.0, + "24": 733859840.0, + "25": 733859840.0, + "26": 733859840.0, + "27": 733859840.0, + "28": 733859840.0, + "29": 733859840.0, + "30": 733859840.0, + "31": 733859840.0, + "32": 733859840.0, + "33": 733859840.0, + "34": 733859840.0, + "35": 733859840.0, + "36": 733859840.0, + "37": 733859840.0, + "38": 733859840.0, + "39": 733859840.0, + "40": 733859840.0, + "41": 733859840.0, + "42": 733859840.0, + "43": 733859840.0, + "44": 733859840.0, + "45": 733859840.0, + "46": 733859840.0, + "47": 733859840.0, + "48": 733859840.0, + "49": 733859840.0, + "50": 733859840.0, + "51": 733859840.0, + "52": 733859840.0, + "53": 733859840.0, + "54": 733859840.0, + "55": 733859840.0, + "56": 733859840.0, + "57": 733859840.0, + "58": 733859840.0, + "59": 733859840.0, + "60": 733859840.0, + "61": 733859840.0, + "62": 733859840.0, + "63": 733859840.0, + "64": 733859840.0, + "65": 733859840.0, + "66": 733859840.0, + "67": 733859840.0, + "68": 733859840.0, + "69": 733859840.0, + "70": 733859840.0, + "71": 733859840.0, + "72": 733859840.0, + "73": 733859840.0, + "74": 733859840.0, + "75": 733859840.0, + "76": 733859840.0, + "77": 733859840.0, + "78": 733859840.0, + "79": 733859840.0, + "80": 733859840.0, + "81": 733859840.0, + "82": 733859840.0, + "83": 733859840.0, + "84": 
733859840.0, + "85": 733859840.0, + "86": 733859840.0, + "87": 733859840.0, + "88": 733859840.0, + "89": 733859840.0, + "90": 733859840.0, + "91": 733859840.0, + "92": 733859840.0, + "93": 733859840.0, + "94": 733859840.0, + "95": 733859840.0, + "96": 733859840.0, + "97": 733859840.0, + "98": 733859840.0, + "99": 733859840.0, + "100": 733859840.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3838895104.0, + "2": 4122703872.0, + "3": 4122703872.0, + "4": 4122703872.0, + "5": 4122703872.0, + "6": 4122703872.0, + "7": 4122703872.0, + "8": 4122703872.0, + "9": 4122703872.0, + "10": 4122703872.0, + "11": 4122703872.0, + "12": 4122703872.0, + "13": 4122703872.0, + "14": 4122703872.0, + "15": 4122703872.0, + "16": 4122703872.0, + "17": 4122703872.0, + "18": 4122703872.0, + "19": 4122703872.0, + "20": 4122703872.0, + "21": 4122703872.0, + "22": 4122703872.0, + "23": 4122703872.0, + "24": 4122703872.0, + "25": 4122703872.0, + "26": 4122703872.0, + "27": 4122703872.0, + "28": 4122703872.0, + "29": 4122703872.0, + "30": 4122703872.0, + "31": 4122703872.0, + "32": 4122703872.0, + "33": 4122703872.0, + "34": 4122703872.0, + "35": 4122703872.0, + "36": 4122703872.0, + "37": 4122703872.0, + "38": 4122703872.0, + "39": 4122703872.0, + "40": 4122703872.0, + "41": 4122703872.0, + "42": 4122703872.0, + "43": 4122703872.0, + "44": 4122703872.0, + "45": 4122703872.0, + "46": 4122703872.0, + "47": 4122703872.0, + "48": 4122703872.0, + "49": 4122703872.0, + "50": 4122703872.0, + "51": 4122703872.0, + "52": 4122703872.0, + "53": 4122703872.0, + "54": 4122703872.0, + "55": 4122703872.0, + "56": 4122703872.0, + "57": 4122703872.0, + "58": 4122703872.0, + "59": 4122703872.0, + "60": 4122703872.0, + "61": 4122703872.0, + "62": 4122703872.0, + "63": 4122703872.0, + "64": 4122703872.0, + "65": 4122703872.0, + "66": 4122703872.0, + "67": 4122703872.0, + "68": 4122703872.0, + "69": 4122703872.0, + "70": 4122703872.0, + 
"71": 4122703872.0, + "72": 4122703872.0, + "73": 4122703872.0, + "74": 4122703872.0, + "75": 4122703872.0, + "76": 4122703872.0, + "77": 4122703872.0, + "78": 4122703872.0, + "79": 4122703872.0, + "80": 4122703872.0, + "81": 4122703872.0, + "82": 4122703872.0, + "83": 4122703872.0, + "84": 4122703872.0, + "85": 4122703872.0, + "86": 4122703872.0, + "87": 4122703872.0, + "88": 4122703872.0, + "89": 4122703872.0, + "90": 4122703872.0, + "91": 4122703872.0, + "92": 4122703872.0, + "93": 4122703872.0, + "94": 4122703872.0, + "95": 4122703872.0, + "96": 4122703872.0, + "97": 4122703872.0, + "98": 4122703872.0, + "99": 4122703872.0, + "100": 4122703872.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 20.74392, + "2": 0.20458, + "3": 0.17337, + "4": 0.17372, + "5": 0.17406, + "6": 0.17407, + "7": 0.1701, + "8": 0.1709, + "9": 0.17096, + "10": 0.17284, + "11": 0.17356, + "12": 0.17143, + "13": 0.17133, + "14": 0.17078, + "15": 0.17163, + "16": 0.17206, + "17": 0.17227, + "18": 0.1714, + "19": 0.17121, + "20": 0.17143, + "21": 0.17086, + "22": 0.17241, + "23": 0.17251, + "24": 0.17165, + "25": 0.17082, + "26": 0.17042, + "27": 0.1695, + "28": 0.17064, + "29": 0.17259, + "30": 0.17056, + "31": 0.17093, + "32": 0.16764, + "33": 0.1668, + "34": 0.16801, + "35": 0.1684, + "36": 0.1676, + "37": 0.16666, + "38": 0.16729, + "39": 0.16578, + "40": 0.16707, + "41": 0.16873, + "42": 0.16705, + "43": 0.16817, + "44": 0.16766, + "45": 0.16793, + "46": 0.16745, + "47": 0.16825, + "48": 0.16561, + "49": 0.16693, + "50": 0.167, + "51": 0.17408, + "52": 0.17381, + "53": 0.17359, + "54": 0.17167, + "55": 0.17219, + "56": 0.17329, + "57": 0.17468, + "58": 0.17336, + "59": 0.17436, + "60": 0.17289, + "61": 0.17216, + "62": 0.17277, + "63": 0.17306, + "64": 0.17382, + "65": 0.17362, + "66": 0.1721, + "67": 0.17256, + "68": 0.17189, + "69": 0.17201, + "70": 0.17356, + "71": 0.1728, + "72": 0.17241, + "73": 0.17349, + "74": 
0.17357, + "75": 0.17454, + "76": 0.17395, + "77": 0.17253, + "78": 0.17295, + "79": 0.17219, + "80": 0.1746, + "81": 0.17297, + "82": 0.1742, + "83": 0.17306, + "84": 0.17236, + "85": 0.17328, + "86": 0.17434, + "87": 0.17285, + "88": 0.17502, + "89": 0.17257, + "90": 0.1726, + "91": 0.17295, + "92": 0.17284, + "93": 0.17452, + "94": 0.17398, + "95": 0.17312, + "96": 0.1727, + "97": 0.17207, + "98": 0.17436, + "99": 0.17586, + "100": 0.17341 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..e0f27834c5c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81396, + "4": 10.78497, + "5": 10.85284, + "6": 10.87449, + "7": 10.83201, + "8": 10.83297, + "9": 10.83935, + "10": 10.78455, + "11": 10.87798, + "12": 10.86112, + "13": 10.86444, + "14": 10.87605, + "15": 10.79229, + "16": 10.79509, + "17": 10.76768, + "18": 10.81005, + "19": 10.79719, + "20": 10.69211, + "21": 10.68164, + "22": 10.52085, + "23": 10.70893, + "24": 10.57599, + "25": 10.52412, + "26": 10.59517, + "27": 10.58426, + "28": 10.56233, + "29": 10.57013, + "30": 10.34552, + "31": 10.10049, + "32": 10.45378, + "33": 10.44627, + "34": 10.20606, + "35": 10.26239, + "36": 10.21239, + "37": 10.32522, + "38": 10.16777, + "39": 10.38334, + "40": 10.07241, + "41": 10.13863, + "42": 10.19814, + "43": 9.81073, + "44": 9.93244, + "45": 9.81101, + "46": 9.80877, + "47": 
10.12608, + "48": 9.82108, + "49": 9.50625, + "50": 9.88422, + "51": 9.83655, + "52": 9.72542, + "53": 10.04681, + "54": 9.93029, + "55": 9.86374, + "56": 9.60187, + "57": 9.4509, + "58": 9.80848, + "59": 9.56669, + "60": 9.47965, + "61": 9.67901, + "62": 9.96739, + "63": 9.35162, + "64": 9.75606, + "65": 8.93063, + "66": 9.68053, + "67": 9.35888, + "68": 9.76985, + "69": 9.77496, + "70": 9.71215, + "71": 9.60754, + "72": 9.57085, + "73": 9.48404, + "74": 8.92823, + "75": 9.40048, + "76": 9.07196, + "77": 10.05227, + "78": 9.71519, + "79": 9.35769, + "80": 9.39077, + "81": 9.46749, + "82": 9.68504, + "83": 9.29553, + "84": 9.40532, + "85": 9.60141, + "86": 9.06774, + "87": 9.585, + "88": 9.73363, + "89": 9.59519, + "90": 9.80501, + "91": 9.3255, + "92": 9.35331, + "93": 9.06981, + "94": 8.82231, + "95": 9.50816, + "96": 9.51534, + "97": 9.29772, + "98": 9.66202, + "99": 8.87692, + "100": 9.3924 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1727.0, + "4": 1835.0, + "5": 1840.0, + "6": 1719.0, + "7": 1740.0, + "8": 1591.0, + "9": 1839.0, + "10": 1380.0, + "11": 1856.0, + "12": 1693.0, + "13": 1906.0, + "14": 1757.0, + "15": 1848.0, + "16": 1791.0, + "17": 1752.0, + "18": 1669.0, + "19": 1722.0, + "20": 1601.0, + "21": 1900.0, + "22": 1662.0, + "23": 2006.0, + "24": 1597.0, + "25": 1635.0, + "26": 1709.0, + "27": 1931.0, + "28": 2043.0, + "29": 1888.0, + "30": 1936.0, + "31": 1550.0, + "32": 1913.0, + "33": 2135.0, + "34": 1703.0, + "35": 1908.0, + "36": 1953.0, + "37": 2291.0, + "38": 2210.0, + "39": 2334.0, + "40": 2100.0, + "41": 2300.0, + "42": 2236.0, + "43": 1897.0, + "44": 1993.0, + "45": 2098.0, + "46": 2298.0, + "47": 2504.0, + "48": 2356.0, + "49": 2268.0, + "50": 2333.0, + "51": 2487.0, + "52": 2422.0, + "53": 2969.0, + "54": 2698.0, + "55": 2260.0, + "56": 2773.0, + "57": 2153.0, + "58": 2903.0, + "59": 2750.0, + "60": 2399.0, + "61": 2943.0, + "62": 2646.0, + "63": 
2470.0, + "64": 2952.0, + "65": 2656.0, + "66": 3077.0, + "67": 2683.0, + "68": 2841.0, + "69": 3047.0, + "70": 3077.0, + "71": 2947.0, + "72": 2446.0, + "73": 2719.0, + "74": 1886.0, + "75": 2547.0, + "76": 2983.0, + "77": 3150.0, + "78": 3223.0, + "79": 3085.0, + "80": 3315.0, + "81": 3695.0, + "82": 3285.0, + "83": 2818.0, + "84": 3328.0, + "85": 3371.0, + "86": 2574.0, + "87": 3733.0, + "88": 3046.0, + "89": 3195.0, + "90": 2943.0, + "91": 2825.0, + "92": 3086.0, + "93": 2711.0, + "94": 3416.0, + "95": 3457.0, + "96": 3408.0, + "97": 3161.0, + "98": 3616.0, + "99": 3374.0, + "100": 3292.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 733859840.0, + "2": 733859840.0, + "3": 733859840.0, + "4": 733859840.0, + "5": 733859840.0, + "6": 733859840.0, + "7": 733859840.0, + "8": 733859840.0, + "9": 733859840.0, + "10": 733859840.0, + "11": 733859840.0, + "12": 733859840.0, + "13": 733859840.0, + "14": 733859840.0, + "15": 733859840.0, + "16": 733859840.0, + "17": 733859840.0, + "18": 733859840.0, + "19": 733859840.0, + "20": 733859840.0, + "21": 733859840.0, + "22": 733859840.0, + "23": 733859840.0, + "24": 733859840.0, + "25": 733859840.0, + "26": 733859840.0, + "27": 733859840.0, + "28": 733859840.0, + "29": 733859840.0, + "30": 733859840.0, + "31": 733859840.0, + "32": 733859840.0, + "33": 733859840.0, + "34": 733859840.0, + "35": 733859840.0, + "36": 733859840.0, + "37": 733859840.0, + "38": 733859840.0, + "39": 733859840.0, + "40": 733859840.0, + "41": 733859840.0, + "42": 733859840.0, + "43": 733859840.0, + "44": 733859840.0, + "45": 733859840.0, + "46": 733859840.0, + "47": 733859840.0, + "48": 733859840.0, + "49": 733859840.0, + "50": 733859840.0, + "51": 733859840.0, + "52": 733859840.0, + "53": 733859840.0, + "54": 733859840.0, + "55": 733859840.0, + "56": 733859840.0, + "57": 733859840.0, + "58": 733859840.0, + "59": 733859840.0, + "60": 733859840.0, + "61": 733859840.0, + "62": 
733859840.0, + "63": 733859840.0, + "64": 733859840.0, + "65": 733859840.0, + "66": 733859840.0, + "67": 733859840.0, + "68": 733859840.0, + "69": 733859840.0, + "70": 733859840.0, + "71": 733859840.0, + "72": 733859840.0, + "73": 733859840.0, + "74": 733859840.0, + "75": 733859840.0, + "76": 733859840.0, + "77": 733859840.0, + "78": 733859840.0, + "79": 733859840.0, + "80": 733859840.0, + "81": 733859840.0, + "82": 733859840.0, + "83": 733859840.0, + "84": 733859840.0, + "85": 733859840.0, + "86": 733859840.0, + "87": 733859840.0, + "88": 733859840.0, + "89": 733859840.0, + "90": 733859840.0, + "91": 733859840.0, + "92": 733859840.0, + "93": 733859840.0, + "94": 733859840.0, + "95": 733859840.0, + "96": 733859840.0, + "97": 733859840.0, + "98": 733859840.0, + "99": 733859840.0, + "100": 733859840.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3838895104.0, + "2": 4122703872.0, + "3": 4122703872.0, + "4": 4122703872.0, + "5": 4122703872.0, + "6": 4122703872.0, + "7": 4122703872.0, + "8": 4122703872.0, + "9": 4122703872.0, + "10": 4122703872.0, + "11": 4122703872.0, + "12": 4122703872.0, + "13": 4122703872.0, + "14": 4122703872.0, + "15": 4122703872.0, + "16": 4122703872.0, + "17": 4122703872.0, + "18": 4122703872.0, + "19": 4122703872.0, + "20": 4122703872.0, + "21": 4122703872.0, + "22": 4122703872.0, + "23": 4122703872.0, + "24": 4122703872.0, + "25": 4122703872.0, + "26": 4122703872.0, + "27": 4122703872.0, + "28": 4122703872.0, + "29": 4122703872.0, + "30": 4122703872.0, + "31": 4122703872.0, + "32": 4122703872.0, + "33": 4122703872.0, + "34": 4122703872.0, + "35": 4122703872.0, + "36": 4122703872.0, + "37": 4122703872.0, + "38": 4122703872.0, + "39": 4122703872.0, + "40": 4122703872.0, + "41": 4122703872.0, + "42": 4122703872.0, + "43": 4122703872.0, + "44": 4122703872.0, + "45": 4122703872.0, + "46": 4122703872.0, + "47": 4122703872.0, + "48": 4122703872.0, + "49": 4122703872.0, + 
"50": 4122703872.0, + "51": 4122703872.0, + "52": 4122703872.0, + "53": 4122703872.0, + "54": 4122703872.0, + "55": 4122703872.0, + "56": 4122703872.0, + "57": 4122703872.0, + "58": 4122703872.0, + "59": 4122703872.0, + "60": 4122703872.0, + "61": 4122703872.0, + "62": 4122703872.0, + "63": 4122703872.0, + "64": 4122703872.0, + "65": 4122703872.0, + "66": 4122703872.0, + "67": 4122703872.0, + "68": 4122703872.0, + "69": 4122703872.0, + "70": 4122703872.0, + "71": 4122703872.0, + "72": 4122703872.0, + "73": 4122703872.0, + "74": 4122703872.0, + "75": 4122703872.0, + "76": 4122703872.0, + "77": 4122703872.0, + "78": 4122703872.0, + "79": 4122703872.0, + "80": 4122703872.0, + "81": 4122703872.0, + "82": 4122703872.0, + "83": 4122703872.0, + "84": 4122703872.0, + "85": 4122703872.0, + "86": 4122703872.0, + "87": 4122703872.0, + "88": 4122703872.0, + "89": 4122703872.0, + "90": 4122703872.0, + "91": 4122703872.0, + "92": 4122703872.0, + "93": 4122703872.0, + "94": 4122703872.0, + "95": 4122703872.0, + "96": 4122703872.0, + "97": 4122703872.0, + "98": 4122703872.0, + "99": 4122703872.0, + "100": 4122703872.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 21.63875, + "2": 0.20787, + "3": 0.17721, + "4": 0.17658, + "5": 0.17528, + "6": 0.17173, + "7": 0.17222, + "8": 0.17098, + "9": 0.16832, + "10": 0.16824, + "11": 0.16991, + "12": 0.16843, + "13": 0.42886, + "14": 0.16771, + "15": 0.16923, + "16": 0.16925, + "17": 0.16721, + "18": 0.16835, + "19": 0.16585, + "20": 0.16956, + "21": 0.16767, + "22": 0.16714, + "23": 0.16974, + "24": 0.16792, + "25": 0.16824, + "26": 0.16516, + "27": 0.16767, + "28": 0.16689, + "29": 0.16698, + "30": 0.16729, + "31": 0.16513, + "32": 0.1676, + "33": 0.16825, + "34": 0.16806, + "35": 0.16705, + "36": 0.16629, + "37": 0.16592, + "38": 0.16499, + "39": 0.16482, + "40": 0.1659, + "41": 0.167, + "42": 0.16751, + "43": 0.16596, + "44": 0.16515, + "45": 0.1666, + "46": 0.17084, + 
"47": 0.16836, + "48": 0.16826, + "49": 0.16977, + "50": 0.16743, + "51": 0.17999, + "52": 0.17241, + "53": 0.17103, + "54": 0.17085, + "55": 0.17395, + "56": 0.17509, + "57": 0.17396, + "58": 0.1719, + "59": 0.171, + "60": 0.17345, + "61": 0.16946, + "62": 0.17066, + "63": 0.17284, + "64": 0.17167, + "65": 0.17007, + "66": 0.17279, + "67": 0.17225, + "68": 0.17054, + "69": 0.17013, + "70": 0.16853, + "71": 0.17021, + "72": 0.17001, + "73": 0.17136, + "74": 0.17139, + "75": 0.17396, + "76": 0.17179, + "77": 0.1705, + "78": 0.17116, + "79": 0.17303, + "80": 0.17196, + "81": 0.17269, + "82": 0.16795, + "83": 0.16966, + "84": 0.17044, + "85": 0.17085, + "86": 0.17338, + "87": 0.1704, + "88": 0.17066, + "89": 0.16954, + "90": 0.16994, + "91": 0.17172, + "92": 0.17222, + "93": 0.17163, + "94": 0.17173, + "95": 0.17012, + "96": 0.16985, + "97": 0.17078, + "98": 0.17262, + "99": 0.17354, + "100": 0.1683 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 191ec6ee23e..39c385529c2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.84517, "5": 10.87427, "10": 10.82906, "15": 10.81976, "20": 10.72701, "25": 10.5522, "30": 10.36616, "35": 10.27781, "40": 10.09758, "45": 9.84191, "50": 9.91248, "55": 9.88096, "60": 9.50125, "65": 8.94762, "70": 
9.74241, "75": 9.42529, "80": 9.40396, "85": 9.61407, "90": 9.8142, "95": 9.51734, "100": 9.39538}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1655.0, "5": 1803.0, "10": 1413.0, "15": 1951.0, "20": 1561.0, "25": 1665.0, "30": 1893.0, "35": 2010.0, "40": 2188.0, "45": 2126.0, "50": 2250.0, "55": 2351.0, "60": 2440.0, "65": 2602.0, "70": 3234.0, "75": 2388.0, "80": 3186.0, "85": 3262.0, "90": 3018.0, "95": 3426.0, "100": 3204.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 551288320.0, "5": 551288320.0, "10": 551288320.0, "15": 551288320.0, "20": 551288320.0, "25": 551288320.0, "30": 551288320.0, "35": 551288320.0, "40": 551288320.0, "45": 551288320.0, "50": 551288320.0, "55": 551288320.0, "60": 551288320.0, "65": 551288320.0, "70": 551288320.0, "75": 551288320.0, "80": 551288320.0, "85": 551288320.0, "90": 551288320.0, "95": 551288320.0, "100": 551288320.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2289440768.0, "5": 2431367168.0, "10": 2431367168.0, "15": 2431367168.0, "20": 2431367168.0, "25": 2431367168.0, "30": 2431367168.0, "35": 2431367168.0, "40": 2431367168.0, "45": 2431367168.0, "50": 2431367168.0, "55": 2431367168.0, "60": 2431367168.0, "65": 2431367168.0, "70": 2431367168.0, "75": 2431367168.0, "80": 2431367168.0, "85": 2431367168.0, "90": 2431367168.0, "95": 2431367168.0, "100": 2431367168.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 16.78965, "5": 0.09699, "10": 0.09747, "15": 0.09725, "20": 0.09706, "25": 0.09768, "30": 0.09735, "35": 0.09599, "40": 0.09512, "45": 0.09648, "50": 0.09612, "55": 0.10241, "60": 0.09796, "65": 0.10117, "70": 0.09751, "75": 0.09884, "80": 0.10009, "85": 0.09677, "90": 0.09652, "95": 0.1026, "100": 0.09685}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + 
"1": 10.84517, + "2": 10.85349, + "3": 10.8539, + "4": 10.83825, + "5": 10.87427, + "6": 10.89307, + "7": 10.85454, + "8": 10.8626, + "9": 10.86464, + "10": 10.82906, + "11": 10.88792, + "12": 10.87099, + "13": 10.87921, + "14": 10.89078, + "15": 10.81976, + "16": 10.83158, + "17": 10.79868, + "18": 10.81672, + "19": 10.81919, + "20": 10.72701, + "21": 10.70594, + "22": 10.56367, + "23": 10.72804, + "24": 10.60832, + "25": 10.5522, + "26": 10.60853, + "27": 10.62847, + "28": 10.58306, + "29": 10.60011, + "30": 10.36616, + "31": 10.12043, + "32": 10.47685, + "33": 10.46868, + "34": 10.22316, + "35": 10.27781, + "36": 10.22892, + "37": 10.35949, + "38": 10.19369, + "39": 10.41549, + "40": 10.09758, + "41": 10.1573, + "42": 10.22398, + "43": 9.83289, + "44": 9.96912, + "45": 9.84191, + "46": 9.83041, + "47": 10.15626, + "48": 9.85486, + "49": 9.54086, + "50": 9.91248, + "51": 9.85868, + "52": 9.74284, + "53": 10.06645, + "54": 9.95167, + "55": 9.88096, + "56": 9.62626, + "57": 9.47768, + "58": 9.83346, + "59": 9.58526, + "60": 9.50125, + "61": 9.69182, + "62": 9.98853, + "63": 9.38476, + "64": 9.7803, + "65": 8.94762, + "66": 9.70856, + "67": 9.36852, + "68": 9.78439, + "69": 9.79406, + "70": 9.74241, + "71": 9.61808, + "72": 9.58428, + "73": 9.5035, + "74": 8.94221, + "75": 9.42529, + "76": 9.07408, + "77": 10.06351, + "78": 9.7208, + "79": 9.37294, + "80": 9.40396, + "81": 9.48168, + "82": 9.69778, + "83": 9.30714, + "84": 9.41712, + "85": 9.61407, + "86": 9.07615, + "87": 9.59094, + "88": 9.74641, + "89": 9.59993, + "90": 9.8142, + "91": 9.33773, + "92": 9.35373, + "93": 9.07395, + "94": 8.83173, + "95": 9.51734, + "96": 9.52415, + "97": 9.30995, + "98": 9.66805, + "99": 8.88588, + "100": 9.39538 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1655.0, + "2": 1697.0, + "3": 1724.0, + "4": 1720.0, + "5": 1803.0, + "6": 1772.0, + "7": 1811.0, + "8": 1766.0, + "9": 1750.0, + "10": 1413.0, + "11": 1861.0, + 
"12": 1650.0, + "13": 1895.0, + "14": 1662.0, + "15": 1951.0, + "16": 1998.0, + "17": 1798.0, + "18": 1687.0, + "19": 1856.0, + "20": 1561.0, + "21": 1882.0, + "22": 1652.0, + "23": 2075.0, + "24": 1606.0, + "25": 1665.0, + "26": 1686.0, + "27": 1839.0, + "28": 2053.0, + "29": 1907.0, + "30": 1893.0, + "31": 1581.0, + "32": 1791.0, + "33": 2149.0, + "34": 1872.0, + "35": 2010.0, + "36": 1799.0, + "37": 2311.0, + "38": 2221.0, + "39": 2261.0, + "40": 2188.0, + "41": 2204.0, + "42": 2300.0, + "43": 2001.0, + "44": 2119.0, + "45": 2126.0, + "46": 2374.0, + "47": 2468.0, + "48": 2405.0, + "49": 2247.0, + "50": 2250.0, + "51": 2607.0, + "52": 2618.0, + "53": 2828.0, + "54": 2730.0, + "55": 2351.0, + "56": 2753.0, + "57": 2323.0, + "58": 2809.0, + "59": 2721.0, + "60": 2440.0, + "61": 2875.0, + "62": 2726.0, + "63": 2444.0, + "64": 3001.0, + "65": 2602.0, + "66": 2981.0, + "67": 2676.0, + "68": 2623.0, + "69": 2802.0, + "70": 3234.0, + "71": 2902.0, + "72": 2337.0, + "73": 2856.0, + "74": 1903.0, + "75": 2388.0, + "76": 3118.0, + "77": 3108.0, + "78": 3122.0, + "79": 2994.0, + "80": 3186.0, + "81": 3470.0, + "82": 3164.0, + "83": 2726.0, + "84": 3214.0, + "85": 3262.0, + "86": 2602.0, + "87": 3658.0, + "88": 2906.0, + "89": 3054.0, + "90": 3018.0, + "91": 2690.0, + "92": 3106.0, + "93": 2701.0, + "94": 3263.0, + "95": 3426.0, + "96": 3405.0, + "97": 3087.0, + "98": 3510.0, + "99": 3148.0, + "100": 3204.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 551269888.0, + "2": 551269888.0, + "3": 551269888.0, + "4": 552318464.0, + "5": 551269888.0, + "6": 551269888.0, + "7": 551269888.0, + "8": 551269888.0, + "9": 551269888.0, + "10": 551269888.0, + "11": 551269888.0, + "12": 551269888.0, + "13": 551269888.0, + "14": 551269888.0, + "15": 551269888.0, + "16": 551269888.0, + "17": 551269888.0, + "18": 551269888.0, + "19": 551269888.0, + "20": 551269888.0, + "21": 551269888.0, + "22": 551269888.0, + "23": 
551269888.0, + "24": 551269888.0, + "25": 551269888.0, + "26": 551269888.0, + "27": 551269888.0, + "28": 551269888.0, + "29": 551269888.0, + "30": 551269888.0, + "31": 551269888.0, + "32": 551269888.0, + "33": 551269888.0, + "34": 551269888.0, + "35": 551269888.0, + "36": 551269888.0, + "37": 551269888.0, + "38": 551269888.0, + "39": 551269888.0, + "40": 551269888.0, + "41": 551269888.0, + "42": 551269888.0, + "43": 551269888.0, + "44": 551269888.0, + "45": 551269888.0, + "46": 551269888.0, + "47": 551269888.0, + "48": 551269888.0, + "49": 551269888.0, + "50": 551269888.0, + "51": 551269888.0, + "52": 551269888.0, + "53": 551269888.0, + "54": 551269888.0, + "55": 551269888.0, + "56": 551269888.0, + "57": 551269888.0, + "58": 551269888.0, + "59": 551269888.0, + "60": 551269888.0, + "61": 551269888.0, + "62": 551269888.0, + "63": 551269888.0, + "64": 551269888.0, + "65": 551269888.0, + "66": 551269888.0, + "67": 551269888.0, + "68": 551269888.0, + "69": 551269888.0, + "70": 551269888.0, + "71": 551269888.0, + "72": 551269888.0, + "73": 551269888.0, + "74": 551269888.0, + "75": 551269888.0, + "76": 551269888.0, + "77": 551269888.0, + "78": 551269888.0, + "79": 551269888.0, + "80": 551269888.0, + "81": 551269888.0, + "82": 551269888.0, + "83": 551269888.0, + "84": 551269888.0, + "85": 551269888.0, + "86": 551269888.0, + "87": 551269888.0, + "88": 551269888.0, + "89": 551269888.0, + "90": 551269888.0, + "91": 551269888.0, + "92": 551269888.0, + "93": 551269888.0, + "94": 551269888.0, + "95": 551269888.0, + "96": 551269888.0, + "97": 551269888.0, + "98": 551269888.0, + "99": 551269888.0, + "100": 551269888.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2290489344.0, + "2": 2432397312.0, + "3": 2432397312.0, + "4": 2432397312.0, + "5": 2432397312.0, + "6": 2432397312.0, + "7": 2432397312.0, + "8": 2432397312.0, + "9": 2432397312.0, + "10": 2432397312.0, + "11": 2432397312.0, + "12": 2432397312.0, 
+ "13": 2432397312.0, + "14": 2432397312.0, + "15": 2432397312.0, + "16": 2432397312.0, + "17": 2432397312.0, + "18": 2432397312.0, + "19": 2432397312.0, + "20": 2432397312.0, + "21": 2432397312.0, + "22": 2432397312.0, + "23": 2432397312.0, + "24": 2432397312.0, + "25": 2432397312.0, + "26": 2432397312.0, + "27": 2432397312.0, + "28": 2432397312.0, + "29": 2432397312.0, + "30": 2432397312.0, + "31": 2432397312.0, + "32": 2432397312.0, + "33": 2432397312.0, + "34": 2432397312.0, + "35": 2432397312.0, + "36": 2432397312.0, + "37": 2432397312.0, + "38": 2432397312.0, + "39": 2432397312.0, + "40": 2432397312.0, + "41": 2432397312.0, + "42": 2432397312.0, + "43": 2432397312.0, + "44": 2432397312.0, + "45": 2432397312.0, + "46": 2432397312.0, + "47": 2432397312.0, + "48": 2432397312.0, + "49": 2432397312.0, + "50": 2432397312.0, + "51": 2432397312.0, + "52": 2432397312.0, + "53": 2432397312.0, + "54": 2432397312.0, + "55": 2432397312.0, + "56": 2432397312.0, + "57": 2432397312.0, + "58": 2432397312.0, + "59": 2432397312.0, + "60": 2432397312.0, + "61": 2432397312.0, + "62": 2432397312.0, + "63": 2432397312.0, + "64": 2432397312.0, + "65": 2432397312.0, + "66": 2432397312.0, + "67": 2432397312.0, + "68": 2432397312.0, + "69": 2432397312.0, + "70": 2432397312.0, + "71": 2432397312.0, + "72": 2432397312.0, + "73": 2432397312.0, + "74": 2432397312.0, + "75": 2432397312.0, + "76": 2432397312.0, + "77": 2432397312.0, + "78": 2432397312.0, + "79": 2432397312.0, + "80": 2432397312.0, + "81": 2432397312.0, + "82": 2432397312.0, + "83": 2432397312.0, + "84": 2432397312.0, + "85": 2432397312.0, + "86": 2432397312.0, + "87": 2432397312.0, + "88": 2432397312.0, + "89": 2432397312.0, + "90": 2432397312.0, + "91": 2432397312.0, + "92": 2432397312.0, + "93": 2432397312.0, + "94": 2432397312.0, + "95": 2432397312.0, + "96": 2432397312.0, + "97": 2432397312.0, + "98": 2432397312.0, + "99": 2432397312.0, + "100": 2432397312.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 
100, + "step_interval": 1, + "values": { + "1": 17.54138, + "2": 0.13158, + "3": 0.11931, + "4": 0.11269, + "5": 0.1124, + "6": 0.11102, + "7": 0.11179, + "8": 0.11071, + "9": 0.11115, + "10": 0.11216, + "11": 0.11019, + "12": 0.10929, + "13": 0.10974, + "14": 0.11072, + "15": 0.11028, + "16": 0.10961, + "17": 0.1105, + "18": 0.1098, + "19": 0.11053, + "20": 0.11011, + "21": 0.10991, + "22": 0.10929, + "23": 0.11003, + "24": 0.10899, + "25": 0.10976, + "26": 0.10976, + "27": 0.11215, + "28": 0.11012, + "29": 0.11201, + "30": 0.11164, + "31": 0.10958, + "32": 0.10984, + "33": 0.10959, + "34": 0.10961, + "35": 0.11104, + "36": 0.11182, + "37": 0.11063, + "38": 0.11001, + "39": 0.10974, + "40": 0.10932, + "41": 0.10961, + "42": 0.1101, + "43": 0.11018, + "44": 0.11136, + "45": 0.1111, + "46": 0.11139, + "47": 0.1089, + "48": 0.10943, + "49": 0.10954, + "50": 0.10991, + "51": 0.11785, + "52": 0.11209, + "53": 0.11006, + "54": 0.11154, + "55": 0.11442, + "56": 0.11224, + "57": 0.11144, + "58": 0.11019, + "59": 0.11203, + "60": 0.11138, + "61": 0.11054, + "62": 0.10988, + "63": 0.11137, + "64": 0.11375, + "65": 0.11099, + "66": 0.11062, + "67": 0.11059, + "68": 0.1103, + "69": 0.11052, + "70": 0.11117, + "71": 0.11388, + "72": 0.1141, + "73": 0.11416, + "74": 0.11486, + "75": 0.11283, + "76": 0.1123, + "77": 0.11047, + "78": 0.11279, + "79": 0.11417, + "80": 0.11037, + "81": 0.11258, + "82": 0.1135, + "83": 0.11215, + "84": 0.11183, + "85": 0.1122, + "86": 0.11261, + "87": 0.1097, + "88": 0.1112, + "89": 0.11201, + "90": 0.11377, + "91": 0.11526, + "92": 0.11074, + "93": 0.11279, + "94": 0.11178, + "95": 0.11134, + "96": 0.11018, + "97": 0.11123, + "98": 0.11129, + "99": 0.11384, + "100": 0.11183 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..d31da6ac7cf --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84517, + "2": 10.85349, + "3": 10.8539, + "4": 10.83825, + "5": 10.87427, + "6": 10.89307, + "7": 10.85454, + "8": 10.8626, + "9": 10.86464, + "10": 10.82906, + "11": 10.88792, + "12": 10.87099, + "13": 10.87921, + "14": 10.89078, + "15": 10.81976, + "16": 10.83158, + "17": 10.79868, + "18": 10.81672, + "19": 10.81919, + "20": 10.72701, + "21": 10.70594, + "22": 10.56367, + "23": 10.72804, + "24": 10.60832, + "25": 10.5522, + "26": 10.60853, + "27": 10.62847, + "28": 10.58306, + "29": 10.60011, + "30": 10.36616, + "31": 10.12043, + "32": 10.47685, + "33": 10.46868, + "34": 10.22316, + "35": 10.27781, + "36": 10.22892, + "37": 10.35949, + "38": 10.19369, + "39": 10.41549, + "40": 10.09758, + "41": 10.1573, + "42": 10.22398, + "43": 9.83289, + "44": 9.96912, + "45": 9.84191, + "46": 9.83041, + "47": 10.15626, + "48": 9.85486, + "49": 9.54086, + "50": 9.91248, + "51": 9.85868, + "52": 9.74284, + "53": 10.06645, + "54": 9.95167, + "55": 9.88096, + "56": 9.62626, + "57": 9.47768, + "58": 9.83346, + "59": 9.58526, + "60": 9.50125, + "61": 9.69182, + "62": 9.98853, + "63": 9.38476, + "64": 9.7803, + "65": 8.94762, + "66": 9.70856, + "67": 9.36852, + "68": 9.78439, + "69": 9.79406, + "70": 9.74241, + "71": 9.61808, + "72": 9.58428, + "73": 9.5035, + "74": 8.94221, + "75": 9.42529, + "76": 9.07408, + "77": 10.06351, + "78": 9.7208, + "79": 9.37294, + "80": 9.40396, + "81": 9.48168, + "82": 9.69778, + "83": 9.30714, + "84": 9.41712, + "85": 
9.61407, + "86": 9.07615, + "87": 9.59094, + "88": 9.74641, + "89": 9.59993, + "90": 9.8142, + "91": 9.33773, + "92": 9.35373, + "93": 9.07395, + "94": 8.83173, + "95": 9.51734, + "96": 9.52415, + "97": 9.30995, + "98": 9.66805, + "99": 8.88588, + "100": 9.39538 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1655.0, + "2": 1697.0, + "3": 1724.0, + "4": 1720.0, + "5": 1803.0, + "6": 1772.0, + "7": 1811.0, + "8": 1766.0, + "9": 1750.0, + "10": 1413.0, + "11": 1861.0, + "12": 1650.0, + "13": 1895.0, + "14": 1662.0, + "15": 1951.0, + "16": 1998.0, + "17": 1798.0, + "18": 1687.0, + "19": 1856.0, + "20": 1561.0, + "21": 1882.0, + "22": 1652.0, + "23": 2075.0, + "24": 1606.0, + "25": 1665.0, + "26": 1686.0, + "27": 1839.0, + "28": 2053.0, + "29": 1907.0, + "30": 1893.0, + "31": 1581.0, + "32": 1791.0, + "33": 2149.0, + "34": 1872.0, + "35": 2010.0, + "36": 1799.0, + "37": 2311.0, + "38": 2221.0, + "39": 2261.0, + "40": 2188.0, + "41": 2204.0, + "42": 2300.0, + "43": 2001.0, + "44": 2119.0, + "45": 2126.0, + "46": 2374.0, + "47": 2468.0, + "48": 2405.0, + "49": 2247.0, + "50": 2250.0, + "51": 2607.0, + "52": 2618.0, + "53": 2828.0, + "54": 2730.0, + "55": 2351.0, + "56": 2753.0, + "57": 2323.0, + "58": 2809.0, + "59": 2721.0, + "60": 2440.0, + "61": 2875.0, + "62": 2726.0, + "63": 2444.0, + "64": 3001.0, + "65": 2602.0, + "66": 2981.0, + "67": 2676.0, + "68": 2623.0, + "69": 2802.0, + "70": 3234.0, + "71": 2902.0, + "72": 2337.0, + "73": 2856.0, + "74": 1903.0, + "75": 2388.0, + "76": 3118.0, + "77": 3108.0, + "78": 3122.0, + "79": 2994.0, + "80": 3186.0, + "81": 3470.0, + "82": 3164.0, + "83": 2726.0, + "84": 3214.0, + "85": 3262.0, + "86": 2602.0, + "87": 3658.0, + "88": 2906.0, + "89": 3054.0, + "90": 3018.0, + "91": 2690.0, + "92": 3106.0, + "93": 2701.0, + "94": 3263.0, + "95": 3426.0, + "96": 3405.0, + "97": 3087.0, + "98": 3510.0, + "99": 3148.0, + "100": 3204.0 + } + }, + "mem-allocated-bytes": { + 
"start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 551269888.0, + "2": 551269888.0, + "3": 551269888.0, + "4": 552318464.0, + "5": 551269888.0, + "6": 551269888.0, + "7": 551269888.0, + "8": 551269888.0, + "9": 551269888.0, + "10": 551269888.0, + "11": 551269888.0, + "12": 551269888.0, + "13": 551269888.0, + "14": 551269888.0, + "15": 551269888.0, + "16": 551269888.0, + "17": 551269888.0, + "18": 551269888.0, + "19": 551269888.0, + "20": 551269888.0, + "21": 551269888.0, + "22": 551269888.0, + "23": 551269888.0, + "24": 551269888.0, + "25": 551269888.0, + "26": 551269888.0, + "27": 551269888.0, + "28": 551269888.0, + "29": 551269888.0, + "30": 551269888.0, + "31": 551269888.0, + "32": 551269888.0, + "33": 551269888.0, + "34": 551269888.0, + "35": 551269888.0, + "36": 551269888.0, + "37": 551269888.0, + "38": 551269888.0, + "39": 551269888.0, + "40": 551269888.0, + "41": 551269888.0, + "42": 551269888.0, + "43": 551269888.0, + "44": 551269888.0, + "45": 551269888.0, + "46": 551269888.0, + "47": 551269888.0, + "48": 551269888.0, + "49": 551269888.0, + "50": 551269888.0, + "51": 551269888.0, + "52": 551269888.0, + "53": 551269888.0, + "54": 551269888.0, + "55": 551269888.0, + "56": 551269888.0, + "57": 551269888.0, + "58": 551269888.0, + "59": 551269888.0, + "60": 551269888.0, + "61": 551269888.0, + "62": 551269888.0, + "63": 551269888.0, + "64": 551269888.0, + "65": 551269888.0, + "66": 551269888.0, + "67": 551269888.0, + "68": 551269888.0, + "69": 551269888.0, + "70": 551269888.0, + "71": 551269888.0, + "72": 551269888.0, + "73": 551269888.0, + "74": 551269888.0, + "75": 551269888.0, + "76": 551269888.0, + "77": 551269888.0, + "78": 551269888.0, + "79": 551269888.0, + "80": 551269888.0, + "81": 551269888.0, + "82": 551269888.0, + "83": 551269888.0, + "84": 551269888.0, + "85": 551269888.0, + "86": 551269888.0, + "87": 551269888.0, + "88": 551269888.0, + "89": 551269888.0, + "90": 551269888.0, + "91": 551269888.0, + "92": 551269888.0, + 
"93": 551269888.0, + "94": 551269888.0, + "95": 551269888.0, + "96": 551269888.0, + "97": 551269888.0, + "98": 551269888.0, + "99": 551269888.0, + "100": 551269888.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2290489344.0, + "2": 2432397312.0, + "3": 2432397312.0, + "4": 2432397312.0, + "5": 2432397312.0, + "6": 2432397312.0, + "7": 2432397312.0, + "8": 2432397312.0, + "9": 2432397312.0, + "10": 2432397312.0, + "11": 2432397312.0, + "12": 2432397312.0, + "13": 2432397312.0, + "14": 2432397312.0, + "15": 2432397312.0, + "16": 2432397312.0, + "17": 2432397312.0, + "18": 2432397312.0, + "19": 2432397312.0, + "20": 2432397312.0, + "21": 2432397312.0, + "22": 2432397312.0, + "23": 2432397312.0, + "24": 2432397312.0, + "25": 2432397312.0, + "26": 2432397312.0, + "27": 2432397312.0, + "28": 2432397312.0, + "29": 2432397312.0, + "30": 2432397312.0, + "31": 2432397312.0, + "32": 2432397312.0, + "33": 2432397312.0, + "34": 2432397312.0, + "35": 2432397312.0, + "36": 2432397312.0, + "37": 2432397312.0, + "38": 2432397312.0, + "39": 2432397312.0, + "40": 2432397312.0, + "41": 2432397312.0, + "42": 2432397312.0, + "43": 2432397312.0, + "44": 2432397312.0, + "45": 2432397312.0, + "46": 2432397312.0, + "47": 2432397312.0, + "48": 2432397312.0, + "49": 2432397312.0, + "50": 2432397312.0, + "51": 2432397312.0, + "52": 2432397312.0, + "53": 2432397312.0, + "54": 2432397312.0, + "55": 2432397312.0, + "56": 2432397312.0, + "57": 2432397312.0, + "58": 2432397312.0, + "59": 2432397312.0, + "60": 2432397312.0, + "61": 2432397312.0, + "62": 2432397312.0, + "63": 2432397312.0, + "64": 2432397312.0, + "65": 2432397312.0, + "66": 2432397312.0, + "67": 2432397312.0, + "68": 2432397312.0, + "69": 2432397312.0, + "70": 2432397312.0, + "71": 2432397312.0, + "72": 2432397312.0, + "73": 2432397312.0, + "74": 2432397312.0, + "75": 2432397312.0, + "76": 2432397312.0, + "77": 2432397312.0, + "78": 2432397312.0, + "79": 
2432397312.0, + "80": 2432397312.0, + "81": 2432397312.0, + "82": 2432397312.0, + "83": 2432397312.0, + "84": 2432397312.0, + "85": 2432397312.0, + "86": 2432397312.0, + "87": 2432397312.0, + "88": 2432397312.0, + "89": 2432397312.0, + "90": 2432397312.0, + "91": 2432397312.0, + "92": 2432397312.0, + "93": 2432397312.0, + "94": 2432397312.0, + "95": 2432397312.0, + "96": 2432397312.0, + "97": 2432397312.0, + "98": 2432397312.0, + "99": 2432397312.0, + "100": 2432397312.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 16.46548, + "2": 0.12959, + "3": 0.10184, + "4": 0.09901, + "5": 0.09738, + "6": 0.09779, + "7": 0.09844, + "8": 0.09824, + "9": 0.0976, + "10": 0.0989, + "11": 0.09806, + "12": 0.09847, + "13": 0.09693, + "14": 0.0975, + "15": 0.09734, + "16": 0.09676, + "17": 0.09761, + "18": 0.10064, + "19": 0.10268, + "20": 0.10193, + "21": 0.09868, + "22": 0.10036, + "23": 0.10125, + "24": 0.10069, + "25": 0.09985, + "26": 0.09933, + "27": 0.10255, + "28": 0.09872, + "29": 0.09702, + "30": 0.09893, + "31": 0.10092, + "32": 0.10188, + "33": 0.09747, + "34": 0.09867, + "35": 0.09716, + "36": 0.09808, + "37": 0.09735, + "38": 0.09948, + "39": 0.10526, + "40": 0.10139, + "41": 0.09798, + "42": 0.10054, + "43": 0.09915, + "44": 0.09761, + "45": 0.09943, + "46": 0.09837, + "47": 0.10213, + "48": 0.0976, + "49": 0.09851, + "50": 0.09815, + "51": 0.10646, + "52": 0.10032, + "53": 0.10073, + "54": 0.10074, + "55": 0.10099, + "56": 0.09991, + "57": 0.10044, + "58": 0.10136, + "59": 0.10068, + "60": 0.10185, + "61": 0.10193, + "62": 0.10012, + "63": 0.09915, + "64": 0.09898, + "65": 0.10063, + "66": 0.10749, + "67": 0.09751, + "68": 0.10261, + "69": 0.10397, + "70": 0.10225, + "71": 0.10161, + "72": 0.09906, + "73": 0.09842, + "74": 0.10577, + "75": 0.1039, + "76": 0.10082, + "77": 0.09852, + "78": 0.09796, + "79": 0.10077, + "80": 0.10371, + "81": 0.10025, + "82": 0.10234, + "83": 0.10234, + "84": 0.10127, + 
"85": 0.10403, + "86": 0.10427, + "87": 0.10111, + "88": 0.10052, + "89": 0.10059, + "90": 0.10355, + "91": 0.10168, + "92": 0.1012, + "93": 0.10032, + "94": 0.10123, + "95": 0.10403, + "96": 0.10413, + "97": 0.10405, + "98": 0.11267, + "99": 0.11812, + "100": 0.11125 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..acadb81abbe --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84517, + "2": 10.85349, + "3": 10.8539, + "4": 10.83825, + "5": 10.87427, + "6": 10.89307, + "7": 10.85454, + "8": 10.8626, + "9": 10.86464, + "10": 10.82906, + "11": 10.88792, + "12": 10.87099, + "13": 10.87921, + "14": 10.89078, + "15": 10.81976, + "16": 10.83158, + "17": 10.79868, + "18": 10.81672, + "19": 10.81919, + "20": 10.72701, + "21": 10.70594, + "22": 10.56367, + "23": 10.72804, + "24": 10.60832, + "25": 10.5522, + "26": 10.60853, + "27": 10.62847, + "28": 10.58306, + "29": 10.60011, + "30": 10.36616, + "31": 10.12043, + "32": 10.47685, + "33": 10.46868, + "34": 10.22316, + "35": 10.27781, + "36": 10.22892, + "37": 10.35949, + "38": 10.19369, + "39": 10.41549, + "40": 10.09758, + "41": 10.1573, + "42": 10.22398, + "43": 9.83289, + "44": 9.96912, + "45": 9.84191, + "46": 9.83041, + "47": 10.15626, + "48": 9.85486, + "49": 9.54086, + "50": 9.91248, + "51": 9.85868, + "52": 9.74284, + "53": 10.06645, + "54": 9.95167, + "55": 9.88096, + "56": 9.62626, + 
"57": 9.47768, + "58": 9.83346, + "59": 9.58526, + "60": 9.50125, + "61": 9.69182, + "62": 9.98853, + "63": 9.38476, + "64": 9.7803, + "65": 8.94762, + "66": 9.70856, + "67": 9.36852, + "68": 9.78439, + "69": 9.79406, + "70": 9.74241, + "71": 9.61808, + "72": 9.58428, + "73": 9.5035, + "74": 8.94221, + "75": 9.42529, + "76": 9.07408, + "77": 10.06351, + "78": 9.7208, + "79": 9.37294, + "80": 9.40396, + "81": 9.48168, + "82": 9.69778, + "83": 9.30714, + "84": 9.41712, + "85": 9.61407, + "86": 9.07615, + "87": 9.59094, + "88": 9.74641, + "89": 9.59993, + "90": 9.8142, + "91": 9.33773, + "92": 9.35373, + "93": 9.07395, + "94": 8.83173, + "95": 9.51734, + "96": 9.52415, + "97": 9.30995, + "98": 9.66805, + "99": 8.88588, + "100": 9.39538 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1655.0, + "2": 1697.0, + "3": 1724.0, + "4": 1720.0, + "5": 1803.0, + "6": 1772.0, + "7": 1811.0, + "8": 1766.0, + "9": 1750.0, + "10": 1413.0, + "11": 1861.0, + "12": 1650.0, + "13": 1895.0, + "14": 1662.0, + "15": 1951.0, + "16": 1998.0, + "17": 1798.0, + "18": 1687.0, + "19": 1856.0, + "20": 1561.0, + "21": 1882.0, + "22": 1652.0, + "23": 2075.0, + "24": 1606.0, + "25": 1665.0, + "26": 1686.0, + "27": 1839.0, + "28": 2053.0, + "29": 1907.0, + "30": 1893.0, + "31": 1581.0, + "32": 1791.0, + "33": 2149.0, + "34": 1872.0, + "35": 2010.0, + "36": 1799.0, + "37": 2311.0, + "38": 2221.0, + "39": 2261.0, + "40": 2188.0, + "41": 2204.0, + "42": 2300.0, + "43": 2001.0, + "44": 2119.0, + "45": 2126.0, + "46": 2374.0, + "47": 2468.0, + "48": 2405.0, + "49": 2247.0, + "50": 2250.0, + "51": 2607.0, + "52": 2618.0, + "53": 2828.0, + "54": 2730.0, + "55": 2351.0, + "56": 2753.0, + "57": 2323.0, + "58": 2809.0, + "59": 2721.0, + "60": 2440.0, + "61": 2875.0, + "62": 2726.0, + "63": 2444.0, + "64": 3001.0, + "65": 2602.0, + "66": 2981.0, + "67": 2676.0, + "68": 2623.0, + "69": 2802.0, + "70": 3234.0, + "71": 2902.0, + "72": 2337.0, + "73": 
2856.0, + "74": 1903.0, + "75": 2388.0, + "76": 3118.0, + "77": 3108.0, + "78": 3122.0, + "79": 2994.0, + "80": 3186.0, + "81": 3470.0, + "82": 3164.0, + "83": 2726.0, + "84": 3214.0, + "85": 3262.0, + "86": 2602.0, + "87": 3658.0, + "88": 2906.0, + "89": 3054.0, + "90": 3018.0, + "91": 2690.0, + "92": 3106.0, + "93": 2701.0, + "94": 3263.0, + "95": 3426.0, + "96": 3405.0, + "97": 3087.0, + "98": 3510.0, + "99": 3148.0, + "100": 3204.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 551269888.0, + "2": 551269888.0, + "3": 551269888.0, + "4": 552318464.0, + "5": 551269888.0, + "6": 551269888.0, + "7": 551269888.0, + "8": 551269888.0, + "9": 551269888.0, + "10": 551269888.0, + "11": 551269888.0, + "12": 551269888.0, + "13": 551269888.0, + "14": 551269888.0, + "15": 551269888.0, + "16": 551269888.0, + "17": 551269888.0, + "18": 551269888.0, + "19": 551269888.0, + "20": 551269888.0, + "21": 551269888.0, + "22": 551269888.0, + "23": 551269888.0, + "24": 551269888.0, + "25": 551269888.0, + "26": 551269888.0, + "27": 551269888.0, + "28": 551269888.0, + "29": 551269888.0, + "30": 551269888.0, + "31": 551269888.0, + "32": 551269888.0, + "33": 551269888.0, + "34": 551269888.0, + "35": 551269888.0, + "36": 551269888.0, + "37": 551269888.0, + "38": 551269888.0, + "39": 551269888.0, + "40": 551269888.0, + "41": 551269888.0, + "42": 551269888.0, + "43": 551269888.0, + "44": 551269888.0, + "45": 551269888.0, + "46": 551269888.0, + "47": 551269888.0, + "48": 551269888.0, + "49": 551269888.0, + "50": 551269888.0, + "51": 551269888.0, + "52": 551269888.0, + "53": 551269888.0, + "54": 551269888.0, + "55": 551269888.0, + "56": 551269888.0, + "57": 551269888.0, + "58": 551269888.0, + "59": 551269888.0, + "60": 551269888.0, + "61": 551269888.0, + "62": 551269888.0, + "63": 551269888.0, + "64": 551269888.0, + "65": 551269888.0, + "66": 551269888.0, + "67": 551269888.0, + "68": 551269888.0, + "69": 551269888.0, + "70": 
551269888.0, + "71": 551269888.0, + "72": 551269888.0, + "73": 551269888.0, + "74": 551269888.0, + "75": 551269888.0, + "76": 551269888.0, + "77": 551269888.0, + "78": 551269888.0, + "79": 551269888.0, + "80": 551269888.0, + "81": 551269888.0, + "82": 551269888.0, + "83": 551269888.0, + "84": 551269888.0, + "85": 551269888.0, + "86": 551269888.0, + "87": 551269888.0, + "88": 551269888.0, + "89": 551269888.0, + "90": 551269888.0, + "91": 551269888.0, + "92": 551269888.0, + "93": 551269888.0, + "94": 551269888.0, + "95": 551269888.0, + "96": 551269888.0, + "97": 551269888.0, + "98": 551269888.0, + "99": 551269888.0, + "100": 551269888.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2290489344.0, + "2": 2432397312.0, + "3": 2432397312.0, + "4": 2432397312.0, + "5": 2432397312.0, + "6": 2432397312.0, + "7": 2432397312.0, + "8": 2432397312.0, + "9": 2432397312.0, + "10": 2432397312.0, + "11": 2432397312.0, + "12": 2432397312.0, + "13": 2432397312.0, + "14": 2432397312.0, + "15": 2432397312.0, + "16": 2432397312.0, + "17": 2432397312.0, + "18": 2432397312.0, + "19": 2432397312.0, + "20": 2432397312.0, + "21": 2432397312.0, + "22": 2432397312.0, + "23": 2432397312.0, + "24": 2432397312.0, + "25": 2432397312.0, + "26": 2432397312.0, + "27": 2432397312.0, + "28": 2432397312.0, + "29": 2432397312.0, + "30": 2432397312.0, + "31": 2432397312.0, + "32": 2432397312.0, + "33": 2432397312.0, + "34": 2432397312.0, + "35": 2432397312.0, + "36": 2432397312.0, + "37": 2432397312.0, + "38": 2432397312.0, + "39": 2432397312.0, + "40": 2432397312.0, + "41": 2432397312.0, + "42": 2432397312.0, + "43": 2432397312.0, + "44": 2432397312.0, + "45": 2432397312.0, + "46": 2432397312.0, + "47": 2432397312.0, + "48": 2432397312.0, + "49": 2432397312.0, + "50": 2432397312.0, + "51": 2432397312.0, + "52": 2432397312.0, + "53": 2432397312.0, + "54": 2432397312.0, + "55": 2432397312.0, + "56": 2432397312.0, + "57": 
2432397312.0, + "58": 2432397312.0, + "59": 2432397312.0, + "60": 2432397312.0, + "61": 2432397312.0, + "62": 2432397312.0, + "63": 2432397312.0, + "64": 2432397312.0, + "65": 2432397312.0, + "66": 2432397312.0, + "67": 2432397312.0, + "68": 2432397312.0, + "69": 2432397312.0, + "70": 2432397312.0, + "71": 2432397312.0, + "72": 2432397312.0, + "73": 2432397312.0, + "74": 2432397312.0, + "75": 2432397312.0, + "76": 2432397312.0, + "77": 2432397312.0, + "78": 2432397312.0, + "79": 2432397312.0, + "80": 2432397312.0, + "81": 2432397312.0, + "82": 2432397312.0, + "83": 2432397312.0, + "84": 2432397312.0, + "85": 2432397312.0, + "86": 2432397312.0, + "87": 2432397312.0, + "88": 2432397312.0, + "89": 2432397312.0, + "90": 2432397312.0, + "91": 2432397312.0, + "92": 2432397312.0, + "93": 2432397312.0, + "94": 2432397312.0, + "95": 2432397312.0, + "96": 2432397312.0, + "97": 2432397312.0, + "98": 2432397312.0, + "99": 2432397312.0, + "100": 2432397312.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 17.61957, + "2": 0.12347, + "3": 0.11094, + "4": 0.11482, + "5": 0.11141, + "6": 0.10928, + "7": 0.10905, + "8": 0.11026, + "9": 0.11003, + "10": 0.11095, + "11": 0.11002, + "12": 0.1122, + "13": 0.11472, + "14": 0.11511, + "15": 0.11073, + "16": 0.11228, + "17": 0.11342, + "18": 0.11197, + "19": 0.11062, + "20": 0.11097, + "21": 0.11081, + "22": 0.11379, + "23": 0.10968, + "24": 0.11083, + "25": 0.11649, + "26": 0.11043, + "27": 0.11175, + "28": 0.11122, + "29": 0.11218, + "30": 0.11261, + "31": 0.11314, + "32": 0.10971, + "33": 0.11028, + "34": 0.11149, + "35": 0.11122, + "36": 0.11079, + "37": 0.11188, + "38": 0.1115, + "39": 0.11238, + "40": 0.11528, + "41": 0.11165, + "42": 0.11137, + "43": 0.11139, + "44": 0.11074, + "45": 0.11141, + "46": 0.11158, + "47": 0.1105, + "48": 0.11128, + "49": 0.11164, + "50": 0.11572, + "51": 0.11625, + "52": 0.10969, + "53": 0.10904, + "54": 0.1098, + "55": 0.10896, + "56": 
0.11225, + "57": 0.11301, + "58": 0.11047, + "59": 0.10959, + "60": 0.11005, + "61": 0.11018, + "62": 0.10831, + "63": 0.10997, + "64": 0.10896, + "65": 0.11116, + "66": 0.11148, + "67": 0.1092, + "68": 0.10947, + "69": 0.10933, + "70": 0.10869, + "71": 0.10873, + "72": 0.10849, + "73": 0.10872, + "74": 0.10951, + "75": 0.1119, + "76": 0.1109, + "77": 0.10896, + "78": 0.10963, + "79": 0.11057, + "80": 0.10858, + "81": 0.10732, + "82": 0.10824, + "83": 0.11006, + "84": 0.11062, + "85": 0.1096, + "86": 0.10933, + "87": 0.11001, + "88": 0.11053, + "89": 0.10899, + "90": 0.10989, + "91": 0.10903, + "92": 0.10959, + "93": 0.11185, + "94": 0.11166, + "95": 0.11067, + "96": 0.11183, + "97": 0.11136, + "98": 0.11022, + "99": 0.11091, + "100": 0.10951 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..b3879ab6045 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81396, + "4": 10.78497, + "5": 10.85284, + "6": 10.87449, + "7": 10.83201, + "8": 10.83297, + "9": 10.83935, + "10": 10.78455, + "11": 10.87798, + "12": 10.86112, + "13": 10.86444, + "14": 10.87605, + "15": 10.79229, + "16": 10.79509, + "17": 10.76768, + "18": 10.81006, + "19": 10.79716, + "20": 10.69212, + "21": 10.68168, + "22": 10.52085, + "23": 10.70898, + "24": 10.576, + "25": 10.52413, + "26": 10.59515, + "27": 10.58426, + 
"28": 10.56233, + "29": 10.57012, + "30": 10.34552, + "31": 10.10047, + "32": 10.45375, + "33": 10.44623, + "34": 10.20608, + "35": 10.26241, + "36": 10.2124, + "37": 10.3252, + "38": 10.16775, + "39": 10.38332, + "40": 10.07236, + "41": 10.13863, + "42": 10.19811, + "43": 9.81071, + "44": 9.93244, + "45": 9.81098, + "46": 9.80879, + "47": 10.1261, + "48": 9.82105, + "49": 9.50626, + "50": 9.88418, + "51": 9.8366, + "52": 9.7254, + "53": 10.04687, + "54": 9.93029, + "55": 9.86374, + "56": 9.60183, + "57": 9.4509, + "58": 9.80845, + "59": 9.56672, + "60": 9.47963, + "61": 9.67901, + "62": 9.96737, + "63": 9.3516, + "64": 9.75605, + "65": 8.93065, + "66": 9.68055, + "67": 9.3589, + "68": 9.76988, + "69": 9.77495, + "70": 9.71218, + "71": 9.60756, + "72": 9.57084, + "73": 9.48407, + "74": 8.92824, + "75": 9.4005, + "76": 9.07193, + "77": 10.05226, + "78": 9.71515, + "79": 9.35771, + "80": 9.39078, + "81": 9.46751, + "82": 9.68504, + "83": 9.29556, + "84": 9.4053, + "85": 9.60138, + "86": 9.06772, + "87": 9.58501, + "88": 9.73362, + "89": 9.59515, + "90": 9.80502, + "91": 9.3255, + "92": 9.35334, + "93": 9.06984, + "94": 8.8223, + "95": 9.50821, + "96": 9.51534, + "97": 9.29768, + "98": 9.66205, + "99": 8.87695, + "100": 9.3924 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1727.0, + "4": 1835.0, + "5": 1840.0, + "6": 1719.0, + "7": 1740.0, + "8": 1591.0, + "9": 1839.0, + "10": 1380.0, + "11": 1856.0, + "12": 1693.0, + "13": 1906.0, + "14": 1757.0, + "15": 1848.0, + "16": 1791.0, + "17": 1729.0, + "18": 1672.0, + "19": 1718.0, + "20": 1621.0, + "21": 1931.0, + "22": 1738.0, + "23": 1992.0, + "24": 1676.0, + "25": 1689.0, + "26": 1748.0, + "27": 1801.0, + "28": 1986.0, + "29": 2043.0, + "30": 1907.0, + "31": 1627.0, + "32": 1918.0, + "33": 2003.0, + "34": 1779.0, + "35": 1922.0, + "36": 1942.0, + "37": 2294.0, + "38": 2145.0, + "39": 2395.0, + "40": 2045.0, + "41": 2415.0, + "42": 
2277.0, + "43": 1863.0, + "44": 2087.0, + "45": 2097.0, + "46": 2265.0, + "47": 2436.0, + "48": 2460.0, + "49": 2217.0, + "50": 2368.0, + "51": 2552.0, + "52": 2541.0, + "53": 2907.0, + "54": 2604.0, + "55": 2383.0, + "56": 2762.0, + "57": 2128.0, + "58": 3040.0, + "59": 2797.0, + "60": 2509.0, + "61": 3041.0, + "62": 2642.0, + "63": 2401.0, + "64": 2913.0, + "65": 2628.0, + "66": 2934.0, + "67": 2791.0, + "68": 2718.0, + "69": 3050.0, + "70": 3129.0, + "71": 3014.0, + "72": 2263.0, + "73": 2761.0, + "74": 1887.0, + "75": 2552.0, + "76": 3111.0, + "77": 3240.0, + "78": 3150.0, + "79": 3139.0, + "80": 3279.0, + "81": 3595.0, + "82": 3194.0, + "83": 2797.0, + "84": 3272.0, + "85": 3344.0, + "86": 2611.0, + "87": 3802.0, + "88": 3054.0, + "89": 3205.0, + "90": 2980.0, + "91": 2726.0, + "92": 3043.0, + "93": 2751.0, + "94": 3247.0, + "95": 3324.0, + "96": 3503.0, + "97": 3057.0, + "98": 3465.0, + "99": 3320.0, + "100": 3467.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 522345472.0, + "2": 522345472.0, + "3": 522345472.0, + "4": 522345472.0, + "5": 522345472.0, + "6": 522345472.0, + "7": 522345472.0, + "8": 522345472.0, + "9": 522345472.0, + "10": 522345472.0, + "11": 522345472.0, + "12": 522345472.0, + "13": 522345472.0, + "14": 522345472.0, + "15": 522345472.0, + "16": 522345472.0, + "17": 522345472.0, + "18": 522345472.0, + "19": 522345472.0, + "20": 522345472.0, + "21": 522345472.0, + "22": 522345472.0, + "23": 522345472.0, + "24": 522345472.0, + "25": 522345472.0, + "26": 522345472.0, + "27": 522345472.0, + "28": 522345472.0, + "29": 522345472.0, + "30": 522345472.0, + "31": 522345472.0, + "32": 522345472.0, + "33": 522345472.0, + "34": 522345472.0, + "35": 522345472.0, + "36": 522345472.0, + "37": 522345472.0, + "38": 522345472.0, + "39": 522345472.0, + "40": 522345472.0, + "41": 522345472.0, + "42": 522345472.0, + "43": 522345472.0, + "44": 522345472.0, + "45": 522345472.0, + "46": 
522345472.0, + "47": 522345472.0, + "48": 522345472.0, + "49": 522345472.0, + "50": 522345472.0, + "51": 522345472.0, + "52": 522345472.0, + "53": 522345472.0, + "54": 522345472.0, + "55": 522345472.0, + "56": 522345472.0, + "57": 522345472.0, + "58": 522345472.0, + "59": 522345472.0, + "60": 522345472.0, + "61": 522345472.0, + "62": 522345472.0, + "63": 522345472.0, + "64": 522345472.0, + "65": 522345472.0, + "66": 522345472.0, + "67": 522345472.0, + "68": 522345472.0, + "69": 522345472.0, + "70": 522345472.0, + "71": 522345472.0, + "72": 522345472.0, + "73": 522345472.0, + "74": 522345472.0, + "75": 522345472.0, + "76": 522345472.0, + "77": 522345472.0, + "78": 522345472.0, + "79": 522345472.0, + "80": 522345472.0, + "81": 522345472.0, + "82": 522345472.0, + "83": 522345472.0, + "84": 522345472.0, + "85": 522345472.0, + "86": 522345472.0, + "87": 522345472.0, + "88": 522345472.0, + "89": 522345472.0, + "90": 522345472.0, + "91": 522345472.0, + "92": 522345472.0, + "93": 522345472.0, + "94": 522345472.0, + "95": 522345472.0, + "96": 522345472.0, + "97": 522345472.0, + "98": 522345472.0, + "99": 522345472.0, + "100": 522345472.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3769790464.0, + "2": 3912107008.0, + "3": 3912107008.0, + "4": 3912107008.0, + "5": 3912107008.0, + "6": 3912107008.0, + "7": 3912107008.0, + "8": 3912107008.0, + "9": 3912107008.0, + "10": 3912107008.0, + "11": 3912107008.0, + "12": 3912107008.0, + "13": 3912107008.0, + "14": 3912107008.0, + "15": 3912107008.0, + "16": 3912107008.0, + "17": 3912107008.0, + "18": 3912107008.0, + "19": 3912107008.0, + "20": 3912107008.0, + "21": 3912107008.0, + "22": 3912107008.0, + "23": 3912107008.0, + "24": 3912107008.0, + "25": 3912107008.0, + "26": 3912107008.0, + "27": 3912107008.0, + "28": 3912107008.0, + "29": 3912107008.0, + "30": 3912107008.0, + "31": 3912107008.0, + "32": 3912107008.0, + "33": 3912107008.0, + "34": 3912107008.0, 
+ "35": 3912107008.0, + "36": 3912107008.0, + "37": 3912107008.0, + "38": 3912107008.0, + "39": 3912107008.0, + "40": 3912107008.0, + "41": 3912107008.0, + "42": 3912107008.0, + "43": 3912107008.0, + "44": 3912107008.0, + "45": 3912107008.0, + "46": 3912107008.0, + "47": 3912107008.0, + "48": 3912107008.0, + "49": 3912107008.0, + "50": 3912107008.0, + "51": 3912107008.0, + "52": 3912107008.0, + "53": 3912107008.0, + "54": 3912107008.0, + "55": 3912107008.0, + "56": 3912107008.0, + "57": 3912107008.0, + "58": 3912107008.0, + "59": 3912107008.0, + "60": 3912107008.0, + "61": 3912107008.0, + "62": 3912107008.0, + "63": 3912107008.0, + "64": 3912107008.0, + "65": 3912107008.0, + "66": 3912107008.0, + "67": 3912107008.0, + "68": 3912107008.0, + "69": 3912107008.0, + "70": 3912107008.0, + "71": 3912107008.0, + "72": 3912107008.0, + "73": 3912107008.0, + "74": 3912107008.0, + "75": 3912107008.0, + "76": 3912107008.0, + "77": 3912107008.0, + "78": 3912107008.0, + "79": 3912107008.0, + "80": 3912107008.0, + "81": 3912107008.0, + "82": 3912107008.0, + "83": 3912107008.0, + "84": 3912107008.0, + "85": 3912107008.0, + "86": 3912107008.0, + "87": 3912107008.0, + "88": 3912107008.0, + "89": 3912107008.0, + "90": 3912107008.0, + "91": 3912107008.0, + "92": 3912107008.0, + "93": 3912107008.0, + "94": 3912107008.0, + "95": 3912107008.0, + "96": 3912107008.0, + "97": 3912107008.0, + "98": 3912107008.0, + "99": 3912107008.0, + "100": 3912107008.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 22.15873, + "2": 0.19792, + "3": 0.1814, + "4": 0.17908, + "5": 0.17702, + "6": 0.17453, + "7": 0.17287, + "8": 0.17032, + "9": 0.17054, + "10": 0.44712, + "11": 0.17227, + "12": 0.17101, + "13": 0.17082, + "14": 0.17199, + "15": 0.17186, + "16": 0.17114, + "17": 0.1707, + "18": 0.17045, + "19": 0.17481, + "20": 0.17111, + "21": 0.17083, + "22": 0.17129, + "23": 0.17239, + "24": 0.17005, + "25": 0.17192, + "26": 0.1691, + "27": 
0.17032, + "28": 0.16887, + "29": 0.16717, + "30": 0.16807, + "31": 0.17067, + "32": 0.16897, + "33": 0.17243, + "34": 0.17258, + "35": 0.17272, + "36": 0.17383, + "37": 0.17386, + "38": 0.17203, + "39": 0.17038, + "40": 0.17096, + "41": 0.1719, + "42": 0.1709, + "43": 0.17197, + "44": 0.17101, + "45": 0.17489, + "46": 0.17609, + "47": 0.16812, + "48": 0.16806, + "49": 0.16849, + "50": 0.1703, + "51": 0.17862, + "52": 0.41416, + "53": 0.1718, + "54": 0.17191, + "55": 0.41423, + "56": 0.47793, + "57": 0.17285, + "58": 0.17132, + "59": 0.17185, + "60": 0.17227, + "61": 0.17122, + "62": 0.17318, + "63": 0.17212, + "64": 0.17031, + "65": 0.17228, + "66": 0.17232, + "67": 0.17242, + "68": 0.17235, + "69": 0.17144, + "70": 0.17165, + "71": 0.17203, + "72": 0.17267, + "73": 0.17307, + "74": 0.17368, + "75": 0.17116, + "76": 0.17269, + "77": 0.17015, + "78": 0.17294, + "79": 0.17314, + "80": 0.17169, + "81": 0.1715, + "82": 0.17089, + "83": 0.17291, + "84": 0.17115, + "85": 0.17524, + "86": 0.17227, + "87": 0.17185, + "88": 0.17129, + "89": 0.17337, + "90": 0.17103, + "91": 0.17221, + "92": 0.17181, + "93": 0.17265, + "94": 0.17245, + "95": 0.17227, + "96": 0.17215, + "97": 0.17169, + "98": 0.17141, + "99": 0.17414, + "100": 0.17196 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..1d2aa1ec3ba --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + 
"values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81396, + "4": 10.78497, + "5": 10.85284, + "6": 10.87449, + "7": 10.83201, + "8": 10.83297, + "9": 10.83935, + "10": 10.78455, + "11": 10.87798, + "12": 10.86112, + "13": 10.86444, + "14": 10.87605, + "15": 10.79229, + "16": 10.79509, + "17": 10.76768, + "18": 10.81006, + "19": 10.79716, + "20": 10.69212, + "21": 10.68168, + "22": 10.52085, + "23": 10.70898, + "24": 10.576, + "25": 10.52413, + "26": 10.59515, + "27": 10.58426, + "28": 10.56233, + "29": 10.57012, + "30": 10.34552, + "31": 10.10047, + "32": 10.45375, + "33": 10.44623, + "34": 10.20608, + "35": 10.26241, + "36": 10.2124, + "37": 10.3252, + "38": 10.16775, + "39": 10.38332, + "40": 10.07236, + "41": 10.13863, + "42": 10.19811, + "43": 9.81071, + "44": 9.93244, + "45": 9.81098, + "46": 9.80879, + "47": 10.1261, + "48": 9.82105, + "49": 9.50626, + "50": 9.88418, + "51": 9.8366, + "52": 9.7254, + "53": 10.04687, + "54": 9.93029, + "55": 9.86374, + "56": 9.60183, + "57": 9.4509, + "58": 9.80845, + "59": 9.56672, + "60": 9.47963, + "61": 9.67901, + "62": 9.96737, + "63": 9.3516, + "64": 9.75605, + "65": 8.93065, + "66": 9.68055, + "67": 9.3589, + "68": 9.76988, + "69": 9.77495, + "70": 9.71218, + "71": 9.60756, + "72": 9.57084, + "73": 9.48407, + "74": 8.92824, + "75": 9.4005, + "76": 9.07193, + "77": 10.05226, + "78": 9.71515, + "79": 9.35771, + "80": 9.39078, + "81": 9.46751, + "82": 9.68504, + "83": 9.29556, + "84": 9.4053, + "85": 9.60138, + "86": 9.06772, + "87": 9.58501, + "88": 9.73362, + "89": 9.59515, + "90": 9.80502, + "91": 9.3255, + "92": 9.35334, + "93": 9.06984, + "94": 8.8223, + "95": 9.50821, + "96": 9.51534, + "97": 9.29768, + "98": 9.66205, + "99": 8.87695, + "100": 9.3924 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1727.0, + "4": 1835.0, + "5": 1840.0, + "6": 1719.0, + "7": 1740.0, + "8": 1591.0, + "9": 1839.0, + "10": 1380.0, + "11": 1856.0, 
+ "12": 1693.0, + "13": 1906.0, + "14": 1757.0, + "15": 1848.0, + "16": 1791.0, + "17": 1729.0, + "18": 1672.0, + "19": 1718.0, + "20": 1621.0, + "21": 1931.0, + "22": 1738.0, + "23": 1992.0, + "24": 1676.0, + "25": 1689.0, + "26": 1748.0, + "27": 1801.0, + "28": 1986.0, + "29": 2043.0, + "30": 1907.0, + "31": 1627.0, + "32": 1918.0, + "33": 2003.0, + "34": 1779.0, + "35": 1922.0, + "36": 1942.0, + "37": 2294.0, + "38": 2145.0, + "39": 2395.0, + "40": 2045.0, + "41": 2415.0, + "42": 2277.0, + "43": 1863.0, + "44": 2087.0, + "45": 2097.0, + "46": 2265.0, + "47": 2436.0, + "48": 2460.0, + "49": 2217.0, + "50": 2368.0, + "51": 2552.0, + "52": 2541.0, + "53": 2907.0, + "54": 2604.0, + "55": 2383.0, + "56": 2762.0, + "57": 2128.0, + "58": 3040.0, + "59": 2797.0, + "60": 2509.0, + "61": 3041.0, + "62": 2642.0, + "63": 2401.0, + "64": 2913.0, + "65": 2628.0, + "66": 2934.0, + "67": 2791.0, + "68": 2718.0, + "69": 3050.0, + "70": 3129.0, + "71": 3014.0, + "72": 2263.0, + "73": 2761.0, + "74": 1887.0, + "75": 2552.0, + "76": 3111.0, + "77": 3240.0, + "78": 3150.0, + "79": 3139.0, + "80": 3279.0, + "81": 3595.0, + "82": 3194.0, + "83": 2797.0, + "84": 3272.0, + "85": 3344.0, + "86": 2611.0, + "87": 3802.0, + "88": 3054.0, + "89": 3205.0, + "90": 2980.0, + "91": 2726.0, + "92": 3043.0, + "93": 2751.0, + "94": 3247.0, + "95": 3324.0, + "96": 3503.0, + "97": 3057.0, + "98": 3465.0, + "99": 3320.0, + "100": 3467.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 522345472.0, + "2": 522345472.0, + "3": 522345472.0, + "4": 522345472.0, + "5": 522345472.0, + "6": 522345472.0, + "7": 522345472.0, + "8": 522345472.0, + "9": 522345472.0, + "10": 522345472.0, + "11": 522345472.0, + "12": 522345472.0, + "13": 522345472.0, + "14": 522345472.0, + "15": 522345472.0, + "16": 522345472.0, + "17": 522345472.0, + "18": 522345472.0, + "19": 522345472.0, + "20": 522345472.0, + "21": 522345472.0, + "22": 522345472.0, + "23": 
522345472.0, + "24": 522345472.0, + "25": 522345472.0, + "26": 522345472.0, + "27": 522345472.0, + "28": 522345472.0, + "29": 522345472.0, + "30": 522345472.0, + "31": 522345472.0, + "32": 522345472.0, + "33": 522345472.0, + "34": 522345472.0, + "35": 522345472.0, + "36": 522345472.0, + "37": 522345472.0, + "38": 522345472.0, + "39": 522345472.0, + "40": 522345472.0, + "41": 522345472.0, + "42": 522345472.0, + "43": 522345472.0, + "44": 522345472.0, + "45": 522345472.0, + "46": 522345472.0, + "47": 522345472.0, + "48": 522345472.0, + "49": 522345472.0, + "50": 522345472.0, + "51": 522345472.0, + "52": 522345472.0, + "53": 522345472.0, + "54": 522345472.0, + "55": 522345472.0, + "56": 522345472.0, + "57": 522345472.0, + "58": 522345472.0, + "59": 522345472.0, + "60": 522345472.0, + "61": 522345472.0, + "62": 522345472.0, + "63": 522345472.0, + "64": 522345472.0, + "65": 522345472.0, + "66": 522345472.0, + "67": 522345472.0, + "68": 522345472.0, + "69": 522345472.0, + "70": 522345472.0, + "71": 522345472.0, + "72": 522345472.0, + "73": 522345472.0, + "74": 522345472.0, + "75": 522345472.0, + "76": 522345472.0, + "77": 522345472.0, + "78": 522345472.0, + "79": 522345472.0, + "80": 522345472.0, + "81": 522345472.0, + "82": 522345472.0, + "83": 522345472.0, + "84": 522345472.0, + "85": 522345472.0, + "86": 522345472.0, + "87": 522345472.0, + "88": 522345472.0, + "89": 522345472.0, + "90": 522345472.0, + "91": 522345472.0, + "92": 522345472.0, + "93": 522345472.0, + "94": 522345472.0, + "95": 522345472.0, + "96": 522345472.0, + "97": 522345472.0, + "98": 522345472.0, + "99": 522345472.0, + "100": 522345472.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3769790464.0, + "2": 3912107008.0, + "3": 3912107008.0, + "4": 3912107008.0, + "5": 3912107008.0, + "6": 3912107008.0, + "7": 3912107008.0, + "8": 3912107008.0, + "9": 3912107008.0, + "10": 3912107008.0, + "11": 3912107008.0, + "12": 3912107008.0, 
+ "13": 3912107008.0, + "14": 3912107008.0, + "15": 3912107008.0, + "16": 3912107008.0, + "17": 3912107008.0, + "18": 3912107008.0, + "19": 3912107008.0, + "20": 3912107008.0, + "21": 3912107008.0, + "22": 3912107008.0, + "23": 3912107008.0, + "24": 3912107008.0, + "25": 3912107008.0, + "26": 3912107008.0, + "27": 3912107008.0, + "28": 3912107008.0, + "29": 3912107008.0, + "30": 3912107008.0, + "31": 3912107008.0, + "32": 3912107008.0, + "33": 3912107008.0, + "34": 3912107008.0, + "35": 3912107008.0, + "36": 3912107008.0, + "37": 3912107008.0, + "38": 3912107008.0, + "39": 3912107008.0, + "40": 3912107008.0, + "41": 3912107008.0, + "42": 3912107008.0, + "43": 3912107008.0, + "44": 3912107008.0, + "45": 3912107008.0, + "46": 3912107008.0, + "47": 3912107008.0, + "48": 3912107008.0, + "49": 3912107008.0, + "50": 3912107008.0, + "51": 3912107008.0, + "52": 3912107008.0, + "53": 3912107008.0, + "54": 3912107008.0, + "55": 3912107008.0, + "56": 3912107008.0, + "57": 3912107008.0, + "58": 3912107008.0, + "59": 3912107008.0, + "60": 3912107008.0, + "61": 3912107008.0, + "62": 3912107008.0, + "63": 3912107008.0, + "64": 3912107008.0, + "65": 3912107008.0, + "66": 3912107008.0, + "67": 3912107008.0, + "68": 3912107008.0, + "69": 3912107008.0, + "70": 3912107008.0, + "71": 3912107008.0, + "72": 3912107008.0, + "73": 3912107008.0, + "74": 3912107008.0, + "75": 3912107008.0, + "76": 3912107008.0, + "77": 3912107008.0, + "78": 3912107008.0, + "79": 3912107008.0, + "80": 3912107008.0, + "81": 3912107008.0, + "82": 3912107008.0, + "83": 3912107008.0, + "84": 3912107008.0, + "85": 3912107008.0, + "86": 3912107008.0, + "87": 3912107008.0, + "88": 3912107008.0, + "89": 3912107008.0, + "90": 3912107008.0, + "91": 3912107008.0, + "92": 3912107008.0, + "93": 3912107008.0, + "94": 3912107008.0, + "95": 3912107008.0, + "96": 3912107008.0, + "97": 3912107008.0, + "98": 3912107008.0, + "99": 3912107008.0, + "100": 3912107008.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 
100, + "step_interval": 1, + "values": { + "1": 22.61328, + "2": 0.20632, + "3": 0.1825, + "4": 0.17425, + "5": 0.17426, + "6": 0.17288, + "7": 0.17611, + "8": 0.17588, + "9": 0.17544, + "10": 0.17232, + "11": 0.17362, + "12": 0.17368, + "13": 0.17578, + "14": 0.17305, + "15": 0.17514, + "16": 0.17367, + "17": 0.17474, + "18": 0.17196, + "19": 0.1737, + "20": 0.17359, + "21": 0.17277, + "22": 0.17502, + "23": 0.17321, + "24": 0.172, + "25": 0.17239, + "26": 0.17041, + "27": 0.17172, + "28": 0.17178, + "29": 0.17225, + "30": 0.17082, + "31": 0.17234, + "32": 0.17192, + "33": 0.17201, + "34": 0.17283, + "35": 0.17212, + "36": 0.17393, + "37": 0.17078, + "38": 0.17394, + "39": 0.17341, + "40": 0.17259, + "41": 0.17595, + "42": 0.17237, + "43": 0.17334, + "44": 0.17079, + "45": 0.17254, + "46": 0.17378, + "47": 0.17228, + "48": 0.17193, + "49": 0.17207, + "50": 0.17337, + "51": 0.18317, + "52": 0.44439, + "53": 0.17445, + "54": 0.1761, + "55": 0.17625, + "56": 0.17729, + "57": 0.17831, + "58": 0.17704, + "59": 0.17623, + "60": 0.17946, + "61": 0.17712, + "62": 0.17274, + "63": 0.17809, + "64": 0.17585, + "65": 0.179, + "66": 0.17777, + "67": 0.17718, + "68": 0.17654, + "69": 0.17491, + "70": 0.17913, + "71": 0.17578, + "72": 0.17669, + "73": 0.17735, + "74": 0.17979, + "75": 0.17759, + "76": 0.17852, + "77": 0.1802, + "78": 0.17531, + "79": 0.17834, + "80": 0.17782, + "81": 0.17526, + "82": 0.17347, + "83": 0.17511, + "84": 0.17403, + "85": 0.17634, + "86": 0.1725, + "87": 0.17606, + "88": 0.17534, + "89": 0.17477, + "90": 0.17578, + "91": 0.1753, + "92": 0.17582, + "93": 0.17671, + "94": 0.17621, + "95": 0.17573, + "96": 0.17511, + "97": 0.17469, + "98": 0.17498, + "99": 0.41864, + "100": 0.17148 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..c903b0c0464 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81396, + "4": 10.78497, + "5": 10.85284, + "6": 10.87449, + "7": 10.83201, + "8": 10.83297, + "9": 10.83935, + "10": 10.78455, + "11": 10.87798, + "12": 10.86112, + "13": 10.86444, + "14": 10.87605, + "15": 10.79229, + "16": 10.79509, + "17": 10.76768, + "18": 10.81006, + "19": 10.79716, + "20": 10.69212, + "21": 10.68168, + "22": 10.52085, + "23": 10.70898, + "24": 10.576, + "25": 10.52413, + "26": 10.59515, + "27": 10.58426, + "28": 10.56233, + "29": 10.57012, + "30": 10.34552, + "31": 10.10047, + "32": 10.45375, + "33": 10.44623, + "34": 10.20608, + "35": 10.26241, + "36": 10.2124, + "37": 10.3252, + "38": 10.16775, + "39": 10.38332, + "40": 10.07236, + "41": 10.13863, + "42": 10.19811, + "43": 9.81071, + "44": 9.93244, + "45": 9.81098, + "46": 9.80879, + "47": 10.1261, + "48": 9.82105, + "49": 9.50626, + "50": 9.88418, + "51": 9.8366, + "52": 9.7254, + "53": 10.04687, + "54": 9.93029, + "55": 9.86374, + "56": 9.60183, + "57": 9.4509, + "58": 9.80845, + "59": 9.56672, + "60": 9.47963, + "61": 9.67901, + "62": 9.96737, + "63": 9.3516, + "64": 9.75605, + "65": 8.93065, + "66": 9.68055, + "67": 9.3589, + "68": 9.76988, + "69": 9.77495, + "70": 9.71218, + "71": 9.60756, + "72": 9.57084, + "73": 9.48407, + "74": 8.92824, + "75": 9.4005, + "76": 9.07193, + "77": 10.05226, + "78": 9.71515, + "79": 9.35771, + "80": 9.39078, + "81": 9.46751, + "82": 9.68504, + "83": 
9.29556, + "84": 9.4053, + "85": 9.60138, + "86": 9.06772, + "87": 9.58501, + "88": 9.73362, + "89": 9.59515, + "90": 9.80502, + "91": 9.3255, + "92": 9.35334, + "93": 9.06984, + "94": 8.8223, + "95": 9.50821, + "96": 9.51534, + "97": 9.29768, + "98": 9.66205, + "99": 8.87695, + "100": 9.3924 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1727.0, + "4": 1835.0, + "5": 1840.0, + "6": 1719.0, + "7": 1740.0, + "8": 1591.0, + "9": 1839.0, + "10": 1380.0, + "11": 1856.0, + "12": 1693.0, + "13": 1906.0, + "14": 1757.0, + "15": 1848.0, + "16": 1791.0, + "17": 1729.0, + "18": 1672.0, + "19": 1718.0, + "20": 1621.0, + "21": 1931.0, + "22": 1738.0, + "23": 1992.0, + "24": 1676.0, + "25": 1689.0, + "26": 1748.0, + "27": 1801.0, + "28": 1986.0, + "29": 2043.0, + "30": 1907.0, + "31": 1627.0, + "32": 1918.0, + "33": 2003.0, + "34": 1779.0, + "35": 1922.0, + "36": 1942.0, + "37": 2294.0, + "38": 2145.0, + "39": 2395.0, + "40": 2045.0, + "41": 2415.0, + "42": 2277.0, + "43": 1863.0, + "44": 2087.0, + "45": 2097.0, + "46": 2265.0, + "47": 2436.0, + "48": 2460.0, + "49": 2217.0, + "50": 2368.0, + "51": 2552.0, + "52": 2541.0, + "53": 2907.0, + "54": 2604.0, + "55": 2383.0, + "56": 2762.0, + "57": 2128.0, + "58": 3040.0, + "59": 2797.0, + "60": 2509.0, + "61": 3041.0, + "62": 2642.0, + "63": 2401.0, + "64": 2913.0, + "65": 2628.0, + "66": 2934.0, + "67": 2791.0, + "68": 2718.0, + "69": 3050.0, + "70": 3129.0, + "71": 3014.0, + "72": 2263.0, + "73": 2761.0, + "74": 1887.0, + "75": 2552.0, + "76": 3111.0, + "77": 3240.0, + "78": 3150.0, + "79": 3139.0, + "80": 3279.0, + "81": 3595.0, + "82": 3194.0, + "83": 2797.0, + "84": 3272.0, + "85": 3344.0, + "86": 2611.0, + "87": 3802.0, + "88": 3054.0, + "89": 3205.0, + "90": 2980.0, + "91": 2726.0, + "92": 3043.0, + "93": 2751.0, + "94": 3247.0, + "95": 3324.0, + "96": 3503.0, + "97": 3057.0, + "98": 3465.0, + "99": 3320.0, + "100": 3467.0 + } + }, + 
"mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 519065600.0, + "2": 519065600.0, + "3": 519065600.0, + "4": 519065600.0, + "5": 519065600.0, + "6": 519065600.0, + "7": 519065600.0, + "8": 519065600.0, + "9": 519065600.0, + "10": 519065600.0, + "11": 519065600.0, + "12": 519065600.0, + "13": 519065600.0, + "14": 519065600.0, + "15": 519065600.0, + "16": 519065600.0, + "17": 519065600.0, + "18": 519065600.0, + "19": 519065600.0, + "20": 519065600.0, + "21": 519065600.0, + "22": 519065600.0, + "23": 519065600.0, + "24": 519065600.0, + "25": 519065600.0, + "26": 519065600.0, + "27": 519065600.0, + "28": 519065600.0, + "29": 519065600.0, + "30": 519065600.0, + "31": 519065600.0, + "32": 519065600.0, + "33": 519065600.0, + "34": 519065600.0, + "35": 519065600.0, + "36": 519065600.0, + "37": 519065600.0, + "38": 519065600.0, + "39": 519065600.0, + "40": 519065600.0, + "41": 519065600.0, + "42": 519065600.0, + "43": 519065600.0, + "44": 519065600.0, + "45": 519065600.0, + "46": 519065600.0, + "47": 519065600.0, + "48": 519065600.0, + "49": 519065600.0, + "50": 519065600.0, + "51": 519065600.0, + "52": 519065600.0, + "53": 519065600.0, + "54": 519065600.0, + "55": 519065600.0, + "56": 519065600.0, + "57": 519065600.0, + "58": 519065600.0, + "59": 519065600.0, + "60": 519065600.0, + "61": 519065600.0, + "62": 519065600.0, + "63": 519065600.0, + "64": 519065600.0, + "65": 519065600.0, + "66": 519065600.0, + "67": 519065600.0, + "68": 519065600.0, + "69": 519065600.0, + "70": 519065600.0, + "71": 519065600.0, + "72": 519065600.0, + "73": 519065600.0, + "74": 519065600.0, + "75": 519065600.0, + "76": 519065600.0, + "77": 519065600.0, + "78": 519065600.0, + "79": 519065600.0, + "80": 519065600.0, + "81": 519065600.0, + "82": 519065600.0, + "83": 519065600.0, + "84": 519065600.0, + "85": 519065600.0, + "86": 519065600.0, + "87": 519065600.0, + "88": 519065600.0, + "89": 519065600.0, + "90": 519065600.0, + "91": 
519065600.0, + "92": 519065600.0, + "93": 519065600.0, + "94": 519065600.0, + "95": 519065600.0, + "96": 519065600.0, + "97": 519065600.0, + "98": 519065600.0, + "99": 519065600.0, + "100": 519065600.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3767053312.0, + "2": 3907909632.0, + "3": 3907909632.0, + "4": 3907909632.0, + "5": 3907909632.0, + "6": 3907909632.0, + "7": 3907909632.0, + "8": 3907909632.0, + "9": 3907909632.0, + "10": 3907909632.0, + "11": 3907909632.0, + "12": 3907909632.0, + "13": 3907909632.0, + "14": 3907909632.0, + "15": 3907909632.0, + "16": 3907909632.0, + "17": 3907909632.0, + "18": 3907909632.0, + "19": 3907909632.0, + "20": 3907909632.0, + "21": 3907909632.0, + "22": 3907909632.0, + "23": 3907909632.0, + "24": 3907909632.0, + "25": 3907909632.0, + "26": 3907909632.0, + "27": 3907909632.0, + "28": 3907909632.0, + "29": 3907909632.0, + "30": 3907909632.0, + "31": 3907909632.0, + "32": 3907909632.0, + "33": 3907909632.0, + "34": 3907909632.0, + "35": 3907909632.0, + "36": 3907909632.0, + "37": 3907909632.0, + "38": 3907909632.0, + "39": 3907909632.0, + "40": 3907909632.0, + "41": 3907909632.0, + "42": 3907909632.0, + "43": 3907909632.0, + "44": 3907909632.0, + "45": 3907909632.0, + "46": 3907909632.0, + "47": 3907909632.0, + "48": 3907909632.0, + "49": 3907909632.0, + "50": 3907909632.0, + "51": 3907909632.0, + "52": 3907909632.0, + "53": 3907909632.0, + "54": 3907909632.0, + "55": 3907909632.0, + "56": 3907909632.0, + "57": 3907909632.0, + "58": 3907909632.0, + "59": 3907909632.0, + "60": 3907909632.0, + "61": 3907909632.0, + "62": 3907909632.0, + "63": 3907909632.0, + "64": 3907909632.0, + "65": 3907909632.0, + "66": 3907909632.0, + "67": 3907909632.0, + "68": 3907909632.0, + "69": 3907909632.0, + "70": 3907909632.0, + "71": 3907909632.0, + "72": 3907909632.0, + "73": 3907909632.0, + "74": 3907909632.0, + "75": 3907909632.0, + "76": 3907909632.0, + "77": 
3907909632.0, + "78": 3907909632.0, + "79": 3907909632.0, + "80": 3907909632.0, + "81": 3907909632.0, + "82": 3907909632.0, + "83": 3907909632.0, + "84": 3907909632.0, + "85": 3907909632.0, + "86": 3907909632.0, + "87": 3907909632.0, + "88": 3907909632.0, + "89": 3907909632.0, + "90": 3907909632.0, + "91": 3907909632.0, + "92": 3907909632.0, + "93": 3907909632.0, + "94": 3907909632.0, + "95": 3907909632.0, + "96": 3907909632.0, + "97": 3907909632.0, + "98": 3907909632.0, + "99": 3907909632.0, + "100": 3907909632.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 23.61626, + "2": 0.20825, + "3": 0.18598, + "4": 0.17768, + "5": 0.1774, + "6": 0.17565, + "7": 0.17554, + "8": 0.17574, + "9": 0.17822, + "10": 0.18542, + "11": 0.3344, + "12": 0.17809, + "13": 0.17774, + "14": 0.17628, + "15": 0.17758, + "16": 0.17752, + "17": 0.17677, + "18": 0.17866, + "19": 0.17775, + "20": 0.17503, + "21": 0.32873, + "22": 0.17696, + "23": 0.17781, + "24": 0.17815, + "25": 0.17477, + "26": 0.17422, + "27": 0.17425, + "28": 0.17474, + "29": 0.17648, + "30": 0.17377, + "31": 0.33173, + "32": 0.17366, + "33": 0.17393, + "34": 0.17333, + "35": 0.17469, + "36": 0.1737, + "37": 0.17376, + "38": 0.17511, + "39": 0.17374, + "40": 0.38462, + "41": 0.33019, + "42": 0.18095, + "43": 0.17639, + "44": 0.17398, + "45": 0.17539, + "46": 0.17369, + "47": 0.1733, + "48": 0.17495, + "49": 0.1737, + "50": 0.1733, + "51": 0.3281, + "52": 0.17681, + "53": 0.17706, + "54": 0.17883, + "55": 0.18057, + "56": 0.18194, + "57": 0.18281, + "58": 0.1833, + "59": 0.18471, + "60": 0.40872, + "61": 0.33723, + "62": 0.18166, + "63": 0.38808, + "64": 0.17968, + "65": 0.18147, + "66": 0.17961, + "67": 0.17851, + "68": 0.17748, + "69": 0.17797, + "70": 0.17994, + "71": 0.33627, + "72": 0.17952, + "73": 0.178, + "74": 0.17922, + "75": 0.17803, + "76": 0.18159, + "77": 0.17818, + "78": 0.17782, + "79": 0.36281, + "80": 0.18081, + "81": 0.33928, + "82": 
0.17691, + "83": 0.17684, + "84": 0.17781, + "85": 0.18012, + "86": 0.17905, + "87": 0.17785, + "88": 0.17817, + "89": 0.17743, + "90": 0.17902, + "91": 0.33283, + "92": 0.17956, + "93": 0.17935, + "94": 0.18039, + "95": 0.17971, + "96": 0.18011, + "97": 0.18031, + "98": 0.1785, + "99": 0.18155, + "100": 0.17741 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..9d14156b3a0 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82005, + "2": 10.81907, + "3": 10.81396, + "4": 10.78497, + "5": 10.85284, + "6": 10.87449, + "7": 10.83201, + "8": 10.83297, + "9": 10.83935, + "10": 10.78455, + "11": 10.87798, + "12": 10.86112, + "13": 10.86444, + "14": 10.87605, + "15": 10.79229, + "16": 10.79509, + "17": 10.76768, + "18": 10.81006, + "19": 10.79716, + "20": 10.69212, + "21": 10.68168, + "22": 10.52085, + "23": 10.70898, + "24": 10.576, + "25": 10.52413, + "26": 10.59515, + "27": 10.58426, + "28": 10.56233, + "29": 10.57012, + "30": 10.34552, + "31": 10.10047, + "32": 10.45375, + "33": 10.44623, + "34": 10.20608, + "35": 10.26241, + "36": 10.2124, + "37": 10.3252, + "38": 10.16775, + "39": 10.38332, + "40": 10.07236, + "41": 10.13863, + "42": 10.19811, + "43": 9.81071, + "44": 9.93244, + "45": 9.81098, + "46": 9.80879, + "47": 10.1261, + "48": 9.82105, + "49": 9.50626, + "50": 9.88418, + "51": 
9.8366, + "52": 9.7254, + "53": 10.04687, + "54": 9.93029, + "55": 9.86374, + "56": 9.60183, + "57": 9.4509, + "58": 9.80845, + "59": 9.56672, + "60": 9.47963, + "61": 9.67901, + "62": 9.96737, + "63": 9.3516, + "64": 9.75605, + "65": 8.93065, + "66": 9.68055, + "67": 9.3589, + "68": 9.76988, + "69": 9.77495, + "70": 9.71218, + "71": 9.60756, + "72": 9.57084, + "73": 9.48407, + "74": 8.92824, + "75": 9.4005, + "76": 9.07193, + "77": 10.05226, + "78": 9.71515, + "79": 9.35771, + "80": 9.39078, + "81": 9.46751, + "82": 9.68504, + "83": 9.29556, + "84": 9.4053, + "85": 9.60138, + "86": 9.06772, + "87": 9.58501, + "88": 9.73362, + "89": 9.59515, + "90": 9.80502, + "91": 9.3255, + "92": 9.35334, + "93": 9.06984, + "94": 8.8223, + "95": 9.50821, + "96": 9.51534, + "97": 9.29768, + "98": 9.66205, + "99": 8.87695, + "100": 9.3924 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1559.0, + "2": 1591.0, + "3": 1727.0, + "4": 1835.0, + "5": 1840.0, + "6": 1719.0, + "7": 1740.0, + "8": 1591.0, + "9": 1839.0, + "10": 1380.0, + "11": 1856.0, + "12": 1693.0, + "13": 1906.0, + "14": 1757.0, + "15": 1848.0, + "16": 1791.0, + "17": 1729.0, + "18": 1672.0, + "19": 1718.0, + "20": 1621.0, + "21": 1931.0, + "22": 1738.0, + "23": 1992.0, + "24": 1676.0, + "25": 1689.0, + "26": 1748.0, + "27": 1801.0, + "28": 1986.0, + "29": 2043.0, + "30": 1907.0, + "31": 1627.0, + "32": 1918.0, + "33": 2003.0, + "34": 1779.0, + "35": 1922.0, + "36": 1942.0, + "37": 2294.0, + "38": 2145.0, + "39": 2395.0, + "40": 2045.0, + "41": 2415.0, + "42": 2277.0, + "43": 1863.0, + "44": 2087.0, + "45": 2097.0, + "46": 2265.0, + "47": 2436.0, + "48": 2460.0, + "49": 2217.0, + "50": 2368.0, + "51": 2552.0, + "52": 2541.0, + "53": 2907.0, + "54": 2604.0, + "55": 2383.0, + "56": 2762.0, + "57": 2128.0, + "58": 3040.0, + "59": 2797.0, + "60": 2509.0, + "61": 3041.0, + "62": 2642.0, + "63": 2401.0, + "64": 2913.0, + "65": 2628.0, + "66": 2934.0, + "67": 2791.0, + 
"68": 2718.0, + "69": 3050.0, + "70": 3129.0, + "71": 3014.0, + "72": 2263.0, + "73": 2761.0, + "74": 1887.0, + "75": 2552.0, + "76": 3111.0, + "77": 3240.0, + "78": 3150.0, + "79": 3139.0, + "80": 3279.0, + "81": 3595.0, + "82": 3194.0, + "83": 2797.0, + "84": 3272.0, + "85": 3344.0, + "86": 2611.0, + "87": 3802.0, + "88": 3054.0, + "89": 3205.0, + "90": 2980.0, + "91": 2726.0, + "92": 3043.0, + "93": 2751.0, + "94": 3247.0, + "95": 3324.0, + "96": 3503.0, + "97": 3057.0, + "98": 3465.0, + "99": 3320.0, + "100": 3467.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 519065600.0, + "2": 519065600.0, + "3": 519065600.0, + "4": 519065600.0, + "5": 519065600.0, + "6": 519065600.0, + "7": 519065600.0, + "8": 519065600.0, + "9": 519065600.0, + "10": 519065600.0, + "11": 519065600.0, + "12": 519065600.0, + "13": 519065600.0, + "14": 519065600.0, + "15": 519065600.0, + "16": 519065600.0, + "17": 519065600.0, + "18": 519065600.0, + "19": 519065600.0, + "20": 519065600.0, + "21": 519065600.0, + "22": 519065600.0, + "23": 519065600.0, + "24": 519065600.0, + "25": 519065600.0, + "26": 519065600.0, + "27": 519065600.0, + "28": 519065600.0, + "29": 519065600.0, + "30": 519065600.0, + "31": 519065600.0, + "32": 519065600.0, + "33": 519065600.0, + "34": 519065600.0, + "35": 519065600.0, + "36": 519065600.0, + "37": 519065600.0, + "38": 519065600.0, + "39": 519065600.0, + "40": 519065600.0, + "41": 519065600.0, + "42": 519065600.0, + "43": 519065600.0, + "44": 519065600.0, + "45": 519065600.0, + "46": 519065600.0, + "47": 519065600.0, + "48": 519065600.0, + "49": 519065600.0, + "50": 519065600.0, + "51": 519065600.0, + "52": 519065600.0, + "53": 519065600.0, + "54": 519065600.0, + "55": 519065600.0, + "56": 519065600.0, + "57": 519065600.0, + "58": 519065600.0, + "59": 519065600.0, + "60": 519065600.0, + "61": 519065600.0, + "62": 519065600.0, + "63": 519065600.0, + "64": 519065600.0, + "65": 519065600.0, + "66": 
519065600.0, + "67": 519065600.0, + "68": 519065600.0, + "69": 519065600.0, + "70": 519065600.0, + "71": 519065600.0, + "72": 519065600.0, + "73": 519065600.0, + "74": 519065600.0, + "75": 519065600.0, + "76": 519065600.0, + "77": 519065600.0, + "78": 519065600.0, + "79": 519065600.0, + "80": 519065600.0, + "81": 519065600.0, + "82": 519065600.0, + "83": 519065600.0, + "84": 519065600.0, + "85": 519065600.0, + "86": 519065600.0, + "87": 519065600.0, + "88": 519065600.0, + "89": 519065600.0, + "90": 519065600.0, + "91": 519065600.0, + "92": 519065600.0, + "93": 519065600.0, + "94": 519065600.0, + "95": 519065600.0, + "96": 519065600.0, + "97": 519065600.0, + "98": 519065600.0, + "99": 519065600.0, + "100": 519065600.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3767053312.0, + "2": 3907909632.0, + "3": 3907909632.0, + "4": 3907909632.0, + "5": 3907909632.0, + "6": 3907909632.0, + "7": 3907909632.0, + "8": 3907909632.0, + "9": 3907909632.0, + "10": 3907909632.0, + "11": 3907909632.0, + "12": 3907909632.0, + "13": 3907909632.0, + "14": 3907909632.0, + "15": 3907909632.0, + "16": 3907909632.0, + "17": 3907909632.0, + "18": 3907909632.0, + "19": 3907909632.0, + "20": 3907909632.0, + "21": 3907909632.0, + "22": 3907909632.0, + "23": 3907909632.0, + "24": 3907909632.0, + "25": 3907909632.0, + "26": 3907909632.0, + "27": 3907909632.0, + "28": 3907909632.0, + "29": 3907909632.0, + "30": 3907909632.0, + "31": 3907909632.0, + "32": 3907909632.0, + "33": 3907909632.0, + "34": 3907909632.0, + "35": 3907909632.0, + "36": 3907909632.0, + "37": 3907909632.0, + "38": 3907909632.0, + "39": 3907909632.0, + "40": 3907909632.0, + "41": 3907909632.0, + "42": 3907909632.0, + "43": 3907909632.0, + "44": 3907909632.0, + "45": 3907909632.0, + "46": 3907909632.0, + "47": 3907909632.0, + "48": 3907909632.0, + "49": 3907909632.0, + "50": 3907909632.0, + "51": 3907909632.0, + "52": 3907909632.0, + "53": 3907909632.0, + 
"54": 3907909632.0, + "55": 3907909632.0, + "56": 3907909632.0, + "57": 3907909632.0, + "58": 3907909632.0, + "59": 3907909632.0, + "60": 3907909632.0, + "61": 3907909632.0, + "62": 3907909632.0, + "63": 3907909632.0, + "64": 3907909632.0, + "65": 3907909632.0, + "66": 3907909632.0, + "67": 3907909632.0, + "68": 3907909632.0, + "69": 3907909632.0, + "70": 3907909632.0, + "71": 3907909632.0, + "72": 3907909632.0, + "73": 3907909632.0, + "74": 3907909632.0, + "75": 3907909632.0, + "76": 3907909632.0, + "77": 3907909632.0, + "78": 3907909632.0, + "79": 3907909632.0, + "80": 3907909632.0, + "81": 3907909632.0, + "82": 3907909632.0, + "83": 3907909632.0, + "84": 3907909632.0, + "85": 3907909632.0, + "86": 3907909632.0, + "87": 3907909632.0, + "88": 3907909632.0, + "89": 3907909632.0, + "90": 3907909632.0, + "91": 3907909632.0, + "92": 3907909632.0, + "93": 3907909632.0, + "94": 3907909632.0, + "95": 3907909632.0, + "96": 3907909632.0, + "97": 3907909632.0, + "98": 3907909632.0, + "99": 3907909632.0, + "100": 3907909632.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 21.99574, + "2": 0.195, + "3": 0.1744, + "4": 0.17427, + "5": 0.17308, + "6": 0.16861, + "7": 0.17429, + "8": 0.1716, + "9": 0.16924, + "10": 0.16858, + "11": 0.33896, + "12": 0.17029, + "13": 0.16981, + "14": 0.16723, + "15": 0.16853, + "16": 0.16865, + "17": 0.16777, + "18": 0.16879, + "19": 0.16785, + "20": 0.16886, + "21": 0.3357, + "22": 0.17081, + "23": 0.17048, + "24": 0.16879, + "25": 0.1687, + "26": 0.16713, + "27": 0.16939, + "28": 0.1692, + "29": 0.17134, + "30": 0.17092, + "31": 0.3812, + "32": 0.17397, + "33": 0.17588, + "34": 0.17999, + "35": 0.17703, + "36": 0.1801, + "37": 0.1707, + "38": 0.17289, + "39": 0.17016, + "40": 0.17112, + "41": 0.33944, + "42": 0.17206, + "43": 0.17137, + "44": 0.16906, + "45": 0.42618, + "46": 0.1703, + "47": 0.17243, + "48": 0.17004, + "49": 0.16966, + "50": 0.16756, + "51": 0.51274, + "52": 
0.17278, + "53": 0.17206, + "54": 0.17409, + "55": 0.17339, + "56": 0.17492, + "57": 0.17254, + "58": 0.17691, + "59": 0.46979, + "60": 0.37194, + "61": 0.34378, + "62": 0.17598, + "63": 0.48505, + "64": 0.17494, + "65": 0.18089, + "66": 0.17632, + "67": 0.1754, + "68": 0.17476, + "69": 0.172, + "70": 0.1727, + "71": 0.33976, + "72": 0.17542, + "73": 0.17238, + "74": 0.17531, + "75": 0.1747, + "76": 0.17675, + "77": 0.17303, + "78": 0.17397, + "79": 0.17413, + "80": 0.17841, + "81": 0.34399, + "82": 0.17266, + "83": 0.17424, + "84": 0.17542, + "85": 0.17322, + "86": 0.17628, + "87": 0.17307, + "88": 0.17357, + "89": 0.17221, + "90": 0.17402, + "91": 0.34115, + "92": 0.17524, + "93": 0.21142, + "94": 0.18543, + "95": 0.19932, + "96": 0.20217, + "97": 0.21251, + "98": 0.20217, + "99": 0.19729, + "100": 0.19649 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..31d5de38121 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.9359, + "2": 10.92235, + "3": 10.92366, + "4": 10.90567, + "5": 10.93225, + "6": 10.93547, + "7": 10.92702, + "8": 10.92052, + "9": 10.9395, + "10": 10.91083, + "11": 10.94242, + "12": 10.93185, + "13": 10.92496, + "14": 10.94487, + "15": 10.85723, + "16": 10.88074, + "17": 10.87011, + "18": 10.88561, + "19": 10.87042, + "20": 10.77088, + "21": 10.7565, + "22": 10.62779, + 
"23": 10.77022, + "24": 10.65205, + "25": 10.60556, + "26": 10.66333, + "27": 10.66552, + "28": 10.60547, + "29": 10.6471, + "30": 10.40549, + "31": 10.16719, + "32": 10.51369, + "33": 10.5051, + "34": 10.27046, + "35": 10.31366, + "36": 10.27241, + "37": 10.38617, + "38": 10.23179, + "39": 10.45437, + "40": 10.12334, + "41": 10.19576, + "42": 10.25282, + "43": 9.86635, + "44": 9.99502, + "45": 9.87564, + "46": 9.86006, + "47": 10.19474, + "48": 9.87777, + "49": 9.56673, + "50": 9.94452, + "51": 9.89728, + "52": 9.7879, + "53": 10.1278, + "54": 9.98346, + "55": 9.90094, + "56": 9.66557, + "57": 9.50042, + "58": 9.87703, + "59": 9.61777, + "60": 9.55238, + "61": 9.71568, + "62": 10.03384, + "63": 9.41318, + "64": 9.8198, + "65": 8.96792, + "66": 9.74791, + "67": 9.39412, + "68": 9.82081, + "69": 9.82389, + "70": 9.77835, + "71": 9.64728, + "72": 9.59599, + "73": 9.53704, + "74": 8.96545, + "75": 9.44605, + "76": 9.10011, + "77": 10.09977, + "78": 9.7355, + "79": 9.38643, + "80": 9.42014, + "81": 9.50916, + "82": 9.72306, + "83": 9.3462, + "84": 9.44805, + "85": 9.64324, + "86": 9.07728, + "87": 9.61635, + "88": 9.79137, + "89": 9.61978, + "90": 9.85827, + "91": 9.35282, + "92": 9.38717, + "93": 9.08084, + "94": 8.82234, + "95": 9.52085, + "96": 9.54578, + "97": 9.34183, + "98": 9.70521, + "99": 8.89223, + "100": 9.43415 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 22727686.0, + "2": 22924976.0, + "3": 22597376.0, + "4": 23218740.0, + "5": 22715312.0, + "6": 23020980.0, + "7": 22770736.0, + "8": 22927078.0, + "9": 22841964.0, + "10": 22919060.0, + "11": 22501344.0, + "12": 22460424.0, + "13": 22916824.0, + "14": 22388904.0, + "15": 22821200.0, + "16": 22829956.0, + "17": 22819072.0, + "18": 22582680.0, + "19": 22618528.0, + "20": 22693840.0, + "21": 22739692.0, + "22": 22799900.0, + "23": 22538946.0, + "24": 22771530.0, + "25": 22819524.0, + "26": 22548320.0, + "27": 22468868.0, + "28": 22452892.0, + "29": 
22530184.0, + "30": 22631232.0, + "31": 22955646.0, + "32": 22584920.0, + "33": 22558000.0, + "34": 22835968.0, + "35": 22787888.0, + "36": 22589844.0, + "37": 22497188.0, + "38": 22896516.0, + "39": 22801334.0, + "40": 22658144.0, + "41": 22659958.0, + "42": 22667478.0, + "43": 22975596.0, + "44": 22746734.0, + "45": 22674630.0, + "46": 22884436.0, + "47": 22633878.0, + "48": 22929042.0, + "49": 22727064.0, + "50": 22904452.0, + "51": 22791508.0, + "52": 22748880.0, + "53": 22925802.0, + "54": 22840006.0, + "55": 22519094.0, + "56": 22878426.0, + "57": 23113192.0, + "58": 22845340.0, + "59": 22716044.0, + "60": 22743052.0, + "61": 22724280.0, + "62": 22673222.0, + "63": 22845776.0, + "64": 22823900.0, + "65": 23061016.0, + "66": 22729616.0, + "67": 22907968.0, + "68": 22610332.0, + "69": 22584232.0, + "70": 22829332.0, + "71": 22748216.0, + "72": 22654286.0, + "73": 22740516.0, + "74": 23047704.0, + "75": 23054164.0, + "76": 22901462.0, + "77": 22272388.0, + "78": 22789468.0, + "79": 22744352.0, + "80": 22707344.0, + "81": 22890704.0, + "82": 22777178.0, + "83": 22839028.0, + "84": 23010036.0, + "85": 22712182.0, + "86": 23103124.0, + "87": 22735052.0, + "88": 22637176.0, + "89": 22499076.0, + "90": 22971846.0, + "91": 22767066.0, + "92": 22808462.0, + "93": 22659702.0, + "94": 22912288.0, + "95": 23047676.0, + "96": 22828984.0, + "97": 22608528.0, + "98": 22763476.0, + "99": 22905460.0, + "100": 23015938.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 519065600.0, + "2": 519065600.0, + "3": 519065600.0, + "4": 519065600.0, + "5": 519065600.0, + "6": 519065600.0, + "7": 519065600.0, + "8": 519065600.0, + "9": 519065600.0, + "10": 519065600.0, + "11": 519065600.0, + "12": 519065600.0, + "13": 519065600.0, + "14": 519065600.0, + "15": 519065600.0, + "16": 519065600.0, + "17": 519065600.0, + "18": 519065600.0, + "19": 519065600.0, + "20": 519065600.0, + "21": 519065600.0, + "22": 519065600.0, + 
"23": 519065600.0, + "24": 519065600.0, + "25": 519065600.0, + "26": 519065600.0, + "27": 519065600.0, + "28": 519065600.0, + "29": 519065600.0, + "30": 519065600.0, + "31": 519065600.0, + "32": 519065600.0, + "33": 519065600.0, + "34": 519065600.0, + "35": 519065600.0, + "36": 519065600.0, + "37": 519065600.0, + "38": 519065600.0, + "39": 519065600.0, + "40": 519065600.0, + "41": 519065600.0, + "42": 519065600.0, + "43": 519065600.0, + "44": 519065600.0, + "45": 519065600.0, + "46": 519065600.0, + "47": 519065600.0, + "48": 519065600.0, + "49": 519065600.0, + "50": 519065600.0, + "51": 519065600.0, + "52": 519065600.0, + "53": 519065600.0, + "54": 519065600.0, + "55": 519065600.0, + "56": 519065600.0, + "57": 519065600.0, + "58": 519065600.0, + "59": 519065600.0, + "60": 519065600.0, + "61": 519065600.0, + "62": 519065600.0, + "63": 519065600.0, + "64": 519065600.0, + "65": 519065600.0, + "66": 519065600.0, + "67": 519065600.0, + "68": 519065600.0, + "69": 519065600.0, + "70": 519065600.0, + "71": 519065600.0, + "72": 519065600.0, + "73": 519065600.0, + "74": 519065600.0, + "75": 519065600.0, + "76": 519065600.0, + "77": 519065600.0, + "78": 519065600.0, + "79": 519065600.0, + "80": 519065600.0, + "81": 519065600.0, + "82": 519065600.0, + "83": 519065600.0, + "84": 519065600.0, + "85": 519065600.0, + "86": 519065600.0, + "87": 519065600.0, + "88": 519065600.0, + "89": 519065600.0, + "90": 519065600.0, + "91": 519065600.0, + "92": 519065600.0, + "93": 519065600.0, + "94": 519065600.0, + "95": 519065600.0, + "96": 519065600.0, + "97": 519065600.0, + "98": 519065600.0, + "99": 519065600.0, + "100": 519065600.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3767053312.0, + "2": 3907909632.0, + "3": 3907909632.0, + "4": 3907909632.0, + "5": 3907909632.0, + "6": 3907909632.0, + "7": 3907909632.0, + "8": 3907909632.0, + "9": 3907909632.0, + "10": 3907909632.0, + "11": 3907909632.0, + "12": 
3907909632.0, + "13": 3907909632.0, + "14": 3907909632.0, + "15": 3907909632.0, + "16": 3907909632.0, + "17": 3907909632.0, + "18": 3907909632.0, + "19": 3907909632.0, + "20": 3907909632.0, + "21": 3907909632.0, + "22": 3907909632.0, + "23": 3907909632.0, + "24": 3907909632.0, + "25": 3907909632.0, + "26": 3907909632.0, + "27": 3907909632.0, + "28": 3907909632.0, + "29": 3907909632.0, + "30": 3907909632.0, + "31": 3907909632.0, + "32": 3907909632.0, + "33": 3907909632.0, + "34": 3907909632.0, + "35": 3907909632.0, + "36": 3907909632.0, + "37": 3907909632.0, + "38": 3907909632.0, + "39": 3907909632.0, + "40": 3907909632.0, + "41": 3907909632.0, + "42": 3907909632.0, + "43": 3907909632.0, + "44": 3907909632.0, + "45": 3907909632.0, + "46": 3907909632.0, + "47": 3907909632.0, + "48": 3907909632.0, + "49": 3907909632.0, + "50": 3907909632.0, + "51": 3907909632.0, + "52": 3907909632.0, + "53": 3907909632.0, + "54": 3907909632.0, + "55": 3907909632.0, + "56": 3907909632.0, + "57": 3907909632.0, + "58": 3907909632.0, + "59": 3907909632.0, + "60": 3907909632.0, + "61": 3907909632.0, + "62": 3907909632.0, + "63": 3907909632.0, + "64": 3907909632.0, + "65": 3907909632.0, + "66": 3907909632.0, + "67": 3907909632.0, + "68": 3907909632.0, + "69": 3907909632.0, + "70": 3907909632.0, + "71": 3907909632.0, + "72": 3907909632.0, + "73": 3907909632.0, + "74": 3907909632.0, + "75": 3907909632.0, + "76": 3907909632.0, + "77": 3907909632.0, + "78": 3907909632.0, + "79": 3907909632.0, + "80": 3907909632.0, + "81": 3907909632.0, + "82": 3907909632.0, + "83": 3907909632.0, + "84": 3907909632.0, + "85": 3907909632.0, + "86": 3907909632.0, + "87": 3907909632.0, + "88": 3907909632.0, + "89": 3907909632.0, + "90": 3907909632.0, + "91": 3907909632.0, + "92": 3907909632.0, + "93": 3907909632.0, + "94": 3907909632.0, + "95": 3907909632.0, + "96": 3907909632.0, + "97": 3907909632.0, + "98": 3907909632.0, + "99": 3907909632.0, + "100": 3907909632.0 + } + }, + "iteration-time": { + "start_step": 1, 
+ "end_step": 100, + "step_interval": 1, + "values": { + "1": 20.87438, + "2": 0.21694, + "3": 0.17509, + "4": 0.17193, + "5": 0.17145, + "6": 0.17454, + "7": 0.1709, + "8": 0.1729, + "9": 0.17295, + "10": 0.17277, + "11": 0.17318, + "12": 0.17273, + "13": 0.171, + "14": 0.17232, + "15": 0.1722, + "16": 0.17261, + "17": 0.17438, + "18": 0.17353, + "19": 0.1731, + "20": 0.17122, + "21": 0.17049, + "22": 0.17348, + "23": 0.17169, + "24": 0.17293, + "25": 0.17364, + "26": 0.17003, + "27": 0.17011, + "28": 0.17126, + "29": 0.1722, + "30": 0.17039, + "31": 0.17016, + "32": 0.17105, + "33": 0.16994, + "34": 0.17076, + "35": 0.17327, + "36": 0.17175, + "37": 0.17048, + "38": 0.1719, + "39": 0.17008, + "40": 0.17063, + "41": 0.17257, + "42": 0.17094, + "43": 0.17115, + "44": 0.17118, + "45": 0.171, + "46": 0.17132, + "47": 0.16943, + "48": 0.17114, + "49": 0.17083, + "50": 0.16974, + "51": 0.17654, + "52": 0.17131, + "53": 0.35484, + "54": 0.16981, + "55": 0.16969, + "56": 0.17178, + "57": 0.16951, + "58": 0.16856, + "59": 0.17046, + "60": 0.45725, + "61": 0.17092, + "62": 0.171, + "63": 0.17125, + "64": 0.17131, + "65": 0.17462, + "66": 0.17192, + "67": 0.16865, + "68": 0.17104, + "69": 0.16936, + "70": 0.17219, + "71": 0.174, + "72": 0.17689, + "73": 0.17007, + "74": 0.16999, + "75": 0.16903, + "76": 0.17096, + "77": 0.16876, + "78": 0.17318, + "79": 0.17216, + "80": 0.17036, + "81": 0.16928, + "82": 0.17019, + "83": 0.17001, + "84": 0.17182, + "85": 0.16951, + "86": 0.4678, + "87": 0.16886, + "88": 0.1689, + "89": 0.16837, + "90": 0.16751, + "91": 0.168, + "92": 0.1724, + "93": 0.16907, + "94": 0.17236, + "95": 0.16852, + "96": 0.16884, + "97": 0.16823, + "98": 0.16821, + "99": 0.16981, + "100": 0.1715 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..0805966b94c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.9359, + "2": 10.92235, + "3": 10.92366, + "4": 10.90567, + "5": 10.93225, + "6": 10.93547, + "7": 10.92702, + "8": 10.92052, + "9": 10.9395, + "10": 10.91083, + "11": 10.94242, + "12": 10.93185, + "13": 10.92496, + "14": 10.94487, + "15": 10.85723, + "16": 10.88074, + "17": 10.87011, + "18": 10.88561, + "19": 10.87042, + "20": 10.77088, + "21": 10.7565, + "22": 10.62779, + "23": 10.77022, + "24": 10.65205, + "25": 10.60556, + "26": 10.66333, + "27": 10.66552, + "28": 10.60547, + "29": 10.6471, + "30": 10.40549, + "31": 10.16719, + "32": 10.51369, + "33": 10.5051, + "34": 10.27046, + "35": 10.31366, + "36": 10.27241, + "37": 10.38617, + "38": 10.23179, + "39": 10.45437, + "40": 10.12334, + "41": 10.19576, + "42": 10.25282, + "43": 9.86635, + "44": 9.99502, + "45": 9.87564, + "46": 9.86006, + "47": 10.19474, + "48": 9.87777, + "49": 9.56673, + "50": 9.94452, + "51": 9.89728, + "52": 9.7879, + "53": 10.1278, + "54": 9.98346, + "55": 9.90094, + "56": 9.66557, + "57": 9.50042, + "58": 9.87703, + "59": 9.61777, + "60": 9.55238, + "61": 9.71568, + "62": 10.03384, + "63": 9.41318, + "64": 9.8198, + "65": 8.96792, + "66": 9.74791, + "67": 9.39412, + "68": 9.82081, + "69": 9.82389, + "70": 9.77835, + "71": 9.64728, + "72": 9.59599, + "73": 9.53704, + "74": 8.96545, + "75": 9.44605, + "76": 9.10011, + "77": 10.09977, + "78": 9.7355, + "79": 9.38643, + "80": 9.42014, + "81": 9.50916, + "82": 9.72306, + "83": 9.3462, + "84": 
9.44805, + "85": 9.64324, + "86": 9.07728, + "87": 9.61635, + "88": 9.79137, + "89": 9.61978, + "90": 9.85827, + "91": 9.35282, + "92": 9.38717, + "93": 9.08084, + "94": 8.82234, + "95": 9.52085, + "96": 9.54578, + "97": 9.34183, + "98": 9.70521, + "99": 8.89223, + "100": 9.43415 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 22727686.0, + "2": 22924976.0, + "3": 22597376.0, + "4": 23218740.0, + "5": 22715312.0, + "6": 23020980.0, + "7": 22770736.0, + "8": 22927078.0, + "9": 22841964.0, + "10": 22919060.0, + "11": 22501344.0, + "12": 22460424.0, + "13": 22916824.0, + "14": 22388904.0, + "15": 22821200.0, + "16": 22829956.0, + "17": 22819072.0, + "18": 22582680.0, + "19": 22618528.0, + "20": 22693840.0, + "21": 22739692.0, + "22": 22799900.0, + "23": 22538946.0, + "24": 22771530.0, + "25": 22819524.0, + "26": 22548320.0, + "27": 22468868.0, + "28": 22452892.0, + "29": 22530184.0, + "30": 22631232.0, + "31": 22955646.0, + "32": 22584920.0, + "33": 22558000.0, + "34": 22835968.0, + "35": 22787888.0, + "36": 22589844.0, + "37": 22497188.0, + "38": 22896516.0, + "39": 22801334.0, + "40": 22658144.0, + "41": 22659958.0, + "42": 22667478.0, + "43": 22975596.0, + "44": 22746734.0, + "45": 22674630.0, + "46": 22884436.0, + "47": 22633878.0, + "48": 22929042.0, + "49": 22727064.0, + "50": 22904452.0, + "51": 22791508.0, + "52": 22748880.0, + "53": 22925802.0, + "54": 22840006.0, + "55": 22519094.0, + "56": 22878426.0, + "57": 23113192.0, + "58": 22845340.0, + "59": 22716044.0, + "60": 22743052.0, + "61": 22724280.0, + "62": 22673222.0, + "63": 22845776.0, + "64": 22823900.0, + "65": 23061016.0, + "66": 22729616.0, + "67": 22907968.0, + "68": 22610332.0, + "69": 22584232.0, + "70": 22829332.0, + "71": 22748216.0, + "72": 22654286.0, + "73": 22740516.0, + "74": 23047704.0, + "75": 23054164.0, + "76": 22901462.0, + "77": 22272388.0, + "78": 22789468.0, + "79": 22744352.0, + "80": 22707344.0, + "81": 22890704.0, + "82": 
22777178.0, + "83": 22839028.0, + "84": 23010036.0, + "85": 22712182.0, + "86": 23103124.0, + "87": 22735052.0, + "88": 22637176.0, + "89": 22499076.0, + "90": 22971846.0, + "91": 22767066.0, + "92": 22808462.0, + "93": 22659702.0, + "94": 22912288.0, + "95": 23047676.0, + "96": 22828984.0, + "97": 22608528.0, + "98": 22763476.0, + "99": 22905460.0, + "100": 23015938.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 519065600.0, + "2": 519065600.0, + "3": 519065600.0, + "4": 519065600.0, + "5": 519065600.0, + "6": 519065600.0, + "7": 519065600.0, + "8": 519065600.0, + "9": 519065600.0, + "10": 519065600.0, + "11": 519065600.0, + "12": 519065600.0, + "13": 519065600.0, + "14": 519065600.0, + "15": 519065600.0, + "16": 519065600.0, + "17": 519065600.0, + "18": 519065600.0, + "19": 519065600.0, + "20": 519065600.0, + "21": 519065600.0, + "22": 519065600.0, + "23": 519065600.0, + "24": 519065600.0, + "25": 519065600.0, + "26": 519065600.0, + "27": 519065600.0, + "28": 519065600.0, + "29": 519065600.0, + "30": 519065600.0, + "31": 519065600.0, + "32": 519065600.0, + "33": 519065600.0, + "34": 519065600.0, + "35": 519065600.0, + "36": 519065600.0, + "37": 519065600.0, + "38": 519065600.0, + "39": 519065600.0, + "40": 519065600.0, + "41": 519065600.0, + "42": 519065600.0, + "43": 519065600.0, + "44": 519065600.0, + "45": 519065600.0, + "46": 519065600.0, + "47": 519065600.0, + "48": 519065600.0, + "49": 519065600.0, + "50": 519065600.0, + "51": 519065600.0, + "52": 519065600.0, + "53": 519065600.0, + "54": 519065600.0, + "55": 519065600.0, + "56": 519065600.0, + "57": 519065600.0, + "58": 519065600.0, + "59": 519065600.0, + "60": 519065600.0, + "61": 519065600.0, + "62": 519065600.0, + "63": 519065600.0, + "64": 519065600.0, + "65": 519065600.0, + "66": 519065600.0, + "67": 519065600.0, + "68": 519065600.0, + "69": 519065600.0, + "70": 519065600.0, + "71": 519065600.0, + "72": 519065600.0, + "73": 
519065600.0, + "74": 519065600.0, + "75": 519065600.0, + "76": 519065600.0, + "77": 519065600.0, + "78": 519065600.0, + "79": 519065600.0, + "80": 519065600.0, + "81": 519065600.0, + "82": 519065600.0, + "83": 519065600.0, + "84": 519065600.0, + "85": 519065600.0, + "86": 519065600.0, + "87": 519065600.0, + "88": 519065600.0, + "89": 519065600.0, + "90": 519065600.0, + "91": 519065600.0, + "92": 519065600.0, + "93": 519065600.0, + "94": 519065600.0, + "95": 519065600.0, + "96": 519065600.0, + "97": 519065600.0, + "98": 519065600.0, + "99": 519065600.0, + "100": 519065600.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3767053312.0, + "2": 3907909632.0, + "3": 3907909632.0, + "4": 3907909632.0, + "5": 3907909632.0, + "6": 3907909632.0, + "7": 3907909632.0, + "8": 3907909632.0, + "9": 3907909632.0, + "10": 3907909632.0, + "11": 3907909632.0, + "12": 3907909632.0, + "13": 3907909632.0, + "14": 3907909632.0, + "15": 3907909632.0, + "16": 3907909632.0, + "17": 3907909632.0, + "18": 3907909632.0, + "19": 3907909632.0, + "20": 3907909632.0, + "21": 3907909632.0, + "22": 3907909632.0, + "23": 3907909632.0, + "24": 3907909632.0, + "25": 3907909632.0, + "26": 3907909632.0, + "27": 3907909632.0, + "28": 3907909632.0, + "29": 3907909632.0, + "30": 3907909632.0, + "31": 3907909632.0, + "32": 3907909632.0, + "33": 3907909632.0, + "34": 3907909632.0, + "35": 3907909632.0, + "36": 3907909632.0, + "37": 3907909632.0, + "38": 3907909632.0, + "39": 3907909632.0, + "40": 3907909632.0, + "41": 3907909632.0, + "42": 3907909632.0, + "43": 3907909632.0, + "44": 3907909632.0, + "45": 3907909632.0, + "46": 3907909632.0, + "47": 3907909632.0, + "48": 3907909632.0, + "49": 3907909632.0, + "50": 3907909632.0, + "51": 3907909632.0, + "52": 3907909632.0, + "53": 3907909632.0, + "54": 3907909632.0, + "55": 3907909632.0, + "56": 3907909632.0, + "57": 3907909632.0, + "58": 3907909632.0, + "59": 3907909632.0, + "60": 
3907909632.0, + "61": 3907909632.0, + "62": 3907909632.0, + "63": 3907909632.0, + "64": 3907909632.0, + "65": 3907909632.0, + "66": 3907909632.0, + "67": 3907909632.0, + "68": 3907909632.0, + "69": 3907909632.0, + "70": 3907909632.0, + "71": 3907909632.0, + "72": 3907909632.0, + "73": 3907909632.0, + "74": 3907909632.0, + "75": 3907909632.0, + "76": 3907909632.0, + "77": 3907909632.0, + "78": 3907909632.0, + "79": 3907909632.0, + "80": 3907909632.0, + "81": 3907909632.0, + "82": 3907909632.0, + "83": 3907909632.0, + "84": 3907909632.0, + "85": 3907909632.0, + "86": 3907909632.0, + "87": 3907909632.0, + "88": 3907909632.0, + "89": 3907909632.0, + "90": 3907909632.0, + "91": 3907909632.0, + "92": 3907909632.0, + "93": 3907909632.0, + "94": 3907909632.0, + "95": 3907909632.0, + "96": 3907909632.0, + "97": 3907909632.0, + "98": 3907909632.0, + "99": 3907909632.0, + "100": 3907909632.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 23.45694, + "2": 0.20346, + "3": 0.36409, + "4": 0.17107, + "5": 0.17023, + "6": 0.17074, + "7": 0.38699, + "8": 0.17041, + "9": 0.16888, + "10": 0.16794, + "11": 0.16767, + "12": 0.16767, + "13": 0.16663, + "14": 0.16756, + "15": 0.16615, + "16": 0.16657, + "17": 0.16641, + "18": 0.16668, + "19": 0.16729, + "20": 0.16771, + "21": 0.16737, + "22": 0.17089, + "23": 0.16854, + "24": 0.16704, + "25": 0.16752, + "26": 0.16872, + "27": 0.16766, + "28": 0.16803, + "29": 0.16634, + "30": 0.16703, + "31": 0.17358, + "32": 0.16783, + "33": 0.1671, + "34": 0.16686, + "35": 0.16729, + "36": 0.16745, + "37": 0.16819, + "38": 0.16726, + "39": 0.16705, + "40": 0.16771, + "41": 0.16664, + "42": 0.1698, + "43": 0.16915, + "44": 0.16724, + "45": 0.16752, + "46": 0.16605, + "47": 0.16613, + "48": 0.16709, + "49": 0.17009, + "50": 0.1677, + "51": 0.17196, + "52": 0.16857, + "53": 0.16835, + "54": 0.16769, + "55": 0.16954, + "56": 0.16851, + "57": 0.17085, + "58": 0.16981, + "59": 0.17076, + "60": 
0.45985, + "61": 0.1701, + "62": 0.16952, + "63": 0.16919, + "64": 0.16816, + "65": 0.16858, + "66": 0.16768, + "67": 0.16965, + "68": 0.16881, + "69": 0.16837, + "70": 0.16824, + "71": 0.16956, + "72": 0.16914, + "73": 0.17096, + "74": 0.16954, + "75": 0.16772, + "76": 0.16933, + "77": 0.16793, + "78": 0.16698, + "79": 0.17038, + "80": 0.16791, + "81": 0.16747, + "82": 0.16745, + "83": 0.16958, + "84": 0.16855, + "85": 0.16833, + "86": 0.16922, + "87": 0.16839, + "88": 0.16805, + "89": 0.16825, + "90": 0.16691, + "91": 0.16873, + "92": 0.16882, + "93": 0.16822, + "94": 0.16847, + "95": 0.16712, + "96": 0.16757, + "97": 0.16817, + "98": 0.168, + "99": 0.16812, + "100": 0.16722 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..796cf7943e2 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.81184, + "2": 10.80964, + "3": 10.8261, + "4": 10.83055, + "5": 10.85464, + "6": 10.84052, + "7": 10.83581, + "8": 10.80288, + "9": 10.87748, + "10": 10.88256, + "11": 10.87624, + "12": 10.82598, + "13": 10.84134, + "14": 10.81521, + "15": 10.80679, + "16": 10.79904, + "17": 10.76842, + "18": 10.77939, + "19": 10.75192, + "20": 10.63196, + "21": 10.68212, + "22": 10.63985, + "23": 10.75592, + "24": 10.60961, + "25": 10.47374, + "26": 10.59698, + "27": 10.54094, + "28": 10.44971, + "29": 10.39259, + "30": 10.39285, + "31": 10.49257, + "32": 10.31859, + "33": 10.27757, + "34": 
10.44435, + "35": 9.96791, + "36": 10.11232, + "37": 10.02385, + "38": 10.37514, + "39": 9.78682, + "40": 10.1, + "41": 10.12396, + "42": 10.03, + "43": 10.19936, + "44": 10.0547, + "45": 9.68344, + "46": 9.98163, + "47": 9.92505, + "48": 9.6694, + "49": 9.91809, + "50": 9.92465, + "51": 9.79329, + "52": 9.32763, + "53": 9.64981, + "54": 9.86048, + "55": 9.98132, + "56": 9.81689, + "57": 9.74442, + "58": 9.83018, + "59": 9.32863, + "60": 9.3523, + "61": 9.45116, + "62": 10.19127, + "63": 9.35566, + "64": 9.62798, + "65": 9.70213, + "66": 9.52535, + "67": 9.66178, + "68": 9.58762, + "69": 9.38587, + "70": 9.73809, + "71": 9.87613, + "72": 9.69256, + "73": 9.39159, + "74": 9.44032, + "75": 8.95616, + "76": 9.56366, + "77": 9.61319, + "78": 9.39159, + "79": 9.52907, + "80": 9.31501, + "81": 9.70173, + "82": 9.90394, + "83": 9.31634, + "84": 9.47172, + "85": 8.97886, + "86": 9.6647, + "87": 9.43234, + "88": 9.58689, + "89": 9.52323, + "90": 9.55812, + "91": 9.62767, + "92": 9.13988, + "93": 9.42377, + "94": 9.54545, + "95": 9.13529, + "96": 8.75175, + "97": 9.58148, + "98": 9.78964, + "99": 9.37931, + "100": 9.21091 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1125.0, + "2": 1177.0, + "3": 1265.0, + "4": 1241.0, + "5": 1255.0, + "6": 1304.0, + "7": 1204.0, + "8": 998.0, + "9": 1236.0, + "10": 1367.0, + "11": 1252.0, + "12": 1281.0, + "13": 1254.0, + "14": 1148.0, + "15": 1127.0, + "16": 1102.0, + "17": 1193.0, + "18": 1248.0, + "19": 1072.0, + "20": 1082.0, + "21": 1201.0, + "22": 1302.0, + "23": 1336.0, + "24": 1317.0, + "25": 1114.0, + "26": 1200.0, + "27": 1255.0, + "28": 1323.0, + "29": 1288.0, + "30": 1558.0, + "31": 1489.0, + "32": 1390.0, + "33": 1413.0, + "34": 1518.0, + "35": 1292.0, + "36": 1395.0, + "37": 1487.0, + "38": 1573.0, + "39": 1376.0, + "40": 1433.0, + "41": 1677.0, + "42": 1728.0, + "43": 1669.0, + "44": 1607.0, + "45": 1564.0, + "46": 1874.0, + "47": 1660.0, + "48": 1554.0, + "49": 
1781.0, + "50": 1749.0, + "51": 1747.0, + "52": 1656.0, + "53": 1912.0, + "54": 1870.0, + "55": 1718.0, + "56": 1972.0, + "57": 1917.0, + "58": 1686.0, + "59": 1542.0, + "60": 1872.0, + "61": 2198.0, + "62": 2145.0, + "63": 1975.0, + "64": 2111.0, + "65": 2464.0, + "66": 2160.0, + "67": 2311.0, + "68": 2259.0, + "69": 2255.0, + "70": 2564.0, + "71": 2402.0, + "72": 2424.0, + "73": 1990.0, + "74": 2221.0, + "75": 1884.0, + "76": 2375.0, + "77": 2394.0, + "78": 2450.0, + "79": 2674.0, + "80": 1924.0, + "81": 2394.0, + "82": 2612.0, + "83": 2579.0, + "84": 2243.0, + "85": 2150.0, + "86": 2358.0, + "87": 2678.0, + "88": 2260.0, + "89": 2556.0, + "90": 2319.0, + "91": 2452.0, + "92": 1952.0, + "93": 2189.0, + "94": 2451.0, + "95": 2518.0, + "96": 2182.0, + "97": 2162.0, + "98": 2332.0, + "99": 2331.0, + "100": 2071.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 730320896.0, + "2": 730320896.0, + "3": 730320896.0, + "4": 730320896.0, + "5": 730320896.0, + "6": 730320896.0, + "7": 730320896.0, + "8": 730320896.0, + "9": 730320896.0, + "10": 730320896.0, + "11": 730320896.0, + "12": 730320896.0, + "13": 730320896.0, + "14": 730320896.0, + "15": 730320896.0, + "16": 730320896.0, + "17": 730320896.0, + "18": 730320896.0, + "19": 730320896.0, + "20": 730320896.0, + "21": 730320896.0, + "22": 730320896.0, + "23": 730320896.0, + "24": 730320896.0, + "25": 730320896.0, + "26": 730320896.0, + "27": 730320896.0, + "28": 730320896.0, + "29": 730320896.0, + "30": 730320896.0, + "31": 730320896.0, + "32": 730320896.0, + "33": 730320896.0, + "34": 730320896.0, + "35": 730320896.0, + "36": 730320896.0, + "37": 730320896.0, + "38": 730320896.0, + "39": 730320896.0, + "40": 730320896.0, + "41": 730320896.0, + "42": 730320896.0, + "43": 730320896.0, + "44": 730320896.0, + "45": 730320896.0, + "46": 730320896.0, + "47": 730320896.0, + "48": 730320896.0, + "49": 730320896.0, + "50": 730320896.0, + "51": 730320896.0, + 
"52": 730320896.0, + "53": 730320896.0, + "54": 730320896.0, + "55": 730320896.0, + "56": 730320896.0, + "57": 730320896.0, + "58": 730320896.0, + "59": 730320896.0, + "60": 730320896.0, + "61": 730320896.0, + "62": 730320896.0, + "63": 730320896.0, + "64": 730320896.0, + "65": 730320896.0, + "66": 730320896.0, + "67": 730320896.0, + "68": 730320896.0, + "69": 730320896.0, + "70": 730320896.0, + "71": 730320896.0, + "72": 730320896.0, + "73": 730320896.0, + "74": 730320896.0, + "75": 730320896.0, + "76": 730320896.0, + "77": 730320896.0, + "78": 730320896.0, + "79": 730320896.0, + "80": 730320896.0, + "81": 730320896.0, + "82": 730320896.0, + "83": 730320896.0, + "84": 730320896.0, + "85": 730320896.0, + "86": 730320896.0, + "87": 730320896.0, + "88": 730320896.0, + "89": 730320896.0, + "90": 730320896.0, + "91": 730320896.0, + "92": 730320896.0, + "93": 730320896.0, + "94": 730320896.0, + "95": 730320896.0, + "96": 730320896.0, + "97": 730320896.0, + "98": 730320896.0, + "99": 730320896.0, + "100": 730320896.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4311542272.0, + "2": 4593253888.0, + "3": 4593253888.0, + "4": 4593253888.0, + "5": 4593253888.0, + "6": 4593253888.0, + "7": 4593253888.0, + "8": 4593253888.0, + "9": 4593253888.0, + "10": 4593253888.0, + "11": 4593253888.0, + "12": 4593253888.0, + "13": 4593253888.0, + "14": 4593253888.0, + "15": 4593253888.0, + "16": 4593253888.0, + "17": 4593253888.0, + "18": 4593253888.0, + "19": 4593253888.0, + "20": 4593253888.0, + "21": 4593253888.0, + "22": 4593253888.0, + "23": 4593253888.0, + "24": 4593253888.0, + "25": 4593253888.0, + "26": 4593253888.0, + "27": 4593253888.0, + "28": 4593253888.0, + "29": 4593253888.0, + "30": 4593253888.0, + "31": 4593253888.0, + "32": 4593253888.0, + "33": 4593253888.0, + "34": 4593253888.0, + "35": 4593253888.0, + "36": 4593253888.0, + "37": 4593253888.0, + "38": 4593253888.0, + "39": 4593253888.0, + "40": 
4593253888.0, + "41": 4593253888.0, + "42": 4593253888.0, + "43": 4593253888.0, + "44": 4593253888.0, + "45": 4593253888.0, + "46": 4593253888.0, + "47": 4593253888.0, + "48": 4593253888.0, + "49": 4593253888.0, + "50": 4593253888.0, + "51": 4593253888.0, + "52": 4593253888.0, + "53": 4593253888.0, + "54": 4593253888.0, + "55": 4593253888.0, + "56": 4593253888.0, + "57": 4593253888.0, + "58": 4593253888.0, + "59": 4593253888.0, + "60": 4593253888.0, + "61": 4593253888.0, + "62": 4593253888.0, + "63": 4593253888.0, + "64": 4593253888.0, + "65": 4593253888.0, + "66": 4593253888.0, + "67": 4593253888.0, + "68": 4593253888.0, + "69": 4593253888.0, + "70": 4593253888.0, + "71": 4593253888.0, + "72": 4593253888.0, + "73": 4593253888.0, + "74": 4593253888.0, + "75": 4593253888.0, + "76": 4593253888.0, + "77": 4593253888.0, + "78": 4593253888.0, + "79": 4593253888.0, + "80": 4593253888.0, + "81": 4593253888.0, + "82": 4593253888.0, + "83": 4593253888.0, + "84": 4593253888.0, + "85": 4593253888.0, + "86": 4593253888.0, + "87": 4593253888.0, + "88": 4593253888.0, + "89": 4593253888.0, + "90": 4593253888.0, + "91": 4593253888.0, + "92": 4593253888.0, + "93": 4593253888.0, + "94": 4593253888.0, + "95": 4593253888.0, + "96": 4593253888.0, + "97": 4593253888.0, + "98": 4593253888.0, + "99": 4593253888.0, + "100": 4593253888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 17.52326, + "2": 0.393, + "3": 0.36565, + "4": 0.55958, + "5": 0.59432, + "6": 0.36552, + "7": 0.3644, + "8": 0.36754, + "9": 0.36565, + "10": 0.36536, + "11": 0.36628, + "12": 0.36391, + "13": 0.36591, + "14": 0.3664, + "15": 0.36556, + "16": 0.3646, + "17": 0.36476, + "18": 0.36531, + "19": 0.36649, + "20": 0.36649, + "21": 0.36435, + "22": 0.3664, + "23": 0.36307, + "24": 0.36376, + "25": 0.36657, + "26": 0.36362, + "27": 0.36425, + "28": 0.36383, + "29": 0.36442, + "30": 0.36444, + "31": 0.3654, + "32": 0.36458, + "33": 0.36385, + "34": 
0.36266, + "35": 0.36477, + "36": 0.36485, + "37": 0.36372, + "38": 0.36353, + "39": 0.36479, + "40": 0.36451, + "41": 0.36779, + "42": 0.36291, + "43": 0.36064, + "44": 0.36562, + "45": 0.36059, + "46": 0.36061, + "47": 0.36334, + "48": 0.35858, + "49": 0.36178, + "50": 0.36084, + "51": 0.36846, + "52": 0.36344, + "53": 0.36176, + "54": 0.36135, + "55": 0.36414, + "56": 0.36441, + "57": 0.36275, + "58": 0.36148, + "59": 0.36257, + "60": 0.36232, + "61": 0.36496, + "62": 0.36046, + "63": 0.36356, + "64": 0.36319, + "65": 0.3607, + "66": 0.36207, + "67": 0.36075, + "68": 0.35944, + "69": 0.36108, + "70": 0.35673, + "71": 0.36006, + "72": 0.3571, + "73": 0.36016, + "74": 0.36157, + "75": 0.36375, + "76": 0.35881, + "77": 0.36157, + "78": 0.35722, + "79": 0.35554, + "80": 0.35834, + "81": 0.35751, + "82": 0.35515, + "83": 0.35648, + "84": 0.5928, + "85": 0.35925, + "86": 0.3557, + "87": 0.3574, + "88": 0.35737, + "89": 0.4081, + "90": 0.56444, + "91": 0.35647, + "92": 0.35632, + "93": 0.35846, + "94": 0.35392, + "95": 0.35892, + "96": 0.36197, + "97": 0.36101, + "98": 0.35768, + "99": 0.36307, + "100": 0.35815 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..ec432ff7884 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.81184, + "2": 10.80964, + "3": 10.8261, + "4": 10.83055, + "5": 10.85464, + "6": 10.84052, + "7": 10.83581, + "8": 10.80288, + "9": 10.87748, + "10": 10.88256, + 
"11": 10.87624, + "12": 10.82598, + "13": 10.84134, + "14": 10.81521, + "15": 10.80679, + "16": 10.79904, + "17": 10.76842, + "18": 10.77939, + "19": 10.75192, + "20": 10.63196, + "21": 10.68212, + "22": 10.63985, + "23": 10.75592, + "24": 10.60961, + "25": 10.47374, + "26": 10.59698, + "27": 10.54094, + "28": 10.44971, + "29": 10.39259, + "30": 10.39285, + "31": 10.49257, + "32": 10.31859, + "33": 10.27757, + "34": 10.44435, + "35": 9.96791, + "36": 10.11232, + "37": 10.02385, + "38": 10.37514, + "39": 9.78682, + "40": 10.1, + "41": 10.12396, + "42": 10.03, + "43": 10.19936, + "44": 10.0547, + "45": 9.68344, + "46": 9.98163, + "47": 9.92505, + "48": 9.6694, + "49": 9.91809, + "50": 9.92465, + "51": 9.79329, + "52": 9.32763, + "53": 9.64981, + "54": 9.86048, + "55": 9.98132, + "56": 9.81689, + "57": 9.74442, + "58": 9.83018, + "59": 9.32863, + "60": 9.3523, + "61": 9.45116, + "62": 10.19127, + "63": 9.35566, + "64": 9.62798, + "65": 9.70213, + "66": 9.52535, + "67": 9.66178, + "68": 9.58762, + "69": 9.38587, + "70": 9.73809, + "71": 9.87613, + "72": 9.69256, + "73": 9.39159, + "74": 9.44032, + "75": 8.95616, + "76": 9.56366, + "77": 9.61319, + "78": 9.39159, + "79": 9.52907, + "80": 9.31501, + "81": 9.70173, + "82": 9.90394, + "83": 9.31634, + "84": 9.47172, + "85": 8.97886, + "86": 9.6647, + "87": 9.43234, + "88": 9.58689, + "89": 9.52323, + "90": 9.55812, + "91": 9.62767, + "92": 9.13988, + "93": 9.42377, + "94": 9.54545, + "95": 9.13529, + "96": 8.75175, + "97": 9.58148, + "98": 9.78964, + "99": 9.37931, + "100": 9.21091 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1125.0, + "2": 1177.0, + "3": 1265.0, + "4": 1241.0, + "5": 1255.0, + "6": 1304.0, + "7": 1204.0, + "8": 998.0, + "9": 1236.0, + "10": 1367.0, + "11": 1252.0, + "12": 1281.0, + "13": 1254.0, + "14": 1148.0, + "15": 1127.0, + "16": 1102.0, + "17": 1193.0, + "18": 1248.0, + "19": 1072.0, + "20": 1082.0, + "21": 1201.0, + "22": 1302.0, + "23": 
1336.0, + "24": 1317.0, + "25": 1114.0, + "26": 1200.0, + "27": 1255.0, + "28": 1323.0, + "29": 1288.0, + "30": 1558.0, + "31": 1489.0, + "32": 1390.0, + "33": 1413.0, + "34": 1518.0, + "35": 1292.0, + "36": 1395.0, + "37": 1487.0, + "38": 1573.0, + "39": 1376.0, + "40": 1433.0, + "41": 1677.0, + "42": 1728.0, + "43": 1669.0, + "44": 1607.0, + "45": 1564.0, + "46": 1874.0, + "47": 1660.0, + "48": 1554.0, + "49": 1781.0, + "50": 1749.0, + "51": 1747.0, + "52": 1656.0, + "53": 1912.0, + "54": 1870.0, + "55": 1718.0, + "56": 1972.0, + "57": 1917.0, + "58": 1686.0, + "59": 1542.0, + "60": 1872.0, + "61": 2198.0, + "62": 2145.0, + "63": 1975.0, + "64": 2111.0, + "65": 2464.0, + "66": 2160.0, + "67": 2311.0, + "68": 2259.0, + "69": 2255.0, + "70": 2564.0, + "71": 2402.0, + "72": 2424.0, + "73": 1990.0, + "74": 2221.0, + "75": 1884.0, + "76": 2375.0, + "77": 2394.0, + "78": 2450.0, + "79": 2674.0, + "80": 1924.0, + "81": 2394.0, + "82": 2612.0, + "83": 2579.0, + "84": 2243.0, + "85": 2150.0, + "86": 2358.0, + "87": 2678.0, + "88": 2260.0, + "89": 2556.0, + "90": 2319.0, + "91": 2452.0, + "92": 1952.0, + "93": 2189.0, + "94": 2451.0, + "95": 2518.0, + "96": 2182.0, + "97": 2162.0, + "98": 2332.0, + "99": 2331.0, + "100": 2071.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 730320896.0, + "2": 730320896.0, + "3": 730320896.0, + "4": 730320896.0, + "5": 730320896.0, + "6": 730320896.0, + "7": 730320896.0, + "8": 730320896.0, + "9": 730320896.0, + "10": 730320896.0, + "11": 730320896.0, + "12": 730320896.0, + "13": 730320896.0, + "14": 730320896.0, + "15": 730320896.0, + "16": 730320896.0, + "17": 730320896.0, + "18": 730320896.0, + "19": 730320896.0, + "20": 730320896.0, + "21": 730320896.0, + "22": 730320896.0, + "23": 730320896.0, + "24": 730320896.0, + "25": 730320896.0, + "26": 730320896.0, + "27": 730320896.0, + "28": 730320896.0, + "29": 730320896.0, + "30": 730320896.0, + "31": 730320896.0, + "32": 
730320896.0, + "33": 730320896.0, + "34": 730320896.0, + "35": 730320896.0, + "36": 730320896.0, + "37": 730320896.0, + "38": 730320896.0, + "39": 730320896.0, + "40": 730320896.0, + "41": 730320896.0, + "42": 730320896.0, + "43": 730320896.0, + "44": 730320896.0, + "45": 730320896.0, + "46": 730320896.0, + "47": 730320896.0, + "48": 730320896.0, + "49": 730320896.0, + "50": 730320896.0, + "51": 730320896.0, + "52": 730320896.0, + "53": 730320896.0, + "54": 730320896.0, + "55": 730320896.0, + "56": 730320896.0, + "57": 730320896.0, + "58": 730320896.0, + "59": 730320896.0, + "60": 730320896.0, + "61": 730320896.0, + "62": 730320896.0, + "63": 730320896.0, + "64": 730320896.0, + "65": 730320896.0, + "66": 730320896.0, + "67": 730320896.0, + "68": 730320896.0, + "69": 730320896.0, + "70": 730320896.0, + "71": 730320896.0, + "72": 730320896.0, + "73": 730320896.0, + "74": 730320896.0, + "75": 730320896.0, + "76": 730320896.0, + "77": 730320896.0, + "78": 730320896.0, + "79": 730320896.0, + "80": 730320896.0, + "81": 730320896.0, + "82": 730320896.0, + "83": 730320896.0, + "84": 730320896.0, + "85": 730320896.0, + "86": 730320896.0, + "87": 730320896.0, + "88": 730320896.0, + "89": 730320896.0, + "90": 730320896.0, + "91": 730320896.0, + "92": 730320896.0, + "93": 730320896.0, + "94": 730320896.0, + "95": 730320896.0, + "96": 730320896.0, + "97": 730320896.0, + "98": 730320896.0, + "99": 730320896.0, + "100": 730320896.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4311542272.0, + "2": 4593253888.0, + "3": 4593253888.0, + "4": 4593253888.0, + "5": 4593253888.0, + "6": 4593253888.0, + "7": 4593253888.0, + "8": 4593253888.0, + "9": 4593253888.0, + "10": 4593253888.0, + "11": 4593253888.0, + "12": 4593253888.0, + "13": 4593253888.0, + "14": 4593253888.0, + "15": 4593253888.0, + "16": 4593253888.0, + "17": 4593253888.0, + "18": 4593253888.0, + "19": 4593253888.0, + "20": 4593253888.0, + "21": 
4593253888.0, + "22": 4593253888.0, + "23": 4593253888.0, + "24": 4593253888.0, + "25": 4593253888.0, + "26": 4593253888.0, + "27": 4593253888.0, + "28": 4593253888.0, + "29": 4593253888.0, + "30": 4593253888.0, + "31": 4593253888.0, + "32": 4593253888.0, + "33": 4593253888.0, + "34": 4593253888.0, + "35": 4593253888.0, + "36": 4593253888.0, + "37": 4593253888.0, + "38": 4593253888.0, + "39": 4593253888.0, + "40": 4593253888.0, + "41": 4593253888.0, + "42": 4593253888.0, + "43": 4593253888.0, + "44": 4593253888.0, + "45": 4593253888.0, + "46": 4593253888.0, + "47": 4593253888.0, + "48": 4593253888.0, + "49": 4593253888.0, + "50": 4593253888.0, + "51": 4593253888.0, + "52": 4593253888.0, + "53": 4593253888.0, + "54": 4593253888.0, + "55": 4593253888.0, + "56": 4593253888.0, + "57": 4593253888.0, + "58": 4593253888.0, + "59": 4593253888.0, + "60": 4593253888.0, + "61": 4593253888.0, + "62": 4593253888.0, + "63": 4593253888.0, + "64": 4593253888.0, + "65": 4593253888.0, + "66": 4593253888.0, + "67": 4593253888.0, + "68": 4593253888.0, + "69": 4593253888.0, + "70": 4593253888.0, + "71": 4593253888.0, + "72": 4593253888.0, + "73": 4593253888.0, + "74": 4593253888.0, + "75": 4593253888.0, + "76": 4593253888.0, + "77": 4593253888.0, + "78": 4593253888.0, + "79": 4593253888.0, + "80": 4593253888.0, + "81": 4593253888.0, + "82": 4593253888.0, + "83": 4593253888.0, + "84": 4593253888.0, + "85": 4593253888.0, + "86": 4593253888.0, + "87": 4593253888.0, + "88": 4593253888.0, + "89": 4593253888.0, + "90": 4593253888.0, + "91": 4593253888.0, + "92": 4593253888.0, + "93": 4593253888.0, + "94": 4593253888.0, + "95": 4593253888.0, + "96": 4593253888.0, + "97": 4593253888.0, + "98": 4593253888.0, + "99": 4593253888.0, + "100": 4593253888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 21.09115, + "2": 0.41164, + "3": 0.38182, + "4": 0.38049, + "5": 0.60969, + "6": 0.36583, + "7": 0.36416, + "8": 0.37604, + "9": 
0.3679, + "10": 0.36785, + "11": 0.36954, + "12": 0.36975, + "13": 0.36874, + "14": 0.36917, + "15": 0.37218, + "16": 0.37039, + "17": 0.36749, + "18": 0.36956, + "19": 0.37349, + "20": 0.37202, + "21": 0.36788, + "22": 0.37092, + "23": 0.36616, + "24": 0.36575, + "25": 0.36576, + "26": 0.36657, + "27": 0.36754, + "28": 0.36677, + "29": 0.36466, + "30": 0.36792, + "31": 0.36536, + "32": 0.36562, + "33": 0.36872, + "34": 0.36339, + "35": 0.36568, + "36": 0.36568, + "37": 0.36366, + "38": 0.36485, + "39": 0.36421, + "40": 0.35995, + "41": 0.36131, + "42": 0.36351, + "43": 0.36398, + "44": 0.3645, + "45": 0.359, + "46": 0.3614, + "47": 0.35954, + "48": 0.36106, + "49": 0.36508, + "50": 0.36162, + "51": 0.36692, + "52": 0.36519, + "53": 0.3602, + "54": 0.36089, + "55": 0.36195, + "56": 0.35943, + "57": 0.36048, + "58": 0.36032, + "59": 0.36446, + "60": 0.36455, + "61": 0.36016, + "62": 0.36345, + "63": 0.3602, + "64": 0.36067, + "65": 0.36076, + "66": 0.36538, + "67": 0.57124, + "68": 0.36375, + "69": 0.36298, + "70": 0.3623, + "71": 0.36583, + "72": 0.36199, + "73": 0.36503, + "74": 0.3612, + "75": 0.36467, + "76": 0.36386, + "77": 0.36345, + "78": 0.36764, + "79": 0.36585, + "80": 0.36636, + "81": 0.36354, + "82": 0.36426, + "83": 0.36781, + "84": 0.58958, + "85": 0.36576, + "86": 0.36705, + "87": 0.36285, + "88": 0.3685, + "89": 0.36603, + "90": 0.36553, + "91": 0.36328, + "92": 0.36279, + "93": 0.36243, + "94": 0.3647, + "95": 0.3673, + "96": 0.36551, + "97": 0.36297, + "98": 0.36326, + "99": 0.3621, + "100": 0.36226 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 74df36b8e05..ef753336010 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.83936, "5": 10.87939, "10": 10.8926, "15": 10.83088, "20": 10.6635, "25": 10.50497, "30": 10.42916, "35": 9.99632, "40": 10.12495, "45": 9.71369, "50": 9.96042}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1026.0, "5": 1259.0, "10": 1319.0, "15": 1217.0, "20": 1019.0, "25": 1066.0, "30": 1532.0, "35": 1235.0, "40": 1513.0, "45": 1501.0, "50": 1639.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 763220480.0, "5": 763220480.0, "10": 763220480.0, "15": 763220480.0, "20": 763220480.0, "25": 763220480.0, "30": 763220480.0, "35": 763220480.0, "40": 763220480.0, "45": 763220480.0, "50": 763220480.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4342344704.0, "5": 4626153472.0, "10": 4626153472.0, "15": 4626153472.0, "20": 4626153472.0, "25": 4626153472.0, "30": 4626153472.0, "35": 4626153472.0, "40": 4626153472.0, "45": 4626153472.0, "50": 4626153472.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 13.4691, "5": 0.23174, "10": 0.22417, "15": 0.22833, "20": 0.22378, "25": 0.23805, "30": 0.22623, "35": 0.22839, "40": 0.22689, "45": 0.22807, "50": 0.22843}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.83936, + "2": 10.8442, + "3": 10.86813, + "4": 10.86022, + "5": 10.87939, + "6": 10.85969, + "7": 10.86386, + "8": 10.8444, + "9": 10.88995, + "10": 10.8926, + "11": 10.89136, + "12": 10.85312, + "13": 10.87319, + "14": 10.83805, + "15": 10.83088, + 
"16": 10.82011, + "17": 10.79138, + "18": 10.81055, + "19": 10.77977, + "20": 10.6635, + "21": 10.69765, + "22": 10.67421, + "23": 10.77344, + "24": 10.63919, + "25": 10.50497, + "26": 10.61911, + "27": 10.56921, + "28": 10.46859, + "29": 10.41119, + "30": 10.42916, + "31": 10.52553, + "32": 10.34942, + "33": 10.2967, + "34": 10.46909, + "35": 9.99632, + "36": 10.13945, + "37": 10.0434, + "38": 10.4139, + "39": 9.80941, + "40": 10.12495, + "41": 10.14883, + "42": 10.04042, + "43": 10.22142, + "44": 10.07348, + "45": 9.71369, + "46": 10.00449, + "47": 9.94758, + "48": 9.68856, + "49": 9.93637, + "50": 9.96042 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1026.0, + "2": 1184.0, + "3": 1226.0, + "4": 1248.0, + "5": 1259.0, + "6": 1421.0, + "7": 1182.0, + "8": 1036.0, + "9": 1293.0, + "10": 1319.0, + "11": 1212.0, + "12": 1373.0, + "13": 1327.0, + "14": 1121.0, + "15": 1217.0, + "16": 1163.0, + "17": 1246.0, + "18": 1280.0, + "19": 1128.0, + "20": 1019.0, + "21": 1147.0, + "22": 1156.0, + "23": 1341.0, + "24": 1312.0, + "25": 1066.0, + "26": 1138.0, + "27": 1270.0, + "28": 1260.0, + "29": 1292.0, + "30": 1532.0, + "31": 1477.0, + "32": 1460.0, + "33": 1537.0, + "34": 1513.0, + "35": 1235.0, + "36": 1316.0, + "37": 1466.0, + "38": 1564.0, + "39": 1380.0, + "40": 1513.0, + "41": 1633.0, + "42": 1509.0, + "43": 1731.0, + "44": 1636.0, + "45": 1501.0, + "46": 1884.0, + "47": 1567.0, + "48": 1631.0, + "49": 1825.0, + "50": 1639.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759681536.0, + "2": 759681536.0, + "3": 759681536.0, + "4": 759681536.0, + "5": 759681536.0, + "6": 759681536.0, + "7": 759681536.0, + "8": 759681536.0, + "9": 759681536.0, + "10": 759681536.0, + "11": 759681536.0, + "12": 759681536.0, + "13": 759681536.0, + "14": 759681536.0, + "15": 759681536.0, + "16": 759681536.0, + "17": 759681536.0, + "18": 759681536.0, + "19": 
759681536.0, + "20": 759681536.0, + "21": 759681536.0, + "22": 759681536.0, + "23": 759681536.0, + "24": 759681536.0, + "25": 759681536.0, + "26": 759681536.0, + "27": 759681536.0, + "28": 759681536.0, + "29": 759681536.0, + "30": 759681536.0, + "31": 759681536.0, + "32": 759681536.0, + "33": 759681536.0, + "34": 759681536.0, + "35": 759681536.0, + "36": 759681536.0, + "37": 759681536.0, + "38": 759681536.0, + "39": 759681536.0, + "40": 759681536.0, + "41": 759681536.0, + "42": 759681536.0, + "43": 759681536.0, + "44": 759681536.0, + "45": 759681536.0, + "46": 759681536.0, + "47": 759681536.0, + "48": 759681536.0, + "49": 759681536.0, + "50": 759681536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4340902912.0, + "2": 4622614528.0, + "3": 4622614528.0, + "4": 4622614528.0, + "5": 4622614528.0, + "6": 4622614528.0, + "7": 4622614528.0, + "8": 4622614528.0, + "9": 4622614528.0, + "10": 4622614528.0, + "11": 4622614528.0, + "12": 4622614528.0, + "13": 4622614528.0, + "14": 4622614528.0, + "15": 4622614528.0, + "16": 4622614528.0, + "17": 4622614528.0, + "18": 4622614528.0, + "19": 4622614528.0, + "20": 4622614528.0, + "21": 4622614528.0, + "22": 4622614528.0, + "23": 4622614528.0, + "24": 4622614528.0, + "25": 4622614528.0, + "26": 4622614528.0, + "27": 4622614528.0, + "28": 4622614528.0, + "29": 4622614528.0, + "30": 4622614528.0, + "31": 4622614528.0, + "32": 4622614528.0, + "33": 4622614528.0, + "34": 4622614528.0, + "35": 4622614528.0, + "36": 4622614528.0, + "37": 4622614528.0, + "38": 4622614528.0, + "39": 4622614528.0, + "40": 4622614528.0, + "41": 4622614528.0, + "42": 4622614528.0, + "43": 4622614528.0, + "44": 4622614528.0, + "45": 4622614528.0, + "46": 4622614528.0, + "47": 4622614528.0, + "48": 4622614528.0, + "49": 4622614528.0, + "50": 4622614528.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.91878, + "2": 
0.30301, + "3": 0.26726, + "4": 0.26031, + "5": 0.25815, + "6": 0.26195, + "7": 0.26064, + "8": 0.26459, + "9": 0.25765, + "10": 0.26159, + "11": 0.25801, + "12": 0.2577, + "13": 0.25882, + "14": 0.25879, + "15": 0.25853, + "16": 0.25689, + "17": 0.25763, + "18": 0.26042, + "19": 0.25687, + "20": 0.25459, + "21": 0.25315, + "22": 0.2615, + "23": 0.25473, + "24": 0.2558, + "25": 0.25524, + "26": 0.25354, + "27": 0.25658, + "28": 0.25019, + "29": 0.2622, + "30": 0.25785, + "31": 0.25516, + "32": 0.25092, + "33": 0.25655, + "34": 0.25493, + "35": 0.2541, + "36": 0.25492, + "37": 0.25229, + "38": 0.25775, + "39": 0.25432, + "40": 0.25358, + "41": 0.25502, + "42": 0.25428, + "43": 0.25111, + "44": 0.25239, + "45": 0.25573, + "46": 0.25505, + "47": 0.25199, + "48": 0.25057, + "49": 0.25588, + "50": 0.2569 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..67c8ef8abff --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.83936, + "2": 10.8442, + "3": 10.86813, + "4": 10.86022, + "5": 10.87939, + "6": 10.85969, + "7": 10.86386, + "8": 10.8444, + "9": 10.88995, + "10": 10.8926, + "11": 10.89136, + "12": 10.85312, + "13": 10.87319, + "14": 10.83805, + "15": 10.83088, + "16": 10.82011, + "17": 10.79138, + "18": 10.81055, + "19": 10.77977, + "20": 10.6635, + "21": 10.69765, + "22": 10.67421, + "23": 10.77344, + "24": 10.63919, + "25": 10.50497, + "26": 10.61911, + "27": 10.56921, + "28": 10.46859, + "29": 10.41119, + "30": 10.42916, + "31": 
10.52553, + "32": 10.34942, + "33": 10.2967, + "34": 10.46909, + "35": 9.99632, + "36": 10.13945, + "37": 10.0434, + "38": 10.4139, + "39": 9.80941, + "40": 10.12495, + "41": 10.14883, + "42": 10.04042, + "43": 10.22142, + "44": 10.07348, + "45": 9.71369, + "46": 10.00449, + "47": 9.94758, + "48": 9.68856, + "49": 9.93637, + "50": 9.96042 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1026.0, + "2": 1184.0, + "3": 1226.0, + "4": 1248.0, + "5": 1259.0, + "6": 1421.0, + "7": 1182.0, + "8": 1036.0, + "9": 1293.0, + "10": 1319.0, + "11": 1212.0, + "12": 1373.0, + "13": 1327.0, + "14": 1121.0, + "15": 1217.0, + "16": 1163.0, + "17": 1246.0, + "18": 1280.0, + "19": 1128.0, + "20": 1019.0, + "21": 1147.0, + "22": 1156.0, + "23": 1341.0, + "24": 1312.0, + "25": 1066.0, + "26": 1138.0, + "27": 1270.0, + "28": 1260.0, + "29": 1292.0, + "30": 1532.0, + "31": 1477.0, + "32": 1460.0, + "33": 1537.0, + "34": 1513.0, + "35": 1235.0, + "36": 1316.0, + "37": 1466.0, + "38": 1564.0, + "39": 1380.0, + "40": 1513.0, + "41": 1633.0, + "42": 1509.0, + "43": 1731.0, + "44": 1636.0, + "45": 1501.0, + "46": 1884.0, + "47": 1567.0, + "48": 1631.0, + "49": 1825.0, + "50": 1639.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759681536.0, + "2": 759681536.0, + "3": 759681536.0, + "4": 759681536.0, + "5": 759681536.0, + "6": 759681536.0, + "7": 759681536.0, + "8": 759681536.0, + "9": 759681536.0, + "10": 759681536.0, + "11": 759681536.0, + "12": 759681536.0, + "13": 759681536.0, + "14": 759681536.0, + "15": 759681536.0, + "16": 759681536.0, + "17": 759681536.0, + "18": 759681536.0, + "19": 759681536.0, + "20": 759681536.0, + "21": 759681536.0, + "22": 759681536.0, + "23": 759681536.0, + "24": 759681536.0, + "25": 759681536.0, + "26": 759681536.0, + "27": 759681536.0, + "28": 759681536.0, + "29": 759681536.0, + "30": 759681536.0, + "31": 759681536.0, + "32": 
759681536.0, + "33": 759681536.0, + "34": 759681536.0, + "35": 759681536.0, + "36": 759681536.0, + "37": 759681536.0, + "38": 759681536.0, + "39": 759681536.0, + "40": 759681536.0, + "41": 759681536.0, + "42": 759681536.0, + "43": 759681536.0, + "44": 759681536.0, + "45": 759681536.0, + "46": 759681536.0, + "47": 759681536.0, + "48": 759681536.0, + "49": 759681536.0, + "50": 759681536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4340902912.0, + "2": 4622614528.0, + "3": 4622614528.0, + "4": 4622614528.0, + "5": 4622614528.0, + "6": 4622614528.0, + "7": 4622614528.0, + "8": 4622614528.0, + "9": 4622614528.0, + "10": 4622614528.0, + "11": 4622614528.0, + "12": 4622614528.0, + "13": 4622614528.0, + "14": 4622614528.0, + "15": 4622614528.0, + "16": 4622614528.0, + "17": 4622614528.0, + "18": 4622614528.0, + "19": 4622614528.0, + "20": 4622614528.0, + "21": 4622614528.0, + "22": 4622614528.0, + "23": 4622614528.0, + "24": 4622614528.0, + "25": 4622614528.0, + "26": 4622614528.0, + "27": 4622614528.0, + "28": 4622614528.0, + "29": 4622614528.0, + "30": 4622614528.0, + "31": 4622614528.0, + "32": 4622614528.0, + "33": 4622614528.0, + "34": 4622614528.0, + "35": 4622614528.0, + "36": 4622614528.0, + "37": 4622614528.0, + "38": 4622614528.0, + "39": 4622614528.0, + "40": 4622614528.0, + "41": 4622614528.0, + "42": 4622614528.0, + "43": 4622614528.0, + "44": 4622614528.0, + "45": 4622614528.0, + "46": 4622614528.0, + "47": 4622614528.0, + "48": 4622614528.0, + "49": 4622614528.0, + "50": 4622614528.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.91724, + "2": 0.27573, + "3": 0.23467, + "4": 0.23594, + "5": 0.23302, + "6": 0.23216, + "7": 0.23399, + "8": 0.23423, + "9": 0.23365, + "10": 0.23211, + "11": 0.2332, + "12": 0.23283, + "13": 0.23445, + "14": 0.23405, + "15": 0.23349, + "16": 0.23298, + "17": 0.23305, + "18": 0.23251, + 
"19": 0.23322, + "20": 0.23348, + "21": 0.23189, + "22": 0.23316, + "23": 0.2316, + "24": 0.23233, + "25": 0.23512, + "26": 0.23232, + "27": 0.23306, + "28": 0.23244, + "29": 0.23331, + "30": 0.23258, + "31": 0.23311, + "32": 0.23326, + "33": 0.23418, + "34": 0.23411, + "35": 0.23489, + "36": 0.2317, + "37": 0.23483, + "38": 0.23235, + "39": 0.23511, + "40": 0.23413, + "41": 0.23395, + "42": 0.23405, + "43": 0.23331, + "44": 0.23297, + "45": 0.23473, + "46": 0.23192, + "47": 0.23377, + "48": 0.23322, + "49": 0.23042, + "50": 0.23263 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..5e0ca24c497 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.83936, + "2": 10.8442, + "3": 10.86813, + "4": 10.86022, + "5": 10.87939, + "6": 10.85969, + "7": 10.86386, + "8": 10.8444, + "9": 10.88995, + "10": 10.8926, + "11": 10.89136, + "12": 10.85312, + "13": 10.87319, + "14": 10.83805, + "15": 10.83088, + "16": 10.82011, + "17": 10.79138, + "18": 10.81055, + "19": 10.77977, + "20": 10.6635, + "21": 10.69765, + "22": 10.67421, + "23": 10.77344, + "24": 10.63919, + "25": 10.50497, + "26": 10.61911, + "27": 10.56921, + "28": 10.46859, + "29": 10.41119, + "30": 10.42916, + "31": 10.52553, + "32": 10.34942, + "33": 10.2967, + "34": 10.46909, + "35": 9.99632, + "36": 10.13945, + "37": 10.0434, + "38": 10.4139, + "39": 9.80941, + "40": 10.12495, + "41": 10.14883, + "42": 10.04042, + "43": 10.22142, + "44": 10.07348, + "45": 9.71369, + "46": 10.00449, + "47": 9.94758, + 
"48": 9.68856, + "49": 9.93637, + "50": 9.96042 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1026.0, + "2": 1184.0, + "3": 1226.0, + "4": 1248.0, + "5": 1259.0, + "6": 1421.0, + "7": 1182.0, + "8": 1036.0, + "9": 1293.0, + "10": 1319.0, + "11": 1212.0, + "12": 1373.0, + "13": 1327.0, + "14": 1121.0, + "15": 1217.0, + "16": 1163.0, + "17": 1246.0, + "18": 1280.0, + "19": 1128.0, + "20": 1019.0, + "21": 1147.0, + "22": 1156.0, + "23": 1341.0, + "24": 1312.0, + "25": 1066.0, + "26": 1138.0, + "27": 1270.0, + "28": 1260.0, + "29": 1292.0, + "30": 1532.0, + "31": 1477.0, + "32": 1460.0, + "33": 1537.0, + "34": 1513.0, + "35": 1235.0, + "36": 1316.0, + "37": 1466.0, + "38": 1564.0, + "39": 1380.0, + "40": 1513.0, + "41": 1633.0, + "42": 1509.0, + "43": 1731.0, + "44": 1636.0, + "45": 1501.0, + "46": 1884.0, + "47": 1567.0, + "48": 1631.0, + "49": 1825.0, + "50": 1639.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759681536.0, + "2": 759681536.0, + "3": 759681536.0, + "4": 759681536.0, + "5": 759681536.0, + "6": 759681536.0, + "7": 759681536.0, + "8": 759681536.0, + "9": 759681536.0, + "10": 759681536.0, + "11": 759681536.0, + "12": 759681536.0, + "13": 759681536.0, + "14": 759681536.0, + "15": 759681536.0, + "16": 759681536.0, + "17": 759681536.0, + "18": 759681536.0, + "19": 759681536.0, + "20": 759681536.0, + "21": 759681536.0, + "22": 759681536.0, + "23": 759681536.0, + "24": 759681536.0, + "25": 759681536.0, + "26": 759681536.0, + "27": 759681536.0, + "28": 759681536.0, + "29": 759681536.0, + "30": 759681536.0, + "31": 759681536.0, + "32": 759681536.0, + "33": 759681536.0, + "34": 759681536.0, + "35": 759681536.0, + "36": 759681536.0, + "37": 759681536.0, + "38": 759681536.0, + "39": 759681536.0, + "40": 759681536.0, + "41": 759681536.0, + "42": 759681536.0, + "43": 759681536.0, + "44": 759681536.0, + "45": 759681536.0, + "46": 
759681536.0, + "47": 759681536.0, + "48": 759681536.0, + "49": 759681536.0, + "50": 759681536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4340902912.0, + "2": 4622614528.0, + "3": 4622614528.0, + "4": 4622614528.0, + "5": 4622614528.0, + "6": 4622614528.0, + "7": 4622614528.0, + "8": 4622614528.0, + "9": 4622614528.0, + "10": 4622614528.0, + "11": 4622614528.0, + "12": 4622614528.0, + "13": 4622614528.0, + "14": 4622614528.0, + "15": 4622614528.0, + "16": 4622614528.0, + "17": 4622614528.0, + "18": 4622614528.0, + "19": 4622614528.0, + "20": 4622614528.0, + "21": 4622614528.0, + "22": 4622614528.0, + "23": 4622614528.0, + "24": 4622614528.0, + "25": 4622614528.0, + "26": 4622614528.0, + "27": 4622614528.0, + "28": 4622614528.0, + "29": 4622614528.0, + "30": 4622614528.0, + "31": 4622614528.0, + "32": 4622614528.0, + "33": 4622614528.0, + "34": 4622614528.0, + "35": 4622614528.0, + "36": 4622614528.0, + "37": 4622614528.0, + "38": 4622614528.0, + "39": 4622614528.0, + "40": 4622614528.0, + "41": 4622614528.0, + "42": 4622614528.0, + "43": 4622614528.0, + "44": 4622614528.0, + "45": 4622614528.0, + "46": 4622614528.0, + "47": 4622614528.0, + "48": 4622614528.0, + "49": 4622614528.0, + "50": 4622614528.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.785, + "2": 0.28429, + "3": 0.25654, + "4": 0.25675, + "5": 0.25763, + "6": 0.25556, + "7": 0.25403, + "8": 0.25276, + "9": 0.25351, + "10": 0.25546, + "11": 0.25488, + "12": 0.25607, + "13": 0.25404, + "14": 0.25256, + "15": 0.25733, + "16": 0.25987, + "17": 0.25778, + "18": 0.25053, + "19": 0.25288, + "20": 0.258, + "21": 0.25606, + "22": 0.25231, + "23": 0.25223, + "24": 0.26464, + "25": 0.26469, + "26": 0.25015, + "27": 0.25378, + "28": 0.25459, + "29": 0.26134, + "30": 0.26129, + "31": 0.2595, + "32": 0.26444, + "33": 0.25568, + "34": 0.25514, + "35": 0.25087, + "36": 
0.25275, + "37": 0.25383, + "38": 0.24953, + "39": 0.24996, + "40": 0.25393, + "41": 0.25556, + "42": 0.25158, + "43": 0.25124, + "44": 0.25, + "45": 0.25586, + "46": 0.26057, + "47": 0.25868, + "48": 0.26304, + "49": 0.2615, + "50": 0.26261 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..2685ca10966 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.81184, + "2": 10.80964, + "3": 10.8261, + "4": 10.83055, + "5": 10.85464, + "6": 10.84052, + "7": 10.83581, + "8": 10.80288, + "9": 10.87748, + "10": 10.88256, + "11": 10.87624, + "12": 10.82598, + "13": 10.84134, + "14": 10.81521, + "15": 10.80679, + "16": 10.79904, + "17": 10.76842, + "18": 10.77939, + "19": 10.75192, + "20": 10.63196, + "21": 10.68212, + "22": 10.63985, + "23": 10.75592, + "24": 10.60961, + "25": 10.47374, + "26": 10.59698, + "27": 10.54094, + "28": 10.44971, + "29": 10.39259, + "30": 10.39285, + "31": 10.49257, + "32": 10.31859, + "33": 10.27757, + "34": 10.44435, + "35": 9.96791, + "36": 10.11232, + "37": 10.02385, + "38": 10.37514, + "39": 9.78682, + "40": 10.1, + "41": 10.12396, + "42": 10.03, + "43": 10.19936, + "44": 10.0547, + "45": 9.68344, + "46": 9.98163, + "47": 9.92505, + "48": 9.6694, + "49": 9.91809, + "50": 9.92465 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1125.0, + "2": 1177.0, + "3": 1265.0, + "4": 1241.0, + "5": 1255.0, + "6": 1304.0, + "7": 1204.0, + "8": 998.0, + "9": 
1236.0, + "10": 1367.0, + "11": 1252.0, + "12": 1281.0, + "13": 1254.0, + "14": 1148.0, + "15": 1127.0, + "16": 1102.0, + "17": 1193.0, + "18": 1248.0, + "19": 1072.0, + "20": 1082.0, + "21": 1201.0, + "22": 1302.0, + "23": 1336.0, + "24": 1317.0, + "25": 1114.0, + "26": 1200.0, + "27": 1255.0, + "28": 1323.0, + "29": 1288.0, + "30": 1558.0, + "31": 1489.0, + "32": 1390.0, + "33": 1413.0, + "34": 1518.0, + "35": 1292.0, + "36": 1395.0, + "37": 1487.0, + "38": 1573.0, + "39": 1376.0, + "40": 1433.0, + "41": 1677.0, + "42": 1728.0, + "43": 1669.0, + "44": 1607.0, + "45": 1564.0, + "46": 1874.0, + "47": 1660.0, + "48": 1554.0, + "49": 1781.0, + "50": 1749.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 730320896.0, + "2": 730320896.0, + "3": 730320896.0, + "4": 730320896.0, + "5": 730320896.0, + "6": 730320896.0, + "7": 730320896.0, + "8": 730320896.0, + "9": 730320896.0, + "10": 730320896.0, + "11": 730320896.0, + "12": 730320896.0, + "13": 730320896.0, + "14": 730320896.0, + "15": 730320896.0, + "16": 730320896.0, + "17": 730320896.0, + "18": 730320896.0, + "19": 730320896.0, + "20": 730320896.0, + "21": 730320896.0, + "22": 730320896.0, + "23": 730320896.0, + "24": 730320896.0, + "25": 730320896.0, + "26": 730320896.0, + "27": 730320896.0, + "28": 730320896.0, + "29": 730320896.0, + "30": 730320896.0, + "31": 730320896.0, + "32": 730320896.0, + "33": 730320896.0, + "34": 730320896.0, + "35": 730320896.0, + "36": 730320896.0, + "37": 730320896.0, + "38": 730320896.0, + "39": 730320896.0, + "40": 730320896.0, + "41": 730320896.0, + "42": 730320896.0, + "43": 730320896.0, + "44": 730320896.0, + "45": 730320896.0, + "46": 730320896.0, + "47": 730320896.0, + "48": 730320896.0, + "49": 730320896.0, + "50": 730320896.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4311542272.0, + "2": 4593253888.0, + "3": 4593253888.0, + "4": 
4593253888.0, + "5": 4593253888.0, + "6": 4593253888.0, + "7": 4593253888.0, + "8": 4593253888.0, + "9": 4593253888.0, + "10": 4593253888.0, + "11": 4593253888.0, + "12": 4593253888.0, + "13": 4593253888.0, + "14": 4593253888.0, + "15": 4593253888.0, + "16": 4593253888.0, + "17": 4593253888.0, + "18": 4593253888.0, + "19": 4593253888.0, + "20": 4593253888.0, + "21": 4593253888.0, + "22": 4593253888.0, + "23": 4593253888.0, + "24": 4593253888.0, + "25": 4593253888.0, + "26": 4593253888.0, + "27": 4593253888.0, + "28": 4593253888.0, + "29": 4593253888.0, + "30": 4593253888.0, + "31": 4593253888.0, + "32": 4593253888.0, + "33": 4593253888.0, + "34": 4593253888.0, + "35": 4593253888.0, + "36": 4593253888.0, + "37": 4593253888.0, + "38": 4593253888.0, + "39": 4593253888.0, + "40": 4593253888.0, + "41": 4593253888.0, + "42": 4593253888.0, + "43": 4593253888.0, + "44": 4593253888.0, + "45": 4593253888.0, + "46": 4593253888.0, + "47": 4593253888.0, + "48": 4593253888.0, + "49": 4593253888.0, + "50": 4593253888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 23.63558, + "2": 0.38944, + "3": 0.36089, + "4": 0.36151, + "5": 0.5961, + "6": 0.35637, + "7": 0.35787, + "8": 0.35755, + "9": 0.35356, + "10": 0.35923, + "11": 0.35827, + "12": 0.35689, + "13": 0.97539, + "14": 0.35703, + "15": 0.35633, + "16": 0.35889, + "17": 0.35586, + "18": 0.35688, + "19": 0.35645, + "20": 0.35976, + "21": 0.35733, + "22": 0.35708, + "23": 0.35968, + "24": 0.35728, + "25": 0.35727, + "26": 0.35822, + "27": 0.35734, + "28": 0.35672, + "29": 0.35566, + "30": 0.35576, + "31": 0.35716, + "32": 0.35824, + "33": 0.35667, + "34": 0.35897, + "35": 0.35713, + "36": 0.35482, + "37": 0.35925, + "38": 0.35547, + "39": 0.35781, + "40": 0.35516, + "41": 0.35633, + "42": 0.35674, + "43": 0.35645, + "44": 0.35797, + "45": 0.35717, + "46": 0.35635, + "47": 0.35374, + "48": 0.35743, + "49": 0.35664, + "50": 0.35474 + } + } +} \ No newline at end of 
file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..516c7e99194 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.81184, + "2": 10.80964, + "3": 10.8261, + "4": 10.83055, + "5": 10.85464, + "6": 10.84052, + "7": 10.83581, + "8": 10.80288, + "9": 10.87748, + "10": 10.88256, + "11": 10.87624, + "12": 10.82598, + "13": 10.84134, + "14": 10.81521, + "15": 10.80679, + "16": 10.79904, + "17": 10.76842, + "18": 10.77939, + "19": 10.75192, + "20": 10.63196, + "21": 10.68212, + "22": 10.63985, + "23": 10.75592, + "24": 10.60961, + "25": 10.47374, + "26": 10.59698, + "27": 10.54094, + "28": 10.44971, + "29": 10.39259, + "30": 10.39285, + "31": 10.49257, + "32": 10.31859, + "33": 10.27757, + "34": 10.44435, + "35": 9.96791, + "36": 10.11232, + "37": 10.02385, + "38": 10.37514, + "39": 9.78682, + "40": 10.1, + "41": 10.12396, + "42": 10.03, + "43": 10.19936, + "44": 10.0547, + "45": 9.68344, + "46": 9.98163, + "47": 9.92505, + "48": 9.6694, + "49": 9.91809, + "50": 9.92465 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1125.0, + "2": 1177.0, + "3": 1265.0, + "4": 1241.0, + "5": 1255.0, + "6": 1304.0, + "7": 1204.0, + "8": 998.0, + "9": 1236.0, + "10": 1367.0, + "11": 1252.0, + "12": 1281.0, + "13": 1254.0, + "14": 1148.0, + "15": 1127.0, + "16": 1102.0, + "17": 1193.0, + "18": 1248.0, + "19": 1072.0, + "20": 1082.0, + "21": 1201.0, + "22": 1302.0, + "23": 1336.0, + "24": 1317.0, + "25": 1114.0, + "26": 1200.0, + "27": 
1255.0, + "28": 1323.0, + "29": 1288.0, + "30": 1558.0, + "31": 1489.0, + "32": 1390.0, + "33": 1413.0, + "34": 1518.0, + "35": 1292.0, + "36": 1395.0, + "37": 1487.0, + "38": 1573.0, + "39": 1376.0, + "40": 1433.0, + "41": 1677.0, + "42": 1728.0, + "43": 1669.0, + "44": 1607.0, + "45": 1564.0, + "46": 1874.0, + "47": 1660.0, + "48": 1554.0, + "49": 1781.0, + "50": 1749.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 730320896.0, + "2": 730320896.0, + "3": 730320896.0, + "4": 730320896.0, + "5": 730320896.0, + "6": 730320896.0, + "7": 730320896.0, + "8": 730320896.0, + "9": 730320896.0, + "10": 730320896.0, + "11": 730320896.0, + "12": 730320896.0, + "13": 730320896.0, + "14": 730320896.0, + "15": 730320896.0, + "16": 730320896.0, + "17": 730320896.0, + "18": 730320896.0, + "19": 730320896.0, + "20": 730320896.0, + "21": 730320896.0, + "22": 730320896.0, + "23": 730320896.0, + "24": 730320896.0, + "25": 730320896.0, + "26": 730320896.0, + "27": 730320896.0, + "28": 730320896.0, + "29": 730320896.0, + "30": 730320896.0, + "31": 730320896.0, + "32": 730320896.0, + "33": 730320896.0, + "34": 730320896.0, + "35": 730320896.0, + "36": 730320896.0, + "37": 730320896.0, + "38": 730320896.0, + "39": 730320896.0, + "40": 730320896.0, + "41": 730320896.0, + "42": 730320896.0, + "43": 730320896.0, + "44": 730320896.0, + "45": 730320896.0, + "46": 730320896.0, + "47": 730320896.0, + "48": 730320896.0, + "49": 730320896.0, + "50": 730320896.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4311542272.0, + "2": 4593253888.0, + "3": 4593253888.0, + "4": 4593253888.0, + "5": 4593253888.0, + "6": 4593253888.0, + "7": 4593253888.0, + "8": 4593253888.0, + "9": 4593253888.0, + "10": 4593253888.0, + "11": 4593253888.0, + "12": 4593253888.0, + "13": 4593253888.0, + "14": 4593253888.0, + "15": 4593253888.0, + "16": 4593253888.0, + "17": 
4593253888.0, + "18": 4593253888.0, + "19": 4593253888.0, + "20": 4593253888.0, + "21": 4593253888.0, + "22": 4593253888.0, + "23": 4593253888.0, + "24": 4593253888.0, + "25": 4593253888.0, + "26": 4593253888.0, + "27": 4593253888.0, + "28": 4593253888.0, + "29": 4593253888.0, + "30": 4593253888.0, + "31": 4593253888.0, + "32": 4593253888.0, + "33": 4593253888.0, + "34": 4593253888.0, + "35": 4593253888.0, + "36": 4593253888.0, + "37": 4593253888.0, + "38": 4593253888.0, + "39": 4593253888.0, + "40": 4593253888.0, + "41": 4593253888.0, + "42": 4593253888.0, + "43": 4593253888.0, + "44": 4593253888.0, + "45": 4593253888.0, + "46": 4593253888.0, + "47": 4593253888.0, + "48": 4593253888.0, + "49": 4593253888.0, + "50": 4593253888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 19.94048, + "2": 0.39367, + "3": 0.37589, + "4": 0.37388, + "5": 0.66307, + "6": 0.36351, + "7": 0.3595, + "8": 0.36116, + "9": 0.36043, + "10": 0.35758, + "11": 0.36057, + "12": 0.35963, + "13": 0.36072, + "14": 0.35903, + "15": 0.35994, + "16": 0.35763, + "17": 0.36245, + "18": 0.35747, + "19": 0.35878, + "20": 0.35982, + "21": 0.35849, + "22": 0.35936, + "23": 0.35823, + "24": 0.35778, + "25": 0.3606, + "26": 0.35907, + "27": 0.35852, + "28": 0.35911, + "29": 0.35837, + "30": 0.35815, + "31": 0.35909, + "32": 0.35701, + "33": 0.3602, + "34": 0.35976, + "35": 0.36009, + "36": 0.35943, + "37": 0.35776, + "38": 0.35664, + "39": 0.36098, + "40": 0.35836, + "41": 0.35857, + "42": 0.35915, + "43": 0.3572, + "44": 0.35779, + "45": 0.36243, + "46": 0.35772, + "47": 0.35984, + "48": 0.35743, + "49": 0.35726, + "50": 0.35872 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 
bdbd770075f..ecbd1bac9aa 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.88372, "5": 10.88547, "10": 10.86477, "15": 10.81334, "20": 10.71864, "25": 10.55396, "30": 10.36075, "35": 10.25855, "40": 10.0779, "45": 9.84493, "50": 9.89982}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 22726932.0, "5": 22713776.0, "10": 22918608.0, "15": 22821768.0, "20": 22693536.0, "25": 22819092.0, "30": 22630868.0, "35": 22788568.0, "40": 22657832.0, "45": 22674860.0, "50": 22904840.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 688127488.0, "5": 688127488.0, "10": 688127488.0, "15": 688127488.0, "20": 688127488.0, "25": 688127488.0, "30": 688127488.0, "35": 688127488.0, "40": 688127488.0, "45": 688127488.0, "50": 688127488.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2159072768.0, "5": 2415565312.0, "10": 2415565312.0, "15": 2415565312.0, "20": 2415565312.0, "25": 2415565312.0, "30": 2415565312.0, "35": 2415565312.0, "40": 2415565312.0, "45": 2415565312.0, "50": 2415565312.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 12.21878, "5": 0.09761, "10": 0.10322, "15": 0.09934, "20": 0.09992, "25": 0.10002, "30": 0.09769, "35": 0.09817, "40": 0.09665, "45": 0.09737, "50": 0.09814}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.88372, + "2": 10.87208, + "3": 10.8784, + "4": 10.85806, + "5": 10.88547, + "6": 10.89556, + "7": 10.88051, + "8": 10.87687, + "9": 10.868, + "10": 10.86477, + "11": 
10.87779, + "12": 10.8736, + "13": 10.8617, + "14": 10.88756, + "15": 10.81334, + "16": 10.8276, + "17": 10.80766, + "18": 10.81067, + "19": 10.81127, + "20": 10.71864, + "21": 10.69427, + "22": 10.58083, + "23": 10.69548, + "24": 10.60367, + "25": 10.55396, + "26": 10.61304, + "27": 10.59026, + "28": 10.54029, + "29": 10.55687, + "30": 10.36075, + "31": 10.13943, + "32": 10.44344, + "33": 10.44459, + "34": 10.21087, + "35": 10.25855, + "36": 10.22779, + "37": 10.32843, + "38": 10.18154, + "39": 10.37655, + "40": 10.0779, + "41": 10.12618, + "42": 10.19378, + "43": 9.85406, + "44": 9.94224, + "45": 9.84493, + "46": 9.831, + "47": 10.13553, + "48": 9.84455, + "49": 9.5571, + "50": 9.89982 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22726932.0, + "2": 22924916.0, + "3": 22597332.0, + "4": 23219544.0, + "5": 22713776.0, + "6": 23021572.0, + "7": 22771346.0, + "8": 22926354.0, + "9": 22842338.0, + "10": 22918608.0, + "11": 22500808.0, + "12": 22460148.0, + "13": 22917564.0, + "14": 22389452.0, + "15": 22821768.0, + "16": 22831588.0, + "17": 22819586.0, + "18": 22582872.0, + "19": 22618426.0, + "20": 22693536.0, + "21": 22739728.0, + "22": 22800622.0, + "23": 22539616.0, + "24": 22771504.0, + "25": 22819092.0, + "26": 22547456.0, + "27": 22468726.0, + "28": 22453546.0, + "29": 22529680.0, + "30": 22630868.0, + "31": 22955432.0, + "32": 22585376.0, + "33": 22557692.0, + "34": 22835582.0, + "35": 22788568.0, + "36": 22588652.0, + "37": 22497950.0, + "38": 22895768.0, + "39": 22801524.0, + "40": 22657832.0, + "41": 22659668.0, + "42": 22667616.0, + "43": 22975828.0, + "44": 22746024.0, + "45": 22674860.0, + "46": 22884404.0, + "47": 22633804.0, + "48": 22928614.0, + "49": 22728000.0, + "50": 22904840.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 689176064.0, + "2": 689176064.0, + "3": 689176064.0, + "4": 689176064.0, + "5": 689176064.0, + 
"6": 689176064.0, + "7": 689176064.0, + "8": 689176064.0, + "9": 689176064.0, + "10": 689176064.0, + "11": 689176064.0, + "12": 689176064.0, + "13": 689176064.0, + "14": 689176064.0, + "15": 689176064.0, + "16": 689176064.0, + "17": 689176064.0, + "18": 689176064.0, + "19": 689176064.0, + "20": 689176064.0, + "21": 689176064.0, + "22": 689176064.0, + "23": 689176064.0, + "24": 689176064.0, + "25": 689176064.0, + "26": 689176064.0, + "27": 689176064.0, + "28": 689176064.0, + "29": 689176064.0, + "30": 689176064.0, + "31": 689176064.0, + "32": 689176064.0, + "33": 689176064.0, + "34": 689176064.0, + "35": 689176064.0, + "36": 689176064.0, + "37": 689176064.0, + "38": 689176064.0, + "39": 689176064.0, + "40": 689176064.0, + "41": 689176064.0, + "42": 689176064.0, + "43": 689176064.0, + "44": 689176064.0, + "45": 689176064.0, + "46": 689176064.0, + "47": 689176064.0, + "48": 689176064.0, + "49": 689176064.0, + "50": 689176064.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2158024192.0, + "2": 2416613888.0, + "3": 2416613888.0, + "4": 2416613888.0, + "5": 2416613888.0, + "6": 2416613888.0, + "7": 2416613888.0, + "8": 2416613888.0, + "9": 2416613888.0, + "10": 2416613888.0, + "11": 2416613888.0, + "12": 2416613888.0, + "13": 2416613888.0, + "14": 2416613888.0, + "15": 2416613888.0, + "16": 2416613888.0, + "17": 2416613888.0, + "18": 2416613888.0, + "19": 2416613888.0, + "20": 2416613888.0, + "21": 2416613888.0, + "22": 2416613888.0, + "23": 2416613888.0, + "24": 2416613888.0, + "25": 2416613888.0, + "26": 2416613888.0, + "27": 2416613888.0, + "28": 2416613888.0, + "29": 2416613888.0, + "30": 2416613888.0, + "31": 2416613888.0, + "32": 2416613888.0, + "33": 2416613888.0, + "34": 2416613888.0, + "35": 2416613888.0, + "36": 2416613888.0, + "37": 2416613888.0, + "38": 2416613888.0, + "39": 2416613888.0, + "40": 2416613888.0, + "41": 2416613888.0, + "42": 2416613888.0, + "43": 2416613888.0, + "44": 
2416613888.0, + "45": 2416613888.0, + "46": 2416613888.0, + "47": 2416613888.0, + "48": 2416613888.0, + "49": 2416613888.0, + "50": 2416613888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.59299, + "2": 0.13612, + "3": 0.11964, + "4": 0.11995, + "5": 0.12152, + "6": 0.121, + "7": 0.1191, + "8": 0.11751, + "9": 0.11711, + "10": 0.11878, + "11": 0.12221, + "12": 0.11956, + "13": 0.11737, + "14": 0.11954, + "15": 0.11916, + "16": 0.12038, + "17": 0.11939, + "18": 0.11747, + "19": 0.11879, + "20": 0.11955, + "21": 0.12128, + "22": 0.11892, + "23": 0.12306, + "24": 0.11834, + "25": 0.11924, + "26": 0.11961, + "27": 0.11912, + "28": 0.11913, + "29": 0.11896, + "30": 0.11897, + "31": 0.12121, + "32": 0.1215, + "33": 0.11867, + "34": 0.11783, + "35": 0.11835, + "36": 0.12172, + "37": 0.11939, + "38": 0.11963, + "39": 0.11846, + "40": 0.11889, + "41": 0.11897, + "42": 0.11775, + "43": 0.12004, + "44": 0.1201, + "45": 0.11742, + "46": 0.1204, + "47": 0.11915, + "48": 0.1208, + "49": 0.11898, + "50": 0.1165 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..19e0972675c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.88372, + "2": 10.87208, + "3": 10.8784, + "4": 10.85806, + "5": 10.88547, + "6": 10.89556, + "7": 10.88051, + "8": 10.87687, + "9": 10.868, + "10": 10.86477, + "11": 10.87779, + "12": 10.8736, + "13": 10.8617, + "14": 10.88756, + "15": 10.81334, + "16": 
10.8276, + "17": 10.80766, + "18": 10.81067, + "19": 10.81127, + "20": 10.71864, + "21": 10.69427, + "22": 10.58083, + "23": 10.69548, + "24": 10.60367, + "25": 10.55396, + "26": 10.61304, + "27": 10.59026, + "28": 10.54029, + "29": 10.55687, + "30": 10.36075, + "31": 10.13943, + "32": 10.44344, + "33": 10.44459, + "34": 10.21087, + "35": 10.25855, + "36": 10.22779, + "37": 10.32843, + "38": 10.18154, + "39": 10.37655, + "40": 10.0779, + "41": 10.12618, + "42": 10.19378, + "43": 9.85406, + "44": 9.94224, + "45": 9.84493, + "46": 9.831, + "47": 10.13553, + "48": 9.84455, + "49": 9.5571, + "50": 9.89982 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22726932.0, + "2": 22924916.0, + "3": 22597332.0, + "4": 23219544.0, + "5": 22713776.0, + "6": 23021572.0, + "7": 22771346.0, + "8": 22926354.0, + "9": 22842338.0, + "10": 22918608.0, + "11": 22500808.0, + "12": 22460148.0, + "13": 22917564.0, + "14": 22389452.0, + "15": 22821768.0, + "16": 22831588.0, + "17": 22819586.0, + "18": 22582872.0, + "19": 22618426.0, + "20": 22693536.0, + "21": 22739728.0, + "22": 22800622.0, + "23": 22539616.0, + "24": 22771504.0, + "25": 22819092.0, + "26": 22547456.0, + "27": 22468726.0, + "28": 22453546.0, + "29": 22529680.0, + "30": 22630868.0, + "31": 22955432.0, + "32": 22585376.0, + "33": 22557692.0, + "34": 22835582.0, + "35": 22788568.0, + "36": 22588652.0, + "37": 22497950.0, + "38": 22895768.0, + "39": 22801524.0, + "40": 22657832.0, + "41": 22659668.0, + "42": 22667616.0, + "43": 22975828.0, + "44": 22746024.0, + "45": 22674860.0, + "46": 22884404.0, + "47": 22633804.0, + "48": 22928614.0, + "49": 22728000.0, + "50": 22904840.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 689176064.0, + "2": 689176064.0, + "3": 689176064.0, + "4": 689176064.0, + "5": 689176064.0, + "6": 689176064.0, + "7": 689176064.0, + "8": 689176064.0, + "9": 689176064.0, + "10": 
689176064.0, + "11": 689176064.0, + "12": 689176064.0, + "13": 689176064.0, + "14": 689176064.0, + "15": 689176064.0, + "16": 689176064.0, + "17": 689176064.0, + "18": 689176064.0, + "19": 689176064.0, + "20": 689176064.0, + "21": 689176064.0, + "22": 689176064.0, + "23": 689176064.0, + "24": 689176064.0, + "25": 689176064.0, + "26": 689176064.0, + "27": 689176064.0, + "28": 689176064.0, + "29": 689176064.0, + "30": 689176064.0, + "31": 689176064.0, + "32": 689176064.0, + "33": 689176064.0, + "34": 689176064.0, + "35": 689176064.0, + "36": 689176064.0, + "37": 689176064.0, + "38": 689176064.0, + "39": 689176064.0, + "40": 689176064.0, + "41": 689176064.0, + "42": 689176064.0, + "43": 689176064.0, + "44": 689176064.0, + "45": 689176064.0, + "46": 689176064.0, + "47": 689176064.0, + "48": 689176064.0, + "49": 689176064.0, + "50": 689176064.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2158024192.0, + "2": 2416613888.0, + "3": 2416613888.0, + "4": 2416613888.0, + "5": 2416613888.0, + "6": 2416613888.0, + "7": 2416613888.0, + "8": 2416613888.0, + "9": 2416613888.0, + "10": 2416613888.0, + "11": 2416613888.0, + "12": 2416613888.0, + "13": 2416613888.0, + "14": 2416613888.0, + "15": 2416613888.0, + "16": 2416613888.0, + "17": 2416613888.0, + "18": 2416613888.0, + "19": 2416613888.0, + "20": 2416613888.0, + "21": 2416613888.0, + "22": 2416613888.0, + "23": 2416613888.0, + "24": 2416613888.0, + "25": 2416613888.0, + "26": 2416613888.0, + "27": 2416613888.0, + "28": 2416613888.0, + "29": 2416613888.0, + "30": 2416613888.0, + "31": 2416613888.0, + "32": 2416613888.0, + "33": 2416613888.0, + "34": 2416613888.0, + "35": 2416613888.0, + "36": 2416613888.0, + "37": 2416613888.0, + "38": 2416613888.0, + "39": 2416613888.0, + "40": 2416613888.0, + "41": 2416613888.0, + "42": 2416613888.0, + "43": 2416613888.0, + "44": 2416613888.0, + "45": 2416613888.0, + "46": 2416613888.0, + "47": 2416613888.0, + "48": 
2416613888.0, + "49": 2416613888.0, + "50": 2416613888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.71503, + "2": 0.1487, + "3": 1.53681, + "4": 2.08776, + "5": 2.61238, + "6": 1.60198, + "7": 0.87803, + "8": 0.10645, + "9": 1.03031, + "10": 0.10629, + "11": 0.2821, + "12": 0.10863, + "13": 0.10328, + "14": 0.10854, + "15": 0.10326, + "16": 0.10341, + "17": 0.10778, + "18": 0.11121, + "19": 0.10959, + "20": 0.10422, + "21": 0.10422, + "22": 0.1042, + "23": 0.10422, + "24": 0.10385, + "25": 0.10416, + "26": 0.1052, + "27": 0.10423, + "28": 0.10355, + "29": 0.10327, + "30": 0.10455, + "31": 0.10463, + "32": 0.1045, + "33": 0.10325, + "34": 0.10331, + "35": 0.10475, + "36": 0.10327, + "37": 0.10355, + "38": 0.10433, + "39": 0.10353, + "40": 0.10394, + "41": 0.10379, + "42": 0.10774, + "43": 0.10625, + "44": 0.10346, + "45": 0.10532, + "46": 0.10766, + "47": 0.10537, + "48": 0.10462, + "49": 0.1051, + "50": 0.1039 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..ea2bd7effce --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.88372, + "2": 10.87208, + "3": 10.8784, + "4": 10.85806, + "5": 10.88547, + "6": 10.89556, + "7": 10.88051, + "8": 10.87687, + "9": 10.868, + "10": 10.86477, + "11": 10.87779, + "12": 10.8736, + "13": 10.8617, + "14": 10.88756, + "15": 10.81334, + "16": 10.8276, + "17": 10.80766, + "18": 10.81067, + "19": 10.81127, + "20": 10.71864, + "21": 10.69427, + "22": 
10.58083, + "23": 10.69548, + "24": 10.60367, + "25": 10.55396, + "26": 10.61304, + "27": 10.59026, + "28": 10.54029, + "29": 10.55687, + "30": 10.36075, + "31": 10.13943, + "32": 10.44344, + "33": 10.44459, + "34": 10.21087, + "35": 10.25855, + "36": 10.22779, + "37": 10.32843, + "38": 10.18154, + "39": 10.37655, + "40": 10.0779, + "41": 10.12618, + "42": 10.19378, + "43": 9.85406, + "44": 9.94224, + "45": 9.84493, + "46": 9.831, + "47": 10.13553, + "48": 9.84455, + "49": 9.5571, + "50": 9.89982 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22726932.0, + "2": 22924916.0, + "3": 22597332.0, + "4": 23219544.0, + "5": 22713776.0, + "6": 23021572.0, + "7": 22771346.0, + "8": 22926354.0, + "9": 22842338.0, + "10": 22918608.0, + "11": 22500808.0, + "12": 22460148.0, + "13": 22917564.0, + "14": 22389452.0, + "15": 22821768.0, + "16": 22831588.0, + "17": 22819586.0, + "18": 22582872.0, + "19": 22618426.0, + "20": 22693536.0, + "21": 22739728.0, + "22": 22800622.0, + "23": 22539616.0, + "24": 22771504.0, + "25": 22819092.0, + "26": 22547456.0, + "27": 22468726.0, + "28": 22453546.0, + "29": 22529680.0, + "30": 22630868.0, + "31": 22955432.0, + "32": 22585376.0, + "33": 22557692.0, + "34": 22835582.0, + "35": 22788568.0, + "36": 22588652.0, + "37": 22497950.0, + "38": 22895768.0, + "39": 22801524.0, + "40": 22657832.0, + "41": 22659668.0, + "42": 22667616.0, + "43": 22975828.0, + "44": 22746024.0, + "45": 22674860.0, + "46": 22884404.0, + "47": 22633804.0, + "48": 22928614.0, + "49": 22728000.0, + "50": 22904840.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 689176064.0, + "2": 689176064.0, + "3": 689176064.0, + "4": 689176064.0, + "5": 689176064.0, + "6": 689176064.0, + "7": 689176064.0, + "8": 689176064.0, + "9": 689176064.0, + "10": 689176064.0, + "11": 689176064.0, + "12": 689176064.0, + "13": 689176064.0, + "14": 689176064.0, + "15": 
689176064.0, + "16": 689176064.0, + "17": 689176064.0, + "18": 689176064.0, + "19": 689176064.0, + "20": 689176064.0, + "21": 689176064.0, + "22": 689176064.0, + "23": 689176064.0, + "24": 689176064.0, + "25": 689176064.0, + "26": 689176064.0, + "27": 689176064.0, + "28": 689176064.0, + "29": 689176064.0, + "30": 689176064.0, + "31": 689176064.0, + "32": 689176064.0, + "33": 689176064.0, + "34": 689176064.0, + "35": 689176064.0, + "36": 689176064.0, + "37": 689176064.0, + "38": 689176064.0, + "39": 689176064.0, + "40": 689176064.0, + "41": 689176064.0, + "42": 689176064.0, + "43": 689176064.0, + "44": 689176064.0, + "45": 689176064.0, + "46": 689176064.0, + "47": 689176064.0, + "48": 689176064.0, + "49": 689176064.0, + "50": 689176064.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2158024192.0, + "2": 2416613888.0, + "3": 2416613888.0, + "4": 2416613888.0, + "5": 2416613888.0, + "6": 2416613888.0, + "7": 2416613888.0, + "8": 2416613888.0, + "9": 2416613888.0, + "10": 2416613888.0, + "11": 2416613888.0, + "12": 2416613888.0, + "13": 2416613888.0, + "14": 2416613888.0, + "15": 2416613888.0, + "16": 2416613888.0, + "17": 2416613888.0, + "18": 2416613888.0, + "19": 2416613888.0, + "20": 2416613888.0, + "21": 2416613888.0, + "22": 2416613888.0, + "23": 2416613888.0, + "24": 2416613888.0, + "25": 2416613888.0, + "26": 2416613888.0, + "27": 2416613888.0, + "28": 2416613888.0, + "29": 2416613888.0, + "30": 2416613888.0, + "31": 2416613888.0, + "32": 2416613888.0, + "33": 2416613888.0, + "34": 2416613888.0, + "35": 2416613888.0, + "36": 2416613888.0, + "37": 2416613888.0, + "38": 2416613888.0, + "39": 2416613888.0, + "40": 2416613888.0, + "41": 2416613888.0, + "42": 2416613888.0, + "43": 2416613888.0, + "44": 2416613888.0, + "45": 2416613888.0, + "46": 2416613888.0, + "47": 2416613888.0, + "48": 2416613888.0, + "49": 2416613888.0, + "50": 2416613888.0 + } + }, + "iteration-time": { + "start_step": 1, 
+ "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.4694, + "2": 0.13977, + "3": 0.12731, + "4": 0.12879, + "5": 0.11865, + "6": 0.118, + "7": 0.11942, + "8": 0.11938, + "9": 0.11951, + "10": 0.11735, + "11": 0.11836, + "12": 0.11978, + "13": 0.11914, + "14": 0.11821, + "15": 0.11692, + "16": 0.11708, + "17": 0.11825, + "18": 0.11909, + "19": 0.11996, + "20": 0.11962, + "21": 0.12002, + "22": 0.11972, + "23": 0.11943, + "24": 0.11873, + "25": 0.11787, + "26": 0.1172, + "27": 0.11703, + "28": 0.12106, + "29": 0.11863, + "30": 0.11927, + "31": 0.11941, + "32": 0.11801, + "33": 0.11903, + "34": 0.1181, + "35": 0.11794, + "36": 0.11973, + "37": 0.11831, + "38": 0.11753, + "39": 0.11901, + "40": 0.11713, + "41": 0.11926, + "42": 0.11756, + "43": 0.1189, + "44": 0.11853, + "45": 0.12132, + "46": 0.11905, + "47": 0.11892, + "48": 0.11664, + "49": 0.11721, + "50": 0.11854 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..0f1e0462ded --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.9735, + "2": 10.96394, + "3": 10.96467, + "4": 10.96021, + "5": 10.95594, + "6": 10.96043, + "7": 10.95626, + "8": 10.96144, + "9": 10.965, + "10": 10.94989, + "11": 10.95576, + "12": 10.947, + "13": 10.94636, + "14": 10.95394, + "15": 10.9115, + "16": 10.91038, + "17": 10.88885, + "18": 10.89782, + "19": 10.89048, + "20": 10.80975, + "21": 10.78792, + "22": 10.69838, + "23": 10.79225, + "24": 10.69861, + "25": 10.6662, + "26": 10.71196, 
+ "27": 10.68312, + "28": 10.62307, + "29": 10.65054, + "30": 10.45501, + "31": 10.22425, + "32": 10.52333, + "33": 10.52504, + "34": 10.29088, + "35": 10.33418, + "36": 10.28927, + "37": 10.39816, + "38": 10.25546, + "39": 10.44879, + "40": 10.14646, + "41": 10.19054, + "42": 10.24672, + "43": 9.89533, + "44": 10.00885, + "45": 9.89112, + "46": 9.86375, + "47": 10.165, + "48": 9.87995, + "49": 9.5695, + "50": 9.9526 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22727052.0, + "2": 22925412.0, + "3": 22596906.0, + "4": 23219222.0, + "5": 22714228.0, + "6": 23021930.0, + "7": 22770230.0, + "8": 22926370.0, + "9": 22841956.0, + "10": 22918376.0, + "11": 22501022.0, + "12": 22459784.0, + "13": 22916644.0, + "14": 22389748.0, + "15": 22820932.0, + "16": 22831208.0, + "17": 22819716.0, + "18": 22582820.0, + "19": 22618452.0, + "20": 22694228.0, + "21": 22740076.0, + "22": 22799292.0, + "23": 22539898.0, + "24": 22771252.0, + "25": 22819528.0, + "26": 22547832.0, + "27": 22468264.0, + "28": 22453304.0, + "29": 22529758.0, + "30": 22631178.0, + "31": 22955168.0, + "32": 22584982.0, + "33": 22558648.0, + "34": 22835982.0, + "35": 22787526.0, + "36": 22589358.0, + "37": 22496568.0, + "38": 22896700.0, + "39": 22801666.0, + "40": 22657932.0, + "41": 22658800.0, + "42": 22666830.0, + "43": 22975584.0, + "44": 22746628.0, + "45": 22674550.0, + "46": 22885018.0, + "47": 22633780.0, + "48": 22929278.0, + "49": 22728106.0, + "50": 22905400.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 657718272.0, + "2": 657718272.0, + "3": 657718272.0, + "4": 657718272.0, + "5": 657718272.0, + "6": 657718272.0, + "7": 657718272.0, + "8": 657718272.0, + "9": 657718272.0, + "10": 657718272.0, + "11": 657718272.0, + "12": 657718272.0, + "13": 657718272.0, + "14": 657718272.0, + "15": 657718272.0, + "16": 657718272.0, + "17": 657718272.0, + "18": 657718272.0, + "19": 
657718272.0, + "20": 657718272.0, + "21": 657718272.0, + "22": 657718272.0, + "23": 657718272.0, + "24": 657718272.0, + "25": 657718272.0, + "26": 657718272.0, + "27": 657718272.0, + "28": 657718272.0, + "29": 657718272.0, + "30": 657718272.0, + "31": 657718272.0, + "32": 657718272.0, + "33": 657718272.0, + "34": 657718272.0, + "35": 657718272.0, + "36": 657718272.0, + "37": 657718272.0, + "38": 657718272.0, + "39": 657718272.0, + "40": 657718272.0, + "41": 657718272.0, + "42": 657718272.0, + "43": 657718272.0, + "44": 657718272.0, + "45": 657718272.0, + "46": 657718272.0, + "47": 657718272.0, + "48": 657718272.0, + "49": 657718272.0, + "50": 657718272.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2129712128.0, + "2": 2385156096.0, + "3": 2385156096.0, + "4": 2385156096.0, + "5": 2385156096.0, + "6": 2385156096.0, + "7": 2385156096.0, + "8": 2385156096.0, + "9": 2385156096.0, + "10": 2385156096.0, + "11": 2385156096.0, + "12": 2385156096.0, + "13": 2385156096.0, + "14": 2385156096.0, + "15": 2385156096.0, + "16": 2385156096.0, + "17": 2385156096.0, + "18": 2385156096.0, + "19": 2385156096.0, + "20": 2385156096.0, + "21": 2385156096.0, + "22": 2385156096.0, + "23": 2385156096.0, + "24": 2385156096.0, + "25": 2385156096.0, + "26": 2385156096.0, + "27": 2385156096.0, + "28": 2385156096.0, + "29": 2385156096.0, + "30": 2385156096.0, + "31": 2385156096.0, + "32": 2385156096.0, + "33": 2385156096.0, + "34": 2385156096.0, + "35": 2385156096.0, + "36": 2385156096.0, + "37": 2385156096.0, + "38": 2385156096.0, + "39": 2385156096.0, + "40": 2385156096.0, + "41": 2385156096.0, + "42": 2385156096.0, + "43": 2385156096.0, + "44": 2385156096.0, + "45": 2385156096.0, + "46": 2385156096.0, + "47": 2385156096.0, + "48": 2385156096.0, + "49": 2385156096.0, + "50": 2385156096.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.63368, + "2": 
0.20019, + "3": 0.17416, + "4": 0.17243, + "5": 0.17154, + "6": 0.17102, + "7": 0.17145, + "8": 0.17064, + "9": 0.17149, + "10": 0.17097, + "11": 0.1712, + "12": 0.17013, + "13": 0.17029, + "14": 0.17017, + "15": 0.4213, + "16": 0.44794, + "17": 0.16976, + "18": 0.16874, + "19": 0.16893, + "20": 0.16955, + "21": 0.16934, + "22": 0.16862, + "23": 0.16838, + "24": 0.16917, + "25": 0.16984, + "26": 0.16954, + "27": 0.16772, + "28": 0.16867, + "29": 0.16821, + "30": 0.16849, + "31": 0.1682, + "32": 0.16841, + "33": 0.16791, + "34": 0.16857, + "35": 0.16849, + "36": 0.16691, + "37": 0.16837, + "38": 0.16784, + "39": 0.1683, + "40": 0.16832, + "41": 0.16851, + "42": 0.16835, + "43": 0.16781, + "44": 0.16765, + "45": 0.16745, + "46": 0.1685, + "47": 0.168, + "48": 0.16906, + "49": 0.16772, + "50": 0.16771 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..5b8869bf6ef --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.9735, + "2": 10.96394, + "3": 10.96467, + "4": 10.96021, + "5": 10.95594, + "6": 10.96043, + "7": 10.95626, + "8": 10.96144, + "9": 10.965, + "10": 10.94989, + "11": 10.95576, + "12": 10.947, + "13": 10.94636, + "14": 10.95394, + "15": 10.9115, + "16": 10.91038, + "17": 10.88885, + "18": 10.89782, + "19": 10.89048, + "20": 10.80975, + "21": 10.78792, + "22": 10.69838, + "23": 10.79225, + "24": 10.69861, + "25": 10.6662, + "26": 10.71196, + "27": 10.68312, + "28": 10.62307, + "29": 10.65054, + "30": 10.45501, + "31": 10.22425, + 
"32": 10.52333, + "33": 10.52504, + "34": 10.29088, + "35": 10.33418, + "36": 10.28927, + "37": 10.39816, + "38": 10.25546, + "39": 10.44879, + "40": 10.14646, + "41": 10.19054, + "42": 10.24672, + "43": 9.89533, + "44": 10.00885, + "45": 9.89112, + "46": 9.86375, + "47": 10.165, + "48": 9.87995, + "49": 9.5695, + "50": 9.9526 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22727052.0, + "2": 22925412.0, + "3": 22596906.0, + "4": 23219222.0, + "5": 22714228.0, + "6": 23021930.0, + "7": 22770230.0, + "8": 22926370.0, + "9": 22841956.0, + "10": 22918376.0, + "11": 22501022.0, + "12": 22459784.0, + "13": 22916644.0, + "14": 22389748.0, + "15": 22820932.0, + "16": 22831208.0, + "17": 22819716.0, + "18": 22582820.0, + "19": 22618452.0, + "20": 22694228.0, + "21": 22740076.0, + "22": 22799292.0, + "23": 22539898.0, + "24": 22771252.0, + "25": 22819528.0, + "26": 22547832.0, + "27": 22468264.0, + "28": 22453304.0, + "29": 22529758.0, + "30": 22631178.0, + "31": 22955168.0, + "32": 22584982.0, + "33": 22558648.0, + "34": 22835982.0, + "35": 22787526.0, + "36": 22589358.0, + "37": 22496568.0, + "38": 22896700.0, + "39": 22801666.0, + "40": 22657932.0, + "41": 22658800.0, + "42": 22666830.0, + "43": 22975584.0, + "44": 22746628.0, + "45": 22674550.0, + "46": 22885018.0, + "47": 22633780.0, + "48": 22929278.0, + "49": 22728106.0, + "50": 22905400.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 657718272.0, + "2": 657718272.0, + "3": 657718272.0, + "4": 657718272.0, + "5": 657718272.0, + "6": 657718272.0, + "7": 657718272.0, + "8": 657718272.0, + "9": 657718272.0, + "10": 657718272.0, + "11": 657718272.0, + "12": 657718272.0, + "13": 657718272.0, + "14": 657718272.0, + "15": 657718272.0, + "16": 657718272.0, + "17": 657718272.0, + "18": 657718272.0, + "19": 657718272.0, + "20": 657718272.0, + "21": 657718272.0, + "22": 657718272.0, + "23": 
657718272.0, + "24": 657718272.0, + "25": 657718272.0, + "26": 657718272.0, + "27": 657718272.0, + "28": 657718272.0, + "29": 657718272.0, + "30": 657718272.0, + "31": 657718272.0, + "32": 657718272.0, + "33": 657718272.0, + "34": 657718272.0, + "35": 657718272.0, + "36": 657718272.0, + "37": 657718272.0, + "38": 657718272.0, + "39": 657718272.0, + "40": 657718272.0, + "41": 657718272.0, + "42": 657718272.0, + "43": 657718272.0, + "44": 657718272.0, + "45": 657718272.0, + "46": 657718272.0, + "47": 657718272.0, + "48": 657718272.0, + "49": 657718272.0, + "50": 657718272.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2129712128.0, + "2": 2385156096.0, + "3": 2385156096.0, + "4": 2385156096.0, + "5": 2385156096.0, + "6": 2385156096.0, + "7": 2385156096.0, + "8": 2385156096.0, + "9": 2385156096.0, + "10": 2385156096.0, + "11": 2385156096.0, + "12": 2385156096.0, + "13": 2385156096.0, + "14": 2385156096.0, + "15": 2385156096.0, + "16": 2385156096.0, + "17": 2385156096.0, + "18": 2385156096.0, + "19": 2385156096.0, + "20": 2385156096.0, + "21": 2385156096.0, + "22": 2385156096.0, + "23": 2385156096.0, + "24": 2385156096.0, + "25": 2385156096.0, + "26": 2385156096.0, + "27": 2385156096.0, + "28": 2385156096.0, + "29": 2385156096.0, + "30": 2385156096.0, + "31": 2385156096.0, + "32": 2385156096.0, + "33": 2385156096.0, + "34": 2385156096.0, + "35": 2385156096.0, + "36": 2385156096.0, + "37": 2385156096.0, + "38": 2385156096.0, + "39": 2385156096.0, + "40": 2385156096.0, + "41": 2385156096.0, + "42": 2385156096.0, + "43": 2385156096.0, + "44": 2385156096.0, + "45": 2385156096.0, + "46": 2385156096.0, + "47": 2385156096.0, + "48": 2385156096.0, + "49": 2385156096.0, + "50": 2385156096.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.59745, + "2": 0.20599, + "3": 0.17301, + "4": 0.16858, + "5": 0.16742, + "6": 0.16685, + "7": 0.16812, 
+ "8": 0.16712, + "9": 0.16761, + "10": 0.17297, + "11": 0.16947, + "12": 0.16929, + "13": 0.16969, + "14": 0.17093, + "15": 0.41089, + "16": 0.16958, + "17": 0.17028, + "18": 0.16804, + "19": 0.168, + "20": 0.16883, + "21": 0.16811, + "22": 0.16849, + "23": 0.17004, + "24": 0.16922, + "25": 0.16921, + "26": 0.16876, + "27": 0.16877, + "28": 0.16916, + "29": 0.16991, + "30": 0.16846, + "31": 0.16951, + "32": 0.16845, + "33": 0.1685, + "34": 0.16865, + "35": 0.16813, + "36": 0.16739, + "37": 0.16866, + "38": 0.16859, + "39": 0.16669, + "40": 0.16917, + "41": 0.16941, + "42": 0.1688, + "43": 0.1693, + "44": 0.16931, + "45": 0.16903, + "46": 0.16894, + "47": 0.16682, + "48": 0.16811, + "49": 0.1682, + "50": 0.16932 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index f770cd4d016..10eb9e57910 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.89824, "5": 10.88993, "10": 10.88255, "15": 10.86969, "20": 10.84335, "25": 10.75377, "30": 10.62875, "35": 10.56066, "40": 10.36652, "45": 10.15385, "50": 10.18997}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 22727178.0, "5": 22714208.0, "10": 22918036.0, "15": 22820856.0, "20": 22693674.0, "25": 22818024.0, "30": 22630720.0, "35": 22787216.0, "40": 22657316.0, "45": 22674868.0, "50": 
22903748.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 641870336.0, "5": 641870336.0, "10": 641870336.0, "15": 641870336.0, "20": 641870336.0, "25": 641870336.0, "30": 641870336.0, "35": 641870336.0, "40": 641870336.0, "45": 641870336.0, "50": 641870336.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2611572224.0, "5": 2843894272.0, "10": 2843894272.0, "15": 2843894272.0, "20": 2843894272.0, "25": 2843894272.0, "30": 2843894272.0, "35": 2843894272.0, "40": 2843894272.0, "45": 2843894272.0, "50": 2843894272.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 13.42997, "5": 0.07593, "10": 0.06948, "15": 0.07002, "20": 0.07394, "25": 0.07013, "30": 0.07189, "35": 0.07303, "40": 0.07285, "45": 0.0679, "50": 0.069}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.89824, + "2": 10.90282, + "3": 10.89982, + "4": 10.86583, + "5": 10.88993, + "6": 10.9049, + "7": 10.89182, + "8": 10.90189, + "9": 10.88632, + "10": 10.88255, + "11": 10.91544, + "12": 10.90811, + "13": 10.91696, + "14": 10.92165, + "15": 10.86969, + "16": 10.8841, + "17": 10.87056, + "18": 10.88709, + "19": 10.87706, + "20": 10.84335, + "21": 10.83631, + "22": 10.76629, + "23": 10.83029, + "24": 10.79277, + "25": 10.75377, + "26": 10.78891, + "27": 10.79166, + "28": 10.74336, + "29": 10.75965, + "30": 10.62875, + "31": 10.45418, + "32": 10.68825, + "33": 10.68615, + "34": 10.52385, + "35": 10.56066, + "36": 10.53762, + "37": 10.60286, + "38": 10.46752, + "39": 10.60804, + "40": 10.36652, + "41": 10.38788, + "42": 10.45579, + "43": 10.15865, + "44": 10.24803, + "45": 10.15385, + "46": 10.13564, + "47": 10.39205, + "48": 10.1415, + "49": 9.88025, + "50": 10.18997 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22727178.0, + "2": 
22924812.0, + "3": 22596704.0, + "4": 23218766.0, + "5": 22714208.0, + "6": 23020316.0, + "7": 22771086.0, + "8": 22926440.0, + "9": 22842352.0, + "10": 22918036.0, + "11": 22500516.0, + "12": 22459304.0, + "13": 22916284.0, + "14": 22387532.0, + "15": 22820856.0, + "16": 22830090.0, + "17": 22818880.0, + "18": 22582012.0, + "19": 22616784.0, + "20": 22693674.0, + "21": 22739360.0, + "22": 22799250.0, + "23": 22538774.0, + "24": 22770954.0, + "25": 22818024.0, + "26": 22547278.0, + "27": 22468476.0, + "28": 22452228.0, + "29": 22527980.0, + "30": 22630720.0, + "31": 22954516.0, + "32": 22584820.0, + "33": 22557266.0, + "34": 22834728.0, + "35": 22787216.0, + "36": 22588668.0, + "37": 22496474.0, + "38": 22895320.0, + "39": 22800062.0, + "40": 22657316.0, + "41": 22658142.0, + "42": 22666692.0, + "43": 22974950.0, + "44": 22745468.0, + "45": 22674868.0, + "46": 22883238.0, + "47": 22632908.0, + "48": 22927884.0, + "49": 22727252.0, + "50": 22903748.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 638724608.0, + "2": 638724608.0, + "3": 638724608.0, + "4": 638724608.0, + "5": 638724608.0, + "6": 638724608.0, + "7": 638724608.0, + "8": 638724608.0, + "9": 638724608.0, + "10": 638724608.0, + "11": 638724608.0, + "12": 638724608.0, + "13": 638724608.0, + "14": 638724608.0, + "15": 638724608.0, + "16": 638724608.0, + "17": 638724608.0, + "18": 638724608.0, + "19": 638724608.0, + "20": 638724608.0, + "21": 638724608.0, + "22": 638724608.0, + "23": 638724608.0, + "24": 638724608.0, + "25": 638724608.0, + "26": 638724608.0, + "27": 638724608.0, + "28": 638724608.0, + "29": 638724608.0, + "30": 638724608.0, + "31": 638724608.0, + "32": 638724608.0, + "33": 638724608.0, + "34": 638724608.0, + "35": 638724608.0, + "36": 638724608.0, + "37": 638724608.0, + "38": 638724608.0, + "39": 638724608.0, + "40": 638724608.0, + "41": 638724608.0, + "42": 638724608.0, + "43": 638724608.0, + "44": 638724608.0, + "45": 
638724608.0, + "46": 638724608.0, + "47": 638724608.0, + "48": 638724608.0, + "49": 638724608.0, + "50": 638724608.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2610025984.0, + "2": 2840250880.0, + "3": 2840250880.0, + "4": 2840250880.0, + "5": 2840250880.0, + "6": 2840250880.0, + "7": 2840250880.0, + "8": 2840250880.0, + "9": 2840250880.0, + "10": 2840250880.0, + "11": 2840250880.0, + "12": 2840250880.0, + "13": 2840250880.0, + "14": 2840250880.0, + "15": 2840250880.0, + "16": 2840250880.0, + "17": 2840250880.0, + "18": 2840250880.0, + "19": 2840250880.0, + "20": 2840250880.0, + "21": 2840250880.0, + "22": 2840250880.0, + "23": 2840250880.0, + "24": 2840250880.0, + "25": 2840250880.0, + "26": 2840250880.0, + "27": 2840250880.0, + "28": 2840250880.0, + "29": 2840250880.0, + "30": 2840250880.0, + "31": 2840250880.0, + "32": 2840250880.0, + "33": 2840250880.0, + "34": 2840250880.0, + "35": 2840250880.0, + "36": 2840250880.0, + "37": 2840250880.0, + "38": 2840250880.0, + "39": 2840250880.0, + "40": 2840250880.0, + "41": 2840250880.0, + "42": 2840250880.0, + "43": 2840250880.0, + "44": 2840250880.0, + "45": 2840250880.0, + "46": 2840250880.0, + "47": 2840250880.0, + "48": 2840250880.0, + "49": 2840250880.0, + "50": 2840250880.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.82473, + "2": 0.09608, + "3": 0.08117, + "4": 0.08184, + "5": 0.08242, + "6": 0.07918, + "7": 0.07939, + "8": 0.07963, + "9": 0.07945, + "10": 0.081, + "11": 0.07867, + "12": 0.07897, + "13": 0.0828, + "14": 0.08361, + "15": 0.08417, + "16": 0.08323, + "17": 0.08405, + "18": 0.08256, + "19": 0.08229, + "20": 0.0827, + "21": 0.08446, + "22": 0.08314, + "23": 0.08296, + "24": 0.08234, + "25": 0.0813, + "26": 0.08393, + "27": 0.08424, + "28": 0.08312, + "29": 0.08286, + "30": 0.08113, + "31": 0.07871, + "32": 0.08259, + "33": 0.08088, + "34": 0.07808, + "35": 
0.07855, + "36": 0.07792, + "37": 0.07877, + "38": 0.07813, + "39": 0.07792, + "40": 0.07826, + "41": 0.07872, + "42": 0.07977, + "43": 0.07875, + "44": 0.07847, + "45": 0.07879, + "46": 0.07965, + "47": 0.08085, + "48": 0.07886, + "49": 0.07904, + "50": 0.07778 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..f1fd0f05b76 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.89824, + "2": 10.90282, + "3": 10.89982, + "4": 10.86583, + "5": 10.88993, + "6": 10.9049, + "7": 10.89182, + "8": 10.90189, + "9": 10.88632, + "10": 10.88255, + "11": 10.91544, + "12": 10.90811, + "13": 10.91696, + "14": 10.92165, + "15": 10.86969, + "16": 10.8841, + "17": 10.87056, + "18": 10.88709, + "19": 10.87706, + "20": 10.84335, + "21": 10.83631, + "22": 10.76629, + "23": 10.83029, + "24": 10.79277, + "25": 10.75377, + "26": 10.78891, + "27": 10.79166, + "28": 10.74336, + "29": 10.75965, + "30": 10.62875, + "31": 10.45418, + "32": 10.68825, + "33": 10.68615, + "34": 10.52385, + "35": 10.56066, + "36": 10.53762, + "37": 10.60286, + "38": 10.46752, + "39": 10.60804, + "40": 10.36652, + "41": 10.38788, + "42": 10.45579, + "43": 10.15865, + "44": 10.24803, + "45": 10.15385, + "46": 10.13564, + "47": 10.39205, + "48": 10.1415, + "49": 9.88025, + "50": 10.18997 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 
22727178.0, + "2": 22924812.0, + "3": 22596704.0, + "4": 23218766.0, + "5": 22714208.0, + "6": 23020316.0, + "7": 22771086.0, + "8": 22926440.0, + "9": 22842352.0, + "10": 22918036.0, + "11": 22500516.0, + "12": 22459304.0, + "13": 22916284.0, + "14": 22387532.0, + "15": 22820856.0, + "16": 22830090.0, + "17": 22818880.0, + "18": 22582012.0, + "19": 22616784.0, + "20": 22693674.0, + "21": 22739360.0, + "22": 22799250.0, + "23": 22538774.0, + "24": 22770954.0, + "25": 22818024.0, + "26": 22547278.0, + "27": 22468476.0, + "28": 22452228.0, + "29": 22527980.0, + "30": 22630720.0, + "31": 22954516.0, + "32": 22584820.0, + "33": 22557266.0, + "34": 22834728.0, + "35": 22787216.0, + "36": 22588668.0, + "37": 22496474.0, + "38": 22895320.0, + "39": 22800062.0, + "40": 22657316.0, + "41": 22658142.0, + "42": 22666692.0, + "43": 22974950.0, + "44": 22745468.0, + "45": 22674868.0, + "46": 22883238.0, + "47": 22632908.0, + "48": 22927884.0, + "49": 22727252.0, + "50": 22903748.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 638724608.0, + "2": 638724608.0, + "3": 638724608.0, + "4": 638724608.0, + "5": 638724608.0, + "6": 638724608.0, + "7": 638724608.0, + "8": 638724608.0, + "9": 638724608.0, + "10": 638724608.0, + "11": 638724608.0, + "12": 638724608.0, + "13": 638724608.0, + "14": 638724608.0, + "15": 638724608.0, + "16": 638724608.0, + "17": 638724608.0, + "18": 638724608.0, + "19": 638724608.0, + "20": 638724608.0, + "21": 638724608.0, + "22": 638724608.0, + "23": 638724608.0, + "24": 638724608.0, + "25": 638724608.0, + "26": 638724608.0, + "27": 638724608.0, + "28": 638724608.0, + "29": 638724608.0, + "30": 638724608.0, + "31": 638724608.0, + "32": 638724608.0, + "33": 638724608.0, + "34": 638724608.0, + "35": 638724608.0, + "36": 638724608.0, + "37": 638724608.0, + "38": 638724608.0, + "39": 638724608.0, + "40": 638724608.0, + "41": 638724608.0, + "42": 638724608.0, + "43": 638724608.0, + "44": 
638724608.0, + "45": 638724608.0, + "46": 638724608.0, + "47": 638724608.0, + "48": 638724608.0, + "49": 638724608.0, + "50": 638724608.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2610025984.0, + "2": 2840250880.0, + "3": 2840250880.0, + "4": 2840250880.0, + "5": 2840250880.0, + "6": 2840250880.0, + "7": 2840250880.0, + "8": 2840250880.0, + "9": 2840250880.0, + "10": 2840250880.0, + "11": 2840250880.0, + "12": 2840250880.0, + "13": 2840250880.0, + "14": 2840250880.0, + "15": 2840250880.0, + "16": 2840250880.0, + "17": 2840250880.0, + "18": 2840250880.0, + "19": 2840250880.0, + "20": 2840250880.0, + "21": 2840250880.0, + "22": 2840250880.0, + "23": 2840250880.0, + "24": 2840250880.0, + "25": 2840250880.0, + "26": 2840250880.0, + "27": 2840250880.0, + "28": 2840250880.0, + "29": 2840250880.0, + "30": 2840250880.0, + "31": 2840250880.0, + "32": 2840250880.0, + "33": 2840250880.0, + "34": 2840250880.0, + "35": 2840250880.0, + "36": 2840250880.0, + "37": 2840250880.0, + "38": 2840250880.0, + "39": 2840250880.0, + "40": 2840250880.0, + "41": 2840250880.0, + "42": 2840250880.0, + "43": 2840250880.0, + "44": 2840250880.0, + "45": 2840250880.0, + "46": 2840250880.0, + "47": 2840250880.0, + "48": 2840250880.0, + "49": 2840250880.0, + "50": 2840250880.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.66119, + "2": 0.10511, + "3": 0.07267, + "4": 0.07159, + "5": 0.07147, + "6": 0.07254, + "7": 0.07213, + "8": 0.07141, + "9": 0.07159, + "10": 0.07239, + "11": 0.07155, + "12": 0.0717, + "13": 0.07155, + "14": 0.07174, + "15": 0.07179, + "16": 0.07185, + "17": 0.0714, + "18": 0.07139, + "19": 0.0717, + "20": 0.07106, + "21": 0.0716, + "22": 0.07218, + "23": 0.07161, + "24": 0.07166, + "25": 0.07144, + "26": 0.07156, + "27": 0.0718, + "28": 0.07207, + "29": 0.07096, + "30": 0.07235, + "31": 0.07223, + "32": 0.07219, + "33": 0.07195, + 
"34": 0.07232, + "35": 0.07433, + "36": 0.07598, + "37": 0.07242, + "38": 0.07166, + "39": 0.07174, + "40": 0.07148, + "41": 0.0722, + "42": 0.07169, + "43": 0.07213, + "44": 0.07193, + "45": 0.07163, + "46": 0.07302, + "47": 0.07199, + "48": 0.07329, + "49": 0.07491, + "50": 0.07339 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..8f65ccec75e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.89824, + "2": 10.90282, + "3": 10.89982, + "4": 10.86583, + "5": 10.88993, + "6": 10.9049, + "7": 10.89182, + "8": 10.90189, + "9": 10.88632, + "10": 10.88255, + "11": 10.91544, + "12": 10.90811, + "13": 10.91696, + "14": 10.92165, + "15": 10.86969, + "16": 10.8841, + "17": 10.87056, + "18": 10.88709, + "19": 10.87706, + "20": 10.84335, + "21": 10.83631, + "22": 10.76629, + "23": 10.83029, + "24": 10.79277, + "25": 10.75377, + "26": 10.78891, + "27": 10.79166, + "28": 10.74336, + "29": 10.75965, + "30": 10.62875, + "31": 10.45418, + "32": 10.68825, + "33": 10.68615, + "34": 10.52385, + "35": 10.56066, + "36": 10.53762, + "37": 10.60286, + "38": 10.46752, + "39": 10.60804, + "40": 10.36652, + "41": 10.38788, + "42": 10.45579, + "43": 10.15865, + "44": 10.24803, + "45": 10.15385, + "46": 10.13564, + "47": 10.39205, + "48": 10.1415, + "49": 9.88025, + "50": 10.18997 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 
22727178.0, + "2": 22924812.0, + "3": 22596704.0, + "4": 23218766.0, + "5": 22714208.0, + "6": 23020316.0, + "7": 22771086.0, + "8": 22926440.0, + "9": 22842352.0, + "10": 22918036.0, + "11": 22500516.0, + "12": 22459304.0, + "13": 22916284.0, + "14": 22387532.0, + "15": 22820856.0, + "16": 22830090.0, + "17": 22818880.0, + "18": 22582012.0, + "19": 22616784.0, + "20": 22693674.0, + "21": 22739360.0, + "22": 22799250.0, + "23": 22538774.0, + "24": 22770954.0, + "25": 22818024.0, + "26": 22547278.0, + "27": 22468476.0, + "28": 22452228.0, + "29": 22527980.0, + "30": 22630720.0, + "31": 22954516.0, + "32": 22584820.0, + "33": 22557266.0, + "34": 22834728.0, + "35": 22787216.0, + "36": 22588668.0, + "37": 22496474.0, + "38": 22895320.0, + "39": 22800062.0, + "40": 22657316.0, + "41": 22658142.0, + "42": 22666692.0, + "43": 22974950.0, + "44": 22745468.0, + "45": 22674868.0, + "46": 22883238.0, + "47": 22632908.0, + "48": 22927884.0, + "49": 22727252.0, + "50": 22903748.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 638724608.0, + "2": 638724608.0, + "3": 638724608.0, + "4": 638724608.0, + "5": 638724608.0, + "6": 638724608.0, + "7": 638724608.0, + "8": 638724608.0, + "9": 638724608.0, + "10": 638724608.0, + "11": 638724608.0, + "12": 638724608.0, + "13": 638724608.0, + "14": 638724608.0, + "15": 638724608.0, + "16": 638724608.0, + "17": 638724608.0, + "18": 638724608.0, + "19": 638724608.0, + "20": 638724608.0, + "21": 638724608.0, + "22": 638724608.0, + "23": 638724608.0, + "24": 638724608.0, + "25": 638724608.0, + "26": 638724608.0, + "27": 638724608.0, + "28": 638724608.0, + "29": 638724608.0, + "30": 638724608.0, + "31": 638724608.0, + "32": 638724608.0, + "33": 638724608.0, + "34": 638724608.0, + "35": 638724608.0, + "36": 638724608.0, + "37": 638724608.0, + "38": 638724608.0, + "39": 638724608.0, + "40": 638724608.0, + "41": 638724608.0, + "42": 638724608.0, + "43": 638724608.0, + "44": 
638724608.0, + "45": 638724608.0, + "46": 638724608.0, + "47": 638724608.0, + "48": 638724608.0, + "49": 638724608.0, + "50": 638724608.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2610025984.0, + "2": 2840250880.0, + "3": 2840250880.0, + "4": 2840250880.0, + "5": 2840250880.0, + "6": 2840250880.0, + "7": 2840250880.0, + "8": 2840250880.0, + "9": 2840250880.0, + "10": 2840250880.0, + "11": 2840250880.0, + "12": 2840250880.0, + "13": 2840250880.0, + "14": 2840250880.0, + "15": 2840250880.0, + "16": 2840250880.0, + "17": 2840250880.0, + "18": 2840250880.0, + "19": 2840250880.0, + "20": 2840250880.0, + "21": 2840250880.0, + "22": 2840250880.0, + "23": 2840250880.0, + "24": 2840250880.0, + "25": 2840250880.0, + "26": 2840250880.0, + "27": 2840250880.0, + "28": 2840250880.0, + "29": 2840250880.0, + "30": 2840250880.0, + "31": 2840250880.0, + "32": 2840250880.0, + "33": 2840250880.0, + "34": 2840250880.0, + "35": 2840250880.0, + "36": 2840250880.0, + "37": 2840250880.0, + "38": 2840250880.0, + "39": 2840250880.0, + "40": 2840250880.0, + "41": 2840250880.0, + "42": 2840250880.0, + "43": 2840250880.0, + "44": 2840250880.0, + "45": 2840250880.0, + "46": 2840250880.0, + "47": 2840250880.0, + "48": 2840250880.0, + "49": 2840250880.0, + "50": 2840250880.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.45868, + "2": 0.10817, + "3": 0.08964, + "4": 0.08342, + "5": 0.08198, + "6": 0.08179, + "7": 0.08172, + "8": 0.08319, + "9": 0.07964, + "10": 0.07872, + "11": 0.07783, + "12": 0.07839, + "13": 0.07961, + "14": 0.07913, + "15": 0.08021, + "16": 0.07965, + "17": 0.07946, + "18": 0.07924, + "19": 0.0792, + "20": 0.07919, + "21": 0.07872, + "22": 0.07958, + "23": 0.07857, + "24": 0.0793, + "25": 0.07936, + "26": 0.07956, + "27": 0.07904, + "28": 0.07939, + "29": 0.08007, + "30": 0.07912, + "31": 0.07945, + "32": 0.07845, + "33": 0.07804, + 
"34": 0.07801, + "35": 0.07775, + "36": 0.07835, + "37": 0.0781, + "38": 0.07939, + "39": 0.07789, + "40": 0.07803, + "41": 0.07935, + "42": 0.07838, + "43": 0.07862, + "44": 0.07884, + "45": 0.07747, + "46": 0.07832, + "47": 0.07792, + "48": 0.07896, + "49": 0.07798, + "50": 0.0779 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..6c887e9458f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84528, + "2": 10.85311, + "3": 10.85731, + "4": 10.84281, + "5": 10.87387, + "6": 10.88121, + "7": 10.8659, + "8": 10.84699, + "9": 10.86717, + "10": 10.83535, + "11": 10.91365, + "12": 10.87413, + "13": 10.86738, + "14": 10.89179, + "15": 10.84228, + "16": 10.84293, + "17": 10.81858, + "18": 10.85434, + "19": 10.85509, + "20": 10.80167, + "21": 10.79018, + "22": 10.72544, + "23": 10.8153, + "24": 10.74295, + "25": 10.71149, + "26": 10.77065, + "27": 10.78549, + "28": 10.73165, + "29": 10.75732, + "30": 10.58467, + "31": 10.4336, + "32": 10.68109, + "33": 10.66825, + "34": 10.49989, + "35": 10.53287, + "36": 10.52052, + "37": 10.59723, + "38": 10.45735, + "39": 10.62122, + "40": 10.35652, + "41": 10.40323, + "42": 10.45573, + "43": 10.11522, + "44": 10.24355, + "45": 10.13839, + "46": 10.11493, + "47": 10.39794, + "48": 10.14359, + "49": 9.89174, + "50": 10.20005 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, 
+ "values": { + "1": 22726236.0, + "2": 22925004.0, + "3": 22596304.0, + "4": 23218272.0, + "5": 22714030.0, + "6": 23020852.0, + "7": 22770078.0, + "8": 22926044.0, + "9": 22841056.0, + "10": 22918036.0, + "11": 22500304.0, + "12": 22458314.0, + "13": 22916576.0, + "14": 22387996.0, + "15": 22821520.0, + "16": 22830056.0, + "17": 22819198.0, + "18": 22582774.0, + "19": 22617328.0, + "20": 22693656.0, + "21": 22739808.0, + "22": 22798880.0, + "23": 22539324.0, + "24": 22770360.0, + "25": 22819138.0, + "26": 22547248.0, + "27": 22468282.0, + "28": 22452480.0, + "29": 22528584.0, + "30": 22630790.0, + "31": 22954356.0, + "32": 22584864.0, + "33": 22557742.0, + "34": 22834464.0, + "35": 22787508.0, + "36": 22588878.0, + "37": 22496888.0, + "38": 22894876.0, + "39": 22800580.0, + "40": 22657590.0, + "41": 22658712.0, + "42": 22665704.0, + "43": 22975164.0, + "44": 22746238.0, + "45": 22674508.0, + "46": 22883428.0, + "47": 22632120.0, + "48": 22927616.0, + "49": 22726280.0, + "50": 22904058.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 609363968.0, + "2": 609363968.0, + "3": 609363968.0, + "4": 609363968.0, + "5": 609363968.0, + "6": 609363968.0, + "7": 609363968.0, + "8": 609363968.0, + "9": 609363968.0, + "10": 609363968.0, + "11": 609363968.0, + "12": 609363968.0, + "13": 609363968.0, + "14": 609363968.0, + "15": 609363968.0, + "16": 609363968.0, + "17": 609363968.0, + "18": 609363968.0, + "19": 609363968.0, + "20": 609363968.0, + "21": 609363968.0, + "22": 609363968.0, + "23": 609363968.0, + "24": 609363968.0, + "25": 609363968.0, + "26": 609363968.0, + "27": 609363968.0, + "28": 609363968.0, + "29": 609363968.0, + "30": 609363968.0, + "31": 609363968.0, + "32": 609363968.0, + "33": 609363968.0, + "34": 609363968.0, + "35": 609363968.0, + "36": 609363968.0, + "37": 609363968.0, + "38": 609363968.0, + "39": 609363968.0, + "40": 609363968.0, + "41": 609363968.0, + "42": 609363968.0, + "43": 
609363968.0, + "44": 609363968.0, + "45": 609363968.0, + "46": 609363968.0, + "47": 609363968.0, + "48": 609363968.0, + "49": 609363968.0, + "50": 609363968.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2580665344.0, + "2": 2810890240.0, + "3": 2810890240.0, + "4": 2810890240.0, + "5": 2810890240.0, + "6": 2810890240.0, + "7": 2810890240.0, + "8": 2810890240.0, + "9": 2810890240.0, + "10": 2810890240.0, + "11": 2810890240.0, + "12": 2810890240.0, + "13": 2810890240.0, + "14": 2810890240.0, + "15": 2810890240.0, + "16": 2810890240.0, + "17": 2810890240.0, + "18": 2810890240.0, + "19": 2810890240.0, + "20": 2810890240.0, + "21": 2810890240.0, + "22": 2810890240.0, + "23": 2810890240.0, + "24": 2810890240.0, + "25": 2810890240.0, + "26": 2810890240.0, + "27": 2810890240.0, + "28": 2810890240.0, + "29": 2810890240.0, + "30": 2810890240.0, + "31": 2810890240.0, + "32": 2810890240.0, + "33": 2810890240.0, + "34": 2810890240.0, + "35": 2810890240.0, + "36": 2810890240.0, + "37": 2810890240.0, + "38": 2810890240.0, + "39": 2810890240.0, + "40": 2810890240.0, + "41": 2810890240.0, + "42": 2810890240.0, + "43": 2810890240.0, + "44": 2810890240.0, + "45": 2810890240.0, + "46": 2810890240.0, + "47": 2810890240.0, + "48": 2810890240.0, + "49": 2810890240.0, + "50": 2810890240.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 21.45212, + "2": 0.14782, + "3": 0.12419, + "4": 0.12287, + "5": 0.12472, + "6": 0.12792, + "7": 0.11932, + "8": 0.12137, + "9": 0.11933, + "10": 0.11994, + "11": 0.11962, + "12": 0.11989, + "13": 0.11879, + "14": 0.11883, + "15": 0.11974, + "16": 0.1189, + "17": 0.121, + "18": 0.12116, + "19": 0.12032, + "20": 0.1212, + "21": 0.11987, + "22": 0.1217, + "23": 0.12108, + "24": 0.12179, + "25": 0.12038, + "26": 0.11988, + "27": 0.12062, + "28": 0.12611, + "29": 0.11789, + "30": 0.11799, + "31": 0.11768, + "32": 0.11881, + 
"33": 0.11737, + "34": 0.11841, + "35": 0.11781, + "36": 0.11854, + "37": 0.1174, + "38": 0.11872, + "39": 0.11623, + "40": 0.1178, + "41": 0.11984, + "42": 0.11948, + "43": 0.12006, + "44": 0.11861, + "45": 0.11968, + "46": 0.12944, + "47": 0.11845, + "48": 0.12012, + "49": 0.11921, + "50": 0.11821 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..c213f354c2a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84528, + "2": 10.85311, + "3": 10.85731, + "4": 10.84281, + "5": 10.87387, + "6": 10.88121, + "7": 10.8659, + "8": 10.84699, + "9": 10.86717, + "10": 10.83535, + "11": 10.91365, + "12": 10.87413, + "13": 10.86738, + "14": 10.89179, + "15": 10.84228, + "16": 10.84293, + "17": 10.81858, + "18": 10.85434, + "19": 10.85509, + "20": 10.80167, + "21": 10.79018, + "22": 10.72544, + "23": 10.8153, + "24": 10.74295, + "25": 10.71149, + "26": 10.77065, + "27": 10.78549, + "28": 10.73165, + "29": 10.75732, + "30": 10.58467, + "31": 10.4336, + "32": 10.68109, + "33": 10.66825, + "34": 10.49989, + "35": 10.53287, + "36": 10.52052, + "37": 10.59723, + "38": 10.45735, + "39": 10.62122, + "40": 10.35652, + "41": 10.40323, + "42": 10.45573, + "43": 10.11522, + "44": 10.24355, + "45": 10.13839, + "46": 10.11493, + "47": 10.39794, + "48": 10.14359, + "49": 9.89174, + "50": 10.20005 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + 
"step_interval": 1, + "values": { + "1": 22726236.0, + "2": 22925004.0, + "3": 22596304.0, + "4": 23218272.0, + "5": 22714030.0, + "6": 23020852.0, + "7": 22770078.0, + "8": 22926044.0, + "9": 22841056.0, + "10": 22918036.0, + "11": 22500304.0, + "12": 22458314.0, + "13": 22916576.0, + "14": 22387996.0, + "15": 22821520.0, + "16": 22830056.0, + "17": 22819198.0, + "18": 22582774.0, + "19": 22617328.0, + "20": 22693656.0, + "21": 22739808.0, + "22": 22798880.0, + "23": 22539324.0, + "24": 22770360.0, + "25": 22819138.0, + "26": 22547248.0, + "27": 22468282.0, + "28": 22452480.0, + "29": 22528584.0, + "30": 22630790.0, + "31": 22954356.0, + "32": 22584864.0, + "33": 22557742.0, + "34": 22834464.0, + "35": 22787508.0, + "36": 22588878.0, + "37": 22496888.0, + "38": 22894876.0, + "39": 22800580.0, + "40": 22657590.0, + "41": 22658712.0, + "42": 22665704.0, + "43": 22975164.0, + "44": 22746238.0, + "45": 22674508.0, + "46": 22883428.0, + "47": 22632120.0, + "48": 22927616.0, + "49": 22726280.0, + "50": 22904058.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 609363968.0, + "2": 609363968.0, + "3": 609363968.0, + "4": 609363968.0, + "5": 609363968.0, + "6": 609363968.0, + "7": 609363968.0, + "8": 609363968.0, + "9": 609363968.0, + "10": 609363968.0, + "11": 609363968.0, + "12": 609363968.0, + "13": 609363968.0, + "14": 609363968.0, + "15": 609363968.0, + "16": 609363968.0, + "17": 609363968.0, + "18": 609363968.0, + "19": 609363968.0, + "20": 609363968.0, + "21": 609363968.0, + "22": 609363968.0, + "23": 609363968.0, + "24": 609363968.0, + "25": 609363968.0, + "26": 609363968.0, + "27": 609363968.0, + "28": 609363968.0, + "29": 609363968.0, + "30": 609363968.0, + "31": 609363968.0, + "32": 609363968.0, + "33": 609363968.0, + "34": 609363968.0, + "35": 609363968.0, + "36": 609363968.0, + "37": 609363968.0, + "38": 609363968.0, + "39": 609363968.0, + "40": 609363968.0, + "41": 609363968.0, + "42": 
609363968.0, + "43": 609363968.0, + "44": 609363968.0, + "45": 609363968.0, + "46": 609363968.0, + "47": 609363968.0, + "48": 609363968.0, + "49": 609363968.0, + "50": 609363968.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2580665344.0, + "2": 2810890240.0, + "3": 2810890240.0, + "4": 2810890240.0, + "5": 2810890240.0, + "6": 2810890240.0, + "7": 2810890240.0, + "8": 2810890240.0, + "9": 2810890240.0, + "10": 2810890240.0, + "11": 2810890240.0, + "12": 2810890240.0, + "13": 2810890240.0, + "14": 2810890240.0, + "15": 2810890240.0, + "16": 2810890240.0, + "17": 2810890240.0, + "18": 2810890240.0, + "19": 2810890240.0, + "20": 2810890240.0, + "21": 2810890240.0, + "22": 2810890240.0, + "23": 2810890240.0, + "24": 2810890240.0, + "25": 2810890240.0, + "26": 2810890240.0, + "27": 2810890240.0, + "28": 2810890240.0, + "29": 2810890240.0, + "30": 2810890240.0, + "31": 2810890240.0, + "32": 2810890240.0, + "33": 2810890240.0, + "34": 2810890240.0, + "35": 2810890240.0, + "36": 2810890240.0, + "37": 2810890240.0, + "38": 2810890240.0, + "39": 2810890240.0, + "40": 2810890240.0, + "41": 2810890240.0, + "42": 2810890240.0, + "43": 2810890240.0, + "44": 2810890240.0, + "45": 2810890240.0, + "46": 2810890240.0, + "47": 2810890240.0, + "48": 2810890240.0, + "49": 2810890240.0, + "50": 2810890240.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.94763, + "2": 0.1464, + "3": 0.12192, + "4": 0.12042, + "5": 0.12369, + "6": 0.1197, + "7": 0.12002, + "8": 0.12026, + "9": 0.11856, + "10": 0.11993, + "11": 0.11958, + "12": 0.11934, + "13": 0.11858, + "14": 0.11928, + "15": 0.11863, + "16": 0.11911, + "17": 0.11905, + "18": 0.12098, + "19": 0.11814, + "20": 0.11768, + "21": 0.11925, + "22": 0.11811, + "23": 0.11686, + "24": 0.11706, + "25": 0.11682, + "26": 0.11906, + "27": 0.11759, + "28": 0.11866, + "29": 0.11785, + "30": 0.11772, + "31": 
0.11912, + "32": 0.118, + "33": 0.11808, + "34": 0.1174, + "35": 0.11853, + "36": 0.1174, + "37": 0.11808, + "38": 0.1194, + "39": 0.11749, + "40": 0.11871, + "41": 0.11887, + "42": 0.11731, + "43": 0.11929, + "44": 0.11811, + "45": 0.11913, + "46": 0.11806, + "47": 0.11686, + "48": 0.11726, + "49": 0.11729, + "50": 0.11729 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 3a679ee1d68..b668521f995 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.85163, - "5": 10.8787, - "10": 10.80636, - "15": 10.81034, - "20": 10.68692, - "25": 10.49703, - "30": 10.32668, - "35": 10.2249, - "40": 10.04381, + "2": 10.85389, + "3": 10.83867, + "4": 10.84326, + "5": 10.87865, + "6": 10.87589, + "7": 10.86185, + "8": 10.84926, + "9": 10.84876, + "10": 10.80639, + "11": 10.88684, + "12": 10.85677, + "13": 10.86234, + "14": 10.87768, + "15": 10.81036, + "16": 10.81987, + "17": 10.78281, + "18": 10.80322, + "19": 10.78354, + "20": 10.6869, + "21": 10.66901, + "22": 10.5231, + "23": 10.68441, + "24": 10.56577, + "25": 10.49701, + "26": 10.5655, + "27": 10.58174, + "28": 10.52997, + "29": 10.55562, + "30": 10.32673, + "31": 10.07635, + "32": 10.43058, + "33": 10.42459, + "34": 10.16648, + "35": 10.22488, + "36": 10.1834, + "37": 10.29955, + "38": 10.145, + "39": 10.37068, + "40": 10.04384, + "41": 
10.09449, + "42": 10.1738, + "43": 9.77535, + "44": 9.90309, "45": 9.77899, - "50": 9.85789, - "55": 9.83807, - "60": 9.44187, - "65": 8.88428, - "70": 9.70474, + "46": 9.76547, + "47": 10.1072, + "48": 9.80031, + "49": 9.47524, + "50": 9.85793, + "51": 9.80033, + "52": 9.69511, + "53": 10.02851, + "54": 9.91434, + "55": 9.83811, + "56": 9.57832, + "57": 9.42584, + "58": 9.79169, + "59": 9.53621, + "60": 9.44188, + "61": 9.65656, + "62": 9.9438, + "63": 9.32147, + "64": 9.73338, + "65": 8.88431, + "66": 9.65528, + "67": 9.32102, + "68": 9.75063, + "69": 9.76395, + "70": 9.70471, + "71": 9.56858, + "72": 9.53902, + "73": 9.45226, + "74": 8.87734, "75": 9.37931, - "80": 9.36592, - "85": 9.57422, - "90": 9.78804, - "95": 9.48833, - "100": 9.35873 + "76": 9.01864, + "77": 10.0352, + "78": 9.69265, + "79": 9.33457, + "80": 9.36591, + "81": 9.4392, + "82": 9.66576, + "83": 9.25445, + "84": 9.37801, + "85": 9.57423, + "86": 9.03279, + "87": 9.55778, + "88": 9.71526, + "89": 9.55706, + "90": 9.78807, + "91": 9.29512, + "92": 9.31513, + "93": 9.03245, + "94": 8.79084, + "95": 9.48837, + "96": 9.49575, + "97": 9.27132, + "98": 9.64072, + "99": 8.84738, + "100": 9.3587 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 619.0, - "5": 646.0, - "10": 582.0, - "15": 710.0, - "20": 672.0, - "25": 605.0, - "30": 745.0, - "35": 753.0, - "40": 797.0, - "45": 727.0, - "50": 852.0, - "55": 882.0, - "60": 892.0, - "65": 934.0, - "70": 1066.0, - "75": 928.0, - "80": 1058.0, - "85": 1127.0, - "90": 1130.0, - "95": 1034.0, - "100": 1064.0 + "1": 604.0, + "2": 601.0, + "3": 657.0, + "4": 631.0, + "5": 677.0, + "6": 630.0, + "7": 662.0, + "8": 607.0, + "9": 614.0, + "10": 588.0, + "11": 713.0, + "12": 679.0, + "13": 667.0, + "14": 649.0, + "15": 667.0, + "16": 659.0, + "17": 681.0, + "18": 674.0, + "19": 586.0, + "20": 668.0, + "21": 679.0, + "22": 646.0, + "23": 757.0, + "24": 633.0, + "25": 653.0, + "26": 662.0, + "27": 
682.0, + "28": 746.0, + "29": 758.0, + "30": 711.0, + "31": 645.0, + "32": 705.0, + "33": 759.0, + "34": 667.0, + "35": 745.0, + "36": 744.0, + "37": 799.0, + "38": 781.0, + "39": 903.0, + "40": 806.0, + "41": 804.0, + "42": 853.0, + "43": 651.0, + "44": 817.0, + "45": 834.0, + "46": 842.0, + "47": 859.0, + "48": 846.0, + "49": 831.0, + "50": 774.0, + "51": 927.0, + "52": 907.0, + "53": 981.0, + "54": 884.0, + "55": 858.0, + "56": 950.0, + "57": 885.0, + "58": 961.0, + "59": 949.0, + "60": 837.0, + "61": 953.0, + "62": 907.0, + "63": 911.0, + "64": 1085.0, + "65": 964.0, + "66": 1054.0, + "67": 1008.0, + "68": 975.0, + "69": 1027.0, + "70": 1025.0, + "71": 1093.0, + "72": 882.0, + "73": 988.0, + "74": 685.0, + "75": 857.0, + "76": 1040.0, + "77": 1138.0, + "78": 1115.0, + "79": 1049.0, + "80": 1127.0, + "81": 1260.0, + "82": 1089.0, + "83": 1000.0, + "84": 1123.0, + "85": 1179.0, + "86": 927.0, + "87": 1264.0, + "88": 1041.0, + "89": 1165.0, + "90": 1105.0, + "91": 1136.0, + "92": 1151.0, + "93": 880.0, + "94": 1183.0, + "95": 1125.0, + "96": 1202.0, + "97": 1026.0, + "98": 1189.0, + "99": 1171.0, + "100": 1097.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 689356288.0, - "5": 689356288.0, - "10": 689356288.0, - "15": 689356288.0, - "20": 689356288.0, - "25": 689356288.0, - "30": 689356288.0, - "35": 689356288.0, - "40": 689356288.0, - "45": 689356288.0, - "50": 689356288.0, - "55": 689356288.0, - "60": 689356288.0, - "65": 689356288.0, - "70": 689356288.0, - "75": 689356288.0, - "80": 689356288.0, - "85": 689356288.0, - "90": 689356288.0, - "95": 689356288.0, - "100": 689356288.0 + "1": 689618432.0, + "2": 689618432.0, + "3": 689618432.0, + "4": 689618432.0, + "5": 689618432.0, + "6": 689618432.0, + "7": 689618432.0, + "8": 689618432.0, + "9": 689618432.0, + "10": 689618432.0, + "11": 689618432.0, + "12": 689618432.0, + "13": 689618432.0, + "14": 689618432.0, + "15": 689618432.0, + 
"16": 689618432.0, + "17": 689618432.0, + "18": 689618432.0, + "19": 689618432.0, + "20": 689618432.0, + "21": 689618432.0, + "22": 689618432.0, + "23": 689618432.0, + "24": 689618432.0, + "25": 689618432.0, + "26": 689618432.0, + "27": 689618432.0, + "28": 689618432.0, + "29": 689618432.0, + "30": 689618432.0, + "31": 689618432.0, + "32": 689618432.0, + "33": 689618432.0, + "34": 689618432.0, + "35": 689618432.0, + "36": 689618432.0, + "37": 689618432.0, + "38": 689618432.0, + "39": 689618432.0, + "40": 689618432.0, + "41": 689618432.0, + "42": 689618432.0, + "43": 689618432.0, + "44": 689618432.0, + "45": 689618432.0, + "46": 689618432.0, + "47": 689618432.0, + "48": 689618432.0, + "49": 689618432.0, + "50": 689618432.0, + "51": 689618432.0, + "52": 689618432.0, + "53": 689618432.0, + "54": 689618432.0, + "55": 689618432.0, + "56": 689618432.0, + "57": 689618432.0, + "58": 689618432.0, + "59": 689618432.0, + "60": 689618432.0, + "61": 689618432.0, + "62": 689618432.0, + "63": 689618432.0, + "64": 689618432.0, + "65": 689618432.0, + "66": 689618432.0, + "67": 689618432.0, + "68": 689618432.0, + "69": 689618432.0, + "70": 689618432.0, + "71": 689618432.0, + "72": 689618432.0, + "73": 689618432.0, + "74": 689618432.0, + "75": 689618432.0, + "76": 689618432.0, + "77": 689618432.0, + "78": 689618432.0, + "79": 689618432.0, + "80": 689618432.0, + "81": 689618432.0, + "82": 689618432.0, + "83": 689618432.0, + "84": 689618432.0, + "85": 689618432.0, + "86": 689618432.0, + "87": 689618432.0, + "88": 689618432.0, + "89": 689618432.0, + "90": 689618432.0, + "91": 689618432.0, + "92": 689618432.0, + "93": 689618432.0, + "94": 689618432.0, + "95": 689618432.0, + "96": 689618432.0, + "97": 689618432.0, + "98": 689618432.0, + "99": 689618432.0, + "100": 689618432.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 961750016.0, - "5": 1220176384.0, - "10": 1223321600.0, - "15": 1223321600.0, - "20": 
1223321600.0, - "25": 1223321600.0, - "30": 1223321600.0, - "35": 1223321600.0, - "40": 1223321600.0, - "45": 1223321600.0, - "50": 1223321600.0, - "55": 1223321600.0, - "60": 1223321600.0, - "65": 1223321600.0, - "70": 1223321600.0, - "75": 1223321600.0, - "80": 1223321600.0, - "85": 1223321600.0, - "90": 1223321600.0, - "95": 1223321600.0, - "100": 1223321600.0 + "1": 959652864.0, + "2": 1220175872.0, + "3": 1221224448.0, + "4": 1221224448.0, + "5": 1221224448.0, + "6": 1221224448.0, + "7": 1221224448.0, + "8": 1221224448.0, + "9": 1221224448.0, + "10": 1221224448.0, + "11": 1221224448.0, + "12": 1221224448.0, + "13": 1221224448.0, + "14": 1221224448.0, + "15": 1221224448.0, + "16": 1221224448.0, + "17": 1221224448.0, + "18": 1221224448.0, + "19": 1221224448.0, + "20": 1221224448.0, + "21": 1221224448.0, + "22": 1221224448.0, + "23": 1221224448.0, + "24": 1221224448.0, + "25": 1221224448.0, + "26": 1221224448.0, + "27": 1221224448.0, + "28": 1221224448.0, + "29": 1221224448.0, + "30": 1221224448.0, + "31": 1221224448.0, + "32": 1221224448.0, + "33": 1221224448.0, + "34": 1221224448.0, + "35": 1221224448.0, + "36": 1221224448.0, + "37": 1221224448.0, + "38": 1221224448.0, + "39": 1221224448.0, + "40": 1221224448.0, + "41": 1221224448.0, + "42": 1221224448.0, + "43": 1221224448.0, + "44": 1221224448.0, + "45": 1221224448.0, + "46": 1221224448.0, + "47": 1221224448.0, + "48": 1221224448.0, + "49": 1221224448.0, + "50": 1221224448.0, + "51": 1221486080.0, + "52": 1221486080.0, + "53": 1221486080.0, + "54": 1221486080.0, + "55": 1221486080.0, + "56": 1221486080.0, + "57": 1221486080.0, + "58": 1221486080.0, + "59": 1221486080.0, + "60": 1221486080.0, + "61": 1221486080.0, + "62": 1221486080.0, + "63": 1221486080.0, + "64": 1221486080.0, + "65": 1221486080.0, + "66": 1221486080.0, + "67": 1221486080.0, + "68": 1221486080.0, + "69": 1221487104.0, + "70": 1221487104.0, + "71": 1221487104.0, + "72": 1221487104.0, + "73": 1221487104.0, + "74": 1221487104.0, + "75": 
1221487104.0, + "76": 1221487104.0, + "77": 1221487104.0, + "78": 1221487104.0, + "79": 1221487104.0, + "80": 1221487104.0, + "81": 1221487104.0, + "82": 1221487104.0, + "83": 1221487104.0, + "84": 1221487104.0, + "85": 1221487104.0, + "86": 1221487104.0, + "87": 1221487104.0, + "88": 1221487104.0, + "89": 1221487104.0, + "90": 1221487104.0, + "91": 1221487104.0, + "92": 1221487104.0, + "93": 1221487104.0, + "94": 1221487104.0, + "95": 1221487104.0, + "96": 1221487104.0, + "97": 1221487104.0, + "98": 1221487104.0, + "99": 1221487104.0, + "100": 1221487104.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 9.33137, - "5": 0.24439, - "10": 0.24539, - "15": 0.24239, - "20": 0.24713, - "25": 0.24683, - "30": 0.24516, - "35": 0.24456, - "40": 0.25161, - "45": 0.24886, - "50": 0.24548, - "55": 0.25414, - "60": 0.24546, - "65": 0.25395, - "70": 0.24573, - "75": 0.24821, - "80": 0.25298, - "85": 0.2568, - "90": 0.24531, - "95": 0.24617, - "100": 0.25395 + "1": 10.63286, + "2": 0.29932, + "3": 0.28799, + "4": 0.28475, + "5": 0.28729, + "6": 0.28613, + "7": 0.28182, + "8": 0.28376, + "9": 0.28071, + "10": 0.28064, + "11": 0.28008, + "12": 0.27999, + "13": 0.27369, + "14": 0.27735, + "15": 0.27802, + "16": 0.27647, + "17": 0.28017, + "18": 0.27624, + "19": 0.27907, + "20": 0.28457, + "21": 0.28621, + "22": 0.27968, + "23": 0.2788, + "24": 0.27704, + "25": 0.27774, + "26": 0.27744, + "27": 0.27759, + "28": 0.27978, + "29": 0.28051, + "30": 0.28034, + "31": 0.27733, + "32": 0.27813, + "33": 0.27733, + "34": 0.28166, + "35": 0.27601, + "36": 0.27766, + "37": 0.27784, + "38": 0.27709, + "39": 0.2776, + "40": 0.27758, + "41": 0.27975, + "42": 0.27633, + "43": 0.27864, + "44": 0.27802, + "45": 0.27955, + "46": 0.27725, + "47": 0.27926, + "48": 0.28083, + "49": 0.2781, + "50": 0.27962, + "51": 0.30289, + "52": 0.2758, + "53": 0.27484, + "54": 0.29013, + "55": 0.28835, + "56": 0.274, + "57": 0.27512, + "58": 
0.27238, + "59": 0.27429, + "60": 0.27435, + "61": 0.27493, + "62": 0.27237, + "63": 0.27125, + "64": 0.27873, + "65": 0.27559, + "66": 0.27509, + "67": 0.27136, + "68": 0.27248, + "69": 0.27308, + "70": 0.27367, + "71": 0.27224, + "72": 0.27404, + "73": 0.27347, + "74": 0.27274, + "75": 0.27659, + "76": 0.27508, + "77": 0.27421, + "78": 0.27262, + "79": 0.27496, + "80": 0.27635, + "81": 0.60573, + "82": 0.27646, + "83": 0.27511, + "84": 0.27432, + "85": 0.27697, + "86": 0.27845, + "87": 0.27696, + "88": 0.27613, + "89": 0.28436, + "90": 0.27824, + "91": 0.27389, + "92": 0.27309, + "93": 0.27377, + "94": 0.27986, + "95": 0.27303, + "96": 0.2751, + "97": 0.2752, + "98": 0.27677, + "99": 0.27534, + "100": 0.27167 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..3a7a72a10c2 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85163, + "2": 10.85389, + "3": 10.83863, + "4": 10.84324, + "5": 10.87867, + "6": 10.87588, + "7": 10.86181, + "8": 10.84924, + "9": 10.84875, + "10": 10.80634, + "11": 10.8868, + "12": 10.8568, + "13": 10.86235, + "14": 10.87766, + "15": 10.81037, + "16": 10.8198, + "17": 10.7828, + "18": 10.80323, + "19": 10.78353, + "20": 10.6869, + "21": 10.66905, + "22": 10.52312, + "23": 10.68437, + "24": 10.56579, + "25": 10.49701, + "26": 10.56552, + "27": 10.58172, + "28": 10.52997, + "29": 10.55561, + "30": 10.32668, + "31": 10.07633, + "32": 10.43056, + "33": 
10.42454, + "34": 10.16648, + "35": 10.22486, + "36": 10.18345, + "37": 10.29955, + "38": 10.14498, + "39": 10.37064, + "40": 10.04385, + "41": 10.09446, + "42": 10.1738, + "43": 9.77535, + "44": 9.9031, + "45": 9.779, + "46": 9.76548, + "47": 10.10718, + "48": 9.80028, + "49": 9.4752, + "50": 9.85787, + "51": 9.80034, + "52": 9.69507, + "53": 10.0285, + "54": 9.91432, + "55": 9.83807, + "56": 9.57827, + "57": 9.42584, + "58": 9.79171, + "59": 9.53621, + "60": 9.44186, + "61": 9.65655, + "62": 9.94377, + "63": 9.32146, + "64": 9.7334, + "65": 8.88429, + "66": 9.65527, + "67": 9.321, + "68": 9.75066, + "69": 9.76398, + "70": 9.70468, + "71": 9.56857, + "72": 9.53903, + "73": 9.45227, + "74": 8.87742, + "75": 9.37933, + "76": 9.0186, + "77": 10.03521, + "78": 9.69265, + "79": 9.33456, + "80": 9.36592, + "81": 9.4392, + "82": 9.66571, + "83": 9.25447, + "84": 9.378, + "85": 9.57419, + "86": 9.03278, + "87": 9.55776, + "88": 9.71523, + "89": 9.55706, + "90": 9.78804, + "91": 9.29518, + "92": 9.31513, + "93": 9.03243, + "94": 8.79087, + "95": 9.48835, + "96": 9.49572, + "97": 9.27133, + "98": 9.64071, + "99": 8.84737, + "100": 9.35871 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 627.0, + "2": 608.0, + "3": 673.0, + "4": 679.0, + "5": 640.0, + "6": 694.0, + "7": 628.0, + "8": 602.0, + "9": 653.0, + "10": 534.0, + "11": 712.0, + "12": 631.0, + "13": 674.0, + "14": 682.0, + "15": 711.0, + "16": 655.0, + "17": 720.0, + "18": 660.0, + "19": 641.0, + "20": 653.0, + "21": 651.0, + "22": 628.0, + "23": 722.0, + "24": 647.0, + "25": 682.0, + "26": 658.0, + "27": 655.0, + "28": 725.0, + "29": 794.0, + "30": 729.0, + "31": 632.0, + "32": 733.0, + "33": 803.0, + "34": 704.0, + "35": 728.0, + "36": 797.0, + "37": 839.0, + "38": 830.0, + "39": 885.0, + "40": 788.0, + "41": 878.0, + "42": 897.0, + "43": 770.0, + "44": 867.0, + "45": 735.0, + "46": 812.0, + "47": 884.0, + "48": 879.0, + "49": 828.0, + "50": 812.0, + "51": 
896.0, + "52": 876.0, + "53": 976.0, + "54": 939.0, + "55": 875.0, + "56": 951.0, + "57": 865.0, + "58": 1011.0, + "59": 947.0, + "60": 786.0, + "61": 1059.0, + "62": 920.0, + "63": 917.0, + "64": 1022.0, + "65": 940.0, + "66": 1052.0, + "67": 994.0, + "68": 1024.0, + "69": 980.0, + "70": 1046.0, + "71": 1132.0, + "72": 911.0, + "73": 1006.0, + "74": 688.0, + "75": 889.0, + "76": 972.0, + "77": 1162.0, + "78": 1045.0, + "79": 1008.0, + "80": 1089.0, + "81": 1209.0, + "82": 1067.0, + "83": 999.0, + "84": 1135.0, + "85": 1194.0, + "86": 936.0, + "87": 1271.0, + "88": 1144.0, + "89": 1099.0, + "90": 1140.0, + "91": 1115.0, + "92": 1127.0, + "93": 961.0, + "94": 1203.0, + "95": 1140.0, + "96": 1177.0, + "97": 1055.0, + "98": 1335.0, + "99": 1164.0, + "100": 1093.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 689356288.0, + "2": 689356288.0, + "3": 689356288.0, + "4": 689356288.0, + "5": 689356288.0, + "6": 689356288.0, + "7": 689356288.0, + "8": 689356288.0, + "9": 689356288.0, + "10": 689356288.0, + "11": 689356288.0, + "12": 689356288.0, + "13": 689356288.0, + "14": 689356288.0, + "15": 689356288.0, + "16": 689356288.0, + "17": 689356288.0, + "18": 689356288.0, + "19": 689356288.0, + "20": 689356288.0, + "21": 689356288.0, + "22": 689356288.0, + "23": 689356288.0, + "24": 689356288.0, + "25": 689356288.0, + "26": 689356288.0, + "27": 689356288.0, + "28": 689356288.0, + "29": 689356288.0, + "30": 689356288.0, + "31": 689356288.0, + "32": 689356288.0, + "33": 689356288.0, + "34": 689356288.0, + "35": 689356288.0, + "36": 689356288.0, + "37": 689356288.0, + "38": 689356288.0, + "39": 689356288.0, + "40": 689356288.0, + "41": 689356288.0, + "42": 689356288.0, + "43": 689356288.0, + "44": 689356288.0, + "45": 689356288.0, + "46": 689356288.0, + "47": 689356288.0, + "48": 689356288.0, + "49": 689356288.0, + "50": 689356288.0, + "51": 689356288.0, + "52": 689356288.0, + "53": 689356288.0, + "54": 
689356288.0, + "55": 689356288.0, + "56": 689356288.0, + "57": 689356288.0, + "58": 689356288.0, + "59": 689356288.0, + "60": 689356288.0, + "61": 689356288.0, + "62": 689356288.0, + "63": 689356288.0, + "64": 689356288.0, + "65": 689356288.0, + "66": 689356288.0, + "67": 689356288.0, + "68": 689356288.0, + "69": 689356288.0, + "70": 689356288.0, + "71": 689356288.0, + "72": 689356288.0, + "73": 689356288.0, + "74": 689356288.0, + "75": 689356288.0, + "76": 689356288.0, + "77": 689356288.0, + "78": 689356288.0, + "79": 689356288.0, + "80": 689356288.0, + "81": 689356288.0, + "82": 689356288.0, + "83": 689356288.0, + "84": 689356288.0, + "85": 689356288.0, + "86": 689356288.0, + "87": 689356288.0, + "88": 689356288.0, + "89": 689356288.0, + "90": 689356288.0, + "91": 689356288.0, + "92": 689356288.0, + "93": 689356288.0, + "94": 689356288.0, + "95": 689356288.0, + "96": 689356288.0, + "97": 689356288.0, + "98": 689356288.0, + "99": 689356288.0, + "100": 689356288.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 962798592.0, + "2": 1220175872.0, + "3": 1220175872.0, + "4": 1220175872.0, + "5": 1220175872.0, + "6": 1220175872.0, + "7": 1220175872.0, + "8": 1220175872.0, + "9": 1220175872.0, + "10": 1220175872.0, + "11": 1220175872.0, + "12": 1220175872.0, + "13": 1220175872.0, + "14": 1220175872.0, + "15": 1220175872.0, + "16": 1220175872.0, + "17": 1220175872.0, + "18": 1220175872.0, + "19": 1220175872.0, + "20": 1220175872.0, + "21": 1220175872.0, + "22": 1220175872.0, + "23": 1220175872.0, + "24": 1220175872.0, + "25": 1220175872.0, + "26": 1221224960.0, + "27": 1221224960.0, + "28": 1221224960.0, + "29": 1221224960.0, + "30": 1221224960.0, + "31": 1221224960.0, + "32": 1221224960.0, + "33": 1221224960.0, + "34": 1221224960.0, + "35": 1221224960.0, + "36": 1221224960.0, + "37": 1221224960.0, + "38": 1221224960.0, + "39": 1221224960.0, + "40": 1221224960.0, + "41": 1221224960.0, + "42": 
1221224960.0, + "43": 1221224960.0, + "44": 1221224960.0, + "45": 1221224960.0, + "46": 1221224960.0, + "47": 1221224960.0, + "48": 1221224960.0, + "49": 1221224960.0, + "50": 1221224960.0, + "51": 1221224960.0, + "52": 1221224960.0, + "53": 1221224960.0, + "54": 1221224960.0, + "55": 1221224960.0, + "56": 1221224960.0, + "57": 1221224960.0, + "58": 1221224960.0, + "59": 1221224960.0, + "60": 1221224960.0, + "61": 1221224960.0, + "62": 1221224960.0, + "63": 1221224960.0, + "64": 1221224960.0, + "65": 1221224960.0, + "66": 1221224960.0, + "67": 1221224960.0, + "68": 1221224960.0, + "69": 1221224960.0, + "70": 1221224960.0, + "71": 1221224960.0, + "72": 1221224960.0, + "73": 1221224960.0, + "74": 1221224960.0, + "75": 1221224960.0, + "76": 1221224960.0, + "77": 1221224960.0, + "78": 1221224960.0, + "79": 1221224960.0, + "80": 1221224960.0, + "81": 1221224960.0, + "82": 1221224960.0, + "83": 1221224960.0, + "84": 1221224960.0, + "85": 1221224960.0, + "86": 1221224960.0, + "87": 1221224960.0, + "88": 1221224960.0, + "89": 1221224960.0, + "90": 1221224960.0, + "91": 1221224960.0, + "92": 1221224960.0, + "93": 1221224960.0, + "94": 1221224960.0, + "95": 1221224960.0, + "96": 1221224960.0, + "97": 1221224960.0, + "98": 1221224960.0, + "99": 1221224960.0, + "100": 1221224960.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.57061, + "2": 0.29948, + "3": 0.25664, + "4": 0.25525, + "5": 0.25975, + "6": 0.25312, + "7": 0.25214, + "8": 0.25198, + "9": 0.25236, + "10": 0.25037, + "11": 0.2502, + "12": 0.26, + "13": 0.25174, + "14": 0.2554, + "15": 0.25351, + "16": 0.25165, + "17": 0.25076, + "18": 0.2547, + "19": 0.26231, + "20": 0.24779, + "21": 0.2545, + "22": 0.2531, + "23": 0.25207, + "24": 0.25132, + "25": 0.25306, + "26": 0.25309, + "27": 0.25693, + "28": 0.25352, + "29": 0.25148, + "30": 0.29402, + "31": 0.26128, + "32": 0.24916, + "33": 0.24618, + "34": 0.25663, + "35": 0.25422, + "36": 0.24893, + "37": 
0.2479, + "38": 0.24866, + "39": 0.2519, + "40": 0.24703, + "41": 0.26177, + "42": 0.26238, + "43": 0.26445, + "44": 0.25941, + "45": 0.25966, + "46": 0.26213, + "47": 0.2596, + "48": 0.2599, + "49": 0.26099, + "50": 0.25831, + "51": 0.26468, + "52": 0.27616, + "53": 0.28242, + "54": 0.25962, + "55": 0.25746, + "56": 0.2557, + "57": 0.25914, + "58": 0.26888, + "59": 0.25926, + "60": 0.2602, + "61": 0.25903, + "62": 0.59856, + "63": 0.25221, + "64": 0.26626, + "65": 0.25583, + "66": 0.25184, + "67": 0.25017, + "68": 0.24797, + "69": 0.25276, + "70": 0.24957, + "71": 0.25739, + "72": 0.25804, + "73": 0.24807, + "74": 0.24833, + "75": 0.24684, + "76": 0.24858, + "77": 0.2483, + "78": 0.24799, + "79": 0.24873, + "80": 0.25713, + "81": 0.24828, + "82": 0.25747, + "83": 0.25481, + "84": 0.25333, + "85": 0.25368, + "86": 0.24984, + "87": 0.24993, + "88": 0.24848, + "89": 0.24598, + "90": 0.24825, + "91": 0.24841, + "92": 0.24485, + "93": 0.24192, + "94": 0.24464, + "95": 0.24499, + "96": 0.24711, + "97": 0.2469, + "98": 0.24804, + "99": 0.25199, + "100": 0.24705 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..e88d1fcb739 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85163, + "2": 10.85389, + "3": 10.83866, + "4": 10.84328, + "5": 10.8787, + "6": 10.87586, + "7": 10.86186, + "8": 10.84928, + "9": 10.84877, + "10": 10.80639, + "11": 10.88679, + "12": 10.85682, + "13": 10.86235, + "14": 
10.87768, + "15": 10.81037, + "16": 10.81984, + "17": 10.7828, + "18": 10.80322, + "19": 10.78358, + "20": 10.68694, + "21": 10.66905, + "22": 10.52315, + "23": 10.68436, + "24": 10.56577, + "25": 10.49705, + "26": 10.56553, + "27": 10.58171, + "28": 10.52995, + "29": 10.55561, + "30": 10.32672, + "31": 10.07636, + "32": 10.43058, + "33": 10.42455, + "34": 10.16647, + "35": 10.22486, + "36": 10.18341, + "37": 10.29956, + "38": 10.14498, + "39": 10.37061, + "40": 10.04385, + "41": 10.0945, + "42": 10.17381, + "43": 9.77538, + "44": 9.90308, + "45": 9.779, + "46": 9.76548, + "47": 10.10723, + "48": 9.80029, + "49": 9.47526, + "50": 9.85792, + "51": 9.80039, + "52": 9.69506, + "53": 10.0285, + "54": 9.9143, + "55": 9.83807, + "56": 9.57833, + "57": 9.42582, + "58": 9.79172, + "59": 9.53617, + "60": 9.44186, + "61": 9.65656, + "62": 9.94377, + "63": 9.32151, + "64": 9.73339, + "65": 8.88427, + "66": 9.65533, + "67": 9.32106, + "68": 9.75064, + "69": 9.764, + "70": 9.70469, + "71": 9.56861, + "72": 9.53902, + "73": 9.45226, + "74": 8.87736, + "75": 9.37933, + "76": 9.01867, + "77": 10.03519, + "78": 9.69263, + "79": 9.33459, + "80": 9.36591, + "81": 9.43919, + "82": 9.66572, + "83": 9.25441, + "84": 9.378, + "85": 9.57422, + "86": 9.03277, + "87": 9.55775, + "88": 9.71521, + "89": 9.55703, + "90": 9.788, + "91": 9.29518, + "92": 9.31516, + "93": 9.03246, + "94": 8.79087, + "95": 9.48833, + "96": 9.49574, + "97": 9.2713, + "98": 9.64071, + "99": 8.84741, + "100": 9.35871 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 600.0, + "2": 574.0, + "3": 677.0, + "4": 617.0, + "5": 669.0, + "6": 650.0, + "7": 700.0, + "8": 624.0, + "9": 649.0, + "10": 562.0, + "11": 661.0, + "12": 622.0, + "13": 711.0, + "14": 656.0, + "15": 688.0, + "16": 667.0, + "17": 696.0, + "18": 660.0, + "19": 607.0, + "20": 649.0, + "21": 646.0, + "22": 653.0, + "23": 743.0, + "24": 678.0, + "25": 663.0, + "26": 661.0, + "27": 703.0, + "28": 
769.0, + "29": 775.0, + "30": 767.0, + "31": 606.0, + "32": 755.0, + "33": 764.0, + "34": 676.0, + "35": 779.0, + "36": 768.0, + "37": 824.0, + "38": 808.0, + "39": 893.0, + "40": 795.0, + "41": 774.0, + "42": 895.0, + "43": 758.0, + "44": 770.0, + "45": 738.0, + "46": 856.0, + "47": 912.0, + "48": 843.0, + "49": 884.0, + "50": 782.0, + "51": 967.0, + "52": 940.0, + "53": 988.0, + "54": 937.0, + "55": 870.0, + "56": 981.0, + "57": 838.0, + "58": 909.0, + "59": 969.0, + "60": 821.0, + "61": 1016.0, + "62": 953.0, + "63": 895.0, + "64": 1137.0, + "65": 917.0, + "66": 1050.0, + "67": 946.0, + "68": 974.0, + "69": 1091.0, + "70": 1024.0, + "71": 1104.0, + "72": 888.0, + "73": 967.0, + "74": 657.0, + "75": 879.0, + "76": 977.0, + "77": 1172.0, + "78": 1085.0, + "79": 1107.0, + "80": 1178.0, + "81": 1236.0, + "82": 1103.0, + "83": 975.0, + "84": 1164.0, + "85": 1160.0, + "86": 879.0, + "87": 1184.0, + "88": 1102.0, + "89": 1105.0, + "90": 1122.0, + "91": 1065.0, + "92": 1090.0, + "93": 848.0, + "94": 1158.0, + "95": 1173.0, + "96": 1140.0, + "97": 1074.0, + "98": 1203.0, + "99": 1141.0, + "100": 1111.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 689356288.0, + "2": 689356288.0, + "3": 689356288.0, + "4": 689356288.0, + "5": 689356288.0, + "6": 689356288.0, + "7": 689356288.0, + "8": 689356288.0, + "9": 689356288.0, + "10": 689356288.0, + "11": 689356288.0, + "12": 689356288.0, + "13": 689356288.0, + "14": 689356288.0, + "15": 689356288.0, + "16": 689356288.0, + "17": 689356288.0, + "18": 689356288.0, + "19": 689356288.0, + "20": 689356288.0, + "21": 689356288.0, + "22": 689356288.0, + "23": 689356288.0, + "24": 689356288.0, + "25": 689356288.0, + "26": 689356288.0, + "27": 689356288.0, + "28": 689356288.0, + "29": 689356288.0, + "30": 689356288.0, + "31": 689356288.0, + "32": 689356288.0, + "33": 689356288.0, + "34": 689356288.0, + "35": 689356288.0, + "36": 689356288.0, + "37": 689356288.0, + "38": 
689356288.0, + "39": 689356288.0, + "40": 689356288.0, + "41": 689356288.0, + "42": 689356288.0, + "43": 689356288.0, + "44": 689356288.0, + "45": 689356288.0, + "46": 689356288.0, + "47": 689356288.0, + "48": 689356288.0, + "49": 689356288.0, + "50": 689356288.0, + "51": 689356288.0, + "52": 689356288.0, + "53": 689356288.0, + "54": 689356288.0, + "55": 689356288.0, + "56": 689356288.0, + "57": 689356288.0, + "58": 689356288.0, + "59": 689356288.0, + "60": 689356288.0, + "61": 689356288.0, + "62": 689356288.0, + "63": 689356288.0, + "64": 689356288.0, + "65": 689356288.0, + "66": 689356288.0, + "67": 689356288.0, + "68": 689356288.0, + "69": 689356288.0, + "70": 689356288.0, + "71": 689356288.0, + "72": 689356288.0, + "73": 689356288.0, + "74": 689356288.0, + "75": 689356288.0, + "76": 689356288.0, + "77": 689356288.0, + "78": 689356288.0, + "79": 689356288.0, + "80": 689356288.0, + "81": 689356288.0, + "82": 689356288.0, + "83": 689356288.0, + "84": 689356288.0, + "85": 689356288.0, + "86": 689356288.0, + "87": 689356288.0, + "88": 689356288.0, + "89": 689356288.0, + "90": 689356288.0, + "91": 689356288.0, + "92": 689356288.0, + "93": 689356288.0, + "94": 689356288.0, + "95": 689356288.0, + "96": 689356288.0, + "97": 689356288.0, + "98": 689356288.0, + "99": 689356288.0, + "100": 689356288.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 959652864.0, + "2": 1221223936.0, + "3": 1221224960.0, + "4": 1221224960.0, + "5": 1221224960.0, + "6": 1221224960.0, + "7": 1221224960.0, + "8": 1221224960.0, + "9": 1221224960.0, + "10": 1221224960.0, + "11": 1221224960.0, + "12": 1221224960.0, + "13": 1221224960.0, + "14": 1221224960.0, + "15": 1221224960.0, + "16": 1221224960.0, + "17": 1221224960.0, + "18": 1221224960.0, + "19": 1221224960.0, + "20": 1221224960.0, + "21": 1221224960.0, + "22": 1221224960.0, + "23": 1221224960.0, + "24": 1221224960.0, + "25": 1221224960.0, + "26": 1221224960.0, + "27": 
1221224960.0, + "28": 1221224960.0, + "29": 1221224960.0, + "30": 1221224960.0, + "31": 1221224960.0, + "32": 1221224960.0, + "33": 1221224960.0, + "34": 1221224960.0, + "35": 1221224960.0, + "36": 1221224960.0, + "37": 1221224960.0, + "38": 1221224960.0, + "39": 1221224960.0, + "40": 1221224960.0, + "41": 1221224960.0, + "42": 1221224960.0, + "43": 1221224960.0, + "44": 1221224960.0, + "45": 1221224960.0, + "46": 1221224960.0, + "47": 1221224960.0, + "48": 1221224960.0, + "49": 1221224960.0, + "50": 1221224960.0, + "51": 1221224960.0, + "52": 1221224960.0, + "53": 1221224960.0, + "54": 1221224960.0, + "55": 1221224960.0, + "56": 1221224960.0, + "57": 1221224960.0, + "58": 1221224960.0, + "59": 1221224960.0, + "60": 1221224960.0, + "61": 1221224960.0, + "62": 1221224960.0, + "63": 1221224960.0, + "64": 1221224960.0, + "65": 1221224960.0, + "66": 1221224960.0, + "67": 1221224960.0, + "68": 1221224960.0, + "69": 1221224960.0, + "70": 1221224960.0, + "71": 1221224960.0, + "72": 1221224960.0, + "73": 1221224960.0, + "74": 1221224960.0, + "75": 1221224960.0, + "76": 1221224960.0, + "77": 1221224960.0, + "78": 1221224960.0, + "79": 1221224960.0, + "80": 1221224960.0, + "81": 1221224960.0, + "82": 1221224960.0, + "83": 1221224960.0, + "84": 1221224960.0, + "85": 1221224960.0, + "86": 1221224960.0, + "87": 1221224960.0, + "88": 1221224960.0, + "89": 1221224960.0, + "90": 1221224960.0, + "91": 1221224960.0, + "92": 1221224960.0, + "93": 1221224960.0, + "94": 1221224960.0, + "95": 1221224960.0, + "96": 1221224960.0, + "97": 1221224960.0, + "98": 1221224960.0, + "99": 1221224960.0, + "100": 1221224960.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34397, + "2": 0.2989, + "3": 0.28701, + "4": 0.28299, + "5": 0.28509, + "6": 0.28378, + "7": 0.28776, + "8": 0.28423, + "9": 0.28722, + "10": 0.28077, + "11": 0.28936, + "12": 0.28752, + "13": 0.2827, + "14": 0.28574, + "15": 0.28467, + "16": 0.28217, + "17": 
0.28486, + "18": 0.28581, + "19": 0.28155, + "20": 0.28509, + "21": 0.28251, + "22": 0.28381, + "23": 0.27876, + "24": 0.28748, + "25": 0.28028, + "26": 0.28778, + "27": 0.28262, + "28": 0.28332, + "29": 0.28115, + "30": 0.28178, + "31": 0.28495, + "32": 0.28165, + "33": 0.28663, + "34": 0.29207, + "35": 0.28688, + "36": 0.27656, + "37": 0.28363, + "38": 0.28429, + "39": 0.28629, + "40": 0.27969, + "41": 0.27978, + "42": 0.28454, + "43": 0.28022, + "44": 0.28402, + "45": 0.27645, + "46": 0.28795, + "47": 0.28097, + "48": 0.28395, + "49": 0.28183, + "50": 0.28615, + "51": 0.28373, + "52": 0.27449, + "53": 0.27345, + "54": 0.27869, + "55": 0.27079, + "56": 0.27901, + "57": 0.27662, + "58": 0.27749, + "59": 0.27681, + "60": 0.27639, + "61": 0.27275, + "62": 0.27644, + "63": 0.27655, + "64": 0.2741, + "65": 0.27749, + "66": 0.27321, + "67": 0.27962, + "68": 0.2759, + "69": 0.27771, + "70": 0.27472, + "71": 0.27602, + "72": 0.27221, + "73": 0.27682, + "74": 0.27563, + "75": 0.27287, + "76": 0.27345, + "77": 0.27491, + "78": 0.27512, + "79": 0.27463, + "80": 0.27721, + "81": 0.27482, + "82": 0.27638, + "83": 0.27219, + "84": 0.27519, + "85": 0.27727, + "86": 0.2756, + "87": 0.27351, + "88": 0.27369, + "89": 0.27604, + "90": 0.27461, + "91": 0.27436, + "92": 0.27679, + "93": 0.27705, + "94": 0.27348, + "95": 0.28014, + "96": 0.27482, + "97": 0.27546, + "98": 0.27381, + "99": 0.27767, + "100": 0.27505 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..27f7687927e --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.88759, + "2": 10.90372, + "3": 10.87084, + "4": 10.8703, + "5": 10.9019, + "6": 10.90847, + "7": 10.88782, + "8": 10.87732, + "9": 10.88357, + "10": 10.8685, + "11": 10.881, + "12": 10.88499, + "13": 10.90361, + "14": 10.89973, + "15": 10.84836, + "16": 10.84523, + "17": 10.8009, + "18": 10.82612, + "19": 10.81899, + "20": 10.71771, + "21": 10.69282, + "22": 10.57372, + "23": 10.70806, + "24": 10.58164, + "25": 10.54272, + "26": 10.60193, + "27": 10.59774, + "28": 10.55016, + "29": 10.56339, + "30": 10.33644, + "31": 10.09546, + "32": 10.4367, + "33": 10.43049, + "34": 10.17724, + "35": 10.23973, + "36": 10.1824, + "37": 10.30496, + "38": 10.14903, + "39": 10.35864, + "40": 10.0326, + "41": 10.08767, + "42": 10.16354, + "43": 9.78196, + "44": 9.89592, + "45": 9.76817, + "46": 9.7675, + "47": 10.08837, + "48": 9.78334, + "49": 9.45719, + "50": 9.85325, + "51": 9.78848, + "52": 9.67834, + "53": 10.01957, + "54": 9.90016, + "55": 9.82267, + "56": 9.56373, + "57": 9.41789, + "58": 9.77443, + "59": 9.52365, + "60": 9.43758, + "61": 9.64823, + "62": 9.93687, + "63": 9.30556, + "64": 9.72235, + "65": 8.87846, + "66": 9.65137, + "67": 9.31592, + "68": 9.73885, + "69": 9.74593, + "70": 9.68162, + "71": 9.56047, + "72": 9.53909, + "73": 9.44523, + "74": 8.88643, + "75": 9.37197, + "76": 9.03136, + "77": 10.03086, + "78": 9.6894, + "79": 9.33246, + "80": 9.35658, + "81": 9.43622, + "82": 9.65385, + "83": 9.2576, + "84": 9.3653, + "85": 9.57144, + "86": 9.03654, + "87": 9.55861, + "88": 9.70775, + "89": 9.55527, + "90": 9.7773, + "91": 9.29751, + "92": 9.32182, + "93": 9.0299, + "94": 8.78447, + "95": 9.48561, + "96": 9.48707, + "97": 9.27002, + "98": 9.63516, + "99": 8.83979, + "100": 9.35905 + } + 
}, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 568.0, + "2": 629.0, + "3": 632.0, + "4": 645.0, + "5": 701.0, + "6": 581.0, + "7": 683.0, + "8": 582.0, + "9": 635.0, + "10": 541.0, + "11": 670.0, + "12": 548.0, + "13": 678.0, + "14": 681.0, + "15": 687.0, + "16": 686.0, + "17": 698.0, + "18": 652.0, + "19": 625.0, + "20": 614.0, + "21": 657.0, + "22": 589.0, + "23": 691.0, + "24": 607.0, + "25": 633.0, + "26": 695.0, + "27": 697.0, + "28": 701.0, + "29": 744.0, + "30": 666.0, + "31": 582.0, + "32": 675.0, + "33": 703.0, + "34": 648.0, + "35": 699.0, + "36": 763.0, + "37": 803.0, + "38": 848.0, + "39": 846.0, + "40": 769.0, + "41": 806.0, + "42": 858.0, + "43": 708.0, + "44": 779.0, + "45": 854.0, + "46": 804.0, + "47": 892.0, + "48": 866.0, + "49": 827.0, + "50": 819.0, + "51": 913.0, + "52": 837.0, + "53": 1076.0, + "54": 934.0, + "55": 892.0, + "56": 945.0, + "57": 850.0, + "58": 1041.0, + "59": 994.0, + "60": 875.0, + "61": 996.0, + "62": 983.0, + "63": 909.0, + "64": 1115.0, + "65": 922.0, + "66": 1137.0, + "67": 958.0, + "68": 996.0, + "69": 1065.0, + "70": 1077.0, + "71": 1119.0, + "72": 837.0, + "73": 1022.0, + "74": 750.0, + "75": 904.0, + "76": 1058.0, + "77": 1193.0, + "78": 1146.0, + "79": 1023.0, + "80": 1111.0, + "81": 1212.0, + "82": 1045.0, + "83": 1022.0, + "84": 1202.0, + "85": 1159.0, + "86": 885.0, + "87": 1249.0, + "88": 1065.0, + "89": 1158.0, + "90": 1045.0, + "91": 1061.0, + "92": 1143.0, + "93": 908.0, + "94": 1118.0, + "95": 1071.0, + "96": 1147.0, + "97": 1091.0, + "98": 1214.0, + "99": 1103.0, + "100": 1140.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 610712576.0, + "2": 610712576.0, + "3": 610712576.0, + "4": 610712576.0, + "5": 610712576.0, + "6": 610712576.0, + "7": 610712576.0, + "8": 610712576.0, + "9": 610712576.0, + "10": 610712576.0, + "11": 610712576.0, + "12": 610712576.0, + "13": 610712576.0, + "14": 
610712576.0, + "15": 610712576.0, + "16": 610712576.0, + "17": 610712576.0, + "18": 610712576.0, + "19": 610712576.0, + "20": 610712576.0, + "21": 610712576.0, + "22": 610712576.0, + "23": 610712576.0, + "24": 610712576.0, + "25": 610712576.0, + "26": 610712576.0, + "27": 610712576.0, + "28": 610712576.0, + "29": 610712576.0, + "30": 610712576.0, + "31": 610712576.0, + "32": 610712576.0, + "33": 610712576.0, + "34": 610712576.0, + "35": 610712576.0, + "36": 610712576.0, + "37": 610712576.0, + "38": 610712576.0, + "39": 610712576.0, + "40": 610712576.0, + "41": 610712576.0, + "42": 610712576.0, + "43": 610712576.0, + "44": 610712576.0, + "45": 610712576.0, + "46": 610712576.0, + "47": 610712576.0, + "48": 610712576.0, + "49": 610712576.0, + "50": 610712576.0, + "51": 610712576.0, + "52": 610712576.0, + "53": 610712576.0, + "54": 610712576.0, + "55": 610712576.0, + "56": 610712576.0, + "57": 610712576.0, + "58": 610712576.0, + "59": 610712576.0, + "60": 610712576.0, + "61": 610712576.0, + "62": 610712576.0, + "63": 610712576.0, + "64": 610712576.0, + "65": 610712576.0, + "66": 610712576.0, + "67": 610712576.0, + "68": 610712576.0, + "69": 610712576.0, + "70": 610712576.0, + "71": 610712576.0, + "72": 610712576.0, + "73": 610712576.0, + "74": 610712576.0, + "75": 610712576.0, + "76": 610712576.0, + "77": 610712576.0, + "78": 610712576.0, + "79": 610712576.0, + "80": 610712576.0, + "81": 610712576.0, + "82": 610712576.0, + "83": 610712576.0, + "84": 610712576.0, + "85": 610712576.0, + "86": 610712576.0, + "87": 610712576.0, + "88": 610712576.0, + "89": 610712576.0, + "90": 610712576.0, + "91": 610712576.0, + "92": 610712576.0, + "93": 610712576.0, + "94": 610712576.0, + "95": 610712576.0, + "96": 610712576.0, + "97": 610712576.0, + "98": 610712576.0, + "99": 610712576.0, + "100": 610712576.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 882344448.0, + "2": 1142590976.0, + "3": 1142590976.0, + 
"4": 1142590976.0, + "5": 1142590976.0, + "6": 1142590976.0, + "7": 1142590976.0, + "8": 1142590976.0, + "9": 1142590976.0, + "10": 1142590976.0, + "11": 1142590976.0, + "12": 1142590976.0, + "13": 1142590976.0, + "14": 1142590976.0, + "15": 1142605824.0, + "16": 1142605824.0, + "17": 1142605824.0, + "18": 1142605824.0, + "19": 1142605824.0, + "20": 1142605824.0, + "21": 1142605824.0, + "22": 1142605824.0, + "23": 1142605824.0, + "24": 1142605824.0, + "25": 1142605824.0, + "26": 1142605824.0, + "27": 1142605824.0, + "28": 1142605824.0, + "29": 1142605824.0, + "30": 1142605824.0, + "31": 1142605824.0, + "32": 1142605824.0, + "33": 1142605824.0, + "34": 1142605824.0, + "35": 1142605824.0, + "36": 1142605824.0, + "37": 1142605824.0, + "38": 1142605824.0, + "39": 1142605824.0, + "40": 1142605824.0, + "41": 1142605824.0, + "42": 1142605824.0, + "43": 1142605824.0, + "44": 1142605824.0, + "45": 1142605824.0, + "46": 1142605824.0, + "47": 1142605824.0, + "48": 1142605824.0, + "49": 1142605824.0, + "50": 1142605824.0, + "51": 1142605824.0, + "52": 1142605824.0, + "53": 1142605824.0, + "54": 1142605824.0, + "55": 1142605824.0, + "56": 1142605824.0, + "57": 1142605824.0, + "58": 1142605824.0, + "59": 1142605824.0, + "60": 1142605824.0, + "61": 1142605824.0, + "62": 1142605824.0, + "63": 1142605824.0, + "64": 1142605824.0, + "65": 1142605824.0, + "66": 1142605824.0, + "67": 1142605824.0, + "68": 1142605824.0, + "69": 1142605824.0, + "70": 1142605824.0, + "71": 1142605824.0, + "72": 1142605824.0, + "73": 1142605824.0, + "74": 1142605824.0, + "75": 1142605824.0, + "76": 1142605824.0, + "77": 1142605824.0, + "78": 1142605824.0, + "79": 1142605824.0, + "80": 1142605824.0, + "81": 1142605824.0, + "82": 1142605824.0, + "83": 1142605824.0, + "84": 1142605824.0, + "85": 1142605824.0, + "86": 1142605824.0, + "87": 1142605824.0, + "88": 1142605824.0, + "89": 1142605824.0, + "90": 1142605824.0, + "91": 1142605824.0, + "92": 1142605824.0, + "93": 1142605824.0, + "94": 1142605824.0, + 
"95": 1142605824.0, + "96": 1142605824.0, + "97": 1142605824.0, + "98": 1142605824.0, + "99": 1142605824.0, + "100": 1142605824.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.61399, + "2": 0.3945, + "3": 0.34953, + "4": 0.35042, + "5": 0.35976, + "6": 0.34775, + "7": 0.34855, + "8": 0.3567, + "9": 0.57776, + "10": 0.35283, + "11": 0.34546, + "12": 0.66208, + "13": 0.3538, + "14": 0.33888, + "15": 0.34934, + "16": 0.3406, + "17": 0.34067, + "18": 0.34972, + "19": 0.33929, + "20": 0.57923, + "21": 0.33789, + "22": 0.63069, + "23": 0.33968, + "24": 0.3363, + "25": 0.35184, + "26": 0.33895, + "27": 0.33764, + "28": 0.36204, + "29": 0.33822, + "30": 0.3377, + "31": 0.35301, + "32": 0.33764, + "33": 0.33768, + "34": 0.35102, + "35": 0.33833, + "36": 0.33797, + "37": 0.35167, + "38": 0.33758, + "39": 0.33772, + "40": 0.34854, + "41": 0.33774, + "42": 0.33744, + "43": 0.35268, + "44": 0.33831, + "45": 0.34111, + "46": 0.36265, + "47": 0.33842, + "48": 0.33892, + "49": 0.35205, + "50": 0.33895, + "51": 0.35452, + "52": 0.3491, + "53": 0.34427, + "54": 0.3643, + "55": 0.34634, + "56": 0.34328, + "57": 0.35888, + "58": 0.34339, + "59": 0.3441, + "60": 0.35965, + "61": 0.34295, + "62": 0.3437, + "63": 0.35875, + "64": 0.34325, + "65": 0.34385, + "66": 0.35947, + "67": 0.34189, + "68": 0.34267, + "69": 0.35835, + "70": 0.3399, + "71": 0.34054, + "72": 0.36119, + "73": 0.3405, + "74": 0.34184, + "75": 0.36047, + "76": 0.34108, + "77": 0.35201, + "78": 0.3566, + "79": 0.34417, + "80": 0.36209, + "81": 0.3499, + "82": 0.34382, + "83": 0.35876, + "84": 0.34299, + "85": 0.34373, + "86": 0.3589, + "87": 0.3438, + "88": 0.3435, + "89": 0.35918, + "90": 0.34314, + "91": 0.34454, + "92": 0.3605, + "93": 0.35594, + "94": 0.34422, + "95": 0.36259, + "96": 0.34401, + "97": 0.34507, + "98": 0.3692, + "99": 0.34387, + "100": 0.35445 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..d39fc02d394 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.88759, + "2": 10.90372, + "3": 10.87084, + "4": 10.87028, + "5": 10.90194, + "6": 10.90848, + "7": 10.88784, + "8": 10.87729, + "9": 10.8836, + "10": 10.86849, + "11": 10.88103, + "12": 10.88497, + "13": 10.90361, + "14": 10.89973, + "15": 10.84833, + "16": 10.84522, + "17": 10.80087, + "18": 10.82613, + "19": 10.81897, + "20": 10.7177, + "21": 10.69285, + "22": 10.57376, + "23": 10.70805, + "24": 10.5816, + "25": 10.54269, + "26": 10.60192, + "27": 10.59777, + "28": 10.55013, + "29": 10.5634, + "30": 10.3364, + "31": 10.09543, + "32": 10.43669, + "33": 10.43049, + "34": 10.17722, + "35": 10.23976, + "36": 10.18239, + "37": 10.30493, + "38": 10.14901, + "39": 10.35864, + "40": 10.03267, + "41": 10.08765, + "42": 10.16354, + "43": 9.78194, + "44": 9.89592, + "45": 9.76819, + "46": 9.76746, + "47": 10.08836, + "48": 9.78334, + "49": 9.45723, + "50": 9.85323, + "51": 9.78852, + "52": 9.67832, + "53": 10.01958, + "54": 9.90021, + "55": 9.82267, + "56": 9.56373, + "57": 9.41792, + "58": 9.77442, + "59": 9.52363, + "60": 9.43757, + "61": 9.64824, + "62": 9.93692, + "63": 9.30557, + "64": 9.72235, + "65": 8.87843, + "66": 9.65136, + "67": 9.31594, + "68": 9.7388, + "69": 9.74596, + "70": 9.68161, + "71": 9.5605, + "72": 9.53909, + "73": 9.4452, + "74": 8.88639, + "75": 9.372, + "76": 9.03138, + "77": 10.03084, + "78": 
9.68943, + "79": 9.33251, + "80": 9.35653, + "81": 9.4362, + "82": 9.65384, + "83": 9.2576, + "84": 9.36531, + "85": 9.57145, + "86": 9.0365, + "87": 9.55862, + "88": 9.70774, + "89": 9.55529, + "90": 9.7773, + "91": 9.29748, + "92": 9.32182, + "93": 9.02991, + "94": 8.78449, + "95": 9.48563, + "96": 9.48709, + "97": 9.27007, + "98": 9.63511, + "99": 8.83981, + "100": 9.35907 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 600.0, + "2": 622.0, + "3": 611.0, + "4": 564.0, + "5": 653.0, + "6": 733.0, + "7": 686.0, + "8": 617.0, + "9": 679.0, + "10": 535.0, + "11": 644.0, + "12": 616.0, + "13": 708.0, + "14": 646.0, + "15": 648.0, + "16": 648.0, + "17": 683.0, + "18": 638.0, + "19": 643.0, + "20": 587.0, + "21": 656.0, + "22": 578.0, + "23": 707.0, + "24": 640.0, + "25": 626.0, + "26": 675.0, + "27": 697.0, + "28": 740.0, + "29": 731.0, + "30": 656.0, + "31": 589.0, + "32": 704.0, + "33": 740.0, + "34": 711.0, + "35": 677.0, + "36": 723.0, + "37": 790.0, + "38": 759.0, + "39": 846.0, + "40": 797.0, + "41": 748.0, + "42": 817.0, + "43": 706.0, + "44": 809.0, + "45": 749.0, + "46": 812.0, + "47": 914.0, + "48": 890.0, + "49": 795.0, + "50": 864.0, + "51": 963.0, + "52": 907.0, + "53": 1040.0, + "54": 981.0, + "55": 836.0, + "56": 1022.0, + "57": 804.0, + "58": 964.0, + "59": 1012.0, + "60": 849.0, + "61": 996.0, + "62": 1016.0, + "63": 890.0, + "64": 1092.0, + "65": 1006.0, + "66": 1113.0, + "67": 916.0, + "68": 1065.0, + "69": 1073.0, + "70": 1156.0, + "71": 1034.0, + "72": 844.0, + "73": 1014.0, + "74": 748.0, + "75": 893.0, + "76": 1008.0, + "77": 1179.0, + "78": 1170.0, + "79": 1060.0, + "80": 1130.0, + "81": 1160.0, + "82": 1011.0, + "83": 964.0, + "84": 1205.0, + "85": 1082.0, + "86": 842.0, + "87": 1113.0, + "88": 1053.0, + "89": 1124.0, + "90": 1058.0, + "91": 1066.0, + "92": 1170.0, + "93": 894.0, + "94": 1207.0, + "95": 1104.0, + "96": 1196.0, + "97": 1081.0, + "98": 1247.0, + "99": 1088.0, + "100": 
1138.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 610712576.0, + "2": 610712576.0, + "3": 610712576.0, + "4": 610712576.0, + "5": 610712576.0, + "6": 610712576.0, + "7": 610712576.0, + "8": 610712576.0, + "9": 610712576.0, + "10": 610712576.0, + "11": 610712576.0, + "12": 610712576.0, + "13": 610712576.0, + "14": 610712576.0, + "15": 610712576.0, + "16": 610712576.0, + "17": 610712576.0, + "18": 610712576.0, + "19": 610712576.0, + "20": 610712576.0, + "21": 610712576.0, + "22": 610712576.0, + "23": 610712576.0, + "24": 610712576.0, + "25": 610712576.0, + "26": 610712576.0, + "27": 610712576.0, + "28": 610712576.0, + "29": 610712576.0, + "30": 610712576.0, + "31": 610712576.0, + "32": 610712576.0, + "33": 610712576.0, + "34": 610712576.0, + "35": 610712576.0, + "36": 610712576.0, + "37": 610712576.0, + "38": 610712576.0, + "39": 610712576.0, + "40": 610712576.0, + "41": 610712576.0, + "42": 610712576.0, + "43": 610712576.0, + "44": 610712576.0, + "45": 610712576.0, + "46": 610712576.0, + "47": 610712576.0, + "48": 610712576.0, + "49": 610712576.0, + "50": 610712576.0, + "51": 610712576.0, + "52": 610712576.0, + "53": 610712576.0, + "54": 610712576.0, + "55": 610712576.0, + "56": 610712576.0, + "57": 610712576.0, + "58": 610712576.0, + "59": 610712576.0, + "60": 610712576.0, + "61": 610712576.0, + "62": 610712576.0, + "63": 610712576.0, + "64": 610712576.0, + "65": 610712576.0, + "66": 610712576.0, + "67": 610712576.0, + "68": 610712576.0, + "69": 610712576.0, + "70": 610712576.0, + "71": 610712576.0, + "72": 610712576.0, + "73": 610712576.0, + "74": 610712576.0, + "75": 610712576.0, + "76": 610712576.0, + "77": 610712576.0, + "78": 610712576.0, + "79": 610712576.0, + "80": 610712576.0, + "81": 610712576.0, + "82": 610712576.0, + "83": 610712576.0, + "84": 610712576.0, + "85": 610712576.0, + "86": 610712576.0, + "87": 610712576.0, + "88": 610712576.0, + "89": 610712576.0, + "90": 610712576.0, + 
"91": 610712576.0, + "92": 610712576.0, + "93": 610712576.0, + "94": 610712576.0, + "95": 610712576.0, + "96": 610712576.0, + "97": 610712576.0, + "98": 610712576.0, + "99": 610712576.0, + "100": 610712576.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 879199232.0, + "2": 1141542400.0, + "3": 1141557248.0, + "4": 1141557248.0, + "5": 1141557248.0, + "6": 1141557248.0, + "7": 1141557248.0, + "8": 1141557248.0, + "9": 1141557248.0, + "10": 1141557248.0, + "11": 1141557248.0, + "12": 1141557248.0, + "13": 1141557248.0, + "14": 1141557248.0, + "15": 1141557248.0, + "16": 1141557248.0, + "17": 1141557248.0, + "18": 1141557248.0, + "19": 1141557248.0, + "20": 1141557248.0, + "21": 1141557248.0, + "22": 1141557248.0, + "23": 1141557248.0, + "24": 1141557248.0, + "25": 1141557248.0, + "26": 1141557248.0, + "27": 1141557248.0, + "28": 1141557248.0, + "29": 1141557248.0, + "30": 1141557248.0, + "31": 1141557248.0, + "32": 1141557248.0, + "33": 1141557248.0, + "34": 1141557248.0, + "35": 1141557248.0, + "36": 1141557248.0, + "37": 1141557248.0, + "38": 1141557248.0, + "39": 1141557248.0, + "40": 1141557248.0, + "41": 1141557248.0, + "42": 1141557248.0, + "43": 1141557248.0, + "44": 1141557248.0, + "45": 1141557248.0, + "46": 1141557248.0, + "47": 1141557248.0, + "48": 1141557248.0, + "49": 1141557248.0, + "50": 1141557248.0, + "51": 1141557248.0, + "52": 1141557248.0, + "53": 1141557248.0, + "54": 1141557248.0, + "55": 1141557248.0, + "56": 1141557248.0, + "57": 1141557248.0, + "58": 1141557248.0, + "59": 1141557248.0, + "60": 1141557248.0, + "61": 1142604800.0, + "62": 1142604800.0, + "63": 1142604800.0, + "64": 1142604800.0, + "65": 1142604800.0, + "66": 1142605824.0, + "67": 1142605824.0, + "68": 1142605824.0, + "69": 1142605824.0, + "70": 1142605824.0, + "71": 1142605824.0, + "72": 1142605824.0, + "73": 1142605824.0, + "74": 1142605824.0, + "75": 1142605824.0, + "76": 1142605824.0, + "77": 
1142605824.0, + "78": 1142605824.0, + "79": 1142605824.0, + "80": 1142605824.0, + "81": 1142605824.0, + "82": 1142605824.0, + "83": 1142605824.0, + "84": 1142605824.0, + "85": 1142605824.0, + "86": 1142605824.0, + "87": 1142605824.0, + "88": 1142605824.0, + "89": 1142605824.0, + "90": 1142605824.0, + "91": 1142605824.0, + "92": 1142605824.0, + "93": 1143639552.0, + "94": 1143639552.0, + "95": 1143639552.0, + "96": 1143639552.0, + "97": 1143639552.0, + "98": 1143639552.0, + "99": 1143639552.0, + "100": 1143639552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.52918, + "2": 0.38912, + "3": 0.35372, + "4": 0.34811, + "5": 0.35505, + "6": 0.35402, + "7": 0.55808, + "8": 0.3492, + "9": 0.34355, + "10": 0.82935, + "11": 0.34715, + "12": 0.34905, + "13": 0.55638, + "14": 0.35683, + "15": 0.34903, + "16": 0.34374, + "17": 0.35024, + "18": 0.35007, + "19": 0.34305, + "20": 0.35453, + "21": 0.3508, + "22": 0.35066, + "23": 0.34925, + "24": 0.35006, + "25": 0.34932, + "26": 0.66663, + "27": 0.34789, + "28": 0.34677, + "29": 0.34709, + "30": 0.35185, + "31": 0.34811, + "32": 0.35284, + "33": 0.35196, + "34": 0.35397, + "35": 0.34638, + "36": 0.35167, + "37": 0.35284, + "38": 0.34596, + "39": 0.35367, + "40": 0.35293, + "41": 0.34542, + "42": 0.35234, + "43": 0.35494, + "44": 0.34767, + "45": 0.35264, + "46": 0.35205, + "47": 0.35099, + "48": 0.34893, + "49": 0.34959, + "50": 0.34935, + "51": 0.35425, + "52": 0.34505, + "53": 0.34281, + "54": 0.35622, + "55": 0.3559, + "56": 0.34855, + "57": 0.34974, + "58": 0.34693, + "59": 0.34844, + "60": 0.34963, + "61": 0.34651, + "62": 0.349, + "63": 0.35001, + "64": 0.34701, + "65": 0.34907, + "66": 0.34895, + "67": 0.34615, + "68": 0.34859, + "69": 0.36095, + "70": 0.34112, + "71": 0.34777, + "72": 0.35188, + "73": 0.34151, + "74": 0.34797, + "75": 0.35077, + "76": 0.34341, + "77": 0.35012, + "78": 0.34839, + "79": 0.34146, + "80": 0.35541, + "81": 0.34764, + "82": 
0.34184, + "83": 0.35606, + "84": 0.34949, + "85": 0.34885, + "86": 0.3509, + "87": 0.35235, + "88": 0.34695, + "89": 0.35078, + "90": 0.35066, + "91": 0.352, + "92": 0.34948, + "93": 0.35191, + "94": 0.35111, + "95": 0.35751, + "96": 0.3453, + "97": 0.3509, + "98": 0.35322, + "99": 0.34448, + "100": 0.35525 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 2632047f775..f1d9edf458f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.84466, + "2": 10.84794, + "3": 10.84923, + "4": 10.8433, "5": 10.88246, + "6": 10.8808, + "7": 10.86574, + "8": 10.85417, + "9": 10.85542, "10": 10.81812, + "11": 10.88726, + "12": 10.86329, + "13": 10.86656, + "14": 10.884, "15": 10.8231, + "16": 10.82809, + "17": 10.79467, + "18": 10.81466, + "19": 10.80122, "20": 10.71614, + "21": 10.69886, + "22": 10.56738, + "23": 10.71707, + "24": 10.60503, "25": 10.55053, + "26": 10.60941, + "27": 10.62543, + "28": 10.57767, + "29": 10.59725, "30": 10.38488, + "31": 10.15554, + "32": 10.48231, + "33": 10.4763, + "34": 10.2393, "35": 10.29064, + "36": 10.25146, + "37": 10.35662, + "38": 10.21142, + "39": 10.42144, "40": 10.11569, + "41": 10.16423, + "42": 10.23644, + "43": 9.86597, + "44": 9.98146, "45": 9.86983, + "46": 9.85349, + "47": 10.16995, + "48": 9.876, + "49": 
9.57237, "50": 9.92525, + "51": 9.8709, + "52": 9.7737, + "53": 10.08149, + "54": 9.97376, "55": 9.90036, + "56": 9.64783, + "57": 9.50136, + "58": 9.85199, + "59": 9.6034, "60": 9.50993, + "61": 9.71315, + "62": 9.99373, + "63": 9.39358, + "64": 9.78904, "65": 8.96358, + "66": 9.71142, + "67": 9.38175, + "68": 9.79833, + "69": 9.80889, "70": 9.75039, + "71": 9.62004, + "72": 9.59387, + "73": 9.50631, + "74": 8.94916, "75": 9.43188, + "76": 9.08702, + "77": 10.06886, + "78": 9.73459, + "79": 9.38325, "80": 9.41272, + "81": 9.48499, + "82": 9.70672, + "83": 9.30939, + "84": 9.42428, "85": 9.61991, + "86": 9.07811, + "87": 9.59541, + "88": 9.75596, + "89": 9.60274, "90": 9.82165, + "91": 9.34268, + "92": 9.35878, + "93": 9.08116, + "94": 8.83791, "95": 9.5238, + "96": 9.53556, + "97": 9.31807, + "98": 9.68183, + "99": 8.89422, "100": 9.40138 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1713.0, + "2": 1750.0, + "3": 1744.0, + "4": 1895.0, "5": 1839.0, + "6": 1881.0, + "7": 1850.0, + "8": 1743.0, + "9": 1810.0, "10": 1452.0, + "11": 1886.0, + "12": 1752.0, + "13": 1834.0, + "14": 1774.0, "15": 1909.0, + "16": 1803.0, + "17": 1927.0, + "18": 1765.0, + "19": 1847.0, "20": 1707.0, + "21": 1950.0, + "22": 1794.0, + "23": 1974.0, + "24": 1676.0, "25": 1652.0, + "26": 1774.0, + "27": 1799.0, + "28": 2135.0, + "29": 2048.0, "30": 2032.0, + "31": 1599.0, + "32": 1929.0, + "33": 2143.0, + "34": 1874.0, "35": 1974.0, + "36": 2011.0, + "37": 2364.0, + "38": 2199.0, + "39": 2363.0, "40": 2239.0, + "41": 2269.0, + "42": 2228.0, + "43": 1972.0, + "44": 2070.0, "45": 2033.0, + "46": 2357.0, + "47": 2520.0, + "48": 2316.0, + "49": 2307.0, "50": 2302.0, + "51": 2514.0, + "52": 2430.0, + "53": 2840.0, + "54": 2677.0, "55": 2394.0, + "56": 2601.0, + "57": 2341.0, + "58": 2837.0, + "59": 2789.0, "60": 2425.0, + "61": 2923.0, + "62": 2591.0, + "63": 2416.0, + "64": 2937.0, "65": 2572.0, + "66": 3008.0, + "67": 2843.0, + 
"68": 2761.0, + "69": 2834.0, "70": 3108.0, + "71": 2989.0, + "72": 2316.0, + "73": 2950.0, + "74": 1899.0, "75": 2378.0, + "76": 2962.0, + "77": 3343.0, + "78": 3183.0, + "79": 2979.0, "80": 3209.0, + "81": 3583.0, + "82": 3160.0, + "83": 2776.0, + "84": 3242.0, "85": 3425.0, + "86": 2720.0, + "87": 3820.0, + "88": 3050.0, + "89": 3297.0, "90": 3069.0, + "91": 2685.0, + "92": 3061.0, + "93": 2584.0, + "94": 3338.0, "95": 3406.0, + "96": 3389.0, + "97": 3104.0, + "98": 3583.0, + "99": 3229.0, "100": 3225.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 490700288.0, + "2": 490700288.0, + "3": 490700288.0, + "4": 490700288.0, "5": 490700288.0, + "6": 490700288.0, + "7": 490700288.0, + "8": 490700288.0, + "9": 490700288.0, "10": 490700288.0, + "11": 490700288.0, + "12": 490700288.0, + "13": 490700288.0, + "14": 490700288.0, "15": 490700288.0, + "16": 490700288.0, + "17": 490700288.0, + "18": 490700288.0, + "19": 490700288.0, "20": 490700288.0, + "21": 490700288.0, + "22": 490700288.0, + "23": 490700288.0, + "24": 490700288.0, "25": 490700288.0, + "26": 490700288.0, + "27": 490700288.0, + "28": 490700288.0, + "29": 490700288.0, "30": 490700288.0, + "31": 490700288.0, + "32": 490700288.0, + "33": 490700288.0, + "34": 490700288.0, "35": 490700288.0, + "36": 490700288.0, + "37": 490700288.0, + "38": 490700288.0, + "39": 490700288.0, "40": 490700288.0, + "41": 490700288.0, + "42": 490700288.0, + "43": 490700288.0, + "44": 490700288.0, "45": 490700288.0, + "46": 490700288.0, + "47": 490700288.0, + "48": 490700288.0, + "49": 490700288.0, "50": 490700288.0, + "51": 490700288.0, + "52": 490700288.0, + "53": 490700288.0, + "54": 490700288.0, "55": 490700288.0, + "56": 490700288.0, + "57": 490700288.0, + "58": 490700288.0, + "59": 490700288.0, "60": 490700288.0, + "61": 490700288.0, + "62": 490700288.0, + "63": 490700288.0, + "64": 490700288.0, "65": 490700288.0, + "66": 490700288.0, + "67": 
490700288.0, + "68": 490700288.0, + "69": 490700288.0, "70": 490700288.0, + "71": 490700288.0, + "72": 490700288.0, + "73": 490700288.0, + "74": 490700288.0, "75": 490700288.0, + "76": 490700288.0, + "77": 490700288.0, + "78": 490700288.0, + "79": 490700288.0, "80": 490700288.0, + "81": 490700288.0, + "82": 490700288.0, + "83": 490700288.0, + "84": 490700288.0, "85": 490700288.0, + "86": 490700288.0, + "87": 490700288.0, + "88": 490700288.0, + "89": 490700288.0, "90": 490700288.0, + "91": 490700288.0, + "92": 490700288.0, + "93": 490700288.0, + "94": 490700288.0, "95": 490700288.0, + "96": 490700288.0, + "97": 490700288.0, + "98": 490700288.0, + "99": 490700288.0, "100": 490700288.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1553275392.0, + "2": 1681702400.0, + "3": 1681702400.0, + "4": 1681702400.0, "5": 1681702400.0, + "6": 1681702400.0, + "7": 1681702400.0, + "8": 1681702400.0, + "9": 1681702400.0, "10": 1681702400.0, + "11": 1681702400.0, + "12": 1681702400.0, + "13": 1681702400.0, + "14": 1681702400.0, "15": 1681702400.0, + "16": 1681702400.0, + "17": 1681702400.0, + "18": 1681702400.0, + "19": 1681702400.0, "20": 1681702400.0, + "21": 1681702400.0, + "22": 1681702400.0, + "23": 1681702400.0, + "24": 1681702400.0, "25": 1681702400.0, + "26": 1681702400.0, + "27": 1681702400.0, + "28": 1681702400.0, + "29": 1681702400.0, "30": 1681702400.0, + "31": 1681702400.0, + "32": 1681702400.0, + "33": 1681702400.0, + "34": 1681702400.0, "35": 1681702400.0, + "36": 1681702400.0, + "37": 1681702400.0, + "38": 1681702400.0, + "39": 1681702400.0, "40": 1681702400.0, + "41": 1681702400.0, + "42": 1681702400.0, + "43": 1681702400.0, + "44": 1681702400.0, "45": 1681702400.0, + "46": 1681702400.0, + "47": 1681702400.0, + "48": 1681702400.0, + "49": 1681702400.0, "50": 1681702400.0, + "51": 1681702400.0, + "52": 1681702400.0, + "53": 1681702400.0, + "54": 1681702400.0, "55": 1681702400.0, + 
"56": 1681702400.0, + "57": 1681702400.0, + "58": 1681702400.0, + "59": 1681702400.0, "60": 1681702400.0, + "61": 1681702400.0, + "62": 1681702400.0, + "63": 1681702400.0, + "64": 1681702400.0, "65": 1681702400.0, + "66": 1681702400.0, + "67": 1681702400.0, + "68": 1681702400.0, + "69": 1681702400.0, "70": 1681702400.0, + "71": 1681702400.0, + "72": 1681702400.0, + "73": 1681702400.0, + "74": 1681702400.0, "75": 1681702400.0, + "76": 1681702400.0, + "77": 1681702400.0, + "78": 1681702400.0, + "79": 1681702400.0, "80": 1681702400.0, + "81": 1681702400.0, + "82": 1681702400.0, + "83": 1681702400.0, + "84": 1681702400.0, "85": 1681702400.0, + "86": 1681702400.0, + "87": 1681702400.0, + "88": 1681702400.0, + "89": 1681702400.0, "90": 1681702400.0, + "91": 1681702400.0, + "92": 1681702400.0, + "93": 1681702400.0, + "94": 1681702400.0, "95": 1681702400.0, + "96": 1681702400.0, + "97": 1681702400.0, + "98": 1681702400.0, + "99": 1681702400.0, "100": 1681702400.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 9.9076, - "5": 0.11074, - "10": 0.12173, - "15": 0.11269, - "20": 0.11096, - "25": 0.11356, - "30": 0.11295, - "35": 0.11469, - "40": 0.11165, - "45": 0.11166, - "50": 0.11293, - "55": 0.11499, - "60": 0.11319, - "65": 0.11468, - "70": 0.11141, - "75": 0.11225, - "80": 0.11302, - "85": 0.11225, - "90": 0.11321, - "95": 0.11254, - "100": 0.1116 + "1": 12.86117, + "2": 0.13933, + "3": 0.12865, + "4": 0.12909, + "5": 0.13086, + "6": 0.12937, + "7": 0.12955, + "8": 0.12832, + "9": 0.13012, + "10": 0.12917, + "11": 0.13042, + "12": 0.13029, + "13": 0.12973, + "14": 0.1288, + "15": 0.13228, + "16": 0.13052, + "17": 0.13054, + "18": 0.12967, + "19": 0.13242, + "20": 0.12969, + "21": 0.13088, + "22": 0.13019, + "23": 0.12965, + "24": 0.12899, + "25": 0.13258, + "26": 0.13001, + "27": 0.12913, + "28": 0.13084, + "29": 0.13114, + "30": 0.13032, + "31": 0.13065, + "32": 0.13047, + "33": 0.13027, + "34": 
0.13197, + "35": 0.13065, + "36": 0.13067, + "37": 0.12989, + "38": 0.13114, + "39": 0.12933, + "40": 0.12861, + "41": 0.12817, + "42": 0.13081, + "43": 0.12928, + "44": 0.13005, + "45": 0.13082, + "46": 0.12995, + "47": 0.12857, + "48": 0.13137, + "49": 0.12979, + "50": 0.13191, + "51": 0.15409, + "52": 0.13157, + "53": 0.14032, + "54": 0.13375, + "55": 0.13825, + "56": 0.13176, + "57": 0.13198, + "58": 0.13061, + "59": 0.12937, + "60": 0.1313, + "61": 0.14432, + "62": 0.1338, + "63": 0.13267, + "64": 0.13096, + "65": 0.13182, + "66": 0.13165, + "67": 0.13147, + "68": 0.13711, + "69": 0.13191, + "70": 0.13223, + "71": 0.13057, + "72": 0.13123, + "73": 0.13196, + "74": 0.1341, + "75": 0.13029, + "76": 0.13292, + "77": 0.13191, + "78": 0.1325, + "79": 0.13167, + "80": 0.1322, + "81": 0.13122, + "82": 0.1304, + "83": 0.1321, + "84": 0.13338, + "85": 0.13207, + "86": 0.13126, + "87": 0.13079, + "88": 0.13219, + "89": 0.13079, + "90": 0.13174, + "91": 0.13224, + "92": 0.13121, + "93": 0.13434, + "94": 0.13083, + "95": 0.13012, + "96": 0.13136, + "97": 0.13212, + "98": 0.13196, + "99": 0.13215, + "100": 0.13279 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..48eca17dac7 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84466, + "2": 10.84794, + "3": 10.84923, + "4": 10.8433, + "5": 10.88246, + "6": 10.8808, + "7": 10.86574, + "8": 10.85417, + "9": 
10.85542, + "10": 10.81812, + "11": 10.88726, + "12": 10.86329, + "13": 10.86656, + "14": 10.884, + "15": 10.8231, + "16": 10.82809, + "17": 10.79467, + "18": 10.81466, + "19": 10.80122, + "20": 10.71614, + "21": 10.69886, + "22": 10.56738, + "23": 10.71707, + "24": 10.60503, + "25": 10.55053, + "26": 10.60941, + "27": 10.62543, + "28": 10.57767, + "29": 10.59725, + "30": 10.38488, + "31": 10.15554, + "32": 10.48231, + "33": 10.4763, + "34": 10.2393, + "35": 10.29064, + "36": 10.25146, + "37": 10.35662, + "38": 10.21142, + "39": 10.42144, + "40": 10.11569, + "41": 10.16423, + "42": 10.23644, + "43": 9.86597, + "44": 9.98146, + "45": 9.86983, + "46": 9.85349, + "47": 10.16995, + "48": 9.876, + "49": 9.57237, + "50": 9.92525, + "51": 9.8709, + "52": 9.7737, + "53": 10.08149, + "54": 9.97376, + "55": 9.90036, + "56": 9.64783, + "57": 9.50136, + "58": 9.85199, + "59": 9.6034, + "60": 9.50993, + "61": 9.71315, + "62": 9.99373, + "63": 9.39358, + "64": 9.78904, + "65": 8.96358, + "66": 9.71142, + "67": 9.38175, + "68": 9.79833, + "69": 9.80889, + "70": 9.75039, + "71": 9.62004, + "72": 9.59387, + "73": 9.50631, + "74": 8.94916, + "75": 9.43188, + "76": 9.08702, + "77": 10.06886, + "78": 9.73459, + "79": 9.38325, + "80": 9.41272, + "81": 9.48499, + "82": 9.70672, + "83": 9.30939, + "84": 9.42428, + "85": 9.61991, + "86": 9.07811, + "87": 9.59541, + "88": 9.75596, + "89": 9.60274, + "90": 9.82165, + "91": 9.34268, + "92": 9.35878, + "93": 9.08116, + "94": 8.83791, + "95": 9.5238, + "96": 9.53556, + "97": 9.31807, + "98": 9.68183, + "99": 8.89422, + "100": 9.40138 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1713.0, + "2": 1750.0, + "3": 1744.0, + "4": 1895.0, + "5": 1839.0, + "6": 1881.0, + "7": 1850.0, + "8": 1743.0, + "9": 1810.0, + "10": 1452.0, + "11": 1886.0, + "12": 1752.0, + "13": 1834.0, + "14": 1774.0, + "15": 1909.0, + "16": 1803.0, + "17": 1927.0, + "18": 1765.0, + "19": 1847.0, + "20": 1707.0, + 
"21": 1950.0, + "22": 1794.0, + "23": 1974.0, + "24": 1676.0, + "25": 1652.0, + "26": 1774.0, + "27": 1799.0, + "28": 2135.0, + "29": 2048.0, + "30": 2032.0, + "31": 1599.0, + "32": 1929.0, + "33": 2143.0, + "34": 1874.0, + "35": 1974.0, + "36": 2011.0, + "37": 2364.0, + "38": 2199.0, + "39": 2363.0, + "40": 2239.0, + "41": 2269.0, + "42": 2228.0, + "43": 1972.0, + "44": 2070.0, + "45": 2033.0, + "46": 2357.0, + "47": 2520.0, + "48": 2316.0, + "49": 2307.0, + "50": 2302.0, + "51": 2514.0, + "52": 2430.0, + "53": 2840.0, + "54": 2677.0, + "55": 2394.0, + "56": 2601.0, + "57": 2341.0, + "58": 2837.0, + "59": 2789.0, + "60": 2425.0, + "61": 2923.0, + "62": 2591.0, + "63": 2416.0, + "64": 2937.0, + "65": 2572.0, + "66": 3008.0, + "67": 2843.0, + "68": 2761.0, + "69": 2834.0, + "70": 3108.0, + "71": 2989.0, + "72": 2316.0, + "73": 2950.0, + "74": 1899.0, + "75": 2378.0, + "76": 2962.0, + "77": 3343.0, + "78": 3183.0, + "79": 2979.0, + "80": 3209.0, + "81": 3583.0, + "82": 3160.0, + "83": 2776.0, + "84": 3242.0, + "85": 3425.0, + "86": 2720.0, + "87": 3820.0, + "88": 3050.0, + "89": 3297.0, + "90": 3069.0, + "91": 2685.0, + "92": 3061.0, + "93": 2584.0, + "94": 3338.0, + "95": 3406.0, + "96": 3389.0, + "97": 3104.0, + "98": 3583.0, + "99": 3229.0, + "100": 3225.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 490700288.0, + "2": 490700288.0, + "3": 490700288.0, + "4": 490700288.0, + "5": 490700288.0, + "6": 490700288.0, + "7": 490700288.0, + "8": 490700288.0, + "9": 490700288.0, + "10": 490700288.0, + "11": 490700288.0, + "12": 490700288.0, + "13": 490700288.0, + "14": 490700288.0, + "15": 490700288.0, + "16": 490700288.0, + "17": 490700288.0, + "18": 490700288.0, + "19": 490700288.0, + "20": 490700288.0, + "21": 490700288.0, + "22": 490700288.0, + "23": 490700288.0, + "24": 490700288.0, + "25": 490700288.0, + "26": 490700288.0, + "27": 490700288.0, + "28": 490700288.0, + "29": 490700288.0, + "30": 
490700288.0, + "31": 490700288.0, + "32": 490700288.0, + "33": 490700288.0, + "34": 490700288.0, + "35": 490700288.0, + "36": 490700288.0, + "37": 490700288.0, + "38": 490700288.0, + "39": 490700288.0, + "40": 490700288.0, + "41": 490700288.0, + "42": 490700288.0, + "43": 490700288.0, + "44": 490700288.0, + "45": 490700288.0, + "46": 490700288.0, + "47": 490700288.0, + "48": 490700288.0, + "49": 490700288.0, + "50": 490700288.0, + "51": 490700288.0, + "52": 490700288.0, + "53": 490700288.0, + "54": 490700288.0, + "55": 490700288.0, + "56": 490700288.0, + "57": 490700288.0, + "58": 490700288.0, + "59": 490700288.0, + "60": 490700288.0, + "61": 490700288.0, + "62": 490700288.0, + "63": 490700288.0, + "64": 490700288.0, + "65": 490700288.0, + "66": 490700288.0, + "67": 490700288.0, + "68": 490700288.0, + "69": 490700288.0, + "70": 490700288.0, + "71": 490700288.0, + "72": 490700288.0, + "73": 490700288.0, + "74": 490700288.0, + "75": 490700288.0, + "76": 490700288.0, + "77": 490700288.0, + "78": 490700288.0, + "79": 490700288.0, + "80": 490700288.0, + "81": 490700288.0, + "82": 490700288.0, + "83": 490700288.0, + "84": 490700288.0, + "85": 490700288.0, + "86": 490700288.0, + "87": 490700288.0, + "88": 490700288.0, + "89": 490700288.0, + "90": 490700288.0, + "91": 490700288.0, + "92": 490700288.0, + "93": 490700288.0, + "94": 490700288.0, + "95": 490700288.0, + "96": 490700288.0, + "97": 490700288.0, + "98": 490700288.0, + "99": 490700288.0, + "100": 490700288.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1553275392.0, + "2": 1681702400.0, + "3": 1681702400.0, + "4": 1681702400.0, + "5": 1681702400.0, + "6": 1681702400.0, + "7": 1681702400.0, + "8": 1681702400.0, + "9": 1681702400.0, + "10": 1681702400.0, + "11": 1681702400.0, + "12": 1681702400.0, + "13": 1681702400.0, + "14": 1681702400.0, + "15": 1681702400.0, + "16": 1681702400.0, + "17": 1681702400.0, + "18": 1681702400.0, + "19": 
1681702400.0, + "20": 1681702400.0, + "21": 1681702400.0, + "22": 1681702400.0, + "23": 1681702400.0, + "24": 1681702400.0, + "25": 1681702400.0, + "26": 1681702400.0, + "27": 1681702400.0, + "28": 1681702400.0, + "29": 1681702400.0, + "30": 1681702400.0, + "31": 1681702400.0, + "32": 1681702400.0, + "33": 1681702400.0, + "34": 1681702400.0, + "35": 1681702400.0, + "36": 1681702400.0, + "37": 1681702400.0, + "38": 1681702400.0, + "39": 1681702400.0, + "40": 1681702400.0, + "41": 1681702400.0, + "42": 1681702400.0, + "43": 1681702400.0, + "44": 1681702400.0, + "45": 1681702400.0, + "46": 1681702400.0, + "47": 1681702400.0, + "48": 1681702400.0, + "49": 1681702400.0, + "50": 1681702400.0, + "51": 1681702400.0, + "52": 1681702400.0, + "53": 1681702400.0, + "54": 1681702400.0, + "55": 1681702400.0, + "56": 1681702400.0, + "57": 1681702400.0, + "58": 1681702400.0, + "59": 1681702400.0, + "60": 1681702400.0, + "61": 1681702400.0, + "62": 1681702400.0, + "63": 1681702400.0, + "64": 1681702400.0, + "65": 1681702400.0, + "66": 1681702400.0, + "67": 1681702400.0, + "68": 1681702400.0, + "69": 1681702400.0, + "70": 1681702400.0, + "71": 1681702400.0, + "72": 1681702400.0, + "73": 1681702400.0, + "74": 1681702400.0, + "75": 1681702400.0, + "76": 1681702400.0, + "77": 1681702400.0, + "78": 1681702400.0, + "79": 1681702400.0, + "80": 1681702400.0, + "81": 1681702400.0, + "82": 1681702400.0, + "83": 1681702400.0, + "84": 1681702400.0, + "85": 1681702400.0, + "86": 1681702400.0, + "87": 1681702400.0, + "88": 1681702400.0, + "89": 1681702400.0, + "90": 1681702400.0, + "91": 1681702400.0, + "92": 1681702400.0, + "93": 1681702400.0, + "94": 1681702400.0, + "95": 1681702400.0, + "96": 1681702400.0, + "97": 1681702400.0, + "98": 1681702400.0, + "99": 1681702400.0, + "100": 1681702400.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 13.69891, + "2": 0.13291, + "3": 0.11069, + "4": 0.11005, + "5": 0.11137, + "6": 0.11181, + 
"7": 0.11024, + "8": 0.1118, + "9": 0.11019, + "10": 0.1115, + "11": 0.10932, + "12": 0.11102, + "13": 0.11122, + "14": 0.10885, + "15": 0.11063, + "16": 0.10921, + "17": 0.11073, + "18": 0.11138, + "19": 0.10984, + "20": 0.1097, + "21": 0.11067, + "22": 0.10976, + "23": 0.11182, + "24": 0.11128, + "25": 0.11361, + "26": 0.11246, + "27": 0.11156, + "28": 0.11079, + "29": 0.11109, + "30": 0.11063, + "31": 0.11335, + "32": 0.11146, + "33": 0.10977, + "34": 0.10982, + "35": 0.11082, + "36": 0.11114, + "37": 0.11175, + "38": 0.11066, + "39": 0.10976, + "40": 0.11142, + "41": 0.10972, + "42": 0.11235, + "43": 0.11078, + "44": 0.11209, + "45": 0.11117, + "46": 0.112, + "47": 0.11091, + "48": 0.11186, + "49": 0.1122, + "50": 0.11209, + "51": 0.11626, + "52": 0.1141, + "53": 0.11342, + "54": 0.11372, + "55": 0.1122, + "56": 0.11383, + "57": 0.1146, + "58": 0.1142, + "59": 0.11394, + "60": 0.1139, + "61": 0.11353, + "62": 0.11377, + "63": 0.11401, + "64": 0.11264, + "65": 0.11272, + "66": 0.11265, + "67": 0.11267, + "68": 0.11872, + "69": 0.1156, + "70": 0.11377, + "71": 0.11536, + "72": 0.11453, + "73": 0.11588, + "74": 0.11658, + "75": 0.11499, + "76": 0.11315, + "77": 0.11296, + "78": 0.11428, + "79": 0.11415, + "80": 0.11548, + "81": 0.11393, + "82": 0.11142, + "83": 0.11373, + "84": 0.1132, + "85": 0.11294, + "86": 0.11271, + "87": 0.11374, + "88": 0.11311, + "89": 0.11318, + "90": 0.1122, + "91": 0.11311, + "92": 0.11396, + "93": 0.11384, + "94": 0.11636, + "95": 0.11934, + "96": 0.12031, + "97": 0.11987, + "98": 0.11805, + "99": 0.12232, + "100": 0.12103 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 
00000000000..077c5e1317a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84466, + "2": 10.84794, + "3": 10.84923, + "4": 10.8433, + "5": 10.88246, + "6": 10.8808, + "7": 10.86574, + "8": 10.85417, + "9": 10.85542, + "10": 10.81812, + "11": 10.88726, + "12": 10.86329, + "13": 10.86656, + "14": 10.884, + "15": 10.8231, + "16": 10.82809, + "17": 10.79467, + "18": 10.81466, + "19": 10.80122, + "20": 10.71614, + "21": 10.69886, + "22": 10.56738, + "23": 10.71707, + "24": 10.60503, + "25": 10.55053, + "26": 10.60941, + "27": 10.62543, + "28": 10.57767, + "29": 10.59725, + "30": 10.38488, + "31": 10.15554, + "32": 10.48231, + "33": 10.4763, + "34": 10.2393, + "35": 10.29064, + "36": 10.25146, + "37": 10.35662, + "38": 10.21142, + "39": 10.42144, + "40": 10.11569, + "41": 10.16423, + "42": 10.23644, + "43": 9.86597, + "44": 9.98146, + "45": 9.86983, + "46": 9.85349, + "47": 10.16995, + "48": 9.876, + "49": 9.57237, + "50": 9.92525, + "51": 9.8709, + "52": 9.7737, + "53": 10.08149, + "54": 9.97376, + "55": 9.90036, + "56": 9.64783, + "57": 9.50136, + "58": 9.85199, + "59": 9.6034, + "60": 9.50993, + "61": 9.71315, + "62": 9.99373, + "63": 9.39358, + "64": 9.78904, + "65": 8.96358, + "66": 9.71142, + "67": 9.38175, + "68": 9.79833, + "69": 9.80889, + "70": 9.75039, + "71": 9.62004, + "72": 9.59387, + "73": 9.50631, + "74": 8.94916, + "75": 9.43188, + "76": 9.08702, + "77": 10.06886, + "78": 9.73459, + "79": 9.38325, + "80": 9.41272, + "81": 9.48499, + "82": 9.70672, + "83": 9.30939, + "84": 9.42428, + "85": 9.61991, + "86": 9.07811, + "87": 9.59541, + "88": 9.75596, + "89": 9.60274, + "90": 9.82165, + "91": 9.34268, + "92": 9.35878, + "93": 9.08116, + "94": 8.83791, + "95": 9.5238, + "96": 9.53556, + "97": 9.31807, + "98": 
9.68183, + "99": 8.89422, + "100": 9.40138 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1713.0, + "2": 1750.0, + "3": 1744.0, + "4": 1895.0, + "5": 1839.0, + "6": 1881.0, + "7": 1850.0, + "8": 1743.0, + "9": 1810.0, + "10": 1452.0, + "11": 1886.0, + "12": 1752.0, + "13": 1834.0, + "14": 1774.0, + "15": 1909.0, + "16": 1803.0, + "17": 1927.0, + "18": 1765.0, + "19": 1847.0, + "20": 1707.0, + "21": 1950.0, + "22": 1794.0, + "23": 1974.0, + "24": 1676.0, + "25": 1652.0, + "26": 1774.0, + "27": 1799.0, + "28": 2135.0, + "29": 2048.0, + "30": 2032.0, + "31": 1599.0, + "32": 1929.0, + "33": 2143.0, + "34": 1874.0, + "35": 1974.0, + "36": 2011.0, + "37": 2364.0, + "38": 2199.0, + "39": 2363.0, + "40": 2239.0, + "41": 2269.0, + "42": 2228.0, + "43": 1972.0, + "44": 2070.0, + "45": 2033.0, + "46": 2357.0, + "47": 2520.0, + "48": 2316.0, + "49": 2307.0, + "50": 2302.0, + "51": 2514.0, + "52": 2430.0, + "53": 2840.0, + "54": 2677.0, + "55": 2394.0, + "56": 2601.0, + "57": 2341.0, + "58": 2837.0, + "59": 2789.0, + "60": 2425.0, + "61": 2923.0, + "62": 2591.0, + "63": 2416.0, + "64": 2937.0, + "65": 2572.0, + "66": 3008.0, + "67": 2843.0, + "68": 2761.0, + "69": 2834.0, + "70": 3108.0, + "71": 2989.0, + "72": 2316.0, + "73": 2950.0, + "74": 1899.0, + "75": 2378.0, + "76": 2962.0, + "77": 3343.0, + "78": 3183.0, + "79": 2979.0, + "80": 3209.0, + "81": 3583.0, + "82": 3160.0, + "83": 2776.0, + "84": 3242.0, + "85": 3425.0, + "86": 2720.0, + "87": 3820.0, + "88": 3050.0, + "89": 3297.0, + "90": 3069.0, + "91": 2685.0, + "92": 3061.0, + "93": 2584.0, + "94": 3338.0, + "95": 3406.0, + "96": 3389.0, + "97": 3104.0, + "98": 3583.0, + "99": 3229.0, + "100": 3225.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 490700288.0, + "2": 490700288.0, + "3": 490700288.0, + "4": 490700288.0, + "5": 490700288.0, + "6": 490700288.0, + "7": 490700288.0, + "8": 
490700288.0, + "9": 490700288.0, + "10": 490700288.0, + "11": 490700288.0, + "12": 490700288.0, + "13": 490700288.0, + "14": 490700288.0, + "15": 490700288.0, + "16": 490700288.0, + "17": 490700288.0, + "18": 490700288.0, + "19": 490700288.0, + "20": 490700288.0, + "21": 490700288.0, + "22": 490700288.0, + "23": 490700288.0, + "24": 490700288.0, + "25": 490700288.0, + "26": 490700288.0, + "27": 490700288.0, + "28": 490700288.0, + "29": 490700288.0, + "30": 490700288.0, + "31": 490700288.0, + "32": 490700288.0, + "33": 490700288.0, + "34": 490700288.0, + "35": 490700288.0, + "36": 490700288.0, + "37": 490700288.0, + "38": 490700288.0, + "39": 490700288.0, + "40": 490700288.0, + "41": 490700288.0, + "42": 490700288.0, + "43": 490700288.0, + "44": 490700288.0, + "45": 490700288.0, + "46": 490700288.0, + "47": 490700288.0, + "48": 490700288.0, + "49": 490700288.0, + "50": 490700288.0, + "51": 490700288.0, + "52": 490700288.0, + "53": 490700288.0, + "54": 490700288.0, + "55": 490700288.0, + "56": 490700288.0, + "57": 490700288.0, + "58": 490700288.0, + "59": 490700288.0, + "60": 490700288.0, + "61": 490700288.0, + "62": 490700288.0, + "63": 490700288.0, + "64": 490700288.0, + "65": 490700288.0, + "66": 490700288.0, + "67": 490700288.0, + "68": 490700288.0, + "69": 490700288.0, + "70": 490700288.0, + "71": 490700288.0, + "72": 490700288.0, + "73": 490700288.0, + "74": 490700288.0, + "75": 490700288.0, + "76": 490700288.0, + "77": 490700288.0, + "78": 490700288.0, + "79": 490700288.0, + "80": 490700288.0, + "81": 490700288.0, + "82": 490700288.0, + "83": 490700288.0, + "84": 490700288.0, + "85": 490700288.0, + "86": 490700288.0, + "87": 490700288.0, + "88": 490700288.0, + "89": 490700288.0, + "90": 490700288.0, + "91": 490700288.0, + "92": 490700288.0, + "93": 490700288.0, + "94": 490700288.0, + "95": 490700288.0, + "96": 490700288.0, + "97": 490700288.0, + "98": 490700288.0, + "99": 490700288.0, + "100": 490700288.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 
1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1553275392.0, + "2": 1681702400.0, + "3": 1681702400.0, + "4": 1681702400.0, + "5": 1681702400.0, + "6": 1681702400.0, + "7": 1681702400.0, + "8": 1681702400.0, + "9": 1681702400.0, + "10": 1681702400.0, + "11": 1681702400.0, + "12": 1681702400.0, + "13": 1681702400.0, + "14": 1681702400.0, + "15": 1681702400.0, + "16": 1681702400.0, + "17": 1681702400.0, + "18": 1681702400.0, + "19": 1681702400.0, + "20": 1681702400.0, + "21": 1681702400.0, + "22": 1681702400.0, + "23": 1681702400.0, + "24": 1681702400.0, + "25": 1681702400.0, + "26": 1681702400.0, + "27": 1681702400.0, + "28": 1681702400.0, + "29": 1681702400.0, + "30": 1681702400.0, + "31": 1681702400.0, + "32": 1681702400.0, + "33": 1681702400.0, + "34": 1681702400.0, + "35": 1681702400.0, + "36": 1681702400.0, + "37": 1681702400.0, + "38": 1681702400.0, + "39": 1681702400.0, + "40": 1681702400.0, + "41": 1681702400.0, + "42": 1681702400.0, + "43": 1681702400.0, + "44": 1681702400.0, + "45": 1681702400.0, + "46": 1681702400.0, + "47": 1681702400.0, + "48": 1681702400.0, + "49": 1681702400.0, + "50": 1681702400.0, + "51": 1681702400.0, + "52": 1681702400.0, + "53": 1681702400.0, + "54": 1681702400.0, + "55": 1681702400.0, + "56": 1681702400.0, + "57": 1681702400.0, + "58": 1681702400.0, + "59": 1681702400.0, + "60": 1681702400.0, + "61": 1681702400.0, + "62": 1681702400.0, + "63": 1681702400.0, + "64": 1681702400.0, + "65": 1681702400.0, + "66": 1681702400.0, + "67": 1681702400.0, + "68": 1681702400.0, + "69": 1681702400.0, + "70": 1681702400.0, + "71": 1681702400.0, + "72": 1681702400.0, + "73": 1681702400.0, + "74": 1681702400.0, + "75": 1681702400.0, + "76": 1681702400.0, + "77": 1681702400.0, + "78": 1681702400.0, + "79": 1681702400.0, + "80": 1681702400.0, + "81": 1681702400.0, + "82": 1681702400.0, + "83": 1681702400.0, + "84": 1681702400.0, + "85": 1681702400.0, + "86": 1681702400.0, + "87": 1681702400.0, + "88": 1681702400.0, + "89": 
1681702400.0, + "90": 1681702400.0, + "91": 1681702400.0, + "92": 1681702400.0, + "93": 1681702400.0, + "94": 1681702400.0, + "95": 1681702400.0, + "96": 1681702400.0, + "97": 1681702400.0, + "98": 1681702400.0, + "99": 1681702400.0, + "100": 1681702400.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.96096, + "2": 0.14328, + "3": 0.13234, + "4": 0.12983, + "5": 0.1339, + "6": 0.13424, + "7": 0.13558, + "8": 0.13644, + "9": 0.13434, + "10": 0.13106, + "11": 0.13377, + "12": 0.13148, + "13": 0.13136, + "14": 0.13331, + "15": 0.13429, + "16": 0.13208, + "17": 0.1316, + "18": 0.13139, + "19": 0.1287, + "20": 0.13199, + "21": 0.1318, + "22": 0.13196, + "23": 0.13019, + "24": 0.1317, + "25": 0.13217, + "26": 0.12983, + "27": 0.12928, + "28": 0.13258, + "29": 0.13441, + "30": 0.13276, + "31": 0.13264, + "32": 0.13228, + "33": 0.13159, + "34": 0.13219, + "35": 0.133, + "36": 0.13166, + "37": 0.13174, + "38": 0.1304, + "39": 0.1314, + "40": 0.13029, + "41": 0.13074, + "42": 0.12839, + "43": 0.13136, + "44": 0.13209, + "45": 0.12923, + "46": 0.13318, + "47": 0.1319, + "48": 0.13259, + "49": 0.13079, + "50": 0.12933, + "51": 0.15172, + "52": 0.1333, + "53": 0.14462, + "54": 0.13216, + "55": 0.13399, + "56": 0.13553, + "57": 0.13325, + "58": 0.13361, + "59": 0.13333, + "60": 0.13354, + "61": 0.13207, + "62": 0.1338, + "63": 0.13105, + "64": 0.13392, + "65": 0.13319, + "66": 0.13384, + "67": 0.13217, + "68": 0.13367, + "69": 0.13229, + "70": 0.13221, + "71": 0.1335, + "72": 0.13557, + "73": 0.13385, + "74": 0.13485, + "75": 0.13327, + "76": 0.13288, + "77": 0.13329, + "78": 0.13402, + "79": 0.13416, + "80": 0.13423, + "81": 0.13316, + "82": 0.13278, + "83": 0.13364, + "84": 0.13264, + "85": 0.13203, + "86": 0.13235, + "87": 0.13381, + "88": 0.13365, + "89": 0.13338, + "90": 0.1334, + "91": 0.13418, + "92": 0.13669, + "93": 0.13477, + "94": 0.13244, + "95": 0.13237, + "96": 0.13182, + "97": 0.13149, + "98": 
0.13223, + "99": 0.13163, + "100": 0.1326 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..b9b764a3fd2 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.88734, + "2": 10.90383, + "3": 10.88081, + "4": 10.88371, + "5": 10.90948, + "6": 10.91613, + "7": 10.89451, + "8": 10.88622, + "9": 10.89544, + "10": 10.87763, + "11": 10.89061, + "12": 10.89565, + "13": 10.9078, + "14": 10.90725, + "15": 10.86371, + "16": 10.86172, + "17": 10.81949, + "18": 10.84638, + "19": 10.83804, + "20": 10.7509, + "21": 10.72756, + "22": 10.6229, + "23": 10.74449, + "24": 10.63231, + "25": 10.59917, + "26": 10.64491, + "27": 10.64672, + "28": 10.59686, + "29": 10.60675, + "30": 10.40104, + "31": 10.18011, + "32": 10.49048, + "33": 10.48347, + "34": 10.251, + "35": 10.30793, + "36": 10.25618, + "37": 10.36503, + "38": 10.2179, + "39": 10.41024, + "40": 10.10902, + "41": 10.16109, + "42": 10.22733, + "43": 9.87492, + "44": 9.97842, + "45": 9.85831, + "46": 9.85388, + "47": 10.15356, + "48": 9.86194, + "49": 9.55678, + "50": 9.92111, + "51": 9.86199, + "52": 9.75595, + "53": 10.07575, + "54": 9.96137, + "55": 9.88529, + "56": 9.63476, + "57": 9.49273, + "58": 9.83039, + "59": 9.59148, + "60": 9.50737, + "61": 9.70512, + "62": 9.98404, + "63": 9.37583, + "64": 9.77923, + "65": 8.95828, + "66": 9.70623, + "67": 9.37471, + "68": 9.78699, + "69": 9.78826, + "70": 
9.72733, + "71": 9.61217, + "72": 9.5913, + "73": 9.49847, + "74": 8.95651, + "75": 9.42571, + "76": 9.09602, + "77": 10.06687, + "78": 9.73141, + "79": 9.37953, + "80": 9.40559, + "81": 9.48179, + "82": 9.694, + "83": 9.31183, + "84": 9.41312, + "85": 9.61572, + "86": 9.07774, + "87": 9.59695, + "88": 9.74877, + "89": 9.60255, + "90": 9.81277, + "91": 9.34555, + "92": 9.36555, + "93": 9.07714, + "94": 8.83102, + "95": 9.52119, + "96": 9.52503, + "97": 9.31354, + "98": 9.6769, + "99": 8.8896, + "100": 9.40111 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1614.0, + "2": 1820.0, + "3": 1724.0, + "4": 1889.0, + "5": 2021.0, + "6": 1920.0, + "7": 1930.0, + "8": 1736.0, + "9": 1989.0, + "10": 1399.0, + "11": 2051.0, + "12": 1859.0, + "13": 2007.0, + "14": 1830.0, + "15": 1872.0, + "16": 1877.0, + "17": 1960.0, + "18": 1747.0, + "19": 1815.0, + "20": 1692.0, + "21": 2039.0, + "22": 1713.0, + "23": 1963.0, + "24": 1743.0, + "25": 1784.0, + "26": 1793.0, + "27": 1860.0, + "28": 1956.0, + "29": 2152.0, + "30": 1900.0, + "31": 1685.0, + "32": 2000.0, + "33": 2085.0, + "34": 1867.0, + "35": 2081.0, + "36": 1975.0, + "37": 2341.0, + "38": 2316.0, + "39": 2438.0, + "40": 2233.0, + "41": 2306.0, + "42": 2319.0, + "43": 2082.0, + "44": 2158.0, + "45": 2144.0, + "46": 2227.0, + "47": 2675.0, + "48": 2473.0, + "49": 2231.0, + "50": 2513.0, + "51": 2611.0, + "52": 2560.0, + "53": 3169.0, + "54": 2698.0, + "55": 2493.0, + "56": 2791.0, + "57": 2298.0, + "58": 3182.0, + "59": 2851.0, + "60": 2440.0, + "61": 2909.0, + "62": 2834.0, + "63": 2389.0, + "64": 3187.0, + "65": 2763.0, + "66": 3321.0, + "67": 2818.0, + "68": 2835.0, + "69": 3037.0, + "70": 3219.0, + "71": 3046.0, + "72": 2359.0, + "73": 2939.0, + "74": 2061.0, + "75": 2601.0, + "76": 2971.0, + "77": 3400.0, + "78": 3295.0, + "79": 3211.0, + "80": 3341.0, + "81": 3756.0, + "82": 3240.0, + "83": 2851.0, + "84": 3378.0, + "85": 3433.0, + "86": 2818.0, + "87": 3852.0, + 
"88": 3000.0, + "89": 3574.0, + "90": 3019.0, + "91": 2624.0, + "92": 3179.0, + "93": 2831.0, + "94": 3483.0, + "95": 3417.0, + "96": 3492.0, + "97": 3114.0, + "98": 3675.0, + "99": 3172.0, + "100": 3372.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 462455808.0, + "2": 462455808.0, + "3": 462455808.0, + "4": 462455808.0, + "5": 462455808.0, + "6": 462455808.0, + "7": 462455808.0, + "8": 462455808.0, + "9": 462455808.0, + "10": 462455808.0, + "11": 462455808.0, + "12": 462455808.0, + "13": 462455808.0, + "14": 462455808.0, + "15": 462455808.0, + "16": 462455808.0, + "17": 462455808.0, + "18": 462455808.0, + "19": 462455808.0, + "20": 462455808.0, + "21": 462455808.0, + "22": 462455808.0, + "23": 462455808.0, + "24": 462455808.0, + "25": 462455808.0, + "26": 462455808.0, + "27": 462455808.0, + "28": 462455808.0, + "29": 462455808.0, + "30": 462455808.0, + "31": 462455808.0, + "32": 462455808.0, + "33": 462455808.0, + "34": 462455808.0, + "35": 462455808.0, + "36": 462455808.0, + "37": 462455808.0, + "38": 462455808.0, + "39": 462455808.0, + "40": 462455808.0, + "41": 462455808.0, + "42": 462455808.0, + "43": 462455808.0, + "44": 462455808.0, + "45": 462455808.0, + "46": 462455808.0, + "47": 462455808.0, + "48": 462455808.0, + "49": 462455808.0, + "50": 462455808.0, + "51": 462455808.0, + "52": 462455808.0, + "53": 462455808.0, + "54": 462455808.0, + "55": 462455808.0, + "56": 462455808.0, + "57": 462455808.0, + "58": 462455808.0, + "59": 462455808.0, + "60": 462455808.0, + "61": 462455808.0, + "62": 462455808.0, + "63": 462455808.0, + "64": 462455808.0, + "65": 462455808.0, + "66": 462455808.0, + "67": 462455808.0, + "68": 462455808.0, + "69": 462455808.0, + "70": 462455808.0, + "71": 462455808.0, + "72": 462455808.0, + "73": 462455808.0, + "74": 462455808.0, + "75": 462455808.0, + "76": 462455808.0, + "77": 462455808.0, + "78": 462455808.0, + "79": 462455808.0, + "80": 462455808.0, + "81": 
462455808.0, + "82": 462455808.0, + "83": 462455808.0, + "84": 462455808.0, + "85": 462455808.0, + "86": 462455808.0, + "87": 462455808.0, + "88": 462455808.0, + "89": 462455808.0, + "90": 462455808.0, + "91": 462455808.0, + "92": 462455808.0, + "93": 462455808.0, + "94": 462455808.0, + "95": 462455808.0, + "96": 462455808.0, + "97": 462455808.0, + "98": 462455808.0, + "99": 462455808.0, + "100": 462455808.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2529822720.0, + "2": 2658249728.0, + "3": 2658249728.0, + "4": 2658249728.0, + "5": 2658249728.0, + "6": 2658249728.0, + "7": 2658249728.0, + "8": 2658249728.0, + "9": 2658249728.0, + "10": 2658249728.0, + "11": 2658249728.0, + "12": 2658249728.0, + "13": 2658249728.0, + "14": 2658249728.0, + "15": 2658249728.0, + "16": 2658249728.0, + "17": 2658249728.0, + "18": 2658249728.0, + "19": 2658249728.0, + "20": 2658249728.0, + "21": 2658249728.0, + "22": 2658249728.0, + "23": 2658249728.0, + "24": 2658249728.0, + "25": 2658249728.0, + "26": 2658249728.0, + "27": 2658249728.0, + "28": 2658249728.0, + "29": 2658249728.0, + "30": 2658249728.0, + "31": 2658249728.0, + "32": 2658249728.0, + "33": 2658249728.0, + "34": 2658249728.0, + "35": 2658249728.0, + "36": 2658249728.0, + "37": 2658249728.0, + "38": 2658249728.0, + "39": 2658249728.0, + "40": 2658249728.0, + "41": 2658249728.0, + "42": 2658249728.0, + "43": 2658249728.0, + "44": 2658249728.0, + "45": 2658249728.0, + "46": 2658249728.0, + "47": 2658249728.0, + "48": 2658249728.0, + "49": 2658249728.0, + "50": 2658249728.0, + "51": 2658249728.0, + "52": 2658249728.0, + "53": 2658249728.0, + "54": 2658249728.0, + "55": 2658249728.0, + "56": 2658249728.0, + "57": 2658249728.0, + "58": 2658249728.0, + "59": 2658249728.0, + "60": 2658249728.0, + "61": 2658249728.0, + "62": 2658249728.0, + "63": 2658249728.0, + "64": 2658249728.0, + "65": 2658249728.0, + "66": 2658249728.0, + "67": 2658249728.0, + "68": 
2658249728.0, + "69": 2658249728.0, + "70": 2658249728.0, + "71": 2658249728.0, + "72": 2658249728.0, + "73": 2658249728.0, + "74": 2658249728.0, + "75": 2658249728.0, + "76": 2658249728.0, + "77": 2658249728.0, + "78": 2658249728.0, + "79": 2658249728.0, + "80": 2658249728.0, + "81": 2658249728.0, + "82": 2658249728.0, + "83": 2658249728.0, + "84": 2658249728.0, + "85": 2658249728.0, + "86": 2658249728.0, + "87": 2658249728.0, + "88": 2658249728.0, + "89": 2658249728.0, + "90": 2658249728.0, + "91": 2658249728.0, + "92": 2658249728.0, + "93": 2658249728.0, + "94": 2658249728.0, + "95": 2658249728.0, + "96": 2658249728.0, + "97": 2658249728.0, + "98": 2658249728.0, + "99": 2658249728.0, + "100": 2658249728.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.80127, + "2": 0.21048, + "3": 0.19424, + "4": 0.19406, + "5": 0.19305, + "6": 0.46258, + "7": 0.19395, + "8": 0.19336, + "9": 0.19347, + "10": 0.19469, + "11": 0.19315, + "12": 0.19201, + "13": 0.19467, + "14": 0.19268, + "15": 0.19342, + "16": 0.19454, + "17": 0.1928, + "18": 0.19024, + "19": 0.19035, + "20": 0.19633, + "21": 0.19068, + "22": 0.19007, + "23": 0.19089, + "24": 0.18966, + "25": 0.18965, + "26": 0.19703, + "27": 0.19046, + "28": 0.18906, + "29": 0.18887, + "30": 0.19, + "31": 0.19237, + "32": 0.19083, + "33": 0.18835, + "34": 0.18864, + "35": 0.18967, + "36": 0.19256, + "37": 0.18907, + "38": 0.18914, + "39": 0.18932, + "40": 0.18927, + "41": 0.18947, + "42": 0.19022, + "43": 0.18879, + "44": 0.1889, + "45": 0.19016, + "46": 0.18968, + "47": 0.19422, + "48": 0.19149, + "49": 0.19174, + "50": 0.18898, + "51": 0.19117, + "52": 0.18823, + "53": 0.42924, + "54": 0.18787, + "55": 0.18684, + "56": 0.19129, + "57": 0.18962, + "58": 0.18731, + "59": 0.18736, + "60": 0.18779, + "61": 0.19123, + "62": 0.1899, + "63": 0.18761, + "64": 0.24503, + "65": 0.2384, + "66": 0.24805, + "67": 0.23845, + "68": 0.23074, + "69": 0.23115, + "70": 0.23619, + 
"71": 0.23855, + "72": 0.24362, + "73": 0.28624, + "74": 0.30988, + "75": 0.31666, + "76": 0.25387, + "77": 0.2495, + "78": 0.1922, + "79": 0.18998, + "80": 0.18827, + "81": 0.18839, + "82": 0.18827, + "83": 0.19179, + "84": 0.18895, + "85": 0.18764, + "86": 0.18715, + "87": 0.18798, + "88": 0.19102, + "89": 0.18913, + "90": 0.18734, + "91": 0.18768, + "92": 0.1878, + "93": 0.19083, + "94": 0.19033, + "95": 0.18891, + "96": 0.18801, + "97": 0.1884, + "98": 0.18802, + "99": 0.1921, + "100": 0.1908 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..37b3ad50408 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.88734, + "2": 10.90383, + "3": 10.88081, + "4": 10.88371, + "5": 10.90948, + "6": 10.91613, + "7": 10.89451, + "8": 10.88622, + "9": 10.89544, + "10": 10.87763, + "11": 10.89061, + "12": 10.89565, + "13": 10.9078, + "14": 10.90725, + "15": 10.86371, + "16": 10.86172, + "17": 10.81949, + "18": 10.84638, + "19": 10.83804, + "20": 10.7509, + "21": 10.72756, + "22": 10.6229, + "23": 10.74449, + "24": 10.63231, + "25": 10.59917, + "26": 10.64491, + "27": 10.64672, + "28": 10.59686, + "29": 10.60675, + "30": 10.40104, + "31": 10.18011, + "32": 10.49048, + "33": 10.48347, + "34": 10.251, + "35": 10.30793, + "36": 10.25618, + "37": 10.36503, + "38": 10.2179, + "39": 10.41024, + "40": 10.10902, + "41": 10.16109, + "42": 10.22733, + "43": 9.87492, + 
"44": 9.97842, + "45": 9.85831, + "46": 9.85388, + "47": 10.15356, + "48": 9.86194, + "49": 9.55678, + "50": 9.92111, + "51": 9.86199, + "52": 9.75595, + "53": 10.07575, + "54": 9.96137, + "55": 9.88529, + "56": 9.63476, + "57": 9.49273, + "58": 9.83039, + "59": 9.59148, + "60": 9.50737, + "61": 9.70512, + "62": 9.98404, + "63": 9.37583, + "64": 9.77923, + "65": 8.95828, + "66": 9.70623, + "67": 9.37471, + "68": 9.78699, + "69": 9.78826, + "70": 9.72733, + "71": 9.61217, + "72": 9.5913, + "73": 9.49847, + "74": 8.95651, + "75": 9.42571, + "76": 9.09602, + "77": 10.06687, + "78": 9.73141, + "79": 9.37953, + "80": 9.40559, + "81": 9.48179, + "82": 9.694, + "83": 9.31183, + "84": 9.41312, + "85": 9.61572, + "86": 9.07774, + "87": 9.59695, + "88": 9.74877, + "89": 9.60255, + "90": 9.81277, + "91": 9.34555, + "92": 9.36555, + "93": 9.07714, + "94": 8.83102, + "95": 9.52119, + "96": 9.52503, + "97": 9.31354, + "98": 9.6769, + "99": 8.8896, + "100": 9.40111 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1614.0, + "2": 1820.0, + "3": 1724.0, + "4": 1889.0, + "5": 2021.0, + "6": 1920.0, + "7": 1930.0, + "8": 1736.0, + "9": 1989.0, + "10": 1399.0, + "11": 2051.0, + "12": 1859.0, + "13": 2007.0, + "14": 1830.0, + "15": 1872.0, + "16": 1877.0, + "17": 1960.0, + "18": 1747.0, + "19": 1815.0, + "20": 1692.0, + "21": 2039.0, + "22": 1713.0, + "23": 1963.0, + "24": 1743.0, + "25": 1784.0, + "26": 1793.0, + "27": 1860.0, + "28": 1956.0, + "29": 2152.0, + "30": 1900.0, + "31": 1685.0, + "32": 2000.0, + "33": 2085.0, + "34": 1867.0, + "35": 2081.0, + "36": 1975.0, + "37": 2341.0, + "38": 2316.0, + "39": 2438.0, + "40": 2233.0, + "41": 2306.0, + "42": 2319.0, + "43": 2082.0, + "44": 2158.0, + "45": 2144.0, + "46": 2227.0, + "47": 2675.0, + "48": 2473.0, + "49": 2231.0, + "50": 2513.0, + "51": 2611.0, + "52": 2560.0, + "53": 3169.0, + "54": 2698.0, + "55": 2493.0, + "56": 2791.0, + "57": 2298.0, + "58": 3182.0, + "59": 2851.0, 
+ "60": 2440.0, + "61": 2909.0, + "62": 2834.0, + "63": 2389.0, + "64": 3187.0, + "65": 2763.0, + "66": 3321.0, + "67": 2818.0, + "68": 2835.0, + "69": 3037.0, + "70": 3219.0, + "71": 3046.0, + "72": 2359.0, + "73": 2939.0, + "74": 2061.0, + "75": 2601.0, + "76": 2971.0, + "77": 3400.0, + "78": 3295.0, + "79": 3211.0, + "80": 3341.0, + "81": 3756.0, + "82": 3240.0, + "83": 2851.0, + "84": 3378.0, + "85": 3433.0, + "86": 2818.0, + "87": 3852.0, + "88": 3000.0, + "89": 3574.0, + "90": 3019.0, + "91": 2624.0, + "92": 3179.0, + "93": 2831.0, + "94": 3483.0, + "95": 3417.0, + "96": 3492.0, + "97": 3114.0, + "98": 3675.0, + "99": 3172.0, + "100": 3372.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 462455808.0, + "2": 462455808.0, + "3": 462455808.0, + "4": 462455808.0, + "5": 462455808.0, + "6": 462455808.0, + "7": 462455808.0, + "8": 462455808.0, + "9": 462455808.0, + "10": 462455808.0, + "11": 462455808.0, + "12": 462455808.0, + "13": 462455808.0, + "14": 462455808.0, + "15": 462455808.0, + "16": 462455808.0, + "17": 462455808.0, + "18": 462455808.0, + "19": 462455808.0, + "20": 462455808.0, + "21": 462455808.0, + "22": 462455808.0, + "23": 462455808.0, + "24": 462455808.0, + "25": 462455808.0, + "26": 462455808.0, + "27": 462455808.0, + "28": 462455808.0, + "29": 462455808.0, + "30": 462455808.0, + "31": 462455808.0, + "32": 462455808.0, + "33": 462455808.0, + "34": 462455808.0, + "35": 462455808.0, + "36": 462455808.0, + "37": 462455808.0, + "38": 462455808.0, + "39": 462455808.0, + "40": 462455808.0, + "41": 462455808.0, + "42": 462455808.0, + "43": 462455808.0, + "44": 462455808.0, + "45": 462455808.0, + "46": 462455808.0, + "47": 462455808.0, + "48": 462455808.0, + "49": 462455808.0, + "50": 462455808.0, + "51": 462455808.0, + "52": 462455808.0, + "53": 462455808.0, + "54": 462455808.0, + "55": 462455808.0, + "56": 462455808.0, + "57": 462455808.0, + "58": 462455808.0, + "59": 462455808.0, + 
"60": 462455808.0, + "61": 462455808.0, + "62": 462455808.0, + "63": 462455808.0, + "64": 462455808.0, + "65": 462455808.0, + "66": 462455808.0, + "67": 462455808.0, + "68": 462455808.0, + "69": 462455808.0, + "70": 462455808.0, + "71": 462455808.0, + "72": 462455808.0, + "73": 462455808.0, + "74": 462455808.0, + "75": 462455808.0, + "76": 462455808.0, + "77": 462455808.0, + "78": 462455808.0, + "79": 462455808.0, + "80": 462455808.0, + "81": 462455808.0, + "82": 462455808.0, + "83": 462455808.0, + "84": 462455808.0, + "85": 462455808.0, + "86": 462455808.0, + "87": 462455808.0, + "88": 462455808.0, + "89": 462455808.0, + "90": 462455808.0, + "91": 462455808.0, + "92": 462455808.0, + "93": 462455808.0, + "94": 462455808.0, + "95": 462455808.0, + "96": 462455808.0, + "97": 462455808.0, + "98": 462455808.0, + "99": 462455808.0, + "100": 462455808.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2529822720.0, + "2": 2658249728.0, + "3": 2658249728.0, + "4": 2658249728.0, + "5": 2658249728.0, + "6": 2658249728.0, + "7": 2658249728.0, + "8": 2658249728.0, + "9": 2658249728.0, + "10": 2658249728.0, + "11": 2658249728.0, + "12": 2658249728.0, + "13": 2658249728.0, + "14": 2658249728.0, + "15": 2658249728.0, + "16": 2658249728.0, + "17": 2658249728.0, + "18": 2658249728.0, + "19": 2658249728.0, + "20": 2658249728.0, + "21": 2658249728.0, + "22": 2658249728.0, + "23": 2658249728.0, + "24": 2658249728.0, + "25": 2658249728.0, + "26": 2658249728.0, + "27": 2658249728.0, + "28": 2658249728.0, + "29": 2658249728.0, + "30": 2658249728.0, + "31": 2658249728.0, + "32": 2658249728.0, + "33": 2658249728.0, + "34": 2658249728.0, + "35": 2658249728.0, + "36": 2658249728.0, + "37": 2658249728.0, + "38": 2658249728.0, + "39": 2658249728.0, + "40": 2658249728.0, + "41": 2658249728.0, + "42": 2658249728.0, + "43": 2658249728.0, + "44": 2658249728.0, + "45": 2658249728.0, + "46": 2658249728.0, + "47": 2658249728.0, + 
"48": 2658249728.0, + "49": 2658249728.0, + "50": 2658249728.0, + "51": 2658249728.0, + "52": 2658249728.0, + "53": 2658249728.0, + "54": 2658249728.0, + "55": 2658249728.0, + "56": 2658249728.0, + "57": 2658249728.0, + "58": 2658249728.0, + "59": 2658249728.0, + "60": 2658249728.0, + "61": 2658249728.0, + "62": 2658249728.0, + "63": 2658249728.0, + "64": 2658249728.0, + "65": 2658249728.0, + "66": 2658249728.0, + "67": 2658249728.0, + "68": 2658249728.0, + "69": 2658249728.0, + "70": 2658249728.0, + "71": 2658249728.0, + "72": 2658249728.0, + "73": 2658249728.0, + "74": 2658249728.0, + "75": 2658249728.0, + "76": 2658249728.0, + "77": 2658249728.0, + "78": 2658249728.0, + "79": 2658249728.0, + "80": 2658249728.0, + "81": 2658249728.0, + "82": 2658249728.0, + "83": 2658249728.0, + "84": 2658249728.0, + "85": 2658249728.0, + "86": 2658249728.0, + "87": 2658249728.0, + "88": 2658249728.0, + "89": 2658249728.0, + "90": 2658249728.0, + "91": 2658249728.0, + "92": 2658249728.0, + "93": 2658249728.0, + "94": 2658249728.0, + "95": 2658249728.0, + "96": 2658249728.0, + "97": 2658249728.0, + "98": 2658249728.0, + "99": 2658249728.0, + "100": 2658249728.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.21979, + "2": 0.23993, + "3": 0.20666, + "4": 0.20438, + "5": 0.18758, + "6": 0.18742, + "7": 0.35545, + "8": 0.19091, + "9": 0.18666, + "10": 0.18676, + "11": 0.18722, + "12": 0.18603, + "13": 0.18977, + "14": 0.18646, + "15": 0.18634, + "16": 0.18662, + "17": 0.1894, + "18": 0.18693, + "19": 0.18807, + "20": 0.18641, + "21": 0.18648, + "22": 0.18729, + "23": 0.18572, + "24": 0.18999, + "25": 0.18548, + "26": 0.1861, + "27": 0.18884, + "28": 0.18544, + "29": 0.18916, + "30": 0.18587, + "31": 0.18557, + "32": 0.1855, + "33": 0.18841, + "34": 0.18606, + "35": 0.18832, + "36": 0.18518, + "37": 0.37059, + "38": 0.18603, + "39": 0.18695, + "40": 0.18575, + "41": 0.18563, + "42": 0.1854, + "43": 0.18938, + "44": 
0.18881, + "45": 0.18598, + "46": 0.18518, + "47": 0.18498, + "48": 0.18591, + "49": 0.44149, + "50": 0.18979, + "51": 0.19055, + "52": 0.18685, + "53": 0.18664, + "54": 0.1883, + "55": 0.18876, + "56": 0.18804, + "57": 0.19098, + "58": 0.1906, + "59": 0.18982, + "60": 0.19201, + "61": 0.18888, + "62": 0.18984, + "63": 0.19266, + "64": 0.19293, + "65": 0.19379, + "66": 0.1901, + "67": 0.18841, + "68": 0.19003, + "69": 0.18922, + "70": 0.19267, + "71": 0.1883, + "72": 0.18753, + "73": 0.18871, + "74": 0.18988, + "75": 0.18979, + "76": 0.18974, + "77": 0.18868, + "78": 0.19111, + "79": 0.19033, + "80": 0.18892, + "81": 0.19389, + "82": 0.18863, + "83": 0.1889, + "84": 0.19203, + "85": 0.18938, + "86": 0.19151, + "87": 0.18754, + "88": 0.18794, + "89": 0.18964, + "90": 0.1881, + "91": 0.19389, + "92": 0.19072, + "93": 0.18826, + "94": 0.18909, + "95": 0.19026, + "96": 0.1894, + "97": 0.18891, + "98": 0.18715, + "99": 0.18688, + "100": 0.1904 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 88e3f568e8a..c8c73bdbafc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, "15": 10.82054, + "16": 10.82504, + 
"17": 10.78983, + "18": 10.81029, + "19": 10.80535, "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, "50": 9.8399 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, "50": 848.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 510165504.0, - "5": 510165504.0, - "10": 510165504.0, - "15": 510165504.0, - "20": 510165504.0, - "25": 510165504.0, - "30": 510165504.0, - "35": 510165504.0, - "40": 510165504.0, - "45": 510165504.0, - "50": 510165504.0 + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 
510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 755704320.0, - "5": 933680128.0, - "10": 933680640.0, - "15": 933680640.0, - "20": 933680640.0, - "25": 933680640.0, - "30": 933680640.0, - "35": 933680640.0, - "40": 933680640.0, - "45": 933680640.0, - "50": 933680640.0 + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 
933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 14.27411, - "5": 0.27049, - "10": 0.2735, - "15": 0.2699, - "20": 0.28311, - "25": 0.28368, - "30": 0.28623, - "35": 0.28201, - "40": 0.27349, - "45": 0.28, - "50": 0.28987 + "1": 16.50426, + "2": 0.36653, + "3": 0.34466, + "4": 0.34777, + "5": 0.33341, + "6": 0.3232, + "7": 0.32752, + "8": 0.32335, + "9": 0.32468, + "10": 0.32504, + "11": 0.32396, + "12": 0.32512, + "13": 0.32567, + "14": 0.32353, + "15": 0.31982, + "16": 0.3257, + "17": 0.32525, + "18": 0.32037, + "19": 0.32059, + "20": 0.32739, + "21": 0.32382, + "22": 0.32191, + "23": 0.3644, + "24": 0.35527, + "25": 0.32169, + "26": 0.3265, + "27": 0.3207, + "28": 0.31972, + "29": 0.32327, + "30": 0.31924, + "31": 0.32108, + "32": 0.32626, + "33": 0.31775, + "34": 0.31872, + "35": 0.32546, + "36": 0.317, + "37": 0.31972, + "38": 0.32263, + "39": 0.32037, + "40": 0.32326, + "41": 0.32505, + "42": 0.3215, + "43": 0.31898, + "44": 0.32895, + "45": 0.32343, + "46": 0.3229, + "47": 0.32813, + "48": 0.32454, + "49": 0.31943, + "50": 0.32434 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..88252ac05b0 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + 
"1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, + "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, + "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, + "15": 10.82054, + "16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, + "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, + "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, + "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, + "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, + "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, + "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, + "50": 9.8399 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, + "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, + "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, + "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, + "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, + "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, + "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, + "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, + "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, + "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, + "50": 848.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 
510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759898624.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 
933156352.0, + "50": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.48733, + "2": 0.32636, + "3": 0.28113, + "4": 0.28069, + "5": 0.28063, + "6": 0.28085, + "7": 0.27912, + "8": 0.27833, + "9": 0.27983, + "10": 0.28235, + "11": 0.28033, + "12": 0.27634, + "13": 0.27743, + "14": 0.27968, + "15": 0.27741, + "16": 0.27901, + "17": 0.27898, + "18": 0.28259, + "19": 0.27738, + "20": 0.27602, + "21": 0.27999, + "22": 0.27615, + "23": 0.27868, + "24": 0.27928, + "25": 0.27684, + "26": 0.27875, + "27": 0.27628, + "28": 0.28571, + "29": 0.27681, + "30": 0.28404, + "31": 0.28086, + "32": 0.28479, + "33": 0.28538, + "34": 0.28086, + "35": 0.28036, + "36": 0.28227, + "37": 0.28585, + "38": 0.28963, + "39": 0.28114, + "40": 0.28277, + "41": 0.28191, + "42": 0.28102, + "43": 0.29373, + "44": 0.2876, + "45": 0.27991, + "46": 0.27977, + "47": 0.28135, + "48": 0.28282, + "49": 0.28275, + "50": 0.28218 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..f2adbef4530 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, + "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, + "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, + "15": 10.82054, + "16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, + "20": 10.70398, + "21": 
10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, + "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, + "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, + "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, + "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, + "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, + "50": 9.8399 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, + "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, + "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, + "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, + "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, + "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, + "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, + "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, + "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, + "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, + "50": 848.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 
510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759898624.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.72434, + "2": 0.40342, + "3": 0.32477, + "4": 0.32459, + "5": 0.32511, + "6": 0.32478, + "7": 0.32469, + "8": 0.32479, + "9": 0.32229, + "10": 0.32534, + "11": 0.32568, + "12": 0.32325, + "13": 0.3234, + "14": 
0.32735, + "15": 0.32264, + "16": 0.32664, + "17": 0.32289, + "18": 0.32328, + "19": 0.32997, + "20": 0.32955, + "21": 0.32699, + "22": 0.3292, + "23": 0.32982, + "24": 0.32452, + "25": 0.32644, + "26": 0.32596, + "27": 0.32426, + "28": 0.32527, + "29": 0.32409, + "30": 0.32549, + "31": 0.32259, + "32": 0.32488, + "33": 0.32331, + "34": 0.3242, + "35": 0.3261, + "36": 0.32048, + "37": 0.32127, + "38": 0.32479, + "39": 0.32338, + "40": 0.32137, + "41": 0.32292, + "42": 0.32202, + "43": 0.32321, + "44": 0.32105, + "45": 0.32265, + "46": 0.32148, + "47": 0.32443, + "48": 0.32158, + "49": 0.32089, + "50": 0.32389 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 386d5fed474..67aa60490cf 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.86535, - "5": 10.87853, - "10": 10.82982, - "15": 10.82054, - "20": 10.704, - "25": 10.49417, - "30": 10.30549, - "35": 10.20186, - "40": 10.01901, - "45": 9.74963, - "50": 9.8399 + "2": 10.85873, + "3": 10.86284, + "4": 10.84007, + "5": 10.87855, + "6": 10.88852, + "7": 10.86534, + "8": 10.86018, + "9": 10.85988, + "10": 10.8298, + "11": 10.88947, + "12": 10.87509, + "13": 10.87426, + "14": 10.89675, + "15": 10.82058, + "16": 10.82501, + "17": 10.78981, + "18": 10.81029, + "19": 10.80531, + "20": 10.70396, + "21": 
10.66991, + "22": 10.5064, + "23": 10.69006, + "24": 10.56312, + "25": 10.49419, + "26": 10.56627, + "27": 10.58024, + "28": 10.51573, + "29": 10.55298, + "30": 10.30548, + "31": 10.02248, + "32": 10.40615, + "33": 10.39876, + "34": 10.13771, + "35": 10.20187, + "36": 10.16047, + "37": 10.28972, + "38": 10.11475, + "39": 10.36102, + "40": 10.01904, + "41": 10.07293, + "42": 10.14696, + "43": 9.74687, + "44": 9.87765, + "45": 9.74966, + "46": 9.73379, + "47": 10.07533, + "48": 9.78071, + "49": 9.44786, + "50": 9.83991 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 565.0, - "5": 634.0, - "10": 570.0, - "15": 645.0, - "20": 616.0, - "25": 577.0, - "30": 747.0, - "35": 760.0, - "40": 765.0, - "45": 838.0, - "50": 895.0 + "1": 594.0, + "2": 641.0, + "3": 677.0, + "4": 648.0, + "5": 645.0, + "6": 681.0, + "7": 639.0, + "8": 590.0, + "9": 648.0, + "10": 519.0, + "11": 703.0, + "12": 589.0, + "13": 650.0, + "14": 706.0, + "15": 675.0, + "16": 652.0, + "17": 685.0, + "18": 596.0, + "19": 672.0, + "20": 667.0, + "21": 650.0, + "22": 656.0, + "23": 706.0, + "24": 595.0, + "25": 593.0, + "26": 595.0, + "27": 685.0, + "28": 756.0, + "29": 674.0, + "30": 743.0, + "31": 612.0, + "32": 723.0, + "33": 778.0, + "34": 695.0, + "35": 716.0, + "36": 683.0, + "37": 805.0, + "38": 756.0, + "39": 850.0, + "40": 822.0, + "41": 870.0, + "42": 767.0, + "43": 747.0, + "44": 798.0, + "45": 782.0, + "46": 891.0, + "47": 887.0, + "48": 898.0, + "49": 890.0, + "50": 881.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + 
"18": 510689792.0, + "19": 510689792.0, "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, "50": 510689792.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, "45": 933156352.0, - "50": 934202368.0 + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 18.05689, - "5": 0.28787, - "10": 
0.2889, - "15": 0.28608, - "20": 0.28427, - "25": 0.29621, - "30": 0.28048, - "35": 0.2827, - "40": 0.28468, - "45": 0.27947, - "50": 0.30286 + "1": 16.5651, + "2": 0.34314, + "3": 0.32308, + "4": 0.32445, + "5": 0.33098, + "6": 0.32202, + "7": 0.32251, + "8": 0.32355, + "9": 0.32346, + "10": 0.31687, + "11": 0.32105, + "12": 0.32381, + "13": 0.32098, + "14": 0.32322, + "15": 0.31579, + "16": 0.31699, + "17": 0.32307, + "18": 0.32662, + "19": 0.33548, + "20": 0.32088, + "21": 0.32691, + "22": 0.32206, + "23": 0.32261, + "24": 0.32621, + "25": 0.32403, + "26": 0.32368, + "27": 0.32665, + "28": 0.32924, + "29": 0.32322, + "30": 0.32903, + "31": 0.32199, + "32": 0.32034, + "33": 0.32453, + "34": 0.32691, + "35": 0.32014, + "36": 0.3206, + "37": 0.31874, + "38": 0.32448, + "39": 0.32813, + "40": 0.32242, + "41": 0.32196, + "42": 0.32843, + "43": 0.32328, + "44": 0.32049, + "45": 0.3265, + "46": 0.31996, + "47": 0.32173, + "48": 0.323, + "49": 0.32398, + "50": 0.3329 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..303a87c0069 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86281, + "4": 10.8401, + "5": 10.87858, + "6": 10.88853, + "7": 10.86535, + "8": 10.86017, + "9": 10.8599, + "10": 10.82979, + "11": 10.88945, + "12": 10.87509, + "13": 10.87423, + "14": 10.89675, + "15": 10.8205, + "16": 10.825, + "17": 10.78982, + "18": 
10.81028, + "19": 10.80532, + "20": 10.70394, + "21": 10.66988, + "22": 10.50642, + "23": 10.69005, + "24": 10.56311, + "25": 10.49417, + "26": 10.56628, + "27": 10.58023, + "28": 10.5157, + "29": 10.55296, + "30": 10.30548, + "31": 10.02248, + "32": 10.40617, + "33": 10.39875, + "34": 10.13774, + "35": 10.20186, + "36": 10.16048, + "37": 10.28974, + "38": 10.1148, + "39": 10.36104, + "40": 10.01904, + "41": 10.07288, + "42": 10.14695, + "43": 9.74684, + "44": 9.87761, + "45": 9.74967, + "46": 9.73383, + "47": 10.07539, + "48": 9.78069, + "49": 9.44781, + "50": 9.83988 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 593.0, + "2": 628.0, + "3": 611.0, + "4": 628.0, + "5": 651.0, + "6": 650.0, + "7": 630.0, + "8": 551.0, + "9": 708.0, + "10": 508.0, + "11": 656.0, + "12": 633.0, + "13": 683.0, + "14": 683.0, + "15": 633.0, + "16": 614.0, + "17": 628.0, + "18": 626.0, + "19": 574.0, + "20": 620.0, + "21": 684.0, + "22": 598.0, + "23": 752.0, + "24": 593.0, + "25": 549.0, + "26": 607.0, + "27": 661.0, + "28": 739.0, + "29": 699.0, + "30": 728.0, + "31": 571.0, + "32": 695.0, + "33": 761.0, + "34": 670.0, + "35": 708.0, + "36": 677.0, + "37": 861.0, + "38": 768.0, + "39": 836.0, + "40": 789.0, + "41": 818.0, + "42": 853.0, + "43": 774.0, + "44": 800.0, + "45": 743.0, + "46": 832.0, + "47": 902.0, + "48": 827.0, + "49": 914.0, + "50": 878.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + 
"24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.98198, + "2": 0.32508, + "3": 0.27859, + "4": 0.28973, + "5": 0.28871, + "6": 0.28743, + "7": 0.28586, + "8": 0.28626, + "9": 0.28734, + "10": 0.28834, + "11": 0.29037, 
+ "12": 0.29031, + "13": 0.27847, + "14": 0.28002, + "15": 0.28617, + "16": 0.28603, + "17": 0.28309, + "18": 0.28753, + "19": 0.34589, + "20": 0.28022, + "21": 0.28261, + "22": 0.28865, + "23": 0.28869, + "24": 0.2851, + "25": 0.28458, + "26": 0.28706, + "27": 0.28515, + "28": 0.29088, + "29": 0.28891, + "30": 0.28446, + "31": 0.28444, + "32": 0.28347, + "33": 0.28941, + "34": 0.28783, + "35": 0.28386, + "36": 0.28238, + "37": 0.28325, + "38": 0.28579, + "39": 0.29406, + "40": 0.28819, + "41": 0.29033, + "42": 0.28815, + "43": 0.2919, + "44": 0.2895, + "45": 0.28613, + "46": 0.28704, + "47": 0.29081, + "48": 0.29057, + "49": 0.2897, + "50": 0.28865 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..a74ab8d8415 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86281, + "4": 10.84011, + "5": 10.87855, + "6": 10.88849, + "7": 10.86536, + "8": 10.86016, + "9": 10.85987, + "10": 10.82979, + "11": 10.88946, + "12": 10.87508, + "13": 10.87423, + "14": 10.89679, + "15": 10.82052, + "16": 10.825, + "17": 10.78984, + "18": 10.81026, + "19": 10.80535, + "20": 10.70395, + "21": 10.66988, + "22": 10.50641, + "23": 10.69004, + "24": 10.56309, + "25": 10.49417, + "26": 10.56626, + "27": 10.58024, + "28": 10.51572, + "29": 10.55294, + "30": 10.30552, + "31": 10.02243, + "32": 10.40616, + "33": 10.39875, + "34": 10.13772, + "35": 10.20189, + "36": 
10.16048, + "37": 10.28972, + "38": 10.11479, + "39": 10.361, + "40": 10.01902, + "41": 10.07292, + "42": 10.14694, + "43": 9.74686, + "44": 9.87768, + "45": 9.74966, + "46": 9.7338, + "47": 10.07535, + "48": 9.7807, + "49": 9.44783, + "50": 9.83991 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 600.0, + "2": 620.0, + "3": 606.0, + "4": 684.0, + "5": 647.0, + "6": 679.0, + "7": 630.0, + "8": 568.0, + "9": 627.0, + "10": 519.0, + "11": 635.0, + "12": 640.0, + "13": 677.0, + "14": 631.0, + "15": 668.0, + "16": 666.0, + "17": 671.0, + "18": 623.0, + "19": 658.0, + "20": 639.0, + "21": 624.0, + "22": 614.0, + "23": 741.0, + "24": 607.0, + "25": 636.0, + "26": 639.0, + "27": 689.0, + "28": 751.0, + "29": 724.0, + "30": 771.0, + "31": 564.0, + "32": 750.0, + "33": 765.0, + "34": 693.0, + "35": 737.0, + "36": 754.0, + "37": 807.0, + "38": 786.0, + "39": 879.0, + "40": 737.0, + "41": 817.0, + "42": 857.0, + "43": 709.0, + "44": 808.0, + "45": 795.0, + "46": 837.0, + "47": 879.0, + "48": 899.0, + "49": 890.0, + "50": 860.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 
510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 934204928.0, + "25": 934204928.0, + "26": 934204928.0, + "27": 934204928.0, + "28": 934204928.0, + "29": 934204928.0, + "30": 934204928.0, + "31": 934204928.0, + "32": 934204928.0, + "33": 934204928.0, + "34": 934204928.0, + "35": 934204928.0, + "36": 934204928.0, + "37": 934204928.0, + "38": 934204928.0, + "39": 934204928.0, + "40": 934204928.0, + "41": 934204928.0, + "42": 934204928.0, + "43": 934204928.0, + "44": 934204928.0, + "45": 934204928.0, + "46": 934204928.0, + "47": 934204928.0, + "48": 934204928.0, + "49": 934204928.0, + "50": 934204928.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.61636, + "2": 0.35255, + "3": 0.33784, + "4": 0.33448, + "5": 0.33388, + "6": 0.33362, + "7": 0.33399, + "8": 0.33377, + "9": 0.3345, + "10": 0.33436, + "11": 0.33616, + "12": 0.33216, + "13": 0.32717, + "14": 0.3285, + "15": 0.31893, + "16": 0.32207, + "17": 0.32068, + "18": 0.3232, + "19": 0.31799, + "20": 0.32295, + "21": 0.32148, + "22": 0.3312, + "23": 0.33388, + "24": 0.33493, + "25": 0.33793, + "26": 0.33838, + "27": 0.33827, + "28": 0.34, + "29": 0.33074, + "30": 0.32608, + 
"31": 0.32629, + "32": 0.3285, + "33": 0.32776, + "34": 0.32575, + "35": 0.32648, + "36": 0.3252, + "37": 0.32697, + "38": 0.33001, + "39": 0.3354, + "40": 0.33513, + "41": 0.33447, + "42": 0.3352, + "43": 0.33163, + "44": 0.32495, + "45": 0.32668, + "46": 0.32429, + "47": 0.32917, + "48": 0.32614, + "49": 0.32637, + "50": 0.32702 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..93a6863f9ba --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93292, + "2": 10.93423, + "3": 10.91343, + "4": 10.9032, + "5": 10.9297, + "6": 10.93654, + "7": 10.90278, + "8": 10.92115, + "9": 10.90703, + "10": 10.90474, + "11": 10.88784, + "12": 10.91739, + "13": 10.91191, + "14": 10.91502, + "15": 10.87124, + "16": 10.86128, + "17": 10.82695, + "18": 10.8568, + "19": 10.84056, + "20": 10.75, + "21": 10.71506, + "22": 10.58117, + "23": 10.72641, + "24": 10.60731, + "25": 10.53752, + "26": 10.61071, + "27": 10.5993, + "28": 10.54954, + "29": 10.56604, + "30": 10.32554, + "31": 10.06698, + "32": 10.43804, + "33": 10.42362, + "34": 10.16013, + "35": 10.22894, + "36": 10.17616, + "37": 10.29237, + "38": 10.13292, + "39": 10.34958, + "40": 10.01974, + "41": 10.07538, + "42": 10.15409, + "43": 9.76091, + "44": 9.88355, + "45": 9.75545, + "46": 9.74961, + "47": 10.07545, + "48": 9.77938, + "49": 9.43818, + "50": 9.84069 + } + }, + "num-zeros": { + "start_step": 1, + 
"end_step": 50, + "step_interval": 1, + "values": { + "1": 575.0, + "2": 559.0, + "3": 613.0, + "4": 620.0, + "5": 596.0, + "6": 632.0, + "7": 610.0, + "8": 563.0, + "9": 590.0, + "10": 556.0, + "11": 680.0, + "12": 555.0, + "13": 624.0, + "14": 619.0, + "15": 609.0, + "16": 656.0, + "17": 643.0, + "18": 621.0, + "19": 604.0, + "20": 628.0, + "21": 608.0, + "22": 623.0, + "23": 640.0, + "24": 607.0, + "25": 605.0, + "26": 644.0, + "27": 664.0, + "28": 703.0, + "29": 741.0, + "30": 670.0, + "31": 602.0, + "32": 687.0, + "33": 780.0, + "34": 661.0, + "35": 672.0, + "36": 726.0, + "37": 776.0, + "38": 756.0, + "39": 843.0, + "40": 832.0, + "41": 850.0, + "42": 793.0, + "43": 719.0, + "44": 800.0, + "45": 716.0, + "46": 811.0, + "47": 828.0, + "48": 865.0, + "49": 810.0, + "50": 875.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 431783936.0, + "2": 431783936.0, + "3": 431783936.0, + "4": 431783936.0, + "5": 431783936.0, + "6": 431783936.0, + "7": 431783936.0, + "8": 431783936.0, + "9": 431783936.0, + "10": 431783936.0, + "11": 431783936.0, + "12": 431783936.0, + "13": 431783936.0, + "14": 431783936.0, + "15": 431783936.0, + "16": 431783936.0, + "17": 431783936.0, + "18": 431783936.0, + "19": 431783936.0, + "20": 431783936.0, + "21": 431783936.0, + "22": 431783936.0, + "23": 431783936.0, + "24": 431783936.0, + "25": 431783936.0, + "26": 431783936.0, + "27": 431783936.0, + "28": 431783936.0, + "29": 431783936.0, + "30": 431783936.0, + "31": 431783936.0, + "32": 431783936.0, + "33": 431783936.0, + "34": 431783936.0, + "35": 431783936.0, + "36": 431783936.0, + "37": 431783936.0, + "38": 431783936.0, + "39": 431783936.0, + "40": 431783936.0, + "41": 431783936.0, + "42": 431783936.0, + "43": 431783936.0, + "44": 431783936.0, + "45": 431783936.0, + "46": 431783936.0, + "47": 431783936.0, + "48": 431783936.0, + "49": 431783936.0, + "50": 431783936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, 
+ "end_step": 50, + "step_interval": 1, + "values": { + "1": 677333504.0, + "2": 855308800.0, + "3": 855308800.0, + "4": 855308800.0, + "5": 855308800.0, + "6": 855308800.0, + "7": 855308800.0, + "8": 855308800.0, + "9": 855308800.0, + "10": 855308800.0, + "11": 855308800.0, + "12": 855308800.0, + "13": 855308800.0, + "14": 855308800.0, + "15": 855308800.0, + "16": 855308800.0, + "17": 855308800.0, + "18": 855308800.0, + "19": 855310336.0, + "20": 855310336.0, + "21": 855310336.0, + "22": 855310336.0, + "23": 855310336.0, + "24": 855310336.0, + "25": 855310336.0, + "26": 855311360.0, + "27": 855311360.0, + "28": 855311360.0, + "29": 855311360.0, + "30": 855311360.0, + "31": 855311360.0, + "32": 855311360.0, + "33": 855311360.0, + "34": 855311360.0, + "35": 855311360.0, + "36": 855311360.0, + "37": 855311360.0, + "38": 855311360.0, + "39": 855311360.0, + "40": 855311360.0, + "41": 855311360.0, + "42": 855311360.0, + "43": 855311360.0, + "44": 855311360.0, + "45": 855311360.0, + "46": 855311360.0, + "47": 855311360.0, + "48": 855311360.0, + "49": 855311360.0, + "50": 855311360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.36326, + "2": 0.4559, + "3": 0.42105, + "4": 0.43438, + "5": 0.42464, + "6": 0.41381, + "7": 0.42997, + "8": 0.41256, + "9": 0.42034, + "10": 0.41575, + "11": 0.41092, + "12": 0.42374, + "13": 0.41123, + "14": 0.42677, + "15": 0.41074, + "16": 0.42059, + "17": 0.41911, + "18": 0.41172, + "19": 0.42617, + "20": 0.41085, + "21": 0.42288, + "22": 0.41567, + "23": 0.41045, + "24": 0.42041, + "25": 0.40891, + "26": 0.42104, + "27": 0.41476, + "28": 0.4134, + "29": 0.41023, + "30": 0.40616, + "31": 0.41979, + "32": 0.40666, + "33": 0.41352, + "34": 0.42345, + "35": 0.40886, + "36": 0.42443, + "37": 0.40786, + "38": 0.41631, + "39": 0.41181, + "40": 0.40693, + "41": 0.41652, + "42": 0.40701, + "43": 0.42407, + "44": 0.41181, + "45": 0.40787, + "46": 0.41861, + "47": 0.40384, + "48": 
0.4279, + "49": 0.40721, + "50": 0.41192 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..fcf25e804f7 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93292, + "2": 10.93423, + "3": 10.91343, + "4": 10.90318, + "5": 10.92969, + "6": 10.93655, + "7": 10.90278, + "8": 10.92114, + "9": 10.90705, + "10": 10.90476, + "11": 10.88784, + "12": 10.91738, + "13": 10.91192, + "14": 10.91507, + "15": 10.87121, + "16": 10.8613, + "17": 10.82698, + "18": 10.85677, + "19": 10.8406, + "20": 10.74995, + "21": 10.7151, + "22": 10.58115, + "23": 10.72643, + "24": 10.60731, + "25": 10.53752, + "26": 10.61065, + "27": 10.59933, + "28": 10.54956, + "29": 10.56604, + "30": 10.32551, + "31": 10.06702, + "32": 10.43808, + "33": 10.42361, + "34": 10.16018, + "35": 10.22893, + "36": 10.17618, + "37": 10.29235, + "38": 10.13293, + "39": 10.34955, + "40": 10.01975, + "41": 10.07537, + "42": 10.15408, + "43": 9.7609, + "44": 9.88355, + "45": 9.75548, + "46": 9.74966, + "47": 10.07548, + "48": 9.77939, + "49": 9.4382, + "50": 9.8407 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 584.0, + "2": 575.0, + "3": 637.0, + "4": 586.0, + "5": 643.0, + "6": 652.0, + "7": 636.0, + "8": 624.0, + "9": 699.0, + "10": 579.0, + "11": 684.0, + "12": 650.0, + "13": 645.0, + "14": 582.0, + "15": 623.0, + "16": 637.0, + "17": 675.0, + "18": 
614.0, + "19": 579.0, + "20": 589.0, + "21": 643.0, + "22": 603.0, + "23": 709.0, + "24": 582.0, + "25": 632.0, + "26": 638.0, + "27": 662.0, + "28": 732.0, + "29": 705.0, + "30": 691.0, + "31": 539.0, + "32": 731.0, + "33": 809.0, + "34": 721.0, + "35": 680.0, + "36": 701.0, + "37": 779.0, + "38": 770.0, + "39": 816.0, + "40": 795.0, + "41": 793.0, + "42": 826.0, + "43": 747.0, + "44": 782.0, + "45": 724.0, + "46": 813.0, + "47": 858.0, + "48": 880.0, + "49": 822.0, + "50": 851.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 431783936.0, + "2": 431783936.0, + "3": 431783936.0, + "4": 431783936.0, + "5": 431783936.0, + "6": 431783936.0, + "7": 431783936.0, + "8": 431783936.0, + "9": 431783936.0, + "10": 431783936.0, + "11": 431783936.0, + "12": 431783936.0, + "13": 431783936.0, + "14": 431783936.0, + "15": 431783936.0, + "16": 431783936.0, + "17": 431783936.0, + "18": 431783936.0, + "19": 431783936.0, + "20": 431783936.0, + "21": 431783936.0, + "22": 431783936.0, + "23": 431783936.0, + "24": 431783936.0, + "25": 431783936.0, + "26": 431783936.0, + "27": 431783936.0, + "28": 431783936.0, + "29": 431783936.0, + "30": 431783936.0, + "31": 431783936.0, + "32": 431783936.0, + "33": 431783936.0, + "34": 431783936.0, + "35": 431783936.0, + "36": 431783936.0, + "37": 431783936.0, + "38": 431783936.0, + "39": 431783936.0, + "40": 431783936.0, + "41": 431783936.0, + "42": 431783936.0, + "43": 431783936.0, + "44": 431783936.0, + "45": 431783936.0, + "46": 431783936.0, + "47": 431783936.0, + "48": 431783936.0, + "49": 431783936.0, + "50": 431783936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 677335040.0, + "2": 853214208.0, + "3": 854260224.0, + "4": 854260224.0, + "5": 854260224.0, + "6": 854260224.0, + "7": 854260224.0, + "8": 854260224.0, + "9": 854261760.0, + "10": 854261760.0, + "11": 854261760.0, + "12": 854261760.0, + "13": 
854261760.0, + "14": 854261760.0, + "15": 854261760.0, + "16": 854261760.0, + "17": 854261760.0, + "18": 854261760.0, + "19": 854261760.0, + "20": 854261760.0, + "21": 854261760.0, + "22": 854261760.0, + "23": 854261760.0, + "24": 854262784.0, + "25": 854262784.0, + "26": 854262784.0, + "27": 854262784.0, + "28": 854262784.0, + "29": 854262784.0, + "30": 854262784.0, + "31": 854262784.0, + "32": 854262784.0, + "33": 854262784.0, + "34": 854262784.0, + "35": 854262784.0, + "36": 854262784.0, + "37": 854262784.0, + "38": 854262784.0, + "39": 854262784.0, + "40": 854262784.0, + "41": 854262784.0, + "42": 854262784.0, + "43": 854262784.0, + "44": 854262784.0, + "45": 854262784.0, + "46": 854262784.0, + "47": 854262784.0, + "48": 854262784.0, + "49": 854262784.0, + "50": 854262784.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.47386, + "2": 0.47756, + "3": 0.45149, + "4": 0.3974, + "5": 0.40219, + "6": 0.40118, + "7": 0.39646, + "8": 0.399, + "9": 0.40423, + "10": 0.39996, + "11": 0.40013, + "12": 0.39333, + "13": 0.40016, + "14": 0.40246, + "15": 0.39824, + "16": 0.39607, + "17": 0.38883, + "18": 0.39558, + "19": 0.40073, + "20": 0.39465, + "21": 0.39509, + "22": 0.39239, + "23": 0.39366, + "24": 0.39612, + "25": 0.39292, + "26": 0.39495, + "27": 0.39096, + "28": 0.39872, + "29": 0.39945, + "30": 0.38903, + "31": 0.40121, + "32": 0.3932, + "33": 0.39872, + "34": 0.4027, + "35": 0.38761, + "36": 0.39596, + "37": 0.40133, + "38": 0.39669, + "39": 0.39549, + "40": 0.39351, + "41": 0.39605, + "42": 0.39902, + "43": 0.39692, + "44": 0.39866, + "45": 0.38737, + "46": 0.40095, + "47": 0.40062, + "48": 0.39784, + "49": 0.39656, + "50": 0.39145 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 94d3531293f..db2baf5c599 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, "15": 10.82054, + "16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, "50": 9.8399 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, "35": 741.0, + "36": 770.0, + "37": 
861.0, + "38": 823.0, + "39": 812.0, "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, "50": 848.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, "50": 510689792.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 759895552.0, - "5": 934204928.0, - "10": 934204928.0, - "15": 934204928.0, - "20": 934204928.0, - "25": 934204928.0, - "30": 934204928.0, - "35": 934204928.0, - "40": 934204928.0, - "45": 934204928.0, - "50": 934204928.0 + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 
933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 16.20665, - "5": 0.29885, - "10": 0.28312, - "15": 0.28379, - "20": 0.29142, - "25": 0.28821, - "30": 0.28552, - "35": 0.29704, - "40": 0.29487, - "45": 0.28474, - "50": 0.29091 + "1": 16.00603, + "2": 0.37533, + "3": 0.32669, + "4": 0.33301, + "5": 0.33912, + "6": 0.32887, + "7": 0.32417, + "8": 0.32988, + "9": 0.33113, + "10": 0.32547, + "11": 0.32805, + "12": 0.328, + "13": 0.33007, + "14": 0.33264, + "15": 0.3341, + "16": 0.33744, + "17": 0.33776, + "18": 0.33727, + "19": 0.33724, + "20": 0.33333, + "21": 0.32884, + "22": 0.32956, + "23": 0.33051, + "24": 0.33032, + "25": 0.3332, + "26": 0.32905, + "27": 0.32375, + "28": 0.3404, + "29": 0.33196, + "30": 0.33981, + "31": 0.33813, + "32": 0.34997, + "33": 0.34437, + "34": 0.33045, + "35": 0.32839, + "36": 0.32738, + "37": 0.32817, + "38": 0.32837, + "39": 0.32923, + "40": 0.33033, + "41": 0.32725, + "42": 0.32793, + "43": 0.32998, + "44": 0.32897, + "45": 0.32784, + "46": 0.32856, + "47": 0.33025, + "48": 0.32747, + "49": 0.32752, + "50": 0.32926 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..7b244eb8d53 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, + "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, + "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, + "15": 10.82054, + "16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, + "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, + "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, + "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, + "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, + "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, + "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, + "50": 9.8399 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, + "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, + "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, + "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, + "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, + "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, + "30": 752.0, + "31": 628.0, + "32": 712.0, + 
"33": 752.0, + "34": 737.0, + "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, + "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, + "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, + "50": 848.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759898624.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 
933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.5499, + "2": 0.36629, + "3": 0.28373, + "4": 0.2889, + "5": 0.28714, + "6": 0.28308, + "7": 0.28631, + "8": 0.28716, + "9": 0.2827, + "10": 0.28014, + "11": 0.28458, + "12": 0.28337, + "13": 0.28673, + "14": 0.28763, + "15": 0.28453, + "16": 0.28536, + "17": 0.2915, + "18": 0.29241, + "19": 0.28738, + "20": 0.28157, + "21": 0.28725, + "22": 0.28594, + "23": 0.28463, + "24": 0.28697, + "25": 0.28822, + "26": 0.28636, + "27": 0.29484, + "28": 0.29612, + "29": 0.29284, + "30": 0.28832, + "31": 0.28707, + "32": 0.28946, + "33": 0.28737, + "34": 0.28546, + "35": 0.28437, + "36": 0.28751, + "37": 0.28834, + "38": 0.28784, + "39": 0.28871, + "40": 0.28919, + "41": 0.28543, + "42": 0.28646, + "43": 0.29593, + "44": 0.28978, + "45": 0.29038, + "46": 0.29126, + "47": 0.28667, + "48": 0.28881, + "49": 0.28809, + "50": 0.28744 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..02b4683ea0b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ 
-0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, + "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, + "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, + "15": 10.82054, + "16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, + "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, + "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, + "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, + "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, + "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, + "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, + "50": 9.8399 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, + "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, + "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, + "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, + "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, + "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, + "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, + "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, + "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, + "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, + "50": 848.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 
510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 757801472.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 
933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 15.78036, + "2": 0.34723, + "3": 0.33492, + "4": 0.3292, + "5": 0.33036, + "6": 0.34971, + "7": 0.33848, + "8": 0.33262, + "9": 0.34028, + "10": 0.3518, + "11": 0.34239, + "12": 0.33211, + "13": 0.32961, + "14": 0.33263, + "15": 0.32808, + "16": 0.33152, + "17": 0.33313, + "18": 0.329, + "19": 0.3317, + "20": 0.33143, + "21": 0.34166, + "22": 0.33873, + "23": 0.34817, + "24": 0.3415, + "25": 0.34495, + "26": 0.32592, + "27": 0.32935, + "28": 0.33233, + "29": 0.328, + "30": 0.32746, + "31": 0.3275, + "32": 0.327, + "33": 0.32765, + "34": 0.32542, + "35": 0.32703, + "36": 0.33052, + "37": 0.33413, + "38": 0.32701, + "39": 0.32816, + "40": 0.32555, + "41": 0.33676, + "42": 0.33367, + "43": 0.33748, + "44": 0.33125, + "45": 0.32793, + "46": 0.33387, + "47": 0.32628, + "48": 0.32993, + "49": 0.32747, + "50": 0.327 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 17f2535f7d8..91630133bbc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.86535, "5": 10.87856, "10": 10.82981, "15": 10.82054, "20": 10.70398, "25": 10.4942, "30": 10.30549, "35": 10.20184, 
"40": 10.01903, "45": 9.74966, "50": 9.8399}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 653.0, "5": 635.0, "10": 522.0, "15": 640.0, "20": 579.0, "25": 591.0, "30": 752.0, "35": 741.0, "40": 814.0, "45": 777.0, "50": 848.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 510689792.0, "5": 510689792.0, "10": 510689792.0, "15": 510689792.0, "20": 510689792.0, "25": 510689792.0, "30": 510689792.0, "35": 510689792.0, "40": 510689792.0, "45": 510689792.0, "50": 510689792.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 759898624.0, "5": 933156352.0, "10": 933156352.0, "15": 933156352.0, "20": 933156352.0, "25": 934204416.0, "30": 934204416.0, "35": 934204416.0, "40": 934204416.0, "45": 934204416.0, "50": 934204416.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 19.9057, "5": 0.26754, "10": 0.26496, "15": 0.26771, "20": 0.26791, "25": 0.26865, "30": 0.26668, "35": 0.2709, "40": 0.26908, "45": 0.26408, "50": 0.27511}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, + "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, + "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, + "15": 10.82054, + "16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, + "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, + "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, + "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, + "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, + "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 
9.87766, + "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, + "50": 9.8399 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, + "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, + "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, + "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, + "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, + "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, + "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, + "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, + "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, + "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, + "50": 848.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + 
"46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 18.74335, + "2": 0.3476, + "3": 0.32845, + "4": 0.34133, + "5": 0.34487, + "6": 0.34494, + "7": 0.33861, + "8": 0.33955, + "9": 0.34794, + "10": 0.32879, + "11": 0.32446, + "12": 0.3306, + "13": 0.32382, + "14": 0.33396, + "15": 0.32393, + "16": 0.32115, + "17": 0.32752, + "18": 0.32386, + "19": 0.32588, + "20": 0.32805, + "21": 0.32785, + "22": 0.32655, + "23": 0.32262, + "24": 0.32541, + "25": 0.32541, + "26": 0.32301, + "27": 0.32448, + "28": 0.32526, + "29": 0.32436, + "30": 0.32542, + "31": 0.32734, + "32": 0.32473, + "33": 0.32718, + "34": 0.32951, + "35": 0.33292, + "36": 0.34033, + "37": 0.34474, + "38": 0.34306, + 
"39": 0.34159, + "40": 0.32995, + "41": 0.33037, + "42": 0.33033, + "43": 0.33246, + "44": 0.33318, + "45": 0.33332, + "46": 0.32932, + "47": 0.33279, + "48": 0.33327, + "49": 0.33082, + "50": 0.33522 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..81f4d5c3832 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, + "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, + "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, + "15": 10.82054, + "16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, + "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, + "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, + "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, + "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, + "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, + "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, + "50": 9.8399 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, + "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 
601.0, + "9": 607.0, + "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, + "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, + "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, + "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, + "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, + "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, + "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, + "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, + "50": 848.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + 
"6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 934203392.0, + "28": 934203392.0, + "29": 934203392.0, + "30": 934203392.0, + "31": 934203392.0, + "32": 934203392.0, + "33": 934203392.0, + "34": 934203392.0, + "35": 934203392.0, + "36": 934203392.0, + "37": 934203392.0, + "38": 934203392.0, + "39": 934203392.0, + "40": 934203392.0, + "41": 934203392.0, + "42": 934203392.0, + "43": 934203392.0, + "44": 934203392.0, + "45": 934203392.0, + "46": 934203392.0, + "47": 934203392.0, + "48": 934203392.0, + "49": 934203392.0, + "50": 934203392.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 21.7688, + "2": 0.32156, + "3": 0.2747, + "4": 0.2768, + "5": 0.27883, + "6": 0.27703, + "7": 0.27847, + "8": 0.27539, + "9": 0.27303, + "10": 0.27375, + "11": 0.28033, + "12": 0.28202, + "13": 0.27965, + "14": 0.27594, + "15": 0.2733, + "16": 0.2734, + "17": 0.2761, + "18": 0.28051, + "19": 0.28074, + "20": 0.28674, + "21": 0.27278, + "22": 0.2765, + "23": 0.27317, + "24": 0.27474, + "25": 0.27496, + "26": 0.27426, + "27": 0.28705, + "28": 0.2814, + "29": 0.28559, + "30": 0.28098, + "31": 0.29666, + "32": 0.28302, + "33": 0.28642, + "34": 0.28282, + "35": 0.28457, + "36": 0.2843, + "37": 0.27728, + "38": 0.2746, + "39": 0.2774, + "40": 0.27644, + "41": 0.27658, + "42": 0.27835, + "43": 0.27776, + "44": 0.27654, + "45": 0.27705, + "46": 0.27383, + "47": 0.27806, + "48": 0.27418, + "49": 0.27617, + "50": 0.27185 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..f64661824cb --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, + "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, + "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, + "15": 10.82054, + "16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, + "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, + "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, + "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, + "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, + "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, + "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, + "50": 9.8399 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, + "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, + "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, + "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, + "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, + "25": 591.0, + "26": 
620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, + "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, + "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, + "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, + "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, + "50": 848.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759898624.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + 
"19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 18.71096, + "2": 0.39649, + "3": 0.33228, + "4": 0.33042, + "5": 0.33036, + "6": 0.3326, + "7": 0.33962, + "8": 0.37041, + "9": 0.33077, + "10": 0.33179, + "11": 0.33053, + "12": 0.33332, + "13": 0.33149, + "14": 0.32928, + "15": 0.33252, + "16": 0.3321, + "17": 0.32661, + "18": 0.32933, + "19": 0.32718, + "20": 0.32982, + "21": 0.32827, + "22": 0.3313, + "23": 0.32836, + "24": 0.3287, + "25": 0.33025, + "26": 0.32605, + "27": 0.33501, + "28": 0.32889, + "29": 0.32971, + "30": 0.3318, + "31": 0.33458, + "32": 0.33222, + "33": 0.33434, + "34": 0.3337, + "35": 0.33221, + "36": 0.32984, + "37": 0.32779, + "38": 0.33131, + "39": 0.33056, + "40": 0.32941, + "41": 0.32351, + "42": 0.32946, + "43": 0.32913, + "44": 0.3283, + "45": 0.32845, + "46": 0.32474, + "47": 0.33097, + "48": 0.32791, + "49": 0.33143, + "50": 0.33005 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
index e9d8d072b10..910068628d2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.86535, - "5": 10.87856, - "10": 10.82981, - "15": 10.82051, + "2": 10.85873, + "3": 10.86283, + "4": 10.84011, + "5": 10.87855, + "6": 10.88851, + "7": 10.86537, + "8": 10.86017, + "9": 10.85989, + "10": 10.8298, + "11": 10.88947, + "12": 10.87508, + "13": 10.87426, + "14": 10.89677, + "15": 10.82053, + "16": 10.825, + "17": 10.78979, + "18": 10.81027, + "19": 10.80535, "20": 10.70395, + "21": 10.66991, + "22": 10.50641, + "23": 10.69004, + "24": 10.56305, "25": 10.49417, - "30": 10.30548, - "35": 10.20188, + "26": 10.56629, + "27": 10.58022, + "28": 10.51575, + "29": 10.55298, + "30": 10.30549, + "31": 10.02244, + "32": 10.40616, + "33": 10.39872, + "34": 10.1377, + "35": 10.20186, + "36": 10.16052, + "37": 10.28973, + "38": 10.11481, + "39": 10.36101, "40": 10.019, - "45": 9.7497, - "50": 9.83994 + "41": 10.07294, + "42": 10.14697, + "43": 9.74685, + "44": 9.87762, + "45": 9.74969, + "46": 9.73382, + "47": 10.07533, + "48": 9.78067, + "49": 9.44782, + "50": 9.83992 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 599.0, - "5": 640.0, - "10": 529.0, - "15": 691.0, - "20": 644.0, - "25": 573.0, - "30": 712.0, - "35": 736.0, - "40": 797.0, - "45": 764.0, - "50": 822.0 + "1": 601.0, + "2": 613.0, + "3": 655.0, + "4": 593.0, + "5": 678.0, + "6": 642.0, + "7": 620.0, + "8": 549.0, + "9": 640.0, + "10": 502.0, + "11": 660.0, + "12": 645.0, + "13": 615.0, + "14": 696.0, + "15": 670.0, + "16": 631.0, 
+ "17": 648.0, + "18": 611.0, + "19": 605.0, + "20": 621.0, + "21": 673.0, + "22": 661.0, + "23": 715.0, + "24": 654.0, + "25": 594.0, + "26": 589.0, + "27": 648.0, + "28": 690.0, + "29": 755.0, + "30": 678.0, + "31": 584.0, + "32": 712.0, + "33": 793.0, + "34": 765.0, + "35": 738.0, + "36": 737.0, + "37": 868.0, + "38": 726.0, + "39": 868.0, + "40": 809.0, + "41": 833.0, + "42": 806.0, + "43": 783.0, + "44": 785.0, + "45": 800.0, + "46": 875.0, + "47": 903.0, + "48": 899.0, + "49": 878.0, + "50": 873.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, "50": 510689792.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 756752896.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, "10": 933156352.0, + "11": 933156352.0, + "12": 
933156352.0, + "13": 933156352.0, + "14": 933156352.0, "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, "50": 933156352.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 19.51044, - "5": 0.29555, - "10": 0.28638, - "15": 0.2812, - "20": 0.28547, - "25": 0.28087, - "30": 0.28444, - "35": 0.28059, - "40": 0.28626, - "45": 0.28541, - "50": 0.2861 + "1": 18.51483, + "2": 0.38305, + "3": 0.31916, + "4": 0.33028, + "5": 0.34426, + "6": 0.35623, + "7": 0.32503, + "8": 0.32084, + "9": 0.32047, + "10": 0.32595, + "11": 0.32652, + "12": 0.32296, + "13": 0.32617, + "14": 0.32833, + "15": 0.32492, + "16": 0.32302, + "17": 0.32458, + "18": 0.32598, + "19": 0.32565, + "20": 0.32747, + "21": 0.3272, + "22": 0.32863, + "23": 0.32847, + "24": 0.32664, + "25": 0.32485, + "26": 0.32858, + "27": 0.32665, + "28": 0.32434, + "29": 0.32998, + "30": 0.33789, + "31": 0.32692, + "32": 0.32521, + "33": 0.32521, + "34": 0.32786, + "35": 0.32813, + "36": 0.32665, + "37": 0.32466, + "38": 0.33006, + "39": 0.32341, + "40": 0.32787, + "41": 0.32762, + "42": 0.32448, + "43": 0.32181, + "44": 0.33035, + "45": 0.32497, + "46": 0.32334, + "47": 0.32904, + "48": 0.32458, + "49": 0.32391, + "50": 0.32652 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..f0eb7547392 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86282, + "4": 10.84009, + "5": 10.87855, + "6": 10.88856, + "7": 10.86539, + "8": 10.86016, + "9": 10.85985, + "10": 10.82981, + "11": 10.8895, + "12": 10.87506, + "13": 10.87424, + "14": 10.89677, + "15": 10.82052, + "16": 10.825, + "17": 10.78983, + "18": 10.81027, + "19": 10.80534, + "20": 10.70395, + "21": 10.66987, + "22": 10.50641, + "23": 10.69005, + "24": 10.56316, + "25": 10.49414, + "26": 10.56627, + "27": 10.58026, + "28": 10.51573, + "29": 10.55295, + "30": 10.30554, + "31": 10.02245, + "32": 10.40617, + "33": 10.39881, + "34": 10.13768, + "35": 10.20187, + "36": 10.16048, + "37": 10.28976, + "38": 10.1148, + "39": 10.361, + "40": 10.019, + "41": 10.07292, + "42": 10.14692, + "43": 9.74685, + "44": 9.8776, + "45": 9.74967, + "46": 9.73383, + "47": 10.07533, + "48": 9.78069, + "49": 9.44781, + "50": 9.83988 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 615.0, + "2": 640.0, + "3": 586.0, + "4": 621.0, + "5": 619.0, + "6": 683.0, + "7": 667.0, + "8": 564.0, + "9": 646.0, + "10": 540.0, + "11": 654.0, + "12": 647.0, + "13": 656.0, + "14": 652.0, + "15": 658.0, + "16": 624.0, + "17": 657.0, + "18": 621.0, + "19": 555.0, + "20": 613.0, + "21": 643.0, + 
"22": 626.0, + "23": 749.0, + "24": 638.0, + "25": 562.0, + "26": 613.0, + "27": 653.0, + "28": 668.0, + "29": 780.0, + "30": 710.0, + "31": 577.0, + "32": 719.0, + "33": 821.0, + "34": 708.0, + "35": 690.0, + "36": 697.0, + "37": 878.0, + "38": 734.0, + "39": 867.0, + "40": 810.0, + "41": 837.0, + "42": 829.0, + "43": 687.0, + "44": 782.0, + "45": 761.0, + "46": 856.0, + "47": 896.0, + "48": 904.0, + "49": 841.0, + "50": 838.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 757799936.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, 
+ "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 19.15382, + "2": 0.382, + "3": 0.2953, + "4": 0.30669, + "5": 0.2864, + "6": 0.28721, + "7": 0.28819, + "8": 0.28856, + "9": 0.3024, + "10": 0.29011, + "11": 0.29044, + "12": 0.28948, + "13": 0.29391, + "14": 0.29381, + "15": 0.29174, + "16": 0.29101, + "17": 0.29087, + "18": 0.30622, + "19": 0.28768, + "20": 0.29439, + "21": 0.28914, + "22": 0.28729, + "23": 0.28503, + "24": 0.28932, + "25": 0.28325, + "26": 0.2863, + "27": 0.28599, + "28": 0.28766, + "29": 0.28539, + "30": 0.28326, + "31": 0.2833, + "32": 0.28222, + "33": 0.28588, + "34": 0.28764, + "35": 0.28697, + "36": 0.28266, + "37": 0.2825, + "38": 0.28576, + "39": 0.28329, + "40": 0.28369, + "41": 0.28375, + "42": 0.28077, + "43": 0.28714, + "44": 0.28289, + "45": 0.28552, + "46": 0.28119, + "47": 0.28252, + "48": 0.28882, + "49": 0.30153, + "50": 0.299 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..cc1700ed493 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86285, + "4": 10.84007, + "5": 10.87854, + "6": 10.88852, + "7": 10.86537, + "8": 10.86015, + "9": 10.85985, + "10": 10.82982, + "11": 10.88949, + "12": 10.87509, + "13": 10.87426, + "14": 10.89674, + "15": 10.82054, + "16": 10.82501, + "17": 10.78985, + "18": 10.81032, + "19": 10.8053, + "20": 10.70397, + "21": 10.66986, + "22": 10.50641, + "23": 10.69001, + "24": 10.56317, + "25": 10.49421, + "26": 10.56628, + "27": 10.58022, + "28": 10.51574, + "29": 10.55292, + "30": 10.30549, + "31": 10.0225, + "32": 10.40617, + "33": 10.39874, + "34": 10.13772, + "35": 10.20187, + "36": 10.16045, + "37": 10.28977, + "38": 10.11478, + "39": 10.36101, + "40": 10.01903, + "41": 10.07294, + "42": 10.14691, + "43": 9.74683, + "44": 9.87762, + "45": 9.74966, + "46": 9.73384, + "47": 10.07535, + "48": 9.78069, + "49": 9.44783, + "50": 9.83992 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 607.0, + "2": 628.0, + "3": 600.0, + "4": 658.0, + "5": 657.0, + "6": 707.0, + "7": 637.0, + "8": 593.0, + "9": 632.0, + "10": 553.0, + "11": 641.0, + "12": 631.0, + "13": 676.0, + "14": 643.0, + "15": 623.0, + "16": 611.0, + "17": 687.0, + "18": 622.0, + "19": 581.0, + "20": 609.0, + "21": 652.0, + "22": 621.0, + "23": 800.0, + "24": 618.0, + "25": 623.0, + "26": 595.0, + "27": 679.0, + "28": 726.0, + "29": 719.0, + "30": 723.0, + "31": 624.0, + "32": 737.0, + "33": 776.0, + "34": 
713.0, + "35": 696.0, + "36": 759.0, + "37": 829.0, + "38": 784.0, + "39": 798.0, + "40": 813.0, + "41": 814.0, + "42": 880.0, + "43": 780.0, + "44": 775.0, + "45": 759.0, + "46": 849.0, + "47": 938.0, + "48": 876.0, + "49": 886.0, + "50": 817.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 
933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 18.67374, + "2": 0.33434, + "3": 0.32862, + "4": 0.3312, + "5": 0.32463, + "6": 0.33221, + "7": 0.33167, + "8": 0.32476, + "9": 0.32742, + "10": 0.32327, + "11": 0.31599, + "12": 0.32511, + "13": 0.32273, + "14": 0.31956, + "15": 0.32777, + "16": 0.32745, + "17": 0.31743, + "18": 0.32418, + "19": 0.32759, + "20": 0.32696, + "21": 0.32321, + "22": 0.32923, + "23": 0.32125, + "24": 0.32088, + "25": 0.32288, + "26": 0.31739, + "27": 0.33667, + "28": 0.32586, + "29": 0.31738, + "30": 0.31392, + "31": 0.32116, + "32": 0.31637, + "33": 0.32029, + "34": 0.32057, + "35": 0.31739, + "36": 0.31341, + "37": 0.32121, + "38": 0.326, + "39": 0.31692, + "40": 0.31511, + "41": 0.32216, + "42": 0.31654, + "43": 0.32474, + "44": 0.32162, + "45": 0.31451, + "46": 0.31434, + "47": 0.32885, + "48": 0.31603, + "49": 0.31732, + "50": 0.3234 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..2ac9a4a8d47 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93292, + "2": 10.93423, + "3": 10.91348, + "4": 10.90322, + "5": 10.92969, + "6": 10.93655, + "7": 10.90277, + "8": 10.92116, + "9": 10.90706, + "10": 10.90473, + "11": 10.88783, + "12": 10.91738, + "13": 10.9119, + "14": 10.91506, + "15": 10.87123, + "16": 10.86131, + "17": 10.82698, + "18": 10.85674, + "19": 10.84055, + "20": 10.74998, + "21": 10.71508, + "22": 10.58112, + "23": 10.72642, + "24": 10.60722, + "25": 10.53752, + "26": 10.61072, + "27": 10.59927, + "28": 10.54955, + "29": 10.56605, + "30": 10.32547, + "31": 10.06698, + "32": 10.43807, + "33": 10.42361, + "34": 10.16018, + "35": 10.22893, + "36": 10.17616, + "37": 10.29235, + "38": 10.13293, + "39": 10.34957, + "40": 10.01973, + "41": 10.07533, + "42": 10.15408, + "43": 9.76085, + "44": 9.88357, + "45": 9.75546, + "46": 9.74963, + "47": 10.07546, + "48": 9.77937, + "49": 9.43813, + "50": 9.84068 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 568.0, + "2": 600.0, + "3": 624.0, + "4": 589.0, + "5": 692.0, + "6": 705.0, + "7": 662.0, + "8": 616.0, + "9": 679.0, + "10": 508.0, + "11": 703.0, + "12": 638.0, + "13": 678.0, + "14": 649.0, + "15": 659.0, + "16": 606.0, + "17": 663.0, + "18": 613.0, + "19": 615.0, + "20": 598.0, + "21": 639.0, + "22": 628.0, + "23": 675.0, + "24": 590.0, + "25": 595.0, + "26": 588.0, + "27": 678.0, + "28": 687.0, + "29": 688.0, + "30": 681.0, + "31": 618.0, + "32": 706.0, + "33": 758.0, + "34": 683.0, + "35": 741.0, + "36": 694.0, + "37": 819.0, + "38": 786.0, + "39": 866.0, + "40": 779.0, + "41": 838.0, + "42": 837.0, + "43": 695.0, + "44": 716.0, + "45": 738.0, + "46": 802.0, + "47": 926.0, + "48": 854.0, + "49": 811.0, + 
"50": 807.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 431783936.0, + "2": 431783936.0, + "3": 431783936.0, + "4": 431783936.0, + "5": 431783936.0, + "6": 431783936.0, + "7": 431783936.0, + "8": 431783936.0, + "9": 431783936.0, + "10": 431783936.0, + "11": 431783936.0, + "12": 431783936.0, + "13": 431783936.0, + "14": 431783936.0, + "15": 431783936.0, + "16": 431783936.0, + "17": 431783936.0, + "18": 431783936.0, + "19": 431783936.0, + "20": 431783936.0, + "21": 431783936.0, + "22": 431783936.0, + "23": 431783936.0, + "24": 431783936.0, + "25": 431783936.0, + "26": 431783936.0, + "27": 431783936.0, + "28": 431783936.0, + "29": 431783936.0, + "30": 431783936.0, + "31": 431783936.0, + "32": 431783936.0, + "33": 431783936.0, + "34": 431783936.0, + "35": 431783936.0, + "36": 431783936.0, + "37": 431783936.0, + "38": 431783936.0, + "39": 431783936.0, + "40": 431783936.0, + "41": 431783936.0, + "42": 431783936.0, + "43": 431783936.0, + "44": 431783936.0, + "45": 431783936.0, + "46": 431783936.0, + "47": 431783936.0, + "48": 431783936.0, + "49": 431783936.0, + "50": 431783936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 678383616.0, + "2": 854262272.0, + "3": 854262272.0, + "4": 854262272.0, + "5": 854262272.0, + "6": 854262272.0, + "7": 855309824.0, + "8": 855309824.0, + "9": 855309824.0, + "10": 855309824.0, + "11": 855309824.0, + "12": 855309824.0, + "13": 855309824.0, + "14": 855310848.0, + "15": 855310848.0, + "16": 855310848.0, + "17": 855310848.0, + "18": 855310848.0, + "19": 855310848.0, + "20": 855310848.0, + "21": 855310848.0, + "22": 855310848.0, + "23": 855310848.0, + "24": 855310848.0, + "25": 855310848.0, + "26": 855310848.0, + "27": 855310848.0, + "28": 855310848.0, + "29": 855310848.0, + "30": 855310848.0, + "31": 855310848.0, + "32": 855310848.0, + "33": 855310848.0, + "34": 855310848.0, + "35": 855310848.0, 
+ "36": 855310848.0, + "37": 855310848.0, + "38": 855310848.0, + "39": 855310848.0, + "40": 855310848.0, + "41": 855310848.0, + "42": 855310848.0, + "43": 855310848.0, + "44": 855310848.0, + "45": 855310848.0, + "46": 855311360.0, + "47": 855311360.0, + "48": 855311360.0, + "49": 855311360.0, + "50": 855311360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 23.53527, + "2": 0.45843, + "3": 0.41722, + "4": 0.41343, + "5": 0.43098, + "6": 0.41032, + "7": 0.42789, + "8": 0.4109, + "9": 0.41334, + "10": 0.42277, + "11": 0.41109, + "12": 0.4255, + "13": 0.41083, + "14": 0.41498, + "15": 0.4158, + "16": 0.40724, + "17": 0.42608, + "18": 0.40815, + "19": 0.41361, + "20": 0.40774, + "21": 0.41448, + "22": 0.42245, + "23": 0.40681, + "24": 0.41744, + "25": 0.41008, + "26": 0.41229, + "27": 0.42006, + "28": 0.40569, + "29": 0.44026, + "30": 0.40835, + "31": 0.41007, + "32": 0.41186, + "33": 0.40618, + "34": 0.42247, + "35": 0.40587, + "36": 0.41189, + "37": 0.40876, + "38": 0.41309, + "39": 0.42068, + "40": 0.40576, + "41": 0.41665, + "42": 0.40588, + "43": 0.41519, + "44": 0.41465, + "45": 0.63205, + "46": 0.42162, + "47": 0.41448, + "48": 0.42206, + "49": 0.41268, + "50": 0.41606 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..1e9b2b8989e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + 
"step_interval": 1, + "values": { + "1": 10.93292, + "2": 10.93423, + "3": 10.91345, + "4": 10.90321, + "5": 10.92971, + "6": 10.93655, + "7": 10.90279, + "8": 10.92115, + "9": 10.90703, + "10": 10.90476, + "11": 10.88787, + "12": 10.91736, + "13": 10.91188, + "14": 10.91505, + "15": 10.87126, + "16": 10.86126, + "17": 10.82696, + "18": 10.85675, + "19": 10.8406, + "20": 10.74999, + "21": 10.71507, + "22": 10.58116, + "23": 10.72641, + "24": 10.60728, + "25": 10.53754, + "26": 10.61066, + "27": 10.59928, + "28": 10.54957, + "29": 10.56599, + "30": 10.32553, + "31": 10.06697, + "32": 10.43809, + "33": 10.42361, + "34": 10.16014, + "35": 10.22896, + "36": 10.17612, + "37": 10.29237, + "38": 10.13298, + "39": 10.34958, + "40": 10.01972, + "41": 10.07534, + "42": 10.1541, + "43": 9.76093, + "44": 9.8836, + "45": 9.75546, + "46": 9.74961, + "47": 10.07546, + "48": 9.77936, + "49": 9.43816, + "50": 9.84073 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 565.0, + "2": 625.0, + "3": 618.0, + "4": 618.0, + "5": 630.0, + "6": 653.0, + "7": 581.0, + "8": 630.0, + "9": 648.0, + "10": 502.0, + "11": 696.0, + "12": 653.0, + "13": 680.0, + "14": 629.0, + "15": 599.0, + "16": 670.0, + "17": 649.0, + "18": 580.0, + "19": 594.0, + "20": 578.0, + "21": 616.0, + "22": 609.0, + "23": 655.0, + "24": 611.0, + "25": 593.0, + "26": 595.0, + "27": 660.0, + "28": 756.0, + "29": 745.0, + "30": 691.0, + "31": 611.0, + "32": 676.0, + "33": 767.0, + "34": 669.0, + "35": 757.0, + "36": 794.0, + "37": 793.0, + "38": 778.0, + "39": 833.0, + "40": 785.0, + "41": 787.0, + "42": 769.0, + "43": 751.0, + "44": 714.0, + "45": 769.0, + "46": 835.0, + "47": 902.0, + "48": 853.0, + "49": 807.0, + "50": 823.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 431783936.0, + "2": 431783936.0, + "3": 431783936.0, + "4": 431783936.0, + "5": 431783936.0, + "6": 431783936.0, + "7": 
431783936.0, + "8": 431783936.0, + "9": 431783936.0, + "10": 431783936.0, + "11": 431783936.0, + "12": 431783936.0, + "13": 431783936.0, + "14": 431783936.0, + "15": 431783936.0, + "16": 431783936.0, + "17": 431783936.0, + "18": 431783936.0, + "19": 431783936.0, + "20": 431783936.0, + "21": 431783936.0, + "22": 431783936.0, + "23": 431783936.0, + "24": 431783936.0, + "25": 431783936.0, + "26": 431783936.0, + "27": 431783936.0, + "28": 431783936.0, + "29": 431783936.0, + "30": 431783936.0, + "31": 431783936.0, + "32": 431783936.0, + "33": 431783936.0, + "34": 431783936.0, + "35": 431783936.0, + "36": 431783936.0, + "37": 431783936.0, + "38": 431783936.0, + "39": 431783936.0, + "40": 431783936.0, + "41": 431783936.0, + "42": 431783936.0, + "43": 431783936.0, + "44": 431783936.0, + "45": 431783936.0, + "46": 431783936.0, + "47": 431783936.0, + "48": 431783936.0, + "49": 431783936.0, + "50": 431783936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 677335040.0, + "2": 854262784.0, + "3": 854262784.0, + "4": 854262784.0, + "5": 854262784.0, + "6": 854262784.0, + "7": 854262784.0, + "8": 854262784.0, + "9": 854262784.0, + "10": 854262784.0, + "11": 854262784.0, + "12": 854262784.0, + "13": 854262784.0, + "14": 854262784.0, + "15": 854262784.0, + "16": 854262784.0, + "17": 854262784.0, + "18": 854262784.0, + "19": 854262784.0, + "20": 854262784.0, + "21": 854262784.0, + "22": 854262784.0, + "23": 854262784.0, + "24": 854262784.0, + "25": 854262784.0, + "26": 854262784.0, + "27": 854262784.0, + "28": 854262784.0, + "29": 854262784.0, + "30": 854262784.0, + "31": 854262784.0, + "32": 854262784.0, + "33": 854262784.0, + "34": 854262784.0, + "35": 854262784.0, + "36": 855311360.0, + "37": 855311360.0, + "38": 855311360.0, + "39": 855311360.0, + "40": 855311360.0, + "41": 855311360.0, + "42": 855311360.0, + "43": 855311360.0, + "44": 855311360.0, + "45": 855311360.0, + "46": 855311360.0, + "47": 
855311360.0, + "48": 855311360.0, + "49": 855311360.0, + "50": 855311360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 20.54291, + "2": 0.45304, + "3": 0.40799, + "4": 0.41533, + "5": 0.59635, + "6": 0.41138, + "7": 0.41402, + "8": 0.41118, + "9": 0.41133, + "10": 0.41277, + "11": 0.41021, + "12": 0.41466, + "13": 0.40958, + "14": 0.40717, + "15": 0.40964, + "16": 0.40616, + "17": 0.41407, + "18": 0.40562, + "19": 0.40279, + "20": 0.40656, + "21": 0.40188, + "22": 0.4164, + "23": 0.40487, + "24": 0.41094, + "25": 0.4165, + "26": 0.40755, + "27": 0.41769, + "28": 0.40789, + "29": 0.41516, + "30": 0.41364, + "31": 0.41649, + "32": 0.4104, + "33": 0.40992, + "34": 0.41619, + "35": 0.41207, + "36": 0.40835, + "37": 0.41126, + "38": 0.40711, + "39": 0.4143, + "40": 0.40503, + "41": 0.40421, + "42": 0.40304, + "43": 0.39915, + "44": 0.41215, + "45": 0.40298, + "46": 0.40298, + "47": 0.611, + "48": 0.39997, + "49": 0.40324, + "50": 0.40197 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index ecd9a58df01..5fd95d06800 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, "15": 10.82054, + 
"16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, "50": 9.8399 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, "50": 848.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 512262656.0, - "5": 512262656.0, - "10": 512262656.0, - "15": 512262656.0, - "20": 512262656.0, - "25": 512262656.0, - "30": 512262656.0, - "35": 512262656.0, - "40": 512262656.0, - "45": 512262656.0, - "50": 512262656.0 + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + 
"12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 755703296.0, - "5": 941019136.0, - "10": 941019136.0, - "15": 941020160.0, - "20": 941020160.0, - "25": 941020160.0, - "30": 941020160.0, - "35": 941020160.0, - "40": 941020160.0, - "45": 941020160.0, - "50": 941020160.0 + "1": 756752896.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + 
"41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 15.67966, - "5": 0.28203, - "10": 0.27605, - "15": 0.28683, - "20": 0.2914, - "25": 0.28469, - "30": 0.2918, - "35": 0.28556, - "40": 0.28361, - "45": 0.28565, - "50": 0.28831 + "1": 17.87202, + "2": 0.35495, + "3": 0.32873, + "4": 0.33459, + "5": 0.32873, + "6": 0.33081, + "7": 0.33232, + "8": 0.3289, + "9": 0.33298, + "10": 0.33358, + "11": 0.33283, + "12": 0.33379, + "13": 0.33111, + "14": 0.3333, + "15": 0.33177, + "16": 0.33147, + "17": 0.33096, + "18": 0.33187, + "19": 0.33163, + "20": 0.33051, + "21": 0.33361, + "22": 0.32835, + "23": 0.32736, + "24": 0.32984, + "25": 0.32922, + "26": 0.32419, + "27": 0.32825, + "28": 0.33117, + "29": 0.32926, + "30": 0.32943, + "31": 0.33565, + "32": 0.33382, + "33": 0.33313, + "34": 0.33602, + "35": 0.32634, + "36": 0.33173, + "37": 0.33173, + "38": 0.33145, + "39": 0.32666, + "40": 0.33039, + "41": 0.3278, + "42": 0.32774, + "43": 0.33361, + "44": 0.32996, + "45": 0.32769, + "46": 0.3288, + "47": 0.33016, + "48": 0.33102, + "49": 0.33052, + "50": 0.33008 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..3730bf58aa1 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 
10.85873, + "3": 10.86284, + "4": 10.84009, + "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, + "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, + "15": 10.82054, + "16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, + "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, + "25": 10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, + "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, + "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, + "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, + "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, + "50": 9.8399 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, + "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, + "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, + "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, + "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, + "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, + "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, + "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, + "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, + "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, + "50": 848.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 
510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 934203392.0, + "39": 934203392.0, + "40": 934203392.0, + "41": 934203392.0, + "42": 934203392.0, + "43": 934203392.0, + "44": 934203392.0, + "45": 934203392.0, + "46": 934203392.0, + "47": 934203392.0, + "48": 934203392.0, + "49": 934203392.0, + "50": 
934203392.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 18.70462, + "2": 0.49178, + "3": 0.30373, + "4": 0.3001, + "5": 0.29469, + "6": 0.29224, + "7": 0.29428, + "8": 0.29177, + "9": 0.2949, + "10": 0.29498, + "11": 0.29024, + "12": 0.28647, + "13": 0.29815, + "14": 0.28835, + "15": 0.28856, + "16": 0.29348, + "17": 0.28749, + "18": 0.28567, + "19": 0.28368, + "20": 0.29149, + "21": 0.29096, + "22": 0.28857, + "23": 0.28606, + "24": 0.29136, + "25": 0.29054, + "26": 0.28694, + "27": 0.28152, + "28": 0.28851, + "29": 0.28838, + "30": 0.2819, + "31": 0.29168, + "32": 0.28475, + "33": 0.28928, + "34": 0.32279, + "35": 0.28586, + "36": 0.2887, + "37": 0.2901, + "38": 0.29895, + "39": 0.28981, + "40": 0.28651, + "41": 0.30755, + "42": 0.3078, + "43": 0.30107, + "44": 0.28402, + "45": 0.28696, + "46": 0.28819, + "47": 0.2889, + "48": 0.28688, + "49": 0.28638, + "50": 0.28429 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..cd45ff021d9 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86284, + "4": 10.84009, + "5": 10.87856, + "6": 10.88856, + "7": 10.86532, + "8": 10.86017, + "9": 10.8599, + "10": 10.82981, + "11": 10.8895, + "12": 10.8751, + "13": 10.87423, + "14": 10.89675, + "15": 10.82054, + "16": 10.82504, + "17": 10.78983, + "18": 10.81029, + "19": 10.80535, + "20": 10.70398, + "21": 10.66993, + "22": 10.50643, + "23": 10.69004, + "24": 10.56314, + "25": 
10.4942, + "26": 10.56628, + "27": 10.58025, + "28": 10.51571, + "29": 10.55299, + "30": 10.30549, + "31": 10.02245, + "32": 10.40614, + "33": 10.39874, + "34": 10.13771, + "35": 10.20184, + "36": 10.16052, + "37": 10.28973, + "38": 10.11474, + "39": 10.361, + "40": 10.01903, + "41": 10.07292, + "42": 10.14698, + "43": 9.74687, + "44": 9.87766, + "45": 9.74966, + "46": 9.73383, + "47": 10.07535, + "48": 9.78068, + "49": 9.44784, + "50": 9.8399 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 653.0, + "2": 642.0, + "3": 630.0, + "4": 585.0, + "5": 635.0, + "6": 687.0, + "7": 615.0, + "8": 601.0, + "9": 607.0, + "10": 522.0, + "11": 637.0, + "12": 675.0, + "13": 649.0, + "14": 648.0, + "15": 640.0, + "16": 602.0, + "17": 668.0, + "18": 634.0, + "19": 593.0, + "20": 579.0, + "21": 633.0, + "22": 597.0, + "23": 756.0, + "24": 612.0, + "25": 591.0, + "26": 620.0, + "27": 700.0, + "28": 705.0, + "29": 795.0, + "30": 752.0, + "31": 628.0, + "32": 712.0, + "33": 752.0, + "34": 737.0, + "35": 741.0, + "36": 770.0, + "37": 861.0, + "38": 823.0, + "39": 812.0, + "40": 814.0, + "41": 826.0, + "42": 801.0, + "43": 769.0, + "44": 822.0, + "45": 777.0, + "46": 828.0, + "47": 878.0, + "48": 915.0, + "49": 908.0, + "50": 848.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + 
"30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 934201856.0, + "34": 934201856.0, + "35": 934201856.0, + "36": 934201856.0, + "37": 934201856.0, + "38": 934201856.0, + "39": 934201856.0, + "40": 934201856.0, + "41": 934201856.0, + "42": 934201856.0, + "43": 934201856.0, + "44": 934201856.0, + "45": 934201856.0, + "46": 934201856.0, + "47": 934201856.0, + "48": 934201856.0, + "49": 934201856.0, + "50": 934201856.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.72917, + "2": 0.36269, + "3": 0.33585, + "4": 0.33878, + "5": 0.33758, + "6": 0.33453, + "7": 0.33628, + "8": 0.33416, + "9": 0.33309, + "10": 0.33521, + "11": 0.33536, + "12": 0.33148, + "13": 0.33565, + "14": 0.33401, + "15": 0.33029, + "16": 0.33788, + "17": 0.33302, + "18": 0.33337, + 
"19": 0.33761, + "20": 0.33672, + "21": 0.33256, + "22": 0.3374, + "23": 0.33652, + "24": 0.33672, + "25": 0.33982, + "26": 0.3335, + "27": 0.3328, + "28": 0.33835, + "29": 0.33338, + "30": 0.33371, + "31": 0.33991, + "32": 0.33259, + "33": 0.33537, + "34": 0.33777, + "35": 0.33494, + "36": 0.33504, + "37": 0.33915, + "38": 0.33462, + "39": 0.33387, + "40": 0.33791, + "41": 0.33426, + "42": 0.33834, + "43": 0.33785, + "44": 0.32761, + "45": 0.32857, + "46": 0.33205, + "47": 0.3355, + "48": 0.33535, + "49": 0.33792, + "50": 0.33613 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 7d91181b5b6..7f2dfc8b2bc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.86535, + "2": 10.85873, + "3": 10.86283, + "4": 10.84007, "5": 10.87856, - "10": 10.82982, - "15": 10.82057, - "20": 10.70395, - "25": 10.49424, - "30": 10.30548, + "6": 10.88854, + "7": 10.86537, + "8": 10.86016, + "9": 10.85989, + "10": 10.82983, + "11": 10.88946, + "12": 10.8751, + "13": 10.87425, + "14": 10.89673, + "15": 10.82054, + "16": 10.82498, + "17": 10.78981, + "18": 10.81028, + "19": 10.80532, + "20": 10.70399, + "21": 10.66989, + "22": 10.50644, + "23": 10.69005, + "24": 10.56315, + "25": 10.49423, + "26": 10.56628, + "27": 10.58023, + "28": 10.51568, + "29": 10.55294, + "30": 10.30549, + "31": 10.02244, + "32": 10.40614, + "33": 
10.39877, + "34": 10.13771, "35": 10.20187, - "40": 10.01905, - "45": 9.74965, - "50": 9.83993 + "36": 10.16047, + "37": 10.28971, + "38": 10.11478, + "39": 10.36106, + "40": 10.01903, + "41": 10.0729, + "42": 10.14696, + "43": 9.74682, + "44": 9.87762, + "45": 9.74966, + "46": 9.73383, + "47": 10.07536, + "48": 9.7807, + "49": 9.44779, + "50": 9.83987 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 628.0, - "5": 596.0, - "10": 550.0, - "15": 668.0, - "20": 597.0, - "25": 596.0, - "30": 721.0, - "35": 733.0, - "40": 770.0, - "45": 787.0, - "50": 834.0 + "1": 603.0, + "2": 644.0, + "3": 642.0, + "4": 665.0, + "5": 647.0, + "6": 668.0, + "7": 615.0, + "8": 545.0, + "9": 591.0, + "10": 540.0, + "11": 689.0, + "12": 629.0, + "13": 696.0, + "14": 658.0, + "15": 592.0, + "16": 672.0, + "17": 674.0, + "18": 623.0, + "19": 635.0, + "20": 573.0, + "21": 651.0, + "22": 625.0, + "23": 761.0, + "24": 631.0, + "25": 593.0, + "26": 614.0, + "27": 646.0, + "28": 744.0, + "29": 756.0, + "30": 699.0, + "31": 600.0, + "32": 686.0, + "33": 777.0, + "34": 734.0, + "35": 765.0, + "36": 763.0, + "37": 876.0, + "38": 802.0, + "39": 832.0, + "40": 788.0, + "41": 811.0, + "42": 850.0, + "43": 765.0, + "44": 854.0, + "45": 853.0, + "46": 878.0, + "47": 862.0, + "48": 881.0, + "49": 859.0, + "50": 919.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, "25": 510689792.0, + "26": 
510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, "50": 510689792.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, "10": 933156352.0, - "15": 934202368.0, - "20": 934202368.0, - "25": 934202368.0, - "30": 934202368.0, - "35": 934202368.0, - "40": 934202368.0, - "45": 934202368.0, - "50": 934202368.0 + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 18.4128, - "5": 0.28948, - "10": 0.28908, - 
"15": 0.29449, - "20": 0.2915, - "25": 0.29014, - "30": 0.29089, - "35": 0.2912, - "40": 0.29097, - "45": 0.28976, - "50": 0.28881 + "1": 17.48669, + "2": 0.35686, + "3": 0.33796, + "4": 0.33709, + "5": 0.33802, + "6": 0.33381, + "7": 0.33842, + "8": 0.3348, + "9": 0.33686, + "10": 0.3401, + "11": 0.34206, + "12": 0.33741, + "13": 0.34235, + "14": 0.33743, + "15": 0.34813, + "16": 0.342, + "17": 0.33354, + "18": 0.33386, + "19": 0.32453, + "20": 0.31766, + "21": 0.31357, + "22": 0.3174, + "23": 0.31757, + "24": 0.31831, + "25": 0.3365, + "26": 0.33734, + "27": 0.33686, + "28": 0.32433, + "29": 0.3211, + "30": 0.31641, + "31": 0.32085, + "32": 0.32356, + "33": 0.31983, + "34": 0.31994, + "35": 0.32561, + "36": 0.3216, + "37": 0.31934, + "38": 0.31931, + "39": 0.32259, + "40": 0.31785, + "41": 0.321, + "42": 0.32432, + "43": 0.32102, + "44": 0.31762, + "45": 0.32401, + "46": 0.32061, + "47": 0.3186, + "48": 0.32263, + "49": 0.31974, + "50": 0.31888 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..5c64711360d --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86283, + "4": 10.84006, + "5": 10.87853, + "6": 10.88852, + "7": 10.86537, + "8": 10.86018, + "9": 10.85991, + "10": 10.82984, + "11": 10.88948, + "12": 10.87506, + "13": 10.87427, + "14": 10.8968, + "15": 10.82052, + "16": 10.82498, + "17": 10.78984, + "18": 10.8103, + "19": 10.80531, + "20": 10.70396, + 
"21": 10.66991, + "22": 10.50642, + "23": 10.69005, + "24": 10.56311, + "25": 10.49418, + "26": 10.56624, + "27": 10.58025, + "28": 10.51574, + "29": 10.55295, + "30": 10.3055, + "31": 10.0225, + "32": 10.40617, + "33": 10.39874, + "34": 10.13767, + "35": 10.20188, + "36": 10.16051, + "37": 10.28971, + "38": 10.11484, + "39": 10.361, + "40": 10.01901, + "41": 10.07292, + "42": 10.14698, + "43": 9.74684, + "44": 9.87759, + "45": 9.74966, + "46": 9.73384, + "47": 10.07536, + "48": 9.78071, + "49": 9.44782, + "50": 9.83988 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 597.0, + "2": 639.0, + "3": 612.0, + "4": 595.0, + "5": 633.0, + "6": 679.0, + "7": 626.0, + "8": 555.0, + "9": 700.0, + "10": 529.0, + "11": 658.0, + "12": 622.0, + "13": 660.0, + "14": 622.0, + "15": 690.0, + "16": 639.0, + "17": 671.0, + "18": 653.0, + "19": 595.0, + "20": 584.0, + "21": 656.0, + "22": 560.0, + "23": 743.0, + "24": 616.0, + "25": 626.0, + "26": 623.0, + "27": 680.0, + "28": 680.0, + "29": 750.0, + "30": 690.0, + "31": 560.0, + "32": 794.0, + "33": 753.0, + "34": 693.0, + "35": 696.0, + "36": 760.0, + "37": 852.0, + "38": 792.0, + "39": 849.0, + "40": 773.0, + "41": 842.0, + "42": 798.0, + "43": 732.0, + "44": 751.0, + "45": 788.0, + "46": 834.0, + "47": 853.0, + "48": 888.0, + "49": 919.0, + "50": 813.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 
510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 934204928.0, + "11": 934204928.0, + "12": 934204928.0, + "13": 934204928.0, + "14": 934204928.0, + "15": 934204928.0, + "16": 934204928.0, + "17": 934204928.0, + "18": 934204928.0, + "19": 934204928.0, + "20": 934204928.0, + "21": 934204928.0, + "22": 934204928.0, + "23": 934204928.0, + "24": 934204928.0, + "25": 934204928.0, + "26": 934204928.0, + "27": 934204928.0, + "28": 934204928.0, + "29": 934204928.0, + "30": 934204928.0, + "31": 934204928.0, + "32": 934204928.0, + "33": 934204928.0, + "34": 934204928.0, + "35": 934204928.0, + "36": 934204928.0, + "37": 934204928.0, + "38": 934204928.0, + "39": 934204928.0, + "40": 934204928.0, + "41": 934204928.0, + "42": 934204928.0, + "43": 934204928.0, + "44": 934204928.0, + "45": 934204928.0, + "46": 934204928.0, + "47": 934204928.0, + "48": 934204928.0, + "49": 934204928.0, + "50": 934204928.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 18.56725, + "2": 0.36563, + "3": 0.29793, + "4": 0.29146, + "5": 0.29688, + "6": 0.29337, + "7": 0.29262, + "8": 0.28985, + "9": 0.29835, + "10": 0.32046, + "11": 0.28909, + "12": 0.29047, + "13": 0.29281, + "14": 
0.29357, + "15": 0.29127, + "16": 0.29335, + "17": 0.29304, + "18": 0.29416, + "19": 0.29357, + "20": 0.29492, + "21": 0.28986, + "22": 0.29152, + "23": 0.29187, + "24": 0.29293, + "25": 0.28805, + "26": 0.28928, + "27": 0.28866, + "28": 0.29096, + "29": 0.28896, + "30": 0.2822, + "31": 0.31729, + "32": 0.28381, + "33": 0.28187, + "34": 0.28158, + "35": 0.28315, + "36": 0.28905, + "37": 0.28877, + "38": 0.29206, + "39": 0.28679, + "40": 0.28818, + "41": 0.28755, + "42": 0.28911, + "43": 0.28782, + "44": 0.28493, + "45": 0.28392, + "46": 0.28061, + "47": 0.29507, + "48": 0.28442, + "49": 0.28204, + "50": 0.28301 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..524007ed7d6 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86283, + "4": 10.84007, + "5": 10.87854, + "6": 10.88853, + "7": 10.86532, + "8": 10.8602, + "9": 10.85991, + "10": 10.82981, + "11": 10.8895, + "12": 10.87507, + "13": 10.87426, + "14": 10.89678, + "15": 10.82054, + "16": 10.825, + "17": 10.7898, + "18": 10.8103, + "19": 10.80536, + "20": 10.70398, + "21": 10.66992, + "22": 10.50644, + "23": 10.69005, + "24": 10.5631, + "25": 10.49418, + "26": 10.56626, + "27": 10.58028, + "28": 10.51572, + "29": 10.55298, + "30": 10.30549, + "31": 10.02244, + "32": 10.40615, + "33": 10.3988, + "34": 10.13773, + "35": 10.20188, + "36": 10.1605, + "37": 10.28974, + "38": 10.11477, + "39": 10.36102, + "40": 10.01902, + 
"41": 10.07292, + "42": 10.14694, + "43": 9.74685, + "44": 9.87766, + "45": 9.74965, + "46": 9.73384, + "47": 10.07535, + "48": 9.7807, + "49": 9.44783, + "50": 9.83991 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 647.0, + "2": 614.0, + "3": 640.0, + "4": 603.0, + "5": 600.0, + "6": 683.0, + "7": 630.0, + "8": 565.0, + "9": 671.0, + "10": 531.0, + "11": 670.0, + "12": 643.0, + "13": 626.0, + "14": 635.0, + "15": 655.0, + "16": 643.0, + "17": 693.0, + "18": 634.0, + "19": 648.0, + "20": 644.0, + "21": 690.0, + "22": 606.0, + "23": 694.0, + "24": 565.0, + "25": 605.0, + "26": 636.0, + "27": 638.0, + "28": 721.0, + "29": 750.0, + "30": 760.0, + "31": 572.0, + "32": 705.0, + "33": 816.0, + "34": 737.0, + "35": 720.0, + "36": 710.0, + "37": 862.0, + "38": 763.0, + "39": 909.0, + "40": 795.0, + "41": 776.0, + "42": 858.0, + "43": 771.0, + "44": 858.0, + "45": 857.0, + "46": 864.0, + "47": 880.0, + "48": 923.0, + "49": 899.0, + "50": 868.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 
510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 757801472.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.58309, + "2": 0.34736, + "3": 0.32683, + "4": 0.3279, + "5": 0.32934, + "6": 0.33179, + "7": 0.3281, + "8": 0.3324, + "9": 0.32989, + "10": 0.32742, + "11": 0.33009, + "12": 0.3345, + "13": 0.33455, + "14": 0.3346, + "15": 0.33747, + "16": 0.33625, + "17": 0.3454, + "18": 0.33586, + "19": 0.33227, + "20": 0.33242, + "21": 0.33093, + "22": 0.33378, + "23": 0.33439, + "24": 0.33159, + "25": 0.32826, + "26": 0.33259, + "27": 0.33154, + "28": 0.32855, + "29": 0.32973, + "30": 0.33267, + "31": 0.33156, + "32": 0.32832, + "33": 0.33304, + "34": 0.32817, + "35": 0.32993, + 
"36": 0.33154, + "37": 0.32842, + "38": 0.32508, + "39": 0.33067, + "40": 0.33115, + "41": 0.32719, + "42": 0.33205, + "43": 0.3472, + "44": 0.33564, + "45": 0.33202, + "46": 0.33051, + "47": 0.32871, + "48": 0.33055, + "49": 0.33399, + "50": 0.33114 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..14cd1d474ea --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93292, + "2": 10.93423, + "3": 10.91347, + "4": 10.90321, + "5": 10.92968, + "6": 10.93655, + "7": 10.90282, + "8": 10.92114, + "9": 10.9071, + "10": 10.90475, + "11": 10.88788, + "12": 10.91736, + "13": 10.91189, + "14": 10.91506, + "15": 10.87125, + "16": 10.86126, + "17": 10.82696, + "18": 10.85678, + "19": 10.84055, + "20": 10.75, + "21": 10.71504, + "22": 10.58118, + "23": 10.72644, + "24": 10.60729, + "25": 10.53753, + "26": 10.61069, + "27": 10.5993, + "28": 10.54958, + "29": 10.56602, + "30": 10.32554, + "31": 10.06693, + "32": 10.4381, + "33": 10.42361, + "34": 10.16014, + "35": 10.22895, + "36": 10.17612, + "37": 10.29235, + "38": 10.13293, + "39": 10.34955, + "40": 10.01972, + "41": 10.07533, + "42": 10.1541, + "43": 9.76091, + "44": 9.88354, + "45": 9.75546, + "46": 9.7496, + "47": 10.07548, + "48": 9.77939, + "49": 9.43816, + "50": 9.84074 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 602.0, + "2": 601.0, + "3": 651.0, + "4": 566.0, + "5": 693.0, + 
"6": 637.0, + "7": 601.0, + "8": 628.0, + "9": 593.0, + "10": 579.0, + "11": 685.0, + "12": 630.0, + "13": 654.0, + "14": 624.0, + "15": 569.0, + "16": 630.0, + "17": 623.0, + "18": 588.0, + "19": 594.0, + "20": 599.0, + "21": 633.0, + "22": 585.0, + "23": 642.0, + "24": 613.0, + "25": 592.0, + "26": 662.0, + "27": 617.0, + "28": 709.0, + "29": 691.0, + "30": 693.0, + "31": 574.0, + "32": 708.0, + "33": 781.0, + "34": 693.0, + "35": 712.0, + "36": 777.0, + "37": 799.0, + "38": 765.0, + "39": 865.0, + "40": 811.0, + "41": 795.0, + "42": 818.0, + "43": 730.0, + "44": 730.0, + "45": 781.0, + "46": 788.0, + "47": 884.0, + "48": 833.0, + "49": 841.0, + "50": 839.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 431783936.0, + "2": 431783936.0, + "3": 431783936.0, + "4": 431783936.0, + "5": 431783936.0, + "6": 431783936.0, + "7": 431783936.0, + "8": 431783936.0, + "9": 431783936.0, + "10": 431783936.0, + "11": 431783936.0, + "12": 431783936.0, + "13": 431783936.0, + "14": 431783936.0, + "15": 431783936.0, + "16": 431783936.0, + "17": 431783936.0, + "18": 431783936.0, + "19": 431783936.0, + "20": 431783936.0, + "21": 431783936.0, + "22": 431783936.0, + "23": 431783936.0, + "24": 431783936.0, + "25": 431783936.0, + "26": 431783936.0, + "27": 431783936.0, + "28": 431783936.0, + "29": 431783936.0, + "30": 431783936.0, + "31": 431783936.0, + "32": 431783936.0, + "33": 431783936.0, + "34": 431783936.0, + "35": 431783936.0, + "36": 431783936.0, + "37": 431783936.0, + "38": 431783936.0, + "39": 431783936.0, + "40": 431783936.0, + "41": 431783936.0, + "42": 431783936.0, + "43": 431783936.0, + "44": 431783936.0, + "45": 431783936.0, + "46": 431783936.0, + "47": 431783936.0, + "48": 431783936.0, + "49": 431783936.0, + "50": 431783936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 677335040.0, + "2": 853214208.0, + "3": 853214208.0, + "4": 
853214208.0, + "5": 854262272.0, + "6": 854262272.0, + "7": 854262272.0, + "8": 854262272.0, + "9": 854262272.0, + "10": 854262272.0, + "11": 854262272.0, + "12": 854262272.0, + "13": 854262272.0, + "14": 854262272.0, + "15": 854262784.0, + "16": 854262784.0, + "17": 854262784.0, + "18": 854262784.0, + "19": 854262784.0, + "20": 854262784.0, + "21": 854262784.0, + "22": 855309824.0, + "23": 855309824.0, + "24": 855309824.0, + "25": 855309824.0, + "26": 855309824.0, + "27": 855309824.0, + "28": 855309824.0, + "29": 855309824.0, + "30": 855309824.0, + "31": 855309824.0, + "32": 855309824.0, + "33": 855309824.0, + "34": 855309824.0, + "35": 855309824.0, + "36": 855309824.0, + "37": 855309824.0, + "38": 855309824.0, + "39": 855309824.0, + "40": 855309824.0, + "41": 855309824.0, + "42": 855309824.0, + "43": 855309824.0, + "44": 855309824.0, + "45": 855309824.0, + "46": 855309824.0, + "47": 855309824.0, + "48": 855309824.0, + "49": 855309824.0, + "50": 855309824.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.80821, + "2": 0.44808, + "3": 0.40988, + "4": 0.40164, + "5": 0.4125, + "6": 0.40088, + "7": 0.40048, + "8": 0.40898, + "9": 0.39981, + "10": 0.40981, + "11": 0.3988, + "12": 0.39912, + "13": 0.40567, + "14": 0.39849, + "15": 0.40867, + "16": 0.39758, + "17": 0.39933, + "18": 0.40941, + "19": 0.39811, + "20": 0.40972, + "21": 0.39879, + "22": 0.40217, + "23": 0.40454, + "24": 0.397, + "25": 0.4072, + "26": 0.39671, + "27": 0.3982, + "28": 0.40691, + "29": 0.39562, + "30": 0.40833, + "31": 0.39669, + "32": 0.39668, + "33": 0.40988, + "34": 0.39562, + "35": 0.41063, + "36": 0.39531, + "37": 0.39635, + "38": 0.41178, + "39": 0.39606, + "40": 0.41007, + "41": 0.39542, + "42": 0.39788, + "43": 0.41102, + "44": 0.3969, + "45": 0.41204, + "46": 0.39665, + "47": 0.39695, + "48": 0.41099, + "49": 0.39625, + "50": 0.4146 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..9c3dab558ec --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93292, + "2": 10.93423, + "3": 10.91345, + "4": 10.90321, + "5": 10.9297, + "6": 10.93657, + "7": 10.90281, + "8": 10.92116, + "9": 10.90702, + "10": 10.90475, + "11": 10.88789, + "12": 10.91738, + "13": 10.91188, + "14": 10.91509, + "15": 10.87126, + "16": 10.8613, + "17": 10.82702, + "18": 10.85677, + "19": 10.84056, + "20": 10.75001, + "21": 10.71508, + "22": 10.58113, + "23": 10.7264, + "24": 10.60734, + "25": 10.53754, + "26": 10.61068, + "27": 10.59932, + "28": 10.54956, + "29": 10.56601, + "30": 10.32552, + "31": 10.06698, + "32": 10.43809, + "33": 10.4236, + "34": 10.16018, + "35": 10.22896, + "36": 10.17616, + "37": 10.29237, + "38": 10.13292, + "39": 10.34956, + "40": 10.01975, + "41": 10.07535, + "42": 10.15409, + "43": 9.7609, + "44": 9.88356, + "45": 9.75543, + "46": 9.74958, + "47": 10.07545, + "48": 9.77939, + "49": 9.43818, + "50": 9.84071 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 618.0, + "2": 622.0, + "3": 667.0, + "4": 559.0, + "5": 671.0, + "6": 625.0, + "7": 656.0, + "8": 584.0, + "9": 654.0, + "10": 511.0, + "11": 690.0, + "12": 601.0, + "13": 628.0, + "14": 654.0, + "15": 604.0, + "16": 652.0, + "17": 646.0, + "18": 640.0, + "19": 579.0, + "20": 532.0, + "21": 644.0, + "22": 584.0, + "23": 649.0, + "24": 595.0, + "25": 614.0, + "26": 621.0, + 
"27": 648.0, + "28": 727.0, + "29": 683.0, + "30": 657.0, + "31": 553.0, + "32": 700.0, + "33": 776.0, + "34": 645.0, + "35": 729.0, + "36": 740.0, + "37": 733.0, + "38": 740.0, + "39": 816.0, + "40": 792.0, + "41": 769.0, + "42": 828.0, + "43": 740.0, + "44": 784.0, + "45": 761.0, + "46": 831.0, + "47": 833.0, + "48": 866.0, + "49": 819.0, + "50": 876.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 431783936.0, + "2": 431783936.0, + "3": 431783936.0, + "4": 431783936.0, + "5": 431783936.0, + "6": 431783936.0, + "7": 431783936.0, + "8": 431783936.0, + "9": 431783936.0, + "10": 431783936.0, + "11": 431783936.0, + "12": 431783936.0, + "13": 431783936.0, + "14": 431783936.0, + "15": 431783936.0, + "16": 431783936.0, + "17": 431783936.0, + "18": 431783936.0, + "19": 431783936.0, + "20": 431783936.0, + "21": 431783936.0, + "22": 431783936.0, + "23": 431783936.0, + "24": 431783936.0, + "25": 431783936.0, + "26": 431783936.0, + "27": 431783936.0, + "28": 431783936.0, + "29": 431783936.0, + "30": 431783936.0, + "31": 431783936.0, + "32": 431783936.0, + "33": 431783936.0, + "34": 431783936.0, + "35": 431783936.0, + "36": 431783936.0, + "37": 431783936.0, + "38": 431783936.0, + "39": 431783936.0, + "40": 431783936.0, + "41": 431783936.0, + "42": 431783936.0, + "43": 431783936.0, + "44": 431783936.0, + "45": 431783936.0, + "46": 431783936.0, + "47": 431783936.0, + "48": 431783936.0, + "49": 431783936.0, + "50": 431783936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 678382080.0, + "2": 854262784.0, + "3": 854262784.0, + "4": 855310848.0, + "5": 855310848.0, + "6": 855310848.0, + "7": 855310848.0, + "8": 855310848.0, + "9": 855310848.0, + "10": 855310848.0, + "11": 855310848.0, + "12": 855310848.0, + "13": 855310848.0, + "14": 855310848.0, + "15": 855310848.0, + "16": 855310848.0, + "17": 855310848.0, + "18": 855310848.0, + "19": 
855310848.0, + "20": 855310848.0, + "21": 855310848.0, + "22": 855310848.0, + "23": 855310848.0, + "24": 855310848.0, + "25": 855310848.0, + "26": 855310848.0, + "27": 855310848.0, + "28": 855310848.0, + "29": 855310848.0, + "30": 855311360.0, + "31": 855311360.0, + "32": 855311360.0, + "33": 855311360.0, + "34": 855311360.0, + "35": 855311360.0, + "36": 855311360.0, + "37": 855311360.0, + "38": 855311360.0, + "39": 855311360.0, + "40": 855311360.0, + "41": 855311360.0, + "42": 855311360.0, + "43": 855311360.0, + "44": 855311360.0, + "45": 855311360.0, + "46": 855311360.0, + "47": 855311360.0, + "48": 855311360.0, + "49": 855311360.0, + "50": 855311360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 20.59672, + "2": 0.48034, + "3": 0.40738, + "4": 0.42161, + "5": 0.40858, + "6": 0.39543, + "7": 0.40287, + "8": 0.3966, + "9": 0.41138, + "10": 0.3986, + "11": 0.39331, + "12": 0.40756, + "13": 0.3935, + "14": 0.40339, + "15": 0.39322, + "16": 0.38875, + "17": 0.3989, + "18": 0.39441, + "19": 0.4034, + "20": 0.39017, + "21": 0.39088, + "22": 0.40266, + "23": 0.39396, + "24": 0.40055, + "25": 0.39308, + "26": 0.38936, + "27": 0.40304, + "28": 0.40539, + "29": 0.39709, + "30": 0.39502, + "31": 0.3928, + "32": 0.40816, + "33": 0.39533, + "34": 0.39686, + "35": 0.39825, + "36": 0.39554, + "37": 0.40729, + "38": 0.39634, + "39": 0.39853, + "40": 0.39904, + "41": 0.39615, + "42": 0.40732, + "43": 0.39538, + "44": 0.40115, + "45": 0.40237, + "46": 0.40262, + "47": 0.6094, + "48": 0.396, + "49": 0.40787, + "50": 0.3942 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 7bc5d3556fa..bb6bba8ed0e 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.86535, - "5": 10.87856, + "2": 10.85873, + "3": 10.8628, + "4": 10.84009, + "5": 10.87853, + "6": 10.88854, + "7": 10.86533, + "8": 10.86016, + "9": 10.85986, "10": 10.82978, - "15": 10.8205, - "20": 10.70397, - "25": 10.49419, - "30": 10.30553, - "35": 10.20189, - "40": 10.019, - "45": 9.74966, + "11": 10.88951, + "12": 10.8751, + "13": 10.87423, + "14": 10.89676, + "15": 10.82054, + "16": 10.82498, + "17": 10.78983, + "18": 10.8103, + "19": 10.80532, + "20": 10.70395, + "21": 10.66992, + "22": 10.50638, + "23": 10.69003, + "24": 10.5631, + "25": 10.4942, + "26": 10.56628, + "27": 10.58022, + "28": 10.51569, + "29": 10.55298, + "30": 10.30552, + "31": 10.02248, + "32": 10.40616, + "33": 10.39876, + "34": 10.13775, + "35": 10.20182, + "36": 10.16045, + "37": 10.28971, + "38": 10.11479, + "39": 10.36102, + "40": 10.01903, + "41": 10.07292, + "42": 10.14694, + "43": 9.74688, + "44": 9.87761, + "45": 9.74964, + "46": 9.73382, + "47": 10.07536, + "48": 9.78068, + "49": 9.44785, "50": 9.8399 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 592.0, - "5": 682.0, - "10": 528.0, - "15": 610.0, - "20": 610.0, - "25": 585.0, - "30": 690.0, - "35": 743.0, - "40": 769.0, - "45": 776.0, - "50": 793.0 + "1": 575.0, + "2": 661.0, + "3": 612.0, + "4": 601.0, + "5": 654.0, + "6": 680.0, + "7": 639.0, + "8": 567.0, + "9": 683.0, + "10": 559.0, + "11": 618.0, + "12": 620.0, + "13": 668.0, + "14": 681.0, + "15": 642.0, + "16": 637.0, + "17": 645.0, + "18": 610.0, + "19": 622.0, + "20": 611.0, + "21": 667.0, + "22": 590.0, + "23": 
734.0, + "24": 615.0, + "25": 598.0, + "26": 634.0, + "27": 667.0, + "28": 675.0, + "29": 769.0, + "30": 715.0, + "31": 607.0, + "32": 763.0, + "33": 814.0, + "34": 694.0, + "35": 713.0, + "36": 780.0, + "37": 817.0, + "38": 759.0, + "39": 886.0, + "40": 790.0, + "41": 758.0, + "42": 895.0, + "43": 763.0, + "44": 846.0, + "45": 765.0, + "46": 822.0, + "47": 882.0, + "48": 890.0, + "49": 875.0, + "50": 829.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, "50": 510689792.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 759895552.0, - "5": 933156352.0, - "10": 933156352.0, - "15": 933156352.0, - "20": 933156352.0, - "25": 933156352.0, - "30": 933156352.0, - "35": 934204928.0, - "40": 934204928.0, - "45": 934204928.0, - "50": 934204928.0 + "2": 934203904.0, + "3": 934203904.0, + "4": 934203904.0, + "5": 934203904.0, + "6": 934203904.0, + 
"7": 934203904.0, + "8": 934203904.0, + "9": 934203904.0, + "10": 934203904.0, + "11": 934203904.0, + "12": 934203904.0, + "13": 934203904.0, + "14": 934203904.0, + "15": 934203904.0, + "16": 934203904.0, + "17": 934203904.0, + "18": 934203904.0, + "19": 934203904.0, + "20": 934203904.0, + "21": 934203904.0, + "22": 934203904.0, + "23": 934203904.0, + "24": 934203904.0, + "25": 934203904.0, + "26": 934203904.0, + "27": 934203904.0, + "28": 934203904.0, + "29": 934203904.0, + "30": 934203904.0, + "31": 934203904.0, + "32": 934203904.0, + "33": 934203904.0, + "34": 934203904.0, + "35": 934203904.0, + "36": 934203904.0, + "37": 934203904.0, + "38": 934203904.0, + "39": 934203904.0, + "40": 934203904.0, + "41": 934203904.0, + "42": 934203904.0, + "43": 934203904.0, + "44": 934203904.0, + "45": 934203904.0, + "46": 934203904.0, + "47": 934203904.0, + "48": 934203904.0, + "49": 934203904.0, + "50": 934203904.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 16.08421, - "5": 0.28702, - "10": 0.28776, - "15": 0.28313, - "20": 0.29045, - "25": 0.28998, - "30": 0.29456, - "35": 0.28602, - "40": 0.29367, - "45": 0.28709, - "50": 0.2778 + "1": 15.70977, + "2": 0.39393, + "3": 0.33447, + "4": 0.34165, + "5": 0.33487, + "6": 0.33525, + "7": 0.33869, + "8": 0.33407, + "9": 0.32508, + "10": 0.32918, + "11": 0.32205, + "12": 0.32514, + "13": 0.32309, + "14": 0.32866, + "15": 0.32578, + "16": 0.32709, + "17": 0.32494, + "18": 0.3252, + "19": 0.32806, + "20": 0.32441, + "21": 0.32296, + "22": 0.32925, + "23": 0.32839, + "24": 0.32762, + "25": 0.33125, + "26": 0.3356, + "27": 0.32827, + "28": 0.32644, + "29": 0.32972, + "30": 0.32228, + "31": 0.3298, + "32": 0.32343, + "33": 0.32498, + "34": 0.32618, + "35": 0.32714, + "36": 0.32467, + "37": 0.32506, + "38": 0.32635, + "39": 0.3247, + "40": 0.32635, + "41": 0.32613, + "42": 0.32304, + "43": 0.32555, + "44": 0.32911, + "45": 0.3247, + "46": 0.32199, + "47": 
0.32475, + "48": 0.32466, + "49": 0.32582, + "50": 0.32505 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..8e79ecc164b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86283, + "4": 10.84004, + "5": 10.87856, + "6": 10.88851, + "7": 10.86535, + "8": 10.86016, + "9": 10.8599, + "10": 10.8298, + "11": 10.88949, + "12": 10.87507, + "13": 10.87424, + "14": 10.89675, + "15": 10.82057, + "16": 10.82503, + "17": 10.7898, + "18": 10.81025, + "19": 10.80535, + "20": 10.70398, + "21": 10.6699, + "22": 10.50643, + "23": 10.69004, + "24": 10.5631, + "25": 10.49418, + "26": 10.56626, + "27": 10.58022, + "28": 10.5157, + "29": 10.55297, + "30": 10.30551, + "31": 10.02249, + "32": 10.40617, + "33": 10.3988, + "34": 10.13771, + "35": 10.20187, + "36": 10.16052, + "37": 10.28969, + "38": 10.11482, + "39": 10.36105, + "40": 10.01899, + "41": 10.0729, + "42": 10.14695, + "43": 9.74686, + "44": 9.87766, + "45": 9.74967, + "46": 9.73385, + "47": 10.07539, + "48": 9.7807, + "49": 9.4478, + "50": 9.83992 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 594.0, + "2": 655.0, + "3": 626.0, + "4": 604.0, + "5": 612.0, + "6": 667.0, + "7": 653.0, + "8": 575.0, + "9": 673.0, + "10": 542.0, + "11": 672.0, + "12": 584.0, + "13": 616.0, + "14": 673.0, + "15": 695.0, + "16": 655.0, + "17": 640.0, + "18": 640.0, + "19": 637.0, + "20": 601.0, + "21": 680.0, + "22": 565.0, 
+ "23": 706.0, + "24": 615.0, + "25": 603.0, + "26": 591.0, + "27": 653.0, + "28": 696.0, + "29": 781.0, + "30": 767.0, + "31": 608.0, + "32": 740.0, + "33": 839.0, + "34": 727.0, + "35": 729.0, + "36": 720.0, + "37": 821.0, + "38": 818.0, + "39": 826.0, + "40": 750.0, + "41": 855.0, + "42": 871.0, + "43": 719.0, + "44": 838.0, + "45": 761.0, + "46": 886.0, + "47": 852.0, + "48": 876.0, + "49": 905.0, + "50": 872.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 934202368.0, + "5": 934202368.0, + "6": 934202368.0, + "7": 934202368.0, + "8": 934202368.0, + "9": 934202368.0, + "10": 934202368.0, + "11": 934202368.0, + "12": 934202368.0, + "13": 934202368.0, + "14": 934202368.0, + "15": 934202368.0, + "16": 
934202368.0, + "17": 934202368.0, + "18": 934202368.0, + "19": 934202368.0, + "20": 934202368.0, + "21": 934202368.0, + "22": 934202368.0, + "23": 934202368.0, + "24": 934202368.0, + "25": 934202368.0, + "26": 934202368.0, + "27": 934202368.0, + "28": 934202368.0, + "29": 934202368.0, + "30": 934202368.0, + "31": 934202368.0, + "32": 934202368.0, + "33": 934202368.0, + "34": 934202368.0, + "35": 934202368.0, + "36": 934202368.0, + "37": 934202368.0, + "38": 934202368.0, + "39": 934202368.0, + "40": 934202368.0, + "41": 934202368.0, + "42": 934202368.0, + "43": 934202368.0, + "44": 934202368.0, + "45": 934202368.0, + "46": 934202368.0, + "47": 934202368.0, + "48": 934202368.0, + "49": 934202368.0, + "50": 934202368.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.29804, + "2": 0.33247, + "3": 0.3002, + "4": 0.29387, + "5": 0.28202, + "6": 0.28144, + "7": 0.28667, + "8": 0.28202, + "9": 0.28668, + "10": 0.28475, + "11": 0.28037, + "12": 0.28061, + "13": 0.28479, + "14": 0.28709, + "15": 0.28259, + "16": 0.28648, + "17": 0.28752, + "18": 0.28427, + "19": 0.28253, + "20": 0.28216, + "21": 0.28394, + "22": 0.28202, + "23": 0.2842, + "24": 0.28848, + "25": 0.29137, + "26": 0.29314, + "27": 0.29412, + "28": 0.29477, + "29": 0.2847, + "30": 0.29036, + "31": 0.29596, + "32": 0.29187, + "33": 0.2913, + "34": 0.28636, + "35": 0.29547, + "36": 0.29476, + "37": 0.29213, + "38": 0.28835, + "39": 0.28597, + "40": 0.28573, + "41": 0.28673, + "42": 0.28864, + "43": 0.28774, + "44": 0.2871, + "45": 0.28744, + "46": 0.28594, + "47": 0.29182, + "48": 0.28838, + "49": 0.28221, + "50": 0.28369 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 
100644 index 00000000000..fb8e93ed571 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86285, + "4": 10.84011, + "5": 10.87856, + "6": 10.88852, + "7": 10.86536, + "8": 10.86016, + "9": 10.85989, + "10": 10.82982, + "11": 10.88947, + "12": 10.8751, + "13": 10.87425, + "14": 10.89675, + "15": 10.82051, + "16": 10.82498, + "17": 10.78982, + "18": 10.81029, + "19": 10.80533, + "20": 10.70397, + "21": 10.66991, + "22": 10.50644, + "23": 10.69004, + "24": 10.56312, + "25": 10.49421, + "26": 10.56627, + "27": 10.58027, + "28": 10.51573, + "29": 10.553, + "30": 10.30549, + "31": 10.02248, + "32": 10.40616, + "33": 10.39874, + "34": 10.13771, + "35": 10.20187, + "36": 10.16049, + "37": 10.28975, + "38": 10.11483, + "39": 10.36101, + "40": 10.01902, + "41": 10.07289, + "42": 10.14695, + "43": 9.74689, + "44": 9.87763, + "45": 9.74967, + "46": 9.73381, + "47": 10.07535, + "48": 9.78068, + "49": 9.44781, + "50": 9.8399 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 625.0, + "2": 644.0, + "3": 614.0, + "4": 636.0, + "5": 605.0, + "6": 649.0, + "7": 606.0, + "8": 559.0, + "9": 658.0, + "10": 524.0, + "11": 693.0, + "12": 598.0, + "13": 702.0, + "14": 660.0, + "15": 638.0, + "16": 596.0, + "17": 662.0, + "18": 586.0, + "19": 594.0, + "20": 598.0, + "21": 656.0, + "22": 608.0, + "23": 706.0, + "24": 609.0, + "25": 610.0, + "26": 632.0, + "27": 664.0, + "28": 766.0, + "29": 765.0, + "30": 755.0, + "31": 606.0, + "32": 708.0, + "33": 775.0, + "34": 735.0, + "35": 729.0, + "36": 739.0, + "37": 840.0, + "38": 749.0, + "39": 911.0, + "40": 763.0, + "41": 830.0, + "42": 835.0, + "43": 755.0, + "44": 823.0, + "45": 799.0, + "46": 811.0, + "47": 869.0, + "48": 839.0, + "49": 
897.0, + "50": 869.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759898624.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 934202368.0, + "5": 934202368.0, + "6": 934202368.0, + "7": 934202368.0, + "8": 934202368.0, + "9": 934202368.0, + "10": 934202368.0, + "11": 934202368.0, + "12": 934202368.0, + "13": 934202368.0, + "14": 934202368.0, + "15": 934202368.0, + "16": 934202368.0, + "17": 934202368.0, + "18": 934202368.0, + "19": 934202368.0, + "20": 934202368.0, + "21": 934202368.0, + "22": 934202368.0, + "23": 934202368.0, + "24": 934202368.0, + "25": 934202368.0, + "26": 934202368.0, + "27": 934202368.0, + "28": 934202368.0, + "29": 934202368.0, + "30": 934202368.0, + "31": 934202368.0, + "32": 934202368.0, + "33": 934202368.0, + "34": 934202368.0, + "35": 
934202368.0, + "36": 934202368.0, + "37": 934202368.0, + "38": 934202368.0, + "39": 934202368.0, + "40": 934202368.0, + "41": 934202368.0, + "42": 934202368.0, + "43": 934202368.0, + "44": 934202368.0, + "45": 934202368.0, + "46": 934202368.0, + "47": 934202368.0, + "48": 934202368.0, + "49": 934202368.0, + "50": 934202368.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 15.91359, + "2": 0.40136, + "3": 0.32913, + "4": 0.33946, + "5": 0.32404, + "6": 0.31963, + "7": 0.32283, + "8": 0.32302, + "9": 0.32004, + "10": 0.32058, + "11": 0.33128, + "12": 0.32725, + "13": 0.3253, + "14": 0.32532, + "15": 0.32194, + "16": 0.32237, + "17": 0.31946, + "18": 0.31937, + "19": 0.3185, + "20": 0.3193, + "21": 0.32216, + "22": 0.328, + "23": 0.32251, + "24": 0.32294, + "25": 0.32205, + "26": 0.32393, + "27": 0.32132, + "28": 0.32221, + "29": 0.32269, + "30": 0.32422, + "31": 0.32527, + "32": 0.32866, + "33": 0.32346, + "34": 0.32064, + "35": 0.3199, + "36": 0.32198, + "37": 0.32252, + "38": 0.32103, + "39": 0.32486, + "40": 0.32573, + "41": 0.32643, + "42": 0.3234, + "43": 0.32778, + "44": 0.32302, + "45": 0.32434, + "46": 0.32532, + "47": 0.32115, + "48": 0.31979, + "49": 0.3233, + "50": 0.31776 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..ca10e306407 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93292, + "2": 10.93423, + "3": 10.91347, + "4": 10.90322, + "5": 
10.92969, + "6": 10.93655, + "7": 10.90282, + "8": 10.92116, + "9": 10.90706, + "10": 10.90475, + "11": 10.8879, + "12": 10.91737, + "13": 10.9119, + "14": 10.91505, + "15": 10.87123, + "16": 10.86125, + "17": 10.82702, + "18": 10.85679, + "19": 10.84058, + "20": 10.75, + "21": 10.71511, + "22": 10.58115, + "23": 10.72641, + "24": 10.60726, + "25": 10.53753, + "26": 10.61066, + "27": 10.59933, + "28": 10.54955, + "29": 10.566, + "30": 10.32548, + "31": 10.06696, + "32": 10.4381, + "33": 10.4236, + "34": 10.16016, + "35": 10.22896, + "36": 10.17617, + "37": 10.29231, + "38": 10.13293, + "39": 10.34955, + "40": 10.01977, + "41": 10.07533, + "42": 10.1541, + "43": 9.7609, + "44": 9.88356, + "45": 9.75549, + "46": 9.74959, + "47": 10.07543, + "48": 9.7794, + "49": 9.4382, + "50": 9.84069 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 595.0, + "2": 593.0, + "3": 625.0, + "4": 603.0, + "5": 636.0, + "6": 612.0, + "7": 635.0, + "8": 619.0, + "9": 658.0, + "10": 526.0, + "11": 694.0, + "12": 570.0, + "13": 643.0, + "14": 639.0, + "15": 648.0, + "16": 647.0, + "17": 627.0, + "18": 586.0, + "19": 632.0, + "20": 663.0, + "21": 628.0, + "22": 545.0, + "23": 679.0, + "24": 624.0, + "25": 532.0, + "26": 623.0, + "27": 656.0, + "28": 719.0, + "29": 710.0, + "30": 707.0, + "31": 635.0, + "32": 710.0, + "33": 784.0, + "34": 679.0, + "35": 680.0, + "36": 695.0, + "37": 767.0, + "38": 782.0, + "39": 858.0, + "40": 746.0, + "41": 797.0, + "42": 774.0, + "43": 698.0, + "44": 748.0, + "45": 789.0, + "46": 819.0, + "47": 867.0, + "48": 871.0, + "49": 894.0, + "50": 868.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 431783936.0, + "2": 431783936.0, + "3": 431783936.0, + "4": 431783936.0, + "5": 431783936.0, + "6": 431783936.0, + "7": 431783936.0, + "8": 431783936.0, + "9": 431783936.0, + "10": 431783936.0, + "11": 431783936.0, + "12": 431783936.0, + "13": 
431783936.0, + "14": 431783936.0, + "15": 431783936.0, + "16": 431783936.0, + "17": 431783936.0, + "18": 431783936.0, + "19": 431783936.0, + "20": 431783936.0, + "21": 431783936.0, + "22": 431783936.0, + "23": 431783936.0, + "24": 431783936.0, + "25": 431783936.0, + "26": 431783936.0, + "27": 431783936.0, + "28": 431783936.0, + "29": 431783936.0, + "30": 431783936.0, + "31": 431783936.0, + "32": 431783936.0, + "33": 431783936.0, + "34": 431783936.0, + "35": 431783936.0, + "36": 431783936.0, + "37": 431783936.0, + "38": 431783936.0, + "39": 431783936.0, + "40": 431783936.0, + "41": 431783936.0, + "42": 431783936.0, + "43": 431783936.0, + "44": 431783936.0, + "45": 431783936.0, + "46": 431783936.0, + "47": 431783936.0, + "48": 431783936.0, + "49": 431783936.0, + "50": 431783936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 677334528.0, + "2": 854262272.0, + "3": 855309312.0, + "4": 855309312.0, + "5": 855309312.0, + "6": 855309312.0, + "7": 855309312.0, + "8": 855309312.0, + "9": 855309312.0, + "10": 855309312.0, + "11": 855309312.0, + "12": 855309312.0, + "13": 855309312.0, + "14": 855309312.0, + "15": 855309312.0, + "16": 855309312.0, + "17": 855309824.0, + "18": 855309824.0, + "19": 855309824.0, + "20": 855309824.0, + "21": 855309824.0, + "22": 855309824.0, + "23": 855309824.0, + "24": 855309824.0, + "25": 855309824.0, + "26": 855309824.0, + "27": 855309824.0, + "28": 855309824.0, + "29": 855309824.0, + "30": 855309824.0, + "31": 855310848.0, + "32": 855310848.0, + "33": 855310848.0, + "34": 855310848.0, + "35": 855310848.0, + "36": 855310848.0, + "37": 855310848.0, + "38": 855310848.0, + "39": 855310848.0, + "40": 855310848.0, + "41": 855310848.0, + "42": 855310848.0, + "43": 855310848.0, + "44": 855310848.0, + "45": 855310848.0, + "46": 855310848.0, + "47": 855310848.0, + "48": 855310848.0, + "49": 855310848.0, + "50": 855310848.0 + } + }, + "iteration-time": { + "start_step": 1, + 
"end_step": 50, + "step_interval": 1, + "values": { + "1": 15.39243, + "2": 0.47114, + "3": 0.4118, + "4": 0.4088, + "5": 0.41627, + "6": 0.40803, + "7": 0.41796, + "8": 0.40621, + "9": 0.40868, + "10": 0.41207, + "11": 0.40628, + "12": 0.41887, + "13": 0.40513, + "14": 0.41436, + "15": 0.40824, + "16": 0.40927, + "17": 0.41859, + "18": 0.40493, + "19": 0.41309, + "20": 0.4031, + "21": 0.40742, + "22": 0.41395, + "23": 0.40602, + "24": 0.41635, + "25": 0.40363, + "26": 0.40541, + "27": 0.41468, + "28": 0.40626, + "29": 0.41736, + "30": 0.41505, + "31": 0.42497, + "32": 0.42917, + "33": 0.41862, + "34": 0.40386, + "35": 0.39199, + "36": 0.39203, + "37": 0.4022, + "38": 0.39232, + "39": 0.40413, + "40": 0.39067, + "41": 0.39156, + "42": 0.40281, + "43": 0.3918, + "44": 0.40265, + "45": 0.39137, + "46": 0.39193, + "47": 0.4014, + "48": 0.3911, + "49": 0.40482, + "50": 0.38988 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..de27a6084a7 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93292, + "2": 10.93423, + "3": 10.91344, + "4": 10.9032, + "5": 10.92965, + "6": 10.93658, + "7": 10.90279, + "8": 10.92116, + "9": 10.90707, + "10": 10.90476, + "11": 10.88785, + "12": 10.91736, + "13": 10.91188, + "14": 10.91506, + "15": 10.87121, + "16": 10.86128, + "17": 10.827, + "18": 10.85677, + "19": 10.84058, + "20": 10.74999, + "21": 10.71508, + "22": 10.58119, + "23": 10.72643, + "24": 10.60729, + "25": 10.53754, + "26": 10.61069, + "27": 
10.59933, + "28": 10.54956, + "29": 10.56602, + "30": 10.32552, + "31": 10.06695, + "32": 10.43807, + "33": 10.42362, + "34": 10.16012, + "35": 10.22898, + "36": 10.17617, + "37": 10.29237, + "38": 10.13296, + "39": 10.34957, + "40": 10.01974, + "41": 10.07532, + "42": 10.15409, + "43": 9.76091, + "44": 9.88357, + "45": 9.75551, + "46": 9.74958, + "47": 10.07547, + "48": 9.77938, + "49": 9.43818, + "50": 9.84068 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 575.0, + "2": 590.0, + "3": 619.0, + "4": 585.0, + "5": 619.0, + "6": 641.0, + "7": 615.0, + "8": 599.0, + "9": 674.0, + "10": 511.0, + "11": 678.0, + "12": 632.0, + "13": 669.0, + "14": 614.0, + "15": 668.0, + "16": 647.0, + "17": 611.0, + "18": 625.0, + "19": 612.0, + "20": 548.0, + "21": 583.0, + "22": 599.0, + "23": 677.0, + "24": 570.0, + "25": 554.0, + "26": 661.0, + "27": 691.0, + "28": 745.0, + "29": 688.0, + "30": 770.0, + "31": 555.0, + "32": 712.0, + "33": 790.0, + "34": 637.0, + "35": 690.0, + "36": 736.0, + "37": 795.0, + "38": 728.0, + "39": 808.0, + "40": 740.0, + "41": 791.0, + "42": 800.0, + "43": 708.0, + "44": 730.0, + "45": 777.0, + "46": 786.0, + "47": 894.0, + "48": 897.0, + "49": 825.0, + "50": 850.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 431783936.0, + "2": 431783936.0, + "3": 431783936.0, + "4": 431783936.0, + "5": 431783936.0, + "6": 431783936.0, + "7": 431783936.0, + "8": 431783936.0, + "9": 431783936.0, + "10": 431783936.0, + "11": 431783936.0, + "12": 431783936.0, + "13": 431783936.0, + "14": 431783936.0, + "15": 431783936.0, + "16": 431783936.0, + "17": 431783936.0, + "18": 431783936.0, + "19": 431783936.0, + "20": 431783936.0, + "21": 431783936.0, + "22": 431783936.0, + "23": 431783936.0, + "24": 431783936.0, + "25": 431783936.0, + "26": 431783936.0, + "27": 431783936.0, + "28": 431783936.0, + "29": 431783936.0, + "30": 431783936.0, + "31": 
431783936.0, + "32": 431783936.0, + "33": 431783936.0, + "34": 431783936.0, + "35": 431783936.0, + "36": 431783936.0, + "37": 431783936.0, + "38": 431783936.0, + "39": 431783936.0, + "40": 431783936.0, + "41": 431783936.0, + "42": 431783936.0, + "43": 431783936.0, + "44": 431783936.0, + "45": 431783936.0, + "46": 431783936.0, + "47": 431783936.0, + "48": 431783936.0, + "49": 431783936.0, + "50": 431783936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 677335040.0, + "2": 854262784.0, + "3": 854262784.0, + "4": 854262784.0, + "5": 854262784.0, + "6": 854262784.0, + "7": 854262784.0, + "8": 855310848.0, + "9": 855310848.0, + "10": 855310848.0, + "11": 855310848.0, + "12": 855310848.0, + "13": 855310848.0, + "14": 855310848.0, + "15": 855310848.0, + "16": 855310848.0, + "17": 855311360.0, + "18": 855311360.0, + "19": 855311360.0, + "20": 855311360.0, + "21": 855311360.0, + "22": 855311360.0, + "23": 855311360.0, + "24": 855311360.0, + "25": 855311360.0, + "26": 855311360.0, + "27": 855311360.0, + "28": 855311360.0, + "29": 855311360.0, + "30": 855311360.0, + "31": 855311360.0, + "32": 855311360.0, + "33": 855311360.0, + "34": 855311360.0, + "35": 855311360.0, + "36": 855311360.0, + "37": 855311360.0, + "38": 855311360.0, + "39": 855311360.0, + "40": 855311360.0, + "41": 855311360.0, + "42": 855311360.0, + "43": 855311360.0, + "44": 855311360.0, + "45": 855311360.0, + "46": 855311360.0, + "47": 855311360.0, + "48": 855311360.0, + "49": 855311360.0, + "50": 855311360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.21722, + "2": 0.44346, + "3": 0.4048, + "4": 0.4153, + "5": 0.40403, + "6": 0.40186, + "7": 0.40648, + "8": 0.39996, + "9": 0.41082, + "10": 0.39802, + "11": 0.40029, + "12": 0.4031, + "13": 0.39772, + "14": 0.40795, + "15": 0.39818, + "16": 0.39779, + "17": 0.40587, + "18": 0.3977, + "19": 0.40697, + "20": 0.39617, + 
"21": 0.39797, + "22": 0.40462, + "23": 0.39629, + "24": 0.41062, + "25": 0.396, + "26": 0.39789, + "27": 0.3983, + "28": 0.39459, + "29": 0.40633, + "30": 0.39484, + "31": 0.3948, + "32": 0.4047, + "33": 0.39655, + "34": 0.40817, + "35": 0.39452, + "36": 0.39485, + "37": 0.40608, + "38": 0.39482, + "39": 0.40667, + "40": 0.39484, + "41": 0.39476, + "42": 0.40733, + "43": 0.39462, + "44": 0.41255, + "45": 0.39333, + "46": 0.39499, + "47": 0.40452, + "48": 0.39484, + "49": 0.40745, + "50": 0.39497 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index f2137d28953..2fa70eac521 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.85949, + "2": 10.85553, + "3": 10.86548, + "4": 10.84554, "5": 10.88344, + "6": 10.89429, + "7": 10.87068, + "8": 10.86983, + "9": 10.86919, "10": 10.83883, + "11": 10.89435, + "12": 10.8798, + "13": 10.87987, + "14": 10.90317, "15": 10.8405, + "16": 10.83786, + "17": 10.80668, + "18": 10.83025, + "19": 10.82262, "20": 10.73192, + "21": 10.7075, + "22": 10.56005, + "23": 10.72406, + "24": 10.61116, "25": 10.5481, + "26": 10.61334, + "27": 10.6305, + "28": 10.56645, + "29": 10.59672, "30": 10.37136, + "31": 10.11721, + "32": 10.46127, + "33": 10.45247, + "34": 10.21687, "35": 10.27171, + "36": 10.2312, + "37": 10.34809, + "38": 10.18842, + "39": 10.41042, "40": 10.09426, + "41": 10.14711, + "42": 10.21247, + "43": 
9.84106, + "44": 9.95919, "45": 9.84082, + "46": 9.82482, + "47": 10.13882, + "48": 9.85839, + "49": 9.5472, "50": 9.90883 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1690.0, + "2": 1776.0, + "3": 1642.0, + "4": 1825.0, "5": 1809.0, + "6": 1795.0, + "7": 1830.0, + "8": 1626.0, + "9": 1878.0, "10": 1423.0, + "11": 1868.0, + "12": 1653.0, + "13": 1897.0, + "14": 1783.0, "15": 1861.0, + "16": 1938.0, + "17": 1825.0, + "18": 1730.0, + "19": 1727.0, "20": 1735.0, + "21": 1783.0, + "22": 1576.0, + "23": 1949.0, + "24": 1630.0, "25": 1498.0, + "26": 1649.0, + "27": 1809.0, + "28": 2019.0, + "29": 2009.0, "30": 1832.0, + "31": 1524.0, + "32": 1943.0, + "33": 2081.0, + "34": 1888.0, "35": 1935.0, + "36": 1898.0, + "37": 2325.0, + "38": 2070.0, + "39": 2248.0, "40": 2199.0, + "41": 2264.0, + "42": 2349.0, + "43": 2087.0, + "44": 2107.0, "45": 2098.0, + "46": 2407.0, + "47": 2456.0, + "48": 2404.0, + "49": 2417.0, "50": 2407.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 516194816.0, + "2": 516194816.0, + "3": 516194816.0, + "4": 516194816.0, "5": 516194816.0, + "6": 516194816.0, + "7": 516194816.0, + "8": 516194816.0, + "9": 516194816.0, "10": 516194816.0, + "11": 516194816.0, + "12": 516194816.0, + "13": 516194816.0, + "14": 516194816.0, "15": 516194816.0, + "16": 516194816.0, + "17": 516194816.0, + "18": 516194816.0, + "19": 516194816.0, "20": 516194816.0, + "21": 516194816.0, + "22": 516194816.0, + "23": 516194816.0, + "24": 516194816.0, "25": 516194816.0, + "26": 516194816.0, + "27": 516194816.0, + "28": 516194816.0, + "29": 516194816.0, "30": 516194816.0, + "31": 516194816.0, + "32": 516194816.0, + "33": 516194816.0, + "34": 516194816.0, "35": 516194816.0, + "36": 516194816.0, + "37": 516194816.0, + "38": 516194816.0, + "39": 516194816.0, "40": 516194816.0, + "41": 516194816.0, + "42": 516194816.0, + "43": 516194816.0, + 
"44": 516194816.0, "45": 516194816.0, + "46": 516194816.0, + "47": 516194816.0, + "48": 516194816.0, + "49": 516194816.0, "50": 516194816.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1670130688.0, + "2": 1840523776.0, + "3": 1840523776.0, + "4": 1840523776.0, "5": 1840523776.0, + "6": 1840523776.0, + "7": 1840523776.0, + "8": 1840523776.0, + "9": 1840523776.0, "10": 1840523776.0, - "15": 1841310208.0, - "20": 1841310208.0, - "25": 1841310208.0, - "30": 1841310208.0, - "35": 1841310208.0, - "40": 1841310208.0, - "45": 1841310208.0, - "50": 1841310208.0 + "11": 1840523776.0, + "12": 1840523776.0, + "13": 1840523776.0, + "14": 1840523776.0, + "15": 1840523776.0, + "16": 1840523776.0, + "17": 1840523776.0, + "18": 1840523776.0, + "19": 1840523776.0, + "20": 1840523776.0, + "21": 1840523776.0, + "22": 1840523776.0, + "23": 1840523776.0, + "24": 1840523776.0, + "25": 1840523776.0, + "26": 1840523776.0, + "27": 1840523776.0, + "28": 1840523776.0, + "29": 1840523776.0, + "30": 1840523776.0, + "31": 1840523776.0, + "32": 1840523776.0, + "33": 1840523776.0, + "34": 1840523776.0, + "35": 1840523776.0, + "36": 1840523776.0, + "37": 1840523776.0, + "38": 1840523776.0, + "39": 1840523776.0, + "40": 1840523776.0, + "41": 1840523776.0, + "42": 1840523776.0, + "43": 1840523776.0, + "44": 1840523776.0, + "45": 1840523776.0, + "46": 1840523776.0, + "47": 1840523776.0, + "48": 1840523776.0, + "49": 1840523776.0, + "50": 1840523776.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 15.15592, - "5": 0.12534, - "10": 0.11995, - "15": 0.12083, - "20": 0.11947, - "25": 0.11848, - "30": 0.11832, - "35": 0.11938, - "40": 0.12709, - "45": 0.11947, - "50": 0.11811 + "1": 15.46989, + "2": 0.15818, + "3": 0.14336, + "4": 0.14305, + "5": 0.14285, + "6": 0.14415, + "7": 0.14655, + "8": 0.14457, + "9": 0.14518, + "10": 0.14657, + "11": 0.14517, 
+ "12": 0.14486, + "13": 0.14388, + "14": 0.14419, + "15": 0.14463, + "16": 0.146, + "17": 0.14212, + "18": 0.14726, + "19": 0.14464, + "20": 0.14514, + "21": 0.14341, + "22": 0.14454, + "23": 0.14327, + "24": 0.14354, + "25": 0.14453, + "26": 0.14409, + "27": 0.14547, + "28": 0.14291, + "29": 0.14484, + "30": 0.1444, + "31": 0.14388, + "32": 0.14651, + "33": 0.14385, + "34": 0.14057, + "35": 0.14021, + "36": 0.14028, + "37": 0.13912, + "38": 0.13925, + "39": 0.14191, + "40": 0.14024, + "41": 0.14034, + "42": 0.14027, + "43": 0.14125, + "44": 0.14142, + "45": 0.14126, + "46": 0.14404, + "47": 0.1403, + "48": 0.14011, + "49": 0.14086, + "50": 0.13902 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..9a1bfb0707b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86548, + "4": 10.84554, + "5": 10.88344, + "6": 10.89429, + "7": 10.87068, + "8": 10.86983, + "9": 10.86919, + "10": 10.83883, + "11": 10.89435, + "12": 10.8798, + "13": 10.87987, + "14": 10.90317, + "15": 10.8405, + "16": 10.83786, + "17": 10.80668, + "18": 10.83025, + "19": 10.82262, + "20": 10.73192, + "21": 10.7075, + "22": 10.56005, + "23": 10.72406, + "24": 10.61116, + "25": 10.5481, + "26": 10.61334, + "27": 10.6305, + "28": 10.56645, + "29": 10.59672, + "30": 10.37136, + "31": 10.11721, + "32": 10.46127, + "33": 10.45247, + "34": 10.21687, + "35": 10.27171, + "36": 10.2312, + "37": 10.34809, + "38": 10.18842, + "39": 
10.41042, + "40": 10.09426, + "41": 10.14711, + "42": 10.21247, + "43": 9.84106, + "44": 9.95919, + "45": 9.84082, + "46": 9.82482, + "47": 10.13882, + "48": 9.85839, + "49": 9.5472, + "50": 9.90883 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1690.0, + "2": 1776.0, + "3": 1642.0, + "4": 1825.0, + "5": 1809.0, + "6": 1795.0, + "7": 1830.0, + "8": 1626.0, + "9": 1878.0, + "10": 1423.0, + "11": 1868.0, + "12": 1653.0, + "13": 1897.0, + "14": 1783.0, + "15": 1861.0, + "16": 1938.0, + "17": 1825.0, + "18": 1730.0, + "19": 1727.0, + "20": 1735.0, + "21": 1783.0, + "22": 1576.0, + "23": 1949.0, + "24": 1630.0, + "25": 1498.0, + "26": 1649.0, + "27": 1809.0, + "28": 2019.0, + "29": 2009.0, + "30": 1832.0, + "31": 1524.0, + "32": 1943.0, + "33": 2081.0, + "34": 1888.0, + "35": 1935.0, + "36": 1898.0, + "37": 2325.0, + "38": 2070.0, + "39": 2248.0, + "40": 2199.0, + "41": 2264.0, + "42": 2349.0, + "43": 2087.0, + "44": 2107.0, + "45": 2098.0, + "46": 2407.0, + "47": 2456.0, + "48": 2404.0, + "49": 2417.0, + "50": 2407.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 516194816.0, + "2": 516194816.0, + "3": 516194816.0, + "4": 516194816.0, + "5": 516194816.0, + "6": 516194816.0, + "7": 516194816.0, + "8": 516194816.0, + "9": 516194816.0, + "10": 516194816.0, + "11": 516194816.0, + "12": 516194816.0, + "13": 516194816.0, + "14": 516194816.0, + "15": 516194816.0, + "16": 516194816.0, + "17": 516194816.0, + "18": 516194816.0, + "19": 516194816.0, + "20": 516194816.0, + "21": 516194816.0, + "22": 516194816.0, + "23": 516194816.0, + "24": 516194816.0, + "25": 516194816.0, + "26": 516194816.0, + "27": 516194816.0, + "28": 516194816.0, + "29": 516194816.0, + "30": 516194816.0, + "31": 516194816.0, + "32": 516194816.0, + "33": 516194816.0, + "34": 516194816.0, + "35": 516194816.0, + "36": 516194816.0, + "37": 516194816.0, + "38": 516194816.0, + "39": 
516194816.0, + "40": 516194816.0, + "41": 516194816.0, + "42": 516194816.0, + "43": 516194816.0, + "44": 516194816.0, + "45": 516194816.0, + "46": 516194816.0, + "47": 516194816.0, + "48": 516194816.0, + "49": 516194816.0, + "50": 516194816.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1670130688.0, + "2": 1840523776.0, + "3": 1841310208.0, + "4": 1841310208.0, + "5": 1841310208.0, + "6": 1841310208.0, + "7": 1841310208.0, + "8": 1841310208.0, + "9": 1841310208.0, + "10": 1841310208.0, + "11": 1841310208.0, + "12": 1841310208.0, + "13": 1841310208.0, + "14": 1841310208.0, + "15": 1841310208.0, + "16": 1841310208.0, + "17": 1841310208.0, + "18": 1841310208.0, + "19": 1841310208.0, + "20": 1841310208.0, + "21": 1841310208.0, + "22": 1841310208.0, + "23": 1841310208.0, + "24": 1841310208.0, + "25": 1841310208.0, + "26": 1841310208.0, + "27": 1841310208.0, + "28": 1841310208.0, + "29": 1841310208.0, + "30": 1841310208.0, + "31": 1841310208.0, + "32": 1841310208.0, + "33": 1841310208.0, + "34": 1841310208.0, + "35": 1841310208.0, + "36": 1841310208.0, + "37": 1841310208.0, + "38": 1841310208.0, + "39": 1841310208.0, + "40": 1841310208.0, + "41": 1841310208.0, + "42": 1841310208.0, + "43": 1841310208.0, + "44": 1841310208.0, + "45": 1841310208.0, + "46": 1841310208.0, + "47": 1841310208.0, + "48": 1841310208.0, + "49": 1841310208.0, + "50": 1841310208.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.9332, + "2": 0.16326, + "3": 0.12463, + "4": 0.12744, + "5": 0.12912, + "6": 0.12823, + "7": 0.12454, + "8": 0.12362, + "9": 0.12458, + "10": 0.12419, + "11": 0.12352, + "12": 0.12552, + "13": 0.12365, + "14": 0.12466, + "15": 0.12255, + "16": 0.12286, + "17": 0.12294, + "18": 0.12246, + "19": 0.12292, + "20": 0.12533, + "21": 0.12268, + "22": 0.12434, + "23": 0.11979, + "24": 0.11976, + "25": 0.11744, + "26": 0.11555, + "27": 0.11746, 
+ "28": 0.11709, + "29": 0.12764, + "30": 0.11818, + "31": 0.11917, + "32": 0.11662, + "33": 0.11909, + "34": 0.11844, + "35": 0.1167, + "36": 0.12045, + "37": 0.11624, + "38": 0.11602, + "39": 0.11985, + "40": 0.11702, + "41": 0.11671, + "42": 0.11663, + "43": 0.11741, + "44": 0.11703, + "45": 0.11752, + "46": 0.11604, + "47": 0.11836, + "48": 0.12278, + "49": 0.12884, + "50": 0.11659 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..379b1c16f29 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86548, + "4": 10.84554, + "5": 10.88344, + "6": 10.89429, + "7": 10.87068, + "8": 10.86983, + "9": 10.86919, + "10": 10.83883, + "11": 10.89435, + "12": 10.8798, + "13": 10.87987, + "14": 10.90317, + "15": 10.8405, + "16": 10.83786, + "17": 10.80668, + "18": 10.83025, + "19": 10.82262, + "20": 10.73192, + "21": 10.7075, + "22": 10.56005, + "23": 10.72406, + "24": 10.61116, + "25": 10.5481, + "26": 10.61334, + "27": 10.6305, + "28": 10.56645, + "29": 10.59672, + "30": 10.37136, + "31": 10.11721, + "32": 10.46127, + "33": 10.45247, + "34": 10.21687, + "35": 10.27171, + "36": 10.2312, + "37": 10.34809, + "38": 10.18842, + "39": 10.41042, + "40": 10.09426, + "41": 10.14711, + "42": 10.21247, + "43": 9.84106, + "44": 9.95919, + "45": 9.84082, + "46": 9.82482, + "47": 10.13882, + "48": 9.85839, + "49": 9.5472, + "50": 9.90883 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, 
+ "values": { + "1": 1690.0, + "2": 1776.0, + "3": 1642.0, + "4": 1825.0, + "5": 1809.0, + "6": 1795.0, + "7": 1830.0, + "8": 1626.0, + "9": 1878.0, + "10": 1423.0, + "11": 1868.0, + "12": 1653.0, + "13": 1897.0, + "14": 1783.0, + "15": 1861.0, + "16": 1938.0, + "17": 1825.0, + "18": 1730.0, + "19": 1727.0, + "20": 1735.0, + "21": 1783.0, + "22": 1576.0, + "23": 1949.0, + "24": 1630.0, + "25": 1498.0, + "26": 1649.0, + "27": 1809.0, + "28": 2019.0, + "29": 2009.0, + "30": 1832.0, + "31": 1524.0, + "32": 1943.0, + "33": 2081.0, + "34": 1888.0, + "35": 1935.0, + "36": 1898.0, + "37": 2325.0, + "38": 2070.0, + "39": 2248.0, + "40": 2199.0, + "41": 2264.0, + "42": 2349.0, + "43": 2087.0, + "44": 2107.0, + "45": 2098.0, + "46": 2407.0, + "47": 2456.0, + "48": 2404.0, + "49": 2417.0, + "50": 2407.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 516194816.0, + "2": 516194816.0, + "3": 516194816.0, + "4": 516194816.0, + "5": 516194816.0, + "6": 516194816.0, + "7": 516194816.0, + "8": 516194816.0, + "9": 516194816.0, + "10": 516194816.0, + "11": 516194816.0, + "12": 516194816.0, + "13": 516194816.0, + "14": 516194816.0, + "15": 516194816.0, + "16": 516194816.0, + "17": 516194816.0, + "18": 516194816.0, + "19": 516194816.0, + "20": 516194816.0, + "21": 516194816.0, + "22": 516194816.0, + "23": 516194816.0, + "24": 516194816.0, + "25": 516194816.0, + "26": 516194816.0, + "27": 516194816.0, + "28": 516194816.0, + "29": 516194816.0, + "30": 516194816.0, + "31": 516194816.0, + "32": 516194816.0, + "33": 516194816.0, + "34": 516194816.0, + "35": 516194816.0, + "36": 516194816.0, + "37": 516194816.0, + "38": 516194816.0, + "39": 516194816.0, + "40": 516194816.0, + "41": 516194816.0, + "42": 516194816.0, + "43": 516194816.0, + "44": 516194816.0, + "45": 516194816.0, + "46": 516194816.0, + "47": 516194816.0, + "48": 516194816.0, + "49": 516194816.0, + "50": 516194816.0 + } + }, + "mem-max-allocated-bytes": { + 
"start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1670130688.0, + "2": 1840523776.0, + "3": 1840523776.0, + "4": 1840523776.0, + "5": 1840523776.0, + "6": 1840523776.0, + "7": 1840523776.0, + "8": 1840523776.0, + "9": 1840523776.0, + "10": 1840523776.0, + "11": 1840523776.0, + "12": 1840523776.0, + "13": 1840523776.0, + "14": 1840523776.0, + "15": 1840523776.0, + "16": 1840523776.0, + "17": 1840523776.0, + "18": 1840523776.0, + "19": 1840523776.0, + "20": 1840523776.0, + "21": 1840523776.0, + "22": 1840523776.0, + "23": 1840523776.0, + "24": 1840523776.0, + "25": 1840523776.0, + "26": 1840523776.0, + "27": 1840523776.0, + "28": 1840523776.0, + "29": 1840523776.0, + "30": 1840523776.0, + "31": 1840523776.0, + "32": 1840523776.0, + "33": 1840523776.0, + "34": 1840523776.0, + "35": 1840523776.0, + "36": 1840523776.0, + "37": 1840523776.0, + "38": 1840523776.0, + "39": 1840523776.0, + "40": 1840523776.0, + "41": 1840523776.0, + "42": 1840523776.0, + "43": 1840523776.0, + "44": 1840523776.0, + "45": 1840523776.0, + "46": 1840523776.0, + "47": 1840523776.0, + "48": 1840523776.0, + "49": 1840523776.0, + "50": 1840523776.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 15.2683, + "2": 0.15358, + "3": 0.13619, + "4": 0.13976, + "5": 0.13713, + "6": 0.13753, + "7": 0.13575, + "8": 0.13485, + "9": 0.13779, + "10": 0.13697, + "11": 0.14178, + "12": 0.1397, + "13": 0.13744, + "14": 0.14039, + "15": 0.13739, + "16": 0.1361, + "17": 0.13816, + "18": 0.13722, + "19": 0.15342, + "20": 0.14613, + "21": 0.14806, + "22": 0.14423, + "23": 0.14791, + "24": 0.14345, + "25": 0.14474, + "26": 0.14564, + "27": 0.14168, + "28": 0.14148, + "29": 0.13863, + "30": 0.13751, + "31": 0.14015, + "32": 0.13821, + "33": 0.14038, + "34": 0.13859, + "35": 0.14531, + "36": 0.14468, + "37": 0.13783, + "38": 0.13787, + "39": 0.13879, + "40": 0.14072, + "41": 0.14065, + "42": 0.13865, + "43": 0.13953, + "44": 
0.13882, + "45": 0.13622, + "46": 0.14034, + "47": 0.13659, + "48": 0.14369, + "49": 0.13987, + "50": 0.13803 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..d381ff1bd8e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.92655, + "2": 10.92585, + "3": 10.91514, + "4": 10.90899, + "5": 10.92719, + "6": 10.9356, + "7": 10.90644, + "8": 10.92124, + "9": 10.91072, + "10": 10.9079, + "11": 10.89279, + "12": 10.9243, + "13": 10.91492, + "14": 10.9214, + "15": 10.88295, + "16": 10.87305, + "17": 10.84065, + "18": 10.87298, + "19": 10.85634, + "20": 10.77595, + "21": 10.74894, + "22": 10.63082, + "23": 10.75618, + "24": 10.65648, + "25": 10.59261, + "26": 10.65439, + "27": 10.64911, + "28": 10.59499, + "29": 10.60946, + "30": 10.39175, + "31": 10.1572, + "32": 10.49109, + "33": 10.47964, + "34": 10.24073, + "35": 10.29696, + "36": 10.2467, + "37": 10.35242, + "38": 10.20484, + "39": 10.40504, + "40": 10.09662, + "41": 10.15197, + "42": 10.22064, + "43": 9.85509, + "44": 9.96162, + "45": 9.84469, + "46": 9.83833, + "47": 10.14003, + "48": 9.85758, + "49": 9.53744, + "50": 9.90944 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1594.0, + "2": 1834.0, + "3": 1682.0, + "4": 1736.0, + "5": 1923.0, + "6": 1815.0, + "7": 1879.0, + "8": 1755.0, + "9": 1905.0, + "10": 1370.0, + "11": 1981.0, + "12": 1780.0, + "13": 2007.0, + "14": 1848.0, + "15": 1887.0, + 
"16": 1753.0, + "17": 1859.0, + "18": 1752.0, + "19": 1820.0, + "20": 1591.0, + "21": 1835.0, + "22": 1655.0, + "23": 1972.0, + "24": 1667.0, + "25": 1655.0, + "26": 1798.0, + "27": 1853.0, + "28": 1993.0, + "29": 1998.0, + "30": 1946.0, + "31": 1613.0, + "32": 1954.0, + "33": 2212.0, + "34": 1965.0, + "35": 1940.0, + "36": 1954.0, + "37": 2289.0, + "38": 2173.0, + "39": 2478.0, + "40": 2097.0, + "41": 2342.0, + "42": 2362.0, + "43": 1952.0, + "44": 2105.0, + "45": 2063.0, + "46": 2234.0, + "47": 2444.0, + "48": 2395.0, + "49": 2316.0, + "50": 2445.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 438468608.0, + "2": 438468608.0, + "3": 438468608.0, + "4": 438468608.0, + "5": 438468608.0, + "6": 438468608.0, + "7": 438468608.0, + "8": 438468608.0, + "9": 438468608.0, + "10": 438468608.0, + "11": 438468608.0, + "12": 438468608.0, + "13": 438468608.0, + "14": 438468608.0, + "15": 438468608.0, + "16": 438468608.0, + "17": 438468608.0, + "18": 438468608.0, + "19": 438468608.0, + "20": 438468608.0, + "21": 438468608.0, + "22": 438468608.0, + "23": 438468608.0, + "24": 438468608.0, + "25": 438468608.0, + "26": 438468608.0, + "27": 438468608.0, + "28": 438468608.0, + "29": 438468608.0, + "30": 438468608.0, + "31": 438468608.0, + "32": 438468608.0, + "33": 438468608.0, + "34": 438468608.0, + "35": 438468608.0, + "36": 438468608.0, + "37": 438468608.0, + "38": 438468608.0, + "39": 438468608.0, + "40": 438468608.0, + "41": 438468608.0, + "42": 438468608.0, + "43": 438468608.0, + "44": 438468608.0, + "45": 438468608.0, + "46": 438468608.0, + "47": 438468608.0, + "48": 438468608.0, + "49": 438468608.0, + "50": 438468608.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2658189824.0, + "2": 2658189824.0, + "3": 2658189824.0, + "4": 2658189824.0, + "5": 2658189824.0, + "6": 2658189824.0, + "7": 2658189824.0, + "8": 2658189824.0, + "9": 
2658189824.0, + "10": 2658189824.0, + "11": 2658189824.0, + "12": 2658189824.0, + "13": 2658189824.0, + "14": 2658189824.0, + "15": 2658189824.0, + "16": 2658189824.0, + "17": 2658189824.0, + "18": 2658189824.0, + "19": 2658189824.0, + "20": 2658189824.0, + "21": 2658189824.0, + "22": 2658189824.0, + "23": 2658189824.0, + "24": 2658189824.0, + "25": 2658189824.0, + "26": 2658189824.0, + "27": 2658189824.0, + "28": 2658189824.0, + "29": 2658189824.0, + "30": 2658189824.0, + "31": 2658189824.0, + "32": 2658189824.0, + "33": 2658189824.0, + "34": 2658189824.0, + "35": 2658189824.0, + "36": 2658189824.0, + "37": 2658189824.0, + "38": 2658189824.0, + "39": 2658189824.0, + "40": 2658189824.0, + "41": 2658189824.0, + "42": 2658189824.0, + "43": 2658189824.0, + "44": 2658189824.0, + "45": 2658189824.0, + "46": 2658189824.0, + "47": 2658189824.0, + "48": 2658189824.0, + "49": 2658189824.0, + "50": 2658189824.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.89692, + "2": 0.22636, + "3": 0.19282, + "4": 0.19102, + "5": 0.18966, + "6": 0.19089, + "7": 0.18785, + "8": 0.19603, + "9": 0.20181, + "10": 0.20496, + "11": 0.21259, + "12": 0.22807, + "13": 0.20894, + "14": 0.23285, + "15": 0.21589, + "16": 0.21307, + "17": 0.2066, + "18": 0.20281, + "19": 0.20035, + "20": 0.21165, + "21": 0.21499, + "22": 0.20787, + "23": 0.20796, + "24": 0.20107, + "25": 0.20655, + "26": 0.19066, + "27": 0.19278, + "28": 0.18972, + "29": 0.18934, + "30": 0.18911, + "31": 0.18621, + "32": 0.18488, + "33": 0.18787, + "34": 0.18483, + "35": 0.18634, + "36": 0.18614, + "37": 0.18598, + "38": 0.19035, + "39": 0.1965, + "40": 0.22208, + "41": 0.21118, + "42": 0.21696, + "43": 0.2487, + "44": 0.25093, + "45": 0.25052, + "46": 0.23122, + "47": 0.23444, + "48": 0.23094, + "49": 0.23714, + "50": 0.41655 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..7c826222075 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.92655, + "2": 10.92585, + "3": 10.91514, + "4": 10.90898, + "5": 10.92718, + "6": 10.9356, + "7": 10.90644, + "8": 10.9212, + "9": 10.91072, + "10": 10.90791, + "11": 10.89277, + "12": 10.92427, + "13": 10.91491, + "14": 10.92144, + "15": 10.88294, + "16": 10.8731, + "17": 10.84065, + "18": 10.87301, + "19": 10.85632, + "20": 10.77595, + "21": 10.74892, + "22": 10.63083, + "23": 10.75616, + "24": 10.65644, + "25": 10.59263, + "26": 10.65439, + "27": 10.64917, + "28": 10.59496, + "29": 10.60945, + "30": 10.39175, + "31": 10.15721, + "32": 10.49112, + "33": 10.4796, + "34": 10.24073, + "35": 10.297, + "36": 10.24673, + "37": 10.35244, + "38": 10.20481, + "39": 10.40504, + "40": 10.09662, + "41": 10.15197, + "42": 10.22065, + "43": 9.85507, + "44": 9.96161, + "45": 9.84469, + "46": 9.83836, + "47": 10.14002, + "48": 9.85758, + "49": 9.53747, + "50": 9.90948 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1610.0, + "2": 1834.0, + "3": 1691.0, + "4": 1762.0, + "5": 1930.0, + "6": 1842.0, + "7": 1885.0, + "8": 1832.0, + "9": 1917.0, + "10": 1419.0, + "11": 1991.0, + "12": 1756.0, + "13": 2014.0, + "14": 1811.0, + "15": 1937.0, + "16": 1771.0, + "17": 1873.0, + "18": 1717.0, + "19": 1721.0, + "20": 1631.0, + "21": 1842.0, + "22": 1808.0, + "23": 1932.0, + "24": 1572.0, + "25": 1667.0, + "26": 1818.0, + 
"27": 1928.0, + "28": 2063.0, + "29": 2105.0, + "30": 1908.0, + "31": 1554.0, + "32": 1943.0, + "33": 2262.0, + "34": 1908.0, + "35": 1939.0, + "36": 2027.0, + "37": 2400.0, + "38": 2269.0, + "39": 2458.0, + "40": 2109.0, + "41": 2257.0, + "42": 2224.0, + "43": 2059.0, + "44": 2118.0, + "45": 2090.0, + "46": 2409.0, + "47": 2607.0, + "48": 2457.0, + "49": 2239.0, + "50": 2412.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 436764672.0, + "2": 436764672.0, + "3": 436764672.0, + "4": 436764672.0, + "5": 436764672.0, + "6": 436764672.0, + "7": 436764672.0, + "8": 436764672.0, + "9": 436764672.0, + "10": 436764672.0, + "11": 436764672.0, + "12": 436764672.0, + "13": 436764672.0, + "14": 436764672.0, + "15": 436764672.0, + "16": 436764672.0, + "17": 436764672.0, + "18": 436764672.0, + "19": 436764672.0, + "20": 436764672.0, + "21": 436764672.0, + "22": 436764672.0, + "23": 436764672.0, + "24": 436764672.0, + "25": 436764672.0, + "26": 436764672.0, + "27": 436764672.0, + "28": 436764672.0, + "29": 436764672.0, + "30": 436764672.0, + "31": 436764672.0, + "32": 436764672.0, + "33": 436764672.0, + "34": 436764672.0, + "35": 436764672.0, + "36": 436764672.0, + "37": 436764672.0, + "38": 436764672.0, + "39": 436764672.0, + "40": 436764672.0, + "41": 436764672.0, + "42": 436764672.0, + "43": 436764672.0, + "44": 436764672.0, + "45": 436764672.0, + "46": 436764672.0, + "47": 436764672.0, + "48": 436764672.0, + "49": 436764672.0, + "50": 436764672.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2658189824.0, + "2": 2658189824.0, + "3": 2658189824.0, + "4": 2658189824.0, + "5": 2658189824.0, + "6": 2658189824.0, + "7": 2658189824.0, + "8": 2658189824.0, + "9": 2658189824.0, + "10": 2658189824.0, + "11": 2658189824.0, + "12": 2658189824.0, + "13": 2658189824.0, + "14": 2658189824.0, + "15": 2658189824.0, + "16": 2658189824.0, + "17": 
2658189824.0, + "18": 2658189824.0, + "19": 2658189824.0, + "20": 2658189824.0, + "21": 2658189824.0, + "22": 2658189824.0, + "23": 2658189824.0, + "24": 2658189824.0, + "25": 2658189824.0, + "26": 2658189824.0, + "27": 2658189824.0, + "28": 2658189824.0, + "29": 2658189824.0, + "30": 2658189824.0, + "31": 2658189824.0, + "32": 2658189824.0, + "33": 2658189824.0, + "34": 2658189824.0, + "35": 2658189824.0, + "36": 2658189824.0, + "37": 2658189824.0, + "38": 2658189824.0, + "39": 2658189824.0, + "40": 2658189824.0, + "41": 2658189824.0, + "42": 2658189824.0, + "43": 2658189824.0, + "44": 2658189824.0, + "45": 2658189824.0, + "46": 2658189824.0, + "47": 2658189824.0, + "48": 2658189824.0, + "49": 2658189824.0, + "50": 2658189824.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 18.07715, + "2": 0.23504, + "3": 0.18606, + "4": 0.186, + "5": 0.18473, + "6": 0.18533, + "7": 0.18715, + "8": 0.18676, + "9": 0.18665, + "10": 0.18428, + "11": 0.18511, + "12": 0.18619, + "13": 0.18461, + "14": 0.18647, + "15": 0.18581, + "16": 0.18608, + "17": 0.18299, + "18": 0.18471, + "19": 0.18333, + "20": 0.18288, + "21": 0.18432, + "22": 0.1817, + "23": 0.18526, + "24": 0.18337, + "25": 0.18381, + "26": 0.18253, + "27": 0.18309, + "28": 0.18721, + "29": 0.18268, + "30": 0.1853, + "31": 0.18365, + "32": 0.18239, + "33": 0.18174, + "34": 0.1823, + "35": 0.18255, + "36": 0.18445, + "37": 0.18019, + "38": 0.18127, + "39": 0.18126, + "40": 0.18097, + "41": 0.18271, + "42": 0.18269, + "43": 0.182, + "44": 0.18282, + "45": 0.18347, + "46": 0.18363, + "47": 0.18571, + "48": 0.18216, + "49": 0.18221, + "50": 0.18026 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index f96b534490d..bac5baf3a43 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.92337, + "2": 10.91811, + "3": 10.91506, + "4": 10.92436, "5": 10.92089, + "6": 10.92887, + "7": 10.92681, + "8": 10.91989, + "9": 10.92227, "10": 10.92192, + "11": 10.918, + "12": 10.9238, + "13": 10.92406, + "14": 10.90862, "15": 10.92351, + "16": 10.91807, + "17": 10.9154, + "18": 10.91265, + "19": 10.9091, "20": 10.90031, + "21": 10.8959, + "22": 10.8828, + "23": 10.89975, + "24": 10.88437, "25": 10.87827, + "26": 10.88155, + "27": 10.88649, + "28": 10.85679, + "29": 10.85657, "30": 10.81423, + "31": 10.76651, + "32": 10.83131, + "33": 10.83158, + "34": 10.78071, "35": 10.78865, + "36": 10.78003, + "37": 10.80446, + "38": 10.72434, + "39": 10.78066, "40": 10.65927, + "41": 10.69208, + "42": 10.70973, + "43": 10.56128, + "44": 10.61369, "45": 10.56875, + "46": 10.54455, + "47": 10.66751, + "48": 10.53792, + "49": 10.40861, "50": 10.55421 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 22791636.0, + "2": 22989424.0, + "3": 22661212.0, + "4": 23283558.0, "5": 22778528.0, + "6": 23085340.0, + "7": 22834596.0, + "8": 22990452.0, + "9": 22906466.0, "10": 22983232.0, + "11": 22564584.0, + "12": 22524010.0, + "13": 22981124.0, + "14": 22453096.0, "15": 22886400.0, + "16": 22895424.0, + "17": 22883736.0, + "18": 22647090.0, + "19": 22682526.0, "20": 22758358.0, + "21": 22804276.0, + "22": 22863814.0, + "23": 22603616.0, + "24": 22835172.0, "25": 22883742.0, + "26": 22611358.0, + "27": 22532968.0, + "28": 22517794.0, + "29": 22593448.0, "30": 22695256.0, + "31": 23019472.0, + "32": 22648896.0, + "33": 22622516.0, + "34": 22899620.0, "35": 22851572.0, 
+ "36": 22653160.0, + "37": 22560476.0, + "38": 22960058.0, + "39": 22865476.0, "40": 22721680.0, + "41": 22723112.0, + "42": 22730726.0, + "43": 23039588.0, + "44": 22810020.0, "45": 22738904.0, + "46": 22948334.0, + "47": 22696668.0, + "48": 22992832.0, + "49": 22791208.0, "50": 22968272.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 387744256.0, + "2": 387744256.0, + "3": 387744256.0, + "4": 387744256.0, "5": 387744256.0, + "6": 387744256.0, + "7": 387744256.0, + "8": 387744256.0, + "9": 387744256.0, "10": 387744256.0, + "11": 387744256.0, + "12": 387744256.0, + "13": 387744256.0, + "14": 387744256.0, "15": 387744256.0, + "16": 387744256.0, + "17": 387744256.0, + "18": 387744256.0, + "19": 387744256.0, "20": 387744256.0, + "21": 387744256.0, + "22": 387744256.0, + "23": 387744256.0, + "24": 387744256.0, "25": 387744256.0, + "26": 387744256.0, + "27": 387744256.0, + "28": 387744256.0, + "29": 387744256.0, "30": 387744256.0, + "31": 387744256.0, + "32": 387744256.0, + "33": 387744256.0, + "34": 387744256.0, "35": 387744256.0, + "36": 387744256.0, + "37": 387744256.0, + "38": 387744256.0, + "39": 387744256.0, "40": 387744256.0, + "41": 387744256.0, + "42": 387744256.0, + "43": 387744256.0, + "44": 387744256.0, "45": 387744256.0, + "46": 387744256.0, + "47": 387744256.0, + "48": 387744256.0, + "49": 387744256.0, "50": 387744256.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1122646528.0, + "2": 1245896192.0, + "3": 1245896192.0, + "4": 1245896192.0, "5": 1245896192.0, + "6": 1245896192.0, + "7": 1245896192.0, + "8": 1245896192.0, + "9": 1245896192.0, "10": 1245896192.0, + "11": 1245896192.0, + "12": 1245896192.0, + "13": 1245896192.0, + "14": 1245896192.0, "15": 1245896192.0, + "16": 1245896192.0, + "17": 1245896192.0, + "18": 1245896192.0, + "19": 1245896192.0, "20": 1245896192.0, + "21": 1245896192.0, + 
"22": 1245896192.0, + "23": 1245896192.0, + "24": 1245896192.0, "25": 1245896192.0, + "26": 1245896192.0, + "27": 1245896192.0, + "28": 1245896192.0, + "29": 1245896192.0, "30": 1245896192.0, + "31": 1245896192.0, + "32": 1245896192.0, + "33": 1245896192.0, + "34": 1245896192.0, "35": 1245896192.0, + "36": 1245896192.0, + "37": 1245896192.0, + "38": 1245896192.0, + "39": 1245896192.0, "40": 1245896192.0, + "41": 1245896192.0, + "42": 1245896192.0, + "43": 1245896192.0, + "44": 1245896192.0, "45": 1245896192.0, + "46": 1245896192.0, + "47": 1245896192.0, + "48": 1245896192.0, + "49": 1245896192.0, "50": 1245896192.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 9.91153, - "5": 0.10105, - "10": 0.09991, - "15": 0.09967, - "20": 0.10034, - "25": 0.10389, - "30": 0.10155, - "35": 0.11161, - "40": 0.10351, - "45": 0.10165, - "50": 0.10213 + "1": 10.00615, + "2": 0.13355, + "3": 0.1156, + "4": 0.11748, + "5": 0.11709, + "6": 0.11516, + "7": 0.11746, + "8": 0.11799, + "9": 0.11829, + "10": 0.11844, + "11": 0.11847, + "12": 0.12334, + "13": 0.12621, + "14": 0.1244, + "15": 0.11572, + "16": 0.11683, + "17": 0.11639, + "18": 0.11916, + "19": 0.1174, + "20": 0.11558, + "21": 0.11518, + "22": 0.1165, + "23": 0.11972, + "24": 0.12052, + "25": 0.11938, + "26": 0.125, + "27": 0.11874, + "28": 0.11938, + "29": 0.11733, + "30": 0.11731, + "31": 0.11777, + "32": 0.11704, + "33": 0.121, + "34": 0.12101, + "35": 0.11619, + "36": 0.11824, + "37": 0.11821, + "38": 0.11953, + "39": 0.11906, + "40": 0.118, + "41": 0.11938, + "42": 0.11873, + "43": 0.11887, + "44": 0.11808, + "45": 0.11848, + "46": 0.12012, + "47": 0.11741, + "48": 0.11744, + "49": 0.11829, + "50": 0.11954 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..5f5b4095502 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.92337, + "2": 10.91811, + "3": 10.91506, + "4": 10.92436, + "5": 10.92089, + "6": 10.92887, + "7": 10.92681, + "8": 10.91989, + "9": 10.92227, + "10": 10.92192, + "11": 10.918, + "12": 10.9238, + "13": 10.92406, + "14": 10.90862, + "15": 10.92351, + "16": 10.91807, + "17": 10.9154, + "18": 10.91265, + "19": 10.9091, + "20": 10.90031, + "21": 10.8959, + "22": 10.8828, + "23": 10.89975, + "24": 10.88437, + "25": 10.87827, + "26": 10.88155, + "27": 10.88649, + "28": 10.85679, + "29": 10.85657, + "30": 10.81423, + "31": 10.76651, + "32": 10.83131, + "33": 10.83158, + "34": 10.78071, + "35": 10.78865, + "36": 10.78003, + "37": 10.80446, + "38": 10.72434, + "39": 10.78066, + "40": 10.65927, + "41": 10.69208, + "42": 10.70973, + "43": 10.56128, + "44": 10.61369, + "45": 10.56875, + "46": 10.54455, + "47": 10.66751, + "48": 10.53792, + "49": 10.40861, + "50": 10.55421 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22791636.0, + "2": 22989424.0, + "3": 22661212.0, + "4": 23283558.0, + "5": 22778528.0, + "6": 23085340.0, + "7": 22834596.0, + "8": 22990452.0, + "9": 22906466.0, + "10": 22983232.0, + "11": 22564584.0, + "12": 22524010.0, + "13": 22981124.0, + "14": 22453096.0, + "15": 22886400.0, + "16": 22895424.0, + "17": 22883736.0, + "18": 22647090.0, + "19": 22682526.0, + "20": 22758358.0, + "21": 22804276.0, + "22": 22863814.0, + "23": 22603616.0, + "24": 22835172.0, + "25": 22883742.0, + "26": 22611358.0, + "27": 22532968.0, + "28": 22517794.0, + "29": 22593448.0, + "30": 22695256.0, 
+ "31": 23019472.0, + "32": 22648896.0, + "33": 22622516.0, + "34": 22899620.0, + "35": 22851572.0, + "36": 22653160.0, + "37": 22560476.0, + "38": 22960058.0, + "39": 22865476.0, + "40": 22721680.0, + "41": 22723112.0, + "42": 22730726.0, + "43": 23039588.0, + "44": 22810020.0, + "45": 22738904.0, + "46": 22948334.0, + "47": 22696668.0, + "48": 22992832.0, + "49": 22791208.0, + "50": 22968272.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 387744256.0, + "2": 387744256.0, + "3": 387744256.0, + "4": 387744256.0, + "5": 387744256.0, + "6": 387744256.0, + "7": 387744256.0, + "8": 387744256.0, + "9": 387744256.0, + "10": 387744256.0, + "11": 387744256.0, + "12": 387744256.0, + "13": 387744256.0, + "14": 387744256.0, + "15": 387744256.0, + "16": 387744256.0, + "17": 387744256.0, + "18": 387744256.0, + "19": 387744256.0, + "20": 387744256.0, + "21": 387744256.0, + "22": 387744256.0, + "23": 387744256.0, + "24": 387744256.0, + "25": 387744256.0, + "26": 387744256.0, + "27": 387744256.0, + "28": 387744256.0, + "29": 387744256.0, + "30": 387744256.0, + "31": 387744256.0, + "32": 387744256.0, + "33": 387744256.0, + "34": 387744256.0, + "35": 387744256.0, + "36": 387744256.0, + "37": 387744256.0, + "38": 387744256.0, + "39": 387744256.0, + "40": 387744256.0, + "41": 387744256.0, + "42": 387744256.0, + "43": 387744256.0, + "44": 387744256.0, + "45": 387744256.0, + "46": 387744256.0, + "47": 387744256.0, + "48": 387744256.0, + "49": 387744256.0, + "50": 387744256.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1122646528.0, + "2": 1245896192.0, + "3": 1245896192.0, + "4": 1245896192.0, + "5": 1245896192.0, + "6": 1245896192.0, + "7": 1245896192.0, + "8": 1245896192.0, + "9": 1245896192.0, + "10": 1245896192.0, + "11": 1245896192.0, + "12": 1245896192.0, + "13": 1245896192.0, + "14": 1245896192.0, + "15": 1245896192.0, + "16": 
1245896192.0, + "17": 1245896192.0, + "18": 1245896192.0, + "19": 1245896192.0, + "20": 1245896192.0, + "21": 1245896192.0, + "22": 1245896192.0, + "23": 1245896192.0, + "24": 1245896192.0, + "25": 1245896192.0, + "26": 1245896192.0, + "27": 1245896192.0, + "28": 1245896192.0, + "29": 1245896192.0, + "30": 1245896192.0, + "31": 1245896192.0, + "32": 1245896192.0, + "33": 1245896192.0, + "34": 1245896192.0, + "35": 1245896192.0, + "36": 1245896192.0, + "37": 1245896192.0, + "38": 1245896192.0, + "39": 1245896192.0, + "40": 1245896192.0, + "41": 1245896192.0, + "42": 1245896192.0, + "43": 1245896192.0, + "44": 1245896192.0, + "45": 1245896192.0, + "46": 1245896192.0, + "47": 1245896192.0, + "48": 1245896192.0, + "49": 1245896192.0, + "50": 1245896192.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9.48646, + "2": 0.13915, + "3": 0.11332, + "4": 0.11062, + "5": 0.10601, + "6": 0.10405, + "7": 0.10505, + "8": 0.10406, + "9": 0.10505, + "10": 0.10412, + "11": 0.1027, + "12": 0.10452, + "13": 0.10273, + "14": 0.10271, + "15": 0.10391, + "16": 0.10227, + "17": 0.10295, + "18": 0.10375, + "19": 0.10202, + "20": 0.10246, + "21": 0.10149, + "22": 0.1037, + "23": 0.10264, + "24": 0.10318, + "25": 0.10409, + "26": 0.11044, + "27": 0.10485, + "28": 0.10691, + "29": 0.10499, + "30": 0.10361, + "31": 0.10501, + "32": 0.10466, + "33": 0.1048, + "34": 0.10456, + "35": 0.10388, + "36": 0.10498, + "37": 0.10375, + "38": 0.10297, + "39": 0.10174, + "40": 0.10044, + "41": 0.10196, + "42": 0.10494, + "43": 0.10303, + "44": 0.10254, + "45": 0.10314, + "46": 0.10306, + "47": 0.10329, + "48": 0.10445, + "49": 0.10543, + "50": 0.1043 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file 
mode 100644 index 00000000000..d0103111a28 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.92337, + "2": 10.91811, + "3": 10.91506, + "4": 10.92436, + "5": 10.92089, + "6": 10.92887, + "7": 10.92681, + "8": 10.91989, + "9": 10.92227, + "10": 10.92192, + "11": 10.918, + "12": 10.9238, + "13": 10.92406, + "14": 10.90862, + "15": 10.92351, + "16": 10.91807, + "17": 10.9154, + "18": 10.91265, + "19": 10.9091, + "20": 10.90031, + "21": 10.8959, + "22": 10.8828, + "23": 10.89975, + "24": 10.88437, + "25": 10.87827, + "26": 10.88155, + "27": 10.88649, + "28": 10.85679, + "29": 10.85657, + "30": 10.81423, + "31": 10.76651, + "32": 10.83131, + "33": 10.83158, + "34": 10.78071, + "35": 10.78865, + "36": 10.78003, + "37": 10.80446, + "38": 10.72434, + "39": 10.78066, + "40": 10.65927, + "41": 10.69208, + "42": 10.70973, + "43": 10.56128, + "44": 10.61369, + "45": 10.56875, + "46": 10.54455, + "47": 10.66751, + "48": 10.53792, + "49": 10.40861, + "50": 10.55421 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22791636.0, + "2": 22989424.0, + "3": 22661212.0, + "4": 23283558.0, + "5": 22778528.0, + "6": 23085340.0, + "7": 22834596.0, + "8": 22990452.0, + "9": 22906466.0, + "10": 22983232.0, + "11": 22564584.0, + "12": 22524010.0, + "13": 22981124.0, + "14": 22453096.0, + "15": 22886400.0, + "16": 22895424.0, + "17": 22883736.0, + "18": 22647090.0, + "19": 22682526.0, + "20": 22758358.0, + "21": 22804276.0, + "22": 22863814.0, + "23": 22603616.0, + "24": 22835172.0, + "25": 22883742.0, + "26": 22611358.0, + "27": 22532968.0, + "28": 22517794.0, + "29": 22593448.0, + "30": 22695256.0, + "31": 23019472.0, + "32": 22648896.0, + "33": 22622516.0, + "34": 22899620.0, + "35": 22851572.0, + "36": 22653160.0, + "37": 
22560476.0, + "38": 22960058.0, + "39": 22865476.0, + "40": 22721680.0, + "41": 22723112.0, + "42": 22730726.0, + "43": 23039588.0, + "44": 22810020.0, + "45": 22738904.0, + "46": 22948334.0, + "47": 22696668.0, + "48": 22992832.0, + "49": 22791208.0, + "50": 22968272.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 387744256.0, + "2": 387744256.0, + "3": 387744256.0, + "4": 387744256.0, + "5": 387744256.0, + "6": 387744256.0, + "7": 387744256.0, + "8": 387744256.0, + "9": 387744256.0, + "10": 387744256.0, + "11": 387744256.0, + "12": 387744256.0, + "13": 387744256.0, + "14": 387744256.0, + "15": 387744256.0, + "16": 387744256.0, + "17": 387744256.0, + "18": 387744256.0, + "19": 387744256.0, + "20": 387744256.0, + "21": 387744256.0, + "22": 387744256.0, + "23": 387744256.0, + "24": 387744256.0, + "25": 387744256.0, + "26": 387744256.0, + "27": 387744256.0, + "28": 387744256.0, + "29": 387744256.0, + "30": 387744256.0, + "31": 387744256.0, + "32": 387744256.0, + "33": 387744256.0, + "34": 387744256.0, + "35": 387744256.0, + "36": 387744256.0, + "37": 387744256.0, + "38": 387744256.0, + "39": 387744256.0, + "40": 387744256.0, + "41": 387744256.0, + "42": 387744256.0, + "43": 387744256.0, + "44": 387744256.0, + "45": 387744256.0, + "46": 387744256.0, + "47": 387744256.0, + "48": 387744256.0, + "49": 387744256.0, + "50": 387744256.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1122646528.0, + "2": 1245896192.0, + "3": 1245896192.0, + "4": 1245896192.0, + "5": 1245896192.0, + "6": 1245896192.0, + "7": 1245896192.0, + "8": 1245896192.0, + "9": 1245896192.0, + "10": 1245896192.0, + "11": 1245896192.0, + "12": 1245896192.0, + "13": 1245896192.0, + "14": 1245896192.0, + "15": 1245896192.0, + "16": 1245896192.0, + "17": 1245896192.0, + "18": 1245896192.0, + "19": 1245896192.0, + "20": 1245896192.0, + "21": 1245896192.0, + "22": 
1245896192.0, + "23": 1245896192.0, + "24": 1245896192.0, + "25": 1245896192.0, + "26": 1245896192.0, + "27": 1245896192.0, + "28": 1245896192.0, + "29": 1245896192.0, + "30": 1245896192.0, + "31": 1245896192.0, + "32": 1245896192.0, + "33": 1245896192.0, + "34": 1245896192.0, + "35": 1245896192.0, + "36": 1245896192.0, + "37": 1245896192.0, + "38": 1245896192.0, + "39": 1245896192.0, + "40": 1245896192.0, + "41": 1245896192.0, + "42": 1245896192.0, + "43": 1245896192.0, + "44": 1245896192.0, + "45": 1245896192.0, + "46": 1245896192.0, + "47": 1245896192.0, + "48": 1245896192.0, + "49": 1245896192.0, + "50": 1245896192.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9.86323, + "2": 0.13474, + "3": 0.1236, + "4": 0.12168, + "5": 0.12406, + "6": 0.12501, + "7": 0.12711, + "8": 0.12778, + "9": 0.12839, + "10": 0.12143, + "11": 0.12109, + "12": 0.12077, + "13": 0.11905, + "14": 0.12184, + "15": 0.12152, + "16": 0.11812, + "17": 0.11693, + "18": 0.11549, + "19": 0.11712, + "20": 0.11675, + "21": 0.11877, + "22": 0.11837, + "23": 0.11757, + "24": 0.11636, + "25": 0.11722, + "26": 0.12393, + "27": 0.11736, + "28": 0.11759, + "29": 0.11945, + "30": 0.11726, + "31": 0.12096, + "32": 0.12206, + "33": 0.11734, + "34": 0.11894, + "35": 0.11695, + "36": 0.11712, + "37": 0.11489, + "38": 0.11866, + "39": 0.11749, + "40": 0.11829, + "41": 0.11674, + "42": 0.1181, + "43": 0.11808, + "44": 0.11621, + "45": 0.11832, + "46": 0.12031, + "47": 0.12023, + "48": 0.11643, + "49": 0.11855, + "50": 0.11792 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 92e4f61f204..4fc4344a2e0 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.86535, - "5": 10.87857, - "10": 10.8298, - "15": 10.82054, - "20": 10.70396, - "25": 10.49423, - "30": 10.30551, - "35": 10.20189, - "40": 10.01906, + "2": 10.85873, + "3": 10.86284, + "4": 10.84005, + "5": 10.87854, + "6": 10.8885, + "7": 10.86534, + "8": 10.86017, + "9": 10.85988, + "10": 10.82978, + "11": 10.88948, + "12": 10.8751, + "13": 10.87424, + "14": 10.89677, + "15": 10.82052, + "16": 10.82497, + "17": 10.78983, + "18": 10.81028, + "19": 10.80533, + "20": 10.70398, + "21": 10.66993, + "22": 10.50641, + "23": 10.69004, + "24": 10.56313, + "25": 10.49419, + "26": 10.56627, + "27": 10.58027, + "28": 10.51571, + "29": 10.55294, + "30": 10.3055, + "31": 10.02244, + "32": 10.40616, + "33": 10.39877, + "34": 10.13771, + "35": 10.20185, + "36": 10.16052, + "37": 10.28974, + "38": 10.11478, + "39": 10.36102, + "40": 10.01901, + "41": 10.07288, + "42": 10.14698, + "43": 9.74686, + "44": 9.87764, "45": 9.74965, - "50": 9.83991, - "55": 9.81661, - "60": 9.43542, - "65": 8.87157, + "46": 9.73383, + "47": 10.07534, + "48": 9.78068, + "49": 9.4478, + "50": 9.8399, + "51": 9.78024, + "52": 9.67265, + "53": 10.02013, + "54": 9.8979, + "55": 9.81663, + "56": 9.56041, + "57": 9.4118, + "58": 9.77417, + "59": 9.51799, + "60": 9.43538, + "61": 9.64483, + "62": 9.93002, + "63": 9.30912, + "64": 9.72066, + "65": 8.87152, + "66": 9.64433, + "67": 9.31332, + "68": 9.74069, + "69": 9.75327, "70": 9.70004, - "75": 9.37312, - "80": 9.36163, - "85": 9.5694, - "90": 9.78468, + "71": 9.56557, + "72": 9.53091, + "73": 9.44385, + "74": 8.8678, + "75": 9.37308, 
+ "76": 9.01275, + "77": 10.02855, + "78": 9.68739, + "79": 9.32795, + "80": 9.36169, + "81": 9.43364, + "82": 9.66094, + "83": 9.25137, + "84": 9.37353, + "85": 9.56936, + "86": 9.03179, + "87": 9.55585, + "88": 9.71056, + "89": 9.55398, + "90": 9.78472, + "91": 9.29079, + "92": 9.31245, + "93": 9.03137, + "94": 8.78667, "95": 9.4873, + "96": 9.49052, + "97": 9.26686, + "98": 9.63648, + "99": 8.84331, "100": 9.3555 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 595.0, - "5": 623.0, - "10": 551.0, - "15": 632.0, - "20": 621.0, - "25": 581.0, - "30": 691.0, - "35": 739.0, - "40": 812.0, - "45": 829.0, - "50": 869.0, - "55": 909.0, - "60": 832.0, - "65": 936.0, - "70": 1050.0, - "75": 816.0, - "80": 1140.0, - "85": 1203.0, - "90": 1108.0, - "95": 1190.0, - "100": 1117.0 + "1": 603.0, + "2": 642.0, + "3": 648.0, + "4": 599.0, + "5": 644.0, + "6": 645.0, + "7": 625.0, + "8": 544.0, + "9": 657.0, + "10": 536.0, + "11": 673.0, + "12": 618.0, + "13": 646.0, + "14": 683.0, + "15": 639.0, + "16": 616.0, + "17": 656.0, + "18": 579.0, + "19": 637.0, + "20": 628.0, + "21": 672.0, + "22": 627.0, + "23": 744.0, + "24": 610.0, + "25": 578.0, + "26": 602.0, + "27": 633.0, + "28": 750.0, + "29": 709.0, + "30": 736.0, + "31": 626.0, + "32": 716.0, + "33": 754.0, + "34": 692.0, + "35": 707.0, + "36": 733.0, + "37": 797.0, + "38": 813.0, + "39": 878.0, + "40": 807.0, + "41": 808.0, + "42": 831.0, + "43": 703.0, + "44": 810.0, + "45": 768.0, + "46": 858.0, + "47": 879.0, + "48": 856.0, + "49": 814.0, + "50": 862.0, + "51": 928.0, + "52": 1001.0, + "53": 1019.0, + "54": 978.0, + "55": 917.0, + "56": 1023.0, + "57": 835.0, + "58": 1020.0, + "59": 1033.0, + "60": 900.0, + "61": 998.0, + "62": 966.0, + "63": 933.0, + "64": 1084.0, + "65": 960.0, + "66": 1081.0, + "67": 1043.0, + "68": 1032.0, + "69": 1029.0, + "70": 1108.0, + "71": 1123.0, + "72": 848.0, + "73": 991.0, + "74": 685.0, + "75": 878.0, + "76": 1149.0, + 
"77": 1198.0, + "78": 1087.0, + "79": 1095.0, + "80": 1114.0, + "81": 1229.0, + "82": 1048.0, + "83": 1002.0, + "84": 1115.0, + "85": 1228.0, + "86": 896.0, + "87": 1212.0, + "88": 1039.0, + "89": 1111.0, + "90": 1085.0, + "91": 1140.0, + "92": 1186.0, + "93": 896.0, + "94": 1148.0, + "95": 1102.0, + "96": 1113.0, + "97": 1002.0, + "98": 1267.0, + "99": 1178.0, + "100": 1179.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, "50": 510689792.0, + "51": 510689792.0, + "52": 510689792.0, + "53": 510689792.0, + "54": 510689792.0, "55": 510689792.0, + "56": 510689792.0, + "57": 510689792.0, + "58": 510689792.0, + "59": 510689792.0, "60": 510689792.0, + "61": 510689792.0, + "62": 510689792.0, + "63": 510689792.0, + "64": 510689792.0, "65": 510689792.0, + "66": 510689792.0, + "67": 510689792.0, + "68": 510689792.0, + "69": 510689792.0, "70": 510689792.0, + "71": 510689792.0, + "72": 510689792.0, + "73": 510689792.0, + 
"74": 510689792.0, "75": 510689792.0, + "76": 510689792.0, + "77": 510689792.0, + "78": 510689792.0, + "79": 510689792.0, "80": 510689792.0, + "81": 510689792.0, + "82": 510689792.0, + "83": 510689792.0, + "84": 510689792.0, "85": 510689792.0, + "86": 510689792.0, + "87": 510689792.0, + "88": 510689792.0, + "89": 510689792.0, "90": 510689792.0, + "91": 510689792.0, + "92": 510689792.0, + "93": 510689792.0, + "94": 510689792.0, "95": 510689792.0, + "96": 510689792.0, + "97": 510689792.0, + "98": 510689792.0, + "99": 510689792.0, "100": 510689792.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, "25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, "50": 933156352.0, + "51": 933156352.0, + "52": 933156352.0, + "53": 933156352.0, + "54": 933156352.0, "55": 933156352.0, + "56": 933156352.0, + "57": 933156352.0, + "58": 933156352.0, + "59": 933156352.0, "60": 933156352.0, + "61": 933156352.0, + "62": 933156352.0, + "63": 933156352.0, + "64": 933156352.0, "65": 
933156352.0, + "66": 933156352.0, + "67": 933156352.0, + "68": 933156352.0, + "69": 933156352.0, "70": 933156352.0, + "71": 933156352.0, + "72": 933156352.0, + "73": 933156352.0, + "74": 933156352.0, "75": 933156352.0, + "76": 933156352.0, + "77": 933156352.0, + "78": 933156352.0, + "79": 933156352.0, "80": 933156352.0, + "81": 933156352.0, + "82": 933156352.0, + "83": 933156352.0, + "84": 933156352.0, "85": 933156352.0, + "86": 933156352.0, + "87": 933156352.0, + "88": 933156352.0, + "89": 933156352.0, "90": 933156352.0, + "91": 933156352.0, + "92": 933156352.0, + "93": 933156352.0, + "94": 933156352.0, "95": 933156352.0, + "96": 933156352.0, + "97": 933156352.0, + "98": 933156352.0, + "99": 933156352.0, "100": 933156352.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 14.34885, - "5": 0.28143, - "10": 0.28313, - "15": 0.27848, - "20": 0.28429, - "25": 0.28541, - "30": 0.28319, - "35": 0.28404, - "40": 0.28308, - "45": 0.27994, - "50": 0.28525, - "55": 0.2917, - "60": 0.29133, - "65": 0.28566, - "70": 0.29027, - "75": 0.28604, - "80": 0.29548, - "85": 0.28726, - "90": 0.28624, - "95": 0.2883, - "100": 0.29017 + "1": 16.11625, + "2": 0.36631, + "3": 0.34354, + "4": 0.34024, + "5": 0.33469, + "6": 0.3419, + "7": 0.33228, + "8": 0.32074, + "9": 0.32378, + "10": 0.32158, + "11": 0.32213, + "12": 0.32775, + "13": 0.32607, + "14": 0.32118, + "15": 0.3245, + "16": 0.3215, + "17": 0.32118, + "18": 0.32636, + "19": 0.32325, + "20": 0.32277, + "21": 0.32375, + "22": 0.32539, + "23": 0.32026, + "24": 0.32491, + "25": 0.32391, + "26": 0.32302, + "27": 0.32176, + "28": 0.32809, + "29": 0.32603, + "30": 0.3249, + "31": 0.33977, + "32": 0.34038, + "33": 0.34031, + "34": 0.32189, + "35": 0.32635, + "36": 0.32269, + "37": 0.32267, + "38": 0.3225, + "39": 0.32579, + "40": 0.32854, + "41": 0.32405, + "42": 0.32252, + "43": 0.3294, + "44": 0.32763, + "45": 0.32247, + "46": 0.32281, + "47": 0.32544, + "48": 
0.32623, + "49": 0.32647, + "50": 0.32132, + "51": 0.32838, + "52": 0.32103, + "53": 0.32972, + "54": 0.32308, + "55": 0.3197, + "56": 0.32532, + "57": 0.33022, + "58": 0.32385, + "59": 0.3254, + "60": 0.33968, + "61": 0.334, + "62": 0.33471, + "63": 0.33468, + "64": 0.32025, + "65": 0.31712, + "66": 0.327, + "67": 0.3195, + "68": 0.32296, + "69": 0.32809, + "70": 0.321, + "71": 0.32464, + "72": 0.33034, + "73": 0.32003, + "74": 0.31593, + "75": 0.32867, + "76": 0.32348, + "77": 0.31767, + "78": 0.33054, + "79": 0.32363, + "80": 0.3218, + "81": 0.32884, + "82": 0.32228, + "83": 0.31938, + "84": 0.32519, + "85": 0.32022, + "86": 0.32099, + "87": 0.32558, + "88": 0.32258, + "89": 0.32117, + "90": 0.33145, + "91": 0.33173, + "92": 0.32613, + "93": 0.33404, + "94": 0.32862, + "95": 0.32897, + "96": 0.32817, + "97": 0.32958, + "98": 0.32759, + "99": 0.33061, + "100": 0.33344 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..af0dc8991a7 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86286, + "4": 10.8401, + "5": 10.87854, + "6": 10.88851, + "7": 10.86534, + "8": 10.86016, + "9": 10.8599, + "10": 10.82977, + "11": 10.88949, + "12": 10.8751, + "13": 10.87423, + "14": 10.89677, + "15": 10.82052, + "16": 10.82497, + "17": 10.78983, + "18": 10.81028, + "19": 10.80533, + "20": 10.70396, + "21": 10.66992, + "22": 10.50642, + "23": 10.69003, + "24": 
10.56316, + "25": 10.49422, + "26": 10.56629, + "27": 10.58024, + "28": 10.5157, + "29": 10.55294, + "30": 10.30549, + "31": 10.02246, + "32": 10.40618, + "33": 10.3988, + "34": 10.13772, + "35": 10.20188, + "36": 10.16051, + "37": 10.28976, + "38": 10.11481, + "39": 10.36103, + "40": 10.01902, + "41": 10.07292, + "42": 10.14693, + "43": 9.74685, + "44": 9.87763, + "45": 9.74968, + "46": 9.73387, + "47": 10.07535, + "48": 9.78069, + "49": 9.44782, + "50": 9.83989, + "51": 9.78023, + "52": 9.67265, + "53": 10.02014, + "54": 9.89792, + "55": 9.81667, + "56": 9.56045, + "57": 9.41178, + "58": 9.77416, + "59": 9.51797, + "60": 9.43536, + "61": 9.64484, + "62": 9.93004, + "63": 9.30908, + "64": 9.72064, + "65": 8.87155, + "66": 9.64428, + "67": 9.31328, + "68": 9.74066, + "69": 9.75332, + "70": 9.70004, + "71": 9.56561, + "72": 9.53094, + "73": 9.44384, + "74": 8.86782, + "75": 9.37311, + "76": 9.01276, + "77": 10.02852, + "78": 9.68739, + "79": 9.32796, + "80": 9.36168, + "81": 9.43368, + "82": 9.66094, + "83": 9.25138, + "84": 9.37354, + "85": 9.5694, + "86": 9.03176, + "87": 9.55582, + "88": 9.71055, + "89": 9.55397, + "90": 9.7847, + "91": 9.29075, + "92": 9.31241, + "93": 9.03141, + "94": 8.78668, + "95": 9.48729, + "96": 9.49051, + "97": 9.26682, + "98": 9.63648, + "99": 8.84335, + "100": 9.35548 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 602.0, + "2": 621.0, + "3": 616.0, + "4": 577.0, + "5": 617.0, + "6": 617.0, + "7": 645.0, + "8": 568.0, + "9": 673.0, + "10": 569.0, + "11": 637.0, + "12": 647.0, + "13": 676.0, + "14": 666.0, + "15": 706.0, + "16": 627.0, + "17": 640.0, + "18": 607.0, + "19": 623.0, + "20": 620.0, + "21": 654.0, + "22": 640.0, + "23": 775.0, + "24": 581.0, + "25": 629.0, + "26": 665.0, + "27": 689.0, + "28": 707.0, + "29": 722.0, + "30": 738.0, + "31": 640.0, + "32": 746.0, + "33": 831.0, + "34": 673.0, + "35": 746.0, + "36": 749.0, + "37": 826.0, + "38": 771.0, + "39": 852.0, + 
"40": 746.0, + "41": 834.0, + "42": 845.0, + "43": 709.0, + "44": 739.0, + "45": 808.0, + "46": 888.0, + "47": 849.0, + "48": 880.0, + "49": 879.0, + "50": 840.0, + "51": 915.0, + "52": 896.0, + "53": 1048.0, + "54": 1044.0, + "55": 954.0, + "56": 960.0, + "57": 849.0, + "58": 1035.0, + "59": 1036.0, + "60": 875.0, + "61": 1010.0, + "62": 973.0, + "63": 928.0, + "64": 1019.0, + "65": 928.0, + "66": 1115.0, + "67": 966.0, + "68": 954.0, + "69": 1094.0, + "70": 1039.0, + "71": 1034.0, + "72": 891.0, + "73": 1023.0, + "74": 764.0, + "75": 903.0, + "76": 1061.0, + "77": 1149.0, + "78": 1070.0, + "79": 1063.0, + "80": 1091.0, + "81": 1242.0, + "82": 1047.0, + "83": 1012.0, + "84": 1154.0, + "85": 1199.0, + "86": 930.0, + "87": 1297.0, + "88": 1049.0, + "89": 1103.0, + "90": 1021.0, + "91": 1134.0, + "92": 1187.0, + "93": 918.0, + "94": 1129.0, + "95": 1126.0, + "96": 1146.0, + "97": 1003.0, + "98": 1260.0, + "99": 1135.0, + "100": 1164.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + 
"46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0, + "51": 510689792.0, + "52": 510689792.0, + "53": 510689792.0, + "54": 510689792.0, + "55": 510689792.0, + "56": 510689792.0, + "57": 510689792.0, + "58": 510689792.0, + "59": 510689792.0, + "60": 510689792.0, + "61": 510689792.0, + "62": 510689792.0, + "63": 510689792.0, + "64": 510689792.0, + "65": 510689792.0, + "66": 510689792.0, + "67": 510689792.0, + "68": 510689792.0, + "69": 510689792.0, + "70": 510689792.0, + "71": 510689792.0, + "72": 510689792.0, + "73": 510689792.0, + "74": 510689792.0, + "75": 510689792.0, + "76": 510689792.0, + "77": 510689792.0, + "78": 510689792.0, + "79": 510689792.0, + "80": 510689792.0, + "81": 510689792.0, + "82": 510689792.0, + "83": 510689792.0, + "84": 510689792.0, + "85": 510689792.0, + "86": 510689792.0, + "87": 510689792.0, + "88": 510689792.0, + "89": 510689792.0, + "90": 510689792.0, + "91": 510689792.0, + "92": 510689792.0, + "93": 510689792.0, + "94": 510689792.0, + "95": 510689792.0, + "96": 510689792.0, + "97": 510689792.0, + "98": 510689792.0, + "99": 510689792.0, + "100": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 934203392.0, + "16": 934203392.0, + "17": 934203392.0, + "18": 934203392.0, + "19": 934203392.0, + "20": 934203392.0, + "21": 934203392.0, + "22": 934203392.0, + "23": 934203392.0, + "24": 934203392.0, + "25": 934203392.0, + "26": 934203392.0, + "27": 934203392.0, + "28": 934203392.0, + "29": 934203392.0, + "30": 934203392.0, + "31": 934203392.0, + "32": 934203392.0, + "33": 934203392.0, + "34": 934203392.0, + "35": 934203392.0, + 
"36": 934203392.0, + "37": 934203392.0, + "38": 934203392.0, + "39": 934203392.0, + "40": 934203392.0, + "41": 934203392.0, + "42": 934203392.0, + "43": 934203392.0, + "44": 934203392.0, + "45": 934203392.0, + "46": 934203392.0, + "47": 934203392.0, + "48": 934203392.0, + "49": 934203392.0, + "50": 934203392.0, + "51": 934203392.0, + "52": 934203392.0, + "53": 934203392.0, + "54": 934203392.0, + "55": 934203392.0, + "56": 934203392.0, + "57": 934203392.0, + "58": 934203392.0, + "59": 934203392.0, + "60": 934203392.0, + "61": 934203392.0, + "62": 934203392.0, + "63": 934203392.0, + "64": 934203392.0, + "65": 934203392.0, + "66": 934203392.0, + "67": 934203392.0, + "68": 934203392.0, + "69": 934203392.0, + "70": 934203392.0, + "71": 934203392.0, + "72": 934203392.0, + "73": 934203392.0, + "74": 934203392.0, + "75": 934203392.0, + "76": 934203392.0, + "77": 934203392.0, + "78": 934203392.0, + "79": 934203392.0, + "80": 934203392.0, + "81": 934203392.0, + "82": 934203392.0, + "83": 934203392.0, + "84": 934203392.0, + "85": 934203392.0, + "86": 934203392.0, + "87": 934203392.0, + "88": 934203392.0, + "89": 934203392.0, + "90": 934203392.0, + "91": 934203392.0, + "92": 934203392.0, + "93": 934203392.0, + "94": 934203392.0, + "95": 934203392.0, + "96": 934203392.0, + "97": 934203392.0, + "98": 934203392.0, + "99": 934203392.0, + "100": 934203392.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 17.126, + "2": 0.48552, + "3": 0.29604, + "4": 0.30321, + "5": 0.28764, + "6": 0.28618, + "7": 0.28577, + "8": 0.28879, + "9": 0.28726, + "10": 0.28646, + "11": 0.28506, + "12": 0.28217, + "13": 0.2868, + "14": 0.28787, + "15": 0.28549, + "16": 0.2862, + "17": 0.28698, + "18": 0.29086, + "19": 0.28554, + "20": 0.2857, + "21": 0.28549, + "22": 0.28641, + "23": 0.28608, + "24": 0.28569, + "25": 0.28652, + "26": 0.28468, + "27": 0.28942, + "28": 0.28949, + "29": 0.28879, + "30": 0.28796, + "31": 0.29103, + "32": 0.29073, 
+ "33": 0.28732, + "34": 0.29616, + "35": 0.28855, + "36": 0.28828, + "37": 0.28466, + "38": 0.28953, + "39": 0.29333, + "40": 0.28768, + "41": 0.28231, + "42": 0.28695, + "43": 0.28583, + "44": 0.28905, + "45": 0.28528, + "46": 0.28715, + "47": 0.28626, + "48": 0.28831, + "49": 0.28647, + "50": 0.28555, + "51": 0.29483, + "52": 0.28779, + "53": 0.28678, + "54": 0.28789, + "55": 0.28871, + "56": 0.29987, + "57": 0.29343, + "58": 0.28823, + "59": 0.28887, + "60": 0.29468, + "61": 0.28773, + "62": 0.30025, + "63": 0.28844, + "64": 0.28597, + "65": 0.28565, + "66": 0.2875, + "67": 0.28661, + "68": 0.2859, + "69": 0.28584, + "70": 0.28606, + "71": 0.286, + "72": 0.2846, + "73": 0.29219, + "74": 0.28688, + "75": 0.28871, + "76": 0.28938, + "77": 0.28731, + "78": 0.28558, + "79": 0.28696, + "80": 0.28619, + "81": 0.28793, + "82": 0.28828, + "83": 0.28522, + "84": 0.29988, + "85": 0.29704, + "86": 0.28664, + "87": 0.2857, + "88": 0.28622, + "89": 0.28571, + "90": 0.2853, + "91": 0.29259, + "92": 0.28615, + "93": 0.285, + "94": 0.286, + "95": 0.28546, + "96": 0.28446, + "97": 0.28434, + "98": 0.28413, + "99": 0.2875, + "100": 0.28509 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..c677311f507 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86535, + "2": 10.85873, + "3": 10.86285, + "4": 10.84007, + "5": 10.87856, + "6": 10.88856, + "7": 10.86538, + "8": 10.86017, + "9": 10.85991, + "10": 
10.8298, + "11": 10.88947, + "12": 10.87508, + "13": 10.87422, + "14": 10.89677, + "15": 10.8205, + "16": 10.82499, + "17": 10.78984, + "18": 10.81029, + "19": 10.80536, + "20": 10.70396, + "21": 10.6699, + "22": 10.50644, + "23": 10.69003, + "24": 10.5631, + "25": 10.49417, + "26": 10.56624, + "27": 10.58026, + "28": 10.51571, + "29": 10.553, + "30": 10.30552, + "31": 10.02249, + "32": 10.40613, + "33": 10.3988, + "34": 10.13771, + "35": 10.20186, + "36": 10.16052, + "37": 10.28975, + "38": 10.1148, + "39": 10.36102, + "40": 10.01904, + "41": 10.07292, + "42": 10.14696, + "43": 9.74683, + "44": 9.87763, + "45": 9.74966, + "46": 9.73387, + "47": 10.07534, + "48": 9.78069, + "49": 9.4478, + "50": 9.83991, + "51": 9.78025, + "52": 9.67263, + "53": 10.0201, + "54": 9.89789, + "55": 9.81664, + "56": 9.56044, + "57": 9.41178, + "58": 9.77419, + "59": 9.51794, + "60": 9.43538, + "61": 9.64484, + "62": 9.93004, + "63": 9.30911, + "64": 9.72068, + "65": 8.87154, + "66": 9.64427, + "67": 9.31328, + "68": 9.74067, + "69": 9.75334, + "70": 9.70004, + "71": 9.56556, + "72": 9.53094, + "73": 9.44386, + "74": 8.86782, + "75": 9.37314, + "76": 9.01274, + "77": 10.02855, + "78": 9.68739, + "79": 9.328, + "80": 9.36168, + "81": 9.43367, + "82": 9.66094, + "83": 9.25139, + "84": 9.37352, + "85": 9.56939, + "86": 9.03181, + "87": 9.55584, + "88": 9.71055, + "89": 9.55395, + "90": 9.78475, + "91": 9.29077, + "92": 9.31245, + "93": 9.03142, + "94": 8.78671, + "95": 9.4873, + "96": 9.49052, + "97": 9.26684, + "98": 9.63648, + "99": 8.84333, + "100": 9.35549 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 585.0, + "2": 648.0, + "3": 630.0, + "4": 656.0, + "5": 620.0, + "6": 637.0, + "7": 641.0, + "8": 581.0, + "9": 660.0, + "10": 504.0, + "11": 664.0, + "12": 639.0, + "13": 670.0, + "14": 666.0, + "15": 652.0, + "16": 624.0, + "17": 704.0, + "18": 579.0, + "19": 682.0, + "20": 623.0, + "21": 657.0, + "22": 561.0, + "23": 763.0, + 
"24": 593.0, + "25": 629.0, + "26": 669.0, + "27": 691.0, + "28": 738.0, + "29": 788.0, + "30": 744.0, + "31": 604.0, + "32": 736.0, + "33": 787.0, + "34": 706.0, + "35": 692.0, + "36": 714.0, + "37": 835.0, + "38": 768.0, + "39": 894.0, + "40": 764.0, + "41": 852.0, + "42": 878.0, + "43": 733.0, + "44": 827.0, + "45": 785.0, + "46": 877.0, + "47": 927.0, + "48": 873.0, + "49": 891.0, + "50": 869.0, + "51": 928.0, + "52": 968.0, + "53": 1089.0, + "54": 966.0, + "55": 913.0, + "56": 983.0, + "57": 889.0, + "58": 1063.0, + "59": 1005.0, + "60": 876.0, + "61": 1043.0, + "62": 897.0, + "63": 971.0, + "64": 1100.0, + "65": 911.0, + "66": 1107.0, + "67": 948.0, + "68": 1033.0, + "69": 1064.0, + "70": 1118.0, + "71": 1032.0, + "72": 854.0, + "73": 1007.0, + "74": 739.0, + "75": 877.0, + "76": 1075.0, + "77": 1108.0, + "78": 1103.0, + "79": 980.0, + "80": 1055.0, + "81": 1240.0, + "82": 1101.0, + "83": 1007.0, + "84": 1147.0, + "85": 1157.0, + "86": 897.0, + "87": 1247.0, + "88": 1015.0, + "89": 1155.0, + "90": 1138.0, + "91": 1141.0, + "92": 1142.0, + "93": 947.0, + "94": 1116.0, + "95": 1119.0, + "96": 1099.0, + "97": 997.0, + "98": 1188.0, + "99": 1141.0, + "100": 1102.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, 
+ "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0, + "51": 510689792.0, + "52": 510689792.0, + "53": 510689792.0, + "54": 510689792.0, + "55": 510689792.0, + "56": 510689792.0, + "57": 510689792.0, + "58": 510689792.0, + "59": 510689792.0, + "60": 510689792.0, + "61": 510689792.0, + "62": 510689792.0, + "63": 510689792.0, + "64": 510689792.0, + "65": 510689792.0, + "66": 510689792.0, + "67": 510689792.0, + "68": 510689792.0, + "69": 510689792.0, + "70": 510689792.0, + "71": 510689792.0, + "72": 510689792.0, + "73": 510689792.0, + "74": 510689792.0, + "75": 510689792.0, + "76": 510689792.0, + "77": 510689792.0, + "78": 510689792.0, + "79": 510689792.0, + "80": 510689792.0, + "81": 510689792.0, + "82": 510689792.0, + "83": 510689792.0, + "84": 510689792.0, + "85": 510689792.0, + "86": 510689792.0, + "87": 510689792.0, + "88": 510689792.0, + "89": 510689792.0, + "90": 510689792.0, + "91": 510689792.0, + "92": 510689792.0, + "93": 510689792.0, + "94": 510689792.0, + "95": 510689792.0, + "96": 510689792.0, + "97": 510689792.0, + "98": 510689792.0, + "99": 510689792.0, + "100": 510689792.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 759895552.0, + "2": 933156352.0, + "3": 933156352.0, + "4": 933156352.0, + "5": 933156352.0, + "6": 933156352.0, + "7": 933156352.0, + "8": 933156352.0, + "9": 933156352.0, + "10": 933156352.0, + "11": 933156352.0, + "12": 933156352.0, + "13": 933156352.0, + "14": 933156352.0, + "15": 933156352.0, + "16": 933156352.0, + "17": 933156352.0, + "18": 933156352.0, + "19": 933156352.0, + "20": 933156352.0, + "21": 933156352.0, + "22": 933156352.0, + "23": 933156352.0, + "24": 933156352.0, + 
"25": 933156352.0, + "26": 933156352.0, + "27": 933156352.0, + "28": 933156352.0, + "29": 933156352.0, + "30": 933156352.0, + "31": 933156352.0, + "32": 933156352.0, + "33": 933156352.0, + "34": 933156352.0, + "35": 933156352.0, + "36": 933156352.0, + "37": 933156352.0, + "38": 933156352.0, + "39": 933156352.0, + "40": 933156352.0, + "41": 933156352.0, + "42": 933156352.0, + "43": 933156352.0, + "44": 933156352.0, + "45": 933156352.0, + "46": 933156352.0, + "47": 933156352.0, + "48": 933156352.0, + "49": 933156352.0, + "50": 933156352.0, + "51": 933156352.0, + "52": 933156352.0, + "53": 933156352.0, + "54": 933156352.0, + "55": 933156352.0, + "56": 933156352.0, + "57": 933156352.0, + "58": 933156352.0, + "59": 933156352.0, + "60": 933156352.0, + "61": 933156352.0, + "62": 933156352.0, + "63": 933156352.0, + "64": 933156352.0, + "65": 933156352.0, + "66": 933156352.0, + "67": 933156352.0, + "68": 933156352.0, + "69": 933156352.0, + "70": 933156352.0, + "71": 933156352.0, + "72": 933156352.0, + "73": 933156352.0, + "74": 933156352.0, + "75": 933156352.0, + "76": 933156352.0, + "77": 933156352.0, + "78": 933156352.0, + "79": 933156352.0, + "80": 933156352.0, + "81": 933156352.0, + "82": 933156352.0, + "83": 933156352.0, + "84": 933156352.0, + "85": 933156352.0, + "86": 933156352.0, + "87": 933156352.0, + "88": 933156352.0, + "89": 933156352.0, + "90": 933156352.0, + "91": 933156352.0, + "92": 933156352.0, + "93": 933156352.0, + "94": 933156352.0, + "95": 933156352.0, + "96": 933156352.0, + "97": 933156352.0, + "98": 933156352.0, + "99": 933156352.0, + "100": 933156352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.91944, + "2": 0.35854, + "3": 0.34422, + "4": 0.34655, + "5": 0.33791, + "6": 0.34327, + "7": 0.34394, + "8": 0.3383, + "9": 0.34058, + "10": 0.32396, + "11": 0.32631, + "12": 0.33064, + "13": 0.32832, + "14": 0.32645, + "15": 0.32686, + "16": 0.32351, + "17": 0.32796, + "18": 0.33094, + 
"19": 0.32865, + "20": 0.32722, + "21": 0.32666, + "22": 0.32679, + "23": 0.32717, + "24": 0.32824, + "25": 0.32793, + "26": 0.32517, + "27": 0.326, + "28": 0.32627, + "29": 0.32627, + "30": 0.32688, + "31": 0.32603, + "32": 0.32544, + "33": 0.32613, + "34": 0.32696, + "35": 0.32522, + "36": 0.32966, + "37": 0.32462, + "38": 0.32724, + "39": 0.32622, + "40": 0.32646, + "41": 0.32504, + "42": 0.32464, + "43": 0.3299, + "44": 0.32495, + "45": 0.32382, + "46": 0.32567, + "47": 0.32847, + "48": 0.32521, + "49": 0.32738, + "50": 0.32495, + "51": 0.33517, + "52": 0.33963, + "53": 0.33084, + "54": 0.3299, + "55": 0.33062, + "56": 0.32923, + "57": 0.32909, + "58": 0.331, + "59": 0.32595, + "60": 0.32446, + "61": 0.32961, + "62": 0.33126, + "63": 0.32393, + "64": 0.32986, + "65": 0.32836, + "66": 0.32921, + "67": 0.32945, + "68": 0.32848, + "69": 0.32625, + "70": 0.32898, + "71": 0.33227, + "72": 0.32403, + "73": 0.3284, + "74": 0.32761, + "75": 0.32791, + "76": 0.33223, + "77": 0.33113, + "78": 0.32546, + "79": 0.32925, + "80": 0.33175, + "81": 0.33071, + "82": 0.32698, + "83": 0.32738, + "84": 0.32835, + "85": 0.32729, + "86": 0.33228, + "87": 0.32668, + "88": 0.33091, + "89": 0.32825, + "90": 0.32752, + "91": 0.32814, + "92": 0.33195, + "93": 0.32686, + "94": 0.33172, + "95": 0.33336, + "96": 0.32938, + "97": 0.33024, + "98": 0.32939, + "99": 0.32654, + "100": 0.3311 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..ebf6c82ee54 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 
+1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.93292, + "2": 10.93423, + "3": 10.91345, + "4": 10.90324, + "5": 10.92968, + "6": 10.93656, + "7": 10.90276, + "8": 10.92117, + "9": 10.90704, + "10": 10.90472, + "11": 10.88787, + "12": 10.91738, + "13": 10.9119, + "14": 10.91507, + "15": 10.87126, + "16": 10.8613, + "17": 10.82697, + "18": 10.85679, + "19": 10.84054, + "20": 10.75001, + "21": 10.71507, + "22": 10.58114, + "23": 10.72644, + "24": 10.60727, + "25": 10.53752, + "26": 10.61066, + "27": 10.59932, + "28": 10.54958, + "29": 10.56604, + "30": 10.32552, + "31": 10.06696, + "32": 10.4381, + "33": 10.42364, + "34": 10.16013, + "35": 10.22893, + "36": 10.17617, + "37": 10.29237, + "38": 10.13294, + "39": 10.34957, + "40": 10.01977, + "41": 10.07538, + "42": 10.15409, + "43": 9.76086, + "44": 9.88355, + "45": 9.75547, + "46": 9.74959, + "47": 10.07548, + "48": 9.7794, + "49": 9.43816, + "50": 9.84069, + "51": 9.77753, + "52": 9.66527, + "53": 10.00737, + "54": 9.88876, + "55": 9.81447, + "56": 9.55926, + "57": 9.39917, + "58": 9.77268, + "59": 9.51592, + "60": 9.42444, + "61": 9.64312, + "62": 9.93506, + "63": 9.30274, + "64": 9.72153, + "65": 8.86712, + "66": 9.64652, + "67": 9.30859, + "68": 9.74064, + "69": 9.7415, + "70": 9.679, + "71": 9.55873, + "72": 9.53279, + "73": 9.43847, + "74": 8.88232, + "75": 9.36664, + "76": 9.02474, + "77": 10.02955, + "78": 9.68856, + "79": 9.32607, + "80": 9.35304, + "81": 9.43249, + "82": 9.65191, + "83": 9.25401, + "84": 9.36521, + "85": 9.56704, + "86": 9.03547, + "87": 9.55775, + "88": 9.70744, + "89": 9.55898, + "90": 9.77582, + "91": 9.29648, + "92": 9.32116, + "93": 9.02867, + "94": 8.78308, + "95": 9.48328, + "96": 9.48474, + "97": 9.26673, + "98": 9.63741, + "99": 8.83899, + "100": 9.35877 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 612.0, + "2": 654.0, + "3": 644.0, + "4": 624.0, + "5": 
683.0, + "6": 610.0, + "7": 588.0, + "8": 594.0, + "9": 672.0, + "10": 520.0, + "11": 665.0, + "12": 621.0, + "13": 608.0, + "14": 635.0, + "15": 647.0, + "16": 630.0, + "17": 644.0, + "18": 624.0, + "19": 615.0, + "20": 606.0, + "21": 625.0, + "22": 608.0, + "23": 673.0, + "24": 575.0, + "25": 614.0, + "26": 607.0, + "27": 677.0, + "28": 722.0, + "29": 751.0, + "30": 740.0, + "31": 643.0, + "32": 722.0, + "33": 755.0, + "34": 656.0, + "35": 704.0, + "36": 719.0, + "37": 777.0, + "38": 788.0, + "39": 864.0, + "40": 783.0, + "41": 775.0, + "42": 842.0, + "43": 714.0, + "44": 725.0, + "45": 765.0, + "46": 880.0, + "47": 877.0, + "48": 813.0, + "49": 884.0, + "50": 806.0, + "51": 892.0, + "52": 949.0, + "53": 967.0, + "54": 953.0, + "55": 873.0, + "56": 949.0, + "57": 857.0, + "58": 1012.0, + "59": 993.0, + "60": 902.0, + "61": 986.0, + "62": 927.0, + "63": 856.0, + "64": 1097.0, + "65": 939.0, + "66": 1069.0, + "67": 932.0, + "68": 951.0, + "69": 1057.0, + "70": 1099.0, + "71": 1071.0, + "72": 884.0, + "73": 1024.0, + "74": 726.0, + "75": 895.0, + "76": 1038.0, + "77": 1116.0, + "78": 1129.0, + "79": 1060.0, + "80": 1169.0, + "81": 1199.0, + "82": 1064.0, + "83": 1024.0, + "84": 1124.0, + "85": 1134.0, + "86": 836.0, + "87": 1175.0, + "88": 1046.0, + "89": 1174.0, + "90": 1121.0, + "91": 1063.0, + "92": 1161.0, + "93": 925.0, + "94": 1129.0, + "95": 1168.0, + "96": 1212.0, + "97": 1019.0, + "98": 1216.0, + "99": 1131.0, + "100": 1070.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 431783936.0, + "2": 431783936.0, + "3": 431783936.0, + "4": 431783936.0, + "5": 431783936.0, + "6": 431783936.0, + "7": 431783936.0, + "8": 431783936.0, + "9": 431783936.0, + "10": 431783936.0, + "11": 431783936.0, + "12": 431783936.0, + "13": 431783936.0, + "14": 431783936.0, + "15": 431783936.0, + "16": 431783936.0, + "17": 431783936.0, + "18": 431783936.0, + "19": 431783936.0, + "20": 431783936.0, + "21": 431783936.0, 
+ "22": 431783936.0, + "23": 431783936.0, + "24": 431783936.0, + "25": 431783936.0, + "26": 431783936.0, + "27": 431783936.0, + "28": 431783936.0, + "29": 431783936.0, + "30": 431783936.0, + "31": 431783936.0, + "32": 431783936.0, + "33": 431783936.0, + "34": 431783936.0, + "35": 431783936.0, + "36": 431783936.0, + "37": 431783936.0, + "38": 431783936.0, + "39": 431783936.0, + "40": 431783936.0, + "41": 431783936.0, + "42": 431783936.0, + "43": 431783936.0, + "44": 431783936.0, + "45": 431783936.0, + "46": 431783936.0, + "47": 431783936.0, + "48": 431783936.0, + "49": 431783936.0, + "50": 431783936.0, + "51": 431783936.0, + "52": 431783936.0, + "53": 431783936.0, + "54": 431783936.0, + "55": 431783936.0, + "56": 431783936.0, + "57": 431783936.0, + "58": 431783936.0, + "59": 431783936.0, + "60": 431783936.0, + "61": 431783936.0, + "62": 431783936.0, + "63": 431783936.0, + "64": 431783936.0, + "65": 431783936.0, + "66": 431783936.0, + "67": 431783936.0, + "68": 431783936.0, + "69": 431783936.0, + "70": 431783936.0, + "71": 431783936.0, + "72": 431783936.0, + "73": 431783936.0, + "74": 431783936.0, + "75": 431783936.0, + "76": 431783936.0, + "77": 431783936.0, + "78": 431783936.0, + "79": 431783936.0, + "80": 431783936.0, + "81": 431783936.0, + "82": 431783936.0, + "83": 431783936.0, + "84": 431783936.0, + "85": 431783936.0, + "86": 431783936.0, + "87": 431783936.0, + "88": 431783936.0, + "89": 431783936.0, + "90": 431783936.0, + "91": 431783936.0, + "92": 431783936.0, + "93": 431783936.0, + "94": 431783936.0, + "95": 431783936.0, + "96": 431783936.0, + "97": 431783936.0, + "98": 431783936.0, + "99": 431783936.0, + "100": 431783936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 677333504.0, + "2": 854262272.0, + "3": 854262272.0, + "4": 854262272.0, + "5": 854262272.0, + "6": 854262784.0, + "7": 854262784.0, + "8": 854262784.0, + "9": 854262784.0, + "10": 854262784.0, + "11": 854262784.0, + 
"12": 854262784.0, + "13": 854262784.0, + "14": 855309824.0, + "15": 855309824.0, + "16": 855309824.0, + "17": 855309824.0, + "18": 855309824.0, + "19": 855309824.0, + "20": 855309824.0, + "21": 855309824.0, + "22": 855309824.0, + "23": 855309824.0, + "24": 855310336.0, + "25": 855310336.0, + "26": 855310336.0, + "27": 855310336.0, + "28": 855310336.0, + "29": 855310336.0, + "30": 855310336.0, + "31": 855310336.0, + "32": 855310336.0, + "33": 855310336.0, + "34": 855310336.0, + "35": 855310336.0, + "36": 855310336.0, + "37": 855310336.0, + "38": 855310336.0, + "39": 855310848.0, + "40": 855310848.0, + "41": 855310848.0, + "42": 855310848.0, + "43": 855310848.0, + "44": 855310848.0, + "45": 855310848.0, + "46": 855310848.0, + "47": 855310848.0, + "48": 855310848.0, + "49": 855310848.0, + "50": 855310848.0, + "51": 855310848.0, + "52": 855311360.0, + "53": 855311360.0, + "54": 855311360.0, + "55": 855311360.0, + "56": 855311360.0, + "57": 855311360.0, + "58": 855311360.0, + "59": 855311360.0, + "60": 855311360.0, + "61": 855311360.0, + "62": 855311360.0, + "63": 855311360.0, + "64": 855311360.0, + "65": 855311360.0, + "66": 855311360.0, + "67": 855311360.0, + "68": 855311360.0, + "69": 855311360.0, + "70": 855311360.0, + "71": 855311360.0, + "72": 855311360.0, + "73": 855311360.0, + "74": 855311360.0, + "75": 855311360.0, + "76": 855311360.0, + "77": 855311360.0, + "78": 855311360.0, + "79": 855311360.0, + "80": 855311360.0, + "81": 855311360.0, + "82": 855311360.0, + "83": 855311360.0, + "84": 855311360.0, + "85": 855311360.0, + "86": 855311360.0, + "87": 855311360.0, + "88": 855311360.0, + "89": 855311360.0, + "90": 855311360.0, + "91": 855311360.0, + "92": 855311360.0, + "93": 855311360.0, + "94": 855311360.0, + "95": 855311360.0, + "96": 855311360.0, + "97": 855311360.0, + "98": 855311360.0, + "99": 855311360.0, + "100": 855311360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 16.64296, + "2": 
0.44061, + "3": 0.39868, + "4": 0.40602, + "5": 0.39627, + "6": 0.40168, + "7": 0.40214, + "8": 0.39767, + "9": 0.41335, + "10": 0.39617, + "11": 0.40142, + "12": 0.40689, + "13": 0.39378, + "14": 0.4283, + "15": 0.39562, + "16": 0.40196, + "17": 0.40151, + "18": 0.3962, + "19": 0.40589, + "20": 0.39453, + "21": 0.3993, + "22": 0.40417, + "23": 0.39434, + "24": 0.40809, + "25": 0.39356, + "26": 0.3984, + "27": 0.39878, + "28": 0.39312, + "29": 0.40669, + "30": 0.39393, + "31": 0.40709, + "32": 0.39611, + "33": 0.3938, + "34": 0.40377, + "35": 0.39302, + "36": 0.40068, + "37": 0.40083, + "38": 0.39393, + "39": 0.40832, + "40": 0.39387, + "41": 0.4, + "42": 0.4025, + "43": 0.39558, + "44": 0.41322, + "45": 0.3943, + "46": 0.40231, + "47": 0.40377, + "48": 0.39613, + "49": 0.41098, + "50": 0.39556, + "51": 0.41526, + "52": 0.40592, + "53": 0.39522, + "54": 0.39643, + "55": 0.40606, + "56": 0.39472, + "57": 0.41022, + "58": 0.3949, + "59": 0.39351, + "60": 0.40774, + "61": 0.39377, + "62": 0.40683, + "63": 0.3959, + "64": 0.39778, + "65": 0.40721, + "66": 0.39636, + "67": 0.41074, + "68": 0.39529, + "69": 0.39586, + "70": 0.40972, + "71": 0.39753, + "72": 0.40958, + "73": 0.39662, + "74": 0.39837, + "75": 0.40947, + "76": 0.3973, + "77": 0.41202, + "78": 0.3967, + "79": 0.39826, + "80": 0.41197, + "81": 0.39832, + "82": 0.40955, + "83": 0.39814, + "84": 0.39694, + "85": 0.41004, + "86": 0.3965, + "87": 0.4108, + "88": 0.39649, + "89": 0.3978, + "90": 0.41151, + "91": 0.39705, + "92": 0.41097, + "93": 0.39242, + "94": 0.39997, + "95": 0.40901, + "96": 0.39359, + "97": 0.40554, + "98": 0.40278, + "99": 0.39673, + "100": 0.40583 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..73ae0926a59 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.93292, + "2": 10.93423, + "3": 10.91346, + "4": 10.90324, + "5": 10.92972, + "6": 10.93653, + "7": 10.90279, + "8": 10.92113, + "9": 10.90704, + "10": 10.90477, + "11": 10.88787, + "12": 10.91738, + "13": 10.91189, + "14": 10.91507, + "15": 10.87125, + "16": 10.86126, + "17": 10.82697, + "18": 10.85673, + "19": 10.84059, + "20": 10.74997, + "21": 10.71507, + "22": 10.58117, + "23": 10.72642, + "24": 10.60726, + "25": 10.53749, + "26": 10.61068, + "27": 10.59929, + "28": 10.5496, + "29": 10.56602, + "30": 10.32547, + "31": 10.06697, + "32": 10.43814, + "33": 10.42363, + "34": 10.16017, + "35": 10.22894, + "36": 10.1762, + "37": 10.29237, + "38": 10.13297, + "39": 10.34954, + "40": 10.01975, + "41": 10.07536, + "42": 10.1541, + "43": 9.76088, + "44": 9.88355, + "45": 9.75547, + "46": 9.74961, + "47": 10.07545, + "48": 9.7794, + "49": 9.43818, + "50": 9.84069, + "51": 9.77754, + "52": 9.66525, + "53": 10.00737, + "54": 9.88878, + "55": 9.81447, + "56": 9.55923, + "57": 9.39915, + "58": 9.77269, + "59": 9.51596, + "60": 9.42442, + "61": 9.64311, + "62": 9.93507, + "63": 9.30273, + "64": 9.72153, + "65": 8.86708, + "66": 9.64649, + "67": 9.30858, + "68": 9.74064, + "69": 9.7415, + "70": 9.67901, + "71": 9.55877, + "72": 9.53276, + "73": 9.43849, + "74": 8.88229, + "75": 9.36665, + "76": 9.02475, + "77": 10.02958, + "78": 9.68855, + "79": 9.32606, + "80": 9.35307, + "81": 9.43246, + "82": 9.65191, + "83": 9.25402, + "84": 9.36522, + "85": 9.56708, + "86": 9.03554, + "87": 
9.55776, + "88": 9.70744, + "89": 9.55897, + "90": 9.77584, + "91": 9.2965, + "92": 9.32116, + "93": 9.0287, + "94": 8.78307, + "95": 9.48325, + "96": 9.48475, + "97": 9.26678, + "98": 9.63738, + "99": 8.83898, + "100": 9.35879 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 582.0, + "2": 593.0, + "3": 619.0, + "4": 627.0, + "5": 660.0, + "6": 625.0, + "7": 597.0, + "8": 616.0, + "9": 608.0, + "10": 529.0, + "11": 692.0, + "12": 629.0, + "13": 695.0, + "14": 694.0, + "15": 606.0, + "16": 604.0, + "17": 647.0, + "18": 576.0, + "19": 570.0, + "20": 541.0, + "21": 625.0, + "22": 629.0, + "23": 676.0, + "24": 567.0, + "25": 617.0, + "26": 674.0, + "27": 680.0, + "28": 703.0, + "29": 684.0, + "30": 692.0, + "31": 565.0, + "32": 741.0, + "33": 789.0, + "34": 704.0, + "35": 718.0, + "36": 688.0, + "37": 762.0, + "38": 777.0, + "39": 847.0, + "40": 735.0, + "41": 839.0, + "42": 789.0, + "43": 710.0, + "44": 756.0, + "45": 780.0, + "46": 819.0, + "47": 844.0, + "48": 885.0, + "49": 833.0, + "50": 791.0, + "51": 878.0, + "52": 894.0, + "53": 955.0, + "54": 966.0, + "55": 923.0, + "56": 973.0, + "57": 844.0, + "58": 964.0, + "59": 977.0, + "60": 868.0, + "61": 931.0, + "62": 972.0, + "63": 884.0, + "64": 1042.0, + "65": 895.0, + "66": 1085.0, + "67": 992.0, + "68": 962.0, + "69": 1045.0, + "70": 1078.0, + "71": 1075.0, + "72": 935.0, + "73": 1035.0, + "74": 737.0, + "75": 875.0, + "76": 1037.0, + "77": 1154.0, + "78": 1118.0, + "79": 1051.0, + "80": 1190.0, + "81": 1225.0, + "82": 1135.0, + "83": 999.0, + "84": 1125.0, + "85": 1106.0, + "86": 866.0, + "87": 1201.0, + "88": 1075.0, + "89": 1177.0, + "90": 1092.0, + "91": 1055.0, + "92": 1162.0, + "93": 917.0, + "94": 1083.0, + "95": 1040.0, + "96": 1178.0, + "97": 1096.0, + "98": 1281.0, + "99": 1184.0, + "100": 1106.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 431783936.0, + "2": 431783936.0, + 
"3": 431783936.0, + "4": 431783936.0, + "5": 431783936.0, + "6": 431783936.0, + "7": 431783936.0, + "8": 431783936.0, + "9": 431783936.0, + "10": 431783936.0, + "11": 431783936.0, + "12": 431783936.0, + "13": 431783936.0, + "14": 431783936.0, + "15": 431783936.0, + "16": 431783936.0, + "17": 431783936.0, + "18": 431783936.0, + "19": 431783936.0, + "20": 431783936.0, + "21": 431783936.0, + "22": 431783936.0, + "23": 431783936.0, + "24": 431783936.0, + "25": 431783936.0, + "26": 431783936.0, + "27": 431783936.0, + "28": 431783936.0, + "29": 431783936.0, + "30": 431783936.0, + "31": 431783936.0, + "32": 431783936.0, + "33": 431783936.0, + "34": 431783936.0, + "35": 431783936.0, + "36": 431783936.0, + "37": 431783936.0, + "38": 431783936.0, + "39": 431783936.0, + "40": 431783936.0, + "41": 431783936.0, + "42": 431783936.0, + "43": 431783936.0, + "44": 431783936.0, + "45": 431783936.0, + "46": 431783936.0, + "47": 431783936.0, + "48": 431783936.0, + "49": 431783936.0, + "50": 431783936.0, + "51": 431783936.0, + "52": 431783936.0, + "53": 431783936.0, + "54": 431783936.0, + "55": 431783936.0, + "56": 431783936.0, + "57": 431783936.0, + "58": 431783936.0, + "59": 431783936.0, + "60": 431783936.0, + "61": 431783936.0, + "62": 431783936.0, + "63": 431783936.0, + "64": 431783936.0, + "65": 431783936.0, + "66": 431783936.0, + "67": 431783936.0, + "68": 431783936.0, + "69": 431783936.0, + "70": 431783936.0, + "71": 431783936.0, + "72": 431783936.0, + "73": 431783936.0, + "74": 431783936.0, + "75": 431783936.0, + "76": 431783936.0, + "77": 431783936.0, + "78": 431783936.0, + "79": 431783936.0, + "80": 431783936.0, + "81": 431783936.0, + "82": 431783936.0, + "83": 431783936.0, + "84": 431783936.0, + "85": 431783936.0, + "86": 431783936.0, + "87": 431783936.0, + "88": 431783936.0, + "89": 431783936.0, + "90": 431783936.0, + "91": 431783936.0, + "92": 431783936.0, + "93": 431783936.0, + "94": 431783936.0, + "95": 431783936.0, + "96": 431783936.0, + "97": 431783936.0, + "98": 
431783936.0, + "99": 431783936.0, + "100": 431783936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 678382080.0, + "2": 855308800.0, + "3": 855308800.0, + "4": 855308800.0, + "5": 855308800.0, + "6": 855308800.0, + "7": 855308800.0, + "8": 855308800.0, + "9": 855308800.0, + "10": 855308800.0, + "11": 855308800.0, + "12": 855308800.0, + "13": 855308800.0, + "14": 855308800.0, + "15": 855308800.0, + "16": 855310848.0, + "17": 855310848.0, + "18": 855310848.0, + "19": 855310848.0, + "20": 855310848.0, + "21": 855310848.0, + "22": 855310848.0, + "23": 855310848.0, + "24": 855310848.0, + "25": 855310848.0, + "26": 855310848.0, + "27": 855310848.0, + "28": 855310848.0, + "29": 855310848.0, + "30": 855310848.0, + "31": 855311360.0, + "32": 855311360.0, + "33": 855311360.0, + "34": 855311360.0, + "35": 855311360.0, + "36": 855311360.0, + "37": 855311360.0, + "38": 855311360.0, + "39": 855311360.0, + "40": 855311360.0, + "41": 855311360.0, + "42": 855311360.0, + "43": 855311360.0, + "44": 855311360.0, + "45": 855311360.0, + "46": 855311360.0, + "47": 855311360.0, + "48": 855311360.0, + "49": 855311360.0, + "50": 855311360.0, + "51": 855311360.0, + "52": 855311360.0, + "53": 855311360.0, + "54": 855311360.0, + "55": 855311360.0, + "56": 855311360.0, + "57": 855311360.0, + "58": 855311360.0, + "59": 855311360.0, + "60": 855311360.0, + "61": 855311360.0, + "62": 855311360.0, + "63": 855311360.0, + "64": 855311360.0, + "65": 855311360.0, + "66": 855311360.0, + "67": 855311360.0, + "68": 855311360.0, + "69": 855311360.0, + "70": 855311360.0, + "71": 855311360.0, + "72": 855311360.0, + "73": 855311360.0, + "74": 855311360.0, + "75": 855311360.0, + "76": 855311360.0, + "77": 855311360.0, + "78": 855311360.0, + "79": 855311360.0, + "80": 855311360.0, + "81": 855311360.0, + "82": 855311360.0, + "83": 855311360.0, + "84": 855311360.0, + "85": 855311360.0, + "86": 855311360.0, + "87": 855311360.0, + "88": 
855311360.0, + "89": 855311360.0, + "90": 855311360.0, + "91": 855311360.0, + "92": 855311360.0, + "93": 855311360.0, + "94": 855311360.0, + "95": 855311360.0, + "96": 855311360.0, + "97": 855311360.0, + "98": 855311360.0, + "99": 855311360.0, + "100": 855311360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 20.34843, + "2": 0.4496, + "3": 0.40575, + "4": 0.41925, + "5": 0.74795, + "6": 0.41468, + "7": 0.4068, + "8": 0.41689, + "9": 0.41436, + "10": 0.40801, + "11": 0.4195, + "12": 0.40914, + "13": 0.42647, + "14": 0.40668, + "15": 0.41793, + "16": 0.41417, + "17": 0.40751, + "18": 0.42901, + "19": 0.41369, + "20": 0.41147, + "21": 0.41666, + "22": 0.4069, + "23": 0.41601, + "24": 0.40503, + "25": 0.41667, + "26": 0.40986, + "27": 0.4062, + "28": 0.41374, + "29": 0.40694, + "30": 0.42156, + "31": 0.4086, + "32": 0.4087, + "33": 0.42034, + "34": 0.40632, + "35": 0.42126, + "36": 0.4059, + "37": 0.41875, + "38": 0.41448, + "39": 0.40473, + "40": 0.4248, + "41": 0.40265, + "42": 0.41245, + "43": 0.41222, + "44": 0.40565, + "45": 0.42043, + "46": 0.40713, + "47": 0.41725, + "48": 0.41199, + "49": 0.41368, + "50": 0.41468, + "51": 0.40417, + "52": 0.40097, + "53": 0.39853, + "54": 0.40708, + "55": 0.39518, + "56": 0.3992, + "57": 0.39785, + "58": 0.39681, + "59": 0.4057, + "60": 0.39395, + "61": 0.39896, + "62": 0.40375, + "63": 0.3954, + "64": 0.40498, + "65": 0.39366, + "66": 0.39924, + "67": 0.40424, + "68": 0.39447, + "69": 0.40703, + "70": 0.39461, + "71": 0.39881, + "72": 0.40382, + "73": 0.39319, + "74": 0.40889, + "75": 0.39321, + "76": 0.39854, + "77": 0.40156, + "78": 0.39432, + "79": 0.40811, + "80": 0.39353, + "81": 0.39894, + "82": 0.4043, + "83": 0.39208, + "84": 0.44003, + "85": 0.39225, + "86": 0.40107, + "87": 0.40581, + "88": 0.39601, + "89": 0.41177, + "90": 0.39396, + "91": 0.40039, + "92": 0.40383, + "93": 0.39686, + "94": 0.40986, + "95": 0.39506, + "96": 0.40327, + "97": 0.40327, + 
"98": 0.39659, + "99": 0.40763, + "100": 0.39858 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 588420ea5a1..2c78cced2a6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.85949, + "2": 10.85553, + "3": 10.86548, + "4": 10.84554, "5": 10.88344, + "6": 10.89429, + "7": 10.87068, + "8": 10.86983, + "9": 10.86919, "10": 10.83883, + "11": 10.89435, + "12": 10.8798, + "13": 10.87987, + "14": 10.90317, "15": 10.8405, + "16": 10.83786, + "17": 10.80668, + "18": 10.83025, + "19": 10.82262, "20": 10.73192, + "21": 10.7075, + "22": 10.56005, + "23": 10.72406, + "24": 10.61116, "25": 10.5481, + "26": 10.61334, + "27": 10.6305, + "28": 10.56645, + "29": 10.59672, "30": 10.37136, + "31": 10.11721, + "32": 10.46127, + "33": 10.45247, + "34": 10.21687, "35": 10.27171, + "36": 10.2312, + "37": 10.34809, + "38": 10.18842, + "39": 10.41042, "40": 10.09426, + "41": 10.14711, + "42": 10.21247, + "43": 9.84106, + "44": 9.95919, "45": 9.84082, + "46": 9.82482, + "47": 10.13882, + "48": 9.85839, + "49": 9.5472, "50": 9.90883, + "51": 9.85585, + "52": 9.75243, + "53": 10.07588, + "54": 9.95691, "55": 9.88207, + "56": 9.63139, + "57": 9.48649, + "58": 9.83116, + "59": 9.58907, "60": 9.50648, + "61": 9.70368, + "62": 9.98289, + "63": 9.38314, + "64": 9.7791, "65": 8.95182, + "66": 
9.70161, + "67": 9.37209, + "68": 9.78856, + "69": 9.79856, "70": 9.74748, + "71": 9.6191, + "72": 9.585, + "73": 9.49728, + "74": 8.93928, "75": 9.42702, + "76": 9.08022, + "77": 10.06569, + "78": 9.72897, + "79": 9.37772, "80": 9.41001, + "81": 9.47977, + "82": 9.70183, + "83": 9.30621, + "84": 9.42098, "85": 9.61377, + "86": 9.07654, + "87": 9.59456, + "88": 9.75071, + "89": 9.60243, "90": 9.81899, + "91": 9.33898, + "92": 9.35718, + "93": 9.07884, + "94": 8.83509, "95": 9.52175, + "96": 9.53007, + "97": 9.31309, + "98": 9.67781, + "99": 8.89061, "100": 9.39729 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1690.0, + "2": 1776.0, + "3": 1642.0, + "4": 1825.0, "5": 1809.0, + "6": 1795.0, + "7": 1830.0, + "8": 1626.0, + "9": 1878.0, "10": 1423.0, + "11": 1868.0, + "12": 1653.0, + "13": 1897.0, + "14": 1783.0, "15": 1861.0, + "16": 1938.0, + "17": 1825.0, + "18": 1730.0, + "19": 1727.0, "20": 1735.0, + "21": 1783.0, + "22": 1576.0, + "23": 1949.0, + "24": 1630.0, "25": 1498.0, + "26": 1649.0, + "27": 1809.0, + "28": 2019.0, + "29": 2009.0, "30": 1832.0, + "31": 1524.0, + "32": 1943.0, + "33": 2081.0, + "34": 1888.0, "35": 1935.0, + "36": 1898.0, + "37": 2325.0, + "38": 2070.0, + "39": 2248.0, "40": 2199.0, + "41": 2264.0, + "42": 2349.0, + "43": 2087.0, + "44": 2107.0, "45": 2098.0, + "46": 2407.0, + "47": 2456.0, + "48": 2404.0, + "49": 2417.0, "50": 2407.0, + "51": 2578.0, + "52": 2630.0, + "53": 2857.0, + "54": 2818.0, "55": 2368.0, + "56": 2757.0, + "57": 2423.0, + "58": 2776.0, + "59": 2742.0, "60": 2371.0, + "61": 2906.0, + "62": 2517.0, + "63": 2374.0, + "64": 2995.0, "65": 2634.0, + "66": 2995.0, + "67": 2884.0, + "68": 2840.0, + "69": 2766.0, "70": 3006.0, + "71": 3023.0, + "72": 2386.0, + "73": 2958.0, + "74": 1851.0, "75": 2585.0, + "76": 2973.0, + "77": 3244.0, + "78": 3142.0, + "79": 3185.0, "80": 3249.0, + "81": 3665.0, + "82": 3153.0, + "83": 2821.0, + "84": 3083.0, "85": 3247.0, + 
"86": 2734.0, + "87": 3759.0, + "88": 2968.0, + "89": 3282.0, "90": 3064.0, + "91": 2908.0, + "92": 2946.0, + "93": 2592.0, + "94": 3363.0, "95": 3423.0, + "96": 3259.0, + "97": 2976.0, + "98": 3683.0, + "99": 3173.0, "100": 3143.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 516194816.0, + "2": 516194816.0, + "3": 516194816.0, + "4": 516194816.0, "5": 516194816.0, + "6": 516194816.0, + "7": 516194816.0, + "8": 516194816.0, + "9": 516194816.0, "10": 516194816.0, + "11": 516194816.0, + "12": 516194816.0, + "13": 516194816.0, + "14": 516194816.0, "15": 516194816.0, + "16": 516194816.0, + "17": 516194816.0, + "18": 516194816.0, + "19": 516194816.0, "20": 516194816.0, + "21": 516194816.0, + "22": 516194816.0, + "23": 516194816.0, + "24": 516194816.0, "25": 516194816.0, + "26": 516194816.0, + "27": 516194816.0, + "28": 516194816.0, + "29": 516194816.0, "30": 516194816.0, + "31": 516194816.0, + "32": 516194816.0, + "33": 516194816.0, + "34": 516194816.0, "35": 516194816.0, + "36": 516194816.0, + "37": 516194816.0, + "38": 516194816.0, + "39": 516194816.0, "40": 516194816.0, + "41": 516194816.0, + "42": 516194816.0, + "43": 516194816.0, + "44": 516194816.0, "45": 516194816.0, + "46": 516194816.0, + "47": 516194816.0, + "48": 516194816.0, + "49": 516194816.0, "50": 516194816.0, + "51": 516194816.0, + "52": 516194816.0, + "53": 516194816.0, + "54": 516194816.0, "55": 516194816.0, + "56": 516194816.0, + "57": 516194816.0, + "58": 516194816.0, + "59": 516194816.0, "60": 516194816.0, + "61": 516194816.0, + "62": 516194816.0, + "63": 516194816.0, + "64": 516194816.0, "65": 516194816.0, + "66": 516194816.0, + "67": 516194816.0, + "68": 516194816.0, + "69": 516194816.0, "70": 516194816.0, + "71": 516194816.0, + "72": 516194816.0, + "73": 516194816.0, + "74": 516194816.0, "75": 516194816.0, + "76": 516194816.0, + "77": 516194816.0, + "78": 516194816.0, + "79": 516194816.0, "80": 516194816.0, + "81": 
516194816.0, + "82": 516194816.0, + "83": 516194816.0, + "84": 516194816.0, "85": 516194816.0, + "86": 516194816.0, + "87": 516194816.0, + "88": 516194816.0, + "89": 516194816.0, "90": 516194816.0, + "91": 516194816.0, + "92": 516194816.0, + "93": 516194816.0, + "94": 516194816.0, "95": 516194816.0, + "96": 516194816.0, + "97": 516194816.0, + "98": 516194816.0, + "99": 516194816.0, "100": 516194816.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1670130688.0, + "2": 1840523776.0, + "3": 1840523776.0, + "4": 1840523776.0, "5": 1840523776.0, + "6": 1840523776.0, + "7": 1840523776.0, + "8": 1840523776.0, + "9": 1840523776.0, "10": 1840523776.0, + "11": 1840523776.0, + "12": 1840523776.0, + "13": 1840523776.0, + "14": 1840523776.0, "15": 1840523776.0, - "20": 1841310208.0, - "25": 1841310208.0, - "30": 1841310208.0, - "35": 1841310208.0, - "40": 1841310208.0, - "45": 1841310208.0, - "50": 1841310208.0, - "55": 1841310208.0, - "60": 1841310208.0, - "65": 1841310208.0, - "70": 1841310208.0, - "75": 1841310208.0, - "80": 1841310208.0, - "85": 1841310208.0, - "90": 1841310208.0, - "95": 1841310208.0, - "100": 1841310208.0 + "16": 1840523776.0, + "17": 1840523776.0, + "18": 1840523776.0, + "19": 1840523776.0, + "20": 1840523776.0, + "21": 1840523776.0, + "22": 1840523776.0, + "23": 1840523776.0, + "24": 1840523776.0, + "25": 1840523776.0, + "26": 1840523776.0, + "27": 1840523776.0, + "28": 1840523776.0, + "29": 1840523776.0, + "30": 1840523776.0, + "31": 1840523776.0, + "32": 1840523776.0, + "33": 1840523776.0, + "34": 1840523776.0, + "35": 1840523776.0, + "36": 1840523776.0, + "37": 1840523776.0, + "38": 1840523776.0, + "39": 1840523776.0, + "40": 1840523776.0, + "41": 1840523776.0, + "42": 1840523776.0, + "43": 1840523776.0, + "44": 1840523776.0, + "45": 1840523776.0, + "46": 1840523776.0, + "47": 1840523776.0, + "48": 1840523776.0, + "49": 1840523776.0, + "50": 1840523776.0, + "51": 
1840523776.0, + "52": 1840523776.0, + "53": 1840523776.0, + "54": 1840523776.0, + "55": 1840523776.0, + "56": 1840523776.0, + "57": 1840523776.0, + "58": 1840523776.0, + "59": 1840523776.0, + "60": 1840523776.0, + "61": 1840523776.0, + "62": 1840523776.0, + "63": 1840523776.0, + "64": 1840523776.0, + "65": 1840523776.0, + "66": 1840523776.0, + "67": 1840523776.0, + "68": 1840523776.0, + "69": 1840523776.0, + "70": 1840523776.0, + "71": 1840523776.0, + "72": 1840523776.0, + "73": 1840523776.0, + "74": 1840523776.0, + "75": 1840523776.0, + "76": 1840523776.0, + "77": 1840523776.0, + "78": 1840523776.0, + "79": 1840523776.0, + "80": 1840523776.0, + "81": 1840523776.0, + "82": 1840523776.0, + "83": 1840523776.0, + "84": 1840523776.0, + "85": 1840523776.0, + "86": 1840523776.0, + "87": 1840523776.0, + "88": 1840523776.0, + "89": 1840523776.0, + "90": 1840523776.0, + "91": 1840523776.0, + "92": 1840523776.0, + "93": 1840523776.0, + "94": 1840523776.0, + "95": 1840523776.0, + "96": 1840523776.0, + "97": 1840523776.0, + "98": 1840523776.0, + "99": 1840523776.0, + "100": 1840523776.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 14.69041, - "5": 0.12029, - "10": 0.12392, - "15": 0.12795, - "20": 0.12945, - "25": 0.11653, - "30": 0.11758, - "35": 0.12012, - "40": 0.11726, - "45": 0.11921, - "50": 0.12046, - "55": 0.11872, - "60": 0.11663, - "65": 0.11858, - "70": 0.11801, - "75": 0.11679, - "80": 0.11617, - "85": 0.11789, - "90": 0.11709, - "95": 0.11779, - "100": 0.11872 + "1": 15.10612, + "2": 0.1542, + "3": 0.13803, + "4": 0.14173, + "5": 0.13703, + "6": 0.13715, + "7": 0.13669, + "8": 0.13634, + "9": 0.13883, + "10": 0.13804, + "11": 0.13759, + "12": 0.1376, + "13": 0.1382, + "14": 0.13696, + "15": 0.13434, + "16": 0.13528, + "17": 0.13745, + "18": 0.13625, + "19": 0.13968, + "20": 0.13682, + "21": 0.13596, + "22": 0.13719, + "23": 0.13667, + "24": 0.13638, + "25": 0.13753, + "26": 0.13644, + 
"27": 0.13707, + "28": 0.13952, + "29": 0.1369, + "30": 0.13707, + "31": 0.13675, + "32": 0.13583, + "33": 0.1367, + "34": 0.13775, + "35": 0.13604, + "36": 0.13754, + "37": 0.13616, + "38": 0.13653, + "39": 0.13703, + "40": 0.13711, + "41": 0.13929, + "42": 0.1367, + "43": 0.13765, + "44": 0.1376, + "45": 0.13629, + "46": 0.13767, + "47": 0.13691, + "48": 0.13819, + "49": 0.13713, + "50": 0.13764, + "51": 0.14385, + "52": 0.13731, + "53": 0.13926, + "54": 0.13909, + "55": 0.13708, + "56": 0.13606, + "57": 0.1385, + "58": 0.13816, + "59": 0.13715, + "60": 0.13837, + "61": 0.13836, + "62": 0.13899, + "63": 0.13766, + "64": 0.13809, + "65": 0.1396, + "66": 0.13817, + "67": 0.13774, + "68": 0.13776, + "69": 0.13995, + "70": 0.14012, + "71": 0.13829, + "72": 0.14013, + "73": 0.13752, + "74": 0.13771, + "75": 0.13835, + "76": 0.13975, + "77": 0.13762, + "78": 0.13969, + "79": 0.14152, + "80": 0.13795, + "81": 0.13719, + "82": 0.13686, + "83": 0.13959, + "84": 0.13635, + "85": 0.13911, + "86": 0.13853, + "87": 0.13756, + "88": 0.13795, + "89": 0.13781, + "90": 0.13889, + "91": 0.1373, + "92": 0.14159, + "93": 0.13719, + "94": 0.13599, + "95": 0.13739, + "96": 0.13865, + "97": 0.13776, + "98": 0.14044, + "99": 0.13747, + "100": 0.13826 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..bb22d5373cc --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, 
+ "3": 10.86548, + "4": 10.84554, + "5": 10.88344, + "6": 10.89429, + "7": 10.87068, + "8": 10.86983, + "9": 10.86919, + "10": 10.83883, + "11": 10.89435, + "12": 10.8798, + "13": 10.87987, + "14": 10.90317, + "15": 10.8405, + "16": 10.83786, + "17": 10.80668, + "18": 10.83025, + "19": 10.82262, + "20": 10.73192, + "21": 10.7075, + "22": 10.56005, + "23": 10.72406, + "24": 10.61116, + "25": 10.5481, + "26": 10.61334, + "27": 10.6305, + "28": 10.56645, + "29": 10.59672, + "30": 10.37136, + "31": 10.11721, + "32": 10.46127, + "33": 10.45247, + "34": 10.21687, + "35": 10.27171, + "36": 10.2312, + "37": 10.34809, + "38": 10.18842, + "39": 10.41042, + "40": 10.09426, + "41": 10.14711, + "42": 10.21247, + "43": 9.84106, + "44": 9.95919, + "45": 9.84082, + "46": 9.82482, + "47": 10.13882, + "48": 9.85839, + "49": 9.5472, + "50": 9.90883, + "51": 9.85585, + "52": 9.75243, + "53": 10.07588, + "54": 9.95691, + "55": 9.88207, + "56": 9.63139, + "57": 9.48649, + "58": 9.83116, + "59": 9.58907, + "60": 9.50648, + "61": 9.70368, + "62": 9.98289, + "63": 9.38314, + "64": 9.7791, + "65": 8.95182, + "66": 9.70161, + "67": 9.37209, + "68": 9.78856, + "69": 9.79856, + "70": 9.74748, + "71": 9.6191, + "72": 9.585, + "73": 9.49728, + "74": 8.93928, + "75": 9.42702, + "76": 9.08022, + "77": 10.06569, + "78": 9.72897, + "79": 9.37772, + "80": 9.41001, + "81": 9.47977, + "82": 9.70183, + "83": 9.30621, + "84": 9.42098, + "85": 9.61377, + "86": 9.07654, + "87": 9.59456, + "88": 9.75071, + "89": 9.60243, + "90": 9.81899, + "91": 9.33898, + "92": 9.35718, + "93": 9.07884, + "94": 8.83509, + "95": 9.52175, + "96": 9.53007, + "97": 9.31309, + "98": 9.67781, + "99": 8.89061, + "100": 9.39729 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1690.0, + "2": 1776.0, + "3": 1642.0, + "4": 1825.0, + "5": 1809.0, + "6": 1795.0, + "7": 1830.0, + "8": 1626.0, + "9": 1878.0, + "10": 1423.0, + "11": 1868.0, + "12": 1653.0, + "13": 1897.0, + "14": 
1783.0, + "15": 1861.0, + "16": 1938.0, + "17": 1825.0, + "18": 1730.0, + "19": 1727.0, + "20": 1735.0, + "21": 1783.0, + "22": 1576.0, + "23": 1949.0, + "24": 1630.0, + "25": 1498.0, + "26": 1649.0, + "27": 1809.0, + "28": 2019.0, + "29": 2009.0, + "30": 1832.0, + "31": 1524.0, + "32": 1943.0, + "33": 2081.0, + "34": 1888.0, + "35": 1935.0, + "36": 1898.0, + "37": 2325.0, + "38": 2070.0, + "39": 2248.0, + "40": 2199.0, + "41": 2264.0, + "42": 2349.0, + "43": 2087.0, + "44": 2107.0, + "45": 2098.0, + "46": 2407.0, + "47": 2456.0, + "48": 2404.0, + "49": 2417.0, + "50": 2407.0, + "51": 2578.0, + "52": 2630.0, + "53": 2857.0, + "54": 2818.0, + "55": 2368.0, + "56": 2757.0, + "57": 2423.0, + "58": 2776.0, + "59": 2742.0, + "60": 2371.0, + "61": 2906.0, + "62": 2517.0, + "63": 2374.0, + "64": 2995.0, + "65": 2634.0, + "66": 2995.0, + "67": 2884.0, + "68": 2840.0, + "69": 2766.0, + "70": 3006.0, + "71": 3023.0, + "72": 2386.0, + "73": 2958.0, + "74": 1851.0, + "75": 2585.0, + "76": 2973.0, + "77": 3244.0, + "78": 3142.0, + "79": 3185.0, + "80": 3249.0, + "81": 3665.0, + "82": 3153.0, + "83": 2821.0, + "84": 3083.0, + "85": 3247.0, + "86": 2734.0, + "87": 3759.0, + "88": 2968.0, + "89": 3282.0, + "90": 3064.0, + "91": 2908.0, + "92": 2946.0, + "93": 2592.0, + "94": 3363.0, + "95": 3423.0, + "96": 3259.0, + "97": 2976.0, + "98": 3683.0, + "99": 3173.0, + "100": 3143.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 516194816.0, + "2": 516194816.0, + "3": 516194816.0, + "4": 516194816.0, + "5": 516194816.0, + "6": 516194816.0, + "7": 516194816.0, + "8": 516194816.0, + "9": 516194816.0, + "10": 516194816.0, + "11": 516194816.0, + "12": 516194816.0, + "13": 516194816.0, + "14": 516194816.0, + "15": 516194816.0, + "16": 516194816.0, + "17": 516194816.0, + "18": 516194816.0, + "19": 516194816.0, + "20": 516194816.0, + "21": 516194816.0, + "22": 516194816.0, + "23": 516194816.0, + "24": 516194816.0, + "25": 
516194816.0, + "26": 516194816.0, + "27": 516194816.0, + "28": 516194816.0, + "29": 516194816.0, + "30": 516194816.0, + "31": 516194816.0, + "32": 516194816.0, + "33": 516194816.0, + "34": 516194816.0, + "35": 516194816.0, + "36": 516194816.0, + "37": 516194816.0, + "38": 516194816.0, + "39": 516194816.0, + "40": 516194816.0, + "41": 516194816.0, + "42": 516194816.0, + "43": 516194816.0, + "44": 516194816.0, + "45": 516194816.0, + "46": 516194816.0, + "47": 516194816.0, + "48": 516194816.0, + "49": 516194816.0, + "50": 516194816.0, + "51": 516194816.0, + "52": 516194816.0, + "53": 516194816.0, + "54": 516194816.0, + "55": 516194816.0, + "56": 516194816.0, + "57": 516194816.0, + "58": 516194816.0, + "59": 516194816.0, + "60": 516194816.0, + "61": 516194816.0, + "62": 516194816.0, + "63": 516194816.0, + "64": 516194816.0, + "65": 516194816.0, + "66": 516194816.0, + "67": 516194816.0, + "68": 516194816.0, + "69": 516194816.0, + "70": 516194816.0, + "71": 516194816.0, + "72": 516194816.0, + "73": 516194816.0, + "74": 516194816.0, + "75": 516194816.0, + "76": 516194816.0, + "77": 516194816.0, + "78": 516194816.0, + "79": 516194816.0, + "80": 516194816.0, + "81": 516194816.0, + "82": 516194816.0, + "83": 516194816.0, + "84": 516194816.0, + "85": 516194816.0, + "86": 516194816.0, + "87": 516194816.0, + "88": 516194816.0, + "89": 516194816.0, + "90": 516194816.0, + "91": 516194816.0, + "92": 516194816.0, + "93": 516194816.0, + "94": 516194816.0, + "95": 516194816.0, + "96": 516194816.0, + "97": 516194816.0, + "98": 516194816.0, + "99": 516194816.0, + "100": 516194816.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1670130688.0, + "2": 1840523776.0, + "3": 1840523776.0, + "4": 1840523776.0, + "5": 1840523776.0, + "6": 1840523776.0, + "7": 1841310208.0, + "8": 1841310208.0, + "9": 1841310208.0, + "10": 1841310208.0, + "11": 1841310208.0, + "12": 1841310208.0, + "13": 1841310208.0, + "14": 
1841310208.0, + "15": 1841310208.0, + "16": 1841310208.0, + "17": 1841310208.0, + "18": 1841310208.0, + "19": 1841310208.0, + "20": 1841310208.0, + "21": 1841310208.0, + "22": 1841310208.0, + "23": 1841310208.0, + "24": 1841310208.0, + "25": 1841310208.0, + "26": 1841310208.0, + "27": 1841310208.0, + "28": 1841310208.0, + "29": 1841310208.0, + "30": 1841310208.0, + "31": 1841310208.0, + "32": 1841310208.0, + "33": 1841310208.0, + "34": 1841310208.0, + "35": 1841310208.0, + "36": 1841310208.0, + "37": 1841310208.0, + "38": 1841310208.0, + "39": 1841310208.0, + "40": 1841310208.0, + "41": 1841310208.0, + "42": 1841310208.0, + "43": 1841310208.0, + "44": 1841310208.0, + "45": 1841310208.0, + "46": 1841310208.0, + "47": 1841310208.0, + "48": 1841310208.0, + "49": 1841310208.0, + "50": 1841310208.0, + "51": 1841310208.0, + "52": 1841310208.0, + "53": 1841310208.0, + "54": 1841310208.0, + "55": 1841310208.0, + "56": 1841310208.0, + "57": 1841310208.0, + "58": 1841310208.0, + "59": 1841310208.0, + "60": 1841310208.0, + "61": 1841310208.0, + "62": 1841310208.0, + "63": 1841310208.0, + "64": 1841310208.0, + "65": 1841310208.0, + "66": 1841310208.0, + "67": 1841310208.0, + "68": 1841310208.0, + "69": 1841310208.0, + "70": 1841310208.0, + "71": 1841310208.0, + "72": 1841310208.0, + "73": 1841310208.0, + "74": 1841310208.0, + "75": 1841310208.0, + "76": 1841310208.0, + "77": 1841310208.0, + "78": 1841310208.0, + "79": 1841310208.0, + "80": 1841310208.0, + "81": 1841310208.0, + "82": 1841310208.0, + "83": 1841310208.0, + "84": 1841310208.0, + "85": 1841310208.0, + "86": 1841310208.0, + "87": 1841310208.0, + "88": 1841310208.0, + "89": 1841310208.0, + "90": 1841310208.0, + "91": 1841310208.0, + "92": 1841310208.0, + "93": 1841310208.0, + "94": 1841310208.0, + "95": 1841310208.0, + "96": 1841310208.0, + "97": 1841310208.0, + "98": 1841310208.0, + "99": 1841310208.0, + "100": 1841310208.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + 
"values": { + "1": 14.64403, + "2": 0.16797, + "3": 0.12497, + "4": 0.12885, + "5": 0.12618, + "6": 0.13062, + "7": 0.13213, + "8": 0.12464, + "9": 0.11932, + "10": 0.11974, + "11": 0.11909, + "12": 0.12055, + "13": 0.1201, + "14": 0.12035, + "15": 0.12245, + "16": 0.12189, + "17": 0.12194, + "18": 0.12112, + "19": 0.12294, + "20": 0.12528, + "21": 0.12355, + "22": 0.12627, + "23": 0.13006, + "24": 0.12885, + "25": 0.12289, + "26": 0.12586, + "27": 0.12347, + "28": 0.12378, + "29": 0.12521, + "30": 0.12152, + "31": 0.12233, + "32": 0.12264, + "33": 0.12293, + "34": 0.12188, + "35": 0.12305, + "36": 0.11979, + "37": 0.12011, + "38": 0.12066, + "39": 0.11933, + "40": 0.1218, + "41": 0.1229, + "42": 0.12279, + "43": 0.12218, + "44": 0.12191, + "45": 0.12293, + "46": 0.12168, + "47": 0.12842, + "48": 0.12658, + "49": 0.12505, + "50": 0.12387, + "51": 0.1324, + "52": 0.13379, + "53": 0.1261, + "54": 0.11854, + "55": 0.11853, + "56": 0.11881, + "57": 0.1209, + "58": 0.12111, + "59": 0.11838, + "60": 0.12687, + "61": 0.11751, + "62": 0.11883, + "63": 0.11928, + "64": 0.11974, + "65": 0.11845, + "66": 0.11894, + "67": 0.11846, + "68": 0.11858, + "69": 0.11994, + "70": 0.11764, + "71": 0.12093, + "72": 0.11968, + "73": 0.1186, + "74": 0.11964, + "75": 0.11783, + "76": 0.1194, + "77": 0.11791, + "78": 0.12113, + "79": 0.11779, + "80": 0.11874, + "81": 0.1199, + "82": 0.11927, + "83": 0.1179, + "84": 0.11758, + "85": 0.11656, + "86": 0.11748, + "87": 0.11919, + "88": 0.11702, + "89": 0.11924, + "90": 0.11761, + "91": 0.12024, + "92": 0.12008, + "93": 0.11955, + "94": 0.11864, + "95": 0.11843, + "96": 0.1186, + "97": 0.1208, + "98": 0.11919, + "99": 0.11935, + "100": 0.1196 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..eb0e5f82b03 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86548, + "4": 10.84554, + "5": 10.88344, + "6": 10.89429, + "7": 10.87068, + "8": 10.86983, + "9": 10.86919, + "10": 10.83883, + "11": 10.89435, + "12": 10.8798, + "13": 10.87987, + "14": 10.90317, + "15": 10.8405, + "16": 10.83786, + "17": 10.80668, + "18": 10.83025, + "19": 10.82262, + "20": 10.73192, + "21": 10.7075, + "22": 10.56005, + "23": 10.72406, + "24": 10.61116, + "25": 10.5481, + "26": 10.61334, + "27": 10.6305, + "28": 10.56645, + "29": 10.59672, + "30": 10.37136, + "31": 10.11721, + "32": 10.46127, + "33": 10.45247, + "34": 10.21687, + "35": 10.27171, + "36": 10.2312, + "37": 10.34809, + "38": 10.18842, + "39": 10.41042, + "40": 10.09426, + "41": 10.14711, + "42": 10.21247, + "43": 9.84106, + "44": 9.95919, + "45": 9.84082, + "46": 9.82482, + "47": 10.13882, + "48": 9.85839, + "49": 9.5472, + "50": 9.90883, + "51": 9.85585, + "52": 9.75243, + "53": 10.07588, + "54": 9.95691, + "55": 9.88207, + "56": 9.63139, + "57": 9.48649, + "58": 9.83116, + "59": 9.58907, + "60": 9.50648, + "61": 9.70368, + "62": 9.98289, + "63": 9.38314, + "64": 9.7791, + "65": 8.95182, + "66": 9.70161, + "67": 9.37209, + "68": 9.78856, + "69": 9.79856, + "70": 9.74748, + "71": 9.6191, + "72": 9.585, + "73": 9.49728, + "74": 8.93928, + "75": 9.42702, + "76": 9.08022, + "77": 10.06569, + "78": 9.72897, + "79": 9.37772, + "80": 9.41001, + "81": 9.47977, + "82": 9.70183, + "83": 9.30621, + "84": 9.42098, + "85": 9.61377, + "86": 9.07654, + "87": 9.59456, 
+ "88": 9.75071, + "89": 9.60243, + "90": 9.81899, + "91": 9.33898, + "92": 9.35718, + "93": 9.07884, + "94": 8.83509, + "95": 9.52175, + "96": 9.53007, + "97": 9.31309, + "98": 9.67781, + "99": 8.89061, + "100": 9.39729 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1690.0, + "2": 1776.0, + "3": 1642.0, + "4": 1825.0, + "5": 1809.0, + "6": 1795.0, + "7": 1830.0, + "8": 1626.0, + "9": 1878.0, + "10": 1423.0, + "11": 1868.0, + "12": 1653.0, + "13": 1897.0, + "14": 1783.0, + "15": 1861.0, + "16": 1938.0, + "17": 1825.0, + "18": 1730.0, + "19": 1727.0, + "20": 1735.0, + "21": 1783.0, + "22": 1576.0, + "23": 1949.0, + "24": 1630.0, + "25": 1498.0, + "26": 1649.0, + "27": 1809.0, + "28": 2019.0, + "29": 2009.0, + "30": 1832.0, + "31": 1524.0, + "32": 1943.0, + "33": 2081.0, + "34": 1888.0, + "35": 1935.0, + "36": 1898.0, + "37": 2325.0, + "38": 2070.0, + "39": 2248.0, + "40": 2199.0, + "41": 2264.0, + "42": 2349.0, + "43": 2087.0, + "44": 2107.0, + "45": 2098.0, + "46": 2407.0, + "47": 2456.0, + "48": 2404.0, + "49": 2417.0, + "50": 2407.0, + "51": 2578.0, + "52": 2630.0, + "53": 2857.0, + "54": 2818.0, + "55": 2368.0, + "56": 2757.0, + "57": 2423.0, + "58": 2776.0, + "59": 2742.0, + "60": 2371.0, + "61": 2906.0, + "62": 2517.0, + "63": 2374.0, + "64": 2995.0, + "65": 2634.0, + "66": 2995.0, + "67": 2884.0, + "68": 2840.0, + "69": 2766.0, + "70": 3006.0, + "71": 3023.0, + "72": 2386.0, + "73": 2958.0, + "74": 1851.0, + "75": 2585.0, + "76": 2973.0, + "77": 3244.0, + "78": 3142.0, + "79": 3185.0, + "80": 3249.0, + "81": 3665.0, + "82": 3153.0, + "83": 2821.0, + "84": 3083.0, + "85": 3247.0, + "86": 2734.0, + "87": 3759.0, + "88": 2968.0, + "89": 3282.0, + "90": 3064.0, + "91": 2908.0, + "92": 2946.0, + "93": 2592.0, + "94": 3363.0, + "95": 3423.0, + "96": 3259.0, + "97": 2976.0, + "98": 3683.0, + "99": 3173.0, + "100": 3143.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 516194816.0, + "2": 516194816.0, + "3": 516194816.0, + "4": 516194816.0, + "5": 516194816.0, + "6": 516194816.0, + "7": 516194816.0, + "8": 516194816.0, + "9": 516194816.0, + "10": 516194816.0, + "11": 516194816.0, + "12": 516194816.0, + "13": 516194816.0, + "14": 516194816.0, + "15": 516194816.0, + "16": 516194816.0, + "17": 516194816.0, + "18": 516194816.0, + "19": 516194816.0, + "20": 516194816.0, + "21": 516194816.0, + "22": 516194816.0, + "23": 516194816.0, + "24": 516194816.0, + "25": 516194816.0, + "26": 516194816.0, + "27": 516194816.0, + "28": 516194816.0, + "29": 516194816.0, + "30": 516194816.0, + "31": 516194816.0, + "32": 516194816.0, + "33": 516194816.0, + "34": 516194816.0, + "35": 516194816.0, + "36": 516194816.0, + "37": 516194816.0, + "38": 516194816.0, + "39": 516194816.0, + "40": 516194816.0, + "41": 516194816.0, + "42": 516194816.0, + "43": 516194816.0, + "44": 516194816.0, + "45": 516194816.0, + "46": 516194816.0, + "47": 516194816.0, + "48": 516194816.0, + "49": 516194816.0, + "50": 516194816.0, + "51": 516194816.0, + "52": 516194816.0, + "53": 516194816.0, + "54": 516194816.0, + "55": 516194816.0, + "56": 516194816.0, + "57": 516194816.0, + "58": 516194816.0, + "59": 516194816.0, + "60": 516194816.0, + "61": 516194816.0, + "62": 516194816.0, + "63": 516194816.0, + "64": 516194816.0, + "65": 516194816.0, + "66": 516194816.0, + "67": 516194816.0, + "68": 516194816.0, + "69": 516194816.0, + "70": 516194816.0, + "71": 516194816.0, + "72": 516194816.0, + "73": 516194816.0, + "74": 516194816.0, + "75": 516194816.0, + "76": 516194816.0, + "77": 516194816.0, + "78": 516194816.0, + "79": 516194816.0, + "80": 516194816.0, + "81": 516194816.0, + "82": 516194816.0, + "83": 516194816.0, + "84": 516194816.0, + "85": 516194816.0, + "86": 516194816.0, + "87": 516194816.0, + "88": 516194816.0, + "89": 516194816.0, + "90": 516194816.0, + "91": 516194816.0, + "92": 516194816.0, + "93": 516194816.0, + "94": 516194816.0, 
+ "95": 516194816.0, + "96": 516194816.0, + "97": 516194816.0, + "98": 516194816.0, + "99": 516194816.0, + "100": 516194816.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1670130688.0, + "2": 1840523776.0, + "3": 1840523776.0, + "4": 1840523776.0, + "5": 1840523776.0, + "6": 1840523776.0, + "7": 1840523776.0, + "8": 1840523776.0, + "9": 1840523776.0, + "10": 1840523776.0, + "11": 1840523776.0, + "12": 1840523776.0, + "13": 1840523776.0, + "14": 1840523776.0, + "15": 1840523776.0, + "16": 1840523776.0, + "17": 1840523776.0, + "18": 1840523776.0, + "19": 1840523776.0, + "20": 1840523776.0, + "21": 1840523776.0, + "22": 1840523776.0, + "23": 1840523776.0, + "24": 1840523776.0, + "25": 1840523776.0, + "26": 1840523776.0, + "27": 1840523776.0, + "28": 1840523776.0, + "29": 1840523776.0, + "30": 1840523776.0, + "31": 1840523776.0, + "32": 1840523776.0, + "33": 1840523776.0, + "34": 1840523776.0, + "35": 1840523776.0, + "36": 1840523776.0, + "37": 1840523776.0, + "38": 1840523776.0, + "39": 1840523776.0, + "40": 1840523776.0, + "41": 1840523776.0, + "42": 1840523776.0, + "43": 1840523776.0, + "44": 1840523776.0, + "45": 1840523776.0, + "46": 1840523776.0, + "47": 1840523776.0, + "48": 1840523776.0, + "49": 1840523776.0, + "50": 1840523776.0, + "51": 1840523776.0, + "52": 1840523776.0, + "53": 1840523776.0, + "54": 1840523776.0, + "55": 1840523776.0, + "56": 1840523776.0, + "57": 1840523776.0, + "58": 1840523776.0, + "59": 1840523776.0, + "60": 1840523776.0, + "61": 1840523776.0, + "62": 1840523776.0, + "63": 1840523776.0, + "64": 1840523776.0, + "65": 1840523776.0, + "66": 1840523776.0, + "67": 1840523776.0, + "68": 1840523776.0, + "69": 1840523776.0, + "70": 1840523776.0, + "71": 1840523776.0, + "72": 1840523776.0, + "73": 1840523776.0, + "74": 1840523776.0, + "75": 1840523776.0, + "76": 1840523776.0, + "77": 1840523776.0, + "78": 1840523776.0, + "79": 1840523776.0, + "80": 1840523776.0, + "81": 
1840523776.0, + "82": 1840523776.0, + "83": 1841310208.0, + "84": 1841310208.0, + "85": 1841310208.0, + "86": 1841310208.0, + "87": 1841310208.0, + "88": 1841310208.0, + "89": 1841310208.0, + "90": 1841310208.0, + "91": 1841310208.0, + "92": 1841310208.0, + "93": 1841310208.0, + "94": 1841310208.0, + "95": 1841310208.0, + "96": 1841310208.0, + "97": 1841310208.0, + "98": 1841310208.0, + "99": 1841310208.0, + "100": 1841310208.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.65402, + "2": 0.15533, + "3": 0.13713, + "4": 0.14193, + "5": 0.13861, + "6": 0.13948, + "7": 0.13637, + "8": 0.13619, + "9": 0.14162, + "10": 0.13725, + "11": 0.13988, + "12": 0.14179, + "13": 0.14346, + "14": 0.14488, + "15": 0.1468, + "16": 0.14288, + "17": 0.13708, + "18": 0.13765, + "19": 0.13957, + "20": 0.13778, + "21": 0.13931, + "22": 0.13758, + "23": 0.13751, + "24": 0.14023, + "25": 0.14508, + "26": 0.15744, + "27": 0.15391, + "28": 0.15519, + "29": 0.14118, + "30": 0.1391, + "31": 0.13604, + "32": 0.1366, + "33": 0.13813, + "34": 0.13786, + "35": 0.13728, + "36": 0.13981, + "37": 0.14024, + "38": 0.13688, + "39": 0.13391, + "40": 0.13738, + "41": 0.14059, + "42": 0.13512, + "43": 0.13775, + "44": 0.13641, + "45": 0.13686, + "46": 0.14053, + "47": 0.13951, + "48": 0.14166, + "49": 0.13555, + "50": 0.13577, + "51": 0.14328, + "52": 0.14201, + "53": 0.13861, + "54": 0.13965, + "55": 0.13807, + "56": 0.14044, + "57": 0.14358, + "58": 0.14042, + "59": 0.13858, + "60": 0.13959, + "61": 0.13788, + "62": 0.14032, + "63": 0.13843, + "64": 0.13942, + "65": 0.13742, + "66": 0.13948, + "67": 0.14263, + "68": 0.13848, + "69": 0.13944, + "70": 0.13874, + "71": 0.14302, + "72": 0.13748, + "73": 0.13837, + "74": 0.13911, + "75": 0.13965, + "76": 0.1466, + "77": 0.14259, + "78": 0.13635, + "79": 0.14025, + "80": 0.14725, + "81": 0.14592, + "82": 0.14832, + "83": 0.14727, + "84": 0.14437, + "85": 0.13721, + "86": 0.14235, + "87": 
0.13812, + "88": 0.13937, + "89": 0.1389, + "90": 0.13661, + "91": 0.1432, + "92": 0.1389, + "93": 0.13881, + "94": 0.13803, + "95": 0.13815, + "96": 0.14203, + "97": 0.13816, + "98": 0.13963, + "99": 0.14236, + "100": 0.14371 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..b037a96c895 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92655, + "2": 10.92585, + "3": 10.91515, + "4": 10.90905, + "5": 10.92721, + "6": 10.93563, + "7": 10.90642, + "8": 10.92122, + "9": 10.91072, + "10": 10.9079, + "11": 10.89281, + "12": 10.92428, + "13": 10.91489, + "14": 10.92146, + "15": 10.88294, + "16": 10.87306, + "17": 10.84064, + "18": 10.87301, + "19": 10.85639, + "20": 10.77595, + "21": 10.74891, + "22": 10.63081, + "23": 10.75618, + "24": 10.65646, + "25": 10.59263, + "26": 10.65434, + "27": 10.64917, + "28": 10.59496, + "29": 10.60943, + "30": 10.39175, + "31": 10.15724, + "32": 10.49108, + "33": 10.47963, + "34": 10.24072, + "35": 10.29699, + "36": 10.24669, + "37": 10.35246, + "38": 10.2048, + "39": 10.40502, + "40": 10.09661, + "41": 10.15196, + "42": 10.22071, + "43": 9.85506, + "44": 9.96164, + "45": 9.84471, + "46": 9.83835, + "47": 10.14005, + "48": 9.85759, + "49": 9.53745, + "50": 9.90943, + "51": 9.84889, + "52": 9.74165, + "53": 10.0634, + "54": 9.94734, + "55": 9.87774, + "56": 9.62734, + "57": 9.47159, + "58": 9.82898, + "59": 9.58277, + 
"60": 9.49122, + "61": 9.69967, + "62": 9.97993, + "63": 9.37282, + "64": 9.77462, + "65": 8.94257, + "66": 9.69881, + "67": 9.36409, + "68": 9.78788, + "69": 9.78337, + "70": 9.72278, + "71": 9.6081, + "72": 9.5843, + "73": 9.48976, + "74": 8.9486, + "75": 9.41891, + "76": 9.08727, + "77": 10.06346, + "78": 9.72838, + "79": 9.37152, + "80": 9.40057, + "81": 9.47832, + "82": 9.69155, + "83": 9.30737, + "84": 9.41234, + "85": 9.61188, + "86": 9.07586, + "87": 9.59459, + "88": 9.74737, + "89": 9.60679, + "90": 9.81026, + "91": 9.34362, + "92": 9.36488, + "93": 9.07724, + "94": 8.83091, + "95": 9.5172, + "96": 9.52447, + "97": 9.31032, + "98": 9.67872, + "99": 8.88837, + "100": 9.40136 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1652.0, + "2": 1809.0, + "3": 1697.0, + "4": 1687.0, + "5": 1983.0, + "6": 1918.0, + "7": 1852.0, + "8": 1726.0, + "9": 1864.0, + "10": 1445.0, + "11": 1907.0, + "12": 1737.0, + "13": 1917.0, + "14": 1796.0, + "15": 1908.0, + "16": 1761.0, + "17": 1863.0, + "18": 1755.0, + "19": 1793.0, + "20": 1636.0, + "21": 1854.0, + "22": 1706.0, + "23": 1991.0, + "24": 1637.0, + "25": 1729.0, + "26": 1800.0, + "27": 1859.0, + "28": 2032.0, + "29": 2012.0, + "30": 1912.0, + "31": 1529.0, + "32": 1953.0, + "33": 2266.0, + "34": 1934.0, + "35": 1910.0, + "36": 1967.0, + "37": 2323.0, + "38": 2236.0, + "39": 2450.0, + "40": 2184.0, + "41": 2303.0, + "42": 2258.0, + "43": 2025.0, + "44": 2240.0, + "45": 2122.0, + "46": 2252.0, + "47": 2581.0, + "48": 2451.0, + "49": 2292.0, + "50": 2525.0, + "51": 2822.0, + "52": 2570.0, + "53": 2948.0, + "54": 2795.0, + "55": 2407.0, + "56": 2786.0, + "57": 2346.0, + "58": 3115.0, + "59": 2885.0, + "60": 2430.0, + "61": 2926.0, + "62": 2574.0, + "63": 2362.0, + "64": 2948.0, + "65": 2802.0, + "66": 3346.0, + "67": 2744.0, + "68": 2926.0, + "69": 2971.0, + "70": 3278.0, + "71": 2955.0, + "72": 2445.0, + "73": 3156.0, + "74": 1933.0, + "75": 2547.0, + "76": 3025.0, 
+ "77": 3458.0, + "78": 3206.0, + "79": 3240.0, + "80": 3526.0, + "81": 3691.0, + "82": 3454.0, + "83": 2739.0, + "84": 3328.0, + "85": 3300.0, + "86": 2859.0, + "87": 3822.0, + "88": 3130.0, + "89": 3409.0, + "90": 3148.0, + "91": 2760.0, + "92": 3173.0, + "93": 2608.0, + "94": 3428.0, + "95": 3402.0, + "96": 3633.0, + "97": 3222.0, + "98": 3696.0, + "99": 3142.0, + "100": 3351.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 438468608.0, + "2": 438468608.0, + "3": 438468608.0, + "4": 438468608.0, + "5": 438468608.0, + "6": 438468608.0, + "7": 438468608.0, + "8": 438468608.0, + "9": 438468608.0, + "10": 438468608.0, + "11": 438468608.0, + "12": 438468608.0, + "13": 438468608.0, + "14": 438468608.0, + "15": 438468608.0, + "16": 438468608.0, + "17": 438468608.0, + "18": 438468608.0, + "19": 438468608.0, + "20": 438468608.0, + "21": 438468608.0, + "22": 438468608.0, + "23": 438468608.0, + "24": 438468608.0, + "25": 438468608.0, + "26": 438468608.0, + "27": 438468608.0, + "28": 438468608.0, + "29": 438468608.0, + "30": 438468608.0, + "31": 438468608.0, + "32": 438468608.0, + "33": 438468608.0, + "34": 438468608.0, + "35": 438468608.0, + "36": 438468608.0, + "37": 438468608.0, + "38": 438468608.0, + "39": 438468608.0, + "40": 438468608.0, + "41": 438468608.0, + "42": 438468608.0, + "43": 438468608.0, + "44": 438468608.0, + "45": 438468608.0, + "46": 438468608.0, + "47": 438468608.0, + "48": 438468608.0, + "49": 438468608.0, + "50": 438468608.0, + "51": 438468608.0, + "52": 438468608.0, + "53": 438468608.0, + "54": 438468608.0, + "55": 438468608.0, + "56": 438468608.0, + "57": 438468608.0, + "58": 438468608.0, + "59": 438468608.0, + "60": 438468608.0, + "61": 438468608.0, + "62": 438468608.0, + "63": 438468608.0, + "64": 438468608.0, + "65": 438468608.0, + "66": 438468608.0, + "67": 438468608.0, + "68": 438468608.0, + "69": 438468608.0, + "70": 438468608.0, + "71": 438468608.0, + "72": 438468608.0, + 
"73": 438468608.0, + "74": 438468608.0, + "75": 438468608.0, + "76": 438468608.0, + "77": 438468608.0, + "78": 438468608.0, + "79": 438468608.0, + "80": 438468608.0, + "81": 438468608.0, + "82": 438468608.0, + "83": 438468608.0, + "84": 438468608.0, + "85": 438468608.0, + "86": 438468608.0, + "87": 438468608.0, + "88": 438468608.0, + "89": 438468608.0, + "90": 438468608.0, + "91": 438468608.0, + "92": 438468608.0, + "93": 438468608.0, + "94": 438468608.0, + "95": 438468608.0, + "96": 438468608.0, + "97": 438468608.0, + "98": 438468608.0, + "99": 438468608.0, + "100": 438468608.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2658189824.0, + "2": 2658189824.0, + "3": 2658189824.0, + "4": 2658189824.0, + "5": 2658189824.0, + "6": 2658189824.0, + "7": 2658189824.0, + "8": 2658189824.0, + "9": 2658189824.0, + "10": 2658189824.0, + "11": 2658189824.0, + "12": 2658189824.0, + "13": 2658189824.0, + "14": 2658189824.0, + "15": 2658189824.0, + "16": 2658189824.0, + "17": 2658189824.0, + "18": 2658189824.0, + "19": 2658189824.0, + "20": 2658189824.0, + "21": 2658189824.0, + "22": 2658189824.0, + "23": 2658189824.0, + "24": 2658189824.0, + "25": 2658189824.0, + "26": 2658189824.0, + "27": 2658189824.0, + "28": 2658189824.0, + "29": 2658189824.0, + "30": 2658189824.0, + "31": 2658189824.0, + "32": 2658189824.0, + "33": 2658189824.0, + "34": 2658189824.0, + "35": 2658189824.0, + "36": 2658189824.0, + "37": 2658189824.0, + "38": 2658189824.0, + "39": 2658189824.0, + "40": 2658189824.0, + "41": 2658189824.0, + "42": 2658189824.0, + "43": 2658189824.0, + "44": 2658189824.0, + "45": 2658189824.0, + "46": 2658189824.0, + "47": 2658189824.0, + "48": 2658189824.0, + "49": 2658189824.0, + "50": 2658189824.0, + "51": 2658189824.0, + "52": 2658189824.0, + "53": 2658189824.0, + "54": 2658189824.0, + "55": 2658189824.0, + "56": 2658189824.0, + "57": 2658189824.0, + "58": 2658189824.0, + "59": 2658189824.0, + "60": 
2658189824.0, + "61": 2658189824.0, + "62": 2658189824.0, + "63": 2658189824.0, + "64": 2658189824.0, + "65": 2658189824.0, + "66": 2658189824.0, + "67": 2658189824.0, + "68": 2658189824.0, + "69": 2658189824.0, + "70": 2658189824.0, + "71": 2658189824.0, + "72": 2658189824.0, + "73": 2658189824.0, + "74": 2658189824.0, + "75": 2658189824.0, + "76": 2658189824.0, + "77": 2658189824.0, + "78": 2658189824.0, + "79": 2658189824.0, + "80": 2658189824.0, + "81": 2658189824.0, + "82": 2658189824.0, + "83": 2658189824.0, + "84": 2658189824.0, + "85": 2658189824.0, + "86": 2658189824.0, + "87": 2658189824.0, + "88": 2658189824.0, + "89": 2658189824.0, + "90": 2658189824.0, + "91": 2658189824.0, + "92": 2658189824.0, + "93": 2658189824.0, + "94": 2658189824.0, + "95": 2658189824.0, + "96": 2658189824.0, + "97": 2658189824.0, + "98": 2658189824.0, + "99": 2658189824.0, + "100": 2658189824.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 16.21334, + "2": 0.23608, + "3": 0.19735, + "4": 0.19252, + "5": 0.19648, + "6": 0.19203, + "7": 0.19219, + "8": 0.18973, + "9": 0.18684, + "10": 0.19159, + "11": 0.18643, + "12": 0.18986, + "13": 0.19025, + "14": 0.19056, + "15": 0.19293, + "16": 0.44796, + "17": 0.19013, + "18": 0.18935, + "19": 0.19012, + "20": 0.19194, + "21": 0.44342, + "22": 0.18909, + "23": 0.19253, + "24": 0.18728, + "25": 0.18638, + "26": 0.18656, + "27": 0.1932, + "28": 0.18998, + "29": 0.18957, + "30": 0.18392, + "31": 0.18385, + "32": 0.18468, + "33": 0.18516, + "34": 0.18864, + "35": 0.18375, + "36": 0.18378, + "37": 0.18966, + "38": 0.18733, + "39": 0.18976, + "40": 0.18909, + "41": 0.18487, + "42": 0.18422, + "43": 0.1846, + "44": 0.18581, + "45": 0.18726, + "46": 0.18439, + "47": 0.1845, + "48": 0.18384, + "49": 0.18422, + "50": 0.18685, + "51": 0.39339, + "52": 0.19487, + "53": 0.19224, + "54": 0.18723, + "55": 0.18809, + "56": 0.18463, + "57": 0.18414, + "58": 0.18472, + "59": 0.18467, + "60": 
0.19286, + "61": 0.18645, + "62": 0.18785, + "63": 0.18591, + "64": 0.18644, + "65": 0.1905, + "66": 0.18834, + "67": 0.18595, + "68": 0.1873, + "69": 0.1863, + "70": 0.19033, + "71": 0.19567, + "72": 0.18818, + "73": 0.18498, + "74": 0.18476, + "75": 0.18427, + "76": 0.19433, + "77": 0.18426, + "78": 0.18436, + "79": 0.18486, + "80": 0.18553, + "81": 0.18804, + "82": 0.18885, + "83": 0.18682, + "84": 0.18782, + "85": 0.18674, + "86": 0.18747, + "87": 0.19054, + "88": 0.18731, + "89": 0.18701, + "90": 0.18815, + "91": 0.1867, + "92": 0.19324, + "93": 0.1868, + "94": 0.18625, + "95": 0.18677, + "96": 0.18717, + "97": 0.1888, + "98": 0.19044, + "99": 0.19131, + "100": 0.18423 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..f917c6cc0e4 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92655, + "2": 10.92585, + "3": 10.91514, + "4": 10.90903, + "5": 10.92718, + "6": 10.93557, + "7": 10.90644, + "8": 10.92122, + "9": 10.91072, + "10": 10.90789, + "11": 10.89278, + "12": 10.9243, + "13": 10.91485, + "14": 10.92142, + "15": 10.8829, + "16": 10.87307, + "17": 10.84066, + "18": 10.87298, + "19": 10.85633, + "20": 10.77594, + "21": 10.74895, + "22": 10.63081, + "23": 10.75621, + "24": 10.65644, + "25": 10.59266, + "26": 10.65438, + "27": 10.64909, + "28": 10.59497, + "29": 10.60943, + "30": 10.39176, + "31": 10.15724, + "32": 10.4911, + "33": 10.47963, + "34": 
10.24068, + "35": 10.29701, + "36": 10.24669, + "37": 10.35242, + "38": 10.20484, + "39": 10.40506, + "40": 10.09662, + "41": 10.15193, + "42": 10.22066, + "43": 9.85508, + "44": 9.96165, + "45": 9.84471, + "46": 9.83836, + "47": 10.14003, + "48": 9.85764, + "49": 9.53744, + "50": 9.90947, + "51": 9.84892, + "52": 9.74166, + "53": 10.06337, + "54": 9.9473, + "55": 9.87771, + "56": 9.62738, + "57": 9.47161, + "58": 9.82894, + "59": 9.58274, + "60": 9.49123, + "61": 9.69974, + "62": 9.9799, + "63": 9.37281, + "64": 9.77461, + "65": 8.94257, + "66": 9.69883, + "67": 9.36406, + "68": 9.78786, + "69": 9.78336, + "70": 9.72276, + "71": 9.6081, + "72": 9.58428, + "73": 9.48979, + "74": 8.94855, + "75": 9.4189, + "76": 9.08727, + "77": 10.06346, + "78": 9.72838, + "79": 9.37156, + "80": 9.40056, + "81": 9.47827, + "82": 9.69154, + "83": 9.30739, + "84": 9.41237, + "85": 9.61189, + "86": 9.07589, + "87": 9.59464, + "88": 9.74734, + "89": 9.60676, + "90": 9.81027, + "91": 9.3436, + "92": 9.36495, + "93": 9.07727, + "94": 8.83093, + "95": 9.51724, + "96": 9.52445, + "97": 9.31032, + "98": 9.67873, + "99": 8.88838, + "100": 9.40135 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1664.0, + "2": 1830.0, + "3": 1679.0, + "4": 1739.0, + "5": 1978.0, + "6": 1893.0, + "7": 1836.0, + "8": 1709.0, + "9": 1941.0, + "10": 1440.0, + "11": 1916.0, + "12": 1781.0, + "13": 1978.0, + "14": 1867.0, + "15": 1997.0, + "16": 1704.0, + "17": 1822.0, + "18": 1610.0, + "19": 1700.0, + "20": 1580.0, + "21": 1805.0, + "22": 1706.0, + "23": 1968.0, + "24": 1619.0, + "25": 1717.0, + "26": 1852.0, + "27": 1944.0, + "28": 2087.0, + "29": 2009.0, + "30": 1915.0, + "31": 1560.0, + "32": 1963.0, + "33": 2161.0, + "34": 2003.0, + "35": 1941.0, + "36": 1977.0, + "37": 2353.0, + "38": 2193.0, + "39": 2425.0, + "40": 2125.0, + "41": 2239.0, + "42": 2203.0, + "43": 1988.0, + "44": 2154.0, + "45": 2037.0, + "46": 2222.0, + "47": 2644.0, + "48": 2428.0, + 
"49": 2272.0, + "50": 2482.0, + "51": 2746.0, + "52": 2634.0, + "53": 2927.0, + "54": 2689.0, + "55": 2476.0, + "56": 2694.0, + "57": 2382.0, + "58": 3021.0, + "59": 2806.0, + "60": 2510.0, + "61": 2886.0, + "62": 2639.0, + "63": 2314.0, + "64": 3075.0, + "65": 2677.0, + "66": 3260.0, + "67": 2866.0, + "68": 2797.0, + "69": 2920.0, + "70": 3298.0, + "71": 3074.0, + "72": 2433.0, + "73": 3082.0, + "74": 1986.0, + "75": 2706.0, + "76": 3045.0, + "77": 3450.0, + "78": 3299.0, + "79": 3366.0, + "80": 3348.0, + "81": 3827.0, + "82": 3410.0, + "83": 2855.0, + "84": 3427.0, + "85": 3226.0, + "86": 2724.0, + "87": 3790.0, + "88": 3083.0, + "89": 3503.0, + "90": 3119.0, + "91": 2684.0, + "92": 3159.0, + "93": 2689.0, + "94": 3478.0, + "95": 3464.0, + "96": 3584.0, + "97": 3223.0, + "98": 3723.0, + "99": 3220.0, + "100": 3335.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 438468608.0, + "2": 438468608.0, + "3": 438468608.0, + "4": 438468608.0, + "5": 438468608.0, + "6": 438468608.0, + "7": 438468608.0, + "8": 438468608.0, + "9": 438468608.0, + "10": 438468608.0, + "11": 438468608.0, + "12": 438468608.0, + "13": 438468608.0, + "14": 438468608.0, + "15": 438468608.0, + "16": 438468608.0, + "17": 438468608.0, + "18": 438468608.0, + "19": 438468608.0, + "20": 438468608.0, + "21": 438468608.0, + "22": 438468608.0, + "23": 438468608.0, + "24": 438468608.0, + "25": 438468608.0, + "26": 438468608.0, + "27": 438468608.0, + "28": 438468608.0, + "29": 438468608.0, + "30": 438468608.0, + "31": 438468608.0, + "32": 438468608.0, + "33": 438468608.0, + "34": 438468608.0, + "35": 438468608.0, + "36": 438468608.0, + "37": 438468608.0, + "38": 438468608.0, + "39": 438468608.0, + "40": 438468608.0, + "41": 438468608.0, + "42": 438468608.0, + "43": 438468608.0, + "44": 438468608.0, + "45": 438468608.0, + "46": 438468608.0, + "47": 438468608.0, + "48": 438468608.0, + "49": 438468608.0, + "50": 438468608.0, + "51": 
438468608.0, + "52": 438468608.0, + "53": 438468608.0, + "54": 438468608.0, + "55": 438468608.0, + "56": 438468608.0, + "57": 438468608.0, + "58": 438468608.0, + "59": 438468608.0, + "60": 438468608.0, + "61": 438468608.0, + "62": 438468608.0, + "63": 438468608.0, + "64": 438468608.0, + "65": 438468608.0, + "66": 438468608.0, + "67": 438468608.0, + "68": 438468608.0, + "69": 438468608.0, + "70": 438468608.0, + "71": 438468608.0, + "72": 438468608.0, + "73": 438468608.0, + "74": 438468608.0, + "75": 438468608.0, + "76": 438468608.0, + "77": 438468608.0, + "78": 438468608.0, + "79": 438468608.0, + "80": 438468608.0, + "81": 438468608.0, + "82": 438468608.0, + "83": 438468608.0, + "84": 438468608.0, + "85": 438468608.0, + "86": 438468608.0, + "87": 438468608.0, + "88": 438468608.0, + "89": 438468608.0, + "90": 438468608.0, + "91": 438468608.0, + "92": 438468608.0, + "93": 438468608.0, + "94": 438468608.0, + "95": 438468608.0, + "96": 438468608.0, + "97": 438468608.0, + "98": 438468608.0, + "99": 438468608.0, + "100": 438468608.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2658189824.0, + "2": 2658189824.0, + "3": 2658189824.0, + "4": 2658189824.0, + "5": 2658189824.0, + "6": 2658189824.0, + "7": 2658189824.0, + "8": 2658189824.0, + "9": 2658189824.0, + "10": 2658189824.0, + "11": 2658189824.0, + "12": 2658189824.0, + "13": 2658189824.0, + "14": 2658189824.0, + "15": 2658189824.0, + "16": 2658189824.0, + "17": 2658189824.0, + "18": 2658189824.0, + "19": 2658189824.0, + "20": 2658189824.0, + "21": 2658189824.0, + "22": 2658189824.0, + "23": 2658189824.0, + "24": 2658189824.0, + "25": 2658189824.0, + "26": 2658189824.0, + "27": 2658189824.0, + "28": 2658189824.0, + "29": 2658189824.0, + "30": 2658189824.0, + "31": 2658189824.0, + "32": 2658189824.0, + "33": 2658189824.0, + "34": 2658189824.0, + "35": 2658189824.0, + "36": 2658189824.0, + "37": 2658189824.0, + "38": 2658189824.0, + "39": 
2658189824.0, + "40": 2658189824.0, + "41": 2658189824.0, + "42": 2658189824.0, + "43": 2658189824.0, + "44": 2658189824.0, + "45": 2658189824.0, + "46": 2658189824.0, + "47": 2658189824.0, + "48": 2658189824.0, + "49": 2658189824.0, + "50": 2658189824.0, + "51": 2658189824.0, + "52": 2658189824.0, + "53": 2658189824.0, + "54": 2658189824.0, + "55": 2658189824.0, + "56": 2658189824.0, + "57": 2658189824.0, + "58": 2658189824.0, + "59": 2658189824.0, + "60": 2658189824.0, + "61": 2658189824.0, + "62": 2658189824.0, + "63": 2658189824.0, + "64": 2658189824.0, + "65": 2658189824.0, + "66": 2658189824.0, + "67": 2658189824.0, + "68": 2658189824.0, + "69": 2658189824.0, + "70": 2658189824.0, + "71": 2658189824.0, + "72": 2658189824.0, + "73": 2658189824.0, + "74": 2658189824.0, + "75": 2658189824.0, + "76": 2658189824.0, + "77": 2658189824.0, + "78": 2658189824.0, + "79": 2658189824.0, + "80": 2658189824.0, + "81": 2658189824.0, + "82": 2658189824.0, + "83": 2658189824.0, + "84": 2658189824.0, + "85": 2658189824.0, + "86": 2658189824.0, + "87": 2658189824.0, + "88": 2658189824.0, + "89": 2658189824.0, + "90": 2658189824.0, + "91": 2658189824.0, + "92": 2658189824.0, + "93": 2658189824.0, + "94": 2658189824.0, + "95": 2658189824.0, + "96": 2658189824.0, + "97": 2658189824.0, + "98": 2658189824.0, + "99": 2658189824.0, + "100": 2658189824.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 16.23895, + "2": 0.20726, + "3": 0.17912, + "4": 0.18256, + "5": 0.18172, + "6": 0.18173, + "7": 0.18211, + "8": 0.18112, + "9": 0.18625, + "10": 0.18006, + "11": 0.18704, + "12": 0.17857, + "13": 0.17784, + "14": 0.18165, + "15": 0.1799, + "16": 0.17752, + "17": 0.17782, + "18": 0.1783, + "19": 0.17747, + "20": 0.18053, + "21": 0.17942, + "22": 0.17652, + "23": 0.17547, + "24": 0.17698, + "25": 0.17802, + "26": 0.17909, + "27": 0.1761, + "28": 0.17568, + "29": 0.17486, + "30": 0.17517, + "31": 0.18013, + "32": 0.18802, + 
"33": 0.18062, + "34": 0.18393, + "35": 0.18008, + "36": 0.18215, + "37": 0.18359, + "38": 0.18075, + "39": 0.17951, + "40": 0.17932, + "41": 0.18163, + "42": 0.18241, + "43": 0.18319, + "44": 0.18167, + "45": 0.18855, + "46": 0.18203, + "47": 0.17989, + "48": 0.18432, + "49": 0.18049, + "50": 0.18019, + "51": 0.1889, + "52": 0.18448, + "53": 0.18169, + "54": 0.1839, + "55": 0.18232, + "56": 0.18118, + "57": 0.18003, + "58": 0.37898, + "59": 0.18312, + "60": 0.17998, + "61": 0.17977, + "62": 0.18171, + "63": 0.181, + "64": 0.18283, + "65": 0.17995, + "66": 0.18199, + "67": 0.17999, + "68": 0.18052, + "69": 0.17988, + "70": 0.18409, + "71": 0.17919, + "72": 0.1808, + "73": 0.18072, + "74": 0.18009, + "75": 0.18701, + "76": 0.18172, + "77": 0.18079, + "78": 0.18125, + "79": 0.18109, + "80": 0.18217, + "81": 0.18459, + "82": 0.18212, + "83": 0.1828, + "84": 0.18156, + "85": 0.18308, + "86": 0.18586, + "87": 0.18076, + "88": 0.17994, + "89": 0.17997, + "90": 0.17982, + "91": 0.18361, + "92": 0.18438, + "93": 0.17977, + "94": 0.18014, + "95": 0.18079, + "96": 0.18168, + "97": 0.18546, + "98": 0.18181, + "99": 0.18024, + "100": 0.1811 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 953c7c07295..925cc0a5ec5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.85949, "5": 10.88346, "10": 
10.83886, "15": 10.84052, "20": 10.73193, "25": 10.54813, "30": 10.37137, "35": 10.27172, "40": 10.09425, "45": 9.84079, "50": 9.90875, "55": 9.88203, "60": 9.50643, "65": 8.95166, "70": 9.74737, "75": 9.42703, "80": 9.40982, "85": 9.61371, "90": 9.81898, "95": 9.52172, "100": 9.39725}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1688.0, "5": 1909.0, "10": 1457.0, "15": 1930.0, "20": 1596.0, "25": 1557.0, "30": 1860.0, "35": 1902.0, "40": 2207.0, "45": 2095.0, "50": 2416.0, "55": 2216.0, "60": 2457.0, "65": 2472.0, "70": 3057.0, "75": 2474.0, "80": 3338.0, "85": 3324.0, "90": 3096.0, "95": 3399.0, "100": 3128.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 517505536.0, "5": 517505536.0, "10": 517505536.0, "15": 517505536.0, "20": 517505536.0, "25": 517505536.0, "30": 517505536.0, "35": 517505536.0, "40": 517505536.0, "45": 517505536.0, "50": 517505536.0, "55": 517505536.0, "60": 517505536.0, "65": 517505536.0, "70": 517505536.0, "75": 517505536.0, "80": 517505536.0, "85": 517505536.0, "90": 517505536.0, "95": 517505536.0, "100": 517505536.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1246524928.0, "5": 1428695552.0, "10": 1428695552.0, "15": 1428695552.0, "20": 1428695552.0, "25": 1428695552.0, "30": 1428695552.0, "35": 1428695552.0, "40": 1428695552.0, "45": 1428695552.0, "50": 1428695552.0, "55": 1428695552.0, "60": 1428695552.0, "65": 1428695552.0, "70": 1428695552.0, "75": 1428695552.0, "80": 1428695552.0, "85": 1428695552.0, "90": 1428695552.0, "95": 1428695552.0, "100": 1428695552.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 11.33109, "5": 0.12233, "10": 0.12087, "15": 0.12933, "20": 0.12038, "25": 0.12097, "30": 0.12085, "35": 0.12137, "40": 0.11996, "45": 0.12054, "50": 0.12218, "55": 0.12402, "60": 0.13274, "65": 0.12088, "70": 0.12039, "75": 0.12248, "80": 
0.12305, "85": 0.12385, "90": 0.12202, "95": 0.1201, "100": 0.12049}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86543, + "4": 10.84553, + "5": 10.88346, + "6": 10.89431, + "7": 10.87067, + "8": 10.86979, + "9": 10.86918, + "10": 10.83886, + "11": 10.8943, + "12": 10.87983, + "13": 10.87985, + "14": 10.90321, + "15": 10.84052, + "16": 10.83787, + "17": 10.80669, + "18": 10.83026, + "19": 10.82261, + "20": 10.73193, + "21": 10.70748, + "22": 10.56005, + "23": 10.72399, + "24": 10.61114, + "25": 10.54813, + "26": 10.61329, + "27": 10.63053, + "28": 10.56646, + "29": 10.59668, + "30": 10.37137, + "31": 10.11725, + "32": 10.46127, + "33": 10.45249, + "34": 10.2169, + "35": 10.27172, + "36": 10.23119, + "37": 10.34809, + "38": 10.1884, + "39": 10.41044, + "40": 10.09425, + "41": 10.14707, + "42": 10.21242, + "43": 9.84105, + "44": 9.95918, + "45": 9.84079, + "46": 9.82479, + "47": 10.13878, + "48": 9.85831, + "49": 9.54705, + "50": 9.90875, + "51": 9.8558, + "52": 9.75237, + "53": 10.07589, + "54": 9.95688, + "55": 9.88203, + "56": 9.6313, + "57": 9.48649, + "58": 9.83109, + "59": 9.58897, + "60": 9.50643, + "61": 9.70363, + "62": 9.98286, + "63": 9.38302, + "64": 9.77901, + "65": 8.95166, + "66": 9.70158, + "67": 9.37203, + "68": 9.78849, + "69": 9.79851, + "70": 9.74737, + "71": 9.61908, + "72": 9.58502, + "73": 9.49721, + "74": 8.93927, + "75": 9.42703, + "76": 9.0802, + "77": 10.06567, + "78": 9.72893, + "79": 9.3776, + "80": 9.40982, + "81": 9.47976, + "82": 9.7018, + "83": 9.30612, + "84": 9.4209, + "85": 9.61371, + "86": 9.07649, + "87": 9.5945, + "88": 9.75068, + "89": 9.60238, + "90": 9.81898, + "91": 9.33894, + "92": 9.35716, + "93": 9.07879, + "94": 8.83503, + "95": 9.52172, + "96": 9.53003, + "97": 9.31306, + "98": 9.67783, + "99": 8.89058, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 
1, + "values": { + "1": 1688.0, + "2": 1806.0, + "3": 1675.0, + "4": 1842.0, + "5": 1909.0, + "6": 1908.0, + "7": 1783.0, + "8": 1611.0, + "9": 1753.0, + "10": 1457.0, + "11": 1880.0, + "12": 1683.0, + "13": 1907.0, + "14": 1733.0, + "15": 1930.0, + "16": 1840.0, + "17": 1892.0, + "18": 1650.0, + "19": 1790.0, + "20": 1596.0, + "21": 1765.0, + "22": 1616.0, + "23": 1974.0, + "24": 1621.0, + "25": 1557.0, + "26": 1745.0, + "27": 1722.0, + "28": 1976.0, + "29": 2068.0, + "30": 1860.0, + "31": 1536.0, + "32": 1883.0, + "33": 2071.0, + "34": 1894.0, + "35": 1902.0, + "36": 1885.0, + "37": 2231.0, + "38": 2129.0, + "39": 2333.0, + "40": 2207.0, + "41": 2193.0, + "42": 2322.0, + "43": 2015.0, + "44": 2089.0, + "45": 2095.0, + "46": 2392.0, + "47": 2430.0, + "48": 2414.0, + "49": 2340.0, + "50": 2416.0, + "51": 2613.0, + "52": 2538.0, + "53": 2792.0, + "54": 2801.0, + "55": 2216.0, + "56": 2858.0, + "57": 2381.0, + "58": 2854.0, + "59": 2787.0, + "60": 2457.0, + "61": 2941.0, + "62": 2543.0, + "63": 2408.0, + "64": 2968.0, + "65": 2472.0, + "66": 2977.0, + "67": 2839.0, + "68": 2775.0, + "69": 2832.0, + "70": 3057.0, + "71": 2909.0, + "72": 2421.0, + "73": 2982.0, + "74": 1922.0, + "75": 2474.0, + "76": 3059.0, + "77": 3177.0, + "78": 3067.0, + "79": 3052.0, + "80": 3338.0, + "81": 3644.0, + "82": 3234.0, + "83": 2798.0, + "84": 3196.0, + "85": 3324.0, + "86": 2855.0, + "87": 3820.0, + "88": 2962.0, + "89": 3379.0, + "90": 3096.0, + "91": 2857.0, + "92": 3077.0, + "93": 2693.0, + "94": 3312.0, + "95": 3399.0, + "96": 3378.0, + "97": 3030.0, + "98": 3619.0, + "99": 3160.0, + "100": 3128.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 517505536.0, + "2": 517505536.0, + "3": 517505536.0, + "4": 517505536.0, + "5": 517505536.0, + "6": 517505536.0, + "7": 517505536.0, + "8": 517505536.0, + "9": 517505536.0, + "10": 517505536.0, + "11": 517505536.0, + "12": 517505536.0, + "13": 517505536.0, + "14": 
517505536.0, + "15": 517505536.0, + "16": 517505536.0, + "17": 517505536.0, + "18": 517505536.0, + "19": 517505536.0, + "20": 517505536.0, + "21": 517505536.0, + "22": 517505536.0, + "23": 517505536.0, + "24": 517505536.0, + "25": 517505536.0, + "26": 517505536.0, + "27": 517505536.0, + "28": 517505536.0, + "29": 517505536.0, + "30": 517505536.0, + "31": 517505536.0, + "32": 517505536.0, + "33": 517505536.0, + "34": 517505536.0, + "35": 517505536.0, + "36": 517505536.0, + "37": 517505536.0, + "38": 517505536.0, + "39": 517505536.0, + "40": 517505536.0, + "41": 517505536.0, + "42": 517505536.0, + "43": 517505536.0, + "44": 517505536.0, + "45": 517505536.0, + "46": 517505536.0, + "47": 517505536.0, + "48": 517505536.0, + "49": 517505536.0, + "50": 517505536.0, + "51": 517505536.0, + "52": 517505536.0, + "53": 517505536.0, + "54": 517505536.0, + "55": 517505536.0, + "56": 517505536.0, + "57": 517505536.0, + "58": 517505536.0, + "59": 517505536.0, + "60": 517505536.0, + "61": 517505536.0, + "62": 517505536.0, + "63": 517505536.0, + "64": 517505536.0, + "65": 517505536.0, + "66": 517505536.0, + "67": 517505536.0, + "68": 517505536.0, + "69": 517505536.0, + "70": 517505536.0, + "71": 517505536.0, + "72": 517505536.0, + "73": 517505536.0, + "74": 517505536.0, + "75": 517505536.0, + "76": 517505536.0, + "77": 517505536.0, + "78": 517505536.0, + "79": 517505536.0, + "80": 517505536.0, + "81": 517505536.0, + "82": 517505536.0, + "83": 517505536.0, + "84": 517505536.0, + "85": 517505536.0, + "86": 517505536.0, + "87": 517505536.0, + "88": 517505536.0, + "89": 517505536.0, + "90": 517505536.0, + "91": 517505536.0, + "92": 517505536.0, + "93": 517505536.0, + "94": 517505536.0, + "95": 517505536.0, + "96": 517505536.0, + "97": 517505536.0, + "98": 517505536.0, + "99": 517505536.0, + "100": 517505536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1246524928.0, + "2": 1428695552.0, + "3": 1428695552.0, + 
"4": 1428695552.0, + "5": 1428695552.0, + "6": 1428695552.0, + "7": 1428695552.0, + "8": 1428695552.0, + "9": 1428695552.0, + "10": 1428695552.0, + "11": 1428695552.0, + "12": 1428695552.0, + "13": 1428695552.0, + "14": 1428695552.0, + "15": 1428695552.0, + "16": 1428695552.0, + "17": 1428695552.0, + "18": 1428695552.0, + "19": 1428695552.0, + "20": 1428695552.0, + "21": 1428695552.0, + "22": 1428695552.0, + "23": 1428695552.0, + "24": 1428695552.0, + "25": 1428695552.0, + "26": 1428695552.0, + "27": 1428695552.0, + "28": 1428695552.0, + "29": 1428695552.0, + "30": 1428695552.0, + "31": 1428695552.0, + "32": 1428695552.0, + "33": 1428695552.0, + "34": 1428695552.0, + "35": 1428695552.0, + "36": 1428695552.0, + "37": 1428695552.0, + "38": 1428695552.0, + "39": 1428695552.0, + "40": 1428695552.0, + "41": 1428695552.0, + "42": 1428695552.0, + "43": 1428695552.0, + "44": 1428695552.0, + "45": 1428695552.0, + "46": 1428695552.0, + "47": 1428695552.0, + "48": 1428695552.0, + "49": 1428695552.0, + "50": 1428695552.0, + "51": 1428695552.0, + "52": 1428695552.0, + "53": 1428695552.0, + "54": 1428695552.0, + "55": 1428695552.0, + "56": 1428695552.0, + "57": 1428695552.0, + "58": 1428695552.0, + "59": 1428695552.0, + "60": 1428695552.0, + "61": 1428695552.0, + "62": 1428695552.0, + "63": 1428695552.0, + "64": 1428695552.0, + "65": 1428695552.0, + "66": 1428695552.0, + "67": 1428695552.0, + "68": 1428695552.0, + "69": 1428695552.0, + "70": 1428695552.0, + "71": 1428695552.0, + "72": 1428695552.0, + "73": 1428695552.0, + "74": 1428695552.0, + "75": 1428695552.0, + "76": 1428695552.0, + "77": 1428695552.0, + "78": 1428695552.0, + "79": 1428695552.0, + "80": 1428695552.0, + "81": 1428695552.0, + "82": 1428695552.0, + "83": 1428695552.0, + "84": 1428695552.0, + "85": 1428695552.0, + "86": 1428695552.0, + "87": 1428695552.0, + "88": 1428695552.0, + "89": 1428695552.0, + "90": 1428695552.0, + "91": 1428695552.0, + "92": 1428695552.0, + "93": 1428695552.0, + "94": 1428695552.0, + 
"95": 1428695552.0, + "96": 1428695552.0, + "97": 1428695552.0, + "98": 1428695552.0, + "99": 1428695552.0, + "100": 1428695552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.60342, + "2": 0.19062, + "3": 0.17106, + "4": 0.15064, + "5": 0.15065, + "6": 0.1494, + "7": 0.15215, + "8": 0.14914, + "9": 0.15232, + "10": 0.15441, + "11": 0.15247, + "12": 0.15046, + "13": 0.15058, + "14": 0.15219, + "15": 0.15133, + "16": 0.15023, + "17": 0.1509, + "18": 0.14938, + "19": 0.15103, + "20": 0.1515, + "21": 0.1522, + "22": 0.1489, + "23": 0.15182, + "24": 0.1502, + "25": 0.15153, + "26": 0.15174, + "27": 0.15257, + "28": 0.14921, + "29": 0.14989, + "30": 0.14944, + "31": 0.15201, + "32": 0.1504, + "33": 0.1493, + "34": 0.15189, + "35": 0.14934, + "36": 0.15042, + "37": 0.15128, + "38": 0.15671, + "39": 0.14985, + "40": 0.15139, + "41": 0.15056, + "42": 0.14937, + "43": 0.15027, + "44": 0.15158, + "45": 0.15159, + "46": 0.15106, + "47": 0.14958, + "48": 0.15078, + "49": 0.15171, + "50": 0.15469, + "51": 0.17266, + "52": 0.16844, + "53": 0.16496, + "54": 0.16828, + "55": 0.15512, + "56": 0.15061, + "57": 0.1542, + "58": 0.15315, + "59": 0.15262, + "60": 0.1507, + "61": 0.15164, + "62": 0.15223, + "63": 0.15172, + "64": 0.15124, + "65": 0.15315, + "66": 0.15108, + "67": 0.15238, + "68": 0.1491, + "69": 0.15112, + "70": 0.15218, + "71": 0.15542, + "72": 0.1514, + "73": 0.15306, + "74": 0.14963, + "75": 0.15272, + "76": 0.15, + "77": 0.15284, + "78": 0.15228, + "79": 0.15051, + "80": 0.15149, + "81": 0.15215, + "82": 0.15086, + "83": 0.1515, + "84": 0.15437, + "85": 0.15454, + "86": 0.15197, + "87": 0.15062, + "88": 0.14949, + "89": 0.15096, + "90": 0.15098, + "91": 0.15349, + "92": 0.15219, + "93": 0.15171, + "94": 0.15116, + "95": 0.15081, + "96": 0.15321, + "97": 0.15268, + "98": 0.15451, + "99": 0.1496, + "100": 0.15252 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..9d88acfb6cd --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86543, + "4": 10.84553, + "5": 10.88346, + "6": 10.89431, + "7": 10.87067, + "8": 10.86979, + "9": 10.86918, + "10": 10.83886, + "11": 10.8943, + "12": 10.87983, + "13": 10.87985, + "14": 10.90321, + "15": 10.84052, + "16": 10.83787, + "17": 10.80669, + "18": 10.83026, + "19": 10.82261, + "20": 10.73193, + "21": 10.70748, + "22": 10.56005, + "23": 10.72399, + "24": 10.61114, + "25": 10.54813, + "26": 10.61329, + "27": 10.63053, + "28": 10.56646, + "29": 10.59668, + "30": 10.37137, + "31": 10.11725, + "32": 10.46127, + "33": 10.45249, + "34": 10.2169, + "35": 10.27172, + "36": 10.23119, + "37": 10.34809, + "38": 10.1884, + "39": 10.41044, + "40": 10.09425, + "41": 10.14707, + "42": 10.21242, + "43": 9.84105, + "44": 9.95918, + "45": 9.84079, + "46": 9.82479, + "47": 10.13878, + "48": 9.85831, + "49": 9.54705, + "50": 9.90875, + "51": 9.8558, + "52": 9.75237, + "53": 10.07589, + "54": 9.95688, + "55": 9.88203, + "56": 9.6313, + "57": 9.48649, + "58": 9.83109, + "59": 9.58897, + "60": 9.50643, + "61": 9.70363, + "62": 9.98286, + "63": 9.38302, + "64": 9.77901, + "65": 8.95166, + "66": 9.70158, + "67": 9.37203, + "68": 9.78849, + "69": 9.79851, + "70": 9.74737, + "71": 9.61908, + "72": 9.58502, + "73": 9.49721, + "74": 8.93927, + "75": 9.42703, + "76": 9.0802, + 
"77": 10.06567, + "78": 9.72893, + "79": 9.3776, + "80": 9.40982, + "81": 9.47976, + "82": 9.7018, + "83": 9.30612, + "84": 9.4209, + "85": 9.61371, + "86": 9.07649, + "87": 9.5945, + "88": 9.75068, + "89": 9.60238, + "90": 9.81898, + "91": 9.33894, + "92": 9.35716, + "93": 9.07879, + "94": 8.83503, + "95": 9.52172, + "96": 9.53003, + "97": 9.31306, + "98": 9.67783, + "99": 8.89058, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1688.0, + "2": 1806.0, + "3": 1675.0, + "4": 1842.0, + "5": 1909.0, + "6": 1908.0, + "7": 1783.0, + "8": 1611.0, + "9": 1753.0, + "10": 1457.0, + "11": 1880.0, + "12": 1683.0, + "13": 1907.0, + "14": 1733.0, + "15": 1930.0, + "16": 1840.0, + "17": 1892.0, + "18": 1650.0, + "19": 1790.0, + "20": 1596.0, + "21": 1765.0, + "22": 1616.0, + "23": 1974.0, + "24": 1621.0, + "25": 1557.0, + "26": 1745.0, + "27": 1722.0, + "28": 1976.0, + "29": 2068.0, + "30": 1860.0, + "31": 1536.0, + "32": 1883.0, + "33": 2071.0, + "34": 1894.0, + "35": 1902.0, + "36": 1885.0, + "37": 2231.0, + "38": 2129.0, + "39": 2333.0, + "40": 2207.0, + "41": 2193.0, + "42": 2322.0, + "43": 2015.0, + "44": 2089.0, + "45": 2095.0, + "46": 2392.0, + "47": 2430.0, + "48": 2414.0, + "49": 2340.0, + "50": 2416.0, + "51": 2613.0, + "52": 2538.0, + "53": 2792.0, + "54": 2801.0, + "55": 2216.0, + "56": 2858.0, + "57": 2381.0, + "58": 2854.0, + "59": 2787.0, + "60": 2457.0, + "61": 2941.0, + "62": 2543.0, + "63": 2408.0, + "64": 2968.0, + "65": 2472.0, + "66": 2977.0, + "67": 2839.0, + "68": 2775.0, + "69": 2832.0, + "70": 3057.0, + "71": 2909.0, + "72": 2421.0, + "73": 2982.0, + "74": 1922.0, + "75": 2474.0, + "76": 3059.0, + "77": 3177.0, + "78": 3067.0, + "79": 3052.0, + "80": 3338.0, + "81": 3644.0, + "82": 3234.0, + "83": 2798.0, + "84": 3196.0, + "85": 3324.0, + "86": 2855.0, + "87": 3820.0, + "88": 2962.0, + "89": 3379.0, + "90": 3096.0, + "91": 2857.0, + "92": 3077.0, + "93": 2693.0, + "94": 3312.0, + 
"95": 3399.0, + "96": 3378.0, + "97": 3030.0, + "98": 3619.0, + "99": 3160.0, + "100": 3128.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 517505536.0, + "2": 517505536.0, + "3": 517505536.0, + "4": 517505536.0, + "5": 517505536.0, + "6": 517505536.0, + "7": 517505536.0, + "8": 517505536.0, + "9": 517505536.0, + "10": 517505536.0, + "11": 517505536.0, + "12": 517505536.0, + "13": 517505536.0, + "14": 517505536.0, + "15": 517505536.0, + "16": 517505536.0, + "17": 517505536.0, + "18": 517505536.0, + "19": 517505536.0, + "20": 517505536.0, + "21": 517505536.0, + "22": 517505536.0, + "23": 517505536.0, + "24": 517505536.0, + "25": 517505536.0, + "26": 517505536.0, + "27": 517505536.0, + "28": 517505536.0, + "29": 517505536.0, + "30": 517505536.0, + "31": 517505536.0, + "32": 517505536.0, + "33": 517505536.0, + "34": 517505536.0, + "35": 517505536.0, + "36": 517505536.0, + "37": 517505536.0, + "38": 517505536.0, + "39": 517505536.0, + "40": 517505536.0, + "41": 517505536.0, + "42": 517505536.0, + "43": 517505536.0, + "44": 517505536.0, + "45": 517505536.0, + "46": 517505536.0, + "47": 517505536.0, + "48": 517505536.0, + "49": 517505536.0, + "50": 517505536.0, + "51": 517505536.0, + "52": 517505536.0, + "53": 517505536.0, + "54": 517505536.0, + "55": 517505536.0, + "56": 517505536.0, + "57": 517505536.0, + "58": 517505536.0, + "59": 517505536.0, + "60": 517505536.0, + "61": 517505536.0, + "62": 517505536.0, + "63": 517505536.0, + "64": 517505536.0, + "65": 517505536.0, + "66": 517505536.0, + "67": 517505536.0, + "68": 517505536.0, + "69": 517505536.0, + "70": 517505536.0, + "71": 517505536.0, + "72": 517505536.0, + "73": 517505536.0, + "74": 517505536.0, + "75": 517505536.0, + "76": 517505536.0, + "77": 517505536.0, + "78": 517505536.0, + "79": 517505536.0, + "80": 517505536.0, + "81": 517505536.0, + "82": 517505536.0, + "83": 517505536.0, + "84": 517505536.0, + "85": 517505536.0, + "86": 
517505536.0, + "87": 517505536.0, + "88": 517505536.0, + "89": 517505536.0, + "90": 517505536.0, + "91": 517505536.0, + "92": 517505536.0, + "93": 517505536.0, + "94": 517505536.0, + "95": 517505536.0, + "96": 517505536.0, + "97": 517505536.0, + "98": 517505536.0, + "99": 517505536.0, + "100": 517505536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1246524928.0, + "2": 1428695552.0, + "3": 1428695552.0, + "4": 1428695552.0, + "5": 1428695552.0, + "6": 1428695552.0, + "7": 1428695552.0, + "8": 1428695552.0, + "9": 1428695552.0, + "10": 1428695552.0, + "11": 1428695552.0, + "12": 1428695552.0, + "13": 1428695552.0, + "14": 1428695552.0, + "15": 1428695552.0, + "16": 1428695552.0, + "17": 1428695552.0, + "18": 1428695552.0, + "19": 1428695552.0, + "20": 1428695552.0, + "21": 1428695552.0, + "22": 1428695552.0, + "23": 1428695552.0, + "24": 1428695552.0, + "25": 1428695552.0, + "26": 1428695552.0, + "27": 1428695552.0, + "28": 1428695552.0, + "29": 1428695552.0, + "30": 1428695552.0, + "31": 1428695552.0, + "32": 1428695552.0, + "33": 1428695552.0, + "34": 1428695552.0, + "35": 1428695552.0, + "36": 1428695552.0, + "37": 1428695552.0, + "38": 1428695552.0, + "39": 1428695552.0, + "40": 1428695552.0, + "41": 1428695552.0, + "42": 1428695552.0, + "43": 1428695552.0, + "44": 1428695552.0, + "45": 1428695552.0, + "46": 1428695552.0, + "47": 1428695552.0, + "48": 1428695552.0, + "49": 1428695552.0, + "50": 1428695552.0, + "51": 1428695552.0, + "52": 1428695552.0, + "53": 1428695552.0, + "54": 1428695552.0, + "55": 1428695552.0, + "56": 1428695552.0, + "57": 1428695552.0, + "58": 1428695552.0, + "59": 1428695552.0, + "60": 1428695552.0, + "61": 1428695552.0, + "62": 1428695552.0, + "63": 1428695552.0, + "64": 1428695552.0, + "65": 1428695552.0, + "66": 1428695552.0, + "67": 1428695552.0, + "68": 1428695552.0, + "69": 1428695552.0, + "70": 1428695552.0, + "71": 1428695552.0, + "72": 1428695552.0, + 
"73": 1428695552.0, + "74": 1428695552.0, + "75": 1428695552.0, + "76": 1428695552.0, + "77": 1428695552.0, + "78": 1428695552.0, + "79": 1428695552.0, + "80": 1428695552.0, + "81": 1428695552.0, + "82": 1428695552.0, + "83": 1428695552.0, + "84": 1428695552.0, + "85": 1428695552.0, + "86": 1428695552.0, + "87": 1428695552.0, + "88": 1428695552.0, + "89": 1428695552.0, + "90": 1428695552.0, + "91": 1428695552.0, + "92": 1428695552.0, + "93": 1428695552.0, + "94": 1428695552.0, + "95": 1428695552.0, + "96": 1428695552.0, + "97": 1428695552.0, + "98": 1428695552.0, + "99": 1428695552.0, + "100": 1428695552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.53219, + "2": 0.1684, + "3": 0.13213, + "4": 0.13603, + "5": 0.14526, + "6": 0.13427, + "7": 0.136, + "8": 0.13232, + "9": 0.13802, + "10": 0.13323, + "11": 0.13284, + "12": 0.1324, + "13": 0.13226, + "14": 0.13345, + "15": 0.13404, + "16": 0.13246, + "17": 0.13846, + "18": 0.14976, + "19": 0.15115, + "20": 0.1432, + "21": 0.14309, + "22": 0.14543, + "23": 0.1451, + "24": 0.14454, + "25": 0.14293, + "26": 0.14271, + "27": 0.14031, + "28": 0.13412, + "29": 0.13599, + "30": 0.13491, + "31": 0.13451, + "32": 0.1457, + "33": 0.13899, + "34": 0.14249, + "35": 0.13753, + "36": 0.13178, + "37": 0.13407, + "38": 0.13463, + "39": 0.13305, + "40": 0.13317, + "41": 0.13403, + "42": 0.1337, + "43": 0.13374, + "44": 0.13271, + "45": 0.13351, + "46": 0.1329, + "47": 0.13703, + "48": 0.1336, + "49": 0.13392, + "50": 0.13491, + "51": 0.15864, + "52": 0.14644, + "53": 0.13353, + "54": 0.13586, + "55": 0.1338, + "56": 0.13348, + "57": 0.13862, + "58": 0.13538, + "59": 0.13584, + "60": 0.13637, + "61": 0.1348, + "62": 0.13739, + "63": 0.13414, + "64": 0.13588, + "65": 0.13342, + "66": 0.13248, + "67": 0.13306, + "68": 0.13382, + "69": 0.13258, + "70": 0.1323, + "71": 0.13391, + "72": 0.13175, + "73": 0.13255, + "74": 0.13144, + "75": 0.13133, + "76": 0.13154, + "77": 
0.13197, + "78": 0.13181, + "79": 0.13551, + "80": 0.13273, + "81": 0.13213, + "82": 0.13227, + "83": 0.13169, + "84": 0.13255, + "85": 0.13081, + "86": 0.13276, + "87": 0.13515, + "88": 0.13346, + "89": 0.13174, + "90": 0.13117, + "91": 0.13268, + "92": 0.131, + "93": 0.13188, + "94": 0.13089, + "95": 0.13284, + "96": 0.13247, + "97": 0.13153, + "98": 0.13147, + "99": 0.13253, + "100": 0.13209 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..e895f06a28a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86543, + "4": 10.84553, + "5": 10.88346, + "6": 10.89431, + "7": 10.87067, + "8": 10.86979, + "9": 10.86918, + "10": 10.83886, + "11": 10.8943, + "12": 10.87983, + "13": 10.87985, + "14": 10.90321, + "15": 10.84052, + "16": 10.83787, + "17": 10.80669, + "18": 10.83026, + "19": 10.82261, + "20": 10.73193, + "21": 10.70748, + "22": 10.56005, + "23": 10.72399, + "24": 10.61114, + "25": 10.54813, + "26": 10.61329, + "27": 10.63053, + "28": 10.56646, + "29": 10.59668, + "30": 10.37137, + "31": 10.11725, + "32": 10.46127, + "33": 10.45249, + "34": 10.2169, + "35": 10.27172, + "36": 10.23119, + "37": 10.34809, + "38": 10.1884, + "39": 10.41044, + "40": 10.09425, + "41": 10.14707, + "42": 10.21242, + "43": 9.84105, + "44": 9.95918, + "45": 9.84079, + "46": 9.82479, + "47": 10.13878, + "48": 9.85831, + "49": 9.54705, + "50": 9.90875, + "51": 9.8558, 
+ "52": 9.75237, + "53": 10.07589, + "54": 9.95688, + "55": 9.88203, + "56": 9.6313, + "57": 9.48649, + "58": 9.83109, + "59": 9.58897, + "60": 9.50643, + "61": 9.70363, + "62": 9.98286, + "63": 9.38302, + "64": 9.77901, + "65": 8.95166, + "66": 9.70158, + "67": 9.37203, + "68": 9.78849, + "69": 9.79851, + "70": 9.74737, + "71": 9.61908, + "72": 9.58502, + "73": 9.49721, + "74": 8.93927, + "75": 9.42703, + "76": 9.0802, + "77": 10.06567, + "78": 9.72893, + "79": 9.3776, + "80": 9.40982, + "81": 9.47976, + "82": 9.7018, + "83": 9.30612, + "84": 9.4209, + "85": 9.61371, + "86": 9.07649, + "87": 9.5945, + "88": 9.75068, + "89": 9.60238, + "90": 9.81898, + "91": 9.33894, + "92": 9.35716, + "93": 9.07879, + "94": 8.83503, + "95": 9.52172, + "96": 9.53003, + "97": 9.31306, + "98": 9.67783, + "99": 8.89058, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1688.0, + "2": 1806.0, + "3": 1675.0, + "4": 1842.0, + "5": 1909.0, + "6": 1908.0, + "7": 1783.0, + "8": 1611.0, + "9": 1753.0, + "10": 1457.0, + "11": 1880.0, + "12": 1683.0, + "13": 1907.0, + "14": 1733.0, + "15": 1930.0, + "16": 1840.0, + "17": 1892.0, + "18": 1650.0, + "19": 1790.0, + "20": 1596.0, + "21": 1765.0, + "22": 1616.0, + "23": 1974.0, + "24": 1621.0, + "25": 1557.0, + "26": 1745.0, + "27": 1722.0, + "28": 1976.0, + "29": 2068.0, + "30": 1860.0, + "31": 1536.0, + "32": 1883.0, + "33": 2071.0, + "34": 1894.0, + "35": 1902.0, + "36": 1885.0, + "37": 2231.0, + "38": 2129.0, + "39": 2333.0, + "40": 2207.0, + "41": 2193.0, + "42": 2322.0, + "43": 2015.0, + "44": 2089.0, + "45": 2095.0, + "46": 2392.0, + "47": 2430.0, + "48": 2414.0, + "49": 2340.0, + "50": 2416.0, + "51": 2613.0, + "52": 2538.0, + "53": 2792.0, + "54": 2801.0, + "55": 2216.0, + "56": 2858.0, + "57": 2381.0, + "58": 2854.0, + "59": 2787.0, + "60": 2457.0, + "61": 2941.0, + "62": 2543.0, + "63": 2408.0, + "64": 2968.0, + "65": 2472.0, + "66": 2977.0, + "67": 2839.0, + "68": 
2775.0, + "69": 2832.0, + "70": 3057.0, + "71": 2909.0, + "72": 2421.0, + "73": 2982.0, + "74": 1922.0, + "75": 2474.0, + "76": 3059.0, + "77": 3177.0, + "78": 3067.0, + "79": 3052.0, + "80": 3338.0, + "81": 3644.0, + "82": 3234.0, + "83": 2798.0, + "84": 3196.0, + "85": 3324.0, + "86": 2855.0, + "87": 3820.0, + "88": 2962.0, + "89": 3379.0, + "90": 3096.0, + "91": 2857.0, + "92": 3077.0, + "93": 2693.0, + "94": 3312.0, + "95": 3399.0, + "96": 3378.0, + "97": 3030.0, + "98": 3619.0, + "99": 3160.0, + "100": 3128.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 517505536.0, + "2": 517505536.0, + "3": 517505536.0, + "4": 517505536.0, + "5": 517505536.0, + "6": 517505536.0, + "7": 517505536.0, + "8": 517505536.0, + "9": 517505536.0, + "10": 517505536.0, + "11": 517505536.0, + "12": 517505536.0, + "13": 517505536.0, + "14": 517505536.0, + "15": 517505536.0, + "16": 517505536.0, + "17": 517505536.0, + "18": 517505536.0, + "19": 517505536.0, + "20": 517505536.0, + "21": 517505536.0, + "22": 517505536.0, + "23": 517505536.0, + "24": 517505536.0, + "25": 517505536.0, + "26": 517505536.0, + "27": 517505536.0, + "28": 517505536.0, + "29": 517505536.0, + "30": 517505536.0, + "31": 517505536.0, + "32": 517505536.0, + "33": 517505536.0, + "34": 517505536.0, + "35": 517505536.0, + "36": 517505536.0, + "37": 517505536.0, + "38": 517505536.0, + "39": 517505536.0, + "40": 517505536.0, + "41": 517505536.0, + "42": 517505536.0, + "43": 517505536.0, + "44": 517505536.0, + "45": 517505536.0, + "46": 517505536.0, + "47": 517505536.0, + "48": 517505536.0, + "49": 517505536.0, + "50": 517505536.0, + "51": 517505536.0, + "52": 517505536.0, + "53": 517505536.0, + "54": 517505536.0, + "55": 517505536.0, + "56": 517505536.0, + "57": 517505536.0, + "58": 517505536.0, + "59": 517505536.0, + "60": 517505536.0, + "61": 517505536.0, + "62": 517505536.0, + "63": 517505536.0, + "64": 517505536.0, + "65": 517505536.0, + "66": 
517505536.0, + "67": 517505536.0, + "68": 517505536.0, + "69": 517505536.0, + "70": 517505536.0, + "71": 517505536.0, + "72": 517505536.0, + "73": 517505536.0, + "74": 517505536.0, + "75": 517505536.0, + "76": 517505536.0, + "77": 517505536.0, + "78": 517505536.0, + "79": 517505536.0, + "80": 517505536.0, + "81": 517505536.0, + "82": 517505536.0, + "83": 517505536.0, + "84": 517505536.0, + "85": 517505536.0, + "86": 517505536.0, + "87": 517505536.0, + "88": 517505536.0, + "89": 517505536.0, + "90": 517505536.0, + "91": 517505536.0, + "92": 517505536.0, + "93": 517505536.0, + "94": 517505536.0, + "95": 517505536.0, + "96": 517505536.0, + "97": 517505536.0, + "98": 517505536.0, + "99": 517505536.0, + "100": 517505536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1246524928.0, + "2": 1428695552.0, + "3": 1428695552.0, + "4": 1428695552.0, + "5": 1428695552.0, + "6": 1428695552.0, + "7": 1428695552.0, + "8": 1428695552.0, + "9": 1428695552.0, + "10": 1428695552.0, + "11": 1428695552.0, + "12": 1428695552.0, + "13": 1428695552.0, + "14": 1428695552.0, + "15": 1428695552.0, + "16": 1428695552.0, + "17": 1428695552.0, + "18": 1428695552.0, + "19": 1428695552.0, + "20": 1428695552.0, + "21": 1428695552.0, + "22": 1428695552.0, + "23": 1428695552.0, + "24": 1428695552.0, + "25": 1428695552.0, + "26": 1428695552.0, + "27": 1428695552.0, + "28": 1428695552.0, + "29": 1428695552.0, + "30": 1428695552.0, + "31": 1428695552.0, + "32": 1428695552.0, + "33": 1428695552.0, + "34": 1428695552.0, + "35": 1428695552.0, + "36": 1428695552.0, + "37": 1428695552.0, + "38": 1428695552.0, + "39": 1428695552.0, + "40": 1428695552.0, + "41": 1428695552.0, + "42": 1428695552.0, + "43": 1428695552.0, + "44": 1428695552.0, + "45": 1428695552.0, + "46": 1428695552.0, + "47": 1428695552.0, + "48": 1428695552.0, + "49": 1428695552.0, + "50": 1428695552.0, + "51": 1428695552.0, + "52": 1428695552.0, + "53": 1428695552.0, + 
"54": 1428695552.0, + "55": 1428695552.0, + "56": 1428695552.0, + "57": 1428695552.0, + "58": 1428695552.0, + "59": 1428695552.0, + "60": 1428695552.0, + "61": 1428695552.0, + "62": 1428695552.0, + "63": 1428695552.0, + "64": 1428695552.0, + "65": 1428695552.0, + "66": 1428695552.0, + "67": 1428695552.0, + "68": 1428695552.0, + "69": 1428695552.0, + "70": 1428695552.0, + "71": 1428695552.0, + "72": 1428695552.0, + "73": 1428695552.0, + "74": 1428695552.0, + "75": 1428695552.0, + "76": 1428695552.0, + "77": 1428695552.0, + "78": 1428695552.0, + "79": 1428695552.0, + "80": 1428695552.0, + "81": 1428695552.0, + "82": 1428695552.0, + "83": 1428695552.0, + "84": 1428695552.0, + "85": 1428695552.0, + "86": 1428695552.0, + "87": 1428695552.0, + "88": 1428695552.0, + "89": 1428695552.0, + "90": 1428695552.0, + "91": 1428695552.0, + "92": 1428695552.0, + "93": 1428695552.0, + "94": 1428695552.0, + "95": 1428695552.0, + "96": 1428695552.0, + "97": 1428695552.0, + "98": 1428695552.0, + "99": 1428695552.0, + "100": 1428695552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.77129, + "2": 0.18805, + "3": 0.15486, + "4": 0.15531, + "5": 0.15342, + "6": 0.15402, + "7": 0.15787, + "8": 0.15837, + "9": 0.15422, + "10": 0.1531, + "11": 0.1531, + "12": 0.1521, + "13": 0.15206, + "14": 0.15281, + "15": 0.15025, + "16": 0.15321, + "17": 0.15383, + "18": 0.15265, + "19": 0.15535, + "20": 0.15414, + "21": 0.15275, + "22": 0.152, + "23": 0.15456, + "24": 0.15209, + "25": 0.15358, + "26": 0.15228, + "27": 0.15217, + "28": 0.15204, + "29": 0.1526, + "30": 0.15259, + "31": 0.15237, + "32": 0.15885, + "33": 0.1577, + "34": 0.16029, + "35": 0.15618, + "36": 0.16006, + "37": 0.15686, + "38": 0.15897, + "39": 0.15985, + "40": 0.15818, + "41": 0.15734, + "42": 0.15623, + "43": 0.15982, + "44": 0.15844, + "45": 0.15965, + "46": 0.15995, + "47": 0.1576, + "48": 0.15787, + "49": 0.15857, + "50": 0.16598, + "51": 0.15831, + "52": 
0.15281, + "53": 0.15278, + "54": 0.15155, + "55": 0.1544, + "56": 0.15102, + "57": 0.1505, + "58": 0.15177, + "59": 0.15275, + "60": 0.15179, + "61": 0.15138, + "62": 0.153, + "63": 0.14962, + "64": 0.15104, + "65": 0.15104, + "66": 0.1541, + "67": 0.15089, + "68": 0.15178, + "69": 0.15241, + "70": 0.1524, + "71": 0.14991, + "72": 0.15107, + "73": 0.15205, + "74": 0.15105, + "75": 0.14944, + "76": 0.15086, + "77": 0.15066, + "78": 0.15037, + "79": 0.1517, + "80": 0.1535, + "81": 0.15067, + "82": 0.15202, + "83": 0.1513, + "84": 0.15157, + "85": 0.15077, + "86": 0.15249, + "87": 0.15259, + "88": 0.15065, + "89": 0.15236, + "90": 0.15088, + "91": 0.15271, + "92": 0.15124, + "93": 0.15371, + "94": 0.14949, + "95": 0.15169, + "96": 0.15061, + "97": 0.15123, + "98": 0.15143, + "99": 0.15292, + "100": 0.15348 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..798f3341573 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92705, + "2": 10.92645, + "3": 10.91604, + "4": 10.90911, + "5": 10.92795, + "6": 10.93626, + "7": 10.90626, + "8": 10.92128, + "9": 10.90998, + "10": 10.90786, + "11": 10.89335, + "12": 10.92456, + "13": 10.9146, + "14": 10.9213, + "15": 10.88314, + "16": 10.87325, + "17": 10.84129, + "18": 10.87276, + "19": 10.8563, + "20": 10.77629, + "21": 10.74869, + "22": 10.63031, + "23": 10.75678, + "24": 10.65646, + "25": 10.59141, + "26": 
10.65375, + "27": 10.6485, + "28": 10.59548, + "29": 10.6088, + "30": 10.39192, + "31": 10.15753, + "32": 10.49098, + "33": 10.4793, + "34": 10.24058, + "35": 10.29686, + "36": 10.24644, + "37": 10.35232, + "38": 10.20489, + "39": 10.4052, + "40": 10.0964, + "41": 10.15175, + "42": 10.22026, + "43": 9.85499, + "44": 9.96143, + "45": 9.84464, + "46": 9.83801, + "47": 10.13988, + "48": 9.85718, + "49": 9.53698, + "50": 9.90918, + "51": 9.84886, + "52": 9.74154, + "53": 10.06347, + "54": 9.94683, + "55": 9.87762, + "56": 9.6274, + "57": 9.47112, + "58": 9.82925, + "59": 9.58253, + "60": 9.49121, + "61": 9.69956, + "62": 9.97968, + "63": 9.37277, + "64": 9.77468, + "65": 8.94236, + "66": 9.6991, + "67": 9.36382, + "68": 9.78787, + "69": 9.78332, + "70": 9.72266, + "71": 9.60801, + "72": 9.58459, + "73": 9.48963, + "74": 8.94871, + "75": 9.41912, + "76": 9.08725, + "77": 10.06354, + "78": 9.72835, + "79": 9.37162, + "80": 9.40077, + "81": 9.47843, + "82": 9.69177, + "83": 9.3076, + "84": 9.41232, + "85": 9.61207, + "86": 9.07599, + "87": 9.59468, + "88": 9.74738, + "89": 9.60686, + "90": 9.81015, + "91": 9.34359, + "92": 9.36482, + "93": 9.07761, + "94": 8.83108, + "95": 9.51716, + "96": 9.52447, + "97": 9.31027, + "98": 9.67892, + "99": 8.88832, + "100": 9.4015 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1627.0, + "2": 1801.0, + "3": 1730.0, + "4": 1762.0, + "5": 2010.0, + "6": 1889.0, + "7": 1888.0, + "8": 1729.0, + "9": 1852.0, + "10": 1368.0, + "11": 1973.0, + "12": 1722.0, + "13": 1966.0, + "14": 1874.0, + "15": 1897.0, + "16": 1785.0, + "17": 1942.0, + "18": 1718.0, + "19": 1716.0, + "20": 1626.0, + "21": 1797.0, + "22": 1673.0, + "23": 1937.0, + "24": 1561.0, + "25": 1743.0, + "26": 1917.0, + "27": 1886.0, + "28": 1968.0, + "29": 2029.0, + "30": 1930.0, + "31": 1635.0, + "32": 1974.0, + "33": 2159.0, + "34": 2035.0, + "35": 1954.0, + "36": 1948.0, + "37": 2317.0, + "38": 2312.0, + "39": 2458.0, + 
"40": 2199.0, + "41": 2352.0, + "42": 2288.0, + "43": 2005.0, + "44": 2191.0, + "45": 2068.0, + "46": 2272.0, + "47": 2530.0, + "48": 2458.0, + "49": 2252.0, + "50": 2460.0, + "51": 2777.0, + "52": 2659.0, + "53": 2959.0, + "54": 2700.0, + "55": 2427.0, + "56": 2797.0, + "57": 2430.0, + "58": 3077.0, + "59": 2781.0, + "60": 2380.0, + "61": 2816.0, + "62": 2812.0, + "63": 2452.0, + "64": 2958.0, + "65": 2657.0, + "66": 3208.0, + "67": 2786.0, + "68": 2842.0, + "69": 2927.0, + "70": 3265.0, + "71": 3098.0, + "72": 2445.0, + "73": 3120.0, + "74": 1900.0, + "75": 2675.0, + "76": 3065.0, + "77": 3452.0, + "78": 3263.0, + "79": 3398.0, + "80": 3434.0, + "81": 3695.0, + "82": 3308.0, + "83": 2935.0, + "84": 3423.0, + "85": 3302.0, + "86": 2785.0, + "87": 3788.0, + "88": 3030.0, + "89": 3532.0, + "90": 3230.0, + "91": 2681.0, + "92": 3175.0, + "93": 2718.0, + "94": 3392.0, + "95": 3340.0, + "96": 3504.0, + "97": 3227.0, + "98": 3757.0, + "99": 3245.0, + "100": 3291.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 489193472.0, + "2": 489193472.0, + "3": 489193472.0, + "4": 489193472.0, + "5": 489193472.0, + "6": 489193472.0, + "7": 489193472.0, + "8": 489193472.0, + "9": 489193472.0, + "10": 489193472.0, + "11": 489193472.0, + "12": 489193472.0, + "13": 489193472.0, + "14": 489193472.0, + "15": 489193472.0, + "16": 489193472.0, + "17": 489193472.0, + "18": 489193472.0, + "19": 489193472.0, + "20": 489193472.0, + "21": 489193472.0, + "22": 489193472.0, + "23": 489193472.0, + "24": 489193472.0, + "25": 489193472.0, + "26": 489193472.0, + "27": 489193472.0, + "28": 489193472.0, + "29": 489193472.0, + "30": 489193472.0, + "31": 489193472.0, + "32": 489193472.0, + "33": 489193472.0, + "34": 489193472.0, + "35": 489193472.0, + "36": 489193472.0, + "37": 489193472.0, + "38": 489193472.0, + "39": 489193472.0, + "40": 489193472.0, + "41": 489193472.0, + "42": 489193472.0, + "43": 489193472.0, + "44": 489193472.0, 
+ "45": 489193472.0, + "46": 489193472.0, + "47": 489193472.0, + "48": 489193472.0, + "49": 489193472.0, + "50": 489193472.0, + "51": 489193472.0, + "52": 489193472.0, + "53": 489193472.0, + "54": 489193472.0, + "55": 489193472.0, + "56": 489193472.0, + "57": 489193472.0, + "58": 489193472.0, + "59": 489193472.0, + "60": 489193472.0, + "61": 489193472.0, + "62": 489193472.0, + "63": 489193472.0, + "64": 489193472.0, + "65": 489193472.0, + "66": 489193472.0, + "67": 489193472.0, + "68": 489193472.0, + "69": 489193472.0, + "70": 489193472.0, + "71": 489193472.0, + "72": 489193472.0, + "73": 489193472.0, + "74": 489193472.0, + "75": 489193472.0, + "76": 489193472.0, + "77": 489193472.0, + "78": 489193472.0, + "79": 489193472.0, + "80": 489193472.0, + "81": 489193472.0, + "82": 489193472.0, + "83": 489193472.0, + "84": 489193472.0, + "85": 489193472.0, + "86": 489193472.0, + "87": 489193472.0, + "88": 489193472.0, + "89": 489193472.0, + "90": 489193472.0, + "91": 489193472.0, + "92": 489193472.0, + "93": 489193472.0, + "94": 489193472.0, + "95": 489193472.0, + "96": 489193472.0, + "97": 489193472.0, + "98": 489193472.0, + "99": 489193472.0, + "100": 489193472.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1720084480.0, + "2": 1902255104.0, + "3": 1902255104.0, + "4": 1902255104.0, + "5": 1902255104.0, + "6": 1902255104.0, + "7": 1902255104.0, + "8": 1902255104.0, + "9": 1902255104.0, + "10": 1902255104.0, + "11": 1902255104.0, + "12": 1902255104.0, + "13": 1902255104.0, + "14": 1902255104.0, + "15": 1902255104.0, + "16": 1902255104.0, + "17": 1902255104.0, + "18": 1902255104.0, + "19": 1902255104.0, + "20": 1902255104.0, + "21": 1902255104.0, + "22": 1902255104.0, + "23": 1902255104.0, + "24": 1902255104.0, + "25": 1902255104.0, + "26": 1902255104.0, + "27": 1902255104.0, + "28": 1902255104.0, + "29": 1902255104.0, + "30": 1902255104.0, + "31": 1902255104.0, + "32": 1902255104.0, + "33": 
1902255104.0, + "34": 1902255104.0, + "35": 1902255104.0, + "36": 1902255104.0, + "37": 1902255104.0, + "38": 1902255104.0, + "39": 1902255104.0, + "40": 1902255104.0, + "41": 1902255104.0, + "42": 1902255104.0, + "43": 1902255104.0, + "44": 1902255104.0, + "45": 1902255104.0, + "46": 1902255104.0, + "47": 1902255104.0, + "48": 1902255104.0, + "49": 1902255104.0, + "50": 1902255104.0, + "51": 1902255104.0, + "52": 1902255104.0, + "53": 1902255104.0, + "54": 1902255104.0, + "55": 1902255104.0, + "56": 1902255104.0, + "57": 1902255104.0, + "58": 1902255104.0, + "59": 1902255104.0, + "60": 1902255104.0, + "61": 1902255104.0, + "62": 1902255104.0, + "63": 1902255104.0, + "64": 1902255104.0, + "65": 1902255104.0, + "66": 1902255104.0, + "67": 1902255104.0, + "68": 1902910464.0, + "69": 1902910464.0, + "70": 1902910464.0, + "71": 1902910464.0, + "72": 1902910464.0, + "73": 1902910464.0, + "74": 1902910464.0, + "75": 1902910464.0, + "76": 1902910464.0, + "77": 1902910464.0, + "78": 1902910464.0, + "79": 1902910464.0, + "80": 1902910464.0, + "81": 1902910464.0, + "82": 1902910464.0, + "83": 1902910464.0, + "84": 1902910464.0, + "85": 1902910464.0, + "86": 1902910464.0, + "87": 1902910464.0, + "88": 1902910464.0, + "89": 1902910464.0, + "90": 1902910464.0, + "91": 1902910464.0, + "92": 1902910464.0, + "93": 1902910464.0, + "94": 1902910464.0, + "95": 1902910464.0, + "96": 1902910464.0, + "97": 1902910464.0, + "98": 1902910464.0, + "99": 1902910464.0, + "100": 1902910464.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.34333, + "2": 0.56623, + "3": 0.22775, + "4": 0.22931, + "5": 0.22667, + "6": 0.22758, + "7": 0.23105, + "8": 0.22555, + "9": 0.22541, + "10": 0.22533, + "11": 0.26995, + "12": 0.22791, + "13": 0.22744, + "14": 0.2254, + "15": 0.22691, + "16": 0.22536, + "17": 0.22399, + "18": 0.224, + "19": 0.22435, + "20": 0.22788, + "21": 0.22441, + "22": 0.2236, + "23": 0.22313, + "24": 0.22481, + "25": 
0.22503, + "26": 0.22356, + "27": 0.22387, + "28": 0.22422, + "29": 0.22896, + "30": 0.22362, + "31": 0.22424, + "32": 0.22361, + "33": 0.2255, + "34": 0.22376, + "35": 0.2227, + "36": 0.22202, + "37": 0.22249, + "38": 0.22911, + "39": 0.22157, + "40": 0.22231, + "41": 0.22166, + "42": 0.22525, + "43": 0.2221, + "44": 0.22185, + "45": 0.22126, + "46": 0.22185, + "47": 0.2264, + "48": 0.22191, + "49": 0.2212, + "50": 0.22178, + "51": 0.23228, + "52": 0.22482, + "53": 0.22431, + "54": 0.22641, + "55": 0.22437, + "56": 0.22665, + "57": 0.22617, + "58": 0.2284, + "59": 0.22644, + "60": 0.22523, + "61": 0.22532, + "62": 0.2282, + "63": 0.22526, + "64": 0.22535, + "65": 0.22523, + "66": 0.22567, + "67": 0.22948, + "68": 0.22527, + "69": 0.22591, + "70": 0.22514, + "71": 0.2281, + "72": 0.22718, + "73": 0.22617, + "74": 0.22559, + "75": 0.22567, + "76": 0.22848, + "77": 0.22459, + "78": 0.22571, + "79": 0.22534, + "80": 0.22962, + "81": 0.2301, + "82": 0.22809, + "83": 0.2285, + "84": 0.22921, + "85": 0.2309, + "86": 0.22744, + "87": 0.22777, + "88": 0.22831, + "89": 0.23199, + "90": 0.22761, + "91": 0.22896, + "92": 0.22814, + "93": 0.23065, + "94": 0.22829, + "95": 0.22767, + "96": 0.22866, + "97": 0.22828, + "98": 0.23227, + "99": 0.22772, + "100": 0.2283 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..1bd58f46aa2 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 
10.92705, + "2": 10.92645, + "3": 10.91604, + "4": 10.90911, + "5": 10.92795, + "6": 10.93626, + "7": 10.90626, + "8": 10.92128, + "9": 10.90998, + "10": 10.90786, + "11": 10.89335, + "12": 10.92456, + "13": 10.9146, + "14": 10.9213, + "15": 10.88314, + "16": 10.87325, + "17": 10.84129, + "18": 10.87276, + "19": 10.8563, + "20": 10.77629, + "21": 10.74869, + "22": 10.63031, + "23": 10.75678, + "24": 10.65646, + "25": 10.59141, + "26": 10.65375, + "27": 10.6485, + "28": 10.59548, + "29": 10.6088, + "30": 10.39192, + "31": 10.15753, + "32": 10.49098, + "33": 10.4793, + "34": 10.24058, + "35": 10.29686, + "36": 10.24644, + "37": 10.35232, + "38": 10.20489, + "39": 10.4052, + "40": 10.0964, + "41": 10.15175, + "42": 10.22026, + "43": 9.85499, + "44": 9.96143, + "45": 9.84464, + "46": 9.83801, + "47": 10.13988, + "48": 9.85718, + "49": 9.53698, + "50": 9.90918, + "51": 9.84886, + "52": 9.74154, + "53": 10.06347, + "54": 9.94683, + "55": 9.87762, + "56": 9.6274, + "57": 9.47112, + "58": 9.82925, + "59": 9.58253, + "60": 9.49121, + "61": 9.69956, + "62": 9.97968, + "63": 9.37277, + "64": 9.77468, + "65": 8.94236, + "66": 9.6991, + "67": 9.36382, + "68": 9.78787, + "69": 9.78332, + "70": 9.72266, + "71": 9.60801, + "72": 9.58459, + "73": 9.48963, + "74": 8.94871, + "75": 9.41912, + "76": 9.08725, + "77": 10.06354, + "78": 9.72835, + "79": 9.37162, + "80": 9.40077, + "81": 9.47843, + "82": 9.69177, + "83": 9.3076, + "84": 9.41232, + "85": 9.61207, + "86": 9.07599, + "87": 9.59468, + "88": 9.74738, + "89": 9.60686, + "90": 9.81015, + "91": 9.34359, + "92": 9.36482, + "93": 9.07761, + "94": 8.83108, + "95": 9.51716, + "96": 9.52447, + "97": 9.31027, + "98": 9.67892, + "99": 8.88832, + "100": 9.4015 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1627.0, + "2": 1801.0, + "3": 1730.0, + "4": 1762.0, + "5": 2010.0, + "6": 1889.0, + "7": 1888.0, + "8": 1729.0, + "9": 1852.0, + "10": 1368.0, + "11": 1973.0, + "12": 1722.0, 
+ "13": 1966.0, + "14": 1874.0, + "15": 1897.0, + "16": 1785.0, + "17": 1942.0, + "18": 1718.0, + "19": 1716.0, + "20": 1626.0, + "21": 1797.0, + "22": 1673.0, + "23": 1937.0, + "24": 1561.0, + "25": 1743.0, + "26": 1917.0, + "27": 1886.0, + "28": 1968.0, + "29": 2029.0, + "30": 1930.0, + "31": 1635.0, + "32": 1974.0, + "33": 2159.0, + "34": 2035.0, + "35": 1954.0, + "36": 1948.0, + "37": 2317.0, + "38": 2312.0, + "39": 2458.0, + "40": 2199.0, + "41": 2352.0, + "42": 2288.0, + "43": 2005.0, + "44": 2191.0, + "45": 2068.0, + "46": 2272.0, + "47": 2530.0, + "48": 2458.0, + "49": 2252.0, + "50": 2460.0, + "51": 2777.0, + "52": 2659.0, + "53": 2959.0, + "54": 2700.0, + "55": 2427.0, + "56": 2797.0, + "57": 2430.0, + "58": 3077.0, + "59": 2781.0, + "60": 2380.0, + "61": 2816.0, + "62": 2812.0, + "63": 2452.0, + "64": 2958.0, + "65": 2657.0, + "66": 3208.0, + "67": 2786.0, + "68": 2842.0, + "69": 2927.0, + "70": 3265.0, + "71": 3098.0, + "72": 2445.0, + "73": 3120.0, + "74": 1900.0, + "75": 2675.0, + "76": 3065.0, + "77": 3452.0, + "78": 3263.0, + "79": 3398.0, + "80": 3434.0, + "81": 3695.0, + "82": 3308.0, + "83": 2935.0, + "84": 3423.0, + "85": 3302.0, + "86": 2785.0, + "87": 3788.0, + "88": 3030.0, + "89": 3532.0, + "90": 3230.0, + "91": 2681.0, + "92": 3175.0, + "93": 2718.0, + "94": 3392.0, + "95": 3340.0, + "96": 3504.0, + "97": 3227.0, + "98": 3757.0, + "99": 3245.0, + "100": 3291.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 487096320.0, + "2": 487096320.0, + "3": 487096320.0, + "4": 487096320.0, + "5": 487096320.0, + "6": 487096320.0, + "7": 487096320.0, + "8": 487096320.0, + "9": 487096320.0, + "10": 487096320.0, + "11": 487096320.0, + "12": 487096320.0, + "13": 487096320.0, + "14": 487096320.0, + "15": 487096320.0, + "16": 487096320.0, + "17": 487096320.0, + "18": 487096320.0, + "19": 487096320.0, + "20": 487096320.0, + "21": 487096320.0, + "22": 487096320.0, + "23": 487096320.0, + "24": 
487096320.0, + "25": 487096320.0, + "26": 487096320.0, + "27": 487096320.0, + "28": 487096320.0, + "29": 487096320.0, + "30": 487096320.0, + "31": 487096320.0, + "32": 487096320.0, + "33": 487096320.0, + "34": 487096320.0, + "35": 487096320.0, + "36": 487096320.0, + "37": 487096320.0, + "38": 487096320.0, + "39": 487096320.0, + "40": 487096320.0, + "41": 487096320.0, + "42": 487096320.0, + "43": 487096320.0, + "44": 487096320.0, + "45": 487096320.0, + "46": 487096320.0, + "47": 487096320.0, + "48": 487096320.0, + "49": 487096320.0, + "50": 487096320.0, + "51": 487096320.0, + "52": 487096320.0, + "53": 487096320.0, + "54": 487096320.0, + "55": 487096320.0, + "56": 487096320.0, + "57": 487096320.0, + "58": 487096320.0, + "59": 487096320.0, + "60": 487096320.0, + "61": 487096320.0, + "62": 487096320.0, + "63": 487096320.0, + "64": 487096320.0, + "65": 487096320.0, + "66": 487096320.0, + "67": 487096320.0, + "68": 487096320.0, + "69": 487096320.0, + "70": 487096320.0, + "71": 487096320.0, + "72": 487096320.0, + "73": 487096320.0, + "74": 487096320.0, + "75": 487096320.0, + "76": 487096320.0, + "77": 487096320.0, + "78": 487096320.0, + "79": 487096320.0, + "80": 487096320.0, + "81": 487096320.0, + "82": 487096320.0, + "83": 487096320.0, + "84": 487096320.0, + "85": 487096320.0, + "86": 487096320.0, + "87": 487096320.0, + "88": 487096320.0, + "89": 487096320.0, + "90": 487096320.0, + "91": 487096320.0, + "92": 487096320.0, + "93": 487096320.0, + "94": 487096320.0, + "95": 487096320.0, + "96": 487096320.0, + "97": 487096320.0, + "98": 487096320.0, + "99": 487096320.0, + "100": 487096320.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1720084480.0, + "2": 1900157952.0, + "3": 1901074432.0, + "4": 1901074432.0, + "5": 1901074432.0, + "6": 1901074432.0, + "7": 1901074432.0, + "8": 1901074432.0, + "9": 1901074432.0, + "10": 1901074432.0, + "11": 1901074432.0, + "12": 1901074432.0, + "13": 1901074432.0, 
+ "14": 1901074432.0, + "15": 1901074432.0, + "16": 1901074432.0, + "17": 1901074432.0, + "18": 1901074432.0, + "19": 1901074432.0, + "20": 1901074432.0, + "21": 1901074432.0, + "22": 1901074432.0, + "23": 1901074432.0, + "24": 1901074432.0, + "25": 1901074432.0, + "26": 1901074432.0, + "27": 1901074432.0, + "28": 1901074432.0, + "29": 1901074432.0, + "30": 1901074432.0, + "31": 1901074432.0, + "32": 1901074432.0, + "33": 1901074432.0, + "34": 1901074432.0, + "35": 1901074432.0, + "36": 1901074432.0, + "37": 1901074432.0, + "38": 1901074432.0, + "39": 1901074432.0, + "40": 1901074432.0, + "41": 1901074432.0, + "42": 1901074432.0, + "43": 1901074432.0, + "44": 1901074432.0, + "45": 1901074432.0, + "46": 1901074432.0, + "47": 1901074432.0, + "48": 1901074432.0, + "49": 1901074432.0, + "50": 1901074432.0, + "51": 1901074432.0, + "52": 1901074432.0, + "53": 1901074432.0, + "54": 1901074432.0, + "55": 1901074432.0, + "56": 1901074432.0, + "57": 1901074432.0, + "58": 1901074432.0, + "59": 1901074432.0, + "60": 1901074432.0, + "61": 1901074432.0, + "62": 1901074432.0, + "63": 1901074432.0, + "64": 1901074432.0, + "65": 1901074432.0, + "66": 1901074432.0, + "67": 1901074432.0, + "68": 1901074432.0, + "69": 1901074432.0, + "70": 1901074432.0, + "71": 1901074432.0, + "72": 1901074432.0, + "73": 1901074432.0, + "74": 1901074432.0, + "75": 1901074432.0, + "76": 1901074432.0, + "77": 1901074432.0, + "78": 1901074432.0, + "79": 1901074432.0, + "80": 1901074432.0, + "81": 1901074432.0, + "82": 1901074432.0, + "83": 1901074432.0, + "84": 1901074432.0, + "85": 1901074432.0, + "86": 1901074432.0, + "87": 1901074432.0, + "88": 1901074432.0, + "89": 1901074432.0, + "90": 1901074432.0, + "91": 1901074432.0, + "92": 1901074432.0, + "93": 1901074432.0, + "94": 1901074432.0, + "95": 1901074432.0, + "96": 1901074432.0, + "97": 1901075456.0, + "98": 1901075456.0, + "99": 1901075456.0, + "100": 1901075456.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 14.46737, + "2": 0.26476, + "3": 0.23109, + "4": 0.22854, + "5": 0.22879, + "6": 0.2287, + "7": 0.23086, + "8": 0.2297, + "9": 0.23098, + "10": 0.23075, + "11": 0.23448, + "12": 0.22804, + "13": 0.22739, + "14": 0.22761, + "15": 0.23146, + "16": 0.23026, + "17": 0.22798, + "18": 0.22761, + "19": 0.22857, + "20": 0.23372, + "21": 0.22829, + "22": 0.22692, + "23": 0.22737, + "24": 0.2331, + "25": 0.22606, + "26": 0.22294, + "27": 0.22159, + "28": 0.22628, + "29": 0.22561, + "30": 0.22244, + "31": 0.22214, + "32": 0.22237, + "33": 0.22509, + "34": 0.2221, + "35": 0.22109, + "36": 0.22181, + "37": 0.22344, + "38": 0.22457, + "39": 0.22467, + "40": 0.22286, + "41": 0.22296, + "42": 0.45657, + "43": 0.22367, + "44": 0.22117, + "45": 0.22234, + "46": 0.22174, + "47": 0.21959, + "48": 0.22089, + "49": 0.2205, + "50": 0.22426, + "51": 0.22836, + "52": 0.22291, + "53": 0.22086, + "54": 0.22358, + "55": 0.22346, + "56": 0.22218, + "57": 0.22243, + "58": 0.22521, + "59": 0.22456, + "60": 0.22259, + "61": 0.22057, + "62": 0.22205, + "63": 0.22691, + "64": 0.22417, + "65": 0.22198, + "66": 0.22355, + "67": 0.22656, + "68": 0.22317, + "69": 0.22524, + "70": 0.22257, + "71": 0.22136, + "72": 0.22488, + "73": 0.22888, + "74": 0.22324, + "75": 0.22323, + "76": 0.22142, + "77": 0.22393, + "78": 0.22004, + "79": 0.21926, + "80": 0.22221, + "81": 0.22531, + "82": 0.22283, + "83": 0.22227, + "84": 0.22148, + "85": 0.2249, + "86": 0.22229, + "87": 0.22163, + "88": 0.222, + "89": 0.22492, + "90": 0.23375, + "91": 0.22011, + "92": 0.21919, + "93": 0.2217, + "94": 0.22533, + "95": 0.22265, + "96": 0.22352, + "97": 0.2219, + "98": 0.22608, + "99": 0.23763, + "100": 0.22445 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 41c7d6f3fd5..f5b16bf0710 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.85949, "5": 10.88343, "10": 10.83882, "15": 10.84047, "20": 10.73196, "25": 10.54812, "30": 10.37134, "35": 10.27171, "40": 10.09427, "45": 9.84081, "50": 9.90876, "55": 9.882, "60": 9.50647, "65": 8.95171, "70": 9.74738, "75": 9.42706, "80": 9.40987, "85": 9.61376, "90": 9.81895, "95": 9.52168, "100": 9.39725}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 71.0, "5": 55.0, "10": 65.0, "15": 71.0, "20": 61.0, "25": 66.0, "30": 71.0, "35": 69.0, "40": 81.0, "45": 85.0, "50": 80.0, "55": 58.0, "60": 84.0, "65": 81.0, "70": 88.0, "75": 70.0, "80": 90.0, "85": 89.0, "90": 72.0, "95": 70.0, "100": 75.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 520651264.0, "5": 520651264.0, "10": 520651264.0, "15": 520651264.0, "20": 520651264.0, "25": 520651264.0, "30": 520651264.0, "35": 520651264.0, "40": 520651264.0, "45": 520651264.0, "50": 520651264.0, "55": 520651264.0, "60": 520651264.0, "65": 520651264.0, "70": 520651264.0, "75": 520651264.0, "80": 520651264.0, "85": 520651264.0, "90": 520651264.0, "95": 520651264.0, "100": 520651264.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1687975424.0, "5": 1870146048.0, "10": 1870146048.0, "15": 1870146048.0, "20": 1870146048.0, "25": 1870146048.0, "30": 1870146048.0, "35": 
1870146048.0, "40": 1870146048.0, "45": 1870146048.0, "50": 1870146048.0, "55": 1870146048.0, "60": 1870146048.0, "65": 1870146048.0, "70": 1870146048.0, "75": 1870146048.0, "80": 1870146048.0, "85": 1870146048.0, "90": 1870146048.0, "95": 1870146048.0, "100": 1870146048.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 12.28055, "5": 0.12657, "10": 0.12544, "15": 0.13519, "20": 0.12958, "25": 0.12817, "30": 0.1293, "35": 0.12396, "40": 0.1241, "45": 0.12562, "50": 0.1228, "55": 0.127, "60": 0.12853, "65": 0.12708, "70": 0.12816, "75": 0.12308, "80": 0.12181, "85": 0.12079, "90": 0.12388, "95": 0.1228, "100": 0.12387}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86544, + "4": 10.84555, + "5": 10.88343, + "6": 10.89431, + "7": 10.87069, + "8": 10.86982, + "9": 10.8692, + "10": 10.83882, + "11": 10.89437, + "12": 10.8798, + "13": 10.87986, + "14": 10.90316, + "15": 10.84047, + "16": 10.83785, + "17": 10.8067, + "18": 10.83027, + "19": 10.82265, + "20": 10.73196, + "21": 10.70751, + "22": 10.56001, + "23": 10.72404, + "24": 10.61114, + "25": 10.54812, + "26": 10.61333, + "27": 10.63051, + "28": 10.56645, + "29": 10.59672, + "30": 10.37134, + "31": 10.11723, + "32": 10.46131, + "33": 10.4525, + "34": 10.21689, + "35": 10.27171, + "36": 10.2312, + "37": 10.34809, + "38": 10.18839, + "39": 10.41045, + "40": 10.09427, + "41": 10.1471, + "42": 10.21241, + "43": 9.84107, + "44": 9.95919, + "45": 9.84081, + "46": 9.82483, + "47": 10.13877, + "48": 9.85832, + "49": 9.54703, + "50": 9.90876, + "51": 9.85581, + "52": 9.75235, + "53": 10.07582, + "54": 9.95687, + "55": 9.882, + "56": 9.63137, + "57": 9.48647, + "58": 9.83111, + "59": 9.58896, + "60": 9.50647, + "61": 9.70361, + "62": 9.98283, + "63": 9.38302, + "64": 9.77906, + "65": 8.95171, + "66": 9.70162, + "67": 9.372, + "68": 9.78849, + "69": 9.79851, + "70": 
9.74738, + "71": 9.61908, + "72": 9.58496, + "73": 9.49723, + "74": 8.93927, + "75": 9.42706, + "76": 9.08018, + "77": 10.06566, + "78": 9.72889, + "79": 9.37757, + "80": 9.40987, + "81": 9.47974, + "82": 9.70177, + "83": 9.30611, + "84": 9.42088, + "85": 9.61376, + "86": 9.07651, + "87": 9.59452, + "88": 9.75067, + "89": 9.60239, + "90": 9.81895, + "91": 9.33895, + "92": 9.35712, + "93": 9.07879, + "94": 8.83504, + "95": 9.52168, + "96": 9.53002, + "97": 9.31306, + "98": 9.67783, + "99": 8.89053, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 71.0, + "2": 65.0, + "3": 68.0, + "4": 57.0, + "5": 55.0, + "6": 70.0, + "7": 73.0, + "8": 58.0, + "9": 66.0, + "10": 65.0, + "11": 58.0, + "12": 77.0, + "13": 50.0, + "14": 65.0, + "15": 71.0, + "16": 68.0, + "17": 58.0, + "18": 57.0, + "19": 68.0, + "20": 61.0, + "21": 65.0, + "22": 57.0, + "23": 83.0, + "24": 58.0, + "25": 66.0, + "26": 63.0, + "27": 80.0, + "28": 82.0, + "29": 72.0, + "30": 71.0, + "31": 68.0, + "32": 75.0, + "33": 85.0, + "34": 63.0, + "35": 69.0, + "36": 58.0, + "37": 83.0, + "38": 65.0, + "39": 68.0, + "40": 81.0, + "41": 72.0, + "42": 76.0, + "43": 84.0, + "44": 85.0, + "45": 85.0, + "46": 79.0, + "47": 81.0, + "48": 68.0, + "49": 89.0, + "50": 80.0, + "51": 70.0, + "52": 81.0, + "53": 95.0, + "54": 101.0, + "55": 58.0, + "56": 90.0, + "57": 83.0, + "58": 90.0, + "59": 79.0, + "60": 84.0, + "61": 92.0, + "62": 102.0, + "63": 78.0, + "64": 73.0, + "65": 81.0, + "66": 88.0, + "67": 54.0, + "68": 57.0, + "69": 72.0, + "70": 88.0, + "71": 82.0, + "72": 64.0, + "73": 78.0, + "74": 76.0, + "75": 70.0, + "76": 78.0, + "77": 67.0, + "78": 86.0, + "79": 76.0, + "80": 90.0, + "81": 92.0, + "82": 72.0, + "83": 61.0, + "84": 65.0, + "85": 89.0, + "86": 73.0, + "87": 89.0, + "88": 63.0, + "89": 83.0, + "90": 72.0, + "91": 55.0, + "92": 63.0, + "93": 47.0, + "94": 74.0, + "95": 70.0, + "96": 73.0, + "97": 80.0, + "98": 76.0, + "99": 68.0, 
+ "100": 75.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 546472448.0, + "2": 546472448.0, + "3": 546472448.0, + "4": 546472448.0, + "5": 546472448.0, + "6": 546472448.0, + "7": 546472448.0, + "8": 546472448.0, + "9": 546472448.0, + "10": 546472448.0, + "11": 546472448.0, + "12": 546472448.0, + "13": 546472448.0, + "14": 546472448.0, + "15": 546472448.0, + "16": 546472448.0, + "17": 546472448.0, + "18": 546472448.0, + "19": 546472448.0, + "20": 546472448.0, + "21": 546472448.0, + "22": 546472448.0, + "23": 546472448.0, + "24": 546472448.0, + "25": 546472448.0, + "26": 546472448.0, + "27": 546472448.0, + "28": 546472448.0, + "29": 546472448.0, + "30": 546472448.0, + "31": 546472448.0, + "32": 546472448.0, + "33": 546472448.0, + "34": 546472448.0, + "35": 546472448.0, + "36": 546472448.0, + "37": 546472448.0, + "38": 546472448.0, + "39": 546472448.0, + "40": 546472448.0, + "41": 546472448.0, + "42": 546472448.0, + "43": 546472448.0, + "44": 546472448.0, + "45": 546472448.0, + "46": 546472448.0, + "47": 546472448.0, + "48": 546472448.0, + "49": 546472448.0, + "50": 546472448.0, + "51": 546472448.0, + "52": 546472448.0, + "53": 546472448.0, + "54": 546472448.0, + "55": 546472448.0, + "56": 546472448.0, + "57": 546472448.0, + "58": 546472448.0, + "59": 546472448.0, + "60": 546472448.0, + "61": 546472448.0, + "62": 546472448.0, + "63": 546472448.0, + "64": 546472448.0, + "65": 546472448.0, + "66": 546472448.0, + "67": 546472448.0, + "68": 546472448.0, + "69": 546472448.0, + "70": 546472448.0, + "71": 546472448.0, + "72": 546472448.0, + "73": 546472448.0, + "74": 546472448.0, + "75": 546472448.0, + "76": 546472448.0, + "77": 546472448.0, + "78": 546472448.0, + "79": 546472448.0, + "80": 546472448.0, + "81": 546472448.0, + "82": 546472448.0, + "83": 546472448.0, + "84": 546472448.0, + "85": 546472448.0, + "86": 546472448.0, + "87": 546472448.0, + "88": 546472448.0, + "89": 546472448.0, + "90": 
546472448.0, + "91": 546472448.0, + "92": 546472448.0, + "93": 546472448.0, + "94": 546472448.0, + "95": 546472448.0, + "96": 546472448.0, + "97": 546472448.0, + "98": 546472448.0, + "99": 546472448.0, + "100": 546472448.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1713796608.0, + "2": 1895967232.0, + "3": 1895967232.0, + "4": 1895967232.0, + "5": 1895967232.0, + "6": 1895967232.0, + "7": 1895967232.0, + "8": 1895967232.0, + "9": 1895967232.0, + "10": 1895967232.0, + "11": 1895967232.0, + "12": 1895967232.0, + "13": 1895967232.0, + "14": 1895967232.0, + "15": 1895967232.0, + "16": 1895967232.0, + "17": 1895967232.0, + "18": 1895967232.0, + "19": 1895967232.0, + "20": 1895967232.0, + "21": 1895967232.0, + "22": 1895967232.0, + "23": 1895967232.0, + "24": 1895967232.0, + "25": 1895967232.0, + "26": 1895967232.0, + "27": 1895967232.0, + "28": 1895967232.0, + "29": 1895967232.0, + "30": 1895967232.0, + "31": 1895967232.0, + "32": 1895967232.0, + "33": 1895967232.0, + "34": 1895967232.0, + "35": 1895967232.0, + "36": 1895967232.0, + "37": 1895967232.0, + "38": 1895967232.0, + "39": 1895967232.0, + "40": 1895967232.0, + "41": 1895967232.0, + "42": 1895967232.0, + "43": 1895967232.0, + "44": 1895967232.0, + "45": 1895967232.0, + "46": 1895967232.0, + "47": 1895967232.0, + "48": 1895967232.0, + "49": 1895967232.0, + "50": 1895967232.0, + "51": 1895967232.0, + "52": 1895967232.0, + "53": 1895967232.0, + "54": 1895967232.0, + "55": 1895967232.0, + "56": 1895967232.0, + "57": 1895967232.0, + "58": 1895967232.0, + "59": 1895967232.0, + "60": 1895967232.0, + "61": 1895967232.0, + "62": 1895967232.0, + "63": 1895967232.0, + "64": 1895967232.0, + "65": 1895967232.0, + "66": 1895967232.0, + "67": 1895967232.0, + "68": 1895967232.0, + "69": 1895967232.0, + "70": 1895967232.0, + "71": 1895967232.0, + "72": 1895967232.0, + "73": 1895967232.0, + "74": 1895967232.0, + "75": 1895967232.0, + "76": 1895967232.0, 
+ "77": 1895967232.0, + "78": 1895967232.0, + "79": 1895967232.0, + "80": 1895967232.0, + "81": 1895967232.0, + "82": 1895967232.0, + "83": 1895967232.0, + "84": 1895967232.0, + "85": 1895967232.0, + "86": 1895967232.0, + "87": 1895967232.0, + "88": 1895967232.0, + "89": 1895967232.0, + "90": 1895967232.0, + "91": 1895967232.0, + "92": 1895967232.0, + "93": 1895967232.0, + "94": 1895967232.0, + "95": 1895967232.0, + "96": 1895967232.0, + "97": 1895967232.0, + "98": 1895967232.0, + "99": 1895967232.0, + "100": 1895967232.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.72275, + "2": 0.17301, + "3": 0.15386, + "4": 0.16174, + "5": 0.16281, + "6": 0.16123, + "7": 0.16321, + "8": 0.15614, + "9": 0.15485, + "10": 0.15403, + "11": 0.15407, + "12": 0.15562, + "13": 0.15964, + "14": 0.15764, + "15": 0.15375, + "16": 0.1559, + "17": 0.15118, + "18": 0.15439, + "19": 0.15335, + "20": 0.15351, + "21": 0.15162, + "22": 0.15323, + "23": 0.15304, + "24": 0.15257, + "25": 0.15184, + "26": 0.15337, + "27": 0.15366, + "28": 0.1533, + "29": 0.15626, + "30": 0.15279, + "31": 0.15396, + "32": 0.15273, + "33": 0.15868, + "34": 0.15298, + "35": 0.15363, + "36": 0.15504, + "37": 0.15404, + "38": 0.15509, + "39": 0.15421, + "40": 0.15591, + "41": 0.15488, + "42": 0.15491, + "43": 0.15536, + "44": 0.15405, + "45": 0.15301, + "46": 0.1564, + "47": 0.1538, + "48": 0.15496, + "49": 0.15554, + "50": 0.15377, + "51": 0.16069, + "52": 0.15674, + "53": 0.15488, + "54": 0.15626, + "55": 0.15428, + "56": 0.15332, + "57": 0.15575, + "58": 0.15337, + "59": 0.1573, + "60": 0.15494, + "61": 0.15582, + "62": 0.15444, + "63": 0.15451, + "64": 0.15468, + "65": 0.15421, + "66": 0.15605, + "67": 0.15502, + "68": 0.1555, + "69": 0.15365, + "70": 0.15482, + "71": 0.15668, + "72": 0.15572, + "73": 0.15504, + "74": 0.15493, + "75": 0.15395, + "76": 0.1543, + "77": 0.15616, + "78": 0.15412, + "79": 0.15658, + "80": 0.15263, + "81": 0.15632, + 
"82": 0.15472, + "83": 0.1556, + "84": 0.15407, + "85": 0.15567, + "86": 0.15631, + "87": 0.15367, + "88": 0.15509, + "89": 0.1539, + "90": 0.15608, + "91": 0.15432, + "92": 0.155, + "93": 0.1529, + "94": 0.1541, + "95": 0.15468, + "96": 0.15535, + "97": 0.15603, + "98": 0.15443, + "99": 0.1563, + "100": 0.15285 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..00af7ef1865 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86544, + "4": 10.84555, + "5": 10.88343, + "6": 10.89431, + "7": 10.87069, + "8": 10.86982, + "9": 10.8692, + "10": 10.83882, + "11": 10.89437, + "12": 10.8798, + "13": 10.87986, + "14": 10.90316, + "15": 10.84047, + "16": 10.83785, + "17": 10.8067, + "18": 10.83027, + "19": 10.82265, + "20": 10.73196, + "21": 10.70751, + "22": 10.56001, + "23": 10.72404, + "24": 10.61114, + "25": 10.54812, + "26": 10.61333, + "27": 10.63051, + "28": 10.56645, + "29": 10.59672, + "30": 10.37134, + "31": 10.11723, + "32": 10.46131, + "33": 10.4525, + "34": 10.21689, + "35": 10.27171, + "36": 10.2312, + "37": 10.34809, + "38": 10.18839, + "39": 10.41045, + "40": 10.09427, + "41": 10.1471, + "42": 10.21241, + "43": 9.84107, + "44": 9.95919, + "45": 9.84081, + "46": 9.82483, + "47": 10.13877, + "48": 9.85832, + "49": 9.54703, + "50": 9.90876, + "51": 9.85581, + "52": 9.75235, + "53": 10.07582, + "54": 9.95687, + 
"55": 9.882, + "56": 9.63137, + "57": 9.48647, + "58": 9.83111, + "59": 9.58896, + "60": 9.50647, + "61": 9.70361, + "62": 9.98283, + "63": 9.38302, + "64": 9.77906, + "65": 8.95171, + "66": 9.70162, + "67": 9.372, + "68": 9.78849, + "69": 9.79851, + "70": 9.74738, + "71": 9.61908, + "72": 9.58496, + "73": 9.49723, + "74": 8.93927, + "75": 9.42706, + "76": 9.08018, + "77": 10.06566, + "78": 9.72889, + "79": 9.37757, + "80": 9.40987, + "81": 9.47974, + "82": 9.70177, + "83": 9.30611, + "84": 9.42088, + "85": 9.61376, + "86": 9.07651, + "87": 9.59452, + "88": 9.75067, + "89": 9.60239, + "90": 9.81895, + "91": 9.33895, + "92": 9.35712, + "93": 9.07879, + "94": 8.83504, + "95": 9.52168, + "96": 9.53002, + "97": 9.31306, + "98": 9.67783, + "99": 8.89053, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 71.0, + "2": 65.0, + "3": 68.0, + "4": 57.0, + "5": 55.0, + "6": 70.0, + "7": 73.0, + "8": 58.0, + "9": 66.0, + "10": 65.0, + "11": 58.0, + "12": 77.0, + "13": 50.0, + "14": 65.0, + "15": 71.0, + "16": 68.0, + "17": 58.0, + "18": 57.0, + "19": 68.0, + "20": 61.0, + "21": 65.0, + "22": 57.0, + "23": 83.0, + "24": 58.0, + "25": 66.0, + "26": 63.0, + "27": 80.0, + "28": 82.0, + "29": 72.0, + "30": 71.0, + "31": 68.0, + "32": 75.0, + "33": 85.0, + "34": 63.0, + "35": 69.0, + "36": 58.0, + "37": 83.0, + "38": 65.0, + "39": 68.0, + "40": 81.0, + "41": 72.0, + "42": 76.0, + "43": 84.0, + "44": 85.0, + "45": 85.0, + "46": 79.0, + "47": 81.0, + "48": 68.0, + "49": 89.0, + "50": 80.0, + "51": 70.0, + "52": 81.0, + "53": 95.0, + "54": 101.0, + "55": 58.0, + "56": 90.0, + "57": 83.0, + "58": 90.0, + "59": 79.0, + "60": 84.0, + "61": 92.0, + "62": 102.0, + "63": 78.0, + "64": 73.0, + "65": 81.0, + "66": 88.0, + "67": 54.0, + "68": 57.0, + "69": 72.0, + "70": 88.0, + "71": 82.0, + "72": 64.0, + "73": 78.0, + "74": 76.0, + "75": 70.0, + "76": 78.0, + "77": 67.0, + "78": 86.0, + "79": 76.0, + "80": 90.0, + "81": 
92.0, + "82": 72.0, + "83": 61.0, + "84": 65.0, + "85": 89.0, + "86": 73.0, + "87": 89.0, + "88": 63.0, + "89": 83.0, + "90": 72.0, + "91": 55.0, + "92": 63.0, + "93": 47.0, + "94": 74.0, + "95": 70.0, + "96": 73.0, + "97": 80.0, + "98": 76.0, + "99": 68.0, + "100": 75.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 546472448.0, + "2": 546472448.0, + "3": 546472448.0, + "4": 546472448.0, + "5": 546472448.0, + "6": 546472448.0, + "7": 546472448.0, + "8": 546472448.0, + "9": 546472448.0, + "10": 546472448.0, + "11": 546472448.0, + "12": 546472448.0, + "13": 546472448.0, + "14": 546472448.0, + "15": 546472448.0, + "16": 546472448.0, + "17": 546472448.0, + "18": 546472448.0, + "19": 546472448.0, + "20": 546472448.0, + "21": 546472448.0, + "22": 546472448.0, + "23": 546472448.0, + "24": 546472448.0, + "25": 546472448.0, + "26": 546472448.0, + "27": 546472448.0, + "28": 546472448.0, + "29": 546472448.0, + "30": 546472448.0, + "31": 546472448.0, + "32": 546472448.0, + "33": 546472448.0, + "34": 546472448.0, + "35": 546472448.0, + "36": 546472448.0, + "37": 546472448.0, + "38": 546472448.0, + "39": 546472448.0, + "40": 546472448.0, + "41": 546472448.0, + "42": 546472448.0, + "43": 546472448.0, + "44": 546472448.0, + "45": 546472448.0, + "46": 546472448.0, + "47": 546472448.0, + "48": 546472448.0, + "49": 546472448.0, + "50": 546472448.0, + "51": 546472448.0, + "52": 546472448.0, + "53": 546472448.0, + "54": 546472448.0, + "55": 546472448.0, + "56": 546472448.0, + "57": 546472448.0, + "58": 546472448.0, + "59": 546472448.0, + "60": 546472448.0, + "61": 546472448.0, + "62": 546472448.0, + "63": 546472448.0, + "64": 546472448.0, + "65": 546472448.0, + "66": 546472448.0, + "67": 546472448.0, + "68": 546472448.0, + "69": 546472448.0, + "70": 546472448.0, + "71": 546472448.0, + "72": 546472448.0, + "73": 546472448.0, + "74": 546472448.0, + "75": 546472448.0, + "76": 546472448.0, + "77": 546472448.0, + "78": 
546472448.0, + "79": 546472448.0, + "80": 546472448.0, + "81": 546472448.0, + "82": 546472448.0, + "83": 546472448.0, + "84": 546472448.0, + "85": 546472448.0, + "86": 546472448.0, + "87": 546472448.0, + "88": 546472448.0, + "89": 546472448.0, + "90": 546472448.0, + "91": 546472448.0, + "92": 546472448.0, + "93": 546472448.0, + "94": 546472448.0, + "95": 546472448.0, + "96": 546472448.0, + "97": 546472448.0, + "98": 546472448.0, + "99": 546472448.0, + "100": 546472448.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1713796608.0, + "2": 1895967232.0, + "3": 1895967232.0, + "4": 1895967232.0, + "5": 1895967232.0, + "6": 1895967232.0, + "7": 1895967232.0, + "8": 1895967232.0, + "9": 1895967232.0, + "10": 1895967232.0, + "11": 1895967232.0, + "12": 1895967232.0, + "13": 1895967232.0, + "14": 1895967232.0, + "15": 1895967232.0, + "16": 1895967232.0, + "17": 1895967232.0, + "18": 1895967232.0, + "19": 1895967232.0, + "20": 1895967232.0, + "21": 1895967232.0, + "22": 1895967232.0, + "23": 1895967232.0, + "24": 1895967232.0, + "25": 1895967232.0, + "26": 1895967232.0, + "27": 1895967232.0, + "28": 1895967232.0, + "29": 1895967232.0, + "30": 1895967232.0, + "31": 1895967232.0, + "32": 1895967232.0, + "33": 1895967232.0, + "34": 1895967232.0, + "35": 1895967232.0, + "36": 1895967232.0, + "37": 1895967232.0, + "38": 1895967232.0, + "39": 1895967232.0, + "40": 1895967232.0, + "41": 1895967232.0, + "42": 1895967232.0, + "43": 1895967232.0, + "44": 1895967232.0, + "45": 1895967232.0, + "46": 1895967232.0, + "47": 1895967232.0, + "48": 1895967232.0, + "49": 1895967232.0, + "50": 1895967232.0, + "51": 1895967232.0, + "52": 1895967232.0, + "53": 1895967232.0, + "54": 1895967232.0, + "55": 1895967232.0, + "56": 1895967232.0, + "57": 1895967232.0, + "58": 1895967232.0, + "59": 1895967232.0, + "60": 1895967232.0, + "61": 1895967232.0, + "62": 1895967232.0, + "63": 1895967232.0, + "64": 1895967232.0, + "65": 
1895967232.0, + "66": 1895967232.0, + "67": 1895967232.0, + "68": 1895967232.0, + "69": 1895967232.0, + "70": 1895967232.0, + "71": 1895967232.0, + "72": 1895967232.0, + "73": 1895967232.0, + "74": 1895967232.0, + "75": 1895967232.0, + "76": 1895967232.0, + "77": 1895967232.0, + "78": 1895967232.0, + "79": 1895967232.0, + "80": 1895967232.0, + "81": 1895967232.0, + "82": 1895967232.0, + "83": 1895967232.0, + "84": 1895967232.0, + "85": 1895967232.0, + "86": 1895967232.0, + "87": 1895967232.0, + "88": 1895967232.0, + "89": 1895967232.0, + "90": 1895967232.0, + "91": 1895967232.0, + "92": 1895967232.0, + "93": 1895967232.0, + "94": 1895967232.0, + "95": 1895967232.0, + "96": 1895967232.0, + "97": 1895967232.0, + "98": 1895967232.0, + "99": 1895967232.0, + "100": 1895967232.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.30059, + "2": 0.17777, + "3": 0.13503, + "4": 0.13378, + "5": 0.1357, + "6": 0.13267, + "7": 0.13302, + "8": 0.13235, + "9": 0.13435, + "10": 0.13421, + "11": 0.13233, + "12": 0.13074, + "13": 0.12922, + "14": 0.13131, + "15": 0.13296, + "16": 0.13106, + "17": 0.13142, + "18": 0.13375, + "19": 0.13295, + "20": 0.13185, + "21": 0.13239, + "22": 0.13128, + "23": 0.13257, + "24": 0.13321, + "25": 0.13186, + "26": 0.13183, + "27": 0.13148, + "28": 0.13158, + "29": 0.13055, + "30": 0.13201, + "31": 0.1314, + "32": 0.13098, + "33": 0.13284, + "34": 0.13152, + "35": 0.13191, + "36": 0.13208, + "37": 0.13199, + "38": 0.13223, + "39": 0.13213, + "40": 0.13135, + "41": 0.13187, + "42": 0.13104, + "43": 0.13286, + "44": 0.13281, + "45": 0.13109, + "46": 0.13108, + "47": 0.13377, + "48": 0.13164, + "49": 0.13194, + "50": 0.1309, + "51": 0.14716, + "52": 0.14386, + "53": 0.133, + "54": 0.13142, + "55": 0.12988, + "56": 0.13391, + "57": 0.14548, + "58": 0.1475, + "59": 0.1326, + "60": 0.13058, + "61": 0.13075, + "62": 0.13206, + "63": 0.13128, + "64": 0.13303, + "65": 0.13059, + "66": 0.12969, + 
"67": 0.13108, + "68": 0.13125, + "69": 0.1294, + "70": 0.13035, + "71": 0.13528, + "72": 0.13186, + "73": 0.13078, + "74": 0.12997, + "75": 0.13033, + "76": 0.13134, + "77": 0.13127, + "78": 0.12885, + "79": 0.13057, + "80": 0.13054, + "81": 0.131, + "82": 0.13102, + "83": 0.13228, + "84": 0.13261, + "85": 0.1312, + "86": 0.1324, + "87": 0.13346, + "88": 0.13044, + "89": 0.13079, + "90": 0.13018, + "91": 0.13115, + "92": 0.13135, + "93": 0.13062, + "94": 0.13049, + "95": 0.13131, + "96": 0.13099, + "97": 0.13099, + "98": 0.1311, + "99": 0.13221, + "100": 0.13235 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..c1aaf21cf26 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86544, + "4": 10.84555, + "5": 10.88343, + "6": 10.89431, + "7": 10.87069, + "8": 10.86982, + "9": 10.8692, + "10": 10.83882, + "11": 10.89437, + "12": 10.8798, + "13": 10.87986, + "14": 10.90316, + "15": 10.84047, + "16": 10.83785, + "17": 10.8067, + "18": 10.83027, + "19": 10.82265, + "20": 10.73196, + "21": 10.70751, + "22": 10.56001, + "23": 10.72404, + "24": 10.61114, + "25": 10.54812, + "26": 10.61333, + "27": 10.63051, + "28": 10.56645, + "29": 10.59672, + "30": 10.37134, + "31": 10.11723, + "32": 10.46131, + "33": 10.4525, + "34": 10.21689, + "35": 10.27171, + "36": 10.2312, + "37": 10.34809, + "38": 10.18839, + "39": 10.41045, + "40": 10.09427, + "41": 
10.1471, + "42": 10.21241, + "43": 9.84107, + "44": 9.95919, + "45": 9.84081, + "46": 9.82483, + "47": 10.13877, + "48": 9.85832, + "49": 9.54703, + "50": 9.90876, + "51": 9.85581, + "52": 9.75235, + "53": 10.07582, + "54": 9.95687, + "55": 9.882, + "56": 9.63137, + "57": 9.48647, + "58": 9.83111, + "59": 9.58896, + "60": 9.50647, + "61": 9.70361, + "62": 9.98283, + "63": 9.38302, + "64": 9.77906, + "65": 8.95171, + "66": 9.70162, + "67": 9.372, + "68": 9.78849, + "69": 9.79851, + "70": 9.74738, + "71": 9.61908, + "72": 9.58496, + "73": 9.49723, + "74": 8.93927, + "75": 9.42706, + "76": 9.08018, + "77": 10.06566, + "78": 9.72889, + "79": 9.37757, + "80": 9.40987, + "81": 9.47974, + "82": 9.70177, + "83": 9.30611, + "84": 9.42088, + "85": 9.61376, + "86": 9.07651, + "87": 9.59452, + "88": 9.75067, + "89": 9.60239, + "90": 9.81895, + "91": 9.33895, + "92": 9.35712, + "93": 9.07879, + "94": 8.83504, + "95": 9.52168, + "96": 9.53002, + "97": 9.31306, + "98": 9.67783, + "99": 8.89053, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 71.0, + "2": 65.0, + "3": 68.0, + "4": 57.0, + "5": 55.0, + "6": 70.0, + "7": 73.0, + "8": 58.0, + "9": 66.0, + "10": 65.0, + "11": 58.0, + "12": 77.0, + "13": 50.0, + "14": 65.0, + "15": 71.0, + "16": 68.0, + "17": 58.0, + "18": 57.0, + "19": 68.0, + "20": 61.0, + "21": 65.0, + "22": 57.0, + "23": 83.0, + "24": 58.0, + "25": 66.0, + "26": 63.0, + "27": 80.0, + "28": 82.0, + "29": 72.0, + "30": 71.0, + "31": 68.0, + "32": 75.0, + "33": 85.0, + "34": 63.0, + "35": 69.0, + "36": 58.0, + "37": 83.0, + "38": 65.0, + "39": 68.0, + "40": 81.0, + "41": 72.0, + "42": 76.0, + "43": 84.0, + "44": 85.0, + "45": 85.0, + "46": 79.0, + "47": 81.0, + "48": 68.0, + "49": 89.0, + "50": 80.0, + "51": 70.0, + "52": 81.0, + "53": 95.0, + "54": 101.0, + "55": 58.0, + "56": 90.0, + "57": 83.0, + "58": 90.0, + "59": 79.0, + "60": 84.0, + "61": 92.0, + "62": 102.0, + "63": 78.0, + "64": 
73.0, + "65": 81.0, + "66": 88.0, + "67": 54.0, + "68": 57.0, + "69": 72.0, + "70": 88.0, + "71": 82.0, + "72": 64.0, + "73": 78.0, + "74": 76.0, + "75": 70.0, + "76": 78.0, + "77": 67.0, + "78": 86.0, + "79": 76.0, + "80": 90.0, + "81": 92.0, + "82": 72.0, + "83": 61.0, + "84": 65.0, + "85": 89.0, + "86": 73.0, + "87": 89.0, + "88": 63.0, + "89": 83.0, + "90": 72.0, + "91": 55.0, + "92": 63.0, + "93": 47.0, + "94": 74.0, + "95": 70.0, + "96": 73.0, + "97": 80.0, + "98": 76.0, + "99": 68.0, + "100": 75.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 546472448.0, + "2": 546472448.0, + "3": 546472448.0, + "4": 546472448.0, + "5": 546472448.0, + "6": 546472448.0, + "7": 546472448.0, + "8": 546472448.0, + "9": 546472448.0, + "10": 546472448.0, + "11": 546472448.0, + "12": 546472448.0, + "13": 546472448.0, + "14": 546472448.0, + "15": 546472448.0, + "16": 546472448.0, + "17": 546472448.0, + "18": 546472448.0, + "19": 546472448.0, + "20": 546472448.0, + "21": 546472448.0, + "22": 546472448.0, + "23": 546472448.0, + "24": 546472448.0, + "25": 546472448.0, + "26": 546472448.0, + "27": 546472448.0, + "28": 546472448.0, + "29": 546472448.0, + "30": 546472448.0, + "31": 546472448.0, + "32": 546472448.0, + "33": 546472448.0, + "34": 546472448.0, + "35": 546472448.0, + "36": 546472448.0, + "37": 546472448.0, + "38": 546472448.0, + "39": 546472448.0, + "40": 546472448.0, + "41": 546472448.0, + "42": 546472448.0, + "43": 546472448.0, + "44": 546472448.0, + "45": 546472448.0, + "46": 546472448.0, + "47": 546472448.0, + "48": 546472448.0, + "49": 546472448.0, + "50": 546472448.0, + "51": 546472448.0, + "52": 546472448.0, + "53": 546472448.0, + "54": 546472448.0, + "55": 546472448.0, + "56": 546472448.0, + "57": 546472448.0, + "58": 546472448.0, + "59": 546472448.0, + "60": 546472448.0, + "61": 546472448.0, + "62": 546472448.0, + "63": 546472448.0, + "64": 546472448.0, + "65": 546472448.0, + "66": 546472448.0, + 
"67": 546472448.0, + "68": 546472448.0, + "69": 546472448.0, + "70": 546472448.0, + "71": 546472448.0, + "72": 546472448.0, + "73": 546472448.0, + "74": 546472448.0, + "75": 546472448.0, + "76": 546472448.0, + "77": 546472448.0, + "78": 546472448.0, + "79": 546472448.0, + "80": 546472448.0, + "81": 546472448.0, + "82": 546472448.0, + "83": 546472448.0, + "84": 546472448.0, + "85": 546472448.0, + "86": 546472448.0, + "87": 546472448.0, + "88": 546472448.0, + "89": 546472448.0, + "90": 546472448.0, + "91": 546472448.0, + "92": 546472448.0, + "93": 546472448.0, + "94": 546472448.0, + "95": 546472448.0, + "96": 546472448.0, + "97": 546472448.0, + "98": 546472448.0, + "99": 546472448.0, + "100": 546472448.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1713796608.0, + "2": 1895967232.0, + "3": 1895967232.0, + "4": 1895967232.0, + "5": 1895967232.0, + "6": 1895967232.0, + "7": 1895967232.0, + "8": 1895967232.0, + "9": 1895967232.0, + "10": 1895967232.0, + "11": 1895967232.0, + "12": 1895967232.0, + "13": 1895967232.0, + "14": 1895967232.0, + "15": 1895967232.0, + "16": 1895967232.0, + "17": 1895967232.0, + "18": 1895967232.0, + "19": 1895967232.0, + "20": 1895967232.0, + "21": 1895967232.0, + "22": 1895967232.0, + "23": 1895967232.0, + "24": 1895967232.0, + "25": 1895967232.0, + "26": 1895967232.0, + "27": 1895967232.0, + "28": 1895967232.0, + "29": 1895967232.0, + "30": 1895967232.0, + "31": 1895967232.0, + "32": 1895967232.0, + "33": 1895967232.0, + "34": 1895967232.0, + "35": 1895967232.0, + "36": 1895967232.0, + "37": 1895967232.0, + "38": 1895967232.0, + "39": 1895967232.0, + "40": 1895967232.0, + "41": 1895967232.0, + "42": 1895967232.0, + "43": 1895967232.0, + "44": 1895967232.0, + "45": 1895967232.0, + "46": 1895967232.0, + "47": 1895967232.0, + "48": 1895967232.0, + "49": 1895967232.0, + "50": 1895967232.0, + "51": 1895967232.0, + "52": 1895967232.0, + "53": 1895967232.0, + "54": 
1895967232.0, + "55": 1895967232.0, + "56": 1895967232.0, + "57": 1895967232.0, + "58": 1895967232.0, + "59": 1895967232.0, + "60": 1895967232.0, + "61": 1895967232.0, + "62": 1895967232.0, + "63": 1895967232.0, + "64": 1895967232.0, + "65": 1895967232.0, + "66": 1895967232.0, + "67": 1895967232.0, + "68": 1895967232.0, + "69": 1895967232.0, + "70": 1895967232.0, + "71": 1895967232.0, + "72": 1895967232.0, + "73": 1895967232.0, + "74": 1895967232.0, + "75": 1895967232.0, + "76": 1895967232.0, + "77": 1895967232.0, + "78": 1895967232.0, + "79": 1895967232.0, + "80": 1895967232.0, + "81": 1895967232.0, + "82": 1895967232.0, + "83": 1895967232.0, + "84": 1895967232.0, + "85": 1895967232.0, + "86": 1895967232.0, + "87": 1895967232.0, + "88": 1895967232.0, + "89": 1895967232.0, + "90": 1895967232.0, + "91": 1895967232.0, + "92": 1895967232.0, + "93": 1895967232.0, + "94": 1895967232.0, + "95": 1895967232.0, + "96": 1895967232.0, + "97": 1895967232.0, + "98": 1895967232.0, + "99": 1895967232.0, + "100": 1895967232.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.81196, + "2": 0.17008, + "3": 0.15523, + "4": 0.15249, + "5": 0.15434, + "6": 0.15515, + "7": 0.15378, + "8": 0.1528, + "9": 0.15287, + "10": 0.15479, + "11": 0.15442, + "12": 0.15952, + "13": 0.15843, + "14": 0.15559, + "15": 0.15333, + "16": 0.15363, + "17": 0.15594, + "18": 0.153, + "19": 0.15542, + "20": 0.15304, + "21": 0.15492, + "22": 0.15277, + "23": 0.15803, + "24": 0.1545, + "25": 0.15639, + "26": 0.15419, + "27": 0.15381, + "28": 0.15423, + "29": 0.15354, + "30": 0.1554, + "31": 0.15389, + "32": 0.15608, + "33": 0.15361, + "34": 0.15437, + "35": 0.15233, + "36": 0.15499, + "37": 0.15114, + "38": 0.15259, + "39": 0.15269, + "40": 0.1516, + "41": 0.15052, + "42": 0.15122, + "43": 0.15389, + "44": 0.15261, + "45": 0.15376, + "46": 0.15091, + "47": 0.15197, + "48": 0.15131, + "49": 0.15083, + "50": 0.152, + "51": 0.15723, + "52": 0.15481, 
+ "53": 0.15087, + "54": 0.15175, + "55": 0.15331, + "56": 0.15504, + "57": 0.15471, + "58": 0.1549, + "59": 0.15621, + "60": 0.1533, + "61": 0.15499, + "62": 0.15222, + "63": 0.15091, + "64": 0.1535, + "65": 0.15463, + "66": 0.15169, + "67": 0.15591, + "68": 0.15173, + "69": 0.1509, + "70": 0.15063, + "71": 0.15755, + "72": 0.1545, + "73": 0.15374, + "74": 0.15306, + "75": 0.15223, + "76": 0.15203, + "77": 0.15194, + "78": 0.15284, + "79": 0.15345, + "80": 0.15138, + "81": 0.15298, + "82": 0.15115, + "83": 0.15281, + "84": 0.1544, + "85": 0.15277, + "86": 0.15368, + "87": 0.15373, + "88": 0.15359, + "89": 0.15205, + "90": 0.1535, + "91": 0.15459, + "92": 0.15406, + "93": 0.15133, + "94": 0.1533, + "95": 0.15198, + "96": 0.15195, + "97": 0.1533, + "98": 0.15406, + "99": 0.1528, + "100": 0.15371 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..e4807dd3280 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92705, + "2": 10.92645, + "3": 10.91603, + "4": 10.9091, + "5": 10.92799, + "6": 10.93628, + "7": 10.90625, + "8": 10.92129, + "9": 10.90998, + "10": 10.90789, + "11": 10.89335, + "12": 10.92458, + "13": 10.91459, + "14": 10.92129, + "15": 10.88313, + "16": 10.87322, + "17": 10.84129, + "18": 10.87278, + "19": 10.85629, + "20": 10.77626, + "21": 10.7487, + "22": 10.63028, + "23": 10.75683, + "24": 10.65647, + "25": 10.59138, + "26": 
10.65379, + "27": 10.6485, + "28": 10.59548, + "29": 10.60882, + "30": 10.39195, + "31": 10.15754, + "32": 10.49101, + "33": 10.47929, + "34": 10.24061, + "35": 10.29687, + "36": 10.2464, + "37": 10.35228, + "38": 10.20491, + "39": 10.4052, + "40": 10.0964, + "41": 10.15176, + "42": 10.22032, + "43": 9.85497, + "44": 9.96138, + "45": 9.84466, + "46": 9.83805, + "47": 10.13984, + "48": 9.85719, + "49": 9.53694, + "50": 9.9092, + "51": 9.84886, + "52": 9.74156, + "53": 10.06349, + "54": 9.94683, + "55": 9.87764, + "56": 9.6274, + "57": 9.47111, + "58": 9.8292, + "59": 9.58251, + "60": 9.49121, + "61": 9.69959, + "62": 9.97969, + "63": 9.37277, + "64": 9.77468, + "65": 8.94232, + "66": 9.69905, + "67": 9.3638, + "68": 9.78788, + "69": 9.78333, + "70": 9.72263, + "71": 9.60795, + "72": 9.5846, + "73": 9.48966, + "74": 8.9487, + "75": 9.41912, + "76": 9.08728, + "77": 10.06356, + "78": 9.72834, + "79": 9.37163, + "80": 9.40079, + "81": 9.47845, + "82": 9.69179, + "83": 9.30761, + "84": 9.41229, + "85": 9.61209, + "86": 9.07599, + "87": 9.5947, + "88": 9.74743, + "89": 9.60687, + "90": 9.81012, + "91": 9.3436, + "92": 9.36483, + "93": 9.0776, + "94": 8.83107, + "95": 9.51718, + "96": 9.5245, + "97": 9.31025, + "98": 9.67895, + "99": 8.88829, + "100": 9.40153 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 68.0, + "2": 52.0, + "3": 60.0, + "4": 54.0, + "5": 64.0, + "6": 64.0, + "7": 66.0, + "8": 69.0, + "9": 75.0, + "10": 61.0, + "11": 61.0, + "12": 71.0, + "13": 54.0, + "14": 61.0, + "15": 58.0, + "16": 58.0, + "17": 66.0, + "18": 56.0, + "19": 56.0, + "20": 64.0, + "21": 55.0, + "22": 55.0, + "23": 80.0, + "24": 69.0, + "25": 58.0, + "26": 85.0, + "27": 67.0, + "28": 64.0, + "29": 60.0, + "30": 85.0, + "31": 77.0, + "32": 76.0, + "33": 85.0, + "34": 69.0, + "35": 66.0, + "36": 68.0, + "37": 68.0, + "38": 79.0, + "39": 69.0, + "40": 85.0, + "41": 71.0, + "42": 86.0, + "43": 78.0, + "44": 73.0, + "45": 84.0, + 
"46": 84.0, + "47": 78.0, + "48": 77.0, + "49": 76.0, + "50": 85.0, + "51": 70.0, + "52": 79.0, + "53": 78.0, + "54": 83.0, + "55": 69.0, + "56": 74.0, + "57": 76.0, + "58": 85.0, + "59": 67.0, + "60": 67.0, + "61": 81.0, + "62": 88.0, + "63": 76.0, + "64": 86.0, + "65": 65.0, + "66": 85.0, + "67": 64.0, + "68": 78.0, + "69": 67.0, + "70": 92.0, + "71": 68.0, + "72": 65.0, + "73": 90.0, + "74": 59.0, + "75": 51.0, + "76": 71.0, + "77": 73.0, + "78": 95.0, + "79": 84.0, + "80": 98.0, + "81": 65.0, + "82": 78.0, + "83": 64.0, + "84": 76.0, + "85": 86.0, + "86": 68.0, + "87": 85.0, + "88": 88.0, + "89": 88.0, + "90": 83.0, + "91": 51.0, + "92": 84.0, + "93": 69.0, + "94": 82.0, + "95": 72.0, + "96": 66.0, + "97": 83.0, + "98": 83.0, + "99": 65.0, + "100": 73.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 487096320.0, + "2": 487096320.0, + "3": 487096320.0, + "4": 487096320.0, + "5": 487096320.0, + "6": 487096320.0, + "7": 487096320.0, + "8": 487096320.0, + "9": 487096320.0, + "10": 487096320.0, + "11": 487096320.0, + "12": 487096320.0, + "13": 487096320.0, + "14": 487096320.0, + "15": 487096320.0, + "16": 487096320.0, + "17": 487096320.0, + "18": 487096320.0, + "19": 487096320.0, + "20": 487096320.0, + "21": 487096320.0, + "22": 487096320.0, + "23": 487096320.0, + "24": 487096320.0, + "25": 487096320.0, + "26": 487096320.0, + "27": 487096320.0, + "28": 487096320.0, + "29": 487096320.0, + "30": 487096320.0, + "31": 487096320.0, + "32": 487096320.0, + "33": 487096320.0, + "34": 487096320.0, + "35": 487096320.0, + "36": 487096320.0, + "37": 487096320.0, + "38": 487096320.0, + "39": 487096320.0, + "40": 487096320.0, + "41": 487096320.0, + "42": 487096320.0, + "43": 487096320.0, + "44": 487096320.0, + "45": 487096320.0, + "46": 487096320.0, + "47": 487096320.0, + "48": 487096320.0, + "49": 487096320.0, + "50": 487096320.0, + "51": 487096320.0, + "52": 487096320.0, + "53": 487096320.0, + "54": 
487096320.0, + "55": 487096320.0, + "56": 487096320.0, + "57": 487096320.0, + "58": 487096320.0, + "59": 487096320.0, + "60": 487096320.0, + "61": 487096320.0, + "62": 487096320.0, + "63": 487096320.0, + "64": 487096320.0, + "65": 487096320.0, + "66": 487096320.0, + "67": 487096320.0, + "68": 487096320.0, + "69": 487096320.0, + "70": 487096320.0, + "71": 487096320.0, + "72": 487096320.0, + "73": 487096320.0, + "74": 487096320.0, + "75": 487096320.0, + "76": 487096320.0, + "77": 487096320.0, + "78": 487096320.0, + "79": 487096320.0, + "80": 487096320.0, + "81": 487096320.0, + "82": 487096320.0, + "83": 487096320.0, + "84": 487096320.0, + "85": 487096320.0, + "86": 487096320.0, + "87": 487096320.0, + "88": 487096320.0, + "89": 487096320.0, + "90": 487096320.0, + "91": 487096320.0, + "92": 487096320.0, + "93": 487096320.0, + "94": 487096320.0, + "95": 487096320.0, + "96": 487096320.0, + "97": 487096320.0, + "98": 487096320.0, + "99": 487096320.0, + "100": 487096320.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2158389248.0, + "2": 2338462720.0, + "3": 2338462720.0, + "4": 2339380224.0, + "5": 2339380224.0, + "6": 2339380224.0, + "7": 2339380224.0, + "8": 2339380224.0, + "9": 2339380224.0, + "10": 2339380224.0, + "11": 2339380224.0, + "12": 2339380224.0, + "13": 2339380224.0, + "14": 2339380224.0, + "15": 2339380224.0, + "16": 2339380224.0, + "17": 2339380224.0, + "18": 2339380224.0, + "19": 2339380224.0, + "20": 2339380224.0, + "21": 2339380224.0, + "22": 2339380224.0, + "23": 2339380224.0, + "24": 2339380224.0, + "25": 2339380224.0, + "26": 2339380224.0, + "27": 2339380224.0, + "28": 2339380224.0, + "29": 2339380224.0, + "30": 2339380224.0, + "31": 2339380224.0, + "32": 2339380224.0, + "33": 2339380224.0, + "34": 2339380224.0, + "35": 2339380224.0, + "36": 2339380224.0, + "37": 2339380224.0, + "38": 2339380224.0, + "39": 2339380224.0, + "40": 2339380224.0, + "41": 2339380224.0, + "42": 
2339380224.0, + "43": 2339380224.0, + "44": 2339380224.0, + "45": 2339380224.0, + "46": 2339380224.0, + "47": 2339380224.0, + "48": 2339380224.0, + "49": 2339380224.0, + "50": 2339380224.0, + "51": 2339380224.0, + "52": 2339380224.0, + "53": 2339380224.0, + "54": 2339380224.0, + "55": 2339380224.0, + "56": 2339380224.0, + "57": 2339380224.0, + "58": 2339380224.0, + "59": 2339380224.0, + "60": 2339380224.0, + "61": 2339380224.0, + "62": 2339380224.0, + "63": 2339380224.0, + "64": 2339380224.0, + "65": 2339380224.0, + "66": 2339380224.0, + "67": 2339380224.0, + "68": 2339380224.0, + "69": 2339380224.0, + "70": 2339380224.0, + "71": 2339380224.0, + "72": 2339380224.0, + "73": 2339380224.0, + "74": 2339380224.0, + "75": 2339380224.0, + "76": 2339380224.0, + "77": 2339380224.0, + "78": 2339380224.0, + "79": 2339380224.0, + "80": 2339380224.0, + "81": 2339380224.0, + "82": 2339380224.0, + "83": 2339380224.0, + "84": 2339380224.0, + "85": 2339380224.0, + "86": 2339380224.0, + "87": 2339380224.0, + "88": 2339380224.0, + "89": 2339380224.0, + "90": 2339380224.0, + "91": 2339380224.0, + "92": 2339380224.0, + "93": 2339380224.0, + "94": 2339380224.0, + "95": 2339380224.0, + "96": 2339380224.0, + "97": 2339380224.0, + "98": 2339380224.0, + "99": 2339380224.0, + "100": 2339380224.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.83126, + "2": 0.26341, + "3": 0.23434, + "4": 0.23414, + "5": 0.243, + "6": 0.23093, + "7": 0.2349, + "8": 0.23447, + "9": 0.23241, + "10": 0.23155, + "11": 0.23263, + "12": 0.23115, + "13": 0.23168, + "14": 0.23309, + "15": 0.23146, + "16": 0.23206, + "17": 0.23373, + "18": 0.23689, + "19": 0.23192, + "20": 0.23083, + "21": 0.23324, + "22": 0.23339, + "23": 0.2311, + "24": 0.23003, + "25": 0.23092, + "26": 0.23001, + "27": 0.23221, + "28": 0.22984, + "29": 0.23347, + "30": 0.23349, + "31": 0.44414, + "32": 0.22811, + "33": 0.22989, + "34": 0.22796, + "35": 0.22895, + "36": 0.22701, + 
"37": 0.22772, + "38": 0.22966, + "39": 0.22791, + "40": 0.22768, + "41": 0.22809, + "42": 0.23136, + "43": 0.22907, + "44": 0.22647, + "45": 0.22963, + "46": 0.23039, + "47": 0.22951, + "48": 0.2281, + "49": 0.22875, + "50": 0.22865, + "51": 0.22909, + "52": 0.22123, + "53": 0.22076, + "54": 0.22154, + "55": 0.2222, + "56": 0.39897, + "57": 0.22058, + "58": 0.22118, + "59": 0.22849, + "60": 0.22871, + "61": 0.2225, + "62": 0.22208, + "63": 0.22298, + "64": 0.22377, + "65": 0.22446, + "66": 0.22435, + "67": 0.22221, + "68": 0.22386, + "69": 0.22616, + "70": 0.2232, + "71": 0.22301, + "72": 0.42061, + "73": 0.22703, + "74": 0.22271, + "75": 0.22204, + "76": 0.22282, + "77": 0.22517, + "78": 0.22207, + "79": 0.24309, + "80": 0.24317, + "81": 0.25879, + "82": 0.22268, + "83": 0.22204, + "84": 0.2228, + "85": 0.22447, + "86": 0.22388, + "87": 0.22291, + "88": 0.22259, + "89": 0.22341, + "90": 0.22502, + "91": 0.22225, + "92": 0.2218, + "93": 0.22176, + "94": 0.22225, + "95": 0.22471, + "96": 0.22277, + "97": 0.22023, + "98": 0.22426, + "99": 0.22626, + "100": 0.22111 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..7a6cb6fa053 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92705, + "2": 10.92645, + "3": 10.91603, + "4": 10.9091, + "5": 10.92799, + "6": 10.93628, + "7": 10.90625, + "8": 10.92129, + "9": 10.90998, + "10": 10.90789, + "11": 10.89335, 
+ "12": 10.92458, + "13": 10.91459, + "14": 10.92129, + "15": 10.88313, + "16": 10.87322, + "17": 10.84129, + "18": 10.87278, + "19": 10.85629, + "20": 10.77626, + "21": 10.7487, + "22": 10.63028, + "23": 10.75683, + "24": 10.65647, + "25": 10.59138, + "26": 10.65379, + "27": 10.6485, + "28": 10.59548, + "29": 10.60882, + "30": 10.39195, + "31": 10.15754, + "32": 10.49101, + "33": 10.47929, + "34": 10.24061, + "35": 10.29687, + "36": 10.2464, + "37": 10.35228, + "38": 10.20491, + "39": 10.4052, + "40": 10.0964, + "41": 10.15176, + "42": 10.22032, + "43": 9.85497, + "44": 9.96138, + "45": 9.84466, + "46": 9.83805, + "47": 10.13984, + "48": 9.85719, + "49": 9.53694, + "50": 9.9092, + "51": 9.84886, + "52": 9.74156, + "53": 10.06349, + "54": 9.94683, + "55": 9.87764, + "56": 9.6274, + "57": 9.47111, + "58": 9.8292, + "59": 9.58251, + "60": 9.49121, + "61": 9.69959, + "62": 9.97969, + "63": 9.37277, + "64": 9.77468, + "65": 8.94232, + "66": 9.69905, + "67": 9.3638, + "68": 9.78788, + "69": 9.78333, + "70": 9.72263, + "71": 9.60795, + "72": 9.5846, + "73": 9.48966, + "74": 8.9487, + "75": 9.41912, + "76": 9.08728, + "77": 10.06356, + "78": 9.72834, + "79": 9.37163, + "80": 9.40079, + "81": 9.47845, + "82": 9.69179, + "83": 9.30761, + "84": 9.41229, + "85": 9.61209, + "86": 9.07599, + "87": 9.5947, + "88": 9.74743, + "89": 9.60687, + "90": 9.81012, + "91": 9.3436, + "92": 9.36483, + "93": 9.0776, + "94": 8.83107, + "95": 9.51718, + "96": 9.5245, + "97": 9.31025, + "98": 9.67895, + "99": 8.88829, + "100": 9.40153 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 68.0, + "2": 52.0, + "3": 60.0, + "4": 54.0, + "5": 64.0, + "6": 64.0, + "7": 66.0, + "8": 69.0, + "9": 75.0, + "10": 61.0, + "11": 61.0, + "12": 71.0, + "13": 54.0, + "14": 61.0, + "15": 58.0, + "16": 58.0, + "17": 66.0, + "18": 56.0, + "19": 56.0, + "20": 64.0, + "21": 55.0, + "22": 55.0, + "23": 80.0, + "24": 69.0, + "25": 58.0, + "26": 85.0, + "27": 
67.0, + "28": 64.0, + "29": 60.0, + "30": 85.0, + "31": 77.0, + "32": 76.0, + "33": 85.0, + "34": 69.0, + "35": 66.0, + "36": 68.0, + "37": 68.0, + "38": 79.0, + "39": 69.0, + "40": 85.0, + "41": 71.0, + "42": 86.0, + "43": 78.0, + "44": 73.0, + "45": 84.0, + "46": 84.0, + "47": 78.0, + "48": 77.0, + "49": 76.0, + "50": 85.0, + "51": 70.0, + "52": 79.0, + "53": 78.0, + "54": 83.0, + "55": 69.0, + "56": 74.0, + "57": 76.0, + "58": 85.0, + "59": 67.0, + "60": 67.0, + "61": 81.0, + "62": 88.0, + "63": 76.0, + "64": 86.0, + "65": 65.0, + "66": 85.0, + "67": 64.0, + "68": 78.0, + "69": 67.0, + "70": 92.0, + "71": 68.0, + "72": 65.0, + "73": 90.0, + "74": 59.0, + "75": 51.0, + "76": 71.0, + "77": 73.0, + "78": 95.0, + "79": 84.0, + "80": 98.0, + "81": 65.0, + "82": 78.0, + "83": 64.0, + "84": 76.0, + "85": 86.0, + "86": 68.0, + "87": 85.0, + "88": 88.0, + "89": 88.0, + "90": 83.0, + "91": 51.0, + "92": 84.0, + "93": 69.0, + "94": 82.0, + "95": 72.0, + "96": 66.0, + "97": 83.0, + "98": 83.0, + "99": 65.0, + "100": 73.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 488144896.0, + "2": 488144896.0, + "3": 488144896.0, + "4": 488144896.0, + "5": 488144896.0, + "6": 488144896.0, + "7": 488144896.0, + "8": 488144896.0, + "9": 488144896.0, + "10": 488144896.0, + "11": 488144896.0, + "12": 488144896.0, + "13": 488144896.0, + "14": 488144896.0, + "15": 488144896.0, + "16": 488144896.0, + "17": 488144896.0, + "18": 488144896.0, + "19": 488144896.0, + "20": 488144896.0, + "21": 488144896.0, + "22": 488144896.0, + "23": 488144896.0, + "24": 488144896.0, + "25": 488144896.0, + "26": 488144896.0, + "27": 488144896.0, + "28": 488144896.0, + "29": 488144896.0, + "30": 488144896.0, + "31": 488144896.0, + "32": 488144896.0, + "33": 488144896.0, + "34": 488144896.0, + "35": 488144896.0, + "36": 488144896.0, + "37": 488144896.0, + "38": 488144896.0, + "39": 488144896.0, + "40": 488144896.0, + "41": 488144896.0, + "42": 
488144896.0, + "43": 488144896.0, + "44": 488144896.0, + "45": 488144896.0, + "46": 488144896.0, + "47": 488144896.0, + "48": 488144896.0, + "49": 488144896.0, + "50": 488144896.0, + "51": 488144896.0, + "52": 488144896.0, + "53": 488144896.0, + "54": 488144896.0, + "55": 488144896.0, + "56": 488144896.0, + "57": 488144896.0, + "58": 488144896.0, + "59": 488144896.0, + "60": 488144896.0, + "61": 488144896.0, + "62": 488144896.0, + "63": 488144896.0, + "64": 488144896.0, + "65": 488144896.0, + "66": 488144896.0, + "67": 488144896.0, + "68": 488144896.0, + "69": 488144896.0, + "70": 488144896.0, + "71": 488144896.0, + "72": 488144896.0, + "73": 488144896.0, + "74": 488144896.0, + "75": 488144896.0, + "76": 488144896.0, + "77": 488144896.0, + "78": 488144896.0, + "79": 488144896.0, + "80": 488144896.0, + "81": 488144896.0, + "82": 488144896.0, + "83": 488144896.0, + "84": 488144896.0, + "85": 488144896.0, + "86": 488144896.0, + "87": 488144896.0, + "88": 488144896.0, + "89": 488144896.0, + "90": 488144896.0, + "91": 488144896.0, + "92": 488144896.0, + "93": 488144896.0, + "94": 488144896.0, + "95": 488144896.0, + "96": 488144896.0, + "97": 488144896.0, + "98": 488144896.0, + "99": 488144896.0, + "100": 488144896.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2158389248.0, + "2": 2340559872.0, + "3": 2340559872.0, + "4": 2340559872.0, + "5": 2340559872.0, + "6": 2340559872.0, + "7": 2340559872.0, + "8": 2340559872.0, + "9": 2340559872.0, + "10": 2340559872.0, + "11": 2340559872.0, + "12": 2340559872.0, + "13": 2340559872.0, + "14": 2340559872.0, + "15": 2340559872.0, + "16": 2340559872.0, + "17": 2340559872.0, + "18": 2340559872.0, + "19": 2340559872.0, + "20": 2340559872.0, + "21": 2340559872.0, + "22": 2340559872.0, + "23": 2340559872.0, + "24": 2340559872.0, + "25": 2340559872.0, + "26": 2340559872.0, + "27": 2340559872.0, + "28": 2340559872.0, + "29": 2340559872.0, + "30": 2340559872.0, + 
"31": 2340559872.0, + "32": 2340559872.0, + "33": 2340559872.0, + "34": 2340559872.0, + "35": 2340559872.0, + "36": 2340559872.0, + "37": 2340559872.0, + "38": 2340559872.0, + "39": 2340559872.0, + "40": 2340559872.0, + "41": 2340559872.0, + "42": 2342132736.0, + "43": 2342132736.0, + "44": 2342132736.0, + "45": 2342132736.0, + "46": 2342132736.0, + "47": 2342132736.0, + "48": 2342132736.0, + "49": 2342132736.0, + "50": 2342132736.0, + "51": 2342132736.0, + "52": 2342132736.0, + "53": 2342132736.0, + "54": 2342132736.0, + "55": 2342132736.0, + "56": 2342132736.0, + "57": 2342132736.0, + "58": 2342132736.0, + "59": 2342132736.0, + "60": 2342132736.0, + "61": 2342132736.0, + "62": 2342132736.0, + "63": 2342132736.0, + "64": 2342132736.0, + "65": 2342132736.0, + "66": 2342132736.0, + "67": 2342132736.0, + "68": 2342132736.0, + "69": 2342132736.0, + "70": 2342132736.0, + "71": 2342132736.0, + "72": 2342132736.0, + "73": 2342132736.0, + "74": 2342132736.0, + "75": 2342132736.0, + "76": 2342132736.0, + "77": 2342132736.0, + "78": 2342132736.0, + "79": 2342132736.0, + "80": 2342132736.0, + "81": 2342132736.0, + "82": 2342132736.0, + "83": 2342132736.0, + "84": 2342132736.0, + "85": 2342132736.0, + "86": 2342132736.0, + "87": 2342132736.0, + "88": 2342132736.0, + "89": 2342132736.0, + "90": 2342132736.0, + "91": 2342132736.0, + "92": 2342132736.0, + "93": 2342132736.0, + "94": 2342132736.0, + "95": 2342132736.0, + "96": 2342132736.0, + "97": 2342132736.0, + "98": 2342132736.0, + "99": 2342132736.0, + "100": 2342132736.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.5603, + "2": 0.27395, + "3": 0.25016, + "4": 0.23465, + "5": 0.23169, + "6": 0.22889, + "7": 0.23765, + "8": 0.22887, + "9": 0.23381, + "10": 0.2266, + "11": 0.23432, + "12": 0.22287, + "13": 0.23838, + "14": 0.22383, + "15": 0.22359, + "16": 0.22462, + "17": 0.22449, + "18": 0.22452, + "19": 0.22358, + "20": 0.22653, + "21": 0.23567, + "22": 
0.22469, + "23": 0.22426, + "24": 0.22314, + "25": 0.22088, + "26": 0.22435, + "27": 0.22371, + "28": 0.22374, + "29": 0.22621, + "30": 0.22269, + "31": 0.22968, + "32": 0.22354, + "33": 0.21974, + "34": 0.21973, + "35": 0.22162, + "36": 0.21927, + "37": 0.21792, + "38": 0.22161, + "39": 0.218, + "40": 0.2218, + "41": 0.22011, + "42": 0.21906, + "43": 0.45489, + "44": 0.21843, + "45": 0.21693, + "46": 0.22243, + "47": 0.21818, + "48": 0.22186, + "49": 0.21947, + "50": 0.21913, + "51": 0.23038, + "52": 0.43735, + "53": 0.22226, + "54": 0.22253, + "55": 0.22038, + "56": 0.22255, + "57": 0.22026, + "58": 0.22445, + "59": 0.22812, + "60": 0.22248, + "61": 0.22206, + "62": 0.22823, + "63": 0.22874, + "64": 0.22255, + "65": 0.22446, + "66": 0.2261, + "67": 0.22601, + "68": 0.2276, + "69": 0.22081, + "70": 0.22481, + "71": 0.22176, + "72": 0.22629, + "73": 0.22287, + "74": 0.22171, + "75": 0.23035, + "76": 0.23044, + "77": 0.23294, + "78": 0.22982, + "79": 0.23205, + "80": 0.23206, + "81": 0.23504, + "82": 0.22297, + "83": 0.22323, + "84": 0.21927, + "85": 0.22167, + "86": 0.22409, + "87": 0.2216, + "88": 0.22052, + "89": 0.22173, + "90": 0.22337, + "91": 0.21893, + "92": 0.22093, + "93": 0.21931, + "94": 0.2206, + "95": 0.22306, + "96": 0.2207, + "97": 0.22191, + "98": 0.22163, + "99": 0.22443, + "100": 0.21867 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 17196f707fe..3a9edd7e4f6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, 
"end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.85949, + "2": 10.85553, + "3": 10.86543, + "4": 10.84553, "5": 10.88346, + "6": 10.89431, + "7": 10.87067, + "8": 10.86979, + "9": 10.86918, "10": 10.83886, + "11": 10.8943, + "12": 10.87983, + "13": 10.87985, + "14": 10.90321, "15": 10.84052, + "16": 10.83787, + "17": 10.80669, + "18": 10.83026, + "19": 10.82261, "20": 10.73193, + "21": 10.70748, + "22": 10.56005, + "23": 10.72399, + "24": 10.61114, "25": 10.54813, + "26": 10.61329, + "27": 10.63053, + "28": 10.56646, + "29": 10.59668, "30": 10.37137, + "31": 10.11725, + "32": 10.46127, + "33": 10.45249, + "34": 10.2169, "35": 10.27172, + "36": 10.23119, + "37": 10.34809, + "38": 10.1884, + "39": 10.41044, "40": 10.09425, + "41": 10.14707, + "42": 10.21242, + "43": 9.84105, + "44": 9.95918, "45": 9.84079, + "46": 9.82479, + "47": 10.13878, + "48": 9.85831, + "49": 9.54705, "50": 9.90875, + "51": 9.8558, + "52": 9.75237, + "53": 10.07589, + "54": 9.95688, "55": 9.88203, + "56": 9.6313, + "57": 9.48649, + "58": 9.83109, + "59": 9.58897, "60": 9.50643, + "61": 9.70363, + "62": 9.98286, + "63": 9.38302, + "64": 9.77901, "65": 8.95166, + "66": 9.70158, + "67": 9.37203, + "68": 9.78849, + "69": 9.79851, "70": 9.74737, + "71": 9.61908, + "72": 9.58502, + "73": 9.49721, + "74": 8.93927, "75": 9.42703, + "76": 9.0802, + "77": 10.06567, + "78": 9.72893, + "79": 9.3776, "80": 9.40982, + "81": 9.47976, + "82": 9.7018, + "83": 9.30612, + "84": 9.4209, "85": 9.61371, + "86": 9.07649, + "87": 9.5945, + "88": 9.75068, + "89": 9.60238, "90": 9.81898, + "91": 9.33894, + "92": 9.35716, + "93": 9.07879, + "94": 8.83503, "95": 9.52172, + "96": 9.53003, + "97": 9.31306, + "98": 9.67783, + "99": 8.89058, "100": 9.39725 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1688.0, + "2": 1806.0, + "3": 1675.0, + "4": 1842.0, "5": 1909.0, + "6": 1908.0, + "7": 1783.0, + "8": 1611.0, + "9": 1753.0, 
"10": 1457.0, + "11": 1880.0, + "12": 1683.0, + "13": 1907.0, + "14": 1733.0, "15": 1930.0, + "16": 1840.0, + "17": 1892.0, + "18": 1650.0, + "19": 1790.0, "20": 1596.0, + "21": 1765.0, + "22": 1616.0, + "23": 1974.0, + "24": 1621.0, "25": 1557.0, + "26": 1745.0, + "27": 1722.0, + "28": 1976.0, + "29": 2068.0, "30": 1860.0, + "31": 1536.0, + "32": 1883.0, + "33": 2071.0, + "34": 1894.0, "35": 1902.0, + "36": 1885.0, + "37": 2231.0, + "38": 2129.0, + "39": 2333.0, "40": 2207.0, + "41": 2193.0, + "42": 2322.0, + "43": 2015.0, + "44": 2089.0, "45": 2095.0, + "46": 2392.0, + "47": 2430.0, + "48": 2414.0, + "49": 2340.0, "50": 2416.0, + "51": 2613.0, + "52": 2538.0, + "53": 2792.0, + "54": 2801.0, "55": 2216.0, + "56": 2858.0, + "57": 2381.0, + "58": 2854.0, + "59": 2787.0, "60": 2457.0, + "61": 2941.0, + "62": 2543.0, + "63": 2408.0, + "64": 2968.0, "65": 2472.0, + "66": 2977.0, + "67": 2839.0, + "68": 2775.0, + "69": 2832.0, "70": 3057.0, + "71": 2909.0, + "72": 2421.0, + "73": 2982.0, + "74": 1922.0, "75": 2474.0, + "76": 3059.0, + "77": 3177.0, + "78": 3067.0, + "79": 3052.0, "80": 3338.0, + "81": 3644.0, + "82": 3234.0, + "83": 2798.0, + "84": 3196.0, "85": 3324.0, + "86": 2855.0, + "87": 3820.0, + "88": 2962.0, + "89": 3379.0, "90": 3096.0, + "91": 2857.0, + "92": 3077.0, + "93": 2693.0, + "94": 3312.0, "95": 3399.0, + "96": 3378.0, + "97": 3030.0, + "98": 3619.0, + "99": 3160.0, "100": 3128.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 517505536.0, + "2": 517505536.0, + "3": 517505536.0, + "4": 517505536.0, "5": 517505536.0, + "6": 517505536.0, + "7": 517505536.0, + "8": 517505536.0, + "9": 517505536.0, "10": 517505536.0, + "11": 517505536.0, + "12": 517505536.0, + "13": 517505536.0, + "14": 517505536.0, "15": 517505536.0, + "16": 517505536.0, + "17": 517505536.0, + "18": 517505536.0, + "19": 517505536.0, "20": 517505536.0, + "21": 517505536.0, + "22": 517505536.0, + "23": 
517505536.0, + "24": 517505536.0, "25": 517505536.0, + "26": 517505536.0, + "27": 517505536.0, + "28": 517505536.0, + "29": 517505536.0, "30": 517505536.0, + "31": 517505536.0, + "32": 517505536.0, + "33": 517505536.0, + "34": 517505536.0, "35": 517505536.0, + "36": 517505536.0, + "37": 517505536.0, + "38": 517505536.0, + "39": 517505536.0, "40": 517505536.0, + "41": 517505536.0, + "42": 517505536.0, + "43": 517505536.0, + "44": 517505536.0, "45": 517505536.0, + "46": 517505536.0, + "47": 517505536.0, + "48": 517505536.0, + "49": 517505536.0, "50": 517505536.0, + "51": 517505536.0, + "52": 517505536.0, + "53": 517505536.0, + "54": 517505536.0, "55": 517505536.0, + "56": 517505536.0, + "57": 517505536.0, + "58": 517505536.0, + "59": 517505536.0, "60": 517505536.0, + "61": 517505536.0, + "62": 517505536.0, + "63": 517505536.0, + "64": 517505536.0, "65": 517505536.0, + "66": 517505536.0, + "67": 517505536.0, + "68": 517505536.0, + "69": 517505536.0, "70": 517505536.0, + "71": 517505536.0, + "72": 517505536.0, + "73": 517505536.0, + "74": 517505536.0, "75": 517505536.0, + "76": 517505536.0, + "77": 517505536.0, + "78": 517505536.0, + "79": 517505536.0, "80": 517505536.0, + "81": 517505536.0, + "82": 517505536.0, + "83": 517505536.0, + "84": 517505536.0, "85": 517505536.0, + "86": 517505536.0, + "87": 517505536.0, + "88": 517505536.0, + "89": 517505536.0, "90": 517505536.0, + "91": 517505536.0, + "92": 517505536.0, + "93": 517505536.0, + "94": 517505536.0, "95": 517505536.0, + "96": 517505536.0, + "97": 517505536.0, + "98": 517505536.0, + "99": 517505536.0, "100": 517505536.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1246524928.0, + "2": 1428695552.0, + "3": 1428695552.0, + "4": 1428695552.0, "5": 1428695552.0, + "6": 1428695552.0, + "7": 1428695552.0, + "8": 1428695552.0, + "9": 1428695552.0, "10": 1428695552.0, + "11": 1428695552.0, + "12": 1428695552.0, + "13": 1428695552.0, + 
"14": 1428695552.0, "15": 1428695552.0, + "16": 1428695552.0, + "17": 1428695552.0, + "18": 1428695552.0, + "19": 1428695552.0, "20": 1428695552.0, + "21": 1428695552.0, + "22": 1428695552.0, + "23": 1428695552.0, + "24": 1428695552.0, "25": 1428695552.0, + "26": 1428695552.0, + "27": 1428695552.0, + "28": 1428695552.0, + "29": 1428695552.0, "30": 1428695552.0, + "31": 1428695552.0, + "32": 1428695552.0, + "33": 1428695552.0, + "34": 1428695552.0, "35": 1428695552.0, + "36": 1428695552.0, + "37": 1428695552.0, + "38": 1428695552.0, + "39": 1428695552.0, "40": 1428695552.0, + "41": 1428695552.0, + "42": 1428695552.0, + "43": 1428695552.0, + "44": 1428695552.0, "45": 1428695552.0, + "46": 1428695552.0, + "47": 1428695552.0, + "48": 1428695552.0, + "49": 1428695552.0, "50": 1428695552.0, + "51": 1428695552.0, + "52": 1428695552.0, + "53": 1428695552.0, + "54": 1428695552.0, "55": 1428695552.0, + "56": 1428695552.0, + "57": 1428695552.0, + "58": 1428695552.0, + "59": 1428695552.0, "60": 1428695552.0, + "61": 1428695552.0, + "62": 1428695552.0, + "63": 1428695552.0, + "64": 1428695552.0, "65": 1428695552.0, + "66": 1428695552.0, + "67": 1428695552.0, + "68": 1428695552.0, + "69": 1428695552.0, "70": 1428695552.0, + "71": 1428695552.0, + "72": 1428695552.0, + "73": 1428695552.0, + "74": 1428695552.0, "75": 1428695552.0, + "76": 1428695552.0, + "77": 1428695552.0, + "78": 1428695552.0, + "79": 1428695552.0, "80": 1428695552.0, + "81": 1428695552.0, + "82": 1428695552.0, + "83": 1428695552.0, + "84": 1428695552.0, "85": 1428695552.0, + "86": 1428695552.0, + "87": 1428695552.0, + "88": 1428695552.0, + "89": 1428695552.0, "90": 1428695552.0, + "91": 1428695552.0, + "92": 1428695552.0, + "93": 1428695552.0, + "94": 1428695552.0, "95": 1428695552.0, + "96": 1428695552.0, + "97": 1428695552.0, + "98": 1428695552.0, + "99": 1428695552.0, "100": 1428695552.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 
12.11861, - "5": 0.13752, - "10": 0.1366, - "15": 0.13654, - "20": 0.13695, - "25": 0.13215, - "30": 0.13388, - "35": 0.13399, - "40": 0.13296, - "45": 0.1338, - "50": 0.1346, - "55": 0.14239, - "60": 0.13127, - "65": 0.1338, - "70": 0.1338, - "75": 0.13194, - "80": 0.13347, - "85": 0.13297, - "90": 0.13212, - "95": 0.13413, - "100": 0.14016 + "1": 11.73094, + "2": 0.19559, + "3": 0.1642, + "4": 0.1606, + "5": 0.15484, + "6": 0.15429, + "7": 0.15295, + "8": 0.15498, + "9": 0.15721, + "10": 0.1545, + "11": 0.15341, + "12": 0.15604, + "13": 0.15488, + "14": 0.15754, + "15": 0.15556, + "16": 0.15659, + "17": 0.15948, + "18": 0.15489, + "19": 0.15826, + "20": 0.15555, + "21": 0.15514, + "22": 0.15475, + "23": 0.15663, + "24": 0.15606, + "25": 0.15661, + "26": 0.15687, + "27": 0.15374, + "28": 0.15858, + "29": 0.15645, + "30": 0.15976, + "31": 0.1537, + "32": 0.15299, + "33": 0.1537, + "34": 0.15989, + "35": 0.16418, + "36": 0.16174, + "37": 0.15863, + "38": 0.15554, + "39": 0.14997, + "40": 0.15226, + "41": 0.14966, + "42": 0.15127, + "43": 0.15105, + "44": 0.15192, + "45": 0.15376, + "46": 0.15087, + "47": 0.15236, + "48": 0.15124, + "49": 0.15141, + "50": 0.15372, + "51": 0.17295, + "52": 0.16619, + "53": 0.16729, + "54": 0.15813, + "55": 0.15026, + "56": 0.15186, + "57": 0.1532, + "58": 0.1539, + "59": 0.153, + "60": 0.15346, + "61": 0.15406, + "62": 0.15229, + "63": 0.15251, + "64": 0.15279, + "65": 0.15341, + "66": 0.15398, + "67": 0.15765, + "68": 0.15411, + "69": 0.15465, + "70": 0.15275, + "71": 0.15486, + "72": 0.15324, + "73": 0.1548, + "74": 0.15612, + "75": 0.15592, + "76": 0.15644, + "77": 0.15832, + "78": 0.15223, + "79": 0.1545, + "80": 0.15466, + "81": 0.1518, + "82": 0.15396, + "83": 0.15168, + "84": 0.15232, + "85": 0.15293, + "86": 0.15384, + "87": 0.15453, + "88": 0.15446, + "89": 0.15333, + "90": 0.1576, + "91": 0.15805, + "92": 0.15474, + "93": 0.15345, + "94": 0.15146, + "95": 0.15371, + "96": 0.15549, + "97": 0.15452, + "98": 0.15437, + "99": 
0.15398, + "100": 0.15413 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..39079566d74 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86543, + "4": 10.84553, + "5": 10.88346, + "6": 10.89431, + "7": 10.87067, + "8": 10.86979, + "9": 10.86918, + "10": 10.83886, + "11": 10.8943, + "12": 10.87983, + "13": 10.87985, + "14": 10.90321, + "15": 10.84052, + "16": 10.83787, + "17": 10.80669, + "18": 10.83026, + "19": 10.82261, + "20": 10.73193, + "21": 10.70748, + "22": 10.56005, + "23": 10.72399, + "24": 10.61114, + "25": 10.54813, + "26": 10.61329, + "27": 10.63053, + "28": 10.56646, + "29": 10.59668, + "30": 10.37137, + "31": 10.11725, + "32": 10.46127, + "33": 10.45249, + "34": 10.2169, + "35": 10.27172, + "36": 10.23119, + "37": 10.34809, + "38": 10.1884, + "39": 10.41044, + "40": 10.09425, + "41": 10.14707, + "42": 10.21242, + "43": 9.84105, + "44": 9.95918, + "45": 9.84079, + "46": 9.82479, + "47": 10.13878, + "48": 9.85831, + "49": 9.54705, + "50": 9.90875, + "51": 9.8558, + "52": 9.75237, + "53": 10.07589, + "54": 9.95688, + "55": 9.88203, + "56": 9.6313, + "57": 9.48649, + "58": 9.83109, + "59": 9.58897, + "60": 9.50643, + "61": 9.70363, + "62": 9.98286, + "63": 9.38302, + "64": 9.77901, + "65": 8.95166, + "66": 9.70158, + "67": 9.37203, + "68": 9.78849, + "69": 9.79851, + "70": 9.74737, + "71": 9.61908, + "72": 9.58502, + "73": 9.49721, + "74": 8.93927, + "75": 9.42703, + "76": 9.0802, + "77": 
10.06567, + "78": 9.72893, + "79": 9.3776, + "80": 9.40982, + "81": 9.47976, + "82": 9.7018, + "83": 9.30612, + "84": 9.4209, + "85": 9.61371, + "86": 9.07649, + "87": 9.5945, + "88": 9.75068, + "89": 9.60238, + "90": 9.81898, + "91": 9.33894, + "92": 9.35716, + "93": 9.07879, + "94": 8.83503, + "95": 9.52172, + "96": 9.53003, + "97": 9.31306, + "98": 9.67783, + "99": 8.89058, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1688.0, + "2": 1806.0, + "3": 1675.0, + "4": 1842.0, + "5": 1909.0, + "6": 1908.0, + "7": 1783.0, + "8": 1611.0, + "9": 1753.0, + "10": 1457.0, + "11": 1880.0, + "12": 1683.0, + "13": 1907.0, + "14": 1733.0, + "15": 1930.0, + "16": 1840.0, + "17": 1892.0, + "18": 1650.0, + "19": 1790.0, + "20": 1596.0, + "21": 1765.0, + "22": 1616.0, + "23": 1974.0, + "24": 1621.0, + "25": 1557.0, + "26": 1745.0, + "27": 1722.0, + "28": 1976.0, + "29": 2068.0, + "30": 1860.0, + "31": 1536.0, + "32": 1883.0, + "33": 2071.0, + "34": 1894.0, + "35": 1902.0, + "36": 1885.0, + "37": 2231.0, + "38": 2129.0, + "39": 2333.0, + "40": 2207.0, + "41": 2193.0, + "42": 2322.0, + "43": 2015.0, + "44": 2089.0, + "45": 2095.0, + "46": 2392.0, + "47": 2430.0, + "48": 2414.0, + "49": 2340.0, + "50": 2416.0, + "51": 2613.0, + "52": 2538.0, + "53": 2792.0, + "54": 2801.0, + "55": 2216.0, + "56": 2858.0, + "57": 2381.0, + "58": 2854.0, + "59": 2787.0, + "60": 2457.0, + "61": 2941.0, + "62": 2543.0, + "63": 2408.0, + "64": 2968.0, + "65": 2472.0, + "66": 2977.0, + "67": 2839.0, + "68": 2775.0, + "69": 2832.0, + "70": 3057.0, + "71": 2909.0, + "72": 2421.0, + "73": 2982.0, + "74": 1922.0, + "75": 2474.0, + "76": 3059.0, + "77": 3177.0, + "78": 3067.0, + "79": 3052.0, + "80": 3338.0, + "81": 3644.0, + "82": 3234.0, + "83": 2798.0, + "84": 3196.0, + "85": 3324.0, + "86": 2855.0, + "87": 3820.0, + "88": 2962.0, + "89": 3379.0, + "90": 3096.0, + "91": 2857.0, + "92": 3077.0, + "93": 2693.0, + "94": 3312.0, + "95": 
3399.0, + "96": 3378.0, + "97": 3030.0, + "98": 3619.0, + "99": 3160.0, + "100": 3128.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 517505536.0, + "2": 517505536.0, + "3": 517505536.0, + "4": 517505536.0, + "5": 517505536.0, + "6": 517505536.0, + "7": 517505536.0, + "8": 517505536.0, + "9": 517505536.0, + "10": 517505536.0, + "11": 517505536.0, + "12": 517505536.0, + "13": 517505536.0, + "14": 517505536.0, + "15": 517505536.0, + "16": 517505536.0, + "17": 517505536.0, + "18": 517505536.0, + "19": 517505536.0, + "20": 517505536.0, + "21": 517505536.0, + "22": 517505536.0, + "23": 517505536.0, + "24": 517505536.0, + "25": 517505536.0, + "26": 517505536.0, + "27": 517505536.0, + "28": 517505536.0, + "29": 517505536.0, + "30": 517505536.0, + "31": 517505536.0, + "32": 517505536.0, + "33": 517505536.0, + "34": 517505536.0, + "35": 517505536.0, + "36": 517505536.0, + "37": 517505536.0, + "38": 517505536.0, + "39": 517505536.0, + "40": 517505536.0, + "41": 517505536.0, + "42": 517505536.0, + "43": 517505536.0, + "44": 517505536.0, + "45": 517505536.0, + "46": 517505536.0, + "47": 517505536.0, + "48": 517505536.0, + "49": 517505536.0, + "50": 517505536.0, + "51": 517505536.0, + "52": 517505536.0, + "53": 517505536.0, + "54": 517505536.0, + "55": 517505536.0, + "56": 517505536.0, + "57": 517505536.0, + "58": 517505536.0, + "59": 517505536.0, + "60": 517505536.0, + "61": 517505536.0, + "62": 517505536.0, + "63": 517505536.0, + "64": 517505536.0, + "65": 517505536.0, + "66": 517505536.0, + "67": 517505536.0, + "68": 517505536.0, + "69": 517505536.0, + "70": 517505536.0, + "71": 517505536.0, + "72": 517505536.0, + "73": 517505536.0, + "74": 517505536.0, + "75": 517505536.0, + "76": 517505536.0, + "77": 517505536.0, + "78": 517505536.0, + "79": 517505536.0, + "80": 517505536.0, + "81": 517505536.0, + "82": 517505536.0, + "83": 517505536.0, + "84": 517505536.0, + "85": 517505536.0, + "86": 517505536.0, + 
"87": 517505536.0, + "88": 517505536.0, + "89": 517505536.0, + "90": 517505536.0, + "91": 517505536.0, + "92": 517505536.0, + "93": 517505536.0, + "94": 517505536.0, + "95": 517505536.0, + "96": 517505536.0, + "97": 517505536.0, + "98": 517505536.0, + "99": 517505536.0, + "100": 517505536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1246524928.0, + "2": 1428695552.0, + "3": 1428695552.0, + "4": 1428695552.0, + "5": 1428695552.0, + "6": 1428695552.0, + "7": 1428695552.0, + "8": 1428695552.0, + "9": 1428695552.0, + "10": 1428695552.0, + "11": 1428695552.0, + "12": 1428695552.0, + "13": 1428695552.0, + "14": 1428695552.0, + "15": 1428695552.0, + "16": 1428695552.0, + "17": 1428695552.0, + "18": 1428695552.0, + "19": 1428695552.0, + "20": 1428695552.0, + "21": 1428695552.0, + "22": 1428695552.0, + "23": 1428695552.0, + "24": 1428695552.0, + "25": 1428695552.0, + "26": 1428695552.0, + "27": 1428695552.0, + "28": 1428695552.0, + "29": 1428695552.0, + "30": 1428695552.0, + "31": 1428695552.0, + "32": 1428695552.0, + "33": 1428695552.0, + "34": 1428695552.0, + "35": 1428695552.0, + "36": 1428695552.0, + "37": 1428695552.0, + "38": 1428695552.0, + "39": 1428695552.0, + "40": 1428695552.0, + "41": 1428695552.0, + "42": 1428695552.0, + "43": 1428695552.0, + "44": 1428695552.0, + "45": 1428695552.0, + "46": 1428695552.0, + "47": 1428695552.0, + "48": 1428695552.0, + "49": 1428695552.0, + "50": 1428695552.0, + "51": 1428695552.0, + "52": 1428695552.0, + "53": 1428695552.0, + "54": 1428695552.0, + "55": 1428695552.0, + "56": 1428695552.0, + "57": 1428695552.0, + "58": 1428695552.0, + "59": 1428695552.0, + "60": 1428695552.0, + "61": 1428695552.0, + "62": 1428695552.0, + "63": 1428695552.0, + "64": 1428695552.0, + "65": 1428695552.0, + "66": 1428695552.0, + "67": 1428695552.0, + "68": 1428695552.0, + "69": 1428695552.0, + "70": 1428695552.0, + "71": 1428695552.0, + "72": 1428695552.0, + "73": 
1428695552.0, + "74": 1428695552.0, + "75": 1428695552.0, + "76": 1428695552.0, + "77": 1428695552.0, + "78": 1428695552.0, + "79": 1428695552.0, + "80": 1428695552.0, + "81": 1428695552.0, + "82": 1428695552.0, + "83": 1428695552.0, + "84": 1428695552.0, + "85": 1428695552.0, + "86": 1428695552.0, + "87": 1428695552.0, + "88": 1428695552.0, + "89": 1428695552.0, + "90": 1428695552.0, + "91": 1428695552.0, + "92": 1428695552.0, + "93": 1428695552.0, + "94": 1428695552.0, + "95": 1428695552.0, + "96": 1428695552.0, + "97": 1428695552.0, + "98": 1428695552.0, + "99": 1428695552.0, + "100": 1428695552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.53934, + "2": 0.16774, + "3": 0.13459, + "4": 0.13439, + "5": 0.13482, + "6": 0.13444, + "7": 0.13371, + "8": 0.1345, + "9": 0.13658, + "10": 0.13405, + "11": 0.13498, + "12": 0.13346, + "13": 0.13373, + "14": 0.14049, + "15": 0.13447, + "16": 0.13314, + "17": 0.13441, + "18": 0.14264, + "19": 0.15581, + "20": 0.14614, + "21": 0.14655, + "22": 0.14484, + "23": 0.13377, + "24": 0.13618, + "25": 0.13595, + "26": 0.13394, + "27": 0.13248, + "28": 0.13405, + "29": 0.13411, + "30": 0.13464, + "31": 0.13321, + "32": 0.134, + "33": 0.13496, + "34": 0.13356, + "35": 0.13325, + "36": 0.13329, + "37": 0.13359, + "38": 0.13442, + "39": 0.13494, + "40": 0.13456, + "41": 0.1333, + "42": 0.1357, + "43": 0.13407, + "44": 0.13499, + "45": 0.13371, + "46": 0.13423, + "47": 0.13545, + "48": 0.1355, + "49": 0.13329, + "50": 0.1329, + "51": 0.13926, + "52": 0.13217, + "53": 0.13369, + "54": 0.13177, + "55": 0.13062, + "56": 0.25118, + "57": 0.13283, + "58": 0.1331, + "59": 0.1388, + "60": 0.13244, + "61": 0.13219, + "62": 0.13234, + "63": 0.13297, + "64": 0.13104, + "65": 0.1339, + "66": 0.13079, + "67": 0.13112, + "68": 0.1322, + "69": 0.13305, + "70": 0.13172, + "71": 0.13249, + "72": 0.13138, + "73": 0.13329, + "74": 0.13115, + "75": 0.13263, + "76": 0.13234, + "77": 
0.13051, + "78": 0.13097, + "79": 0.13092, + "80": 0.13147, + "81": 0.13202, + "82": 0.13235, + "83": 0.13167, + "84": 0.13099, + "85": 0.13063, + "86": 0.13192, + "87": 0.13259, + "88": 0.13267, + "89": 0.13154, + "90": 0.13131, + "91": 0.13195, + "92": 0.13132, + "93": 0.13226, + "94": 0.13075, + "95": 0.13002, + "96": 0.13313, + "97": 0.13202, + "98": 0.13321, + "99": 0.1318, + "100": 0.13349 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..7c1078c0b3d --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86543, + "4": 10.84553, + "5": 10.88346, + "6": 10.89431, + "7": 10.87067, + "8": 10.86979, + "9": 10.86918, + "10": 10.83886, + "11": 10.8943, + "12": 10.87983, + "13": 10.87985, + "14": 10.90321, + "15": 10.84052, + "16": 10.83787, + "17": 10.80669, + "18": 10.83026, + "19": 10.82261, + "20": 10.73193, + "21": 10.70748, + "22": 10.56005, + "23": 10.72399, + "24": 10.61114, + "25": 10.54813, + "26": 10.61329, + "27": 10.63053, + "28": 10.56646, + "29": 10.59668, + "30": 10.37137, + "31": 10.11725, + "32": 10.46127, + "33": 10.45249, + "34": 10.2169, + "35": 10.27172, + "36": 10.23119, + "37": 10.34809, + "38": 10.1884, + "39": 10.41044, + "40": 10.09425, + "41": 10.14707, + "42": 10.21242, + "43": 9.84105, + "44": 9.95918, + "45": 9.84079, + "46": 9.82479, + "47": 10.13878, + "48": 9.85831, + "49": 9.54705, + "50": 9.90875, + "51": 9.8558, + "52": 9.75237, + "53": 10.07589, + "54": 9.95688, + "55": 9.88203, + "56": 
9.6313, + "57": 9.48649, + "58": 9.83109, + "59": 9.58897, + "60": 9.50643, + "61": 9.70363, + "62": 9.98286, + "63": 9.38302, + "64": 9.77901, + "65": 8.95166, + "66": 9.70158, + "67": 9.37203, + "68": 9.78849, + "69": 9.79851, + "70": 9.74737, + "71": 9.61908, + "72": 9.58502, + "73": 9.49721, + "74": 8.93927, + "75": 9.42703, + "76": 9.0802, + "77": 10.06567, + "78": 9.72893, + "79": 9.3776, + "80": 9.40982, + "81": 9.47976, + "82": 9.7018, + "83": 9.30612, + "84": 9.4209, + "85": 9.61371, + "86": 9.07649, + "87": 9.5945, + "88": 9.75068, + "89": 9.60238, + "90": 9.81898, + "91": 9.33894, + "92": 9.35716, + "93": 9.07879, + "94": 8.83503, + "95": 9.52172, + "96": 9.53003, + "97": 9.31306, + "98": 9.67783, + "99": 8.89058, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1688.0, + "2": 1806.0, + "3": 1675.0, + "4": 1842.0, + "5": 1909.0, + "6": 1908.0, + "7": 1783.0, + "8": 1611.0, + "9": 1753.0, + "10": 1457.0, + "11": 1880.0, + "12": 1683.0, + "13": 1907.0, + "14": 1733.0, + "15": 1930.0, + "16": 1840.0, + "17": 1892.0, + "18": 1650.0, + "19": 1790.0, + "20": 1596.0, + "21": 1765.0, + "22": 1616.0, + "23": 1974.0, + "24": 1621.0, + "25": 1557.0, + "26": 1745.0, + "27": 1722.0, + "28": 1976.0, + "29": 2068.0, + "30": 1860.0, + "31": 1536.0, + "32": 1883.0, + "33": 2071.0, + "34": 1894.0, + "35": 1902.0, + "36": 1885.0, + "37": 2231.0, + "38": 2129.0, + "39": 2333.0, + "40": 2207.0, + "41": 2193.0, + "42": 2322.0, + "43": 2015.0, + "44": 2089.0, + "45": 2095.0, + "46": 2392.0, + "47": 2430.0, + "48": 2414.0, + "49": 2340.0, + "50": 2416.0, + "51": 2613.0, + "52": 2538.0, + "53": 2792.0, + "54": 2801.0, + "55": 2216.0, + "56": 2858.0, + "57": 2381.0, + "58": 2854.0, + "59": 2787.0, + "60": 2457.0, + "61": 2941.0, + "62": 2543.0, + "63": 2408.0, + "64": 2968.0, + "65": 2472.0, + "66": 2977.0, + "67": 2839.0, + "68": 2775.0, + "69": 2832.0, + "70": 3057.0, + "71": 2909.0, + "72": 2421.0, + 
"73": 2982.0, + "74": 1922.0, + "75": 2474.0, + "76": 3059.0, + "77": 3177.0, + "78": 3067.0, + "79": 3052.0, + "80": 3338.0, + "81": 3644.0, + "82": 3234.0, + "83": 2798.0, + "84": 3196.0, + "85": 3324.0, + "86": 2855.0, + "87": 3820.0, + "88": 2962.0, + "89": 3379.0, + "90": 3096.0, + "91": 2857.0, + "92": 3077.0, + "93": 2693.0, + "94": 3312.0, + "95": 3399.0, + "96": 3378.0, + "97": 3030.0, + "98": 3619.0, + "99": 3160.0, + "100": 3128.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 517505536.0, + "2": 517505536.0, + "3": 517505536.0, + "4": 517505536.0, + "5": 517505536.0, + "6": 517505536.0, + "7": 517505536.0, + "8": 517505536.0, + "9": 517505536.0, + "10": 517505536.0, + "11": 517505536.0, + "12": 517505536.0, + "13": 517505536.0, + "14": 517505536.0, + "15": 517505536.0, + "16": 517505536.0, + "17": 517505536.0, + "18": 517505536.0, + "19": 517505536.0, + "20": 517505536.0, + "21": 517505536.0, + "22": 517505536.0, + "23": 517505536.0, + "24": 517505536.0, + "25": 517505536.0, + "26": 517505536.0, + "27": 517505536.0, + "28": 517505536.0, + "29": 517505536.0, + "30": 517505536.0, + "31": 517505536.0, + "32": 517505536.0, + "33": 517505536.0, + "34": 517505536.0, + "35": 517505536.0, + "36": 517505536.0, + "37": 517505536.0, + "38": 517505536.0, + "39": 517505536.0, + "40": 517505536.0, + "41": 517505536.0, + "42": 517505536.0, + "43": 517505536.0, + "44": 517505536.0, + "45": 517505536.0, + "46": 517505536.0, + "47": 517505536.0, + "48": 517505536.0, + "49": 517505536.0, + "50": 517505536.0, + "51": 517505536.0, + "52": 517505536.0, + "53": 517505536.0, + "54": 517505536.0, + "55": 517505536.0, + "56": 517505536.0, + "57": 517505536.0, + "58": 517505536.0, + "59": 517505536.0, + "60": 517505536.0, + "61": 517505536.0, + "62": 517505536.0, + "63": 517505536.0, + "64": 517505536.0, + "65": 517505536.0, + "66": 517505536.0, + "67": 517505536.0, + "68": 517505536.0, + "69": 517505536.0, + 
"70": 517505536.0, + "71": 517505536.0, + "72": 517505536.0, + "73": 517505536.0, + "74": 517505536.0, + "75": 517505536.0, + "76": 517505536.0, + "77": 517505536.0, + "78": 517505536.0, + "79": 517505536.0, + "80": 517505536.0, + "81": 517505536.0, + "82": 517505536.0, + "83": 517505536.0, + "84": 517505536.0, + "85": 517505536.0, + "86": 517505536.0, + "87": 517505536.0, + "88": 517505536.0, + "89": 517505536.0, + "90": 517505536.0, + "91": 517505536.0, + "92": 517505536.0, + "93": 517505536.0, + "94": 517505536.0, + "95": 517505536.0, + "96": 517505536.0, + "97": 517505536.0, + "98": 517505536.0, + "99": 517505536.0, + "100": 517505536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1246524928.0, + "2": 1428695552.0, + "3": 1428695552.0, + "4": 1428695552.0, + "5": 1428695552.0, + "6": 1428695552.0, + "7": 1428695552.0, + "8": 1428695552.0, + "9": 1428695552.0, + "10": 1428695552.0, + "11": 1428695552.0, + "12": 1428695552.0, + "13": 1428695552.0, + "14": 1428695552.0, + "15": 1428695552.0, + "16": 1428695552.0, + "17": 1428695552.0, + "18": 1428695552.0, + "19": 1428695552.0, + "20": 1428695552.0, + "21": 1428695552.0, + "22": 1428695552.0, + "23": 1428695552.0, + "24": 1428695552.0, + "25": 1428695552.0, + "26": 1428695552.0, + "27": 1428695552.0, + "28": 1428695552.0, + "29": 1428695552.0, + "30": 1428695552.0, + "31": 1428695552.0, + "32": 1428695552.0, + "33": 1428695552.0, + "34": 1428695552.0, + "35": 1428695552.0, + "36": 1428695552.0, + "37": 1428695552.0, + "38": 1428695552.0, + "39": 1428695552.0, + "40": 1428695552.0, + "41": 1428695552.0, + "42": 1428695552.0, + "43": 1428695552.0, + "44": 1428695552.0, + "45": 1428695552.0, + "46": 1428695552.0, + "47": 1428695552.0, + "48": 1428695552.0, + "49": 1428695552.0, + "50": 1428695552.0, + "51": 1428695552.0, + "52": 1428695552.0, + "53": 1428695552.0, + "54": 1428695552.0, + "55": 1428695552.0, + "56": 1428695552.0, + "57": 
1428695552.0, + "58": 1428695552.0, + "59": 1428695552.0, + "60": 1428695552.0, + "61": 1428695552.0, + "62": 1428695552.0, + "63": 1428695552.0, + "64": 1428695552.0, + "65": 1428695552.0, + "66": 1428695552.0, + "67": 1428695552.0, + "68": 1428695552.0, + "69": 1428695552.0, + "70": 1428695552.0, + "71": 1428695552.0, + "72": 1428695552.0, + "73": 1428695552.0, + "74": 1428695552.0, + "75": 1428695552.0, + "76": 1428695552.0, + "77": 1428695552.0, + "78": 1428695552.0, + "79": 1428695552.0, + "80": 1428695552.0, + "81": 1428695552.0, + "82": 1428695552.0, + "83": 1428695552.0, + "84": 1428695552.0, + "85": 1428695552.0, + "86": 1428695552.0, + "87": 1428695552.0, + "88": 1428695552.0, + "89": 1428695552.0, + "90": 1428695552.0, + "91": 1428695552.0, + "92": 1428695552.0, + "93": 1428695552.0, + "94": 1428695552.0, + "95": 1428695552.0, + "96": 1428695552.0, + "97": 1428695552.0, + "98": 1428695552.0, + "99": 1428695552.0, + "100": 1428695552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.96359, + "2": 0.17007, + "3": 0.15511, + "4": 0.15439, + "5": 0.15477, + "6": 0.15459, + "7": 0.15427, + "8": 0.15173, + "9": 0.15484, + "10": 0.15363, + "11": 0.15353, + "12": 0.15567, + "13": 0.15258, + "14": 0.15438, + "15": 0.15305, + "16": 0.15314, + "17": 0.15342, + "18": 0.15282, + "19": 0.15336, + "20": 0.15333, + "21": 0.15174, + "22": 0.15412, + "23": 0.15337, + "24": 0.15464, + "25": 0.15638, + "26": 0.15618, + "27": 0.15599, + "28": 0.15616, + "29": 0.15792, + "30": 0.15422, + "31": 0.15441, + "32": 0.15356, + "33": 0.15622, + "34": 0.15397, + "35": 0.15443, + "36": 0.15392, + "37": 0.15454, + "38": 0.15581, + "39": 0.15513, + "40": 0.15813, + "41": 0.1595, + "42": 0.15604, + "43": 0.15809, + "44": 0.15585, + "45": 0.15659, + "46": 0.15599, + "47": 0.15378, + "48": 0.15475, + "49": 0.1544, + "50": 0.15569, + "51": 0.16391, + "52": 0.16196, + "53": 0.16029, + "54": 0.16138, + "55": 0.15673, + "56": 
0.1503, + "57": 0.15071, + "58": 0.15268, + "59": 0.15095, + "60": 0.15189, + "61": 0.15199, + "62": 0.14938, + "63": 0.15046, + "64": 0.14924, + "65": 0.15129, + "66": 0.14938, + "67": 0.15233, + "68": 0.15028, + "69": 0.1525, + "70": 0.15334, + "71": 0.15152, + "72": 0.15138, + "73": 0.15304, + "74": 0.1515, + "75": 0.15282, + "76": 0.1518, + "77": 0.15193, + "78": 0.15262, + "79": 0.15274, + "80": 0.15251, + "81": 0.15108, + "82": 0.15199, + "83": 0.15046, + "84": 0.15298, + "85": 0.15063, + "86": 0.15132, + "87": 0.15257, + "88": 0.15109, + "89": 0.1502, + "90": 0.15259, + "91": 0.15063, + "92": 0.15237, + "93": 0.15096, + "94": 0.1517, + "95": 0.15049, + "96": 0.15002, + "97": 0.15011, + "98": 0.15349, + "99": 0.1565, + "100": 0.15223 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..fb6afd47964 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92705, + "2": 10.92645, + "3": 10.91604, + "4": 10.90911, + "5": 10.92795, + "6": 10.93626, + "7": 10.90626, + "8": 10.92128, + "9": 10.90998, + "10": 10.90786, + "11": 10.89335, + "12": 10.92456, + "13": 10.9146, + "14": 10.9213, + "15": 10.88314, + "16": 10.87325, + "17": 10.84129, + "18": 10.87276, + "19": 10.8563, + "20": 10.77629, + "21": 10.74869, + "22": 10.63031, + "23": 10.75678, + "24": 10.65646, + "25": 10.59141, + "26": 10.65375, + "27": 10.6485, + "28": 10.59548, + "29": 10.6088, + "30": 10.39192, + "31": 10.15753, + "32": 10.49098, + "33": 10.4793, + "34": 10.24058, 
+ "35": 10.29686, + "36": 10.24644, + "37": 10.35232, + "38": 10.20489, + "39": 10.4052, + "40": 10.0964, + "41": 10.15175, + "42": 10.22026, + "43": 9.85499, + "44": 9.96143, + "45": 9.84464, + "46": 9.83801, + "47": 10.13988, + "48": 9.85718, + "49": 9.53698, + "50": 9.90918, + "51": 9.84886, + "52": 9.74154, + "53": 10.06347, + "54": 9.94683, + "55": 9.87762, + "56": 9.6274, + "57": 9.47112, + "58": 9.82925, + "59": 9.58253, + "60": 9.49121, + "61": 9.69956, + "62": 9.97968, + "63": 9.37277, + "64": 9.77468, + "65": 8.94236, + "66": 9.6991, + "67": 9.36382, + "68": 9.78787, + "69": 9.78332, + "70": 9.72266, + "71": 9.60801, + "72": 9.58459, + "73": 9.48963, + "74": 8.94871, + "75": 9.41912, + "76": 9.08725, + "77": 10.06354, + "78": 9.72835, + "79": 9.37162, + "80": 9.40077, + "81": 9.47843, + "82": 9.69177, + "83": 9.3076, + "84": 9.41232, + "85": 9.61207, + "86": 9.07599, + "87": 9.59468, + "88": 9.74738, + "89": 9.60686, + "90": 9.81015, + "91": 9.34359, + "92": 9.36482, + "93": 9.07761, + "94": 8.83108, + "95": 9.51716, + "96": 9.52447, + "97": 9.31027, + "98": 9.67892, + "99": 8.88832, + "100": 9.4015 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1627.0, + "2": 1801.0, + "3": 1730.0, + "4": 1762.0, + "5": 2010.0, + "6": 1889.0, + "7": 1888.0, + "8": 1729.0, + "9": 1852.0, + "10": 1368.0, + "11": 1973.0, + "12": 1722.0, + "13": 1966.0, + "14": 1874.0, + "15": 1897.0, + "16": 1785.0, + "17": 1942.0, + "18": 1718.0, + "19": 1716.0, + "20": 1626.0, + "21": 1797.0, + "22": 1673.0, + "23": 1937.0, + "24": 1561.0, + "25": 1743.0, + "26": 1917.0, + "27": 1886.0, + "28": 1968.0, + "29": 2029.0, + "30": 1930.0, + "31": 1635.0, + "32": 1974.0, + "33": 2159.0, + "34": 2035.0, + "35": 1954.0, + "36": 1948.0, + "37": 2317.0, + "38": 2312.0, + "39": 2458.0, + "40": 2199.0, + "41": 2352.0, + "42": 2288.0, + "43": 2005.0, + "44": 2191.0, + "45": 2068.0, + "46": 2272.0, + "47": 2530.0, + "48": 2458.0, + "49": 
2252.0, + "50": 2460.0, + "51": 2777.0, + "52": 2659.0, + "53": 2959.0, + "54": 2700.0, + "55": 2427.0, + "56": 2797.0, + "57": 2430.0, + "58": 3077.0, + "59": 2781.0, + "60": 2380.0, + "61": 2816.0, + "62": 2812.0, + "63": 2452.0, + "64": 2958.0, + "65": 2657.0, + "66": 3208.0, + "67": 2786.0, + "68": 2842.0, + "69": 2927.0, + "70": 3265.0, + "71": 3098.0, + "72": 2445.0, + "73": 3120.0, + "74": 1900.0, + "75": 2675.0, + "76": 3065.0, + "77": 3452.0, + "78": 3263.0, + "79": 3398.0, + "80": 3434.0, + "81": 3695.0, + "82": 3308.0, + "83": 2935.0, + "84": 3423.0, + "85": 3302.0, + "86": 2785.0, + "87": 3788.0, + "88": 3030.0, + "89": 3532.0, + "90": 3230.0, + "91": 2681.0, + "92": 3175.0, + "93": 2718.0, + "94": 3392.0, + "95": 3340.0, + "96": 3504.0, + "97": 3227.0, + "98": 3757.0, + "99": 3245.0, + "100": 3291.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 487096320.0, + "2": 487096320.0, + "3": 487096320.0, + "4": 487096320.0, + "5": 487096320.0, + "6": 487096320.0, + "7": 487096320.0, + "8": 487096320.0, + "9": 487096320.0, + "10": 487096320.0, + "11": 487096320.0, + "12": 487096320.0, + "13": 487096320.0, + "14": 487096320.0, + "15": 487096320.0, + "16": 487096320.0, + "17": 487096320.0, + "18": 487096320.0, + "19": 487096320.0, + "20": 487096320.0, + "21": 487096320.0, + "22": 487096320.0, + "23": 487096320.0, + "24": 487096320.0, + "25": 487096320.0, + "26": 487096320.0, + "27": 487096320.0, + "28": 487096320.0, + "29": 487096320.0, + "30": 487096320.0, + "31": 487096320.0, + "32": 487096320.0, + "33": 487096320.0, + "34": 487096320.0, + "35": 487096320.0, + "36": 487096320.0, + "37": 487096320.0, + "38": 487096320.0, + "39": 487096320.0, + "40": 487096320.0, + "41": 487096320.0, + "42": 487096320.0, + "43": 487096320.0, + "44": 487096320.0, + "45": 487096320.0, + "46": 487096320.0, + "47": 487096320.0, + "48": 487096320.0, + "49": 487096320.0, + "50": 487096320.0, + "51": 487096320.0, + 
"52": 487096320.0, + "53": 487096320.0, + "54": 487096320.0, + "55": 487096320.0, + "56": 487096320.0, + "57": 487096320.0, + "58": 487096320.0, + "59": 487096320.0, + "60": 487096320.0, + "61": 487096320.0, + "62": 487096320.0, + "63": 487096320.0, + "64": 487096320.0, + "65": 487096320.0, + "66": 487096320.0, + "67": 487096320.0, + "68": 487096320.0, + "69": 487096320.0, + "70": 487096320.0, + "71": 487096320.0, + "72": 487096320.0, + "73": 487096320.0, + "74": 487096320.0, + "75": 487096320.0, + "76": 487096320.0, + "77": 487096320.0, + "78": 487096320.0, + "79": 487096320.0, + "80": 487096320.0, + "81": 487096320.0, + "82": 487096320.0, + "83": 487096320.0, + "84": 487096320.0, + "85": 487096320.0, + "86": 487096320.0, + "87": 487096320.0, + "88": 487096320.0, + "89": 487096320.0, + "90": 487096320.0, + "91": 487096320.0, + "92": 487096320.0, + "93": 487096320.0, + "94": 487096320.0, + "95": 487096320.0, + "96": 487096320.0, + "97": 487096320.0, + "98": 487096320.0, + "99": 487096320.0, + "100": 487096320.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1720084480.0, + "2": 1900157952.0, + "3": 1900157952.0, + "4": 1900157952.0, + "5": 1900157952.0, + "6": 1900157952.0, + "7": 1900157952.0, + "8": 1900157952.0, + "9": 1900157952.0, + "10": 1900157952.0, + "11": 1900157952.0, + "12": 1900157952.0, + "13": 1900157952.0, + "14": 1900157952.0, + "15": 1900157952.0, + "16": 1900157952.0, + "17": 1900157952.0, + "18": 1900157952.0, + "19": 1900157952.0, + "20": 1900157952.0, + "21": 1900157952.0, + "22": 1900157952.0, + "23": 1900157952.0, + "24": 1900157952.0, + "25": 1900157952.0, + "26": 1900157952.0, + "27": 1900157952.0, + "28": 1900157952.0, + "29": 1900157952.0, + "30": 1900157952.0, + "31": 1900157952.0, + "32": 1900157952.0, + "33": 1900157952.0, + "34": 1900157952.0, + "35": 1900157952.0, + "36": 1900157952.0, + "37": 1900157952.0, + "38": 1900157952.0, + "39": 1900157952.0, + "40": 
1900157952.0, + "41": 1900157952.0, + "42": 1900157952.0, + "43": 1900157952.0, + "44": 1900157952.0, + "45": 1900157952.0, + "46": 1900157952.0, + "47": 1900157952.0, + "48": 1900157952.0, + "49": 1900157952.0, + "50": 1900157952.0, + "51": 1900157952.0, + "52": 1900157952.0, + "53": 1900157952.0, + "54": 1900157952.0, + "55": 1900157952.0, + "56": 1900157952.0, + "57": 1900157952.0, + "58": 1900157952.0, + "59": 1900157952.0, + "60": 1900157952.0, + "61": 1900157952.0, + "62": 1900157952.0, + "63": 1900157952.0, + "64": 1900157952.0, + "65": 1900157952.0, + "66": 1900157952.0, + "67": 1900157952.0, + "68": 1900157952.0, + "69": 1900157952.0, + "70": 1900157952.0, + "71": 1900157952.0, + "72": 1900157952.0, + "73": 1900157952.0, + "74": 1900157952.0, + "75": 1900157952.0, + "76": 1900157952.0, + "77": 1900157952.0, + "78": 1900157952.0, + "79": 1900157952.0, + "80": 1900157952.0, + "81": 1900157952.0, + "82": 1900157952.0, + "83": 1900157952.0, + "84": 1900157952.0, + "85": 1900157952.0, + "86": 1900157952.0, + "87": 1900157952.0, + "88": 1900157952.0, + "89": 1900157952.0, + "90": 1900157952.0, + "91": 1900157952.0, + "92": 1900157952.0, + "93": 1900157952.0, + "94": 1900157952.0, + "95": 1900157952.0, + "96": 1900157952.0, + "97": 1900157952.0, + "98": 1900157952.0, + "99": 1900157952.0, + "100": 1900157952.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.82235, + "2": 0.57043, + "3": 0.23395, + "4": 0.22773, + "5": 0.23061, + "6": 0.22681, + "7": 0.22898, + "8": 0.22777, + "9": 0.23178, + "10": 0.22844, + "11": 0.22696, + "12": 0.22691, + "13": 0.22689, + "14": 0.22608, + "15": 0.22509, + "16": 0.22608, + "17": 0.22957, + "18": 0.22818, + "19": 0.22555, + "20": 0.22522, + "21": 0.22614, + "22": 0.22905, + "23": 0.22671, + "24": 0.22771, + "25": 0.22415, + "26": 0.22381, + "27": 0.22625, + "28": 0.22438, + "29": 0.22389, + "30": 0.22364, + "31": 0.22738, + "32": 0.2239, + "33": 0.22369, + "34": 
0.2237, + "35": 0.22477, + "36": 0.22703, + "37": 0.22298, + "38": 0.22346, + "39": 0.22306, + "40": 0.22845, + "41": 0.2224, + "42": 0.22168, + "43": 0.22358, + "44": 0.22055, + "45": 0.22285, + "46": 0.21986, + "47": 0.21973, + "48": 0.22077, + "49": 0.47346, + "50": 0.21958, + "51": 0.23099, + "52": 0.22467, + "53": 0.22654, + "54": 0.22546, + "55": 0.2396, + "56": 0.28734, + "57": 0.3188, + "58": 0.30845, + "59": 0.2927, + "60": 0.26475, + "61": 0.31496, + "62": 0.32446, + "63": 0.27846, + "64": 0.29143, + "65": 0.28739, + "66": 0.25616, + "67": 0.23629, + "68": 0.22554, + "69": 0.22096, + "70": 0.22295, + "71": 0.22447, + "72": 0.22432, + "73": 0.22303, + "74": 0.22272, + "75": 0.22429, + "76": 0.22195, + "77": 0.21956, + "78": 0.22046, + "79": 0.22253, + "80": 0.22346, + "81": 0.22141, + "82": 0.22072, + "83": 0.22211, + "84": 0.22335, + "85": 0.22188, + "86": 0.21998, + "87": 0.22058, + "88": 0.22605, + "89": 0.22132, + "90": 0.22322, + "91": 0.22195, + "92": 0.22145, + "93": 0.22388, + "94": 0.2227, + "95": 0.21996, + "96": 0.22067, + "97": 0.22039, + "98": 0.22287, + "99": 0.22626, + "100": 0.22164 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..de7286cfa2d --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92705, + "2": 10.92645, + "3": 10.91604, + "4": 10.90911, + "5": 10.92795, + "6": 10.93626, + "7": 10.90626, + "8": 10.92128, + "9": 10.90998, + "10": 10.90786, + "11": 10.89335, + "12": 10.92456, + "13": 10.9146, + "14": 
10.9213, + "15": 10.88314, + "16": 10.87325, + "17": 10.84129, + "18": 10.87276, + "19": 10.8563, + "20": 10.77629, + "21": 10.74869, + "22": 10.63031, + "23": 10.75678, + "24": 10.65646, + "25": 10.59141, + "26": 10.65375, + "27": 10.6485, + "28": 10.59548, + "29": 10.6088, + "30": 10.39192, + "31": 10.15753, + "32": 10.49098, + "33": 10.4793, + "34": 10.24058, + "35": 10.29686, + "36": 10.24644, + "37": 10.35232, + "38": 10.20489, + "39": 10.4052, + "40": 10.0964, + "41": 10.15175, + "42": 10.22026, + "43": 9.85499, + "44": 9.96143, + "45": 9.84464, + "46": 9.83801, + "47": 10.13988, + "48": 9.85718, + "49": 9.53698, + "50": 9.90918, + "51": 9.84886, + "52": 9.74154, + "53": 10.06347, + "54": 9.94683, + "55": 9.87762, + "56": 9.6274, + "57": 9.47112, + "58": 9.82925, + "59": 9.58253, + "60": 9.49121, + "61": 9.69956, + "62": 9.97968, + "63": 9.37277, + "64": 9.77468, + "65": 8.94236, + "66": 9.6991, + "67": 9.36382, + "68": 9.78787, + "69": 9.78332, + "70": 9.72266, + "71": 9.60801, + "72": 9.58459, + "73": 9.48963, + "74": 8.94871, + "75": 9.41912, + "76": 9.08725, + "77": 10.06354, + "78": 9.72835, + "79": 9.37162, + "80": 9.40077, + "81": 9.47843, + "82": 9.69177, + "83": 9.3076, + "84": 9.41232, + "85": 9.61207, + "86": 9.07599, + "87": 9.59468, + "88": 9.74738, + "89": 9.60686, + "90": 9.81015, + "91": 9.34359, + "92": 9.36482, + "93": 9.07761, + "94": 8.83108, + "95": 9.51716, + "96": 9.52447, + "97": 9.31027, + "98": 9.67892, + "99": 8.88832, + "100": 9.4015 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1627.0, + "2": 1801.0, + "3": 1730.0, + "4": 1762.0, + "5": 2010.0, + "6": 1889.0, + "7": 1888.0, + "8": 1729.0, + "9": 1852.0, + "10": 1368.0, + "11": 1973.0, + "12": 1722.0, + "13": 1966.0, + "14": 1874.0, + "15": 1897.0, + "16": 1785.0, + "17": 1942.0, + "18": 1718.0, + "19": 1716.0, + "20": 1626.0, + "21": 1797.0, + "22": 1673.0, + "23": 1937.0, + "24": 1561.0, + "25": 1743.0, + "26": 1917.0, 
+ "27": 1886.0, + "28": 1968.0, + "29": 2029.0, + "30": 1930.0, + "31": 1635.0, + "32": 1974.0, + "33": 2159.0, + "34": 2035.0, + "35": 1954.0, + "36": 1948.0, + "37": 2317.0, + "38": 2312.0, + "39": 2458.0, + "40": 2199.0, + "41": 2352.0, + "42": 2288.0, + "43": 2005.0, + "44": 2191.0, + "45": 2068.0, + "46": 2272.0, + "47": 2530.0, + "48": 2458.0, + "49": 2252.0, + "50": 2460.0, + "51": 2777.0, + "52": 2659.0, + "53": 2959.0, + "54": 2700.0, + "55": 2427.0, + "56": 2797.0, + "57": 2430.0, + "58": 3077.0, + "59": 2781.0, + "60": 2380.0, + "61": 2816.0, + "62": 2812.0, + "63": 2452.0, + "64": 2958.0, + "65": 2657.0, + "66": 3208.0, + "67": 2786.0, + "68": 2842.0, + "69": 2927.0, + "70": 3265.0, + "71": 3098.0, + "72": 2445.0, + "73": 3120.0, + "74": 1900.0, + "75": 2675.0, + "76": 3065.0, + "77": 3452.0, + "78": 3263.0, + "79": 3398.0, + "80": 3434.0, + "81": 3695.0, + "82": 3308.0, + "83": 2935.0, + "84": 3423.0, + "85": 3302.0, + "86": 2785.0, + "87": 3788.0, + "88": 3030.0, + "89": 3532.0, + "90": 3230.0, + "91": 2681.0, + "92": 3175.0, + "93": 2718.0, + "94": 3392.0, + "95": 3340.0, + "96": 3504.0, + "97": 3227.0, + "98": 3757.0, + "99": 3245.0, + "100": 3291.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 487096320.0, + "2": 487096320.0, + "3": 487096320.0, + "4": 487096320.0, + "5": 487096320.0, + "6": 487096320.0, + "7": 487096320.0, + "8": 487096320.0, + "9": 487096320.0, + "10": 487096320.0, + "11": 487096320.0, + "12": 487096320.0, + "13": 487096320.0, + "14": 487096320.0, + "15": 487096320.0, + "16": 487096320.0, + "17": 487096320.0, + "18": 487096320.0, + "19": 487096320.0, + "20": 487096320.0, + "21": 487096320.0, + "22": 487096320.0, + "23": 487096320.0, + "24": 487096320.0, + "25": 487096320.0, + "26": 487096320.0, + "27": 487096320.0, + "28": 487096320.0, + "29": 487096320.0, + "30": 487096320.0, + "31": 487096320.0, + "32": 487096320.0, + "33": 487096320.0, + "34": 487096320.0, 
+ "35": 487096320.0, + "36": 487096320.0, + "37": 487096320.0, + "38": 487096320.0, + "39": 487096320.0, + "40": 487096320.0, + "41": 487096320.0, + "42": 487096320.0, + "43": 487096320.0, + "44": 487096320.0, + "45": 487096320.0, + "46": 487096320.0, + "47": 487096320.0, + "48": 487096320.0, + "49": 487096320.0, + "50": 487096320.0, + "51": 487096320.0, + "52": 487096320.0, + "53": 487096320.0, + "54": 487096320.0, + "55": 487096320.0, + "56": 487096320.0, + "57": 487096320.0, + "58": 487096320.0, + "59": 487096320.0, + "60": 487096320.0, + "61": 487096320.0, + "62": 487096320.0, + "63": 487096320.0, + "64": 487096320.0, + "65": 487096320.0, + "66": 487096320.0, + "67": 487096320.0, + "68": 487096320.0, + "69": 487096320.0, + "70": 487096320.0, + "71": 487096320.0, + "72": 487096320.0, + "73": 487096320.0, + "74": 487096320.0, + "75": 487096320.0, + "76": 487096320.0, + "77": 487096320.0, + "78": 487096320.0, + "79": 487096320.0, + "80": 487096320.0, + "81": 487096320.0, + "82": 487096320.0, + "83": 487096320.0, + "84": 487096320.0, + "85": 487096320.0, + "86": 487096320.0, + "87": 487096320.0, + "88": 487096320.0, + "89": 487096320.0, + "90": 487096320.0, + "91": 487096320.0, + "92": 487096320.0, + "93": 487096320.0, + "94": 487096320.0, + "95": 487096320.0, + "96": 487096320.0, + "97": 487096320.0, + "98": 487096320.0, + "99": 487096320.0, + "100": 487096320.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1720084480.0, + "2": 1900157952.0, + "3": 1900157952.0, + "4": 1900157952.0, + "5": 1900157952.0, + "6": 1900157952.0, + "7": 1900157952.0, + "8": 1900157952.0, + "9": 1900157952.0, + "10": 1900157952.0, + "11": 1900157952.0, + "12": 1900157952.0, + "13": 1900157952.0, + "14": 1900157952.0, + "15": 1900157952.0, + "16": 1900157952.0, + "17": 1900157952.0, + "18": 1900157952.0, + "19": 1900157952.0, + "20": 1900157952.0, + "21": 1900157952.0, + "22": 1900157952.0, + "23": 1900157952.0, + 
"24": 1900157952.0, + "25": 1900157952.0, + "26": 1900157952.0, + "27": 1900157952.0, + "28": 1900157952.0, + "29": 1900157952.0, + "30": 1900157952.0, + "31": 1900157952.0, + "32": 1900157952.0, + "33": 1900157952.0, + "34": 1900157952.0, + "35": 1900157952.0, + "36": 1900157952.0, + "37": 1900157952.0, + "38": 1900157952.0, + "39": 1900157952.0, + "40": 1900157952.0, + "41": 1900157952.0, + "42": 1900157952.0, + "43": 1900157952.0, + "44": 1900157952.0, + "45": 1900157952.0, + "46": 1900157952.0, + "47": 1900157952.0, + "48": 1900157952.0, + "49": 1900157952.0, + "50": 1900157952.0, + "51": 1900157952.0, + "52": 1900157952.0, + "53": 1900157952.0, + "54": 1900157952.0, + "55": 1900157952.0, + "56": 1900157952.0, + "57": 1900157952.0, + "58": 1900157952.0, + "59": 1900157952.0, + "60": 1900157952.0, + "61": 1900157952.0, + "62": 1900157952.0, + "63": 1900157952.0, + "64": 1900157952.0, + "65": 1900157952.0, + "66": 1900157952.0, + "67": 1900157952.0, + "68": 1900157952.0, + "69": 1900157952.0, + "70": 1900157952.0, + "71": 1900157952.0, + "72": 1900157952.0, + "73": 1900157952.0, + "74": 1900157952.0, + "75": 1900157952.0, + "76": 1900157952.0, + "77": 1900157952.0, + "78": 1900157952.0, + "79": 1900157952.0, + "80": 1900157952.0, + "81": 1900157952.0, + "82": 1900157952.0, + "83": 1900157952.0, + "84": 1900157952.0, + "85": 1900157952.0, + "86": 1900157952.0, + "87": 1900157952.0, + "88": 1900157952.0, + "89": 1900157952.0, + "90": 1900157952.0, + "91": 1900157952.0, + "92": 1900157952.0, + "93": 1900157952.0, + "94": 1900157952.0, + "95": 1900157952.0, + "96": 1900157952.0, + "97": 1900157952.0, + "98": 1900157952.0, + "99": 1900157952.0, + "100": 1900157952.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.18635, + "2": 0.51143, + "3": 0.22467, + "4": 0.22383, + "5": 0.22656, + "6": 0.22198, + "7": 0.22714, + "8": 0.22548, + "9": 0.22693, + "10": 0.22495, + "11": 0.22373, + "12": 0.22603, + 
"13": 0.22383, + "14": 0.22775, + "15": 0.2246, + "16": 0.22631, + "17": 0.22428, + "18": 0.22651, + "19": 0.22468, + "20": 0.22662, + "21": 0.22656, + "22": 0.22412, + "23": 0.2244, + "24": 0.22387, + "25": 0.22714, + "26": 0.22328, + "27": 0.22509, + "28": 0.22418, + "29": 0.22427, + "30": 0.22512, + "31": 0.22375, + "32": 0.22369, + "33": 0.22403, + "34": 0.22748, + "35": 0.22797, + "36": 0.2259, + "37": 0.22337, + "38": 0.22614, + "39": 0.22328, + "40": 0.22898, + "41": 0.23448, + "42": 0.43469, + "43": 0.22427, + "44": 0.22708, + "45": 0.22289, + "46": 0.22786, + "47": 0.22274, + "48": 0.22383, + "49": 0.22317, + "50": 0.22534, + "51": 0.24991, + "52": 0.24511, + "53": 0.24212, + "54": 0.24477, + "55": 0.43963, + "56": 0.24504, + "57": 0.24214, + "58": 0.2444, + "59": 0.24255, + "60": 0.24252, + "61": 0.24317, + "62": 0.2455, + "63": 0.2441, + "64": 0.24309, + "65": 0.24205, + "66": 0.24822, + "67": 0.24294, + "68": 0.24294, + "69": 0.24265, + "70": 0.24445, + "71": 0.24281, + "72": 0.2431, + "73": 0.24193, + "74": 0.24487, + "75": 0.24331, + "76": 0.24509, + "77": 0.24318, + "78": 0.24248, + "79": 0.24489, + "80": 0.24557, + "81": 0.24722, + "82": 0.24377, + "83": 0.24576, + "84": 0.24463, + "85": 0.24362, + "86": 0.2432, + "87": 0.24588, + "88": 0.2452, + "89": 0.24361, + "90": 0.24371, + "91": 0.24472, + "92": 0.24381, + "93": 0.24279, + "94": 0.24377, + "95": 0.24609, + "96": 0.24562, + "97": 0.2436, + "98": 0.24534, + "99": 0.24537, + "100": 0.24419 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..4feab32a5b8 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92705, + "2": 10.92645, + "3": 10.91604, + "4": 10.90911, + "5": 10.92795, + "6": 10.93626, + "7": 10.90626, + "8": 10.92128, + "9": 10.90998, + "10": 10.90786, + "11": 10.89335, + "12": 10.92456, + "13": 10.9146, + "14": 10.9213, + "15": 10.88314, + "16": 10.87325, + "17": 10.84129, + "18": 10.87276, + "19": 10.8563, + "20": 10.77629, + "21": 10.74869, + "22": 10.63031, + "23": 10.75678, + "24": 10.65646, + "25": 10.59141, + "26": 10.65375, + "27": 10.6485, + "28": 10.59548, + "29": 10.6088, + "30": 10.39192, + "31": 10.15753, + "32": 10.49098, + "33": 10.4793, + "34": 10.24058, + "35": 10.29686, + "36": 10.24644, + "37": 10.35232, + "38": 10.20489, + "39": 10.4052, + "40": 10.0964, + "41": 10.15175, + "42": 10.22026, + "43": 9.85499, + "44": 9.96143, + "45": 9.84464, + "46": 9.83801, + "47": 10.13988, + "48": 9.85718, + "49": 9.53698, + "50": 9.90918, + "51": 9.84886, + "52": 9.74154, + "53": 10.06347, + "54": 9.94683, + "55": 9.87762, + "56": 9.6274, + "57": 9.47112, + "58": 9.82925, + "59": 9.58253, + "60": 9.49121, + "61": 9.69956, + "62": 9.97968, + "63": 9.37277, + "64": 9.77468, + "65": 8.94236, + "66": 9.6991, + "67": 9.36382, + "68": 9.78787, + "69": 9.78332, + "70": 9.72266, + "71": 9.60801, + "72": 9.58459, + "73": 9.48963, + "74": 8.94871, + "75": 9.41912, + "76": 9.08725, + "77": 10.06354, + "78": 9.72835, + "79": 9.37162, + "80": 9.40077, + "81": 9.47843, + "82": 9.69177, + "83": 9.3076, + "84": 9.41232, + "85": 9.61207, + "86": 9.07599, + "87": 9.59468, + "88": 9.74738, + "89": 9.60686, + "90": 9.81015, + "91": 9.34359, + "92": 9.36482, + "93": 9.07761, + "94": 8.83108, + "95": 9.51716, + "96": 9.52447, + "97": 9.31027, + "98": 9.67892, + "99": 8.88832, + 
"100": 9.4015 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1627.0, + "2": 1801.0, + "3": 1730.0, + "4": 1762.0, + "5": 2010.0, + "6": 1889.0, + "7": 1888.0, + "8": 1729.0, + "9": 1852.0, + "10": 1368.0, + "11": 1973.0, + "12": 1722.0, + "13": 1966.0, + "14": 1874.0, + "15": 1897.0, + "16": 1785.0, + "17": 1942.0, + "18": 1718.0, + "19": 1716.0, + "20": 1626.0, + "21": 1797.0, + "22": 1673.0, + "23": 1937.0, + "24": 1561.0, + "25": 1743.0, + "26": 1917.0, + "27": 1886.0, + "28": 1968.0, + "29": 2029.0, + "30": 1930.0, + "31": 1635.0, + "32": 1974.0, + "33": 2159.0, + "34": 2035.0, + "35": 1954.0, + "36": 1948.0, + "37": 2317.0, + "38": 2312.0, + "39": 2458.0, + "40": 2199.0, + "41": 2352.0, + "42": 2288.0, + "43": 2005.0, + "44": 2191.0, + "45": 2068.0, + "46": 2272.0, + "47": 2530.0, + "48": 2458.0, + "49": 2252.0, + "50": 2460.0, + "51": 2777.0, + "52": 2659.0, + "53": 2959.0, + "54": 2700.0, + "55": 2427.0, + "56": 2797.0, + "57": 2430.0, + "58": 3077.0, + "59": 2781.0, + "60": 2380.0, + "61": 2816.0, + "62": 2812.0, + "63": 2452.0, + "64": 2958.0, + "65": 2657.0, + "66": 3208.0, + "67": 2786.0, + "68": 2842.0, + "69": 2927.0, + "70": 3265.0, + "71": 3098.0, + "72": 2445.0, + "73": 3120.0, + "74": 1900.0, + "75": 2675.0, + "76": 3065.0, + "77": 3452.0, + "78": 3263.0, + "79": 3398.0, + "80": 3434.0, + "81": 3695.0, + "82": 3308.0, + "83": 2935.0, + "84": 3423.0, + "85": 3302.0, + "86": 2785.0, + "87": 3788.0, + "88": 3030.0, + "89": 3532.0, + "90": 3230.0, + "91": 2681.0, + "92": 3175.0, + "93": 2718.0, + "94": 3392.0, + "95": 3340.0, + "96": 3504.0, + "97": 3227.0, + "98": 3757.0, + "99": 3245.0, + "100": 3291.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 482498560.0, + "2": 482498560.0, + "3": 482498560.0, + "4": 482498560.0, + "5": 482498560.0, + "6": 482498560.0, + "7": 482498560.0, + "8": 482498560.0, + "9": 482498560.0, + 
"10": 482498560.0, + "11": 482498560.0, + "12": 482498560.0, + "13": 482498560.0, + "14": 482498560.0, + "15": 482498560.0, + "16": 482498560.0, + "17": 482498560.0, + "18": 482498560.0, + "19": 482498560.0, + "20": 482498560.0, + "21": 482498560.0, + "22": 482498560.0, + "23": 482498560.0, + "24": 482498560.0, + "25": 482498560.0, + "26": 482498560.0, + "27": 482498560.0, + "28": 482498560.0, + "29": 482498560.0, + "30": 482498560.0, + "31": 482498560.0, + "32": 482498560.0, + "33": 482498560.0, + "34": 482498560.0, + "35": 482498560.0, + "36": 482498560.0, + "37": 482498560.0, + "38": 482498560.0, + "39": 482498560.0, + "40": 482498560.0, + "41": 482498560.0, + "42": 482498560.0, + "43": 482498560.0, + "44": 482498560.0, + "45": 482498560.0, + "46": 482498560.0, + "47": 482498560.0, + "48": 482498560.0, + "49": 482498560.0, + "50": 482498560.0, + "51": 482498560.0, + "52": 482498560.0, + "53": 482498560.0, + "54": 482498560.0, + "55": 482498560.0, + "56": 482498560.0, + "57": 482498560.0, + "58": 482498560.0, + "59": 482498560.0, + "60": 482498560.0, + "61": 482498560.0, + "62": 482498560.0, + "63": 482498560.0, + "64": 482498560.0, + "65": 482498560.0, + "66": 482498560.0, + "67": 482498560.0, + "68": 482498560.0, + "69": 482498560.0, + "70": 482498560.0, + "71": 482498560.0, + "72": 482498560.0, + "73": 482498560.0, + "74": 482498560.0, + "75": 482498560.0, + "76": 482498560.0, + "77": 482498560.0, + "78": 482498560.0, + "79": 482498560.0, + "80": 482498560.0, + "81": 482498560.0, + "82": 482498560.0, + "83": 482498560.0, + "84": 482498560.0, + "85": 482498560.0, + "86": 482498560.0, + "87": 482498560.0, + "88": 482498560.0, + "89": 482498560.0, + "90": 482498560.0, + "91": 482498560.0, + "92": 482498560.0, + "93": 482498560.0, + "94": 482498560.0, + "95": 482498560.0, + "96": 482498560.0, + "97": 482498560.0, + "98": 482498560.0, + "99": 482498560.0, + "100": 482498560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 1712340992.0, + "2": 1891365888.0, + "3": 1891365888.0, + "4": 1891365888.0, + "5": 1891365888.0, + "6": 1891365888.0, + "7": 1891365888.0, + "8": 1891365888.0, + "9": 1891365888.0, + "10": 1891365888.0, + "11": 1891365888.0, + "12": 1891365888.0, + "13": 1891365888.0, + "14": 1891365888.0, + "15": 1891365888.0, + "16": 1891365888.0, + "17": 1891365888.0, + "18": 1891365888.0, + "19": 1891365888.0, + "20": 1891365888.0, + "21": 1891365888.0, + "22": 1891365888.0, + "23": 1891365888.0, + "24": 1891365888.0, + "25": 1891365888.0, + "26": 1891365888.0, + "27": 1891365888.0, + "28": 1891365888.0, + "29": 1891365888.0, + "30": 1891365888.0, + "31": 1891365888.0, + "32": 1891365888.0, + "33": 1891365888.0, + "34": 1891365888.0, + "35": 1891365888.0, + "36": 1891365888.0, + "37": 1891365888.0, + "38": 1891365888.0, + "39": 1891365888.0, + "40": 1891365888.0, + "41": 1891365888.0, + "42": 1891365888.0, + "43": 1891365888.0, + "44": 1891365888.0, + "45": 1891365888.0, + "46": 1891365888.0, + "47": 1891365888.0, + "48": 1891365888.0, + "49": 1891365888.0, + "50": 1891365888.0, + "51": 1891365888.0, + "52": 1891365888.0, + "53": 1891365888.0, + "54": 1891365888.0, + "55": 1891365888.0, + "56": 1891365888.0, + "57": 1891365888.0, + "58": 1891365888.0, + "59": 1891365888.0, + "60": 1891365888.0, + "61": 1891365888.0, + "62": 1891365888.0, + "63": 1891365888.0, + "64": 1891365888.0, + "65": 1891365888.0, + "66": 1891365888.0, + "67": 1891365888.0, + "68": 1891365888.0, + "69": 1891365888.0, + "70": 1891365888.0, + "71": 1891365888.0, + "72": 1891365888.0, + "73": 1891365888.0, + "74": 1891365888.0, + "75": 1891365888.0, + "76": 1891365888.0, + "77": 1891365888.0, + "78": 1891365888.0, + "79": 1891365888.0, + "80": 1891365888.0, + "81": 1891365888.0, + "82": 1891365888.0, + "83": 1891365888.0, + "84": 1891365888.0, + "85": 1891365888.0, + "86": 1891365888.0, + "87": 1891365888.0, + "88": 1891365888.0, + "89": 1891365888.0, + "90": 
1891365888.0, + "91": 1891365888.0, + "92": 1891365888.0, + "93": 1891365888.0, + "94": 1891365888.0, + "95": 1891365888.0, + "96": 1891365888.0, + "97": 1891365888.0, + "98": 1891365888.0, + "99": 1891365888.0, + "100": 1891365888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 13.54319, + "2": 0.26722, + "3": 0.22179, + "4": 0.22153, + "5": 0.22721, + "6": 0.22318, + "7": 0.22305, + "8": 0.26638, + "9": 0.25699, + "10": 0.22617, + "11": 0.22964, + "12": 0.22917, + "13": 0.22422, + "14": 0.22513, + "15": 0.22324, + "16": 0.22185, + "17": 0.2209, + "18": 0.229, + "19": 0.22105, + "20": 0.22048, + "21": 0.22339, + "22": 0.22351, + "23": 0.22154, + "24": 0.22155, + "25": 0.22184, + "26": 0.22048, + "27": 0.22559, + "28": 0.22037, + "29": 0.22036, + "30": 0.2223, + "31": 0.22392, + "32": 0.22147, + "33": 0.22201, + "34": 0.21977, + "35": 0.22008, + "36": 0.22582, + "37": 0.21924, + "38": 0.22002, + "39": 0.22005, + "40": 0.22002, + "41": 0.22508, + "42": 0.21887, + "43": 0.21999, + "44": 0.21904, + "45": 0.22339, + "46": 0.21983, + "47": 0.21914, + "48": 0.21981, + "49": 0.22038, + "50": 0.22179, + "51": 0.44158, + "52": 0.22072, + "53": 0.2216, + "54": 0.21972, + "55": 0.2224, + "56": 0.21985, + "57": 0.21947, + "58": 0.22049, + "59": 0.22101, + "60": 0.41998, + "61": 0.22036, + "62": 0.22068, + "63": 0.223, + "64": 0.2206, + "65": 0.21966, + "66": 0.22032, + "67": 0.22009, + "68": 0.22359, + "69": 0.21962, + "70": 0.21951, + "71": 0.21979, + "72": 0.22305, + "73": 0.22044, + "74": 0.21963, + "75": 0.21954, + "76": 0.22086, + "77": 0.22567, + "78": 0.21994, + "79": 0.21942, + "80": 0.21927, + "81": 0.22743, + "82": 0.21995, + "83": 0.21975, + "84": 0.2199, + "85": 0.22001, + "86": 0.22586, + "87": 0.22037, + "88": 0.21916, + "89": 0.22024, + "90": 0.22297, + "91": 0.22249, + "92": 0.21959, + "93": 0.21938, + "94": 0.22092, + "95": 0.2253, + "96": 0.21981, + "97": 0.21968, + "98": 0.22037, + "99": 
0.22237, + "100": 0.22281 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..8ac6c3744df --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92705, + "2": 10.92645, + "3": 10.91604, + "4": 10.90911, + "5": 10.92795, + "6": 10.93626, + "7": 10.90626, + "8": 10.92128, + "9": 10.90998, + "10": 10.90786, + "11": 10.89335, + "12": 10.92456, + "13": 10.9146, + "14": 10.9213, + "15": 10.88314, + "16": 10.87325, + "17": 10.84129, + "18": 10.87276, + "19": 10.8563, + "20": 10.77629, + "21": 10.74869, + "22": 10.63031, + "23": 10.75678, + "24": 10.65646, + "25": 10.59141, + "26": 10.65375, + "27": 10.6485, + "28": 10.59548, + "29": 10.6088, + "30": 10.39192, + "31": 10.15753, + "32": 10.49098, + "33": 10.4793, + "34": 10.24058, + "35": 10.29686, + "36": 10.24644, + "37": 10.35232, + "38": 10.20489, + "39": 10.4052, + "40": 10.0964, + "41": 10.15175, + "42": 10.22026, + "43": 9.85499, + "44": 9.96143, + "45": 9.84464, + "46": 9.83801, + "47": 10.13988, + "48": 9.85718, + "49": 9.53698, + "50": 9.90918, + "51": 9.84886, + "52": 9.74154, + "53": 10.06347, + "54": 9.94683, + "55": 9.87762, + "56": 9.6274, + "57": 9.47112, + "58": 9.82925, + "59": 9.58253, + "60": 9.49121, + "61": 9.69956, + "62": 9.97968, + "63": 9.37277, + "64": 9.77468, + "65": 8.94236, + "66": 9.6991, + "67": 9.36382, + "68": 9.78787, + "69": 9.78332, + "70": 9.72266, 
+ "71": 9.60801, + "72": 9.58459, + "73": 9.48963, + "74": 8.94871, + "75": 9.41912, + "76": 9.08725, + "77": 10.06354, + "78": 9.72835, + "79": 9.37162, + "80": 9.40077, + "81": 9.47843, + "82": 9.69177, + "83": 9.3076, + "84": 9.41232, + "85": 9.61207, + "86": 9.07599, + "87": 9.59468, + "88": 9.74738, + "89": 9.60686, + "90": 9.81015, + "91": 9.34359, + "92": 9.36482, + "93": 9.07761, + "94": 8.83108, + "95": 9.51716, + "96": 9.52447, + "97": 9.31027, + "98": 9.67892, + "99": 8.88832, + "100": 9.4015 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1627.0, + "2": 1801.0, + "3": 1730.0, + "4": 1762.0, + "5": 2010.0, + "6": 1889.0, + "7": 1888.0, + "8": 1729.0, + "9": 1852.0, + "10": 1368.0, + "11": 1973.0, + "12": 1722.0, + "13": 1966.0, + "14": 1874.0, + "15": 1897.0, + "16": 1785.0, + "17": 1942.0, + "18": 1718.0, + "19": 1716.0, + "20": 1626.0, + "21": 1797.0, + "22": 1673.0, + "23": 1937.0, + "24": 1561.0, + "25": 1743.0, + "26": 1917.0, + "27": 1886.0, + "28": 1968.0, + "29": 2029.0, + "30": 1930.0, + "31": 1635.0, + "32": 1974.0, + "33": 2159.0, + "34": 2035.0, + "35": 1954.0, + "36": 1948.0, + "37": 2317.0, + "38": 2312.0, + "39": 2458.0, + "40": 2199.0, + "41": 2352.0, + "42": 2288.0, + "43": 2005.0, + "44": 2191.0, + "45": 2068.0, + "46": 2272.0, + "47": 2530.0, + "48": 2458.0, + "49": 2252.0, + "50": 2460.0, + "51": 2777.0, + "52": 2659.0, + "53": 2959.0, + "54": 2700.0, + "55": 2427.0, + "56": 2797.0, + "57": 2430.0, + "58": 3077.0, + "59": 2781.0, + "60": 2380.0, + "61": 2816.0, + "62": 2812.0, + "63": 2452.0, + "64": 2958.0, + "65": 2657.0, + "66": 3208.0, + "67": 2786.0, + "68": 2842.0, + "69": 2927.0, + "70": 3265.0, + "71": 3098.0, + "72": 2445.0, + "73": 3120.0, + "74": 1900.0, + "75": 2675.0, + "76": 3065.0, + "77": 3452.0, + "78": 3263.0, + "79": 3398.0, + "80": 3434.0, + "81": 3695.0, + "82": 3308.0, + "83": 2935.0, + "84": 3423.0, + "85": 3302.0, + "86": 2785.0, + "87": 3788.0, + "88": 
3030.0, + "89": 3532.0, + "90": 3230.0, + "91": 2681.0, + "92": 3175.0, + "93": 2718.0, + "94": 3392.0, + "95": 3340.0, + "96": 3504.0, + "97": 3227.0, + "98": 3757.0, + "99": 3245.0, + "100": 3291.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 482498560.0, + "2": 482498560.0, + "3": 482498560.0, + "4": 482498560.0, + "5": 482498560.0, + "6": 482498560.0, + "7": 482498560.0, + "8": 482498560.0, + "9": 482498560.0, + "10": 482498560.0, + "11": 482498560.0, + "12": 482498560.0, + "13": 482498560.0, + "14": 482498560.0, + "15": 482498560.0, + "16": 482498560.0, + "17": 482498560.0, + "18": 482498560.0, + "19": 482498560.0, + "20": 482498560.0, + "21": 482498560.0, + "22": 482498560.0, + "23": 482498560.0, + "24": 482498560.0, + "25": 482498560.0, + "26": 482498560.0, + "27": 482498560.0, + "28": 482498560.0, + "29": 482498560.0, + "30": 482498560.0, + "31": 482498560.0, + "32": 482498560.0, + "33": 482498560.0, + "34": 482498560.0, + "35": 482498560.0, + "36": 482498560.0, + "37": 482498560.0, + "38": 482498560.0, + "39": 482498560.0, + "40": 482498560.0, + "41": 482498560.0, + "42": 482498560.0, + "43": 482498560.0, + "44": 482498560.0, + "45": 482498560.0, + "46": 482498560.0, + "47": 482498560.0, + "48": 482498560.0, + "49": 482498560.0, + "50": 482498560.0, + "51": 482498560.0, + "52": 482498560.0, + "53": 482498560.0, + "54": 482498560.0, + "55": 482498560.0, + "56": 482498560.0, + "57": 482498560.0, + "58": 482498560.0, + "59": 482498560.0, + "60": 482498560.0, + "61": 482498560.0, + "62": 482498560.0, + "63": 482498560.0, + "64": 482498560.0, + "65": 482498560.0, + "66": 482498560.0, + "67": 482498560.0, + "68": 482498560.0, + "69": 482498560.0, + "70": 482498560.0, + "71": 482498560.0, + "72": 482498560.0, + "73": 482498560.0, + "74": 482498560.0, + "75": 482498560.0, + "76": 482498560.0, + "77": 482498560.0, + "78": 482498560.0, + "79": 482498560.0, + "80": 482498560.0, + "81": 
482498560.0, + "82": 482498560.0, + "83": 482498560.0, + "84": 482498560.0, + "85": 482498560.0, + "86": 482498560.0, + "87": 482498560.0, + "88": 482498560.0, + "89": 482498560.0, + "90": 482498560.0, + "91": 482498560.0, + "92": 482498560.0, + "93": 482498560.0, + "94": 482498560.0, + "95": 482498560.0, + "96": 482498560.0, + "97": 482498560.0, + "98": 482498560.0, + "99": 482498560.0, + "100": 482498560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1712340992.0, + "2": 1891365888.0, + "3": 1891365888.0, + "4": 1891365888.0, + "5": 1891365888.0, + "6": 1891365888.0, + "7": 1891365888.0, + "8": 1891365888.0, + "9": 1891365888.0, + "10": 1891365888.0, + "11": 1891365888.0, + "12": 1891365888.0, + "13": 1891365888.0, + "14": 1891365888.0, + "15": 1891365888.0, + "16": 1891365888.0, + "17": 1891365888.0, + "18": 1891365888.0, + "19": 1891365888.0, + "20": 1891365888.0, + "21": 1891365888.0, + "22": 1891365888.0, + "23": 1891365888.0, + "24": 1891365888.0, + "25": 1891365888.0, + "26": 1891365888.0, + "27": 1891365888.0, + "28": 1891365888.0, + "29": 1891365888.0, + "30": 1891365888.0, + "31": 1891365888.0, + "32": 1891365888.0, + "33": 1891365888.0, + "34": 1891365888.0, + "35": 1891365888.0, + "36": 1891365888.0, + "37": 1891365888.0, + "38": 1891365888.0, + "39": 1891365888.0, + "40": 1891365888.0, + "41": 1891365888.0, + "42": 1891365888.0, + "43": 1891365888.0, + "44": 1891365888.0, + "45": 1891365888.0, + "46": 1891365888.0, + "47": 1891365888.0, + "48": 1891365888.0, + "49": 1891365888.0, + "50": 1891365888.0, + "51": 1891365888.0, + "52": 1891365888.0, + "53": 1891365888.0, + "54": 1891365888.0, + "55": 1891365888.0, + "56": 1891365888.0, + "57": 1891365888.0, + "58": 1891365888.0, + "59": 1891365888.0, + "60": 1891365888.0, + "61": 1891365888.0, + "62": 1891365888.0, + "63": 1891365888.0, + "64": 1891365888.0, + "65": 1891365888.0, + "66": 1891365888.0, + "67": 1891365888.0, + "68": 
1891365888.0, + "69": 1891365888.0, + "70": 1891365888.0, + "71": 1891365888.0, + "72": 1891365888.0, + "73": 1891365888.0, + "74": 1891365888.0, + "75": 1891365888.0, + "76": 1891365888.0, + "77": 1891365888.0, + "78": 1891365888.0, + "79": 1891365888.0, + "80": 1891365888.0, + "81": 1891365888.0, + "82": 1891365888.0, + "83": 1891365888.0, + "84": 1891365888.0, + "85": 1891365888.0, + "86": 1891365888.0, + "87": 1891365888.0, + "88": 1891365888.0, + "89": 1891365888.0, + "90": 1891365888.0, + "91": 1891365888.0, + "92": 1891365888.0, + "93": 1891365888.0, + "94": 1891365888.0, + "95": 1891365888.0, + "96": 1891365888.0, + "97": 1891365888.0, + "98": 1891365888.0, + "99": 1891365888.0, + "100": 1891365888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.02291, + "2": 0.25698, + "3": 0.22494, + "4": 0.22549, + "5": 0.22123, + "6": 0.22199, + "7": 0.22201, + "8": 0.22481, + "9": 0.22513, + "10": 0.22241, + "11": 0.22332, + "12": 0.22223, + "13": 0.22628, + "14": 0.22248, + "15": 0.22165, + "16": 0.22121, + "17": 0.224, + "18": 0.22329, + "19": 0.22788, + "20": 0.22088, + "21": 0.22171, + "22": 0.2267, + "23": 0.2231, + "24": 0.22082, + "25": 0.22278, + "26": 0.22362, + "27": 0.22127, + "28": 0.22083, + "29": 0.22007, + "30": 0.22168, + "31": 0.22562, + "32": 0.22252, + "33": 0.22134, + "34": 0.22034, + "35": 0.22446, + "36": 0.22435, + "37": 0.21955, + "38": 0.22888, + "39": 0.22007, + "40": 0.22467, + "41": 0.22235, + "42": 0.22037, + "43": 0.21987, + "44": 0.22161, + "45": 0.22407, + "46": 0.21928, + "47": 0.21937, + "48": 0.22055, + "49": 0.22041, + "50": 0.21825, + "51": 0.23094, + "52": 0.22395, + "53": 0.22444, + "54": 0.22304, + "55": 0.22247, + "56": 0.22274, + "57": 0.22315, + "58": 0.22428, + "59": 0.22249, + "60": 0.22237, + "61": 0.22311, + "62": 0.2253, + "63": 0.22199, + "64": 0.22192, + "65": 0.22225, + "66": 0.22273, + "67": 0.22186, + "68": 0.22015, + "69": 0.22083, + "70": 0.22201, 
+ "71": 0.22474, + "72": 0.22079, + "73": 0.22118, + "74": 0.22105, + "75": 0.22105, + "76": 0.22207, + "77": 0.22072, + "78": 0.22157, + "79": 0.22114, + "80": 0.22667, + "81": 0.22112, + "82": 0.22055, + "83": 0.22095, + "84": 0.22242, + "85": 0.22302, + "86": 0.22037, + "87": 0.22095, + "88": 0.22048, + "89": 0.22998, + "90": 0.22099, + "91": 0.22067, + "92": 0.2202, + "93": 0.22164, + "94": 0.22306, + "95": 0.22015, + "96": 0.22081, + "97": 0.22074, + "98": 0.22695, + "99": 0.22087, + "100": 0.22052 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index aa3c5f5d2a9..b052742de3f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.85949, + "2": 10.85553, + "3": 10.86546, + "4": 10.84554, "5": 10.88348, + "6": 10.89432, + "7": 10.87067, + "8": 10.86981, + "9": 10.86919, "10": 10.83887, + "11": 10.89435, + "12": 10.87982, + "13": 10.87988, + "14": 10.90314, "15": 10.8405, + "16": 10.83786, + "17": 10.80668, + "18": 10.83027, + "19": 10.82259, "20": 10.73192, + "21": 10.70753, + "22": 10.56005, + "23": 10.72402, + "24": 10.6111, "25": 10.54815, + "26": 10.61332, + "27": 10.63056, + "28": 10.56645, + "29": 10.59668, "30": 10.37137, + "31": 10.1172, + "32": 10.4613, + "33": 10.45249, + "34": 10.2169, "35": 10.27173, + "36": 10.23118, + "37": 10.34812, + "38": 10.1884, + "39": 10.41042, "40": 10.09426, 
+ "41": 10.1471, + "42": 10.21243, + "43": 9.8411, + "44": 9.95916, "45": 9.84085, + "46": 9.8248, + "47": 10.1388, + "48": 9.8584, + "49": 9.5472, "50": 9.90878, + "51": 9.85583, + "52": 9.75242, + "53": 10.07589, + "54": 9.95688, "55": 9.88208, + "56": 9.63141, + "57": 9.48651, + "58": 9.83118, + "59": 9.58905, "60": 9.50651, + "61": 9.7037, + "62": 9.98291, + "63": 9.38315, + "64": 9.77906, "65": 8.95179, + "66": 9.7016, + "67": 9.37206, + "68": 9.78852, + "69": 9.79859, "70": 9.74746, + "71": 9.6191, + "72": 9.58502, + "73": 9.49725, + "74": 8.93933, "75": 9.42706, + "76": 9.08024, + "77": 10.06571, + "78": 9.72896, + "79": 9.37772, "80": 9.40999, + "81": 9.47983, + "82": 9.70184, + "83": 9.30625, + "84": 9.42095, "85": 9.61378, + "86": 9.07656, + "87": 9.59458, + "88": 9.75068, + "89": 9.60243, "90": 9.81901, + "91": 9.33899, + "92": 9.35717, + "93": 9.07883, + "94": 8.8351, "95": 9.52171, + "96": 9.53008, + "97": 9.31309, + "98": 9.67785, + "99": 8.89061, "100": 9.39726 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1638.0, + "2": 1766.0, + "3": 1620.0, + "4": 1810.0, "5": 1844.0, + "6": 1835.0, + "7": 1694.0, + "8": 1632.0, + "9": 1902.0, "10": 1427.0, + "11": 1932.0, + "12": 1705.0, + "13": 1834.0, + "14": 1807.0, "15": 1907.0, + "16": 1797.0, + "17": 1911.0, + "18": 1667.0, + "19": 1742.0, "20": 1662.0, + "21": 1853.0, + "22": 1621.0, + "23": 2010.0, + "24": 1546.0, "25": 1510.0, + "26": 1664.0, + "27": 1722.0, + "28": 1977.0, + "29": 2024.0, "30": 1873.0, + "31": 1494.0, + "32": 1890.0, + "33": 2067.0, + "34": 1802.0, "35": 1873.0, + "36": 1954.0, + "37": 2283.0, + "38": 2076.0, + "39": 2280.0, "40": 2111.0, + "41": 2318.0, + "42": 2206.0, + "43": 2040.0, + "44": 2088.0, "45": 2181.0, + "46": 2434.0, + "47": 2446.0, + "48": 2481.0, + "49": 2398.0, "50": 2410.0, + "51": 2528.0, + "52": 2535.0, + "53": 2875.0, + "54": 2862.0, "55": 2406.0, + "56": 2733.0, + "57": 2347.0, + "58": 2918.0, + 
"59": 2759.0, "60": 2404.0, + "61": 3022.0, + "62": 2494.0, + "63": 2452.0, + "64": 2838.0, "65": 2549.0, + "66": 3044.0, + "67": 2887.0, + "68": 2637.0, + "69": 2860.0, "70": 3034.0, + "71": 2989.0, + "72": 2355.0, + "73": 3034.0, + "74": 1904.0, "75": 2538.0, + "76": 3012.0, + "77": 3193.0, + "78": 2994.0, + "79": 3097.0, "80": 3254.0, + "81": 3671.0, + "82": 3299.0, + "83": 2793.0, + "84": 3146.0, "85": 3329.0, + "86": 2769.0, + "87": 3766.0, + "88": 3021.0, + "89": 3286.0, "90": 3029.0, + "91": 2772.0, + "92": 2955.0, + "93": 2852.0, + "94": 3411.0, "95": 3271.0, + "96": 3279.0, + "97": 3054.0, + "98": 3643.0, + "99": 3303.0, "100": 3142.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 518291968.0, + "2": 518291968.0, + "3": 518291968.0, + "4": 518291968.0, "5": 518291968.0, + "6": 518291968.0, + "7": 518291968.0, + "8": 518291968.0, + "9": 518291968.0, "10": 518291968.0, + "11": 518291968.0, + "12": 518291968.0, + "13": 518291968.0, + "14": 518291968.0, "15": 518291968.0, + "16": 518291968.0, + "17": 518291968.0, + "18": 518291968.0, + "19": 518291968.0, "20": 518291968.0, + "21": 518291968.0, + "22": 518291968.0, + "23": 518291968.0, + "24": 518291968.0, "25": 518291968.0, + "26": 518291968.0, + "27": 518291968.0, + "28": 518291968.0, + "29": 518291968.0, "30": 518291968.0, + "31": 518291968.0, + "32": 518291968.0, + "33": 518291968.0, + "34": 518291968.0, "35": 518291968.0, + "36": 518291968.0, + "37": 518291968.0, + "38": 518291968.0, + "39": 518291968.0, "40": 518291968.0, + "41": 518291968.0, + "42": 518291968.0, + "43": 518291968.0, + "44": 518291968.0, "45": 518291968.0, + "46": 518291968.0, + "47": 518291968.0, + "48": 518291968.0, + "49": 518291968.0, "50": 518291968.0, + "51": 518291968.0, + "52": 518291968.0, + "53": 518291968.0, + "54": 518291968.0, "55": 518291968.0, + "56": 518291968.0, + "57": 518291968.0, + "58": 518291968.0, + "59": 518291968.0, "60": 518291968.0, + 
"61": 518291968.0, + "62": 518291968.0, + "63": 518291968.0, + "64": 518291968.0, "65": 518291968.0, + "66": 518291968.0, + "67": 518291968.0, + "68": 518291968.0, + "69": 518291968.0, "70": 518291968.0, + "71": 518291968.0, + "72": 518291968.0, + "73": 518291968.0, + "74": 518291968.0, "75": 518291968.0, + "76": 518291968.0, + "77": 518291968.0, + "78": 518291968.0, + "79": 518291968.0, "80": 518291968.0, + "81": 518291968.0, + "82": 518291968.0, + "83": 518291968.0, + "84": 518291968.0, "85": 518291968.0, + "86": 518291968.0, + "87": 518291968.0, + "88": 518291968.0, + "89": 518291968.0, "90": 518291968.0, + "91": 518291968.0, + "92": 518291968.0, + "93": 518291968.0, + "94": 518291968.0, "95": 518291968.0, + "96": 518291968.0, + "97": 518291968.0, + "98": 518291968.0, + "99": 518291968.0, "100": 518291968.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1245476352.0, - "5": 1430268416.0, - "10": 1430268416.0, - "15": 1430268416.0, - "20": 1430268416.0, - "25": 1430268416.0, - "30": 1430268416.0, - "35": 1430268416.0, - "40": 1430268416.0, - "45": 1430268416.0, - "50": 1430268416.0, - "55": 1430268416.0, - "60": 1430268416.0, - "65": 1430268416.0, - "70": 1430268416.0, - "75": 1430268416.0, - "80": 1430268416.0, - "85": 1430268416.0, - "90": 1430268416.0, - "95": 1430268416.0, - "100": 1430268416.0 + "2": 1429481984.0, + "3": 1429481984.0, + "4": 1429481984.0, + "5": 1429481984.0, + "6": 1429481984.0, + "7": 1429481984.0, + "8": 1429481984.0, + "9": 1429481984.0, + "10": 1429481984.0, + "11": 1429481984.0, + "12": 1429481984.0, + "13": 1429481984.0, + "14": 1429481984.0, + "15": 1429481984.0, + "16": 1429481984.0, + "17": 1429481984.0, + "18": 1429481984.0, + "19": 1429481984.0, + "20": 1429481984.0, + "21": 1429481984.0, + "22": 1429481984.0, + "23": 1429481984.0, + "24": 1429481984.0, + "25": 1429481984.0, + "26": 1429481984.0, + "27": 1429481984.0, + "28": 1429481984.0, + "29": 
1429481984.0, + "30": 1429481984.0, + "31": 1429481984.0, + "32": 1429481984.0, + "33": 1429481984.0, + "34": 1429481984.0, + "35": 1429481984.0, + "36": 1429481984.0, + "37": 1429481984.0, + "38": 1429481984.0, + "39": 1429481984.0, + "40": 1429481984.0, + "41": 1429481984.0, + "42": 1429481984.0, + "43": 1429481984.0, + "44": 1429481984.0, + "45": 1429481984.0, + "46": 1429481984.0, + "47": 1429481984.0, + "48": 1429481984.0, + "49": 1429481984.0, + "50": 1429481984.0, + "51": 1429481984.0, + "52": 1429481984.0, + "53": 1429481984.0, + "54": 1429481984.0, + "55": 1429481984.0, + "56": 1429481984.0, + "57": 1429481984.0, + "58": 1429481984.0, + "59": 1429481984.0, + "60": 1429481984.0, + "61": 1429481984.0, + "62": 1429481984.0, + "63": 1429481984.0, + "64": 1429481984.0, + "65": 1429481984.0, + "66": 1429481984.0, + "67": 1429481984.0, + "68": 1429481984.0, + "69": 1429481984.0, + "70": 1429481984.0, + "71": 1429481984.0, + "72": 1429481984.0, + "73": 1429481984.0, + "74": 1429481984.0, + "75": 1429481984.0, + "76": 1429481984.0, + "77": 1429481984.0, + "78": 1429481984.0, + "79": 1429481984.0, + "80": 1429481984.0, + "81": 1429481984.0, + "82": 1429481984.0, + "83": 1429481984.0, + "84": 1429481984.0, + "85": 1429481984.0, + "86": 1429481984.0, + "87": 1429481984.0, + "88": 1429481984.0, + "89": 1429481984.0, + "90": 1429481984.0, + "91": 1429481984.0, + "92": 1429481984.0, + "93": 1429481984.0, + "94": 1429481984.0, + "95": 1429481984.0, + "96": 1429481984.0, + "97": 1429481984.0, + "98": 1429481984.0, + "99": 1429481984.0, + "100": 1429481984.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 10.72639, - "5": 0.12756, - "10": 0.12238, - "15": 0.12066, - "20": 0.12159, - "25": 0.12133, - "30": 0.12407, - "35": 0.12311, - "40": 0.1259, - "45": 0.1216, - "50": 0.12187, - "55": 0.12903, - "60": 0.12481, - "65": 0.12314, - "70": 0.12347, - "75": 0.12591, - "80": 0.12073, - "85": 0.12081, - 
"90": 0.12092, - "95": 0.1218, - "100": 0.12338 + "1": 12.5643, + "2": 0.17332, + "3": 0.15504, + "4": 0.14953, + "5": 0.14296, + "6": 0.14226, + "7": 0.14346, + "8": 0.13938, + "9": 0.14124, + "10": 0.14047, + "11": 0.13835, + "12": 0.14091, + "13": 0.14198, + "14": 0.14069, + "15": 0.13974, + "16": 0.13801, + "17": 0.14306, + "18": 0.14074, + "19": 0.14027, + "20": 0.14158, + "21": 0.14008, + "22": 0.14191, + "23": 0.14006, + "24": 0.13998, + "25": 0.13889, + "26": 0.13978, + "27": 0.14315, + "28": 0.14416, + "29": 0.154, + "30": 0.14026, + "31": 0.14128, + "32": 0.14142, + "33": 0.14025, + "34": 0.14164, + "35": 0.14065, + "36": 0.14236, + "37": 0.13962, + "38": 0.14015, + "39": 0.1412, + "40": 0.14042, + "41": 0.14202, + "42": 0.14116, + "43": 0.1402, + "44": 0.14155, + "45": 0.13981, + "46": 0.14102, + "47": 0.13959, + "48": 0.14118, + "49": 0.14576, + "50": 0.14714, + "51": 0.14965, + "52": 0.14244, + "53": 0.14198, + "54": 0.14102, + "55": 0.1404, + "56": 0.14132, + "57": 0.14, + "58": 0.14143, + "59": 0.16106, + "60": 0.15695, + "61": 0.15431, + "62": 0.14815, + "63": 0.14032, + "64": 0.14044, + "65": 0.14332, + "66": 0.14167, + "67": 0.14533, + "68": 0.1417, + "69": 0.14266, + "70": 0.14095, + "71": 0.14063, + "72": 0.1428, + "73": 0.14351, + "74": 0.14269, + "75": 0.14075, + "76": 0.14214, + "77": 0.14239, + "78": 0.1408, + "79": 0.14254, + "80": 0.14178, + "81": 0.14443, + "82": 0.14301, + "83": 0.14097, + "84": 0.14255, + "85": 0.14113, + "86": 0.14391, + "87": 0.14098, + "88": 0.16001, + "89": 0.15765, + "90": 0.1598, + "91": 0.16005, + "92": 0.14828, + "93": 0.15228, + "94": 0.15292, + "95": 0.14998, + "96": 0.14946, + "97": 0.15122, + "98": 0.144, + "99": 0.14325, + "100": 0.14483 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..055edccd6a0 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86546, + "4": 10.84554, + "5": 10.88348, + "6": 10.89432, + "7": 10.87067, + "8": 10.86981, + "9": 10.86919, + "10": 10.83887, + "11": 10.89435, + "12": 10.87982, + "13": 10.87988, + "14": 10.90314, + "15": 10.8405, + "16": 10.83786, + "17": 10.80668, + "18": 10.83027, + "19": 10.82259, + "20": 10.73192, + "21": 10.70753, + "22": 10.56005, + "23": 10.72402, + "24": 10.6111, + "25": 10.54815, + "26": 10.61332, + "27": 10.63056, + "28": 10.56645, + "29": 10.59668, + "30": 10.37137, + "31": 10.1172, + "32": 10.4613, + "33": 10.45249, + "34": 10.2169, + "35": 10.27173, + "36": 10.23118, + "37": 10.34812, + "38": 10.1884, + "39": 10.41042, + "40": 10.09426, + "41": 10.1471, + "42": 10.21243, + "43": 9.8411, + "44": 9.95916, + "45": 9.84085, + "46": 9.8248, + "47": 10.1388, + "48": 9.8584, + "49": 9.5472, + "50": 9.90878, + "51": 9.85583, + "52": 9.75242, + "53": 10.07589, + "54": 9.95688, + "55": 9.88208, + "56": 9.63141, + "57": 9.48651, + "58": 9.83118, + "59": 9.58905, + "60": 9.50651, + "61": 9.7037, + "62": 9.98291, + "63": 9.38315, + "64": 9.77906, + "65": 8.95179, + "66": 9.7016, + "67": 9.37206, + "68": 9.78852, + "69": 9.79859, + "70": 9.74746, + "71": 9.6191, + "72": 9.58502, + "73": 9.49725, + "74": 8.93933, + "75": 9.42706, + "76": 9.08024, + "77": 10.06571, + "78": 9.72896, + "79": 9.37772, + "80": 9.40999, + "81": 9.47983, + "82": 9.70184, + "83": 9.30625, + "84": 9.42095, + "85": 9.61378, + "86": 9.07656, + "87": 9.59458, + "88": 
9.75068, + "89": 9.60243, + "90": 9.81901, + "91": 9.33899, + "92": 9.35717, + "93": 9.07883, + "94": 8.8351, + "95": 9.52171, + "96": 9.53008, + "97": 9.31309, + "98": 9.67785, + "99": 8.89061, + "100": 9.39726 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1638.0, + "2": 1766.0, + "3": 1620.0, + "4": 1810.0, + "5": 1844.0, + "6": 1835.0, + "7": 1694.0, + "8": 1632.0, + "9": 1902.0, + "10": 1427.0, + "11": 1932.0, + "12": 1705.0, + "13": 1834.0, + "14": 1807.0, + "15": 1907.0, + "16": 1797.0, + "17": 1911.0, + "18": 1667.0, + "19": 1742.0, + "20": 1662.0, + "21": 1853.0, + "22": 1621.0, + "23": 2010.0, + "24": 1546.0, + "25": 1510.0, + "26": 1664.0, + "27": 1722.0, + "28": 1977.0, + "29": 2024.0, + "30": 1873.0, + "31": 1494.0, + "32": 1890.0, + "33": 2067.0, + "34": 1802.0, + "35": 1873.0, + "36": 1954.0, + "37": 2283.0, + "38": 2076.0, + "39": 2280.0, + "40": 2111.0, + "41": 2318.0, + "42": 2206.0, + "43": 2040.0, + "44": 2088.0, + "45": 2181.0, + "46": 2434.0, + "47": 2446.0, + "48": 2481.0, + "49": 2398.0, + "50": 2410.0, + "51": 2528.0, + "52": 2535.0, + "53": 2875.0, + "54": 2862.0, + "55": 2406.0, + "56": 2733.0, + "57": 2347.0, + "58": 2918.0, + "59": 2759.0, + "60": 2404.0, + "61": 3022.0, + "62": 2494.0, + "63": 2452.0, + "64": 2838.0, + "65": 2549.0, + "66": 3044.0, + "67": 2887.0, + "68": 2637.0, + "69": 2860.0, + "70": 3034.0, + "71": 2989.0, + "72": 2355.0, + "73": 3034.0, + "74": 1904.0, + "75": 2538.0, + "76": 3012.0, + "77": 3193.0, + "78": 2994.0, + "79": 3097.0, + "80": 3254.0, + "81": 3671.0, + "82": 3299.0, + "83": 2793.0, + "84": 3146.0, + "85": 3329.0, + "86": 2769.0, + "87": 3766.0, + "88": 3021.0, + "89": 3286.0, + "90": 3029.0, + "91": 2772.0, + "92": 2955.0, + "93": 2852.0, + "94": 3411.0, + "95": 3271.0, + "96": 3279.0, + "97": 3054.0, + "98": 3643.0, + "99": 3303.0, + "100": 3142.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, 
+ "values": { + "1": 518291968.0, + "2": 518291968.0, + "3": 518291968.0, + "4": 518291968.0, + "5": 518291968.0, + "6": 518291968.0, + "7": 518291968.0, + "8": 518291968.0, + "9": 518291968.0, + "10": 518291968.0, + "11": 518291968.0, + "12": 518291968.0, + "13": 518291968.0, + "14": 518291968.0, + "15": 518291968.0, + "16": 518291968.0, + "17": 518291968.0, + "18": 518291968.0, + "19": 518291968.0, + "20": 518291968.0, + "21": 518291968.0, + "22": 518291968.0, + "23": 518291968.0, + "24": 518291968.0, + "25": 518291968.0, + "26": 518291968.0, + "27": 518291968.0, + "28": 518291968.0, + "29": 518291968.0, + "30": 518291968.0, + "31": 518291968.0, + "32": 518291968.0, + "33": 518291968.0, + "34": 518291968.0, + "35": 518291968.0, + "36": 518291968.0, + "37": 518291968.0, + "38": 518291968.0, + "39": 518291968.0, + "40": 518291968.0, + "41": 518291968.0, + "42": 518291968.0, + "43": 518291968.0, + "44": 518291968.0, + "45": 518291968.0, + "46": 518291968.0, + "47": 518291968.0, + "48": 518291968.0, + "49": 518291968.0, + "50": 518291968.0, + "51": 518291968.0, + "52": 518291968.0, + "53": 518291968.0, + "54": 518291968.0, + "55": 518291968.0, + "56": 518291968.0, + "57": 518291968.0, + "58": 518291968.0, + "59": 518291968.0, + "60": 518291968.0, + "61": 518291968.0, + "62": 518291968.0, + "63": 518291968.0, + "64": 518291968.0, + "65": 518291968.0, + "66": 518291968.0, + "67": 518291968.0, + "68": 518291968.0, + "69": 518291968.0, + "70": 518291968.0, + "71": 518291968.0, + "72": 518291968.0, + "73": 518291968.0, + "74": 518291968.0, + "75": 518291968.0, + "76": 518291968.0, + "77": 518291968.0, + "78": 518291968.0, + "79": 518291968.0, + "80": 518291968.0, + "81": 518291968.0, + "82": 518291968.0, + "83": 518291968.0, + "84": 518291968.0, + "85": 518291968.0, + "86": 518291968.0, + "87": 518291968.0, + "88": 518291968.0, + "89": 518291968.0, + "90": 518291968.0, + "91": 518291968.0, + "92": 518291968.0, + "93": 518291968.0, + "94": 518291968.0, + "95": 518291968.0, 
+ "96": 518291968.0, + "97": 518291968.0, + "98": 518291968.0, + "99": 518291968.0, + "100": 518291968.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1245476352.0, + "2": 1429481984.0, + "3": 1429481984.0, + "4": 1429481984.0, + "5": 1429481984.0, + "6": 1429481984.0, + "7": 1429481984.0, + "8": 1429481984.0, + "9": 1429481984.0, + "10": 1429481984.0, + "11": 1429481984.0, + "12": 1429481984.0, + "13": 1429481984.0, + "14": 1429481984.0, + "15": 1429481984.0, + "16": 1429481984.0, + "17": 1429481984.0, + "18": 1429481984.0, + "19": 1429481984.0, + "20": 1429481984.0, + "21": 1429481984.0, + "22": 1429481984.0, + "23": 1429481984.0, + "24": 1429481984.0, + "25": 1429481984.0, + "26": 1429481984.0, + "27": 1429481984.0, + "28": 1429481984.0, + "29": 1429481984.0, + "30": 1429481984.0, + "31": 1429481984.0, + "32": 1429481984.0, + "33": 1429481984.0, + "34": 1429481984.0, + "35": 1429481984.0, + "36": 1429481984.0, + "37": 1429481984.0, + "38": 1429481984.0, + "39": 1429481984.0, + "40": 1429481984.0, + "41": 1429481984.0, + "42": 1429481984.0, + "43": 1429481984.0, + "44": 1429481984.0, + "45": 1429481984.0, + "46": 1429481984.0, + "47": 1430268416.0, + "48": 1430268416.0, + "49": 1430268416.0, + "50": 1430268416.0, + "51": 1430268416.0, + "52": 1430268416.0, + "53": 1430268416.0, + "54": 1430268416.0, + "55": 1430268416.0, + "56": 1430268416.0, + "57": 1430268416.0, + "58": 1430268416.0, + "59": 1430268416.0, + "60": 1430268416.0, + "61": 1430268416.0, + "62": 1430268416.0, + "63": 1430268416.0, + "64": 1430268416.0, + "65": 1430268416.0, + "66": 1430268416.0, + "67": 1430268416.0, + "68": 1430268416.0, + "69": 1430268416.0, + "70": 1430268416.0, + "71": 1430268416.0, + "72": 1430268416.0, + "73": 1430268416.0, + "74": 1430268416.0, + "75": 1430268416.0, + "76": 1430268416.0, + "77": 1430268416.0, + "78": 1430268416.0, + "79": 1430268416.0, + "80": 1430268416.0, + "81": 1430268416.0, + "82": 
1430268416.0, + "83": 1430268416.0, + "84": 1430268416.0, + "85": 1430268416.0, + "86": 1430268416.0, + "87": 1430268416.0, + "88": 1430268416.0, + "89": 1430268416.0, + "90": 1430268416.0, + "91": 1430268416.0, + "92": 1430268416.0, + "93": 1430268416.0, + "94": 1430268416.0, + "95": 1430268416.0, + "96": 1430268416.0, + "97": 1430268416.0, + "98": 1430268416.0, + "99": 1430268416.0, + "100": 1430268416.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.14048, + "2": 0.15305, + "3": 0.12206, + "4": 0.12159, + "5": 0.12338, + "6": 0.12232, + "7": 0.12178, + "8": 0.12116, + "9": 0.12378, + "10": 0.1213, + "11": 0.12099, + "12": 0.12066, + "13": 0.12326, + "14": 0.12143, + "15": 0.12173, + "16": 0.12258, + "17": 0.12137, + "18": 0.12235, + "19": 0.12098, + "20": 0.12175, + "21": 0.12124, + "22": 0.12047, + "23": 0.12106, + "24": 0.12167, + "25": 0.12151, + "26": 0.12085, + "27": 0.12129, + "28": 0.1211, + "29": 0.12093, + "30": 0.12007, + "31": 0.12104, + "32": 0.12256, + "33": 0.12191, + "34": 0.12633, + "35": 0.13877, + "36": 0.13281, + "37": 0.12383, + "38": 0.12319, + "39": 0.12304, + "40": 0.12247, + "41": 0.1226, + "42": 0.12481, + "43": 0.12769, + "44": 0.12464, + "45": 0.12374, + "46": 0.12839, + "47": 0.12264, + "48": 0.13199, + "49": 0.12462, + "50": 0.12201, + "51": 0.125, + "52": 0.13707, + "53": 0.12341, + "54": 0.12318, + "55": 0.12261, + "56": 0.12283, + "57": 0.12341, + "58": 0.12301, + "59": 0.12419, + "60": 0.12361, + "61": 0.12424, + "62": 0.12437, + "63": 0.12354, + "64": 0.12246, + "65": 0.12204, + "66": 0.1235, + "67": 0.12315, + "68": 0.12287, + "69": 0.12129, + "70": 0.12211, + "71": 0.12216, + "72": 0.12316, + "73": 0.12246, + "74": 0.12156, + "75": 0.12321, + "76": 0.12274, + "77": 0.12488, + "78": 0.12309, + "79": 0.12392, + "80": 0.12291, + "81": 0.12432, + "82": 0.1239, + "83": 0.12342, + "84": 0.12131, + "85": 0.12225, + "86": 0.12172, + "87": 0.12084, + "88": 0.12493, + 
"89": 0.12176, + "90": 0.12578, + "91": 0.12256, + "92": 0.12137, + "93": 0.12208, + "94": 0.12379, + "95": 0.12088, + "96": 0.12458, + "97": 0.12217, + "98": 0.12238, + "99": 0.12101, + "100": 0.12165 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..80f6783f6f2 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85949, + "2": 10.85553, + "3": 10.86546, + "4": 10.84554, + "5": 10.88348, + "6": 10.89432, + "7": 10.87067, + "8": 10.86981, + "9": 10.86919, + "10": 10.83887, + "11": 10.89435, + "12": 10.87982, + "13": 10.87988, + "14": 10.90314, + "15": 10.8405, + "16": 10.83786, + "17": 10.80668, + "18": 10.83027, + "19": 10.82259, + "20": 10.73192, + "21": 10.70753, + "22": 10.56005, + "23": 10.72402, + "24": 10.6111, + "25": 10.54815, + "26": 10.61332, + "27": 10.63056, + "28": 10.56645, + "29": 10.59668, + "30": 10.37137, + "31": 10.1172, + "32": 10.4613, + "33": 10.45249, + "34": 10.2169, + "35": 10.27173, + "36": 10.23118, + "37": 10.34812, + "38": 10.1884, + "39": 10.41042, + "40": 10.09426, + "41": 10.1471, + "42": 10.21243, + "43": 9.8411, + "44": 9.95916, + "45": 9.84085, + "46": 9.8248, + "47": 10.1388, + "48": 9.8584, + "49": 9.5472, + "50": 9.90878, + "51": 9.85583, + "52": 9.75242, + "53": 10.07589, + "54": 9.95688, + "55": 9.88208, + "56": 9.63141, + "57": 9.48651, + "58": 9.83118, + "59": 9.58905, + "60": 9.50651, + "61": 9.7037, + "62": 9.98291, + "63": 9.38315, + "64": 9.77906, + "65": 
8.95179, + "66": 9.7016, + "67": 9.37206, + "68": 9.78852, + "69": 9.79859, + "70": 9.74746, + "71": 9.6191, + "72": 9.58502, + "73": 9.49725, + "74": 8.93933, + "75": 9.42706, + "76": 9.08024, + "77": 10.06571, + "78": 9.72896, + "79": 9.37772, + "80": 9.40999, + "81": 9.47983, + "82": 9.70184, + "83": 9.30625, + "84": 9.42095, + "85": 9.61378, + "86": 9.07656, + "87": 9.59458, + "88": 9.75068, + "89": 9.60243, + "90": 9.81901, + "91": 9.33899, + "92": 9.35717, + "93": 9.07883, + "94": 8.8351, + "95": 9.52171, + "96": 9.53008, + "97": 9.31309, + "98": 9.67785, + "99": 8.89061, + "100": 9.39726 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1638.0, + "2": 1766.0, + "3": 1620.0, + "4": 1810.0, + "5": 1844.0, + "6": 1835.0, + "7": 1694.0, + "8": 1632.0, + "9": 1902.0, + "10": 1427.0, + "11": 1932.0, + "12": 1705.0, + "13": 1834.0, + "14": 1807.0, + "15": 1907.0, + "16": 1797.0, + "17": 1911.0, + "18": 1667.0, + "19": 1742.0, + "20": 1662.0, + "21": 1853.0, + "22": 1621.0, + "23": 2010.0, + "24": 1546.0, + "25": 1510.0, + "26": 1664.0, + "27": 1722.0, + "28": 1977.0, + "29": 2024.0, + "30": 1873.0, + "31": 1494.0, + "32": 1890.0, + "33": 2067.0, + "34": 1802.0, + "35": 1873.0, + "36": 1954.0, + "37": 2283.0, + "38": 2076.0, + "39": 2280.0, + "40": 2111.0, + "41": 2318.0, + "42": 2206.0, + "43": 2040.0, + "44": 2088.0, + "45": 2181.0, + "46": 2434.0, + "47": 2446.0, + "48": 2481.0, + "49": 2398.0, + "50": 2410.0, + "51": 2528.0, + "52": 2535.0, + "53": 2875.0, + "54": 2862.0, + "55": 2406.0, + "56": 2733.0, + "57": 2347.0, + "58": 2918.0, + "59": 2759.0, + "60": 2404.0, + "61": 3022.0, + "62": 2494.0, + "63": 2452.0, + "64": 2838.0, + "65": 2549.0, + "66": 3044.0, + "67": 2887.0, + "68": 2637.0, + "69": 2860.0, + "70": 3034.0, + "71": 2989.0, + "72": 2355.0, + "73": 3034.0, + "74": 1904.0, + "75": 2538.0, + "76": 3012.0, + "77": 3193.0, + "78": 2994.0, + "79": 3097.0, + "80": 3254.0, + "81": 3671.0, + "82": 
3299.0, + "83": 2793.0, + "84": 3146.0, + "85": 3329.0, + "86": 2769.0, + "87": 3766.0, + "88": 3021.0, + "89": 3286.0, + "90": 3029.0, + "91": 2772.0, + "92": 2955.0, + "93": 2852.0, + "94": 3411.0, + "95": 3271.0, + "96": 3279.0, + "97": 3054.0, + "98": 3643.0, + "99": 3303.0, + "100": 3142.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 518291968.0, + "2": 518291968.0, + "3": 518291968.0, + "4": 518291968.0, + "5": 518291968.0, + "6": 518291968.0, + "7": 518291968.0, + "8": 518291968.0, + "9": 518291968.0, + "10": 518291968.0, + "11": 518291968.0, + "12": 518291968.0, + "13": 518291968.0, + "14": 518291968.0, + "15": 518291968.0, + "16": 518291968.0, + "17": 518291968.0, + "18": 518291968.0, + "19": 518291968.0, + "20": 518291968.0, + "21": 518291968.0, + "22": 518291968.0, + "23": 518291968.0, + "24": 518291968.0, + "25": 518291968.0, + "26": 518291968.0, + "27": 518291968.0, + "28": 518291968.0, + "29": 518291968.0, + "30": 518291968.0, + "31": 518291968.0, + "32": 518291968.0, + "33": 518291968.0, + "34": 518291968.0, + "35": 518291968.0, + "36": 518291968.0, + "37": 518291968.0, + "38": 518291968.0, + "39": 518291968.0, + "40": 518291968.0, + "41": 518291968.0, + "42": 518291968.0, + "43": 518291968.0, + "44": 518291968.0, + "45": 518291968.0, + "46": 518291968.0, + "47": 518291968.0, + "48": 518291968.0, + "49": 518291968.0, + "50": 518291968.0, + "51": 518291968.0, + "52": 518291968.0, + "53": 518291968.0, + "54": 518291968.0, + "55": 518291968.0, + "56": 518291968.0, + "57": 518291968.0, + "58": 518291968.0, + "59": 518291968.0, + "60": 518291968.0, + "61": 518291968.0, + "62": 518291968.0, + "63": 518291968.0, + "64": 518291968.0, + "65": 518291968.0, + "66": 518291968.0, + "67": 518291968.0, + "68": 518291968.0, + "69": 518291968.0, + "70": 518291968.0, + "71": 518291968.0, + "72": 518291968.0, + "73": 518291968.0, + "74": 518291968.0, + "75": 518291968.0, + "76": 518291968.0, + 
"77": 518291968.0, + "78": 518291968.0, + "79": 518291968.0, + "80": 518291968.0, + "81": 518291968.0, + "82": 518291968.0, + "83": 518291968.0, + "84": 518291968.0, + "85": 518291968.0, + "86": 518291968.0, + "87": 518291968.0, + "88": 518291968.0, + "89": 518291968.0, + "90": 518291968.0, + "91": 518291968.0, + "92": 518291968.0, + "93": 518291968.0, + "94": 518291968.0, + "95": 518291968.0, + "96": 518291968.0, + "97": 518291968.0, + "98": 518291968.0, + "99": 518291968.0, + "100": 518291968.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1245476352.0, + "2": 1429481984.0, + "3": 1429481984.0, + "4": 1429481984.0, + "5": 1429481984.0, + "6": 1429481984.0, + "7": 1429481984.0, + "8": 1429481984.0, + "9": 1429481984.0, + "10": 1429481984.0, + "11": 1429481984.0, + "12": 1429481984.0, + "13": 1429481984.0, + "14": 1429481984.0, + "15": 1429481984.0, + "16": 1429481984.0, + "17": 1429481984.0, + "18": 1429481984.0, + "19": 1429481984.0, + "20": 1429481984.0, + "21": 1429481984.0, + "22": 1429481984.0, + "23": 1429481984.0, + "24": 1429481984.0, + "25": 1429481984.0, + "26": 1429481984.0, + "27": 1429481984.0, + "28": 1429481984.0, + "29": 1429481984.0, + "30": 1429481984.0, + "31": 1429481984.0, + "32": 1429481984.0, + "33": 1429481984.0, + "34": 1429481984.0, + "35": 1429481984.0, + "36": 1429481984.0, + "37": 1429481984.0, + "38": 1429481984.0, + "39": 1429481984.0, + "40": 1429481984.0, + "41": 1429481984.0, + "42": 1429481984.0, + "43": 1429481984.0, + "44": 1429481984.0, + "45": 1429481984.0, + "46": 1429481984.0, + "47": 1429481984.0, + "48": 1429481984.0, + "49": 1429481984.0, + "50": 1429481984.0, + "51": 1429481984.0, + "52": 1429481984.0, + "53": 1429481984.0, + "54": 1429481984.0, + "55": 1429481984.0, + "56": 1429481984.0, + "57": 1429481984.0, + "58": 1429481984.0, + "59": 1429481984.0, + "60": 1429481984.0, + "61": 1429481984.0, + "62": 1429481984.0, + "63": 1429481984.0, + 
"64": 1429481984.0, + "65": 1429481984.0, + "66": 1429481984.0, + "67": 1429481984.0, + "68": 1429481984.0, + "69": 1429481984.0, + "70": 1429481984.0, + "71": 1429481984.0, + "72": 1429481984.0, + "73": 1429481984.0, + "74": 1429481984.0, + "75": 1429481984.0, + "76": 1429481984.0, + "77": 1429481984.0, + "78": 1429481984.0, + "79": 1429481984.0, + "80": 1429481984.0, + "81": 1429481984.0, + "82": 1429481984.0, + "83": 1429481984.0, + "84": 1429481984.0, + "85": 1429481984.0, + "86": 1429481984.0, + "87": 1429481984.0, + "88": 1429481984.0, + "89": 1429481984.0, + "90": 1429481984.0, + "91": 1429481984.0, + "92": 1429481984.0, + "93": 1429481984.0, + "94": 1429481984.0, + "95": 1429481984.0, + "96": 1429481984.0, + "97": 1429481984.0, + "98": 1429481984.0, + "99": 1429481984.0, + "100": 1429481984.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.65353, + "2": 0.15729, + "3": 0.13911, + "4": 0.14117, + "5": 0.14172, + "6": 0.14091, + "7": 0.14103, + "8": 0.14008, + "9": 0.14444, + "10": 0.14215, + "11": 0.143, + "12": 0.14395, + "13": 0.14101, + "14": 0.14112, + "15": 0.14126, + "16": 0.14286, + "17": 0.14201, + "18": 0.14405, + "19": 0.14472, + "20": 0.14424, + "21": 0.14746, + "22": 0.14732, + "23": 0.14871, + "24": 0.14885, + "25": 0.14732, + "26": 0.14775, + "27": 0.14978, + "28": 0.14685, + "29": 0.15004, + "30": 0.14663, + "31": 0.14925, + "32": 0.14679, + "33": 0.14465, + "34": 0.14701, + "35": 0.14556, + "36": 0.14835, + "37": 0.14562, + "38": 0.14971, + "39": 0.14881, + "40": 0.14688, + "41": 0.14373, + "42": 0.14577, + "43": 0.14595, + "44": 0.1465, + "45": 0.14283, + "46": 0.14194, + "47": 0.14334, + "48": 0.14235, + "49": 0.14347, + "50": 0.14228, + "51": 0.14946, + "52": 0.14427, + "53": 0.14469, + "54": 0.14466, + "55": 0.14197, + "56": 0.14396, + "57": 0.14283, + "58": 0.14383, + "59": 0.14201, + "60": 0.14448, + "61": 0.14593, + "62": 0.14316, + "63": 0.14235, + "64": 0.14447, + 
"65": 0.14383, + "66": 0.14456, + "67": 0.14508, + "68": 0.1452, + "69": 0.14518, + "70": 0.1449, + "71": 0.14576, + "72": 0.14328, + "73": 0.14352, + "74": 0.1504, + "75": 0.15058, + "76": 0.14825, + "77": 0.14229, + "78": 0.14494, + "79": 0.14518, + "80": 0.14464, + "81": 0.1461, + "82": 0.14482, + "83": 0.14487, + "84": 0.14272, + "85": 0.14154, + "86": 0.14252, + "87": 0.1447, + "88": 0.14327, + "89": 0.1441, + "90": 0.14688, + "91": 0.14346, + "92": 0.14427, + "93": 0.14222, + "94": 0.14464, + "95": 0.14507, + "96": 0.14196, + "97": 0.1438, + "98": 0.14103, + "99": 0.14644, + "100": 0.14474 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..ef4b8c6d946 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92655, + "2": 10.92585, + "3": 10.91515, + "4": 10.909, + "5": 10.92721, + "6": 10.93563, + "7": 10.90643, + "8": 10.92118, + "9": 10.9107, + "10": 10.90795, + "11": 10.89277, + "12": 10.92431, + "13": 10.91489, + "14": 10.92148, + "15": 10.88292, + "16": 10.87302, + "17": 10.84069, + "18": 10.873, + "19": 10.85633, + "20": 10.77594, + "21": 10.74894, + "22": 10.63083, + "23": 10.75614, + "24": 10.65645, + "25": 10.59266, + "26": 10.6544, + "27": 10.64915, + "28": 10.59496, + "29": 10.60945, + "30": 10.3918, + "31": 10.15724, + "32": 10.49112, + "33": 10.4796, + "34": 10.24073, + "35": 10.297, + "36": 10.24677, + "37": 10.35242, + "38": 10.20481, + "39": 10.40506, + "40": 
10.0966, + "41": 10.15195, + "42": 10.22065, + "43": 9.85507, + "44": 9.96164, + "45": 9.84468, + "46": 9.83835, + "47": 10.14, + "48": 9.85762, + "49": 9.53744, + "50": 9.90946, + "51": 9.84888, + "52": 9.74164, + "53": 10.0634, + "54": 9.94739, + "55": 9.87774, + "56": 9.62736, + "57": 9.47158, + "58": 9.82895, + "59": 9.58274, + "60": 9.4912, + "61": 9.69972, + "62": 9.97984, + "63": 9.37281, + "64": 9.77457, + "65": 8.94253, + "66": 9.69879, + "67": 9.3641, + "68": 9.78785, + "69": 9.78336, + "70": 9.72282, + "71": 9.60808, + "72": 9.58431, + "73": 9.4898, + "74": 8.94861, + "75": 9.4189, + "76": 9.08729, + "77": 10.06345, + "78": 9.72836, + "79": 9.37155, + "80": 9.40054, + "81": 9.47831, + "82": 9.69155, + "83": 9.30735, + "84": 9.41236, + "85": 9.61184, + "86": 9.0759, + "87": 9.59464, + "88": 9.74732, + "89": 9.60675, + "90": 9.81029, + "91": 9.34357, + "92": 9.36491, + "93": 9.07725, + "94": 8.83091, + "95": 9.51723, + "96": 9.52447, + "97": 9.31031, + "98": 9.67875, + "99": 8.88838, + "100": 9.40137 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1637.0, + "2": 1813.0, + "3": 1642.0, + "4": 1766.0, + "5": 1964.0, + "6": 1846.0, + "7": 1884.0, + "8": 1763.0, + "9": 1934.0, + "10": 1489.0, + "11": 2000.0, + "12": 1800.0, + "13": 1942.0, + "14": 1818.0, + "15": 1923.0, + "16": 1792.0, + "17": 1801.0, + "18": 1730.0, + "19": 1754.0, + "20": 1585.0, + "21": 1774.0, + "22": 1692.0, + "23": 1974.0, + "24": 1632.0, + "25": 1649.0, + "26": 1865.0, + "27": 1853.0, + "28": 2076.0, + "29": 2051.0, + "30": 1908.0, + "31": 1532.0, + "32": 1984.0, + "33": 2192.0, + "34": 1867.0, + "35": 1954.0, + "36": 1998.0, + "37": 2392.0, + "38": 2248.0, + "39": 2437.0, + "40": 2265.0, + "41": 2237.0, + "42": 2319.0, + "43": 2171.0, + "44": 2133.0, + "45": 2057.0, + "46": 2372.0, + "47": 2596.0, + "48": 2429.0, + "49": 2248.0, + "50": 2458.0, + "51": 2794.0, + "52": 2607.0, + "53": 2964.0, + "54": 2830.0, + "55": 2411.0, + 
"56": 2688.0, + "57": 2444.0, + "58": 3101.0, + "59": 2822.0, + "60": 2518.0, + "61": 2878.0, + "62": 2642.0, + "63": 2396.0, + "64": 2963.0, + "65": 2740.0, + "66": 3297.0, + "67": 2793.0, + "68": 2901.0, + "69": 3001.0, + "70": 3253.0, + "71": 3004.0, + "72": 2341.0, + "73": 3179.0, + "74": 1950.0, + "75": 2653.0, + "76": 3085.0, + "77": 3451.0, + "78": 3324.0, + "79": 3342.0, + "80": 3531.0, + "81": 3790.0, + "82": 3427.0, + "83": 2786.0, + "84": 3443.0, + "85": 3379.0, + "86": 2871.0, + "87": 3840.0, + "88": 3076.0, + "89": 3444.0, + "90": 2991.0, + "91": 2705.0, + "92": 3073.0, + "93": 2724.0, + "94": 3513.0, + "95": 3428.0, + "96": 3557.0, + "97": 3249.0, + "98": 3700.0, + "99": 3192.0, + "100": 3264.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 436764672.0, + "2": 436764672.0, + "3": 436764672.0, + "4": 436764672.0, + "5": 436764672.0, + "6": 436764672.0, + "7": 436764672.0, + "8": 436764672.0, + "9": 436764672.0, + "10": 436764672.0, + "11": 436764672.0, + "12": 436764672.0, + "13": 436764672.0, + "14": 436764672.0, + "15": 436764672.0, + "16": 436764672.0, + "17": 436764672.0, + "18": 436764672.0, + "19": 436764672.0, + "20": 436764672.0, + "21": 436764672.0, + "22": 436764672.0, + "23": 436764672.0, + "24": 436764672.0, + "25": 436764672.0, + "26": 436764672.0, + "27": 436764672.0, + "28": 436764672.0, + "29": 436764672.0, + "30": 436764672.0, + "31": 436764672.0, + "32": 436764672.0, + "33": 436764672.0, + "34": 436764672.0, + "35": 436764672.0, + "36": 436764672.0, + "37": 436764672.0, + "38": 436764672.0, + "39": 436764672.0, + "40": 436764672.0, + "41": 436764672.0, + "42": 436764672.0, + "43": 436764672.0, + "44": 436764672.0, + "45": 436764672.0, + "46": 436764672.0, + "47": 436764672.0, + "48": 436764672.0, + "49": 436764672.0, + "50": 436764672.0, + "51": 436764672.0, + "52": 436764672.0, + "53": 436764672.0, + "54": 436764672.0, + "55": 436764672.0, + "56": 436764672.0, + 
"57": 436764672.0, + "58": 436764672.0, + "59": 436764672.0, + "60": 436764672.0, + "61": 436764672.0, + "62": 436764672.0, + "63": 436764672.0, + "64": 436764672.0, + "65": 436764672.0, + "66": 436764672.0, + "67": 436764672.0, + "68": 436764672.0, + "69": 436764672.0, + "70": 436764672.0, + "71": 436764672.0, + "72": 436764672.0, + "73": 436764672.0, + "74": 436764672.0, + "75": 436764672.0, + "76": 436764672.0, + "77": 436764672.0, + "78": 436764672.0, + "79": 436764672.0, + "80": 436764672.0, + "81": 436764672.0, + "82": 436764672.0, + "83": 436764672.0, + "84": 436764672.0, + "85": 436764672.0, + "86": 436764672.0, + "87": 436764672.0, + "88": 436764672.0, + "89": 436764672.0, + "90": 436764672.0, + "91": 436764672.0, + "92": 436764672.0, + "93": 436764672.0, + "94": 436764672.0, + "95": 436764672.0, + "96": 436764672.0, + "97": 436764672.0, + "98": 436764672.0, + "99": 436764672.0, + "100": 436764672.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1178635264.0, + "2": 1359495168.0, + "3": 1359495168.0, + "4": 1359495168.0, + "5": 1359495168.0, + "6": 1359495168.0, + "7": 1359495168.0, + "8": 1359495168.0, + "9": 1359495168.0, + "10": 1359495168.0, + "11": 1359495168.0, + "12": 1359495168.0, + "13": 1359495168.0, + "14": 1359495168.0, + "15": 1359495168.0, + "16": 1359495168.0, + "17": 1359495168.0, + "18": 1359495168.0, + "19": 1359495168.0, + "20": 1359495168.0, + "21": 1359495168.0, + "22": 1359495168.0, + "23": 1359495168.0, + "24": 1359495168.0, + "25": 1359495168.0, + "26": 1359495168.0, + "27": 1359495168.0, + "28": 1359495168.0, + "29": 1359495168.0, + "30": 1359495168.0, + "31": 1359495168.0, + "32": 1359495168.0, + "33": 1359495168.0, + "34": 1359495168.0, + "35": 1359495168.0, + "36": 1359495168.0, + "37": 1359495168.0, + "38": 1359495168.0, + "39": 1359495168.0, + "40": 1359495168.0, + "41": 1359495168.0, + "42": 1359495168.0, + "43": 1359495168.0, + "44": 1359495168.0, + 
"45": 1359495168.0, + "46": 1359495168.0, + "47": 1359495168.0, + "48": 1359495168.0, + "49": 1359495168.0, + "50": 1359495168.0, + "51": 1359495168.0, + "52": 1359495168.0, + "53": 1359495168.0, + "54": 1359495168.0, + "55": 1359495168.0, + "56": 1359495168.0, + "57": 1359495168.0, + "58": 1359495168.0, + "59": 1359495168.0, + "60": 1359495168.0, + "61": 1359495168.0, + "62": 1359495168.0, + "63": 1359495168.0, + "64": 1359495168.0, + "65": 1359495168.0, + "66": 1359495168.0, + "67": 1359495168.0, + "68": 1359495168.0, + "69": 1359495168.0, + "70": 1359495168.0, + "71": 1359495168.0, + "72": 1359495168.0, + "73": 1359495168.0, + "74": 1359495168.0, + "75": 1359495168.0, + "76": 1359495168.0, + "77": 1359495168.0, + "78": 1359495168.0, + "79": 1359495168.0, + "80": 1359495168.0, + "81": 1359495168.0, + "82": 1359495168.0, + "83": 1359495168.0, + "84": 1359495168.0, + "85": 1359495168.0, + "86": 1359495168.0, + "87": 1359495168.0, + "88": 1359495168.0, + "89": 1359495168.0, + "90": 1359495168.0, + "91": 1359495168.0, + "92": 1359495168.0, + "93": 1359495168.0, + "94": 1359495168.0, + "95": 1359495168.0, + "96": 1359495168.0, + "97": 1359495168.0, + "98": 1359495168.0, + "99": 1359495168.0, + "100": 1359495168.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.71223, + "2": 0.2559, + "3": 0.20574, + "4": 0.19465, + "5": 0.19231, + "6": 0.19171, + "7": 0.19937, + "8": 0.19134, + "9": 0.19297, + "10": 0.56022, + "11": 0.19644, + "12": 0.1919, + "13": 0.18999, + "14": 0.19039, + "15": 0.19033, + "16": 0.19392, + "17": 0.1905, + "18": 0.19034, + "19": 0.19238, + "20": 0.18982, + "21": 0.19272, + "22": 0.18887, + "23": 0.18965, + "24": 0.18822, + "25": 0.18884, + "26": 0.19177, + "27": 0.19002, + "28": 0.19012, + "29": 0.18865, + "30": 0.18813, + "31": 0.18848, + "32": 0.19189, + "33": 0.18955, + "34": 0.18747, + "35": 0.18875, + "36": 0.18808, + "37": 0.19208, + "38": 0.18809, + "39": 0.18964, + "40": 
0.18801, + "41": 0.18881, + "42": 0.18974, + "43": 0.18833, + "44": 0.19089, + "45": 0.18763, + "46": 0.18829, + "47": 0.18867, + "48": 0.19358, + "49": 0.19137, + "50": 0.18755, + "51": 0.40667, + "52": 0.20997, + "53": 0.20527, + "54": 0.20595, + "55": 0.20323, + "56": 0.20609, + "57": 0.20386, + "58": 0.20342, + "59": 0.20542, + "60": 0.20552, + "61": 0.20398, + "62": 0.20382, + "63": 0.20526, + "64": 0.20557, + "65": 0.20431, + "66": 0.20453, + "67": 0.20352, + "68": 0.20417, + "69": 0.2078, + "70": 0.20587, + "71": 0.20478, + "72": 0.20614, + "73": 0.20512, + "74": 0.20553, + "75": 0.20566, + "76": 0.20364, + "77": 0.20348, + "78": 0.20324, + "79": 0.20677, + "80": 0.20465, + "81": 0.2031, + "82": 0.20231, + "83": 0.20385, + "84": 0.20449, + "85": 0.20555, + "86": 0.2034, + "87": 0.20494, + "88": 0.2068, + "89": 0.20402, + "90": 0.20742, + "91": 0.20169, + "92": 0.20203, + "93": 0.20392, + "94": 0.2017, + "95": 0.20418, + "96": 0.20159, + "97": 0.20256, + "98": 0.20348, + "99": 0.20162, + "100": 0.20224 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..6c29141b1ab --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.92655, + "2": 10.92585, + "3": 10.91514, + "4": 10.909, + "5": 10.92715, + "6": 10.93558, + "7": 10.90643, + "8": 10.92116, + "9": 10.91068, + "10": 10.9079, + "11": 10.89281, + "12": 10.9243, + "13": 10.91489, + "14": 10.92142, + "15": 10.88293, + "16": 10.87308, + "17": 
10.84069, + "18": 10.87299, + "19": 10.85635, + "20": 10.77597, + "21": 10.74899, + "22": 10.63079, + "23": 10.75618, + "24": 10.65646, + "25": 10.59264, + "26": 10.65436, + "27": 10.64916, + "28": 10.59497, + "29": 10.60952, + "30": 10.39177, + "31": 10.1573, + "32": 10.49109, + "33": 10.4796, + "34": 10.24074, + "35": 10.29698, + "36": 10.24672, + "37": 10.35242, + "38": 10.20483, + "39": 10.40503, + "40": 10.09663, + "41": 10.15197, + "42": 10.22069, + "43": 9.85509, + "44": 9.96162, + "45": 9.8447, + "46": 9.83835, + "47": 10.14006, + "48": 9.8576, + "49": 9.53743, + "50": 9.90948, + "51": 9.84887, + "52": 9.74166, + "53": 10.0634, + "54": 9.94738, + "55": 9.87771, + "56": 9.62738, + "57": 9.47156, + "58": 9.82893, + "59": 9.58275, + "60": 9.49123, + "61": 9.6997, + "62": 9.97993, + "63": 9.37281, + "64": 9.77461, + "65": 8.94258, + "66": 9.69883, + "67": 9.36407, + "68": 9.78787, + "69": 9.78335, + "70": 9.7228, + "71": 9.60807, + "72": 9.58432, + "73": 9.48978, + "74": 8.94859, + "75": 9.41891, + "76": 9.08727, + "77": 10.06346, + "78": 9.72836, + "79": 9.37154, + "80": 9.40055, + "81": 9.47831, + "82": 9.69156, + "83": 9.30737, + "84": 9.41236, + "85": 9.61183, + "86": 9.0759, + "87": 9.59459, + "88": 9.74736, + "89": 9.60675, + "90": 9.81024, + "91": 9.34359, + "92": 9.36491, + "93": 9.07724, + "94": 8.83091, + "95": 9.51724, + "96": 9.52446, + "97": 9.31031, + "98": 9.67875, + "99": 8.88841, + "100": 9.40137 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1669.0, + "2": 1803.0, + "3": 1710.0, + "4": 1820.0, + "5": 1976.0, + "6": 1885.0, + "7": 1871.0, + "8": 1764.0, + "9": 1859.0, + "10": 1373.0, + "11": 1990.0, + "12": 1788.0, + "13": 1897.0, + "14": 1734.0, + "15": 1894.0, + "16": 1713.0, + "17": 1842.0, + "18": 1666.0, + "19": 1744.0, + "20": 1653.0, + "21": 1882.0, + "22": 1706.0, + "23": 1954.0, + "24": 1640.0, + "25": 1696.0, + "26": 1871.0, + "27": 1921.0, + "28": 2037.0, + "29": 2016.0, + 
"30": 1883.0, + "31": 1596.0, + "32": 1913.0, + "33": 2205.0, + "34": 1860.0, + "35": 1980.0, + "36": 2029.0, + "37": 2339.0, + "38": 2176.0, + "39": 2352.0, + "40": 2111.0, + "41": 2308.0, + "42": 2334.0, + "43": 2067.0, + "44": 2193.0, + "45": 2124.0, + "46": 2336.0, + "47": 2584.0, + "48": 2349.0, + "49": 2276.0, + "50": 2539.0, + "51": 2656.0, + "52": 2542.0, + "53": 2863.0, + "54": 2741.0, + "55": 2376.0, + "56": 2790.0, + "57": 2497.0, + "58": 2939.0, + "59": 2877.0, + "60": 2326.0, + "61": 2871.0, + "62": 2654.0, + "63": 2428.0, + "64": 3017.0, + "65": 2721.0, + "66": 3212.0, + "67": 2706.0, + "68": 2877.0, + "69": 2929.0, + "70": 3147.0, + "71": 2970.0, + "72": 2362.0, + "73": 3092.0, + "74": 1964.0, + "75": 2648.0, + "76": 3014.0, + "77": 3562.0, + "78": 3371.0, + "79": 3369.0, + "80": 3457.0, + "81": 3675.0, + "82": 3516.0, + "83": 2891.0, + "84": 3362.0, + "85": 3249.0, + "86": 2711.0, + "87": 3770.0, + "88": 3008.0, + "89": 3409.0, + "90": 3052.0, + "91": 2694.0, + "92": 3142.0, + "93": 2631.0, + "94": 3394.0, + "95": 3371.0, + "96": 3517.0, + "97": 3190.0, + "98": 3808.0, + "99": 3258.0, + "100": 3248.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 436764672.0, + "2": 436764672.0, + "3": 436764672.0, + "4": 436764672.0, + "5": 436764672.0, + "6": 436764672.0, + "7": 436764672.0, + "8": 436764672.0, + "9": 436764672.0, + "10": 436764672.0, + "11": 436764672.0, + "12": 436764672.0, + "13": 436764672.0, + "14": 436764672.0, + "15": 436764672.0, + "16": 436764672.0, + "17": 436764672.0, + "18": 436764672.0, + "19": 436764672.0, + "20": 436764672.0, + "21": 436764672.0, + "22": 436764672.0, + "23": 436764672.0, + "24": 436764672.0, + "25": 436764672.0, + "26": 436764672.0, + "27": 436764672.0, + "28": 436764672.0, + "29": 436764672.0, + "30": 436764672.0, + "31": 436764672.0, + "32": 436764672.0, + "33": 436764672.0, + "34": 436764672.0, + "35": 436764672.0, + "36": 436764672.0, + "37": 
436764672.0, + "38": 436764672.0, + "39": 436764672.0, + "40": 436764672.0, + "41": 436764672.0, + "42": 436764672.0, + "43": 436764672.0, + "44": 436764672.0, + "45": 436764672.0, + "46": 436764672.0, + "47": 436764672.0, + "48": 436764672.0, + "49": 436764672.0, + "50": 436764672.0, + "51": 436764672.0, + "52": 436764672.0, + "53": 436764672.0, + "54": 436764672.0, + "55": 436764672.0, + "56": 436764672.0, + "57": 436764672.0, + "58": 436764672.0, + "59": 436764672.0, + "60": 436764672.0, + "61": 436764672.0, + "62": 436764672.0, + "63": 436764672.0, + "64": 436764672.0, + "65": 436764672.0, + "66": 436764672.0, + "67": 436764672.0, + "68": 436764672.0, + "69": 436764672.0, + "70": 436764672.0, + "71": 436764672.0, + "72": 436764672.0, + "73": 436764672.0, + "74": 436764672.0, + "75": 436764672.0, + "76": 436764672.0, + "77": 436764672.0, + "78": 436764672.0, + "79": 436764672.0, + "80": 436764672.0, + "81": 436764672.0, + "82": 436764672.0, + "83": 436764672.0, + "84": 436764672.0, + "85": 436764672.0, + "86": 436764672.0, + "87": 436764672.0, + "88": 436764672.0, + "89": 436764672.0, + "90": 436764672.0, + "91": 436764672.0, + "92": 436764672.0, + "93": 436764672.0, + "94": 436764672.0, + "95": 436764672.0, + "96": 436764672.0, + "97": 436764672.0, + "98": 436764672.0, + "99": 436764672.0, + "100": 436764672.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1178635264.0, + "2": 1359495168.0, + "3": 1360411648.0, + "4": 1360411648.0, + "5": 1360411648.0, + "6": 1360411648.0, + "7": 1360411648.0, + "8": 1360411648.0, + "9": 1360411648.0, + "10": 1360411648.0, + "11": 1360411648.0, + "12": 1360411648.0, + "13": 1360411648.0, + "14": 1360411648.0, + "15": 1360411648.0, + "16": 1360411648.0, + "17": 1360411648.0, + "18": 1360411648.0, + "19": 1360411648.0, + "20": 1360411648.0, + "21": 1360411648.0, + "22": 1360411648.0, + "23": 1360411648.0, + "24": 1360411648.0, + "25": 1360411648.0, + "26": 
1360411648.0, + "27": 1360411648.0, + "28": 1360411648.0, + "29": 1360411648.0, + "30": 1360411648.0, + "31": 1360411648.0, + "32": 1360411648.0, + "33": 1360411648.0, + "34": 1360411648.0, + "35": 1360411648.0, + "36": 1360411648.0, + "37": 1360411648.0, + "38": 1360411648.0, + "39": 1360411648.0, + "40": 1360411648.0, + "41": 1360411648.0, + "42": 1360411648.0, + "43": 1360411648.0, + "44": 1360411648.0, + "45": 1360411648.0, + "46": 1360411648.0, + "47": 1360411648.0, + "48": 1360411648.0, + "49": 1360411648.0, + "50": 1360411648.0, + "51": 1360411648.0, + "52": 1360411648.0, + "53": 1360411648.0, + "54": 1360411648.0, + "55": 1360411648.0, + "56": 1360411648.0, + "57": 1360411648.0, + "58": 1360411648.0, + "59": 1360411648.0, + "60": 1360411648.0, + "61": 1360411648.0, + "62": 1360411648.0, + "63": 1360411648.0, + "64": 1360411648.0, + "65": 1360411648.0, + "66": 1360411648.0, + "67": 1360411648.0, + "68": 1360411648.0, + "69": 1360411648.0, + "70": 1360411648.0, + "71": 1360411648.0, + "72": 1360411648.0, + "73": 1360411648.0, + "74": 1360411648.0, + "75": 1360411648.0, + "76": 1360411648.0, + "77": 1360411648.0, + "78": 1360411648.0, + "79": 1360411648.0, + "80": 1360411648.0, + "81": 1360411648.0, + "82": 1360411648.0, + "83": 1360411648.0, + "84": 1360411648.0, + "85": 1360411648.0, + "86": 1360411648.0, + "87": 1360411648.0, + "88": 1360411648.0, + "89": 1360411648.0, + "90": 1360411648.0, + "91": 1360411648.0, + "92": 1360411648.0, + "93": 1360411648.0, + "94": 1360411648.0, + "95": 1360411648.0, + "96": 1360411648.0, + "97": 1360411648.0, + "98": 1360411648.0, + "99": 1360411648.0, + "100": 1360411648.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.71622, + "2": 0.23087, + "3": 0.1951, + "4": 0.18861, + "5": 0.18812, + "6": 0.19385, + "7": 0.18893, + "8": 0.18851, + "9": 0.18797, + "10": 0.18883, + "11": 0.19316, + "12": 0.18894, + "13": 0.18809, + "14": 0.18851, + "15": 0.19062, + 
"16": 0.19113, + "17": 0.18987, + "18": 0.18872, + "19": 0.18621, + "20": 0.19006, + "21": 0.18925, + "22": 0.19544, + "23": 0.19322, + "24": 0.18957, + "25": 0.19074, + "26": 0.19316, + "27": 0.18825, + "28": 0.1874, + "29": 0.18747, + "30": 0.18693, + "31": 0.1865, + "32": 0.18917, + "33": 0.19083, + "34": 0.185, + "35": 0.18524, + "36": 0.18664, + "37": 0.18377, + "38": 0.18614, + "39": 0.18438, + "40": 0.18443, + "41": 0.18753, + "42": 0.1842, + "43": 0.18841, + "44": 0.18384, + "45": 0.18491, + "46": 0.18442, + "47": 0.18641, + "48": 0.18523, + "49": 0.18535, + "50": 0.18414, + "51": 0.19499, + "52": 0.18865, + "53": 0.18877, + "54": 0.18901, + "55": 0.18952, + "56": 0.18817, + "57": 0.18647, + "58": 0.19054, + "59": 0.18698, + "60": 0.19221, + "61": 0.1855, + "62": 0.18425, + "63": 0.18635, + "64": 0.18617, + "65": 0.18584, + "66": 0.18699, + "67": 0.18754, + "68": 0.18626, + "69": 0.18682, + "70": 0.37416, + "71": 0.18684, + "72": 0.18552, + "73": 0.18589, + "74": 0.18591, + "75": 0.19036, + "76": 0.18483, + "77": 0.18579, + "78": 0.18597, + "79": 0.1879, + "80": 0.18623, + "81": 0.18669, + "82": 0.18488, + "83": 0.18509, + "84": 0.18891, + "85": 0.18595, + "86": 0.18904, + "87": 0.18638, + "88": 0.18604, + "89": 0.18611, + "90": 0.18586, + "91": 0.18957, + "92": 0.18824, + "93": 0.18603, + "94": 0.18606, + "95": 0.18658, + "96": 0.18779, + "97": 0.18815, + "98": 0.18579, + "99": 0.186, + "100": 0.18722 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index e8a221fc47b..5ac3723f6cb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.84466, + "2": 10.84794, + "3": 10.84925, + "4": 10.84332, "5": 10.88244, + "6": 10.88079, + "7": 10.86575, + "8": 10.85546, + "9": 10.85543, "10": 10.81818, + "11": 10.88769, + "12": 10.8634, + "13": 10.86681, + "14": 10.88414, "15": 10.82464, + "16": 10.82854, + "17": 10.79491, + "18": 10.81492, + "19": 10.80133, "20": 10.7181, + "21": 10.69905, + "22": 10.56744, + "23": 10.717, + "24": 10.60443, "25": 10.55007, + "26": 10.60907, + "27": 10.62028, + "28": 10.5752, + "29": 10.59624, "30": 10.38327, + "31": 10.1537, + "32": 10.48026, + "33": 10.47378, + "34": 10.2366, "35": 10.28843, + "36": 10.24838, + "37": 10.35354, + "38": 10.20794, + "39": 10.41884, "40": 10.1122, + "41": 10.16092, + "42": 10.23301, + "43": 9.86118, + "44": 9.97698, "45": 9.86493, + "46": 9.84883, + "47": 10.16617, + "48": 9.87132, + "49": 9.56691, "50": 9.92114, + "51": 9.86695, + "52": 9.76956, + "53": 10.07809, + "54": 9.97027, "55": 9.89683, + "56": 9.64394, + "57": 9.49728, + "58": 9.84867, + "59": 9.59977, "60": 9.50631, + "61": 9.71011, + "62": 9.99101, + "63": 9.38968, + "64": 9.78595, "65": 8.95983, + "66": 9.70876, + "67": 9.37892, + "68": 9.79599, + "69": 9.80666, "70": 9.74795, + "71": 9.61779, + "72": 9.59127, + "73": 9.50398, + "74": 8.94624, "75": 9.42942, + "76": 9.08423, + "77": 10.06698, + "78": 9.73256, + "79": 9.38117, "80": 9.41061, + "81": 9.48289, + "82": 9.70492, + "83": 9.30713, + "84": 9.42241, "85": 9.61802, + "86": 9.07631, + "87": 9.59382, + "88": 9.75419, + "89": 9.60093, "90": 9.82013, + "91": 9.3407, + "92": 9.35717, + "93": 9.07927, + "94": 8.83613, "95": 9.5223, + "96": 9.53379, + "97": 9.31633, + "98": 9.68007, + "99": 8.89242, "100": 9.39964 } }, "num-zeros": { "start_step": 1, "end_step": 100, - 
"step_interval": 5, + "step_interval": 1, "values": { "1": 1770.0, + "2": 1809.0, + "3": 1782.0, + "4": 1916.0, "5": 1973.0, + "6": 1955.0, + "7": 2046.0, + "8": 1773.0, + "9": 1815.0, "10": 1432.0, + "11": 1961.0, + "12": 1828.0, + "13": 1967.0, + "14": 1825.0, "15": 1980.0, + "16": 1889.0, + "17": 1866.0, + "18": 1827.0, + "19": 1876.0, "20": 1715.0, + "21": 2046.0, + "22": 1872.0, + "23": 2168.0, + "24": 1814.0, "25": 1715.0, + "26": 1721.0, + "27": 1822.0, + "28": 2102.0, + "29": 2112.0, "30": 2020.0, + "31": 1569.0, + "32": 2022.0, + "33": 2256.0, + "34": 1884.0, "35": 2034.0, + "36": 2027.0, + "37": 2438.0, + "38": 2363.0, + "39": 2526.0, "40": 2254.0, + "41": 2328.0, + "42": 2409.0, + "43": 2126.0, + "44": 2166.0, "45": 2230.0, + "46": 2487.0, + "47": 2605.0, + "48": 2351.0, + "49": 2413.0, "50": 2274.0, + "51": 2579.0, + "52": 2508.0, + "53": 2879.0, + "54": 2744.0, "55": 2402.0, + "56": 2720.0, + "57": 2384.0, + "58": 3002.0, + "59": 2743.0, "60": 2457.0, + "61": 2976.0, + "62": 2631.0, + "63": 2349.0, + "64": 3077.0, "65": 2634.0, + "66": 3076.0, + "67": 2906.0, + "68": 2759.0, + "69": 2907.0, "70": 3045.0, + "71": 3159.0, + "72": 2506.0, + "73": 2956.0, + "74": 1945.0, "75": 2467.0, + "76": 2979.0, + "77": 3209.0, + "78": 3122.0, + "79": 3048.0, "80": 3389.0, + "81": 3799.0, + "82": 3272.0, + "83": 2962.0, + "84": 3328.0, "85": 3462.0, + "86": 3071.0, + "87": 3900.0, + "88": 3128.0, + "89": 3469.0, "90": 3095.0, + "91": 2769.0, + "92": 3168.0, + "93": 2713.0, + "94": 3416.0, "95": 3515.0, + "96": 3425.0, + "97": 3223.0, + "98": 3769.0, + "99": 3230.0, "100": 3219.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 246998528.0, + "2": 246998528.0, + "3": 246998528.0, + "4": 246998528.0, "5": 246998528.0, + "6": 246998528.0, + "7": 246998528.0, + "8": 246998528.0, + "9": 246998528.0, "10": 246998528.0, + "11": 246998528.0, + "12": 246998528.0, + "13": 246998528.0, + "14": 
246998528.0, "15": 246998528.0, + "16": 246998528.0, + "17": 246998528.0, + "18": 246998528.0, + "19": 246998528.0, "20": 246998528.0, + "21": 246998528.0, + "22": 246998528.0, + "23": 246998528.0, + "24": 246998528.0, "25": 246998528.0, + "26": 246998528.0, + "27": 246998528.0, + "28": 246998528.0, + "29": 246998528.0, "30": 246998528.0, + "31": 246998528.0, + "32": 246998528.0, + "33": 246998528.0, + "34": 246998528.0, "35": 246998528.0, + "36": 246998528.0, + "37": 246998528.0, + "38": 246998528.0, + "39": 246998528.0, "40": 246998528.0, + "41": 246998528.0, + "42": 246998528.0, + "43": 246998528.0, + "44": 246998528.0, "45": 246998528.0, + "46": 246998528.0, + "47": 246998528.0, + "48": 246998528.0, + "49": 246998528.0, "50": 246998528.0, + "51": 246998528.0, + "52": 246998528.0, + "53": 246998528.0, + "54": 246998528.0, "55": 246998528.0, + "56": 246998528.0, + "57": 246998528.0, + "58": 246998528.0, + "59": 246998528.0, "60": 246998528.0, + "61": 246998528.0, + "62": 246998528.0, + "63": 246998528.0, + "64": 246998528.0, "65": 246998528.0, + "66": 246998528.0, + "67": 246998528.0, + "68": 246998528.0, + "69": 246998528.0, "70": 246998528.0, + "71": 246998528.0, + "72": 246998528.0, + "73": 246998528.0, + "74": 246998528.0, "75": 246998528.0, + "76": 246998528.0, + "77": 246998528.0, + "78": 246998528.0, + "79": 246998528.0, "80": 246998528.0, + "81": 246998528.0, + "82": 246998528.0, + "83": 246998528.0, + "84": 246998528.0, "85": 246998528.0, + "86": 246998528.0, + "87": 246998528.0, + "88": 246998528.0, + "89": 246998528.0, "90": 246998528.0, + "91": 246998528.0, + "92": 246998528.0, + "93": 246998528.0, + "94": 246998528.0, "95": 246998528.0, + "96": 246998528.0, + "97": 246998528.0, + "98": 246998528.0, + "99": 246998528.0, "100": 246998528.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1503207936.0, + "2": 1503208960.0, + "3": 1503208960.0, + "4": 1503208960.0, "5": 
1503208960.0, + "6": 1503208960.0, + "7": 1503208960.0, + "8": 1503208960.0, + "9": 1503208960.0, "10": 1503208960.0, + "11": 1503208960.0, + "12": 1503208960.0, + "13": 1503208960.0, + "14": 1503208960.0, "15": 1503208960.0, + "16": 1503208960.0, + "17": 1503208960.0, + "18": 1503208960.0, + "19": 1503208960.0, "20": 1503208960.0, + "21": 1503208960.0, + "22": 1503208960.0, + "23": 1503208960.0, + "24": 1503208960.0, "25": 1503208960.0, + "26": 1503208960.0, + "27": 1503208960.0, + "28": 1503208960.0, + "29": 1503208960.0, "30": 1503208960.0, + "31": 1503208960.0, + "32": 1503208960.0, + "33": 1503208960.0, + "34": 1503208960.0, "35": 1503208960.0, + "36": 1503208960.0, + "37": 1503208960.0, + "38": 1503208960.0, + "39": 1503208960.0, "40": 1503208960.0, + "41": 1503208960.0, + "42": 1503208960.0, + "43": 1503208960.0, + "44": 1503208960.0, "45": 1503208960.0, + "46": 1503208960.0, + "47": 1503208960.0, + "48": 1503208960.0, + "49": 1503208960.0, "50": 1503208960.0, + "51": 1503208960.0, + "52": 1503208960.0, + "53": 1503208960.0, + "54": 1503208960.0, "55": 1503208960.0, + "56": 1503208960.0, + "57": 1503208960.0, + "58": 1503208960.0, + "59": 1503208960.0, "60": 1503208960.0, + "61": 1503208960.0, + "62": 1503208960.0, + "63": 1503208960.0, + "64": 1503208960.0, "65": 1503208960.0, + "66": 1503208960.0, + "67": 1503208960.0, + "68": 1503208960.0, + "69": 1503208960.0, "70": 1503208960.0, + "71": 1503208960.0, + "72": 1503208960.0, + "73": 1503208960.0, + "74": 1503208960.0, "75": 1503208960.0, + "76": 1503208960.0, + "77": 1503208960.0, + "78": 1503208960.0, + "79": 1503208960.0, "80": 1503208960.0, + "81": 1503208960.0, + "82": 1503208960.0, + "83": 1503208960.0, + "84": 1503208960.0, "85": 1503208960.0, + "86": 1503208960.0, + "87": 1503208960.0, + "88": 1503208960.0, + "89": 1503208960.0, "90": 1503208960.0, + "91": 1503208960.0, + "92": 1503208960.0, + "93": 1503208960.0, + "94": 1503208960.0, "95": 1503208960.0, + "96": 1503208960.0, + "97": 1503208960.0, + 
"98": 1503208960.0, + "99": 1503208960.0, "100": 1503208960.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 7.35335, - "5": 0.15349, - "10": 0.15437, - "15": 0.15387, - "20": 0.15054, - "25": 0.15011, - "30": 0.15223, - "35": 0.15279, - "40": 0.15254, - "45": 0.14885, - "50": 0.15116, - "55": 0.15076, - "60": 0.15109, - "65": 0.15214, - "70": 0.15048, - "75": 0.15013, - "80": 0.15119, - "85": 0.15129, - "90": 0.15233, - "95": 0.14802, - "100": 0.15191 + "1": 6.97838, + "2": 0.1863, + "3": 0.17806, + "4": 0.17695, + "5": 0.17974, + "6": 0.17764, + "7": 0.18024, + "8": 0.17572, + "9": 0.179, + "10": 0.17802, + "11": 0.17798, + "12": 0.18743, + "13": 0.18184, + "14": 0.18624, + "15": 0.1848, + "16": 0.18027, + "17": 0.17452, + "18": 0.17844, + "19": 0.17971, + "20": 0.17848, + "21": 0.17704, + "22": 0.17765, + "23": 0.17541, + "24": 0.17687, + "25": 0.1788, + "26": 0.17648, + "27": 0.17818, + "28": 0.17831, + "29": 0.17674, + "30": 0.17588, + "31": 0.17953, + "32": 0.17664, + "33": 0.17688, + "34": 0.17669, + "35": 0.1745, + "36": 0.1776, + "37": 0.17613, + "38": 0.17723, + "39": 0.17434, + "40": 0.17681, + "41": 0.17485, + "42": 0.17993, + "43": 0.174, + "44": 0.17741, + "45": 0.17457, + "46": 0.1789, + "47": 0.17735, + "48": 0.17895, + "49": 0.17421, + "50": 0.17774, + "51": 0.17494, + "52": 0.1787, + "53": 0.17718, + "54": 0.18021, + "55": 0.17484, + "56": 0.17693, + "57": 0.178, + "58": 0.17576, + "59": 0.17632, + "60": 0.17804, + "61": 0.17762, + "62": 0.1744, + "63": 0.17562, + "64": 0.17641, + "65": 0.1776, + "66": 0.18194, + "67": 0.17871, + "68": 0.17591, + "69": 0.17673, + "70": 0.17758, + "71": 0.17616, + "72": 0.17993, + "73": 0.17721, + "74": 0.17901, + "75": 0.1779, + "76": 0.17874, + "77": 0.17769, + "78": 0.17877, + "79": 0.17963, + "80": 0.1772, + "81": 0.18363, + "82": 0.175, + "83": 0.17819, + "84": 0.17813, + "85": 0.17602, + "86": 0.17627, + "87": 0.17621, + "88": 0.17721, 
+ "89": 0.17686, + "90": 0.17595, + "91": 0.17984, + "92": 0.17771, + "93": 0.17526, + "94": 0.17818, + "95": 0.17734, + "96": 0.18252, + "97": 0.186, + "98": 0.1736, + "99": 0.17768, + "100": 0.17699 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..492a25fb45e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84466, + "2": 10.84794, + "3": 10.84925, + "4": 10.84332, + "5": 10.88244, + "6": 10.88079, + "7": 10.86575, + "8": 10.85546, + "9": 10.85543, + "10": 10.81818, + "11": 10.88769, + "12": 10.8634, + "13": 10.86681, + "14": 10.88414, + "15": 10.82464, + "16": 10.82854, + "17": 10.79491, + "18": 10.81492, + "19": 10.80133, + "20": 10.7181, + "21": 10.69905, + "22": 10.56744, + "23": 10.717, + "24": 10.60443, + "25": 10.55007, + "26": 10.60907, + "27": 10.62028, + "28": 10.5752, + "29": 10.59624, + "30": 10.38327, + "31": 10.1537, + "32": 10.48026, + "33": 10.47378, + "34": 10.2366, + "35": 10.28843, + "36": 10.24838, + "37": 10.35354, + "38": 10.20794, + "39": 10.41884, + "40": 10.1122, + "41": 10.16092, + "42": 10.23301, + "43": 9.86118, + "44": 9.97698, + "45": 9.86493, + "46": 9.84883, + "47": 10.16617, + "48": 9.87132, + "49": 9.56691, + "50": 9.92114, + "51": 9.86695, + "52": 9.76956, + "53": 10.07809, + "54": 9.97027, + "55": 9.89683, + "56": 9.64394, + "57": 9.49728, + "58": 9.84867, + "59": 9.59977, + "60": 9.50631, + "61": 9.71011, + "62": 9.99101, + "63": 9.38968, + "64": 9.78595, + "65": 8.95983, + "66": 
9.70876, + "67": 9.37892, + "68": 9.79599, + "69": 9.80666, + "70": 9.74795, + "71": 9.61779, + "72": 9.59127, + "73": 9.50398, + "74": 8.94624, + "75": 9.42942, + "76": 9.08423, + "77": 10.06698, + "78": 9.73256, + "79": 9.38117, + "80": 9.41061, + "81": 9.48289, + "82": 9.70492, + "83": 9.30713, + "84": 9.42241, + "85": 9.61802, + "86": 9.07631, + "87": 9.59382, + "88": 9.75419, + "89": 9.60093, + "90": 9.82013, + "91": 9.3407, + "92": 9.35717, + "93": 9.07927, + "94": 8.83613, + "95": 9.5223, + "96": 9.53379, + "97": 9.31633, + "98": 9.68007, + "99": 8.89242, + "100": 9.39964 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1770.0, + "2": 1809.0, + "3": 1782.0, + "4": 1916.0, + "5": 1973.0, + "6": 1955.0, + "7": 2046.0, + "8": 1773.0, + "9": 1815.0, + "10": 1432.0, + "11": 1961.0, + "12": 1828.0, + "13": 1967.0, + "14": 1825.0, + "15": 1980.0, + "16": 1889.0, + "17": 1866.0, + "18": 1827.0, + "19": 1876.0, + "20": 1715.0, + "21": 2046.0, + "22": 1872.0, + "23": 2168.0, + "24": 1814.0, + "25": 1715.0, + "26": 1721.0, + "27": 1822.0, + "28": 2102.0, + "29": 2112.0, + "30": 2020.0, + "31": 1569.0, + "32": 2022.0, + "33": 2256.0, + "34": 1884.0, + "35": 2034.0, + "36": 2027.0, + "37": 2438.0, + "38": 2363.0, + "39": 2526.0, + "40": 2254.0, + "41": 2328.0, + "42": 2409.0, + "43": 2126.0, + "44": 2166.0, + "45": 2230.0, + "46": 2487.0, + "47": 2605.0, + "48": 2351.0, + "49": 2413.0, + "50": 2274.0, + "51": 2579.0, + "52": 2508.0, + "53": 2879.0, + "54": 2744.0, + "55": 2402.0, + "56": 2720.0, + "57": 2384.0, + "58": 3002.0, + "59": 2743.0, + "60": 2457.0, + "61": 2976.0, + "62": 2631.0, + "63": 2349.0, + "64": 3077.0, + "65": 2634.0, + "66": 3076.0, + "67": 2906.0, + "68": 2759.0, + "69": 2907.0, + "70": 3045.0, + "71": 3159.0, + "72": 2506.0, + "73": 2956.0, + "74": 1945.0, + "75": 2467.0, + "76": 2979.0, + "77": 3209.0, + "78": 3122.0, + "79": 3048.0, + "80": 3389.0, + "81": 3799.0, + "82": 3272.0, + "83": 
2962.0, + "84": 3328.0, + "85": 3462.0, + "86": 3071.0, + "87": 3900.0, + "88": 3128.0, + "89": 3469.0, + "90": 3095.0, + "91": 2769.0, + "92": 3168.0, + "93": 2713.0, + "94": 3416.0, + "95": 3515.0, + "96": 3425.0, + "97": 3223.0, + "98": 3769.0, + "99": 3230.0, + "100": 3219.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 246998528.0, + "2": 246998528.0, + "3": 246998528.0, + "4": 246998528.0, + "5": 246998528.0, + "6": 246998528.0, + "7": 246998528.0, + "8": 246998528.0, + "9": 246998528.0, + "10": 246998528.0, + "11": 246998528.0, + "12": 246998528.0, + "13": 246998528.0, + "14": 246998528.0, + "15": 246998528.0, + "16": 246998528.0, + "17": 246998528.0, + "18": 246998528.0, + "19": 246998528.0, + "20": 246998528.0, + "21": 246998528.0, + "22": 246998528.0, + "23": 246998528.0, + "24": 246998528.0, + "25": 246998528.0, + "26": 246998528.0, + "27": 246998528.0, + "28": 246998528.0, + "29": 246998528.0, + "30": 246998528.0, + "31": 246998528.0, + "32": 246998528.0, + "33": 246998528.0, + "34": 246998528.0, + "35": 246998528.0, + "36": 246998528.0, + "37": 246998528.0, + "38": 246998528.0, + "39": 246998528.0, + "40": 246998528.0, + "41": 246998528.0, + "42": 246998528.0, + "43": 246998528.0, + "44": 246998528.0, + "45": 246998528.0, + "46": 246998528.0, + "47": 246998528.0, + "48": 246998528.0, + "49": 246998528.0, + "50": 246998528.0, + "51": 246998528.0, + "52": 246998528.0, + "53": 246998528.0, + "54": 246998528.0, + "55": 246998528.0, + "56": 246998528.0, + "57": 246998528.0, + "58": 246998528.0, + "59": 246998528.0, + "60": 246998528.0, + "61": 246998528.0, + "62": 246998528.0, + "63": 246998528.0, + "64": 246998528.0, + "65": 246998528.0, + "66": 246998528.0, + "67": 246998528.0, + "68": 246998528.0, + "69": 246998528.0, + "70": 246998528.0, + "71": 246998528.0, + "72": 246998528.0, + "73": 246998528.0, + "74": 246998528.0, + "75": 246998528.0, + "76": 246998528.0, + "77": 246998528.0, + 
"78": 246998528.0, + "79": 246998528.0, + "80": 246998528.0, + "81": 246998528.0, + "82": 246998528.0, + "83": 246998528.0, + "84": 246998528.0, + "85": 246998528.0, + "86": 246998528.0, + "87": 246998528.0, + "88": 246998528.0, + "89": 246998528.0, + "90": 246998528.0, + "91": 246998528.0, + "92": 246998528.0, + "93": 246998528.0, + "94": 246998528.0, + "95": 246998528.0, + "96": 246998528.0, + "97": 246998528.0, + "98": 246998528.0, + "99": 246998528.0, + "100": 246998528.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1503207936.0, + "2": 1503208960.0, + "3": 1503208960.0, + "4": 1503208960.0, + "5": 1503208960.0, + "6": 1503208960.0, + "7": 1503208960.0, + "8": 1503208960.0, + "9": 1503208960.0, + "10": 1503208960.0, + "11": 1503208960.0, + "12": 1503208960.0, + "13": 1503208960.0, + "14": 1503208960.0, + "15": 1503208960.0, + "16": 1503208960.0, + "17": 1503208960.0, + "18": 1503208960.0, + "19": 1503208960.0, + "20": 1503208960.0, + "21": 1503208960.0, + "22": 1503208960.0, + "23": 1503208960.0, + "24": 1503208960.0, + "25": 1503208960.0, + "26": 1503208960.0, + "27": 1503208960.0, + "28": 1503208960.0, + "29": 1503208960.0, + "30": 1503208960.0, + "31": 1503208960.0, + "32": 1503208960.0, + "33": 1503208960.0, + "34": 1503208960.0, + "35": 1503208960.0, + "36": 1503208960.0, + "37": 1503208960.0, + "38": 1503208960.0, + "39": 1503208960.0, + "40": 1503208960.0, + "41": 1503208960.0, + "42": 1503208960.0, + "43": 1503208960.0, + "44": 1503208960.0, + "45": 1503208960.0, + "46": 1503208960.0, + "47": 1503208960.0, + "48": 1503208960.0, + "49": 1503208960.0, + "50": 1503208960.0, + "51": 1503208960.0, + "52": 1503208960.0, + "53": 1503208960.0, + "54": 1503208960.0, + "55": 1503208960.0, + "56": 1503208960.0, + "57": 1503208960.0, + "58": 1503208960.0, + "59": 1503208960.0, + "60": 1503208960.0, + "61": 1503208960.0, + "62": 1503208960.0, + "63": 1503208960.0, + "64": 1503208960.0, + 
"65": 1503208960.0, + "66": 1503208960.0, + "67": 1503208960.0, + "68": 1503208960.0, + "69": 1503208960.0, + "70": 1503208960.0, + "71": 1503208960.0, + "72": 1503208960.0, + "73": 1503208960.0, + "74": 1503208960.0, + "75": 1503208960.0, + "76": 1503208960.0, + "77": 1503208960.0, + "78": 1503208960.0, + "79": 1503208960.0, + "80": 1503208960.0, + "81": 1503208960.0, + "82": 1503208960.0, + "83": 1503208960.0, + "84": 1503208960.0, + "85": 1503208960.0, + "86": 1503208960.0, + "87": 1503208960.0, + "88": 1503208960.0, + "89": 1503208960.0, + "90": 1503208960.0, + "91": 1503208960.0, + "92": 1503208960.0, + "93": 1503208960.0, + "94": 1503208960.0, + "95": 1503208960.0, + "96": 1503208960.0, + "97": 1503208960.0, + "98": 1503208960.0, + "99": 1503208960.0, + "100": 1503208960.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.36893, + "2": 0.17749, + "3": 0.15483, + "4": 3.4076, + "5": 1.15474, + "6": 1.45655, + "7": 0.15757, + "8": 0.15389, + "9": 0.47498, + "10": 0.16518, + "11": 0.23414, + "12": 0.15815, + "13": 0.15818, + "14": 0.15719, + "15": 0.15462, + "16": 0.16906, + "17": 0.159, + "18": 0.1595, + "19": 0.15825, + "20": 0.15699, + "21": 0.17023, + "22": 0.15299, + "23": 0.15858, + "24": 0.15811, + "25": 0.16082, + "26": 0.15919, + "27": 0.17036, + "28": 0.15511, + "29": 0.15676, + "30": 0.15849, + "31": 0.15691, + "32": 0.1571, + "33": 0.16802, + "34": 0.154, + "35": 0.15309, + "36": 0.15721, + "37": 0.15869, + "38": 0.16016, + "39": 0.15701, + "40": 0.15638, + "41": 0.15569, + "42": 0.15701, + "43": 0.16024, + "44": 0.15954, + "45": 0.16076, + "46": 0.15945, + "47": 0.15824, + "48": 0.15782, + "49": 0.15911, + "50": 0.15934, + "51": 0.15705, + "52": 0.17206, + "53": 0.17271, + "54": 0.17349, + "55": 0.17496, + "56": 0.16409, + "57": 0.16373, + "58": 0.16199, + "59": 0.16729, + "60": 0.16491, + "61": 0.1652, + "62": 0.17265, + "63": 0.17309, + "64": 0.15548, + "65": 0.15692, + "66": 0.16524, 
+ "67": 0.15305, + "68": 0.16651, + "69": 0.15491, + "70": 0.15396, + "71": 0.15455, + "72": 0.16248, + "73": 0.15552, + "74": 0.1536, + "75": 0.15797, + "76": 0.15557, + "77": 0.15511, + "78": 0.16464, + "79": 0.15523, + "80": 0.15671, + "81": 0.15374, + "82": 0.15657, + "83": 0.16295, + "84": 0.15794, + "85": 0.15777, + "86": 0.15529, + "87": 0.16089, + "88": 0.15599, + "89": 0.16869, + "90": 0.15607, + "91": 0.15589, + "92": 0.15613, + "93": 0.15487, + "94": 0.15658, + "95": 0.16587, + "96": 0.1565, + "97": 0.15642, + "98": 0.15538, + "99": 0.15622, + "100": 0.16269 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..dbfceceac77 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84466, + "2": 10.84794, + "3": 10.84925, + "4": 10.84332, + "5": 10.88244, + "6": 10.88079, + "7": 10.86575, + "8": 10.85546, + "9": 10.85543, + "10": 10.81818, + "11": 10.88769, + "12": 10.8634, + "13": 10.86681, + "14": 10.88414, + "15": 10.82464, + "16": 10.82854, + "17": 10.79491, + "18": 10.81492, + "19": 10.80133, + "20": 10.7181, + "21": 10.69905, + "22": 10.56744, + "23": 10.717, + "24": 10.60443, + "25": 10.55007, + "26": 10.60907, + "27": 10.62028, + "28": 10.5752, + "29": 10.59624, + "30": 10.38327, + "31": 10.1537, + "32": 10.48026, + "33": 10.47378, + "34": 10.2366, + "35": 10.28843, + "36": 10.24838, + "37": 10.35354, + "38": 10.20794, + "39": 10.41884, + "40": 10.1122, + "41": 10.16092, + "42": 10.23301, + "43": 9.86118, + "44": 9.97698, + "45": 
9.86493, + "46": 9.84883, + "47": 10.16617, + "48": 9.87132, + "49": 9.56691, + "50": 9.92114, + "51": 9.86695, + "52": 9.76956, + "53": 10.07809, + "54": 9.97027, + "55": 9.89683, + "56": 9.64394, + "57": 9.49728, + "58": 9.84867, + "59": 9.59977, + "60": 9.50631, + "61": 9.71011, + "62": 9.99101, + "63": 9.38968, + "64": 9.78595, + "65": 8.95983, + "66": 9.70876, + "67": 9.37892, + "68": 9.79599, + "69": 9.80666, + "70": 9.74795, + "71": 9.61779, + "72": 9.59127, + "73": 9.50398, + "74": 8.94624, + "75": 9.42942, + "76": 9.08423, + "77": 10.06698, + "78": 9.73256, + "79": 9.38117, + "80": 9.41061, + "81": 9.48289, + "82": 9.70492, + "83": 9.30713, + "84": 9.42241, + "85": 9.61802, + "86": 9.07631, + "87": 9.59382, + "88": 9.75419, + "89": 9.60093, + "90": 9.82013, + "91": 9.3407, + "92": 9.35717, + "93": 9.07927, + "94": 8.83613, + "95": 9.5223, + "96": 9.53379, + "97": 9.31633, + "98": 9.68007, + "99": 8.89242, + "100": 9.39964 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1770.0, + "2": 1809.0, + "3": 1782.0, + "4": 1916.0, + "5": 1973.0, + "6": 1955.0, + "7": 2046.0, + "8": 1773.0, + "9": 1815.0, + "10": 1432.0, + "11": 1961.0, + "12": 1828.0, + "13": 1967.0, + "14": 1825.0, + "15": 1980.0, + "16": 1889.0, + "17": 1866.0, + "18": 1827.0, + "19": 1876.0, + "20": 1715.0, + "21": 2046.0, + "22": 1872.0, + "23": 2168.0, + "24": 1814.0, + "25": 1715.0, + "26": 1721.0, + "27": 1822.0, + "28": 2102.0, + "29": 2112.0, + "30": 2020.0, + "31": 1569.0, + "32": 2022.0, + "33": 2256.0, + "34": 1884.0, + "35": 2034.0, + "36": 2027.0, + "37": 2438.0, + "38": 2363.0, + "39": 2526.0, + "40": 2254.0, + "41": 2328.0, + "42": 2409.0, + "43": 2126.0, + "44": 2166.0, + "45": 2230.0, + "46": 2487.0, + "47": 2605.0, + "48": 2351.0, + "49": 2413.0, + "50": 2274.0, + "51": 2579.0, + "52": 2508.0, + "53": 2879.0, + "54": 2744.0, + "55": 2402.0, + "56": 2720.0, + "57": 2384.0, + "58": 3002.0, + "59": 2743.0, + "60": 2457.0, + 
"61": 2976.0, + "62": 2631.0, + "63": 2349.0, + "64": 3077.0, + "65": 2634.0, + "66": 3076.0, + "67": 2906.0, + "68": 2759.0, + "69": 2907.0, + "70": 3045.0, + "71": 3159.0, + "72": 2506.0, + "73": 2956.0, + "74": 1945.0, + "75": 2467.0, + "76": 2979.0, + "77": 3209.0, + "78": 3122.0, + "79": 3048.0, + "80": 3389.0, + "81": 3799.0, + "82": 3272.0, + "83": 2962.0, + "84": 3328.0, + "85": 3462.0, + "86": 3071.0, + "87": 3900.0, + "88": 3128.0, + "89": 3469.0, + "90": 3095.0, + "91": 2769.0, + "92": 3168.0, + "93": 2713.0, + "94": 3416.0, + "95": 3515.0, + "96": 3425.0, + "97": 3223.0, + "98": 3769.0, + "99": 3230.0, + "100": 3219.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 246998528.0, + "2": 246998528.0, + "3": 246998528.0, + "4": 246998528.0, + "5": 246998528.0, + "6": 246998528.0, + "7": 246998528.0, + "8": 246998528.0, + "9": 246998528.0, + "10": 246998528.0, + "11": 246998528.0, + "12": 246998528.0, + "13": 246998528.0, + "14": 246998528.0, + "15": 246998528.0, + "16": 246998528.0, + "17": 246998528.0, + "18": 246998528.0, + "19": 246998528.0, + "20": 246998528.0, + "21": 246998528.0, + "22": 246998528.0, + "23": 246998528.0, + "24": 246998528.0, + "25": 246998528.0, + "26": 246998528.0, + "27": 246998528.0, + "28": 246998528.0, + "29": 246998528.0, + "30": 246998528.0, + "31": 246998528.0, + "32": 246998528.0, + "33": 246998528.0, + "34": 246998528.0, + "35": 246998528.0, + "36": 246998528.0, + "37": 246998528.0, + "38": 246998528.0, + "39": 246998528.0, + "40": 246998528.0, + "41": 246998528.0, + "42": 246998528.0, + "43": 246998528.0, + "44": 246998528.0, + "45": 246998528.0, + "46": 246998528.0, + "47": 246998528.0, + "48": 246998528.0, + "49": 246998528.0, + "50": 246998528.0, + "51": 246998528.0, + "52": 246998528.0, + "53": 246998528.0, + "54": 246998528.0, + "55": 246998528.0, + "56": 246998528.0, + "57": 246998528.0, + "58": 246998528.0, + "59": 246998528.0, + "60": 246998528.0, 
+ "61": 246998528.0, + "62": 246998528.0, + "63": 246998528.0, + "64": 246998528.0, + "65": 246998528.0, + "66": 246998528.0, + "67": 246998528.0, + "68": 246998528.0, + "69": 246998528.0, + "70": 246998528.0, + "71": 246998528.0, + "72": 246998528.0, + "73": 246998528.0, + "74": 246998528.0, + "75": 246998528.0, + "76": 246998528.0, + "77": 246998528.0, + "78": 246998528.0, + "79": 246998528.0, + "80": 246998528.0, + "81": 246998528.0, + "82": 246998528.0, + "83": 246998528.0, + "84": 246998528.0, + "85": 246998528.0, + "86": 246998528.0, + "87": 246998528.0, + "88": 246998528.0, + "89": 246998528.0, + "90": 246998528.0, + "91": 246998528.0, + "92": 246998528.0, + "93": 246998528.0, + "94": 246998528.0, + "95": 246998528.0, + "96": 246998528.0, + "97": 246998528.0, + "98": 246998528.0, + "99": 246998528.0, + "100": 246998528.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1503207936.0, + "2": 1503208960.0, + "3": 1503208960.0, + "4": 1503208960.0, + "5": 1503208960.0, + "6": 1503208960.0, + "7": 1503208960.0, + "8": 1503208960.0, + "9": 1503208960.0, + "10": 1503208960.0, + "11": 1503208960.0, + "12": 1503208960.0, + "13": 1503208960.0, + "14": 1503208960.0, + "15": 1503208960.0, + "16": 1503208960.0, + "17": 1503208960.0, + "18": 1503208960.0, + "19": 1503208960.0, + "20": 1503208960.0, + "21": 1503208960.0, + "22": 1503208960.0, + "23": 1503208960.0, + "24": 1503208960.0, + "25": 1503208960.0, + "26": 1503208960.0, + "27": 1503208960.0, + "28": 1503208960.0, + "29": 1503208960.0, + "30": 1503208960.0, + "31": 1503208960.0, + "32": 1503208960.0, + "33": 1503208960.0, + "34": 1503208960.0, + "35": 1503208960.0, + "36": 1503208960.0, + "37": 1503208960.0, + "38": 1503208960.0, + "39": 1503208960.0, + "40": 1503208960.0, + "41": 1503208960.0, + "42": 1503208960.0, + "43": 1503208960.0, + "44": 1503208960.0, + "45": 1503208960.0, + "46": 1503208960.0, + "47": 1503208960.0, + "48": 
1503208960.0, + "49": 1503208960.0, + "50": 1503208960.0, + "51": 1503208960.0, + "52": 1503208960.0, + "53": 1503208960.0, + "54": 1503208960.0, + "55": 1503208960.0, + "56": 1503208960.0, + "57": 1503208960.0, + "58": 1503208960.0, + "59": 1503208960.0, + "60": 1503208960.0, + "61": 1503208960.0, + "62": 1503208960.0, + "63": 1503208960.0, + "64": 1503208960.0, + "65": 1503208960.0, + "66": 1503208960.0, + "67": 1503208960.0, + "68": 1503208960.0, + "69": 1503208960.0, + "70": 1503208960.0, + "71": 1503208960.0, + "72": 1503208960.0, + "73": 1503208960.0, + "74": 1503208960.0, + "75": 1503208960.0, + "76": 1503208960.0, + "77": 1503208960.0, + "78": 1503208960.0, + "79": 1503208960.0, + "80": 1503208960.0, + "81": 1503208960.0, + "82": 1503208960.0, + "83": 1503208960.0, + "84": 1503208960.0, + "85": 1503208960.0, + "86": 1503208960.0, + "87": 1503208960.0, + "88": 1503208960.0, + "89": 1503208960.0, + "90": 1503208960.0, + "91": 1503208960.0, + "92": 1503208960.0, + "93": 1503208960.0, + "94": 1503208960.0, + "95": 1503208960.0, + "96": 1503208960.0, + "97": 1503208960.0, + "98": 1503208960.0, + "99": 1503208960.0, + "100": 1503208960.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.90789, + "2": 0.23993, + "3": 0.20829, + "4": 0.18489, + "5": 0.18237, + "6": 0.17507, + "7": 0.17401, + "8": 0.17758, + "9": 0.17734, + "10": 0.17577, + "11": 0.17329, + "12": 0.17635, + "13": 0.17559, + "14": 0.17588, + "15": 0.17556, + "16": 0.17798, + "17": 0.17347, + "18": 0.17346, + "19": 0.17675, + "20": 0.17518, + "21": 0.17864, + "22": 0.17833, + "23": 0.1827, + "24": 0.1775, + "25": 0.17745, + "26": 0.1755, + "27": 0.17594, + "28": 0.18475, + "29": 0.17599, + "30": 0.17452, + "31": 0.17601, + "32": 0.17743, + "33": 0.17355, + "34": 0.18205, + "35": 0.17672, + "36": 0.17728, + "37": 0.17438, + "38": 0.17752, + "39": 0.18463, + "40": 0.17673, + "41": 0.17505, + "42": 0.17657, + "43": 0.1769, + "44": 0.19406, + 
"45": 0.20743, + "46": 0.18263, + "47": 0.16986, + "48": 0.17268, + "49": 0.17404, + "50": 0.17381, + "51": 0.1735, + "52": 0.1693, + "53": 0.17058, + "54": 0.17247, + "55": 0.1773, + "56": 0.17259, + "57": 0.17109, + "58": 0.17178, + "59": 0.17167, + "60": 0.17568, + "61": 0.17729, + "62": 0.16999, + "63": 0.17091, + "64": 0.17034, + "65": 0.17236, + "66": 0.17625, + "67": 0.17591, + "68": 0.17126, + "69": 0.17159, + "70": 0.17123, + "71": 0.17221, + "72": 0.17877, + "73": 0.17426, + "74": 0.17035, + "75": 0.1721, + "76": 0.17327, + "77": 0.17396, + "78": 0.17631, + "79": 0.17485, + "80": 0.17347, + "81": 0.17358, + "82": 0.17087, + "83": 0.17164, + "84": 0.17784, + "85": 0.17401, + "86": 0.18008, + "87": 0.17399, + "88": 0.17322, + "89": 0.17239, + "90": 0.17856, + "91": 0.17078, + "92": 0.18016, + "93": 0.18343, + "94": 0.18085, + "95": 0.175, + "96": 0.17786, + "97": 0.17064, + "98": 0.17229, + "99": 0.17164, + "100": 0.20496 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 7190006ec1c..e813675fa98 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.84269, "5": 10.85859, "10": 10.8187, "15": 10.80947, "20": 10.70829, "25": 10.57071, "30": 10.39721, "35": 10.28311, "40": 10.09728, "45": 9.86184, "50": 9.91021}}, "num-zeros": {"start_step": 1, "end_step": 50, 
"step_interval": 5, "values": {"1": 1669.0, "5": 1956.0, "10": 1416.0, "15": 1958.0, "20": 1802.0, "25": 1767.0, "30": 1901.0, "35": 1938.0, "40": 2126.0, "45": 1927.0, "50": 2307.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 299203072.0, "5": 299203072.0, "10": 299203072.0, "15": 299203072.0, "20": 299203072.0, "25": 299203072.0, "30": 299203072.0, "35": 299203072.0, "40": 299203072.0, "45": 299203072.0, "50": 299203072.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1477945856.0, "5": 1542891008.0, "10": 1542891008.0, "15": 1542891008.0, "20": 1542891008.0, "25": 1542891008.0, "30": 1542891008.0, "35": 1542891008.0, "40": 1542891008.0, "45": 1542891008.0, "50": 1542891008.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 7.64845, "5": 0.20884, "10": 0.20343, "15": 0.20612, "20": 0.22655, "25": 0.19884, "30": 0.20035, "35": 0.20606, "40": 0.19923, "45": 0.20257, "50": 0.20076}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84269, + "2": 10.85556, + "3": 10.84446, + "4": 10.84222, + "5": 10.85859, + "6": 10.86289, + "7": 10.85166, + "8": 10.84694, + "9": 10.85648, + "10": 10.8187, + "11": 10.85952, + "12": 10.8434, + "13": 10.86329, + "14": 10.85467, + "15": 10.80947, + "16": 10.81639, + "17": 10.7887, + "18": 10.79677, + "19": 10.79127, + "20": 10.70829, + "21": 10.69425, + "22": 10.58587, + "23": 10.70272, + "24": 10.60461, + "25": 10.57071, + "26": 10.62002, + "27": 10.61414, + "28": 10.56371, + "29": 10.56749, + "30": 10.39721, + "31": 10.16567, + "32": 10.45764, + "33": 10.45152, + "34": 10.23938, + "35": 10.28311, + "36": 10.24692, + "37": 10.34247, + "38": 10.2052, + "39": 10.39167, + "40": 10.09728, + "41": 10.15266, + "42": 10.21035, + "43": 9.87733, + "44": 9.98208, + "45": 9.86184, + "46": 9.83605, + "47": 10.13379, + "48": 
9.87207, + "49": 9.56144, + "50": 9.91021 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1669.0, + "2": 1797.0, + "3": 1769.0, + "4": 1812.0, + "5": 1956.0, + "6": 1892.0, + "7": 1848.0, + "8": 1619.0, + "9": 1899.0, + "10": 1416.0, + "11": 1910.0, + "12": 1734.0, + "13": 1952.0, + "14": 1901.0, + "15": 1958.0, + "16": 1961.0, + "17": 1919.0, + "18": 1881.0, + "19": 1883.0, + "20": 1802.0, + "21": 1931.0, + "22": 1655.0, + "23": 1993.0, + "24": 1633.0, + "25": 1767.0, + "26": 1727.0, + "27": 1709.0, + "28": 1909.0, + "29": 2062.0, + "30": 1901.0, + "31": 1678.0, + "32": 1944.0, + "33": 2164.0, + "34": 1777.0, + "35": 1938.0, + "36": 1876.0, + "37": 2428.0, + "38": 2216.0, + "39": 2329.0, + "40": 2126.0, + "41": 2312.0, + "42": 2207.0, + "43": 1975.0, + "44": 2062.0, + "45": 1927.0, + "46": 2258.0, + "47": 2545.0, + "48": 2291.0, + "49": 2254.0, + "50": 2307.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 299203072.0, + "2": 299203072.0, + "3": 299203072.0, + "4": 299203072.0, + "5": 299203072.0, + "6": 299203072.0, + "7": 299203072.0, + "8": 299203072.0, + "9": 299203072.0, + "10": 299203072.0, + "11": 299203072.0, + "12": 299203072.0, + "13": 299203072.0, + "14": 299203072.0, + "15": 299203072.0, + "16": 299203072.0, + "17": 299203072.0, + "18": 299203072.0, + "19": 299203072.0, + "20": 299203072.0, + "21": 299203072.0, + "22": 299203072.0, + "23": 299203072.0, + "24": 299203072.0, + "25": 299203072.0, + "26": 299203072.0, + "27": 299203072.0, + "28": 299203072.0, + "29": 299203072.0, + "30": 299203072.0, + "31": 299203072.0, + "32": 299203072.0, + "33": 299203072.0, + "34": 299203072.0, + "35": 299203072.0, + "36": 299203072.0, + "37": 299203072.0, + "38": 299203072.0, + "39": 299203072.0, + "40": 299203072.0, + "41": 299203072.0, + "42": 299203072.0, + "43": 299203072.0, + "44": 299203072.0, + "45": 299203072.0, + "46": 299203072.0, + 
"47": 299203072.0, + "48": 299203072.0, + "49": 299203072.0, + "50": 299203072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1477945856.0, + "2": 1542891008.0, + "3": 1542891008.0, + "4": 1542891008.0, + "5": 1542891008.0, + "6": 1542891008.0, + "7": 1542891008.0, + "8": 1542891008.0, + "9": 1542891008.0, + "10": 1542891008.0, + "11": 1542891008.0, + "12": 1542891008.0, + "13": 1542891008.0, + "14": 1542891008.0, + "15": 1542891008.0, + "16": 1542891008.0, + "17": 1542891008.0, + "18": 1542891008.0, + "19": 1542891008.0, + "20": 1542891008.0, + "21": 1542891008.0, + "22": 1542891008.0, + "23": 1542891008.0, + "24": 1542891008.0, + "25": 1542891008.0, + "26": 1542891008.0, + "27": 1542891008.0, + "28": 1542891008.0, + "29": 1542891008.0, + "30": 1542891008.0, + "31": 1542891008.0, + "32": 1542891008.0, + "33": 1542891008.0, + "34": 1542891008.0, + "35": 1542891008.0, + "36": 1542891008.0, + "37": 1542891008.0, + "38": 1542891008.0, + "39": 1542891008.0, + "40": 1542891008.0, + "41": 1542891008.0, + "42": 1542891008.0, + "43": 1542891008.0, + "44": 1542891008.0, + "45": 1542891008.0, + "46": 1542891008.0, + "47": 1542891008.0, + "48": 1542891008.0, + "49": 1542891008.0, + "50": 1542891008.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 8.77968, + "2": 0.26175, + "3": 0.24794, + "4": 0.24501, + "5": 0.24845, + "6": 0.2486, + "7": 0.24727, + "8": 0.24913, + "9": 0.25845, + "10": 0.25285, + "11": 0.24913, + "12": 0.24699, + "13": 0.2473, + "14": 0.25154, + "15": 0.24973, + "16": 0.24744, + "17": 0.24812, + "18": 0.25005, + "19": 0.24688, + "20": 0.2449, + "21": 0.24547, + "22": 0.24699, + "23": 0.24408, + "24": 0.24933, + "25": 0.24233, + "26": 0.2452, + "27": 0.24682, + "28": 0.24269, + "29": 0.24203, + "30": 0.2418, + "31": 0.25702, + "32": 0.24123, + "33": 0.24439, + "34": 0.24088, + "35": 0.24457, + "36": 0.24197, + "37": 
0.24309, + "38": 0.24278, + "39": 0.24374, + "40": 0.2478, + "41": 0.2422, + "42": 0.24357, + "43": 0.24957, + "44": 0.24752, + "45": 0.24273, + "46": 0.24413, + "47": 0.24327, + "48": 0.24256, + "49": 0.24524, + "50": 0.24667 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..0e9e1ac956f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84269, + "2": 10.85556, + "3": 10.84446, + "4": 10.84222, + "5": 10.85859, + "6": 10.86289, + "7": 10.85166, + "8": 10.84694, + "9": 10.85648, + "10": 10.8187, + "11": 10.85952, + "12": 10.8434, + "13": 10.86329, + "14": 10.85467, + "15": 10.80947, + "16": 10.81639, + "17": 10.7887, + "18": 10.79677, + "19": 10.79127, + "20": 10.70829, + "21": 10.69425, + "22": 10.58587, + "23": 10.70272, + "24": 10.60461, + "25": 10.57071, + "26": 10.62002, + "27": 10.61414, + "28": 10.56371, + "29": 10.56749, + "30": 10.39721, + "31": 10.16567, + "32": 10.45764, + "33": 10.45152, + "34": 10.23938, + "35": 10.28311, + "36": 10.24692, + "37": 10.34247, + "38": 10.2052, + "39": 10.39167, + "40": 10.09728, + "41": 10.15266, + "42": 10.21035, + "43": 9.87733, + "44": 9.98208, + "45": 9.86184, + "46": 9.83605, + "47": 10.13379, + "48": 9.87207, + "49": 9.56144, + "50": 9.91021 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1669.0, + "2": 1797.0, + "3": 1769.0, + "4": 1812.0, + 
"5": 1956.0, + "6": 1892.0, + "7": 1848.0, + "8": 1619.0, + "9": 1899.0, + "10": 1416.0, + "11": 1910.0, + "12": 1734.0, + "13": 1952.0, + "14": 1901.0, + "15": 1958.0, + "16": 1961.0, + "17": 1919.0, + "18": 1881.0, + "19": 1883.0, + "20": 1802.0, + "21": 1931.0, + "22": 1655.0, + "23": 1993.0, + "24": 1633.0, + "25": 1767.0, + "26": 1727.0, + "27": 1709.0, + "28": 1909.0, + "29": 2062.0, + "30": 1901.0, + "31": 1678.0, + "32": 1944.0, + "33": 2164.0, + "34": 1777.0, + "35": 1938.0, + "36": 1876.0, + "37": 2428.0, + "38": 2216.0, + "39": 2329.0, + "40": 2126.0, + "41": 2312.0, + "42": 2207.0, + "43": 1975.0, + "44": 2062.0, + "45": 1927.0, + "46": 2258.0, + "47": 2545.0, + "48": 2291.0, + "49": 2254.0, + "50": 2307.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 299203072.0, + "2": 299203072.0, + "3": 299203072.0, + "4": 299203072.0, + "5": 299203072.0, + "6": 299203072.0, + "7": 299203072.0, + "8": 299203072.0, + "9": 299203072.0, + "10": 299203072.0, + "11": 299203072.0, + "12": 299203072.0, + "13": 299203072.0, + "14": 299203072.0, + "15": 299203072.0, + "16": 299203072.0, + "17": 299203072.0, + "18": 299203072.0, + "19": 299203072.0, + "20": 299203072.0, + "21": 299203072.0, + "22": 299203072.0, + "23": 299203072.0, + "24": 299203072.0, + "25": 299203072.0, + "26": 299203072.0, + "27": 299203072.0, + "28": 299203072.0, + "29": 299203072.0, + "30": 299203072.0, + "31": 299203072.0, + "32": 299203072.0, + "33": 299203072.0, + "34": 299203072.0, + "35": 299203072.0, + "36": 299203072.0, + "37": 299203072.0, + "38": 299203072.0, + "39": 299203072.0, + "40": 299203072.0, + "41": 299203072.0, + "42": 299203072.0, + "43": 299203072.0, + "44": 299203072.0, + "45": 299203072.0, + "46": 299203072.0, + "47": 299203072.0, + "48": 299203072.0, + "49": 299203072.0, + "50": 299203072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 
1477945856.0, + "2": 1542891008.0, + "3": 1542891008.0, + "4": 1542891008.0, + "5": 1542891008.0, + "6": 1542891008.0, + "7": 1542891008.0, + "8": 1542891008.0, + "9": 1542891008.0, + "10": 1542891008.0, + "11": 1542891008.0, + "12": 1542891008.0, + "13": 1542891008.0, + "14": 1542891008.0, + "15": 1542891008.0, + "16": 1542891008.0, + "17": 1542891008.0, + "18": 1542891008.0, + "19": 1542891008.0, + "20": 1542891008.0, + "21": 1542891008.0, + "22": 1542891008.0, + "23": 1542891008.0, + "24": 1542891008.0, + "25": 1542891008.0, + "26": 1542891008.0, + "27": 1542891008.0, + "28": 1542891008.0, + "29": 1542891008.0, + "30": 1542891008.0, + "31": 1542891008.0, + "32": 1542891008.0, + "33": 1542891008.0, + "34": 1542891008.0, + "35": 1542891008.0, + "36": 1542891008.0, + "37": 1542891008.0, + "38": 1542891008.0, + "39": 1542891008.0, + "40": 1542891008.0, + "41": 1542891008.0, + "42": 1542891008.0, + "43": 1542891008.0, + "44": 1542891008.0, + "45": 1542891008.0, + "46": 1542891008.0, + "47": 1542891008.0, + "48": 1542891008.0, + "49": 1542891008.0, + "50": 1542891008.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 8.85835, + "2": 0.24835, + "3": 0.21606, + "4": 0.2165, + "5": 0.2184, + "6": 0.21562, + "7": 0.21636, + "8": 0.21549, + "9": 0.21564, + "10": 0.21602, + "11": 0.21604, + "12": 0.21848, + "13": 0.22011, + "14": 0.21851, + "15": 0.21382, + "16": 0.21395, + "17": 0.21404, + "18": 0.21912, + "19": 0.21472, + "20": 0.21137, + "21": 0.2132, + "22": 0.21258, + "23": 0.21793, + "24": 0.22285, + "25": 0.21743, + "26": 0.21892, + "27": 0.21849, + "28": 0.2197, + "29": 0.21953, + "30": 0.21687, + "31": 0.21658, + "32": 0.2223, + "33": 0.22171, + "34": 0.21429, + "35": 0.21354, + "36": 0.21407, + "37": 0.21643, + "38": 0.21392, + "39": 0.21524, + "40": 0.21475, + "41": 0.2181, + "42": 0.21582, + "43": 0.21601, + "44": 0.21724, + "45": 0.21547, + "46": 0.21832, + "47": 0.21586, + "48": 0.21703, + "49": 
0.21487, + "50": 0.21525 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..2bfd32d0721 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84269, + "2": 10.85556, + "3": 10.84446, + "4": 10.84222, + "5": 10.85859, + "6": 10.86289, + "7": 10.85166, + "8": 10.84694, + "9": 10.85648, + "10": 10.8187, + "11": 10.85952, + "12": 10.8434, + "13": 10.86329, + "14": 10.85467, + "15": 10.80947, + "16": 10.81639, + "17": 10.7887, + "18": 10.79677, + "19": 10.79127, + "20": 10.70829, + "21": 10.69425, + "22": 10.58587, + "23": 10.70272, + "24": 10.60461, + "25": 10.57071, + "26": 10.62002, + "27": 10.61414, + "28": 10.56371, + "29": 10.56749, + "30": 10.39721, + "31": 10.16567, + "32": 10.45764, + "33": 10.45152, + "34": 10.23938, + "35": 10.28311, + "36": 10.24692, + "37": 10.34247, + "38": 10.2052, + "39": 10.39167, + "40": 10.09728, + "41": 10.15266, + "42": 10.21035, + "43": 9.87733, + "44": 9.98208, + "45": 9.86184, + "46": 9.83605, + "47": 10.13379, + "48": 9.87207, + "49": 9.56144, + "50": 9.91021 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1669.0, + "2": 1797.0, + "3": 1769.0, + "4": 1812.0, + "5": 1956.0, + "6": 1892.0, + "7": 1848.0, + "8": 1619.0, + "9": 1899.0, + "10": 1416.0, + "11": 1910.0, + "12": 1734.0, + "13": 1952.0, + "14": 1901.0, + "15": 1958.0, + "16": 1961.0, + "17": 1919.0, + "18": 1881.0, + 
"19": 1883.0, + "20": 1802.0, + "21": 1931.0, + "22": 1655.0, + "23": 1993.0, + "24": 1633.0, + "25": 1767.0, + "26": 1727.0, + "27": 1709.0, + "28": 1909.0, + "29": 2062.0, + "30": 1901.0, + "31": 1678.0, + "32": 1944.0, + "33": 2164.0, + "34": 1777.0, + "35": 1938.0, + "36": 1876.0, + "37": 2428.0, + "38": 2216.0, + "39": 2329.0, + "40": 2126.0, + "41": 2312.0, + "42": 2207.0, + "43": 1975.0, + "44": 2062.0, + "45": 1927.0, + "46": 2258.0, + "47": 2545.0, + "48": 2291.0, + "49": 2254.0, + "50": 2307.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 299203072.0, + "2": 299203072.0, + "3": 299203072.0, + "4": 299203072.0, + "5": 299203072.0, + "6": 299203072.0, + "7": 299203072.0, + "8": 299203072.0, + "9": 299203072.0, + "10": 299203072.0, + "11": 299203072.0, + "12": 299203072.0, + "13": 299203072.0, + "14": 299203072.0, + "15": 299203072.0, + "16": 299203072.0, + "17": 299203072.0, + "18": 299203072.0, + "19": 299203072.0, + "20": 299203072.0, + "21": 299203072.0, + "22": 299203072.0, + "23": 299203072.0, + "24": 299203072.0, + "25": 299203072.0, + "26": 299203072.0, + "27": 299203072.0, + "28": 299203072.0, + "29": 299203072.0, + "30": 299203072.0, + "31": 299203072.0, + "32": 299203072.0, + "33": 299203072.0, + "34": 299203072.0, + "35": 299203072.0, + "36": 299203072.0, + "37": 299203072.0, + "38": 299203072.0, + "39": 299203072.0, + "40": 299203072.0, + "41": 299203072.0, + "42": 299203072.0, + "43": 299203072.0, + "44": 299203072.0, + "45": 299203072.0, + "46": 299203072.0, + "47": 299203072.0, + "48": 299203072.0, + "49": 299203072.0, + "50": 299203072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1477945856.0, + "2": 1542891008.0, + "3": 1542891008.0, + "4": 1542891008.0, + "5": 1542891008.0, + "6": 1542891008.0, + "7": 1542891008.0, + "8": 1542891008.0, + "9": 1542891008.0, + "10": 1542891008.0, + "11": 
1542891008.0, + "12": 1542891008.0, + "13": 1542891008.0, + "14": 1542891008.0, + "15": 1542891008.0, + "16": 1542891008.0, + "17": 1542891008.0, + "18": 1542891008.0, + "19": 1542891008.0, + "20": 1542891008.0, + "21": 1542891008.0, + "22": 1542891008.0, + "23": 1542891008.0, + "24": 1542891008.0, + "25": 1542891008.0, + "26": 1542891008.0, + "27": 1542891008.0, + "28": 1542891008.0, + "29": 1542891008.0, + "30": 1542891008.0, + "31": 1542891008.0, + "32": 1542891008.0, + "33": 1542891008.0, + "34": 1542891008.0, + "35": 1542891008.0, + "36": 1542891008.0, + "37": 1542891008.0, + "38": 1542891008.0, + "39": 1542891008.0, + "40": 1542891008.0, + "41": 1542891008.0, + "42": 1542891008.0, + "43": 1542891008.0, + "44": 1542891008.0, + "45": 1542891008.0, + "46": 1542891008.0, + "47": 1542891008.0, + "48": 1542891008.0, + "49": 1542891008.0, + "50": 1542891008.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 8.86827, + "2": 0.25581, + "3": 0.24685, + "4": 0.24528, + "5": 0.24786, + "6": 0.25055, + "7": 0.2473, + "8": 0.24843, + "9": 0.24646, + "10": 0.24448, + "11": 0.24595, + "12": 0.24375, + "13": 0.24607, + "14": 0.2438, + "15": 0.24496, + "16": 0.24469, + "17": 0.24672, + "18": 0.2472, + "19": 0.24412, + "20": 0.24734, + "21": 0.24525, + "22": 0.24726, + "23": 0.24425, + "24": 0.2467, + "25": 0.24589, + "26": 0.24521, + "27": 0.24972, + "28": 0.24969, + "29": 0.24951, + "30": 0.24819, + "31": 0.25039, + "32": 0.24983, + "33": 0.25363, + "34": 0.25237, + "35": 0.24992, + "36": 0.24811, + "37": 0.25001, + "38": 0.24929, + "39": 0.24928, + "40": 0.24894, + "41": 0.24934, + "42": 0.24889, + "43": 0.24734, + "44": 0.24821, + "45": 0.2492, + "46": 0.24867, + "47": 0.25083, + "48": 0.24933, + "49": 0.24988, + "50": 0.25012 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..7b27bf78e61 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86122, + "2": 10.85774, + "3": 10.86039, + "4": 10.84813, + "5": 10.88242, + "6": 10.88645, + "7": 10.86227, + "8": 10.86932, + "9": 10.86444, + "10": 10.83506, + "11": 10.87765, + "12": 10.87384, + "13": 10.87945, + "14": 10.88919, + "15": 10.82738, + "16": 10.83105, + "17": 10.79888, + "18": 10.82441, + "19": 10.81363, + "20": 10.72743, + "21": 10.71638, + "22": 10.57153, + "23": 10.7269, + "24": 10.61223, + "25": 10.55753, + "26": 10.60603, + "27": 10.61792, + "28": 10.57695, + "29": 10.59633, + "30": 10.37895, + "31": 10.13125, + "32": 10.47822, + "33": 10.46894, + "34": 10.22715, + "35": 10.28321, + "36": 10.22751, + "37": 10.35397, + "38": 10.20483, + "39": 10.40755, + "40": 10.08785, + "41": 10.1591, + "42": 10.21601, + "43": 9.84821, + "44": 9.9651, + "45": 9.82625, + "46": 9.83468, + "47": 10.15337, + "48": 9.84529, + "49": 9.52926, + "50": 9.91327 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1778.0, + "2": 1875.0, + "3": 1879.0, + "4": 1912.0, + "5": 2219.0, + "6": 2163.0, + "7": 2113.0, + "8": 1747.0, + "9": 2049.0, + "10": 1530.0, + "11": 2113.0, + "12": 1959.0, + "13": 2134.0, + "14": 2055.0, + "15": 2125.0, + "16": 2139.0, + "17": 1988.0, + "18": 1892.0, + "19": 1991.0, + "20": 1867.0, + "21": 2023.0, + 
"22": 1865.0, + "23": 2185.0, + "24": 1774.0, + "25": 1773.0, + "26": 1990.0, + "27": 2061.0, + "28": 2215.0, + "29": 2186.0, + "30": 2129.0, + "31": 1794.0, + "32": 2109.0, + "33": 2422.0, + "34": 2135.0, + "35": 2169.0, + "36": 2127.0, + "37": 2432.0, + "38": 2490.0, + "39": 2495.0, + "40": 2486.0, + "41": 2465.0, + "42": 2535.0, + "43": 2216.0, + "44": 2407.0, + "45": 2335.0, + "46": 2617.0, + "47": 2830.0, + "48": 2480.0, + "49": 2492.0, + "50": 2687.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 269891584.0, + "2": 269891584.0, + "3": 269891584.0, + "4": 269891584.0, + "5": 269891584.0, + "6": 269891584.0, + "7": 269891584.0, + "8": 269891584.0, + "9": 269891584.0, + "10": 269891584.0, + "11": 269891584.0, + "12": 269891584.0, + "13": 269891584.0, + "14": 269891584.0, + "15": 269891584.0, + "16": 269891584.0, + "17": 269891584.0, + "18": 269891584.0, + "19": 269891584.0, + "20": 269891584.0, + "21": 269891584.0, + "22": 269891584.0, + "23": 269891584.0, + "24": 269891584.0, + "25": 269891584.0, + "26": 269891584.0, + "27": 269891584.0, + "28": 269891584.0, + "29": 269891584.0, + "30": 269891584.0, + "31": 269891584.0, + "32": 269891584.0, + "33": 269891584.0, + "34": 269891584.0, + "35": 269891584.0, + "36": 269891584.0, + "37": 269891584.0, + "38": 269891584.0, + "39": 269891584.0, + "40": 269891584.0, + "41": 269891584.0, + "42": 269891584.0, + "43": 269891584.0, + "44": 269891584.0, + "45": 269891584.0, + "46": 269891584.0, + "47": 269891584.0, + "48": 269891584.0, + "49": 269891584.0, + "50": 269891584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1448634368.0, + "2": 1515674112.0, + "3": 1515674112.0, + "4": 1515674112.0, + "5": 1515674112.0, + "6": 1515674112.0, + "7": 1515674112.0, + "8": 1515674112.0, + "9": 1515674112.0, + "10": 1515674112.0, + "11": 1515674112.0, + "12": 1515674112.0, + "13": 1515674112.0, 
+ "14": 1515674112.0, + "15": 1515674112.0, + "16": 1515674112.0, + "17": 1515674112.0, + "18": 1515674112.0, + "19": 1515674112.0, + "20": 1515674112.0, + "21": 1515674112.0, + "22": 1515674112.0, + "23": 1515674112.0, + "24": 1515674112.0, + "25": 1515674112.0, + "26": 1515674112.0, + "27": 1515674112.0, + "28": 1515674112.0, + "29": 1515674112.0, + "30": 1515674112.0, + "31": 1515674112.0, + "32": 1515674112.0, + "33": 1515674112.0, + "34": 1515674112.0, + "35": 1515674112.0, + "36": 1515676160.0, + "37": 1515676160.0, + "38": 1515676160.0, + "39": 1515676160.0, + "40": 1515676160.0, + "41": 1515676160.0, + "42": 1515676160.0, + "43": 1515676160.0, + "44": 1515676160.0, + "45": 1515676160.0, + "46": 1515676160.0, + "47": 1515676160.0, + "48": 1515676160.0, + "49": 1515676160.0, + "50": 1515676160.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.32442, + "2": 0.36793, + "3": 0.33232, + "4": 0.32917, + "5": 0.33097, + "6": 0.32866, + "7": 0.32256, + "8": 0.32486, + "9": 0.37982, + "10": 0.41476, + "11": 0.44694, + "12": 0.53248, + "13": 0.57146, + "14": 0.57246, + "15": 0.36094, + "16": 0.34892, + "17": 0.38022, + "18": 0.35319, + "19": 0.36887, + "20": 0.36416, + "21": 0.34563, + "22": 0.31882, + "23": 0.32147, + "24": 0.31667, + "25": 0.31696, + "26": 0.31902, + "27": 0.32164, + "28": 0.31663, + "29": 0.3158, + "30": 0.32265, + "31": 0.31608, + "32": 0.31574, + "33": 0.32267, + "34": 0.31719, + "35": 0.31721, + "36": 0.32191, + "37": 0.31699, + "38": 0.31788, + "39": 0.32413, + "40": 0.31691, + "41": 0.31767, + "42": 0.32282, + "43": 0.31846, + "44": 0.31976, + "45": 0.32052, + "46": 0.3223, + "47": 0.32037, + "48": 0.33259, + "49": 0.32455, + "50": 0.32849 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..2dea447618c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86122, + "2": 10.85774, + "3": 10.86039, + "4": 10.84813, + "5": 10.88242, + "6": 10.88645, + "7": 10.86227, + "8": 10.86932, + "9": 10.86444, + "10": 10.83506, + "11": 10.87765, + "12": 10.87384, + "13": 10.87945, + "14": 10.88919, + "15": 10.82738, + "16": 10.83105, + "17": 10.79888, + "18": 10.82441, + "19": 10.81363, + "20": 10.72743, + "21": 10.71638, + "22": 10.57153, + "23": 10.7269, + "24": 10.61223, + "25": 10.55753, + "26": 10.60603, + "27": 10.61792, + "28": 10.57695, + "29": 10.59633, + "30": 10.37895, + "31": 10.13125, + "32": 10.47822, + "33": 10.46894, + "34": 10.22715, + "35": 10.28321, + "36": 10.22751, + "37": 10.35397, + "38": 10.20483, + "39": 10.40755, + "40": 10.08785, + "41": 10.1591, + "42": 10.21601, + "43": 9.84821, + "44": 9.9651, + "45": 9.82625, + "46": 9.83468, + "47": 10.15337, + "48": 9.84529, + "49": 9.52926, + "50": 9.91327 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1778.0, + "2": 1875.0, + "3": 1879.0, + "4": 1912.0, + "5": 2219.0, + "6": 2163.0, + "7": 2113.0, + "8": 1747.0, + "9": 2049.0, + "10": 1530.0, + "11": 2113.0, + "12": 1959.0, + "13": 2134.0, + "14": 2055.0, + "15": 2125.0, + "16": 2139.0, + "17": 1988.0, + "18": 1892.0, + "19": 1991.0, + "20": 1867.0, + "21": 2023.0, + "22": 1865.0, + "23": 2185.0, + "24": 1774.0, + "25": 1773.0, + "26": 1990.0, + "27": 2061.0, + "28": 2215.0, + "29": 2186.0, + "30": 2129.0, + "31": 1794.0, + "32": 2109.0, + 
"33": 2422.0, + "34": 2135.0, + "35": 2169.0, + "36": 2127.0, + "37": 2432.0, + "38": 2490.0, + "39": 2495.0, + "40": 2486.0, + "41": 2465.0, + "42": 2535.0, + "43": 2216.0, + "44": 2407.0, + "45": 2335.0, + "46": 2617.0, + "47": 2830.0, + "48": 2480.0, + "49": 2492.0, + "50": 2687.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 269891584.0, + "2": 269891584.0, + "3": 269891584.0, + "4": 269891584.0, + "5": 269891584.0, + "6": 269891584.0, + "7": 269891584.0, + "8": 269891584.0, + "9": 269891584.0, + "10": 269891584.0, + "11": 269891584.0, + "12": 269891584.0, + "13": 269891584.0, + "14": 269891584.0, + "15": 269891584.0, + "16": 269891584.0, + "17": 269891584.0, + "18": 269891584.0, + "19": 269891584.0, + "20": 269891584.0, + "21": 269891584.0, + "22": 269891584.0, + "23": 269891584.0, + "24": 269891584.0, + "25": 269891584.0, + "26": 269891584.0, + "27": 269891584.0, + "28": 269891584.0, + "29": 269891584.0, + "30": 269891584.0, + "31": 269891584.0, + "32": 269891584.0, + "33": 269891584.0, + "34": 269891584.0, + "35": 269891584.0, + "36": 269891584.0, + "37": 269891584.0, + "38": 269891584.0, + "39": 269891584.0, + "40": 269891584.0, + "41": 269891584.0, + "42": 269891584.0, + "43": 269891584.0, + "44": 269891584.0, + "45": 269891584.0, + "46": 269891584.0, + "47": 269891584.0, + "48": 269891584.0, + "49": 269891584.0, + "50": 269891584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1449682944.0, + "2": 1514627584.0, + "3": 1514627584.0, + "4": 1514628096.0, + "5": 1514628096.0, + "6": 1515674112.0, + "7": 1515674112.0, + "8": 1515674112.0, + "9": 1515676160.0, + "10": 1515676160.0, + "11": 1515676160.0, + "12": 1515676160.0, + "13": 1515676160.0, + "14": 1515676160.0, + "15": 1515676672.0, + "16": 1515676672.0, + "17": 1515676672.0, + "18": 1515676672.0, + "19": 1515676672.0, + "20": 1515676672.0, + "21": 1515676672.0, 
+ "22": 1515676672.0, + "23": 1515676672.0, + "24": 1515676672.0, + "25": 1515676672.0, + "26": 1515676672.0, + "27": 1515676672.0, + "28": 1515676672.0, + "29": 1515676672.0, + "30": 1515676672.0, + "31": 1515676672.0, + "32": 1515676672.0, + "33": 1515676672.0, + "34": 1515676672.0, + "35": 1515676672.0, + "36": 1515676672.0, + "37": 1515676672.0, + "38": 1515676672.0, + "39": 1515676672.0, + "40": 1515676672.0, + "41": 1515676672.0, + "42": 1515676672.0, + "43": 1515676672.0, + "44": 1515676672.0, + "45": 1515676672.0, + "46": 1515676672.0, + "47": 1515676672.0, + "48": 1515676672.0, + "49": 1515676672.0, + "50": 1515676672.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.6671, + "2": 0.83595, + "3": 0.32182, + "4": 0.325, + "5": 0.52703, + "6": 0.32134, + "7": 0.32449, + "8": 0.32437, + "9": 0.32282, + "10": 0.32149, + "11": 0.32428, + "12": 0.32191, + "13": 0.32586, + "14": 0.32086, + "15": 0.3225, + "16": 0.32112, + "17": 0.32105, + "18": 0.32408, + "19": 0.32353, + "20": 0.32273, + "21": 0.32558, + "22": 0.31978, + "23": 0.32165, + "24": 0.32145, + "25": 0.31914, + "26": 0.32323, + "27": 0.32298, + "28": 0.31906, + "29": 0.31806, + "30": 0.32112, + "31": 0.31802, + "32": 0.32203, + "33": 0.32813, + "34": 0.32256, + "35": 0.32108, + "36": 0.32976, + "37": 0.32104, + "38": 0.32185, + "39": 0.32826, + "40": 0.32693, + "41": 0.32396, + "42": 0.32632, + "43": 0.33312, + "44": 0.32745, + "45": 0.32655, + "46": 0.32577, + "47": 0.32382, + "48": 0.32447, + "49": 0.32891, + "50": 0.32257 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index f479cea5f5f..39765124d93 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.84163, "5": 10.85872, "10": 10.81849, "15": 10.81015, "20": 10.70819, "25": 10.57102, "30": 10.39695, "35": 10.28351, "40": 10.09767, "45": 9.86165, "50": 9.91045, "55": 9.88738, "60": 9.51376, "65": 8.9571, "70": 9.74676, "75": 9.42381, "80": 9.40721, "85": 9.61784, "90": 9.82256, "95": 9.51351, "100": 9.40106}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1736.0, "5": 1955.0, "10": 1441.0, "15": 1907.0, "20": 1700.0, "25": 1686.0, "30": 1941.0, "35": 1907.0, "40": 2224.0, "45": 1956.0, "50": 2232.0, "55": 2206.0, "60": 2157.0, "65": 2630.0, "70": 3040.0, "75": 2461.0, "80": 3104.0, "85": 3167.0, "90": 3069.0, "95": 3206.0, "100": 3111.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 299203072.0, "5": 299203072.0, "10": 299203072.0, "15": 299203072.0, "20": 299203072.0, "25": 299203072.0, "30": 299203072.0, "35": 299203072.0, "40": 299203072.0, "45": 299203072.0, "50": 299203072.0, "55": 299203072.0, "60": 299203072.0, "65": 299203072.0, "70": 299203072.0, "75": 299203072.0, "80": 299203072.0, "85": 299203072.0, "90": 299203072.0, "95": 299203072.0, "100": 299203072.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 977125888.0, "5": 1042071040.0, "10": 1042071040.0, "15": 1042071040.0, "20": 1042071040.0, "25": 1042071040.0, "30": 1042071040.0, "35": 1042071040.0, "40": 1042071040.0, "45": 1042071040.0, "50": 1042071040.0, "55": 1042071040.0, "60": 1042071040.0, "65": 1042071040.0, "70": 
1042071040.0, "75": 1042071040.0, "80": 1042071040.0, "85": 1042071040.0, "90": 1042071040.0, "95": 1042071040.0, "100": 1042071040.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 9.57084, "5": 0.17113, "10": 0.17286, "15": 0.16879, "20": 0.16991, "25": 0.16317, "30": 0.16767, "35": 0.16367, "40": 0.16455, "45": 0.17151, "50": 0.16431, "55": 0.17778, "60": 0.16619, "65": 0.16724, "70": 0.17675, "75": 0.17316, "80": 0.17654, "85": 0.18496, "90": 0.167, "95": 0.17008, "100": 0.16742}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84163, + "2": 10.85598, + "3": 10.84413, + "4": 10.84124, + "5": 10.85872, + "6": 10.86316, + "7": 10.85184, + "8": 10.84645, + "9": 10.85647, + "10": 10.81849, + "11": 10.85923, + "12": 10.84285, + "13": 10.86432, + "14": 10.85423, + "15": 10.81015, + "16": 10.81588, + "17": 10.78949, + "18": 10.79683, + "19": 10.79073, + "20": 10.70819, + "21": 10.69322, + "22": 10.58504, + "23": 10.70217, + "24": 10.60546, + "25": 10.57102, + "26": 10.61967, + "27": 10.61501, + "28": 10.56369, + "29": 10.56725, + "30": 10.39695, + "31": 10.16591, + "32": 10.4573, + "33": 10.45199, + "34": 10.2392, + "35": 10.28351, + "36": 10.24677, + "37": 10.3427, + "38": 10.20546, + "39": 10.39187, + "40": 10.09767, + "41": 10.1526, + "42": 10.21051, + "43": 9.87726, + "44": 9.98291, + "45": 9.86165, + "46": 9.83587, + "47": 10.13369, + "48": 9.87212, + "49": 9.56121, + "50": 9.91045, + "51": 9.85839, + "52": 9.7506, + "53": 10.05817, + "54": 9.96076, + "55": 9.88738, + "56": 9.6344, + "57": 9.4967, + "58": 9.83343, + "59": 9.59391, + "60": 9.51376, + "61": 9.69928, + "62": 9.98089, + "63": 9.39065, + "64": 9.77599, + "65": 8.9571, + "66": 9.70054, + "67": 9.37, + "68": 9.78529, + "69": 9.78966, + "70": 9.74676, + "71": 9.61906, + "72": 9.58963, + "73": 9.49629, + "74": 8.94963, + "75": 9.42381, + "76": 9.07799, + "77": 10.07105, + "78": 
9.72632, + "79": 9.37966, + "80": 9.40721, + "81": 9.48238, + "82": 9.70152, + "83": 9.30657, + "84": 9.41464, + "85": 9.61784, + "86": 9.08212, + "87": 9.59511, + "88": 9.75008, + "89": 9.60356, + "90": 9.82256, + "91": 9.33721, + "92": 9.35861, + "93": 9.07956, + "94": 8.83268, + "95": 9.51351, + "96": 9.52947, + "97": 9.31813, + "98": 9.67451, + "99": 8.88607, + "100": 9.40106 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1736.0, + "2": 1692.0, + "3": 1695.0, + "4": 1761.0, + "5": 1955.0, + "6": 1791.0, + "7": 1943.0, + "8": 1681.0, + "9": 1884.0, + "10": 1441.0, + "11": 1942.0, + "12": 1786.0, + "13": 1940.0, + "14": 1862.0, + "15": 1907.0, + "16": 1947.0, + "17": 1827.0, + "18": 1907.0, + "19": 1818.0, + "20": 1700.0, + "21": 1911.0, + "22": 1720.0, + "23": 1938.0, + "24": 1707.0, + "25": 1686.0, + "26": 1792.0, + "27": 1891.0, + "28": 1976.0, + "29": 1958.0, + "30": 1941.0, + "31": 1622.0, + "32": 1970.0, + "33": 2129.0, + "34": 1830.0, + "35": 1907.0, + "36": 1892.0, + "37": 2395.0, + "38": 2161.0, + "39": 2493.0, + "40": 2224.0, + "41": 2201.0, + "42": 2175.0, + "43": 1920.0, + "44": 1955.0, + "45": 1956.0, + "46": 2166.0, + "47": 2517.0, + "48": 2272.0, + "49": 2211.0, + "50": 2232.0, + "51": 2621.0, + "52": 2597.0, + "53": 2926.0, + "54": 2633.0, + "55": 2206.0, + "56": 2627.0, + "57": 2328.0, + "58": 2886.0, + "59": 2639.0, + "60": 2157.0, + "61": 2736.0, + "62": 2544.0, + "63": 2332.0, + "64": 2948.0, + "65": 2630.0, + "66": 2931.0, + "67": 2717.0, + "68": 2643.0, + "69": 2955.0, + "70": 3040.0, + "71": 2882.0, + "72": 2390.0, + "73": 2812.0, + "74": 1844.0, + "75": 2461.0, + "76": 3067.0, + "77": 3152.0, + "78": 3018.0, + "79": 3008.0, + "80": 3104.0, + "81": 3589.0, + "82": 3218.0, + "83": 2748.0, + "84": 3217.0, + "85": 3167.0, + "86": 2876.0, + "87": 3604.0, + "88": 3017.0, + "89": 3249.0, + "90": 3069.0, + "91": 2865.0, + "92": 3074.0, + "93": 2680.0, + "94": 3392.0, + "95": 3206.0, + 
"96": 3401.0, + "97": 3107.0, + "98": 3624.0, + "99": 3007.0, + "100": 3111.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 299203072.0, + "2": 299203072.0, + "3": 299203072.0, + "4": 299203072.0, + "5": 299203072.0, + "6": 299203072.0, + "7": 299203072.0, + "8": 299203072.0, + "9": 299203072.0, + "10": 299203072.0, + "11": 299203072.0, + "12": 299203072.0, + "13": 299203072.0, + "14": 299203072.0, + "15": 299203072.0, + "16": 299203072.0, + "17": 299203072.0, + "18": 299203072.0, + "19": 299203072.0, + "20": 299203072.0, + "21": 299203072.0, + "22": 299203072.0, + "23": 299203072.0, + "24": 299203072.0, + "25": 299203072.0, + "26": 299203072.0, + "27": 299203072.0, + "28": 299203072.0, + "29": 299203072.0, + "30": 299203072.0, + "31": 299203072.0, + "32": 299203072.0, + "33": 299203072.0, + "34": 299203072.0, + "35": 299203072.0, + "36": 299203072.0, + "37": 299203072.0, + "38": 299203072.0, + "39": 299203072.0, + "40": 299203072.0, + "41": 299203072.0, + "42": 299203072.0, + "43": 299203072.0, + "44": 299203072.0, + "45": 299203072.0, + "46": 299203072.0, + "47": 299203072.0, + "48": 299203072.0, + "49": 299203072.0, + "50": 299203072.0, + "51": 299203072.0, + "52": 299203072.0, + "53": 299203072.0, + "54": 299203072.0, + "55": 299203072.0, + "56": 299203072.0, + "57": 299203072.0, + "58": 299203072.0, + "59": 299203072.0, + "60": 299203072.0, + "61": 299203072.0, + "62": 299203072.0, + "63": 299203072.0, + "64": 299203072.0, + "65": 299203072.0, + "66": 299203072.0, + "67": 299203072.0, + "68": 299203072.0, + "69": 299203072.0, + "70": 299203072.0, + "71": 299203072.0, + "72": 299203072.0, + "73": 299203072.0, + "74": 299203072.0, + "75": 299203072.0, + "76": 299203072.0, + "77": 299203072.0, + "78": 299203072.0, + "79": 299203072.0, + "80": 299203072.0, + "81": 299203072.0, + "82": 299203072.0, + "83": 299203072.0, + "84": 299203072.0, + "85": 299203072.0, + "86": 299203072.0, + "87": 
299203072.0, + "88": 299203072.0, + "89": 299203072.0, + "90": 299203072.0, + "91": 299203072.0, + "92": 299203072.0, + "93": 299203072.0, + "94": 299203072.0, + "95": 299203072.0, + "96": 299203072.0, + "97": 299203072.0, + "98": 299203072.0, + "99": 299203072.0, + "100": 299203072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 977125888.0, + "2": 1042071040.0, + "3": 1042071040.0, + "4": 1042071040.0, + "5": 1042071040.0, + "6": 1042071040.0, + "7": 1042071040.0, + "8": 1042071040.0, + "9": 1042071040.0, + "10": 1042071040.0, + "11": 1042071040.0, + "12": 1042071040.0, + "13": 1042071040.0, + "14": 1042071040.0, + "15": 1042071040.0, + "16": 1042071040.0, + "17": 1042071040.0, + "18": 1042071040.0, + "19": 1042071040.0, + "20": 1042071040.0, + "21": 1042071040.0, + "22": 1042071040.0, + "23": 1042071040.0, + "24": 1042071040.0, + "25": 1042071040.0, + "26": 1042071040.0, + "27": 1042071040.0, + "28": 1042071040.0, + "29": 1042071040.0, + "30": 1042071040.0, + "31": 1042071040.0, + "32": 1042071040.0, + "33": 1042071040.0, + "34": 1042071040.0, + "35": 1042071040.0, + "36": 1042071040.0, + "37": 1042071040.0, + "38": 1042071040.0, + "39": 1042071040.0, + "40": 1042071040.0, + "41": 1042071040.0, + "42": 1042071040.0, + "43": 1042071040.0, + "44": 1042071040.0, + "45": 1042071040.0, + "46": 1042071040.0, + "47": 1042071040.0, + "48": 1042071040.0, + "49": 1042071040.0, + "50": 1042071040.0, + "51": 1042071040.0, + "52": 1042071040.0, + "53": 1042071040.0, + "54": 1042071040.0, + "55": 1042071040.0, + "56": 1042071040.0, + "57": 1042071040.0, + "58": 1042071040.0, + "59": 1042071040.0, + "60": 1042071040.0, + "61": 1042071040.0, + "62": 1042071040.0, + "63": 1042071040.0, + "64": 1042071040.0, + "65": 1042071040.0, + "66": 1042071040.0, + "67": 1042071040.0, + "68": 1042071040.0, + "69": 1042071040.0, + "70": 1042071040.0, + "71": 1042071040.0, + "72": 1042071040.0, + "73": 1042071040.0, + 
"74": 1042071040.0, + "75": 1042071040.0, + "76": 1042071040.0, + "77": 1042071040.0, + "78": 1042071040.0, + "79": 1042071040.0, + "80": 1042071040.0, + "81": 1042071040.0, + "82": 1042071040.0, + "83": 1042071040.0, + "84": 1042071040.0, + "85": 1042071040.0, + "86": 1042071040.0, + "87": 1042071040.0, + "88": 1042071040.0, + "89": 1042071040.0, + "90": 1042071040.0, + "91": 1042071040.0, + "92": 1042071040.0, + "93": 1042071040.0, + "94": 1042071040.0, + "95": 1042071040.0, + "96": 1042071040.0, + "97": 1042071040.0, + "98": 1042071040.0, + "99": 1042071040.0, + "100": 1042071040.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.64755, + "2": 0.22676, + "3": 0.21049, + "4": 0.21226, + "5": 0.21276, + "6": 0.21284, + "7": 0.21174, + "8": 0.21294, + "9": 0.21455, + "10": 0.21245, + "11": 0.21305, + "12": 0.21226, + "13": 0.21393, + "14": 0.21543, + "15": 0.21306, + "16": 0.21524, + "17": 0.21547, + "18": 0.21654, + "19": 0.21182, + "20": 0.21446, + "21": 0.2154, + "22": 0.2134, + "23": 0.21194, + "24": 0.21397, + "25": 0.21361, + "26": 0.21508, + "27": 0.21438, + "28": 0.21467, + "29": 0.21423, + "30": 0.21547, + "31": 0.2149, + "32": 0.21373, + "33": 0.21293, + "34": 0.21223, + "35": 0.21322, + "36": 0.21538, + "37": 0.2171, + "38": 0.21288, + "39": 0.214, + "40": 0.21613, + "41": 0.22561, + "42": 0.21996, + "43": 0.2231, + "44": 0.21366, + "45": 0.20946, + "46": 0.21036, + "47": 0.21159, + "48": 0.21259, + "49": 0.2162, + "50": 0.21326, + "51": 0.21621, + "52": 0.20977, + "53": 0.20911, + "54": 0.20812, + "55": 0.20849, + "56": 0.20718, + "57": 0.21288, + "58": 0.20817, + "59": 0.20767, + "60": 0.20713, + "61": 0.21035, + "62": 0.21063, + "63": 0.21186, + "64": 0.20447, + "65": 0.206, + "66": 0.2078, + "67": 0.21155, + "68": 0.21249, + "69": 0.20772, + "70": 0.2071, + "71": 0.20716, + "72": 0.20814, + "73": 0.20979, + "74": 0.21089, + "75": 0.20519, + "76": 0.20953, + "77": 0.20632, + "78": 
0.21411, + "79": 0.20748, + "80": 0.20907, + "81": 0.20802, + "82": 0.20909, + "83": 0.21401, + "84": 0.21584, + "85": 0.20979, + "86": 0.20899, + "87": 0.20903, + "88": 0.21002, + "89": 0.20822, + "90": 0.20988, + "91": 0.2101, + "92": 0.20692, + "93": 0.21116, + "94": 0.20766, + "95": 0.2115, + "96": 0.20949, + "97": 0.20615, + "98": 0.20442, + "99": 0.2084, + "100": 0.20996 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..0521ec92aee --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84163, + "2": 10.85598, + "3": 10.84413, + "4": 10.84124, + "5": 10.85872, + "6": 10.86316, + "7": 10.85184, + "8": 10.84645, + "9": 10.85647, + "10": 10.81849, + "11": 10.85923, + "12": 10.84285, + "13": 10.86432, + "14": 10.85423, + "15": 10.81015, + "16": 10.81588, + "17": 10.78949, + "18": 10.79683, + "19": 10.79073, + "20": 10.70819, + "21": 10.69322, + "22": 10.58504, + "23": 10.70217, + "24": 10.60546, + "25": 10.57102, + "26": 10.61967, + "27": 10.61501, + "28": 10.56369, + "29": 10.56725, + "30": 10.39695, + "31": 10.16591, + "32": 10.4573, + "33": 10.45199, + "34": 10.2392, + "35": 10.28351, + "36": 10.24677, + "37": 10.3427, + "38": 10.20546, + "39": 10.39187, + "40": 10.09767, + "41": 10.1526, + "42": 10.21051, + "43": 9.87726, + "44": 9.98291, + "45": 9.86165, + "46": 9.83587, + "47": 10.13369, + "48": 9.87212, + "49": 9.56121, + "50": 
9.91045, + "51": 9.85839, + "52": 9.7506, + "53": 10.05817, + "54": 9.96076, + "55": 9.88738, + "56": 9.6344, + "57": 9.4967, + "58": 9.83343, + "59": 9.59391, + "60": 9.51376, + "61": 9.69928, + "62": 9.98089, + "63": 9.39065, + "64": 9.77599, + "65": 8.9571, + "66": 9.70054, + "67": 9.37, + "68": 9.78529, + "69": 9.78966, + "70": 9.74676, + "71": 9.61906, + "72": 9.58963, + "73": 9.49629, + "74": 8.94963, + "75": 9.42381, + "76": 9.07799, + "77": 10.07105, + "78": 9.72632, + "79": 9.37966, + "80": 9.40721, + "81": 9.48238, + "82": 9.70152, + "83": 9.30657, + "84": 9.41464, + "85": 9.61784, + "86": 9.08212, + "87": 9.59511, + "88": 9.75008, + "89": 9.60356, + "90": 9.82256, + "91": 9.33721, + "92": 9.35861, + "93": 9.07956, + "94": 8.83268, + "95": 9.51351, + "96": 9.52947, + "97": 9.31813, + "98": 9.67451, + "99": 8.88607, + "100": 9.40106 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1736.0, + "2": 1692.0, + "3": 1695.0, + "4": 1761.0, + "5": 1955.0, + "6": 1791.0, + "7": 1943.0, + "8": 1681.0, + "9": 1884.0, + "10": 1441.0, + "11": 1942.0, + "12": 1786.0, + "13": 1940.0, + "14": 1862.0, + "15": 1907.0, + "16": 1947.0, + "17": 1827.0, + "18": 1907.0, + "19": 1818.0, + "20": 1700.0, + "21": 1911.0, + "22": 1720.0, + "23": 1938.0, + "24": 1707.0, + "25": 1686.0, + "26": 1792.0, + "27": 1891.0, + "28": 1976.0, + "29": 1958.0, + "30": 1941.0, + "31": 1622.0, + "32": 1970.0, + "33": 2129.0, + "34": 1830.0, + "35": 1907.0, + "36": 1892.0, + "37": 2395.0, + "38": 2161.0, + "39": 2493.0, + "40": 2224.0, + "41": 2201.0, + "42": 2175.0, + "43": 1920.0, + "44": 1955.0, + "45": 1956.0, + "46": 2166.0, + "47": 2517.0, + "48": 2272.0, + "49": 2211.0, + "50": 2232.0, + "51": 2621.0, + "52": 2597.0, + "53": 2926.0, + "54": 2633.0, + "55": 2206.0, + "56": 2627.0, + "57": 2328.0, + "58": 2886.0, + "59": 2639.0, + "60": 2157.0, + "61": 2736.0, + "62": 2544.0, + "63": 2332.0, + "64": 2948.0, + "65": 2630.0, + "66": 
2931.0, + "67": 2717.0, + "68": 2643.0, + "69": 2955.0, + "70": 3040.0, + "71": 2882.0, + "72": 2390.0, + "73": 2812.0, + "74": 1844.0, + "75": 2461.0, + "76": 3067.0, + "77": 3152.0, + "78": 3018.0, + "79": 3008.0, + "80": 3104.0, + "81": 3589.0, + "82": 3218.0, + "83": 2748.0, + "84": 3217.0, + "85": 3167.0, + "86": 2876.0, + "87": 3604.0, + "88": 3017.0, + "89": 3249.0, + "90": 3069.0, + "91": 2865.0, + "92": 3074.0, + "93": 2680.0, + "94": 3392.0, + "95": 3206.0, + "96": 3401.0, + "97": 3107.0, + "98": 3624.0, + "99": 3007.0, + "100": 3111.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 299203072.0, + "2": 299203072.0, + "3": 299203072.0, + "4": 299203072.0, + "5": 299203072.0, + "6": 299203072.0, + "7": 299203072.0, + "8": 299203072.0, + "9": 299203072.0, + "10": 299203072.0, + "11": 299203072.0, + "12": 299203072.0, + "13": 299203072.0, + "14": 299203072.0, + "15": 299203072.0, + "16": 299203072.0, + "17": 299203072.0, + "18": 299203072.0, + "19": 299203072.0, + "20": 299203072.0, + "21": 299203072.0, + "22": 299203072.0, + "23": 299203072.0, + "24": 299203072.0, + "25": 299203072.0, + "26": 299203072.0, + "27": 299203072.0, + "28": 299203072.0, + "29": 299203072.0, + "30": 299203072.0, + "31": 299203072.0, + "32": 299203072.0, + "33": 299203072.0, + "34": 299203072.0, + "35": 299203072.0, + "36": 299203072.0, + "37": 299203072.0, + "38": 299203072.0, + "39": 299203072.0, + "40": 299203072.0, + "41": 299203072.0, + "42": 299203072.0, + "43": 299203072.0, + "44": 299203072.0, + "45": 299203072.0, + "46": 299203072.0, + "47": 299203072.0, + "48": 299203072.0, + "49": 299203072.0, + "50": 299203072.0, + "51": 299203072.0, + "52": 299203072.0, + "53": 299203072.0, + "54": 299203072.0, + "55": 299203072.0, + "56": 299203072.0, + "57": 299203072.0, + "58": 299203072.0, + "59": 299203072.0, + "60": 299203072.0, + "61": 299203072.0, + "62": 299203072.0, + "63": 299203072.0, + "64": 299203072.0, + 
"65": 299203072.0, + "66": 299203072.0, + "67": 299203072.0, + "68": 299203072.0, + "69": 299203072.0, + "70": 299203072.0, + "71": 299203072.0, + "72": 299203072.0, + "73": 299203072.0, + "74": 299203072.0, + "75": 299203072.0, + "76": 299203072.0, + "77": 299203072.0, + "78": 299203072.0, + "79": 299203072.0, + "80": 299203072.0, + "81": 299203072.0, + "82": 299203072.0, + "83": 299203072.0, + "84": 299203072.0, + "85": 299203072.0, + "86": 299203072.0, + "87": 299203072.0, + "88": 299203072.0, + "89": 299203072.0, + "90": 299203072.0, + "91": 299203072.0, + "92": 299203072.0, + "93": 299203072.0, + "94": 299203072.0, + "95": 299203072.0, + "96": 299203072.0, + "97": 299203072.0, + "98": 299203072.0, + "99": 299203072.0, + "100": 299203072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 977125888.0, + "2": 1042071040.0, + "3": 1042071040.0, + "4": 1042071040.0, + "5": 1042071040.0, + "6": 1042071040.0, + "7": 1042071040.0, + "8": 1042071040.0, + "9": 1042071040.0, + "10": 1042071040.0, + "11": 1042071040.0, + "12": 1042071040.0, + "13": 1042071040.0, + "14": 1042071040.0, + "15": 1042071040.0, + "16": 1042071040.0, + "17": 1042071040.0, + "18": 1042071040.0, + "19": 1042071040.0, + "20": 1042071040.0, + "21": 1042071040.0, + "22": 1042071040.0, + "23": 1042071040.0, + "24": 1042071040.0, + "25": 1042071040.0, + "26": 1042071040.0, + "27": 1042071040.0, + "28": 1042071040.0, + "29": 1042071040.0, + "30": 1042071040.0, + "31": 1042071040.0, + "32": 1042071040.0, + "33": 1042071040.0, + "34": 1042071040.0, + "35": 1042071040.0, + "36": 1042071040.0, + "37": 1042071040.0, + "38": 1042071040.0, + "39": 1042071040.0, + "40": 1042071040.0, + "41": 1042071040.0, + "42": 1042071040.0, + "43": 1042071040.0, + "44": 1042071040.0, + "45": 1042071040.0, + "46": 1042071040.0, + "47": 1042071040.0, + "48": 1042071040.0, + "49": 1042071040.0, + "50": 1042071040.0, + "51": 1042071040.0, + "52": 
1042071040.0, + "53": 1042071040.0, + "54": 1042071040.0, + "55": 1042071040.0, + "56": 1042071040.0, + "57": 1042071040.0, + "58": 1042071040.0, + "59": 1042071040.0, + "60": 1042071040.0, + "61": 1042071040.0, + "62": 1042071040.0, + "63": 1042071040.0, + "64": 1042071040.0, + "65": 1042071040.0, + "66": 1042071040.0, + "67": 1042071040.0, + "68": 1042071040.0, + "69": 1042071040.0, + "70": 1042071040.0, + "71": 1042071040.0, + "72": 1042071040.0, + "73": 1042071040.0, + "74": 1042071040.0, + "75": 1042071040.0, + "76": 1042071040.0, + "77": 1042071040.0, + "78": 1042071040.0, + "79": 1042071040.0, + "80": 1042071040.0, + "81": 1042071040.0, + "82": 1042071040.0, + "83": 1042071040.0, + "84": 1042071040.0, + "85": 1042071040.0, + "86": 1042071040.0, + "87": 1042071040.0, + "88": 1042071040.0, + "89": 1042071040.0, + "90": 1042071040.0, + "91": 1042071040.0, + "92": 1042071040.0, + "93": 1042071040.0, + "94": 1042071040.0, + "95": 1042071040.0, + "96": 1042071040.0, + "97": 1042071040.0, + "98": 1042071040.0, + "99": 1042071040.0, + "100": 1042071040.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.89047, + "2": 0.20763, + "3": 0.17962, + "4": 0.17996, + "5": 0.19517, + "6": 0.19097, + "7": 0.21371, + "8": 0.17946, + "9": 0.18028, + "10": 0.17811, + "11": 0.19549, + "12": 0.17995, + "13": 0.17967, + "14": 0.17747, + "15": 0.17854, + "16": 0.18132, + "17": 0.18068, + "18": 0.20382, + "19": 0.18932, + "20": 0.18279, + "21": 0.18143, + "22": 0.18461, + "23": 0.18263, + "24": 0.19677, + "25": 0.18399, + "26": 0.18138, + "27": 0.18309, + "28": 0.18505, + "29": 0.18571, + "30": 0.19268, + "31": 0.18694, + "32": 0.2033, + "33": 0.20046, + "34": 0.20101, + "35": 0.18537, + "36": 0.18526, + "37": 0.18418, + "38": 0.18481, + "39": 0.1813, + "40": 0.1837, + "41": 0.17918, + "42": 0.18044, + "43": 0.18093, + "44": 0.17996, + "45": 0.18187, + "46": 0.18178, + "47": 0.1859, + "48": 0.18306, + "49": 0.18442, + 
"50": 0.17901, + "51": 0.19352, + "52": 0.19143, + "53": 0.18977, + "54": 0.18373, + "55": 0.1848, + "56": 0.18899, + "57": 0.18927, + "58": 0.18981, + "59": 0.18717, + "60": 0.18468, + "61": 0.18658, + "62": 0.18885, + "63": 0.18928, + "64": 0.18734, + "65": 0.18347, + "66": 0.18338, + "67": 0.18495, + "68": 0.19141, + "69": 0.18134, + "70": 0.18277, + "71": 0.18011, + "72": 0.18334, + "73": 0.18723, + "74": 0.18857, + "75": 0.18474, + "76": 0.18198, + "77": 0.18177, + "78": 0.18552, + "79": 0.18363, + "80": 0.18411, + "81": 0.18648, + "82": 0.18145, + "83": 0.1831, + "84": 0.18203, + "85": 0.18466, + "86": 0.17969, + "87": 0.18127, + "88": 0.18208, + "89": 0.18448, + "90": 0.2123, + "91": 0.18681, + "92": 0.18312, + "93": 0.18238, + "94": 0.18152, + "95": 0.17994, + "96": 0.18524, + "97": 0.18522, + "98": 0.18434, + "99": 0.19103, + "100": 0.19147 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..b61916ffd95 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84163, + "2": 10.85598, + "3": 10.84413, + "4": 10.84124, + "5": 10.85872, + "6": 10.86316, + "7": 10.85184, + "8": 10.84645, + "9": 10.85647, + "10": 10.81849, + "11": 10.85923, + "12": 10.84285, + "13": 10.86432, + "14": 10.85423, + "15": 10.81015, + "16": 10.81588, + "17": 10.78949, + "18": 10.79683, + "19": 10.79073, + "20": 10.70819, + "21": 10.69322, + "22": 10.58504, + "23": 10.70217, + 
"24": 10.60546, + "25": 10.57102, + "26": 10.61967, + "27": 10.61501, + "28": 10.56369, + "29": 10.56725, + "30": 10.39695, + "31": 10.16591, + "32": 10.4573, + "33": 10.45199, + "34": 10.2392, + "35": 10.28351, + "36": 10.24677, + "37": 10.3427, + "38": 10.20546, + "39": 10.39187, + "40": 10.09767, + "41": 10.1526, + "42": 10.21051, + "43": 9.87726, + "44": 9.98291, + "45": 9.86165, + "46": 9.83587, + "47": 10.13369, + "48": 9.87212, + "49": 9.56121, + "50": 9.91045, + "51": 9.85839, + "52": 9.7506, + "53": 10.05817, + "54": 9.96076, + "55": 9.88738, + "56": 9.6344, + "57": 9.4967, + "58": 9.83343, + "59": 9.59391, + "60": 9.51376, + "61": 9.69928, + "62": 9.98089, + "63": 9.39065, + "64": 9.77599, + "65": 8.9571, + "66": 9.70054, + "67": 9.37, + "68": 9.78529, + "69": 9.78966, + "70": 9.74676, + "71": 9.61906, + "72": 9.58963, + "73": 9.49629, + "74": 8.94963, + "75": 9.42381, + "76": 9.07799, + "77": 10.07105, + "78": 9.72632, + "79": 9.37966, + "80": 9.40721, + "81": 9.48238, + "82": 9.70152, + "83": 9.30657, + "84": 9.41464, + "85": 9.61784, + "86": 9.08212, + "87": 9.59511, + "88": 9.75008, + "89": 9.60356, + "90": 9.82256, + "91": 9.33721, + "92": 9.35861, + "93": 9.07956, + "94": 8.83268, + "95": 9.51351, + "96": 9.52947, + "97": 9.31813, + "98": 9.67451, + "99": 8.88607, + "100": 9.40106 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1736.0, + "2": 1692.0, + "3": 1695.0, + "4": 1761.0, + "5": 1955.0, + "6": 1791.0, + "7": 1943.0, + "8": 1681.0, + "9": 1884.0, + "10": 1441.0, + "11": 1942.0, + "12": 1786.0, + "13": 1940.0, + "14": 1862.0, + "15": 1907.0, + "16": 1947.0, + "17": 1827.0, + "18": 1907.0, + "19": 1818.0, + "20": 1700.0, + "21": 1911.0, + "22": 1720.0, + "23": 1938.0, + "24": 1707.0, + "25": 1686.0, + "26": 1792.0, + "27": 1891.0, + "28": 1976.0, + "29": 1958.0, + "30": 1941.0, + "31": 1622.0, + "32": 1970.0, + "33": 2129.0, + "34": 1830.0, + "35": 1907.0, + "36": 1892.0, + "37": 
2395.0, + "38": 2161.0, + "39": 2493.0, + "40": 2224.0, + "41": 2201.0, + "42": 2175.0, + "43": 1920.0, + "44": 1955.0, + "45": 1956.0, + "46": 2166.0, + "47": 2517.0, + "48": 2272.0, + "49": 2211.0, + "50": 2232.0, + "51": 2621.0, + "52": 2597.0, + "53": 2926.0, + "54": 2633.0, + "55": 2206.0, + "56": 2627.0, + "57": 2328.0, + "58": 2886.0, + "59": 2639.0, + "60": 2157.0, + "61": 2736.0, + "62": 2544.0, + "63": 2332.0, + "64": 2948.0, + "65": 2630.0, + "66": 2931.0, + "67": 2717.0, + "68": 2643.0, + "69": 2955.0, + "70": 3040.0, + "71": 2882.0, + "72": 2390.0, + "73": 2812.0, + "74": 1844.0, + "75": 2461.0, + "76": 3067.0, + "77": 3152.0, + "78": 3018.0, + "79": 3008.0, + "80": 3104.0, + "81": 3589.0, + "82": 3218.0, + "83": 2748.0, + "84": 3217.0, + "85": 3167.0, + "86": 2876.0, + "87": 3604.0, + "88": 3017.0, + "89": 3249.0, + "90": 3069.0, + "91": 2865.0, + "92": 3074.0, + "93": 2680.0, + "94": 3392.0, + "95": 3206.0, + "96": 3401.0, + "97": 3107.0, + "98": 3624.0, + "99": 3007.0, + "100": 3111.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 299203072.0, + "2": 299203072.0, + "3": 299203072.0, + "4": 299203072.0, + "5": 299203072.0, + "6": 299203072.0, + "7": 299203072.0, + "8": 299203072.0, + "9": 299203072.0, + "10": 299203072.0, + "11": 299203072.0, + "12": 299203072.0, + "13": 299203072.0, + "14": 299203072.0, + "15": 299203072.0, + "16": 299203072.0, + "17": 299203072.0, + "18": 299203072.0, + "19": 299203072.0, + "20": 299203072.0, + "21": 299203072.0, + "22": 299203072.0, + "23": 299203072.0, + "24": 299203072.0, + "25": 299203072.0, + "26": 299203072.0, + "27": 299203072.0, + "28": 299203072.0, + "29": 299203072.0, + "30": 299203072.0, + "31": 299203072.0, + "32": 299203072.0, + "33": 299203072.0, + "34": 299203072.0, + "35": 299203072.0, + "36": 299203072.0, + "37": 299203072.0, + "38": 299203072.0, + "39": 299203072.0, + "40": 299203072.0, + "41": 299203072.0, + "42": 299203072.0, 
+ "43": 299203072.0, + "44": 299203072.0, + "45": 299203072.0, + "46": 299203072.0, + "47": 299203072.0, + "48": 299203072.0, + "49": 299203072.0, + "50": 299203072.0, + "51": 299203072.0, + "52": 299203072.0, + "53": 299203072.0, + "54": 299203072.0, + "55": 299203072.0, + "56": 299203072.0, + "57": 299203072.0, + "58": 299203072.0, + "59": 299203072.0, + "60": 299203072.0, + "61": 299203072.0, + "62": 299203072.0, + "63": 299203072.0, + "64": 299203072.0, + "65": 299203072.0, + "66": 299203072.0, + "67": 299203072.0, + "68": 299203072.0, + "69": 299203072.0, + "70": 299203072.0, + "71": 299203072.0, + "72": 299203072.0, + "73": 299203072.0, + "74": 299203072.0, + "75": 299203072.0, + "76": 299203072.0, + "77": 299203072.0, + "78": 299203072.0, + "79": 299203072.0, + "80": 299203072.0, + "81": 299203072.0, + "82": 299203072.0, + "83": 299203072.0, + "84": 299203072.0, + "85": 299203072.0, + "86": 299203072.0, + "87": 299203072.0, + "88": 299203072.0, + "89": 299203072.0, + "90": 299203072.0, + "91": 299203072.0, + "92": 299203072.0, + "93": 299203072.0, + "94": 299203072.0, + "95": 299203072.0, + "96": 299203072.0, + "97": 299203072.0, + "98": 299203072.0, + "99": 299203072.0, + "100": 299203072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 977125888.0, + "2": 1042071040.0, + "3": 1042071040.0, + "4": 1042071040.0, + "5": 1042071040.0, + "6": 1042071040.0, + "7": 1042071040.0, + "8": 1042071040.0, + "9": 1042071040.0, + "10": 1042071040.0, + "11": 1042071040.0, + "12": 1042071040.0, + "13": 1042071040.0, + "14": 1042071040.0, + "15": 1042071040.0, + "16": 1042071040.0, + "17": 1042071040.0, + "18": 1042071040.0, + "19": 1042071040.0, + "20": 1042071040.0, + "21": 1042071040.0, + "22": 1042071040.0, + "23": 1042071040.0, + "24": 1042071040.0, + "25": 1042071040.0, + "26": 1042071040.0, + "27": 1042071040.0, + "28": 1042071040.0, + "29": 1042071040.0, + "30": 1042071040.0, + "31": 
1042071040.0, + "32": 1042071040.0, + "33": 1042071040.0, + "34": 1042071040.0, + "35": 1042071040.0, + "36": 1042071040.0, + "37": 1042071040.0, + "38": 1042071040.0, + "39": 1042071040.0, + "40": 1042071040.0, + "41": 1042071040.0, + "42": 1042071040.0, + "43": 1042071040.0, + "44": 1042071040.0, + "45": 1042071040.0, + "46": 1042071040.0, + "47": 1042071040.0, + "48": 1042071040.0, + "49": 1042071040.0, + "50": 1042071040.0, + "51": 1042071040.0, + "52": 1042071040.0, + "53": 1042071040.0, + "54": 1042071040.0, + "55": 1042071040.0, + "56": 1042071040.0, + "57": 1042071040.0, + "58": 1042071040.0, + "59": 1042071040.0, + "60": 1042071040.0, + "61": 1042071040.0, + "62": 1042071040.0, + "63": 1042071040.0, + "64": 1042071040.0, + "65": 1042071040.0, + "66": 1042071040.0, + "67": 1042071040.0, + "68": 1042071040.0, + "69": 1042071040.0, + "70": 1042071040.0, + "71": 1042071040.0, + "72": 1042071040.0, + "73": 1042071040.0, + "74": 1042071040.0, + "75": 1042071040.0, + "76": 1042071040.0, + "77": 1042071040.0, + "78": 1042071040.0, + "79": 1042071040.0, + "80": 1042071040.0, + "81": 1042071040.0, + "82": 1042071040.0, + "83": 1042071040.0, + "84": 1042071040.0, + "85": 1042071040.0, + "86": 1042071040.0, + "87": 1042071040.0, + "88": 1042071040.0, + "89": 1042071040.0, + "90": 1042071040.0, + "91": 1042071040.0, + "92": 1042071040.0, + "93": 1042071040.0, + "94": 1042071040.0, + "95": 1042071040.0, + "96": 1042071040.0, + "97": 1042071040.0, + "98": 1042071040.0, + "99": 1042071040.0, + "100": 1042071040.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.40872, + "2": 0.25886, + "3": 0.22849, + "4": 0.21099, + "5": 0.21193, + "6": 0.20863, + "7": 0.20987, + "8": 0.21014, + "9": 0.21139, + "10": 0.21148, + "11": 0.21513, + "12": 0.21915, + "13": 0.21037, + "14": 0.20786, + "15": 0.20927, + "16": 0.20756, + "17": 0.21005, + "18": 0.21022, + "19": 0.21019, + "20": 0.21012, + "21": 0.20995, + "22": 
0.21005, + "23": 0.21213, + "24": 0.20995, + "25": 0.20776, + "26": 0.21296, + "27": 0.20984, + "28": 0.21526, + "29": 0.21164, + "30": 0.21175, + "31": 0.21062, + "32": 0.21292, + "33": 0.20962, + "34": 0.21025, + "35": 0.20968, + "36": 0.21367, + "37": 0.20989, + "38": 0.21034, + "39": 0.20979, + "40": 0.21092, + "41": 0.21065, + "42": 0.20865, + "43": 0.20939, + "44": 0.21656, + "45": 0.21131, + "46": 0.21087, + "47": 0.23723, + "48": 0.21006, + "49": 0.21157, + "50": 0.20975, + "51": 0.21952, + "52": 0.21306, + "53": 0.21253, + "54": 0.21223, + "55": 0.21336, + "56": 0.21514, + "57": 0.21536, + "58": 0.21288, + "59": 0.21211, + "60": 0.21298, + "61": 0.21285, + "62": 0.21438, + "63": 0.21461, + "64": 0.21382, + "65": 0.22082, + "66": 0.21222, + "67": 0.21414, + "68": 0.21315, + "69": 0.2153, + "70": 0.2172, + "71": 0.21323, + "72": 0.21366, + "73": 0.21434, + "74": 0.21455, + "75": 0.21545, + "76": 0.21631, + "77": 0.21419, + "78": 0.21365, + "79": 0.21514, + "80": 0.21447, + "81": 0.21379, + "82": 0.21487, + "83": 0.21038, + "84": 0.21708, + "85": 0.21166, + "86": 0.2141, + "87": 0.21613, + "88": 0.21214, + "89": 0.21499, + "90": 0.21811, + "91": 0.21563, + "92": 0.2152, + "93": 0.21548, + "94": 0.21863, + "95": 0.21366, + "96": 0.21458, + "97": 0.21279, + "98": 0.21555, + "99": 0.213, + "100": 0.2112 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..81ace8a79cb --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + 
"lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86122, + "2": 10.85774, + "3": 10.86039, + "4": 10.84813, + "5": 10.88242, + "6": 10.88645, + "7": 10.86227, + "8": 10.86932, + "9": 10.86444, + "10": 10.83506, + "11": 10.87765, + "12": 10.87384, + "13": 10.87945, + "14": 10.88919, + "15": 10.82738, + "16": 10.83105, + "17": 10.79888, + "18": 10.82441, + "19": 10.81363, + "20": 10.72743, + "21": 10.71638, + "22": 10.57153, + "23": 10.7269, + "24": 10.61223, + "25": 10.55753, + "26": 10.60603, + "27": 10.61792, + "28": 10.57695, + "29": 10.59633, + "30": 10.37895, + "31": 10.13125, + "32": 10.47822, + "33": 10.46894, + "34": 10.22715, + "35": 10.28321, + "36": 10.22751, + "37": 10.35397, + "38": 10.20483, + "39": 10.40755, + "40": 10.08785, + "41": 10.1591, + "42": 10.21601, + "43": 9.84821, + "44": 9.9651, + "45": 9.82625, + "46": 9.83468, + "47": 10.15337, + "48": 9.84529, + "49": 9.52926, + "50": 9.91327, + "51": 9.8517, + "52": 9.74686, + "53": 10.07204, + "54": 9.95738, + "55": 9.87788, + "56": 9.62943, + "57": 9.48988, + "58": 9.83265, + "59": 9.58831, + "60": 9.50874, + "61": 9.69495, + "62": 9.99373, + "63": 9.377, + "64": 9.78004, + "65": 8.95103, + "66": 9.71392, + "67": 9.37884, + "68": 9.78831, + "69": 9.79096, + "70": 9.73167, + "71": 9.61776, + "72": 9.59099, + "73": 9.49436, + "74": 8.95001, + "75": 9.43681, + "76": 9.09852, + "77": 10.06447, + "78": 9.72944, + "79": 9.37805, + "80": 9.41156, + "81": 9.48537, + "82": 9.69592, + "83": 9.31981, + "84": 9.42306, + "85": 9.61613, + "86": 9.07185, + "87": 9.59282, + "88": 9.75055, + "89": 9.61194, + "90": 9.8217, + "91": 9.35308, + "92": 9.36305, + "93": 9.08788, + "94": 8.83439, + "95": 9.5191, + "96": 9.52647, + "97": 9.31412, + "98": 9.67541, + "99": 8.88941, + "100": 9.40588 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1778.0, + "2": 1875.0, + "3": 1879.0, + "4": 1912.0, + "5": 2219.0, + "6": 
2163.0, + "7": 2113.0, + "8": 1747.0, + "9": 2049.0, + "10": 1530.0, + "11": 2113.0, + "12": 1959.0, + "13": 2134.0, + "14": 2055.0, + "15": 2125.0, + "16": 2139.0, + "17": 1988.0, + "18": 1892.0, + "19": 1991.0, + "20": 1867.0, + "21": 2023.0, + "22": 1865.0, + "23": 2185.0, + "24": 1774.0, + "25": 1773.0, + "26": 1990.0, + "27": 2061.0, + "28": 2215.0, + "29": 2186.0, + "30": 2129.0, + "31": 1794.0, + "32": 2109.0, + "33": 2422.0, + "34": 2135.0, + "35": 2169.0, + "36": 2127.0, + "37": 2432.0, + "38": 2490.0, + "39": 2495.0, + "40": 2486.0, + "41": 2465.0, + "42": 2535.0, + "43": 2216.0, + "44": 2407.0, + "45": 2335.0, + "46": 2617.0, + "47": 2830.0, + "48": 2480.0, + "49": 2492.0, + "50": 2687.0, + "51": 2863.0, + "52": 2881.0, + "53": 3220.0, + "54": 2894.0, + "55": 2652.0, + "56": 3006.0, + "57": 2561.0, + "58": 3273.0, + "59": 3039.0, + "60": 2765.0, + "61": 3310.0, + "62": 2936.0, + "63": 2630.0, + "64": 3230.0, + "65": 2946.0, + "66": 3500.0, + "67": 2976.0, + "68": 2944.0, + "69": 3117.0, + "70": 3629.0, + "71": 3255.0, + "72": 2633.0, + "73": 3338.0, + "74": 2172.0, + "75": 2702.0, + "76": 3162.0, + "77": 3850.0, + "78": 3590.0, + "79": 3658.0, + "80": 3866.0, + "81": 3976.0, + "82": 3680.0, + "83": 3153.0, + "84": 3586.0, + "85": 3517.0, + "86": 3137.0, + "87": 4177.0, + "88": 3589.0, + "89": 3849.0, + "90": 3349.0, + "91": 2936.0, + "92": 3526.0, + "93": 2965.0, + "94": 3772.0, + "95": 3530.0, + "96": 3774.0, + "97": 3636.0, + "98": 4064.0, + "99": 3394.0, + "100": 3530.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 269891584.0, + "2": 269891584.0, + "3": 269891584.0, + "4": 269891584.0, + "5": 269891584.0, + "6": 269891584.0, + "7": 269891584.0, + "8": 269891584.0, + "9": 269891584.0, + "10": 269891584.0, + "11": 269891584.0, + "12": 269891584.0, + "13": 269891584.0, + "14": 269891584.0, + "15": 269891584.0, + "16": 269891584.0, + "17": 269891584.0, + "18": 269891584.0, + "19": 
269891584.0, + "20": 269891584.0, + "21": 269891584.0, + "22": 269891584.0, + "23": 269891584.0, + "24": 269891584.0, + "25": 269891584.0, + "26": 269891584.0, + "27": 269891584.0, + "28": 269891584.0, + "29": 269891584.0, + "30": 269891584.0, + "31": 269891584.0, + "32": 269891584.0, + "33": 269891584.0, + "34": 269891584.0, + "35": 269891584.0, + "36": 269891584.0, + "37": 269891584.0, + "38": 269891584.0, + "39": 269891584.0, + "40": 269891584.0, + "41": 269891584.0, + "42": 269891584.0, + "43": 269891584.0, + "44": 269891584.0, + "45": 269891584.0, + "46": 269891584.0, + "47": 269891584.0, + "48": 269891584.0, + "49": 269891584.0, + "50": 269891584.0, + "51": 269891584.0, + "52": 269891584.0, + "53": 269891584.0, + "54": 269891584.0, + "55": 269891584.0, + "56": 269891584.0, + "57": 269891584.0, + "58": 269891584.0, + "59": 269891584.0, + "60": 269891584.0, + "61": 269891584.0, + "62": 269891584.0, + "63": 269891584.0, + "64": 269891584.0, + "65": 269891584.0, + "66": 269891584.0, + "67": 269891584.0, + "68": 269891584.0, + "69": 269891584.0, + "70": 269891584.0, + "71": 269891584.0, + "72": 269891584.0, + "73": 269891584.0, + "74": 269891584.0, + "75": 269891584.0, + "76": 269891584.0, + "77": 269891584.0, + "78": 269891584.0, + "79": 269891584.0, + "80": 269891584.0, + "81": 269891584.0, + "82": 269891584.0, + "83": 269891584.0, + "84": 269891584.0, + "85": 269891584.0, + "86": 269891584.0, + "87": 269891584.0, + "88": 269891584.0, + "89": 269891584.0, + "90": 269891584.0, + "91": 269891584.0, + "92": 269891584.0, + "93": 269891584.0, + "94": 269891584.0, + "95": 269891584.0, + "96": 269891584.0, + "97": 269891584.0, + "98": 269891584.0, + "99": 269891584.0, + "100": 269891584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1450731008.0, + "2": 1515674112.0, + "3": 1515674112.0, + "4": 1515676672.0, + "5": 1515676672.0, + "6": 1515676672.0, + "7": 1515676672.0, + "8": 1515676672.0, + 
"9": 1515676672.0, + "10": 1515676672.0, + "11": 1515676672.0, + "12": 1515676672.0, + "13": 1515676672.0, + "14": 1515676672.0, + "15": 1515676672.0, + "16": 1515676672.0, + "17": 1515676672.0, + "18": 1515676672.0, + "19": 1515676672.0, + "20": 1515676672.0, + "21": 1515676672.0, + "22": 1515676672.0, + "23": 1515676672.0, + "24": 1515676672.0, + "25": 1515676672.0, + "26": 1515676672.0, + "27": 1515676672.0, + "28": 1515676672.0, + "29": 1515676672.0, + "30": 1515676672.0, + "31": 1515676672.0, + "32": 1515676672.0, + "33": 1515676672.0, + "34": 1515676672.0, + "35": 1515676672.0, + "36": 1515676672.0, + "37": 1515676672.0, + "38": 1515676672.0, + "39": 1515676672.0, + "40": 1515676672.0, + "41": 1515676672.0, + "42": 1515676672.0, + "43": 1515676672.0, + "44": 1515676672.0, + "45": 1515676672.0, + "46": 1515676672.0, + "47": 1515676672.0, + "48": 1515676672.0, + "49": 1515676672.0, + "50": 1515676672.0, + "51": 1515676672.0, + "52": 1515676672.0, + "53": 1515676672.0, + "54": 1515676672.0, + "55": 1515676672.0, + "56": 1515676672.0, + "57": 1515676672.0, + "58": 1515676672.0, + "59": 1515676672.0, + "60": 1515676672.0, + "61": 1515676672.0, + "62": 1515676672.0, + "63": 1515676672.0, + "64": 1515676672.0, + "65": 1515676672.0, + "66": 1515676672.0, + "67": 1515676672.0, + "68": 1515676672.0, + "69": 1515676672.0, + "70": 1515676672.0, + "71": 1515676672.0, + "72": 1515676672.0, + "73": 1515676672.0, + "74": 1515676672.0, + "75": 1515676672.0, + "76": 1515676672.0, + "77": 1515676672.0, + "78": 1515676672.0, + "79": 1515676672.0, + "80": 1515676672.0, + "81": 1515676672.0, + "82": 1515676672.0, + "83": 1515676672.0, + "84": 1515676672.0, + "85": 1515676672.0, + "86": 1515676672.0, + "87": 1515676672.0, + "88": 1515676672.0, + "89": 1515676672.0, + "90": 1515676672.0, + "91": 1515676672.0, + "92": 1515676672.0, + "93": 1515676672.0, + "94": 1515676672.0, + "95": 1515676672.0, + "96": 1515676672.0, + "97": 1515676672.0, + "98": 1515676672.0, + "99": 1515676672.0, 
+ "100": 1515676672.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.9602, + "2": 0.41251, + "3": 0.31981, + "4": 0.60672, + "5": 0.31803, + "6": 0.66653, + "7": 0.31576, + "8": 0.3144, + "9": 0.31826, + "10": 0.31784, + "11": 0.31454, + "12": 0.32345, + "13": 0.31961, + "14": 0.31476, + "15": 0.31408, + "16": 0.32159, + "17": 0.31403, + "18": 0.31562, + "19": 0.32035, + "20": 0.31437, + "21": 0.50323, + "22": 0.33172, + "23": 0.31117, + "24": 0.31643, + "25": 0.3168, + "26": 0.3138, + "27": 0.31191, + "28": 0.31811, + "29": 0.31647, + "30": 0.31136, + "31": 0.31853, + "32": 0.31298, + "33": 0.3122, + "34": 0.3186, + "35": 0.31452, + "36": 0.32563, + "37": 0.31553, + "38": 0.31645, + "39": 0.31114, + "40": 0.3168, + "41": 0.31551, + "42": 0.31104, + "43": 0.31222, + "44": 0.31802, + "45": 0.53643, + "46": 0.3183, + "47": 0.3153, + "48": 0.31286, + "49": 0.31479, + "50": 0.31499, + "51": 0.3247, + "52": 0.31654, + "53": 0.3232, + "54": 0.32124, + "55": 0.31559, + "56": 0.32351, + "57": 0.3268, + "58": 0.31694, + "59": 0.31819, + "60": 0.3242, + "61": 0.31589, + "62": 0.31803, + "63": 0.32889, + "64": 0.31711, + "65": 0.3785, + "66": 0.37396, + "67": 0.33125, + "68": 0.31565, + "69": 0.32166, + "70": 0.37482, + "71": 0.37713, + "72": 0.37561, + "73": 0.37465, + "74": 0.37751, + "75": 0.37312, + "76": 0.37068, + "77": 0.3832, + "78": 0.3167, + "79": 0.31782, + "80": 0.32031, + "81": 0.31714, + "82": 0.31525, + "83": 0.32517, + "84": 0.31649, + "85": 0.31435, + "86": 0.32096, + "87": 0.31842, + "88": 0.31539, + "89": 0.32202, + "90": 0.3206, + "91": 0.31482, + "92": 0.32002, + "93": 0.31779, + "94": 0.31471, + "95": 0.31708, + "96": 0.31884, + "97": 0.31586, + "98": 0.31494, + "99": 0.32657, + "100": 0.31839 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..d6b97c844a2 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86122, + "2": 10.85774, + "3": 10.86039, + "4": 10.84813, + "5": 10.88242, + "6": 10.88645, + "7": 10.86227, + "8": 10.86932, + "9": 10.86444, + "10": 10.83506, + "11": 10.87765, + "12": 10.87384, + "13": 10.87945, + "14": 10.88919, + "15": 10.82738, + "16": 10.83105, + "17": 10.79888, + "18": 10.82441, + "19": 10.81363, + "20": 10.72743, + "21": 10.71638, + "22": 10.57153, + "23": 10.7269, + "24": 10.61223, + "25": 10.55753, + "26": 10.60603, + "27": 10.61792, + "28": 10.57695, + "29": 10.59633, + "30": 10.37895, + "31": 10.13125, + "32": 10.47822, + "33": 10.46894, + "34": 10.22715, + "35": 10.28321, + "36": 10.22751, + "37": 10.35397, + "38": 10.20483, + "39": 10.40755, + "40": 10.08785, + "41": 10.1591, + "42": 10.21601, + "43": 9.84821, + "44": 9.9651, + "45": 9.82625, + "46": 9.83468, + "47": 10.15337, + "48": 9.84529, + "49": 9.52926, + "50": 9.91327, + "51": 9.8517, + "52": 9.74686, + "53": 10.07204, + "54": 9.95738, + "55": 9.87788, + "56": 9.62943, + "57": 9.48988, + "58": 9.83265, + "59": 9.58831, + "60": 9.50874, + "61": 9.69495, + "62": 9.99373, + "63": 9.377, + "64": 9.78004, + "65": 8.95103, + "66": 9.71392, + "67": 9.37884, + "68": 9.78831, + "69": 9.79096, + "70": 9.73167, + "71": 9.61776, + "72": 9.59099, + "73": 9.49436, + "74": 8.95001, + "75": 9.43681, 
+ "76": 9.09852, + "77": 10.06447, + "78": 9.72944, + "79": 9.37805, + "80": 9.41156, + "81": 9.48537, + "82": 9.69592, + "83": 9.31981, + "84": 9.42306, + "85": 9.61613, + "86": 9.07185, + "87": 9.59282, + "88": 9.75055, + "89": 9.61194, + "90": 9.8217, + "91": 9.35308, + "92": 9.36305, + "93": 9.08788, + "94": 8.83439, + "95": 9.5191, + "96": 9.52647, + "97": 9.31412, + "98": 9.67541, + "99": 8.88941, + "100": 9.40588 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1778.0, + "2": 1875.0, + "3": 1879.0, + "4": 1912.0, + "5": 2219.0, + "6": 2163.0, + "7": 2113.0, + "8": 1747.0, + "9": 2049.0, + "10": 1530.0, + "11": 2113.0, + "12": 1959.0, + "13": 2134.0, + "14": 2055.0, + "15": 2125.0, + "16": 2139.0, + "17": 1988.0, + "18": 1892.0, + "19": 1991.0, + "20": 1867.0, + "21": 2023.0, + "22": 1865.0, + "23": 2185.0, + "24": 1774.0, + "25": 1773.0, + "26": 1990.0, + "27": 2061.0, + "28": 2215.0, + "29": 2186.0, + "30": 2129.0, + "31": 1794.0, + "32": 2109.0, + "33": 2422.0, + "34": 2135.0, + "35": 2169.0, + "36": 2127.0, + "37": 2432.0, + "38": 2490.0, + "39": 2495.0, + "40": 2486.0, + "41": 2465.0, + "42": 2535.0, + "43": 2216.0, + "44": 2407.0, + "45": 2335.0, + "46": 2617.0, + "47": 2830.0, + "48": 2480.0, + "49": 2492.0, + "50": 2687.0, + "51": 2863.0, + "52": 2881.0, + "53": 3220.0, + "54": 2894.0, + "55": 2652.0, + "56": 3006.0, + "57": 2561.0, + "58": 3273.0, + "59": 3039.0, + "60": 2765.0, + "61": 3310.0, + "62": 2936.0, + "63": 2630.0, + "64": 3230.0, + "65": 2946.0, + "66": 3500.0, + "67": 2976.0, + "68": 2944.0, + "69": 3117.0, + "70": 3629.0, + "71": 3255.0, + "72": 2633.0, + "73": 3338.0, + "74": 2172.0, + "75": 2702.0, + "76": 3162.0, + "77": 3850.0, + "78": 3590.0, + "79": 3658.0, + "80": 3866.0, + "81": 3976.0, + "82": 3680.0, + "83": 3153.0, + "84": 3586.0, + "85": 3517.0, + "86": 3137.0, + "87": 4177.0, + "88": 3589.0, + "89": 3849.0, + "90": 3349.0, + "91": 2936.0, + "92": 3526.0, + "93": 
2965.0, + "94": 3772.0, + "95": 3530.0, + "96": 3774.0, + "97": 3636.0, + "98": 4064.0, + "99": 3394.0, + "100": 3530.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 269891584.0, + "2": 269891584.0, + "3": 269891584.0, + "4": 269891584.0, + "5": 269891584.0, + "6": 269891584.0, + "7": 269891584.0, + "8": 269891584.0, + "9": 269891584.0, + "10": 269891584.0, + "11": 269891584.0, + "12": 269891584.0, + "13": 269891584.0, + "14": 269891584.0, + "15": 269891584.0, + "16": 269891584.0, + "17": 269891584.0, + "18": 269891584.0, + "19": 269891584.0, + "20": 269891584.0, + "21": 269891584.0, + "22": 269891584.0, + "23": 269891584.0, + "24": 269891584.0, + "25": 269891584.0, + "26": 269891584.0, + "27": 269891584.0, + "28": 269891584.0, + "29": 269891584.0, + "30": 269891584.0, + "31": 269891584.0, + "32": 269891584.0, + "33": 269891584.0, + "34": 269891584.0, + "35": 269891584.0, + "36": 269891584.0, + "37": 269891584.0, + "38": 269891584.0, + "39": 269891584.0, + "40": 269891584.0, + "41": 269891584.0, + "42": 269891584.0, + "43": 269891584.0, + "44": 269891584.0, + "45": 269891584.0, + "46": 269891584.0, + "47": 269891584.0, + "48": 269891584.0, + "49": 269891584.0, + "50": 269891584.0, + "51": 269891584.0, + "52": 269891584.0, + "53": 269891584.0, + "54": 269891584.0, + "55": 269891584.0, + "56": 269891584.0, + "57": 269891584.0, + "58": 269891584.0, + "59": 269891584.0, + "60": 269891584.0, + "61": 269891584.0, + "62": 269891584.0, + "63": 269891584.0, + "64": 269891584.0, + "65": 269891584.0, + "66": 269891584.0, + "67": 269891584.0, + "68": 269891584.0, + "69": 269891584.0, + "70": 269891584.0, + "71": 269891584.0, + "72": 269891584.0, + "73": 269891584.0, + "74": 269891584.0, + "75": 269891584.0, + "76": 269891584.0, + "77": 269891584.0, + "78": 269891584.0, + "79": 269891584.0, + "80": 269891584.0, + "81": 269891584.0, + "82": 269891584.0, + "83": 269891584.0, + "84": 269891584.0, + "85": 
269891584.0, + "86": 269891584.0, + "87": 269891584.0, + "88": 269891584.0, + "89": 269891584.0, + "90": 269891584.0, + "91": 269891584.0, + "92": 269891584.0, + "93": 269891584.0, + "94": 269891584.0, + "95": 269891584.0, + "96": 269891584.0, + "97": 269891584.0, + "98": 269891584.0, + "99": 269891584.0, + "100": 269891584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1448634368.0, + "2": 1515676160.0, + "3": 1515676672.0, + "4": 1515676672.0, + "5": 1515676672.0, + "6": 1515676672.0, + "7": 1515676672.0, + "8": 1515676672.0, + "9": 1515676672.0, + "10": 1515676672.0, + "11": 1515676672.0, + "12": 1515676672.0, + "13": 1515676672.0, + "14": 1515676672.0, + "15": 1515676672.0, + "16": 1515676672.0, + "17": 1515676672.0, + "18": 1515676672.0, + "19": 1515676672.0, + "20": 1515676672.0, + "21": 1515676672.0, + "22": 1515676672.0, + "23": 1515676672.0, + "24": 1515676672.0, + "25": 1515676672.0, + "26": 1515676672.0, + "27": 1515676672.0, + "28": 1515676672.0, + "29": 1515676672.0, + "30": 1515676672.0, + "31": 1515676672.0, + "32": 1515676672.0, + "33": 1515676672.0, + "34": 1515676672.0, + "35": 1515676672.0, + "36": 1515676672.0, + "37": 1515676672.0, + "38": 1515676672.0, + "39": 1515676672.0, + "40": 1515676672.0, + "41": 1515676672.0, + "42": 1515676672.0, + "43": 1515676672.0, + "44": 1515676672.0, + "45": 1515676672.0, + "46": 1515676672.0, + "47": 1515676672.0, + "48": 1515676672.0, + "49": 1515676672.0, + "50": 1515676672.0, + "51": 1515676672.0, + "52": 1515676672.0, + "53": 1515676672.0, + "54": 1515676672.0, + "55": 1515676672.0, + "56": 1515676672.0, + "57": 1515676672.0, + "58": 1515676672.0, + "59": 1515676672.0, + "60": 1515676672.0, + "61": 1515676672.0, + "62": 1515676672.0, + "63": 1515676672.0, + "64": 1515676672.0, + "65": 1515676672.0, + "66": 1515676672.0, + "67": 1515676672.0, + "68": 1515676672.0, + "69": 1515676672.0, + "70": 1515676672.0, + "71": 1515676672.0, + 
"72": 1515676672.0, + "73": 1515676672.0, + "74": 1515676672.0, + "75": 1515676672.0, + "76": 1515676672.0, + "77": 1515676672.0, + "78": 1515676672.0, + "79": 1515676672.0, + "80": 1515676672.0, + "81": 1515676672.0, + "82": 1515676672.0, + "83": 1515676672.0, + "84": 1515676672.0, + "85": 1515676672.0, + "86": 1515676672.0, + "87": 1515676672.0, + "88": 1515676672.0, + "89": 1515676672.0, + "90": 1515676672.0, + "91": 1515676672.0, + "92": 1515676672.0, + "93": 1515676672.0, + "94": 1515676672.0, + "95": 1515676672.0, + "96": 1515676672.0, + "97": 1515676672.0, + "98": 1515676672.0, + "99": 1515676672.0, + "100": 1515676672.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.24087, + "2": 0.38421, + "3": 0.56749, + "4": 0.65933, + "5": 0.54431, + "6": 0.31357, + "7": 0.3132, + "8": 0.3209, + "9": 0.31313, + "10": 0.31289, + "11": 0.32184, + "12": 0.31161, + "13": 0.31148, + "14": 0.31861, + "15": 0.31107, + "16": 0.31197, + "17": 0.31486, + "18": 0.31483, + "19": 0.3123, + "20": 0.31575, + "21": 0.3191, + "22": 0.59133, + "23": 0.31699, + "24": 0.31207, + "25": 0.31265, + "26": 0.32043, + "27": 0.31399, + "28": 0.31217, + "29": 0.32071, + "30": 0.31121, + "31": 0.31193, + "32": 0.31757, + "33": 0.31731, + "34": 0.31154, + "35": 0.31452, + "36": 0.31823, + "37": 0.31136, + "38": 0.31179, + "39": 0.3179, + "40": 0.31084, + "41": 0.31144, + "42": 0.32061, + "43": 0.31112, + "44": 0.31208, + "45": 0.31884, + "46": 0.31114, + "47": 0.3115, + "48": 0.31509, + "49": 0.31746, + "50": 0.31201, + "51": 0.31606, + "52": 0.31175, + "53": 0.3173, + "54": 0.30985, + "55": 0.30955, + "56": 0.31445, + "57": 0.30938, + "58": 0.30971, + "59": 0.31705, + "60": 0.30877, + "61": 0.30909, + "62": 0.31179, + "63": 0.31576, + "64": 0.31125, + "65": 0.3109, + "66": 0.32501, + "67": 0.31051, + "68": 0.31016, + "69": 0.32083, + "70": 0.3086, + "71": 0.30949, + "72": 0.32156, + "73": 0.31102, + "74": 0.30938, + "75": 0.31802, 
+ "76": 0.30998, + "77": 0.3092, + "78": 0.31341, + "79": 0.32109, + "80": 0.31014, + "81": 0.31196, + "82": 0.31938, + "83": 0.31078, + "84": 0.31077, + "85": 0.32048, + "86": 0.31124, + "87": 0.31023, + "88": 0.31956, + "89": 0.30978, + "90": 0.31199, + "91": 0.31731, + "92": 0.30981, + "93": 0.31067, + "94": 0.31383, + "95": 0.31976, + "96": 0.30998, + "97": 0.31195, + "98": 0.32159, + "99": 0.30804, + "100": 0.31193 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 6f422f501de..c387be284cf 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.84163, + "2": 10.85598, + "3": 10.84413, + "4": 10.84124, "5": 10.85872, + "6": 10.86316, + "7": 10.85184, + "8": 10.84645, + "9": 10.85647, "10": 10.81849, + "11": 10.85923, + "12": 10.84285, + "13": 10.86432, + "14": 10.85423, "15": 10.81015, + "16": 10.81588, + "17": 10.78949, + "18": 10.79683, + "19": 10.79073, "20": 10.70819, + "21": 10.69322, + "22": 10.58504, + "23": 10.70217, + "24": 10.60546, "25": 10.57102, + "26": 10.61967, + "27": 10.61501, + "28": 10.56369, + "29": 10.56725, "30": 10.39695, + "31": 10.16591, + "32": 10.4573, + "33": 10.45199, + "34": 10.2392, "35": 10.28351, + "36": 10.24677, + "37": 10.3427, + "38": 
10.20546, + "39": 10.39187, "40": 10.09767, + "41": 10.1526, + "42": 10.21051, + "43": 9.87726, + "44": 9.98291, "45": 9.86165, + "46": 9.83587, + "47": 10.13369, + "48": 9.87212, + "49": 9.56121, "50": 9.91045, + "51": 9.85839, + "52": 9.7506, + "53": 10.05817, + "54": 9.96076, "55": 9.88738, + "56": 9.6344, + "57": 9.4967, + "58": 9.83343, + "59": 9.59391, "60": 9.51376, + "61": 9.69928, + "62": 9.98089, + "63": 9.39065, + "64": 9.77599, "65": 8.9571, + "66": 9.70054, + "67": 9.37, + "68": 9.78529, + "69": 9.78966, "70": 9.74676, + "71": 9.61906, + "72": 9.58963, + "73": 9.49629, + "74": 8.94963, "75": 9.42381, + "76": 9.07799, + "77": 10.07105, + "78": 9.72632, + "79": 9.37966, "80": 9.40721, + "81": 9.48238, + "82": 9.70152, + "83": 9.30657, + "84": 9.41464, "85": 9.61784, + "86": 9.08212, + "87": 9.59511, + "88": 9.75008, + "89": 9.60356, "90": 9.82256, + "91": 9.33721, + "92": 9.35861, + "93": 9.07956, + "94": 8.83268, "95": 9.51351, + "96": 9.52947, + "97": 9.31813, + "98": 9.67451, + "99": 8.88607, "100": 9.40106 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1736.0, + "2": 1692.0, + "3": 1695.0, + "4": 1761.0, "5": 1955.0, + "6": 1791.0, + "7": 1943.0, + "8": 1681.0, + "9": 1884.0, "10": 1441.0, + "11": 1942.0, + "12": 1786.0, + "13": 1940.0, + "14": 1862.0, "15": 1907.0, + "16": 1947.0, + "17": 1827.0, + "18": 1907.0, + "19": 1818.0, "20": 1700.0, + "21": 1911.0, + "22": 1720.0, + "23": 1938.0, + "24": 1707.0, "25": 1686.0, + "26": 1792.0, + "27": 1891.0, + "28": 1976.0, + "29": 1958.0, "30": 1941.0, + "31": 1622.0, + "32": 1970.0, + "33": 2129.0, + "34": 1830.0, "35": 1907.0, + "36": 1892.0, + "37": 2395.0, + "38": 2161.0, + "39": 2493.0, "40": 2224.0, + "41": 2201.0, + "42": 2175.0, + "43": 1920.0, + "44": 1955.0, "45": 1956.0, + "46": 2166.0, + "47": 2517.0, + "48": 2272.0, + "49": 2211.0, "50": 2232.0, + "51": 2621.0, + "52": 2597.0, + "53": 2926.0, + "54": 2633.0, "55": 2206.0, + 
"56": 2627.0, + "57": 2328.0, + "58": 2886.0, + "59": 2639.0, "60": 2157.0, + "61": 2736.0, + "62": 2544.0, + "63": 2332.0, + "64": 2948.0, "65": 2630.0, + "66": 2931.0, + "67": 2717.0, + "68": 2643.0, + "69": 2955.0, "70": 3040.0, + "71": 2882.0, + "72": 2390.0, + "73": 2812.0, + "74": 1844.0, "75": 2461.0, + "76": 3067.0, + "77": 3152.0, + "78": 3018.0, + "79": 3008.0, "80": 3104.0, + "81": 3589.0, + "82": 3218.0, + "83": 2748.0, + "84": 3217.0, "85": 3167.0, + "86": 2876.0, + "87": 3604.0, + "88": 3017.0, + "89": 3249.0, "90": 3069.0, + "91": 2865.0, + "92": 3074.0, + "93": 2680.0, + "94": 3392.0, "95": 3206.0, + "96": 3401.0, + "97": 3107.0, + "98": 3624.0, + "99": 3007.0, "100": 3111.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 299203072.0, + "2": 299203072.0, + "3": 299203072.0, + "4": 299203072.0, "5": 299203072.0, + "6": 299203072.0, + "7": 299203072.0, + "8": 299203072.0, + "9": 299203072.0, "10": 299203072.0, + "11": 299203072.0, + "12": 299203072.0, + "13": 299203072.0, + "14": 299203072.0, "15": 299203072.0, + "16": 299203072.0, + "17": 299203072.0, + "18": 299203072.0, + "19": 299203072.0, "20": 299203072.0, + "21": 299203072.0, + "22": 299203072.0, + "23": 299203072.0, + "24": 299203072.0, "25": 299203072.0, + "26": 299203072.0, + "27": 299203072.0, + "28": 299203072.0, + "29": 299203072.0, "30": 299203072.0, + "31": 299203072.0, + "32": 299203072.0, + "33": 299203072.0, + "34": 299203072.0, "35": 299203072.0, + "36": 299203072.0, + "37": 299203072.0, + "38": 299203072.0, + "39": 299203072.0, "40": 299203072.0, + "41": 299203072.0, + "42": 299203072.0, + "43": 299203072.0, + "44": 299203072.0, "45": 299203072.0, + "46": 299203072.0, + "47": 299203072.0, + "48": 299203072.0, + "49": 299203072.0, "50": 299203072.0, + "51": 299203072.0, + "52": 299203072.0, + "53": 299203072.0, + "54": 299203072.0, "55": 299203072.0, + "56": 299203072.0, + "57": 299203072.0, + "58": 
299203072.0, + "59": 299203072.0, "60": 299203072.0, + "61": 299203072.0, + "62": 299203072.0, + "63": 299203072.0, + "64": 299203072.0, "65": 299203072.0, + "66": 299203072.0, + "67": 299203072.0, + "68": 299203072.0, + "69": 299203072.0, "70": 299203072.0, + "71": 299203072.0, + "72": 299203072.0, + "73": 299203072.0, + "74": 299203072.0, "75": 299203072.0, + "76": 299203072.0, + "77": 299203072.0, + "78": 299203072.0, + "79": 299203072.0, "80": 299203072.0, + "81": 299203072.0, + "82": 299203072.0, + "83": 299203072.0, + "84": 299203072.0, "85": 299203072.0, + "86": 299203072.0, + "87": 299203072.0, + "88": 299203072.0, + "89": 299203072.0, "90": 299203072.0, + "91": 299203072.0, + "92": 299203072.0, + "93": 299203072.0, + "94": 299203072.0, "95": 299203072.0, + "96": 299203072.0, + "97": 299203072.0, + "98": 299203072.0, + "99": 299203072.0, "100": 299203072.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 977125888.0, + "2": 1042071040.0, + "3": 1042071040.0, + "4": 1042071040.0, "5": 1042071040.0, + "6": 1042071040.0, + "7": 1042071040.0, + "8": 1042071040.0, + "9": 1042071040.0, "10": 1042071040.0, + "11": 1042071040.0, + "12": 1042071040.0, + "13": 1042071040.0, + "14": 1042071040.0, "15": 1042071040.0, + "16": 1042071040.0, + "17": 1042071040.0, + "18": 1042071040.0, + "19": 1042071040.0, "20": 1042071040.0, + "21": 1042071040.0, + "22": 1042071040.0, + "23": 1042071040.0, + "24": 1042071040.0, "25": 1042071040.0, + "26": 1042071040.0, + "27": 1042071040.0, + "28": 1042071040.0, + "29": 1042071040.0, "30": 1042071040.0, + "31": 1042071040.0, + "32": 1042071040.0, + "33": 1042071040.0, + "34": 1042071040.0, "35": 1042071040.0, + "36": 1042071040.0, + "37": 1042071040.0, + "38": 1042071040.0, + "39": 1042071040.0, "40": 1042071040.0, + "41": 1042071040.0, + "42": 1042071040.0, + "43": 1042071040.0, + "44": 1042071040.0, "45": 1042071040.0, + "46": 1042071040.0, + "47": 
1042071040.0, + "48": 1042071040.0, + "49": 1042071040.0, "50": 1042071040.0, + "51": 1042071040.0, + "52": 1042071040.0, + "53": 1042071040.0, + "54": 1042071040.0, "55": 1042071040.0, + "56": 1042071040.0, + "57": 1042071040.0, + "58": 1042071040.0, + "59": 1042071040.0, "60": 1042071040.0, + "61": 1042071040.0, + "62": 1042071040.0, + "63": 1042071040.0, + "64": 1042071040.0, "65": 1042071040.0, + "66": 1042071040.0, + "67": 1042071040.0, + "68": 1042071040.0, + "69": 1042071040.0, "70": 1042071040.0, + "71": 1042071040.0, + "72": 1042071040.0, + "73": 1042071040.0, + "74": 1042071040.0, "75": 1042071040.0, + "76": 1042071040.0, + "77": 1042071040.0, + "78": 1042071040.0, + "79": 1042071040.0, "80": 1042071040.0, + "81": 1042071040.0, + "82": 1042071040.0, + "83": 1042071040.0, + "84": 1042071040.0, "85": 1042071040.0, + "86": 1042071040.0, + "87": 1042071040.0, + "88": 1042071040.0, + "89": 1042071040.0, "90": 1042071040.0, + "91": 1042071040.0, + "92": 1042071040.0, + "93": 1042071040.0, + "94": 1042071040.0, "95": 1042071040.0, + "96": 1042071040.0, + "97": 1042071040.0, + "98": 1042071040.0, + "99": 1042071040.0, "100": 1042071040.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 8.52165, - "5": 0.20516, - "10": 0.19368, - "15": 0.19068, - "20": 0.19109, - "25": 0.19345, - "30": 0.19142, - "35": 0.19012, - "40": 0.18948, - "45": 0.1901, - "50": 0.19384, - "55": 0.20627, - "60": 0.18816, - "65": 0.19043, - "70": 0.23342, - "75": 0.19438, - "80": 0.19064, - "85": 0.19143, - "90": 0.19257, - "95": 0.19189, - "100": 0.19388 + "1": 9.66271, + "2": 0.23225, + "3": 0.21983, + "4": 0.21408, + "5": 0.21473, + "6": 0.21644, + "7": 0.21513, + "8": 0.21892, + "9": 0.21351, + "10": 0.21576, + "11": 0.21747, + "12": 0.21985, + "13": 0.21564, + "14": 0.2155, + "15": 0.21384, + "16": 0.2162, + "17": 0.21558, + "18": 0.21508, + "19": 0.21618, + "20": 0.21836, + "21": 0.21423, + "22": 0.21684, + "23": 
0.21439, + "24": 0.21562, + "25": 0.21579, + "26": 0.21914, + "27": 0.21564, + "28": 0.21449, + "29": 0.22032, + "30": 0.22136, + "31": 0.22263, + "32": 0.21897, + "33": 0.21534, + "34": 0.21759, + "35": 0.21572, + "36": 0.21721, + "37": 0.21402, + "38": 0.21621, + "39": 0.21783, + "40": 0.21822, + "41": 0.21596, + "42": 0.21203, + "43": 0.21782, + "44": 0.21805, + "45": 0.2183, + "46": 0.21676, + "47": 0.21734, + "48": 0.2176, + "49": 0.21836, + "50": 0.21593, + "51": 0.22189, + "52": 0.21722, + "53": 0.22114, + "54": 0.21648, + "55": 0.21825, + "56": 0.21733, + "57": 0.21702, + "58": 0.21752, + "59": 0.21546, + "60": 0.2151, + "61": 0.21602, + "62": 0.22135, + "63": 0.21659, + "64": 0.21618, + "65": 0.21569, + "66": 0.21864, + "67": 0.22799, + "68": 0.21833, + "69": 0.21643, + "70": 0.21672, + "71": 0.21562, + "72": 0.21799, + "73": 0.21791, + "74": 0.21898, + "75": 0.2183, + "76": 0.22117, + "77": 0.22, + "78": 0.2188, + "79": 0.21888, + "80": 0.21768, + "81": 0.22547, + "82": 0.2175, + "83": 0.2222, + "84": 0.21749, + "85": 0.22304, + "86": 0.22141, + "87": 0.22658, + "88": 0.21977, + "89": 0.21928, + "90": 0.21911, + "91": 0.22126, + "92": 0.21903, + "93": 0.22164, + "94": 0.21864, + "95": 0.21968, + "96": 0.21892, + "97": 0.21956, + "98": 0.21795, + "99": 0.22313, + "100": 0.2196 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..0a3544b2d93 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ 
+{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84163, + "2": 10.85598, + "3": 10.84413, + "4": 10.84124, + "5": 10.85872, + "6": 10.86316, + "7": 10.85184, + "8": 10.84645, + "9": 10.85647, + "10": 10.81849, + "11": 10.85923, + "12": 10.84285, + "13": 10.86432, + "14": 10.85423, + "15": 10.81015, + "16": 10.81588, + "17": 10.78949, + "18": 10.79683, + "19": 10.79073, + "20": 10.70819, + "21": 10.69322, + "22": 10.58504, + "23": 10.70217, + "24": 10.60546, + "25": 10.57102, + "26": 10.61967, + "27": 10.61501, + "28": 10.56369, + "29": 10.56725, + "30": 10.39695, + "31": 10.16591, + "32": 10.4573, + "33": 10.45199, + "34": 10.2392, + "35": 10.28351, + "36": 10.24677, + "37": 10.3427, + "38": 10.20546, + "39": 10.39187, + "40": 10.09767, + "41": 10.1526, + "42": 10.21051, + "43": 9.87726, + "44": 9.98291, + "45": 9.86165, + "46": 9.83587, + "47": 10.13369, + "48": 9.87212, + "49": 9.56121, + "50": 9.91045, + "51": 9.85839, + "52": 9.7506, + "53": 10.05817, + "54": 9.96076, + "55": 9.88738, + "56": 9.6344, + "57": 9.4967, + "58": 9.83343, + "59": 9.59391, + "60": 9.51376, + "61": 9.69928, + "62": 9.98089, + "63": 9.39065, + "64": 9.77599, + "65": 8.9571, + "66": 9.70054, + "67": 9.37, + "68": 9.78529, + "69": 9.78966, + "70": 9.74676, + "71": 9.61906, + "72": 9.58963, + "73": 9.49629, + "74": 8.94963, + "75": 9.42381, + "76": 9.07799, + "77": 10.07105, + "78": 9.72632, + "79": 9.37966, + "80": 9.40721, + "81": 9.48238, + "82": 9.70152, + "83": 9.30657, + "84": 9.41464, + "85": 9.61784, + "86": 9.08212, + "87": 9.59511, + "88": 9.75008, + "89": 9.60356, + "90": 9.82256, + "91": 9.33721, + "92": 9.35861, + "93": 9.07956, + "94": 8.83268, + "95": 9.51351, + "96": 9.52947, + "97": 9.31813, + "98": 9.67451, + "99": 8.88607, + "100": 9.40106 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1736.0, + "2": 1692.0, + "3": 1695.0, + "4": 1761.0, + "5": 1955.0, + "6": 
1791.0, + "7": 1943.0, + "8": 1681.0, + "9": 1884.0, + "10": 1441.0, + "11": 1942.0, + "12": 1786.0, + "13": 1940.0, + "14": 1862.0, + "15": 1907.0, + "16": 1947.0, + "17": 1827.0, + "18": 1907.0, + "19": 1818.0, + "20": 1700.0, + "21": 1911.0, + "22": 1720.0, + "23": 1938.0, + "24": 1707.0, + "25": 1686.0, + "26": 1792.0, + "27": 1891.0, + "28": 1976.0, + "29": 1958.0, + "30": 1941.0, + "31": 1622.0, + "32": 1970.0, + "33": 2129.0, + "34": 1830.0, + "35": 1907.0, + "36": 1892.0, + "37": 2395.0, + "38": 2161.0, + "39": 2493.0, + "40": 2224.0, + "41": 2201.0, + "42": 2175.0, + "43": 1920.0, + "44": 1955.0, + "45": 1956.0, + "46": 2166.0, + "47": 2517.0, + "48": 2272.0, + "49": 2211.0, + "50": 2232.0, + "51": 2621.0, + "52": 2597.0, + "53": 2926.0, + "54": 2633.0, + "55": 2206.0, + "56": 2627.0, + "57": 2328.0, + "58": 2886.0, + "59": 2639.0, + "60": 2157.0, + "61": 2736.0, + "62": 2544.0, + "63": 2332.0, + "64": 2948.0, + "65": 2630.0, + "66": 2931.0, + "67": 2717.0, + "68": 2643.0, + "69": 2955.0, + "70": 3040.0, + "71": 2882.0, + "72": 2390.0, + "73": 2812.0, + "74": 1844.0, + "75": 2461.0, + "76": 3067.0, + "77": 3152.0, + "78": 3018.0, + "79": 3008.0, + "80": 3104.0, + "81": 3589.0, + "82": 3218.0, + "83": 2748.0, + "84": 3217.0, + "85": 3167.0, + "86": 2876.0, + "87": 3604.0, + "88": 3017.0, + "89": 3249.0, + "90": 3069.0, + "91": 2865.0, + "92": 3074.0, + "93": 2680.0, + "94": 3392.0, + "95": 3206.0, + "96": 3401.0, + "97": 3107.0, + "98": 3624.0, + "99": 3007.0, + "100": 3111.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 299203072.0, + "2": 299203072.0, + "3": 299203072.0, + "4": 299203072.0, + "5": 299203072.0, + "6": 299203072.0, + "7": 299203072.0, + "8": 299203072.0, + "9": 299203072.0, + "10": 299203072.0, + "11": 299203072.0, + "12": 299203072.0, + "13": 299203072.0, + "14": 299203072.0, + "15": 299203072.0, + "16": 299203072.0, + "17": 299203072.0, + "18": 299203072.0, + "19": 
299203072.0, + "20": 299203072.0, + "21": 299203072.0, + "22": 299203072.0, + "23": 299203072.0, + "24": 299203072.0, + "25": 299203072.0, + "26": 299203072.0, + "27": 299203072.0, + "28": 299203072.0, + "29": 299203072.0, + "30": 299203072.0, + "31": 299203072.0, + "32": 299203072.0, + "33": 299203072.0, + "34": 299203072.0, + "35": 299203072.0, + "36": 299203072.0, + "37": 299203072.0, + "38": 299203072.0, + "39": 299203072.0, + "40": 299203072.0, + "41": 299203072.0, + "42": 299203072.0, + "43": 299203072.0, + "44": 299203072.0, + "45": 299203072.0, + "46": 299203072.0, + "47": 299203072.0, + "48": 299203072.0, + "49": 299203072.0, + "50": 299203072.0, + "51": 299203072.0, + "52": 299203072.0, + "53": 299203072.0, + "54": 299203072.0, + "55": 299203072.0, + "56": 299203072.0, + "57": 299203072.0, + "58": 299203072.0, + "59": 299203072.0, + "60": 299203072.0, + "61": 299203072.0, + "62": 299203072.0, + "63": 299203072.0, + "64": 299203072.0, + "65": 299203072.0, + "66": 299203072.0, + "67": 299203072.0, + "68": 299203072.0, + "69": 299203072.0, + "70": 299203072.0, + "71": 299203072.0, + "72": 299203072.0, + "73": 299203072.0, + "74": 299203072.0, + "75": 299203072.0, + "76": 299203072.0, + "77": 299203072.0, + "78": 299203072.0, + "79": 299203072.0, + "80": 299203072.0, + "81": 299203072.0, + "82": 299203072.0, + "83": 299203072.0, + "84": 299203072.0, + "85": 299203072.0, + "86": 299203072.0, + "87": 299203072.0, + "88": 299203072.0, + "89": 299203072.0, + "90": 299203072.0, + "91": 299203072.0, + "92": 299203072.0, + "93": 299203072.0, + "94": 299203072.0, + "95": 299203072.0, + "96": 299203072.0, + "97": 299203072.0, + "98": 299203072.0, + "99": 299203072.0, + "100": 299203072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 977125888.0, + "2": 1042071040.0, + "3": 1042071040.0, + "4": 1042071040.0, + "5": 1042071040.0, + "6": 1042071040.0, + "7": 1042071040.0, + "8": 1042071040.0, + 
"9": 1042071040.0, + "10": 1042071040.0, + "11": 1042071040.0, + "12": 1042071040.0, + "13": 1042071040.0, + "14": 1042071040.0, + "15": 1042071040.0, + "16": 1042071040.0, + "17": 1042071040.0, + "18": 1042071040.0, + "19": 1042071040.0, + "20": 1042071040.0, + "21": 1042071040.0, + "22": 1042071040.0, + "23": 1042071040.0, + "24": 1042071040.0, + "25": 1042071040.0, + "26": 1042071040.0, + "27": 1042071040.0, + "28": 1042071040.0, + "29": 1042071040.0, + "30": 1042071040.0, + "31": 1042071040.0, + "32": 1042071040.0, + "33": 1042071040.0, + "34": 1042071040.0, + "35": 1042071040.0, + "36": 1042071040.0, + "37": 1042071040.0, + "38": 1042071040.0, + "39": 1042071040.0, + "40": 1042071040.0, + "41": 1042071040.0, + "42": 1042071040.0, + "43": 1042071040.0, + "44": 1042071040.0, + "45": 1042071040.0, + "46": 1042071040.0, + "47": 1042071040.0, + "48": 1042071040.0, + "49": 1042071040.0, + "50": 1042071040.0, + "51": 1042071040.0, + "52": 1042071040.0, + "53": 1042071040.0, + "54": 1042071040.0, + "55": 1042071040.0, + "56": 1042071040.0, + "57": 1042071040.0, + "58": 1042071040.0, + "59": 1042071040.0, + "60": 1042071040.0, + "61": 1042071040.0, + "62": 1042071040.0, + "63": 1042071040.0, + "64": 1042071040.0, + "65": 1042071040.0, + "66": 1042071040.0, + "67": 1042071040.0, + "68": 1042071040.0, + "69": 1042071040.0, + "70": 1042071040.0, + "71": 1042071040.0, + "72": 1042071040.0, + "73": 1042071040.0, + "74": 1042071040.0, + "75": 1042071040.0, + "76": 1042071040.0, + "77": 1042071040.0, + "78": 1042071040.0, + "79": 1042071040.0, + "80": 1042071040.0, + "81": 1042071040.0, + "82": 1042071040.0, + "83": 1042071040.0, + "84": 1042071040.0, + "85": 1042071040.0, + "86": 1042071040.0, + "87": 1042071040.0, + "88": 1042071040.0, + "89": 1042071040.0, + "90": 1042071040.0, + "91": 1042071040.0, + "92": 1042071040.0, + "93": 1042071040.0, + "94": 1042071040.0, + "95": 1042071040.0, + "96": 1042071040.0, + "97": 1042071040.0, + "98": 1042071040.0, + "99": 1042071040.0, 
+ "100": 1042071040.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.6125, + "2": 0.23356, + "3": 0.21314, + "4": 0.21148, + "5": 0.20775, + "6": 0.20509, + "7": 0.19583, + "8": 0.19566, + "9": 0.19148, + "10": 0.19484, + "11": 0.20705, + "12": 0.2015, + "13": 0.18887, + "14": 0.1904, + "15": 0.19036, + "16": 0.18983, + "17": 0.1895, + "18": 0.19146, + "19": 0.18958, + "20": 0.18946, + "21": 0.19061, + "22": 0.19252, + "23": 0.18928, + "24": 0.19105, + "25": 0.18924, + "26": 0.18957, + "27": 0.19008, + "28": 0.19134, + "29": 0.18909, + "30": 0.1922, + "31": 0.1908, + "32": 0.18951, + "33": 0.18928, + "34": 0.19468, + "35": 0.19052, + "36": 0.19049, + "37": 0.19173, + "38": 0.18825, + "39": 0.1911, + "40": 0.18942, + "41": 0.1919, + "42": 0.19303, + "43": 0.19325, + "44": 0.19049, + "45": 0.18935, + "46": 0.18861, + "47": 0.19155, + "48": 0.19149, + "49": 0.1913, + "50": 0.19586, + "51": 0.20004, + "52": 0.19367, + "53": 0.19138, + "54": 0.1927, + "55": 0.19196, + "56": 0.19084, + "57": 0.19081, + "58": 0.19132, + "59": 0.18829, + "60": 0.19212, + "61": 0.19275, + "62": 0.19577, + "63": 0.18781, + "64": 0.1893, + "65": 0.18899, + "66": 0.19016, + "67": 0.1858, + "68": 0.1931, + "69": 0.18841, + "70": 0.18896, + "71": 0.18966, + "72": 0.18842, + "73": 0.19129, + "74": 0.19147, + "75": 0.19408, + "76": 0.19017, + "77": 0.18501, + "78": 0.18992, + "79": 0.18844, + "80": 0.18811, + "81": 0.19097, + "82": 0.18879, + "83": 0.18908, + "84": 0.18763, + "85": 0.1877, + "86": 0.18953, + "87": 0.1893, + "88": 0.18802, + "89": 0.18961, + "90": 0.18878, + "91": 0.18927, + "92": 0.18915, + "93": 0.19047, + "94": 0.19, + "95": 0.19146, + "96": 0.19061, + "97": 0.1925, + "98": 0.18915, + "99": 0.18916, + "100": 0.19162 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..6937fb9bd55 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84163, + "2": 10.85598, + "3": 10.84413, + "4": 10.84124, + "5": 10.85872, + "6": 10.86316, + "7": 10.85184, + "8": 10.84645, + "9": 10.85647, + "10": 10.81849, + "11": 10.85923, + "12": 10.84285, + "13": 10.86432, + "14": 10.85423, + "15": 10.81015, + "16": 10.81588, + "17": 10.78949, + "18": 10.79683, + "19": 10.79073, + "20": 10.70819, + "21": 10.69322, + "22": 10.58504, + "23": 10.70217, + "24": 10.60546, + "25": 10.57102, + "26": 10.61967, + "27": 10.61501, + "28": 10.56369, + "29": 10.56725, + "30": 10.39695, + "31": 10.16591, + "32": 10.4573, + "33": 10.45199, + "34": 10.2392, + "35": 10.28351, + "36": 10.24677, + "37": 10.3427, + "38": 10.20546, + "39": 10.39187, + "40": 10.09767, + "41": 10.1526, + "42": 10.21051, + "43": 9.87726, + "44": 9.98291, + "45": 9.86165, + "46": 9.83587, + "47": 10.13369, + "48": 9.87212, + "49": 9.56121, + "50": 9.91045, + "51": 9.85839, + "52": 9.7506, + "53": 10.05817, + "54": 9.96076, + "55": 9.88738, + "56": 9.6344, + "57": 9.4967, + "58": 9.83343, + "59": 9.59391, + "60": 9.51376, + "61": 9.69928, + "62": 9.98089, + "63": 9.39065, + "64": 9.77599, + "65": 8.9571, + "66": 9.70054, + "67": 9.37, + "68": 9.78529, + "69": 9.78966, + "70": 9.74676, + "71": 9.61906, + "72": 9.58963, + "73": 9.49629, + "74": 
8.94963, + "75": 9.42381, + "76": 9.07799, + "77": 10.07105, + "78": 9.72632, + "79": 9.37966, + "80": 9.40721, + "81": 9.48238, + "82": 9.70152, + "83": 9.30657, + "84": 9.41464, + "85": 9.61784, + "86": 9.08212, + "87": 9.59511, + "88": 9.75008, + "89": 9.60356, + "90": 9.82256, + "91": 9.33721, + "92": 9.35861, + "93": 9.07956, + "94": 8.83268, + "95": 9.51351, + "96": 9.52947, + "97": 9.31813, + "98": 9.67451, + "99": 8.88607, + "100": 9.40106 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1736.0, + "2": 1692.0, + "3": 1695.0, + "4": 1761.0, + "5": 1955.0, + "6": 1791.0, + "7": 1943.0, + "8": 1681.0, + "9": 1884.0, + "10": 1441.0, + "11": 1942.0, + "12": 1786.0, + "13": 1940.0, + "14": 1862.0, + "15": 1907.0, + "16": 1947.0, + "17": 1827.0, + "18": 1907.0, + "19": 1818.0, + "20": 1700.0, + "21": 1911.0, + "22": 1720.0, + "23": 1938.0, + "24": 1707.0, + "25": 1686.0, + "26": 1792.0, + "27": 1891.0, + "28": 1976.0, + "29": 1958.0, + "30": 1941.0, + "31": 1622.0, + "32": 1970.0, + "33": 2129.0, + "34": 1830.0, + "35": 1907.0, + "36": 1892.0, + "37": 2395.0, + "38": 2161.0, + "39": 2493.0, + "40": 2224.0, + "41": 2201.0, + "42": 2175.0, + "43": 1920.0, + "44": 1955.0, + "45": 1956.0, + "46": 2166.0, + "47": 2517.0, + "48": 2272.0, + "49": 2211.0, + "50": 2232.0, + "51": 2621.0, + "52": 2597.0, + "53": 2926.0, + "54": 2633.0, + "55": 2206.0, + "56": 2627.0, + "57": 2328.0, + "58": 2886.0, + "59": 2639.0, + "60": 2157.0, + "61": 2736.0, + "62": 2544.0, + "63": 2332.0, + "64": 2948.0, + "65": 2630.0, + "66": 2931.0, + "67": 2717.0, + "68": 2643.0, + "69": 2955.0, + "70": 3040.0, + "71": 2882.0, + "72": 2390.0, + "73": 2812.0, + "74": 1844.0, + "75": 2461.0, + "76": 3067.0, + "77": 3152.0, + "78": 3018.0, + "79": 3008.0, + "80": 3104.0, + "81": 3589.0, + "82": 3218.0, + "83": 2748.0, + "84": 3217.0, + "85": 3167.0, + "86": 2876.0, + "87": 3604.0, + "88": 3017.0, + "89": 3249.0, + "90": 3069.0, + "91": 2865.0, 
+ "92": 3074.0, + "93": 2680.0, + "94": 3392.0, + "95": 3206.0, + "96": 3401.0, + "97": 3107.0, + "98": 3624.0, + "99": 3007.0, + "100": 3111.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 299203072.0, + "2": 299203072.0, + "3": 299203072.0, + "4": 299203072.0, + "5": 299203072.0, + "6": 299203072.0, + "7": 299203072.0, + "8": 299203072.0, + "9": 299203072.0, + "10": 299203072.0, + "11": 299203072.0, + "12": 299203072.0, + "13": 299203072.0, + "14": 299203072.0, + "15": 299203072.0, + "16": 299203072.0, + "17": 299203072.0, + "18": 299203072.0, + "19": 299203072.0, + "20": 299203072.0, + "21": 299203072.0, + "22": 299203072.0, + "23": 299203072.0, + "24": 299203072.0, + "25": 299203072.0, + "26": 299203072.0, + "27": 299203072.0, + "28": 299203072.0, + "29": 299203072.0, + "30": 299203072.0, + "31": 299203072.0, + "32": 299203072.0, + "33": 299203072.0, + "34": 299203072.0, + "35": 299203072.0, + "36": 299203072.0, + "37": 299203072.0, + "38": 299203072.0, + "39": 299203072.0, + "40": 299203072.0, + "41": 299203072.0, + "42": 299203072.0, + "43": 299203072.0, + "44": 299203072.0, + "45": 299203072.0, + "46": 299203072.0, + "47": 299203072.0, + "48": 299203072.0, + "49": 299203072.0, + "50": 299203072.0, + "51": 299203072.0, + "52": 299203072.0, + "53": 299203072.0, + "54": 299203072.0, + "55": 299203072.0, + "56": 299203072.0, + "57": 299203072.0, + "58": 299203072.0, + "59": 299203072.0, + "60": 299203072.0, + "61": 299203072.0, + "62": 299203072.0, + "63": 299203072.0, + "64": 299203072.0, + "65": 299203072.0, + "66": 299203072.0, + "67": 299203072.0, + "68": 299203072.0, + "69": 299203072.0, + "70": 299203072.0, + "71": 299203072.0, + "72": 299203072.0, + "73": 299203072.0, + "74": 299203072.0, + "75": 299203072.0, + "76": 299203072.0, + "77": 299203072.0, + "78": 299203072.0, + "79": 299203072.0, + "80": 299203072.0, + "81": 299203072.0, + "82": 299203072.0, + "83": 299203072.0, + "84": 
299203072.0, + "85": 299203072.0, + "86": 299203072.0, + "87": 299203072.0, + "88": 299203072.0, + "89": 299203072.0, + "90": 299203072.0, + "91": 299203072.0, + "92": 299203072.0, + "93": 299203072.0, + "94": 299203072.0, + "95": 299203072.0, + "96": 299203072.0, + "97": 299203072.0, + "98": 299203072.0, + "99": 299203072.0, + "100": 299203072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 977125888.0, + "2": 1042071040.0, + "3": 1042071040.0, + "4": 1042071040.0, + "5": 1042071040.0, + "6": 1042071040.0, + "7": 1042071040.0, + "8": 1042071040.0, + "9": 1042071040.0, + "10": 1042071040.0, + "11": 1042071040.0, + "12": 1042071040.0, + "13": 1042071040.0, + "14": 1042071040.0, + "15": 1042071040.0, + "16": 1042071040.0, + "17": 1042071040.0, + "18": 1042071040.0, + "19": 1042071040.0, + "20": 1042071040.0, + "21": 1042071040.0, + "22": 1042071040.0, + "23": 1042071040.0, + "24": 1042071040.0, + "25": 1042071040.0, + "26": 1042071040.0, + "27": 1042071040.0, + "28": 1042071040.0, + "29": 1042071040.0, + "30": 1042071040.0, + "31": 1042071040.0, + "32": 1042071040.0, + "33": 1042071040.0, + "34": 1042071040.0, + "35": 1042071040.0, + "36": 1042071040.0, + "37": 1042071040.0, + "38": 1042071040.0, + "39": 1042071040.0, + "40": 1042071040.0, + "41": 1042071040.0, + "42": 1042071040.0, + "43": 1042071040.0, + "44": 1042071040.0, + "45": 1042071040.0, + "46": 1042071040.0, + "47": 1042071040.0, + "48": 1042071040.0, + "49": 1042071040.0, + "50": 1042071040.0, + "51": 1042071040.0, + "52": 1042071040.0, + "53": 1042071040.0, + "54": 1042071040.0, + "55": 1042071040.0, + "56": 1042071040.0, + "57": 1042071040.0, + "58": 1042071040.0, + "59": 1042071040.0, + "60": 1042071040.0, + "61": 1042071040.0, + "62": 1042071040.0, + "63": 1042071040.0, + "64": 1042071040.0, + "65": 1042071040.0, + "66": 1042071040.0, + "67": 1042071040.0, + "68": 1042071040.0, + "69": 1042071040.0, + "70": 1042071040.0, + 
"71": 1042071040.0, + "72": 1042071040.0, + "73": 1042071040.0, + "74": 1042071040.0, + "75": 1042071040.0, + "76": 1042071040.0, + "77": 1042071040.0, + "78": 1042071040.0, + "79": 1042071040.0, + "80": 1042071040.0, + "81": 1042071040.0, + "82": 1042071040.0, + "83": 1042071040.0, + "84": 1042071040.0, + "85": 1042071040.0, + "86": 1042071040.0, + "87": 1042071040.0, + "88": 1042071040.0, + "89": 1042071040.0, + "90": 1042071040.0, + "91": 1042071040.0, + "92": 1042071040.0, + "93": 1042071040.0, + "94": 1042071040.0, + "95": 1042071040.0, + "96": 1042071040.0, + "97": 1042071040.0, + "98": 1042071040.0, + "99": 1042071040.0, + "100": 1042071040.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.71841, + "2": 0.23136, + "3": 0.22493, + "4": 0.22779, + "5": 0.22663, + "6": 0.22036, + "7": 0.23806, + "8": 0.23483, + "9": 0.21894, + "10": 0.22798, + "11": 0.22166, + "12": 0.22477, + "13": 0.21586, + "14": 0.2289, + "15": 0.21846, + "16": 0.22439, + "17": 0.22351, + "18": 0.21894, + "19": 0.22165, + "20": 0.23, + "21": 0.21688, + "22": 0.21901, + "23": 0.21714, + "24": 0.2185, + "25": 0.21681, + "26": 0.21775, + "27": 0.21816, + "28": 0.21837, + "29": 0.21776, + "30": 0.21739, + "31": 0.21725, + "32": 0.21929, + "33": 0.2156, + "34": 0.21959, + "35": 0.21865, + "36": 0.21696, + "37": 0.21952, + "38": 0.21797, + "39": 0.21568, + "40": 0.21803, + "41": 0.21756, + "42": 0.21877, + "43": 0.21676, + "44": 0.21677, + "45": 0.21721, + "46": 0.22075, + "47": 0.21856, + "48": 0.21933, + "49": 0.21808, + "50": 0.21813, + "51": 0.22296, + "52": 0.22336, + "53": 0.21692, + "54": 0.21796, + "55": 0.21788, + "56": 0.22002, + "57": 0.21845, + "58": 0.21989, + "59": 0.21686, + "60": 0.22032, + "61": 0.22127, + "62": 0.21716, + "63": 0.21811, + "64": 0.21821, + "65": 0.22368, + "66": 0.22001, + "67": 0.21796, + "68": 0.21889, + "69": 0.22034, + "70": 0.2227, + "71": 0.2211, + "72": 0.2167, + "73": 0.21687, + "74": 
0.22416, + "75": 0.22056, + "76": 0.22116, + "77": 0.21759, + "78": 0.21843, + "79": 0.22272, + "80": 0.21922, + "81": 0.2196, + "82": 0.22739, + "83": 0.22344, + "84": 0.21981, + "85": 0.22041, + "86": 0.22015, + "87": 0.21885, + "88": 0.2239, + "89": 0.22975, + "90": 0.23365, + "91": 0.22476, + "92": 0.22336, + "93": 0.21913, + "94": 0.22057, + "95": 0.21711, + "96": 0.21724, + "97": 0.22153, + "98": 0.21996, + "99": 0.21866, + "100": 0.21935 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json index 0733919eefd..54bb3cbea8d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.86122, "5": 10.88242, "10": 10.83506, "15": 10.82738, "20": 10.72743, "25": 10.55753, "30": 10.37895, "35": 10.28321, "40": 10.08785, "45": 9.82625, "50": 9.91327, "55": 9.87788, "60": 9.50874, "65": 8.95103, "70": 9.73167, "75": 9.43681, "80": 9.41156, "85": 9.61613, "90": 9.8217, "95": 9.5191, "100": 9.40588}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1778.0, "5": 2219.0, "10": 1530.0, "15": 2125.0, "20": 1867.0, "25": 1773.0, "30": 2129.0, "35": 2169.0, "40": 2486.0, "45": 2335.0, "50": 2687.0, "55": 2652.0, "60": 2765.0, "65": 2946.0, "70": 3629.0, "75": 2702.0, "80": 3866.0, "85": 3517.0, 
"90": 3349.0, "95": 3530.0, "100": 3530.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 269891584.0, "5": 269891584.0, "10": 269891584.0, "15": 269891584.0, "20": 269891584.0, "25": 269891584.0, "30": 269891584.0, "35": 269891584.0, "40": 269891584.0, "45": 269891584.0, "50": 269891584.0, "55": 269891584.0, "60": 269891584.0, "65": 269891584.0, "70": 269891584.0, "75": 269891584.0, "80": 269891584.0, "85": 269891584.0, "90": 269891584.0, "95": 269891584.0, "100": 269891584.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1448634368.0, "5": 1515676672.0, "10": 1515676672.0, "15": 1515676672.0, "20": 1515676672.0, "25": 1515676672.0, "30": 1515676672.0, "35": 1515676672.0, "40": 1515676672.0, "45": 1515676672.0, "50": 1515676672.0, "55": 1515676672.0, "60": 1515676672.0, "65": 1515676672.0, "70": 1515676672.0, "75": 1515676672.0, "80": 1515676672.0, "85": 1515676672.0, "90": 1515676672.0, "95": 1515676672.0, "100": 1515676672.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 9.50422, "5": 0.32491, "10": 0.31435, "15": 0.31821, "20": 0.31516, "25": 0.31746, "30": 0.31793, "35": 0.31313, "40": 0.321, "45": 0.31588, "50": 0.31619, "55": 0.31619, "60": 0.31976, "65": 0.31872, "70": 0.31488, "75": 0.32184, "80": 0.31524, "85": 0.31903, "90": 0.31743, "95": 0.31797, "100": 0.3198}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86122, + "2": 10.85774, + "3": 10.86039, + "4": 10.84813, + "5": 10.88242, + "6": 10.88645, + "7": 10.86227, + "8": 10.86932, + "9": 10.86444, + "10": 10.83506, + "11": 10.87765, + "12": 10.87384, + "13": 10.87945, + "14": 10.88919, + "15": 10.82738, + "16": 10.83105, + "17": 10.79888, + "18": 10.82441, + "19": 10.81363, + "20": 10.72743, + "21": 10.71638, + "22": 10.57153, + "23": 10.7269, + "24": 10.61223, + "25": 
10.55753, + "26": 10.60603, + "27": 10.61792, + "28": 10.57695, + "29": 10.59633, + "30": 10.37895, + "31": 10.13125, + "32": 10.47822, + "33": 10.46894, + "34": 10.22715, + "35": 10.28321, + "36": 10.22751, + "37": 10.35397, + "38": 10.20483, + "39": 10.40755, + "40": 10.08785, + "41": 10.1591, + "42": 10.21601, + "43": 9.84821, + "44": 9.9651, + "45": 9.82625, + "46": 9.83468, + "47": 10.15337, + "48": 9.84529, + "49": 9.52926, + "50": 9.91327, + "51": 9.8517, + "52": 9.74686, + "53": 10.07204, + "54": 9.95738, + "55": 9.87788, + "56": 9.62943, + "57": 9.48988, + "58": 9.83265, + "59": 9.58831, + "60": 9.50874, + "61": 9.69495, + "62": 9.99373, + "63": 9.377, + "64": 9.78004, + "65": 8.95103, + "66": 9.71392, + "67": 9.37884, + "68": 9.78831, + "69": 9.79096, + "70": 9.73167, + "71": 9.61776, + "72": 9.59099, + "73": 9.49436, + "74": 8.95001, + "75": 9.43681, + "76": 9.09852, + "77": 10.06447, + "78": 9.72944, + "79": 9.37805, + "80": 9.41156, + "81": 9.48537, + "82": 9.69592, + "83": 9.31981, + "84": 9.42306, + "85": 9.61613, + "86": 9.07185, + "87": 9.59282, + "88": 9.75055, + "89": 9.61194, + "90": 9.8217, + "91": 9.35308, + "92": 9.36305, + "93": 9.08788, + "94": 8.83439, + "95": 9.5191, + "96": 9.52647, + "97": 9.31412, + "98": 9.67541, + "99": 8.88941, + "100": 9.40588 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1778.0, + "2": 1875.0, + "3": 1879.0, + "4": 1912.0, + "5": 2219.0, + "6": 2163.0, + "7": 2113.0, + "8": 1747.0, + "9": 2049.0, + "10": 1530.0, + "11": 2113.0, + "12": 1959.0, + "13": 2134.0, + "14": 2055.0, + "15": 2125.0, + "16": 2139.0, + "17": 1988.0, + "18": 1892.0, + "19": 1991.0, + "20": 1867.0, + "21": 2023.0, + "22": 1865.0, + "23": 2185.0, + "24": 1774.0, + "25": 1773.0, + "26": 1990.0, + "27": 2061.0, + "28": 2215.0, + "29": 2186.0, + "30": 2129.0, + "31": 1794.0, + "32": 2109.0, + "33": 2422.0, + "34": 2135.0, + "35": 2169.0, + "36": 2127.0, + "37": 2432.0, + "38": 2490.0, + 
"39": 2495.0, + "40": 2486.0, + "41": 2465.0, + "42": 2535.0, + "43": 2216.0, + "44": 2407.0, + "45": 2335.0, + "46": 2617.0, + "47": 2830.0, + "48": 2480.0, + "49": 2492.0, + "50": 2687.0, + "51": 2863.0, + "52": 2881.0, + "53": 3220.0, + "54": 2894.0, + "55": 2652.0, + "56": 3006.0, + "57": 2561.0, + "58": 3273.0, + "59": 3039.0, + "60": 2765.0, + "61": 3310.0, + "62": 2936.0, + "63": 2630.0, + "64": 3230.0, + "65": 2946.0, + "66": 3500.0, + "67": 2976.0, + "68": 2944.0, + "69": 3117.0, + "70": 3629.0, + "71": 3255.0, + "72": 2633.0, + "73": 3338.0, + "74": 2172.0, + "75": 2702.0, + "76": 3162.0, + "77": 3850.0, + "78": 3590.0, + "79": 3658.0, + "80": 3866.0, + "81": 3976.0, + "82": 3680.0, + "83": 3153.0, + "84": 3586.0, + "85": 3517.0, + "86": 3137.0, + "87": 4177.0, + "88": 3589.0, + "89": 3849.0, + "90": 3349.0, + "91": 2936.0, + "92": 3526.0, + "93": 2965.0, + "94": 3772.0, + "95": 3530.0, + "96": 3774.0, + "97": 3636.0, + "98": 4064.0, + "99": 3394.0, + "100": 3530.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 269891584.0, + "2": 269891584.0, + "3": 269891584.0, + "4": 269891584.0, + "5": 269891584.0, + "6": 269891584.0, + "7": 269891584.0, + "8": 269891584.0, + "9": 269891584.0, + "10": 269891584.0, + "11": 269891584.0, + "12": 269891584.0, + "13": 269891584.0, + "14": 269891584.0, + "15": 269891584.0, + "16": 269891584.0, + "17": 269891584.0, + "18": 269891584.0, + "19": 269891584.0, + "20": 269891584.0, + "21": 269891584.0, + "22": 269891584.0, + "23": 269891584.0, + "24": 269891584.0, + "25": 269891584.0, + "26": 269891584.0, + "27": 269891584.0, + "28": 269891584.0, + "29": 269891584.0, + "30": 269891584.0, + "31": 269891584.0, + "32": 269891584.0, + "33": 269891584.0, + "34": 269891584.0, + "35": 269891584.0, + "36": 269891584.0, + "37": 269891584.0, + "38": 269891584.0, + "39": 269891584.0, + "40": 269891584.0, + "41": 269891584.0, + "42": 269891584.0, + "43": 269891584.0, + 
"44": 269891584.0, + "45": 269891584.0, + "46": 269891584.0, + "47": 269891584.0, + "48": 269891584.0, + "49": 269891584.0, + "50": 269891584.0, + "51": 269891584.0, + "52": 269891584.0, + "53": 269891584.0, + "54": 269891584.0, + "55": 269891584.0, + "56": 269891584.0, + "57": 269891584.0, + "58": 269891584.0, + "59": 269891584.0, + "60": 269891584.0, + "61": 269891584.0, + "62": 269891584.0, + "63": 269891584.0, + "64": 269891584.0, + "65": 269891584.0, + "66": 269891584.0, + "67": 269891584.0, + "68": 269891584.0, + "69": 269891584.0, + "70": 269891584.0, + "71": 269891584.0, + "72": 269891584.0, + "73": 269891584.0, + "74": 269891584.0, + "75": 269891584.0, + "76": 269891584.0, + "77": 269891584.0, + "78": 269891584.0, + "79": 269891584.0, + "80": 269891584.0, + "81": 269891584.0, + "82": 269891584.0, + "83": 269891584.0, + "84": 269891584.0, + "85": 269891584.0, + "86": 269891584.0, + "87": 269891584.0, + "88": 269891584.0, + "89": 269891584.0, + "90": 269891584.0, + "91": 269891584.0, + "92": 269891584.0, + "93": 269891584.0, + "94": 269891584.0, + "95": 269891584.0, + "96": 269891584.0, + "97": 269891584.0, + "98": 269891584.0, + "99": 269891584.0, + "100": 269891584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1448634368.0, + "2": 1515676672.0, + "3": 1515676672.0, + "4": 1515676672.0, + "5": 1515676672.0, + "6": 1515676672.0, + "7": 1515676672.0, + "8": 1515676672.0, + "9": 1515676672.0, + "10": 1515676672.0, + "11": 1515676672.0, + "12": 1515676672.0, + "13": 1515676672.0, + "14": 1515676672.0, + "15": 1515676672.0, + "16": 1515676672.0, + "17": 1515676672.0, + "18": 1515676672.0, + "19": 1515676672.0, + "20": 1515676672.0, + "21": 1515676672.0, + "22": 1515676672.0, + "23": 1515676672.0, + "24": 1515676672.0, + "25": 1515676672.0, + "26": 1515676672.0, + "27": 1515676672.0, + "28": 1515676672.0, + "29": 1515676672.0, + "30": 1515676672.0, + "31": 1515676672.0, + "32": 
1515676672.0, + "33": 1515676672.0, + "34": 1515676672.0, + "35": 1515676672.0, + "36": 1515676672.0, + "37": 1515676672.0, + "38": 1515676672.0, + "39": 1515676672.0, + "40": 1515676672.0, + "41": 1515676672.0, + "42": 1515676672.0, + "43": 1515676672.0, + "44": 1515676672.0, + "45": 1515676672.0, + "46": 1515676672.0, + "47": 1515676672.0, + "48": 1515676672.0, + "49": 1515676672.0, + "50": 1515676672.0, + "51": 1515676672.0, + "52": 1515676672.0, + "53": 1515676672.0, + "54": 1515676672.0, + "55": 1515676672.0, + "56": 1515676672.0, + "57": 1515676672.0, + "58": 1515676672.0, + "59": 1515676672.0, + "60": 1515676672.0, + "61": 1515676672.0, + "62": 1515676672.0, + "63": 1515676672.0, + "64": 1515676672.0, + "65": 1515676672.0, + "66": 1515676672.0, + "67": 1515676672.0, + "68": 1515676672.0, + "69": 1515676672.0, + "70": 1515676672.0, + "71": 1515676672.0, + "72": 1515676672.0, + "73": 1515676672.0, + "74": 1515676672.0, + "75": 1515676672.0, + "76": 1515676672.0, + "77": 1515676672.0, + "78": 1515676672.0, + "79": 1515676672.0, + "80": 1515676672.0, + "81": 1515676672.0, + "82": 1515676672.0, + "83": 1515676672.0, + "84": 1515676672.0, + "85": 1515676672.0, + "86": 1515676672.0, + "87": 1515676672.0, + "88": 1515676672.0, + "89": 1515676672.0, + "90": 1515676672.0, + "91": 1515676672.0, + "92": 1515676672.0, + "93": 1515676672.0, + "94": 1515676672.0, + "95": 1515676672.0, + "96": 1515676672.0, + "97": 1515676672.0, + "98": 1515676672.0, + "99": 1515676672.0, + "100": 1515676672.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.29271, + "2": 0.42506, + "3": 0.68343, + "4": 0.36852, + "5": 0.35945, + "6": 0.70082, + "7": 0.36184, + "8": 0.36666, + "9": 0.36956, + "10": 0.36948, + "11": 0.34035, + "12": 0.33106, + "13": 0.32678, + "14": 0.50153, + "15": 0.32624, + "16": 0.32544, + "17": 0.33191, + "18": 0.32618, + "19": 0.3263, + "20": 0.33069, + "21": 0.32595, + "22": 0.3257, + "23": 0.33264, + 
"24": 0.32517, + "25": 0.32475, + "26": 0.33346, + "27": 0.33354, + "28": 0.32383, + "29": 0.33025, + "30": 0.32292, + "31": 0.32259, + "32": 0.33133, + "33": 0.32233, + "34": 0.32205, + "35": 0.32577, + "36": 0.33027, + "37": 0.32369, + "38": 0.3231, + "39": 0.32941, + "40": 0.32272, + "41": 0.32419, + "42": 0.32862, + "43": 0.32341, + "44": 0.32437, + "45": 0.3291, + "46": 0.32245, + "47": 0.32412, + "48": 0.32928, + "49": 0.32252, + "50": 0.3232, + "51": 0.3288, + "52": 0.32267, + "53": 0.32323, + "54": 0.33682, + "55": 0.32632, + "56": 0.32697, + "57": 0.33895, + "58": 0.32618, + "59": 0.32589, + "60": 0.3322, + "61": 0.3251, + "62": 0.32521, + "63": 0.33036, + "64": 0.32444, + "65": 0.32508, + "66": 0.33114, + "67": 0.32315, + "68": 0.32508, + "69": 0.3303, + "70": 0.32701, + "71": 0.32493, + "72": 0.32932, + "73": 0.32763, + "74": 0.32474, + "75": 0.32636, + "76": 0.33103, + "77": 0.32433, + "78": 0.32583, + "79": 0.33332, + "80": 0.32445, + "81": 0.32512, + "82": 0.33846, + "83": 0.32647, + "84": 0.32584, + "85": 0.33063, + "86": 0.32531, + "87": 0.32597, + "88": 0.33536, + "89": 0.32529, + "90": 0.32619, + "91": 0.33191, + "92": 0.32549, + "93": 0.32565, + "94": 0.33549, + "95": 0.32239, + "96": 0.32249, + "97": 0.32967, + "98": 0.3225, + "99": 0.32206, + "100": 0.32856 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..fbfe9099b9a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 
+1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86122, + "2": 10.85774, + "3": 10.86039, + "4": 10.84813, + "5": 10.88242, + "6": 10.88645, + "7": 10.86227, + "8": 10.86932, + "9": 10.86444, + "10": 10.83506, + "11": 10.87765, + "12": 10.87384, + "13": 10.87945, + "14": 10.88919, + "15": 10.82738, + "16": 10.83105, + "17": 10.79888, + "18": 10.82441, + "19": 10.81363, + "20": 10.72743, + "21": 10.71638, + "22": 10.57153, + "23": 10.7269, + "24": 10.61223, + "25": 10.55753, + "26": 10.60603, + "27": 10.61792, + "28": 10.57695, + "29": 10.59633, + "30": 10.37895, + "31": 10.13125, + "32": 10.47822, + "33": 10.46894, + "34": 10.22715, + "35": 10.28321, + "36": 10.22751, + "37": 10.35397, + "38": 10.20483, + "39": 10.40755, + "40": 10.08785, + "41": 10.1591, + "42": 10.21601, + "43": 9.84821, + "44": 9.9651, + "45": 9.82625, + "46": 9.83468, + "47": 10.15337, + "48": 9.84529, + "49": 9.52926, + "50": 9.91327, + "51": 9.8517, + "52": 9.74686, + "53": 10.07204, + "54": 9.95738, + "55": 9.87788, + "56": 9.62943, + "57": 9.48988, + "58": 9.83265, + "59": 9.58831, + "60": 9.50874, + "61": 9.69495, + "62": 9.99373, + "63": 9.377, + "64": 9.78004, + "65": 8.95103, + "66": 9.71392, + "67": 9.37884, + "68": 9.78831, + "69": 9.79096, + "70": 9.73167, + "71": 9.61776, + "72": 9.59099, + "73": 9.49436, + "74": 8.95001, + "75": 9.43681, + "76": 9.09852, + "77": 10.06447, + "78": 9.72944, + "79": 9.37805, + "80": 9.41156, + "81": 9.48537, + "82": 9.69592, + "83": 9.31981, + "84": 9.42306, + "85": 9.61613, + "86": 9.07185, + "87": 9.59282, + "88": 9.75055, + "89": 9.61194, + "90": 9.8217, + "91": 9.35308, + "92": 9.36305, + "93": 9.08788, + "94": 8.83439, + "95": 9.5191, + "96": 9.52647, + "97": 9.31412, + "98": 9.67541, + "99": 8.88941, + "100": 9.40588 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1778.0, + "2": 1875.0, + "3": 1879.0, + "4": 1912.0, + "5": 
2219.0, + "6": 2163.0, + "7": 2113.0, + "8": 1747.0, + "9": 2049.0, + "10": 1530.0, + "11": 2113.0, + "12": 1959.0, + "13": 2134.0, + "14": 2055.0, + "15": 2125.0, + "16": 2139.0, + "17": 1988.0, + "18": 1892.0, + "19": 1991.0, + "20": 1867.0, + "21": 2023.0, + "22": 1865.0, + "23": 2185.0, + "24": 1774.0, + "25": 1773.0, + "26": 1990.0, + "27": 2061.0, + "28": 2215.0, + "29": 2186.0, + "30": 2129.0, + "31": 1794.0, + "32": 2109.0, + "33": 2422.0, + "34": 2135.0, + "35": 2169.0, + "36": 2127.0, + "37": 2432.0, + "38": 2490.0, + "39": 2495.0, + "40": 2486.0, + "41": 2465.0, + "42": 2535.0, + "43": 2216.0, + "44": 2407.0, + "45": 2335.0, + "46": 2617.0, + "47": 2830.0, + "48": 2480.0, + "49": 2492.0, + "50": 2687.0, + "51": 2863.0, + "52": 2881.0, + "53": 3220.0, + "54": 2894.0, + "55": 2652.0, + "56": 3006.0, + "57": 2561.0, + "58": 3273.0, + "59": 3039.0, + "60": 2765.0, + "61": 3310.0, + "62": 2936.0, + "63": 2630.0, + "64": 3230.0, + "65": 2946.0, + "66": 3500.0, + "67": 2976.0, + "68": 2944.0, + "69": 3117.0, + "70": 3629.0, + "71": 3255.0, + "72": 2633.0, + "73": 3338.0, + "74": 2172.0, + "75": 2702.0, + "76": 3162.0, + "77": 3850.0, + "78": 3590.0, + "79": 3658.0, + "80": 3866.0, + "81": 3976.0, + "82": 3680.0, + "83": 3153.0, + "84": 3586.0, + "85": 3517.0, + "86": 3137.0, + "87": 4177.0, + "88": 3589.0, + "89": 3849.0, + "90": 3349.0, + "91": 2936.0, + "92": 3526.0, + "93": 2965.0, + "94": 3772.0, + "95": 3530.0, + "96": 3774.0, + "97": 3636.0, + "98": 4064.0, + "99": 3394.0, + "100": 3530.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 269891584.0, + "2": 269891584.0, + "3": 269891584.0, + "4": 269891584.0, + "5": 269891584.0, + "6": 269891584.0, + "7": 269891584.0, + "8": 269891584.0, + "9": 269891584.0, + "10": 269891584.0, + "11": 269891584.0, + "12": 269891584.0, + "13": 269891584.0, + "14": 269891584.0, + "15": 269891584.0, + "16": 269891584.0, + "17": 269891584.0, + "18": 
269891584.0, + "19": 269891584.0, + "20": 269891584.0, + "21": 269891584.0, + "22": 269891584.0, + "23": 269891584.0, + "24": 269891584.0, + "25": 269891584.0, + "26": 269891584.0, + "27": 269891584.0, + "28": 269891584.0, + "29": 269891584.0, + "30": 269891584.0, + "31": 269891584.0, + "32": 269891584.0, + "33": 269891584.0, + "34": 269891584.0, + "35": 269891584.0, + "36": 269891584.0, + "37": 269891584.0, + "38": 269891584.0, + "39": 269891584.0, + "40": 269891584.0, + "41": 269891584.0, + "42": 269891584.0, + "43": 269891584.0, + "44": 269891584.0, + "45": 269891584.0, + "46": 269891584.0, + "47": 269891584.0, + "48": 269891584.0, + "49": 269891584.0, + "50": 269891584.0, + "51": 269891584.0, + "52": 269891584.0, + "53": 269891584.0, + "54": 269891584.0, + "55": 269891584.0, + "56": 269891584.0, + "57": 269891584.0, + "58": 269891584.0, + "59": 269891584.0, + "60": 269891584.0, + "61": 269891584.0, + "62": 269891584.0, + "63": 269891584.0, + "64": 269891584.0, + "65": 269891584.0, + "66": 269891584.0, + "67": 269891584.0, + "68": 269891584.0, + "69": 269891584.0, + "70": 269891584.0, + "71": 269891584.0, + "72": 269891584.0, + "73": 269891584.0, + "74": 269891584.0, + "75": 269891584.0, + "76": 269891584.0, + "77": 269891584.0, + "78": 269891584.0, + "79": 269891584.0, + "80": 269891584.0, + "81": 269891584.0, + "82": 269891584.0, + "83": 269891584.0, + "84": 269891584.0, + "85": 269891584.0, + "86": 269891584.0, + "87": 269891584.0, + "88": 269891584.0, + "89": 269891584.0, + "90": 269891584.0, + "91": 269891584.0, + "92": 269891584.0, + "93": 269891584.0, + "94": 269891584.0, + "95": 269891584.0, + "96": 269891584.0, + "97": 269891584.0, + "98": 269891584.0, + "99": 269891584.0, + "100": 269891584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1449682432.0, + "2": 1515676160.0, + "3": 1515676672.0, + "4": 1515676672.0, + "5": 1515676672.0, + "6": 1515676672.0, + "7": 1515676672.0, + 
"8": 1515676672.0, + "9": 1515676672.0, + "10": 1515676672.0, + "11": 1515676672.0, + "12": 1515676672.0, + "13": 1515676672.0, + "14": 1515676672.0, + "15": 1515676672.0, + "16": 1515676672.0, + "17": 1515676672.0, + "18": 1515676672.0, + "19": 1515676672.0, + "20": 1515676672.0, + "21": 1515676672.0, + "22": 1515676672.0, + "23": 1515676672.0, + "24": 1515676672.0, + "25": 1515676672.0, + "26": 1515676672.0, + "27": 1515676672.0, + "28": 1515676672.0, + "29": 1515676672.0, + "30": 1515676672.0, + "31": 1515676672.0, + "32": 1515676672.0, + "33": 1515676672.0, + "34": 1515676672.0, + "35": 1515676672.0, + "36": 1515676672.0, + "37": 1515676672.0, + "38": 1515676672.0, + "39": 1515676672.0, + "40": 1515676672.0, + "41": 1515676672.0, + "42": 1515676672.0, + "43": 1515676672.0, + "44": 1515676672.0, + "45": 1515676672.0, + "46": 1515676672.0, + "47": 1515676672.0, + "48": 1515676672.0, + "49": 1515676672.0, + "50": 1515676672.0, + "51": 1515676672.0, + "52": 1515676672.0, + "53": 1515676672.0, + "54": 1515676672.0, + "55": 1515676672.0, + "56": 1515676672.0, + "57": 1515676672.0, + "58": 1515676672.0, + "59": 1515676672.0, + "60": 1515676672.0, + "61": 1515676672.0, + "62": 1515676672.0, + "63": 1515676672.0, + "64": 1515676672.0, + "65": 1515676672.0, + "66": 1515676672.0, + "67": 1515676672.0, + "68": 1515676672.0, + "69": 1515676672.0, + "70": 1515676672.0, + "71": 1515676672.0, + "72": 1515676672.0, + "73": 1515676672.0, + "74": 1515676672.0, + "75": 1515676672.0, + "76": 1515676672.0, + "77": 1515676672.0, + "78": 1515676672.0, + "79": 1515676672.0, + "80": 1515676672.0, + "81": 1515676672.0, + "82": 1515676672.0, + "83": 1515676672.0, + "84": 1515676672.0, + "85": 1515676672.0, + "86": 1515676672.0, + "87": 1515676672.0, + "88": 1515676672.0, + "89": 1515676672.0, + "90": 1515676672.0, + "91": 1515676672.0, + "92": 1515676672.0, + "93": 1515676672.0, + "94": 1515676672.0, + "95": 1515676672.0, + "96": 1515676672.0, + "97": 1515676672.0, + "98": 1515676672.0, + 
"99": 1515676672.0, + "100": 1515676672.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.32987, + "2": 0.44802, + "3": 0.3897, + "4": 0.56459, + "5": 0.32806, + "6": 0.32604, + "7": 0.3324, + "8": 0.32545, + "9": 0.32671, + "10": 0.32918, + "11": 0.32556, + "12": 0.32448, + "13": 0.33048, + "14": 0.32558, + "15": 0.32571, + "16": 0.32541, + "17": 0.32955, + "18": 0.32389, + "19": 0.32497, + "20": 0.32764, + "21": 0.32394, + "22": 0.32563, + "23": 0.32657, + "24": 0.32266, + "25": 0.32254, + "26": 0.3268, + "27": 0.32163, + "28": 0.32398, + "29": 0.32473, + "30": 0.32185, + "31": 0.32189, + "32": 0.32643, + "33": 0.32083, + "34": 0.56155, + "35": 0.31927, + "36": 0.31993, + "37": 0.32102, + "38": 0.32424, + "39": 0.31933, + "40": 0.32056, + "41": 0.32393, + "42": 0.31935, + "43": 0.32004, + "44": 0.32411, + "45": 0.31946, + "46": 0.32014, + "47": 0.32328, + "48": 0.32028, + "49": 0.32003, + "50": 0.32557, + "51": 0.32445, + "52": 0.31875, + "53": 0.32179, + "54": 0.31879, + "55": 0.31778, + "56": 0.32208, + "57": 0.32308, + "58": 0.34278, + "59": 0.321, + "60": 0.32449, + "61": 0.31868, + "62": 0.31968, + "63": 0.323, + "64": 0.31977, + "65": 0.3202, + "66": 0.32473, + "67": 0.3176, + "68": 0.32003, + "69": 0.32585, + "70": 0.31796, + "71": 0.32004, + "72": 0.32637, + "73": 0.31882, + "74": 0.31909, + "75": 0.32558, + "76": 0.31782, + "77": 0.31875, + "78": 0.3264, + "79": 0.31815, + "80": 0.32078, + "81": 0.32153, + "82": 0.31967, + "83": 0.31863, + "84": 0.32086, + "85": 0.3241, + "86": 0.31836, + "87": 0.31939, + "88": 0.32513, + "89": 0.31892, + "90": 0.31985, + "91": 0.32655, + "92": 0.31914, + "93": 0.32019, + "94": 0.3246, + "95": 0.31888, + "96": 0.31924, + "97": 0.32612, + "98": 0.35151, + "99": 0.32636, + "100": 0.32793 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..9480fee796c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86122, + "2": 10.85774, + "3": 10.86039, + "4": 10.84813, + "5": 10.88242, + "6": 10.88645, + "7": 10.86227, + "8": 10.86932, + "9": 10.86444, + "10": 10.83506, + "11": 10.87765, + "12": 10.87384, + "13": 10.87945, + "14": 10.88919, + "15": 10.82738, + "16": 10.83105, + "17": 10.79888, + "18": 10.82441, + "19": 10.81363, + "20": 10.72743, + "21": 10.71638, + "22": 10.57153, + "23": 10.7269, + "24": 10.61223, + "25": 10.55753, + "26": 10.60603, + "27": 10.61792, + "28": 10.57695, + "29": 10.59633, + "30": 10.37895, + "31": 10.13125, + "32": 10.47822, + "33": 10.46894, + "34": 10.22715, + "35": 10.28321, + "36": 10.22751, + "37": 10.35397, + "38": 10.20483, + "39": 10.40755, + "40": 10.08785, + "41": 10.1591, + "42": 10.21601, + "43": 9.84821, + "44": 9.9651, + "45": 9.82625, + "46": 9.83468, + "47": 10.15337, + "48": 9.84529, + "49": 9.52926, + "50": 9.91327, + "51": 9.8517, + "52": 9.74686, + "53": 10.07204, + "54": 9.95738, + "55": 9.87788, + "56": 9.62943, + "57": 9.48988, + "58": 9.83265, + "59": 9.58831, + "60": 9.50874, + "61": 9.69495, + "62": 9.99373, + "63": 9.377, + "64": 9.78004, + "65": 8.95103, + "66": 9.71392, + "67": 9.37884, + "68": 9.78831, + "69": 9.79096, + "70": 9.73167, + "71": 9.61776, + "72": 9.59099, + "73": 
9.49436, + "74": 8.95001, + "75": 9.43681, + "76": 9.09852, + "77": 10.06447, + "78": 9.72944, + "79": 9.37805, + "80": 9.41156, + "81": 9.48537, + "82": 9.69592, + "83": 9.31981, + "84": 9.42306, + "85": 9.61613, + "86": 9.07185, + "87": 9.59282, + "88": 9.75055, + "89": 9.61194, + "90": 9.8217, + "91": 9.35308, + "92": 9.36305, + "93": 9.08788, + "94": 8.83439, + "95": 9.5191, + "96": 9.52647, + "97": 9.31412, + "98": 9.67541, + "99": 8.88941, + "100": 9.40588 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1778.0, + "2": 1875.0, + "3": 1879.0, + "4": 1912.0, + "5": 2219.0, + "6": 2163.0, + "7": 2113.0, + "8": 1747.0, + "9": 2049.0, + "10": 1530.0, + "11": 2113.0, + "12": 1959.0, + "13": 2134.0, + "14": 2055.0, + "15": 2125.0, + "16": 2139.0, + "17": 1988.0, + "18": 1892.0, + "19": 1991.0, + "20": 1867.0, + "21": 2023.0, + "22": 1865.0, + "23": 2185.0, + "24": 1774.0, + "25": 1773.0, + "26": 1990.0, + "27": 2061.0, + "28": 2215.0, + "29": 2186.0, + "30": 2129.0, + "31": 1794.0, + "32": 2109.0, + "33": 2422.0, + "34": 2135.0, + "35": 2169.0, + "36": 2127.0, + "37": 2432.0, + "38": 2490.0, + "39": 2495.0, + "40": 2486.0, + "41": 2465.0, + "42": 2535.0, + "43": 2216.0, + "44": 2407.0, + "45": 2335.0, + "46": 2617.0, + "47": 2830.0, + "48": 2480.0, + "49": 2492.0, + "50": 2687.0, + "51": 2863.0, + "52": 2881.0, + "53": 3220.0, + "54": 2894.0, + "55": 2652.0, + "56": 3006.0, + "57": 2561.0, + "58": 3273.0, + "59": 3039.0, + "60": 2765.0, + "61": 3310.0, + "62": 2936.0, + "63": 2630.0, + "64": 3230.0, + "65": 2946.0, + "66": 3500.0, + "67": 2976.0, + "68": 2944.0, + "69": 3117.0, + "70": 3629.0, + "71": 3255.0, + "72": 2633.0, + "73": 3338.0, + "74": 2172.0, + "75": 2702.0, + "76": 3162.0, + "77": 3850.0, + "78": 3590.0, + "79": 3658.0, + "80": 3866.0, + "81": 3976.0, + "82": 3680.0, + "83": 3153.0, + "84": 3586.0, + "85": 3517.0, + "86": 3137.0, + "87": 4177.0, + "88": 3589.0, + "89": 3849.0, + "90": 3349.0, + 
"91": 2936.0, + "92": 3526.0, + "93": 2965.0, + "94": 3772.0, + "95": 3530.0, + "96": 3774.0, + "97": 3636.0, + "98": 4064.0, + "99": 3394.0, + "100": 3530.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 269891584.0, + "2": 269891584.0, + "3": 269891584.0, + "4": 269891584.0, + "5": 269891584.0, + "6": 269891584.0, + "7": 269891584.0, + "8": 269891584.0, + "9": 269891584.0, + "10": 269891584.0, + "11": 269891584.0, + "12": 269891584.0, + "13": 269891584.0, + "14": 269891584.0, + "15": 269891584.0, + "16": 269891584.0, + "17": 269891584.0, + "18": 269891584.0, + "19": 269891584.0, + "20": 269891584.0, + "21": 269891584.0, + "22": 269891584.0, + "23": 269891584.0, + "24": 269891584.0, + "25": 269891584.0, + "26": 269891584.0, + "27": 269891584.0, + "28": 269891584.0, + "29": 269891584.0, + "30": 269891584.0, + "31": 269891584.0, + "32": 269891584.0, + "33": 269891584.0, + "34": 269891584.0, + "35": 269891584.0, + "36": 269891584.0, + "37": 269891584.0, + "38": 269891584.0, + "39": 269891584.0, + "40": 269891584.0, + "41": 269891584.0, + "42": 269891584.0, + "43": 269891584.0, + "44": 269891584.0, + "45": 269891584.0, + "46": 269891584.0, + "47": 269891584.0, + "48": 269891584.0, + "49": 269891584.0, + "50": 269891584.0, + "51": 269891584.0, + "52": 269891584.0, + "53": 269891584.0, + "54": 269891584.0, + "55": 269891584.0, + "56": 269891584.0, + "57": 269891584.0, + "58": 269891584.0, + "59": 269891584.0, + "60": 269891584.0, + "61": 269891584.0, + "62": 269891584.0, + "63": 269891584.0, + "64": 269891584.0, + "65": 269891584.0, + "66": 269891584.0, + "67": 269891584.0, + "68": 269891584.0, + "69": 269891584.0, + "70": 269891584.0, + "71": 269891584.0, + "72": 269891584.0, + "73": 269891584.0, + "74": 269891584.0, + "75": 269891584.0, + "76": 269891584.0, + "77": 269891584.0, + "78": 269891584.0, + "79": 269891584.0, + "80": 269891584.0, + "81": 269891584.0, + "82": 269891584.0, + "83": 
269891584.0, + "84": 269891584.0, + "85": 269891584.0, + "86": 269891584.0, + "87": 269891584.0, + "88": 269891584.0, + "89": 269891584.0, + "90": 269891584.0, + "91": 269891584.0, + "92": 269891584.0, + "93": 269891584.0, + "94": 269891584.0, + "95": 269891584.0, + "96": 269891584.0, + "97": 269891584.0, + "98": 269891584.0, + "99": 269891584.0, + "100": 269891584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1448633856.0, + "2": 1513579520.0, + "3": 1513579520.0, + "4": 1513579520.0, + "5": 1513579520.0, + "6": 1513579520.0, + "7": 1513579520.0, + "8": 1515676160.0, + "9": 1515676160.0, + "10": 1515676160.0, + "11": 1515676160.0, + "12": 1515676160.0, + "13": 1515676160.0, + "14": 1515676160.0, + "15": 1515676160.0, + "16": 1515676160.0, + "17": 1515676160.0, + "18": 1515676160.0, + "19": 1515676160.0, + "20": 1515676160.0, + "21": 1515676160.0, + "22": 1515676160.0, + "23": 1515676160.0, + "24": 1515676160.0, + "25": 1515676160.0, + "26": 1515676160.0, + "27": 1515676160.0, + "28": 1515676160.0, + "29": 1515676160.0, + "30": 1515676160.0, + "31": 1515676160.0, + "32": 1515676160.0, + "33": 1515676160.0, + "34": 1515676160.0, + "35": 1515676160.0, + "36": 1515676672.0, + "37": 1515676672.0, + "38": 1515676672.0, + "39": 1515676672.0, + "40": 1515676672.0, + "41": 1515676672.0, + "42": 1515676672.0, + "43": 1515676672.0, + "44": 1515676672.0, + "45": 1515676672.0, + "46": 1515676672.0, + "47": 1515676672.0, + "48": 1515676672.0, + "49": 1515676672.0, + "50": 1515676672.0, + "51": 1515676672.0, + "52": 1515676672.0, + "53": 1515676672.0, + "54": 1515676672.0, + "55": 1515676672.0, + "56": 1515676672.0, + "57": 1515676672.0, + "58": 1515676672.0, + "59": 1515676672.0, + "60": 1515676672.0, + "61": 1515676672.0, + "62": 1515676672.0, + "63": 1515676672.0, + "64": 1515676672.0, + "65": 1515676672.0, + "66": 1515676672.0, + "67": 1515676672.0, + "68": 1515676672.0, + "69": 1515676672.0, + 
"70": 1515676672.0, + "71": 1515676672.0, + "72": 1515676672.0, + "73": 1515676672.0, + "74": 1515676672.0, + "75": 1515676672.0, + "76": 1515676672.0, + "77": 1515676672.0, + "78": 1515676672.0, + "79": 1515676672.0, + "80": 1515676672.0, + "81": 1515676672.0, + "82": 1515676672.0, + "83": 1515676672.0, + "84": 1515676672.0, + "85": 1515676672.0, + "86": 1515676672.0, + "87": 1515676672.0, + "88": 1515676672.0, + "89": 1515676672.0, + "90": 1515676672.0, + "91": 1515676672.0, + "92": 1515676672.0, + "93": 1515676672.0, + "94": 1515676672.0, + "95": 1515676672.0, + "96": 1515676672.0, + "97": 1515676672.0, + "98": 1515676672.0, + "99": 1515676672.0, + "100": 1515676672.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.43327, + "2": 0.37217, + "3": 0.69038, + "4": 0.33729, + "5": 0.33255, + "6": 0.3329, + "7": 0.34063, + "8": 0.55397, + "9": 0.33233, + "10": 0.33512, + "11": 0.33544, + "12": 0.33156, + "13": 0.33165, + "14": 0.33013, + "15": 0.32988, + "16": 0.32999, + "17": 0.32805, + "18": 0.32946, + "19": 0.33103, + "20": 0.32729, + "21": 0.32872, + "22": 0.3299, + "23": 0.33066, + "24": 0.3297, + "25": 0.32925, + "26": 0.33007, + "27": 0.32757, + "28": 0.32935, + "29": 0.32613, + "30": 0.33036, + "31": 0.32825, + "32": 0.32791, + "33": 0.32815, + "34": 0.32917, + "35": 0.32646, + "36": 0.33004, + "37": 0.3301, + "38": 0.32598, + "39": 0.32992, + "40": 0.33003, + "41": 0.32599, + "42": 0.32948, + "43": 0.3293, + "44": 0.326, + "45": 0.3277, + "46": 0.33009, + "47": 0.32567, + "48": 0.32635, + "49": 0.33059, + "50": 0.33062, + "51": 0.33004, + "52": 0.32318, + "53": 0.32666, + "54": 0.32944, + "55": 0.32431, + "56": 0.3255, + "57": 0.33385, + "58": 0.32385, + "59": 0.32365, + "60": 0.33444, + "61": 0.32406, + "62": 0.32323, + "63": 0.33128, + "64": 0.32416, + "65": 0.32428, + "66": 0.32909, + "67": 0.32519, + "68": 0.3235, + "69": 0.33075, + "70": 0.32636, + "71": 0.32447, + "72": 0.32921, + "73": 
0.32654, + "74": 0.32367, + "75": 0.32884, + "76": 0.32668, + "77": 0.32544, + "78": 0.33087, + "79": 0.32596, + "80": 0.32366, + "81": 0.32924, + "82": 0.32879, + "83": 0.32405, + "84": 0.32977, + "85": 0.32708, + "86": 0.32429, + "87": 0.32954, + "88": 0.32748, + "89": 0.32359, + "90": 0.3286, + "91": 0.33163, + "92": 0.32398, + "93": 0.32839, + "94": 0.3316, + "95": 0.32702, + "96": 0.32902, + "97": 0.32869, + "98": 0.32786, + "99": 0.33283, + "100": 0.3296 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index c74efe95bb5..b194abf2755 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.84194, "5": 10.85873, "10": 10.81845, "15": 10.81222, "20": 10.71072, "25": 10.57461, "30": 10.40091, "35": 10.28875, "40": 10.10167, "45": 9.86955, "50": 9.91374, "55": 9.89204, "60": 9.51573, "65": 8.95939, "70": 9.74555, "75": 9.41848, "80": 9.40261, "85": 9.61514, "90": 9.81999, "95": 9.51099, "100": 9.39984}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1664.0, "5": 2007.0, "10": 1469.0, "15": 1992.0, "20": 1767.0, "25": 1747.0, "30": 1936.0, "35": 1963.0, "40": 2274.0, "45": 2043.0, "50": 2278.0, "55": 2307.0, "60": 2287.0, "65": 2544.0, "70": 3049.0, "75": 2539.0, "80": 3101.0, "85": 3288.0, "90": 3168.0, "95": 3186.0, "100": 3212.0}}, "mem-allocated-bytes": 
{"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 397747712.0, "5": 397747712.0, "10": 397747712.0, "15": 397747712.0, "20": 397747712.0, "25": 397747712.0, "30": 397747712.0, "35": 397747712.0, "40": 397747712.0, "45": 397747712.0, "50": 397747712.0, "55": 397747712.0, "60": 397747712.0, "65": 397747712.0, "70": 397747712.0, "75": 397747712.0, "80": 397747712.0, "85": 397747712.0, "90": 397747712.0, "95": 397747712.0, "100": 397747712.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1044755968.0, "5": 1177840128.0, "10": 1177840128.0, "15": 1177840128.0, "20": 1177840128.0, "25": 1177840128.0, "30": 1177840128.0, "35": 1177840128.0, "40": 1177840128.0, "45": 1177840128.0, "50": 1177840128.0, "55": 1177840128.0, "60": 1177840128.0, "65": 1177840128.0, "70": 1177840128.0, "75": 1177840128.0, "80": 1177840128.0, "85": 1177840128.0, "90": 1177840128.0, "95": 1177840128.0, "100": 1177840128.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.05354, "5": 0.25457, "10": 0.23579, "15": 0.24024, "20": 0.23692, "25": 0.24276, "30": 0.24032, "35": 0.26057, "40": 0.23557, "45": 0.23278, "50": 0.23752, "55": 0.25569, "60": 0.23569, "65": 0.23452, "70": 0.2368, "75": 0.24765, "80": 0.24644, "85": 0.23632, "90": 0.23404, "95": 0.23761, "100": 0.24117}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84194, + "2": 10.85713, + "3": 10.84346, + "4": 10.84202, + "5": 10.85873, + "6": 10.86412, + "7": 10.851, + "8": 10.84731, + "9": 10.85736, + "10": 10.81845, + "11": 10.8595, + "12": 10.84335, + "13": 10.86446, + "14": 10.85336, + "15": 10.81222, + "16": 10.81549, + "17": 10.78956, + "18": 10.79784, + "19": 10.79279, + "20": 10.71072, + "21": 10.6971, + "22": 10.58894, + "23": 10.7072, + "24": 10.60764, + "25": 10.57461, + "26": 10.6238, + "27": 10.62036, + "28": 10.567, + "29": 
10.57013, + "30": 10.40091, + "31": 10.17393, + "32": 10.46119, + "33": 10.45713, + "34": 10.24672, + "35": 10.28875, + "36": 10.25284, + "37": 10.3466, + "38": 10.20914, + "39": 10.39432, + "40": 10.10167, + "41": 10.159, + "42": 10.21413, + "43": 9.8848, + "44": 9.98809, + "45": 9.86955, + "46": 9.84366, + "47": 10.1377, + "48": 9.87973, + "49": 9.56916, + "50": 9.91374, + "51": 9.86379, + "52": 9.75652, + "53": 10.06157, + "54": 9.96418, + "55": 9.89204, + "56": 9.63681, + "57": 9.49807, + "58": 9.83504, + "59": 9.59701, + "60": 9.51573, + "61": 9.70155, + "62": 9.97973, + "63": 9.38914, + "64": 9.77552, + "65": 8.95939, + "66": 9.6978, + "67": 9.37174, + "68": 9.78449, + "69": 9.79058, + "70": 9.74555, + "71": 9.61867, + "72": 9.58317, + "73": 9.49175, + "74": 8.939, + "75": 9.41848, + "76": 9.07237, + "77": 10.06903, + "78": 9.72443, + "79": 9.3767, + "80": 9.40261, + "81": 9.47859, + "82": 9.6984, + "83": 9.30086, + "84": 9.41299, + "85": 9.61514, + "86": 9.07881, + "87": 9.59402, + "88": 9.74658, + "89": 9.60096, + "90": 9.81999, + "91": 9.32977, + "92": 9.35625, + "93": 9.07406, + "94": 8.82774, + "95": 9.51099, + "96": 9.52501, + "97": 9.3163, + "98": 9.67278, + "99": 8.88493, + "100": 9.39984 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1664.0, + "2": 1707.0, + "3": 1836.0, + "4": 1861.0, + "5": 2007.0, + "6": 1868.0, + "7": 1826.0, + "8": 1697.0, + "9": 1815.0, + "10": 1469.0, + "11": 1876.0, + "12": 1879.0, + "13": 1979.0, + "14": 1902.0, + "15": 1992.0, + "16": 1988.0, + "17": 1879.0, + "18": 1802.0, + "19": 1886.0, + "20": 1767.0, + "21": 1929.0, + "22": 1714.0, + "23": 2031.0, + "24": 1685.0, + "25": 1747.0, + "26": 1811.0, + "27": 1915.0, + "28": 1929.0, + "29": 2020.0, + "30": 1936.0, + "31": 1680.0, + "32": 1878.0, + "33": 2204.0, + "34": 1888.0, + "35": 1963.0, + "36": 1928.0, + "37": 2383.0, + "38": 2177.0, + "39": 2388.0, + "40": 2274.0, + "41": 2194.0, + "42": 2167.0, + "43": 
1922.0, + "44": 1978.0, + "45": 2043.0, + "46": 2112.0, + "47": 2556.0, + "48": 2251.0, + "49": 2320.0, + "50": 2278.0, + "51": 2563.0, + "52": 2431.0, + "53": 2917.0, + "54": 2655.0, + "55": 2307.0, + "56": 2605.0, + "57": 2385.0, + "58": 2952.0, + "59": 2730.0, + "60": 2287.0, + "61": 2904.0, + "62": 2601.0, + "63": 2452.0, + "64": 2810.0, + "65": 2544.0, + "66": 2914.0, + "67": 2664.0, + "68": 2709.0, + "69": 2967.0, + "70": 3049.0, + "71": 2936.0, + "72": 2410.0, + "73": 2991.0, + "74": 1882.0, + "75": 2539.0, + "76": 3060.0, + "77": 3219.0, + "78": 3023.0, + "79": 3084.0, + "80": 3101.0, + "81": 3530.0, + "82": 3298.0, + "83": 2666.0, + "84": 3154.0, + "85": 3288.0, + "86": 2827.0, + "87": 3720.0, + "88": 3168.0, + "89": 3275.0, + "90": 3168.0, + "91": 2919.0, + "92": 3071.0, + "93": 2751.0, + "94": 3412.0, + "95": 3186.0, + "96": 3429.0, + "97": 3083.0, + "98": 3477.0, + "99": 3093.0, + "100": 3212.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 397747712.0, + "2": 397747712.0, + "3": 397747712.0, + "4": 397747712.0, + "5": 397747712.0, + "6": 397747712.0, + "7": 397747712.0, + "8": 397747712.0, + "9": 397747712.0, + "10": 397747712.0, + "11": 397747712.0, + "12": 397747712.0, + "13": 397747712.0, + "14": 397747712.0, + "15": 397747712.0, + "16": 397747712.0, + "17": 397747712.0, + "18": 397747712.0, + "19": 397747712.0, + "20": 397747712.0, + "21": 397747712.0, + "22": 397747712.0, + "23": 397747712.0, + "24": 397747712.0, + "25": 397747712.0, + "26": 397747712.0, + "27": 397747712.0, + "28": 397747712.0, + "29": 397747712.0, + "30": 397747712.0, + "31": 397747712.0, + "32": 397747712.0, + "33": 397747712.0, + "34": 397747712.0, + "35": 397747712.0, + "36": 397747712.0, + "37": 397747712.0, + "38": 397747712.0, + "39": 397747712.0, + "40": 397747712.0, + "41": 397747712.0, + "42": 397747712.0, + "43": 397747712.0, + "44": 397747712.0, + "45": 397747712.0, + "46": 397747712.0, + "47": 
397747712.0, + "48": 397747712.0, + "49": 397747712.0, + "50": 397747712.0, + "51": 397747712.0, + "52": 397747712.0, + "53": 397747712.0, + "54": 397747712.0, + "55": 397747712.0, + "56": 397747712.0, + "57": 397747712.0, + "58": 397747712.0, + "59": 397747712.0, + "60": 397747712.0, + "61": 397747712.0, + "62": 397747712.0, + "63": 397747712.0, + "64": 397747712.0, + "65": 397747712.0, + "66": 397747712.0, + "67": 397747712.0, + "68": 397747712.0, + "69": 397747712.0, + "70": 397747712.0, + "71": 397747712.0, + "72": 397747712.0, + "73": 397747712.0, + "74": 397747712.0, + "75": 397747712.0, + "76": 397747712.0, + "77": 397747712.0, + "78": 397747712.0, + "79": 397747712.0, + "80": 397747712.0, + "81": 397747712.0, + "82": 397747712.0, + "83": 397747712.0, + "84": 397747712.0, + "85": 397747712.0, + "86": 397747712.0, + "87": 397747712.0, + "88": 397747712.0, + "89": 397747712.0, + "90": 397747712.0, + "91": 397747712.0, + "92": 397747712.0, + "93": 397747712.0, + "94": 397747712.0, + "95": 397747712.0, + "96": 397747712.0, + "97": 397747712.0, + "98": 397747712.0, + "99": 397747712.0, + "100": 397747712.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1044755968.0, + "2": 1177840128.0, + "3": 1177840128.0, + "4": 1177840128.0, + "5": 1177840128.0, + "6": 1177840128.0, + "7": 1177840128.0, + "8": 1177840128.0, + "9": 1177840128.0, + "10": 1177840128.0, + "11": 1177840128.0, + "12": 1177840128.0, + "13": 1177840128.0, + "14": 1177840128.0, + "15": 1177840128.0, + "16": 1177840128.0, + "17": 1177840128.0, + "18": 1177840128.0, + "19": 1177840128.0, + "20": 1177840128.0, + "21": 1177840128.0, + "22": 1177840128.0, + "23": 1177840128.0, + "24": 1177840128.0, + "25": 1177840128.0, + "26": 1177840128.0, + "27": 1177840128.0, + "28": 1177840128.0, + "29": 1177840128.0, + "30": 1177840128.0, + "31": 1177840128.0, + "32": 1177840128.0, + "33": 1177840128.0, + "34": 1177840128.0, + "35": 
1177840128.0, + "36": 1177840128.0, + "37": 1177840128.0, + "38": 1177840128.0, + "39": 1177840128.0, + "40": 1177840128.0, + "41": 1177840128.0, + "42": 1177840128.0, + "43": 1177840128.0, + "44": 1177840128.0, + "45": 1177840128.0, + "46": 1177840128.0, + "47": 1177840128.0, + "48": 1177840128.0, + "49": 1177840128.0, + "50": 1177840128.0, + "51": 1177840128.0, + "52": 1177840128.0, + "53": 1177840128.0, + "54": 1177840128.0, + "55": 1177840128.0, + "56": 1177840128.0, + "57": 1177840128.0, + "58": 1177840128.0, + "59": 1177840128.0, + "60": 1177840128.0, + "61": 1177840128.0, + "62": 1177840128.0, + "63": 1177840128.0, + "64": 1177840128.0, + "65": 1177840128.0, + "66": 1177840128.0, + "67": 1177840128.0, + "68": 1177840128.0, + "69": 1177840128.0, + "70": 1177840128.0, + "71": 1177840128.0, + "72": 1177840128.0, + "73": 1177840128.0, + "74": 1177840128.0, + "75": 1177840128.0, + "76": 1177840128.0, + "77": 1177840128.0, + "78": 1177840128.0, + "79": 1177840128.0, + "80": 1177840128.0, + "81": 1177840128.0, + "82": 1177840128.0, + "83": 1177840128.0, + "84": 1177840128.0, + "85": 1177840128.0, + "86": 1177840128.0, + "87": 1177840128.0, + "88": 1177840128.0, + "89": 1177840128.0, + "90": 1177840128.0, + "91": 1177840128.0, + "92": 1177840128.0, + "93": 1177840128.0, + "94": 1177840128.0, + "95": 1177840128.0, + "96": 1177840128.0, + "97": 1177840128.0, + "98": 1177840128.0, + "99": 1177840128.0, + "100": 1177840128.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.98808, + "2": 0.31896, + "3": 0.2872, + "4": 0.28844, + "5": 0.29055, + "6": 0.28565, + "7": 0.29151, + "8": 0.2909, + "9": 0.28554, + "10": 0.28532, + "11": 0.28987, + "12": 0.29026, + "13": 0.28704, + "14": 0.28868, + "15": 0.29081, + "16": 0.29135, + "17": 0.29053, + "18": 0.29219, + "19": 0.28784, + "20": 0.29358, + "21": 0.30495, + "22": 0.29941, + "23": 0.29122, + "24": 0.29122, + "25": 0.29408, + "26": 0.29093, + "27": 0.2904, + 
"28": 0.29116, + "29": 0.29607, + "30": 0.29163, + "31": 0.29002, + "32": 0.29186, + "33": 0.28732, + "34": 0.28673, + "35": 0.29062, + "36": 0.2913, + "37": 0.28723, + "38": 0.28871, + "39": 0.29253, + "40": 0.2884, + "41": 0.28738, + "42": 0.28836, + "43": 0.28808, + "44": 0.28794, + "45": 0.29124, + "46": 0.29271, + "47": 0.28573, + "48": 0.28587, + "49": 0.28908, + "50": 0.28839, + "51": 0.30021, + "52": 0.30654, + "53": 0.3059, + "54": 0.29714, + "55": 0.28911, + "56": 0.29586, + "57": 0.29074, + "58": 0.28682, + "59": 0.29439, + "60": 0.28999, + "61": 0.29254, + "62": 0.28813, + "63": 0.29743, + "64": 0.28913, + "65": 0.29726, + "66": 0.29597, + "67": 0.28858, + "68": 0.29025, + "69": 0.29089, + "70": 0.29517, + "71": 0.28924, + "72": 0.29291, + "73": 0.29626, + "74": 0.29034, + "75": 0.28667, + "76": 0.29537, + "77": 0.29663, + "78": 0.29518, + "79": 0.29485, + "80": 0.29784, + "81": 0.2912, + "82": 0.29265, + "83": 0.29806, + "84": 0.29292, + "85": 0.29315, + "86": 0.31345, + "87": 0.31236, + "88": 0.29799, + "89": 0.2941, + "90": 0.29816, + "91": 0.29109, + "92": 0.2885, + "93": 0.29422, + "94": 0.29493, + "95": 0.28717, + "96": 0.29109, + "97": 0.29595, + "98": 0.29077, + "99": 0.29004, + "100": 0.29477 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..bd823394dd2 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84194, + "2": 10.85713, + "3": 10.84346, + 
"4": 10.84202, + "5": 10.85873, + "6": 10.86412, + "7": 10.851, + "8": 10.84731, + "9": 10.85736, + "10": 10.81845, + "11": 10.8595, + "12": 10.84335, + "13": 10.86446, + "14": 10.85336, + "15": 10.81222, + "16": 10.81549, + "17": 10.78956, + "18": 10.79784, + "19": 10.79279, + "20": 10.71072, + "21": 10.6971, + "22": 10.58894, + "23": 10.7072, + "24": 10.60764, + "25": 10.57461, + "26": 10.6238, + "27": 10.62036, + "28": 10.567, + "29": 10.57013, + "30": 10.40091, + "31": 10.17393, + "32": 10.46119, + "33": 10.45713, + "34": 10.24672, + "35": 10.28875, + "36": 10.25284, + "37": 10.3466, + "38": 10.20914, + "39": 10.39432, + "40": 10.10167, + "41": 10.159, + "42": 10.21413, + "43": 9.8848, + "44": 9.98809, + "45": 9.86955, + "46": 9.84366, + "47": 10.1377, + "48": 9.87973, + "49": 9.56916, + "50": 9.91374, + "51": 9.86379, + "52": 9.75652, + "53": 10.06157, + "54": 9.96418, + "55": 9.89204, + "56": 9.63681, + "57": 9.49807, + "58": 9.83504, + "59": 9.59701, + "60": 9.51573, + "61": 9.70155, + "62": 9.97973, + "63": 9.38914, + "64": 9.77552, + "65": 8.95939, + "66": 9.6978, + "67": 9.37174, + "68": 9.78449, + "69": 9.79058, + "70": 9.74555, + "71": 9.61867, + "72": 9.58317, + "73": 9.49175, + "74": 8.939, + "75": 9.41848, + "76": 9.07237, + "77": 10.06903, + "78": 9.72443, + "79": 9.3767, + "80": 9.40261, + "81": 9.47859, + "82": 9.6984, + "83": 9.30086, + "84": 9.41299, + "85": 9.61514, + "86": 9.07881, + "87": 9.59402, + "88": 9.74658, + "89": 9.60096, + "90": 9.81999, + "91": 9.32977, + "92": 9.35625, + "93": 9.07406, + "94": 8.82774, + "95": 9.51099, + "96": 9.52501, + "97": 9.3163, + "98": 9.67278, + "99": 8.88493, + "100": 9.39984 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1664.0, + "2": 1707.0, + "3": 1836.0, + "4": 1861.0, + "5": 2007.0, + "6": 1868.0, + "7": 1826.0, + "8": 1697.0, + "9": 1815.0, + "10": 1469.0, + "11": 1876.0, + "12": 1879.0, + "13": 1979.0, + "14": 1902.0, + "15": 1992.0, + 
"16": 1988.0, + "17": 1879.0, + "18": 1802.0, + "19": 1886.0, + "20": 1767.0, + "21": 1929.0, + "22": 1714.0, + "23": 2031.0, + "24": 1685.0, + "25": 1747.0, + "26": 1811.0, + "27": 1915.0, + "28": 1929.0, + "29": 2020.0, + "30": 1936.0, + "31": 1680.0, + "32": 1878.0, + "33": 2204.0, + "34": 1888.0, + "35": 1963.0, + "36": 1928.0, + "37": 2383.0, + "38": 2177.0, + "39": 2388.0, + "40": 2274.0, + "41": 2194.0, + "42": 2167.0, + "43": 1922.0, + "44": 1978.0, + "45": 2043.0, + "46": 2112.0, + "47": 2556.0, + "48": 2251.0, + "49": 2320.0, + "50": 2278.0, + "51": 2563.0, + "52": 2431.0, + "53": 2917.0, + "54": 2655.0, + "55": 2307.0, + "56": 2605.0, + "57": 2385.0, + "58": 2952.0, + "59": 2730.0, + "60": 2287.0, + "61": 2904.0, + "62": 2601.0, + "63": 2452.0, + "64": 2810.0, + "65": 2544.0, + "66": 2914.0, + "67": 2664.0, + "68": 2709.0, + "69": 2967.0, + "70": 3049.0, + "71": 2936.0, + "72": 2410.0, + "73": 2991.0, + "74": 1882.0, + "75": 2539.0, + "76": 3060.0, + "77": 3219.0, + "78": 3023.0, + "79": 3084.0, + "80": 3101.0, + "81": 3530.0, + "82": 3298.0, + "83": 2666.0, + "84": 3154.0, + "85": 3288.0, + "86": 2827.0, + "87": 3720.0, + "88": 3168.0, + "89": 3275.0, + "90": 3168.0, + "91": 2919.0, + "92": 3071.0, + "93": 2751.0, + "94": 3412.0, + "95": 3186.0, + "96": 3429.0, + "97": 3083.0, + "98": 3477.0, + "99": 3093.0, + "100": 3212.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 397747712.0, + "2": 397747712.0, + "3": 397747712.0, + "4": 397747712.0, + "5": 397747712.0, + "6": 397747712.0, + "7": 397747712.0, + "8": 397747712.0, + "9": 397747712.0, + "10": 397747712.0, + "11": 397747712.0, + "12": 397747712.0, + "13": 397747712.0, + "14": 397747712.0, + "15": 397747712.0, + "16": 397747712.0, + "17": 397747712.0, + "18": 397747712.0, + "19": 397747712.0, + "20": 397747712.0, + "21": 397747712.0, + "22": 397747712.0, + "23": 397747712.0, + "24": 397747712.0, + "25": 397747712.0, + "26": 
397747712.0, + "27": 397747712.0, + "28": 397747712.0, + "29": 397747712.0, + "30": 397747712.0, + "31": 397747712.0, + "32": 397747712.0, + "33": 397747712.0, + "34": 397747712.0, + "35": 397747712.0, + "36": 397747712.0, + "37": 397747712.0, + "38": 397747712.0, + "39": 397747712.0, + "40": 397747712.0, + "41": 397747712.0, + "42": 397747712.0, + "43": 397747712.0, + "44": 397747712.0, + "45": 397747712.0, + "46": 397747712.0, + "47": 397747712.0, + "48": 397747712.0, + "49": 397747712.0, + "50": 397747712.0, + "51": 397747712.0, + "52": 397747712.0, + "53": 397747712.0, + "54": 397747712.0, + "55": 397747712.0, + "56": 397747712.0, + "57": 397747712.0, + "58": 397747712.0, + "59": 397747712.0, + "60": 397747712.0, + "61": 397747712.0, + "62": 397747712.0, + "63": 397747712.0, + "64": 397747712.0, + "65": 397747712.0, + "66": 397747712.0, + "67": 397747712.0, + "68": 397747712.0, + "69": 397747712.0, + "70": 397747712.0, + "71": 397747712.0, + "72": 397747712.0, + "73": 397747712.0, + "74": 397747712.0, + "75": 397747712.0, + "76": 397747712.0, + "77": 397747712.0, + "78": 397747712.0, + "79": 397747712.0, + "80": 397747712.0, + "81": 397747712.0, + "82": 397747712.0, + "83": 397747712.0, + "84": 397747712.0, + "85": 397747712.0, + "86": 397747712.0, + "87": 397747712.0, + "88": 397747712.0, + "89": 397747712.0, + "90": 397747712.0, + "91": 397747712.0, + "92": 397747712.0, + "93": 397747712.0, + "94": 397747712.0, + "95": 397747712.0, + "96": 397747712.0, + "97": 397747712.0, + "98": 397747712.0, + "99": 397747712.0, + "100": 397747712.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1044755968.0, + "2": 1177840128.0, + "3": 1177840128.0, + "4": 1177840128.0, + "5": 1177840128.0, + "6": 1177840128.0, + "7": 1177840128.0, + "8": 1177840128.0, + "9": 1177840128.0, + "10": 1177840128.0, + "11": 1177840128.0, + "12": 1177840128.0, + "13": 1177840128.0, + "14": 1177840128.0, + "15": 
1177840128.0, + "16": 1177840128.0, + "17": 1177840128.0, + "18": 1177840128.0, + "19": 1177840128.0, + "20": 1177840128.0, + "21": 1177840128.0, + "22": 1177840128.0, + "23": 1177840128.0, + "24": 1177840128.0, + "25": 1177840128.0, + "26": 1177840128.0, + "27": 1177840128.0, + "28": 1177840128.0, + "29": 1177840128.0, + "30": 1177840128.0, + "31": 1177840128.0, + "32": 1177840128.0, + "33": 1177840128.0, + "34": 1177840128.0, + "35": 1177840128.0, + "36": 1177840128.0, + "37": 1177840128.0, + "38": 1177840128.0, + "39": 1177840128.0, + "40": 1177840128.0, + "41": 1177840128.0, + "42": 1177840128.0, + "43": 1177840128.0, + "44": 1177840128.0, + "45": 1177840128.0, + "46": 1177840128.0, + "47": 1177840128.0, + "48": 1177840128.0, + "49": 1177840128.0, + "50": 1177840128.0, + "51": 1177840128.0, + "52": 1177840128.0, + "53": 1177840128.0, + "54": 1177840128.0, + "55": 1177840128.0, + "56": 1177840128.0, + "57": 1177840128.0, + "58": 1177840128.0, + "59": 1177840128.0, + "60": 1177840128.0, + "61": 1177840128.0, + "62": 1177840128.0, + "63": 1177840128.0, + "64": 1177840128.0, + "65": 1177840128.0, + "66": 1177840128.0, + "67": 1177840128.0, + "68": 1177840128.0, + "69": 1177840128.0, + "70": 1177840128.0, + "71": 1177840128.0, + "72": 1177840128.0, + "73": 1177840128.0, + "74": 1177840128.0, + "75": 1177840128.0, + "76": 1177840128.0, + "77": 1177840128.0, + "78": 1177840128.0, + "79": 1177840128.0, + "80": 1177840128.0, + "81": 1177840128.0, + "82": 1177840128.0, + "83": 1177840128.0, + "84": 1177840128.0, + "85": 1177840128.0, + "86": 1177840128.0, + "87": 1177840128.0, + "88": 1177840128.0, + "89": 1177840128.0, + "90": 1177840128.0, + "91": 1177840128.0, + "92": 1177840128.0, + "93": 1177840128.0, + "94": 1177840128.0, + "95": 1177840128.0, + "96": 1177840128.0, + "97": 1177840128.0, + "98": 1177840128.0, + "99": 1177840128.0, + "100": 1177840128.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 
9.95666, + "2": 0.32924, + "3": 0.25226, + "4": 0.25106, + "5": 0.25493, + "6": 0.25253, + "7": 0.25357, + "8": 0.25271, + "9": 0.25432, + "10": 0.25385, + "11": 0.25308, + "12": 0.25347, + "13": 0.25055, + "14": 0.25356, + "15": 0.26243, + "16": 0.26195, + "17": 0.25653, + "18": 0.25321, + "19": 0.25683, + "20": 0.253, + "21": 0.26002, + "22": 0.25583, + "23": 0.2569, + "24": 0.25453, + "25": 0.25674, + "26": 0.28427, + "27": 0.26846, + "28": 0.25669, + "29": 0.25979, + "30": 0.25506, + "31": 0.25795, + "32": 0.25594, + "33": 0.25547, + "34": 0.25599, + "35": 0.2592, + "36": 0.25766, + "37": 0.25711, + "38": 0.25265, + "39": 0.25683, + "40": 0.25734, + "41": 0.25589, + "42": 0.25063, + "43": 0.25742, + "44": 0.25967, + "45": 0.25573, + "46": 0.25687, + "47": 0.26161, + "48": 0.25952, + "49": 0.25626, + "50": 0.25429, + "51": 0.26173, + "52": 0.27578, + "53": 0.2696, + "54": 0.26719, + "55": 0.26842, + "56": 0.27282, + "57": 0.27059, + "58": 0.26573, + "59": 0.27553, + "60": 0.26764, + "61": 0.25837, + "62": 0.25923, + "63": 0.27037, + "64": 0.26917, + "65": 0.26615, + "66": 0.57271, + "67": 0.26906, + "68": 0.26543, + "69": 0.26985, + "70": 0.27165, + "71": 0.26533, + "72": 0.27015, + "73": 0.26666, + "74": 0.26902, + "75": 0.26747, + "76": 0.26725, + "77": 0.269, + "78": 0.27067, + "79": 0.26982, + "80": 0.26617, + "81": 0.269, + "82": 0.26853, + "83": 0.26607, + "84": 0.26722, + "85": 0.27017, + "86": 0.2778, + "87": 0.27697, + "88": 0.27012, + "89": 0.27065, + "90": 0.26599, + "91": 0.26551, + "92": 0.27357, + "93": 0.27599, + "94": 0.26598, + "95": 0.27382, + "96": 0.27956, + "97": 0.26613, + "98": 0.26511, + "99": 0.26941, + "100": 0.27208 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..d5d1de46cac --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84194, + "2": 10.85713, + "3": 10.84346, + "4": 10.84202, + "5": 10.85873, + "6": 10.86412, + "7": 10.851, + "8": 10.84731, + "9": 10.85736, + "10": 10.81845, + "11": 10.8595, + "12": 10.84335, + "13": 10.86446, + "14": 10.85336, + "15": 10.81222, + "16": 10.81549, + "17": 10.78956, + "18": 10.79784, + "19": 10.79279, + "20": 10.71072, + "21": 10.6971, + "22": 10.58894, + "23": 10.7072, + "24": 10.60764, + "25": 10.57461, + "26": 10.6238, + "27": 10.62036, + "28": 10.567, + "29": 10.57013, + "30": 10.40091, + "31": 10.17393, + "32": 10.46119, + "33": 10.45713, + "34": 10.24672, + "35": 10.28875, + "36": 10.25284, + "37": 10.3466, + "38": 10.20914, + "39": 10.39432, + "40": 10.10167, + "41": 10.159, + "42": 10.21413, + "43": 9.8848, + "44": 9.98809, + "45": 9.86955, + "46": 9.84366, + "47": 10.1377, + "48": 9.87973, + "49": 9.56916, + "50": 9.91374, + "51": 9.86379, + "52": 9.75652, + "53": 10.06157, + "54": 9.96418, + "55": 9.89204, + "56": 9.63681, + "57": 9.49807, + "58": 9.83504, + "59": 9.59701, + "60": 9.51573, + "61": 9.70155, + "62": 9.97973, + "63": 9.38914, + "64": 9.77552, + "65": 8.95939, + "66": 9.6978, + "67": 9.37174, + "68": 9.78449, + "69": 9.79058, + "70": 9.74555, + "71": 9.61867, + "72": 9.58317, + "73": 9.49175, + "74": 8.939, + "75": 9.41848, + "76": 9.07237, + "77": 10.06903, + "78": 9.72443, + "79": 9.3767, + "80": 9.40261, + "81": 9.47859, + "82": 9.6984, + "83": 9.30086, + "84": 9.41299, + "85": 9.61514, + "86": 9.07881, + "87": 9.59402, + "88": 
9.74658, + "89": 9.60096, + "90": 9.81999, + "91": 9.32977, + "92": 9.35625, + "93": 9.07406, + "94": 8.82774, + "95": 9.51099, + "96": 9.52501, + "97": 9.3163, + "98": 9.67278, + "99": 8.88493, + "100": 9.39984 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1664.0, + "2": 1707.0, + "3": 1836.0, + "4": 1861.0, + "5": 2007.0, + "6": 1868.0, + "7": 1826.0, + "8": 1697.0, + "9": 1815.0, + "10": 1469.0, + "11": 1876.0, + "12": 1879.0, + "13": 1979.0, + "14": 1902.0, + "15": 1992.0, + "16": 1988.0, + "17": 1879.0, + "18": 1802.0, + "19": 1886.0, + "20": 1767.0, + "21": 1929.0, + "22": 1714.0, + "23": 2031.0, + "24": 1685.0, + "25": 1747.0, + "26": 1811.0, + "27": 1915.0, + "28": 1929.0, + "29": 2020.0, + "30": 1936.0, + "31": 1680.0, + "32": 1878.0, + "33": 2204.0, + "34": 1888.0, + "35": 1963.0, + "36": 1928.0, + "37": 2383.0, + "38": 2177.0, + "39": 2388.0, + "40": 2274.0, + "41": 2194.0, + "42": 2167.0, + "43": 1922.0, + "44": 1978.0, + "45": 2043.0, + "46": 2112.0, + "47": 2556.0, + "48": 2251.0, + "49": 2320.0, + "50": 2278.0, + "51": 2563.0, + "52": 2431.0, + "53": 2917.0, + "54": 2655.0, + "55": 2307.0, + "56": 2605.0, + "57": 2385.0, + "58": 2952.0, + "59": 2730.0, + "60": 2287.0, + "61": 2904.0, + "62": 2601.0, + "63": 2452.0, + "64": 2810.0, + "65": 2544.0, + "66": 2914.0, + "67": 2664.0, + "68": 2709.0, + "69": 2967.0, + "70": 3049.0, + "71": 2936.0, + "72": 2410.0, + "73": 2991.0, + "74": 1882.0, + "75": 2539.0, + "76": 3060.0, + "77": 3219.0, + "78": 3023.0, + "79": 3084.0, + "80": 3101.0, + "81": 3530.0, + "82": 3298.0, + "83": 2666.0, + "84": 3154.0, + "85": 3288.0, + "86": 2827.0, + "87": 3720.0, + "88": 3168.0, + "89": 3275.0, + "90": 3168.0, + "91": 2919.0, + "92": 3071.0, + "93": 2751.0, + "94": 3412.0, + "95": 3186.0, + "96": 3429.0, + "97": 3083.0, + "98": 3477.0, + "99": 3093.0, + "100": 3212.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, 
+ "values": { + "1": 397747712.0, + "2": 397747712.0, + "3": 397747712.0, + "4": 397747712.0, + "5": 397747712.0, + "6": 397747712.0, + "7": 397747712.0, + "8": 397747712.0, + "9": 397747712.0, + "10": 397747712.0, + "11": 397747712.0, + "12": 397747712.0, + "13": 397747712.0, + "14": 397747712.0, + "15": 397747712.0, + "16": 397747712.0, + "17": 397747712.0, + "18": 397747712.0, + "19": 397747712.0, + "20": 397747712.0, + "21": 397747712.0, + "22": 397747712.0, + "23": 397747712.0, + "24": 397747712.0, + "25": 397747712.0, + "26": 397747712.0, + "27": 397747712.0, + "28": 397747712.0, + "29": 397747712.0, + "30": 397747712.0, + "31": 397747712.0, + "32": 397747712.0, + "33": 397747712.0, + "34": 397747712.0, + "35": 397747712.0, + "36": 397747712.0, + "37": 397747712.0, + "38": 397747712.0, + "39": 397747712.0, + "40": 397747712.0, + "41": 397747712.0, + "42": 397747712.0, + "43": 397747712.0, + "44": 397747712.0, + "45": 397747712.0, + "46": 397747712.0, + "47": 397747712.0, + "48": 397747712.0, + "49": 397747712.0, + "50": 397747712.0, + "51": 397747712.0, + "52": 397747712.0, + "53": 397747712.0, + "54": 397747712.0, + "55": 397747712.0, + "56": 397747712.0, + "57": 397747712.0, + "58": 397747712.0, + "59": 397747712.0, + "60": 397747712.0, + "61": 397747712.0, + "62": 397747712.0, + "63": 397747712.0, + "64": 397747712.0, + "65": 397747712.0, + "66": 397747712.0, + "67": 397747712.0, + "68": 397747712.0, + "69": 397747712.0, + "70": 397747712.0, + "71": 397747712.0, + "72": 397747712.0, + "73": 397747712.0, + "74": 397747712.0, + "75": 397747712.0, + "76": 397747712.0, + "77": 397747712.0, + "78": 397747712.0, + "79": 397747712.0, + "80": 397747712.0, + "81": 397747712.0, + "82": 397747712.0, + "83": 397747712.0, + "84": 397747712.0, + "85": 397747712.0, + "86": 397747712.0, + "87": 397747712.0, + "88": 397747712.0, + "89": 397747712.0, + "90": 397747712.0, + "91": 397747712.0, + "92": 397747712.0, + "93": 397747712.0, + "94": 397747712.0, + "95": 397747712.0, 
+ "96": 397747712.0, + "97": 397747712.0, + "98": 397747712.0, + "99": 397747712.0, + "100": 397747712.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1044755968.0, + "2": 1177840128.0, + "3": 1177840128.0, + "4": 1177840128.0, + "5": 1177840128.0, + "6": 1177840128.0, + "7": 1177840128.0, + "8": 1177840128.0, + "9": 1177840128.0, + "10": 1177840128.0, + "11": 1177840128.0, + "12": 1177840128.0, + "13": 1177840128.0, + "14": 1177840128.0, + "15": 1177840128.0, + "16": 1177840128.0, + "17": 1177840128.0, + "18": 1177840128.0, + "19": 1177840128.0, + "20": 1177840128.0, + "21": 1177840128.0, + "22": 1177840128.0, + "23": 1177840128.0, + "24": 1177840128.0, + "25": 1177840128.0, + "26": 1177840128.0, + "27": 1177840128.0, + "28": 1177840128.0, + "29": 1177840128.0, + "30": 1177840128.0, + "31": 1177840128.0, + "32": 1177840128.0, + "33": 1177840128.0, + "34": 1177840128.0, + "35": 1177840128.0, + "36": 1177840128.0, + "37": 1177840128.0, + "38": 1177840128.0, + "39": 1177840128.0, + "40": 1177840128.0, + "41": 1177840128.0, + "42": 1177840128.0, + "43": 1177840128.0, + "44": 1177840128.0, + "45": 1177840128.0, + "46": 1177840128.0, + "47": 1177840128.0, + "48": 1177840128.0, + "49": 1177840128.0, + "50": 1177840128.0, + "51": 1177840128.0, + "52": 1177840128.0, + "53": 1177840128.0, + "54": 1177840128.0, + "55": 1177840128.0, + "56": 1177840128.0, + "57": 1177840128.0, + "58": 1177840128.0, + "59": 1177840128.0, + "60": 1177840128.0, + "61": 1177840128.0, + "62": 1177840128.0, + "63": 1177840128.0, + "64": 1177840128.0, + "65": 1177840128.0, + "66": 1177840128.0, + "67": 1177840128.0, + "68": 1177840128.0, + "69": 1177840128.0, + "70": 1177840128.0, + "71": 1177840128.0, + "72": 1177840128.0, + "73": 1177840128.0, + "74": 1177840128.0, + "75": 1177840128.0, + "76": 1177840128.0, + "77": 1177840128.0, + "78": 1177840128.0, + "79": 1177840128.0, + "80": 1177840128.0, + "81": 1177840128.0, + "82": 
1177840128.0, + "83": 1177840128.0, + "84": 1177840128.0, + "85": 1177840128.0, + "86": 1177840128.0, + "87": 1177840128.0, + "88": 1177840128.0, + "89": 1177840128.0, + "90": 1177840128.0, + "91": 1177840128.0, + "92": 1177840128.0, + "93": 1177840128.0, + "94": 1177840128.0, + "95": 1177840128.0, + "96": 1177840128.0, + "97": 1177840128.0, + "98": 1177840128.0, + "99": 1177840128.0, + "100": 1177840128.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.61367, + "2": 0.31935, + "3": 0.29274, + "4": 0.28637, + "5": 0.2844, + "6": 0.29788, + "7": 0.2902, + "8": 0.28573, + "9": 0.29136, + "10": 0.29884, + "11": 0.29048, + "12": 0.2896, + "13": 0.29421, + "14": 0.29008, + "15": 0.2871, + "16": 0.28903, + "17": 0.2924, + "18": 0.28887, + "19": 0.28926, + "20": 0.30241, + "21": 0.29571, + "22": 0.28966, + "23": 0.29177, + "24": 0.29106, + "25": 0.28884, + "26": 0.28921, + "27": 0.29461, + "28": 0.28664, + "29": 0.28881, + "30": 0.29392, + "31": 0.29062, + "32": 0.28778, + "33": 0.29055, + "34": 0.29409, + "35": 0.29169, + "36": 0.29211, + "37": 0.29809, + "38": 0.29114, + "39": 0.29052, + "40": 0.2919, + "41": 0.2953, + "42": 0.28957, + "43": 0.29349, + "44": 0.30062, + "45": 0.28999, + "46": 0.29486, + "47": 0.29689, + "48": 0.29092, + "49": 0.29024, + "50": 0.28916, + "51": 0.30865, + "52": 0.29957, + "53": 0.28833, + "54": 0.29375, + "55": 0.29176, + "56": 0.29338, + "57": 0.28952, + "58": 0.29232, + "59": 0.29026, + "60": 0.28767, + "61": 0.29364, + "62": 0.2935, + "63": 0.29522, + "64": 0.29495, + "65": 0.29509, + "66": 0.29643, + "67": 0.29584, + "68": 0.29853, + "69": 0.29821, + "70": 0.29334, + "71": 0.29579, + "72": 0.29325, + "73": 0.29403, + "74": 0.29671, + "75": 0.63106, + "76": 0.29142, + "77": 0.29491, + "78": 0.29437, + "79": 0.29239, + "80": 0.29453, + "81": 0.29509, + "82": 0.29493, + "83": 0.2915, + "84": 0.30181, + "85": 0.29305, + "86": 0.28823, + "87": 0.29337, + "88": 0.29025, + 
"89": 0.28953, + "90": 0.29694, + "91": 0.29077, + "92": 0.29411, + "93": 0.28767, + "94": 0.29313, + "95": 0.29276, + "96": 0.29197, + "97": 0.29466, + "98": 0.29321, + "99": 0.29311, + "100": 0.29175 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..68686a287ae --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86209, + "2": 10.85806, + "3": 10.8598, + "4": 10.84984, + "5": 10.88253, + "6": 10.88646, + "7": 10.8626, + "8": 10.86997, + "9": 10.86483, + "10": 10.83642, + "11": 10.87862, + "12": 10.87482, + "13": 10.87957, + "14": 10.88968, + "15": 10.82909, + "16": 10.8329, + "17": 10.79973, + "18": 10.82619, + "19": 10.81484, + "20": 10.73237, + "21": 10.72029, + "22": 10.57776, + "23": 10.73009, + "24": 10.61704, + "25": 10.56392, + "26": 10.6109, + "27": 10.6244, + "28": 10.58233, + "29": 10.59936, + "30": 10.38484, + "31": 10.14179, + "32": 10.48065, + "33": 10.47405, + "34": 10.23471, + "35": 10.28951, + "36": 10.23434, + "37": 10.35826, + "38": 10.20825, + "39": 10.41154, + "40": 10.09133, + "41": 10.1661, + "42": 10.21968, + "43": 9.85861, + "44": 9.97128, + "45": 9.83487, + "46": 9.84446, + "47": 10.15847, + "48": 9.85182, + "49": 9.53839, + "50": 9.91604, + "51": 9.85736, + "52": 9.75252, + "53": 10.0755, + "54": 9.96042, + "55": 9.88232, + "56": 9.63204, + "57": 9.49336, + "58": 9.83436, + "59": 9.59208, + "60": 9.51376, + "61": 9.69806, + "62": 
9.99169, + "63": 9.37379, + "64": 9.77832, + "65": 8.95392, + "66": 9.71066, + "67": 9.38186, + "68": 9.78754, + "69": 9.7933, + "70": 9.73094, + "71": 9.61728, + "72": 9.58467, + "73": 9.4898, + "74": 8.94127, + "75": 9.4313, + "76": 9.09097, + "77": 10.06237, + "78": 9.72645, + "79": 9.37428, + "80": 9.40597, + "81": 9.47979, + "82": 9.69227, + "83": 9.3124, + "84": 9.41987, + "85": 9.61137, + "86": 9.06834, + "87": 9.59084, + "88": 9.74523, + "89": 9.6065, + "90": 9.81743, + "91": 9.34257, + "92": 9.35903, + "93": 9.07904, + "94": 8.82791, + "95": 9.51571, + "96": 9.52139, + "97": 9.31116, + "98": 9.67194, + "99": 8.88688, + "100": 9.40429 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1780.0, + "2": 1990.0, + "3": 1911.0, + "4": 1881.0, + "5": 2137.0, + "6": 2167.0, + "7": 2095.0, + "8": 1824.0, + "9": 2072.0, + "10": 1588.0, + "11": 2120.0, + "12": 2042.0, + "13": 2228.0, + "14": 2143.0, + "15": 2083.0, + "16": 1988.0, + "17": 2055.0, + "18": 1945.0, + "19": 2015.0, + "20": 1816.0, + "21": 2133.0, + "22": 1909.0, + "23": 2404.0, + "24": 1868.0, + "25": 1862.0, + "26": 1978.0, + "27": 2095.0, + "28": 2298.0, + "29": 2242.0, + "30": 2045.0, + "31": 1805.0, + "32": 2205.0, + "33": 2426.0, + "34": 2176.0, + "35": 2205.0, + "36": 2185.0, + "37": 2605.0, + "38": 2508.0, + "39": 2524.0, + "40": 2629.0, + "41": 2531.0, + "42": 2594.0, + "43": 2335.0, + "44": 2316.0, + "45": 2441.0, + "46": 2665.0, + "47": 2694.0, + "48": 2587.0, + "49": 2538.0, + "50": 2734.0, + "51": 2906.0, + "52": 2829.0, + "53": 3163.0, + "54": 3001.0, + "55": 2662.0, + "56": 2967.0, + "57": 2540.0, + "58": 3326.0, + "59": 3105.0, + "60": 2726.0, + "61": 3284.0, + "62": 2957.0, + "63": 2690.0, + "64": 3247.0, + "65": 3011.0, + "66": 3409.0, + "67": 2852.0, + "68": 3048.0, + "69": 3229.0, + "70": 3737.0, + "71": 3186.0, + "72": 2634.0, + "73": 3390.0, + "74": 2125.0, + "75": 2771.0, + "76": 3235.0, + "77": 3605.0, + "78": 3672.0, + "79": 
3633.0, + "80": 3804.0, + "81": 4084.0, + "82": 3675.0, + "83": 3138.0, + "84": 3636.0, + "85": 3588.0, + "86": 3171.0, + "87": 4250.0, + "88": 3592.0, + "89": 3775.0, + "90": 3384.0, + "91": 3074.0, + "92": 3533.0, + "93": 3067.0, + "94": 3730.0, + "95": 3590.0, + "96": 3888.0, + "97": 3580.0, + "98": 4012.0, + "99": 3315.0, + "100": 3454.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 368387072.0, + "2": 368387072.0, + "3": 368387072.0, + "4": 368387072.0, + "5": 368387072.0, + "6": 368387072.0, + "7": 368387072.0, + "8": 368387072.0, + "9": 368387072.0, + "10": 368387072.0, + "11": 368387072.0, + "12": 368387072.0, + "13": 368387072.0, + "14": 368387072.0, + "15": 368387072.0, + "16": 368387072.0, + "17": 368387072.0, + "18": 368387072.0, + "19": 368387072.0, + "20": 368387072.0, + "21": 368387072.0, + "22": 368387072.0, + "23": 368387072.0, + "24": 368387072.0, + "25": 368387072.0, + "26": 368387072.0, + "27": 368387072.0, + "28": 368387072.0, + "29": 368387072.0, + "30": 368387072.0, + "31": 368387072.0, + "32": 368387072.0, + "33": 368387072.0, + "34": 368387072.0, + "35": 368387072.0, + "36": 368387072.0, + "37": 368387072.0, + "38": 368387072.0, + "39": 368387072.0, + "40": 368387072.0, + "41": 368387072.0, + "42": 368387072.0, + "43": 368387072.0, + "44": 368387072.0, + "45": 368387072.0, + "46": 368387072.0, + "47": 368387072.0, + "48": 368387072.0, + "49": 368387072.0, + "50": 368387072.0, + "51": 368387072.0, + "52": 368387072.0, + "53": 368387072.0, + "54": 368387072.0, + "55": 368387072.0, + "56": 368387072.0, + "57": 368387072.0, + "58": 368387072.0, + "59": 368387072.0, + "60": 368387072.0, + "61": 368387072.0, + "62": 368387072.0, + "63": 368387072.0, + "64": 368387072.0, + "65": 368387072.0, + "66": 368387072.0, + "67": 368387072.0, + "68": 368387072.0, + "69": 368387072.0, + "70": 368387072.0, + "71": 368387072.0, + "72": 368387072.0, + "73": 368387072.0, + "74": 368387072.0, 
+ "75": 368387072.0, + "76": 368387072.0, + "77": 368387072.0, + "78": 368387072.0, + "79": 368387072.0, + "80": 368387072.0, + "81": 368387072.0, + "82": 368387072.0, + "83": 368387072.0, + "84": 368387072.0, + "85": 368387072.0, + "86": 368387072.0, + "87": 368387072.0, + "88": 368387072.0, + "89": 368387072.0, + "90": 368387072.0, + "91": 368387072.0, + "92": 368387072.0, + "93": 368387072.0, + "94": 368387072.0, + "95": 368387072.0, + "96": 368387072.0, + "97": 368387072.0, + "98": 368387072.0, + "99": 368387072.0, + "100": 368387072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1510972416.0, + "2": 1643008000.0, + "3": 1643008000.0, + "4": 1643008000.0, + "5": 1643008000.0, + "6": 1645105152.0, + "7": 1645105152.0, + "8": 1645105152.0, + "9": 1645105152.0, + "10": 1645105152.0, + "11": 1647201280.0, + "12": 1647201280.0, + "13": 1647201280.0, + "14": 1647201280.0, + "15": 1647201280.0, + "16": 1647201280.0, + "17": 1647201280.0, + "18": 1647201280.0, + "19": 1647201280.0, + "20": 1647201280.0, + "21": 1647201280.0, + "22": 1647201280.0, + "23": 1647201280.0, + "24": 1647201280.0, + "25": 1647201280.0, + "26": 1647201280.0, + "27": 1647201280.0, + "28": 1647201280.0, + "29": 1647201280.0, + "30": 1647201280.0, + "31": 1647201280.0, + "32": 1647201280.0, + "33": 1647201280.0, + "34": 1647201280.0, + "35": 1647201280.0, + "36": 1647201280.0, + "37": 1647201280.0, + "38": 1649296896.0, + "39": 1649296896.0, + "40": 1649296896.0, + "41": 1649296896.0, + "42": 1649296896.0, + "43": 1649296896.0, + "44": 1649296896.0, + "45": 1649296896.0, + "46": 1649296896.0, + "47": 1649296896.0, + "48": 1649296896.0, + "49": 1649296896.0, + "50": 1649296896.0, + "51": 1649296896.0, + "52": 1649299456.0, + "53": 1649299456.0, + "54": 1649299456.0, + "55": 1649299456.0, + "56": 1649299456.0, + "57": 1649299456.0, + "58": 1649299456.0, + "59": 1649299456.0, + "60": 1649299456.0, + "61": 1649299456.0, + 
"62": 1649299456.0, + "63": 1649299456.0, + "64": 1649299456.0, + "65": 1649299456.0, + "66": 1649299456.0, + "67": 1649299456.0, + "68": 1649299456.0, + "69": 1649299456.0, + "70": 1649299456.0, + "71": 1649299456.0, + "72": 1649299456.0, + "73": 1649299456.0, + "74": 1649299456.0, + "75": 1649299456.0, + "76": 1649299456.0, + "77": 1649299456.0, + "78": 1649299456.0, + "79": 1649299456.0, + "80": 1649299456.0, + "81": 1649299456.0, + "82": 1649299456.0, + "83": 1649299456.0, + "84": 1649299456.0, + "85": 1649299456.0, + "86": 1649299456.0, + "87": 1649299456.0, + "88": 1649299456.0, + "89": 1649299456.0, + "90": 1649299456.0, + "91": 1649299456.0, + "92": 1649299456.0, + "93": 1649299456.0, + "94": 1649299456.0, + "95": 1649299456.0, + "96": 1649299456.0, + "97": 1649299456.0, + "98": 1649299456.0, + "99": 1649299456.0, + "100": 1649299456.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.6334, + "2": 0.58887, + "3": 0.44885, + "4": 0.45823, + "5": 0.4541, + "6": 0.47222, + "7": 1.10638, + "8": 0.43653, + "9": 0.44329, + "10": 0.44399, + "11": 0.44344, + "12": 0.44343, + "13": 0.44305, + "14": 0.44198, + "15": 0.43185, + "16": 0.44065, + "17": 0.4397, + "18": 0.43652, + "19": 0.44411, + "20": 0.43298, + "21": 0.43948, + "22": 0.43139, + "23": 0.44927, + "24": 0.42704, + "25": 0.42868, + "26": 0.64107, + "27": 0.43117, + "28": 0.43201, + "29": 0.42798, + "30": 0.43481, + "31": 0.5935, + "32": 0.43533, + "33": 0.42675, + "34": 0.44082, + "35": 0.42648, + "36": 0.43241, + "37": 0.42804, + "38": 0.42825, + "39": 0.43697, + "40": 0.42755, + "41": 0.43914, + "42": 0.42638, + "43": 0.43891, + "44": 0.42856, + "45": 0.42888, + "46": 0.44513, + "47": 0.4274, + "48": 0.43414, + "49": 0.65463, + "50": 0.43047, + "51": 0.43747, + "52": 0.44679, + "53": 0.4308, + "54": 0.43283, + "55": 0.44288, + "56": 0.43291, + "57": 0.44077, + "58": 0.43033, + "59": 0.43703, + "60": 0.43023, + "61": 0.43081, + "62": 0.4427, 
+ "63": 0.43029, + "64": 0.44385, + "65": 0.43137, + "66": 0.44438, + "67": 0.43134, + "68": 0.43364, + "69": 0.43286, + "70": 0.43126, + "71": 0.4347, + "72": 0.42922, + "73": 0.44303, + "74": 0.43105, + "75": 0.43275, + "76": 0.43316, + "77": 0.43097, + "78": 0.43941, + "79": 0.42984, + "80": 0.43662, + "81": 0.43019, + "82": 0.44076, + "83": 0.42994, + "84": 0.4329, + "85": 0.44259, + "86": 0.43023, + "87": 0.43581, + "88": 0.42929, + "89": 0.43896, + "90": 0.4306, + "91": 0.43406, + "92": 0.43524, + "93": 0.43032, + "94": 0.44318, + "95": 0.42838, + "96": 0.44267, + "97": 0.43005, + "98": 0.43788, + "99": 0.43526, + "100": 0.43277 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..48895a39167 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86209, + "2": 10.85806, + "3": 10.8598, + "4": 10.84984, + "5": 10.88253, + "6": 10.88646, + "7": 10.8626, + "8": 10.86997, + "9": 10.86483, + "10": 10.83642, + "11": 10.87862, + "12": 10.87482, + "13": 10.87957, + "14": 10.88968, + "15": 10.82909, + "16": 10.8329, + "17": 10.79973, + "18": 10.82619, + "19": 10.81484, + "20": 10.73237, + "21": 10.72029, + "22": 10.57776, + "23": 10.73009, + "24": 10.61704, + "25": 10.56392, + "26": 10.6109, + "27": 10.6244, + "28": 10.58233, + "29": 10.59936, + "30": 10.38484, + "31": 10.14179, + "32": 10.48065, + "33": 10.47405, + "34": 10.23471, + "35": 10.28951, + "36": 10.23434, + "37": 
10.35826, + "38": 10.20825, + "39": 10.41154, + "40": 10.09133, + "41": 10.1661, + "42": 10.21968, + "43": 9.85861, + "44": 9.97128, + "45": 9.83487, + "46": 9.84446, + "47": 10.15847, + "48": 9.85182, + "49": 9.53839, + "50": 9.91604, + "51": 9.85736, + "52": 9.75252, + "53": 10.0755, + "54": 9.96042, + "55": 9.88232, + "56": 9.63204, + "57": 9.49336, + "58": 9.83436, + "59": 9.59208, + "60": 9.51376, + "61": 9.69806, + "62": 9.99169, + "63": 9.37379, + "64": 9.77832, + "65": 8.95392, + "66": 9.71066, + "67": 9.38186, + "68": 9.78754, + "69": 9.7933, + "70": 9.73094, + "71": 9.61728, + "72": 9.58467, + "73": 9.4898, + "74": 8.94127, + "75": 9.4313, + "76": 9.09097, + "77": 10.06237, + "78": 9.72645, + "79": 9.37428, + "80": 9.40597, + "81": 9.47979, + "82": 9.69227, + "83": 9.3124, + "84": 9.41987, + "85": 9.61137, + "86": 9.06834, + "87": 9.59084, + "88": 9.74523, + "89": 9.6065, + "90": 9.81743, + "91": 9.34257, + "92": 9.35903, + "93": 9.07904, + "94": 8.82791, + "95": 9.51571, + "96": 9.52139, + "97": 9.31116, + "98": 9.67194, + "99": 8.88688, + "100": 9.40429 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1780.0, + "2": 1990.0, + "3": 1911.0, + "4": 1881.0, + "5": 2137.0, + "6": 2167.0, + "7": 2095.0, + "8": 1824.0, + "9": 2072.0, + "10": 1588.0, + "11": 2120.0, + "12": 2042.0, + "13": 2228.0, + "14": 2143.0, + "15": 2083.0, + "16": 1988.0, + "17": 2055.0, + "18": 1945.0, + "19": 2015.0, + "20": 1816.0, + "21": 2133.0, + "22": 1909.0, + "23": 2404.0, + "24": 1868.0, + "25": 1862.0, + "26": 1978.0, + "27": 2095.0, + "28": 2298.0, + "29": 2242.0, + "30": 2045.0, + "31": 1805.0, + "32": 2205.0, + "33": 2426.0, + "34": 2176.0, + "35": 2205.0, + "36": 2185.0, + "37": 2605.0, + "38": 2508.0, + "39": 2524.0, + "40": 2629.0, + "41": 2531.0, + "42": 2594.0, + "43": 2335.0, + "44": 2316.0, + "45": 2441.0, + "46": 2665.0, + "47": 2694.0, + "48": 2587.0, + "49": 2538.0, + "50": 2734.0, + "51": 2906.0, + "52": 
2829.0, + "53": 3163.0, + "54": 3001.0, + "55": 2662.0, + "56": 2967.0, + "57": 2540.0, + "58": 3326.0, + "59": 3105.0, + "60": 2726.0, + "61": 3284.0, + "62": 2957.0, + "63": 2690.0, + "64": 3247.0, + "65": 3011.0, + "66": 3409.0, + "67": 2852.0, + "68": 3048.0, + "69": 3229.0, + "70": 3737.0, + "71": 3186.0, + "72": 2634.0, + "73": 3390.0, + "74": 2125.0, + "75": 2771.0, + "76": 3235.0, + "77": 3605.0, + "78": 3672.0, + "79": 3633.0, + "80": 3804.0, + "81": 4084.0, + "82": 3675.0, + "83": 3138.0, + "84": 3636.0, + "85": 3588.0, + "86": 3171.0, + "87": 4250.0, + "88": 3592.0, + "89": 3775.0, + "90": 3384.0, + "91": 3074.0, + "92": 3533.0, + "93": 3067.0, + "94": 3730.0, + "95": 3590.0, + "96": 3888.0, + "97": 3580.0, + "98": 4012.0, + "99": 3315.0, + "100": 3454.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 368387072.0, + "2": 368387072.0, + "3": 368387072.0, + "4": 368387072.0, + "5": 368387072.0, + "6": 368387072.0, + "7": 368387072.0, + "8": 368387072.0, + "9": 368387072.0, + "10": 368387072.0, + "11": 368387072.0, + "12": 368387072.0, + "13": 368387072.0, + "14": 368387072.0, + "15": 368387072.0, + "16": 368387072.0, + "17": 368387072.0, + "18": 368387072.0, + "19": 368387072.0, + "20": 368387072.0, + "21": 368387072.0, + "22": 368387072.0, + "23": 368387072.0, + "24": 368387072.0, + "25": 368387072.0, + "26": 368387072.0, + "27": 368387072.0, + "28": 368387072.0, + "29": 368387072.0, + "30": 368387072.0, + "31": 368387072.0, + "32": 368387072.0, + "33": 368387072.0, + "34": 368387072.0, + "35": 368387072.0, + "36": 368387072.0, + "37": 368387072.0, + "38": 368387072.0, + "39": 368387072.0, + "40": 368387072.0, + "41": 368387072.0, + "42": 368387072.0, + "43": 368387072.0, + "44": 368387072.0, + "45": 368387072.0, + "46": 368387072.0, + "47": 368387072.0, + "48": 368387072.0, + "49": 368387072.0, + "50": 368387072.0, + "51": 368387072.0, + "52": 368387072.0, + "53": 368387072.0, + "54": 
368387072.0, + "55": 368387072.0, + "56": 368387072.0, + "57": 368387072.0, + "58": 368387072.0, + "59": 368387072.0, + "60": 368387072.0, + "61": 368387072.0, + "62": 368387072.0, + "63": 368387072.0, + "64": 368387072.0, + "65": 368387072.0, + "66": 368387072.0, + "67": 368387072.0, + "68": 368387072.0, + "69": 368387072.0, + "70": 368387072.0, + "71": 368387072.0, + "72": 368387072.0, + "73": 368387072.0, + "74": 368387072.0, + "75": 368387072.0, + "76": 368387072.0, + "77": 368387072.0, + "78": 368387072.0, + "79": 368387072.0, + "80": 368387072.0, + "81": 368387072.0, + "82": 368387072.0, + "83": 368387072.0, + "84": 368387072.0, + "85": 368387072.0, + "86": 368387072.0, + "87": 368387072.0, + "88": 368387072.0, + "89": 368387072.0, + "90": 368387072.0, + "91": 368387072.0, + "92": 368387072.0, + "93": 368387072.0, + "94": 368387072.0, + "95": 368387072.0, + "96": 368387072.0, + "97": 368387072.0, + "98": 368387072.0, + "99": 368387072.0, + "100": 368387072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1510972416.0, + "2": 1645105152.0, + "3": 1645105152.0, + "4": 1645105152.0, + "5": 1645105152.0, + "6": 1645105152.0, + "7": 1645105152.0, + "8": 1645105152.0, + "9": 1645105152.0, + "10": 1645105152.0, + "11": 1645105152.0, + "12": 1645105152.0, + "13": 1645105152.0, + "14": 1645105152.0, + "15": 1645105152.0, + "16": 1645105152.0, + "17": 1645105152.0, + "18": 1645105152.0, + "19": 1645105152.0, + "20": 1645105152.0, + "21": 1645105152.0, + "22": 1645105152.0, + "23": 1645105152.0, + "24": 1645105152.0, + "25": 1645105152.0, + "26": 1645105152.0, + "27": 1645105152.0, + "28": 1645105152.0, + "29": 1645105152.0, + "30": 1645105152.0, + "31": 1645105152.0, + "32": 1645105152.0, + "33": 1645105152.0, + "34": 1645105152.0, + "35": 1645105152.0, + "36": 1645105152.0, + "37": 1645105152.0, + "38": 1645105152.0, + "39": 1645105152.0, + "40": 1645105152.0, + "41": 1645105152.0, + "42": 
1645105152.0, + "43": 1645105152.0, + "44": 1645105152.0, + "45": 1645105152.0, + "46": 1645105152.0, + "47": 1645105152.0, + "48": 1645105152.0, + "49": 1645105152.0, + "50": 1645105152.0, + "51": 1645105152.0, + "52": 1645105152.0, + "53": 1645105152.0, + "54": 1645105152.0, + "55": 1645105152.0, + "56": 1645105152.0, + "57": 1645105152.0, + "58": 1645105152.0, + "59": 1645105152.0, + "60": 1645105152.0, + "61": 1645105152.0, + "62": 1645105152.0, + "63": 1645105152.0, + "64": 1645105152.0, + "65": 1645105152.0, + "66": 1645105152.0, + "67": 1645105152.0, + "68": 1645105152.0, + "69": 1645105152.0, + "70": 1645105152.0, + "71": 1645105152.0, + "72": 1645105152.0, + "73": 1645105152.0, + "74": 1645105152.0, + "75": 1645105152.0, + "76": 1645105152.0, + "77": 1645105152.0, + "78": 1645105152.0, + "79": 1645105152.0, + "80": 1645105152.0, + "81": 1645105152.0, + "82": 1645105152.0, + "83": 1645105152.0, + "84": 1645105152.0, + "85": 1645105152.0, + "86": 1645105152.0, + "87": 1645105152.0, + "88": 1645105152.0, + "89": 1645105152.0, + "90": 1645105152.0, + "91": 1645105152.0, + "92": 1645105152.0, + "93": 1645105152.0, + "94": 1645105152.0, + "95": 1645105152.0, + "96": 1645105152.0, + "97": 1645105152.0, + "98": 1645105152.0, + "99": 1645105152.0, + "100": 1645105152.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.66493, + "2": 0.8291, + "3": 0.43315, + "4": 0.42959, + "5": 0.43827, + "6": 0.4295, + "7": 0.62136, + "8": 0.42601, + "9": 0.43172, + "10": 0.42845, + "11": 0.42549, + "12": 0.43168, + "13": 0.42375, + "14": 0.43487, + "15": 0.423, + "16": 0.43317, + "17": 0.42357, + "18": 0.42563, + "19": 0.42895, + "20": 0.42417, + "21": 0.43668, + "22": 0.42565, + "23": 0.43595, + "24": 0.42585, + "25": 0.42377, + "26": 0.4332, + "27": 0.4241, + "28": 0.43439, + "29": 0.42272, + "30": 0.4344, + "31": 0.42586, + "32": 0.42451, + "33": 0.43418, + "34": 0.42702, + "35": 0.64991, + "36": 0.42577, + "37": 
0.42879, + "38": 0.42484, + "39": 0.66025, + "40": 0.42623, + "41": 0.42852, + "42": 0.42402, + "43": 0.42999, + "44": 0.42936, + "45": 0.42525, + "46": 0.43377, + "47": 0.42553, + "48": 0.42913, + "49": 0.42482, + "50": 0.42788, + "51": 0.44478, + "52": 0.4318, + "53": 0.42325, + "54": 0.44021, + "55": 0.42487, + "56": 0.43393, + "57": 0.42758, + "58": 0.43308, + "59": 0.42523, + "60": 0.42483, + "61": 0.43409, + "62": 0.42537, + "63": 0.43014, + "64": 0.42235, + "65": 0.42951, + "66": 0.43017, + "67": 0.42364, + "68": 0.4377, + "69": 0.42513, + "70": 0.4337, + "71": 0.42291, + "72": 0.42699, + "73": 0.43249, + "74": 0.42472, + "75": 0.4344, + "76": 0.4261, + "77": 0.43235, + "78": 0.42569, + "79": 0.42813, + "80": 0.43557, + "81": 0.42479, + "82": 0.43423, + "83": 0.42304, + "84": 0.43758, + "85": 0.42397, + "86": 0.42467, + "87": 0.43641, + "88": 0.42214, + "89": 0.42765, + "90": 0.42554, + "91": 0.44244, + "92": 0.42237, + "93": 0.42384, + "94": 0.44073, + "95": 0.42184, + "96": 0.43075, + "97": 0.42217, + "98": 0.44245, + "99": 0.42259, + "100": 0.42671 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 76960796d04..2dfc5d0f6ae 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.90433, + "2": 10.90931, + "3": 10.90937, + "4": 10.90764, "5": 10.90709, + "6": 10.91174, + "7": 10.91413, + "8": 
10.89808, + "9": 10.91252, "10": 10.87838, + "11": 10.90538, + "12": 10.89588, + "13": 10.91234, + "14": 10.90596, "15": 10.86278, + "16": 10.85987, + "17": 10.84211, + "18": 10.83508, + "19": 10.84021, "20": 10.74667, + "21": 10.72431, + "22": 10.6337, + "23": 10.74257, + "24": 10.63399, "25": 10.60185, + "26": 10.64659, + "27": 10.64193, + "28": 10.58695, + "29": 10.59421, "30": 10.394, + "31": 10.17174, + "32": 10.48573, + "33": 10.48042, + "34": 10.25002, "35": 10.29811, + "36": 10.25221, + "37": 10.36635, + "38": 10.22258, + "39": 10.42495, "40": 10.111, + "41": 10.17165, + "42": 10.22384, + "43": 9.86674, + "44": 9.99019, "45": 9.8622, + "46": 9.84813, + "47": 10.16079, + "48": 9.87303, + "49": 9.55987, "50": 9.92159, + "51": 9.8695, + "52": 9.76154, + "53": 10.08349, + "54": 9.97449, "55": 9.89437, + "56": 9.6424, + "57": 9.50352, + "58": 9.84153, + "59": 9.60017, "60": 9.51715, + "61": 9.70458, + "62": 9.98292, + "63": 9.39067, + "64": 9.7797, "65": 8.96053, + "66": 9.70288, + "67": 9.3734, + "68": 9.78805, + "69": 9.79828, "70": 9.74999, + "71": 9.62682, + "72": 9.59043, + "73": 9.49893, + "74": 8.94842, "75": 9.42922, + "76": 9.08268, + "77": 10.07413, + "78": 9.73322, + "79": 9.38352, "80": 9.40713, + "81": 9.48366, + "82": 9.70577, + "83": 9.3103, + "84": 9.41846, "85": 9.62053, + "86": 9.08533, + "87": 9.59962, + "88": 9.75141, + "89": 9.60594, "90": 9.8245, + "91": 9.33973, + "92": 9.36344, + "93": 9.08397, + "94": 8.83571, "95": 9.51936, + "96": 9.53001, + "97": 9.31995, + "98": 9.67709, + "99": 8.88909, "100": 9.40491 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1595.0, + "2": 1632.0, + "3": 1539.0, + "4": 1702.0, "5": 1827.0, + "6": 1718.0, + "7": 1810.0, + "8": 1634.0, + "9": 2007.0, "10": 1457.0, + "11": 1906.0, + "12": 1737.0, + "13": 1917.0, + "14": 1828.0, "15": 1866.0, + "16": 1826.0, + "17": 1762.0, + "18": 1761.0, + "19": 1803.0, "20": 1803.0, + "21": 1996.0, + "22": 
1691.0, + "23": 2060.0, + "24": 1622.0, "25": 1595.0, + "26": 1608.0, + "27": 1890.0, + "28": 1913.0, + "29": 1987.0, "30": 1808.0, + "31": 1549.0, + "32": 1838.0, + "33": 2073.0, + "34": 1859.0, "35": 1870.0, + "36": 1870.0, + "37": 2300.0, + "38": 2186.0, + "39": 2368.0, "40": 2097.0, + "41": 2325.0, + "42": 2227.0, + "43": 2036.0, + "44": 2098.0, "45": 2055.0, + "46": 2146.0, + "47": 2453.0, + "48": 2273.0, + "49": 2244.0, "50": 2252.0, + "51": 2484.0, + "52": 2568.0, + "53": 2834.0, + "54": 2607.0, "55": 2149.0, + "56": 2683.0, + "57": 2283.0, + "58": 2764.0, + "59": 2623.0, "60": 2456.0, + "61": 2938.0, + "62": 2456.0, + "63": 2279.0, + "64": 3078.0, "65": 2504.0, + "66": 2881.0, + "67": 2683.0, + "68": 2657.0, + "69": 2832.0, "70": 3144.0, + "71": 2930.0, + "72": 2328.0, + "73": 2984.0, + "74": 1752.0, "75": 2451.0, + "76": 3040.0, + "77": 3213.0, + "78": 2936.0, + "79": 2941.0, "80": 3112.0, + "81": 3568.0, + "82": 3105.0, + "83": 2725.0, + "84": 3051.0, "85": 3170.0, + "86": 2645.0, + "87": 3586.0, + "88": 2902.0, + "89": 3371.0, "90": 2971.0, + "91": 2800.0, + "92": 3017.0, + "93": 2524.0, + "94": 3384.0, "95": 3147.0, + "96": 3388.0, + "97": 3031.0, + "98": 3619.0, + "99": 3004.0, "100": 3100.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 312352256.0, + "2": 312352256.0, + "3": 312352256.0, + "4": 312352256.0, "5": 312352256.0, + "6": 312352256.0, + "7": 312352256.0, + "8": 312352256.0, + "9": 312352256.0, "10": 312352256.0, + "11": 312352256.0, + "12": 312352256.0, + "13": 312352256.0, + "14": 312352256.0, "15": 312352256.0, + "16": 312352256.0, + "17": 312352256.0, + "18": 312352256.0, + "19": 312352256.0, "20": 312352256.0, + "21": 312352256.0, + "22": 312352256.0, + "23": 312352256.0, + "24": 312352256.0, "25": 312352256.0, + "26": 312352256.0, + "27": 312352256.0, + "28": 312352256.0, + "29": 312352256.0, "30": 312352256.0, + "31": 312352256.0, + "32": 312352256.0, + 
"33": 312352256.0, + "34": 312352256.0, "35": 312352256.0, + "36": 312352256.0, + "37": 312352256.0, + "38": 312352256.0, + "39": 312352256.0, "40": 312352256.0, + "41": 312352256.0, + "42": 312352256.0, + "43": 312352256.0, + "44": 312352256.0, "45": 312352256.0, + "46": 312352256.0, + "47": 312352256.0, + "48": 312352256.0, + "49": 312352256.0, "50": 312352256.0, + "51": 312352256.0, + "52": 312352256.0, + "53": 312352256.0, + "54": 312352256.0, "55": 312352256.0, + "56": 312352256.0, + "57": 312352256.0, + "58": 312352256.0, + "59": 312352256.0, "60": 312352256.0, + "61": 312352256.0, + "62": 312352256.0, + "63": 312352256.0, + "64": 312352256.0, "65": 312352256.0, + "66": 312352256.0, + "67": 312352256.0, + "68": 312352256.0, + "69": 312352256.0, "70": 312352256.0, + "71": 312352256.0, + "72": 312352256.0, + "73": 312352256.0, + "74": 312352256.0, "75": 312352256.0, + "76": 312352256.0, + "77": 312352256.0, + "78": 312352256.0, + "79": 312352256.0, "80": 312352256.0, + "81": 312352256.0, + "82": 312352256.0, + "83": 312352256.0, + "84": 312352256.0, "85": 312352256.0, + "86": 312352256.0, + "87": 312352256.0, + "88": 312352256.0, + "89": 312352256.0, "90": 312352256.0, + "91": 312352256.0, + "92": 312352256.0, + "93": 312352256.0, + "94": 312352256.0, "95": 312352256.0, + "96": 312352256.0, + "97": 312352256.0, + "98": 312352256.0, + "99": 312352256.0, "100": 312352256.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 754434560.0, + "2": 843763200.0, + "3": 843763200.0, + "4": 843763200.0, "5": 843763200.0, + "6": 843763200.0, + "7": 843763200.0, + "8": 843763200.0, + "9": 843763200.0, "10": 843763200.0, + "11": 843763200.0, + "12": 843763200.0, + "13": 843763200.0, + "14": 843763200.0, "15": 843763200.0, + "16": 843763200.0, + "17": 843763200.0, + "18": 843763200.0, + "19": 843763200.0, "20": 843763200.0, + "21": 843763200.0, + "22": 843763200.0, + "23": 843763200.0, + "24": 
843763200.0, "25": 843763200.0, + "26": 843763200.0, + "27": 843763200.0, + "28": 843763200.0, + "29": 843763200.0, "30": 843763200.0, + "31": 843763200.0, + "32": 843763200.0, + "33": 843763200.0, + "34": 843763200.0, "35": 843763200.0, + "36": 843763200.0, + "37": 843763200.0, + "38": 843763200.0, + "39": 843763200.0, "40": 843763200.0, + "41": 843763200.0, + "42": 843763200.0, + "43": 843763200.0, + "44": 843763200.0, "45": 843763200.0, + "46": 843763200.0, + "47": 843763200.0, + "48": 843763200.0, + "49": 843763200.0, "50": 843763200.0, + "51": 843763200.0, + "52": 843763200.0, + "53": 843763200.0, + "54": 843763200.0, "55": 843763200.0, + "56": 843763200.0, + "57": 843763200.0, + "58": 843763200.0, + "59": 843763200.0, "60": 843763200.0, + "61": 843763200.0, + "62": 843763200.0, + "63": 843763200.0, + "64": 843763200.0, "65": 843763200.0, + "66": 843763200.0, + "67": 843763200.0, + "68": 843763200.0, + "69": 843763200.0, "70": 843763200.0, + "71": 843763200.0, + "72": 843763200.0, + "73": 843763200.0, + "74": 843763200.0, "75": 843763200.0, + "76": 843763200.0, + "77": 843763200.0, + "78": 843763200.0, + "79": 843763200.0, "80": 843763200.0, + "81": 843763200.0, + "82": 843763200.0, + "83": 843763200.0, + "84": 843763200.0, "85": 843763200.0, + "86": 843763200.0, + "87": 843763200.0, + "88": 843763200.0, + "89": 843763200.0, "90": 843763200.0, + "91": 843763200.0, + "92": 843763200.0, + "93": 843763200.0, + "94": 843763200.0, "95": 843763200.0, + "96": 843763200.0, + "97": 843763200.0, + "98": 843763200.0, + "99": 843763200.0, "100": 843763200.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 11.81829, - "5": 0.2055, - "10": 0.20555, - "15": 0.20599, - "20": 0.2077, - "25": 0.20625, - "30": 0.20513, - "35": 0.21379, - "40": 0.19974, - "45": 0.20183, - "50": 0.1983, - "55": 0.20325, - "60": 0.19919, - "65": 0.19434, - "70": 0.19633, - "75": 0.19415, - "80": 0.19631, - "85": 0.19412, - 
"90": 0.20079, - "95": 0.20108, - "100": 0.20109 + "1": 13.88965, + "2": 0.27451, + "3": 0.24975, + "4": 0.25072, + "5": 0.2432, + "6": 0.24332, + "7": 0.23789, + "8": 0.23936, + "9": 0.23192, + "10": 0.22503, + "11": 0.22584, + "12": 0.22831, + "13": 0.22937, + "14": 0.22514, + "15": 0.22707, + "16": 0.22601, + "17": 0.22754, + "18": 0.22863, + "19": 0.22776, + "20": 0.2264, + "21": 0.22812, + "22": 0.23837, + "23": 0.25872, + "24": 0.23186, + "25": 0.22533, + "26": 0.22641, + "27": 0.22648, + "28": 0.22569, + "29": 0.22721, + "30": 0.22446, + "31": 0.2299, + "32": 0.22776, + "33": 0.22874, + "34": 0.22685, + "35": 0.22809, + "36": 0.23141, + "37": 0.22676, + "38": 0.22629, + "39": 0.22929, + "40": 0.23118, + "41": 0.22744, + "42": 0.22706, + "43": 0.23097, + "44": 0.22844, + "45": 0.22948, + "46": 0.22632, + "47": 0.22989, + "48": 0.22849, + "49": 0.23116, + "50": 0.23165, + "51": 0.25535, + "52": 0.27151, + "53": 0.23628, + "54": 0.23553, + "55": 0.23112, + "56": 0.23386, + "57": 0.2314, + "58": 0.23297, + "59": 0.22916, + "60": 0.22848, + "61": 0.23048, + "62": 0.22881, + "63": 0.23036, + "64": 0.2284, + "65": 0.23027, + "66": 0.22734, + "67": 0.23011, + "68": 0.22993, + "69": 0.22771, + "70": 0.23247, + "71": 0.22785, + "72": 0.22934, + "73": 0.22755, + "74": 0.22901, + "75": 0.22825, + "76": 0.22722, + "77": 0.22986, + "78": 0.22763, + "79": 0.22994, + "80": 0.22933, + "81": 0.2282, + "82": 0.22957, + "83": 0.22817, + "84": 0.22948, + "85": 0.2273, + "86": 0.22834, + "87": 0.23316, + "88": 0.22928, + "89": 0.22663, + "90": 0.23145, + "91": 0.22771, + "92": 0.22915, + "93": 0.22882, + "94": 0.22769, + "95": 0.22918, + "96": 0.23296, + "97": 0.22901, + "98": 0.23028, + "99": 0.23035, + "100": 0.23349 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..ff73ed22db1 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.90433, + "2": 10.90931, + "3": 10.90937, + "4": 10.90764, + "5": 10.90709, + "6": 10.91174, + "7": 10.91413, + "8": 10.89808, + "9": 10.91252, + "10": 10.87838, + "11": 10.90538, + "12": 10.89588, + "13": 10.91234, + "14": 10.90596, + "15": 10.86278, + "16": 10.85987, + "17": 10.84211, + "18": 10.83508, + "19": 10.84021, + "20": 10.74667, + "21": 10.72431, + "22": 10.6337, + "23": 10.74257, + "24": 10.63399, + "25": 10.60185, + "26": 10.64659, + "27": 10.64193, + "28": 10.58695, + "29": 10.59421, + "30": 10.394, + "31": 10.17174, + "32": 10.48573, + "33": 10.48042, + "34": 10.25002, + "35": 10.29811, + "36": 10.25221, + "37": 10.36635, + "38": 10.22258, + "39": 10.42495, + "40": 10.111, + "41": 10.17165, + "42": 10.22384, + "43": 9.86674, + "44": 9.99019, + "45": 9.8622, + "46": 9.84813, + "47": 10.16079, + "48": 9.87303, + "49": 9.55987, + "50": 9.92159, + "51": 9.8695, + "52": 9.76154, + "53": 10.08349, + "54": 9.97449, + "55": 9.89437, + "56": 9.6424, + "57": 9.50352, + "58": 9.84153, + "59": 9.60017, + "60": 9.51715, + "61": 9.70458, + "62": 9.98292, + "63": 9.39067, + "64": 9.7797, + "65": 8.96053, + "66": 9.70288, + "67": 9.3734, + "68": 9.78805, + "69": 9.79828, + "70": 9.74999, + "71": 9.62682, + "72": 9.59043, + "73": 9.49893, + "74": 8.94842, + "75": 9.42922, + "76": 9.08268, + "77": 10.07413, + "78": 9.73322, + "79": 9.38352, + "80": 9.40713, + "81": 9.48366, + "82": 9.70577, + "83": 9.3103, + "84": 9.41846, + "85": 9.62053, + "86": 9.08533, + "87": 9.59962, + 
"88": 9.75141, + "89": 9.60594, + "90": 9.8245, + "91": 9.33973, + "92": 9.36344, + "93": 9.08397, + "94": 8.83571, + "95": 9.51936, + "96": 9.53001, + "97": 9.31995, + "98": 9.67709, + "99": 8.88909, + "100": 9.40491 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1595.0, + "2": 1632.0, + "3": 1539.0, + "4": 1702.0, + "5": 1827.0, + "6": 1718.0, + "7": 1810.0, + "8": 1634.0, + "9": 2007.0, + "10": 1457.0, + "11": 1906.0, + "12": 1737.0, + "13": 1917.0, + "14": 1828.0, + "15": 1866.0, + "16": 1826.0, + "17": 1762.0, + "18": 1761.0, + "19": 1803.0, + "20": 1803.0, + "21": 1996.0, + "22": 1691.0, + "23": 2060.0, + "24": 1622.0, + "25": 1595.0, + "26": 1608.0, + "27": 1890.0, + "28": 1913.0, + "29": 1987.0, + "30": 1808.0, + "31": 1549.0, + "32": 1838.0, + "33": 2073.0, + "34": 1859.0, + "35": 1870.0, + "36": 1870.0, + "37": 2300.0, + "38": 2186.0, + "39": 2368.0, + "40": 2097.0, + "41": 2325.0, + "42": 2227.0, + "43": 2036.0, + "44": 2098.0, + "45": 2055.0, + "46": 2146.0, + "47": 2453.0, + "48": 2273.0, + "49": 2244.0, + "50": 2252.0, + "51": 2484.0, + "52": 2568.0, + "53": 2834.0, + "54": 2607.0, + "55": 2149.0, + "56": 2683.0, + "57": 2283.0, + "58": 2764.0, + "59": 2623.0, + "60": 2456.0, + "61": 2938.0, + "62": 2456.0, + "63": 2279.0, + "64": 3078.0, + "65": 2504.0, + "66": 2881.0, + "67": 2683.0, + "68": 2657.0, + "69": 2832.0, + "70": 3144.0, + "71": 2930.0, + "72": 2328.0, + "73": 2984.0, + "74": 1752.0, + "75": 2451.0, + "76": 3040.0, + "77": 3213.0, + "78": 2936.0, + "79": 2941.0, + "80": 3112.0, + "81": 3568.0, + "82": 3105.0, + "83": 2725.0, + "84": 3051.0, + "85": 3170.0, + "86": 2645.0, + "87": 3586.0, + "88": 2902.0, + "89": 3371.0, + "90": 2971.0, + "91": 2800.0, + "92": 3017.0, + "93": 2524.0, + "94": 3384.0, + "95": 3147.0, + "96": 3388.0, + "97": 3031.0, + "98": 3619.0, + "99": 3004.0, + "100": 3100.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 312352256.0, + "2": 312352256.0, + "3": 312352256.0, + "4": 312352256.0, + "5": 312352256.0, + "6": 312352256.0, + "7": 312352256.0, + "8": 312352256.0, + "9": 312352256.0, + "10": 312352256.0, + "11": 312352256.0, + "12": 312352256.0, + "13": 312352256.0, + "14": 312352256.0, + "15": 312352256.0, + "16": 312352256.0, + "17": 312352256.0, + "18": 312352256.0, + "19": 312352256.0, + "20": 312352256.0, + "21": 312352256.0, + "22": 312352256.0, + "23": 312352256.0, + "24": 312352256.0, + "25": 312352256.0, + "26": 312352256.0, + "27": 312352256.0, + "28": 312352256.0, + "29": 312352256.0, + "30": 312352256.0, + "31": 312352256.0, + "32": 312352256.0, + "33": 312352256.0, + "34": 312352256.0, + "35": 312352256.0, + "36": 312352256.0, + "37": 312352256.0, + "38": 312352256.0, + "39": 312352256.0, + "40": 312352256.0, + "41": 312352256.0, + "42": 312352256.0, + "43": 312352256.0, + "44": 312352256.0, + "45": 312352256.0, + "46": 312352256.0, + "47": 312352256.0, + "48": 312352256.0, + "49": 312352256.0, + "50": 312352256.0, + "51": 312352256.0, + "52": 312352256.0, + "53": 312352256.0, + "54": 312352256.0, + "55": 312352256.0, + "56": 312352256.0, + "57": 312352256.0, + "58": 312352256.0, + "59": 312352256.0, + "60": 312352256.0, + "61": 312352256.0, + "62": 312352256.0, + "63": 312352256.0, + "64": 312352256.0, + "65": 312352256.0, + "66": 312352256.0, + "67": 312352256.0, + "68": 312352256.0, + "69": 312352256.0, + "70": 312352256.0, + "71": 312352256.0, + "72": 312352256.0, + "73": 312352256.0, + "74": 312352256.0, + "75": 312352256.0, + "76": 312352256.0, + "77": 312352256.0, + "78": 312352256.0, + "79": 312352256.0, + "80": 312352256.0, + "81": 312352256.0, + "82": 312352256.0, + "83": 312352256.0, + "84": 312352256.0, + "85": 312352256.0, + "86": 312352256.0, + "87": 312352256.0, + "88": 312352256.0, + "89": 312352256.0, + "90": 312352256.0, + "91": 312352256.0, + "92": 312352256.0, + "93": 312352256.0, + "94": 312352256.0, 
+ "95": 312352256.0, + "96": 312352256.0, + "97": 312352256.0, + "98": 312352256.0, + "99": 312352256.0, + "100": 312352256.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 754434560.0, + "2": 843763200.0, + "3": 843763200.0, + "4": 843763200.0, + "5": 843763200.0, + "6": 843763200.0, + "7": 843763200.0, + "8": 843763200.0, + "9": 843763200.0, + "10": 843763200.0, + "11": 843763200.0, + "12": 843763200.0, + "13": 843763200.0, + "14": 843763200.0, + "15": 843763200.0, + "16": 843763200.0, + "17": 843763200.0, + "18": 843763200.0, + "19": 843763200.0, + "20": 843763200.0, + "21": 843763200.0, + "22": 843763200.0, + "23": 843763200.0, + "24": 843763200.0, + "25": 843763200.0, + "26": 843763200.0, + "27": 843763200.0, + "28": 843763200.0, + "29": 843763200.0, + "30": 843763200.0, + "31": 843763200.0, + "32": 843763200.0, + "33": 843763200.0, + "34": 843763200.0, + "35": 843763200.0, + "36": 843763200.0, + "37": 843763200.0, + "38": 843763200.0, + "39": 843763200.0, + "40": 843763200.0, + "41": 843763200.0, + "42": 843763200.0, + "43": 843763200.0, + "44": 843763200.0, + "45": 843763200.0, + "46": 843763200.0, + "47": 843763200.0, + "48": 843763200.0, + "49": 843763200.0, + "50": 843763200.0, + "51": 843763200.0, + "52": 843763200.0, + "53": 843763200.0, + "54": 843763200.0, + "55": 843763200.0, + "56": 843763200.0, + "57": 843763200.0, + "58": 843763200.0, + "59": 843763200.0, + "60": 843763200.0, + "61": 843763200.0, + "62": 843763200.0, + "63": 843763200.0, + "64": 843763200.0, + "65": 843763200.0, + "66": 843763200.0, + "67": 843763200.0, + "68": 843763200.0, + "69": 843763200.0, + "70": 843763200.0, + "71": 843763200.0, + "72": 843763200.0, + "73": 843763200.0, + "74": 843763200.0, + "75": 843763200.0, + "76": 843763200.0, + "77": 843763200.0, + "78": 843763200.0, + "79": 843763200.0, + "80": 843763200.0, + "81": 843763200.0, + "82": 843763200.0, + "83": 843763200.0, + "84": 843763200.0, + 
"85": 843763200.0, + "86": 843763200.0, + "87": 843763200.0, + "88": 843763200.0, + "89": 843763200.0, + "90": 843763200.0, + "91": 843763200.0, + "92": 843763200.0, + "93": 843763200.0, + "94": 843763200.0, + "95": 843763200.0, + "96": 843763200.0, + "97": 843763200.0, + "98": 843763200.0, + "99": 843763200.0, + "100": 843763200.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.22764, + "2": 0.24357, + "3": 0.1983, + "4": 0.19798, + "5": 0.19753, + "6": 0.19867, + "7": 0.2023, + "8": 0.20916, + "9": 0.19896, + "10": 0.19379, + "11": 0.19485, + "12": 0.19576, + "13": 0.19787, + "14": 0.19429, + "15": 0.19302, + "16": 0.19471, + "17": 0.19504, + "18": 0.19198, + "19": 0.19495, + "20": 0.19263, + "21": 0.19416, + "22": 0.19641, + "23": 0.19469, + "24": 0.1929, + "25": 0.19216, + "26": 0.19363, + "27": 0.19398, + "28": 0.20085, + "29": 0.19636, + "30": 0.19368, + "31": 0.19607, + "32": 0.19525, + "33": 0.19664, + "34": 0.19678, + "35": 0.19781, + "36": 0.19903, + "37": 0.19855, + "38": 0.19741, + "39": 0.19904, + "40": 0.1946, + "41": 0.19866, + "42": 0.19875, + "43": 0.19854, + "44": 0.19999, + "45": 0.19615, + "46": 0.19571, + "47": 0.20067, + "48": 0.20086, + "49": 0.199, + "50": 0.20278, + "51": 0.22281, + "52": 0.23219, + "53": 0.1956, + "54": 0.20104, + "55": 0.19383, + "56": 0.19622, + "57": 0.1958, + "58": 0.19611, + "59": 0.20122, + "60": 0.19838, + "61": 0.19728, + "62": 0.19768, + "63": 0.19649, + "64": 0.19849, + "65": 0.19729, + "66": 0.20239, + "67": 0.1983, + "68": 0.19972, + "69": 0.19875, + "70": 0.19826, + "71": 0.199, + "72": 0.20079, + "73": 0.19629, + "74": 0.19463, + "75": 0.19309, + "76": 0.19531, + "77": 0.19866, + "78": 0.19554, + "79": 0.19894, + "80": 0.19644, + "81": 0.19444, + "82": 0.1982, + "83": 0.19564, + "84": 0.19462, + "85": 0.19336, + "86": 0.19393, + "87": 0.19166, + "88": 0.19067, + "89": 0.19389, + "90": 0.19317, + "91": 0.19001, + "92": 0.19028, + "93": 0.19093, 
+ "94": 0.19224, + "95": 0.19066, + "96": 0.19224, + "97": 0.18966, + "98": 0.19044, + "99": 0.19273, + "100": 0.20509 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..5c404dad658 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.90433, + "2": 10.90931, + "3": 10.90937, + "4": 10.90764, + "5": 10.90709, + "6": 10.91174, + "7": 10.91413, + "8": 10.89808, + "9": 10.91252, + "10": 10.87838, + "11": 10.90538, + "12": 10.89588, + "13": 10.91234, + "14": 10.90596, + "15": 10.86278, + "16": 10.85987, + "17": 10.84211, + "18": 10.83508, + "19": 10.84021, + "20": 10.74667, + "21": 10.72431, + "22": 10.6337, + "23": 10.74257, + "24": 10.63399, + "25": 10.60185, + "26": 10.64659, + "27": 10.64193, + "28": 10.58695, + "29": 10.59421, + "30": 10.394, + "31": 10.17174, + "32": 10.48573, + "33": 10.48042, + "34": 10.25002, + "35": 10.29811, + "36": 10.25221, + "37": 10.36635, + "38": 10.22258, + "39": 10.42495, + "40": 10.111, + "41": 10.17165, + "42": 10.22384, + "43": 9.86674, + "44": 9.99019, + "45": 9.8622, + "46": 9.84813, + "47": 10.16079, + "48": 9.87303, + "49": 9.55987, + "50": 9.92159, + "51": 9.8695, + "52": 9.76154, + "53": 10.08349, + "54": 9.97449, + "55": 9.89437, + "56": 9.6424, + "57": 9.50352, + "58": 9.84153, + "59": 9.60017, + "60": 9.51715, + "61": 9.70458, + "62": 9.98292, + "63": 9.39067, + "64": 9.7797, + "65": 8.96053, + "66": 9.70288, + "67": 9.3734, + "68": 9.78805, + "69": 9.79828, + 
"70": 9.74999, + "71": 9.62682, + "72": 9.59043, + "73": 9.49893, + "74": 8.94842, + "75": 9.42922, + "76": 9.08268, + "77": 10.07413, + "78": 9.73322, + "79": 9.38352, + "80": 9.40713, + "81": 9.48366, + "82": 9.70577, + "83": 9.3103, + "84": 9.41846, + "85": 9.62053, + "86": 9.08533, + "87": 9.59962, + "88": 9.75141, + "89": 9.60594, + "90": 9.8245, + "91": 9.33973, + "92": 9.36344, + "93": 9.08397, + "94": 8.83571, + "95": 9.51936, + "96": 9.53001, + "97": 9.31995, + "98": 9.67709, + "99": 8.88909, + "100": 9.40491 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1595.0, + "2": 1632.0, + "3": 1539.0, + "4": 1702.0, + "5": 1827.0, + "6": 1718.0, + "7": 1810.0, + "8": 1634.0, + "9": 2007.0, + "10": 1457.0, + "11": 1906.0, + "12": 1737.0, + "13": 1917.0, + "14": 1828.0, + "15": 1866.0, + "16": 1826.0, + "17": 1762.0, + "18": 1761.0, + "19": 1803.0, + "20": 1803.0, + "21": 1996.0, + "22": 1691.0, + "23": 2060.0, + "24": 1622.0, + "25": 1595.0, + "26": 1608.0, + "27": 1890.0, + "28": 1913.0, + "29": 1987.0, + "30": 1808.0, + "31": 1549.0, + "32": 1838.0, + "33": 2073.0, + "34": 1859.0, + "35": 1870.0, + "36": 1870.0, + "37": 2300.0, + "38": 2186.0, + "39": 2368.0, + "40": 2097.0, + "41": 2325.0, + "42": 2227.0, + "43": 2036.0, + "44": 2098.0, + "45": 2055.0, + "46": 2146.0, + "47": 2453.0, + "48": 2273.0, + "49": 2244.0, + "50": 2252.0, + "51": 2484.0, + "52": 2568.0, + "53": 2834.0, + "54": 2607.0, + "55": 2149.0, + "56": 2683.0, + "57": 2283.0, + "58": 2764.0, + "59": 2623.0, + "60": 2456.0, + "61": 2938.0, + "62": 2456.0, + "63": 2279.0, + "64": 3078.0, + "65": 2504.0, + "66": 2881.0, + "67": 2683.0, + "68": 2657.0, + "69": 2832.0, + "70": 3144.0, + "71": 2930.0, + "72": 2328.0, + "73": 2984.0, + "74": 1752.0, + "75": 2451.0, + "76": 3040.0, + "77": 3213.0, + "78": 2936.0, + "79": 2941.0, + "80": 3112.0, + "81": 3568.0, + "82": 3105.0, + "83": 2725.0, + "84": 3051.0, + "85": 3170.0, + "86": 2645.0, + "87": 
3586.0, + "88": 2902.0, + "89": 3371.0, + "90": 2971.0, + "91": 2800.0, + "92": 3017.0, + "93": 2524.0, + "94": 3384.0, + "95": 3147.0, + "96": 3388.0, + "97": 3031.0, + "98": 3619.0, + "99": 3004.0, + "100": 3100.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 312352256.0, + "2": 312352256.0, + "3": 312352256.0, + "4": 312352256.0, + "5": 312352256.0, + "6": 312352256.0, + "7": 312352256.0, + "8": 312352256.0, + "9": 312352256.0, + "10": 312352256.0, + "11": 312352256.0, + "12": 312352256.0, + "13": 312352256.0, + "14": 312352256.0, + "15": 312352256.0, + "16": 312352256.0, + "17": 312352256.0, + "18": 312352256.0, + "19": 312352256.0, + "20": 312352256.0, + "21": 312352256.0, + "22": 312352256.0, + "23": 312352256.0, + "24": 312352256.0, + "25": 312352256.0, + "26": 312352256.0, + "27": 312352256.0, + "28": 312352256.0, + "29": 312352256.0, + "30": 312352256.0, + "31": 312352256.0, + "32": 312352256.0, + "33": 312352256.0, + "34": 312352256.0, + "35": 312352256.0, + "36": 312352256.0, + "37": 312352256.0, + "38": 312352256.0, + "39": 312352256.0, + "40": 312352256.0, + "41": 312352256.0, + "42": 312352256.0, + "43": 312352256.0, + "44": 312352256.0, + "45": 312352256.0, + "46": 312352256.0, + "47": 312352256.0, + "48": 312352256.0, + "49": 312352256.0, + "50": 312352256.0, + "51": 312352256.0, + "52": 312352256.0, + "53": 312352256.0, + "54": 312352256.0, + "55": 312352256.0, + "56": 312352256.0, + "57": 312352256.0, + "58": 312352256.0, + "59": 312352256.0, + "60": 312352256.0, + "61": 312352256.0, + "62": 312352256.0, + "63": 312352256.0, + "64": 312352256.0, + "65": 312352256.0, + "66": 312352256.0, + "67": 312352256.0, + "68": 312352256.0, + "69": 312352256.0, + "70": 312352256.0, + "71": 312352256.0, + "72": 312352256.0, + "73": 312352256.0, + "74": 312352256.0, + "75": 312352256.0, + "76": 312352256.0, + "77": 312352256.0, + "78": 312352256.0, + "79": 312352256.0, + "80": 312352256.0, + 
"81": 312352256.0, + "82": 312352256.0, + "83": 312352256.0, + "84": 312352256.0, + "85": 312352256.0, + "86": 312352256.0, + "87": 312352256.0, + "88": 312352256.0, + "89": 312352256.0, + "90": 312352256.0, + "91": 312352256.0, + "92": 312352256.0, + "93": 312352256.0, + "94": 312352256.0, + "95": 312352256.0, + "96": 312352256.0, + "97": 312352256.0, + "98": 312352256.0, + "99": 312352256.0, + "100": 312352256.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 754434560.0, + "2": 843763200.0, + "3": 843763200.0, + "4": 843763200.0, + "5": 843763200.0, + "6": 843763200.0, + "7": 843763200.0, + "8": 843763200.0, + "9": 843763200.0, + "10": 843763200.0, + "11": 843763200.0, + "12": 843763200.0, + "13": 843763200.0, + "14": 843763200.0, + "15": 843763200.0, + "16": 843763200.0, + "17": 843763200.0, + "18": 843763200.0, + "19": 843763200.0, + "20": 843763200.0, + "21": 843763200.0, + "22": 843763200.0, + "23": 843763200.0, + "24": 843763200.0, + "25": 843763200.0, + "26": 843763200.0, + "27": 843763200.0, + "28": 843763200.0, + "29": 843763200.0, + "30": 843763200.0, + "31": 843763200.0, + "32": 843763200.0, + "33": 843763200.0, + "34": 843763200.0, + "35": 843763200.0, + "36": 843763200.0, + "37": 843763200.0, + "38": 843763200.0, + "39": 843763200.0, + "40": 843763200.0, + "41": 843763200.0, + "42": 843763200.0, + "43": 843763200.0, + "44": 843763200.0, + "45": 843763200.0, + "46": 843763200.0, + "47": 843763200.0, + "48": 843763200.0, + "49": 843763200.0, + "50": 843763200.0, + "51": 843763200.0, + "52": 843763200.0, + "53": 843763200.0, + "54": 843763200.0, + "55": 843763200.0, + "56": 843763200.0, + "57": 843763200.0, + "58": 843763200.0, + "59": 843763200.0, + "60": 843763200.0, + "61": 843763200.0, + "62": 843763200.0, + "63": 843763200.0, + "64": 843763200.0, + "65": 843763200.0, + "66": 843763200.0, + "67": 843763200.0, + "68": 843763200.0, + "69": 843763200.0, + "70": 843763200.0, + 
"71": 843763200.0, + "72": 843763200.0, + "73": 843763200.0, + "74": 843763200.0, + "75": 843763200.0, + "76": 843763200.0, + "77": 843763200.0, + "78": 843763200.0, + "79": 843763200.0, + "80": 843763200.0, + "81": 843763200.0, + "82": 843763200.0, + "83": 843763200.0, + "84": 843763200.0, + "85": 843763200.0, + "86": 843763200.0, + "87": 843763200.0, + "88": 843763200.0, + "89": 843763200.0, + "90": 843763200.0, + "91": 843763200.0, + "92": 843763200.0, + "93": 843763200.0, + "94": 843763200.0, + "95": 843763200.0, + "96": 843763200.0, + "97": 843763200.0, + "98": 843763200.0, + "99": 843763200.0, + "100": 843763200.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 13.61637, + "2": 0.24414, + "3": 0.22872, + "4": 0.22599, + "5": 0.22586, + "6": 0.22773, + "7": 0.22791, + "8": 0.22857, + "9": 0.2283, + "10": 0.22732, + "11": 0.22633, + "12": 0.22761, + "13": 0.22748, + "14": 0.23094, + "15": 0.22968, + "16": 0.22849, + "17": 0.22934, + "18": 0.22814, + "19": 0.22822, + "20": 0.22758, + "21": 0.22806, + "22": 0.25737, + "23": 0.24238, + "24": 0.23166, + "25": 0.22695, + "26": 0.22857, + "27": 0.23442, + "28": 0.22861, + "29": 0.2302, + "30": 0.2316, + "31": 0.23014, + "32": 0.22948, + "33": 0.23272, + "34": 0.23222, + "35": 0.23035, + "36": 0.23384, + "37": 0.23085, + "38": 0.23058, + "39": 0.23686, + "40": 0.23939, + "41": 0.23562, + "42": 0.23544, + "43": 0.23293, + "44": 0.22874, + "45": 0.234, + "46": 0.22942, + "47": 0.23036, + "48": 0.23404, + "49": 0.2686, + "50": 0.24831, + "51": 0.28415, + "52": 0.23699, + "53": 0.26129, + "54": 0.2273, + "55": 0.22639, + "56": 0.22691, + "57": 0.22504, + "58": 0.22822, + "59": 0.22913, + "60": 0.22577, + "61": 0.23097, + "62": 0.22702, + "63": 0.22579, + "64": 0.22717, + "65": 0.22986, + "66": 0.22481, + "67": 0.22676, + "68": 0.22643, + "69": 0.22933, + "70": 0.23566, + "71": 0.22795, + "72": 0.22654, + "73": 0.2256, + "74": 0.22941, + "75": 0.23701, + "76": 
0.23527, + "77": 0.23476, + "78": 0.23472, + "79": 0.22599, + "80": 0.22758, + "81": 0.22717, + "82": 0.22657, + "83": 0.22688, + "84": 0.22827, + "85": 0.22612, + "86": 0.22871, + "87": 0.23133, + "88": 0.22934, + "89": 0.22859, + "90": 0.22635, + "91": 0.22606, + "92": 0.2297, + "93": 0.22713, + "94": 0.2261, + "95": 0.227, + "96": 0.23135, + "97": 0.22866, + "98": 0.22601, + "99": 0.2277, + "100": 0.2323 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json index 5d2d76e675b..cac9c570ec1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.8583, "5": 10.87283, "10": 10.83266, "15": 10.82103, "20": 10.71378, "25": 10.54764, "30": 10.36787, "35": 10.28458, "40": 10.08925, "45": 9.84558, "50": 9.91941, "55": 9.89198, "60": 9.50822, "65": 8.95947, "70": 9.73442, "75": 9.43116, "80": 9.41096, "85": 9.61514, "90": 9.82374, "95": 9.52259, "100": 9.40801}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1691.0, "5": 2042.0, "10": 1630.0, "15": 2001.0, "20": 1728.0, "25": 1763.0, "30": 2006.0, "35": 2193.0, "40": 2383.0, "45": 2296.0, "50": 2855.0, "55": 2533.0, "60": 2704.0, "65": 2913.0, "70": 3455.0, "75": 2863.0, "80": 3626.0, "85": 3507.0, "90": 3276.0, "95": 3746.0, "100": 3624.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 232422400.0, "5": 
232422400.0, "10": 232422400.0, "15": 232422400.0, "20": 232422400.0, "25": 232422400.0, "30": 232422400.0, "35": 232422400.0, "40": 232422400.0, "45": 232422400.0, "50": 232422400.0, "55": 232422400.0, "60": 232422400.0, "65": 232422400.0, "70": 232422400.0, "75": 232422400.0, "80": 232422400.0, "85": 232422400.0, "90": 232422400.0, "95": 232422400.0, "100": 232422400.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 684471808.0, "5": 773274112.0, "10": 775372800.0, "15": 775372800.0, "20": 775372800.0, "25": 775372800.0, "30": 775372800.0, "35": 775372800.0, "40": 775372800.0, "45": 775372800.0, "50": 775372800.0, "55": 775372800.0, "60": 775373312.0, "65": 775373312.0, "70": 775373312.0, "75": 775373312.0, "80": 775373312.0, "85": 775373312.0, "90": 775373312.0, "95": 775373312.0, "100": 775373312.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 15.38884, "5": 0.30271, "10": 0.29872, "15": 0.29913, "20": 0.29673, "25": 0.29722, "30": 0.29513, "35": 0.29581, "40": 0.29346, "45": 0.31009, "50": 0.30584, "55": 0.30586, "60": 0.30392, "65": 0.29478, "70": 0.29561, "75": 0.2972, "80": 0.29542, "85": 0.29898, "90": 0.29519, "95": 0.29733, "100": 0.2954}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.8583, + "2": 10.85411, + "3": 10.8543, + "4": 10.84407, + "5": 10.87282, + "6": 10.8793, + "7": 10.84658, + "8": 10.86139, + "9": 10.87078, + "10": 10.83266, + "11": 10.86332, + "12": 10.87295, + "13": 10.87798, + "14": 10.88588, + "15": 10.82104, + "16": 10.82759, + "17": 10.80303, + "18": 10.82092, + "19": 10.80032, + "20": 10.71379, + "21": 10.69818, + "22": 10.57542, + "23": 10.72119, + "24": 10.60091, + "25": 10.5476, + "26": 10.61127, + "27": 10.61393, + "28": 10.57777, + "29": 10.57888, + "30": 10.36791, + "31": 10.13451, + "32": 10.47063, + "33": 10.47371, + "34": 10.23442, + "35": 
10.28457, + "36": 10.23595, + "37": 10.35351, + "38": 10.20695, + "39": 10.40581, + "40": 10.08924, + "41": 10.16388, + "42": 10.22671, + "43": 9.86336, + "44": 9.98189, + "45": 9.84555, + "46": 9.85753, + "47": 10.16884, + "48": 9.86474, + "49": 9.54712, + "50": 9.91942, + "51": 9.86179, + "52": 9.76162, + "53": 10.08383, + "54": 9.96743, + "55": 9.89199, + "56": 9.63777, + "57": 9.49339, + "58": 9.83897, + "59": 9.59641, + "60": 9.50823, + "61": 9.70513, + "62": 9.99499, + "63": 9.38054, + "64": 9.78296, + "65": 8.95946, + "66": 9.71045, + "67": 9.38075, + "68": 9.78884, + "69": 9.79451, + "70": 9.73441, + "71": 9.62146, + "72": 9.58792, + "73": 9.49657, + "74": 8.9434, + "75": 9.43112, + "76": 9.09716, + "77": 10.0681, + "78": 9.73005, + "79": 9.37764, + "80": 9.41097, + "81": 9.48622, + "82": 9.69669, + "83": 9.3163, + "84": 9.42182, + "85": 9.61516, + "86": 9.07553, + "87": 9.59851, + "88": 9.75046, + "89": 9.61112, + "90": 9.82373, + "91": 9.35278, + "92": 9.36495, + "93": 9.08811, + "94": 8.83656, + "95": 9.52256, + "96": 9.52793, + "97": 9.31634, + "98": 9.67876, + "99": 8.89321, + "100": 9.40801 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1708.0, + "2": 1804.0, + "3": 1725.0, + "4": 1881.0, + "5": 2019.0, + "6": 2015.0, + "7": 2086.0, + "8": 1730.0, + "9": 2024.0, + "10": 1515.0, + "11": 2162.0, + "12": 1847.0, + "13": 2125.0, + "14": 2050.0, + "15": 1946.0, + "16": 2000.0, + "17": 1996.0, + "18": 1874.0, + "19": 2011.0, + "20": 1771.0, + "21": 2099.0, + "22": 1892.0, + "23": 2171.0, + "24": 1834.0, + "25": 1790.0, + "26": 1803.0, + "27": 1998.0, + "28": 2211.0, + "29": 2129.0, + "30": 2147.0, + "31": 1623.0, + "32": 2174.0, + "33": 2364.0, + "34": 2035.0, + "35": 2089.0, + "36": 2202.0, + "37": 2603.0, + "38": 2468.0, + "39": 2623.0, + "40": 2383.0, + "41": 2519.0, + "42": 2522.0, + "43": 2235.0, + "44": 2275.0, + "45": 2319.0, + "46": 2632.0, + "47": 2675.0, + "48": 2697.0, + "49": 2551.0, + 
"50": 2814.0, + "51": 2767.0, + "52": 2804.0, + "53": 3231.0, + "54": 2905.0, + "55": 2575.0, + "56": 3077.0, + "57": 2587.0, + "58": 3346.0, + "59": 3056.0, + "60": 2695.0, + "61": 3191.0, + "62": 2637.0, + "63": 2649.0, + "64": 3176.0, + "65": 2756.0, + "66": 3481.0, + "67": 2905.0, + "68": 3114.0, + "69": 3133.0, + "70": 3533.0, + "71": 3225.0, + "72": 2621.0, + "73": 3297.0, + "74": 2145.0, + "75": 2799.0, + "76": 3354.0, + "77": 3466.0, + "78": 3485.0, + "79": 3464.0, + "80": 3614.0, + "81": 4011.0, + "82": 3694.0, + "83": 3201.0, + "84": 3655.0, + "85": 3597.0, + "86": 3096.0, + "87": 4103.0, + "88": 3306.0, + "89": 3839.0, + "90": 3352.0, + "91": 2980.0, + "92": 3452.0, + "93": 2967.0, + "94": 3773.0, + "95": 3589.0, + "96": 3800.0, + "97": 3412.0, + "98": 3998.0, + "99": 3483.0, + "100": 3651.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 232422400.0, + "2": 232422400.0, + "3": 232422400.0, + "4": 232422400.0, + "5": 232422400.0, + "6": 233470976.0, + "7": 232422400.0, + "8": 233470976.0, + "9": 232422400.0, + "10": 232422400.0, + "11": 232422400.0, + "12": 232422400.0, + "13": 232422400.0, + "14": 233470976.0, + "15": 232422400.0, + "16": 232422400.0, + "17": 232422400.0, + "18": 232422400.0, + "19": 232422400.0, + "20": 232422400.0, + "21": 232422400.0, + "22": 232422400.0, + "23": 232422400.0, + "24": 232422400.0, + "25": 232422400.0, + "26": 232422400.0, + "27": 232422400.0, + "28": 232422400.0, + "29": 232422400.0, + "30": 232422400.0, + "31": 232422400.0, + "32": 232422400.0, + "33": 232422400.0, + "34": 232422400.0, + "35": 232422400.0, + "36": 232422400.0, + "37": 232422400.0, + "38": 232422400.0, + "39": 232422400.0, + "40": 232422400.0, + "41": 232422400.0, + "42": 232422400.0, + "43": 232422400.0, + "44": 232422400.0, + "45": 232422400.0, + "46": 232422400.0, + "47": 232422400.0, + "48": 232422400.0, + "49": 233470976.0, + "50": 232422400.0, + "51": 232422400.0, + "52": 
232422400.0, + "53": 232422400.0, + "54": 232422400.0, + "55": 233470976.0, + "56": 232422400.0, + "57": 233470976.0, + "58": 232422400.0, + "59": 232422400.0, + "60": 232422400.0, + "61": 232422400.0, + "62": 232422400.0, + "63": 232422400.0, + "64": 232422400.0, + "65": 232422400.0, + "66": 232422400.0, + "67": 232422400.0, + "68": 232422400.0, + "69": 232422400.0, + "70": 232422400.0, + "71": 232422400.0, + "72": 232422400.0, + "73": 232422400.0, + "74": 232422400.0, + "75": 232422400.0, + "76": 232422400.0, + "77": 232422400.0, + "78": 232422400.0, + "79": 232422400.0, + "80": 232422400.0, + "81": 232422400.0, + "82": 232422400.0, + "83": 232422400.0, + "84": 232422400.0, + "85": 232422400.0, + "86": 232422400.0, + "87": 232422400.0, + "88": 232422400.0, + "89": 232422400.0, + "90": 232422400.0, + "91": 232422400.0, + "92": 232422400.0, + "93": 232422400.0, + "94": 232422400.0, + "95": 232422400.0, + "96": 232422400.0, + "97": 232422400.0, + "98": 232422400.0, + "99": 233470976.0, + "100": 232422400.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 683423744.0, + "2": 773273600.0, + "3": 773276672.0, + "4": 773276672.0, + "5": 773276672.0, + "6": 773276672.0, + "7": 773276672.0, + "8": 773276672.0, + "9": 773276672.0, + "10": 773276672.0, + "11": 773276672.0, + "12": 773276672.0, + "13": 773276672.0, + "14": 773276672.0, + "15": 773276672.0, + "16": 773276672.0, + "17": 773276672.0, + "18": 773276672.0, + "19": 773276672.0, + "20": 773276672.0, + "21": 773276672.0, + "22": 773276672.0, + "23": 773276672.0, + "24": 773276672.0, + "25": 773276672.0, + "26": 773276672.0, + "27": 773276672.0, + "28": 773276672.0, + "29": 773276672.0, + "30": 773276672.0, + "31": 773276672.0, + "32": 773276672.0, + "33": 773276672.0, + "34": 773276672.0, + "35": 773276672.0, + "36": 773276672.0, + "37": 773276672.0, + "38": 773276672.0, + "39": 773276672.0, + "40": 773276672.0, + "41": 773276672.0, + "42": 
773276672.0, + "43": 773276672.0, + "44": 773276672.0, + "45": 773276672.0, + "46": 773276672.0, + "47": 773276672.0, + "48": 773276672.0, + "49": 773276672.0, + "50": 775372800.0, + "51": 775372800.0, + "52": 775372800.0, + "53": 775372800.0, + "54": 775372800.0, + "55": 775372800.0, + "56": 775372800.0, + "57": 775372800.0, + "58": 775372800.0, + "59": 775372800.0, + "60": 775372800.0, + "61": 775372800.0, + "62": 775372800.0, + "63": 775372800.0, + "64": 775372800.0, + "65": 775372800.0, + "66": 775372800.0, + "67": 775372800.0, + "68": 775372800.0, + "69": 775372800.0, + "70": 775372800.0, + "71": 775372800.0, + "72": 775372800.0, + "73": 775372800.0, + "74": 775372800.0, + "75": 775372800.0, + "76": 775372800.0, + "77": 775372800.0, + "78": 775372800.0, + "79": 775372800.0, + "80": 775372800.0, + "81": 775372800.0, + "82": 775372800.0, + "83": 775372800.0, + "84": 775372800.0, + "85": 775372800.0, + "86": 775372800.0, + "87": 775372800.0, + "88": 775372800.0, + "89": 775372800.0, + "90": 775372800.0, + "91": 775372800.0, + "92": 775372800.0, + "93": 775372800.0, + "94": 775372800.0, + "95": 775372800.0, + "96": 775372800.0, + "97": 775372800.0, + "98": 775372800.0, + "99": 775373312.0, + "100": 775373312.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 16.23173, + "2": 0.48632, + "3": 0.3184, + "4": 0.31067, + "5": 0.31575, + "6": 0.3127, + "7": 0.3096, + "8": 0.31392, + "9": 0.31591, + "10": 0.30891, + "11": 0.31209, + "12": 0.31271, + "13": 0.30582, + "14": 0.31032, + "15": 0.30879, + "16": 0.3077, + "17": 0.30689, + "18": 0.30824, + "19": 0.30953, + "20": 0.30728, + "21": 0.31141, + "22": 0.31157, + "23": 0.30569, + "24": 0.30896, + "25": 0.30916, + "26": 0.30674, + "27": 0.31017, + "28": 0.30716, + "29": 0.30734, + "30": 0.30698, + "31": 0.30881, + "32": 0.3089, + "33": 0.30647, + "34": 0.3112, + "35": 0.311, + "36": 0.30632, + "37": 0.30856, + "38": 0.30986, + "39": 0.30502, + "40": 0.31035, 
+ "41": 0.306, + "42": 0.30943, + "43": 0.30773, + "44": 0.30886, + "45": 0.30942, + "46": 0.30579, + "47": 0.31121, + "48": 0.31407, + "49": 0.30981, + "50": 0.30966, + "51": 0.3347, + "52": 0.35543, + "53": 0.31067, + "54": 0.30931, + "55": 0.31517, + "56": 0.30883, + "57": 0.30908, + "58": 0.31373, + "59": 0.30746, + "60": 0.31113, + "61": 0.31473, + "62": 0.30775, + "63": 0.31034, + "64": 0.31108, + "65": 0.3103, + "66": 0.3085, + "67": 0.31036, + "68": 0.31412, + "69": 0.30947, + "70": 0.30646, + "71": 0.31133, + "72": 0.30734, + "73": 0.31043, + "74": 0.31583, + "75": 0.3074, + "76": 0.30939, + "77": 0.3182, + "78": 0.30755, + "79": 0.30953, + "80": 0.3085, + "81": 0.31023, + "82": 0.30621, + "83": 0.30705, + "84": 0.31232, + "85": 0.30864, + "86": 0.31017, + "87": 0.3124, + "88": 0.30667, + "89": 0.31086, + "90": 0.31626, + "91": 0.30744, + "92": 0.30887, + "93": 0.31054, + "94": 0.31172, + "95": 0.31164, + "96": 0.31058, + "97": 0.31089, + "98": 0.30676, + "99": 0.3105, + "100": 0.31337 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..02ddabef653 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.8583, + "2": 10.85411, + "3": 10.85433, + "4": 10.84406, + "5": 10.87281, + "6": 10.87934, + "7": 10.84661, + "8": 10.86143, + "9": 10.87077, + "10": 10.83262, + "11": 10.86331, + "12": 10.87296, + "13": 10.87796, + "14": 10.88589, + "15": 10.82104, + "16": 10.82761, + "17": 
10.80298, + "18": 10.82097, + "19": 10.80031, + "20": 10.71378, + "21": 10.69817, + "22": 10.57538, + "23": 10.72117, + "24": 10.60092, + "25": 10.54764, + "26": 10.6113, + "27": 10.6139, + "28": 10.57775, + "29": 10.57891, + "30": 10.36785, + "31": 10.13451, + "32": 10.47059, + "33": 10.47377, + "34": 10.23444, + "35": 10.28458, + "36": 10.23593, + "37": 10.35352, + "38": 10.20691, + "39": 10.40581, + "40": 10.08924, + "41": 10.16388, + "42": 10.22671, + "43": 9.86337, + "44": 9.98192, + "45": 9.84553, + "46": 9.85754, + "47": 10.16883, + "48": 9.86475, + "49": 9.54709, + "50": 9.91942, + "51": 9.86179, + "52": 9.76168, + "53": 10.08382, + "54": 9.96739, + "55": 9.89194, + "56": 9.63776, + "57": 9.49339, + "58": 9.83896, + "59": 9.59641, + "60": 9.50823, + "61": 9.7051, + "62": 9.99501, + "63": 9.38054, + "64": 9.78299, + "65": 8.95951, + "66": 9.71042, + "67": 9.38071, + "68": 9.7888, + "69": 9.79448, + "70": 9.73441, + "71": 9.62148, + "72": 9.58793, + "73": 9.49658, + "74": 8.94341, + "75": 9.43114, + "76": 9.09713, + "77": 10.06806, + "78": 9.73005, + "79": 9.37765, + "80": 9.41099, + "81": 9.48618, + "82": 9.69673, + "83": 9.31631, + "84": 9.42185, + "85": 9.61516, + "86": 9.07552, + "87": 9.59852, + "88": 9.75045, + "89": 9.61111, + "90": 9.82372, + "91": 9.35276, + "92": 9.365, + "93": 9.08813, + "94": 8.83655, + "95": 9.52257, + "96": 9.52788, + "97": 9.31634, + "98": 9.67878, + "99": 8.89321, + "100": 9.408 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1763.0, + "2": 1819.0, + "3": 1753.0, + "4": 1839.0, + "5": 2031.0, + "6": 1952.0, + "7": 2030.0, + "8": 1821.0, + "9": 1978.0, + "10": 1514.0, + "11": 2190.0, + "12": 1980.0, + "13": 2061.0, + "14": 2005.0, + "15": 2039.0, + "16": 1942.0, + "17": 1958.0, + "18": 1872.0, + "19": 2009.0, + "20": 1786.0, + "21": 2024.0, + "22": 1927.0, + "23": 2112.0, + "24": 1797.0, + "25": 1786.0, + "26": 1847.0, + "27": 1928.0, + "28": 2178.0, + "29": 2193.0, + 
"30": 1995.0, + "31": 1717.0, + "32": 2149.0, + "33": 2307.0, + "34": 2027.0, + "35": 2102.0, + "36": 2075.0, + "37": 2656.0, + "38": 2499.0, + "39": 2642.0, + "40": 2331.0, + "41": 2426.0, + "42": 2542.0, + "43": 2149.0, + "44": 2238.0, + "45": 2333.0, + "46": 2656.0, + "47": 2731.0, + "48": 2697.0, + "49": 2593.0, + "50": 2736.0, + "51": 2763.0, + "52": 2904.0, + "53": 3209.0, + "54": 2987.0, + "55": 2624.0, + "56": 3069.0, + "57": 2544.0, + "58": 3248.0, + "59": 2958.0, + "60": 2691.0, + "61": 3226.0, + "62": 2712.0, + "63": 2643.0, + "64": 3019.0, + "65": 2812.0, + "66": 3479.0, + "67": 2963.0, + "68": 3241.0, + "69": 3301.0, + "70": 3423.0, + "71": 3263.0, + "72": 2524.0, + "73": 3240.0, + "74": 2175.0, + "75": 2801.0, + "76": 3300.0, + "77": 3556.0, + "78": 3435.0, + "79": 3546.0, + "80": 3676.0, + "81": 3912.0, + "82": 3694.0, + "83": 3221.0, + "84": 3559.0, + "85": 3548.0, + "86": 3164.0, + "87": 4228.0, + "88": 3325.0, + "89": 3804.0, + "90": 3382.0, + "91": 3001.0, + "92": 3415.0, + "93": 3050.0, + "94": 3856.0, + "95": 3636.0, + "96": 3973.0, + "97": 3386.0, + "98": 3934.0, + "99": 3571.0, + "100": 3660.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 232422400.0, + "2": 232422400.0, + "3": 232422400.0, + "4": 232422400.0, + "5": 232422400.0, + "6": 232422400.0, + "7": 232422400.0, + "8": 232422400.0, + "9": 232422400.0, + "10": 232422400.0, + "11": 232422400.0, + "12": 232422400.0, + "13": 232422400.0, + "14": 232422400.0, + "15": 232422400.0, + "16": 232422400.0, + "17": 232422400.0, + "18": 232422400.0, + "19": 232422400.0, + "20": 232422400.0, + "21": 232422400.0, + "22": 232422400.0, + "23": 233470976.0, + "24": 232422400.0, + "25": 232422400.0, + "26": 232422400.0, + "27": 232422400.0, + "28": 232422400.0, + "29": 232422400.0, + "30": 232422400.0, + "31": 232422400.0, + "32": 232422400.0, + "33": 232422400.0, + "34": 232422400.0, + "35": 232422400.0, + "36": 232422400.0, + "37": 
232422400.0, + "38": 232422400.0, + "39": 232422400.0, + "40": 232422400.0, + "41": 232422400.0, + "42": 232422400.0, + "43": 232422400.0, + "44": 232422400.0, + "45": 232422400.0, + "46": 232422400.0, + "47": 232422400.0, + "48": 232422400.0, + "49": 232422400.0, + "50": 232422400.0, + "51": 232422400.0, + "52": 232422400.0, + "53": 232422400.0, + "54": 233470976.0, + "55": 232422400.0, + "56": 232422400.0, + "57": 232422400.0, + "58": 232422400.0, + "59": 232422400.0, + "60": 232422400.0, + "61": 232422400.0, + "62": 232422400.0, + "63": 232422400.0, + "64": 232422400.0, + "65": 232422400.0, + "66": 232422400.0, + "67": 232422400.0, + "68": 232422400.0, + "69": 232422400.0, + "70": 232422400.0, + "71": 232422400.0, + "72": 232422400.0, + "73": 232422400.0, + "74": 232422400.0, + "75": 232422400.0, + "76": 232422400.0, + "77": 232422400.0, + "78": 232422400.0, + "79": 232422400.0, + "80": 232422400.0, + "81": 232422400.0, + "82": 232422400.0, + "83": 232422400.0, + "84": 232422400.0, + "85": 232422400.0, + "86": 232422400.0, + "87": 232422400.0, + "88": 232422400.0, + "89": 232422400.0, + "90": 232422400.0, + "91": 232422400.0, + "92": 232422400.0, + "93": 232422400.0, + "94": 232422400.0, + "95": 232422400.0, + "96": 232422400.0, + "97": 232422400.0, + "98": 232422400.0, + "99": 232422400.0, + "100": 232422400.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 684472320.0, + "2": 771179520.0, + "3": 773275136.0, + "4": 773275136.0, + "5": 773275136.0, + "6": 773275136.0, + "7": 773276672.0, + "8": 773276672.0, + "9": 773276672.0, + "10": 773276672.0, + "11": 773276672.0, + "12": 773276672.0, + "13": 773276672.0, + "14": 773276672.0, + "15": 773276672.0, + "16": 773276672.0, + "17": 773276672.0, + "18": 773276672.0, + "19": 773276672.0, + "20": 773276672.0, + "21": 773276672.0, + "22": 773276672.0, + "23": 773276672.0, + "24": 773276672.0, + "25": 773276672.0, + "26": 773276672.0, + "27": 
773276672.0, + "28": 773276672.0, + "29": 773276672.0, + "30": 773276672.0, + "31": 773276672.0, + "32": 773276672.0, + "33": 773276672.0, + "34": 773276672.0, + "35": 773276672.0, + "36": 773276672.0, + "37": 773276672.0, + "38": 773276672.0, + "39": 773276672.0, + "40": 773276672.0, + "41": 773276672.0, + "42": 773276672.0, + "43": 773276672.0, + "44": 773276672.0, + "45": 773276672.0, + "46": 773276672.0, + "47": 773276672.0, + "48": 773276672.0, + "49": 773276672.0, + "50": 773276672.0, + "51": 773276672.0, + "52": 773276672.0, + "53": 773276672.0, + "54": 773276672.0, + "55": 773276672.0, + "56": 773276672.0, + "57": 773276672.0, + "58": 775370752.0, + "59": 775370752.0, + "60": 775370752.0, + "61": 775370752.0, + "62": 775370752.0, + "63": 775370752.0, + "64": 775370752.0, + "65": 775370752.0, + "66": 775370752.0, + "67": 775370752.0, + "68": 775370752.0, + "69": 775370752.0, + "70": 775370752.0, + "71": 775370752.0, + "72": 775370752.0, + "73": 775370752.0, + "74": 775370752.0, + "75": 775370752.0, + "76": 775370752.0, + "77": 775370752.0, + "78": 775370752.0, + "79": 775370752.0, + "80": 775370752.0, + "81": 775370752.0, + "82": 775370752.0, + "83": 775370752.0, + "84": 775370752.0, + "85": 775370752.0, + "86": 775370752.0, + "87": 775370752.0, + "88": 775370752.0, + "89": 775370752.0, + "90": 775370752.0, + "91": 775370752.0, + "92": 775370752.0, + "93": 775370752.0, + "94": 775370752.0, + "95": 775370752.0, + "96": 775370752.0, + "97": 775370752.0, + "98": 775370752.0, + "99": 775370752.0, + "100": 775370752.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.23624, + "2": 0.45559, + "3": 0.34073, + "4": 0.34912, + "5": 0.33446, + "6": 0.33332, + "7": 0.33851, + "8": 0.33336, + "9": 0.32771, + "10": 0.33159, + "11": 0.34305, + "12": 0.32874, + "13": 0.33071, + "14": 0.32996, + "15": 0.32459, + "16": 0.32655, + "17": 0.33334, + "18": 0.32446, + "19": 0.3266, + "20": 0.32986, + "21": 0.32475, 
+ "22": 0.3254, + "23": 0.33271, + "24": 0.32384, + "25": 0.32516, + "26": 0.33394, + "27": 0.32353, + "28": 0.32387, + "29": 0.33903, + "30": 0.32341, + "31": 0.32362, + "32": 0.33581, + "33": 0.32429, + "34": 0.32354, + "35": 0.34191, + "36": 0.32385, + "37": 0.31882, + "38": 0.33898, + "39": 0.30757, + "40": 0.31116, + "41": 0.31744, + "42": 0.30716, + "43": 0.30682, + "44": 0.31469, + "45": 0.31615, + "46": 0.30687, + "47": 0.30877, + "48": 0.31402, + "49": 0.30825, + "50": 0.30784, + "51": 0.34123, + "52": 0.30954, + "53": 0.56738, + "54": 0.30221, + "55": 0.31106, + "56": 0.30933, + "57": 0.31081, + "58": 0.30785, + "59": 0.30911, + "60": 0.3023, + "61": 0.62879, + "62": 0.30236, + "63": 0.30247, + "64": 0.30924, + "65": 0.30345, + "66": 0.29854, + "67": 0.30661, + "68": 0.30496, + "69": 0.29736, + "70": 0.30244, + "71": 0.30287, + "72": 0.29819, + "73": 0.29849, + "74": 0.30577, + "75": 0.30399, + "76": 0.30895, + "77": 0.30926, + "78": 0.30949, + "79": 0.30633, + "80": 0.31099, + "81": 0.30704, + "82": 0.30445, + "83": 0.31105, + "84": 0.30999, + "85": 0.30339, + "86": 0.30467, + "87": 0.30774, + "88": 0.30578, + "89": 0.30511, + "90": 0.31156, + "91": 0.30995, + "92": 0.30672, + "93": 0.31046, + "94": 0.3104, + "95": 0.30314, + "96": 0.30871, + "97": 0.30827, + "98": 0.30255, + "99": 0.30371, + "100": 0.30359 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..5e2ba569f87 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 10.8583, + "2": 10.85411, + "3": 10.85432, + "4": 10.84404, + "5": 10.87282, + "6": 10.87931, + "7": 10.84659, + "8": 10.86139, + "9": 10.87078, + "10": 10.83268, + "11": 10.86331, + "12": 10.87295, + "13": 10.87792, + "14": 10.8859, + "15": 10.821, + "16": 10.8276, + "17": 10.803, + "18": 10.82095, + "19": 10.80028, + "20": 10.71379, + "21": 10.69818, + "22": 10.57543, + "23": 10.72117, + "24": 10.60088, + "25": 10.54762, + "26": 10.61129, + "27": 10.61394, + "28": 10.57775, + "29": 10.5789, + "30": 10.36786, + "31": 10.13447, + "32": 10.47056, + "33": 10.47376, + "34": 10.23442, + "35": 10.28459, + "36": 10.23594, + "37": 10.35354, + "38": 10.2069, + "39": 10.40582, + "40": 10.08919, + "41": 10.16389, + "42": 10.22672, + "43": 9.86333, + "44": 9.98188, + "45": 9.84556, + "46": 9.85756, + "47": 10.16883, + "48": 9.86477, + "49": 9.54713, + "50": 9.91938, + "51": 9.86177, + "52": 9.76163, + "53": 10.08382, + "54": 9.96738, + "55": 9.89195, + "56": 9.63775, + "57": 9.49339, + "58": 9.83898, + "59": 9.5964, + "60": 9.50822, + "61": 9.70512, + "62": 9.99504, + "63": 9.38054, + "64": 9.78296, + "65": 8.95947, + "66": 9.71043, + "67": 9.38078, + "68": 9.78882, + "69": 9.79449, + "70": 9.73441, + "71": 9.6215, + "72": 9.58789, + "73": 9.49656, + "74": 8.94345, + "75": 9.43109, + "76": 9.09716, + "77": 10.06808, + "78": 9.73001, + "79": 9.37764, + "80": 9.411, + "81": 9.48621, + "82": 9.69667, + "83": 9.31631, + "84": 9.42182, + "85": 9.61518, + "86": 9.07555, + "87": 9.59851, + "88": 9.75045, + "89": 9.61114, + "90": 9.82372, + "91": 9.35275, + "92": 9.36497, + "93": 9.08809, + "94": 8.83652, + "95": 9.52259, + "96": 9.52792, + "97": 9.31634, + "98": 9.67876, + "99": 8.89323, + "100": 9.408 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1748.0, + "2": 1856.0, + "3": 1756.0, + "4": 1916.0, + "5": 2038.0, + "6": 2033.0, + "7": 1992.0, + "8": 1767.0, + "9": 2004.0, + "10": 
1566.0, + "11": 2096.0, + "12": 1979.0, + "13": 2129.0, + "14": 1957.0, + "15": 1963.0, + "16": 1930.0, + "17": 1918.0, + "18": 1820.0, + "19": 2035.0, + "20": 1792.0, + "21": 2151.0, + "22": 1928.0, + "23": 2106.0, + "24": 1888.0, + "25": 1840.0, + "26": 1892.0, + "27": 1902.0, + "28": 2196.0, + "29": 2149.0, + "30": 1921.0, + "31": 1700.0, + "32": 2103.0, + "33": 2359.0, + "34": 1969.0, + "35": 2160.0, + "36": 2083.0, + "37": 2590.0, + "38": 2506.0, + "39": 2695.0, + "40": 2402.0, + "41": 2498.0, + "42": 2534.0, + "43": 2125.0, + "44": 2292.0, + "45": 2296.0, + "46": 2691.0, + "47": 2633.0, + "48": 2721.0, + "49": 2509.0, + "50": 2799.0, + "51": 2780.0, + "52": 2832.0, + "53": 3150.0, + "54": 2950.0, + "55": 2596.0, + "56": 2975.0, + "57": 2601.0, + "58": 3243.0, + "59": 2957.0, + "60": 2743.0, + "61": 3224.0, + "62": 2804.0, + "63": 2737.0, + "64": 3139.0, + "65": 2763.0, + "66": 3501.0, + "67": 2882.0, + "68": 3059.0, + "69": 3225.0, + "70": 3538.0, + "71": 3208.0, + "72": 2562.0, + "73": 3322.0, + "74": 2181.0, + "75": 2820.0, + "76": 3361.0, + "77": 3652.0, + "78": 3521.0, + "79": 3575.0, + "80": 3630.0, + "81": 3995.0, + "82": 3702.0, + "83": 3206.0, + "84": 3591.0, + "85": 3519.0, + "86": 3053.0, + "87": 4074.0, + "88": 3380.0, + "89": 3804.0, + "90": 3435.0, + "91": 3109.0, + "92": 3439.0, + "93": 2985.0, + "94": 3843.0, + "95": 3715.0, + "96": 3825.0, + "97": 3418.0, + "98": 3954.0, + "99": 3375.0, + "100": 3532.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 232422400.0, + "2": 232422400.0, + "3": 232422400.0, + "4": 232422400.0, + "5": 232422400.0, + "6": 232422400.0, + "7": 232422400.0, + "8": 232422400.0, + "9": 232422400.0, + "10": 232422400.0, + "11": 232422400.0, + "12": 232422400.0, + "13": 232422400.0, + "14": 232422400.0, + "15": 232422400.0, + "16": 232422400.0, + "17": 232422400.0, + "18": 232422400.0, + "19": 232422400.0, + "20": 232422400.0, + "21": 232422400.0, + "22": 
232422400.0, + "23": 232422400.0, + "24": 232422400.0, + "25": 232422400.0, + "26": 232422400.0, + "27": 232422400.0, + "28": 232422400.0, + "29": 232422400.0, + "30": 232422400.0, + "31": 232422400.0, + "32": 232422400.0, + "33": 232422400.0, + "34": 232422400.0, + "35": 232422400.0, + "36": 232422400.0, + "37": 232422400.0, + "38": 232422400.0, + "39": 232422400.0, + "40": 232422400.0, + "41": 232422400.0, + "42": 232422400.0, + "43": 232422400.0, + "44": 232422400.0, + "45": 232422400.0, + "46": 232422400.0, + "47": 232422400.0, + "48": 232422400.0, + "49": 232422400.0, + "50": 232422400.0, + "51": 232422400.0, + "52": 232422400.0, + "53": 232422400.0, + "54": 232422400.0, + "55": 232422400.0, + "56": 232422400.0, + "57": 232422400.0, + "58": 232422400.0, + "59": 232422400.0, + "60": 232422400.0, + "61": 232422400.0, + "62": 232422400.0, + "63": 232422400.0, + "64": 232422400.0, + "65": 232422400.0, + "66": 232422400.0, + "67": 232422400.0, + "68": 232422400.0, + "69": 232422400.0, + "70": 232422400.0, + "71": 232422400.0, + "72": 232422400.0, + "73": 232422400.0, + "74": 232422400.0, + "75": 232422400.0, + "76": 232422400.0, + "77": 232422400.0, + "78": 232422400.0, + "79": 232422400.0, + "80": 232422400.0, + "81": 232422400.0, + "82": 232422400.0, + "83": 232422400.0, + "84": 232422400.0, + "85": 232422400.0, + "86": 232422400.0, + "87": 232422400.0, + "88": 232422400.0, + "89": 232422400.0, + "90": 232422400.0, + "91": 232422400.0, + "92": 232422400.0, + "93": 232422400.0, + "94": 232422400.0, + "95": 232422400.0, + "96": 232422400.0, + "97": 232422400.0, + "98": 232422400.0, + "99": 232422400.0, + "100": 232422400.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 686566400.0, + "2": 771176960.0, + "3": 771177472.0, + "4": 773273600.0, + "5": 773273600.0, + "6": 773273600.0, + "7": 773274624.0, + "8": 773274624.0, + "9": 773274624.0, + "10": 773274624.0, + "11": 773274624.0, + "12": 
773274624.0, + "13": 773274624.0, + "14": 773276160.0, + "15": 773276160.0, + "16": 773276160.0, + "17": 773276160.0, + "18": 775372800.0, + "19": 775372800.0, + "20": 775372800.0, + "21": 775372800.0, + "22": 775372800.0, + "23": 775372800.0, + "24": 775372800.0, + "25": 775372800.0, + "26": 775372800.0, + "27": 775372800.0, + "28": 775372800.0, + "29": 775372800.0, + "30": 775372800.0, + "31": 775373312.0, + "32": 775373312.0, + "33": 775373312.0, + "34": 775373312.0, + "35": 775373312.0, + "36": 775373312.0, + "37": 775373312.0, + "38": 775373312.0, + "39": 775373312.0, + "40": 775373312.0, + "41": 775373312.0, + "42": 775373312.0, + "43": 775373824.0, + "44": 775373824.0, + "45": 775373824.0, + "46": 775373824.0, + "47": 775373824.0, + "48": 775373824.0, + "49": 775373824.0, + "50": 775373824.0, + "51": 775373824.0, + "52": 775373824.0, + "53": 775373824.0, + "54": 775373824.0, + "55": 775373824.0, + "56": 775373824.0, + "57": 775373824.0, + "58": 775373824.0, + "59": 775373824.0, + "60": 775373824.0, + "61": 775373824.0, + "62": 775373824.0, + "63": 775373824.0, + "64": 775373824.0, + "65": 775373824.0, + "66": 775373824.0, + "67": 775373824.0, + "68": 775373824.0, + "69": 775373824.0, + "70": 775373824.0, + "71": 775373824.0, + "72": 775373824.0, + "73": 775373824.0, + "74": 775373824.0, + "75": 775373824.0, + "76": 775373824.0, + "77": 775373824.0, + "78": 775373824.0, + "79": 775373824.0, + "80": 775373824.0, + "81": 775373824.0, + "82": 775373824.0, + "83": 775373824.0, + "84": 775373824.0, + "85": 775373824.0, + "86": 775373824.0, + "87": 775373824.0, + "88": 775373824.0, + "89": 775373824.0, + "90": 775373824.0, + "91": 775373824.0, + "92": 775373824.0, + "93": 775373824.0, + "94": 775373824.0, + "95": 775373824.0, + "96": 775373824.0, + "97": 775373824.0, + "98": 775373824.0, + "99": 775373824.0, + "100": 775373824.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 17.28027, + "2": 0.43557, 
+ "3": 0.31256, + "4": 0.52452, + "5": 0.30225, + "6": 0.30256, + "7": 0.30555, + "8": 0.30821, + "9": 0.30219, + "10": 0.30529, + "11": 0.30616, + "12": 0.30125, + "13": 0.30004, + "14": 0.30732, + "15": 0.30042, + "16": 0.29949, + "17": 0.30269, + "18": 0.30194, + "19": 0.29918, + "20": 0.30331, + "21": 0.30981, + "22": 0.30199, + "23": 0.30598, + "24": 0.30587, + "25": 0.30317, + "26": 0.30125, + "27": 0.30707, + "28": 0.30389, + "29": 0.302, + "30": 0.30486, + "31": 0.3068, + "32": 0.30229, + "33": 0.30311, + "34": 0.30869, + "35": 0.30157, + "36": 0.30236, + "37": 0.31062, + "38": 0.30491, + "39": 0.30805, + "40": 0.30378, + "41": 0.30681, + "42": 0.303, + "43": 0.30486, + "44": 0.30998, + "45": 0.30342, + "46": 0.3029, + "47": 0.30594, + "48": 0.30231, + "49": 0.30303, + "50": 0.30593, + "51": 0.34108, + "52": 0.3713, + "53": 0.30266, + "54": 0.30949, + "55": 0.30141, + "56": 0.30177, + "57": 0.31274, + "58": 0.30056, + "59": 0.30334, + "60": 0.30543, + "61": 0.31011, + "62": 0.30218, + "63": 0.30208, + "64": 0.30959, + "65": 0.30236, + "66": 0.3065, + "67": 0.31278, + "68": 0.30352, + "69": 0.30182, + "70": 0.30842, + "71": 0.50384, + "72": 0.30258, + "73": 0.30995, + "74": 0.30317, + "75": 0.30262, + "76": 0.31231, + "77": 0.3028, + "78": 0.29846, + "79": 0.30056, + "80": 0.3041, + "81": 0.29804, + "82": 0.29643, + "83": 0.30562, + "84": 0.2973, + "85": 0.29782, + "86": 0.30939, + "87": 0.29652, + "88": 0.2959, + "89": 0.29905, + "90": 0.30481, + "91": 0.29588, + "92": 0.29895, + "93": 0.30696, + "94": 0.29931, + "95": 0.30059, + "96": 0.31374, + "97": 0.30002, + "98": 0.29871, + "99": 0.30209, + "100": 0.30559 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 
00000000000..997f65d9fb4 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1 @@ +{"0": {"input_prompt": "Creative Commons Attribution-ShareAlike 4.0 International Public License\n\nBy exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License (\u201cPublic License\u201d).", "generated_text": " To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.\n\nA \u201cLicense Elements\u201d means the copyright and similar rights held by the Licensor that apply to the", "generated_tokens": [3870, 1278, 13820, 1593, 11227, 56484, 2188, 1402, 27539, 1435, 1261, 8633, 1044, 3213, 1584, 23369, 1278, 29960, 29520, 27868, 1294, 22666, 1307, 9825, 33868, 1307, 2576, 6856, 1321, 5481, 1044, 1321, 1278, 29960, 10648, 47506, 3213, 2516, 10741, 1294, 22666, 1307, 15021, 1278, 29960, 10648, 26510, 1562, 6187, 1278, 29960, 29520, 14736, 5178, 2425, 2576, 6856, 1321, 5481, 1338, 1065, 2129, 93552, 68175, 1414, 4938, 1278, 48896, 1321, 4510, 10741, 6452, 1536, 1278, 29960, 10648, 1455, 11145, 1317, 1278], "tpot": [2.720426321029663, 0.6659098267555237, 0.07840608060359955, 0.07743222266435623, 0.07455050200223923, 0.0731138214468956, 0.07045378535985947, 0.07106886059045792, 0.0719049945473671, 0.07009641081094742, 0.06961708515882492, 0.0693572461605072, 0.07076390087604523, 0.06894252449274063, 0.06956227123737335, 0.07301510870456696, 0.07005567848682404, 0.07221231609582901, 0.06963715702295303, 0.07077756524085999, 0.0693695992231369, 0.07059446722269058, 
0.07056189328432083, 0.07043007761240005, 0.07100988924503326, 0.06954912096261978, 0.06932665407657623, 0.06911753863096237, 0.06943970918655396, 0.06930265575647354, 0.06936381012201309, 0.07106435298919678, 0.07099161297082901, 0.06973165273666382, 0.07030060887336731, 0.06937744468450546, 0.07144572585821152, 0.0705178901553154, 0.06963129341602325, 0.06951193511486053, 0.06903158873319626, 0.0701359361410141, 0.06920403242111206, 0.06966931372880936, 0.06947369128465652, 0.07044544070959091, 0.07153702527284622, 0.06970176100730896, 0.07077661156654358, 0.06910556554794312, 0.06982534378767014, 0.07268957048654556, 0.07182464003562927, 0.07119160890579224, 0.07311885058879852, 0.07156931608915329, 0.07464009523391724, 0.0744134783744812, 0.07528038322925568, 0.0751194879412651, 0.0736798420548439, 0.0735008642077446, 0.07334134727716446, 0.07211820781230927, 0.07172300666570663, 0.06956271827220917, 0.06994012743234634, 0.07024886459112167, 0.06890105456113815, 0.07088610529899597, 0.06935007870197296, 0.06854406744241714, 0.06991859525442123, 0.07241446524858475, 0.06963654607534409, 0.06925679743289948, 0.06985462456941605, 0.06919551640748978, 0.06986681371927261, 0.07047929614782333], "latency": 15.219947323203087, "logprobs": [-1.034429907798767, -2.2820096015930176, -1.1818207502365112, -0.005243122112005949, -1.3920068740844727, -0.0023506649304181337, -0.23362953960895538, -4.410646579344757e-05, -0.8059788346290588, -1.165771722793579, -0.005122631322592497, -0.01079292967915535, -0.31597569584846497, -4.845684051513672, -0.054925862699747086, -2.718410015106201, -5.851214408874512, -7.10594367980957, -1.8839404582977295, -6.603451728820801, -0.10522890836000443, -0.14382460713386536, -0.908831775188446, -0.011833587661385536, -0.08751995116472244, -0.031985729932785034, -0.03963988274335861, -1.1124131679534912, -0.005112550221383572, -0.0002406545972917229, -0.021998438984155655, -0.013275211676955223, -0.0030618475284427404, -0.007447692099958658, 
-0.059675432741642, -0.027009541168808937, -0.2265223264694214, -0.027810541912913322, -0.0022902467753738165, -0.007414560765028, -2.5149638652801514, -0.06250719726085663, -0.49305495619773865, -0.00015066919149830937, -0.10436679422855377, -0.002546284580603242, -0.0039064777083694935, -0.00010132275929208845, -0.03080633655190468, -0.0027381805703043938, -0.002457219874486327, -0.0022670540492981672, -0.06900941580533981, -0.015771063044667244, -0.0026065681595355272, -3.849259376525879, -0.949365496635437, -0.007241431158035994, -0.8718545436859131, -0.2303992360830307, -0.03798322752118111, -0.0003301552205812186, -0.03691234439611435, -0.08387894183397293, -0.00013851160474587232, -0.000623032043222338, -5.864924969500862e-05, -0.027150511741638184, -0.00028236693469807506, -4.279521817807108e-05, -0.0054723224602639675, -0.0008360228384844959, -0.17018567025661469, -0.0045921108685433865, -0.0020528212189674377, -5.245195097813848e-06, -0.16259293258190155, -0.001334729720838368, -3.45700973412022e-05, -0.0004881620698142797, -0.014900578185915947, -2.706014311115723e-05, -0.004492428619414568, -0.03925368934869766, -0.0006156456656754017, -0.1234944611787796, -0.007040690630674362, -0.0002475670480635017, -0.0005224770284257829, -0.005545470397919416, -4.255681051290594e-05, -0.03230837732553482, -0.07245421409606934, -0.11131127178668976, -0.007754461374133825, -5.6980417866725475e-05, -0.0030440206173807383, -0.006326647009700537, -4.875540980719961e-05, -0.0002343380037928, -3.0397906812140718e-05, -0.012009222991764545, -0.009006588719785213, -0.001957882894203067, -1.2040065485052764e-05, -0.0010608765296638012, -0.0002615109842736274, -0.06175156682729721, -0.01025254838168621, -0.0012381753185763955, -0.0038769098464399576, -0.03451932966709137, -6.270212179515511e-05, -0.12754283845424652, -0.1323665827512741, -0.0015854182420298457, -3.0397906812140718e-05, -0.00013350549852475524, -0.15308424830436707, -1.8770537376403809, -0.31804990768432617, 
-0.5516462922096252, -0.19552597403526306, -0.10012278705835342, -0.7691615223884583, -0.022935237735509872, -0.5969871282577515, -1.5031214952468872, -0.384999543428421, -0.0883193388581276, -0.9926105737686157, -0.389201283454895, -0.22737500071525574, -0.012403964065015316, -0.0001934579631779343, -0.5111952424049377, -0.24787002801895142, -0.0004378790326882154, -0.027225803583860397]}, "1": {"input_prompt": "GNU GENERAL PUBLIC LICENSE\nVersion 3, 29 June 2007\n\nPreamble\n\nThe GNU General Public License is a free, copyleft license for software and other kinds of works.", "generated_text": " The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies", "generated_tokens": [1531, 91700, 1394, 2725, 9314, 1321, 2147, 18107, 6113, 1584, 9543, 1317, 4069, 5109, 2143, 18613, 1317, 9730, 1321, 4036, 1278, 6113, 1046, 5652, 9033, 1044, 1278, 56703, 7487, 11227, 56484, 1395, 13650, 1317, 33152, 2143, 18613, 1317, 9730, 1321, 4036, 1747, 15628, 1307, 1261, 3467, 1742, 1611, 3180, 5257, 1494, 10714, 5370, 9314, 1394, 1747, 2246, 8616, 1046, 2837, 1044, 1278, 16611, 29494, 17364, 1044, 2210, 1278, 56703, 7487, 11227, 56484, 1394, 2725, 1307, 2948, 9314, 1059, 1494, 28735], "tpot": [0.7174983620643616, 0.07785984128713608, 0.0764852836728096, 0.07466614246368408, 0.0717785581946373, 0.07468675076961517, 0.07152419537305832, 0.06969526410102844, 0.07110752165317535, 0.06970572471618652, 0.06920454651117325, 0.06990531086921692, 0.07004140317440033, 0.0712602511048317, 0.06903129816055298, 0.07071229070425034, 0.07059088349342346, 0.06999795883893967, 0.06967964768409729, 0.07150192558765411, 0.06971721351146698, 
0.06916943937540054, 0.06966301053762436, 0.06984022259712219, 0.069039486348629, 0.06911581009626389, 0.06958959996700287, 0.0706978514790535, 0.06978118419647217, 0.06945011019706726, 0.0694519653916359, 0.0701381117105484, 0.06995609402656555, 0.06912890076637268, 0.06973984092473984, 0.06986332684755325, 0.0694037452340126, 0.06932634115219116, 0.06928720325231552, 0.06932701170444489, 0.0689065232872963, 0.07238291203975677, 0.07131846249103546, 0.06996982544660568, 0.07046765089035034, 0.0726158395409584, 0.07259414345026016, 0.07020287960767746, 0.07142271846532822, 0.0708770900964737, 0.07033068686723709, 0.07027311623096466, 0.06996393948793411, 0.07049206644296646, 0.06900809705257416, 0.0699913278222084, 0.07210537791252136, 0.0702073872089386, 0.07132425904273987, 0.06975401192903519, 0.07038697600364685, 0.06933759897947311, 0.06984009593725204, 0.06967458873987198, 0.06888572871685028, 0.06986083090305328, 0.06940105557441711, 0.06956079602241516, 0.06917689740657806, 0.06920892745256424, 0.0712355226278305, 0.07001478224992752, 0.06936268508434296, 0.069720059633255, 0.07083427160978317, 0.0705321878194809, 0.06942963600158691, 0.06904758512973785, 0.06982547044754028, 0.07130048424005508], "latency": 15.219947323203087, "logprobs": [-7.482367992401123, -4.782957077026367, -0.15608751773834229, -0.05624598637223244, -0.0666063204407692, -0.000226472009671852, -0.002314390614628792, -0.7274855971336365, -2.047292470932007, -0.0029495328199118376, -0.8379128575325012, -0.00838379468768835, -0.0015731590101495385, -0.02502445876598358, -0.0011831672163680196, -0.0041245874017477036, -0.00022742546570952982, -0.0002157455455744639, -5.936446541454643e-05, -0.0004980515805073082, -0.0002698534226510674, -2.2059996128082275, -6.3529462814331055, -0.011952094733715057, -0.00010239553375868127, -0.3807244598865509, -0.20424246788024902, -0.41751813888549805, -0.005481095518916845, -1.1086402082582936e-05, -0.007466860581189394, -0.00838320329785347, 
-0.009201501496136189, -0.017721762880682945, -0.0024051330983638763, -0.00045718232286162674, -8.702239938429557e-06, -1.5139465176616795e-05, -0.0031880526803433895, -0.005352333653718233, -0.10581696778535843, -0.05035088211297989, -0.5795518755912781, -0.019671587273478508, -0.007066140417009592, -0.034393906593322754, -6.98299503326416, -0.46170496940612793, -0.04491615667939186, -0.030878927558660507, -0.0016607552533969283, -0.0006268443539738655, -0.00987135712057352, -6.496695277746767e-05, -0.8354158997535706, -0.007698154542595148, -0.0012696071062237024, -0.0004447901446837932, -0.0018221217906102538, -0.0014835315523669124, -0.001134824356995523, -0.034311436116695404, -0.014452068135142326, -0.0019802500028163195, -0.014066009782254696, -0.002191762439906597, -0.0013553252210840583, -0.015814948827028275, -0.007888473570346832, -0.01361841894686222, -0.0007306052139028907, -0.00019095504831057042, -0.0022776394616812468, -0.0008617501589469612, -0.000940476544201374, -0.0038709724321961403, -0.0038757221773266792, -0.004625573288649321, -0.0022389839868992567, -5.6503606174374e-05, -0.0039673917926847935, -0.007623270619660616, -0.0014759134501218796, -0.0002557904226705432, -0.000474936212413013, -0.00139246741309762, -0.001206504413858056, -0.00015853578224778175, -0.000545472139492631, -0.0014616292901337147, -0.002354232594370842, -9.703165414975956e-05, -0.00024399164249189198, -0.16811230778694153, -0.004927040543407202, -0.017750689759850502, -0.0001802282058633864, -0.0014571059728041291, -0.003566454164683819, -0.00021264675888232887, -0.01999940164387226, -0.0008441222598776221, -4.8636207793606445e-05, -0.0011026738211512566, -1.1801649634435307e-05, -0.1814543753862381, -0.016339080408215523, -0.014278624206781387, -0.0029024637769907713, -0.006082594860345125, -0.0016703951405361295, -0.0006364941946230829, -0.0010387268848717213, -0.002667442662641406, -0.0002610342635307461, -0.002438787603750825, -0.013884739950299263, 
-0.007366991601884365, -0.005141369998455048, -0.010307767428457737, -0.0009261847590096295, -0.0009263038518838584, -0.0068603926338255405, -0.0008634176338091493, -0.0006144542712718248, -2.2053474822314456e-05, -0.004078048747032881]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..bb6ee34ea21 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1 @@ +{"0": {"input_prompt": "Creative Commons Attribution-ShareAlike 4.0 International Public License\n\nBy exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License (\u201cPublic License\u201d).", "generated_text": " To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.\n\nA \u201cLicense Elements\u201d means the copyright and similar rights held by the Licensor that apply to the", "generated_tokens": [3870, 1278, 13820, 1593, 11227, 56484, 2188, 1402, 27539, 1435, 1261, 8633, 1044, 3213, 1584, 23369, 1278, 29960, 29520, 27868, 1294, 22666, 1307, 9825, 33868, 1307, 2576, 6856, 1321, 5481, 1044, 1321, 1278, 29960, 10648, 47506, 3213, 2516, 10741, 1294, 22666, 1307, 15021, 1278, 29960, 10648, 26510, 1562, 6187, 1278, 29960, 29520, 14736, 5178, 2425, 2576, 6856, 1321, 5481, 1338, 1065, 2129, 93552, 68175, 1414, 4938, 
1278, 48896, 1321, 4510, 10741, 6452, 1536, 1278, 29960, 10648, 1455, 11145, 1317, 1278], "tpot": [2.4923102855682373, 0.6759980320930481, 0.08269506692886353, 0.08119833469390869, 0.08115603029727936, 0.0800175741314888, 0.08051318675279617, 0.08278025686740875, 0.08045568317174911, 0.08009149134159088, 0.07951929420232773, 0.08059776574373245, 0.08038483560085297, 0.07992669194936752, 0.08057552576065063, 0.07977830618619919, 0.08127715438604355, 0.08072630316019058, 0.08037532866001129, 0.0804634839296341, 0.08137375861406326, 0.0813906267285347, 0.08126940578222275, 0.08076531440019608, 0.08090108633041382, 0.0793602243065834, 0.08094745874404907, 0.0810527354478836, 0.08107049763202667, 0.08040124177932739, 0.07976572960615158, 0.08069661259651184, 0.0826275497674942, 0.0810798704624176, 0.07998496294021606, 0.08005843311548233, 0.0805768370628357, 0.08088915050029755, 0.08113190531730652, 0.08077005296945572, 0.08062981814146042, 0.08078550547361374, 0.08168613910675049, 0.08143996447324753, 0.08142809569835663, 0.08187657594680786, 0.07972115278244019, 0.08118259161710739, 0.08142592012882233, 0.0806335061788559, 0.08064771443605423, 0.07944890111684799, 0.08106396347284317, 0.08158227801322937, 0.0814877450466156, 0.08077871799468994, 0.0795617327094078, 0.08221545070409775, 0.08131680637598038, 0.08039452880620956, 0.080450139939785, 0.07980994880199432, 0.08013289421796799, 0.08113926649093628, 0.08158918470144272, 0.08053535968065262, 0.08091792464256287, 0.07972493022680283, 0.08126131445169449, 0.08287584036588669, 0.0808253064751625, 0.08110111951828003, 0.07954514771699905, 0.08085116744041443, 0.0816071406006813, 0.08060210943222046, 0.08102639764547348, 0.07997968047857285, 0.08147360384464264, 0.08081503957509995], "latency": 16.56691719801165, "logprobs": [-1.034429907798767, -2.2820096015930176, -1.1818207502365112, -0.005243122112005949, -1.3920068740844727, -0.0023506649304181337, -0.23362953960895538, -4.410646579344757e-05, 
-0.8059788346290588, -1.165771722793579, -0.005122631322592497, -0.01079292967915535, -0.31597569584846497, -4.845684051513672, -0.054925862699747086, -2.718410015106201, -5.851214408874512, -7.10594367980957, -1.8839404582977295, -6.603451728820801, -0.10522890836000443, -0.14382460713386536, -0.908831775188446, -0.011833587661385536, -0.08751995116472244, -0.031985729932785034, -0.03963988274335861, -1.1124131679534912, -0.005112550221383572, -0.0002406545972917229, -0.021998438984155655, -0.013275211676955223, -0.0030618475284427404, -0.007447692099958658, -0.059675432741642, -0.027009541168808937, -0.2265223264694214, -0.027810541912913322, -0.0022902467753738165, -0.007414560765028, -2.5149638652801514, -0.06250719726085663, -0.49305495619773865, -0.00015066919149830937, -0.10436679422855377, -0.002546284580603242, -0.0039064777083694935, -0.00010132275929208845, -0.03080633655190468, -0.0027381805703043938, -0.002457219874486327, -0.0022670540492981672, -0.06900941580533981, -0.015771063044667244, -0.0026065681595355272, -3.849259376525879, -0.949365496635437, -0.007241431158035994, -0.8718545436859131, -0.2303992360830307, -0.03798322752118111, -0.0003301552205812186, -0.03691234439611435, -0.08387894183397293, -0.00013851160474587232, -0.000623032043222338, -5.864924969500862e-05, -0.027150511741638184, -0.00028236693469807506, -4.279521817807108e-05, -0.0054723224602639675, -0.0008360228384844959, -0.17018567025661469, -0.0045921108685433865, -0.0020528212189674377, -5.245195097813848e-06, -0.16259293258190155, -0.001334729720838368, -3.45700973412022e-05, -0.0004881620698142797, -0.014900578185915947, -2.706014311115723e-05, -0.004492428619414568, -0.03925368934869766, -0.0006156456656754017, -0.1234944611787796, -0.007040690630674362, -0.0002475670480635017, -0.0005224770284257829, -0.005545470397919416, -4.255681051290594e-05, -0.03230837732553482, -0.07245421409606934, -0.11131127178668976, -0.007754461374133825, -5.6980417866725475e-05, 
-0.0030440206173807383, -0.006326647009700537, -4.875540980719961e-05, -0.0002343380037928, -3.0397906812140718e-05, -0.012009222991764545, -0.009006588719785213, -0.001957882894203067, -1.2040065485052764e-05, -0.0010608765296638012, -0.0002615109842736274, -0.06175156682729721, -0.01025254838168621, -0.0012381753185763955, -0.0038769098464399576, -0.03451932966709137, -6.270212179515511e-05, -0.12754283845424652, -0.1323665827512741, -0.0015854182420298457, -3.0397906812140718e-05, -0.00013350549852475524, -0.15308424830436707, -1.8770537376403809, -0.31804990768432617, -0.5516462922096252, -0.19552597403526306, -0.10012278705835342, -0.7691615223884583, -0.022935237735509872, -0.5969871282577515, -1.5031214952468872, -0.384999543428421, -0.0883193388581276, -0.9926105737686157, -0.389201283454895, -0.22737500071525574, -0.012403964065015316, -0.0001934579631779343, -0.5111952424049377, -0.24787002801895142, -0.0004378790326882154, -0.027225803583860397]}, "1": {"input_prompt": "GNU GENERAL PUBLIC LICENSE\nVersion 3, 29 June 2007\n\nPreamble\n\nThe GNU General Public License is a free, copyleft license for software and other kinds of works.", "generated_text": " The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. 
We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies", "generated_tokens": [1531, 91700, 1394, 2725, 9314, 1321, 2147, 18107, 6113, 1584, 9543, 1317, 4069, 5109, 2143, 18613, 1317, 9730, 1321, 4036, 1278, 6113, 1046, 5652, 9033, 1044, 1278, 56703, 7487, 11227, 56484, 1395, 13650, 1317, 33152, 2143, 18613, 1317, 9730, 1321, 4036, 1747, 15628, 1307, 1261, 3467, 1742, 1611, 3180, 5257, 1494, 10714, 5370, 9314, 1394, 1747, 2246, 8616, 1046, 2837, 1044, 1278, 16611, 29494, 17364, 1044, 2210, 1278, 56703, 7487, 11227, 56484, 1394, 2725, 1307, 2948, 9314, 1059, 1494, 28735], "tpot": [0.6688169836997986, 0.08646825700998306, 0.083538718521595, 0.08260326087474823, 0.08199965208768845, 0.08158879727125168, 0.0802709087729454, 0.08419913798570633, 0.07995779067277908, 0.08143891394138336, 0.08108057081699371, 0.08084486424922943, 0.08102915436029434, 0.07983194291591644, 0.08131516724824905, 0.0816650539636612, 0.08091884851455688, 0.08093494176864624, 0.08018704503774643, 0.08179347217082977, 0.08112754672765732, 0.08112083375453949, 0.0805734395980835, 0.08067212998867035, 0.08022300899028778, 0.08121798932552338, 0.08183427155017853, 0.0806741788983345, 0.08114969730377197, 0.07974809408187866, 0.080985888838768, 0.08140931278467178, 0.0831851214170456, 0.08096041530370712, 0.07966978847980499, 0.08085939288139343, 0.08112092316150665, 0.08085711300373077, 0.08063827455043793, 0.07968409359455109, 0.08139641582965851, 0.08102294057607651, 0.08102816343307495, 0.08071696013212204, 0.08157248049974442, 0.08005645126104355, 0.08118710666894913, 0.0810147151350975, 0.08026038110256195, 0.08055280148983002, 0.07966405898332596, 0.08168742060661316, 0.0816090852022171, 0.08039574325084686, 0.08089830726385117, 0.0794670432806015, 0.08368594944477081, 0.08118339627981186, 0.08051532506942749, 0.08080841600894928, 0.07947234809398651, 0.08114787191152573, 0.08128608018159866, 0.08138518780469894, 0.08067911118268967, 
0.08099766820669174, 0.08047705888748169, 0.08083853125572205, 0.08097779005765915, 0.08190613985061646, 0.08038448542356491, 0.08032994717359543, 0.08100729435682297, 0.08379139006137848, 0.08242924511432648, 0.08085381984710693, 0.07933055609464645, 0.0811963826417923, 0.08024899661540985, 0.08009414374828339], "latency": 16.56691719801165, "logprobs": [-7.482367992401123, -4.782957077026367, -0.15608751773834229, -0.05624598637223244, -0.0666063204407692, -0.000226472009671852, -0.002314390614628792, -0.7274855971336365, -2.047292470932007, -0.0029495328199118376, -0.8379128575325012, -0.00838379468768835, -0.0015731590101495385, -0.02502445876598358, -0.0011831672163680196, -0.0041245874017477036, -0.00022742546570952982, -0.0002157455455744639, -5.936446541454643e-05, -0.0004980515805073082, -0.0002698534226510674, -2.2059996128082275, -6.3529462814331055, -0.011952094733715057, -0.00010239553375868127, -0.3807244598865509, -0.20424246788024902, -0.41751813888549805, -0.005481095518916845, -1.1086402082582936e-05, -0.007466860581189394, -0.00838320329785347, -0.009201501496136189, -0.017721762880682945, -0.0024051330983638763, -0.00045718232286162674, -8.702239938429557e-06, -1.5139465176616795e-05, -0.0031880526803433895, -0.005352333653718233, -0.10581696778535843, -0.05035088211297989, -0.5795518755912781, -0.019671587273478508, -0.007066140417009592, -0.034393906593322754, -6.98299503326416, -0.46170496940612793, -0.04491615667939186, -0.030878927558660507, -0.0016607552533969283, -0.0006268443539738655, -0.00987135712057352, -6.496695277746767e-05, -0.8354158997535706, -0.007698154542595148, -0.0012696071062237024, -0.0004447901446837932, -0.0018221217906102538, -0.0014835315523669124, -0.001134824356995523, -0.034311436116695404, -0.014452068135142326, -0.0019802500028163195, -0.014066009782254696, -0.002191762439906597, -0.0013553252210840583, -0.015814948827028275, -0.007888473570346832, -0.01361841894686222, -0.0007306052139028907, 
-0.00019095504831057042, -0.0022776394616812468, -0.0008617501589469612, -0.000940476544201374, -0.0038709724321961403, -0.0038757221773266792, -0.004625573288649321, -0.0022389839868992567, -5.6503606174374e-05, -0.0039673917926847935, -0.007623270619660616, -0.0014759134501218796, -0.0002557904226705432, -0.000474936212413013, -0.00139246741309762, -0.001206504413858056, -0.00015853578224778175, -0.000545472139492631, -0.0014616292901337147, -0.002354232594370842, -9.703165414975956e-05, -0.00024399164249189198, -0.16811230778694153, -0.004927040543407202, -0.017750689759850502, -0.0001802282058633864, -0.0014571059728041291, -0.003566454164683819, -0.00021264675888232887, -0.01999940164387226, -0.0008441222598776221, -4.8636207793606445e-05, -0.0011026738211512566, -1.1801649634435307e-05, -0.1814543753862381, -0.016339080408215523, -0.014278624206781387, -0.0029024637769907713, -0.006082594860345125, -0.0016703951405361295, -0.0006364941946230829, -0.0010387268848717213, -0.002667442662641406, -0.0002610342635307461, -0.002438787603750825, -0.013884739950299263, -0.007366991601884365, -0.005141369998455048, -0.010307767428457737, -0.0009261847590096295, -0.0009263038518838584, -0.0068603926338255405, -0.0008634176338091493, -0.0006144542712718248, -2.2053474822314456e-05, -0.004078048747032881]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..e7bab115f6e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1 @@ +{"1": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it 
is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. This is New York at the end", "generated_tokens": [3060, 2430, 1636, 2012, 1317, 1278, 2362, 1307, 1278, 16070, 1044, 1321, 1636, 23067, 1455, 1593, 1395, 1605, 3140, 5152, 1513, 1747, 1046, 2409, 1395, 3140, 5152, 1513, 1278, 2362], "tpot": [0.5686635971069336, 0.006066783796995878, 0.00542214373126626, 0.005529535934329033, 0.005290016066282988, 0.005014463793486357, 0.004941120278090239, 0.004862783942371607, 0.004948512185364962, 0.004847776144742966, 0.004972127731889486, 0.0052157118916511536, 0.005366367753595114, 0.0054197758436203, 0.005486688110977411, 0.005352096166461706, 0.005394879728555679, 0.005450463853776455, 0.005347424186766148, 0.005441728048026562, 0.0054066237062215805, 0.0052277762442827225, 0.005518496036529541, 0.005288544110953808, 0.005351583939045668, 0.005274975672364235, 0.0052535682916641235, 0.005358528345823288, 0.00528879975900054, 0.0052247364073991776], "latency": 0.7284151650965214, "logprobs": [-9.358616828918457, -2.7474308013916016, -4.628000259399414, -1.5015846490859985, -0.6537986993789673, -1.6720777750015259, -2.478705883026123, -2.0523874759674072, -2.4486241340637207, -6.257688522338867, -1.4695018529891968, -3.4444499015808105, -4.394474029541016, -3.875497817993164, -2.0133562088012695, -1.8832889795303345, -3.8004486560821533, -6.784910678863525, -0.2949134111404419, -0.9851954579353333, -6.626471519470215, -7.186152458190918, -12.800604820251465, -2.2686400413513184, -3.7816011905670166, -0.4978560209274292, -4.371628284454346, -0.0696188285946846, -0.09487748891115189, -3.2375073432922363, -10.075444221496582, -1.138173222541809, -5.97689151763916, -5.093283653259277, -3.874396324157715, 
-2.6073620319366455, -3.466899871826172, -5.642228126525879, -1.6154727935791016, -5.416567325592041, -12.158267974853516, -12.610607147216797, -0.09664110094308853, -2.5213418006896973, -1.3747841119766235, -2.8510401248931885, -1.1877963542938232, -0.006288621574640274, -3.382380962371826, -13.207911491394043, -4.477662086486816, -2.5299136638641357, -6.053747653961182, -0.7650555372238159, -0.04903985932469368, -1.5557448863983154, -1.1315535306930542, -5.610307216644287, -0.4059771001338959, -4.961302280426025, -0.5701270699501038, -0.7174267172813416, -2.4735305309295654, -13.610812187194824, -0.09192369878292084, -3.5248732566833496, -1.3797900676727295, -6.429551124572754, -0.541852593421936, -3.5403199195861816, -0.8477706909179688, -1.5764057636260986, -5.343497276306152, -17.19588851928711, -6.635483741760254, -0.8923014402389526, -4.114314556121826, -1.2193646430969238, -2.2128424644470215, -1.7673423290252686, -0.22567729651927948, -9.320298194885254, -0.1282224804162979, -7.3249101638793945, -2.511319875717163, -4.0696563720703125, -3.5427517890930176, -1.9300249814987183, -2.347038507461548, -1.5178614854812622, -2.366441249847412, -1.744020938873291, -1.1570327281951904, -3.0150983333587646, -0.5272141098976135, -0.4669455885887146, -1.7157398462295532, -0.8362292051315308, -0.41491177678108215, -0.9386503100395203, -1.5008316040039062, -0.4635278284549713, -1.6312834024429321, -0.5320357084274292, -1.2249717712402344, -1.1707526445388794, -0.0023814670275896788, -1.1655761003494263, -0.006950841750949621, -0.7309689521789551, -0.7428325414657593, -0.042878177016973495, -0.8572992086410522, -0.01948782242834568, -2.0537290573120117, -1.2817553281784058, -0.8235744833946228]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/golden_values_dev_dgxh100_eos.json 
new file mode 100644 index 00000000000..18ce65a905f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/golden_values_dev_dgxh100_eos.json @@ -0,0 +1 @@ +{"1": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. This is New York at the end", "generated_tokens": [3060, 2430, 1636, 2012, 1317, 1278, 2362, 1307, 1278, 16070, 1044, 1321, 1636, 23067, 1455, 1593, 1395, 1605, 3140, 5152, 1513, 1747, 1046, 2409, 1395, 3140, 5152, 1513, 1278, 2362], "tpot": [0.6098978519439697, 0.00587167963385582, 0.00553337624296546, 0.005388895981013775, 0.0052880640141665936, 0.005359936039894819, 0.00534518389031291, 0.005303360056132078, 0.0053532798774540424, 0.005232864059507847, 0.0053773121908307076, 0.005341055803000927, 0.0052644480019807816, 0.005387584213167429, 0.005375008098781109, 0.00524944020435214, 0.0053992001339793205, 0.005333151668310165, 0.0052451519295573235, 0.005348992068320513, 0.005396031774580479, 0.0052389120683074, 0.005332960281521082, 0.005230464041233063, 0.005353568121790886, 0.005343679804354906, 0.005257599987089634, 0.005404096096754074, 0.005395135842263699, 0.005260608159005642], "latency": 0.769633749499917, "logprobs": [-9.358616828918457, -2.7474308013916016, -4.628000259399414, -1.5015846490859985, -0.6537986993789673, -1.6720777750015259, -2.478705883026123, -2.0523874759674072, -2.4486241340637207, -6.257688522338867, -1.4695018529891968, -3.4444499015808105, -4.394474029541016, -3.875497817993164, -2.0133562088012695, -1.8832889795303345, -3.8004486560821533, 
-6.784910678863525, -0.2949134111404419, -0.9851954579353333, -6.626471519470215, -7.186152458190918, -12.800604820251465, -2.2686400413513184, -3.7816011905670166, -0.4978560209274292, -4.371628284454346, -0.0696188285946846, -0.09487748891115189, -3.2375073432922363, -10.075444221496582, -1.138173222541809, -5.97689151763916, -5.093283653259277, -3.874396324157715, -2.6073620319366455, -3.466899871826172, -5.642228126525879, -1.6154727935791016, -5.416567325592041, -12.158267974853516, -12.610607147216797, -0.09664110094308853, -2.5213418006896973, -1.3747841119766235, -2.8510401248931885, -1.1877963542938232, -0.006288621574640274, -3.382380962371826, -13.207911491394043, -4.477662086486816, -2.5299136638641357, -6.053747653961182, -0.7650555372238159, -0.04903985932469368, -1.5557448863983154, -1.1315535306930542, -5.610307216644287, -0.4059771001338959, -4.961302280426025, -0.5701270699501038, -0.7174267172813416, -2.4735305309295654, -13.610812187194824, -0.09192369878292084, -3.5248732566833496, -1.3797900676727295, -6.429551124572754, -0.541852593421936, -3.5403199195861816, -0.8477706909179688, -1.5764057636260986, -5.343497276306152, -17.19588851928711, -6.635483741760254, -0.8923014402389526, -4.114314556121826, -1.2193646430969238, -2.2128424644470215, -1.7673423290252686, -0.22567729651927948, -9.320298194885254, -0.1282224804162979, -7.3249101638793945, -2.511319875717163, -4.0696563720703125, -3.5427517890930176, -1.9300249814987183, -2.347038507461548, -1.5178614854812622, -2.366441249847412, -1.744020938873291, -1.1570327281951904, -3.0150983333587646, -0.5272141098976135, -0.4669455885887146, -1.7157398462295532, -0.8362292051315308, -0.41491177678108215, -0.9386503100395203, -1.5008316040039062, -0.4635278284549713, -1.6312834024429321, -0.5320357084274292, -1.2249717712402344, -1.1707526445388794, -0.0023814670275896788, -1.1655761003494263, -0.006950841750949621, -0.7309689521789551, -0.7428325414657593, -0.042878177016973495, 
-0.8572992086410522, -0.01948782242834568, -2.0537290573120117, -1.2817553281784058, -0.8235744833946228]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..05e16225cd4 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1 @@ +{"1": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", "generated_tokens": [3060, 2430, 1636, 2012, 1317, 1278, 2362, 1307, 1278, 16070, 1044, 1321, 1636, 23067, 1455, 1593, 1395, 1605, 3140, 5152, 1513, 1747, 1046, 2409, 1395, 3140, 5152, 1513, 1278, 2362], "tpot": [0.561271607875824, 0.010015103965997696, 0.008491167798638344, 0.007847008295357227, 0.007853696122765541, 0.007908639498054981, 0.0077699837274849415, 0.007929407991468906, 0.007948416285216808, 0.008069856092333794, 0.008628063835203648, 0.00827731192111969, 0.007847904227674007, 0.007874688133597374, 0.008285152725875378, 0.008413120172917843, 0.008548031561076641, 0.008463519625365734, 0.008221376687288284, 0.008037183433771133, 0.007799903862178326, 0.007931231521070004, 0.008392063900828362, 0.008282655850052834, 0.00781238405033946, 0.007775456178933382, 0.007549664005637169, 0.00783606432378292, 0.00781475193798542, 0.00798182375729084], "latency": 0.8031206205487251, "logprobs": [-9.362524032592773, -2.761181354522705, -4.53175163269043, -1.5617105960845947, -0.7528610229492188, -1.6253626346588135, -2.45941162109375, -2.1533684730529785, -2.346475124359131, -6.157411575317383, -1.3193804025650024, -3.5247979164123535, -4.488514423370361, -3.759702682495117, -2.022449493408203, -1.8945543766021729, -3.6219239234924316, -6.842351913452148, -0.3225390613079071, -0.8537865877151489, -6.520284652709961, -7.550463676452637, -12.595708847045898, -2.9504785537719727, -3.8068642616271973, -0.5890476107597351, -4.3587751388549805, -0.0665372759103775, -0.06955777853727341, -3.3523848056793213, -9.773153305053711, -1.0814638137817383, -6.204980850219727, -5.33505392074585, -3.9411606788635254, -2.7358486652374268, -3.2924106121063232, -6.0152740478515625, -1.8116782903671265, -6.243865013122559, -12.158185958862305, -12.65605354309082, -0.08688803017139435, -2.6079092025756836, -1.4071979522705078, -2.990557909011841, -1.2379846572875977, -0.006849618628621101, -3.4119930267333984, -13.05937671661377, -4.2840399742126465, 
-2.4802193641662598, -5.933547019958496, -0.9116124510765076, -0.060975510627031326, -1.5681536197662354, -1.0339949131011963, -5.617187023162842, -0.41873589158058167, -4.9402852058410645, -0.5690340995788574, -0.6301103830337524, -2.396580696105957, -13.29629898071289, -0.08181379735469818, -3.6629719734191895, -1.105454683303833, -6.127413749694824, -0.5906393527984619, -3.548814296722412, -0.9948520660400391, -1.5058085918426514, -5.211822509765625, -17.489606857299805, -6.8240861892700195, -0.9539748430252075, -4.2172040939331055, -1.1572864055633545, -2.3540186882019043, -1.798780918121338, -0.2533280849456787, -9.403679847717285, -0.1830129772424698, -7.440906524658203, -2.228740692138672, -4.196046352386475, -3.5180575847625732, -1.9530653953552246, -2.2825613021850586, -1.5544131994247437, -2.3991782665252686, -1.554469347000122, -1.290938377380371, -2.785543203353882, -0.6400948166847229, -0.48503541946411133, -1.432410478591919, -0.9366894960403442, -0.42669478058815, -0.9688448905944824, -1.4787911176681519, -0.43357178568840027, -1.8381303548812866, -0.6210520267486572, -1.0601571798324585, -1.1962573528289795, -0.002758747199550271, -1.2365548610687256, -0.008277395740151405, -0.7464911341667175, -0.8628943562507629, -0.0671280175447464, -0.953361988067627, -0.02595982328057289, -2.139401435852051, -1.1942673921585083, -0.7968283295631409]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..6a5ace35ec7 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/golden_values_dev_dgxh100_eos.json @@ -0,0 +1 @@ +{"1": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East 
Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. This is New York at the end", "generated_tokens": [3060, 2430, 1636, 2012, 1317, 1278, 2362, 1307, 1278, 16070, 1044, 1321, 1636, 23067, 1455, 1593, 1395, 1605, 3140, 5152, 1513, 1747, 1046, 2409, 1395, 3140, 5152, 1513, 1278, 2362], "tpot": [0.6358857750892639, 0.009907487779855728, 0.010546143166720867, 0.009435135871171951, 0.010123520158231258, 0.009925439953804016, 0.008350367657840252, 0.008556703105568886, 0.008582624606788158, 0.00840403139591217, 0.008557791821658611, 0.008503519929945469, 0.008379808627068996, 0.009403808042407036, 0.009133151732385159, 0.008321152068674564, 0.008845727890729904, 0.008372415788471699, 0.008591103367507458, 0.009211359545588493, 0.009166751988232136, 0.009767616167664528, 0.008620256558060646, 0.009338144212961197, 0.010125535540282726, 0.010068127885460854, 0.009669983759522438, 0.010439807549118996, 0.010279008187353611, 0.0103340158239007], "latency": 0.9097336048725992, "logprobs": [-9.362524032592773, -2.761181354522705, -4.53175163269043, -1.5617105960845947, -0.7528610229492188, -1.6253626346588135, -2.45941162109375, -2.1533684730529785, -2.346475124359131, -6.157411575317383, -1.3193804025650024, -3.5247979164123535, -4.488514423370361, -3.759702682495117, -2.022449493408203, -1.8945543766021729, -3.6219239234924316, -6.842351913452148, -0.3225390613079071, -0.8537865877151489, -6.520284652709961, -7.550463676452637, -12.595708847045898, -2.9504785537719727, -3.8068642616271973, -0.5890476107597351, -4.3587751388549805, -0.0665372759103775, -0.06955777853727341, -3.3523848056793213, -9.773153305053711, -1.0814638137817383, -6.204980850219727, 
-5.33505392074585, -3.9411606788635254, -2.7358486652374268, -3.2924106121063232, -6.0152740478515625, -1.8116782903671265, -6.243865013122559, -12.158185958862305, -12.65605354309082, -0.08688803017139435, -2.6079092025756836, -1.4071979522705078, -2.990557909011841, -1.2379846572875977, -0.006849618628621101, -3.4119930267333984, -13.05937671661377, -4.2840399742126465, -2.4802193641662598, -5.933547019958496, -0.9116124510765076, -0.060975510627031326, -1.5681536197662354, -1.0339949131011963, -5.617187023162842, -0.41873589158058167, -4.9402852058410645, -0.5690340995788574, -0.6301103830337524, -2.396580696105957, -13.29629898071289, -0.08181379735469818, -3.6629719734191895, -1.105454683303833, -6.127413749694824, -0.5906393527984619, -3.548814296722412, -0.9948520660400391, -1.5058085918426514, -5.211822509765625, -17.489606857299805, -6.8240861892700195, -0.9539748430252075, -4.2172040939331055, -1.1572864055633545, -2.3540186882019043, -1.798780918121338, -0.2533280849456787, -9.403679847717285, -0.1830129772424698, -7.440906524658203, -2.228740692138672, -4.196046352386475, -3.5180575847625732, -1.9530653953552246, -2.2825613021850586, -1.5544131994247437, -2.3991782665252686, -1.554469347000122, -1.290938377380371, -2.785543203353882, -0.6400948166847229, -0.48503541946411133, -1.432410478591919, -0.9366894960403442, -0.42669478058815, -0.9688448905944824, -1.4787911176681519, -0.43357178568840027, -1.8381303548812866, -0.6210520267486572, -1.0601571798324585, -1.1962573528289795, -0.002758747199550271, -1.2365548610687256, -0.008277395740151405, -0.7464911341667175, -0.8628943562507629, -0.0671280175447464, -0.953361988067627, -0.02595982328057289, -2.139401435852051, -1.1942673921585083, -0.7968283295631409]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..f37c35812e5 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1 @@ +{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. This is New York at the end", "generated_tokens": [3060, 2430, 1636, 2012, 1317, 1278, 2362, 1307, 1278, 16070, 1044, 1321, 1636, 23067, 1455, 1593, 1395, 1605, 3140, 5152, 1513, 1747, 1046, 2409, 1395, 3140, 5152, 1513, 1278, 2362], "tpot": [2.1197516918182373, 0.3172459900379181, 0.016708193346858025, 0.015786752104759216, 0.015607455745339394, 0.015449312515556812, 0.015446463599801064, 0.015455200336873531, 0.015508351847529411, 0.016473280265927315, 0.015467967838048935, 0.015407584607601166, 0.015393920242786407, 0.015441760420799255, 0.015666943043470383, 0.015604863874614239, 0.015388128347694874, 0.015523936599493027, 0.015425760298967361, 0.016386207193136215, 0.016847264021635056, 0.016578560695052147, 0.016409022733569145, 0.016199840232729912, 0.015789279714226723, 0.015486880205571651, 0.01539977639913559, 0.016956929117441177, 0.016581375151872635, 0.01746956817805767], "latency": 2.903888032771647, "logprobs": [-9.358616828918457, -2.7474308013916016, -4.628000259399414, -1.5015846490859985, -0.6537986993789673, -1.6720777750015259, -2.478705883026123, -2.0523874759674072, -2.4486241340637207, -6.257688522338867, -1.4695018529891968, 
-3.4444499015808105, -4.394474029541016, -3.875497817993164, -2.0133562088012695, -1.8832889795303345, -3.8004486560821533, -6.784910678863525, -0.2949134111404419, -0.9851954579353333, -6.626471519470215, -7.186152458190918, -12.800604820251465, -2.2686400413513184, -3.7816011905670166, -0.4978560209274292, -4.371628284454346, -0.0696188285946846, -0.09487748891115189, -3.2375073432922363, -10.075444221496582, -1.138173222541809, -5.97689151763916, -5.093283653259277, -3.874396324157715, -2.6073620319366455, -3.466899871826172, -5.642228126525879, -1.6154727935791016, -5.416567325592041, -12.158267974853516, -12.610607147216797, -0.09664110094308853, -2.5213418006896973, -1.3747841119766235, -2.8510401248931885, -1.1877963542938232, -0.006288621574640274, -3.382380962371826, -13.207911491394043, -4.477662086486816, -2.5299136638641357, -6.053747653961182, -0.7650555372238159, -0.04903985932469368, -1.5557448863983154, -1.1315535306930542, -5.610307216644287, -0.4059771001338959, -4.961302280426025, -0.5701270699501038, -0.7174267172813416, -2.4735305309295654, -13.610812187194824, -0.09192369878292084, -3.5248732566833496, -1.3797900676727295, -6.429551124572754, -0.541852593421936, -3.5403199195861816, -0.8477706909179688, -1.5764057636260986, -5.343497276306152, -17.19588851928711, -6.635483741760254, -0.8923014402389526, -4.114314556121826, -1.2193646430969238, -2.2128424644470215, -1.7673423290252686, -0.22567729651927948, -9.320298194885254, -0.1282224804162979, -7.3249101638793945, -2.511319875717163, -4.0696563720703125, -3.5427517890930176, -1.9300249814987183, -2.347038507461548, -1.5178614854812622, -2.366441249847412, -1.744020938873291, -1.1570327281951904, -3.0150983333587646, -0.5272141098976135, -0.4669455885887146, -1.7157398462295532, -0.8362292051315308, -0.41491177678108215, -0.9386503100395203, -1.5008316040039062, -0.4635278284549713, -1.6312834024429321, -0.5320357084274292, -1.2249717712402344, -1.1707526445388794, -0.0023814670275896788, 
-1.1655761003494263, -0.006950841750949621, -0.7309689521789551, -0.7428325414657593, -0.042878177016973495, -0.8572992086410522, -0.01948782242834568, -2.0537290573120117, -1.2817553281784058, -0.8235744833946228]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..a4b870809ba --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1 @@ +{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", "generated_tokens": [3060, 2430, 1636, 2012, 1317, 1278, 2362, 1307, 1278, 16070, 1044, 1321, 1636, 23067, 1455, 1593, 1395, 1605, 3140, 5152, 1513, 1747, 1046, 2409, 1395, 3140, 5152, 1513, 1278, 2362], "tpot": [2.2565205097198486, 0.3516305685043335, 0.01722889579832554, 0.018507104367017746, 0.01656815968453884, 0.016881439834833145, 0.0166244488209486, 0.01648310385644436, 0.016350112855434418, 0.018141599372029305, 0.01638089492917061, 0.016720257699489594, 0.01646953634917736, 0.01641814410686493, 0.016365855932235718, 0.018089760094881058, 0.016283327713608742, 0.01690729521214962, 0.019018815830349922, 0.01721513643860817, 0.01676982268691063, 0.018497919663786888, 0.016406463459134102, 0.01895606331527233, 0.018566368147730827, 0.017292767763137817, 0.02004953660070896, 0.0188816636800766, 0.019935935735702515, 0.019367488101124763], "latency": 3.115501318126917, "logprobs": [-9.358616828918457, -2.7474308013916016, -4.628000259399414, -1.5015846490859985, -0.6537986993789673, -1.6720777750015259, -2.478705883026123, -2.0523874759674072, -2.4486241340637207, -6.257688522338867, -1.4695018529891968, -3.4444499015808105, -4.394474029541016, -3.875497817993164, -2.0133562088012695, -1.8832889795303345, -3.8004486560821533, -6.784910678863525, -0.2949134111404419, -0.9851954579353333, -6.626471519470215, -7.186152458190918, -12.800604820251465, -2.2686400413513184, -3.7816011905670166, -0.4978560209274292, -4.371628284454346, -0.0696188285946846, -0.09487748891115189, -3.2375073432922363, -10.075444221496582, -1.138173222541809, -5.97689151763916, -5.093283653259277, -3.874396324157715, -2.6073620319366455, -3.466899871826172, -5.642228126525879, -1.6154727935791016, -5.416567325592041, -12.158267974853516, -12.610607147216797, -0.09664110094308853, -2.5213418006896973, -1.3747841119766235, -2.8510401248931885, -1.1877963542938232, -0.006288621574640274, -3.382380962371826, -13.207911491394043, -4.477662086486816, 
-2.5299136638641357, -6.053747653961182, -0.7650555372238159, -0.04903985932469368, -1.5557448863983154, -1.1315535306930542, -5.610307216644287, -0.4059771001338959, -4.961302280426025, -0.5701270699501038, -0.7174267172813416, -2.4735305309295654, -13.610812187194824, -0.09192369878292084, -3.5248732566833496, -1.3797900676727295, -6.429551124572754, -0.541852593421936, -3.5403199195861816, -0.8477706909179688, -1.5764057636260986, -5.343497276306152, -17.19588851928711, -6.635483741760254, -0.8923014402389526, -4.114314556121826, -1.2193646430969238, -2.2128424644470215, -1.7673423290252686, -0.22567729651927948, -9.320298194885254, -0.1282224804162979, -7.3249101638793945, -2.511319875717163, -4.0696563720703125, -3.5427517890930176, -1.9300249814987183, -2.347038507461548, -1.5178614854812622, -2.366441249847412, -1.744020938873291, -1.1570327281951904, -3.0150983333587646, -0.5272141098976135, -0.4669455885887146, -1.7157398462295532, -0.8362292051315308, -0.41491177678108215, -0.9386503100395203, -1.5008316040039062, -0.4635278284549713, -1.6312834024429321, -0.5320357084274292, -1.2249717712402344, -1.1707526445388794, -0.0023814670275896788, -1.1655761003494263, -0.006950841750949621, -0.7309689521789551, -0.7428325414657593, -0.042878177016973495, -0.8572992086410522, -0.01948782242834568, -2.0537290573120117, -1.2817553281784058, -0.8235744833946228]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 22fca066f39..f9b98f41237 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, 
"end_step": 50, "step_interval": 5, "values": {"1": 10.97439, "5": 11.00379, "10": 10.95244, "15": 10.85533, "20": 10.6403, "25": 10.25922, "30": 9.91482, "35": 9.70711, "40": 9.34219, "45": 9.00177, "50": 9.12586}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 21015.0, "5": 23387.0, "10": 19344.0, "15": 23461.0, "20": 21503.0, "25": 19506.0, "30": 20239.0, "35": 22142.0, "40": 24112.0, "45": 21801.0, "50": 27877.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3117478912.0, "5": 3117478912.0, "10": 3117478912.0, "15": 3117478912.0, "20": 3117478912.0, "25": 3117478912.0, "30": 3117478912.0, "35": 3117478912.0, "40": 3117478912.0, "45": 3117478912.0, "50": 3117478912.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 9708208128.0, "5": 10145497088.0, "10": 10145497088.0, "15": 10145497088.0, "20": 10145497088.0, "25": 10145497088.0, "30": 10145497088.0, "35": 10145497088.0, "40": 10145497088.0, "45": 10145497088.0, "50": 10145497088.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 68.38039, "5": 0.15499, "10": 0.15766, "15": 0.15466, "20": 0.15575, "25": 0.15341, "30": 0.15715, "35": 0.16344, "40": 0.15691, "45": 0.18148, "50": 0.16344}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.97434, + "2": 10.976, + "3": 10.9787, + "4": 10.95784, + "5": 11.00373, + "6": 11.00618, + "7": 10.97996, + "8": 10.96861, + "9": 10.97919, + "10": 10.95244, + "11": 10.99935, + "12": 10.96821, + "13": 10.96591, + "14": 10.99543, + "15": 10.85545, + "16": 10.85544, + "17": 10.81736, + "18": 10.82741, + "19": 10.82166, + "20": 10.64041, + "21": 10.57938, + "22": 10.33552, + "23": 10.61311, + "24": 10.34969, + "25": 10.25934, + "26": 10.36367, + "27": 10.38735, + "28": 10.35703, + "29": 10.38231, + "30": 9.91506, + "31": 
9.47491, + "32": 10.08956, + "33": 10.08418, + "34": 9.65437, + "35": 9.70727, + "36": 9.58843, + "37": 9.82211, + "38": 9.53615, + "39": 9.94103, + "40": 9.34234, + "41": 9.48854, + "42": 9.56996, + "43": 9.0355, + "44": 9.15623, + "45": 9.00188, + "46": 9.06394, + "47": 9.49292, + "48": 9.04259, + "49": 8.58802, + "50": 9.12597 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 20919.0, + "2": 21891.0, + "3": 21096.0, + "4": 20712.0, + "5": 23549.0, + "6": 24113.0, + "7": 23323.0, + "8": 21849.0, + "9": 22954.0, + "10": 19196.0, + "11": 24647.0, + "12": 23707.0, + "13": 24320.0, + "14": 24596.0, + "15": 23689.0, + "16": 23647.0, + "17": 22594.0, + "18": 22957.0, + "19": 23469.0, + "20": 21794.0, + "21": 22831.0, + "22": 19274.0, + "23": 24548.0, + "24": 19712.0, + "25": 19775.0, + "26": 21249.0, + "27": 22519.0, + "28": 23834.0, + "29": 23280.0, + "30": 20509.0, + "31": 17408.0, + "32": 21974.0, + "33": 22884.0, + "34": 21870.0, + "35": 22283.0, + "36": 21004.0, + "37": 22759.0, + "38": 22719.0, + "39": 22051.0, + "40": 23748.0, + "41": 24092.0, + "42": 23517.0, + "43": 22267.0, + "44": 22001.0, + "45": 21520.0, + "46": 22824.0, + "47": 25650.0, + "48": 25468.0, + "49": 25463.0, + "50": 28240.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3117478912.0, + "2": 3117478912.0, + "3": 3117478912.0, + "4": 3117478912.0, + "5": 3117478912.0, + "6": 3117478912.0, + "7": 3117478912.0, + "8": 3117478912.0, + "9": 3117478912.0, + "10": 3117478912.0, + "11": 3117478912.0, + "12": 3117478912.0, + "13": 3117478912.0, + "14": 3117478912.0, + "15": 3117478912.0, + "16": 3117478912.0, + "17": 3117478912.0, + "18": 3117478912.0, + "19": 3117478912.0, + "20": 3117478912.0, + "21": 3117478912.0, + "22": 3117478912.0, + "23": 3117478912.0, + "24": 3117478912.0, + "25": 3117478912.0, + "26": 3117478912.0, + "27": 3117478912.0, + "28": 3117478912.0, + "29": 
3117478912.0, + "30": 3117478912.0, + "31": 3117478912.0, + "32": 3117478912.0, + "33": 3117478912.0, + "34": 3117478912.0, + "35": 3117478912.0, + "36": 3117478912.0, + "37": 3117478912.0, + "38": 3117478912.0, + "39": 3117478912.0, + "40": 3117478912.0, + "41": 3117478912.0, + "42": 3117478912.0, + "43": 3117478912.0, + "44": 3117478912.0, + "45": 3117478912.0, + "46": 3117478912.0, + "47": 3117478912.0, + "48": 3117478912.0, + "49": 3117478912.0, + "50": 3117478912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9708208128.0, + "2": 10145497088.0, + "3": 10145497088.0, + "4": 10145497088.0, + "5": 10145497088.0, + "6": 10145497088.0, + "7": 10145497088.0, + "8": 10145497088.0, + "9": 10145497088.0, + "10": 10145497088.0, + "11": 10145497088.0, + "12": 10145497088.0, + "13": 10145497088.0, + "14": 10145497088.0, + "15": 10145497088.0, + "16": 10145497088.0, + "17": 10145497088.0, + "18": 10145497088.0, + "19": 10145497088.0, + "20": 10145497088.0, + "21": 10145497088.0, + "22": 10145497088.0, + "23": 10145497088.0, + "24": 10145497088.0, + "25": 10145497088.0, + "26": 10145497088.0, + "27": 10145497088.0, + "28": 10145497088.0, + "29": 10145497088.0, + "30": 10145497088.0, + "31": 10145497088.0, + "32": 10145497088.0, + "33": 10145497088.0, + "34": 10145497088.0, + "35": 10145497088.0, + "36": 10145497088.0, + "37": 10145497088.0, + "38": 10145497088.0, + "39": 10145497088.0, + "40": 10145497088.0, + "41": 10145497088.0, + "42": 10145497088.0, + "43": 10145497088.0, + "44": 10145497088.0, + "45": 10145497088.0, + "46": 10145497088.0, + "47": 10145497088.0, + "48": 10145497088.0, + "49": 10145497088.0, + "50": 10145497088.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 75.46828, + "2": 0.20357, + "3": 0.19791, + "4": 0.20172, + "5": 0.17347, + "6": 0.17767, + "7": 0.18123, + "8": 0.18059, + "9": 0.18281, + "10": 0.17733, + 
"11": 1.43978, + "12": 0.16875, + "13": 0.17029, + "14": 0.16961, + "15": 0.16995, + "16": 0.16814, + "17": 0.16932, + "18": 0.16845, + "19": 0.16867, + "20": 0.1725, + "21": 1.37727, + "22": 0.16984, + "23": 0.16887, + "24": 0.17009, + "25": 0.17014, + "26": 0.16727, + "27": 0.16686, + "28": 0.16832, + "29": 0.16702, + "30": 0.17035, + "31": 1.37603, + "32": 0.17102, + "33": 0.16863, + "34": 0.17081, + "35": 0.17287, + "36": 0.1713, + "37": 0.17386, + "38": 0.16722, + "39": 0.17073, + "40": 0.17394, + "41": 1.39311, + "42": 0.17219, + "43": 0.1735, + "44": 0.18156, + "45": 0.17372, + "46": 0.17432, + "47": 0.17103, + "48": 0.172, + "49": 0.17515, + "50": 0.17623 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..5649c8c02c0 --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.97433, + "2": 10.97599, + "3": 10.97873, + "4": 10.95776, + "5": 11.00374, + "6": 11.00622, + "7": 10.9799, + "8": 10.96858, + "9": 10.97924, + "10": 10.95251, + "11": 10.99936, + "12": 10.96824, + "13": 10.96591, + "14": 10.99554, + "15": 10.85561, + "16": 10.85538, + "17": 10.81726, + "18": 10.82754, + "19": 10.82158, + "20": 10.6404, + "21": 10.57926, + "22": 10.33548, + "23": 10.61314, + "24": 10.34966, + "25": 10.25929, + "26": 10.36381, + "27": 10.38733, + "28": 10.35697, + "29": 10.38233, + "30": 9.91499, + "31": 9.47474, + "32": 10.08958, + "33": 10.08413, + "34": 9.65424, + "35": 9.70719, + "36": 9.58835, + "37": 9.82205, + "38": 9.53609, + "39": 9.94086, + "40": 9.34225, + "41": 
9.48846, + "42": 9.56986, + "43": 9.03547, + "44": 9.15612, + "45": 9.00184, + "46": 9.06401, + "47": 9.49282, + "48": 9.04255, + "49": 8.58799, + "50": 9.12592 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 20988.0, + "2": 21880.0, + "3": 21325.0, + "4": 20724.0, + "5": 23551.0, + "6": 23815.0, + "7": 23302.0, + "8": 21521.0, + "9": 22934.0, + "10": 19185.0, + "11": 25126.0, + "12": 23590.0, + "13": 24504.0, + "14": 24677.0, + "15": 23380.0, + "16": 23738.0, + "17": 22330.0, + "18": 22602.0, + "19": 23748.0, + "20": 21759.0, + "21": 23060.0, + "22": 19355.0, + "23": 24789.0, + "24": 19586.0, + "25": 19683.0, + "26": 21141.0, + "27": 22031.0, + "28": 23567.0, + "29": 23130.0, + "30": 20321.0, + "31": 17223.0, + "32": 21718.0, + "33": 23067.0, + "34": 21566.0, + "35": 22023.0, + "36": 21047.0, + "37": 22678.0, + "38": 22771.0, + "39": 22336.0, + "40": 23698.0, + "41": 23997.0, + "42": 23556.0, + "43": 21934.0, + "44": 21967.0, + "45": 21610.0, + "46": 23283.0, + "47": 25289.0, + "48": 25472.0, + "49": 25458.0, + "50": 28167.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3117478912.0, + "2": 3117478912.0, + "3": 3117478912.0, + "4": 3117478912.0, + "5": 3117478912.0, + "6": 3117478912.0, + "7": 3117478912.0, + "8": 3117478912.0, + "9": 3117478912.0, + "10": 3117478912.0, + "11": 3117478912.0, + "12": 3117478912.0, + "13": 3117478912.0, + "14": 3117478912.0, + "15": 3117478912.0, + "16": 3117478912.0, + "17": 3117478912.0, + "18": 3117478912.0, + "19": 3117478912.0, + "20": 3117478912.0, + "21": 3117478912.0, + "22": 3117478912.0, + "23": 3117478912.0, + "24": 3117478912.0, + "25": 3117478912.0, + "26": 3117478912.0, + "27": 3117478912.0, + "28": 3117478912.0, + "29": 3117478912.0, + "30": 3117478912.0, + "31": 3117478912.0, + "32": 3117478912.0, + "33": 3117478912.0, + "34": 3117478912.0, + "35": 3117478912.0, + "36": 3117478912.0, + "37": 
3117478912.0, + "38": 3117478912.0, + "39": 3117478912.0, + "40": 3117478912.0, + "41": 3117478912.0, + "42": 3117478912.0, + "43": 3117478912.0, + "44": 3117478912.0, + "45": 3117478912.0, + "46": 3117478912.0, + "47": 3117478912.0, + "48": 3117478912.0, + "49": 3117478912.0, + "50": 3117478912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9708208128.0, + "2": 10145497088.0, + "3": 10145497088.0, + "4": 10145497088.0, + "5": 10145497088.0, + "6": 10145497088.0, + "7": 10145497088.0, + "8": 10145497088.0, + "9": 10145497088.0, + "10": 10145497088.0, + "11": 10145497088.0, + "12": 10145497088.0, + "13": 10145497088.0, + "14": 10145497088.0, + "15": 10145497088.0, + "16": 10145497088.0, + "17": 10145497088.0, + "18": 10145497088.0, + "19": 10145497088.0, + "20": 10145497088.0, + "21": 10145497088.0, + "22": 10145497088.0, + "23": 10145497088.0, + "24": 10145497088.0, + "25": 10145497088.0, + "26": 10145497088.0, + "27": 10145497088.0, + "28": 10145497088.0, + "29": 10145497088.0, + "30": 10145497088.0, + "31": 10145497088.0, + "32": 10145497088.0, + "33": 10145497088.0, + "34": 10145497088.0, + "35": 10145497088.0, + "36": 10145497088.0, + "37": 10145497088.0, + "38": 10145497088.0, + "39": 10145497088.0, + "40": 10145497088.0, + "41": 10145497088.0, + "42": 10145497088.0, + "43": 10145497088.0, + "44": 10145497088.0, + "45": 10145497088.0, + "46": 10145497088.0, + "47": 10145497088.0, + "48": 10145497088.0, + "49": 10145497088.0, + "50": 10145497088.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 71.98615, + "2": 0.17824, + "3": 0.15658, + "4": 0.15553, + "5": 0.15552, + "6": 0.15497, + "7": 0.15557, + "8": 0.1611, + "9": 0.15455, + "10": 0.15318, + "11": 1.21675, + "12": 0.15852, + "13": 0.15923, + "14": 0.15544, + "15": 0.15619, + "16": 0.15301, + "17": 0.15568, + "18": 0.15352, + "19": 0.15601, + "20": 0.15832, + "21": 
1.19636, + "22": 0.15369, + "23": 0.16001, + "24": 0.49798, + "25": 0.1566, + "26": 0.15462, + "27": 0.15479, + "28": 0.15431, + "29": 0.15608, + "30": 0.15697, + "31": 1.19237, + "32": 0.18057, + "33": 0.1804, + "34": 0.63136, + "35": 0.15799, + "36": 0.1573, + "37": 0.15724, + "38": 0.15688, + "39": 0.15684, + "40": 0.15532, + "41": 1.20433, + "42": 0.1556, + "43": 0.15643, + "44": 0.47664, + "45": 0.15538, + "46": 0.15623, + "47": 0.15655, + "48": 0.15632, + "49": 0.15651, + "50": 0.15611 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..951506c1571 --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.97443, + "2": 10.97602, + "3": 10.97873, + "4": 10.95791, + "5": 11.00372, + "6": 11.00622, + "7": 10.97989, + "8": 10.96858, + "9": 10.97927, + "10": 10.95244, + "11": 10.99932, + "12": 10.96821, + "13": 10.96575, + "14": 10.99547, + "15": 10.85548, + "16": 10.85544, + "17": 10.81733, + "18": 10.82754, + "19": 10.82177, + "20": 10.64038, + "21": 10.57929, + "22": 10.33542, + "23": 10.613, + "24": 10.3496, + "25": 10.2592, + "26": 10.36373, + "27": 10.38741, + "28": 10.35692, + "29": 10.38238, + "30": 9.91509, + "31": 9.47482, + "32": 10.0895, + "33": 10.08422, + "34": 9.65429, + "35": 9.70734, + "36": 9.58844, + "37": 9.82215, + "38": 9.53607, + "39": 9.94104, + "40": 9.3422, + "41": 9.48847, + "42": 9.56993, + "43": 9.03549, + "44": 9.15623, + "45": 9.00183, + "46": 9.06402, + "47": 9.49291, + "48": 9.04257, + "49": 8.58806, + "50": 9.12599 + } + }, + "num-zeros": { + 
"start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 21181.0, + "2": 22037.0, + "3": 21249.0, + "4": 20277.0, + "5": 23590.0, + "6": 24135.0, + "7": 23650.0, + "8": 21651.0, + "9": 22980.0, + "10": 19092.0, + "11": 25008.0, + "12": 23782.0, + "13": 24367.0, + "14": 24697.0, + "15": 23602.0, + "16": 23837.0, + "17": 22509.0, + "18": 22645.0, + "19": 23485.0, + "20": 21887.0, + "21": 22872.0, + "22": 19313.0, + "23": 24389.0, + "24": 19718.0, + "25": 19814.0, + "26": 21274.0, + "27": 22560.0, + "28": 23731.0, + "29": 23099.0, + "30": 19997.0, + "31": 17111.0, + "32": 22093.0, + "33": 23200.0, + "34": 21525.0, + "35": 21837.0, + "36": 21070.0, + "37": 22975.0, + "38": 22727.0, + "39": 22485.0, + "40": 23583.0, + "41": 24012.0, + "42": 23529.0, + "43": 22092.0, + "44": 21911.0, + "45": 21790.0, + "46": 23173.0, + "47": 25505.0, + "48": 25316.0, + "49": 25527.0, + "50": 28117.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3117478912.0, + "2": 3117478912.0, + "3": 3117478912.0, + "4": 3117478912.0, + "5": 3117478912.0, + "6": 3117478912.0, + "7": 3117478912.0, + "8": 3117478912.0, + "9": 3117478912.0, + "10": 3117478912.0, + "11": 3117478912.0, + "12": 3117478912.0, + "13": 3117478912.0, + "14": 3117478912.0, + "15": 3117478912.0, + "16": 3117478912.0, + "17": 3117478912.0, + "18": 3117478912.0, + "19": 3117478912.0, + "20": 3117478912.0, + "21": 3117478912.0, + "22": 3117478912.0, + "23": 3117478912.0, + "24": 3117478912.0, + "25": 3117478912.0, + "26": 3117478912.0, + "27": 3117478912.0, + "28": 3117478912.0, + "29": 3117478912.0, + "30": 3117478912.0, + "31": 3117478912.0, + "32": 3117478912.0, + "33": 3117478912.0, + "34": 3117478912.0, + "35": 3117478912.0, + "36": 3117478912.0, + "37": 3117478912.0, + "38": 3117478912.0, + "39": 3117478912.0, + "40": 3117478912.0, + "41": 3117478912.0, + "42": 3117478912.0, + "43": 3117478912.0, + "44": 3117478912.0, + "45": 
3117478912.0, + "46": 3117478912.0, + "47": 3117478912.0, + "48": 3117478912.0, + "49": 3117478912.0, + "50": 3117478912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9708208128.0, + "2": 10145497088.0, + "3": 10145497088.0, + "4": 10145497088.0, + "5": 10145497088.0, + "6": 10145497088.0, + "7": 10145497088.0, + "8": 10145497088.0, + "9": 10145497088.0, + "10": 10145497088.0, + "11": 10145497088.0, + "12": 10145497088.0, + "13": 10145497088.0, + "14": 10145497088.0, + "15": 10145497088.0, + "16": 10145497088.0, + "17": 10145497088.0, + "18": 10145497088.0, + "19": 10145497088.0, + "20": 10145497088.0, + "21": 10145497088.0, + "22": 10145497088.0, + "23": 10145497088.0, + "24": 10145497088.0, + "25": 10145497088.0, + "26": 10145497088.0, + "27": 10145497088.0, + "28": 10145497088.0, + "29": 10145497088.0, + "30": 10145497088.0, + "31": 10145497088.0, + "32": 10145497088.0, + "33": 10145497088.0, + "34": 10145497088.0, + "35": 10145497088.0, + "36": 10145497088.0, + "37": 10145497088.0, + "38": 10145497088.0, + "39": 10145497088.0, + "40": 10145497088.0, + "41": 10145497088.0, + "42": 10145497088.0, + "43": 10145497088.0, + "44": 10145497088.0, + "45": 10145497088.0, + "46": 10145497088.0, + "47": 10145497088.0, + "48": 10145497088.0, + "49": 10145497088.0, + "50": 10145497088.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 74.91474, + "2": 0.1754, + "3": 0.17452, + "4": 0.16679, + "5": 0.16348, + "6": 0.16445, + "7": 0.16736, + "8": 0.16603, + "9": 0.16532, + "10": 0.16307, + "11": 1.37857, + "12": 0.16928, + "13": 0.53834, + "14": 0.57224, + "15": 0.16953, + "16": 0.16333, + "17": 0.16457, + "18": 0.16634, + "19": 0.51067, + "20": 0.16795, + "21": 1.3646, + "22": 0.16877, + "23": 0.16233, + "24": 0.16456, + "25": 0.16106, + "26": 0.16403, + "27": 0.16543, + "28": 0.52927, + "29": 0.16526, + "30": 0.16671, + "31": 1.34815, + 
"32": 0.1712, + "33": 0.16615, + "34": 0.16654, + "35": 0.16776, + "36": 0.16433, + "37": 0.16743, + "38": 0.5814, + "39": 0.17894, + "40": 0.16539, + "41": 1.61892, + "42": 0.1694, + "43": 0.16828, + "44": 0.16546, + "45": 0.16549, + "46": 0.16556, + "47": 0.51526, + "48": 0.16791, + "49": 0.16886, + "50": 0.16634 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 31600632301..66d5b70c4e7 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.98115, + "2": 10.98342, + "3": 10.97937, + "4": 10.95855, "5": 10.99632, - "10": 10.94823, - "15": 10.85384, - "20": 10.61864, - "25": 10.23212, - "30": 9.88866, - "35": 9.64741, - "40": 9.29934, - "45": 8.9649, - "50": 9.11107 + "6": 11.00381, + "7": 10.98294, + "8": 10.97489, + "9": 10.97741, + "10": 10.94819, + "11": 10.99293, + "12": 10.96683, + "13": 10.97205, + "14": 10.97917, + "15": 10.85381, + "16": 10.85123, + "17": 10.80904, + "18": 10.82571, + "19": 10.80813, + "20": 10.61863, + "21": 10.56868, + "22": 10.31924, + "23": 10.59307, + "24": 10.33426, + "25": 10.23213, + "26": 10.34313, + "27": 10.34586, + "28": 10.32458, + "29": 10.336, + "30": 9.88868, + "31": 9.42985, + "32": 10.0556, + "33": 10.04592, + "34": 9.60415, + "35": 9.64742, + "36": 9.5255, + "37": 9.7709, + "38": 9.49245, + "39": 9.87216, + "40": 9.29935, + "41": 9.44523, + "42": 9.52844, + "43": 9.015, + "44": 9.13046, + "45": 8.96483, + "46": 9.02876, + "47": 9.45483, + "48": 9.0228, + 
"49": 8.56611, + "50": 9.11105 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 21057.0, - "5": 23384.0, - "10": 18836.0, - "15": 23361.0, - "20": 21198.0, - "25": 19270.0, - "30": 19749.0, - "35": 21428.0, - "40": 23790.0, - "45": 22634.0, - "50": 27374.0 + "2": 22047.0, + "3": 21328.0, + "4": 20691.0, + "5": 23440.0, + "6": 23720.0, + "7": 23130.0, + "8": 21638.0, + "9": 22493.0, + "10": 18970.0, + "11": 24200.0, + "12": 23107.0, + "13": 24299.0, + "14": 24369.0, + "15": 23049.0, + "16": 23303.0, + "17": 21870.0, + "18": 22441.0, + "19": 23208.0, + "20": 21271.0, + "21": 22375.0, + "22": 19133.0, + "23": 23782.0, + "24": 19264.0, + "25": 19271.0, + "26": 20494.0, + "27": 21625.0, + "28": 23068.0, + "29": 22509.0, + "30": 19530.0, + "31": 16898.0, + "32": 21514.0, + "33": 22417.0, + "34": 21007.0, + "35": 21257.0, + "36": 20531.0, + "37": 23012.0, + "38": 22644.0, + "39": 22981.0, + "40": 23871.0, + "41": 23909.0, + "42": 23938.0, + "43": 22901.0, + "44": 22451.0, + "45": 22771.0, + "46": 23764.0, + "47": 25110.0, + "48": 26221.0, + "49": 26736.0, + "50": 27671.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1917381632.0, + "2": 1917381632.0, + "3": 1917381632.0, + "4": 1917381632.0, "5": 1917381632.0, + "6": 1917381632.0, + "7": 1917381632.0, + "8": 1917381632.0, + "9": 1917381632.0, "10": 1917381632.0, + "11": 1917381632.0, + "12": 1917381632.0, + "13": 1917381632.0, + "14": 1917381632.0, "15": 1917381632.0, + "16": 1917381632.0, + "17": 1917381632.0, + "18": 1917381632.0, + "19": 1917381632.0, "20": 1917381632.0, + "21": 1917381632.0, + "22": 1917381632.0, + "23": 1917381632.0, + "24": 1917381632.0, "25": 1917381632.0, + "26": 1917381632.0, + "27": 1917381632.0, + "28": 1917381632.0, + "29": 1917381632.0, "30": 1917381632.0, + "31": 1917381632.0, + "32": 1917381632.0, + "33": 1917381632.0, + "34": 1917381632.0, "35": 
1917381632.0, + "36": 1917381632.0, + "37": 1917381632.0, + "38": 1917381632.0, + "39": 1917381632.0, "40": 1917381632.0, + "41": 1917381632.0, + "42": 1917381632.0, + "43": 1917381632.0, + "44": 1917381632.0, "45": 1917381632.0, + "46": 1917381632.0, + "47": 1917381632.0, + "48": 1917381632.0, + "49": 1917381632.0, "50": 1917381632.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 5502737408.0, + "2": 5907581952.0, + "3": 5907581952.0, + "4": 5907581952.0, "5": 5907581952.0, + "6": 5907581952.0, + "7": 5907581952.0, + "8": 5907581952.0, + "9": 5907581952.0, "10": 5907581952.0, + "11": 5907581952.0, + "12": 5907581952.0, + "13": 5907581952.0, + "14": 5907581952.0, "15": 5907581952.0, + "16": 5907581952.0, + "17": 5907581952.0, + "18": 5907581952.0, + "19": 5907581952.0, "20": 5907581952.0, + "21": 5907581952.0, + "22": 5907581952.0, + "23": 5907581952.0, + "24": 5907581952.0, "25": 5907581952.0, + "26": 5907581952.0, + "27": 5907581952.0, + "28": 5907581952.0, + "29": 5907581952.0, "30": 5907581952.0, + "31": 5907581952.0, + "32": 5907581952.0, + "33": 5907581952.0, + "34": 5907581952.0, "35": 5907581952.0, + "36": 5907581952.0, + "37": 5907581952.0, + "38": 5907581952.0, + "39": 5907581952.0, "40": 5907581952.0, + "41": 5907581952.0, + "42": 5907581952.0, + "43": 5907581952.0, + "44": 5907581952.0, "45": 5907581952.0, + "46": 5907581952.0, + "47": 5907581952.0, + "48": 5907581952.0, + "49": 5907581952.0, "50": 5907581952.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 23.78025, - "5": 0.2726, - "10": 0.28342, - "15": 0.27548, - "20": 0.27217, - "25": 0.27174, - "30": 0.27238, - "35": 0.26859, - "40": 0.27106, - "45": 0.27295, - "50": 0.27446 + "1": 77.32153, + "2": 0.35381, + "3": 0.31954, + "4": 0.31994, + "5": 0.32133, + "6": 0.32343, + "7": 0.63691, + "8": 0.32502, + "9": 0.32218, + "10": 0.31839, + "11": 
1.20693, + "12": 0.33292, + "13": 0.32979, + "14": 0.31793, + "15": 0.32907, + "16": 0.31632, + "17": 0.3213, + "18": 0.32431, + "19": 0.68468, + "20": 0.32501, + "21": 0.91375, + "22": 0.32148, + "23": 0.32164, + "24": 0.32358, + "25": 0.32444, + "26": 0.31929, + "27": 0.32159, + "28": 0.32567, + "29": 0.31799, + "30": 0.36795, + "31": 0.98526, + "32": 0.32231, + "33": 0.31619, + "34": 0.31784, + "35": 0.31943, + "36": 0.31897, + "37": 0.31509, + "38": 0.33279, + "39": 0.32732, + "40": 0.31631, + "41": 0.91813, + "42": 0.32108, + "43": 0.31789, + "44": 0.31862, + "45": 0.32451, + "46": 0.31705, + "47": 0.31711, + "48": 0.32216, + "49": 0.31997, + "50": 0.31833 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..5f9d24a49c3 --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.98115, + "2": 10.98342, + "3": 10.97937, + "4": 10.95855, + "5": 10.99622, + "6": 11.00384, + "7": 10.98297, + "8": 10.97483, + "9": 10.97753, + "10": 10.94815, + "11": 10.99296, + "12": 10.9669, + "13": 10.97214, + "14": 10.97925, + "15": 10.85387, + "16": 10.85117, + "17": 10.80894, + "18": 10.82573, + "19": 10.80812, + "20": 10.61863, + "21": 10.56868, + "22": 10.31918, + "23": 10.59297, + "24": 10.33422, + "25": 10.23218, + "26": 10.34314, + "27": 10.34572, + "28": 10.32477, + "29": 10.33598, + "30": 9.88873, + "31": 9.42999, + "32": 10.05561, + "33": 10.04589, + "34": 9.60423, + "35": 9.64746, + "36": 9.52548, + "37": 9.77088, + "38": 9.49242, + "39": 9.87225, + "40": 9.29943, + "41": 9.44525, 
+ "42": 9.5284, + "43": 9.01502, + "44": 9.13045, + "45": 8.96484, + "46": 9.02877, + "47": 9.45487, + "48": 9.02277, + "49": 8.56605, + "50": 9.11107 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 21057.0, + "2": 22047.0, + "3": 21328.0, + "4": 20740.0, + "5": 23155.0, + "6": 23469.0, + "7": 22812.0, + "8": 21546.0, + "9": 22384.0, + "10": 18987.0, + "11": 24537.0, + "12": 23328.0, + "13": 24082.0, + "14": 24376.0, + "15": 23046.0, + "16": 23314.0, + "17": 21746.0, + "18": 22157.0, + "19": 23070.0, + "20": 21363.0, + "21": 22466.0, + "22": 18866.0, + "23": 24216.0, + "24": 19337.0, + "25": 19268.0, + "26": 20380.0, + "27": 21682.0, + "28": 23020.0, + "29": 22578.0, + "30": 20050.0, + "31": 16804.0, + "32": 21380.0, + "33": 22738.0, + "34": 20871.0, + "35": 21397.0, + "36": 20460.0, + "37": 22858.0, + "38": 22666.0, + "39": 22907.0, + "40": 23932.0, + "41": 23824.0, + "42": 23844.0, + "43": 22807.0, + "44": 22751.0, + "45": 22450.0, + "46": 23609.0, + "47": 25413.0, + "48": 26266.0, + "49": 26747.0, + "50": 27543.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1917381632.0, + "2": 1917381632.0, + "3": 1917381632.0, + "4": 1917381632.0, + "5": 1917381632.0, + "6": 1917381632.0, + "7": 1917381632.0, + "8": 1917381632.0, + "9": 1917381632.0, + "10": 1917381632.0, + "11": 1917381632.0, + "12": 1917381632.0, + "13": 1917381632.0, + "14": 1917381632.0, + "15": 1917381632.0, + "16": 1917381632.0, + "17": 1917381632.0, + "18": 1917381632.0, + "19": 1917381632.0, + "20": 1917381632.0, + "21": 1917381632.0, + "22": 1917381632.0, + "23": 1917381632.0, + "24": 1917381632.0, + "25": 1917381632.0, + "26": 1917381632.0, + "27": 1917381632.0, + "28": 1917381632.0, + "29": 1917381632.0, + "30": 1917381632.0, + "31": 1917381632.0, + "32": 1917381632.0, + "33": 1917381632.0, + "34": 1917381632.0, + "35": 1917381632.0, + "36": 1917381632.0, + "37": 
1917381632.0, + "38": 1917381632.0, + "39": 1917381632.0, + "40": 1917381632.0, + "41": 1917381632.0, + "42": 1917381632.0, + "43": 1917381632.0, + "44": 1917381632.0, + "45": 1917381632.0, + "46": 1917381632.0, + "47": 1917381632.0, + "48": 1917381632.0, + "49": 1917381632.0, + "50": 1917381632.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5502737408.0, + "2": 5907581952.0, + "3": 5907581952.0, + "4": 5907581952.0, + "5": 5907581952.0, + "6": 5907581952.0, + "7": 5907581952.0, + "8": 5907581952.0, + "9": 5907581952.0, + "10": 5907581952.0, + "11": 5907581952.0, + "12": 5907581952.0, + "13": 5907581952.0, + "14": 5907581952.0, + "15": 5907581952.0, + "16": 5907581952.0, + "17": 5907581952.0, + "18": 5907581952.0, + "19": 5907581952.0, + "20": 5907581952.0, + "21": 5907581952.0, + "22": 5907581952.0, + "23": 5907581952.0, + "24": 5907581952.0, + "25": 5907581952.0, + "26": 5907581952.0, + "27": 5907581952.0, + "28": 5907581952.0, + "29": 5907581952.0, + "30": 5907581952.0, + "31": 5907581952.0, + "32": 5907581952.0, + "33": 5907581952.0, + "34": 5907581952.0, + "35": 5907581952.0, + "36": 5907581952.0, + "37": 5907581952.0, + "38": 5907581952.0, + "39": 5907581952.0, + "40": 5907581952.0, + "41": 5907581952.0, + "42": 5907581952.0, + "43": 5907581952.0, + "44": 5907581952.0, + "45": 5907581952.0, + "46": 5907581952.0, + "47": 5907581952.0, + "48": 5907581952.0, + "49": 5907581952.0, + "50": 5907581952.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 72.69145, + "2": 0.31162, + "3": 0.65164, + "4": 0.29871, + "5": 0.29932, + "6": 0.29668, + "7": 0.29179, + "8": 0.29409, + "9": 0.29759, + "10": 0.30183, + "11": 0.84375, + "12": 0.2964, + "13": 0.29589, + "14": 0.29688, + "15": 0.30127, + "16": 0.29716, + "17": 0.29351, + "18": 0.29429, + "19": 0.29751, + "20": 0.29471, + "21": 1.36793, + "22": 0.29834, + "23": 0.29442, + "24": 
0.29321, + "25": 0.29912, + "26": 0.29631, + "27": 0.29343, + "28": 0.29975, + "29": 0.29701, + "30": 0.67685, + "31": 0.82445, + "32": 0.29588, + "33": 0.79672, + "34": 0.30556, + "35": 0.29842, + "36": 0.29717, + "37": 0.29457, + "38": 0.29527, + "39": 0.29757, + "40": 0.29426, + "41": 0.82657, + "42": 0.29634, + "43": 0.29423, + "44": 0.30131, + "45": 0.30554, + "46": 0.29682, + "47": 0.29317, + "48": 0.29446, + "49": 0.29791, + "50": 0.2949 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..f9118a22780 --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.98115, + "2": 10.98342, + "3": 10.9794, + "4": 10.95853, + "5": 10.99622, + "6": 11.00371, + "7": 10.98299, + "8": 10.9748, + "9": 10.97742, + "10": 10.94806, + "11": 10.99306, + "12": 10.96672, + "13": 10.97199, + "14": 10.97915, + "15": 10.85402, + "16": 10.85122, + "17": 10.8089, + "18": 10.82572, + "19": 10.8081, + "20": 10.61854, + "21": 10.56862, + "22": 10.31926, + "23": 10.59295, + "24": 10.3343, + "25": 10.23216, + "26": 10.34315, + "27": 10.34581, + "28": 10.3247, + "29": 10.336, + "30": 9.88877, + "31": 9.42992, + "32": 10.05572, + "33": 10.0459, + "34": 9.6042, + "35": 9.64743, + "36": 9.52544, + "37": 9.77085, + "38": 9.49252, + "39": 9.87217, + "40": 9.29929, + "41": 9.44531, + "42": 9.52839, + "43": 9.01499, + "44": 9.13044, + "45": 8.96478, + "46": 9.02875, + "47": 9.45483, + "48": 9.02282, + "49": 8.56615, + "50": 9.11114 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + 
"values": { + "1": 21211.0, + "2": 22047.0, + "3": 20892.0, + "4": 20624.0, + "5": 23413.0, + "6": 23493.0, + "7": 22797.0, + "8": 21401.0, + "9": 22665.0, + "10": 19047.0, + "11": 24508.0, + "12": 23266.0, + "13": 24271.0, + "14": 24293.0, + "15": 22782.0, + "16": 23282.0, + "17": 21824.0, + "18": 22133.0, + "19": 23099.0, + "20": 21505.0, + "21": 22490.0, + "22": 18675.0, + "23": 23908.0, + "24": 19148.0, + "25": 19388.0, + "26": 20532.0, + "27": 21766.0, + "28": 22571.0, + "29": 22352.0, + "30": 19883.0, + "31": 16703.0, + "32": 21084.0, + "33": 22377.0, + "34": 20576.0, + "35": 21216.0, + "36": 20603.0, + "37": 22812.0, + "38": 22830.0, + "39": 22708.0, + "40": 23830.0, + "41": 24061.0, + "42": 24003.0, + "43": 22790.0, + "44": 22703.0, + "45": 22360.0, + "46": 23642.0, + "47": 25112.0, + "48": 26185.0, + "49": 26666.0, + "50": 27765.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1917381632.0, + "2": 1917381632.0, + "3": 1917381632.0, + "4": 1917381632.0, + "5": 1917381632.0, + "6": 1917381632.0, + "7": 1917381632.0, + "8": 1917381632.0, + "9": 1917381632.0, + "10": 1917381632.0, + "11": 1917381632.0, + "12": 1917381632.0, + "13": 1917381632.0, + "14": 1917381632.0, + "15": 1917381632.0, + "16": 1917381632.0, + "17": 1917381632.0, + "18": 1917381632.0, + "19": 1917381632.0, + "20": 1917381632.0, + "21": 1917381632.0, + "22": 1917381632.0, + "23": 1917381632.0, + "24": 1917381632.0, + "25": 1917381632.0, + "26": 1917381632.0, + "27": 1917381632.0, + "28": 1917381632.0, + "29": 1917381632.0, + "30": 1917381632.0, + "31": 1917381632.0, + "32": 1917381632.0, + "33": 1917381632.0, + "34": 1917381632.0, + "35": 1917381632.0, + "36": 1917381632.0, + "37": 1917381632.0, + "38": 1917381632.0, + "39": 1917381632.0, + "40": 1917381632.0, + "41": 1917381632.0, + "42": 1917381632.0, + "43": 1917381632.0, + "44": 1917381632.0, + "45": 1917381632.0, + "46": 1917381632.0, + "47": 1917381632.0, + "48": 
1917381632.0, + "49": 1917381632.0, + "50": 1917381632.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5502737408.0, + "2": 5907581952.0, + "3": 5907581952.0, + "4": 5907581952.0, + "5": 5907581952.0, + "6": 5907581952.0, + "7": 5907581952.0, + "8": 5907581952.0, + "9": 5907581952.0, + "10": 5907581952.0, + "11": 5907581952.0, + "12": 5907581952.0, + "13": 5907581952.0, + "14": 5907581952.0, + "15": 5907581952.0, + "16": 5907581952.0, + "17": 5907581952.0, + "18": 5907581952.0, + "19": 5907581952.0, + "20": 5907581952.0, + "21": 5907581952.0, + "22": 5907581952.0, + "23": 5907581952.0, + "24": 5907581952.0, + "25": 5907581952.0, + "26": 5907581952.0, + "27": 5907581952.0, + "28": 5907581952.0, + "29": 5907581952.0, + "30": 5907581952.0, + "31": 5907581952.0, + "32": 5907581952.0, + "33": 5907581952.0, + "34": 5907581952.0, + "35": 5907581952.0, + "36": 5907581952.0, + "37": 5907581952.0, + "38": 5907581952.0, + "39": 5907581952.0, + "40": 5907581952.0, + "41": 5907581952.0, + "42": 5907581952.0, + "43": 5907581952.0, + "44": 5907581952.0, + "45": 5907581952.0, + "46": 5907581952.0, + "47": 5907581952.0, + "48": 5907581952.0, + "49": 5907581952.0, + "50": 5907581952.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 76.70816, + "2": 0.44479, + "3": 0.37638, + "4": 0.32493, + "5": 0.32865, + "6": 0.3221, + "7": 0.33027, + "8": 0.32627, + "9": 0.69409, + "10": 0.66689, + "11": 0.94476, + "12": 0.6757, + "13": 0.32571, + "14": 0.3194, + "15": 0.31954, + "16": 0.32142, + "17": 0.32144, + "18": 0.3188, + "19": 0.32023, + "20": 0.70348, + "21": 1.36061, + "22": 0.32306, + "23": 0.32129, + "24": 0.31927, + "25": 0.32503, + "26": 0.322, + "27": 0.31994, + "28": 0.32043, + "29": 0.31651, + "30": 0.31907, + "31": 1.31856, + "32": 0.32016, + "33": 0.31758, + "34": 0.31966, + "35": 0.31765, + "36": 0.31717, + "37": 0.3191, + "38": 0.31591, + 
"39": 0.3156, + "40": 0.31599, + "41": 0.90957, + "42": 0.32017, + "43": 0.31902, + "44": 0.32013, + "45": 0.32183, + "46": 0.31561, + "47": 0.31628, + "48": 0.31911, + "49": 0.31753, + "50": 0.31636 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 5cd925750cf..42f6add1cac 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.98296, - "5": 10.99794, - "10": 10.94509, - "15": 10.85381, - "20": 10.6219, - "25": 10.23314, - "30": 9.8856, - "35": 9.64989, - "40": 9.30025, - "45": 8.96819, - "50": 9.10987 + "2": 10.98234, + "3": 10.98048, + "4": 10.96506, + "5": 10.99783, + "6": 11.00523, + "7": 10.98269, + "8": 10.97586, + "9": 10.97815, + "10": 10.9452, + "11": 10.9926, + "12": 10.96812, + "13": 10.97042, + "14": 10.98195, + "15": 10.85378, + "16": 10.85001, + "17": 10.80676, + "18": 10.82651, + "19": 10.81114, + "20": 10.62181, + "21": 10.56061, + "22": 10.32111, + "23": 10.59523, + "24": 10.32471, + "25": 10.23316, + "26": 10.33835, + "27": 10.34872, + "28": 10.32088, + "29": 10.33079, + "30": 9.88567, + "31": 9.43004, + "32": 10.05321, + "33": 10.0429, + "34": 9.60531, + "35": 9.64985, + "36": 9.52945, + "37": 9.76829, + "38": 9.48586, + "39": 9.87467, + "40": 9.30029, + "41": 9.44905, + "42": 9.52868, + "43": 9.01596, + "44": 9.12962, + "45": 8.96833, + "46": 9.03055, + "47": 9.45737, + "48": 9.02116, + "49": 8.569, + "50": 9.10992 } }, "num-zeros": { "start_step": 1, "end_step": 50, - 
"step_interval": 5, + "step_interval": 1, "values": { - "1": 3065.0, - "5": 3271.0, - "10": 2863.0, - "15": 3164.0, - "20": 3031.0, - "25": 2758.0, - "30": 2675.0, - "35": 2939.0, - "40": 3121.0, - "45": 2957.0, - "50": 3391.0 + "1": 2981.0, + "2": 3050.0, + "3": 3036.0, + "4": 2803.0, + "5": 3277.0, + "6": 3332.0, + "7": 3180.0, + "8": 3031.0, + "9": 3010.0, + "10": 2837.0, + "11": 3454.0, + "12": 3290.0, + "13": 3425.0, + "14": 3543.0, + "15": 3264.0, + "16": 3165.0, + "17": 3109.0, + "18": 3150.0, + "19": 3225.0, + "20": 3006.0, + "21": 3072.0, + "22": 2636.0, + "23": 3329.0, + "24": 2773.0, + "25": 2778.0, + "26": 2782.0, + "27": 3018.0, + "28": 3154.0, + "29": 3221.0, + "30": 2661.0, + "31": 2317.0, + "32": 3059.0, + "33": 3139.0, + "34": 2875.0, + "35": 2919.0, + "36": 2956.0, + "37": 3114.0, + "38": 3011.0, + "39": 3102.0, + "40": 3052.0, + "41": 3056.0, + "42": 3312.0, + "43": 2849.0, + "44": 2950.0, + "45": 2930.0, + "46": 2991.0, + "47": 3237.0, + "48": 3285.0, + "49": 3389.0, + "50": 3341.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1917251584.0, + "2": 1917251584.0, + "3": 1917251584.0, + "4": 1917251584.0, "5": 1917251584.0, + "6": 1917251584.0, + "7": 1917251584.0, + "8": 1917251584.0, + "9": 1917251584.0, "10": 1917251584.0, + "11": 1917251584.0, + "12": 1917251584.0, + "13": 1917251584.0, + "14": 1917251584.0, "15": 1917251584.0, + "16": 1917251584.0, + "17": 1917251584.0, + "18": 1917251584.0, + "19": 1917251584.0, "20": 1917251584.0, + "21": 1917251584.0, + "22": 1917251584.0, + "23": 1917251584.0, + "24": 1917251584.0, "25": 1917251584.0, + "26": 1917251584.0, + "27": 1917251584.0, + "28": 1917251584.0, + "29": 1917251584.0, "30": 1917251584.0, + "31": 1917251584.0, + "32": 1917251584.0, + "33": 1917251584.0, + "34": 1917251584.0, "35": 1917251584.0, + "36": 1917251584.0, + "37": 1917251584.0, + "38": 1917251584.0, + "39": 1917251584.0, "40": 1917251584.0, + "41": 
1917251584.0, + "42": 1917251584.0, + "43": 1917251584.0, + "44": 1917251584.0, "45": 1917251584.0, + "46": 1917251584.0, + "47": 1917251584.0, + "48": 1917251584.0, + "49": 1917251584.0, "50": 1917251584.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2520653312.0, + "2": 2743788032.0, + "3": 2743788032.0, + "4": 2743788032.0, "5": 2743788032.0, + "6": 2743788032.0, + "7": 2743788032.0, + "8": 2743788032.0, + "9": 2743788032.0, "10": 2743788032.0, + "11": 2743788032.0, + "12": 2743788032.0, + "13": 2743788032.0, + "14": 2743788032.0, "15": 2743788032.0, + "16": 2743788032.0, + "17": 2743788032.0, + "18": 2743788032.0, + "19": 2743788032.0, "20": 2743788032.0, + "21": 2743788032.0, + "22": 2743788032.0, + "23": 2743788032.0, + "24": 2743788032.0, "25": 2743788032.0, + "26": 2743788032.0, + "27": 2743788032.0, + "28": 2743788032.0, + "29": 2743788032.0, "30": 2743788032.0, + "31": 2743788032.0, + "32": 2743788032.0, + "33": 2743788032.0, + "34": 2743788032.0, "35": 2743788032.0, + "36": 2743788032.0, + "37": 2743788032.0, + "38": 2743788032.0, + "39": 2743788032.0, "40": 2743788032.0, + "41": 2743788032.0, + "42": 2743788032.0, + "43": 2743788032.0, + "44": 2743788032.0, "45": 2743788032.0, + "46": 2743788032.0, + "47": 2743788032.0, + "48": 2743788032.0, + "49": 2743788032.0, "50": 2743788032.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 36.93776, - "5": 1.65475, - "10": 1.62769, - "15": 1.33667, - "20": 1.33944, - "25": 1.33881, - "30": 1.33786, - "35": 1.35864, - "40": 1.36521, - "45": 1.38143, - "50": 1.35158 + "1": 93.29155, + "2": 1.49946, + "3": 1.49367, + "4": 1.4955, + "5": 1.49263, + "6": 1.48524, + "7": 1.54794, + "8": 1.57222, + "9": 1.48844, + "10": 1.48601, + "11": 2.09056, + "12": 1.49068, + "13": 1.57264, + "14": 1.49736, + "15": 1.48278, + "16": 1.48267, + "17": 1.48508, + "18": 1.48364, + 
"19": 1.48751, + "20": 1.61513, + "21": 2.08969, + "22": 1.48879, + "23": 1.48515, + "24": 1.48483, + "25": 1.48865, + "26": 1.57806, + "27": 1.51158, + "28": 1.49095, + "29": 1.49422, + "30": 1.48732, + "31": 2.0932, + "32": 1.5259, + "33": 1.56274, + "34": 1.48919, + "35": 1.48483, + "36": 1.49146, + "37": 1.48123, + "38": 1.48759, + "39": 1.56751, + "40": 1.51104, + "41": 2.08583, + "42": 1.48897, + "43": 1.48816, + "44": 1.49366, + "45": 1.50945, + "46": 1.59565, + "47": 1.49573, + "48": 1.48593, + "49": 1.49004, + "50": 1.49426 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..c10a5cde1e8 --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.98296, + "2": 10.98234, + "3": 10.98053, + "4": 10.96517, + "5": 10.9979, + "6": 11.00523, + "7": 10.98274, + "8": 10.97592, + "9": 10.97818, + "10": 10.94511, + "11": 10.99258, + "12": 10.96821, + "13": 10.97041, + "14": 10.98206, + "15": 10.85379, + "16": 10.84986, + "17": 10.8067, + "18": 10.82647, + "19": 10.81124, + "20": 10.62204, + "21": 10.56064, + "22": 10.32092, + "23": 10.59523, + "24": 10.32467, + "25": 10.2333, + "26": 10.33822, + "27": 10.34883, + "28": 10.32085, + "29": 10.33072, + "30": 9.88565, + "31": 9.43005, + "32": 10.05329, + "33": 10.04284, + "34": 9.60526, + "35": 9.64982, + "36": 9.52942, + "37": 9.7683, + "38": 9.48583, + "39": 9.87461, + "40": 9.30023, + "41": 9.44902, + "42": 9.52875, + "43": 9.01605, + "44": 9.12966, + "45": 8.96824, + "46": 9.03047, + "47": 9.45728, + "48": 9.02121, + "49": 
8.56895, + "50": 9.1099 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2975.0, + "2": 3053.0, + "3": 3035.0, + "4": 2876.0, + "5": 3232.0, + "6": 3471.0, + "7": 3136.0, + "8": 3055.0, + "9": 3098.0, + "10": 2850.0, + "11": 3481.0, + "12": 3323.0, + "13": 3340.0, + "14": 3441.0, + "15": 3128.0, + "16": 3234.0, + "17": 2908.0, + "18": 3136.0, + "19": 3105.0, + "20": 2933.0, + "21": 3024.0, + "22": 2661.0, + "23": 3271.0, + "24": 2839.0, + "25": 2707.0, + "26": 2894.0, + "27": 3076.0, + "28": 3167.0, + "29": 3152.0, + "30": 2676.0, + "31": 2303.0, + "32": 3067.0, + "33": 3156.0, + "34": 2735.0, + "35": 2962.0, + "36": 2820.0, + "37": 3125.0, + "38": 2908.0, + "39": 3089.0, + "40": 3006.0, + "41": 3005.0, + "42": 3262.0, + "43": 2920.0, + "44": 2865.0, + "45": 2829.0, + "46": 3050.0, + "47": 3247.0, + "48": 3311.0, + "49": 3262.0, + "50": 3449.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1917251584.0, + "2": 1917251584.0, + "3": 1917251584.0, + "4": 1917251584.0, + "5": 1917251584.0, + "6": 1917251584.0, + "7": 1917251584.0, + "8": 1917251584.0, + "9": 1917251584.0, + "10": 1917251584.0, + "11": 1917251584.0, + "12": 1917251584.0, + "13": 1917251584.0, + "14": 1917251584.0, + "15": 1917251584.0, + "16": 1917251584.0, + "17": 1917251584.0, + "18": 1917251584.0, + "19": 1917251584.0, + "20": 1917251584.0, + "21": 1917251584.0, + "22": 1917251584.0, + "23": 1917251584.0, + "24": 1917251584.0, + "25": 1917251584.0, + "26": 1917251584.0, + "27": 1917251584.0, + "28": 1917251584.0, + "29": 1917251584.0, + "30": 1917251584.0, + "31": 1917251584.0, + "32": 1917251584.0, + "33": 1917251584.0, + "34": 1917251584.0, + "35": 1917251584.0, + "36": 1917251584.0, + "37": 1917251584.0, + "38": 1917251584.0, + "39": 1917251584.0, + "40": 1917251584.0, + "41": 1917251584.0, + "42": 1917251584.0, + "43": 1917251584.0, + "44": 1917251584.0, + "45": 
1917251584.0, + "46": 1917251584.0, + "47": 1917251584.0, + "48": 1917251584.0, + "49": 1917251584.0, + "50": 1917251584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2520653312.0, + "2": 2743788032.0, + "3": 2743788032.0, + "4": 2743788032.0, + "5": 2743788032.0, + "6": 2743788032.0, + "7": 2743788032.0, + "8": 2743788032.0, + "9": 2743788032.0, + "10": 2743788032.0, + "11": 2743788032.0, + "12": 2743788032.0, + "13": 2743788032.0, + "14": 2743788032.0, + "15": 2743788032.0, + "16": 2743788032.0, + "17": 2743788032.0, + "18": 2743788032.0, + "19": 2743788032.0, + "20": 2743788032.0, + "21": 2743788032.0, + "22": 2743788032.0, + "23": 2743788032.0, + "24": 2743788032.0, + "25": 2743788032.0, + "26": 2743788032.0, + "27": 2743788032.0, + "28": 2743788032.0, + "29": 2743788032.0, + "30": 2743788032.0, + "31": 2743788032.0, + "32": 2743788032.0, + "33": 2743788032.0, + "34": 2743788032.0, + "35": 2743788032.0, + "36": 2743788032.0, + "37": 2743788032.0, + "38": 2743788032.0, + "39": 2743788032.0, + "40": 2743788032.0, + "41": 2743788032.0, + "42": 2743788032.0, + "43": 2743788032.0, + "44": 2743788032.0, + "45": 2743788032.0, + "46": 2743788032.0, + "47": 2743788032.0, + "48": 2743788032.0, + "49": 2743788032.0, + "50": 2743788032.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 92.34219, + "2": 1.4515, + "3": 1.36887, + "4": 1.37341, + "5": 1.37602, + "6": 1.39004, + "7": 1.3836, + "8": 1.38196, + "9": 1.38896, + "10": 1.45857, + "11": 1.94935, + "12": 1.39106, + "13": 1.39805, + "14": 1.39033, + "15": 1.38482, + "16": 1.39457, + "17": 1.44864, + "18": 1.39068, + "19": 1.3833, + "20": 1.38815, + "21": 1.94703, + "22": 1.38309, + "23": 1.42093, + "24": 1.3998, + "25": 1.38693, + "26": 1.38436, + "27": 1.40235, + "28": 1.40751, + "29": 1.37396, + "30": 1.4111, + "31": 1.93813, + "32": 1.35926, + "33": 1.36462, + "34": 1.36782, + 
"35": 1.36782, + "36": 1.36568, + "37": 1.37148, + "38": 1.37963, + "39": 1.37862, + "40": 1.36625, + "41": 1.9063, + "42": 1.38764, + "43": 1.37219, + "44": 1.37186, + "45": 1.38575, + "46": 1.3857, + "47": 1.37676, + "48": 1.39862, + "49": 1.3615, + "50": 1.35892 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..baf1fa52671 --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.98296, + "2": 10.98234, + "3": 10.98046, + "4": 10.96512, + "5": 10.99789, + "6": 11.00517, + "7": 10.98273, + "8": 10.97596, + "9": 10.9783, + "10": 10.9452, + "11": 10.99257, + "12": 10.96815, + "13": 10.9703, + "14": 10.98207, + "15": 10.85381, + "16": 10.85003, + "17": 10.80667, + "18": 10.82648, + "19": 10.81123, + "20": 10.62194, + "21": 10.56069, + "22": 10.32105, + "23": 10.59531, + "24": 10.32461, + "25": 10.23318, + "26": 10.33828, + "27": 10.34879, + "28": 10.32094, + "29": 10.33068, + "30": 9.8856, + "31": 9.42999, + "32": 10.05321, + "33": 10.0429, + "34": 9.6053, + "35": 9.64984, + "36": 9.52934, + "37": 9.76834, + "38": 9.48585, + "39": 9.87468, + "40": 9.30022, + "41": 9.44909, + "42": 9.52866, + "43": 9.01602, + "44": 9.12963, + "45": 8.96826, + "46": 9.03049, + "47": 9.45732, + "48": 9.02119, + "49": 8.56905, + "50": 9.10994 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2992.0, + "2": 2911.0, + "3": 2981.0, + "4": 2784.0, + "5": 3153.0, + "6": 3292.0, + "7": 3123.0, + "8": 3104.0, + "9": 3123.0, + "10": 2796.0, + "11": 3497.0, + 
"12": 3305.0, + "13": 3271.0, + "14": 3414.0, + "15": 3082.0, + "16": 3257.0, + "17": 3088.0, + "18": 3113.0, + "19": 3283.0, + "20": 2980.0, + "21": 3045.0, + "22": 2623.0, + "23": 3281.0, + "24": 2774.0, + "25": 2745.0, + "26": 2827.0, + "27": 3106.0, + "28": 3227.0, + "29": 3118.0, + "30": 2695.0, + "31": 2326.0, + "32": 3058.0, + "33": 3138.0, + "34": 2755.0, + "35": 2931.0, + "36": 2947.0, + "37": 3169.0, + "38": 3016.0, + "39": 3187.0, + "40": 3076.0, + "41": 3043.0, + "42": 3245.0, + "43": 2813.0, + "44": 2934.0, + "45": 2868.0, + "46": 3015.0, + "47": 3294.0, + "48": 3327.0, + "49": 3253.0, + "50": 3403.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1917251584.0, + "2": 1917251584.0, + "3": 1917251584.0, + "4": 1917251584.0, + "5": 1917251584.0, + "6": 1917251584.0, + "7": 1917251584.0, + "8": 1917251584.0, + "9": 1917251584.0, + "10": 1917251584.0, + "11": 1917251584.0, + "12": 1917251584.0, + "13": 1917251584.0, + "14": 1917251584.0, + "15": 1917251584.0, + "16": 1917251584.0, + "17": 1917251584.0, + "18": 1917251584.0, + "19": 1917251584.0, + "20": 1917251584.0, + "21": 1917251584.0, + "22": 1917251584.0, + "23": 1917251584.0, + "24": 1917251584.0, + "25": 1917251584.0, + "26": 1917251584.0, + "27": 1917251584.0, + "28": 1917251584.0, + "29": 1917251584.0, + "30": 1917251584.0, + "31": 1917251584.0, + "32": 1917251584.0, + "33": 1917251584.0, + "34": 1917251584.0, + "35": 1917251584.0, + "36": 1917251584.0, + "37": 1917251584.0, + "38": 1917251584.0, + "39": 1917251584.0, + "40": 1917251584.0, + "41": 1917251584.0, + "42": 1917251584.0, + "43": 1917251584.0, + "44": 1917251584.0, + "45": 1917251584.0, + "46": 1917251584.0, + "47": 1917251584.0, + "48": 1917251584.0, + "49": 1917251584.0, + "50": 1917251584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2520653312.0, + "2": 2743788032.0, + "3": 2743788032.0, + 
"4": 2743788032.0, + "5": 2743788032.0, + "6": 2743788032.0, + "7": 2743788032.0, + "8": 2743788032.0, + "9": 2743788032.0, + "10": 2743788032.0, + "11": 2743788032.0, + "12": 2743788032.0, + "13": 2743788032.0, + "14": 2743788032.0, + "15": 2743788032.0, + "16": 2743788032.0, + "17": 2743788032.0, + "18": 2743788032.0, + "19": 2743788032.0, + "20": 2743788032.0, + "21": 2743788032.0, + "22": 2743788032.0, + "23": 2743788032.0, + "24": 2743788032.0, + "25": 2743788032.0, + "26": 2743788032.0, + "27": 2743788032.0, + "28": 2743788032.0, + "29": 2743788032.0, + "30": 2743788032.0, + "31": 2743788032.0, + "32": 2743788032.0, + "33": 2743788032.0, + "34": 2743788032.0, + "35": 2743788032.0, + "36": 2743788032.0, + "37": 2743788032.0, + "38": 2743788032.0, + "39": 2743788032.0, + "40": 2743788032.0, + "41": 2743788032.0, + "42": 2743788032.0, + "43": 2743788032.0, + "44": 2743788032.0, + "45": 2743788032.0, + "46": 2743788032.0, + "47": 2743788032.0, + "48": 2743788032.0, + "49": 2743788032.0, + "50": 2743788032.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 92.52278, + "2": 1.52203, + "3": 1.50103, + "4": 1.51627, + "5": 1.49943, + "6": 1.61325, + "7": 1.5622, + "8": 1.50668, + "9": 1.50122, + "10": 1.50749, + "11": 2.12764, + "12": 1.51111, + "13": 1.50973, + "14": 1.51712, + "15": 1.50952, + "16": 1.51343, + "17": 1.50742, + "18": 1.52017, + "19": 1.50622, + "20": 1.51648, + "21": 2.13229, + "22": 1.50789, + "23": 1.52087, + "24": 1.50668, + "25": 1.51534, + "26": 1.5016, + "27": 1.50737, + "28": 1.49873, + "29": 1.50715, + "30": 1.49941, + "31": 2.11492, + "32": 1.50348, + "33": 1.50106, + "34": 1.50093, + "35": 1.50813, + "36": 1.4988, + "37": 1.49847, + "38": 1.49777, + "39": 1.49937, + "40": 1.50456, + "41": 2.11318, + "42": 1.50605, + "43": 1.50721, + "44": 1.51813, + "45": 1.50211, + "46": 1.51633, + "47": 1.5019, + "48": 1.52386, + "49": 1.49987, + "50": 1.50829 + } + } +} \ No newline at end of 
file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..f43841d5cbf --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1 @@ +{"1": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Then, when you're ready, go home and watch the movie again.", "generated_tokens": [6830, 1044, 2200, 1636, 6185, 11831, 1044, 1974, 4590, 1321, 9951, 1278, 16070, 2790, 1046], "tpot": [0.5682204365730286, 0.00773027166724205, 0.006722208112478256, 0.0064345598220825195, 0.006336224265396595, 0.006343040149658918, 0.0063623362220823765, 0.0063252802938222885, 0.0067179519683122635, 0.006901599932461977, 0.006821152288466692, 0.006867455784231424, 0.006917183753103018, 0.006906943861395121, 0.006760320160537958], "latency": 0.6755752461031079, "logprobs": [-9.485179901123047, -3.7365002632141113, -3.0747694969177246, -1.744485855102539, -0.29669833183288574, -1.4020814895629883, -2.432681083679199, -1.7664837837219238, -1.4741225242614746, -6.42724084854126, -0.8153547048568726, -1.7931451797485352, -3.650665044784546, -3.698770046234131, -1.608336091041565, -1.6549599170684814, -2.8460211753845215, -6.670064926147461, -0.06550002098083496, -1.2442623376846313, -6.04405403137207, -9.507080078125, -10.461563110351562, -1.5952650308609009, -4.6770920753479, -0.745125412940979, 
-2.1571977138519287, -0.013643701560795307, -0.03557091951370239, -3.090214252471924, -8.740396499633789, -1.5405625104904175, -5.852315902709961, -3.09045672416687, -3.9833602905273438, -3.7632288932800293, -2.444291591644287, -2.273496627807617, -0.4683297276496887, -1.020460605621338, -5.3351545333862305, -8.249643325805664, -0.01584932766854763, -2.8506340980529785, -1.251563549041748, -3.7786898612976074, -1.0169645547866821, -0.002681709360331297, -3.0970988273620605, -11.113213539123535, -3.8127267360687256, -2.329777479171753, -4.672338485717773, -0.09791824221611023, -0.06286392360925674, -1.3320130109786987, -2.1521241664886475, -4.375304222106934, -0.43500134348869324, -3.9912281036376953, -0.5796594023704529, -0.26420092582702637, -2.811892509460449, -13.508228302001953, -0.10134205967187881, -3.5013256072998047, -0.8109210729598999, -5.298563480377197, -0.3272246718406677, -2.333836555480957, -0.5356347560882568, -1.288033366203308, -4.895185947418213, -15.548847198486328, -4.934615612030029, -0.22137367725372314, -6.583427429199219, -0.9010066986083984, -2.237170696258545, -1.8670732975006104, -0.20016230642795563, -5.921288013458252, -0.005614227149635553, -7.52609920501709, -3.284144878387451, -3.6920413970947266, -2.0169901847839355, -2.9249799251556396, -1.469851016998291, -2.4422709941864014, -1.2325081825256348, -1.964760184288025, -1.9597855806350708, -0.2527056932449341, -2.0347321033477783, -1.0436501502990723, -1.2124212980270386, -2.834301233291626, -1.6760799884796143, -2.205287218093872, -1.5265791416168213, -1.2453690767288208]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..36d52789f39 --- /dev/null +++ 
b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/golden_values_dev_dgxh100_eos.json @@ -0,0 +1 @@ +{"1": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Then, when you're ready, go home and watch the movie again.", "generated_tokens": [6830, 1044, 2200, 1636, 6185, 11831, 1044, 1974, 4590, 1321, 9951, 1278, 16070, 2790, 1046], "tpot": [0.5964657068252563, 0.0076944963075220585, 0.0069276802241802216, 0.006815008353441954, 0.007004896178841591, 0.007135615684092045, 0.007600544020533562, 0.00778160011395812, 0.008111871778964996, 0.008260959759354591, 0.008273440413177013, 0.008334367536008358, 0.008409472182393074, 0.008148159831762314, 0.008159839548170567], "latency": 0.7182000600732863, "logprobs": [-9.485179901123047, -3.7365002632141113, -3.0747694969177246, -1.744485855102539, -0.29669833183288574, -1.4020814895629883, -2.432681083679199, -1.7664837837219238, -1.4741225242614746, -6.42724084854126, -0.8153547048568726, -1.7931451797485352, -3.650665044784546, -3.698770046234131, -1.608336091041565, -1.6549599170684814, -2.8460211753845215, -6.670064926147461, -0.06550002098083496, -1.2442623376846313, -6.04405403137207, -9.507080078125, -10.461563110351562, -1.5952650308609009, -4.6770920753479, -0.745125412940979, -2.1571977138519287, -0.013643701560795307, -0.03557091951370239, -3.090214252471924, -8.740396499633789, -1.5405625104904175, -5.852315902709961, -3.09045672416687, -3.9833602905273438, -3.7632288932800293, -2.444291591644287, -2.273496627807617, -0.4683297276496887, -1.020460605621338, -5.3351545333862305, -8.249643325805664, -0.01584932766854763, -2.8506340980529785, 
-1.251563549041748, -3.7786898612976074, -1.0169645547866821, -0.002681709360331297, -3.0970988273620605, -11.113213539123535, -3.8127267360687256, -2.329777479171753, -4.672338485717773, -0.09791824221611023, -0.06286392360925674, -1.3320130109786987, -2.1521241664886475, -4.375304222106934, -0.43500134348869324, -3.9912281036376953, -0.5796594023704529, -0.26420092582702637, -2.811892509460449, -13.508228302001953, -0.10134205967187881, -3.5013256072998047, -0.8109210729598999, -5.298563480377197, -0.3272246718406677, -2.333836555480957, -0.5356347560882568, -1.288033366203308, -4.895185947418213, -15.548847198486328, -4.934615612030029, -0.22137367725372314, -6.583427429199219, -0.9010066986083984, -2.237170696258545, -1.8670732975006104, -0.20016230642795563, -5.921288013458252, -0.005614227149635553, -7.52609920501709, -3.284144878387451, -3.6920413970947266, -2.0169901847839355, -2.9249799251556396, -1.469851016998291, -2.4422709941864014, -1.2325081825256348, -1.964760184288025, -1.9597855806350708, -0.2527056932449341, -2.0347321033477783, -1.0436501502990723, -1.2124212980270386, -2.834301233291626, -1.6760799884796143, -2.205287218093872, -1.5265791416168213, -1.2453690767288208]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..0c524fa4991 --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1 @@ +{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Then, when you're ready, go home and watch the movie again.", "generated_tokens": [6830, 1044, 2200, 1636, 6185, 11831, 1044, 1974, 4590, 1321, 9951, 1278, 16070, 2790, 1046], "tpot": [22.176082611083984, 0.6151371598243713, 0.034286558628082275, 0.03372633829712868, 0.03291260823607445, 0.033486176282167435, 0.033701471984386444, 0.03326892852783203, 0.03287017345428467, 0.033419039100408554, 0.03316511958837509, 0.03274928033351898, 0.03266361728310585, 0.032435040920972824, 0.03254726529121399], "latency": 23.265353467315435, "logprobs": [-9.485179901123047, -3.7365002632141113, -3.0747694969177246, -1.744485855102539, -0.29669833183288574, -1.4020814895629883, -2.432681083679199, -1.7664837837219238, -1.4741225242614746, -6.42724084854126, -0.8153547048568726, -1.7931451797485352, -3.650665044784546, -3.698770046234131, -1.608336091041565, -1.6549599170684814, -2.8460211753845215, -6.670064926147461, -0.06550002098083496, -1.2442623376846313, -6.04405403137207, -9.507080078125, -10.461563110351562, -1.5952650308609009, -4.6770920753479, -0.745125412940979, -2.1571977138519287, -0.013643701560795307, -0.03557091951370239, -3.090214252471924, -8.740396499633789, -1.5405625104904175, -5.852315902709961, -3.09045672416687, -3.9833602905273438, -3.7632288932800293, -2.444291591644287, -2.273496627807617, -0.4683297276496887, -1.020460605621338, -5.3351545333862305, -8.249643325805664, -0.01584932766854763, -2.8506340980529785, -1.251563549041748, -3.7786898612976074, -1.0169645547866821, -0.002681709360331297, -3.0970988273620605, -11.113213539123535, -3.8127267360687256, -2.329777479171753, -4.672338485717773, -0.09791824221611023, -0.06286392360925674, -1.3320130109786987, -2.1521241664886475, -4.375304222106934, -0.43500134348869324, 
-3.9912281036376953, -0.5796594023704529, -0.26420092582702637, -2.811892509460449, -13.508228302001953, -0.10134205967187881, -3.5013256072998047, -0.8109210729598999, -5.298563480377197, -0.3272246718406677, -2.333836555480957, -0.5356347560882568, -1.288033366203308, -4.895185947418213, -15.548847198486328, -4.934615612030029, -0.22137367725372314, -6.583427429199219, -0.9010066986083984, -2.237170696258545, -1.8670732975006104, -0.20016230642795563, -5.921288013458252, -0.005614227149635553, -7.52609920501709, -3.284144878387451, -3.6920413970947266, -2.0169901847839355, -2.9249799251556396, -1.469851016998291, -2.4422709941864014, -1.2325081825256348, -1.964760184288025, -1.9597855806350708, -0.2527056932449341, -2.0347321033477783, -1.0436501502990723, -1.2124212980270386, -2.834301233291626, -1.6760799884796143, -2.205287218093872, -1.5265791416168213, -1.2453690767288208]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..1d887d9830c --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1 @@ +{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Then, when you're ready, go home and watch the movie again.", "generated_tokens": [6830, 1044, 2200, 1636, 6185, 11831, 1044, 1974, 4590, 1321, 9951, 1278, 16070, 2790, 1046], "tpot": [23.254732131958008, 0.9408637881278992, 0.034858111292123795, 0.03537708520889282, 0.03476342558860779, 0.03471830487251282, 0.03922403231263161, 0.03739152103662491, 0.03962313383817673, 0.04001171141862869, 0.03972022235393524, 0.040310338139534, 0.038479968905448914, 0.03562349081039429, 0.038027167320251465], "latency": 24.731004369910806, "logprobs": [-9.485179901123047, -3.7365002632141113, -3.0747694969177246, -1.744485855102539, -0.29669833183288574, -1.4020814895629883, -2.432681083679199, -1.7664837837219238, -1.4741225242614746, -6.42724084854126, -0.8153547048568726, -1.7931451797485352, -3.650665044784546, -3.698770046234131, -1.608336091041565, -1.6549599170684814, -2.8460211753845215, -6.670064926147461, -0.06550002098083496, -1.2442623376846313, -6.04405403137207, -9.507080078125, -10.461563110351562, -1.5952650308609009, -4.6770920753479, -0.745125412940979, -2.1571977138519287, -0.013643701560795307, -0.03557091951370239, -3.090214252471924, -8.740396499633789, -1.5405625104904175, -5.852315902709961, -3.09045672416687, -3.9833602905273438, -3.7632288932800293, -2.444291591644287, -2.273496627807617, -0.4683297276496887, -1.020460605621338, -5.3351545333862305, -8.249643325805664, -0.01584932766854763, -2.8506340980529785, -1.251563549041748, -3.7786898612976074, -1.0169645547866821, -0.002681709360331297, -3.0970988273620605, -11.113213539123535, -3.8127267360687256, -2.329777479171753, -4.672338485717773, -0.09791824221611023, -0.06286392360925674, -1.3320130109786987, -2.1521241664886475, -4.375304222106934, -0.43500134348869324, 
-3.9912281036376953, -0.5796594023704529, -0.26420092582702637, -2.811892509460449, -13.508228302001953, -0.10134205967187881, -3.5013256072998047, -0.8109210729598999, -5.298563480377197, -0.3272246718406677, -2.333836555480957, -0.5356347560882568, -1.288033366203308, -4.895185947418213, -15.548847198486328, -4.934615612030029, -0.22137367725372314, -6.583427429199219, -0.9010066986083984, -2.237170696258545, -1.8670732975006104, -0.20016230642795563, -5.921288013458252, -0.005614227149635553, -7.52609920501709, -3.284144878387451, -3.6920413970947266, -2.0169901847839355, -2.9249799251556396, -1.469851016998291, -2.4422709941864014, -1.2325081825256348, -1.964760184288025, -1.9597855806350708, -0.2527056932449341, -2.0347321033477783, -1.0436501502990723, -1.2124212980270386, -2.834301233291626, -1.6760799884796143, -2.205287218093872, -1.5265791416168213, -1.2453690767288208]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json index c9b7badd2f9..fd720368e7c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.7999, - "5": 10.8256, - "10": 10.77408, - "15": 10.7823, - "20": 10.69976, - "25": 10.51847, - "30": 10.36472, - "35": 10.25433, - "40": 10.1024, - "45": 9.84248, - "50": 9.92572 + "2": 10.80046, + "3": 10.80856, + "4": 10.78236, + "5": 10.82529, + 
"6": 10.83582, + "7": 10.81653, + "8": 10.81185, + "9": 10.81091, + "10": 10.77387, + "11": 10.85526, + "12": 10.82697, + "13": 10.85098, + "14": 10.85469, + "15": 10.7827, + "16": 10.77374, + "17": 10.7504, + "18": 10.78334, + "19": 10.75924, + "20": 10.69944, + "21": 10.67297, + "22": 10.51442, + "23": 10.68096, + "24": 10.57187, + "25": 10.51823, + "26": 10.57662, + "27": 10.59187, + "28": 10.55398, + "29": 10.57092, + "30": 10.36453, + "31": 10.10911, + "32": 10.45339, + "33": 10.43673, + "34": 10.19971, + "35": 10.25406, + "36": 10.23349, + "37": 10.35406, + "38": 10.20448, + "39": 10.39919, + "40": 10.10198, + "41": 10.12753, + "42": 10.21106, + "43": 9.83709, + "44": 9.96212, + "45": 9.84265, + "46": 9.80647, + "47": 10.14286, + "48": 9.86668, + "49": 9.5387, + "50": 9.92563 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 4866.0, - "5": 5487.0, - "10": 4524.0, - "15": 5298.0, - "20": 4827.0, - "25": 5007.0, - "30": 5355.0, - "35": 5634.0, - "40": 5894.0, - "45": 5741.0, - "50": 6592.0 + "1": 4859.0, + "2": 4958.0, + "3": 5062.0, + "4": 4978.0, + "5": 5447.0, + "6": 5701.0, + "7": 5288.0, + "8": 5091.0, + "9": 5455.0, + "10": 4456.0, + "11": 5940.0, + "12": 5333.0, + "13": 5833.0, + "14": 5618.0, + "15": 5332.0, + "16": 5494.0, + "17": 5290.0, + "18": 5259.0, + "19": 5322.0, + "20": 4889.0, + "21": 5334.0, + "22": 4823.0, + "23": 5689.0, + "24": 5082.0, + "25": 4963.0, + "26": 5289.0, + "27": 5273.0, + "28": 5740.0, + "29": 6004.0, + "30": 5295.0, + "31": 4876.0, + "32": 5709.0, + "33": 6098.0, + "34": 5165.0, + "35": 5500.0, + "36": 5505.0, + "37": 6376.0, + "38": 5826.0, + "39": 6773.0, + "40": 5824.0, + "41": 5809.0, + "42": 6386.0, + "43": 5747.0, + "44": 5860.0, + "45": 5732.0, + "46": 5948.0, + "47": 6430.0, + "48": 6500.0, + "49": 6497.0, + "50": 6719.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 
1145716736.0, - "5": 1145715200.0, - "10": 1145719296.0, + "2": 1145714688.0, + "3": 1145715200.0, + "4": 1145714176.0, + "5": 1146209792.0, + "6": 1146210816.0, + "7": 1145717248.0, + "8": 1146209280.0, + "9": 1145714688.0, + "10": 1146214912.0, + "11": 1146209792.0, + "12": 1145714176.0, + "13": 1145713152.0, + "14": 1146209280.0, "15": 1145713152.0, + "16": 1146210816.0, + "17": 1145713664.0, + "18": 1146210304.0, + "19": 1145714176.0, "20": 1145715200.0, + "21": 1146210304.0, + "22": 1145715712.0, + "23": 1145715712.0, + "24": 1145713152.0, "25": 1145712128.0, + "26": 1145715200.0, + "27": 1146210304.0, + "28": 1145713664.0, + "29": 1145711104.0, "30": 1145714688.0, - "35": 1145717760.0, + "31": 1146213376.0, + "32": 1145713664.0, + "33": 1145714688.0, + "34": 1145715200.0, + "35": 1146212864.0, + "36": 1145713152.0, + "37": 1145712128.0, + "38": 1146207744.0, + "39": 1145715200.0, "40": 1146210816.0, + "41": 1145714688.0, + "42": 1145712128.0, + "43": 1145715712.0, + "44": 1145717760.0, "45": 1146210304.0, - "50": 1145715712.0 + "46": 1146214400.0, + "47": 1145714688.0, + "48": 1145717760.0, + "49": 1145719296.0, + "50": 1145716224.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1593775104.0, - "5": 2052787712.0, - "10": 2057007616.0, - "15": 2057007616.0, - "20": 2057007616.0, - "25": 2057007616.0, - "30": 2057007616.0, - "35": 2057007616.0, - "40": 2057007616.0, - "45": 2057007616.0, - "50": 2057007616.0 + "2": 2051463168.0, + "3": 2052978176.0, + "4": 2052978176.0, + "5": 2052978176.0, + "6": 2053324288.0, + "7": 2053986816.0, + "8": 2053986816.0, + "9": 2057060864.0, + "10": 2057060864.0, + "11": 2057060864.0, + "12": 2057060864.0, + "13": 2057060864.0, + "14": 2057060864.0, + "15": 2057060864.0, + "16": 2057060864.0, + "17": 2057060864.0, + "18": 2057060864.0, + "19": 2057060864.0, + "20": 2057060864.0, + "21": 2057060864.0, + "22": 2057060864.0, + "23": 2057060864.0, + 
"24": 2057060864.0, + "25": 2057060864.0, + "26": 2057060864.0, + "27": 2057060864.0, + "28": 2057060864.0, + "29": 2057060864.0, + "30": 2057060864.0, + "31": 2057060864.0, + "32": 2057060864.0, + "33": 2057060864.0, + "34": 2057060864.0, + "35": 2057060864.0, + "36": 2057060864.0, + "37": 2057060864.0, + "38": 2057060864.0, + "39": 2057060864.0, + "40": 2057060864.0, + "41": 2057060864.0, + "42": 2057060864.0, + "43": 2057060864.0, + "44": 2057060864.0, + "45": 2057060864.0, + "46": 2057060864.0, + "47": 2057060864.0, + "48": 2057060864.0, + "49": 2057060864.0, + "50": 2057060864.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 16.36205, - "5": 0.22567, - "10": 0.24367, - "15": 0.2361, - "20": 0.22731, - "25": 0.2551, - "30": 0.22323, - "35": 0.23009, - "40": 0.2213, - "45": 0.22842, - "50": 0.22548 + "1": 18.20596, + "2": 0.35903, + "3": 0.29783, + "4": 0.32647, + "5": 0.27756, + "6": 0.27374, + "7": 0.30378, + "8": 0.27695, + "9": 0.2803, + "10": 0.28715, + "11": 0.26455, + "12": 0.26231, + "13": 0.2664, + "14": 0.25756, + "15": 0.26997, + "16": 0.26004, + "17": 0.27036, + "18": 0.26235, + "19": 0.25926, + "20": 0.2633, + "21": 0.27365, + "22": 0.28244, + "23": 0.27106, + "24": 0.26252, + "25": 0.27913, + "26": 0.26128, + "27": 0.25745, + "28": 0.28971, + "29": 0.25557, + "30": 0.26227, + "31": 0.28393, + "32": 0.2742, + "33": 0.25918, + "34": 0.2839, + "35": 0.26183, + "36": 0.26351, + "37": 0.25935, + "38": 0.27055, + "39": 0.25969, + "40": 0.25776, + "41": 0.26414, + "42": 0.26164, + "43": 0.27671, + "44": 0.26781, + "45": 0.25691, + "46": 0.28709, + "47": 0.26291, + "48": 0.26119, + "49": 0.27305, + "50": 0.26323 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..83e9dd029de --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82721, + "2": 10.84035, + "3": 10.82693, + "4": 10.81925, + "5": 10.85729, + "6": 10.86987, + "7": 10.85126, + "8": 10.84503, + "9": 10.85262, + "10": 10.79218, + "11": 10.86541, + "12": 10.87056, + "13": 10.87103, + "14": 10.87907, + "15": 10.82509, + "16": 10.81245, + "17": 10.77498, + "18": 10.81067, + "19": 10.79628, + "20": 10.7226, + "21": 10.69703, + "22": 10.5511, + "23": 10.70525, + "24": 10.59039, + "25": 10.5437, + "26": 10.60015, + "27": 10.62026, + "28": 10.57443, + "29": 10.58672, + "30": 10.35727, + "31": 10.12151, + "32": 10.47011, + "33": 10.45715, + "34": 10.21596, + "35": 10.2716, + "36": 10.23548, + "37": 10.35256, + "38": 10.20575, + "39": 10.40073, + "40": 10.09692, + "41": 10.13841, + "42": 10.21761, + "43": 9.84436, + "44": 9.96211, + "45": 9.84091, + "46": 9.81936, + "47": 10.13901, + "48": 9.8515, + "49": 9.53555, + "50": 9.92434 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4632.0, + "2": 4867.0, + "3": 4905.0, + "4": 4933.0, + "5": 5426.0, + "6": 5441.0, + "7": 5134.0, + "8": 4724.0, + "9": 5268.0, + "10": 4406.0, + "11": 5633.0, + "12": 5144.0, + "13": 5458.0, + "14": 5522.0, + "15": 5171.0, + "16": 5326.0, + "17": 5191.0, + "18": 5103.0, + "19": 5320.0, + "20": 4861.0, + "21": 5369.0, + "22": 4926.0, + "23": 5811.0, + "24": 5036.0, + "25": 4912.0, + "26": 5138.0, + "27": 5254.0, + "28": 5688.0, + "29": 5906.0, + "30": 5493.0, + "31": 4766.0, + "32": 
5805.0, + "33": 5992.0, + "34": 5140.0, + "35": 5663.0, + "36": 5599.0, + "37": 6398.0, + "38": 6036.0, + "39": 6612.0, + "40": 5946.0, + "41": 5919.0, + "42": 6480.0, + "43": 5819.0, + "44": 5690.0, + "45": 5761.0, + "46": 5974.0, + "47": 6514.0, + "48": 6268.0, + "49": 6290.0, + "50": 6671.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1116598784.0, + "2": 1116598272.0, + "3": 1116596224.0, + "4": 1116597760.0, + "5": 1116595712.0, + "6": 1116594688.0, + "7": 1116595712.0, + "8": 1116595200.0, + "9": 1116597760.0, + "10": 1116596224.0, + "11": 1116597248.0, + "12": 1116596224.0, + "13": 1116600320.0, + "14": 1116594688.0, + "15": 1116597760.0, + "16": 1116594688.0, + "17": 1116595200.0, + "18": 1116598272.0, + "19": 1116594176.0, + "20": 1116595712.0, + "21": 1116594176.0, + "22": 1116595712.0, + "23": 1116596736.0, + "24": 1116598272.0, + "25": 1116595712.0, + "26": 1116598784.0, + "27": 1116596224.0, + "28": 1116597248.0, + "29": 1116598272.0, + "30": 1116594688.0, + "31": 1116601344.0, + "32": 1116597760.0, + "33": 1116595712.0, + "34": 1116596224.0, + "35": 1116598784.0, + "36": 1116594176.0, + "37": 1116595712.0, + "38": 1116596736.0, + "39": 1116595200.0, + "40": 1116597760.0, + "41": 1116598784.0, + "42": 1116598784.0, + "43": 1116599296.0, + "44": 1116598272.0, + "45": 1116596736.0, + "46": 1116597248.0, + "47": 1116597248.0, + "48": 1116594688.0, + "49": 1116592640.0, + "50": 1116598784.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563067904.0, + "2": 2021656576.0, + "3": 2021656576.0, + "4": 2022763008.0, + "5": 2022763008.0, + "6": 2022763008.0, + "7": 2022763008.0, + "8": 2023145984.0, + "9": 2023145984.0, + "10": 2025749504.0, + "11": 2025749504.0, + "12": 2025749504.0, + "13": 2026550272.0, + "14": 2026550272.0, + "15": 2026550272.0, + "16": 2026550272.0, + "17": 2026550272.0, + "18": 2026550272.0, + 
"19": 2026550272.0, + "20": 2026550272.0, + "21": 2026550272.0, + "22": 2026550272.0, + "23": 2026550272.0, + "24": 2026550272.0, + "25": 2026550272.0, + "26": 2026550272.0, + "27": 2026550272.0, + "28": 2026550272.0, + "29": 2026550272.0, + "30": 2026550272.0, + "31": 2029278208.0, + "32": 2029278208.0, + "33": 2029278208.0, + "34": 2029278208.0, + "35": 2029278208.0, + "36": 2029278208.0, + "37": 2029278208.0, + "38": 2029278208.0, + "39": 2029278208.0, + "40": 2029278208.0, + "41": 2029278208.0, + "42": 2029278208.0, + "43": 2029278208.0, + "44": 2029278208.0, + "45": 2029278208.0, + "46": 2029278208.0, + "47": 2029278208.0, + "48": 2029278208.0, + "49": 2029278208.0, + "50": 2029278208.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.71534, + "2": 0.42823, + "3": 0.35479, + "4": 0.35129, + "5": 0.35492, + "6": 0.34734, + "7": 0.34252, + "8": 0.34249, + "9": 0.3404, + "10": 0.34249, + "11": 0.34006, + "12": 0.34343, + "13": 0.341, + "14": 0.33997, + "15": 0.34123, + "16": 0.34135, + "17": 0.34196, + "18": 0.34169, + "19": 0.34148, + "20": 0.34323, + "21": 0.34514, + "22": 0.34317, + "23": 0.34353, + "24": 0.341, + "25": 0.34149, + "26": 0.34555, + "27": 0.34102, + "28": 0.34068, + "29": 0.34243, + "30": 0.34248, + "31": 0.33982, + "32": 0.34184, + "33": 0.34279, + "34": 0.34274, + "35": 0.34238, + "36": 0.34027, + "37": 0.34377, + "38": 0.34332, + "39": 0.34223, + "40": 0.34254, + "41": 0.34097, + "42": 0.34043, + "43": 0.34447, + "44": 0.3405, + "45": 0.34009, + "46": 0.34121, + "47": 0.33815, + "48": 0.34039, + "49": 0.34174, + "50": 0.34062 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..c6c228253e0 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82721, + "2": 10.84035, + "3": 10.82723, + "4": 10.81924, + "5": 10.85677, + "6": 10.87001, + "7": 10.85158, + "8": 10.84472, + "9": 10.85255, + "10": 10.79194, + "11": 10.86558, + "12": 10.87116, + "13": 10.87097, + "14": 10.87861, + "15": 10.82571, + "16": 10.81234, + "17": 10.77447, + "18": 10.81055, + "19": 10.79638, + "20": 10.72194, + "21": 10.69672, + "22": 10.55073, + "23": 10.70511, + "24": 10.59025, + "25": 10.54429, + "26": 10.60007, + "27": 10.62018, + "28": 10.57431, + "29": 10.58678, + "30": 10.35759, + "31": 10.122, + "32": 10.47002, + "33": 10.45695, + "34": 10.21597, + "35": 10.27122, + "36": 10.23573, + "37": 10.35257, + "38": 10.20582, + "39": 10.40083, + "40": 10.09682, + "41": 10.1389, + "42": 10.21834, + "43": 9.84408, + "44": 9.96196, + "45": 9.84128, + "46": 9.8194, + "47": 10.13893, + "48": 9.85148, + "49": 9.5354, + "50": 9.9245 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4716.0, + "2": 4931.0, + "3": 4816.0, + "4": 4901.0, + "5": 5453.0, + "6": 5635.0, + "7": 5173.0, + "8": 4857.0, + "9": 5219.0, + "10": 4386.0, + "11": 5795.0, + "12": 5340.0, + "13": 5567.0, + "14": 5428.0, + "15": 5321.0, + "16": 5367.0, + "17": 5290.0, + "18": 5030.0, + "19": 5155.0, + "20": 4735.0, + "21": 5405.0, + "22": 4831.0, + "23": 5764.0, + "24": 5036.0, + "25": 4756.0, + "26": 5262.0, + "27": 5313.0, + "28": 5809.0, + "29": 5928.0, + "30": 5404.0, + "31": 4719.0, + "32": 5796.0, + 
"33": 6218.0, + "34": 5083.0, + "35": 5715.0, + "36": 5608.0, + "37": 6302.0, + "38": 6050.0, + "39": 6634.0, + "40": 5742.0, + "41": 5958.0, + "42": 6406.0, + "43": 5795.0, + "44": 5818.0, + "45": 5695.0, + "46": 5888.0, + "47": 6504.0, + "48": 6390.0, + "49": 6316.0, + "50": 6636.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1114775040.0, + "2": 1114774528.0, + "3": 1114772480.0, + "4": 1114774016.0, + "5": 1114770944.0, + "6": 1114771456.0, + "7": 1114771968.0, + "8": 1114770432.0, + "9": 1114774016.0, + "10": 1114772480.0, + "11": 1114772480.0, + "12": 1114774016.0, + "13": 1114776576.0, + "14": 1114770944.0, + "15": 1114774016.0, + "16": 1114774016.0, + "17": 1114770432.0, + "18": 1114774016.0, + "19": 1114770432.0, + "20": 1114771968.0, + "21": 1114771456.0, + "22": 1114771968.0, + "23": 1114772992.0, + "24": 1114774528.0, + "25": 1114770944.0, + "26": 1114774528.0, + "27": 1114772480.0, + "28": 1114773504.0, + "29": 1114774528.0, + "30": 1114770944.0, + "31": 1114777600.0, + "32": 1114773504.0, + "33": 1114771968.0, + "34": 1114772480.0, + "35": 1114775040.0, + "36": 1114771456.0, + "37": 1114771968.0, + "38": 1114772992.0, + "39": 1114770432.0, + "40": 1114774016.0, + "41": 1114775040.0, + "42": 1114775040.0, + "43": 1114775552.0, + "44": 1114774016.0, + "45": 1114772480.0, + "46": 1114774016.0, + "47": 1114772480.0, + "48": 1114770432.0, + "49": 1114768896.0, + "50": 1114775040.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563110912.0, + "2": 2019832832.0, + "3": 2019832832.0, + "4": 2020832768.0, + "5": 2020832768.0, + "6": 2020832768.0, + "7": 2020832768.0, + "8": 2020832768.0, + "9": 2020832768.0, + "10": 2024514560.0, + "11": 2024514560.0, + "12": 2024514560.0, + "13": 2025236480.0, + "14": 2025236480.0, + "15": 2025236480.0, + "16": 2025236480.0, + "17": 2025236480.0, + "18": 2025236480.0, + "19": 
2025236480.0, + "20": 2025236480.0, + "21": 2025236480.0, + "22": 2025236480.0, + "23": 2025236480.0, + "24": 2025236480.0, + "25": 2025236480.0, + "26": 2025236480.0, + "27": 2025236480.0, + "28": 2025236480.0, + "29": 2025236480.0, + "30": 2025236480.0, + "31": 2028140544.0, + "32": 2028140544.0, + "33": 2028140544.0, + "34": 2028140544.0, + "35": 2028140544.0, + "36": 2028140544.0, + "37": 2028140544.0, + "38": 2028140544.0, + "39": 2028140544.0, + "40": 2028140544.0, + "41": 2028140544.0, + "42": 2028140544.0, + "43": 2028140544.0, + "44": 2028140544.0, + "45": 2028140544.0, + "46": 2028140544.0, + "47": 2028140544.0, + "48": 2028140544.0, + "49": 2028140544.0, + "50": 2028140544.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 15.43783, + "2": 0.38321, + "3": 0.36811, + "4": 0.35154, + "5": 0.3506, + "6": 0.35246, + "7": 0.35049, + "8": 0.35172, + "9": 0.35056, + "10": 0.35222, + "11": 0.35146, + "12": 0.35099, + "13": 0.35097, + "14": 0.34999, + "15": 0.35178, + "16": 0.3507, + "17": 0.35085, + "18": 0.36269, + "19": 0.3628, + "20": 0.39629, + "21": 0.362, + "22": 0.34881, + "23": 0.34826, + "24": 0.34894, + "25": 0.34905, + "26": 0.34868, + "27": 0.34852, + "28": 0.35034, + "29": 0.3505, + "30": 0.34898, + "31": 0.34972, + "32": 0.34827, + "33": 0.34805, + "34": 0.34828, + "35": 0.3462, + "36": 0.34816, + "37": 0.34932, + "38": 0.3474, + "39": 0.34618, + "40": 0.34596, + "41": 0.34685, + "42": 0.34571, + "43": 0.34956, + "44": 0.34632, + "45": 0.34487, + "46": 0.34479, + "47": 0.34793, + "48": 0.34481, + "49": 0.34468, + "50": 0.34354 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..72d650fcb5a --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.7999, + "2": 10.80046, + "3": 10.80906, + "4": 10.78256, + "5": 10.82566, + "6": 10.83616, + "7": 10.81688, + "8": 10.81159, + "9": 10.81058, + "10": 10.77421, + "11": 10.8555, + "12": 10.82696, + "13": 10.85081, + "14": 10.85457, + "15": 10.78256, + "16": 10.77334, + "17": 10.75077, + "18": 10.78391, + "19": 10.75873, + "20": 10.70038, + "21": 10.67229, + "22": 10.51412, + "23": 10.68126, + "24": 10.57156, + "25": 10.51795, + "26": 10.57588, + "27": 10.59132, + "28": 10.55287, + "29": 10.57112, + "30": 10.36497, + "31": 10.10959, + "32": 10.45338, + "33": 10.43695, + "34": 10.20008, + "35": 10.25443, + "36": 10.23362, + "37": 10.35422, + "38": 10.20437, + "39": 10.39909, + "40": 10.10235, + "41": 10.12745, + "42": 10.21091, + "43": 9.83755, + "44": 9.96198, + "45": 9.8428, + "46": 9.80664, + "47": 10.14256, + "48": 9.86637, + "49": 9.53809, + "50": 9.92581 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4832.0, + "2": 4993.0, + "3": 5015.0, + "4": 5101.0, + "5": 5493.0, + "6": 5733.0, + "7": 5202.0, + "8": 5069.0, + "9": 5607.0, + "10": 4607.0, + "11": 5837.0, + "12": 5394.0, + "13": 5775.0, + "14": 5823.0, + "15": 5240.0, + "16": 5310.0, + "17": 5304.0, + "18": 5229.0, + "19": 5439.0, + "20": 4899.0, + "21": 5406.0, + "22": 4858.0, + "23": 5868.0, + "24": 5135.0, + "25": 4824.0, + "26": 5375.0, + "27": 5395.0, + "28": 5877.0, + "29": 5992.0, + "30": 5324.0, + "31": 4919.0, + "32": 5852.0, 
+ "33": 6135.0, + "34": 5147.0, + "35": 5560.0, + "36": 5414.0, + "37": 6415.0, + "38": 5968.0, + "39": 6734.0, + "40": 5818.0, + "41": 5767.0, + "42": 6510.0, + "43": 5734.0, + "44": 5802.0, + "45": 5717.0, + "46": 5997.0, + "47": 6519.0, + "48": 6573.0, + "49": 6525.0, + "50": 6552.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1145716736.0, + "2": 1145714688.0, + "3": 1145715200.0, + "4": 1145713152.0, + "5": 1146210816.0, + "6": 1146210304.0, + "7": 1145716736.0, + "8": 1146209280.0, + "9": 1145714688.0, + "10": 1146214912.0, + "11": 1146210816.0, + "12": 1145713664.0, + "13": 1145713152.0, + "14": 1146210304.0, + "15": 1145713152.0, + "16": 1145714688.0, + "17": 1145713664.0, + "18": 1146212352.0, + "19": 1145714176.0, + "20": 1145715200.0, + "21": 1146210304.0, + "22": 1145715712.0, + "23": 1145715200.0, + "24": 1145713152.0, + "25": 1145712128.0, + "26": 1145715200.0, + "27": 1145715200.0, + "28": 1145713664.0, + "29": 1145711616.0, + "30": 1145714688.0, + "31": 1146213376.0, + "32": 1145713152.0, + "33": 1145714688.0, + "34": 1146210304.0, + "35": 1146212864.0, + "36": 1145713664.0, + "37": 1145712640.0, + "38": 1146207744.0, + "39": 1145715200.0, + "40": 1146210816.0, + "41": 1145715712.0, + "42": 1146207744.0, + "43": 1146211328.0, + "44": 1145716736.0, + "45": 1146210304.0, + "46": 1146214400.0, + "47": 1145714688.0, + "48": 1145717248.0, + "49": 1146215936.0, + "50": 1145716224.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1593775104.0, + "2": 2051463168.0, + "3": 2052791808.0, + "4": 2052791808.0, + "5": 2052791808.0, + "6": 2053601792.0, + "7": 2054070272.0, + "8": 2054225408.0, + "9": 2056797696.0, + "10": 2057079296.0, + "11": 2057079296.0, + "12": 2057079296.0, + "13": 2057079296.0, + "14": 2057079296.0, + "15": 2057079296.0, + "16": 2057079296.0, + "17": 2057079296.0, + "18": 2057079296.0, + "19": 
2057079296.0, + "20": 2057079296.0, + "21": 2057079296.0, + "22": 2057079296.0, + "23": 2057079296.0, + "24": 2057079296.0, + "25": 2057079296.0, + "26": 2057079296.0, + "27": 2057079296.0, + "28": 2057079296.0, + "29": 2057079296.0, + "30": 2057079296.0, + "31": 2057079296.0, + "32": 2057079296.0, + "33": 2057079296.0, + "34": 2057079296.0, + "35": 2057079296.0, + "36": 2057079296.0, + "37": 2057079296.0, + "38": 2057079296.0, + "39": 2057079296.0, + "40": 2057079296.0, + "41": 2057079296.0, + "42": 2057079296.0, + "43": 2057079296.0, + "44": 2057079296.0, + "45": 2057079296.0, + "46": 2057079296.0, + "47": 2057079296.0, + "48": 2057079296.0, + "49": 2057079296.0, + "50": 2057079296.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 19.78346, + "2": 0.3309, + "3": 0.26692, + "4": 0.30511, + "5": 0.25944, + "6": 0.25055, + "7": 0.26908, + "8": 0.24453, + "9": 0.23731, + "10": 0.24901, + "11": 0.23286, + "12": 0.22911, + "13": 0.2292, + "14": 0.23339, + "15": 0.24721, + "16": 0.24166, + "17": 0.22756, + "18": 0.2223, + "19": 0.22427, + "20": 0.23111, + "21": 0.23175, + "22": 0.2573, + "23": 0.24989, + "24": 0.23707, + "25": 0.23317, + "26": 0.23062, + "27": 0.22667, + "28": 0.24009, + "29": 0.22295, + "30": 0.22987, + "31": 0.25103, + "32": 0.24353, + "33": 0.22584, + "34": 0.23541, + "35": 0.23768, + "36": 0.22699, + "37": 0.22446, + "38": 0.24288, + "39": 0.22484, + "40": 0.2277, + "41": 0.23059, + "42": 0.22349, + "43": 0.23202, + "44": 0.23787, + "45": 0.24589, + "46": 0.27096, + "47": 0.23921, + "48": 0.24334, + "49": 0.24986, + "50": 0.24759 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..e4e01388a15 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.7999, + "2": 10.80046, + "3": 10.8089, + "4": 10.78245, + "5": 10.82504, + "6": 10.83657, + "7": 10.81628, + "8": 10.81184, + "9": 10.8108, + "10": 10.7742, + "11": 10.85482, + "12": 10.82663, + "13": 10.85131, + "14": 10.85461, + "15": 10.78253, + "16": 10.77375, + "17": 10.74989, + "18": 10.78346, + "19": 10.75877, + "20": 10.69982, + "21": 10.67287, + "22": 10.5142, + "23": 10.68053, + "24": 10.57164, + "25": 10.51814, + "26": 10.57591, + "27": 10.59136, + "28": 10.55398, + "29": 10.57104, + "30": 10.36425, + "31": 10.10945, + "32": 10.45329, + "33": 10.43693, + "34": 10.20011, + "35": 10.25443, + "36": 10.23318, + "37": 10.3536, + "38": 10.20421, + "39": 10.3993, + "40": 10.10241, + "41": 10.12765, + "42": 10.21115, + "43": 9.83746, + "44": 9.96186, + "45": 9.84266, + "46": 9.80686, + "47": 10.14266, + "48": 9.86672, + "49": 9.53822, + "50": 9.92595 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4752.0, + "2": 5040.0, + "3": 5112.0, + "4": 5072.0, + "5": 5472.0, + "6": 5619.0, + "7": 5255.0, + "8": 5065.0, + "9": 5483.0, + "10": 4607.0, + "11": 5862.0, + "12": 5377.0, + "13": 5783.0, + "14": 5830.0, + "15": 5249.0, + "16": 5346.0, + "17": 5291.0, + "18": 5277.0, + "19": 5352.0, + "20": 4942.0, + "21": 5465.0, + "22": 4878.0, + "23": 5807.0, + "24": 5145.0, + "25": 4873.0, + "26": 5380.0, + "27": 5479.0, + "28": 5739.0, + "29": 5950.0, + "30": 5363.0, + "31": 4730.0, + "32": 5732.0, + "33": 5963.0, 
+ "34": 5261.0, + "35": 5660.0, + "36": 5422.0, + "37": 6362.0, + "38": 6114.0, + "39": 6803.0, + "40": 5731.0, + "41": 5808.0, + "42": 6485.0, + "43": 5742.0, + "44": 5843.0, + "45": 5876.0, + "46": 6024.0, + "47": 6554.0, + "48": 6354.0, + "49": 6497.0, + "50": 6526.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1144115200.0, + "2": 1144113152.0, + "3": 1144113664.0, + "4": 1144112640.0, + "5": 1144113664.0, + "6": 1144113152.0, + "7": 1144115200.0, + "8": 1144112640.0, + "9": 1144113152.0, + "10": 1144118272.0, + "11": 1144112640.0, + "12": 1144112128.0, + "13": 1144110592.0, + "14": 1144112640.0, + "15": 1144111616.0, + "16": 1144112640.0, + "17": 1144112128.0, + "18": 1144113152.0, + "19": 1144112640.0, + "20": 1144113664.0, + "21": 1144113152.0, + "22": 1144114176.0, + "23": 1144113664.0, + "24": 1144111616.0, + "25": 1144110592.0, + "26": 1144113664.0, + "27": 1144113664.0, + "28": 1144112128.0, + "29": 1144110080.0, + "30": 1144113152.0, + "31": 1144116224.0, + "32": 1144112128.0, + "33": 1144113152.0, + "34": 1144113664.0, + "35": 1144115712.0, + "36": 1144111616.0, + "37": 1144111104.0, + "38": 1144110592.0, + "39": 1144113664.0, + "40": 1144113664.0, + "41": 1144114176.0, + "42": 1144109056.0, + "43": 1144114176.0, + "44": 1144115200.0, + "45": 1144113152.0, + "46": 1144117760.0, + "47": 1144113152.0, + "48": 1144115712.0, + "49": 1144117760.0, + "50": 1144114176.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1593775104.0, + "2": 2049587200.0, + "3": 2050487808.0, + "4": 2050487808.0, + "5": 2050487808.0, + "6": 2051877376.0, + "7": 2052037632.0, + "8": 2052037632.0, + "9": 2053219840.0, + "10": 2055123968.0, + "11": 2055123968.0, + "12": 2055123968.0, + "13": 2055123968.0, + "14": 2055123968.0, + "15": 2055123968.0, + "16": 2055123968.0, + "17": 2055123968.0, + "18": 2055123968.0, + "19": 2055123968.0, + "20": 
2055123968.0, + "21": 2055123968.0, + "22": 2055123968.0, + "23": 2055123968.0, + "24": 2055123968.0, + "25": 2055123968.0, + "26": 2055123968.0, + "27": 2055123968.0, + "28": 2055123968.0, + "29": 2055123968.0, + "30": 2055123968.0, + "31": 2055123968.0, + "32": 2055123968.0, + "33": 2055123968.0, + "34": 2055123968.0, + "35": 2055123968.0, + "36": 2055123968.0, + "37": 2055123968.0, + "38": 2055123968.0, + "39": 2055123968.0, + "40": 2055123968.0, + "41": 2055123968.0, + "42": 2055123968.0, + "43": 2055123968.0, + "44": 2055123968.0, + "45": 2055123968.0, + "46": 2055123968.0, + "47": 2055123968.0, + "48": 2055123968.0, + "49": 2055123968.0, + "50": 2055123968.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.54696, + "2": 0.35381, + "3": 0.30805, + "4": 0.32999, + "5": 0.28074, + "6": 0.27713, + "7": 0.30692, + "8": 0.27076, + "9": 0.28178, + "10": 0.28798, + "11": 0.26657, + "12": 0.27288, + "13": 0.27118, + "14": 0.26505, + "15": 0.27307, + "16": 0.26745, + "17": 0.28092, + "18": 0.25951, + "19": 0.26123, + "20": 0.27117, + "21": 0.26705, + "22": 0.27657, + "23": 0.2785, + "24": 0.27138, + "25": 0.27542, + "26": 0.26549, + "27": 0.26436, + "28": 0.2817, + "29": 0.26002, + "30": 0.26437, + "31": 0.29073, + "32": 0.27239, + "33": 0.26215, + "34": 0.2748, + "35": 0.2623, + "36": 0.25929, + "37": 0.26086, + "38": 0.26996, + "39": 0.25721, + "40": 0.25938, + "41": 0.26959, + "42": 0.25657, + "43": 0.26426, + "44": 0.25689, + "45": 0.26206, + "46": 0.27753, + "47": 0.27998, + "48": 0.26838, + "49": 0.27354, + "50": 0.26097 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..e3d20b7e9f0 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8277, + "2": 10.84068, + "3": 10.82725, + "4": 10.81926, + "5": 10.85722, + "6": 10.86986, + "7": 10.85174, + "8": 10.84457, + "9": 10.85329, + "10": 10.79198, + "11": 10.86553, + "12": 10.87133, + "13": 10.87076, + "14": 10.87887, + "15": 10.82554, + "16": 10.81223, + "17": 10.77441, + "18": 10.81045, + "19": 10.79657, + "20": 10.72264, + "21": 10.69696, + "22": 10.55147, + "23": 10.7054, + "24": 10.59026, + "25": 10.54438, + "26": 10.60027, + "27": 10.61973, + "28": 10.5745, + "29": 10.58661, + "30": 10.35758, + "31": 10.12167, + "32": 10.46999, + "33": 10.45701, + "34": 10.21559, + "35": 10.27129, + "36": 10.23523, + "37": 10.35245, + "38": 10.20629, + "39": 10.40093, + "40": 10.09725, + "41": 10.13848, + "42": 10.21819, + "43": 9.84432, + "44": 9.9617, + "45": 9.84065, + "46": 9.8197, + "47": 10.13911, + "48": 9.85183, + "49": 9.53564, + "50": 9.92448 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4680.0, + "2": 4942.0, + "3": 4820.0, + "4": 4878.0, + "5": 5470.0, + "6": 5474.0, + "7": 5224.0, + "8": 4738.0, + "9": 5223.0, + "10": 4223.0, + "11": 5625.0, + "12": 5287.0, + "13": 5621.0, + "14": 5408.0, + "15": 5262.0, + "16": 5461.0, + "17": 5216.0, + "18": 5076.0, + "19": 5238.0, + "20": 4985.0, + "21": 5432.0, + "22": 4799.0, + "23": 5740.0, + "24": 5056.0, + "25": 4935.0, + "26": 5264.0, + "27": 5417.0, + "28": 5800.0, + "29": 5904.0, + "30": 5454.0, + "31": 4819.0, + "32": 
5859.0, + "33": 6012.0, + "34": 5038.0, + "35": 5618.0, + "36": 5650.0, + "37": 6312.0, + "38": 6183.0, + "39": 6590.0, + "40": 5923.0, + "41": 5990.0, + "42": 6285.0, + "43": 5816.0, + "44": 5809.0, + "45": 5685.0, + "46": 5951.0, + "47": 6413.0, + "48": 6367.0, + "49": 6227.0, + "50": 6746.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1114761216.0, + "2": 1114759680.0, + "3": 1114756608.0, + "4": 1114760192.0, + "5": 1114757120.0, + "6": 1114757632.0, + "7": 1114759168.0, + "8": 1114757632.0, + "9": 1114759680.0, + "10": 1114759168.0, + "11": 1114759168.0, + "12": 1114758144.0, + "13": 1114763264.0, + "14": 1114757120.0, + "15": 1114760192.0, + "16": 1114758144.0, + "17": 1114757120.0, + "18": 1114760192.0, + "19": 1114758144.0, + "20": 1114758656.0, + "21": 1114757120.0, + "22": 1114758144.0, + "23": 1114758144.0, + "24": 1114760704.0, + "25": 1114758144.0, + "26": 1114761216.0, + "27": 1114758656.0, + "28": 1114759680.0, + "29": 1114760704.0, + "30": 1114757120.0, + "31": 1114763776.0, + "32": 1114758656.0, + "33": 1114757120.0, + "34": 1114758656.0, + "35": 1114761216.0, + "36": 1114756608.0, + "37": 1114758144.0, + "38": 1114760192.0, + "39": 1114757632.0, + "40": 1114759680.0, + "41": 1114760192.0, + "42": 1114761216.0, + "43": 1114760704.0, + "44": 1114760192.0, + "45": 1114758656.0, + "46": 1114760192.0, + "47": 1114759680.0, + "48": 1114757120.0, + "49": 1114755072.0, + "50": 1114760704.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563573248.0, + "2": 2019811840.0, + "3": 2019811840.0, + "4": 2020643840.0, + "5": 2020643840.0, + "6": 2020643840.0, + "7": 2020643840.0, + "8": 2020643840.0, + "9": 2020643840.0, + "10": 2024514560.0, + "11": 2024514560.0, + "12": 2024514560.0, + "13": 2025665536.0, + "14": 2025665536.0, + "15": 2025665536.0, + "16": 2025665536.0, + "17": 2025665536.0, + "18": 2025665536.0, + 
"19": 2025665536.0, + "20": 2025665536.0, + "21": 2025665536.0, + "22": 2025665536.0, + "23": 2025665536.0, + "24": 2025665536.0, + "25": 2025665536.0, + "26": 2025665536.0, + "27": 2025665536.0, + "28": 2025665536.0, + "29": 2025665536.0, + "30": 2025665536.0, + "31": 2028067328.0, + "32": 2028067328.0, + "33": 2028067328.0, + "34": 2028067328.0, + "35": 2028067328.0, + "36": 2028067328.0, + "37": 2028067328.0, + "38": 2028067328.0, + "39": 2028067328.0, + "40": 2028067328.0, + "41": 2028067328.0, + "42": 2028067328.0, + "43": 2028067328.0, + "44": 2028067328.0, + "45": 2028067328.0, + "46": 2028067328.0, + "47": 2028067328.0, + "48": 2028067328.0, + "49": 2028067328.0, + "50": 2028067328.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.27277, + "2": 0.40327, + "3": 0.34567, + "4": 0.33458, + "5": 0.33204, + "6": 0.33309, + "7": 0.33268, + "8": 0.33286, + "9": 0.33475, + "10": 0.3322, + "11": 0.33002, + "12": 0.33139, + "13": 0.32988, + "14": 0.32847, + "15": 0.329, + "16": 0.33243, + "17": 0.32814, + "18": 0.32942, + "19": 0.33246, + "20": 0.32858, + "21": 0.32917, + "22": 0.34065, + "23": 0.32906, + "24": 0.33021, + "25": 0.33765, + "26": 0.32931, + "27": 0.32935, + "28": 0.33465, + "29": 0.32924, + "30": 0.32887, + "31": 0.33235, + "32": 0.32882, + "33": 0.33484, + "34": 0.33959, + "35": 0.33548, + "36": 0.33621, + "37": 0.33811, + "38": 0.33082, + "39": 0.33203, + "40": 0.33659, + "41": 0.33085, + "42": 0.33009, + "43": 0.33311, + "44": 0.32891, + "45": 0.32947, + "46": 0.33546, + "47": 0.32941, + "48": 0.32968, + "49": 0.33644, + "50": 0.3272 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..ba66ccd2c7b --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8277, + "2": 10.84068, + "3": 10.82724, + "4": 10.81923, + "5": 10.85713, + "6": 10.87014, + "7": 10.85172, + "8": 10.84521, + "9": 10.85279, + "10": 10.79234, + "11": 10.86534, + "12": 10.87114, + "13": 10.87049, + "14": 10.87874, + "15": 10.82545, + "16": 10.81195, + "17": 10.77413, + "18": 10.81121, + "19": 10.79683, + "20": 10.72265, + "21": 10.69712, + "22": 10.55129, + "23": 10.70543, + "24": 10.58987, + "25": 10.54438, + "26": 10.60004, + "27": 10.62008, + "28": 10.57416, + "29": 10.58628, + "30": 10.35718, + "31": 10.12186, + "32": 10.47004, + "33": 10.457, + "34": 10.21604, + "35": 10.27123, + "36": 10.23567, + "37": 10.35221, + "38": 10.20618, + "39": 10.40139, + "40": 10.09681, + "41": 10.13873, + "42": 10.21803, + "43": 9.84419, + "44": 9.96192, + "45": 9.84135, + "46": 9.81933, + "47": 10.13938, + "48": 9.85137, + "49": 9.53548, + "50": 9.92432 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4732.0, + "2": 4949.0, + "3": 4906.0, + "4": 4915.0, + "5": 5426.0, + "6": 5376.0, + "7": 5127.0, + "8": 4923.0, + "9": 5398.0, + "10": 4190.0, + "11": 5650.0, + "12": 5207.0, + "13": 5521.0, + "14": 5564.0, + "15": 5258.0, + "16": 5655.0, + "17": 5201.0, + "18": 5166.0, + "19": 5222.0, + "20": 4973.0, + "21": 5289.0, + "22": 4840.0, + "23": 5690.0, + "24": 4966.0, + "25": 4863.0, + "26": 5234.0, + "27": 5239.0, + "28": 5757.0, + "29": 5841.0, + "30": 5290.0, + "31": 4822.0, + "32": 5828.0, + 
"33": 6111.0, + "34": 5127.0, + "35": 5596.0, + "36": 5581.0, + "37": 6423.0, + "38": 6184.0, + "39": 6619.0, + "40": 5870.0, + "41": 6054.0, + "42": 6325.0, + "43": 5910.0, + "44": 5902.0, + "45": 5841.0, + "46": 6222.0, + "47": 6329.0, + "48": 6302.0, + "49": 6013.0, + "50": 6678.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1114761216.0, + "2": 1114759680.0, + "3": 1114757632.0, + "4": 1114760192.0, + "5": 1114757120.0, + "6": 1114757632.0, + "7": 1114759680.0, + "8": 1114758144.0, + "9": 1114760192.0, + "10": 1114758656.0, + "11": 1114759168.0, + "12": 1114759168.0, + "13": 1114762752.0, + "14": 1114757120.0, + "15": 1114760192.0, + "16": 1114758144.0, + "17": 1114757120.0, + "18": 1114760192.0, + "19": 1114756608.0, + "20": 1114759168.0, + "21": 1114756608.0, + "22": 1114758144.0, + "23": 1114758144.0, + "24": 1114760704.0, + "25": 1114757120.0, + "26": 1114761216.0, + "27": 1114758656.0, + "28": 1114759680.0, + "29": 1114760704.0, + "30": 1114757632.0, + "31": 1114763776.0, + "32": 1114760192.0, + "33": 1114758144.0, + "34": 1114758656.0, + "35": 1114761216.0, + "36": 1114756608.0, + "37": 1114758144.0, + "38": 1114760192.0, + "39": 1114757632.0, + "40": 1114759168.0, + "41": 1114760192.0, + "42": 1114760192.0, + "43": 1114761728.0, + "44": 1114760192.0, + "45": 1114759680.0, + "46": 1114760192.0, + "47": 1114759680.0, + "48": 1114757120.0, + "49": 1114755072.0, + "50": 1114761216.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563573248.0, + "2": 2019811840.0, + "3": 2019811840.0, + "4": 2020516864.0, + "5": 2020516864.0, + "6": 2020516864.0, + "7": 2020516864.0, + "8": 2020516864.0, + "9": 2020516864.0, + "10": 2023621120.0, + "11": 2023621120.0, + "12": 2023621120.0, + "13": 2025571840.0, + "14": 2025571840.0, + "15": 2025571840.0, + "16": 2025571840.0, + "17": 2025571840.0, + "18": 2025571840.0, + "19": 
2025571840.0, + "20": 2025571840.0, + "21": 2025571840.0, + "22": 2025571840.0, + "23": 2025571840.0, + "24": 2025571840.0, + "25": 2025571840.0, + "26": 2025571840.0, + "27": 2025571840.0, + "28": 2025571840.0, + "29": 2025571840.0, + "30": 2025571840.0, + "31": 2027690496.0, + "32": 2027690496.0, + "33": 2027690496.0, + "34": 2027690496.0, + "35": 2027690496.0, + "36": 2027690496.0, + "37": 2027690496.0, + "38": 2027690496.0, + "39": 2027690496.0, + "40": 2027690496.0, + "41": 2027690496.0, + "42": 2027690496.0, + "43": 2027690496.0, + "44": 2027690496.0, + "45": 2027690496.0, + "46": 2027690496.0, + "47": 2027690496.0, + "48": 2027690496.0, + "49": 2027690496.0, + "50": 2027690496.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.26761, + "2": 0.46509, + "3": 0.33784, + "4": 0.32867, + "5": 0.32614, + "6": 0.3325, + "7": 0.32603, + "8": 0.32762, + "9": 0.33105, + "10": 0.3264, + "11": 0.32497, + "12": 0.33102, + "13": 0.32607, + "14": 0.32484, + "15": 0.32523, + "16": 0.33277, + "17": 0.33128, + "18": 0.32838, + "19": 0.32883, + "20": 0.32857, + "21": 0.32833, + "22": 0.32958, + "23": 0.32767, + "24": 0.32771, + "25": 0.32857, + "26": 0.32941, + "27": 0.33631, + "28": 0.3369, + "29": 0.32694, + "30": 0.32566, + "31": 0.32837, + "32": 0.32456, + "33": 0.32475, + "34": 0.33037, + "35": 0.32967, + "36": 0.33178, + "37": 0.32753, + "38": 0.324, + "39": 0.32398, + "40": 0.32822, + "41": 0.32419, + "42": 0.33155, + "43": 0.33488, + "44": 0.32987, + "45": 0.32872, + "46": 0.33575, + "47": 0.32897, + "48": 0.32935, + "49": 0.33172, + "50": 0.32626 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json index 0366fd2c402..d74ca1632d3 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.7999, - "5": 10.82494, - "10": 10.77362, - "15": 10.78226, - "20": 10.69951, - "25": 10.51731, + "2": 10.80046, + "3": 10.8086, + "4": 10.78211, + "5": 10.8253, + "6": 10.83613, + "7": 10.81656, + "8": 10.81172, + "9": 10.81127, + "10": 10.77365, + "11": 10.8551, + "12": 10.82716, + "13": 10.85093, + "14": 10.85516, + "15": 10.78294, + "16": 10.7735, + "17": 10.75018, + "18": 10.78378, + "19": 10.75892, + "20": 10.6994, + "21": 10.67278, + "22": 10.51458, + "23": 10.68081, + "24": 10.57159, + "25": 10.51778, + "26": 10.57633, + "27": 10.59163, + "28": 10.55359, + "29": 10.57084, "30": 10.3646, - "35": 10.25444, - "40": 10.10206, - "45": 9.84247, - "50": 9.92579 + "31": 10.1091, + "32": 10.45327, + "33": 10.43719, + "34": 10.20028, + "35": 10.25449, + "36": 10.23294, + "37": 10.35395, + "38": 10.20435, + "39": 10.3991, + "40": 10.10257, + "41": 10.12803, + "42": 10.21095, + "43": 9.83714, + "44": 9.96175, + "45": 9.84268, + "46": 9.80685, + "47": 10.14284, + "48": 9.86671, + "49": 9.53845, + "50": 9.92551 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 4776.0, - "5": 5514.0, - "10": 4403.0, - "15": 5180.0, - "20": 4969.0, - "25": 5011.0, - "30": 5227.0, - "35": 5579.0, - "40": 5764.0, - "45": 5881.0, - "50": 6673.0 + "1": 4814.0, + 
"2": 4952.0, + "3": 5040.0, + "4": 5015.0, + "5": 5519.0, + "6": 5551.0, + "7": 5268.0, + "8": 4810.0, + "9": 5397.0, + "10": 4501.0, + "11": 5891.0, + "12": 5339.0, + "13": 5837.0, + "14": 5809.0, + "15": 5355.0, + "16": 5453.0, + "17": 5423.0, + "18": 5110.0, + "19": 5401.0, + "20": 4905.0, + "21": 5349.0, + "22": 4914.0, + "23": 5700.0, + "24": 5043.0, + "25": 4863.0, + "26": 5343.0, + "27": 5411.0, + "28": 5792.0, + "29": 6026.0, + "30": 5282.0, + "31": 4823.0, + "32": 5676.0, + "33": 6043.0, + "34": 5245.0, + "35": 5629.0, + "36": 5372.0, + "37": 6399.0, + "38": 5915.0, + "39": 6572.0, + "40": 5759.0, + "41": 5969.0, + "42": 6425.0, + "43": 5757.0, + "44": 5808.0, + "45": 5780.0, + "46": 6040.0, + "47": 6533.0, + "48": 6375.0, + "49": 6343.0, + "50": 6648.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1145716736.0, - "5": 1146210304.0, - "10": 1146214400.0, - "15": 1145712640.0, + "2": 1145714688.0, + "3": 1146211840.0, + "4": 1145713152.0, + "5": 1146210816.0, + "6": 1145713664.0, + "7": 1145717248.0, + "8": 1145713664.0, + "9": 1145714688.0, + "10": 1146214912.0, + "11": 1145714176.0, + "12": 1145714176.0, + "13": 1146208768.0, + "14": 1146209280.0, + "15": 1145713152.0, + "16": 1146210304.0, + "17": 1145713664.0, + "18": 1146209280.0, + "19": 1145714176.0, "20": 1145715200.0, + "21": 1146210304.0, + "22": 1145715712.0, + "23": 1145715200.0, + "24": 1145713152.0, "25": 1145712128.0, + "26": 1145715200.0, + "27": 1145715200.0, + "28": 1145713664.0, + "29": 1145711616.0, "30": 1145714688.0, - "35": 1146213376.0, + "31": 1145717760.0, + "32": 1145713664.0, + "33": 1145714688.0, + "34": 1145715200.0, + "35": 1146212352.0, + "36": 1145713152.0, + "37": 1145712128.0, + "38": 1146208256.0, + "39": 1145715200.0, "40": 1146210816.0, - "45": 1146210304.0, - "50": 1146211328.0 + "41": 1145715712.0, + "42": 1145712640.0, + "43": 1146211840.0, + "44": 1145716736.0, + "45": 1146209280.0, + 
"46": 1146214400.0, + "47": 1145714688.0, + "48": 1145717760.0, + "49": 1146215424.0, + "50": 1145716224.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 1593766912.0, - "5": 2052878848.0, - "10": 2057082880.0, - "15": 2057082880.0, - "20": 2057082880.0, - "25": 2057082880.0, - "30": 2057082880.0, - "35": 2057082880.0, - "40": 2057082880.0, - "45": 2057082880.0, - "50": 2057082880.0 + "1": 1593775104.0, + "2": 2051463168.0, + "3": 2052884992.0, + "4": 2052884992.0, + "5": 2052884992.0, + "6": 2053490176.0, + "7": 2054021632.0, + "8": 2054517248.0, + "9": 2057131520.0, + "10": 2057131520.0, + "11": 2057131520.0, + "12": 2057131520.0, + "13": 2057131520.0, + "14": 2057131520.0, + "15": 2057131520.0, + "16": 2057131520.0, + "17": 2057131520.0, + "18": 2057131520.0, + "19": 2057131520.0, + "20": 2057131520.0, + "21": 2057131520.0, + "22": 2057131520.0, + "23": 2057131520.0, + "24": 2057131520.0, + "25": 2057131520.0, + "26": 2057131520.0, + "27": 2057131520.0, + "28": 2057131520.0, + "29": 2057131520.0, + "30": 2057131520.0, + "31": 2057131520.0, + "32": 2057131520.0, + "33": 2057131520.0, + "34": 2057131520.0, + "35": 2057131520.0, + "36": 2057131520.0, + "37": 2057131520.0, + "38": 2057131520.0, + "39": 2057131520.0, + "40": 2057131520.0, + "41": 2057131520.0, + "42": 2057131520.0, + "43": 2057131520.0, + "44": 2057131520.0, + "45": 2057131520.0, + "46": 2057131520.0, + "47": 2057131520.0, + "48": 2057131520.0, + "49": 2057131520.0, + "50": 2057131520.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 16.10299, - "5": 0.23726, - "10": 0.2493, - "15": 0.24042, - "20": 0.23243, - "25": 0.23678, - "30": 0.22651, - "35": 0.24325, - "40": 0.23894, - "45": 0.23878, - "50": 0.24489 + "1": 17.92077, + "2": 0.34824, + "3": 0.30032, + "4": 0.32972, + "5": 0.27324, + "6": 0.26945, + "7": 0.29877, + "8": 0.27354, + "9": 
0.26617, + "10": 0.28282, + "11": 0.26525, + "12": 0.2586, + "13": 0.27078, + "14": 0.25807, + "15": 0.27244, + "16": 0.26017, + "17": 0.27564, + "18": 0.26003, + "19": 0.25894, + "20": 0.26689, + "21": 0.26403, + "22": 0.26923, + "23": 0.27423, + "24": 0.25699, + "25": 0.26351, + "26": 0.26238, + "27": 0.26331, + "28": 0.27004, + "29": 0.2532, + "30": 0.2563, + "31": 0.27893, + "32": 0.27696, + "33": 0.25765, + "34": 0.27112, + "35": 0.26525, + "36": 0.25555, + "37": 0.25575, + "38": 0.26372, + "39": 0.25643, + "40": 0.25561, + "41": 0.26327, + "42": 0.25857, + "43": 0.26139, + "44": 0.26205, + "45": 0.25417, + "46": 0.28594, + "47": 0.27128, + "48": 0.2658, + "49": 0.27152, + "50": 0.26917 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..d48956be89e --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82721, + "2": 10.84035, + "3": 10.82731, + "4": 10.8193, + "5": 10.85656, + "6": 10.86991, + "7": 10.85176, + "8": 10.84458, + "9": 10.85252, + "10": 10.79217, + "11": 10.86529, + "12": 10.87083, + "13": 10.87071, + "14": 10.87878, + "15": 10.8256, + "16": 10.81248, + "17": 10.77483, + "18": 10.81066, + "19": 10.79672, + "20": 10.72242, + "21": 10.69688, + "22": 10.55103, + "23": 10.70528, + "24": 10.58973, + "25": 10.54425, + "26": 10.60032, + "27": 10.61999, + "28": 10.57405, + "29": 10.58627, + "30": 10.35725, + 
"31": 10.12171, + "32": 10.46994, + "33": 10.45695, + "34": 10.21593, + "35": 10.27139, + "36": 10.23585, + "37": 10.35223, + "38": 10.2059, + "39": 10.40125, + "40": 10.09684, + "41": 10.13886, + "42": 10.21812, + "43": 9.844, + "44": 9.96181, + "45": 9.84089, + "46": 9.81931, + "47": 10.13885, + "48": 9.85137, + "49": 9.53541, + "50": 9.92461 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4670.0, + "2": 4925.0, + "3": 4817.0, + "4": 4835.0, + "5": 5226.0, + "6": 5495.0, + "7": 5198.0, + "8": 4891.0, + "9": 5214.0, + "10": 4166.0, + "11": 5633.0, + "12": 5315.0, + "13": 5554.0, + "14": 5559.0, + "15": 5192.0, + "16": 5394.0, + "17": 5248.0, + "18": 5006.0, + "19": 5237.0, + "20": 4719.0, + "21": 5259.0, + "22": 4964.0, + "23": 5678.0, + "24": 4965.0, + "25": 4888.0, + "26": 5299.0, + "27": 5130.0, + "28": 5735.0, + "29": 5988.0, + "30": 5407.0, + "31": 4663.0, + "32": 5678.0, + "33": 6177.0, + "34": 5149.0, + "35": 5654.0, + "36": 5646.0, + "37": 6416.0, + "38": 6119.0, + "39": 6544.0, + "40": 5933.0, + "41": 5933.0, + "42": 6358.0, + "43": 5750.0, + "44": 5789.0, + "45": 5877.0, + "46": 6198.0, + "47": 6488.0, + "48": 6231.0, + "49": 6062.0, + "50": 6752.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1114775040.0, + "2": 1114774528.0, + "3": 1114772480.0, + "4": 1114774016.0, + "5": 1114770944.0, + "6": 1114771456.0, + "7": 1114772480.0, + "8": 1114771968.0, + "9": 1114774016.0, + "10": 1114771968.0, + "11": 1114773504.0, + "12": 1114772480.0, + "13": 1114776064.0, + "14": 1114770944.0, + "15": 1114774016.0, + "16": 1114771968.0, + "17": 1114770944.0, + "18": 1114774528.0, + "19": 1115379712.0, + "20": 1114772480.0, + "21": 1114772480.0, + "22": 1114771968.0, + "23": 1114771968.0, + "24": 1114775552.0, + "25": 1114771968.0, + "26": 1114774528.0, + "27": 1114772480.0, + "28": 1114773504.0, + "29": 1114774528.0, + "30": 1114770944.0, + 
"31": 1114777600.0, + "32": 1114773504.0, + "33": 1114770944.0, + "34": 1114772480.0, + "35": 1114775040.0, + "36": 1114770944.0, + "37": 1114771968.0, + "38": 1114772992.0, + "39": 1114771456.0, + "40": 1114774016.0, + "41": 1114774016.0, + "42": 1114775040.0, + "43": 1114775552.0, + "44": 1114774016.0, + "45": 1114772480.0, + "46": 1114774528.0, + "47": 1114772480.0, + "48": 1114770944.0, + "49": 1114768896.0, + "50": 1114774528.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563110912.0, + "2": 2019832832.0, + "3": 2019832832.0, + "4": 2020812288.0, + "5": 2020812288.0, + "6": 2020812288.0, + "7": 2020812288.0, + "8": 2020812288.0, + "9": 2020812288.0, + "10": 2024751616.0, + "11": 2024751616.0, + "12": 2024751616.0, + "13": 2026121728.0, + "14": 2026121728.0, + "15": 2026121728.0, + "16": 2026121728.0, + "17": 2026121728.0, + "18": 2026121728.0, + "19": 2026121728.0, + "20": 2026121728.0, + "21": 2026121728.0, + "22": 2026121728.0, + "23": 2026121728.0, + "24": 2026121728.0, + "25": 2026121728.0, + "26": 2026121728.0, + "27": 2026121728.0, + "28": 2026121728.0, + "29": 2026121728.0, + "30": 2026121728.0, + "31": 2028742656.0, + "32": 2028742656.0, + "33": 2028742656.0, + "34": 2028742656.0, + "35": 2028742656.0, + "36": 2028742656.0, + "37": 2028742656.0, + "38": 2028742656.0, + "39": 2028742656.0, + "40": 2028742656.0, + "41": 2028742656.0, + "42": 2028742656.0, + "43": 2028742656.0, + "44": 2028742656.0, + "45": 2028742656.0, + "46": 2028742656.0, + "47": 2028742656.0, + "48": 2028742656.0, + "49": 2028742656.0, + "50": 2028742656.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.92068, + "2": 0.40425, + "3": 0.34949, + "4": 0.34585, + "5": 0.34357, + "6": 0.34307, + "7": 0.34349, + "8": 0.34363, + "9": 0.34455, + "10": 0.34336, + "11": 0.34249, + "12": 0.34279, + "13": 0.34314, + "14": 0.34376, + "15": 0.34119, + "16": 
0.3408, + "17": 0.34177, + "18": 0.34009, + "19": 0.38762, + "20": 0.38864, + "21": 0.35834, + "22": 0.34233, + "23": 0.34258, + "24": 0.33896, + "25": 0.34661, + "26": 0.35239, + "27": 0.36394, + "28": 0.36314, + "29": 0.36104, + "30": 0.36054, + "31": 0.36036, + "32": 0.36349, + "33": 0.35945, + "34": 0.36271, + "35": 0.35678, + "36": 0.34046, + "37": 0.34187, + "38": 0.35806, + "39": 0.39525, + "40": 0.3435, + "41": 0.34593, + "42": 0.34164, + "43": 0.3405, + "44": 0.36624, + "45": 0.3662, + "46": 0.35554, + "47": 0.39304, + "48": 0.3749, + "49": 0.34201, + "50": 0.34231 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..bf890527985 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82721, + "2": 10.84035, + "3": 10.82731, + "4": 10.81928, + "5": 10.85683, + "6": 10.8698, + "7": 10.85147, + "8": 10.84484, + "9": 10.85252, + "10": 10.79142, + "11": 10.86555, + "12": 10.871, + "13": 10.87036, + "14": 10.87845, + "15": 10.82569, + "16": 10.81221, + "17": 10.7744, + "18": 10.81066, + "19": 10.79634, + "20": 10.7227, + "21": 10.6971, + "22": 10.55121, + "23": 10.70525, + "24": 10.59041, + "25": 10.54452, + "26": 10.60048, + "27": 10.62034, + "28": 10.57457, + "29": 10.58623, + "30": 10.35753, + "31": 10.12178, + "32": 10.46993, + "33": 10.45705, + "34": 10.21585, + "35": 10.27128, + "36": 10.23542, + "37": 10.35235, + "38": 
10.20634, + "39": 10.40108, + "40": 10.09667, + "41": 10.1389, + "42": 10.21808, + "43": 9.8441, + "44": 9.96205, + "45": 9.84118, + "46": 9.81927, + "47": 10.13911, + "48": 9.85152, + "49": 9.53526, + "50": 9.92459 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4746.0, + "2": 4973.0, + "3": 4892.0, + "4": 4874.0, + "5": 5506.0, + "6": 5432.0, + "7": 5176.0, + "8": 4842.0, + "9": 5339.0, + "10": 4379.0, + "11": 5515.0, + "12": 5341.0, + "13": 5380.0, + "14": 5634.0, + "15": 5225.0, + "16": 5387.0, + "17": 5339.0, + "18": 5069.0, + "19": 5247.0, + "20": 4850.0, + "21": 5323.0, + "22": 4896.0, + "23": 5748.0, + "24": 5014.0, + "25": 4847.0, + "26": 5322.0, + "27": 5362.0, + "28": 5664.0, + "29": 6074.0, + "30": 5529.0, + "31": 4774.0, + "32": 5603.0, + "33": 5954.0, + "34": 5052.0, + "35": 5715.0, + "36": 5575.0, + "37": 6245.0, + "38": 6130.0, + "39": 6515.0, + "40": 5938.0, + "41": 5907.0, + "42": 6316.0, + "43": 5659.0, + "44": 5930.0, + "45": 5838.0, + "46": 6112.0, + "47": 6528.0, + "48": 6294.0, + "49": 6282.0, + "50": 6606.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1114775040.0, + "2": 1114774528.0, + "3": 1114772480.0, + "4": 1114774016.0, + "5": 1114770944.0, + "6": 1114771456.0, + "7": 1114772992.0, + "8": 1114771456.0, + "9": 1114774016.0, + "10": 1114772992.0, + "11": 1114773504.0, + "12": 1114772992.0, + "13": 1114776576.0, + "14": 1114770944.0, + "15": 1114774016.0, + "16": 1114774016.0, + "17": 1114770432.0, + "18": 1114774528.0, + "19": 1114770432.0, + "20": 1114772480.0, + "21": 1114771456.0, + "22": 1114771968.0, + "23": 1114771968.0, + "24": 1114775040.0, + "25": 1114770944.0, + "26": 1114774528.0, + "27": 1114772992.0, + "28": 1114774016.0, + "29": 1114774528.0, + "30": 1114770944.0, + "31": 1114777600.0, + "32": 1114773504.0, + "33": 1114771968.0, + "34": 1114772480.0, + "35": 1114775040.0, + "36": 1114770432.0, + 
"37": 1114771968.0, + "38": 1114772992.0, + "39": 1114770432.0, + "40": 1114774016.0, + "41": 1114775040.0, + "42": 1114774016.0, + "43": 1114774528.0, + "44": 1114774016.0, + "45": 1114772480.0, + "46": 1114774528.0, + "47": 1114773504.0, + "48": 1114770432.0, + "49": 1114769920.0, + "50": 1114775040.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563110912.0, + "2": 2019832832.0, + "3": 2019832832.0, + "4": 2020751872.0, + "5": 2020751872.0, + "6": 2020751872.0, + "7": 2020751872.0, + "8": 2020751872.0, + "9": 2020751872.0, + "10": 2024683008.0, + "11": 2024683008.0, + "12": 2024683008.0, + "13": 2025170944.0, + "14": 2025170944.0, + "15": 2025170944.0, + "16": 2025170944.0, + "17": 2025170944.0, + "18": 2025170944.0, + "19": 2025170944.0, + "20": 2025170944.0, + "21": 2025170944.0, + "22": 2025170944.0, + "23": 2025170944.0, + "24": 2025170944.0, + "25": 2025170944.0, + "26": 2025170944.0, + "27": 2025170944.0, + "28": 2025170944.0, + "29": 2025170944.0, + "30": 2025170944.0, + "31": 2027281408.0, + "32": 2027281408.0, + "33": 2027281408.0, + "34": 2027281408.0, + "35": 2027281408.0, + "36": 2027281408.0, + "37": 2027281408.0, + "38": 2027281408.0, + "39": 2027281408.0, + "40": 2027281408.0, + "41": 2027281408.0, + "42": 2027281408.0, + "43": 2027281408.0, + "44": 2027281408.0, + "45": 2027281408.0, + "46": 2027281408.0, + "47": 2027281408.0, + "48": 2027281408.0, + "49": 2027281408.0, + "50": 2027281408.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.92351, + "2": 0.44162, + "3": 0.35291, + "4": 0.35092, + "5": 0.34453, + "6": 0.34539, + "7": 0.34192, + "8": 0.34196, + "9": 0.3428, + "10": 0.34254, + "11": 0.34053, + "12": 0.34338, + "13": 0.34149, + "14": 0.34237, + "15": 0.34549, + "16": 0.36487, + "17": 0.34819, + "18": 0.34282, + "19": 0.34387, + "20": 0.34346, + "21": 0.34257, + "22": 0.34498, + "23": 0.3426, + 
"24": 0.34129, + "25": 0.34497, + "26": 0.34552, + "27": 0.34229, + "28": 0.34963, + "29": 0.34554, + "30": 0.34365, + "31": 0.34384, + "32": 0.34359, + "33": 0.34344, + "34": 0.34432, + "35": 0.34398, + "36": 0.344, + "37": 0.34452, + "38": 0.34594, + "39": 0.34391, + "40": 0.34438, + "41": 0.34366, + "42": 0.34258, + "43": 0.34401, + "44": 0.34425, + "45": 0.34371, + "46": 0.34314, + "47": 0.34264, + "48": 0.34318, + "49": 0.34322, + "50": 0.34204 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..0c2d8bc15ac --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.7999, + "2": 10.80046, + "3": 10.80882, + "4": 10.78271, + "5": 10.82527, + "6": 10.83559, + "7": 10.81654, + "8": 10.81189, + "9": 10.81027, + "10": 10.77395, + "11": 10.85546, + "12": 10.82687, + "13": 10.85063, + "14": 10.85519, + "15": 10.78219, + "16": 10.77344, + "17": 10.75025, + "18": 10.78337, + "19": 10.75865, + "20": 10.69949, + "21": 10.67201, + "22": 10.51454, + "23": 10.68053, + "24": 10.57151, + "25": 10.51842, + "26": 10.57602, + "27": 10.59131, + "28": 10.55338, + "29": 10.5705, + "30": 10.36499, + "31": 10.10913, + "32": 10.45347, + "33": 10.43732, + "34": 10.20004, + "35": 10.2548, + "36": 10.23345, + "37": 10.35402, + "38": 10.2041, + "39": 10.39978, + "40": 10.10252, + "41": 10.12783, + "42": 10.21103, + "43": 9.83757, + "44": 9.96217, + "45": 
9.84252, + "46": 9.80674, + "47": 10.14274, + "48": 9.86654, + "49": 9.53815, + "50": 9.92567 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4865.0, + "2": 4889.0, + "3": 5053.0, + "4": 5128.0, + "5": 5538.0, + "6": 5637.0, + "7": 5195.0, + "8": 4942.0, + "9": 5569.0, + "10": 4503.0, + "11": 6001.0, + "12": 5343.0, + "13": 5607.0, + "14": 5820.0, + "15": 5246.0, + "16": 5419.0, + "17": 5489.0, + "18": 5301.0, + "19": 5323.0, + "20": 4805.0, + "21": 5272.0, + "22": 4832.0, + "23": 5649.0, + "24": 5122.0, + "25": 4835.0, + "26": 5369.0, + "27": 5430.0, + "28": 5771.0, + "29": 6155.0, + "30": 5193.0, + "31": 4946.0, + "32": 5822.0, + "33": 6136.0, + "34": 5157.0, + "35": 5508.0, + "36": 5439.0, + "37": 6566.0, + "38": 6146.0, + "39": 6504.0, + "40": 5752.0, + "41": 5973.0, + "42": 6371.0, + "43": 5634.0, + "44": 5975.0, + "45": 5779.0, + "46": 5939.0, + "47": 6534.0, + "48": 6362.0, + "49": 6390.0, + "50": 6421.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1144115200.0, + "2": 1144113152.0, + "3": 1144113664.0, + "4": 1144112640.0, + "5": 1144113664.0, + "6": 1144113664.0, + "7": 1144115200.0, + "8": 1144112128.0, + "9": 1144113152.0, + "10": 1144117248.0, + "11": 1144112640.0, + "12": 1144112640.0, + "13": 1144110592.0, + "14": 1144113664.0, + "15": 1144111616.0, + "16": 1144113152.0, + "17": 1144112128.0, + "18": 1144114176.0, + "19": 1144112640.0, + "20": 1144113664.0, + "21": 1144113152.0, + "22": 1144113664.0, + "23": 1144114176.0, + "24": 1144111616.0, + "25": 1144110592.0, + "26": 1144114688.0, + "27": 1144113664.0, + "28": 1144112128.0, + "29": 1144109568.0, + "30": 1144113152.0, + "31": 1144116224.0, + "32": 1144112128.0, + "33": 1144113152.0, + "34": 1144113664.0, + "35": 1144115712.0, + "36": 1144112128.0, + "37": 1144110592.0, + "38": 1144110592.0, + "39": 1144113664.0, + "40": 1144113664.0, + "41": 1144114176.0, + "42": 
1144111104.0, + "43": 1144114176.0, + "44": 1144116224.0, + "45": 1144112640.0, + "46": 1144116736.0, + "47": 1144113152.0, + "48": 1144116224.0, + "49": 1144117760.0, + "50": 1144114688.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1593766912.0, + "2": 2049587200.0, + "3": 2050706944.0, + "4": 2050706944.0, + "5": 2050706944.0, + "6": 2051856896.0, + "7": 2052133888.0, + "8": 2052133888.0, + "9": 2053136896.0, + "10": 2054898688.0, + "11": 2054898688.0, + "12": 2054898688.0, + "13": 2054898688.0, + "14": 2054898688.0, + "15": 2054898688.0, + "16": 2054898688.0, + "17": 2054898688.0, + "18": 2054898688.0, + "19": 2054898688.0, + "20": 2054898688.0, + "21": 2054898688.0, + "22": 2054898688.0, + "23": 2054898688.0, + "24": 2054898688.0, + "25": 2054898688.0, + "26": 2054898688.0, + "27": 2054898688.0, + "28": 2054898688.0, + "29": 2054898688.0, + "30": 2054898688.0, + "31": 2054898688.0, + "32": 2054898688.0, + "33": 2054898688.0, + "34": 2054898688.0, + "35": 2054898688.0, + "36": 2054898688.0, + "37": 2054898688.0, + "38": 2054898688.0, + "39": 2054898688.0, + "40": 2054898688.0, + "41": 2054898688.0, + "42": 2054898688.0, + "43": 2054898688.0, + "44": 2054898688.0, + "45": 2054898688.0, + "46": 2054898688.0, + "47": 2054898688.0, + "48": 2054898688.0, + "49": 2054898688.0, + "50": 2054898688.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 19.95177, + "2": 0.34433, + "3": 0.26792, + "4": 0.28931, + "5": 0.24286, + "6": 0.23522, + "7": 0.26191, + "8": 0.24179, + "9": 0.23443, + "10": 0.2479, + "11": 0.22843, + "12": 0.23568, + "13": 0.22851, + "14": 0.22301, + "15": 0.23496, + "16": 0.22557, + "17": 0.23185, + "18": 0.22478, + "19": 0.21988, + "20": 0.22721, + "21": 0.22747, + "22": 0.25032, + "23": 0.23584, + "24": 0.22392, + "25": 0.24076, + "26": 0.22602, + "27": 0.21942, + "28": 0.25471, + "29": 0.22059, + "30": 0.22483, + 
"31": 0.24893, + "32": 0.23382, + "33": 0.2228, + "34": 0.24334, + "35": 0.22325, + "36": 0.22492, + "37": 0.22009, + "38": 0.22761, + "39": 0.22117, + "40": 0.22618, + "41": 0.23324, + "42": 0.23137, + "43": 0.23, + "44": 0.23628, + "45": 0.22927, + "46": 0.24977, + "47": 0.23757, + "48": 0.24069, + "49": 0.254, + "50": 0.23443 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..d342471ff77 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.7999, + "2": 10.80046, + "3": 10.80877, + "4": 10.78226, + "5": 10.8254, + "6": 10.83596, + "7": 10.81676, + "8": 10.81163, + "9": 10.81106, + "10": 10.77366, + "11": 10.85495, + "12": 10.82711, + "13": 10.85109, + "14": 10.8546, + "15": 10.78267, + "16": 10.77358, + "17": 10.75036, + "18": 10.78319, + "19": 10.75876, + "20": 10.6992, + "21": 10.67244, + "22": 10.51382, + "23": 10.68112, + "24": 10.57174, + "25": 10.51756, + "26": 10.57624, + "27": 10.59185, + "28": 10.55401, + "29": 10.57113, + "30": 10.36465, + "31": 10.10866, + "32": 10.45338, + "33": 10.43764, + "34": 10.20033, + "35": 10.25433, + "36": 10.23362, + "37": 10.35369, + "38": 10.20443, + "39": 10.39917, + "40": 10.10245, + "41": 10.12765, + "42": 10.21106, + "43": 9.83722, + "44": 9.962, + "45": 9.84252, + "46": 9.80612, + "47": 10.14257, + "48": 9.86665, + "49": 9.5383, + "50": 9.92576 + } + }, + "num-zeros": { + "start_step": 1, + 
"end_step": 50, + "step_interval": 1, + "values": { + "1": 4827.0, + "2": 4935.0, + "3": 5030.0, + "4": 4956.0, + "5": 5583.0, + "6": 5594.0, + "7": 5325.0, + "8": 5098.0, + "9": 5335.0, + "10": 4581.0, + "11": 5895.0, + "12": 5249.0, + "13": 5692.0, + "14": 5736.0, + "15": 5303.0, + "16": 5347.0, + "17": 5361.0, + "18": 5322.0, + "19": 5407.0, + "20": 4961.0, + "21": 5441.0, + "22": 4776.0, + "23": 5752.0, + "24": 5157.0, + "25": 4897.0, + "26": 5202.0, + "27": 5455.0, + "28": 5769.0, + "29": 5911.0, + "30": 5256.0, + "31": 4674.0, + "32": 5854.0, + "33": 6080.0, + "34": 5278.0, + "35": 5743.0, + "36": 5523.0, + "37": 6477.0, + "38": 5839.0, + "39": 6711.0, + "40": 5852.0, + "41": 6062.0, + "42": 6501.0, + "43": 5605.0, + "44": 5883.0, + "45": 5763.0, + "46": 6076.0, + "47": 6613.0, + "48": 6348.0, + "49": 6430.0, + "50": 6699.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1145716736.0, + "2": 1145714688.0, + "3": 1145715200.0, + "4": 1145714176.0, + "5": 1146210816.0, + "6": 1146210304.0, + "7": 1145716736.0, + "8": 1146209792.0, + "9": 1145714688.0, + "10": 1146214912.0, + "11": 1145714176.0, + "12": 1145713664.0, + "13": 1145712128.0, + "14": 1146209280.0, + "15": 1145713152.0, + "16": 1146210304.0, + "17": 1145713664.0, + "18": 1146210304.0, + "19": 1145714176.0, + "20": 1145715200.0, + "21": 1146210304.0, + "22": 1145715712.0, + "23": 1145716224.0, + "24": 1145713152.0, + "25": 1145712128.0, + "26": 1145715200.0, + "27": 1146210304.0, + "28": 1145713664.0, + "29": 1145711104.0, + "30": 1145714688.0, + "31": 1146213376.0, + "32": 1145713152.0, + "33": 1145714688.0, + "34": 1145714688.0, + "35": 1146213376.0, + "36": 1145713664.0, + "37": 1145712128.0, + "38": 1146207744.0, + "39": 1145715200.0, + "40": 1146210816.0, + "41": 1145714688.0, + "42": 1145711104.0, + "43": 1146211840.0, + "44": 1145717248.0, + "45": 1145714688.0, + "46": 1146214400.0, + "47": 1145714688.0, + "48": 1145717248.0, + 
"49": 1146214912.0, + "50": 1145716224.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1593766912.0, + "2": 2051463168.0, + "3": 2052584960.0, + "4": 2052584960.0, + "5": 2052584960.0, + "6": 2053404160.0, + "7": 2054199296.0, + "8": 2054199296.0, + "9": 2056971776.0, + "10": 2057138688.0, + "11": 2057138688.0, + "12": 2057138688.0, + "13": 2057138688.0, + "14": 2057138688.0, + "15": 2057138688.0, + "16": 2057138688.0, + "17": 2057138688.0, + "18": 2057138688.0, + "19": 2057138688.0, + "20": 2057138688.0, + "21": 2057138688.0, + "22": 2057138688.0, + "23": 2057138688.0, + "24": 2057138688.0, + "25": 2057138688.0, + "26": 2057138688.0, + "27": 2057138688.0, + "28": 2057138688.0, + "29": 2057138688.0, + "30": 2057138688.0, + "31": 2057138688.0, + "32": 2057138688.0, + "33": 2057138688.0, + "34": 2057138688.0, + "35": 2057138688.0, + "36": 2057138688.0, + "37": 2057138688.0, + "38": 2057138688.0, + "39": 2057138688.0, + "40": 2057138688.0, + "41": 2057138688.0, + "42": 2057138688.0, + "43": 2057138688.0, + "44": 2057138688.0, + "45": 2057138688.0, + "46": 2057138688.0, + "47": 2057138688.0, + "48": 2057138688.0, + "49": 2057138688.0, + "50": 2057138688.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.99317, + "2": 0.35408, + "3": 0.30455, + "4": 0.32631, + "5": 0.27174, + "6": 0.27168, + "7": 0.29847, + "8": 0.27152, + "9": 0.27606, + "10": 0.27991, + "11": 0.25875, + "12": 0.25854, + "13": 0.26351, + "14": 0.2599, + "15": 0.26827, + "16": 0.25734, + "17": 0.26876, + "18": 0.26302, + "19": 0.25791, + "20": 0.26587, + "21": 0.26207, + "22": 0.2718, + "23": 0.27036, + "24": 0.2557, + "25": 0.27098, + "26": 0.2562, + "27": 0.25663, + "28": 0.28209, + "29": 0.25678, + "30": 0.26198, + "31": 0.27896, + "32": 0.26879, + "33": 0.25449, + "34": 0.27377, + "35": 0.25725, + "36": 0.25349, + "37": 0.2537, + "38": 0.26246, + "39": 0.25527, + 
"40": 0.25676, + "41": 0.26427, + "42": 0.25718, + "43": 0.26206, + "44": 0.25615, + "45": 0.261, + "46": 0.28413, + "47": 0.27633, + "48": 0.26455, + "49": 0.2706, + "50": 0.25944 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json index 99b3ed41c91..4383c914d8e 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.8277, - "5": 10.85649, - "10": 10.79211, - "15": 10.82563, - "20": 10.72221, - "25": 10.54409, - "30": 10.35728, - "35": 10.2714, - "40": 10.09718, - "45": 9.8411, - "50": 9.92428 + "2": 10.84068, + "3": 10.82705, + "4": 10.81913, + "5": 10.85673, + "6": 10.86984, + "7": 10.85119, + "8": 10.84465, + "9": 10.85269, + "10": 10.79157, + "11": 10.86571, + "12": 10.87169, + "13": 10.8708, + "14": 10.8787, + "15": 10.82554, + "16": 10.81251, + "17": 10.77478, + "18": 10.81068, + "19": 10.79632, + "20": 10.72175, + "21": 10.69765, + "22": 10.55138, + "23": 10.70555, + "24": 10.59005, + "25": 10.54425, + "26": 10.60036, + "27": 10.61973, + "28": 10.57442, + "29": 10.58656, + "30": 10.35754, + "31": 10.12169, + "32": 10.46987, + "33": 10.45722, + "34": 10.2158, + "35": 10.27086, + "36": 10.2354, + "37": 10.35246, + "38": 10.20574, + "39": 10.40061, + "40": 10.09681, + "41": 10.13869, + "42": 10.21829, + "43": 
9.84428, + "44": 9.9614, + "45": 9.84116, + "46": 9.81955, + "47": 10.13927, + "48": 9.85138, + "49": 9.53518, + "50": 9.92455 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 4700.0, - "5": 5362.0, - "10": 4330.0, - "15": 5291.0, - "20": 4879.0, - "25": 4857.0, - "30": 5475.0, - "35": 5683.0, - "40": 5899.0, - "45": 5910.0, - "50": 6643.0 + "1": 4627.0, + "2": 4785.0, + "3": 4887.0, + "4": 5134.0, + "5": 5403.0, + "6": 5457.0, + "7": 5140.0, + "8": 4876.0, + "9": 5213.0, + "10": 4396.0, + "11": 5749.0, + "12": 5182.0, + "13": 5436.0, + "14": 5431.0, + "15": 5327.0, + "16": 5452.0, + "17": 5245.0, + "18": 5116.0, + "19": 5216.0, + "20": 4869.0, + "21": 5326.0, + "22": 4832.0, + "23": 5719.0, + "24": 5017.0, + "25": 4980.0, + "26": 5288.0, + "27": 5346.0, + "28": 5727.0, + "29": 5937.0, + "30": 5289.0, + "31": 4777.0, + "32": 5616.0, + "33": 6137.0, + "34": 5140.0, + "35": 5690.0, + "36": 5739.0, + "37": 6425.0, + "38": 5962.0, + "39": 6620.0, + "40": 5921.0, + "41": 5820.0, + "42": 6472.0, + "43": 5860.0, + "44": 5731.0, + "45": 5769.0, + "46": 6130.0, + "47": 6576.0, + "48": 6403.0, + "49": 6084.0, + "50": 6648.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 1116857344.0, - "5": 1116853248.0, - "10": 1116854784.0, - "15": 1116856320.0, - "20": 1116853760.0, - "25": 1116854272.0, - "30": 1116853248.0, - "35": 1116857344.0, - "40": 1116855808.0, - "45": 1116854784.0, - "50": 1116857856.0 + "1": 1115810816.0, + "2": 1115809280.0, + "3": 1115807232.0, + "4": 1115809792.0, + "5": 1115806720.0, + "6": 1115807232.0, + "7": 1115808768.0, + "8": 1115807744.0, + "9": 1115809792.0, + "10": 1115808768.0, + "11": 1115808768.0, + "12": 1115808256.0, + "13": 1115811840.0, + "14": 1115807232.0, + "15": 1115809792.0, + "16": 1115808768.0, + "17": 1115806720.0, + "18": 1115809792.0, + "19": 1115806208.0, + "20": 1115808256.0, + "21": 
1115806208.0, + "22": 1115807744.0, + "23": 1115807744.0, + "24": 1115810304.0, + "25": 1115807744.0, + "26": 1115810304.0, + "27": 1115808256.0, + "28": 1115809280.0, + "29": 1115810304.0, + "30": 1115806720.0, + "31": 1115813376.0, + "32": 1115809792.0, + "33": 1115807744.0, + "34": 1115808256.0, + "35": 1115810816.0, + "36": 1115806208.0, + "37": 1115807744.0, + "38": 1115809792.0, + "39": 1115807232.0, + "40": 1115809792.0, + "41": 1115810816.0, + "42": 1115810816.0, + "43": 1115811328.0, + "44": 1115809792.0, + "45": 1115808768.0, + "46": 1115810304.0, + "47": 1115808256.0, + "48": 1115806208.0, + "49": 1115805184.0, + "50": 1115811328.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1562923008.0, - "5": 2023396352.0, - "10": 2024858112.0, - "15": 2026634240.0, - "20": 2026634240.0, - "25": 2026634240.0, - "30": 2026634240.0, - "35": 2029936128.0, - "40": 2029936128.0, - "45": 2029936128.0, - "50": 2029936128.0 + "2": 2021974528.0, + "3": 2021974528.0, + "4": 2023057408.0, + "5": 2023057408.0, + "6": 2023057408.0, + "7": 2023057408.0, + "8": 2023057408.0, + "9": 2023057408.0, + "10": 2026853376.0, + "11": 2026853376.0, + "12": 2026853376.0, + "13": 2026853376.0, + "14": 2026853376.0, + "15": 2026853376.0, + "16": 2026853376.0, + "17": 2026853376.0, + "18": 2026853376.0, + "19": 2026853376.0, + "20": 2026853376.0, + "21": 2026964992.0, + "22": 2026964992.0, + "23": 2026964992.0, + "24": 2026964992.0, + "25": 2026964992.0, + "26": 2026964992.0, + "27": 2026964992.0, + "28": 2026964992.0, + "29": 2026964992.0, + "30": 2026964992.0, + "31": 2030492160.0, + "32": 2030492160.0, + "33": 2030492160.0, + "34": 2030492160.0, + "35": 2030492160.0, + "36": 2030492160.0, + "37": 2030492160.0, + "38": 2030492160.0, + "39": 2030492160.0, + "40": 2030492160.0, + "41": 2030492160.0, + "42": 2030492160.0, + "43": 2030492160.0, + "44": 2030492160.0, + "45": 2030492160.0, + "46": 2030492160.0, + 
"47": 2030492160.0, + "48": 2030492160.0, + "49": 2030492160.0, + "50": 2030492160.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 21.72442, - "5": 0.36486, - "10": 0.36609, - "15": 0.36152, - "20": 0.36301, - "25": 0.36085, - "30": 0.36083, - "35": 0.36317, - "40": 0.35895, - "45": 0.35462, - "50": 0.34937 + "1": 18.3953, + "2": 0.37892, + "3": 0.34007, + "4": 0.3355, + "5": 0.33186, + "6": 0.33483, + "7": 0.3277, + "8": 0.32755, + "9": 0.32791, + "10": 0.32415, + "11": 0.32272, + "12": 0.32392, + "13": 0.33508, + "14": 0.31609, + "15": 0.31941, + "16": 0.3178, + "17": 0.31692, + "18": 0.31834, + "19": 0.32074, + "20": 0.31765, + "21": 0.31933, + "22": 0.32169, + "23": 0.32073, + "24": 0.31872, + "25": 0.32305, + "26": 0.32018, + "27": 0.32077, + "28": 0.32022, + "29": 0.31612, + "30": 0.31263, + "31": 0.31663, + "32": 0.31415, + "33": 0.31634, + "34": 0.31559, + "35": 0.31239, + "36": 0.31218, + "37": 0.31427, + "38": 0.31433, + "39": 0.31314, + "40": 0.313, + "41": 0.31331, + "42": 0.31314, + "43": 0.31359, + "44": 0.31884, + "45": 0.31165, + "46": 0.31278, + "47": 0.31273, + "48": 0.31668, + "49": 0.31177, + "50": 0.31472 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..4fcc118b15a --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8277, 
+ "2": 10.84068, + "3": 10.82714, + "4": 10.81884, + "5": 10.85728, + "6": 10.86967, + "7": 10.85152, + "8": 10.84475, + "9": 10.85262, + "10": 10.79178, + "11": 10.86557, + "12": 10.87118, + "13": 10.87048, + "14": 10.87859, + "15": 10.82536, + "16": 10.81201, + "17": 10.77492, + "18": 10.81058, + "19": 10.79647, + "20": 10.72219, + "21": 10.69747, + "22": 10.55109, + "23": 10.70545, + "24": 10.59037, + "25": 10.54404, + "26": 10.60056, + "27": 10.6198, + "28": 10.57404, + "29": 10.5863, + "30": 10.35713, + "31": 10.12151, + "32": 10.47043, + "33": 10.45666, + "34": 10.21561, + "35": 10.2715, + "36": 10.23562, + "37": 10.35244, + "38": 10.20598, + "39": 10.40084, + "40": 10.09662, + "41": 10.13854, + "42": 10.21819, + "43": 9.84461, + "44": 9.96191, + "45": 9.84123, + "46": 9.81958, + "47": 10.13898, + "48": 9.85141, + "49": 9.53538, + "50": 9.92427 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4663.0, + "2": 4900.0, + "3": 4885.0, + "4": 4989.0, + "5": 5412.0, + "6": 5512.0, + "7": 5196.0, + "8": 4835.0, + "9": 5183.0, + "10": 4430.0, + "11": 5618.0, + "12": 5155.0, + "13": 5430.0, + "14": 5486.0, + "15": 5243.0, + "16": 5345.0, + "17": 5174.0, + "18": 5152.0, + "19": 5229.0, + "20": 4720.0, + "21": 5279.0, + "22": 4870.0, + "23": 5653.0, + "24": 4987.0, + "25": 4930.0, + "26": 5230.0, + "27": 5136.0, + "28": 5923.0, + "29": 5833.0, + "30": 5420.0, + "31": 4687.0, + "32": 5606.0, + "33": 6087.0, + "34": 5166.0, + "35": 5579.0, + "36": 5643.0, + "37": 6381.0, + "38": 6032.0, + "39": 6660.0, + "40": 5774.0, + "41": 5952.0, + "42": 6422.0, + "43": 5957.0, + "44": 5847.0, + "45": 5675.0, + "46": 6132.0, + "47": 6540.0, + "48": 6342.0, + "49": 6080.0, + "50": 6648.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1114761216.0, + "2": 1114759680.0, + "3": 1114756608.0, + "4": 1114760192.0, + "5": 1114757120.0, + "6": 1114757632.0, + "7": 
1114759680.0, + "8": 1114757632.0, + "9": 1114760192.0, + "10": 1114758656.0, + "11": 1114758656.0, + "12": 1114759168.0, + "13": 1114762752.0, + "14": 1114757120.0, + "15": 1114760192.0, + "16": 1114759168.0, + "17": 1114757632.0, + "18": 1114761728.0, + "19": 1114757632.0, + "20": 1114758656.0, + "21": 1114758656.0, + "22": 1114758144.0, + "23": 1114758144.0, + "24": 1114761216.0, + "25": 1114758144.0, + "26": 1114760704.0, + "27": 1114758656.0, + "28": 1114759680.0, + "29": 1114760704.0, + "30": 1114757120.0, + "31": 1114763776.0, + "32": 1114759680.0, + "33": 1114758144.0, + "34": 1114758656.0, + "35": 1114761216.0, + "36": 1114756608.0, + "37": 1114758144.0, + "38": 1114759168.0, + "39": 1114758144.0, + "40": 1114760192.0, + "41": 1114761728.0, + "42": 1114761216.0, + "43": 1114761728.0, + "44": 1114760192.0, + "45": 1114759168.0, + "46": 1114759168.0, + "47": 1114759680.0, + "48": 1114756608.0, + "49": 1114755072.0, + "50": 1114761216.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563573248.0, + "2": 2019811840.0, + "3": 2019811840.0, + "4": 2020844544.0, + "5": 2020844544.0, + "6": 2020844544.0, + "7": 2020844544.0, + "8": 2020844544.0, + "9": 2020844544.0, + "10": 2022765056.0, + "11": 2022765056.0, + "12": 2022765056.0, + "13": 2025424384.0, + "14": 2025424384.0, + "15": 2025424384.0, + "16": 2025424384.0, + "17": 2025424384.0, + "18": 2025424384.0, + "19": 2025424384.0, + "20": 2025424384.0, + "21": 2025424384.0, + "22": 2025424384.0, + "23": 2025424384.0, + "24": 2025424384.0, + "25": 2025424384.0, + "26": 2025424384.0, + "27": 2025424384.0, + "28": 2025424384.0, + "29": 2025424384.0, + "30": 2025424384.0, + "31": 2027865600.0, + "32": 2027865600.0, + "33": 2027865600.0, + "34": 2027865600.0, + "35": 2027865600.0, + "36": 2027865600.0, + "37": 2027865600.0, + "38": 2027865600.0, + "39": 2027865600.0, + "40": 2027865600.0, + "41": 2027865600.0, + "42": 2027865600.0, + "43": 
2027865600.0, + "44": 2027865600.0, + "45": 2027865600.0, + "46": 2027865600.0, + "47": 2027865600.0, + "48": 2027865600.0, + "49": 2027865600.0, + "50": 2027865600.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.35833, + "2": 0.41869, + "3": 0.36543, + "4": 0.34709, + "5": 0.33564, + "6": 0.33325, + "7": 0.33079, + "8": 0.32901, + "9": 0.32623, + "10": 0.32947, + "11": 0.32518, + "12": 0.32588, + "13": 0.32491, + "14": 0.32913, + "15": 0.32376, + "16": 0.32422, + "17": 0.32793, + "18": 0.32466, + "19": 0.32256, + "20": 0.32888, + "21": 0.32611, + "22": 0.32289, + "23": 0.32585, + "24": 0.32069, + "25": 0.31969, + "26": 0.32564, + "27": 0.32022, + "28": 0.32015, + "29": 0.33015, + "30": 0.32397, + "31": 0.33512, + "32": 0.35571, + "33": 0.35217, + "34": 0.35178, + "35": 0.3531, + "36": 0.35005, + "37": 0.35174, + "38": 0.35672, + "39": 0.3522, + "40": 0.35137, + "41": 0.3597, + "42": 0.3514, + "43": 0.34943, + "44": 0.3423, + "45": 0.34024, + "46": 0.34465, + "47": 0.34043, + "48": 0.34108, + "49": 0.34462, + "50": 0.33863 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..fa073cf9e82 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8277, + "2": 10.84068, + "3": 10.8272, + "4": 10.81929, + "5": 10.85696, + "6": 10.86987, + "7": 10.85171, + "8": 10.84459, + 
"9": 10.85256, + "10": 10.79201, + "11": 10.86562, + "12": 10.8711, + "13": 10.87024, + "14": 10.87806, + "15": 10.82518, + "16": 10.81192, + "17": 10.77419, + "18": 10.81073, + "19": 10.79667, + "20": 10.72258, + "21": 10.69715, + "22": 10.55066, + "23": 10.70497, + "24": 10.59057, + "25": 10.54424, + "26": 10.6002, + "27": 10.61999, + "28": 10.5741, + "29": 10.58671, + "30": 10.35729, + "31": 10.12229, + "32": 10.47057, + "33": 10.45683, + "34": 10.216, + "35": 10.27106, + "36": 10.23572, + "37": 10.35232, + "38": 10.20564, + "39": 10.40105, + "40": 10.09702, + "41": 10.13866, + "42": 10.21783, + "43": 9.84408, + "44": 9.96172, + "45": 9.84126, + "46": 9.81956, + "47": 10.13914, + "48": 9.85116, + "49": 9.53564, + "50": 9.92445 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4700.0, + "2": 4941.0, + "3": 4879.0, + "4": 5033.0, + "5": 5483.0, + "6": 5460.0, + "7": 5198.0, + "8": 4858.0, + "9": 5126.0, + "10": 4376.0, + "11": 5570.0, + "12": 5203.0, + "13": 5521.0, + "14": 5427.0, + "15": 5181.0, + "16": 5391.0, + "17": 5179.0, + "18": 5030.0, + "19": 5304.0, + "20": 4943.0, + "21": 5245.0, + "22": 4859.0, + "23": 5613.0, + "24": 5111.0, + "25": 4846.0, + "26": 5147.0, + "27": 5309.0, + "28": 5797.0, + "29": 5929.0, + "30": 5357.0, + "31": 4733.0, + "32": 5718.0, + "33": 6104.0, + "34": 5218.0, + "35": 5554.0, + "36": 5610.0, + "37": 6378.0, + "38": 6206.0, + "39": 6498.0, + "40": 5948.0, + "41": 6006.0, + "42": 6256.0, + "43": 5824.0, + "44": 5788.0, + "45": 5746.0, + "46": 6111.0, + "47": 6493.0, + "48": 6237.0, + "49": 6304.0, + "50": 6666.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1114761216.0, + "2": 1114759680.0, + "3": 1114756608.0, + "4": 1114760192.0, + "5": 1114758144.0, + "6": 1114757632.0, + "7": 1114759680.0, + "8": 1114757632.0, + "9": 1114760192.0, + "10": 1114758656.0, + "11": 1114759680.0, + "12": 1114758144.0, + 
"13": 1114762752.0, + "14": 1114757120.0, + "15": 1114760192.0, + "16": 1114757120.0, + "17": 1114757120.0, + "18": 1114760192.0, + "19": 1114757120.0, + "20": 1114758656.0, + "21": 1114757632.0, + "22": 1114758144.0, + "23": 1114758144.0, + "24": 1114760704.0, + "25": 1114758144.0, + "26": 1114760704.0, + "27": 1114758656.0, + "28": 1114760192.0, + "29": 1114760704.0, + "30": 1114757120.0, + "31": 1114763776.0, + "32": 1114760192.0, + "33": 1114757120.0, + "34": 1114758656.0, + "35": 1114761216.0, + "36": 1114756608.0, + "37": 1114758144.0, + "38": 1114759168.0, + "39": 1114757632.0, + "40": 1114759680.0, + "41": 1114761216.0, + "42": 1114760192.0, + "43": 1114761728.0, + "44": 1114760192.0, + "45": 1114758656.0, + "46": 1114760192.0, + "47": 1114758656.0, + "48": 1114757120.0, + "49": 1114755072.0, + "50": 1114760192.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563573248.0, + "2": 2021675520.0, + "3": 2022020608.0, + "4": 2022438912.0, + "5": 2022438912.0, + "6": 2022438912.0, + "7": 2022438912.0, + "8": 2022438912.0, + "9": 2022438912.0, + "10": 2025787904.0, + "11": 2025787904.0, + "12": 2025787904.0, + "13": 2027309568.0, + "14": 2027309568.0, + "15": 2027309568.0, + "16": 2027309568.0, + "17": 2027309568.0, + "18": 2027309568.0, + "19": 2027309568.0, + "20": 2027309568.0, + "21": 2027309568.0, + "22": 2027309568.0, + "23": 2027309568.0, + "24": 2027309568.0, + "25": 2027309568.0, + "26": 2027309568.0, + "27": 2027309568.0, + "28": 2027309568.0, + "29": 2027309568.0, + "30": 2027309568.0, + "31": 2029440512.0, + "32": 2029440512.0, + "33": 2029440512.0, + "34": 2029440512.0, + "35": 2029440512.0, + "36": 2029440512.0, + "37": 2029440512.0, + "38": 2029440512.0, + "39": 2029440512.0, + "40": 2029440512.0, + "41": 2029440512.0, + "42": 2029440512.0, + "43": 2029440512.0, + "44": 2029440512.0, + "45": 2029440512.0, + "46": 2029440512.0, + "47": 2029440512.0, + "48": 2029440512.0, + 
"49": 2029440512.0, + "50": 2029440512.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.95291, + "2": 0.49442, + "3": 0.34431, + "4": 0.34311, + "5": 0.34183, + "6": 0.34138, + "7": 0.34639, + "8": 0.34265, + "9": 0.34183, + "10": 0.34356, + "11": 0.3425, + "12": 0.33847, + "13": 0.3416, + "14": 0.33396, + "15": 0.33683, + "16": 0.34022, + "17": 0.34114, + "18": 0.33741, + "19": 0.33884, + "20": 0.33846, + "21": 0.33805, + "22": 0.33745, + "23": 0.34007, + "24": 0.33732, + "25": 0.33138, + "26": 0.33193, + "27": 0.33201, + "28": 0.33221, + "29": 0.33258, + "30": 0.33151, + "31": 0.33323, + "32": 0.33272, + "33": 0.33137, + "34": 0.33328, + "35": 0.3321, + "36": 0.33173, + "37": 0.33275, + "38": 0.33386, + "39": 0.33182, + "40": 0.3331, + "41": 0.3318, + "42": 0.33143, + "43": 0.33272, + "44": 0.33166, + "45": 0.32995, + "46": 0.33258, + "47": 0.332, + "48": 0.33126, + "49": 0.33438, + "50": 0.32754 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json index ab35aab19fb..cd1596da3bc 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.81746, + "2": 10.82149, + "3": 10.82234, + "4": 10.79883, "5": 10.84067, + "6": 10.85636, + "7": 10.81775, + "8": 10.81498, + "9": 10.83664, "10": 10.7822, + "11": 10.85151, + "12": 10.84335, + "13": 
10.85001, + "14": 10.87346, "15": 10.80974, + "16": 10.80359, + "17": 10.75702, + "18": 10.80691, + "19": 10.78689, "20": 10.73095, + "21": 10.70872, + "22": 10.57886, + "23": 10.71772, + "24": 10.63253, "25": 10.57332, + "26": 10.62323, + "27": 10.63892, + "28": 10.60509, + "29": 10.61796, "30": 10.42067, + "31": 10.18074, + "32": 10.50619, + "33": 10.50937, + "34": 10.27626, "35": 10.3249, + "36": 10.29423, + "37": 10.40006, + "38": 10.26099, + "39": 10.44197, "40": 10.1644, + "41": 10.2004, + "42": 10.26981, + "43": 9.93054, + "44": 10.04184, "45": 9.9288, + "46": 9.89638, + "47": 10.18471, + "48": 9.93119, + "49": 9.62763, "50": 9.98402 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 5082.0, + "2": 5274.0, + "3": 5447.0, + "4": 5269.0, "5": 6020.0, + "6": 6160.0, + "7": 5592.0, + "8": 5309.0, + "9": 5743.0, "10": 4800.0, + "11": 6186.0, + "12": 5648.0, + "13": 6106.0, + "14": 6126.0, "15": 5600.0, + "16": 5819.0, + "17": 5669.0, + "18": 5547.0, + "19": 5711.0, "20": 5380.0, + "21": 5677.0, + "22": 5023.0, + "23": 6080.0, + "24": 5403.0, "25": 5120.0, + "26": 5431.0, + "27": 5866.0, + "28": 6035.0, + "29": 6154.0, "30": 5456.0, + "31": 4832.0, + "32": 5956.0, + "33": 6301.0, + "34": 5366.0, "35": 5900.0, + "36": 5703.0, + "37": 6744.0, + "38": 6098.0, + "39": 6737.0, "40": 5994.0, + "41": 6144.0, + "42": 6542.0, + "43": 5751.0, + "44": 5876.0, "45": 5795.0, + "46": 6162.0, + "47": 6736.0, + "48": 6331.0, + "49": 6235.0, "50": 6668.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 627718656.0, + "2": 627719168.0, + "3": 627719168.0, + "4": 627720704.0, "5": 627718656.0, + "6": 627718656.0, + "7": 627718144.0, + "8": 627718144.0, + "9": 627718144.0, "10": 627719168.0, + "11": 627719680.0, + "12": 627719168.0, + "13": 627719680.0, + "14": 627717120.0, "15": 627720192.0, + "16": 627717632.0, + "17": 627718144.0, + "18": 
627719680.0, + "19": 627719168.0, "20": 627717120.0, + "21": 627718144.0, + "22": 627720192.0, + "23": 627720192.0, + "24": 627718144.0, "25": 627718656.0, + "26": 627718144.0, + "27": 627717120.0, + "28": 627718656.0, + "29": 627717120.0, "30": 627720192.0, + "31": 627715072.0, + "32": 627720192.0, + "33": 627717632.0, + "34": 627719168.0, "35": 627716608.0, + "36": 627719168.0, + "37": 627718144.0, + "38": 627718656.0, + "39": 627715584.0, "40": 627717632.0, + "41": 627714560.0, + "42": 627718144.0, + "43": 627713536.0, + "44": 627714048.0, "45": 627719168.0, + "46": 627716096.0, + "47": 627717120.0, + "48": 627716608.0, + "49": 627715072.0, "50": 627718144.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 870138880.0, + "2": 1099332096.0, + "3": 1099950080.0, + "4": 1102007296.0, "5": 1102007296.0, + "6": 1102007296.0, + "7": 1102007296.0, + "8": 1102007296.0, + "9": 1102007296.0, "10": 1102007296.0, + "11": 1102007296.0, + "12": 1102007296.0, + "13": 1103012352.0, + "14": 1103012352.0, "15": 1103012352.0, + "16": 1103012352.0, + "17": 1103012352.0, + "18": 1103012352.0, + "19": 1103012352.0, "20": 1103012352.0, + "21": 1103012352.0, + "22": 1103012352.0, + "23": 1103012352.0, + "24": 1103012352.0, "25": 1103012352.0, + "26": 1103012352.0, + "27": 1103012352.0, + "28": 1103012352.0, + "29": 1103012352.0, "30": 1103012352.0, + "31": 1103012352.0, + "32": 1103012352.0, + "33": 1103012352.0, + "34": 1103012352.0, "35": 1103012352.0, + "36": 1103012352.0, + "37": 1103012352.0, + "38": 1103012352.0, + "39": 1103012352.0, "40": 1103012352.0, + "41": 1103012352.0, + "42": 1103012352.0, + "43": 1103012352.0, + "44": 1103012352.0, "45": 1103012352.0, + "46": 1103012352.0, + "47": 1103012352.0, + "48": 1103012352.0, + "49": 1103012352.0, "50": 1103012352.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 16.6451, - "5": 
0.44582, - "10": 0.44604, - "15": 0.45437, - "20": 0.44805, - "25": 0.44906, - "30": 0.44594, - "35": 0.44862, - "40": 0.45549, - "45": 0.44951, - "50": 0.44015 + "1": 18.1916, + "2": 0.59351, + "3": 0.53789, + "4": 0.55618, + "5": 0.51747, + "6": 0.51798, + "7": 0.53735, + "8": 0.51847, + "9": 0.51772, + "10": 0.51103, + "11": 0.51385, + "12": 0.50834, + "13": 0.51586, + "14": 0.50721, + "15": 0.53294, + "16": 0.51593, + "17": 0.51388, + "18": 0.51464, + "19": 0.50827, + "20": 0.50952, + "21": 0.50189, + "22": 0.50928, + "23": 0.50324, + "24": 0.50354, + "25": 0.50213, + "26": 0.49708, + "27": 0.49953, + "28": 0.50373, + "29": 0.50455, + "30": 0.50305, + "31": 0.50567, + "32": 0.50905, + "33": 0.50325, + "34": 0.51203, + "35": 0.52783, + "36": 0.51023, + "37": 0.50726, + "38": 0.52285, + "39": 0.50728, + "40": 0.52086, + "41": 0.51671, + "42": 0.51607, + "43": 0.51296, + "44": 0.51003, + "45": 0.51106, + "46": 0.53309, + "47": 0.52738, + "48": 0.5128, + "49": 0.53044, + "50": 0.50994 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..1a408849afc --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82196, + "2": 10.84132, + "3": 10.81128, + "4": 10.82231, + "5": 10.84518, + "6": 10.8626, + "7": 10.84391, + "8": 10.84701, + "9": 10.84948, + "10": 10.78921, + "11": 10.85726, + "12": 10.84459, + "13": 10.87146, + "14": 10.87456, + "15": 10.8336, + "16": 10.80914, + "17": 10.79111, + 
"18": 10.81065, + "19": 10.80588, + "20": 10.73505, + "21": 10.71444, + "22": 10.57729, + "23": 10.72656, + "24": 10.61835, + "25": 10.58138, + "26": 10.63781, + "27": 10.63741, + "28": 10.60575, + "29": 10.61061, + "30": 10.40958, + "31": 10.16916, + "32": 10.49914, + "33": 10.49662, + "34": 10.26146, + "35": 10.31467, + "36": 10.28534, + "37": 10.38868, + "38": 10.24742, + "39": 10.43812, + "40": 10.14618, + "41": 10.19703, + "42": 10.26135, + "43": 9.9103, + "44": 10.02321, + "45": 9.91713, + "46": 9.89492, + "47": 10.19337, + "48": 9.93091, + "49": 9.61227, + "50": 9.97428 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4960.0, + "2": 5301.0, + "3": 5425.0, + "4": 5034.0, + "5": 6022.0, + "6": 6072.0, + "7": 5250.0, + "8": 5157.0, + "9": 5645.0, + "10": 4813.0, + "11": 6049.0, + "12": 5580.0, + "13": 5963.0, + "14": 5902.0, + "15": 5586.0, + "16": 5890.0, + "17": 5611.0, + "18": 5514.0, + "19": 5628.0, + "20": 5068.0, + "21": 5603.0, + "22": 5087.0, + "23": 6008.0, + "24": 5364.0, + "25": 4868.0, + "26": 5594.0, + "27": 5626.0, + "28": 5973.0, + "29": 6225.0, + "30": 5528.0, + "31": 4650.0, + "32": 5940.0, + "33": 6315.0, + "34": 5284.0, + "35": 5700.0, + "36": 5633.0, + "37": 6648.0, + "38": 6194.0, + "39": 6933.0, + "40": 6137.0, + "41": 6314.0, + "42": 6416.0, + "43": 5714.0, + "44": 5892.0, + "45": 6030.0, + "46": 6086.0, + "47": 6881.0, + "48": 6386.0, + "49": 6242.0, + "50": 6652.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 598359040.0, + "2": 598359040.0, + "3": 598359040.0, + "4": 598359552.0, + "5": 598358016.0, + "6": 598358016.0, + "7": 598355456.0, + "8": 598359552.0, + "9": 598356480.0, + "10": 598356992.0, + "11": 598358016.0, + "12": 598359040.0, + "13": 598359040.0, + "14": 598358528.0, + "15": 598359040.0, + "16": 598358528.0, + "17": 598353408.0, + "18": 598358016.0, + "19": 598359040.0, + "20": 598357504.0, + 
"21": 598359040.0, + "22": 598354432.0, + "23": 598355968.0, + "24": 598356480.0, + "25": 598357504.0, + "26": 598356480.0, + "27": 598360064.0, + "28": 598358016.0, + "29": 598355456.0, + "30": 598358528.0, + "31": 598356480.0, + "32": 598356992.0, + "33": 598359552.0, + "34": 598358016.0, + "35": 598356480.0, + "36": 598358016.0, + "37": 598359040.0, + "38": 598358016.0, + "39": 598357504.0, + "40": 598357504.0, + "41": 598351872.0, + "42": 598358528.0, + "43": 598352896.0, + "44": 598354944.0, + "45": 598355968.0, + "46": 598351872.0, + "47": 598359040.0, + "48": 598354944.0, + "49": 598353408.0, + "50": 598358016.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 842904576.0, + "2": 1072649216.0, + "3": 1072649216.0, + "4": 1072649216.0, + "5": 1072649216.0, + "6": 1072649216.0, + "7": 1072649216.0, + "8": 1072649216.0, + "9": 1072649216.0, + "10": 1072649216.0, + "11": 1072649216.0, + "12": 1072649216.0, + "13": 1072649216.0, + "14": 1072709632.0, + "15": 1072709632.0, + "16": 1073532416.0, + "17": 1073532416.0, + "18": 1073532416.0, + "19": 1073532416.0, + "20": 1073532416.0, + "21": 1073532416.0, + "22": 1073532416.0, + "23": 1073532416.0, + "24": 1073532416.0, + "25": 1073532416.0, + "26": 1073532416.0, + "27": 1073532416.0, + "28": 1073532416.0, + "29": 1073532416.0, + "30": 1073532416.0, + "31": 1073532416.0, + "32": 1073532416.0, + "33": 1073532416.0, + "34": 1073532416.0, + "35": 1073532416.0, + "36": 1073532416.0, + "37": 1073532416.0, + "38": 1073532416.0, + "39": 1073532416.0, + "40": 1073532416.0, + "41": 1073532416.0, + "42": 1073532416.0, + "43": 1073532416.0, + "44": 1073532416.0, + "45": 1073532416.0, + "46": 1073532416.0, + "47": 1073532416.0, + "48": 1073532416.0, + "49": 1073532416.0, + "50": 1073532416.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.79929, + "2": 0.76107, + "3": 0.70012, + "4": 0.6957, + 
"5": 0.69356, + "6": 0.69449, + "7": 0.69404, + "8": 0.69622, + "9": 0.69268, + "10": 0.69289, + "11": 0.69397, + "12": 0.6939, + "13": 0.69543, + "14": 0.69343, + "15": 0.69367, + "16": 0.69313, + "17": 0.69312, + "18": 0.69243, + "19": 0.69103, + "20": 0.69247, + "21": 0.69344, + "22": 0.70018, + "23": 0.69201, + "24": 0.6925, + "25": 0.69194, + "26": 0.69263, + "27": 0.69615, + "28": 0.69387, + "29": 0.6943, + "30": 0.69451, + "31": 0.69337, + "32": 0.69257, + "33": 0.69262, + "34": 0.6935, + "35": 0.69273, + "36": 0.69514, + "37": 0.69327, + "38": 0.69244, + "39": 0.69222, + "40": 0.69263, + "41": 0.69355, + "42": 0.69577, + "43": 0.6959, + "44": 0.69514, + "45": 0.69357, + "46": 0.6948, + "47": 0.69457, + "48": 0.69365, + "49": 0.69508, + "50": 0.69782 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..6f16e0a8b0c --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82196, + "2": 10.84132, + "3": 10.81128, + "4": 10.82231, + "5": 10.84518, + "6": 10.8626, + "7": 10.84391, + "8": 10.84701, + "9": 10.84948, + "10": 10.78921, + "11": 10.85726, + "12": 10.84459, + "13": 10.87146, + "14": 10.87456, + "15": 10.8336, + "16": 10.80914, + "17": 10.79111, + "18": 10.81065, + "19": 10.80588, + "20": 10.73505, + "21": 10.71444, + "22": 10.57729, + "23": 10.72656, + "24": 10.61835, + "25": 10.58138, + "26": 10.63781, + "27": 10.63741, + "28": 10.60575, + "29": 10.61061, + "30": 10.40958, + "31": 
10.16916, + "32": 10.49914, + "33": 10.49662, + "34": 10.26146, + "35": 10.31467, + "36": 10.28534, + "37": 10.38868, + "38": 10.24742, + "39": 10.43812, + "40": 10.14618, + "41": 10.19703, + "42": 10.26135, + "43": 9.9103, + "44": 10.02321, + "45": 9.91713, + "46": 9.89492, + "47": 10.19337, + "48": 9.93091, + "49": 9.61227, + "50": 9.97428 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4960.0, + "2": 5301.0, + "3": 5425.0, + "4": 5034.0, + "5": 6022.0, + "6": 6072.0, + "7": 5250.0, + "8": 5157.0, + "9": 5645.0, + "10": 4813.0, + "11": 6049.0, + "12": 5580.0, + "13": 5963.0, + "14": 5902.0, + "15": 5586.0, + "16": 5890.0, + "17": 5611.0, + "18": 5514.0, + "19": 5628.0, + "20": 5068.0, + "21": 5603.0, + "22": 5087.0, + "23": 6008.0, + "24": 5364.0, + "25": 4868.0, + "26": 5594.0, + "27": 5626.0, + "28": 5973.0, + "29": 6225.0, + "30": 5528.0, + "31": 4650.0, + "32": 5940.0, + "33": 6315.0, + "34": 5284.0, + "35": 5700.0, + "36": 5633.0, + "37": 6648.0, + "38": 6194.0, + "39": 6933.0, + "40": 6137.0, + "41": 6314.0, + "42": 6416.0, + "43": 5714.0, + "44": 5892.0, + "45": 6030.0, + "46": 6086.0, + "47": 6881.0, + "48": 6386.0, + "49": 6242.0, + "50": 6652.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 598359040.0, + "2": 598359040.0, + "3": 598359040.0, + "4": 598359552.0, + "5": 598358016.0, + "6": 598358016.0, + "7": 598355456.0, + "8": 598359552.0, + "9": 598356480.0, + "10": 598356992.0, + "11": 598358016.0, + "12": 598359040.0, + "13": 598359040.0, + "14": 598358528.0, + "15": 598359040.0, + "16": 598358528.0, + "17": 598353408.0, + "18": 598358016.0, + "19": 598359040.0, + "20": 598357504.0, + "21": 598359040.0, + "22": 598354432.0, + "23": 598355968.0, + "24": 598356480.0, + "25": 598357504.0, + "26": 598356480.0, + "27": 598360064.0, + "28": 598358016.0, + "29": 598355456.0, + "30": 598358528.0, + "31": 598356480.0, + "32": 
598356992.0, + "33": 598359552.0, + "34": 598358016.0, + "35": 598356480.0, + "36": 598358016.0, + "37": 598359040.0, + "38": 598358016.0, + "39": 598357504.0, + "40": 598357504.0, + "41": 598351872.0, + "42": 598358528.0, + "43": 598352896.0, + "44": 598354944.0, + "45": 598355968.0, + "46": 598351872.0, + "47": 598359040.0, + "48": 598354944.0, + "49": 598353408.0, + "50": 598358016.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 842904576.0, + "2": 1072649216.0, + "3": 1072649216.0, + "4": 1072649216.0, + "5": 1072649216.0, + "6": 1072649216.0, + "7": 1072649216.0, + "8": 1072649216.0, + "9": 1072649216.0, + "10": 1072649216.0, + "11": 1072649216.0, + "12": 1072649216.0, + "13": 1072649216.0, + "14": 1072709632.0, + "15": 1072709632.0, + "16": 1073532416.0, + "17": 1073532416.0, + "18": 1073532416.0, + "19": 1073532416.0, + "20": 1073532416.0, + "21": 1073532416.0, + "22": 1073532416.0, + "23": 1073532416.0, + "24": 1073532416.0, + "25": 1073532416.0, + "26": 1073532416.0, + "27": 1073532416.0, + "28": 1073532416.0, + "29": 1073532416.0, + "30": 1073532416.0, + "31": 1073532416.0, + "32": 1073532416.0, + "33": 1073532416.0, + "34": 1073532416.0, + "35": 1073532416.0, + "36": 1073532416.0, + "37": 1073532416.0, + "38": 1073532416.0, + "39": 1073532416.0, + "40": 1073532416.0, + "41": 1073532416.0, + "42": 1073532416.0, + "43": 1073532416.0, + "44": 1073532416.0, + "45": 1073532416.0, + "46": 1073532416.0, + "47": 1073532416.0, + "48": 1073532416.0, + "49": 1073532416.0, + "50": 1073532416.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.38225, + "2": 0.74075, + "3": 0.6836, + "4": 0.67846, + "5": 0.68171, + "6": 0.67743, + "7": 0.67855, + "8": 0.68164, + "9": 0.69137, + "10": 0.69257, + "11": 0.689, + "12": 0.69315, + "13": 0.69937, + "14": 0.69826, + "15": 0.69347, + "16": 0.68684, + "17": 0.6817, + "18": 0.67679, + "19": 
0.67788, + "20": 0.67815, + "21": 0.67996, + "22": 0.67681, + "23": 0.67695, + "24": 0.67767, + "25": 0.67667, + "26": 0.67717, + "27": 0.67767, + "28": 0.67494, + "29": 0.67632, + "30": 0.67695, + "31": 0.67773, + "32": 0.67605, + "33": 0.6777, + "34": 0.6774, + "35": 0.67665, + "36": 0.68036, + "37": 0.6799, + "38": 0.67884, + "39": 0.68014, + "40": 0.68029, + "41": 0.68109, + "42": 0.68033, + "43": 0.6916, + "44": 0.68689, + "45": 0.68826, + "46": 0.6873, + "47": 0.69625, + "48": 0.68895, + "49": 0.69108, + "50": 0.6864 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..c922ef3f273 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.81746, + "2": 10.82149, + "3": 10.82234, + "4": 10.79883, + "5": 10.84067, + "6": 10.85636, + "7": 10.81775, + "8": 10.81498, + "9": 10.83664, + "10": 10.7822, + "11": 10.85151, + "12": 10.84335, + "13": 10.85001, + "14": 10.87346, + "15": 10.80974, + "16": 10.80359, + "17": 10.75702, + "18": 10.80691, + "19": 10.78689, + "20": 10.73095, + "21": 10.70872, + "22": 10.57886, + "23": 10.71772, + "24": 10.63253, + "25": 10.57332, + "26": 10.62323, + "27": 10.63892, + "28": 10.60509, + "29": 10.61796, + "30": 10.42067, + "31": 10.18074, + "32": 10.50619, + "33": 10.50937, + "34": 10.27626, + "35": 10.3249, + "36": 10.29423, + "37": 10.40006, + "38": 10.26099, + "39": 10.44197, + "40": 10.1644, + "41": 10.2004, + "42": 10.26981, + "43": 9.93054, + "44": 
10.04184, + "45": 9.9288, + "46": 9.89638, + "47": 10.18471, + "48": 9.93119, + "49": 9.62763, + "50": 9.98402 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5082.0, + "2": 5274.0, + "3": 5447.0, + "4": 5269.0, + "5": 6020.0, + "6": 6160.0, + "7": 5592.0, + "8": 5309.0, + "9": 5743.0, + "10": 4800.0, + "11": 6186.0, + "12": 5648.0, + "13": 6106.0, + "14": 6126.0, + "15": 5600.0, + "16": 5819.0, + "17": 5669.0, + "18": 5547.0, + "19": 5711.0, + "20": 5380.0, + "21": 5677.0, + "22": 5023.0, + "23": 6080.0, + "24": 5403.0, + "25": 5120.0, + "26": 5431.0, + "27": 5866.0, + "28": 6035.0, + "29": 6154.0, + "30": 5456.0, + "31": 4832.0, + "32": 5956.0, + "33": 6301.0, + "34": 5366.0, + "35": 5900.0, + "36": 5703.0, + "37": 6744.0, + "38": 6098.0, + "39": 6737.0, + "40": 5994.0, + "41": 6144.0, + "42": 6542.0, + "43": 5751.0, + "44": 5876.0, + "45": 5795.0, + "46": 6162.0, + "47": 6736.0, + "48": 6331.0, + "49": 6235.0, + "50": 6668.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 627718656.0, + "2": 627719168.0, + "3": 627719168.0, + "4": 627720704.0, + "5": 627718656.0, + "6": 627718656.0, + "7": 627718144.0, + "8": 627718144.0, + "9": 627718144.0, + "10": 627719168.0, + "11": 627719680.0, + "12": 627719168.0, + "13": 627719680.0, + "14": 627717120.0, + "15": 627720192.0, + "16": 627717632.0, + "17": 627718144.0, + "18": 627719680.0, + "19": 627719168.0, + "20": 627717120.0, + "21": 627718144.0, + "22": 627720192.0, + "23": 627720192.0, + "24": 627718144.0, + "25": 627718656.0, + "26": 627718144.0, + "27": 627717120.0, + "28": 627718656.0, + "29": 627717120.0, + "30": 627720192.0, + "31": 627715072.0, + "32": 627720192.0, + "33": 627717632.0, + "34": 627719168.0, + "35": 627716608.0, + "36": 627719168.0, + "37": 627718144.0, + "38": 627718656.0, + "39": 627715584.0, + "40": 627717632.0, + "41": 627714560.0, + "42": 627718144.0, + "43": 
627713536.0, + "44": 627714048.0, + "45": 627719168.0, + "46": 627716096.0, + "47": 627717120.0, + "48": 627716608.0, + "49": 627715072.0, + "50": 627718144.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 870138880.0, + "2": 1099332096.0, + "3": 1099950080.0, + "4": 1102007296.0, + "5": 1102007296.0, + "6": 1102007296.0, + "7": 1102007296.0, + "8": 1102007296.0, + "9": 1102007296.0, + "10": 1102007296.0, + "11": 1102007296.0, + "12": 1102007296.0, + "13": 1103012352.0, + "14": 1103012352.0, + "15": 1103012352.0, + "16": 1103012352.0, + "17": 1103012352.0, + "18": 1103012352.0, + "19": 1103012352.0, + "20": 1103012352.0, + "21": 1103012352.0, + "22": 1103012352.0, + "23": 1103012352.0, + "24": 1103012352.0, + "25": 1103012352.0, + "26": 1103012352.0, + "27": 1103012352.0, + "28": 1103012352.0, + "29": 1103012352.0, + "30": 1103012352.0, + "31": 1103012352.0, + "32": 1103012352.0, + "33": 1103012352.0, + "34": 1103012352.0, + "35": 1103012352.0, + "36": 1103012352.0, + "37": 1103012352.0, + "38": 1103012352.0, + "39": 1103012352.0, + "40": 1103012352.0, + "41": 1103012352.0, + "42": 1103012352.0, + "43": 1103012352.0, + "44": 1103012352.0, + "45": 1103012352.0, + "46": 1103012352.0, + "47": 1103012352.0, + "48": 1103012352.0, + "49": 1103012352.0, + "50": 1103012352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 20.31176, + "2": 0.54582, + "3": 0.4713, + "4": 0.49552, + "5": 0.45024, + "6": 0.44845, + "7": 0.46159, + "8": 0.44727, + "9": 0.45224, + "10": 0.44611, + "11": 0.44928, + "12": 0.4393, + "13": 0.44861, + "14": 0.43419, + "15": 0.46035, + "16": 0.44467, + "17": 0.44969, + "18": 0.45329, + "19": 0.45261, + "20": 0.47266, + "21": 0.44362, + "22": 0.44618, + "23": 0.44658, + "24": 0.44334, + "25": 0.45084, + "26": 0.4522, + "27": 0.44323, + "28": 0.44959, + "29": 0.44013, + "30": 0.44198, + "31": 0.44974, + "32": 0.44838, + 
"33": 0.4388, + "34": 0.46145, + "35": 0.4454, + "36": 0.43557, + "37": 0.43704, + "38": 0.45184, + "39": 0.43707, + "40": 0.43729, + "41": 0.44791, + "42": 0.44386, + "43": 0.44641, + "44": 0.43881, + "45": 0.45139, + "46": 0.46177, + "47": 0.46449, + "48": 0.44551, + "49": 0.47013, + "50": 0.44517 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..c9eee5d9463 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.81746, + "2": 10.82149, + "3": 10.82234, + "4": 10.79883, + "5": 10.84067, + "6": 10.85636, + "7": 10.81775, + "8": 10.81498, + "9": 10.83664, + "10": 10.7822, + "11": 10.85151, + "12": 10.84335, + "13": 10.85001, + "14": 10.87346, + "15": 10.80974, + "16": 10.80359, + "17": 10.75702, + "18": 10.80691, + "19": 10.78689, + "20": 10.73095, + "21": 10.70872, + "22": 10.57886, + "23": 10.71772, + "24": 10.63253, + "25": 10.57332, + "26": 10.62323, + "27": 10.63892, + "28": 10.60509, + "29": 10.61796, + "30": 10.42067, + "31": 10.18074, + "32": 10.50619, + "33": 10.50937, + "34": 10.27626, + "35": 10.3249, + "36": 10.29423, + "37": 10.40006, + "38": 10.26099, + "39": 10.44197, + "40": 10.1644, + "41": 10.2004, + "42": 10.26981, + "43": 9.93054, + "44": 10.04184, + "45": 9.9288, + "46": 9.89638, + "47": 10.18471, + "48": 9.93119, + "49": 9.62763, + "50": 9.98402 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5082.0, + "2": 5274.0, + "3": 
5447.0, + "4": 5269.0, + "5": 6020.0, + "6": 6160.0, + "7": 5592.0, + "8": 5309.0, + "9": 5743.0, + "10": 4800.0, + "11": 6186.0, + "12": 5648.0, + "13": 6106.0, + "14": 6126.0, + "15": 5600.0, + "16": 5819.0, + "17": 5669.0, + "18": 5547.0, + "19": 5711.0, + "20": 5380.0, + "21": 5677.0, + "22": 5023.0, + "23": 6080.0, + "24": 5403.0, + "25": 5120.0, + "26": 5431.0, + "27": 5866.0, + "28": 6035.0, + "29": 6154.0, + "30": 5456.0, + "31": 4832.0, + "32": 5956.0, + "33": 6301.0, + "34": 5366.0, + "35": 5900.0, + "36": 5703.0, + "37": 6744.0, + "38": 6098.0, + "39": 6737.0, + "40": 5994.0, + "41": 6144.0, + "42": 6542.0, + "43": 5751.0, + "44": 5876.0, + "45": 5795.0, + "46": 6162.0, + "47": 6736.0, + "48": 6331.0, + "49": 6235.0, + "50": 6668.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 627718656.0, + "2": 627719168.0, + "3": 627719168.0, + "4": 627720704.0, + "5": 627718656.0, + "6": 627718656.0, + "7": 627718144.0, + "8": 627718144.0, + "9": 627718144.0, + "10": 627719168.0, + "11": 627719680.0, + "12": 627719168.0, + "13": 627719680.0, + "14": 627717120.0, + "15": 627720192.0, + "16": 627717632.0, + "17": 627718144.0, + "18": 627719680.0, + "19": 627719168.0, + "20": 627717120.0, + "21": 627718144.0, + "22": 627720192.0, + "23": 627720192.0, + "24": 627718144.0, + "25": 627718656.0, + "26": 627718144.0, + "27": 627717120.0, + "28": 627718656.0, + "29": 627717120.0, + "30": 627720192.0, + "31": 627715072.0, + "32": 627720192.0, + "33": 627717632.0, + "34": 627719168.0, + "35": 627716608.0, + "36": 627719168.0, + "37": 627718144.0, + "38": 627718656.0, + "39": 627715584.0, + "40": 627717632.0, + "41": 627714560.0, + "42": 627718144.0, + "43": 627713536.0, + "44": 627714048.0, + "45": 627719168.0, + "46": 627716096.0, + "47": 627717120.0, + "48": 627716608.0, + "49": 627715072.0, + "50": 627718144.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 
1, + "values": { + "1": 870138880.0, + "2": 1099332096.0, + "3": 1099950080.0, + "4": 1102007296.0, + "5": 1102007296.0, + "6": 1102007296.0, + "7": 1102007296.0, + "8": 1102007296.0, + "9": 1102007296.0, + "10": 1102007296.0, + "11": 1102007296.0, + "12": 1102007296.0, + "13": 1103012352.0, + "14": 1103012352.0, + "15": 1103012352.0, + "16": 1103012352.0, + "17": 1103012352.0, + "18": 1103012352.0, + "19": 1103012352.0, + "20": 1103012352.0, + "21": 1103012352.0, + "22": 1103012352.0, + "23": 1103012352.0, + "24": 1103012352.0, + "25": 1103012352.0, + "26": 1103012352.0, + "27": 1103012352.0, + "28": 1103012352.0, + "29": 1103012352.0, + "30": 1103012352.0, + "31": 1103012352.0, + "32": 1103012352.0, + "33": 1103012352.0, + "34": 1103012352.0, + "35": 1103012352.0, + "36": 1103012352.0, + "37": 1103012352.0, + "38": 1103012352.0, + "39": 1103012352.0, + "40": 1103012352.0, + "41": 1103012352.0, + "42": 1103012352.0, + "43": 1103012352.0, + "44": 1103012352.0, + "45": 1103012352.0, + "46": 1103012352.0, + "47": 1103012352.0, + "48": 1103012352.0, + "49": 1103012352.0, + "50": 1103012352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.75731, + "2": 0.59137, + "3": 0.52847, + "4": 0.55398, + "5": 0.51736, + "6": 0.51707, + "7": 0.52895, + "8": 0.51861, + "9": 0.5181, + "10": 0.51717, + "11": 0.51445, + "12": 0.51129, + "13": 0.51494, + "14": 0.51037, + "15": 0.51828, + "16": 0.50983, + "17": 0.51156, + "18": 0.51029, + "19": 0.51087, + "20": 0.51452, + "21": 0.5039, + "22": 0.51296, + "23": 0.50822, + "24": 0.51693, + "25": 0.51087, + "26": 0.51188, + "27": 0.51138, + "28": 0.51374, + "29": 0.50808, + "30": 0.50936, + "31": 0.51301, + "32": 0.5132, + "33": 0.51, + "34": 0.51133, + "35": 0.51556, + "36": 0.51397, + "37": 0.51183, + "38": 0.51721, + "39": 0.50468, + "40": 0.50915, + "41": 0.51802, + "42": 0.51064, + "43": 0.51335, + "44": 0.50717, + "45": 0.51189, + "46": 0.52735, + "47": 0.52015, + 
"48": 0.50421, + "49": 0.5285, + "50": 0.50368 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json index d8f66f8d26b..4918ee299d7 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.81746, + "2": 10.82149, + "3": 10.82234, + "4": 10.79883, "5": 10.84067, + "6": 10.85636, + "7": 10.81775, + "8": 10.81498, + "9": 10.83664, "10": 10.7822, + "11": 10.85151, + "12": 10.84335, + "13": 10.85001, + "14": 10.87346, "15": 10.80974, + "16": 10.80359, + "17": 10.75702, + "18": 10.80691, + "19": 10.78689, "20": 10.73095, + "21": 10.70872, + "22": 10.57886, + "23": 10.71772, + "24": 10.63253, "25": 10.57332, + "26": 10.62323, + "27": 10.63892, + "28": 10.60509, + "29": 10.61796, "30": 10.42067, + "31": 10.18074, + "32": 10.50619, + "33": 10.50937, + "34": 10.27626, "35": 10.3249, + "36": 10.29423, + "37": 10.40006, + "38": 10.26099, + "39": 10.44197, "40": 10.1644, + "41": 10.2004, + "42": 10.26981, + "43": 9.93054, + "44": 10.04184, "45": 9.9288, + "46": 9.89638, + "47": 10.18471, + "48": 9.93119, + "49": 9.62763, "50": 9.98402 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 5082.0, + "2": 5274.0, + "3": 5447.0, + "4": 5269.0, "5": 6020.0, + "6": 6160.0, + "7": 5592.0, + "8": 5309.0, + "9": 5743.0, "10": 4800.0, + 
"11": 6186.0, + "12": 5648.0, + "13": 6106.0, + "14": 6126.0, "15": 5600.0, + "16": 5819.0, + "17": 5669.0, + "18": 5547.0, + "19": 5711.0, "20": 5380.0, + "21": 5677.0, + "22": 5023.0, + "23": 6080.0, + "24": 5403.0, "25": 5120.0, + "26": 5431.0, + "27": 5866.0, + "28": 6035.0, + "29": 6154.0, "30": 5456.0, + "31": 4832.0, + "32": 5956.0, + "33": 6301.0, + "34": 5366.0, "35": 5900.0, + "36": 5703.0, + "37": 6744.0, + "38": 6098.0, + "39": 6737.0, "40": 5994.0, + "41": 6144.0, + "42": 6542.0, + "43": 5751.0, + "44": 5876.0, "45": 5795.0, + "46": 6162.0, + "47": 6736.0, + "48": 6331.0, + "49": 6235.0, "50": 6668.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 627718656.0, + "2": 627719168.0, + "3": 627719168.0, + "4": 627720704.0, "5": 627718656.0, + "6": 627718656.0, + "7": 627718144.0, + "8": 627718144.0, + "9": 627718144.0, "10": 627719168.0, + "11": 627719680.0, + "12": 627719168.0, + "13": 627719680.0, + "14": 627717120.0, "15": 627720192.0, + "16": 627717632.0, + "17": 627718144.0, + "18": 627719680.0, + "19": 627719168.0, "20": 627717120.0, + "21": 627718144.0, + "22": 627720192.0, + "23": 627720192.0, + "24": 627718144.0, "25": 627718656.0, + "26": 627718144.0, + "27": 627717120.0, + "28": 627718656.0, + "29": 627717120.0, "30": 627720192.0, + "31": 627715072.0, + "32": 627720192.0, + "33": 627717632.0, + "34": 627719168.0, "35": 627716608.0, + "36": 627719168.0, + "37": 627718144.0, + "38": 627718656.0, + "39": 627715584.0, "40": 627717632.0, + "41": 627714560.0, + "42": 627718144.0, + "43": 627713536.0, + "44": 627714048.0, "45": 627719168.0, + "46": 627716096.0, + "47": 627717120.0, + "48": 627716608.0, + "49": 627715072.0, "50": 627718144.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 870138880.0, + "2": 1099332096.0, + "3": 1099950080.0, + "4": 1102007296.0, "5": 1102007296.0, + "6": 
1102007296.0, + "7": 1102007296.0, + "8": 1102007296.0, + "9": 1102007296.0, "10": 1102007296.0, + "11": 1102007296.0, + "12": 1102007296.0, + "13": 1103012352.0, + "14": 1103012352.0, "15": 1103012352.0, + "16": 1103012352.0, + "17": 1103012352.0, + "18": 1103012352.0, + "19": 1103012352.0, "20": 1103012352.0, + "21": 1103012352.0, + "22": 1103012352.0, + "23": 1103012352.0, + "24": 1103012352.0, "25": 1103012352.0, + "26": 1103012352.0, + "27": 1103012352.0, + "28": 1103012352.0, + "29": 1103012352.0, "30": 1103012352.0, + "31": 1103012352.0, + "32": 1103012352.0, + "33": 1103012352.0, + "34": 1103012352.0, "35": 1103012352.0, + "36": 1103012352.0, + "37": 1103012352.0, + "38": 1103012352.0, + "39": 1103012352.0, "40": 1103012352.0, + "41": 1103012352.0, + "42": 1103012352.0, + "43": 1103012352.0, + "44": 1103012352.0, "45": 1103012352.0, + "46": 1103012352.0, + "47": 1103012352.0, + "48": 1103012352.0, + "49": 1103012352.0, "50": 1103012352.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 20.08249, - "5": 0.4425, - "10": 0.44364, - "15": 0.4517, - "20": 0.45348, - "25": 0.44927, - "30": 0.44258, - "35": 0.45719, - "40": 0.44034, - "45": 0.45039, - "50": 0.45412 + "1": 17.91075, + "2": 0.58262, + "3": 0.51891, + "4": 0.5535, + "5": 0.50364, + "6": 0.50993, + "7": 0.51644, + "8": 0.5062, + "9": 0.50479, + "10": 0.50352, + "11": 0.50142, + "12": 0.50105, + "13": 0.50984, + "14": 0.49899, + "15": 0.5144, + "16": 0.49725, + "17": 0.50222, + "18": 0.50011, + "19": 0.50584, + "20": 0.502, + "21": 0.49935, + "22": 0.51276, + "23": 0.50351, + "24": 0.50235, + "25": 0.49997, + "26": 0.50146, + "27": 0.49644, + "28": 0.49951, + "29": 0.49788, + "30": 0.50224, + "31": 0.50481, + "32": 0.50353, + "33": 0.50198, + "34": 0.50088, + "35": 0.50994, + "36": 0.49922, + "37": 0.49884, + "38": 0.51305, + "39": 0.49951, + "40": 0.49857, + "41": 0.5133, + "42": 0.50758, + "43": 0.51002, + "44": 0.50205, + "45": 
0.51091, + "46": 0.52453, + "47": 0.52953, + "48": 0.50437, + "49": 0.52951, + "50": 0.50206 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..7d5050e9ca8 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82196, + "2": 10.84132, + "3": 10.81128, + "4": 10.82231, + "5": 10.84518, + "6": 10.8626, + "7": 10.84391, + "8": 10.84701, + "9": 10.84948, + "10": 10.78921, + "11": 10.85726, + "12": 10.84459, + "13": 10.87146, + "14": 10.87456, + "15": 10.8336, + "16": 10.80914, + "17": 10.79111, + "18": 10.81065, + "19": 10.80588, + "20": 10.73505, + "21": 10.71444, + "22": 10.57729, + "23": 10.72656, + "24": 10.61835, + "25": 10.58138, + "26": 10.63781, + "27": 10.63741, + "28": 10.60575, + "29": 10.61061, + "30": 10.40958, + "31": 10.16916, + "32": 10.49914, + "33": 10.49662, + "34": 10.26146, + "35": 10.31467, + "36": 10.28534, + "37": 10.38868, + "38": 10.24742, + "39": 10.43812, + "40": 10.14618, + "41": 10.19703, + "42": 10.26135, + "43": 9.9103, + "44": 10.02321, + "45": 9.91713, + "46": 9.89492, + "47": 10.19337, + "48": 9.93091, + "49": 9.61227, + "50": 9.97428 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4960.0, + "2": 5301.0, + "3": 5425.0, + "4": 5034.0, + "5": 6022.0, + "6": 6072.0, + "7": 5250.0, + "8": 5157.0, + "9": 5645.0, + "10": 4813.0, + "11": 6049.0, + "12": 5580.0, + "13": 5963.0, + 
"14": 5902.0, + "15": 5586.0, + "16": 5890.0, + "17": 5611.0, + "18": 5514.0, + "19": 5628.0, + "20": 5068.0, + "21": 5603.0, + "22": 5087.0, + "23": 6008.0, + "24": 5364.0, + "25": 4868.0, + "26": 5594.0, + "27": 5626.0, + "28": 5973.0, + "29": 6225.0, + "30": 5528.0, + "31": 4650.0, + "32": 5940.0, + "33": 6315.0, + "34": 5284.0, + "35": 5700.0, + "36": 5633.0, + "37": 6648.0, + "38": 6194.0, + "39": 6933.0, + "40": 6137.0, + "41": 6314.0, + "42": 6416.0, + "43": 5714.0, + "44": 5892.0, + "45": 6030.0, + "46": 6086.0, + "47": 6881.0, + "48": 6386.0, + "49": 6242.0, + "50": 6652.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 598359040.0, + "2": 598359040.0, + "3": 598359040.0, + "4": 598359552.0, + "5": 598358016.0, + "6": 598358016.0, + "7": 598355456.0, + "8": 598359552.0, + "9": 598356480.0, + "10": 598356992.0, + "11": 598358016.0, + "12": 598359040.0, + "13": 598359040.0, + "14": 598358528.0, + "15": 598359040.0, + "16": 598358528.0, + "17": 598353408.0, + "18": 598358016.0, + "19": 598359040.0, + "20": 598357504.0, + "21": 598359040.0, + "22": 598354432.0, + "23": 598355968.0, + "24": 598356480.0, + "25": 598357504.0, + "26": 598356480.0, + "27": 598360064.0, + "28": 598358016.0, + "29": 598355456.0, + "30": 598358528.0, + "31": 598356480.0, + "32": 598356992.0, + "33": 598359552.0, + "34": 598358016.0, + "35": 598356480.0, + "36": 598358016.0, + "37": 598359040.0, + "38": 598358016.0, + "39": 598357504.0, + "40": 598357504.0, + "41": 598351872.0, + "42": 598358528.0, + "43": 598352896.0, + "44": 598354944.0, + "45": 598355968.0, + "46": 598351872.0, + "47": 598359040.0, + "48": 598354944.0, + "49": 598353408.0, + "50": 598358016.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 842904576.0, + "2": 1072649216.0, + "3": 1072649216.0, + "4": 1072649216.0, + "5": 1072649216.0, + "6": 1072649216.0, + "7": 1072649216.0, + 
"8": 1072649216.0, + "9": 1072649216.0, + "10": 1072649216.0, + "11": 1072649216.0, + "12": 1072649216.0, + "13": 1072649216.0, + "14": 1072709632.0, + "15": 1072709632.0, + "16": 1073532416.0, + "17": 1073532416.0, + "18": 1073532416.0, + "19": 1073532416.0, + "20": 1073532416.0, + "21": 1073532416.0, + "22": 1073532416.0, + "23": 1073532416.0, + "24": 1073532416.0, + "25": 1073532416.0, + "26": 1073532416.0, + "27": 1073532416.0, + "28": 1073532416.0, + "29": 1073532416.0, + "30": 1073532416.0, + "31": 1073532416.0, + "32": 1073532416.0, + "33": 1073532416.0, + "34": 1073532416.0, + "35": 1073532416.0, + "36": 1073532416.0, + "37": 1073532416.0, + "38": 1073532416.0, + "39": 1073532416.0, + "40": 1073532416.0, + "41": 1073532416.0, + "42": 1073532416.0, + "43": 1073532416.0, + "44": 1073532416.0, + "45": 1073532416.0, + "46": 1073532416.0, + "47": 1073532416.0, + "48": 1073532416.0, + "49": 1073532416.0, + "50": 1073532416.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.25563, + "2": 0.84048, + "3": 0.76934, + "4": 0.70267, + "5": 0.70067, + "6": 0.73137, + "7": 0.70039, + "8": 0.69557, + "9": 0.69658, + "10": 0.69913, + "11": 0.69847, + "12": 0.70123, + "13": 0.69803, + "14": 0.74546, + "15": 0.69706, + "16": 0.69684, + "17": 0.69413, + "18": 0.6926, + "19": 0.69376, + "20": 0.69387, + "21": 0.69326, + "22": 0.78586, + "23": 0.72599, + "24": 0.72235, + "25": 0.72284, + "26": 0.69513, + "27": 0.69273, + "28": 0.69235, + "29": 0.69264, + "30": 0.69356, + "31": 0.6931, + "32": 0.69432, + "33": 0.69145, + "34": 0.69259, + "35": 0.69173, + "36": 0.69116, + "37": 0.69404, + "38": 0.69316, + "39": 0.69303, + "40": 0.6953, + "41": 0.6947, + "42": 0.69578, + "43": 0.69462, + "44": 0.69287, + "45": 0.69391, + "46": 0.69672, + "47": 0.69316, + "48": 0.69498, + "49": 0.70272, + "50": 0.688 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..9b45d0fd625 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82196, + "2": 10.84132, + "3": 10.81128, + "4": 10.82231, + "5": 10.84518, + "6": 10.8626, + "7": 10.84391, + "8": 10.84701, + "9": 10.84948, + "10": 10.78921, + "11": 10.85726, + "12": 10.84459, + "13": 10.87146, + "14": 10.87456, + "15": 10.8336, + "16": 10.80914, + "17": 10.79111, + "18": 10.81065, + "19": 10.80588, + "20": 10.73505, + "21": 10.71444, + "22": 10.57729, + "23": 10.72656, + "24": 10.61835, + "25": 10.58138, + "26": 10.63781, + "27": 10.63741, + "28": 10.60575, + "29": 10.61061, + "30": 10.40958, + "31": 10.16916, + "32": 10.49914, + "33": 10.49662, + "34": 10.26146, + "35": 10.31467, + "36": 10.28534, + "37": 10.38868, + "38": 10.24742, + "39": 10.43812, + "40": 10.14618, + "41": 10.19703, + "42": 10.26135, + "43": 9.9103, + "44": 10.02321, + "45": 9.91713, + "46": 9.89492, + "47": 10.19337, + "48": 9.93091, + "49": 9.61227, + "50": 9.97428 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4960.0, + "2": 5301.0, + "3": 5425.0, + "4": 5034.0, + "5": 6022.0, + "6": 6072.0, + "7": 5250.0, + "8": 5157.0, + "9": 5645.0, + "10": 4813.0, + "11": 6049.0, + "12": 5580.0, + "13": 5963.0, + "14": 5902.0, + "15": 5586.0, + "16": 5890.0, + "17": 5611.0, + "18": 5514.0, + "19": 5628.0, + "20": 5068.0, + "21": 5603.0, + "22": 5087.0, + "23": 
6008.0, + "24": 5364.0, + "25": 4868.0, + "26": 5594.0, + "27": 5626.0, + "28": 5973.0, + "29": 6225.0, + "30": 5528.0, + "31": 4650.0, + "32": 5940.0, + "33": 6315.0, + "34": 5284.0, + "35": 5700.0, + "36": 5633.0, + "37": 6648.0, + "38": 6194.0, + "39": 6933.0, + "40": 6137.0, + "41": 6314.0, + "42": 6416.0, + "43": 5714.0, + "44": 5892.0, + "45": 6030.0, + "46": 6086.0, + "47": 6881.0, + "48": 6386.0, + "49": 6242.0, + "50": 6652.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 598359040.0, + "2": 598359040.0, + "3": 598359040.0, + "4": 598359552.0, + "5": 598358016.0, + "6": 598358016.0, + "7": 598355456.0, + "8": 598359552.0, + "9": 598356480.0, + "10": 598356992.0, + "11": 598358016.0, + "12": 598359040.0, + "13": 598359040.0, + "14": 598358528.0, + "15": 598359040.0, + "16": 598358528.0, + "17": 598353408.0, + "18": 598358016.0, + "19": 598359040.0, + "20": 598357504.0, + "21": 598359040.0, + "22": 598354432.0, + "23": 598355968.0, + "24": 598356480.0, + "25": 598357504.0, + "26": 598356480.0, + "27": 598360064.0, + "28": 598358016.0, + "29": 598355456.0, + "30": 598358528.0, + "31": 598356480.0, + "32": 598356992.0, + "33": 598359552.0, + "34": 598358016.0, + "35": 598356480.0, + "36": 598358016.0, + "37": 598359040.0, + "38": 598358016.0, + "39": 598357504.0, + "40": 598357504.0, + "41": 598351872.0, + "42": 598358528.0, + "43": 598352896.0, + "44": 598354944.0, + "45": 598355968.0, + "46": 598351872.0, + "47": 598359040.0, + "48": 598354944.0, + "49": 598353408.0, + "50": 598358016.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 842904576.0, + "2": 1072649216.0, + "3": 1072649216.0, + "4": 1072649216.0, + "5": 1072649216.0, + "6": 1072649216.0, + "7": 1072649216.0, + "8": 1072649216.0, + "9": 1072649216.0, + "10": 1072649216.0, + "11": 1072649216.0, + "12": 1072649216.0, + "13": 1072649216.0, + "14": 1072709632.0, 
+ "15": 1072709632.0, + "16": 1073532416.0, + "17": 1073532416.0, + "18": 1073532416.0, + "19": 1073532416.0, + "20": 1073532416.0, + "21": 1073532416.0, + "22": 1073532416.0, + "23": 1073532416.0, + "24": 1073532416.0, + "25": 1073532416.0, + "26": 1073532416.0, + "27": 1073532416.0, + "28": 1073532416.0, + "29": 1073532416.0, + "30": 1073532416.0, + "31": 1073532416.0, + "32": 1073532416.0, + "33": 1073532416.0, + "34": 1073532416.0, + "35": 1073532416.0, + "36": 1073532416.0, + "37": 1073532416.0, + "38": 1073532416.0, + "39": 1073532416.0, + "40": 1073532416.0, + "41": 1073532416.0, + "42": 1073532416.0, + "43": 1073532416.0, + "44": 1073532416.0, + "45": 1073532416.0, + "46": 1073532416.0, + "47": 1073532416.0, + "48": 1073532416.0, + "49": 1073532416.0, + "50": 1073532416.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.19224, + "2": 0.80625, + "3": 0.70873, + "4": 0.71373, + "5": 0.75099, + "6": 0.7011, + "7": 0.70052, + "8": 0.70566, + "9": 0.71562, + "10": 0.72846, + "11": 0.69613, + "12": 0.7157, + "13": 0.69994, + "14": 0.69612, + "15": 0.69543, + "16": 0.69411, + "17": 0.69454, + "18": 0.69705, + "19": 0.6969, + "20": 0.69948, + "21": 0.69454, + "22": 0.69425, + "23": 0.69428, + "24": 0.69194, + "25": 0.69013, + "26": 0.69277, + "27": 0.68916, + "28": 0.69161, + "29": 0.69773, + "30": 0.68894, + "31": 0.69363, + "32": 0.69912, + "33": 0.7057, + "34": 0.70009, + "35": 0.7044, + "36": 0.69831, + "37": 0.69777, + "38": 0.70193, + "39": 0.69786, + "40": 0.69142, + "41": 0.70011, + "42": 0.70081, + "43": 0.70081, + "44": 0.70437, + "45": 0.70168, + "46": 0.69713, + "47": 0.70166, + "48": 0.69823, + "49": 0.67973, + "50": 0.68287 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..f80469c23a2 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.81746, + "2": 10.82149, + "3": 10.82234, + "4": 10.79883, + "5": 10.84067, + "6": 10.85636, + "7": 10.81775, + "8": 10.81498, + "9": 10.83664, + "10": 10.7822, + "11": 10.85151, + "12": 10.84335, + "13": 10.85001, + "14": 10.87346, + "15": 10.80974, + "16": 10.80359, + "17": 10.75702, + "18": 10.80691, + "19": 10.78689, + "20": 10.73095, + "21": 10.70872, + "22": 10.57886, + "23": 10.71772, + "24": 10.63253, + "25": 10.57332, + "26": 10.62323, + "27": 10.63892, + "28": 10.60509, + "29": 10.61796, + "30": 10.42067, + "31": 10.18074, + "32": 10.50619, + "33": 10.50937, + "34": 10.27626, + "35": 10.3249, + "36": 10.29423, + "37": 10.40006, + "38": 10.26099, + "39": 10.44197, + "40": 10.1644, + "41": 10.2004, + "42": 10.26981, + "43": 9.93054, + "44": 10.04184, + "45": 9.9288, + "46": 9.89638, + "47": 10.18471, + "48": 9.93119, + "49": 9.62763, + "50": 9.98402 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5082.0, + "2": 5274.0, + "3": 5447.0, + "4": 5269.0, + "5": 6020.0, + "6": 6160.0, + "7": 5592.0, + "8": 5309.0, + "9": 5743.0, + "10": 4800.0, + "11": 6186.0, + "12": 5648.0, + "13": 6106.0, + "14": 6126.0, + "15": 5600.0, + "16": 5819.0, + "17": 5669.0, + "18": 5547.0, + "19": 5711.0, + "20": 5380.0, + "21": 5677.0, + "22": 5023.0, + "23": 6080.0, + "24": 5403.0, + "25": 5120.0, + "26": 5431.0, + "27": 5866.0, + "28": 6035.0, + "29": 6154.0, + "30": 5456.0, + "31": 4832.0, + "32": 5956.0, + "33": 
6301.0, + "34": 5366.0, + "35": 5900.0, + "36": 5703.0, + "37": 6744.0, + "38": 6098.0, + "39": 6737.0, + "40": 5994.0, + "41": 6144.0, + "42": 6542.0, + "43": 5751.0, + "44": 5876.0, + "45": 5795.0, + "46": 6162.0, + "47": 6736.0, + "48": 6331.0, + "49": 6235.0, + "50": 6668.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 627718656.0, + "2": 627719168.0, + "3": 627719168.0, + "4": 627720704.0, + "5": 627718656.0, + "6": 627718656.0, + "7": 627718144.0, + "8": 627718144.0, + "9": 627718144.0, + "10": 627719168.0, + "11": 627719680.0, + "12": 627719168.0, + "13": 627719680.0, + "14": 627717120.0, + "15": 627720192.0, + "16": 627717632.0, + "17": 627718144.0, + "18": 627719680.0, + "19": 627719168.0, + "20": 627717120.0, + "21": 627718144.0, + "22": 627720192.0, + "23": 627720192.0, + "24": 627718144.0, + "25": 627718656.0, + "26": 627718144.0, + "27": 627717120.0, + "28": 627718656.0, + "29": 627717120.0, + "30": 627720192.0, + "31": 627715072.0, + "32": 627720192.0, + "33": 627717632.0, + "34": 627719168.0, + "35": 627716608.0, + "36": 627719168.0, + "37": 627718144.0, + "38": 627718656.0, + "39": 627715584.0, + "40": 627717632.0, + "41": 627714560.0, + "42": 627718144.0, + "43": 627713536.0, + "44": 627714048.0, + "45": 627719168.0, + "46": 627716096.0, + "47": 627717120.0, + "48": 627716608.0, + "49": 627715072.0, + "50": 627718144.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 870138880.0, + "2": 1099332096.0, + "3": 1099950080.0, + "4": 1102007296.0, + "5": 1102007296.0, + "6": 1102007296.0, + "7": 1102007296.0, + "8": 1102007296.0, + "9": 1102007296.0, + "10": 1102007296.0, + "11": 1102007296.0, + "12": 1102007296.0, + "13": 1103012352.0, + "14": 1103012352.0, + "15": 1103012352.0, + "16": 1103012352.0, + "17": 1103012352.0, + "18": 1103012352.0, + "19": 1103012352.0, + "20": 1103012352.0, + "21": 1103012352.0, + 
"22": 1103012352.0, + "23": 1103012352.0, + "24": 1103012352.0, + "25": 1103012352.0, + "26": 1103012352.0, + "27": 1103012352.0, + "28": 1103012352.0, + "29": 1103012352.0, + "30": 1103012352.0, + "31": 1103012352.0, + "32": 1103012352.0, + "33": 1103012352.0, + "34": 1103012352.0, + "35": 1103012352.0, + "36": 1103012352.0, + "37": 1103012352.0, + "38": 1103012352.0, + "39": 1103012352.0, + "40": 1103012352.0, + "41": 1103012352.0, + "42": 1103012352.0, + "43": 1103012352.0, + "44": 1103012352.0, + "45": 1103012352.0, + "46": 1103012352.0, + "47": 1103012352.0, + "48": 1103012352.0, + "49": 1103012352.0, + "50": 1103012352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 19.72199, + "2": 0.55482, + "3": 0.46042, + "4": 0.48082, + "5": 0.43967, + "6": 0.44947, + "7": 0.44996, + "8": 0.44231, + "9": 0.44422, + "10": 0.44437, + "11": 0.44012, + "12": 0.43933, + "13": 0.44783, + "14": 0.43652, + "15": 0.44961, + "16": 0.43438, + "17": 0.44393, + "18": 0.43947, + "19": 0.44737, + "20": 0.44146, + "21": 0.43755, + "22": 0.44263, + "23": 0.43321, + "24": 0.43572, + "25": 0.43146, + "26": 0.43427, + "27": 0.43127, + "28": 0.43972, + "29": 0.43162, + "30": 0.51076, + "31": 0.4451, + "32": 0.4416, + "33": 0.45169, + "34": 0.43371, + "35": 0.44399, + "36": 0.42875, + "37": 0.44051, + "38": 0.45464, + "39": 0.43269, + "40": 0.43351, + "41": 0.4407, + "42": 0.4495, + "43": 0.44929, + "44": 0.44083, + "45": 0.45508, + "46": 0.46229, + "47": 0.4728, + "48": 0.43019, + "49": 0.45756, + "50": 0.43145 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..25a8b5ae572 --- 
/dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.81746, + "2": 10.82149, + "3": 10.82234, + "4": 10.79883, + "5": 10.84067, + "6": 10.85636, + "7": 10.81775, + "8": 10.81498, + "9": 10.83664, + "10": 10.7822, + "11": 10.85151, + "12": 10.84335, + "13": 10.85001, + "14": 10.87346, + "15": 10.80974, + "16": 10.80359, + "17": 10.75702, + "18": 10.80691, + "19": 10.78689, + "20": 10.73095, + "21": 10.70872, + "22": 10.57886, + "23": 10.71772, + "24": 10.63253, + "25": 10.57332, + "26": 10.62323, + "27": 10.63892, + "28": 10.60509, + "29": 10.61796, + "30": 10.42067, + "31": 10.18074, + "32": 10.50619, + "33": 10.50937, + "34": 10.27626, + "35": 10.3249, + "36": 10.29423, + "37": 10.40006, + "38": 10.26099, + "39": 10.44197, + "40": 10.1644, + "41": 10.2004, + "42": 10.26981, + "43": 9.93054, + "44": 10.04184, + "45": 9.9288, + "46": 9.89638, + "47": 10.18471, + "48": 9.93119, + "49": 9.62763, + "50": 9.98402 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5082.0, + "2": 5274.0, + "3": 5447.0, + "4": 5269.0, + "5": 6020.0, + "6": 6160.0, + "7": 5592.0, + "8": 5309.0, + "9": 5743.0, + "10": 4800.0, + "11": 6186.0, + "12": 5648.0, + "13": 6106.0, + "14": 6126.0, + "15": 5600.0, + "16": 5819.0, + "17": 5669.0, + "18": 5547.0, + "19": 5711.0, + "20": 5380.0, + "21": 5677.0, + "22": 5023.0, + "23": 6080.0, + "24": 5403.0, + "25": 5120.0, + "26": 5431.0, + "27": 5866.0, + "28": 6035.0, + "29": 6154.0, + "30": 5456.0, + "31": 4832.0, + "32": 5956.0, + "33": 6301.0, + "34": 5366.0, + "35": 5900.0, + "36": 5703.0, + "37": 6744.0, + "38": 6098.0, + "39": 6737.0, + "40": 5994.0, + "41": 6144.0, + "42": 6542.0, + "43": 5751.0, + "44": 5876.0, + "45": 5795.0, + "46": 6162.0, + "47": 
6736.0, + "48": 6331.0, + "49": 6235.0, + "50": 6668.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 627718656.0, + "2": 627719168.0, + "3": 627719168.0, + "4": 627720704.0, + "5": 627718656.0, + "6": 627718656.0, + "7": 627718144.0, + "8": 627718144.0, + "9": 627718144.0, + "10": 627719168.0, + "11": 627719680.0, + "12": 627719168.0, + "13": 627719680.0, + "14": 627717120.0, + "15": 627720192.0, + "16": 627717632.0, + "17": 627718144.0, + "18": 627719680.0, + "19": 627719168.0, + "20": 627717120.0, + "21": 627718144.0, + "22": 627720192.0, + "23": 627720192.0, + "24": 627718144.0, + "25": 627718656.0, + "26": 627718144.0, + "27": 627717120.0, + "28": 627718656.0, + "29": 627717120.0, + "30": 627720192.0, + "31": 627715072.0, + "32": 627720192.0, + "33": 627717632.0, + "34": 627719168.0, + "35": 627716608.0, + "36": 627719168.0, + "37": 627718144.0, + "38": 627718656.0, + "39": 627715584.0, + "40": 627717632.0, + "41": 627714560.0, + "42": 627718144.0, + "43": 627713536.0, + "44": 627714048.0, + "45": 627719168.0, + "46": 627716096.0, + "47": 627717120.0, + "48": 627716608.0, + "49": 627715072.0, + "50": 627718144.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 870138880.0, + "2": 1099332096.0, + "3": 1099950080.0, + "4": 1102007296.0, + "5": 1102007296.0, + "6": 1102007296.0, + "7": 1102007296.0, + "8": 1102007296.0, + "9": 1102007296.0, + "10": 1102007296.0, + "11": 1102007296.0, + "12": 1102007296.0, + "13": 1103012352.0, + "14": 1103012352.0, + "15": 1103012352.0, + "16": 1103012352.0, + "17": 1103012352.0, + "18": 1103012352.0, + "19": 1103012352.0, + "20": 1103012352.0, + "21": 1103012352.0, + "22": 1103012352.0, + "23": 1103012352.0, + "24": 1103012352.0, + "25": 1103012352.0, + "26": 1103012352.0, + "27": 1103012352.0, + "28": 1103012352.0, + "29": 1103012352.0, + "30": 1103012352.0, + "31": 1103012352.0, + "32": 
1103012352.0, + "33": 1103012352.0, + "34": 1103012352.0, + "35": 1103012352.0, + "36": 1103012352.0, + "37": 1103012352.0, + "38": 1103012352.0, + "39": 1103012352.0, + "40": 1103012352.0, + "41": 1103012352.0, + "42": 1103012352.0, + "43": 1103012352.0, + "44": 1103012352.0, + "45": 1103012352.0, + "46": 1103012352.0, + "47": 1103012352.0, + "48": 1103012352.0, + "49": 1103012352.0, + "50": 1103012352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.91902, + "2": 0.59117, + "3": 0.52614, + "4": 0.54746, + "5": 0.5056, + "6": 0.50649, + "7": 0.52305, + "8": 0.50853, + "9": 0.50644, + "10": 0.50303, + "11": 0.50387, + "12": 0.50249, + "13": 0.51153, + "14": 0.49861, + "15": 0.51318, + "16": 0.50066, + "17": 0.50888, + "18": 0.50788, + "19": 0.51533, + "20": 0.51425, + "21": 0.51111, + "22": 0.5116, + "23": 0.50626, + "24": 0.5049, + "25": 0.51101, + "26": 0.50993, + "27": 0.5073, + "28": 0.50949, + "29": 0.50784, + "30": 0.50783, + "31": 0.51255, + "32": 0.51065, + "33": 0.50731, + "34": 0.50768, + "35": 0.51749, + "36": 0.50656, + "37": 0.51012, + "38": 0.51668, + "39": 0.50475, + "40": 0.50784, + "41": 0.51405, + "42": 0.51014, + "43": 0.51186, + "44": 0.50532, + "45": 0.51211, + "46": 0.52864, + "47": 0.52545, + "48": 0.50927, + "49": 0.52883, + "50": 0.50373 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json index 9010e3064a4..90c75c99e13 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.80475, + "2": 10.821, + "3": 10.8216, + "4": 10.79306, "5": 10.84831, + "6": 10.85888, + "7": 10.83177, + "8": 10.82362, + "9": 10.83757, "10": 10.78732, + "11": 10.86732, + "12": 10.85395, + "13": 10.86171, + "14": 10.88343, "15": 10.79765, + "16": 10.79986, + "17": 10.76238, + "18": 10.80286, + "19": 10.7945, "20": 10.71733, + "21": 10.70194, + "22": 10.55147, + "23": 10.72167, + "24": 10.60698, "25": 10.54614, + "26": 10.6136, + "27": 10.63974, + "28": 10.60486, + "29": 10.62277, "30": 10.41109, + "31": 10.1456, + "32": 10.51017, + "33": 10.50089, + "34": 10.25812, "35": 10.3154, + "36": 10.27895, + "37": 10.41061, + "38": 10.25908, + "39": 10.45334, "40": 10.1604, + "41": 10.20557, + "42": 10.26792, + "43": 9.90468, + "44": 10.03233, "45": 9.91098, + "46": 9.87857, + "47": 10.20952, + "48": 9.93178, + "49": 9.61584, "50": 9.98565 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 5474.0, + "2": 5853.0, + "3": 5875.0, + "4": 6041.0, "5": 6601.0, + "6": 6654.0, + "7": 6135.0, + "8": 5761.0, + "9": 6505.0, "10": 5497.0, + "11": 6994.0, + "12": 6523.0, + "13": 6807.0, + "14": 6969.0, "15": 6154.0, + "16": 6667.0, + "17": 6368.0, + "18": 6298.0, + "19": 6353.0, "20": 5998.0, + "21": 6264.0, + "22": 5628.0, + "23": 6620.0, + "24": 6063.0, "25": 5649.0, + "26": 6226.0, + "27": 6409.0, + "28": 6790.0, + "29": 7055.0, "30": 6430.0, + "31": 5565.0, + "32": 6615.0, + "33": 6969.0, + "34": 6107.0, "35": 6538.0, + "36": 6486.0, + "37": 7272.0, + "38": 6923.0, + "39": 7497.0, "40": 6997.0, + "41": 6747.0, + "42": 7228.0, + "43": 6629.0, + "44": 6752.0, "45": 6557.0, + "46": 6904.0, + "47": 7474.0, + "48": 7165.0, + "49": 7244.0, "50": 7331.0 
} }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 491766784.0, + "2": 491767296.0, + "3": 491765760.0, + "4": 491767296.0, "5": 491766784.0, + "6": 491767808.0, + "7": 491767296.0, + "8": 491768320.0, + "9": 491767808.0, "10": 491767296.0, + "11": 491765248.0, + "12": 491764736.0, + "13": 491766272.0, + "14": 491767808.0, "15": 491768832.0, + "16": 491769856.0, + "17": 491767296.0, + "18": 491765248.0, + "19": 491766272.0, "20": 491766784.0, + "21": 491768320.0, + "22": 491768320.0, + "23": 491765760.0, + "24": 491766272.0, "25": 491766272.0, + "26": 491767296.0, + "27": 491766784.0, + "28": 491767296.0, + "29": 491766272.0, "30": 491766272.0, + "31": 491767808.0, + "32": 491765760.0, + "33": 491764736.0, + "34": 491768320.0, "35": 491769344.0, + "36": 491765760.0, + "37": 491765248.0, + "38": 491766272.0, + "39": 491767808.0, "40": 491765760.0, + "41": 491768320.0, + "42": 491766272.0, + "43": 491768832.0, + "44": 491768320.0, "45": 491765248.0, + "46": 491768320.0, + "47": 491765760.0, + "48": 491766784.0, + "49": 491766784.0, "50": 491765248.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1047229440.0, + "2": 1213900288.0, + "3": 1213900288.0, + "4": 1213900288.0, "5": 1213900288.0, + "6": 1213900288.0, + "7": 1213900288.0, + "8": 1213900288.0, + "9": 1213900288.0, "10": 1213900288.0, + "11": 1213900288.0, + "12": 1213900288.0, + "13": 1213900288.0, + "14": 1213900288.0, "15": 1213900288.0, + "16": 1213900288.0, + "17": 1213900288.0, + "18": 1213900288.0, + "19": 1213900288.0, "20": 1213900288.0, + "21": 1213900288.0, + "22": 1213900288.0, + "23": 1213900288.0, + "24": 1213900288.0, "25": 1213900288.0, + "26": 1213900288.0, + "27": 1213900288.0, + "28": 1213900288.0, + "29": 1213900288.0, "30": 1213900288.0, + "31": 1213900288.0, + "32": 1213900288.0, + "33": 1213900288.0, + "34": 1213900288.0, "35": 
1213900288.0, + "36": 1213900288.0, + "37": 1213900288.0, + "38": 1213900288.0, + "39": 1213900288.0, "40": 1213900288.0, + "41": 1213900288.0, + "42": 1213900288.0, + "43": 1213900288.0, + "44": 1213900288.0, "45": 1213900288.0, + "46": 1213900288.0, + "47": 1213900288.0, + "48": 1213900288.0, + "49": 1213900288.0, "50": 1213900288.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 15.3547, - "5": 0.36735, - "10": 0.37327, - "15": 0.36612, - "20": 0.37034, - "25": 0.36884, - "30": 0.37157, - "35": 0.38429, - "40": 0.38666, - "45": 0.39183, - "50": 0.38705 + "1": 13.19467, + "2": 0.48448, + "3": 0.44871, + "4": 0.46924, + "5": 0.42566, + "6": 0.43083, + "7": 0.43901, + "8": 0.42599, + "9": 0.42583, + "10": 0.42829, + "11": 0.4235, + "12": 0.42225, + "13": 0.4285, + "14": 0.42372, + "15": 0.43098, + "16": 0.4172, + "17": 0.43302, + "18": 0.41927, + "19": 0.4331, + "20": 0.43471, + "21": 0.41939, + "22": 0.43275, + "23": 0.41768, + "24": 0.42806, + "25": 0.42095, + "26": 0.42731, + "27": 0.42655, + "28": 0.42892, + "29": 0.42736, + "30": 0.42769, + "31": 0.43481, + "32": 0.4238, + "33": 0.42194, + "34": 0.43633, + "35": 0.43921, + "36": 0.43121, + "37": 0.42193, + "38": 0.42605, + "39": 0.42408, + "40": 0.42556, + "41": 0.43247, + "42": 0.42213, + "43": 0.44451, + "44": 0.42353, + "45": 0.42949, + "46": 0.46147, + "47": 0.44954, + "48": 0.44275, + "49": 0.44961, + "50": 0.4304 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..c47332e4152 --- /dev/null +++ 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79175, + "2": 10.80907, + "3": 10.81011, + "4": 10.78146, + "5": 10.82288, + "6": 10.84057, + "7": 10.81192, + "8": 10.80005, + "9": 10.81667, + "10": 10.7688, + "11": 10.8618, + "12": 10.84042, + "13": 10.84452, + "14": 10.86421, + "15": 10.79157, + "16": 10.78199, + "17": 10.75122, + "18": 10.79446, + "19": 10.79523, + "20": 10.71001, + "21": 10.68811, + "22": 10.53736, + "23": 10.7066, + "24": 10.58865, + "25": 10.54662, + "26": 10.59492, + "27": 10.62142, + "28": 10.5969, + "29": 10.60036, + "30": 10.39407, + "31": 10.12951, + "32": 10.49684, + "33": 10.48779, + "34": 10.24347, + "35": 10.30461, + "36": 10.26056, + "37": 10.38859, + "38": 10.24848, + "39": 10.43799, + "40": 10.13303, + "41": 10.18651, + "42": 10.25823, + "43": 9.892, + "44": 10.02576, + "45": 9.90015, + "46": 9.88387, + "47": 10.19565, + "48": 9.91255, + "49": 9.60147, + "50": 9.97874 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5656.0, + "2": 6018.0, + "3": 5790.0, + "4": 5941.0, + "5": 6476.0, + "6": 6653.0, + "7": 6287.0, + "8": 5875.0, + "9": 6239.0, + "10": 5453.0, + "11": 6936.0, + "12": 6711.0, + "13": 6655.0, + "14": 6814.0, + "15": 6233.0, + "16": 6533.0, + "17": 6397.0, + "18": 6112.0, + "19": 6678.0, + "20": 5837.0, + "21": 6403.0, + "22": 5715.0, + "23": 6744.0, + "24": 6051.0, + "25": 5811.0, + "26": 6104.0, + "27": 6484.0, + "28": 6884.0, + "29": 7253.0, + "30": 6047.0, + "31": 5593.0, + "32": 6625.0, + "33": 7054.0, + "34": 6104.0, + "35": 6712.0, + "36": 6684.0, + "37": 7523.0, + "38": 7273.0, + "39": 7620.0, + "40": 7062.0, + "41": 6895.0, + "42": 7426.0, + "43": 6713.0, + "44": 6664.0, + "45": 6681.0, + "46": 6923.0, + "47": 7705.0, + "48": 
7248.0, + "49": 7331.0, + "50": 7527.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 462408192.0, + "2": 462406144.0, + "3": 462409728.0, + "4": 462406144.0, + "5": 462407680.0, + "6": 462408192.0, + "7": 462410752.0, + "8": 462410752.0, + "9": 462407168.0, + "10": 462410240.0, + "11": 462408192.0, + "12": 462408192.0, + "13": 462408704.0, + "14": 462409728.0, + "15": 462409728.0, + "16": 462407168.0, + "17": 462408704.0, + "18": 462408704.0, + "19": 462408704.0, + "20": 462408704.0, + "21": 462406144.0, + "22": 462412800.0, + "23": 462409216.0, + "24": 462408704.0, + "25": 462406144.0, + "26": 462410240.0, + "27": 462405120.0, + "28": 462408192.0, + "29": 462407168.0, + "30": 462406144.0, + "31": 462413312.0, + "32": 462408704.0, + "33": 462409216.0, + "34": 462406144.0, + "35": 462410240.0, + "36": 462407168.0, + "37": 462409728.0, + "38": 462408192.0, + "39": 462408192.0, + "40": 462407680.0, + "41": 462411264.0, + "42": 462409728.0, + "43": 462411264.0, + "44": 462407680.0, + "45": 462408704.0, + "46": 462410752.0, + "47": 462407680.0, + "48": 462408192.0, + "49": 462409728.0, + "50": 462409216.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1019807232.0, + "2": 1186372608.0, + "3": 1186372608.0, + "4": 1186372608.0, + "5": 1186372608.0, + "6": 1186372608.0, + "7": 1186372608.0, + "8": 1186372608.0, + "9": 1186372608.0, + "10": 1186372608.0, + "11": 1186372608.0, + "12": 1186372608.0, + "13": 1186372608.0, + "14": 1186372608.0, + "15": 1186372608.0, + "16": 1186372608.0, + "17": 1186372608.0, + "18": 1186372608.0, + "19": 1186372608.0, + "20": 1186372608.0, + "21": 1186372608.0, + "22": 1186372608.0, + "23": 1186372608.0, + "24": 1186372608.0, + "25": 1186372608.0, + "26": 1186372608.0, + "27": 1186372608.0, + "28": 1186372608.0, + "29": 1186372608.0, + "30": 1186372608.0, + "31": 1186372608.0, + "32": 1186372608.0, + 
"33": 1186372608.0, + "34": 1186372608.0, + "35": 1186372608.0, + "36": 1186372608.0, + "37": 1186372608.0, + "38": 1186372608.0, + "39": 1186372608.0, + "40": 1186372608.0, + "41": 1186372608.0, + "42": 1186372608.0, + "43": 1186372608.0, + "44": 1186372608.0, + "45": 1186372608.0, + "46": 1186372608.0, + "47": 1186372608.0, + "48": 1186372608.0, + "49": 1186372608.0, + "50": 1186372608.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9.69829, + "2": 0.75133, + "3": 0.68321, + "4": 0.68299, + "5": 0.61733, + "6": 0.57979, + "7": 0.57675, + "8": 0.57837, + "9": 0.58539, + "10": 0.58222, + "11": 0.58158, + "12": 0.58184, + "13": 0.58692, + "14": 0.58497, + "15": 0.59994, + "16": 0.59773, + "17": 0.57959, + "18": 0.57818, + "19": 0.57753, + "20": 0.57723, + "21": 0.57903, + "22": 0.57678, + "23": 0.58682, + "24": 0.57654, + "25": 0.57615, + "26": 0.57702, + "27": 0.57613, + "28": 0.57457, + "29": 0.57523, + "30": 0.57623, + "31": 0.57821, + "32": 0.57613, + "33": 0.57379, + "34": 0.57684, + "35": 0.57784, + "36": 0.57665, + "37": 0.57697, + "38": 0.57594, + "39": 0.57412, + "40": 0.57582, + "41": 0.57418, + "42": 0.57387, + "43": 0.57626, + "44": 0.57569, + "45": 0.57598, + "46": 0.57593, + "47": 0.57827, + "48": 0.57811, + "49": 0.57776, + "50": 0.57779 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..301ddfc5e91 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + 
"step_interval": 1, + "values": { + "1": 10.79175, + "2": 10.80907, + "3": 10.81011, + "4": 10.78146, + "5": 10.82288, + "6": 10.84057, + "7": 10.81192, + "8": 10.80005, + "9": 10.81667, + "10": 10.7688, + "11": 10.8618, + "12": 10.84042, + "13": 10.84452, + "14": 10.86421, + "15": 10.79157, + "16": 10.78199, + "17": 10.75122, + "18": 10.79446, + "19": 10.79523, + "20": 10.71001, + "21": 10.68811, + "22": 10.53736, + "23": 10.7066, + "24": 10.58865, + "25": 10.54662, + "26": 10.59492, + "27": 10.62142, + "28": 10.5969, + "29": 10.60036, + "30": 10.39407, + "31": 10.12951, + "32": 10.49684, + "33": 10.48779, + "34": 10.24347, + "35": 10.30461, + "36": 10.26056, + "37": 10.38859, + "38": 10.24848, + "39": 10.43799, + "40": 10.13303, + "41": 10.18651, + "42": 10.25823, + "43": 9.892, + "44": 10.02576, + "45": 9.90015, + "46": 9.88387, + "47": 10.19565, + "48": 9.91255, + "49": 9.60147, + "50": 9.97874 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5656.0, + "2": 6018.0, + "3": 5790.0, + "4": 5941.0, + "5": 6476.0, + "6": 6653.0, + "7": 6287.0, + "8": 5875.0, + "9": 6239.0, + "10": 5453.0, + "11": 6936.0, + "12": 6711.0, + "13": 6655.0, + "14": 6814.0, + "15": 6233.0, + "16": 6533.0, + "17": 6397.0, + "18": 6112.0, + "19": 6678.0, + "20": 5837.0, + "21": 6403.0, + "22": 5715.0, + "23": 6744.0, + "24": 6051.0, + "25": 5811.0, + "26": 6104.0, + "27": 6484.0, + "28": 6884.0, + "29": 7253.0, + "30": 6047.0, + "31": 5593.0, + "32": 6625.0, + "33": 7054.0, + "34": 6104.0, + "35": 6712.0, + "36": 6684.0, + "37": 7523.0, + "38": 7273.0, + "39": 7620.0, + "40": 7062.0, + "41": 6895.0, + "42": 7426.0, + "43": 6713.0, + "44": 6664.0, + "45": 6681.0, + "46": 6923.0, + "47": 7705.0, + "48": 7248.0, + "49": 7331.0, + "50": 7527.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 462408192.0, + "2": 462406144.0, + "3": 462409728.0, + "4": 462406144.0, + "5": 
462407680.0, + "6": 462408192.0, + "7": 462410752.0, + "8": 462410752.0, + "9": 462407168.0, + "10": 462410240.0, + "11": 462408192.0, + "12": 462408192.0, + "13": 462408704.0, + "14": 462409728.0, + "15": 462409728.0, + "16": 462407168.0, + "17": 462408704.0, + "18": 462408704.0, + "19": 462408704.0, + "20": 462408704.0, + "21": 462406144.0, + "22": 462412800.0, + "23": 462409216.0, + "24": 462408704.0, + "25": 462406144.0, + "26": 462410240.0, + "27": 462405120.0, + "28": 462408192.0, + "29": 462407168.0, + "30": 462406144.0, + "31": 462413312.0, + "32": 462408704.0, + "33": 462409216.0, + "34": 462406144.0, + "35": 462410240.0, + "36": 462407168.0, + "37": 462409728.0, + "38": 462408192.0, + "39": 462408192.0, + "40": 462407680.0, + "41": 462411264.0, + "42": 462409728.0, + "43": 462411264.0, + "44": 462407680.0, + "45": 462408704.0, + "46": 462410752.0, + "47": 462407680.0, + "48": 462408192.0, + "49": 462409728.0, + "50": 462409216.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1019807232.0, + "2": 1186372608.0, + "3": 1186372608.0, + "4": 1186372608.0, + "5": 1186372608.0, + "6": 1186372608.0, + "7": 1186372608.0, + "8": 1186372608.0, + "9": 1186372608.0, + "10": 1186372608.0, + "11": 1186372608.0, + "12": 1186372608.0, + "13": 1186372608.0, + "14": 1186372608.0, + "15": 1186372608.0, + "16": 1186372608.0, + "17": 1186372608.0, + "18": 1186372608.0, + "19": 1186372608.0, + "20": 1186372608.0, + "21": 1186372608.0, + "22": 1186372608.0, + "23": 1186372608.0, + "24": 1186372608.0, + "25": 1186372608.0, + "26": 1186372608.0, + "27": 1186372608.0, + "28": 1186372608.0, + "29": 1186372608.0, + "30": 1186372608.0, + "31": 1186372608.0, + "32": 1186372608.0, + "33": 1186372608.0, + "34": 1186372608.0, + "35": 1186372608.0, + "36": 1186372608.0, + "37": 1186372608.0, + "38": 1186372608.0, + "39": 1186372608.0, + "40": 1186372608.0, + "41": 1186372608.0, + "42": 1186372608.0, + "43": 
1186372608.0, + "44": 1186372608.0, + "45": 1186372608.0, + "46": 1186372608.0, + "47": 1186372608.0, + "48": 1186372608.0, + "49": 1186372608.0, + "50": 1186372608.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.87016, + "2": 0.65629, + "3": 0.56435, + "4": 0.5717, + "5": 0.56322, + "6": 0.56979, + "7": 0.56582, + "8": 0.56867, + "9": 0.57661, + "10": 0.56784, + "11": 0.57189, + "12": 0.57201, + "13": 0.57482, + "14": 0.57089, + "15": 0.57194, + "16": 0.56916, + "17": 0.57352, + "18": 0.56823, + "19": 0.56931, + "20": 0.56782, + "21": 0.56743, + "22": 0.5663, + "23": 0.56569, + "24": 0.56599, + "25": 0.56544, + "26": 0.56524, + "27": 0.56556, + "28": 0.56547, + "29": 0.56456, + "30": 0.56668, + "31": 0.57243, + "32": 0.56549, + "33": 0.56604, + "34": 0.5659, + "35": 0.56549, + "36": 0.56418, + "37": 0.56524, + "38": 0.56422, + "39": 0.56426, + "40": 0.56469, + "41": 0.56367, + "42": 0.56796, + "43": 0.57027, + "44": 0.57157, + "45": 0.56565, + "46": 0.56924, + "47": 0.57401, + "48": 0.57226, + "49": 0.56767, + "50": 0.56405 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..d9811bb579f --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.80475, + "2": 10.821, + "3": 10.8216, + "4": 10.79306, + "5": 10.84831, + "6": 10.85888, + "7": 10.83177, + "8": 10.82362, + "9": 10.83757, + "10": 10.78732, + "11": 10.86732, + 
"12": 10.85395, + "13": 10.86171, + "14": 10.88343, + "15": 10.79765, + "16": 10.79986, + "17": 10.76238, + "18": 10.80286, + "19": 10.7945, + "20": 10.71733, + "21": 10.70194, + "22": 10.55147, + "23": 10.72167, + "24": 10.60698, + "25": 10.54614, + "26": 10.6136, + "27": 10.63974, + "28": 10.60486, + "29": 10.62277, + "30": 10.41109, + "31": 10.1456, + "32": 10.51017, + "33": 10.50089, + "34": 10.25812, + "35": 10.3154, + "36": 10.27895, + "37": 10.41061, + "38": 10.25908, + "39": 10.45334, + "40": 10.1604, + "41": 10.20557, + "42": 10.26792, + "43": 9.90468, + "44": 10.03233, + "45": 9.91098, + "46": 9.87857, + "47": 10.20952, + "48": 9.93178, + "49": 9.61584, + "50": 9.98565 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5474.0, + "2": 5853.0, + "3": 5875.0, + "4": 6041.0, + "5": 6601.0, + "6": 6654.0, + "7": 6135.0, + "8": 5761.0, + "9": 6505.0, + "10": 5497.0, + "11": 6994.0, + "12": 6523.0, + "13": 6807.0, + "14": 6969.0, + "15": 6154.0, + "16": 6667.0, + "17": 6368.0, + "18": 6298.0, + "19": 6353.0, + "20": 5998.0, + "21": 6264.0, + "22": 5628.0, + "23": 6620.0, + "24": 6063.0, + "25": 5649.0, + "26": 6226.0, + "27": 6409.0, + "28": 6790.0, + "29": 7055.0, + "30": 6430.0, + "31": 5565.0, + "32": 6615.0, + "33": 6969.0, + "34": 6107.0, + "35": 6538.0, + "36": 6486.0, + "37": 7272.0, + "38": 6923.0, + "39": 7497.0, + "40": 6997.0, + "41": 6747.0, + "42": 7228.0, + "43": 6629.0, + "44": 6752.0, + "45": 6557.0, + "46": 6904.0, + "47": 7474.0, + "48": 7165.0, + "49": 7244.0, + "50": 7331.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 491766784.0, + "2": 491767296.0, + "3": 491765760.0, + "4": 491767296.0, + "5": 491766784.0, + "6": 491767808.0, + "7": 491767296.0, + "8": 491768320.0, + "9": 491767808.0, + "10": 491767296.0, + "11": 491765248.0, + "12": 491764736.0, + "13": 491766272.0, + "14": 491767808.0, + "15": 491768832.0, + "16": 
491769856.0, + "17": 491767296.0, + "18": 491765248.0, + "19": 491766272.0, + "20": 491766784.0, + "21": 491768320.0, + "22": 491768320.0, + "23": 491765760.0, + "24": 491766272.0, + "25": 491766272.0, + "26": 491767296.0, + "27": 491766784.0, + "28": 491767296.0, + "29": 491766272.0, + "30": 491766272.0, + "31": 491767808.0, + "32": 491765760.0, + "33": 491764736.0, + "34": 491768320.0, + "35": 491769344.0, + "36": 491765760.0, + "37": 491765248.0, + "38": 491766272.0, + "39": 491767808.0, + "40": 491765760.0, + "41": 491768320.0, + "42": 491766272.0, + "43": 491768832.0, + "44": 491768320.0, + "45": 491765248.0, + "46": 491768320.0, + "47": 491765760.0, + "48": 491766784.0, + "49": 491766784.0, + "50": 491765248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1047229440.0, + "2": 1213900288.0, + "3": 1213900288.0, + "4": 1213900288.0, + "5": 1213900288.0, + "6": 1213900288.0, + "7": 1213900288.0, + "8": 1213900288.0, + "9": 1213900288.0, + "10": 1213900288.0, + "11": 1213900288.0, + "12": 1213900288.0, + "13": 1213900288.0, + "14": 1213900288.0, + "15": 1213900288.0, + "16": 1213900288.0, + "17": 1213900288.0, + "18": 1213900288.0, + "19": 1213900288.0, + "20": 1213900288.0, + "21": 1213900288.0, + "22": 1213900288.0, + "23": 1213900288.0, + "24": 1213900288.0, + "25": 1213900288.0, + "26": 1213900288.0, + "27": 1213900288.0, + "28": 1213900288.0, + "29": 1213900288.0, + "30": 1213900288.0, + "31": 1213900288.0, + "32": 1213900288.0, + "33": 1213900288.0, + "34": 1213900288.0, + "35": 1213900288.0, + "36": 1213900288.0, + "37": 1213900288.0, + "38": 1213900288.0, + "39": 1213900288.0, + "40": 1213900288.0, + "41": 1213900288.0, + "42": 1213900288.0, + "43": 1213900288.0, + "44": 1213900288.0, + "45": 1213900288.0, + "46": 1213900288.0, + "47": 1213900288.0, + "48": 1213900288.0, + "49": 1213900288.0, + "50": 1213900288.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + 
"step_interval": 1, + "values": { + "1": 14.81321, + "2": 0.47201, + "3": 0.40381, + "4": 0.41626, + "5": 0.37526, + "6": 0.39128, + "7": 0.38006, + "8": 0.38712, + "9": 0.37978, + "10": 0.36542, + "11": 0.37019, + "12": 0.3584, + "13": 0.37121, + "14": 0.37141, + "15": 0.37291, + "16": 0.36319, + "17": 0.3701, + "18": 0.35732, + "19": 0.36745, + "20": 0.36768, + "21": 0.36322, + "22": 0.36627, + "23": 0.36042, + "24": 0.36521, + "25": 0.36471, + "26": 0.36406, + "27": 0.35919, + "28": 0.37411, + "29": 0.35657, + "30": 0.36834, + "31": 0.37292, + "32": 0.35489, + "33": 0.36692, + "34": 0.37173, + "35": 0.37097, + "36": 0.36594, + "37": 0.36691, + "38": 0.36847, + "39": 0.36166, + "40": 0.36415, + "41": 0.36888, + "42": 0.36642, + "43": 0.37419, + "44": 0.37026, + "45": 0.36033, + "46": 0.39777, + "47": 0.37677, + "48": 0.36794, + "49": 0.3863, + "50": 0.36013 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..b250bf7ac21 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.80475, + "2": 10.821, + "3": 10.8216, + "4": 10.79306, + "5": 10.84831, + "6": 10.85888, + "7": 10.83177, + "8": 10.82362, + "9": 10.83757, + "10": 10.78732, + "11": 10.86732, + "12": 10.85395, + "13": 10.86171, + "14": 10.88343, + "15": 10.79765, + "16": 10.79986, + "17": 10.76238, + "18": 10.80286, + "19": 10.7945, + "20": 10.71733, + "21": 10.70194, + "22": 10.55147, + "23": 10.72167, + "24": 10.60698, + "25": 10.54614, + "26": 
10.6136, + "27": 10.63974, + "28": 10.60486, + "29": 10.62277, + "30": 10.41109, + "31": 10.1456, + "32": 10.51017, + "33": 10.50089, + "34": 10.25812, + "35": 10.3154, + "36": 10.27895, + "37": 10.41061, + "38": 10.25908, + "39": 10.45334, + "40": 10.1604, + "41": 10.20557, + "42": 10.26792, + "43": 9.90468, + "44": 10.03233, + "45": 9.91098, + "46": 9.87857, + "47": 10.20952, + "48": 9.93178, + "49": 9.61584, + "50": 9.98565 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5474.0, + "2": 5853.0, + "3": 5875.0, + "4": 6041.0, + "5": 6601.0, + "6": 6654.0, + "7": 6135.0, + "8": 5761.0, + "9": 6505.0, + "10": 5497.0, + "11": 6994.0, + "12": 6523.0, + "13": 6807.0, + "14": 6969.0, + "15": 6154.0, + "16": 6667.0, + "17": 6368.0, + "18": 6298.0, + "19": 6353.0, + "20": 5998.0, + "21": 6264.0, + "22": 5628.0, + "23": 6620.0, + "24": 6063.0, + "25": 5649.0, + "26": 6226.0, + "27": 6409.0, + "28": 6790.0, + "29": 7055.0, + "30": 6430.0, + "31": 5565.0, + "32": 6615.0, + "33": 6969.0, + "34": 6107.0, + "35": 6538.0, + "36": 6486.0, + "37": 7272.0, + "38": 6923.0, + "39": 7497.0, + "40": 6997.0, + "41": 6747.0, + "42": 7228.0, + "43": 6629.0, + "44": 6752.0, + "45": 6557.0, + "46": 6904.0, + "47": 7474.0, + "48": 7165.0, + "49": 7244.0, + "50": 7331.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 491766784.0, + "2": 491767296.0, + "3": 491765760.0, + "4": 491767296.0, + "5": 491766784.0, + "6": 491767808.0, + "7": 491767296.0, + "8": 491768320.0, + "9": 491767808.0, + "10": 491767296.0, + "11": 491765248.0, + "12": 491764736.0, + "13": 491766272.0, + "14": 491767808.0, + "15": 491768832.0, + "16": 491769856.0, + "17": 491767296.0, + "18": 491765248.0, + "19": 491766272.0, + "20": 491766784.0, + "21": 491768320.0, + "22": 491768320.0, + "23": 491765760.0, + "24": 491766272.0, + "25": 491766272.0, + "26": 491767296.0, + "27": 491766784.0, + "28": 
491767296.0, + "29": 491766272.0, + "30": 491766272.0, + "31": 491767808.0, + "32": 491765760.0, + "33": 491764736.0, + "34": 491768320.0, + "35": 491769344.0, + "36": 491765760.0, + "37": 491765248.0, + "38": 491766272.0, + "39": 491767808.0, + "40": 491765760.0, + "41": 491768320.0, + "42": 491766272.0, + "43": 491768832.0, + "44": 491768320.0, + "45": 491765248.0, + "46": 491768320.0, + "47": 491765760.0, + "48": 491766784.0, + "49": 491766784.0, + "50": 491765248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1047229440.0, + "2": 1213900288.0, + "3": 1213900288.0, + "4": 1213900288.0, + "5": 1213900288.0, + "6": 1213900288.0, + "7": 1213900288.0, + "8": 1213900288.0, + "9": 1213900288.0, + "10": 1213900288.0, + "11": 1213900288.0, + "12": 1213900288.0, + "13": 1213900288.0, + "14": 1213900288.0, + "15": 1213900288.0, + "16": 1213900288.0, + "17": 1213900288.0, + "18": 1213900288.0, + "19": 1213900288.0, + "20": 1213900288.0, + "21": 1213900288.0, + "22": 1213900288.0, + "23": 1213900288.0, + "24": 1213900288.0, + "25": 1213900288.0, + "26": 1213900288.0, + "27": 1213900288.0, + "28": 1213900288.0, + "29": 1213900288.0, + "30": 1213900288.0, + "31": 1213900288.0, + "32": 1213900288.0, + "33": 1213900288.0, + "34": 1213900288.0, + "35": 1213900288.0, + "36": 1213900288.0, + "37": 1213900288.0, + "38": 1213900288.0, + "39": 1213900288.0, + "40": 1213900288.0, + "41": 1213900288.0, + "42": 1213900288.0, + "43": 1213900288.0, + "44": 1213900288.0, + "45": 1213900288.0, + "46": 1213900288.0, + "47": 1213900288.0, + "48": 1213900288.0, + "49": 1213900288.0, + "50": 1213900288.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.26707, + "2": 0.52806, + "3": 0.46475, + "4": 0.47125, + "5": 0.42985, + "6": 0.42614, + "7": 0.43552, + "8": 0.42689, + "9": 0.42927, + "10": 0.42373, + "11": 0.42662, + "12": 0.42301, + "13": 0.42359, + 
"14": 0.4226, + "15": 0.42796, + "16": 0.42415, + "17": 0.4235, + "18": 0.41948, + "19": 0.42601, + "20": 0.42722, + "21": 0.4176, + "22": 0.41953, + "23": 0.42303, + "24": 0.4187, + "25": 0.42281, + "26": 0.42449, + "27": 0.41941, + "28": 0.42935, + "29": 0.417, + "30": 0.4261, + "31": 0.42904, + "32": 0.41844, + "33": 0.41687, + "34": 0.43419, + "35": 0.43727, + "36": 0.42315, + "37": 0.42179, + "38": 0.42403, + "39": 0.4179, + "40": 0.42443, + "41": 0.42169, + "42": 0.42155, + "43": 0.43942, + "44": 0.42209, + "45": 0.41972, + "46": 0.46515, + "47": 0.43911, + "48": 0.43693, + "49": 0.44745, + "50": 0.4198 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..8f5e5238362 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79288, + "2": 10.81339, + "3": 10.8144, + "4": 10.77657, + "5": 10.828, + "6": 10.84293, + "7": 10.81053, + "8": 10.80366, + "9": 10.81505, + "10": 10.76831, + "11": 10.86961, + "12": 10.83911, + "13": 10.85295, + "14": 10.86545, + "15": 10.79073, + "16": 10.78351, + "17": 10.7488, + "18": 10.79251, + "19": 10.78822, + "20": 10.7066, + "21": 10.68957, + "22": 10.53861, + "23": 10.70542, + "24": 10.59106, + "25": 10.54061, + "26": 10.59556, + "27": 10.61836, + "28": 10.59188, + "29": 10.6008, + "30": 10.39485, + "31": 10.12988, + "32": 10.49622, + "33": 10.48801, + "34": 10.24185, + "35": 10.30488, + "36": 10.25446, + "37": 10.38879, + "38": 10.24767, + 
"39": 10.43653, + "40": 10.13079, + "41": 10.18439, + "42": 10.25364, + "43": 9.89225, + "44": 10.0224, + "45": 9.90236, + "46": 9.88337, + "47": 10.1948, + "48": 9.91124, + "49": 9.59882, + "50": 9.97938 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5601.0, + "2": 5974.0, + "3": 5786.0, + "4": 5760.0, + "5": 6601.0, + "6": 6753.0, + "7": 6231.0, + "8": 5822.0, + "9": 6446.0, + "10": 5254.0, + "11": 6740.0, + "12": 6313.0, + "13": 6672.0, + "14": 6909.0, + "15": 6250.0, + "16": 6391.0, + "17": 6290.0, + "18": 6086.0, + "19": 6278.0, + "20": 5969.0, + "21": 6461.0, + "22": 5583.0, + "23": 6602.0, + "24": 5982.0, + "25": 5816.0, + "26": 6162.0, + "27": 6378.0, + "28": 6931.0, + "29": 7197.0, + "30": 6181.0, + "31": 5568.0, + "32": 6876.0, + "33": 6980.0, + "34": 6144.0, + "35": 6751.0, + "36": 6501.0, + "37": 7367.0, + "38": 7095.0, + "39": 7558.0, + "40": 6831.0, + "41": 6929.0, + "42": 7131.0, + "43": 6817.0, + "44": 6736.0, + "45": 6881.0, + "46": 7006.0, + "47": 7622.0, + "48": 7384.0, + "49": 7363.0, + "50": 7684.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 458211840.0, + "2": 458213376.0, + "3": 458214400.0, + "4": 458211840.0, + "5": 458212864.0, + "6": 458215424.0, + "7": 458212864.0, + "8": 458212864.0, + "9": 458214912.0, + "10": 458214912.0, + "11": 458214912.0, + "12": 458213888.0, + "13": 458214912.0, + "14": 458213376.0, + "15": 458215424.0, + "16": 458214400.0, + "17": 458214400.0, + "18": 458215424.0, + "19": 458209792.0, + "20": 458212864.0, + "21": 458211840.0, + "22": 458219520.0, + "23": 458213888.0, + "24": 458214912.0, + "25": 458215424.0, + "26": 458213376.0, + "27": 458213888.0, + "28": 458213888.0, + "29": 458212864.0, + "30": 458211840.0, + "31": 458218496.0, + "32": 458214912.0, + "33": 458212352.0, + "34": 458214400.0, + "35": 458214400.0, + "36": 458215424.0, + "37": 458213888.0, + "38": 458213888.0, + "39": 
458213888.0, + "40": 458214912.0, + "41": 458216448.0, + "42": 458213888.0, + "43": 458217472.0, + "44": 458212864.0, + "45": 458213888.0, + "46": 458216960.0, + "47": 458214400.0, + "48": 458212352.0, + "49": 458215424.0, + "50": 458214912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1016394240.0, + "2": 1180904960.0, + "3": 1180904960.0, + "4": 1180904960.0, + "5": 1180904960.0, + "6": 1180904960.0, + "7": 1180904960.0, + "8": 1180904960.0, + "9": 1180934144.0, + "10": 1180934144.0, + "11": 1180934144.0, + "12": 1180934144.0, + "13": 1180934144.0, + "14": 1180934144.0, + "15": 1180990976.0, + "16": 1180990976.0, + "17": 1180990976.0, + "18": 1180990976.0, + "19": 1180990976.0, + "20": 1180990976.0, + "21": 1180990976.0, + "22": 1180990976.0, + "23": 1180990976.0, + "24": 1180990976.0, + "25": 1181222912.0, + "26": 1181222912.0, + "27": 1181222912.0, + "28": 1181222912.0, + "29": 1181222912.0, + "30": 1181222912.0, + "31": 1181222912.0, + "32": 1181222912.0, + "33": 1181222912.0, + "34": 1181222912.0, + "35": 1181468160.0, + "36": 1181468160.0, + "37": 1181468160.0, + "38": 1181468160.0, + "39": 1181468160.0, + "40": 1181468160.0, + "41": 1181468160.0, + "42": 1181468160.0, + "43": 1181468160.0, + "44": 1183467008.0, + "45": 1183467008.0, + "46": 1183467008.0, + "47": 1183467008.0, + "48": 1183467008.0, + "49": 1183467008.0, + "50": 1183467008.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.05941, + "2": 0.66923, + "3": 0.61216, + "4": 0.58734, + "5": 0.60006, + "6": 0.58013, + "7": 0.60084, + "8": 0.59342, + "9": 0.59047, + "10": 0.60222, + "11": 0.58523, + "12": 0.60039, + "13": 0.58622, + "14": 0.59318, + "15": 0.59774, + "16": 0.58824, + "17": 0.60997, + "18": 0.58565, + "19": 0.596, + "20": 0.59978, + "21": 0.58617, + "22": 0.60156, + "23": 0.58205, + "24": 0.60247, + "25": 0.60354, + "26": 0.5839, + "27": 0.61043, + 
"28": 0.58334, + "29": 0.60152, + "30": 0.59973, + "31": 0.58621, + "32": 0.59768, + "33": 0.58349, + "34": 0.59991, + "35": 0.59183, + "36": 0.58804, + "37": 0.60327, + "38": 0.58347, + "39": 0.60102, + "40": 0.58409, + "41": 0.59493, + "42": 0.5989, + "43": 0.58752, + "44": 0.59927, + "45": 0.59465, + "46": 0.60409, + "47": 0.60265, + "48": 0.5887, + "49": 0.6087, + "50": 0.58454 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..19437ff4a78 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79288, + "2": 10.81339, + "3": 10.8144, + "4": 10.77657, + "5": 10.828, + "6": 10.84293, + "7": 10.81053, + "8": 10.80366, + "9": 10.81505, + "10": 10.76831, + "11": 10.86961, + "12": 10.83911, + "13": 10.85295, + "14": 10.86545, + "15": 10.79073, + "16": 10.78351, + "17": 10.7488, + "18": 10.79251, + "19": 10.78822, + "20": 10.7066, + "21": 10.68957, + "22": 10.53861, + "23": 10.70542, + "24": 10.59106, + "25": 10.54061, + "26": 10.59556, + "27": 10.61836, + "28": 10.59188, + "29": 10.6008, + "30": 10.39485, + "31": 10.12988, + "32": 10.49622, + "33": 10.48801, + "34": 10.24185, + "35": 10.30488, + "36": 10.25446, + "37": 10.38879, + "38": 10.24767, + "39": 10.43653, + "40": 10.13079, + "41": 10.18439, + "42": 10.25364, + "43": 9.89225, + "44": 10.0224, + "45": 9.90236, + "46": 9.88337, + "47": 10.1948, + "48": 9.91124, + "49": 9.59882, + "50": 9.97938 + } + }, + "num-zeros": { + "start_step": 
1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5601.0, + "2": 5974.0, + "3": 5786.0, + "4": 5760.0, + "5": 6601.0, + "6": 6753.0, + "7": 6231.0, + "8": 5822.0, + "9": 6446.0, + "10": 5254.0, + "11": 6740.0, + "12": 6313.0, + "13": 6672.0, + "14": 6909.0, + "15": 6250.0, + "16": 6391.0, + "17": 6290.0, + "18": 6086.0, + "19": 6278.0, + "20": 5969.0, + "21": 6461.0, + "22": 5583.0, + "23": 6602.0, + "24": 5982.0, + "25": 5816.0, + "26": 6162.0, + "27": 6378.0, + "28": 6931.0, + "29": 7197.0, + "30": 6181.0, + "31": 5568.0, + "32": 6876.0, + "33": 6980.0, + "34": 6144.0, + "35": 6751.0, + "36": 6501.0, + "37": 7367.0, + "38": 7095.0, + "39": 7558.0, + "40": 6831.0, + "41": 6929.0, + "42": 7131.0, + "43": 6817.0, + "44": 6736.0, + "45": 6881.0, + "46": 7006.0, + "47": 7622.0, + "48": 7384.0, + "49": 7363.0, + "50": 7684.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 458211840.0, + "2": 458213376.0, + "3": 458214400.0, + "4": 458211840.0, + "5": 458212864.0, + "6": 458215424.0, + "7": 458212864.0, + "8": 458212864.0, + "9": 458214912.0, + "10": 458214912.0, + "11": 458214912.0, + "12": 458213888.0, + "13": 458214912.0, + "14": 458213376.0, + "15": 458215424.0, + "16": 458214400.0, + "17": 458214400.0, + "18": 458215424.0, + "19": 458209792.0, + "20": 458212864.0, + "21": 458211840.0, + "22": 458219520.0, + "23": 458213888.0, + "24": 458214912.0, + "25": 458215424.0, + "26": 458213376.0, + "27": 458213888.0, + "28": 458213888.0, + "29": 458212864.0, + "30": 458211840.0, + "31": 458218496.0, + "32": 458214912.0, + "33": 458212352.0, + "34": 458214400.0, + "35": 458214400.0, + "36": 458215424.0, + "37": 458213888.0, + "38": 458213888.0, + "39": 458213888.0, + "40": 458214912.0, + "41": 458216448.0, + "42": 458213888.0, + "43": 458217472.0, + "44": 458212864.0, + "45": 458213888.0, + "46": 458216960.0, + "47": 458214400.0, + "48": 458212352.0, + "49": 458215424.0, + "50": 458214912.0 + } 
+ }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1016394240.0, + "2": 1180904960.0, + "3": 1180904960.0, + "4": 1180904960.0, + "5": 1180904960.0, + "6": 1180904960.0, + "7": 1180904960.0, + "8": 1180904960.0, + "9": 1180934144.0, + "10": 1180934144.0, + "11": 1180934144.0, + "12": 1180934144.0, + "13": 1180934144.0, + "14": 1180934144.0, + "15": 1180990976.0, + "16": 1180990976.0, + "17": 1180990976.0, + "18": 1180990976.0, + "19": 1180990976.0, + "20": 1180990976.0, + "21": 1180990976.0, + "22": 1180990976.0, + "23": 1180990976.0, + "24": 1180990976.0, + "25": 1181222912.0, + "26": 1181222912.0, + "27": 1181222912.0, + "28": 1181222912.0, + "29": 1181222912.0, + "30": 1181222912.0, + "31": 1181222912.0, + "32": 1181222912.0, + "33": 1181222912.0, + "34": 1181222912.0, + "35": 1181468160.0, + "36": 1181468160.0, + "37": 1181468160.0, + "38": 1181468160.0, + "39": 1181468160.0, + "40": 1181468160.0, + "41": 1181468160.0, + "42": 1181468160.0, + "43": 1181468160.0, + "44": 1183467008.0, + "45": 1183467008.0, + "46": 1183467008.0, + "47": 1183467008.0, + "48": 1183467008.0, + "49": 1183467008.0, + "50": 1183467008.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 20.92117, + "2": 0.78495, + "3": 0.65993, + "4": 0.60281, + "5": 0.62415, + "6": 0.59632, + "7": 0.61058, + "8": 0.60884, + "9": 0.61298, + "10": 0.60737, + "11": 0.59282, + "12": 0.62404, + "13": 0.59787, + "14": 0.5992, + "15": 0.60558, + "16": 0.58919, + "17": 0.60862, + "18": 0.58494, + "19": 0.59977, + "20": 0.59905, + "21": 0.58779, + "22": 0.60691, + "23": 0.58773, + "24": 0.59879, + "25": 0.59399, + "26": 0.58416, + "27": 0.59705, + "28": 0.58558, + "29": 0.60279, + "30": 0.59279, + "31": 0.59125, + "32": 0.60528, + "33": 0.58125, + "34": 0.59849, + "35": 0.5851, + "36": 0.59833, + "37": 0.59938, + "38": 0.58782, + "39": 0.59605, + "40": 0.58815, + "41": 0.59763, + "42": 
0.60014, + "43": 0.58419, + "44": 0.59775, + "45": 0.58451, + "46": 0.60219, + "47": 0.59473, + "48": 0.58641, + "49": 0.6019, + "50": 0.58426 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json index 966de8bb1bb..eba1757fe35 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.80475, + "2": 10.821, + "3": 10.8216, + "4": 10.79306, "5": 10.84831, + "6": 10.85888, + "7": 10.83177, + "8": 10.82362, + "9": 10.83757, "10": 10.78732, + "11": 10.86732, + "12": 10.85395, + "13": 10.86171, + "14": 10.88343, "15": 10.79765, + "16": 10.79986, + "17": 10.76238, + "18": 10.80286, + "19": 10.7945, "20": 10.71733, + "21": 10.70194, + "22": 10.55147, + "23": 10.72167, + "24": 10.60698, "25": 10.54614, + "26": 10.6136, + "27": 10.63974, + "28": 10.60486, + "29": 10.62277, "30": 10.41109, + "31": 10.1456, + "32": 10.51017, + "33": 10.50089, + "34": 10.25812, "35": 10.3154, + "36": 10.27895, + "37": 10.41061, + "38": 10.25908, + "39": 10.45334, "40": 10.1604, + "41": 10.20557, + "42": 10.26792, + "43": 9.90468, + "44": 10.03233, "45": 9.91098, + "46": 9.87857, + "47": 10.20952, + "48": 9.93178, + "49": 9.61584, "50": 9.98565 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 5474.0, + "2": 5853.0, + "3": 5875.0, + "4": 
6041.0, "5": 6601.0, + "6": 6654.0, + "7": 6135.0, + "8": 5761.0, + "9": 6505.0, "10": 5497.0, + "11": 6994.0, + "12": 6523.0, + "13": 6807.0, + "14": 6969.0, "15": 6154.0, + "16": 6667.0, + "17": 6368.0, + "18": 6298.0, + "19": 6353.0, "20": 5998.0, + "21": 6264.0, + "22": 5628.0, + "23": 6620.0, + "24": 6063.0, "25": 5649.0, + "26": 6226.0, + "27": 6409.0, + "28": 6790.0, + "29": 7055.0, "30": 6430.0, + "31": 5565.0, + "32": 6615.0, + "33": 6969.0, + "34": 6107.0, "35": 6538.0, + "36": 6486.0, + "37": 7272.0, + "38": 6923.0, + "39": 7497.0, "40": 6997.0, + "41": 6747.0, + "42": 7228.0, + "43": 6629.0, + "44": 6752.0, "45": 6557.0, + "46": 6904.0, + "47": 7474.0, + "48": 7165.0, + "49": 7244.0, "50": 7331.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 491766784.0, + "2": 491767296.0, + "3": 491765760.0, + "4": 491767296.0, "5": 491766784.0, + "6": 491767808.0, + "7": 491767296.0, + "8": 491768320.0, + "9": 491767808.0, "10": 491767296.0, + "11": 491765248.0, + "12": 491764736.0, + "13": 491766272.0, + "14": 491767808.0, "15": 491768832.0, + "16": 491769856.0, + "17": 491767296.0, + "18": 491765248.0, + "19": 491766272.0, "20": 491766784.0, + "21": 491768320.0, + "22": 491768320.0, + "23": 491765760.0, + "24": 491766272.0, "25": 491766272.0, + "26": 491767296.0, + "27": 491766784.0, + "28": 491767296.0, + "29": 491766272.0, "30": 491766272.0, + "31": 491767808.0, + "32": 491765760.0, + "33": 491764736.0, + "34": 491768320.0, "35": 491769344.0, + "36": 491765760.0, + "37": 491765248.0, + "38": 491766272.0, + "39": 491767808.0, "40": 491765760.0, + "41": 491768320.0, + "42": 491766272.0, + "43": 491768832.0, + "44": 491768320.0, "45": 491765248.0, + "46": 491768320.0, + "47": 491765760.0, + "48": 491766784.0, + "49": 491766784.0, "50": 491765248.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1047229440.0, + 
"2": 1213900288.0, + "3": 1213900288.0, + "4": 1213900288.0, "5": 1213900288.0, + "6": 1213900288.0, + "7": 1213900288.0, + "8": 1213900288.0, + "9": 1213900288.0, "10": 1213900288.0, + "11": 1213900288.0, + "12": 1213900288.0, + "13": 1213900288.0, + "14": 1213900288.0, "15": 1213900288.0, + "16": 1213900288.0, + "17": 1213900288.0, + "18": 1213900288.0, + "19": 1213900288.0, "20": 1213900288.0, + "21": 1213900288.0, + "22": 1213900288.0, + "23": 1213900288.0, + "24": 1213900288.0, "25": 1213900288.0, + "26": 1213900288.0, + "27": 1213900288.0, + "28": 1213900288.0, + "29": 1213900288.0, "30": 1213900288.0, + "31": 1213900288.0, + "32": 1213900288.0, + "33": 1213900288.0, + "34": 1213900288.0, "35": 1213900288.0, + "36": 1213900288.0, + "37": 1213900288.0, + "38": 1213900288.0, + "39": 1213900288.0, "40": 1213900288.0, + "41": 1213900288.0, + "42": 1213900288.0, + "43": 1213900288.0, + "44": 1213900288.0, "45": 1213900288.0, + "46": 1213900288.0, + "47": 1213900288.0, + "48": 1213900288.0, + "49": 1213900288.0, "50": 1213900288.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 14.78242, - "5": 0.36146, - "10": 0.35831, - "15": 0.36317, - "20": 0.36704, - "25": 0.35673, - "30": 0.36236, - "35": 0.37486, - "40": 0.36477, - "45": 0.36076, - "50": 0.36594 + "1": 13.09447, + "2": 0.51607, + "3": 0.44405, + "4": 0.45969, + "5": 0.41888, + "6": 0.42393, + "7": 0.42442, + "8": 0.41943, + "9": 0.41271, + "10": 0.41462, + "11": 0.41487, + "12": 0.40591, + "13": 0.41444, + "14": 0.40303, + "15": 0.41598, + "16": 0.40637, + "17": 0.40922, + "18": 0.41209, + "19": 0.40964, + "20": 0.4238, + "21": 0.4078, + "22": 0.41408, + "23": 0.41657, + "24": 0.40953, + "25": 0.41984, + "26": 0.41935, + "27": 0.41845, + "28": 0.42267, + "29": 0.41439, + "30": 0.42344, + "31": 0.42201, + "32": 0.42025, + "33": 0.4143, + "34": 0.50551, + "35": 0.44065, + "36": 0.41296, + "37": 0.41985, + "38": 0.41541, + "39": 0.41687, + 
"40": 0.41757, + "41": 0.4181, + "42": 0.41983, + "43": 0.42929, + "44": 0.41833, + "45": 0.41337, + "46": 0.46022, + "47": 0.43427, + "48": 0.42794, + "49": 0.44841, + "50": 0.41311 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..aeb8f53adff --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79175, + "2": 10.80907, + "3": 10.81011, + "4": 10.78146, + "5": 10.82288, + "6": 10.84057, + "7": 10.81192, + "8": 10.80005, + "9": 10.81667, + "10": 10.7688, + "11": 10.8618, + "12": 10.84042, + "13": 10.84452, + "14": 10.86421, + "15": 10.79157, + "16": 10.78199, + "17": 10.75122, + "18": 10.79446, + "19": 10.79523, + "20": 10.71001, + "21": 10.68811, + "22": 10.53736, + "23": 10.7066, + "24": 10.58865, + "25": 10.54662, + "26": 10.59492, + "27": 10.62142, + "28": 10.5969, + "29": 10.60036, + "30": 10.39407, + "31": 10.12951, + "32": 10.49684, + "33": 10.48779, + "34": 10.24347, + "35": 10.30461, + "36": 10.26056, + "37": 10.38859, + "38": 10.24848, + "39": 10.43799, + "40": 10.13303, + "41": 10.18651, + "42": 10.25823, + "43": 9.892, + "44": 10.02576, + "45": 9.90015, + "46": 9.88387, + "47": 10.19565, + "48": 9.91255, + "49": 9.60147, + "50": 9.97874 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5656.0, + "2": 6018.0, + "3": 5790.0, + "4": 5941.0, + "5": 6476.0, + "6": 6653.0, + "7": 6287.0, + "8": 
5875.0, + "9": 6239.0, + "10": 5453.0, + "11": 6936.0, + "12": 6711.0, + "13": 6655.0, + "14": 6814.0, + "15": 6233.0, + "16": 6533.0, + "17": 6397.0, + "18": 6112.0, + "19": 6678.0, + "20": 5837.0, + "21": 6403.0, + "22": 5715.0, + "23": 6744.0, + "24": 6051.0, + "25": 5811.0, + "26": 6104.0, + "27": 6484.0, + "28": 6884.0, + "29": 7253.0, + "30": 6047.0, + "31": 5593.0, + "32": 6625.0, + "33": 7054.0, + "34": 6104.0, + "35": 6712.0, + "36": 6684.0, + "37": 7523.0, + "38": 7273.0, + "39": 7620.0, + "40": 7062.0, + "41": 6895.0, + "42": 7426.0, + "43": 6713.0, + "44": 6664.0, + "45": 6681.0, + "46": 6923.0, + "47": 7705.0, + "48": 7248.0, + "49": 7331.0, + "50": 7527.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 462408192.0, + "2": 462406144.0, + "3": 462409728.0, + "4": 462406144.0, + "5": 462407680.0, + "6": 462408192.0, + "7": 462410752.0, + "8": 462410752.0, + "9": 462407168.0, + "10": 462410240.0, + "11": 462408192.0, + "12": 462408192.0, + "13": 462408704.0, + "14": 462409728.0, + "15": 462409728.0, + "16": 462407168.0, + "17": 462408704.0, + "18": 462408704.0, + "19": 462408704.0, + "20": 462408704.0, + "21": 462406144.0, + "22": 462412800.0, + "23": 462409216.0, + "24": 462408704.0, + "25": 462406144.0, + "26": 462410240.0, + "27": 462405120.0, + "28": 462408192.0, + "29": 462407168.0, + "30": 462406144.0, + "31": 462413312.0, + "32": 462408704.0, + "33": 462409216.0, + "34": 462406144.0, + "35": 462410240.0, + "36": 462407168.0, + "37": 462409728.0, + "38": 462408192.0, + "39": 462408192.0, + "40": 462407680.0, + "41": 462411264.0, + "42": 462409728.0, + "43": 462411264.0, + "44": 462407680.0, + "45": 462408704.0, + "46": 462410752.0, + "47": 462407680.0, + "48": 462408192.0, + "49": 462409728.0, + "50": 462409216.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1019807232.0, + "2": 1186372608.0, + "3": 
1186372608.0, + "4": 1186372608.0, + "5": 1186372608.0, + "6": 1186372608.0, + "7": 1186372608.0, + "8": 1186372608.0, + "9": 1186372608.0, + "10": 1186372608.0, + "11": 1186372608.0, + "12": 1186372608.0, + "13": 1186372608.0, + "14": 1186372608.0, + "15": 1186372608.0, + "16": 1186372608.0, + "17": 1186372608.0, + "18": 1186372608.0, + "19": 1186372608.0, + "20": 1186372608.0, + "21": 1186372608.0, + "22": 1186372608.0, + "23": 1186372608.0, + "24": 1186372608.0, + "25": 1186372608.0, + "26": 1186372608.0, + "27": 1186372608.0, + "28": 1186372608.0, + "29": 1186372608.0, + "30": 1186372608.0, + "31": 1186372608.0, + "32": 1186372608.0, + "33": 1186372608.0, + "34": 1186372608.0, + "35": 1186372608.0, + "36": 1186372608.0, + "37": 1186372608.0, + "38": 1186372608.0, + "39": 1186372608.0, + "40": 1186372608.0, + "41": 1186372608.0, + "42": 1186372608.0, + "43": 1186372608.0, + "44": 1186372608.0, + "45": 1186372608.0, + "46": 1186372608.0, + "47": 1186372608.0, + "48": 1186372608.0, + "49": 1186372608.0, + "50": 1186372608.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 7.63206, + "2": 0.65692, + "3": 0.5824, + "4": 0.58308, + "5": 0.58182, + "6": 0.57849, + "7": 0.57628, + "8": 0.57557, + "9": 0.57694, + "10": 0.57443, + "11": 0.57466, + "12": 0.57548, + "13": 0.57752, + "14": 0.58301, + "15": 0.57494, + "16": 0.5737, + "17": 0.57748, + "18": 0.57584, + "19": 0.57312, + "20": 0.57465, + "21": 0.57268, + "22": 0.57394, + "23": 0.57466, + "24": 0.57498, + "25": 0.57708, + "26": 0.57279, + "27": 0.57369, + "28": 0.57312, + "29": 0.57271, + "30": 0.57407, + "31": 0.5737, + "32": 0.57173, + "33": 0.57054, + "34": 0.5736, + "35": 0.57222, + "36": 0.57349, + "37": 0.57417, + "38": 0.57356, + "39": 0.57214, + "40": 0.57186, + "41": 0.57234, + "42": 0.57304, + "43": 0.5732, + "44": 0.5724, + "45": 0.5728, + "46": 0.57286, + "47": 0.57315, + "48": 0.57441, + "49": 0.57353, + "50": 0.57322 + } + } +} \ No 
newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..b4b3a0e2762 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79175, + "2": 10.80907, + "3": 10.81011, + "4": 10.78146, + "5": 10.82288, + "6": 10.84057, + "7": 10.81192, + "8": 10.80005, + "9": 10.81667, + "10": 10.7688, + "11": 10.8618, + "12": 10.84042, + "13": 10.84452, + "14": 10.86421, + "15": 10.79157, + "16": 10.78199, + "17": 10.75122, + "18": 10.79446, + "19": 10.79523, + "20": 10.71001, + "21": 10.68811, + "22": 10.53736, + "23": 10.7066, + "24": 10.58865, + "25": 10.54662, + "26": 10.59492, + "27": 10.62142, + "28": 10.5969, + "29": 10.60036, + "30": 10.39407, + "31": 10.12951, + "32": 10.49684, + "33": 10.48779, + "34": 10.24347, + "35": 10.30461, + "36": 10.26056, + "37": 10.38859, + "38": 10.24848, + "39": 10.43799, + "40": 10.13303, + "41": 10.18651, + "42": 10.25823, + "43": 9.892, + "44": 10.02576, + "45": 9.90015, + "46": 9.88387, + "47": 10.19565, + "48": 9.91255, + "49": 9.60147, + "50": 9.97874 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5656.0, + "2": 6018.0, + "3": 5790.0, + "4": 5941.0, + "5": 6476.0, + "6": 6653.0, + "7": 6287.0, + "8": 5875.0, + "9": 6239.0, + "10": 5453.0, + "11": 6936.0, + "12": 6711.0, + "13": 6655.0, + "14": 6814.0, + "15": 6233.0, + "16": 6533.0, + "17": 6397.0, + "18": 6112.0, + "19": 6678.0, + "20": 5837.0, + 
"21": 6403.0, + "22": 5715.0, + "23": 6744.0, + "24": 6051.0, + "25": 5811.0, + "26": 6104.0, + "27": 6484.0, + "28": 6884.0, + "29": 7253.0, + "30": 6047.0, + "31": 5593.0, + "32": 6625.0, + "33": 7054.0, + "34": 6104.0, + "35": 6712.0, + "36": 6684.0, + "37": 7523.0, + "38": 7273.0, + "39": 7620.0, + "40": 7062.0, + "41": 6895.0, + "42": 7426.0, + "43": 6713.0, + "44": 6664.0, + "45": 6681.0, + "46": 6923.0, + "47": 7705.0, + "48": 7248.0, + "49": 7331.0, + "50": 7527.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 462408192.0, + "2": 462406144.0, + "3": 462409728.0, + "4": 462406144.0, + "5": 462407680.0, + "6": 462408192.0, + "7": 462410752.0, + "8": 462410752.0, + "9": 462407168.0, + "10": 462410240.0, + "11": 462408192.0, + "12": 462408192.0, + "13": 462408704.0, + "14": 462409728.0, + "15": 462409728.0, + "16": 462407168.0, + "17": 462408704.0, + "18": 462408704.0, + "19": 462408704.0, + "20": 462408704.0, + "21": 462406144.0, + "22": 462412800.0, + "23": 462409216.0, + "24": 462408704.0, + "25": 462406144.0, + "26": 462410240.0, + "27": 462405120.0, + "28": 462408192.0, + "29": 462407168.0, + "30": 462406144.0, + "31": 462413312.0, + "32": 462408704.0, + "33": 462409216.0, + "34": 462406144.0, + "35": 462410240.0, + "36": 462407168.0, + "37": 462409728.0, + "38": 462408192.0, + "39": 462408192.0, + "40": 462407680.0, + "41": 462411264.0, + "42": 462409728.0, + "43": 462411264.0, + "44": 462407680.0, + "45": 462408704.0, + "46": 462410752.0, + "47": 462407680.0, + "48": 462408192.0, + "49": 462409728.0, + "50": 462409216.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1019807232.0, + "2": 1186372608.0, + "3": 1186372608.0, + "4": 1186372608.0, + "5": 1186372608.0, + "6": 1186372608.0, + "7": 1186372608.0, + "8": 1186372608.0, + "9": 1186372608.0, + "10": 1186372608.0, + "11": 1186372608.0, + "12": 1186372608.0, + 
"13": 1186372608.0, + "14": 1186372608.0, + "15": 1186372608.0, + "16": 1186372608.0, + "17": 1186372608.0, + "18": 1186372608.0, + "19": 1186372608.0, + "20": 1186372608.0, + "21": 1186372608.0, + "22": 1186372608.0, + "23": 1186372608.0, + "24": 1186372608.0, + "25": 1186372608.0, + "26": 1186372608.0, + "27": 1186372608.0, + "28": 1186372608.0, + "29": 1186372608.0, + "30": 1186372608.0, + "31": 1186372608.0, + "32": 1186372608.0, + "33": 1186372608.0, + "34": 1186372608.0, + "35": 1186372608.0, + "36": 1186372608.0, + "37": 1186372608.0, + "38": 1186372608.0, + "39": 1186372608.0, + "40": 1186372608.0, + "41": 1186372608.0, + "42": 1186372608.0, + "43": 1186372608.0, + "44": 1186372608.0, + "45": 1186372608.0, + "46": 1186372608.0, + "47": 1186372608.0, + "48": 1186372608.0, + "49": 1186372608.0, + "50": 1186372608.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 7.40856, + "2": 0.64197, + "3": 0.58531, + "4": 0.58507, + "5": 0.57697, + "6": 0.5793, + "7": 0.5782, + "8": 0.58243, + "9": 0.58414, + "10": 0.58249, + "11": 0.58253, + "12": 0.58879, + "13": 0.58756, + "14": 0.5805, + "15": 0.57895, + "16": 0.58121, + "17": 0.58174, + "18": 0.58068, + "19": 0.58124, + "20": 0.58037, + "21": 0.58171, + "22": 0.58014, + "23": 0.5805, + "24": 0.5793, + "25": 0.58053, + "26": 0.58187, + "27": 0.57993, + "28": 0.57974, + "29": 0.58115, + "30": 0.58209, + "31": 0.58796, + "32": 0.58194, + "33": 0.58092, + "34": 0.58015, + "35": 0.5818, + "36": 0.58003, + "37": 0.58229, + "38": 0.58277, + "39": 0.57819, + "40": 0.57868, + "41": 0.57976, + "42": 0.57721, + "43": 0.57953, + "44": 0.58081, + "45": 0.57938, + "46": 0.58149, + "47": 0.58214, + "48": 0.58119, + "49": 0.58151, + "50": 0.57895 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..4fb97350a0f --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.80475, + "2": 10.821, + "3": 10.8216, + "4": 10.79306, + "5": 10.84831, + "6": 10.85888, + "7": 10.83177, + "8": 10.82362, + "9": 10.83757, + "10": 10.78732, + "11": 10.86732, + "12": 10.85395, + "13": 10.86171, + "14": 10.88343, + "15": 10.79765, + "16": 10.79986, + "17": 10.76238, + "18": 10.80286, + "19": 10.7945, + "20": 10.71733, + "21": 10.70194, + "22": 10.55147, + "23": 10.72167, + "24": 10.60698, + "25": 10.54614, + "26": 10.6136, + "27": 10.63974, + "28": 10.60486, + "29": 10.62277, + "30": 10.41109, + "31": 10.1456, + "32": 10.51017, + "33": 10.50089, + "34": 10.25812, + "35": 10.3154, + "36": 10.27895, + "37": 10.41061, + "38": 10.25908, + "39": 10.45334, + "40": 10.1604, + "41": 10.20557, + "42": 10.26792, + "43": 9.90468, + "44": 10.03233, + "45": 9.91098, + "46": 9.87857, + "47": 10.20952, + "48": 9.93178, + "49": 9.61584, + "50": 9.98565 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5474.0, + "2": 5853.0, + "3": 5875.0, + "4": 6041.0, + "5": 6601.0, + "6": 6654.0, + "7": 6135.0, + "8": 5761.0, + "9": 6505.0, + "10": 5497.0, + "11": 6994.0, + "12": 6523.0, + "13": 6807.0, + "14": 6969.0, + "15": 6154.0, + "16": 6667.0, + "17": 6368.0, + "18": 6298.0, + "19": 6353.0, + "20": 5998.0, + "21": 6264.0, + "22": 5628.0, + "23": 6620.0, + "24": 6063.0, + "25": 5649.0, + "26": 6226.0, + "27": 6409.0, + "28": 6790.0, + "29": 7055.0, + "30": 6430.0, + "31": 5565.0, + "32": 6615.0, + "33": 
6969.0, + "34": 6107.0, + "35": 6538.0, + "36": 6486.0, + "37": 7272.0, + "38": 6923.0, + "39": 7497.0, + "40": 6997.0, + "41": 6747.0, + "42": 7228.0, + "43": 6629.0, + "44": 6752.0, + "45": 6557.0, + "46": 6904.0, + "47": 7474.0, + "48": 7165.0, + "49": 7244.0, + "50": 7331.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 491766784.0, + "2": 491767296.0, + "3": 491765760.0, + "4": 491767296.0, + "5": 491766784.0, + "6": 491767808.0, + "7": 491767296.0, + "8": 491768320.0, + "9": 491767808.0, + "10": 491767296.0, + "11": 491765248.0, + "12": 491764736.0, + "13": 491766272.0, + "14": 491767808.0, + "15": 491768832.0, + "16": 491769856.0, + "17": 491767296.0, + "18": 491765248.0, + "19": 491766272.0, + "20": 491766784.0, + "21": 491768320.0, + "22": 491768320.0, + "23": 491765760.0, + "24": 491766272.0, + "25": 491766272.0, + "26": 491767296.0, + "27": 491766784.0, + "28": 491767296.0, + "29": 491766272.0, + "30": 491766272.0, + "31": 491767808.0, + "32": 491765760.0, + "33": 491764736.0, + "34": 491768320.0, + "35": 491769344.0, + "36": 491765760.0, + "37": 491765248.0, + "38": 491766272.0, + "39": 491767808.0, + "40": 491765760.0, + "41": 491768320.0, + "42": 491766272.0, + "43": 491768832.0, + "44": 491768320.0, + "45": 491765248.0, + "46": 491768320.0, + "47": 491765760.0, + "48": 491766784.0, + "49": 491766784.0, + "50": 491765248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1047229440.0, + "2": 1213900288.0, + "3": 1213900288.0, + "4": 1213900288.0, + "5": 1213900288.0, + "6": 1213900288.0, + "7": 1213900288.0, + "8": 1213900288.0, + "9": 1213900288.0, + "10": 1213900288.0, + "11": 1213900288.0, + "12": 1213900288.0, + "13": 1213900288.0, + "14": 1213900288.0, + "15": 1213900288.0, + "16": 1213900288.0, + "17": 1213900288.0, + "18": 1213900288.0, + "19": 1213900288.0, + "20": 1213900288.0, + "21": 1213900288.0, + 
"22": 1213900288.0, + "23": 1213900288.0, + "24": 1213900288.0, + "25": 1213900288.0, + "26": 1213900288.0, + "27": 1213900288.0, + "28": 1213900288.0, + "29": 1213900288.0, + "30": 1213900288.0, + "31": 1213900288.0, + "32": 1213900288.0, + "33": 1213900288.0, + "34": 1213900288.0, + "35": 1213900288.0, + "36": 1213900288.0, + "37": 1213900288.0, + "38": 1213900288.0, + "39": 1213900288.0, + "40": 1213900288.0, + "41": 1213900288.0, + "42": 1213900288.0, + "43": 1213900288.0, + "44": 1213900288.0, + "45": 1213900288.0, + "46": 1213900288.0, + "47": 1213900288.0, + "48": 1213900288.0, + "49": 1213900288.0, + "50": 1213900288.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 15.17122, + "2": 0.48582, + "3": 0.38154, + "4": 0.40574, + "5": 0.36399, + "6": 0.36563, + "7": 0.3696, + "8": 0.36586, + "9": 0.36758, + "10": 0.36149, + "11": 0.37339, + "12": 0.36971, + "13": 0.36807, + "14": 0.36325, + "15": 0.36851, + "16": 0.36056, + "17": 0.36306, + "18": 0.36443, + "19": 0.36656, + "20": 0.36899, + "21": 0.35832, + "22": 0.35751, + "23": 0.36137, + "24": 0.35806, + "25": 0.35888, + "26": 0.36389, + "27": 0.35895, + "28": 0.36593, + "29": 0.36043, + "30": 0.36535, + "31": 0.38123, + "32": 0.36798, + "33": 0.36325, + "34": 0.3734, + "35": 0.37508, + "36": 0.37043, + "37": 0.38008, + "38": 0.37006, + "39": 0.37268, + "40": 0.37049, + "41": 0.37086, + "42": 0.36713, + "43": 0.37942, + "44": 0.38971, + "45": 0.37293, + "46": 0.41366, + "47": 0.39088, + "48": 0.37854, + "49": 0.41143, + "50": 0.37319 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..eb4665ad7e2 
--- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.80475, + "2": 10.821, + "3": 10.8216, + "4": 10.79306, + "5": 10.84831, + "6": 10.85888, + "7": 10.83177, + "8": 10.82362, + "9": 10.83757, + "10": 10.78732, + "11": 10.86732, + "12": 10.85395, + "13": 10.86171, + "14": 10.88343, + "15": 10.79765, + "16": 10.79986, + "17": 10.76238, + "18": 10.80286, + "19": 10.7945, + "20": 10.71733, + "21": 10.70194, + "22": 10.55147, + "23": 10.72167, + "24": 10.60698, + "25": 10.54614, + "26": 10.6136, + "27": 10.63974, + "28": 10.60486, + "29": 10.62277, + "30": 10.41109, + "31": 10.1456, + "32": 10.51017, + "33": 10.50089, + "34": 10.25812, + "35": 10.3154, + "36": 10.27895, + "37": 10.41061, + "38": 10.25908, + "39": 10.45334, + "40": 10.1604, + "41": 10.20557, + "42": 10.26792, + "43": 9.90468, + "44": 10.03233, + "45": 9.91098, + "46": 9.87857, + "47": 10.20952, + "48": 9.93178, + "49": 9.61584, + "50": 9.98565 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5474.0, + "2": 5853.0, + "3": 5875.0, + "4": 6041.0, + "5": 6601.0, + "6": 6654.0, + "7": 6135.0, + "8": 5761.0, + "9": 6505.0, + "10": 5497.0, + "11": 6994.0, + "12": 6523.0, + "13": 6807.0, + "14": 6969.0, + "15": 6154.0, + "16": 6667.0, + "17": 6368.0, + "18": 6298.0, + "19": 6353.0, + "20": 5998.0, + "21": 6264.0, + "22": 5628.0, + "23": 6620.0, + "24": 6063.0, + "25": 5649.0, + "26": 6226.0, + "27": 6409.0, + "28": 6790.0, + "29": 7055.0, + "30": 6430.0, + "31": 5565.0, + "32": 6615.0, + "33": 6969.0, + "34": 6107.0, + "35": 6538.0, + "36": 6486.0, + "37": 7272.0, + "38": 6923.0, + "39": 7497.0, + "40": 6997.0, + "41": 6747.0, + "42": 7228.0, + "43": 6629.0, + "44": 6752.0, + "45": 6557.0, + "46": 6904.0, + "47": 
7474.0, + "48": 7165.0, + "49": 7244.0, + "50": 7331.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 491766784.0, + "2": 491767296.0, + "3": 491765760.0, + "4": 491767296.0, + "5": 491766784.0, + "6": 491767808.0, + "7": 491767296.0, + "8": 491768320.0, + "9": 491767808.0, + "10": 491767296.0, + "11": 491765248.0, + "12": 491764736.0, + "13": 491766272.0, + "14": 491767808.0, + "15": 491768832.0, + "16": 491769856.0, + "17": 491767296.0, + "18": 491765248.0, + "19": 491766272.0, + "20": 491766784.0, + "21": 491768320.0, + "22": 491768320.0, + "23": 491765760.0, + "24": 491766272.0, + "25": 491766272.0, + "26": 491767296.0, + "27": 491766784.0, + "28": 491767296.0, + "29": 491766272.0, + "30": 491766272.0, + "31": 491767808.0, + "32": 491765760.0, + "33": 491764736.0, + "34": 491768320.0, + "35": 491769344.0, + "36": 491765760.0, + "37": 491765248.0, + "38": 491766272.0, + "39": 491767808.0, + "40": 491765760.0, + "41": 491768320.0, + "42": 491766272.0, + "43": 491768832.0, + "44": 491768320.0, + "45": 491765248.0, + "46": 491768320.0, + "47": 491765760.0, + "48": 491766784.0, + "49": 491766784.0, + "50": 491765248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1047229440.0, + "2": 1213900288.0, + "3": 1213900288.0, + "4": 1213900288.0, + "5": 1213900288.0, + "6": 1213900288.0, + "7": 1213900288.0, + "8": 1213900288.0, + "9": 1213900288.0, + "10": 1213900288.0, + "11": 1213900288.0, + "12": 1213900288.0, + "13": 1213900288.0, + "14": 1213900288.0, + "15": 1213900288.0, + "16": 1213900288.0, + "17": 1213900288.0, + "18": 1213900288.0, + "19": 1213900288.0, + "20": 1213900288.0, + "21": 1213900288.0, + "22": 1213900288.0, + "23": 1213900288.0, + "24": 1213900288.0, + "25": 1213900288.0, + "26": 1213900288.0, + "27": 1213900288.0, + "28": 1213900288.0, + "29": 1213900288.0, + "30": 1213900288.0, + "31": 1213900288.0, + "32": 
1213900288.0, + "33": 1213900288.0, + "34": 1213900288.0, + "35": 1213900288.0, + "36": 1213900288.0, + "37": 1213900288.0, + "38": 1213900288.0, + "39": 1213900288.0, + "40": 1213900288.0, + "41": 1213900288.0, + "42": 1213900288.0, + "43": 1213900288.0, + "44": 1213900288.0, + "45": 1213900288.0, + "46": 1213900288.0, + "47": 1213900288.0, + "48": 1213900288.0, + "49": 1213900288.0, + "50": 1213900288.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.31352, + "2": 0.50754, + "3": 0.44486, + "4": 0.4668, + "5": 0.42238, + "6": 0.42115, + "7": 0.42604, + "8": 0.4217, + "9": 0.42265, + "10": 0.41522, + "11": 0.41976, + "12": 0.41287, + "13": 0.42113, + "14": 0.41948, + "15": 0.4211, + "16": 0.41519, + "17": 0.42043, + "18": 0.415, + "19": 0.42142, + "20": 0.42878, + "21": 0.4145, + "22": 0.42054, + "23": 0.41581, + "24": 0.42934, + "25": 0.43897, + "26": 0.42648, + "27": 0.42242, + "28": 0.42576, + "29": 0.42795, + "30": 0.42485, + "31": 0.43439, + "32": 0.42257, + "33": 0.41924, + "34": 0.43519, + "35": 0.43865, + "36": 0.42518, + "37": 0.42435, + "38": 0.42597, + "39": 0.42134, + "40": 0.42937, + "41": 0.42822, + "42": 0.42413, + "43": 0.44197, + "44": 0.42413, + "45": 0.42687, + "46": 0.46081, + "47": 0.45208, + "48": 0.43527, + "49": 0.44658, + "50": 0.41965 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..a0f445c56dc --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm 
loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79288, + "2": 10.81339, + "3": 10.8144, + "4": 10.77657, + "5": 10.828, + "6": 10.84293, + "7": 10.81053, + "8": 10.80366, + "9": 10.81505, + "10": 10.76831, + "11": 10.86961, + "12": 10.83911, + "13": 10.85295, + "14": 10.86545, + "15": 10.79073, + "16": 10.78351, + "17": 10.7488, + "18": 10.79251, + "19": 10.78822, + "20": 10.7066, + "21": 10.68957, + "22": 10.53861, + "23": 10.70542, + "24": 10.59106, + "25": 10.54061, + "26": 10.59556, + "27": 10.61836, + "28": 10.59188, + "29": 10.6008, + "30": 10.39485, + "31": 10.12988, + "32": 10.49622, + "33": 10.48801, + "34": 10.24185, + "35": 10.30488, + "36": 10.25446, + "37": 10.38879, + "38": 10.24767, + "39": 10.43653, + "40": 10.13079, + "41": 10.18439, + "42": 10.25364, + "43": 9.89225, + "44": 10.0224, + "45": 9.90236, + "46": 9.88337, + "47": 10.1948, + "48": 9.91124, + "49": 9.59882, + "50": 9.97938 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5601.0, + "2": 5974.0, + "3": 5786.0, + "4": 5760.0, + "5": 6601.0, + "6": 6753.0, + "7": 6231.0, + "8": 5822.0, + "9": 6446.0, + "10": 5254.0, + "11": 6740.0, + "12": 6313.0, + "13": 6672.0, + "14": 6909.0, + "15": 6250.0, + "16": 6391.0, + "17": 6290.0, + "18": 6086.0, + "19": 6278.0, + "20": 5969.0, + "21": 6461.0, + "22": 5583.0, + "23": 6602.0, + "24": 5982.0, + "25": 5816.0, + "26": 6162.0, + "27": 6378.0, + "28": 6931.0, + "29": 7197.0, + "30": 6181.0, + "31": 5568.0, + "32": 6876.0, + "33": 6980.0, + "34": 6144.0, + "35": 6751.0, + "36": 6501.0, + "37": 7367.0, + "38": 7095.0, + "39": 7558.0, + "40": 6831.0, + "41": 6929.0, + "42": 7131.0, + "43": 6817.0, + "44": 6736.0, + "45": 6881.0, + "46": 7006.0, + "47": 7622.0, + "48": 7384.0, + "49": 7363.0, + "50": 7684.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 458211840.0, + "2": 458213376.0, + 
"3": 458214400.0, + "4": 458211840.0, + "5": 458212864.0, + "6": 458215424.0, + "7": 458212864.0, + "8": 458212864.0, + "9": 458214912.0, + "10": 458214912.0, + "11": 458214912.0, + "12": 458213888.0, + "13": 458214912.0, + "14": 458213376.0, + "15": 458215424.0, + "16": 458214400.0, + "17": 458214400.0, + "18": 458215424.0, + "19": 458209792.0, + "20": 458212864.0, + "21": 458211840.0, + "22": 458219520.0, + "23": 458213888.0, + "24": 458214912.0, + "25": 458215424.0, + "26": 458213376.0, + "27": 458213888.0, + "28": 458213888.0, + "29": 458212864.0, + "30": 458211840.0, + "31": 458218496.0, + "32": 458214912.0, + "33": 458212352.0, + "34": 458214400.0, + "35": 458214400.0, + "36": 458215424.0, + "37": 458213888.0, + "38": 458213888.0, + "39": 458213888.0, + "40": 458214912.0, + "41": 458216448.0, + "42": 458213888.0, + "43": 458217472.0, + "44": 458212864.0, + "45": 458213888.0, + "46": 458216960.0, + "47": 458214400.0, + "48": 458212352.0, + "49": 458215424.0, + "50": 458214912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1016394240.0, + "2": 1180904960.0, + "3": 1180904960.0, + "4": 1180904960.0, + "5": 1180904960.0, + "6": 1180904960.0, + "7": 1180904960.0, + "8": 1180904960.0, + "9": 1180934144.0, + "10": 1180934144.0, + "11": 1180934144.0, + "12": 1180934144.0, + "13": 1180934144.0, + "14": 1180934144.0, + "15": 1180990976.0, + "16": 1180990976.0, + "17": 1180990976.0, + "18": 1180990976.0, + "19": 1180990976.0, + "20": 1180990976.0, + "21": 1180990976.0, + "22": 1180990976.0, + "23": 1180990976.0, + "24": 1180990976.0, + "25": 1181222912.0, + "26": 1181222912.0, + "27": 1181222912.0, + "28": 1181222912.0, + "29": 1181222912.0, + "30": 1181222912.0, + "31": 1181222912.0, + "32": 1181222912.0, + "33": 1181222912.0, + "34": 1181222912.0, + "35": 1181468160.0, + "36": 1181468160.0, + "37": 1181468160.0, + "38": 1181468160.0, + "39": 1181468160.0, + "40": 1181468160.0, + "41": 
1181468160.0, + "42": 1181468160.0, + "43": 1181468160.0, + "44": 1183467008.0, + "45": 1183467008.0, + "46": 1183467008.0, + "47": 1183467008.0, + "48": 1183467008.0, + "49": 1183467008.0, + "50": 1183467008.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 15.04132, + "2": 0.66987, + "3": 0.59594, + "4": 0.61167, + "5": 0.59747, + "6": 0.59554, + "7": 0.59774, + "8": 0.59108, + "9": 0.5993, + "10": 0.58738, + "11": 0.60339, + "12": 0.58716, + "13": 0.58921, + "14": 0.59746, + "15": 0.5794, + "16": 0.59504, + "17": 0.58538, + "18": 0.58652, + "19": 0.59212, + "20": 0.58939, + "21": 0.59669, + "22": 0.58476, + "23": 0.58776, + "24": 0.58842, + "25": 0.58684, + "26": 0.59629, + "27": 0.58034, + "28": 0.59676, + "29": 0.58449, + "30": 0.59286, + "31": 0.59012, + "32": 0.58016, + "33": 0.59804, + "34": 0.58394, + "35": 0.67758, + "36": 0.87613, + "37": 0.81369, + "38": 0.83448, + "39": 0.86288, + "40": 0.58264, + "41": 0.59313, + "42": 0.57727, + "43": 0.58849, + "44": 0.57983, + "45": 0.58518, + "46": 0.58778, + "47": 0.58381, + "48": 0.59237, + "49": 0.58055, + "50": 0.59541 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..582aec1d02a --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79288, + "2": 10.81339, + "3": 10.8144, + "4": 10.77657, + "5": 10.828, + "6": 10.84293, + "7": 10.81053, + "8": 
10.80366, + "9": 10.81505, + "10": 10.76831, + "11": 10.86961, + "12": 10.83911, + "13": 10.85295, + "14": 10.86545, + "15": 10.79073, + "16": 10.78351, + "17": 10.7488, + "18": 10.79251, + "19": 10.78822, + "20": 10.7066, + "21": 10.68957, + "22": 10.53861, + "23": 10.70542, + "24": 10.59106, + "25": 10.54061, + "26": 10.59556, + "27": 10.61836, + "28": 10.59188, + "29": 10.6008, + "30": 10.39485, + "31": 10.12988, + "32": 10.49622, + "33": 10.48801, + "34": 10.24185, + "35": 10.30488, + "36": 10.25446, + "37": 10.38879, + "38": 10.24767, + "39": 10.43653, + "40": 10.13079, + "41": 10.18439, + "42": 10.25364, + "43": 9.89225, + "44": 10.0224, + "45": 9.90236, + "46": 9.88337, + "47": 10.1948, + "48": 9.91124, + "49": 9.59882, + "50": 9.97938 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5601.0, + "2": 5974.0, + "3": 5786.0, + "4": 5760.0, + "5": 6601.0, + "6": 6753.0, + "7": 6231.0, + "8": 5822.0, + "9": 6446.0, + "10": 5254.0, + "11": 6740.0, + "12": 6313.0, + "13": 6672.0, + "14": 6909.0, + "15": 6250.0, + "16": 6391.0, + "17": 6290.0, + "18": 6086.0, + "19": 6278.0, + "20": 5969.0, + "21": 6461.0, + "22": 5583.0, + "23": 6602.0, + "24": 5982.0, + "25": 5816.0, + "26": 6162.0, + "27": 6378.0, + "28": 6931.0, + "29": 7197.0, + "30": 6181.0, + "31": 5568.0, + "32": 6876.0, + "33": 6980.0, + "34": 6144.0, + "35": 6751.0, + "36": 6501.0, + "37": 7367.0, + "38": 7095.0, + "39": 7558.0, + "40": 6831.0, + "41": 6929.0, + "42": 7131.0, + "43": 6817.0, + "44": 6736.0, + "45": 6881.0, + "46": 7006.0, + "47": 7622.0, + "48": 7384.0, + "49": 7363.0, + "50": 7684.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 458211840.0, + "2": 458213376.0, + "3": 458214400.0, + "4": 458211840.0, + "5": 458212864.0, + "6": 458215424.0, + "7": 458212864.0, + "8": 458212864.0, + "9": 458214912.0, + "10": 458214912.0, + "11": 458214912.0, + "12": 458213888.0, + 
"13": 458214912.0, + "14": 458213376.0, + "15": 458215424.0, + "16": 458214400.0, + "17": 458214400.0, + "18": 458215424.0, + "19": 458209792.0, + "20": 458212864.0, + "21": 458211840.0, + "22": 458219520.0, + "23": 458213888.0, + "24": 458214912.0, + "25": 458215424.0, + "26": 458213376.0, + "27": 458213888.0, + "28": 458213888.0, + "29": 458212864.0, + "30": 458211840.0, + "31": 458218496.0, + "32": 458214912.0, + "33": 458212352.0, + "34": 458214400.0, + "35": 458214400.0, + "36": 458215424.0, + "37": 458213888.0, + "38": 458213888.0, + "39": 458213888.0, + "40": 458214912.0, + "41": 458216448.0, + "42": 458213888.0, + "43": 458217472.0, + "44": 458212864.0, + "45": 458213888.0, + "46": 458216960.0, + "47": 458214400.0, + "48": 458212352.0, + "49": 458215424.0, + "50": 458214912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1016394240.0, + "2": 1180904960.0, + "3": 1180904960.0, + "4": 1180904960.0, + "5": 1180904960.0, + "6": 1180904960.0, + "7": 1180904960.0, + "8": 1180904960.0, + "9": 1180934144.0, + "10": 1180934144.0, + "11": 1180934144.0, + "12": 1180934144.0, + "13": 1180934144.0, + "14": 1180934144.0, + "15": 1180990976.0, + "16": 1180990976.0, + "17": 1180990976.0, + "18": 1180990976.0, + "19": 1180990976.0, + "20": 1180990976.0, + "21": 1180990976.0, + "22": 1180990976.0, + "23": 1180990976.0, + "24": 1180990976.0, + "25": 1181222912.0, + "26": 1181222912.0, + "27": 1181222912.0, + "28": 1181222912.0, + "29": 1181222912.0, + "30": 1181222912.0, + "31": 1181222912.0, + "32": 1181222912.0, + "33": 1181222912.0, + "34": 1181222912.0, + "35": 1181468160.0, + "36": 1181468160.0, + "37": 1181468160.0, + "38": 1181468160.0, + "39": 1181468160.0, + "40": 1181468160.0, + "41": 1181468160.0, + "42": 1181468160.0, + "43": 1181468160.0, + "44": 1183467008.0, + "45": 1183467008.0, + "46": 1183467008.0, + "47": 1183467008.0, + "48": 1183467008.0, + "49": 1183467008.0, + "50": 1183467008.0 + 
} + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 20.00855, + "2": 0.70527, + "3": 0.59745, + "4": 0.60744, + "5": 0.61261, + "6": 0.61644, + "7": 0.60659, + "8": 0.59978, + "9": 0.59747, + "10": 0.59353, + "11": 0.59787, + "12": 0.59073, + "13": 0.58796, + "14": 0.5969, + "15": 0.59327, + "16": 0.59709, + "17": 0.58809, + "18": 0.59153, + "19": 0.59156, + "20": 0.58419, + "21": 0.59403, + "22": 0.58324, + "23": 0.59332, + "24": 0.59867, + "25": 0.58715, + "26": 0.59642, + "27": 0.58832, + "28": 0.59214, + "29": 0.58522, + "30": 0.58573, + "31": 0.59427, + "32": 0.58249, + "33": 0.59123, + "34": 0.582, + "35": 0.59565, + "36": 0.59193, + "37": 0.58268, + "38": 0.59363, + "39": 0.58071, + "40": 0.58884, + "41": 0.58702, + "42": 0.58338, + "43": 0.58987, + "44": 0.58365, + "45": 0.59495, + "46": 0.58622, + "47": 0.58253, + "48": 0.59065, + "49": 0.58385, + "50": 0.59154 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json index 6cc67512418..daecd2a50e1 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.81692, + "2": 10.82534, + "3": 10.82401, + "4": 10.79801, "5": 10.8415, + "6": 10.85912, + "7": 10.81927, + "8": 10.81789, + "9": 10.83554, "10": 10.78266, + "11": 10.85455, + "12": 10.84582, + "13": 10.84996, + "14": 10.87821, "15": 10.80684, + "16": 10.80662, 
+ "17": 10.76305, + "18": 10.80188, + "19": 10.79303, "20": 10.73474, + "21": 10.71067, + "22": 10.57636, + "23": 10.7196, + "24": 10.63305, "25": 10.56916, + "26": 10.62589, + "27": 10.64466, + "28": 10.60792, + "29": 10.61761, "30": 10.42214, + "31": 10.17719, + "32": 10.50701, + "33": 10.50561, + "34": 10.27485, "35": 10.3276, + "36": 10.29275, + "37": 10.40262, + "38": 10.25679, + "39": 10.43615, "40": 10.16589, + "41": 10.20032, + "42": 10.27424, + "43": 9.93044, + "44": 10.04415, "45": 9.92936, + "46": 9.89984, + "47": 10.18573, + "48": 9.93082, + "49": 9.6257, "50": 9.98437 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 12899.0, + "2": 14592.0, + "3": 14243.0, + "4": 13886.0, "5": 15732.0, + "6": 16250.0, + "7": 15453.0, + "8": 13386.0, + "9": 15159.0, "10": 12804.0, + "11": 16441.0, + "12": 14951.0, + "13": 16151.0, + "14": 16330.0, "15": 15144.0, + "16": 15588.0, + "17": 15315.0, + "18": 14902.0, + "19": 15436.0, "20": 13814.0, + "21": 13977.0, + "22": 12814.0, + "23": 16615.0, + "24": 13785.0, "25": 13451.0, + "26": 14681.0, + "27": 15288.0, + "28": 16290.0, + "29": 16880.0, "30": 14583.0, + "31": 13272.0, + "32": 15972.0, + "33": 16904.0, + "34": 14406.0, "35": 14981.0, + "36": 15576.0, + "37": 17584.0, + "38": 16136.0, + "39": 17650.0, "40": 16506.0, + "41": 16391.0, + "42": 17008.0, + "43": 15459.0, + "44": 15097.0, "45": 16136.0, + "46": 16845.0, + "47": 19101.0, + "48": 16405.0, + "49": 16558.0, "50": 18439.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 659394560.0, + "2": 659346944.0, + "3": 659401728.0, + "4": 659351040.0, "5": 659623424.0, + "6": 659348480.0, + "7": 659508736.0, + "8": 659353088.0, + "9": 659383296.0, "10": 659347456.0, + "11": 659350016.0, + "12": 659437056.0, + "13": 659356160.0, + "14": 659702272.0, "15": 659658240.0, + "16": 659450880.0, + "17": 659438080.0, + "18": 659384320.0, + 
"19": 659492352.0, "20": 659372544.0, + "21": 659350016.0, + "22": 659347456.0, + "23": 659348992.0, + "24": 659430400.0, "25": 659347968.0, + "26": 659378176.0, + "27": 659353088.0, + "28": 659346944.0, + "29": 659440640.0, "30": 659732480.0, + "31": 659361792.0, + "32": 659345920.0, + "33": 659473920.0, + "34": 660008448.0, "35": 659819520.0, + "36": 659363840.0, + "37": 659418624.0, + "38": 659351040.0, + "39": 659449344.0, "40": 659586560.0, + "41": 659387392.0, + "42": 659476480.0, + "43": 659567104.0, + "44": 659344384.0, "45": 659346944.0, + "46": 659466752.0, + "47": 659345408.0, + "48": 659835392.0, + "49": 659494400.0, "50": 659346432.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1853294080.0, + "2": 2083995136.0, + "3": 2084402688.0, + "4": 2084433408.0, "5": 2084433408.0, + "6": 2084433408.0, + "7": 2085503488.0, + "8": 2085503488.0, + "9": 2085503488.0, "10": 2085503488.0, + "11": 2085503488.0, + "12": 2085503488.0, + "13": 2085503488.0, + "14": 2085503488.0, "15": 2085503488.0, + "16": 2085503488.0, + "17": 2085503488.0, + "18": 2085503488.0, + "19": 2085503488.0, "20": 2085503488.0, + "21": 2085503488.0, + "22": 2085503488.0, + "23": 2085503488.0, + "24": 2085503488.0, "25": 2085503488.0, + "26": 2085503488.0, + "27": 2085503488.0, + "28": 2085503488.0, + "29": 2085503488.0, "30": 2085503488.0, + "31": 2085503488.0, + "32": 2085503488.0, + "33": 2085503488.0, + "34": 2085503488.0, "35": 2085503488.0, + "36": 2085503488.0, + "37": 2085503488.0, + "38": 2085503488.0, + "39": 2085503488.0, "40": 2085503488.0, + "41": 2085503488.0, + "42": 2085503488.0, + "43": 2085503488.0, + "44": 2085503488.0, "45": 2085503488.0, + "46": 2085503488.0, + "47": 2085503488.0, + "48": 2085503488.0, + "49": 2085503488.0, "50": 2085503488.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 15.75879, - "5": 0.23618, - 
"10": 0.23433, - "15": 0.2393, - "20": 0.23468, - "25": 0.22203, - "30": 0.22111, - "35": 0.22708, - "40": 0.22283, - "45": 0.23253, - "50": 0.22333 + "1": 13.70163, + "2": 0.32995, + "3": 0.28329, + "4": 0.30327, + "5": 0.26887, + "6": 0.26248, + "7": 0.28317, + "8": 0.26472, + "9": 0.26858, + "10": 0.26512, + "11": 0.28434, + "12": 0.25515, + "13": 0.26048, + "14": 0.25624, + "15": 0.27581, + "16": 0.25102, + "17": 0.25664, + "18": 0.25657, + "19": 0.25806, + "20": 0.2591, + "21": 0.25054, + "22": 0.26613, + "23": 0.2877, + "24": 0.2503, + "25": 0.25227, + "26": 0.26224, + "27": 0.25269, + "28": 0.26737, + "29": 0.25139, + "30": 0.25065, + "31": 0.30552, + "32": 0.25136, + "33": 0.2573, + "34": 0.26376, + "35": 0.25668, + "36": 0.25566, + "37": 0.25143, + "38": 0.2666, + "39": 0.25121, + "40": 0.25249, + "41": 0.25912, + "42": 0.25442, + "43": 0.2721, + "44": 0.25368, + "45": 0.26494, + "46": 0.27206, + "47": 0.25676, + "48": 0.27981, + "49": 0.31376, + "50": 0.26619 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..075265941da --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82004, + "2": 10.8392, + "3": 10.81124, + "4": 10.81983, + "5": 10.84794, + "6": 10.8608, + "7": 10.84085, + "8": 10.84432, + "9": 10.8504, + "10": 10.79461, + "11": 10.85658, + "12": 10.84848, + "13": 10.86929, + "14": 10.8667, + "15": 10.82911, + "16": 10.81111, + "17": 10.79027, + "18": 10.80981, + "19": 
10.81143, + "20": 10.73175, + "21": 10.71285, + "22": 10.58199, + "23": 10.72, + "24": 10.61704, + "25": 10.57964, + "26": 10.63372, + "27": 10.6365, + "28": 10.60641, + "29": 10.61561, + "30": 10.40859, + "31": 10.17068, + "32": 10.49958, + "33": 10.4963, + "34": 10.25574, + "35": 10.31503, + "36": 10.28536, + "37": 10.38742, + "38": 10.24676, + "39": 10.44249, + "40": 10.14367, + "41": 10.19116, + "42": 10.25654, + "43": 9.90671, + "44": 10.02653, + "45": 9.914, + "46": 9.89613, + "47": 10.18885, + "48": 9.92993, + "49": 9.61419, + "50": 9.97565 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12826.0, + "2": 14613.0, + "3": 14549.0, + "4": 13422.0, + "5": 15951.0, + "6": 16055.0, + "7": 15208.0, + "8": 12944.0, + "9": 15110.0, + "10": 12611.0, + "11": 16586.0, + "12": 14954.0, + "13": 15925.0, + "14": 16182.0, + "15": 14834.0, + "16": 16023.0, + "17": 15486.0, + "18": 15116.0, + "19": 15584.0, + "20": 13675.0, + "21": 13873.0, + "22": 12917.0, + "23": 16766.0, + "24": 13924.0, + "25": 13129.0, + "26": 14794.0, + "27": 15169.0, + "28": 16393.0, + "29": 16719.0, + "30": 14652.0, + "31": 13126.0, + "32": 15987.0, + "33": 17372.0, + "34": 14206.0, + "35": 15183.0, + "36": 15837.0, + "37": 17507.0, + "38": 16617.0, + "39": 17712.0, + "40": 16971.0, + "41": 16795.0, + "42": 17304.0, + "43": 15578.0, + "44": 15564.0, + "45": 16188.0, + "46": 17443.0, + "47": 19238.0, + "48": 16575.0, + "49": 16273.0, + "50": 18998.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 629738496.0, + "2": 629741056.0, + "3": 629741056.0, + "4": 629737472.0, + "5": 629945856.0, + "6": 629820928.0, + "7": 629735936.0, + "8": 629741056.0, + "9": 629863424.0, + "10": 629771776.0, + "11": 629848064.0, + "12": 629767168.0, + "13": 629744128.0, + "14": 629783040.0, + "15": 629743616.0, + "16": 629762560.0, + "17": 629806592.0, + "18": 629742592.0, + "19": 629779456.0, + "20": 
629873664.0, + "21": 629740032.0, + "22": 629789696.0, + "23": 629762560.0, + "24": 630001664.0, + "25": 629747712.0, + "26": 629774848.0, + "27": 629774848.0, + "28": 629755392.0, + "29": 629753856.0, + "30": 629757440.0, + "31": 629736448.0, + "32": 629881344.0, + "33": 629818880.0, + "34": 629858304.0, + "35": 629787136.0, + "36": 630003712.0, + "37": 629769216.0, + "38": 629809664.0, + "39": 629830144.0, + "40": 629740544.0, + "41": 629737984.0, + "42": 630415360.0, + "43": 629748224.0, + "44": 629811712.0, + "45": 629760000.0, + "46": 629824000.0, + "47": 629742080.0, + "48": 629881344.0, + "49": 630102528.0, + "50": 629818880.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1822359552.0, + "2": 2052654592.0, + "3": 2053963264.0, + "4": 2053963264.0, + "5": 2053963264.0, + "6": 2053963264.0, + "7": 2054027776.0, + "8": 2054027776.0, + "9": 2054027776.0, + "10": 2054027776.0, + "11": 2054060032.0, + "12": 2054060032.0, + "13": 2054418944.0, + "14": 2054418944.0, + "15": 2054439936.0, + "16": 2054439936.0, + "17": 2054439936.0, + "18": 2054439936.0, + "19": 2054439936.0, + "20": 2054439936.0, + "21": 2054439936.0, + "22": 2054439936.0, + "23": 2054439936.0, + "24": 2054439936.0, + "25": 2054439936.0, + "26": 2054439936.0, + "27": 2054439936.0, + "28": 2054439936.0, + "29": 2054439936.0, + "30": 2054439936.0, + "31": 2054439936.0, + "32": 2054439936.0, + "33": 2054439936.0, + "34": 2054439936.0, + "35": 2054439936.0, + "36": 2054439936.0, + "37": 2054439936.0, + "38": 2054439936.0, + "39": 2054439936.0, + "40": 2054439936.0, + "41": 2054439936.0, + "42": 2054439936.0, + "43": 2054439936.0, + "44": 2054439936.0, + "45": 2054439936.0, + "46": 2054439936.0, + "47": 2054769152.0, + "48": 2054769152.0, + "49": 2054769152.0, + "50": 2054769152.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 7.01599, + "2": 0.45355, + "3": 0.36565, + 
"4": 0.36091, + "5": 0.35921, + "6": 0.35888, + "7": 0.35757, + "8": 0.35792, + "9": 0.35736, + "10": 0.3584, + "11": 0.359, + "12": 0.35941, + "13": 0.35718, + "14": 0.35719, + "15": 0.35705, + "16": 0.35632, + "17": 0.3593, + "18": 0.35903, + "19": 0.35833, + "20": 0.35817, + "21": 0.36067, + "22": 0.36054, + "23": 0.35773, + "24": 0.35639, + "25": 0.35602, + "26": 0.35542, + "27": 0.35615, + "28": 0.35911, + "29": 0.35797, + "30": 0.35947, + "31": 0.358, + "32": 0.35582, + "33": 0.35562, + "34": 0.35699, + "35": 0.35618, + "36": 0.35545, + "37": 0.35505, + "38": 0.35456, + "39": 0.35537, + "40": 0.3546, + "41": 0.35684, + "42": 0.35798, + "43": 0.35335, + "44": 0.3508, + "45": 0.35489, + "46": 0.35218, + "47": 0.35103, + "48": 0.3519, + "49": 0.35301, + "50": 0.34945 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..cd548b7f7bb --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82004, + "2": 10.8392, + "3": 10.81124, + "4": 10.81983, + "5": 10.84794, + "6": 10.8608, + "7": 10.84085, + "8": 10.84432, + "9": 10.8504, + "10": 10.79461, + "11": 10.85658, + "12": 10.84848, + "13": 10.86929, + "14": 10.8667, + "15": 10.82911, + "16": 10.81111, + "17": 10.79027, + "18": 10.80981, + "19": 10.81143, + "20": 10.73175, + "21": 10.71285, + "22": 10.58199, + "23": 10.72, + "24": 10.61704, + "25": 10.57964, + "26": 10.63372, + "27": 10.6365, + "28": 10.60641, + "29": 10.61561, + "30": 10.40859, + "31": 
10.17068, + "32": 10.49958, + "33": 10.4963, + "34": 10.25574, + "35": 10.31503, + "36": 10.28536, + "37": 10.38742, + "38": 10.24676, + "39": 10.44249, + "40": 10.14367, + "41": 10.19116, + "42": 10.25654, + "43": 9.90671, + "44": 10.02653, + "45": 9.914, + "46": 9.89613, + "47": 10.18885, + "48": 9.92993, + "49": 9.61419, + "50": 9.97565 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12826.0, + "2": 14613.0, + "3": 14549.0, + "4": 13422.0, + "5": 15951.0, + "6": 16055.0, + "7": 15208.0, + "8": 12944.0, + "9": 15110.0, + "10": 12611.0, + "11": 16586.0, + "12": 14954.0, + "13": 15925.0, + "14": 16182.0, + "15": 14834.0, + "16": 16023.0, + "17": 15486.0, + "18": 15116.0, + "19": 15584.0, + "20": 13675.0, + "21": 13873.0, + "22": 12917.0, + "23": 16766.0, + "24": 13924.0, + "25": 13129.0, + "26": 14794.0, + "27": 15169.0, + "28": 16393.0, + "29": 16719.0, + "30": 14652.0, + "31": 13126.0, + "32": 15987.0, + "33": 17372.0, + "34": 14206.0, + "35": 15183.0, + "36": 15837.0, + "37": 17507.0, + "38": 16617.0, + "39": 17712.0, + "40": 16971.0, + "41": 16795.0, + "42": 17304.0, + "43": 15578.0, + "44": 15564.0, + "45": 16188.0, + "46": 17443.0, + "47": 19238.0, + "48": 16575.0, + "49": 16273.0, + "50": 18998.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 629738496.0, + "2": 629741056.0, + "3": 629741056.0, + "4": 629737472.0, + "5": 629945856.0, + "6": 629820928.0, + "7": 629735936.0, + "8": 629741056.0, + "9": 629863424.0, + "10": 629771776.0, + "11": 629848064.0, + "12": 629767168.0, + "13": 629744128.0, + "14": 629783040.0, + "15": 629743616.0, + "16": 629762560.0, + "17": 629806592.0, + "18": 629742592.0, + "19": 629779456.0, + "20": 629873664.0, + "21": 629740032.0, + "22": 629789696.0, + "23": 629762560.0, + "24": 630001664.0, + "25": 629747712.0, + "26": 629774848.0, + "27": 629774848.0, + "28": 629755392.0, + "29": 629753856.0, + "30": 
629757440.0, + "31": 629736448.0, + "32": 629881344.0, + "33": 629818880.0, + "34": 629858304.0, + "35": 629787136.0, + "36": 630003712.0, + "37": 629769216.0, + "38": 629809664.0, + "39": 629830144.0, + "40": 629740544.0, + "41": 629737984.0, + "42": 630415360.0, + "43": 629748224.0, + "44": 629811712.0, + "45": 629760000.0, + "46": 629824000.0, + "47": 629742080.0, + "48": 629881344.0, + "49": 630102528.0, + "50": 629818880.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1822359552.0, + "2": 2052654592.0, + "3": 2053963264.0, + "4": 2053963264.0, + "5": 2053963264.0, + "6": 2053963264.0, + "7": 2054027776.0, + "8": 2054027776.0, + "9": 2054027776.0, + "10": 2054027776.0, + "11": 2054060032.0, + "12": 2054060032.0, + "13": 2054418944.0, + "14": 2054418944.0, + "15": 2054439936.0, + "16": 2054439936.0, + "17": 2054439936.0, + "18": 2054439936.0, + "19": 2054439936.0, + "20": 2054439936.0, + "21": 2054439936.0, + "22": 2054439936.0, + "23": 2054439936.0, + "24": 2054439936.0, + "25": 2054439936.0, + "26": 2054439936.0, + "27": 2054439936.0, + "28": 2054439936.0, + "29": 2054439936.0, + "30": 2054439936.0, + "31": 2054439936.0, + "32": 2054439936.0, + "33": 2054439936.0, + "34": 2054439936.0, + "35": 2054439936.0, + "36": 2054439936.0, + "37": 2054439936.0, + "38": 2054439936.0, + "39": 2054439936.0, + "40": 2054439936.0, + "41": 2054439936.0, + "42": 2054439936.0, + "43": 2054439936.0, + "44": 2054439936.0, + "45": 2054439936.0, + "46": 2054439936.0, + "47": 2054769152.0, + "48": 2054769152.0, + "49": 2054769152.0, + "50": 2054769152.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 7.28409, + "2": 0.41637, + "3": 0.36538, + "4": 0.36475, + "5": 0.36291, + "6": 0.36269, + "7": 0.3621, + "8": 0.36618, + "9": 0.41513, + "10": 0.35991, + "11": 0.35833, + "12": 0.35938, + "13": 0.35969, + "14": 0.35865, + "15": 0.35898, + "16": 
0.35973, + "17": 0.35887, + "18": 0.3593, + "19": 0.35818, + "20": 0.35872, + "21": 0.36111, + "22": 0.36267, + "23": 0.36505, + "24": 0.36152, + "25": 0.35943, + "26": 0.36139, + "27": 0.35871, + "28": 0.35976, + "29": 0.36014, + "30": 0.36074, + "31": 0.36299, + "32": 0.35944, + "33": 0.36216, + "34": 0.362, + "35": 0.36095, + "36": 0.36098, + "37": 0.3688, + "38": 0.36204, + "39": 0.35854, + "40": 0.3619, + "41": 0.35612, + "42": 0.35586, + "43": 0.35734, + "44": 0.35693, + "45": 0.35773, + "46": 0.35625, + "47": 0.35614, + "48": 0.35584, + "49": 0.35496, + "50": 0.35545 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..2906cfee84e --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.81692, + "2": 10.82534, + "3": 10.82401, + "4": 10.79801, + "5": 10.8415, + "6": 10.85912, + "7": 10.81927, + "8": 10.81789, + "9": 10.83554, + "10": 10.78266, + "11": 10.85455, + "12": 10.84582, + "13": 10.84996, + "14": 10.87821, + "15": 10.80684, + "16": 10.80662, + "17": 10.76305, + "18": 10.80188, + "19": 10.79303, + "20": 10.73474, + "21": 10.71067, + "22": 10.57636, + "23": 10.7196, + "24": 10.63305, + "25": 10.56916, + "26": 10.62589, + "27": 10.64466, + "28": 10.60792, + "29": 10.61761, + "30": 10.42214, + "31": 10.17719, + "32": 10.50701, + "33": 10.50561, + "34": 10.27485, + "35": 10.3276, + "36": 10.29275, + "37": 10.40262, + "38": 10.25679, + "39": 10.43615, + "40": 10.16589, + "41": 10.20032, + "42": 
10.27424, + "43": 9.93044, + "44": 10.04415, + "45": 9.92936, + "46": 9.89984, + "47": 10.18573, + "48": 9.93082, + "49": 9.6257, + "50": 9.98437 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12899.0, + "2": 14592.0, + "3": 14243.0, + "4": 13886.0, + "5": 15732.0, + "6": 16250.0, + "7": 15453.0, + "8": 13386.0, + "9": 15159.0, + "10": 12804.0, + "11": 16441.0, + "12": 14951.0, + "13": 16151.0, + "14": 16330.0, + "15": 15144.0, + "16": 15588.0, + "17": 15315.0, + "18": 14902.0, + "19": 15436.0, + "20": 13814.0, + "21": 13977.0, + "22": 12814.0, + "23": 16615.0, + "24": 13785.0, + "25": 13451.0, + "26": 14681.0, + "27": 15288.0, + "28": 16290.0, + "29": 16880.0, + "30": 14583.0, + "31": 13272.0, + "32": 15972.0, + "33": 16904.0, + "34": 14406.0, + "35": 14981.0, + "36": 15576.0, + "37": 17584.0, + "38": 16136.0, + "39": 17650.0, + "40": 16506.0, + "41": 16391.0, + "42": 17008.0, + "43": 15459.0, + "44": 15097.0, + "45": 16136.0, + "46": 16845.0, + "47": 19101.0, + "48": 16405.0, + "49": 16558.0, + "50": 18439.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 659394560.0, + "2": 659346944.0, + "3": 659401728.0, + "4": 659351040.0, + "5": 659623424.0, + "6": 659348480.0, + "7": 659508736.0, + "8": 659353088.0, + "9": 659383296.0, + "10": 659347456.0, + "11": 659350016.0, + "12": 659437056.0, + "13": 659356160.0, + "14": 659702272.0, + "15": 659658240.0, + "16": 659450880.0, + "17": 659438080.0, + "18": 659384320.0, + "19": 659492352.0, + "20": 659372544.0, + "21": 659350016.0, + "22": 659347456.0, + "23": 659348992.0, + "24": 659430400.0, + "25": 659347968.0, + "26": 659378176.0, + "27": 659353088.0, + "28": 659346944.0, + "29": 659440640.0, + "30": 659732480.0, + "31": 659361792.0, + "32": 659345920.0, + "33": 659473920.0, + "34": 660008448.0, + "35": 659819520.0, + "36": 659363840.0, + "37": 659418624.0, + "38": 659351040.0, + "39": 
659449344.0, + "40": 659586560.0, + "41": 659387392.0, + "42": 659476480.0, + "43": 659567104.0, + "44": 659344384.0, + "45": 659346944.0, + "46": 659466752.0, + "47": 659345408.0, + "48": 659835392.0, + "49": 659494400.0, + "50": 659346432.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1853294080.0, + "2": 2083995136.0, + "3": 2084402688.0, + "4": 2084433408.0, + "5": 2084433408.0, + "6": 2084433408.0, + "7": 2085503488.0, + "8": 2085503488.0, + "9": 2085503488.0, + "10": 2085503488.0, + "11": 2085503488.0, + "12": 2085503488.0, + "13": 2085503488.0, + "14": 2085503488.0, + "15": 2085503488.0, + "16": 2085503488.0, + "17": 2085503488.0, + "18": 2085503488.0, + "19": 2085503488.0, + "20": 2085503488.0, + "21": 2085503488.0, + "22": 2085503488.0, + "23": 2085503488.0, + "24": 2085503488.0, + "25": 2085503488.0, + "26": 2085503488.0, + "27": 2085503488.0, + "28": 2085503488.0, + "29": 2085503488.0, + "30": 2085503488.0, + "31": 2085503488.0, + "32": 2085503488.0, + "33": 2085503488.0, + "34": 2085503488.0, + "35": 2085503488.0, + "36": 2085503488.0, + "37": 2085503488.0, + "38": 2085503488.0, + "39": 2085503488.0, + "40": 2085503488.0, + "41": 2085503488.0, + "42": 2085503488.0, + "43": 2085503488.0, + "44": 2085503488.0, + "45": 2085503488.0, + "46": 2085503488.0, + "47": 2085503488.0, + "48": 2085503488.0, + "49": 2085503488.0, + "50": 2085503488.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 15.33188, + "2": 0.39945, + "3": 0.26382, + "4": 0.2701, + "5": 0.24001, + "6": 0.23463, + "7": 0.24587, + "8": 0.23051, + "9": 0.23491, + "10": 0.23256, + "11": 0.2548, + "12": 0.23554, + "13": 0.24407, + "14": 0.23603, + "15": 0.24759, + "16": 0.23243, + "17": 0.23641, + "18": 0.23374, + "19": 0.22953, + "20": 0.23517, + "21": 0.22989, + "22": 0.2361, + "23": 0.24153, + "24": 0.23019, + "25": 0.22803, + "26": 0.23226, + "27": 0.22872, + 
"28": 0.23463, + "29": 0.23254, + "30": 0.22883, + "31": 0.27127, + "32": 0.22829, + "33": 0.24048, + "34": 0.26445, + "35": 0.2532, + "36": 0.24919, + "37": 0.22702, + "38": 0.22443, + "39": 0.22286, + "40": 0.21951, + "41": 0.22887, + "42": 0.22125, + "43": 0.23026, + "44": 0.22208, + "45": 0.23148, + "46": 0.24241, + "47": 0.22735, + "48": 0.22857, + "49": 0.27512, + "50": 0.22154 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..eb013c007ca --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.81692, + "2": 10.82534, + "3": 10.82401, + "4": 10.79801, + "5": 10.8415, + "6": 10.85912, + "7": 10.81927, + "8": 10.81789, + "9": 10.83554, + "10": 10.78266, + "11": 10.85455, + "12": 10.84582, + "13": 10.84996, + "14": 10.87821, + "15": 10.80684, + "16": 10.80662, + "17": 10.76305, + "18": 10.80188, + "19": 10.79303, + "20": 10.73474, + "21": 10.71067, + "22": 10.57636, + "23": 10.7196, + "24": 10.63305, + "25": 10.56916, + "26": 10.62589, + "27": 10.64466, + "28": 10.60792, + "29": 10.61761, + "30": 10.42214, + "31": 10.17719, + "32": 10.50701, + "33": 10.50561, + "34": 10.27485, + "35": 10.3276, + "36": 10.29275, + "37": 10.40262, + "38": 10.25679, + "39": 10.43615, + "40": 10.16589, + "41": 10.20032, + "42": 10.27424, + "43": 9.93044, + "44": 10.04415, + "45": 9.92936, + "46": 9.89984, + "47": 10.18573, + "48": 9.93082, + "49": 9.6257, + "50": 9.98437 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + 
"step_interval": 1, + "values": { + "1": 12899.0, + "2": 14592.0, + "3": 14243.0, + "4": 13886.0, + "5": 15732.0, + "6": 16250.0, + "7": 15453.0, + "8": 13386.0, + "9": 15159.0, + "10": 12804.0, + "11": 16441.0, + "12": 14951.0, + "13": 16151.0, + "14": 16330.0, + "15": 15144.0, + "16": 15588.0, + "17": 15315.0, + "18": 14902.0, + "19": 15436.0, + "20": 13814.0, + "21": 13977.0, + "22": 12814.0, + "23": 16615.0, + "24": 13785.0, + "25": 13451.0, + "26": 14681.0, + "27": 15288.0, + "28": 16290.0, + "29": 16880.0, + "30": 14583.0, + "31": 13272.0, + "32": 15972.0, + "33": 16904.0, + "34": 14406.0, + "35": 14981.0, + "36": 15576.0, + "37": 17584.0, + "38": 16136.0, + "39": 17650.0, + "40": 16506.0, + "41": 16391.0, + "42": 17008.0, + "43": 15459.0, + "44": 15097.0, + "45": 16136.0, + "46": 16845.0, + "47": 19101.0, + "48": 16405.0, + "49": 16558.0, + "50": 18439.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 659394560.0, + "2": 659346944.0, + "3": 659401728.0, + "4": 659351040.0, + "5": 659623424.0, + "6": 659348480.0, + "7": 659508736.0, + "8": 659353088.0, + "9": 659383296.0, + "10": 659347456.0, + "11": 659350016.0, + "12": 659437056.0, + "13": 659356160.0, + "14": 659702272.0, + "15": 659658240.0, + "16": 659450880.0, + "17": 659438080.0, + "18": 659384320.0, + "19": 659492352.0, + "20": 659372544.0, + "21": 659350016.0, + "22": 659347456.0, + "23": 659348992.0, + "24": 659430400.0, + "25": 659347968.0, + "26": 659378176.0, + "27": 659353088.0, + "28": 659346944.0, + "29": 659440640.0, + "30": 659732480.0, + "31": 659361792.0, + "32": 659345920.0, + "33": 659473920.0, + "34": 660008448.0, + "35": 659819520.0, + "36": 659363840.0, + "37": 659418624.0, + "38": 659351040.0, + "39": 659449344.0, + "40": 659586560.0, + "41": 659387392.0, + "42": 659476480.0, + "43": 659567104.0, + "44": 659344384.0, + "45": 659346944.0, + "46": 659466752.0, + "47": 659345408.0, + "48": 659835392.0, + "49": 
659494400.0, + "50": 659346432.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1853294080.0, + "2": 2083995136.0, + "3": 2084402688.0, + "4": 2084433408.0, + "5": 2084433408.0, + "6": 2084433408.0, + "7": 2085503488.0, + "8": 2085503488.0, + "9": 2085503488.0, + "10": 2085503488.0, + "11": 2085503488.0, + "12": 2085503488.0, + "13": 2085503488.0, + "14": 2085503488.0, + "15": 2085503488.0, + "16": 2085503488.0, + "17": 2085503488.0, + "18": 2085503488.0, + "19": 2085503488.0, + "20": 2085503488.0, + "21": 2085503488.0, + "22": 2085503488.0, + "23": 2085503488.0, + "24": 2085503488.0, + "25": 2085503488.0, + "26": 2085503488.0, + "27": 2085503488.0, + "28": 2085503488.0, + "29": 2085503488.0, + "30": 2085503488.0, + "31": 2085503488.0, + "32": 2085503488.0, + "33": 2085503488.0, + "34": 2085503488.0, + "35": 2085503488.0, + "36": 2085503488.0, + "37": 2085503488.0, + "38": 2085503488.0, + "39": 2085503488.0, + "40": 2085503488.0, + "41": 2085503488.0, + "42": 2085503488.0, + "43": 2085503488.0, + "44": 2085503488.0, + "45": 2085503488.0, + "46": 2085503488.0, + "47": 2085503488.0, + "48": 2085503488.0, + "49": 2085503488.0, + "50": 2085503488.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.92506, + "2": 0.34079, + "3": 0.28891, + "4": 0.30652, + "5": 0.27326, + "6": 0.26908, + "7": 0.28337, + "8": 0.26429, + "9": 0.27048, + "10": 0.26866, + "11": 0.28689, + "12": 0.25961, + "13": 0.26511, + "14": 0.26065, + "15": 0.27834, + "16": 0.26398, + "17": 0.26064, + "18": 0.26661, + "19": 0.26487, + "20": 0.27686, + "21": 0.26249, + "22": 0.2677, + "23": 0.26859, + "24": 0.26049, + "25": 0.26086, + "26": 0.26279, + "27": 0.25983, + "28": 0.26561, + "29": 0.26345, + "30": 0.26142, + "31": 0.30613, + "32": 0.26049, + "33": 0.26142, + "34": 0.27278, + "35": 0.25691, + "36": 0.26151, + "37": 0.25654, + "38": 0.25753, + "39": 0.2576, + 
"40": 0.25839, + "41": 0.27219, + "42": 0.25851, + "43": 0.2668, + "44": 0.26229, + "45": 0.27182, + "46": 0.27691, + "47": 0.26299, + "48": 0.27152, + "49": 0.31513, + "50": 0.25813 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..af91e248c50 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82416, + "2": 10.83928, + "3": 10.81612, + "4": 10.8212, + "5": 10.84149, + "6": 10.86581, + "7": 10.84393, + "8": 10.84532, + "9": 10.85565, + "10": 10.79041, + "11": 10.85899, + "12": 10.84824, + "13": 10.86636, + "14": 10.86561, + "15": 10.8302, + "16": 10.80989, + "17": 10.79387, + "18": 10.80839, + "19": 10.8082, + "20": 10.73076, + "21": 10.71085, + "22": 10.57952, + "23": 10.71929, + "24": 10.61457, + "25": 10.57969, + "26": 10.64041, + "27": 10.63805, + "28": 10.61227, + "29": 10.61246, + "30": 10.41029, + "31": 10.16791, + "32": 10.49732, + "33": 10.49177, + "34": 10.25296, + "35": 10.31774, + "36": 10.28708, + "37": 10.38564, + "38": 10.24733, + "39": 10.43639, + "40": 10.14481, + "41": 10.19445, + "42": 10.25646, + "43": 9.91204, + "44": 10.02501, + "45": 9.91307, + "46": 9.89277, + "47": 10.1916, + "48": 9.928, + "49": 9.60925, + "50": 9.97569 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12709.0, + "2": 14371.0, + "3": 14612.0, + "4": 13787.0, + "5": 15699.0, + "6": 16096.0, + "7": 15514.0, + "8": 13078.0, + "9": 15208.0, + 
"10": 12503.0, + "11": 16317.0, + "12": 15023.0, + "13": 16173.0, + "14": 16307.0, + "15": 14756.0, + "16": 15746.0, + "17": 15339.0, + "18": 15071.0, + "19": 15163.0, + "20": 13658.0, + "21": 13822.0, + "22": 12883.0, + "23": 16852.0, + "24": 13629.0, + "25": 13295.0, + "26": 15055.0, + "27": 15392.0, + "28": 16101.0, + "29": 16813.0, + "30": 14801.0, + "31": 12991.0, + "32": 16054.0, + "33": 17242.0, + "34": 14599.0, + "35": 15233.0, + "36": 15992.0, + "37": 17624.0, + "38": 16275.0, + "39": 17931.0, + "40": 16737.0, + "41": 16765.0, + "42": 17162.0, + "43": 15421.0, + "44": 15537.0, + "45": 16130.0, + "46": 17720.0, + "47": 19461.0, + "48": 16585.0, + "49": 16329.0, + "50": 19242.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 625133056.0, + "2": 625139200.0, + "3": 625138176.0, + "4": 625133568.0, + "5": 625138176.0, + "6": 625136640.0, + "7": 625130496.0, + "8": 625135616.0, + "9": 625136640.0, + "10": 625133568.0, + "11": 625137152.0, + "12": 625138176.0, + "13": 625138176.0, + "14": 625134592.0, + "15": 625135616.0, + "16": 625138176.0, + "17": 625130496.0, + "18": 625137664.0, + "19": 625137152.0, + "20": 625137664.0, + "21": 625137152.0, + "22": 625134080.0, + "23": 625131520.0, + "24": 625134080.0, + "25": 625134080.0, + "26": 625136128.0, + "27": 625138688.0, + "28": 625166848.0, + "29": 625137152.0, + "30": 625135616.0, + "31": 625131008.0, + "32": 625134592.0, + "33": 625137152.0, + "34": 625134080.0, + "35": 625134592.0, + "36": 625135616.0, + "37": 625137664.0, + "38": 625136128.0, + "39": 625135104.0, + "40": 625138176.0, + "41": 625134080.0, + "42": 625139712.0, + "43": 625133056.0, + "44": 625133056.0, + "45": 625135616.0, + "46": 625127936.0, + "47": 625136128.0, + "48": 625126912.0, + "49": 625131520.0, + "50": 625137664.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1818507264.0, + "2": 2049025536.0, + 
"3": 2049341440.0, + "4": 2049341440.0, + "5": 2049341440.0, + "6": 2049341440.0, + "7": 2049341440.0, + "8": 2049549312.0, + "9": 2049549312.0, + "10": 2049549312.0, + "11": 2050059264.0, + "12": 2050059264.0, + "13": 2050059264.0, + "14": 2050059264.0, + "15": 2050059264.0, + "16": 2050059264.0, + "17": 2050059264.0, + "18": 2050059264.0, + "19": 2050059264.0, + "20": 2050059264.0, + "21": 2050059264.0, + "22": 2050059264.0, + "23": 2050059264.0, + "24": 2050059264.0, + "25": 2050059264.0, + "26": 2050059264.0, + "27": 2050059264.0, + "28": 2050059264.0, + "29": 2050059264.0, + "30": 2050059264.0, + "31": 2050059264.0, + "32": 2050059264.0, + "33": 2050059264.0, + "34": 2050059264.0, + "35": 2050059264.0, + "36": 2050059264.0, + "37": 2050059264.0, + "38": 2050059264.0, + "39": 2050059264.0, + "40": 2050059264.0, + "41": 2050059264.0, + "42": 2050059264.0, + "43": 2050059264.0, + "44": 2050059264.0, + "45": 2050059264.0, + "46": 2050059264.0, + "47": 2050059264.0, + "48": 2050059264.0, + "49": 2050059264.0, + "50": 2050148352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.44804, + "2": 0.4545, + "3": 0.40145, + "4": 0.39962, + "5": 0.40214, + "6": 0.40788, + "7": 0.40992, + "8": 0.40872, + "9": 0.40355, + "10": 0.40545, + "11": 0.41454, + "12": 0.39604, + "13": 0.40021, + "14": 0.39269, + "15": 0.38202, + "16": 0.40653, + "17": 0.39389, + "18": 0.40314, + "19": 0.39215, + "20": 0.38662, + "21": 0.39822, + "22": 0.39482, + "23": 0.39892, + "24": 0.39111, + "25": 0.43645, + "26": 0.44712, + "27": 0.43121, + "28": 0.42413, + "29": 0.43447, + "30": 0.44716, + "31": 0.39545, + "32": 0.40817, + "33": 0.43535, + "34": 0.44181, + "35": 0.41776, + "36": 0.44963, + "37": 0.41369, + "38": 0.35924, + "39": 0.35768, + "40": 0.36975, + "41": 0.35836, + "42": 0.35907, + "43": 0.36834, + "44": 0.35722, + "45": 0.35442, + "46": 0.36721, + "47": 0.35342, + "48": 0.368, + "49": 0.35736, + "50": 0.35455 + } + } +} 
\ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..31b44874771 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82416, + "2": 10.83928, + "3": 10.81612, + "4": 10.8212, + "5": 10.84149, + "6": 10.86581, + "7": 10.84393, + "8": 10.84532, + "9": 10.85565, + "10": 10.79041, + "11": 10.85899, + "12": 10.84824, + "13": 10.86636, + "14": 10.86561, + "15": 10.8302, + "16": 10.80989, + "17": 10.79387, + "18": 10.80839, + "19": 10.8082, + "20": 10.73076, + "21": 10.71085, + "22": 10.57952, + "23": 10.71929, + "24": 10.61457, + "25": 10.57969, + "26": 10.64041, + "27": 10.63805, + "28": 10.61227, + "29": 10.61246, + "30": 10.41029, + "31": 10.16791, + "32": 10.49732, + "33": 10.49177, + "34": 10.25296, + "35": 10.31774, + "36": 10.28708, + "37": 10.38564, + "38": 10.24733, + "39": 10.43639, + "40": 10.14481, + "41": 10.19445, + "42": 10.25646, + "43": 9.91204, + "44": 10.02501, + "45": 9.91307, + "46": 9.89277, + "47": 10.1916, + "48": 9.928, + "49": 9.60925, + "50": 9.97569 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12709.0, + "2": 14371.0, + "3": 14612.0, + "4": 13787.0, + "5": 15699.0, + "6": 16096.0, + "7": 15514.0, + "8": 13078.0, + "9": 15208.0, + "10": 12503.0, + "11": 16317.0, + "12": 15023.0, + "13": 16173.0, + "14": 16307.0, + "15": 14756.0, + "16": 15746.0, + "17": 15339.0, + "18": 15071.0, + "19": 15163.0, + "20": 13658.0, + "21": 13822.0, + 
"22": 12883.0, + "23": 16852.0, + "24": 13629.0, + "25": 13295.0, + "26": 15055.0, + "27": 15392.0, + "28": 16101.0, + "29": 16813.0, + "30": 14801.0, + "31": 12991.0, + "32": 16054.0, + "33": 17242.0, + "34": 14599.0, + "35": 15233.0, + "36": 15992.0, + "37": 17624.0, + "38": 16275.0, + "39": 17931.0, + "40": 16737.0, + "41": 16765.0, + "42": 17162.0, + "43": 15421.0, + "44": 15537.0, + "45": 16130.0, + "46": 17720.0, + "47": 19461.0, + "48": 16585.0, + "49": 16329.0, + "50": 19242.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 625759744.0, + "2": 625139200.0, + "3": 625138176.0, + "4": 625133568.0, + "5": 625138176.0, + "6": 625136640.0, + "7": 625130496.0, + "8": 625135616.0, + "9": 625136640.0, + "10": 625133568.0, + "11": 625137152.0, + "12": 625138176.0, + "13": 625138176.0, + "14": 625134592.0, + "15": 625135616.0, + "16": 625138176.0, + "17": 625130496.0, + "18": 625137664.0, + "19": 625137152.0, + "20": 625137664.0, + "21": 625137152.0, + "22": 625134080.0, + "23": 625131520.0, + "24": 625134080.0, + "25": 625134080.0, + "26": 625136128.0, + "27": 625138688.0, + "28": 625166848.0, + "29": 625137152.0, + "30": 625135616.0, + "31": 625131008.0, + "32": 625134592.0, + "33": 625137152.0, + "34": 625134080.0, + "35": 625134592.0, + "36": 625135616.0, + "37": 625137664.0, + "38": 625136128.0, + "39": 625135104.0, + "40": 625138176.0, + "41": 625134080.0, + "42": 625139712.0, + "43": 625133056.0, + "44": 625133056.0, + "45": 625135616.0, + "46": 625127936.0, + "47": 625136128.0, + "48": 625126912.0, + "49": 625131520.0, + "50": 625137664.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1819058176.0, + "2": 2049025536.0, + "3": 2049507328.0, + "4": 2049507328.0, + "5": 2049507328.0, + "6": 2049507328.0, + "7": 2049507328.0, + "8": 2049549312.0, + "9": 2049549312.0, + "10": 2049549312.0, + "11": 2050408448.0, + "12": 
2050408448.0, + "13": 2050408448.0, + "14": 2050408448.0, + "15": 2050408448.0, + "16": 2050408448.0, + "17": 2050408448.0, + "18": 2050408448.0, + "19": 2050408448.0, + "20": 2050408448.0, + "21": 2050408448.0, + "22": 2050408448.0, + "23": 2050408448.0, + "24": 2050408448.0, + "25": 2050408448.0, + "26": 2050408448.0, + "27": 2050408448.0, + "28": 2050408448.0, + "29": 2050408448.0, + "30": 2050408448.0, + "31": 2050408448.0, + "32": 2050408448.0, + "33": 2050408448.0, + "34": 2050408448.0, + "35": 2050408448.0, + "36": 2050408448.0, + "37": 2050408448.0, + "38": 2050408448.0, + "39": 2050408448.0, + "40": 2050408448.0, + "41": 2050408448.0, + "42": 2050408448.0, + "43": 2050408448.0, + "44": 2050408448.0, + "45": 2050408448.0, + "46": 2050408448.0, + "47": 2050408448.0, + "48": 2050408448.0, + "49": 2050408448.0, + "50": 2050408448.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22.77068, + "2": 0.46494, + "3": 0.3723, + "4": 0.36903, + "5": 0.37035, + "6": 0.36273, + "7": 0.36764, + "8": 0.36608, + "9": 0.36149, + "10": 0.37099, + "11": 0.36751, + "12": 0.36086, + "13": 0.37084, + "14": 0.36048, + "15": 0.36546, + "16": 0.36953, + "17": 0.36319, + "18": 0.36789, + "19": 0.36444, + "20": 0.3601, + "21": 0.37091, + "22": 0.36503, + "23": 0.3598, + "24": 0.36881, + "25": 0.36119, + "26": 0.36751, + "27": 0.36776, + "28": 0.35964, + "29": 0.36504, + "30": 0.36585, + "31": 0.36136, + "32": 0.37411, + "33": 0.36177, + "34": 0.36157, + "35": 0.36662, + "36": 0.35886, + "37": 0.36442, + "38": 0.36579, + "39": 0.35855, + "40": 0.36631, + "41": 0.36531, + "42": 0.35897, + "43": 0.37205, + "44": 0.36369, + "45": 0.3598, + "46": 0.3686, + "47": 0.36017, + "48": 0.36176, + "49": 0.36902, + "50": 0.35813 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..0f2637a9511 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,344 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.04748, + "2": 11.03561, + "3": 9.58774, + "4": 9.25819, + "5": 9.53583, + "6": 9.8804, + "7": 9.48247, + "8": 8.93575, + "9": 8.65813, + "10": 9.0567, + "11": 8.49445, + "12": 8.52444, + "13": 8.45239, + "14": 7.97323, + "15": 8.0476, + "16": 8.07971, + "17": 8.09081, + "18": 7.76437, + "19": 8.14892, + "20": 7.89868, + "21": 7.59371, + "22": 7.54743, + "23": 7.43222, + "24": 7.4302, + "25": 7.67579, + "26": 7.06929, + "27": 7.62041, + "28": 7.32495, + "29": 7.49042, + "30": 7.64391, + "31": 7.39435, + "32": 7.58789, + "33": 7.64037, + "34": 7.69778, + "35": 7.20998, + "36": 7.08538, + "37": 7.42584, + "38": 7.18804, + "39": 7.55054, + "40": 7.54446, + "41": 7.49287, + "42": 7.24937, + "43": 7.23587, + "44": 7.41595, + "45": 7.18755, + "46": 6.89949, + "47": 7.29966, + "48": 7.14134, + "49": 7.58963, + "50": 7.03602 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 38802612.0, + "2": 38543592.0, + "3": 38739528.0, + "4": 279937824.0, + "5": 259189728.0, + "6": 271446400.0, + "7": 604773504.0, + "8": 768892544.0, + "9": 645824128.0, + "10": 744257088.0, + "11": 718888576.0, + "12": 746732544.0, + "13": 871990976.0, + "14": 821645632.0, + "15": 724250816.0, + "16": 932241472.0, + "17": 648958912.0, + "18": 649120000.0, + "19": 925992960.0, + "20": 989207936.0, + "21": 819324096.0, + "22": 736955072.0, + "23": 910497792.0, + "24": 876716672.0, + "25": 843170688.0, + "26": 809573824.0, + "27": 854086912.0, + "28": 802857664.0, + "29": 805523328.0, + "30": 775645184.0, + "31": 
771754624.0, + "32": 749733696.0, + "33": 718385216.0, + "34": 724771200.0, + "35": 737655104.0, + "36": 690419968.0, + "37": 673203456.0, + "38": 627239552.0, + "39": 614047168.0, + "40": 607288512.0, + "41": 582590592.0, + "42": 548211200.0, + "43": 532740640.0, + "44": 554239168.0, + "45": 514790528.0, + "46": 350258560.0, + "47": 472420128.0, + "48": 453788736.0, + "49": 440597216.0, + "50": 303063296.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6637267456.0, + "2": 6637269504.0, + "3": 6637269504.0, + "4": 6637269504.0, + "5": 6637269504.0, + "6": 6637269504.0, + "7": 6637269504.0, + "8": 6637269504.0, + "9": 6637269504.0, + "10": 6637269504.0, + "11": 6637269504.0, + "12": 6637269504.0, + "13": 6637269504.0, + "14": 6637269504.0, + "15": 6637269504.0, + "16": 6637269504.0, + "17": 6637269504.0, + "18": 6637269504.0, + "19": 6637269504.0, + "20": 6637269504.0, + "21": 6637269504.0, + "22": 6637269504.0, + "23": 6637269504.0, + "24": 6637269504.0, + "25": 6637269504.0, + "26": 6637269504.0, + "27": 6637269504.0, + "28": 6637269504.0, + "29": 6637269504.0, + "30": 6637269504.0, + "31": 6637269504.0, + "32": 6637269504.0, + "33": 6637269504.0, + "34": 6637269504.0, + "35": 6637269504.0, + "36": 6637269504.0, + "37": 6637269504.0, + "38": 6637269504.0, + "39": 6637269504.0, + "40": 6637269504.0, + "41": 6637269504.0, + "42": 6637269504.0, + "43": 6637269504.0, + "44": 6637269504.0, + "45": 6637269504.0, + "46": 6637269504.0, + "47": 6637269504.0, + "48": 6637269504.0, + "49": 6637269504.0, + "50": 6637269504.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 55055331328.0, + "2": 57809321984.0, + "3": 57918455808.0, + "4": 57918455808.0, + "5": 57918455808.0, + "6": 57918455808.0, + "7": 57918455808.0, + "8": 57918455808.0, + "9": 57918455808.0, + "10": 57918455808.0, + "11": 57918455808.0, + "12": 57918455808.0, + "13": 
57931390976.0, + "14": 57931390976.0, + "15": 57931390976.0, + "16": 57931390976.0, + "17": 57931390976.0, + "18": 57931390976.0, + "19": 57931390976.0, + "20": 57931390976.0, + "21": 57931390976.0, + "22": 57931390976.0, + "23": 57931390976.0, + "24": 57931390976.0, + "25": 57931390976.0, + "26": 57931390976.0, + "27": 57931390976.0, + "28": 57931390976.0, + "29": 57931390976.0, + "30": 57931390976.0, + "31": 57931390976.0, + "32": 58003226624.0, + "33": 58003226624.0, + "34": 58003226624.0, + "35": 58003226624.0, + "36": 58003226624.0, + "37": 58003226624.0, + "38": 58003226624.0, + "39": 58003226624.0, + "40": 58003226624.0, + "41": 58003226624.0, + "42": 58003226624.0, + "43": 58003226624.0, + "44": 58183614464.0, + "45": 58234208256.0, + "46": 58555555840.0, + "47": 58555555840.0, + "48": 58555555840.0, + "49": 58555555840.0, + "50": 58780934144.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.07654, + "2": 11.07406, + "3": 10.53881, + "4": 10.09803, + "5": 9.81154, + "6": 10.06236, + "7": 9.79762, + "8": 9.07117, + "9": 8.87049, + "10": 9.127, + "11": 8.49853, + "12": 8.53046, + "13": 8.42444, + "14": 7.847, + "15": 7.99077, + "16": 8.05015, + "17": 8.00064, + "18": 7.73104, + "19": 8.11087, + "20": 7.82933, + "21": 7.52501, + "22": 7.49916, + "23": 7.36982, + "24": 7.37235, + "25": 7.61578, + "26": 7.02029, + "27": 7.56014, + "28": 7.2681, + "29": 7.44399, + "30": 7.58618, + "31": 7.32468, + "32": 7.50596, + "33": 7.5715, + "34": 7.63581, + "35": 7.15224, + "36": 7.01784, + "37": 7.35163, + "38": 7.12551, + "39": 7.48656, + "40": 7.47408, + "41": 7.42096, + "42": 7.17595, + "43": 7.16059, + "44": 7.34289, + "45": 7.11969, + "46": 6.82753, + "47": 7.23525, + "48": 7.08042, + "49": 7.51043, + "50": 6.9735 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 69.29797, + "2": 1.7261, + "3": 1.40981, + "4": 2.16562, + "5": 1.7862, + "6": 
1.7469, + "7": 1.96688, + "8": 1.97301, + "9": 1.74665, + "10": 1.69613, + "11": 1.02979, + "12": 1.02408, + "13": 1.03261, + "14": 1.02432, + "15": 1.0529, + "16": 1.04491, + "17": 1.03693, + "18": 1.03399, + "19": 1.03627, + "20": 1.02284, + "21": 1.01667, + "22": 1.02932, + "23": 1.03591, + "24": 1.03466, + "25": 1.03149, + "26": 1.03165, + "27": 1.02342, + "28": 1.03777, + "29": 1.04061, + "30": 1.05641, + "31": 1.02382, + "32": 1.01775, + "33": 1.03039, + "34": 1.03693, + "35": 1.03153, + "36": 1.02699, + "37": 1.02756, + "38": 1.02919, + "39": 1.01773, + "40": 1.03491, + "41": 1.03152, + "42": 1.03035, + "43": 1.0221, + "44": 1.05201, + "45": 1.02579, + "46": 1.02798, + "47": 1.03857, + "48": 1.02772, + "49": 1.0408, + "50": 1.03745 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..b3668b31178 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,344 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.04748, + "2": 11.03561, + "3": 9.58773, + "4": 9.25819, + "5": 9.52742, + "6": 9.87911, + "7": 9.48366, + "8": 8.93879, + "9": 8.6551, + "10": 9.10915, + "11": 8.51806, + "12": 8.54732, + "13": 8.48144, + "14": 8.05312, + "15": 8.10118, + "16": 8.10344, + "17": 8.08878, + "18": 7.78589, + "19": 8.15794, + "20": 7.88069, + "21": 7.58542, + "22": 7.54895, + "23": 7.4296, + "24": 7.41901, + "25": 7.67277, + "26": 7.07835, + "27": 7.61157, + "28": 7.31513, + "29": 7.49487, + "30": 7.64287, + "31": 7.39102, + "32": 7.59148, + "33": 7.6393, + "34": 7.70086, + "35": 7.2119, + "36": 7.08623, + "37": 7.43064, + "38": 7.18999, + "39": 7.5525, + "40": 
7.54961, + "41": 7.49385, + "42": 7.25481, + "43": 7.24066, + "44": 7.42131, + "45": 7.19201, + "46": 6.90547, + "47": 7.30704, + "48": 7.15325, + "49": 7.60504, + "50": 7.04512 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 38802612.0, + "2": 38543592.0, + "3": 38739480.0, + "4": 279954336.0, + "5": 249745312.0, + "6": 268288496.0, + "7": 604756224.0, + "8": 781485184.0, + "9": 636362112.0, + "10": 653025216.0, + "11": 668551168.0, + "12": 765583616.0, + "13": 815362944.0, + "14": 834270656.0, + "15": 755756096.0, + "16": 995153536.0, + "17": 938291584.0, + "18": 721524928.0, + "19": 756173504.0, + "20": 901129600.0, + "21": 721816384.0, + "22": 831311872.0, + "23": 803536768.0, + "24": 628253248.0, + "25": 663895680.0, + "26": 847321664.0, + "27": 828927424.0, + "28": 777678976.0, + "29": 764628608.0, + "30": 781930112.0, + "31": 771767616.0, + "32": 771755392.0, + "33": 586323648.0, + "34": 734207552.0, + "35": 690468480.0, + "36": 485982688.0, + "37": 506506336.0, + "38": 642964160.0, + "39": 661240000.0, + "40": 645048768.0, + "41": 636072704.0, + "42": 491645856.0, + "43": 601942528.0, + "44": 623448960.0, + "45": 539959424.0, + "46": 532669088.0, + "47": 529039680.0, + "48": 504121984.0, + "49": 478344480.0, + "50": 331385728.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6637267456.0, + "2": 6637269504.0, + "3": 6637269504.0, + "4": 6637269504.0, + "5": 6637269504.0, + "6": 6637269504.0, + "7": 6637269504.0, + "8": 6637269504.0, + "9": 6637269504.0, + "10": 6637269504.0, + "11": 6637269504.0, + "12": 6637269504.0, + "13": 6637269504.0, + "14": 6637269504.0, + "15": 6637269504.0, + "16": 6637269504.0, + "17": 6637269504.0, + "18": 6637269504.0, + "19": 6637269504.0, + "20": 6637269504.0, + "21": 6637269504.0, + "22": 6637269504.0, + "23": 6637269504.0, + "24": 6637269504.0, + "25": 6637269504.0, + "26": 6637269504.0, + "27": 
6637269504.0, + "28": 6637269504.0, + "29": 6637269504.0, + "30": 6637269504.0, + "31": 6637269504.0, + "32": 6637269504.0, + "33": 6637269504.0, + "34": 6637269504.0, + "35": 6637269504.0, + "36": 6637269504.0, + "37": 6637269504.0, + "38": 6637269504.0, + "39": 6637269504.0, + "40": 6637269504.0, + "41": 6637269504.0, + "42": 6637269504.0, + "43": 6637269504.0, + "44": 6637269504.0, + "45": 6637269504.0, + "46": 6637269504.0, + "47": 6637269504.0, + "48": 6637269504.0, + "49": 6637269504.0, + "50": 6637269504.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 55055331328.0, + "2": 57809321984.0, + "3": 57919823872.0, + "4": 57919823872.0, + "5": 57919823872.0, + "6": 57919823872.0, + "7": 57919823872.0, + "8": 57919823872.0, + "9": 57919823872.0, + "10": 57919823872.0, + "11": 57919823872.0, + "12": 57919823872.0, + "13": 57932275712.0, + "14": 57932275712.0, + "15": 57932275712.0, + "16": 57932275712.0, + "17": 57932275712.0, + "18": 57932275712.0, + "19": 57932275712.0, + "20": 57932275712.0, + "21": 57932275712.0, + "22": 57932275712.0, + "23": 57932275712.0, + "24": 57932275712.0, + "25": 57932275712.0, + "26": 57932275712.0, + "27": 57932275712.0, + "28": 57932275712.0, + "29": 57932275712.0, + "30": 57932275712.0, + "31": 57932275712.0, + "32": 57932275712.0, + "33": 57932275712.0, + "34": 57932275712.0, + "35": 57932275712.0, + "36": 57932275712.0, + "37": 57932275712.0, + "38": 57932275712.0, + "39": 57932275712.0, + "40": 57932275712.0, + "41": 57932275712.0, + "42": 57932275712.0, + "43": 57932275712.0, + "44": 57932275712.0, + "45": 57932275712.0, + "46": 57932275712.0, + "47": 57932275712.0, + "48": 57932275712.0, + "49": 57932275712.0, + "50": 57932275712.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.07654, + "2": 11.07406, + "3": 10.53883, + "4": 10.09801, + "5": 9.81156, + "6": 10.06025, + "7": 9.7962, + "8": 
9.06987, + "9": 8.86879, + "10": 9.13393, + "11": 8.5017, + "12": 8.54094, + "13": 8.43678, + "14": 7.85637, + "15": 7.99846, + "16": 8.05889, + "17": 8.01134, + "18": 7.73929, + "19": 8.1188, + "20": 7.83458, + "21": 7.53103, + "22": 7.50125, + "23": 7.37135, + "24": 7.37419, + "25": 7.61596, + "26": 7.01586, + "27": 7.55739, + "28": 7.26274, + "29": 7.43991, + "30": 7.58436, + "31": 7.32289, + "32": 7.50362, + "33": 7.56884, + "34": 7.6339, + "35": 7.151, + "36": 7.01725, + "37": 7.35013, + "38": 7.12483, + "39": 7.48708, + "40": 7.47451, + "41": 7.4181, + "42": 7.17557, + "43": 7.15957, + "44": 7.34227, + "45": 7.12176, + "46": 6.82526, + "47": 7.23374, + "48": 7.07893, + "49": 7.5077, + "50": 6.97094 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 57.80279, + "2": 1.26321, + "3": 1.18918, + "4": 2.24643, + "5": 2.25191, + "6": 1.80757, + "7": 2.09086, + "8": 1.69153, + "9": 1.81279, + "10": 1.64882, + "11": 1.03476, + "12": 1.03593, + "13": 1.04348, + "14": 1.03841, + "15": 1.04432, + "16": 1.05281, + "17": 1.04826, + "18": 1.04981, + "19": 1.05351, + "20": 1.04668, + "21": 1.05254, + "22": 1.05391, + "23": 1.04635, + "24": 1.05503, + "25": 1.04226, + "26": 1.0684, + "27": 1.04985, + "28": 1.04233, + "29": 1.05036, + "30": 1.06219, + "31": 1.044, + "32": 1.05614, + "33": 1.05729, + "34": 1.05618, + "35": 1.06289, + "36": 1.05761, + "37": 1.05956, + "38": 1.06343, + "39": 1.06848, + "40": 1.06027, + "41": 1.05493, + "42": 1.05258, + "43": 1.04879, + "44": 1.04949, + "45": 1.05964, + "46": 1.04465, + "47": 1.0491, + "48": 1.05387, + "49": 1.05218, + "50": 1.05453 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json index 657f6cef025..a7b4d2b32ca 
100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.81131, + "2": 10.83052, + "3": 10.82093, + "4": 10.81347, "5": 10.84338, + "6": 10.84743, + "7": 10.85254, + "8": 10.83482, + "9": 10.84276, "10": 10.77693, + "11": 10.8459, + "12": 10.85115, + "13": 10.84165, + "14": 10.8714, "15": 10.83613, + "16": 10.79815, + "17": 10.77288, + "18": 10.8075, + "19": 10.78773, "20": 10.73433, + "21": 10.69461, + "22": 10.56597, + "23": 10.71611, + "24": 10.61321, "25": 10.552, + "26": 10.61364, + "27": 10.62702, + "28": 10.59546, + "29": 10.59195, "30": 10.3916, + "31": 10.14615, + "32": 10.47399, + "33": 10.47051, + "34": 10.23435, "35": 10.29318, + "36": 10.26627, + "37": 10.37219, + "38": 10.2254, + "39": 10.42101, "40": 10.13002, + "41": 10.16265, + "42": 10.24278, + "43": 9.88237, + "44": 9.99105, "45": 9.87295, + "46": 9.85181, + "47": 10.15633, + "48": 9.8915, + "49": 9.58889, "50": 9.9543, + "51": 9.8849, + "52": 9.78004, + "53": 10.10188, + "54": 9.98715, "55": 9.9027, + "56": 9.66837, + "57": 9.53524, + "58": 9.89495, + "59": 9.62892, "60": 9.54308, + "61": 9.72727, + "62": 10.0332, + "63": 9.45215, + "64": 9.83179, "65": 8.99109, + "66": 9.76394, + "67": 9.40349, + "68": 9.83129, + "69": 9.81856, "70": 9.77262, + "71": 9.658, + "72": 9.64033, + "73": 9.55124, + "74": 9.02026, "75": 9.47695, + "76": 9.13586, + "77": 10.09787, + "78": 9.75274, + "79": 9.41697, "80": 9.45074, + "81": 9.52041, + "82": 9.73203, + "83": 9.36912, + "84": 9.45039, "85": 9.65229, + "86": 9.1123, + "87": 9.61119, + "88": 9.78708, + "89": 9.64625, "90": 9.83474, + "91": 9.39429, + "92": 9.39178, + "93": 9.12787, + "94": 8.86637, "95": 9.54352, + 
"96": 9.55716, + "97": 9.332, + "98": 9.69189, + "99": 8.92072, "100": 9.41916 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1216.0, + "2": 1361.0, + "3": 1221.0, + "4": 1222.0, "5": 1385.0, + "6": 1467.0, + "7": 1252.0, + "8": 1355.0, + "9": 1346.0, "10": 1335.0, + "11": 1278.0, + "12": 1185.0, + "13": 1203.0, + "14": 1385.0, "15": 1303.0, + "16": 1377.0, + "17": 1229.0, + "18": 1291.0, + "19": 1244.0, "20": 1183.0, + "21": 1262.0, + "22": 1122.0, + "23": 1301.0, + "24": 1066.0, "25": 1182.0, + "26": 1263.0, + "27": 1162.0, + "28": 1262.0, + "29": 1179.0, "30": 1168.0, + "31": 991.0, + "32": 1092.0, + "33": 1183.0, + "34": 1081.0, "35": 1146.0, + "36": 1076.0, + "37": 1252.0, + "38": 1176.0, + "39": 1225.0, "40": 1303.0, + "41": 1104.0, + "42": 1210.0, + "43": 1116.0, + "44": 1165.0, "45": 1097.0, + "46": 1308.0, + "47": 1165.0, + "48": 1134.0, + "49": 1272.0, "50": 1083.0, + "51": 1234.0, + "52": 1274.0, + "53": 1393.0, + "54": 1299.0, "55": 1186.0, + "56": 1267.0, + "57": 1161.0, + "58": 1326.0, + "59": 1403.0, "60": 1177.0, + "61": 1363.0, + "62": 1302.0, + "63": 1245.0, + "64": 1378.0, "65": 1330.0, + "66": 1363.0, + "67": 1286.0, + "68": 1313.0, + "69": 1295.0, "70": 1459.0, + "71": 1374.0, + "72": 1092.0, + "73": 1274.0, + "74": 943.0, "75": 1059.0, + "76": 1323.0, + "77": 1475.0, + "78": 1487.0, + "79": 1496.0, "80": 1382.0, + "81": 1470.0, + "82": 1417.0, + "83": 1177.0, + "84": 1506.0, "85": 1420.0, + "86": 1281.0, + "87": 1540.0, + "88": 1467.0, + "89": 1452.0, "90": 1350.0, + "91": 1010.0, + "92": 1324.0, + "93": 1349.0, + "94": 1197.0, "95": 2503.0, + "96": 2373.0, + "97": 1490.0, + "98": 2541.0, + "99": 1367.0, "100": 1122.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 788517888.0, + "2": 788488192.0, + "3": 788535296.0, + "4": 788513280.0, "5": 788537344.0, + "6": 788479488.0, + "7": 788502528.0, + "8": 
788510208.0, + "9": 788526080.0, "10": 788538368.0, + "11": 788513280.0, + "12": 788484096.0, + "13": 788542464.0, + "14": 788451328.0, "15": 788503040.0, + "16": 788440576.0, + "17": 788558336.0, + "18": 788535296.0, + "19": 788542464.0, "20": 788470784.0, + "21": 788508672.0, + "22": 788594176.0, + "23": 788573696.0, + "24": 788513280.0, "25": 788655616.0, + "26": 788566016.0, + "27": 788630528.0, + "28": 788568576.0, + "29": 788610560.0, "30": 788587520.0, + "31": 788647424.0, + "32": 788602880.0, + "33": 788616704.0, + "34": 788577792.0, "35": 788616704.0, + "36": 788642304.0, + "37": 788597760.0, + "38": 788650496.0, + "39": 788663296.0, "40": 788550144.0, + "41": 788591616.0, + "42": 788575232.0, + "43": 788541952.0, + "44": 788623872.0, "45": 788491264.0, + "46": 788503552.0, + "47": 788572160.0, + "48": 788488704.0, + "49": 788461568.0, "50": 788487168.0, + "51": 788523008.0, + "52": 788483584.0, + "53": 788513792.0, + "54": 788503552.0, "55": 788499968.0, + "56": 788459008.0, + "57": 788456448.0, + "58": 788499968.0, + "59": 788503552.0, "60": 788491264.0, + "61": 788463616.0, + "62": 788497408.0, + "63": 788449792.0, + "64": 788465664.0, "65": 788408320.0, + "66": 788445696.0, + "67": 788445696.0, + "68": 788456448.0, + "69": 788473856.0, "70": 788497408.0, + "71": 788453888.0, + "72": 788413952.0, + "73": 788444160.0, + "74": 788419072.0, "75": 788441600.0, + "76": 788412928.0, + "77": 788471296.0, + "78": 788462592.0, + "79": 788419072.0, "80": 788411392.0, + "81": 788430848.0, + "82": 788439040.0, + "83": 788435456.0, + "84": 788471296.0, "85": 788461056.0, + "86": 788395008.0, + "87": 788490752.0, + "88": 788493312.0, + "89": 788501504.0, "90": 788531712.0, + "91": 788513792.0, + "92": 788516864.0, + "93": 788487168.0, + "94": 788506624.0, "95": 788543488.0, + "96": 788563456.0, + "97": 788579840.0, + "98": 788590592.0, + "99": 788514816.0, "100": 788570624.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + 
"step_interval": 1, "values": { "1": 3023035904.0, + "2": 3179259392.0, + "3": 3206071808.0, + "4": 3206071808.0, "5": 3206539776.0, + "6": 3206539776.0, + "7": 3206539776.0, + "8": 3206539776.0, + "9": 3206539776.0, "10": 3206539776.0, + "11": 3206539776.0, + "12": 3206539776.0, + "13": 3207718400.0, + "14": 3207718400.0, "15": 3207718400.0, + "16": 3207718400.0, + "17": 3219952640.0, + "18": 3219952640.0, + "19": 3219952640.0, "20": 3219952640.0, + "21": 3219952640.0, + "22": 3239834624.0, + "23": 3239834624.0, + "24": 3239834624.0, "25": 3276544000.0, + "26": 3276544000.0, + "27": 3276544000.0, + "28": 3276544000.0, + "29": 3276544000.0, "30": 3276544000.0, + "31": 3276544000.0, + "32": 3276544000.0, + "33": 3276544000.0, + "34": 3276544000.0, "35": 3276544000.0, + "36": 3276544000.0, + "37": 3276544000.0, + "38": 3276544000.0, + "39": 3281670656.0, "40": 3281670656.0, + "41": 3281670656.0, + "42": 3281670656.0, + "43": 3281670656.0, + "44": 3281670656.0, "45": 3281670656.0, + "46": 3281670656.0, + "47": 3281670656.0, + "48": 3281670656.0, + "49": 3281670656.0, "50": 3281670656.0, + "51": 3281670656.0, + "52": 3281670656.0, + "53": 3281670656.0, + "54": 3281670656.0, "55": 3281670656.0, + "56": 3281670656.0, + "57": 3281670656.0, + "58": 3281670656.0, + "59": 3281670656.0, "60": 3281670656.0, + "61": 3281670656.0, + "62": 3281670656.0, + "63": 3281670656.0, + "64": 3281670656.0, "65": 3281670656.0, + "66": 3281670656.0, + "67": 3281670656.0, + "68": 3281670656.0, + "69": 3281670656.0, "70": 3281670656.0, + "71": 3281670656.0, + "72": 3281670656.0, + "73": 3281670656.0, + "74": 3281670656.0, "75": 3281670656.0, + "76": 3281670656.0, + "77": 3281670656.0, + "78": 3281670656.0, + "79": 3281670656.0, "80": 3281670656.0, + "81": 3281670656.0, + "82": 3281670656.0, + "83": 3281670656.0, + "84": 3281670656.0, "85": 3281670656.0, + "86": 3281670656.0, + "87": 3281670656.0, + "88": 3281670656.0, + "89": 3281670656.0, "90": 3281670656.0, + "91": 3281670656.0, + "92": 
3281670656.0, + "93": 3281670656.0, + "94": 3281670656.0, "95": 3281670656.0, + "96": 3281670656.0, + "97": 3281670656.0, + "98": 3281670656.0, + "99": 3281670656.0, "100": 3281670656.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 11.3696, - "5": 0.16522, - "10": 0.1423, - "15": 0.12936, - "20": 0.14324, - "25": 0.1364, - "30": 0.15701, - "35": 0.15051, - "40": 0.14884, - "45": 0.15496, - "50": 0.15176, - "55": 0.1467, - "60": 0.16277, - "65": 0.14457, - "70": 0.16001, - "75": 0.15317, - "80": 0.15169, - "85": 0.15317, - "90": 0.14836, - "95": 0.1485, - "100": 0.1485 + "1": 11.7037, + "2": 0.22491, + "3": 0.19533, + "4": 0.17539, + "5": 0.18483, + "6": 0.16647, + "7": 0.1641, + "8": 0.16288, + "9": 0.15397, + "10": 0.15258, + "11": 0.15812, + "12": 0.15338, + "13": 0.14727, + "14": 0.15276, + "15": 0.1431, + "16": 0.1553, + "17": 0.14923, + "18": 0.15041, + "19": 0.15216, + "20": 0.15811, + "21": 0.14566, + "22": 0.14796, + "23": 0.15503, + "24": 0.15065, + "25": 0.15039, + "26": 0.15548, + "27": 0.158, + "28": 0.16038, + "29": 0.16862, + "30": 0.16712, + "31": 0.16858, + "32": 0.16095, + "33": 0.163, + "34": 0.1624, + "35": 0.16519, + "36": 0.16981, + "37": 0.16271, + "38": 0.16155, + "39": 0.17014, + "40": 0.1593, + "41": 0.167, + "42": 0.16495, + "43": 0.1718, + "44": 0.16565, + "45": 0.16518, + "46": 0.16648, + "47": 0.16483, + "48": 0.16244, + "49": 0.16707, + "50": 0.16226, + "51": 0.1715, + "52": 0.16281, + "53": 0.16077, + "54": 0.15821, + "55": 0.15951, + "56": 0.16684, + "57": 0.16109, + "58": 0.16192, + "59": 0.16349, + "60": 0.16237, + "61": 0.15955, + "62": 0.15954, + "63": 0.15968, + "64": 0.16092, + "65": 0.1539, + "66": 0.16199, + "67": 0.15811, + "68": 0.1652, + "69": 0.16307, + "70": 0.17014, + "71": 0.15399, + "72": 0.16312, + "73": 0.15787, + "74": 0.16598, + "75": 0.16279, + "76": 0.15216, + "77": 0.16031, + "78": 0.15503, + "79": 0.16083, + "80": 0.16046, + "81": 
0.15996, + "82": 0.15176, + "83": 0.16328, + "84": 0.16094, + "85": 0.16065, + "86": 0.1554, + "87": 0.15864, + "88": 0.16406, + "89": 0.15924, + "90": 0.15731, + "91": 0.15776, + "92": 0.16339, + "93": 0.15877, + "94": 0.15733, + "95": 0.15774, + "96": 0.15579, + "97": 0.16338, + "98": 0.15898, + "99": 0.16066, + "100": 0.15749 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..b4d227b10e3 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.81131, + "2": 10.83052, + "3": 10.82093, + "4": 10.81347, + "5": 10.84338, + "6": 10.84743, + "7": 10.85254, + "8": 10.83482, + "9": 10.84276, + "10": 10.77693, + "11": 10.8459, + "12": 10.85115, + "13": 10.84165, + "14": 10.8714, + "15": 10.83613, + "16": 10.79815, + "17": 10.77288, + "18": 10.8075, + "19": 10.78773, + "20": 10.73433, + "21": 10.69461, + "22": 10.56597, + "23": 10.71611, + "24": 10.61321, + "25": 10.552, + "26": 10.61364, + "27": 10.62702, + "28": 10.59546, + "29": 10.59195, + "30": 10.3916, + "31": 10.14615, + "32": 10.47399, + "33": 10.47051, + "34": 10.23435, + "35": 10.29318, + "36": 10.26627, + "37": 10.37219, + "38": 10.2254, + "39": 10.42101, + "40": 10.13002, + "41": 10.16265, + "42": 10.24278, + "43": 9.88237, + "44": 9.99105, + "45": 9.87295, + "46": 9.85181, + "47": 10.15633, + "48": 9.8915, + "49": 9.58889, + "50": 9.9543, + "51": 9.8849, + "52": 9.78004, + "53": 10.10188, + "54": 9.98715, + "55": 9.9027, + "56": 9.66837, + "57": 9.53524, + "58": 9.89495, + "59": 9.62892, 
+ "60": 9.54308, + "61": 9.72727, + "62": 10.0332, + "63": 9.45215, + "64": 9.83179, + "65": 8.99109, + "66": 9.76394, + "67": 9.40349, + "68": 9.83129, + "69": 9.81856, + "70": 9.77262, + "71": 9.658, + "72": 9.64033, + "73": 9.55124, + "74": 9.02026, + "75": 9.47695, + "76": 9.13586, + "77": 10.09787, + "78": 9.75274, + "79": 9.41697, + "80": 9.45074, + "81": 9.52041, + "82": 9.73203, + "83": 9.36912, + "84": 9.45039, + "85": 9.65229, + "86": 9.1123, + "87": 9.61119, + "88": 9.78708, + "89": 9.64625, + "90": 9.83474, + "91": 9.39429, + "92": 9.39178, + "93": 9.12787, + "94": 8.86637, + "95": 9.54352, + "96": 9.55716, + "97": 9.332, + "98": 9.69189, + "99": 8.92072, + "100": 9.41916 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1216.0, + "2": 1361.0, + "3": 1221.0, + "4": 1222.0, + "5": 1385.0, + "6": 1467.0, + "7": 1252.0, + "8": 1355.0, + "9": 1346.0, + "10": 1335.0, + "11": 1278.0, + "12": 1185.0, + "13": 1203.0, + "14": 1385.0, + "15": 1303.0, + "16": 1377.0, + "17": 1229.0, + "18": 1291.0, + "19": 1244.0, + "20": 1183.0, + "21": 1262.0, + "22": 1122.0, + "23": 1301.0, + "24": 1066.0, + "25": 1182.0, + "26": 1263.0, + "27": 1162.0, + "28": 1262.0, + "29": 1179.0, + "30": 1168.0, + "31": 991.0, + "32": 1092.0, + "33": 1183.0, + "34": 1081.0, + "35": 1146.0, + "36": 1076.0, + "37": 1252.0, + "38": 1176.0, + "39": 1225.0, + "40": 1303.0, + "41": 1104.0, + "42": 1210.0, + "43": 1116.0, + "44": 1165.0, + "45": 1097.0, + "46": 1308.0, + "47": 1165.0, + "48": 1134.0, + "49": 1272.0, + "50": 1083.0, + "51": 1234.0, + "52": 1274.0, + "53": 1393.0, + "54": 1299.0, + "55": 1186.0, + "56": 1267.0, + "57": 1161.0, + "58": 1326.0, + "59": 1403.0, + "60": 1177.0, + "61": 1363.0, + "62": 1302.0, + "63": 1245.0, + "64": 1378.0, + "65": 1330.0, + "66": 1363.0, + "67": 1286.0, + "68": 1313.0, + "69": 1295.0, + "70": 1459.0, + "71": 1374.0, + "72": 1092.0, + "73": 1274.0, + "74": 943.0, + "75": 1059.0, + "76": 1323.0, + 
"77": 1475.0, + "78": 1487.0, + "79": 1496.0, + "80": 1382.0, + "81": 1470.0, + "82": 1417.0, + "83": 1177.0, + "84": 1506.0, + "85": 1420.0, + "86": 1281.0, + "87": 1540.0, + "88": 1467.0, + "89": 1452.0, + "90": 1350.0, + "91": 1010.0, + "92": 1324.0, + "93": 1349.0, + "94": 1197.0, + "95": 2503.0, + "96": 2373.0, + "97": 1490.0, + "98": 2541.0, + "99": 1367.0, + "100": 1122.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 788517888.0, + "2": 788488192.0, + "3": 788535296.0, + "4": 788513280.0, + "5": 788537344.0, + "6": 788479488.0, + "7": 788502528.0, + "8": 788510208.0, + "9": 788526080.0, + "10": 788538368.0, + "11": 788513280.0, + "12": 788484096.0, + "13": 788542464.0, + "14": 788451328.0, + "15": 788503040.0, + "16": 788440576.0, + "17": 788558336.0, + "18": 788535296.0, + "19": 788542464.0, + "20": 788470784.0, + "21": 788508672.0, + "22": 788594176.0, + "23": 788573696.0, + "24": 788513280.0, + "25": 788655616.0, + "26": 788566016.0, + "27": 788630528.0, + "28": 788568576.0, + "29": 788610560.0, + "30": 788587520.0, + "31": 788647424.0, + "32": 788602880.0, + "33": 788616704.0, + "34": 788577792.0, + "35": 788616704.0, + "36": 788642304.0, + "37": 788597760.0, + "38": 788650496.0, + "39": 788663296.0, + "40": 788550144.0, + "41": 788591616.0, + "42": 788575232.0, + "43": 788541952.0, + "44": 788623872.0, + "45": 788491264.0, + "46": 788503552.0, + "47": 788572160.0, + "48": 788488704.0, + "49": 788461568.0, + "50": 788487168.0, + "51": 788523008.0, + "52": 788483584.0, + "53": 788513792.0, + "54": 788503552.0, + "55": 788499968.0, + "56": 788459008.0, + "57": 788456448.0, + "58": 788499968.0, + "59": 788503552.0, + "60": 788491264.0, + "61": 788463616.0, + "62": 788497408.0, + "63": 788449792.0, + "64": 788465664.0, + "65": 788408320.0, + "66": 788445696.0, + "67": 788445696.0, + "68": 788456448.0, + "69": 788473856.0, + "70": 788497408.0, + "71": 788453888.0, + "72": 788413952.0, + 
"73": 788444160.0, + "74": 788419072.0, + "75": 788441600.0, + "76": 788412928.0, + "77": 788471296.0, + "78": 788462592.0, + "79": 788419072.0, + "80": 788411392.0, + "81": 788430848.0, + "82": 788439040.0, + "83": 788435456.0, + "84": 788471296.0, + "85": 788461056.0, + "86": 788395008.0, + "87": 788490752.0, + "88": 788493312.0, + "89": 788501504.0, + "90": 788531712.0, + "91": 788513792.0, + "92": 788516864.0, + "93": 788487168.0, + "94": 788506624.0, + "95": 788543488.0, + "96": 788563456.0, + "97": 788579840.0, + "98": 788590592.0, + "99": 788514816.0, + "100": 788570624.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3023035904.0, + "2": 3179259392.0, + "3": 3206071808.0, + "4": 3206071808.0, + "5": 3206539776.0, + "6": 3206539776.0, + "7": 3206539776.0, + "8": 3206539776.0, + "9": 3206539776.0, + "10": 3206539776.0, + "11": 3206539776.0, + "12": 3206539776.0, + "13": 3207718400.0, + "14": 3207718400.0, + "15": 3207718400.0, + "16": 3207718400.0, + "17": 3219952640.0, + "18": 3219952640.0, + "19": 3219952640.0, + "20": 3219952640.0, + "21": 3219952640.0, + "22": 3239834624.0, + "23": 3239834624.0, + "24": 3239834624.0, + "25": 3276544000.0, + "26": 3276544000.0, + "27": 3276544000.0, + "28": 3276544000.0, + "29": 3276544000.0, + "30": 3276544000.0, + "31": 3276544000.0, + "32": 3276544000.0, + "33": 3276544000.0, + "34": 3276544000.0, + "35": 3276544000.0, + "36": 3276544000.0, + "37": 3276544000.0, + "38": 3276544000.0, + "39": 3281670656.0, + "40": 3281670656.0, + "41": 3281670656.0, + "42": 3281670656.0, + "43": 3281670656.0, + "44": 3281670656.0, + "45": 3281670656.0, + "46": 3281670656.0, + "47": 3281670656.0, + "48": 3281670656.0, + "49": 3281670656.0, + "50": 3281670656.0, + "51": 3281670656.0, + "52": 3281670656.0, + "53": 3281670656.0, + "54": 3281670656.0, + "55": 3281670656.0, + "56": 3281670656.0, + "57": 3281670656.0, + "58": 3281670656.0, + "59": 3281670656.0, + "60": 
3281670656.0, + "61": 3281670656.0, + "62": 3281670656.0, + "63": 3281670656.0, + "64": 3281670656.0, + "65": 3281670656.0, + "66": 3281670656.0, + "67": 3281670656.0, + "68": 3281670656.0, + "69": 3281670656.0, + "70": 3281670656.0, + "71": 3281670656.0, + "72": 3281670656.0, + "73": 3281670656.0, + "74": 3281670656.0, + "75": 3281670656.0, + "76": 3281670656.0, + "77": 3281670656.0, + "78": 3281670656.0, + "79": 3281670656.0, + "80": 3281670656.0, + "81": 3281670656.0, + "82": 3281670656.0, + "83": 3281670656.0, + "84": 3281670656.0, + "85": 3281670656.0, + "86": 3281670656.0, + "87": 3281670656.0, + "88": 3281670656.0, + "89": 3281670656.0, + "90": 3281670656.0, + "91": 3281670656.0, + "92": 3281670656.0, + "93": 3281670656.0, + "94": 3281670656.0, + "95": 3281670656.0, + "96": 3281670656.0, + "97": 3281670656.0, + "98": 3281670656.0, + "99": 3281670656.0, + "100": 3281670656.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 13.21246, + "2": 0.19223, + "3": 0.15847, + "4": 0.14572, + "5": 0.16957, + "6": 0.15266, + "7": 0.1476, + "8": 0.14988, + "9": 0.13878, + "10": 0.14012, + "11": 0.14591, + "12": 0.13945, + "13": 0.13431, + "14": 0.13944, + "15": 0.12844, + "16": 0.14372, + "17": 0.13297, + "18": 0.13719, + "19": 0.13802, + "20": 0.14981, + "21": 0.14099, + "22": 0.12975, + "23": 0.13616, + "24": 0.13752, + "25": 0.13502, + "26": 0.14149, + "27": 0.14818, + "28": 0.14416, + "29": 0.15275, + "30": 0.15077, + "31": 0.15206, + "32": 0.14915, + "33": 0.14666, + "34": 0.1514, + "35": 0.15021, + "36": 0.15193, + "37": 0.14779, + "38": 0.14835, + "39": 0.15073, + "40": 0.14707, + "41": 0.15268, + "42": 0.14878, + "43": 0.15579, + "44": 0.15254, + "45": 0.14999, + "46": 0.20896, + "47": 0.15273, + "48": 0.1484, + "49": 0.15559, + "50": 0.15018, + "51": 0.16013, + "52": 0.15399, + "53": 0.15753, + "54": 0.14895, + "55": 0.14858, + "56": 0.16309, + "57": 0.15206, + "58": 0.15115, + "59": 0.15315, + "60": 
0.15387, + "61": 0.14946, + "62": 0.15213, + "63": 0.14874, + "64": 0.15283, + "65": 0.14602, + "66": 0.15458, + "67": 0.15123, + "68": 0.1551, + "69": 0.15244, + "70": 0.16045, + "71": 0.14441, + "72": 0.15574, + "73": 0.15315, + "74": 0.15619, + "75": 0.15269, + "76": 0.14224, + "77": 0.15289, + "78": 0.14961, + "79": 0.153, + "80": 0.15606, + "81": 0.15226, + "82": 0.14364, + "83": 0.15261, + "84": 0.15146, + "85": 0.15268, + "86": 0.14691, + "87": 0.15346, + "88": 0.15373, + "89": 0.14793, + "90": 0.14784, + "91": 0.14748, + "92": 0.15356, + "93": 0.14881, + "94": 0.14846, + "95": 0.14747, + "96": 0.14823, + "97": 0.15527, + "98": 0.15043, + "99": 0.15066, + "100": 0.14841 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..2ffe6fcfe65 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.81131, + "2": 10.83052, + "3": 10.82093, + "4": 10.81347, + "5": 10.84338, + "6": 10.84743, + "7": 10.85254, + "8": 10.83482, + "9": 10.84276, + "10": 10.77693, + "11": 10.8459, + "12": 10.85115, + "13": 10.84165, + "14": 10.8714, + "15": 10.83613, + "16": 10.79815, + "17": 10.77288, + "18": 10.8075, + "19": 10.78773, + "20": 10.73433, + "21": 10.69461, + "22": 10.56597, + "23": 10.71611, + "24": 10.61321, + "25": 10.552, + "26": 10.61364, + "27": 10.62702, + "28": 10.59546, + "29": 10.59195, + "30": 10.3916, + "31": 10.14615, + "32": 10.47399, + "33": 10.47051, + "34": 10.23435, + "35": 10.29318, + "36": 10.26627, + "37": 10.37219, + "38": 10.2254, + "39": 
10.42101, + "40": 10.13002, + "41": 10.16265, + "42": 10.24278, + "43": 9.88237, + "44": 9.99105, + "45": 9.87295, + "46": 9.85181, + "47": 10.15633, + "48": 9.8915, + "49": 9.58889, + "50": 9.9543, + "51": 9.8849, + "52": 9.78004, + "53": 10.10188, + "54": 9.98715, + "55": 9.9027, + "56": 9.66837, + "57": 9.53524, + "58": 9.89495, + "59": 9.62892, + "60": 9.54308, + "61": 9.72727, + "62": 10.0332, + "63": 9.45215, + "64": 9.83179, + "65": 8.99109, + "66": 9.76394, + "67": 9.40349, + "68": 9.83129, + "69": 9.81856, + "70": 9.77262, + "71": 9.658, + "72": 9.64033, + "73": 9.55124, + "74": 9.02026, + "75": 9.47695, + "76": 9.13586, + "77": 10.09787, + "78": 9.75274, + "79": 9.41697, + "80": 9.45074, + "81": 9.52041, + "82": 9.73203, + "83": 9.36912, + "84": 9.45039, + "85": 9.65229, + "86": 9.1123, + "87": 9.61119, + "88": 9.78708, + "89": 9.64625, + "90": 9.83474, + "91": 9.39429, + "92": 9.39178, + "93": 9.12787, + "94": 8.86637, + "95": 9.54352, + "96": 9.55716, + "97": 9.332, + "98": 9.69189, + "99": 8.92072, + "100": 9.41916 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1216.0, + "2": 1361.0, + "3": 1221.0, + "4": 1222.0, + "5": 1385.0, + "6": 1467.0, + "7": 1252.0, + "8": 1355.0, + "9": 1346.0, + "10": 1335.0, + "11": 1278.0, + "12": 1185.0, + "13": 1203.0, + "14": 1385.0, + "15": 1303.0, + "16": 1377.0, + "17": 1229.0, + "18": 1291.0, + "19": 1244.0, + "20": 1183.0, + "21": 1262.0, + "22": 1122.0, + "23": 1301.0, + "24": 1066.0, + "25": 1182.0, + "26": 1263.0, + "27": 1162.0, + "28": 1262.0, + "29": 1179.0, + "30": 1168.0, + "31": 991.0, + "32": 1092.0, + "33": 1183.0, + "34": 1081.0, + "35": 1146.0, + "36": 1076.0, + "37": 1252.0, + "38": 1176.0, + "39": 1225.0, + "40": 1303.0, + "41": 1104.0, + "42": 1210.0, + "43": 1116.0, + "44": 1165.0, + "45": 1097.0, + "46": 1308.0, + "47": 1165.0, + "48": 1134.0, + "49": 1272.0, + "50": 1083.0, + "51": 1234.0, + "52": 1274.0, + "53": 1393.0, + "54": 1299.0, + 
"55": 1186.0, + "56": 1267.0, + "57": 1161.0, + "58": 1326.0, + "59": 1403.0, + "60": 1177.0, + "61": 1363.0, + "62": 1302.0, + "63": 1245.0, + "64": 1378.0, + "65": 1330.0, + "66": 1363.0, + "67": 1286.0, + "68": 1313.0, + "69": 1295.0, + "70": 1459.0, + "71": 1374.0, + "72": 1092.0, + "73": 1274.0, + "74": 943.0, + "75": 1059.0, + "76": 1323.0, + "77": 1475.0, + "78": 1487.0, + "79": 1496.0, + "80": 1382.0, + "81": 1470.0, + "82": 1417.0, + "83": 1177.0, + "84": 1506.0, + "85": 1420.0, + "86": 1281.0, + "87": 1540.0, + "88": 1467.0, + "89": 1452.0, + "90": 1350.0, + "91": 1010.0, + "92": 1324.0, + "93": 1349.0, + "94": 1197.0, + "95": 2503.0, + "96": 2373.0, + "97": 1490.0, + "98": 2541.0, + "99": 1367.0, + "100": 1122.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 788517888.0, + "2": 788488192.0, + "3": 788535296.0, + "4": 788513280.0, + "5": 788537344.0, + "6": 788479488.0, + "7": 788502528.0, + "8": 788510208.0, + "9": 788526080.0, + "10": 788538368.0, + "11": 788513280.0, + "12": 788484096.0, + "13": 788542464.0, + "14": 788451328.0, + "15": 788503040.0, + "16": 788440576.0, + "17": 788558336.0, + "18": 788535296.0, + "19": 788542464.0, + "20": 788470784.0, + "21": 788508672.0, + "22": 788594176.0, + "23": 788573696.0, + "24": 788513280.0, + "25": 788655616.0, + "26": 788566016.0, + "27": 788630528.0, + "28": 788568576.0, + "29": 788610560.0, + "30": 788587520.0, + "31": 788647424.0, + "32": 788602880.0, + "33": 788616704.0, + "34": 788577792.0, + "35": 788616704.0, + "36": 788642304.0, + "37": 788597760.0, + "38": 788650496.0, + "39": 788663296.0, + "40": 788550144.0, + "41": 788591616.0, + "42": 788575232.0, + "43": 788541952.0, + "44": 788623872.0, + "45": 788491264.0, + "46": 788503552.0, + "47": 788572160.0, + "48": 788488704.0, + "49": 788461568.0, + "50": 788487168.0, + "51": 788523008.0, + "52": 788483584.0, + "53": 788513792.0, + "54": 788503552.0, + "55": 788499968.0, + "56": 
788459008.0, + "57": 788456448.0, + "58": 788499968.0, + "59": 788503552.0, + "60": 788491264.0, + "61": 788463616.0, + "62": 788497408.0, + "63": 788449792.0, + "64": 788465664.0, + "65": 788408320.0, + "66": 788445696.0, + "67": 788445696.0, + "68": 788456448.0, + "69": 788473856.0, + "70": 788497408.0, + "71": 788453888.0, + "72": 788413952.0, + "73": 788444160.0, + "74": 788419072.0, + "75": 788441600.0, + "76": 788412928.0, + "77": 788471296.0, + "78": 788462592.0, + "79": 788419072.0, + "80": 788411392.0, + "81": 788430848.0, + "82": 788439040.0, + "83": 788435456.0, + "84": 788471296.0, + "85": 788461056.0, + "86": 788395008.0, + "87": 788490752.0, + "88": 788493312.0, + "89": 788501504.0, + "90": 788531712.0, + "91": 788513792.0, + "92": 788516864.0, + "93": 788487168.0, + "94": 788506624.0, + "95": 788543488.0, + "96": 788563456.0, + "97": 788579840.0, + "98": 788590592.0, + "99": 788514816.0, + "100": 788570624.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3023035904.0, + "2": 3179259392.0, + "3": 3206071808.0, + "4": 3206071808.0, + "5": 3206539776.0, + "6": 3206539776.0, + "7": 3206539776.0, + "8": 3206539776.0, + "9": 3206539776.0, + "10": 3206539776.0, + "11": 3206539776.0, + "12": 3206539776.0, + "13": 3207718400.0, + "14": 3207718400.0, + "15": 3207718400.0, + "16": 3207718400.0, + "17": 3219952640.0, + "18": 3219952640.0, + "19": 3219952640.0, + "20": 3219952640.0, + "21": 3219952640.0, + "22": 3239834624.0, + "23": 3239834624.0, + "24": 3239834624.0, + "25": 3276544000.0, + "26": 3276544000.0, + "27": 3276544000.0, + "28": 3276544000.0, + "29": 3276544000.0, + "30": 3276544000.0, + "31": 3276544000.0, + "32": 3276544000.0, + "33": 3276544000.0, + "34": 3276544000.0, + "35": 3276544000.0, + "36": 3276544000.0, + "37": 3276544000.0, + "38": 3276544000.0, + "39": 3281670656.0, + "40": 3281670656.0, + "41": 3281670656.0, + "42": 3281670656.0, + "43": 3281670656.0, + "44": 
3281670656.0, + "45": 3281670656.0, + "46": 3281670656.0, + "47": 3281670656.0, + "48": 3281670656.0, + "49": 3281670656.0, + "50": 3281670656.0, + "51": 3281670656.0, + "52": 3281670656.0, + "53": 3281670656.0, + "54": 3281670656.0, + "55": 3281670656.0, + "56": 3281670656.0, + "57": 3281670656.0, + "58": 3281670656.0, + "59": 3281670656.0, + "60": 3281670656.0, + "61": 3281670656.0, + "62": 3281670656.0, + "63": 3281670656.0, + "64": 3281670656.0, + "65": 3281670656.0, + "66": 3281670656.0, + "67": 3281670656.0, + "68": 3281670656.0, + "69": 3281670656.0, + "70": 3281670656.0, + "71": 3281670656.0, + "72": 3281670656.0, + "73": 3281670656.0, + "74": 3281670656.0, + "75": 3281670656.0, + "76": 3281670656.0, + "77": 3281670656.0, + "78": 3281670656.0, + "79": 3281670656.0, + "80": 3281670656.0, + "81": 3281670656.0, + "82": 3281670656.0, + "83": 3281670656.0, + "84": 3281670656.0, + "85": 3281670656.0, + "86": 3281670656.0, + "87": 3281670656.0, + "88": 3281670656.0, + "89": 3281670656.0, + "90": 3281670656.0, + "91": 3281670656.0, + "92": 3281670656.0, + "93": 3281670656.0, + "94": 3281670656.0, + "95": 3281670656.0, + "96": 3281670656.0, + "97": 3281670656.0, + "98": 3281670656.0, + "99": 3281670656.0, + "100": 3281670656.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.84919, + "2": 0.21301, + "3": 0.1875, + "4": 0.18049, + "5": 0.18318, + "6": 0.16229, + "7": 0.16391, + "8": 0.16206, + "9": 0.1519, + "10": 0.15265, + "11": 0.15406, + "12": 0.15153, + "13": 0.14262, + "14": 0.15066, + "15": 0.1386, + "16": 0.15377, + "17": 0.14672, + "18": 0.15, + "19": 0.15031, + "20": 0.15363, + "21": 0.14157, + "22": 0.14022, + "23": 0.15031, + "24": 0.14784, + "25": 0.14617, + "26": 0.15072, + "27": 0.15826, + "28": 0.15989, + "29": 0.17285, + "30": 0.16368, + "31": 0.16977, + "32": 0.1612, + "33": 0.15985, + "34": 0.15796, + "35": 0.16549, + "36": 0.16888, + "37": 0.16396, + "38": 0.16275, + "39": 0.16316, 
+ "40": 0.15731, + "41": 0.16488, + "42": 0.16446, + "43": 0.16827, + "44": 0.16392, + "45": 0.16192, + "46": 0.16633, + "47": 0.16308, + "48": 0.16007, + "49": 0.16464, + "50": 0.15794, + "51": 0.17113, + "52": 0.16522, + "53": 0.1626, + "54": 0.15774, + "55": 0.15957, + "56": 0.16666, + "57": 0.16407, + "58": 0.16282, + "59": 0.16402, + "60": 0.16235, + "61": 0.15906, + "62": 0.16273, + "63": 0.16172, + "64": 0.16219, + "65": 0.15545, + "66": 0.16335, + "67": 0.16169, + "68": 0.16503, + "69": 0.1641, + "70": 0.17009, + "71": 0.1546, + "72": 0.16631, + "73": 0.16013, + "74": 0.166, + "75": 0.1647, + "76": 0.15257, + "77": 0.16369, + "78": 0.156, + "79": 0.16228, + "80": 0.16107, + "81": 0.16212, + "82": 0.15365, + "83": 0.16258, + "84": 0.16459, + "85": 0.16137, + "86": 0.15549, + "87": 0.1627, + "88": 0.16309, + "89": 0.16008, + "90": 0.15864, + "91": 0.15894, + "92": 0.1647, + "93": 0.16045, + "94": 0.1601, + "95": 0.15909, + "96": 0.15624, + "97": 0.16592, + "98": 0.15827, + "99": 0.16214, + "100": 0.15589 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json index 34f7db22ade..ae1c2034cde 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.81442, + "2": 10.81882, + "3": 10.81531, + "4": 10.80285, "5": 10.8513, + "6": 10.85015, + "7": 10.83865, + "8": 10.83952, + "9": 10.82187, "10": 10.77753, + "11": 10.86422, 
+ "12": 10.83724, + "13": 10.85876, + "14": 10.86332, "15": 10.79795, + "16": 10.79507, + "17": 10.77121, + "18": 10.78932, + "19": 10.78375, "20": 10.71658, + "21": 10.68392, + "22": 10.53046, + "23": 10.69852, + "24": 10.58536, "25": 10.52392, + "26": 10.58331, + "27": 10.60949, + "28": 10.57165, + "29": 10.59009, "30": 10.35681, + "31": 10.09394, + "32": 10.45893, + "33": 10.45658, + "34": 10.20513, "35": 10.26714, + "36": 10.22334, + "37": 10.35301, + "38": 10.19469, + "39": 10.4172, "40": 10.08945, + "41": 10.12779, + "42": 10.21205, + "43": 9.83115, + "44": 9.9694, "45": 9.83605, + "46": 9.81694, + "47": 10.15399, + "48": 9.85315, + "49": 9.53452, "50": 9.91905, + "51": 9.85365, + "52": 9.74298, + "53": 10.07139, + "54": 9.96275, "55": 9.88234, + "56": 9.63465, + "57": 9.4865, + "58": 9.84855, + "59": 9.58914, "60": 9.5108, + "61": 9.70318, + "62": 9.99619, + "63": 9.40059, + "64": 9.78463, "65": 8.95371, + "66": 9.7179, + "67": 9.36926, + "68": 9.79814, + "69": 9.79668, "70": 9.74892, + "71": 9.63192, + "72": 9.59949, + "73": 9.50317, + "74": 8.9522, "75": 9.43106, + "76": 9.09064, + "77": 10.08076, + "78": 9.73534, + "79": 9.3887, "80": 9.41432, + "81": 9.48416, + "82": 9.7092, + "83": 9.31507, + "84": 9.41846, "85": 9.6224, + "86": 9.07938, + "87": 9.59206, + "88": 9.74951, + "89": 9.60449, "90": 9.82577, + "91": 9.34236, + "92": 9.35861, + "93": 9.07987, + "94": 8.82784, "95": 9.50868, + "96": 9.52112, + "97": 9.30601, + "98": 9.66582, + "99": 8.87718, "100": 9.38975 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 5476.0, + "2": 5726.0, + "3": 5820.0, + "4": 5738.0, "5": 6334.0, + "6": 6609.0, + "7": 5986.0, + "8": 5915.0, + "9": 6387.0, "10": 5090.0, + "11": 6596.0, + "12": 6165.0, + "13": 6559.0, + "14": 6568.0, "15": 6041.0, + "16": 6363.0, + "17": 6226.0, + "18": 5986.0, + "19": 6413.0, "20": 5738.0, + "21": 6248.0, + "22": 5765.0, + "23": 6895.0, + "24": 6096.0, "25": 5736.0, + "26": 
6113.0, + "27": 6495.0, + "28": 6754.0, + "29": 7066.0, "30": 6254.0, + "31": 5809.0, + "32": 6893.0, + "33": 7278.0, + "34": 6486.0, "35": 6750.0, + "36": 6625.0, + "37": 7510.0, + "38": 7131.0, + "39": 7741.0, "40": 7222.0, + "41": 7096.0, + "42": 7656.0, + "43": 7205.0, + "44": 7138.0, "45": 7019.0, + "46": 7235.0, + "47": 7542.0, + "48": 7734.0, + "49": 7610.0, "50": 7710.0, + "51": 8076.0, + "52": 7867.0, + "53": 8874.0, + "54": 8747.0, "55": 7601.0, + "56": 7891.0, + "57": 7603.0, + "58": 8731.0, + "59": 8257.0, "60": 7964.0, + "61": 8450.0, + "62": 8632.0, + "63": 7806.0, + "64": 8923.0, "65": 8276.0, + "66": 9208.0, + "67": 8240.0, + "68": 8439.0, + "69": 8765.0, "70": 9578.0, + "71": 9145.0, + "72": 8894.0, + "73": 8946.0, + "74": 6930.0, "75": 7952.0, + "76": 8482.0, + "77": 12156.0, + "78": 9554.0, + "79": 12899.0, "80": 11642.0, + "81": 9977.0, + "82": 9786.0, + "83": 14238.0, + "84": 13757.0, "85": 46448.0, + "86": 9803.0, + "87": 14740.0, + "88": 9790.0, + "89": 10097.0, "90": 11246.0, + "91": 8938.0, + "92": 9088.0, + "93": 8203.0, + "94": 9445.0, "95": 9762.0, + "96": 47617.0, + "97": 8875.0, + "98": 11078.0, + "99": 15373.0, "100": 9275.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 628059136.0, + "2": 628060160.0, + "3": 628060160.0, + "4": 628060160.0, "5": 628060160.0, + "6": 628060160.0, + "7": 628060160.0, + "8": 628060160.0, + "9": 628060160.0, "10": 628060160.0, + "11": 628060160.0, + "12": 628060160.0, + "13": 628060160.0, + "14": 628060160.0, "15": 628060160.0, + "16": 628060160.0, + "17": 628060160.0, + "18": 628060160.0, + "19": 628060160.0, "20": 628060160.0, + "21": 628060160.0, + "22": 628060160.0, + "23": 628060160.0, + "24": 628060160.0, "25": 628060160.0, + "26": 628060160.0, + "27": 628060160.0, + "28": 628060160.0, + "29": 628060160.0, "30": 628060160.0, + "31": 628060160.0, + "32": 628060160.0, + "33": 628060160.0, + "34": 628060160.0, "35": 
628060160.0, + "36": 628060160.0, + "37": 628060160.0, + "38": 628060160.0, + "39": 628060160.0, "40": 628060160.0, + "41": 628060160.0, + "42": 628060160.0, + "43": 628060160.0, + "44": 628060160.0, "45": 628060160.0, + "46": 628060160.0, + "47": 628060160.0, + "48": 628060160.0, + "49": 628060160.0, "50": 628060160.0, + "51": 628060160.0, + "52": 628060160.0, + "53": 628060160.0, + "54": 628060160.0, "55": 628060160.0, + "56": 628060160.0, + "57": 628060160.0, + "58": 628060160.0, + "59": 628060160.0, "60": 628060160.0, + "61": 628060160.0, + "62": 628060160.0, + "63": 628060160.0, + "64": 628060160.0, "65": 628060160.0, + "66": 628060160.0, + "67": 628060160.0, + "68": 628060160.0, + "69": 628060160.0, "70": 628060160.0, + "71": 628060160.0, + "72": 628060160.0, + "73": 628060160.0, + "74": 628060160.0, "75": 628060160.0, + "76": 628060160.0, + "77": 628060160.0, + "78": 628060160.0, + "79": 628060160.0, "80": 628060160.0, + "81": 628060160.0, + "82": 628060160.0, + "83": 628060160.0, + "84": 628060160.0, "85": 628060160.0, + "86": 628060160.0, + "87": 628060160.0, + "88": 628060160.0, + "89": 628060160.0, "90": 628060160.0, + "91": 628060160.0, + "92": 628060160.0, + "93": 628060160.0, + "94": 628060160.0, "95": 628060160.0, + "96": 628060160.0, + "97": 628060160.0, + "98": 628060160.0, + "99": 628060160.0, "100": 628060160.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 966226944.0, + "2": 1135178752.0, + "3": 1135178752.0, + "4": 1142161920.0, "5": 1142161920.0, + "6": 1142161920.0, + "7": 1142161920.0, + "8": 1142161920.0, + "9": 1142161920.0, "10": 1142161920.0, + "11": 1142161920.0, + "12": 1142161920.0, + "13": 1142161920.0, + "14": 1142161920.0, "15": 1142161920.0, + "16": 1142161920.0, + "17": 1142161920.0, + "18": 1142161920.0, + "19": 1142161920.0, "20": 1142161920.0, + "21": 1142161920.0, + "22": 1142161920.0, + "23": 1142161920.0, + "24": 1142161920.0, "25": 
1142161920.0, + "26": 1142161920.0, + "27": 1142161920.0, + "28": 1142161920.0, + "29": 1142161920.0, "30": 1142161920.0, + "31": 1142161920.0, + "32": 1142161920.0, + "33": 1142161920.0, + "34": 1142161920.0, "35": 1142161920.0, + "36": 1142161920.0, + "37": 1142161920.0, + "38": 1142161920.0, + "39": 1142161920.0, "40": 1142161920.0, + "41": 1142161920.0, + "42": 1142161920.0, + "43": 1142161920.0, + "44": 1142161920.0, "45": 1142161920.0, + "46": 1142161920.0, + "47": 1142161920.0, + "48": 1142161920.0, + "49": 1142161920.0, "50": 1142161920.0, + "51": 1142161920.0, + "52": 1142161920.0, + "53": 1142161920.0, + "54": 1142161920.0, "55": 1142161920.0, + "56": 1142161920.0, + "57": 1142161920.0, + "58": 1142161920.0, + "59": 1142161920.0, "60": 1142161920.0, + "61": 1145419776.0, + "62": 1145419776.0, + "63": 1145419776.0, + "64": 1145419776.0, "65": 1145419776.0, + "66": 1145419776.0, + "67": 1145419776.0, + "68": 1145419776.0, + "69": 1145419776.0, "70": 1145419776.0, + "71": 1145419776.0, + "72": 1145419776.0, + "73": 1145419776.0, + "74": 1145419776.0, "75": 1145419776.0, + "76": 1149517312.0, + "77": 1149517312.0, + "78": 1149517312.0, + "79": 1149517312.0, "80": 1149517312.0, + "81": 1149517312.0, + "82": 1149517312.0, + "83": 1149517312.0, + "84": 1149517312.0, "85": 1149517312.0, + "86": 1149517312.0, + "87": 1149517312.0, + "88": 1149517312.0, + "89": 1149517312.0, "90": 1149517312.0, + "91": 1149517312.0, + "92": 1149517312.0, + "93": 1149517312.0, + "94": 1149517312.0, "95": 1149517312.0, + "96": 1149517312.0, + "97": 1149517312.0, + "98": 1149517312.0, + "99": 1149517312.0, "100": 1149517312.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 22.4417, - "5": 0.54127, - "10": 0.51699, - "15": 0.49577, - "20": 0.49101, - "25": 0.50704, - "30": 0.53551, - "35": 0.49875, - "40": 0.49003, - "45": 0.49309, - "50": 0.49843, - "55": 0.48281, - "60": 0.50246, - "65": 0.49261, - "70": 
0.49745, - "75": 0.49851, - "80": 0.52914, - "85": 0.49531, - "90": 0.49632, - "95": 0.49182, - "100": 0.49317 + "1": 20.57901, + "2": 0.68043, + "3": 0.63562, + "4": 0.61398, + "5": 0.61337, + "6": 0.60234, + "7": 0.60862, + "8": 0.60734, + "9": 0.58969, + "10": 0.58747, + "11": 0.5811, + "12": 0.58339, + "13": 0.58104, + "14": 0.57128, + "15": 0.57144, + "16": 0.57507, + "17": 0.56755, + "18": 0.57095, + "19": 0.56394, + "20": 0.56491, + "21": 0.5641, + "22": 0.57257, + "23": 0.56993, + "24": 0.57313, + "25": 0.59644, + "26": 0.57728, + "27": 0.56326, + "28": 0.58965, + "29": 0.57459, + "30": 0.58292, + "31": 0.5611, + "32": 0.57216, + "33": 0.56117, + "34": 0.56648, + "35": 0.57301, + "36": 0.5682, + "37": 0.57344, + "38": 0.57412, + "39": 0.57266, + "40": 0.56976, + "41": 0.58248, + "42": 0.56977, + "43": 0.59296, + "44": 0.57825, + "45": 0.57205, + "46": 0.57416, + "47": 0.56382, + "48": 0.56705, + "49": 0.56054, + "50": 0.57803, + "51": 0.5794, + "52": 0.57311, + "53": 0.55689, + "54": 0.56928, + "55": 0.56498, + "56": 0.5793, + "57": 0.59551, + "58": 0.57445, + "59": 0.57266, + "60": 0.56772, + "61": 0.56341, + "62": 0.56683, + "63": 0.56161, + "64": 0.56821, + "65": 0.57696, + "66": 0.57433, + "67": 0.5584, + "68": 0.57566, + "69": 0.57071, + "70": 0.56326, + "71": 0.57066, + "72": 0.55601, + "73": 0.58093, + "74": 0.59092, + "75": 0.57258, + "76": 0.57145, + "77": 0.55748, + "78": 0.57398, + "79": 0.56823, + "80": 0.56858, + "81": 0.55889, + "82": 0.56474, + "83": 0.56681, + "84": 0.5624, + "85": 0.56593, + "86": 0.55528, + "87": 0.56493, + "88": 0.54955, + "89": 0.56961, + "90": 0.55961, + "91": 0.56585, + "92": 0.58153, + "93": 0.56914, + "94": 0.58194, + "95": 0.56106, + "96": 0.56571, + "97": 0.56072, + "98": 0.56686, + "99": 0.55834, + "100": 0.56357 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..fac0ec053dd --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.81442, + "2": 10.81882, + "3": 10.81551, + "4": 10.80292, + "5": 10.85144, + "6": 10.85011, + "7": 10.83867, + "8": 10.83952, + "9": 10.82213, + "10": 10.77746, + "11": 10.86426, + "12": 10.83689, + "13": 10.85831, + "14": 10.86354, + "15": 10.79774, + "16": 10.79537, + "17": 10.77155, + "18": 10.78908, + "19": 10.78343, + "20": 10.71629, + "21": 10.6835, + "22": 10.53061, + "23": 10.69849, + "24": 10.58571, + "25": 10.52397, + "26": 10.58327, + "27": 10.60963, + "28": 10.57207, + "29": 10.59012, + "30": 10.35613, + "31": 10.09392, + "32": 10.45887, + "33": 10.45644, + "34": 10.20494, + "35": 10.26735, + "36": 10.22333, + "37": 10.35299, + "38": 10.19476, + "39": 10.41731, + "40": 10.08948, + "41": 10.12721, + "42": 10.21207, + "43": 9.8313, + "44": 9.96936, + "45": 9.83601, + "46": 9.81666, + "47": 10.1539, + "48": 9.85279, + "49": 9.53447, + "50": 9.91909, + "51": 9.85364, + "52": 9.74286, + "53": 10.07155, + "54": 9.96279, + "55": 9.88223, + "56": 9.63465, + "57": 9.48633, + "58": 9.84878, + "59": 9.58904, + "60": 9.51094, + "61": 9.7032, + "62": 9.99637, + "63": 9.40044, + "64": 9.78465, + "65": 8.95366, + "66": 9.71808, + "67": 9.36931, + "68": 9.79818, + "69": 9.79667, + "70": 9.74899, + "71": 9.63213, + "72": 9.59956, + "73": 9.50308, + "74": 8.95202, + "75": 9.43084, + "76": 9.09067, + "77": 10.08102, + "78": 9.73521, + "79": 9.38853, + "80": 9.41418, + "81": 9.48403, + "82": 9.70907, + "83": 9.3152, + "84": 9.41838, + "85": 9.62222, + "86": 9.07945, + "87": 9.59202, + 
"88": 9.74953, + "89": 9.60441, + "90": 9.82577, + "91": 9.34232, + "92": 9.35837, + "93": 9.07969, + "94": 8.82793, + "95": 9.50864, + "96": 9.52117, + "97": 9.30605, + "98": 9.6658, + "99": 8.87716, + "100": 9.38997 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5488.0, + "2": 5704.0, + "3": 5788.0, + "4": 5853.0, + "5": 6401.0, + "6": 6686.0, + "7": 5949.0, + "8": 5811.0, + "9": 6280.0, + "10": 5192.0, + "11": 6645.0, + "12": 6193.0, + "13": 6525.0, + "14": 6487.0, + "15": 6258.0, + "16": 6261.0, + "17": 6080.0, + "18": 5901.0, + "19": 6228.0, + "20": 5713.0, + "21": 6265.0, + "22": 5788.0, + "23": 6618.0, + "24": 6159.0, + "25": 5674.0, + "26": 6218.0, + "27": 6180.0, + "28": 6802.0, + "29": 7006.0, + "30": 6195.0, + "31": 5847.0, + "32": 6680.0, + "33": 7327.0, + "34": 6433.0, + "35": 6593.0, + "36": 6717.0, + "37": 7545.0, + "38": 7130.0, + "39": 7928.0, + "40": 7233.0, + "41": 7093.0, + "42": 7653.0, + "43": 7136.0, + "44": 7113.0, + "45": 7167.0, + "46": 7435.0, + "47": 7501.0, + "48": 7648.0, + "49": 7520.0, + "50": 7701.0, + "51": 7847.0, + "52": 7828.0, + "53": 8765.0, + "54": 8799.0, + "55": 7683.0, + "56": 7972.0, + "57": 7642.0, + "58": 8419.0, + "59": 8276.0, + "60": 7917.0, + "61": 8598.0, + "62": 8394.0, + "63": 7896.0, + "64": 9047.0, + "65": 8280.0, + "66": 9315.0, + "67": 8277.0, + "68": 8341.0, + "69": 8737.0, + "70": 9764.0, + "71": 9050.0, + "72": 9036.0, + "73": 9076.0, + "74": 6969.0, + "75": 7833.0, + "76": 8450.0, + "77": 13505.0, + "78": 9634.0, + "79": 13982.0, + "80": 11548.0, + "81": 10035.0, + "82": 9732.0, + "83": 9037.0, + "84": 9522.0, + "85": 46479.0, + "86": 8626.0, + "87": 11964.0, + "88": 9637.0, + "89": 10273.0, + "90": 11256.0, + "91": 8811.0, + "92": 9218.0, + "93": 8281.0, + "94": 9390.0, + "95": 9376.0, + "96": 13248.0, + "97": 8945.0, + "98": 10682.0, + "99": 15485.0, + "100": 9101.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 628059136.0, + "2": 628060160.0, + "3": 628060160.0, + "4": 628060160.0, + "5": 628060160.0, + "6": 628060160.0, + "7": 628060160.0, + "8": 628060160.0, + "9": 628060160.0, + "10": 628060160.0, + "11": 628060160.0, + "12": 628060160.0, + "13": 628060160.0, + "14": 628060160.0, + "15": 628060160.0, + "16": 628060160.0, + "17": 628060160.0, + "18": 628060160.0, + "19": 628060160.0, + "20": 628060160.0, + "21": 628060160.0, + "22": 628060160.0, + "23": 628060160.0, + "24": 628060160.0, + "25": 628060160.0, + "26": 628060160.0, + "27": 628060160.0, + "28": 628060160.0, + "29": 628060160.0, + "30": 628060160.0, + "31": 628060160.0, + "32": 628060160.0, + "33": 628060160.0, + "34": 628060160.0, + "35": 628060160.0, + "36": 628060160.0, + "37": 628060160.0, + "38": 628060160.0, + "39": 628060160.0, + "40": 628060160.0, + "41": 628060160.0, + "42": 628060160.0, + "43": 628060160.0, + "44": 628060160.0, + "45": 628060160.0, + "46": 628060160.0, + "47": 628060160.0, + "48": 628060160.0, + "49": 628060160.0, + "50": 628060160.0, + "51": 628060160.0, + "52": 628060160.0, + "53": 628060160.0, + "54": 628060160.0, + "55": 628060160.0, + "56": 628060160.0, + "57": 628060160.0, + "58": 628060160.0, + "59": 628060160.0, + "60": 628060160.0, + "61": 628060160.0, + "62": 628060160.0, + "63": 628060160.0, + "64": 628060160.0, + "65": 628060160.0, + "66": 628060160.0, + "67": 628060160.0, + "68": 628060160.0, + "69": 628060160.0, + "70": 628060160.0, + "71": 628060160.0, + "72": 628060160.0, + "73": 628060160.0, + "74": 628060160.0, + "75": 628060160.0, + "76": 628060160.0, + "77": 628060160.0, + "78": 628060160.0, + "79": 628060160.0, + "80": 628060160.0, + "81": 628060160.0, + "82": 628060160.0, + "83": 628060160.0, + "84": 628060160.0, + "85": 628060160.0, + "86": 628060160.0, + "87": 628060160.0, + "88": 628060160.0, + "89": 628060160.0, + "90": 628060160.0, + "91": 628060160.0, + "92": 628060160.0, + "93": 628060160.0, + "94": 628060160.0, 
+ "95": 628060160.0, + "96": 628060160.0, + "97": 628060160.0, + "98": 628060160.0, + "99": 628060160.0, + "100": 628060160.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 966226944.0, + "2": 1135178752.0, + "3": 1135178752.0, + "4": 1142154752.0, + "5": 1142154752.0, + "6": 1142154752.0, + "7": 1142154752.0, + "8": 1142154752.0, + "9": 1142154752.0, + "10": 1142154752.0, + "11": 1142154752.0, + "12": 1142154752.0, + "13": 1142154752.0, + "14": 1142154752.0, + "15": 1142154752.0, + "16": 1142154752.0, + "17": 1142154752.0, + "18": 1142154752.0, + "19": 1142154752.0, + "20": 1142154752.0, + "21": 1142154752.0, + "22": 1142154752.0, + "23": 1142154752.0, + "24": 1142154752.0, + "25": 1142154752.0, + "26": 1142154752.0, + "27": 1142154752.0, + "28": 1142154752.0, + "29": 1142154752.0, + "30": 1142154752.0, + "31": 1142154752.0, + "32": 1142154752.0, + "33": 1142154752.0, + "34": 1142154752.0, + "35": 1142154752.0, + "36": 1142154752.0, + "37": 1142154752.0, + "38": 1142154752.0, + "39": 1142154752.0, + "40": 1142154752.0, + "41": 1142154752.0, + "42": 1142154752.0, + "43": 1142154752.0, + "44": 1142154752.0, + "45": 1142154752.0, + "46": 1142154752.0, + "47": 1142154752.0, + "48": 1142154752.0, + "49": 1142154752.0, + "50": 1142154752.0, + "51": 1142154752.0, + "52": 1142154752.0, + "53": 1142154752.0, + "54": 1142154752.0, + "55": 1142154752.0, + "56": 1142154752.0, + "57": 1142154752.0, + "58": 1142154752.0, + "59": 1142154752.0, + "60": 1142154752.0, + "61": 1145444352.0, + "62": 1145444352.0, + "63": 1145444352.0, + "64": 1145444352.0, + "65": 1145444352.0, + "66": 1145444352.0, + "67": 1145444352.0, + "68": 1145444352.0, + "69": 1145444352.0, + "70": 1145444352.0, + "71": 1145444352.0, + "72": 1145444352.0, + "73": 1145444352.0, + "74": 1145444352.0, + "75": 1145444352.0, + "76": 1149560320.0, + "77": 1149560320.0, + "78": 1149560320.0, + "79": 1149560320.0, + "80": 1149560320.0, + "81": 
1149560320.0, + "82": 1149560320.0, + "83": 1149560320.0, + "84": 1149560320.0, + "85": 1149560320.0, + "86": 1149560320.0, + "87": 1149560320.0, + "88": 1149560320.0, + "89": 1149560320.0, + "90": 1149560320.0, + "91": 1149560320.0, + "92": 1149560320.0, + "93": 1149560320.0, + "94": 1149560320.0, + "95": 1149560320.0, + "96": 1149560320.0, + "97": 1149560320.0, + "98": 1149560320.0, + "99": 1149560320.0, + "100": 1149560320.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 22.49159, + "2": 0.64465, + "3": 0.55144, + "4": 0.54612, + "5": 0.54224, + "6": 0.53272, + "7": 0.53156, + "8": 0.52769, + "9": 0.51643, + "10": 0.51904, + "11": 0.51365, + "12": 0.51064, + "13": 0.5046, + "14": 0.50595, + "15": 0.49656, + "16": 0.51295, + "17": 0.49558, + "18": 0.50544, + "19": 0.49807, + "20": 0.50213, + "21": 0.50583, + "22": 0.52086, + "23": 0.51086, + "24": 0.50937, + "25": 0.5124, + "26": 0.51291, + "27": 0.52068, + "28": 0.54211, + "29": 0.52886, + "30": 0.52175, + "31": 0.51586, + "32": 0.5142, + "33": 0.49143, + "34": 0.49103, + "35": 0.49405, + "36": 0.49048, + "37": 0.48575, + "38": 0.49941, + "39": 0.50795, + "40": 0.51375, + "41": 0.49293, + "42": 0.48855, + "43": 0.5029, + "44": 0.49021, + "45": 0.50044, + "46": 0.4959, + "47": 0.49439, + "48": 0.48796, + "49": 0.48244, + "50": 0.50689, + "51": 0.53388, + "52": 0.49313, + "53": 0.50127, + "54": 0.50696, + "55": 0.50505, + "56": 0.50751, + "57": 0.50921, + "58": 0.49608, + "59": 0.49342, + "60": 0.49604, + "61": 0.49149, + "62": 0.48784, + "63": 0.48712, + "64": 0.48464, + "65": 0.51125, + "66": 0.48673, + "67": 0.48738, + "68": 0.48812, + "69": 0.4924, + "70": 0.48944, + "71": 0.48906, + "72": 0.48542, + "73": 0.50073, + "74": 0.49165, + "75": 0.48855, + "76": 0.49114, + "77": 0.49358, + "78": 0.48743, + "79": 0.49072, + "80": 0.48515, + "81": 0.48089, + "82": 0.48965, + "83": 0.49061, + "84": 0.48204, + "85": 0.46988, + "86": 0.49418, + "87": 
0.48287, + "88": 0.47854, + "89": 0.48256, + "90": 0.48294, + "91": 0.4982, + "92": 0.48423, + "93": 0.47976, + "94": 0.48336, + "95": 0.47914, + "96": 0.71379, + "97": 1.04054, + "98": 3.57564, + "99": 4.591, + "100": 0.98086 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..68b72267704 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.81442, + "2": 10.81882, + "3": 10.81551, + "4": 10.80292, + "5": 10.85144, + "6": 10.85011, + "7": 10.83867, + "8": 10.83952, + "9": 10.82213, + "10": 10.77746, + "11": 10.86426, + "12": 10.83689, + "13": 10.85831, + "14": 10.86354, + "15": 10.79774, + "16": 10.79537, + "17": 10.77155, + "18": 10.78908, + "19": 10.78343, + "20": 10.71629, + "21": 10.6835, + "22": 10.53061, + "23": 10.69849, + "24": 10.58571, + "25": 10.52397, + "26": 10.58327, + "27": 10.60963, + "28": 10.57207, + "29": 10.59012, + "30": 10.35613, + "31": 10.09392, + "32": 10.45887, + "33": 10.45644, + "34": 10.20494, + "35": 10.26735, + "36": 10.22333, + "37": 10.35299, + "38": 10.19476, + "39": 10.41731, + "40": 10.08948, + "41": 10.12721, + "42": 10.21207, + "43": 9.8313, + "44": 9.96936, + "45": 9.83601, + "46": 9.81666, + "47": 10.1539, + "48": 9.85279, + "49": 9.53447, + "50": 9.91909, + "51": 9.85364, + "52": 9.74286, + "53": 10.07155, + "54": 9.96279, + "55": 9.88223, + "56": 9.63465, + "57": 9.48633, + "58": 9.84878, + "59": 9.58904, + "60": 9.51094, + "61": 9.7032, + "62": 9.99637, + "63": 9.40044, + 
"64": 9.78465, + "65": 8.95366, + "66": 9.71808, + "67": 9.36931, + "68": 9.79818, + "69": 9.79667, + "70": 9.74899, + "71": 9.63213, + "72": 9.59956, + "73": 9.50308, + "74": 8.95202, + "75": 9.43084, + "76": 9.09067, + "77": 10.08102, + "78": 9.73521, + "79": 9.38853, + "80": 9.41418, + "81": 9.48403, + "82": 9.70907, + "83": 9.3152, + "84": 9.41838, + "85": 9.62222, + "86": 9.07945, + "87": 9.59202, + "88": 9.74953, + "89": 9.60441, + "90": 9.82577, + "91": 9.34232, + "92": 9.35837, + "93": 9.07969, + "94": 8.82793, + "95": 9.50864, + "96": 9.52117, + "97": 9.30605, + "98": 9.6658, + "99": 8.87716, + "100": 9.38997 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5488.0, + "2": 5704.0, + "3": 5788.0, + "4": 5853.0, + "5": 6401.0, + "6": 6686.0, + "7": 5949.0, + "8": 5811.0, + "9": 6280.0, + "10": 5192.0, + "11": 6645.0, + "12": 6193.0, + "13": 6525.0, + "14": 6487.0, + "15": 6258.0, + "16": 6261.0, + "17": 6080.0, + "18": 5901.0, + "19": 6228.0, + "20": 5713.0, + "21": 6265.0, + "22": 5788.0, + "23": 6618.0, + "24": 6159.0, + "25": 5674.0, + "26": 6218.0, + "27": 6180.0, + "28": 6802.0, + "29": 7006.0, + "30": 6195.0, + "31": 5847.0, + "32": 6680.0, + "33": 7327.0, + "34": 6433.0, + "35": 6593.0, + "36": 6717.0, + "37": 7545.0, + "38": 7130.0, + "39": 7928.0, + "40": 7233.0, + "41": 7093.0, + "42": 7653.0, + "43": 7136.0, + "44": 7113.0, + "45": 7167.0, + "46": 7435.0, + "47": 7501.0, + "48": 7648.0, + "49": 7520.0, + "50": 7701.0, + "51": 7847.0, + "52": 7828.0, + "53": 8765.0, + "54": 8799.0, + "55": 7683.0, + "56": 7972.0, + "57": 7642.0, + "58": 8419.0, + "59": 8276.0, + "60": 7917.0, + "61": 8598.0, + "62": 8394.0, + "63": 7896.0, + "64": 9047.0, + "65": 8280.0, + "66": 9315.0, + "67": 8277.0, + "68": 8341.0, + "69": 8737.0, + "70": 9764.0, + "71": 9050.0, + "72": 9036.0, + "73": 9076.0, + "74": 6969.0, + "75": 7833.0, + "76": 8450.0, + "77": 13505.0, + "78": 9634.0, + "79": 13982.0, + "80": 11548.0, 
+ "81": 10035.0, + "82": 9732.0, + "83": 9037.0, + "84": 9522.0, + "85": 46479.0, + "86": 8626.0, + "87": 11964.0, + "88": 9637.0, + "89": 10273.0, + "90": 11256.0, + "91": 8811.0, + "92": 9218.0, + "93": 8281.0, + "94": 9390.0, + "95": 9376.0, + "96": 13248.0, + "97": 8945.0, + "98": 10682.0, + "99": 15485.0, + "100": 9101.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 628059136.0, + "2": 628060160.0, + "3": 628060160.0, + "4": 628060160.0, + "5": 628060160.0, + "6": 628060160.0, + "7": 628060160.0, + "8": 628060160.0, + "9": 628060160.0, + "10": 628060160.0, + "11": 628060160.0, + "12": 628060160.0, + "13": 628060160.0, + "14": 628060160.0, + "15": 628060160.0, + "16": 628060160.0, + "17": 628060160.0, + "18": 628060160.0, + "19": 628060160.0, + "20": 628060160.0, + "21": 628060160.0, + "22": 628060160.0, + "23": 628060160.0, + "24": 628060160.0, + "25": 628060160.0, + "26": 628060160.0, + "27": 628060160.0, + "28": 628060160.0, + "29": 628060160.0, + "30": 628060160.0, + "31": 628060160.0, + "32": 628060160.0, + "33": 628060160.0, + "34": 628060160.0, + "35": 628060160.0, + "36": 628060160.0, + "37": 628060160.0, + "38": 628060160.0, + "39": 628060160.0, + "40": 628060160.0, + "41": 628060160.0, + "42": 628060160.0, + "43": 628060160.0, + "44": 628060160.0, + "45": 628060160.0, + "46": 628060160.0, + "47": 628060160.0, + "48": 628060160.0, + "49": 628060160.0, + "50": 628060160.0, + "51": 628060160.0, + "52": 628060160.0, + "53": 628060160.0, + "54": 628060160.0, + "55": 628060160.0, + "56": 628060160.0, + "57": 628060160.0, + "58": 628060160.0, + "59": 628060160.0, + "60": 628060160.0, + "61": 628060160.0, + "62": 628060160.0, + "63": 628060160.0, + "64": 628060160.0, + "65": 628060160.0, + "66": 628060160.0, + "67": 628060160.0, + "68": 628060160.0, + "69": 628060160.0, + "70": 628060160.0, + "71": 628060160.0, + "72": 628060160.0, + "73": 628060160.0, + "74": 628060160.0, + "75": 
628060160.0, + "76": 628060160.0, + "77": 628060160.0, + "78": 628060160.0, + "79": 628060160.0, + "80": 628060160.0, + "81": 628060160.0, + "82": 628060160.0, + "83": 628060160.0, + "84": 628060160.0, + "85": 628060160.0, + "86": 628060160.0, + "87": 628060160.0, + "88": 628060160.0, + "89": 628060160.0, + "90": 628060160.0, + "91": 628060160.0, + "92": 628060160.0, + "93": 628060160.0, + "94": 628060160.0, + "95": 628060160.0, + "96": 628060160.0, + "97": 628060160.0, + "98": 628060160.0, + "99": 628060160.0, + "100": 628060160.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 966226944.0, + "2": 1135178752.0, + "3": 1135178752.0, + "4": 1142154752.0, + "5": 1142154752.0, + "6": 1142154752.0, + "7": 1142154752.0, + "8": 1142154752.0, + "9": 1142154752.0, + "10": 1142154752.0, + "11": 1142154752.0, + "12": 1142154752.0, + "13": 1142154752.0, + "14": 1142154752.0, + "15": 1142154752.0, + "16": 1142154752.0, + "17": 1142154752.0, + "18": 1142154752.0, + "19": 1142154752.0, + "20": 1142154752.0, + "21": 1142154752.0, + "22": 1142154752.0, + "23": 1142154752.0, + "24": 1142154752.0, + "25": 1142154752.0, + "26": 1142154752.0, + "27": 1142154752.0, + "28": 1142154752.0, + "29": 1142154752.0, + "30": 1142154752.0, + "31": 1142154752.0, + "32": 1142154752.0, + "33": 1142154752.0, + "34": 1142154752.0, + "35": 1142154752.0, + "36": 1142154752.0, + "37": 1142154752.0, + "38": 1142154752.0, + "39": 1142154752.0, + "40": 1142154752.0, + "41": 1142154752.0, + "42": 1142154752.0, + "43": 1142154752.0, + "44": 1142154752.0, + "45": 1142154752.0, + "46": 1142154752.0, + "47": 1142154752.0, + "48": 1142154752.0, + "49": 1142154752.0, + "50": 1142154752.0, + "51": 1142154752.0, + "52": 1142154752.0, + "53": 1142154752.0, + "54": 1142154752.0, + "55": 1142154752.0, + "56": 1142154752.0, + "57": 1142154752.0, + "58": 1142154752.0, + "59": 1142154752.0, + "60": 1142154752.0, + "61": 1145444352.0, + "62": 
1145444352.0, + "63": 1145444352.0, + "64": 1145444352.0, + "65": 1145444352.0, + "66": 1145444352.0, + "67": 1145444352.0, + "68": 1145444352.0, + "69": 1145444352.0, + "70": 1145444352.0, + "71": 1145444352.0, + "72": 1145444352.0, + "73": 1145444352.0, + "74": 1145444352.0, + "75": 1145444352.0, + "76": 1149560320.0, + "77": 1149560320.0, + "78": 1149560320.0, + "79": 1149560320.0, + "80": 1149560320.0, + "81": 1149560320.0, + "82": 1149560320.0, + "83": 1149560320.0, + "84": 1149560320.0, + "85": 1149560320.0, + "86": 1149560320.0, + "87": 1149560320.0, + "88": 1149560320.0, + "89": 1149560320.0, + "90": 1149560320.0, + "91": 1149560320.0, + "92": 1149560320.0, + "93": 1149560320.0, + "94": 1149560320.0, + "95": 1149560320.0, + "96": 1149560320.0, + "97": 1149560320.0, + "98": 1149560320.0, + "99": 1149560320.0, + "100": 1149560320.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 20.38736, + "2": 0.68138, + "3": 0.62881, + "4": 0.61692, + "5": 0.61365, + "6": 0.60735, + "7": 0.60006, + "8": 0.59897, + "9": 0.59763, + "10": 0.6122, + "11": 0.59106, + "12": 0.59749, + "13": 0.60001, + "14": 0.58446, + "15": 0.57929, + "16": 0.58508, + "17": 0.5725, + "18": 0.57386, + "19": 0.57617, + "20": 0.57081, + "21": 0.57614, + "22": 0.57046, + "23": 0.57731, + "24": 0.56893, + "25": 0.58004, + "26": 0.56911, + "27": 0.60575, + "28": 0.61474, + "29": 0.58874, + "30": 0.57969, + "31": 0.57737, + "32": 0.58556, + "33": 0.5704, + "34": 0.57592, + "35": 0.58241, + "36": 0.57697, + "37": 0.57978, + "38": 0.57647, + "39": 0.56977, + "40": 0.58017, + "41": 0.57153, + "42": 0.57267, + "43": 0.5881, + "44": 0.57211, + "45": 0.59552, + "46": 0.56308, + "47": 0.5736, + "48": 0.58403, + "49": 0.57693, + "50": 0.57016, + "51": 0.57233, + "52": 0.55871, + "53": 0.5593, + "54": 0.55755, + "55": 0.56057, + "56": 0.56649, + "57": 0.56057, + "58": 0.56658, + "59": 0.55825, + "60": 0.57038, + "61": 0.5563, + "62": 0.56031, + 
"63": 0.56901, + "64": 0.56097, + "65": 0.56153, + "66": 0.56761, + "67": 0.5785, + "68": 0.57341, + "69": 0.57139, + "70": 0.56231, + "71": 0.55874, + "72": 0.55834, + "73": 0.55824, + "74": 0.5552, + "75": 0.5593, + "76": 0.56038, + "77": 0.56527, + "78": 0.56728, + "79": 0.56424, + "80": 0.55564, + "81": 0.55955, + "82": 0.55867, + "83": 0.56254, + "84": 0.55754, + "85": 0.55409, + "86": 0.55901, + "87": 0.55904, + "88": 0.57097, + "89": 0.5735, + "90": 0.55808, + "91": 0.55819, + "92": 0.58224, + "93": 0.55845, + "94": 0.56512, + "95": 0.5709, + "96": 0.56099, + "97": 0.56779, + "98": 0.55446, + "99": 0.56053, + "100": 0.56338 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 2f5cb0af999..73cf979651d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.81565, - "5": 10.83826, - "10": 10.79021, - "15": 10.80531, - "20": 10.74643, - "25": 10.57512, - "30": 10.44697, - "35": 10.33173, - "40": 10.19856, - "45": 9.94354, - "50": 10.00316, - "55": 9.96304, - "60": 9.60428, - "65": 9.02427, - "70": 9.81034, - "75": 9.50548, - "80": 9.46755, - "85": 9.67934, - "90": 9.85571, - "95": 9.56508, - "100": 9.45426 + "2": 10.81048, + "3": 10.81233, + "4": 10.79117, + "5": 
10.83746, + "6": 10.85118, + "7": 10.82091, + "8": 10.82093, + "9": 10.8306, + "10": 10.78973, + "11": 10.86282, + "12": 10.84288, + "13": 10.85757, + "14": 10.86228, + "15": 10.80658, + "16": 10.80321, + "17": 10.77911, + "18": 10.80744, + "19": 10.79401, + "20": 10.7468, + "21": 10.72178, + "22": 10.58777, + "23": 10.72976, + "24": 10.63294, + "25": 10.57502, + "26": 10.63703, + "27": 10.65005, + "28": 10.63549, + "29": 10.64376, + "30": 10.44681, + "31": 10.1944, + "32": 10.52431, + "33": 10.51785, + "34": 10.28836, + "35": 10.33178, + "36": 10.31279, + "37": 10.42677, + "38": 10.27938, + "39": 10.47551, + "40": 10.19739, + "41": 10.21538, + "42": 10.28746, + "43": 9.94274, + "44": 10.05688, + "45": 9.94329, + "46": 9.90894, + "47": 10.21235, + "48": 9.95052, + "49": 9.63658, + "50": 10.00313, + "51": 9.92286, + "52": 9.82764, + "53": 10.14637, + "54": 10.0431, + "55": 9.9628, + "56": 9.70471, + "57": 9.58557, + "58": 9.91688, + "59": 9.66027, + "60": 9.60417, + "61": 9.77863, + "62": 10.06255, + "63": 9.47237, + "64": 9.85394, + "65": 9.02479, + "66": 9.79388, + "67": 9.43332, + "68": 9.85348, + "69": 9.84692, + "70": 9.81038, + "71": 9.68427, + "72": 9.6602, + "73": 9.57277, + "74": 9.05997, + "75": 9.50545, + "76": 9.17937, + "77": 10.12733, + "78": 9.77455, + "79": 9.44211, + "80": 9.46753, + "81": 9.53839, + "82": 9.75754, + "83": 9.38711, + "84": 9.46669, + "85": 9.67912, + "86": 9.13537, + "87": 9.63456, + "88": 9.80822, + "89": 9.67886, + "90": 9.8558, + "91": 9.41297, + "92": 9.41787, + "93": 9.15369, + "94": 8.90217, + "95": 9.56536, + "96": 9.58437, + "97": 9.35832, + "98": 9.73042, + "99": 8.9586, + "100": 9.454 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 30837.0, - "5": 36033.0, - "10": 29790.0, - "15": 34550.0, - "20": 32683.0, - "25": 30957.0, - "30": 32603.0, - "35": 34043.0, - "40": 35657.0, - "45": 35490.0, - "50": 38984.0, - "55": 36972.0, - "60": 39721.0, - "65": 
40930.0, - "70": 45588.0, - "75": 38781.0, - "80": 46737.0, - "85": 49087.0, - "90": 49441.0, - "95": 46735.0, - "100": 43962.0 + "1": 31083.0, + "2": 32874.0, + "3": 33614.0, + "4": 30796.0, + "5": 35950.0, + "6": 37383.0, + "7": 35302.0, + "8": 31308.0, + "9": 34522.0, + "10": 29757.0, + "11": 38942.0, + "12": 34991.0, + "13": 37045.0, + "14": 37494.0, + "15": 34692.0, + "16": 36080.0, + "17": 35060.0, + "18": 34989.0, + "19": 36144.0, + "20": 32462.0, + "21": 33369.0, + "22": 29795.0, + "23": 37622.0, + "24": 32511.0, + "25": 31055.0, + "26": 34301.0, + "27": 36030.0, + "28": 36741.0, + "29": 38257.0, + "30": 32928.0, + "31": 30048.0, + "32": 36406.0, + "33": 37595.0, + "34": 32918.0, + "35": 33986.0, + "36": 35154.0, + "37": 37803.0, + "38": 35542.0, + "39": 39006.0, + "40": 35753.0, + "41": 35748.0, + "42": 37390.0, + "43": 34087.0, + "44": 33554.0, + "45": 35464.0, + "46": 37091.0, + "47": 40542.0, + "48": 36522.0, + "49": 36534.0, + "50": 38785.0, + "51": 37126.0, + "52": 36939.0, + "53": 41763.0, + "54": 41138.0, + "55": 37048.0, + "56": 40483.0, + "57": 36998.0, + "58": 41877.0, + "59": 39208.0, + "60": 40087.0, + "61": 40325.0, + "62": 44268.0, + "63": 38629.0, + "64": 43656.0, + "65": 40940.0, + "66": 44302.0, + "67": 40075.0, + "68": 40632.0, + "69": 40527.0, + "70": 45260.0, + "71": 41111.0, + "72": 40161.0, + "73": 44972.0, + "74": 34095.0, + "75": 38490.0, + "76": 46162.0, + "77": 46055.0, + "78": 46750.0, + "79": 47560.0, + "80": 46440.0, + "81": 49629.0, + "82": 49227.0, + "83": 44834.0, + "84": 45877.0, + "85": 49064.0, + "86": 45232.0, + "87": 49124.0, + "88": 46347.0, + "89": 48837.0, + "90": 49499.0, + "91": 44289.0, + "92": 47277.0, + "93": 46847.0, + "94": 46311.0, + "95": 47245.0, + "96": 50336.0, + "97": 47016.0, + "98": 49606.0, + "99": 47799.0, + "100": 43700.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1016564224.0, - "5": 1016564224.0, - "10": 
1016563712.0, + "2": 1016563712.0, + "3": 1016564224.0, + "4": 1016563712.0, + "5": 1016564736.0, + "6": 1016565248.0, + "7": 1016564736.0, + "8": 1016565248.0, + "9": 1016562688.0, + "10": 1016564736.0, + "11": 1016562176.0, + "12": 1016564224.0, + "13": 1016563200.0, + "14": 1016563712.0, "15": 1016564736.0, - "20": 1016563200.0, - "25": 1016564736.0, + "16": 1016562688.0, + "17": 1016565248.0, + "18": 1016564736.0, + "19": 1016563200.0, + "20": 1016563712.0, + "21": 1016564224.0, + "22": 1016564736.0, + "23": 1016564736.0, + "24": 1016563200.0, + "25": 1016565248.0, + "26": 1016562176.0, + "27": 1016562688.0, + "28": 1016562176.0, + "29": 1016562688.0, "30": 1016566784.0, + "31": 1016569344.0, + "32": 1016565248.0, + "33": 1016564736.0, + "34": 1016565248.0, "35": 1016565248.0, - "40": 1016564224.0, - "45": 1016565760.0, - "50": 1016565760.0, - "55": 1016569856.0, - "60": 1017439232.0, + "36": 1016565760.0, + "37": 1016564736.0, + "38": 1016564224.0, + "39": 1016562688.0, + "40": 1016945152.0, + "41": 1016567808.0, + "42": 1016564224.0, + "43": 1016568320.0, + "44": 1016565760.0, + "45": 1016565248.0, + "46": 1016569344.0, + "47": 1016564224.0, + "48": 1016569856.0, + "49": 1017010688.0, + "50": 1016567296.0, + "51": 1016566272.0, + "52": 1016575488.0, + "53": 1016568320.0, + "54": 1016567296.0, + "55": 1016569344.0, + "56": 1016565248.0, + "57": 1016575488.0, + "58": 1016569856.0, + "59": 1016574976.0, + "60": 1016571392.0, + "61": 1016567808.0, + "62": 1016566272.0, + "63": 1016576512.0, + "64": 1016572416.0, "65": 1016584192.0, + "66": 1016569344.0, + "67": 1016570368.0, + "68": 1016566272.0, + "69": 1016570880.0, "70": 1016569344.0, + "71": 1016566784.0, + "72": 1016915968.0, + "73": 1016572928.0, + "74": 1016577536.0, "75": 1016567296.0, - "80": 1016572416.0, - "85": 1016575488.0, - "90": 1016569344.0, - "95": 1016568320.0, - "100": 1016573440.0 + "76": 1016565760.0, + "77": 1016567296.0, + "78": 1016572928.0, + "79": 1016569344.0, + "80": 1016572928.0, + 
"81": 1016569856.0, + "82": 1016572416.0, + "83": 1016568832.0, + "84": 1016573440.0, + "85": 1016574976.0, + "86": 1016574976.0, + "87": 1016568832.0, + "88": 1016571904.0, + "89": 1016578048.0, + "90": 1016568832.0, + "91": 1016566784.0, + "92": 1016566784.0, + "93": 1016570368.0, + "94": 1016571904.0, + "95": 1016567808.0, + "96": 1016566784.0, + "97": 1016573440.0, + "98": 1016566272.0, + "99": 1016578048.0, + "100": 1016573952.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2560655872.0, - "5": 2828328960.0, - "10": 2829020160.0, - "15": 2829965312.0, - "20": 2830715392.0, - "25": 2830928896.0, - "30": 2831643648.0, - "35": 2836671488.0, - "40": 2836671488.0, - "45": 2836671488.0, - "50": 2838087680.0, - "55": 2843240960.0, - "60": 2844131328.0, - "65": 2859044864.0, - "70": 2859044864.0, - "75": 2859044864.0, - "80": 2859044864.0, - "85": 2859044864.0, - "90": 2859044864.0, - "95": 2859044864.0, - "100": 2859044864.0 + "2": 2827037696.0, + "3": 2827377152.0, + "4": 2827377152.0, + "5": 2827506688.0, + "6": 2827618816.0, + "7": 2828691456.0, + "8": 2828691456.0, + "9": 2828691456.0, + "10": 2828691456.0, + "11": 2828691456.0, + "12": 2828691456.0, + "13": 2828691456.0, + "14": 2828691456.0, + "15": 2829756416.0, + "16": 2829756416.0, + "17": 2830923264.0, + "18": 2830923264.0, + "19": 2830923264.0, + "20": 2830923264.0, + "21": 2830923264.0, + "22": 2830923264.0, + "23": 2830923264.0, + "24": 2830923264.0, + "25": 2830923264.0, + "26": 2830923264.0, + "27": 2830923264.0, + "28": 2830923264.0, + "29": 2830923264.0, + "30": 2833604608.0, + "31": 2833604608.0, + "32": 2833604608.0, + "33": 2833604608.0, + "34": 2833604608.0, + "35": 2833604608.0, + "36": 2833604608.0, + "37": 2833604608.0, + "38": 2833604608.0, + "39": 2833604608.0, + "40": 2833604608.0, + "41": 2835652608.0, + "42": 2835652608.0, + "43": 2835652608.0, + "44": 2835652608.0, + "45": 2835652608.0, + "46": 
2836792832.0, + "47": 2836792832.0, + "48": 2837318656.0, + "49": 2837318656.0, + "50": 2837318656.0, + "51": 2837318656.0, + "52": 2841922048.0, + "53": 2841922048.0, + "54": 2841922048.0, + "55": 2841922048.0, + "56": 2844188672.0, + "57": 2847232512.0, + "58": 2847232512.0, + "59": 2847232512.0, + "60": 2847232512.0, + "61": 2847232512.0, + "62": 2847232512.0, + "63": 2847301120.0, + "64": 2847301120.0, + "65": 2858460160.0, + "66": 2858460160.0, + "67": 2858460160.0, + "68": 2858460160.0, + "69": 2858460160.0, + "70": 2858460160.0, + "71": 2858460160.0, + "72": 2858460160.0, + "73": 2858460160.0, + "74": 2858460160.0, + "75": 2858460160.0, + "76": 2858460160.0, + "77": 2858460160.0, + "78": 2858460160.0, + "79": 2858460160.0, + "80": 2858460160.0, + "81": 2858460160.0, + "82": 2858460160.0, + "83": 2858460160.0, + "84": 2858460160.0, + "85": 2858460160.0, + "86": 2858460160.0, + "87": 2858460160.0, + "88": 2858460160.0, + "89": 2858460160.0, + "90": 2858460160.0, + "91": 2858460160.0, + "92": 2858460160.0, + "93": 2858460160.0, + "94": 2858460160.0, + "95": 2858460160.0, + "96": 2858460160.0, + "97": 2858460160.0, + "98": 2858460160.0, + "99": 2858460160.0, + "100": 2858460160.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 18.86394, - "5": 0.16112, - "10": 0.15425, - "15": 0.15762, - "20": 0.14093, - "25": 0.14225, - "30": 0.14726, - "35": 0.14414, - "40": 0.15356, - "45": 0.14839, - "50": 0.15508, - "55": 0.15077, - "60": 0.17983, - "65": 0.2249, - "70": 0.15318, - "75": 0.15837, - "80": 0.17114, - "85": 0.14811, - "90": 0.14827, - "95": 0.15176, - "100": 0.14608 + "1": 17.04363, + "2": 0.27177, + "3": 0.19697, + "4": 0.20207, + "5": 0.17488, + "6": 0.1736, + "7": 0.18134, + "8": 0.17934, + "9": 0.17175, + "10": 0.16904, + "11": 0.17256, + "12": 0.16161, + "13": 0.166, + "14": 0.16567, + "15": 0.18106, + "16": 0.16499, + "17": 0.17792, + "18": 0.16846, + "19": 0.16132, + "20": 
0.16075, + "21": 0.163, + "22": 0.17697, + "23": 0.16348, + "24": 0.16046, + "25": 0.16003, + "26": 0.16209, + "27": 0.16858, + "28": 0.16512, + "29": 0.15718, + "30": 0.17279, + "31": 0.20344, + "32": 0.17311, + "33": 0.1614, + "34": 0.18789, + "35": 0.16679, + "36": 0.16768, + "37": 0.15911, + "38": 0.16709, + "39": 0.16032, + "40": 0.18009, + "41": 0.16959, + "42": 0.16653, + "43": 0.17964, + "44": 0.1656, + "45": 0.16422, + "46": 0.18029, + "47": 0.16168, + "48": 0.19024, + "49": 0.22183, + "50": 0.16427, + "51": 0.17603, + "52": 0.17568, + "53": 0.16571, + "54": 0.16402, + "55": 0.17797, + "56": 0.22204, + "57": 0.17949, + "58": 0.1779, + "59": 0.18785, + "60": 0.1904, + "61": 0.1671, + "62": 0.17396, + "63": 0.17822, + "64": 0.17482, + "65": 0.24849, + "66": 0.17181, + "67": 0.23022, + "68": 0.19374, + "69": 0.17091, + "70": 0.17566, + "71": 0.19661, + "72": 0.17367, + "73": 0.21284, + "74": 0.19024, + "75": 0.18071, + "76": 0.20274, + "77": 0.17462, + "78": 0.18216, + "79": 0.18476, + "80": 0.18669, + "81": 0.17032, + "82": 0.16285, + "83": 0.17256, + "84": 0.19021, + "85": 0.16572, + "86": 0.20934, + "87": 0.17261, + "88": 0.16413, + "89": 0.17944, + "90": 0.1661, + "91": 0.19779, + "92": 0.17507, + "93": 0.18998, + "94": 0.20674, + "95": 0.16927, + "96": 0.16793, + "97": 0.17702, + "98": 0.16074, + "99": 0.17652, + "100": 0.17041 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..dca66d633f5 --- /dev/null +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.81565, + "2": 10.81048, + "3": 10.81268, + "4": 10.79108, + "5": 10.83781, + "6": 10.85065, + "7": 10.82134, + "8": 10.8202, + "9": 10.83075, + "10": 10.79026, + "11": 10.86297, + "12": 10.84282, + "13": 10.85729, + "14": 10.86207, + "15": 10.80535, + "16": 10.80362, + "17": 10.77916, + "18": 10.80764, + "19": 10.79451, + "20": 10.74621, + "21": 10.72181, + "22": 10.58717, + "23": 10.72927, + "24": 10.63248, + "25": 10.57614, + "26": 10.63793, + "27": 10.64955, + "28": 10.63533, + "29": 10.64332, + "30": 10.44626, + "31": 10.19362, + "32": 10.52448, + "33": 10.51821, + "34": 10.28825, + "35": 10.33113, + "36": 10.31229, + "37": 10.42674, + "38": 10.279, + "39": 10.47591, + "40": 10.19781, + "41": 10.21483, + "42": 10.28721, + "43": 9.94225, + "44": 10.05777, + "45": 9.9434, + "46": 9.90939, + "47": 10.21227, + "48": 9.95, + "49": 9.63638, + "50": 10.00366, + "51": 9.92331, + "52": 9.8284, + "53": 10.14655, + "54": 10.04302, + "55": 9.9627, + "56": 9.70496, + "57": 9.58521, + "58": 9.91705, + "59": 9.66061, + "60": 9.60423, + "61": 9.77841, + "62": 10.06213, + "63": 9.47178, + "64": 9.85438, + "65": 9.02476, + "66": 9.79406, + "67": 9.43345, + "68": 9.8534, + "69": 9.847, + "70": 9.81051, + "71": 9.68406, + "72": 9.6601, + "73": 9.57296, + "74": 9.0603, + "75": 9.50552, + "76": 9.17947, + "77": 10.12779, + "78": 9.77444, + "79": 9.44215, + "80": 9.46725, + "81": 9.53865, + "82": 9.75696, + "83": 9.3874, + "84": 9.46663, + "85": 9.67947, + "86": 9.13533, + "87": 9.63433, + "88": 9.80834, + "89": 9.67888, + "90": 9.85563, + "91": 9.41308, + "92": 9.41812, + "93": 9.15371, + "94": 8.90222, + "95": 9.56497, + "96": 9.58428, + "97": 9.35825, + "98": 9.72999, + "99": 
8.95886, + "100": 9.45414 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 30853.0, + "2": 33000.0, + "3": 33775.0, + "4": 30857.0, + "5": 35956.0, + "6": 37573.0, + "7": 35446.0, + "8": 31027.0, + "9": 34894.0, + "10": 29923.0, + "11": 38736.0, + "12": 35245.0, + "13": 36983.0, + "14": 38078.0, + "15": 34560.0, + "16": 36096.0, + "17": 34585.0, + "18": 34936.0, + "19": 36301.0, + "20": 32788.0, + "21": 33385.0, + "22": 29942.0, + "23": 37625.0, + "24": 32018.0, + "25": 31043.0, + "26": 34310.0, + "27": 35942.0, + "28": 37348.0, + "29": 38027.0, + "30": 32865.0, + "31": 30072.0, + "32": 36198.0, + "33": 37604.0, + "34": 32768.0, + "35": 34129.0, + "36": 34811.0, + "37": 37917.0, + "38": 35861.0, + "39": 38592.0, + "40": 35652.0, + "41": 35428.0, + "42": 37701.0, + "43": 33967.0, + "44": 33425.0, + "45": 35778.0, + "46": 37279.0, + "47": 40356.0, + "48": 36144.0, + "49": 36492.0, + "50": 39148.0, + "51": 37394.0, + "52": 36918.0, + "53": 41574.0, + "54": 40654.0, + "55": 37274.0, + "56": 40316.0, + "57": 36713.0, + "58": 42042.0, + "59": 39264.0, + "60": 39816.0, + "61": 40579.0, + "62": 44097.0, + "63": 38397.0, + "64": 43253.0, + "65": 40953.0, + "66": 44326.0, + "67": 40344.0, + "68": 40398.0, + "69": 40614.0, + "70": 45248.0, + "71": 41445.0, + "72": 39901.0, + "73": 44369.0, + "74": 33925.0, + "75": 38833.0, + "76": 46358.0, + "77": 46064.0, + "78": 46904.0, + "79": 47560.0, + "80": 46979.0, + "81": 50283.0, + "82": 49634.0, + "83": 45153.0, + "84": 45874.0, + "85": 49161.0, + "86": 45106.0, + "87": 49057.0, + "88": 46592.0, + "89": 48712.0, + "90": 49552.0, + "91": 43836.0, + "92": 47360.0, + "93": 46675.0, + "94": 46653.0, + "95": 46726.0, + "96": 50152.0, + "97": 47102.0, + "98": 50317.0, + "99": 48088.0, + "100": 43362.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1016564224.0, + "2": 1016563712.0, + "3": 1016564224.0, + "4": 
1016563200.0, + "5": 1016564736.0, + "6": 1016565248.0, + "7": 1016563712.0, + "8": 1016565248.0, + "9": 1016562688.0, + "10": 1016564736.0, + "11": 1016562688.0, + "12": 1016564224.0, + "13": 1016563200.0, + "14": 1016563712.0, + "15": 1017374720.0, + "16": 1016562176.0, + "17": 1016565248.0, + "18": 1016566272.0, + "19": 1016563712.0, + "20": 1016564224.0, + "21": 1016564224.0, + "22": 1016566272.0, + "23": 1016563712.0, + "24": 1016563200.0, + "25": 1016565248.0, + "26": 1016833024.0, + "27": 1016562688.0, + "28": 1016562176.0, + "29": 1016562688.0, + "30": 1016565760.0, + "31": 1016568832.0, + "32": 1016565248.0, + "33": 1016564736.0, + "34": 1016564736.0, + "35": 1016565248.0, + "36": 1016901120.0, + "37": 1016564736.0, + "38": 1016564224.0, + "39": 1016562688.0, + "40": 1016563712.0, + "41": 1016567296.0, + "42": 1016564736.0, + "43": 1016567808.0, + "44": 1016564736.0, + "45": 1016565760.0, + "46": 1016569856.0, + "47": 1016564224.0, + "48": 1016569856.0, + "49": 1016568320.0, + "50": 1017070592.0, + "51": 1016566272.0, + "52": 1016575488.0, + "53": 1016567808.0, + "54": 1016976896.0, + "55": 1016569856.0, + "56": 1016565248.0, + "57": 1016574976.0, + "58": 1017060352.0, + "59": 1016573952.0, + "60": 1016571904.0, + "61": 1016568320.0, + "62": 1016566784.0, + "63": 1016576512.0, + "64": 1016572416.0, + "65": 1016584192.0, + "66": 1016568832.0, + "67": 1016570368.0, + "68": 1016566272.0, + "69": 1016570880.0, + "70": 1016937984.0, + "71": 1016567296.0, + "72": 1016571904.0, + "73": 1016572416.0, + "74": 1016577024.0, + "75": 1016567296.0, + "76": 1016565248.0, + "77": 1016566272.0, + "78": 1016572928.0, + "79": 1016568320.0, + "80": 1016572416.0, + "81": 1016570368.0, + "82": 1016571392.0, + "83": 1016568320.0, + "84": 1016573440.0, + "85": 1016574976.0, + "86": 1016574976.0, + "87": 1016567808.0, + "88": 1016570880.0, + "89": 1016577024.0, + "90": 1016568320.0, + "91": 1016566784.0, + "92": 1016567808.0, + "93": 1016569856.0, + "94": 1016571904.0, + "95": 
1016568320.0, + "96": 1016718336.0, + "97": 1016573440.0, + "98": 1016565248.0, + "99": 1016578560.0, + "100": 1016574464.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2560655872.0, + "2": 2827037696.0, + "3": 2827638272.0, + "4": 2827638272.0, + "5": 2827638272.0, + "6": 2828292608.0, + "7": 2829339648.0, + "8": 2829339648.0, + "9": 2829339648.0, + "10": 2831441920.0, + "11": 2831441920.0, + "12": 2831441920.0, + "13": 2831441920.0, + "14": 2831441920.0, + "15": 2831441920.0, + "16": 2831441920.0, + "17": 2831441920.0, + "18": 2831441920.0, + "19": 2831441920.0, + "20": 2831441920.0, + "21": 2831441920.0, + "22": 2831441920.0, + "23": 2831441920.0, + "24": 2831441920.0, + "25": 2831441920.0, + "26": 2831441920.0, + "27": 2831441920.0, + "28": 2831441920.0, + "29": 2831441920.0, + "30": 2831441920.0, + "31": 2836701184.0, + "32": 2836701184.0, + "33": 2836701184.0, + "34": 2836701184.0, + "35": 2836701184.0, + "36": 2836701184.0, + "37": 2836701184.0, + "38": 2836701184.0, + "39": 2836701184.0, + "40": 2836701184.0, + "41": 2836701184.0, + "42": 2836701184.0, + "43": 2836701184.0, + "44": 2836701184.0, + "45": 2836701184.0, + "46": 2836701184.0, + "47": 2836701184.0, + "48": 2836701184.0, + "49": 2836701184.0, + "50": 2836701184.0, + "51": 2836701184.0, + "52": 2842246656.0, + "53": 2842246656.0, + "54": 2842246656.0, + "55": 2842246656.0, + "56": 2843695104.0, + "57": 2848199680.0, + "58": 2848199680.0, + "59": 2848199680.0, + "60": 2848199680.0, + "61": 2848199680.0, + "62": 2848199680.0, + "63": 2848199680.0, + "64": 2848199680.0, + "65": 2859411456.0, + "66": 2859411456.0, + "67": 2859411456.0, + "68": 2859411456.0, + "69": 2859411456.0, + "70": 2859411456.0, + "71": 2859411456.0, + "72": 2859411456.0, + "73": 2859411456.0, + "74": 2859411456.0, + "75": 2859411456.0, + "76": 2859411456.0, + "77": 2859411456.0, + "78": 2859411456.0, + "79": 2859411456.0, + "80": 2859411456.0, + "81": 
2859411456.0, + "82": 2859411456.0, + "83": 2859411456.0, + "84": 2859411456.0, + "85": 2859411456.0, + "86": 2859411456.0, + "87": 2859411456.0, + "88": 2859411456.0, + "89": 2859411456.0, + "90": 2859411456.0, + "91": 2859411456.0, + "92": 2859411456.0, + "93": 2859411456.0, + "94": 2859411456.0, + "95": 2859411456.0, + "96": 2859411456.0, + "97": 2859411456.0, + "98": 2859411456.0, + "99": 2859411456.0, + "100": 2859411456.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 18.49276, + "2": 0.25843, + "3": 0.17872, + "4": 0.17622, + "5": 0.16425, + "6": 0.15462, + "7": 0.16221, + "8": 0.15923, + "9": 0.1611, + "10": 0.1478, + "11": 0.15494, + "12": 0.14547, + "13": 0.14411, + "14": 0.14989, + "15": 0.16302, + "16": 0.14821, + "17": 0.16657, + "18": 0.14513, + "19": 0.15296, + "20": 0.14437, + "21": 0.14735, + "22": 0.17451, + "23": 0.16059, + "24": 0.152, + "25": 0.15395, + "26": 0.15115, + "27": 0.15887, + "28": 0.15234, + "29": 0.1421, + "30": 0.15091, + "31": 0.18973, + "32": 0.14778, + "33": 0.14785, + "34": 0.1727, + "35": 0.15646, + "36": 0.16437, + "37": 0.1441, + "38": 0.15823, + "39": 0.14495, + "40": 0.16334, + "41": 0.14314, + "42": 0.14405, + "43": 0.15348, + "44": 0.14397, + "45": 0.15389, + "46": 0.17277, + "47": 0.14442, + "48": 0.16289, + "49": 0.21224, + "50": 0.14457, + "51": 0.17927, + "52": 0.15446, + "53": 0.14459, + "54": 0.14896, + "55": 0.1558, + "56": 0.2105, + "57": 0.17156, + "58": 0.146, + "59": 0.15771, + "60": 0.162, + "61": 0.14241, + "62": 0.14184, + "63": 0.15693, + "64": 0.16199, + "65": 0.22761, + "66": 0.14583, + "67": 0.22988, + "68": 0.15495, + "69": 0.15509, + "70": 0.15156, + "71": 0.17782, + "72": 0.15675, + "73": 0.18088, + "74": 0.17013, + "75": 0.16039, + "76": 0.17974, + "77": 0.13903, + "78": 0.15719, + "79": 0.1635, + "80": 0.17904, + "81": 0.14997, + "82": 0.15986, + "83": 0.1669, + "84": 0.17349, + "85": 0.14723, + "86": 0.19019, + "87": 0.15235, + 
"88": 0.14689, + "89": 0.16952, + "90": 0.1487, + "91": 0.1826, + "92": 0.15727, + "93": 0.17286, + "94": 0.18554, + "95": 0.14872, + "96": 0.14426, + "97": 0.15953, + "98": 0.14361, + "99": 0.15897, + "100": 0.14814 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..d869313b50f --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.81565, + "2": 10.81048, + "3": 10.8127, + "4": 10.79089, + "5": 10.83784, + "6": 10.85116, + "7": 10.82036, + "8": 10.82117, + "9": 10.83043, + "10": 10.78955, + "11": 10.86357, + "12": 10.84268, + "13": 10.85799, + "14": 10.86268, + "15": 10.80594, + "16": 10.80356, + "17": 10.77851, + "18": 10.80762, + "19": 10.79465, + "20": 10.747, + "21": 10.72249, + "22": 10.58742, + "23": 10.72933, + "24": 10.63238, + "25": 10.575, + "26": 10.638, + "27": 10.64966, + "28": 10.63496, + "29": 10.64307, + "30": 10.44635, + "31": 10.19441, + "32": 10.52449, + "33": 10.51815, + "34": 10.28843, + "35": 10.33138, + "36": 10.3123, + "37": 10.4265, + "38": 10.27866, + "39": 10.47612, + "40": 10.19821, + "41": 10.21536, + "42": 10.28769, + "43": 9.94235, + "44": 10.05775, + "45": 9.94354, + "46": 9.90902, + "47": 10.21214, + "48": 9.94982, + "49": 9.63605, + "50": 10.00335, + "51": 9.92304, + "52": 9.82779, + "53": 10.14656, + "54": 10.04338, + "55": 9.96311, + "56": 9.70508, + "57": 9.58542, + "58": 
9.91687, + "59": 9.66061, + "60": 9.60393, + "61": 9.77855, + "62": 10.0624, + "63": 9.47205, + "64": 9.85428, + "65": 9.02467, + "66": 9.79454, + "67": 9.43333, + "68": 9.85327, + "69": 9.847, + "70": 9.81072, + "71": 9.684, + "72": 9.66023, + "73": 9.57314, + "74": 9.05973, + "75": 9.50551, + "76": 9.17942, + "77": 10.12761, + "78": 9.77438, + "79": 9.44209, + "80": 9.46747, + "81": 9.53873, + "82": 9.75725, + "83": 9.38702, + "84": 9.46662, + "85": 9.67918, + "86": 9.13556, + "87": 9.63426, + "88": 9.80794, + "89": 9.67925, + "90": 9.85561, + "91": 9.41267, + "92": 9.41773, + "93": 9.15396, + "94": 8.90227, + "95": 9.56526, + "96": 9.58425, + "97": 9.35836, + "98": 9.7302, + "99": 8.95917, + "100": 9.45408 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 30991.0, + "2": 32927.0, + "3": 33481.0, + "4": 30866.0, + "5": 36255.0, + "6": 37186.0, + "7": 35644.0, + "8": 31356.0, + "9": 34832.0, + "10": 29855.0, + "11": 38396.0, + "12": 35164.0, + "13": 37118.0, + "14": 38011.0, + "15": 34458.0, + "16": 35843.0, + "17": 34836.0, + "18": 35149.0, + "19": 36044.0, + "20": 32823.0, + "21": 33340.0, + "22": 30040.0, + "23": 37733.0, + "24": 31992.0, + "25": 31045.0, + "26": 34280.0, + "27": 36064.0, + "28": 36993.0, + "29": 38087.0, + "30": 32689.0, + "31": 30361.0, + "32": 36050.0, + "33": 37627.0, + "34": 33149.0, + "35": 34316.0, + "36": 35026.0, + "37": 37852.0, + "38": 35490.0, + "39": 38325.0, + "40": 35730.0, + "41": 35890.0, + "42": 37811.0, + "43": 34239.0, + "44": 33282.0, + "45": 35354.0, + "46": 37112.0, + "47": 40323.0, + "48": 36296.0, + "49": 36098.0, + "50": 38996.0, + "51": 37187.0, + "52": 36798.0, + "53": 41385.0, + "54": 41151.0, + "55": 36715.0, + "56": 40382.0, + "57": 36942.0, + "58": 42415.0, + "59": 39138.0, + "60": 39766.0, + "61": 40532.0, + "62": 43919.0, + "63": 38747.0, + "64": 43509.0, + "65": 40794.0, + "66": 44093.0, + "67": 40369.0, + "68": 40509.0, + "69": 40728.0, + "70": 45431.0, 
+ "71": 41117.0, + "72": 39982.0, + "73": 44758.0, + "74": 34170.0, + "75": 38601.0, + "76": 46113.0, + "77": 45621.0, + "78": 47007.0, + "79": 47410.0, + "80": 46647.0, + "81": 50449.0, + "82": 49494.0, + "83": 45080.0, + "84": 46331.0, + "85": 48470.0, + "86": 45870.0, + "87": 49138.0, + "88": 46357.0, + "89": 48274.0, + "90": 50049.0, + "91": 43937.0, + "92": 47318.0, + "93": 46654.0, + "94": 46515.0, + "95": 47167.0, + "96": 50587.0, + "97": 46623.0, + "98": 49830.0, + "99": 48092.0, + "100": 43643.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1016564224.0, + "2": 1016563712.0, + "3": 1016564224.0, + "4": 1017172480.0, + "5": 1016564224.0, + "6": 1016565248.0, + "7": 1016564736.0, + "8": 1016565248.0, + "9": 1016562688.0, + "10": 1016564736.0, + "11": 1016562688.0, + "12": 1016565248.0, + "13": 1016564736.0, + "14": 1016564224.0, + "15": 1016564736.0, + "16": 1016562176.0, + "17": 1016564736.0, + "18": 1016565760.0, + "19": 1016563200.0, + "20": 1016563200.0, + "21": 1016564224.0, + "22": 1016566272.0, + "23": 1016564736.0, + "24": 1016564224.0, + "25": 1016564736.0, + "26": 1016562176.0, + "27": 1016563200.0, + "28": 1016562688.0, + "29": 1016562688.0, + "30": 1016566272.0, + "31": 1016569856.0, + "32": 1016564736.0, + "33": 1016564736.0, + "34": 1016565248.0, + "35": 1017459712.0, + "36": 1016565248.0, + "37": 1016565248.0, + "38": 1016564224.0, + "39": 1016562176.0, + "40": 1016565248.0, + "41": 1016567808.0, + "42": 1016564224.0, + "43": 1016568320.0, + "44": 1016565760.0, + "45": 1016565760.0, + "46": 1016570368.0, + "47": 1016565248.0, + "48": 1016569856.0, + "49": 1016568832.0, + "50": 1016565760.0, + "51": 1016566272.0, + "52": 1016574976.0, + "53": 1016567808.0, + "54": 1016566784.0, + "55": 1016569856.0, + "56": 1016565248.0, + "57": 1016574976.0, + "58": 1017110528.0, + "59": 1016574976.0, + "60": 1016571904.0, + "61": 1016567296.0, + "62": 1016565760.0, + "63": 1016576000.0, + 
"64": 1016572928.0, + "65": 1016585216.0, + "66": 1016568832.0, + "67": 1016569344.0, + "68": 1016566272.0, + "69": 1016569856.0, + "70": 1016569344.0, + "71": 1016566272.0, + "72": 1016571392.0, + "73": 1016572416.0, + "74": 1016577536.0, + "75": 1016567296.0, + "76": 1016565760.0, + "77": 1016566272.0, + "78": 1016572928.0, + "79": 1016568832.0, + "80": 1016572416.0, + "81": 1016570368.0, + "82": 1016571904.0, + "83": 1016568832.0, + "84": 1016573440.0, + "85": 1016575488.0, + "86": 1016574976.0, + "87": 1016568320.0, + "88": 1016816640.0, + "89": 1016577024.0, + "90": 1016569344.0, + "91": 1016566784.0, + "92": 1016566784.0, + "93": 1016569856.0, + "94": 1016571392.0, + "95": 1016567808.0, + "96": 1016566784.0, + "97": 1016573952.0, + "98": 1016565760.0, + "99": 1016577024.0, + "100": 1016574464.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2560655872.0, + "2": 2827037696.0, + "3": 2827771392.0, + "4": 2828163584.0, + "5": 2828163584.0, + "6": 2828163584.0, + "7": 2829373440.0, + "8": 2829373440.0, + "9": 2829373440.0, + "10": 2829925376.0, + "11": 2829925376.0, + "12": 2829925376.0, + "13": 2829925376.0, + "14": 2829925376.0, + "15": 2830320640.0, + "16": 2830320640.0, + "17": 2830320640.0, + "18": 2830320640.0, + "19": 2830320640.0, + "20": 2830320640.0, + "21": 2830320640.0, + "22": 2830406144.0, + "23": 2830406144.0, + "24": 2830406144.0, + "25": 2830406144.0, + "26": 2830406144.0, + "27": 2830406144.0, + "28": 2830406144.0, + "29": 2830406144.0, + "30": 2831433216.0, + "31": 2836904960.0, + "32": 2836904960.0, + "33": 2836904960.0, + "34": 2836904960.0, + "35": 2836904960.0, + "36": 2836904960.0, + "37": 2836904960.0, + "38": 2836904960.0, + "39": 2836904960.0, + "40": 2836904960.0, + "41": 2836904960.0, + "42": 2836904960.0, + "43": 2836904960.0, + "44": 2836904960.0, + "45": 2836904960.0, + "46": 2837527040.0, + "47": 2837527040.0, + "48": 2837527040.0, + "49": 2837527040.0, + 
"50": 2837527040.0, + "51": 2837527040.0, + "52": 2844526592.0, + "53": 2844526592.0, + "54": 2844526592.0, + "55": 2844526592.0, + "56": 2844526592.0, + "57": 2845833216.0, + "58": 2845833216.0, + "59": 2845833216.0, + "60": 2845833216.0, + "61": 2845833216.0, + "62": 2845833216.0, + "63": 2847350784.0, + "64": 2847350784.0, + "65": 2859365376.0, + "66": 2859365376.0, + "67": 2859365376.0, + "68": 2859365376.0, + "69": 2859365376.0, + "70": 2859365376.0, + "71": 2859365376.0, + "72": 2859365376.0, + "73": 2859365376.0, + "74": 2859365376.0, + "75": 2859365376.0, + "76": 2859365376.0, + "77": 2859365376.0, + "78": 2859365376.0, + "79": 2859365376.0, + "80": 2859365376.0, + "81": 2859365376.0, + "82": 2859365376.0, + "83": 2859365376.0, + "84": 2859365376.0, + "85": 2859365376.0, + "86": 2859365376.0, + "87": 2859365376.0, + "88": 2859365376.0, + "89": 2859365376.0, + "90": 2859365376.0, + "91": 2859365376.0, + "92": 2859365376.0, + "93": 2859365376.0, + "94": 2859365376.0, + "95": 2859365376.0, + "96": 2859365376.0, + "97": 2859365376.0, + "98": 2859365376.0, + "99": 2859365376.0, + "100": 2859365376.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 17.55161, + "2": 0.27584, + "3": 0.20906, + "4": 0.18821, + "5": 0.17883, + "6": 0.17484, + "7": 0.18214, + "8": 0.18025, + "9": 0.16785, + "10": 0.16718, + "11": 0.17122, + "12": 0.16341, + "13": 0.16356, + "14": 0.16447, + "15": 0.17469, + "16": 0.16231, + "17": 0.17002, + "18": 0.1621, + "19": 0.16543, + "20": 0.16097, + "21": 0.16113, + "22": 0.17866, + "23": 0.16939, + "24": 0.16784, + "25": 0.16322, + "26": 0.15752, + "27": 0.16042, + "28": 0.16296, + "29": 0.16022, + "30": 0.16569, + "31": 0.20634, + "32": 0.16627, + "33": 0.16203, + "34": 0.18965, + "35": 0.1656, + "36": 0.17227, + "37": 0.16394, + "38": 0.16364, + "39": 0.15966, + "40": 0.17482, + "41": 0.16992, + "42": 0.16079, + "43": 0.17541, + "44": 0.1626, + "45": 0.16436, + "46": 0.1838, + 
"47": 0.15773, + "48": 0.18504, + "49": 0.22116, + "50": 0.16497, + "51": 0.17193, + "52": 0.17228, + "53": 0.15999, + "54": 0.15946, + "55": 0.1611, + "56": 0.21983, + "57": 0.18423, + "58": 0.16229, + "59": 0.18268, + "60": 0.17406, + "61": 0.15956, + "62": 0.16172, + "63": 0.17465, + "64": 0.17307, + "65": 0.25477, + "66": 0.15926, + "67": 0.23477, + "68": 0.16872, + "69": 0.16094, + "70": 0.16631, + "71": 0.18552, + "72": 0.16728, + "73": 0.1889, + "74": 0.17586, + "75": 0.17577, + "76": 0.21503, + "77": 0.16576, + "78": 0.17284, + "79": 0.18166, + "80": 0.19235, + "81": 0.17347, + "82": 0.1597, + "83": 0.17024, + "84": 0.17843, + "85": 0.15917, + "86": 0.20315, + "87": 0.16523, + "88": 0.16367, + "89": 0.18499, + "90": 0.16286, + "91": 0.19025, + "92": 0.17186, + "93": 0.19123, + "94": 0.19378, + "95": 0.16849, + "96": 0.16781, + "97": 0.17705, + "98": 0.15729, + "99": 0.17119, + "100": 0.16 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index a13cf8b8c89..f763ccd7669 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.78091, + "2": 10.80272, + "3": 10.8036, + "4": 10.77566, "5": 10.83259, + "6": 10.83704, + "7": 10.79793, + "8": 10.79364, + "9": 10.808, "10": 10.76116, + "11": 10.85297, + "12": 10.84152, + "13": 
10.8247, + "14": 10.85822, "15": 10.78238, + "16": 10.77927, + "17": 10.74878, + "18": 10.7897, + "19": 10.7749, "20": 10.71704, + "21": 10.70811, + "22": 10.54787, + "23": 10.72978, + "24": 10.60324, "25": 10.55979, + "26": 10.61611, + "27": 10.6446, + "28": 10.62463, + "29": 10.63492, "30": 10.42362, + "31": 10.16499, + "32": 10.51313, + "33": 10.5094, + "34": 10.2668, "35": 10.32318, + "36": 10.28865, + "37": 10.41114, + "38": 10.26426, + "39": 10.45, "40": 10.17473, + "41": 10.20958, + "42": 10.27824, + "43": 9.91831, + "44": 10.03131, "45": 9.91995, + "46": 9.8862, + "47": 10.19255, + "48": 9.92803, + "49": 9.61616, "50": 9.98532, + "51": 9.90528, + "52": 9.80364, + "53": 10.12728, + "54": 10.00036, "55": 9.9362, + "56": 9.68506, + "57": 9.55805, + "58": 9.90514, + "59": 9.63857, "60": 9.57451, + "61": 9.76864, + "62": 10.03802, + "63": 9.44503, + "64": 9.82796, "65": 9.00712, + "66": 9.77422, + "67": 9.41277, + "68": 9.84111, + "69": 9.82784, "70": 9.79011, + "71": 9.66957, + "72": 9.62799, + "73": 9.5473, + "74": 9.03663, "75": 9.49153, + "76": 9.16783, + "77": 10.10857, + "78": 9.77081, + "79": 9.4383, "80": 9.45436, + "81": 9.52266, + "82": 9.7424, + "83": 9.37076, + "84": 9.45377, "85": 9.65832, + "86": 9.12522, + "87": 9.62697, + "88": 9.79619, + "89": 9.66054, "90": 9.85081, + "91": 9.39408, + "92": 9.40744, + "93": 9.13595, + "94": 8.89048, "95": 9.563, + "96": 9.5714, + "97": 9.34318, + "98": 9.73026, + "99": 8.95002, "100": 9.4424 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 30994.0, + "2": 32962.0, + "3": 33026.0, + "4": 30732.0, "5": 36042.0, + "6": 37038.0, + "7": 34481.0, + "8": 31368.0, + "9": 33980.0, "10": 29532.0, + "11": 37852.0, + "12": 34972.0, + "13": 36994.0, + "14": 37789.0, "15": 34058.0, + "16": 36656.0, + "17": 34700.0, + "18": 34946.0, + "19": 35228.0, "20": 32392.0, + "21": 33247.0, + "22": 30040.0, + "23": 37891.0, + "24": 32099.0, "25": 30921.0, + "26": 
34212.0, + "27": 34975.0, + "28": 36746.0, + "29": 37759.0, "30": 32786.0, + "31": 30423.0, + "32": 35992.0, + "33": 36915.0, + "34": 32293.0, "35": 33654.0, + "36": 34755.0, + "37": 37859.0, + "38": 36022.0, + "39": 38343.0, "40": 35963.0, + "41": 35882.0, + "42": 36774.0, + "43": 34186.0, + "44": 33572.0, "45": 35574.0, + "46": 37208.0, + "47": 40154.0, + "48": 36385.0, + "49": 36259.0, "50": 38861.0, + "51": 38061.0, + "52": 37025.0, + "53": 41802.0, + "54": 41253.0, "55": 37654.0, + "56": 41164.0, + "57": 37682.0, + "58": 41782.0, + "59": 39444.0, "60": 40691.0, + "61": 40876.0, + "62": 43113.0, + "63": 38389.0, + "64": 43217.0, "65": 41689.0, + "66": 45525.0, + "67": 41717.0, + "68": 40369.0, + "69": 41287.0, "70": 45545.0, + "71": 41651.0, + "72": 41881.0, + "73": 45139.0, + "74": 35747.0, "75": 39155.0, + "76": 44874.0, + "77": 45442.0, + "78": 46782.0, + "79": 48776.0, "80": 47161.0, + "81": 51277.0, + "82": 49953.0, + "83": 45334.0, + "84": 46096.0, "85": 49238.0, + "86": 46118.0, + "87": 49880.0, + "88": 47115.0, + "89": 48583.0, "90": 49057.0, + "91": 45950.0, + "92": 47820.0, + "93": 46437.0, + "94": 47530.0, "95": 48000.0, + "96": 50285.0, + "97": 46225.0, + "98": 49809.0, + "99": 47890.0, "100": 44636.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 892864512.0, + "2": 892868608.0, + "3": 892868608.0, + "4": 892864512.0, "5": 892865024.0, + "6": 892866560.0, + "7": 892866048.0, + "8": 892867584.0, + "9": 892865536.0, "10": 892867584.0, + "11": 892866048.0, + "12": 892865536.0, + "13": 892865536.0, + "14": 892868096.0, "15": 892867584.0, + "16": 892867072.0, + "17": 892867584.0, + "18": 892869632.0, + "19": 892868096.0, "20": 892866560.0, + "21": 892866560.0, + "22": 892863488.0, + "23": 892864512.0, + "24": 892867072.0, "25": 892863488.0, + "26": 892866560.0, + "27": 892867072.0, + "28": 892865536.0, + "29": 892866048.0, "30": 892863488.0, + "31": 892862464.0, + "32": 
892861952.0, + "33": 892866048.0, + "34": 892865536.0, "35": 892865024.0, + "36": 892868608.0, + "37": 892867072.0, + "38": 892866560.0, + "39": 892866048.0, "40": 892867072.0, + "41": 892865536.0, + "42": 892867584.0, + "43": 892861440.0, + "44": 892862976.0, "45": 892865024.0, + "46": 892864512.0, + "47": 892865024.0, + "48": 892861440.0, + "49": 892863488.0, "50": 892867072.0, + "51": 892860416.0, + "52": 892858880.0, + "53": 892861440.0, + "54": 892861440.0, "55": 892862464.0, + "56": 892865024.0, + "57": 892857344.0, + "58": 892859392.0, + "59": 892858880.0, "60": 892859904.0, + "61": 892868608.0, + "62": 892865536.0, + "63": 892861952.0, + "64": 892863488.0, "65": 892851712.0, + "66": 892866048.0, + "67": 892861440.0, + "68": 892868608.0, + "69": 892864512.0, "70": 892866560.0, + "71": 892868608.0, + "72": 892860416.0, + "73": 892868096.0, + "74": 892858368.0, "75": 892867072.0, + "76": 892866560.0, + "77": 892867072.0, + "78": 892863488.0, + "79": 892864512.0, "80": 892864512.0, + "81": 892866048.0, + "82": 892864000.0, + "83": 892860928.0, + "84": 892861440.0, "85": 892861952.0, + "86": 892861440.0, + "87": 892870144.0, + "88": 892862464.0, + "89": 892864512.0, "90": 892866048.0, + "91": 892867072.0, + "92": 892865536.0, + "93": 892868608.0, + "94": 892864512.0, "95": 892865024.0, + "96": 892865024.0, + "97": 892862976.0, + "98": 892867584.0, + "99": 892859904.0, "100": 892861952.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1867566080.0, + "2": 2107252736.0, + "3": 2107252736.0, + "4": 2107252736.0, "5": 2107481600.0, + "6": 2107481600.0, + "7": 2107481600.0, + "8": 2107481600.0, + "9": 2107481600.0, "10": 2108814336.0, + "11": 2108814336.0, + "12": 2108814336.0, + "13": 2108814336.0, + "14": 2108814336.0, "15": 2108814336.0, + "16": 2109139456.0, + "17": 2109139456.0, + "18": 2109139456.0, + "19": 2109139456.0, "20": 2109139456.0, + "21": 2109139456.0, + "22": 
2109139456.0, + "23": 2109139456.0, + "24": 2109139456.0, "25": 2109139456.0, + "26": 2109139456.0, + "27": 2109139456.0, + "28": 2109139456.0, + "29": 2109139456.0, "30": 2109139456.0, + "31": 2109139456.0, + "32": 2109139456.0, + "33": 2109139456.0, + "34": 2109139456.0, "35": 2109139456.0, + "36": 2109139456.0, + "37": 2109139456.0, + "38": 2109139456.0, + "39": 2109139456.0, "40": 2109139456.0, + "41": 2109139456.0, + "42": 2109139456.0, + "43": 2109139456.0, + "44": 2109139456.0, "45": 2109139456.0, + "46": 2109139456.0, + "47": 2109139456.0, + "48": 2109139456.0, + "49": 2109139456.0, "50": 2109139456.0, + "51": 2109139456.0, + "52": 2109139456.0, + "53": 2109139456.0, + "54": 2109139456.0, "55": 2109139456.0, + "56": 2109139456.0, + "57": 2109139456.0, + "58": 2109139456.0, + "59": 2109139456.0, "60": 2109139456.0, + "61": 2109139456.0, + "62": 2109139456.0, + "63": 2109139456.0, + "64": 2109139456.0, "65": 2109139456.0, + "66": 2109139456.0, + "67": 2109139456.0, + "68": 2109139456.0, + "69": 2109139456.0, "70": 2109139456.0, + "71": 2109139456.0, + "72": 2109139456.0, + "73": 2109139456.0, + "74": 2109139456.0, "75": 2109139456.0, + "76": 2109139456.0, + "77": 2109139456.0, + "78": 2109139456.0, + "79": 2109139456.0, "80": 2109139456.0, + "81": 2109139456.0, + "82": 2109139456.0, + "83": 2109139456.0, + "84": 2109139456.0, "85": 2109139456.0, + "86": 2109139456.0, + "87": 2109897728.0, + "88": 2109897728.0, + "89": 2109897728.0, "90": 2109897728.0, + "91": 2109897728.0, + "92": 2109897728.0, + "93": 2109897728.0, + "94": 2109897728.0, "95": 2109897728.0, + "96": 2109897728.0, + "97": 2109897728.0, + "98": 2109897728.0, + "99": 2109897728.0, "100": 2109897728.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 15.73372, - "5": 0.22156, - "10": 0.21766, - "15": 0.22279, - "20": 0.2043, - "25": 0.2023, - "30": 0.20179, - "35": 0.20654, - "40": 0.20904, - "45": 0.21995, - "50": 0.20076, - 
"55": 0.21849, - "60": 0.21439, - "65": 0.26977, - "70": 0.20736, - "75": 0.21282, - "80": 0.22233, - "85": 0.21095, - "90": 0.20403, - "95": 0.21788, - "100": 0.20993 + "1": 14.31194, + "2": 0.35602, + "3": 0.27118, + "4": 0.26003, + "5": 0.25566, + "6": 0.23955, + "7": 0.25733, + "8": 0.24144, + "9": 0.24541, + "10": 0.24933, + "11": 0.24384, + "12": 0.23671, + "13": 0.23911, + "14": 0.23582, + "15": 0.24799, + "16": 0.24336, + "17": 0.25026, + "18": 0.2284, + "19": 0.23348, + "20": 0.23732, + "21": 0.23466, + "22": 0.23579, + "23": 0.23473, + "24": 0.24834, + "25": 0.23298, + "26": 0.2337, + "27": 0.2322, + "28": 0.23129, + "29": 0.23719, + "30": 0.24475, + "31": 0.27609, + "32": 0.24141, + "33": 0.23534, + "34": 0.25714, + "35": 0.24161, + "36": 0.23358, + "37": 0.23063, + "38": 0.23854, + "39": 0.23304, + "40": 0.2404, + "41": 0.23771, + "42": 0.2345, + "43": 0.24255, + "44": 0.23514, + "45": 0.25421, + "46": 0.26534, + "47": 0.23362, + "48": 0.25382, + "49": 0.27095, + "50": 0.23751, + "51": 0.2738, + "52": 0.26505, + "53": 0.23078, + "54": 0.23459, + "55": 0.2529, + "56": 0.29375, + "57": 0.26697, + "58": 0.24903, + "59": 0.24384, + "60": 0.24359, + "61": 0.2298, + "62": 0.2365, + "63": 0.24866, + "64": 0.23579, + "65": 0.30261, + "66": 0.23489, + "67": 0.28661, + "68": 0.2497, + "69": 0.2358, + "70": 0.23664, + "71": 0.26035, + "72": 0.24553, + "73": 0.27252, + "74": 0.26037, + "75": 0.24806, + "76": 0.26257, + "77": 0.23946, + "78": 0.24328, + "79": 0.24753, + "80": 0.25383, + "81": 0.23677, + "82": 0.23361, + "83": 0.23998, + "84": 0.2503, + "85": 0.2394, + "86": 0.24786, + "87": 0.22954, + "88": 0.23347, + "89": 0.24991, + "90": 0.23017, + "91": 0.25015, + "92": 0.23807, + "93": 0.24597, + "94": 0.26925, + "95": 0.25645, + "96": 0.23369, + "97": 0.24492, + "98": 0.22834, + "99": 0.23921, + "100": 0.23446 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..12778ad6bb9 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.78091, + "2": 10.80272, + "3": 10.8036, + "4": 10.77566, + "5": 10.83259, + "6": 10.83704, + "7": 10.79793, + "8": 10.79364, + "9": 10.808, + "10": 10.76116, + "11": 10.85297, + "12": 10.84152, + "13": 10.8247, + "14": 10.85822, + "15": 10.78238, + "16": 10.77927, + "17": 10.74878, + "18": 10.7897, + "19": 10.7749, + "20": 10.71704, + "21": 10.70811, + "22": 10.54787, + "23": 10.72978, + "24": 10.60324, + "25": 10.55979, + "26": 10.61611, + "27": 10.6446, + "28": 10.62463, + "29": 10.63492, + "30": 10.42362, + "31": 10.16499, + "32": 10.51313, + "33": 10.5094, + "34": 10.2668, + "35": 10.32318, + "36": 10.28865, + "37": 10.41114, + "38": 10.26426, + "39": 10.45, + "40": 10.17473, + "41": 10.20958, + "42": 10.27824, + "43": 9.91831, + "44": 10.03131, + "45": 9.91995, + "46": 9.8862, + "47": 10.19255, + "48": 9.92803, + "49": 9.61616, + "50": 9.98532, + "51": 9.90528, + "52": 9.80364, + "53": 10.12728, + "54": 10.00036, + "55": 9.9362, + "56": 9.68506, + "57": 9.55805, + "58": 9.90514, + "59": 9.63857, + "60": 9.57451, + "61": 9.76864, + "62": 10.03802, + "63": 9.44503, + "64": 9.82796, + "65": 9.00712, + "66": 9.77422, + "67": 9.41277, + "68": 9.84111, + "69": 9.82784, + "70": 9.79011, + "71": 9.66957, + "72": 9.62799, + "73": 9.5473, + "74": 9.03663, + "75": 9.49153, 
+ "76": 9.16783, + "77": 10.10857, + "78": 9.77081, + "79": 9.4383, + "80": 9.45436, + "81": 9.52266, + "82": 9.7424, + "83": 9.37076, + "84": 9.45377, + "85": 9.65832, + "86": 9.12522, + "87": 9.62697, + "88": 9.79619, + "89": 9.66054, + "90": 9.85081, + "91": 9.39408, + "92": 9.40744, + "93": 9.13595, + "94": 8.89048, + "95": 9.563, + "96": 9.5714, + "97": 9.34318, + "98": 9.73026, + "99": 8.95002, + "100": 9.4424 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 30994.0, + "2": 32962.0, + "3": 33026.0, + "4": 30732.0, + "5": 36042.0, + "6": 37038.0, + "7": 34481.0, + "8": 31368.0, + "9": 33980.0, + "10": 29532.0, + "11": 37852.0, + "12": 34972.0, + "13": 36994.0, + "14": 37789.0, + "15": 34058.0, + "16": 36656.0, + "17": 34700.0, + "18": 34946.0, + "19": 35228.0, + "20": 32392.0, + "21": 33247.0, + "22": 30040.0, + "23": 37891.0, + "24": 32099.0, + "25": 30921.0, + "26": 34212.0, + "27": 34975.0, + "28": 36746.0, + "29": 37759.0, + "30": 32786.0, + "31": 30423.0, + "32": 35992.0, + "33": 36915.0, + "34": 32293.0, + "35": 33654.0, + "36": 34755.0, + "37": 37859.0, + "38": 36022.0, + "39": 38343.0, + "40": 35963.0, + "41": 35882.0, + "42": 36774.0, + "43": 34186.0, + "44": 33572.0, + "45": 35574.0, + "46": 37208.0, + "47": 40154.0, + "48": 36385.0, + "49": 36259.0, + "50": 38861.0, + "51": 38061.0, + "52": 37025.0, + "53": 41802.0, + "54": 41253.0, + "55": 37654.0, + "56": 41164.0, + "57": 37682.0, + "58": 41782.0, + "59": 39444.0, + "60": 40691.0, + "61": 40876.0, + "62": 43113.0, + "63": 38389.0, + "64": 43217.0, + "65": 41689.0, + "66": 45525.0, + "67": 41717.0, + "68": 40369.0, + "69": 41287.0, + "70": 45545.0, + "71": 41651.0, + "72": 41881.0, + "73": 45139.0, + "74": 35747.0, + "75": 39155.0, + "76": 44874.0, + "77": 45442.0, + "78": 46782.0, + "79": 48776.0, + "80": 47161.0, + "81": 51277.0, + "82": 49953.0, + "83": 45334.0, + "84": 46096.0, + "85": 49238.0, + "86": 46118.0, + "87": 49880.0, + "88": 
47115.0, + "89": 48583.0, + "90": 49057.0, + "91": 45950.0, + "92": 47820.0, + "93": 46437.0, + "94": 47530.0, + "95": 48000.0, + "96": 50285.0, + "97": 46225.0, + "98": 49809.0, + "99": 47890.0, + "100": 44636.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 892864512.0, + "2": 892868608.0, + "3": 892868608.0, + "4": 892864512.0, + "5": 892865024.0, + "6": 892866560.0, + "7": 892866048.0, + "8": 892867584.0, + "9": 892865536.0, + "10": 892867584.0, + "11": 892866048.0, + "12": 892865536.0, + "13": 892865536.0, + "14": 892868096.0, + "15": 892867584.0, + "16": 892867072.0, + "17": 892867584.0, + "18": 892869632.0, + "19": 892868096.0, + "20": 892866560.0, + "21": 892866560.0, + "22": 892863488.0, + "23": 892864512.0, + "24": 892867072.0, + "25": 892863488.0, + "26": 892866560.0, + "27": 892867072.0, + "28": 892865536.0, + "29": 892866048.0, + "30": 892863488.0, + "31": 892862464.0, + "32": 892861952.0, + "33": 892866048.0, + "34": 892865536.0, + "35": 892865024.0, + "36": 892868608.0, + "37": 892867072.0, + "38": 892866560.0, + "39": 892866048.0, + "40": 892867072.0, + "41": 892865536.0, + "42": 892867584.0, + "43": 892861440.0, + "44": 892862976.0, + "45": 892865024.0, + "46": 892864512.0, + "47": 892865024.0, + "48": 892861440.0, + "49": 892863488.0, + "50": 892867072.0, + "51": 892860416.0, + "52": 892858880.0, + "53": 892861440.0, + "54": 892861440.0, + "55": 892862464.0, + "56": 892865024.0, + "57": 892857344.0, + "58": 892859392.0, + "59": 892858880.0, + "60": 892859904.0, + "61": 892868608.0, + "62": 892865536.0, + "63": 892861952.0, + "64": 892863488.0, + "65": 892851712.0, + "66": 892866048.0, + "67": 892861440.0, + "68": 892868608.0, + "69": 892864512.0, + "70": 892866560.0, + "71": 892868608.0, + "72": 892860416.0, + "73": 892868096.0, + "74": 892858368.0, + "75": 892867072.0, + "76": 892866560.0, + "77": 892867072.0, + "78": 892863488.0, + "79": 892864512.0, + "80": 892864512.0, + 
"81": 892866048.0, + "82": 892864000.0, + "83": 892860928.0, + "84": 892861440.0, + "85": 892861952.0, + "86": 892861440.0, + "87": 892870144.0, + "88": 892862464.0, + "89": 892864512.0, + "90": 892866048.0, + "91": 892867072.0, + "92": 892865536.0, + "93": 892868608.0, + "94": 892864512.0, + "95": 892865024.0, + "96": 892865024.0, + "97": 892862976.0, + "98": 892867584.0, + "99": 892859904.0, + "100": 892861952.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1867566080.0, + "2": 2107252736.0, + "3": 2107252736.0, + "4": 2107252736.0, + "5": 2107481600.0, + "6": 2107481600.0, + "7": 2107481600.0, + "8": 2107481600.0, + "9": 2107481600.0, + "10": 2108814336.0, + "11": 2108814336.0, + "12": 2108814336.0, + "13": 2108814336.0, + "14": 2108814336.0, + "15": 2108814336.0, + "16": 2109139456.0, + "17": 2109139456.0, + "18": 2109139456.0, + "19": 2109139456.0, + "20": 2109139456.0, + "21": 2109139456.0, + "22": 2109139456.0, + "23": 2109139456.0, + "24": 2109139456.0, + "25": 2109139456.0, + "26": 2109139456.0, + "27": 2109139456.0, + "28": 2109139456.0, + "29": 2109139456.0, + "30": 2109139456.0, + "31": 2109139456.0, + "32": 2109139456.0, + "33": 2109139456.0, + "34": 2109139456.0, + "35": 2109139456.0, + "36": 2109139456.0, + "37": 2109139456.0, + "38": 2109139456.0, + "39": 2109139456.0, + "40": 2109139456.0, + "41": 2109139456.0, + "42": 2109139456.0, + "43": 2109139456.0, + "44": 2109139456.0, + "45": 2109139456.0, + "46": 2109139456.0, + "47": 2109139456.0, + "48": 2109139456.0, + "49": 2109139456.0, + "50": 2109139456.0, + "51": 2109139456.0, + "52": 2109139456.0, + "53": 2109139456.0, + "54": 2109139456.0, + "55": 2109139456.0, + "56": 2109139456.0, + "57": 2109139456.0, + "58": 2109139456.0, + "59": 2109139456.0, + "60": 2109139456.0, + "61": 2109139456.0, + "62": 2109139456.0, + "63": 2109139456.0, + "64": 2109139456.0, + "65": 2109139456.0, + "66": 2109139456.0, + "67": 2109139456.0, + 
"68": 2109139456.0, + "69": 2109139456.0, + "70": 2109139456.0, + "71": 2109139456.0, + "72": 2109139456.0, + "73": 2109139456.0, + "74": 2109139456.0, + "75": 2109139456.0, + "76": 2109139456.0, + "77": 2109139456.0, + "78": 2109139456.0, + "79": 2109139456.0, + "80": 2109139456.0, + "81": 2109139456.0, + "82": 2109139456.0, + "83": 2109139456.0, + "84": 2109139456.0, + "85": 2109139456.0, + "86": 2109139456.0, + "87": 2109897728.0, + "88": 2109897728.0, + "89": 2109897728.0, + "90": 2109897728.0, + "91": 2109897728.0, + "92": 2109897728.0, + "93": 2109897728.0, + "94": 2109897728.0, + "95": 2109897728.0, + "96": 2109897728.0, + "97": 2109897728.0, + "98": 2109897728.0, + "99": 2109897728.0, + "100": 2109897728.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.94584, + "2": 0.28148, + "3": 0.23092, + "4": 0.2272, + "5": 0.21174, + "6": 0.2052, + "7": 0.2177, + "8": 0.20762, + "9": 0.21011, + "10": 0.20762, + "11": 0.20739, + "12": 0.20558, + "13": 0.20293, + "14": 0.20366, + "15": 0.2151, + "16": 0.20336, + "17": 0.211, + "18": 0.20107, + "19": 0.19975, + "20": 0.19946, + "21": 0.20167, + "22": 0.20546, + "23": 0.2079, + "24": 0.21407, + "25": 0.20322, + "26": 0.20113, + "27": 0.2036, + "28": 0.20193, + "29": 0.20351, + "30": 0.20276, + "31": 0.24088, + "32": 0.20552, + "33": 0.2062, + "34": 0.22507, + "35": 0.21674, + "36": 0.20224, + "37": 0.2024, + "38": 0.20522, + "39": 0.20019, + "40": 0.20848, + "41": 0.20633, + "42": 0.20422, + "43": 0.22047, + "44": 0.21076, + "45": 0.22033, + "46": 0.23288, + "47": 0.20066, + "48": 0.2262, + "49": 0.25589, + "50": 0.2006, + "51": 0.21639, + "52": 0.23518, + "53": 0.20634, + "54": 0.20906, + "55": 0.22297, + "56": 0.2742, + "57": 0.23575, + "58": 0.21113, + "59": 0.21965, + "60": 0.21956, + "61": 0.20714, + "62": 0.20897, + "63": 0.21858, + "64": 0.21079, + "65": 0.26753, + "66": 0.2086, + "67": 0.2478, + "68": 0.22097, + "69": 0.20663, + "70": 0.20836, + 
"71": 0.22856, + "72": 0.21708, + "73": 0.24693, + "74": 0.23784, + "75": 0.21364, + "76": 0.23055, + "77": 0.20122, + "78": 0.21746, + "79": 0.21857, + "80": 0.22508, + "81": 0.21322, + "82": 0.21041, + "83": 0.24051, + "84": 0.26987, + "85": 0.27857, + "86": 0.28871, + "87": 0.24894, + "88": 0.21388, + "89": 0.22289, + "90": 0.20477, + "91": 0.22651, + "92": 0.21738, + "93": 0.22137, + "94": 0.23367, + "95": 0.21527, + "96": 0.20516, + "97": 0.22856, + "98": 0.20431, + "99": 0.21662, + "100": 0.2101 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..c598c8c5c86 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.78091, + "2": 10.80272, + "3": 10.8036, + "4": 10.77566, + "5": 10.83259, + "6": 10.83704, + "7": 10.79793, + "8": 10.79364, + "9": 10.808, + "10": 10.76116, + "11": 10.85297, + "12": 10.84152, + "13": 10.8247, + "14": 10.85822, + "15": 10.78238, + "16": 10.77927, + "17": 10.74878, + "18": 10.7897, + "19": 10.7749, + "20": 10.71704, + "21": 10.70811, + "22": 10.54787, + "23": 10.72978, + "24": 10.60324, + "25": 10.55979, + "26": 10.61611, + "27": 10.6446, + "28": 10.62463, + "29": 10.63492, + "30": 10.42362, + "31": 10.16499, + "32": 10.51313, + "33": 10.5094, + "34": 10.2668, + "35": 10.32318, + "36": 10.28865, + "37": 10.41114, + "38": 10.26426, + "39": 10.45, + "40": 10.17473, + "41": 10.20958, + "42": 10.27824, + "43": 9.91831, + 
"44": 10.03131, + "45": 9.91995, + "46": 9.8862, + "47": 10.19255, + "48": 9.92803, + "49": 9.61616, + "50": 9.98532, + "51": 9.90528, + "52": 9.80364, + "53": 10.12728, + "54": 10.00036, + "55": 9.9362, + "56": 9.68506, + "57": 9.55805, + "58": 9.90514, + "59": 9.63857, + "60": 9.57451, + "61": 9.76864, + "62": 10.03802, + "63": 9.44503, + "64": 9.82796, + "65": 9.00712, + "66": 9.77422, + "67": 9.41277, + "68": 9.84111, + "69": 9.82784, + "70": 9.79011, + "71": 9.66957, + "72": 9.62799, + "73": 9.5473, + "74": 9.03663, + "75": 9.49153, + "76": 9.16783, + "77": 10.10857, + "78": 9.77081, + "79": 9.4383, + "80": 9.45436, + "81": 9.52266, + "82": 9.7424, + "83": 9.37076, + "84": 9.45377, + "85": 9.65832, + "86": 9.12522, + "87": 9.62697, + "88": 9.79619, + "89": 9.66054, + "90": 9.85081, + "91": 9.39408, + "92": 9.40744, + "93": 9.13595, + "94": 8.89048, + "95": 9.563, + "96": 9.5714, + "97": 9.34318, + "98": 9.73026, + "99": 8.95002, + "100": 9.4424 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 30994.0, + "2": 32962.0, + "3": 33026.0, + "4": 30732.0, + "5": 36042.0, + "6": 37038.0, + "7": 34481.0, + "8": 31368.0, + "9": 33980.0, + "10": 29532.0, + "11": 37852.0, + "12": 34972.0, + "13": 36994.0, + "14": 37789.0, + "15": 34058.0, + "16": 36656.0, + "17": 34700.0, + "18": 34946.0, + "19": 35228.0, + "20": 32392.0, + "21": 33247.0, + "22": 30040.0, + "23": 37891.0, + "24": 32099.0, + "25": 30921.0, + "26": 34212.0, + "27": 34975.0, + "28": 36746.0, + "29": 37759.0, + "30": 32786.0, + "31": 30423.0, + "32": 35992.0, + "33": 36915.0, + "34": 32293.0, + "35": 33654.0, + "36": 34755.0, + "37": 37859.0, + "38": 36022.0, + "39": 38343.0, + "40": 35963.0, + "41": 35882.0, + "42": 36774.0, + "43": 34186.0, + "44": 33572.0, + "45": 35574.0, + "46": 37208.0, + "47": 40154.0, + "48": 36385.0, + "49": 36259.0, + "50": 38861.0, + "51": 38061.0, + "52": 37025.0, + "53": 41802.0, + "54": 41253.0, + "55": 37654.0, + "56": 
41164.0, + "57": 37682.0, + "58": 41782.0, + "59": 39444.0, + "60": 40691.0, + "61": 40876.0, + "62": 43113.0, + "63": 38389.0, + "64": 43217.0, + "65": 41689.0, + "66": 45525.0, + "67": 41717.0, + "68": 40369.0, + "69": 41287.0, + "70": 45545.0, + "71": 41651.0, + "72": 41881.0, + "73": 45139.0, + "74": 35747.0, + "75": 39155.0, + "76": 44874.0, + "77": 45442.0, + "78": 46782.0, + "79": 48776.0, + "80": 47161.0, + "81": 51277.0, + "82": 49953.0, + "83": 45334.0, + "84": 46096.0, + "85": 49238.0, + "86": 46118.0, + "87": 49880.0, + "88": 47115.0, + "89": 48583.0, + "90": 49057.0, + "91": 45950.0, + "92": 47820.0, + "93": 46437.0, + "94": 47530.0, + "95": 48000.0, + "96": 50285.0, + "97": 46225.0, + "98": 49809.0, + "99": 47890.0, + "100": 44636.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 892864512.0, + "2": 892868608.0, + "3": 892868608.0, + "4": 892864512.0, + "5": 892865024.0, + "6": 892866560.0, + "7": 892866048.0, + "8": 892867584.0, + "9": 892865536.0, + "10": 892867584.0, + "11": 892866048.0, + "12": 892865536.0, + "13": 892865536.0, + "14": 892868096.0, + "15": 892867584.0, + "16": 892867072.0, + "17": 892867584.0, + "18": 892869632.0, + "19": 892868096.0, + "20": 892866560.0, + "21": 892866560.0, + "22": 892863488.0, + "23": 892864512.0, + "24": 892867072.0, + "25": 892863488.0, + "26": 892866560.0, + "27": 892867072.0, + "28": 892865536.0, + "29": 892866048.0, + "30": 892863488.0, + "31": 892862464.0, + "32": 892861952.0, + "33": 892866048.0, + "34": 892865536.0, + "35": 892865024.0, + "36": 892868608.0, + "37": 892867072.0, + "38": 892866560.0, + "39": 892866048.0, + "40": 892867072.0, + "41": 892865536.0, + "42": 892867584.0, + "43": 892861440.0, + "44": 892862976.0, + "45": 892865024.0, + "46": 892864512.0, + "47": 892865024.0, + "48": 892861440.0, + "49": 892863488.0, + "50": 892867072.0, + "51": 892860416.0, + "52": 892858880.0, + "53": 892861440.0, + "54": 892861440.0, + "55": 
892862464.0, + "56": 892865024.0, + "57": 892857344.0, + "58": 892859392.0, + "59": 892858880.0, + "60": 892859904.0, + "61": 892868608.0, + "62": 892865536.0, + "63": 892861952.0, + "64": 892863488.0, + "65": 892851712.0, + "66": 892866048.0, + "67": 892861440.0, + "68": 892868608.0, + "69": 892864512.0, + "70": 892866560.0, + "71": 892868608.0, + "72": 892860416.0, + "73": 892868096.0, + "74": 892858368.0, + "75": 892867072.0, + "76": 892866560.0, + "77": 892867072.0, + "78": 892863488.0, + "79": 892864512.0, + "80": 892864512.0, + "81": 892866048.0, + "82": 892864000.0, + "83": 892860928.0, + "84": 892861440.0, + "85": 892861952.0, + "86": 892861440.0, + "87": 892870144.0, + "88": 892862464.0, + "89": 892864512.0, + "90": 892866048.0, + "91": 892867072.0, + "92": 892865536.0, + "93": 892868608.0, + "94": 892864512.0, + "95": 892865024.0, + "96": 892865024.0, + "97": 892862976.0, + "98": 892867584.0, + "99": 892859904.0, + "100": 892861952.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1867566080.0, + "2": 2107252736.0, + "3": 2107252736.0, + "4": 2107252736.0, + "5": 2107481600.0, + "6": 2107481600.0, + "7": 2107481600.0, + "8": 2107481600.0, + "9": 2107481600.0, + "10": 2108814336.0, + "11": 2108814336.0, + "12": 2108814336.0, + "13": 2108814336.0, + "14": 2108814336.0, + "15": 2108814336.0, + "16": 2109139456.0, + "17": 2109139456.0, + "18": 2109139456.0, + "19": 2109139456.0, + "20": 2109139456.0, + "21": 2109139456.0, + "22": 2109139456.0, + "23": 2109139456.0, + "24": 2109139456.0, + "25": 2109139456.0, + "26": 2109139456.0, + "27": 2109139456.0, + "28": 2109139456.0, + "29": 2109139456.0, + "30": 2109139456.0, + "31": 2109139456.0, + "32": 2109139456.0, + "33": 2109139456.0, + "34": 2109139456.0, + "35": 2109139456.0, + "36": 2109139456.0, + "37": 2109139456.0, + "38": 2109139456.0, + "39": 2109139456.0, + "40": 2109139456.0, + "41": 2109139456.0, + "42": 2109139456.0, + "43": 
2109139456.0, + "44": 2109139456.0, + "45": 2109139456.0, + "46": 2109139456.0, + "47": 2109139456.0, + "48": 2109139456.0, + "49": 2109139456.0, + "50": 2109139456.0, + "51": 2109139456.0, + "52": 2109139456.0, + "53": 2109139456.0, + "54": 2109139456.0, + "55": 2109139456.0, + "56": 2109139456.0, + "57": 2109139456.0, + "58": 2109139456.0, + "59": 2109139456.0, + "60": 2109139456.0, + "61": 2109139456.0, + "62": 2109139456.0, + "63": 2109139456.0, + "64": 2109139456.0, + "65": 2109139456.0, + "66": 2109139456.0, + "67": 2109139456.0, + "68": 2109139456.0, + "69": 2109139456.0, + "70": 2109139456.0, + "71": 2109139456.0, + "72": 2109139456.0, + "73": 2109139456.0, + "74": 2109139456.0, + "75": 2109139456.0, + "76": 2109139456.0, + "77": 2109139456.0, + "78": 2109139456.0, + "79": 2109139456.0, + "80": 2109139456.0, + "81": 2109139456.0, + "82": 2109139456.0, + "83": 2109139456.0, + "84": 2109139456.0, + "85": 2109139456.0, + "86": 2109139456.0, + "87": 2109897728.0, + "88": 2109897728.0, + "89": 2109897728.0, + "90": 2109897728.0, + "91": 2109897728.0, + "92": 2109897728.0, + "93": 2109897728.0, + "94": 2109897728.0, + "95": 2109897728.0, + "96": 2109897728.0, + "97": 2109897728.0, + "98": 2109897728.0, + "99": 2109897728.0, + "100": 2109897728.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.1374, + "2": 0.29466, + "3": 0.26236, + "4": 0.26156, + "5": 0.24237, + "6": 0.23849, + "7": 0.252, + "8": 0.24427, + "9": 0.24029, + "10": 0.23618, + "11": 0.23659, + "12": 0.23342, + "13": 0.23316, + "14": 0.23233, + "15": 0.24856, + "16": 0.23522, + "17": 0.24126, + "18": 0.22751, + "19": 0.2299, + "20": 0.23346, + "21": 0.23441, + "22": 0.22921, + "23": 0.23376, + "24": 0.23927, + "25": 0.23185, + "26": 0.23099, + "27": 0.22756, + "28": 0.2284, + "29": 0.22889, + "30": 0.23032, + "31": 0.26621, + "32": 0.23553, + "33": 0.23683, + "34": 0.25808, + "35": 0.23912, + "36": 0.23198, + "37": 0.23086, + "38": 
0.23515, + "39": 0.2291, + "40": 0.24108, + "41": 0.23663, + "42": 0.23631, + "43": 0.23891, + "44": 0.23205, + "45": 0.24801, + "46": 0.2689, + "47": 0.23258, + "48": 0.25079, + "49": 0.26858, + "50": 0.2361, + "51": 0.27052, + "52": 0.26801, + "53": 0.23804, + "54": 0.23998, + "55": 0.25008, + "56": 0.29894, + "57": 0.26807, + "58": 0.23939, + "59": 0.24845, + "60": 0.24835, + "61": 0.24071, + "62": 0.23697, + "63": 0.25187, + "64": 0.24293, + "65": 0.31273, + "66": 0.23771, + "67": 0.28851, + "68": 0.25834, + "69": 0.24387, + "70": 0.23624, + "71": 0.26612, + "72": 0.25067, + "73": 0.28048, + "74": 0.26617, + "75": 0.24822, + "76": 0.26459, + "77": 0.23429, + "78": 0.24496, + "79": 0.24741, + "80": 0.25523, + "81": 0.2433, + "82": 0.23696, + "83": 0.2421, + "84": 0.24973, + "85": 0.24316, + "86": 0.25585, + "87": 0.23448, + "88": 0.23245, + "89": 0.25191, + "90": 0.23373, + "91": 0.25927, + "92": 0.24203, + "93": 0.25124, + "94": 0.26498, + "95": 0.24482, + "96": 0.23378, + "97": 0.25053, + "98": 0.23165, + "99": 0.24761, + "100": 0.23858 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 0ff756ea400..0938c76ab04 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": 
{ "1": 10.78091, + "2": 10.80272, + "3": 10.8036, + "4": 10.77566, "5": 10.83259, + "6": 10.83704, + "7": 10.79728, + "8": 10.79467, + "9": 10.80828, "10": 10.76154, + "11": 10.85384, + "12": 10.84189, + "13": 10.82465, + "14": 10.85824, "15": 10.78235, + "16": 10.77923, + "17": 10.7484, + "18": 10.78919, + "19": 10.77567, "20": 10.71707, + "21": 10.70767, + "22": 10.54782, + "23": 10.72977, + "24": 10.60346, "25": 10.55815, + "26": 10.61659, + "27": 10.6449, + "28": 10.62536, + "29": 10.6349, "30": 10.42303, + "31": 10.16459, + "32": 10.51284, + "33": 10.50836, + "34": 10.2667, "35": 10.32353, + "36": 10.2895, + "37": 10.41051, + "38": 10.26406, + "39": 10.44988, "40": 10.17537, + "41": 10.20908, + "42": 10.27843, + "43": 9.91808, + "44": 10.03128, "45": 9.92032, + "46": 9.88579, + "47": 10.19208, + "48": 9.92758, + "49": 9.61634, "50": 9.98512, + "51": 9.90532, + "52": 9.8039, + "53": 10.12749, + "54": 10.00016, "55": 9.93664, + "56": 9.68581, + "57": 9.55837, + "58": 9.90508, + "59": 9.63839, "60": 9.57464, + "61": 9.76841, + "62": 10.03826, + "63": 9.44553, + "64": 9.82755, "65": 9.00746, + "66": 9.77476, + "67": 9.41315, + "68": 9.84101, + "69": 9.8283, "70": 9.79049, + "71": 9.66947, + "72": 9.62799, + "73": 9.54696, + "74": 9.03684, "75": 9.49167, + "76": 9.16779, + "77": 10.1088, + "78": 9.77072, + "79": 9.43806, "80": 9.45438, + "81": 9.5225, + "82": 9.74228, + "83": 9.36999, + "84": 9.45397, "85": 9.65808, + "86": 9.12501, + "87": 9.62705, + "88": 9.79641, + "89": 9.66075, "90": 9.8512, + "91": 9.39414, + "92": 9.40741, + "93": 9.13573, + "94": 8.89066, "95": 9.56273, + "96": 9.5712, + "97": 9.34355, + "98": 9.73013, + "99": 8.95039, "100": 9.44212 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 30994.0, + "2": 32962.0, + "3": 33026.0, + "4": 30732.0, "5": 36042.0, + "6": 36987.0, + "7": 34490.0, + "8": 31442.0, + "9": 33931.0, "10": 29993.0, + "11": 37681.0, + "12": 34978.0, + "13": 
36675.0, + "14": 37601.0, "15": 34369.0, + "16": 36581.0, + "17": 34615.0, + "18": 34408.0, + "19": 35362.0, "20": 32532.0, + "21": 33181.0, + "22": 30426.0, + "23": 37807.0, + "24": 32299.0, "25": 30879.0, + "26": 33994.0, + "27": 34721.0, + "28": 36576.0, + "29": 37196.0, "30": 32443.0, + "31": 30177.0, + "32": 35948.0, + "33": 37549.0, + "34": 32243.0, "35": 33961.0, + "36": 34340.0, + "37": 37853.0, + "38": 35694.0, + "39": 38797.0, "40": 36317.0, + "41": 35380.0, + "42": 36704.0, + "43": 34045.0, + "44": 33691.0, "45": 35877.0, + "46": 36737.0, + "47": 40148.0, + "48": 36696.0, + "49": 36203.0, "50": 38688.0, + "51": 37791.0, + "52": 37021.0, + "53": 41944.0, + "54": 40947.0, "55": 37727.0, + "56": 40761.0, + "57": 37481.0, + "58": 41787.0, + "59": 39365.0, "60": 40922.0, + "61": 41100.0, + "62": 43388.0, + "63": 38269.0, + "64": 43526.0, "65": 41821.0, + "66": 44876.0, + "67": 42497.0, + "68": 39967.0, + "69": 41255.0, "70": 45781.0, + "71": 42348.0, + "72": 42151.0, + "73": 45043.0, + "74": 35705.0, "75": 39397.0, + "76": 45340.0, + "77": 45670.0, + "78": 46614.0, + "79": 49159.0, "80": 47317.0, + "81": 51048.0, + "82": 49312.0, + "83": 45257.0, + "84": 45494.0, "85": 49366.0, + "86": 45783.0, + "87": 50223.0, + "88": 47536.0, + "89": 48826.0, "90": 49499.0, + "91": 45726.0, + "92": 47926.0, + "93": 46433.0, + "94": 47675.0, "95": 47504.0, + "96": 50174.0, + "97": 46465.0, + "98": 49255.0, + "99": 48053.0, "100": 44507.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1254501376.0, + "2": 1254505472.0, + "3": 1254505472.0, + "4": 1254501376.0, "5": 1254501888.0, + "6": 1254503424.0, + "7": 1254503936.0, + "8": 1254503936.0, + "9": 1254501888.0, "10": 1254503424.0, + "11": 1254503936.0, + "12": 1254502912.0, + "13": 1254500864.0, + "14": 1254505472.0, "15": 1254504448.0, + "16": 1254503424.0, + "17": 1254504448.0, + "18": 1254502400.0, + "19": 1254503936.0, "20": 1254503424.0, + 
"21": 1254503424.0, + "22": 1254501376.0, + "23": 1254500864.0, + "24": 1254503424.0, "25": 1254500352.0, + "26": 1254502400.0, + "27": 1254501888.0, + "28": 1254502912.0, + "29": 1254505472.0, "30": 1254500352.0, + "31": 1254499328.0, + "32": 1254500352.0, + "33": 1254502912.0, + "34": 1254502912.0, "35": 1254501888.0, + "36": 1254505472.0, + "37": 1254503424.0, + "38": 1254503936.0, + "39": 1254502912.0, "40": 1254502912.0, + "41": 1254503424.0, + "42": 1254502912.0, + "43": 1254499840.0, + "44": 1254501376.0, "45": 1254502400.0, + "46": 1254500864.0, + "47": 1254503936.0, + "48": 1254499840.0, + "49": 1254500352.0, "50": 1254502912.0, + "51": 1254496768.0, + "52": 1254496256.0, + "53": 1254497792.0, + "54": 1254498304.0, "55": 1254500352.0, + "56": 1254501888.0, + "57": 1254493184.0, + "58": 1254498304.0, + "59": 1254495232.0, "60": 1254496768.0, + "61": 1254504960.0, + "62": 1254503936.0, + "63": 1254499328.0, + "64": 1254498816.0, "65": 1254488576.0, + "66": 1254502912.0, + "67": 1254498304.0, + "68": 1254505984.0, + "69": 1254501376.0, "70": 1254502912.0, + "71": 1254504960.0, + "72": 1254496256.0, + "73": 1254504448.0, + "74": 1254495232.0, "75": 1254504448.0, + "76": 1254503424.0, + "77": 1254503936.0, + "78": 1254500352.0, + "79": 1254500864.0, "80": 1254499840.0, + "81": 1254503424.0, + "82": 1254500352.0, + "83": 1254497792.0, + "84": 1254497280.0, "85": 1254499328.0, + "86": 1254498816.0, + "87": 1254505472.0, + "88": 1254499328.0, + "89": 1254500864.0, "90": 1254502912.0, + "91": 1254505472.0, + "92": 1254502912.0, + "93": 1254505472.0, + "94": 1254500352.0, "95": 1254501888.0, + "96": 1254501888.0, + "97": 1254499328.0, + "98": 1254507520.0, + "99": 1254497280.0, "100": 1254499840.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1987779584.0, + "2": 2468141568.0, + "3": 2468920320.0, + "4": 2468920320.0, "5": 2468920320.0, + "6": 2468920320.0, + "7": 2468920320.0, + "8": 
2468920320.0, + "9": 2469234688.0, "10": 2469234688.0, + "11": 2469234688.0, + "12": 2469234688.0, + "13": 2469234688.0, + "14": 2469234688.0, "15": 2469234688.0, + "16": 2469234688.0, + "17": 2469234688.0, + "18": 2469234688.0, + "19": 2469234688.0, "20": 2469234688.0, + "21": 2469234688.0, + "22": 2469234688.0, + "23": 2469234688.0, + "24": 2469234688.0, "25": 2469234688.0, + "26": 2469234688.0, + "27": 2469234688.0, + "28": 2469234688.0, + "29": 2469234688.0, "30": 2469234688.0, + "31": 2469234688.0, + "32": 2469234688.0, + "33": 2469234688.0, + "34": 2469234688.0, "35": 2469234688.0, + "36": 2469234688.0, + "37": 2469234688.0, + "38": 2469234688.0, + "39": 2469234688.0, "40": 2469234688.0, + "41": 2469234688.0, + "42": 2469234688.0, + "43": 2469234688.0, + "44": 2469234688.0, "45": 2469234688.0, + "46": 2469234688.0, + "47": 2469234688.0, + "48": 2469234688.0, + "49": 2469234688.0, "50": 2469234688.0, + "51": 2469234688.0, + "52": 2469234688.0, + "53": 2469234688.0, + "54": 2469234688.0, "55": 2469234688.0, + "56": 2469234688.0, + "57": 2469234688.0, + "58": 2469234688.0, + "59": 2469234688.0, "60": 2469234688.0, + "61": 2469234688.0, + "62": 2469234688.0, + "63": 2469234688.0, + "64": 2469234688.0, "65": 2469234688.0, + "66": 2469234688.0, + "67": 2469234688.0, + "68": 2469234688.0, + "69": 2469234688.0, "70": 2469234688.0, + "71": 2469234688.0, + "72": 2469234688.0, + "73": 2469234688.0, + "74": 2469234688.0, "75": 2469234688.0, + "76": 2471084032.0, + "77": 2471084032.0, + "78": 2471084032.0, + "79": 2471084032.0, "80": 2471084032.0, + "81": 2471084032.0, + "82": 2471084032.0, + "83": 2471084032.0, + "84": 2471084032.0, "85": 2471084032.0, + "86": 2471084032.0, + "87": 2471084032.0, + "88": 2471084032.0, + "89": 2471084032.0, "90": 2471084032.0, + "91": 2471084032.0, + "92": 2471084032.0, + "93": 2471084032.0, + "94": 2471084032.0, "95": 2471084032.0, + "96": 2471084032.0, + "97": 2471084032.0, + "98": 2471084032.0, + "99": 2471084032.0, "100": 2471084032.0 
} }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 14.69201, - "5": 0.22243, - "10": 0.21151, - "15": 0.23075, - "20": 0.20988, - "25": 0.20888, - "30": 0.20701, - "35": 0.21011, - "40": 0.23615, - "45": 0.23553, - "50": 0.21576, - "55": 0.22099, - "60": 0.21927, - "65": 0.27911, - "70": 0.2143, - "75": 0.22985, - "80": 0.22209, - "85": 0.21722, - "90": 0.21557, - "95": 0.22417, - "100": 0.21151 + "1": 16.45406, + "2": 0.30376, + "3": 0.27406, + "4": 0.26359, + "5": 0.25039, + "6": 0.25242, + "7": 0.26015, + "8": 0.2474, + "9": 0.25416, + "10": 0.2407, + "11": 0.24653, + "12": 0.23844, + "13": 0.2391, + "14": 0.2434, + "15": 0.25985, + "16": 0.24412, + "17": 0.25323, + "18": 0.24184, + "19": 0.23932, + "20": 0.23754, + "21": 0.23862, + "22": 0.24163, + "23": 0.24143, + "24": 0.23752, + "25": 0.23707, + "26": 0.24138, + "27": 0.23747, + "28": 0.2399, + "29": 0.2399, + "30": 0.24117, + "31": 0.28742, + "32": 0.24862, + "33": 0.24794, + "34": 0.28035, + "35": 0.24832, + "36": 0.24669, + "37": 0.23974, + "38": 0.25045, + "39": 0.239, + "40": 0.26253, + "41": 0.24423, + "42": 0.25718, + "43": 0.25559, + "44": 0.24336, + "45": 0.27381, + "46": 0.27372, + "47": 0.24664, + "48": 0.25954, + "49": 0.30788, + "50": 0.25811, + "51": 0.26735, + "52": 0.27368, + "53": 0.24833, + "54": 0.24973, + "55": 0.25579, + "56": 0.30268, + "57": 0.26237, + "58": 0.24805, + "59": 0.25916, + "60": 0.25631, + "61": 0.54796, + "62": 0.24754, + "63": 0.27021, + "64": 0.25819, + "65": 0.32296, + "66": 0.2505, + "67": 0.30141, + "68": 0.26641, + "69": 0.24765, + "70": 0.2537, + "71": 0.26961, + "72": 0.25601, + "73": 0.27973, + "74": 0.27306, + "75": 0.25761, + "76": 0.27858, + "77": 0.24804, + "78": 0.26307, + "79": 0.25987, + "80": 0.26126, + "81": 0.25077, + "82": 0.24475, + "83": 0.25581, + "84": 0.267, + "85": 0.25176, + "86": 0.2659, + "87": 0.24692, + "88": 0.24749, + "89": 0.26384, + "90": 0.24272, + "91": 0.26651, + 
"92": 0.25574, + "93": 0.26453, + "94": 0.27259, + "95": 0.25268, + "96": 0.24969, + "97": 0.2596, + "98": 0.24136, + "99": 0.25695, + "100": 0.25268 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..025cf16fd46 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.78091, + "2": 10.80272, + "3": 10.8036, + "4": 10.77566, + "5": 10.83259, + "6": 10.83704, + "7": 10.79728, + "8": 10.79467, + "9": 10.80828, + "10": 10.76154, + "11": 10.85384, + "12": 10.84189, + "13": 10.82465, + "14": 10.85824, + "15": 10.78235, + "16": 10.77923, + "17": 10.7484, + "18": 10.78919, + "19": 10.77567, + "20": 10.71707, + "21": 10.70767, + "22": 10.54782, + "23": 10.72977, + "24": 10.60346, + "25": 10.55815, + "26": 10.61659, + "27": 10.6449, + "28": 10.62536, + "29": 10.6349, + "30": 10.42303, + "31": 10.16459, + "32": 10.51284, + "33": 10.50836, + "34": 10.2667, + "35": 10.32353, + "36": 10.2895, + "37": 10.41051, + "38": 10.26406, + "39": 10.44988, + "40": 10.17537, + "41": 10.20908, + "42": 10.27843, + "43": 9.91808, + "44": 10.03128, + "45": 9.92032, + "46": 9.88579, + "47": 10.19208, + "48": 9.92758, + "49": 9.61634, + "50": 9.98512, + "51": 9.90532, + "52": 9.8039, + "53": 10.12749, + "54": 10.00016, + "55": 9.93664, + "56": 9.68581, + "57": 9.55837, + "58": 9.90508, + "59": 9.63839, + "60": 9.57464, + 
"61": 9.76841, + "62": 10.03826, + "63": 9.44553, + "64": 9.82755, + "65": 9.00746, + "66": 9.77476, + "67": 9.41315, + "68": 9.84101, + "69": 9.8283, + "70": 9.79049, + "71": 9.66947, + "72": 9.62799, + "73": 9.54696, + "74": 9.03684, + "75": 9.49167, + "76": 9.16779, + "77": 10.1088, + "78": 9.77072, + "79": 9.43806, + "80": 9.45438, + "81": 9.5225, + "82": 9.74228, + "83": 9.36999, + "84": 9.45397, + "85": 9.65808, + "86": 9.12501, + "87": 9.62705, + "88": 9.79641, + "89": 9.66075, + "90": 9.8512, + "91": 9.39414, + "92": 9.40741, + "93": 9.13573, + "94": 8.89066, + "95": 9.56273, + "96": 9.5712, + "97": 9.34355, + "98": 9.73013, + "99": 8.95039, + "100": 9.44212 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 30994.0, + "2": 32962.0, + "3": 33026.0, + "4": 30732.0, + "5": 36042.0, + "6": 36987.0, + "7": 34490.0, + "8": 31442.0, + "9": 33931.0, + "10": 29993.0, + "11": 37681.0, + "12": 34978.0, + "13": 36675.0, + "14": 37601.0, + "15": 34369.0, + "16": 36581.0, + "17": 34615.0, + "18": 34408.0, + "19": 35362.0, + "20": 32532.0, + "21": 33181.0, + "22": 30426.0, + "23": 37807.0, + "24": 32299.0, + "25": 30879.0, + "26": 33994.0, + "27": 34721.0, + "28": 36576.0, + "29": 37196.0, + "30": 32443.0, + "31": 30177.0, + "32": 35948.0, + "33": 37549.0, + "34": 32243.0, + "35": 33961.0, + "36": 34340.0, + "37": 37853.0, + "38": 35694.0, + "39": 38797.0, + "40": 36317.0, + "41": 35380.0, + "42": 36704.0, + "43": 34045.0, + "44": 33691.0, + "45": 35877.0, + "46": 36737.0, + "47": 40148.0, + "48": 36696.0, + "49": 36203.0, + "50": 38688.0, + "51": 37791.0, + "52": 37021.0, + "53": 41944.0, + "54": 40947.0, + "55": 37727.0, + "56": 40761.0, + "57": 37481.0, + "58": 41787.0, + "59": 39365.0, + "60": 40922.0, + "61": 41100.0, + "62": 43388.0, + "63": 38269.0, + "64": 43526.0, + "65": 41821.0, + "66": 44876.0, + "67": 42497.0, + "68": 39967.0, + "69": 41255.0, + "70": 45781.0, + "71": 42348.0, + "72": 42151.0, + "73": 
45043.0, + "74": 35705.0, + "75": 39397.0, + "76": 45340.0, + "77": 45670.0, + "78": 46614.0, + "79": 49159.0, + "80": 47317.0, + "81": 51048.0, + "82": 49312.0, + "83": 45257.0, + "84": 45494.0, + "85": 49366.0, + "86": 45783.0, + "87": 50223.0, + "88": 47536.0, + "89": 48826.0, + "90": 49499.0, + "91": 45726.0, + "92": 47926.0, + "93": 46433.0, + "94": 47675.0, + "95": 47504.0, + "96": 50174.0, + "97": 46465.0, + "98": 49255.0, + "99": 48053.0, + "100": 44507.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1254501376.0, + "2": 1254505472.0, + "3": 1254505472.0, + "4": 1254501376.0, + "5": 1254501888.0, + "6": 1254503424.0, + "7": 1254503936.0, + "8": 1254503936.0, + "9": 1254501888.0, + "10": 1254503424.0, + "11": 1254503936.0, + "12": 1254502912.0, + "13": 1254500864.0, + "14": 1254505472.0, + "15": 1254504448.0, + "16": 1254503424.0, + "17": 1254504448.0, + "18": 1254502400.0, + "19": 1254503936.0, + "20": 1254503424.0, + "21": 1254503424.0, + "22": 1254501376.0, + "23": 1254500864.0, + "24": 1254503424.0, + "25": 1254500352.0, + "26": 1254502400.0, + "27": 1254501888.0, + "28": 1254502912.0, + "29": 1254505472.0, + "30": 1254500352.0, + "31": 1254499328.0, + "32": 1254500352.0, + "33": 1254502912.0, + "34": 1254502912.0, + "35": 1254501888.0, + "36": 1254505472.0, + "37": 1254503424.0, + "38": 1254503936.0, + "39": 1254502912.0, + "40": 1254502912.0, + "41": 1254503424.0, + "42": 1254502912.0, + "43": 1254499840.0, + "44": 1254501376.0, + "45": 1254502400.0, + "46": 1254500864.0, + "47": 1254503936.0, + "48": 1254499840.0, + "49": 1254500352.0, + "50": 1254502912.0, + "51": 1254496768.0, + "52": 1254496256.0, + "53": 1254497792.0, + "54": 1254498304.0, + "55": 1254500352.0, + "56": 1254501888.0, + "57": 1254493184.0, + "58": 1254498304.0, + "59": 1254495232.0, + "60": 1254496768.0, + "61": 1254504960.0, + "62": 1254503936.0, + "63": 1254499328.0, + "64": 1254498816.0, + "65": 1254488576.0, 
+ "66": 1254502912.0, + "67": 1254498304.0, + "68": 1254505984.0, + "69": 1254501376.0, + "70": 1254502912.0, + "71": 1254504960.0, + "72": 1254496256.0, + "73": 1254504448.0, + "74": 1254495232.0, + "75": 1254504448.0, + "76": 1254503424.0, + "77": 1254503936.0, + "78": 1254500352.0, + "79": 1254500864.0, + "80": 1254499840.0, + "81": 1254503424.0, + "82": 1254500352.0, + "83": 1254497792.0, + "84": 1254497280.0, + "85": 1254499328.0, + "86": 1254498816.0, + "87": 1254505472.0, + "88": 1254499328.0, + "89": 1254500864.0, + "90": 1254502912.0, + "91": 1254505472.0, + "92": 1254502912.0, + "93": 1254505472.0, + "94": 1254500352.0, + "95": 1254501888.0, + "96": 1254501888.0, + "97": 1254499328.0, + "98": 1254507520.0, + "99": 1254497280.0, + "100": 1254499840.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1987779584.0, + "2": 2468141568.0, + "3": 2468920320.0, + "4": 2468920320.0, + "5": 2468920320.0, + "6": 2468920320.0, + "7": 2468920320.0, + "8": 2468920320.0, + "9": 2469234688.0, + "10": 2469234688.0, + "11": 2469234688.0, + "12": 2469234688.0, + "13": 2469234688.0, + "14": 2469234688.0, + "15": 2469234688.0, + "16": 2469234688.0, + "17": 2469234688.0, + "18": 2469234688.0, + "19": 2469234688.0, + "20": 2469234688.0, + "21": 2469234688.0, + "22": 2469234688.0, + "23": 2469234688.0, + "24": 2469234688.0, + "25": 2469234688.0, + "26": 2469234688.0, + "27": 2469234688.0, + "28": 2469234688.0, + "29": 2469234688.0, + "30": 2469234688.0, + "31": 2469234688.0, + "32": 2469234688.0, + "33": 2469234688.0, + "34": 2469234688.0, + "35": 2469234688.0, + "36": 2469234688.0, + "37": 2469234688.0, + "38": 2469234688.0, + "39": 2469234688.0, + "40": 2469234688.0, + "41": 2469234688.0, + "42": 2469234688.0, + "43": 2469234688.0, + "44": 2469234688.0, + "45": 2469234688.0, + "46": 2469234688.0, + "47": 2469234688.0, + "48": 2469234688.0, + "49": 2469234688.0, + "50": 2469234688.0, + "51": 2469234688.0, + 
"52": 2469234688.0, + "53": 2469234688.0, + "54": 2469234688.0, + "55": 2469234688.0, + "56": 2469234688.0, + "57": 2469234688.0, + "58": 2469234688.0, + "59": 2469234688.0, + "60": 2469234688.0, + "61": 2469234688.0, + "62": 2469234688.0, + "63": 2469234688.0, + "64": 2469234688.0, + "65": 2469234688.0, + "66": 2469234688.0, + "67": 2469234688.0, + "68": 2469234688.0, + "69": 2469234688.0, + "70": 2469234688.0, + "71": 2469234688.0, + "72": 2469234688.0, + "73": 2469234688.0, + "74": 2469234688.0, + "75": 2469234688.0, + "76": 2471084032.0, + "77": 2471084032.0, + "78": 2471084032.0, + "79": 2471084032.0, + "80": 2471084032.0, + "81": 2471084032.0, + "82": 2471084032.0, + "83": 2471084032.0, + "84": 2471084032.0, + "85": 2471084032.0, + "86": 2471084032.0, + "87": 2471084032.0, + "88": 2471084032.0, + "89": 2471084032.0, + "90": 2471084032.0, + "91": 2471084032.0, + "92": 2471084032.0, + "93": 2471084032.0, + "94": 2471084032.0, + "95": 2471084032.0, + "96": 2471084032.0, + "97": 2471084032.0, + "98": 2471084032.0, + "99": 2471084032.0, + "100": 2471084032.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.17389, + "2": 0.29264, + "3": 0.24602, + "4": 0.24527, + "5": 0.22453, + "6": 0.22311, + "7": 0.23274, + "8": 0.2252, + "9": 0.22875, + "10": 0.21336, + "11": 0.21953, + "12": 0.21057, + "13": 0.21762, + "14": 0.22015, + "15": 0.22934, + "16": 0.21241, + "17": 0.22416, + "18": 0.21545, + "19": 0.21467, + "20": 0.21475, + "21": 0.21061, + "22": 0.21275, + "23": 0.21475, + "24": 0.21185, + "25": 0.21253, + "26": 0.2112, + "27": 0.21285, + "28": 0.2167, + "29": 0.20854, + "30": 0.21576, + "31": 0.23787, + "32": 0.21289, + "33": 0.22111, + "34": 0.23768, + "35": 0.2106, + "36": 0.22199, + "37": 0.21758, + "38": 0.21584, + "39": 0.21031, + "40": 0.2149, + "41": 0.21829, + "42": 0.2324, + "43": 0.21985, + "44": 0.21241, + "45": 0.23011, + "46": 0.23336, + "47": 0.21312, + "48": 0.2234, + "49": 0.24557, 
+ "50": 0.21111, + "51": 0.25988, + "52": 0.23849, + "53": 0.21639, + "54": 0.21699, + "55": 0.22888, + "56": 0.30406, + "57": 0.23464, + "58": 0.23245, + "59": 0.22402, + "60": 0.22789, + "61": 0.21859, + "62": 0.21793, + "63": 0.25413, + "64": 0.23301, + "65": 0.2935, + "66": 0.22039, + "67": 0.3074, + "68": 0.2458, + "69": 0.21734, + "70": 0.21543, + "71": 0.23323, + "72": 0.22846, + "73": 0.25747, + "74": 0.23067, + "75": 0.21956, + "76": 0.24584, + "77": 0.222, + "78": 0.22595, + "79": 0.23137, + "80": 0.22335, + "81": 0.22154, + "82": 0.21547, + "83": 0.22443, + "84": 0.22286, + "85": 0.22074, + "86": 0.2341, + "87": 0.21707, + "88": 0.21529, + "89": 0.2232, + "90": 0.21712, + "91": 0.23519, + "92": 0.22408, + "93": 0.23443, + "94": 0.24578, + "95": 0.22228, + "96": 0.21797, + "97": 0.22197, + "98": 0.21363, + "99": 0.22332, + "100": 0.22233 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..1a09e73e300 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.78091, + "2": 10.80272, + "3": 10.8036, + "4": 10.77566, + "5": 10.83259, + "6": 10.83704, + "7": 10.79728, + "8": 10.79467, + "9": 10.80828, + "10": 10.76154, + "11": 10.85384, + "12": 10.84189, + "13": 10.82465, + "14": 10.85824, + "15": 10.78235, + "16": 10.77923, + "17": 10.7484, + "18": 10.78919, + "19": 10.77567, + "20": 10.71707, + "21": 
10.70767, + "22": 10.54782, + "23": 10.72977, + "24": 10.60346, + "25": 10.55815, + "26": 10.61659, + "27": 10.6449, + "28": 10.62536, + "29": 10.6349, + "30": 10.42303, + "31": 10.16459, + "32": 10.51284, + "33": 10.50836, + "34": 10.2667, + "35": 10.32353, + "36": 10.2895, + "37": 10.41051, + "38": 10.26406, + "39": 10.44988, + "40": 10.17537, + "41": 10.20908, + "42": 10.27843, + "43": 9.91808, + "44": 10.03128, + "45": 9.92032, + "46": 9.88579, + "47": 10.19208, + "48": 9.92758, + "49": 9.61634, + "50": 9.98512, + "51": 9.90532, + "52": 9.8039, + "53": 10.12749, + "54": 10.00016, + "55": 9.93664, + "56": 9.68581, + "57": 9.55837, + "58": 9.90508, + "59": 9.63839, + "60": 9.57464, + "61": 9.76841, + "62": 10.03826, + "63": 9.44553, + "64": 9.82755, + "65": 9.00746, + "66": 9.77476, + "67": 9.41315, + "68": 9.84101, + "69": 9.8283, + "70": 9.79049, + "71": 9.66947, + "72": 9.62799, + "73": 9.54696, + "74": 9.03684, + "75": 9.49167, + "76": 9.16779, + "77": 10.1088, + "78": 9.77072, + "79": 9.43806, + "80": 9.45438, + "81": 9.5225, + "82": 9.74228, + "83": 9.36999, + "84": 9.45397, + "85": 9.65808, + "86": 9.12501, + "87": 9.62705, + "88": 9.79641, + "89": 9.66075, + "90": 9.8512, + "91": 9.39414, + "92": 9.40741, + "93": 9.13573, + "94": 8.89066, + "95": 9.56273, + "96": 9.5712, + "97": 9.34355, + "98": 9.73013, + "99": 8.95039, + "100": 9.44212 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 30994.0, + "2": 32962.0, + "3": 33026.0, + "4": 30732.0, + "5": 36042.0, + "6": 36987.0, + "7": 34490.0, + "8": 31442.0, + "9": 33931.0, + "10": 29993.0, + "11": 37681.0, + "12": 34978.0, + "13": 36675.0, + "14": 37601.0, + "15": 34369.0, + "16": 36581.0, + "17": 34615.0, + "18": 34408.0, + "19": 35362.0, + "20": 32532.0, + "21": 33181.0, + "22": 30426.0, + "23": 37807.0, + "24": 32299.0, + "25": 30879.0, + "26": 33994.0, + "27": 34721.0, + "28": 36576.0, + "29": 37196.0, + "30": 32443.0, + "31": 30177.0, + "32": 
35948.0, + "33": 37549.0, + "34": 32243.0, + "35": 33961.0, + "36": 34340.0, + "37": 37853.0, + "38": 35694.0, + "39": 38797.0, + "40": 36317.0, + "41": 35380.0, + "42": 36704.0, + "43": 34045.0, + "44": 33691.0, + "45": 35877.0, + "46": 36737.0, + "47": 40148.0, + "48": 36696.0, + "49": 36203.0, + "50": 38688.0, + "51": 37791.0, + "52": 37021.0, + "53": 41944.0, + "54": 40947.0, + "55": 37727.0, + "56": 40761.0, + "57": 37481.0, + "58": 41787.0, + "59": 39365.0, + "60": 40922.0, + "61": 41100.0, + "62": 43388.0, + "63": 38269.0, + "64": 43526.0, + "65": 41821.0, + "66": 44876.0, + "67": 42497.0, + "68": 39967.0, + "69": 41255.0, + "70": 45781.0, + "71": 42348.0, + "72": 42151.0, + "73": 45043.0, + "74": 35705.0, + "75": 39397.0, + "76": 45340.0, + "77": 45670.0, + "78": 46614.0, + "79": 49159.0, + "80": 47317.0, + "81": 51048.0, + "82": 49312.0, + "83": 45257.0, + "84": 45494.0, + "85": 49366.0, + "86": 45783.0, + "87": 50223.0, + "88": 47536.0, + "89": 48826.0, + "90": 49499.0, + "91": 45726.0, + "92": 47926.0, + "93": 46433.0, + "94": 47675.0, + "95": 47504.0, + "96": 50174.0, + "97": 46465.0, + "98": 49255.0, + "99": 48053.0, + "100": 44507.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1254501376.0, + "2": 1254505472.0, + "3": 1254505472.0, + "4": 1254501376.0, + "5": 1254501888.0, + "6": 1254503424.0, + "7": 1254503936.0, + "8": 1254503936.0, + "9": 1254501888.0, + "10": 1254503424.0, + "11": 1254503936.0, + "12": 1254502912.0, + "13": 1254500864.0, + "14": 1254505472.0, + "15": 1254504448.0, + "16": 1254503424.0, + "17": 1254504448.0, + "18": 1254502400.0, + "19": 1254503936.0, + "20": 1254503424.0, + "21": 1254503424.0, + "22": 1254501376.0, + "23": 1254500864.0, + "24": 1254503424.0, + "25": 1254500352.0, + "26": 1254502400.0, + "27": 1254501888.0, + "28": 1254502912.0, + "29": 1254505472.0, + "30": 1254500352.0, + "31": 1254499328.0, + "32": 1254500352.0, + "33": 1254502912.0, + "34": 
1254502912.0, + "35": 1254501888.0, + "36": 1254505472.0, + "37": 1254503424.0, + "38": 1254503936.0, + "39": 1254502912.0, + "40": 1254502912.0, + "41": 1254503424.0, + "42": 1254502912.0, + "43": 1254499840.0, + "44": 1254501376.0, + "45": 1254502400.0, + "46": 1254500864.0, + "47": 1254503936.0, + "48": 1254499840.0, + "49": 1254500352.0, + "50": 1254502912.0, + "51": 1254496768.0, + "52": 1254496256.0, + "53": 1254497792.0, + "54": 1254498304.0, + "55": 1254500352.0, + "56": 1254501888.0, + "57": 1254493184.0, + "58": 1254498304.0, + "59": 1254495232.0, + "60": 1254496768.0, + "61": 1254504960.0, + "62": 1254503936.0, + "63": 1254499328.0, + "64": 1254498816.0, + "65": 1254488576.0, + "66": 1254502912.0, + "67": 1254498304.0, + "68": 1254505984.0, + "69": 1254501376.0, + "70": 1254502912.0, + "71": 1254504960.0, + "72": 1254496256.0, + "73": 1254504448.0, + "74": 1254495232.0, + "75": 1254504448.0, + "76": 1254503424.0, + "77": 1254503936.0, + "78": 1254500352.0, + "79": 1254500864.0, + "80": 1254499840.0, + "81": 1254503424.0, + "82": 1254500352.0, + "83": 1254497792.0, + "84": 1254497280.0, + "85": 1254499328.0, + "86": 1254498816.0, + "87": 1254505472.0, + "88": 1254499328.0, + "89": 1254500864.0, + "90": 1254502912.0, + "91": 1254505472.0, + "92": 1254502912.0, + "93": 1254505472.0, + "94": 1254500352.0, + "95": 1254501888.0, + "96": 1254501888.0, + "97": 1254499328.0, + "98": 1254507520.0, + "99": 1254497280.0, + "100": 1254499840.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1987779584.0, + "2": 2468141568.0, + "3": 2468920320.0, + "4": 2468920320.0, + "5": 2468920320.0, + "6": 2468920320.0, + "7": 2468920320.0, + "8": 2468920320.0, + "9": 2469234688.0, + "10": 2469234688.0, + "11": 2469234688.0, + "12": 2469234688.0, + "13": 2469234688.0, + "14": 2469234688.0, + "15": 2469234688.0, + "16": 2469234688.0, + "17": 2469234688.0, + "18": 2469234688.0, + "19": 2469234688.0, + "20": 
2469234688.0, + "21": 2469234688.0, + "22": 2469234688.0, + "23": 2469234688.0, + "24": 2469234688.0, + "25": 2469234688.0, + "26": 2469234688.0, + "27": 2469234688.0, + "28": 2469234688.0, + "29": 2469234688.0, + "30": 2469234688.0, + "31": 2469234688.0, + "32": 2469234688.0, + "33": 2469234688.0, + "34": 2469234688.0, + "35": 2469234688.0, + "36": 2469234688.0, + "37": 2469234688.0, + "38": 2469234688.0, + "39": 2469234688.0, + "40": 2469234688.0, + "41": 2469234688.0, + "42": 2469234688.0, + "43": 2469234688.0, + "44": 2469234688.0, + "45": 2469234688.0, + "46": 2469234688.0, + "47": 2469234688.0, + "48": 2469234688.0, + "49": 2469234688.0, + "50": 2469234688.0, + "51": 2469234688.0, + "52": 2469234688.0, + "53": 2469234688.0, + "54": 2469234688.0, + "55": 2469234688.0, + "56": 2469234688.0, + "57": 2469234688.0, + "58": 2469234688.0, + "59": 2469234688.0, + "60": 2469234688.0, + "61": 2469234688.0, + "62": 2469234688.0, + "63": 2469234688.0, + "64": 2469234688.0, + "65": 2469234688.0, + "66": 2469234688.0, + "67": 2469234688.0, + "68": 2469234688.0, + "69": 2469234688.0, + "70": 2469234688.0, + "71": 2469234688.0, + "72": 2469234688.0, + "73": 2469234688.0, + "74": 2469234688.0, + "75": 2469234688.0, + "76": 2471084032.0, + "77": 2471084032.0, + "78": 2471084032.0, + "79": 2471084032.0, + "80": 2471084032.0, + "81": 2471084032.0, + "82": 2471084032.0, + "83": 2471084032.0, + "84": 2471084032.0, + "85": 2471084032.0, + "86": 2471084032.0, + "87": 2471084032.0, + "88": 2471084032.0, + "89": 2471084032.0, + "90": 2471084032.0, + "91": 2471084032.0, + "92": 2471084032.0, + "93": 2471084032.0, + "94": 2471084032.0, + "95": 2471084032.0, + "96": 2471084032.0, + "97": 2471084032.0, + "98": 2471084032.0, + "99": 2471084032.0, + "100": 2471084032.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 16.55217, + "2": 0.35181, + "3": 0.30566, + "4": 0.27474, + "5": 0.25821, + "6": 0.24756, + "7": 0.26543, + "8": 
0.25377, + "9": 0.25669, + "10": 0.24857, + "11": 0.25265, + "12": 0.25052, + "13": 0.25023, + "14": 0.24925, + "15": 0.26244, + "16": 0.25012, + "17": 0.26253, + "18": 0.24643, + "19": 0.24809, + "20": 0.24556, + "21": 0.24394, + "22": 0.251, + "23": 0.24828, + "24": 0.24669, + "25": 0.24387, + "26": 0.24678, + "27": 0.24651, + "28": 0.25139, + "29": 0.24752, + "30": 0.24424, + "31": 0.28311, + "32": 0.25225, + "33": 0.24909, + "34": 0.26885, + "35": 0.25395, + "36": 0.2523, + "37": 0.24797, + "38": 0.25223, + "39": 0.24992, + "40": 0.25852, + "41": 0.24878, + "42": 0.2538, + "43": 0.2597, + "44": 0.24622, + "45": 0.26158, + "46": 0.27295, + "47": 0.2509, + "48": 0.26644, + "49": 0.28407, + "50": 0.25557, + "51": 0.26677, + "52": 0.27657, + "53": 0.25511, + "54": 0.25626, + "55": 0.26088, + "56": 0.30712, + "57": 0.27149, + "58": 0.25315, + "59": 0.26247, + "60": 0.26163, + "61": 0.25105, + "62": 0.24787, + "63": 0.27859, + "64": 0.26395, + "65": 0.32678, + "66": 0.25441, + "67": 0.30841, + "68": 0.27583, + "69": 0.2474, + "70": 0.25895, + "71": 0.27463, + "72": 0.26044, + "73": 0.27953, + "74": 0.27908, + "75": 0.26127, + "76": 0.28492, + "77": 0.25287, + "78": 0.26927, + "79": 0.26632, + "80": 0.26465, + "81": 0.25418, + "82": 0.25, + "83": 0.26012, + "84": 0.27232, + "85": 0.25707, + "86": 0.26564, + "87": 0.25446, + "88": 0.24718, + "89": 0.26899, + "90": 0.24357, + "91": 0.27455, + "92": 0.25494, + "93": 0.26852, + "94": 0.27917, + "95": 0.258, + "96": 0.25134, + "97": 0.26377, + "98": 0.24669, + "99": 0.26096, + "100": 0.25411 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 36909804253..7688d6ec4ea 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.80815, + "2": 10.82612, + "3": 10.83032, + "4": 10.80963, "5": 10.84127, + "6": 10.8581, + "7": 10.81967, + "8": 10.82506, + "9": 10.83749, "10": 10.7783, + "11": 10.85781, + "12": 10.85539, + "13": 10.85233, + "14": 10.86699, "15": 10.81253, + "16": 10.80292, + "17": 10.78098, + "18": 10.80788, + "19": 10.79276, "20": 10.74548, + "21": 10.72785, + "22": 10.59608, + "23": 10.73999, + "24": 10.63509, "25": 10.59832, + "26": 10.63517, + "27": 10.65744, + "28": 10.64536, + "29": 10.65122, "30": 10.44144, + "31": 10.21465, + "32": 10.53342, + "33": 10.52518, + "34": 10.30171, "35": 10.34871, + "36": 10.30843, + "37": 10.42353, + "38": 10.28859, + "39": 10.45514, "40": 10.19363, + "41": 10.22791, + "42": 10.29725, + "43": 9.95871, + "44": 10.06717, "45": 9.95955, + "46": 9.92614, + "47": 10.20607, + "48": 9.96021, + "49": 9.65854, "50": 10.01296 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 31590.0, + "2": 32940.0, + "3": 33668.0, + "4": 31186.0, "5": 36214.0, + "6": 37169.0, + "7": 34770.0, + "8": 31862.0, + "9": 34102.0, "10": 30394.0, + "11": 38432.0, + "12": 35039.0, + "13": 37236.0, + "14": 37668.0, "15": 34199.0, + "16": 36659.0, + "17": 34831.0, + "18": 35011.0, + "19": 35486.0, "20": 33221.0, + "21": 33971.0, + "22": 30501.0, + "23": 38411.0, + "24": 32764.0, "25": 31363.0, + "26": 34624.0, + "27": 36096.0, + "28": 37021.0, + "29": 37900.0, "30": 33066.0, + "31": 29871.0, + "32": 36113.0, + "33": 38168.0, + "34": 33074.0, "35": 34300.0, + "36": 35363.0, + 
"37": 38150.0, + "38": 35798.0, + "39": 38945.0, "40": 35780.0, + "41": 35999.0, + "42": 36611.0, + "43": 33781.0, + "44": 34207.0, "45": 35198.0, + "46": 36779.0, + "47": 40585.0, + "48": 36434.0, + "49": 35787.0, "50": 38996.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1027085824.0, + "2": 1027085824.0, + "3": 1027086848.0, + "4": 1027086336.0, "5": 1027086848.0, + "6": 1027085312.0, + "7": 1027081728.0, + "8": 1027082752.0, + "9": 1027089408.0, "10": 1027083776.0, + "11": 1027084288.0, + "12": 1027084288.0, + "13": 1027086848.0, + "14": 1027083776.0, "15": 1027085312.0, + "16": 1027086336.0, + "17": 1027084288.0, + "18": 1027088384.0, + "19": 1027086848.0, "20": 1027089920.0, + "21": 1027083264.0, + "22": 1027086336.0, + "23": 1027086848.0, + "24": 1027085824.0, "25": 1027084288.0, + "26": 1027085312.0, + "27": 1027085312.0, + "28": 1027082752.0, + "29": 1027083776.0, "30": 1027082240.0, + "31": 1027074048.0, + "32": 1027077120.0, + "33": 1027086336.0, + "34": 1027083264.0, "35": 1027085312.0, + "36": 1027083776.0, + "37": 1027084288.0, + "38": 1027085312.0, + "39": 1027080704.0, "40": 1027081728.0, + "41": 1027083264.0, + "42": 1027086848.0, + "43": 1027079680.0, + "44": 1027082752.0, "45": 1027082752.0, + "46": 1027073536.0, + "47": 1027082752.0, + "48": 1027081216.0, + "49": 1027077120.0, "50": 1027084800.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 3007080960.0, + "2": 3247499776.0, + "3": 3247499776.0, + "4": 3248093184.0, "5": 3248476160.0, + "6": 3248476160.0, + "7": 3248476160.0, + "8": 3248476160.0, + "9": 3248476160.0, "10": 3249142784.0, + "11": 3249142784.0, + "12": 3249142784.0, + "13": 3249142784.0, + "14": 3249142784.0, "15": 3249142784.0, + "16": 3249142784.0, + "17": 3249142784.0, + "18": 3249142784.0, + "19": 3249142784.0, "20": 3249142784.0, + "21": 3249142784.0, + "22": 
3249860608.0, + "23": 3249860608.0, + "24": 3249972736.0, "25": 3249972736.0, + "26": 3249972736.0, + "27": 3249972736.0, + "28": 3249972736.0, + "29": 3249972736.0, "30": 3249972736.0, + "31": 3249972736.0, + "32": 3249972736.0, + "33": 3249972736.0, + "34": 3249972736.0, "35": 3249972736.0, + "36": 3249972736.0, + "37": 3249972736.0, + "38": 3249972736.0, + "39": 3249972736.0, "40": 3249972736.0, + "41": 3249972736.0, + "42": 3249972736.0, + "43": 3249972736.0, + "44": 3249972736.0, "45": 3249972736.0, + "46": 3249972736.0, + "47": 3249972736.0, + "48": 3249972736.0, + "49": 3249972736.0, "50": 3249972736.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 14.77721, - "5": 0.21434, - "10": 0.20442, - "15": 0.2258, - "20": 0.19737, - "25": 0.19707, - "30": 0.20038, - "35": 0.19865, - "40": 0.22651, - "45": 0.21953, - "50": 0.20317 + "1": 13.191, + "2": 0.30069, + "3": 0.25544, + "4": 0.25726, + "5": 0.25285, + "6": 0.23678, + "7": 0.24206, + "8": 0.23892, + "9": 0.23754, + "10": 0.23806, + "11": 0.22979, + "12": 0.23562, + "13": 0.24016, + "14": 0.22801, + "15": 0.25436, + "16": 0.23327, + "17": 0.24589, + "18": 0.23141, + "19": 0.23961, + "20": 0.23003, + "21": 0.22997, + "22": 0.23267, + "23": 0.22726, + "24": 0.22991, + "25": 0.22721, + "26": 0.23348, + "27": 0.23492, + "28": 0.22428, + "29": 0.23121, + "30": 0.23005, + "31": 0.27744, + "32": 0.22525, + "33": 0.22626, + "34": 0.26339, + "35": 0.23208, + "36": 0.24495, + "37": 0.22722, + "38": 0.23099, + "39": 0.22752, + "40": 0.25494, + "41": 0.24054, + "42": 0.22921, + "43": 0.249, + "44": 0.2389, + "45": 0.24525, + "46": 0.26032, + "47": 0.22841, + "48": 0.26262, + "49": 0.30096, + "50": 0.2341 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..275dd98287a --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.80815, + "2": 10.82612, + "3": 10.83032, + "4": 10.80963, + "5": 10.84127, + "6": 10.8581, + "7": 10.81967, + "8": 10.82506, + "9": 10.83749, + "10": 10.7783, + "11": 10.85781, + "12": 10.85539, + "13": 10.85233, + "14": 10.86699, + "15": 10.81253, + "16": 10.80292, + "17": 10.78098, + "18": 10.80788, + "19": 10.79276, + "20": 10.74548, + "21": 10.72785, + "22": 10.59608, + "23": 10.73999, + "24": 10.63509, + "25": 10.59832, + "26": 10.63517, + "27": 10.65744, + "28": 10.64536, + "29": 10.65122, + "30": 10.44144, + "31": 10.21465, + "32": 10.53342, + "33": 10.52518, + "34": 10.30171, + "35": 10.34871, + "36": 10.30843, + "37": 10.42353, + "38": 10.28859, + "39": 10.45514, + "40": 10.19363, + "41": 10.22791, + "42": 10.29725, + "43": 9.95871, + "44": 10.06717, + "45": 9.95955, + "46": 9.92614, + "47": 10.20607, + "48": 9.96021, + "49": 9.65854, + "50": 10.01296 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 31590.0, + "2": 32940.0, + "3": 33668.0, + "4": 31186.0, + "5": 36214.0, + "6": 37169.0, + "7": 34770.0, + "8": 31862.0, + "9": 34102.0, + "10": 30394.0, + "11": 38432.0, + "12": 35039.0, + "13": 37236.0, + "14": 37668.0, + "15": 34199.0, + "16": 36659.0, + "17": 34831.0, + "18": 35011.0, + "19": 35486.0, + "20": 33221.0, + "21": 33971.0, + "22": 30501.0, + "23": 38411.0, + "24": 32764.0, + "25": 31363.0, + "26": 34624.0, + "27": 36096.0, + "28": 37021.0, + "29": 37900.0, + "30": 33066.0, + 
"31": 29871.0, + "32": 36113.0, + "33": 38168.0, + "34": 33074.0, + "35": 34300.0, + "36": 35363.0, + "37": 38150.0, + "38": 35798.0, + "39": 38945.0, + "40": 35780.0, + "41": 35999.0, + "42": 36611.0, + "43": 33781.0, + "44": 34207.0, + "45": 35198.0, + "46": 36779.0, + "47": 40585.0, + "48": 36434.0, + "49": 35787.0, + "50": 38996.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1027085824.0, + "2": 1027085824.0, + "3": 1027086848.0, + "4": 1027086336.0, + "5": 1027086848.0, + "6": 1027085312.0, + "7": 1027081728.0, + "8": 1027082752.0, + "9": 1027089408.0, + "10": 1027083776.0, + "11": 1027084288.0, + "12": 1027084288.0, + "13": 1027086848.0, + "14": 1027083776.0, + "15": 1027085312.0, + "16": 1027086336.0, + "17": 1027084288.0, + "18": 1027088384.0, + "19": 1027086848.0, + "20": 1027089920.0, + "21": 1027083264.0, + "22": 1027086336.0, + "23": 1027086848.0, + "24": 1027085824.0, + "25": 1027084288.0, + "26": 1027085312.0, + "27": 1027085312.0, + "28": 1027082752.0, + "29": 1027083776.0, + "30": 1027082240.0, + "31": 1027074048.0, + "32": 1027077120.0, + "33": 1027086336.0, + "34": 1027083264.0, + "35": 1027085312.0, + "36": 1027083776.0, + "37": 1027084288.0, + "38": 1027085312.0, + "39": 1027080704.0, + "40": 1027081728.0, + "41": 1027083264.0, + "42": 1027086848.0, + "43": 1027079680.0, + "44": 1027082752.0, + "45": 1027082752.0, + "46": 1027073536.0, + "47": 1027082752.0, + "48": 1027081216.0, + "49": 1027077120.0, + "50": 1027084800.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3007080960.0, + "2": 3247499776.0, + "3": 3247499776.0, + "4": 3248093184.0, + "5": 3248476160.0, + "6": 3248476160.0, + "7": 3248476160.0, + "8": 3248476160.0, + "9": 3248476160.0, + "10": 3249142784.0, + "11": 3249142784.0, + "12": 3249142784.0, + "13": 3249142784.0, + "14": 3249142784.0, + "15": 3249142784.0, + "16": 3249142784.0, + "17": 
3249142784.0, + "18": 3249142784.0, + "19": 3249142784.0, + "20": 3249142784.0, + "21": 3249142784.0, + "22": 3249860608.0, + "23": 3249860608.0, + "24": 3249972736.0, + "25": 3249972736.0, + "26": 3249972736.0, + "27": 3249972736.0, + "28": 3249972736.0, + "29": 3249972736.0, + "30": 3249972736.0, + "31": 3249972736.0, + "32": 3249972736.0, + "33": 3249972736.0, + "34": 3249972736.0, + "35": 3249972736.0, + "36": 3249972736.0, + "37": 3249972736.0, + "38": 3249972736.0, + "39": 3249972736.0, + "40": 3249972736.0, + "41": 3249972736.0, + "42": 3249972736.0, + "43": 3249972736.0, + "44": 3249972736.0, + "45": 3249972736.0, + "46": 3249972736.0, + "47": 3249972736.0, + "48": 3249972736.0, + "49": 3249972736.0, + "50": 3249972736.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.71692, + "2": 0.26373, + "3": 0.22224, + "4": 0.22077, + "5": 0.21189, + "6": 0.20289, + "7": 0.21135, + "8": 0.20381, + "9": 0.19968, + "10": 0.20492, + "11": 0.19946, + "12": 0.20155, + "13": 0.20199, + "14": 0.19656, + "15": 0.22053, + "16": 0.20059, + "17": 0.21367, + "18": 0.19607, + "19": 0.20515, + "20": 0.19743, + "21": 0.19704, + "22": 0.20196, + "23": 0.19722, + "24": 0.20083, + "25": 0.19715, + "26": 0.19715, + "27": 0.19781, + "28": 0.19694, + "29": 0.20125, + "30": 0.19779, + "31": 0.23471, + "32": 0.19855, + "33": 0.19914, + "34": 0.22545, + "35": 0.19732, + "36": 0.21424, + "37": 0.19385, + "38": 0.2012, + "39": 0.19477, + "40": 0.21557, + "41": 0.20631, + "42": 0.20013, + "43": 0.20558, + "44": 0.2055, + "45": 0.2088, + "46": 0.21767, + "47": 0.19618, + "48": 0.22507, + "49": 0.24168, + "50": 0.19817 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..089545b6f4a --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.80815, + "2": 10.82612, + "3": 10.83032, + "4": 10.80963, + "5": 10.84127, + "6": 10.8581, + "7": 10.81967, + "8": 10.82506, + "9": 10.83749, + "10": 10.7783, + "11": 10.85781, + "12": 10.85539, + "13": 10.85233, + "14": 10.86699, + "15": 10.81253, + "16": 10.80292, + "17": 10.78098, + "18": 10.80788, + "19": 10.79276, + "20": 10.74548, + "21": 10.72785, + "22": 10.59608, + "23": 10.73999, + "24": 10.63509, + "25": 10.59832, + "26": 10.63517, + "27": 10.65744, + "28": 10.64536, + "29": 10.65122, + "30": 10.44144, + "31": 10.21465, + "32": 10.53342, + "33": 10.52518, + "34": 10.30171, + "35": 10.34871, + "36": 10.30843, + "37": 10.42353, + "38": 10.28859, + "39": 10.45514, + "40": 10.19363, + "41": 10.22791, + "42": 10.29725, + "43": 9.95871, + "44": 10.06717, + "45": 9.95955, + "46": 9.92614, + "47": 10.20607, + "48": 9.96021, + "49": 9.65854, + "50": 10.01296 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 31590.0, + "2": 32940.0, + "3": 33668.0, + "4": 31186.0, + "5": 36214.0, + "6": 37169.0, + "7": 34770.0, + "8": 31862.0, + "9": 34102.0, + "10": 30394.0, + "11": 38432.0, + "12": 35039.0, + "13": 37236.0, + "14": 37668.0, + "15": 34199.0, + "16": 36659.0, + "17": 34831.0, + "18": 35011.0, + "19": 35486.0, + "20": 33221.0, + "21": 33971.0, + "22": 30501.0, + "23": 38411.0, + "24": 32764.0, + "25": 31363.0, + "26": 34624.0, + "27": 36096.0, + "28": 37021.0, + "29": 37900.0, + "30": 33066.0, + "31": 
29871.0, + "32": 36113.0, + "33": 38168.0, + "34": 33074.0, + "35": 34300.0, + "36": 35363.0, + "37": 38150.0, + "38": 35798.0, + "39": 38945.0, + "40": 35780.0, + "41": 35999.0, + "42": 36611.0, + "43": 33781.0, + "44": 34207.0, + "45": 35198.0, + "46": 36779.0, + "47": 40585.0, + "48": 36434.0, + "49": 35787.0, + "50": 38996.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1027085824.0, + "2": 1027085824.0, + "3": 1027086848.0, + "4": 1027086336.0, + "5": 1027086848.0, + "6": 1027085312.0, + "7": 1027081728.0, + "8": 1027082752.0, + "9": 1027089408.0, + "10": 1027083776.0, + "11": 1027084288.0, + "12": 1027084288.0, + "13": 1027086848.0, + "14": 1027083776.0, + "15": 1027085312.0, + "16": 1027086336.0, + "17": 1027084288.0, + "18": 1027088384.0, + "19": 1027086848.0, + "20": 1027089920.0, + "21": 1027083264.0, + "22": 1027086336.0, + "23": 1027086848.0, + "24": 1027085824.0, + "25": 1027084288.0, + "26": 1027085312.0, + "27": 1027085312.0, + "28": 1027082752.0, + "29": 1027083776.0, + "30": 1027082240.0, + "31": 1027074048.0, + "32": 1027077120.0, + "33": 1027086336.0, + "34": 1027083264.0, + "35": 1027085312.0, + "36": 1027083776.0, + "37": 1027084288.0, + "38": 1027085312.0, + "39": 1027080704.0, + "40": 1027081728.0, + "41": 1027083264.0, + "42": 1027086848.0, + "43": 1027079680.0, + "44": 1027082752.0, + "45": 1027082752.0, + "46": 1027073536.0, + "47": 1027082752.0, + "48": 1027081216.0, + "49": 1027077120.0, + "50": 1027084800.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3007080960.0, + "2": 3247499776.0, + "3": 3247499776.0, + "4": 3248093184.0, + "5": 3248476160.0, + "6": 3248476160.0, + "7": 3248476160.0, + "8": 3248476160.0, + "9": 3248476160.0, + "10": 3249142784.0, + "11": 3249142784.0, + "12": 3249142784.0, + "13": 3249142784.0, + "14": 3249142784.0, + "15": 3249142784.0, + "16": 3249142784.0, + "17": 
3249142784.0, + "18": 3249142784.0, + "19": 3249142784.0, + "20": 3249142784.0, + "21": 3249142784.0, + "22": 3249860608.0, + "23": 3249860608.0, + "24": 3249972736.0, + "25": 3249972736.0, + "26": 3249972736.0, + "27": 3249972736.0, + "28": 3249972736.0, + "29": 3249972736.0, + "30": 3249972736.0, + "31": 3249972736.0, + "32": 3249972736.0, + "33": 3249972736.0, + "34": 3249972736.0, + "35": 3249972736.0, + "36": 3249972736.0, + "37": 3249972736.0, + "38": 3249972736.0, + "39": 3249972736.0, + "40": 3249972736.0, + "41": 3249972736.0, + "42": 3249972736.0, + "43": 3249972736.0, + "44": 3249972736.0, + "45": 3249972736.0, + "46": 3249972736.0, + "47": 3249972736.0, + "48": 3249972736.0, + "49": 3249972736.0, + "50": 3249972736.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.20887, + "2": 0.29449, + "3": 0.26099, + "4": 0.25199, + "5": 0.24285, + "6": 0.23658, + "7": 0.24248, + "8": 0.23258, + "9": 0.22661, + "10": 0.23769, + "11": 0.22933, + "12": 0.23288, + "13": 0.23074, + "14": 0.22376, + "15": 0.25054, + "16": 0.22881, + "17": 0.23932, + "18": 0.22427, + "19": 0.23467, + "20": 0.22747, + "21": 0.22662, + "22": 0.22866, + "23": 0.22726, + "24": 0.22901, + "25": 0.22654, + "26": 0.22683, + "27": 0.22909, + "28": 0.2264, + "29": 0.23339, + "30": 0.23066, + "31": 0.27285, + "32": 0.22966, + "33": 0.23016, + "34": 0.24956, + "35": 0.23114, + "36": 0.24161, + "37": 0.22585, + "38": 0.23047, + "39": 0.22695, + "40": 0.24845, + "41": 0.23491, + "42": 0.22656, + "43": 0.23744, + "44": 0.23602, + "45": 0.24859, + "46": 0.25828, + "47": 0.2367, + "48": 0.2564, + "49": 0.27812, + "50": 0.23401 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..96602c602c1 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8028, + "2": 10.82515, + "3": 10.81853, + "4": 10.80334, + "5": 10.85317, + "6": 10.86077, + "7": 10.83004, + "8": 10.82041, + "9": 10.8343, + "10": 10.79253, + "11": 10.86874, + "12": 10.84623, + "13": 10.85032, + "14": 10.87276, + "15": 10.81762, + "16": 10.80827, + "17": 10.78057, + "18": 10.80212, + "19": 10.80623, + "20": 10.74263, + "21": 10.72129, + "22": 10.60064, + "23": 10.73585, + "24": 10.62773, + "25": 10.58726, + "26": 10.64479, + "27": 10.65744, + "28": 10.633, + "29": 10.64664, + "30": 10.43425, + "31": 10.20993, + "32": 10.52274, + "33": 10.5182, + "34": 10.30593, + "35": 10.35057, + "36": 10.32257, + "37": 10.42006, + "38": 10.28232, + "39": 10.47402, + "40": 10.18634, + "41": 10.22711, + "42": 10.29407, + "43": 9.96562, + "44": 10.07121, + "45": 9.95891, + "46": 9.92944, + "47": 10.23158, + "48": 9.96456, + "49": 9.6648, + "50": 10.0194 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 31321.0, + "2": 33507.0, + "3": 33742.0, + "4": 31142.0, + "5": 36637.0, + "6": 37952.0, + "7": 35367.0, + "8": 31793.0, + "9": 34742.0, + "10": 30318.0, + "11": 38311.0, + "12": 35873.0, + "13": 37077.0, + "14": 38139.0, + "15": 35096.0, + "16": 36153.0, + "17": 34599.0, + "18": 35615.0, + "19": 36094.0, + "20": 33013.0, + "21": 33392.0, + "22": 30732.0, + "23": 37995.0, + "24": 32271.0, + "25": 30677.0, + "26": 34406.0, + "27": 35346.0, + "28": 37369.0, + "29": 38116.0, + "30": 32775.0, + 
"31": 30305.0, + "32": 36349.0, + "33": 38243.0, + "34": 33070.0, + "35": 34420.0, + "36": 34971.0, + "37": 38372.0, + "38": 36065.0, + "39": 38349.0, + "40": 36074.0, + "41": 36445.0, + "42": 37346.0, + "43": 33959.0, + "44": 33566.0, + "45": 35624.0, + "46": 36724.0, + "47": 40791.0, + "48": 35583.0, + "49": 34833.0, + "50": 39159.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 863505920.0, + "2": 863503872.0, + "3": 863507968.0, + "4": 863507968.0, + "5": 863502848.0, + "6": 863505408.0, + "7": 863508480.0, + "8": 863503872.0, + "9": 863506432.0, + "10": 863507456.0, + "11": 863503872.0, + "12": 863504896.0, + "13": 863506432.0, + "14": 863506432.0, + "15": 863503872.0, + "16": 863507456.0, + "17": 863511552.0, + "18": 863502848.0, + "19": 863505408.0, + "20": 863504896.0, + "21": 863508480.0, + "22": 863509504.0, + "23": 863507968.0, + "24": 863506944.0, + "25": 863506944.0, + "26": 863506944.0, + "27": 863504896.0, + "28": 863504896.0, + "29": 863505408.0, + "30": 863508992.0, + "31": 863515136.0, + "32": 863512064.0, + "33": 863506944.0, + "34": 863509504.0, + "35": 863511040.0, + "36": 863508992.0, + "37": 863505408.0, + "38": 863505920.0, + "39": 863507456.0, + "40": 863508480.0, + "41": 863513600.0, + "42": 863506432.0, + "43": 863510016.0, + "44": 863512576.0, + "45": 863503872.0, + "46": 863524352.0, + "47": 863503872.0, + "48": 863517696.0, + "49": 863512064.0, + "50": 863505920.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2741182976.0, + "2": 2981644288.0, + "3": 2982039040.0, + "4": 2984047104.0, + "5": 2984047104.0, + "6": 2984047104.0, + "7": 2984047104.0, + "8": 2984047104.0, + "9": 2984047104.0, + "10": 2984047104.0, + "11": 2984047104.0, + "12": 2984047104.0, + "13": 2984047104.0, + "14": 2984047104.0, + "15": 2984047104.0, + "16": 2984047104.0, + "17": 2985508864.0, + "18": 2985508864.0, + "19": 
2985508864.0, + "20": 2985508864.0, + "21": 2985508864.0, + "22": 2985508864.0, + "23": 2985508864.0, + "24": 2985508864.0, + "25": 2985508864.0, + "26": 2985508864.0, + "27": 2985508864.0, + "28": 2985508864.0, + "29": 2985508864.0, + "30": 2985508864.0, + "31": 2986932736.0, + "32": 2986932736.0, + "33": 2986932736.0, + "34": 2986932736.0, + "35": 2986932736.0, + "36": 2986932736.0, + "37": 2986932736.0, + "38": 2986932736.0, + "39": 2986932736.0, + "40": 2988336640.0, + "41": 2988336640.0, + "42": 2988336640.0, + "43": 2988336640.0, + "44": 2988336640.0, + "45": 2988336640.0, + "46": 2990742016.0, + "47": 2990742016.0, + "48": 2990742016.0, + "49": 2990742016.0, + "50": 2990742016.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 28.65799, + "2": 0.38137, + "3": 0.29722, + "4": 0.29497, + "5": 0.29498, + "6": 0.29349, + "7": 0.28205, + "8": 0.28271, + "9": 0.28924, + "10": 0.28158, + "11": 0.28091, + "12": 0.28034, + "13": 0.28985, + "14": 0.28034, + "15": 0.28108, + "16": 0.28775, + "17": 0.28792, + "18": 0.28403, + "19": 0.28372, + "20": 0.2913, + "21": 0.28324, + "22": 0.28526, + "23": 0.28665, + "24": 0.28778, + "25": 0.28462, + "26": 0.28385, + "27": 0.29573, + "28": 0.28896, + "29": 0.28509, + "30": 0.28863, + "31": 0.28863, + "32": 0.28591, + "33": 0.28417, + "34": 0.2921, + "35": 0.28486, + "36": 0.28401, + "37": 0.28884, + "38": 0.28899, + "39": 0.28435, + "40": 0.28532, + "41": 0.29387, + "42": 0.28493, + "43": 0.28685, + "44": 0.28897, + "45": 0.28501, + "46": 0.28487, + "47": 0.28307, + "48": 0.29529, + "49": 0.28524, + "50": 0.28877 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..9dab947d0b7 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8028, + "2": 10.82515, + "3": 10.81853, + "4": 10.80334, + "5": 10.85317, + "6": 10.86077, + "7": 10.83004, + "8": 10.82041, + "9": 10.8343, + "10": 10.79253, + "11": 10.86874, + "12": 10.84623, + "13": 10.85032, + "14": 10.87276, + "15": 10.81762, + "16": 10.80827, + "17": 10.78057, + "18": 10.80212, + "19": 10.80623, + "20": 10.74263, + "21": 10.72129, + "22": 10.60064, + "23": 10.73585, + "24": 10.62773, + "25": 10.58726, + "26": 10.64479, + "27": 10.65744, + "28": 10.633, + "29": 10.64664, + "30": 10.43425, + "31": 10.20993, + "32": 10.52274, + "33": 10.5182, + "34": 10.30593, + "35": 10.35057, + "36": 10.32257, + "37": 10.42006, + "38": 10.28232, + "39": 10.47402, + "40": 10.18634, + "41": 10.22711, + "42": 10.29407, + "43": 9.96562, + "44": 10.07121, + "45": 9.95891, + "46": 9.92944, + "47": 10.23158, + "48": 9.96456, + "49": 9.6648, + "50": 10.0194 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 31321.0, + "2": 33507.0, + "3": 33742.0, + "4": 31142.0, + "5": 36637.0, + "6": 37952.0, + "7": 35367.0, + "8": 31793.0, + "9": 34742.0, + "10": 30318.0, + "11": 38311.0, + "12": 35873.0, + "13": 37077.0, + "14": 38139.0, + "15": 35096.0, + "16": 36153.0, + "17": 34599.0, + "18": 35615.0, + "19": 36094.0, + "20": 33013.0, + "21": 33392.0, + "22": 30732.0, + "23": 37995.0, + "24": 32271.0, + "25": 30677.0, + "26": 34406.0, + "27": 35346.0, + "28": 37369.0, + "29": 38116.0, + "30": 32775.0, + "31": 
30305.0, + "32": 36349.0, + "33": 38243.0, + "34": 33070.0, + "35": 34420.0, + "36": 34971.0, + "37": 38372.0, + "38": 36065.0, + "39": 38349.0, + "40": 36074.0, + "41": 36445.0, + "42": 37346.0, + "43": 33959.0, + "44": 33566.0, + "45": 35624.0, + "46": 36724.0, + "47": 40791.0, + "48": 35583.0, + "49": 34833.0, + "50": 39159.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 863505920.0, + "2": 863503872.0, + "3": 863507968.0, + "4": 863507968.0, + "5": 863502848.0, + "6": 863505408.0, + "7": 863508480.0, + "8": 863503872.0, + "9": 863506432.0, + "10": 863507456.0, + "11": 863503872.0, + "12": 863504896.0, + "13": 863506432.0, + "14": 863506432.0, + "15": 863503872.0, + "16": 863507456.0, + "17": 863511552.0, + "18": 863502848.0, + "19": 863505408.0, + "20": 863504896.0, + "21": 863508480.0, + "22": 863509504.0, + "23": 863507968.0, + "24": 863506944.0, + "25": 863506944.0, + "26": 863506944.0, + "27": 863504896.0, + "28": 863504896.0, + "29": 863505408.0, + "30": 863508992.0, + "31": 863515136.0, + "32": 863512064.0, + "33": 863506944.0, + "34": 863509504.0, + "35": 863511040.0, + "36": 863508992.0, + "37": 863505408.0, + "38": 863505920.0, + "39": 863507456.0, + "40": 863508480.0, + "41": 863513600.0, + "42": 863506432.0, + "43": 863510016.0, + "44": 863512576.0, + "45": 863503872.0, + "46": 863524352.0, + "47": 863503872.0, + "48": 863517696.0, + "49": 863512064.0, + "50": 863505920.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2741182976.0, + "2": 2981644288.0, + "3": 2982039040.0, + "4": 2984047104.0, + "5": 2984047104.0, + "6": 2984047104.0, + "7": 2984047104.0, + "8": 2984047104.0, + "9": 2984047104.0, + "10": 2984047104.0, + "11": 2984047104.0, + "12": 2984047104.0, + "13": 2984047104.0, + "14": 2984047104.0, + "15": 2984047104.0, + "16": 2984047104.0, + "17": 2985508864.0, + "18": 2985508864.0, + "19": 
2985508864.0, + "20": 2985508864.0, + "21": 2985508864.0, + "22": 2985508864.0, + "23": 2985508864.0, + "24": 2985508864.0, + "25": 2985508864.0, + "26": 2985508864.0, + "27": 2985508864.0, + "28": 2985508864.0, + "29": 2985508864.0, + "30": 2985508864.0, + "31": 2986932736.0, + "32": 2986932736.0, + "33": 2986932736.0, + "34": 2986932736.0, + "35": 2986932736.0, + "36": 2986932736.0, + "37": 2986932736.0, + "38": 2986932736.0, + "39": 2986932736.0, + "40": 2988336640.0, + "41": 2988336640.0, + "42": 2988336640.0, + "43": 2988336640.0, + "44": 2988336640.0, + "45": 2988336640.0, + "46": 2990742016.0, + "47": 2990742016.0, + "48": 2990742016.0, + "49": 2990742016.0, + "50": 2990742016.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 26.77929, + "2": 0.35069, + "3": 0.29635, + "4": 0.29093, + "5": 0.29737, + "6": 0.28672, + "7": 0.287, + "8": 0.28763, + "9": 0.27837, + "10": 0.2836, + "11": 0.27718, + "12": 0.28544, + "13": 0.27594, + "14": 0.2837, + "15": 0.27575, + "16": 0.27871, + "17": 0.28446, + "18": 0.27545, + "19": 0.28584, + "20": 0.27829, + "21": 0.28615, + "22": 0.27646, + "23": 0.28898, + "24": 0.28121, + "25": 0.27681, + "26": 0.28221, + "27": 0.27678, + "28": 0.28281, + "29": 0.27538, + "30": 0.28558, + "31": 0.27818, + "32": 0.28487, + "33": 0.28365, + "34": 0.27627, + "35": 0.28667, + "36": 0.27506, + "37": 0.27898, + "38": 0.27579, + "39": 0.27983, + "40": 0.27537, + "41": 0.28267, + "42": 0.28389, + "43": 0.27833, + "44": 0.28559, + "45": 0.27679, + "46": 0.28352, + "47": 0.27541, + "48": 0.28696, + "49": 0.27685, + "50": 0.27938 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 114ac89edd7..5219c47c6db 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.80815, + "2": 10.82612, + "3": 10.83032, + "4": 10.80963, "5": 10.84127, + "6": 10.8581, + "7": 10.81967, + "8": 10.82506, + "9": 10.83749, "10": 10.7783, + "11": 10.85781, + "12": 10.85539, + "13": 10.85233, + "14": 10.86699, "15": 10.81253, + "16": 10.80292, + "17": 10.78098, + "18": 10.80788, + "19": 10.79276, "20": 10.74548, + "21": 10.72785, + "22": 10.59608, + "23": 10.73999, + "24": 10.63509, "25": 10.59832, + "26": 10.63517, + "27": 10.65744, + "28": 10.64536, + "29": 10.65122, "30": 10.44144, + "31": 10.21465, + "32": 10.53342, + "33": 10.52518, + "34": 10.30171, "35": 10.34871, + "36": 10.30843, + "37": 10.42353, + "38": 10.28859, + "39": 10.45514, "40": 10.19363, + "41": 10.22791, + "42": 10.29725, + "43": 9.95871, + "44": 10.06717, "45": 9.95955, + "46": 9.92614, + "47": 10.20607, + "48": 9.96021, + "49": 9.65854, "50": 10.01296 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 31590.0, + "2": 32940.0, + "3": 33668.0, + "4": 31186.0, "5": 36214.0, + "6": 37169.0, + "7": 34770.0, + "8": 31862.0, + "9": 34102.0, "10": 30394.0, + "11": 38432.0, + "12": 35039.0, + "13": 37236.0, + "14": 37668.0, "15": 34199.0, + "16": 36659.0, + "17": 34831.0, + "18": 35011.0, + "19": 35486.0, "20": 33221.0, + "21": 
33971.0, + "22": 30501.0, + "23": 38411.0, + "24": 32764.0, "25": 31363.0, + "26": 34624.0, + "27": 36096.0, + "28": 37021.0, + "29": 37900.0, "30": 33066.0, + "31": 29871.0, + "32": 36113.0, + "33": 38168.0, + "34": 33074.0, "35": 34300.0, + "36": 35363.0, + "37": 38150.0, + "38": 35798.0, + "39": 38945.0, "40": 35780.0, + "41": 35999.0, + "42": 36611.0, + "43": 33781.0, + "44": 34207.0, "45": 35198.0, + "46": 36779.0, + "47": 40585.0, + "48": 36434.0, + "49": 35787.0, "50": 38996.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1027085824.0, + "2": 1027085824.0, + "3": 1027086848.0, + "4": 1027086336.0, "5": 1027086848.0, + "6": 1027085312.0, + "7": 1027081728.0, + "8": 1027082752.0, + "9": 1027089408.0, "10": 1027083776.0, + "11": 1027084288.0, + "12": 1027084288.0, + "13": 1027086848.0, + "14": 1027083776.0, "15": 1027085312.0, + "16": 1027086336.0, + "17": 1027084288.0, + "18": 1027088384.0, + "19": 1027086848.0, "20": 1027089920.0, + "21": 1027083264.0, + "22": 1027086336.0, + "23": 1027086848.0, + "24": 1027085824.0, "25": 1027084288.0, + "26": 1027085312.0, + "27": 1027085312.0, + "28": 1027082752.0, + "29": 1027083776.0, "30": 1027082240.0, + "31": 1027074048.0, + "32": 1027077120.0, + "33": 1027086336.0, + "34": 1027083264.0, "35": 1027085312.0, + "36": 1027083776.0, + "37": 1027084288.0, + "38": 1027085312.0, + "39": 1027080704.0, "40": 1027081728.0, + "41": 1027083264.0, + "42": 1027086848.0, + "43": 1027079680.0, + "44": 1027082752.0, "45": 1027082752.0, + "46": 1027073536.0, + "47": 1027082752.0, + "48": 1027081216.0, + "49": 1027077120.0, "50": 1027084800.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 3007080960.0, + "2": 3247499776.0, + "3": 3247499776.0, + "4": 3248093184.0, "5": 3248476160.0, + "6": 3248476160.0, + "7": 3248476160.0, + "8": 3248476160.0, + "9": 3248476160.0, "10": 
3249142784.0, + "11": 3249142784.0, + "12": 3249142784.0, + "13": 3249142784.0, + "14": 3249142784.0, "15": 3249142784.0, + "16": 3249142784.0, + "17": 3249142784.0, + "18": 3249142784.0, + "19": 3249142784.0, "20": 3249142784.0, + "21": 3249142784.0, + "22": 3249860608.0, + "23": 3249860608.0, + "24": 3249972736.0, "25": 3249972736.0, + "26": 3249972736.0, + "27": 3249972736.0, + "28": 3249972736.0, + "29": 3249972736.0, "30": 3249972736.0, + "31": 3249972736.0, + "32": 3249972736.0, + "33": 3249972736.0, + "34": 3249972736.0, "35": 3249972736.0, + "36": 3249972736.0, + "37": 3249972736.0, + "38": 3249972736.0, + "39": 3249972736.0, "40": 3249972736.0, + "41": 3249972736.0, + "42": 3249972736.0, + "43": 3249972736.0, + "44": 3249972736.0, "45": 3249972736.0, + "46": 3249972736.0, + "47": 3249972736.0, + "48": 3249972736.0, + "49": 3249972736.0, "50": 3249972736.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 11.83817, - "5": 0.24003, - "10": 0.21528, - "15": 0.22788, - "20": 0.20411, - "25": 0.20559, - "30": 0.20453, - "35": 0.20404, - "40": 0.21841, - "45": 0.2091, - "50": 0.20464 + "1": 13.23313, + "2": 0.31808, + "3": 0.27025, + "4": 0.253, + "5": 0.25938, + "6": 0.23222, + "7": 0.24127, + "8": 0.23468, + "9": 0.22881, + "10": 0.23244, + "11": 0.23056, + "12": 0.23078, + "13": 0.23301, + "14": 0.22477, + "15": 0.24897, + "16": 0.22593, + "17": 0.24178, + "18": 0.23034, + "19": 0.23887, + "20": 0.24186, + "21": 0.23006, + "22": 0.23215, + "23": 0.22763, + "24": 0.22889, + "25": 0.22662, + "26": 0.22794, + "27": 0.22851, + "28": 0.22653, + "29": 0.22859, + "30": 0.22789, + "31": 0.27081, + "32": 0.22893, + "33": 0.22575, + "34": 0.24635, + "35": 0.22739, + "36": 0.2416, + "37": 0.24045, + "38": 0.23118, + "39": 0.2275, + "40": 0.24632, + "41": 0.233, + "42": 0.22755, + "43": 0.25276, + "44": 0.2354, + "45": 0.2355, + "46": 0.25059, + "47": 0.22589, + "48": 0.25741, + "49": 0.27315, + "50": 
0.22384 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..ad63e8c681e --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.80815, + "2": 10.82612, + "3": 10.83032, + "4": 10.80963, + "5": 10.84127, + "6": 10.8581, + "7": 10.81967, + "8": 10.82506, + "9": 10.83749, + "10": 10.7783, + "11": 10.85781, + "12": 10.85539, + "13": 10.85233, + "14": 10.86699, + "15": 10.81253, + "16": 10.80292, + "17": 10.78098, + "18": 10.80788, + "19": 10.79276, + "20": 10.74548, + "21": 10.72785, + "22": 10.59608, + "23": 10.73999, + "24": 10.63509, + "25": 10.59832, + "26": 10.63517, + "27": 10.65744, + "28": 10.64536, + "29": 10.65122, + "30": 10.44144, + "31": 10.21465, + "32": 10.53342, + "33": 10.52518, + "34": 10.30171, + "35": 10.34871, + "36": 10.30843, + "37": 10.42353, + "38": 10.28859, + "39": 10.45514, + "40": 10.19363, + "41": 10.22791, + "42": 10.29725, + "43": 9.95871, + "44": 10.06717, + "45": 9.95955, + "46": 9.92614, + "47": 10.20607, + "48": 9.96021, + "49": 9.65854, + "50": 10.01296 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 31590.0, + "2": 32940.0, + "3": 33668.0, + "4": 31186.0, + "5": 36214.0, + "6": 37169.0, + "7": 34770.0, + "8": 31862.0, + "9": 34102.0, + "10": 30394.0, + "11": 38432.0, + "12": 35039.0, + "13": 37236.0, + "14": 
37668.0, + "15": 34199.0, + "16": 36659.0, + "17": 34831.0, + "18": 35011.0, + "19": 35486.0, + "20": 33221.0, + "21": 33971.0, + "22": 30501.0, + "23": 38411.0, + "24": 32764.0, + "25": 31363.0, + "26": 34624.0, + "27": 36096.0, + "28": 37021.0, + "29": 37900.0, + "30": 33066.0, + "31": 29871.0, + "32": 36113.0, + "33": 38168.0, + "34": 33074.0, + "35": 34300.0, + "36": 35363.0, + "37": 38150.0, + "38": 35798.0, + "39": 38945.0, + "40": 35780.0, + "41": 35999.0, + "42": 36611.0, + "43": 33781.0, + "44": 34207.0, + "45": 35198.0, + "46": 36779.0, + "47": 40585.0, + "48": 36434.0, + "49": 35787.0, + "50": 38996.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1027085824.0, + "2": 1027085824.0, + "3": 1027086848.0, + "4": 1027086336.0, + "5": 1027086848.0, + "6": 1027085312.0, + "7": 1027081728.0, + "8": 1027082752.0, + "9": 1027089408.0, + "10": 1027083776.0, + "11": 1027084288.0, + "12": 1027084288.0, + "13": 1027086848.0, + "14": 1027083776.0, + "15": 1027085312.0, + "16": 1027086336.0, + "17": 1027084288.0, + "18": 1027088384.0, + "19": 1027086848.0, + "20": 1027089920.0, + "21": 1027083264.0, + "22": 1027086336.0, + "23": 1027086848.0, + "24": 1027085824.0, + "25": 1027084288.0, + "26": 1027085312.0, + "27": 1027085312.0, + "28": 1027082752.0, + "29": 1027083776.0, + "30": 1027082240.0, + "31": 1027074048.0, + "32": 1027077120.0, + "33": 1027086336.0, + "34": 1027083264.0, + "35": 1027085312.0, + "36": 1027083776.0, + "37": 1027084288.0, + "38": 1027085312.0, + "39": 1027080704.0, + "40": 1027081728.0, + "41": 1027083264.0, + "42": 1027086848.0, + "43": 1027079680.0, + "44": 1027082752.0, + "45": 1027082752.0, + "46": 1027073536.0, + "47": 1027082752.0, + "48": 1027081216.0, + "49": 1027077120.0, + "50": 1027084800.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3007080960.0, + "2": 3247499776.0, + "3": 3247499776.0, + 
"4": 3248093184.0, + "5": 3248476160.0, + "6": 3248476160.0, + "7": 3248476160.0, + "8": 3248476160.0, + "9": 3248476160.0, + "10": 3249142784.0, + "11": 3249142784.0, + "12": 3249142784.0, + "13": 3249142784.0, + "14": 3249142784.0, + "15": 3249142784.0, + "16": 3249142784.0, + "17": 3249142784.0, + "18": 3249142784.0, + "19": 3249142784.0, + "20": 3249142784.0, + "21": 3249142784.0, + "22": 3249860608.0, + "23": 3249860608.0, + "24": 3249972736.0, + "25": 3249972736.0, + "26": 3249972736.0, + "27": 3249972736.0, + "28": 3249972736.0, + "29": 3249972736.0, + "30": 3249972736.0, + "31": 3249972736.0, + "32": 3249972736.0, + "33": 3249972736.0, + "34": 3249972736.0, + "35": 3249972736.0, + "36": 3249972736.0, + "37": 3249972736.0, + "38": 3249972736.0, + "39": 3249972736.0, + "40": 3249972736.0, + "41": 3249972736.0, + "42": 3249972736.0, + "43": 3249972736.0, + "44": 3249972736.0, + "45": 3249972736.0, + "46": 3249972736.0, + "47": 3249972736.0, + "48": 3249972736.0, + "49": 3249972736.0, + "50": 3249972736.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.64212, + "2": 0.27662, + "3": 0.22726, + "4": 0.22741, + "5": 0.21976, + "6": 0.21005, + "7": 0.21904, + "8": 0.20701, + "9": 0.20029, + "10": 0.21109, + "11": 0.20188, + "12": 0.20386, + "13": 0.20452, + "14": 0.19789, + "15": 0.21511, + "16": 0.20036, + "17": 0.21345, + "18": 0.20466, + "19": 0.20569, + "20": 0.19783, + "21": 0.19857, + "22": 0.20281, + "23": 0.20165, + "24": 0.20398, + "25": 0.20864, + "26": 0.20632, + "27": 0.20092, + "28": 0.20357, + "29": 0.20116, + "30": 0.19889, + "31": 0.23444, + "32": 0.19868, + "33": 0.19728, + "34": 0.21322, + "35": 0.19907, + "36": 0.20947, + "37": 0.1964, + "38": 0.20026, + "39": 0.19448, + "40": 0.21304, + "41": 0.20077, + "42": 0.19863, + "43": 0.21502, + "44": 0.21008, + "45": 0.20452, + "46": 0.22473, + "47": 0.20011, + "48": 0.22634, + "49": 0.23823, + "50": 0.20221 + } + } +} \ No newline at end 
of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..c49c5a579c0 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.80815, + "2": 10.82612, + "3": 10.83032, + "4": 10.80963, + "5": 10.84127, + "6": 10.8581, + "7": 10.81967, + "8": 10.82506, + "9": 10.83749, + "10": 10.7783, + "11": 10.85781, + "12": 10.85539, + "13": 10.85233, + "14": 10.86699, + "15": 10.81253, + "16": 10.80292, + "17": 10.78098, + "18": 10.80788, + "19": 10.79276, + "20": 10.74548, + "21": 10.72785, + "22": 10.59608, + "23": 10.73999, + "24": 10.63509, + "25": 10.59832, + "26": 10.63517, + "27": 10.65744, + "28": 10.64536, + "29": 10.65122, + "30": 10.44144, + "31": 10.21465, + "32": 10.53342, + "33": 10.52518, + "34": 10.30171, + "35": 10.34871, + "36": 10.30843, + "37": 10.42353, + "38": 10.28859, + "39": 10.45514, + "40": 10.19363, + "41": 10.22791, + "42": 10.29725, + "43": 9.95871, + "44": 10.06717, + "45": 9.95955, + "46": 9.92614, + "47": 10.20607, + "48": 9.96021, + "49": 9.65854, + "50": 10.01296 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 31590.0, + "2": 32940.0, + "3": 33668.0, + "4": 31186.0, + "5": 36214.0, + "6": 37169.0, + "7": 34770.0, + "8": 31862.0, + "9": 34102.0, + "10": 30394.0, + "11": 38432.0, + "12": 35039.0, + "13": 37236.0, + "14": 37668.0, + "15": 34199.0, + "16": 36659.0, + "17": 34831.0, 
+ "18": 35011.0, + "19": 35486.0, + "20": 33221.0, + "21": 33971.0, + "22": 30501.0, + "23": 38411.0, + "24": 32764.0, + "25": 31363.0, + "26": 34624.0, + "27": 36096.0, + "28": 37021.0, + "29": 37900.0, + "30": 33066.0, + "31": 29871.0, + "32": 36113.0, + "33": 38168.0, + "34": 33074.0, + "35": 34300.0, + "36": 35363.0, + "37": 38150.0, + "38": 35798.0, + "39": 38945.0, + "40": 35780.0, + "41": 35999.0, + "42": 36611.0, + "43": 33781.0, + "44": 34207.0, + "45": 35198.0, + "46": 36779.0, + "47": 40585.0, + "48": 36434.0, + "49": 35787.0, + "50": 38996.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1027085824.0, + "2": 1027085824.0, + "3": 1027086848.0, + "4": 1027086336.0, + "5": 1027086848.0, + "6": 1027085312.0, + "7": 1027081728.0, + "8": 1027082752.0, + "9": 1027089408.0, + "10": 1027083776.0, + "11": 1027084288.0, + "12": 1027084288.0, + "13": 1027086848.0, + "14": 1027083776.0, + "15": 1027085312.0, + "16": 1027086336.0, + "17": 1027084288.0, + "18": 1027088384.0, + "19": 1027086848.0, + "20": 1027089920.0, + "21": 1027083264.0, + "22": 1027086336.0, + "23": 1027086848.0, + "24": 1027085824.0, + "25": 1027084288.0, + "26": 1027085312.0, + "27": 1027085312.0, + "28": 1027082752.0, + "29": 1027083776.0, + "30": 1027082240.0, + "31": 1027074048.0, + "32": 1027077120.0, + "33": 1027086336.0, + "34": 1027083264.0, + "35": 1027085312.0, + "36": 1027083776.0, + "37": 1027084288.0, + "38": 1027085312.0, + "39": 1027080704.0, + "40": 1027081728.0, + "41": 1027083264.0, + "42": 1027086848.0, + "43": 1027079680.0, + "44": 1027082752.0, + "45": 1027082752.0, + "46": 1027073536.0, + "47": 1027082752.0, + "48": 1027081216.0, + "49": 1027077120.0, + "50": 1027084800.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3007080960.0, + "2": 3247499776.0, + "3": 3247499776.0, + "4": 3248093184.0, + "5": 3248476160.0, + "6": 3248476160.0, + 
"7": 3248476160.0, + "8": 3248476160.0, + "9": 3248476160.0, + "10": 3249142784.0, + "11": 3249142784.0, + "12": 3249142784.0, + "13": 3249142784.0, + "14": 3249142784.0, + "15": 3249142784.0, + "16": 3249142784.0, + "17": 3249142784.0, + "18": 3249142784.0, + "19": 3249142784.0, + "20": 3249142784.0, + "21": 3249142784.0, + "22": 3249860608.0, + "23": 3249860608.0, + "24": 3249972736.0, + "25": 3249972736.0, + "26": 3249972736.0, + "27": 3249972736.0, + "28": 3249972736.0, + "29": 3249972736.0, + "30": 3249972736.0, + "31": 3249972736.0, + "32": 3249972736.0, + "33": 3249972736.0, + "34": 3249972736.0, + "35": 3249972736.0, + "36": 3249972736.0, + "37": 3249972736.0, + "38": 3249972736.0, + "39": 3249972736.0, + "40": 3249972736.0, + "41": 3249972736.0, + "42": 3249972736.0, + "43": 3249972736.0, + "44": 3249972736.0, + "45": 3249972736.0, + "46": 3249972736.0, + "47": 3249972736.0, + "48": 3249972736.0, + "49": 3249972736.0, + "50": 3249972736.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.35552, + "2": 0.37785, + "3": 0.29632, + "4": 0.29599, + "5": 0.25057, + "6": 0.2376, + "7": 0.24788, + "8": 0.2386, + "9": 0.23567, + "10": 0.23981, + "11": 0.23457, + "12": 0.23608, + "13": 0.24093, + "14": 0.23076, + "15": 0.25524, + "16": 0.23573, + "17": 0.24636, + "18": 0.2348, + "19": 0.23922, + "20": 0.23445, + "21": 0.22924, + "22": 0.23872, + "23": 0.23172, + "24": 0.23116, + "25": 0.23103, + "26": 0.23556, + "27": 0.23228, + "28": 0.23323, + "29": 0.23495, + "30": 0.23011, + "31": 0.27652, + "32": 0.23015, + "33": 0.22902, + "34": 0.25666, + "35": 0.23045, + "36": 0.24626, + "37": 0.23146, + "38": 0.2344, + "39": 0.22864, + "40": 0.24642, + "41": 0.23788, + "42": 0.23274, + "43": 0.24326, + "44": 0.23733, + "45": 0.24263, + "46": 0.25392, + "47": 0.23328, + "48": 0.26156, + "49": 0.27837, + "50": 0.23303 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..171568354d3 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8174, + "2": 10.8238, + "3": 10.83034, + "4": 10.79848, + "5": 10.86097, + "6": 10.86968, + "7": 10.83041, + "8": 10.83047, + "9": 10.83634, + "10": 10.80463, + "11": 10.87361, + "12": 10.85679, + "13": 10.86371, + "14": 10.87941, + "15": 10.79539, + "16": 10.79946, + "17": 10.7712, + "18": 10.80138, + "19": 10.78756, + "20": 10.71135, + "21": 10.67535, + "22": 10.53788, + "23": 10.68977, + "24": 10.57497, + "25": 10.51962, + "26": 10.57943, + "27": 10.58547, + "28": 10.55147, + "29": 10.56806, + "30": 10.33346, + "31": 10.06567, + "32": 10.42406, + "33": 10.43002, + "34": 10.16343, + "35": 10.22683, + "36": 10.19343, + "37": 10.30857, + "38": 10.14766, + "39": 10.38079, + "40": 10.041, + "41": 10.08555, + "42": 10.17528, + "43": 9.76706, + "44": 9.91338, + "45": 9.7722, + "46": 9.75215, + "47": 10.11047, + "48": 9.79832, + "49": 9.4591, + "50": 9.86932 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 26535.0, + "2": 29510.0, + "3": 29143.0, + "4": 28253.0, + "5": 31546.0, + "6": 32394.0, + "7": 30992.0, + "8": 27483.0, + "9": 30277.0, + "10": 25541.0, + "11": 33316.0, + "12": 30322.0, + "13": 32492.0, + "14": 32959.0, + "15": 30463.0, + "16": 31824.0, + "17": 30856.0, + "18": 30543.0, + "19": 31088.0, + "20": 28331.0, + "21": 28793.0, + "22": 27857.0, + "23": 33708.0, + "24": 28428.0, + "25": 
27263.0, + "26": 30930.0, + "27": 31082.0, + "28": 32928.0, + "29": 34437.0, + "30": 29642.0, + "31": 28293.0, + "32": 32660.0, + "33": 35555.0, + "34": 30589.0, + "35": 32022.0, + "36": 33586.0, + "37": 35917.0, + "38": 34614.0, + "39": 37197.0, + "40": 34911.0, + "41": 33219.0, + "42": 35534.0, + "43": 34573.0, + "44": 33331.0, + "45": 35017.0, + "46": 35205.0, + "47": 39557.0, + "48": 35883.0, + "49": 36444.0, + "50": 38975.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1356172288.0, + "2": 1356165120.0, + "3": 1356179968.0, + "4": 1356190208.0, + "5": 1356170240.0, + "6": 1356170752.0, + "7": 1356184064.0, + "8": 1356165632.0, + "9": 1356161536.0, + "10": 1356160000.0, + "11": 1356167168.0, + "12": 1356178944.0, + "13": 1356167168.0, + "14": 1356162560.0, + "15": 1356180480.0, + "16": 1356185088.0, + "17": 1356156416.0, + "18": 1356187136.0, + "19": 1356171264.0, + "20": 1356170240.0, + "21": 1356188160.0, + "22": 1356186112.0, + "23": 1356185600.0, + "24": 1356181504.0, + "25": 1356182528.0, + "26": 1356189696.0, + "27": 1356189696.0, + "28": 1356181504.0, + "29": 1356182528.0, + "30": 1356198400.0, + "31": 1356187136.0, + "32": 1356177408.0, + "33": 1356187648.0, + "34": 1356187648.0, + "35": 1356182016.0, + "36": 1356178432.0, + "37": 1356182528.0, + "38": 1356186112.0, + "39": 1356170240.0, + "40": 1356156416.0, + "41": 1356169728.0, + "42": 1356151808.0, + "43": 1356151808.0, + "44": 1356146688.0, + "45": 1356140544.0, + "46": 1356133888.0, + "47": 1356111872.0, + "48": 1356119552.0, + "49": 1356118528.0, + "50": 1356098560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3163797504.0, + "2": 3730006528.0, + "3": 3748878336.0, + "4": 3752917504.0, + "5": 3752917504.0, + "6": 3752917504.0, + "7": 3758158848.0, + "8": 3758158848.0, + "9": 3758158848.0, + "10": 3758158848.0, + "11": 3758158848.0, + "12": 3758158848.0, 
+ "13": 3758158848.0, + "14": 3758158848.0, + "15": 3758158848.0, + "16": 3758158848.0, + "17": 3758158848.0, + "18": 3758158848.0, + "19": 3758158848.0, + "20": 3758158848.0, + "21": 3758158848.0, + "22": 3758158848.0, + "23": 3758158848.0, + "24": 3758158848.0, + "25": 3758158848.0, + "26": 3758158848.0, + "27": 3758158848.0, + "28": 3758158848.0, + "29": 3770054144.0, + "30": 3770054144.0, + "31": 3770054144.0, + "32": 3770054144.0, + "33": 3770054144.0, + "34": 3770054144.0, + "35": 3770054144.0, + "36": 3770054144.0, + "37": 3770054144.0, + "38": 3770054144.0, + "39": 3770054144.0, + "40": 3770054144.0, + "41": 3770054144.0, + "42": 3770054144.0, + "43": 3770054144.0, + "44": 3770054144.0, + "45": 3770054144.0, + "46": 3770054144.0, + "47": 3770054144.0, + "48": 3770054144.0, + "49": 3770054144.0, + "50": 3770054144.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 27.36516, + "2": 0.36408, + "3": 0.2993, + "4": 0.29949, + "5": 0.30461, + "6": 0.29574, + "7": 0.30041, + "8": 0.30848, + "9": 0.29849, + "10": 0.29846, + "11": 0.30503, + "12": 0.29885, + "13": 0.29495, + "14": 0.29657, + "15": 0.30665, + "16": 0.29545, + "17": 0.2982, + "18": 0.30792, + "19": 0.29588, + "20": 0.29657, + "21": 0.30198, + "22": 0.30357, + "23": 0.30049, + "24": 0.29959, + "25": 0.30994, + "26": 0.29865, + "27": 0.3002, + "28": 0.30774, + "29": 0.30125, + "30": 0.30366, + "31": 0.32063, + "32": 0.31461, + "33": 0.30383, + "34": 0.30388, + "35": 0.31199, + "36": 0.30381, + "37": 0.30412, + "38": 0.31439, + "39": 0.30499, + "40": 0.30779, + "41": 0.33024, + "42": 0.31735, + "43": 0.30791, + "44": 0.31609, + "45": 0.3076, + "46": 0.31885, + "47": 0.31309, + "48": 0.31902, + "49": 0.30799, + "50": 0.30894 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json new file mode 100644 index 00000000000..52e3e931ee9 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.8174, + "2": 10.8238, + "3": 10.83034, + "4": 10.79848, + "5": 10.86097, + "6": 10.86968, + "7": 10.83041, + "8": 10.83047, + "9": 10.83634, + "10": 10.80463, + "11": 10.87361, + "12": 10.85679, + "13": 10.86371, + "14": 10.87941, + "15": 10.79539, + "16": 10.79946, + "17": 10.7712, + "18": 10.80138, + "19": 10.78756, + "20": 10.71135, + "21": 10.67535, + "22": 10.53788, + "23": 10.68977, + "24": 10.57497, + "25": 10.51962, + "26": 10.57943, + "27": 10.58547, + "28": 10.55147, + "29": 10.56806, + "30": 10.33346, + "31": 10.06567, + "32": 10.42406, + "33": 10.43002, + "34": 10.16343, + "35": 10.22683, + "36": 10.19343, + "37": 10.30857, + "38": 10.14766, + "39": 10.38079, + "40": 10.041, + "41": 10.08555, + "42": 10.17528, + "43": 9.76706, + "44": 9.91338, + "45": 9.7722, + "46": 9.75215, + "47": 10.11047, + "48": 9.79832, + "49": 9.4591, + "50": 9.86932 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 26535.0, + "2": 29510.0, + "3": 29143.0, + "4": 28253.0, + "5": 31546.0, + "6": 32394.0, + "7": 30992.0, + "8": 27483.0, + "9": 30277.0, + "10": 25541.0, + "11": 33316.0, + "12": 30322.0, + "13": 32492.0, + "14": 32959.0, + "15": 30463.0, + "16": 31824.0, + "17": 30856.0, + "18": 30543.0, + "19": 31088.0, + "20": 28331.0, + "21": 28793.0, + "22": 27857.0, + "23": 33708.0, + "24": 28428.0, + "25": 27263.0, + "26": 30930.0, + "27": 31082.0, + "28": 32928.0, + "29": 34437.0, + "30": 29642.0, + "31": 28293.0, + "32": 32660.0, + "33": 35555.0, + "34": 
30589.0, + "35": 32022.0, + "36": 33586.0, + "37": 35917.0, + "38": 34614.0, + "39": 37197.0, + "40": 34911.0, + "41": 33219.0, + "42": 35534.0, + "43": 34573.0, + "44": 33331.0, + "45": 35017.0, + "46": 35205.0, + "47": 39557.0, + "48": 35883.0, + "49": 36444.0, + "50": 38975.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1356172288.0, + "2": 1356165120.0, + "3": 1356179968.0, + "4": 1356190208.0, + "5": 1356170240.0, + "6": 1356170752.0, + "7": 1356184064.0, + "8": 1356165632.0, + "9": 1356161536.0, + "10": 1356160000.0, + "11": 1356167168.0, + "12": 1356178944.0, + "13": 1356167168.0, + "14": 1356162560.0, + "15": 1356180480.0, + "16": 1356185088.0, + "17": 1356156416.0, + "18": 1356187136.0, + "19": 1356171264.0, + "20": 1356170240.0, + "21": 1356188160.0, + "22": 1356186112.0, + "23": 1356185600.0, + "24": 1356181504.0, + "25": 1356182528.0, + "26": 1356189696.0, + "27": 1356189696.0, + "28": 1356181504.0, + "29": 1356182528.0, + "30": 1356198400.0, + "31": 1356187136.0, + "32": 1356177408.0, + "33": 1356187648.0, + "34": 1356187648.0, + "35": 1356182016.0, + "36": 1356178432.0, + "37": 1356182528.0, + "38": 1356186112.0, + "39": 1356170240.0, + "40": 1356156416.0, + "41": 1356169728.0, + "42": 1356151808.0, + "43": 1356151808.0, + "44": 1356146688.0, + "45": 1356140544.0, + "46": 1356133888.0, + "47": 1356111872.0, + "48": 1356119552.0, + "49": 1356118528.0, + "50": 1356098560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3163797504.0, + "2": 3730006528.0, + "3": 3748878336.0, + "4": 3752917504.0, + "5": 3752917504.0, + "6": 3752917504.0, + "7": 3758158848.0, + "8": 3758158848.0, + "9": 3758158848.0, + "10": 3758158848.0, + "11": 3758158848.0, + "12": 3758158848.0, + "13": 3758158848.0, + "14": 3758158848.0, + "15": 3758158848.0, + "16": 3758158848.0, + "17": 3758158848.0, + "18": 3758158848.0, + "19": 
3758158848.0, + "20": 3758158848.0, + "21": 3758158848.0, + "22": 3758158848.0, + "23": 3758158848.0, + "24": 3758158848.0, + "25": 3758158848.0, + "26": 3758158848.0, + "27": 3758158848.0, + "28": 3758158848.0, + "29": 3770054144.0, + "30": 3770054144.0, + "31": 3770054144.0, + "32": 3770054144.0, + "33": 3770054144.0, + "34": 3770054144.0, + "35": 3770054144.0, + "36": 3770054144.0, + "37": 3770054144.0, + "38": 3770054144.0, + "39": 3770054144.0, + "40": 3770054144.0, + "41": 3770054144.0, + "42": 3770054144.0, + "43": 3770054144.0, + "44": 3770054144.0, + "45": 3770054144.0, + "46": 3770054144.0, + "47": 3770054144.0, + "48": 3770054144.0, + "49": 3770054144.0, + "50": 3770054144.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 29.25664, + "2": 0.52496, + "3": 0.31117, + "4": 0.3115, + "5": 0.30744, + "6": 0.3073, + "7": 0.30608, + "8": 0.30768, + "9": 0.30608, + "10": 0.30812, + "11": 0.30587, + "12": 0.30181, + "13": 0.30601, + "14": 0.30172, + "15": 0.2992, + "16": 0.30316, + "17": 0.29987, + "18": 0.30154, + "19": 0.30104, + "20": 0.30976, + "21": 0.3056, + "22": 0.29977, + "23": 0.30766, + "24": 0.30782, + "25": 0.3, + "26": 0.30831, + "27": 0.3064, + "28": 0.30211, + "29": 0.30977, + "30": 0.30627, + "31": 0.31683, + "32": 0.31896, + "33": 0.308, + "34": 0.31449, + "35": 0.30656, + "36": 0.31192, + "37": 0.31478, + "38": 0.30653, + "39": 0.31106, + "40": 0.31664, + "41": 0.32127, + "42": 0.32489, + "43": 0.31002, + "44": 0.31115, + "45": 0.3117, + "46": 0.32232, + "47": 0.31526, + "48": 0.31918, + "49": 0.35454, + "50": 0.31865 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index cc62903f69e..6e2a34b26f8 100644 
--- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.77518, + "2": 10.78038, + "3": 10.79302, + "4": 10.74107, "5": 10.82013, + "6": 10.82951, + "7": 10.7953, + "8": 10.78263, + "9": 10.79278, "10": 10.7446, + "11": 10.85147, + "12": 10.82613, + "13": 10.82825, + "14": 10.85504, "15": 10.75536, + "16": 10.75777, + "17": 10.72319, + "18": 10.76274, + "19": 10.75075, "20": 10.66587, + "21": 10.6419, + "22": 10.47523, + "23": 10.66959, + "24": 10.54157, "25": 10.4825, + "26": 10.55255, + "27": 10.57459, + "28": 10.55159, + "29": 10.5668, "30": 10.31134, + "31": 10.01921, + "32": 10.42655, + "33": 10.42294, + "34": 10.14739, "35": 10.21574, + "36": 10.15811, + "37": 10.30279, + "38": 10.14031, + "39": 10.36301, "40": 10.02669, + "41": 10.07635, + "42": 10.16156, + "43": 9.74374, + "44": 9.88962, "45": 9.75874, + "46": 9.73618, + "47": 10.0844, + "48": 9.78532, + "49": 9.45072, "50": 9.85634 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { "1": 27105.0, + "2": 28791.0, + "3": 29282.0, + "4": 27583.0, "5": 31595.0, + "6": 32831.0, + "7": 31023.0, + "8": 27107.0, + "9": 30780.0, "10": 25505.0, + "11": 33684.0, + "12": 30235.0, + "13": 32960.0, + "14": 32880.0, "15": 30405.0, + "16": 32455.0, + "17": 30933.0, + "18": 30623.0, + "19": 30803.0, "20": 28593.0, + "21": 29002.0, + "22": 27030.0, + "23": 34463.0, + "24": 29154.0, "25": 27827.0, + "26": 31119.0, + "27": 32108.0, + "28": 33412.0, + "29": 34737.0, "30": 30465.0, + "31": 28775.0, + "32": 33115.0, + "33": 34745.0, + "34": 30785.0, "35": 32116.0, + "36": 33968.0, + "37": 36757.0, + "38": 34150.0, + 
"39": 37240.0, "40": 35353.0, + "41": 34638.0, + "42": 36703.0, + "43": 34601.0, + "44": 33783.0, "45": 35388.0, + "46": 35484.0, + "47": 40591.0, + "48": 36671.0, + "49": 36174.0, "50": 38231.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 1562143232.0, - "5": 1562716672.0, - "10": 1564981248.0, - "15": 1565375488.0, - "20": 1564531200.0, - "25": 1564925952.0, - "30": 1563997184.0, - "35": 1563508224.0, - "40": 1564344832.0, - "45": 1566202880.0, - "50": 1563379712.0 + "1": 1563272704.0, + "2": 1562858496.0, + "3": 1564486144.0, + "4": 1564041216.0, + "5": 1561823232.0, + "6": 1563443712.0, + "7": 1564206592.0, + "8": 1563517952.0, + "9": 1562183680.0, + "10": 1565040640.0, + "11": 1562508800.0, + "12": 1561081344.0, + "13": 1562479616.0, + "14": 1562858496.0, + "15": 1563188736.0, + "16": 1562045440.0, + "17": 1564147712.0, + "18": 1564288512.0, + "19": 1562883584.0, + "20": 1562017792.0, + "21": 1562184704.0, + "22": 1562030080.0, + "23": 1562267136.0, + "24": 1561898496.0, + "25": 1563593728.0, + "26": 1563150336.0, + "27": 1564444160.0, + "28": 1562418176.0, + "29": 1562973184.0, + "30": 1563487744.0, + "31": 1563070976.0, + "32": 1563377664.0, + "33": 1564346368.0, + "34": 1561956352.0, + "35": 1563001344.0, + "36": 1563246080.0, + "37": 1564364800.0, + "38": 1562608640.0, + "39": 1564432896.0, + "40": 1563148288.0, + "41": 1563740160.0, + "42": 1565268480.0, + "43": 1565179392.0, + "44": 1562279936.0, + "45": 1564082176.0, + "46": 1563706368.0, + "47": 1561835008.0, + "48": 1561798144.0, + "49": 1562701824.0, + "50": 1565224960.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 3676670976.0, - "5": 4262092288.0, - "10": 4290165248.0, - "15": 4290165248.0, - "20": 4290165248.0, - "25": 4290165248.0, - "30": 4290165248.0, - "35": 4290165248.0, - "40": 4290165248.0, - "45": 4290165248.0, - "50": 
4290165248.0 + "1": 3678389248.0, + "2": 4261802496.0, + "3": 4262688768.0, + "4": 4262688768.0, + "5": 4262688768.0, + "6": 4288888832.0, + "7": 4288888832.0, + "8": 4288888832.0, + "9": 4288888832.0, + "10": 4288888832.0, + "11": 4288888832.0, + "12": 4288888832.0, + "13": 4288888832.0, + "14": 4288888832.0, + "15": 4288888832.0, + "16": 4288888832.0, + "17": 4288888832.0, + "18": 4288888832.0, + "19": 4288888832.0, + "20": 4288888832.0, + "21": 4288888832.0, + "22": 4288888832.0, + "23": 4288888832.0, + "24": 4288888832.0, + "25": 4288888832.0, + "26": 4288888832.0, + "27": 4288888832.0, + "28": 4288888832.0, + "29": 4288888832.0, + "30": 4288888832.0, + "31": 4288888832.0, + "32": 4288888832.0, + "33": 4288888832.0, + "34": 4288888832.0, + "35": 4288888832.0, + "36": 4288888832.0, + "37": 4288888832.0, + "38": 4288888832.0, + "39": 4288888832.0, + "40": 4288888832.0, + "41": 4288888832.0, + "42": 4288888832.0, + "43": 4288888832.0, + "44": 4288888832.0, + "45": 4288888832.0, + "46": 4288888832.0, + "47": 4288888832.0, + "48": 4288888832.0, + "49": 4288888832.0, + "50": 4288888832.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 17.03683, - "5": 0.33922, - "10": 0.30304, - "15": 0.29637, - "20": 0.2596, - "25": 0.25723, - "30": 0.27136, - "35": 0.26623, - "40": 0.26866, - "45": 0.25523, - "50": 0.25705 + "1": 19.14758, + "2": 0.49766, + "3": 0.44107, + "4": 0.37175, + "5": 0.37026, + "6": 0.33176, + "7": 0.32446, + "8": 0.31735, + "9": 0.3291, + "10": 0.32512, + "11": 0.30495, + "12": 0.31438, + "13": 0.29955, + "14": 0.30728, + "15": 0.31532, + "16": 0.29631, + "17": 0.30956, + "18": 0.30533, + "19": 0.30054, + "20": 0.30291, + "21": 0.30231, + "22": 0.32081, + "23": 0.29797, + "24": 0.3059, + "25": 0.3093, + "26": 0.30535, + "27": 0.30202, + "28": 0.31154, + "29": 0.30205, + "30": 0.3198, + "31": 0.36657, + "32": 0.30974, + "33": 0.34056, + "34": 0.32396, + "35": 0.34679, + "36": 
0.30488, + "37": 0.31477, + "38": 0.31377, + "39": 0.31065, + "40": 0.30631, + "41": 0.30771, + "42": 0.3003, + "43": 0.30915, + "44": 0.31796, + "45": 0.2949, + "46": 0.30522, + "47": 0.30099, + "48": 0.30303, + "49": 0.30198, + "50": 0.29985 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..3c9a1238968 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.77518, + "2": 10.78038, + "3": 10.79302, + "4": 10.74107, + "5": 10.82013, + "6": 10.82951, + "7": 10.7953, + "8": 10.78263, + "9": 10.79278, + "10": 10.7446, + "11": 10.85147, + "12": 10.82613, + "13": 10.82825, + "14": 10.85504, + "15": 10.75536, + "16": 10.75777, + "17": 10.72319, + "18": 10.76274, + "19": 10.75075, + "20": 10.66587, + "21": 10.6419, + "22": 10.47523, + "23": 10.66959, + "24": 10.54157, + "25": 10.4825, + "26": 10.55255, + "27": 10.57459, + "28": 10.55159, + "29": 10.5668, + "30": 10.31134, + "31": 10.01921, + "32": 10.42655, + "33": 10.42294, + "34": 10.14739, + "35": 10.21574, + "36": 10.15811, + "37": 10.30279, + "38": 10.14031, + "39": 10.36301, + "40": 10.02669, + "41": 10.07635, + "42": 10.16156, + "43": 9.74374, + "44": 9.88962, + "45": 9.75874, + "46": 9.73618, + "47": 10.0844, + "48": 9.78532, + "49": 9.45072, + "50": 9.85634 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 27105.0, + "2": 28791.0, + "3": 29282.0, + "4": 27583.0, + "5": 31595.0, + "6": 32831.0, + "7": 
31023.0, + "8": 27107.0, + "9": 30780.0, + "10": 25505.0, + "11": 33684.0, + "12": 30235.0, + "13": 32960.0, + "14": 32880.0, + "15": 30405.0, + "16": 32455.0, + "17": 30933.0, + "18": 30623.0, + "19": 30803.0, + "20": 28593.0, + "21": 29002.0, + "22": 27030.0, + "23": 34463.0, + "24": 29154.0, + "25": 27827.0, + "26": 31119.0, + "27": 32108.0, + "28": 33412.0, + "29": 34737.0, + "30": 30465.0, + "31": 28775.0, + "32": 33115.0, + "33": 34745.0, + "34": 30785.0, + "35": 32116.0, + "36": 33968.0, + "37": 36757.0, + "38": 34150.0, + "39": 37240.0, + "40": 35353.0, + "41": 34638.0, + "42": 36703.0, + "43": 34601.0, + "44": 33783.0, + "45": 35388.0, + "46": 35484.0, + "47": 40591.0, + "48": 36671.0, + "49": 36174.0, + "50": 38231.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1560780288.0, + "2": 1562661888.0, + "3": 1561168384.0, + "4": 1562873856.0, + "5": 1561988096.0, + "6": 1562931712.0, + "7": 1560774144.0, + "8": 1560396800.0, + "9": 1559476224.0, + "10": 1561237504.0, + "11": 1560092160.0, + "12": 1561073152.0, + "13": 1560844288.0, + "14": 1560660992.0, + "15": 1561358848.0, + "16": 1562046464.0, + "17": 1562270720.0, + "18": 1561111040.0, + "19": 1560918528.0, + "20": 1560393728.0, + "21": 1559810048.0, + "22": 1560937472.0, + "23": 1560980992.0, + "24": 1563885056.0, + "25": 1564661760.0, + "26": 1562321920.0, + "27": 1560262144.0, + "28": 1561913344.0, + "29": 1561421824.0, + "30": 1562089984.0, + "31": 1563574784.0, + "32": 1560473600.0, + "33": 1560724480.0, + "34": 1560988672.0, + "35": 1559951872.0, + "36": 1561882112.0, + "37": 1560333312.0, + "38": 1561226240.0, + "39": 1562092032.0, + "40": 1563557888.0, + "41": 1561459712.0, + "42": 1561729536.0, + "43": 1562591744.0, + "44": 1562273792.0, + "45": 1560520704.0, + "46": 1565477888.0, + "47": 1562011136.0, + "48": 1562666496.0, + "49": 1560133632.0, + "50": 1562494976.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + 
"end_step": 50, + "step_interval": 1, + "values": { + "1": 3682551808.0, + "2": 4261875200.0, + "3": 4261875200.0, + "4": 4261875200.0, + "5": 4262492672.0, + "6": 4286960640.0, + "7": 4286960640.0, + "8": 4286960640.0, + "9": 4286960640.0, + "10": 4286960640.0, + "11": 4286960640.0, + "12": 4286960640.0, + "13": 4286960640.0, + "14": 4286960640.0, + "15": 4286960640.0, + "16": 4286960640.0, + "17": 4286960640.0, + "18": 4286960640.0, + "19": 4286960640.0, + "20": 4286960640.0, + "21": 4286960640.0, + "22": 4286960640.0, + "23": 4286960640.0, + "24": 4286960640.0, + "25": 4286960640.0, + "26": 4286960640.0, + "27": 4286960640.0, + "28": 4286960640.0, + "29": 4286960640.0, + "30": 4286960640.0, + "31": 4286960640.0, + "32": 4286960640.0, + "33": 4286960640.0, + "34": 4286960640.0, + "35": 4286960640.0, + "36": 4286960640.0, + "37": 4286960640.0, + "38": 4286960640.0, + "39": 4286960640.0, + "40": 4286960640.0, + "41": 4286960640.0, + "42": 4286960640.0, + "43": 4286960640.0, + "44": 4286960640.0, + "45": 4286960640.0, + "46": 4286960640.0, + "47": 4286960640.0, + "48": 4286960640.0, + "49": 4286960640.0, + "50": 4286960640.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 20.83226, + "2": 0.4277, + "3": 0.36235, + "4": 0.32018, + "5": 0.32467, + "6": 0.2866, + "7": 0.29271, + "8": 0.2778, + "9": 0.28029, + "10": 0.27681, + "11": 0.26073, + "12": 0.26966, + "13": 0.26171, + "14": 0.26964, + "15": 0.26556, + "16": 0.26142, + "17": 0.26797, + "18": 0.26832, + "19": 0.25503, + "20": 0.26854, + "21": 0.26028, + "22": 0.27376, + "23": 0.26433, + "24": 0.27688, + "25": 0.26452, + "26": 0.26581, + "27": 0.26181, + "28": 0.26407, + "29": 0.26847, + "30": 0.28514, + "31": 0.27185, + "32": 0.26438, + "33": 0.26828, + "34": 0.27142, + "35": 0.27204, + "36": 0.28491, + "37": 0.28927, + "38": 0.26843, + "39": 0.27153, + "40": 0.27149, + "41": 0.2612, + "42": 0.25803, + "43": 0.27298, + "44": 0.28995, + "45": 0.28088, 
+ "46": 0.28702, + "47": 0.27506, + "48": 0.2642, + "49": 0.26659, + "50": 0.25965 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..acf98f05d31 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.77518, + "2": 10.78038, + "3": 10.79302, + "4": 10.74107, + "5": 10.82013, + "6": 10.82951, + "7": 10.7953, + "8": 10.78263, + "9": 10.79278, + "10": 10.7446, + "11": 10.85147, + "12": 10.82613, + "13": 10.82825, + "14": 10.85504, + "15": 10.75536, + "16": 10.75777, + "17": 10.72319, + "18": 10.76274, + "19": 10.75075, + "20": 10.66587, + "21": 10.6419, + "22": 10.47523, + "23": 10.66959, + "24": 10.54157, + "25": 10.4825, + "26": 10.55255, + "27": 10.57459, + "28": 10.55159, + "29": 10.5668, + "30": 10.31134, + "31": 10.01921, + "32": 10.42655, + "33": 10.42294, + "34": 10.14739, + "35": 10.21574, + "36": 10.15811, + "37": 10.30279, + "38": 10.14031, + "39": 10.36301, + "40": 10.02669, + "41": 10.07635, + "42": 10.16156, + "43": 9.74374, + "44": 9.88962, + "45": 9.75874, + "46": 9.73618, + "47": 10.0844, + "48": 9.78532, + "49": 9.45072, + "50": 9.85634 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 27105.0, + "2": 28791.0, + "3": 29282.0, + "4": 27583.0, + "5": 31595.0, + "6": 32831.0, + "7": 31023.0, + "8": 27107.0, + "9": 30780.0, + "10": 25505.0, + "11": 33684.0, + "12": 30235.0, + "13": 32960.0, + "14": 32880.0, + "15": 30405.0, + "16": 32455.0, + "17": 30933.0, 
+ "18": 30623.0, + "19": 30803.0, + "20": 28593.0, + "21": 29002.0, + "22": 27030.0, + "23": 34463.0, + "24": 29154.0, + "25": 27827.0, + "26": 31119.0, + "27": 32108.0, + "28": 33412.0, + "29": 34737.0, + "30": 30465.0, + "31": 28775.0, + "32": 33115.0, + "33": 34745.0, + "34": 30785.0, + "35": 32116.0, + "36": 33968.0, + "37": 36757.0, + "38": 34150.0, + "39": 37240.0, + "40": 35353.0, + "41": 34638.0, + "42": 36703.0, + "43": 34601.0, + "44": 33783.0, + "45": 35388.0, + "46": 35484.0, + "47": 40591.0, + "48": 36671.0, + "49": 36174.0, + "50": 38231.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1561367040.0, + "2": 1560972288.0, + "3": 1561248256.0, + "4": 1560096768.0, + "5": 1559926784.0, + "6": 1561850368.0, + "7": 1560161792.0, + "8": 1560285184.0, + "9": 1560998912.0, + "10": 1561293824.0, + "11": 1560700416.0, + "12": 1562299904.0, + "13": 1560526848.0, + "14": 1561499648.0, + "15": 1559979520.0, + "16": 1561232384.0, + "17": 1561337856.0, + "18": 1560266240.0, + "19": 1561224704.0, + "20": 1560222720.0, + "21": 1561771008.0, + "22": 1559743488.0, + "23": 1560801792.0, + "24": 1561316864.0, + "25": 1560606720.0, + "26": 1562301440.0, + "27": 1560251904.0, + "28": 1559861248.0, + "29": 1559861248.0, + "30": 1560919552.0, + "31": 1561406976.0, + "32": 1565212672.0, + "33": 1560626176.0, + "34": 1561871360.0, + "35": 1560959488.0, + "36": 1561910784.0, + "37": 1559904256.0, + "38": 1560347648.0, + "39": 1562116608.0, + "40": 1562510336.0, + "41": 1562299392.0, + "42": 1561589248.0, + "43": 1560753664.0, + "44": 1561721856.0, + "45": 1561170944.0, + "46": 1561996288.0, + "47": 1560805888.0, + "48": 1561083392.0, + "49": 1560795136.0, + "50": 1561778176.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3680567296.0, + "2": 4256236032.0, + "3": 4260136960.0, + "4": 4260136960.0, + "5": 4261063168.0, + "6": 4289287168.0, + 
"7": 4289287168.0, + "8": 4289287168.0, + "9": 4289287168.0, + "10": 4289287168.0, + "11": 4289287168.0, + "12": 4289287168.0, + "13": 4289287168.0, + "14": 4289287168.0, + "15": 4289287168.0, + "16": 4289287168.0, + "17": 4289287168.0, + "18": 4289287168.0, + "19": 4289287168.0, + "20": 4289287168.0, + "21": 4289287168.0, + "22": 4289287168.0, + "23": 4289287168.0, + "24": 4289287168.0, + "25": 4289287168.0, + "26": 4289287168.0, + "27": 4289287168.0, + "28": 4289287168.0, + "29": 4289287168.0, + "30": 4289287168.0, + "31": 4289287168.0, + "32": 4289287168.0, + "33": 4289287168.0, + "34": 4289287168.0, + "35": 4289287168.0, + "36": 4289287168.0, + "37": 4289287168.0, + "38": 4289287168.0, + "39": 4289287168.0, + "40": 4289287168.0, + "41": 4289287168.0, + "42": 4289287168.0, + "43": 4289287168.0, + "44": 4289287168.0, + "45": 4289287168.0, + "46": 4289287168.0, + "47": 4289287168.0, + "48": 4289287168.0, + "49": 4289287168.0, + "50": 4289287168.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 18.57368, + "2": 0.50382, + "3": 0.41522, + "4": 0.37227, + "5": 0.37501, + "6": 0.33117, + "7": 0.32515, + "8": 0.31941, + "9": 0.32367, + "10": 0.32326, + "11": 0.30606, + "12": 0.30616, + "13": 0.29955, + "14": 0.30443, + "15": 0.30558, + "16": 0.29289, + "17": 0.30498, + "18": 0.29213, + "19": 0.29318, + "20": 0.29695, + "21": 0.29798, + "22": 0.31295, + "23": 0.29473, + "24": 0.29975, + "25": 0.29698, + "26": 0.30574, + "27": 0.29785, + "28": 0.30807, + "29": 0.29928, + "30": 0.3087, + "31": 0.30718, + "32": 0.30993, + "33": 0.30203, + "34": 0.31719, + "35": 0.30742, + "36": 0.30563, + "37": 0.31427, + "38": 0.31171, + "39": 0.31768, + "40": 0.30755, + "41": 0.30394, + "42": 0.29792, + "43": 0.30454, + "44": 0.31398, + "45": 0.29651, + "46": 0.31171, + "47": 0.29161, + "48": 0.3034, + "49": 0.2972, + "50": 0.29959 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..58eb3fc16cd --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.95004, + "2": 10.9521, + "3": 10.5115, + "4": 9.96454, + "5": 9.93941, + "6": 9.67273, + "7": 10.20975, + "8": 9.49716, + "9": 9.55902, + "10": 9.79742, + "11": 9.30109, + "12": 9.40483, + "13": 9.39546, + "14": 8.84681, + "15": 9.02444, + "16": 9.07121, + "17": 9.04574, + "18": 8.75678, + "19": 9.18159, + "20": 8.8595, + "21": 8.53503, + "22": 8.55182, + "23": 8.42441, + "24": 8.37608, + "25": 8.64304, + "26": 7.97393, + "27": 8.56806, + "28": 8.19764, + "29": 8.3928, + "30": 8.67283, + "31": 8.289, + "32": 8.43572, + "33": 8.5568, + "34": 8.66018, + "35": 8.07934, + "36": 7.94976, + "37": 8.29565, + "38": 7.98044, + "39": 8.39201, + "40": 8.35513, + "41": 8.31876, + "42": 8.0583, + "43": 8.03283, + "44": 8.24243, + "45": 8.10277, + "46": 7.61696, + "47": 8.15273, + "48": 8.00569, + "49": 8.38688, + "50": 7.81491 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 19403624.0, + "2": 19274194.0, + "3": 19372760.0, + "4": 86525248.0, + "5": 148575568.0, + "6": 145226704.0, + "7": 171879984.0, + "8": 195785248.0, + "9": 164124752.0, + "10": 167684736.0, + "11": 221077344.0, + "12": 200384224.0, + "13": 248872528.0, + "14": 211169424.0, + "15": 214304608.0, + "16": 216075632.0, + "17": 267845984.0, + "18": 170470336.0, + "19": 176865072.0, + "20": 187955392.0, + "21": 225750704.0, + "22": 
247396816.0, + "23": 211643856.0, + "24": 205638464.0, + "25": 277022272.0, + "26": 291562304.0, + "27": 225789840.0, + "28": 288202368.0, + "29": 198390384.0, + "30": 213302208.0, + "31": 227204752.0, + "32": 271112416.0, + "33": 231840432.0, + "34": 203575536.0, + "35": 191152368.0, + "36": 222566928.0, + "37": 177810112.0, + "38": 228708544.0, + "39": 211168784.0, + "40": 215603968.0, + "41": 200089440.0, + "42": 228529888.0, + "43": 198782848.0, + "44": 141902272.0, + "45": 181922816.0, + "46": 115369856.0, + "47": 170214176.0, + "48": 137292832.0, + "49": 97654936.0, + "50": 160979632.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4883602432.0, + "2": 4885017088.0, + "3": 4882657792.0, + "4": 4883046912.0, + "5": 4883725824.0, + "6": 4883713536.0, + "7": 4883040768.0, + "8": 4883273216.0, + "9": 4882952704.0, + "10": 4885949952.0, + "11": 4883990016.0, + "12": 4887679488.0, + "13": 4884011520.0, + "14": 4882899456.0, + "15": 4883515904.0, + "16": 4883990016.0, + "17": 4883410432.0, + "18": 4883673600.0, + "19": 4882903552.0, + "20": 4884541952.0, + "21": 4883138048.0, + "22": 4883247616.0, + "23": 4883839488.0, + "24": 4885058048.0, + "25": 4882676224.0, + "26": 4884058624.0, + "27": 4884724224.0, + "28": 4884874752.0, + "29": 4883127808.0, + "30": 4883252736.0, + "31": 4882955776.0, + "32": 4885190144.0, + "33": 4883845632.0, + "34": 4884392448.0, + "35": 4883083776.0, + "36": 4883851776.0, + "37": 4885246464.0, + "38": 4882680320.0, + "39": 4884296192.0, + "40": 4884689408.0, + "41": 4882836992.0, + "42": 4883972608.0, + "43": 4884519424.0, + "44": 4883354112.0, + "45": 4883495424.0, + "46": 4882788864.0, + "47": 4883144192.0, + "48": 4883688960.0, + "49": 4884182528.0, + "50": 4885279232.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 41210470400.0, + "2": 41210470400.0, + "3": 41210470400.0, + "4": 41210470400.0, 
+ "5": 41210470400.0, + "6": 41210470400.0, + "7": 41210470400.0, + "8": 41210470400.0, + "9": 41210470400.0, + "10": 41210470400.0, + "11": 41210470400.0, + "12": 41210470400.0, + "13": 41210470400.0, + "14": 41210470400.0, + "15": 41210470400.0, + "16": 41210470400.0, + "17": 41210470400.0, + "18": 41210470400.0, + "19": 41210470400.0, + "20": 41210470400.0, + "21": 41210470400.0, + "22": 41210470400.0, + "23": 41210470400.0, + "24": 41210470400.0, + "25": 41210470400.0, + "26": 41210470400.0, + "27": 41210470400.0, + "28": 41210470400.0, + "29": 41210470400.0, + "30": 41210470400.0, + "31": 41210470400.0, + "32": 41210470400.0, + "33": 41210470400.0, + "34": 41210470400.0, + "35": 41210470400.0, + "36": 41210470400.0, + "37": 41210470400.0, + "38": 41210470400.0, + "39": 41210470400.0, + "40": 41210470400.0, + "41": 41210470400.0, + "42": 41210470400.0, + "43": 41210470400.0, + "44": 41210470400.0, + "45": 41210470400.0, + "46": 41210470400.0, + "47": 41210470400.0, + "48": 41210470400.0, + "49": 41210470400.0, + "50": 41210470400.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 86.8085, + "2": 1.10913, + "3": 0.99097, + "4": 0.89412, + "5": 1.25997, + "6": 0.98162, + "7": 0.98318, + "8": 1.13296, + "9": 0.88126, + "10": 0.8633, + "11": 2.2744, + "12": 4.5393, + "13": 3.22763, + "14": 1.64923, + "15": 0.86595, + "16": 0.86575, + "17": 0.85272, + "18": 0.85454, + "19": 0.85281, + "20": 0.87018, + "21": 0.84654, + "22": 0.8494, + "23": 0.84882, + "24": 0.84482, + "25": 0.85311, + "26": 0.84678, + "27": 0.84096, + "28": 0.8412, + "29": 0.84156, + "30": 0.84475, + "31": 0.84747, + "32": 0.85058, + "33": 0.84977, + "34": 0.8479, + "35": 0.85234, + "36": 0.85012, + "37": 0.85087, + "38": 0.84594, + "39": 0.84558, + "40": 0.84807, + "41": 0.84183, + "42": 0.8439, + "43": 0.84221, + "44": 0.84248, + "45": 0.84257, + "46": 0.83922, + "47": 0.84311, + "48": 0.84159, + "49": 0.84011, + "50": 0.8353 + } + } +} 
\ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..daa04af43dd --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.95004, + "2": 10.9521, + "3": 10.5115, + "4": 9.96454, + "5": 9.93941, + "6": 9.67273, + "7": 10.20975, + "8": 9.49716, + "9": 9.55902, + "10": 9.79742, + "11": 9.30109, + "12": 9.40483, + "13": 9.39546, + "14": 8.84681, + "15": 9.02444, + "16": 9.07121, + "17": 9.04574, + "18": 8.75678, + "19": 9.18159, + "20": 8.8595, + "21": 8.53503, + "22": 8.55182, + "23": 8.42441, + "24": 8.37608, + "25": 8.64304, + "26": 7.97393, + "27": 8.56806, + "28": 8.19764, + "29": 8.3928, + "30": 8.67283, + "31": 8.289, + "32": 8.43572, + "33": 8.5568, + "34": 8.66018, + "35": 8.07934, + "36": 7.94976, + "37": 8.29565, + "38": 7.98044, + "39": 8.39201, + "40": 8.35513, + "41": 8.31876, + "42": 8.0583, + "43": 8.03283, + "44": 8.24243, + "45": 8.10277, + "46": 7.61696, + "47": 8.15273, + "48": 8.00569, + "49": 8.38688, + "50": 7.81491 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 19403624.0, + "2": 19274194.0, + "3": 19372760.0, + "4": 86525248.0, + "5": 148575568.0, + "6": 145226704.0, + "7": 171879984.0, + "8": 195785248.0, + "9": 164124752.0, + "10": 167684736.0, + "11": 221077344.0, + "12": 200384224.0, + "13": 248872528.0, + "14": 211169424.0, + "15": 214304608.0, + "16": 216075632.0, + "17": 267845984.0, + "18": 170470336.0, + "19": 176865072.0, + "20": 187955392.0, + "21": 
225750704.0, + "22": 247396816.0, + "23": 211643856.0, + "24": 205638464.0, + "25": 277022272.0, + "26": 291562304.0, + "27": 225789840.0, + "28": 288202368.0, + "29": 198390384.0, + "30": 213302208.0, + "31": 227204752.0, + "32": 271112416.0, + "33": 231840432.0, + "34": 203575536.0, + "35": 191152368.0, + "36": 222566928.0, + "37": 177810112.0, + "38": 228708544.0, + "39": 211168784.0, + "40": 215603968.0, + "41": 200089440.0, + "42": 228529888.0, + "43": 198782848.0, + "44": 141902272.0, + "45": 181922816.0, + "46": 115369856.0, + "47": 170214176.0, + "48": 137292832.0, + "49": 97654936.0, + "50": 160979632.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4882187264.0, + "2": 4881607168.0, + "3": 4882283008.0, + "4": 4881322496.0, + "5": 4882174464.0, + "6": 4883177984.0, + "7": 4883252736.0, + "8": 4881774080.0, + "9": 4881443328.0, + "10": 4884319744.0, + "11": 4882319872.0, + "12": 4881232384.0, + "13": 4880836096.0, + "14": 4882124288.0, + "15": 4882108928.0, + "16": 4883384832.0, + "17": 4880466432.0, + "18": 4881518080.0, + "19": 4881734144.0, + "20": 4883215872.0, + "21": 4883534336.0, + "22": 4882774528.0, + "23": 4881818112.0, + "24": 4882441728.0, + "25": 4880546304.0, + "26": 4882178560.0, + "27": 4881892864.0, + "28": 4881869312.0, + "29": 4882979328.0, + "30": 4882715136.0, + "31": 4883084800.0, + "32": 4881436160.0, + "33": 4881766912.0, + "34": 4881406464.0, + "35": 4881531392.0, + "36": 4881479168.0, + "37": 4882455040.0, + "38": 4882054656.0, + "39": 4882005504.0, + "40": 4882743808.0, + "41": 4881211904.0, + "42": 4881378816.0, + "43": 4882133504.0, + "44": 4881860096.0, + "45": 4883165696.0, + "46": 4882168320.0, + "47": 4881526272.0, + "48": 4882125312.0, + "49": 4881533440.0, + "50": 4881598976.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 41210470400.0, + "2": 41210470400.0, + "3": 41210470400.0, + 
"4": 41210470400.0, + "5": 41210470400.0, + "6": 41210470400.0, + "7": 41210470400.0, + "8": 41210470400.0, + "9": 41210470400.0, + "10": 41210470400.0, + "11": 41210470400.0, + "12": 41210470400.0, + "13": 41210470400.0, + "14": 41210470400.0, + "15": 41210470400.0, + "16": 41210470400.0, + "17": 41210470400.0, + "18": 41210470400.0, + "19": 41210470400.0, + "20": 41210470400.0, + "21": 41210470400.0, + "22": 41210470400.0, + "23": 41210470400.0, + "24": 41210470400.0, + "25": 41210470400.0, + "26": 41210470400.0, + "27": 41210470400.0, + "28": 41210470400.0, + "29": 41210470400.0, + "30": 41210470400.0, + "31": 41210470400.0, + "32": 41210470400.0, + "33": 41210470400.0, + "34": 41210470400.0, + "35": 41210470400.0, + "36": 41210470400.0, + "37": 41210470400.0, + "38": 41210470400.0, + "39": 41210470400.0, + "40": 41210470400.0, + "41": 41210470400.0, + "42": 41210470400.0, + "43": 41210470400.0, + "44": 41210470400.0, + "45": 41210470400.0, + "46": 41210470400.0, + "47": 41210470400.0, + "48": 41210470400.0, + "49": 41210470400.0, + "50": 41210470400.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 96.21947, + "2": 1.10023, + "3": 0.96399, + "4": 0.91113, + "5": 1.27509, + "6": 1.00484, + "7": 1.01236, + "8": 1.1739, + "9": 0.89406, + "10": 0.88836, + "11": 0.92033, + "12": 0.88331, + "13": 0.88179, + "14": 0.88307, + "15": 0.88648, + "16": 0.88425, + "17": 0.87155, + "18": 0.87556, + "19": 0.87374, + "20": 0.8744, + "21": 0.86757, + "22": 0.87217, + "23": 0.8736, + "24": 0.86646, + "25": 0.87328, + "26": 0.87121, + "27": 0.85886, + "28": 0.86392, + "29": 0.86385, + "30": 0.86425, + "31": 0.8631, + "32": 0.8617, + "33": 0.86069, + "34": 0.86829, + "35": 0.86837, + "36": 0.86776, + "37": 0.86686, + "38": 0.86359, + "39": 0.8677, + "40": 0.86441, + "41": 0.86179, + "42": 0.86079, + "43": 0.86149, + "44": 0.86222, + "45": 0.86336, + "46": 0.85875, + "47": 0.86219, + "48": 0.86026, + "49": 0.85894, + 
"50": 0.8544 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..7de18dbf1e6 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,160 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"Ladies and gentlemen, we have a", + "generated_tokens": [ + 32844, + 1394, + 1278, + 4735, + 2200, + 1278, + 7146, + 30774, + 1044, + 1321, + 1278, + 26466, + 3930, + 2015, + 1044, + 1321, + 1278, + 30245, + 8223, + 1044, + 1429, + 1076, + 1332, + 1564, + 1321, + 94228, + 1044, + 1729, + 1736, + 1261 + ], + "latency": 14.808601379394531, + "cuda_graph_request_count_map": null, + "step_count": 30, + "logprobs": [ + -10.448518753051758, + -3.716420888900757, + -2.8180086612701416, + -1.243106484413147, + -0.24187560379505157, + -1.8551081418991089, + -2.376569986343384, + -1.9645118713378906, + -2.109349250793457, + -6.07373046875, + -0.813377320766449, + -2.4836418628692627, + -3.536860466003418, + -4.1545329093933105, + -1.963995099067688, + -1.7819465398788452, + -2.2402775287628174, + -7.124850273132324, + -0.04146730899810791, + -1.8956966400146484, + -5.0023088455200195, + -8.669596672058105, + -10.017587661743164, + -0.8539565801620483, + 
-4.7854743003845215, + -0.8601016402244568, + -2.2983784675598145, + -0.02070772275328636, + -0.03607859089970589, + -3.367767095565796, + -8.763198852539062, + -1.2790724039077759, + -6.628117084503174, + -3.8508503437042236, + -3.782344341278076, + -4.235904693603516, + -2.1973977088928223, + -1.0788196325302124, + -0.2020731270313263, + -0.7980086207389832, + -4.743488311767578, + -9.224939346313477, + -0.013240979053080082, + -3.2384161949157715, + -1.2238521575927734, + -3.97282338142395, + -0.7767954468727112, + -0.002251710742712021, + -2.981565475463867, + -10.490681648254395, + -2.981630802154541, + -1.152982234954834, + -4.928977012634277, + -0.20187364518642426, + -0.04966790974140167, + -1.3515344858169556, + -2.200042963027954, + -4.3557939529418945, + -0.36048629879951477, + -4.087867736816406, + -0.40046849846839905, + -0.13703589141368866, + -2.805037260055542, + -10.71006965637207, + -0.051668114960193634, + -3.277766704559326, + -0.8607810735702515, + -4.699098110198975, + -0.2629980444908142, + -2.686246633529663, + -0.8297598361968994, + -1.6083959341049194, + -5.793962478637695, + -16.94595718383789, + -2.966357707977295, + -0.11854737997055054, + -7.449464321136475, + -1.0872507095336914, + -2.057858943939209, + -1.5261168479919434, + -0.2606821358203888, + -5.62846565246582, + -0.006751700770109892, + -7.793324947357178, + -2.7264108657836914, + -2.9370150566101074, + -3.0170741081237793, + -2.344959020614624, + -0.3987772464752197, + -1.5143157243728638, + -2.3020801544189453, + -0.5609080791473389, + -1.3160275220870972, + -1.987931728363037, + -1.7064098119735718, + -0.7751765847206116, + -0.49781349301338196, + -1.2841160297393799, + -1.5651875734329224, + -0.9735848307609558, + -0.4030272364616394, + -0.4352472424507141, + -0.044518083333969116, + -1.274898648262024, + -2.1242129802703857, + -2.7193076610565186, + -0.7538051009178162, + -0.41235291957855225, + -2.793597459793091, + -0.005556969437748194, + -0.0006632988806813955, + 
-0.018600093200802803, + -0.2592391073703766, + -0.1656094491481781, + -1.9508270025253296, + -1.1184629201889038, + -0.39283478260040283 + ] + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..6da9de60910 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,160 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"Ladies and gentlemen, we have a", + "generated_tokens": [ + 32844, + 1394, + 1278, + 4735, + 2200, + 1278, + 7146, + 30774, + 1044, + 1321, + 1278, + 26466, + 3930, + 2015, + 1044, + 1321, + 1278, + 30245, + 8223, + 1044, + 1429, + 1076, + 1332, + 1564, + 1321, + 94228, + 1044, + 1729, + 1736, + 1261 + ], + "latency": 16.235759735107422, + "cuda_graph_request_count_map": null, + "step_count": 30, + "logprobs": [ + -10.448518753051758, + -3.716420888900757, + -2.8180086612701416, + -1.243106484413147, + -0.24187560379505157, + -1.8551081418991089, + -2.376569986343384, + -1.9645118713378906, + -2.109349250793457, + -6.07373046875, + -0.813377320766449, + -2.4836418628692627, + -3.536860466003418, + -4.1545329093933105, + -1.963995099067688, + -1.7819465398788452, + -2.2402775287628174, + -7.124850273132324, + -0.04146730899810791, + 
-1.8956966400146484, + -5.0023088455200195, + -8.669596672058105, + -10.017587661743164, + -0.8539565801620483, + -4.7854743003845215, + -0.8601016402244568, + -2.2983784675598145, + -0.02070772275328636, + -0.03607859089970589, + -3.367767095565796, + -8.763198852539062, + -1.2790724039077759, + -6.628117084503174, + -3.8508503437042236, + -3.782344341278076, + -4.235904693603516, + -2.1973977088928223, + -1.0788196325302124, + -0.2020731270313263, + -0.7980086207389832, + -4.743488311767578, + -9.224939346313477, + -0.013240979053080082, + -3.2384161949157715, + -1.2238521575927734, + -3.97282338142395, + -0.7767954468727112, + -0.002251710742712021, + -2.981565475463867, + -10.490681648254395, + -2.981630802154541, + -1.152982234954834, + -4.928977012634277, + -0.20187364518642426, + -0.04966790974140167, + -1.3515344858169556, + -2.200042963027954, + -4.3557939529418945, + -0.36048629879951477, + -4.087867736816406, + -0.40046849846839905, + -0.13703589141368866, + -2.805037260055542, + -10.71006965637207, + -0.051668114960193634, + -3.277766704559326, + -0.8607810735702515, + -4.699098110198975, + -0.2629980444908142, + -2.686246633529663, + -0.8297598361968994, + -1.6083959341049194, + -5.793962478637695, + -16.94595718383789, + -2.966357707977295, + -0.11854737997055054, + -7.449464321136475, + -1.0872507095336914, + -2.057858943939209, + -1.5261168479919434, + -0.2606821358203888, + -5.62846565246582, + -0.006751700770109892, + -7.793324947357178, + -2.7264108657836914, + -2.9370150566101074, + -3.0170741081237793, + -2.344959020614624, + -0.3987772464752197, + -1.5143157243728638, + -2.3020801544189453, + -0.5609080791473389, + -1.3160275220870972, + -1.987931728363037, + -1.7064098119735718, + -0.7751765847206116, + -0.49781349301338196, + -1.2841160297393799, + -1.5651875734329224, + -0.9735848307609558, + -0.4030272364616394, + -0.4352472424507141, + -0.044518083333969116, + -1.274898648262024, + -2.1242129802703857, + -2.7193076610565186, + 
-0.7538051009178162, + -0.41235291957855225, + -2.793597459793091, + -0.005556969437748194, + -0.0006632988806813955, + -0.018600093200802803, + -0.2592391073703766, + -0.1656094491481781, + -1.9508270025253296, + -1.1184629201889038, + -0.39283478260040283 + ] + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..c476959ff0d --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1 @@ +{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"I'm going to play a song for you", "generated_tokens": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], "tpot": [2.733039379119873, 0.6462976336479187, 0.07264169305562973, 0.0742710754275322, 0.07288099080324173, 0.07153938710689545, 0.07088476419448853, 0.07172102481126785, 0.07192070782184601, 0.07100768387317657, 0.07138767838478088, 0.07139590382575989, 0.07129142433404922, 0.07098300755023956, 0.07138735800981522, 0.07275772839784622, 0.07109101116657257, 0.07179228961467743, 0.07515615969896317, 0.07377561926841736, 0.07309594005346298, 0.07474038749933243, 0.07406358420848846, 
0.07546690851449966, 0.07540509104728699, 0.0726393610239029, 0.07218870520591736, 0.07215183973312378, 0.07114642858505249, 0.07710829377174377], "latency": 5.430960623547435, "logprobs": [-9.965213775634766, -3.6972405910491943, -2.8163998126983643, -1.3259482383728027, -0.22894315421581268, -1.801922082901001, -2.380244493484497, -1.9902539253234863, -2.195096731185913, -6.201530456542969, -0.8732167482376099, -2.3890693187713623, -3.4655370712280273, -4.265195369720459, -1.9843286275863647, -1.8525164127349854, -2.247467517852783, -7.156258583068848, -0.04102461040019989, -1.9811111688613892, -5.029601097106934, -8.902811050415039, -9.822186470031738, -0.7156577706336975, -4.822559833526611, -0.830146074295044, -2.264935255050659, -0.02063065394759178, -0.0366678312420845, -3.4783172607421875, -8.650375366210938, -1.247912883758545, -6.612592697143555, -3.64731502532959, -3.6577675342559814, -4.237436771392822, -2.1768712997436523, -1.0792245864868164, -0.22580334544181824, -0.7873495221138, -4.81827974319458, -8.96638011932373, -0.01367227640002966, -3.1769614219665527, -1.3207263946533203, -3.995314121246338, -0.7868635654449463, -0.0021346656139940023, -2.9099419116973877, -10.611204147338867, -3.244929313659668, -1.103176474571228, -4.869075775146484, -0.2279863953590393, -0.06238075718283653, -1.2982008457183838, -2.208366632461548, -4.412147045135498, -0.3588172495365143, -4.0025200843811035, -0.3714170753955841, -0.14747798442840576, -2.7178127765655518, -10.553118705749512, -0.057451825588941574, -3.381279945373535, -0.8944476842880249, -4.724348068237305, -0.25962480902671814, -2.655942678451538, -0.8473785519599915, -1.5853822231292725, -5.768069267272949, -16.949235916137695, -2.675042152404785, -0.12979209423065186, -7.452098369598389, -1.1089909076690674, -2.0911808013916016, -1.5204540491104126, -0.29428866505622864, -5.85228157043457, -0.006600246299058199, -7.733879089355469, -2.7058277130126953, -2.9573605060577393, -3.0196847915649414, 
-2.450732469558716, -0.3994073271751404, -1.426312804222107, -2.2726848125457764, -0.6103246212005615, -1.3297024965286255, -1.936716914176941, -1.7187526226043701, -0.7779486775398254, -0.5053722858428955, -1.300978660583496, -1.588526964187622, -0.9849303960800171, -0.4031231701374054, -0.4341556429862976, -0.04193130508065224, -1.2715754508972168, -2.116468906402588, -2.6802122592926025, -0.8255553245544434, -0.42921727895736694, -2.904050350189209, -1.4616029262542725, -1.6294372081756592, -0.05650198459625244, -1.3804056644439697, -1.3228214979171753, -1.268000602722168, -1.2933895587921143, -0.5357464551925659]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..b6c02c060a6 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1 @@ +{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"I'm going to play a song for you", "generated_tokens": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], "tpot": [2.503589630126953, 0.6800563931465149, 0.08277347683906555, 0.08122985810041428, 0.08191356807947159, 0.08138781040906906, 0.08179532736539841, 0.08161459118127823, 0.07997913658618927, 0.08174006640911102, 0.08130563050508499, 0.08119283616542816, 0.083525151014328, 0.0812133401632309, 0.08146921545267105, 0.0823666900396347, 0.0816216692328453, 0.08155478537082672, 0.08143103867769241, 0.08030703663825989, 0.08146540820598602, 0.0814284160733223, 0.0816466212272644, 0.08187752962112427, 0.0818835198879242, 0.08184870332479477, 0.08239014446735382, 0.08098144084215164, 0.08134105801582336, 0.0866490826010704], "latency": 5.490644988021813, "logprobs": [-9.965213775634766, -3.6972405910491943, -2.8163998126983643, -1.3259482383728027, -0.22894315421581268, -1.801922082901001, -2.380244493484497, -1.9902539253234863, -2.195096731185913, -6.201530456542969, -0.8732167482376099, -2.3890693187713623, -3.4655370712280273, -4.265195369720459, -1.9843286275863647, -1.8525164127349854, -2.247467517852783, -7.156258583068848, -0.04102461040019989, -1.9811111688613892, -5.029601097106934, -8.902811050415039, -9.822186470031738, -0.7156577706336975, -4.822559833526611, -0.830146074295044, -2.264935255050659, -0.02063065394759178, -0.0366678312420845, -3.4783172607421875, -8.650375366210938, -1.247912883758545, -6.612592697143555, -3.64731502532959, -3.6577675342559814, -4.237436771392822, -2.1768712997436523, -1.0792245864868164, 
-0.22580334544181824, -0.7873495221138, -4.81827974319458, -8.96638011932373, -0.01367227640002966, -3.1769614219665527, -1.3207263946533203, -3.995314121246338, -0.7868635654449463, -0.0021346656139940023, -2.9099419116973877, -10.611204147338867, -3.244929313659668, -1.103176474571228, -4.869075775146484, -0.2279863953590393, -0.06238075718283653, -1.2982008457183838, -2.208366632461548, -4.412147045135498, -0.3588172495365143, -4.0025200843811035, -0.3714170753955841, -0.14747798442840576, -2.7178127765655518, -10.553118705749512, -0.057451825588941574, -3.381279945373535, -0.8944476842880249, -4.724348068237305, -0.25962480902671814, -2.655942678451538, -0.8473785519599915, -1.5853822231292725, -5.768069267272949, -16.949235916137695, -2.675042152404785, -0.12979209423065186, -7.452098369598389, -1.1089909076690674, -2.0911808013916016, -1.5204540491104126, -0.29428866505622864, -5.85228157043457, -0.006600246299058199, -7.733879089355469, -2.7058277130126953, -2.9573605060577393, -3.0196847915649414, -2.450732469558716, -0.3994073271751404, -1.426312804222107, -2.2726848125457764, -0.6103246212005615, -1.3297024965286255, -1.936716914176941, -1.7187526226043701, -0.7779486775398254, -0.5053722858428955, -1.300978660583496, -1.588526964187622, -0.9849303960800171, -0.4031231701374054, -0.4341556429862976, -0.04193130508065224, -1.2715754508972168, -2.116468906402588, -2.6802122592926025, -0.8255553245544434, -0.42921727895736694, -2.904050350189209, -1.4616029262542725, -1.6294372081756592, -0.05650198459625244, -1.3804056644439697, -1.3228214979171753, -1.268000602722168, -1.2933895587921143, -0.5357464551925659]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..73fd0caaba6 
--- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1 @@ +{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"I'm going to play a song for you", "generated_tokens": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], "tpot": [10.43424129486084, 0.7638993859291077, 0.09107974171638489, 0.08577366918325424, 0.08719602972269058, 0.083721823990345, 0.08272668719291687, 0.08146601915359497, 0.08189938962459564, 0.08049139380455017, 0.07883225381374359, 0.07785692811012268, 0.08183623105287552, 0.07833318412303925, 0.07873958349227905, 0.07888400554656982, 0.07648000121116638, 0.07849132269620895, 0.07743385434150696, 0.0782134085893631, 0.07679852843284607, 0.08008908480405807, 0.07658396661281586, 0.07823677361011505, 0.07748432457447052, 0.0787697285413742, 0.08206255733966827, 0.08375174552202225, 0.08225465565919876, 0.07925853133201599], "latency": 13.472718173637986, "logprobs": [-10.448518753051758, -3.693941593170166, -2.833103656768799, -1.2445695400238037, -0.23799529671669006, -1.7522815465927124, -2.378152370452881, -1.9484899044036865, -2.108924388885498, -6.127920150756836, -0.8197959661483765, -2.477976083755493, -3.492497444152832, -4.170319557189941, -1.9918553829193115, -1.8618279695510864, -2.2335567474365234, -7.071791172027588, -0.039936937391757965, -1.9948835372924805, -5.008172512054443, 
-8.708097457885742, -9.903486251831055, -0.851460337638855, -4.765171051025391, -0.8707393407821655, -2.219733238220215, -0.01853257417678833, -0.035978663712739944, -3.387631416320801, -8.754067420959473, -1.2686023712158203, -6.662981986999512, -3.7872395515441895, -3.6667354106903076, -4.171259880065918, -2.2128500938415527, -1.091404914855957, -0.22139909863471985, -0.8265669941902161, -4.746159553527832, -9.04170036315918, -0.013459297828376293, -3.17301607131958, -1.3139652013778687, -3.9821701049804688, -0.7707944512367249, -0.002040567807853222, -2.9162371158599854, -10.677328109741211, -3.1504364013671875, -1.1485933065414429, -4.871399402618408, -0.20786719024181366, -0.06325722485780716, -1.3587590456008911, -2.207646369934082, -4.407937049865723, -0.36253970861434937, -4.0189995765686035, -0.3988611698150635, -0.13855230808258057, -2.7199528217315674, -10.558171272277832, -0.04671315476298332, -3.5006980895996094, -0.9756439328193665, -4.673828125, -0.2634696066379547, -2.5747756958007812, -0.8531911969184875, -1.6041897535324097, -5.738401412963867, -16.978456497192383, -2.6206722259521484, -0.14098073542118073, -7.450814247131348, -1.076573371887207, -2.129807472229004, -1.5724716186523438, -0.29326727986335754, -5.609436511993408, -0.0065282415598630905, -7.79502010345459, -2.715085744857788, -3.0889575481414795, -3.0355961322784424, -2.4395439624786377, -0.3983170986175537, -1.5089631080627441, -2.276723861694336, -0.6004312038421631, -1.3054823875427246, -1.9454480409622192, -1.7226327657699585, -0.7742734551429749, -0.49186939001083374, -1.2962923049926758, -1.567298173904419, -1.0149078369140625, -0.40288272500038147, -0.4789682626724243, -0.04533138871192932, -1.2695876359939575, -2.223480224609375, -2.6703481674194336, -0.7677091956138611, -0.42749911546707153, -2.8563802242279053, -1.5350499153137207, -1.6456167697906494, -0.05149398744106293, -1.3739523887634277, -1.3543274402618408, -1.2655469179153442, -1.307403326034546, 
-0.497008740901947]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..6e6ce1505c0 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1 @@ +{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"I'm going to play a song for you", "generated_tokens": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], "tpot": [10.709007263183594, 0.669678270816803, 0.09804461151361465, 0.095348060131073, 0.09667164832353592, 0.09561737626791, 0.0947360023856163, 0.09328848123550415, 0.11012643575668335, 0.1598961353302002, 0.14693699777126312, 0.09262124449014664, 0.09305571019649506, 0.09258509427309036, 0.09176912158727646, 0.09196281433105469, 0.09245385974645615, 0.09135404974222183, 0.0905960276722908, 0.09146220982074738, 0.09172549843788147, 0.09252317249774933, 0.09408310800790787, 0.09566400200128555, 0.09318371117115021, 0.09361443668603897, 0.09234358370304108, 0.09181750565767288, 0.09110204875469208, 0.09121545404195786], "latency": 14.143519142875448, "logprobs": [-10.448518753051758, -3.693941593170166, -2.833103656768799, -1.2445695400238037, 
-0.23799529671669006, -1.7522815465927124, -2.378152370452881, -1.9484899044036865, -2.108924388885498, -6.127920150756836, -0.8197959661483765, -2.477976083755493, -3.492497444152832, -4.170319557189941, -1.9918553829193115, -1.8618279695510864, -2.2335567474365234, -7.071791172027588, -0.039936937391757965, -1.9948835372924805, -5.008172512054443, -8.708097457885742, -9.903486251831055, -0.851460337638855, -4.765171051025391, -0.8707393407821655, -2.219733238220215, -0.01853257417678833, -0.035978663712739944, -3.387631416320801, -8.754067420959473, -1.2686023712158203, -6.662981986999512, -3.7872395515441895, -3.6667354106903076, -4.171259880065918, -2.2128500938415527, -1.091404914855957, -0.22139909863471985, -0.8265669941902161, -4.746159553527832, -9.04170036315918, -0.013459297828376293, -3.17301607131958, -1.3139652013778687, -3.9821701049804688, -0.7707944512367249, -0.002040567807853222, -2.9162371158599854, -10.677328109741211, -3.1504364013671875, -1.1485933065414429, -4.871399402618408, -0.20786719024181366, -0.06325722485780716, -1.3587590456008911, -2.207646369934082, -4.407937049865723, -0.36253970861434937, -4.0189995765686035, -0.3988611698150635, -0.13855230808258057, -2.7199528217315674, -10.558171272277832, -0.04671315476298332, -3.5006980895996094, -0.9756439328193665, -4.673828125, -0.2634696066379547, -2.5747756958007812, -0.8531911969184875, -1.6041897535324097, -5.738401412963867, -16.978456497192383, -2.6206722259521484, -0.14098073542118073, -7.450814247131348, -1.076573371887207, -2.129807472229004, -1.5724716186523438, -0.29326727986335754, -5.609436511993408, -0.0065282415598630905, -7.79502010345459, -2.715085744857788, -3.0889575481414795, -3.0355961322784424, -2.4395439624786377, -0.3983170986175537, -1.5089631080627441, -2.276723861694336, -0.6004312038421631, -1.3054823875427246, -1.9454480409622192, -1.7226327657699585, -0.7742734551429749, -0.49186939001083374, -1.2962923049926758, -1.567298173904419, -1.0149078369140625, 
-0.40288272500038147, -0.4789682626724243, -0.04533138871192932, -1.2695876359939575, -2.223480224609375, -2.6703481674194336, -0.7677091956138611, -0.42749911546707153, -2.8563802242279053, -1.5350499153137207, -1.6456167697906494, -0.05149398744106293, -1.3739523887634277, -1.3543274402618408, -1.2655469179153442, -1.307403326034546, -0.497008740901947]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..c9c6ca750a5 --- /dev/null +++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9.14877, + "2": 9.15171, + "3": 9.14691, + "4": 9.15346, + "5": 9.15057, + "6": 9.14683, + "7": 9.14378, + "8": 9.14363, + "9": 9.15069, + "10": 9.15231, + "11": 9.14609, + "12": 9.14125, + "13": 9.1414, + "14": 9.14248, + "15": 9.13419, + "16": 9.12601, + "17": 9.12407, + "18": 9.12053, + "19": 9.11789, + "20": 9.09777, + "21": 9.06948, + "22": 9.06985, + "23": 9.07079, + "24": 9.06043, + "25": 9.05505, + "26": 9.05713, + "27": 9.04089, + "28": 9.0186, + "29": 9.00353, + "30": 8.99697, + "31": 8.99484, + "32": 8.98416, + "33": 8.97763, + "34": 8.98617, + "35": 8.94993, + "36": 8.94557, + "37": 8.92133, + "38": 8.94104, + "39": 8.92482, + "40": 8.87122, + "41": 8.89627, + "42": 8.87601, + "43": 8.87414, + "44": 8.8411, + "45": 8.81228, + "46": 8.79564, + "47": 8.84576, + "48": 8.77191, + "49": 8.78047, + "50": 8.76196 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3477955.0, + "2": 3392302.0, + "3": 
3630021.0, + "4": 3532452.0, + "5": 3783960.0, + "6": 3584449.0, + "7": 3478372.0, + "8": 3414330.0, + "9": 3511649.0, + "10": 3544311.0, + "11": 3475468.0, + "12": 3518965.0, + "13": 3591786.0, + "14": 3549396.0, + "15": 3421163.0, + "16": 3383319.0, + "17": 3424120.0, + "18": 3509184.0, + "19": 3426107.0, + "20": 3465915.0, + "21": 3700118.0, + "22": 3474397.0, + "23": 3693474.0, + "24": 3405657.0, + "25": 3457588.0, + "26": 3479130.0, + "27": 3555371.0, + "28": 3496999.0, + "29": 3561842.0, + "30": 3708011.0, + "31": 3397663.0, + "32": 3467970.0, + "33": 3515742.0, + "34": 3501589.0, + "35": 3432484.0, + "36": 3453953.0, + "37": 3958777.0, + "38": 3488640.0, + "39": 3409958.0, + "40": 3614258.0, + "41": 3425709.0, + "42": 3643603.0, + "43": 3473029.0, + "44": 3448331.0, + "45": 3452202.0, + "46": 3585738.0, + "47": 3467386.0, + "48": 3462962.0, + "49": 3529813.0, + "50": 3412019.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2431335424.0, + "2": 2431335424.0, + "3": 2431335424.0, + "4": 2431335424.0, + "5": 2431335424.0, + "6": 2431335424.0, + "7": 2431335424.0, + "8": 2431335424.0, + "9": 2431335424.0, + "10": 2431335424.0, + "11": 2431335424.0, + "12": 2431335424.0, + "13": 2431335424.0, + "14": 2431335424.0, + "15": 2431335424.0, + "16": 2431335424.0, + "17": 2431335424.0, + "18": 2431335424.0, + "19": 2431335424.0, + "20": 2431335424.0, + "21": 2431335424.0, + "22": 2431335424.0, + "23": 2431335424.0, + "24": 2431335424.0, + "25": 2431335424.0, + "26": 2431335424.0, + "27": 2431335424.0, + "28": 2431335424.0, + "29": 2431335424.0, + "30": 2431335424.0, + "31": 2431335424.0, + "32": 2431335424.0, + "33": 2431335424.0, + "34": 2431335424.0, + "35": 2431335424.0, + "36": 2431335424.0, + "37": 2431335424.0, + "38": 2431335424.0, + "39": 2431335424.0, + "40": 2431335424.0, + "41": 2431335424.0, + "42": 2431335424.0, + "43": 2431335424.0, + "44": 2431335424.0, + "45": 2431335424.0, + "46": 
2431335424.0, + "47": 2431335424.0, + "48": 2431335424.0, + "49": 2431335424.0, + "50": 2431335424.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14740086784.0, + "2": 15773663232.0, + "3": 15773663232.0, + "4": 15773663232.0, + "5": 15773663232.0, + "6": 15773663232.0, + "7": 15773663232.0, + "8": 15773663232.0, + "9": 15773663232.0, + "10": 15773663232.0, + "11": 15773663232.0, + "12": 15773663232.0, + "13": 15773663232.0, + "14": 15773663232.0, + "15": 15773663232.0, + "16": 15773663232.0, + "17": 15773663232.0, + "18": 15773663232.0, + "19": 15773663232.0, + "20": 15773663232.0, + "21": 15773663232.0, + "22": 15773663232.0, + "23": 15773663232.0, + "24": 15773663232.0, + "25": 15773663232.0, + "26": 15773663232.0, + "27": 15773663232.0, + "28": 15773663232.0, + "29": 15773663232.0, + "30": 15773663232.0, + "31": 15773663232.0, + "32": 15773663232.0, + "33": 15773663232.0, + "34": 15773663232.0, + "35": 15773663232.0, + "36": 15773663232.0, + "37": 15773663232.0, + "38": 15773663232.0, + "39": 15773663232.0, + "40": 15773663232.0, + "41": 15773663232.0, + "42": 15773663232.0, + "43": 15773663232.0, + "44": 15773663232.0, + "45": 15773663232.0, + "46": 15773663232.0, + "47": 15773663232.0, + "48": 15773663232.0, + "49": 15773663232.0, + "50": 15773663232.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6.39505, + "2": 0.21516, + "3": 0.18624, + "4": 0.175, + "5": 0.17379, + "6": 0.17879, + "7": 0.17408, + "8": 0.17518, + "9": 0.17364, + "10": 0.17554, + "11": 0.17315, + "12": 0.17503, + "13": 0.17414, + "14": 0.17548, + "15": 0.17545, + "16": 0.17826, + "17": 0.17718, + "18": 0.19728, + "19": 0.18692, + "20": 0.17494, + "21": 0.17798, + "22": 0.19601, + "23": 0.19365, + "24": 0.17678, + "25": 0.17574, + "26": 0.17806, + "27": 0.17921, + "28": 0.18107, + "29": 0.17587, + "30": 0.18109, + "31": 0.18577, + "32": 0.1776, + "33": 
0.17358, + "34": 0.18514, + "35": 0.18404, + "36": 0.18319, + "37": 0.17375, + "38": 0.19861, + "39": 0.18522, + "40": 0.17986, + "41": 0.18196, + "42": 0.17906, + "43": 0.1816, + "44": 0.17873, + "45": 0.1842, + "46": 0.18193, + "47": 0.18207, + "48": 0.18599, + "49": 0.17271, + "50": 0.18388 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..7dbf0c3c806 --- /dev/null +++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9.14877, + "2": 9.15171, + "3": 9.14691, + "4": 9.15346, + "5": 9.15057, + "6": 9.14683, + "7": 9.14378, + "8": 9.14363, + "9": 9.15069, + "10": 9.15231, + "11": 9.14609, + "12": 9.14125, + "13": 9.1414, + "14": 9.14248, + "15": 9.13419, + "16": 9.12601, + "17": 9.12407, + "18": 9.12053, + "19": 9.11789, + "20": 9.09777, + "21": 9.06948, + "22": 9.06985, + "23": 9.07079, + "24": 9.06043, + "25": 9.05505, + "26": 9.05713, + "27": 9.04089, + "28": 9.0186, + "29": 9.00353, + "30": 8.99697, + "31": 8.99484, + "32": 8.98416, + "33": 8.97763, + "34": 8.98617, + "35": 8.94993, + "36": 8.94557, + "37": 8.92133, + "38": 8.94104, + "39": 8.92482, + "40": 8.87122, + "41": 8.89627, + "42": 8.87601, + "43": 8.87414, + "44": 8.8411, + "45": 8.81228, + "46": 8.79564, + "47": 8.84576, + "48": 8.77191, + "49": 8.78047, + "50": 8.76196 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3477955.0, + "2": 3392302.0, + "3": 3630021.0, + "4": 3532452.0, + "5": 3783960.0, + "6": 3584449.0, + "7": 
3478372.0, + "8": 3414330.0, + "9": 3511649.0, + "10": 3544311.0, + "11": 3475468.0, + "12": 3518965.0, + "13": 3591786.0, + "14": 3549396.0, + "15": 3421163.0, + "16": 3383319.0, + "17": 3424120.0, + "18": 3509184.0, + "19": 3426107.0, + "20": 3465915.0, + "21": 3700118.0, + "22": 3474397.0, + "23": 3693474.0, + "24": 3405657.0, + "25": 3457588.0, + "26": 3479130.0, + "27": 3555371.0, + "28": 3496999.0, + "29": 3561842.0, + "30": 3708011.0, + "31": 3397663.0, + "32": 3467970.0, + "33": 3515742.0, + "34": 3501589.0, + "35": 3432484.0, + "36": 3453953.0, + "37": 3958777.0, + "38": 3488640.0, + "39": 3409958.0, + "40": 3614258.0, + "41": 3425709.0, + "42": 3643603.0, + "43": 3473029.0, + "44": 3448331.0, + "45": 3452202.0, + "46": 3585738.0, + "47": 3467386.0, + "48": 3462962.0, + "49": 3529813.0, + "50": 3412019.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2431335424.0, + "2": 2431335424.0, + "3": 2431335424.0, + "4": 2431335424.0, + "5": 2431335424.0, + "6": 2431335424.0, + "7": 2431335424.0, + "8": 2431335424.0, + "9": 2431335424.0, + "10": 2431335424.0, + "11": 2431335424.0, + "12": 2431335424.0, + "13": 2431335424.0, + "14": 2431335424.0, + "15": 2431335424.0, + "16": 2431335424.0, + "17": 2431335424.0, + "18": 2431335424.0, + "19": 2431335424.0, + "20": 2431335424.0, + "21": 2431335424.0, + "22": 2431335424.0, + "23": 2431335424.0, + "24": 2431335424.0, + "25": 2431335424.0, + "26": 2431335424.0, + "27": 2431335424.0, + "28": 2431335424.0, + "29": 2431335424.0, + "30": 2431335424.0, + "31": 2431335424.0, + "32": 2431335424.0, + "33": 2431335424.0, + "34": 2431335424.0, + "35": 2431335424.0, + "36": 2431335424.0, + "37": 2431335424.0, + "38": 2431335424.0, + "39": 2431335424.0, + "40": 2431335424.0, + "41": 2431335424.0, + "42": 2431335424.0, + "43": 2431335424.0, + "44": 2431335424.0, + "45": 2431335424.0, + "46": 2431335424.0, + "47": 2431335424.0, + "48": 2431335424.0, + "49": 
2431335424.0, + "50": 2431335424.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14740086784.0, + "2": 15773663232.0, + "3": 15773663232.0, + "4": 15773663232.0, + "5": 15773663232.0, + "6": 15773663232.0, + "7": 15773663232.0, + "8": 15773663232.0, + "9": 15773663232.0, + "10": 15773663232.0, + "11": 15773663232.0, + "12": 15773663232.0, + "13": 15773663232.0, + "14": 15773663232.0, + "15": 15773663232.0, + "16": 15773663232.0, + "17": 15773663232.0, + "18": 15773663232.0, + "19": 15773663232.0, + "20": 15773663232.0, + "21": 15773663232.0, + "22": 15773663232.0, + "23": 15773663232.0, + "24": 15773663232.0, + "25": 15773663232.0, + "26": 15773663232.0, + "27": 15773663232.0, + "28": 15773663232.0, + "29": 15773663232.0, + "30": 15773663232.0, + "31": 15773663232.0, + "32": 15773663232.0, + "33": 15773663232.0, + "34": 15773663232.0, + "35": 15773663232.0, + "36": 15773663232.0, + "37": 15773663232.0, + "38": 15773663232.0, + "39": 15773663232.0, + "40": 15773663232.0, + "41": 15773663232.0, + "42": 15773663232.0, + "43": 15773663232.0, + "44": 15773663232.0, + "45": 15773663232.0, + "46": 15773663232.0, + "47": 15773663232.0, + "48": 15773663232.0, + "49": 15773663232.0, + "50": 15773663232.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5.97454, + "2": 0.19297, + "3": 0.18331, + "4": 0.18419, + "5": 0.18099, + "6": 0.18354, + "7": 0.18332, + "8": 0.18477, + "9": 0.18391, + "10": 0.18412, + "11": 0.18154, + "12": 0.18441, + "13": 0.18338, + "14": 0.1859, + "15": 0.18316, + "16": 0.18298, + "17": 0.18167, + "18": 0.18385, + "19": 0.18358, + "20": 0.18325, + "21": 0.18392, + "22": 0.1826, + "23": 0.18266, + "24": 0.18333, + "25": 0.18413, + "26": 0.185, + "27": 0.18218, + "28": 0.18361, + "29": 0.18161, + "30": 0.18366, + "31": 0.18238, + "32": 0.18355, + "33": 0.18274, + "34": 0.18399, + "35": 0.18232, + "36": 0.18405, + "37": 
0.18325, + "38": 0.18367, + "39": 0.18313, + "40": 0.18319, + "41": 0.18244, + "42": 0.18305, + "43": 0.18287, + "44": 0.18263, + "45": 0.18326, + "46": 0.18213, + "47": 0.18261, + "48": 0.18333, + "49": 0.18287, + "50": 0.18284 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 7d734c87640..5e195fce69e 100644 --- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 9.28651, "5": 9.27695, "10": 9.28293, "15": 9.25309, "20": 9.20817, "25": 9.1444, "30": 9.0783, "35": 8.95924, "40": 8.90642, "45": 8.81379, "50": 8.73494}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 5959400.0, "5": 6498093.0, "10": 6529058.0, "15": 6530023.0, "20": 6527801.0, "25": 6993035.0, "30": 6468659.0, "35": 7065192.0, "40": 6555154.0, "45": 6680008.0, "50": 6238169.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1653820416.0, "5": 1653820416.0, "10": 1653820416.0, "15": 1653820416.0, "20": 1653820416.0, "25": 1653820416.0, "30": 1653820416.0, "35": 1653820416.0, "40": 1653820416.0, "45": 1653820416.0, "50": 1653820416.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1653824512.0, "5": 2142515200.0, "10": 2142515200.0, "15": 2142515200.0, "20": 2142515200.0, "25": 2142515200.0, "30": 2142515200.0, "35": 2142515200.0, "40": 2142515200.0, "45": 
2142515200.0, "50": 2142515200.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 15.11206, "5": 0.77394, "10": 0.7922, "15": 0.78343, "20": 1.06047, "25": 0.81006, "30": 1.0155, "35": 0.81042, "40": 0.79935, "45": 0.79234, "50": 0.78227}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9.28651, + "2": 9.28395, + "3": 9.28076, + "4": 9.28861, + "5": 9.27695, + "6": 9.28726, + "7": 9.27836, + "8": 9.28267, + "9": 9.28528, + "10": 9.28293, + "11": 9.28342, + "12": 9.27384, + "13": 9.27126, + "14": 9.27209, + "15": 9.25309, + "16": 9.24492, + "17": 9.24857, + "18": 9.22951, + "19": 9.23151, + "20": 9.20817, + "21": 9.17046, + "22": 9.15049, + "23": 9.16842, + "24": 9.15079, + "25": 9.1444, + "26": 9.14727, + "27": 9.12295, + "28": 9.09719, + "29": 9.09388, + "30": 9.0783, + "31": 8.97175, + "32": 9.03158, + "33": 9.02021, + "34": 8.98662, + "35": 8.95924, + "36": 8.97139, + "37": 8.91443, + "38": 8.88795, + "39": 8.88883, + "40": 8.90642, + "41": 8.81811, + "42": 8.87405, + "43": 8.85666, + "44": 8.81697, + "45": 8.81379, + "46": 8.84457, + "47": 8.73721, + "48": 8.66931, + "49": 8.70107, + "50": 8.73494 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5959400.0, + "2": 6553837.0, + "3": 7313493.0, + "4": 6377142.0, + "5": 6498093.0, + "6": 7151947.0, + "7": 6210401.0, + "8": 6334645.0, + "9": 6624584.0, + "10": 6529058.0, + "11": 7466715.0, + "12": 6471579.0, + "13": 6003497.0, + "14": 8071952.0, + "15": 6530023.0, + "16": 7526922.0, + "17": 6034909.0, + "18": 6289605.0, + "19": 6162573.0, + "20": 6527801.0, + "21": 6981914.0, + "22": 7132792.0, + "23": 5928465.0, + "24": 6210239.0, + "25": 6993035.0, + "26": 6471579.0, + "27": 6355357.0, + "28": 6877112.0, + "29": 6380110.0, + "30": 6468659.0, + "31": 8165130.0, + "32": 6765448.0, + "33": 6355561.0, + "34": 6662237.0, + "35": 7065192.0, + 
"36": 6076915.0, + "37": 7785518.0, + "38": 6727009.0, + "39": 7315902.0, + "40": 6555154.0, + "41": 7314617.0, + "42": 6591869.0, + "43": 6928017.0, + "44": 7274417.0, + "45": 6680008.0, + "46": 6232372.0, + "47": 6496696.0, + "48": 6809696.0, + "49": 6753491.0, + "50": 6238169.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1653820416.0, + "2": 1653820416.0, + "3": 1653820416.0, + "4": 1653820416.0, + "5": 1653820416.0, + "6": 1653820416.0, + "7": 1653820416.0, + "8": 1653820416.0, + "9": 1653820416.0, + "10": 1653820416.0, + "11": 1653820416.0, + "12": 1653820416.0, + "13": 1653820416.0, + "14": 1653820416.0, + "15": 1653820416.0, + "16": 1653820416.0, + "17": 1653820416.0, + "18": 1653820416.0, + "19": 1653820416.0, + "20": 1653820416.0, + "21": 1653820416.0, + "22": 1653820416.0, + "23": 1653820416.0, + "24": 1653820416.0, + "25": 1653820416.0, + "26": 1653820416.0, + "27": 1653820416.0, + "28": 1653820416.0, + "29": 1653820416.0, + "30": 1653820416.0, + "31": 1653820416.0, + "32": 1653820416.0, + "33": 1653820416.0, + "34": 1653820416.0, + "35": 1653820416.0, + "36": 1653820416.0, + "37": 1653820416.0, + "38": 1653820416.0, + "39": 1653820416.0, + "40": 1653820416.0, + "41": 1653820416.0, + "42": 1653820416.0, + "43": 1653820416.0, + "44": 1653820416.0, + "45": 1653820416.0, + "46": 1653820416.0, + "47": 1653820416.0, + "48": 1653820416.0, + "49": 1653820416.0, + "50": 1653820416.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1653824512.0, + "2": 2142515200.0, + "3": 2142515200.0, + "4": 2142515200.0, + "5": 2142515200.0, + "6": 2142515200.0, + "7": 2142515200.0, + "8": 2142515200.0, + "9": 2142515200.0, + "10": 2142515200.0, + "11": 2142515200.0, + "12": 2142515200.0, + "13": 2142515200.0, + "14": 2142515200.0, + "15": 2142515200.0, + "16": 2142515200.0, + "17": 2142515200.0, + "18": 2142515200.0, + "19": 
2142515200.0, + "20": 2142515200.0, + "21": 2142515200.0, + "22": 2142515200.0, + "23": 2142515200.0, + "24": 2142515200.0, + "25": 2142515200.0, + "26": 2142515200.0, + "27": 2142515200.0, + "28": 2142515200.0, + "29": 2142515200.0, + "30": 2142515200.0, + "31": 2142515200.0, + "32": 2142515200.0, + "33": 2142515200.0, + "34": 2142515200.0, + "35": 2142515200.0, + "36": 2142515200.0, + "37": 2142515200.0, + "38": 2142515200.0, + "39": 2142515200.0, + "40": 2142515200.0, + "41": 2142515200.0, + "42": 2142515200.0, + "43": 2142515200.0, + "44": 2142515200.0, + "45": 2142515200.0, + "46": 2142515200.0, + "47": 2142515200.0, + "48": 2142515200.0, + "49": 2142515200.0, + "50": 2142515200.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.37901, + "2": 1.00945, + "3": 0.97719, + "4": 1.00246, + "5": 0.95207, + "6": 0.95, + "7": 0.94753, + "8": 0.94707, + "9": 0.94823, + "10": 0.95034, + "11": 0.97925, + "12": 0.97702, + "13": 0.94374, + "14": 1.21224, + "15": 0.94966, + "16": 0.9451, + "17": 0.94563, + "18": 0.94303, + "19": 1.24824, + "20": 0.9452, + "21": 0.97627, + "22": 0.98348, + "23": 1.30411, + "24": 0.94959, + "25": 0.94296, + "26": 0.95158, + "27": 0.94465, + "28": 0.94877, + "29": 0.94644, + "30": 0.94814, + "31": 1.31598, + "32": 0.98424, + "33": 1.24311, + "34": 0.94977, + "35": 1.30685, + "36": 0.94683, + "37": 0.95372, + "38": 0.94948, + "39": 0.95294, + "40": 1.3288, + "41": 0.97347, + "42": 0.9497, + "43": 1.30833, + "44": 0.94555, + "45": 0.94659, + "46": 0.95663, + "47": 0.95211, + "48": 0.95051, + "49": 0.94741, + "50": 0.96304 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 
index 00000000000..40e463c4e4e --- /dev/null +++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9.28651, + "2": 9.28395, + "3": 9.28076, + "4": 9.28861, + "5": 9.27695, + "6": 9.28726, + "7": 9.27836, + "8": 9.28267, + "9": 9.28528, + "10": 9.28293, + "11": 9.28342, + "12": 9.27384, + "13": 9.27126, + "14": 9.27209, + "15": 9.25309, + "16": 9.24492, + "17": 9.24857, + "18": 9.22951, + "19": 9.23151, + "20": 9.20817, + "21": 9.17046, + "22": 9.15049, + "23": 9.16842, + "24": 9.15079, + "25": 9.1444, + "26": 9.14727, + "27": 9.12295, + "28": 9.09719, + "29": 9.09388, + "30": 9.0783, + "31": 8.97175, + "32": 9.03158, + "33": 9.02021, + "34": 8.98662, + "35": 8.95924, + "36": 8.97139, + "37": 8.91443, + "38": 8.88795, + "39": 8.88883, + "40": 8.90642, + "41": 8.81811, + "42": 8.87405, + "43": 8.85666, + "44": 8.81697, + "45": 8.81379, + "46": 8.84457, + "47": 8.73721, + "48": 8.66931, + "49": 8.70107, + "50": 8.73494 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5959400.0, + "2": 6553837.0, + "3": 7313493.0, + "4": 6377142.0, + "5": 6498093.0, + "6": 7151947.0, + "7": 6210401.0, + "8": 6334645.0, + "9": 6624584.0, + "10": 6529058.0, + "11": 7466715.0, + "12": 6471579.0, + "13": 6003497.0, + "14": 8071952.0, + "15": 6530023.0, + "16": 7526922.0, + "17": 6034909.0, + "18": 6289605.0, + "19": 6162573.0, + "20": 6527801.0, + "21": 6981914.0, + "22": 7132792.0, + "23": 5928465.0, + "24": 6210239.0, + "25": 6993035.0, + "26": 6471579.0, + "27": 6355357.0, + "28": 6877112.0, + "29": 6380110.0, + "30": 6468659.0, + "31": 8165130.0, + "32": 6765448.0, + "33": 6355561.0, + "34": 6662237.0, + "35": 7065192.0, + "36": 6076915.0, + "37": 7785518.0, + "38": 6727009.0, + "39": 7315902.0, + "40": 6555154.0, + 
"41": 7314617.0, + "42": 6591869.0, + "43": 6928017.0, + "44": 7274417.0, + "45": 6680008.0, + "46": 6232372.0, + "47": 6496696.0, + "48": 6809696.0, + "49": 6753491.0, + "50": 6238169.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1653820416.0, + "2": 1653820416.0, + "3": 1653820416.0, + "4": 1653820416.0, + "5": 1653820416.0, + "6": 1653820416.0, + "7": 1653820416.0, + "8": 1653820416.0, + "9": 1653820416.0, + "10": 1653820416.0, + "11": 1653820416.0, + "12": 1653820416.0, + "13": 1653820416.0, + "14": 1653820416.0, + "15": 1653820416.0, + "16": 1653820416.0, + "17": 1653820416.0, + "18": 1653820416.0, + "19": 1653820416.0, + "20": 1653820416.0, + "21": 1653820416.0, + "22": 1653820416.0, + "23": 1653820416.0, + "24": 1653820416.0, + "25": 1653820416.0, + "26": 1653820416.0, + "27": 1653820416.0, + "28": 1653820416.0, + "29": 1653820416.0, + "30": 1653820416.0, + "31": 1653820416.0, + "32": 1653820416.0, + "33": 1653820416.0, + "34": 1653820416.0, + "35": 1653820416.0, + "36": 1653820416.0, + "37": 1653820416.0, + "38": 1653820416.0, + "39": 1653820416.0, + "40": 1653820416.0, + "41": 1653820416.0, + "42": 1653820416.0, + "43": 1653820416.0, + "44": 1653820416.0, + "45": 1653820416.0, + "46": 1653820416.0, + "47": 1653820416.0, + "48": 1653820416.0, + "49": 1653820416.0, + "50": 1653820416.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1653824512.0, + "2": 2142515200.0, + "3": 2142515200.0, + "4": 2142515200.0, + "5": 2142515200.0, + "6": 2142515200.0, + "7": 2142515200.0, + "8": 2142515200.0, + "9": 2142515200.0, + "10": 2142515200.0, + "11": 2142515200.0, + "12": 2142515200.0, + "13": 2142515200.0, + "14": 2142515200.0, + "15": 2142515200.0, + "16": 2142515200.0, + "17": 2142515200.0, + "18": 2142515200.0, + "19": 2142515200.0, + "20": 2142515200.0, + "21": 2142515200.0, + "22": 2142515200.0, + "23": 2142515200.0, + 
"24": 2142515200.0, + "25": 2142515200.0, + "26": 2142515200.0, + "27": 2142515200.0, + "28": 2142515200.0, + "29": 2142515200.0, + "30": 2142515200.0, + "31": 2142515200.0, + "32": 2142515200.0, + "33": 2142515200.0, + "34": 2142515200.0, + "35": 2142515200.0, + "36": 2142515200.0, + "37": 2142515200.0, + "38": 2142515200.0, + "39": 2142515200.0, + "40": 2142515200.0, + "41": 2142515200.0, + "42": 2142515200.0, + "43": 2142515200.0, + "44": 2142515200.0, + "45": 2142515200.0, + "46": 2142515200.0, + "47": 2142515200.0, + "48": 2142515200.0, + "49": 2142515200.0, + "50": 2142515200.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 15.02389, + "2": 0.90938, + "3": 0.833, + "4": 0.83139, + "5": 0.87938, + "6": 0.8436, + "7": 0.84341, + "8": 0.84254, + "9": 0.83392, + "10": 0.8484, + "11": 0.84151, + "12": 0.84392, + "13": 0.84466, + "14": 0.85987, + "15": 0.85033, + "16": 0.84631, + "17": 0.86049, + "18": 0.84475, + "19": 1.16176, + "20": 0.84338, + "21": 0.8904, + "22": 0.85197, + "23": 1.15742, + "24": 0.84195, + "25": 0.84346, + "26": 0.84406, + "27": 0.84866, + "28": 0.87098, + "29": 0.83524, + "30": 1.14004, + "31": 1.16138, + "32": 0.8533, + "33": 0.84361, + "34": 0.84484, + "35": 0.84276, + "36": 0.83752, + "37": 0.84209, + "38": 0.84471, + "39": 0.8405, + "40": 1.1684, + "41": 0.84052, + "42": 0.83772, + "43": 1.16777, + "44": 1.14427, + "45": 0.84262, + "46": 1.19422, + "47": 0.84418, + "48": 0.85685, + "49": 0.84021, + "50": 0.84726 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..bf52c8e8fd4 --- /dev/null +++ 
b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9.28651, + "2": 9.28395, + "3": 9.28076, + "4": 9.28861, + "5": 9.27695, + "6": 9.28726, + "7": 9.27836, + "8": 9.28267, + "9": 9.28528, + "10": 9.28293, + "11": 9.28342, + "12": 9.27384, + "13": 9.27126, + "14": 9.27209, + "15": 9.25309, + "16": 9.24492, + "17": 9.24857, + "18": 9.22951, + "19": 9.23151, + "20": 9.20817, + "21": 9.17046, + "22": 9.15049, + "23": 9.16842, + "24": 9.15079, + "25": 9.1444, + "26": 9.14727, + "27": 9.12295, + "28": 9.09719, + "29": 9.09388, + "30": 9.0783, + "31": 8.97175, + "32": 9.03158, + "33": 9.02021, + "34": 8.98662, + "35": 8.95924, + "36": 8.97139, + "37": 8.91443, + "38": 8.88795, + "39": 8.88883, + "40": 8.90642, + "41": 8.81811, + "42": 8.87405, + "43": 8.85666, + "44": 8.81697, + "45": 8.81379, + "46": 8.84457, + "47": 8.73721, + "48": 8.66931, + "49": 8.70107, + "50": 8.73494 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5959400.0, + "2": 6553837.0, + "3": 7313493.0, + "4": 6377142.0, + "5": 6498093.0, + "6": 7151947.0, + "7": 6210401.0, + "8": 6334645.0, + "9": 6624584.0, + "10": 6529058.0, + "11": 7466715.0, + "12": 6471579.0, + "13": 6003497.0, + "14": 8071952.0, + "15": 6530023.0, + "16": 7526922.0, + "17": 6034909.0, + "18": 6289605.0, + "19": 6162573.0, + "20": 6527801.0, + "21": 6981914.0, + "22": 7132792.0, + "23": 5928465.0, + "24": 6210239.0, + "25": 6993035.0, + "26": 6471579.0, + "27": 6355357.0, + "28": 6877112.0, + "29": 6380110.0, + "30": 6468659.0, + "31": 8165130.0, + "32": 6765448.0, + "33": 6355561.0, + "34": 6662237.0, + "35": 7065192.0, + "36": 6076915.0, + "37": 7785518.0, + "38": 6727009.0, + "39": 7315902.0, + "40": 6555154.0, + "41": 7314617.0, + "42": 6591869.0, + "43": 6928017.0, + 
"44": 7274417.0, + "45": 6680008.0, + "46": 6232372.0, + "47": 6496696.0, + "48": 6809696.0, + "49": 6753491.0, + "50": 6238169.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1653820416.0, + "2": 1653820416.0, + "3": 1653820416.0, + "4": 1653820416.0, + "5": 1653820416.0, + "6": 1653820416.0, + "7": 1653820416.0, + "8": 1653820416.0, + "9": 1653820416.0, + "10": 1653820416.0, + "11": 1653820416.0, + "12": 1653820416.0, + "13": 1653820416.0, + "14": 1653820416.0, + "15": 1653820416.0, + "16": 1653820416.0, + "17": 1653820416.0, + "18": 1653820416.0, + "19": 1653820416.0, + "20": 1653820416.0, + "21": 1653820416.0, + "22": 1653820416.0, + "23": 1653820416.0, + "24": 1653820416.0, + "25": 1653820416.0, + "26": 1653820416.0, + "27": 1653820416.0, + "28": 1653820416.0, + "29": 1653820416.0, + "30": 1653820416.0, + "31": 1653820416.0, + "32": 1653820416.0, + "33": 1653820416.0, + "34": 1653820416.0, + "35": 1653820416.0, + "36": 1653820416.0, + "37": 1653820416.0, + "38": 1653820416.0, + "39": 1653820416.0, + "40": 1653820416.0, + "41": 1653820416.0, + "42": 1653820416.0, + "43": 1653820416.0, + "44": 1653820416.0, + "45": 1653820416.0, + "46": 1653820416.0, + "47": 1653820416.0, + "48": 1653820416.0, + "49": 1653820416.0, + "50": 1653820416.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1653824512.0, + "2": 2142515200.0, + "3": 2142515200.0, + "4": 2142515200.0, + "5": 2142515200.0, + "6": 2142515200.0, + "7": 2142515200.0, + "8": 2142515200.0, + "9": 2142515200.0, + "10": 2142515200.0, + "11": 2142515200.0, + "12": 2142515200.0, + "13": 2142515200.0, + "14": 2142515200.0, + "15": 2142515200.0, + "16": 2142515200.0, + "17": 2142515200.0, + "18": 2142515200.0, + "19": 2142515200.0, + "20": 2142515200.0, + "21": 2142515200.0, + "22": 2142515200.0, + "23": 2142515200.0, + "24": 2142515200.0, + "25": 2142515200.0, + "26": 
2142515200.0, + "27": 2142515200.0, + "28": 2142515200.0, + "29": 2142515200.0, + "30": 2142515200.0, + "31": 2142515200.0, + "32": 2142515200.0, + "33": 2142515200.0, + "34": 2142515200.0, + "35": 2142515200.0, + "36": 2142515200.0, + "37": 2142515200.0, + "38": 2142515200.0, + "39": 2142515200.0, + "40": 2142515200.0, + "41": 2142515200.0, + "42": 2142515200.0, + "43": 2142515200.0, + "44": 2142515200.0, + "45": 2142515200.0, + "46": 2142515200.0, + "47": 2142515200.0, + "48": 2142515200.0, + "49": 2142515200.0, + "50": 2142515200.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.64684, + "2": 0.98193, + "3": 0.95861, + "4": 0.96167, + "5": 0.96222, + "6": 0.96444, + "7": 0.95334, + "8": 0.95675, + "9": 0.95004, + "10": 0.9526, + "11": 0.94782, + "12": 0.95256, + "13": 0.95466, + "14": 0.95046, + "15": 0.96366, + "16": 0.95156, + "17": 0.95425, + "18": 0.9544, + "19": 1.2298, + "20": 0.95303, + "21": 0.95634, + "22": 0.95632, + "23": 0.95424, + "24": 0.95464, + "25": 0.96269, + "26": 0.96616, + "27": 0.94874, + "28": 0.94988, + "29": 1.26385, + "30": 0.95465, + "31": 1.2033, + "32": 0.9571, + "33": 0.956, + "34": 0.95832, + "35": 1.32667, + "36": 0.95679, + "37": 0.95623, + "38": 0.96193, + "39": 0.96003, + "40": 1.25799, + "41": 0.95599, + "42": 0.95891, + "43": 1.55786, + "44": 0.96371, + "45": 0.96764, + "46": 0.95894, + "47": 0.96017, + "48": 0.95646, + "49": 0.961, + "50": 0.96278 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 165aa133737..0bff8d085b5 100644 --- a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -1 +1,162 @@ -{"lm loss": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 10.74903, "5": 11.07413, "10": 9.25112, "15": 8.79113, "20": 8.16452, "25": 7.78994}}, "num-zeros": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 245867.0, "5": 251594.0, "10": 252461.0, "15": 261948.0, "20": 248292.0, "25": 237325.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 40674893824.0, "5": 40674893824.0, "10": 40674893824.0, "15": 40674893824.0, "20": 40674893824.0, "25": 40674893824.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 40674897920.0, "5": 44982894592.0, "10": 44982894592.0, "15": 44982894592.0, "20": 44982894592.0, "25": 44982894592.0}}, "iteration-time": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 13.38447, "5": 0.36674, "10": 0.37116, "15": 0.6292, "20": 0.37325, "25": 0.37334}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 10.74903, + "2": 10.75924, + "3": 16.15622, + "4": 20.1728, + "5": 11.07413, + "6": 10.29087, + "7": 10.31369, + "8": 10.31557, + "9": 9.68992, + "10": 9.25112, + "11": 9.43376, + "12": 9.8267, + "13": 8.88334, + "14": 8.49023, + "15": 8.79113, + "16": 7.95739, + "17": 7.70005, + "18": 7.81826, + "19": 8.21562, + "20": 8.16452, + "21": 7.833, + "22": 7.71899, + "23": 7.88724, + "24": 7.70093, + "25": 7.78994 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 245867.0, + "2": 256817.0, + "3": 248438.0, + "4": 233541.0, + "5": 251594.0, + "6": 259588.0, + "7": 256938.0, + "8": 237612.0, + "9": 241154.0, + "10": 252461.0, + "11": 288146.0, + "12": 248712.0, + "13": 241371.0, + "14": 228365.0, + "15": 261948.0, + "16": 237032.0, + "17": 
249760.0, + "18": 251590.0, + "19": 257104.0, + "20": 248292.0, + "21": 231805.0, + "22": 223805.0, + "23": 247959.0, + "24": 250798.0, + "25": 237325.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 40735711232.0, + "2": 40735711232.0, + "3": 40735711232.0, + "4": 40735711232.0, + "5": 40735711232.0, + "6": 40735711232.0, + "7": 40735711232.0, + "8": 40735711232.0, + "9": 40735711232.0, + "10": 40735711232.0, + "11": 40735711232.0, + "12": 40735711232.0, + "13": 40735711232.0, + "14": 40735711232.0, + "15": 40735711232.0, + "16": 40735711232.0, + "17": 40735711232.0, + "18": 40735711232.0, + "19": 40735711232.0, + "20": 40735711232.0, + "21": 40735711232.0, + "22": 40735711232.0, + "23": 40735711232.0, + "24": 40735711232.0, + "25": 40735711232.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 40735711232.0, + "2": 44991991808.0, + "3": 44993564672.0, + "4": 44993564672.0, + "5": 44993564672.0, + "6": 44993564672.0, + "7": 44993564672.0, + "8": 44993564672.0, + "9": 44993564672.0, + "10": 44993564672.0, + "11": 44993564672.0, + "12": 44993564672.0, + "13": 44993564672.0, + "14": 44993564672.0, + "15": 44993564672.0, + "16": 44993564672.0, + "17": 44993564672.0, + "18": 44993564672.0, + "19": 44993564672.0, + "20": 44993564672.0, + "21": 44993564672.0, + "22": 44993564672.0, + "23": 44993564672.0, + "24": 44993564672.0, + "25": 44993564672.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 10.24757, + "2": 0.4815, + "3": 0.41556, + "4": 0.40564, + "5": 0.40743, + "6": 0.40813, + "7": 0.42484, + "8": 0.41261, + "9": 0.40523, + "10": 0.41064, + "11": 0.40795, + "12": 0.409, + "13": 0.41219, + "14": 0.41524, + "15": 0.41267, + "16": 0.40783, + "17": 0.40886, + "18": 0.41321, + "19": 0.40795, + "20": 0.41032, + "21": 0.41828, + "22": 0.40867, + "23": 0.42317, + "24": 
0.40771, + "25": 0.4176 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..a5fc1a5f4c5 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,162 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 10.74903, + "2": 10.75924, + "3": 16.15622, + "4": 20.1728, + "5": 11.07413, + "6": 10.29087, + "7": 10.31369, + "8": 10.31557, + "9": 9.68992, + "10": 9.25112, + "11": 9.43376, + "12": 9.8267, + "13": 8.88334, + "14": 8.49023, + "15": 8.79113, + "16": 7.95739, + "17": 7.70005, + "18": 7.81826, + "19": 8.21562, + "20": 8.16452, + "21": 7.833, + "22": 7.71899, + "23": 7.88724, + "24": 7.70093, + "25": 7.78994 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 245867.0, + "2": 256817.0, + "3": 248438.0, + "4": 233541.0, + "5": 251594.0, + "6": 259588.0, + "7": 256938.0, + "8": 237612.0, + "9": 241154.0, + "10": 252461.0, + "11": 288146.0, + "12": 248712.0, + "13": 241371.0, + "14": 228365.0, + "15": 261948.0, + "16": 237032.0, + "17": 249760.0, + "18": 251590.0, + "19": 257104.0, + "20": 248292.0, + "21": 231805.0, + "22": 223805.0, + "23": 247959.0, + "24": 250798.0, + "25": 237325.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 40735711232.0, + "2": 40735711232.0, + "3": 40735711232.0, + "4": 40735711232.0, + "5": 40735711232.0, + "6": 40735711232.0, + "7": 40735711232.0, + "8": 40735711232.0, + "9": 40735711232.0, + "10": 40735711232.0, + "11": 40735711232.0, + "12": 40735711232.0, + "13": 40735711232.0, + "14": 40735711232.0, + "15": 
40735711232.0, + "16": 40735711232.0, + "17": 40735711232.0, + "18": 40735711232.0, + "19": 40735711232.0, + "20": 40735711232.0, + "21": 40735711232.0, + "22": 40735711232.0, + "23": 40735711232.0, + "24": 40735711232.0, + "25": 40735711232.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 40735711232.0, + "2": 44991991808.0, + "3": 44993564672.0, + "4": 44993564672.0, + "5": 44993564672.0, + "6": 44993564672.0, + "7": 44993564672.0, + "8": 44993564672.0, + "9": 44993564672.0, + "10": 44993564672.0, + "11": 44993564672.0, + "12": 44993564672.0, + "13": 44993564672.0, + "14": 44993564672.0, + "15": 44993564672.0, + "16": 44993564672.0, + "17": 44993564672.0, + "18": 44993564672.0, + "19": 44993564672.0, + "20": 44993564672.0, + "21": 44993564672.0, + "22": 44993564672.0, + "23": 44993564672.0, + "24": 44993564672.0, + "25": 44993564672.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 13.38163, + "2": 0.76932, + "3": 0.59621, + "4": 0.3807, + "5": 0.37959, + "6": 0.38757, + "7": 0.38242, + "8": 0.39662, + "9": 0.38425, + "10": 0.38671, + "11": 0.3878, + "12": 0.37911, + "13": 0.38138, + "14": 0.38215, + "15": 0.37904, + "16": 0.3847, + "17": 0.38241, + "18": 0.38681, + "19": 0.39003, + "20": 0.37797, + "21": 0.3854, + "22": 0.71416, + "23": 0.38609, + "24": 0.37862, + "25": 0.37919 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..45c06ac2f7e --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,162 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + 
"values": { + "1": 10.74903, + "2": 10.75924, + "3": 16.15622, + "4": 20.1728, + "5": 11.07413, + "6": 10.29087, + "7": 10.31369, + "8": 10.31557, + "9": 9.68992, + "10": 9.25112, + "11": 9.43376, + "12": 9.8267, + "13": 8.88334, + "14": 8.49023, + "15": 8.79113, + "16": 7.95739, + "17": 7.70005, + "18": 7.81826, + "19": 8.21562, + "20": 8.16452, + "21": 7.833, + "22": 7.71899, + "23": 7.88724, + "24": 7.70093, + "25": 7.78994 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 245867.0, + "2": 256817.0, + "3": 248438.0, + "4": 233541.0, + "5": 251594.0, + "6": 259588.0, + "7": 256938.0, + "8": 237612.0, + "9": 241154.0, + "10": 252461.0, + "11": 288146.0, + "12": 248712.0, + "13": 241371.0, + "14": 228365.0, + "15": 261948.0, + "16": 237032.0, + "17": 249760.0, + "18": 251590.0, + "19": 257104.0, + "20": 248292.0, + "21": 231805.0, + "22": 223805.0, + "23": 247959.0, + "24": 250798.0, + "25": 237325.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 40735711232.0, + "2": 40735711232.0, + "3": 40735711232.0, + "4": 40735711232.0, + "5": 40735711232.0, + "6": 40735711232.0, + "7": 40735711232.0, + "8": 40735711232.0, + "9": 40735711232.0, + "10": 40735711232.0, + "11": 40735711232.0, + "12": 40735711232.0, + "13": 40735711232.0, + "14": 40735711232.0, + "15": 40735711232.0, + "16": 40735711232.0, + "17": 40735711232.0, + "18": 40735711232.0, + "19": 40735711232.0, + "20": 40735711232.0, + "21": 40735711232.0, + "22": 40735711232.0, + "23": 40735711232.0, + "24": 40735711232.0, + "25": 40735711232.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 40735711232.0, + "2": 44991991808.0, + "3": 44993564672.0, + "4": 44993564672.0, + "5": 44993564672.0, + "6": 44993564672.0, + "7": 44993564672.0, + "8": 44993564672.0, + "9": 44993564672.0, + "10": 44993564672.0, + "11": 
44993564672.0, + "12": 44993564672.0, + "13": 44993564672.0, + "14": 44993564672.0, + "15": 44993564672.0, + "16": 44993564672.0, + "17": 44993564672.0, + "18": 44993564672.0, + "19": 44993564672.0, + "20": 44993564672.0, + "21": 44993564672.0, + "22": 44993564672.0, + "23": 44993564672.0, + "24": 44993564672.0, + "25": 44993564672.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 12.25468, + "2": 0.47853, + "3": 0.41459, + "4": 0.41066, + "5": 0.4125, + "6": 0.42243, + "7": 0.40926, + "8": 0.41832, + "9": 0.4068, + "10": 0.41071, + "11": 0.41068, + "12": 0.41187, + "13": 0.42064, + "14": 0.4228, + "15": 0.41026, + "16": 0.81409, + "17": 0.41651, + "18": 0.41416, + "19": 0.41418, + "20": 0.41217, + "21": 0.42084, + "22": 0.4131, + "23": 0.41106, + "24": 0.41518, + "25": 0.41106 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index f3a09e92509..8284e160db8 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.34904, + "2": 10.34488, + "3": 9.79407, + "4": 9.59568, "5": 9.42065, + "6": 9.41856, + "7": 9.28073, + "8": 9.18973, + "9": 9.06584, "10": 9.00206, + "11": 8.81497, + "12": 8.78107, + "13": 8.82506, + "14": 8.6728, "15": 8.6368, + "16": 8.51926, + "17": 8.45732, + "18": 8.37037, + "19": 8.36068, "20": 8.25456, + "21": 8.24268, + "22": 8.13404, + "23": 8.06818, + "24": 8.11464, "25": 7.95146, + "26": 8.08186, + "27": 7.86814, + "28": 7.94027, + "29": 7.77604, 
"30": 7.84595, + "31": 7.81568, + "32": 7.65964, + "33": 7.77905, + "34": 7.53277, "35": 7.6586, + "36": 7.51541, + "37": 7.44748, + "38": 7.4824, + "39": 7.46523, "40": 7.49146, + "41": 7.40822, + "42": 7.35649, + "43": 7.43806, + "44": 7.35517, "45": 7.35103, + "46": 7.27859, + "47": 7.44152, + "48": 7.2683, + "49": 7.32389, "50": 7.14549, + "51": 7.36541, + "52": 7.12192, + "53": 7.09189, + "54": 7.22759, "55": 7.13584, + "56": 7.20822, + "57": 7.31316, + "58": 6.99088, + "59": 7.09934, "60": 7.12683, + "61": 7.1014, + "62": 7.23954, + "63": 7.14417, + "64": 7.06836, "65": 6.98412, + "66": 7.03768, + "67": 7.02847, + "68": 7.1299, + "69": 7.01456, "70": 7.04997, + "71": 6.89408, + "72": 6.98553, + "73": 6.96694, + "74": 6.90297, "75": 7.0574, + "76": 6.9581, + "77": 7.06903, + "78": 7.02133, + "79": 6.8504, "80": 6.91935, + "81": 6.95874, + "82": 7.04745, + "83": 6.98522, + "84": 6.99712, "85": 6.83565, + "86": 7.04156, + "87": 6.96476, + "88": 6.89883, + "89": 6.80051, "90": 7.22593, + "91": 6.70562, + "92": 7.0381, + "93": 6.88685, + "94": 7.03908, "95": 6.84815, + "96": 6.95281, + "97": 6.94344, + "98": 6.86987, + "99": 6.99502, "100": 6.96683 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 43274.0, + "2": 44071.0, + "3": 44760.0, + "4": 42385.0, "5": 45378.0, + "6": 40938.0, + "7": 43150.0, + "8": 45450.0, + "9": 42428.0, "10": 45373.0, + "11": 43974.0, + "12": 44591.0, + "13": 43897.0, + "14": 46204.0, "15": 43924.0, + "16": 41613.0, + "17": 43852.0, + "18": 44669.0, + "19": 42579.0, "20": 44769.0, + "21": 44761.0, + "22": 41873.0, + "23": 45441.0, + "24": 43081.0, "25": 42452.0, + "26": 43947.0, + "27": 46247.0, + "28": 46419.0, + "29": 46169.0, "30": 44035.0, + "31": 41152.0, + "32": 43347.0, + "33": 45435.0, + "34": 43300.0, "35": 43284.0, + "36": 42483.0, + "37": 40070.0, + "38": 42561.0, + "39": 44706.0, "40": 43260.0, + "41": 44642.0, + "42": 43192.0, + "43": 45439.0, + "44": 44588.0, 
"45": 43274.0, + "46": 43921.0, + "47": 42364.0, + "48": 44740.0, + "49": 43152.0, "50": 43348.0, + "51": 41112.0, + "52": 43837.0, + "53": 43913.0, + "54": 41704.0, "55": 43870.0, + "56": 43209.0, + "57": 42636.0, + "58": 43841.0, + "59": 44630.0, "60": 41219.0, + "61": 39702.0, + "62": 44739.0, + "63": 44651.0, + "64": 45372.0, "65": 44682.0, + "66": 45351.0, + "67": 43174.0, + "68": 42502.0, + "69": 43834.0, "70": 45514.0, + "71": 43291.0, + "72": 44767.0, + "73": 45384.0, + "74": 42457.0, "75": 44673.0, + "76": 43876.0, + "77": 42026.0, + "78": 40350.0, + "79": 38918.0, "80": 41092.0, + "81": 45364.0, + "82": 43198.0, + "83": 38467.0, + "84": 42477.0, "85": 43981.0, + "86": 45667.0, + "87": 40863.0, + "88": 41772.0, + "89": 41104.0, "90": 44669.0, + "91": 46134.0, + "92": 41634.0, + "93": 43241.0, + "94": 39538.0, "95": 43915.0, + "96": 44683.0, + "97": 45405.0, + "98": 41791.0, + "99": 45414.0, "100": 42458.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1132053504.0, + "2": 1132053504.0, + "3": 1132053504.0, + "4": 1132053504.0, "5": 1132053504.0, + "6": 1132053504.0, + "7": 1132053504.0, + "8": 1132053504.0, + "9": 1132053504.0, "10": 1132053504.0, + "11": 1132053504.0, + "12": 1132053504.0, + "13": 1132053504.0, + "14": 1132053504.0, "15": 1132053504.0, + "16": 1132053504.0, + "17": 1132053504.0, + "18": 1132053504.0, + "19": 1132053504.0, "20": 1132053504.0, + "21": 1132053504.0, + "22": 1132053504.0, + "23": 1132053504.0, + "24": 1132053504.0, "25": 1132053504.0, + "26": 1132053504.0, + "27": 1132053504.0, + "28": 1132053504.0, + "29": 1132053504.0, "30": 1132053504.0, + "31": 1132053504.0, + "32": 1132053504.0, + "33": 1132053504.0, + "34": 1132053504.0, "35": 1132053504.0, + "36": 1132053504.0, + "37": 1132053504.0, + "38": 1132053504.0, + "39": 1132053504.0, "40": 1132053504.0, + "41": 1132053504.0, + "42": 1132053504.0, + "43": 1132053504.0, + "44": 1132053504.0, "45": 
1132053504.0, + "46": 1132053504.0, + "47": 1132053504.0, + "48": 1132053504.0, + "49": 1132053504.0, "50": 1132053504.0, + "51": 1132053504.0, + "52": 1132053504.0, + "53": 1132053504.0, + "54": 1132053504.0, "55": 1132053504.0, + "56": 1132053504.0, + "57": 1132053504.0, + "58": 1132053504.0, + "59": 1132053504.0, "60": 1132053504.0, + "61": 1132053504.0, + "62": 1132053504.0, + "63": 1132053504.0, + "64": 1132053504.0, "65": 1132053504.0, + "66": 1132053504.0, + "67": 1132053504.0, + "68": 1132053504.0, + "69": 1132053504.0, "70": 1132053504.0, + "71": 1132053504.0, + "72": 1132053504.0, + "73": 1132053504.0, + "74": 1132053504.0, "75": 1132053504.0, + "76": 1132053504.0, + "77": 1132053504.0, + "78": 1132053504.0, + "79": 1132053504.0, "80": 1132053504.0, + "81": 1132053504.0, + "82": 1132053504.0, + "83": 1132053504.0, + "84": 1132053504.0, "85": 1132053504.0, + "86": 1132053504.0, + "87": 1132053504.0, + "88": 1132053504.0, + "89": 1132053504.0, "90": 1132053504.0, + "91": 1132053504.0, + "92": 1132053504.0, + "93": 1132053504.0, + "94": 1132053504.0, "95": 1132053504.0, + "96": 1132053504.0, + "97": 1132053504.0, + "98": 1132053504.0, + "99": 1132053504.0, "100": 1132053504.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1409266176.0, + "2": 1864166912.0, + "3": 1864166912.0, + "4": 1864166912.0, "5": 1864166912.0, + "6": 1864166912.0, + "7": 1864166912.0, + "8": 1864166912.0, + "9": 1864166912.0, "10": 1864166912.0, + "11": 1864166912.0, + "12": 1864166912.0, + "13": 1864166912.0, + "14": 1864166912.0, "15": 1864166912.0, + "16": 1864166912.0, + "17": 1864166912.0, + "18": 1864166912.0, + "19": 1864166912.0, "20": 1864166912.0, + "21": 1864166912.0, + "22": 1864166912.0, + "23": 1864166912.0, + "24": 1864166912.0, "25": 1864166912.0, + "26": 1864166912.0, + "27": 1864166912.0, + "28": 1864166912.0, + "29": 1864166912.0, "30": 1864166912.0, + "31": 1864166912.0, + "32": 
1864166912.0, + "33": 1864166912.0, + "34": 1864166912.0, "35": 1864166912.0, + "36": 1864166912.0, + "37": 1864166912.0, + "38": 1864166912.0, + "39": 1864166912.0, "40": 1864166912.0, + "41": 1864166912.0, + "42": 1864166912.0, + "43": 1864166912.0, + "44": 1864166912.0, "45": 1864166912.0, + "46": 1864166912.0, + "47": 1864166912.0, + "48": 1864166912.0, + "49": 1864166912.0, "50": 1864166912.0, + "51": 1864166912.0, + "52": 1864166912.0, + "53": 1864166912.0, + "54": 1864166912.0, "55": 1864166912.0, + "56": 1864166912.0, + "57": 1864166912.0, + "58": 1864166912.0, + "59": 1864166912.0, "60": 1864166912.0, + "61": 1864166912.0, + "62": 1864166912.0, + "63": 1864166912.0, + "64": 1864166912.0, "65": 1864166912.0, + "66": 1864166912.0, + "67": 1864166912.0, + "68": 1864166912.0, + "69": 1864166912.0, "70": 1864166912.0, + "71": 1864166912.0, + "72": 1864166912.0, + "73": 1864166912.0, + "74": 1864166912.0, "75": 1864166912.0, + "76": 1864166912.0, + "77": 1864166912.0, + "78": 1864166912.0, + "79": 1864166912.0, "80": 1864166912.0, + "81": 1864166912.0, + "82": 1864166912.0, + "83": 1864166912.0, + "84": 1864166912.0, "85": 1864166912.0, + "86": 1864166912.0, + "87": 1864166912.0, + "88": 1864166912.0, + "89": 1864166912.0, "90": 1864166912.0, + "91": 1864166912.0, + "92": 1864166912.0, + "93": 1864166912.0, + "94": 1864166912.0, "95": 1864166912.0, + "96": 1864166912.0, + "97": 1864166912.0, + "98": 1864166912.0, + "99": 1864166912.0, "100": 1864166912.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 9.92821, - "5": 0.84728, - "10": 0.55604, - "15": 0.56749, - "20": 0.88464, - "25": 0.56066, - "30": 0.56065, - "35": 0.55291, - "40": 0.56895, - "45": 0.55838, - "50": 0.56254, - "55": 0.55721, - "60": 0.55871, - "65": 0.55687, - "70": 0.55579, - "75": 0.55255, - "80": 0.83995, - "85": 0.55623, - "90": 0.56239, - "95": 0.56105, - "100": 0.5538 + "1": 9.73359, + "2": 0.67213, + "3": 0.64227, 
+ "4": 0.63808, + "5": 0.64274, + "6": 0.67444, + "7": 0.656, + "8": 0.64304, + "9": 0.64801, + "10": 0.6494, + "11": 0.64362, + "12": 0.64541, + "13": 0.64198, + "14": 0.64063, + "15": 0.64548, + "16": 0.64104, + "17": 0.64359, + "18": 0.64166, + "19": 0.65505, + "20": 0.73426, + "21": 0.95714, + "22": 0.65, + "23": 0.63689, + "24": 0.6432, + "25": 0.96753, + "26": 1.01279, + "27": 0.6456, + "28": 0.64422, + "29": 0.64535, + "30": 1.02938, + "31": 0.64295, + "32": 0.64549, + "33": 1.10839, + "34": 0.66812, + "35": 0.64537, + "36": 0.64987, + "37": 0.64712, + "38": 0.6499, + "39": 0.64672, + "40": 0.64485, + "41": 0.64456, + "42": 0.64313, + "43": 0.64617, + "44": 0.64605, + "45": 0.64551, + "46": 0.64651, + "47": 0.70467, + "48": 0.67348, + "49": 0.65815, + "50": 0.65354, + "51": 0.64544, + "52": 0.6421, + "53": 0.64328, + "54": 0.64635, + "55": 0.6411, + "56": 0.64965, + "57": 0.64264, + "58": 0.64835, + "59": 0.64574, + "60": 0.64782, + "61": 0.64933, + "62": 0.65052, + "63": 0.64609, + "64": 0.68144, + "65": 0.64542, + "66": 0.64402, + "67": 0.64496, + "68": 0.64484, + "69": 0.64035, + "70": 0.64288, + "71": 0.64575, + "72": 0.69431, + "73": 0.64645, + "74": 0.64787, + "75": 0.65414, + "76": 0.64408, + "77": 0.64637, + "78": 0.64886, + "79": 0.66194, + "80": 0.65332, + "81": 0.65413, + "82": 0.65243, + "83": 0.64364, + "84": 0.64934, + "85": 0.6425, + "86": 0.96767, + "87": 0.92546, + "88": 0.6477, + "89": 0.64523, + "90": 0.64767, + "91": 0.65445, + "92": 0.64953, + "93": 0.65409, + "94": 0.69319, + "95": 0.65121, + "96": 0.64906, + "97": 0.65378, + "98": 0.6511, + "99": 0.65393, + "100": 0.65491 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..4d566ec6c1b --- /dev/null +++ 
b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34904, + "2": 10.34488, + "3": 9.79407, + "4": 9.59568, + "5": 9.42065, + "6": 9.41856, + "7": 9.28073, + "8": 9.18973, + "9": 9.06584, + "10": 9.00206, + "11": 8.81497, + "12": 8.78107, + "13": 8.82506, + "14": 8.6728, + "15": 8.6368, + "16": 8.51926, + "17": 8.45732, + "18": 8.37037, + "19": 8.36068, + "20": 8.25456, + "21": 8.24268, + "22": 8.13404, + "23": 8.06818, + "24": 8.11464, + "25": 7.95146, + "26": 8.08186, + "27": 7.86814, + "28": 7.94027, + "29": 7.77604, + "30": 7.84595, + "31": 7.81568, + "32": 7.65964, + "33": 7.77905, + "34": 7.53277, + "35": 7.6586, + "36": 7.51541, + "37": 7.44748, + "38": 7.4824, + "39": 7.46523, + "40": 7.49146, + "41": 7.40822, + "42": 7.35649, + "43": 7.43806, + "44": 7.35517, + "45": 7.35103, + "46": 7.27859, + "47": 7.44152, + "48": 7.2683, + "49": 7.32389, + "50": 7.14549, + "51": 7.36541, + "52": 7.12192, + "53": 7.09189, + "54": 7.22759, + "55": 7.13584, + "56": 7.20822, + "57": 7.31316, + "58": 6.99088, + "59": 7.09934, + "60": 7.12683, + "61": 7.1014, + "62": 7.23954, + "63": 7.14417, + "64": 7.06836, + "65": 6.98412, + "66": 7.03768, + "67": 7.02847, + "68": 7.1299, + "69": 7.01456, + "70": 7.04997, + "71": 6.89408, + "72": 6.98553, + "73": 6.96694, + "74": 6.90297, + "75": 7.0574, + "76": 6.9581, + "77": 7.06903, + "78": 7.02133, + "79": 6.8504, + "80": 6.91935, + "81": 6.95874, + "82": 7.04745, + "83": 6.98522, + "84": 6.99712, + "85": 6.83565, + "86": 7.04156, + "87": 6.96476, + "88": 6.89883, + "89": 6.80051, + "90": 7.22593, + "91": 6.70562, + "92": 7.0381, + "93": 6.88685, + "94": 7.03908, + "95": 6.84815, + "96": 6.95281, + "97": 6.94344, + "98": 6.86987, + "99": 6.99502, + "100": 6.96683 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, 
+ "values": { + "1": 43274.0, + "2": 44071.0, + "3": 44760.0, + "4": 42385.0, + "5": 45378.0, + "6": 40938.0, + "7": 43150.0, + "8": 45450.0, + "9": 42428.0, + "10": 45373.0, + "11": 43974.0, + "12": 44591.0, + "13": 43897.0, + "14": 46204.0, + "15": 43924.0, + "16": 41613.0, + "17": 43852.0, + "18": 44669.0, + "19": 42579.0, + "20": 44769.0, + "21": 44761.0, + "22": 41873.0, + "23": 45441.0, + "24": 43081.0, + "25": 42452.0, + "26": 43947.0, + "27": 46247.0, + "28": 46419.0, + "29": 46169.0, + "30": 44035.0, + "31": 41152.0, + "32": 43347.0, + "33": 45435.0, + "34": 43300.0, + "35": 43284.0, + "36": 42483.0, + "37": 40070.0, + "38": 42561.0, + "39": 44706.0, + "40": 43260.0, + "41": 44642.0, + "42": 43192.0, + "43": 45439.0, + "44": 44588.0, + "45": 43274.0, + "46": 43921.0, + "47": 42364.0, + "48": 44740.0, + "49": 43152.0, + "50": 43348.0, + "51": 41112.0, + "52": 43837.0, + "53": 43913.0, + "54": 41704.0, + "55": 43870.0, + "56": 43209.0, + "57": 42636.0, + "58": 43841.0, + "59": 44630.0, + "60": 41219.0, + "61": 39702.0, + "62": 44739.0, + "63": 44651.0, + "64": 45372.0, + "65": 44682.0, + "66": 45351.0, + "67": 43174.0, + "68": 42502.0, + "69": 43834.0, + "70": 45514.0, + "71": 43291.0, + "72": 44767.0, + "73": 45384.0, + "74": 42457.0, + "75": 44673.0, + "76": 43876.0, + "77": 42026.0, + "78": 40350.0, + "79": 38918.0, + "80": 41092.0, + "81": 45364.0, + "82": 43198.0, + "83": 38467.0, + "84": 42477.0, + "85": 43981.0, + "86": 45667.0, + "87": 40863.0, + "88": 41772.0, + "89": 41104.0, + "90": 44669.0, + "91": 46134.0, + "92": 41634.0, + "93": 43241.0, + "94": 39538.0, + "95": 43915.0, + "96": 44683.0, + "97": 45405.0, + "98": 41791.0, + "99": 45414.0, + "100": 42458.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1132053504.0, + "2": 1132053504.0, + "3": 1132053504.0, + "4": 1132053504.0, + "5": 1132053504.0, + "6": 1132053504.0, + "7": 1132053504.0, + "8": 1132053504.0, + "9": 
1132053504.0, + "10": 1132053504.0, + "11": 1132053504.0, + "12": 1132053504.0, + "13": 1132053504.0, + "14": 1132053504.0, + "15": 1132053504.0, + "16": 1132053504.0, + "17": 1132053504.0, + "18": 1132053504.0, + "19": 1132053504.0, + "20": 1132053504.0, + "21": 1132053504.0, + "22": 1132053504.0, + "23": 1132053504.0, + "24": 1132053504.0, + "25": 1132053504.0, + "26": 1132053504.0, + "27": 1132053504.0, + "28": 1132053504.0, + "29": 1132053504.0, + "30": 1132053504.0, + "31": 1132053504.0, + "32": 1132053504.0, + "33": 1132053504.0, + "34": 1132053504.0, + "35": 1132053504.0, + "36": 1132053504.0, + "37": 1132053504.0, + "38": 1132053504.0, + "39": 1132053504.0, + "40": 1132053504.0, + "41": 1132053504.0, + "42": 1132053504.0, + "43": 1132053504.0, + "44": 1132053504.0, + "45": 1132053504.0, + "46": 1132053504.0, + "47": 1132053504.0, + "48": 1132053504.0, + "49": 1132053504.0, + "50": 1132053504.0, + "51": 1132053504.0, + "52": 1132053504.0, + "53": 1132053504.0, + "54": 1132053504.0, + "55": 1132053504.0, + "56": 1132053504.0, + "57": 1132053504.0, + "58": 1132053504.0, + "59": 1132053504.0, + "60": 1132053504.0, + "61": 1132053504.0, + "62": 1132053504.0, + "63": 1132053504.0, + "64": 1132053504.0, + "65": 1132053504.0, + "66": 1132053504.0, + "67": 1132053504.0, + "68": 1132053504.0, + "69": 1132053504.0, + "70": 1132053504.0, + "71": 1132053504.0, + "72": 1132053504.0, + "73": 1132053504.0, + "74": 1132053504.0, + "75": 1132053504.0, + "76": 1132053504.0, + "77": 1132053504.0, + "78": 1132053504.0, + "79": 1132053504.0, + "80": 1132053504.0, + "81": 1132053504.0, + "82": 1132053504.0, + "83": 1132053504.0, + "84": 1132053504.0, + "85": 1132053504.0, + "86": 1132053504.0, + "87": 1132053504.0, + "88": 1132053504.0, + "89": 1132053504.0, + "90": 1132053504.0, + "91": 1132053504.0, + "92": 1132053504.0, + "93": 1132053504.0, + "94": 1132053504.0, + "95": 1132053504.0, + "96": 1132053504.0, + "97": 1132053504.0, + "98": 1132053504.0, + "99": 1132053504.0, + 
"100": 1132053504.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1409266176.0, + "2": 1864166912.0, + "3": 1864166912.0, + "4": 1864166912.0, + "5": 1864166912.0, + "6": 1864166912.0, + "7": 1864166912.0, + "8": 1864166912.0, + "9": 1864166912.0, + "10": 1864166912.0, + "11": 1864166912.0, + "12": 1864166912.0, + "13": 1864166912.0, + "14": 1864166912.0, + "15": 1864166912.0, + "16": 1864166912.0, + "17": 1864166912.0, + "18": 1864166912.0, + "19": 1864166912.0, + "20": 1864166912.0, + "21": 1864166912.0, + "22": 1864166912.0, + "23": 1864166912.0, + "24": 1864166912.0, + "25": 1864166912.0, + "26": 1864166912.0, + "27": 1864166912.0, + "28": 1864166912.0, + "29": 1864166912.0, + "30": 1864166912.0, + "31": 1864166912.0, + "32": 1864166912.0, + "33": 1864166912.0, + "34": 1864166912.0, + "35": 1864166912.0, + "36": 1864166912.0, + "37": 1864166912.0, + "38": 1864166912.0, + "39": 1864166912.0, + "40": 1864166912.0, + "41": 1864166912.0, + "42": 1864166912.0, + "43": 1864166912.0, + "44": 1864166912.0, + "45": 1864166912.0, + "46": 1864166912.0, + "47": 1864166912.0, + "48": 1864166912.0, + "49": 1864166912.0, + "50": 1864166912.0, + "51": 1864166912.0, + "52": 1864166912.0, + "53": 1864166912.0, + "54": 1864166912.0, + "55": 1864166912.0, + "56": 1864166912.0, + "57": 1864166912.0, + "58": 1864166912.0, + "59": 1864166912.0, + "60": 1864166912.0, + "61": 1864166912.0, + "62": 1864166912.0, + "63": 1864166912.0, + "64": 1864166912.0, + "65": 1864166912.0, + "66": 1864166912.0, + "67": 1864166912.0, + "68": 1864166912.0, + "69": 1864166912.0, + "70": 1864166912.0, + "71": 1864166912.0, + "72": 1864166912.0, + "73": 1864166912.0, + "74": 1864166912.0, + "75": 1864166912.0, + "76": 1864166912.0, + "77": 1864166912.0, + "78": 1864166912.0, + "79": 1864166912.0, + "80": 1864166912.0, + "81": 1864166912.0, + "82": 1864166912.0, + "83": 1864166912.0, + "84": 1864166912.0, + "85": 1864166912.0, + 
"86": 1864166912.0, + "87": 1864166912.0, + "88": 1864166912.0, + "89": 1864166912.0, + "90": 1864166912.0, + "91": 1864166912.0, + "92": 1864166912.0, + "93": 1864166912.0, + "94": 1864166912.0, + "95": 1864166912.0, + "96": 1864166912.0, + "97": 1864166912.0, + "98": 1864166912.0, + "99": 1864166912.0, + "100": 1864166912.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.29236, + "2": 0.67893, + "3": 0.58934, + "4": 0.59882, + "5": 0.5783, + "6": 0.57112, + "7": 0.5684, + "8": 0.55955, + "9": 0.5654, + "10": 0.56541, + "11": 0.57111, + "12": 0.57899, + "13": 0.56135, + "14": 0.56951, + "15": 0.56653, + "16": 0.56906, + "17": 0.5749, + "18": 0.56365, + "19": 0.56829, + "20": 0.93294, + "21": 0.56791, + "22": 0.56512, + "23": 0.57032, + "24": 0.56889, + "25": 0.56027, + "26": 0.87556, + "27": 0.56766, + "28": 0.88828, + "29": 0.56306, + "30": 0.56316, + "31": 0.88671, + "32": 1.03162, + "33": 0.90854, + "34": 0.88126, + "35": 0.56957, + "36": 0.56621, + "37": 0.56647, + "38": 0.56957, + "39": 0.56463, + "40": 0.5668, + "41": 0.56277, + "42": 0.58937, + "43": 0.56553, + "44": 0.5682, + "45": 0.56815, + "46": 0.56571, + "47": 0.57199, + "48": 0.57128, + "49": 0.59172, + "50": 0.56455, + "51": 0.56546, + "52": 0.56259, + "53": 0.56063, + "54": 0.56207, + "55": 0.55985, + "56": 0.57542, + "57": 0.56257, + "58": 0.55932, + "59": 0.56051, + "60": 0.56182, + "61": 0.58999, + "62": 0.55986, + "63": 0.56154, + "64": 0.56167, + "65": 0.56072, + "66": 0.57597, + "67": 0.56011, + "68": 0.55956, + "69": 0.56507, + "70": 0.58296, + "71": 0.56017, + "72": 0.56437, + "73": 0.56838, + "74": 0.56548, + "75": 0.57028, + "76": 0.56574, + "77": 0.56397, + "78": 0.56279, + "79": 0.56782, + "80": 0.56585, + "81": 0.56243, + "82": 0.5641, + "83": 0.56477, + "84": 0.5852, + "85": 0.56257, + "86": 0.84754, + "87": 0.56761, + "88": 0.56425, + "89": 0.57197, + "90": 0.85557, + "91": 0.56904, + "92": 0.57069, + "93": 0.56223, + 
"94": 0.56609, + "95": 0.565, + "96": 0.56747, + "97": 0.56431, + "98": 0.58797, + "99": 0.89814, + "100": 0.5783 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..2400879202c --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34904, + "2": 10.34488, + "3": 9.79407, + "4": 9.59568, + "5": 9.42065, + "6": 9.41856, + "7": 9.28073, + "8": 9.18973, + "9": 9.06584, + "10": 9.00206, + "11": 8.81497, + "12": 8.78107, + "13": 8.82506, + "14": 8.6728, + "15": 8.6368, + "16": 8.51926, + "17": 8.45732, + "18": 8.37037, + "19": 8.36068, + "20": 8.25456, + "21": 8.24268, + "22": 8.13404, + "23": 8.06818, + "24": 8.11464, + "25": 7.95146, + "26": 8.08186, + "27": 7.86814, + "28": 7.94027, + "29": 7.77604, + "30": 7.84595, + "31": 7.81568, + "32": 7.65964, + "33": 7.77905, + "34": 7.53277, + "35": 7.6586, + "36": 7.51541, + "37": 7.44748, + "38": 7.4824, + "39": 7.46523, + "40": 7.49146, + "41": 7.40822, + "42": 7.35649, + "43": 7.43806, + "44": 7.35517, + "45": 7.35103, + "46": 7.27859, + "47": 7.44152, + "48": 7.2683, + "49": 7.32389, + "50": 7.14549, + "51": 7.36541, + "52": 7.12192, + "53": 7.09189, + "54": 7.22759, + "55": 7.13584, + "56": 7.20822, + "57": 7.31316, + "58": 6.99088, + "59": 7.09934, + "60": 7.12683, + "61": 7.1014, + "62": 7.23954, + "63": 7.14417, + "64": 7.06836, + "65": 6.98412, + "66": 7.03768, + "67": 7.02847, + "68": 7.1299, + "69": 7.01456, + "70": 7.04997, + "71": 6.89408, + "72": 6.98553, + "73": 6.96694, + "74": 6.90297, + "75": 7.0574, + "76": 6.9581, + "77": 7.06903, + "78": 
7.02133, + "79": 6.8504, + "80": 6.91935, + "81": 6.95874, + "82": 7.04745, + "83": 6.98522, + "84": 6.99712, + "85": 6.83565, + "86": 7.04156, + "87": 6.96476, + "88": 6.89883, + "89": 6.80051, + "90": 7.22593, + "91": 6.70562, + "92": 7.0381, + "93": 6.88685, + "94": 7.03908, + "95": 6.84815, + "96": 6.95281, + "97": 6.94344, + "98": 6.86987, + "99": 6.99502, + "100": 6.96683 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43274.0, + "2": 44071.0, + "3": 44760.0, + "4": 42385.0, + "5": 45378.0, + "6": 40938.0, + "7": 43150.0, + "8": 45450.0, + "9": 42428.0, + "10": 45373.0, + "11": 43974.0, + "12": 44591.0, + "13": 43897.0, + "14": 46204.0, + "15": 43924.0, + "16": 41613.0, + "17": 43852.0, + "18": 44669.0, + "19": 42579.0, + "20": 44769.0, + "21": 44761.0, + "22": 41873.0, + "23": 45441.0, + "24": 43081.0, + "25": 42452.0, + "26": 43947.0, + "27": 46247.0, + "28": 46419.0, + "29": 46169.0, + "30": 44035.0, + "31": 41152.0, + "32": 43347.0, + "33": 45435.0, + "34": 43300.0, + "35": 43284.0, + "36": 42483.0, + "37": 40070.0, + "38": 42561.0, + "39": 44706.0, + "40": 43260.0, + "41": 44642.0, + "42": 43192.0, + "43": 45439.0, + "44": 44588.0, + "45": 43274.0, + "46": 43921.0, + "47": 42364.0, + "48": 44740.0, + "49": 43152.0, + "50": 43348.0, + "51": 41112.0, + "52": 43837.0, + "53": 43913.0, + "54": 41704.0, + "55": 43870.0, + "56": 43209.0, + "57": 42636.0, + "58": 43841.0, + "59": 44630.0, + "60": 41219.0, + "61": 39702.0, + "62": 44739.0, + "63": 44651.0, + "64": 45372.0, + "65": 44682.0, + "66": 45351.0, + "67": 43174.0, + "68": 42502.0, + "69": 43834.0, + "70": 45514.0, + "71": 43291.0, + "72": 44767.0, + "73": 45384.0, + "74": 42457.0, + "75": 44673.0, + "76": 43876.0, + "77": 42026.0, + "78": 40350.0, + "79": 38918.0, + "80": 41092.0, + "81": 45364.0, + "82": 43198.0, + "83": 38467.0, + "84": 42477.0, + "85": 43981.0, + "86": 45667.0, + "87": 40863.0, + "88": 41772.0, + "89": 41104.0, + "90": 
44669.0, + "91": 46134.0, + "92": 41634.0, + "93": 43241.0, + "94": 39538.0, + "95": 43915.0, + "96": 44683.0, + "97": 45405.0, + "98": 41791.0, + "99": 45414.0, + "100": 42458.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1132053504.0, + "2": 1132053504.0, + "3": 1132053504.0, + "4": 1132053504.0, + "5": 1132053504.0, + "6": 1132053504.0, + "7": 1132053504.0, + "8": 1132053504.0, + "9": 1132053504.0, + "10": 1132053504.0, + "11": 1132053504.0, + "12": 1132053504.0, + "13": 1132053504.0, + "14": 1132053504.0, + "15": 1132053504.0, + "16": 1132053504.0, + "17": 1132053504.0, + "18": 1132053504.0, + "19": 1132053504.0, + "20": 1132053504.0, + "21": 1132053504.0, + "22": 1132053504.0, + "23": 1132053504.0, + "24": 1132053504.0, + "25": 1132053504.0, + "26": 1132053504.0, + "27": 1132053504.0, + "28": 1132053504.0, + "29": 1132053504.0, + "30": 1132053504.0, + "31": 1132053504.0, + "32": 1132053504.0, + "33": 1132053504.0, + "34": 1132053504.0, + "35": 1132053504.0, + "36": 1132053504.0, + "37": 1132053504.0, + "38": 1132053504.0, + "39": 1132053504.0, + "40": 1132053504.0, + "41": 1132053504.0, + "42": 1132053504.0, + "43": 1132053504.0, + "44": 1132053504.0, + "45": 1132053504.0, + "46": 1132053504.0, + "47": 1132053504.0, + "48": 1132053504.0, + "49": 1132053504.0, + "50": 1132053504.0, + "51": 1132053504.0, + "52": 1132053504.0, + "53": 1132053504.0, + "54": 1132053504.0, + "55": 1132053504.0, + "56": 1132053504.0, + "57": 1132053504.0, + "58": 1132053504.0, + "59": 1132053504.0, + "60": 1132053504.0, + "61": 1132053504.0, + "62": 1132053504.0, + "63": 1132053504.0, + "64": 1132053504.0, + "65": 1132053504.0, + "66": 1132053504.0, + "67": 1132053504.0, + "68": 1132053504.0, + "69": 1132053504.0, + "70": 1132053504.0, + "71": 1132053504.0, + "72": 1132053504.0, + "73": 1132053504.0, + "74": 1132053504.0, + "75": 1132053504.0, + "76": 1132053504.0, + "77": 1132053504.0, + "78": 1132053504.0, + 
"79": 1132053504.0, + "80": 1132053504.0, + "81": 1132053504.0, + "82": 1132053504.0, + "83": 1132053504.0, + "84": 1132053504.0, + "85": 1132053504.0, + "86": 1132053504.0, + "87": 1132053504.0, + "88": 1132053504.0, + "89": 1132053504.0, + "90": 1132053504.0, + "91": 1132053504.0, + "92": 1132053504.0, + "93": 1132053504.0, + "94": 1132053504.0, + "95": 1132053504.0, + "96": 1132053504.0, + "97": 1132053504.0, + "98": 1132053504.0, + "99": 1132053504.0, + "100": 1132053504.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1409266176.0, + "2": 1864166912.0, + "3": 1864166912.0, + "4": 1864166912.0, + "5": 1864166912.0, + "6": 1864166912.0, + "7": 1864166912.0, + "8": 1864166912.0, + "9": 1864166912.0, + "10": 1864166912.0, + "11": 1864166912.0, + "12": 1864166912.0, + "13": 1864166912.0, + "14": 1864166912.0, + "15": 1864166912.0, + "16": 1864166912.0, + "17": 1864166912.0, + "18": 1864166912.0, + "19": 1864166912.0, + "20": 1864166912.0, + "21": 1864166912.0, + "22": 1864166912.0, + "23": 1864166912.0, + "24": 1864166912.0, + "25": 1864166912.0, + "26": 1864166912.0, + "27": 1864166912.0, + "28": 1864166912.0, + "29": 1864166912.0, + "30": 1864166912.0, + "31": 1864166912.0, + "32": 1864166912.0, + "33": 1864166912.0, + "34": 1864166912.0, + "35": 1864166912.0, + "36": 1864166912.0, + "37": 1864166912.0, + "38": 1864166912.0, + "39": 1864166912.0, + "40": 1864166912.0, + "41": 1864166912.0, + "42": 1864166912.0, + "43": 1864166912.0, + "44": 1864166912.0, + "45": 1864166912.0, + "46": 1864166912.0, + "47": 1864166912.0, + "48": 1864166912.0, + "49": 1864166912.0, + "50": 1864166912.0, + "51": 1864166912.0, + "52": 1864166912.0, + "53": 1864166912.0, + "54": 1864166912.0, + "55": 1864166912.0, + "56": 1864166912.0, + "57": 1864166912.0, + "58": 1864166912.0, + "59": 1864166912.0, + "60": 1864166912.0, + "61": 1864166912.0, + "62": 1864166912.0, + "63": 1864166912.0, + "64": 1864166912.0, + 
"65": 1864166912.0, + "66": 1864166912.0, + "67": 1864166912.0, + "68": 1864166912.0, + "69": 1864166912.0, + "70": 1864166912.0, + "71": 1864166912.0, + "72": 1864166912.0, + "73": 1864166912.0, + "74": 1864166912.0, + "75": 1864166912.0, + "76": 1864166912.0, + "77": 1864166912.0, + "78": 1864166912.0, + "79": 1864166912.0, + "80": 1864166912.0, + "81": 1864166912.0, + "82": 1864166912.0, + "83": 1864166912.0, + "84": 1864166912.0, + "85": 1864166912.0, + "86": 1864166912.0, + "87": 1864166912.0, + "88": 1864166912.0, + "89": 1864166912.0, + "90": 1864166912.0, + "91": 1864166912.0, + "92": 1864166912.0, + "93": 1864166912.0, + "94": 1864166912.0, + "95": 1864166912.0, + "96": 1864166912.0, + "97": 1864166912.0, + "98": 1864166912.0, + "99": 1864166912.0, + "100": 1864166912.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.54009, + "2": 0.66845, + "3": 0.64084, + "4": 0.64526, + "5": 0.64331, + "6": 0.65463, + "7": 0.63991, + "8": 0.63854, + "9": 0.64034, + "10": 0.63886, + "11": 0.63968, + "12": 0.64441, + "13": 0.63828, + "14": 0.64647, + "15": 0.64199, + "16": 0.63783, + "17": 0.64359, + "18": 0.66439, + "19": 0.64718, + "20": 0.63999, + "21": 0.65677, + "22": 0.95191, + "23": 0.64765, + "24": 0.98317, + "25": 1.63221, + "26": 0.64915, + "27": 0.64318, + "28": 0.99238, + "29": 0.64655, + "30": 0.64693, + "31": 0.64241, + "32": 0.98967, + "33": 0.64928, + "34": 0.64294, + "35": 0.65629, + "36": 0.64358, + "37": 0.64814, + "38": 0.64325, + "39": 0.64509, + "40": 0.64733, + "41": 0.64693, + "42": 0.65392, + "43": 0.64721, + "44": 0.64487, + "45": 0.64766, + "46": 0.65872, + "47": 0.65402, + "48": 0.65486, + "49": 0.64433, + "50": 0.64917, + "51": 0.64197, + "52": 0.64647, + "53": 0.64656, + "54": 0.64815, + "55": 0.64573, + "56": 0.6539, + "57": 0.64582, + "58": 0.64668, + "59": 0.64431, + "60": 0.64957, + "61": 0.64703, + "62": 0.64671, + "63": 0.65979, + "64": 0.64599, + "65": 0.6466, + "66": 
0.64754, + "67": 0.6471, + "68": 0.64756, + "69": 0.64621, + "70": 0.65906, + "71": 0.64587, + "72": 0.65969, + "73": 0.64476, + "74": 0.65304, + "75": 0.64786, + "76": 0.65077, + "77": 0.66405, + "78": 0.6472, + "79": 0.64431, + "80": 0.64472, + "81": 0.64407, + "82": 0.64326, + "83": 0.93161, + "84": 0.65573, + "85": 0.63999, + "86": 0.64393, + "87": 0.92064, + "88": 0.64399, + "89": 0.64306, + "90": 0.64439, + "91": 0.6414, + "92": 0.64504, + "93": 0.64858, + "94": 0.64041, + "95": 0.64497, + "96": 0.64493, + "97": 0.64508, + "98": 0.6444, + "99": 0.64587, + "100": 0.64886 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 81031669a61..899d650d38b 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.34904, + "2": 10.34488, + "3": 9.79407, + "4": 9.59568, "5": 9.42065, + "6": 9.41856, + "7": 9.28073, + "8": 9.18973, + "9": 9.06584, "10": 9.00206, + "11": 8.81497, + "12": 8.78107, + "13": 8.82506, + "14": 8.6728, "15": 8.6368, + "16": 8.51926, + "17": 8.45732, + "18": 8.37037, + "19": 8.36068, "20": 8.25456, + "21": 8.24268, + "22": 8.13404, + "23": 8.06818, + "24": 8.11464, "25": 7.95146, + "26": 8.08186, + "27": 7.86814, + "28": 7.94027, + "29": 7.77604, "30": 7.84595, + "31": 7.81568, + "32": 7.65964, + "33": 7.77905, + "34": 7.53277, "35": 7.6586, + "36": 7.51541, + "37": 7.44748, + "38": 7.4824, + "39": 7.46523, "40": 7.49146, + "41": 7.40822, + 
"42": 7.35649, + "43": 7.43806, + "44": 7.35517, "45": 7.35103, + "46": 7.27859, + "47": 7.44152, + "48": 7.2683, + "49": 7.32389, "50": 7.14549, + "51": 7.36541, + "52": 7.12192, + "53": 7.09189, + "54": 7.22759, "55": 7.13584, + "56": 7.20822, + "57": 7.31316, + "58": 6.99088, + "59": 7.09934, "60": 7.12683, + "61": 7.1014, + "62": 7.23954, + "63": 7.14417, + "64": 7.06836, "65": 6.98412, + "66": 7.03768, + "67": 7.02847, + "68": 7.1299, + "69": 7.01456, "70": 7.04997, + "71": 6.89408, + "72": 6.98553, + "73": 6.96694, + "74": 6.90297, "75": 7.0574, + "76": 6.9581, + "77": 7.06903, + "78": 7.02133, + "79": 6.8504, "80": 6.91935, + "81": 6.95874, + "82": 7.04745, + "83": 6.98522, + "84": 6.99712, "85": 6.83565, + "86": 7.04156, + "87": 6.96476, + "88": 6.89883, + "89": 6.80051, "90": 7.22593, + "91": 6.70562, + "92": 7.0381, + "93": 6.88685, + "94": 7.03908, "95": 6.84815, + "96": 6.95281, + "97": 6.94344, + "98": 6.86987, + "99": 6.99502, "100": 6.96683 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 43274.0, + "2": 44071.0, + "3": 44760.0, + "4": 42385.0, "5": 45378.0, + "6": 40938.0, + "7": 43150.0, + "8": 45450.0, + "9": 42428.0, "10": 45373.0, + "11": 43974.0, + "12": 44591.0, + "13": 43897.0, + "14": 46204.0, "15": 43924.0, + "16": 41613.0, + "17": 43852.0, + "18": 44669.0, + "19": 42579.0, "20": 44769.0, + "21": 44761.0, + "22": 41873.0, + "23": 45441.0, + "24": 43081.0, "25": 42452.0, + "26": 43947.0, + "27": 46247.0, + "28": 46419.0, + "29": 46169.0, "30": 44035.0, + "31": 41152.0, + "32": 43347.0, + "33": 45435.0, + "34": 43300.0, "35": 43284.0, + "36": 42483.0, + "37": 40070.0, + "38": 42561.0, + "39": 44706.0, "40": 43260.0, + "41": 44642.0, + "42": 43192.0, + "43": 45439.0, + "44": 44588.0, "45": 43274.0, + "46": 43921.0, + "47": 42364.0, + "48": 44740.0, + "49": 43152.0, "50": 43348.0, + "51": 41112.0, + "52": 43837.0, + "53": 43913.0, + "54": 41704.0, "55": 43870.0, + "56": 43209.0, 
+ "57": 42636.0, + "58": 43841.0, + "59": 44630.0, "60": 41219.0, + "61": 39702.0, + "62": 44739.0, + "63": 44651.0, + "64": 45372.0, "65": 44682.0, + "66": 45351.0, + "67": 43174.0, + "68": 42502.0, + "69": 43834.0, "70": 45514.0, + "71": 43291.0, + "72": 44767.0, + "73": 45384.0, + "74": 42457.0, "75": 44673.0, + "76": 43876.0, + "77": 42026.0, + "78": 40350.0, + "79": 38918.0, "80": 41092.0, + "81": 45364.0, + "82": 43198.0, + "83": 38467.0, + "84": 42477.0, "85": 43981.0, + "86": 45667.0, + "87": 40863.0, + "88": 41772.0, + "89": 41104.0, "90": 44669.0, + "91": 46134.0, + "92": 41634.0, + "93": 43241.0, + "94": 39538.0, "95": 43915.0, + "96": 44683.0, + "97": 45405.0, + "98": 41791.0, + "99": 45414.0, "100": 42458.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1132053504.0, + "2": 1132053504.0, + "3": 1132053504.0, + "4": 1132053504.0, "5": 1132053504.0, + "6": 1132053504.0, + "7": 1132053504.0, + "8": 1132053504.0, + "9": 1132053504.0, "10": 1132053504.0, + "11": 1132053504.0, + "12": 1132053504.0, + "13": 1132053504.0, + "14": 1132053504.0, "15": 1132053504.0, + "16": 1132053504.0, + "17": 1132053504.0, + "18": 1132053504.0, + "19": 1132053504.0, "20": 1132053504.0, + "21": 1132053504.0, + "22": 1132053504.0, + "23": 1132053504.0, + "24": 1132053504.0, "25": 1132053504.0, + "26": 1132053504.0, + "27": 1132053504.0, + "28": 1132053504.0, + "29": 1132053504.0, "30": 1132053504.0, + "31": 1132053504.0, + "32": 1132053504.0, + "33": 1132053504.0, + "34": 1132053504.0, "35": 1132053504.0, + "36": 1132053504.0, + "37": 1132053504.0, + "38": 1132053504.0, + "39": 1132053504.0, "40": 1132053504.0, + "41": 1132053504.0, + "42": 1132053504.0, + "43": 1132053504.0, + "44": 1132053504.0, "45": 1132053504.0, + "46": 1132053504.0, + "47": 1132053504.0, + "48": 1132053504.0, + "49": 1132053504.0, "50": 1132053504.0, + "51": 1132053504.0, + "52": 1132053504.0, + "53": 1132053504.0, + "54": 
1132053504.0, "55": 1132053504.0, + "56": 1132053504.0, + "57": 1132053504.0, + "58": 1132053504.0, + "59": 1132053504.0, "60": 1132053504.0, + "61": 1132053504.0, + "62": 1132053504.0, + "63": 1132053504.0, + "64": 1132053504.0, "65": 1132053504.0, + "66": 1132053504.0, + "67": 1132053504.0, + "68": 1132053504.0, + "69": 1132053504.0, "70": 1132053504.0, + "71": 1132053504.0, + "72": 1132053504.0, + "73": 1132053504.0, + "74": 1132053504.0, "75": 1132053504.0, + "76": 1132053504.0, + "77": 1132053504.0, + "78": 1132053504.0, + "79": 1132053504.0, "80": 1132053504.0, + "81": 1132053504.0, + "82": 1132053504.0, + "83": 1132053504.0, + "84": 1132053504.0, "85": 1132053504.0, + "86": 1132053504.0, + "87": 1132053504.0, + "88": 1132053504.0, + "89": 1132053504.0, "90": 1132053504.0, + "91": 1132053504.0, + "92": 1132053504.0, + "93": 1132053504.0, + "94": 1132053504.0, "95": 1132053504.0, + "96": 1132053504.0, + "97": 1132053504.0, + "98": 1132053504.0, + "99": 1132053504.0, "100": 1132053504.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1409266176.0, + "2": 1864166912.0, + "3": 1864166912.0, + "4": 1864166912.0, "5": 1864166912.0, + "6": 1864166912.0, + "7": 1864166912.0, + "8": 1864166912.0, + "9": 1864166912.0, "10": 1864166912.0, + "11": 1864166912.0, + "12": 1864166912.0, + "13": 1864166912.0, + "14": 1864166912.0, "15": 1864166912.0, + "16": 1864166912.0, + "17": 1864166912.0, + "18": 1864166912.0, + "19": 1864166912.0, "20": 1864166912.0, + "21": 1864166912.0, + "22": 1864166912.0, + "23": 1864166912.0, + "24": 1864166912.0, "25": 1864166912.0, + "26": 1864166912.0, + "27": 1864166912.0, + "28": 1864166912.0, + "29": 1864166912.0, "30": 1864166912.0, + "31": 1864166912.0, + "32": 1864166912.0, + "33": 1864166912.0, + "34": 1864166912.0, "35": 1864166912.0, + "36": 1864166912.0, + "37": 1864166912.0, + "38": 1864166912.0, + "39": 1864166912.0, "40": 1864166912.0, + "41": 
1864166912.0, + "42": 1864166912.0, + "43": 1864166912.0, + "44": 1864166912.0, "45": 1864166912.0, + "46": 1864166912.0, + "47": 1864166912.0, + "48": 1864166912.0, + "49": 1864166912.0, "50": 1864166912.0, + "51": 1864166912.0, + "52": 1864166912.0, + "53": 1864166912.0, + "54": 1864166912.0, "55": 1864166912.0, + "56": 1864166912.0, + "57": 1864166912.0, + "58": 1864166912.0, + "59": 1864166912.0, "60": 1864166912.0, + "61": 1864166912.0, + "62": 1864166912.0, + "63": 1864166912.0, + "64": 1864166912.0, "65": 1864166912.0, + "66": 1864166912.0, + "67": 1864166912.0, + "68": 1864166912.0, + "69": 1864166912.0, "70": 1864166912.0, + "71": 1864166912.0, + "72": 1864166912.0, + "73": 1864166912.0, + "74": 1864166912.0, "75": 1864166912.0, + "76": 1864166912.0, + "77": 1864166912.0, + "78": 1864166912.0, + "79": 1864166912.0, "80": 1864166912.0, + "81": 1864166912.0, + "82": 1864166912.0, + "83": 1864166912.0, + "84": 1864166912.0, "85": 1864166912.0, + "86": 1864166912.0, + "87": 1864166912.0, + "88": 1864166912.0, + "89": 1864166912.0, "90": 1864166912.0, + "91": 1864166912.0, + "92": 1864166912.0, + "93": 1864166912.0, + "94": 1864166912.0, "95": 1864166912.0, + "96": 1864166912.0, + "97": 1864166912.0, + "98": 1864166912.0, + "99": 1864166912.0, "100": 1864166912.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 8.71448, - "5": 0.56781, - "10": 0.56843, - "15": 0.57548, - "20": 0.88447, - "25": 0.87922, - "30": 0.58734, - "35": 0.58492, - "40": 0.57893, - "45": 0.58782, - "50": 0.57316, - "55": 0.5549, - "60": 0.55728, - "65": 0.55905, - "70": 0.5662, - "75": 0.56127, - "80": 0.55317, - "85": 0.5553, - "90": 0.55754, - "95": 0.5596, - "100": 0.91445 + "1": 9.67922, + "2": 0.68152, + "3": 0.65295, + "4": 0.64618, + "5": 0.65142, + "6": 0.64889, + "7": 0.65383, + "8": 0.6456, + "9": 0.66119, + "10": 0.65998, + "11": 0.6579, + "12": 0.65779, + "13": 0.6603, + "14": 0.65806, + "15": 1.0135, + 
"16": 0.65488, + "17": 0.931, + "18": 1.08662, + "19": 0.66372, + "20": 0.66034, + "21": 0.65544, + "22": 0.66308, + "23": 0.66077, + "24": 1.04108, + "25": 0.6666, + "26": 0.97428, + "27": 0.65856, + "28": 0.66326, + "29": 0.65747, + "30": 0.6582, + "31": 1.10061, + "32": 1.04733, + "33": 0.65682, + "34": 0.65788, + "35": 0.66349, + "36": 0.65804, + "37": 0.66396, + "38": 0.65876, + "39": 0.65606, + "40": 0.6586, + "41": 0.65742, + "42": 0.66367, + "43": 0.66411, + "44": 0.65879, + "45": 0.66227, + "46": 0.66361, + "47": 0.66004, + "48": 0.6614, + "49": 0.65707, + "50": 0.65748, + "51": 0.66048, + "52": 0.65517, + "53": 0.65236, + "54": 0.6505, + "55": 0.65061, + "56": 0.65419, + "57": 0.64612, + "58": 0.6508, + "59": 0.64828, + "60": 0.64805, + "61": 0.99903, + "62": 0.6529, + "63": 0.65264, + "64": 0.64941, + "65": 0.65259, + "66": 0.64896, + "67": 0.64907, + "68": 0.65692, + "69": 0.64922, + "70": 0.65143, + "71": 0.64786, + "72": 0.6595, + "73": 0.65025, + "74": 0.64993, + "75": 0.64539, + "76": 0.65147, + "77": 0.65111, + "78": 0.64894, + "79": 0.65192, + "80": 0.94887, + "81": 0.64772, + "82": 0.64406, + "83": 0.64869, + "84": 0.95425, + "85": 0.64926, + "86": 0.64526, + "87": 0.64401, + "88": 0.95609, + "89": 0.64807, + "90": 0.64544, + "91": 0.9603, + "92": 0.64218, + "93": 0.64853, + "94": 0.64394, + "95": 1.01268, + "96": 1.05755, + "97": 0.65312, + "98": 0.65341, + "99": 0.65751, + "100": 0.64782 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..47d23248800 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm 
loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34904, + "2": 10.34488, + "3": 9.79407, + "4": 9.59568, + "5": 9.42065, + "6": 9.41856, + "7": 9.28073, + "8": 9.18973, + "9": 9.06584, + "10": 9.00206, + "11": 8.81497, + "12": 8.78107, + "13": 8.82506, + "14": 8.6728, + "15": 8.6368, + "16": 8.51926, + "17": 8.45732, + "18": 8.37037, + "19": 8.36068, + "20": 8.25456, + "21": 8.24268, + "22": 8.13404, + "23": 8.06818, + "24": 8.11464, + "25": 7.95146, + "26": 8.08186, + "27": 7.86814, + "28": 7.94027, + "29": 7.77604, + "30": 7.84595, + "31": 7.81568, + "32": 7.65964, + "33": 7.77905, + "34": 7.53277, + "35": 7.6586, + "36": 7.51541, + "37": 7.44748, + "38": 7.4824, + "39": 7.46523, + "40": 7.49146, + "41": 7.40822, + "42": 7.35649, + "43": 7.43806, + "44": 7.35517, + "45": 7.35103, + "46": 7.27859, + "47": 7.44152, + "48": 7.2683, + "49": 7.32389, + "50": 7.14549, + "51": 7.36541, + "52": 7.12192, + "53": 7.09189, + "54": 7.22759, + "55": 7.13584, + "56": 7.20822, + "57": 7.31316, + "58": 6.99088, + "59": 7.09934, + "60": 7.12683, + "61": 7.1014, + "62": 7.23954, + "63": 7.14417, + "64": 7.06836, + "65": 6.98412, + "66": 7.03768, + "67": 7.02847, + "68": 7.1299, + "69": 7.01456, + "70": 7.04997, + "71": 6.89408, + "72": 6.98553, + "73": 6.96694, + "74": 6.90297, + "75": 7.0574, + "76": 6.9581, + "77": 7.06903, + "78": 7.02133, + "79": 6.8504, + "80": 6.91935, + "81": 6.95874, + "82": 7.04745, + "83": 6.98522, + "84": 6.99712, + "85": 6.83565, + "86": 7.04156, + "87": 6.96476, + "88": 6.89883, + "89": 6.80051, + "90": 7.22593, + "91": 6.70562, + "92": 7.0381, + "93": 6.88685, + "94": 7.03908, + "95": 6.84815, + "96": 6.95281, + "97": 6.94344, + "98": 6.86987, + "99": 6.99502, + "100": 6.96683 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43274.0, + "2": 44071.0, + "3": 44760.0, + "4": 42385.0, + "5": 45378.0, + "6": 40938.0, + "7": 43150.0, + "8": 45450.0, + "9": 
42428.0, + "10": 45373.0, + "11": 43974.0, + "12": 44591.0, + "13": 43897.0, + "14": 46204.0, + "15": 43924.0, + "16": 41613.0, + "17": 43852.0, + "18": 44669.0, + "19": 42579.0, + "20": 44769.0, + "21": 44761.0, + "22": 41873.0, + "23": 45441.0, + "24": 43081.0, + "25": 42452.0, + "26": 43947.0, + "27": 46247.0, + "28": 46419.0, + "29": 46169.0, + "30": 44035.0, + "31": 41152.0, + "32": 43347.0, + "33": 45435.0, + "34": 43300.0, + "35": 43284.0, + "36": 42483.0, + "37": 40070.0, + "38": 42561.0, + "39": 44706.0, + "40": 43260.0, + "41": 44642.0, + "42": 43192.0, + "43": 45439.0, + "44": 44588.0, + "45": 43274.0, + "46": 43921.0, + "47": 42364.0, + "48": 44740.0, + "49": 43152.0, + "50": 43348.0, + "51": 41112.0, + "52": 43837.0, + "53": 43913.0, + "54": 41704.0, + "55": 43870.0, + "56": 43209.0, + "57": 42636.0, + "58": 43841.0, + "59": 44630.0, + "60": 41219.0, + "61": 39702.0, + "62": 44739.0, + "63": 44651.0, + "64": 45372.0, + "65": 44682.0, + "66": 45351.0, + "67": 43174.0, + "68": 42502.0, + "69": 43834.0, + "70": 45514.0, + "71": 43291.0, + "72": 44767.0, + "73": 45384.0, + "74": 42457.0, + "75": 44673.0, + "76": 43876.0, + "77": 42026.0, + "78": 40350.0, + "79": 38918.0, + "80": 41092.0, + "81": 45364.0, + "82": 43198.0, + "83": 38467.0, + "84": 42477.0, + "85": 43981.0, + "86": 45667.0, + "87": 40863.0, + "88": 41772.0, + "89": 41104.0, + "90": 44669.0, + "91": 46134.0, + "92": 41634.0, + "93": 43241.0, + "94": 39538.0, + "95": 43915.0, + "96": 44683.0, + "97": 45405.0, + "98": 41791.0, + "99": 45414.0, + "100": 42458.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1132053504.0, + "2": 1132053504.0, + "3": 1132053504.0, + "4": 1132053504.0, + "5": 1132053504.0, + "6": 1132053504.0, + "7": 1132053504.0, + "8": 1132053504.0, + "9": 1132053504.0, + "10": 1132053504.0, + "11": 1132053504.0, + "12": 1132053504.0, + "13": 1132053504.0, + "14": 1132053504.0, + "15": 1132053504.0, + "16": 
1132053504.0, + "17": 1132053504.0, + "18": 1132053504.0, + "19": 1132053504.0, + "20": 1132053504.0, + "21": 1132053504.0, + "22": 1132053504.0, + "23": 1132053504.0, + "24": 1132053504.0, + "25": 1132053504.0, + "26": 1132053504.0, + "27": 1132053504.0, + "28": 1132053504.0, + "29": 1132053504.0, + "30": 1132053504.0, + "31": 1132053504.0, + "32": 1132053504.0, + "33": 1132053504.0, + "34": 1132053504.0, + "35": 1132053504.0, + "36": 1132053504.0, + "37": 1132053504.0, + "38": 1132053504.0, + "39": 1132053504.0, + "40": 1132053504.0, + "41": 1132053504.0, + "42": 1132053504.0, + "43": 1132053504.0, + "44": 1132053504.0, + "45": 1132053504.0, + "46": 1132053504.0, + "47": 1132053504.0, + "48": 1132053504.0, + "49": 1132053504.0, + "50": 1132053504.0, + "51": 1132053504.0, + "52": 1132053504.0, + "53": 1132053504.0, + "54": 1132053504.0, + "55": 1132053504.0, + "56": 1132053504.0, + "57": 1132053504.0, + "58": 1132053504.0, + "59": 1132053504.0, + "60": 1132053504.0, + "61": 1132053504.0, + "62": 1132053504.0, + "63": 1132053504.0, + "64": 1132053504.0, + "65": 1132053504.0, + "66": 1132053504.0, + "67": 1132053504.0, + "68": 1132053504.0, + "69": 1132053504.0, + "70": 1132053504.0, + "71": 1132053504.0, + "72": 1132053504.0, + "73": 1132053504.0, + "74": 1132053504.0, + "75": 1132053504.0, + "76": 1132053504.0, + "77": 1132053504.0, + "78": 1132053504.0, + "79": 1132053504.0, + "80": 1132053504.0, + "81": 1132053504.0, + "82": 1132053504.0, + "83": 1132053504.0, + "84": 1132053504.0, + "85": 1132053504.0, + "86": 1132053504.0, + "87": 1132053504.0, + "88": 1132053504.0, + "89": 1132053504.0, + "90": 1132053504.0, + "91": 1132053504.0, + "92": 1132053504.0, + "93": 1132053504.0, + "94": 1132053504.0, + "95": 1132053504.0, + "96": 1132053504.0, + "97": 1132053504.0, + "98": 1132053504.0, + "99": 1132053504.0, + "100": 1132053504.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1409266176.0, + 
"2": 1864166912.0, + "3": 1864166912.0, + "4": 1864166912.0, + "5": 1864166912.0, + "6": 1864166912.0, + "7": 1864166912.0, + "8": 1864166912.0, + "9": 1864166912.0, + "10": 1864166912.0, + "11": 1864166912.0, + "12": 1864166912.0, + "13": 1864166912.0, + "14": 1864166912.0, + "15": 1864166912.0, + "16": 1864166912.0, + "17": 1864166912.0, + "18": 1864166912.0, + "19": 1864166912.0, + "20": 1864166912.0, + "21": 1864166912.0, + "22": 1864166912.0, + "23": 1864166912.0, + "24": 1864166912.0, + "25": 1864166912.0, + "26": 1864166912.0, + "27": 1864166912.0, + "28": 1864166912.0, + "29": 1864166912.0, + "30": 1864166912.0, + "31": 1864166912.0, + "32": 1864166912.0, + "33": 1864166912.0, + "34": 1864166912.0, + "35": 1864166912.0, + "36": 1864166912.0, + "37": 1864166912.0, + "38": 1864166912.0, + "39": 1864166912.0, + "40": 1864166912.0, + "41": 1864166912.0, + "42": 1864166912.0, + "43": 1864166912.0, + "44": 1864166912.0, + "45": 1864166912.0, + "46": 1864166912.0, + "47": 1864166912.0, + "48": 1864166912.0, + "49": 1864166912.0, + "50": 1864166912.0, + "51": 1864166912.0, + "52": 1864166912.0, + "53": 1864166912.0, + "54": 1864166912.0, + "55": 1864166912.0, + "56": 1864166912.0, + "57": 1864166912.0, + "58": 1864166912.0, + "59": 1864166912.0, + "60": 1864166912.0, + "61": 1864166912.0, + "62": 1864166912.0, + "63": 1864166912.0, + "64": 1864166912.0, + "65": 1864166912.0, + "66": 1864166912.0, + "67": 1864166912.0, + "68": 1864166912.0, + "69": 1864166912.0, + "70": 1864166912.0, + "71": 1864166912.0, + "72": 1864166912.0, + "73": 1864166912.0, + "74": 1864166912.0, + "75": 1864166912.0, + "76": 1864166912.0, + "77": 1864166912.0, + "78": 1864166912.0, + "79": 1864166912.0, + "80": 1864166912.0, + "81": 1864166912.0, + "82": 1864166912.0, + "83": 1864166912.0, + "84": 1864166912.0, + "85": 1864166912.0, + "86": 1864166912.0, + "87": 1864166912.0, + "88": 1864166912.0, + "89": 1864166912.0, + "90": 1864166912.0, + "91": 1864166912.0, + "92": 1864166912.0, + "93": 
1864166912.0, + "94": 1864166912.0, + "95": 1864166912.0, + "96": 1864166912.0, + "97": 1864166912.0, + "98": 1864166912.0, + "99": 1864166912.0, + "100": 1864166912.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.0714, + "2": 0.65344, + "3": 0.61776, + "4": 0.55941, + "5": 0.56517, + "6": 0.55953, + "7": 0.56488, + "8": 0.56168, + "9": 0.55963, + "10": 0.56502, + "11": 0.56812, + "12": 0.58499, + "13": 0.58777, + "14": 0.56659, + "15": 0.55908, + "16": 0.56702, + "17": 0.56652, + "18": 0.56368, + "19": 0.57588, + "20": 0.57328, + "21": 0.57961, + "22": 0.56693, + "23": 0.87697, + "24": 0.56276, + "25": 0.56409, + "26": 0.89777, + "27": 0.89041, + "28": 0.56631, + "29": 0.5637, + "30": 0.56457, + "31": 0.56285, + "32": 0.56729, + "33": 1.2087, + "34": 1.26391, + "35": 0.57364, + "36": 0.56616, + "37": 0.56143, + "38": 0.56332, + "39": 0.56267, + "40": 0.56706, + "41": 0.56887, + "42": 0.5604, + "43": 0.56419, + "44": 0.55389, + "45": 0.55665, + "46": 0.56256, + "47": 0.5757, + "48": 0.62949, + "49": 0.55714, + "50": 0.55326, + "51": 0.56303, + "52": 0.56765, + "53": 0.56019, + "54": 0.56447, + "55": 0.56674, + "56": 0.55563, + "57": 0.55623, + "58": 0.55651, + "59": 0.55616, + "60": 0.55374, + "61": 0.55657, + "62": 0.55473, + "63": 0.56052, + "64": 0.55785, + "65": 0.55653, + "66": 0.56406, + "67": 0.56415, + "68": 0.56582, + "69": 0.55566, + "70": 0.555, + "71": 0.55709, + "72": 0.56314, + "73": 0.55571, + "74": 0.55495, + "75": 0.56028, + "76": 0.88389, + "77": 0.56277, + "78": 0.56491, + "79": 0.57616, + "80": 0.58894, + "81": 0.56216, + "82": 0.56187, + "83": 0.56108, + "84": 0.56853, + "85": 0.55814, + "86": 0.56093, + "87": 0.56078, + "88": 0.913, + "89": 0.55681, + "90": 0.55754, + "91": 0.56679, + "92": 0.55927, + "93": 0.89203, + "94": 0.56272, + "95": 0.55822, + "96": 0.56068, + "97": 0.91075, + "98": 0.56624, + "99": 0.92145, + "100": 0.88359 + } + } +} \ No newline at end of file diff 
--git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..11ef3fbd8c5 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34904, + "2": 10.34488, + "3": 9.79407, + "4": 9.59568, + "5": 9.42065, + "6": 9.41856, + "7": 9.28073, + "8": 9.18973, + "9": 9.06584, + "10": 9.00206, + "11": 8.81497, + "12": 8.78107, + "13": 8.82506, + "14": 8.6728, + "15": 8.6368, + "16": 8.51926, + "17": 8.45732, + "18": 8.37037, + "19": 8.36068, + "20": 8.25456, + "21": 8.24268, + "22": 8.13404, + "23": 8.06818, + "24": 8.11464, + "25": 7.95146, + "26": 8.08186, + "27": 7.86814, + "28": 7.94027, + "29": 7.77604, + "30": 7.84595, + "31": 7.81568, + "32": 7.65964, + "33": 7.77905, + "34": 7.53277, + "35": 7.6586, + "36": 7.51541, + "37": 7.44748, + "38": 7.4824, + "39": 7.46523, + "40": 7.49146, + "41": 7.40822, + "42": 7.35649, + "43": 7.43806, + "44": 7.35517, + "45": 7.35103, + "46": 7.27859, + "47": 7.44152, + "48": 7.2683, + "49": 7.32389, + "50": 7.14549, + "51": 7.36541, + "52": 7.12192, + "53": 7.09189, + "54": 7.22759, + "55": 7.13584, + "56": 7.20822, + "57": 7.31316, + "58": 6.99088, + "59": 7.09934, + "60": 7.12683, + "61": 7.1014, + "62": 7.23954, + "63": 7.14417, + "64": 7.06836, + "65": 6.98412, + "66": 7.03768, + "67": 7.02847, + "68": 7.1299, + "69": 7.01456, + "70": 7.04997, + "71": 6.89408, + "72": 6.98553, + "73": 6.96694, + "74": 6.90297, + "75": 7.0574, + "76": 6.9581, + "77": 7.06903, + "78": 7.02133, + "79": 6.8504, + "80": 6.91935, + "81": 6.95874, + "82": 7.04745, + "83": 6.98522, + "84": 6.99712, 
+ "85": 6.83565, + "86": 7.04156, + "87": 6.96476, + "88": 6.89883, + "89": 6.80051, + "90": 7.22593, + "91": 6.70562, + "92": 7.0381, + "93": 6.88685, + "94": 7.03908, + "95": 6.84815, + "96": 6.95281, + "97": 6.94344, + "98": 6.86987, + "99": 6.99502, + "100": 6.96683 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43274.0, + "2": 44071.0, + "3": 44760.0, + "4": 42385.0, + "5": 45378.0, + "6": 40938.0, + "7": 43150.0, + "8": 45450.0, + "9": 42428.0, + "10": 45373.0, + "11": 43974.0, + "12": 44591.0, + "13": 43897.0, + "14": 46204.0, + "15": 43924.0, + "16": 41613.0, + "17": 43852.0, + "18": 44669.0, + "19": 42579.0, + "20": 44769.0, + "21": 44761.0, + "22": 41873.0, + "23": 45441.0, + "24": 43081.0, + "25": 42452.0, + "26": 43947.0, + "27": 46247.0, + "28": 46419.0, + "29": 46169.0, + "30": 44035.0, + "31": 41152.0, + "32": 43347.0, + "33": 45435.0, + "34": 43300.0, + "35": 43284.0, + "36": 42483.0, + "37": 40070.0, + "38": 42561.0, + "39": 44706.0, + "40": 43260.0, + "41": 44642.0, + "42": 43192.0, + "43": 45439.0, + "44": 44588.0, + "45": 43274.0, + "46": 43921.0, + "47": 42364.0, + "48": 44740.0, + "49": 43152.0, + "50": 43348.0, + "51": 41112.0, + "52": 43837.0, + "53": 43913.0, + "54": 41704.0, + "55": 43870.0, + "56": 43209.0, + "57": 42636.0, + "58": 43841.0, + "59": 44630.0, + "60": 41219.0, + "61": 39702.0, + "62": 44739.0, + "63": 44651.0, + "64": 45372.0, + "65": 44682.0, + "66": 45351.0, + "67": 43174.0, + "68": 42502.0, + "69": 43834.0, + "70": 45514.0, + "71": 43291.0, + "72": 44767.0, + "73": 45384.0, + "74": 42457.0, + "75": 44673.0, + "76": 43876.0, + "77": 42026.0, + "78": 40350.0, + "79": 38918.0, + "80": 41092.0, + "81": 45364.0, + "82": 43198.0, + "83": 38467.0, + "84": 42477.0, + "85": 43981.0, + "86": 45667.0, + "87": 40863.0, + "88": 41772.0, + "89": 41104.0, + "90": 44669.0, + "91": 46134.0, + "92": 41634.0, + "93": 43241.0, + "94": 39538.0, + "95": 43915.0, + "96": 44683.0, + 
"97": 45405.0, + "98": 41791.0, + "99": 45414.0, + "100": 42458.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1132053504.0, + "2": 1132053504.0, + "3": 1132053504.0, + "4": 1132053504.0, + "5": 1132053504.0, + "6": 1132053504.0, + "7": 1132053504.0, + "8": 1132053504.0, + "9": 1132053504.0, + "10": 1132053504.0, + "11": 1132053504.0, + "12": 1132053504.0, + "13": 1132053504.0, + "14": 1132053504.0, + "15": 1132053504.0, + "16": 1132053504.0, + "17": 1132053504.0, + "18": 1132053504.0, + "19": 1132053504.0, + "20": 1132053504.0, + "21": 1132053504.0, + "22": 1132053504.0, + "23": 1132053504.0, + "24": 1132053504.0, + "25": 1132053504.0, + "26": 1132053504.0, + "27": 1132053504.0, + "28": 1132053504.0, + "29": 1132053504.0, + "30": 1132053504.0, + "31": 1132053504.0, + "32": 1132053504.0, + "33": 1132053504.0, + "34": 1132053504.0, + "35": 1132053504.0, + "36": 1132053504.0, + "37": 1132053504.0, + "38": 1132053504.0, + "39": 1132053504.0, + "40": 1132053504.0, + "41": 1132053504.0, + "42": 1132053504.0, + "43": 1132053504.0, + "44": 1132053504.0, + "45": 1132053504.0, + "46": 1132053504.0, + "47": 1132053504.0, + "48": 1132053504.0, + "49": 1132053504.0, + "50": 1132053504.0, + "51": 1132053504.0, + "52": 1132053504.0, + "53": 1132053504.0, + "54": 1132053504.0, + "55": 1132053504.0, + "56": 1132053504.0, + "57": 1132053504.0, + "58": 1132053504.0, + "59": 1132053504.0, + "60": 1132053504.0, + "61": 1132053504.0, + "62": 1132053504.0, + "63": 1132053504.0, + "64": 1132053504.0, + "65": 1132053504.0, + "66": 1132053504.0, + "67": 1132053504.0, + "68": 1132053504.0, + "69": 1132053504.0, + "70": 1132053504.0, + "71": 1132053504.0, + "72": 1132053504.0, + "73": 1132053504.0, + "74": 1132053504.0, + "75": 1132053504.0, + "76": 1132053504.0, + "77": 1132053504.0, + "78": 1132053504.0, + "79": 1132053504.0, + "80": 1132053504.0, + "81": 1132053504.0, + "82": 1132053504.0, + "83": 1132053504.0, + 
"84": 1132053504.0, + "85": 1132053504.0, + "86": 1132053504.0, + "87": 1132053504.0, + "88": 1132053504.0, + "89": 1132053504.0, + "90": 1132053504.0, + "91": 1132053504.0, + "92": 1132053504.0, + "93": 1132053504.0, + "94": 1132053504.0, + "95": 1132053504.0, + "96": 1132053504.0, + "97": 1132053504.0, + "98": 1132053504.0, + "99": 1132053504.0, + "100": 1132053504.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1409266176.0, + "2": 1864166912.0, + "3": 1864166912.0, + "4": 1864166912.0, + "5": 1864166912.0, + "6": 1864166912.0, + "7": 1864166912.0, + "8": 1864166912.0, + "9": 1864166912.0, + "10": 1864166912.0, + "11": 1864166912.0, + "12": 1864166912.0, + "13": 1864166912.0, + "14": 1864166912.0, + "15": 1864166912.0, + "16": 1864166912.0, + "17": 1864166912.0, + "18": 1864166912.0, + "19": 1864166912.0, + "20": 1864166912.0, + "21": 1864166912.0, + "22": 1864166912.0, + "23": 1864166912.0, + "24": 1864166912.0, + "25": 1864166912.0, + "26": 1864166912.0, + "27": 1864166912.0, + "28": 1864166912.0, + "29": 1864166912.0, + "30": 1864166912.0, + "31": 1864166912.0, + "32": 1864166912.0, + "33": 1864166912.0, + "34": 1864166912.0, + "35": 1864166912.0, + "36": 1864166912.0, + "37": 1864166912.0, + "38": 1864166912.0, + "39": 1864166912.0, + "40": 1864166912.0, + "41": 1864166912.0, + "42": 1864166912.0, + "43": 1864166912.0, + "44": 1864166912.0, + "45": 1864166912.0, + "46": 1864166912.0, + "47": 1864166912.0, + "48": 1864166912.0, + "49": 1864166912.0, + "50": 1864166912.0, + "51": 1864166912.0, + "52": 1864166912.0, + "53": 1864166912.0, + "54": 1864166912.0, + "55": 1864166912.0, + "56": 1864166912.0, + "57": 1864166912.0, + "58": 1864166912.0, + "59": 1864166912.0, + "60": 1864166912.0, + "61": 1864166912.0, + "62": 1864166912.0, + "63": 1864166912.0, + "64": 1864166912.0, + "65": 1864166912.0, + "66": 1864166912.0, + "67": 1864166912.0, + "68": 1864166912.0, + "69": 1864166912.0, + 
"70": 1864166912.0, + "71": 1864166912.0, + "72": 1864166912.0, + "73": 1864166912.0, + "74": 1864166912.0, + "75": 1864166912.0, + "76": 1864166912.0, + "77": 1864166912.0, + "78": 1864166912.0, + "79": 1864166912.0, + "80": 1864166912.0, + "81": 1864166912.0, + "82": 1864166912.0, + "83": 1864166912.0, + "84": 1864166912.0, + "85": 1864166912.0, + "86": 1864166912.0, + "87": 1864166912.0, + "88": 1864166912.0, + "89": 1864166912.0, + "90": 1864166912.0, + "91": 1864166912.0, + "92": 1864166912.0, + "93": 1864166912.0, + "94": 1864166912.0, + "95": 1864166912.0, + "96": 1864166912.0, + "97": 1864166912.0, + "98": 1864166912.0, + "99": 1864166912.0, + "100": 1864166912.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.74091, + "2": 0.66943, + "3": 0.64954, + "4": 0.64695, + "5": 0.65419, + "6": 0.6513, + "7": 0.64556, + "8": 0.6385, + "9": 0.64307, + "10": 0.63679, + "11": 0.64386, + "12": 0.64012, + "13": 0.63889, + "14": 0.63958, + "15": 0.64024, + "16": 0.63721, + "17": 0.6492, + "18": 0.65247, + "19": 0.64523, + "20": 1.0041, + "21": 0.64739, + "22": 1.02158, + "23": 0.96313, + "24": 0.64631, + "25": 0.64337, + "26": 0.64702, + "27": 0.64516, + "28": 0.64748, + "29": 0.64657, + "30": 0.95958, + "31": 1.05772, + "32": 0.64319, + "33": 0.64455, + "34": 0.64044, + "35": 0.6445, + "36": 0.64649, + "37": 0.64593, + "38": 0.64912, + "39": 0.64665, + "40": 0.64585, + "41": 0.64603, + "42": 0.64765, + "43": 0.64548, + "44": 0.64732, + "45": 0.64996, + "46": 0.65909, + "47": 0.66335, + "48": 0.64625, + "49": 0.64641, + "50": 0.64822, + "51": 0.65982, + "52": 0.64882, + "53": 0.64892, + "54": 0.64636, + "55": 0.64591, + "56": 0.65232, + "57": 0.64591, + "58": 0.64572, + "59": 0.64949, + "60": 0.64277, + "61": 0.64766, + "62": 0.64726, + "63": 0.64637, + "64": 0.64901, + "65": 0.6476, + "66": 0.64458, + "67": 0.64951, + "68": 0.64438, + "69": 0.64854, + "70": 0.65268, + "71": 0.64762, + "72": 1.02587, + 
"73": 0.65274, + "74": 0.65942, + "75": 0.65091, + "76": 0.65181, + "77": 0.65582, + "78": 0.64434, + "79": 0.65116, + "80": 0.65073, + "81": 0.64645, + "82": 0.65405, + "83": 0.65107, + "84": 0.64883, + "85": 0.94272, + "86": 0.65641, + "87": 0.99204, + "88": 0.96199, + "89": 0.64856, + "90": 0.65165, + "91": 0.65163, + "92": 0.6506, + "93": 0.64828, + "94": 0.64682, + "95": 1.01586, + "96": 1.04151, + "97": 0.65481, + "98": 0.64703, + "99": 0.64964, + "100": 0.65343 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index fca23f6593f..702c35ca9af 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.34897, + "2": 10.34482, + "3": 9.79428, + "4": 9.59585, "5": 9.42074, + "6": 9.41847, + "7": 9.28062, + "8": 9.18972, + "9": 9.06519, "10": 9.00183, + "11": 8.81475, + "12": 8.7808, + "13": 8.82493, + "14": 8.67261, "15": 8.6364, + "16": 8.51896, + "17": 8.45704, + "18": 8.37007, + "19": 8.36039, "20": 8.25417, + "21": 8.2421, + "22": 8.13324, + "23": 8.06764, + "24": 8.1142, "25": 7.95082, + "26": 8.08156, + "27": 7.86764, + "28": 7.93993, + "29": 7.77566, "30": 7.84559, + "31": 7.8152, + "32": 7.65941, + "33": 7.77856, + "34": 7.53188, "35": 7.65804, + "36": 7.51464, + "37": 7.44686, + "38": 7.48161, + "39": 7.46435, "40": 7.49084, + "41": 7.40827, + "42": 7.35625, + "43": 7.43764, + "44": 7.35439, "45": 7.35042, + "46": 7.27853, + "47": 7.4405, + "48": 7.26763, + "49": 7.32341, "50": 7.14486, + "51": 7.36469, + "52": 7.12044, + "53": 7.09167, + 
"54": 7.22712, "55": 7.13495, + "56": 7.20751, + "57": 7.31287, + "58": 6.99063, + "59": 7.09849, "60": 7.12665, + "61": 7.10047, + "62": 7.23974, + "63": 7.14358, + "64": 7.06717, "65": 6.98408, + "66": 7.03692, + "67": 7.02875, + "68": 7.12914, + "69": 7.01425, "70": 7.04954, + "71": 6.89312, + "72": 6.98513, + "73": 6.96734, + "74": 6.90236, "75": 7.05611, + "76": 6.95986, + "77": 7.06862, + "78": 7.0204, + "79": 6.8505, "80": 6.92019, + "81": 6.95982, + "82": 7.04575, + "83": 6.98617, + "84": 6.99991, "85": 6.83511, + "86": 7.04087, + "87": 6.96604, + "88": 6.90125, + "89": 6.80345, "90": 7.22384, + "91": 6.70505, + "92": 7.03979, + "93": 6.8857, + "94": 7.04044, "95": 6.84746, + "96": 6.9546, + "97": 6.94425, + "98": 6.86865, + "99": 6.9948, "100": 6.96761 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 43289.0, + "2": 44062.0, + "3": 44747.0, + "4": 42377.0, "5": 45372.0, + "6": 40957.0, + "7": 43147.0, + "8": 45474.0, + "9": 42425.0, "10": 45380.0, + "11": 43984.0, + "12": 44594.0, + "13": 43914.0, + "14": 46203.0, "15": 43914.0, + "16": 41632.0, + "17": 43870.0, + "18": 44691.0, + "19": 42574.0, "20": 44769.0, + "21": 44757.0, + "22": 41854.0, + "23": 45440.0, + "24": 43066.0, "25": 42458.0, + "26": 43949.0, + "27": 46224.0, + "28": 46395.0, + "29": 46168.0, "30": 44028.0, + "31": 41131.0, + "32": 43348.0, + "33": 45441.0, + "34": 43316.0, "35": 43258.0, + "36": 42459.0, + "37": 40074.0, + "38": 42544.0, + "39": 44707.0, "40": 43237.0, + "41": 44652.0, + "42": 43196.0, + "43": 45435.0, + "44": 44591.0, "45": 43263.0, + "46": 43930.0, + "47": 42373.0, + "48": 44713.0, + "49": 43128.0, "50": 43361.0, + "51": 41133.0, + "52": 43849.0, + "53": 43899.0, + "54": 41704.0, "55": 43863.0, + "56": 43205.0, + "57": 42636.0, + "58": 43835.0, + "59": 44623.0, "60": 41226.0, + "61": 39705.0, + "62": 44732.0, + "63": 44659.0, + "64": 45371.0, "65": 44682.0, + "66": 45341.0, + "67": 43169.0, + "68": 
42486.0, + "69": 43829.0, "70": 45529.0, + "71": 43294.0, + "72": 44745.0, + "73": 45364.0, + "74": 42463.0, "75": 44679.0, + "76": 43882.0, + "77": 42042.0, + "78": 40356.0, + "79": 38928.0, "80": 41079.0, + "81": 45349.0, + "82": 43226.0, + "83": 38474.0, + "84": 42415.0, "85": 43989.0, + "86": 45673.0, + "87": 40850.0, + "88": 41756.0, + "89": 41065.0, "90": 44686.0, + "91": 46135.0, + "92": 41609.0, + "93": 43267.0, + "94": 39525.0, "95": 43921.0, + "96": 44683.0, + "97": 45412.0, + "98": 41832.0, + "99": 45416.0, "100": 42457.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1104069120.0, + "2": 1104069120.0, + "3": 1104069120.0, + "4": 1104069120.0, "5": 1104069120.0, + "6": 1104069120.0, + "7": 1104069120.0, + "8": 1104069120.0, + "9": 1104069120.0, "10": 1104069120.0, + "11": 1104069120.0, + "12": 1104069120.0, + "13": 1104069120.0, + "14": 1104069120.0, "15": 1104069120.0, + "16": 1104069120.0, + "17": 1104069120.0, + "18": 1104069120.0, + "19": 1104069120.0, "20": 1104069120.0, + "21": 1104069120.0, + "22": 1104069120.0, + "23": 1104069120.0, + "24": 1104069120.0, "25": 1104069120.0, + "26": 1104069120.0, + "27": 1104069120.0, + "28": 1104069120.0, + "29": 1104069120.0, "30": 1104069120.0, + "31": 1104069120.0, + "32": 1104069120.0, + "33": 1104069120.0, + "34": 1104069120.0, "35": 1104069120.0, + "36": 1104069120.0, + "37": 1104069120.0, + "38": 1104069120.0, + "39": 1104069120.0, "40": 1104069120.0, + "41": 1104069120.0, + "42": 1104069120.0, + "43": 1104069120.0, + "44": 1104069120.0, "45": 1104069120.0, + "46": 1104069120.0, + "47": 1104069120.0, + "48": 1104069120.0, + "49": 1104069120.0, "50": 1104069120.0, + "51": 1104069120.0, + "52": 1104069120.0, + "53": 1104069120.0, + "54": 1104069120.0, "55": 1104069120.0, + "56": 1104069120.0, + "57": 1104069120.0, + "58": 1104069120.0, + "59": 1104069120.0, "60": 1104069120.0, + "61": 1104069120.0, + "62": 1104069120.0, + "63": 
1104069120.0, + "64": 1104069120.0, "65": 1104069120.0, + "66": 1104069120.0, + "67": 1104069120.0, + "68": 1104069120.0, + "69": 1104069120.0, "70": 1104069120.0, + "71": 1104069120.0, + "72": 1104069120.0, + "73": 1104069120.0, + "74": 1104069120.0, "75": 1104069120.0, + "76": 1104069120.0, + "77": 1104069120.0, + "78": 1104069120.0, + "79": 1104069120.0, "80": 1104069120.0, + "81": 1104069120.0, + "82": 1104069120.0, + "83": 1104069120.0, + "84": 1104069120.0, "85": 1104069120.0, + "86": 1104069120.0, + "87": 1104069120.0, + "88": 1104069120.0, + "89": 1104069120.0, "90": 1104069120.0, + "91": 1104069120.0, + "92": 1104069120.0, + "93": 1104069120.0, + "94": 1104069120.0, "95": 1104069120.0, + "96": 1104069120.0, + "97": 1104069120.0, + "98": 1104069120.0, + "99": 1104069120.0, "100": 1104069120.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1368630784.0, + "2": 1833295360.0, + "3": 1833295360.0, + "4": 1833295360.0, "5": 1833295360.0, + "6": 1833295360.0, + "7": 1833295360.0, + "8": 1833295360.0, + "9": 1833295360.0, "10": 1833295360.0, + "11": 1833295360.0, + "12": 1833295360.0, + "13": 1833295360.0, + "14": 1833295360.0, "15": 1833295360.0, + "16": 1833295360.0, + "17": 1833295360.0, + "18": 1833295360.0, + "19": 1833295360.0, "20": 1833295360.0, + "21": 1833295360.0, + "22": 1833295360.0, + "23": 1833295360.0, + "24": 1833295360.0, "25": 1833295360.0, + "26": 1833295360.0, + "27": 1833295360.0, + "28": 1833295360.0, + "29": 1833295360.0, "30": 1833295360.0, + "31": 1833295360.0, + "32": 1833295360.0, + "33": 1833295360.0, + "34": 1833295360.0, "35": 1833295360.0, + "36": 1833295360.0, + "37": 1833295360.0, + "38": 1833295360.0, + "39": 1833295360.0, "40": 1833295360.0, + "41": 1833295360.0, + "42": 1833295360.0, + "43": 1833295360.0, + "44": 1833295360.0, "45": 1833295360.0, + "46": 1833295360.0, + "47": 1833295360.0, + "48": 1833295360.0, + "49": 1833295360.0, "50": 
1833295360.0, + "51": 1833295360.0, + "52": 1833295360.0, + "53": 1833295360.0, + "54": 1833295360.0, "55": 1833295360.0, + "56": 1833295360.0, + "57": 1833295360.0, + "58": 1833295360.0, + "59": 1833295360.0, "60": 1833295360.0, + "61": 1833295360.0, + "62": 1833295360.0, + "63": 1833295360.0, + "64": 1833295360.0, "65": 1833295360.0, + "66": 1833295360.0, + "67": 1833295360.0, + "68": 1833295360.0, + "69": 1833295360.0, "70": 1833295360.0, + "71": 1833295360.0, + "72": 1833295360.0, + "73": 1833295360.0, + "74": 1833295360.0, "75": 1833295360.0, + "76": 1833295360.0, + "77": 1833295360.0, + "78": 1833295360.0, + "79": 1833295360.0, "80": 1833295360.0, + "81": 1833295360.0, + "82": 1833295360.0, + "83": 1833295360.0, + "84": 1833295360.0, "85": 1833295360.0, + "86": 1833295360.0, + "87": 1833295360.0, + "88": 1833295360.0, + "89": 1833295360.0, "90": 1833295360.0, + "91": 1833295360.0, + "92": 1833295360.0, + "93": 1833295360.0, + "94": 1833295360.0, "95": 1833295360.0, + "96": 1833295360.0, + "97": 1833295360.0, + "98": 1833295360.0, + "99": 1833295360.0, "100": 1833295360.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 9.42985, - "5": 0.45373, - "10": 0.45713, - "15": 0.47883, - "20": 0.47411, - "25": 0.4628, - "30": 0.47727, - "35": 0.46474, - "40": 0.46129, - "45": 0.49682, - "50": 0.47506, - "55": 0.47981, - "60": 0.47061, - "65": 0.46638, - "70": 0.46506, - "75": 0.47547, - "80": 0.46762, - "85": 0.47281, - "90": 0.46137, - "95": 0.47198, - "100": 0.46836 + "1": 9.42728, + "2": 0.63617, + "3": 0.52215, + "4": 0.51838, + "5": 0.5248, + "6": 0.52221, + "7": 0.53157, + "8": 0.52268, + "9": 0.51794, + "10": 0.52148, + "11": 0.51655, + "12": 0.52503, + "13": 0.5178, + "14": 0.52926, + "15": 0.52639, + "16": 0.53361, + "17": 0.52309, + "18": 0.52324, + "19": 0.51834, + "20": 0.54965, + "21": 0.5586, + "22": 0.53836, + "23": 0.5225, + "24": 0.51851, + "25": 0.5199, + "26": 0.51853, + "27": 
0.51882, + "28": 0.52551, + "29": 0.52254, + "30": 0.5192, + "31": 0.52201, + "32": 0.521, + "33": 0.52114, + "34": 0.51459, + "35": 0.52645, + "36": 0.51875, + "37": 0.5214, + "38": 0.52019, + "39": 0.54698, + "40": 0.54492, + "41": 0.51667, + "42": 0.52631, + "43": 0.52495, + "44": 0.52655, + "45": 0.52461, + "46": 0.53027, + "47": 0.5196, + "48": 0.52577, + "49": 0.51681, + "50": 0.53016, + "51": 0.51782, + "52": 0.52245, + "53": 0.51733, + "54": 0.523, + "55": 0.51904, + "56": 0.53679, + "57": 0.52102, + "58": 0.55143, + "59": 0.55915, + "60": 0.5493, + "61": 0.525, + "62": 0.52356, + "63": 0.53373, + "64": 0.81727, + "65": 0.52459, + "66": 0.79536, + "67": 0.52103, + "68": 0.5317, + "69": 0.52528, + "70": 0.78794, + "71": 0.53084, + "72": 0.51933, + "73": 0.53233, + "74": 0.52693, + "75": 0.53508, + "76": 0.56134, + "77": 0.53435, + "78": 0.51717, + "79": 0.52701, + "80": 0.52068, + "81": 0.52531, + "82": 0.5217, + "83": 0.52326, + "84": 0.52412, + "85": 0.84182, + "86": 0.52908, + "87": 0.51925, + "88": 0.52315, + "89": 0.52102, + "90": 0.52827, + "91": 0.54314, + "92": 0.52504, + "93": 0.52556, + "94": 0.8296, + "95": 0.83995, + "96": 0.85045, + "97": 0.78149, + "98": 0.54296, + "99": 0.5427, + "100": 0.55085 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..9abfa38cf9f --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34897, + "2": 10.34482, + "3": 9.79428, + "4": 9.59585, + "5": 9.42074, + "6": 9.41847, + "7": 9.28062, + "8": 9.18972, + "9": 9.06519, + "10": 9.00183, + "11": 8.81475, + 
"12": 8.7808, + "13": 8.82493, + "14": 8.67261, + "15": 8.6364, + "16": 8.51896, + "17": 8.45704, + "18": 8.37007, + "19": 8.36039, + "20": 8.25417, + "21": 8.2421, + "22": 8.13324, + "23": 8.06764, + "24": 8.1142, + "25": 7.95082, + "26": 8.08156, + "27": 7.86764, + "28": 7.93993, + "29": 7.77566, + "30": 7.84559, + "31": 7.8152, + "32": 7.65941, + "33": 7.77856, + "34": 7.53188, + "35": 7.65804, + "36": 7.51464, + "37": 7.44686, + "38": 7.48161, + "39": 7.46435, + "40": 7.49084, + "41": 7.40827, + "42": 7.35625, + "43": 7.43764, + "44": 7.35439, + "45": 7.35042, + "46": 7.27853, + "47": 7.4405, + "48": 7.26763, + "49": 7.32341, + "50": 7.14486, + "51": 7.36469, + "52": 7.12044, + "53": 7.09167, + "54": 7.22712, + "55": 7.13495, + "56": 7.20751, + "57": 7.31287, + "58": 6.99063, + "59": 7.09849, + "60": 7.12665, + "61": 7.10047, + "62": 7.23974, + "63": 7.14358, + "64": 7.06717, + "65": 6.98408, + "66": 7.03692, + "67": 7.02875, + "68": 7.12914, + "69": 7.01425, + "70": 7.04954, + "71": 6.89312, + "72": 6.98513, + "73": 6.96734, + "74": 6.90236, + "75": 7.05611, + "76": 6.95986, + "77": 7.06862, + "78": 7.0204, + "79": 6.8505, + "80": 6.92019, + "81": 6.95982, + "82": 7.04575, + "83": 6.98617, + "84": 6.99991, + "85": 6.83511, + "86": 7.04087, + "87": 6.96604, + "88": 6.90125, + "89": 6.80345, + "90": 7.22384, + "91": 6.70505, + "92": 7.03979, + "93": 6.8857, + "94": 7.04044, + "95": 6.84746, + "96": 6.9546, + "97": 6.94425, + "98": 6.86865, + "99": 6.9948, + "100": 6.96761 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43289.0, + "2": 44062.0, + "3": 44747.0, + "4": 42377.0, + "5": 45372.0, + "6": 40957.0, + "7": 43147.0, + "8": 45474.0, + "9": 42425.0, + "10": 45380.0, + "11": 43984.0, + "12": 44594.0, + "13": 43914.0, + "14": 46203.0, + "15": 43914.0, + "16": 41632.0, + "17": 43870.0, + "18": 44691.0, + "19": 42574.0, + "20": 44769.0, + "21": 44757.0, + "22": 41854.0, + "23": 45440.0, + "24": 43066.0, 
+ "25": 42458.0, + "26": 43949.0, + "27": 46224.0, + "28": 46395.0, + "29": 46168.0, + "30": 44028.0, + "31": 41131.0, + "32": 43348.0, + "33": 45441.0, + "34": 43316.0, + "35": 43258.0, + "36": 42459.0, + "37": 40074.0, + "38": 42544.0, + "39": 44707.0, + "40": 43237.0, + "41": 44652.0, + "42": 43196.0, + "43": 45435.0, + "44": 44591.0, + "45": 43263.0, + "46": 43930.0, + "47": 42373.0, + "48": 44713.0, + "49": 43128.0, + "50": 43361.0, + "51": 41133.0, + "52": 43849.0, + "53": 43899.0, + "54": 41704.0, + "55": 43863.0, + "56": 43205.0, + "57": 42636.0, + "58": 43835.0, + "59": 44623.0, + "60": 41226.0, + "61": 39705.0, + "62": 44732.0, + "63": 44659.0, + "64": 45371.0, + "65": 44682.0, + "66": 45341.0, + "67": 43169.0, + "68": 42486.0, + "69": 43829.0, + "70": 45529.0, + "71": 43294.0, + "72": 44745.0, + "73": 45364.0, + "74": 42463.0, + "75": 44679.0, + "76": 43882.0, + "77": 42042.0, + "78": 40356.0, + "79": 38928.0, + "80": 41079.0, + "81": 45349.0, + "82": 43226.0, + "83": 38474.0, + "84": 42415.0, + "85": 43989.0, + "86": 45673.0, + "87": 40850.0, + "88": 41756.0, + "89": 41065.0, + "90": 44686.0, + "91": 46135.0, + "92": 41609.0, + "93": 43267.0, + "94": 39525.0, + "95": 43921.0, + "96": 44683.0, + "97": 45412.0, + "98": 41832.0, + "99": 45416.0, + "100": 42457.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1104069120.0, + "2": 1104069120.0, + "3": 1104069120.0, + "4": 1104069120.0, + "5": 1104069120.0, + "6": 1104069120.0, + "7": 1104069120.0, + "8": 1104069120.0, + "9": 1104069120.0, + "10": 1104069120.0, + "11": 1104069120.0, + "12": 1104069120.0, + "13": 1104069120.0, + "14": 1104069120.0, + "15": 1104069120.0, + "16": 1104069120.0, + "17": 1104069120.0, + "18": 1104069120.0, + "19": 1104069120.0, + "20": 1104069120.0, + "21": 1104069120.0, + "22": 1104069120.0, + "23": 1104069120.0, + "24": 1104069120.0, + "25": 1104069120.0, + "26": 1104069120.0, + "27": 1104069120.0, + "28": 
1104069120.0, + "29": 1104069120.0, + "30": 1104069120.0, + "31": 1104069120.0, + "32": 1104069120.0, + "33": 1104069120.0, + "34": 1104069120.0, + "35": 1104069120.0, + "36": 1104069120.0, + "37": 1104069120.0, + "38": 1104069120.0, + "39": 1104069120.0, + "40": 1104069120.0, + "41": 1104069120.0, + "42": 1104069120.0, + "43": 1104069120.0, + "44": 1104069120.0, + "45": 1104069120.0, + "46": 1104069120.0, + "47": 1104069120.0, + "48": 1104069120.0, + "49": 1104069120.0, + "50": 1104069120.0, + "51": 1104069120.0, + "52": 1104069120.0, + "53": 1104069120.0, + "54": 1104069120.0, + "55": 1104069120.0, + "56": 1104069120.0, + "57": 1104069120.0, + "58": 1104069120.0, + "59": 1104069120.0, + "60": 1104069120.0, + "61": 1104069120.0, + "62": 1104069120.0, + "63": 1104069120.0, + "64": 1104069120.0, + "65": 1104069120.0, + "66": 1104069120.0, + "67": 1104069120.0, + "68": 1104069120.0, + "69": 1104069120.0, + "70": 1104069120.0, + "71": 1104069120.0, + "72": 1104069120.0, + "73": 1104069120.0, + "74": 1104069120.0, + "75": 1104069120.0, + "76": 1104069120.0, + "77": 1104069120.0, + "78": 1104069120.0, + "79": 1104069120.0, + "80": 1104069120.0, + "81": 1104069120.0, + "82": 1104069120.0, + "83": 1104069120.0, + "84": 1104069120.0, + "85": 1104069120.0, + "86": 1104069120.0, + "87": 1104069120.0, + "88": 1104069120.0, + "89": 1104069120.0, + "90": 1104069120.0, + "91": 1104069120.0, + "92": 1104069120.0, + "93": 1104069120.0, + "94": 1104069120.0, + "95": 1104069120.0, + "96": 1104069120.0, + "97": 1104069120.0, + "98": 1104069120.0, + "99": 1104069120.0, + "100": 1104069120.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1368630784.0, + "2": 1833295360.0, + "3": 1833295360.0, + "4": 1833295360.0, + "5": 1833295360.0, + "6": 1833295360.0, + "7": 1833295360.0, + "8": 1833295360.0, + "9": 1833295360.0, + "10": 1833295360.0, + "11": 1833295360.0, + "12": 1833295360.0, + "13": 1833295360.0, + "14": 
1833295360.0, + "15": 1833295360.0, + "16": 1833295360.0, + "17": 1833295360.0, + "18": 1833295360.0, + "19": 1833295360.0, + "20": 1833295360.0, + "21": 1833295360.0, + "22": 1833295360.0, + "23": 1833295360.0, + "24": 1833295360.0, + "25": 1833295360.0, + "26": 1833295360.0, + "27": 1833295360.0, + "28": 1833295360.0, + "29": 1833295360.0, + "30": 1833295360.0, + "31": 1833295360.0, + "32": 1833295360.0, + "33": 1833295360.0, + "34": 1833295360.0, + "35": 1833295360.0, + "36": 1833295360.0, + "37": 1833295360.0, + "38": 1833295360.0, + "39": 1833295360.0, + "40": 1833295360.0, + "41": 1833295360.0, + "42": 1833295360.0, + "43": 1833295360.0, + "44": 1833295360.0, + "45": 1833295360.0, + "46": 1833295360.0, + "47": 1833295360.0, + "48": 1833295360.0, + "49": 1833295360.0, + "50": 1833295360.0, + "51": 1833295360.0, + "52": 1833295360.0, + "53": 1833295360.0, + "54": 1833295360.0, + "55": 1833295360.0, + "56": 1833295360.0, + "57": 1833295360.0, + "58": 1833295360.0, + "59": 1833295360.0, + "60": 1833295360.0, + "61": 1833295360.0, + "62": 1833295360.0, + "63": 1833295360.0, + "64": 1833295360.0, + "65": 1833295360.0, + "66": 1833295360.0, + "67": 1833295360.0, + "68": 1833295360.0, + "69": 1833295360.0, + "70": 1833295360.0, + "71": 1833295360.0, + "72": 1833295360.0, + "73": 1833295360.0, + "74": 1833295360.0, + "75": 1833295360.0, + "76": 1833295360.0, + "77": 1833295360.0, + "78": 1833295360.0, + "79": 1833295360.0, + "80": 1833295360.0, + "81": 1833295360.0, + "82": 1833295360.0, + "83": 1833295360.0, + "84": 1833295360.0, + "85": 1833295360.0, + "86": 1833295360.0, + "87": 1833295360.0, + "88": 1833295360.0, + "89": 1833295360.0, + "90": 1833295360.0, + "91": 1833295360.0, + "92": 1833295360.0, + "93": 1833295360.0, + "94": 1833295360.0, + "95": 1833295360.0, + "96": 1833295360.0, + "97": 1833295360.0, + "98": 1833295360.0, + "99": 1833295360.0, + "100": 1833295360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + 
"values": { + "1": 9.97888, + "2": 0.55212, + "3": 0.46939, + "4": 0.48338, + "5": 0.4977, + "6": 0.48497, + "7": 0.48521, + "8": 0.48365, + "9": 0.47845, + "10": 0.48441, + "11": 0.48622, + "12": 0.49049, + "13": 0.49384, + "14": 0.48918, + "15": 0.48451, + "16": 0.49344, + "17": 0.49291, + "18": 0.49613, + "19": 0.49898, + "20": 0.49079, + "21": 0.48153, + "22": 0.48369, + "23": 0.4824, + "24": 0.4958, + "25": 0.48572, + "26": 0.50758, + "27": 0.48722, + "28": 0.47977, + "29": 0.5598, + "30": 0.47951, + "31": 1.06254, + "32": 0.7493, + "33": 1.59176, + "34": 0.85052, + "35": 2.25233, + "36": 1.66198, + "37": 0.68722, + "38": 0.4632, + "39": 0.46558, + "40": 0.52308, + "41": 0.47497, + "42": 0.46579, + "43": 0.46956, + "44": 0.46788, + "45": 0.47342, + "46": 0.53067, + "47": 0.48889, + "48": 0.47648, + "49": 0.47372, + "50": 0.46927, + "51": 0.46862, + "52": 0.47754, + "53": 0.47724, + "54": 0.47513, + "55": 0.46395, + "56": 0.46587, + "57": 0.78252, + "58": 0.46515, + "59": 0.46114, + "60": 0.46011, + "61": 0.45394, + "62": 0.45518, + "63": 0.48166, + "64": 0.47197, + "65": 0.97766, + "66": 0.45863, + "67": 0.45331, + "68": 0.45132, + "69": 0.4828, + "70": 0.45508, + "71": 0.45601, + "72": 1.14428, + "73": 0.45179, + "74": 0.4534, + "75": 0.46049, + "76": 0.46918, + "77": 0.45685, + "78": 0.45627, + "79": 0.46018, + "80": 0.46056, + "81": 0.46543, + "82": 0.45359, + "83": 0.78935, + "84": 0.46472, + "85": 0.45517, + "86": 0.46043, + "87": 0.45426, + "88": 0.45214, + "89": 0.45913, + "90": 0.45237, + "91": 0.46312, + "92": 0.79955, + "93": 0.45537, + "94": 0.45217, + "95": 0.45359, + "96": 0.45058, + "97": 0.45281, + "98": 0.46149, + "99": 0.45894, + "100": 0.46912 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 
00000000000..2e0ee7ee230 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34897, + "2": 10.34482, + "3": 9.79428, + "4": 9.59585, + "5": 9.42074, + "6": 9.41847, + "7": 9.28062, + "8": 9.18972, + "9": 9.06519, + "10": 9.00183, + "11": 8.81475, + "12": 8.7808, + "13": 8.82493, + "14": 8.67261, + "15": 8.6364, + "16": 8.51896, + "17": 8.45704, + "18": 8.37007, + "19": 8.36039, + "20": 8.25417, + "21": 8.2421, + "22": 8.13324, + "23": 8.06764, + "24": 8.1142, + "25": 7.95082, + "26": 8.08156, + "27": 7.86764, + "28": 7.93993, + "29": 7.77566, + "30": 7.84559, + "31": 7.8152, + "32": 7.65941, + "33": 7.77856, + "34": 7.53188, + "35": 7.65804, + "36": 7.51464, + "37": 7.44686, + "38": 7.48161, + "39": 7.46435, + "40": 7.49084, + "41": 7.40827, + "42": 7.35625, + "43": 7.43764, + "44": 7.35439, + "45": 7.35042, + "46": 7.27853, + "47": 7.4405, + "48": 7.26763, + "49": 7.32341, + "50": 7.14486, + "51": 7.36469, + "52": 7.12044, + "53": 7.09167, + "54": 7.22712, + "55": 7.13495, + "56": 7.20751, + "57": 7.31287, + "58": 6.99063, + "59": 7.09849, + "60": 7.12665, + "61": 7.10047, + "62": 7.23974, + "63": 7.14358, + "64": 7.06717, + "65": 6.98408, + "66": 7.03692, + "67": 7.02875, + "68": 7.12914, + "69": 7.01425, + "70": 7.04954, + "71": 6.89312, + "72": 6.98513, + "73": 6.96734, + "74": 6.90236, + "75": 7.05611, + "76": 6.95986, + "77": 7.06862, + "78": 7.0204, + "79": 6.8505, + "80": 6.92019, + "81": 6.95982, + "82": 7.04575, + "83": 6.98617, + "84": 6.99991, + "85": 6.83511, + "86": 7.04087, + "87": 6.96604, + "88": 6.90125, + "89": 6.80345, + "90": 7.22384, + "91": 6.70505, + "92": 7.03979, + "93": 6.8857, + "94": 7.04044, + "95": 6.84746, + "96": 6.9546, + "97": 6.94425, + "98": 6.86865, + "99": 6.9948, + "100": 6.96761 + } + }, + "num-zeros": { + "start_step": 1, + 
"end_step": 100, + "step_interval": 1, + "values": { + "1": 43289.0, + "2": 44062.0, + "3": 44747.0, + "4": 42377.0, + "5": 45372.0, + "6": 40957.0, + "7": 43147.0, + "8": 45474.0, + "9": 42425.0, + "10": 45380.0, + "11": 43984.0, + "12": 44594.0, + "13": 43914.0, + "14": 46203.0, + "15": 43914.0, + "16": 41632.0, + "17": 43870.0, + "18": 44691.0, + "19": 42574.0, + "20": 44769.0, + "21": 44757.0, + "22": 41854.0, + "23": 45440.0, + "24": 43066.0, + "25": 42458.0, + "26": 43949.0, + "27": 46224.0, + "28": 46395.0, + "29": 46168.0, + "30": 44028.0, + "31": 41131.0, + "32": 43348.0, + "33": 45441.0, + "34": 43316.0, + "35": 43258.0, + "36": 42459.0, + "37": 40074.0, + "38": 42544.0, + "39": 44707.0, + "40": 43237.0, + "41": 44652.0, + "42": 43196.0, + "43": 45435.0, + "44": 44591.0, + "45": 43263.0, + "46": 43930.0, + "47": 42373.0, + "48": 44713.0, + "49": 43128.0, + "50": 43361.0, + "51": 41133.0, + "52": 43849.0, + "53": 43899.0, + "54": 41704.0, + "55": 43863.0, + "56": 43205.0, + "57": 42636.0, + "58": 43835.0, + "59": 44623.0, + "60": 41226.0, + "61": 39705.0, + "62": 44732.0, + "63": 44659.0, + "64": 45371.0, + "65": 44682.0, + "66": 45341.0, + "67": 43169.0, + "68": 42486.0, + "69": 43829.0, + "70": 45529.0, + "71": 43294.0, + "72": 44745.0, + "73": 45364.0, + "74": 42463.0, + "75": 44679.0, + "76": 43882.0, + "77": 42042.0, + "78": 40356.0, + "79": 38928.0, + "80": 41079.0, + "81": 45349.0, + "82": 43226.0, + "83": 38474.0, + "84": 42415.0, + "85": 43989.0, + "86": 45673.0, + "87": 40850.0, + "88": 41756.0, + "89": 41065.0, + "90": 44686.0, + "91": 46135.0, + "92": 41609.0, + "93": 43267.0, + "94": 39525.0, + "95": 43921.0, + "96": 44683.0, + "97": 45412.0, + "98": 41832.0, + "99": 45416.0, + "100": 42457.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1104069120.0, + "2": 1104069120.0, + "3": 1104069120.0, + "4": 1104069120.0, + "5": 1104069120.0, + "6": 1104069120.0, + "7": 
1104069120.0, + "8": 1104069120.0, + "9": 1104069120.0, + "10": 1104069120.0, + "11": 1104069120.0, + "12": 1104069120.0, + "13": 1104069120.0, + "14": 1104069120.0, + "15": 1104069120.0, + "16": 1104069120.0, + "17": 1104069120.0, + "18": 1104069120.0, + "19": 1104069120.0, + "20": 1104069120.0, + "21": 1104069120.0, + "22": 1104069120.0, + "23": 1104069120.0, + "24": 1104069120.0, + "25": 1104069120.0, + "26": 1104069120.0, + "27": 1104069120.0, + "28": 1104069120.0, + "29": 1104069120.0, + "30": 1104069120.0, + "31": 1104069120.0, + "32": 1104069120.0, + "33": 1104069120.0, + "34": 1104069120.0, + "35": 1104069120.0, + "36": 1104069120.0, + "37": 1104069120.0, + "38": 1104069120.0, + "39": 1104069120.0, + "40": 1104069120.0, + "41": 1104069120.0, + "42": 1104069120.0, + "43": 1104069120.0, + "44": 1104069120.0, + "45": 1104069120.0, + "46": 1104069120.0, + "47": 1104069120.0, + "48": 1104069120.0, + "49": 1104069120.0, + "50": 1104069120.0, + "51": 1104069120.0, + "52": 1104069120.0, + "53": 1104069120.0, + "54": 1104069120.0, + "55": 1104069120.0, + "56": 1104069120.0, + "57": 1104069120.0, + "58": 1104069120.0, + "59": 1104069120.0, + "60": 1104069120.0, + "61": 1104069120.0, + "62": 1104069120.0, + "63": 1104069120.0, + "64": 1104069120.0, + "65": 1104069120.0, + "66": 1104069120.0, + "67": 1104069120.0, + "68": 1104069120.0, + "69": 1104069120.0, + "70": 1104069120.0, + "71": 1104069120.0, + "72": 1104069120.0, + "73": 1104069120.0, + "74": 1104069120.0, + "75": 1104069120.0, + "76": 1104069120.0, + "77": 1104069120.0, + "78": 1104069120.0, + "79": 1104069120.0, + "80": 1104069120.0, + "81": 1104069120.0, + "82": 1104069120.0, + "83": 1104069120.0, + "84": 1104069120.0, + "85": 1104069120.0, + "86": 1104069120.0, + "87": 1104069120.0, + "88": 1104069120.0, + "89": 1104069120.0, + "90": 1104069120.0, + "91": 1104069120.0, + "92": 1104069120.0, + "93": 1104069120.0, + "94": 1104069120.0, + "95": 1104069120.0, + "96": 1104069120.0, + "97": 1104069120.0, + "98": 
1104069120.0, + "99": 1104069120.0, + "100": 1104069120.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1368630784.0, + "2": 1833295360.0, + "3": 1833295360.0, + "4": 1833295360.0, + "5": 1833295360.0, + "6": 1833295360.0, + "7": 1833295360.0, + "8": 1833295360.0, + "9": 1833295360.0, + "10": 1833295360.0, + "11": 1833295360.0, + "12": 1833295360.0, + "13": 1833295360.0, + "14": 1833295360.0, + "15": 1833295360.0, + "16": 1833295360.0, + "17": 1833295360.0, + "18": 1833295360.0, + "19": 1833295360.0, + "20": 1833295360.0, + "21": 1833295360.0, + "22": 1833295360.0, + "23": 1833295360.0, + "24": 1833295360.0, + "25": 1833295360.0, + "26": 1833295360.0, + "27": 1833295360.0, + "28": 1833295360.0, + "29": 1833295360.0, + "30": 1833295360.0, + "31": 1833295360.0, + "32": 1833295360.0, + "33": 1833295360.0, + "34": 1833295360.0, + "35": 1833295360.0, + "36": 1833295360.0, + "37": 1833295360.0, + "38": 1833295360.0, + "39": 1833295360.0, + "40": 1833295360.0, + "41": 1833295360.0, + "42": 1833295360.0, + "43": 1833295360.0, + "44": 1833295360.0, + "45": 1833295360.0, + "46": 1833295360.0, + "47": 1833295360.0, + "48": 1833295360.0, + "49": 1833295360.0, + "50": 1833295360.0, + "51": 1833295360.0, + "52": 1833295360.0, + "53": 1833295360.0, + "54": 1833295360.0, + "55": 1833295360.0, + "56": 1833295360.0, + "57": 1833295360.0, + "58": 1833295360.0, + "59": 1833295360.0, + "60": 1833295360.0, + "61": 1833295360.0, + "62": 1833295360.0, + "63": 1833295360.0, + "64": 1833295360.0, + "65": 1833295360.0, + "66": 1833295360.0, + "67": 1833295360.0, + "68": 1833295360.0, + "69": 1833295360.0, + "70": 1833295360.0, + "71": 1833295360.0, + "72": 1833295360.0, + "73": 1833295360.0, + "74": 1833295360.0, + "75": 1833295360.0, + "76": 1833295360.0, + "77": 1833295360.0, + "78": 1833295360.0, + "79": 1833295360.0, + "80": 1833295360.0, + "81": 1833295360.0, + "82": 1833295360.0, + "83": 1833295360.0, + "84": 
1833295360.0, + "85": 1833295360.0, + "86": 1833295360.0, + "87": 1833295360.0, + "88": 1833295360.0, + "89": 1833295360.0, + "90": 1833295360.0, + "91": 1833295360.0, + "92": 1833295360.0, + "93": 1833295360.0, + "94": 1833295360.0, + "95": 1833295360.0, + "96": 1833295360.0, + "97": 1833295360.0, + "98": 1833295360.0, + "99": 1833295360.0, + "100": 1833295360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.3446, + "2": 0.55186, + "3": 0.52074, + "4": 0.52226, + "5": 0.51961, + "6": 0.52672, + "7": 0.52451, + "8": 0.52369, + "9": 0.54507, + "10": 0.53931, + "11": 0.55505, + "12": 0.52851, + "13": 0.51692, + "14": 0.52026, + "15": 0.51979, + "16": 0.53317, + "17": 0.52489, + "18": 0.59625, + "19": 0.52238, + "20": 0.53197, + "21": 0.52211, + "22": 0.51979, + "23": 0.52551, + "24": 0.52413, + "25": 0.52676, + "26": 0.5192, + "27": 0.52336, + "28": 0.53671, + "29": 0.53561, + "30": 0.51609, + "31": 0.55983, + "32": 0.5166, + "33": 0.53721, + "34": 0.52158, + "35": 0.53727, + "36": 0.5279, + "37": 0.51655, + "38": 0.51986, + "39": 0.5223, + "40": 0.52388, + "41": 0.52083, + "42": 0.52801, + "43": 0.52136, + "44": 0.52414, + "45": 0.52048, + "46": 0.53415, + "47": 0.54831, + "48": 0.58827, + "49": 0.55044, + "50": 0.52682, + "51": 0.52339, + "52": 0.51726, + "53": 0.518, + "54": 0.51935, + "55": 0.52073, + "56": 0.52732, + "57": 0.51867, + "58": 0.51876, + "59": 0.5213, + "60": 0.51779, + "61": 0.52225, + "62": 0.52041, + "63": 0.51793, + "64": 0.5135, + "65": 0.51913, + "66": 0.86034, + "67": 0.51468, + "68": 0.90156, + "69": 0.51931, + "70": 0.53602, + "71": 0.51818, + "72": 0.51744, + "73": 0.54454, + "74": 0.51831, + "75": 0.521, + "76": 0.52894, + "77": 0.53227, + "78": 0.51806, + "79": 0.51818, + "80": 0.51632, + "81": 0.51704, + "82": 0.51542, + "83": 0.51861, + "84": 0.53204, + "85": 0.52011, + "86": 0.53043, + "87": 0.94359, + "88": 0.51776, + "89": 0.51799, + "90": 0.51773, + "91": 0.51828, + 
"92": 0.52318, + "93": 0.51688, + "94": 0.51939, + "95": 0.51554, + "96": 0.9, + "97": 0.96079, + "98": 0.52856, + "99": 0.51996, + "100": 0.52921 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 49f18d73ef1..791f5758ea5 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.34897, + "2": 10.34482, + "3": 9.79428, + "4": 9.59585, "5": 9.42074, + "6": 9.41847, + "7": 9.28062, + "8": 9.18972, + "9": 9.06519, "10": 9.00183, + "11": 8.81475, + "12": 8.7808, + "13": 8.82493, + "14": 8.67261, "15": 8.6364, + "16": 8.51896, + "17": 8.45704, + "18": 8.37007, + "19": 8.36039, "20": 8.25417, + "21": 8.2421, + "22": 8.13324, + "23": 8.06764, + "24": 8.1142, "25": 7.95082, + "26": 8.08156, + "27": 7.86764, + "28": 7.93993, + "29": 7.77566, "30": 7.84559, + "31": 7.8152, + "32": 7.65941, + "33": 7.77856, + "34": 7.53188, "35": 7.65804, + "36": 7.51464, + "37": 7.44686, + "38": 7.48161, + "39": 7.46435, "40": 7.49084, + "41": 7.40827, + "42": 7.35625, + "43": 7.43764, + "44": 7.35439, "45": 7.35042, + "46": 7.27853, + "47": 7.4405, + "48": 7.26763, + "49": 7.32341, "50": 7.14486, + "51": 7.36469, + "52": 7.12044, + "53": 7.09167, + "54": 7.22712, "55": 7.13495, + "56": 7.20751, + "57": 7.31287, + "58": 6.99063, + "59": 7.09849, "60": 7.12665, + "61": 7.10047, + "62": 7.23974, + "63": 7.14358, + "64": 7.06717, "65": 6.98408, + "66": 7.03692, + "67": 7.02875, + "68": 7.12914, + 
"69": 7.01425, "70": 7.04954, + "71": 6.89312, + "72": 6.98513, + "73": 6.96734, + "74": 6.90236, "75": 7.05611, + "76": 6.95986, + "77": 7.06862, + "78": 7.0204, + "79": 6.8505, "80": 6.92019, + "81": 6.95982, + "82": 7.04575, + "83": 6.98617, + "84": 6.99991, "85": 6.83511, + "86": 7.04087, + "87": 6.96604, + "88": 6.90125, + "89": 6.80345, "90": 7.22384, + "91": 6.70505, + "92": 7.03979, + "93": 6.8857, + "94": 7.04044, "95": 6.84746, + "96": 6.9546, + "97": 6.94425, + "98": 6.86865, + "99": 6.9948, "100": 6.96761 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 43289.0, + "2": 44062.0, + "3": 44747.0, + "4": 42377.0, "5": 45372.0, + "6": 40957.0, + "7": 43147.0, + "8": 45474.0, + "9": 42425.0, "10": 45380.0, + "11": 43984.0, + "12": 44594.0, + "13": 43914.0, + "14": 46203.0, "15": 43914.0, + "16": 41632.0, + "17": 43870.0, + "18": 44691.0, + "19": 42574.0, "20": 44769.0, + "21": 44757.0, + "22": 41854.0, + "23": 45440.0, + "24": 43066.0, "25": 42458.0, + "26": 43949.0, + "27": 46224.0, + "28": 46395.0, + "29": 46168.0, "30": 44028.0, + "31": 41131.0, + "32": 43348.0, + "33": 45441.0, + "34": 43316.0, "35": 43258.0, + "36": 42459.0, + "37": 40074.0, + "38": 42544.0, + "39": 44707.0, "40": 43237.0, + "41": 44652.0, + "42": 43196.0, + "43": 45435.0, + "44": 44591.0, "45": 43263.0, + "46": 43930.0, + "47": 42373.0, + "48": 44713.0, + "49": 43128.0, "50": 43361.0, + "51": 41133.0, + "52": 43849.0, + "53": 43899.0, + "54": 41704.0, "55": 43863.0, + "56": 43205.0, + "57": 42636.0, + "58": 43835.0, + "59": 44623.0, "60": 41226.0, + "61": 39705.0, + "62": 44732.0, + "63": 44659.0, + "64": 45371.0, "65": 44682.0, + "66": 45341.0, + "67": 43169.0, + "68": 42486.0, + "69": 43829.0, "70": 45529.0, + "71": 43294.0, + "72": 44745.0, + "73": 45364.0, + "74": 42463.0, "75": 44679.0, + "76": 43882.0, + "77": 42042.0, + "78": 40356.0, + "79": 38928.0, "80": 41079.0, + "81": 45349.0, + "82": 43226.0, + "83": 
38474.0, + "84": 42415.0, "85": 43989.0, + "86": 45673.0, + "87": 40850.0, + "88": 41756.0, + "89": 41065.0, "90": 44686.0, + "91": 46135.0, + "92": 41609.0, + "93": 43267.0, + "94": 39525.0, "95": 43921.0, + "96": 44683.0, + "97": 45412.0, + "98": 41832.0, + "99": 45416.0, "100": 42457.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1104069120.0, + "2": 1104069120.0, + "3": 1104069120.0, + "4": 1104069120.0, "5": 1104069120.0, + "6": 1104069120.0, + "7": 1104069120.0, + "8": 1104069120.0, + "9": 1104069120.0, "10": 1104069120.0, + "11": 1104069120.0, + "12": 1104069120.0, + "13": 1104069120.0, + "14": 1104069120.0, "15": 1104069120.0, + "16": 1104069120.0, + "17": 1104069120.0, + "18": 1104069120.0, + "19": 1104069120.0, "20": 1104069120.0, + "21": 1104069120.0, + "22": 1104069120.0, + "23": 1104069120.0, + "24": 1104069120.0, "25": 1104069120.0, + "26": 1104069120.0, + "27": 1104069120.0, + "28": 1104069120.0, + "29": 1104069120.0, "30": 1104069120.0, + "31": 1104069120.0, + "32": 1104069120.0, + "33": 1104069120.0, + "34": 1104069120.0, "35": 1104069120.0, + "36": 1104069120.0, + "37": 1104069120.0, + "38": 1104069120.0, + "39": 1104069120.0, "40": 1104069120.0, + "41": 1104069120.0, + "42": 1104069120.0, + "43": 1104069120.0, + "44": 1104069120.0, "45": 1104069120.0, + "46": 1104069120.0, + "47": 1104069120.0, + "48": 1104069120.0, + "49": 1104069120.0, "50": 1104069120.0, + "51": 1104069120.0, + "52": 1104069120.0, + "53": 1104069120.0, + "54": 1104069120.0, "55": 1104069120.0, + "56": 1104069120.0, + "57": 1104069120.0, + "58": 1104069120.0, + "59": 1104069120.0, "60": 1104069120.0, + "61": 1104069120.0, + "62": 1104069120.0, + "63": 1104069120.0, + "64": 1104069120.0, "65": 1104069120.0, + "66": 1104069120.0, + "67": 1104069120.0, + "68": 1104069120.0, + "69": 1104069120.0, "70": 1104069120.0, + "71": 1104069120.0, + "72": 1104069120.0, + "73": 1104069120.0, + "74": 1104069120.0, 
"75": 1104069120.0, + "76": 1104069120.0, + "77": 1104069120.0, + "78": 1104069120.0, + "79": 1104069120.0, "80": 1104069120.0, + "81": 1104069120.0, + "82": 1104069120.0, + "83": 1104069120.0, + "84": 1104069120.0, "85": 1104069120.0, + "86": 1104069120.0, + "87": 1104069120.0, + "88": 1104069120.0, + "89": 1104069120.0, "90": 1104069120.0, + "91": 1104069120.0, + "92": 1104069120.0, + "93": 1104069120.0, + "94": 1104069120.0, "95": 1104069120.0, + "96": 1104069120.0, + "97": 1104069120.0, + "98": 1104069120.0, + "99": 1104069120.0, "100": 1104069120.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 1368630784.0, + "2": 1833295360.0, + "3": 1833295360.0, + "4": 1833295360.0, "5": 1833295360.0, + "6": 1833295360.0, + "7": 1833295360.0, + "8": 1833295360.0, + "9": 1833295360.0, "10": 1833295360.0, + "11": 1833295360.0, + "12": 1833295360.0, + "13": 1833295360.0, + "14": 1833295360.0, "15": 1833295360.0, + "16": 1833295360.0, + "17": 1833295360.0, + "18": 1833295360.0, + "19": 1833295360.0, "20": 1833295360.0, + "21": 1833295360.0, + "22": 1833295360.0, + "23": 1833295360.0, + "24": 1833295360.0, "25": 1833295360.0, + "26": 1833295360.0, + "27": 1833295360.0, + "28": 1833295360.0, + "29": 1833295360.0, "30": 1833295360.0, + "31": 1833295360.0, + "32": 1833295360.0, + "33": 1833295360.0, + "34": 1833295360.0, "35": 1833295360.0, + "36": 1833295360.0, + "37": 1833295360.0, + "38": 1833295360.0, + "39": 1833295360.0, "40": 1833295360.0, + "41": 1833295360.0, + "42": 1833295360.0, + "43": 1833295360.0, + "44": 1833295360.0, "45": 1833295360.0, + "46": 1833295360.0, + "47": 1833295360.0, + "48": 1833295360.0, + "49": 1833295360.0, "50": 1833295360.0, + "51": 1833295360.0, + "52": 1833295360.0, + "53": 1833295360.0, + "54": 1833295360.0, "55": 1833295360.0, + "56": 1833295360.0, + "57": 1833295360.0, + "58": 1833295360.0, + "59": 1833295360.0, "60": 1833295360.0, + "61": 1833295360.0, + "62": 
1833295360.0, + "63": 1833295360.0, + "64": 1833295360.0, "65": 1833295360.0, + "66": 1833295360.0, + "67": 1833295360.0, + "68": 1833295360.0, + "69": 1833295360.0, "70": 1833295360.0, + "71": 1833295360.0, + "72": 1833295360.0, + "73": 1833295360.0, + "74": 1833295360.0, "75": 1833295360.0, + "76": 1833295360.0, + "77": 1833295360.0, + "78": 1833295360.0, + "79": 1833295360.0, "80": 1833295360.0, + "81": 1833295360.0, + "82": 1833295360.0, + "83": 1833295360.0, + "84": 1833295360.0, "85": 1833295360.0, + "86": 1833295360.0, + "87": 1833295360.0, + "88": 1833295360.0, + "89": 1833295360.0, "90": 1833295360.0, + "91": 1833295360.0, + "92": 1833295360.0, + "93": 1833295360.0, + "94": 1833295360.0, "95": 1833295360.0, + "96": 1833295360.0, + "97": 1833295360.0, + "98": 1833295360.0, + "99": 1833295360.0, "100": 1833295360.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 8.34115, - "5": 0.45893, - "10": 0.45098, - "15": 0.46238, - "20": 0.44885, - "25": 0.4602, - "30": 0.44717, - "35": 0.45167, - "40": 0.46266, - "45": 0.44352, - "50": 0.78806, - "55": 0.46254, - "60": 0.45899, - "65": 0.47177, - "70": 0.44807, - "75": 0.44966, - "80": 0.44473, - "85": 0.45029, - "90": 0.48553, - "95": 0.4471, - "100": 0.46649 + "1": 9.43749, + "2": 0.56177, + "3": 0.54092, + "4": 0.53069, + "5": 0.54015, + "6": 0.52654, + "7": 0.52537, + "8": 0.529, + "9": 0.52024, + "10": 0.54001, + "11": 0.52228, + "12": 0.52764, + "13": 0.52112, + "14": 0.52842, + "15": 0.53159, + "16": 0.52768, + "17": 0.53602, + "18": 0.52711, + "19": 0.5217, + "20": 0.53787, + "21": 0.52947, + "22": 0.52812, + "23": 0.522, + "24": 0.525, + "25": 0.5262, + "26": 0.5262, + "27": 0.52831, + "28": 0.5236, + "29": 0.54456, + "30": 0.51906, + "31": 0.52674, + "32": 0.52164, + "33": 0.5315, + "34": 0.52077, + "35": 0.53196, + "36": 0.52142, + "37": 0.52841, + "38": 0.52733, + "39": 0.52595, + "40": 0.52329, + "41": 0.52463, + "42": 0.52373, + 
"43": 0.5242, + "44": 0.53002, + "45": 0.52375, + "46": 0.52927, + "47": 0.52485, + "48": 0.54174, + "49": 0.52535, + "50": 0.52504, + "51": 0.53766, + "52": 0.52768, + "53": 0.52759, + "54": 0.52754, + "55": 0.53938, + "56": 0.53362, + "57": 0.53077, + "58": 0.52676, + "59": 0.53132, + "60": 0.52333, + "61": 0.52796, + "62": 0.53758, + "63": 0.53371, + "64": 0.52937, + "65": 0.53002, + "66": 0.53001, + "67": 0.52768, + "68": 0.52999, + "69": 0.52873, + "70": 0.54329, + "71": 0.52577, + "72": 0.53281, + "73": 0.52373, + "74": 0.53896, + "75": 0.53536, + "76": 0.52444, + "77": 0.53551, + "78": 0.55804, + "79": 0.55697, + "80": 0.53175, + "81": 0.53929, + "82": 0.52759, + "83": 0.53135, + "84": 0.53043, + "85": 0.53678, + "86": 0.58197, + "87": 0.54322, + "88": 0.52771, + "89": 0.88532, + "90": 0.5352, + "91": 0.5432, + "92": 0.53256, + "93": 0.53, + "94": 0.53231, + "95": 0.53588, + "96": 0.5246, + "97": 0.53401, + "98": 0.53042, + "99": 0.53172, + "100": 0.52281 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..7f620001acb --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34897, + "2": 10.34482, + "3": 9.79428, + "4": 9.59585, + "5": 9.42074, + "6": 9.41847, + "7": 9.28062, + "8": 9.18972, + "9": 9.06519, + "10": 9.00183, + "11": 8.81475, + "12": 8.7808, + "13": 8.82493, + "14": 8.67261, + "15": 8.6364, + "16": 8.51896, + "17": 8.45704, + "18": 8.37007, + "19": 8.36039, + "20": 8.25417, + "21": 8.2421, + "22": 8.13324, + "23": 8.06764, + 
"24": 8.1142, + "25": 7.95082, + "26": 8.08156, + "27": 7.86764, + "28": 7.93993, + "29": 7.77566, + "30": 7.84559, + "31": 7.8152, + "32": 7.65941, + "33": 7.77856, + "34": 7.53188, + "35": 7.65804, + "36": 7.51464, + "37": 7.44686, + "38": 7.48161, + "39": 7.46435, + "40": 7.49084, + "41": 7.40827, + "42": 7.35625, + "43": 7.43764, + "44": 7.35439, + "45": 7.35042, + "46": 7.27853, + "47": 7.4405, + "48": 7.26763, + "49": 7.32341, + "50": 7.14486, + "51": 7.36469, + "52": 7.12044, + "53": 7.09167, + "54": 7.22712, + "55": 7.13495, + "56": 7.20751, + "57": 7.31287, + "58": 6.99063, + "59": 7.09849, + "60": 7.12665, + "61": 7.10047, + "62": 7.23974, + "63": 7.14358, + "64": 7.06717, + "65": 6.98408, + "66": 7.03692, + "67": 7.02875, + "68": 7.12914, + "69": 7.01425, + "70": 7.04954, + "71": 6.89312, + "72": 6.98513, + "73": 6.96734, + "74": 6.90236, + "75": 7.05611, + "76": 6.95986, + "77": 7.06862, + "78": 7.0204, + "79": 6.8505, + "80": 6.92019, + "81": 6.95982, + "82": 7.04575, + "83": 6.98617, + "84": 6.99991, + "85": 6.83511, + "86": 7.04087, + "87": 6.96604, + "88": 6.90125, + "89": 6.80345, + "90": 7.22384, + "91": 6.70505, + "92": 7.03979, + "93": 6.8857, + "94": 7.04044, + "95": 6.84746, + "96": 6.9546, + "97": 6.94425, + "98": 6.86865, + "99": 6.9948, + "100": 6.96761 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43289.0, + "2": 44062.0, + "3": 44747.0, + "4": 42377.0, + "5": 45372.0, + "6": 40957.0, + "7": 43147.0, + "8": 45474.0, + "9": 42425.0, + "10": 45380.0, + "11": 43984.0, + "12": 44594.0, + "13": 43914.0, + "14": 46203.0, + "15": 43914.0, + "16": 41632.0, + "17": 43870.0, + "18": 44691.0, + "19": 42574.0, + "20": 44769.0, + "21": 44757.0, + "22": 41854.0, + "23": 45440.0, + "24": 43066.0, + "25": 42458.0, + "26": 43949.0, + "27": 46224.0, + "28": 46395.0, + "29": 46168.0, + "30": 44028.0, + "31": 41131.0, + "32": 43348.0, + "33": 45441.0, + "34": 43316.0, + "35": 43258.0, + "36": 
42459.0, + "37": 40074.0, + "38": 42544.0, + "39": 44707.0, + "40": 43237.0, + "41": 44652.0, + "42": 43196.0, + "43": 45435.0, + "44": 44591.0, + "45": 43263.0, + "46": 43930.0, + "47": 42373.0, + "48": 44713.0, + "49": 43128.0, + "50": 43361.0, + "51": 41133.0, + "52": 43849.0, + "53": 43899.0, + "54": 41704.0, + "55": 43863.0, + "56": 43205.0, + "57": 42636.0, + "58": 43835.0, + "59": 44623.0, + "60": 41226.0, + "61": 39705.0, + "62": 44732.0, + "63": 44659.0, + "64": 45371.0, + "65": 44682.0, + "66": 45341.0, + "67": 43169.0, + "68": 42486.0, + "69": 43829.0, + "70": 45529.0, + "71": 43294.0, + "72": 44745.0, + "73": 45364.0, + "74": 42463.0, + "75": 44679.0, + "76": 43882.0, + "77": 42042.0, + "78": 40356.0, + "79": 38928.0, + "80": 41079.0, + "81": 45349.0, + "82": 43226.0, + "83": 38474.0, + "84": 42415.0, + "85": 43989.0, + "86": 45673.0, + "87": 40850.0, + "88": 41756.0, + "89": 41065.0, + "90": 44686.0, + "91": 46135.0, + "92": 41609.0, + "93": 43267.0, + "94": 39525.0, + "95": 43921.0, + "96": 44683.0, + "97": 45412.0, + "98": 41832.0, + "99": 45416.0, + "100": 42457.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1104069120.0, + "2": 1104069120.0, + "3": 1104069120.0, + "4": 1104069120.0, + "5": 1104069120.0, + "6": 1104069120.0, + "7": 1104069120.0, + "8": 1104069120.0, + "9": 1104069120.0, + "10": 1104069120.0, + "11": 1104069120.0, + "12": 1104069120.0, + "13": 1104069120.0, + "14": 1104069120.0, + "15": 1104069120.0, + "16": 1104069120.0, + "17": 1104069120.0, + "18": 1104069120.0, + "19": 1104069120.0, + "20": 1104069120.0, + "21": 1104069120.0, + "22": 1104069120.0, + "23": 1104069120.0, + "24": 1104069120.0, + "25": 1104069120.0, + "26": 1104069120.0, + "27": 1104069120.0, + "28": 1104069120.0, + "29": 1104069120.0, + "30": 1104069120.0, + "31": 1104069120.0, + "32": 1104069120.0, + "33": 1104069120.0, + "34": 1104069120.0, + "35": 1104069120.0, + "36": 1104069120.0, + "37": 
1104069120.0, + "38": 1104069120.0, + "39": 1104069120.0, + "40": 1104069120.0, + "41": 1104069120.0, + "42": 1104069120.0, + "43": 1104069120.0, + "44": 1104069120.0, + "45": 1104069120.0, + "46": 1104069120.0, + "47": 1104069120.0, + "48": 1104069120.0, + "49": 1104069120.0, + "50": 1104069120.0, + "51": 1104069120.0, + "52": 1104069120.0, + "53": 1104069120.0, + "54": 1104069120.0, + "55": 1104069120.0, + "56": 1104069120.0, + "57": 1104069120.0, + "58": 1104069120.0, + "59": 1104069120.0, + "60": 1104069120.0, + "61": 1104069120.0, + "62": 1104069120.0, + "63": 1104069120.0, + "64": 1104069120.0, + "65": 1104069120.0, + "66": 1104069120.0, + "67": 1104069120.0, + "68": 1104069120.0, + "69": 1104069120.0, + "70": 1104069120.0, + "71": 1104069120.0, + "72": 1104069120.0, + "73": 1104069120.0, + "74": 1104069120.0, + "75": 1104069120.0, + "76": 1104069120.0, + "77": 1104069120.0, + "78": 1104069120.0, + "79": 1104069120.0, + "80": 1104069120.0, + "81": 1104069120.0, + "82": 1104069120.0, + "83": 1104069120.0, + "84": 1104069120.0, + "85": 1104069120.0, + "86": 1104069120.0, + "87": 1104069120.0, + "88": 1104069120.0, + "89": 1104069120.0, + "90": 1104069120.0, + "91": 1104069120.0, + "92": 1104069120.0, + "93": 1104069120.0, + "94": 1104069120.0, + "95": 1104069120.0, + "96": 1104069120.0, + "97": 1104069120.0, + "98": 1104069120.0, + "99": 1104069120.0, + "100": 1104069120.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1368630784.0, + "2": 1833295360.0, + "3": 1833295360.0, + "4": 1833295360.0, + "5": 1833295360.0, + "6": 1833295360.0, + "7": 1833295360.0, + "8": 1833295360.0, + "9": 1833295360.0, + "10": 1833295360.0, + "11": 1833295360.0, + "12": 1833295360.0, + "13": 1833295360.0, + "14": 1833295360.0, + "15": 1833295360.0, + "16": 1833295360.0, + "17": 1833295360.0, + "18": 1833295360.0, + "19": 1833295360.0, + "20": 1833295360.0, + "21": 1833295360.0, + "22": 1833295360.0, + "23": 
1833295360.0, + "24": 1833295360.0, + "25": 1833295360.0, + "26": 1833295360.0, + "27": 1833295360.0, + "28": 1833295360.0, + "29": 1833295360.0, + "30": 1833295360.0, + "31": 1833295360.0, + "32": 1833295360.0, + "33": 1833295360.0, + "34": 1833295360.0, + "35": 1833295360.0, + "36": 1833295360.0, + "37": 1833295360.0, + "38": 1833295360.0, + "39": 1833295360.0, + "40": 1833295360.0, + "41": 1833295360.0, + "42": 1833295360.0, + "43": 1833295360.0, + "44": 1833295360.0, + "45": 1833295360.0, + "46": 1833295360.0, + "47": 1833295360.0, + "48": 1833295360.0, + "49": 1833295360.0, + "50": 1833295360.0, + "51": 1833295360.0, + "52": 1833295360.0, + "53": 1833295360.0, + "54": 1833295360.0, + "55": 1833295360.0, + "56": 1833295360.0, + "57": 1833295360.0, + "58": 1833295360.0, + "59": 1833295360.0, + "60": 1833295360.0, + "61": 1833295360.0, + "62": 1833295360.0, + "63": 1833295360.0, + "64": 1833295360.0, + "65": 1833295360.0, + "66": 1833295360.0, + "67": 1833295360.0, + "68": 1833295360.0, + "69": 1833295360.0, + "70": 1833295360.0, + "71": 1833295360.0, + "72": 1833295360.0, + "73": 1833295360.0, + "74": 1833295360.0, + "75": 1833295360.0, + "76": 1833295360.0, + "77": 1833295360.0, + "78": 1833295360.0, + "79": 1833295360.0, + "80": 1833295360.0, + "81": 1833295360.0, + "82": 1833295360.0, + "83": 1833295360.0, + "84": 1833295360.0, + "85": 1833295360.0, + "86": 1833295360.0, + "87": 1833295360.0, + "88": 1833295360.0, + "89": 1833295360.0, + "90": 1833295360.0, + "91": 1833295360.0, + "92": 1833295360.0, + "93": 1833295360.0, + "94": 1833295360.0, + "95": 1833295360.0, + "96": 1833295360.0, + "97": 1833295360.0, + "98": 1833295360.0, + "99": 1833295360.0, + "100": 1833295360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.41131, + "2": 0.5911, + "3": 0.46668, + "4": 0.46572, + "5": 0.48182, + "6": 0.47419, + "7": 0.45962, + "8": 0.46076, + "9": 0.46022, + "10": 0.46056, + "11": 0.45992, + "12": 
0.46724, + "13": 0.46712, + "14": 0.46827, + "15": 0.4727, + "16": 0.49253, + "17": 0.47082, + "18": 0.47424, + "19": 0.46849, + "20": 0.45979, + "21": 0.47104, + "22": 0.46485, + "23": 0.46326, + "24": 0.47218, + "25": 0.46353, + "26": 0.46063, + "27": 0.45609, + "28": 0.4748, + "29": 0.45917, + "30": 0.46344, + "31": 0.45858, + "32": 0.46504, + "33": 0.46109, + "34": 0.46003, + "35": 0.46415, + "36": 0.466, + "37": 0.46298, + "38": 0.46081, + "39": 0.46051, + "40": 0.46065, + "41": 0.46838, + "42": 0.49321, + "43": 0.47091, + "44": 0.46781, + "45": 0.45909, + "46": 0.4623, + "47": 0.46684, + "48": 0.46817, + "49": 0.47488, + "50": 0.46159, + "51": 0.4696, + "52": 0.46902, + "53": 0.46394, + "54": 0.46398, + "55": 0.48419, + "56": 0.48174, + "57": 0.46979, + "58": 0.46441, + "59": 0.46756, + "60": 0.45954, + "61": 0.46551, + "62": 0.46355, + "63": 0.4631, + "64": 0.46313, + "65": 0.47693, + "66": 0.46943, + "67": 0.45954, + "68": 0.46555, + "69": 0.46002, + "70": 0.47351, + "71": 0.46163, + "72": 0.46815, + "73": 0.46171, + "74": 0.46772, + "75": 0.75351, + "76": 0.46342, + "77": 0.47886, + "78": 0.47771, + "79": 0.47646, + "80": 0.47943, + "81": 0.47905, + "82": 0.47, + "83": 0.46092, + "84": 1.47835, + "85": 0.47794, + "86": 0.97054, + "87": 3.1063, + "88": 0.466, + "89": 1.9497, + "90": 0.4647, + "91": 0.47038, + "92": 0.46503, + "93": 0.47547, + "94": 0.48315, + "95": 0.48851, + "96": 0.50856, + "97": 0.49788, + "98": 0.48078, + "99": 0.5127, + "100": 0.46344 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..b9a799c779f --- /dev/null +++ 
b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34897, + "2": 10.34482, + "3": 9.79428, + "4": 9.59585, + "5": 9.42074, + "6": 9.41847, + "7": 9.28062, + "8": 9.18972, + "9": 9.06519, + "10": 9.00183, + "11": 8.81475, + "12": 8.7808, + "13": 8.82493, + "14": 8.67261, + "15": 8.6364, + "16": 8.51896, + "17": 8.45704, + "18": 8.37007, + "19": 8.36039, + "20": 8.25417, + "21": 8.2421, + "22": 8.13324, + "23": 8.06764, + "24": 8.1142, + "25": 7.95082, + "26": 8.08156, + "27": 7.86764, + "28": 7.93993, + "29": 7.77566, + "30": 7.84559, + "31": 7.8152, + "32": 7.65941, + "33": 7.77856, + "34": 7.53188, + "35": 7.65804, + "36": 7.51464, + "37": 7.44686, + "38": 7.48161, + "39": 7.46435, + "40": 7.49084, + "41": 7.40827, + "42": 7.35625, + "43": 7.43764, + "44": 7.35439, + "45": 7.35042, + "46": 7.27853, + "47": 7.4405, + "48": 7.26763, + "49": 7.32341, + "50": 7.14486, + "51": 7.36469, + "52": 7.12044, + "53": 7.09167, + "54": 7.22712, + "55": 7.13495, + "56": 7.20751, + "57": 7.31287, + "58": 6.99063, + "59": 7.09849, + "60": 7.12665, + "61": 7.10047, + "62": 7.23974, + "63": 7.14358, + "64": 7.06717, + "65": 6.98408, + "66": 7.03692, + "67": 7.02875, + "68": 7.12914, + "69": 7.01425, + "70": 7.04954, + "71": 6.89312, + "72": 6.98513, + "73": 6.96734, + "74": 6.90236, + "75": 7.05611, + "76": 6.95986, + "77": 7.06862, + "78": 7.0204, + "79": 6.8505, + "80": 6.92019, + "81": 6.95982, + "82": 7.04575, + "83": 6.98617, + "84": 6.99991, + "85": 6.83511, + "86": 7.04087, + "87": 6.96604, + "88": 6.90125, + "89": 6.80345, + "90": 7.22384, + "91": 6.70505, + "92": 7.03979, + "93": 6.8857, + "94": 7.04044, + "95": 6.84746, + "96": 6.9546, + "97": 6.94425, + "98": 6.86865, + "99": 6.9948, + "100": 6.96761 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 43289.0, + "2": 44062.0, + "3": 44747.0, + "4": 42377.0, + "5": 45372.0, + "6": 40957.0, + "7": 43147.0, + "8": 45474.0, + "9": 42425.0, + "10": 45380.0, + "11": 43984.0, + "12": 44594.0, + "13": 43914.0, + "14": 46203.0, + "15": 43914.0, + "16": 41632.0, + "17": 43870.0, + "18": 44691.0, + "19": 42574.0, + "20": 44769.0, + "21": 44757.0, + "22": 41854.0, + "23": 45440.0, + "24": 43066.0, + "25": 42458.0, + "26": 43949.0, + "27": 46224.0, + "28": 46395.0, + "29": 46168.0, + "30": 44028.0, + "31": 41131.0, + "32": 43348.0, + "33": 45441.0, + "34": 43316.0, + "35": 43258.0, + "36": 42459.0, + "37": 40074.0, + "38": 42544.0, + "39": 44707.0, + "40": 43237.0, + "41": 44652.0, + "42": 43196.0, + "43": 45435.0, + "44": 44591.0, + "45": 43263.0, + "46": 43930.0, + "47": 42373.0, + "48": 44713.0, + "49": 43128.0, + "50": 43361.0, + "51": 41133.0, + "52": 43849.0, + "53": 43899.0, + "54": 41704.0, + "55": 43863.0, + "56": 43205.0, + "57": 42636.0, + "58": 43835.0, + "59": 44623.0, + "60": 41226.0, + "61": 39705.0, + "62": 44732.0, + "63": 44659.0, + "64": 45371.0, + "65": 44682.0, + "66": 45341.0, + "67": 43169.0, + "68": 42486.0, + "69": 43829.0, + "70": 45529.0, + "71": 43294.0, + "72": 44745.0, + "73": 45364.0, + "74": 42463.0, + "75": 44679.0, + "76": 43882.0, + "77": 42042.0, + "78": 40356.0, + "79": 38928.0, + "80": 41079.0, + "81": 45349.0, + "82": 43226.0, + "83": 38474.0, + "84": 42415.0, + "85": 43989.0, + "86": 45673.0, + "87": 40850.0, + "88": 41756.0, + "89": 41065.0, + "90": 44686.0, + "91": 46135.0, + "92": 41609.0, + "93": 43267.0, + "94": 39525.0, + "95": 43921.0, + "96": 44683.0, + "97": 45412.0, + "98": 41832.0, + "99": 45416.0, + "100": 42457.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1104069120.0, + "2": 1104069120.0, + "3": 1104069120.0, + "4": 1104069120.0, + "5": 1104069120.0, + "6": 1104069120.0, + "7": 1104069120.0, + "8": 
1104069120.0, + "9": 1104069120.0, + "10": 1104069120.0, + "11": 1104069120.0, + "12": 1104069120.0, + "13": 1104069120.0, + "14": 1104069120.0, + "15": 1104069120.0, + "16": 1104069120.0, + "17": 1104069120.0, + "18": 1104069120.0, + "19": 1104069120.0, + "20": 1104069120.0, + "21": 1104069120.0, + "22": 1104069120.0, + "23": 1104069120.0, + "24": 1104069120.0, + "25": 1104069120.0, + "26": 1104069120.0, + "27": 1104069120.0, + "28": 1104069120.0, + "29": 1104069120.0, + "30": 1104069120.0, + "31": 1104069120.0, + "32": 1104069120.0, + "33": 1104069120.0, + "34": 1104069120.0, + "35": 1104069120.0, + "36": 1104069120.0, + "37": 1104069120.0, + "38": 1104069120.0, + "39": 1104069120.0, + "40": 1104069120.0, + "41": 1104069120.0, + "42": 1104069120.0, + "43": 1104069120.0, + "44": 1104069120.0, + "45": 1104069120.0, + "46": 1104069120.0, + "47": 1104069120.0, + "48": 1104069120.0, + "49": 1104069120.0, + "50": 1104069120.0, + "51": 1104069120.0, + "52": 1104069120.0, + "53": 1104069120.0, + "54": 1104069120.0, + "55": 1104069120.0, + "56": 1104069120.0, + "57": 1104069120.0, + "58": 1104069120.0, + "59": 1104069120.0, + "60": 1104069120.0, + "61": 1104069120.0, + "62": 1104069120.0, + "63": 1104069120.0, + "64": 1104069120.0, + "65": 1104069120.0, + "66": 1104069120.0, + "67": 1104069120.0, + "68": 1104069120.0, + "69": 1104069120.0, + "70": 1104069120.0, + "71": 1104069120.0, + "72": 1104069120.0, + "73": 1104069120.0, + "74": 1104069120.0, + "75": 1104069120.0, + "76": 1104069120.0, + "77": 1104069120.0, + "78": 1104069120.0, + "79": 1104069120.0, + "80": 1104069120.0, + "81": 1104069120.0, + "82": 1104069120.0, + "83": 1104069120.0, + "84": 1104069120.0, + "85": 1104069120.0, + "86": 1104069120.0, + "87": 1104069120.0, + "88": 1104069120.0, + "89": 1104069120.0, + "90": 1104069120.0, + "91": 1104069120.0, + "92": 1104069120.0, + "93": 1104069120.0, + "94": 1104069120.0, + "95": 1104069120.0, + "96": 1104069120.0, + "97": 1104069120.0, + "98": 1104069120.0, + 
"99": 1104069120.0, + "100": 1104069120.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1368630784.0, + "2": 1833295360.0, + "3": 1833295360.0, + "4": 1833295360.0, + "5": 1833295360.0, + "6": 1833295360.0, + "7": 1833295360.0, + "8": 1833295360.0, + "9": 1833295360.0, + "10": 1833295360.0, + "11": 1833295360.0, + "12": 1833295360.0, + "13": 1833295360.0, + "14": 1833295360.0, + "15": 1833295360.0, + "16": 1833295360.0, + "17": 1833295360.0, + "18": 1833295360.0, + "19": 1833295360.0, + "20": 1833295360.0, + "21": 1833295360.0, + "22": 1833295360.0, + "23": 1833295360.0, + "24": 1833295360.0, + "25": 1833295360.0, + "26": 1833295360.0, + "27": 1833295360.0, + "28": 1833295360.0, + "29": 1833295360.0, + "30": 1833295360.0, + "31": 1833295360.0, + "32": 1833295360.0, + "33": 1833295360.0, + "34": 1833295360.0, + "35": 1833295360.0, + "36": 1833295360.0, + "37": 1833295360.0, + "38": 1833295360.0, + "39": 1833295360.0, + "40": 1833295360.0, + "41": 1833295360.0, + "42": 1833295360.0, + "43": 1833295360.0, + "44": 1833295360.0, + "45": 1833295360.0, + "46": 1833295360.0, + "47": 1833295360.0, + "48": 1833295360.0, + "49": 1833295360.0, + "50": 1833295360.0, + "51": 1833295360.0, + "52": 1833295360.0, + "53": 1833295360.0, + "54": 1833295360.0, + "55": 1833295360.0, + "56": 1833295360.0, + "57": 1833295360.0, + "58": 1833295360.0, + "59": 1833295360.0, + "60": 1833295360.0, + "61": 1833295360.0, + "62": 1833295360.0, + "63": 1833295360.0, + "64": 1833295360.0, + "65": 1833295360.0, + "66": 1833295360.0, + "67": 1833295360.0, + "68": 1833295360.0, + "69": 1833295360.0, + "70": 1833295360.0, + "71": 1833295360.0, + "72": 1833295360.0, + "73": 1833295360.0, + "74": 1833295360.0, + "75": 1833295360.0, + "76": 1833295360.0, + "77": 1833295360.0, + "78": 1833295360.0, + "79": 1833295360.0, + "80": 1833295360.0, + "81": 1833295360.0, + "82": 1833295360.0, + "83": 1833295360.0, + "84": 1833295360.0, + 
"85": 1833295360.0, + "86": 1833295360.0, + "87": 1833295360.0, + "88": 1833295360.0, + "89": 1833295360.0, + "90": 1833295360.0, + "91": 1833295360.0, + "92": 1833295360.0, + "93": 1833295360.0, + "94": 1833295360.0, + "95": 1833295360.0, + "96": 1833295360.0, + "97": 1833295360.0, + "98": 1833295360.0, + "99": 1833295360.0, + "100": 1833295360.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.38956, + "2": 0.54892, + "3": 0.53756, + "4": 0.52845, + "5": 0.52687, + "6": 0.51818, + "7": 0.52819, + "8": 0.52051, + "9": 0.52526, + "10": 0.52865, + "11": 0.52834, + "12": 0.52573, + "13": 0.52783, + "14": 0.52938, + "15": 0.51899, + "16": 0.53517, + "17": 0.52289, + "18": 0.5363, + "19": 0.5954, + "20": 0.55838, + "21": 0.52166, + "22": 0.54146, + "23": 0.53649, + "24": 0.52785, + "25": 0.52349, + "26": 0.52481, + "27": 0.52376, + "28": 0.52226, + "29": 0.5291, + "30": 0.52613, + "31": 0.52719, + "32": 0.52341, + "33": 0.52646, + "34": 0.52272, + "35": 0.53016, + "36": 0.51941, + "37": 0.52643, + "38": 0.51914, + "39": 0.53109, + "40": 0.52353, + "41": 0.55102, + "42": 0.52656, + "43": 0.53223, + "44": 0.53438, + "45": 0.53126, + "46": 0.53776, + "47": 0.52511, + "48": 0.53521, + "49": 0.52743, + "50": 0.52883, + "51": 0.54078, + "52": 0.52088, + "53": 0.53221, + "54": 0.52473, + "55": 0.54396, + "56": 0.52771, + "57": 0.52699, + "58": 0.53079, + "59": 0.52445, + "60": 0.53037, + "61": 0.52164, + "62": 0.532, + "63": 0.52392, + "64": 0.53062, + "65": 0.52269, + "66": 0.53306, + "67": 0.5173, + "68": 0.54063, + "69": 0.52464, + "70": 0.92233, + "71": 0.53301, + "72": 0.52584, + "73": 0.55029, + "74": 0.54931, + "75": 0.54907, + "76": 0.53191, + "77": 0.53522, + "78": 0.53487, + "79": 0.52543, + "80": 0.53474, + "81": 0.52635, + "82": 0.54801, + "83": 0.52605, + "84": 0.53393, + "85": 0.52523, + "86": 0.53947, + "87": 0.52933, + "88": 0.53447, + "89": 0.53, + "90": 0.5287, + "91": 0.53326, + "92": 0.54604, 
+ "93": 0.53649, + "94": 0.5297, + "95": 0.54163, + "96": 0.52549, + "97": 0.53256, + "98": 0.53104, + "99": 0.54062, + "100": 0.52332 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json index 2a483ef0d3a..9a9cb7962ee 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.3313, + "2": 10.35273, + "3": 9.79594, + "4": 9.60954, "5": 9.42267, + "6": 9.45134, + "7": 9.34339, + "8": 9.27517, + "9": 9.09683, "10": 9.07209, + "11": 8.8835, + "12": 8.83706, + "13": 8.86832, + "14": 8.71037, "15": 8.68183, + "16": 8.56139, + "17": 8.52303, + "18": 8.43962, + "19": 8.40445, "20": 8.29516, + "21": 8.27051, + "22": 8.17907, + "23": 8.12669, + "24": 8.14854, "25": 7.99081, + "26": 8.12208, + "27": 7.90451, + "28": 7.98651, + "29": 7.80842, "30": 7.86913, + "31": 7.83557, + "32": 7.7216, + "33": 7.80364, + "34": 7.59209, "35": 7.68371, + "36": 7.53869, + "37": 7.47624, + "38": 7.51683, + "39": 7.49967, "40": 7.51717, + "41": 7.43167, + "42": 7.40089, + "43": 7.4492, + "44": 7.3892, "45": 7.3802, + "46": 7.29486, + "47": 7.44839, + "48": 7.282, + "49": 7.34647, "50": 7.17125, + "51": 7.37351, + "52": 7.13362, + "53": 7.11248, + "54": 7.23395, "55": 7.14784, + "56": 7.2278, + "57": 7.33273, + "58": 6.99464, + "59": 7.11597, "60": 7.13216, + "61": 7.10561, + "62": 7.26519, + "63": 7.14764, + "64": 7.08702, "65": 6.98658, + "66": 7.04733, + "67": 7.04745, + "68": 
7.14076, + "69": 7.24347, "70": 7.05974, + "71": 6.89358, + "72": 6.99793, + "73": 6.97928, + "74": 6.91973, "75": 7.05295, + "76": 6.96054, + "77": 7.07939, + "78": 7.0137, + "79": 6.88344, "80": 6.93032, + "81": 6.96568, + "82": 7.05273, + "83": 6.98785, + "84": 7.00434, "85": 6.84596, + "86": 7.03651, + "87": 6.96347, + "88": 6.91343, + "89": 6.80657, "90": 7.23629, + "91": 6.70068, + "92": 7.05694, + "93": 6.89292, + "94": 7.05848, "95": 6.84802, + "96": 6.9679, + "97": 6.9429, + "98": 6.87432, + "99": 7.01828, "100": 6.98491 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 43340.0, + "2": 44096.0, + "3": 44784.0, + "4": 42468.0, "5": 45416.0, + "6": 40967.0, + "7": 43183.0, + "8": 45463.0, + "9": 42562.0, "10": 45358.0, + "11": 44024.0, + "12": 44607.0, + "13": 43921.0, + "14": 46213.0, "15": 43945.0, + "16": 41749.0, + "17": 43868.0, + "18": 44723.0, + "19": 42609.0, "20": 44784.0, + "21": 44794.0, + "22": 41882.0, + "23": 45474.0, + "24": 43082.0, "25": 42696.0, + "26": 43952.0, + "27": 46262.0, + "28": 46418.0, + "29": 46154.0, "30": 44052.0, + "31": 41259.0, + "32": 43443.0, + "33": 45485.0, + "34": 43346.0, "35": 43279.0, + "36": 42498.0, + "37": 40653.0, + "38": 42538.0, + "39": 44772.0, "40": 43278.0, + "41": 44664.0, + "42": 43297.0, + "43": 45448.0, + "44": 44622.0, "45": 43354.0, + "46": 43931.0, + "47": 42505.0, + "48": 44726.0, + "49": 43168.0, "50": 43402.0, + "51": 41200.0, + "52": 43884.0, + "53": 43946.0, + "54": 41916.0, "55": 43925.0, + "56": 43252.0, + "57": 42636.0, + "58": 43941.0, + "59": 44619.0, "60": 41400.0, + "61": 39750.0, + "62": 44764.0, + "63": 44671.0, + "64": 45375.0, "65": 44753.0, + "66": 45404.0, + "67": 43154.0, + "68": 42551.0, + "69": 43844.0, "70": 45537.0, + "71": 43335.0, + "72": 44839.0, + "73": 45372.0, + "74": 42511.0, "75": 44712.0, + "76": 43930.0, + "77": 42073.0, + "78": 40535.0, + "79": 38992.0, "80": 41092.0, + "81": 45382.0, + "82": 43275.0, + 
"83": 38475.0, + "84": 42418.0, "85": 43979.0, + "86": 45691.0, + "87": 41145.0, + "88": 41782.0, + "89": 41042.0, "90": 44713.0, + "91": 46270.0, + "92": 41845.0, + "93": 43272.0, + "94": 39536.0, "95": 44085.0, + "96": 44689.0, + "97": 45411.0, + "98": 41858.0, + "99": 45575.0, "100": 42501.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 4168870400.0, + "2": 4168870400.0, + "3": 4168870400.0, + "4": 4168870400.0, "5": 4168870400.0, + "6": 4168870400.0, + "7": 4168870400.0, + "8": 4168870400.0, + "9": 4168870400.0, "10": 4168870400.0, + "11": 4168870400.0, + "12": 4168870400.0, + "13": 4168870400.0, + "14": 4168870400.0, "15": 4168870400.0, + "16": 4168870400.0, + "17": 4168870400.0, + "18": 4168870400.0, + "19": 4168870400.0, "20": 4168870400.0, + "21": 4168870400.0, + "22": 4168870400.0, + "23": 4168870400.0, + "24": 4168870400.0, "25": 4168870400.0, + "26": 4168870400.0, + "27": 4168870400.0, + "28": 4168870400.0, + "29": 4168870400.0, "30": 4168870400.0, + "31": 4168870400.0, + "32": 4168870400.0, + "33": 4168870400.0, + "34": 4168870400.0, "35": 4168870400.0, + "36": 4168870400.0, + "37": 4168870400.0, + "38": 4168870400.0, + "39": 4168870400.0, "40": 4168870400.0, + "41": 4168870400.0, + "42": 4168870400.0, + "43": 4168870400.0, + "44": 4168870400.0, "45": 4168870400.0, + "46": 4168870400.0, + "47": 4168870400.0, + "48": 4168870400.0, + "49": 4168870400.0, "50": 4168870400.0, + "51": 4168870400.0, + "52": 4168870400.0, + "53": 4168870400.0, + "54": 4168870400.0, "55": 4168870400.0, + "56": 4168870400.0, + "57": 4168870400.0, + "58": 4168870400.0, + "59": 4168870400.0, "60": 4168870400.0, + "61": 4168870400.0, + "62": 4168870400.0, + "63": 4168870400.0, + "64": 4168870400.0, "65": 4168870400.0, + "66": 4168870400.0, + "67": 4168870400.0, + "68": 4168870400.0, + "69": 4168870400.0, "70": 4168870400.0, + "71": 4168870400.0, + "72": 4168870400.0, + "73": 4168870400.0, + "74": 
4168870400.0, "75": 4168870400.0, + "76": 4168870400.0, + "77": 4168870400.0, + "78": 4168870400.0, + "79": 4168870400.0, "80": 4168870400.0, + "81": 4168870400.0, + "82": 4168870400.0, + "83": 4168870400.0, + "84": 4168870400.0, "85": 4168870400.0, + "86": 4168870400.0, + "87": 4168870400.0, + "88": 4168870400.0, + "89": 4168870400.0, "90": 4168870400.0, + "91": 4168870400.0, + "92": 4168870400.0, + "93": 4168870400.0, + "94": 4168870400.0, "95": 4168870400.0, + "96": 4168870400.0, + "97": 4168870400.0, + "98": 4168870400.0, + "99": 4168870400.0, "100": 4168870400.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 4375071232.0, + "2": 6204402688.0, + "3": 6206499840.0, + "4": 6206499840.0, "5": 6206499840.0, + "6": 6206499840.0, + "7": 6206499840.0, + "8": 6206499840.0, + "9": 6206499840.0, "10": 6206499840.0, + "11": 6206499840.0, + "12": 6206499840.0, + "13": 6206499840.0, + "14": 6206499840.0, "15": 6206499840.0, + "16": 6206499840.0, + "17": 6206499840.0, + "18": 6206499840.0, + "19": 6206499840.0, "20": 6206499840.0, + "21": 6206499840.0, + "22": 6206499840.0, + "23": 6206499840.0, + "24": 6206499840.0, "25": 6206499840.0, + "26": 6206499840.0, + "27": 6206499840.0, + "28": 6206499840.0, + "29": 6206499840.0, "30": 6206499840.0, + "31": 6206499840.0, + "32": 6206499840.0, + "33": 6206499840.0, + "34": 6206499840.0, "35": 6206499840.0, + "36": 6206499840.0, + "37": 6206499840.0, + "38": 6206499840.0, + "39": 6206499840.0, "40": 6206499840.0, + "41": 6206499840.0, + "42": 6206499840.0, + "43": 6206499840.0, + "44": 6206499840.0, "45": 6206499840.0, + "46": 6206499840.0, + "47": 6206499840.0, + "48": 6206499840.0, + "49": 6206499840.0, "50": 6206499840.0, + "51": 6206499840.0, + "52": 6206499840.0, + "53": 6206499840.0, + "54": 6206499840.0, "55": 6206499840.0, + "56": 6206499840.0, + "57": 6206499840.0, + "58": 6206499840.0, + "59": 6206499840.0, "60": 6206499840.0, + "61": 
6206499840.0, + "62": 6206499840.0, + "63": 6206499840.0, + "64": 6206499840.0, "65": 6206499840.0, + "66": 6206499840.0, + "67": 6206499840.0, + "68": 6206499840.0, + "69": 6206499840.0, "70": 6206499840.0, + "71": 6206499840.0, + "72": 6206499840.0, + "73": 6206499840.0, + "74": 6206499840.0, "75": 6206499840.0, + "76": 6206499840.0, + "77": 6206499840.0, + "78": 6206499840.0, + "79": 6206499840.0, "80": 6206499840.0, + "81": 6206499840.0, + "82": 6206499840.0, + "83": 6206499840.0, + "84": 6206499840.0, "85": 6206499840.0, + "86": 6206499840.0, + "87": 6206499840.0, + "88": 6206499840.0, + "89": 6206499840.0, "90": 6206499840.0, + "91": 6206499840.0, + "92": 6206499840.0, + "93": 6206499840.0, + "94": 6206499840.0, "95": 6206499840.0, + "96": 6206499840.0, + "97": 6206499840.0, + "98": 6206499840.0, + "99": 6206499840.0, "100": 6206499840.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 5.96824, - "5": 0.16199, - "10": 0.16035, - "15": 0.16138, - "20": 0.16464, - "25": 0.16244, - "30": 0.16034, - "35": 0.16315, - "40": 0.1629, - "45": 0.1679, - "50": 0.163, - "55": 0.16422, - "60": 0.16092, - "65": 0.17177, - "70": 0.16664, - "75": 0.16285, - "80": 0.15979, - "85": 0.16193, - "90": 0.16426, - "95": 0.16461, - "100": 0.49883 + "1": 7.18555, + "2": 0.22912, + "3": 0.19495, + "4": 0.19292, + "5": 0.1933, + "6": 0.20082, + "7": 0.1898, + "8": 0.19078, + "9": 0.19631, + "10": 0.18961, + "11": 0.19602, + "12": 0.19712, + "13": 0.19248, + "14": 0.19302, + "15": 0.19445, + "16": 0.19515, + "17": 0.19565, + "18": 0.18839, + "19": 0.19044, + "20": 0.1878, + "21": 0.19199, + "22": 0.19051, + "23": 0.19216, + "24": 0.19009, + "25": 0.18449, + "26": 0.19206, + "27": 0.19, + "28": 0.19154, + "29": 0.19019, + "30": 0.18961, + "31": 0.18739, + "32": 0.19441, + "33": 0.18956, + "34": 0.19188, + "35": 0.20225, + "36": 0.1956, + "37": 0.20085, + "38": 0.20338, + "39": 0.19512, + "40": 0.20945, + "41": 
0.20775, + "42": 0.20695, + "43": 0.20502, + "44": 0.19536, + "45": 0.1972, + "46": 0.19693, + "47": 0.2056, + "48": 0.19367, + "49": 0.19288, + "50": 0.19187, + "51": 0.19233, + "52": 0.19557, + "53": 0.19068, + "54": 0.18458, + "55": 0.18565, + "56": 0.18636, + "57": 0.19313, + "58": 0.18633, + "59": 0.18858, + "60": 0.18486, + "61": 0.18799, + "62": 0.18531, + "63": 0.19385, + "64": 0.18893, + "65": 0.1968, + "66": 0.19472, + "67": 0.19267, + "68": 0.19586, + "69": 0.22272, + "70": 0.22071, + "71": 0.18794, + "72": 0.19924, + "73": 0.19888, + "74": 0.22693, + "75": 0.20741, + "76": 0.19831, + "77": 0.20398, + "78": 0.19269, + "79": 0.19066, + "80": 0.18543, + "81": 0.18666, + "82": 0.18559, + "83": 0.19153, + "84": 0.18527, + "85": 0.18623, + "86": 0.48843, + "87": 0.18991, + "88": 0.18251, + "89": 0.18473, + "90": 0.18511, + "91": 0.19021, + "92": 0.19055, + "93": 0.18545, + "94": 0.1853, + "95": 0.18396, + "96": 0.1848, + "97": 0.19407, + "98": 0.18533, + "99": 0.18593, + "100": 0.48771 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..72278130300 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34492, + "2": 10.36433, + "3": 9.73145, + "4": 9.57923, + "5": 9.3892, + "6": 9.41078, + "7": 9.30545, + "8": 9.24872, + "9": 9.09363, + "10": 9.01571, + "11": 8.86227, + "12": 8.79088, + "13": 8.80884, + "14": 8.67658, + "15": 8.64615, + "16": 8.53973, + "17": 8.47875, + "18": 8.38919, + "19": 8.36145, + "20": 
8.26963, + "21": 8.26321, + "22": 8.15047, + "23": 8.08861, + "24": 8.12416, + "25": 7.99467, + "26": 8.08474, + "27": 7.87741, + "28": 7.95852, + "29": 7.79567, + "30": 7.87463, + "31": 7.83211, + "32": 7.69448, + "33": 7.78447, + "34": 7.55753, + "35": 7.65847, + "36": 7.52861, + "37": 7.44889, + "38": 7.50364, + "39": 7.48064, + "40": 7.50295, + "41": 7.3974, + "42": 7.37184, + "43": 7.44291, + "44": 7.38083, + "45": 7.36112, + "46": 7.29391, + "47": 7.475, + "48": 7.29535, + "49": 7.3607, + "50": 7.19186, + "51": 7.38728, + "52": 7.13728, + "53": 7.12477, + "54": 7.23618, + "55": 7.16789, + "56": 7.22866, + "57": 7.34625, + "58": 7.03082, + "59": 7.12273, + "60": 7.16511, + "61": 7.11656, + "62": 7.26779, + "63": 7.16695, + "64": 7.08275, + "65": 7.00051, + "66": 7.07139, + "67": 7.05884, + "68": 7.14563, + "69": 7.03993, + "70": 7.07139, + "71": 6.91636, + "72": 7.02022, + "73": 6.99002, + "74": 6.91408, + "75": 7.07586, + "76": 6.97032, + "77": 7.08431, + "78": 7.03516, + "79": 6.88312, + "80": 6.95246, + "81": 6.98441, + "82": 7.06806, + "83": 7.00882, + "84": 7.01789, + "85": 6.86372, + "86": 7.04924, + "87": 6.99288, + "88": 6.92333, + "89": 6.82337, + "90": 7.25405, + "91": 6.72212, + "92": 7.05344, + "93": 6.91633, + "94": 7.0654, + "95": 6.85964, + "96": 6.98723, + "97": 6.96749, + "98": 6.89904, + "99": 7.02746, + "100": 6.99698 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43313.0, + "2": 44075.0, + "3": 44779.0, + "4": 42461.0, + "5": 45406.0, + "6": 40995.0, + "7": 43185.0, + "8": 45480.0, + "9": 42555.0, + "10": 45370.0, + "11": 44017.0, + "12": 44619.0, + "13": 43939.0, + "14": 46223.0, + "15": 43950.0, + "16": 41732.0, + "17": 43869.0, + "18": 44696.0, + "19": 42631.0, + "20": 44806.0, + "21": 44813.0, + "22": 41897.0, + "23": 45483.0, + "24": 43099.0, + "25": 42740.0, + "26": 43950.0, + "27": 46249.0, + "28": 46424.0, + "29": 46206.0, + "30": 44052.0, + "31": 41268.0, + "32": 43408.0, 
+ "33": 45487.0, + "34": 43390.0, + "35": 43279.0, + "36": 42533.0, + "37": 40700.0, + "38": 42585.0, + "39": 44772.0, + "40": 43242.0, + "41": 44698.0, + "42": 43271.0, + "43": 45502.0, + "44": 44648.0, + "45": 43344.0, + "46": 43923.0, + "47": 42519.0, + "48": 44691.0, + "49": 43190.0, + "50": 43411.0, + "51": 41175.0, + "52": 43901.0, + "53": 43967.0, + "54": 41964.0, + "55": 43968.0, + "56": 43280.0, + "57": 42566.0, + "58": 43903.0, + "59": 44657.0, + "60": 41346.0, + "61": 39760.0, + "62": 44779.0, + "63": 44680.0, + "64": 45395.0, + "65": 44726.0, + "66": 45386.0, + "67": 43197.0, + "68": 42570.0, + "69": 43834.0, + "70": 45545.0, + "71": 43402.0, + "72": 44828.0, + "73": 45410.0, + "74": 42508.0, + "75": 44680.0, + "76": 43936.0, + "77": 42111.0, + "78": 40541.0, + "79": 38950.0, + "80": 41138.0, + "81": 45397.0, + "82": 43256.0, + "83": 38500.0, + "84": 42533.0, + "85": 44039.0, + "86": 45756.0, + "87": 41125.0, + "88": 41799.0, + "89": 41088.0, + "90": 44735.0, + "91": 46292.0, + "92": 41852.0, + "93": 43234.0, + "94": 39581.0, + "95": 44094.0, + "96": 44736.0, + "97": 45487.0, + "98": 41852.0, + "99": 45522.0, + "100": 42475.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4138985984.0, + "2": 4138985984.0, + "3": 4138985984.0, + "4": 4138985984.0, + "5": 4138985984.0, + "6": 4138985984.0, + "7": 4138985984.0, + "8": 4138985984.0, + "9": 4138985984.0, + "10": 4138985984.0, + "11": 4138985984.0, + "12": 4138985984.0, + "13": 4138985984.0, + "14": 4138985984.0, + "15": 4138985984.0, + "16": 4138985984.0, + "17": 4138985984.0, + "18": 4138985984.0, + "19": 4138985984.0, + "20": 4138985984.0, + "21": 4138985984.0, + "22": 4138985984.0, + "23": 4138985984.0, + "24": 4138985984.0, + "25": 4138985984.0, + "26": 4138985984.0, + "27": 4138985984.0, + "28": 4138985984.0, + "29": 4138985984.0, + "30": 4138985984.0, + "31": 4138985984.0, + "32": 4138985984.0, + "33": 4138985984.0, + "34": 
4138985984.0, + "35": 4138985984.0, + "36": 4138985984.0, + "37": 4138985984.0, + "38": 4138985984.0, + "39": 4138985984.0, + "40": 4138985984.0, + "41": 4138985984.0, + "42": 4138985984.0, + "43": 4138985984.0, + "44": 4138985984.0, + "45": 4138985984.0, + "46": 4138985984.0, + "47": 4138985984.0, + "48": 4138985984.0, + "49": 4138985984.0, + "50": 4138985984.0, + "51": 4138985984.0, + "52": 4138985984.0, + "53": 4138985984.0, + "54": 4138985984.0, + "55": 4138985984.0, + "56": 4138985984.0, + "57": 4138985984.0, + "58": 4138985984.0, + "59": 4138985984.0, + "60": 4138985984.0, + "61": 4138985984.0, + "62": 4138985984.0, + "63": 4138985984.0, + "64": 4138985984.0, + "65": 4138985984.0, + "66": 4138985984.0, + "67": 4138985984.0, + "68": 4138985984.0, + "69": 4138985984.0, + "70": 4138985984.0, + "71": 4138985984.0, + "72": 4138985984.0, + "73": 4138985984.0, + "74": 4138985984.0, + "75": 4138985984.0, + "76": 4138985984.0, + "77": 4138985984.0, + "78": 4138985984.0, + "79": 4138985984.0, + "80": 4138985984.0, + "81": 4138985984.0, + "82": 4138985984.0, + "83": 4138985984.0, + "84": 4138985984.0, + "85": 4138985984.0, + "86": 4138985984.0, + "87": 4138985984.0, + "88": 4138985984.0, + "89": 4138985984.0, + "90": 4138985984.0, + "91": 4138985984.0, + "92": 4138985984.0, + "93": 4138985984.0, + "94": 4138985984.0, + "95": 4138985984.0, + "96": 4138985984.0, + "97": 4138985984.0, + "98": 4138985984.0, + "99": 4138985984.0, + "100": 4138985984.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4345973248.0, + "2": 6174256128.0, + "3": 6177401856.0, + "4": 6177401856.0, + "5": 6177401856.0, + "6": 6177401856.0, + "7": 6177401856.0, + "8": 6177401856.0, + "9": 6177401856.0, + "10": 6177401856.0, + "11": 6177401856.0, + "12": 6177401856.0, + "13": 6177401856.0, + "14": 6177401856.0, + "15": 6177401856.0, + "16": 6177401856.0, + "17": 6177401856.0, + "18": 6177401856.0, + "19": 6177401856.0, + "20": 
6177401856.0, + "21": 6177401856.0, + "22": 6177401856.0, + "23": 6177401856.0, + "24": 6177401856.0, + "25": 6177401856.0, + "26": 6177401856.0, + "27": 6177401856.0, + "28": 6177401856.0, + "29": 6177401856.0, + "30": 6177401856.0, + "31": 6177401856.0, + "32": 6177401856.0, + "33": 6177401856.0, + "34": 6177401856.0, + "35": 6177401856.0, + "36": 6177401856.0, + "37": 6177401856.0, + "38": 6177401856.0, + "39": 6177401856.0, + "40": 6177401856.0, + "41": 6177401856.0, + "42": 6177401856.0, + "43": 6177401856.0, + "44": 6177401856.0, + "45": 6177401856.0, + "46": 6177401856.0, + "47": 6177401856.0, + "48": 6177401856.0, + "49": 6177401856.0, + "50": 6177401856.0, + "51": 6177401856.0, + "52": 6177401856.0, + "53": 6177401856.0, + "54": 6177401856.0, + "55": 6177401856.0, + "56": 6177401856.0, + "57": 6177401856.0, + "58": 6177401856.0, + "59": 6177401856.0, + "60": 6177401856.0, + "61": 6177401856.0, + "62": 6177401856.0, + "63": 6177401856.0, + "64": 6177401856.0, + "65": 6177401856.0, + "66": 6177401856.0, + "67": 6177401856.0, + "68": 6177401856.0, + "69": 6177401856.0, + "70": 6177401856.0, + "71": 6177401856.0, + "72": 6177401856.0, + "73": 6177401856.0, + "74": 6177401856.0, + "75": 6177401856.0, + "76": 6177401856.0, + "77": 6177401856.0, + "78": 6177401856.0, + "79": 6177401856.0, + "80": 6177401856.0, + "81": 6177401856.0, + "82": 6177401856.0, + "83": 6177401856.0, + "84": 6177401856.0, + "85": 6177401856.0, + "86": 6177401856.0, + "87": 6177401856.0, + "88": 6177401856.0, + "89": 6177401856.0, + "90": 6177401856.0, + "91": 6177401856.0, + "92": 6177401856.0, + "93": 6177401856.0, + "94": 6177401856.0, + "95": 6177401856.0, + "96": 6177401856.0, + "97": 6177401856.0, + "98": 6177401856.0, + "99": 6177401856.0, + "100": 6177401856.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.37564, + "2": 0.47907, + "3": 0.26318, + "4": 0.26361, + "5": 0.26788, + "6": 0.26504, + "7": 0.26585, + "8": 
0.26222, + "9": 0.26257, + "10": 0.26426, + "11": 0.26743, + "12": 0.26324, + "13": 0.2631, + "14": 0.26214, + "15": 0.26226, + "16": 0.26202, + "17": 0.26215, + "18": 0.26191, + "19": 0.26192, + "20": 0.26328, + "21": 0.28093, + "22": 0.26248, + "23": 0.26259, + "24": 0.26257, + "25": 0.26193, + "26": 0.26229, + "27": 0.26207, + "28": 0.26284, + "29": 0.26248, + "30": 0.26171, + "31": 0.26369, + "32": 0.26295, + "33": 0.26244, + "34": 0.26239, + "35": 0.26289, + "36": 0.26221, + "37": 0.26173, + "38": 0.26276, + "39": 0.26177, + "40": 0.26145, + "41": 0.72968, + "42": 0.26423, + "43": 0.26386, + "44": 0.26138, + "45": 0.26438, + "46": 0.26265, + "47": 0.26382, + "48": 0.26338, + "49": 0.2647, + "50": 0.26389, + "51": 0.27004, + "52": 0.28055, + "53": 0.26495, + "54": 0.26509, + "55": 0.60834, + "56": 0.26487, + "57": 0.26475, + "58": 0.26728, + "59": 0.27353, + "60": 0.2644, + "61": 0.26294, + "62": 0.27032, + "63": 0.26838, + "64": 0.26385, + "65": 0.26288, + "66": 0.74822, + "67": 0.26372, + "68": 0.72466, + "69": 0.26508, + "70": 0.76862, + "71": 0.26359, + "72": 0.26496, + "73": 0.26691, + "74": 0.26615, + "75": 0.26787, + "76": 0.26937, + "77": 0.26491, + "78": 0.26651, + "79": 0.26743, + "80": 0.26533, + "81": 0.2655, + "82": 0.26612, + "83": 0.26497, + "84": 0.26502, + "85": 0.2647, + "86": 0.26554, + "87": 0.26569, + "88": 0.26554, + "89": 0.26468, + "90": 0.26229, + "91": 0.26142, + "92": 0.26206, + "93": 0.26215, + "94": 0.26471, + "95": 0.26142, + "96": 0.65482, + "97": 0.26367, + "98": 0.26226, + "99": 0.26183, + "100": 0.26175 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..96fd81c74b6 --- /dev/null +++ 
b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34492, + "2": 10.36433, + "3": 9.73145, + "4": 9.57923, + "5": 9.3892, + "6": 9.41078, + "7": 9.30545, + "8": 9.24872, + "9": 9.09363, + "10": 9.01571, + "11": 8.86227, + "12": 8.79088, + "13": 8.80884, + "14": 8.67658, + "15": 8.64615, + "16": 8.53973, + "17": 8.47875, + "18": 8.38919, + "19": 8.36145, + "20": 8.26963, + "21": 8.26321, + "22": 8.15047, + "23": 8.08861, + "24": 8.12416, + "25": 7.99467, + "26": 8.08474, + "27": 7.87741, + "28": 7.95852, + "29": 7.79567, + "30": 7.87463, + "31": 7.83211, + "32": 7.69448, + "33": 7.78447, + "34": 7.55753, + "35": 7.65847, + "36": 7.52861, + "37": 7.44889, + "38": 7.50364, + "39": 7.48064, + "40": 7.50295, + "41": 7.3974, + "42": 7.37184, + "43": 7.44291, + "44": 7.38083, + "45": 7.36112, + "46": 7.29391, + "47": 7.475, + "48": 7.29535, + "49": 7.3607, + "50": 7.19186, + "51": 7.38728, + "52": 7.13728, + "53": 7.12477, + "54": 7.23618, + "55": 7.16789, + "56": 7.22866, + "57": 7.34625, + "58": 7.03082, + "59": 7.12273, + "60": 7.16511, + "61": 7.11656, + "62": 7.26779, + "63": 7.16695, + "64": 7.08275, + "65": 7.00051, + "66": 7.07139, + "67": 7.05884, + "68": 7.14563, + "69": 7.03993, + "70": 7.07139, + "71": 6.91636, + "72": 7.02022, + "73": 6.99002, + "74": 6.91408, + "75": 7.07586, + "76": 6.97032, + "77": 7.08431, + "78": 7.03516, + "79": 6.88312, + "80": 6.95246, + "81": 6.98441, + "82": 7.06806, + "83": 7.00882, + "84": 7.01789, + "85": 6.86372, + "86": 7.04924, + "87": 6.99288, + "88": 6.92333, + "89": 6.82337, + "90": 7.25405, + "91": 6.72212, + "92": 7.05344, + "93": 6.91633, + "94": 7.0654, + "95": 6.85964, + "96": 6.98723, + "97": 6.96749, + "98": 6.89904, + "99": 7.02746, + "100": 6.99698 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 
100, + "step_interval": 1, + "values": { + "1": 43313.0, + "2": 44075.0, + "3": 44779.0, + "4": 42461.0, + "5": 45406.0, + "6": 40995.0, + "7": 43185.0, + "8": 45480.0, + "9": 42555.0, + "10": 45370.0, + "11": 44017.0, + "12": 44619.0, + "13": 43939.0, + "14": 46223.0, + "15": 43950.0, + "16": 41732.0, + "17": 43869.0, + "18": 44696.0, + "19": 42631.0, + "20": 44806.0, + "21": 44813.0, + "22": 41897.0, + "23": 45483.0, + "24": 43099.0, + "25": 42740.0, + "26": 43950.0, + "27": 46249.0, + "28": 46424.0, + "29": 46206.0, + "30": 44052.0, + "31": 41268.0, + "32": 43408.0, + "33": 45487.0, + "34": 43390.0, + "35": 43279.0, + "36": 42533.0, + "37": 40700.0, + "38": 42585.0, + "39": 44772.0, + "40": 43242.0, + "41": 44698.0, + "42": 43271.0, + "43": 45502.0, + "44": 44648.0, + "45": 43344.0, + "46": 43923.0, + "47": 42519.0, + "48": 44691.0, + "49": 43190.0, + "50": 43411.0, + "51": 41175.0, + "52": 43901.0, + "53": 43967.0, + "54": 41964.0, + "55": 43968.0, + "56": 43280.0, + "57": 42566.0, + "58": 43903.0, + "59": 44657.0, + "60": 41346.0, + "61": 39760.0, + "62": 44779.0, + "63": 44680.0, + "64": 45395.0, + "65": 44726.0, + "66": 45386.0, + "67": 43197.0, + "68": 42570.0, + "69": 43834.0, + "70": 45545.0, + "71": 43402.0, + "72": 44828.0, + "73": 45410.0, + "74": 42508.0, + "75": 44680.0, + "76": 43936.0, + "77": 42111.0, + "78": 40541.0, + "79": 38950.0, + "80": 41138.0, + "81": 45397.0, + "82": 43256.0, + "83": 38500.0, + "84": 42533.0, + "85": 44039.0, + "86": 45756.0, + "87": 41125.0, + "88": 41799.0, + "89": 41088.0, + "90": 44735.0, + "91": 46292.0, + "92": 41852.0, + "93": 43234.0, + "94": 39581.0, + "95": 44094.0, + "96": 44736.0, + "97": 45487.0, + "98": 41852.0, + "99": 45522.0, + "100": 42475.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4138985984.0, + "2": 4138985984.0, + "3": 4138985984.0, + "4": 4138985984.0, + "5": 4138985984.0, + "6": 4138985984.0, + "7": 4138985984.0, + "8": 
4138985984.0, + "9": 4138985984.0, + "10": 4138985984.0, + "11": 4138985984.0, + "12": 4138985984.0, + "13": 4138985984.0, + "14": 4138985984.0, + "15": 4138985984.0, + "16": 4138985984.0, + "17": 4138985984.0, + "18": 4138985984.0, + "19": 4138985984.0, + "20": 4138985984.0, + "21": 4138985984.0, + "22": 4138985984.0, + "23": 4138985984.0, + "24": 4138985984.0, + "25": 4138985984.0, + "26": 4138985984.0, + "27": 4138985984.0, + "28": 4138985984.0, + "29": 4138985984.0, + "30": 4138985984.0, + "31": 4138985984.0, + "32": 4138985984.0, + "33": 4138985984.0, + "34": 4138985984.0, + "35": 4138985984.0, + "36": 4138985984.0, + "37": 4138985984.0, + "38": 4138985984.0, + "39": 4138985984.0, + "40": 4138985984.0, + "41": 4138985984.0, + "42": 4138985984.0, + "43": 4138985984.0, + "44": 4138985984.0, + "45": 4138985984.0, + "46": 4138985984.0, + "47": 4138985984.0, + "48": 4138985984.0, + "49": 4138985984.0, + "50": 4138985984.0, + "51": 4138985984.0, + "52": 4138985984.0, + "53": 4138985984.0, + "54": 4138985984.0, + "55": 4138985984.0, + "56": 4138985984.0, + "57": 4138985984.0, + "58": 4138985984.0, + "59": 4138985984.0, + "60": 4138985984.0, + "61": 4138985984.0, + "62": 4138985984.0, + "63": 4138985984.0, + "64": 4138985984.0, + "65": 4138985984.0, + "66": 4138985984.0, + "67": 4138985984.0, + "68": 4138985984.0, + "69": 4138985984.0, + "70": 4138985984.0, + "71": 4138985984.0, + "72": 4138985984.0, + "73": 4138985984.0, + "74": 4138985984.0, + "75": 4138985984.0, + "76": 4138985984.0, + "77": 4138985984.0, + "78": 4138985984.0, + "79": 4138985984.0, + "80": 4138985984.0, + "81": 4138985984.0, + "82": 4138985984.0, + "83": 4138985984.0, + "84": 4138985984.0, + "85": 4138985984.0, + "86": 4138985984.0, + "87": 4138985984.0, + "88": 4138985984.0, + "89": 4138985984.0, + "90": 4138985984.0, + "91": 4138985984.0, + "92": 4138985984.0, + "93": 4138985984.0, + "94": 4138985984.0, + "95": 4138985984.0, + "96": 4138985984.0, + "97": 4138985984.0, + "98": 4138985984.0, + 
"99": 4138985984.0, + "100": 4138985984.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4345973248.0, + "2": 6174256128.0, + "3": 6177401856.0, + "4": 6177401856.0, + "5": 6177401856.0, + "6": 6177401856.0, + "7": 6177401856.0, + "8": 6177401856.0, + "9": 6177401856.0, + "10": 6177401856.0, + "11": 6177401856.0, + "12": 6177401856.0, + "13": 6177401856.0, + "14": 6177401856.0, + "15": 6177401856.0, + "16": 6177401856.0, + "17": 6177401856.0, + "18": 6177401856.0, + "19": 6177401856.0, + "20": 6177401856.0, + "21": 6177401856.0, + "22": 6177401856.0, + "23": 6177401856.0, + "24": 6177401856.0, + "25": 6177401856.0, + "26": 6177401856.0, + "27": 6177401856.0, + "28": 6177401856.0, + "29": 6177401856.0, + "30": 6177401856.0, + "31": 6177401856.0, + "32": 6177401856.0, + "33": 6177401856.0, + "34": 6177401856.0, + "35": 6177401856.0, + "36": 6177401856.0, + "37": 6177401856.0, + "38": 6177401856.0, + "39": 6177401856.0, + "40": 6177401856.0, + "41": 6177401856.0, + "42": 6177401856.0, + "43": 6177401856.0, + "44": 6177401856.0, + "45": 6177401856.0, + "46": 6177401856.0, + "47": 6177401856.0, + "48": 6177401856.0, + "49": 6177401856.0, + "50": 6177401856.0, + "51": 6177401856.0, + "52": 6177401856.0, + "53": 6177401856.0, + "54": 6177401856.0, + "55": 6177401856.0, + "56": 6177401856.0, + "57": 6177401856.0, + "58": 6177401856.0, + "59": 6177401856.0, + "60": 6177401856.0, + "61": 6177401856.0, + "62": 6177401856.0, + "63": 6177401856.0, + "64": 6177401856.0, + "65": 6177401856.0, + "66": 6177401856.0, + "67": 6177401856.0, + "68": 6177401856.0, + "69": 6177401856.0, + "70": 6177401856.0, + "71": 6177401856.0, + "72": 6177401856.0, + "73": 6177401856.0, + "74": 6177401856.0, + "75": 6177401856.0, + "76": 6177401856.0, + "77": 6177401856.0, + "78": 6177401856.0, + "79": 6177401856.0, + "80": 6177401856.0, + "81": 6177401856.0, + "82": 6177401856.0, + "83": 6177401856.0, + "84": 6177401856.0, + 
"85": 6177401856.0, + "86": 6177401856.0, + "87": 6177401856.0, + "88": 6177401856.0, + "89": 6177401856.0, + "90": 6177401856.0, + "91": 6177401856.0, + "92": 6177401856.0, + "93": 6177401856.0, + "94": 6177401856.0, + "95": 6177401856.0, + "96": 6177401856.0, + "97": 6177401856.0, + "98": 6177401856.0, + "99": 6177401856.0, + "100": 6177401856.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.07146, + "2": 0.76333, + "3": 0.25771, + "4": 0.25798, + "5": 0.26042, + "6": 0.26046, + "7": 0.25457, + "8": 0.25511, + "9": 0.2545, + "10": 0.25426, + "11": 0.25469, + "12": 0.25997, + "13": 0.25528, + "14": 0.25614, + "15": 0.25513, + "16": 0.25483, + "17": 0.25502, + "18": 0.2548, + "19": 0.25406, + "20": 0.25473, + "21": 0.25442, + "22": 0.25742, + "23": 0.25489, + "24": 0.25468, + "25": 0.25473, + "26": 0.25514, + "27": 0.25485, + "28": 0.25816, + "29": 0.7004, + "30": 0.25418, + "31": 0.25433, + "32": 0.25688, + "33": 0.25464, + "34": 0.25871, + "35": 0.2549, + "36": 0.25562, + "37": 0.25614, + "38": 0.26065, + "39": 0.25541, + "40": 0.25812, + "41": 0.25448, + "42": 0.25927, + "43": 0.25478, + "44": 0.25871, + "45": 0.25543, + "46": 0.25643, + "47": 0.25677, + "48": 0.25828, + "49": 0.2635, + "50": 0.26946, + "51": 0.29227, + "52": 0.28254, + "53": 0.28602, + "54": 0.25359, + "55": 0.2527, + "56": 0.25629, + "57": 0.26137, + "58": 0.25726, + "59": 0.25218, + "60": 0.25733, + "61": 0.25525, + "62": 0.25763, + "63": 0.25252, + "64": 0.26416, + "65": 0.25869, + "66": 0.25931, + "67": 0.26105, + "68": 0.26311, + "69": 0.25743, + "70": 0.25561, + "71": 0.2518, + "72": 0.25716, + "73": 0.26251, + "74": 0.27278, + "75": 0.25271, + "76": 0.25285, + "77": 0.25408, + "78": 0.70817, + "79": 0.25523, + "80": 0.26051, + "81": 0.26069, + "82": 0.25995, + "83": 0.25528, + "84": 0.25685, + "85": 0.25548, + "86": 0.74098, + "87": 0.25554, + "88": 0.27779, + "89": 0.28379, + "90": 0.28037, + "91": 0.28316, + "92": 
0.2777, + "93": 0.25778, + "94": 0.25143, + "95": 0.25144, + "96": 0.25195, + "97": 0.25167, + "98": 0.25838, + "99": 0.25302, + "100": 0.25157 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..c1e5927389e --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.3313, + "2": 10.35273, + "3": 9.79594, + "4": 9.60954, + "5": 9.42267, + "6": 9.45134, + "7": 9.34339, + "8": 9.27517, + "9": 9.09683, + "10": 9.07209, + "11": 8.8835, + "12": 8.83706, + "13": 8.86832, + "14": 8.71037, + "15": 8.68183, + "16": 8.56139, + "17": 8.52303, + "18": 8.43962, + "19": 8.40445, + "20": 8.29516, + "21": 8.27051, + "22": 8.17907, + "23": 8.12669, + "24": 8.14854, + "25": 7.99081, + "26": 8.12208, + "27": 7.90451, + "28": 7.98651, + "29": 7.80842, + "30": 7.86913, + "31": 7.83557, + "32": 7.7216, + "33": 7.80364, + "34": 7.59209, + "35": 7.68371, + "36": 7.53869, + "37": 7.47624, + "38": 7.51683, + "39": 7.49967, + "40": 7.51717, + "41": 7.43167, + "42": 7.40089, + "43": 7.4492, + "44": 7.3892, + "45": 7.3802, + "46": 7.29486, + "47": 7.44839, + "48": 7.282, + "49": 7.34647, + "50": 7.17125, + "51": 7.37351, + "52": 7.13362, + "53": 7.11248, + "54": 7.23395, + "55": 7.14784, + "56": 7.2278, + "57": 7.33273, + "58": 6.99464, + "59": 7.11597, + "60": 7.13216, + "61": 7.10561, + "62": 7.26519, + "63": 7.14764, + "64": 7.08702, + "65": 6.98658, + "66": 7.04733, + "67": 7.04745, + "68": 7.14076, + "69": 7.24347, + "70": 7.05974, + "71": 6.89358, + 
"72": 6.99793, + "73": 6.97928, + "74": 6.91973, + "75": 7.05295, + "76": 6.96054, + "77": 7.07939, + "78": 7.0137, + "79": 6.88344, + "80": 6.93032, + "81": 6.96568, + "82": 7.05273, + "83": 6.98785, + "84": 7.00434, + "85": 6.84596, + "86": 7.03651, + "87": 6.96347, + "88": 6.91343, + "89": 6.80657, + "90": 7.23629, + "91": 6.70068, + "92": 7.05694, + "93": 6.89292, + "94": 7.05848, + "95": 6.84802, + "96": 6.9679, + "97": 6.9429, + "98": 6.87432, + "99": 7.01828, + "100": 6.98491 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43340.0, + "2": 44096.0, + "3": 44784.0, + "4": 42468.0, + "5": 45416.0, + "6": 40967.0, + "7": 43183.0, + "8": 45463.0, + "9": 42562.0, + "10": 45358.0, + "11": 44024.0, + "12": 44607.0, + "13": 43921.0, + "14": 46213.0, + "15": 43945.0, + "16": 41749.0, + "17": 43868.0, + "18": 44723.0, + "19": 42609.0, + "20": 44784.0, + "21": 44794.0, + "22": 41882.0, + "23": 45474.0, + "24": 43082.0, + "25": 42696.0, + "26": 43952.0, + "27": 46262.0, + "28": 46418.0, + "29": 46154.0, + "30": 44052.0, + "31": 41259.0, + "32": 43443.0, + "33": 45485.0, + "34": 43346.0, + "35": 43279.0, + "36": 42498.0, + "37": 40653.0, + "38": 42538.0, + "39": 44772.0, + "40": 43278.0, + "41": 44664.0, + "42": 43297.0, + "43": 45448.0, + "44": 44622.0, + "45": 43354.0, + "46": 43931.0, + "47": 42505.0, + "48": 44726.0, + "49": 43168.0, + "50": 43402.0, + "51": 41200.0, + "52": 43884.0, + "53": 43946.0, + "54": 41916.0, + "55": 43925.0, + "56": 43252.0, + "57": 42636.0, + "58": 43941.0, + "59": 44619.0, + "60": 41400.0, + "61": 39750.0, + "62": 44764.0, + "63": 44671.0, + "64": 45375.0, + "65": 44753.0, + "66": 45404.0, + "67": 43154.0, + "68": 42551.0, + "69": 43844.0, + "70": 45537.0, + "71": 43335.0, + "72": 44839.0, + "73": 45372.0, + "74": 42511.0, + "75": 44712.0, + "76": 43930.0, + "77": 42073.0, + "78": 40535.0, + "79": 38992.0, + "80": 41092.0, + "81": 45382.0, + "82": 43275.0, + "83": 38475.0, + "84": 
42418.0, + "85": 43979.0, + "86": 45691.0, + "87": 41145.0, + "88": 41782.0, + "89": 41042.0, + "90": 44713.0, + "91": 46270.0, + "92": 41845.0, + "93": 43272.0, + "94": 39536.0, + "95": 44085.0, + "96": 44689.0, + "97": 45411.0, + "98": 41858.0, + "99": 45575.0, + "100": 42501.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4168870400.0, + "2": 4168870400.0, + "3": 4168870400.0, + "4": 4168870400.0, + "5": 4168870400.0, + "6": 4168870400.0, + "7": 4168870400.0, + "8": 4168870400.0, + "9": 4168870400.0, + "10": 4168870400.0, + "11": 4168870400.0, + "12": 4168870400.0, + "13": 4168870400.0, + "14": 4168870400.0, + "15": 4168870400.0, + "16": 4168870400.0, + "17": 4168870400.0, + "18": 4168870400.0, + "19": 4168870400.0, + "20": 4168870400.0, + "21": 4168870400.0, + "22": 4168870400.0, + "23": 4168870400.0, + "24": 4168870400.0, + "25": 4168870400.0, + "26": 4168870400.0, + "27": 4168870400.0, + "28": 4168870400.0, + "29": 4168870400.0, + "30": 4168870400.0, + "31": 4168870400.0, + "32": 4168870400.0, + "33": 4168870400.0, + "34": 4168870400.0, + "35": 4168870400.0, + "36": 4168870400.0, + "37": 4168870400.0, + "38": 4168870400.0, + "39": 4168870400.0, + "40": 4168870400.0, + "41": 4168870400.0, + "42": 4168870400.0, + "43": 4168870400.0, + "44": 4168870400.0, + "45": 4168870400.0, + "46": 4168870400.0, + "47": 4168870400.0, + "48": 4168870400.0, + "49": 4168870400.0, + "50": 4168870400.0, + "51": 4168870400.0, + "52": 4168870400.0, + "53": 4168870400.0, + "54": 4168870400.0, + "55": 4168870400.0, + "56": 4168870400.0, + "57": 4168870400.0, + "58": 4168870400.0, + "59": 4168870400.0, + "60": 4168870400.0, + "61": 4168870400.0, + "62": 4168870400.0, + "63": 4168870400.0, + "64": 4168870400.0, + "65": 4168870400.0, + "66": 4168870400.0, + "67": 4168870400.0, + "68": 4168870400.0, + "69": 4168870400.0, + "70": 4168870400.0, + "71": 4168870400.0, + "72": 4168870400.0, + "73": 4168870400.0, + "74": 
4168870400.0, + "75": 4168870400.0, + "76": 4168870400.0, + "77": 4168870400.0, + "78": 4168870400.0, + "79": 4168870400.0, + "80": 4168870400.0, + "81": 4168870400.0, + "82": 4168870400.0, + "83": 4168870400.0, + "84": 4168870400.0, + "85": 4168870400.0, + "86": 4168870400.0, + "87": 4168870400.0, + "88": 4168870400.0, + "89": 4168870400.0, + "90": 4168870400.0, + "91": 4168870400.0, + "92": 4168870400.0, + "93": 4168870400.0, + "94": 4168870400.0, + "95": 4168870400.0, + "96": 4168870400.0, + "97": 4168870400.0, + "98": 4168870400.0, + "99": 4168870400.0, + "100": 4168870400.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4375071232.0, + "2": 6204402688.0, + "3": 6206499840.0, + "4": 6206499840.0, + "5": 6206499840.0, + "6": 6206499840.0, + "7": 6206499840.0, + "8": 6206499840.0, + "9": 6206499840.0, + "10": 6206499840.0, + "11": 6206499840.0, + "12": 6206499840.0, + "13": 6206499840.0, + "14": 6206499840.0, + "15": 6206499840.0, + "16": 6206499840.0, + "17": 6206499840.0, + "18": 6206499840.0, + "19": 6206499840.0, + "20": 6206499840.0, + "21": 6206499840.0, + "22": 6206499840.0, + "23": 6206499840.0, + "24": 6206499840.0, + "25": 6206499840.0, + "26": 6206499840.0, + "27": 6206499840.0, + "28": 6206499840.0, + "29": 6206499840.0, + "30": 6206499840.0, + "31": 6206499840.0, + "32": 6206499840.0, + "33": 6206499840.0, + "34": 6206499840.0, + "35": 6206499840.0, + "36": 6206499840.0, + "37": 6206499840.0, + "38": 6206499840.0, + "39": 6206499840.0, + "40": 6206499840.0, + "41": 6206499840.0, + "42": 6206499840.0, + "43": 6206499840.0, + "44": 6206499840.0, + "45": 6206499840.0, + "46": 6206499840.0, + "47": 6206499840.0, + "48": 6206499840.0, + "49": 6206499840.0, + "50": 6206499840.0, + "51": 6206499840.0, + "52": 6206499840.0, + "53": 6206499840.0, + "54": 6206499840.0, + "55": 6206499840.0, + "56": 6206499840.0, + "57": 6206499840.0, + "58": 6206499840.0, + "59": 6206499840.0, + "60": 
6206499840.0, + "61": 6206499840.0, + "62": 6206499840.0, + "63": 6206499840.0, + "64": 6206499840.0, + "65": 6206499840.0, + "66": 6206499840.0, + "67": 6206499840.0, + "68": 6206499840.0, + "69": 6206499840.0, + "70": 6206499840.0, + "71": 6206499840.0, + "72": 6206499840.0, + "73": 6206499840.0, + "74": 6206499840.0, + "75": 6206499840.0, + "76": 6206499840.0, + "77": 6206499840.0, + "78": 6206499840.0, + "79": 6206499840.0, + "80": 6206499840.0, + "81": 6206499840.0, + "82": 6206499840.0, + "83": 6206499840.0, + "84": 6206499840.0, + "85": 6206499840.0, + "86": 6206499840.0, + "87": 6206499840.0, + "88": 6206499840.0, + "89": 6206499840.0, + "90": 6206499840.0, + "91": 6206499840.0, + "92": 6206499840.0, + "93": 6206499840.0, + "94": 6206499840.0, + "95": 6206499840.0, + "96": 6206499840.0, + "97": 6206499840.0, + "98": 6206499840.0, + "99": 6206499840.0, + "100": 6206499840.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.56951, + "2": 0.36564, + "3": 0.16506, + "4": 0.16216, + "5": 0.16401, + "6": 0.1643, + "7": 0.16404, + "8": 0.16401, + "9": 0.16504, + "10": 0.1617, + "11": 0.16576, + "12": 0.16229, + "13": 0.16499, + "14": 0.16561, + "15": 0.16438, + "16": 0.16356, + "17": 0.16261, + "18": 0.16022, + "19": 0.16185, + "20": 0.1635, + "21": 0.16599, + "22": 0.16234, + "23": 0.16167, + "24": 0.16807, + "25": 0.16164, + "26": 0.16553, + "27": 0.16403, + "28": 0.16811, + "29": 0.16239, + "30": 0.16649, + "31": 0.16267, + "32": 0.16749, + "33": 0.1637, + "34": 0.16943, + "35": 0.16268, + "36": 0.17031, + "37": 0.16717, + "38": 0.17077, + "39": 0.16691, + "40": 0.17033, + "41": 0.16714, + "42": 0.1713, + "43": 0.16706, + "44": 0.16889, + "45": 0.1679, + "46": 0.16944, + "47": 0.16158, + "48": 0.16604, + "49": 0.16504, + "50": 0.17162, + "51": 0.16897, + "52": 0.17155, + "53": 0.16436, + "54": 0.17087, + "55": 0.16555, + "56": 0.16962, + "57": 0.16191, + "58": 0.17048, + "59": 0.1671, + "60": 
0.16952, + "61": 0.16638, + "62": 0.1732, + "63": 0.19062, + "64": 0.17721, + "65": 0.16282, + "66": 0.16924, + "67": 0.16252, + "68": 0.16523, + "69": 0.16729, + "70": 0.53751, + "71": 0.16521, + "72": 0.17116, + "73": 0.16408, + "74": 0.16918, + "75": 0.16612, + "76": 0.21043, + "77": 0.17541, + "78": 0.20915, + "79": 0.19264, + "80": 0.16783, + "81": 0.16133, + "82": 0.16441, + "83": 0.16468, + "84": 0.16274, + "85": 0.16617, + "86": 0.16466, + "87": 0.16539, + "88": 0.16381, + "89": 0.1685, + "90": 0.1636, + "91": 0.17069, + "92": 0.16636, + "93": 0.16881, + "94": 0.16448, + "95": 0.16838, + "96": 0.16612, + "97": 0.1674, + "98": 0.16485, + "99": 0.17249, + "100": 0.16394 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..8809a47cd54 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.3313, + "2": 10.35273, + "3": 9.79594, + "4": 9.60954, + "5": 9.42267, + "6": 9.45134, + "7": 9.34339, + "8": 9.27517, + "9": 9.09683, + "10": 9.07209, + "11": 8.8835, + "12": 8.83706, + "13": 8.86832, + "14": 8.71037, + "15": 8.68183, + "16": 8.56139, + "17": 8.52303, + "18": 8.43962, + "19": 8.40445, + "20": 8.29516, + "21": 8.27051, + "22": 8.17907, + "23": 8.12669, + "24": 8.14854, + "25": 7.99081, + "26": 8.12208, + "27": 7.90451, + "28": 7.98651, + "29": 7.80842, + "30": 7.86913, + "31": 7.83557, + "32": 7.7216, + "33": 7.80364, + "34": 7.59209, + "35": 7.68371, + "36": 7.53869, + "37": 7.47624, + "38": 7.51683, + "39": 7.49967, + "40": 7.51717, 
+ "41": 7.43167, + "42": 7.40089, + "43": 7.4492, + "44": 7.3892, + "45": 7.3802, + "46": 7.29486, + "47": 7.44839, + "48": 7.282, + "49": 7.34647, + "50": 7.17125, + "51": 7.37351, + "52": 7.13362, + "53": 7.11248, + "54": 7.23395, + "55": 7.14784, + "56": 7.2278, + "57": 7.33273, + "58": 6.99464, + "59": 7.11597, + "60": 7.13216, + "61": 7.10561, + "62": 7.26519, + "63": 7.14764, + "64": 7.08702, + "65": 6.98658, + "66": 7.04733, + "67": 7.04745, + "68": 7.14076, + "69": 7.24347, + "70": 7.05974, + "71": 6.89358, + "72": 6.99793, + "73": 6.97928, + "74": 6.91973, + "75": 7.05295, + "76": 6.96054, + "77": 7.07939, + "78": 7.0137, + "79": 6.88344, + "80": 6.93032, + "81": 6.96568, + "82": 7.05273, + "83": 6.98785, + "84": 7.00434, + "85": 6.84596, + "86": 7.03651, + "87": 6.96347, + "88": 6.91343, + "89": 6.80657, + "90": 7.23629, + "91": 6.70068, + "92": 7.05694, + "93": 6.89292, + "94": 7.05848, + "95": 6.84802, + "96": 6.9679, + "97": 6.9429, + "98": 6.87432, + "99": 7.01828, + "100": 6.98491 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43340.0, + "2": 44096.0, + "3": 44784.0, + "4": 42468.0, + "5": 45416.0, + "6": 40967.0, + "7": 43183.0, + "8": 45463.0, + "9": 42562.0, + "10": 45358.0, + "11": 44024.0, + "12": 44607.0, + "13": 43921.0, + "14": 46213.0, + "15": 43945.0, + "16": 41749.0, + "17": 43868.0, + "18": 44723.0, + "19": 42609.0, + "20": 44784.0, + "21": 44794.0, + "22": 41882.0, + "23": 45474.0, + "24": 43082.0, + "25": 42696.0, + "26": 43952.0, + "27": 46262.0, + "28": 46418.0, + "29": 46154.0, + "30": 44052.0, + "31": 41259.0, + "32": 43443.0, + "33": 45485.0, + "34": 43346.0, + "35": 43279.0, + "36": 42498.0, + "37": 40653.0, + "38": 42538.0, + "39": 44772.0, + "40": 43278.0, + "41": 44664.0, + "42": 43297.0, + "43": 45448.0, + "44": 44622.0, + "45": 43354.0, + "46": 43931.0, + "47": 42505.0, + "48": 44726.0, + "49": 43168.0, + "50": 43402.0, + "51": 41200.0, + "52": 43884.0, + "53": 
43946.0, + "54": 41916.0, + "55": 43925.0, + "56": 43252.0, + "57": 42636.0, + "58": 43941.0, + "59": 44619.0, + "60": 41400.0, + "61": 39750.0, + "62": 44764.0, + "63": 44671.0, + "64": 45375.0, + "65": 44753.0, + "66": 45404.0, + "67": 43154.0, + "68": 42551.0, + "69": 43844.0, + "70": 45537.0, + "71": 43335.0, + "72": 44839.0, + "73": 45372.0, + "74": 42511.0, + "75": 44712.0, + "76": 43930.0, + "77": 42073.0, + "78": 40535.0, + "79": 38992.0, + "80": 41092.0, + "81": 45382.0, + "82": 43275.0, + "83": 38475.0, + "84": 42418.0, + "85": 43979.0, + "86": 45691.0, + "87": 41145.0, + "88": 41782.0, + "89": 41042.0, + "90": 44713.0, + "91": 46270.0, + "92": 41845.0, + "93": 43272.0, + "94": 39536.0, + "95": 44085.0, + "96": 44689.0, + "97": 45411.0, + "98": 41858.0, + "99": 45575.0, + "100": 42501.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4168870400.0, + "2": 4168870400.0, + "3": 4168870400.0, + "4": 4168870400.0, + "5": 4168870400.0, + "6": 4168870400.0, + "7": 4168870400.0, + "8": 4168870400.0, + "9": 4168870400.0, + "10": 4168870400.0, + "11": 4168870400.0, + "12": 4168870400.0, + "13": 4168870400.0, + "14": 4168870400.0, + "15": 4168870400.0, + "16": 4168870400.0, + "17": 4168870400.0, + "18": 4168870400.0, + "19": 4168870400.0, + "20": 4168870400.0, + "21": 4168870400.0, + "22": 4168870400.0, + "23": 4168870400.0, + "24": 4168870400.0, + "25": 4168870400.0, + "26": 4168870400.0, + "27": 4168870400.0, + "28": 4168870400.0, + "29": 4168870400.0, + "30": 4168870400.0, + "31": 4168870400.0, + "32": 4168870400.0, + "33": 4168870400.0, + "34": 4168870400.0, + "35": 4168870400.0, + "36": 4168870400.0, + "37": 4168870400.0, + "38": 4168870400.0, + "39": 4168870400.0, + "40": 4168870400.0, + "41": 4168870400.0, + "42": 4168870400.0, + "43": 4168870400.0, + "44": 4168870400.0, + "45": 4168870400.0, + "46": 4168870400.0, + "47": 4168870400.0, + "48": 4168870400.0, + "49": 4168870400.0, + "50": 
4168870400.0, + "51": 4168870400.0, + "52": 4168870400.0, + "53": 4168870400.0, + "54": 4168870400.0, + "55": 4168870400.0, + "56": 4168870400.0, + "57": 4168870400.0, + "58": 4168870400.0, + "59": 4168870400.0, + "60": 4168870400.0, + "61": 4168870400.0, + "62": 4168870400.0, + "63": 4168870400.0, + "64": 4168870400.0, + "65": 4168870400.0, + "66": 4168870400.0, + "67": 4168870400.0, + "68": 4168870400.0, + "69": 4168870400.0, + "70": 4168870400.0, + "71": 4168870400.0, + "72": 4168870400.0, + "73": 4168870400.0, + "74": 4168870400.0, + "75": 4168870400.0, + "76": 4168870400.0, + "77": 4168870400.0, + "78": 4168870400.0, + "79": 4168870400.0, + "80": 4168870400.0, + "81": 4168870400.0, + "82": 4168870400.0, + "83": 4168870400.0, + "84": 4168870400.0, + "85": 4168870400.0, + "86": 4168870400.0, + "87": 4168870400.0, + "88": 4168870400.0, + "89": 4168870400.0, + "90": 4168870400.0, + "91": 4168870400.0, + "92": 4168870400.0, + "93": 4168870400.0, + "94": 4168870400.0, + "95": 4168870400.0, + "96": 4168870400.0, + "97": 4168870400.0, + "98": 4168870400.0, + "99": 4168870400.0, + "100": 4168870400.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4375071232.0, + "2": 6204402688.0, + "3": 6206499840.0, + "4": 6206499840.0, + "5": 6206499840.0, + "6": 6206499840.0, + "7": 6206499840.0, + "8": 6206499840.0, + "9": 6206499840.0, + "10": 6206499840.0, + "11": 6206499840.0, + "12": 6206499840.0, + "13": 6206499840.0, + "14": 6206499840.0, + "15": 6206499840.0, + "16": 6206499840.0, + "17": 6206499840.0, + "18": 6206499840.0, + "19": 6206499840.0, + "20": 6206499840.0, + "21": 6206499840.0, + "22": 6206499840.0, + "23": 6206499840.0, + "24": 6206499840.0, + "25": 6206499840.0, + "26": 6206499840.0, + "27": 6206499840.0, + "28": 6206499840.0, + "29": 6206499840.0, + "30": 6206499840.0, + "31": 6206499840.0, + "32": 6206499840.0, + "33": 6206499840.0, + "34": 6206499840.0, + "35": 6206499840.0, + "36": 
6206499840.0, + "37": 6206499840.0, + "38": 6206499840.0, + "39": 6206499840.0, + "40": 6206499840.0, + "41": 6206499840.0, + "42": 6206499840.0, + "43": 6206499840.0, + "44": 6206499840.0, + "45": 6206499840.0, + "46": 6206499840.0, + "47": 6206499840.0, + "48": 6206499840.0, + "49": 6206499840.0, + "50": 6206499840.0, + "51": 6206499840.0, + "52": 6206499840.0, + "53": 6206499840.0, + "54": 6206499840.0, + "55": 6206499840.0, + "56": 6206499840.0, + "57": 6206499840.0, + "58": 6206499840.0, + "59": 6206499840.0, + "60": 6206499840.0, + "61": 6206499840.0, + "62": 6206499840.0, + "63": 6206499840.0, + "64": 6206499840.0, + "65": 6206499840.0, + "66": 6206499840.0, + "67": 6206499840.0, + "68": 6206499840.0, + "69": 6206499840.0, + "70": 6206499840.0, + "71": 6206499840.0, + "72": 6206499840.0, + "73": 6206499840.0, + "74": 6206499840.0, + "75": 6206499840.0, + "76": 6206499840.0, + "77": 6206499840.0, + "78": 6206499840.0, + "79": 6206499840.0, + "80": 6206499840.0, + "81": 6206499840.0, + "82": 6206499840.0, + "83": 6206499840.0, + "84": 6206499840.0, + "85": 6206499840.0, + "86": 6206499840.0, + "87": 6206499840.0, + "88": 6206499840.0, + "89": 6206499840.0, + "90": 6206499840.0, + "91": 6206499840.0, + "92": 6206499840.0, + "93": 6206499840.0, + "94": 6206499840.0, + "95": 6206499840.0, + "96": 6206499840.0, + "97": 6206499840.0, + "98": 6206499840.0, + "99": 6206499840.0, + "100": 6206499840.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.22025, + "2": 0.31576, + "3": 0.19278, + "4": 0.19432, + "5": 0.18909, + "6": 0.19307, + "7": 0.18922, + "8": 0.19506, + "9": 0.18834, + "10": 0.19233, + "11": 0.18825, + "12": 0.19571, + "13": 0.19081, + "14": 0.19613, + "15": 0.18954, + "16": 0.18825, + "17": 0.18583, + "18": 0.18933, + "19": 0.1896, + "20": 0.19136, + "21": 0.18842, + "22": 0.19581, + "23": 0.18752, + "24": 0.19277, + "25": 0.18759, + "26": 0.19405, + "27": 0.18784, + "28": 0.18762, + 
"29": 0.19232, + "30": 0.18798, + "31": 0.18713, + "32": 0.18948, + "33": 0.18968, + "34": 0.19011, + "35": 0.18907, + "36": 0.18983, + "37": 0.18857, + "38": 0.18728, + "39": 0.18835, + "40": 0.18777, + "41": 0.188, + "42": 0.18818, + "43": 0.18602, + "44": 0.18972, + "45": 0.19276, + "46": 0.18816, + "47": 0.18794, + "48": 0.19299, + "49": 0.19241, + "50": 0.18805, + "51": 0.18895, + "52": 0.19459, + "53": 0.18821, + "54": 0.18597, + "55": 0.189, + "56": 0.18748, + "57": 0.18709, + "58": 0.19127, + "59": 0.19097, + "60": 0.18702, + "61": 0.18725, + "62": 0.18762, + "63": 0.19407, + "64": 0.19411, + "65": 0.20071, + "66": 0.19555, + "67": 0.22543, + "68": 0.21724, + "69": 0.22635, + "70": 0.52922, + "71": 0.19086, + "72": 0.19899, + "73": 0.51667, + "74": 0.20138, + "75": 0.19507, + "76": 0.24987, + "77": 0.22838, + "78": 0.51523, + "79": 0.19126, + "80": 0.18911, + "81": 0.19269, + "82": 0.18816, + "83": 0.18902, + "84": 0.18942, + "85": 0.19004, + "86": 0.50868, + "87": 0.19274, + "88": 0.18813, + "89": 0.19169, + "90": 0.50854, + "91": 0.1924, + "92": 0.18906, + "93": 0.19016, + "94": 0.1902, + "95": 0.19338, + "96": 0.51468, + "97": 0.19597, + "98": 0.19147, + "99": 0.19626, + "100": 0.18852 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json index 7a1c2a35b70..24fbb5008a6 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.37205, + "2": 10.36993, + "3": 9.85245, + "4": 9.61997, "5": 
9.40867, + "6": 9.43219, + "7": 9.31484, + "8": 9.27336, + "9": 9.11412, "10": 9.03968, + "11": 8.87198, + "12": 8.80862, + "13": 8.83469, + "14": 8.69021, "15": 8.66221, + "16": 8.54816, + "17": 8.50088, + "18": 8.42516, + "19": 8.38808, "20": 8.28073, + "21": 8.26592, + "22": 8.15988, + "23": 8.11241, + "24": 8.14271, "25": 7.98425, + "26": 8.10594, + "27": 7.88954, + "28": 7.9705, + "29": 7.81272, "30": 7.87636, + "31": 7.82505, + "32": 7.70262, + "33": 7.80169, + "34": 7.56872, "35": 7.67373, + "36": 7.54686, + "37": 7.47401, + "38": 7.50726, + "39": 7.49794, "40": 7.51081, + "41": 7.41055, + "42": 7.37984, + "43": 7.44091, + "44": 7.39372, "45": 7.37241, + "46": 7.28404, + "47": 7.46627, + "48": 7.29038, + "49": 7.35015, "50": 7.17193, + "51": 7.37002, + "52": 7.14463, + "53": 7.12651, + "54": 7.23742, "55": 7.15579, + "56": 7.23152, + "57": 7.3354, + "58": 7.01365, + "59": 7.11427, "60": 7.15124, + "61": 7.1088, + "62": 7.26824, + "63": 7.15182, + "64": 7.08401, "65": 6.99127, + "66": 7.05305, + "67": 7.04353, + "68": 7.13973, + "69": 7.03243, "70": 7.05831, + "71": 6.90378, + "72": 6.99805, + "73": 6.97678, + "74": 6.91757, "75": 7.06665, + "76": 6.95719, + "77": 7.08701, + "78": 7.03266, + "79": 6.8532, "80": 6.93633, + "81": 6.97582, + "82": 7.0624, + "83": 6.98226, + "84": 7.00923, "85": 6.8507, + "86": 7.04663, + "87": 6.97947, + "88": 6.91093, + "89": 6.8168, "90": 7.24561, + "91": 6.7048, + "92": 7.05407, + "93": 6.89399, + "94": 7.0542, "95": 6.85047, + "96": 6.96463, + "97": 6.95624, + "98": 6.8829, + "99": 7.00419, "100": 6.98982 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 43288.0, + "2": 44033.0, + "3": 44733.0, + "4": 42406.0, "5": 45371.0, + "6": 40945.0, + "7": 43173.0, + "8": 45430.0, + "9": 42421.0, "10": 45369.0, + "11": 43974.0, + "12": 44588.0, + "13": 43908.0, + "14": 46215.0, "15": 43901.0, + "16": 41603.0, + "17": 43832.0, + "18": 44695.0, + "19": 42547.0, "20": 
44758.0, + "21": 44777.0, + "22": 41821.0, + "23": 45434.0, + "24": 43080.0, "25": 42439.0, + "26": 43936.0, + "27": 46214.0, + "28": 46342.0, + "29": 46135.0, "30": 43995.0, + "31": 41271.0, + "32": 43336.0, + "33": 45440.0, + "34": 43287.0, "35": 43240.0, + "36": 42490.0, + "37": 40078.0, + "38": 42510.0, + "39": 44722.0, "40": 43230.0, + "41": 44669.0, + "42": 43262.0, + "43": 45476.0, + "44": 44624.0, "45": 43326.0, + "46": 43945.0, + "47": 42395.0, + "48": 44675.0, + "49": 43169.0, "50": 43381.0, + "51": 41131.0, + "52": 43830.0, + "53": 43914.0, + "54": 42004.0, "55": 43871.0, + "56": 43227.0, + "57": 42550.0, + "58": 43816.0, + "59": 44631.0, "60": 41183.0, + "61": 39721.0, + "62": 44752.0, + "63": 44696.0, + "64": 45351.0, "65": 44694.0, + "66": 45350.0, + "67": 43132.0, + "68": 42535.0, + "69": 43829.0, "70": 45533.0, + "71": 43322.0, + "72": 44749.0, + "73": 45365.0, + "74": 42492.0, "75": 44655.0, + "76": 43920.0, + "77": 42080.0, + "78": 40298.0, + "79": 38909.0, "80": 41117.0, + "81": 45370.0, + "82": 43206.0, + "83": 38501.0, + "84": 42484.0, "85": 43986.0, + "86": 45704.0, + "87": 40839.0, + "88": 41828.0, + "89": 41074.0, "90": 44663.0, + "91": 46169.0, + "92": 41807.0, + "93": 43228.0, + "94": 39549.0, "95": 44090.0, + "96": 44711.0, + "97": 45390.0, + "98": 41799.0, + "99": 45426.0, "100": 42443.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2194357248.0, + "2": 2194357248.0, + "3": 2194357248.0, + "4": 2194357248.0, "5": 2194357248.0, + "6": 2194357248.0, + "7": 2194357248.0, + "8": 2194357248.0, + "9": 2194357248.0, "10": 2194357248.0, + "11": 2194357248.0, + "12": 2194357248.0, + "13": 2194357248.0, + "14": 2194357248.0, "15": 2194357248.0, + "16": 2194357248.0, + "17": 2194357248.0, + "18": 2194357248.0, + "19": 2194357248.0, "20": 2194357248.0, + "21": 2194357248.0, + "22": 2194357248.0, + "23": 2194357248.0, + "24": 2194357248.0, "25": 2194357248.0, + "26": 
2194357248.0, + "27": 2194357248.0, + "28": 2194357248.0, + "29": 2194357248.0, "30": 2194357248.0, + "31": 2194357248.0, + "32": 2194357248.0, + "33": 2194357248.0, + "34": 2194357248.0, "35": 2194357248.0, + "36": 2194357248.0, + "37": 2194357248.0, + "38": 2194357248.0, + "39": 2194357248.0, "40": 2194357248.0, + "41": 2194357248.0, + "42": 2194357248.0, + "43": 2194357248.0, + "44": 2194357248.0, "45": 2194357248.0, + "46": 2194357248.0, + "47": 2194357248.0, + "48": 2194357248.0, + "49": 2194357248.0, "50": 2194357248.0, + "51": 2194357248.0, + "52": 2194357248.0, + "53": 2194357248.0, + "54": 2194357248.0, "55": 2194357248.0, + "56": 2194357248.0, + "57": 2194357248.0, + "58": 2194357248.0, + "59": 2194357248.0, "60": 2194357248.0, + "61": 2194357248.0, + "62": 2194357248.0, + "63": 2194357248.0, + "64": 2194357248.0, "65": 2194357248.0, + "66": 2194357248.0, + "67": 2194357248.0, + "68": 2194357248.0, + "69": 2194357248.0, "70": 2194357248.0, + "71": 2194357248.0, + "72": 2194357248.0, + "73": 2194357248.0, + "74": 2194357248.0, "75": 2194357248.0, + "76": 2194357248.0, + "77": 2194357248.0, + "78": 2194357248.0, + "79": 2194357248.0, "80": 2194357248.0, + "81": 2194357248.0, + "82": 2194357248.0, + "83": 2194357248.0, + "84": 2194357248.0, "85": 2194357248.0, + "86": 2194357248.0, + "87": 2194357248.0, + "88": 2194357248.0, + "89": 2194357248.0, "90": 2194357248.0, + "91": 2194357248.0, + "92": 2194357248.0, + "93": 2194357248.0, + "94": 2194357248.0, "95": 2194357248.0, + "96": 2194357248.0, + "97": 2194357248.0, + "98": 2194357248.0, + "99": 2194357248.0, "100": 2194357248.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2443624960.0, + "2": 3375193600.0, + "3": 3375193600.0, + "4": 3375193600.0, "5": 3375193600.0, + "6": 3375193600.0, + "7": 3375193600.0, + "8": 3375193600.0, + "9": 3375193600.0, "10": 3375193600.0, + "11": 3375193600.0, + "12": 3375193600.0, + "13": 
3375193600.0, + "14": 3375193600.0, "15": 3375193600.0, + "16": 3375193600.0, + "17": 3375193600.0, + "18": 3375193600.0, + "19": 3375193600.0, "20": 3375193600.0, + "21": 3375193600.0, + "22": 3375193600.0, + "23": 3375193600.0, + "24": 3375193600.0, "25": 3375193600.0, + "26": 3375193600.0, + "27": 3375193600.0, + "28": 3375193600.0, + "29": 3375193600.0, "30": 3375193600.0, + "31": 3375193600.0, + "32": 3375193600.0, + "33": 3375193600.0, + "34": 3375193600.0, "35": 3375193600.0, + "36": 3375193600.0, + "37": 3375193600.0, + "38": 3375193600.0, + "39": 3375193600.0, "40": 3375193600.0, + "41": 3375193600.0, + "42": 3375193600.0, + "43": 3375193600.0, + "44": 3375193600.0, "45": 3375193600.0, + "46": 3375193600.0, + "47": 3375193600.0, + "48": 3375193600.0, + "49": 3375193600.0, "50": 3375193600.0, + "51": 3375193600.0, + "52": 3375193600.0, + "53": 3375193600.0, + "54": 3375193600.0, "55": 3375193600.0, + "56": 3375193600.0, + "57": 3375193600.0, + "58": 3375193600.0, + "59": 3375193600.0, "60": 3375193600.0, + "61": 3375193600.0, + "62": 3375193600.0, + "63": 3375193600.0, + "64": 3375193600.0, "65": 3375193600.0, + "66": 3375193600.0, + "67": 3375193600.0, + "68": 3375193600.0, + "69": 3375193600.0, "70": 3375193600.0, + "71": 3375193600.0, + "72": 3375193600.0, + "73": 3375193600.0, + "74": 3375193600.0, "75": 3375193600.0, + "76": 3375193600.0, + "77": 3375193600.0, + "78": 3375193600.0, + "79": 3375193600.0, "80": 3375193600.0, + "81": 3375193600.0, + "82": 3375193600.0, + "83": 3375193600.0, + "84": 3375193600.0, "85": 3375193600.0, + "86": 3375193600.0, + "87": 3375193600.0, + "88": 3375193600.0, + "89": 3375193600.0, "90": 3375193600.0, + "91": 3375193600.0, + "92": 3375193600.0, + "93": 3375193600.0, + "94": 3375193600.0, "95": 3375193600.0, + "96": 3375193600.0, + "97": 3375193600.0, + "98": 3375193600.0, + "99": 3375193600.0, "100": 3375193600.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, 
"values": { - "1": 6.57343, - "5": 0.29301, - "10": 0.29182, - "15": 0.29668, - "20": 0.2961, - "25": 0.29961, - "30": 0.29549, - "35": 0.30714, - "40": 0.29592, - "45": 0.29418, - "50": 0.29188, - "55": 0.29019, - "60": 0.29199, - "65": 0.5931, - "70": 0.59584, - "75": 0.29011, - "80": 0.29788, - "85": 0.30993, - "90": 0.2992, - "95": 0.29538, - "100": 0.29811 + "1": 9.51792, + "2": 0.37696, + "3": 0.35384, + "4": 0.34824, + "5": 0.34677, + "6": 0.36735, + "7": 0.37639, + "8": 0.37373, + "9": 0.37798, + "10": 0.37384, + "11": 0.37808, + "12": 0.37762, + "13": 0.37479, + "14": 0.38389, + "15": 0.37511, + "16": 0.3766, + "17": 0.37666, + "18": 0.37513, + "19": 0.36239, + "20": 0.34482, + "21": 0.36935, + "22": 0.37904, + "23": 0.36041, + "24": 0.35765, + "25": 0.36227, + "26": 0.3603, + "27": 0.36061, + "28": 0.35888, + "29": 0.36254, + "30": 0.3638, + "31": 0.36821, + "32": 0.36371, + "33": 0.36426, + "34": 0.63693, + "35": 0.38755, + "36": 0.37078, + "37": 0.36346, + "38": 0.36485, + "39": 0.36467, + "40": 0.43549, + "41": 0.35057, + "42": 0.35472, + "43": 0.35255, + "44": 0.34681, + "45": 0.34612, + "46": 0.3502, + "47": 0.34647, + "48": 0.7097, + "49": 0.34958, + "50": 0.34947, + "51": 0.68193, + "52": 0.66437, + "53": 0.6483, + "54": 0.35744, + "55": 0.34501, + "56": 0.35464, + "57": 0.3506, + "58": 0.34648, + "59": 0.35134, + "60": 0.34883, + "61": 0.34803, + "62": 0.35208, + "63": 0.3458, + "64": 0.34919, + "65": 0.35351, + "66": 0.35034, + "67": 0.34776, + "68": 0.35303, + "69": 0.34862, + "70": 0.35025, + "71": 0.35221, + "72": 0.34546, + "73": 0.34844, + "74": 0.35311, + "75": 0.34698, + "76": 0.34803, + "77": 0.34856, + "78": 0.34471, + "79": 0.64787, + "80": 0.34702, + "81": 0.35417, + "82": 0.34815, + "83": 0.34811, + "84": 0.36328, + "85": 0.35053, + "86": 0.34968, + "87": 0.641, + "88": 0.35086, + "89": 0.35762, + "90": 0.34969, + "91": 0.35083, + "92": 0.36212, + "93": 0.35255, + "94": 0.35084, + "95": 0.35297, + "96": 0.34869, + "97": 0.3518, + 
"98": 0.3551, + "99": 0.35073, + "100": 0.35332 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..5c3d959191a --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.38869, + "2": 10.39385, + "3": 9.78084, + "4": 9.59727, + "5": 9.38084, + "6": 9.40579, + "7": 9.30788, + "8": 9.24106, + "9": 9.12192, + "10": 9.05709, + "11": 8.87331, + "12": 8.7937, + "13": 8.84028, + "14": 8.68508, + "15": 8.65595, + "16": 8.54356, + "17": 8.50088, + "18": 8.39002, + "19": 8.36442, + "20": 8.26189, + "21": 8.27089, + "22": 8.14388, + "23": 8.07456, + "24": 8.11903, + "25": 7.98194, + "26": 8.08775, + "27": 7.87135, + "28": 7.96498, + "29": 7.80253, + "30": 7.86925, + "31": 7.81724, + "32": 7.68778, + "33": 7.78042, + "34": 7.55486, + "35": 7.66275, + "36": 7.52238, + "37": 7.44446, + "38": 7.50242, + "39": 7.45039, + "40": 7.5007, + "41": 7.39051, + "42": 7.36065, + "43": 7.43329, + "44": 7.3762, + "45": 7.34875, + "46": 7.28162, + "47": 7.46112, + "48": 7.28762, + "49": 7.35376, + "50": 7.18139, + "51": 7.36575, + "52": 7.1333, + "53": 7.11549, + "54": 7.22921, + "55": 7.15407, + "56": 7.22241, + "57": 7.32951, + "58": 7.02329, + "59": 7.11369, + "60": 7.14724, + "61": 7.11415, + "62": 7.24749, + "63": 7.15673, + "64": 7.08408, + "65": 6.99707, + "66": 7.06064, + "67": 7.04874, + "68": 7.14167, + "69": 7.0346, + "70": 7.06003, + "71": 6.92549, + "72": 7.00408, + "73": 6.97962, + "74": 6.92272, + "75": 7.0608, + "76": 6.97256, + "77": 7.08183, + "78": 7.01864, + "79": 
6.8552, + "80": 6.94288, + "81": 6.97634, + "82": 7.06647, + "83": 6.99975, + "84": 7.00894, + "85": 6.85973, + "86": 7.03631, + "87": 6.98045, + "88": 6.91491, + "89": 6.81048, + "90": 7.24972, + "91": 6.71004, + "92": 7.04898, + "93": 6.90555, + "94": 7.06456, + "95": 6.84835, + "96": 6.97647, + "97": 6.9631, + "98": 6.88688, + "99": 7.01307, + "100": 6.9828 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43318.0, + "2": 44050.0, + "3": 44756.0, + "4": 42391.0, + "5": 45385.0, + "6": 40966.0, + "7": 43182.0, + "8": 45459.0, + "9": 42453.0, + "10": 45371.0, + "11": 43978.0, + "12": 44598.0, + "13": 43892.0, + "14": 46190.0, + "15": 43897.0, + "16": 41608.0, + "17": 43825.0, + "18": 44703.0, + "19": 42550.0, + "20": 44769.0, + "21": 44793.0, + "22": 41844.0, + "23": 45444.0, + "24": 43071.0, + "25": 42476.0, + "26": 43926.0, + "27": 46218.0, + "28": 46430.0, + "29": 46178.0, + "30": 43985.0, + "31": 41281.0, + "32": 43347.0, + "33": 45448.0, + "34": 43305.0, + "35": 43264.0, + "36": 42485.0, + "37": 40077.0, + "38": 42514.0, + "39": 44723.0, + "40": 43230.0, + "41": 44653.0, + "42": 43269.0, + "43": 45446.0, + "44": 44588.0, + "45": 43278.0, + "46": 43896.0, + "47": 42369.0, + "48": 44704.0, + "49": 43172.0, + "50": 43381.0, + "51": 41175.0, + "52": 43812.0, + "53": 43934.0, + "54": 41932.0, + "55": 43857.0, + "56": 43277.0, + "57": 42576.0, + "58": 43835.0, + "59": 44629.0, + "60": 41225.0, + "61": 39716.0, + "62": 44773.0, + "63": 44717.0, + "64": 45367.0, + "65": 44683.0, + "66": 45367.0, + "67": 43136.0, + "68": 42523.0, + "69": 43828.0, + "70": 45534.0, + "71": 43316.0, + "72": 44750.0, + "73": 45364.0, + "74": 42445.0, + "75": 44679.0, + "76": 43875.0, + "77": 42100.0, + "78": 40289.0, + "79": 38949.0, + "80": 41115.0, + "81": 45362.0, + "82": 43205.0, + "83": 38475.0, + "84": 42459.0, + "85": 44010.0, + "86": 45731.0, + "87": 40860.0, + "88": 41793.0, + "89": 41068.0, + "90": 44673.0, + "91": 
46149.0, + "92": 41798.0, + "93": 43246.0, + "94": 39583.0, + "95": 44064.0, + "96": 44715.0, + "97": 45390.0, + "98": 41808.0, + "99": 45436.0, + "100": 42520.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2164472832.0, + "2": 2164472832.0, + "3": 2164472832.0, + "4": 2164472832.0, + "5": 2164472832.0, + "6": 2164472832.0, + "7": 2164472832.0, + "8": 2164472832.0, + "9": 2164472832.0, + "10": 2164472832.0, + "11": 2164472832.0, + "12": 2164472832.0, + "13": 2164472832.0, + "14": 2164472832.0, + "15": 2164472832.0, + "16": 2164472832.0, + "17": 2164472832.0, + "18": 2164472832.0, + "19": 2164472832.0, + "20": 2164472832.0, + "21": 2164472832.0, + "22": 2164472832.0, + "23": 2164472832.0, + "24": 2164472832.0, + "25": 2164472832.0, + "26": 2164472832.0, + "27": 2164472832.0, + "28": 2164472832.0, + "29": 2164472832.0, + "30": 2164472832.0, + "31": 2164472832.0, + "32": 2164472832.0, + "33": 2164472832.0, + "34": 2164472832.0, + "35": 2164472832.0, + "36": 2164472832.0, + "37": 2164472832.0, + "38": 2164472832.0, + "39": 2164472832.0, + "40": 2164472832.0, + "41": 2164472832.0, + "42": 2164472832.0, + "43": 2164472832.0, + "44": 2164472832.0, + "45": 2164472832.0, + "46": 2164472832.0, + "47": 2164472832.0, + "48": 2164472832.0, + "49": 2164472832.0, + "50": 2164472832.0, + "51": 2164472832.0, + "52": 2164472832.0, + "53": 2164472832.0, + "54": 2164472832.0, + "55": 2164472832.0, + "56": 2164472832.0, + "57": 2164472832.0, + "58": 2164472832.0, + "59": 2164472832.0, + "60": 2164472832.0, + "61": 2164472832.0, + "62": 2164472832.0, + "63": 2164472832.0, + "64": 2164472832.0, + "65": 2164472832.0, + "66": 2164472832.0, + "67": 2164472832.0, + "68": 2164472832.0, + "69": 2164472832.0, + "70": 2164472832.0, + "71": 2164472832.0, + "72": 2164472832.0, + "73": 2164472832.0, + "74": 2164472832.0, + "75": 2164472832.0, + "76": 2164472832.0, + "77": 2164472832.0, + "78": 2164472832.0, + "79": 
2164472832.0, + "80": 2164472832.0, + "81": 2164472832.0, + "82": 2164472832.0, + "83": 2164472832.0, + "84": 2164472832.0, + "85": 2164472832.0, + "86": 2164472832.0, + "87": 2164472832.0, + "88": 2164472832.0, + "89": 2164472832.0, + "90": 2164472832.0, + "91": 2164472832.0, + "92": 2164472832.0, + "93": 2164472832.0, + "94": 2164472832.0, + "95": 2164472832.0, + "96": 2164472832.0, + "97": 2164472832.0, + "98": 2164472832.0, + "99": 2164472832.0, + "100": 2164472832.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2413216256.0, + "2": 3345833472.0, + "3": 3345833472.0, + "4": 3345833472.0, + "5": 3345833472.0, + "6": 3345833472.0, + "7": 3345833472.0, + "8": 3345833472.0, + "9": 3345833472.0, + "10": 3345833472.0, + "11": 3345833472.0, + "12": 3345833472.0, + "13": 3345833472.0, + "14": 3345833472.0, + "15": 3345833472.0, + "16": 3345833472.0, + "17": 3345833472.0, + "18": 3345833472.0, + "19": 3345833472.0, + "20": 3345833472.0, + "21": 3345833472.0, + "22": 3345833472.0, + "23": 3345833472.0, + "24": 3345833472.0, + "25": 3345833472.0, + "26": 3345833472.0, + "27": 3345833472.0, + "28": 3345833472.0, + "29": 3345833472.0, + "30": 3345833472.0, + "31": 3345833472.0, + "32": 3345833472.0, + "33": 3345833472.0, + "34": 3345833472.0, + "35": 3345833472.0, + "36": 3345833472.0, + "37": 3345833472.0, + "38": 3345833472.0, + "39": 3345833472.0, + "40": 3345833472.0, + "41": 3345833472.0, + "42": 3345833472.0, + "43": 3345833472.0, + "44": 3345833472.0, + "45": 3345833472.0, + "46": 3345833472.0, + "47": 3345833472.0, + "48": 3345833472.0, + "49": 3345833472.0, + "50": 3345833472.0, + "51": 3345833472.0, + "52": 3345833472.0, + "53": 3345833472.0, + "54": 3345833472.0, + "55": 3345833472.0, + "56": 3345833472.0, + "57": 3345833472.0, + "58": 3345833472.0, + "59": 3345833472.0, + "60": 3345833472.0, + "61": 3345833472.0, + "62": 3345833472.0, + "63": 3345833472.0, + "64": 3345833472.0, + "65": 
3345833472.0, + "66": 3345833472.0, + "67": 3345833472.0, + "68": 3345833472.0, + "69": 3345833472.0, + "70": 3345833472.0, + "71": 3345833472.0, + "72": 3345833472.0, + "73": 3345833472.0, + "74": 3345833472.0, + "75": 3345833472.0, + "76": 3345833472.0, + "77": 3345833472.0, + "78": 3345833472.0, + "79": 3345833472.0, + "80": 3345833472.0, + "81": 3345833472.0, + "82": 3345833472.0, + "83": 3345833472.0, + "84": 3345833472.0, + "85": 3345833472.0, + "86": 3345833472.0, + "87": 3345833472.0, + "88": 3345833472.0, + "89": 3345833472.0, + "90": 3345833472.0, + "91": 3345833472.0, + "92": 3345833472.0, + "93": 3345833472.0, + "94": 3345833472.0, + "95": 3345833472.0, + "96": 3345833472.0, + "97": 3345833472.0, + "98": 3345833472.0, + "99": 3345833472.0, + "100": 3345833472.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.41599, + "2": 0.55044, + "3": 0.4601, + "4": 0.46093, + "5": 0.45888, + "6": 0.46549, + "7": 0.46196, + "8": 0.46392, + "9": 0.46142, + "10": 0.46273, + "11": 0.46181, + "12": 0.53125, + "13": 0.51435, + "14": 0.47772, + "15": 0.47916, + "16": 0.47028, + "17": 0.46912, + "18": 0.47611, + "19": 0.48447, + "20": 0.47544, + "21": 0.47048, + "22": 0.47872, + "23": 0.47823, + "24": 0.48021, + "25": 0.46999, + "26": 0.4776, + "27": 0.47549, + "28": 0.47983, + "29": 0.47292, + "30": 0.47463, + "31": 0.82354, + "32": 0.9356, + "33": 0.47582, + "34": 0.47311, + "35": 0.4737, + "36": 0.49142, + "37": 0.4757, + "38": 0.46626, + "39": 0.48967, + "40": 0.46469, + "41": 0.8495, + "42": 0.46682, + "43": 0.46339, + "44": 0.464, + "45": 0.46339, + "46": 0.4651, + "47": 0.46486, + "48": 0.7679, + "49": 0.82614, + "50": 0.46574, + "51": 0.81746, + "52": 0.80226, + "53": 0.46381, + "54": 0.51852, + "55": 0.46533, + "56": 0.46349, + "57": 0.46462, + "58": 0.46325, + "59": 0.46221, + "60": 0.98653, + "61": 0.46476, + "62": 0.46489, + "63": 0.4641, + "64": 0.46387, + "65": 0.46447, + "66": 0.46497, + "67": 
0.46419, + "68": 0.46372, + "69": 0.46378, + "70": 0.46549, + "71": 0.46682, + "72": 0.4674, + "73": 0.46459, + "74": 0.46681, + "75": 0.46573, + "76": 0.46408, + "77": 0.465, + "78": 0.46602, + "79": 0.49286, + "80": 0.46795, + "81": 0.46459, + "82": 0.46605, + "83": 0.46772, + "84": 0.4651, + "85": 0.4646, + "86": 0.46421, + "87": 0.46391, + "88": 0.46392, + "89": 0.4668, + "90": 0.46462, + "91": 0.46389, + "92": 0.46949, + "93": 0.46646, + "94": 0.46559, + "95": 0.46701, + "96": 0.46805, + "97": 0.46541, + "98": 0.46506, + "99": 0.46495, + "100": 0.46492 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..2482dd80c70 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.38869, + "2": 10.39385, + "3": 9.78084, + "4": 9.59727, + "5": 9.38084, + "6": 9.40579, + "7": 9.30788, + "8": 9.24106, + "9": 9.12192, + "10": 9.05709, + "11": 8.87331, + "12": 8.7937, + "13": 8.84028, + "14": 8.68508, + "15": 8.65595, + "16": 8.54356, + "17": 8.50088, + "18": 8.39002, + "19": 8.36442, + "20": 8.26189, + "21": 8.27089, + "22": 8.14388, + "23": 8.07456, + "24": 8.11903, + "25": 7.98194, + "26": 8.08775, + "27": 7.87135, + "28": 7.96498, + "29": 7.80253, + "30": 7.86925, + "31": 7.81724, + "32": 7.68778, + "33": 7.78042, + "34": 7.55486, + "35": 7.66275, + "36": 7.52238, + "37": 7.44446, + "38": 7.50242, + "39": 7.45039, + "40": 7.5007, + "41": 7.39051, + "42": 7.36065, + "43": 7.43329, + "44": 7.3762, + "45": 7.34875, + "46": 7.28162, + "47": 7.46112, + "48": 7.28762, + "49": 
7.35376, + "50": 7.18139, + "51": 7.36575, + "52": 7.1333, + "53": 7.11549, + "54": 7.22921, + "55": 7.15407, + "56": 7.22241, + "57": 7.32951, + "58": 7.02329, + "59": 7.11369, + "60": 7.14724, + "61": 7.11415, + "62": 7.24749, + "63": 7.15673, + "64": 7.08408, + "65": 6.99707, + "66": 7.06064, + "67": 7.04874, + "68": 7.14167, + "69": 7.0346, + "70": 7.06003, + "71": 6.92549, + "72": 7.00408, + "73": 6.97962, + "74": 6.92272, + "75": 7.0608, + "76": 6.97256, + "77": 7.08183, + "78": 7.01864, + "79": 6.8552, + "80": 6.94288, + "81": 6.97634, + "82": 7.06647, + "83": 6.99975, + "84": 7.00894, + "85": 6.85973, + "86": 7.03631, + "87": 6.98045, + "88": 6.91491, + "89": 6.81048, + "90": 7.24972, + "91": 6.71004, + "92": 7.04898, + "93": 6.90555, + "94": 7.06456, + "95": 6.84835, + "96": 6.97647, + "97": 6.9631, + "98": 6.88688, + "99": 7.01307, + "100": 6.9828 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43318.0, + "2": 44050.0, + "3": 44756.0, + "4": 42391.0, + "5": 45385.0, + "6": 40966.0, + "7": 43182.0, + "8": 45459.0, + "9": 42453.0, + "10": 45371.0, + "11": 43978.0, + "12": 44598.0, + "13": 43892.0, + "14": 46190.0, + "15": 43897.0, + "16": 41608.0, + "17": 43825.0, + "18": 44703.0, + "19": 42550.0, + "20": 44769.0, + "21": 44793.0, + "22": 41844.0, + "23": 45444.0, + "24": 43071.0, + "25": 42476.0, + "26": 43926.0, + "27": 46218.0, + "28": 46430.0, + "29": 46178.0, + "30": 43985.0, + "31": 41281.0, + "32": 43347.0, + "33": 45448.0, + "34": 43305.0, + "35": 43264.0, + "36": 42485.0, + "37": 40077.0, + "38": 42514.0, + "39": 44723.0, + "40": 43230.0, + "41": 44653.0, + "42": 43269.0, + "43": 45446.0, + "44": 44588.0, + "45": 43278.0, + "46": 43896.0, + "47": 42369.0, + "48": 44704.0, + "49": 43172.0, + "50": 43381.0, + "51": 41175.0, + "52": 43812.0, + "53": 43934.0, + "54": 41932.0, + "55": 43857.0, + "56": 43277.0, + "57": 42576.0, + "58": 43835.0, + "59": 44629.0, + "60": 41225.0, + "61": 39716.0, + 
"62": 44773.0, + "63": 44717.0, + "64": 45367.0, + "65": 44683.0, + "66": 45367.0, + "67": 43136.0, + "68": 42523.0, + "69": 43828.0, + "70": 45534.0, + "71": 43316.0, + "72": 44750.0, + "73": 45364.0, + "74": 42445.0, + "75": 44679.0, + "76": 43875.0, + "77": 42100.0, + "78": 40289.0, + "79": 38949.0, + "80": 41115.0, + "81": 45362.0, + "82": 43205.0, + "83": 38475.0, + "84": 42459.0, + "85": 44010.0, + "86": 45731.0, + "87": 40860.0, + "88": 41793.0, + "89": 41068.0, + "90": 44673.0, + "91": 46149.0, + "92": 41798.0, + "93": 43246.0, + "94": 39583.0, + "95": 44064.0, + "96": 44715.0, + "97": 45390.0, + "98": 41808.0, + "99": 45436.0, + "100": 42520.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2164472832.0, + "2": 2164472832.0, + "3": 2164472832.0, + "4": 2164472832.0, + "5": 2164472832.0, + "6": 2164472832.0, + "7": 2164472832.0, + "8": 2164472832.0, + "9": 2164472832.0, + "10": 2164472832.0, + "11": 2164472832.0, + "12": 2164472832.0, + "13": 2164472832.0, + "14": 2164472832.0, + "15": 2164472832.0, + "16": 2164472832.0, + "17": 2164472832.0, + "18": 2164472832.0, + "19": 2164472832.0, + "20": 2164472832.0, + "21": 2164472832.0, + "22": 2164472832.0, + "23": 2164472832.0, + "24": 2164472832.0, + "25": 2164472832.0, + "26": 2164472832.0, + "27": 2164472832.0, + "28": 2164472832.0, + "29": 2164472832.0, + "30": 2164472832.0, + "31": 2164472832.0, + "32": 2164472832.0, + "33": 2164472832.0, + "34": 2164472832.0, + "35": 2164472832.0, + "36": 2164472832.0, + "37": 2164472832.0, + "38": 2164472832.0, + "39": 2164472832.0, + "40": 2164472832.0, + "41": 2164472832.0, + "42": 2164472832.0, + "43": 2164472832.0, + "44": 2164472832.0, + "45": 2164472832.0, + "46": 2164472832.0, + "47": 2164472832.0, + "48": 2164472832.0, + "49": 2164472832.0, + "50": 2164472832.0, + "51": 2164472832.0, + "52": 2164472832.0, + "53": 2164472832.0, + "54": 2164472832.0, + "55": 2164472832.0, + "56": 2164472832.0, + 
"57": 2164472832.0, + "58": 2164472832.0, + "59": 2164472832.0, + "60": 2164472832.0, + "61": 2164472832.0, + "62": 2164472832.0, + "63": 2164472832.0, + "64": 2164472832.0, + "65": 2164472832.0, + "66": 2164472832.0, + "67": 2164472832.0, + "68": 2164472832.0, + "69": 2164472832.0, + "70": 2164472832.0, + "71": 2164472832.0, + "72": 2164472832.0, + "73": 2164472832.0, + "74": 2164472832.0, + "75": 2164472832.0, + "76": 2164472832.0, + "77": 2164472832.0, + "78": 2164472832.0, + "79": 2164472832.0, + "80": 2164472832.0, + "81": 2164472832.0, + "82": 2164472832.0, + "83": 2164472832.0, + "84": 2164472832.0, + "85": 2164472832.0, + "86": 2164472832.0, + "87": 2164472832.0, + "88": 2164472832.0, + "89": 2164472832.0, + "90": 2164472832.0, + "91": 2164472832.0, + "92": 2164472832.0, + "93": 2164472832.0, + "94": 2164472832.0, + "95": 2164472832.0, + "96": 2164472832.0, + "97": 2164472832.0, + "98": 2164472832.0, + "99": 2164472832.0, + "100": 2164472832.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2413216256.0, + "2": 3345833472.0, + "3": 3345833472.0, + "4": 3345833472.0, + "5": 3345833472.0, + "6": 3345833472.0, + "7": 3345833472.0, + "8": 3345833472.0, + "9": 3345833472.0, + "10": 3345833472.0, + "11": 3345833472.0, + "12": 3345833472.0, + "13": 3345833472.0, + "14": 3345833472.0, + "15": 3345833472.0, + "16": 3345833472.0, + "17": 3345833472.0, + "18": 3345833472.0, + "19": 3345833472.0, + "20": 3345833472.0, + "21": 3345833472.0, + "22": 3345833472.0, + "23": 3345833472.0, + "24": 3345833472.0, + "25": 3345833472.0, + "26": 3345833472.0, + "27": 3345833472.0, + "28": 3345833472.0, + "29": 3345833472.0, + "30": 3345833472.0, + "31": 3345833472.0, + "32": 3345833472.0, + "33": 3345833472.0, + "34": 3345833472.0, + "35": 3345833472.0, + "36": 3345833472.0, + "37": 3345833472.0, + "38": 3345833472.0, + "39": 3345833472.0, + "40": 3345833472.0, + "41": 3345833472.0, + "42": 3345833472.0, + 
"43": 3345833472.0, + "44": 3345833472.0, + "45": 3345833472.0, + "46": 3345833472.0, + "47": 3345833472.0, + "48": 3345833472.0, + "49": 3345833472.0, + "50": 3345833472.0, + "51": 3345833472.0, + "52": 3345833472.0, + "53": 3345833472.0, + "54": 3345833472.0, + "55": 3345833472.0, + "56": 3345833472.0, + "57": 3345833472.0, + "58": 3345833472.0, + "59": 3345833472.0, + "60": 3345833472.0, + "61": 3345833472.0, + "62": 3345833472.0, + "63": 3345833472.0, + "64": 3345833472.0, + "65": 3345833472.0, + "66": 3345833472.0, + "67": 3345833472.0, + "68": 3345833472.0, + "69": 3345833472.0, + "70": 3345833472.0, + "71": 3345833472.0, + "72": 3345833472.0, + "73": 3345833472.0, + "74": 3345833472.0, + "75": 3345833472.0, + "76": 3345833472.0, + "77": 3345833472.0, + "78": 3345833472.0, + "79": 3345833472.0, + "80": 3345833472.0, + "81": 3345833472.0, + "82": 3345833472.0, + "83": 3345833472.0, + "84": 3345833472.0, + "85": 3345833472.0, + "86": 3345833472.0, + "87": 3345833472.0, + "88": 3345833472.0, + "89": 3345833472.0, + "90": 3345833472.0, + "91": 3345833472.0, + "92": 3345833472.0, + "93": 3345833472.0, + "94": 3345833472.0, + "95": 3345833472.0, + "96": 3345833472.0, + "97": 3345833472.0, + "98": 3345833472.0, + "99": 3345833472.0, + "100": 3345833472.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.60644, + "2": 0.57986, + "3": 0.47823, + "4": 0.48281, + "5": 0.48093, + "6": 0.47347, + "7": 0.47326, + "8": 0.47378, + "9": 0.4723, + "10": 0.4709, + "11": 0.47371, + "12": 0.47257, + "13": 0.47211, + "14": 0.4725, + "15": 0.47332, + "16": 0.47413, + "17": 0.4746, + "18": 0.47281, + "19": 0.47707, + "20": 0.47306, + "21": 0.4732, + "22": 0.46995, + "23": 0.47593, + "24": 0.47349, + "25": 0.47467, + "26": 0.48697, + "27": 0.46764, + "28": 0.47083, + "29": 0.47011, + "30": 0.47001, + "31": 0.46787, + "32": 0.82338, + "33": 0.47926, + "34": 0.482, + "35": 0.46965, + "36": 0.4706, + "37": 0.93011, + "38": 
0.80405, + "39": 0.47254, + "40": 0.47196, + "41": 0.82549, + "42": 0.47441, + "43": 0.47469, + "44": 0.47149, + "45": 0.47417, + "46": 0.47445, + "47": 0.47452, + "48": 0.47581, + "49": 0.47293, + "50": 0.47057, + "51": 0.94959, + "52": 0.47119, + "53": 0.4725, + "54": 0.47393, + "55": 0.47401, + "56": 0.47324, + "57": 0.47407, + "58": 0.4761, + "59": 0.47586, + "60": 0.47378, + "61": 0.4733, + "62": 0.4737, + "63": 0.47104, + "64": 0.47276, + "65": 0.47318, + "66": 0.89402, + "67": 0.47315, + "68": 0.4734, + "69": 0.4712, + "70": 0.47401, + "71": 0.47383, + "72": 0.47295, + "73": 0.47295, + "74": 0.47389, + "75": 0.47397, + "76": 0.47329, + "77": 0.47294, + "78": 0.47471, + "79": 0.47574, + "80": 0.4753, + "81": 0.47352, + "82": 0.47352, + "83": 0.47483, + "84": 0.78574, + "85": 0.47734, + "86": 0.48545, + "87": 0.4736, + "88": 1.03977, + "89": 0.47047, + "90": 0.47102, + "91": 0.47334, + "92": 0.47576, + "93": 0.4727, + "94": 0.47956, + "95": 0.47304, + "96": 0.47172, + "97": 0.47639, + "98": 0.47474, + "99": 0.47123, + "100": 0.47327 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..bda6217caaa --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.37205, + "2": 10.36993, + "3": 9.85245, + "4": 9.61997, + "5": 9.40867, + "6": 9.43219, + "7": 9.31484, + "8": 9.27336, + "9": 9.11412, + "10": 9.03968, + "11": 8.87198, + "12": 8.80862, + "13": 8.83469, + "14": 8.69021, + "15": 8.66221, + "16": 8.54816, + "17": 8.50088, + "18": 8.42516, + "19": 8.38808, + 
"20": 8.28073, + "21": 8.26592, + "22": 8.15988, + "23": 8.11241, + "24": 8.14271, + "25": 7.98425, + "26": 8.10594, + "27": 7.88954, + "28": 7.9705, + "29": 7.81272, + "30": 7.87636, + "31": 7.82505, + "32": 7.70262, + "33": 7.80169, + "34": 7.56872, + "35": 7.67373, + "36": 7.54686, + "37": 7.47401, + "38": 7.50726, + "39": 7.49794, + "40": 7.51081, + "41": 7.41055, + "42": 7.37984, + "43": 7.44091, + "44": 7.39372, + "45": 7.37241, + "46": 7.28404, + "47": 7.46627, + "48": 7.29038, + "49": 7.35015, + "50": 7.17193, + "51": 7.37002, + "52": 7.14463, + "53": 7.12651, + "54": 7.23742, + "55": 7.15579, + "56": 7.23152, + "57": 7.3354, + "58": 7.01365, + "59": 7.11427, + "60": 7.15124, + "61": 7.1088, + "62": 7.26824, + "63": 7.15182, + "64": 7.08401, + "65": 6.99127, + "66": 7.05305, + "67": 7.04353, + "68": 7.13973, + "69": 7.03243, + "70": 7.05831, + "71": 6.90378, + "72": 6.99805, + "73": 6.97678, + "74": 6.91757, + "75": 7.06665, + "76": 6.95719, + "77": 7.08701, + "78": 7.03266, + "79": 6.8532, + "80": 6.93633, + "81": 6.97582, + "82": 7.0624, + "83": 6.98226, + "84": 7.00923, + "85": 6.8507, + "86": 7.04663, + "87": 6.97947, + "88": 6.91093, + "89": 6.8168, + "90": 7.24561, + "91": 6.7048, + "92": 7.05407, + "93": 6.89399, + "94": 7.0542, + "95": 6.85047, + "96": 6.96463, + "97": 6.95624, + "98": 6.8829, + "99": 7.00419, + "100": 6.98982 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43288.0, + "2": 44033.0, + "3": 44733.0, + "4": 42406.0, + "5": 45371.0, + "6": 40945.0, + "7": 43173.0, + "8": 45430.0, + "9": 42421.0, + "10": 45369.0, + "11": 43974.0, + "12": 44588.0, + "13": 43908.0, + "14": 46215.0, + "15": 43901.0, + "16": 41603.0, + "17": 43832.0, + "18": 44695.0, + "19": 42547.0, + "20": 44758.0, + "21": 44777.0, + "22": 41821.0, + "23": 45434.0, + "24": 43080.0, + "25": 42439.0, + "26": 43936.0, + "27": 46214.0, + "28": 46342.0, + "29": 46135.0, + "30": 43995.0, + "31": 41271.0, + "32": 43336.0, 
+ "33": 45440.0, + "34": 43287.0, + "35": 43240.0, + "36": 42490.0, + "37": 40078.0, + "38": 42510.0, + "39": 44722.0, + "40": 43230.0, + "41": 44669.0, + "42": 43262.0, + "43": 45476.0, + "44": 44624.0, + "45": 43326.0, + "46": 43945.0, + "47": 42395.0, + "48": 44675.0, + "49": 43169.0, + "50": 43381.0, + "51": 41131.0, + "52": 43830.0, + "53": 43914.0, + "54": 42004.0, + "55": 43871.0, + "56": 43227.0, + "57": 42550.0, + "58": 43816.0, + "59": 44631.0, + "60": 41183.0, + "61": 39721.0, + "62": 44752.0, + "63": 44696.0, + "64": 45351.0, + "65": 44694.0, + "66": 45350.0, + "67": 43132.0, + "68": 42535.0, + "69": 43829.0, + "70": 45533.0, + "71": 43322.0, + "72": 44749.0, + "73": 45365.0, + "74": 42492.0, + "75": 44655.0, + "76": 43920.0, + "77": 42080.0, + "78": 40298.0, + "79": 38909.0, + "80": 41117.0, + "81": 45370.0, + "82": 43206.0, + "83": 38501.0, + "84": 42484.0, + "85": 43986.0, + "86": 45704.0, + "87": 40839.0, + "88": 41828.0, + "89": 41074.0, + "90": 44663.0, + "91": 46169.0, + "92": 41807.0, + "93": 43228.0, + "94": 39549.0, + "95": 44090.0, + "96": 44711.0, + "97": 45390.0, + "98": 41799.0, + "99": 45426.0, + "100": 42443.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2194357248.0, + "2": 2194357248.0, + "3": 2194357248.0, + "4": 2194357248.0, + "5": 2194357248.0, + "6": 2194357248.0, + "7": 2194357248.0, + "8": 2194357248.0, + "9": 2194357248.0, + "10": 2194357248.0, + "11": 2194357248.0, + "12": 2194357248.0, + "13": 2194357248.0, + "14": 2194357248.0, + "15": 2194357248.0, + "16": 2194357248.0, + "17": 2194357248.0, + "18": 2194357248.0, + "19": 2194357248.0, + "20": 2194357248.0, + "21": 2194357248.0, + "22": 2194357248.0, + "23": 2194357248.0, + "24": 2194357248.0, + "25": 2194357248.0, + "26": 2194357248.0, + "27": 2194357248.0, + "28": 2194357248.0, + "29": 2194357248.0, + "30": 2194357248.0, + "31": 2194357248.0, + "32": 2194357248.0, + "33": 2194357248.0, + "34": 
2194357248.0, + "35": 2194357248.0, + "36": 2194357248.0, + "37": 2194357248.0, + "38": 2194357248.0, + "39": 2194357248.0, + "40": 2194357248.0, + "41": 2194357248.0, + "42": 2194357248.0, + "43": 2194357248.0, + "44": 2194357248.0, + "45": 2194357248.0, + "46": 2194357248.0, + "47": 2194357248.0, + "48": 2194357248.0, + "49": 2194357248.0, + "50": 2194357248.0, + "51": 2194357248.0, + "52": 2194357248.0, + "53": 2194357248.0, + "54": 2194357248.0, + "55": 2194357248.0, + "56": 2194357248.0, + "57": 2194357248.0, + "58": 2194357248.0, + "59": 2194357248.0, + "60": 2194357248.0, + "61": 2194357248.0, + "62": 2194357248.0, + "63": 2194357248.0, + "64": 2194357248.0, + "65": 2194357248.0, + "66": 2194357248.0, + "67": 2194357248.0, + "68": 2194357248.0, + "69": 2194357248.0, + "70": 2194357248.0, + "71": 2194357248.0, + "72": 2194357248.0, + "73": 2194357248.0, + "74": 2194357248.0, + "75": 2194357248.0, + "76": 2194357248.0, + "77": 2194357248.0, + "78": 2194357248.0, + "79": 2194357248.0, + "80": 2194357248.0, + "81": 2194357248.0, + "82": 2194357248.0, + "83": 2194357248.0, + "84": 2194357248.0, + "85": 2194357248.0, + "86": 2194357248.0, + "87": 2194357248.0, + "88": 2194357248.0, + "89": 2194357248.0, + "90": 2194357248.0, + "91": 2194357248.0, + "92": 2194357248.0, + "93": 2194357248.0, + "94": 2194357248.0, + "95": 2194357248.0, + "96": 2194357248.0, + "97": 2194357248.0, + "98": 2194357248.0, + "99": 2194357248.0, + "100": 2194357248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2443624960.0, + "2": 3375193600.0, + "3": 3375193600.0, + "4": 3375193600.0, + "5": 3375193600.0, + "6": 3375193600.0, + "7": 3375193600.0, + "8": 3375193600.0, + "9": 3375193600.0, + "10": 3375193600.0, + "11": 3375193600.0, + "12": 3375193600.0, + "13": 3375193600.0, + "14": 3375193600.0, + "15": 3375193600.0, + "16": 3375193600.0, + "17": 3375193600.0, + "18": 3375193600.0, + "19": 3375193600.0, + "20": 
3375193600.0, + "21": 3375193600.0, + "22": 3375193600.0, + "23": 3375193600.0, + "24": 3375193600.0, + "25": 3375193600.0, + "26": 3375193600.0, + "27": 3375193600.0, + "28": 3375193600.0, + "29": 3375193600.0, + "30": 3375193600.0, + "31": 3375193600.0, + "32": 3375193600.0, + "33": 3375193600.0, + "34": 3375193600.0, + "35": 3375193600.0, + "36": 3375193600.0, + "37": 3375193600.0, + "38": 3375193600.0, + "39": 3375193600.0, + "40": 3375193600.0, + "41": 3375193600.0, + "42": 3375193600.0, + "43": 3375193600.0, + "44": 3375193600.0, + "45": 3375193600.0, + "46": 3375193600.0, + "47": 3375193600.0, + "48": 3375193600.0, + "49": 3375193600.0, + "50": 3375193600.0, + "51": 3375193600.0, + "52": 3375193600.0, + "53": 3375193600.0, + "54": 3375193600.0, + "55": 3375193600.0, + "56": 3375193600.0, + "57": 3375193600.0, + "58": 3375193600.0, + "59": 3375193600.0, + "60": 3375193600.0, + "61": 3375193600.0, + "62": 3375193600.0, + "63": 3375193600.0, + "64": 3375193600.0, + "65": 3375193600.0, + "66": 3375193600.0, + "67": 3375193600.0, + "68": 3375193600.0, + "69": 3375193600.0, + "70": 3375193600.0, + "71": 3375193600.0, + "72": 3375193600.0, + "73": 3375193600.0, + "74": 3375193600.0, + "75": 3375193600.0, + "76": 3375193600.0, + "77": 3375193600.0, + "78": 3375193600.0, + "79": 3375193600.0, + "80": 3375193600.0, + "81": 3375193600.0, + "82": 3375193600.0, + "83": 3375193600.0, + "84": 3375193600.0, + "85": 3375193600.0, + "86": 3375193600.0, + "87": 3375193600.0, + "88": 3375193600.0, + "89": 3375193600.0, + "90": 3375193600.0, + "91": 3375193600.0, + "92": 3375193600.0, + "93": 3375193600.0, + "94": 3375193600.0, + "95": 3375193600.0, + "96": 3375193600.0, + "97": 3375193600.0, + "98": 3375193600.0, + "99": 3375193600.0, + "100": 3375193600.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.22746, + "2": 0.38672, + "3": 0.30057, + "4": 0.29952, + "5": 0.29937, + "6": 0.29647, + "7": 0.29649, + "8": 
0.29992, + "9": 0.29725, + "10": 0.29982, + "11": 0.29727, + "12": 0.3034, + "13": 0.29711, + "14": 0.29921, + "15": 0.2997, + "16": 0.29771, + "17": 0.29978, + "18": 0.30707, + "19": 0.30368, + "20": 0.30288, + "21": 0.30688, + "22": 0.30971, + "23": 0.29768, + "24": 0.30093, + "25": 0.30176, + "26": 0.30414, + "27": 0.29913, + "28": 0.29878, + "29": 0.29642, + "30": 0.3006, + "31": 0.30797, + "32": 0.30896, + "33": 0.30968, + "34": 0.3612, + "35": 0.30538, + "36": 0.30053, + "37": 0.59472, + "38": 0.30268, + "39": 0.306, + "40": 0.29983, + "41": 0.30255, + "42": 0.30761, + "43": 0.30015, + "44": 0.30214, + "45": 0.29904, + "46": 0.29871, + "47": 0.63098, + "48": 0.58973, + "49": 0.29989, + "50": 0.29759, + "51": 0.29699, + "52": 0.30117, + "53": 0.61374, + "54": 0.30194, + "55": 0.29408, + "56": 0.6341, + "57": 0.29608, + "58": 0.29787, + "59": 0.29707, + "60": 0.30154, + "61": 0.29779, + "62": 0.29855, + "63": 0.60825, + "64": 0.29897, + "65": 0.30635, + "66": 0.61882, + "67": 0.29871, + "68": 0.29693, + "69": 0.30148, + "70": 0.31212, + "71": 0.30211, + "72": 0.29679, + "73": 0.30078, + "74": 0.29883, + "75": 0.2978, + "76": 0.30303, + "77": 0.29772, + "78": 0.29776, + "79": 0.29689, + "80": 0.30425, + "81": 0.29967, + "82": 0.29825, + "83": 0.297, + "84": 0.30863, + "85": 0.30218, + "86": 0.30302, + "87": 0.30826, + "88": 0.30068, + "89": 0.29946, + "90": 0.60541, + "91": 0.30424, + "92": 0.30059, + "93": 0.30421, + "94": 0.30633, + "95": 0.29891, + "96": 0.35038, + "97": 0.29632, + "98": 0.29835, + "99": 0.29931, + "100": 0.30272 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..89582b25851 --- /dev/null +++ 
b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.37205, + "2": 10.36993, + "3": 9.85245, + "4": 9.61997, + "5": 9.40867, + "6": 9.43219, + "7": 9.31484, + "8": 9.27336, + "9": 9.11412, + "10": 9.03968, + "11": 8.87198, + "12": 8.80862, + "13": 8.83469, + "14": 8.69021, + "15": 8.66221, + "16": 8.54816, + "17": 8.50088, + "18": 8.42516, + "19": 8.38808, + "20": 8.28073, + "21": 8.26592, + "22": 8.15988, + "23": 8.11241, + "24": 8.14271, + "25": 7.98425, + "26": 8.10594, + "27": 7.88954, + "28": 7.9705, + "29": 7.81272, + "30": 7.87636, + "31": 7.82505, + "32": 7.70262, + "33": 7.80169, + "34": 7.56872, + "35": 7.67373, + "36": 7.54686, + "37": 7.47401, + "38": 7.50726, + "39": 7.49794, + "40": 7.51081, + "41": 7.41055, + "42": 7.37984, + "43": 7.44091, + "44": 7.39372, + "45": 7.37241, + "46": 7.28404, + "47": 7.46627, + "48": 7.29038, + "49": 7.35015, + "50": 7.17193, + "51": 7.37002, + "52": 7.14463, + "53": 7.12651, + "54": 7.23742, + "55": 7.15579, + "56": 7.23152, + "57": 7.3354, + "58": 7.01365, + "59": 7.11427, + "60": 7.15124, + "61": 7.1088, + "62": 7.26824, + "63": 7.15182, + "64": 7.08401, + "65": 6.99127, + "66": 7.05305, + "67": 7.04353, + "68": 7.13973, + "69": 7.03243, + "70": 7.05831, + "71": 6.90378, + "72": 6.99805, + "73": 6.97678, + "74": 6.91757, + "75": 7.06665, + "76": 6.95719, + "77": 7.08701, + "78": 7.03266, + "79": 6.8532, + "80": 6.93633, + "81": 6.97582, + "82": 7.0624, + "83": 6.98226, + "84": 7.00923, + "85": 6.8507, + "86": 7.04663, + "87": 6.97947, + "88": 6.91093, + "89": 6.8168, + "90": 7.24561, + "91": 6.7048, + "92": 7.05407, + "93": 6.89399, + "94": 7.0542, + "95": 6.85047, + "96": 6.96463, + "97": 6.95624, + "98": 6.8829, + "99": 7.00419, + "100": 6.98982 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 43288.0, + "2": 44033.0, + "3": 44733.0, + "4": 42406.0, + "5": 45371.0, + "6": 40945.0, + "7": 43173.0, + "8": 45430.0, + "9": 42421.0, + "10": 45369.0, + "11": 43974.0, + "12": 44588.0, + "13": 43908.0, + "14": 46215.0, + "15": 43901.0, + "16": 41603.0, + "17": 43832.0, + "18": 44695.0, + "19": 42547.0, + "20": 44758.0, + "21": 44777.0, + "22": 41821.0, + "23": 45434.0, + "24": 43080.0, + "25": 42439.0, + "26": 43936.0, + "27": 46214.0, + "28": 46342.0, + "29": 46135.0, + "30": 43995.0, + "31": 41271.0, + "32": 43336.0, + "33": 45440.0, + "34": 43287.0, + "35": 43240.0, + "36": 42490.0, + "37": 40078.0, + "38": 42510.0, + "39": 44722.0, + "40": 43230.0, + "41": 44669.0, + "42": 43262.0, + "43": 45476.0, + "44": 44624.0, + "45": 43326.0, + "46": 43945.0, + "47": 42395.0, + "48": 44675.0, + "49": 43169.0, + "50": 43381.0, + "51": 41131.0, + "52": 43830.0, + "53": 43914.0, + "54": 42004.0, + "55": 43871.0, + "56": 43227.0, + "57": 42550.0, + "58": 43816.0, + "59": 44631.0, + "60": 41183.0, + "61": 39721.0, + "62": 44752.0, + "63": 44696.0, + "64": 45351.0, + "65": 44694.0, + "66": 45350.0, + "67": 43132.0, + "68": 42535.0, + "69": 43829.0, + "70": 45533.0, + "71": 43322.0, + "72": 44749.0, + "73": 45365.0, + "74": 42492.0, + "75": 44655.0, + "76": 43920.0, + "77": 42080.0, + "78": 40298.0, + "79": 38909.0, + "80": 41117.0, + "81": 45370.0, + "82": 43206.0, + "83": 38501.0, + "84": 42484.0, + "85": 43986.0, + "86": 45704.0, + "87": 40839.0, + "88": 41828.0, + "89": 41074.0, + "90": 44663.0, + "91": 46169.0, + "92": 41807.0, + "93": 43228.0, + "94": 39549.0, + "95": 44090.0, + "96": 44711.0, + "97": 45390.0, + "98": 41799.0, + "99": 45426.0, + "100": 42443.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2194357248.0, + "2": 2194357248.0, + "3": 2194357248.0, + "4": 2194357248.0, + "5": 2194357248.0, + "6": 2194357248.0, + "7": 2194357248.0, + "8": 
2194357248.0, + "9": 2194357248.0, + "10": 2194357248.0, + "11": 2194357248.0, + "12": 2194357248.0, + "13": 2194357248.0, + "14": 2194357248.0, + "15": 2194357248.0, + "16": 2194357248.0, + "17": 2194357248.0, + "18": 2194357248.0, + "19": 2194357248.0, + "20": 2194357248.0, + "21": 2194357248.0, + "22": 2194357248.0, + "23": 2194357248.0, + "24": 2194357248.0, + "25": 2194357248.0, + "26": 2194357248.0, + "27": 2194357248.0, + "28": 2194357248.0, + "29": 2194357248.0, + "30": 2194357248.0, + "31": 2194357248.0, + "32": 2194357248.0, + "33": 2194357248.0, + "34": 2194357248.0, + "35": 2194357248.0, + "36": 2194357248.0, + "37": 2194357248.0, + "38": 2194357248.0, + "39": 2194357248.0, + "40": 2194357248.0, + "41": 2194357248.0, + "42": 2194357248.0, + "43": 2194357248.0, + "44": 2194357248.0, + "45": 2194357248.0, + "46": 2194357248.0, + "47": 2194357248.0, + "48": 2194357248.0, + "49": 2194357248.0, + "50": 2194357248.0, + "51": 2194357248.0, + "52": 2194357248.0, + "53": 2194357248.0, + "54": 2194357248.0, + "55": 2194357248.0, + "56": 2194357248.0, + "57": 2194357248.0, + "58": 2194357248.0, + "59": 2194357248.0, + "60": 2194357248.0, + "61": 2194357248.0, + "62": 2194357248.0, + "63": 2194357248.0, + "64": 2194357248.0, + "65": 2194357248.0, + "66": 2194357248.0, + "67": 2194357248.0, + "68": 2194357248.0, + "69": 2194357248.0, + "70": 2194357248.0, + "71": 2194357248.0, + "72": 2194357248.0, + "73": 2194357248.0, + "74": 2194357248.0, + "75": 2194357248.0, + "76": 2194357248.0, + "77": 2194357248.0, + "78": 2194357248.0, + "79": 2194357248.0, + "80": 2194357248.0, + "81": 2194357248.0, + "82": 2194357248.0, + "83": 2194357248.0, + "84": 2194357248.0, + "85": 2194357248.0, + "86": 2194357248.0, + "87": 2194357248.0, + "88": 2194357248.0, + "89": 2194357248.0, + "90": 2194357248.0, + "91": 2194357248.0, + "92": 2194357248.0, + "93": 2194357248.0, + "94": 2194357248.0, + "95": 2194357248.0, + "96": 2194357248.0, + "97": 2194357248.0, + "98": 2194357248.0, + 
"99": 2194357248.0, + "100": 2194357248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2443624960.0, + "2": 3375193600.0, + "3": 3375193600.0, + "4": 3375193600.0, + "5": 3375193600.0, + "6": 3375193600.0, + "7": 3375193600.0, + "8": 3375193600.0, + "9": 3375193600.0, + "10": 3375193600.0, + "11": 3375193600.0, + "12": 3375193600.0, + "13": 3375193600.0, + "14": 3375193600.0, + "15": 3375193600.0, + "16": 3375193600.0, + "17": 3375193600.0, + "18": 3375193600.0, + "19": 3375193600.0, + "20": 3375193600.0, + "21": 3375193600.0, + "22": 3375193600.0, + "23": 3375193600.0, + "24": 3375193600.0, + "25": 3375193600.0, + "26": 3375193600.0, + "27": 3375193600.0, + "28": 3375193600.0, + "29": 3375193600.0, + "30": 3375193600.0, + "31": 3375193600.0, + "32": 3375193600.0, + "33": 3375193600.0, + "34": 3375193600.0, + "35": 3375193600.0, + "36": 3375193600.0, + "37": 3375193600.0, + "38": 3375193600.0, + "39": 3375193600.0, + "40": 3375193600.0, + "41": 3375193600.0, + "42": 3375193600.0, + "43": 3375193600.0, + "44": 3375193600.0, + "45": 3375193600.0, + "46": 3375193600.0, + "47": 3375193600.0, + "48": 3375193600.0, + "49": 3375193600.0, + "50": 3375193600.0, + "51": 3375193600.0, + "52": 3375193600.0, + "53": 3375193600.0, + "54": 3375193600.0, + "55": 3375193600.0, + "56": 3375193600.0, + "57": 3375193600.0, + "58": 3375193600.0, + "59": 3375193600.0, + "60": 3375193600.0, + "61": 3375193600.0, + "62": 3375193600.0, + "63": 3375193600.0, + "64": 3375193600.0, + "65": 3375193600.0, + "66": 3375193600.0, + "67": 3375193600.0, + "68": 3375193600.0, + "69": 3375193600.0, + "70": 3375193600.0, + "71": 3375193600.0, + "72": 3375193600.0, + "73": 3375193600.0, + "74": 3375193600.0, + "75": 3375193600.0, + "76": 3375193600.0, + "77": 3375193600.0, + "78": 3375193600.0, + "79": 3375193600.0, + "80": 3375193600.0, + "81": 3375193600.0, + "82": 3375193600.0, + "83": 3375193600.0, + "84": 3375193600.0, + 
"85": 3375193600.0, + "86": 3375193600.0, + "87": 3375193600.0, + "88": 3375193600.0, + "89": 3375193600.0, + "90": 3375193600.0, + "91": 3375193600.0, + "92": 3375193600.0, + "93": 3375193600.0, + "94": 3375193600.0, + "95": 3375193600.0, + "96": 3375193600.0, + "97": 3375193600.0, + "98": 3375193600.0, + "99": 3375193600.0, + "100": 3375193600.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.37156, + "2": 0.38887, + "3": 0.36602, + "4": 0.35866, + "5": 0.36165, + "6": 0.37465, + "7": 0.35731, + "8": 0.3641, + "9": 0.35988, + "10": 0.35622, + "11": 0.36397, + "12": 0.36059, + "13": 0.35322, + "14": 0.36378, + "15": 0.35044, + "16": 0.351, + "17": 0.3614, + "18": 0.3499, + "19": 0.3502, + "20": 0.35899, + "21": 0.34832, + "22": 0.35463, + "23": 0.36264, + "24": 0.3582, + "25": 0.68028, + "26": 0.35807, + "27": 0.36086, + "28": 0.3546, + "29": 0.35008, + "30": 0.36639, + "31": 0.35917, + "32": 0.35093, + "33": 0.42545, + "34": 0.36458, + "35": 0.36139, + "36": 0.66018, + "37": 0.36179, + "38": 0.35264, + "39": 0.35347, + "40": 0.35947, + "41": 0.65933, + "42": 0.36488, + "43": 0.35596, + "44": 0.35639, + "45": 0.35817, + "46": 0.35914, + "47": 0.65482, + "48": 0.35543, + "49": 0.3548, + "50": 0.36559, + "51": 0.3585, + "52": 0.35668, + "53": 0.3592, + "54": 0.35503, + "55": 0.36108, + "56": 0.74128, + "57": 0.36657, + "58": 0.36018, + "59": 0.35608, + "60": 0.36593, + "61": 0.35388, + "62": 0.35617, + "63": 0.63145, + "64": 0.35737, + "65": 0.36509, + "66": 0.35793, + "67": 0.36215, + "68": 0.35502, + "69": 0.35608, + "70": 0.36406, + "71": 0.35939, + "72": 0.36012, + "73": 0.36102, + "74": 0.35997, + "75": 0.35821, + "76": 0.36372, + "77": 0.36015, + "78": 0.36089, + "79": 0.3626, + "80": 0.36632, + "81": 0.36481, + "82": 0.38444, + "83": 0.36154, + "84": 0.37204, + "85": 0.35784, + "86": 0.35591, + "87": 0.36678, + "88": 0.73353, + "89": 0.36867, + "90": 0.36231, + "91": 0.36826, + "92": 0.35945, + 
"93": 0.36394, + "94": 0.43835, + "95": 0.36152, + "96": 0.36154, + "97": 0.35778, + "98": 0.35857, + "99": 0.36061, + "100": 0.35857 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json index d92e66d3e29..a2d102b7a2b 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.36406, + "2": 10.37672, + "3": 9.84285, + "4": 9.61995, "5": 9.4049, + "6": 9.42891, + "7": 9.31288, + "8": 9.27047, + "9": 9.10629, "10": 9.03569, + "11": 8.86423, + "12": 8.80988, + "13": 8.8329, + "14": 8.69011, "15": 8.66187, + "16": 8.54768, + "17": 8.50183, + "18": 8.42362, + "19": 8.38674, "20": 8.27993, + "21": 8.26472, + "22": 8.15738, + "23": 8.11148, + "24": 8.14234, "25": 7.98343, + "26": 8.10636, + "27": 7.88853, + "28": 7.97024, + "29": 7.8121, "30": 7.87698, + "31": 7.82339, + "32": 7.70086, + "33": 7.80317, + "34": 7.56843, "35": 7.67276, + "36": 7.54942, + "37": 7.475, + "38": 7.51068, + "39": 7.49979, "40": 7.51131, + "41": 7.41252, + "42": 7.38333, + "43": 7.4414, + "44": 7.39857, "45": 7.37352, + "46": 7.28824, + "47": 7.4683, + "48": 7.29457, + "49": 7.35181, "50": 7.17223, + "51": 7.37216, + "52": 7.14588, + "53": 7.12384, + "54": 7.23984, "55": 7.15454, + "56": 7.23308, + "57": 7.33501, + "58": 7.01226, + "59": 7.12063, "60": 7.15043, + "61": 7.11076, + "62": 7.26458, + "63": 7.1544, + "64": 7.08651, "65": 6.99077, + "66": 7.05503, + "67": 
7.04463, + "68": 7.136, + "69": 7.03404, "70": 7.05994, + "71": 6.90146, + "72": 6.99845, + "73": 6.97783, + "74": 6.92205, "75": 7.06268, + "76": 6.95612, + "77": 7.08838, + "78": 7.02608, + "79": 6.85354, "80": 6.93543, + "81": 6.97396, + "82": 7.05854, + "83": 6.98003, + "84": 7.00602, "85": 6.84771, + "86": 7.04197, + "87": 6.97366, + "88": 6.90817, + "89": 6.80902, "90": 7.23999, + "91": 6.70221, + "92": 7.0543, + "93": 6.89332, + "94": 7.05002, "95": 6.84547, + "96": 6.96202, + "97": 6.95355, + "98": 6.8731, + "99": 6.99831, "100": 6.98508 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 43317.0, + "2": 44065.0, + "3": 44730.0, + "4": 42374.0, "5": 45387.0, + "6": 40937.0, + "7": 43166.0, + "8": 45433.0, + "9": 42439.0, "10": 45374.0, + "11": 43947.0, + "12": 44584.0, + "13": 43908.0, + "14": 46205.0, "15": 43901.0, + "16": 41607.0, + "17": 43831.0, + "18": 44698.0, + "19": 42543.0, "20": 44759.0, + "21": 44734.0, + "22": 41850.0, + "23": 45416.0, + "24": 43069.0, "25": 42442.0, + "26": 43923.0, + "27": 46212.0, + "28": 46362.0, + "29": 46133.0, "30": 43978.0, + "31": 41220.0, + "32": 43307.0, + "33": 45440.0, + "34": 43284.0, "35": 43248.0, + "36": 42437.0, + "37": 40066.0, + "38": 42483.0, + "39": 44702.0, "40": 43230.0, + "41": 44672.0, + "42": 43202.0, + "43": 45459.0, + "44": 44609.0, "45": 43265.0, + "46": 43915.0, + "47": 42366.0, + "48": 44650.0, + "49": 43139.0, "50": 43399.0, + "51": 41159.0, + "52": 43818.0, + "53": 43924.0, + "54": 41952.0, "55": 43866.0, + "56": 43239.0, + "57": 42540.0, + "58": 43856.0, + "59": 44589.0, "60": 41152.0, + "61": 39709.0, + "62": 44822.0, + "63": 44663.0, + "64": 45372.0, "65": 44676.0, + "66": 45345.0, + "67": 43130.0, + "68": 42567.0, + "69": 43812.0, "70": 45538.0, + "71": 43282.0, + "72": 44765.0, + "73": 45354.0, + "74": 42517.0, "75": 44666.0, + "76": 43904.0, + "77": 42041.0, + "78": 40320.0, + "79": 38914.0, "80": 41081.0, + "81": 45333.0, + 
"82": 43195.0, + "83": 38489.0, + "84": 42436.0, "85": 43978.0, + "86": 45680.0, + "87": 40832.0, + "88": 41797.0, + "89": 41083.0, "90": 44676.0, + "91": 46190.0, + "92": 41837.0, + "93": 43234.0, + "94": 39504.0, "95": 44067.0, + "96": 44684.0, + "97": 45419.0, + "98": 41854.0, + "99": 45431.0, "100": 42479.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2195405824.0, + "2": 2195405824.0, + "3": 2195405824.0, + "4": 2195405824.0, "5": 2195405824.0, + "6": 2195405824.0, + "7": 2195405824.0, + "8": 2195405824.0, + "9": 2195405824.0, "10": 2195405824.0, + "11": 2195405824.0, + "12": 2195405824.0, + "13": 2195405824.0, + "14": 2195405824.0, "15": 2195405824.0, + "16": 2195405824.0, + "17": 2195405824.0, + "18": 2195405824.0, + "19": 2195405824.0, "20": 2195405824.0, + "21": 2195405824.0, + "22": 2195405824.0, + "23": 2195405824.0, + "24": 2195405824.0, "25": 2195405824.0, + "26": 2195405824.0, + "27": 2195405824.0, + "28": 2195405824.0, + "29": 2195405824.0, "30": 2195405824.0, + "31": 2195405824.0, + "32": 2195405824.0, + "33": 2195405824.0, + "34": 2195405824.0, "35": 2195405824.0, + "36": 2195405824.0, + "37": 2195405824.0, + "38": 2195405824.0, + "39": 2195405824.0, "40": 2195405824.0, + "41": 2195405824.0, + "42": 2195405824.0, + "43": 2195405824.0, + "44": 2195405824.0, "45": 2195405824.0, + "46": 2195405824.0, + "47": 2195405824.0, + "48": 2195405824.0, + "49": 2195405824.0, "50": 2195405824.0, + "51": 2195405824.0, + "52": 2195405824.0, + "53": 2195405824.0, + "54": 2195405824.0, "55": 2195405824.0, + "56": 2195405824.0, + "57": 2195405824.0, + "58": 2195405824.0, + "59": 2195405824.0, "60": 2195405824.0, + "61": 2195405824.0, + "62": 2195405824.0, + "63": 2195405824.0, + "64": 2195405824.0, "65": 2195405824.0, + "66": 2195405824.0, + "67": 2195405824.0, + "68": 2195405824.0, + "69": 2195405824.0, "70": 2195405824.0, + "71": 2195405824.0, + "72": 2195405824.0, + "73": 2195405824.0, 
+ "74": 2195405824.0, "75": 2195405824.0, + "76": 2195405824.0, + "77": 2195405824.0, + "78": 2195405824.0, + "79": 2195405824.0, "80": 2195405824.0, + "81": 2195405824.0, + "82": 2195405824.0, + "83": 2195405824.0, + "84": 2195405824.0, "85": 2195405824.0, + "86": 2195405824.0, + "87": 2195405824.0, + "88": 2195405824.0, + "89": 2195405824.0, "90": 2195405824.0, + "91": 2195405824.0, + "92": 2195405824.0, + "93": 2195405824.0, + "94": 2195405824.0, "95": 2195405824.0, + "96": 2195405824.0, + "97": 2195405824.0, + "98": 2195405824.0, + "99": 2195405824.0, "100": 2195405824.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2302114304.0, + "2": 3236697600.0, + "3": 3236697600.0, + "4": 3236697600.0, "5": 3236697600.0, + "6": 3236697600.0, + "7": 3236697600.0, + "8": 3236697600.0, + "9": 3236697600.0, "10": 3236697600.0, + "11": 3236697600.0, + "12": 3236697600.0, + "13": 3236697600.0, + "14": 3236697600.0, "15": 3236697600.0, + "16": 3236697600.0, + "17": 3236697600.0, + "18": 3236697600.0, + "19": 3236697600.0, "20": 3236697600.0, + "21": 3236697600.0, + "22": 3236697600.0, + "23": 3236697600.0, + "24": 3236697600.0, "25": 3236697600.0, + "26": 3236697600.0, + "27": 3236697600.0, + "28": 3236697600.0, + "29": 3236697600.0, "30": 3236697600.0, + "31": 3236697600.0, + "32": 3236697600.0, + "33": 3236697600.0, + "34": 3236697600.0, "35": 3236697600.0, + "36": 3236697600.0, + "37": 3236697600.0, + "38": 3236697600.0, + "39": 3236697600.0, "40": 3236697600.0, + "41": 3236697600.0, + "42": 3236697600.0, + "43": 3236697600.0, + "44": 3236697600.0, "45": 3236697600.0, + "46": 3236697600.0, + "47": 3236697600.0, + "48": 3236697600.0, + "49": 3236697600.0, "50": 3236697600.0, + "51": 3236697600.0, + "52": 3236697600.0, + "53": 3236697600.0, + "54": 3236697600.0, "55": 3236697600.0, + "56": 3236697600.0, + "57": 3236697600.0, + "58": 3236697600.0, + "59": 3236697600.0, "60": 3236697600.0, + "61": 
3236697600.0, + "62": 3236697600.0, + "63": 3236697600.0, + "64": 3236697600.0, "65": 3236697600.0, + "66": 3236697600.0, + "67": 3236697600.0, + "68": 3236697600.0, + "69": 3236697600.0, "70": 3236697600.0, + "71": 3236697600.0, + "72": 3236697600.0, + "73": 3236697600.0, + "74": 3236697600.0, "75": 3236697600.0, + "76": 3236697600.0, + "77": 3236697600.0, + "78": 3236697600.0, + "79": 3236697600.0, "80": 3236697600.0, + "81": 3236697600.0, + "82": 3236697600.0, + "83": 3236697600.0, + "84": 3236697600.0, "85": 3236697600.0, + "86": 3236697600.0, + "87": 3236697600.0, + "88": 3236697600.0, + "89": 3236697600.0, "90": 3236697600.0, + "91": 3236697600.0, + "92": 3236697600.0, + "93": 3236697600.0, + "94": 3236697600.0, "95": 3236697600.0, + "96": 3236697600.0, + "97": 3236697600.0, + "98": 3236697600.0, + "99": 3236697600.0, "100": 3236697600.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 8.09413, - "5": 0.31937, - "10": 0.3209, - "15": 0.34398, - "20": 0.33703, - "25": 0.33879, - "30": 0.32402, - "35": 0.32278, - "40": 0.32002, - "45": 0.31746, - "50": 0.3177, - "55": 0.31702, - "60": 0.31688, - "65": 0.35512, - "70": 0.32025, - "75": 0.32573, - "80": 0.32598, - "85": 0.32473, - "90": 0.31989, - "95": 0.32153, - "100": 0.33062 + "1": 9.77057, + "2": 0.47803, + "3": 0.39521, + "4": 0.3896, + "5": 0.40677, + "6": 0.40092, + "7": 0.37896, + "8": 0.41825, + "9": 0.38419, + "10": 0.38253, + "11": 0.388, + "12": 0.37925, + "13": 0.38239, + "14": 0.38417, + "15": 0.38038, + "16": 0.38563, + "17": 0.37955, + "18": 0.37924, + "19": 0.38589, + "20": 0.38224, + "21": 0.38465, + "22": 0.39351, + "23": 0.39472, + "24": 0.41255, + "25": 0.37965, + "26": 0.38355, + "27": 0.38309, + "28": 0.38253, + "29": 0.38831, + "30": 0.39434, + "31": 0.38798, + "32": 0.39078, + "33": 0.38911, + "34": 0.39627, + "35": 0.39394, + "36": 0.38355, + "37": 0.39453, + "38": 0.39933, + "39": 0.77019, + "40": 0.39504, + "41": 
0.39035, + "42": 0.38272, + "43": 0.69367, + "44": 0.38983, + "45": 0.38622, + "46": 0.39091, + "47": 0.38234, + "48": 0.40833, + "49": 0.39525, + "50": 0.39478, + "51": 0.38185, + "52": 0.72146, + "53": 0.71311, + "54": 0.39457, + "55": 0.38277, + "56": 0.38969, + "57": 0.38363, + "58": 0.39928, + "59": 0.38579, + "60": 0.74396, + "61": 0.38508, + "62": 0.70202, + "63": 0.38295, + "64": 0.38027, + "65": 0.38758, + "66": 0.38184, + "67": 0.38386, + "68": 0.39654, + "69": 0.4087, + "70": 0.38668, + "71": 0.38146, + "72": 0.3836, + "73": 0.38965, + "74": 0.38207, + "75": 0.39256, + "76": 0.38363, + "77": 0.38092, + "78": 0.39131, + "79": 0.38231, + "80": 0.38962, + "81": 0.39663, + "82": 0.3956, + "83": 0.38416, + "84": 0.38159, + "85": 0.40841, + "86": 0.40201, + "87": 0.37934, + "88": 0.38888, + "89": 0.38181, + "90": 0.38763, + "91": 0.38558, + "92": 0.3862, + "93": 0.39397, + "94": 0.39231, + "95": 0.38616, + "96": 0.39411, + "97": 0.39063, + "98": 0.39664, + "99": 0.39039, + "100": 0.38619 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..42f8893c04e --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.38736, + "2": 10.37971, + "3": 9.79428, + "4": 9.59941, + "5": 9.38281, + "6": 9.40765, + "7": 9.31116, + "8": 9.25004, + "9": 9.1304, + "10": 9.06783, + "11": 8.89519, + "12": 8.8149, + "13": 8.82749, + "14": 8.69768, + "15": 8.65706, + "16": 8.54479, + "17": 8.50168, + "18": 8.39069, + "19": 
8.36692, + "20": 8.26603, + "21": 8.27533, + "22": 8.14757, + "23": 8.0735, + "24": 8.12127, + "25": 7.98158, + "26": 8.09181, + "27": 7.87361, + "28": 7.96832, + "29": 7.80579, + "30": 7.87182, + "31": 7.818, + "32": 7.69078, + "33": 7.7864, + "34": 7.55667, + "35": 7.66308, + "36": 7.52559, + "37": 7.44779, + "38": 7.50335, + "39": 7.45281, + "40": 7.50499, + "41": 7.38901, + "42": 7.36263, + "43": 7.43543, + "44": 7.37578, + "45": 7.3523, + "46": 7.2817, + "47": 7.46121, + "48": 7.29037, + "49": 7.35179, + "50": 7.17986, + "51": 7.36821, + "52": 7.13332, + "53": 7.11532, + "54": 7.23214, + "55": 7.15383, + "56": 7.22184, + "57": 7.33328, + "58": 7.02116, + "59": 7.11467, + "60": 7.14998, + "61": 7.1117, + "62": 7.25117, + "63": 7.15586, + "64": 7.08539, + "65": 6.99542, + "66": 7.05924, + "67": 7.04804, + "68": 7.13906, + "69": 7.03428, + "70": 7.0643, + "71": 6.9218, + "72": 7.00511, + "73": 6.97917, + "74": 6.92066, + "75": 7.06414, + "76": 6.97532, + "77": 7.0837, + "78": 7.01986, + "79": 6.86115, + "80": 6.94493, + "81": 6.97847, + "82": 7.06834, + "83": 6.99434, + "84": 7.01114, + "85": 6.8595, + "86": 7.04211, + "87": 6.98111, + "88": 6.91353, + "89": 6.81096, + "90": 7.25918, + "91": 6.71195, + "92": 7.05431, + "93": 6.91084, + "94": 7.06872, + "95": 6.84927, + "96": 6.98126, + "97": 6.96743, + "98": 6.89421, + "99": 7.0152, + "100": 6.99082 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43296.0, + "2": 44067.0, + "3": 44759.0, + "4": 42367.0, + "5": 45373.0, + "6": 40966.0, + "7": 43147.0, + "8": 45448.0, + "9": 42470.0, + "10": 45357.0, + "11": 43969.0, + "12": 44583.0, + "13": 43897.0, + "14": 46189.0, + "15": 43909.0, + "16": 41613.0, + "17": 43823.0, + "18": 44678.0, + "19": 42556.0, + "20": 44765.0, + "21": 44723.0, + "22": 41820.0, + "23": 45463.0, + "24": 43077.0, + "25": 42457.0, + "26": 43913.0, + "27": 46221.0, + "28": 46390.0, + "29": 46160.0, + "30": 43999.0, + "31": 41276.0, + "32": 
43316.0, + "33": 45432.0, + "34": 43303.0, + "35": 43276.0, + "36": 42461.0, + "37": 40045.0, + "38": 42557.0, + "39": 44701.0, + "40": 43214.0, + "41": 44667.0, + "42": 43241.0, + "43": 45448.0, + "44": 44605.0, + "45": 43265.0, + "46": 43892.0, + "47": 42375.0, + "48": 44656.0, + "49": 43182.0, + "50": 43383.0, + "51": 41130.0, + "52": 43841.0, + "53": 43918.0, + "54": 41894.0, + "55": 43861.0, + "56": 43229.0, + "57": 42488.0, + "58": 43831.0, + "59": 44616.0, + "60": 41267.0, + "61": 39701.0, + "62": 44746.0, + "63": 44704.0, + "64": 45346.0, + "65": 44696.0, + "66": 45356.0, + "67": 43133.0, + "68": 42535.0, + "69": 43803.0, + "70": 45504.0, + "71": 43309.0, + "72": 44800.0, + "73": 45401.0, + "74": 42467.0, + "75": 44661.0, + "76": 43882.0, + "77": 42110.0, + "78": 40337.0, + "79": 38924.0, + "80": 41077.0, + "81": 45349.0, + "82": 43228.0, + "83": 38446.0, + "84": 42443.0, + "85": 43970.0, + "86": 45668.0, + "87": 40846.0, + "88": 41780.0, + "89": 41056.0, + "90": 44657.0, + "91": 46133.0, + "92": 41748.0, + "93": 43205.0, + "94": 39556.0, + "95": 44047.0, + "96": 44668.0, + "97": 45383.0, + "98": 41817.0, + "99": 45425.0, + "100": 42429.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2166438912.0, + "2": 2166438912.0, + "3": 2166438912.0, + "4": 2166438912.0, + "5": 2166438912.0, + "6": 2166438912.0, + "7": 2166438912.0, + "8": 2166438912.0, + "9": 2166438912.0, + "10": 2166438912.0, + "11": 2166438912.0, + "12": 2166438912.0, + "13": 2166438912.0, + "14": 2166438912.0, + "15": 2166438912.0, + "16": 2166438912.0, + "17": 2166438912.0, + "18": 2166438912.0, + "19": 2166438912.0, + "20": 2166438912.0, + "21": 2166438912.0, + "22": 2166438912.0, + "23": 2166438912.0, + "24": 2166438912.0, + "25": 2166438912.0, + "26": 2166438912.0, + "27": 2166438912.0, + "28": 2166438912.0, + "29": 2166438912.0, + "30": 2166438912.0, + "31": 2166438912.0, + "32": 2166438912.0, + "33": 2166438912.0, + "34": 
2166438912.0, + "35": 2166438912.0, + "36": 2166438912.0, + "37": 2166438912.0, + "38": 2166438912.0, + "39": 2166438912.0, + "40": 2166438912.0, + "41": 2166438912.0, + "42": 2166438912.0, + "43": 2166438912.0, + "44": 2166438912.0, + "45": 2166438912.0, + "46": 2166438912.0, + "47": 2166438912.0, + "48": 2166438912.0, + "49": 2166438912.0, + "50": 2166438912.0, + "51": 2166438912.0, + "52": 2166438912.0, + "53": 2166438912.0, + "54": 2166438912.0, + "55": 2166438912.0, + "56": 2166438912.0, + "57": 2166438912.0, + "58": 2166438912.0, + "59": 2166438912.0, + "60": 2166438912.0, + "61": 2166438912.0, + "62": 2166438912.0, + "63": 2166438912.0, + "64": 2166438912.0, + "65": 2166438912.0, + "66": 2166438912.0, + "67": 2166438912.0, + "68": 2166438912.0, + "69": 2166438912.0, + "70": 2166438912.0, + "71": 2166438912.0, + "72": 2166438912.0, + "73": 2166438912.0, + "74": 2166438912.0, + "75": 2166438912.0, + "76": 2166438912.0, + "77": 2166438912.0, + "78": 2166438912.0, + "79": 2166438912.0, + "80": 2166438912.0, + "81": 2166438912.0, + "82": 2166438912.0, + "83": 2166438912.0, + "84": 2166438912.0, + "85": 2166438912.0, + "86": 2166438912.0, + "87": 2166438912.0, + "88": 2166438912.0, + "89": 2166438912.0, + "90": 2166438912.0, + "91": 2166438912.0, + "92": 2166438912.0, + "93": 2166438912.0, + "94": 2166438912.0, + "95": 2166438912.0, + "96": 2166438912.0, + "97": 2166438912.0, + "98": 2166438912.0, + "99": 2166438912.0, + "100": 2166438912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2274851328.0, + "2": 3206419968.0, + "3": 3206419968.0, + "4": 3206419968.0, + "5": 3206419968.0, + "6": 3206419968.0, + "7": 3206419968.0, + "8": 3206419968.0, + "9": 3206419968.0, + "10": 3206419968.0, + "11": 3206419968.0, + "12": 3206419968.0, + "13": 3206419968.0, + "14": 3206419968.0, + "15": 3206419968.0, + "16": 3206419968.0, + "17": 3206419968.0, + "18": 3206419968.0, + "19": 3206419968.0, + "20": 
3206419968.0, + "21": 3206419968.0, + "22": 3206419968.0, + "23": 3206419968.0, + "24": 3206419968.0, + "25": 3206419968.0, + "26": 3206419968.0, + "27": 3206419968.0, + "28": 3206419968.0, + "29": 3206419968.0, + "30": 3206419968.0, + "31": 3206419968.0, + "32": 3206419968.0, + "33": 3206419968.0, + "34": 3206419968.0, + "35": 3206419968.0, + "36": 3206419968.0, + "37": 3206419968.0, + "38": 3206419968.0, + "39": 3206419968.0, + "40": 3206419968.0, + "41": 3206419968.0, + "42": 3206419968.0, + "43": 3206419968.0, + "44": 3206419968.0, + "45": 3206419968.0, + "46": 3206419968.0, + "47": 3206419968.0, + "48": 3206419968.0, + "49": 3206419968.0, + "50": 3206419968.0, + "51": 3206419968.0, + "52": 3206419968.0, + "53": 3206419968.0, + "54": 3206419968.0, + "55": 3206419968.0, + "56": 3206419968.0, + "57": 3206419968.0, + "58": 3206419968.0, + "59": 3206419968.0, + "60": 3206419968.0, + "61": 3206419968.0, + "62": 3206419968.0, + "63": 3206419968.0, + "64": 3206419968.0, + "65": 3206419968.0, + "66": 3206419968.0, + "67": 3206419968.0, + "68": 3206419968.0, + "69": 3206419968.0, + "70": 3206419968.0, + "71": 3206419968.0, + "72": 3206419968.0, + "73": 3206419968.0, + "74": 3206419968.0, + "75": 3206419968.0, + "76": 3206419968.0, + "77": 3206419968.0, + "78": 3206419968.0, + "79": 3206419968.0, + "80": 3206419968.0, + "81": 3206419968.0, + "82": 3206419968.0, + "83": 3206419968.0, + "84": 3206419968.0, + "85": 3206419968.0, + "86": 3206419968.0, + "87": 3206419968.0, + "88": 3206419968.0, + "89": 3206419968.0, + "90": 3206419968.0, + "91": 3206419968.0, + "92": 3206419968.0, + "93": 3206419968.0, + "94": 3206419968.0, + "95": 3206419968.0, + "96": 3206419968.0, + "97": 3206419968.0, + "98": 3206419968.0, + "99": 3206419968.0, + "100": 3206419968.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.79361, + "2": 0.67288, + "3": 0.52904, + "4": 0.52848, + "5": 0.52694, + "6": 0.52432, + "7": 0.52615, + "8": 
0.52266, + "9": 0.52374, + "10": 0.5232, + "11": 0.52312, + "12": 0.52381, + "13": 0.52382, + "14": 0.52651, + "15": 0.52105, + "16": 0.52462, + "17": 0.52071, + "18": 0.52032, + "19": 0.52362, + "20": 0.54485, + "21": 0.52759, + "22": 0.52436, + "23": 0.52524, + "24": 0.52386, + "25": 0.52609, + "26": 0.98269, + "27": 0.52975, + "28": 0.52764, + "29": 0.5238, + "30": 0.90661, + "31": 0.52495, + "32": 0.52564, + "33": 0.55189, + "34": 0.52776, + "35": 0.52657, + "36": 0.94715, + "37": 0.52293, + "38": 0.51989, + "39": 0.52527, + "40": 1.00044, + "41": 0.51994, + "42": 0.52847, + "43": 0.52094, + "44": 0.52021, + "45": 0.83393, + "46": 0.52176, + "47": 0.52027, + "48": 0.52022, + "49": 0.92078, + "50": 0.52274, + "51": 0.52157, + "52": 0.51992, + "53": 0.52125, + "54": 0.52141, + "55": 0.52033, + "56": 0.52301, + "57": 0.52177, + "58": 0.52323, + "59": 0.52166, + "60": 1.02908, + "61": 0.52105, + "62": 0.84789, + "63": 0.52207, + "64": 0.52113, + "65": 0.52291, + "66": 0.52373, + "67": 0.5236, + "68": 0.52294, + "69": 0.52215, + "70": 0.5232, + "71": 0.5226, + "72": 0.52198, + "73": 0.52284, + "74": 0.52142, + "75": 0.52267, + "76": 0.52615, + "77": 0.51991, + "78": 0.52249, + "79": 0.52283, + "80": 0.522, + "81": 0.5205, + "82": 0.52145, + "83": 0.52129, + "84": 0.5242, + "85": 0.52276, + "86": 0.52121, + "87": 0.52263, + "88": 0.51919, + "89": 0.51905, + "90": 0.52153, + "91": 0.52154, + "92": 0.52132, + "93": 0.52497, + "94": 0.5276, + "95": 0.52062, + "96": 0.52743, + "97": 0.52114, + "98": 0.52333, + "99": 0.51967, + "100": 0.52209 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..47b085ccb06 --- /dev/null +++ 
b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.38736, + "2": 10.37971, + "3": 9.79428, + "4": 9.59941, + "5": 9.38281, + "6": 9.40765, + "7": 9.31116, + "8": 9.25004, + "9": 9.1304, + "10": 9.06783, + "11": 8.89519, + "12": 8.8149, + "13": 8.82749, + "14": 8.69768, + "15": 8.65706, + "16": 8.54479, + "17": 8.50168, + "18": 8.39069, + "19": 8.36692, + "20": 8.26603, + "21": 8.27533, + "22": 8.14757, + "23": 8.0735, + "24": 8.12127, + "25": 7.98158, + "26": 8.09181, + "27": 7.87361, + "28": 7.96832, + "29": 7.80579, + "30": 7.87182, + "31": 7.818, + "32": 7.69078, + "33": 7.7864, + "34": 7.55667, + "35": 7.66308, + "36": 7.52559, + "37": 7.44779, + "38": 7.50335, + "39": 7.45281, + "40": 7.50499, + "41": 7.38901, + "42": 7.36263, + "43": 7.43543, + "44": 7.37578, + "45": 7.3523, + "46": 7.2817, + "47": 7.46121, + "48": 7.29037, + "49": 7.35179, + "50": 7.17986, + "51": 7.36821, + "52": 7.13332, + "53": 7.11532, + "54": 7.23214, + "55": 7.15383, + "56": 7.22184, + "57": 7.33328, + "58": 7.02116, + "59": 7.11467, + "60": 7.14998, + "61": 7.1117, + "62": 7.25117, + "63": 7.15586, + "64": 7.08539, + "65": 6.99542, + "66": 7.05924, + "67": 7.04804, + "68": 7.13906, + "69": 7.03428, + "70": 7.0643, + "71": 6.9218, + "72": 7.00511, + "73": 6.97917, + "74": 6.92066, + "75": 7.06414, + "76": 6.97532, + "77": 7.0837, + "78": 7.01986, + "79": 6.86115, + "80": 6.94493, + "81": 6.97847, + "82": 7.06834, + "83": 6.99434, + "84": 7.01114, + "85": 6.8595, + "86": 7.04211, + "87": 6.98111, + "88": 6.91353, + "89": 6.81096, + "90": 7.25918, + "91": 6.71195, + "92": 7.05431, + "93": 6.91084, + "94": 7.06872, + "95": 6.84927, + "96": 6.98126, + "97": 6.96743, + "98": 6.89421, + "99": 7.0152, + "100": 6.99082 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 
100, + "step_interval": 1, + "values": { + "1": 43296.0, + "2": 44067.0, + "3": 44759.0, + "4": 42367.0, + "5": 45373.0, + "6": 40966.0, + "7": 43147.0, + "8": 45448.0, + "9": 42470.0, + "10": 45357.0, + "11": 43969.0, + "12": 44583.0, + "13": 43897.0, + "14": 46189.0, + "15": 43909.0, + "16": 41613.0, + "17": 43823.0, + "18": 44678.0, + "19": 42556.0, + "20": 44765.0, + "21": 44723.0, + "22": 41820.0, + "23": 45463.0, + "24": 43077.0, + "25": 42457.0, + "26": 43913.0, + "27": 46221.0, + "28": 46390.0, + "29": 46160.0, + "30": 43999.0, + "31": 41276.0, + "32": 43316.0, + "33": 45432.0, + "34": 43303.0, + "35": 43276.0, + "36": 42461.0, + "37": 40045.0, + "38": 42557.0, + "39": 44701.0, + "40": 43214.0, + "41": 44667.0, + "42": 43241.0, + "43": 45448.0, + "44": 44605.0, + "45": 43265.0, + "46": 43892.0, + "47": 42375.0, + "48": 44656.0, + "49": 43182.0, + "50": 43383.0, + "51": 41130.0, + "52": 43841.0, + "53": 43918.0, + "54": 41894.0, + "55": 43861.0, + "56": 43229.0, + "57": 42488.0, + "58": 43831.0, + "59": 44616.0, + "60": 41267.0, + "61": 39701.0, + "62": 44746.0, + "63": 44704.0, + "64": 45346.0, + "65": 44696.0, + "66": 45356.0, + "67": 43133.0, + "68": 42535.0, + "69": 43803.0, + "70": 45504.0, + "71": 43309.0, + "72": 44800.0, + "73": 45401.0, + "74": 42467.0, + "75": 44661.0, + "76": 43882.0, + "77": 42110.0, + "78": 40337.0, + "79": 38924.0, + "80": 41077.0, + "81": 45349.0, + "82": 43228.0, + "83": 38446.0, + "84": 42443.0, + "85": 43970.0, + "86": 45668.0, + "87": 40846.0, + "88": 41780.0, + "89": 41056.0, + "90": 44657.0, + "91": 46133.0, + "92": 41748.0, + "93": 43205.0, + "94": 39556.0, + "95": 44047.0, + "96": 44668.0, + "97": 45383.0, + "98": 41817.0, + "99": 45425.0, + "100": 42429.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2166438912.0, + "2": 2166438912.0, + "3": 2166438912.0, + "4": 2166438912.0, + "5": 2166438912.0, + "6": 2166438912.0, + "7": 2166438912.0, + "8": 
2166438912.0, + "9": 2166438912.0, + "10": 2166438912.0, + "11": 2166438912.0, + "12": 2166438912.0, + "13": 2166438912.0, + "14": 2166438912.0, + "15": 2166438912.0, + "16": 2166438912.0, + "17": 2166438912.0, + "18": 2166438912.0, + "19": 2166438912.0, + "20": 2166438912.0, + "21": 2166438912.0, + "22": 2166438912.0, + "23": 2166438912.0, + "24": 2166438912.0, + "25": 2166438912.0, + "26": 2166438912.0, + "27": 2166438912.0, + "28": 2166438912.0, + "29": 2166438912.0, + "30": 2166438912.0, + "31": 2166438912.0, + "32": 2166438912.0, + "33": 2166438912.0, + "34": 2166438912.0, + "35": 2166438912.0, + "36": 2166438912.0, + "37": 2166438912.0, + "38": 2166438912.0, + "39": 2166438912.0, + "40": 2166438912.0, + "41": 2166438912.0, + "42": 2166438912.0, + "43": 2166438912.0, + "44": 2166438912.0, + "45": 2166438912.0, + "46": 2166438912.0, + "47": 2166438912.0, + "48": 2166438912.0, + "49": 2166438912.0, + "50": 2166438912.0, + "51": 2166438912.0, + "52": 2166438912.0, + "53": 2166438912.0, + "54": 2166438912.0, + "55": 2166438912.0, + "56": 2166438912.0, + "57": 2166438912.0, + "58": 2166438912.0, + "59": 2166438912.0, + "60": 2166438912.0, + "61": 2166438912.0, + "62": 2166438912.0, + "63": 2166438912.0, + "64": 2166438912.0, + "65": 2166438912.0, + "66": 2166438912.0, + "67": 2166438912.0, + "68": 2166438912.0, + "69": 2166438912.0, + "70": 2166438912.0, + "71": 2166438912.0, + "72": 2166438912.0, + "73": 2166438912.0, + "74": 2166438912.0, + "75": 2166438912.0, + "76": 2166438912.0, + "77": 2166438912.0, + "78": 2166438912.0, + "79": 2166438912.0, + "80": 2166438912.0, + "81": 2166438912.0, + "82": 2166438912.0, + "83": 2166438912.0, + "84": 2166438912.0, + "85": 2166438912.0, + "86": 2166438912.0, + "87": 2166438912.0, + "88": 2166438912.0, + "89": 2166438912.0, + "90": 2166438912.0, + "91": 2166438912.0, + "92": 2166438912.0, + "93": 2166438912.0, + "94": 2166438912.0, + "95": 2166438912.0, + "96": 2166438912.0, + "97": 2166438912.0, + "98": 2166438912.0, + 
"99": 2166438912.0, + "100": 2166438912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2274851328.0, + "2": 3206419968.0, + "3": 3206419968.0, + "4": 3206419968.0, + "5": 3206419968.0, + "6": 3206419968.0, + "7": 3206419968.0, + "8": 3206419968.0, + "9": 3206419968.0, + "10": 3206419968.0, + "11": 3206419968.0, + "12": 3206419968.0, + "13": 3206419968.0, + "14": 3206419968.0, + "15": 3206419968.0, + "16": 3206419968.0, + "17": 3206419968.0, + "18": 3206419968.0, + "19": 3206419968.0, + "20": 3206419968.0, + "21": 3206419968.0, + "22": 3206419968.0, + "23": 3206419968.0, + "24": 3206419968.0, + "25": 3206419968.0, + "26": 3206419968.0, + "27": 3206419968.0, + "28": 3206419968.0, + "29": 3206419968.0, + "30": 3206419968.0, + "31": 3206419968.0, + "32": 3206419968.0, + "33": 3206419968.0, + "34": 3206419968.0, + "35": 3206419968.0, + "36": 3206419968.0, + "37": 3206419968.0, + "38": 3206419968.0, + "39": 3206419968.0, + "40": 3206419968.0, + "41": 3206419968.0, + "42": 3206419968.0, + "43": 3206419968.0, + "44": 3206419968.0, + "45": 3206419968.0, + "46": 3206419968.0, + "47": 3206419968.0, + "48": 3206419968.0, + "49": 3206419968.0, + "50": 3206419968.0, + "51": 3206419968.0, + "52": 3206419968.0, + "53": 3206419968.0, + "54": 3206419968.0, + "55": 3206419968.0, + "56": 3206419968.0, + "57": 3206419968.0, + "58": 3206419968.0, + "59": 3206419968.0, + "60": 3206419968.0, + "61": 3206419968.0, + "62": 3206419968.0, + "63": 3206419968.0, + "64": 3206419968.0, + "65": 3206419968.0, + "66": 3206419968.0, + "67": 3206419968.0, + "68": 3206419968.0, + "69": 3206419968.0, + "70": 3206419968.0, + "71": 3206419968.0, + "72": 3206419968.0, + "73": 3206419968.0, + "74": 3206419968.0, + "75": 3206419968.0, + "76": 3206419968.0, + "77": 3206419968.0, + "78": 3206419968.0, + "79": 3206419968.0, + "80": 3206419968.0, + "81": 3206419968.0, + "82": 3206419968.0, + "83": 3206419968.0, + "84": 3206419968.0, + 
"85": 3206419968.0, + "86": 3206419968.0, + "87": 3206419968.0, + "88": 3206419968.0, + "89": 3206419968.0, + "90": 3206419968.0, + "91": 3206419968.0, + "92": 3206419968.0, + "93": 3206419968.0, + "94": 3206419968.0, + "95": 3206419968.0, + "96": 3206419968.0, + "97": 3206419968.0, + "98": 3206419968.0, + "99": 3206419968.0, + "100": 3206419968.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.73376, + "2": 0.65941, + "3": 0.51203, + "4": 0.51525, + "5": 0.52038, + "6": 0.51334, + "7": 0.51752, + "8": 0.5127, + "9": 0.51252, + "10": 0.51101, + "11": 0.51366, + "12": 0.50297, + "13": 0.50253, + "14": 0.50965, + "15": 0.50415, + "16": 0.50379, + "17": 0.50831, + "18": 0.50394, + "19": 0.50529, + "20": 0.50608, + "21": 0.51227, + "22": 0.50603, + "23": 0.50603, + "24": 0.50551, + "25": 0.5064, + "26": 0.5045, + "27": 0.50456, + "28": 0.50408, + "29": 0.50983, + "30": 0.97806, + "31": 0.93746, + "32": 0.50302, + "33": 0.51581, + "34": 0.52445, + "35": 0.51009, + "36": 0.51001, + "37": 0.98759, + "38": 0.5072, + "39": 0.50626, + "40": 0.53153, + "41": 0.84585, + "42": 0.50894, + "43": 0.51171, + "44": 0.99354, + "45": 1.01626, + "46": 0.51162, + "47": 0.509, + "48": 0.51118, + "49": 0.5092, + "50": 0.50955, + "51": 0.5099, + "52": 0.88089, + "53": 0.92181, + "54": 0.50199, + "55": 0.50201, + "56": 0.5042, + "57": 0.50152, + "58": 0.50188, + "59": 0.50229, + "60": 0.5022, + "61": 0.50158, + "62": 0.50418, + "63": 0.50455, + "64": 0.50212, + "65": 0.50523, + "66": 0.50164, + "67": 0.50093, + "68": 0.49939, + "69": 0.49983, + "70": 0.50804, + "71": 0.51035, + "72": 0.51332, + "73": 0.49997, + "74": 0.50164, + "75": 0.51172, + "76": 0.50371, + "77": 0.50466, + "78": 0.50784, + "79": 0.51289, + "80": 0.50935, + "81": 0.50705, + "82": 0.50671, + "83": 0.50317, + "84": 0.50489, + "85": 0.52254, + "86": 0.50659, + "87": 0.50805, + "88": 0.50211, + "89": 0.50127, + "90": 0.50552, + "91": 0.5025, + "92": 0.50458, 
+ "93": 0.50451, + "94": 0.50155, + "95": 0.50402, + "96": 0.50113, + "97": 0.50935, + "98": 0.50158, + "99": 0.50243, + "100": 0.50094 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..3be9df673c7 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.36406, + "2": 10.37672, + "3": 9.84285, + "4": 9.61995, + "5": 9.4049, + "6": 9.42891, + "7": 9.31288, + "8": 9.27047, + "9": 9.10629, + "10": 9.03569, + "11": 8.86423, + "12": 8.80988, + "13": 8.8329, + "14": 8.69011, + "15": 8.66187, + "16": 8.54768, + "17": 8.50183, + "18": 8.42362, + "19": 8.38674, + "20": 8.27993, + "21": 8.26472, + "22": 8.15738, + "23": 8.11148, + "24": 8.14234, + "25": 7.98343, + "26": 8.10636, + "27": 7.88853, + "28": 7.97024, + "29": 7.8121, + "30": 7.87698, + "31": 7.82339, + "32": 7.70086, + "33": 7.80317, + "34": 7.56843, + "35": 7.67276, + "36": 7.54942, + "37": 7.475, + "38": 7.51068, + "39": 7.49979, + "40": 7.51131, + "41": 7.41252, + "42": 7.38333, + "43": 7.4414, + "44": 7.39857, + "45": 7.37352, + "46": 7.28824, + "47": 7.4683, + "48": 7.29457, + "49": 7.35181, + "50": 7.17223, + "51": 7.37216, + "52": 7.14588, + "53": 7.12384, + "54": 7.23984, + "55": 7.15454, + "56": 7.23308, + "57": 7.33501, + "58": 7.01226, + "59": 7.12063, + "60": 7.15043, + "61": 7.11076, + "62": 7.26458, + "63": 7.1544, + "64": 7.08651, + "65": 6.99077, + "66": 7.05503, + "67": 7.04463, + "68": 7.136, + "69": 7.03404, + "70": 7.05994, + "71": 
6.90146, + "72": 6.99845, + "73": 6.97783, + "74": 6.92205, + "75": 7.06268, + "76": 6.95612, + "77": 7.08838, + "78": 7.02608, + "79": 6.85354, + "80": 6.93543, + "81": 6.97396, + "82": 7.05854, + "83": 6.98003, + "84": 7.00602, + "85": 6.84771, + "86": 7.04197, + "87": 6.97366, + "88": 6.90817, + "89": 6.80902, + "90": 7.23999, + "91": 6.70221, + "92": 7.0543, + "93": 6.89332, + "94": 7.05002, + "95": 6.84547, + "96": 6.96202, + "97": 6.95355, + "98": 6.8731, + "99": 6.99831, + "100": 6.98508 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43317.0, + "2": 44065.0, + "3": 44730.0, + "4": 42374.0, + "5": 45387.0, + "6": 40937.0, + "7": 43166.0, + "8": 45433.0, + "9": 42439.0, + "10": 45374.0, + "11": 43947.0, + "12": 44584.0, + "13": 43908.0, + "14": 46205.0, + "15": 43901.0, + "16": 41607.0, + "17": 43831.0, + "18": 44698.0, + "19": 42543.0, + "20": 44759.0, + "21": 44734.0, + "22": 41850.0, + "23": 45416.0, + "24": 43069.0, + "25": 42442.0, + "26": 43923.0, + "27": 46212.0, + "28": 46362.0, + "29": 46133.0, + "30": 43978.0, + "31": 41220.0, + "32": 43307.0, + "33": 45440.0, + "34": 43284.0, + "35": 43248.0, + "36": 42437.0, + "37": 40066.0, + "38": 42483.0, + "39": 44702.0, + "40": 43230.0, + "41": 44672.0, + "42": 43202.0, + "43": 45459.0, + "44": 44609.0, + "45": 43265.0, + "46": 43915.0, + "47": 42366.0, + "48": 44650.0, + "49": 43139.0, + "50": 43399.0, + "51": 41159.0, + "52": 43818.0, + "53": 43924.0, + "54": 41952.0, + "55": 43866.0, + "56": 43239.0, + "57": 42540.0, + "58": 43856.0, + "59": 44589.0, + "60": 41152.0, + "61": 39709.0, + "62": 44822.0, + "63": 44663.0, + "64": 45372.0, + "65": 44676.0, + "66": 45345.0, + "67": 43130.0, + "68": 42567.0, + "69": 43812.0, + "70": 45538.0, + "71": 43282.0, + "72": 44765.0, + "73": 45354.0, + "74": 42517.0, + "75": 44666.0, + "76": 43904.0, + "77": 42041.0, + "78": 40320.0, + "79": 38914.0, + "80": 41081.0, + "81": 45333.0, + "82": 43195.0, + "83": 
38489.0, + "84": 42436.0, + "85": 43978.0, + "86": 45680.0, + "87": 40832.0, + "88": 41797.0, + "89": 41083.0, + "90": 44676.0, + "91": 46190.0, + "92": 41837.0, + "93": 43234.0, + "94": 39504.0, + "95": 44067.0, + "96": 44684.0, + "97": 45419.0, + "98": 41854.0, + "99": 45431.0, + "100": 42479.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2195405824.0, + "2": 2195405824.0, + "3": 2195405824.0, + "4": 2195405824.0, + "5": 2195405824.0, + "6": 2195405824.0, + "7": 2195405824.0, + "8": 2195405824.0, + "9": 2195405824.0, + "10": 2195405824.0, + "11": 2195405824.0, + "12": 2195405824.0, + "13": 2195405824.0, + "14": 2195405824.0, + "15": 2195405824.0, + "16": 2195405824.0, + "17": 2195405824.0, + "18": 2195405824.0, + "19": 2195405824.0, + "20": 2195405824.0, + "21": 2195405824.0, + "22": 2195405824.0, + "23": 2195405824.0, + "24": 2195405824.0, + "25": 2195405824.0, + "26": 2195405824.0, + "27": 2195405824.0, + "28": 2195405824.0, + "29": 2195405824.0, + "30": 2195405824.0, + "31": 2195405824.0, + "32": 2195405824.0, + "33": 2195405824.0, + "34": 2195405824.0, + "35": 2195405824.0, + "36": 2195405824.0, + "37": 2195405824.0, + "38": 2195405824.0, + "39": 2195405824.0, + "40": 2195405824.0, + "41": 2195405824.0, + "42": 2195405824.0, + "43": 2195405824.0, + "44": 2195405824.0, + "45": 2195405824.0, + "46": 2195405824.0, + "47": 2195405824.0, + "48": 2195405824.0, + "49": 2195405824.0, + "50": 2195405824.0, + "51": 2195405824.0, + "52": 2195405824.0, + "53": 2195405824.0, + "54": 2195405824.0, + "55": 2195405824.0, + "56": 2195405824.0, + "57": 2195405824.0, + "58": 2195405824.0, + "59": 2195405824.0, + "60": 2195405824.0, + "61": 2195405824.0, + "62": 2195405824.0, + "63": 2195405824.0, + "64": 2195405824.0, + "65": 2195405824.0, + "66": 2195405824.0, + "67": 2195405824.0, + "68": 2195405824.0, + "69": 2195405824.0, + "70": 2195405824.0, + "71": 2195405824.0, + "72": 2195405824.0, + "73": 
2195405824.0, + "74": 2195405824.0, + "75": 2195405824.0, + "76": 2195405824.0, + "77": 2195405824.0, + "78": 2195405824.0, + "79": 2195405824.0, + "80": 2195405824.0, + "81": 2195405824.0, + "82": 2195405824.0, + "83": 2195405824.0, + "84": 2195405824.0, + "85": 2195405824.0, + "86": 2195405824.0, + "87": 2195405824.0, + "88": 2195405824.0, + "89": 2195405824.0, + "90": 2195405824.0, + "91": 2195405824.0, + "92": 2195405824.0, + "93": 2195405824.0, + "94": 2195405824.0, + "95": 2195405824.0, + "96": 2195405824.0, + "97": 2195405824.0, + "98": 2195405824.0, + "99": 2195405824.0, + "100": 2195405824.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2302114304.0, + "2": 3236697600.0, + "3": 3236697600.0, + "4": 3236697600.0, + "5": 3236697600.0, + "6": 3236697600.0, + "7": 3236697600.0, + "8": 3236697600.0, + "9": 3236697600.0, + "10": 3236697600.0, + "11": 3236697600.0, + "12": 3236697600.0, + "13": 3236697600.0, + "14": 3236697600.0, + "15": 3236697600.0, + "16": 3236697600.0, + "17": 3236697600.0, + "18": 3236697600.0, + "19": 3236697600.0, + "20": 3236697600.0, + "21": 3236697600.0, + "22": 3236697600.0, + "23": 3236697600.0, + "24": 3236697600.0, + "25": 3236697600.0, + "26": 3236697600.0, + "27": 3236697600.0, + "28": 3236697600.0, + "29": 3236697600.0, + "30": 3236697600.0, + "31": 3236697600.0, + "32": 3236697600.0, + "33": 3236697600.0, + "34": 3236697600.0, + "35": 3236697600.0, + "36": 3236697600.0, + "37": 3236697600.0, + "38": 3236697600.0, + "39": 3236697600.0, + "40": 3236697600.0, + "41": 3236697600.0, + "42": 3236697600.0, + "43": 3236697600.0, + "44": 3236697600.0, + "45": 3236697600.0, + "46": 3236697600.0, + "47": 3236697600.0, + "48": 3236697600.0, + "49": 3236697600.0, + "50": 3236697600.0, + "51": 3236697600.0, + "52": 3236697600.0, + "53": 3236697600.0, + "54": 3236697600.0, + "55": 3236697600.0, + "56": 3236697600.0, + "57": 3236697600.0, + "58": 3236697600.0, + "59": 
3236697600.0, + "60": 3236697600.0, + "61": 3236697600.0, + "62": 3236697600.0, + "63": 3236697600.0, + "64": 3236697600.0, + "65": 3236697600.0, + "66": 3236697600.0, + "67": 3236697600.0, + "68": 3236697600.0, + "69": 3236697600.0, + "70": 3236697600.0, + "71": 3236697600.0, + "72": 3236697600.0, + "73": 3236697600.0, + "74": 3236697600.0, + "75": 3236697600.0, + "76": 3236697600.0, + "77": 3236697600.0, + "78": 3236697600.0, + "79": 3236697600.0, + "80": 3236697600.0, + "81": 3236697600.0, + "82": 3236697600.0, + "83": 3236697600.0, + "84": 3236697600.0, + "85": 3236697600.0, + "86": 3236697600.0, + "87": 3236697600.0, + "88": 3236697600.0, + "89": 3236697600.0, + "90": 3236697600.0, + "91": 3236697600.0, + "92": 3236697600.0, + "93": 3236697600.0, + "94": 3236697600.0, + "95": 3236697600.0, + "96": 3236697600.0, + "97": 3236697600.0, + "98": 3236697600.0, + "99": 3236697600.0, + "100": 3236697600.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.39562, + "2": 0.44691, + "3": 0.3459, + "4": 0.34935, + "5": 0.34659, + "6": 0.35056, + "7": 0.3495, + "8": 0.35113, + "9": 0.34945, + "10": 0.35049, + "11": 0.35158, + "12": 0.34969, + "13": 0.34855, + "14": 0.35082, + "15": 0.35148, + "16": 0.35346, + "17": 0.35991, + "18": 0.35857, + "19": 0.35651, + "20": 0.35734, + "21": 0.36107, + "22": 0.35291, + "23": 0.34878, + "24": 0.34924, + "25": 0.34966, + "26": 0.35397, + "27": 0.35048, + "28": 0.39139, + "29": 0.35978, + "30": 0.35049, + "31": 0.35472, + "32": 0.34768, + "33": 0.3681, + "34": 0.37086, + "35": 0.35372, + "36": 0.35661, + "37": 0.96115, + "38": 0.69943, + "39": 0.35304, + "40": 0.39899, + "41": 0.3519, + "42": 0.35367, + "43": 0.35089, + "44": 0.35181, + "45": 0.85196, + "46": 0.353, + "47": 0.35065, + "48": 0.34986, + "49": 0.34987, + "50": 0.35017, + "51": 0.35243, + "52": 0.34764, + "53": 0.68786, + "54": 0.35071, + "55": 0.35502, + "56": 0.36533, + "57": 0.34855, + "58": 0.35098, + "59": 
0.34751, + "60": 0.66551, + "61": 0.35376, + "62": 0.65487, + "63": 0.36102, + "64": 0.35122, + "65": 0.35654, + "66": 0.36028, + "67": 0.36743, + "68": 0.36013, + "69": 0.36151, + "70": 0.36618, + "71": 0.34619, + "72": 0.36448, + "73": 0.35934, + "74": 0.36235, + "75": 0.35742, + "76": 0.35529, + "77": 0.36633, + "78": 0.35551, + "79": 0.35185, + "80": 0.34938, + "81": 0.34965, + "82": 0.35454, + "83": 0.34716, + "84": 0.36305, + "85": 0.35771, + "86": 0.34829, + "87": 0.35483, + "88": 0.34874, + "89": 0.34898, + "90": 0.35072, + "91": 0.34969, + "92": 0.3539, + "93": 0.34627, + "94": 0.34706, + "95": 0.34587, + "96": 0.34804, + "97": 0.34773, + "98": 0.36076, + "99": 0.38382, + "100": 0.35651 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..30c495148f4 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.36406, + "2": 10.37672, + "3": 9.84285, + "4": 9.61995, + "5": 9.4049, + "6": 9.42891, + "7": 9.31288, + "8": 9.27047, + "9": 9.10629, + "10": 9.03569, + "11": 8.86423, + "12": 8.80988, + "13": 8.8329, + "14": 8.69011, + "15": 8.66187, + "16": 8.54768, + "17": 8.50183, + "18": 8.42362, + "19": 8.38674, + "20": 8.27993, + "21": 8.26472, + "22": 8.15738, + "23": 8.11148, + "24": 8.14234, + "25": 7.98343, + "26": 8.10636, + "27": 7.88853, + "28": 7.97024, + "29": 7.8121, + "30": 7.87698, + "31": 7.82339, + "32": 7.70086, + "33": 7.80317, + "34": 7.56843, + "35": 7.67276, + "36": 7.54942, + "37": 7.475, + "38": 7.51068, + 
"39": 7.49979, + "40": 7.51131, + "41": 7.41252, + "42": 7.38333, + "43": 7.4414, + "44": 7.39857, + "45": 7.37352, + "46": 7.28824, + "47": 7.4683, + "48": 7.29457, + "49": 7.35181, + "50": 7.17223, + "51": 7.37216, + "52": 7.14588, + "53": 7.12384, + "54": 7.23984, + "55": 7.15454, + "56": 7.23308, + "57": 7.33501, + "58": 7.01226, + "59": 7.12063, + "60": 7.15043, + "61": 7.11076, + "62": 7.26458, + "63": 7.1544, + "64": 7.08651, + "65": 6.99077, + "66": 7.05503, + "67": 7.04463, + "68": 7.136, + "69": 7.03404, + "70": 7.05994, + "71": 6.90146, + "72": 6.99845, + "73": 6.97783, + "74": 6.92205, + "75": 7.06268, + "76": 6.95612, + "77": 7.08838, + "78": 7.02608, + "79": 6.85354, + "80": 6.93543, + "81": 6.97396, + "82": 7.05854, + "83": 6.98003, + "84": 7.00602, + "85": 6.84771, + "86": 7.04197, + "87": 6.97366, + "88": 6.90817, + "89": 6.80902, + "90": 7.23999, + "91": 6.70221, + "92": 7.0543, + "93": 6.89332, + "94": 7.05002, + "95": 6.84547, + "96": 6.96202, + "97": 6.95355, + "98": 6.8731, + "99": 6.99831, + "100": 6.98508 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43317.0, + "2": 44065.0, + "3": 44730.0, + "4": 42374.0, + "5": 45387.0, + "6": 40937.0, + "7": 43166.0, + "8": 45433.0, + "9": 42439.0, + "10": 45374.0, + "11": 43947.0, + "12": 44584.0, + "13": 43908.0, + "14": 46205.0, + "15": 43901.0, + "16": 41607.0, + "17": 43831.0, + "18": 44698.0, + "19": 42543.0, + "20": 44759.0, + "21": 44734.0, + "22": 41850.0, + "23": 45416.0, + "24": 43069.0, + "25": 42442.0, + "26": 43923.0, + "27": 46212.0, + "28": 46362.0, + "29": 46133.0, + "30": 43978.0, + "31": 41220.0, + "32": 43307.0, + "33": 45440.0, + "34": 43284.0, + "35": 43248.0, + "36": 42437.0, + "37": 40066.0, + "38": 42483.0, + "39": 44702.0, + "40": 43230.0, + "41": 44672.0, + "42": 43202.0, + "43": 45459.0, + "44": 44609.0, + "45": 43265.0, + "46": 43915.0, + "47": 42366.0, + "48": 44650.0, + "49": 43139.0, + "50": 43399.0, + "51": 
41159.0, + "52": 43818.0, + "53": 43924.0, + "54": 41952.0, + "55": 43866.0, + "56": 43239.0, + "57": 42540.0, + "58": 43856.0, + "59": 44589.0, + "60": 41152.0, + "61": 39709.0, + "62": 44822.0, + "63": 44663.0, + "64": 45372.0, + "65": 44676.0, + "66": 45345.0, + "67": 43130.0, + "68": 42567.0, + "69": 43812.0, + "70": 45538.0, + "71": 43282.0, + "72": 44765.0, + "73": 45354.0, + "74": 42517.0, + "75": 44666.0, + "76": 43904.0, + "77": 42041.0, + "78": 40320.0, + "79": 38914.0, + "80": 41081.0, + "81": 45333.0, + "82": 43195.0, + "83": 38489.0, + "84": 42436.0, + "85": 43978.0, + "86": 45680.0, + "87": 40832.0, + "88": 41797.0, + "89": 41083.0, + "90": 44676.0, + "91": 46190.0, + "92": 41837.0, + "93": 43234.0, + "94": 39504.0, + "95": 44067.0, + "96": 44684.0, + "97": 45419.0, + "98": 41854.0, + "99": 45431.0, + "100": 42479.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2195405824.0, + "2": 2195405824.0, + "3": 2195405824.0, + "4": 2195405824.0, + "5": 2195405824.0, + "6": 2195405824.0, + "7": 2195405824.0, + "8": 2195405824.0, + "9": 2195405824.0, + "10": 2195405824.0, + "11": 2195405824.0, + "12": 2195405824.0, + "13": 2195405824.0, + "14": 2195405824.0, + "15": 2195405824.0, + "16": 2195405824.0, + "17": 2195405824.0, + "18": 2195405824.0, + "19": 2195405824.0, + "20": 2195405824.0, + "21": 2195405824.0, + "22": 2195405824.0, + "23": 2195405824.0, + "24": 2195405824.0, + "25": 2195405824.0, + "26": 2195405824.0, + "27": 2195405824.0, + "28": 2195405824.0, + "29": 2195405824.0, + "30": 2195405824.0, + "31": 2195405824.0, + "32": 2195405824.0, + "33": 2195405824.0, + "34": 2195405824.0, + "35": 2195405824.0, + "36": 2195405824.0, + "37": 2195405824.0, + "38": 2195405824.0, + "39": 2195405824.0, + "40": 2195405824.0, + "41": 2195405824.0, + "42": 2195405824.0, + "43": 2195405824.0, + "44": 2195405824.0, + "45": 2195405824.0, + "46": 2195405824.0, + "47": 2195405824.0, + "48": 2195405824.0, 
+ "49": 2195405824.0, + "50": 2195405824.0, + "51": 2195405824.0, + "52": 2195405824.0, + "53": 2195405824.0, + "54": 2195405824.0, + "55": 2195405824.0, + "56": 2195405824.0, + "57": 2195405824.0, + "58": 2195405824.0, + "59": 2195405824.0, + "60": 2195405824.0, + "61": 2195405824.0, + "62": 2195405824.0, + "63": 2195405824.0, + "64": 2195405824.0, + "65": 2195405824.0, + "66": 2195405824.0, + "67": 2195405824.0, + "68": 2195405824.0, + "69": 2195405824.0, + "70": 2195405824.0, + "71": 2195405824.0, + "72": 2195405824.0, + "73": 2195405824.0, + "74": 2195405824.0, + "75": 2195405824.0, + "76": 2195405824.0, + "77": 2195405824.0, + "78": 2195405824.0, + "79": 2195405824.0, + "80": 2195405824.0, + "81": 2195405824.0, + "82": 2195405824.0, + "83": 2195405824.0, + "84": 2195405824.0, + "85": 2195405824.0, + "86": 2195405824.0, + "87": 2195405824.0, + "88": 2195405824.0, + "89": 2195405824.0, + "90": 2195405824.0, + "91": 2195405824.0, + "92": 2195405824.0, + "93": 2195405824.0, + "94": 2195405824.0, + "95": 2195405824.0, + "96": 2195405824.0, + "97": 2195405824.0, + "98": 2195405824.0, + "99": 2195405824.0, + "100": 2195405824.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2302114304.0, + "2": 3236697600.0, + "3": 3236697600.0, + "4": 3236697600.0, + "5": 3236697600.0, + "6": 3236697600.0, + "7": 3236697600.0, + "8": 3236697600.0, + "9": 3236697600.0, + "10": 3236697600.0, + "11": 3236697600.0, + "12": 3236697600.0, + "13": 3236697600.0, + "14": 3236697600.0, + "15": 3236697600.0, + "16": 3236697600.0, + "17": 3236697600.0, + "18": 3236697600.0, + "19": 3236697600.0, + "20": 3236697600.0, + "21": 3236697600.0, + "22": 3236697600.0, + "23": 3236697600.0, + "24": 3236697600.0, + "25": 3236697600.0, + "26": 3236697600.0, + "27": 3236697600.0, + "28": 3236697600.0, + "29": 3236697600.0, + "30": 3236697600.0, + "31": 3236697600.0, + "32": 3236697600.0, + "33": 3236697600.0, + "34": 3236697600.0, + 
"35": 3236697600.0, + "36": 3236697600.0, + "37": 3236697600.0, + "38": 3236697600.0, + "39": 3236697600.0, + "40": 3236697600.0, + "41": 3236697600.0, + "42": 3236697600.0, + "43": 3236697600.0, + "44": 3236697600.0, + "45": 3236697600.0, + "46": 3236697600.0, + "47": 3236697600.0, + "48": 3236697600.0, + "49": 3236697600.0, + "50": 3236697600.0, + "51": 3236697600.0, + "52": 3236697600.0, + "53": 3236697600.0, + "54": 3236697600.0, + "55": 3236697600.0, + "56": 3236697600.0, + "57": 3236697600.0, + "58": 3236697600.0, + "59": 3236697600.0, + "60": 3236697600.0, + "61": 3236697600.0, + "62": 3236697600.0, + "63": 3236697600.0, + "64": 3236697600.0, + "65": 3236697600.0, + "66": 3236697600.0, + "67": 3236697600.0, + "68": 3236697600.0, + "69": 3236697600.0, + "70": 3236697600.0, + "71": 3236697600.0, + "72": 3236697600.0, + "73": 3236697600.0, + "74": 3236697600.0, + "75": 3236697600.0, + "76": 3236697600.0, + "77": 3236697600.0, + "78": 3236697600.0, + "79": 3236697600.0, + "80": 3236697600.0, + "81": 3236697600.0, + "82": 3236697600.0, + "83": 3236697600.0, + "84": 3236697600.0, + "85": 3236697600.0, + "86": 3236697600.0, + "87": 3236697600.0, + "88": 3236697600.0, + "89": 3236697600.0, + "90": 3236697600.0, + "91": 3236697600.0, + "92": 3236697600.0, + "93": 3236697600.0, + "94": 3236697600.0, + "95": 3236697600.0, + "96": 3236697600.0, + "97": 3236697600.0, + "98": 3236697600.0, + "99": 3236697600.0, + "100": 3236697600.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.46115, + "2": 0.46835, + "3": 0.38416, + "4": 0.37391, + "5": 0.37703, + "6": 0.38173, + "7": 0.37456, + "8": 0.37696, + "9": 0.37338, + "10": 0.37687, + "11": 0.38251, + "12": 0.38037, + "13": 0.37996, + "14": 0.38264, + "15": 0.37959, + "16": 0.38232, + "17": 0.37852, + "18": 0.37735, + "19": 0.3812, + "20": 0.37493, + "21": 0.38227, + "22": 0.38196, + "23": 0.37745, + "24": 0.3782, + "25": 0.37181, + "26": 0.37935, + "27": 
0.38539, + "28": 0.38393, + "29": 0.3826, + "30": 0.37839, + "31": 0.38438, + "32": 0.64523, + "33": 0.37971, + "34": 0.38082, + "35": 0.74313, + "36": 0.3848, + "37": 0.38169, + "38": 0.38154, + "39": 0.40495, + "40": 0.40243, + "41": 0.37972, + "42": 0.37792, + "43": 0.38261, + "44": 0.37607, + "45": 0.37463, + "46": 0.37881, + "47": 0.37293, + "48": 0.37592, + "49": 0.659, + "50": 0.37783, + "51": 0.38158, + "52": 0.73901, + "53": 0.37684, + "54": 0.37707, + "55": 0.42405, + "56": 0.38184, + "57": 0.37936, + "58": 0.37539, + "59": 0.37591, + "60": 0.72267, + "61": 0.37815, + "62": 0.77277, + "63": 0.38815, + "64": 0.3807, + "65": 0.37848, + "66": 0.38143, + "67": 0.37999, + "68": 0.38158, + "69": 0.38427, + "70": 0.37479, + "71": 0.38252, + "72": 0.38036, + "73": 0.38116, + "74": 0.38336, + "75": 0.3771, + "76": 0.37876, + "77": 0.38102, + "78": 0.37864, + "79": 0.38095, + "80": 0.37954, + "81": 0.37575, + "82": 0.38084, + "83": 0.38192, + "84": 0.38267, + "85": 0.38765, + "86": 0.38467, + "87": 0.3817, + "88": 0.37395, + "89": 0.37751, + "90": 0.38076, + "91": 0.37565, + "92": 0.38237, + "93": 0.37738, + "94": 0.37726, + "95": 0.38237, + "96": 0.38018, + "97": 0.38525, + "98": 0.40815, + "99": 0.38117, + "100": 0.38201 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json index 3c05fe99417..438130bae1c 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.33127, + "2": 10.35281, + "3": 9.79613, + "4": 
9.60968, "5": 9.42269, + "6": 9.45137, + "7": 9.34348, + "8": 9.27525, + "9": 9.09676, "10": 9.0722, + "11": 8.8835, + "12": 8.83711, + "13": 8.86836, + "14": 8.71039, "15": 8.68191, + "16": 8.56149, + "17": 8.52311, + "18": 8.43963, + "19": 8.40439, "20": 8.29506, + "21": 8.27059, + "22": 8.17902, + "23": 8.12669, + "24": 8.14846, "25": 7.9909, + "26": 8.12216, + "27": 7.90453, + "28": 7.98655, + "29": 7.80845, "30": 7.86918, + "31": 7.83571, + "32": 7.72178, + "33": 7.80378, + "34": 7.59229, "35": 7.68371, + "36": 7.53883, + "37": 7.47609, + "38": 7.5168, + "39": 7.49978, "40": 7.51704, + "41": 7.43174, + "42": 7.40104, + "43": 7.44926, + "44": 7.38919, "45": 7.38016, + "46": 7.29476, + "47": 7.44829, + "48": 7.28213, + "49": 7.34657, "50": 7.17116, + "51": 7.37361, + "52": 7.13381, + "53": 7.11244, + "54": 7.23402, "55": 7.14785, + "56": 7.22775, + "57": 7.33273, + "58": 6.99461, + "59": 7.11599, "60": 7.13222, + "61": 7.1056, + "62": 7.26513, + "63": 7.14772, + "64": 7.08696, "65": 6.98643, + "66": 7.04728, + "67": 7.04697, + "68": 7.14062, + "69": 7.2435, "70": 7.05957, + "71": 6.89356, + "72": 6.99769, + "73": 6.97897, + "74": 6.91983, "75": 7.05297, + "76": 6.96036, + "77": 7.0791, + "78": 7.01392, + "79": 6.88358, "80": 6.93014, + "81": 6.96553, + "82": 7.05265, + "83": 6.98788, + "84": 7.00427, "85": 6.84577, + "86": 7.03621, + "87": 6.96327, + "88": 6.9137, + "89": 6.80631, "90": 7.23619, + "91": 6.70015, + "92": 7.05679, + "93": 6.89287, + "94": 7.05835, "95": 6.84786, + "96": 6.96771, + "97": 6.94258, + "98": 6.87388, + "99": 7.01816, "100": 6.98466 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 43334.0, + "2": 44100.0, + "3": 44771.0, + "4": 42457.0, "5": 45411.0, + "6": 40966.0, + "7": 43193.0, + "8": 45457.0, + "9": 42550.0, "10": 45360.0, + "11": 44029.0, + "12": 44605.0, + "13": 43917.0, + "14": 46219.0, "15": 43943.0, + "16": 41732.0, + "17": 43861.0, + "18": 44721.0, + "19": 
42597.0, "20": 44797.0, + "21": 44792.0, + "22": 41891.0, + "23": 45473.0, + "24": 43081.0, "25": 42682.0, + "26": 43950.0, + "27": 46253.0, + "28": 46447.0, + "29": 46164.0, "30": 44042.0, + "31": 41263.0, + "32": 43440.0, + "33": 45483.0, + "34": 43349.0, "35": 43273.0, + "36": 42490.0, + "37": 40647.0, + "38": 42549.0, + "39": 44766.0, "40": 43281.0, + "41": 44669.0, + "42": 43287.0, + "43": 45454.0, + "44": 44627.0, "45": 43353.0, + "46": 43925.0, + "47": 42498.0, + "48": 44758.0, + "49": 43173.0, "50": 43402.0, + "51": 41198.0, + "52": 43900.0, + "53": 43938.0, + "54": 41922.0, "55": 43916.0, + "56": 43237.0, + "57": 42634.0, + "58": 43916.0, + "59": 44616.0, "60": 41414.0, + "61": 39759.0, + "62": 44750.0, + "63": 44673.0, + "64": 45378.0, "65": 44765.0, + "66": 45401.0, + "67": 43155.0, + "68": 42552.0, + "69": 43831.0, "70": 45546.0, + "71": 43332.0, + "72": 44847.0, + "73": 45376.0, + "74": 42503.0, "75": 44704.0, + "76": 43916.0, + "77": 42101.0, + "78": 40543.0, + "79": 38997.0, "80": 41079.0, + "81": 45377.0, + "82": 43254.0, + "83": 38473.0, + "84": 42420.0, "85": 43989.0, + "86": 45694.0, + "87": 41164.0, + "88": 41773.0, + "89": 41047.0, "90": 44710.0, + "91": 46274.0, + "92": 41823.0, + "93": 43286.0, + "94": 39530.0, "95": 44074.0, + "96": 44686.0, + "97": 45424.0, + "98": 41849.0, + "99": 45567.0, "100": 42485.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 4158515200.0, "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, "25": 
4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 4158515200.0, "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, "100": 4158515200.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, "5": 6187556864.0, + "6": 6187556864.0, + "7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, "10": 6187556864.0, + "11": 6187556864.0, + "12": 
6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, "20": 6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, "35": 6187556864.0, + "36": 6187556864.0, + "37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 6187556864.0, + "44": 6187556864.0, "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, "60": 6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 6187556864.0, + "64": 6187556864.0, "65": 6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, "85": 6187556864.0, + "86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + "98": 6187556864.0, + "99": 6187556864.0, "100": 6187556864.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + 
"step_interval": 1, "values": { - "1": 5.88206, - "5": 0.14455, - "10": 0.1392, - "15": 0.14565, - "20": 0.1396, - "25": 0.13933, - "30": 0.13875, - "35": 0.14498, - "40": 0.13976, - "45": 0.14331, - "50": 0.14852, - "55": 0.13993, - "60": 0.1429, - "65": 0.14345, - "70": 0.17591, - "75": 0.14145, - "80": 0.14297, - "85": 0.14009, - "90": 0.14121, - "95": 0.13997, - "100": 0.14256 + "1": 7.07395, + "2": 0.19501, + "3": 0.16284, + "4": 0.15592, + "5": 0.16485, + "6": 0.15452, + "7": 0.1627, + "8": 0.15835, + "9": 0.15975, + "10": 0.15881, + "11": 0.16294, + "12": 0.15929, + "13": 0.16216, + "14": 0.15673, + "15": 0.16042, + "16": 0.15452, + "17": 0.16802, + "18": 0.15623, + "19": 0.16501, + "20": 0.15961, + "21": 0.16269, + "22": 0.15556, + "23": 0.16412, + "24": 0.1564, + "25": 0.1614, + "26": 0.15776, + "27": 0.16056, + "28": 0.16086, + "29": 0.16026, + "30": 0.15782, + "31": 0.1619, + "32": 0.1567, + "33": 0.16353, + "34": 0.1553, + "35": 0.16202, + "36": 0.15695, + "37": 0.16347, + "38": 0.15703, + "39": 0.1638, + "40": 0.1549, + "41": 0.15808, + "42": 0.1603, + "43": 0.15931, + "44": 0.15772, + "45": 0.16421, + "46": 0.15573, + "47": 0.16133, + "48": 0.1567, + "49": 0.16354, + "50": 0.15698, + "51": 0.15998, + "52": 0.15347, + "53": 0.16223, + "54": 0.1565, + "55": 0.16429, + "56": 0.15654, + "57": 0.16548, + "58": 0.15761, + "59": 0.16437, + "60": 0.15677, + "61": 0.16238, + "62": 0.15845, + "63": 0.16393, + "64": 0.16321, + "65": 0.16208, + "66": 0.15975, + "67": 0.16831, + "68": 0.15965, + "69": 0.16375, + "70": 0.16321, + "71": 0.17306, + "72": 0.15973, + "73": 0.16591, + "74": 0.1637, + "75": 0.16984, + "76": 0.16123, + "77": 0.17281, + "78": 0.16826, + "79": 0.17136, + "80": 0.16673, + "81": 0.16135, + "82": 0.16815, + "83": 0.20097, + "84": 0.19663, + "85": 0.16475, + "86": 0.16782, + "87": 0.16163, + "88": 0.16356, + "89": 0.16018, + "90": 0.16416, + "91": 0.15961, + "92": 0.16129, + "93": 0.15562, + "94": 0.1646, + "95": 0.15685, + "96": 0.16321, + 
"97": 0.15621, + "98": 0.16585, + "99": 0.15667, + "100": 0.17074 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..cefa267841e --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34494, + "2": 10.36431, + "3": 9.73158, + "4": 9.57928, + "5": 9.38931, + "6": 9.41074, + "7": 9.30545, + "8": 9.24868, + "9": 9.09349, + "10": 9.01569, + "11": 8.86286, + "12": 8.79096, + "13": 8.80892, + "14": 8.67669, + "15": 8.64631, + "16": 8.5398, + "17": 8.47895, + "18": 8.38945, + "19": 8.36156, + "20": 8.26966, + "21": 8.26333, + "22": 8.15066, + "23": 8.08893, + "24": 8.12421, + "25": 7.99493, + "26": 8.08494, + "27": 7.87755, + "28": 7.95863, + "29": 7.79585, + "30": 7.87492, + "31": 7.83245, + "32": 7.69489, + "33": 7.78469, + "34": 7.55767, + "35": 7.65834, + "36": 7.52881, + "37": 7.44912, + "38": 7.50398, + "39": 7.48056, + "40": 7.50302, + "41": 7.39767, + "42": 7.37206, + "43": 7.44301, + "44": 7.3811, + "45": 7.36143, + "46": 7.29415, + "47": 7.47498, + "48": 7.29564, + "49": 7.36092, + "50": 7.19205, + "51": 7.38769, + "52": 7.13773, + "53": 7.125, + "54": 7.23668, + "55": 7.16852, + "56": 7.22884, + "57": 7.34699, + "58": 7.03128, + "59": 7.1229, + "60": 7.16587, + "61": 7.1174, + "62": 7.26837, + "63": 7.16759, + "64": 7.08376, + "65": 7.00099, + "66": 7.07203, + "67": 7.05971, + "68": 7.14618, + "69": 7.03944, + "70": 7.07162, + "71": 6.91653, + "72": 7.02025, + "73": 6.9904, + "74": 6.9146, + "75": 7.07611, + "76": 6.97098, + "77": 7.08446, + "78": 7.03608, + 
"79": 6.88325, + "80": 6.95251, + "81": 6.985, + "82": 7.06843, + "83": 7.00882, + "84": 7.0181, + "85": 6.8641, + "86": 7.04979, + "87": 6.99342, + "88": 6.9238, + "89": 6.82406, + "90": 7.25457, + "91": 6.7226, + "92": 7.05372, + "93": 6.91688, + "94": 7.066, + "95": 6.8601, + "96": 6.98742, + "97": 6.96796, + "98": 6.89964, + "99": 7.02766, + "100": 6.99745 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43305.0, + "2": 44091.0, + "3": 44794.0, + "4": 42436.0, + "5": 45413.0, + "6": 40989.0, + "7": 43195.0, + "8": 45462.0, + "9": 42551.0, + "10": 45379.0, + "11": 44016.0, + "12": 44629.0, + "13": 43937.0, + "14": 46250.0, + "15": 43956.0, + "16": 41728.0, + "17": 43873.0, + "18": 44716.0, + "19": 42648.0, + "20": 44818.0, + "21": 44812.0, + "22": 41883.0, + "23": 45468.0, + "24": 43112.0, + "25": 42745.0, + "26": 43949.0, + "27": 46268.0, + "28": 46429.0, + "29": 46199.0, + "30": 44042.0, + "31": 41264.0, + "32": 43413.0, + "33": 45478.0, + "34": 43375.0, + "35": 43297.0, + "36": 42545.0, + "37": 40689.0, + "38": 42575.0, + "39": 44772.0, + "40": 43251.0, + "41": 44707.0, + "42": 43261.0, + "43": 45506.0, + "44": 44652.0, + "45": 43345.0, + "46": 43935.0, + "47": 42506.0, + "48": 44693.0, + "49": 43200.0, + "50": 43415.0, + "51": 41174.0, + "52": 43885.0, + "53": 43959.0, + "54": 41961.0, + "55": 43960.0, + "56": 43269.0, + "57": 42561.0, + "58": 43898.0, + "59": 44654.0, + "60": 41326.0, + "61": 39744.0, + "62": 44774.0, + "63": 44682.0, + "64": 45396.0, + "65": 44730.0, + "66": 45388.0, + "67": 43196.0, + "68": 42556.0, + "69": 43825.0, + "70": 45543.0, + "71": 43407.0, + "72": 44832.0, + "73": 45412.0, + "74": 42502.0, + "75": 44684.0, + "76": 43926.0, + "77": 42100.0, + "78": 40525.0, + "79": 38954.0, + "80": 41118.0, + "81": 45412.0, + "82": 43238.0, + "83": 38495.0, + "84": 42524.0, + "85": 44024.0, + "86": 45749.0, + "87": 41116.0, + "88": 41798.0, + "89": 41078.0, + "90": 44744.0, + "91": 
46266.0, + "92": 41865.0, + "93": 43254.0, + "94": 39588.0, + "95": 44092.0, + "96": 44732.0, + "97": 45474.0, + "98": 41859.0, + "99": 45537.0, + "100": 42500.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, + "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 4158515200.0, + "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, + "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, + "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, + "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, + "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, + "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, + "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, + "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, + "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, + "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, + "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, + "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, + "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, + "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 
4158515200.0, + "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, + "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, + "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, + "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, + "100": 4158515200.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, + "5": 6187556864.0, + "6": 6187556864.0, + "7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, + "10": 6187556864.0, + "11": 6187556864.0, + "12": 6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, + "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, + "20": 6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, + "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, + "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, + "35": 6187556864.0, + "36": 6187556864.0, + "37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, + "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 6187556864.0, + "44": 6187556864.0, + "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, + "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, + "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, + "60": 6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 6187556864.0, + "64": 6187556864.0, + "65": 
6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, + "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, + "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, + "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, + "85": 6187556864.0, + "86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, + "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, + "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + "98": 6187556864.0, + "99": 6187556864.0, + "100": 6187556864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.07631, + "2": 0.42115, + "3": 0.24529, + "4": 0.23719, + "5": 0.2516, + "6": 0.2477, + "7": 0.2382, + "8": 0.23994, + "9": 0.26017, + "10": 0.27742, + "11": 0.24722, + "12": 0.243, + "13": 0.23789, + "14": 0.24255, + "15": 0.24011, + "16": 0.23679, + "17": 0.24823, + "18": 0.24785, + "19": 0.2488, + "20": 0.24836, + "21": 0.25124, + "22": 0.26347, + "23": 0.25688, + "24": 0.25176, + "25": 0.25034, + "26": 0.24652, + "27": 0.25028, + "28": 0.24542, + "29": 0.24835, + "30": 0.25164, + "31": 0.24716, + "32": 0.23244, + "33": 0.24002, + "34": 0.23187, + "35": 0.2359, + "36": 0.23168, + "37": 0.23963, + "38": 0.23232, + "39": 0.23677, + "40": 0.23188, + "41": 0.23971, + "42": 0.23201, + "43": 0.24022, + "44": 0.2318, + "45": 0.24134, + "46": 0.23272, + "47": 0.24039, + "48": 0.23386, + "49": 0.23758, + "50": 0.23159, + "51": 0.25559, + "52": 0.28119, + "53": 0.27021, + "54": 0.24392, + "55": 0.23902, + "56": 0.23405, + "57": 0.24193, + "58": 0.23238, + "59": 0.2443, + "60": 0.232, + "61": 0.2448, + "62": 0.23419, + "63": 0.24179, + "64": 0.23763, + "65": 0.24278, + "66": 0.23814, + "67": 
0.23636, + "68": 0.23943, + "69": 0.23382, + "70": 0.23642, + "71": 0.23981, + "72": 0.23228, + "73": 0.23188, + "74": 0.23232, + "75": 0.23217, + "76": 0.2324, + "77": 0.23204, + "78": 0.23241, + "79": 0.23249, + "80": 0.23152, + "81": 0.23163, + "82": 0.23217, + "83": 0.23187, + "84": 0.23224, + "85": 0.23215, + "86": 0.23155, + "87": 0.23144, + "88": 0.23215, + "89": 0.23207, + "90": 0.23116, + "91": 0.23213, + "92": 0.23203, + "93": 0.23167, + "94": 0.23097, + "95": 0.23272, + "96": 0.23147, + "97": 0.23203, + "98": 0.23135, + "99": 0.23167, + "100": 0.23206 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..10ef1405966 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34494, + "2": 10.36431, + "3": 9.73158, + "4": 9.57928, + "5": 9.38931, + "6": 9.41074, + "7": 9.30545, + "8": 9.24868, + "9": 9.09349, + "10": 9.01569, + "11": 8.86286, + "12": 8.79096, + "13": 8.80892, + "14": 8.67669, + "15": 8.64631, + "16": 8.5398, + "17": 8.47895, + "18": 8.38945, + "19": 8.36156, + "20": 8.26966, + "21": 8.26333, + "22": 8.15066, + "23": 8.08893, + "24": 8.12421, + "25": 7.99493, + "26": 8.08494, + "27": 7.87755, + "28": 7.95863, + "29": 7.79585, + "30": 7.87492, + "31": 7.83245, + "32": 7.69489, + "33": 7.78469, + "34": 7.55767, + "35": 7.65834, + "36": 7.52881, + "37": 7.44912, + "38": 7.50398, + "39": 7.48056, + "40": 7.50302, + "41": 7.39767, + "42": 7.37206, + "43": 7.44301, + "44": 7.3811, + "45": 7.36143, + "46": 7.29415, + "47": 7.47498, + "48": 7.29564, + "49": 
7.36092, + "50": 7.19205, + "51": 7.38769, + "52": 7.13773, + "53": 7.125, + "54": 7.23668, + "55": 7.16852, + "56": 7.22884, + "57": 7.34699, + "58": 7.03128, + "59": 7.1229, + "60": 7.16587, + "61": 7.1174, + "62": 7.26837, + "63": 7.16759, + "64": 7.08376, + "65": 7.00099, + "66": 7.07203, + "67": 7.05971, + "68": 7.14618, + "69": 7.03944, + "70": 7.07162, + "71": 6.91653, + "72": 7.02025, + "73": 6.9904, + "74": 6.9146, + "75": 7.07611, + "76": 6.97098, + "77": 7.08446, + "78": 7.03608, + "79": 6.88325, + "80": 6.95251, + "81": 6.985, + "82": 7.06843, + "83": 7.00882, + "84": 7.0181, + "85": 6.8641, + "86": 7.04979, + "87": 6.99342, + "88": 6.9238, + "89": 6.82406, + "90": 7.25457, + "91": 6.7226, + "92": 7.05372, + "93": 6.91688, + "94": 7.066, + "95": 6.8601, + "96": 6.98742, + "97": 6.96796, + "98": 6.89964, + "99": 7.02766, + "100": 6.99745 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43305.0, + "2": 44091.0, + "3": 44794.0, + "4": 42436.0, + "5": 45413.0, + "6": 40989.0, + "7": 43195.0, + "8": 45462.0, + "9": 42551.0, + "10": 45379.0, + "11": 44016.0, + "12": 44629.0, + "13": 43937.0, + "14": 46250.0, + "15": 43956.0, + "16": 41728.0, + "17": 43873.0, + "18": 44716.0, + "19": 42648.0, + "20": 44818.0, + "21": 44812.0, + "22": 41883.0, + "23": 45468.0, + "24": 43112.0, + "25": 42745.0, + "26": 43949.0, + "27": 46268.0, + "28": 46429.0, + "29": 46199.0, + "30": 44042.0, + "31": 41264.0, + "32": 43413.0, + "33": 45478.0, + "34": 43375.0, + "35": 43297.0, + "36": 42545.0, + "37": 40689.0, + "38": 42575.0, + "39": 44772.0, + "40": 43251.0, + "41": 44707.0, + "42": 43261.0, + "43": 45506.0, + "44": 44652.0, + "45": 43345.0, + "46": 43935.0, + "47": 42506.0, + "48": 44693.0, + "49": 43200.0, + "50": 43415.0, + "51": 41174.0, + "52": 43885.0, + "53": 43959.0, + "54": 41961.0, + "55": 43960.0, + "56": 43269.0, + "57": 42561.0, + "58": 43898.0, + "59": 44654.0, + "60": 41326.0, + "61": 39744.0, + "62": 
44774.0, + "63": 44682.0, + "64": 45396.0, + "65": 44730.0, + "66": 45388.0, + "67": 43196.0, + "68": 42556.0, + "69": 43825.0, + "70": 45543.0, + "71": 43407.0, + "72": 44832.0, + "73": 45412.0, + "74": 42502.0, + "75": 44684.0, + "76": 43926.0, + "77": 42100.0, + "78": 40525.0, + "79": 38954.0, + "80": 41118.0, + "81": 45412.0, + "82": 43238.0, + "83": 38495.0, + "84": 42524.0, + "85": 44024.0, + "86": 45749.0, + "87": 41116.0, + "88": 41798.0, + "89": 41078.0, + "90": 44744.0, + "91": 46266.0, + "92": 41865.0, + "93": 43254.0, + "94": 39588.0, + "95": 44092.0, + "96": 44732.0, + "97": 45474.0, + "98": 41859.0, + "99": 45537.0, + "100": 42500.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, + "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 4158515200.0, + "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, + "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, + "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, + "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, + "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, + "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, + "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, + "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, + "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, + "55": 4158515200.0, + "56": 4158515200.0, + "57": 
4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, + "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, + "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, + "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, + "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 4158515200.0, + "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, + "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, + "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, + "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, + "100": 4158515200.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, + "5": 6187556864.0, + "6": 6187556864.0, + "7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, + "10": 6187556864.0, + "11": 6187556864.0, + "12": 6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, + "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, + "20": 6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, + "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, + "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, + "35": 6187556864.0, + "36": 6187556864.0, + "37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, + "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 
6187556864.0, + "44": 6187556864.0, + "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, + "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, + "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, + "60": 6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 6187556864.0, + "64": 6187556864.0, + "65": 6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, + "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, + "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, + "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, + "85": 6187556864.0, + "86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, + "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, + "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + "98": 6187556864.0, + "99": 6187556864.0, + "100": 6187556864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.56097, + "2": 0.5665, + "3": 0.23157, + "4": 0.23387, + "5": 0.24864, + "6": 0.23399, + "7": 0.23692, + "8": 0.23082, + "9": 0.23218, + "10": 0.23429, + "11": 0.22503, + "12": 0.23455, + "13": 0.22526, + "14": 0.23323, + "15": 0.23735, + "16": 0.236, + "17": 0.22678, + "18": 0.23575, + "19": 0.22315, + "20": 0.2333, + "21": 0.22422, + "22": 0.22407, + "23": 0.2339, + "24": 0.22414, + "25": 0.22406, + "26": 0.23317, + "27": 0.22305, + "28": 0.22383, + "29": 0.23323, + "30": 0.224, + "31": 0.22377, + "32": 0.22673, + "33": 0.23037, + "34": 0.22469, + "35": 0.22408, + "36": 0.22989, + "37": 0.2238, + "38": 
0.22507, + "39": 0.22859, + "40": 0.24027, + "41": 0.23144, + "42": 0.23374, + "43": 0.22475, + "44": 0.22417, + "45": 0.23296, + "46": 0.22427, + "47": 0.22489, + "48": 0.23424, + "49": 0.22498, + "50": 0.22454, + "51": 0.23236, + "52": 0.22777, + "53": 0.22625, + "54": 0.23366, + "55": 0.22841, + "56": 0.23206, + "57": 0.23467, + "58": 0.2277, + "59": 0.23045, + "60": 0.23628, + "61": 0.22728, + "62": 0.22507, + "63": 0.23342, + "64": 0.22668, + "65": 0.22514, + "66": 0.23559, + "67": 0.2309, + "68": 0.25201, + "69": 0.23266, + "70": 0.2274, + "71": 0.23936, + "72": 0.23585, + "73": 0.24105, + "74": 0.23426, + "75": 0.23113, + "76": 0.23658, + "77": 0.22773, + "78": 0.22825, + "79": 0.23279, + "80": 0.22595, + "81": 0.22568, + "82": 0.22609, + "83": 0.22518, + "84": 0.22622, + "85": 0.2284, + "86": 0.22625, + "87": 0.22909, + "88": 0.22703, + "89": 0.22595, + "90": 0.6034, + "91": 0.22715, + "92": 0.22553, + "93": 0.22635, + "94": 0.22592, + "95": 0.22566, + "96": 0.22563, + "97": 0.22615, + "98": 0.22511, + "99": 0.23442, + "100": 0.22512 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..a044dd0e135 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.33127, + "2": 10.35281, + "3": 9.79613, + "4": 9.60968, + "5": 9.42269, + "6": 9.45137, + "7": 9.34348, + "8": 9.27525, + "9": 9.09676, + "10": 9.0722, + "11": 8.8835, + "12": 8.83711, + "13": 8.86836, + "14": 8.71039, + "15": 8.68191, + "16": 8.56149, + "17": 8.52311, + "18": 8.43963, + "19": 8.40439, + "20": 
8.29506, + "21": 8.27059, + "22": 8.17902, + "23": 8.12669, + "24": 8.14846, + "25": 7.9909, + "26": 8.12216, + "27": 7.90453, + "28": 7.98655, + "29": 7.80845, + "30": 7.86918, + "31": 7.83571, + "32": 7.72178, + "33": 7.80378, + "34": 7.59229, + "35": 7.68371, + "36": 7.53883, + "37": 7.47609, + "38": 7.5168, + "39": 7.49978, + "40": 7.51704, + "41": 7.43174, + "42": 7.40104, + "43": 7.44926, + "44": 7.38919, + "45": 7.38016, + "46": 7.29476, + "47": 7.44829, + "48": 7.28213, + "49": 7.34657, + "50": 7.17116, + "51": 7.37361, + "52": 7.13381, + "53": 7.11244, + "54": 7.23402, + "55": 7.14785, + "56": 7.22775, + "57": 7.33273, + "58": 6.99461, + "59": 7.11599, + "60": 7.13222, + "61": 7.1056, + "62": 7.26513, + "63": 7.14772, + "64": 7.08696, + "65": 6.98643, + "66": 7.04728, + "67": 7.04697, + "68": 7.14062, + "69": 7.2435, + "70": 7.05957, + "71": 6.89356, + "72": 6.99769, + "73": 6.97897, + "74": 6.91983, + "75": 7.05297, + "76": 6.96036, + "77": 7.0791, + "78": 7.01392, + "79": 6.88358, + "80": 6.93014, + "81": 6.96553, + "82": 7.05265, + "83": 6.98788, + "84": 7.00427, + "85": 6.84577, + "86": 7.03621, + "87": 6.96327, + "88": 6.9137, + "89": 6.80631, + "90": 7.23619, + "91": 6.70015, + "92": 7.05679, + "93": 6.89287, + "94": 7.05835, + "95": 6.84786, + "96": 6.96771, + "97": 6.94258, + "98": 6.87388, + "99": 7.01816, + "100": 6.98466 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43334.0, + "2": 44100.0, + "3": 44771.0, + "4": 42457.0, + "5": 45411.0, + "6": 40966.0, + "7": 43193.0, + "8": 45457.0, + "9": 42550.0, + "10": 45360.0, + "11": 44029.0, + "12": 44605.0, + "13": 43917.0, + "14": 46219.0, + "15": 43943.0, + "16": 41732.0, + "17": 43861.0, + "18": 44721.0, + "19": 42597.0, + "20": 44797.0, + "21": 44792.0, + "22": 41891.0, + "23": 45473.0, + "24": 43081.0, + "25": 42682.0, + "26": 43950.0, + "27": 46253.0, + "28": 46447.0, + "29": 46164.0, + "30": 44042.0, + "31": 41263.0, + "32": 43440.0, + 
"33": 45483.0, + "34": 43349.0, + "35": 43273.0, + "36": 42490.0, + "37": 40647.0, + "38": 42549.0, + "39": 44766.0, + "40": 43281.0, + "41": 44669.0, + "42": 43287.0, + "43": 45454.0, + "44": 44627.0, + "45": 43353.0, + "46": 43925.0, + "47": 42498.0, + "48": 44758.0, + "49": 43173.0, + "50": 43402.0, + "51": 41198.0, + "52": 43900.0, + "53": 43938.0, + "54": 41922.0, + "55": 43916.0, + "56": 43237.0, + "57": 42634.0, + "58": 43916.0, + "59": 44616.0, + "60": 41414.0, + "61": 39759.0, + "62": 44750.0, + "63": 44673.0, + "64": 45378.0, + "65": 44765.0, + "66": 45401.0, + "67": 43155.0, + "68": 42552.0, + "69": 43831.0, + "70": 45546.0, + "71": 43332.0, + "72": 44847.0, + "73": 45376.0, + "74": 42503.0, + "75": 44704.0, + "76": 43916.0, + "77": 42101.0, + "78": 40543.0, + "79": 38997.0, + "80": 41079.0, + "81": 45377.0, + "82": 43254.0, + "83": 38473.0, + "84": 42420.0, + "85": 43989.0, + "86": 45694.0, + "87": 41164.0, + "88": 41773.0, + "89": 41047.0, + "90": 44710.0, + "91": 46274.0, + "92": 41823.0, + "93": 43286.0, + "94": 39530.0, + "95": 44074.0, + "96": 44686.0, + "97": 45424.0, + "98": 41849.0, + "99": 45567.0, + "100": 42485.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, + "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 4158515200.0, + "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, + "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, + "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, + "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, + "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 
4158515200.0, + "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, + "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, + "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, + "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, + "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, + "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, + "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, + "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, + "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 4158515200.0, + "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, + "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, + "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, + "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, + "100": 4158515200.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, + "5": 6187556864.0, + "6": 6187556864.0, + "7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, + "10": 6187556864.0, + "11": 6187556864.0, + "12": 6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, + "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, + "20": 
6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, + "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, + "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, + "35": 6187556864.0, + "36": 6187556864.0, + "37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, + "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 6187556864.0, + "44": 6187556864.0, + "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, + "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, + "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, + "60": 6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 6187556864.0, + "64": 6187556864.0, + "65": 6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, + "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, + "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, + "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, + "85": 6187556864.0, + "86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, + "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, + "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + "98": 6187556864.0, + "99": 6187556864.0, + "100": 6187556864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.68377, + "2": 0.24636, + "3": 0.14697, + "4": 0.14068, + "5": 0.14575, + "6": 0.13961, + "7": 0.14621, + "8": 
0.14223, + "9": 0.14582, + "10": 0.13865, + "11": 0.1453, + "12": 0.13885, + "13": 0.14702, + "14": 0.14162, + "15": 0.1468, + "16": 0.14692, + "17": 0.14326, + "18": 0.14146, + "19": 0.15015, + "20": 0.13999, + "21": 0.14878, + "22": 0.13993, + "23": 0.14535, + "24": 0.1378, + "25": 0.15024, + "26": 0.1375, + "27": 0.13991, + "28": 0.14118, + "29": 0.14057, + "30": 0.14015, + "31": 0.1384, + "32": 0.13865, + "33": 0.14194, + "34": 0.14009, + "35": 0.14432, + "36": 0.14051, + "37": 0.1489, + "38": 0.13976, + "39": 0.14433, + "40": 0.13889, + "41": 0.14744, + "42": 0.14045, + "43": 0.14474, + "44": 0.14195, + "45": 0.14259, + "46": 0.13761, + "47": 0.14569, + "48": 0.15734, + "49": 0.18844, + "50": 0.14153, + "51": 0.14057, + "52": 0.14132, + "53": 0.14241, + "54": 0.14306, + "55": 0.1436, + "56": 0.14347, + "57": 0.13981, + "58": 0.13906, + "59": 0.14322, + "60": 0.13735, + "61": 0.14083, + "62": 0.14416, + "63": 0.14191, + "64": 0.14246, + "65": 0.13711, + "66": 0.1364, + "67": 0.13655, + "68": 0.1365, + "69": 0.13935, + "70": 0.15757, + "71": 0.13997, + "72": 0.13995, + "73": 0.14045, + "74": 0.1419, + "75": 0.14171, + "76": 0.14479, + "77": 0.17363, + "78": 0.15289, + "79": 0.1416, + "80": 0.14577, + "81": 0.14478, + "82": 0.14716, + "83": 0.14872, + "84": 0.15369, + "85": 0.15016, + "86": 0.13782, + "87": 0.1585, + "88": 0.15072, + "89": 0.13834, + "90": 0.13681, + "91": 0.139, + "92": 0.13751, + "93": 0.13694, + "94": 0.13764, + "95": 0.13659, + "96": 0.13726, + "97": 0.13676, + "98": 0.13872, + "99": 0.13604, + "100": 0.13543 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..e788215b20a --- /dev/null +++ 
b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.33127, + "2": 10.35281, + "3": 9.79613, + "4": 9.60968, + "5": 9.42269, + "6": 9.45137, + "7": 9.34348, + "8": 9.27525, + "9": 9.09676, + "10": 9.0722, + "11": 8.8835, + "12": 8.83711, + "13": 8.86836, + "14": 8.71039, + "15": 8.68191, + "16": 8.56149, + "17": 8.52311, + "18": 8.43963, + "19": 8.40439, + "20": 8.29506, + "21": 8.27059, + "22": 8.17902, + "23": 8.12669, + "24": 8.14846, + "25": 7.9909, + "26": 8.12216, + "27": 7.90453, + "28": 7.98655, + "29": 7.80845, + "30": 7.86918, + "31": 7.83571, + "32": 7.72178, + "33": 7.80378, + "34": 7.59229, + "35": 7.68371, + "36": 7.53883, + "37": 7.47609, + "38": 7.5168, + "39": 7.49978, + "40": 7.51704, + "41": 7.43174, + "42": 7.40104, + "43": 7.44926, + "44": 7.38919, + "45": 7.38016, + "46": 7.29476, + "47": 7.44829, + "48": 7.28213, + "49": 7.34657, + "50": 7.17116, + "51": 7.37361, + "52": 7.13381, + "53": 7.11244, + "54": 7.23402, + "55": 7.14785, + "56": 7.22775, + "57": 7.33273, + "58": 6.99461, + "59": 7.11599, + "60": 7.13222, + "61": 7.1056, + "62": 7.26513, + "63": 7.14772, + "64": 7.08696, + "65": 6.98643, + "66": 7.04728, + "67": 7.04697, + "68": 7.14062, + "69": 7.2435, + "70": 7.05957, + "71": 6.89356, + "72": 6.99769, + "73": 6.97897, + "74": 6.91983, + "75": 7.05297, + "76": 6.96036, + "77": 7.0791, + "78": 7.01392, + "79": 6.88358, + "80": 6.93014, + "81": 6.96553, + "82": 7.05265, + "83": 6.98788, + "84": 7.00427, + "85": 6.84577, + "86": 7.03621, + "87": 6.96327, + "88": 6.9137, + "89": 6.80631, + "90": 7.23619, + "91": 6.70015, + "92": 7.05679, + "93": 6.89287, + "94": 7.05835, + "95": 6.84786, + "96": 6.96771, + "97": 6.94258, + "98": 6.87388, + "99": 7.01816, + "100": 6.98466 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 
1, + "values": { + "1": 43334.0, + "2": 44100.0, + "3": 44771.0, + "4": 42457.0, + "5": 45411.0, + "6": 40966.0, + "7": 43193.0, + "8": 45457.0, + "9": 42550.0, + "10": 45360.0, + "11": 44029.0, + "12": 44605.0, + "13": 43917.0, + "14": 46219.0, + "15": 43943.0, + "16": 41732.0, + "17": 43861.0, + "18": 44721.0, + "19": 42597.0, + "20": 44797.0, + "21": 44792.0, + "22": 41891.0, + "23": 45473.0, + "24": 43081.0, + "25": 42682.0, + "26": 43950.0, + "27": 46253.0, + "28": 46447.0, + "29": 46164.0, + "30": 44042.0, + "31": 41263.0, + "32": 43440.0, + "33": 45483.0, + "34": 43349.0, + "35": 43273.0, + "36": 42490.0, + "37": 40647.0, + "38": 42549.0, + "39": 44766.0, + "40": 43281.0, + "41": 44669.0, + "42": 43287.0, + "43": 45454.0, + "44": 44627.0, + "45": 43353.0, + "46": 43925.0, + "47": 42498.0, + "48": 44758.0, + "49": 43173.0, + "50": 43402.0, + "51": 41198.0, + "52": 43900.0, + "53": 43938.0, + "54": 41922.0, + "55": 43916.0, + "56": 43237.0, + "57": 42634.0, + "58": 43916.0, + "59": 44616.0, + "60": 41414.0, + "61": 39759.0, + "62": 44750.0, + "63": 44673.0, + "64": 45378.0, + "65": 44765.0, + "66": 45401.0, + "67": 43155.0, + "68": 42552.0, + "69": 43831.0, + "70": 45546.0, + "71": 43332.0, + "72": 44847.0, + "73": 45376.0, + "74": 42503.0, + "75": 44704.0, + "76": 43916.0, + "77": 42101.0, + "78": 40543.0, + "79": 38997.0, + "80": 41079.0, + "81": 45377.0, + "82": 43254.0, + "83": 38473.0, + "84": 42420.0, + "85": 43989.0, + "86": 45694.0, + "87": 41164.0, + "88": 41773.0, + "89": 41047.0, + "90": 44710.0, + "91": 46274.0, + "92": 41823.0, + "93": 43286.0, + "94": 39530.0, + "95": 44074.0, + "96": 44686.0, + "97": 45424.0, + "98": 41849.0, + "99": 45567.0, + "100": 42485.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, + "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 
4158515200.0, + "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, + "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, + "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, + "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, + "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, + "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, + "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, + "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, + "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, + "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, + "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, + "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, + "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, + "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 4158515200.0, + "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, + "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, + "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, + "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, + 
"100": 4158515200.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, + "5": 6187556864.0, + "6": 6187556864.0, + "7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, + "10": 6187556864.0, + "11": 6187556864.0, + "12": 6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, + "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, + "20": 6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, + "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, + "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, + "35": 6187556864.0, + "36": 6187556864.0, + "37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, + "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 6187556864.0, + "44": 6187556864.0, + "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, + "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, + "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, + "60": 6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 6187556864.0, + "64": 6187556864.0, + "65": 6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, + "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, + "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, + "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, + "85": 6187556864.0, + 
"86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, + "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, + "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + "98": 6187556864.0, + "99": 6187556864.0, + "100": 6187556864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.09171, + "2": 0.19937, + "3": 0.15739, + "4": 0.15626, + "5": 0.15726, + "6": 0.16596, + "7": 0.15866, + "8": 0.16018, + "9": 0.16342, + "10": 0.15848, + "11": 0.1563, + "12": 0.15949, + "13": 0.16471, + "14": 0.1653, + "15": 0.15904, + "16": 0.15673, + "17": 0.15845, + "18": 0.15591, + "19": 0.15809, + "20": 0.1593, + "21": 0.15934, + "22": 0.1588, + "23": 0.15615, + "24": 0.15816, + "25": 0.15513, + "26": 0.16623, + "27": 0.1635, + "28": 0.15796, + "29": 0.15745, + "30": 0.15659, + "31": 0.15757, + "32": 0.15805, + "33": 0.16121, + "34": 0.15918, + "35": 0.15628, + "36": 0.16015, + "37": 0.15954, + "38": 0.15711, + "39": 0.16207, + "40": 0.16543, + "41": 0.16329, + "42": 0.15895, + "43": 0.15771, + "44": 0.16372, + "45": 0.15827, + "46": 0.16205, + "47": 0.16175, + "48": 0.15754, + "49": 0.15916, + "50": 0.15618, + "51": 0.15693, + "52": 0.16151, + "53": 0.16143, + "54": 0.16281, + "55": 0.15891, + "56": 0.16235, + "57": 0.16248, + "58": 0.16949, + "59": 0.16264, + "60": 0.15666, + "61": 0.19456, + "62": 0.19414, + "63": 0.16346, + "64": 0.16675, + "65": 0.16803, + "66": 0.1748, + "67": 0.16431, + "68": 0.1587, + "69": 0.16219, + "70": 0.16457, + "71": 0.1716, + "72": 0.16546, + "73": 0.16711, + "74": 0.16142, + "75": 0.17042, + "76": 0.17092, + "77": 0.16596, + "78": 0.16577, + "79": 0.15743, + "80": 0.15851, + "81": 0.15791, + "82": 0.16001, + "83": 0.15783, + "84": 0.15788, + "85": 0.15665, + "86": 0.16107, + "87": 0.15608, + "88": 0.15928, + "89": 0.16138, + "90": 0.15621, + "91": 0.15886, + "92": 0.15808, + "93": 0.15911, + 
"94": 0.16777, + "95": 0.16017, + "96": 0.15821, + "97": 0.15642, + "98": 0.16061, + "99": 0.157, + "100": 0.15975 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json index a6e8f276b7b..522245541ce 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.33127, + "2": 10.35281, + "3": 9.79613, + "4": 9.60968, "5": 9.42269, + "6": 9.45137, + "7": 9.34348, + "8": 9.27525, + "9": 9.09676, "10": 9.0722, + "11": 8.8835, + "12": 8.83711, + "13": 8.86836, + "14": 8.71039, "15": 8.68191, + "16": 8.56149, + "17": 8.52311, + "18": 8.43963, + "19": 8.40439, "20": 8.29506, + "21": 8.27059, + "22": 8.17902, + "23": 8.12669, + "24": 8.14846, "25": 7.9909, + "26": 8.12216, + "27": 7.90453, + "28": 7.98655, + "29": 7.80845, "30": 7.86918, + "31": 7.83571, + "32": 7.72178, + "33": 7.80378, + "34": 7.59229, "35": 7.68371, + "36": 7.53883, + "37": 7.47609, + "38": 7.5168, + "39": 7.49978, "40": 7.51704, + "41": 7.43174, + "42": 7.40104, + "43": 7.44926, + "44": 7.38919, "45": 7.38016, + "46": 7.29476, + "47": 7.44829, + "48": 7.28213, + "49": 7.34657, "50": 7.17116, + "51": 7.37361, + "52": 7.13381, + "53": 7.11244, + "54": 7.23402, "55": 7.14785, + "56": 7.22775, + "57": 7.33273, + "58": 6.99461, + "59": 7.11599, "60": 7.13222, + "61": 7.1056, + "62": 7.26513, + "63": 7.14772, + "64": 7.08696, "65": 6.98643, + "66": 7.04728, + "67": 7.04697, + "68": 7.14062, + "69": 7.2435, "70": 
7.05957, + "71": 6.89356, + "72": 6.99769, + "73": 6.97897, + "74": 6.91983, "75": 7.05297, + "76": 6.96036, + "77": 7.0791, + "78": 7.01392, + "79": 6.88358, "80": 6.93014, + "81": 6.96553, + "82": 7.05265, + "83": 6.98788, + "84": 7.00427, "85": 6.84577, + "86": 7.03621, + "87": 6.96327, + "88": 6.9137, + "89": 6.80631, "90": 7.23619, + "91": 6.70015, + "92": 7.05679, + "93": 6.89287, + "94": 7.05835, "95": 6.84786, + "96": 6.96771, + "97": 6.94258, + "98": 6.87388, + "99": 7.01816, "100": 6.98466 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 43334.0, + "2": 44100.0, + "3": 44771.0, + "4": 42457.0, "5": 45411.0, + "6": 40966.0, + "7": 43193.0, + "8": 45457.0, + "9": 42550.0, "10": 45360.0, + "11": 44029.0, + "12": 44605.0, + "13": 43917.0, + "14": 46219.0, "15": 43943.0, + "16": 41732.0, + "17": 43861.0, + "18": 44721.0, + "19": 42597.0, "20": 44797.0, + "21": 44792.0, + "22": 41891.0, + "23": 45473.0, + "24": 43081.0, "25": 42682.0, + "26": 43950.0, + "27": 46253.0, + "28": 46447.0, + "29": 46164.0, "30": 44042.0, + "31": 41263.0, + "32": 43440.0, + "33": 45483.0, + "34": 43349.0, "35": 43273.0, + "36": 42490.0, + "37": 40647.0, + "38": 42549.0, + "39": 44766.0, "40": 43281.0, + "41": 44669.0, + "42": 43287.0, + "43": 45454.0, + "44": 44627.0, "45": 43353.0, + "46": 43925.0, + "47": 42498.0, + "48": 44758.0, + "49": 43173.0, "50": 43402.0, + "51": 41198.0, + "52": 43900.0, + "53": 43938.0, + "54": 41922.0, "55": 43916.0, + "56": 43237.0, + "57": 42634.0, + "58": 43916.0, + "59": 44616.0, "60": 41414.0, + "61": 39759.0, + "62": 44750.0, + "63": 44673.0, + "64": 45378.0, "65": 44765.0, + "66": 45401.0, + "67": 43155.0, + "68": 42552.0, + "69": 43831.0, "70": 45546.0, + "71": 43332.0, + "72": 44847.0, + "73": 45376.0, + "74": 42503.0, "75": 44704.0, + "76": 43916.0, + "77": 42101.0, + "78": 40543.0, + "79": 38997.0, "80": 41079.0, + "81": 45377.0, + "82": 43254.0, + "83": 38473.0, + "84": 42420.0, 
"85": 43989.0, + "86": 45694.0, + "87": 41164.0, + "88": 41773.0, + "89": 41047.0, "90": 44710.0, + "91": 46274.0, + "92": 41823.0, + "93": 43286.0, + "94": 39530.0, "95": 44074.0, + "96": 44686.0, + "97": 45424.0, + "98": 41849.0, + "99": 45567.0, "100": 42485.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 4158515200.0, "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, "75": 4158515200.0, + 
"76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 4158515200.0, "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, "100": 4158515200.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, "5": 6187556864.0, + "6": 6187556864.0, + "7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, "10": 6187556864.0, + "11": 6187556864.0, + "12": 6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, "20": 6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, "35": 6187556864.0, + "36": 6187556864.0, + "37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 6187556864.0, + "44": 6187556864.0, "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, "60": 6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 
6187556864.0, + "64": 6187556864.0, "65": 6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, "85": 6187556864.0, + "86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + "98": 6187556864.0, + "99": 6187556864.0, "100": 6187556864.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 6.70165, - "5": 0.14534, - "10": 0.14168, - "15": 0.17276, - "20": 0.14261, - "25": 0.13952, - "30": 0.14413, - "35": 0.14472, - "40": 0.14192, - "45": 0.14279, - "50": 0.14289, - "55": 0.14388, - "60": 0.14497, - "65": 0.14852, - "70": 0.14194, - "75": 0.1395, - "80": 0.14222, - "85": 0.13902, - "90": 0.1372, - "95": 0.13582, - "100": 0.13567 + "1": 7.04606, + "2": 0.19929, + "3": 0.2017, + "4": 0.19828, + "5": 0.15529, + "6": 0.15657, + "7": 0.1562, + "8": 0.15746, + "9": 0.15848, + "10": 0.1552, + "11": 0.15643, + "12": 0.15719, + "13": 0.15888, + "14": 0.15791, + "15": 0.15908, + "16": 0.15414, + "17": 0.1552, + "18": 0.15205, + "19": 0.18443, + "20": 0.19907, + "21": 0.16002, + "22": 0.1541, + "23": 0.1541, + "24": 0.15347, + "25": 0.15557, + "26": 0.15649, + "27": 0.16008, + "28": 0.15592, + "29": 0.15544, + "30": 0.15449, + "31": 0.15601, + "32": 0.15477, + "33": 0.159, + "34": 0.15733, + "35": 0.15695, + "36": 0.15477, + "37": 0.15376, + "38": 0.15585, + "39": 0.15472, + "40": 0.16007, + "41": 0.15379, + "42": 0.15522, + "43": 0.15668, + 
"44": 0.15453, + "45": 0.15571, + "46": 0.15742, + "47": 0.1588, + "48": 0.15282, + "49": 0.15611, + "50": 0.15733, + "51": 0.15969, + "52": 0.15894, + "53": 0.16067, + "54": 0.16019, + "55": 0.15633, + "56": 0.15774, + "57": 0.15905, + "58": 0.16207, + "59": 0.16104, + "60": 0.15837, + "61": 0.15701, + "62": 0.15604, + "63": 0.15894, + "64": 0.15836, + "65": 0.16179, + "66": 0.16196, + "67": 0.16049, + "68": 0.15825, + "69": 0.15755, + "70": 0.15963, + "71": 0.16471, + "72": 0.16654, + "73": 0.16164, + "74": 0.15823, + "75": 0.16142, + "76": 0.16113, + "77": 0.16286, + "78": 0.16729, + "79": 0.16051, + "80": 0.1567, + "81": 0.15597, + "82": 0.15346, + "83": 0.15578, + "84": 0.15723, + "85": 0.1555, + "86": 0.15702, + "87": 0.15866, + "88": 0.15938, + "89": 0.15659, + "90": 0.15777, + "91": 0.1688, + "92": 0.15804, + "93": 0.15347, + "94": 0.15467, + "95": 0.15963, + "96": 0.15485, + "97": 0.1585, + "98": 0.17109, + "99": 0.15645, + "100": 0.15472 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..46dc9be60a4 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34494, + "2": 10.36431, + "3": 9.73158, + "4": 9.57928, + "5": 9.38931, + "6": 9.41074, + "7": 9.30545, + "8": 9.24868, + "9": 9.09349, + "10": 9.01569, + "11": 8.86286, + "12": 8.79096, + "13": 8.80892, + "14": 8.67669, + "15": 8.64631, + "16": 8.5398, + "17": 8.47895, + "18": 8.38945, + "19": 8.36156, + "20": 8.26966, + "21": 8.26333, + "22": 8.15066, + "23": 
8.08893, + "24": 8.12421, + "25": 7.99493, + "26": 8.08494, + "27": 7.87755, + "28": 7.95863, + "29": 7.79585, + "30": 7.87492, + "31": 7.83245, + "32": 7.69489, + "33": 7.78469, + "34": 7.55767, + "35": 7.65834, + "36": 7.52881, + "37": 7.44912, + "38": 7.50398, + "39": 7.48056, + "40": 7.50302, + "41": 7.39767, + "42": 7.37206, + "43": 7.44301, + "44": 7.3811, + "45": 7.36143, + "46": 7.29415, + "47": 7.47498, + "48": 7.29564, + "49": 7.36092, + "50": 7.19205, + "51": 7.38769, + "52": 7.13773, + "53": 7.125, + "54": 7.23668, + "55": 7.16852, + "56": 7.22884, + "57": 7.34699, + "58": 7.03128, + "59": 7.1229, + "60": 7.16587, + "61": 7.1174, + "62": 7.26837, + "63": 7.16759, + "64": 7.08376, + "65": 7.00099, + "66": 7.07203, + "67": 7.05971, + "68": 7.14618, + "69": 7.03944, + "70": 7.07162, + "71": 6.91653, + "72": 7.02025, + "73": 6.9904, + "74": 6.9146, + "75": 7.07611, + "76": 6.97098, + "77": 7.08446, + "78": 7.03608, + "79": 6.88325, + "80": 6.95251, + "81": 6.985, + "82": 7.06843, + "83": 7.00882, + "84": 7.0181, + "85": 6.8641, + "86": 7.04979, + "87": 6.99342, + "88": 6.9238, + "89": 6.82406, + "90": 7.25457, + "91": 6.7226, + "92": 7.05372, + "93": 6.91688, + "94": 7.066, + "95": 6.8601, + "96": 6.98742, + "97": 6.96796, + "98": 6.89964, + "99": 7.02766, + "100": 6.99745 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43305.0, + "2": 44091.0, + "3": 44794.0, + "4": 42436.0, + "5": 45413.0, + "6": 40989.0, + "7": 43195.0, + "8": 45462.0, + "9": 42551.0, + "10": 45379.0, + "11": 44016.0, + "12": 44629.0, + "13": 43937.0, + "14": 46250.0, + "15": 43956.0, + "16": 41728.0, + "17": 43873.0, + "18": 44716.0, + "19": 42648.0, + "20": 44818.0, + "21": 44812.0, + "22": 41883.0, + "23": 45468.0, + "24": 43112.0, + "25": 42745.0, + "26": 43949.0, + "27": 46268.0, + "28": 46429.0, + "29": 46199.0, + "30": 44042.0, + "31": 41264.0, + "32": 43413.0, + "33": 45478.0, + "34": 43375.0, + "35": 43297.0, + "36": 
42545.0, + "37": 40689.0, + "38": 42575.0, + "39": 44772.0, + "40": 43251.0, + "41": 44707.0, + "42": 43261.0, + "43": 45506.0, + "44": 44652.0, + "45": 43345.0, + "46": 43935.0, + "47": 42506.0, + "48": 44693.0, + "49": 43200.0, + "50": 43415.0, + "51": 41174.0, + "52": 43885.0, + "53": 43959.0, + "54": 41961.0, + "55": 43960.0, + "56": 43269.0, + "57": 42561.0, + "58": 43898.0, + "59": 44654.0, + "60": 41326.0, + "61": 39744.0, + "62": 44774.0, + "63": 44682.0, + "64": 45396.0, + "65": 44730.0, + "66": 45388.0, + "67": 43196.0, + "68": 42556.0, + "69": 43825.0, + "70": 45543.0, + "71": 43407.0, + "72": 44832.0, + "73": 45412.0, + "74": 42502.0, + "75": 44684.0, + "76": 43926.0, + "77": 42100.0, + "78": 40525.0, + "79": 38954.0, + "80": 41118.0, + "81": 45412.0, + "82": 43238.0, + "83": 38495.0, + "84": 42524.0, + "85": 44024.0, + "86": 45749.0, + "87": 41116.0, + "88": 41798.0, + "89": 41078.0, + "90": 44744.0, + "91": 46266.0, + "92": 41865.0, + "93": 43254.0, + "94": 39588.0, + "95": 44092.0, + "96": 44732.0, + "97": 45474.0, + "98": 41859.0, + "99": 45537.0, + "100": 42500.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, + "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 4158515200.0, + "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, + "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, + "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, + "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, + "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, + "35": 4158515200.0, + "36": 4158515200.0, + "37": 
4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, + "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, + "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, + "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, + "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, + "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, + "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, + "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, + "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 4158515200.0, + "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, + "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, + "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, + "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, + "100": 4158515200.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4349380608.0, + "2": 6185459712.0, + "3": 6186508288.0, + "4": 6186508288.0, + "5": 6186508288.0, + "6": 6186508288.0, + "7": 6186508288.0, + "8": 6186508288.0, + "9": 6186508288.0, + "10": 6186508288.0, + "11": 6186508288.0, + "12": 6186508288.0, + "13": 6186508288.0, + "14": 6186508288.0, + "15": 6186508288.0, + "16": 6186508288.0, + "17": 6186508288.0, + "18": 6186508288.0, + "19": 6186508288.0, + "20": 6186508288.0, + "21": 6186508288.0, + "22": 6186508288.0, + "23": 
6186508288.0, + "24": 6186508288.0, + "25": 6186508288.0, + "26": 6186508288.0, + "27": 6186508288.0, + "28": 6186508288.0, + "29": 6186508288.0, + "30": 6186508288.0, + "31": 6186508288.0, + "32": 6186508288.0, + "33": 6186508288.0, + "34": 6186508288.0, + "35": 6186508288.0, + "36": 6186508288.0, + "37": 6186508288.0, + "38": 6186508288.0, + "39": 6186508288.0, + "40": 6186508288.0, + "41": 6186508288.0, + "42": 6186508288.0, + "43": 6186508288.0, + "44": 6186508288.0, + "45": 6186508288.0, + "46": 6186508288.0, + "47": 6186508288.0, + "48": 6186508288.0, + "49": 6186508288.0, + "50": 6186508288.0, + "51": 6186508288.0, + "52": 6186508288.0, + "53": 6186508288.0, + "54": 6186508288.0, + "55": 6186508288.0, + "56": 6186508288.0, + "57": 6186508288.0, + "58": 6186508288.0, + "59": 6186508288.0, + "60": 6186508288.0, + "61": 6186508288.0, + "62": 6186508288.0, + "63": 6186508288.0, + "64": 6186508288.0, + "65": 6186508288.0, + "66": 6186508288.0, + "67": 6186508288.0, + "68": 6186508288.0, + "69": 6186508288.0, + "70": 6186508288.0, + "71": 6186508288.0, + "72": 6186508288.0, + "73": 6186508288.0, + "74": 6186508288.0, + "75": 6186508288.0, + "76": 6186508288.0, + "77": 6186508288.0, + "78": 6186508288.0, + "79": 6186508288.0, + "80": 6186508288.0, + "81": 6186508288.0, + "82": 6186508288.0, + "83": 6186508288.0, + "84": 6186508288.0, + "85": 6186508288.0, + "86": 6186508288.0, + "87": 6186508288.0, + "88": 6186508288.0, + "89": 6186508288.0, + "90": 6186508288.0, + "91": 6186508288.0, + "92": 6186508288.0, + "93": 6186508288.0, + "94": 6186508288.0, + "95": 6186508288.0, + "96": 6186508288.0, + "97": 6186508288.0, + "98": 6186508288.0, + "99": 6186508288.0, + "100": 6186508288.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.21684, + "2": 0.37772, + "3": 0.23303, + "4": 0.23009, + "5": 0.22929, + "6": 0.22867, + "7": 0.22881, + "8": 0.22909, + "9": 0.22901, + "10": 0.22924, + "11": 0.23187, + "12": 
0.22897, + "13": 0.23042, + "14": 0.2296, + "15": 0.22858, + "16": 0.22859, + "17": 0.22788, + "18": 0.22827, + "19": 0.22884, + "20": 0.23119, + "21": 0.23125, + "22": 0.22876, + "23": 0.22795, + "24": 0.22894, + "25": 0.22857, + "26": 0.22882, + "27": 0.22865, + "28": 0.22894, + "29": 0.22835, + "30": 0.23042, + "31": 0.22904, + "32": 0.23034, + "33": 0.22865, + "34": 0.22876, + "35": 0.22767, + "36": 0.23145, + "37": 0.22819, + "38": 0.22929, + "39": 0.23937, + "40": 0.23013, + "41": 0.23989, + "42": 0.25348, + "43": 0.23486, + "44": 0.23088, + "45": 0.23068, + "46": 0.22861, + "47": 0.22901, + "48": 0.23829, + "49": 0.23037, + "50": 0.23633, + "51": 0.23085, + "52": 0.22798, + "53": 0.22797, + "54": 0.22841, + "55": 0.23845, + "56": 0.2312, + "57": 0.23463, + "58": 0.23191, + "59": 0.23051, + "60": 0.23189, + "61": 0.23338, + "62": 0.2342, + "63": 0.24812, + "64": 0.23433, + "65": 0.23118, + "66": 0.23175, + "67": 0.2309, + "68": 0.23178, + "69": 0.23371, + "70": 0.24569, + "71": 0.23723, + "72": 0.23422, + "73": 0.23146, + "74": 0.23179, + "75": 0.23182, + "76": 0.23205, + "77": 0.23407, + "78": 0.23174, + "79": 0.23271, + "80": 0.23234, + "81": 0.23065, + "82": 0.23148, + "83": 0.23229, + "84": 0.23128, + "85": 0.23341, + "86": 0.23319, + "87": 0.23195, + "88": 0.23228, + "89": 0.23287, + "90": 0.2318, + "91": 0.23237, + "92": 0.23164, + "93": 0.2304, + "94": 0.23017, + "95": 0.23214, + "96": 0.23143, + "97": 0.23171, + "98": 0.23065, + "99": 0.23302, + "100": 0.23775 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..80c9681e5c3 --- /dev/null +++ 
b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34494, + "2": 10.36431, + "3": 9.73158, + "4": 9.57928, + "5": 9.38931, + "6": 9.41074, + "7": 9.30545, + "8": 9.24868, + "9": 9.09349, + "10": 9.01569, + "11": 8.86286, + "12": 8.79096, + "13": 8.80892, + "14": 8.67669, + "15": 8.64631, + "16": 8.5398, + "17": 8.47895, + "18": 8.38945, + "19": 8.36156, + "20": 8.26966, + "21": 8.26333, + "22": 8.15066, + "23": 8.08893, + "24": 8.12421, + "25": 7.99493, + "26": 8.08494, + "27": 7.87755, + "28": 7.95863, + "29": 7.79585, + "30": 7.87492, + "31": 7.83245, + "32": 7.69489, + "33": 7.78469, + "34": 7.55767, + "35": 7.65834, + "36": 7.52881, + "37": 7.44912, + "38": 7.50398, + "39": 7.48056, + "40": 7.50302, + "41": 7.39767, + "42": 7.37206, + "43": 7.44301, + "44": 7.3811, + "45": 7.36143, + "46": 7.29415, + "47": 7.47498, + "48": 7.29564, + "49": 7.36092, + "50": 7.19205, + "51": 7.38769, + "52": 7.13773, + "53": 7.125, + "54": 7.23668, + "55": 7.16852, + "56": 7.22884, + "57": 7.34699, + "58": 7.03128, + "59": 7.1229, + "60": 7.16587, + "61": 7.1174, + "62": 7.26837, + "63": 7.16759, + "64": 7.08376, + "65": 7.00099, + "66": 7.07203, + "67": 7.05971, + "68": 7.14618, + "69": 7.03944, + "70": 7.07162, + "71": 6.91653, + "72": 7.02025, + "73": 6.9904, + "74": 6.9146, + "75": 7.07611, + "76": 6.97098, + "77": 7.08446, + "78": 7.03608, + "79": 6.88325, + "80": 6.95251, + "81": 6.985, + "82": 7.06843, + "83": 7.00882, + "84": 7.0181, + "85": 6.8641, + "86": 7.04979, + "87": 6.99342, + "88": 6.9238, + "89": 6.82406, + "90": 7.25457, + "91": 6.7226, + "92": 7.05372, + "93": 6.91688, + "94": 7.066, + "95": 6.8601, + "96": 6.98742, + "97": 6.96796, + "98": 6.89964, + "99": 7.02766, + "100": 6.99745 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 43305.0, + "2": 44091.0, + "3": 44794.0, + "4": 42436.0, + "5": 45413.0, + "6": 40989.0, + "7": 43195.0, + "8": 45462.0, + "9": 42551.0, + "10": 45379.0, + "11": 44016.0, + "12": 44629.0, + "13": 43937.0, + "14": 46250.0, + "15": 43956.0, + "16": 41728.0, + "17": 43873.0, + "18": 44716.0, + "19": 42648.0, + "20": 44818.0, + "21": 44812.0, + "22": 41883.0, + "23": 45468.0, + "24": 43112.0, + "25": 42745.0, + "26": 43949.0, + "27": 46268.0, + "28": 46429.0, + "29": 46199.0, + "30": 44042.0, + "31": 41264.0, + "32": 43413.0, + "33": 45478.0, + "34": 43375.0, + "35": 43297.0, + "36": 42545.0, + "37": 40689.0, + "38": 42575.0, + "39": 44772.0, + "40": 43251.0, + "41": 44707.0, + "42": 43261.0, + "43": 45506.0, + "44": 44652.0, + "45": 43345.0, + "46": 43935.0, + "47": 42506.0, + "48": 44693.0, + "49": 43200.0, + "50": 43415.0, + "51": 41174.0, + "52": 43885.0, + "53": 43959.0, + "54": 41961.0, + "55": 43960.0, + "56": 43269.0, + "57": 42561.0, + "58": 43898.0, + "59": 44654.0, + "60": 41326.0, + "61": 39744.0, + "62": 44774.0, + "63": 44682.0, + "64": 45396.0, + "65": 44730.0, + "66": 45388.0, + "67": 43196.0, + "68": 42556.0, + "69": 43825.0, + "70": 45543.0, + "71": 43407.0, + "72": 44832.0, + "73": 45412.0, + "74": 42502.0, + "75": 44684.0, + "76": 43926.0, + "77": 42100.0, + "78": 40525.0, + "79": 38954.0, + "80": 41118.0, + "81": 45412.0, + "82": 43238.0, + "83": 38495.0, + "84": 42524.0, + "85": 44024.0, + "86": 45749.0, + "87": 41116.0, + "88": 41798.0, + "89": 41078.0, + "90": 44744.0, + "91": 46266.0, + "92": 41865.0, + "93": 43254.0, + "94": 39588.0, + "95": 44092.0, + "96": 44732.0, + "97": 45474.0, + "98": 41859.0, + "99": 45537.0, + "100": 42500.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, + "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 
4158515200.0, + "9": 4158515200.0, + "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, + "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, + "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, + "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, + "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, + "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, + "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, + "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, + "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, + "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, + "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, + "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, + "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, + "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 4158515200.0, + "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, + "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, + "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, + "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + 
"99": 4158515200.0, + "100": 4158515200.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, + "5": 6187556864.0, + "6": 6187556864.0, + "7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, + "10": 6187556864.0, + "11": 6187556864.0, + "12": 6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, + "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, + "20": 6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, + "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, + "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, + "35": 6187556864.0, + "36": 6187556864.0, + "37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, + "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 6187556864.0, + "44": 6187556864.0, + "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, + "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, + "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, + "60": 6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 6187556864.0, + "64": 6187556864.0, + "65": 6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, + "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, + "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, + "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, + 
"85": 6187556864.0, + "86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, + "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, + "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + "98": 6187556864.0, + "99": 6187556864.0, + "100": 6187556864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.32163, + "2": 0.38506, + "3": 0.23264, + "4": 0.23088, + "5": 0.23265, + "6": 0.23173, + "7": 0.23126, + "8": 0.23038, + "9": 0.23084, + "10": 0.23209, + "11": 0.23149, + "12": 0.23231, + "13": 0.23319, + "14": 0.22867, + "15": 0.22812, + "16": 0.22793, + "17": 0.22839, + "18": 0.22788, + "19": 0.22802, + "20": 0.22831, + "21": 0.22863, + "22": 0.22778, + "23": 0.22775, + "24": 0.2276, + "25": 0.22851, + "26": 0.22788, + "27": 0.22874, + "28": 0.22765, + "29": 0.2281, + "30": 0.2293, + "31": 0.22952, + "32": 0.22888, + "33": 0.22916, + "34": 0.22869, + "35": 0.22859, + "36": 0.22919, + "37": 0.22959, + "38": 0.22853, + "39": 0.22896, + "40": 0.22961, + "41": 0.22873, + "42": 0.22928, + "43": 0.22982, + "44": 0.22937, + "45": 0.22999, + "46": 0.22841, + "47": 0.23003, + "48": 0.22906, + "49": 0.23037, + "50": 0.22982, + "51": 0.23126, + "52": 0.22892, + "53": 0.23322, + "54": 0.22861, + "55": 0.23475, + "56": 0.22765, + "57": 0.23073, + "58": 0.22912, + "59": 0.23304, + "60": 0.23302, + "61": 0.23295, + "62": 0.23275, + "63": 0.23408, + "64": 0.234, + "65": 0.23292, + "66": 0.22871, + "67": 0.23056, + "68": 0.22829, + "69": 0.23494, + "70": 0.22853, + "71": 0.23538, + "72": 0.23311, + "73": 0.23976, + "74": 0.23226, + "75": 0.22923, + "76": 0.23951, + "77": 0.23749, + "78": 0.22838, + "79": 0.22723, + "80": 0.22612, + "81": 0.22628, + "82": 0.22606, + "83": 0.22681, + "84": 0.23292, + "85": 0.22707, + "86": 0.22686, + "87": 0.22866, + "88": 0.22831, + "89": 0.22841, + "90": 0.2279, + "91": 0.22948, + "92": 
0.22866, + "93": 0.22908, + "94": 0.2282, + "95": 0.22949, + "96": 0.22803, + "97": 0.22905, + "98": 0.22804, + "99": 0.22947, + "100": 0.22895 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..f1c0511f9d6 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.33127, + "2": 10.35281, + "3": 9.79613, + "4": 9.60968, + "5": 9.42269, + "6": 9.45137, + "7": 9.34348, + "8": 9.27525, + "9": 9.09676, + "10": 9.0722, + "11": 8.8835, + "12": 8.83711, + "13": 8.86836, + "14": 8.71039, + "15": 8.68191, + "16": 8.56149, + "17": 8.52311, + "18": 8.43963, + "19": 8.40439, + "20": 8.29506, + "21": 8.27059, + "22": 8.17902, + "23": 8.12669, + "24": 8.14846, + "25": 7.9909, + "26": 8.12216, + "27": 7.90453, + "28": 7.98655, + "29": 7.80845, + "30": 7.86918, + "31": 7.83571, + "32": 7.72178, + "33": 7.80378, + "34": 7.59229, + "35": 7.68371, + "36": 7.53883, + "37": 7.47609, + "38": 7.5168, + "39": 7.49978, + "40": 7.51704, + "41": 7.43174, + "42": 7.40104, + "43": 7.44926, + "44": 7.38919, + "45": 7.38016, + "46": 7.29476, + "47": 7.44829, + "48": 7.28213, + "49": 7.34657, + "50": 7.17116, + "51": 7.37361, + "52": 7.13381, + "53": 7.11244, + "54": 7.23402, + "55": 7.14785, + "56": 7.22775, + "57": 7.33273, + "58": 6.99461, + "59": 7.11599, + "60": 7.13222, + "61": 7.1056, + "62": 7.26513, + "63": 7.14772, + "64": 7.08696, + "65": 6.98643, + "66": 7.04728, + "67": 7.04697, + "68": 7.14062, + "69": 7.2435, + "70": 7.05957, + "71": 6.89356, + "72": 
6.99769, + "73": 6.97897, + "74": 6.91983, + "75": 7.05297, + "76": 6.96036, + "77": 7.0791, + "78": 7.01392, + "79": 6.88358, + "80": 6.93014, + "81": 6.96553, + "82": 7.05265, + "83": 6.98788, + "84": 7.00427, + "85": 6.84577, + "86": 7.03621, + "87": 6.96327, + "88": 6.9137, + "89": 6.80631, + "90": 7.23619, + "91": 6.70015, + "92": 7.05679, + "93": 6.89287, + "94": 7.05835, + "95": 6.84786, + "96": 6.96771, + "97": 6.94258, + "98": 6.87388, + "99": 7.01816, + "100": 6.98466 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43334.0, + "2": 44100.0, + "3": 44771.0, + "4": 42457.0, + "5": 45411.0, + "6": 40966.0, + "7": 43193.0, + "8": 45457.0, + "9": 42550.0, + "10": 45360.0, + "11": 44029.0, + "12": 44605.0, + "13": 43917.0, + "14": 46219.0, + "15": 43943.0, + "16": 41732.0, + "17": 43861.0, + "18": 44721.0, + "19": 42597.0, + "20": 44797.0, + "21": 44792.0, + "22": 41891.0, + "23": 45473.0, + "24": 43081.0, + "25": 42682.0, + "26": 43950.0, + "27": 46253.0, + "28": 46447.0, + "29": 46164.0, + "30": 44042.0, + "31": 41263.0, + "32": 43440.0, + "33": 45483.0, + "34": 43349.0, + "35": 43273.0, + "36": 42490.0, + "37": 40647.0, + "38": 42549.0, + "39": 44766.0, + "40": 43281.0, + "41": 44669.0, + "42": 43287.0, + "43": 45454.0, + "44": 44627.0, + "45": 43353.0, + "46": 43925.0, + "47": 42498.0, + "48": 44758.0, + "49": 43173.0, + "50": 43402.0, + "51": 41198.0, + "52": 43900.0, + "53": 43938.0, + "54": 41922.0, + "55": 43916.0, + "56": 43237.0, + "57": 42634.0, + "58": 43916.0, + "59": 44616.0, + "60": 41414.0, + "61": 39759.0, + "62": 44750.0, + "63": 44673.0, + "64": 45378.0, + "65": 44765.0, + "66": 45401.0, + "67": 43155.0, + "68": 42552.0, + "69": 43831.0, + "70": 45546.0, + "71": 43332.0, + "72": 44847.0, + "73": 45376.0, + "74": 42503.0, + "75": 44704.0, + "76": 43916.0, + "77": 42101.0, + "78": 40543.0, + "79": 38997.0, + "80": 41079.0, + "81": 45377.0, + "82": 43254.0, + "83": 38473.0, + "84": 
42420.0, + "85": 43989.0, + "86": 45694.0, + "87": 41164.0, + "88": 41773.0, + "89": 41047.0, + "90": 44710.0, + "91": 46274.0, + "92": 41823.0, + "93": 43286.0, + "94": 39530.0, + "95": 44074.0, + "96": 44686.0, + "97": 45424.0, + "98": 41849.0, + "99": 45567.0, + "100": 42485.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, + "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 4158515200.0, + "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, + "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, + "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, + "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, + "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, + "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, + "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, + "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, + "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, + "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, + "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, + "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, + "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 
4158515200.0, + "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 4158515200.0, + "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, + "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, + "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, + "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, + "100": 4158515200.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, + "5": 6187556864.0, + "6": 6187556864.0, + "7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, + "10": 6187556864.0, + "11": 6187556864.0, + "12": 6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, + "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, + "20": 6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, + "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, + "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, + "35": 6187556864.0, + "36": 6187556864.0, + "37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, + "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 6187556864.0, + "44": 6187556864.0, + "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, + "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, + "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, + "60": 
6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 6187556864.0, + "64": 6187556864.0, + "65": 6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, + "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, + "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, + "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, + "85": 6187556864.0, + "86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, + "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, + "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + "98": 6187556864.0, + "99": 6187556864.0, + "100": 6187556864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.44745, + "2": 0.28877, + "3": 0.13863, + "4": 0.13991, + "5": 0.1386, + "6": 0.1688, + "7": 0.13897, + "8": 0.14655, + "9": 0.14408, + "10": 0.14011, + "11": 0.14086, + "12": 0.13894, + "13": 0.13997, + "14": 0.15002, + "15": 0.14424, + "16": 0.14057, + "17": 0.13971, + "18": 0.14204, + "19": 0.13911, + "20": 0.13847, + "21": 0.1511, + "22": 0.1466, + "23": 0.13965, + "24": 0.13912, + "25": 0.1401, + "26": 0.13945, + "27": 0.13889, + "28": 0.14975, + "29": 0.14768, + "30": 0.14096, + "31": 0.1397, + "32": 0.13848, + "33": 0.14003, + "34": 0.13906, + "35": 0.15106, + "36": 0.14946, + "37": 0.13936, + "38": 0.13863, + "39": 0.13854, + "40": 0.13912, + "41": 0.13768, + "42": 0.16204, + "43": 0.14058, + "44": 0.14047, + "45": 0.14051, + "46": 0.13844, + "47": 0.14085, + "48": 0.14712, + "49": 0.14538, + "50": 0.14262, + "51": 0.14224, + "52": 0.14099, + "53": 0.14182, + "54": 0.14142, + "55": 0.14151, + "56": 0.17071, + "57": 0.16514, + "58": 0.14109, + "59": 0.14613, + "60": 
0.13996, + "61": 0.1438, + "62": 0.1439, + "63": 0.1704, + "64": 0.17016, + "65": 0.14013, + "66": 0.1408, + "67": 0.14073, + "68": 0.14112, + "69": 0.14885, + "70": 0.15051, + "71": 0.1459, + "72": 0.14741, + "73": 0.14647, + "74": 0.14559, + "75": 0.14518, + "76": 0.14651, + "77": 0.18065, + "78": 0.17614, + "79": 0.14661, + "80": 0.14187, + "81": 0.14198, + "82": 0.13988, + "83": 0.14058, + "84": 0.14152, + "85": 0.14263, + "86": 0.14317, + "87": 0.14179, + "88": 0.14281, + "89": 0.13999, + "90": 0.14469, + "91": 0.142, + "92": 0.14198, + "93": 0.14441, + "94": 0.14544, + "95": 0.14559, + "96": 0.14352, + "97": 0.14163, + "98": 0.14642, + "99": 0.14323, + "100": 0.14598 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..e0a55371afb --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.33127, + "2": 10.35281, + "3": 9.79613, + "4": 9.60968, + "5": 9.42269, + "6": 9.45137, + "7": 9.34348, + "8": 9.27525, + "9": 9.09676, + "10": 9.0722, + "11": 8.8835, + "12": 8.83711, + "13": 8.86836, + "14": 8.71039, + "15": 8.68191, + "16": 8.56149, + "17": 8.52311, + "18": 8.43963, + "19": 8.40439, + "20": 8.29506, + "21": 8.27059, + "22": 8.17902, + "23": 8.12669, + "24": 8.14846, + "25": 7.9909, + "26": 8.12216, + "27": 7.90453, + "28": 7.98655, + "29": 7.80845, + "30": 7.86918, + "31": 7.83571, + "32": 7.72178, + "33": 7.80378, + "34": 7.59229, + "35": 7.68371, + "36": 7.53883, + "37": 7.47609, + "38": 7.5168, + "39": 7.49978, + "40": 7.51704, + "41": 
7.43174, + "42": 7.40104, + "43": 7.44926, + "44": 7.38919, + "45": 7.38016, + "46": 7.29476, + "47": 7.44829, + "48": 7.28213, + "49": 7.34657, + "50": 7.17116, + "51": 7.37361, + "52": 7.13381, + "53": 7.11244, + "54": 7.23402, + "55": 7.14785, + "56": 7.22775, + "57": 7.33273, + "58": 6.99461, + "59": 7.11599, + "60": 7.13222, + "61": 7.1056, + "62": 7.26513, + "63": 7.14772, + "64": 7.08696, + "65": 6.98643, + "66": 7.04728, + "67": 7.04697, + "68": 7.14062, + "69": 7.2435, + "70": 7.05957, + "71": 6.89356, + "72": 6.99769, + "73": 6.97897, + "74": 6.91983, + "75": 7.05297, + "76": 6.96036, + "77": 7.0791, + "78": 7.01392, + "79": 6.88358, + "80": 6.93014, + "81": 6.96553, + "82": 7.05265, + "83": 6.98788, + "84": 7.00427, + "85": 6.84577, + "86": 7.03621, + "87": 6.96327, + "88": 6.9137, + "89": 6.80631, + "90": 7.23619, + "91": 6.70015, + "92": 7.05679, + "93": 6.89287, + "94": 7.05835, + "95": 6.84786, + "96": 6.96771, + "97": 6.94258, + "98": 6.87388, + "99": 7.01816, + "100": 6.98466 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43334.0, + "2": 44100.0, + "3": 44771.0, + "4": 42457.0, + "5": 45411.0, + "6": 40966.0, + "7": 43193.0, + "8": 45457.0, + "9": 42550.0, + "10": 45360.0, + "11": 44029.0, + "12": 44605.0, + "13": 43917.0, + "14": 46219.0, + "15": 43943.0, + "16": 41732.0, + "17": 43861.0, + "18": 44721.0, + "19": 42597.0, + "20": 44797.0, + "21": 44792.0, + "22": 41891.0, + "23": 45473.0, + "24": 43081.0, + "25": 42682.0, + "26": 43950.0, + "27": 46253.0, + "28": 46447.0, + "29": 46164.0, + "30": 44042.0, + "31": 41263.0, + "32": 43440.0, + "33": 45483.0, + "34": 43349.0, + "35": 43273.0, + "36": 42490.0, + "37": 40647.0, + "38": 42549.0, + "39": 44766.0, + "40": 43281.0, + "41": 44669.0, + "42": 43287.0, + "43": 45454.0, + "44": 44627.0, + "45": 43353.0, + "46": 43925.0, + "47": 42498.0, + "48": 44758.0, + "49": 43173.0, + "50": 43402.0, + "51": 41198.0, + "52": 43900.0, + "53": 43938.0, 
+ "54": 41922.0, + "55": 43916.0, + "56": 43237.0, + "57": 42634.0, + "58": 43916.0, + "59": 44616.0, + "60": 41414.0, + "61": 39759.0, + "62": 44750.0, + "63": 44673.0, + "64": 45378.0, + "65": 44765.0, + "66": 45401.0, + "67": 43155.0, + "68": 42552.0, + "69": 43831.0, + "70": 45546.0, + "71": 43332.0, + "72": 44847.0, + "73": 45376.0, + "74": 42503.0, + "75": 44704.0, + "76": 43916.0, + "77": 42101.0, + "78": 40543.0, + "79": 38997.0, + "80": 41079.0, + "81": 45377.0, + "82": 43254.0, + "83": 38473.0, + "84": 42420.0, + "85": 43989.0, + "86": 45694.0, + "87": 41164.0, + "88": 41773.0, + "89": 41047.0, + "90": 44710.0, + "91": 46274.0, + "92": 41823.0, + "93": 43286.0, + "94": 39530.0, + "95": 44074.0, + "96": 44686.0, + "97": 45424.0, + "98": 41849.0, + "99": 45567.0, + "100": 42485.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, + "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 4158515200.0, + "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, + "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, + "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, + "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, + "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, + "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, + "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, + "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, + "50": 4158515200.0, 
+ "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, + "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, + "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, + "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, + "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, + "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 4158515200.0, + "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, + "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, + "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, + "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, + "100": 4158515200.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, + "5": 6187556864.0, + "6": 6187556864.0, + "7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, + "10": 6187556864.0, + "11": 6187556864.0, + "12": 6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, + "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, + "20": 6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, + "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, + "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, + "35": 6187556864.0, + "36": 6187556864.0, + 
"37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, + "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 6187556864.0, + "44": 6187556864.0, + "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, + "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, + "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, + "60": 6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 6187556864.0, + "64": 6187556864.0, + "65": 6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, + "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, + "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, + "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, + "85": 6187556864.0, + "86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, + "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, + "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + "98": 6187556864.0, + "99": 6187556864.0, + "100": 6187556864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.98463, + "2": 0.19558, + "3": 0.15734, + "4": 0.15695, + "5": 0.15774, + "6": 0.15468, + "7": 0.15373, + "8": 0.15721, + "9": 0.15375, + "10": 0.15555, + "11": 0.15762, + "12": 0.15358, + "13": 0.15446, + "14": 0.15343, + "15": 0.15567, + "16": 0.15597, + "17": 0.19986, + "18": 0.19685, + "19": 0.15757, + "20": 0.16418, + "21": 0.1662, + "22": 0.1633, + "23": 0.15542, + "24": 0.16131, + "25": 0.15713, + "26": 0.16116, + "27": 0.15731, + "28": 0.16645, + "29": 0.1581, + "30": 
0.16334, + "31": 0.15469, + "32": 0.1607, + "33": 0.15565, + "34": 0.16369, + "35": 0.15592, + "36": 0.16404, + "37": 0.15034, + "38": 0.15864, + "39": 0.15017, + "40": 0.1607, + "41": 0.15387, + "42": 0.17077, + "43": 0.15397, + "44": 0.1563, + "45": 0.15512, + "46": 0.16115, + "47": 0.15635, + "48": 0.16292, + "49": 0.15581, + "50": 0.16402, + "51": 0.15457, + "52": 0.16232, + "53": 0.156, + "54": 0.16433, + "55": 0.15283, + "56": 0.19434, + "57": 0.19273, + "58": 0.15955, + "59": 0.15405, + "60": 0.15503, + "61": 0.15418, + "62": 0.15446, + "63": 0.15778, + "64": 0.1578, + "65": 0.16024, + "66": 0.15656, + "67": 0.15524, + "68": 0.15394, + "69": 0.16041, + "70": 0.16082, + "71": 0.16503, + "72": 0.16142, + "73": 0.16242, + "74": 0.15995, + "75": 0.15816, + "76": 0.16199, + "77": 0.16827, + "78": 0.15987, + "79": 0.15797, + "80": 0.15617, + "81": 0.15308, + "82": 0.15484, + "83": 0.15382, + "84": 0.16856, + "85": 0.15976, + "86": 0.15794, + "87": 0.15409, + "88": 0.15333, + "89": 0.15511, + "90": 0.15333, + "91": 0.17162, + "92": 0.15418, + "93": 0.15421, + "94": 0.15169, + "95": 0.15479, + "96": 0.15268, + "97": 0.1552, + "98": 0.1575, + "99": 0.15403, + "100": 0.15379 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json index 3ab4415923d..b7f4830a0c8 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.372, + "2": 10.37006, + "3": 9.85232, + "4": 9.61996, "5": 9.40868, + "6": 9.43215, + "7": 9.31482, + 
"8": 9.27336, + "9": 9.1139, "10": 9.03962, + "11": 8.87218, + "12": 8.80873, + "13": 8.83468, + "14": 8.69011, "15": 8.66228, + "16": 8.54828, + "17": 8.50093, + "18": 8.42525, + "19": 8.3881, "20": 8.2807, + "21": 8.26609, + "22": 8.16003, + "23": 8.1124, + "24": 8.14262, "25": 7.98432, + "26": 8.10592, + "27": 7.88963, + "28": 7.97037, + "29": 7.81276, "30": 7.87638, + "31": 7.82516, + "32": 7.70248, + "33": 7.80198, + "34": 7.56872, "35": 7.67379, + "36": 7.54691, + "37": 7.47408, + "38": 7.50739, + "39": 7.49773, "40": 7.51091, + "41": 7.41065, + "42": 7.37995, + "43": 7.44078, + "44": 7.39393, "45": 7.37239, + "46": 7.28427, + "47": 7.46631, + "48": 7.2905, + "49": 7.35025, "50": 7.17204, + "51": 7.37012, + "52": 7.14467, + "53": 7.12652, + "54": 7.23751, "55": 7.15586, + "56": 7.23154, + "57": 7.33541, + "58": 7.01363, + "59": 7.11431, "60": 7.15121, + "61": 7.10904, + "62": 7.26834, + "63": 7.15176, + "64": 7.08415, "65": 6.99114, + "66": 7.05301, + "67": 7.04354, + "68": 7.1398, + "69": 7.03224, "70": 7.05832, + "71": 6.90372, + "72": 6.99794, + "73": 6.9769, + "74": 6.91759, "75": 7.06626, + "76": 6.95758, + "77": 7.0871, + "78": 7.03238, + "79": 6.85274, "80": 6.93633, + "81": 6.97617, + "82": 7.06196, + "83": 6.98213, + "84": 7.00931, "85": 6.85082, + "86": 7.04673, + "87": 6.97907, + "88": 6.91096, + "89": 6.81719, "90": 7.2459, + "91": 6.7046, + "92": 7.05377, + "93": 6.89397, + "94": 7.0542, "95": 6.85031, + "96": 6.96441, + "97": 6.95632, + "98": 6.88246, + "99": 7.00392, "100": 6.98993 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 43299.0, + "2": 44047.0, + "3": 44744.0, + "4": 42405.0, "5": 45385.0, + "6": 40946.0, + "7": 43183.0, + "8": 45446.0, + "9": 42445.0, "10": 45361.0, + "11": 43966.0, + "12": 44593.0, + "13": 43907.0, + "14": 46210.0, "15": 43904.0, + "16": 41614.0, + "17": 43840.0, + "18": 44687.0, + "19": 42536.0, "20": 44746.0, + "21": 44767.0, + "22": 41831.0, + 
"23": 45449.0, + "24": 43072.0, "25": 42457.0, + "26": 43921.0, + "27": 46208.0, + "28": 46361.0, + "29": 46146.0, "30": 43976.0, + "31": 41272.0, + "32": 43348.0, + "33": 45431.0, + "34": 43295.0, "35": 43264.0, + "36": 42493.0, + "37": 40075.0, + "38": 42518.0, + "39": 44713.0, "40": 43230.0, + "41": 44666.0, + "42": 43251.0, + "43": 45471.0, + "44": 44600.0, "45": 43330.0, + "46": 43932.0, + "47": 42400.0, + "48": 44673.0, + "49": 43149.0, "50": 43373.0, + "51": 41142.0, + "52": 43824.0, + "53": 43917.0, + "54": 42023.0, "55": 43883.0, + "56": 43235.0, + "57": 42536.0, + "58": 43829.0, + "59": 44648.0, "60": 41187.0, + "61": 39720.0, + "62": 44740.0, + "63": 44690.0, + "64": 45358.0, "65": 44695.0, + "66": 45364.0, + "67": 43138.0, + "68": 42538.0, + "69": 43820.0, "70": 45549.0, + "71": 43324.0, + "72": 44760.0, + "73": 45363.0, + "74": 42473.0, "75": 44666.0, + "76": 43903.0, + "77": 42082.0, + "78": 40295.0, + "79": 38890.0, "80": 41131.0, + "81": 45363.0, + "82": 43206.0, + "83": 38487.0, + "84": 42462.0, "85": 43985.0, + "86": 45695.0, + "87": 40826.0, + "88": 41822.0, + "89": 41069.0, "90": 44664.0, + "91": 46170.0, + "92": 41797.0, + "93": 43208.0, + "94": 39552.0, "95": 44106.0, + "96": 44697.0, + "97": 45398.0, + "98": 41792.0, + "99": 45429.0, "100": 42437.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2171550208.0, + "2": 2171550208.0, + "3": 2171550208.0, + "4": 2171550208.0, "5": 2171550208.0, + "6": 2171550208.0, + "7": 2171550208.0, + "8": 2171550208.0, + "9": 2171550208.0, "10": 2171550208.0, + "11": 2171550208.0, + "12": 2171550208.0, + "13": 2171550208.0, + "14": 2171550208.0, "15": 2171550208.0, + "16": 2171550208.0, + "17": 2171550208.0, + "18": 2171550208.0, + "19": 2171550208.0, "20": 2171550208.0, + "21": 2171550208.0, + "22": 2171550208.0, + "23": 2171550208.0, + "24": 2171550208.0, "25": 2171550208.0, + "26": 2171550208.0, + "27": 2171550208.0, + "28": 
2171550208.0, + "29": 2171550208.0, "30": 2171550208.0, + "31": 2171550208.0, + "32": 2171550208.0, + "33": 2171550208.0, + "34": 2171550208.0, "35": 2171550208.0, + "36": 2171550208.0, + "37": 2171550208.0, + "38": 2171550208.0, + "39": 2171550208.0, "40": 2171550208.0, + "41": 2171550208.0, + "42": 2171550208.0, + "43": 2171550208.0, + "44": 2171550208.0, "45": 2171550208.0, + "46": 2171550208.0, + "47": 2171550208.0, + "48": 2171550208.0, + "49": 2171550208.0, "50": 2171550208.0, + "51": 2171550208.0, + "52": 2171550208.0, + "53": 2171550208.0, + "54": 2171550208.0, "55": 2171550208.0, + "56": 2171550208.0, + "57": 2171550208.0, + "58": 2171550208.0, + "59": 2171550208.0, "60": 2171550208.0, + "61": 2171550208.0, + "62": 2171550208.0, + "63": 2171550208.0, + "64": 2171550208.0, "65": 2171550208.0, + "66": 2171550208.0, + "67": 2171550208.0, + "68": 2171550208.0, + "69": 2171550208.0, "70": 2171550208.0, + "71": 2171550208.0, + "72": 2171550208.0, + "73": 2171550208.0, + "74": 2171550208.0, "75": 2171550208.0, + "76": 2171550208.0, + "77": 2171550208.0, + "78": 2171550208.0, + "79": 2171550208.0, "80": 2171550208.0, + "81": 2171550208.0, + "82": 2171550208.0, + "83": 2171550208.0, + "84": 2171550208.0, "85": 2171550208.0, + "86": 2171550208.0, + "87": 2171550208.0, + "88": 2171550208.0, + "89": 2171550208.0, "90": 2171550208.0, + "91": 2171550208.0, + "92": 2171550208.0, + "93": 2171550208.0, + "94": 2171550208.0, "95": 2171550208.0, + "96": 2171550208.0, + "97": 2171550208.0, + "98": 2171550208.0, + "99": 2171550208.0, "100": 2171550208.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 2407642624.0, + "2": 3336458752.0, + "3": 3336458752.0, + "4": 3336458752.0, "5": 3336458752.0, + "6": 3336458752.0, + "7": 3336458752.0, + "8": 3336458752.0, + "9": 3336458752.0, "10": 3336458752.0, + "11": 3336458752.0, + "12": 3336458752.0, + "13": 3336458752.0, + "14": 3336458752.0, "15": 
3336458752.0, + "16": 3336458752.0, + "17": 3336458752.0, + "18": 3336458752.0, + "19": 3336458752.0, "20": 3336458752.0, + "21": 3336458752.0, + "22": 3336458752.0, + "23": 3336458752.0, + "24": 3336458752.0, "25": 3336458752.0, + "26": 3336458752.0, + "27": 3336458752.0, + "28": 3336458752.0, + "29": 3336458752.0, "30": 3336458752.0, + "31": 3336458752.0, + "32": 3336458752.0, + "33": 3336458752.0, + "34": 3336458752.0, "35": 3336458752.0, + "36": 3336458752.0, + "37": 3336458752.0, + "38": 3336458752.0, + "39": 3336458752.0, "40": 3336458752.0, + "41": 3336458752.0, + "42": 3336458752.0, + "43": 3336458752.0, + "44": 3336458752.0, "45": 3336458752.0, + "46": 3336458752.0, + "47": 3336458752.0, + "48": 3336458752.0, + "49": 3336458752.0, "50": 3336458752.0, + "51": 3336458752.0, + "52": 3336458752.0, + "53": 3336458752.0, + "54": 3336458752.0, "55": 3336458752.0, + "56": 3336458752.0, + "57": 3336458752.0, + "58": 3336458752.0, + "59": 3336458752.0, "60": 3336458752.0, + "61": 3336458752.0, + "62": 3336458752.0, + "63": 3336458752.0, + "64": 3336458752.0, "65": 3336458752.0, + "66": 3336458752.0, + "67": 3336458752.0, + "68": 3336458752.0, + "69": 3336458752.0, "70": 3336458752.0, + "71": 3336458752.0, + "72": 3336458752.0, + "73": 3336458752.0, + "74": 3336458752.0, "75": 3336458752.0, + "76": 3336458752.0, + "77": 3336458752.0, + "78": 3336458752.0, + "79": 3336458752.0, "80": 3336458752.0, + "81": 3336458752.0, + "82": 3336458752.0, + "83": 3336458752.0, + "84": 3336458752.0, "85": 3336458752.0, + "86": 3336458752.0, + "87": 3336458752.0, + "88": 3336458752.0, + "89": 3336458752.0, "90": 3336458752.0, + "91": 3336458752.0, + "92": 3336458752.0, + "93": 3336458752.0, + "94": 3336458752.0, "95": 3336458752.0, + "96": 3336458752.0, + "97": 3336458752.0, + "98": 3336458752.0, + "99": 3336458752.0, "100": 3336458752.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 9.63895, - "5": 0.26386, - 
"10": 0.26904, - "15": 0.26572, - "20": 0.2594, - "25": 0.25916, - "30": 0.25941, - "35": 0.34452, - "40": 0.26089, - "45": 0.26208, - "50": 0.25808, - "55": 0.26854, - "60": 0.25663, - "65": 0.25854, - "70": 0.25853, - "75": 0.25618, - "80": 0.25673, - "85": 0.25977, - "90": 0.25957, - "95": 0.26011, - "100": 0.25873 + "1": 9.03109, + "2": 0.35076, + "3": 0.33208, + "4": 0.30024, + "5": 0.29051, + "6": 0.29151, + "7": 0.2915, + "8": 0.29069, + "9": 0.28128, + "10": 0.28633, + "11": 0.28968, + "12": 0.29187, + "13": 0.28737, + "14": 0.28701, + "15": 0.29554, + "16": 0.28451, + "17": 0.28904, + "18": 0.28765, + "19": 0.2927, + "20": 0.29433, + "21": 0.28956, + "22": 0.28517, + "23": 0.29568, + "24": 0.29372, + "25": 0.28702, + "26": 0.27993, + "27": 0.28025, + "28": 0.28025, + "29": 0.28655, + "30": 0.28192, + "31": 0.28723, + "32": 0.29054, + "33": 0.29967, + "34": 0.28855, + "35": 0.31974, + "36": 0.32479, + "37": 0.28367, + "38": 0.29414, + "39": 0.30161, + "40": 0.29066, + "41": 0.2857, + "42": 0.29152, + "43": 0.28567, + "44": 0.28393, + "45": 0.29254, + "46": 0.28887, + "47": 0.29566, + "48": 0.2879, + "49": 0.28337, + "50": 0.28858, + "51": 0.28557, + "52": 0.28641, + "53": 0.28977, + "54": 0.28532, + "55": 0.28322, + "56": 0.2855, + "57": 0.29617, + "58": 0.28816, + "59": 0.28781, + "60": 0.28732, + "61": 0.28426, + "62": 0.29092, + "63": 0.29263, + "64": 0.28875, + "65": 0.28714, + "66": 0.29018, + "67": 0.28162, + "68": 0.28703, + "69": 0.29503, + "70": 0.29276, + "71": 0.2824, + "72": 0.29151, + "73": 0.29279, + "74": 0.28282, + "75": 0.28454, + "76": 0.28479, + "77": 0.28239, + "78": 0.28785, + "79": 0.29392, + "80": 0.28563, + "81": 0.282, + "82": 0.29276, + "83": 0.29502, + "84": 0.28441, + "85": 0.28063, + "86": 0.29172, + "87": 0.2867, + "88": 0.29629, + "89": 0.29585, + "90": 0.29326, + "91": 0.28326, + "92": 0.28263, + "93": 0.2913, + "94": 0.2943, + "95": 0.28216, + "96": 0.29001, + "97": 0.29031, + "98": 0.28912, + "99": 0.68367, + "100": 0.296 } 
} } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json new file mode 100644 index 00000000000..a5713a081ad --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.38854, + "2": 10.3937, + "3": 9.78105, + "4": 9.59731, + "5": 9.38095, + "6": 9.4057, + "7": 9.30785, + "8": 9.24107, + "9": 9.12192, + "10": 9.05714, + "11": 8.87325, + "12": 8.79368, + "13": 8.84026, + "14": 8.68518, + "15": 8.65603, + "16": 8.54372, + "17": 8.50113, + "18": 8.39001, + "19": 8.36443, + "20": 8.26193, + "21": 8.27097, + "22": 8.14406, + "23": 8.07467, + "24": 8.11915, + "25": 7.98192, + "26": 8.08777, + "27": 7.87148, + "28": 7.96511, + "29": 7.80258, + "30": 7.86937, + "31": 7.81742, + "32": 7.68788, + "33": 7.7805, + "34": 7.55497, + "35": 7.66279, + "36": 7.52257, + "37": 7.44455, + "38": 7.5026, + "39": 7.4504, + "40": 7.50083, + "41": 7.39053, + "42": 7.36073, + "43": 7.4333, + "44": 7.37641, + "45": 7.34894, + "46": 7.28171, + "47": 7.46122, + "48": 7.2877, + "49": 7.35375, + "50": 7.18147, + "51": 7.36608, + "52": 7.13343, + "53": 7.11575, + "54": 7.22932, + "55": 7.1542, + "56": 7.22261, + "57": 7.32969, + "58": 7.02356, + "59": 7.11377, + "60": 7.14734, + "61": 7.11404, + "62": 7.24755, + "63": 7.1568, + "64": 7.08414, + "65": 6.9972, + "66": 7.06074, + "67": 7.04881, + "68": 7.14167, + "69": 7.03482, + "70": 7.06009, + "71": 6.92578, + "72": 7.0043, + "73": 6.97965, + "74": 6.92276, + "75": 7.06086, + "76": 6.97271, + "77": 7.08186, + "78": 7.01883, + "79": 6.85524, + "80": 6.94306, + "81": 6.97637, + "82": 7.06676, + "83": 
6.99984, + "84": 7.0089, + "85": 6.85989, + "86": 7.03607, + "87": 6.98072, + "88": 6.91508, + "89": 6.81068, + "90": 7.24967, + "91": 6.71006, + "92": 7.04916, + "93": 6.9057, + "94": 7.06458, + "95": 6.84836, + "96": 6.97667, + "97": 6.96312, + "98": 6.88704, + "99": 7.013, + "100": 6.98289 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43331.0, + "2": 44051.0, + "3": 44760.0, + "4": 42395.0, + "5": 45376.0, + "6": 40957.0, + "7": 43160.0, + "8": 45463.0, + "9": 42446.0, + "10": 45361.0, + "11": 43965.0, + "12": 44605.0, + "13": 43884.0, + "14": 46187.0, + "15": 43888.0, + "16": 41604.0, + "17": 43828.0, + "18": 44690.0, + "19": 42562.0, + "20": 44777.0, + "21": 44792.0, + "22": 41854.0, + "23": 45465.0, + "24": 43071.0, + "25": 42465.0, + "26": 43917.0, + "27": 46228.0, + "28": 46431.0, + "29": 46169.0, + "30": 43995.0, + "31": 41278.0, + "32": 43346.0, + "33": 45463.0, + "34": 43298.0, + "35": 43276.0, + "36": 42490.0, + "37": 40069.0, + "38": 42527.0, + "39": 44730.0, + "40": 43245.0, + "41": 44653.0, + "42": 43269.0, + "43": 45462.0, + "44": 44594.0, + "45": 43285.0, + "46": 43915.0, + "47": 42370.0, + "48": 44704.0, + "49": 43164.0, + "50": 43365.0, + "51": 41167.0, + "52": 43825.0, + "53": 43945.0, + "54": 41947.0, + "55": 43853.0, + "56": 43268.0, + "57": 42591.0, + "58": 43843.0, + "59": 44625.0, + "60": 41218.0, + "61": 39714.0, + "62": 44779.0, + "63": 44716.0, + "64": 45359.0, + "65": 44684.0, + "66": 45355.0, + "67": 43146.0, + "68": 42519.0, + "69": 43835.0, + "70": 45522.0, + "71": 43316.0, + "72": 44767.0, + "73": 45365.0, + "74": 42449.0, + "75": 44695.0, + "76": 43885.0, + "77": 42092.0, + "78": 40278.0, + "79": 38915.0, + "80": 41096.0, + "81": 45372.0, + "82": 43206.0, + "83": 38481.0, + "84": 42474.0, + "85": 43990.0, + "86": 45729.0, + "87": 40884.0, + "88": 41772.0, + "89": 41076.0, + "90": 44676.0, + "91": 46159.0, + "92": 41790.0, + "93": 43242.0, + "94": 39566.0, + "95": 44077.0, 
+ "96": 44741.0, + "97": 45379.0, + "98": 41802.0, + "99": 45441.0, + "100": 42530.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2171550208.0, + "2": 2171550208.0, + "3": 2171550208.0, + "4": 2171550208.0, + "5": 2171550208.0, + "6": 2171550208.0, + "7": 2171550208.0, + "8": 2171550208.0, + "9": 2171550208.0, + "10": 2171550208.0, + "11": 2171550208.0, + "12": 2171550208.0, + "13": 2171550208.0, + "14": 2171550208.0, + "15": 2171550208.0, + "16": 2171550208.0, + "17": 2171550208.0, + "18": 2171550208.0, + "19": 2171550208.0, + "20": 2171550208.0, + "21": 2171550208.0, + "22": 2171550208.0, + "23": 2171550208.0, + "24": 2171550208.0, + "25": 2171550208.0, + "26": 2171550208.0, + "27": 2171550208.0, + "28": 2171550208.0, + "29": 2171550208.0, + "30": 2171550208.0, + "31": 2171550208.0, + "32": 2171550208.0, + "33": 2171550208.0, + "34": 2171550208.0, + "35": 2171550208.0, + "36": 2171550208.0, + "37": 2171550208.0, + "38": 2171550208.0, + "39": 2171550208.0, + "40": 2171550208.0, + "41": 2171550208.0, + "42": 2171550208.0, + "43": 2171550208.0, + "44": 2171550208.0, + "45": 2171550208.0, + "46": 2171550208.0, + "47": 2171550208.0, + "48": 2171550208.0, + "49": 2171550208.0, + "50": 2171550208.0, + "51": 2171550208.0, + "52": 2171550208.0, + "53": 2171550208.0, + "54": 2171550208.0, + "55": 2171550208.0, + "56": 2171550208.0, + "57": 2171550208.0, + "58": 2171550208.0, + "59": 2171550208.0, + "60": 2171550208.0, + "61": 2171550208.0, + "62": 2171550208.0, + "63": 2171550208.0, + "64": 2171550208.0, + "65": 2171550208.0, + "66": 2171550208.0, + "67": 2171550208.0, + "68": 2171550208.0, + "69": 2171550208.0, + "70": 2171550208.0, + "71": 2171550208.0, + "72": 2171550208.0, + "73": 2171550208.0, + "74": 2171550208.0, + "75": 2171550208.0, + "76": 2171550208.0, + "77": 2171550208.0, + "78": 2171550208.0, + "79": 2171550208.0, + "80": 2171550208.0, + "81": 2171550208.0, + "82": 2171550208.0, + "83": 
2171550208.0, + "84": 2171550208.0, + "85": 2171550208.0, + "86": 2171550208.0, + "87": 2171550208.0, + "88": 2171550208.0, + "89": 2171550208.0, + "90": 2171550208.0, + "91": 2171550208.0, + "92": 2171550208.0, + "93": 2171550208.0, + "94": 2171550208.0, + "95": 2171550208.0, + "96": 2171550208.0, + "97": 2171550208.0, + "98": 2171550208.0, + "99": 2171550208.0, + "100": 2171550208.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2407642624.0, + "2": 3336458752.0, + "3": 3336458752.0, + "4": 3336458752.0, + "5": 3336458752.0, + "6": 3336458752.0, + "7": 3336458752.0, + "8": 3336458752.0, + "9": 3336458752.0, + "10": 3336458752.0, + "11": 3336458752.0, + "12": 3336458752.0, + "13": 3336458752.0, + "14": 3336458752.0, + "15": 3336458752.0, + "16": 3336458752.0, + "17": 3336458752.0, + "18": 3336458752.0, + "19": 3336458752.0, + "20": 3336458752.0, + "21": 3336458752.0, + "22": 3336458752.0, + "23": 3336458752.0, + "24": 3336458752.0, + "25": 3336458752.0, + "26": 3336458752.0, + "27": 3336458752.0, + "28": 3336458752.0, + "29": 3336458752.0, + "30": 3336458752.0, + "31": 3336458752.0, + "32": 3336458752.0, + "33": 3336458752.0, + "34": 3336458752.0, + "35": 3336458752.0, + "36": 3336458752.0, + "37": 3336458752.0, + "38": 3336458752.0, + "39": 3336458752.0, + "40": 3336458752.0, + "41": 3336458752.0, + "42": 3336458752.0, + "43": 3336458752.0, + "44": 3336458752.0, + "45": 3336458752.0, + "46": 3336458752.0, + "47": 3336458752.0, + "48": 3336458752.0, + "49": 3336458752.0, + "50": 3336458752.0, + "51": 3336458752.0, + "52": 3336458752.0, + "53": 3336458752.0, + "54": 3336458752.0, + "55": 3336458752.0, + "56": 3336458752.0, + "57": 3336458752.0, + "58": 3336458752.0, + "59": 3336458752.0, + "60": 3336458752.0, + "61": 3336458752.0, + "62": 3336458752.0, + "63": 3336458752.0, + "64": 3336458752.0, + "65": 3336458752.0, + "66": 3336458752.0, + "67": 3336458752.0, + "68": 3336458752.0, + "69": 
3336458752.0, + "70": 3336458752.0, + "71": 3336458752.0, + "72": 3336458752.0, + "73": 3336458752.0, + "74": 3336458752.0, + "75": 3336458752.0, + "76": 3336458752.0, + "77": 3336458752.0, + "78": 3336458752.0, + "79": 3336458752.0, + "80": 3336458752.0, + "81": 3336458752.0, + "82": 3336458752.0, + "83": 3336458752.0, + "84": 3336458752.0, + "85": 3336458752.0, + "86": 3336458752.0, + "87": 3336458752.0, + "88": 3336458752.0, + "89": 3336458752.0, + "90": 3336458752.0, + "91": 3336458752.0, + "92": 3336458752.0, + "93": 3336458752.0, + "94": 3336458752.0, + "95": 3336458752.0, + "96": 3336458752.0, + "97": 3336458752.0, + "98": 3336458752.0, + "99": 3336458752.0, + "100": 3336458752.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.42312, + "2": 0.62411, + "3": 0.40707, + "4": 0.42011, + "5": 0.41971, + "6": 0.41837, + "7": 0.42045, + "8": 0.41593, + "9": 0.41528, + "10": 0.41547, + "11": 0.41748, + "12": 0.41599, + "13": 0.41809, + "14": 0.41896, + "15": 0.41063, + "16": 0.41325, + "17": 0.41257, + "18": 0.41693, + "19": 0.40667, + "20": 0.40481, + "21": 0.40784, + "22": 0.40485, + "23": 0.40809, + "24": 0.41044, + "25": 0.40445, + "26": 0.40696, + "27": 0.40798, + "28": 0.40651, + "29": 0.40546, + "30": 0.40687, + "31": 0.4062, + "32": 0.40345, + "33": 0.40106, + "34": 0.40598, + "35": 0.4189, + "36": 0.40223, + "37": 0.39806, + "38": 0.39879, + "39": 0.40009, + "40": 0.39858, + "41": 0.39851, + "42": 0.39932, + "43": 0.39763, + "44": 0.39856, + "45": 0.39923, + "46": 0.39891, + "47": 0.39808, + "48": 0.39851, + "49": 0.39952, + "50": 0.39952, + "51": 0.39938, + "52": 0.39883, + "53": 0.39509, + "54": 0.39364, + "55": 0.39489, + "56": 0.39363, + "57": 0.39345, + "58": 0.39394, + "59": 0.39402, + "60": 0.39395, + "61": 0.39343, + "62": 0.39309, + "63": 0.39586, + "64": 0.39408, + "65": 0.40348, + "66": 0.39311, + "67": 0.39329, + "68": 0.39593, + "69": 0.39468, + "70": 0.39577, + "71": 0.39317, + 
"72": 0.39338, + "73": 0.39355, + "74": 0.39362, + "75": 0.39435, + "76": 0.39315, + "77": 0.39232, + "78": 0.39379, + "79": 0.39337, + "80": 0.39379, + "81": 0.3971, + "82": 0.39385, + "83": 0.39875, + "84": 0.39836, + "85": 0.39368, + "86": 0.39332, + "87": 0.3934, + "88": 0.40166, + "89": 0.3951, + "90": 0.39501, + "91": 0.39618, + "92": 0.39935, + "93": 0.39375, + "94": 0.39481, + "95": 0.39382, + "96": 0.3928, + "97": 0.39282, + "98": 0.39402, + "99": 0.39342, + "100": 0.39435 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json new file mode 100644 index 00000000000..87a5820cc8c --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.38854, + "2": 10.3937, + "3": 9.78105, + "4": 9.59731, + "5": 9.38095, + "6": 9.4057, + "7": 9.30785, + "8": 9.24107, + "9": 9.12192, + "10": 9.05714, + "11": 8.87325, + "12": 8.79368, + "13": 8.84026, + "14": 8.68518, + "15": 8.65603, + "16": 8.54372, + "17": 8.50113, + "18": 8.39001, + "19": 8.36443, + "20": 8.26193, + "21": 8.27097, + "22": 8.14406, + "23": 8.07467, + "24": 8.11915, + "25": 7.98192, + "26": 8.08777, + "27": 7.87148, + "28": 7.96511, + "29": 7.80258, + "30": 7.86937, + "31": 7.81742, + "32": 7.68788, + "33": 7.7805, + "34": 7.55497, + "35": 7.66279, + "36": 7.52257, + "37": 7.44455, + "38": 7.5026, + "39": 7.4504, + "40": 7.50083, + "41": 7.39053, + "42": 7.36073, + "43": 7.4333, + "44": 7.37641, + "45": 7.34894, + "46": 7.28171, + "47": 7.46122, + "48": 7.2877, + "49": 7.35375, + "50": 7.18147, + "51": 7.36608, + "52": 7.13343, + "53": 7.11575, + "54": 
7.22932, + "55": 7.1542, + "56": 7.22261, + "57": 7.32969, + "58": 7.02356, + "59": 7.11377, + "60": 7.14734, + "61": 7.11404, + "62": 7.24755, + "63": 7.1568, + "64": 7.08414, + "65": 6.9972, + "66": 7.06074, + "67": 7.04881, + "68": 7.14167, + "69": 7.03482, + "70": 7.06009, + "71": 6.92578, + "72": 7.0043, + "73": 6.97965, + "74": 6.92276, + "75": 7.06086, + "76": 6.97271, + "77": 7.08186, + "78": 7.01883, + "79": 6.85524, + "80": 6.94306, + "81": 6.97637, + "82": 7.06676, + "83": 6.99984, + "84": 7.0089, + "85": 6.85989, + "86": 7.03607, + "87": 6.98072, + "88": 6.91508, + "89": 6.81068, + "90": 7.24967, + "91": 6.71006, + "92": 7.04916, + "93": 6.9057, + "94": 7.06458, + "95": 6.84836, + "96": 6.97667, + "97": 6.96312, + "98": 6.88704, + "99": 7.013, + "100": 6.98289 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43331.0, + "2": 44051.0, + "3": 44760.0, + "4": 42395.0, + "5": 45376.0, + "6": 40957.0, + "7": 43160.0, + "8": 45463.0, + "9": 42446.0, + "10": 45361.0, + "11": 43965.0, + "12": 44605.0, + "13": 43884.0, + "14": 46187.0, + "15": 43888.0, + "16": 41604.0, + "17": 43828.0, + "18": 44690.0, + "19": 42562.0, + "20": 44777.0, + "21": 44792.0, + "22": 41854.0, + "23": 45465.0, + "24": 43071.0, + "25": 42465.0, + "26": 43917.0, + "27": 46228.0, + "28": 46431.0, + "29": 46169.0, + "30": 43995.0, + "31": 41278.0, + "32": 43346.0, + "33": 45463.0, + "34": 43298.0, + "35": 43276.0, + "36": 42490.0, + "37": 40069.0, + "38": 42527.0, + "39": 44730.0, + "40": 43245.0, + "41": 44653.0, + "42": 43269.0, + "43": 45462.0, + "44": 44594.0, + "45": 43285.0, + "46": 43915.0, + "47": 42370.0, + "48": 44704.0, + "49": 43164.0, + "50": 43365.0, + "51": 41167.0, + "52": 43825.0, + "53": 43945.0, + "54": 41947.0, + "55": 43853.0, + "56": 43268.0, + "57": 42591.0, + "58": 43843.0, + "59": 44625.0, + "60": 41218.0, + "61": 39714.0, + "62": 44779.0, + "63": 44716.0, + "64": 45359.0, + "65": 44684.0, + "66": 45355.0, + 
"67": 43146.0, + "68": 42519.0, + "69": 43835.0, + "70": 45522.0, + "71": 43316.0, + "72": 44767.0, + "73": 45365.0, + "74": 42449.0, + "75": 44695.0, + "76": 43885.0, + "77": 42092.0, + "78": 40278.0, + "79": 38915.0, + "80": 41096.0, + "81": 45372.0, + "82": 43206.0, + "83": 38481.0, + "84": 42474.0, + "85": 43990.0, + "86": 45729.0, + "87": 40884.0, + "88": 41772.0, + "89": 41076.0, + "90": 44676.0, + "91": 46159.0, + "92": 41790.0, + "93": 43242.0, + "94": 39566.0, + "95": 44077.0, + "96": 44741.0, + "97": 45379.0, + "98": 41802.0, + "99": 45441.0, + "100": 42530.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2171550208.0, + "2": 2171550208.0, + "3": 2171550208.0, + "4": 2171550208.0, + "5": 2171550208.0, + "6": 2171550208.0, + "7": 2171550208.0, + "8": 2171550208.0, + "9": 2171550208.0, + "10": 2171550208.0, + "11": 2171550208.0, + "12": 2171550208.0, + "13": 2171550208.0, + "14": 2171550208.0, + "15": 2171550208.0, + "16": 2171550208.0, + "17": 2171550208.0, + "18": 2171550208.0, + "19": 2171550208.0, + "20": 2171550208.0, + "21": 2171550208.0, + "22": 2171550208.0, + "23": 2171550208.0, + "24": 2171550208.0, + "25": 2171550208.0, + "26": 2171550208.0, + "27": 2171550208.0, + "28": 2171550208.0, + "29": 2171550208.0, + "30": 2171550208.0, + "31": 2171550208.0, + "32": 2171550208.0, + "33": 2171550208.0, + "34": 2171550208.0, + "35": 2171550208.0, + "36": 2171550208.0, + "37": 2171550208.0, + "38": 2171550208.0, + "39": 2171550208.0, + "40": 2171550208.0, + "41": 2171550208.0, + "42": 2171550208.0, + "43": 2171550208.0, + "44": 2171550208.0, + "45": 2171550208.0, + "46": 2171550208.0, + "47": 2171550208.0, + "48": 2171550208.0, + "49": 2171550208.0, + "50": 2171550208.0, + "51": 2171550208.0, + "52": 2171550208.0, + "53": 2171550208.0, + "54": 2171550208.0, + "55": 2171550208.0, + "56": 2171550208.0, + "57": 2171550208.0, + "58": 2171550208.0, + "59": 2171550208.0, + "60": 2171550208.0, + 
"61": 2171550208.0, + "62": 2171550208.0, + "63": 2171550208.0, + "64": 2171550208.0, + "65": 2171550208.0, + "66": 2171550208.0, + "67": 2171550208.0, + "68": 2171550208.0, + "69": 2171550208.0, + "70": 2171550208.0, + "71": 2171550208.0, + "72": 2171550208.0, + "73": 2171550208.0, + "74": 2171550208.0, + "75": 2171550208.0, + "76": 2171550208.0, + "77": 2171550208.0, + "78": 2171550208.0, + "79": 2171550208.0, + "80": 2171550208.0, + "81": 2171550208.0, + "82": 2171550208.0, + "83": 2171550208.0, + "84": 2171550208.0, + "85": 2171550208.0, + "86": 2171550208.0, + "87": 2171550208.0, + "88": 2171550208.0, + "89": 2171550208.0, + "90": 2171550208.0, + "91": 2171550208.0, + "92": 2171550208.0, + "93": 2171550208.0, + "94": 2171550208.0, + "95": 2171550208.0, + "96": 2171550208.0, + "97": 2171550208.0, + "98": 2171550208.0, + "99": 2171550208.0, + "100": 2171550208.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2407642624.0, + "2": 3336458752.0, + "3": 3336458752.0, + "4": 3336458752.0, + "5": 3336458752.0, + "6": 3336458752.0, + "7": 3336458752.0, + "8": 3336458752.0, + "9": 3336458752.0, + "10": 3336458752.0, + "11": 3336458752.0, + "12": 3336458752.0, + "13": 3336458752.0, + "14": 3336458752.0, + "15": 3336458752.0, + "16": 3336458752.0, + "17": 3336458752.0, + "18": 3336458752.0, + "19": 3336458752.0, + "20": 3336458752.0, + "21": 3336458752.0, + "22": 3336458752.0, + "23": 3336458752.0, + "24": 3336458752.0, + "25": 3336458752.0, + "26": 3336458752.0, + "27": 3336458752.0, + "28": 3336458752.0, + "29": 3336458752.0, + "30": 3336458752.0, + "31": 3336458752.0, + "32": 3336458752.0, + "33": 3336458752.0, + "34": 3336458752.0, + "35": 3336458752.0, + "36": 3336458752.0, + "37": 3336458752.0, + "38": 3336458752.0, + "39": 3336458752.0, + "40": 3336458752.0, + "41": 3336458752.0, + "42": 3336458752.0, + "43": 3336458752.0, + "44": 3336458752.0, + "45": 3336458752.0, + "46": 3336458752.0, + 
"47": 3336458752.0, + "48": 3336458752.0, + "49": 3336458752.0, + "50": 3336458752.0, + "51": 3336458752.0, + "52": 3336458752.0, + "53": 3336458752.0, + "54": 3336458752.0, + "55": 3336458752.0, + "56": 3336458752.0, + "57": 3336458752.0, + "58": 3336458752.0, + "59": 3336458752.0, + "60": 3336458752.0, + "61": 3336458752.0, + "62": 3336458752.0, + "63": 3336458752.0, + "64": 3336458752.0, + "65": 3336458752.0, + "66": 3336458752.0, + "67": 3336458752.0, + "68": 3336458752.0, + "69": 3336458752.0, + "70": 3336458752.0, + "71": 3336458752.0, + "72": 3336458752.0, + "73": 3336458752.0, + "74": 3336458752.0, + "75": 3336458752.0, + "76": 3336458752.0, + "77": 3336458752.0, + "78": 3336458752.0, + "79": 3336458752.0, + "80": 3336458752.0, + "81": 3336458752.0, + "82": 3336458752.0, + "83": 3336458752.0, + "84": 3336458752.0, + "85": 3336458752.0, + "86": 3336458752.0, + "87": 3336458752.0, + "88": 3336458752.0, + "89": 3336458752.0, + "90": 3336458752.0, + "91": 3336458752.0, + "92": 3336458752.0, + "93": 3336458752.0, + "94": 3336458752.0, + "95": 3336458752.0, + "96": 3336458752.0, + "97": 3336458752.0, + "98": 3336458752.0, + "99": 3336458752.0, + "100": 3336458752.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.65153, + "2": 0.53984, + "3": 0.42661, + "4": 0.41593, + "5": 0.40702, + "6": 0.40818, + "7": 0.40561, + "8": 0.40327, + "9": 0.40232, + "10": 0.40905, + "11": 0.41597, + "12": 0.41177, + "13": 0.4131, + "14": 0.41425, + "15": 0.40979, + "16": 0.41034, + "17": 0.40766, + "18": 0.41324, + "19": 0.40983, + "20": 0.40973, + "21": 0.41258, + "22": 0.40882, + "23": 0.41161, + "24": 0.41499, + "25": 0.40883, + "26": 0.41065, + "27": 0.41442, + "28": 0.42182, + "29": 0.41133, + "30": 0.40692, + "31": 0.40463, + "32": 0.40734, + "33": 0.41503, + "34": 0.40436, + "35": 0.40604, + "36": 0.40609, + "37": 0.40425, + "38": 0.40616, + "39": 0.40517, + "40": 0.40457, + "41": 0.40404, + "42": 0.40366, + 
"43": 0.40482, + "44": 0.40536, + "45": 0.40416, + "46": 0.40309, + "47": 0.40454, + "48": 0.40394, + "49": 0.40592, + "50": 0.40575, + "51": 0.40587, + "52": 0.40615, + "53": 0.4075, + "54": 0.8929, + "55": 0.40675, + "56": 0.40691, + "57": 0.40758, + "58": 0.40852, + "59": 0.40647, + "60": 0.40547, + "61": 0.40637, + "62": 0.40696, + "63": 0.40776, + "64": 0.40276, + "65": 0.40178, + "66": 0.40265, + "67": 0.40328, + "68": 0.40315, + "69": 0.40883, + "70": 0.40216, + "71": 0.40455, + "72": 0.40323, + "73": 0.40261, + "74": 0.40269, + "75": 0.40043, + "76": 0.40039, + "77": 0.40035, + "78": 0.39953, + "79": 0.39986, + "80": 0.40626, + "81": 0.40677, + "82": 0.39929, + "83": 0.40058, + "84": 0.40833, + "85": 0.40235, + "86": 0.39878, + "87": 0.40207, + "88": 0.39947, + "89": 0.39981, + "90": 0.39896, + "91": 0.39963, + "92": 0.40003, + "93": 0.39864, + "94": 0.40427, + "95": 0.39942, + "96": 0.40168, + "97": 0.40276, + "98": 0.39869, + "99": 0.40201, + "100": 0.39949 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..f6481fb6aae --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.372, + "2": 10.37006, + "3": 9.85232, + "4": 9.61996, + "5": 9.40868, + "6": 9.43215, + "7": 9.31482, + "8": 9.27336, + "9": 9.1139, + "10": 9.03962, + "11": 8.87218, + "12": 8.80873, + "13": 8.83468, + "14": 8.69011, + "15": 8.66228, + "16": 8.54828, + "17": 8.50093, + "18": 8.42525, + "19": 8.3881, + "20": 8.2807, + "21": 8.26609, + "22": 8.16003, + "23": 8.1124, + "24": 8.14262, + "25": 
7.98432, + "26": 8.10592, + "27": 7.88963, + "28": 7.97037, + "29": 7.81276, + "30": 7.87638, + "31": 7.82516, + "32": 7.70248, + "33": 7.80198, + "34": 7.56872, + "35": 7.67379, + "36": 7.54691, + "37": 7.47408, + "38": 7.50739, + "39": 7.49773, + "40": 7.51091, + "41": 7.41065, + "42": 7.37995, + "43": 7.44078, + "44": 7.39393, + "45": 7.37239, + "46": 7.28427, + "47": 7.46631, + "48": 7.2905, + "49": 7.35025, + "50": 7.17204, + "51": 7.37012, + "52": 7.14467, + "53": 7.12652, + "54": 7.23751, + "55": 7.15586, + "56": 7.23154, + "57": 7.33541, + "58": 7.01363, + "59": 7.11431, + "60": 7.15121, + "61": 7.10904, + "62": 7.26834, + "63": 7.15176, + "64": 7.08415, + "65": 6.99114, + "66": 7.05301, + "67": 7.04354, + "68": 7.1398, + "69": 7.03224, + "70": 7.05832, + "71": 6.90372, + "72": 6.99794, + "73": 6.9769, + "74": 6.91759, + "75": 7.06626, + "76": 6.95758, + "77": 7.0871, + "78": 7.03238, + "79": 6.85274, + "80": 6.93633, + "81": 6.97617, + "82": 7.06196, + "83": 6.98213, + "84": 7.00931, + "85": 6.85082, + "86": 7.04673, + "87": 6.97907, + "88": 6.91096, + "89": 6.81719, + "90": 7.2459, + "91": 6.7046, + "92": 7.05377, + "93": 6.89397, + "94": 7.0542, + "95": 6.85031, + "96": 6.96441, + "97": 6.95632, + "98": 6.88246, + "99": 7.00392, + "100": 6.98993 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43299.0, + "2": 44047.0, + "3": 44744.0, + "4": 42405.0, + "5": 45385.0, + "6": 40946.0, + "7": 43183.0, + "8": 45446.0, + "9": 42445.0, + "10": 45361.0, + "11": 43966.0, + "12": 44593.0, + "13": 43907.0, + "14": 46210.0, + "15": 43904.0, + "16": 41614.0, + "17": 43840.0, + "18": 44687.0, + "19": 42536.0, + "20": 44746.0, + "21": 44767.0, + "22": 41831.0, + "23": 45449.0, + "24": 43072.0, + "25": 42457.0, + "26": 43921.0, + "27": 46208.0, + "28": 46361.0, + "29": 46146.0, + "30": 43976.0, + "31": 41272.0, + "32": 43348.0, + "33": 45431.0, + "34": 43295.0, + "35": 43264.0, + "36": 42493.0, + "37": 40075.0, + 
"38": 42518.0, + "39": 44713.0, + "40": 43230.0, + "41": 44666.0, + "42": 43251.0, + "43": 45471.0, + "44": 44600.0, + "45": 43330.0, + "46": 43932.0, + "47": 42400.0, + "48": 44673.0, + "49": 43149.0, + "50": 43373.0, + "51": 41142.0, + "52": 43824.0, + "53": 43917.0, + "54": 42023.0, + "55": 43883.0, + "56": 43235.0, + "57": 42536.0, + "58": 43829.0, + "59": 44648.0, + "60": 41187.0, + "61": 39720.0, + "62": 44740.0, + "63": 44690.0, + "64": 45358.0, + "65": 44695.0, + "66": 45364.0, + "67": 43138.0, + "68": 42538.0, + "69": 43820.0, + "70": 45549.0, + "71": 43324.0, + "72": 44760.0, + "73": 45363.0, + "74": 42473.0, + "75": 44666.0, + "76": 43903.0, + "77": 42082.0, + "78": 40295.0, + "79": 38890.0, + "80": 41131.0, + "81": 45363.0, + "82": 43206.0, + "83": 38487.0, + "84": 42462.0, + "85": 43985.0, + "86": 45695.0, + "87": 40826.0, + "88": 41822.0, + "89": 41069.0, + "90": 44664.0, + "91": 46170.0, + "92": 41797.0, + "93": 43208.0, + "94": 39552.0, + "95": 44106.0, + "96": 44697.0, + "97": 45398.0, + "98": 41792.0, + "99": 45429.0, + "100": 42437.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2171550208.0, + "2": 2171550208.0, + "3": 2171550208.0, + "4": 2171550208.0, + "5": 2171550208.0, + "6": 2171550208.0, + "7": 2171550208.0, + "8": 2171550208.0, + "9": 2171550208.0, + "10": 2171550208.0, + "11": 2171550208.0, + "12": 2171550208.0, + "13": 2171550208.0, + "14": 2171550208.0, + "15": 2171550208.0, + "16": 2171550208.0, + "17": 2171550208.0, + "18": 2171550208.0, + "19": 2171550208.0, + "20": 2171550208.0, + "21": 2171550208.0, + "22": 2171550208.0, + "23": 2171550208.0, + "24": 2171550208.0, + "25": 2171550208.0, + "26": 2171550208.0, + "27": 2171550208.0, + "28": 2171550208.0, + "29": 2171550208.0, + "30": 2171550208.0, + "31": 2171550208.0, + "32": 2171550208.0, + "33": 2171550208.0, + "34": 2171550208.0, + "35": 2171550208.0, + "36": 2171550208.0, + "37": 2171550208.0, + "38": 
2171550208.0, + "39": 2171550208.0, + "40": 2171550208.0, + "41": 2171550208.0, + "42": 2171550208.0, + "43": 2171550208.0, + "44": 2171550208.0, + "45": 2171550208.0, + "46": 2171550208.0, + "47": 2171550208.0, + "48": 2171550208.0, + "49": 2171550208.0, + "50": 2171550208.0, + "51": 2171550208.0, + "52": 2171550208.0, + "53": 2171550208.0, + "54": 2171550208.0, + "55": 2171550208.0, + "56": 2171550208.0, + "57": 2171550208.0, + "58": 2171550208.0, + "59": 2171550208.0, + "60": 2171550208.0, + "61": 2171550208.0, + "62": 2171550208.0, + "63": 2171550208.0, + "64": 2171550208.0, + "65": 2171550208.0, + "66": 2171550208.0, + "67": 2171550208.0, + "68": 2171550208.0, + "69": 2171550208.0, + "70": 2171550208.0, + "71": 2171550208.0, + "72": 2171550208.0, + "73": 2171550208.0, + "74": 2171550208.0, + "75": 2171550208.0, + "76": 2171550208.0, + "77": 2171550208.0, + "78": 2171550208.0, + "79": 2171550208.0, + "80": 2171550208.0, + "81": 2171550208.0, + "82": 2171550208.0, + "83": 2171550208.0, + "84": 2171550208.0, + "85": 2171550208.0, + "86": 2171550208.0, + "87": 2171550208.0, + "88": 2171550208.0, + "89": 2171550208.0, + "90": 2171550208.0, + "91": 2171550208.0, + "92": 2171550208.0, + "93": 2171550208.0, + "94": 2171550208.0, + "95": 2171550208.0, + "96": 2171550208.0, + "97": 2171550208.0, + "98": 2171550208.0, + "99": 2171550208.0, + "100": 2171550208.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2407642624.0, + "2": 3336458752.0, + "3": 3336458752.0, + "4": 3336458752.0, + "5": 3336458752.0, + "6": 3336458752.0, + "7": 3336458752.0, + "8": 3336458752.0, + "9": 3336458752.0, + "10": 3336458752.0, + "11": 3336458752.0, + "12": 3336458752.0, + "13": 3336458752.0, + "14": 3336458752.0, + "15": 3336458752.0, + "16": 3336458752.0, + "17": 3336458752.0, + "18": 3336458752.0, + "19": 3336458752.0, + "20": 3336458752.0, + "21": 3336458752.0, + "22": 3336458752.0, + "23": 3336458752.0, + "24": 
3336458752.0, + "25": 3336458752.0, + "26": 3336458752.0, + "27": 3336458752.0, + "28": 3336458752.0, + "29": 3336458752.0, + "30": 3336458752.0, + "31": 3336458752.0, + "32": 3336458752.0, + "33": 3336458752.0, + "34": 3336458752.0, + "35": 3336458752.0, + "36": 3336458752.0, + "37": 3336458752.0, + "38": 3336458752.0, + "39": 3336458752.0, + "40": 3336458752.0, + "41": 3336458752.0, + "42": 3336458752.0, + "43": 3336458752.0, + "44": 3336458752.0, + "45": 3336458752.0, + "46": 3336458752.0, + "47": 3336458752.0, + "48": 3336458752.0, + "49": 3336458752.0, + "50": 3336458752.0, + "51": 3336458752.0, + "52": 3336458752.0, + "53": 3336458752.0, + "54": 3336458752.0, + "55": 3336458752.0, + "56": 3336458752.0, + "57": 3336458752.0, + "58": 3336458752.0, + "59": 3336458752.0, + "60": 3336458752.0, + "61": 3336458752.0, + "62": 3336458752.0, + "63": 3336458752.0, + "64": 3336458752.0, + "65": 3336458752.0, + "66": 3336458752.0, + "67": 3336458752.0, + "68": 3336458752.0, + "69": 3336458752.0, + "70": 3336458752.0, + "71": 3336458752.0, + "72": 3336458752.0, + "73": 3336458752.0, + "74": 3336458752.0, + "75": 3336458752.0, + "76": 3336458752.0, + "77": 3336458752.0, + "78": 3336458752.0, + "79": 3336458752.0, + "80": 3336458752.0, + "81": 3336458752.0, + "82": 3336458752.0, + "83": 3336458752.0, + "84": 3336458752.0, + "85": 3336458752.0, + "86": 3336458752.0, + "87": 3336458752.0, + "88": 3336458752.0, + "89": 3336458752.0, + "90": 3336458752.0, + "91": 3336458752.0, + "92": 3336458752.0, + "93": 3336458752.0, + "94": 3336458752.0, + "95": 3336458752.0, + "96": 3336458752.0, + "97": 3336458752.0, + "98": 3336458752.0, + "99": 3336458752.0, + "100": 3336458752.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.60166, + "2": 0.33673, + "3": 0.25171, + "4": 0.25375, + "5": 0.25753, + "6": 0.27787, + "7": 0.24971, + "8": 0.2503, + "9": 0.25048, + "10": 0.24978, + "11": 0.25041, + "12": 0.24978, + "13": 
0.25194, + "14": 0.2514, + "15": 0.25318, + "16": 0.25109, + "17": 0.25362, + "18": 0.24882, + "19": 0.24704, + "20": 0.25004, + "21": 0.27982, + "22": 0.24826, + "23": 0.24772, + "24": 0.251, + "25": 0.24928, + "26": 0.24917, + "27": 0.25053, + "28": 0.25787, + "29": 0.24964, + "30": 0.24738, + "31": 0.24871, + "32": 0.24723, + "33": 0.25394, + "34": 0.24523, + "35": 0.26602, + "36": 0.25389, + "37": 0.25278, + "38": 0.24491, + "39": 0.2522, + "40": 0.25493, + "41": 0.25366, + "42": 0.27735, + "43": 0.2544, + "44": 0.25245, + "45": 0.25589, + "46": 0.24817, + "47": 0.24991, + "48": 0.2536, + "49": 0.27661, + "50": 0.25098, + "51": 0.252, + "52": 0.25923, + "53": 0.26278, + "54": 0.25083, + "55": 0.25065, + "56": 0.281, + "57": 0.25168, + "58": 0.25062, + "59": 0.24811, + "60": 0.25419, + "61": 0.2513, + "62": 0.24774, + "63": 0.24385, + "64": 0.24558, + "65": 0.24527, + "66": 0.24409, + "67": 0.24307, + "68": 0.24418, + "69": 0.24735, + "70": 0.26794, + "71": 0.24394, + "72": 0.24559, + "73": 0.24851, + "74": 0.24204, + "75": 0.24385, + "76": 0.24384, + "77": 0.2634, + "78": 0.24391, + "79": 0.24432, + "80": 0.24643, + "81": 0.24693, + "82": 0.2446, + "83": 0.24366, + "84": 0.24512, + "85": 0.25101, + "86": 0.24393, + "87": 0.24582, + "88": 0.24672, + "89": 0.24434, + "90": 0.24628, + "91": 0.24503, + "92": 0.24574, + "93": 0.25036, + "94": 0.25184, + "95": 0.254, + "96": 0.24924, + "97": 0.25063, + "98": 0.25449, + "99": 0.24818, + "100": 0.24724 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..81670d237ce --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + 
"start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.372, + "2": 10.37006, + "3": 9.85232, + "4": 9.61996, + "5": 9.40868, + "6": 9.43215, + "7": 9.31482, + "8": 9.27336, + "9": 9.1139, + "10": 9.03962, + "11": 8.87218, + "12": 8.80873, + "13": 8.83468, + "14": 8.69011, + "15": 8.66228, + "16": 8.54828, + "17": 8.50093, + "18": 8.42525, + "19": 8.3881, + "20": 8.2807, + "21": 8.26609, + "22": 8.16003, + "23": 8.1124, + "24": 8.14262, + "25": 7.98432, + "26": 8.10592, + "27": 7.88963, + "28": 7.97037, + "29": 7.81276, + "30": 7.87638, + "31": 7.82516, + "32": 7.70248, + "33": 7.80198, + "34": 7.56872, + "35": 7.67379, + "36": 7.54691, + "37": 7.47408, + "38": 7.50739, + "39": 7.49773, + "40": 7.51091, + "41": 7.41065, + "42": 7.37995, + "43": 7.44078, + "44": 7.39393, + "45": 7.37239, + "46": 7.28427, + "47": 7.46631, + "48": 7.2905, + "49": 7.35025, + "50": 7.17204, + "51": 7.37012, + "52": 7.14467, + "53": 7.12652, + "54": 7.23751, + "55": 7.15586, + "56": 7.23154, + "57": 7.33541, + "58": 7.01363, + "59": 7.11431, + "60": 7.15121, + "61": 7.10904, + "62": 7.26834, + "63": 7.15176, + "64": 7.08415, + "65": 6.99114, + "66": 7.05301, + "67": 7.04354, + "68": 7.1398, + "69": 7.03224, + "70": 7.05832, + "71": 6.90372, + "72": 6.99794, + "73": 6.9769, + "74": 6.91759, + "75": 7.06626, + "76": 6.95758, + "77": 7.0871, + "78": 7.03238, + "79": 6.85274, + "80": 6.93633, + "81": 6.97617, + "82": 7.06196, + "83": 6.98213, + "84": 7.00931, + "85": 6.85082, + "86": 7.04673, + "87": 6.97907, + "88": 6.91096, + "89": 6.81719, + "90": 7.2459, + "91": 6.7046, + "92": 7.05377, + "93": 6.89397, + "94": 7.0542, + "95": 6.85031, + "96": 6.96441, + "97": 6.95632, + "98": 6.88246, + "99": 7.00392, + "100": 6.98993 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43299.0, + "2": 44047.0, + "3": 44744.0, + "4": 42405.0, + "5": 45385.0, + "6": 40946.0, + "7": 43183.0, + "8": 45446.0, + "9": 42445.0, + 
"10": 45361.0, + "11": 43966.0, + "12": 44593.0, + "13": 43907.0, + "14": 46210.0, + "15": 43904.0, + "16": 41614.0, + "17": 43840.0, + "18": 44687.0, + "19": 42536.0, + "20": 44746.0, + "21": 44767.0, + "22": 41831.0, + "23": 45449.0, + "24": 43072.0, + "25": 42457.0, + "26": 43921.0, + "27": 46208.0, + "28": 46361.0, + "29": 46146.0, + "30": 43976.0, + "31": 41272.0, + "32": 43348.0, + "33": 45431.0, + "34": 43295.0, + "35": 43264.0, + "36": 42493.0, + "37": 40075.0, + "38": 42518.0, + "39": 44713.0, + "40": 43230.0, + "41": 44666.0, + "42": 43251.0, + "43": 45471.0, + "44": 44600.0, + "45": 43330.0, + "46": 43932.0, + "47": 42400.0, + "48": 44673.0, + "49": 43149.0, + "50": 43373.0, + "51": 41142.0, + "52": 43824.0, + "53": 43917.0, + "54": 42023.0, + "55": 43883.0, + "56": 43235.0, + "57": 42536.0, + "58": 43829.0, + "59": 44648.0, + "60": 41187.0, + "61": 39720.0, + "62": 44740.0, + "63": 44690.0, + "64": 45358.0, + "65": 44695.0, + "66": 45364.0, + "67": 43138.0, + "68": 42538.0, + "69": 43820.0, + "70": 45549.0, + "71": 43324.0, + "72": 44760.0, + "73": 45363.0, + "74": 42473.0, + "75": 44666.0, + "76": 43903.0, + "77": 42082.0, + "78": 40295.0, + "79": 38890.0, + "80": 41131.0, + "81": 45363.0, + "82": 43206.0, + "83": 38487.0, + "84": 42462.0, + "85": 43985.0, + "86": 45695.0, + "87": 40826.0, + "88": 41822.0, + "89": 41069.0, + "90": 44664.0, + "91": 46170.0, + "92": 41797.0, + "93": 43208.0, + "94": 39552.0, + "95": 44106.0, + "96": 44697.0, + "97": 45398.0, + "98": 41792.0, + "99": 45429.0, + "100": 42437.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2171550208.0, + "2": 2171550208.0, + "3": 2171550208.0, + "4": 2171550208.0, + "5": 2171550208.0, + "6": 2171550208.0, + "7": 2171550208.0, + "8": 2171550208.0, + "9": 2171550208.0, + "10": 2171550208.0, + "11": 2171550208.0, + "12": 2171550208.0, + "13": 2171550208.0, + "14": 2171550208.0, + "15": 2171550208.0, + "16": 2171550208.0, + 
"17": 2171550208.0, + "18": 2171550208.0, + "19": 2171550208.0, + "20": 2171550208.0, + "21": 2171550208.0, + "22": 2171550208.0, + "23": 2171550208.0, + "24": 2171550208.0, + "25": 2171550208.0, + "26": 2171550208.0, + "27": 2171550208.0, + "28": 2171550208.0, + "29": 2171550208.0, + "30": 2171550208.0, + "31": 2171550208.0, + "32": 2171550208.0, + "33": 2171550208.0, + "34": 2171550208.0, + "35": 2171550208.0, + "36": 2171550208.0, + "37": 2171550208.0, + "38": 2171550208.0, + "39": 2171550208.0, + "40": 2171550208.0, + "41": 2171550208.0, + "42": 2171550208.0, + "43": 2171550208.0, + "44": 2171550208.0, + "45": 2171550208.0, + "46": 2171550208.0, + "47": 2171550208.0, + "48": 2171550208.0, + "49": 2171550208.0, + "50": 2171550208.0, + "51": 2171550208.0, + "52": 2171550208.0, + "53": 2171550208.0, + "54": 2171550208.0, + "55": 2171550208.0, + "56": 2171550208.0, + "57": 2171550208.0, + "58": 2171550208.0, + "59": 2171550208.0, + "60": 2171550208.0, + "61": 2171550208.0, + "62": 2171550208.0, + "63": 2171550208.0, + "64": 2171550208.0, + "65": 2171550208.0, + "66": 2171550208.0, + "67": 2171550208.0, + "68": 2171550208.0, + "69": 2171550208.0, + "70": 2171550208.0, + "71": 2171550208.0, + "72": 2171550208.0, + "73": 2171550208.0, + "74": 2171550208.0, + "75": 2171550208.0, + "76": 2171550208.0, + "77": 2171550208.0, + "78": 2171550208.0, + "79": 2171550208.0, + "80": 2171550208.0, + "81": 2171550208.0, + "82": 2171550208.0, + "83": 2171550208.0, + "84": 2171550208.0, + "85": 2171550208.0, + "86": 2171550208.0, + "87": 2171550208.0, + "88": 2171550208.0, + "89": 2171550208.0, + "90": 2171550208.0, + "91": 2171550208.0, + "92": 2171550208.0, + "93": 2171550208.0, + "94": 2171550208.0, + "95": 2171550208.0, + "96": 2171550208.0, + "97": 2171550208.0, + "98": 2171550208.0, + "99": 2171550208.0, + "100": 2171550208.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2407642624.0, + "2": 
3336458752.0, + "3": 3336458752.0, + "4": 3336458752.0, + "5": 3336458752.0, + "6": 3336458752.0, + "7": 3336458752.0, + "8": 3336458752.0, + "9": 3336458752.0, + "10": 3336458752.0, + "11": 3336458752.0, + "12": 3336458752.0, + "13": 3336458752.0, + "14": 3336458752.0, + "15": 3336458752.0, + "16": 3336458752.0, + "17": 3336458752.0, + "18": 3336458752.0, + "19": 3336458752.0, + "20": 3336458752.0, + "21": 3336458752.0, + "22": 3336458752.0, + "23": 3336458752.0, + "24": 3336458752.0, + "25": 3336458752.0, + "26": 3336458752.0, + "27": 3336458752.0, + "28": 3336458752.0, + "29": 3336458752.0, + "30": 3336458752.0, + "31": 3336458752.0, + "32": 3336458752.0, + "33": 3336458752.0, + "34": 3336458752.0, + "35": 3336458752.0, + "36": 3336458752.0, + "37": 3336458752.0, + "38": 3336458752.0, + "39": 3336458752.0, + "40": 3336458752.0, + "41": 3336458752.0, + "42": 3336458752.0, + "43": 3336458752.0, + "44": 3336458752.0, + "45": 3336458752.0, + "46": 3336458752.0, + "47": 3336458752.0, + "48": 3336458752.0, + "49": 3336458752.0, + "50": 3336458752.0, + "51": 3336458752.0, + "52": 3336458752.0, + "53": 3336458752.0, + "54": 3336458752.0, + "55": 3336458752.0, + "56": 3336458752.0, + "57": 3336458752.0, + "58": 3336458752.0, + "59": 3336458752.0, + "60": 3336458752.0, + "61": 3336458752.0, + "62": 3336458752.0, + "63": 3336458752.0, + "64": 3336458752.0, + "65": 3336458752.0, + "66": 3336458752.0, + "67": 3336458752.0, + "68": 3336458752.0, + "69": 3336458752.0, + "70": 3336458752.0, + "71": 3336458752.0, + "72": 3336458752.0, + "73": 3336458752.0, + "74": 3336458752.0, + "75": 3336458752.0, + "76": 3336458752.0, + "77": 3336458752.0, + "78": 3336458752.0, + "79": 3336458752.0, + "80": 3336458752.0, + "81": 3336458752.0, + "82": 3336458752.0, + "83": 3336458752.0, + "84": 3336458752.0, + "85": 3336458752.0, + "86": 3336458752.0, + "87": 3336458752.0, + "88": 3336458752.0, + "89": 3336458752.0, + "90": 3336458752.0, + "91": 3336458752.0, + "92": 3336458752.0, + "93": 
3336458752.0, + "94": 3336458752.0, + "95": 3336458752.0, + "96": 3336458752.0, + "97": 3336458752.0, + "98": 3336458752.0, + "99": 3336458752.0, + "100": 3336458752.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.16897, + "2": 0.35143, + "3": 0.28496, + "4": 0.28172, + "5": 0.28308, + "6": 0.2855, + "7": 0.28287, + "8": 0.28079, + "9": 0.2809, + "10": 0.28329, + "11": 0.28038, + "12": 0.28371, + "13": 0.28032, + "14": 0.28362, + "15": 0.28125, + "16": 0.28046, + "17": 0.28421, + "18": 0.28132, + "19": 0.2808, + "20": 0.28432, + "21": 0.28578, + "22": 0.28205, + "23": 0.28411, + "24": 0.28378, + "25": 0.28227, + "26": 0.28231, + "27": 0.28353, + "28": 0.28497, + "29": 0.29981, + "30": 0.28557, + "31": 0.28777, + "32": 0.28808, + "33": 0.28609, + "34": 0.32585, + "35": 0.341, + "36": 0.2886, + "37": 0.28157, + "38": 0.2916, + "39": 0.28501, + "40": 0.27952, + "41": 0.27767, + "42": 0.28062, + "43": 0.28781, + "44": 0.2839, + "45": 0.282, + "46": 0.27837, + "47": 0.27883, + "48": 0.27865, + "49": 0.28179, + "50": 0.27881, + "51": 0.27669, + "52": 0.28063, + "53": 0.27909, + "54": 0.27716, + "55": 0.27807, + "56": 0.2785, + "57": 0.27679, + "58": 0.28004, + "59": 0.27659, + "60": 0.27984, + "61": 0.2771, + "62": 0.27714, + "63": 0.2802, + "64": 0.2918, + "65": 0.27948, + "66": 0.27839, + "67": 0.28573, + "68": 0.27933, + "69": 0.27893, + "70": 0.27964, + "71": 0.2767, + "72": 0.27816, + "73": 0.28004, + "74": 0.27997, + "75": 0.28095, + "76": 0.27752, + "77": 0.27912, + "78": 0.28068, + "79": 0.27992, + "80": 0.28771, + "81": 0.28046, + "82": 0.28352, + "83": 0.28376, + "84": 0.28337, + "85": 0.28197, + "86": 0.27949, + "87": 0.27909, + "88": 0.28479, + "89": 0.28248, + "90": 0.27742, + "91": 0.27819, + "92": 0.2809, + "93": 0.28123, + "94": 0.27933, + "95": 0.28364, + "96": 0.28523, + "97": 0.28365, + "98": 0.27822, + "99": 0.28382, + "100": 0.28917 + } + } +} \ No newline at end of file diff --git 
a/tests/test_utils/python_scripts/download_golden_values.py b/tests/test_utils/python_scripts/download_golden_values.py index af0a58c3522..650867f231f 100644 --- a/tests/test_utils/python_scripts/download_golden_values.py +++ b/tests/test_utils/python_scripts/download_golden_values.py @@ -55,8 +55,8 @@ def main(pipeline_id: int, only_failing: bool): for functional_pipeline_job in functional_pipeline_jobs: job = project.jobs.get(functional_pipeline_job.id) logger.info("Starting with job %s", job.name) - if only_failing and job.status != "failed": - logger.info("Job %s is not failing. Skipping.", job.name) + if only_failing and job.status == "success": + logger.info("Job %s is successful. Skipping.", job.name) continue try: @@ -66,26 +66,44 @@ def main(pipeline_id: int, only_failing: bool): zip = zipfile.ZipFile(file_name) zip.extractall("tmp") logger.info("Downloaded artifacts of job %s", job.name) - except Exception: + except Exception as e: + logger.error("Failed to download artifacts of job %s due to %s", job.name, e) continue os.unlink(file_name) restart_dir = os.listdir(pathlib.Path("tmp") / "results" / "iteration=0")[-1] - golden_values_source = ( - pathlib.Path(ASSETS_DIR) - / f"{restart_dir}" - / "assets" - / "basic" - / f"{job.name.replace('_', '-').lower()}-{environment.replace('_', '-')}" - / f"golden_values_{environment}.json" + golden_values_sources = list( + ( + pathlib.Path(ASSETS_DIR) + / f"{restart_dir}" + / "assets" + / "basic" + / f"{job.name.replace('_', '-').lower()}-{environment.replace('_', '-')}" + ).glob("g*.json") ) + + if len(golden_values_sources) == 1: + golden_values_source = golden_values_sources[0] + else: + logger.info( + "Golden values for %s does not exist. 
Skip.", str(golden_values_sources) + ) + continue + + golden_values_source_name = golden_values_source.name + golden_values_source_name = golden_values_source_name.replace("_dgx_h100", "") + golden_values_source_name = golden_values_source_name.replace("_dgx_a100", "") + golden_values_source_name = golden_values_source_name.replace( + "generations", "golden_values" + ) + golden_values_target = ( pathlib.Path("tests") / "functional_tests" / 'test_cases' / job.stage / job.name - / f"golden_values_{environment}.json" + / golden_values_source_name ) if golden_values_source.exists(): diff --git a/tests/test_utils/python_scripts/launch_jet_workload.py b/tests/test_utils/python_scripts/launch_jet_workload.py index da0ddf9b93b..ec7e2d4a3ae 100644 --- a/tests/test_utils/python_scripts/launch_jet_workload.py +++ b/tests/test_utils/python_scripts/launch_jet_workload.py @@ -108,6 +108,7 @@ def launch_and_wait_for_completion( ), "HF_HUB_CACHE": "/lustre/fsw/coreai_dlalgo_mcore/hf_hub", "TRANSFORMERS_OFFLINE": "1", + "CLUSTER": cluster, } } } @@ -486,15 +487,17 @@ def main( ) if is_flaky_failure(concat_allranks_logs): - logger.error("Detected flaky failure, attempt restart.") + if n_attempts < 9: + logger.error("Detected flaky failure, attempt restart.") n_attempts += 1 continue if ( "FAILED tests/functional_tests/python_test_utils" in concat_mainrank_log ) and re.compile(r"\bEXIT_CODE=0\b").search(concat_mainrank_log) is not None: - logger.error("Non-determinism, let's try another node.") n_nondeterminism_attemps += 1 + if n_nondeterminism_attemps < 3: + logger.error("Non-determinism, let's try another node.") continue telemetrics_and_exit( diff --git a/tests/test_utils/recipes/bert.yaml b/tests/test_utils/recipes/bert.yaml index 66e870e66c6..f0be62e4701 100644 --- a/tests/test_utils/recipes/bert.yaml +++ b/tests/test_utils/recipes/bert.yaml @@ -50,7 +50,7 @@ spec: "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" "TRAINING_SCRIPT_PATH=pretrain_bert.py" 
"TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml index 2b7966bb04a..b276ac66d85 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml @@ -48,7 +48,7 @@ spec: "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" "OUTPUT_PATH={assets_dir}" - "TENSORBOARD_PATH={assets_dir}/generations.json" + "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=false" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" @@ -67,4 +67,3 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - diff --git a/tests/test_utils/recipes/gpt-dynamic-inference.yaml b/tests/test_utils/recipes/gpt-dynamic-inference.yaml index 9346c0c8123..757d3d2cd26 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference.yaml @@ -49,7 +49,7 @@ spec: "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" "OUTPUT_PATH={assets_dir}" - "TENSORBOARD_PATH={assets_dir}/generations.json" + 
"TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=false" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" @@ -73,4 +73,3 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - diff --git a/tests/test_utils/recipes/gpt-grads.yaml b/tests/test_utils/recipes/gpt-grads.yaml index 6915a348598..ea569362311 100644 --- a/tests/test_utils/recipes/gpt-grads.yaml +++ b/tests/test_utils/recipes/gpt-grads.yaml @@ -55,7 +55,7 @@ spec: "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" "TRAINING_SCRIPT_PATH=pretrain_gpt.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "N_REPEAT=1" "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" diff --git a/tests/test_utils/recipes/gpt-nemo.yaml b/tests/test_utils/recipes/gpt-nemo.yaml index fc57f54d7d7..848c1a56071 100644 --- a/tests/test_utils/recipes/gpt-nemo.yaml +++ b/tests/test_utils/recipes/gpt-nemo.yaml @@ -47,7 +47,7 @@ spec: "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}" "TRAINING_SCRIPT_PATH=\"nemo llm pretrain -y --factory {nemo_model}\"" "TRAINING_PARAMS_PATH=/opt/megatron-lm/tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=/opt/megatron-lm/tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=/opt/megatron-lm/tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" ) diff --git a/tests/test_utils/recipes/gpt-static-inference.yaml b/tests/test_utils/recipes/gpt-static-inference.yaml index 15385fc707a..424c424bbbf 100644 --- 
a/tests/test_utils/recipes/gpt-static-inference.yaml +++ b/tests/test_utils/recipes/gpt-static-inference.yaml @@ -48,9 +48,9 @@ spec: "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "OUTPUT_PATH={assets_dir}" - "TENSORBOARD_PATH={assets_dir}/generations.json" + "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=false" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index 83ac3a5d99a..b29fc21e877 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -61,7 +61,7 @@ spec: "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" "TRAINING_SCRIPT_PATH=pretrain_gpt.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" diff --git a/tests/test_utils/recipes/mamba-static-inference.yaml b/tests/test_utils/recipes/mamba-static-inference.yaml index 3c4faf4ace7..f0e29999d43 100644 --- a/tests/test_utils/recipes/mamba-static-inference.yaml +++ b/tests/test_utils/recipes/mamba-static-inference.yaml @@ -45,9 +45,9 @@ spec: "DATA_CACHE_PATH=/workspace/data/cache" 
"TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "OUTPUT_PATH={assets_dir}" - "TENSORBOARD_PATH={assets_dir}/generations.json" + "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=false" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" diff --git a/tests/test_utils/recipes/mamba.yaml b/tests/test_utils/recipes/mamba.yaml index f4dea805e65..7c1f9a3627f 100644 --- a/tests/test_utils/recipes/mamba.yaml +++ b/tests/test_utils/recipes/mamba.yaml @@ -48,7 +48,7 @@ spec: "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}" "TRAINING_SCRIPT_PATH=pretrain_mamba.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" diff --git a/tests/test_utils/recipes/mimo.yaml b/tests/test_utils/recipes/mimo.yaml index 4abd34b7030..dfde82656dc 100644 --- a/tests/test_utils/recipes/mimo.yaml +++ b/tests/test_utils/recipes/mimo.yaml @@ -52,7 +52,7 @@ spec: "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}/checkpoints" "TRAINING_SCRIPT_PATH=./examples/mimo/train.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - 
"GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" ) diff --git a/tests/test_utils/recipes/moe-dynamic-inference.yaml b/tests/test_utils/recipes/moe-dynamic-inference.yaml index 516f7a390ff..3a48c2564a5 100644 --- a/tests/test_utils/recipes/moe-dynamic-inference.yaml +++ b/tests/test_utils/recipes/moe-dynamic-inference.yaml @@ -46,9 +46,9 @@ spec: "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "OUTPUT_PATH={assets_dir}" - "TENSORBOARD_PATH={assets_dir}/generations.json" + "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=false" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" @@ -61,4 +61,4 @@ products: products: - environment: [dev] scope: [mr] - platforms: [dgx_h100] \ No newline at end of file + platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/moe-static-inference.yaml b/tests/test_utils/recipes/moe-static-inference.yaml index 0a86cffdf31..951820cb7ae 100644 --- a/tests/test_utils/recipes/moe-static-inference.yaml +++ b/tests/test_utils/recipes/moe-static-inference.yaml @@ -46,9 +46,9 @@ spec: "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - 
"GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "OUTPUT_PATH={assets_dir}" - "TENSORBOARD_PATH={assets_dir}/generations.json" + "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=false" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" @@ -66,4 +66,4 @@ products: products: - environment: [dev] scope: [mr] - platforms: [dgx_h100] \ No newline at end of file + platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 5cfa307c685..972288bd905 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -52,7 +52,7 @@ spec: "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" "TRAINING_SCRIPT_PATH=pretrain_gpt.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" @@ -213,9 +213,9 @@ products: platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed] products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - environment: [dev] - scope: [mr-slim] - platforms: [dgx_h100] + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] + - environment: [dev] + scope: [mr-slim] + platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/multimodal-llava.yaml b/tests/test_utils/recipes/multimodal-llava.yaml index d95fa186172..4de7f0a9c0f 100644 --- a/tests/test_utils/recipes/multimodal-llava.yaml +++ 
b/tests/test_utils/recipes/multimodal-llava.yaml @@ -49,7 +49,7 @@ spec: "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}/checkpoints" "TRAINING_SCRIPT_PATH=pretrain_vlm.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" diff --git a/tests/test_utils/recipes/t5.yaml b/tests/test_utils/recipes/t5.yaml index 222ce2e9216..31a72e9b5a1 100644 --- a/tests/test_utils/recipes/t5.yaml +++ b/tests/test_utils/recipes/t5.yaml @@ -50,7 +50,7 @@ spec: "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" "TRAINING_SCRIPT_PATH=pretrain_t5.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" diff --git a/tests/unit_tests/test_muon_optimizer.py b/tests/unit_tests/test_muon_optimizer.py index d5dffcd0e19..97d78fe6c70 100644 --- a/tests/unit_tests/test_muon_optimizer.py +++ b/tests/unit_tests/test_muon_optimizer.py @@ -1,15 +1,14 @@ import os -import pytest - -from packaging.version import Version +import pytest import torch import torch.nn as nn import torch.nn.functional as F +from packaging.version import Version from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig from megatron.core.optimizer import OptimizerConfig -from 
megatron.core.optimizer.muon import get_megatron_muon_optimizer, TensorParallelMuon +from megatron.core.optimizer.muon import TensorParallelMuon, get_megatron_muon_optimizer from megatron.core.transformer import TransformerConfig from tests.unit_tests.test_utilities import Utils from tests.unit_tests.test_utils import _deinit_distributed, _init_distributed From c7590d8c3733619efa87a1a0733ac4cceedc683a Mon Sep 17 00:00:00 2001 From: Yuzhong Wang Date: Mon, 13 Oct 2025 03:15:44 -0700 Subject: [PATCH 014/334] ADLR/megatron-lm!4070 - [DEV] Support Qwen3next --- gpt_builders.py | 4 +- megatron/core/models/gpt/gpt_layer_specs.py | 465 +++++++++------ .../gpt/linear_attention_module_specs.py | 39 ++ megatron/core/models/gpt/moe_module_specs.py | 6 +- megatron/core/ssm/gated_delta_net.py | 551 ++++++++++++++++++ megatron/core/transformer/attention.py | 95 ++- megatron/core/transformer/moe/moe_layer.py | 5 +- megatron/core/transformer/spec_utils.py | 1 + .../core/transformer/transformer_config.py | 85 +++ megatron/training/arguments.py | 65 ++- megatron/training/checkpointing.py | 24 +- megatron/training/training.py | 134 ++++- megatron/training/utils.py | 4 + pyproject.toml | 1 + tests/unit_tests/ssm/test_gated_delta_net.py | 319 ++++++++++ .../transformer/moe/test_shared_experts.py | 9 +- .../unit_tests/transformer/test_attention.py | 20 +- uv.lock | 221 ++++++- 18 files changed, 1792 insertions(+), 256 deletions(-) create mode 100644 megatron/core/models/gpt/linear_attention_module_specs.py create mode 100644 megatron/core/ssm/gated_delta_net.py create mode 100644 tests/unit_tests/ssm/test_gated_delta_net.py diff --git a/gpt_builders.py b/gpt_builders.py index 89b228815ff..591f74bb20c 100644 --- a/gpt_builders.py +++ b/gpt_builders.py @@ -41,7 +41,7 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None): else: use_te = args.transformer_impl == "transformer_engine" - if args.num_experts: + if args.num_experts or (args.linear_attention_type is not 
None): # Define the decoder block spec transformer_layer_spec = get_gpt_decoder_block_spec( config, @@ -112,6 +112,7 @@ def _get_transformer_layer_spec(use_te, config): args.moe_grouped_gemm, args.qk_layernorm, args.multi_latent_attention, + args.linear_attention_type, moe_use_legacy_grouped_gemm=args.moe_use_legacy_grouped_gemm, qk_l2_norm=args.qk_l2_norm, use_kitchen=config.use_kitchen, @@ -122,6 +123,7 @@ def _get_transformer_layer_spec(use_te, config): args.moe_grouped_gemm, args.qk_layernorm, args.multi_latent_attention, + args.linear_attention_type, moe_use_legacy_grouped_gemm=args.moe_use_legacy_grouped_gemm, normalization=args.normalization, use_kitchen=config.use_kitchen, diff --git a/megatron/core/models/gpt/gpt_layer_specs.py b/megatron/core/models/gpt/gpt_layer_specs.py index 68c1eb8c953..e3ef7f20141 100755 --- a/megatron/core/models/gpt/gpt_layer_specs.py +++ b/megatron/core/models/gpt/gpt_layer_specs.py @@ -5,6 +5,9 @@ from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider +from megatron.core.models.gpt.linear_attention_module_specs import ( + get_linear_attention_module_spec_for_backend, +) from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec_for_backend from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules from megatron.core.transformer.enums import AttnMaskType, LayerType @@ -74,8 +77,10 @@ def get_gpt_layer_with_transformer_engine_spec( moe_grouped_gemm: Optional[bool] = False, qk_layernorm: Optional[bool] = False, multi_latent_attention: Optional[bool] = False, + linear_attention_type: Optional[str] = None, fp8: Optional[str] = None, # pylint: disable=unused-argument moe_use_legacy_grouped_gemm: Optional[bool] = False, + normalization: Optional[str] = None, qk_l2_norm: Optional[bool] = False, use_te_op_fuser: Optional[bool] = False, use_kitchen: bool = False, @@ -88,10 +93,14 @@ def 
get_gpt_layer_with_transformer_engine_spec( num_experts (int, optional): Number of experts. Defaults to None. moe_grouped_gemm (bool, optional): To use Grouped GEMM. Defaults to False. qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False. + multi_latent_attention (bool, optional): To use multi-latent attention. Defaults to False. + linear_attention_type (str, optional): The type of linear attention. Defaults to None. fp8 (str, optional): Deprecated. For temporary Nemo compatibility. moe_use_legacy_grouped_gemm (bool, optional): Force use the legacy GroupedMLP. Defaults to False. + normalization (str, optional): The normalization to use. Defaults to None. qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False. + use_kitchen (bool, optional): To use KitchenSpecProvider. Defaults to False. use_te_op_fuser (bool, optional): Use Transformer Engine's operation-based API, which may enable certain operation fusions. Defaults to False. @@ -115,8 +124,22 @@ def get_gpt_layer_with_transformer_engine_spec( else: backend = TESpecProvider() + sharded_state_dict_keys_map = {} + + attention = get_attention_module_spec_for_backend( + backend=backend, + sharded_state_dict_keys_map=sharded_state_dict_keys_map, + linear_attention_type=linear_attention_type, + qk_layernorm=qk_layernorm, + qk_l2_norm=qk_l2_norm, + multi_latent_attention=multi_latent_attention, + mla_down_proj_use_column_parallel=False, + normalization=normalization, + ) + mlp = get_mlp_module_spec_for_backend( backend=backend, + sharded_state_dict_keys_map=sharded_state_dict_keys_map, num_experts=num_experts, moe_grouped_gemm=moe_grouped_gemm, moe_use_legacy_grouped_gemm=moe_use_legacy_grouped_gemm, @@ -124,77 +147,13 @@ def get_gpt_layer_with_transformer_engine_spec( use_te_activation_func=use_te_activation_func, ) - if multi_latent_attention: - assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA." 
- linear_q_up_proj = ( - backend.column_parallel_layer_norm_linear() - if qk_layernorm - else backend.column_parallel_linear() - ) - linear_kv_up_proj = ( - backend.column_parallel_layer_norm_linear() - if qk_layernorm - else backend.column_parallel_linear() - ) - return ModuleSpec( - module=TransformerLayer, - submodules=TransformerLayerSubmodules( - input_layernorm=backend.layer_norm(), - self_attention=ModuleSpec( - module=MLASelfAttention, - params={"attn_mask_type": AttnMaskType.causal}, - submodules=MLASelfAttentionSubmodules( - linear_q_proj=backend.column_parallel_linear(), - linear_q_down_proj=backend.linear(), - linear_q_up_proj=linear_q_up_proj, - linear_kv_down_proj=backend.linear(), - linear_kv_up_proj=linear_kv_up_proj, - core_attention=backend.core_attention(), - linear_proj=backend.row_parallel_linear(), - q_layernorm=IdentityOp, - kv_layernorm=IdentityOp, - ), - ), - self_attn_bda=get_bias_dropout_add, - pre_mlp_layernorm=backend.layer_norm() if num_experts else IdentityOp, - mlp=mlp, - mlp_bda=get_bias_dropout_add, - ), - ) - else: - qk_norm = backend.layer_norm(for_qk=True) - return ModuleSpec( - module=TransformerLayer, - submodules=TransformerLayerSubmodules( - self_attention=ModuleSpec( - module=SelfAttention, - params={"attn_mask_type": AttnMaskType.causal}, - submodules=SelfAttentionSubmodules( - linear_qkv=backend.column_parallel_layer_norm_linear(), - core_attention=backend.core_attention(), - linear_proj=backend.row_parallel_linear(), - q_layernorm=( - L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) - ), - k_layernorm=( - L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) - ), - ), - ), - self_attn_bda=get_bias_dropout_add, - pre_mlp_layernorm=backend.layer_norm() if num_experts else IdentityOp, - mlp=mlp, - mlp_bda=get_bias_dropout_add, - sharded_state_dict_keys_map={ - "mlp.0.weight": "mlp.linear_fc1.layer_norm_weight", - "mlp.0.bias": "mlp.linear_fc1.layer_norm_bias", - "mlp.1.basic_ops.0.weight": 
"mlp.linear_fc1.weight", - "mlp.1.basic_ops.1.bias": "mlp.linear_fc1.bias", - "mlp.3.basic_ops.0.weight": "mlp.linear_fc2.weight", - "mlp.3.basic_ops.1.bias": "mlp.linear_fc2.bias", - }, - ), - ) + return get_transformer_layer_spec_for_backend( + backend=backend, + attention=attention, + mlp=mlp, + sharded_state_dict_keys_map=sharded_state_dict_keys_map, + normalization=normalization, + ) def get_gpt_layer_local_spec( @@ -202,6 +161,7 @@ def get_gpt_layer_local_spec( moe_grouped_gemm: Optional[bool] = False, qk_layernorm: Optional[bool] = False, multi_latent_attention: Optional[bool] = False, + linear_attention_type: Optional[str] = None, fp8: Optional[str] = None, # pylint: disable=unused-argument moe_use_legacy_grouped_gemm: Optional[bool] = False, normalization: Optional[str] = None, @@ -215,10 +175,14 @@ def get_gpt_layer_local_spec( num_experts (int, optional): Number of experts. Defaults to None. moe_grouped_gemm (bool, optional): To use Grouped GEMM. Defaults to False. qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False. + multi_latent_attention (bool, optional): To use multi-latent attention. Defaults to False. + linear_attention_type (str, optional): The type of linear attention. Defaults to None. fp8 (str, optional): Deprecated. For temporary Nemo compatibility. moe_use_legacy_grouped_gemm (bool, optional): Force use the legacy GroupedMLP. Defaults to False. + normalization (str, optional): The normalization to use. Defaults to None. qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False. + use_kitchen (bool, optional): To use KitchenSpecProvider. Defaults to False. Returns: ModuleSpec: Module specification with Megatron-Core modules @@ -229,13 +193,6 @@ def get_gpt_layer_local_spec( backend = KitchenSpecProvider(fallback=LocalSpecProvider()) else: backend = LocalSpecProvider() - # Adjust for RMS norm. 
- if normalization == "RMSNorm": - layer_norm = backend.layer_norm(rms_norm=True, for_qk=False) - qk_norm = backend.layer_norm(rms_norm=True, for_qk=True) - else: - layer_norm = backend.layer_norm(rms_norm=False, for_qk=False) - qk_norm = backend.layer_norm(rms_norm=False, for_qk=True) if fp8 is not None: warnings.warn( @@ -243,6 +200,22 @@ def get_gpt_layer_local_spec( " and will be removed soon. Please update your code accordingly." ) + if linear_attention_type is not None: + raise NotImplementedError("Linear attention is not supported with local spec yet.") + + sharded_state_dict_keys_map = {} + + attention = get_attention_module_spec_for_backend( + backend=backend, + sharded_state_dict_keys_map=sharded_state_dict_keys_map, + linear_attention_type=linear_attention_type, + qk_layernorm=qk_layernorm, + qk_l2_norm=qk_l2_norm, + multi_latent_attention=multi_latent_attention, + mla_down_proj_use_column_parallel=True, + normalization=normalization, + ) + mlp = get_mlp_module_spec_for_backend( backend=backend, num_experts=num_experts, @@ -250,63 +223,162 @@ def get_gpt_layer_local_spec( moe_use_legacy_grouped_gemm=moe_use_legacy_grouped_gemm, ) + return get_transformer_layer_spec_for_backend( + backend=backend, + attention=attention, + mlp=mlp, + sharded_state_dict_keys_map=sharded_state_dict_keys_map, + normalization=normalization, + ) + + +def get_transformer_layer_spec_for_backend( + backend: BackendSpecProvider, + attention: ModuleSpec, + mlp: ModuleSpec, + sharded_state_dict_keys_map: Optional[dict] = None, + normalization: Optional[str] = None, +) -> ModuleSpec: + """Helper function to get module spec for TransformerLayer""" + + rms_norm = normalization == "RMSNorm" + + input_layernorm = ( + IdentityOp + if attention.metainfo["fuse_input_layernorm"] + else backend.layer_norm(rms_norm=rms_norm, for_qk=False) + ) + pre_mlp_layernorm = ( + IdentityOp + if mlp.metainfo["fuse_pre_mlp_layernorm"] + else backend.layer_norm(rms_norm=rms_norm, for_qk=False) + ) + + 
transformer_layer = ModuleSpec( + module=TransformerLayer, + submodules=TransformerLayerSubmodules( + input_layernorm=input_layernorm, + self_attention=attention, + self_attn_bda=get_bias_dropout_add, + pre_mlp_layernorm=pre_mlp_layernorm, + mlp=mlp, + mlp_bda=get_bias_dropout_add, + sharded_state_dict_keys_map=sharded_state_dict_keys_map, + ), + ) + return transformer_layer + + +def get_attention_module_spec_for_backend( + backend: BackendSpecProvider, + sharded_state_dict_keys_map: dict, + linear_attention_type: Optional[str] = None, + qk_layernorm: Optional[bool] = False, + qk_l2_norm: Optional[bool] = False, + multi_latent_attention: Optional[bool] = False, + mla_down_proj_use_column_parallel: Optional[bool] = False, + normalization: Optional[str] = None, +) -> ModuleSpec: + """Helper function to get module spec for Attention""" + + if linear_attention_type is not None: + return get_linear_attention_module_spec_for_backend( + backend=backend, + linear_attention_type=linear_attention_type, + normalization=normalization, + ) + + # Adjust for RMS norm. + rms_norm = normalization == "RMSNorm" + qk_norm = backend.layer_norm(rms_norm=rms_norm, for_qk=True) + if multi_latent_attention: assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA." 
- return ModuleSpec( - module=TransformerLayer, - submodules=TransformerLayerSubmodules( - input_layernorm=layer_norm, - self_attention=ModuleSpec( - module=MLASelfAttention, - params={"attn_mask_type": AttnMaskType.causal}, - submodules=MLASelfAttentionSubmodules( - linear_q_proj=backend.column_parallel_linear(), - linear_q_down_proj=backend.column_parallel_linear(), - linear_q_up_proj=backend.column_parallel_linear(), - linear_kv_down_proj=backend.column_parallel_linear(), - linear_kv_up_proj=backend.column_parallel_linear(), - core_attention=backend.core_attention(), - linear_proj=backend.row_parallel_linear(), - q_layernorm=qk_norm if qk_layernorm else IdentityOp, - kv_layernorm=qk_norm if qk_layernorm else IdentityOp, - ), - ), - self_attn_bda=get_bias_dropout_add, - pre_mlp_layernorm=layer_norm, - mlp=mlp, - mlp_bda=get_bias_dropout_add, + linear_q_down_proj = ( + backend.column_parallel_linear() + if mla_down_proj_use_column_parallel + else backend.linear() + ) + linear_kv_down_proj = ( + backend.column_parallel_linear() + if mla_down_proj_use_column_parallel + else backend.linear() + ) + linear_q_up_proj = ( + backend.column_parallel_layer_norm_linear() + if qk_layernorm and backend.fuse_layernorm_and_linear() + else backend.column_parallel_linear() + ) + linear_kv_up_proj = ( + backend.column_parallel_layer_norm_linear() + if qk_layernorm and backend.fuse_layernorm_and_linear() + else backend.column_parallel_linear() + ) + qk_norm = ( + backend.layer_norm(rms_norm=rms_norm, for_qk=True) + if qk_layernorm and not backend.fuse_layernorm_and_linear() + else IdentityOp + ) + attention = ModuleSpec( + module=MLASelfAttention, + params={"attn_mask_type": AttnMaskType.causal}, + submodules=MLASelfAttentionSubmodules( + linear_q_proj=backend.column_parallel_linear(), + linear_q_down_proj=linear_q_down_proj, + linear_q_up_proj=linear_q_up_proj, + linear_kv_down_proj=linear_kv_down_proj, + linear_kv_up_proj=linear_kv_up_proj, + 
core_attention=backend.core_attention(), + linear_proj=backend.row_parallel_linear(), + q_layernorm=qk_norm, + kv_layernorm=qk_norm, ), + metainfo={"fuse_input_layernorm": False}, ) else: - return ModuleSpec( - module=TransformerLayer, - submodules=TransformerLayerSubmodules( - input_layernorm=layer_norm, - self_attention=ModuleSpec( - module=SelfAttention, - params={"attn_mask_type": AttnMaskType.causal}, - submodules=SelfAttentionSubmodules( - linear_qkv=backend.column_parallel_linear(), - core_attention=backend.core_attention(), - linear_proj=backend.row_parallel_linear(), - q_layernorm=( - L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) - ), - k_layernorm=( - L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) - ), - ), - ), - self_attn_bda=get_bias_dropout_add, - pre_mlp_layernorm=layer_norm, - mlp=mlp, - mlp_bda=get_bias_dropout_add, - sharded_state_dict_keys_map={ - "input_layernorm.": "self_attention.linear_qkv.layer_norm_", - "pre_mlp_layernorm.": "mlp.linear_fc1.layer_norm_", - }, + linear_qkv = ( + backend.column_parallel_layer_norm_linear() + if backend.fuse_layernorm_and_linear() + else backend.column_parallel_linear() + ) + if qk_l2_norm: + qk_norm = L2Norm + elif qk_layernorm: + qk_norm = backend.layer_norm(rms_norm=rms_norm, for_qk=True) + else: + qk_norm = IdentityOp + attention = ModuleSpec( + module=SelfAttention, + params={"attn_mask_type": AttnMaskType.causal}, + submodules=SelfAttentionSubmodules( + linear_qkv=linear_qkv, + core_attention=backend.core_attention(), + linear_proj=backend.row_parallel_linear(), + q_layernorm=qk_norm, + k_layernorm=qk_norm, ), + metainfo={"fuse_input_layernorm": backend.fuse_layernorm_and_linear()}, ) + if backend.fuse_layernorm_and_linear(): + sharded_state_dict_keys_map.update( + { + "mlp.0.weight": "mlp.linear_fc1.layer_norm_weight", + "mlp.0.bias": "mlp.linear_fc1.layer_norm_bias", + "mlp.1.basic_ops.0.weight": "mlp.linear_fc1.weight", + "mlp.1.basic_ops.1.bias": 
"mlp.linear_fc1.bias", + "mlp.3.basic_ops.0.weight": "mlp.linear_fc2.weight", + "mlp.3.basic_ops.1.bias": "mlp.linear_fc2.bias", + } + ) + else: + sharded_state_dict_keys_map.update( + { + "input_layernorm.": "self_attention.linear_qkv.layer_norm_", + "pre_mlp_layernorm.": "mlp.linear_fc1.layer_norm_", + } + ) + + return attention def _get_mlp_module_spec( @@ -365,6 +437,7 @@ def get_mlp_module_spec( def get_mlp_module_spec_for_backend( backend: BackendSpecProvider, + sharded_state_dict_keys_map: Optional[dict] = None, num_experts: Optional[int] = None, moe_grouped_gemm: Optional[bool] = False, moe_use_legacy_grouped_gemm: Optional[bool] = False, @@ -382,13 +455,16 @@ def get_mlp_module_spec_for_backend( if backend.fuse_layernorm_and_linear(): linear_fc1 = backend.column_parallel_layer_norm_linear() assert linear_fc1 is not None + fuse_pre_mlp_layernorm = True else: linear_fc1 = backend.column_parallel_linear() + fuse_pre_mlp_layernorm = False return ModuleSpec( module=module, submodules=MLPSubmodules( linear_fc1=linear_fc1, linear_fc2=linear_fc2, activation_func=activation_func ), + metainfo={"fuse_pre_mlp_layernorm": fuse_pre_mlp_layernorm}, ) else: # Mixture of experts with modules in megatron core. @@ -409,57 +485,62 @@ def get_gpt_decoder_block_spec( vp_stage: Optional[int] = None, pp_rank: Optional[int] = None, ) -> TransformerBlockSubmodules: - """GPT block spec.""" + """Helper function to get GPT block spec. 
+ + Return a list of transformer layer spec of the current pipeline stage.""" + + get_layer_spec_kwargs = { + "qk_layernorm": config.qk_layernorm, + "moe_use_legacy_grouped_gemm": config.moe_use_legacy_grouped_gemm, + "qk_l2_norm": qk_l2_norm, + "use_kitchen": config.use_kitchen, + "normalization": normalization, + } if use_transformer_engine: layer_norm_impl = TENorm - dense_layer_spec = get_gpt_layer_with_transformer_engine_spec( - num_experts=None, - moe_grouped_gemm=False, - qk_layernorm=config.qk_layernorm, - multi_latent_attention=config.multi_latent_attention, - moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm, - qk_l2_norm=qk_l2_norm, - use_kitchen=config.use_kitchen, - use_te_activation_func=config.use_te_activation_func, - ) - moe_layer_spec = get_gpt_layer_with_transformer_engine_spec( - num_experts=config.num_moe_experts, - moe_grouped_gemm=config.moe_grouped_gemm, - qk_layernorm=config.qk_layernorm, - multi_latent_attention=config.multi_latent_attention, - moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm, - qk_l2_norm=qk_l2_norm, - use_kitchen=config.use_kitchen, - use_te_activation_func=config.use_te_activation_func, - ) + get_layer_spec_kwargs["use_te_activation_func"] = config.use_te_activation_func + get_layer_spec_fn = get_gpt_layer_with_transformer_engine_spec else: layer_norm_impl = LNImpl - dense_layer_spec = get_gpt_layer_local_spec( - num_experts=None, - moe_grouped_gemm=False, - qk_layernorm=config.qk_layernorm, - multi_latent_attention=config.multi_latent_attention, - moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm, - normalization=normalization, - qk_l2_norm=qk_l2_norm, - use_kitchen=config.use_kitchen, - ) - moe_layer_spec = get_gpt_layer_local_spec( - num_experts=config.num_moe_experts, - moe_grouped_gemm=config.moe_grouped_gemm, - qk_layernorm=config.qk_layernorm, - multi_latent_attention=config.multi_latent_attention, - moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm, - 
normalization=normalization, - qk_l2_norm=qk_l2_norm, - use_kitchen=config.use_kitchen, - ) + get_layer_spec_fn = get_gpt_layer_local_spec + + layer_spec_dict = {} + for mlp_type in ["dense", "moe"]: + for attention_type in ["softmax_attention", "linear_attention"]: + if mlp_type == "moe": + if config.moe_layer_freq is None: + # Skip if there is no MoE layer in the model. + continue + num_experts = config.num_moe_experts + moe_grouped_gemm = config.moe_grouped_gemm + else: + num_experts = None + moe_grouped_gemm = None + if attention_type == "linear_attention": + if config.linear_attention_type is None: + # Skip if there is no linear attention layer in the model. + continue + linear_attention_type = config.linear_attention_type + multi_latent_attention = None + else: + linear_attention_type = None + multi_latent_attention = config.multi_latent_attention + + layer_spec_key = f"{mlp_type}_{attention_type}" + layer_spec_dict[layer_spec_key] = get_layer_spec_fn( + num_experts=num_experts, + moe_grouped_gemm=moe_grouped_gemm, + multi_latent_attention=multi_latent_attention, + linear_attention_type=linear_attention_type, + **get_layer_spec_kwargs, + ) # Parse config.moe_layer_freq to determine the pattern of expert/dense layers. # 0 stands for dense layers, 1 stands for expert layers. # For integer N: Creates a pattern with one expert layer every N layers. # For string pattern: Evaluates the str directly (e.g. "[1,0,1]" for alternating expert/dense). if isinstance(config.moe_layer_freq, int): + # [1,0,0,...,0,1,0,0,...,0,...] moe_layer_pattern = [ 1 if (i % config.moe_layer_freq == 0) else 0 for i in range(config.num_layers) ] @@ -475,15 +556,49 @@ def get_gpt_decoder_block_spec( f"Invalid moe_layer_freq: {type(config.moe_layer_freq)}, {config.moe_layer_freq}" ) + # Parse config.linear_attention_freq to determine the pattern of expert/dense layers. + # 0 stands for SDPA layers, 1 stands for LA layers. 
+ # For integer N: Creates a pattern with (N-1) LA layers and 1 SDPA layer every N layers. + # For string pattern: Evaluates the str directly (e.g. "[1,0,1]" for alternating LA/SDPA). + if isinstance(config.linear_attention_freq, int): + linear_attention_pattern = [ + # [1,1,...,1,0,1,1,...,1,0,...] + 0 if ((i + 1) % config.linear_attention_freq == 0) else 1 + for i in range(config.num_layers) + ] + elif isinstance(config.linear_attention_freq, list): + linear_attention_pattern = config.linear_attention_freq + assert len(linear_attention_pattern) == config.num_layers, ( + f"Invalid length of linear_attention_pattern: {len(linear_attention_pattern)}, " + f"expected {config.num_layers}, " + f"current linear attention pattern: {config.linear_attention_freq}" + ) + elif config.linear_attention_freq is None: + if config.linear_attention_type is None: + linear_attention_pattern = [0] * config.num_layers + else: + linear_attention_pattern = [1] * config.num_layers + warnings.warn( + "Linear attention type is specified but linear_attention_freq is None. " + "Setting linear_attention_pattern to [1] * config.num_layers as default." + ) + else: + raise ValueError( + f"Invalid linear_attention_freq: {type(config.linear_attention_freq)}," + f" {config.linear_attention_freq}" + ) + # Create the layer specs for the model. 
layer_specs = [] for layer_number in range(config.num_layers): - if moe_layer_pattern[layer_number] == 1: - layer_specs.append(moe_layer_spec) - elif moe_layer_pattern[layer_number] == 0: - layer_specs.append(dense_layer_spec) - else: - raise ValueError(f"Invalid layer pattern: {moe_layer_pattern}") + mlp_type = "moe" if moe_layer_pattern[layer_number] else "dense" + attention_type = ( + "linear_attention" if linear_attention_pattern[layer_number] else "softmax_attention" + ) + layer_spec_key = f"{mlp_type}_{attention_type}" + if layer_spec_key not in layer_spec_dict: + raise ValueError(f"Invalid layer spec key: {layer_spec_key}") + layer_specs.append(layer_spec_dict[layer_spec_key]) # Slice the layer specs to only include the layers that are built in this pipeline stage. # Note: MCore layer_number starts at 1 diff --git a/megatron/core/models/gpt/linear_attention_module_specs.py b/megatron/core/models/gpt/linear_attention_module_specs.py new file mode 100644 index 00000000000..af23b4b2c08 --- /dev/null +++ b/megatron/core/models/gpt/linear_attention_module_specs.py @@ -0,0 +1,39 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +from typing import Optional + +from megatron.core.models.backends import BackendSpecProvider +from megatron.core.ssm.gated_delta_net import GatedDeltaNet, GatedDeltaNetSubmodules +from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules +from megatron.core.transformer.spec_utils import ModuleSpec + + +def get_linear_attention_module_spec_for_backend( + backend: BackendSpecProvider, linear_attention_type: str, normalization: Optional[str] = None +) -> ModuleSpec: + """Helper function to get module spec for Linear Attention""" + rms_norm = normalization == "RMSNorm" + if linear_attention_type == "mamba": + attention = ( + ModuleSpec( + module=MambaMixer, + submodules=MambaMixerSubmodules( + in_proj=backend.column_parallel_layer_norm_linear(), + out_proj=backend.row_parallel_linear(), + ), + metainfo={"fuse_input_layernorm": True}, + ), + ) + elif linear_attention_type == "gated_delta_net": + attention = ModuleSpec( + module=GatedDeltaNet, + submodules=GatedDeltaNetSubmodules( + in_proj=backend.column_parallel_layer_norm_linear(), + out_norm=backend.layer_norm(rms_norm=rms_norm, for_qk=False), + out_proj=backend.row_parallel_linear(), + ), + metainfo={"fuse_input_layernorm": True}, + ) + else: + raise ValueError(f"Invalid linear attention type: {linear_attention_type}") + return attention diff --git a/megatron/core/models/gpt/moe_module_specs.py b/megatron/core/models/gpt/moe_module_specs.py index e1ea7c163e9..1de0f14efcd 100755 --- a/megatron/core/models/gpt/moe_module_specs.py +++ b/megatron/core/models/gpt/moe_module_specs.py @@ -65,10 +65,12 @@ def get_moe_module_spec_for_backend( experts = ModuleSpec(module=expert_module, submodules=expert_submodule) # shared experts spec - shared_experts = ModuleSpec(module=SharedExpertMLP, params={"gate": False}, submodules=mlp) + shared_experts = ModuleSpec(module=SharedExpertMLP, submodules=mlp) # MoE module spec moe_module_spec = ModuleSpec( - module=MoELayer, submodules=MoESubmodules(experts=experts, 
shared_experts=shared_experts) + module=MoELayer, + submodules=MoESubmodules(experts=experts, shared_experts=shared_experts), + metainfo={"fuse_pre_mlp_layernorm": False}, ) return moe_module_spec diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py new file mode 100644 index 00000000000..45588341a39 --- /dev/null +++ b/megatron/core/ssm/gated_delta_net.py @@ -0,0 +1,551 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, Songlin Yang, Jan Kautz, Ali Hatamizadeh. + +# Some of this code was adopted from https://github.com/huggingface/transformers +# This source code is licensed under the Apache license found in the +# LICENSE file in the root directory of this source tree. + +import logging +from dataclasses import dataclass, replace +from typing import List, Optional, Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +from megatron.core.dist_checkpointing import ShardedTensor +from megatron.core.dist_checkpointing.mapping import ReplicaId, ShardedTensorFactory +from megatron.core.fp8_utils import get_fp8_align_size +from megatron.core.inference.contexts import BaseInferenceContext +from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.tensor_parallel import get_cuda_rng_tracker +from megatron.core.transformer import TransformerConfig +from megatron.core.transformer.identity_op import IdentityOp +from megatron.core.transformer.module import MegatronModule +from megatron.core.transformer.spec_utils import ModuleSpec, build_module +from megatron.core.transformer.utils import ( + make_sharded_tensors_for_checkpoint, + sharded_state_dict_default, +) +from megatron.core.utils import deprecate_inference_params, nvtx_range_pop, nvtx_range_push + +# TODO: Implement GatedDeltaNetContextParallel +# from .gated_delta_net_context_parallel import 
GatedDeltaNetContextParallel + +try: + from fla.modules.l2norm import l2norm + from fla.ops.gated_delta_rule import chunk_gated_delta_rule, fused_recurrent_gated_delta_rule + + HAVE_FLA = True +except ImportError: + chunk_gated_delta_rule = None + fused_recurrent_gated_delta_rule = None + + HAVE_FLA = False + +try: + from causal_conv1d import causal_conv1d_fn, causal_conv1d_update +except ImportError: + causal_conv1d_fn = None + causal_conv1d_update = None + + +logger = logging.getLogger(__name__) + + +@dataclass +class GatedDeltaNetSubmodules: + """ + Contains the module specs for the input linear, output norm, and output linear layers. + """ + + in_proj: Union[ModuleSpec, type] = IdentityOp + out_norm: Union[ModuleSpec, type] = IdentityOp + out_proj: Union[ModuleSpec, type] = IdentityOp + + +class GatedDeltaNet(MegatronModule): + """Gated Delta Net (GDN) layer class + + GDN layer takes input with size [s, b, h] + and returns output of the same size. + """ + + def __init__( + self, + config: TransformerConfig, + submodules: GatedDeltaNetSubmodules, + layer_number: int = None, + bias: bool = False, + conv_bias: bool = False, + conv_init: Optional[float] = None, + use_qk_l2norm: bool = True, + A_init_range: Tuple[float, float] = (1, 16), + pg_collection: ProcessGroupCollection = None, + ): + """ + Args: + config: The config of the model. + submodules: Contains the module specs for the input and output linear layers. + layer_number: The layer number of this GDN layer. + bias: Whether to use bias in the linear layers. + conv_bias: Whether to use bias in the causal convolution. + conv_init: The initialization range for the causal convolution weights. + use_qk_l2norm: Whether to use L2 normalization in the kernel of the gated delta rule. + A_init_range: The initialization range for the attention weights. + pg_collection: The required process groups to use for tensor model parallel and context + parallel. 
+ """ + + if not HAVE_FLA: + raise ImportError("FLA is not installed. Please install it with `pip install fla`.") + + super().__init__(config) + + # Attributes from arguments + self.layer_number = layer_number + self.bias = bias + self.conv_bias = conv_bias + self.conv_init = conv_init + assert A_init_range[0] >= 0 and A_init_range[1] >= A_init_range[0] + self.A_init_range = A_init_range + self.use_qk_l2norm = use_qk_l2norm + assert pg_collection is not None, "pg_collection must be provided for GatedDeltaNet" + self.pg_collection = pg_collection + self.tp_size = self.pg_collection.tp.size() + self.sp_size = self.tp_size if config.sequence_parallel else 1 + + # Attributes from config + self.config = config + self.hidden_size = config.hidden_size + self.act_fn = config.activation_func + self.activation = self.act_fn.__name__ + self.conv_kernel_dim = config.linear_conv_kernel_dim + self.key_head_dim = config.linear_key_head_dim + self.value_head_dim = config.linear_value_head_dim + self.num_key_heads = config.linear_num_key_heads + self.num_value_heads = config.linear_num_value_heads + self.qk_dim = self.key_head_dim * self.num_key_heads + self.v_dim = self.value_head_dim * self.num_value_heads + + # Input projection (hidden_states -> q, k, v, gate, beta, alpha) + # TODO: for now, output gate is forced for GDN. + # We may remove this restriction in the future. + self.in_proj_dim = self.qk_dim * 2 + self.v_dim * 2 + self.num_value_heads * 2 + if self.config.fp8: + fp8_align_size = get_fp8_align_size(self.config.fp8_recipe) + assert self.in_proj_dim % fp8_align_size == 0, ( + "For FP8, the innermost dimension of the GDN layer " + "input projection output tensor must be a multiple of 16." 
+ ) + self.in_proj = build_module( + submodules.in_proj, + self.hidden_size, + self.in_proj_dim, + config=self.config, + init_method=self.config.init_method, + gather_output=False, + bias=bias, + skip_bias_add=False, + is_expert=False, + tp_comm_buffer_name="fc1", + tp_group=self.pg_collection.tp, + ) + + # Conv1d for QKV + self.conv_dim = self.qk_dim * 2 + self.v_dim + self.conv_dim_local_tp = self.conv_dim // self.tp_size + + # weight shape: [conv_dim, 1, d_conv] + # bias shape: [conv_dim] + self.conv1d = nn.Conv1d( + in_channels=self.conv_dim_local_tp, + out_channels=self.conv_dim_local_tp, + bias=conv_bias, + kernel_size=self.conv_kernel_dim, + groups=self.conv_dim_local_tp, + padding=self.conv_kernel_dim - 1, + device=torch.cuda.current_device(), + dtype=config.params_dtype, + ) + setattr(self.conv1d.weight, "tensor_model_parallel", True) + if conv_bias: + setattr(self.conv1d.bias, "tensor_model_parallel", True) + + # Time step projection (discretization) + self.num_v_heads_local_tp = self.num_value_heads // self.tp_size + # dt_bias parameter + self.dt_bias = nn.Parameter( + torch.empty( + self.num_v_heads_local_tp, + dtype=config.params_dtype, + device=torch.cuda.current_device(), + ) + ) + setattr(self.dt_bias, "tensor_model_parallel", True) + # A_log parameter + self.A_log = nn.Parameter( + torch.empty( + self.num_v_heads_local_tp, + dtype=config.params_dtype, + device=torch.cuda.current_device(), + ) + ) + setattr(self.A_log, "tensor_model_parallel", True) + + # Output layernorm before projection + self.out_norm = build_module( + submodules.out_norm, + config=self.config, + hidden_size=self.value_head_dim, + eps=self.config.layernorm_epsilon, + ) + + self.out_proj = build_module( + submodules.out_proj, + self.v_dim, + self.hidden_size, + config=self.config, + init_method=self.config.output_layer_init_method, + bias=bias, + input_is_parallel=True, + skip_bias_add=True, + is_expert=False, + tp_comm_buffer_name="fc2", + tp_group=self.pg_collection.tp, + ) + + 
# TODO: support CP + + self.reset_parameters() + + def reset_parameters(self): + """Reset the parameters.""" + if self.config.perform_initialization: + with get_cuda_rng_tracker().fork(): + # conv1d.weight + if self.conv_init is not None: + nn.init.uniform_(self.conv1d.weight, -self.conv_init, self.conv_init) + # dt_bias + torch.ones( + self.num_v_heads_local_tp, + out=self.dt_bias.data, + dtype=self.config.params_dtype, + device=torch.cuda.current_device(), + ) + # A_log + A = torch.empty( + self.num_v_heads_local_tp, + dtype=self.config.params_dtype, + device=torch.cuda.current_device(), + ).uniform_(*self.A_init_range) + self.A_log.data.copy_(A) + + def forward( + self, + hidden_states: Tensor, + attention_mask: Tensor, + key_value_states: Optional[Tensor] = None, + inference_context: Optional[BaseInferenceContext] = None, + rotary_pos_emb: Optional[Union[Tensor, Tuple[Tensor, Tensor]]] = None, + rotary_pos_cos: Optional[Tensor] = None, + rotary_pos_sin: Optional[Tensor] = None, + rotary_pos_cos_sin: Optional[Tensor] = None, + attention_bias: Optional[Tensor] = None, + packed_seq_params: Optional[PackedSeqParams] = None, + sequence_len_offset: Optional[int] = None, + *, + inference_params: Optional[BaseInferenceContext] = None, + ): + """ + Perform a forward pass through the GDN module. + + Args: + hidden_states (Tensor): Hidden states. + attention_mask (Tensor): Attention mask. + key_value_states (Optional[Tensor]): Key/value states (for cross attention). + inference_context (Optional[BaseInferenceContext]): Inference context that manages + KV cache. + rotary_pos_emb (Optional[Union[Tensor, Tuple[Tensor, Tensor]]]): Rotary + embedding tensor(s). + rotary_pos_cos (Optional[Tensor]): Rotary embedding cosine. + rotary_pos_sin (Optional[Tensor]): Rotary embedding sine. + rotary_pos_cos_sin (Optional[Tensor]): Combined rotary embedding cosine and sine. + attention_bias (Optional[Tensor]): Attention bias. 
+ packed_seq_params (Optional[PackedSeqparams]): Parameters used for THD format. + sequence_len_offset (Optional[int]): Sequence length offset used for + inference CUDA graphs. + + Return: + (Tuple[Tensor, Tensor]) GDN output and bias. + + """ + # TODO: Deal with attention_mask + + inference_context = deprecate_inference_params(inference_context, inference_params) + + seq_len, batch, _ = hidden_states.shape + seq_len = seq_len * self.sp_size + + if inference_context is not None: + assert ( + inference_context.is_static_batching() + ), "GDN does not currently support dynamic inference batching." + assert not self.config.sequence_parallel + # TODO: support inference + raise NotImplementedError("GDN does not support inference for now.") + + if packed_seq_params is not None: + # TODO: support packed sequence + raise NotImplementedError("GDN does not support packed sequence for now.") + + # Input projection + nvtx_range_push(suffix="in_proj") + qkvzba, _ = self.in_proj(hidden_states) + nvtx_range_pop(suffix="in_proj") + + # Transpose: s b x --> b s x + # From sbhd to bshd format + qkvzba = qkvzba.transpose(0, 1) + + # Split, reorder, and reshape the tensor into q, k, v, gate, beta, alpha + qkv, gate, beta, alpha = torch.split( + qkvzba, + [ + (self.qk_dim * 2 + self.v_dim) // self.tp_size, + self.v_dim // self.tp_size, + self.num_value_heads // self.tp_size, + self.num_value_heads // self.tp_size, + ], + dim=-1, + ) + gate = gate.reshape(batch, seq_len, -1, self.value_head_dim) + beta = beta.reshape(batch, seq_len, -1) + alpha = alpha.reshape(batch, seq_len, -1) + + # Convolution on qkv + qkv = qkv.transpose(1, 2).contiguous() # b, s, d -> b, d, s + nvtx_range_push(suffix="conv1d") + if causal_conv1d_fn is None: + qkv = self.act_fn(self.conv1d(qkv)[..., :seq_len]) + else: + assert self.activation in ["silu", "swish"] + qkv = causal_conv1d_fn( + x=qkv, + weight=self.conv1d.weight.squeeze(1), # d, 1, w -> d, w + bias=self.conv1d.bias, + activation=self.activation, + ) + 
nvtx_range_pop(suffix="conv1d") + # Split qkv into query, key, and value + qkv = qkv.transpose(1, 2) # b, d, s -> b, s, d + query, key, value = torch.split( + qkv, + [self.qk_dim // self.tp_size, self.qk_dim // self.tp_size, self.v_dim // self.tp_size], + dim=-1, + ) + query = query.reshape(batch, seq_len, -1, self.key_head_dim) + key = key.reshape(batch, seq_len, -1, self.key_head_dim) + value = value.reshape(batch, seq_len, -1, self.value_head_dim) + # Apply L2 norm to query and key + if self.use_qk_l2norm: + query = l2norm(query.contiguous()) + key = l2norm(key.contiguous()) + if self.num_value_heads // self.num_key_heads > 1: + query = query.repeat_interleave(self.num_value_heads // self.num_key_heads, dim=2) + key = key.repeat_interleave(self.num_value_heads // self.num_key_heads, dim=2) + + # Make contiguous + query = query.contiguous() + key = key.contiguous() + value = value.contiguous() + gate = gate.contiguous() + beta = beta.contiguous() + alpha = alpha.contiguous() + + # Calculate g and beta + nvtx_range_push(suffix="g_and_beta") + g = -self.A_log.exp() * F.softplus(alpha.float() + self.dt_bias) # In fp32 + beta = beta.sigmoid() + nvtx_range_pop(suffix="g_and_beta") + + nvtx_range_push(suffix="gated_delta_rule") + core_attn_out, last_recurrent_state = chunk_gated_delta_rule( + query, + key, + value, + g=g, + beta=beta, + initial_state=None, + output_final_state=False, + use_qk_l2norm_in_kernel=False, + ) + nvtx_range_pop(suffix="gated_delta_rule") + + # RMSNorm + nvtx_range_push(suffix="gated_norm") + norm_out = self._torch_compiled_gated_norm(core_attn_out, gate) + nvtx_range_pop(suffix="gated_norm") + + # Transpose: b s x --> s b x + # From bshd back to sbhd format + norm_out = norm_out.reshape(batch, seq_len, -1) + norm_out = norm_out.transpose(0, 1).contiguous() + + # Output projection + nvtx_range_push(suffix="out_proj") + out, out_bias = self.out_proj(norm_out) + nvtx_range_pop(suffix="out_proj") + + return out, out_bias + + @torch.compile + def 
_torch_compiled_gated_norm(self, x, gate): + # Output Norm + x_dtype = x.dtype + x = x.reshape(-1, x.shape[-1]) + y = self.out_norm(x) + # Output gate + gate = gate.reshape(-1, gate.shape[-1]) + y = y * self.act_fn(gate.float()) + y = y.to(x_dtype) + return y + + def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): + """Provide a sharded state dictionary for distributed checkpointing.""" + sharded_state_dict = {} + # Parameters + self._save_to_state_dict(sharded_state_dict, "", keep_vars=True) + sharded_state_dict = make_sharded_tensors_for_checkpoint( + sharded_state_dict, + prefix, + tensor_parallel_layers_axis_map={ + "A_log": 0, + "dt_bias": 0, + }, # parameters sharded across TP + sharded_offsets=sharded_offsets, + ) + # Submodules + for name, module in self.named_children(): + if name == "conv1d": + # Add TP sharding for Conv1d + module_sd = module.state_dict(prefix="", keep_vars=True) + tp_sharding_map = {f"weight": 0} + if self.conv_bias: + tp_sharding_map[f"bias"] = 0 + module_sharded_sd = make_sharded_tensors_for_checkpoint( + module_sd, f"{prefix}{name}.", tp_sharding_map, sharded_offsets + ) + else: + module_sharded_sd = sharded_state_dict_default( + module, f"{prefix}{name}.", sharded_offsets, metadata + ) + + sharded_state_dict.update(module_sharded_sd) + + # At this point the TP sharding is correctly defined for each tensor, but some of the + # tensors must be additionally split into separate parts + in_proj_dim_local_tp = self.in_proj_dim // self.tp_size + assert sharded_state_dict[f"{prefix}in_proj.weight"].data.size(0) == in_proj_dim_local_tp, ( + in_proj_dim_local_tp, + sharded_state_dict[f"{prefix}in_proj.weight"], + ) + + sharded_state_dict[f"{prefix}in_proj.weight"] = _split_tensor_factory( + sharded_state_dict[f"{prefix}in_proj.weight"], + [ + self.qk_dim // self.tp_size, + self.qk_dim // self.tp_size, + self.v_dim // self.tp_size, + self.v_dim // self.tp_size, + self.num_value_heads // self.tp_size, + 
self.num_value_heads // self.tp_size, + ], + ["query", "key", "value", "z", "beta", "alpha"], + 0, + ) + + conv_layer_name_list = ["conv1d.weight"] + assert ( + sharded_state_dict[f"{prefix}conv1d.weight"].data.size(0) == self.conv_dim_local_tp + ), (self.conv_dim_local_tp, sharded_state_dict[f"{prefix}conv1d.weight"]) + if self.conv_bias: + conv_layer_name_list.append("conv1d.bias") + assert ( + sharded_state_dict[f"{prefix}conv1d.bias"].data.size(0) == self.conv_dim_local_tp + ), (self.conv_dim_local_tp, sharded_state_dict[f"{prefix}conv1d.bias"]) + for conv_layer_name in conv_layer_name_list: + sharded_state_dict[f"{prefix}{conv_layer_name}"] = _split_tensor_factory( + sharded_state_dict[f"{prefix}{conv_layer_name}"], + [ + self.qk_dim // self.tp_size, + self.qk_dim // self.tp_size, + self.v_dim // self.tp_size, + ], + ["query", "key", "value"], + 0, + ) + + return sharded_state_dict + + +def _split_tensor_factory( + orig_sh_ten: ShardedTensor, split_sections: List[int], split_names: List[str], split_dim: int +) -> ShardedTensorFactory: + """Builds a factory that splits a given ShardedTensor into several independent chunks.""" + assert isinstance(orig_sh_ten, ShardedTensor), type(orig_sh_ten) + orig_sh_ten_no_data = orig_sh_ten.without_data() # remove `data` reference + + if sum(split_sections) != orig_sh_ten_no_data.local_shape[split_dim]: + raise ValueError( + f"Split sections must cover the whole dimension size, " + f"got {split_sections=} vs dimensions size " + f"{orig_sh_ten_no_data.local_shape[split_dim]}" + ) + + assert not isinstance( + split_sections, int + ), "Splitting into predefined section sizes is supported (`split_sections` must be a list)" + assert len(split_sections) == len(split_names), (len(split_sections), len(split_names)) + + @torch.no_grad() + def sh_ten_build_fn( + key: str, t: torch.Tensor, replica_id: ReplicaId, flattened_range: Optional[slice] + ): + factory_sh_ten = replace( + orig_sh_ten_no_data, + key=key, + data=t, + 
dtype=t.dtype, + replica_id=replica_id, + flattened_range=flattened_range, + ) + + chunk_sh_tens = [] + split_start = 0 + for split_size, split_name in zip(split_sections, split_names): + split_chunks = factory_sh_ten.narrow(split_dim, split_start, split_size) + for sh_ten in split_chunks: + sh_ten.key = f"{sh_ten.key}.{split_name}" + chunk_sh_tens.extend(split_chunks) + split_start += split_size + + assert split_start == orig_sh_ten_no_data.local_shape[split_dim], ( + split_start, + orig_sh_ten_no_data.local_shape[split_dim], + ) + assert sum(sh_ten.data.numel() for sh_ten in chunk_sh_tens) == t.numel(), ( + chunk_sh_tens, + t.shape, + ) + return chunk_sh_tens + + @torch.no_grad() + def sh_ten_merge_fn(sub_state_dict): + return torch.cat(sub_state_dict) + + return ShardedTensorFactory( + orig_sh_ten.key, orig_sh_ten.data, sh_ten_build_fn, sh_ten_merge_fn, orig_sh_ten.replica_id + ) diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 74d30477e5c..518d82a0332 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -434,7 +434,7 @@ def _adjust_key_value_for_inference( return query, key, value, rotary_pos_emb, attn_mask_type, block_table @abstractmethod - def get_query_key_value_tensors(self, hidden_states, key_value_states, split_qkv=True): + def get_query_key_value_tensors(self, hidden_states, key_value_states, output_gate, split_qkv=True): """ This method needs to be implemented based on whether the derived class is "self-attn" or "cross-attn". @@ -718,19 +718,25 @@ def forward( self.k_layernorm is None or isinstance(self.k_layernorm, IdentityOp), ] ) + output_gate = self.config.attention_output_gate # Check if fused_single_qkv_rope is requested but either unavailable or not # supported for the current use case. 
if self.attention_type != "cross": assert not ( self.config.fused_single_qkv_rope and split_qkv ), "fused_single_qkv_rope requested but not available/supported for the config." + if output_gate: + assert split_qkv, "output_gate is not supported for unsplit mixed_qkv tensor." qkv_output = self.get_query_key_value_tensors( - hidden_states, key_value_states, split_qkv=split_qkv + hidden_states, key_value_states, output_gate=output_gate, split_qkv=split_qkv ) attn_mask_type = self.attn_mask_type block_table = None - if split_qkv: + gate = None + if output_gate and split_qkv: + query, key, value, gate = qkv_output + elif split_qkv: query, key, value = qkv_output else: mixed_qkv, qkv_split_arg_list = qkv_output @@ -912,6 +918,12 @@ def forward( core_attn_out = core_attn_out.reshape(core_attn_out.size(0), 1, -1) nvtx_range_pop(suffix="core_attention") + # Output gate + if gate is not None: + nvtx_range_push(suffix="output_gate") + core_attn_out = self._torch_compiled_output_gate(core_attn_out, gate) + nvtx_range_pop(suffix="output_gate") + # ================= # Output. [sq, b, h] # ================= @@ -922,6 +934,15 @@ def forward( return output, bias + @torch.compile + def _torch_compiled_output_gate(self, x, gate): + x_dtype = x.dtype + gate = gate.contiguous() + gate = gate.view(*x.shape) + x = x * torch.sigmoid(gate.float()) + x = x.to(x_dtype) + return x + def set_for_recompute_input_layernorm(self): """Set the attention layer for recompute input_layernorm. 
Only needed for fp8.""" raise NotImplementedError("set_for_recompute_input_layernorm is not implemented.") @@ -953,10 +974,13 @@ def __init__( pg_collection=pg_collection, ) + self.linear_qkv_out_dim = self.query_projection_size + 2 * self.kv_projection_size + if self.config.attention_output_gate: + self.linear_qkv_out_dim += self.config.kv_channels * self.config.num_attention_heads self.linear_qkv = build_module( submodules.linear_qkv, self.config.hidden_size, - self.query_projection_size + 2 * self.kv_projection_size, + self.linear_qkv_out_dim, config=self.config, init_method=self.config.init_method, gather_output=False, @@ -1058,30 +1082,44 @@ def _compare(srcs, tgts, names, parallelism): "TP", ) - def get_query_key_value_tensors(self, hidden_states, key_value_states=None, split_qkv=True): + def get_query_key_value_tensors( + self, hidden_states, + key_value_states=None, + output_gate=False, + split_qkv=True + ): """ - Derives `query`, `key` and `value` tensors from `hidden_states`. If `split_qkv=False`, then - the unsplit mixed_qkv tensor is returned. + Derives `query`, `key`, `value` tensors from `hidden_states`. + If `output_gate` is True, then also derives `gate` tensor. + If `split_qkv=False`, then the unsplit mixed_qkv tensor is returned. 
""" - # Attention heads [sq, b, h] --> [sq, b, ng * (np/ng + 2) * hn)] + # If no output gate: Attention heads [sq, b, h] --> [sq, b, ng * (np/ng + 2) * hn)] + # If have output gate: Attention heads [sq, b, h] --> [sq, b, ng * (2 * np/ng + 2) * hn)] mixed_qkv, _ = self.linear_qkv(hidden_states) + num_query_heads_per_group = ( + self.num_attention_heads_per_partition // self.num_query_groups_per_partition + ) + if output_gate: + num_qkv_heads_per_group = 2 * num_query_heads_per_group + 2 + else: + num_qkv_heads_per_group = num_query_heads_per_group + 2 - # [sq, b, hp] --> [sq, b, ng, (np/ng + 2) * hn] + # If no output gate: [sq, b, hp] --> [sq, b, ng, (np/ng + 2) * hn] + # If have output gate: [sq, b, hp] --> [sq, b, ng, (2 * np/ng + 2) * hn] new_tensor_shape = mixed_qkv.size()[:-1] + ( self.num_query_groups_per_partition, - ( - (self.num_attention_heads_per_partition // self.num_query_groups_per_partition + 2) - * self.hidden_size_per_attention_head - ), + num_qkv_heads_per_group * self.hidden_size_per_attention_head, ) mixed_qkv = mixed_qkv.view(*new_tensor_shape) + # Split the tensor into query, gate, key, and value. 
+ # If no output gate: [sq, b, ng, (np/ng + 2) * hn] + # --> [sq, b, ng, np/ng * hn], None, [sq, b, ng, hn], [sq, b, ng, hn] + # If have output gate: [sq, b, ng, (2 * np/ng + 2) * hn] + # --> [sq, b, ng, np/ng * hn], [sq, b, ng, np/ng * hn], [sq, b, ng, hn], [sq, b, ng, hn] split_arg_list = [ - ( - self.num_attention_heads_per_partition - // self.num_query_groups_per_partition - * self.hidden_size_per_attention_head - ), + num_query_heads_per_group * self.hidden_size_per_attention_head, + num_query_heads_per_group * self.hidden_size_per_attention_head if output_gate else 0, self.hidden_size_per_attention_head, self.hidden_size_per_attention_head, ] @@ -1091,18 +1129,15 @@ def get_query_key_value_tensors(self, hidden_states, key_value_states=None, spli return mixed_qkv, split_arg_list if SplitAlongDim is not None: - - # [sq, b, ng, (np/ng + 2) * hn] - # --> [sq, b, ng, np/ng * hn], [sq, b, ng, hn], [sq, b, ng, hn] - (query, key, value) = SplitAlongDim(mixed_qkv, 3, split_arg_list) + (query, gate, key, value) = SplitAlongDim(mixed_qkv, 3, split_arg_list) else: + (query, gate, key, value) = torch.split(mixed_qkv, split_arg_list, dim=3) - # [sq, b, ng, (np/ng + 2) * hn] - # --> [sq, b, ng, np/ng * hn], [sq, b, ng, hn], [sq, b, ng, hn] - (query, key, value) = torch.split(mixed_qkv, split_arg_list, dim=3) - - # [sq, b, ng, np/ng * hn] -> [sq, b, np, hn] + # Query [sq, b, ng, np/ng * hn] -> [sq, b, np, hn] query = query.reshape(query.size(0), query.size(1), -1, self.hidden_size_per_attention_head) + if output_gate: + # Gate [sq, b, ng, np/ng * hn] -> [sq, b, np, hn] + gate = gate.reshape(gate.size(0), gate.size(1), -1, self.hidden_size_per_attention_head) if self.q_layernorm is not None: query = self.q_layernorm(query) @@ -1113,6 +1148,8 @@ def get_query_key_value_tensors(self, hidden_states, key_value_states=None, spli if self.config.test_mode: self.run_realtime_tests() + if output_gate: + return query, key, value, gate return query, key, value def backward_dw(self) -> 
NoReturn: @@ -1189,11 +1226,13 @@ def __init__( is_expert=False, ) - def get_query_key_value_tensors(self, hidden_states, key_value_states, split_qkv=True): + def get_query_key_value_tensors(self, hidden_states, key_value_states, output_gate=False, split_qkv=True): """ Derives `query` tensor from `hidden_states`, and `key`/`value` tensors from `key_value_states`. """ + assert not output_gate, "Output gate is not supported in cross attention for now." + assert split_qkv, "split_qkv must be True for CrossAttention" # Attention heads [sk, b, h] --> [sk, b, (np * 2 * hn)] mixed_kv, _ = self.linear_kv(key_value_states) diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py index bbb5fce4e33..2e6fb68e444 100644 --- a/megatron/core/transformer/moe/moe_layer.py +++ b/megatron/core/transformer/moe/moe_layer.py @@ -161,7 +161,10 @@ def __init__( # Initialize shared experts if self.use_shared_expert: self.shared_experts = build_module( - self.submodules.shared_experts, config=self.config, pg_collection=pg_collection + self.submodules.shared_experts, + config=self.config, + pg_collection=pg_collection, + gate=self.config.moe_shared_expert_gate, ) if self.shared_expert_overlap: self.token_dispatcher.set_shared_experts(self.shared_experts) diff --git a/megatron/core/transformer/spec_utils.py b/megatron/core/transformer/spec_utils.py index b3de8541734..897d88d2aa3 100644 --- a/megatron/core/transformer/spec_utils.py +++ b/megatron/core/transformer/spec_utils.py @@ -25,6 +25,7 @@ class ModuleSpec: module: Union[Tuple, type] params: dict = field(default_factory=lambda: {}) submodules: type = None + metainfo: dict = field(default_factory=lambda: {}) def import_module(module_path: Tuple[str]): diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 88da736415e..dc11239836f 100644 --- a/megatron/core/transformer/transformer_config.py +++ 
b/megatron/core/transformer/transformer_config.py @@ -192,6 +192,9 @@ class TransformerConfig(ModelParallelConfig): qk_layernorm: bool = False """Whether to apply `normalization` type of normalization to the query and key embeddings.""" + attention_output_gate: bool = False + """Whether to apply output gate to the attention layers.""" + test_mode: bool = False """Whether to run real-time tests.""" @@ -212,6 +215,34 @@ class TransformerConfig(ModelParallelConfig): moe_deepep_num_sms: int = 20 """Number of SMs to use for DeepEP.""" + #################### + # linear attention + #################### + linear_attention_type: Optional[str] = None + """Type of linear attention to use. Currently support gated_delta_net.""" + + linear_attention_freq: Optional[Union[int, List[int]]] = None + """Frequency between LA (linear attention) layers + and SDPA (scaled dot-product attention) layers. + Accepts either: + - An integer N: Represents a (N-1):N ratio, meaning (N-1) LA layers for every 1 SDPA layer + - A list that defines a custom pattern, e.g.: [1,1,1,0,1,1,1,0,1,1,1,0]""" + + linear_conv_kernel_dim: Optional[int] = None + """Conv kernel dimension for the gated delta net.""" + + linear_key_head_dim: Optional[int] = None + """Query and key head dimension for the gated delta net.""" + + linear_value_head_dim: Optional[int] = None + """Value and gate head dimension for the gated delta net.""" + + linear_num_key_heads: Optional[int] = None + """Number of query and key heads for the gated delta net.""" + + linear_num_value_heads: Optional[int] = None + """Number of value and gate heads for the gated delta net.""" + #################### # initialization #################### @@ -429,6 +460,9 @@ class TransformerConfig(ModelParallelConfig): there are multiple shared experts. 
None means no shared expert.""" + moe_shared_expert_gate: bool = False + """Enable gate for shared expert.""" + moe_shared_expert_overlap: bool = False """Enable overlapping between shared expert computations and dispatcher communications. Without this, the shared epxerts execute after the routed experts.""" @@ -744,6 +778,54 @@ def __post_init__(self): f"tensor_model_parallel_size ({self.tensor_model_parallel_size})." ) + if self.linear_attention_type is not None: + supported_la_types = ["gated_delta_net", "mamba"] + assert self.linear_attention_type in supported_la_types, ( + f"linear_attention_type ({self.linear_attention_type}) only support" + f" one of {supported_la_types}." + ) + assert ( + self.linear_attention_freq is not None + ), f"linear_attention_freq must be set for linear attention." + + if self.linear_attention_type == "gated_delta_net": + # Check required parameters + assert ( + self.linear_conv_kernel_dim is not None + ), "linear_conv_kernel_dim must be set for gated delta net." + assert ( + self.linear_key_head_dim is not None + ), "linear_key_head_dim must be set for gated delta net." + assert ( + self.linear_value_head_dim is not None + ), "linear_value_head_dim must be set for gated delta net." + assert ( + self.linear_num_key_heads is not None + ), "linear_num_key_heads must be set for gated delta net." + assert ( + self.linear_num_value_heads is not None + ), "linear_num_value_heads must be set for gated delta net." + assert self.linear_num_value_heads % self.linear_num_key_heads == 0, ( + f"linear_num_value_heads ({self.linear_num_value_heads}) must be a multiple of " + f"linear_num_key_heads ({self.linear_num_key_heads})." + ) + + # Check tensor parallelism compatibility + assert ( + self.linear_num_key_heads % self.tensor_model_parallel_size == 0 + ), "linear_num_key_heads must be a multiple of tensor_model_parallel_size." 
+ assert ( + self.linear_num_value_heads % self.tensor_model_parallel_size == 0 + ), "linear_num_value_heads must be a multiple of tensor_model_parallel_size." + + # Do not support yet, but coming soon. + assert self.context_parallel_size == 1, ( + f"Gated delta net does not support context parallel for now," + f" but got {self.context_parallel_size=}." + ) + elif self.linear_attention_type == "mamba": + raise NotImplementedError("Mamba is not supported yet.") + if self.fp8: # cannot support first last layer bf16 with delayed scaling if self.first_last_layers_bf16 and self.fp8_recipe == Fp8Recipe.delayed: @@ -1553,6 +1635,9 @@ def __post_init__(self): if self.multi_latent_attention and self.apply_rope_fusion and self.rope_type != "yarn": raise ValueError("apply_rope_fusion for MLA only works with YARN RoPE.") + if self.attention_output_gate: + raise NotImplementedError("Output gate is not supported for MLA yet.") + if self.cache_mla_latents: assert ( self.apply_rope_fusion is False diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index dc33a639e8d..29db36ca6e0 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -69,6 +69,7 @@ def add_megatron_arguments(parser: argparse.ArgumentParser): parser = _add_vision_args(parser) parser = _add_moe_args(parser) parser = _add_mla_args(parser) + parser = _add_linear_attention_args(parser) parser = _add_heterogeneous_args(parser) parser = _add_logging_args(parser) parser = _add_straggler_detector_args(parser) @@ -319,7 +320,7 @@ def moe_freq_type(x): This allows defining arbitrary patterns of expert and dense layers. The pattern length must match the total number of transformer layers. Examples: - "([0]+[1]*23)": 1 dense layer followed by 23 experts layers + "([0]+[1]*23)": 1 dense layer followed by 23 expert layers "([1]*3+[0]*2)*2": Three expert layers followed by two dense layers, repeated twice. 
""" if isinstance(x, int): @@ -332,6 +333,31 @@ def moe_freq_type(x): # it's a single int but in str return int(x) +def la_freq_type(x): + """Frequency between LA (linear attention) layers and SDPA (scaled dot-product attention) layers. + + Accepts either: + - An integer N: Represents a (N-1):N ratio, meaning (N-1) LA layers for every 1 SDPA layer + - A string "N": Same as above, but provided as a string + - A string containing a Python list expression that defines a custom pattern, e.g.: + "([1]*3+[0]*1)*3" evaluates to [1,1,1,0,1,1,1,0,1,1,1,0] + where 1 indicates an LA layer and 0 indicates a SDPA layer. + This allows defining arbitrary patterns of LA and SDPA layers. + The pattern length must match the total number of transformer layers. + Examples: + "([0]+[1]*23)": 1 SDPA layer followed by 23 LA layers + "([1]*3+[0]*2)*2": Three LA layers followed by two SDPA layers, repeated twice. + """ + if x is None or isinstance(x, int): + return x + assert isinstance(x, str) + if '[' in x: + # it's a custom pattern + return _eval_pattern(x) + else: + # it's a single int but in str + return int(x) + def tuple_type(x): """ Convert a string to a tuple of integers. @@ -1542,6 +1568,8 @@ def _add_network_size_args(parser): group.add_argument('--group-query-attention', action='store_true', help='Use group-query attention.') group.add_argument('--num-query-groups', type=int, default=1) + group.add_argument('--attention-output-gate', action='store_true', + help='Whether to apply output gate to the attention.') group.add_argument('--softmax-type', type=str, default='vanilla', choices=['learnable', 'vanilla', 'off-by-one'], help='Type of softmax to use for the attention. 
Supports both a fixed offset and ' @@ -1860,6 +1888,12 @@ def _add_regularization_args(parser): group.add_argument('--weight-decay-incr-style', type=str, default='constant', choices=['constant', 'linear', 'cosine'], help='Weight decay increment function.') + group.add_argument('--no-weight-decay-cond-type', type=str, choices=['qwen3_next'], + help='Type of no weight decay condition. Choices: ' + 'None (default): param no weight decay if and only if it is 1D; or it is bias; ' + 'or it is embedding and embedding_init_method_std is not None. ' + '"qwen3_next": In addition to the default rules, ' + 'apply weight decay to qk layernorm as a special case.') group.add_argument('--clip-grad', type=float, default=1.0, help='Gradient clipping based on global L2 norm.') group.add_argument('--adam-beta1', type=float, default=0.9, @@ -3028,7 +3062,7 @@ def _add_moe_args(parser): '- A string containing a Python list expression that defines a custom pattern, e.g.: ' '"([1]*3+[0]*1)*3" evaluates to [1,1,1,0,1,1,1,0,1,1,1,0] ' 'where 1 indicates an expert layer and 0 indicates a dense layer. ' - 'Examples: "([0]+[1]*23)": 1 dense layer followed by 23 experts layers, ' + 'Examples: "([0]+[1]*23)": 1 dense layer followed by 23 expert layers, ' '"([1]*3+[0]*2)*2": Three expert layers followed by two dense layers, repeated twice.') group.add_argument('--moe-ffn-hidden-size', type=int, default=None, help='The hidden size of each expert\'s feed-forward network (ffn). ' @@ -3037,6 +3071,8 @@ def _add_moe_args(parser): help='Shared expert total ffn hidden size. ' 'It should be equal to "num_shared_experts * ffn_size_of_each_shared_expert" if there are multiple shared experts. ' 'None means no shared expert.') + group.add_argument('--moe-shared-expert-gate', action='store_true', + help='Enable gate for shared expert. 
Only effective when moe-shared-expert-intermediate-size is set.')
 group.add_argument('--moe-shared-expert-overlap', action='store_true',
 help='Enable overlapping between shared expert computations and dispatcher communications. '
 'Without this, the shared epxerts execute after the routed experts. '
@@ -3161,6 +3197,31 @@ def _add_mla_args(parser):
 return parser
+def _add_linear_attention_args(parser):
+ group = parser.add_argument_group(title="la")
+ group.add_argument('--linear-attention-type', default=None, choices=['gated_delta_net', 'mamba'], type=str,
+ help='Type of linear attention to use. Currently supports gated_delta_net and mamba.')
+ group.add_argument('--linear-attention-freq', type=la_freq_type, default=None,
+ help='Frequency between LA (linear attention) layers and'
+ ' SDPA (scaled dot-product attention) layers. Accepts either: '
+ '- An integer N: Represents a (N-1):N ratio, meaning (N-1) LA layers for every 1 SDPA layer '
+ '- A string containing a Python list expression that defines a custom pattern, e.g.: '
+ '"([1]*3+[0]*1)*3" evaluates to [1,1,1,0,1,1,1,0,1,1,1,0] '
+ 'where 1 indicates an LA layer and 0 indicates a SDPA layer. 
' + 'Examples: "([0]+[1]*23)": 1 SDPA layer followed by 23 LA layers, ' + '"([1]*3+[0]*2)*2": Three LA layers followed by two SDPA layers, repeated twice.') + group.add_argument('--linear-conv-kernel-dim', default=4, type=int, + help='Conv kernel dimension for the gated delta net.') + group.add_argument('--linear-key-head-dim', default=128, type=int, + help='Query and key head dimension for the gated delta net.') + group.add_argument('--linear-value-head-dim', default=128, type=int, + help='Value and gate head dimension for the gated delta net.') + group.add_argument('--linear-num-key-heads', default=16, type=int, + help='Number of query and key heads for the gated delta net.') + group.add_argument('--linear-num-value-heads', default=32, type=int, + help='Number of value and gate heads for the gated delta net.') + return parser + def _add_heterogeneous_args(parser): """ Heterogeneous models refer to transformer architectures where individual layers can differ diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index deff728aa23..e0dc794d38a 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -8,6 +8,7 @@ import shutil import sys import threading +import types from argparse import Namespace from enum import Enum, auto from logging import getLogger @@ -1424,18 +1425,27 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', ignore_rng_state = False ignore_rerun_state = True if ckpt_format == "torch_dist": + state_dict_args = ( + state_dict.get('args', types.SimpleNamespace()) + if state_dict is not None + else types.SimpleNamespace() + ) + if not hasattr(state_dict_args, 'tensor_model_parallel_size'): + print_rank_0('WARNING: does not find TP size in checkpoint args, using 1 as default.') + if not hasattr(state_dict_args, 'pipeline_model_parallel_size'): + print_rank_0('WARNING: does not find PP size in checkpoint args, using 1 as default.') ckpt_tp_pp = ( - 
state_dict['args'].tensor_model_parallel_size, - state_dict['args'].pipeline_model_parallel_size, + getattr(state_dict_args, 'tensor_model_parallel_size', 1), + getattr(state_dict_args, 'pipeline_model_parallel_size', 1), ) run_tp_pp = ( args.tensor_model_parallel_size, args.pipeline_model_parallel_size, ) - ckpt_world_size = getattr(state_dict['args'], 'world_size', 0) + ckpt_world_size = getattr(state_dict_args, 'world_size', 0) run_world_size = getattr(args, 'world_size', 0) - ckpt_dp = getattr(state_dict['args'], 'data_parallel_size', 0) + ckpt_dp = getattr(state_dict_args, 'data_parallel_size', 0) run_dp = getattr(args, 'data_parallel_size', 0) mismatch_msg = "(TP, PP) mismatch after resume ({} vs {} from checkpoint)".format( run_tp_pp, ckpt_tp_pp @@ -1443,7 +1453,7 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', # Determine if RNG state will be loaded if (ckpt_tp_pp == run_tp_pp and not release and not args.finetune and not args.no_load_rng - and not getattr(state_dict['args'], 'no_save_rng', False)): + and not getattr(state_dict_args, 'no_save_rng', False)): gen_sd_rng_state = get_rng_state(args.ckpt_format) # we can load the rng state else: ignore_rng_state = True @@ -1458,7 +1468,7 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', print_rank_0(f'sharded_state_dict metadata loaded from the checkpoint: {sharded_sd_metadata}') # Determine if optimizer state will be loaded if (not release and not args.finetune and not args.no_load_optim - and not getattr(state_dict['args'], 'no_save_optim', False)): + and not getattr(state_dict_args, 'no_save_optim', False)): gen_sd_optim = optimizer gen_sd_opt_param_scheduler = opt_param_scheduler @@ -1469,7 +1479,7 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', # (for MCore v0.13+ checkpoints `sharded_sd_metadata is not None`) sharded_sd_metadata = { 'distrib_optim_sharding_type': ('fully_sharded_model_space' - if 
getattr(state_dict['args'], 'ckpt_fully_parallel_save', False) + if getattr(state_dict_args, 'ckpt_fully_parallel_save', False) else 'dp_zero_gather_scatter'), } if ( diff --git a/megatron/training/training.py b/megatron/training/training.py index bc5fefa86ba..3b354581760 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -237,9 +237,6 @@ def hybrid_flops(batch_size, seq_len, hidden_size, def transformer_flops(): """Calculate FLOPs for a standard Transformer model.""" # TODO(helenn/dnarayanan): Refactor this to reuse the helper methods. - # Attention projection size. - query_projection_size = args.kv_channels * args.num_attention_heads - query_projection_to_hidden_size_ratio = query_projection_size / args.hidden_size # Group Query Attention. if not args.group_query_attention: args.num_query_groups = args.num_attention_heads @@ -330,10 +327,9 @@ def transformer_flops(): + args.num_attention_heads * (args.qk_head_dim + args.qk_pos_emb_head_dim) + 1 ) - self_attn_term = ( + standard_self_attn_term = ( 3 * 2 # fwd(1) + bwd(2) *FMA - * num_layers * ( ## q lora + rope + q norm q_term @@ -350,29 +346,98 @@ def transformer_flops(): ## core attn + args.seq_length * (args.num_attention_heads * (args.qk_head_dim + args.qk_pos_emb_head_dim)) - / 2 + / 2 # causal mask (only half of the mask is non-zero) + args.seq_length * args.num_attention_heads * args.v_head_dim / 2 ) ) else: ## MHA or GQA - self_attn_term = ( - expansion_factor - * num_layers - * args.hidden_size - * args.hidden_size + query_projection_size = args.kv_channels * args.num_attention_heads + key_projection_size = args.kv_channels * args.num_query_groups + value_projection_size = args.kv_channels * args.num_query_groups + standard_self_attn_term = ( + 3 + * 2 # fwd(1) + bwd(2) *FMA * ( - ( - 1 - + (args.num_query_groups / args.num_attention_heads) - # # Only half of the attention matrix is non-zero and needs to be multiplied with V. 
- + (args.seq_length / args.hidden_size / 2) - ) - * query_projection_to_hidden_size_ratio + ## qkv proj + args.hidden_size + * (query_projection_size + key_projection_size + value_projection_size) + ## core attention + + query_projection_size + * args.seq_length + / 2 # causal mask (only half of the mask is non-zero) + * 2 # QK^T and (QK^T)V + ## out proj + + query_projection_size + * args.hidden_size ) ) + if args.linear_attention_type is not None: + # Calculate number of dense and MoE Transformer MLPs. + if isinstance(args.linear_attention_freq, int): + linear_attention_pattern = [ + # [1,1,...,1,0,1,1,...,1,0,...] + 0 if ((i + 1) % args.linear_attention_freq == 0) + else 1 for i in range(num_layers) + ] + elif isinstance(args.linear_attention_freq, list): + linear_attention_pattern = args.linear_attention_freq + assert len(linear_attention_pattern) == num_layers, ( + f"Invalid length of linear_attention_pattern: {len(linear_attention_pattern)}, " + f"expected {num_layers}, " + f"current linear attention pattern: {args.linear_attention_freq}" + ) + elif args.linear_attention_freq is None: + linear_attention_pattern = [1] * num_layers + else: + raise ValueError( + f"Invalid linear_attention_freq: {type(args.linear_attention_freq)}," + f" {args.linear_attention_freq}" + ) + num_linear_attention_layers = sum(linear_attention_pattern) + num_standard_attention_layers = num_layers - num_linear_attention_layers + + if args.linear_attention_type == "gated_delta_net": + # Calculate the FLOPs for the gated delta net attention. 
+ qk_head_dim = args.linear_key_head_dim + v_head_dim = args.linear_value_head_dim + num_qk_heads = args.linear_num_key_heads + num_v_heads = args.linear_num_value_heads + qk_dim = qk_head_dim * num_qk_heads + v_dim = v_head_dim * num_v_heads + linear_self_attn_term = ( + 3 + * 2 # fwd(1) + bwd(2) *FMA + * ( + ## in proj + args.hidden_size + * (2 * qk_dim + 2 * v_dim + 2 * num_v_heads) + ## conv1d + + args.linear_conv_kernel_dim + * (2 * qk_dim + v_dim) + ## gated delta rule + + num_v_heads + * (v_head_dim ** 2) + * 4 # KK^T, VK^T, S(a(I-bKK^T)), and SQ + ## out proj + + args.hidden_size + * v_dim + ) + ) + else: + raise ValueError(f"Invalid linear_attention_type: {args.linear_attention_type}") + else: + num_linear_attention_layers = 0 + linear_self_attn_term = 0 + num_standard_attention_layers = num_layers + + self_attn_term = ( + linear_self_attn_term * num_linear_attention_layers + + standard_self_attn_term * num_standard_attention_layers + ) + total_floating_point_operations = ( batch_size * args.seq_length @@ -528,6 +593,30 @@ def reorder_inner_param_groups(optimizer_state_dict): return preprocessed_common_state_dict +def get_no_wd_decay_cond(no_wd_decay_cond_type, default_skip_embedding_weight_decay): + """Get the no weight decay condition function.""" + + # Default case: no_wd_decay_cond_type is None + no_wd_decay_cond_fn = None + + if no_wd_decay_cond_type == 'qwen3_next': + # Qwen3-Next applies weight decay to qk layernorm as a special case + def qwen3_next_no_wd_decay_cond(name, param): + if "q_layernorm" in name or "k_layernorm" in name: + no_wd = False + else: + no_wd = ( + name.endswith(".bias") + or len(param.shape) == 1 + or (default_skip_embedding_weight_decay and "embedding" in name) + ) + return no_wd + no_wd_decay_cond_fn = qwen3_next_no_wd_decay_cond + elif no_wd_decay_cond_type is not None: + raise ValueError(f"Invalid no_wd_decay_cond_type: {no_wd_decay_cond_type}") + + return no_wd_decay_cond_fn + def pretrain( 
train_valid_test_dataset_provider, model_provider, @@ -664,8 +753,15 @@ def pretrain( # Model, optimizer, and learning rate. timers('model-and-optimizer-setup', log_level=0).start(barrier=True) + no_wd_decay_cond = get_no_wd_decay_cond( + args.no_weight_decay_cond_type, + default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, + ) model, optimizer, opt_param_scheduler = setup_model_and_optimizer( - model_provider, model_type, checkpointing_context=checkpointing_context + model_provider, + model_type, + checkpointing_context=checkpointing_context, + no_wd_decay_cond=no_wd_decay_cond, ) timers('model-and-optimizer-setup').stop() diff --git a/megatron/training/utils.py b/megatron/training/utils.py index cef71160791..ee46991bce5 100644 --- a/megatron/training/utils.py +++ b/megatron/training/utils.py @@ -38,6 +38,7 @@ from megatron.core.utils import ( get_batch_on_this_cp_rank, get_data_parallel_group_if_dtensor, + is_torch_min_version, to_local_if_dtensor, unwrap_model, ) @@ -271,6 +272,9 @@ def report_memory(name): string += ' | max allocated: {}'.format(torch.cuda.max_memory_allocated() / mega_bytes) string += ' | reserved: {}'.format(torch.cuda.memory_reserved() / mega_bytes) string += ' | max reserved: {}'.format(torch.cuda.max_memory_reserved() / mega_bytes) + if is_torch_min_version("2.6.0"): + # device usage is not supported in torch < 2.6.0 + string += ' | device usage: {}'.format(torch.cuda.device_memory_used() / mega_bytes) if mpu.get_data_parallel_rank() == 0: print("[Rank {}] {}".format(torch.distributed.get_rank(), string), flush=True) diff --git a/pyproject.toml b/pyproject.toml index 3362a0181c1..0a0fb9993f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ dev = [ "setuptools<80.0.0", "mamba-ssm~=2.2", "causal-conv1d~=1.5", + "flash-linear-attention~=0.3.2", "nv-grouped-gemm~=1.1", "transformer-engine[pytorch]>=2.6.0a0,<2.8.0", "nvidia-resiliency-ext>=0.4.0a0,<0.5.0", diff --git 
a/tests/unit_tests/ssm/test_gated_delta_net.py b/tests/unit_tests/ssm/test_gated_delta_net.py new file mode 100644 index 00000000000..dbf8d203634 --- /dev/null +++ b/tests/unit_tests/ssm/test_gated_delta_net.py @@ -0,0 +1,319 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +from functools import partial +from unittest import mock + +import pytest +import torch +import torch.nn.functional as F + +from megatron.core import parallel_state +from megatron.core.models.common.embeddings.rope_utils import ( + get_pos_emb_on_this_cp_rank as get_tensor_on_this_cp_rank, +) +from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec +from megatron.core.models.gpt.gpt_model import GPTModel +from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.ssm.gated_delta_net import GatedDeltaNet +from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed +from megatron.core.transformer import TransformerConfig +from megatron.training.arguments import parse_args +from megatron.training.checkpointing import load_checkpoint, save_checkpoint +from megatron.training.global_vars import set_args +from megatron.training.training import get_model +from megatron.training.utils import unwrap_model +from tests.unit_tests.dist_checkpointing import ( + TempNamedDir, + init_basic_mock_args, + init_checkpointing_mock_args, +) +from tests.unit_tests.test_utilities import Utils + +try: + import fla + + HAVE_FLA = True +except ImportError: + HAVE_FLA = False + + +@pytest.mark.parametrize( + ("tp_size", "sp", "cp_size"), + [ + (1, False, 1), + (2, False, 1), + (2, True, 1), + # GDN does not support CP for now. Leave it for future work. 
+ ], +) +@pytest.mark.skipif(not HAVE_FLA, reason="FLA is not installed.") +@pytest.mark.internal +class TestGatedDeltaNet: + + @pytest.fixture(scope='function', autouse=True) + def setup_method(self, tp_size, sp, cp_size): + # Initialize parallel and random seed + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp_size, + pipeline_model_parallel_size=1, + context_parallel_size=cp_size, + ) + model_parallel_cuda_manual_seed(123) + self.tp_size = tp_size + self.cp_size = cp_size + self.sp_size = tp_size if sp else 1 + + # Get TP and CP process groups from device mesh + tp_group = parallel_state.get_tensor_model_parallel_group() + cp_group = parallel_state.get_context_parallel_group() + pg_collection = ProcessGroupCollection(tp=tp_group, cp=cp_group) + + # Initialize model + self.transformer_config = TransformerConfig( + hidden_size=256, + linear_conv_kernel_dim=2, + linear_key_head_dim=64, + linear_value_head_dim=64, + linear_num_key_heads=4, + linear_num_value_heads=8, + num_layers=1, + normalization="RMSNorm", + use_cpu_initialization=True, + layernorm_zero_centered_gamma=True, + num_attention_heads=8, + activation_func=F.silu, + bf16=True, + tensor_model_parallel_size=tp_size, + sequence_parallel=sp, + context_parallel_size=cp_size, + ) + gdn_submodules = get_gpt_layer_with_transformer_engine_spec( + linear_attention_type="gated_delta_net", normalization="RMSNorm" + ).submodules.self_attention.submodules + + self.gdn = GatedDeltaNet( + self.transformer_config, + submodules=gdn_submodules, + layer_number=1, + bias=False, + conv_bias=False, + conv_init=1.0, + use_qk_l2norm=True, + A_init_range=(1, 16), + pg_collection=pg_collection, + ) + self.gdn = self.gdn.cuda().bfloat16() + + def teardown_method(self): + Utils.destroy_model_parallel() + + def test_gpu_forward(self): + gdn = self.gdn + + micro_batch_size = 2 + seq_length = 64 + hidden_states = torch.ones( + (seq_length // self.sp_size // self.cp_size, micro_batch_size, gdn.config.hidden_size), + 
device=torch.cuda.current_device(), + dtype=torch.bfloat16, + ) + attention_mask = None + + output, bias = gdn(hidden_states, attention_mask) + + assert output.dim() == 3, f"Output too many dimensions ({output.shape=})" + assert output.shape[0] == seq_length // self.sp_size // self.cp_size, ( + f"Output shape {output.shape[0]=} mismatch with " + f" {seq_length=} // {self.sp_size=} // {self.cp_size=}." + ) + assert ( + output.shape[1] == micro_batch_size + ), f"Output shape {output.shape[1]=} mismatch with {micro_batch_size=}" + assert ( + output.shape[2] == gdn.config.hidden_size + ), f"Output shape {output.shape[2]=} mismatch with {gdn.config.hidden_size=}" + assert ( + output.dtype == hidden_states.dtype + ), f"Output dtype {output.dtype=} mismatch with {hidden_states.dtype=}" + + +@pytest.mark.parametrize( + ("tp", "sp", "cp"), + [ + (4, False, 1), # TP w/o SP + (4, True, 1), # TP w/ SP + # CP does not support GDN for now. Add it once it is supported. + ], +) +@pytest.mark.skipif(not HAVE_FLA, reason="FLA is not installed.") +def test_parallel_gated_delta_net_correctness(tmp_path_dist_ckpt, tp, sp, cp): + # Constants + seed = 123 + sequence_length = 256 + micro_batch_size = 4 + hidden_size = 128 + normalization = "RMSNorm" + + # Model initialization function + def initialize_gpt_model(config, pre_process=True, post_process=True, vp_stage=None): + layer_spec = get_gpt_layer_with_transformer_engine_spec( + linear_attention_type="gated_delta_net", normalization=normalization + ) + gpt_model = GPTModel( + config=config, + transformer_layer_spec=layer_spec, + vocab_size=128, + max_sequence_length=sequence_length, + pre_process=pre_process, + post_process=post_process, + vp_stage=vp_stage, + ) + return gpt_model + + # Initialize baseline parallel state + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, context_parallel_size=1 + ) + + # Initialize input hidden states + torch.manual_seed(seed) + 
model_parallel_cuda_manual_seed(seed) + input_hidden_states = ( + torch.rand((sequence_length, micro_batch_size, hidden_size)) + .cuda() + .bfloat16() + .requires_grad_(True) + ) + + # Initialize transformer config + transformer_config = TransformerConfig( + hidden_size=128, + linear_conv_kernel_dim=2, + linear_key_head_dim=32, + linear_value_head_dim=32, + linear_num_key_heads=4, + linear_num_value_heads=8, + num_layers=1, + normalization=normalization, + use_cpu_initialization=True, + layernorm_zero_centered_gamma=True, + num_attention_heads=8, + activation_func=F.silu, + bf16=True, + ) + + with TempNamedDir(tmp_path_dist_ckpt / 'test_parallel_gdn', sync=True) as ckpt_dir: + # Set argument + mock_args = parse_args(ignore_unknown_args=True) + set_args(mock_args) + + # Initialize baseline model + init_basic_mock_args(mock_args, 1, 1, bf16=True) + mock_args.context_parallel_size = 1 + mock_args.sequence_parallel = 1 + gpt_model = unwrap_model( + get_model(partial(initialize_gpt_model, config=transformer_config)) + ) + + # Initialize args and save checkpoint + init_checkpointing_mock_args(mock_args, ckpt_dir, False) + mock_args.no_save_optim = True + mock_args.no_save_rng = True + mock_args.no_load_optim = True + mock_args.no_load_rng = True + save_checkpoint(10, gpt_model, None, None, 0) + + # Calculate baseline output + attention = gpt_model[0].decoder.layers[0].self_attention + output_hidden_states_baseline, bias_hidden_states_baseline = attention( + input_hidden_states, attention_mask=None + ) + output_hidden_states_baseline.sum().backward() + + # Save baseline output + input_grad_baseline = input_hidden_states.grad.detach() + output_hidden_states_baseline = output_hidden_states_baseline.detach() + + # Initialize parallel model + Utils.destroy_model_parallel() + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp, pipeline_model_parallel_size=1, context_parallel_size=cp + ) + torch.manual_seed(seed) + model_parallel_cuda_manual_seed(seed) + 
transformer_config.context_parallel_size = cp + transformer_config.tensor_model_parallel_size = tp + transformer_config.sequence_parallel = sp + init_basic_mock_args(mock_args, tp, 1, bf16=True) + mock_args.context_parallel_size = cp + mock_args.sequence_parallel = sp + gpt_model = unwrap_model( + get_model(partial(initialize_gpt_model, config=transformer_config)) + ) + with mock.patch('megatron.training.checkpointing.check_checkpoint_args'): + with mock.patch('megatron.training.checkpointing.update_num_microbatches'): + load_checkpoint(gpt_model, None, None) + + # Function to get tensor on this tp and cp rank + cp_group = parallel_state.get_context_parallel_group() + tp_rank = parallel_state.get_tensor_model_parallel_rank() + + def get_tensor_on_this_rank(tensor): + if cp > 1: + tensor = get_tensor_on_this_cp_rank(tensor, 0, cp_group) + if tp > 1 and sp: + sp_seg = sequence_length // tp // cp + tensor = tensor[tp_rank * sp_seg : (tp_rank + 1) * sp_seg] + return tensor + + # Calculate parallel model output + input_hidden_states = get_tensor_on_this_rank(input_hidden_states) + input_hidden_states = input_hidden_states.detach().requires_grad_(True) + parallel_attention = gpt_model[0].decoder.layers[0].self_attention + output_hidden_states_parallel, bias_hidden_states_parallel = parallel_attention( + input_hidden_states, attention_mask=None + ) + output_hidden_states_parallel.sum().backward() + input_grad_parallel = input_hidden_states.grad.detach() + + # Check if the output is the same + if cp: + atol, rtol = 5e-3, 5e-3 + else: + atol, rtol = 5e-4, 5e-4 + output_hidden_states_baseline = get_tensor_on_this_rank(output_hidden_states_baseline) + input_grad_baseline = get_tensor_on_this_rank(input_grad_baseline) + + assert torch.all( + ~torch.isnan(output_hidden_states_baseline) + ), "output_hidden_states_baseline contains nan" + assert torch.all( + ~torch.isinf(output_hidden_states_baseline) + ), "output_hidden_states_baseline contains inf" + assert 
torch.all(~torch.isnan(input_grad_baseline)), "input_grad_baseline contains nan" + assert torch.all(~torch.isinf(input_grad_baseline)), "input_grad_baseline contains inf" + assert torch.all( + ~torch.isnan(output_hidden_states_parallel) + ), "output_hidden_states_parallel contains nan" + assert torch.all( + ~torch.isinf(output_hidden_states_parallel) + ), "output_hidden_states_parallel contains inf" + assert torch.all(~torch.isnan(input_grad_parallel)), "input_grad_parallel contains nan" + assert torch.all(~torch.isinf(input_grad_parallel)), "input_grad_parallel contains inf" + + torch.testing.assert_close( + output_hidden_states_baseline, + output_hidden_states_parallel, + atol=atol, + rtol=rtol, + msg=lambda msg: f"Mismatch in output_hidden_states: {msg}", + ) + torch.testing.assert_close( + input_grad_baseline, + input_grad_parallel, + atol=atol, + rtol=rtol, + msg=lambda msg: f"Mismatch in input_grad: {msg}", + ) + + Utils.destroy_model_parallel() diff --git a/tests/unit_tests/transformer/moe/test_shared_experts.py b/tests/unit_tests/transformer/moe/test_shared_experts.py index f721c482937..6df4d2fd369 100644 --- a/tests/unit_tests/transformer/moe/test_shared_experts.py +++ b/tests/unit_tests/transformer/moe/test_shared_experts.py @@ -20,7 +20,8 @@ def teardown_method(self, method): @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") @pytest.mark.internal - def test_gpu_forward(self): + @pytest.mark.parametrize("shared_expert_gate", [False, True]) + def test_gpu_forward(self, shared_expert_gate): Utils.initialize_model_parallel(1, 1) model_parallel_cuda_manual_seed(123) print("done intializing") @@ -38,6 +39,7 @@ def test_gpu_forward(self): moe_router_load_balancing_type="sinkhorn", moe_router_topk=1, add_bias_linear=False, + moe_shared_expert_gate=shared_expert_gate, ) transformer_layer_spec = get_gpt_layer_local_spec( num_experts=num_moe_experts, moe_grouped_gemm=False @@ -49,7 +51,10 @@ def test_gpu_forward(self): assert 
isinstance(self.moe_layer, MoELayer) num_weights = sum([p.numel() for p in self.moe_layer.parameters()]) - assert num_weights == 3480 + 1152 + if shared_expert_gate: + assert num_weights == 3480 + 1152 + 12 # 12 is the weight of the gate + else: + assert num_weights == 3480 + 1152 assert self.moe_layer.shared_experts is not None assert self.moe_layer.shared_experts.stream is None assert self.moe_layer.token_dispatcher.shared_experts is None diff --git a/tests/unit_tests/transformer/test_attention.py b/tests/unit_tests/transformer/test_attention.py index 7e0e8c55807..419fc17ca0a 100644 --- a/tests/unit_tests/transformer/test_attention.py +++ b/tests/unit_tests/transformer/test_attention.py @@ -25,9 +25,11 @@ HAVE_FUSED_QKV_ROPE = False +@pytest.mark.parametrize("output_gate", [False, True]) class TestParallelAttention: - def setup_method(self, method): + @pytest.fixture(scope='function', autouse=True) + def setup_method(self, output_gate): Utils.initialize_model_parallel(1, 1) model_parallel_cuda_manual_seed(123) self.transformer_config = TransformerConfig( @@ -37,6 +39,7 @@ def setup_method(self, method): use_cpu_initialization=True, bf16=True, params_dtype=torch.bfloat16, + attention_output_gate=output_gate, ) self.parallel_attention = SelfAttention( self.transformer_config, @@ -44,7 +47,7 @@ def setup_method(self, method): layer_number=1, ) - def teardown_method(self, method): + def teardown_method(self): Utils.destroy_model_parallel() def test_constructor(self): @@ -52,7 +55,10 @@ def test_constructor(self): assert self.parallel_attention.layer_number == 1 num_weights = sum([p.numel() for p in self.parallel_attention.parameters()]) - assert num_weights == 66304 + if self.transformer_config.attention_output_gate: + assert num_weights == 82816 + else: + assert num_weights == 66304 def test_cpu_forward(self): # we can't currently do this because the global memory buffer is on GPU @@ -157,12 +163,15 @@ def test_checkpointed_gpu_forward(self): assert bias.shape[0] == 
config.hidden_size +@pytest.mark.parametrize("output_gate", [False, True]) class TestSelfAttention: - def setup_method(self, method): + @pytest.fixture(scope='function', autouse=True) + def setup_method(self, output_gate): + self.output_gate = output_gate Utils.destroy_model_parallel() - def teardown_method(self, method): + def teardown_method(self): Utils.destroy_model_parallel() def run_self_attention(self, pg_collection): @@ -171,6 +180,7 @@ def run_self_attention(self, pg_collection): num_layers=2, hidden_size=128, num_attention_heads=4, + attention_output_gate=self.output_gate, tensor_model_parallel_size=tensor_model_parallel_size, use_cpu_initialization=False, ) diff --git a/uv.lock b/uv.lock index 84da2bd685a..9634d2cbf88 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", @@ -631,7 +631,7 @@ name = "cffi" version = "2.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pycparser", marker = "implementation_name != 'PyPy'" }, + { name = "pycparser", marker = "(python_full_version < '3.12' and implementation_name != 'PyPy') or (python_full_version == '3.12.*' and implementation_name != 'PyPy' and extra == 'extra-13-megatron-core-dev') or (python_full_version == '3.12.*' and implementation_name != 'PyPy' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.13' and implementation_name != 'PyPy' and extra == 'extra-13-megatron-core-dev') or (implementation_name != 'PyPy' and platform_python_implementation != 'PyPy') or (implementation_name == 'PyPy' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } wheels = [ @@ -777,7 +777,7 @@ name = "click" version = "8.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/46/61/de6cd827efad202d7057d93e0fed9294b96952e188f7384832791c7b2254/click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4", size = 276943, upload-time = "2025-09-18T17:32:23.696Z" } wheels = [ @@ -1080,6 +1080,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7c/24/f7351052cf9db771fe4f32fca47fd66e6d9b53d8613b17faf7d130a9d553/cython-3.1.4-py3-none-any.whl", hash = "sha256:d194d95e4fa029a3f6c7d46bdd16d973808c7ea4797586911fdb67cb98b1a2c6", size = 1227541, upload-time = "2025-09-16T07:20:29.595Z" }, ] +[[package]] +name = "datasets" +version = "4.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dill" }, + { name = "filelock" }, + { name = "fsspec", extra = ["http"], marker = "extra == 'extra-13-megatron-core-dev'" }, + { name = "huggingface-hub" }, + { name = "multiprocess" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pyarrow" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/a4/73f8e6ef52c535e1d20d5b2ca83bfe6de399d8b8b8a61ccc8d63d60735aa/datasets-4.1.1.tar.gz", hash = "sha256:7d8d5ba8b12861d2c44bfff9c83484ebfafff1ff553371e5901a8d3aab5450e2", size = 579324, upload-time = 
"2025-09-18T13:14:27.108Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/c8/09012ac195a0aab58755800d2efdc0e7d5905053509f12cb5d136c911cda/datasets-4.1.1-py3-none-any.whl", hash = "sha256:62e4f6899a36be9ec74a7e759a6951253cc85b3fcfa0a759b0efa8353b149dac", size = 503623, upload-time = "2025-09-18T13:14:25.111Z" }, +] + [[package]] name = "decorator" version = "5.2.1" @@ -1274,6 +1298,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970, upload-time = "2022-11-02T17:34:01.425Z" }, ] +[[package]] +name = "fla-core" +version = "0.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "einops" }, + { name = "torch", marker = "sys_platform == 'never'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/67/c6/10a1149b07e6bab45b2cb2d07f6b827716c2baf5f3404161753f25c6389b/fla_core-0.3.2.tar.gz", hash = "sha256:d38db16bc4e1c6fa8c04df442f246da1e6926a209426bc6ef703d41bfbc37c92", size = 296725, upload-time = "2025-09-10T07:43:40.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/74947b33c07682280e65adbdf17c4ee94b30232df2f728bafecf13d1d820/fla_core-0.3.2-py3-none-any.whl", hash = "sha256:e751d5a41e33eee721a6fb6588bd857f6f36e0d14719a23b1ebdbd617d307209", size = 413594, upload-time = "2025-09-10T07:43:37.786Z" }, +] + [[package]] name = "flake8" version = "7.1.0" @@ -1288,6 +1325,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/43/d5147aadaa52558e94e024811f2f9543b4bd7203b3a9659eeb5dff9c61b3/flake8-7.1.0-py2.py3-none-any.whl", hash = "sha256:2e416edcc62471a64cea09353f4e7bdba32aeb079b6e360554c659a122b1bc6a", size = 57569, upload-time = "2024-06-15T21:37:05.342Z" }, ] +[[package]] +name = "flash-linear-attention" +version = "0.3.2" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "datasets" }, + { name = "fla-core" }, + { name = "pytest" }, + { name = "transformers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/84/f6/e62c1e562a288557eba7f06f168a7615813d1a227327b8beb8ba426da2c5/flash_linear_attention-0.3.2.tar.gz", hash = "sha256:9147747316c2951fed4ebeb4fa87977c05d807dc70c93b46250b68a6eb1183e2", size = 150880, upload-time = "2025-09-10T07:43:41.37Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/d0/35ce9eac5f52c72005095aaa12a393d2656ed7ffedf925b2381a6b76d10c/flash_linear_attention-0.3.2-py3-none-any.whl", hash = "sha256:604e73361437ba786420ab195e2caa3fd19280503761e703fa353c5ce5c65376", size = 274592, upload-time = "2025-09-10T07:43:39.107Z" }, +] + [[package]] name = "flash-mla" version = "1.0.0+9edee0c" @@ -1474,6 +1526,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7", size = 199289, upload-time = "2025-09-02T19:10:47.708Z" }, ] +[package.optional-dependencies] +http = [ + { name = "aiohttp" }, +] + [[package]] name = "gitdb" version = "4.0.12" @@ -1671,7 +1728,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "fsspec" }, - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pyyaml" }, { name = "requests" }, @@ -2176,6 +2233,7 @@ dev = [ { name = "av" }, { name = "causal-conv1d" }, { name = "einops" }, + { name = 
"flash-linear-attention" }, { name = "flashinfer-python" }, { name = "mamba-ssm" }, { name = "megatron-energon", extra = ["av-decode"], marker = "extra == 'extra-13-megatron-core-dev'" }, @@ -2272,6 +2330,7 @@ requires-dist = [ { name = "causal-conv1d", marker = "extra == 'dev'", specifier = "~=1.5" }, { name = "einops", marker = "extra == 'dev'", specifier = "~=0.8" }, { name = "einops", marker = "extra == 'lts'" }, + { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.3.2" }, { name = "flashinfer-python", marker = "extra == 'dev'" }, { name = "flask-restful", marker = "extra == 'mlm'" }, { name = "mamba-ssm", marker = "extra == 'dev'", specifier = "~=2.2" }, @@ -2659,6 +2718,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/69/b547032297c7e63ba2af494edba695d781af8a0c6e89e4d06cf848b21d80/multidict-6.6.4-py3-none-any.whl", hash = "sha256:27d8f8e125c07cb954e54d75d04905a9bba8a439c1d84aca94949d4d03d8601c", size = 12313, upload-time = "2025-08-11T12:08:46.891Z" }, ] +[[package]] +name = "multiprocess" +version = "0.70.16" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dill" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603, upload-time = "2024-01-28T18:52:34.85Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/76/6e712a2623d146d314f17598df5de7224c85c0060ef63fd95cc15a25b3fa/multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee", size = 134980, upload-time = "2024-01-28T18:52:15.731Z" }, + { url = "https://files.pythonhosted.org/packages/0f/ab/1e6e8009e380e22254ff539ebe117861e5bdb3bff1fc977920972237c6c7/multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = 
"sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec", size = 134982, upload-time = "2024-01-28T18:52:17.783Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824, upload-time = "2024-01-28T18:52:26.062Z" }, + { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519, upload-time = "2024-01-28T18:52:28.115Z" }, + { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741, upload-time = "2024-01-28T18:52:29.395Z" }, + { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628, upload-time = "2024-01-28T18:52:30.853Z" }, + { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" }, +] + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -3575,6 +3652,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/99/6c/64cafaceea3f99927e84b38a362ec6a8f24f33061c90bda77dfe1cd4c3c6/pulp-3.3.0-py3-none-any.whl", hash = "sha256:dd6ad2d63f196d1254eddf9dcff5cd224912c1f046120cb7c143c5b0eda63fae", size = 16387700, upload-time = 
"2025-09-18T08:14:53.368Z" }, ] +[[package]] +name = "pyarrow" +version = "21.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/c2/ea068b8f00905c06329a3dfcd40d0fcc2b7d0f2e355bdb25b65e0a0e4cd4/pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc", size = 1133487, upload-time = "2025-07-18T00:57:31.761Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/d9/110de31880016e2afc52d8580b397dbe47615defbf09ca8cf55f56c62165/pyarrow-21.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e563271e2c5ff4d4a4cbeb2c83d5cf0d4938b891518e676025f7268c6fe5fe26", size = 31196837, upload-time = "2025-07-18T00:54:34.755Z" }, + { url = "https://files.pythonhosted.org/packages/df/5f/c1c1997613abf24fceb087e79432d24c19bc6f7259cab57c2c8e5e545fab/pyarrow-21.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fee33b0ca46f4c85443d6c450357101e47d53e6c3f008d658c27a2d020d44c79", size = 32659470, upload-time = "2025-07-18T00:54:38.329Z" }, + { url = "https://files.pythonhosted.org/packages/3e/ed/b1589a777816ee33ba123ba1e4f8f02243a844fed0deec97bde9fb21a5cf/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7be45519b830f7c24b21d630a31d48bcebfd5d4d7f9d3bdb49da9cdf6d764edb", size = 41055619, upload-time = "2025-07-18T00:54:42.172Z" }, + { url = "https://files.pythonhosted.org/packages/44/28/b6672962639e85dc0ac36f71ab3a8f5f38e01b51343d7aa372a6b56fa3f3/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:26bfd95f6bff443ceae63c65dc7e048670b7e98bc892210acba7e4995d3d4b51", size = 42733488, upload-time = "2025-07-18T00:54:47.132Z" }, + { url = "https://files.pythonhosted.org/packages/f8/cc/de02c3614874b9089c94eac093f90ca5dfa6d5afe45de3ba847fd950fdf1/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd04ec08f7f8bd113c55868bd3fc442a9db67c27af098c5f814a3091e71cc61a", size = 43329159, upload-time = 
"2025-07-18T00:54:51.686Z" }, + { url = "https://files.pythonhosted.org/packages/a6/3e/99473332ac40278f196e105ce30b79ab8affab12f6194802f2593d6b0be2/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9b0b14b49ac10654332a805aedfc0147fb3469cbf8ea951b3d040dab12372594", size = 45050567, upload-time = "2025-07-18T00:54:56.679Z" }, + { url = "https://files.pythonhosted.org/packages/7b/f5/c372ef60593d713e8bfbb7e0c743501605f0ad00719146dc075faf11172b/pyarrow-21.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9d9f8bcb4c3be7738add259738abdeddc363de1b80e3310e04067aa1ca596634", size = 26217959, upload-time = "2025-07-18T00:55:00.482Z" }, + { url = "https://files.pythonhosted.org/packages/94/dc/80564a3071a57c20b7c32575e4a0120e8a330ef487c319b122942d665960/pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b", size = 31243234, upload-time = "2025-07-18T00:55:03.812Z" }, + { url = "https://files.pythonhosted.org/packages/ea/cc/3b51cb2db26fe535d14f74cab4c79b191ed9a8cd4cbba45e2379b5ca2746/pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10", size = 32714370, upload-time = "2025-07-18T00:55:07.495Z" }, + { url = "https://files.pythonhosted.org/packages/24/11/a4431f36d5ad7d83b87146f515c063e4d07ef0b7240876ddb885e6b44f2e/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e", size = 41135424, upload-time = "2025-07-18T00:55:11.461Z" }, + { url = "https://files.pythonhosted.org/packages/74/dc/035d54638fc5d2971cbf1e987ccd45f1091c83bcf747281cf6cc25e72c88/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569", size = 42823810, upload-time = "2025-07-18T00:55:16.301Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/3b/89fced102448a9e3e0d4dded1f37fa3ce4700f02cdb8665457fcc8015f5b/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e", size = 43391538, upload-time = "2025-07-18T00:55:23.82Z" }, + { url = "https://files.pythonhosted.org/packages/fb/bb/ea7f1bd08978d39debd3b23611c293f64a642557e8141c80635d501e6d53/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c", size = 45120056, upload-time = "2025-07-18T00:55:28.231Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0b/77ea0600009842b30ceebc3337639a7380cd946061b620ac1a2f3cb541e2/pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6", size = 26220568, upload-time = "2025-07-18T00:55:32.122Z" }, + { url = "https://files.pythonhosted.org/packages/ca/d4/d4f817b21aacc30195cf6a46ba041dd1be827efa4a623cc8bf39a1c2a0c0/pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd", size = 31160305, upload-time = "2025-07-18T00:55:35.373Z" }, + { url = "https://files.pythonhosted.org/packages/a2/9c/dcd38ce6e4b4d9a19e1d36914cb8e2b1da4e6003dd075474c4cfcdfe0601/pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876", size = 32684264, upload-time = "2025-07-18T00:55:39.303Z" }, + { url = "https://files.pythonhosted.org/packages/4f/74/2a2d9f8d7a59b639523454bec12dba35ae3d0a07d8ab529dc0809f74b23c/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d", size = 41108099, upload-time = "2025-07-18T00:55:42.889Z" }, + { url = 
"https://files.pythonhosted.org/packages/ad/90/2660332eeb31303c13b653ea566a9918484b6e4d6b9d2d46879a33ab0622/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e", size = 42829529, upload-time = "2025-07-18T00:55:47.069Z" }, + { url = "https://files.pythonhosted.org/packages/33/27/1a93a25c92717f6aa0fca06eb4700860577d016cd3ae51aad0e0488ac899/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82", size = 43367883, upload-time = "2025-07-18T00:55:53.069Z" }, + { url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" }, + { url = "https://files.pythonhosted.org/packages/71/30/f3795b6e192c3ab881325ffe172e526499eb3780e306a15103a2764916a2/pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18", size = 26203175, upload-time = "2025-07-18T00:56:01.364Z" }, + { url = "https://files.pythonhosted.org/packages/16/ca/c7eaa8e62db8fb37ce942b1ea0c6d7abfe3786ca193957afa25e71b81b66/pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a", size = 31154306, upload-time = "2025-07-18T00:56:04.42Z" }, + { url = "https://files.pythonhosted.org/packages/ce/e8/e87d9e3b2489302b3a1aea709aaca4b781c5252fcb812a17ab6275a9a484/pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe", size = 32680622, upload-time = "2025-07-18T00:56:07.505Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/52/79095d73a742aa0aba370c7942b1b655f598069489ab387fe47261a849e1/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd", size = 41104094, upload-time = "2025-07-18T00:56:10.994Z" }, + { url = "https://files.pythonhosted.org/packages/89/4b/7782438b551dbb0468892a276b8c789b8bbdb25ea5c5eb27faadd753e037/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61", size = 42825576, upload-time = "2025-07-18T00:56:15.569Z" }, + { url = "https://files.pythonhosted.org/packages/b3/62/0f29de6e0a1e33518dec92c65be0351d32d7ca351e51ec5f4f837a9aab91/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d", size = 43368342, upload-time = "2025-07-18T00:56:19.531Z" }, + { url = "https://files.pythonhosted.org/packages/90/c7/0fa1f3f29cf75f339768cc698c8ad4ddd2481c1742e9741459911c9ac477/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99", size = 45131218, upload-time = "2025-07-18T00:56:23.347Z" }, + { url = "https://files.pythonhosted.org/packages/01/63/581f2076465e67b23bc5a37d4a2abff8362d389d29d8105832e82c9c811c/pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636", size = 26087551, upload-time = "2025-07-18T00:56:26.758Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ab/357d0d9648bb8241ee7348e564f2479d206ebe6e1c47ac5027c2e31ecd39/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da", size = 31290064, upload-time = "2025-07-18T00:56:30.214Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/8a/5685d62a990e4cac2043fc76b4661bf38d06efed55cf45a334b455bd2759/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7", size = 32727837, upload-time = "2025-07-18T00:56:33.935Z" }, + { url = "https://files.pythonhosted.org/packages/fc/de/c0828ee09525c2bafefd3e736a248ebe764d07d0fd762d4f0929dbc516c9/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6", size = 41014158, upload-time = "2025-07-18T00:56:37.528Z" }, + { url = "https://files.pythonhosted.org/packages/6e/26/a2865c420c50b7a3748320b614f3484bfcde8347b2639b2b903b21ce6a72/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8", size = 42667885, upload-time = "2025-07-18T00:56:41.483Z" }, + { url = "https://files.pythonhosted.org/packages/0a/f9/4ee798dc902533159250fb4321267730bc0a107d8c6889e07c3add4fe3a5/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503", size = 43276625, upload-time = "2025-07-18T00:56:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/5a/da/e02544d6997037a4b0d22d8e5f66bc9315c3671371a8b18c79ade1cefe14/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79", size = 44951890, upload-time = "2025-07-18T00:56:52.568Z" }, + { url = "https://files.pythonhosted.org/packages/e5/4e/519c1bc1876625fe6b71e9a28287c43ec2f20f73c658b9ae1d485c0c206e/pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10", size = 26371006, upload-time = "2025-07-18T00:56:56.379Z" }, +] + [[package]] name = "pybind11" version = "3.0.1" @@ -5061,7 +5181,7 @@ name = "sympy" version = "1.14.0" source = { 
registry = "https://pypi.org/simple" } dependencies = [ - { name = "mpmath", marker = "sys_platform != 'linux'" }, + { name = "mpmath", marker = "sys_platform != 'linux' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } wheels = [ @@ -5310,15 +5430,15 @@ name = "torch" version = "2.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock", marker = "sys_platform != 'linux'" }, - { name = "fsspec", marker = "sys_platform != 'linux'" }, - { name = "jinja2", marker = "sys_platform != 'linux'" }, + { name = "filelock", marker = "sys_platform != 'linux' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "fsspec", marker = "sys_platform != 'linux' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "jinja2", marker = "sys_platform != 'linux' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'linux') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' 
and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'linux'" }, - { name = "sympy", marker = "sys_platform != 'linux'" }, - { name = "triton", marker = "sys_platform == 'never'" }, - { name = "typing-extensions", marker = "sys_platform != 'linux'" }, + { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform != 'linux') or (python_full_version < '3.12' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "sympy", marker = "sys_platform != 'linux' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "triton", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "sys_platform != 'linux' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/63/28/110f7274254f1b8476c561dada127173f994afa2b1ffc044efb773c15650/torch-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0be92c08b44009d4131d1ff7a8060d10bafdb7ddcb7359ef8d8c5169007ea905", size = 102052793, upload-time = "2025-08-06T14:53:15.852Z" }, @@ -5415,7 +5535,7 @@ name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = 
"sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } wheels = [ @@ -5490,7 +5610,7 @@ name = "triton" version = "3.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "setuptools", marker = "sys_platform != 'linux'" }, + { name = "setuptools", marker = "sys_platform != 'linux' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" }, @@ -5961,6 +6081,79 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/03/75a399549e82b6a20ff84d71ee9e777caf6bc687e8004d8b3699565a6aad/xattr-1.2.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb669f01627962ce2bc556f19d421162247bc2cad0d4625d6ea5eb32af4cf29b", size = 17908, upload-time = "2025-07-14T03:15:32.335Z" }, ] +[[package]] +name = "xxhash" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/5e/d6e5258d69df8b4ed8c83b6664f2b47d30d2dec551a29ad72a6c69eafd31/xxhash-3.5.0.tar.gz", hash = "sha256:84f2caddf951c9cbf8dc2e22a89d4ccf5d86391ac6418fe81e3c67d0cf60b45f", size = 84241, upload-time = "2024-08-17T09:20:38.972Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/8a/0e9feca390d512d293afd844d31670e25608c4a901e10202aa98785eab09/xxhash-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ece616532c499ee9afbb83078b1b952beffef121d989841f7f4b3dc5ac0fd212", size = 31970, upload-time = "2024-08-17T09:17:35.675Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/e6/be5aa49580cd064a18200ab78e29b88b1127e1a8c7955eb8ecf81f2626eb/xxhash-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3171f693dbc2cef6477054a665dc255d996646b4023fe56cb4db80e26f4cc520", size = 30801, upload-time = "2024-08-17T09:17:37.353Z" }, + { url = "https://files.pythonhosted.org/packages/20/ee/b8a99ebbc6d1113b3a3f09e747fa318c3cde5b04bd9c197688fadf0eeae8/xxhash-3.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c5d3e570ef46adaf93fc81b44aca6002b5a4d8ca11bd0580c07eac537f36680", size = 220927, upload-time = "2024-08-17T09:17:38.835Z" }, + { url = "https://files.pythonhosted.org/packages/58/62/15d10582ef159283a5c2b47f6d799fc3303fe3911d5bb0bcc820e1ef7ff4/xxhash-3.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7cb29a034301e2982df8b1fe6328a84f4b676106a13e9135a0d7e0c3e9f806da", size = 200360, upload-time = "2024-08-17T09:17:40.851Z" }, + { url = "https://files.pythonhosted.org/packages/23/41/61202663ea9b1bd8e53673b8ec9e2619989353dba8cfb68e59a9cbd9ffe3/xxhash-3.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d0d307d27099bb0cbeea7260eb39ed4fdb99c5542e21e94bb6fd29e49c57a23", size = 428528, upload-time = "2024-08-17T09:17:42.545Z" }, + { url = "https://files.pythonhosted.org/packages/f2/07/d9a3059f702dec5b3b703737afb6dda32f304f6e9da181a229dafd052c29/xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0342aafd421795d740e514bc9858ebddfc705a75a8c5046ac56d85fe97bf196", size = 194149, upload-time = "2024-08-17T09:17:44.361Z" }, + { url = "https://files.pythonhosted.org/packages/eb/58/27caadf78226ecf1d62dbd0c01d152ed381c14c1ee4ad01f0d460fc40eac/xxhash-3.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dbbd9892c5ebffeca1ed620cf0ade13eb55a0d8c84e0751a6653adc6ac40d0c", size = 207703, upload-time = "2024-08-17T09:17:46.656Z" }, + { 
url = "https://files.pythonhosted.org/packages/b1/08/32d558ce23e1e068453c39aed7b3c1cdc690c177873ec0ca3a90d5808765/xxhash-3.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4cc2d67fdb4d057730c75a64c5923abfa17775ae234a71b0200346bfb0a7f482", size = 216255, upload-time = "2024-08-17T09:17:48.031Z" }, + { url = "https://files.pythonhosted.org/packages/3f/d4/2b971e2d2b0a61045f842b622ef11e94096cf1f12cd448b6fd426e80e0e2/xxhash-3.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ec28adb204b759306a3d64358a5e5c07d7b1dd0ccbce04aa76cb9377b7b70296", size = 202744, upload-time = "2024-08-17T09:17:50.045Z" }, + { url = "https://files.pythonhosted.org/packages/19/ae/6a6438864a8c4c39915d7b65effd85392ebe22710412902487e51769146d/xxhash-3.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1328f6d8cca2b86acb14104e381225a3d7b42c92c4b86ceae814e5c400dbb415", size = 210115, upload-time = "2024-08-17T09:17:51.834Z" }, + { url = "https://files.pythonhosted.org/packages/48/7d/b3c27c27d1fc868094d02fe4498ccce8cec9fcc591825c01d6bcb0b4fc49/xxhash-3.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8d47ebd9f5d9607fd039c1fbf4994e3b071ea23eff42f4ecef246ab2b7334198", size = 414247, upload-time = "2024-08-17T09:17:53.094Z" }, + { url = "https://files.pythonhosted.org/packages/a1/05/918f9e7d2fbbd334b829997045d341d6239b563c44e683b9a7ef8fe50f5d/xxhash-3.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b96d559e0fcddd3343c510a0fe2b127fbff16bf346dd76280b82292567523442", size = 191419, upload-time = "2024-08-17T09:17:54.906Z" }, + { url = "https://files.pythonhosted.org/packages/08/29/dfe393805b2f86bfc47c290b275f0b7c189dc2f4e136fd4754f32eb18a8d/xxhash-3.5.0-cp310-cp310-win32.whl", hash = "sha256:61c722ed8d49ac9bc26c7071eeaa1f6ff24053d553146d5df031802deffd03da", size = 30114, upload-time = "2024-08-17T09:17:56.566Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/d7/aa0b22c4ebb7c3ccb993d4c565132abc641cd11164f8952d89eb6a501909/xxhash-3.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:9bed5144c6923cc902cd14bb8963f2d5e034def4486ab0bbe1f58f03f042f9a9", size = 30003, upload-time = "2024-08-17T09:17:57.596Z" }, + { url = "https://files.pythonhosted.org/packages/69/12/f969b81541ee91b55f1ce469d7ab55079593c80d04fd01691b550e535000/xxhash-3.5.0-cp310-cp310-win_arm64.whl", hash = "sha256:893074d651cf25c1cc14e3bea4fceefd67f2921b1bb8e40fcfeba56820de80c6", size = 26773, upload-time = "2024-08-17T09:17:59.169Z" }, + { url = "https://files.pythonhosted.org/packages/b8/c7/afed0f131fbda960ff15eee7f304fa0eeb2d58770fade99897984852ef23/xxhash-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:02c2e816896dc6f85922ced60097bcf6f008dedfc5073dcba32f9c8dd786f3c1", size = 31969, upload-time = "2024-08-17T09:18:00.852Z" }, + { url = "https://files.pythonhosted.org/packages/8c/0c/7c3bc6d87e5235672fcc2fb42fd5ad79fe1033925f71bf549ee068c7d1ca/xxhash-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6027dcd885e21581e46d3c7f682cfb2b870942feeed58a21c29583512c3f09f8", size = 30800, upload-time = "2024-08-17T09:18:01.863Z" }, + { url = "https://files.pythonhosted.org/packages/04/9e/01067981d98069eec1c20201f8c145367698e9056f8bc295346e4ea32dd1/xxhash-3.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1308fa542bbdbf2fa85e9e66b1077eea3a88bef38ee8a06270b4298a7a62a166", size = 221566, upload-time = "2024-08-17T09:18:03.461Z" }, + { url = "https://files.pythonhosted.org/packages/d4/09/d4996de4059c3ce5342b6e1e6a77c9d6c91acce31f6ed979891872dd162b/xxhash-3.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c28b2fdcee797e1c1961cd3bcd3d545cab22ad202c846235197935e1df2f8ef7", size = 201214, upload-time = "2024-08-17T09:18:05.616Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/f5/6d2dc9f8d55a7ce0f5e7bfef916e67536f01b85d32a9fbf137d4cadbee38/xxhash-3.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:924361811732ddad75ff23e90efd9ccfda4f664132feecb90895bade6a1b4623", size = 429433, upload-time = "2024-08-17T09:18:06.957Z" }, + { url = "https://files.pythonhosted.org/packages/d9/72/9256303f10e41ab004799a4aa74b80b3c5977d6383ae4550548b24bd1971/xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89997aa1c4b6a5b1e5b588979d1da048a3c6f15e55c11d117a56b75c84531f5a", size = 194822, upload-time = "2024-08-17T09:18:08.331Z" }, + { url = "https://files.pythonhosted.org/packages/34/92/1a3a29acd08248a34b0e6a94f4e0ed9b8379a4ff471f1668e4dce7bdbaa8/xxhash-3.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:685c4f4e8c59837de103344eb1c8a3851f670309eb5c361f746805c5471b8c88", size = 208538, upload-time = "2024-08-17T09:18:10.332Z" }, + { url = "https://files.pythonhosted.org/packages/53/ad/7fa1a109663366de42f724a1cdb8e796a260dbac45047bce153bc1e18abf/xxhash-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dbd2ecfbfee70bc1a4acb7461fa6af7748ec2ab08ac0fa298f281c51518f982c", size = 216953, upload-time = "2024-08-17T09:18:11.707Z" }, + { url = "https://files.pythonhosted.org/packages/35/02/137300e24203bf2b2a49b48ce898ecce6fd01789c0fcd9c686c0a002d129/xxhash-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:25b5a51dc3dfb20a10833c8eee25903fd2e14059e9afcd329c9da20609a307b2", size = 203594, upload-time = "2024-08-17T09:18:13.799Z" }, + { url = "https://files.pythonhosted.org/packages/23/03/aeceb273933d7eee248c4322b98b8e971f06cc3880e5f7602c94e5578af5/xxhash-3.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a8fb786fb754ef6ff8c120cb96629fb518f8eb5a61a16aac3a979a9dbd40a084", size = 210971, upload-time = "2024-08-17T09:18:15.824Z" }, + { url = 
"https://files.pythonhosted.org/packages/e3/64/ed82ec09489474cbb35c716b189ddc1521d8b3de12b1b5ab41ce7f70253c/xxhash-3.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a905ad00ad1e1c34fe4e9d7c1d949ab09c6fa90c919860c1534ff479f40fd12d", size = 415050, upload-time = "2024-08-17T09:18:17.142Z" }, + { url = "https://files.pythonhosted.org/packages/71/43/6db4c02dcb488ad4e03bc86d70506c3d40a384ee73c9b5c93338eb1f3c23/xxhash-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:963be41bcd49f53af6d795f65c0da9b4cc518c0dd9c47145c98f61cb464f4839", size = 192216, upload-time = "2024-08-17T09:18:18.779Z" }, + { url = "https://files.pythonhosted.org/packages/22/6d/db4abec29e7a567455344433d095fdb39c97db6955bb4a2c432e486b4d28/xxhash-3.5.0-cp311-cp311-win32.whl", hash = "sha256:109b436096d0a2dd039c355fa3414160ec4d843dfecc64a14077332a00aeb7da", size = 30120, upload-time = "2024-08-17T09:18:20.009Z" }, + { url = "https://files.pythonhosted.org/packages/52/1c/fa3b61c0cf03e1da4767213672efe186b1dfa4fc901a4a694fb184a513d1/xxhash-3.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:b702f806693201ad6c0a05ddbbe4c8f359626d0b3305f766077d51388a6bac58", size = 30003, upload-time = "2024-08-17T09:18:21.052Z" }, + { url = "https://files.pythonhosted.org/packages/6b/8e/9e6fc572acf6e1cc7ccb01973c213f895cb8668a9d4c2b58a99350da14b7/xxhash-3.5.0-cp311-cp311-win_arm64.whl", hash = "sha256:c4dcb4120d0cc3cc448624147dba64e9021b278c63e34a38789b688fd0da9bf3", size = 26777, upload-time = "2024-08-17T09:18:22.809Z" }, + { url = "https://files.pythonhosted.org/packages/07/0e/1bfce2502c57d7e2e787600b31c83535af83746885aa1a5f153d8c8059d6/xxhash-3.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:14470ace8bd3b5d51318782cd94e6f94431974f16cb3b8dc15d52f3b69df8e00", size = 31969, upload-time = "2024-08-17T09:18:24.025Z" }, + { url = "https://files.pythonhosted.org/packages/3f/d6/8ca450d6fe5b71ce521b4e5db69622383d039e2b253e9b2f24f93265b52c/xxhash-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:59aa1203de1cb96dbeab595ded0ad0c0056bb2245ae11fac11c0ceea861382b9", size = 30787, upload-time = "2024-08-17T09:18:25.318Z" }, + { url = "https://files.pythonhosted.org/packages/5b/84/de7c89bc6ef63d750159086a6ada6416cc4349eab23f76ab870407178b93/xxhash-3.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08424f6648526076e28fae6ea2806c0a7d504b9ef05ae61d196d571e5c879c84", size = 220959, upload-time = "2024-08-17T09:18:26.518Z" }, + { url = "https://files.pythonhosted.org/packages/fe/86/51258d3e8a8545ff26468c977101964c14d56a8a37f5835bc0082426c672/xxhash-3.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61a1ff00674879725b194695e17f23d3248998b843eb5e933007ca743310f793", size = 200006, upload-time = "2024-08-17T09:18:27.905Z" }, + { url = "https://files.pythonhosted.org/packages/02/0a/96973bd325412feccf23cf3680fd2246aebf4b789122f938d5557c54a6b2/xxhash-3.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2f2c61bee5844d41c3eb015ac652a0229e901074951ae48581d58bfb2ba01be", size = 428326, upload-time = "2024-08-17T09:18:29.335Z" }, + { url = "https://files.pythonhosted.org/packages/11/a7/81dba5010f7e733de88af9555725146fc133be97ce36533867f4c7e75066/xxhash-3.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d32a592cac88d18cc09a89172e1c32d7f2a6e516c3dfde1b9adb90ab5df54a6", size = 194380, upload-time = "2024-08-17T09:18:30.706Z" }, + { url = "https://files.pythonhosted.org/packages/fb/7d/f29006ab398a173f4501c0e4977ba288f1c621d878ec217b4ff516810c04/xxhash-3.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70dabf941dede727cca579e8c205e61121afc9b28516752fd65724be1355cc90", size = 207934, upload-time = "2024-08-17T09:18:32.133Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/6e/6e88b8f24612510e73d4d70d9b0c7dff62a2e78451b9f0d042a5462c8d03/xxhash-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e5d0ddaca65ecca9c10dcf01730165fd858533d0be84c75c327487c37a906a27", size = 216301, upload-time = "2024-08-17T09:18:33.474Z" }, + { url = "https://files.pythonhosted.org/packages/af/51/7862f4fa4b75a25c3b4163c8a873f070532fe5f2d3f9b3fc869c8337a398/xxhash-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e5b5e16c5a480fe5f59f56c30abdeba09ffd75da8d13f6b9b6fd224d0b4d0a2", size = 203351, upload-time = "2024-08-17T09:18:34.889Z" }, + { url = "https://files.pythonhosted.org/packages/22/61/8d6a40f288f791cf79ed5bb113159abf0c81d6efb86e734334f698eb4c59/xxhash-3.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149b7914451eb154b3dfaa721315117ea1dac2cc55a01bfbd4df7c68c5dd683d", size = 210294, upload-time = "2024-08-17T09:18:36.355Z" }, + { url = "https://files.pythonhosted.org/packages/17/02/215c4698955762d45a8158117190261b2dbefe9ae7e5b906768c09d8bc74/xxhash-3.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:eade977f5c96c677035ff39c56ac74d851b1cca7d607ab3d8f23c6b859379cab", size = 414674, upload-time = "2024-08-17T09:18:38.536Z" }, + { url = "https://files.pythonhosted.org/packages/31/5c/b7a8db8a3237cff3d535261325d95de509f6a8ae439a5a7a4ffcff478189/xxhash-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fa9f547bd98f5553d03160967866a71056a60960be00356a15ecc44efb40ba8e", size = 192022, upload-time = "2024-08-17T09:18:40.138Z" }, + { url = "https://files.pythonhosted.org/packages/78/e3/dd76659b2811b3fd06892a8beb850e1996b63e9235af5a86ea348f053e9e/xxhash-3.5.0-cp312-cp312-win32.whl", hash = "sha256:f7b58d1fd3551b8c80a971199543379be1cee3d0d409e1f6d8b01c1a2eebf1f8", size = 30170, upload-time = "2024-08-17T09:18:42.163Z" }, + { url = "https://files.pythonhosted.org/packages/d9/6b/1c443fe6cfeb4ad1dcf231cdec96eb94fb43d6498b4469ed8b51f8b59a37/xxhash-3.5.0-cp312-cp312-win_amd64.whl", 
hash = "sha256:fa0cafd3a2af231b4e113fba24a65d7922af91aeb23774a8b78228e6cd785e3e", size = 30040, upload-time = "2024-08-17T09:18:43.699Z" }, + { url = "https://files.pythonhosted.org/packages/0f/eb/04405305f290173acc0350eba6d2f1a794b57925df0398861a20fbafa415/xxhash-3.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:586886c7e89cb9828bcd8a5686b12e161368e0064d040e225e72607b43858ba2", size = 26796, upload-time = "2024-08-17T09:18:45.29Z" }, + { url = "https://files.pythonhosted.org/packages/c9/b8/e4b3ad92d249be5c83fa72916c9091b0965cb0faeff05d9a0a3870ae6bff/xxhash-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:37889a0d13b0b7d739cfc128b1c902f04e32de17b33d74b637ad42f1c55101f6", size = 31795, upload-time = "2024-08-17T09:18:46.813Z" }, + { url = "https://files.pythonhosted.org/packages/fc/d8/b3627a0aebfbfa4c12a41e22af3742cf08c8ea84f5cc3367b5de2d039cce/xxhash-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97a662338797c660178e682f3bc180277b9569a59abfb5925e8620fba00b9fc5", size = 30792, upload-time = "2024-08-17T09:18:47.862Z" }, + { url = "https://files.pythonhosted.org/packages/c3/cc/762312960691da989c7cd0545cb120ba2a4148741c6ba458aa723c00a3f8/xxhash-3.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f85e0108d51092bdda90672476c7d909c04ada6923c14ff9d913c4f7dc8a3bc", size = 220950, upload-time = "2024-08-17T09:18:49.06Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e9/cc266f1042c3c13750e86a535496b58beb12bf8c50a915c336136f6168dc/xxhash-3.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2fd827b0ba763ac919440042302315c564fdb797294d86e8cdd4578e3bc7f3", size = 199980, upload-time = "2024-08-17T09:18:50.445Z" }, + { url = "https://files.pythonhosted.org/packages/bf/85/a836cd0dc5cc20376de26b346858d0ac9656f8f730998ca4324921a010b9/xxhash-3.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82085c2abec437abebf457c1d12fccb30cc8b3774a0814872511f0f0562c768c", size 
= 428324, upload-time = "2024-08-17T09:18:51.988Z" }, + { url = "https://files.pythonhosted.org/packages/b4/0e/15c243775342ce840b9ba34aceace06a1148fa1630cd8ca269e3223987f5/xxhash-3.5.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07fda5de378626e502b42b311b049848c2ef38784d0d67b6f30bb5008642f8eb", size = 194370, upload-time = "2024-08-17T09:18:54.164Z" }, + { url = "https://files.pythonhosted.org/packages/87/a1/b028bb02636dfdc190da01951d0703b3d904301ed0ef6094d948983bef0e/xxhash-3.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c279f0d2b34ef15f922b77966640ade58b4ccdfef1c4d94b20f2a364617a493f", size = 207911, upload-time = "2024-08-17T09:18:55.509Z" }, + { url = "https://files.pythonhosted.org/packages/80/d5/73c73b03fc0ac73dacf069fdf6036c9abad82de0a47549e9912c955ab449/xxhash-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:89e66ceed67b213dec5a773e2f7a9e8c58f64daeb38c7859d8815d2c89f39ad7", size = 216352, upload-time = "2024-08-17T09:18:57.073Z" }, + { url = "https://files.pythonhosted.org/packages/b6/2a/5043dba5ddbe35b4fe6ea0a111280ad9c3d4ba477dd0f2d1fe1129bda9d0/xxhash-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bcd51708a633410737111e998ceb3b45d3dbc98c0931f743d9bb0a209033a326", size = 203410, upload-time = "2024-08-17T09:18:58.54Z" }, + { url = "https://files.pythonhosted.org/packages/a2/b2/9a8ded888b7b190aed75b484eb5c853ddd48aa2896e7b59bbfbce442f0a1/xxhash-3.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3ff2c0a34eae7df88c868be53a8dd56fbdf592109e21d4bfa092a27b0bf4a7bf", size = 210322, upload-time = "2024-08-17T09:18:59.943Z" }, + { url = "https://files.pythonhosted.org/packages/98/62/440083fafbc917bf3e4b67c2ade621920dd905517e85631c10aac955c1d2/xxhash-3.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e28503dccc7d32e0b9817aa0cbfc1f45f563b2c995b7a66c4c8a0d232e840c7", size = 414725, upload-time = "2024-08-17T09:19:01.332Z" }, + { 
url = "https://files.pythonhosted.org/packages/75/db/009206f7076ad60a517e016bb0058381d96a007ce3f79fa91d3010f49cc2/xxhash-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a6c50017518329ed65a9e4829154626f008916d36295b6a3ba336e2458824c8c", size = 192070, upload-time = "2024-08-17T09:19:03.007Z" }, + { url = "https://files.pythonhosted.org/packages/1f/6d/c61e0668943a034abc3a569cdc5aeae37d686d9da7e39cf2ed621d533e36/xxhash-3.5.0-cp313-cp313-win32.whl", hash = "sha256:53a068fe70301ec30d868ece566ac90d873e3bb059cf83c32e76012c889b8637", size = 30172, upload-time = "2024-08-17T09:19:04.355Z" }, + { url = "https://files.pythonhosted.org/packages/96/14/8416dce965f35e3d24722cdf79361ae154fa23e2ab730e5323aa98d7919e/xxhash-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:80babcc30e7a1a484eab952d76a4f4673ff601f54d5142c26826502740e70b43", size = 30041, upload-time = "2024-08-17T09:19:05.435Z" }, + { url = "https://files.pythonhosted.org/packages/27/ee/518b72faa2073f5aa8e3262408d284892cb79cf2754ba0c3a5870645ef73/xxhash-3.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:4811336f1ce11cac89dcbd18f3a25c527c16311709a89313c3acaf771def2d4b", size = 26801, upload-time = "2024-08-17T09:19:06.547Z" }, + { url = "https://files.pythonhosted.org/packages/ab/9a/233606bada5bd6f50b2b72c45de3d9868ad551e83893d2ac86dc7bb8553a/xxhash-3.5.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:2014c5b3ff15e64feecb6b713af12093f75b7926049e26a580e94dcad3c73d8c", size = 29732, upload-time = "2024-08-17T09:20:11.175Z" }, + { url = "https://files.pythonhosted.org/packages/0c/67/f75276ca39e2c6604e3bee6c84e9db8a56a4973fde9bf35989787cf6e8aa/xxhash-3.5.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fab81ef75003eda96239a23eda4e4543cedc22e34c373edcaf744e721a163986", size = 36214, upload-time = "2024-08-17T09:20:12.335Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/f8/f6c61fd794229cc3848d144f73754a0c107854372d7261419dcbbd286299/xxhash-3.5.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e2febf914ace002132aa09169cc572e0d8959d0f305f93d5828c4836f9bc5a6", size = 32020, upload-time = "2024-08-17T09:20:13.537Z" }, + { url = "https://files.pythonhosted.org/packages/79/d3/c029c99801526f859e6b38d34ab87c08993bf3dcea34b11275775001638a/xxhash-3.5.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5d3a10609c51da2a1c0ea0293fc3968ca0a18bd73838455b5bca3069d7f8e32b", size = 40515, upload-time = "2024-08-17T09:20:14.669Z" }, + { url = "https://files.pythonhosted.org/packages/62/e3/bef7b82c1997579c94de9ac5ea7626d01ae5858aa22bf4fcb38bf220cb3e/xxhash-3.5.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5a74f23335b9689b66eb6dbe2a931a88fcd7a4c2cc4b1cb0edba8ce381c7a1da", size = 30064, upload-time = "2024-08-17T09:20:15.925Z" }, +] + [[package]] name = "yarl" version = "1.21.0" From c7dee4ba612e3989d9b5a1ed1fb0a1487e00a24c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 14 Oct 2025 09:21:48 +0000 Subject: [PATCH 015/334] !4236 - [Dev] Formatting dev branch code to avoid linting pipeline failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../core/optimizer/layer_wise_optimizer.py | 42 +++++++++++-------- megatron/core/ssm/gated_delta_net.py | 6 +-- megatron/core/transformer/attention.py | 13 +++--- .../core/transformer/moe/token_dispatcher.py | 6 ++- megatron/core/transformer/spec_utils.py | 9 +++- 5 files changed, 47 insertions(+), 29 deletions(-) diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py index 6c77be48e30..2bf4e5e613b 100644 --- a/megatron/core/optimizer/layer_wise_optimizer.py +++ b/megatron/core/optimizer/layer_wise_optimizer.py 
@@ -1,13 +1,13 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import List, Optional import torch from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.utils import get_pg_rank, get_pg_size -from .clip_grads import clip_grad_by_total_norm_fp32, count_zeros_fp32, get_grad_norm_fp32 +from .clip_grads import count_zeros_fp32, get_grad_norm_fp32 from .optimizer import ChainedOptimizer, Float16OptimizerWithFloat16Params, MegatronOptimizer from .optimizer_config import OptimizerConfig @@ -15,16 +15,19 @@ class LayerWiseDistributedOptimizer(ChainedOptimizer): """Layer-wise distributed optimizer for Megatron-core models. - This is a experimental distributed optimizer wrapper that distributes weight to DP ranks by full layer. - Implemented as ChainedOptimizer to support different weights use different optimizers (e.g. muon+adam) - When using, keep all megatron distributed optimizer related options OFF. + This is a experimental distributed optimizer wrapper that distributes weight to DP ranks + by full layer. Implemented as ChainedOptimizer to support different weights use different + optimizers (e.g. muon+adam). When using, keep all megatron distributed optimizer related + options OFF. How LayerWiseDistributedOptimizer work: 1. weights are splited into lists and each rank only keep its shard in its optimizer - 2. Megatron DDP handle allreduce grad for all params, note that each rank have full model and grad + 2. Megatron DDP handle allreduce grad for all params, note that each rank have full model + and grad. 3. optimizer is already modified so only param belong to this DP rank is updated 3. grad_norm and zero counting will reduce metrics globally in step function - 4. Do regular update with chained optimizers, optimizer is already modified so partial update happens + 4. 
Do regular update with chained optimizers, optimizer is already modified so partial update + happens. 5. allgather updated params to every rank(currently through broadcast loop) """ @@ -37,7 +40,8 @@ def __init__( self.pg_collection = pg_collection self.shard_params(optimizers) # wrap optimizer after sharding to avoid unnecessary master weight creation - # TODO(deyuf): check if underlying optimizer.config need to fixed and if so can use that instead of passing + # TODO(deyuf): check if underlying optimizer.config need to fixed and if so can use + # that instead of passing if config.bf16: if isinstance(optimizers[0], Float16OptimizerWithFloat16Params): raise TypeError('LayerWiseDistributedOptimizer received Float16 optimizer already.') @@ -47,17 +51,20 @@ def __init__( super().__init__(optimizers) # TODO(kunlun, deyuf): potential future perf optimization - # since allreduce is unchanged and handled by megatron DDP, they're already in contiguous gbuf - # so instead of shard param by layer randomly, we can still shard by buf range but keep some "extras" - # to keep boundary weight not sharded. This way each rank do some duplicated work but we can call - # single allgather later and all current distopt optimization can be applied + # since allreduce is unchanged and handled by megatron DDP, they're already in contiguous + # gbuf, so instead of shard param by layer randomly, we can still shard by buf range but + # keep some "extras" to keep boundary weight not sharded. This way each rank do some + # duplicated work but we can call single allgather later and all current distopt + # optimization can be applied. def shard_params(self, optimizers): """Shard all params into lists by rank.""" - # We'll optimize sharding later if there is perf issue. 
should be ok since linear are grouped already - # Key is to create separate sharding for dp/expt parallel, saved in dp_cp_params_list, expt_dp_params_list - # example of 4 dp rank and 10 non-expert parameters p0-p9, then dp_cp_params_list will look like - # [[p0, p4, p8], [p1, p5, p9], [p2, p6], [p3, p7]] + # We'll optimize sharding later if there is perf issue. should be ok since linear are + # grouped already. + # Key is to create separate sharding for dp/expt parallel, saved in dp_cp_params_list, + # expt_dp_params_list. + # Example of 4 dp rank and 10 non-expert parameters p0-p9, then dp_cp_params_list will + # look like: [[p0, p4, p8], [p1, p5, p9], [p2, p6], [p3, p7]] # simplify when dp_cp group size is 1 if get_pg_size(self.pg_collection.dp_cp) == 1: @@ -70,7 +77,8 @@ def shard_params(self, optimizers): expt_dp_size = get_pg_size(self.pg_collection.expt_dp) self.dp_cp_params_list = [[] for _ in range(dp_cp_size)] self.expt_dp_params_list = [[] for _ in range(expt_dp_size)] - # get all param groups, this is called before init so cannot rely on Chained optimizer method + # get all param groups, this is called before init so cannot rely on + # Chained optimizer method param_groups = [] for optimizer in optimizers: param_groups += optimizer.param_groups diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py index 45588341a39..e12dfd68062 100644 --- a/megatron/core/ssm/gated_delta_net.py +++ b/megatron/core/ssm/gated_delta_net.py @@ -36,20 +36,18 @@ try: from fla.modules.l2norm import l2norm - from fla.ops.gated_delta_rule import chunk_gated_delta_rule, fused_recurrent_gated_delta_rule + from fla.ops.gated_delta_rule import chunk_gated_delta_rule HAVE_FLA = True except ImportError: chunk_gated_delta_rule = None - fused_recurrent_gated_delta_rule = None HAVE_FLA = False try: - from causal_conv1d import causal_conv1d_fn, causal_conv1d_update + from causal_conv1d import causal_conv1d_fn except ImportError: causal_conv1d_fn = None - 
causal_conv1d_update = None logger = logging.getLogger(__name__) diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 518d82a0332..870b8ad1c40 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -434,7 +434,9 @@ def _adjust_key_value_for_inference( return query, key, value, rotary_pos_emb, attn_mask_type, block_table @abstractmethod - def get_query_key_value_tensors(self, hidden_states, key_value_states, output_gate, split_qkv=True): + def get_query_key_value_tensors( + self, hidden_states, key_value_states, output_gate, split_qkv=True + ): """ This method needs to be implemented based on whether the derived class is "self-attn" or "cross-attn". @@ -1083,10 +1085,7 @@ def _compare(srcs, tgts, names, parallelism): ) def get_query_key_value_tensors( - self, hidden_states, - key_value_states=None, - output_gate=False, - split_qkv=True + self, hidden_states, key_value_states=None, output_gate=False, split_qkv=True ): """ Derives `query`, `key`, `value` tensors from `hidden_states`. @@ -1226,7 +1225,9 @@ def __init__( is_expert=False, ) - def get_query_key_value_tensors(self, hidden_states, key_value_states, output_gate=False, split_qkv=True): + def get_query_key_value_tensors( + self, hidden_states, key_value_states, output_gate=False, split_qkv=True + ): """ Derives `query` tensor from `hidden_states`, and `key`/`value` tensors from `key_value_states`. 
diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index 82fb7b00583..ec64d1887a1 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -33,6 +33,8 @@ from megatron.core.transformer.moe.shared_experts import SharedExpertMLP from megatron.core.transformer.transformer_config import TransformerConfig +logger = logging.getLogger(__name__) + """ We use the following notation throughout this file: H: hidden size B: micro batch size @@ -989,7 +991,9 @@ def dispatch( # DeepEP only supports float32 probs if self.token_probs.dtype != torch.float32: if self.token_probs.dtype in [torch.bfloat16, torch.float16]: - print("DeepEP only supports float32 probs, please set --moe-router-dtype=fp32") + logger.info( + "DeepEP only supports float32 probs, please set --moe-router-dtype=fp32" + ) self.token_probs = self.token_probs.float() # downcast or upcast hidden_states, dispatched_indices, dispatched_probs, num_tokens_per_expert, handle = ( fused_dispatch( diff --git a/megatron/core/transformer/spec_utils.py b/megatron/core/transformer/spec_utils.py index 897d88d2aa3..24df1add0eb 100644 --- a/megatron/core/transformer/spec_utils.py +++ b/megatron/core/transformer/spec_utils.py @@ -1,9 +1,12 @@ # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
+import logging import types from dataclasses import dataclass, field from typing import Tuple, Union +logger = logging.getLogger(__name__) + @dataclass class ModuleSpec: @@ -38,12 +41,15 @@ def import_module(module_path: Tuple[str]): try: module = __import__(base_path, globals(), locals(), [name]) except ImportError as e: - print(f"couldn't import module due to {e}") + logger.error(f"couldn't import module due to {e}") return None return vars(module)[name] def get_module(spec_or_module: Union[ModuleSpec, type], **additional_kwargs): + """Retrieve the module class or function specified by a ModuleSpec or + return it as is if already provided. + """ # If a module clas is already provided return it as is if isinstance(spec_or_module, (type, types.FunctionType)): return spec_or_module @@ -57,6 +63,7 @@ def get_module(spec_or_module: Union[ModuleSpec, type], **additional_kwargs): def build_module(spec_or_module: Union[ModuleSpec, type], *args, **kwargs): + """Build a module from a ModuleSpec or return it as is if already provided.""" # If the passed `spec_or_module` is # a `Function`, then return it as it is # NOTE: to support an already initialized module add the following condition From 4c3a1be68cfac256e31a230722fbce439b66aa32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 14 Oct 2025 09:23:13 +0000 Subject: [PATCH 016/334] !4211 - ci(fix): Cherrypicking from forks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/00.pre.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitlab/stages/00.pre.yml b/.gitlab/stages/00.pre.yml index c91ffc80995..c912d5297d2 100644 --- a/.gitlab/stages/00.pre.yml +++ b/.gitlab/stages/00.pre.yml @@ -176,14 +176,16 @@ pre:maybe_cherry_pick_to_main: TITLE=$(echo -E $MR | jq '.title' | tr -d '"') MILESTONE_ID=$(echo -E $MR | jq '.milestone.id' | tr -d '"') - git remote set-url origin 
"https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/$CI_PROJECT_NAMESPACE/megatron-lm.git" + git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/$CI_PROJECT_PATH.git" + git remote add mr-origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/$CI_MERGE_REQUEST_SOURCE_PROJECT_PATH.git" + git config --global user.email "mcore-bot@nvidia.com" git config --global user.name "Mcore Bot" git fetch origin dev - git fetch origin $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME + git fetch mr-origin $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME git checkout $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME - START_COMMIT=$(git merge-base origin/dev origin/$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME) + START_COMMIT=$(git merge-base origin/dev mr-origin/$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME) END_COMMIT=$(git rev-parse HEAD) git fetch origin main From 7c350f5af0a13ef9ee01da4a5fb3e7376956972d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 14 Oct 2025 09:23:33 +0000 Subject: [PATCH 017/334] !4239 - ci: Check out dev for formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 2 +- tools/autoformat.sh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 71f49f55055..513fe430c21 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -289,7 +289,7 @@ test:linting_formatting: - git fetch origin main:main - | if [[ "$CI_MERGE_REQUEST_PROJECT_PATH" == "$CI_MERGE_REQUEST_SOURCE_PROJECT_PATH" ]]; then - bash tools/autoformat.sh + BASE_REF="$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" bash tools/autoformat.sh set -e git fetch origin $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME git checkout $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME diff --git a/tools/autoformat.sh b/tools/autoformat.sh index 6c3e76b3eaa..85d1d19c7cb 100755 --- 
a/tools/autoformat.sh +++ b/tools/autoformat.sh @@ -15,6 +15,8 @@ CHECK_ONLY=${CHECK_ONLY:-false} SKIP_DOCS=${SKIP_DOCS:-false} BASE_REF=${BASE_REF:-main} +git remote set-url origin "https://${GITLAB_ENDPOINT}/$CI_PROJECT_NAMESPACE/megatron-lm.git" +git fetch origin ${BASE_REF} CHANGED_FILES=$(git diff --name-only --diff-filter=d --merge-base origin/${BASE_REF} megatron/core tests/ | grep '\.py$' || true) ADDITIONAL_ARGS="" ADDITIONAL_BLACK_ARGS="" From 46687cdd8586aaa561d169a843db7848edf7e86a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 14 Oct 2025 17:56:25 +0000 Subject: [PATCH 018/334] ci: Fix formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 513fe430c21..34418612b92 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -287,6 +287,8 @@ test:linting_formatting: fi - set +e - git fetch origin main:main + - echo -e "machine gitlab-master.nvidia.com\n login gitlab-ci-token\n password $CI_JOB_TOKEN" >~/.netrc + - chmod 600 ~/.netrc" - | if [[ "$CI_MERGE_REQUEST_PROJECT_PATH" == "$CI_MERGE_REQUEST_SOURCE_PROJECT_PATH" ]]; then BASE_REF="$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" bash tools/autoformat.sh From 50ed5eb1021a65b6de7b636ae84acd176e8319a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 14 Oct 2025 18:15:02 +0000 Subject: [PATCH 019/334] ci: Fix linting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 34418612b92..358ad740e01 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -288,7 +288,7 @@ test:linting_formatting: - 
set +e - git fetch origin main:main - echo -e "machine gitlab-master.nvidia.com\n login gitlab-ci-token\n password $CI_JOB_TOKEN" >~/.netrc - - chmod 600 ~/.netrc" + - chmod 600 ~/.netrc - | if [[ "$CI_MERGE_REQUEST_PROJECT_PATH" == "$CI_MERGE_REQUEST_SOURCE_PROJECT_PATH" ]]; then BASE_REF="$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" bash tools/autoformat.sh From b01ad5b0361082e447dbe7b9b9764dc3411e059a Mon Sep 17 00:00:00 2001 From: Li Tao Date: Tue, 14 Oct 2025 11:15:08 -0700 Subject: [PATCH 020/334] ADLR/megatron-lm!4225 - [Dev][NVFP4][MOE] Proper NVFP4 Zero Padding for MOE Co-authored-by: Zhongbo Zhu --- megatron/core/fp4_utils.py | 14 ++++++ megatron/core/transformer/moe/README.md | 4 +- megatron/core/transformer/moe/experts.py | 47 ++++++++++++------- .../core/transformer/moe/token_dispatcher.py | 27 +++++++++-- .../core/transformer/transformer_config.py | 26 +++++++--- megatron/training/arguments.py | 11 +++-- .../transformer/moe/test_token_dispatcher.py | 12 ++--- 7 files changed, 100 insertions(+), 41 deletions(-) diff --git a/megatron/core/fp4_utils.py b/megatron/core/fp4_utils.py index eae4bf91de6..eb02a4796b0 100644 --- a/megatron/core/fp4_utils.py +++ b/megatron/core/fp4_utils.py @@ -47,6 +47,20 @@ def is_nvfp4tensor(tensor: torch.Tensor) -> bool: return HAVE_TE_FP4_TENSOR_CLASS and isinstance(tensor, FP4_TENSOR_CLASS) +def get_fp4_align_size(fp4_recipe: Fp4Recipe) -> int: + """ + Get the alignment size required for FP4 GEMM. + FP4 GEMM requires Blackwell and later architectures. + + The value 32 is a hardware requirement: TMA (Tensor Memory Accelerator) requires + a 16-byte aligned address for efficient memory access. Since FP4 uses 4 bits per value, + 16 bytes (128 bits) corresponds to 32 FP4 values. Therefore, the alignment size for FP4 + is 32. With this alignment, NVFP4 GEMM can be performed efficiently. 
+ """ + # pylint: disable=unused-argument + return 32 + + def dequantize_fp4_tensor(fp4_tensor: torch.Tensor) -> torch.Tensor: """Dequantize a fp4 tensor to a higher precision tensor.""" if is_te_min_version("2.7.0.dev0"): diff --git a/megatron/core/transformer/moe/README.md b/megatron/core/transformer/moe/README.md index c7c22201404..56be6fc2463 100644 --- a/megatron/core/transformer/moe/README.md +++ b/megatron/core/transformer/moe/README.md @@ -235,7 +235,7 @@ Enable A2A overlap across different batches inspired by the DSv3 DualPipe implme | --moe-router-fusion | Enable fusion for MoE TopK routing and aux-loss computation. This is only supported in TransformerEngine 2.7.0 and above. | | --moe-router-bias-update-rate | The expert bias is updated based on the number of assigned tokens to each expert in a global batch, where the bias is increased for experts with less assigned tokens and decreased for experts with more assigned tokens. Default is 1e-3 same as that used in DeepSeekV3. | | --moe-router-force-load-balancing | (Experimental) Force override routing to balance token distribution using random logits for MoE routers, supporting naive top-k and group-limited top-k. This experimental feature is for benchmarking purposes only! | -| --moe-router-padding-for-fp8 | Pad the routing_map to make sure the number of tokens each expert received is a multiple of 16/32 for FP8 precision. It is suggested to enable this for dropless training with FP8 precision when num_local_experts > 1. This is a more efficient way to pad for FP8 which eliminates the explicit padding in the GroupedMLP layer. | +| --moe-router-padding-for-quantization | Pad the routing_map to make sure the number of tokens each expert received is a multiple of 16/32 for FP8/FP4 precision. It is suggested to enable this for dropless training with FP8 precision when num_local_experts > 1. This is a more efficient way to pad for FP8 which eliminates the explicit padding in the GroupedMLP layer. 
| | --moe-aux-loss-coeff | Scaling coefficient for the aux loss: a starting value of 1e-2 is recommended. Default is 0.0. | | --moe-z-loss-coeff | Scaling coefficient for the z-loss: a starting value of 1e-3 is recommended. Default is None. | | --moe-input-jitter-eps | Add noise to the input tensor by applying jitter with a specified epsilon value. Default is None. | @@ -464,7 +464,7 @@ Therefore, there are two recommended ways during the first 200 steps to avoid th **FP8 Training Best Practice** - Using latest version of [TransformerEngine](https://github.com/NVIDIA/TransformerEngine). -- Enable router padding with `--moe-router-padding-for-fp8` to reduce padding overhead. +- Enable router padding with `--moe-router-padding-for-quantization` to reduce padding overhead. - Enable native FP8 weights with `--fp8-param-gather` to reduce weights memory cost. ### Reference Best Parallel Mapping diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index d8dd3d03f02..e73864a50fa 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -21,6 +21,7 @@ ShardedTensorFactory, ) from megatron.core.dist_checkpointing.utils import replace_prefix_for_sharding +from megatron.core.fp4_utils import get_fp4_align_size from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.fusions.fused_bias_geglu import quick_gelu, weighted_bias_quick_geglu_impl from megatron.core.fusions.fused_bias_swiglu import weighted_bias_swiglu_impl @@ -134,8 +135,10 @@ def glu(x): self.config.recompute_granularity == 'selective' and "moe_act" in self.config.recompute_modules ) - if self.activation_recompute and self.config.fp8: - raise ValueError("moe_act recompute for fp8 cannot work with the legacy GroupedMLP.") + if self.activation_recompute and (self.config.fp8 or self.config.fp4): + raise ValueError( + "moe_act recompute for fp8 or fp4 cannot work with the legacy GroupedMLP." 
+ ) @jit_fuser def activation_func_with_probs(x, probs): @@ -809,15 +812,15 @@ def __init__( self.config.recompute_granularity == 'selective' and "moe_act" in self.config.recompute_modules ) - if self.activation_recompute and self.config.fp8: + if self.activation_recompute and (self.config.fp8 or self.config.fp4): from megatron.core.extensions.transformer_engine import set_save_original_input set_save_original_input(self.linear_fc2) - if self.config.fp8: - assert HAVE_TE, "FP8 requires TE." - self.fp8_padding = Fp8Padding(self.num_local_experts) - self.fp8_unpadding = Fp8Unpadding(self.num_local_experts) + if self.config.fp8 or self.config.fp4: + assert HAVE_TE, "FP8 and FP4 requires TE." + self.quantization_padding = Fp8Padding(self.num_local_experts) + self.quantization_unpadding = Fp8Unpadding(self.num_local_experts) @staticmethod def _apply_bias(intermediate_parallel, bias_parallel, tokens_per_expert, permuted_probs): @@ -857,12 +860,12 @@ def forward( output (torch.Tensor): The output of the local experts. 
""" tokens_per_expert = tokens_per_expert.tolist() - if self.config.fp8: + if self.config.fp8 or self.config.fp4: actual_tokens_per_expert = tokens_per_expert - permuted_local_hidden_states, tokens_per_expert = self.fp8_padding( + permuted_local_hidden_states, tokens_per_expert = self.quantization_padding( permuted_local_hidden_states, tokens_per_expert ) - permuted_probs, _ = self.fp8_padding( + permuted_probs, _ = self.quantization_padding( permuted_probs.unsqueeze(-1), actual_tokens_per_expert ) else: @@ -954,8 +957,8 @@ def glu(x): output, output_bias = self.linear_fc2(intermediate_parallel, tokens_per_expert) # upad and concat the output - if self.config.fp8: - output = self.fp8_unpadding(output, actual_tokens_per_expert) + if self.config.fp8 or self.config.fp4: + output = self.quantization_unpadding(output, actual_tokens_per_expert) output = self._apply_bias(output, output_bias, tokens_per_expert, permuted_probs) output_bias = None @@ -1051,10 +1054,18 @@ def __init__( ) self.local_experts.append(expert) - def _pad_tensor_for_fp8(self, hidden, probs): + def _get_align_size_for_quantization(self): + """Get the alignment size for quantization.""" + if self.config.fp8: + return get_fp8_align_size(self.config.fp8_recipe) + elif self.config.fp4: + return get_fp4_align_size(self.config.fp4_recipe) + return 16 + + def _pad_tensor_for_quantization(self, hidden, probs): """Padding tensor shape to multiples of 16/32.""" actual_num_tokens = hidden.shape[0] - divisor = get_fp8_align_size(self.config.fp8_recipe) + divisor = self._get_align_size_for_quantization() padded_num_tokens = ceil(actual_num_tokens / divisor) * divisor - actual_num_tokens if padded_num_tokens > 0: pad_tensor = torch.zeros( @@ -1086,8 +1097,8 @@ def forward( permuted_probs = torch.ones_like(permuted_probs) if self.num_local_experts == 1: - if self.config.fp8: - hidden, probs = self._pad_tensor_for_fp8( + if self.config.fp8 or self.config.fp4: + hidden, probs = self._pad_tensor_for_quantization( 
permuted_local_hidden_states, permuted_probs ) output, output_bias = self.local_experts[0](hidden, probs) @@ -1106,8 +1117,8 @@ def forward( output_local_list = [] for expert, tokens, probs in zip(self.local_experts, tokens_list, probs_list): - if self.config.fp8: - hidden, probs = self._pad_tensor_for_fp8(tokens, probs) + if self.config.fp8 or self.config.fp4: + hidden, probs = self._pad_tensor_for_quantization(tokens, probs) output, output_bias = expert(hidden, probs) output = output[: tokens.shape[0]] else: diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index ec64d1887a1..142aa74a19e 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -8,6 +8,7 @@ from megatron.core import utils from megatron.core.config import is_experimental_enabled +from megatron.core.fp4_utils import get_fp4_align_size from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.fusions.fused_indices_converter import fused_indices_to_multihot from megatron.core.fusions.fused_pad_routing_map import fused_pad_routing_map @@ -195,6 +196,14 @@ def set_shared_experts(self, shared_experts): assert self.config.moe_shared_expert_overlap self.shared_experts = shared_experts + def get_align_size_for_quantization(self): + """Get the alignment size for quantization.""" + if self.config.fp8: + return get_fp8_align_size(self.config.fp8_recipe) + elif self.config.fp4: + return get_fp4_align_size(self.config.fp4_recipe) + return 16 + class MoEAllGatherTokenDispatcher(MoETokenDispatcher): """ @@ -474,7 +483,7 @@ def preprocess(self, routing_map: torch.Tensor) -> torch.Tensor: if ( self.config.moe_expert_capacity_factor is not None - or self.config.moe_router_padding_for_fp8 + or self.config.moe_router_padding_for_quantization ): # When using token dropping or router padding, output size is dynamic. 
# Need to sync output size GPU->CPU before allocating output buffer @@ -576,8 +585,8 @@ def dispatch_preprocess( assert routing_map.dtype == torch.bool, "Expected bool tensor for mask" hidden_states = hidden_states.view(-1, self.hidden_shape[-1]) - if self.config.moe_router_padding_for_fp8: - pad_multiple = get_fp8_align_size(self.config.fp8_recipe) + if self.config.moe_router_padding_for_quantization: + pad_multiple = self.get_align_size_for_quantization() if is_experimental_enabled() and self.config.moe_permute_fusion: self.routing_map = fused_pad_routing_map(self.routing_map, pad_multiple) else: @@ -1075,7 +1084,7 @@ def _pad_routing_map( """ Pad the routing map to the nearest multiple of the pad_multiple. """ - pad_multiple = get_fp8_align_size(self.config.fp8_recipe) + pad_multiple = self.get_align_size_for_quantization() num_input_tokens = routing_map.shape[0] target_tokens_per_expert = ( @@ -1110,7 +1119,7 @@ def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> self.dispatched_routing_map, self.dispatched_probs = self._indices_to_multihot( self.dispatched_indices, self.dispatched_probs ) - if self.config.moe_router_padding_for_fp8: + if self.config.moe_router_padding_for_quantization: self.dispatched_routing_map, self.tokens_per_expert = self._pad_routing_map( self.dispatched_routing_map, self.tokens_per_expert ) @@ -1138,6 +1147,14 @@ def get_restored_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> ) return hidden_states + def get_align_size_for_quantization(self): + """Get the alignment size for quantization.""" + if self.config.fp8: + return get_fp8_align_size(self.config.fp8_recipe) + elif self.config.fp4: + return get_fp4_align_size(self.config.fp4_recipe) + return 16 + class MoEFlexTokenDispatcher(MoETokenDispatcher): """A flexible token dispatcher that abstracts the underlying tensor and expert diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 
dc11239836f..8b36425ca2a 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -497,10 +497,14 @@ class TransformerConfig(ModelParallelConfig): DEPRECATED and replaced by moe_router_num_groups and moe_router_group_topk. """ + moe_router_padding_for_quantization: Optional[bool] = False + """Whether to pad the routing_map to make sure the number of tokens each expert receives + is a multiple of 16/32 for quantized precision (e.g., FP8, FP4). This can remove the explicit + padding in the GroupedMLP layer.""" + moe_router_padding_for_fp8: Optional[bool] = False - """Whether to pad the routing_map to make sure the number of tokens each expert received - is a multiple of 16/32 for FP8 precision. This can remove the explicit padding in the - GroupedMLP layer.""" + """[Compatibility alias for moe_router_padding_for_quantization] + Enabling this will also enable moe_router_padding_for_quantization.""" moe_router_num_groups: Optional[int] = None """Number of groups to divide experts into for group-limited routing. @@ -1389,13 +1393,23 @@ def __post_init__(self): ) if self.moe_router_padding_for_fp8: - if self.fp8 is None: - raise ValueError("fp8 must be specified when moe_router_padding_for_fp8 is True.") + # enable moe_router_padding_for_quantization + warnings.warn( + "--moe-router-padding-for-fp8 is going to be deprecated. " + "Use --moe-router-padding-for-quantization instead." + ) + self.moe_router_padding_for_quantization = True + + if self.moe_router_padding_for_quantization: + if self.fp8 is None and self.fp4 is None: + raise ValueError( + "fp8/fp4 must be specified when moe_router_padding_for_quantization is True." + ) if self.moe_token_dispatcher_type in ["allgather", "alltoall_seq"]: raise ValueError( "allgather and alltoall_seq dispatcher does not support " - "moe_router_padding_for_fp8." + "moe_router_padding_for_quantization." 
) if ( diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 29db36ca6e0..905538ffc9e 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -3129,12 +3129,15 @@ def _add_moe_args(parser): 'The default value 1e-3 is same as that used in DeepSeekV3.') group.add_argument('--moe-router-force-load-balancing', action='store_true', help='[Experimental] Force override routing to balance token distribution using random logits for MoE routers, supporting naive top-k and group-limited top-k. This experimental feature is for benchmarking purposes only!') - group.add_argument('--moe-router-padding-for-fp8', action='store_true', + group.add_argument('--moe-router-padding-for-quantization', action='store_true', help='Pad the routing_map to make sure the number of tokens each expert received ' - 'is a multiple of 16/32 for FP8 precision. It is suggested to enable this for ' - 'dropless training with FP8 precision when num_local_experts > 1. This is a more ' - 'efficient way to pad for FP8 which eliminates the explicit padding in the ' + 'is a multiple of 16/32 for FP8/FP4 precision. It is suggested to enable this for ' + 'dropless training with FP8/FP4 precision when num_local_experts > 1. 
This is a more ' + 'efficient way to pad for FP8/FP4 which eliminates the explicit padding in the ' 'GroupedMLP layer.') + group.add_argument('--moe-router-padding-for-fp8', action='store_true', + help='[Compatibility alias for --moe-router-padding-for-quantization] ' + 'Enabling this will also enable --moe-router-padding-for-quantization.') group.add_argument('--moe-aux-loss-coeff', type=float, nargs='+', default=0.0, help='Scaling coefficient for the aux loss: a starting value of 1e-2 is recommended.') group.add_argument('--moe-z-loss-coeff', type=float, default=None, diff --git a/tests/unit_tests/transformer/moe/test_token_dispatcher.py b/tests/unit_tests/transformer/moe/test_token_dispatcher.py index 328b8837790..82138bc637d 100644 --- a/tests/unit_tests/transformer/moe/test_token_dispatcher.py +++ b/tests/unit_tests/transformer/moe/test_token_dispatcher.py @@ -280,15 +280,15 @@ def dispatcher_router_padding_for_fp8_test(self): """Test if the routing map is padded correctly for FP8 training. The test runs the forward flow twice: - 1. First with moe_router_padding_for_fp8=False - 2. Then with moe_router_padding_for_fp8=True + 1. First with moe_router_padding_for_quantization=False + 2. Then with moe_router_padding_for_quantization=True We verify that: 1. The results are the same in both cases 2. 
The number of tokens received by each expert is padded to a multiple of 16 """ - # First run with moe_router_padding_for_fp8 = False - moe_layer = self.new_moe_layer(moe_router_padding_for_fp8=False) + # First run with moe_router_padding_for_quantization = False + moe_layer = self.new_moe_layer(moe_router_padding_for_quantization=False) num_tokens = 32 hidden_states = torch.randn( @@ -309,8 +309,8 @@ def dispatcher_router_padding_for_fp8_test(self): grad_1 = hidden_states.grad.clone() hidden_states.grad = None - # Run with moe_router_padding_for_fp8 = True - moe_layer_2 = self.new_moe_layer(moe_router_padding_for_fp8=True, fp8="hybrid") + # Run with moe_router_padding_for_quantization = True + moe_layer_2 = self.new_moe_layer(moe_router_padding_for_quantization=True, fp8="hybrid") moe_layer_2.load_state_dict(moe_layer.state_dict()) probs_2, indices_2 = moe_layer_2.router(hidden_states) From 061bc3765ab6132f9caa0203c7fe7227bc4f5c48 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Tue, 14 Oct 2025 18:48:17 -0700 Subject: [PATCH 021/334] ADLR/megatron-lm!4248 - ADLR/megatron-lm!4159 - Fix ProcessGroupCollection missing initialization --- megatron/core/optimizer/__init__.py | 11 +- megatron/core/parallel_state.py | 22 ++-- megatron/core/process_groups_config.py | 103 ++++++++++++++---- tests/unit_tests/test_optimizer.py | 17 ++- .../unit_tests/test_process_groups_config.py | 33 ++++++ .../test_transformer_block_custom_pgs.py | 11 +- 6 files changed, 161 insertions(+), 36 deletions(-) diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index c644160cda7..307538fad22 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -281,6 +281,7 @@ def _get_megatron_optimizer_based_on_param_groups( data_parallel_group: Optional[torch.distributed.ProcessGroup] = None, data_parallel_group_gloo: Optional[torch.distributed.ProcessGroup] = None, data_parallel_group_idx: Optional[int] = None, + intra_dist_opt_group: 
Optional[torch.distributed.ProcessGroup] = None, distributed_optimizer_instance_id: Optional[int] = 0, ) -> MegatronOptimizer: """Get Megatron optimizer based on parameter groups. @@ -459,11 +460,7 @@ def init_state_fn(opt, config=None): # This is needed for case where num_distributed_optimizer_instances > 1. In this case, # weight gradients are all-reduced across optimizer instances, so each instance has # the duplicated weight gradients, need to reduce gradient stats inside each instance. - setattr( - optimizer, - 'grad_stats_parallel_group', - parallel_state.get_intra_distributed_optimizer_instance_group(), - ) + setattr(optimizer, 'grad_stats_parallel_group', intra_dist_opt_group) else: optimizer = Float16OptimizerWithFloat16Params(*optimizer_args) setattr(optimizer, 'grad_stats_parallel_group', model_parallel_group) @@ -532,6 +529,7 @@ def get_megatron_optimizer( expt_tp_pp_group = process_groups['expt_tp_pp_group'] intra_dp_cp_group_gloo = process_groups['intra_dp_cp_group_gloo'] intra_expt_dp_group_gloo = process_groups['intra_expt_dp_group_gloo'] + intra_dist_opt_group = process_groups['intra_dist_opt_group'] model_parallel_rank = get_pg_rank(mp_group) @@ -570,6 +568,7 @@ def get_megatron_optimizer( data_parallel_group=dp_cp_group, data_parallel_group_gloo=intra_dp_cp_group_gloo, data_parallel_group_idx=model_parallel_rank, + intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, ) ) @@ -610,6 +609,7 @@ def get_megatron_optimizer( data_parallel_group=intra_dp_cp_group, data_parallel_group_gloo=intra_dp_cp_group_gloo, data_parallel_group_idx=model_parallel_rank, + intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, ) ) @@ -643,6 +643,7 @@ def get_megatron_optimizer( data_parallel_group=intra_expt_dp_group, data_parallel_group_gloo=expt_data_parallel_group_gloo, data_parallel_group_idx=expt_model_parallel_rank, + 
intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, ) ) diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index be7eaf27ce4..1e41bf9d8c2 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -13,6 +13,8 @@ from .utils import GlobalMemoryBuffer, is_torch_min_version +logger = logging.getLogger(__name__) + try: import einops @@ -1892,23 +1894,25 @@ def get_expert_data_parallel_world_size(partial_expert_data_parallel=False): return 0 -def get_intra_distributed_optimizer_instance_group(): +def get_intra_distributed_optimizer_instance_group(check_initialized=True): """Get the group of all GPUs in a distributed optimizer instance.""" - assert ( - _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP is not None - ), "Intra distributed optimizer instance group is not initialized" + if check_initialized: + assert ( + _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP is not None + ), "Intra distributed optimizer instance group is not initialized" return _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP -def get_inter_distributed_optimizer_instance_group(): +def get_inter_distributed_optimizer_instance_group(check_initialized=True): """Get the group spanning the different distributed optimizer instances. Attention and MLP/Expert share same inter-instance group, so only built inter_partial_expert_data_parallel_group, and return it at here. 
""" - assert _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP is not None, ( - "Attention and MLP/Expert share same inter distributed optimize instance group, " - "which has not been initialized" - ) + if check_initialized: + assert _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP is not None, ( + "Attention and MLP/Expert share same inter distributed optimize instance group, " + "which has not been initialized" + ) return _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 989a31b6f33..07c922ea685 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -127,9 +127,12 @@ class ProcessGroupCollection: # _INTRA_EXPERT_DATA_PARALLEL_GROUP intra_expt_dp: torch.distributed.ProcessGroup = field(init=False) - # _INTER_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP + # _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP inter_dist_opt: torch.distributed.ProcessGroup = field(init=False) + # _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP + intra_dist_opt: torch.distributed.ProcessGroup = field(init=False) + def __init__(self, **kwargs): for key in kwargs: if key in [field.name for field in fields(self)]: @@ -161,29 +164,71 @@ def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): # Mapping of attribute names to their initialization functions pg_to_func = { - 'tp': parallel_state.get_tensor_model_parallel_group, - 'pp': parallel_state.get_pipeline_model_parallel_group, - 'mp': parallel_state.get_model_parallel_group, - 'cp': parallel_state.get_context_parallel_group, - 'tp_cp': parallel_state.get_tensor_and_context_parallel_group, - 'hcp': parallel_state.get_hierarchical_context_parallel_groups, - 'ep': parallel_state.get_expert_model_parallel_group, - 'expt_tp': parallel_state.get_expert_tensor_parallel_group, - 'tp_ep': parallel_state.get_expert_tensor_and_model_parallel_group, - 'tp_ep_pp': parallel_state.get_expert_tensor_model_pipeline_parallel_group, - 
'embd': parallel_state.get_embedding_group, - 'pos_embd': parallel_state.get_position_embedding_group, + 'tp': partial(parallel_state.get_tensor_model_parallel_group, check_initialized=False), + 'pp': partial( + parallel_state.get_pipeline_model_parallel_group, check_initialized=False + ), + 'mp': partial(parallel_state.get_model_parallel_group, check_initialized=False), + 'cp': partial(parallel_state.get_context_parallel_group, check_initialized=False), + 'tp_cp': partial( + parallel_state.get_tensor_and_context_parallel_group, check_initialized=False + ), + 'hcp': partial( + parallel_state.get_hierarchical_context_parallel_groups, check_initialized=False + ), + 'ep': partial(parallel_state.get_expert_model_parallel_group, check_initialized=False), + 'expt_tp': partial( + parallel_state.get_expert_tensor_parallel_group, check_initialized=False + ), + 'tp_ep': partial( + parallel_state.get_expert_tensor_and_model_parallel_group, check_initialized=False + ), + 'tp_ep_pp': partial( + parallel_state.get_expert_tensor_model_pipeline_parallel_group, + check_initialized=False, + ), + 'embd': partial(parallel_state.get_embedding_group, check_initialized=False), + 'pos_embd': partial( + parallel_state.get_position_embedding_group, check_initialized=False + ), + 'dp': parallel_state.get_data_parallel_group, + 'dp_cp': partial(parallel_state.get_data_parallel_group, with_context_parallel=True), + 'intra_dp_cp': partial( + parallel_state.get_data_parallel_group, + with_context_parallel=True, + partial_data_parallel=True, + ), + 'intra_expt_dp': partial( + parallel_state.get_expert_data_parallel_group, + check_initialized=False, + partial_expert_data_parallel=True, + ), + 'inter_dist_opt': partial( + parallel_state.get_inter_distributed_optimizer_instance_group, + check_initialized=False, + ), + 'intra_dist_opt': partial( + parallel_state.get_intra_distributed_optimizer_instance_group, + check_initialized=False, + ), # TODO (Hepteract): remove this once distributed checkpoint 
is refactored - 'expt_dp': parallel_state.get_expert_data_parallel_group, + 'expt_dp': partial( + parallel_state.get_expert_data_parallel_group, check_initialized=False + ), 'tp_dp_cp': partial( - parallel_state.get_tensor_and_data_parallel_group, with_context_parallel=True + parallel_state.get_tensor_and_data_parallel_group, + check_initialized=False, + with_context_parallel=True, ), } + assert all( + pg in pg_to_func for pg in required_pgs + ), f"Initialization function for process group not defined for all \ + ProcessGroupCollection fields" + # Build initialization dict by calling appropriate parallel_state get_foo_group - init_dict = { - pg: pg_to_func[pg](check_initialized=False) for pg in required_pgs if pg in pg_to_func - } + init_dict = {pg: pg_to_func[pg]() for pg in required_pgs} return cls(**init_dict) @@ -212,6 +257,7 @@ def setup_process_groups_for_optimizer( - mp_group: Model parallel group - expt_tp_pp_group: Expert tensor-model-pipeline parallel group - inter_dist_opt_group: Inter distributed optimizer group (may be None) + - intra_dist_opt_group: Intra distributed optimizer group (may be None) - intra_dp_cp_group_gloo: Gloo version of intra_dp_cp_group (may be None) - intra_expt_dp_group_gloo: Gloo version of intra_expt_dp_group (may be None) """ @@ -233,6 +279,7 @@ def setup_process_groups_for_optimizer( intra_expt_dp_group = parallel_state.get_expert_data_parallel_group( partial_expert_data_parallel=True ) + intra_dist_opt_group = parallel_state.get_intra_distributed_optimizer_instance_group() # Gloo groups if use_gloo_process_groups: @@ -310,20 +357,32 @@ def setup_process_groups_for_optimizer( hasattr(pg_collection, 'intra_dp_cp') and hasattr(pg_collection, 'intra_expt_dp') and hasattr(pg_collection, 'inter_dist_opt') + and hasattr(pg_collection, 'intra_dist_opt') ): raise ValueError( - "intra_dp_cp, intra_expt_dp, and inter_dist_opt " + "intra_dp_cp, intra_expt_dp, inter_dist_opt, and intra_dist_opt " "process groups are required when using 
multiple optimizer " "instances (>1) but not provided in pg_collection" ) intra_dp_cp_group = pg_collection.intra_dp_cp intra_expt_dp_group = pg_collection.intra_expt_dp inter_dist_opt_group = pg_collection.inter_dist_opt + + if ddp_config.use_distributed_optimizer: + if not hasattr(pg_collection, 'intra_dist_opt'): + raise ValueError( + "intra_dist_opt process group is required but not provided in " + "pg_collection. Please explicitly set it to None if you don't need it." + ) + intra_dist_opt_group = pg_collection.intra_dist_opt + else: + intra_dist_opt_group = None else: # No ddp_config available - use simple fallback intra_dp_cp_group = dp_cp_group intra_expt_dp_group = expt_dp_group inter_dist_opt_group = None + intra_dist_opt_group = None # 5. Model communication groups if not hasattr(pg_collection, 'mp'): @@ -359,6 +418,7 @@ def setup_process_groups_for_optimizer( 'mp_group': mp_group, 'expt_tp_pp_group': expt_tp_pp_group, 'inter_dist_opt_group': inter_dist_opt_group, + 'intra_dist_opt_group': intra_dist_opt_group, 'intra_dp_cp_group_gloo': intra_dp_cp_group_gloo, 'intra_expt_dp_group_gloo': intra_expt_dp_group_gloo, } @@ -411,6 +471,11 @@ def setup_process_groups_for_ddp( if ddp_config.num_distributed_optimizer_instances > 1 else None ), + 'intra_dist_opt_group': ( + parallel_state.get_intra_distributed_optimizer_instance_group() + if ddp_config.use_distributed_optimizer + else None + ), } else: # Use provided process group collection with validation and fallbacks diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py index 35969565a18..d8f6e3a2eeb 100644 --- a/tests/unit_tests/test_optimizer.py +++ b/tests/unit_tests/test_optimizer.py @@ -420,10 +420,16 @@ def test_get_megatron_optimizer_with_custom_process_groups(world_size, tp_size, mp_mesh = device_mesh["pp", "tp"] mp_group = mp_mesh._flatten().get_group() + # Create intra_dist_opt group + # It has the same ranks as dp_cp group when num_distributed_optimizer_instances is not 
> 1 + intra_dist_opt_mesh = device_mesh["dp", "cp"] + intra_dist_opt_group = intra_dist_opt_mesh._flatten().get_group() + # Create process group configurations pg_collection = ProcessGroupCollection() pg_collection.dp = dp_group pg_collection.dp_cp = dp_cp_group + pg_collection.intra_dist_opt = intra_dist_opt_group pg_collection.expt_dp = None # Not using expert parallelism in this test pg_collection.tp = tp_group @@ -547,12 +553,19 @@ def test_get_megatron_optimizer_custom_process_groups_validation(): pg_collection=pg_collection_no_expt_dp, ) - # Test 4: Missing mp attribute in pg_collection + # Test 4: Missing intra_dist_opt and mp attribute in pg_collection pg_collection_complete = ProcessGroupCollection() pg_collection_complete.dp = torch.distributed.new_group() pg_collection_complete.expt_dp = None # Explicitly set to None as allowed - # Missing required 'mp' attribute + # Missing required 'intra_dist_opt' attribute + with pytest.raises(ValueError, match="intra_dist_opt process group is required"): + get_megatron_optimizer( + config=optimizer_config, model_chunks=model_chunks, pg_collection=pg_collection_complete + ) + + pg_collection_complete.intra_dist_opt = None # Explicitly set to None as allowed + # Missing required 'mp' attribute with pytest.raises(ValueError, match="mp process group is required"): get_megatron_optimizer( config=optimizer_config, model_chunks=model_chunks, pg_collection=pg_collection_complete diff --git a/tests/unit_tests/test_process_groups_config.py b/tests/unit_tests/test_process_groups_config.py index 0b7e886d61a..032de47e951 100644 --- a/tests/unit_tests/test_process_groups_config.py +++ b/tests/unit_tests/test_process_groups_config.py @@ -1,8 +1,10 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+import pytest import torch.distributed as dist from megatron.core.process_groups_config import ProcessGroupCollection +from tests.unit_tests.test_utilities import Utils class TestProcessGroupsConfig: @@ -64,3 +66,34 @@ def test_hierarchical_context_parallel_groups(self, mocker): assert len(model_pgs.hcp) == 2 assert model_pgs.hcp[0] == mock_pg1 assert model_pgs.hcp[1] == mock_pg2 + + +class TestPGConfigDefaultInitialization: + + def setup_method(self, method): + Utils.initialize_model_parallel(1, 1) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + def test_default_initialization(self): + """Test default initialization of ProcessGroupCollection.""" + # Create instance + model_pgs = ProcessGroupCollection.use_mpu_process_groups() + + # Test that instance was created successfully + assert hasattr(model_pgs, 'tp') + assert hasattr(model_pgs, 'pp') + assert hasattr(model_pgs, 'dp') + assert hasattr(model_pgs, 'dp_cp') + + # Test that only required process groups were initialized + model_pgs = ProcessGroupCollection.use_mpu_process_groups(['tp', 'pp', 'cp']) + assert hasattr(model_pgs, 'tp') + assert hasattr(model_pgs, 'pp') + assert hasattr(model_pgs, 'cp') + assert not hasattr(model_pgs, 'dp') + + # Test that an error is raised if an invalid process group is requested + with pytest.raises(ValueError, match=r"Invalid process groups requested"): + model_pgs = ProcessGroupCollection.use_mpu_process_groups(['tp', 'pp', 'foo']) diff --git a/tests/unit_tests/transformer/test_transformer_block_custom_pgs.py b/tests/unit_tests/transformer/test_transformer_block_custom_pgs.py index e8d708db8aa..bb64efe7449 100644 --- a/tests/unit_tests/transformer/test_transformer_block_custom_pgs.py +++ b/tests/unit_tests/transformer/test_transformer_block_custom_pgs.py @@ -422,10 +422,19 @@ def test_fwd_bwd_pass_non_uniform_transformer_block( attn_pg_collection = ProcessGroupCollection(tp=attn_tp_group, cp=attn_cp_group) mlp_pg_collection = 
ProcessGroupCollection(tp=mlp_tp_group) + default_pg_collection = ProcessGroupCollection.use_mpu_process_groups( + required_pgs=['tp', 'pp', 'cp'] + ) # Get the layer spec with different process groups for attention and mlp hetro_layer_spec = _gpt_te_layer_spec_with_hetro_pgs(attn_pg_collection, mlp_pg_collection) - custom_block = TransformerBlock(transformer_config, hetro_layer_spec).cuda().bfloat16() + custom_block = ( + TransformerBlock( + transformer_config, hetro_layer_spec, pg_collection=default_pg_collection + ) + .cuda() + .bfloat16() + ) sequence_length = 4096 micro_batch_size = 2 From b007b91525b4f08ac25dc1dcc5a27d3f9854009a Mon Sep 17 00:00:00 2001 From: Dennis Liu Date: Tue, 14 Oct 2025 20:23:09 -0700 Subject: [PATCH 022/334] ADLR/megatron-lm!4207 - Refactor dev functional tests. --- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../golden_values_dev_dgx_h100.json | 110 ++-- .../golden_values_dev_dgxh100_coreweave.json | 500 +++++++++--------- .../model_config.yaml | 5 +- .../model_config.yaml.tmp | 132 +++++ .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../golden_values_dev_dgxh100_coreweave.json | 344 ++++++++++++ .../model_config.yaml | 5 +- .../golden_values_dev_dgxh100_coreweave.json | 498 ++++++++--------- .../model_config.yaml | 9 +- .../golden_values_dev_dgxh100_coreweave.json | 344 ++++++++++++ .../model_config.yaml | 5 +- tests/test_utils/recipes/bert.yaml | 101 ---- tests/test_utils/recipes/moe.yaml | 70 +-- tests/test_utils/recipes/t5.yaml | 116 ---- 17 files changed, 1400 insertions(+), 851 deletions(-) create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml.tmp create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json create mode 100644 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_coreweave.json delete mode 100644 tests/test_utils/recipes/bert.yaml delete mode 100644 tests/test_utils/recipes/t5.yaml diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml index 2354ecd7fd9..041d35cab11 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml @@ -57,4 +57,4 @@ MODEL_ARGS: --no-bias-gelu-fusion: true --log-memory-to-tensorboard: true --use-tp-pp-dp-mapping: true -TEST_TYPE: regular +TEST_TYPE: ckpt-resume diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml index 7c0a103200a..7f9613ba222 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml @@ -56,4 +56,4 @@ MODEL_ARGS: --disable-bias-linear: true --no-bias-gelu-fusion: true --log-memory-to-tensorboard: true -TEST_TYPE: regular +TEST_TYPE: ckpt-resume diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json index 9816ef27d80..5f29261761b 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json @@ -4,17 +4,17 @@ "end_step": 50, "step_interval": 5, "values": { - "1": 11.04748, - "5": 9.53183, - "10": 9.0582, - "15": 8.04864, - "20": 7.90062, - "25": 7.67495, - "30": 7.64523, - "35": 7.21226, - "40": 7.54531, - "45": 7.1859, - "50": 7.03421 + "1": 11.04737, + "5": 9.52647, + "10": 9.05826, + "15": 8.04442, + "20": 7.89153, + "25": 7.67197, + "30": 7.64284, + "35": 7.2114, + "40": 7.54179, + "45": 7.18472, + "50": 7.03329 } }, "num-zeros": { @@ -22,17 +22,17 @@ "end_step": 50, "step_interval": 5, "values": { - "1": 38802624.0, - "5": 256032528.0, - "10": 734802368.0, - "15": 733708032.0, - "20": 964047040.0, - "25": 827440640.0, - "30": 753621760.0, - "35": 721925632.0, - "40": 585270144.0, - "45": 511642912.0, - "50": 447736576.0 + "1": 38802604.0, + "5": 252879712.0, + "10": 728514944.0, + "15": 711699968.0, + "20": 992357632.0, + "25": 884068160.0, + "30": 794514496.0, + "35": 712491648.0, + "40": 588410624.0, + "45": 521081920.0, + "50": 432013312.0 } }, "mem-allocated-bytes": { @@ -58,17 +58,17 @@ "end_step": 50, "step_interval": 5, "values": { - "1": 54207885312.0, - "5": 57055031296.0, - "10": 57055031296.0, - "15": 57055031296.0, - "20": 57055031296.0, - "25": 57055031296.0, - "30": 57055031296.0, - "35": 57055031296.0, - "40": 57055031296.0, - "45": 57055031296.0, - "50": 57221648384.0 + "1": 22860046336.0, + "5": 25729300480.0, + "10": 25729300480.0, + "15": 25888860160.0, + "20": 25888860160.0, + "25": 25888860160.0, + "30": 25888860160.0, + "35": 25888860160.0, + "40": 26620856320.0, + "45": 26620856320.0, + "50": 26620856320.0 } }, "mtp_1 loss": { @@ -76,17 
+76,17 @@ "end_step": 50, "step_interval": 5, "values": { - "1": 11.07654, - "5": 9.81153, - "10": 9.12699, - "15": 7.99246, - "20": 7.83056, - "25": 7.61672, - "30": 7.58819, - "35": 7.15342, - "40": 7.47463, - "45": 7.12042, - "50": 6.97381 + "1": 11.07644, + "5": 9.81173, + "10": 9.12712, + "15": 7.99147, + "20": 7.82967, + "25": 7.61319, + "30": 7.58479, + "35": 7.15178, + "40": 7.47349, + "45": 7.12034, + "50": 6.97212 } }, "iteration-time": { @@ -94,17 +94,17 @@ "end_step": 50, "step_interval": 5, "values": { - "1": 50.25533, - "5": 2.27026, - "10": 1.07136, - "15": 1.14652, - "20": 1.0723, - "25": 1.07693, - "30": 1.05572, - "35": 1.06285, - "40": 1.06142, - "45": 1.07083, - "50": 1.07307 + "1": 59.91943, + "5": 2.44769, + "10": 1.07968, + "15": 1.04699, + "20": 0.93032, + "25": 0.92301, + "30": 0.92916, + "35": 0.94157, + "40": 0.95917, + "45": 0.94382, + "50": 0.94866 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json index 0f2637a9511..17dce39fb21 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04748, - "2": 11.03561, - "3": 9.58774, - "4": 9.25819, - "5": 9.53583, - "6": 9.8804, - "7": 9.48247, - "8": 8.93575, - "9": 8.65813, - "10": 9.0567, - "11": 8.49445, - "12": 8.52444, - "13": 8.45239, - "14": 7.97323, - "15": 8.0476, - "16": 8.07971, - "17": 8.09081, - "18": 7.76437, - "19": 8.14892, - "20": 7.89868, - "21": 7.59371, - "22": 7.54743, - "23": 7.43222, - "24": 7.4302, - "25": 7.67579, - "26": 7.06929, - "27": 7.62041, - 
"28": 7.32495, - "29": 7.49042, - "30": 7.64391, - "31": 7.39435, - "32": 7.58789, - "33": 7.64037, - "34": 7.69778, - "35": 7.20998, - "36": 7.08538, - "37": 7.42584, - "38": 7.18804, - "39": 7.55054, - "40": 7.54446, - "41": 7.49287, - "42": 7.24937, - "43": 7.23587, - "44": 7.41595, - "45": 7.18755, - "46": 6.89949, - "47": 7.29966, - "48": 7.14134, - "49": 7.58963, - "50": 7.03602 + "1": 11.04737, + "2": 11.03581, + "3": 9.58839, + "4": 9.258, + "5": 9.52647, + "6": 9.907, + "7": 9.48764, + "8": 8.94128, + "9": 8.65518, + "10": 9.05826, + "11": 8.49585, + "12": 8.52509, + "13": 8.4535, + "14": 7.97148, + "15": 8.04442, + "16": 8.08093, + "17": 8.08585, + "18": 7.76263, + "19": 8.14979, + "20": 7.89153, + "21": 7.57836, + "22": 7.54353, + "23": 7.43311, + "24": 7.42342, + "25": 7.67197, + "26": 7.07162, + "27": 7.6134, + "28": 7.31484, + "29": 7.48975, + "30": 7.64284, + "31": 7.39141, + "32": 7.58528, + "33": 7.6358, + "34": 7.69534, + "35": 7.2114, + "36": 7.08322, + "37": 7.42539, + "38": 7.18849, + "39": 7.5489, + "40": 7.54179, + "41": 7.48887, + "42": 7.24738, + "43": 7.2341, + "44": 7.41462, + "45": 7.18472, + "46": 6.89672, + "47": 7.30005, + "48": 7.14262, + "49": 7.58803, + "50": 7.03329 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802612.0, - "2": 38543592.0, - "3": 38739528.0, - "4": 279937824.0, - "5": 259189728.0, - "6": 271446400.0, - "7": 604773504.0, - "8": 768892544.0, - "9": 645824128.0, - "10": 744257088.0, - "11": 718888576.0, - "12": 746732544.0, - "13": 871990976.0, - "14": 821645632.0, - "15": 724250816.0, - "16": 932241472.0, - "17": 648958912.0, - "18": 649120000.0, - "19": 925992960.0, - "20": 989207936.0, - "21": 819324096.0, - "22": 736955072.0, - "23": 910497792.0, - "24": 876716672.0, - "25": 843170688.0, - "26": 809573824.0, - "27": 854086912.0, - "28": 802857664.0, - "29": 805523328.0, - "30": 775645184.0, - "31": 771754624.0, - "32": 749733696.0, - "33": 718385216.0, - "34": 
724771200.0, - "35": 737655104.0, - "36": 690419968.0, - "37": 673203456.0, - "38": 627239552.0, - "39": 614047168.0, - "40": 607288512.0, - "41": 582590592.0, - "42": 548211200.0, - "43": 532740640.0, - "44": 554239168.0, - "45": 514790528.0, - "46": 350258560.0, - "47": 472420128.0, - "48": 453788736.0, - "49": 440597216.0, - "50": 303063296.0 + "1": 38802604.0, + "2": 38543572.0, + "3": 38739364.0, + "4": 283087744.0, + "5": 252879712.0, + "6": 261986800.0, + "7": 595325120.0, + "8": 778328192.0, + "9": 667827904.0, + "10": 728514944.0, + "11": 718857664.0, + "12": 778200448.0, + "13": 884592256.0, + "14": 846830080.0, + "15": 711699968.0, + "16": 929099456.0, + "17": 718131072.0, + "18": 690071360.0, + "19": 944853824.0, + "20": 992357632.0, + "21": 794133440.0, + "22": 909975808.0, + "23": 919936064.0, + "24": 895588736.0, + "25": 884068160.0, + "26": 869339392.0, + "27": 857232640.0, + "28": 846888320.0, + "29": 821245440.0, + "30": 794514496.0, + "31": 756025600.0, + "32": 762315264.0, + "33": 759280512.0, + "34": 759373696.0, + "35": 712491648.0, + "36": 677834240.0, + "37": 632307392.0, + "38": 614655616.0, + "39": 607761664.0, + "40": 588410624.0, + "41": 582593792.0, + "42": 573377664.0, + "43": 579927552.0, + "44": 579405952.0, + "45": 521081920.0, + "46": 488627232.0, + "47": 478708544.0, + "48": 475807040.0, + "49": 450025824.0, + "50": 432013312.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 55055331328.0, - "2": 57809321984.0, - "3": 57918455808.0, - "4": 57918455808.0, - "5": 57918455808.0, - "6": 57918455808.0, - "7": 57918455808.0, - "8": 57918455808.0, - "9": 57918455808.0, - "10": 57918455808.0, - "11": 57918455808.0, - "12": 57918455808.0, - "13": 57931390976.0, - "14": 57931390976.0, - "15": 57931390976.0, - "16": 57931390976.0, - "17": 57931390976.0, - "18": 57931390976.0, - "19": 57931390976.0, - "20": 57931390976.0, - "21": 57931390976.0, - "22": 57931390976.0, - "23": 
57931390976.0, - "24": 57931390976.0, - "25": 57931390976.0, - "26": 57931390976.0, - "27": 57931390976.0, - "28": 57931390976.0, - "29": 57931390976.0, - "30": 57931390976.0, - "31": 57931390976.0, - "32": 58003226624.0, - "33": 58003226624.0, - "34": 58003226624.0, - "35": 58003226624.0, - "36": 58003226624.0, - "37": 58003226624.0, - "38": 58003226624.0, - "39": 58003226624.0, - "40": 58003226624.0, - "41": 58003226624.0, - "42": 58003226624.0, - "43": 58003226624.0, - "44": 58183614464.0, - "45": 58234208256.0, - "46": 58555555840.0, - "47": 58555555840.0, - "48": 58555555840.0, - "49": 58555555840.0, - "50": 58780934144.0 + "1": 22860046336.0, + "2": 25612713984.0, + "3": 25729300480.0, + "4": 25729300480.0, + "5": 25729300480.0, + "6": 25729300480.0, + "7": 25729300480.0, + "8": 25729300480.0, + "9": 25729300480.0, + "10": 25729300480.0, + "11": 25729300480.0, + "12": 25729300480.0, + "13": 25888860160.0, + "14": 25888860160.0, + "15": 25888860160.0, + "16": 25888860160.0, + "17": 25888860160.0, + "18": 25888860160.0, + "19": 25888860160.0, + "20": 25888860160.0, + "21": 25888860160.0, + "22": 25888860160.0, + "23": 25888860160.0, + "24": 25888860160.0, + "25": 25888860160.0, + "26": 25888860160.0, + "27": 25888860160.0, + "28": 25888860160.0, + "29": 25888860160.0, + "30": 25888860160.0, + "31": 25888860160.0, + "32": 25888860160.0, + "33": 25888860160.0, + "34": 25888860160.0, + "35": 25888860160.0, + "36": 25888860160.0, + "37": 25888860160.0, + "38": 26026612736.0, + "39": 26610898944.0, + "40": 26620856320.0, + "41": 26620856320.0, + "42": 26620856320.0, + "43": 26620856320.0, + "44": 26620856320.0, + "45": 26620856320.0, + "46": 26620856320.0, + "47": 26620856320.0, + "48": 26620856320.0, + "49": 26620856320.0, + "50": 26620856320.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07654, - "2": 11.07406, - "3": 10.53881, - "4": 10.09803, - "5": 9.81154, - "6": 10.06236, - "7": 9.79762, - "8": 9.07117, 
- "9": 8.87049, - "10": 9.127, - "11": 8.49853, - "12": 8.53046, - "13": 8.42444, - "14": 7.847, - "15": 7.99077, - "16": 8.05015, - "17": 8.00064, - "18": 7.73104, - "19": 8.11087, - "20": 7.82933, - "21": 7.52501, - "22": 7.49916, - "23": 7.36982, - "24": 7.37235, - "25": 7.61578, - "26": 7.02029, - "27": 7.56014, - "28": 7.2681, - "29": 7.44399, - "30": 7.58618, - "31": 7.32468, - "32": 7.50596, - "33": 7.5715, - "34": 7.63581, - "35": 7.15224, - "36": 7.01784, - "37": 7.35163, - "38": 7.12551, - "39": 7.48656, - "40": 7.47408, - "41": 7.42096, - "42": 7.17595, - "43": 7.16059, - "44": 7.34289, - "45": 7.11969, - "46": 6.82753, - "47": 7.23525, - "48": 7.08042, - "49": 7.51043, - "50": 6.9735 + "1": 11.07644, + "2": 11.07413, + "3": 10.53865, + "4": 10.09826, + "5": 9.81173, + "6": 10.07241, + "7": 9.79857, + "8": 9.07114, + "9": 8.86995, + "10": 9.12712, + "11": 8.49873, + "12": 8.53173, + "13": 8.426, + "14": 7.84827, + "15": 7.99147, + "16": 8.05097, + "17": 8.00164, + "18": 7.73164, + "19": 8.11121, + "20": 7.82967, + "21": 7.52376, + "22": 7.49787, + "23": 7.3697, + "24": 7.37154, + "25": 7.61319, + "26": 7.02025, + "27": 7.559, + "28": 7.26735, + "29": 7.44367, + "30": 7.58479, + "31": 7.32416, + "32": 7.50469, + "33": 7.56964, + "34": 7.63474, + "35": 7.15178, + "36": 7.01748, + "37": 7.34976, + "38": 7.12419, + "39": 7.4868, + "40": 7.47349, + "41": 7.42217, + "42": 7.17743, + "43": 7.16238, + "44": 7.34394, + "45": 7.12034, + "46": 6.82708, + "47": 7.235, + "48": 7.07985, + "49": 7.51123, + "50": 6.97212 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 69.29797, - "2": 1.7261, - "3": 1.40981, - "4": 2.16562, - "5": 1.7862, - "6": 1.7469, - "7": 1.96688, - "8": 1.97301, - "9": 1.74665, - "10": 1.69613, - "11": 1.02979, - "12": 1.02408, - "13": 1.03261, - "14": 1.02432, - "15": 1.0529, - "16": 1.04491, - "17": 1.03693, - "18": 1.03399, - "19": 1.03627, - "20": 1.02284, - "21": 1.01667, - "22": 1.02932, 
- "23": 1.03591, - "24": 1.03466, - "25": 1.03149, - "26": 1.03165, - "27": 1.02342, - "28": 1.03777, - "29": 1.04061, - "30": 1.05641, - "31": 1.02382, - "32": 1.01775, - "33": 1.03039, - "34": 1.03693, - "35": 1.03153, - "36": 1.02699, - "37": 1.02756, - "38": 1.02919, - "39": 1.01773, - "40": 1.03491, - "41": 1.03152, - "42": 1.03035, - "43": 1.0221, - "44": 1.05201, - "45": 1.02579, - "46": 1.02798, - "47": 1.03857, - "48": 1.02772, - "49": 1.0408, - "50": 1.03745 + "1": 63.23561, + "2": 1.12406, + "3": 0.92471, + "4": 1.95991, + "5": 1.98896, + "6": 1.40765, + "7": 1.83926, + "8": 1.3919, + "9": 1.58886, + "10": 0.76479, + "11": 0.74358, + "12": 0.74438, + "13": 0.75457, + "14": 0.74884, + "15": 0.7437, + "16": 0.81872, + "17": 0.74739, + "18": 0.75196, + "19": 0.76647, + "20": 0.74522, + "21": 0.73871, + "22": 0.73978, + "23": 0.73654, + "24": 0.73919, + "25": 0.73709, + "26": 0.78913, + "27": 0.75434, + "28": 0.7477, + "29": 0.73673, + "30": 0.74952, + "31": 0.75513, + "32": 0.74212, + "33": 0.74433, + "34": 0.74812, + "35": 0.7512, + "36": 0.74822, + "37": 0.74176, + "38": 0.7553, + "39": 0.77677, + "40": 0.76693, + "41": 0.76205, + "42": 0.76182, + "43": 0.76665, + "44": 0.76169, + "45": 0.74735, + "46": 0.74195, + "47": 0.75025, + "48": 0.74129, + "49": 0.74367, + "50": 0.74308 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml index 5390afcd09b..0cce9b4edb6 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml @@ -17,8 +17,7 @@ MODEL_ARGS: --use-distributed-optimizer: true --overlap-grad-reduce: true --overlap-param-gather: true - # Use unfused attention since MLA with fused attention and 
deterministic mode leads to NaN - --attention-backend: unfused # TODO: switch back to fused attention after fix + --attention-backend: fused # Training args --use-mcore-models: true --sequence-parallel: true @@ -123,7 +122,7 @@ MODEL_ARGS: # Add mixed precision args --bf16: true --exit-interval: 50 -TEST_TYPE: regular +TEST_TYPE: ckpt-resume METRICS: - "iteration-time" - "lm loss" diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml.tmp b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml.tmp new file mode 100644 index 00000000000..e36d590170d --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml.tmp @@ -0,0 +1,132 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION +MODEL_ARGS: + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --num-virtual-stages-per-pipeline-rank: 4 + --expert-model-parallel-size: 4 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + --overlap-grad-reduce: true + --overlap-param-gather: true + --attention-backend: fused + # Training args + --use-mcore-models: true + --sequence-parallel: true + --disable-bias-linear: true + --micro-batch-size: 4 + --global-batch-size: 32 + --train-iters: 50 + --exit-duration-in-mins: 230 + --no-check-for-nan-in-loss-and-grad: true + --no-rope-fusion: true + --cross-entropy-loss-fusion: true + --cross-entropy-fusion-impl: native + --manual-gc: true + --manual-gc-interval: 100 + # Transformer Engine args + --transformer-impl: transformer_engine + # Data args + --seq-length: 4096 + --data-cache-path: /workspace/data/cache + --data-path: 
/workspace/data/gpt3_data/my-gpt3_00_text_document + --vocab-file: /workspace/data/gpt3_data/bpe/vocab.json + --merge-file: /workspace/data/gpt3_data/bpe/merges.txt + --split: 949,50,1 + # Add network size args + --num-layers: 16 + --hidden-size: 1024 + --ffn-hidden-size: 4096 + --num-attention-heads: 32 + --kv-channels: 128 + --max-position-embeddings: 4096 + --position-embedding-type: rope + --rotary-base: 10000 + --make-vocab-size-divisible-by: 3232 + --normalization: RMSNorm + --norm-epsilon: 1e-6 + --swiglu: true + --untie-embeddings-and-output-weights: true + --multi-latent-attention: true + # Comment out the following MTP args to disable MTP + --mtp-num-layers: 1 + --mtp-loss-scaling-factor: 0.1 + # Add regularization args + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --clip-grad: 1.0 + --weight-decay: 0.1 + --qk-layernorm: true + # Add learning rate args + --lr-warmup-fraction: .01 + --lr: 0.00015 + --min-lr: 1.0e-5 + --lr-decay-style: cosine + --adam-beta1: 0.9 + --adam-beta2: 0.95 + # Add MoE args + --num-experts: 32 + --moe-layer-freq: ([0]*1+[1]*15) + --moe-ffn-hidden-size: 1024 + --moe-shared-expert-intermediate-size: 1024 + --moe-router-load-balancing-type: seq_aux_loss + --moe-router-topk: 4 + --moe-token-dispatcher-type: alltoall + --moe-router-pre-softmax: true + --moe-grouped-gemm: true + --moe-aux-loss-coeff: 1e-4 + --moe-router-group-topk: 2 + --moe-router-num-groups: 4 + --moe-router-topk-scaling-factor: 2.0 + --moe-router-score-function: sigmoid + --moe-router-enable-expert-bias: true + --moe-router-bias-update-rate: 1e-3 + --moe-router-dtype: fp32 + --moe-permute-fusion: true + # Add MLA args + --q-lora-rank: 1536 + --kv-lora-rank: 512 + --qk-head-dim: 128 + --qk-pos-emb-head-dim: 64 + --v-head-dim: 128 + --rotary-scaling-factor: 40 + --mscale: 1.0 + --mscale-all-dim: 1.0 + # Add validation args + --eval-iters: 32 + --eval-interval: 200 + # Add checkpointing args + --save: 
/opt/megatron-lm/runs/82c8dc72-e955-4033-a246-b61784f57fa7/checkpoints + --load: /tmp/checkpoints/ + --save-interval: 25 + # Add initialization args + --init-method-std: 0.02 + # Add logging args + --log-timers-to-tensorboard: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-params-norm: true + --log-validation-ppl-to-tensorboard: true + --log-throughput: true + --log-interval: 1 + --logging-level: 40 + --tensorboard-dir: /opt/megatron-lm/runs/82c8dc72-e955-4033-a246-b61784f57fa7/tensorboard + # Add mixed precision args + --bf16: true + --exit-interval: 50 +TEST_TYPE: regular +METRICS: + - "iteration-time" + - "lm loss" + - "num-zeros" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" + - "mtp_1 loss" diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml index 19a8b4fc639..4e553f2f9ed 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml @@ -33,7 +33,7 @@ MODEL_ARGS: --clip-grad: 1.0 --lr-warmup-fraction: .01 --log-interval: 1 - --save-interval: 10000 + --save-interval: 25 --eval-interval: 1000 --eval-iters: 10 --transformer-impl: transformer_engine @@ -61,4 +61,4 @@ MODEL_ARGS: --attention-backend: unfused --no-bias-gelu-fusion: true --log-memory-to-tensorboard: true -TEST_TYPE: regular +TEST_TYPE: ckpt-resume diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml index f27db4a8021..7ba366f1d1b 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml @@ -34,7 +34,7 @@ MODEL_ARGS: --clip-grad: 1.0 --lr-warmup-fraction: .01 --log-interval: 1 - --save-interval: 10000 + --save-interval: 25 --eval-interval: 1000 --eval-iters: 10 --transformer-impl: transformer_engine @@ -63,4 +63,4 @@ MODEL_ARGS: --no-bias-gelu-fusion: true --log-memory-to-tensorboard: true --exit-interval: 50 -TEST_TYPE: regular +TEST_TYPE: ckpt-resume diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..cdd69820131 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,344 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.04747, + "2": 11.03489, + "3": 9.59197, + "4": 9.2607, + "5": 9.25316, + "6": 9.70587, + "7": 9.46635, + "8": 9.01114, + "9": 8.72173, + "10": 9.06704, + "11": 8.59397, + "12": 8.5643, + "13": 8.44846, + "14": 7.97921, + "15": 8.04905, + "16": 8.09886, + "17": 8.04172, + "18": 7.76126, + "19": 8.14014, + "20": 7.86027, + "21": 7.54995, + "22": 7.53872, + "23": 7.40693, + "24": 7.40435, + "25": 7.66065, + "26": 7.05772, + "27": 7.59552, + "28": 7.30627, + "29": 7.48007, + "30": 7.63012, + "31": 7.38325, + "32": 7.57843, + "33": 7.62828, + "34": 7.68919, + "35": 7.20168, + 
"36": 7.07506, + "37": 7.41935, + "38": 7.17961, + "39": 7.54005, + "40": 7.53821, + "41": 7.47888, + "42": 7.24055, + "43": 7.2256, + "44": 7.40803, + "45": 7.1775, + "46": 6.88877, + "47": 7.29436, + "48": 7.13581, + "49": 7.58407, + "50": 7.02865 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 38802648.0, + "2": 38543564.0, + "3": 38740428.0, + "4": 264349216.0, + "5": 224711328.0, + "6": 359592256.0, + "7": 683584064.0, + "8": 850747136.0, + "9": 781151872.0, + "10": 863934336.0, + "11": 784956928.0, + "12": 787741824.0, + "13": 906642432.0, + "14": 793413952.0, + "15": 724351360.0, + "16": 929182656.0, + "17": 728944832.0, + "18": 715233856.0, + "19": 894586752.0, + "20": 942182208.0, + "21": 712310464.0, + "22": 903670336.0, + "23": 882199552.0, + "24": 867334400.0, + "25": 874751488.0, + "26": 844191104.0, + "27": 813243648.0, + "28": 626785920.0, + "29": 808773120.0, + "30": 602759296.0, + "31": 793783168.0, + "32": 768613888.0, + "33": 721639040.0, + "34": 734472448.0, + "35": 734570880.0, + "36": 703058560.0, + "37": 692109824.0, + "38": 649260992.0, + "39": 620422656.0, + "40": 604143616.0, + "41": 598320448.0, + "42": 573424384.0, + "43": 576846912.0, + "44": 570038144.0, + "45": 540081024.0, + "46": 501251008.0, + "47": 497637664.0, + "48": 494691072.0, + "49": 490977312.0, + "50": 463542304.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 7321331200.0, + "2": 7321333248.0, + "3": 7321333248.0, + "4": 7321333248.0, + "5": 7321333248.0, + "6": 7321333248.0, + "7": 7321333248.0, + "8": 7321333248.0, + "9": 7321333248.0, + "10": 7321333248.0, + "11": 7321333248.0, + "12": 7321333248.0, + "13": 7321333248.0, + "14": 7321333248.0, + "15": 7321333248.0, + "16": 7321333248.0, + "17": 7321333248.0, + "18": 7321333248.0, + "19": 7321333248.0, + "20": 7321333248.0, + "21": 7321333248.0, + "22": 7321333248.0, + "23": 7321333248.0, + "24": 
7321333248.0, + "25": 7321333248.0, + "26": 7321333248.0, + "27": 7321333248.0, + "28": 7321333248.0, + "29": 7321333248.0, + "30": 7321333248.0, + "31": 7321333248.0, + "32": 7321333248.0, + "33": 7321333248.0, + "34": 7321333248.0, + "35": 7321333248.0, + "36": 7321333248.0, + "37": 7321333248.0, + "38": 7321333248.0, + "39": 7321333248.0, + "40": 7321333248.0, + "41": 7321333248.0, + "42": 7321333248.0, + "43": 7321333248.0, + "44": 7321333248.0, + "45": 7321333248.0, + "46": 7321333248.0, + "47": 7321333248.0, + "48": 7321333248.0, + "49": 7321333248.0, + "50": 7321333248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22198937600.0, + "2": 24950007808.0, + "3": 24950007808.0, + "4": 24950007808.0, + "5": 24950007808.0, + "6": 24950007808.0, + "7": 24950007808.0, + "8": 24950007808.0, + "9": 24950007808.0, + "10": 24950007808.0, + "11": 24950007808.0, + "12": 24950007808.0, + "13": 24950007808.0, + "14": 24950007808.0, + "15": 24950007808.0, + "16": 24950007808.0, + "17": 24950007808.0, + "18": 24950007808.0, + "19": 24950007808.0, + "20": 24950007808.0, + "21": 24950007808.0, + "22": 24950007808.0, + "23": 24950007808.0, + "24": 24950007808.0, + "25": 24950007808.0, + "26": 24950007808.0, + "27": 25072799744.0, + "28": 25343600640.0, + "29": 25625788416.0, + "30": 25625788416.0, + "31": 25628155904.0, + "32": 25707937792.0, + "33": 25707937792.0, + "34": 25707937792.0, + "35": 25707937792.0, + "36": 25707937792.0, + "37": 25707937792.0, + "38": 25707937792.0, + "39": 25707937792.0, + "40": 25707937792.0, + "41": 25707937792.0, + "42": 25707937792.0, + "43": 25707937792.0, + "44": 25707937792.0, + "45": 25707937792.0, + "46": 25707937792.0, + "47": 25707937792.0, + "48": 25707937792.0, + "49": 25707937792.0, + "50": 25707937792.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.07742, + "2": 11.07559, + "3": 10.5272, + "4": 
10.08877, + "5": 9.81119, + "6": 9.88673, + "7": 9.70278, + "8": 8.9944, + "9": 8.79002, + "10": 9.07171, + "11": 8.44594, + "12": 8.50226, + "13": 8.40983, + "14": 7.83955, + "15": 7.97902, + "16": 8.03361, + "17": 7.99642, + "18": 7.71928, + "19": 8.10116, + "20": 7.82113, + "21": 7.51112, + "22": 7.48906, + "23": 7.35335, + "24": 7.35884, + "25": 7.60836, + "26": 7.01391, + "27": 7.54721, + "28": 7.25644, + "29": 7.43129, + "30": 7.57524, + "31": 7.321, + "32": 7.50218, + "33": 7.56009, + "34": 7.62505, + "35": 7.14234, + "36": 7.0092, + "37": 7.34655, + "38": 7.11926, + "39": 7.4822, + "40": 7.46808, + "41": 7.41272, + "42": 7.1698, + "43": 7.15213, + "44": 7.33728, + "45": 7.11437, + "46": 6.81846, + "47": 7.2282, + "48": 7.07339, + "49": 7.50345, + "50": 6.96783 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 71.2429, + "2": 1.39205, + "3": 1.3521, + "4": 1.31895, + "5": 0.86745, + "6": 0.86249, + "7": 1.0949, + "8": 1.03022, + "9": 0.80778, + "10": 0.82011, + "11": 0.81426, + "12": 0.8098, + "13": 0.81209, + "14": 0.81361, + "15": 0.80969, + "16": 0.81315, + "17": 0.85127, + "18": 0.80813, + "19": 0.81928, + "20": 0.81012, + "21": 0.8101, + "22": 0.81064, + "23": 0.80537, + "24": 0.81149, + "25": 0.81261, + "26": 0.81877, + "27": 0.80314, + "28": 0.80383, + "29": 0.83563, + "30": 0.80254, + "31": 0.80006, + "32": 0.80658, + "33": 0.81426, + "34": 0.81824, + "35": 0.81124, + "36": 0.80978, + "37": 0.80679, + "38": 0.80838, + "39": 0.81028, + "40": 0.81044, + "41": 0.81268, + "42": 0.81318, + "43": 0.79311, + "44": 0.80471, + "45": 0.80526, + "46": 0.79795, + "47": 0.80592, + "48": 0.80158, + "49": 0.80635, + "50": 0.79969 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml index 7ebd9f0d1af..c920037f0f2 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml @@ -17,8 +17,7 @@ MODEL_ARGS: --use-distributed-optimizer: true --overlap-grad-reduce: true --overlap-param-gather: true - # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN - --attention-backend: unfused # TODO: switch back to fused attention after fix + --attention-backend: fused # Training args --use-mcore-models: true --sequence-parallel: true @@ -126,7 +125,7 @@ MODEL_ARGS: --fp8-format: hybrid --fp8-recipe: tensorwise --exit-interval: 50 -TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular +TEST_TYPE: ckpt-resume # Restored to ckpt-resume; the WAR for #513 is no longer needed METRICS: - "iteration-time" - "lm loss" diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json index 58eb3fc16cd..7c3cd772f4f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.95004, - "2": 10.9521, - "3": 10.5115, - "4": 9.96454, - "5": 9.93941, + "1": 10.94947, + "2": 10.95236, + "3": 10.50817, + "4": 9.96373, + "5": 9.93907, "6": 9.67273, - "7": 10.20975,
- "8": 9.49716, - "9": 9.55902, - "10": 9.79742, - "11": 9.30109, - "12": 9.40483, - "13": 9.39546, - "14": 8.84681, - "15": 9.02444, - "16": 9.07121, - "17": 9.04574, - "18": 8.75678, - "19": 9.18159, - "20": 8.8595, - "21": 8.53503, - "22": 8.55182, - "23": 8.42441, - "24": 8.37608, - "25": 8.64304, - "26": 7.97393, - "27": 8.56806, - "28": 8.19764, - "29": 8.3928, - "30": 8.67283, - "31": 8.289, - "32": 8.43572, - "33": 8.5568, - "34": 8.66018, - "35": 8.07934, - "36": 7.94976, - "37": 8.29565, - "38": 7.98044, - "39": 8.39201, - "40": 8.35513, - "41": 8.31876, - "42": 8.0583, - "43": 8.03283, - "44": 8.24243, - "45": 8.10277, - "46": 7.61696, - "47": 8.15273, - "48": 8.00569, - "49": 8.38688, - "50": 7.81491 + "7": 10.2137, + "8": 9.4963, + "9": 9.56483, + "10": 9.7979, + "11": 9.30107, + "12": 9.40465, + "13": 9.39581, + "14": 8.84796, + "15": 9.02503, + "16": 9.07162, + "17": 9.04638, + "18": 8.75696, + "19": 9.18152, + "20": 8.86295, + "21": 8.5361, + "22": 8.55339, + "23": 8.42711, + "24": 8.37747, + "25": 8.64415, + "26": 7.97441, + "27": 8.56675, + "28": 8.19618, + "29": 8.39325, + "30": 8.67137, + "31": 8.28979, + "32": 8.43623, + "33": 8.55717, + "34": 8.6598, + "35": 8.07929, + "36": 7.94958, + "37": 8.29465, + "38": 7.9784, + "39": 8.39172, + "40": 8.35622, + "41": 8.31635, + "42": 8.06507, + "43": 8.03396, + "44": 8.24146, + "45": 8.1039, + "46": 7.61771, + "47": 8.15375, + "48": 8.00818, + "49": 8.38737, + "50": 7.81612 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19403624.0, - "2": 19274194.0, - "3": 19372760.0, - "4": 86525248.0, - "5": 148575568.0, - "6": 145226704.0, - "7": 171879984.0, - "8": 195785248.0, - "9": 164124752.0, - "10": 167684736.0, - "11": 221077344.0, - "12": 200384224.0, - "13": 248872528.0, - "14": 211169424.0, - "15": 214304608.0, - "16": 216075632.0, - "17": 267845984.0, - "18": 170470336.0, - "19": 176865072.0, - "20": 187955392.0, - "21": 225750704.0, - "22": 247396816.0, - 
"23": 211643856.0, - "24": 205638464.0, - "25": 277022272.0, - "26": 291562304.0, - "27": 225789840.0, - "28": 288202368.0, - "29": 198390384.0, - "30": 213302208.0, - "31": 227204752.0, - "32": 271112416.0, - "33": 231840432.0, - "34": 203575536.0, - "35": 191152368.0, - "36": 222566928.0, - "37": 177810112.0, - "38": 228708544.0, - "39": 211168784.0, - "40": 215603968.0, - "41": 200089440.0, - "42": 228529888.0, - "43": 198782848.0, - "44": 141902272.0, - "45": 181922816.0, - "46": 115369856.0, - "47": 170214176.0, - "48": 137292832.0, - "49": 97654936.0, - "50": 160979632.0 + "1": 19403784.0, + "2": 19274252.0, + "3": 19373794.0, + "4": 89687600.0, + "5": 139124400.0, + "6": 138949920.0, + "7": 170316512.0, + "8": 192665728.0, + "9": 168817872.0, + "10": 156652864.0, + "11": 217935232.0, + "12": 213007792.0, + "13": 228424704.0, + "14": 217442256.0, + "15": 237921408.0, + "16": 225523072.0, + "17": 225458384.0, + "18": 164166928.0, + "19": 164457904.0, + "20": 180124848.0, + "21": 230463232.0, + "22": 230096384.0, + "23": 210054656.0, + "24": 200985472.0, + "25": 248708512.0, + "26": 301000896.0, + "27": 205364384.0, + "28": 270886048.0, + "29": 259695952.0, + "30": 224280720.0, + "31": 244360992.0, + "32": 189382672.0, + "33": 231930816.0, + "34": 206712432.0, + "35": 194319616.0, + "36": 246163408.0, + "37": 193561968.0, + "38": 228822688.0, + "39": 226941728.0, + "40": 196742032.0, + "41": 200179904.0, + "42": 219112640.0, + "43": 186235920.0, + "44": 138763920.0, + "45": 148907984.0, + "46": 109115896.0, + "47": 167015728.0, + "48": 156135104.0, + "49": 91378480.0, + "50": 164099648.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4883602432.0, - "2": 4885017088.0, - "3": 4882657792.0, - "4": 4883046912.0, - "5": 4883725824.0, - "6": 4883713536.0, - "7": 4883040768.0, - "8": 4883273216.0, - "9": 4882952704.0, - "10": 4885949952.0, - "11": 4883990016.0, - "12": 4887679488.0, - "13": 4884011520.0, - 
"14": 4882899456.0, - "15": 4883515904.0, - "16": 4883990016.0, - "17": 4883410432.0, - "18": 4883673600.0, - "19": 4882903552.0, - "20": 4884541952.0, - "21": 4883138048.0, - "22": 4883247616.0, - "23": 4883839488.0, - "24": 4885058048.0, - "25": 4882676224.0, - "26": 4884058624.0, - "27": 4884724224.0, - "28": 4884874752.0, - "29": 4883127808.0, - "30": 4883252736.0, - "31": 4882955776.0, - "32": 4885190144.0, - "33": 4883845632.0, - "34": 4884392448.0, - "35": 4883083776.0, - "36": 4883851776.0, - "37": 4885246464.0, - "38": 4882680320.0, - "39": 4884296192.0, - "40": 4884689408.0, - "41": 4882836992.0, - "42": 4883972608.0, - "43": 4884519424.0, - "44": 4883354112.0, - "45": 4883495424.0, - "46": 4882788864.0, - "47": 4883144192.0, - "48": 4883688960.0, - "49": 4884182528.0, - "50": 4885279232.0 + "1": 4751680512.0, + "2": 4752032256.0, + "3": 4751058432.0, + "4": 4751692288.0, + "5": 4750785024.0, + "6": 4750721536.0, + "7": 4750738944.0, + "8": 4750471680.0, + "9": 4750078464.0, + "10": 4750671360.0, + "11": 4750662144.0, + "12": 4750013952.0, + "13": 4750343680.0, + "14": 4750866944.0, + "15": 4751114752.0, + "16": 4754016768.0, + "17": 4751645184.0, + "18": 4749773312.0, + "19": 4751623680.0, + "20": 4749661696.0, + "21": 4751997440.0, + "22": 4751115776.0, + "23": 4750557696.0, + "24": 4751779328.0, + "25": 4750678528.0, + "26": 4749646336.0, + "27": 4750984704.0, + "28": 4752366080.0, + "29": 4750876160.0, + "30": 4750423552.0, + "31": 4750733824.0, + "32": 4751212032.0, + "33": 4750073344.0, + "34": 4751521280.0, + "35": 4750867968.0, + "36": 4750440960.0, + "37": 4750258688.0, + "38": 4751287808.0, + "39": 4749742592.0, + "40": 4750831104.0, + "41": 4750516736.0, + "42": 4750870016.0, + "43": 4750633472.0, + "44": 4750676480.0, + "45": 4750337536.0, + "46": 4751146496.0, + "47": 4750629376.0, + "48": 4750627328.0, + "49": 4751527424.0, + "50": 4750583296.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, 
"values": { - "1": 41210470400.0, - "2": 41210470400.0, - "3": 41210470400.0, - "4": 41210470400.0, - "5": 41210470400.0, - "6": 41210470400.0, - "7": 41210470400.0, - "8": 41210470400.0, - "9": 41210470400.0, - "10": 41210470400.0, - "11": 41210470400.0, - "12": 41210470400.0, - "13": 41210470400.0, - "14": 41210470400.0, - "15": 41210470400.0, - "16": 41210470400.0, - "17": 41210470400.0, - "18": 41210470400.0, - "19": 41210470400.0, - "20": 41210470400.0, - "21": 41210470400.0, - "22": 41210470400.0, - "23": 41210470400.0, - "24": 41210470400.0, - "25": 41210470400.0, - "26": 41210470400.0, - "27": 41210470400.0, - "28": 41210470400.0, - "29": 41210470400.0, - "30": 41210470400.0, - "31": 41210470400.0, - "32": 41210470400.0, - "33": 41210470400.0, - "34": 41210470400.0, - "35": 41210470400.0, - "36": 41210470400.0, - "37": 41210470400.0, - "38": 41210470400.0, - "39": 41210470400.0, - "40": 41210470400.0, - "41": 41210470400.0, - "42": 41210470400.0, - "43": 41210470400.0, - "44": 41210470400.0, - "45": 41210470400.0, - "46": 41210470400.0, - "47": 41210470400.0, - "48": 41210470400.0, - "49": 41210470400.0, - "50": 41210470400.0 + "1": 11458484224.0, + "2": 12450223104.0, + "3": 12450223104.0, + "4": 12450223104.0, + "5": 12450223104.0, + "6": 12572350464.0, + "7": 12815280128.0, + "8": 12815280128.0, + "9": 13430808576.0, + "10": 13558942720.0, + "11": 13558942720.0, + "12": 13558942720.0, + "13": 13558942720.0, + "14": 13558942720.0, + "15": 13558942720.0, + "16": 13558942720.0, + "17": 13558942720.0, + "18": 13558942720.0, + "19": 13558942720.0, + "20": 13558942720.0, + "21": 13764741120.0, + "22": 13887232000.0, + "23": 13887232000.0, + "24": 13887232000.0, + "25": 13887232000.0, + "26": 13887232000.0, + "27": 13887232000.0, + "28": 13887232000.0, + "29": 13887232000.0, + "30": 13887232000.0, + "31": 13887232000.0, + "32": 13887232000.0, + "33": 13887232000.0, + "34": 13887232000.0, + "35": 13887232000.0, + "36": 13887232000.0, + "37": 13887232000.0, + 
"38": 13887232000.0, + "39": 13887232000.0, + "40": 13887232000.0, + "41": 13887232000.0, + "42": 13887232000.0, + "43": 13887232000.0, + "44": 13887232000.0, + "45": 13887232000.0, + "46": 13887232000.0, + "47": 13887232000.0, + "48": 13887232000.0, + "49": 13887232000.0, + "50": 13887232000.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 86.8085, - "2": 1.10913, - "3": 0.99097, - "4": 0.89412, - "5": 1.25997, - "6": 0.98162, - "7": 0.98318, - "8": 1.13296, - "9": 0.88126, - "10": 0.8633, - "11": 2.2744, - "12": 4.5393, - "13": 3.22763, - "14": 1.64923, - "15": 0.86595, - "16": 0.86575, - "17": 0.85272, - "18": 0.85454, - "19": 0.85281, - "20": 0.87018, - "21": 0.84654, - "22": 0.8494, - "23": 0.84882, - "24": 0.84482, - "25": 0.85311, - "26": 0.84678, - "27": 0.84096, - "28": 0.8412, - "29": 0.84156, - "30": 0.84475, - "31": 0.84747, - "32": 0.85058, - "33": 0.84977, - "34": 0.8479, - "35": 0.85234, - "36": 0.85012, - "37": 0.85087, - "38": 0.84594, - "39": 0.84558, - "40": 0.84807, - "41": 0.84183, - "42": 0.8439, - "43": 0.84221, - "44": 0.84248, - "45": 0.84257, - "46": 0.83922, - "47": 0.84311, - "48": 0.84159, - "49": 0.84011, - "50": 0.8353 + "1": 83.38985, + "2": 0.80022, + "3": 0.71751, + "4": 0.65556, + "5": 0.98544, + "6": 0.76766, + "7": 0.73114, + "8": 0.76226, + "9": 0.62791, + "10": 0.62224, + "11": 0.69873, + "12": 0.62401, + "13": 0.62467, + "14": 0.62054, + "15": 0.6218, + "16": 0.61653, + "17": 0.6184, + "18": 0.63217, + "19": 0.61609, + "20": 0.62413, + "21": 0.60966, + "22": 0.60967, + "23": 0.60674, + "24": 0.60595, + "25": 0.60063, + "26": 0.60502, + "27": 0.60923, + "28": 0.60939, + "29": 0.61217, + "30": 0.60702, + "31": 0.61517, + "32": 0.60803, + "33": 0.60624, + "34": 0.6123, + "35": 0.61133, + "36": 0.60971, + "37": 0.61215, + "38": 0.61014, + "39": 0.62694, + "40": 0.60532, + "41": 0.60477, + "42": 0.60297, + "43": 0.60073, + "44": 0.59786, + "45": 0.60582, + "46": 0.60848, + 
"47": 0.60019, + "48": 0.60064, + "49": 0.60304, + "50": 0.58276 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml index 23842f00384..9fdcb460cf3 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml @@ -17,8 +17,7 @@ MODEL_ARGS: --use-distributed-optimizer: true --overlap-grad-reduce: true --overlap-param-gather: true - # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN - --attention-backend: unfused # TODO: switch back to fused attention after fix + --attention-backend: fused # Training args --use-mcore-models: true --sequence-parallel: true @@ -128,10 +127,10 @@ MODEL_ARGS: --fp8-format: hybrid --fp8-recipe: tensorwise --exit-interval: 50 -TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular +TEST_TYPE: ckpt-resume # Restored to ckpt-resume; the WAR for #513 is no longer needed METRICS: - "iteration-time" - "lm loss" - "num-zeros" - - "mem-allocated-bytes" - - "mem-max-allocated-bytes" + # - "mem-allocated-bytes" + # - "mem-max-allocated-bytes" # Disable for now since resume training has more memory cost. To be investigated.
diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..8c4f243d4c2 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,344 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.0637, + "2": 11.03838, + "3": 9.79196, + "4": 14.17309, + "5": 9.48263, + "6": 9.30356, + "7": 9.27632, + "8": 8.75189, + "9": 8.70462, + "10": 9.04035, + "11": 8.41109, + "12": 8.53109, + "13": 8.43144, + "14": 7.93673, + "15": 8.00837, + "16": 8.08212, + "17": 8.06887, + "18": 7.75236, + "19": 8.13737, + "20": 7.88364, + "21": 7.56605, + "22": 7.55552, + "23": 7.42862, + "24": 7.41252, + "25": 7.67597, + "26": 7.08176, + "27": 7.62221, + "28": 7.32629, + "29": 7.49894, + "30": 7.63447, + "31": 7.3983, + "32": 7.59785, + "33": 7.64396, + "34": 7.70726, + "35": 7.21393, + "36": 7.08985, + "37": 7.42971, + "38": 7.19273, + "39": 7.56041, + "40": 7.55564, + "41": 7.49928, + "42": 7.25988, + "43": 7.24878, + "44": 7.42783, + "45": 7.21045, + "46": 6.91669, + "47": 7.31999, + "48": 7.16939, + "49": 7.62783, + "50": 7.05439 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 38802064.0, + "2": 38543200.0, + "3": 38744220.0, + "4": 166695072.0, + "5": 394456256.0, + "6": 441303136.0, + "7": 538731776.0, + "8": 680781184.0, + "9": 564001216.0, + "10": 571185472.0, + "11": 624455360.0, + "12": 680622208.0, + "13": 777548288.0, + "14": 717772992.0, + "15": 699100416.0, + "16": 677486208.0, + "17": 645761024.0, + "18": 671155776.0, + "19": 674320512.0, + "20": 891692160.0, + "21": 
658833920.0, + "22": 802998016.0, + "23": 756352768.0, + "24": 772904192.0, + "25": 748799104.0, + "26": 771817792.0, + "27": 772312064.0, + "28": 655008000.0, + "29": 783495808.0, + "30": 794511296.0, + "31": 756035712.0, + "32": 535862592.0, + "33": 680633984.0, + "34": 482597312.0, + "35": 671593792.0, + "36": 658959488.0, + "37": 626012736.0, + "38": 614650240.0, + "39": 595183872.0, + "40": 421718816.0, + "41": 557433600.0, + "42": 545065344.0, + "43": 539024064.0, + "44": 544803840.0, + "45": 517934176.0, + "46": 504352736.0, + "47": 497582464.0, + "48": 500981632.0, + "49": 490922656.0, + "50": 472902496.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6294696448.0, + "2": 6295491072.0, + "3": 6296283648.0, + "4": 6297076224.0, + "5": 6297868800.0, + "6": 6298661376.0, + "7": 6294104064.0, + "8": 6294896640.0, + "9": 6295689216.0, + "10": 6296481792.0, + "11": 6294500352.0, + "12": 6295292928.0, + "13": 6296085504.0, + "14": 6296878080.0, + "15": 6297670656.0, + "16": 6298463232.0, + "17": 6299255808.0, + "18": 6300048384.0, + "19": 6300840960.0, + "20": 6301633536.0, + "21": 6302426112.0, + "22": 6303218688.0, + "23": 6304011264.0, + "24": 6304803840.0, + "25": 6305596416.0, + "26": 6306388992.0, + "27": 6307181568.0, + "28": 6307974144.0, + "29": 6308766720.0, + "30": 6309559296.0, + "31": 6310351872.0, + "32": 6311144448.0, + "33": 6311937024.0, + "34": 6312729600.0, + "35": 6313522176.0, + "36": 6314314752.0, + "37": 6315107328.0, + "38": 6315899904.0, + "39": 6316692480.0, + "40": 6317485056.0, + "41": 6318277632.0, + "42": 6319070208.0, + "43": 6319862784.0, + "44": 6320655360.0, + "45": 6321447936.0, + "46": 6322240512.0, + "47": 6323033088.0, + "48": 6323825664.0, + "49": 6324618240.0, + "50": 6325410816.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 46771978240.0, + "2": 49466654720.0, + "3": 51157819392.0, 
+ "4": 51157819392.0, + "5": 51157819392.0, + "6": 51157819392.0, + "7": 51157819392.0, + "8": 51157819392.0, + "9": 51157819392.0, + "10": 51157819392.0, + "11": 51157819392.0, + "12": 51157819392.0, + "13": 51157819392.0, + "14": 51157819392.0, + "15": 51157819392.0, + "16": 51157819392.0, + "17": 51157819392.0, + "18": 51157819392.0, + "19": 51157819392.0, + "20": 51157819392.0, + "21": 51157819392.0, + "22": 51157819392.0, + "23": 51157819392.0, + "24": 51157819392.0, + "25": 51157819392.0, + "26": 51157819392.0, + "27": 51157819392.0, + "28": 51157819392.0, + "29": 51157819392.0, + "30": 51157819392.0, + "31": 51157819392.0, + "32": 51157819392.0, + "33": 51157819392.0, + "34": 51157819392.0, + "35": 51157819392.0, + "36": 51157819392.0, + "37": 51157819392.0, + "38": 51157819392.0, + "39": 51157819392.0, + "40": 51157819392.0, + "41": 51157819392.0, + "42": 51157819392.0, + "43": 51157819392.0, + "44": 51157819392.0, + "45": 51157819392.0, + "46": 51157819392.0, + "47": 51157819392.0, + "48": 51157819392.0, + "49": 51157819392.0, + "50": 51157819392.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.04508, + "2": 11.05397, + "3": 10.54505, + "4": 9.99194, + "5": 9.76285, + "6": 9.45507, + "7": 9.54431, + "8": 8.91725, + "9": 8.74784, + "10": 9.04997, + "11": 8.40193, + "12": 8.48288, + "13": 8.36926, + "14": 7.81448, + "15": 7.93865, + "16": 8.02231, + "17": 7.96741, + "18": 7.70552, + "19": 8.09012, + "20": 7.79984, + "21": 7.48241, + "22": 7.49502, + "23": 7.35415, + "24": 7.34793, + "25": 7.60324, + "26": 7.01638, + "27": 7.55495, + "28": 7.24721, + "29": 7.43133, + "30": 7.56633, + "31": 7.31391, + "32": 7.50445, + "33": 7.55658, + "34": 7.62234, + "35": 7.13802, + "36": 7.00593, + "37": 7.33916, + "38": 7.1095, + "39": 7.4736, + "40": 7.45784, + "41": 7.40514, + "42": 7.15986, + "43": 7.14965, + "44": 7.32758, + "45": 7.11892, + "46": 6.81056, + "47": 7.2234, + "48": 7.06789, + "49": 7.503, + 
"50": 6.9559 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 71.51538, + "2": 1.72071, + "3": 1.31657, + "4": 1.18423, + "5": 3.82179, + "6": 2.3037, + "7": 3.15765, + "8": 1.26325, + "9": 1.04414, + "10": 1.05643, + "11": 2.7525, + "12": 1.03473, + "13": 1.05477, + "14": 1.05184, + "15": 1.06441, + "16": 1.1362, + "17": 1.05355, + "18": 1.05093, + "19": 1.04209, + "20": 1.03871, + "21": 1.04773, + "22": 1.05492, + "23": 1.02882, + "24": 1.05172, + "25": 1.03632, + "26": 1.04229, + "27": 1.04662, + "28": 1.05014, + "29": 1.03047, + "30": 1.0813, + "31": 1.06319, + "32": 1.02842, + "33": 1.041, + "34": 1.02275, + "35": 1.03563, + "36": 1.0411, + "37": 1.02865, + "38": 1.03454, + "39": 1.05619, + "40": 1.04996, + "41": 1.02719, + "42": 1.05309, + "43": 1.03532, + "44": 1.05042, + "45": 1.03343, + "46": 1.04769, + "47": 1.03458, + "48": 1.04744, + "49": 1.04302, + "50": 1.0386 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml index 0a37ee08498..4036686e888 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml @@ -16,8 +16,7 @@ MODEL_ARGS: --use-distributed-optimizer: true --overlap-grad-reduce: true --overlap-param-gather: true - # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN - --attention-backend: unfused # TODO: switch back to fused attention after fix + --attention-backend: unfused # Training args --use-mcore-models: true --sequence-parallel: true @@ -126,7 +125,7 @@ MODEL_ARGS: --bf16: true --exit-interval: 
50 --overlap-moe-expert-parallel-comm: true -TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular +TEST_TYPE: ckpt-resume # Usually ckpt-resume, but as a WAR to #513 set to regular METRICS: - "iteration-time" - "lm loss" diff --git a/tests/test_utils/recipes/bert.yaml b/tests/test_utils/recipes/bert.yaml deleted file mode 100644 index f0be62e4701..00000000000 --- a/tests/test_utils/recipes/bert.yaml +++ /dev/null @@ -1,101 +0,0 @@ -type: basic -format_version: 1 -maintainers: [mcore] -loggers: [stdout] -spec: - name: "{test_case}_{environment}_{platforms}" - model: bert - nodes: 1 - build: mcore-pyt-{environment} - gpus: 8 - platforms: dgx_a100 - time_limit: - n_repeat: - artifacts: - /workspace/data/bert_data: text/the_pile/bert_shard00 - /workspace/checkpoints/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G_dev: model/mcore_bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_{platforms}_1N8G_dev/28359448 - script_setup: | - unset https_proxy - echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc - - # Checkout latest - cd /opt - rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm - git init - git remote add origin $MCORE_REPO - git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*' - git fetch origin $MCORE_MR_COMMIT - git checkout $MCORE_MR_COMMIT - git rev-parse HEAD - - # Checkout backwards-ref - cd /opt - rm -rf /opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy - git init - git remote add origin $MCORE_REPO - git fetch origin $MCORE_BACKWARDS_COMMIT - git checkout $MCORE_BACKWARDS_COMMIT - git rev-parse HEAD - rm -rf megatron; cp -a /opt/megatron-lm/megatron ./ - script: |- - ls - cd /opt/megatron-lm - NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') - ARGUMENTS=( - "DATA_PATH=/workspace/data/bert_data" - "DATA_CACHE_PATH=/workspace/data/cache" - "OUTPUT_PATH={assets_dir}" - 
"TENSORBOARD_PATH={assets_dir}/tensorboard" - "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" - "TRAINING_SCRIPT_PATH=pretrain_bert.py" - "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" - "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" - "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" - ) - - bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} - -products: - - test_case: [bert_mr_mcore_tp2_pp2_dgx_a100_1N8G] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_h100] - - test_case: [bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_h100] - - test_case: [bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_h100] - # - test_case: [bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G] - # products: - # - environment: [dev] - # scope: [mr] - # platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 972288bd905..af4b4203803 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -64,18 +64,6 @@ 
products: ####################################################################### # Nightly tests: Run both DEV and LTS unless something is flaky # ####################################################################### - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] - - environment: [lts] - scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel] products: - environment: [dev] @@ -83,32 +71,11 @@ products: platforms: [dgx_a100, dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] - - environment: [lts] - scope: [nightly] - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] - - environment: [lts] - scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] - - environment: [lts] - scope: [nightly] # - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts] # products: # non-determinism: #478 # - environment: [dev, lts] @@ -125,43 +92,21 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - # - test_case: 
[gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] - # products: - # - environment: [dev] - # scope: [mr] - # platforms: [dgx_h100] # hang: #513 - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # hang: #513 - # - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] - # products: - # - environment: [dev] - # scope: [mr] - # platforms: [dgx_h100] # hang: #513 - - test_case: [gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G] + - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] products: - environment: [dev] scope: [mr] - platforms: [dgx_h100] + platforms: [dgx_h100] # hang: #513 - test_case: [gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G] - products: - # - environment: [dev] - # scope: [mr] - # platforms: [dgx_h100] # hang: #513 - - environment: [lts] - scope: [nightly] - test_case: [gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G] products: - environment: [dev] @@ -187,6 +132,11 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] + - test_case: [gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] ####################################################################### # Super important MR tests that run for both DEV and LTS per MR # ####################################################################### @@ -203,7 +153,7 @@ products: 
########################### # Merge train tests # ########################### - - test_case: [gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer] + - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed] products: - environment: [dev] scope: [mr] @@ -211,11 +161,11 @@ products: - environment: [dev] scope: [mr-slim] platforms: [dgx_h100] - - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed] + - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] products: - environment: [dev] scope: [mr] - platforms: [dgx_h100] + platforms: [dgx_h100] # hang: #513 - environment: [dev] scope: [mr-slim] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/t5.yaml b/tests/test_utils/recipes/t5.yaml deleted file mode 100644 index 31a72e9b5a1..00000000000 --- a/tests/test_utils/recipes/t5.yaml +++ /dev/null @@ -1,116 +0,0 @@ -type: basic -format_version: 1 -maintainers: [mcore] -loggers: [stdout] -spec: - name: "{test_case}_{environment}_{platforms}" - model: t5 - build: mcore-pyt-{environment} - nodes: 1 - gpus: 8 - platforms: dgx_a100 - artifacts: - /workspace/data/t5_data: text/the_pile/t5_shard00 - /workspace/checkpoints/t5_220m_mr_mcore_te_tp2_pp2_frozen_resume_torch_dgx_a100_1N8G_dev: model/mcore_t5/t5_220m_mr_mcore_te_tp2_pp2_frozen_resume_torch_{platforms}_1N8G_dev/28359448 - script_setup: | - unset https_proxy - echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc - - # Checkout latest - cd /opt - rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm - git init - git remote add origin $MCORE_REPO - git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*' - git fetch origin $MCORE_MR_COMMIT - git checkout $MCORE_MR_COMMIT - git rev-parse HEAD - - # Checkout backwards-ref - cd /opt - rm -rf /opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy - git init - git remote add origin $MCORE_REPO - git fetch origin $MCORE_BACKWARDS_COMMIT - git 
checkout $MCORE_BACKWARDS_COMMIT - git rev-parse HEAD - rm -rf megatron; cp -a /opt/megatron-lm/megatron ./ - script: |- - ls - cd /opt/megatron-lm - - NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') - - ARGUMENTS=( - "DATA_PATH=/workspace/data/t5_data" - "DATA_CACHE_PATH=/workspace/data/cache" - "OUTPUT_PATH={assets_dir}" - "TENSORBOARD_PATH={assets_dir}/tensorboard" - "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" - "TRAINING_SCRIPT_PATH=pretrain_t5.py" - "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" - "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" - "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" - ) - - bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} - -products: - - test_case: [t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] 
- - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1] - products: - - environment: [dev] - scope: [nightly] - platforms: [dgx_a100, dgx_h100] From 32e9518b0e6a91049e9c0ae3b1c471a0d3fd348a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 15 Oct 2025 06:36:13 +0000 Subject: [PATCH 023/334] ci: No batch short anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 358ad740e01..af972c8d0cf 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -76,7 +76,7 @@ test:unit_tests_configure: "--test-cases all" "--cluster dgxh100_coreweave" "--platform dgx_h100" - "--partition batch_short,batch" + "--partition batch" "--container-image ${UTILITY_IMAGE}" "--container-tag ${CI_PIPELINE_ID}" "--dependent-job test:unit_tests_configure" From bbc762d5c62f28f31944507f8628719d2f84a6db Mon Sep 17 00:00:00 2001 From: Li Tao Date: Wed, 15 Oct 2025 06:07:18 -0700 Subject: [PATCH 024/334] ADLR/megatron-lm!4231 - [Dev] fix(dataset): fix the divergence when using dsv3 tokenizer after !3646; Have datasets account for tokenizers which incorrectly define PAD; Co-authored-by: Teodor-Dumitru Ene --- megatron/core/datasets/bert_dataset.py | 10 ++-- .../blended_megatron_dataset_config.py | 11 ++++ megatron/core/datasets/gpt_dataset.py 
| 8 --- megatron/core/datasets/megatron_dataset.py | 47 +++++++++++++++++ megatron/core/datasets/t5_dataset.py | 4 +- megatron/training/arguments.py | 14 +++++ megatron/training/tokenizer/tokenizer.py | 52 +++++++++++++------ pretrain_bert.py | 1 + pretrain_gpt.py | 1 + pretrain_mamba.py | 1 + pretrain_retro.py | 1 + pretrain_t5.py | 1 + pretrain_vlm.py | 1 + 13 files changed, 120 insertions(+), 32 deletions(-) diff --git a/megatron/core/datasets/bert_dataset.py b/megatron/core/datasets/bert_dataset.py index 314efb46cd6..6772a4e6644 100644 --- a/megatron/core/datasets/bert_dataset.py +++ b/megatron/core/datasets/bert_dataset.py @@ -139,18 +139,14 @@ def __getitem__(self, idx: int) -> Dict[str, Union[int, numpy.ndarray]]: assert length_pads >= 0 tokens = numpy.array(tokens, dtype=numpy.int64) - tokens = numpy.pad(tokens, (0, length_pads), constant_values=self.config.tokenizer.pad) + tokens = numpy.pad(tokens, (0, length_pads), constant_values=self._pad_token_id) assignments = numpy.array(assignments, dtype=numpy.int64) - assignments = numpy.pad( - assignments, (0, length_pads), constant_values=self.config.tokenizer.pad - ) + assignments = numpy.pad(assignments, (0, length_pads), constant_values=self._pad_token_id) # Get the padding mask mask_pads = numpy.ones(length_toks, dtype=numpy.int64) - mask_pads = numpy.pad( - mask_pads, (0, length_pads), constant_values=self.config.tokenizer.pad - ) + mask_pads = numpy.pad(mask_pads, (0, length_pads), constant_values=self._pad_token_id) # Mask the labels labels = numpy.zeros(self.config.sequence_length, dtype=numpy.int64) - 1 diff --git a/megatron/core/datasets/blended_megatron_dataset_config.py b/megatron/core/datasets/blended_megatron_dataset_config.py index 3222ece836f..fd7132acc0f 100644 --- a/megatron/core/datasets/blended_megatron_dataset_config.py +++ b/megatron/core/datasets/blended_megatron_dataset_config.py @@ -77,6 +77,17 @@ class BlendedMegatronDatasetConfig: datasets(s). 
""" + allow_ambiguous_pad_tokens: Optional[bool] = False + """Whether to prevent pad tokens already present in the dataset from being masked out + when the pad token incorrectly shares the same id with other special tokens. + Treating such tokens as pad tokens results in training instability and divergence. + Such a scenario is best resolved by fixing the tokenizer, but leaving this option as False + provides a workaround. + This argument will have no effect if the tokenizer is correct. However, should the user + desire to train on a dataset that intentionally contains pad tokens - while also using an + incorrect tokenizer - this option may be set to True. This is typically not recommended. + """ + def __post_init__(self) -> None: """Do asserts and set fields post init""" if self.blend_per_split is not None and any(self.blend_per_split): diff --git a/megatron/core/datasets/gpt_dataset.py b/megatron/core/datasets/gpt_dataset.py index 7ea63df8051..c96fed08065 100644 --- a/megatron/core/datasets/gpt_dataset.py +++ b/megatron/core/datasets/gpt_dataset.py @@ -20,9 +20,6 @@ logger = logging.getLogger(__name__) -_PAD_TOKEN_ID = -1 - - @dataclass class GPTDatasetConfig(BlendedMegatronDatasetConfig): """Configuration object for Megatron Core GPT datasets""" @@ -105,11 +102,6 @@ def __init__( self.cached_loss_mask = None self.cached_position_ids = None - try: - self._pad_token_id = self.config.tokenizer.pad - except Exception: - self._pad_token_id = _PAD_TOKEN_ID - (self.document_index, self.sample_index, self.shuffle_index) = ( self._build_document_sample_shuffle_indices() ) diff --git a/megatron/core/datasets/megatron_dataset.py b/megatron/core/datasets/megatron_dataset.py index 0980ef92d36..185a87e1ac5 100644 --- a/megatron/core/datasets/megatron_dataset.py +++ b/megatron/core/datasets/megatron_dataset.py @@ -2,6 +2,7 @@ import hashlib import json +import warnings from abc import ABC, abstractmethod from collections import OrderedDict from typing import Dict, Iterable, 
List, Optional, Union @@ -16,6 +17,9 @@ LowLevelDataset = Union[IndexedDataset, Iterable] +_PAD_TOKEN_ID = -1 + + class MegatronDataset(ABC, torch.utils.data.Dataset): """The highest level wrapper class from which all dataset classes should inherit @@ -66,6 +70,49 @@ def __init__( self.unique_description.encode("utf-8"), usedforsecurity=False ).hexdigest() + # Handle pad token id provided by the tokenizer + try: + self._pad_token_id = self.config.tokenizer.pad + except Exception: + self._pad_token_id = _PAD_TOKEN_ID + + # Check if pad token id collides with any other special tokens + try: + _special_tokens_list = [ + v for k, v in self.config.tokenizer.special_tokens_dict.items() if k != "pad_token" + ] + except (AttributeError, IndexError, ValueError): + _special_tokens_list = [] + # If the tokenizer does not have a special_tokens_dict attribute, at least check eos and eod + if not _special_tokens_list: + try: + _special_tokens_list.append(self.config.tokenizer.eos) + except AttributeError: + pass + try: + _special_tokens_list.append(self.config.tokenizer.eod) + except AttributeError: + pass + + if self._pad_token_id in _special_tokens_list: + if self.config.allow_ambiguous_pad_tokens: + # This will break training, but users must explicitly opt-in to this behavior. + warnings.warn( + "The pad token id in the tokenizer collides with another special token id. " + "This may cause instability and lack of covergence during training. " + "Do not ignore this warning if you do not understand the implications. " + ) + else: + # Reset the pad token id to a value which is guaranteed not to be in the dataset. + self._pad_token_id = _PAD_TOKEN_ID + warnings.warn( + "The pad token id in the tokenizer collides with another special token id. " + "This may cause instability and lack of covergence during training. " + "As such, the training flow will avoid masking out any pad tokens already " + "present in the dataset. 
If you would like to disable this behavior, " + "please provide a tokenizer with a uniquely-defined pad token id." + ) + @staticmethod def numel_low_level_dataset(low_level_dataset: LowLevelDataset) -> int: """Return the number of elements in the underlying low level dataset for the purpose of diff --git a/megatron/core/datasets/t5_dataset.py b/megatron/core/datasets/t5_dataset.py index 85da1480e10..8e3531b1e86 100644 --- a/megatron/core/datasets/t5_dataset.py +++ b/megatron/core/datasets/t5_dataset.py @@ -286,12 +286,12 @@ def __getitem__(self, idx: int) -> Dict[str, Union[int, numpy.ndarray]]: encoder_input = numpy.array(encoder_input, dtype=numpy.int64) encoder_input = numpy.pad( - encoder_input, (0, length_pads_encoder), constant_values=self.config.tokenizer.pad + encoder_input, (0, length_pads_encoder), constant_values=self._pad_token_id ) decoder_input = numpy.array(decoder_input, dtype=numpy.int64) decoder_input = numpy.pad( - decoder_input, (0, length_pads_decoder), constant_values=self.config.tokenizer.pad + decoder_input, (0, length_pads_decoder), constant_values=self._pad_token_id ) # Create attention and history masks diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 905538ffc9e..fa9a0f6d751 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -2902,6 +2902,20 @@ def _add_data_args(parser): help='Path to cache index files when using s3 or msc dataloader') group.add_argument('--mid-level-dataset-surplus', type=float, default=0.005, help='The sample surplus to build for the mid-level datasets(s)') + group.add_argument('--allow-ambiguous-pad-tokens', action='store_true', + help='Whether to prevent pad tokens already present in the dataset ' + 'from being masked out when the pad token incorrectly shares the same id ' + 'with other special tokens in the tokenizer. Note that this argument has ' + 'no effect when the tokenizer correctly provides a unique id for the pad. 
' + 'Masking out such ambiguous pad tokens results in training instability. ' + 'Such a scenario is best resolved by fixing the tokenizer; leaving this ' + 'option as False provides a workaround. ' + 'When left to the default of False, any token ids that collide with the ' + 'pad token id - as provided by the tokenizer - will not be masked out of ' + 'the loss calculation: it cannot be determined whether they are truly pad. ' + 'If instead this argument is set, the training flow will treat all tokens ' + 'that share the same id as the pad token as true pad tokens, potentially ' + 'causing severe training instability.') return parser diff --git a/megatron/training/tokenizer/tokenizer.py b/megatron/training/tokenizer/tokenizer.py index b1aad6819b4..13b7526ca07 100644 --- a/megatron/training/tokenizer/tokenizer.py +++ b/megatron/training/tokenizer/tokenizer.py @@ -48,7 +48,7 @@ def build_tokenizer(args, **kwargs): tokenizer = _GPTSentencePieceTokenizer(args.tokenizer_model) elif args.tokenizer_type == 'HuggingFaceTokenizer': tokenizer = _HuggingFaceTokenizer( - args.tokenizer_model, trust_remote_code = args.trust_remote_code, **kwargs, + args.tokenizer_model, trust_remote_code=args.trust_remote_code, **kwargs ) elif args.tokenizer_type == 'Llama2Tokenizer': assert args.tokenizer_model is not None @@ -78,11 +78,7 @@ def build_tokenizer(args, **kwargs): kwargs = dict() if args.tokenizer_prompt_format == "nvlm-yi-34b": - kwargs = { - "from_slow": True, - "legacy": False, - "add_bos_token": True, - } + kwargs = {"from_slow": True, "legacy": False, "add_bos_token": True} # Currently, only HuggingFace tokenizers are supported. 
underlying_tokenizer = transformers.AutoTokenizer.from_pretrained( @@ -97,10 +93,7 @@ def build_tokenizer(args, **kwargs): args.force_system_message, ) elif args.tokenizer_type == "SFTTokenizer": - tokenizer = SFTTokenizer( - args.tokenizer_model, - args.sft_tokenizer_prompt_format, - ) + tokenizer = SFTTokenizer(args.tokenizer_model, args.sft_tokenizer_prompt_format) elif args.tokenizer_type == 'NullMultimodalTokenizer': assert args.vocab_size is not None tokenizer = _NullMultimodalTokenizer(args.vocab_size) @@ -144,7 +137,7 @@ def __init__(self, pretrained_model_name_or_path, trust_remote_code=False, **kwa self._tokenizer = transformers.AutoTokenizer.from_pretrained( pretrained_model_name_or_path=pretrained_model_name_or_path, trust_remote_code=trust_remote_code, - **kwargs + **kwargs, ) self._vocab = self._tokenizer.get_vocab() self._inv_vocab = {token_id: token for token, token_id in self._vocab.items()} @@ -367,6 +360,10 @@ def detokenize(self, token_ids): def eod(self): return self.eod_id + @property + def eos(self): + return self.eod_id + class _SentencePieceTokenizer(MegatronLegacyTokenizer): """SentencePieceTokenizer-Megatron wrapper""" @@ -573,6 +570,10 @@ def mask(self): def eod(self): return self._eos_id + @property + def eos(self): + return self._eos_id + @property def additional_special_tokens_ids(self): return None @@ -623,6 +624,10 @@ def mask(self): def eod(self): return self.eos_id + @property + def eos(self): + return self.eos_id + @property def additional_special_tokens_ids(self): return None @@ -747,7 +752,7 @@ def bos(self) -> int: @property def eos(self) -> int: return self._eos_id - + @property def pad(self) -> int: return self._pad_id @@ -858,19 +863,30 @@ def mask(self): def eod(self): return self._eod_id + @property + def eos(self): + return self._eod_id + @property def additional_special_tokens_ids(self): return None + class _NullMultimodalTokenizer(MegatronLegacyTokenizer): def __init__(self, vocab_size, image_token=None, 
image_token_id=None): super().__init__(None, vocab_size=vocab_size) self._vocab_size_without_eod = int(vocab_size) self._eod_id = self._vocab_size_without_eod - from megatron.core.models.multimodal.llava_model import DEFAULT_IMAGE_TOKEN_INDEX, IMAGE_TOKEN + from megatron.core.models.multimodal.llava_model import ( + DEFAULT_IMAGE_TOKEN_INDEX, + IMAGE_TOKEN, + ) + self._image_token = image_token if image_token is not None else IMAGE_TOKEN - self._image_token_id = image_token_id if image_token_id is not None else DEFAULT_IMAGE_TOKEN_INDEX + self._image_token_id = ( + image_token_id if image_token_id is not None else DEFAULT_IMAGE_TOKEN_INDEX + ) def tokenize(self, text): return [int(x) for x in text.split(' ')] @@ -887,7 +903,9 @@ def offsets(self, ids: list[int], text: str) -> list[int]: return offsets def convert_tokens_to_ids(self, tokens): - ids = [(int(t) if t != self._image_token else self._image_token_id) for t in tokens.split(' ')] + ids = [ + (int(t) if t != self._image_token else self._image_token_id) for t in tokens.split(' ') + ] return ids if len(ids) > 1 else ids[0] @property @@ -918,6 +936,10 @@ def mask(self): def eod(self): return self._eod_id + @property + def eos(self): + return self._eod_id + @property def additional_special_tokens_ids(self): return None diff --git a/pretrain_bert.py b/pretrain_bert.py index a5e2728db89..401c32b4cb9 100644 --- a/pretrain_bert.py +++ b/pretrain_bert.py @@ -172,6 +172,7 @@ def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None masking_use_geometric_distribution=False, classification_head=args.bert_binary_head, mid_level_dataset_surplus=args.mid_level_dataset_surplus, + allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens, ) print_rank_0('> building train, validation, and test datasets ' diff --git a/pretrain_gpt.py b/pretrain_gpt.py index 3b785077664..0c1fd016593 100644 --- a/pretrain_gpt.py +++ b/pretrain_gpt.py @@ -190,6 +190,7 @@ def core_gpt_dataset_config_from_args(args): 
create_attention_mask=args.create_attention_mask_in_dataloader, object_storage_cache_path=args.object_storage_cache_path, mid_level_dataset_surplus=args.mid_level_dataset_surplus, + allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens, ) diff --git a/pretrain_mamba.py b/pretrain_mamba.py index eaf78f7ba9a..8717af11810 100644 --- a/pretrain_mamba.py +++ b/pretrain_mamba.py @@ -186,6 +186,7 @@ def core_gpt_dataset_config_from_args(args): create_attention_mask=args.create_attention_mask_in_dataloader, object_storage_cache_path=args.object_storage_cache_path, mid_level_dataset_surplus=args.mid_level_dataset_surplus, + allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens, ) diff --git a/pretrain_retro.py b/pretrain_retro.py index 100cf605657..63abbac5e39 100644 --- a/pretrain_retro.py +++ b/pretrain_retro.py @@ -210,6 +210,7 @@ def train_valid_test_datasets_provider(train_valid_test_num_samples): reset_attention_mask=args.reset_attention_mask, eod_mask_loss=args.eod_mask_loss, mid_level_dataset_surplus=args.mid_level_dataset_surplus, + allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens, ) # GPT datasets. 
diff --git a/pretrain_t5.py b/pretrain_t5.py index 6e6d9ad2c06..e74e7d8809e 100644 --- a/pretrain_t5.py +++ b/pretrain_t5.py @@ -233,6 +233,7 @@ def train_valid_test_datasets_provider(train_val_test_num_samples: int): masking_use_longer_ngrams=False, masking_use_geometric_distribution=True, mid_level_dataset_surplus=args.mid_level_dataset_surplus, + allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens, ) print_rank_0('> building train, validation, and test datasets for T5 ...') diff --git a/pretrain_vlm.py b/pretrain_vlm.py index ce1a5102444..524931d2727 100644 --- a/pretrain_vlm.py +++ b/pretrain_vlm.py @@ -224,6 +224,7 @@ def train_valid_test_datasets_provider(train_val_test_num_samples): image_w=args.img_w, preprocess_func=_preprocess_data_for_llava, mid_level_dataset_surplus=args.mid_level_dataset_surplus, + allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens, ) print_rank_0("> building train, validation, and test datasets for multimodal ...") From df41a69aa0a08f4044f7f07fa22f62021b092813 Mon Sep 17 00:00:00 2001 From: Dennis Liu Date: Thu, 16 Oct 2025 02:38:22 -0700 Subject: [PATCH 025/334] ADLR/megatron-lm!4254 - [Dev] Fix dev nightly functional tests. 
--- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../golden_values_dev_dgxh100_eos.json | 500 +++++++++--------- .../model_config.yaml.tmp | 132 ----- .../golden_values_dev_dgxh100_eos.json | 344 ++++++++++++ .../golden_values_dev_dgxh100_eos.json | 498 ++++++++--------- .../golden_values_dev_dgxh100_eos.json | 344 ++++++++++++ tests/test_utils/recipes/moe.yaml | 4 - 8 files changed, 1189 insertions(+), 637 deletions(-) delete mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml.tmp create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml index 041d35cab11..dc19a6c7698 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml @@ -33,7 +33,7 @@ MODEL_ARGS: --clip-grad: 1.0 --lr-warmup-fraction: .01 --log-interval: 1 - --save-interval: 10000 + --save-interval: 25 --eval-interval: 1000 --eval-iters: 10 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml index 
7f9613ba222..30c921c6feb 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml @@ -33,7 +33,7 @@ MODEL_ARGS: --clip-grad: 1.0 --lr-warmup-fraction: .01 --log-interval: 1 - --save-interval: 10000 + --save-interval: 25 --eval-interval: 1000 --eval-iters: 10 --transformer-impl: local diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json index b3668b31178..f95a91d4ff2 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04748, - "2": 11.03561, - "3": 9.58773, - "4": 9.25819, - "5": 9.52742, - "6": 9.87911, - "7": 9.48366, - "8": 8.93879, - "9": 8.6551, - "10": 9.10915, - "11": 8.51806, - "12": 8.54732, - "13": 8.48144, - "14": 8.05312, - "15": 8.10118, - "16": 8.10344, - "17": 8.08878, - "18": 7.78589, - "19": 8.15794, - "20": 7.88069, - "21": 7.58542, - "22": 7.54895, - "23": 7.4296, - "24": 7.41901, - "25": 7.67277, - "26": 7.07835, - "27": 7.61157, - "28": 7.31513, - "29": 7.49487, - "30": 7.64287, - "31": 7.39102, - "32": 7.59148, - "33": 7.6393, - "34": 7.70086, - "35": 7.2119, - "36": 7.08623, - "37": 7.43064, - "38": 7.18999, - "39": 7.5525, - "40": 7.54961, - "41": 7.49385, - "42": 7.25481, - "43": 7.24066, - "44": 7.42131, - "45": 7.19201, - "46": 6.90547, - "47": 7.30704, - "48": 7.15325, - "49": 7.60504, - "50": 7.04512 + "1": 11.04737, + "2": 11.03581, + "3": 9.58845, + 
"4": 9.25804, + "5": 9.54964, + "6": 9.8667, + "7": 9.47894, + "8": 8.92828, + "9": 8.66752, + "10": 9.05851, + "11": 8.49951, + "12": 8.52674, + "13": 8.45287, + "14": 7.99202, + "15": 8.05428, + "16": 8.08384, + "17": 8.09398, + "18": 7.76937, + "19": 8.14784, + "20": 7.88774, + "21": 7.58582, + "22": 7.5453, + "23": 7.4272, + "24": 7.42741, + "25": 7.67702, + "26": 7.06883, + "27": 7.61756, + "28": 7.33112, + "29": 7.49469, + "30": 7.6427, + "31": 7.39392, + "32": 7.58751, + "33": 7.64167, + "34": 7.70181, + "35": 7.21084, + "36": 7.08821, + "37": 7.42759, + "38": 7.19136, + "39": 7.55273, + "40": 7.54649, + "41": 7.49652, + "42": 7.25161, + "43": 7.2371, + "44": 7.41599, + "45": 7.19163, + "46": 6.90225, + "47": 7.30109, + "48": 7.14398, + "49": 7.59284, + "50": 7.03691 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802612.0, - "2": 38543592.0, - "3": 38739480.0, - "4": 279954336.0, - "5": 249745312.0, - "6": 268288496.0, - "7": 604756224.0, - "8": 781485184.0, - "9": 636362112.0, - "10": 653025216.0, - "11": 668551168.0, - "12": 765583616.0, - "13": 815362944.0, - "14": 834270656.0, - "15": 755756096.0, - "16": 995153536.0, - "17": 938291584.0, - "18": 721524928.0, - "19": 756173504.0, - "20": 901129600.0, - "21": 721816384.0, - "22": 831311872.0, - "23": 803536768.0, - "24": 628253248.0, - "25": 663895680.0, - "26": 847321664.0, - "27": 828927424.0, - "28": 777678976.0, - "29": 764628608.0, - "30": 781930112.0, - "31": 771767616.0, - "32": 771755392.0, - "33": 586323648.0, - "34": 734207552.0, - "35": 690468480.0, - "36": 485982688.0, - "37": 506506336.0, - "38": 642964160.0, - "39": 661240000.0, - "40": 645048768.0, - "41": 636072704.0, - "42": 491645856.0, - "43": 601942528.0, - "44": 623448960.0, - "45": 539959424.0, - "46": 532669088.0, - "47": 529039680.0, - "48": 504121984.0, - "49": 478344480.0, - "50": 331385728.0 + "1": 38802620.0, + "2": 38543572.0, + "3": 38741428.0, + "4": 283089696.0, + "5": 
256049008.0, + "6": 261995024.0, + "7": 601623744.0, + "8": 775170304.0, + "9": 645831808.0, + "10": 728519104.0, + "11": 740861312.0, + "12": 743565504.0, + "13": 893967040.0, + "14": 963173120.0, + "15": 746290304.0, + "16": 938543360.0, + "17": 730738816.0, + "18": 671172416.0, + "19": 922829888.0, + "20": 948314368.0, + "21": 778417216.0, + "22": 938284544.0, + "23": 926223744.0, + "24": 917606784.0, + "25": 918668992.0, + "26": 866192768.0, + "27": 866673856.0, + "28": 856325760.0, + "29": 836978240.0, + "30": 800803136.0, + "31": 790628096.0, + "32": 756030016.0, + "33": 734117312.0, + "34": 734209792.0, + "35": 731364736.0, + "36": 690416960.0, + "37": 679491584.0, + "38": 639823360.0, + "39": 632918272.0, + "40": 610431680.0, + "41": 598315904.0, + "42": 576523840.0, + "43": 406952768.0, + "44": 569968896.0, + "45": 539956736.0, + "46": 365988928.0, + "47": 503877472.0, + "48": 500972512.0, + "49": 478340480.0, + "50": 457181248.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 55055331328.0, - "2": 57809321984.0, - "3": 57919823872.0, - "4": 57919823872.0, - "5": 57919823872.0, - "6": 57919823872.0, - "7": 57919823872.0, - "8": 57919823872.0, - "9": 57919823872.0, - "10": 57919823872.0, - "11": 57919823872.0, - "12": 57919823872.0, - "13": 57932275712.0, - "14": 57932275712.0, - "15": 57932275712.0, - "16": 57932275712.0, - "17": 57932275712.0, - "18": 57932275712.0, - "19": 57932275712.0, - "20": 57932275712.0, - "21": 57932275712.0, - "22": 57932275712.0, - "23": 57932275712.0, - "24": 57932275712.0, - "25": 57932275712.0, - "26": 57932275712.0, - "27": 57932275712.0, - "28": 57932275712.0, - "29": 57932275712.0, - "30": 57932275712.0, - "31": 57932275712.0, - "32": 57932275712.0, - "33": 57932275712.0, - "34": 57932275712.0, - "35": 57932275712.0, - "36": 57932275712.0, - "37": 57932275712.0, - "38": 57932275712.0, - "39": 57932275712.0, - "40": 57932275712.0, - "41": 57932275712.0, - "42": 
57932275712.0, - "43": 57932275712.0, - "44": 57932275712.0, - "45": 57932275712.0, - "46": 57932275712.0, - "47": 57932275712.0, - "48": 57932275712.0, - "49": 57932275712.0, - "50": 57932275712.0 + "1": 22860046336.0, + "2": 25612713984.0, + "3": 25730244608.0, + "4": 25730244608.0, + "5": 25730244608.0, + "6": 25730244608.0, + "7": 25730244608.0, + "8": 25730244608.0, + "9": 25730244608.0, + "10": 25730244608.0, + "11": 25730244608.0, + "12": 25730244608.0, + "13": 26180298752.0, + "14": 26180298752.0, + "15": 26180298752.0, + "16": 26180298752.0, + "17": 26180298752.0, + "18": 26180298752.0, + "19": 26180298752.0, + "20": 26180298752.0, + "21": 26180298752.0, + "22": 26180298752.0, + "23": 26180298752.0, + "24": 26180298752.0, + "25": 26180298752.0, + "26": 26180298752.0, + "27": 26180298752.0, + "28": 26180298752.0, + "29": 26180298752.0, + "30": 26180298752.0, + "31": 26180298752.0, + "32": 26180298752.0, + "33": 26180298752.0, + "34": 26180298752.0, + "35": 26180298752.0, + "36": 26180298752.0, + "37": 26180298752.0, + "38": 26180298752.0, + "39": 26180298752.0, + "40": 26180298752.0, + "41": 26180298752.0, + "42": 26180298752.0, + "43": 26180298752.0, + "44": 26180298752.0, + "45": 26180298752.0, + "46": 26180298752.0, + "47": 26180298752.0, + "48": 26180298752.0, + "49": 26180298752.0, + "50": 26180298752.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07654, - "2": 11.07406, - "3": 10.53883, - "4": 10.09801, - "5": 9.81156, - "6": 10.06025, - "7": 9.7962, - "8": 9.06987, - "9": 8.86879, - "10": 9.13393, - "11": 8.5017, - "12": 8.54094, - "13": 8.43678, - "14": 7.85637, - "15": 7.99846, - "16": 8.05889, - "17": 8.01134, - "18": 7.73929, - "19": 8.1188, - "20": 7.83458, - "21": 7.53103, - "22": 7.50125, - "23": 7.37135, - "24": 7.37419, - "25": 7.61596, - "26": 7.01586, - "27": 7.55739, - "28": 7.26274, - "29": 7.43991, - "30": 7.58436, - "31": 7.32289, - "32": 7.50362, - "33": 7.56884, - "34": 7.6339, 
- "35": 7.151, - "36": 7.01725, - "37": 7.35013, - "38": 7.12483, - "39": 7.48708, - "40": 7.47451, - "41": 7.4181, - "42": 7.17557, - "43": 7.15957, - "44": 7.34227, - "45": 7.12176, - "46": 6.82526, - "47": 7.23374, - "48": 7.07893, - "49": 7.5077, - "50": 6.97094 + "1": 11.07644, + "2": 11.07413, + "3": 10.53858, + "4": 10.0983, + "5": 9.8117, + "6": 10.05948, + "7": 9.79869, + "8": 9.0727, + "9": 8.87366, + "10": 9.12893, + "11": 8.49884, + "12": 8.52992, + "13": 8.42414, + "14": 7.84688, + "15": 7.99135, + "16": 8.05047, + "17": 8.0004, + "18": 7.73069, + "19": 8.11023, + "20": 7.82948, + "21": 7.51921, + "22": 7.49606, + "23": 7.37196, + "24": 7.37047, + "25": 7.61349, + "26": 7.01867, + "27": 7.5586, + "28": 7.26599, + "29": 7.44466, + "30": 7.58701, + "31": 7.32783, + "32": 7.50657, + "33": 7.56866, + "34": 7.63344, + "35": 7.15071, + "36": 7.01674, + "37": 7.34958, + "38": 7.12576, + "39": 7.48596, + "40": 7.47304, + "41": 7.41897, + "42": 7.17558, + "43": 7.16122, + "44": 7.34251, + "45": 7.12147, + "46": 6.82911, + "47": 7.23414, + "48": 7.07998, + "49": 7.51108, + "50": 6.9741 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 57.80279, - "2": 1.26321, - "3": 1.18918, - "4": 2.24643, - "5": 2.25191, - "6": 1.80757, - "7": 2.09086, - "8": 1.69153, - "9": 1.81279, - "10": 1.64882, - "11": 1.03476, - "12": 1.03593, - "13": 1.04348, - "14": 1.03841, - "15": 1.04432, - "16": 1.05281, - "17": 1.04826, - "18": 1.04981, - "19": 1.05351, - "20": 1.04668, - "21": 1.05254, - "22": 1.05391, - "23": 1.04635, - "24": 1.05503, - "25": 1.04226, - "26": 1.0684, - "27": 1.04985, - "28": 1.04233, - "29": 1.05036, - "30": 1.06219, - "31": 1.044, - "32": 1.05614, - "33": 1.05729, - "34": 1.05618, - "35": 1.06289, - "36": 1.05761, - "37": 1.05956, - "38": 1.06343, - "39": 1.06848, - "40": 1.06027, - "41": 1.05493, - "42": 1.05258, - "43": 1.04879, - "44": 1.04949, - "45": 1.05964, - "46": 1.04465, - "47": 1.0491, - "48": 
1.05387, - "49": 1.05218, - "50": 1.05453 + "1": 57.89597, + "2": 1.02226, + "3": 0.91676, + "4": 1.99588, + "5": 2.00486, + "6": 1.51451, + "7": 1.1193, + "8": 1.44004, + "9": 1.59872, + "10": 0.77647, + "11": 0.76373, + "12": 0.78131, + "13": 0.77869, + "14": 0.76703, + "15": 1.37612, + "16": 0.78402, + "17": 0.78337, + "18": 0.78947, + "19": 0.77286, + "20": 0.76873, + "21": 0.76722, + "22": 0.76847, + "23": 0.77301, + "24": 0.77475, + "25": 0.78165, + "26": 0.81166, + "27": 1.50584, + "28": 0.78435, + "29": 0.79046, + "30": 0.77828, + "31": 0.77039, + "32": 0.78392, + "33": 0.77294, + "34": 0.77717, + "35": 0.78379, + "36": 0.76722, + "37": 0.78405, + "38": 0.78584, + "39": 0.77423, + "40": 0.77729, + "41": 0.78273, + "42": 0.78119, + "43": 0.77474, + "44": 0.79851, + "45": 0.7826, + "46": 0.78586, + "47": 0.77961, + "48": 0.77947, + "49": 0.77944, + "50": 0.77976 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml.tmp b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml.tmp deleted file mode 100644 index e36d590170d..00000000000 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml.tmp +++ /dev/null @@ -1,132 +0,0 @@ -ENV_VARS: - CUDA_DEVICE_MAX_CONNECTIONS: 1 - NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 - PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True - NCCL_NVLS_ENABLE: 0 - PYTHONWARNINGS: ignore - NCCL_DEBUG: VERSION -MODEL_ARGS: - # Distributed args - --distributed-timeout-minutes: 60 - --tensor-model-parallel-size: 2 - --pipeline-model-parallel-size: 2 - --num-virtual-stages-per-pipeline-rank: 4 - --expert-model-parallel-size: 4 - --context-parallel-size: 1 - --expert-tensor-parallel-size: 1 - --use-distributed-optimizer: true - --overlap-grad-reduce: true - --overlap-param-gather: true - --attention-backend: fused - # Training args - --use-mcore-models: true - 
--sequence-parallel: true - --disable-bias-linear: true - --micro-batch-size: 4 - --global-batch-size: 32 - --train-iters: 50 - --exit-duration-in-mins: 230 - --no-check-for-nan-in-loss-and-grad: true - --no-rope-fusion: true - --cross-entropy-loss-fusion: true - --cross-entropy-fusion-impl: native - --manual-gc: true - --manual-gc-interval: 100 - # Transformer Engine args - --transformer-impl: transformer_engine - # Data args - --seq-length: 4096 - --data-cache-path: /workspace/data/cache - --data-path: /workspace/data/gpt3_data/my-gpt3_00_text_document - --vocab-file: /workspace/data/gpt3_data/bpe/vocab.json - --merge-file: /workspace/data/gpt3_data/bpe/merges.txt - --split: 949,50,1 - # Add network size args - --num-layers: 16 - --hidden-size: 1024 - --ffn-hidden-size: 4096 - --num-attention-heads: 32 - --kv-channels: 128 - --max-position-embeddings: 4096 - --position-embedding-type: rope - --rotary-base: 10000 - --make-vocab-size-divisible-by: 3232 - --normalization: RMSNorm - --norm-epsilon: 1e-6 - --swiglu: true - --untie-embeddings-and-output-weights: true - --multi-latent-attention: true - # Comment out the following MTP args to disable MTP - --mtp-num-layers: 1 - --mtp-loss-scaling-factor: 0.1 - # Add regularization args - --attention-dropout: 0.0 - --hidden-dropout: 0.0 - --clip-grad: 1.0 - --weight-decay: 0.1 - --qk-layernorm: true - # Add learning rate args - --lr-warmup-fraction: .01 - --lr: 0.00015 - --min-lr: 1.0e-5 - --lr-decay-style: cosine - --adam-beta1: 0.9 - --adam-beta2: 0.95 - # Add MoE args - --num-experts: 32 - --moe-layer-freq: ([0]*1+[1]*15) - --moe-ffn-hidden-size: 1024 - --moe-shared-expert-intermediate-size: 1024 - --moe-router-load-balancing-type: seq_aux_loss - --moe-router-topk: 4 - --moe-token-dispatcher-type: alltoall - --moe-router-pre-softmax: true - --moe-grouped-gemm: true - --moe-aux-loss-coeff: 1e-4 - --moe-router-group-topk: 2 - --moe-router-num-groups: 4 - --moe-router-topk-scaling-factor: 2.0 - 
--moe-router-score-function: sigmoid - --moe-router-enable-expert-bias: true - --moe-router-bias-update-rate: 1e-3 - --moe-router-dtype: fp32 - --moe-permute-fusion: true - # Add MLA args - --q-lora-rank: 1536 - --kv-lora-rank: 512 - --qk-head-dim: 128 - --qk-pos-emb-head-dim: 64 - --v-head-dim: 128 - --rotary-scaling-factor: 40 - --mscale: 1.0 - --mscale-all-dim: 1.0 - # Add validation args - --eval-iters: 32 - --eval-interval: 200 - # Add checkpointing args - --save: /opt/megatron-lm/runs/82c8dc72-e955-4033-a246-b61784f57fa7/checkpoints - --load: /tmp/checkpoints/ - --save-interval: 25 - # Add initialization args - --init-method-std: 0.02 - # Add logging args - --log-timers-to-tensorboard: true - --log-memory-to-tensorboard: true - --log-num-zeros-in-grad: true - --log-params-norm: true - --log-validation-ppl-to-tensorboard: true - --log-throughput: true - --log-interval: 1 - --logging-level: 40 - --tensorboard-dir: /opt/megatron-lm/runs/82c8dc72-e955-4033-a246-b61784f57fa7/tensorboard - # Add mixed precision args - --bf16: true - --exit-interval: 50 -TEST_TYPE: regular -METRICS: - - "iteration-time" - - "lm loss" - - "num-zeros" - - "mem-allocated-bytes" - - "mem-max-allocated-bytes" - - "mtp_1 loss" diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..d4aa4cb5ee9 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,344 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.04747, + "2": 11.03489, + "3": 9.59197, + "4": 9.2607, + "5": 9.25316, + "6": 9.70587, + "7": 9.46635, + "8": 9.01114, + "9": 8.72173, + "10": 9.06704, + "11": 8.59397, + 
"12": 8.5643, + "13": 8.44846, + "14": 7.97921, + "15": 8.04905, + "16": 8.09886, + "17": 8.04172, + "18": 7.76126, + "19": 8.14014, + "20": 7.86027, + "21": 7.54995, + "22": 7.53872, + "23": 7.40693, + "24": 7.40435, + "25": 7.66065, + "26": 7.05772, + "27": 7.59552, + "28": 7.30627, + "29": 7.48007, + "30": 7.63012, + "31": 7.38325, + "32": 7.57843, + "33": 7.62828, + "34": 7.68919, + "35": 7.20168, + "36": 7.07506, + "37": 7.41935, + "38": 7.17961, + "39": 7.54005, + "40": 7.53821, + "41": 7.47888, + "42": 7.24055, + "43": 7.2256, + "44": 7.40803, + "45": 7.1775, + "46": 6.88877, + "47": 7.29436, + "48": 7.13581, + "49": 7.58407, + "50": 7.02865 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 38802648.0, + "2": 38543564.0, + "3": 38740428.0, + "4": 264349216.0, + "5": 224711328.0, + "6": 359592256.0, + "7": 683584064.0, + "8": 850747136.0, + "9": 781151872.0, + "10": 863934336.0, + "11": 784956928.0, + "12": 787741824.0, + "13": 906642432.0, + "14": 793413952.0, + "15": 724351360.0, + "16": 929182656.0, + "17": 728944832.0, + "18": 715233856.0, + "19": 894586752.0, + "20": 942182208.0, + "21": 712310464.0, + "22": 903670336.0, + "23": 882199552.0, + "24": 867334400.0, + "25": 874751488.0, + "26": 844191104.0, + "27": 813243648.0, + "28": 626785920.0, + "29": 808773120.0, + "30": 602759296.0, + "31": 793783168.0, + "32": 768613888.0, + "33": 721639040.0, + "34": 734472448.0, + "35": 734570880.0, + "36": 703058560.0, + "37": 692109824.0, + "38": 649260992.0, + "39": 620422656.0, + "40": 604143616.0, + "41": 598320448.0, + "42": 573424384.0, + "43": 576846912.0, + "44": 570038144.0, + "45": 540081024.0, + "46": 501251008.0, + "47": 497637664.0, + "48": 494691072.0, + "49": 490977312.0, + "50": 463542304.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 7321331200.0, + "2": 7321333248.0, + "3": 7321333248.0, + "4": 7321333248.0, + "5": 
7321333248.0, + "6": 7321333248.0, + "7": 7321333248.0, + "8": 7321333248.0, + "9": 7321333248.0, + "10": 7321333248.0, + "11": 7321333248.0, + "12": 7321333248.0, + "13": 7321333248.0, + "14": 7321333248.0, + "15": 7321333248.0, + "16": 7321333248.0, + "17": 7321333248.0, + "18": 7321333248.0, + "19": 7321333248.0, + "20": 7321333248.0, + "21": 7321333248.0, + "22": 7321333248.0, + "23": 7321333248.0, + "24": 7321333248.0, + "25": 7321333248.0, + "26": 7321333248.0, + "27": 7321333248.0, + "28": 7321333248.0, + "29": 7321333248.0, + "30": 7321333248.0, + "31": 7321333248.0, + "32": 7321333248.0, + "33": 7321333248.0, + "34": 7321333248.0, + "35": 7321333248.0, + "36": 7321333248.0, + "37": 7321333248.0, + "38": 7321333248.0, + "39": 7321333248.0, + "40": 7321333248.0, + "41": 7321333248.0, + "42": 7321333248.0, + "43": 7321333248.0, + "44": 7321333248.0, + "45": 7321333248.0, + "46": 7321333248.0, + "47": 7321333248.0, + "48": 7321333248.0, + "49": 7321333248.0, + "50": 7321333248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22198937600.0, + "2": 24950007808.0, + "3": 24950007808.0, + "4": 24950007808.0, + "5": 24950007808.0, + "6": 24950007808.0, + "7": 24950007808.0, + "8": 24950007808.0, + "9": 24950007808.0, + "10": 24950007808.0, + "11": 24950007808.0, + "12": 24950007808.0, + "13": 24950007808.0, + "14": 24950007808.0, + "15": 24950007808.0, + "16": 24950007808.0, + "17": 24950007808.0, + "18": 24950007808.0, + "19": 24950007808.0, + "20": 24950007808.0, + "21": 24950007808.0, + "22": 24950007808.0, + "23": 24950007808.0, + "24": 24950007808.0, + "25": 24950007808.0, + "26": 24950007808.0, + "27": 25072799744.0, + "28": 25343600640.0, + "29": 25625788416.0, + "30": 25625788416.0, + "31": 25628155904.0, + "32": 25707937792.0, + "33": 25707937792.0, + "34": 25707937792.0, + "35": 25707937792.0, + "36": 25707937792.0, + "37": 25707937792.0, + "38": 25707937792.0, + "39": 25707937792.0, 
+ "40": 25707937792.0, + "41": 25707937792.0, + "42": 25707937792.0, + "43": 25707937792.0, + "44": 25707937792.0, + "45": 25707937792.0, + "46": 25707937792.0, + "47": 25707937792.0, + "48": 25707937792.0, + "49": 25707937792.0, + "50": 25707937792.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.07742, + "2": 11.07559, + "3": 10.5272, + "4": 10.08877, + "5": 9.81119, + "6": 9.88673, + "7": 9.70278, + "8": 8.9944, + "9": 8.79002, + "10": 9.07171, + "11": 8.44594, + "12": 8.50226, + "13": 8.40983, + "14": 7.83955, + "15": 7.97902, + "16": 8.03361, + "17": 7.99642, + "18": 7.71928, + "19": 8.10116, + "20": 7.82113, + "21": 7.51112, + "22": 7.48906, + "23": 7.35335, + "24": 7.35884, + "25": 7.60836, + "26": 7.01391, + "27": 7.54721, + "28": 7.25644, + "29": 7.43129, + "30": 7.57524, + "31": 7.321, + "32": 7.50218, + "33": 7.56009, + "34": 7.62505, + "35": 7.14234, + "36": 7.0092, + "37": 7.34655, + "38": 7.11926, + "39": 7.4822, + "40": 7.46808, + "41": 7.41272, + "42": 7.1698, + "43": 7.15213, + "44": 7.33728, + "45": 7.11437, + "46": 6.81846, + "47": 7.2282, + "48": 7.07339, + "49": 7.50345, + "50": 6.96783 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 66.41406, + "2": 1.09711, + "3": 0.98871, + "4": 1.29382, + "5": 0.90133, + "6": 0.89235, + "7": 1.14675, + "8": 1.06393, + "9": 0.87141, + "10": 0.88489, + "11": 0.87653, + "12": 0.86844, + "13": 0.87292, + "14": 0.88542, + "15": 0.87413, + "16": 0.8658, + "17": 0.86683, + "18": 0.85604, + "19": 0.87144, + "20": 0.8739, + "21": 0.87412, + "22": 0.8842, + "23": 0.87866, + "24": 0.87817, + "25": 0.87219, + "26": 0.88191, + "27": 0.86283, + "28": 0.85644, + "29": 0.85444, + "30": 0.86821, + "31": 0.8659, + "32": 0.86683, + "33": 0.86547, + "34": 0.86171, + "35": 0.84405, + "36": 0.84744, + "37": 0.84896, + "38": 0.85314, + "39": 0.85693, + "40": 0.83956, + "41": 0.844, + "42": 0.84413, + "43": 
0.83996, + "44": 0.84204, + "45": 0.84489, + "46": 0.83423, + "47": 0.83738, + "48": 0.85356, + "49": 0.86096, + "50": 0.85603 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json index daa04af43dd..9ba3e686ab8 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.95004, - "2": 10.9521, - "3": 10.5115, - "4": 9.96454, - "5": 9.93941, + "1": 10.94947, + "2": 10.95236, + "3": 10.50817, + "4": 9.96373, + "5": 9.93907, "6": 9.67273, - "7": 10.20975, - "8": 9.49716, - "9": 9.55902, - "10": 9.79742, - "11": 9.30109, - "12": 9.40483, - "13": 9.39546, - "14": 8.84681, - "15": 9.02444, - "16": 9.07121, - "17": 9.04574, - "18": 8.75678, - "19": 9.18159, - "20": 8.8595, - "21": 8.53503, - "22": 8.55182, - "23": 8.42441, - "24": 8.37608, - "25": 8.64304, - "26": 7.97393, - "27": 8.56806, - "28": 8.19764, - "29": 8.3928, - "30": 8.67283, - "31": 8.289, - "32": 8.43572, - "33": 8.5568, - "34": 8.66018, - "35": 8.07934, - "36": 7.94976, - "37": 8.29565, - "38": 7.98044, - "39": 8.39201, - "40": 8.35513, - "41": 8.31876, - "42": 8.0583, - "43": 8.03283, - "44": 8.24243, - "45": 8.10277, - "46": 7.61696, - "47": 8.15273, - "48": 8.00569, - "49": 8.38688, - "50": 7.81491 + "7": 10.2137, + "8": 9.4963, + "9": 9.56483, + "10": 9.7979, + "11": 9.30107, + "12": 9.40465, + "13": 9.39581, + "14": 8.84796, + "15": 9.02503, + "16": 9.07162, + "17": 9.04638, + "18": 8.75696, + "19": 9.18152, + "20": 
8.86295, + "21": 8.5361, + "22": 8.55339, + "23": 8.42711, + "24": 8.37747, + "25": 8.64415, + "26": 7.97441, + "27": 8.56675, + "28": 8.19618, + "29": 8.39325, + "30": 8.67137, + "31": 8.28979, + "32": 8.43623, + "33": 8.55717, + "34": 8.6598, + "35": 8.07929, + "36": 7.94958, + "37": 8.29465, + "38": 7.9784, + "39": 8.39172, + "40": 8.35622, + "41": 8.31635, + "42": 8.06507, + "43": 8.03396, + "44": 8.24146, + "45": 8.1039, + "46": 7.61771, + "47": 8.15375, + "48": 8.00818, + "49": 8.38737, + "50": 7.81612 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19403624.0, - "2": 19274194.0, - "3": 19372760.0, - "4": 86525248.0, - "5": 148575568.0, - "6": 145226704.0, - "7": 171879984.0, - "8": 195785248.0, - "9": 164124752.0, - "10": 167684736.0, - "11": 221077344.0, - "12": 200384224.0, - "13": 248872528.0, - "14": 211169424.0, - "15": 214304608.0, - "16": 216075632.0, - "17": 267845984.0, - "18": 170470336.0, - "19": 176865072.0, - "20": 187955392.0, - "21": 225750704.0, - "22": 247396816.0, - "23": 211643856.0, - "24": 205638464.0, - "25": 277022272.0, - "26": 291562304.0, - "27": 225789840.0, - "28": 288202368.0, - "29": 198390384.0, - "30": 213302208.0, - "31": 227204752.0, - "32": 271112416.0, - "33": 231840432.0, - "34": 203575536.0, - "35": 191152368.0, - "36": 222566928.0, - "37": 177810112.0, - "38": 228708544.0, - "39": 211168784.0, - "40": 215603968.0, - "41": 200089440.0, - "42": 228529888.0, - "43": 198782848.0, - "44": 141902272.0, - "45": 181922816.0, - "46": 115369856.0, - "47": 170214176.0, - "48": 137292832.0, - "49": 97654936.0, - "50": 160979632.0 + "1": 19403784.0, + "2": 19274252.0, + "3": 19373794.0, + "4": 89687600.0, + "5": 139124400.0, + "6": 138949920.0, + "7": 170316512.0, + "8": 192665728.0, + "9": 168817872.0, + "10": 156652864.0, + "11": 217935232.0, + "12": 213007792.0, + "13": 228424704.0, + "14": 217442256.0, + "15": 237921408.0, + "16": 225523072.0, + "17": 225458384.0, + "18": 
164166928.0, + "19": 164457904.0, + "20": 180124848.0, + "21": 230463232.0, + "22": 230096384.0, + "23": 210054656.0, + "24": 200985472.0, + "25": 248708512.0, + "26": 301000896.0, + "27": 205364384.0, + "28": 270886048.0, + "29": 259695952.0, + "30": 224280720.0, + "31": 244360992.0, + "32": 189382672.0, + "33": 231930816.0, + "34": 206712432.0, + "35": 194319616.0, + "36": 246163408.0, + "37": 193561968.0, + "38": 228822688.0, + "39": 226941728.0, + "40": 196742032.0, + "41": 200179904.0, + "42": 219112640.0, + "43": 186235920.0, + "44": 138763920.0, + "45": 148907984.0, + "46": 109115896.0, + "47": 167015728.0, + "48": 156135104.0, + "49": 91378480.0, + "50": 164099648.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4882187264.0, - "2": 4881607168.0, - "3": 4882283008.0, - "4": 4881322496.0, - "5": 4882174464.0, - "6": 4883177984.0, - "7": 4883252736.0, - "8": 4881774080.0, - "9": 4881443328.0, - "10": 4884319744.0, - "11": 4882319872.0, - "12": 4881232384.0, - "13": 4880836096.0, - "14": 4882124288.0, - "15": 4882108928.0, - "16": 4883384832.0, - "17": 4880466432.0, - "18": 4881518080.0, - "19": 4881734144.0, - "20": 4883215872.0, - "21": 4883534336.0, - "22": 4882774528.0, - "23": 4881818112.0, - "24": 4882441728.0, - "25": 4880546304.0, - "26": 4882178560.0, - "27": 4881892864.0, - "28": 4881869312.0, - "29": 4882979328.0, - "30": 4882715136.0, - "31": 4883084800.0, - "32": 4881436160.0, - "33": 4881766912.0, - "34": 4881406464.0, - "35": 4881531392.0, - "36": 4881479168.0, - "37": 4882455040.0, - "38": 4882054656.0, - "39": 4882005504.0, - "40": 4882743808.0, - "41": 4881211904.0, - "42": 4881378816.0, - "43": 4882133504.0, - "44": 4881860096.0, - "45": 4883165696.0, - "46": 4882168320.0, - "47": 4881526272.0, - "48": 4882125312.0, - "49": 4881533440.0, - "50": 4881598976.0 + "1": 4749337600.0, + "2": 4748343808.0, + "3": 4747997696.0, + "4": 4747469312.0, + "5": 4745943552.0, + "6": 4746412544.0, 
+ "7": 4749017600.0, + "8": 4746762752.0, + "9": 4746394112.0, + "10": 4748286464.0, + "11": 4747621888.0, + "12": 4747802112.0, + "13": 4746905088.0, + "14": 4746850816.0, + "15": 4745785856.0, + "16": 4746166784.0, + "17": 4745583104.0, + "18": 4746839552.0, + "19": 4746510848.0, + "20": 4748375552.0, + "21": 4746974720.0, + "22": 4747533824.0, + "23": 4746271232.0, + "24": 4747352576.0, + "25": 4746148352.0, + "26": 4746516992.0, + "27": 4748668416.0, + "28": 4746871296.0, + "29": 4747913728.0, + "30": 4746131968.0, + "31": 4747437568.0, + "32": 4748567040.0, + "33": 4746713600.0, + "34": 4747983360.0, + "35": 4747450880.0, + "36": 4748372480.0, + "37": 4747075072.0, + "38": 4748749312.0, + "39": 4747972096.0, + "40": 4746372608.0, + "41": 4747513344.0, + "42": 4747912704.0, + "43": 4746867200.0, + "44": 4747612672.0, + "45": 4748287488.0, + "46": 4746935808.0, + "47": 4748032512.0, + "48": 4747668992.0, + "49": 4747238912.0, + "50": 4749120000.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 41210470400.0, - "2": 41210470400.0, - "3": 41210470400.0, - "4": 41210470400.0, - "5": 41210470400.0, - "6": 41210470400.0, - "7": 41210470400.0, - "8": 41210470400.0, - "9": 41210470400.0, - "10": 41210470400.0, - "11": 41210470400.0, - "12": 41210470400.0, - "13": 41210470400.0, - "14": 41210470400.0, - "15": 41210470400.0, - "16": 41210470400.0, - "17": 41210470400.0, - "18": 41210470400.0, - "19": 41210470400.0, - "20": 41210470400.0, - "21": 41210470400.0, - "22": 41210470400.0, - "23": 41210470400.0, - "24": 41210470400.0, - "25": 41210470400.0, - "26": 41210470400.0, - "27": 41210470400.0, - "28": 41210470400.0, - "29": 41210470400.0, - "30": 41210470400.0, - "31": 41210470400.0, - "32": 41210470400.0, - "33": 41210470400.0, - "34": 41210470400.0, - "35": 41210470400.0, - "36": 41210470400.0, - "37": 41210470400.0, - "38": 41210470400.0, - "39": 41210470400.0, - "40": 41210470400.0, - "41": 
41210470400.0, - "42": 41210470400.0, - "43": 41210470400.0, - "44": 41210470400.0, - "45": 41210470400.0, - "46": 41210470400.0, - "47": 41210470400.0, - "48": 41210470400.0, - "49": 41210470400.0, - "50": 41210470400.0 + "1": 11455561728.0, + "2": 12440659968.0, + "3": 12440659968.0, + "4": 12440659968.0, + "5": 12440659968.0, + "6": 12576563200.0, + "7": 12813101056.0, + "8": 12813101056.0, + "9": 13424891904.0, + "10": 13556338688.0, + "11": 13556338688.0, + "12": 13556338688.0, + "13": 13556338688.0, + "14": 13556338688.0, + "15": 13556338688.0, + "16": 13556338688.0, + "17": 13556338688.0, + "18": 13556338688.0, + "19": 13556338688.0, + "20": 13556338688.0, + "21": 13758310400.0, + "22": 13883041792.0, + "23": 13883041792.0, + "24": 13883041792.0, + "25": 13883041792.0, + "26": 13883041792.0, + "27": 13883041792.0, + "28": 13883041792.0, + "29": 13883041792.0, + "30": 13883041792.0, + "31": 13883041792.0, + "32": 13883041792.0, + "33": 13883041792.0, + "34": 13883041792.0, + "35": 13883041792.0, + "36": 13883041792.0, + "37": 13883041792.0, + "38": 13883041792.0, + "39": 13883041792.0, + "40": 13883041792.0, + "41": 13883041792.0, + "42": 13883041792.0, + "43": 13883041792.0, + "44": 13883041792.0, + "45": 13883041792.0, + "46": 13883041792.0, + "47": 13883041792.0, + "48": 13883041792.0, + "49": 13883041792.0, + "50": 13883041792.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 96.21947, - "2": 1.10023, - "3": 0.96399, - "4": 0.91113, - "5": 1.27509, - "6": 1.00484, - "7": 1.01236, - "8": 1.1739, - "9": 0.89406, - "10": 0.88836, - "11": 0.92033, - "12": 0.88331, - "13": 0.88179, - "14": 0.88307, - "15": 0.88648, - "16": 0.88425, - "17": 0.87155, - "18": 0.87556, - "19": 0.87374, - "20": 0.8744, - "21": 0.86757, - "22": 0.87217, - "23": 0.8736, - "24": 0.86646, - "25": 0.87328, - "26": 0.87121, - "27": 0.85886, - "28": 0.86392, - "29": 0.86385, - "30": 0.86425, - "31": 0.8631, - "32": 0.8617, - "33": 
0.86069, - "34": 0.86829, - "35": 0.86837, - "36": 0.86776, - "37": 0.86686, - "38": 0.86359, - "39": 0.8677, - "40": 0.86441, - "41": 0.86179, - "42": 0.86079, - "43": 0.86149, - "44": 0.86222, - "45": 0.86336, - "46": 0.85875, - "47": 0.86219, - "48": 0.86026, - "49": 0.85894, - "50": 0.8544 + "1": 99.19363, + "2": 0.87925, + "3": 0.76355, + "4": 0.70351, + "5": 1.06855, + "6": 0.8083, + "7": 0.79282, + "8": 0.81872, + "9": 0.67053, + "10": 0.64913, + "11": 0.72935, + "12": 0.64945, + "13": 0.64181, + "14": 0.63807, + "15": 0.65651, + "16": 0.66428, + "17": 0.65744, + "18": 0.65362, + "19": 0.65862, + "20": 0.6544, + "21": 0.64288, + "22": 0.64951, + "23": 0.64322, + "24": 0.64447, + "25": 0.63601, + "26": 0.62955, + "27": 0.6244, + "28": 0.62697, + "29": 0.62787, + "30": 0.6295, + "31": 0.63726, + "32": 0.62178, + "33": 0.62521, + "34": 0.62615, + "35": 0.61895, + "36": 0.62424, + "37": 0.62219, + "38": 0.62548, + "39": 0.62127, + "40": 0.62356, + "41": 0.6165, + "42": 0.61786, + "43": 0.61742, + "44": 0.61943, + "45": 0.61884, + "46": 0.62012, + "47": 0.61656, + "48": 0.6143, + "49": 0.61232, + "50": 0.6085 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..29b1b467978 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,344 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.0637, + "2": 11.03838, + "3": 9.79196, + "4": 14.17309, + "5": 9.48263, + "6": 9.30356, + "7": 9.27632, + "8": 8.75189, + "9": 8.70462, + "10": 9.04035, + "11": 8.41109, + "12": 8.53109, + "13": 8.43144, + 
"14": 7.93673, + "15": 8.00837, + "16": 8.08212, + "17": 8.06887, + "18": 7.75236, + "19": 8.13737, + "20": 7.88364, + "21": 7.56605, + "22": 7.55552, + "23": 7.42862, + "24": 7.41252, + "25": 7.67597, + "26": 7.08176, + "27": 7.62221, + "28": 7.32629, + "29": 7.49894, + "30": 7.63447, + "31": 7.3983, + "32": 7.59785, + "33": 7.64396, + "34": 7.70726, + "35": 7.21393, + "36": 7.08985, + "37": 7.42971, + "38": 7.19273, + "39": 7.56041, + "40": 7.55564, + "41": 7.49928, + "42": 7.25988, + "43": 7.24878, + "44": 7.42783, + "45": 7.21045, + "46": 6.91669, + "47": 7.31999, + "48": 7.16939, + "49": 7.62783, + "50": 7.05439 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 38802064.0, + "2": 38543200.0, + "3": 38744220.0, + "4": 166695072.0, + "5": 394456256.0, + "6": 441303136.0, + "7": 538731776.0, + "8": 680781184.0, + "9": 564001216.0, + "10": 571185472.0, + "11": 624455360.0, + "12": 680622208.0, + "13": 777548288.0, + "14": 717772992.0, + "15": 699100416.0, + "16": 677486208.0, + "17": 645761024.0, + "18": 671155776.0, + "19": 674320512.0, + "20": 891692160.0, + "21": 658833920.0, + "22": 802998016.0, + "23": 756352768.0, + "24": 772904192.0, + "25": 748799104.0, + "26": 771817792.0, + "27": 772312064.0, + "28": 655008000.0, + "29": 783495808.0, + "30": 794511296.0, + "31": 756035712.0, + "32": 535862592.0, + "33": 680633984.0, + "34": 482597312.0, + "35": 671593792.0, + "36": 658959488.0, + "37": 626012736.0, + "38": 614650240.0, + "39": 595183872.0, + "40": 421718816.0, + "41": 557433600.0, + "42": 545065344.0, + "43": 539024064.0, + "44": 544803840.0, + "45": 517934176.0, + "46": 504352736.0, + "47": 497582464.0, + "48": 500981632.0, + "49": 490922656.0, + "50": 472902496.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6294696448.0, + "2": 6295491072.0, + "3": 6296283648.0, + "4": 6297076224.0, + "5": 6297868800.0, + "6": 6298661376.0, + 
"7": 6294104064.0, + "8": 6294896640.0, + "9": 6295689216.0, + "10": 6296481792.0, + "11": 6294500352.0, + "12": 6295292928.0, + "13": 6296085504.0, + "14": 6296878080.0, + "15": 6297670656.0, + "16": 6298463232.0, + "17": 6299255808.0, + "18": 6300048384.0, + "19": 6300840960.0, + "20": 6301633536.0, + "21": 6302426112.0, + "22": 6303218688.0, + "23": 6304011264.0, + "24": 6304803840.0, + "25": 6305596416.0, + "26": 6306388992.0, + "27": 6307181568.0, + "28": 6307974144.0, + "29": 6308766720.0, + "30": 6309559296.0, + "31": 6310351872.0, + "32": 6311144448.0, + "33": 6311937024.0, + "34": 6312729600.0, + "35": 6313522176.0, + "36": 6314314752.0, + "37": 6315107328.0, + "38": 6315899904.0, + "39": 6316692480.0, + "40": 6317485056.0, + "41": 6318277632.0, + "42": 6319070208.0, + "43": 6319862784.0, + "44": 6320655360.0, + "45": 6321447936.0, + "46": 6322240512.0, + "47": 6323033088.0, + "48": 6323825664.0, + "49": 6324618240.0, + "50": 6325410816.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 46771978240.0, + "2": 49466654720.0, + "3": 51157819392.0, + "4": 51157819392.0, + "5": 51157819392.0, + "6": 51157819392.0, + "7": 51157819392.0, + "8": 51157819392.0, + "9": 51157819392.0, + "10": 51157819392.0, + "11": 51157819392.0, + "12": 51157819392.0, + "13": 51157819392.0, + "14": 51157819392.0, + "15": 51157819392.0, + "16": 51157819392.0, + "17": 51157819392.0, + "18": 51157819392.0, + "19": 51157819392.0, + "20": 51157819392.0, + "21": 51157819392.0, + "22": 51157819392.0, + "23": 51157819392.0, + "24": 51157819392.0, + "25": 51157819392.0, + "26": 51157819392.0, + "27": 51157819392.0, + "28": 51157819392.0, + "29": 51157819392.0, + "30": 51157819392.0, + "31": 51157819392.0, + "32": 51157819392.0, + "33": 51157819392.0, + "34": 51157819392.0, + "35": 51157819392.0, + "36": 51157819392.0, + "37": 51157819392.0, + "38": 51157819392.0, + "39": 51157819392.0, + "40": 51157819392.0, + "41": 
51157819392.0, + "42": 51157819392.0, + "43": 51157819392.0, + "44": 51157819392.0, + "45": 51157819392.0, + "46": 51157819392.0, + "47": 51157819392.0, + "48": 51157819392.0, + "49": 51157819392.0, + "50": 51157819392.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.04508, + "2": 11.05397, + "3": 10.54505, + "4": 9.99194, + "5": 9.76285, + "6": 9.45507, + "7": 9.54431, + "8": 8.91725, + "9": 8.74784, + "10": 9.04997, + "11": 8.40193, + "12": 8.48288, + "13": 8.36926, + "14": 7.81448, + "15": 7.93865, + "16": 8.02231, + "17": 7.96741, + "18": 7.70552, + "19": 8.09012, + "20": 7.79984, + "21": 7.48241, + "22": 7.49502, + "23": 7.35415, + "24": 7.34793, + "25": 7.60324, + "26": 7.01638, + "27": 7.55495, + "28": 7.24721, + "29": 7.43133, + "30": 7.56633, + "31": 7.31391, + "32": 7.50445, + "33": 7.55658, + "34": 7.62234, + "35": 7.13802, + "36": 7.00593, + "37": 7.33916, + "38": 7.1095, + "39": 7.4736, + "40": 7.45784, + "41": 7.40514, + "42": 7.15986, + "43": 7.14965, + "44": 7.32758, + "45": 7.11892, + "46": 6.81056, + "47": 7.2234, + "48": 7.06789, + "49": 7.503, + "50": 6.9559 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 58.25602, + "2": 1.30671, + "3": 1.18374, + "4": 1.08853, + "5": 3.28347, + "6": 2.13071, + "7": 2.96694, + "8": 1.2675, + "9": 1.07672, + "10": 1.07909, + "11": 2.90834, + "12": 1.06176, + "13": 1.06257, + "14": 1.06668, + "15": 1.08083, + "16": 1.08186, + "17": 1.06861, + "18": 1.07223, + "19": 1.06661, + "20": 1.07354, + "21": 1.07863, + "22": 1.08557, + "23": 1.06174, + "24": 1.07533, + "25": 1.06172, + "26": 1.06344, + "27": 1.05522, + "28": 1.05011, + "29": 1.04098, + "30": 1.04622, + "31": 1.0423, + "32": 1.04292, + "33": 1.06328, + "34": 1.03657, + "35": 1.04963, + "36": 1.05103, + "37": 1.04147, + "38": 1.04912, + "39": 1.04838, + "40": 1.04559, + "41": 1.05462, + "42": 1.05103, + "43": 1.04965, + "44": 1.05296, 
+ "45": 1.05039, + "46": 1.05609, + "47": 1.0476, + "48": 1.053, + "49": 1.04626, + "50": 1.05911 + } + } +} \ No newline at end of file diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index af4b4203803..638ee1a89a3 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -69,8 +69,6 @@ products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - - environment: [lts] - scope: [nightly] - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last] products: - environment: [dev] @@ -125,8 +123,6 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - environment: [lts] - scope: [nightly] - test_case: [gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer] products: - environment: [dev] From bf1a5035f1f776b0bded8bffa0a36eeb573a7a8e Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Thu, 16 Oct 2025 18:44:00 -0700 Subject: [PATCH 026/334] ADLR/megatron-lm!4232 - [DEV] improve muon and layer-wise dist opt unit tests --- .../core/optimizer/layer_wise_optimizer.py | 2 +- tests/unit_tests/test_layer_wise_optimizer.py | 394 +++++++++++ tests/unit_tests/test_muon_optimizer.py | 653 +++++++++++++++--- 3 files changed, 934 insertions(+), 115 deletions(-) create mode 100644 tests/unit_tests/test_layer_wise_optimizer.py diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py index 2bf4e5e613b..620b1a1994e 100644 --- a/megatron/core/optimizer/layer_wise_optimizer.py +++ b/megatron/core/optimizer/layer_wise_optimizer.py @@ -84,7 +84,7 @@ def shard_params(self, optimizers): param_groups += optimizer.param_groups for group in param_groups: params_this_rank = [] - if group["is_expert_parallel"]: + if group.get("is_expert_parallel", False): for p in group["params"]: if expt_dp_idx == get_pg_rank(self.pg_collection.expt_dp): params_this_rank.append(p) diff --git 
a/tests/unit_tests/test_layer_wise_optimizer.py b/tests/unit_tests/test_layer_wise_optimizer.py new file mode 100644 index 00000000000..3993e217734 --- /dev/null +++ b/tests/unit_tests/test_layer_wise_optimizer.py @@ -0,0 +1,394 @@ +import os +import tempfile + +import pytest +import torch +import torch.nn as nn +import torch.nn.functional as F +from packaging.version import Version + +from megatron.core import parallel_state +from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig +from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer +from megatron.core.optimizer.layer_wise_optimizer import LayerWiseDistributedOptimizer +from megatron.core.optimizer.optimizer import Float16OptimizerWithFloat16Params, FP32Optimizer +from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.transformer import TransformerConfig +from megatron.core.utils import get_pg_size +from tests.unit_tests.test_utilities import Utils + +# Skip all tests in this file for LTS versions +pytestmark = pytest.mark.skipif( + Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"), + reason="Skip layer-wise optimizer for LTS test", +) + + +class SimpleModel(nn.Module): + """Simple model for testing LayerWiseDistributedOptimizer. + + Model with 5 layers to ensure more than 8 parameters (10 total: 5 weights + 5 biases). 
+ """ + + def __init__(self, input_size=80, hidden_size=48, output_size=10): + super().__init__() + self.fc1 = nn.Linear(input_size, hidden_size) + self.fc2 = nn.Linear(hidden_size, 32) + self.fc3 = nn.Linear(32, 24) + self.fc4 = nn.Linear(24, 16) + self.fc5 = nn.Linear(16, output_size) + + def forward(self, x): + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = F.relu(self.fc3(x)) + x = F.relu(self.fc4(x)) + x = self.fc5(x) + return x + + +class TinyModel(nn.Module): + """Tiny model with only 1 layer (2 parameters: weight and bias).""" + + def __init__(self): + super().__init__() + self.fc1 = nn.Linear(10, 5) + + def forward(self, x): + return self.fc1(x) + + +@pytest.mark.skipif( + int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1" +) +class TestLayerWiseOptimizer: + """Test class for LayerWiseDistributedOptimizer with common setup code.""" + + @pytest.fixture(autouse=True) + def setup_and_teardown(self): + """Setup and teardown for each test.""" + world = int(os.getenv('WORLD_SIZE', '1')) + rank = int(os.getenv('RANK', '0')) + Utils.initialize_model_parallel() + yield + Utils.destroy_model_parallel() + + def create_model_and_optimizer( + self, + model_class=SimpleModel, + clip_grad=1.0, + model_kwargs=None, + use_layer_wise=True, + copy_from=None, + ): + """Create model, DDP wrapper, and optimizer. 
+ + Args: + model_class: Model class to instantiate + clip_grad: Optional gradient clipping value + model_kwargs: Optional kwargs for model initialization + use_layer_wise: If True, wrap optimizer in LayerWiseDistributedOptimizer; + if False, use get_megatron_optimizer instead (for reference) + + Returns: + tuple: (model, optimizer, pg_collection) + """ + if model_kwargs is None: + model_kwargs = {} + + model = model_class(**model_kwargs).bfloat16().cuda() + model.requires_grad_(True) + + ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False) + model = DistributedDataParallel( + TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + ) + if copy_from: + model.module.load_state_dict(copy_from.module.state_dict()) + else: + model.broadcast_params() + + optimizer_config = OptimizerConfig( + optimizer='adam', + lr=0.01, + weight_decay=0.01, + bf16=not use_layer_wise, + use_distributed_optimizer=False, + clip_grad=clip_grad, + ) + + pg_collection = ProcessGroupCollection.use_mpu_process_groups() + pg_collection.dp_cp = parallel_state.get_data_parallel_group(with_context_parallel=True) + pg_collection.expt_dp = parallel_state.get_expert_data_parallel_group() + + optimizer = get_megatron_optimizer(optimizer_config, [model]) + if use_layer_wise: + optimizer_config.bf16 = True + optimizer = LayerWiseDistributedOptimizer( + optimizer.chained_optimizers, optimizer_config, pg_collection + ) + return model, optimizer, pg_collection + + def create_reference_model(self, model): + """Create a reference model by cloning the current model.""" + reference_model = type(model.module)().bfloat16().cuda() + reference_model.load_state_dict(model.module.state_dict()) + return reference_model + + def test_basic(self): + """Test basic LayerWiseDistributedOptimizer initialization and step with bf16.""" + model, optimizer, pg_collection = self.create_model_and_optimizer() + + # Verify basic properties + assert optimizer is not None, "Optimizer should 
not be None" + assert hasattr(optimizer, 'chained_optimizers'), "Should be a ChainedOptimizer" + + reference_model = self.create_reference_model(model) + + input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + update_successful, grad_norm, num_zeros = optimizer.step() + + assert update_successful, "Optimizer step should be successful" + + # Verify parameters were updated + params_updated = 0 + for param, ref_param in zip(model.parameters(), reference_model.parameters()): + if not torch.equal(param.data, ref_param.data): + params_updated += 1 + + assert params_updated > 0, "At least some parameters should be updated" + + # Verify all ranks have the same updated parameters (test allgather) + dp_size = get_pg_size(pg_collection.dp_cp) + + if dp_size > 1: + for name, param in model.named_parameters(): + # Gather parameters from all ranks + param_list = [torch.zeros_like(param.data) for _ in range(dp_size)] + torch.distributed.all_gather(param_list, param.data, group=pg_collection.dp_cp) + + # Verify all ranks have the same parameter values + for i in range(1, dp_size): + try: + torch.testing.assert_close(param_list[0], param_list[i]) + except AssertionError as e: + # Append additional context without overwriting the default message + raise AssertionError( + f"Parameter {name} differs between rank 0 and rank {i}. 
{str(e)}" + ) from None + + def test_get_grad_norm(self): + """Test LayerWiseDistributedOptimizer gradient norm computation.""" + model, optimizer, pg_collection = self.create_model_and_optimizer() + reference_model, reference_optimizer, _ = self.create_model_and_optimizer( + use_layer_wise=False + ) + + # Set same gradients on both models + # note that model is different at this point but we're only testing grad norm here + for param, ref_param in zip(model.parameters(), reference_model.parameters()): + grad_value = torch.randn_like(param) + torch.distributed.broadcast(grad_value, src=0, group=pg_collection.dp_cp) + param.main_grad = grad_value.float().detach() + ref_param.main_grad = grad_value.float().detach() + + # Test get_grad_norm on both optimizers + optimizer.prepare_grads() + grad_norm = optimizer.get_grad_norm() + + reference_optimizer.prepare_grads() + reference_grad_norm = reference_optimizer.get_grad_norm() + + assert grad_norm is not None, "Grad norm should not be None" + assert grad_norm >= 0, "Grad norm should be non-negative" + + # Compare with reference optimizer grad norm + torch.testing.assert_close(grad_norm, reference_grad_norm, rtol=1e-5, atol=1e-5) + + def test_state_dict(self): + """Test LayerWiseDistributedOptimizer state dict save and load.""" + model, optimizer, pg_collection = self.create_model_and_optimizer() + + for param in model.parameters(): + param.grad = torch.randn_like(param) + optimizer.step() + + # Test state_dict + state_dict = optimizer.state_dict() + + # Test load_state_dict + # TODO(deyuf): fix this. 
not going through get() will cause missing keys like wd_mult + # optimizer.load_state_dict(state_dict) + + def test_save_load_file(self): + """Test LayerWiseDistributedOptimizer save and load state dict to/from file.""" + model, optimizer, pg_collection = self.create_model_and_optimizer() + + for param in model.parameters(): + param.grad = torch.randn_like(param) + optimizer.step() + + # Test save to file + with tempfile.NamedTemporaryFile(delete=False, suffix='.pt') as tmp_file: + temp_filename = tmp_file.name + + try: + optimizer.save_state_dict_to_file(temp_filename) + assert os.path.exists(temp_filename), "State dict file should be created" + + # Test load from file + # TODO(deyuf): fix this. not going through get() will cause missing keys like wd_mult + # optimizer.load_state_dict_from_file(temp_filename) + finally: + # Clean up temporary file + if os.path.exists(temp_filename): + os.remove(temp_filename) + + def test_multiple_optimizers(self): + """Test LayerWiseDistributedOptimizer with multiple chained optimizers. + + This test properly tests allgather functionality with multiple ranks. 
+ """ + model = SimpleModel().bfloat16().cuda() + model.requires_grad_(True) + + ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False) + model = DistributedDataParallel( + TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + ) + + optimizer_config = OptimizerConfig( + optimizer='adam', lr=0.01, bf16=True, use_distributed_optimizer=False + ) + + # Split parameters into two groups for testing multiple optimizers + params = list(model.parameters()) + mid_point = len(params) // 2 + param_groups_1 = [{'params': params[:mid_point]}] + param_groups_2 = [{'params': params[mid_point:]}] + + # Create two separate base optimizers + base_optimizer_1 = torch.optim.Adam(param_groups_1, lr=optimizer_config.lr) + base_optimizer_2 = torch.optim.Adam(param_groups_2, lr=optimizer_config.lr) + + wrapped_optimizer_1 = FP32Optimizer(base_optimizer_1, optimizer_config, None) + wrapped_optimizer_2 = FP32Optimizer(base_optimizer_2, optimizer_config, None) + + pg_collection = ProcessGroupCollection.use_mpu_process_groups() + pg_collection.dp_cp = parallel_state.get_data_parallel_group(with_context_parallel=True) + pg_collection.expt_dp = parallel_state.get_expert_data_parallel_group() + + optimizer = LayerWiseDistributedOptimizer( + [wrapped_optimizer_1, wrapped_optimizer_2], optimizer_config, pg_collection + ) + + assert len(optimizer.chained_optimizers) == 2, "Should have two chained optimizers" + + # Set gradients and test optimizer step - this will trigger allgather + for param in model.parameters(): + param.grad = torch.randn_like(param) + + update_successful, grad_norm, num_zeros = optimizer.step() + + assert update_successful, "Optimizer step should be successful" + + def test_bf16_wrapping(self): + """Test LayerWiseDistributedOptimizer automatically wraps optimizer with bf16.""" + model, optimizer, pg_collection = self.create_model_and_optimizer() + + # Verify bf16 wrapping happened + assert isinstance( + optimizer.chained_optimizers[0], 
Float16OptimizerWithFloat16Params + ), "Optimizer should be wrapped in Float16OptimizerWithFloat16Params" + + for param in model.parameters(): + param.grad = torch.randn_like(param) + + update_successful, grad_norm, num_zeros = optimizer.step() + + assert update_successful, "Optimizer step should be successful" + + def test_bf16_error(self): + """Test LayerWiseDistributedOptimizer raises error when receiving pre-wrapped Float16 optimizer.""" + model = SimpleModel().bfloat16().cuda() + model.requires_grad_(True) + + ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False) + model = DistributedDataParallel( + TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + ) + + optimizer_config = OptimizerConfig( + optimizer='adam', lr=0.01, bf16=True, use_distributed_optimizer=False + ) + + # Create base optimizer and manually wrap in Float16 optimizer + param_groups = [{'params': list(model.parameters())}] + base_optimizer = torch.optim.Adam(param_groups, lr=optimizer_config.lr) + wrapped_optimizer = Float16OptimizerWithFloat16Params( + base_optimizer, optimizer_config, None, None + ) + + pg_collection = ProcessGroupCollection.use_mpu_process_groups() + pg_collection.dp_cp = parallel_state.get_data_parallel_group(with_context_parallel=True) + pg_collection.expt_dp = parallel_state.get_expert_data_parallel_group() + + # Should raise TypeError when receiving already-wrapped Float16 optimizer + with pytest.raises( + TypeError, match='LayerWiseDistributedOptimizer received Float16 optimizer already' + ): + LayerWiseDistributedOptimizer([wrapped_optimizer], optimizer_config, pg_collection) + + def _run_parameter_update_test(self, model_class=SimpleModel): + """Helper method to test parameter updates with a given model class. 
+ + Args: + model_class: Model class to use for testing + """ + model, optimizer, pg_collection = self.create_model_and_optimizer(model_class=model_class) + + # Create reference model and optimizer using the same function + reference_model, reference_optimizer, _ = self.create_model_and_optimizer( + model_class=model_class, use_layer_wise=False, copy_from=model + ) + + # Set same gradients on both models + for param, ref_param in zip(model.parameters(), reference_model.parameters()): + assert torch.equal(param.data, ref_param.data) + torch.testing.assert_close(param.data, ref_param.data, rtol=1e-5, atol=1e-5) + grad_value = torch.randn_like(param) + torch.distributed.broadcast(grad_value, src=0, group=pg_collection.dp_cp) + param.main_grad = grad_value.clone().detach() + ref_param.main_grad = grad_value.clone().detach() + + optimizer.step() + + # Verify at least some parameters were updated + params_updated = 0 + for param, ref_param in zip(model.parameters(), reference_model.parameters()): + if not torch.equal(param.data, ref_param.data): + params_updated += 1 + + assert params_updated > 0, "At least some parameters should be updated" + + reference_optimizer.step() + + # Verify updated values match reference optimizer + for param, ref_param in zip(model.parameters(), reference_model.parameters()): + torch.testing.assert_close(param.data, ref_param.data, rtol=1e-5, atol=1e-5) + + def test_parameter_updates(self): + """Test LayerWiseDistributedOptimizer actually updates model parameters.""" + self._run_parameter_update_test() + + def test_parameter_updates_insufficient_parameters(self): + """Test LayerWiseDistributedOptimizer when there are insufficient parameters for all ranks. + + Uses a tiny model with only 1 layer (2 parameters: weight and bias). + This will be insufficient when world size > 2. 
+ """ + self._run_parameter_update_test(model_class=TinyModel) diff --git a/tests/unit_tests/test_muon_optimizer.py b/tests/unit_tests/test_muon_optimizer.py index 97d78fe6c70..71d77dc6ecc 100644 --- a/tests/unit_tests/test_muon_optimizer.py +++ b/tests/unit_tests/test_muon_optimizer.py @@ -6,30 +6,39 @@ import torch.nn.functional as F from packaging.version import Version +from megatron.core import parallel_state from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig from megatron.core.optimizer import OptimizerConfig from megatron.core.optimizer.muon import TensorParallelMuon, get_megatron_muon_optimizer +from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer import TransformerConfig from tests.unit_tests.test_utilities import Utils -from tests.unit_tests.test_utils import _deinit_distributed, _init_distributed + +# Skip all tests in this file for LTS versions +pytestmark = pytest.mark.skipif( + Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"), + reason="Skip muon optimizer for LTS test", +) class Net(nn.Module): def __init__(self): super().__init__() self.fc1 = nn.Linear(80, 48) - self.fc2 = nn.Linear(48, 10) + self.fc2 = nn.Linear(48, 32) + self.fc3 = nn.Linear(32, 24) + self.fc4 = nn.Linear(24, 16) + self.fc5 = nn.Linear(16, 10) def forward(self, x): x = F.relu(self.fc1(x)) - x = self.fc2(x) + x = F.relu(self.fc2(x)) + x = F.relu(self.fc3(x)) + x = F.relu(self.fc4(x)) + x = self.fc5(x) return x -@pytest.mark.skipif( - Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"), - reason="Skip muon optimizer for LTS test", -) def test_muon_optimizer_smoke(): """Smoke test for TensorParallelMuon optimizer.""" # Create a simple linear model for testing @@ -92,153 +101,569 @@ def test_muon_optimizer_smoke(): @pytest.mark.skipif( - Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"), - reason="Skip muon optimizer for LTS 
test", + int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1" ) -def test_get_megatron_muon_optimizer_smoke(): - """Smoke test for get_megatron_muon_optimizer function.""" - world = int(os.getenv('WORLD_SIZE', '1')) - rank = int(os.getenv('RANK', '0')) - - # Setup: distributed, model - _init_distributed(world, rank) - Utils.initialize_model_parallel() +class TestMuonOptimizerMultiRank: + """Test class for Muon optimizer with multi-rank setup.""" + + @pytest.fixture(autouse=True) + def setup_and_teardown(self): + """Setup and teardown for each test.""" + Utils.initialize_model_parallel() + yield + Utils.destroy_model_parallel() + + def create_ddp_model(self, model): + """Wrap model in DDP. + + Args: + model: Model to wrap + + Returns: + DDP-wrapped model + """ + ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False) + return DistributedDataParallel( + TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + ) + + def test_get_megatron_muon_optimizer_smoke(self): + """Smoke test for get_megatron_muon_optimizer function.""" + model = Net().bfloat16().cuda() + model.requires_grad_(True) + model = self.create_ddp_model(model) + + # Ensure all parameters require gradients + for param in model.parameters(): + assert param.requires_grad, "All parameters should require gradients" + + # Create optimizer config for Muon + optimizer_config = OptimizerConfig( + optimizer='muon', # This will be changed internally to 'adam' for non-linear params + lr=0.01, + weight_decay=0.01, + bf16=True, + use_distributed_optimizer=False, # Muon doesn't support distributed optimizer + muon_momentum=0.95, + muon_use_nesterov=True, + muon_fp32_matmul_prec="medium", + muon_num_ns_steps=5, + muon_scale_mode="spectral", + muon_tp_mode="duplicated", + ) + + # Test creating the optimizer + optimizer = get_megatron_muon_optimizer( + config=optimizer_config, + model_chunks=[model], + use_gloo_process_groups=True, + 
layer_wise_distributed_optimizer=False, + ) + + # Test basic properties + assert optimizer is not None, "Optimizer should not be None" + assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups" + assert hasattr(optimizer, 'chained_optimizers'), "Should be a ChainedOptimizer" + assert len(optimizer.chained_optimizers) >= 1, "Should have at least one chained optimizer" + + # Test forward and backward pass + input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + # Store original parameters + original_params = {} + for name, param in model.named_parameters(): + original_params[name] = param.data.clone() + + # Test optimizer step + optimizer.step() + + # Verify at least some parameters were updated + params_updated = 0 + for name, param in model.named_parameters(): + if not torch.equal(param.data, original_params[name]): + params_updated += 1 + + assert params_updated > 0, "At least some parameters should be updated after optimizer step" + + # Test zero_grad + optimizer.zero_grad() + for param in model.parameters(): + assert param.grad is None or torch.all( + param.grad == 0 + ), f"Gradients should be zeroed for all parameters" + + # Test state_dict and load_state_dict + state_dict = optimizer.state_dict() + assert isinstance(state_dict, list), "State dict should be a list" + + # Load state dict should not raise error + optimizer.load_state_dict(state_dict) + + def test_get_megatron_muon_optimizer_validation(self): + """Test validation logic for get_megatron_muon_optimizer.""" + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.bfloat16, device='cuda') + model.requires_grad_(True) + model = self.create_ddp_model(model) + + # Test 1: Distributed optimizer should raise exception + optimizer_config_dist = OptimizerConfig( + optimizer='muon', + lr=0.01, + bf16=True, + use_distributed_optimizer=True, # This should cause an exception + ) + + with 
pytest.raises(Exception, match='muon with dist optimizer is not supported'): + get_megatron_muon_optimizer(config=optimizer_config_dist, model_chunks=[model]) + + # Test 2: FP16 should raise exception + optimizer_config_fp16 = OptimizerConfig( + optimizer='muon', + lr=0.01, + fp16=True, # This should cause an exception + use_distributed_optimizer=False, + ) + + with pytest.raises(Exception, match='muon with fp16 is not supported'): + get_megatron_muon_optimizer(config=optimizer_config_fp16, model_chunks=[model]) + + # Test 3: Invalid num_ns_steps should raise exception + optimizer_config_invalid_ns = OptimizerConfig( + optimizer='muon', + lr=0.01, + bf16=True, + use_distributed_optimizer=False, + muon_num_ns_steps=0, # This should cause an exception + ) + + with pytest.raises(ValueError, match='num_ns_steps must be at least 1'): + get_megatron_muon_optimizer(config=optimizer_config_invalid_ns, model_chunks=[model]) + + def test_get_megatron_muon_optimizer_layer_wise(self): + """Test get_megatron_muon_optimizer with layer-wise distributed optimizer.""" + model = Net().bfloat16().cuda() + model.requires_grad_(True) + model = self.create_ddp_model(model) + + optimizer_config = OptimizerConfig( + optimizer='muon', + lr=0.01, + weight_decay=0.01, + bf16=True, + use_distributed_optimizer=False, + muon_momentum=0.95, + muon_use_nesterov=True, + muon_fp32_matmul_prec="medium", + muon_num_ns_steps=5, + muon_scale_mode="spectral", + muon_tp_mode="duplicated", + ) + + # Test with layer_wise_distributed_optimizer=True + optimizer = get_megatron_muon_optimizer( + config=optimizer_config, + model_chunks=[model], + use_gloo_process_groups=True, + layer_wise_distributed_optimizer=True, + ) + + # Verify it's a LayerWiseDistributedOptimizer + from megatron.core.optimizer.layer_wise_optimizer import LayerWiseDistributedOptimizer + + assert isinstance( + optimizer, LayerWiseDistributedOptimizer + ), "Should return LayerWiseDistributedOptimizer" + + # Test forward and backward pass + 
input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + # Test optimizer step + update_successful, grad_norm, num_zeros = optimizer.step() + + assert update_successful, "Optimizer step should be successful" + assert grad_norm is not None or grad_norm is None, "Grad norm should be returned" + + +@pytest.mark.parametrize("mode", ["duplicated", "blockwise", "distributed"]) +def test_muon_optimizer_different_modes_single_rank(mode): + """Test TensorParallelMuon optimizer with different modes on single rank. + + When TP size is 1, all modes should produce the same result. + """ + # Set random seed for reproducibility + torch.manual_seed(42) + torch.cuda.manual_seed(42) - # Create a model with both linear and non-linear parameters - model = Net().bfloat16().cuda() + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') model.requires_grad_(True) + model.weight.data.normal_(0, 0.02) - # Wrap in DDP (required for Megatron optimizer) - ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False) - model = DistributedDataParallel( - TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + optimizer = TensorParallelMuon( + params=[model.weight], + lr=0.01, + momentum_beta=0.95, + weight_decay=0.0, # Disable weight decay for deterministic comparison + num_ns_steps=5, + pg_collection=None, + mode=mode, ) - # Ensure all parameters require gradients - for param in model.parameters(): - assert param.requires_grad, "All parameters should require gradients" + # Use fixed input for deterministic results + torch.manual_seed(42) + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + + output = model(input_tensor) + loss = output.sum() + loss.backward() - # Create optimizer config for Muon - optimizer_config = OptimizerConfig( - optimizer='muon', # This will be changed internally to 'adam' for non-linear params + 
original_weight = model.weight.data.clone() + optimizer.step() + + # Verify weight was updated + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with mode={mode}" + + +@pytest.mark.skipif( + int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1" +) +class TestMuonOptimizerMultiRankTP: + """Test class for Muon optimizer with multi-rank and tensor parallel setup.""" + + @pytest.fixture(autouse=True) + def setup_and_teardown(self): + """Setup and teardown for each test with tensor parallel.""" + world = int(os.getenv('WORLD_SIZE', '1')) + Utils.initialize_model_parallel(tensor_model_parallel_size=min(world, 2)) + yield + Utils.destroy_model_parallel() + + def create_tp_model_and_optimizer(self, mode): + """Create model with TP and optimizer. + + Args: + mode: Muon optimizer mode + + Returns: + tuple: (model, optimizer, pg_collection) + """ + rank = int(os.getenv('RANK', '0')) + pg_collection = ProcessGroupCollection.use_mpu_process_groups() + + # Create model with partition_dim for TP + torch.manual_seed(42 + rank) + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.normal_(0, 0.02) + model.weight.partition_dim = 0 # Set partition dimension for TP + + optimizer = TensorParallelMuon( + params=[model.weight], + lr=0.01, + momentum_beta=0.95, + weight_decay=0.0, + num_ns_steps=5, + pg_collection=pg_collection, + mode=mode, + ) + + return model, optimizer + + @pytest.mark.parametrize("mode", ["duplicated", "distributed"]) + def test_muon_optimizer_modes_multirank_same_result(self, mode): + """Test that duplicated and distributed modes produce same results with TP > 1.""" + model, optimizer = self.create_tp_model_and_optimizer(mode) + + # Use fixed input for deterministic results + torch.manual_seed(42) + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + + output = model(input_tensor) + loss = 
output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + # Verify weight was updated + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with mode={mode}" + + def test_muon_optimizer_blockwise_mode_different_result(self): + """Test that blockwise mode produces different results than duplicated/distributed with TP > 1.""" + model, optimizer = self.create_tp_model_and_optimizer("blockwise") + + # Use fixed input for deterministic results + torch.manual_seed(42) + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + # Verify weight was updated + assert not torch.equal( + model.weight.data, original_weight + ), "Weight should be updated with mode=blockwise" + + +@pytest.mark.parametrize( + "coefficient_type_and_steps", [("simple", 3), ("quintic", 5), ("polar_express", 8)] +) +def test_muon_optimizer_coefficient_types(coefficient_type_and_steps): + """Test TensorParallelMuon optimizer with different coefficient types.""" + model = torch.nn.Linear(80, 40, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = TensorParallelMuon( + params=[model.weight], lr=0.01, - weight_decay=0.01, - bf16=True, - use_distributed_optimizer=False, # Muon doesn't support distributed optimizer - muon_momentum=0.95, - muon_use_nesterov=True, - muon_fp32_matmul_prec="medium", - muon_num_ns_steps=5, - muon_scale_mode="spectral", - muon_tp_mode="duplicated", + coefficient_type=coefficient_type_and_steps[0], + num_ns_steps=coefficient_type_and_steps[1], + pg_collection=None, + mode="duplicated", ) - # Test creating the optimizer - optimizer = get_megatron_muon_optimizer( - config=optimizer_config, - model_chunks=[model], - use_gloo_process_groups=True, - 
layer_wise_distributed_optimizer=False, - ) + input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() - # Test basic properties - assert optimizer is not None, "Optimizer should not be None" - assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups" - assert hasattr(optimizer, 'chained_optimizers'), "Should be a ChainedOptimizer" - assert len(optimizer.chained_optimizers) >= 1, "Should have at least one chained optimizer" + original_weight = model.weight.data.clone() + optimizer.step() - # Test forward and backward pass - input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda') + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with coefficient_type={coefficient_type_and_steps[0]} and num_ns_steps={coefficient_type_and_steps[1]}" + + +@pytest.mark.parametrize("scale_mode", ["spectral", "unit_rms_norm", "shape_scaling"]) +def test_muon_optimizer_scale_modes(scale_mode): + """Test TensorParallelMuon optimizer with different scale modes.""" + model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = TensorParallelMuon( + params=[model.weight], + lr=0.01, + scale_mode=scale_mode, + num_ns_steps=5, + pg_collection=None, + mode="duplicated", + ) + + input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda') output = model(input_tensor) loss = output.sum() loss.backward() - # Store original parameters - original_params = {} - for name, param in model.named_parameters(): - original_params[name] = param.data.clone() + original_weight = model.weight.data.clone() + optimizer.step() - # Test optimizer step + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with scale_mode={scale_mode}" + + +@pytest.mark.parametrize("use_nesterov", [True, False]) +def 
test_muon_optimizer_nesterov(use_nesterov): + """Test TensorParallelMuon optimizer with and without Nesterov momentum.""" + model = torch.nn.Linear(50, 25, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = TensorParallelMuon( + params=[model.weight], + lr=0.01, + momentum_beta=0.9, + use_nesterov=use_nesterov, + num_ns_steps=5, + pg_collection=None, + mode="duplicated", + ) + + input_tensor = torch.randn(16, 50, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() optimizer.step() - # Verify at least some parameters were updated - params_updated = 0 - for name, param in model.named_parameters(): - if not torch.equal(param.data, original_params[name]): - params_updated += 1 + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with use_nesterov={use_nesterov}" - assert params_updated > 0, "At least some parameters should be updated after optimizer step" - # Test zero_grad - optimizer.zero_grad() - for param in model.parameters(): - assert param.grad is None or torch.all( - param.grad == 0 - ), f"Gradients should be zeroed for all parameters" +def test_muon_optimizer_multiple_steps(): + """Test TensorParallelMuon optimizer across multiple optimization steps.""" + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) - # Test state_dict and load_state_dict - state_dict = optimizer.state_dict() - assert isinstance(state_dict, list), "State dict should be a list" + optimizer = TensorParallelMuon( + params=[model.weight], + lr=0.01, + momentum_beta=0.95, + weight_decay=0.01, + num_ns_steps=5, + pg_collection=None, + mode="duplicated", + ) - # Load state dict should not raise error - optimizer.load_state_dict(state_dict) + weights_history = [model.weight.data.clone()] - 
_deinit_distributed() + for i in range(3): + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + optimizer.step() + optimizer.zero_grad() + weights_history.append(model.weight.data.clone()) -@pytest.mark.skipif( - Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"), - reason="Skip muon optimizer for LTS test", -) -def test_get_megatron_muon_optimizer_validation(): - """Test validation logic for get_megatron_muon_optimizer.""" - world = int(os.getenv('WORLD_SIZE', '1')) - rank = int(os.getenv('RANK', '0')) + # Verify weights changed at each step + for i in range(len(weights_history) - 1): + assert not torch.equal( + weights_history[i], weights_history[i + 1] + ), f"Weight should change at step {i}" - # Setup: distributed, model - _init_distributed(world, rank) - Utils.initialize_model_parallel() - # Create a simple model - model = torch.nn.Linear(100, 50, bias=False, dtype=torch.bfloat16, device='cuda') +@pytest.mark.skip(reason="split qkv is not implemented yet") +def test_muon_optimizer_qkv_split(): + """Test TensorParallelMuon optimizer with QKV splitting.""" + # Create a model with QKV-like parameter + qkv_size = 3 * 64 * 16 # Combined Q, K, V dimensions, 16 heads x 64 per head + hidden_size = 1024 + model = torch.nn.Linear(hidden_size, qkv_size, bias=False, dtype=torch.float32, device='cuda') model.requires_grad_(True) - ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False) - model = DistributedDataParallel( - TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + model.weight.data.fill_(1.0) + + # Mark parameter as QKV + model.weight.is_qkv = True + + # QKV split shapes: [Q_size, K_size, V_size] + qkv_split_shapes = (64, 64, 64) + + # Test with split_qkv=True + optimizer_split = TensorParallelMuon( + params=[model.weight], + lr=0.01, + split_qkv=True, + is_qkv_fn=lambda p: getattr(p, 'is_qkv', False), + 
qkv_split_shapes=qkv_split_shapes, + num_ns_steps=5, + pg_collection=None, + mode="duplicated", ) - # Test 1: Distributed optimizer should raise exception - optimizer_config_dist = OptimizerConfig( - optimizer='muon', + input_tensor = torch.randn(16, hidden_size, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer_split.step() + weight_with_split = model.weight.data.clone() + + assert not torch.equal( + weight_with_split, original_weight + ), "QKV weight should be updated with split_qkv=True" + + # Reset model and test with split_qkv=False + model.weight.data.fill_(1.0) + optimizer_no_split = TensorParallelMuon( + params=[model.weight], lr=0.01, - bf16=True, - use_distributed_optimizer=True, # This should cause an exception + split_qkv=False, + num_ns_steps=5, + pg_collection=None, + mode="duplicated", ) - with pytest.raises(Exception, match='muon with dist optimizer is not supported'): - get_megatron_muon_optimizer(config=optimizer_config_dist, model_chunks=[model]) + output = model(input_tensor) + loss = output.sum() + loss.backward() + + optimizer_no_split.step() + weight_without_split = model.weight.data.clone() + + assert not torch.equal( + weight_without_split, original_weight + ), "QKV weight should be updated with split_qkv=False" + + # Ensure the two results are different + assert not torch.equal( + weight_with_split, weight_without_split + ), "Weights should be different between split_qkv=True and split_qkv=False" + - # Test 2: FP16 should raise exception - optimizer_config_fp16 = OptimizerConfig( - optimizer='muon', +def test_muon_optimizer_extra_scale_factor(): + """Test TensorParallelMuon optimizer with different extra_scale_factor values.""" + model = torch.nn.Linear(80, 40, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = TensorParallelMuon( + 
params=[model.weight], lr=0.01, - fp16=True, # This should cause an exception - use_distributed_optimizer=False, + extra_scale_factor=2.0, + num_ns_steps=5, + pg_collection=None, + mode="duplicated", ) - with pytest.raises(Exception, match='muon with fp16 is not supported'): - get_megatron_muon_optimizer(config=optimizer_config_fp16, model_chunks=[model]) + input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), "Weight should be updated with extra_scale_factor" - # Test 3: Invalid num_ns_steps should raise exception - optimizer_config_invalid_ns = OptimizerConfig( - optimizer='muon', + +@pytest.mark.parametrize("num_ns_steps", [5, 15, 25]) +def test_muon_optimizer_num_ns_steps(num_ns_steps): + """Test TensorParallelMuon optimizer with different numbers of Newton-Schulz steps.""" + model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = TensorParallelMuon( + params=[model.weight], lr=0.01, - bf16=True, - use_distributed_optimizer=False, - muon_num_ns_steps=0, # This should cause an exception + coefficient_type="quintic", + num_ns_steps=num_ns_steps, + pg_collection=None, + mode="duplicated", ) - with pytest.raises(ValueError, match='num_ns_steps must be at least 1'): - get_megatron_muon_optimizer(config=optimizer_config_invalid_ns, model_chunks=[model]) + input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() - _deinit_distributed() + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with num_ns_steps={num_ns_steps}" From 
6802bec8c8a704dccbddc87e32b20a1476b37869 Mon Sep 17 00:00:00 2001 From: Chen Cui Date: Mon, 20 Oct 2025 10:35:39 -0700 Subject: [PATCH 027/334] ADLR/megatron-lm!4296 - [DEV] fix(MoE): Fix parameter initialization --- megatron/core/transformer/dot_product_attention.py | 2 ++ megatron/core/transformer/moe/router.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/megatron/core/transformer/dot_product_attention.py b/megatron/core/transformer/dot_product_attention.py index 2a958722e46..2a6ac65a685 100644 --- a/megatron/core/transformer/dot_product_attention.py +++ b/megatron/core/transformer/dot_product_attention.py @@ -126,6 +126,8 @@ def __init__( ) ), ) + if config.perform_initialization: + self.softmax_offset = config.init_method(self.softmax_offset) else: raise ValueError("Softmax type not supported") diff --git a/megatron/core/transformer/moe/router.py b/megatron/core/transformer/moe/router.py index 068d680c798..7fa4692ef2f 100644 --- a/megatron/core/transformer/moe/router.py +++ b/megatron/core/transformer/moe/router.py @@ -66,6 +66,8 @@ def reset_parameters(self): """Reset the router parameters.""" if self.config.perform_initialization: self.config.init_method(self.weight) + if self.bias is not None: + self.config.init_method(self.bias) self.weight.data = self.weight.data.to(dtype=self.config.params_dtype) setattr(self.weight, 'sequence_parallel', self.config.sequence_parallel) if self.bias is not None: From a6ca591e61acefc904d00793f7fb8c34c8fbb206 Mon Sep 17 00:00:00 2001 From: Zijie Yan Date: Tue, 21 Oct 2025 06:37:25 +0000 Subject: [PATCH 028/334] [Dev] Fix attention output gate for TE2.8 --- megatron/core/transformer/attention.py | 60 ++++++++++++------- .../core/transformer/transformer_config.py | 4 ++ .../unit_tests/transformer/test_attention.py | 2 + 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 870b8ad1c40..655955d8ed0 100644 --- 
a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -1098,10 +1098,9 @@ def get_query_key_value_tensors( num_query_heads_per_group = ( self.num_attention_heads_per_partition // self.num_query_groups_per_partition ) + num_qkv_heads_per_group = num_query_heads_per_group + 2 if output_gate: - num_qkv_heads_per_group = 2 * num_query_heads_per_group + 2 - else: - num_qkv_heads_per_group = num_query_heads_per_group + 2 + num_qkv_heads_per_group += num_query_heads_per_group # If no output gate: [sq, b, hp] --> [sq, b, ng, (np/ng + 2) * hn] # If have output gate: [sq, b, hp] --> [sq, b, ng, (2 * np/ng + 2) * hn] @@ -1112,31 +1111,43 @@ def get_query_key_value_tensors( mixed_qkv = mixed_qkv.view(*new_tensor_shape) # Split the tensor into query, gate, key, and value. - # If no output gate: [sq, b, ng, (np/ng + 2) * hn] - # --> [sq, b, ng, np/ng * hn], None, [sq, b, ng, hn], [sq, b, ng, hn] - # If have output gate: [sq, b, ng, (2 * np/ng + 2) * hn] - # --> [sq, b, ng, np/ng * hn], [sq, b, ng, np/ng * hn], [sq, b, ng, hn], [sq, b, ng, hn] - split_arg_list = [ - num_query_heads_per_group * self.hidden_size_per_attention_head, - num_query_heads_per_group * self.hidden_size_per_attention_head if output_gate else 0, - self.hidden_size_per_attention_head, - self.hidden_size_per_attention_head, - ] - - # Return unsplit mixed_qkv and split_arg_list - if not split_qkv: - return mixed_qkv, split_arg_list + if output_gate: + if not split_qkv: + raise ValueError("split_qkv not supported for gated attention yet.") + # If have output gate: [sq, b, ng, (2 * np/ng + 2) * hn] + # --> [sq, b, ng, np/ng * hn], [sq, b, ng, np/ng * hn], + # [sq, b, ng, hn], [sq, b, ng, hn] + split_arg_list = [ + num_query_heads_per_group * self.hidden_size_per_attention_head, + num_query_heads_per_group * self.hidden_size_per_attention_head, + self.hidden_size_per_attention_head, + self.hidden_size_per_attention_head, + ] - if SplitAlongDim is not None: - (query, gate, key, 
value) = SplitAlongDim(mixed_qkv, 3, split_arg_list) + if SplitAlongDim is not None: + (query, gate, key, value) = SplitAlongDim(mixed_qkv, 3, split_arg_list) + else: + (query, gate, key, value) = torch.split(mixed_qkv, split_arg_list, dim=3) else: - (query, gate, key, value) = torch.split(mixed_qkv, split_arg_list, dim=3) + # If no output gate: [sq, b, ng, (np/ng + 2) * hn] + # --> [sq, b, ng, np/ng * hn], None, [sq, b, ng, hn], [sq, b, ng, hn] + split_arg_list = [ + num_query_heads_per_group * self.hidden_size_per_attention_head, + self.hidden_size_per_attention_head, + self.hidden_size_per_attention_head, + ] + + # Return unsplit mixed_qkv and split_arg_list + if not split_qkv: + return mixed_qkv, split_arg_list + + if SplitAlongDim is not None: + (query, key, value) = SplitAlongDim(mixed_qkv, 3, split_arg_list) + else: + (query, key, value) = torch.split(mixed_qkv, split_arg_list, dim=3) # Query [sq, b, ng, np/ng * hn] -> [sq, b, np, hn] query = query.reshape(query.size(0), query.size(1), -1, self.hidden_size_per_attention_head) - if output_gate: - # Gate [sq, b, ng, np/ng * hn] -> [sq, b, np, hn] - gate = gate.reshape(gate.size(0), gate.size(1), -1, self.hidden_size_per_attention_head) if self.q_layernorm is not None: query = self.q_layernorm(query) @@ -1148,7 +1159,10 @@ def get_query_key_value_tensors( self.run_realtime_tests() if output_gate: + # Gate [sq, b, ng, np/ng * hn] -> [sq, b, np, hn] + gate = gate.reshape(*gate.shape[:2], -1, self.hidden_size_per_attention_head) return query, key, value, gate + return query, key, value def backward_dw(self) -> NoReturn: diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 8b36425ca2a..89fbcb36f5a 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -1337,6 +1337,10 @@ def __post_init__(self): "apply_rope_fusion is not available. Please install TE >= 1.4." 
) + if self.fused_single_qkv_rope: + if self.attention_output_gate: + raise ValueError("fused_single_qkv_rope does not support gated attention for now.") + if self.multi_latent_attention and self.rotary_interleaved: raise ValueError("rotary_interleaved does not work with multi_latent_attention.") diff --git a/tests/unit_tests/transformer/test_attention.py b/tests/unit_tests/transformer/test_attention.py index 419fc17ca0a..23858937c72 100644 --- a/tests/unit_tests/transformer/test_attention.py +++ b/tests/unit_tests/transformer/test_attention.py @@ -96,6 +96,8 @@ def test_fused_rope_gpu_forward(self, rotary_interleaved, fused_qkv_rope): self.parallel_attention.config.apply_rope_fusion = True if rotary_interleaved and not is_te_min_version("2.3.0"): pytest.skip("Only TE >= 2.3.0 supports interleaved fused RoPE.") + if fused_qkv_rope and self.parallel_attention.config.attention_output_gate: + pytest.skip("Fused QKV RoPE does not support gated attention for now.") if fused_qkv_rope and not HAVE_FUSED_QKV_ROPE: pytest.skip("Fused QKV RoPE not available.") self.parallel_attention.config.rotary_interleaved = rotary_interleaved From 78433248157486b881af7b359af7cb649728ef92 Mon Sep 17 00:00:00 2001 From: Zijie Yan Date: Tue, 21 Oct 2025 07:27:39 +0000 Subject: [PATCH 029/334] Cleanup UT and toml --- docker/Dockerfile.ci.dev | 12 +++++++++--- pyproject.toml | 17 +++++++++-------- .../transformer/test_multi_token_prediction.py | 7 +++---- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index b3295697f31..1357dc5219d 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -31,8 +31,10 @@ COPY megatron/core/__init__.py /workspace/megatron/core/ COPY megatron/core/package_info.py /workspace/megatron/core/ RUN --mount=type=cache,target=/root/.cache/uv \ bash -ex <<"EOF" + export NVTE_CUDA_ARCHS="80;90;100" uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages - uv sync --extra dev --extra mlm 
--link-mode copy --locked --all-groups \ + uv sync --only-group build + uv sync --extra dev --extra mlm --link-mode copy --locked \ --no-install-package torch \ --no-install-package torchvision \ --no-install-package triton \ @@ -51,15 +53,19 @@ RUN --mount=type=cache,target=/root/.cache/uv \ EOF # Install DeepEP +COPY docker/patches/deepep.patch /workspace/deepep.patch RUN bash -ex <<"EOF" cd /workspace - uv pip install nvidia-nvshmem-cu12 + uv pip install nvidia-nvshmem-cu13 pushd /opt/venv/lib/python3.12/site-packages/nvidia/nvshmem/lib/ ln -s libnvshmem_host.so.3 libnvshmem_host.so popd git clone --branch v1.2.1 https://github.com/deepseek-ai/DeepEP.git - TORCH_CUDA_ARCH_LIST="9.0" uv pip install --no-build-isolation -v DeepEP/. + pushd DeepEP + patch -p1 < /workspace/deepep.patch + popd + TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" uv pip install --no-build-isolation -v DeepEP/. rm -rf DeepEP EOF diff --git a/pyproject.toml b/pyproject.toml index 0a0fb9993f5..91d66de7efe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
[build-system] -requires = ["setuptools<80.0.0", "pybind11"] +requires = ["setuptools>=80.0.0", "pybind11", "packaging>=24.2"] build-backend = "setuptools.build_meta" [tool.setuptools] @@ -76,9 +76,8 @@ dev = [ "setuptools<80.0.0", "mamba-ssm~=2.2", "causal-conv1d~=1.5", - "flash-linear-attention~=0.3.2", "nv-grouped-gemm~=1.1", - "transformer-engine[pytorch]>=2.6.0a0,<2.8.0", + "transformer-engine[pytorch]>=2.7.0a0,<2.9.0", "nvidia-resiliency-ext>=0.4.0a0,<0.5.0", "nvidia-modelopt[torch]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'", "megatron-energon[av_decode]~=6.0", @@ -86,6 +85,8 @@ dev = [ "flashinfer-python", "wget", "onnxscript", + "flash-linear-attention~=0.3.2", + "emerging_optimizers" ] lts = [ @@ -130,6 +131,7 @@ build = [ "pybind11", "Cython>=3.0.0", "torch", + "nvidia-mathdx", # for TE ] linting = [ "ruff~=0.9.0", @@ -140,17 +142,16 @@ linting = [ ] ci = ["python-gitlab", "slack-sdk", "pandas"] flash_mla = ["flash_mla"] -emerging_optimizers = ["emerging_optimizers"] [tool.uv] default-groups = ["linting", "build", "test"] no-build-isolation-package = [ - "transformer-engine", - "transformer-engine-torch", - "mamba-ssm", "causal-conv1d", "nv-grouped-gemm", "flash_mla", + "mamba-ssm", + "transformer-engine", + "transformer-engine-torch", ] link-mode = "copy" conflicts = [[{ extra = "lts" }, { extra = "dev" }]] @@ -167,8 +168,8 @@ override-dependencies = [ flash_mla = [ { git = "https://github.com/deepseek-ai/FlashMLA", rev = "9edee0c022cd0938148a18e334203b0aab43aa19" }, ] +transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "release_v2.8" } # on `release_v2.8` -# transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "0289e76380088358a584d809faf69effab1a7cda" } # on `release_v2.7 emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev= "fb1add873e7851ec34b48581ea1b15761b73d189"} [tool.isort] diff --git 
a/tests/unit_tests/transformer/test_multi_token_prediction.py b/tests/unit_tests/transformer/test_multi_token_prediction.py index 65e58eaede4..9b9d2c67881 100644 --- a/tests/unit_tests/transformer/test_multi_token_prediction.py +++ b/tests/unit_tests/transformer/test_multi_token_prediction.py @@ -101,7 +101,7 @@ def test_constructor_local(self, tp): assert num_weights == 15216 * config.mtp_num_layers @pytest.mark.skipif(not HAVE_TE, reason="transformer_engine not available") - @pytest.mark.parametrize(('tp', 'cp'), [(1, 1), (1, 2), (2, 1), (2, 2)]) + @pytest.mark.parametrize(('tp', 'cp'), [(1, 1), (2, 1), (2, 2)]) def test_constructor_ues_te(self, tp, cp): """Test basic construction of MTP module.""" torch.manual_seed(_SEED) @@ -249,7 +249,7 @@ def get_batch(self, seq_length, micro_batch_size): not HAVE_TE or not is_te_min_version("2.1.0"), reason="grouped_gemm requires TransformerEngine >= 2.1.0", ) - @pytest.mark.parametrize(("tp", "cp"), [(1, 1), (1, 2), (2, 1), (2, 2)]) + @pytest.mark.parametrize(("tp", "cp"), [(2, 1), (2, 2)]) def test_sharded_state_dict(self, tp, cp): """Test MTP with different tensor parallel sizes.""" args = self.create_test_args(tp, cp, self.seq_length, self.micro_batch_size) @@ -268,9 +268,8 @@ def test_sharded_state_dict(self, tp, cp): not HAVE_TE or not is_te_min_version("2.1.0"), reason="grouped_gemm requires TransformerEngine >= 2.1.0", ) - @pytest.mark.parametrize("full_recompute", [False, True]) @pytest.mark.parametrize( - ("tp", "cp"), [(1, 1), (1, 2), (1, 4), (2, 1), (2, 2), (2, 4), (4, 1), (4, 2)] + ("tp", "cp", "full_recompute"), [(1, 1, False), (1, 4, False), (2, 4, False), (4, 1, True)] ) def test_forward_backward(self, tmp_path_dist_ckpt, tp, cp, full_recompute): """Test MTP forward and backward with gptmodel.""" From a48a416c14760bbe606b45e88f9798fd8b288654 Mon Sep 17 00:00:00 2001 From: Zijie Yan Date: Tue, 21 Oct 2025 08:26:25 +0000 Subject: [PATCH 030/334] Clean up functional test --- .../model_config.yaml | 4 +- 
.../model_config.yaml | 4 +- .../golden_values_dev_dgx_h100.json | 110 ++-- .../golden_values_dev_dgxh100_coreweave.json | 500 +++++++++--------- .../golden_values_dev_dgxh100_eos.json | 500 +++++++++--------- .../model_config.yaml | 5 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 5 +- .../golden_values_dev_dgxh100_coreweave.json | 498 ++++++++--------- .../golden_values_dev_dgxh100_eos.json | 498 ++++++++--------- .../model_config.yaml | 11 +- .../golden_values_dev_dgxh100_coreweave.json | 344 ------------ .../golden_values_dev_dgxh100_eos.json | 344 ------------ .../model_config.yaml | 5 +- tests/test_utils/recipes/moe.yaml | 23 +- 16 files changed, 1089 insertions(+), 1770 deletions(-) delete mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_coreweave.json delete mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml index dc19a6c7698..2354ecd7fd9 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml @@ -33,7 +33,7 @@ MODEL_ARGS: --clip-grad: 1.0 --lr-warmup-fraction: .01 --log-interval: 1 - --save-interval: 25 + --save-interval: 10000 --eval-interval: 1000 --eval-iters: 10 --transformer-impl: transformer_engine @@ -57,4 +57,4 @@ MODEL_ARGS: --no-bias-gelu-fusion: true --log-memory-to-tensorboard: true 
--use-tp-pp-dp-mapping: true -TEST_TYPE: ckpt-resume +TEST_TYPE: regular diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml index 30c921c6feb..7c0a103200a 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml @@ -33,7 +33,7 @@ MODEL_ARGS: --clip-grad: 1.0 --lr-warmup-fraction: .01 --log-interval: 1 - --save-interval: 25 + --save-interval: 10000 --eval-interval: 1000 --eval-iters: 10 --transformer-impl: local @@ -56,4 +56,4 @@ MODEL_ARGS: --disable-bias-linear: true --no-bias-gelu-fusion: true --log-memory-to-tensorboard: true -TEST_TYPE: ckpt-resume +TEST_TYPE: regular diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json index 5f29261761b..d06b2b1d235 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json @@ -4,17 +4,17 @@ "end_step": 50, "step_interval": 5, "values": { - "1": 11.04737, - "5": 9.52647, - "10": 9.05826, - "15": 8.04442, - "20": 7.89153, - "25": 7.67197, - "30": 7.64284, - "35": 7.2114, - "40": 7.54179, - "45": 7.18472, - "50": 7.03329 + "1": 11.04748, + "5": 9.53583, + "10": 9.0567, + "15": 8.0476, + "20": 7.89868, + "25": 7.67579, + "30": 7.64391, + "35": 7.20998, + "40": 7.54446, + "45": 7.18755, + "50": 7.03602 } }, 
"num-zeros": { @@ -22,17 +22,17 @@ "end_step": 50, "step_interval": 5, "values": { - "1": 38802604.0, - "5": 252879712.0, - "10": 728514944.0, - "15": 711699968.0, - "20": 992357632.0, - "25": 884068160.0, - "30": 794514496.0, - "35": 712491648.0, - "40": 588410624.0, - "45": 521081920.0, - "50": 432013312.0 + "1": 38802612.0, + "5": 259189728.0, + "10": 744257088.0, + "15": 724250816.0, + "20": 989207936.0, + "25": 843170688.0, + "30": 775645184.0, + "35": 737655104.0, + "40": 607288512.0, + "45": 514790528.0, + "50": 303063296.0 } }, "mem-allocated-bytes": { @@ -58,17 +58,17 @@ "end_step": 50, "step_interval": 5, "values": { - "1": 22860046336.0, - "5": 25729300480.0, - "10": 25729300480.0, - "15": 25888860160.0, - "20": 25888860160.0, - "25": 25888860160.0, - "30": 25888860160.0, - "35": 25888860160.0, - "40": 26620856320.0, - "45": 26620856320.0, - "50": 26620856320.0 + "1": 55055331328.0, + "5": 57918455808.0, + "10": 57918455808.0, + "15": 57931390976.0, + "20": 57931390976.0, + "25": 57931390976.0, + "30": 57931390976.0, + "35": 58003226624.0, + "40": 58003226624.0, + "45": 58234208256.0, + "50": 58780934144.0 } }, "mtp_1 loss": { @@ -76,17 +76,17 @@ "end_step": 50, "step_interval": 5, "values": { - "1": 11.07644, - "5": 9.81173, - "10": 9.12712, - "15": 7.99147, - "20": 7.82967, - "25": 7.61319, - "30": 7.58479, - "35": 7.15178, - "40": 7.47349, - "45": 7.12034, - "50": 6.97212 + "1": 11.07654, + "5": 9.81154, + "10": 9.127, + "15": 7.99077, + "20": 7.82933, + "25": 7.61578, + "30": 7.58618, + "35": 7.15224, + "40": 7.47408, + "45": 7.11969, + "50": 6.9735 } }, "iteration-time": { @@ -94,17 +94,17 @@ "end_step": 50, "step_interval": 5, "values": { - "1": 59.91943, - "5": 2.44769, - "10": 1.07968, - "15": 1.04699, - "20": 0.93032, - "25": 0.92301, - "30": 0.92916, - "35": 0.94157, - "40": 0.95917, - "45": 0.94382, - "50": 0.94866 + "1": 71.27032, + "5": 2.09978, + "10": 1.95997, + "15": 1.137, + "20": 1.13455, + "25": 1.13415, + "30": 1.15078, + "35": 
1.15064, + "40": 1.13889, + "45": 1.124, + "50": 1.13608 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json index 17dce39fb21..0f2637a9511 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04737, - "2": 11.03581, - "3": 9.58839, - "4": 9.258, - "5": 9.52647, - "6": 9.907, - "7": 9.48764, - "8": 8.94128, - "9": 8.65518, - "10": 9.05826, - "11": 8.49585, - "12": 8.52509, - "13": 8.4535, - "14": 7.97148, - "15": 8.04442, - "16": 8.08093, - "17": 8.08585, - "18": 7.76263, - "19": 8.14979, - "20": 7.89153, - "21": 7.57836, - "22": 7.54353, - "23": 7.43311, - "24": 7.42342, - "25": 7.67197, - "26": 7.07162, - "27": 7.6134, - "28": 7.31484, - "29": 7.48975, - "30": 7.64284, - "31": 7.39141, - "32": 7.58528, - "33": 7.6358, - "34": 7.69534, - "35": 7.2114, - "36": 7.08322, - "37": 7.42539, - "38": 7.18849, - "39": 7.5489, - "40": 7.54179, - "41": 7.48887, - "42": 7.24738, - "43": 7.2341, - "44": 7.41462, - "45": 7.18472, - "46": 6.89672, - "47": 7.30005, - "48": 7.14262, - "49": 7.58803, - "50": 7.03329 + "1": 11.04748, + "2": 11.03561, + "3": 9.58774, + "4": 9.25819, + "5": 9.53583, + "6": 9.8804, + "7": 9.48247, + "8": 8.93575, + "9": 8.65813, + "10": 9.0567, + "11": 8.49445, + "12": 8.52444, + "13": 8.45239, + "14": 7.97323, + "15": 8.0476, + "16": 8.07971, + "17": 8.09081, + "18": 7.76437, + "19": 8.14892, + "20": 7.89868, + "21": 7.59371, + "22": 7.54743, + "23": 7.43222, + "24": 7.4302, + "25": 7.67579, + "26": 7.06929, + "27": 7.62041, + 
"28": 7.32495, + "29": 7.49042, + "30": 7.64391, + "31": 7.39435, + "32": 7.58789, + "33": 7.64037, + "34": 7.69778, + "35": 7.20998, + "36": 7.08538, + "37": 7.42584, + "38": 7.18804, + "39": 7.55054, + "40": 7.54446, + "41": 7.49287, + "42": 7.24937, + "43": 7.23587, + "44": 7.41595, + "45": 7.18755, + "46": 6.89949, + "47": 7.29966, + "48": 7.14134, + "49": 7.58963, + "50": 7.03602 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802604.0, - "2": 38543572.0, - "3": 38739364.0, - "4": 283087744.0, - "5": 252879712.0, - "6": 261986800.0, - "7": 595325120.0, - "8": 778328192.0, - "9": 667827904.0, - "10": 728514944.0, - "11": 718857664.0, - "12": 778200448.0, - "13": 884592256.0, - "14": 846830080.0, - "15": 711699968.0, - "16": 929099456.0, - "17": 718131072.0, - "18": 690071360.0, - "19": 944853824.0, - "20": 992357632.0, - "21": 794133440.0, - "22": 909975808.0, - "23": 919936064.0, - "24": 895588736.0, - "25": 884068160.0, - "26": 869339392.0, - "27": 857232640.0, - "28": 846888320.0, - "29": 821245440.0, - "30": 794514496.0, - "31": 756025600.0, - "32": 762315264.0, - "33": 759280512.0, - "34": 759373696.0, - "35": 712491648.0, - "36": 677834240.0, - "37": 632307392.0, - "38": 614655616.0, - "39": 607761664.0, - "40": 588410624.0, - "41": 582593792.0, - "42": 573377664.0, - "43": 579927552.0, - "44": 579405952.0, - "45": 521081920.0, - "46": 488627232.0, - "47": 478708544.0, - "48": 475807040.0, - "49": 450025824.0, - "50": 432013312.0 + "1": 38802612.0, + "2": 38543592.0, + "3": 38739528.0, + "4": 279937824.0, + "5": 259189728.0, + "6": 271446400.0, + "7": 604773504.0, + "8": 768892544.0, + "9": 645824128.0, + "10": 744257088.0, + "11": 718888576.0, + "12": 746732544.0, + "13": 871990976.0, + "14": 821645632.0, + "15": 724250816.0, + "16": 932241472.0, + "17": 648958912.0, + "18": 649120000.0, + "19": 925992960.0, + "20": 989207936.0, + "21": 819324096.0, + "22": 736955072.0, + "23": 910497792.0, + "24": 
876716672.0, + "25": 843170688.0, + "26": 809573824.0, + "27": 854086912.0, + "28": 802857664.0, + "29": 805523328.0, + "30": 775645184.0, + "31": 771754624.0, + "32": 749733696.0, + "33": 718385216.0, + "34": 724771200.0, + "35": 737655104.0, + "36": 690419968.0, + "37": 673203456.0, + "38": 627239552.0, + "39": 614047168.0, + "40": 607288512.0, + "41": 582590592.0, + "42": 548211200.0, + "43": 532740640.0, + "44": 554239168.0, + "45": 514790528.0, + "46": 350258560.0, + "47": 472420128.0, + "48": 453788736.0, + "49": 440597216.0, + "50": 303063296.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 22860046336.0, - "2": 25612713984.0, - "3": 25729300480.0, - "4": 25729300480.0, - "5": 25729300480.0, - "6": 25729300480.0, - "7": 25729300480.0, - "8": 25729300480.0, - "9": 25729300480.0, - "10": 25729300480.0, - "11": 25729300480.0, - "12": 25729300480.0, - "13": 25888860160.0, - "14": 25888860160.0, - "15": 25888860160.0, - "16": 25888860160.0, - "17": 25888860160.0, - "18": 25888860160.0, - "19": 25888860160.0, - "20": 25888860160.0, - "21": 25888860160.0, - "22": 25888860160.0, - "23": 25888860160.0, - "24": 25888860160.0, - "25": 25888860160.0, - "26": 25888860160.0, - "27": 25888860160.0, - "28": 25888860160.0, - "29": 25888860160.0, - "30": 25888860160.0, - "31": 25888860160.0, - "32": 25888860160.0, - "33": 25888860160.0, - "34": 25888860160.0, - "35": 25888860160.0, - "36": 25888860160.0, - "37": 25888860160.0, - "38": 26026612736.0, - "39": 26610898944.0, - "40": 26620856320.0, - "41": 26620856320.0, - "42": 26620856320.0, - "43": 26620856320.0, - "44": 26620856320.0, - "45": 26620856320.0, - "46": 26620856320.0, - "47": 26620856320.0, - "48": 26620856320.0, - "49": 26620856320.0, - "50": 26620856320.0 + "1": 55055331328.0, + "2": 57809321984.0, + "3": 57918455808.0, + "4": 57918455808.0, + "5": 57918455808.0, + "6": 57918455808.0, + "7": 57918455808.0, + "8": 57918455808.0, + "9": 57918455808.0, + 
"10": 57918455808.0, + "11": 57918455808.0, + "12": 57918455808.0, + "13": 57931390976.0, + "14": 57931390976.0, + "15": 57931390976.0, + "16": 57931390976.0, + "17": 57931390976.0, + "18": 57931390976.0, + "19": 57931390976.0, + "20": 57931390976.0, + "21": 57931390976.0, + "22": 57931390976.0, + "23": 57931390976.0, + "24": 57931390976.0, + "25": 57931390976.0, + "26": 57931390976.0, + "27": 57931390976.0, + "28": 57931390976.0, + "29": 57931390976.0, + "30": 57931390976.0, + "31": 57931390976.0, + "32": 58003226624.0, + "33": 58003226624.0, + "34": 58003226624.0, + "35": 58003226624.0, + "36": 58003226624.0, + "37": 58003226624.0, + "38": 58003226624.0, + "39": 58003226624.0, + "40": 58003226624.0, + "41": 58003226624.0, + "42": 58003226624.0, + "43": 58003226624.0, + "44": 58183614464.0, + "45": 58234208256.0, + "46": 58555555840.0, + "47": 58555555840.0, + "48": 58555555840.0, + "49": 58555555840.0, + "50": 58780934144.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07644, - "2": 11.07413, - "3": 10.53865, - "4": 10.09826, - "5": 9.81173, - "6": 10.07241, - "7": 9.79857, - "8": 9.07114, - "9": 8.86995, - "10": 9.12712, - "11": 8.49873, - "12": 8.53173, - "13": 8.426, - "14": 7.84827, - "15": 7.99147, - "16": 8.05097, - "17": 8.00164, - "18": 7.73164, - "19": 8.11121, - "20": 7.82967, - "21": 7.52376, - "22": 7.49787, - "23": 7.3697, - "24": 7.37154, - "25": 7.61319, - "26": 7.02025, - "27": 7.559, - "28": 7.26735, - "29": 7.44367, - "30": 7.58479, - "31": 7.32416, - "32": 7.50469, - "33": 7.56964, - "34": 7.63474, - "35": 7.15178, - "36": 7.01748, - "37": 7.34976, - "38": 7.12419, - "39": 7.4868, - "40": 7.47349, - "41": 7.42217, - "42": 7.17743, - "43": 7.16238, - "44": 7.34394, - "45": 7.12034, - "46": 6.82708, - "47": 7.235, - "48": 7.07985, - "49": 7.51123, - "50": 6.97212 + "1": 11.07654, + "2": 11.07406, + "3": 10.53881, + "4": 10.09803, + "5": 9.81154, + "6": 10.06236, + "7": 9.79762, + "8": 
9.07117, + "9": 8.87049, + "10": 9.127, + "11": 8.49853, + "12": 8.53046, + "13": 8.42444, + "14": 7.847, + "15": 7.99077, + "16": 8.05015, + "17": 8.00064, + "18": 7.73104, + "19": 8.11087, + "20": 7.82933, + "21": 7.52501, + "22": 7.49916, + "23": 7.36982, + "24": 7.37235, + "25": 7.61578, + "26": 7.02029, + "27": 7.56014, + "28": 7.2681, + "29": 7.44399, + "30": 7.58618, + "31": 7.32468, + "32": 7.50596, + "33": 7.5715, + "34": 7.63581, + "35": 7.15224, + "36": 7.01784, + "37": 7.35163, + "38": 7.12551, + "39": 7.48656, + "40": 7.47408, + "41": 7.42096, + "42": 7.17595, + "43": 7.16059, + "44": 7.34289, + "45": 7.11969, + "46": 6.82753, + "47": 7.23525, + "48": 7.08042, + "49": 7.51043, + "50": 6.9735 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 63.23561, - "2": 1.12406, - "3": 0.92471, - "4": 1.95991, - "5": 1.98896, - "6": 1.40765, - "7": 1.83926, - "8": 1.3919, - "9": 1.58886, - "10": 0.76479, - "11": 0.74358, - "12": 0.74438, - "13": 0.75457, - "14": 0.74884, - "15": 0.7437, - "16": 0.81872, - "17": 0.74739, - "18": 0.75196, - "19": 0.76647, - "20": 0.74522, - "21": 0.73871, - "22": 0.73978, - "23": 0.73654, - "24": 0.73919, - "25": 0.73709, - "26": 0.78913, - "27": 0.75434, - "28": 0.7477, - "29": 0.73673, - "30": 0.74952, - "31": 0.75513, - "32": 0.74212, - "33": 0.74433, - "34": 0.74812, - "35": 0.7512, - "36": 0.74822, - "37": 0.74176, - "38": 0.7553, - "39": 0.77677, - "40": 0.76693, - "41": 0.76205, - "42": 0.76182, - "43": 0.76665, - "44": 0.76169, - "45": 0.74735, - "46": 0.74195, - "47": 0.75025, - "48": 0.74129, - "49": 0.74367, - "50": 0.74308 + "1": 69.29797, + "2": 1.7261, + "3": 1.40981, + "4": 2.16562, + "5": 1.7862, + "6": 1.7469, + "7": 1.96688, + "8": 1.97301, + "9": 1.74665, + "10": 1.69613, + "11": 1.02979, + "12": 1.02408, + "13": 1.03261, + "14": 1.02432, + "15": 1.0529, + "16": 1.04491, + "17": 1.03693, + "18": 1.03399, + "19": 1.03627, + "20": 1.02284, + "21": 1.01667, + "22": 
1.02932, + "23": 1.03591, + "24": 1.03466, + "25": 1.03149, + "26": 1.03165, + "27": 1.02342, + "28": 1.03777, + "29": 1.04061, + "30": 1.05641, + "31": 1.02382, + "32": 1.01775, + "33": 1.03039, + "34": 1.03693, + "35": 1.03153, + "36": 1.02699, + "37": 1.02756, + "38": 1.02919, + "39": 1.01773, + "40": 1.03491, + "41": 1.03152, + "42": 1.03035, + "43": 1.0221, + "44": 1.05201, + "45": 1.02579, + "46": 1.02798, + "47": 1.03857, + "48": 1.02772, + "49": 1.0408, + "50": 1.03745 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json index f95a91d4ff2..b3668b31178 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04737, - "2": 11.03581, - "3": 9.58845, - "4": 9.25804, - "5": 9.54964, - "6": 9.8667, - "7": 9.47894, - "8": 8.92828, - "9": 8.66752, - "10": 9.05851, - "11": 8.49951, - "12": 8.52674, - "13": 8.45287, - "14": 7.99202, - "15": 8.05428, - "16": 8.08384, - "17": 8.09398, - "18": 7.76937, - "19": 8.14784, - "20": 7.88774, - "21": 7.58582, - "22": 7.5453, - "23": 7.4272, - "24": 7.42741, - "25": 7.67702, - "26": 7.06883, - "27": 7.61756, - "28": 7.33112, - "29": 7.49469, - "30": 7.6427, - "31": 7.39392, - "32": 7.58751, - "33": 7.64167, - "34": 7.70181, - "35": 7.21084, - "36": 7.08821, - "37": 7.42759, - "38": 7.19136, - "39": 7.55273, - "40": 7.54649, - "41": 7.49652, - "42": 7.25161, - "43": 7.2371, - "44": 7.41599, - "45": 7.19163, - "46": 6.90225, - "47": 7.30109, - "48": 7.14398, - "49": 7.59284, - "50": 7.03691 + "1": 11.04748, + "2": 11.03561, + "3": 
9.58773, + "4": 9.25819, + "5": 9.52742, + "6": 9.87911, + "7": 9.48366, + "8": 8.93879, + "9": 8.6551, + "10": 9.10915, + "11": 8.51806, + "12": 8.54732, + "13": 8.48144, + "14": 8.05312, + "15": 8.10118, + "16": 8.10344, + "17": 8.08878, + "18": 7.78589, + "19": 8.15794, + "20": 7.88069, + "21": 7.58542, + "22": 7.54895, + "23": 7.4296, + "24": 7.41901, + "25": 7.67277, + "26": 7.07835, + "27": 7.61157, + "28": 7.31513, + "29": 7.49487, + "30": 7.64287, + "31": 7.39102, + "32": 7.59148, + "33": 7.6393, + "34": 7.70086, + "35": 7.2119, + "36": 7.08623, + "37": 7.43064, + "38": 7.18999, + "39": 7.5525, + "40": 7.54961, + "41": 7.49385, + "42": 7.25481, + "43": 7.24066, + "44": 7.42131, + "45": 7.19201, + "46": 6.90547, + "47": 7.30704, + "48": 7.15325, + "49": 7.60504, + "50": 7.04512 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802620.0, - "2": 38543572.0, - "3": 38741428.0, - "4": 283089696.0, - "5": 256049008.0, - "6": 261995024.0, - "7": 601623744.0, - "8": 775170304.0, - "9": 645831808.0, - "10": 728519104.0, - "11": 740861312.0, - "12": 743565504.0, - "13": 893967040.0, - "14": 963173120.0, - "15": 746290304.0, - "16": 938543360.0, - "17": 730738816.0, - "18": 671172416.0, - "19": 922829888.0, - "20": 948314368.0, - "21": 778417216.0, - "22": 938284544.0, - "23": 926223744.0, - "24": 917606784.0, - "25": 918668992.0, - "26": 866192768.0, - "27": 866673856.0, - "28": 856325760.0, - "29": 836978240.0, - "30": 800803136.0, - "31": 790628096.0, - "32": 756030016.0, - "33": 734117312.0, - "34": 734209792.0, - "35": 731364736.0, - "36": 690416960.0, - "37": 679491584.0, - "38": 639823360.0, - "39": 632918272.0, - "40": 610431680.0, - "41": 598315904.0, - "42": 576523840.0, - "43": 406952768.0, - "44": 569968896.0, - "45": 539956736.0, - "46": 365988928.0, - "47": 503877472.0, - "48": 500972512.0, - "49": 478340480.0, - "50": 457181248.0 + "1": 38802612.0, + "2": 38543592.0, + "3": 38739480.0, + "4": 279954336.0, + 
"5": 249745312.0, + "6": 268288496.0, + "7": 604756224.0, + "8": 781485184.0, + "9": 636362112.0, + "10": 653025216.0, + "11": 668551168.0, + "12": 765583616.0, + "13": 815362944.0, + "14": 834270656.0, + "15": 755756096.0, + "16": 995153536.0, + "17": 938291584.0, + "18": 721524928.0, + "19": 756173504.0, + "20": 901129600.0, + "21": 721816384.0, + "22": 831311872.0, + "23": 803536768.0, + "24": 628253248.0, + "25": 663895680.0, + "26": 847321664.0, + "27": 828927424.0, + "28": 777678976.0, + "29": 764628608.0, + "30": 781930112.0, + "31": 771767616.0, + "32": 771755392.0, + "33": 586323648.0, + "34": 734207552.0, + "35": 690468480.0, + "36": 485982688.0, + "37": 506506336.0, + "38": 642964160.0, + "39": 661240000.0, + "40": 645048768.0, + "41": 636072704.0, + "42": 491645856.0, + "43": 601942528.0, + "44": 623448960.0, + "45": 539959424.0, + "46": 532669088.0, + "47": 529039680.0, + "48": 504121984.0, + "49": 478344480.0, + "50": 331385728.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 22860046336.0, - "2": 25612713984.0, - "3": 25730244608.0, - "4": 25730244608.0, - "5": 25730244608.0, - "6": 25730244608.0, - "7": 25730244608.0, - "8": 25730244608.0, - "9": 25730244608.0, - "10": 25730244608.0, - "11": 25730244608.0, - "12": 25730244608.0, - "13": 26180298752.0, - "14": 26180298752.0, - "15": 26180298752.0, - "16": 26180298752.0, - "17": 26180298752.0, - "18": 26180298752.0, - "19": 26180298752.0, - "20": 26180298752.0, - "21": 26180298752.0, - "22": 26180298752.0, - "23": 26180298752.0, - "24": 26180298752.0, - "25": 26180298752.0, - "26": 26180298752.0, - "27": 26180298752.0, - "28": 26180298752.0, - "29": 26180298752.0, - "30": 26180298752.0, - "31": 26180298752.0, - "32": 26180298752.0, - "33": 26180298752.0, - "34": 26180298752.0, - "35": 26180298752.0, - "36": 26180298752.0, - "37": 26180298752.0, - "38": 26180298752.0, - "39": 26180298752.0, - "40": 26180298752.0, - "41": 26180298752.0, - "42": 
26180298752.0, - "43": 26180298752.0, - "44": 26180298752.0, - "45": 26180298752.0, - "46": 26180298752.0, - "47": 26180298752.0, - "48": 26180298752.0, - "49": 26180298752.0, - "50": 26180298752.0 + "1": 55055331328.0, + "2": 57809321984.0, + "3": 57919823872.0, + "4": 57919823872.0, + "5": 57919823872.0, + "6": 57919823872.0, + "7": 57919823872.0, + "8": 57919823872.0, + "9": 57919823872.0, + "10": 57919823872.0, + "11": 57919823872.0, + "12": 57919823872.0, + "13": 57932275712.0, + "14": 57932275712.0, + "15": 57932275712.0, + "16": 57932275712.0, + "17": 57932275712.0, + "18": 57932275712.0, + "19": 57932275712.0, + "20": 57932275712.0, + "21": 57932275712.0, + "22": 57932275712.0, + "23": 57932275712.0, + "24": 57932275712.0, + "25": 57932275712.0, + "26": 57932275712.0, + "27": 57932275712.0, + "28": 57932275712.0, + "29": 57932275712.0, + "30": 57932275712.0, + "31": 57932275712.0, + "32": 57932275712.0, + "33": 57932275712.0, + "34": 57932275712.0, + "35": 57932275712.0, + "36": 57932275712.0, + "37": 57932275712.0, + "38": 57932275712.0, + "39": 57932275712.0, + "40": 57932275712.0, + "41": 57932275712.0, + "42": 57932275712.0, + "43": 57932275712.0, + "44": 57932275712.0, + "45": 57932275712.0, + "46": 57932275712.0, + "47": 57932275712.0, + "48": 57932275712.0, + "49": 57932275712.0, + "50": 57932275712.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07644, - "2": 11.07413, - "3": 10.53858, - "4": 10.0983, - "5": 9.8117, - "6": 10.05948, - "7": 9.79869, - "8": 9.0727, - "9": 8.87366, - "10": 9.12893, - "11": 8.49884, - "12": 8.52992, - "13": 8.42414, - "14": 7.84688, - "15": 7.99135, - "16": 8.05047, - "17": 8.0004, - "18": 7.73069, - "19": 8.11023, - "20": 7.82948, - "21": 7.51921, - "22": 7.49606, - "23": 7.37196, - "24": 7.37047, - "25": 7.61349, - "26": 7.01867, - "27": 7.5586, - "28": 7.26599, - "29": 7.44466, - "30": 7.58701, - "31": 7.32783, - "32": 7.50657, - "33": 7.56866, - "34": 7.63344, - 
"35": 7.15071, - "36": 7.01674, - "37": 7.34958, - "38": 7.12576, - "39": 7.48596, - "40": 7.47304, - "41": 7.41897, - "42": 7.17558, - "43": 7.16122, - "44": 7.34251, - "45": 7.12147, - "46": 6.82911, - "47": 7.23414, - "48": 7.07998, - "49": 7.51108, - "50": 6.9741 + "1": 11.07654, + "2": 11.07406, + "3": 10.53883, + "4": 10.09801, + "5": 9.81156, + "6": 10.06025, + "7": 9.7962, + "8": 9.06987, + "9": 8.86879, + "10": 9.13393, + "11": 8.5017, + "12": 8.54094, + "13": 8.43678, + "14": 7.85637, + "15": 7.99846, + "16": 8.05889, + "17": 8.01134, + "18": 7.73929, + "19": 8.1188, + "20": 7.83458, + "21": 7.53103, + "22": 7.50125, + "23": 7.37135, + "24": 7.37419, + "25": 7.61596, + "26": 7.01586, + "27": 7.55739, + "28": 7.26274, + "29": 7.43991, + "30": 7.58436, + "31": 7.32289, + "32": 7.50362, + "33": 7.56884, + "34": 7.6339, + "35": 7.151, + "36": 7.01725, + "37": 7.35013, + "38": 7.12483, + "39": 7.48708, + "40": 7.47451, + "41": 7.4181, + "42": 7.17557, + "43": 7.15957, + "44": 7.34227, + "45": 7.12176, + "46": 6.82526, + "47": 7.23374, + "48": 7.07893, + "49": 7.5077, + "50": 6.97094 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 57.89597, - "2": 1.02226, - "3": 0.91676, - "4": 1.99588, - "5": 2.00486, - "6": 1.51451, - "7": 1.1193, - "8": 1.44004, - "9": 1.59872, - "10": 0.77647, - "11": 0.76373, - "12": 0.78131, - "13": 0.77869, - "14": 0.76703, - "15": 1.37612, - "16": 0.78402, - "17": 0.78337, - "18": 0.78947, - "19": 0.77286, - "20": 0.76873, - "21": 0.76722, - "22": 0.76847, - "23": 0.77301, - "24": 0.77475, - "25": 0.78165, - "26": 0.81166, - "27": 1.50584, - "28": 0.78435, - "29": 0.79046, - "30": 0.77828, - "31": 0.77039, - "32": 0.78392, - "33": 0.77294, - "34": 0.77717, - "35": 0.78379, - "36": 0.76722, - "37": 0.78405, - "38": 0.78584, - "39": 0.77423, - "40": 0.77729, - "41": 0.78273, - "42": 0.78119, - "43": 0.77474, - "44": 0.79851, - "45": 0.7826, - "46": 0.78586, - "47": 0.77961, - "48": 
0.77947, - "49": 0.77944, - "50": 0.77976 + "1": 57.80279, + "2": 1.26321, + "3": 1.18918, + "4": 2.24643, + "5": 2.25191, + "6": 1.80757, + "7": 2.09086, + "8": 1.69153, + "9": 1.81279, + "10": 1.64882, + "11": 1.03476, + "12": 1.03593, + "13": 1.04348, + "14": 1.03841, + "15": 1.04432, + "16": 1.05281, + "17": 1.04826, + "18": 1.04981, + "19": 1.05351, + "20": 1.04668, + "21": 1.05254, + "22": 1.05391, + "23": 1.04635, + "24": 1.05503, + "25": 1.04226, + "26": 1.0684, + "27": 1.04985, + "28": 1.04233, + "29": 1.05036, + "30": 1.06219, + "31": 1.044, + "32": 1.05614, + "33": 1.05729, + "34": 1.05618, + "35": 1.06289, + "36": 1.05761, + "37": 1.05956, + "38": 1.06343, + "39": 1.06848, + "40": 1.06027, + "41": 1.05493, + "42": 1.05258, + "43": 1.04879, + "44": 1.04949, + "45": 1.05964, + "46": 1.04465, + "47": 1.0491, + "48": 1.05387, + "49": 1.05218, + "50": 1.05453 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml index 0cce9b4edb6..5390afcd09b 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml @@ -17,7 +17,8 @@ MODEL_ARGS: --use-distributed-optimizer: true --overlap-grad-reduce: true --overlap-param-gather: true - --attention-backend: fused + # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN + --attention-backend: unfused # TODO: switch back to fused attention after fix # Training args --use-mcore-models: true --sequence-parallel: true @@ -122,7 +123,7 @@ MODEL_ARGS: # Add mixed precision args --bf16: true --exit-interval: 50 -TEST_TYPE: ckpt-resume +TEST_TYPE: regular METRICS: - "iteration-time" - "lm loss" diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml index 4e553f2f9ed..19a8b4fc639 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml @@ -33,7 +33,7 @@ MODEL_ARGS: --clip-grad: 1.0 --lr-warmup-fraction: .01 --log-interval: 1 - --save-interval: 25 + --save-interval: 10000 --eval-interval: 1000 --eval-iters: 10 --transformer-impl: transformer_engine @@ -61,4 +61,4 @@ MODEL_ARGS: --attention-backend: unfused --no-bias-gelu-fusion: true --log-memory-to-tensorboard: true -TEST_TYPE: ckpt-resume +TEST_TYPE: regular diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml index 7ba366f1d1b..f27db4a8021 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml @@ -34,7 +34,7 @@ MODEL_ARGS: --clip-grad: 1.0 --lr-warmup-fraction: .01 --log-interval: 1 - --save-interval: 25 + --save-interval: 10000 --eval-interval: 1000 --eval-iters: 10 --transformer-impl: transformer_engine @@ -63,4 +63,4 @@ MODEL_ARGS: --no-bias-gelu-fusion: true --log-memory-to-tensorboard: true --exit-interval: 50 -TEST_TYPE: ckpt-resume +TEST_TYPE: regular diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml index c920037f0f2..7ebd9f0d1af 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml @@ -17,7 +17,8 @@ MODEL_ARGS: --use-distributed-optimizer: true --overlap-grad-reduce: true --overlap-param-gather: true - --attention-backend: fused + # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN + --attention-backend: unfused # TODO: switch back to fused attention after fix # Training args --use-mcore-models: true --sequence-parallel: true @@ -125,7 +126,7 @@ MODEL_ARGS: --fp8-format: hybrid --fp8-recipe: tensorwise --exit-interval: 50 -TEST_TYPE: ckpt-resume # Usually ckpt-resume, but as a WAR to #513 set to regular +TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular METRICS: - "iteration-time" - "lm loss" diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json index 7c3cd772f4f..58eb3fc16cd 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.94947, - "2": 10.95236, - "3": 10.50817, - "4": 9.96373, - "5": 
9.93907, + "1": 10.95004, + "2": 10.9521, + "3": 10.5115, + "4": 9.96454, + "5": 9.93941, "6": 9.67273, - "7": 10.2137, - "8": 9.4963, - "9": 9.56483, - "10": 9.7979, - "11": 9.30107, - "12": 9.40465, - "13": 9.39581, - "14": 8.84796, - "15": 9.02503, - "16": 9.07162, - "17": 9.04638, - "18": 8.75696, - "19": 9.18152, - "20": 8.86295, - "21": 8.5361, - "22": 8.55339, - "23": 8.42711, - "24": 8.37747, - "25": 8.64415, - "26": 7.97441, - "27": 8.56675, - "28": 8.19618, - "29": 8.39325, - "30": 8.67137, - "31": 8.28979, - "32": 8.43623, - "33": 8.55717, - "34": 8.6598, - "35": 8.07929, - "36": 7.94958, - "37": 8.29465, - "38": 7.9784, - "39": 8.39172, - "40": 8.35622, - "41": 8.31635, - "42": 8.06507, - "43": 8.03396, - "44": 8.24146, - "45": 8.1039, - "46": 7.61771, - "47": 8.15375, - "48": 8.00818, - "49": 8.38737, - "50": 7.81612 + "7": 10.20975, + "8": 9.49716, + "9": 9.55902, + "10": 9.79742, + "11": 9.30109, + "12": 9.40483, + "13": 9.39546, + "14": 8.84681, + "15": 9.02444, + "16": 9.07121, + "17": 9.04574, + "18": 8.75678, + "19": 9.18159, + "20": 8.8595, + "21": 8.53503, + "22": 8.55182, + "23": 8.42441, + "24": 8.37608, + "25": 8.64304, + "26": 7.97393, + "27": 8.56806, + "28": 8.19764, + "29": 8.3928, + "30": 8.67283, + "31": 8.289, + "32": 8.43572, + "33": 8.5568, + "34": 8.66018, + "35": 8.07934, + "36": 7.94976, + "37": 8.29565, + "38": 7.98044, + "39": 8.39201, + "40": 8.35513, + "41": 8.31876, + "42": 8.0583, + "43": 8.03283, + "44": 8.24243, + "45": 8.10277, + "46": 7.61696, + "47": 8.15273, + "48": 8.00569, + "49": 8.38688, + "50": 7.81491 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19403784.0, - "2": 19274252.0, - "3": 19373794.0, - "4": 89687600.0, - "5": 139124400.0, - "6": 138949920.0, - "7": 170316512.0, - "8": 192665728.0, - "9": 168817872.0, - "10": 156652864.0, - "11": 217935232.0, - "12": 213007792.0, - "13": 228424704.0, - "14": 217442256.0, - "15": 237921408.0, - "16": 225523072.0, - "17": 
225458384.0, - "18": 164166928.0, - "19": 164457904.0, - "20": 180124848.0, - "21": 230463232.0, - "22": 230096384.0, - "23": 210054656.0, - "24": 200985472.0, - "25": 248708512.0, - "26": 301000896.0, - "27": 205364384.0, - "28": 270886048.0, - "29": 259695952.0, - "30": 224280720.0, - "31": 244360992.0, - "32": 189382672.0, - "33": 231930816.0, - "34": 206712432.0, - "35": 194319616.0, - "36": 246163408.0, - "37": 193561968.0, - "38": 228822688.0, - "39": 226941728.0, - "40": 196742032.0, - "41": 200179904.0, - "42": 219112640.0, - "43": 186235920.0, - "44": 138763920.0, - "45": 148907984.0, - "46": 109115896.0, - "47": 167015728.0, - "48": 156135104.0, - "49": 91378480.0, - "50": 164099648.0 + "1": 19403624.0, + "2": 19274194.0, + "3": 19372760.0, + "4": 86525248.0, + "5": 148575568.0, + "6": 145226704.0, + "7": 171879984.0, + "8": 195785248.0, + "9": 164124752.0, + "10": 167684736.0, + "11": 221077344.0, + "12": 200384224.0, + "13": 248872528.0, + "14": 211169424.0, + "15": 214304608.0, + "16": 216075632.0, + "17": 267845984.0, + "18": 170470336.0, + "19": 176865072.0, + "20": 187955392.0, + "21": 225750704.0, + "22": 247396816.0, + "23": 211643856.0, + "24": 205638464.0, + "25": 277022272.0, + "26": 291562304.0, + "27": 225789840.0, + "28": 288202368.0, + "29": 198390384.0, + "30": 213302208.0, + "31": 227204752.0, + "32": 271112416.0, + "33": 231840432.0, + "34": 203575536.0, + "35": 191152368.0, + "36": 222566928.0, + "37": 177810112.0, + "38": 228708544.0, + "39": 211168784.0, + "40": 215603968.0, + "41": 200089440.0, + "42": 228529888.0, + "43": 198782848.0, + "44": 141902272.0, + "45": 181922816.0, + "46": 115369856.0, + "47": 170214176.0, + "48": 137292832.0, + "49": 97654936.0, + "50": 160979632.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4751680512.0, - "2": 4752032256.0, - "3": 4751058432.0, - "4": 4751692288.0, - "5": 4750785024.0, - "6": 4750721536.0, - "7": 4750738944.0, - "8": 
4750471680.0, - "9": 4750078464.0, - "10": 4750671360.0, - "11": 4750662144.0, - "12": 4750013952.0, - "13": 4750343680.0, - "14": 4750866944.0, - "15": 4751114752.0, - "16": 4754016768.0, - "17": 4751645184.0, - "18": 4749773312.0, - "19": 4751623680.0, - "20": 4749661696.0, - "21": 4751997440.0, - "22": 4751115776.0, - "23": 4750557696.0, - "24": 4751779328.0, - "25": 4750678528.0, - "26": 4749646336.0, - "27": 4750984704.0, - "28": 4752366080.0, - "29": 4750876160.0, - "30": 4750423552.0, - "31": 4750733824.0, - "32": 4751212032.0, - "33": 4750073344.0, - "34": 4751521280.0, - "35": 4750867968.0, - "36": 4750440960.0, - "37": 4750258688.0, - "38": 4751287808.0, - "39": 4749742592.0, - "40": 4750831104.0, - "41": 4750516736.0, - "42": 4750870016.0, - "43": 4750633472.0, - "44": 4750676480.0, - "45": 4750337536.0, - "46": 4751146496.0, - "47": 4750629376.0, - "48": 4750627328.0, - "49": 4751527424.0, - "50": 4750583296.0 + "1": 4883602432.0, + "2": 4885017088.0, + "3": 4882657792.0, + "4": 4883046912.0, + "5": 4883725824.0, + "6": 4883713536.0, + "7": 4883040768.0, + "8": 4883273216.0, + "9": 4882952704.0, + "10": 4885949952.0, + "11": 4883990016.0, + "12": 4887679488.0, + "13": 4884011520.0, + "14": 4882899456.0, + "15": 4883515904.0, + "16": 4883990016.0, + "17": 4883410432.0, + "18": 4883673600.0, + "19": 4882903552.0, + "20": 4884541952.0, + "21": 4883138048.0, + "22": 4883247616.0, + "23": 4883839488.0, + "24": 4885058048.0, + "25": 4882676224.0, + "26": 4884058624.0, + "27": 4884724224.0, + "28": 4884874752.0, + "29": 4883127808.0, + "30": 4883252736.0, + "31": 4882955776.0, + "32": 4885190144.0, + "33": 4883845632.0, + "34": 4884392448.0, + "35": 4883083776.0, + "36": 4883851776.0, + "37": 4885246464.0, + "38": 4882680320.0, + "39": 4884296192.0, + "40": 4884689408.0, + "41": 4882836992.0, + "42": 4883972608.0, + "43": 4884519424.0, + "44": 4883354112.0, + "45": 4883495424.0, + "46": 4882788864.0, + "47": 4883144192.0, + "48": 4883688960.0, + "49": 
4884182528.0, + "50": 4885279232.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11458484224.0, - "2": 12450223104.0, - "3": 12450223104.0, - "4": 12450223104.0, - "5": 12450223104.0, - "6": 12572350464.0, - "7": 12815280128.0, - "8": 12815280128.0, - "9": 13430808576.0, - "10": 13558942720.0, - "11": 13558942720.0, - "12": 13558942720.0, - "13": 13558942720.0, - "14": 13558942720.0, - "15": 13558942720.0, - "16": 13558942720.0, - "17": 13558942720.0, - "18": 13558942720.0, - "19": 13558942720.0, - "20": 13558942720.0, - "21": 13764741120.0, - "22": 13887232000.0, - "23": 13887232000.0, - "24": 13887232000.0, - "25": 13887232000.0, - "26": 13887232000.0, - "27": 13887232000.0, - "28": 13887232000.0, - "29": 13887232000.0, - "30": 13887232000.0, - "31": 13887232000.0, - "32": 13887232000.0, - "33": 13887232000.0, - "34": 13887232000.0, - "35": 13887232000.0, - "36": 13887232000.0, - "37": 13887232000.0, - "38": 13887232000.0, - "39": 13887232000.0, - "40": 13887232000.0, - "41": 13887232000.0, - "42": 13887232000.0, - "43": 13887232000.0, - "44": 13887232000.0, - "45": 13887232000.0, - "46": 13887232000.0, - "47": 13887232000.0, - "48": 13887232000.0, - "49": 13887232000.0, - "50": 13887232000.0 + "1": 41210470400.0, + "2": 41210470400.0, + "3": 41210470400.0, + "4": 41210470400.0, + "5": 41210470400.0, + "6": 41210470400.0, + "7": 41210470400.0, + "8": 41210470400.0, + "9": 41210470400.0, + "10": 41210470400.0, + "11": 41210470400.0, + "12": 41210470400.0, + "13": 41210470400.0, + "14": 41210470400.0, + "15": 41210470400.0, + "16": 41210470400.0, + "17": 41210470400.0, + "18": 41210470400.0, + "19": 41210470400.0, + "20": 41210470400.0, + "21": 41210470400.0, + "22": 41210470400.0, + "23": 41210470400.0, + "24": 41210470400.0, + "25": 41210470400.0, + "26": 41210470400.0, + "27": 41210470400.0, + "28": 41210470400.0, + "29": 41210470400.0, + "30": 41210470400.0, + "31": 41210470400.0, + "32": 
41210470400.0, + "33": 41210470400.0, + "34": 41210470400.0, + "35": 41210470400.0, + "36": 41210470400.0, + "37": 41210470400.0, + "38": 41210470400.0, + "39": 41210470400.0, + "40": 41210470400.0, + "41": 41210470400.0, + "42": 41210470400.0, + "43": 41210470400.0, + "44": 41210470400.0, + "45": 41210470400.0, + "46": 41210470400.0, + "47": 41210470400.0, + "48": 41210470400.0, + "49": 41210470400.0, + "50": 41210470400.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 83.38985, - "2": 0.80022, - "3": 0.71751, - "4": 0.65556, - "5": 0.98544, - "6": 0.76766, - "7": 0.73114, - "8": 0.76226, - "9": 0.62791, - "10": 0.62224, - "11": 0.69873, - "12": 0.62401, - "13": 0.62467, - "14": 0.62054, - "15": 0.6218, - "16": 0.61653, - "17": 0.6184, - "18": 0.63217, - "19": 0.61609, - "20": 0.62413, - "21": 0.60966, - "22": 0.60967, - "23": 0.60674, - "24": 0.60595, - "25": 0.60063, - "26": 0.60502, - "27": 0.60923, - "28": 0.60939, - "29": 0.61217, - "30": 0.60702, - "31": 0.61517, - "32": 0.60803, - "33": 0.60624, - "34": 0.6123, - "35": 0.61133, - "36": 0.60971, - "37": 0.61215, - "38": 0.61014, - "39": 0.62694, - "40": 0.60532, - "41": 0.60477, - "42": 0.60297, - "43": 0.60073, - "44": 0.59786, - "45": 0.60582, - "46": 0.60848, - "47": 0.60019, - "48": 0.60064, - "49": 0.60304, - "50": 0.58276 + "1": 86.8085, + "2": 1.10913, + "3": 0.99097, + "4": 0.89412, + "5": 1.25997, + "6": 0.98162, + "7": 0.98318, + "8": 1.13296, + "9": 0.88126, + "10": 0.8633, + "11": 2.2744, + "12": 4.5393, + "13": 3.22763, + "14": 1.64923, + "15": 0.86595, + "16": 0.86575, + "17": 0.85272, + "18": 0.85454, + "19": 0.85281, + "20": 0.87018, + "21": 0.84654, + "22": 0.8494, + "23": 0.84882, + "24": 0.84482, + "25": 0.85311, + "26": 0.84678, + "27": 0.84096, + "28": 0.8412, + "29": 0.84156, + "30": 0.84475, + "31": 0.84747, + "32": 0.85058, + "33": 0.84977, + "34": 0.8479, + "35": 0.85234, + "36": 0.85012, + "37": 0.85087, + "38": 0.84594, + "39": 
0.84558, + "40": 0.84807, + "41": 0.84183, + "42": 0.8439, + "43": 0.84221, + "44": 0.84248, + "45": 0.84257, + "46": 0.83922, + "47": 0.84311, + "48": 0.84159, + "49": 0.84011, + "50": 0.8353 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json index 9ba3e686ab8..daa04af43dd 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.94947, - "2": 10.95236, - "3": 10.50817, - "4": 9.96373, - "5": 9.93907, + "1": 10.95004, + "2": 10.9521, + "3": 10.5115, + "4": 9.96454, + "5": 9.93941, "6": 9.67273, - "7": 10.2137, - "8": 9.4963, - "9": 9.56483, - "10": 9.7979, - "11": 9.30107, - "12": 9.40465, - "13": 9.39581, - "14": 8.84796, - "15": 9.02503, - "16": 9.07162, - "17": 9.04638, - "18": 8.75696, - "19": 9.18152, - "20": 8.86295, - "21": 8.5361, - "22": 8.55339, - "23": 8.42711, - "24": 8.37747, - "25": 8.64415, - "26": 7.97441, - "27": 8.56675, - "28": 8.19618, - "29": 8.39325, - "30": 8.67137, - "31": 8.28979, - "32": 8.43623, - "33": 8.55717, - "34": 8.6598, - "35": 8.07929, - "36": 7.94958, - "37": 8.29465, - "38": 7.9784, - "39": 8.39172, - "40": 8.35622, - "41": 8.31635, - "42": 8.06507, - "43": 8.03396, - "44": 8.24146, - "45": 8.1039, - "46": 7.61771, - "47": 8.15375, - "48": 8.00818, - "49": 8.38737, - "50": 7.81612 + "7": 10.20975, + "8": 9.49716, + "9": 9.55902, + "10": 9.79742, + "11": 9.30109, + "12": 9.40483, + "13": 9.39546, + "14": 8.84681, + "15": 9.02444, + "16": 
9.07121, + "17": 9.04574, + "18": 8.75678, + "19": 9.18159, + "20": 8.8595, + "21": 8.53503, + "22": 8.55182, + "23": 8.42441, + "24": 8.37608, + "25": 8.64304, + "26": 7.97393, + "27": 8.56806, + "28": 8.19764, + "29": 8.3928, + "30": 8.67283, + "31": 8.289, + "32": 8.43572, + "33": 8.5568, + "34": 8.66018, + "35": 8.07934, + "36": 7.94976, + "37": 8.29565, + "38": 7.98044, + "39": 8.39201, + "40": 8.35513, + "41": 8.31876, + "42": 8.0583, + "43": 8.03283, + "44": 8.24243, + "45": 8.10277, + "46": 7.61696, + "47": 8.15273, + "48": 8.00569, + "49": 8.38688, + "50": 7.81491 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19403784.0, - "2": 19274252.0, - "3": 19373794.0, - "4": 89687600.0, - "5": 139124400.0, - "6": 138949920.0, - "7": 170316512.0, - "8": 192665728.0, - "9": 168817872.0, - "10": 156652864.0, - "11": 217935232.0, - "12": 213007792.0, - "13": 228424704.0, - "14": 217442256.0, - "15": 237921408.0, - "16": 225523072.0, - "17": 225458384.0, - "18": 164166928.0, - "19": 164457904.0, - "20": 180124848.0, - "21": 230463232.0, - "22": 230096384.0, - "23": 210054656.0, - "24": 200985472.0, - "25": 248708512.0, - "26": 301000896.0, - "27": 205364384.0, - "28": 270886048.0, - "29": 259695952.0, - "30": 224280720.0, - "31": 244360992.0, - "32": 189382672.0, - "33": 231930816.0, - "34": 206712432.0, - "35": 194319616.0, - "36": 246163408.0, - "37": 193561968.0, - "38": 228822688.0, - "39": 226941728.0, - "40": 196742032.0, - "41": 200179904.0, - "42": 219112640.0, - "43": 186235920.0, - "44": 138763920.0, - "45": 148907984.0, - "46": 109115896.0, - "47": 167015728.0, - "48": 156135104.0, - "49": 91378480.0, - "50": 164099648.0 + "1": 19403624.0, + "2": 19274194.0, + "3": 19372760.0, + "4": 86525248.0, + "5": 148575568.0, + "6": 145226704.0, + "7": 171879984.0, + "8": 195785248.0, + "9": 164124752.0, + "10": 167684736.0, + "11": 221077344.0, + "12": 200384224.0, + "13": 248872528.0, + "14": 211169424.0, + "15": 
214304608.0, + "16": 216075632.0, + "17": 267845984.0, + "18": 170470336.0, + "19": 176865072.0, + "20": 187955392.0, + "21": 225750704.0, + "22": 247396816.0, + "23": 211643856.0, + "24": 205638464.0, + "25": 277022272.0, + "26": 291562304.0, + "27": 225789840.0, + "28": 288202368.0, + "29": 198390384.0, + "30": 213302208.0, + "31": 227204752.0, + "32": 271112416.0, + "33": 231840432.0, + "34": 203575536.0, + "35": 191152368.0, + "36": 222566928.0, + "37": 177810112.0, + "38": 228708544.0, + "39": 211168784.0, + "40": 215603968.0, + "41": 200089440.0, + "42": 228529888.0, + "43": 198782848.0, + "44": 141902272.0, + "45": 181922816.0, + "46": 115369856.0, + "47": 170214176.0, + "48": 137292832.0, + "49": 97654936.0, + "50": 160979632.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4749337600.0, - "2": 4748343808.0, - "3": 4747997696.0, - "4": 4747469312.0, - "5": 4745943552.0, - "6": 4746412544.0, - "7": 4749017600.0, - "8": 4746762752.0, - "9": 4746394112.0, - "10": 4748286464.0, - "11": 4747621888.0, - "12": 4747802112.0, - "13": 4746905088.0, - "14": 4746850816.0, - "15": 4745785856.0, - "16": 4746166784.0, - "17": 4745583104.0, - "18": 4746839552.0, - "19": 4746510848.0, - "20": 4748375552.0, - "21": 4746974720.0, - "22": 4747533824.0, - "23": 4746271232.0, - "24": 4747352576.0, - "25": 4746148352.0, - "26": 4746516992.0, - "27": 4748668416.0, - "28": 4746871296.0, - "29": 4747913728.0, - "30": 4746131968.0, - "31": 4747437568.0, - "32": 4748567040.0, - "33": 4746713600.0, - "34": 4747983360.0, - "35": 4747450880.0, - "36": 4748372480.0, - "37": 4747075072.0, - "38": 4748749312.0, - "39": 4747972096.0, - "40": 4746372608.0, - "41": 4747513344.0, - "42": 4747912704.0, - "43": 4746867200.0, - "44": 4747612672.0, - "45": 4748287488.0, - "46": 4746935808.0, - "47": 4748032512.0, - "48": 4747668992.0, - "49": 4747238912.0, - "50": 4749120000.0 + "1": 4882187264.0, + "2": 4881607168.0, + "3": 4882283008.0, 
+ "4": 4881322496.0, + "5": 4882174464.0, + "6": 4883177984.0, + "7": 4883252736.0, + "8": 4881774080.0, + "9": 4881443328.0, + "10": 4884319744.0, + "11": 4882319872.0, + "12": 4881232384.0, + "13": 4880836096.0, + "14": 4882124288.0, + "15": 4882108928.0, + "16": 4883384832.0, + "17": 4880466432.0, + "18": 4881518080.0, + "19": 4881734144.0, + "20": 4883215872.0, + "21": 4883534336.0, + "22": 4882774528.0, + "23": 4881818112.0, + "24": 4882441728.0, + "25": 4880546304.0, + "26": 4882178560.0, + "27": 4881892864.0, + "28": 4881869312.0, + "29": 4882979328.0, + "30": 4882715136.0, + "31": 4883084800.0, + "32": 4881436160.0, + "33": 4881766912.0, + "34": 4881406464.0, + "35": 4881531392.0, + "36": 4881479168.0, + "37": 4882455040.0, + "38": 4882054656.0, + "39": 4882005504.0, + "40": 4882743808.0, + "41": 4881211904.0, + "42": 4881378816.0, + "43": 4882133504.0, + "44": 4881860096.0, + "45": 4883165696.0, + "46": 4882168320.0, + "47": 4881526272.0, + "48": 4882125312.0, + "49": 4881533440.0, + "50": 4881598976.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11455561728.0, - "2": 12440659968.0, - "3": 12440659968.0, - "4": 12440659968.0, - "5": 12440659968.0, - "6": 12576563200.0, - "7": 12813101056.0, - "8": 12813101056.0, - "9": 13424891904.0, - "10": 13556338688.0, - "11": 13556338688.0, - "12": 13556338688.0, - "13": 13556338688.0, - "14": 13556338688.0, - "15": 13556338688.0, - "16": 13556338688.0, - "17": 13556338688.0, - "18": 13556338688.0, - "19": 13556338688.0, - "20": 13556338688.0, - "21": 13758310400.0, - "22": 13883041792.0, - "23": 13883041792.0, - "24": 13883041792.0, - "25": 13883041792.0, - "26": 13883041792.0, - "27": 13883041792.0, - "28": 13883041792.0, - "29": 13883041792.0, - "30": 13883041792.0, - "31": 13883041792.0, - "32": 13883041792.0, - "33": 13883041792.0, - "34": 13883041792.0, - "35": 13883041792.0, - "36": 13883041792.0, - "37": 13883041792.0, - "38": 13883041792.0, - 
"39": 13883041792.0, - "40": 13883041792.0, - "41": 13883041792.0, - "42": 13883041792.0, - "43": 13883041792.0, - "44": 13883041792.0, - "45": 13883041792.0, - "46": 13883041792.0, - "47": 13883041792.0, - "48": 13883041792.0, - "49": 13883041792.0, - "50": 13883041792.0 + "1": 41210470400.0, + "2": 41210470400.0, + "3": 41210470400.0, + "4": 41210470400.0, + "5": 41210470400.0, + "6": 41210470400.0, + "7": 41210470400.0, + "8": 41210470400.0, + "9": 41210470400.0, + "10": 41210470400.0, + "11": 41210470400.0, + "12": 41210470400.0, + "13": 41210470400.0, + "14": 41210470400.0, + "15": 41210470400.0, + "16": 41210470400.0, + "17": 41210470400.0, + "18": 41210470400.0, + "19": 41210470400.0, + "20": 41210470400.0, + "21": 41210470400.0, + "22": 41210470400.0, + "23": 41210470400.0, + "24": 41210470400.0, + "25": 41210470400.0, + "26": 41210470400.0, + "27": 41210470400.0, + "28": 41210470400.0, + "29": 41210470400.0, + "30": 41210470400.0, + "31": 41210470400.0, + "32": 41210470400.0, + "33": 41210470400.0, + "34": 41210470400.0, + "35": 41210470400.0, + "36": 41210470400.0, + "37": 41210470400.0, + "38": 41210470400.0, + "39": 41210470400.0, + "40": 41210470400.0, + "41": 41210470400.0, + "42": 41210470400.0, + "43": 41210470400.0, + "44": 41210470400.0, + "45": 41210470400.0, + "46": 41210470400.0, + "47": 41210470400.0, + "48": 41210470400.0, + "49": 41210470400.0, + "50": 41210470400.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 99.19363, - "2": 0.87925, - "3": 0.76355, - "4": 0.70351, - "5": 1.06855, - "6": 0.8083, - "7": 0.79282, - "8": 0.81872, - "9": 0.67053, - "10": 0.64913, - "11": 0.72935, - "12": 0.64945, - "13": 0.64181, - "14": 0.63807, - "15": 0.65651, - "16": 0.66428, - "17": 0.65744, - "18": 0.65362, - "19": 0.65862, - "20": 0.6544, - "21": 0.64288, - "22": 0.64951, - "23": 0.64322, - "24": 0.64447, - "25": 0.63601, - "26": 0.62955, - "27": 0.6244, - "28": 0.62697, - "29": 0.62787, - "30": 
0.6295, - "31": 0.63726, - "32": 0.62178, - "33": 0.62521, - "34": 0.62615, - "35": 0.61895, - "36": 0.62424, - "37": 0.62219, - "38": 0.62548, - "39": 0.62127, - "40": 0.62356, - "41": 0.6165, - "42": 0.61786, - "43": 0.61742, - "44": 0.61943, - "45": 0.61884, - "46": 0.62012, - "47": 0.61656, - "48": 0.6143, - "49": 0.61232, - "50": 0.6085 + "1": 96.21947, + "2": 1.10023, + "3": 0.96399, + "4": 0.91113, + "5": 1.27509, + "6": 1.00484, + "7": 1.01236, + "8": 1.1739, + "9": 0.89406, + "10": 0.88836, + "11": 0.92033, + "12": 0.88331, + "13": 0.88179, + "14": 0.88307, + "15": 0.88648, + "16": 0.88425, + "17": 0.87155, + "18": 0.87556, + "19": 0.87374, + "20": 0.8744, + "21": 0.86757, + "22": 0.87217, + "23": 0.8736, + "24": 0.86646, + "25": 0.87328, + "26": 0.87121, + "27": 0.85886, + "28": 0.86392, + "29": 0.86385, + "30": 0.86425, + "31": 0.8631, + "32": 0.8617, + "33": 0.86069, + "34": 0.86829, + "35": 0.86837, + "36": 0.86776, + "37": 0.86686, + "38": 0.86359, + "39": 0.8677, + "40": 0.86441, + "41": 0.86179, + "42": 0.86079, + "43": 0.86149, + "44": 0.86222, + "45": 0.86336, + "46": 0.85875, + "47": 0.86219, + "48": 0.86026, + "49": 0.85894, + "50": 0.8544 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml index 9fdcb460cf3..11d62eb1490 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml @@ -17,7 +17,8 @@ MODEL_ARGS: --use-distributed-optimizer: true --overlap-grad-reduce: true --overlap-param-gather: true - --attention-backend: fused + # Use unfused attention since MLA with fused attention and deterministic mode 
leads to NaN + --attention-backend: unfused # TODO: switch back to fused attention after fix # Training args --use-mcore-models: true --sequence-parallel: true @@ -118,7 +119,7 @@ MODEL_ARGS: --logging-level: 40 --tensorboard-dir: ${TENSORBOARD_PATH} # CUDA Graph args - --external-cuda-graph: true + --cuda-graph-impl: transformer_engine --cuda-graph-scope: attn --cuda-graph-warmup-steps: 0 --te-rng-tracker: true @@ -127,10 +128,10 @@ MODEL_ARGS: --fp8-format: hybrid --fp8-recipe: tensorwise --exit-interval: 50 -TEST_TYPE: ckpt-resume # Usually ckpt-resume, but as a WAR to #513 set to regular +TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular METRICS: - "iteration-time" - "lm loss" - "num-zeros" - # - "mem-allocated-bytes" - # - "mem-max-allocated-bytes" # Disable for now since resume training has more memory cost. To be investigated. + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_coreweave.json deleted file mode 100644 index 8c4f243d4c2..00000000000 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_coreweave.json +++ /dev/null @@ -1,344 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 11.0637, - "2": 11.03838, - "3": 9.79196, - "4": 14.17309, - "5": 9.48263, - "6": 9.30356, - "7": 9.27632, - "8": 8.75189, - "9": 8.70462, - "10": 9.04035, - "11": 8.41109, - "12": 8.53109, - "13": 8.43144, - "14": 7.93673, - "15": 8.00837, - "16": 8.08212, - "17": 8.06887, - "18": 7.75236, - "19": 8.13737, - "20": 7.88364, - "21": 7.56605, - "22": 7.55552, - "23": 7.42862, - "24": 7.41252, - "25": 7.67597, - "26": 
7.08176, - "27": 7.62221, - "28": 7.32629, - "29": 7.49894, - "30": 7.63447, - "31": 7.3983, - "32": 7.59785, - "33": 7.64396, - "34": 7.70726, - "35": 7.21393, - "36": 7.08985, - "37": 7.42971, - "38": 7.19273, - "39": 7.56041, - "40": 7.55564, - "41": 7.49928, - "42": 7.25988, - "43": 7.24878, - "44": 7.42783, - "45": 7.21045, - "46": 6.91669, - "47": 7.31999, - "48": 7.16939, - "49": 7.62783, - "50": 7.05439 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 38802064.0, - "2": 38543200.0, - "3": 38744220.0, - "4": 166695072.0, - "5": 394456256.0, - "6": 441303136.0, - "7": 538731776.0, - "8": 680781184.0, - "9": 564001216.0, - "10": 571185472.0, - "11": 624455360.0, - "12": 680622208.0, - "13": 777548288.0, - "14": 717772992.0, - "15": 699100416.0, - "16": 677486208.0, - "17": 645761024.0, - "18": 671155776.0, - "19": 674320512.0, - "20": 891692160.0, - "21": 658833920.0, - "22": 802998016.0, - "23": 756352768.0, - "24": 772904192.0, - "25": 748799104.0, - "26": 771817792.0, - "27": 772312064.0, - "28": 655008000.0, - "29": 783495808.0, - "30": 794511296.0, - "31": 756035712.0, - "32": 535862592.0, - "33": 680633984.0, - "34": 482597312.0, - "35": 671593792.0, - "36": 658959488.0, - "37": 626012736.0, - "38": 614650240.0, - "39": 595183872.0, - "40": 421718816.0, - "41": 557433600.0, - "42": 545065344.0, - "43": 539024064.0, - "44": 544803840.0, - "45": 517934176.0, - "46": 504352736.0, - "47": 497582464.0, - "48": 500981632.0, - "49": 490922656.0, - "50": 472902496.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 6294696448.0, - "2": 6295491072.0, - "3": 6296283648.0, - "4": 6297076224.0, - "5": 6297868800.0, - "6": 6298661376.0, - "7": 6294104064.0, - "8": 6294896640.0, - "9": 6295689216.0, - "10": 6296481792.0, - "11": 6294500352.0, - "12": 6295292928.0, - "13": 6296085504.0, - "14": 6296878080.0, - "15": 6297670656.0, - "16": 
6298463232.0, - "17": 6299255808.0, - "18": 6300048384.0, - "19": 6300840960.0, - "20": 6301633536.0, - "21": 6302426112.0, - "22": 6303218688.0, - "23": 6304011264.0, - "24": 6304803840.0, - "25": 6305596416.0, - "26": 6306388992.0, - "27": 6307181568.0, - "28": 6307974144.0, - "29": 6308766720.0, - "30": 6309559296.0, - "31": 6310351872.0, - "32": 6311144448.0, - "33": 6311937024.0, - "34": 6312729600.0, - "35": 6313522176.0, - "36": 6314314752.0, - "37": 6315107328.0, - "38": 6315899904.0, - "39": 6316692480.0, - "40": 6317485056.0, - "41": 6318277632.0, - "42": 6319070208.0, - "43": 6319862784.0, - "44": 6320655360.0, - "45": 6321447936.0, - "46": 6322240512.0, - "47": 6323033088.0, - "48": 6323825664.0, - "49": 6324618240.0, - "50": 6325410816.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 46771978240.0, - "2": 49466654720.0, - "3": 51157819392.0, - "4": 51157819392.0, - "5": 51157819392.0, - "6": 51157819392.0, - "7": 51157819392.0, - "8": 51157819392.0, - "9": 51157819392.0, - "10": 51157819392.0, - "11": 51157819392.0, - "12": 51157819392.0, - "13": 51157819392.0, - "14": 51157819392.0, - "15": 51157819392.0, - "16": 51157819392.0, - "17": 51157819392.0, - "18": 51157819392.0, - "19": 51157819392.0, - "20": 51157819392.0, - "21": 51157819392.0, - "22": 51157819392.0, - "23": 51157819392.0, - "24": 51157819392.0, - "25": 51157819392.0, - "26": 51157819392.0, - "27": 51157819392.0, - "28": 51157819392.0, - "29": 51157819392.0, - "30": 51157819392.0, - "31": 51157819392.0, - "32": 51157819392.0, - "33": 51157819392.0, - "34": 51157819392.0, - "35": 51157819392.0, - "36": 51157819392.0, - "37": 51157819392.0, - "38": 51157819392.0, - "39": 51157819392.0, - "40": 51157819392.0, - "41": 51157819392.0, - "42": 51157819392.0, - "43": 51157819392.0, - "44": 51157819392.0, - "45": 51157819392.0, - "46": 51157819392.0, - "47": 51157819392.0, - "48": 51157819392.0, - "49": 51157819392.0, - "50": 
51157819392.0 - } - }, - "mtp_1 loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 11.04508, - "2": 11.05397, - "3": 10.54505, - "4": 9.99194, - "5": 9.76285, - "6": 9.45507, - "7": 9.54431, - "8": 8.91725, - "9": 8.74784, - "10": 9.04997, - "11": 8.40193, - "12": 8.48288, - "13": 8.36926, - "14": 7.81448, - "15": 7.93865, - "16": 8.02231, - "17": 7.96741, - "18": 7.70552, - "19": 8.09012, - "20": 7.79984, - "21": 7.48241, - "22": 7.49502, - "23": 7.35415, - "24": 7.34793, - "25": 7.60324, - "26": 7.01638, - "27": 7.55495, - "28": 7.24721, - "29": 7.43133, - "30": 7.56633, - "31": 7.31391, - "32": 7.50445, - "33": 7.55658, - "34": 7.62234, - "35": 7.13802, - "36": 7.00593, - "37": 7.33916, - "38": 7.1095, - "39": 7.4736, - "40": 7.45784, - "41": 7.40514, - "42": 7.15986, - "43": 7.14965, - "44": 7.32758, - "45": 7.11892, - "46": 6.81056, - "47": 7.2234, - "48": 7.06789, - "49": 7.503, - "50": 6.9559 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 71.51538, - "2": 1.72071, - "3": 1.31657, - "4": 1.18423, - "5": 3.82179, - "6": 2.3037, - "7": 3.15765, - "8": 1.26325, - "9": 1.04414, - "10": 1.05643, - "11": 2.7525, - "12": 1.03473, - "13": 1.05477, - "14": 1.05184, - "15": 1.06441, - "16": 1.1362, - "17": 1.05355, - "18": 1.05093, - "19": 1.04209, - "20": 1.03871, - "21": 1.04773, - "22": 1.05492, - "23": 1.02882, - "24": 1.05172, - "25": 1.03632, - "26": 1.04229, - "27": 1.04662, - "28": 1.05014, - "29": 1.03047, - "30": 1.0813, - "31": 1.06319, - "32": 1.02842, - "33": 1.041, - "34": 1.02275, - "35": 1.03563, - "36": 1.0411, - "37": 1.02865, - "38": 1.03454, - "39": 1.05619, - "40": 1.04996, - "41": 1.02719, - "42": 1.05309, - "43": 1.03532, - "44": 1.05042, - "45": 1.03343, - "46": 1.04769, - "47": 1.03458, - "48": 1.04744, - "49": 1.04302, - "50": 1.0386 - } - } -} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_eos.json deleted file mode 100644 index 29b1b467978..00000000000 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgxh100_eos.json +++ /dev/null @@ -1,344 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 11.0637, - "2": 11.03838, - "3": 9.79196, - "4": 14.17309, - "5": 9.48263, - "6": 9.30356, - "7": 9.27632, - "8": 8.75189, - "9": 8.70462, - "10": 9.04035, - "11": 8.41109, - "12": 8.53109, - "13": 8.43144, - "14": 7.93673, - "15": 8.00837, - "16": 8.08212, - "17": 8.06887, - "18": 7.75236, - "19": 8.13737, - "20": 7.88364, - "21": 7.56605, - "22": 7.55552, - "23": 7.42862, - "24": 7.41252, - "25": 7.67597, - "26": 7.08176, - "27": 7.62221, - "28": 7.32629, - "29": 7.49894, - "30": 7.63447, - "31": 7.3983, - "32": 7.59785, - "33": 7.64396, - "34": 7.70726, - "35": 7.21393, - "36": 7.08985, - "37": 7.42971, - "38": 7.19273, - "39": 7.56041, - "40": 7.55564, - "41": 7.49928, - "42": 7.25988, - "43": 7.24878, - "44": 7.42783, - "45": 7.21045, - "46": 6.91669, - "47": 7.31999, - "48": 7.16939, - "49": 7.62783, - "50": 7.05439 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 38802064.0, - "2": 38543200.0, - "3": 38744220.0, - "4": 166695072.0, - "5": 394456256.0, - "6": 441303136.0, - "7": 538731776.0, - "8": 680781184.0, - "9": 564001216.0, - "10": 571185472.0, - "11": 624455360.0, - "12": 680622208.0, - "13": 777548288.0, - "14": 717772992.0, - "15": 699100416.0, - "16": 677486208.0, - "17": 645761024.0, - "18": 671155776.0, - "19": 674320512.0, - "20": 891692160.0, - "21": 658833920.0, - "22": 802998016.0, 
- "23": 756352768.0, - "24": 772904192.0, - "25": 748799104.0, - "26": 771817792.0, - "27": 772312064.0, - "28": 655008000.0, - "29": 783495808.0, - "30": 794511296.0, - "31": 756035712.0, - "32": 535862592.0, - "33": 680633984.0, - "34": 482597312.0, - "35": 671593792.0, - "36": 658959488.0, - "37": 626012736.0, - "38": 614650240.0, - "39": 595183872.0, - "40": 421718816.0, - "41": 557433600.0, - "42": 545065344.0, - "43": 539024064.0, - "44": 544803840.0, - "45": 517934176.0, - "46": 504352736.0, - "47": 497582464.0, - "48": 500981632.0, - "49": 490922656.0, - "50": 472902496.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 6294696448.0, - "2": 6295491072.0, - "3": 6296283648.0, - "4": 6297076224.0, - "5": 6297868800.0, - "6": 6298661376.0, - "7": 6294104064.0, - "8": 6294896640.0, - "9": 6295689216.0, - "10": 6296481792.0, - "11": 6294500352.0, - "12": 6295292928.0, - "13": 6296085504.0, - "14": 6296878080.0, - "15": 6297670656.0, - "16": 6298463232.0, - "17": 6299255808.0, - "18": 6300048384.0, - "19": 6300840960.0, - "20": 6301633536.0, - "21": 6302426112.0, - "22": 6303218688.0, - "23": 6304011264.0, - "24": 6304803840.0, - "25": 6305596416.0, - "26": 6306388992.0, - "27": 6307181568.0, - "28": 6307974144.0, - "29": 6308766720.0, - "30": 6309559296.0, - "31": 6310351872.0, - "32": 6311144448.0, - "33": 6311937024.0, - "34": 6312729600.0, - "35": 6313522176.0, - "36": 6314314752.0, - "37": 6315107328.0, - "38": 6315899904.0, - "39": 6316692480.0, - "40": 6317485056.0, - "41": 6318277632.0, - "42": 6319070208.0, - "43": 6319862784.0, - "44": 6320655360.0, - "45": 6321447936.0, - "46": 6322240512.0, - "47": 6323033088.0, - "48": 6323825664.0, - "49": 6324618240.0, - "50": 6325410816.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 46771978240.0, - "2": 49466654720.0, - "3": 51157819392.0, - "4": 51157819392.0, - "5": 
51157819392.0, - "6": 51157819392.0, - "7": 51157819392.0, - "8": 51157819392.0, - "9": 51157819392.0, - "10": 51157819392.0, - "11": 51157819392.0, - "12": 51157819392.0, - "13": 51157819392.0, - "14": 51157819392.0, - "15": 51157819392.0, - "16": 51157819392.0, - "17": 51157819392.0, - "18": 51157819392.0, - "19": 51157819392.0, - "20": 51157819392.0, - "21": 51157819392.0, - "22": 51157819392.0, - "23": 51157819392.0, - "24": 51157819392.0, - "25": 51157819392.0, - "26": 51157819392.0, - "27": 51157819392.0, - "28": 51157819392.0, - "29": 51157819392.0, - "30": 51157819392.0, - "31": 51157819392.0, - "32": 51157819392.0, - "33": 51157819392.0, - "34": 51157819392.0, - "35": 51157819392.0, - "36": 51157819392.0, - "37": 51157819392.0, - "38": 51157819392.0, - "39": 51157819392.0, - "40": 51157819392.0, - "41": 51157819392.0, - "42": 51157819392.0, - "43": 51157819392.0, - "44": 51157819392.0, - "45": 51157819392.0, - "46": 51157819392.0, - "47": 51157819392.0, - "48": 51157819392.0, - "49": 51157819392.0, - "50": 51157819392.0 - } - }, - "mtp_1 loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 11.04508, - "2": 11.05397, - "3": 10.54505, - "4": 9.99194, - "5": 9.76285, - "6": 9.45507, - "7": 9.54431, - "8": 8.91725, - "9": 8.74784, - "10": 9.04997, - "11": 8.40193, - "12": 8.48288, - "13": 8.36926, - "14": 7.81448, - "15": 7.93865, - "16": 8.02231, - "17": 7.96741, - "18": 7.70552, - "19": 8.09012, - "20": 7.79984, - "21": 7.48241, - "22": 7.49502, - "23": 7.35415, - "24": 7.34793, - "25": 7.60324, - "26": 7.01638, - "27": 7.55495, - "28": 7.24721, - "29": 7.43133, - "30": 7.56633, - "31": 7.31391, - "32": 7.50445, - "33": 7.55658, - "34": 7.62234, - "35": 7.13802, - "36": 7.00593, - "37": 7.33916, - "38": 7.1095, - "39": 7.4736, - "40": 7.45784, - "41": 7.40514, - "42": 7.15986, - "43": 7.14965, - "44": 7.32758, - "45": 7.11892, - "46": 6.81056, - "47": 7.2234, - "48": 7.06789, - "49": 7.503, - "50": 6.9559 - } - }, - 
"iteration-time": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 58.25602, - "2": 1.30671, - "3": 1.18374, - "4": 1.08853, - "5": 3.28347, - "6": 2.13071, - "7": 2.96694, - "8": 1.2675, - "9": 1.07672, - "10": 1.07909, - "11": 2.90834, - "12": 1.06176, - "13": 1.06257, - "14": 1.06668, - "15": 1.08083, - "16": 1.08186, - "17": 1.06861, - "18": 1.07223, - "19": 1.06661, - "20": 1.07354, - "21": 1.07863, - "22": 1.08557, - "23": 1.06174, - "24": 1.07533, - "25": 1.06172, - "26": 1.06344, - "27": 1.05522, - "28": 1.05011, - "29": 1.04098, - "30": 1.04622, - "31": 1.0423, - "32": 1.04292, - "33": 1.06328, - "34": 1.03657, - "35": 1.04963, - "36": 1.05103, - "37": 1.04147, - "38": 1.04912, - "39": 1.04838, - "40": 1.04559, - "41": 1.05462, - "42": 1.05103, - "43": 1.04965, - "44": 1.05296, - "45": 1.05039, - "46": 1.05609, - "47": 1.0476, - "48": 1.053, - "49": 1.04626, - "50": 1.05911 - } - } -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml index 4036686e888..0a37ee08498 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml @@ -16,7 +16,8 @@ MODEL_ARGS: --use-distributed-optimizer: true --overlap-grad-reduce: true --overlap-param-gather: true - --attention-backend: unfused + # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN + --attention-backend: unfused # TODO: switch back to fused attention after fix # Training args --use-mcore-models: true --sequence-parallel: true @@ -125,7 +126,7 @@ MODEL_ARGS: --bf16: true --exit-interval: 50 
--overlap-moe-expert-parallel-comm: true -TEST_TYPE: ckpt-resume # Usually ckpt-resume, but as a WAR to #513 set to regular +TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular METRICS: - "iteration-time" - "lm loss" diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 638ee1a89a3..ddfb8d1980b 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -95,11 +95,11 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # hang: #513 - - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] # hang: #513 + # - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] + # products: + # - environment: [dev] + # scope: [mr] + # platforms: [dgx_h100] # hang: #513 - test_case: [gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G] products: - environment: [dev] @@ -128,11 +128,6 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] ####################################################################### # Super important MR tests that run for both DEV and LTS per MR # ####################################################################### @@ -149,6 +144,14 @@ products: ########################### # Merge train tests # ########################### + - test_case: [gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] + - environment: [dev] + scope: [mr-slim] + platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed] products: - environment: [dev] From 12839ed0d8b2da8c97fe0eaa0fd73c497f1ff1f1 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 21 Oct 2025 23:40:26 +0000 Subject: [PATCH 031/334] build: Fix jet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- docker/Dockerfile.ci.dev | 2 +- docker/Dockerfile.ci.lts | 2 +- docker/Dockerfile.ci.nemo | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index b3295697f31..92d7a129d0b 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -83,6 +83,6 @@ RUN --mount=type=secret,id=JET_INDEX_URLS \ LOGGER_INDEX_URL=$(cat /run/secrets/LOGGER_INDEX_URL) uv pip install --no-cache-dir --upgrade $LOGGER_INDEX_URL "one-logger" uv pip install --no-cache-dir --upgrade "setuptools<80.0.0" - uv pip install --no-cache-dir --upgrade $JET_INDEX_URLS "jet-client~=2.0" + uv pip install --no-cache-dir --upgrade $JET_INDEX_URLS "jet-client~=3.0" EOF ### diff --git a/docker/Dockerfile.ci.lts b/docker/Dockerfile.ci.lts index 8889760cfc8..7da27a03f1d 100644 --- a/docker/Dockerfile.ci.lts +++ b/docker/Dockerfile.ci.lts @@ -93,6 +93,6 @@ RUN --mount=type=secret,id=JET_INDEX_URLS \ LOGGER_INDEX_URL=$(cat /run/secrets/LOGGER_INDEX_URL) uv pip install --no-cache-dir --upgrade $LOGGER_INDEX_URL "one-logger" uv pip install --no-cache-dir --upgrade "setuptools<80.0.0" - uv pip install --no-cache-dir --upgrade $JET_INDEX_URLS "jet-client~=2.0" + uv pip install --no-cache-dir --upgrade $JET_INDEX_URLS "jet-client~=3.0" EOF ### \ No newline at end of file diff --git a/docker/Dockerfile.ci.nemo b/docker/Dockerfile.ci.nemo index 0452976a8c7..2369602f54d 100644 --- a/docker/Dockerfile.ci.nemo +++ b/docker/Dockerfile.ci.nemo @@ -14,7 +14,7 @@ FROM main as jet ARG JET_API_VERSION RUN --mount=type=secret,id=JET_INDEX_URLS \ JET_INDEX_URLS=$(cat /run/secrets/JET_INDEX_URLS) && \ - pip install --no-cache-dir jet-api==$JET_API_VERSION "jet-client~=2.0" --upgrade $JET_INDEX_URLS + pip install 
--no-cache-dir jet-api==$JET_API_VERSION "jet-client~=3.0" --upgrade $JET_INDEX_URLS ENV PATH="$PATH:/opt/jet/bin" ### From a8bad4b441127242ab60d9bf79e1a52c2b361d34 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Tue, 21 Oct 2025 16:48:20 -0700 Subject: [PATCH 032/334] ADLR/megatron-lm!4312 - [dev] Set tensor-parallel attributes irrespective of perform_initialization Co-authored-by: Mcore Bot Co-authored-by: yaoyu-33 --- megatron/core/tensor_parallel/layers.py | 24 +++++ megatron/core/transformer/moe/experts.py | 17 ++++ .../test_tp_attrs_without_init.py | 87 +++++++++++++++++++ 3 files changed, 128 insertions(+) create mode 100644 tests/unit_tests/tensor_parallel/test_tp_attrs_without_init.py diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 773c61597bc..5ca290ff680 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -248,6 +248,10 @@ def __init__( rank=get_pg_rank(self.tp_group), world_size=get_pg_size(self.tp_group), ) + else: + set_tensor_model_parallel_attributes( + tensor=self.weight, is_parallel=True, dim=0, stride=1 + ) else: self.weight = Parameter( torch.empty( @@ -259,6 +263,10 @@ def __init__( ) if config.perform_initialization: _initialize_affine_weight_gpu(self.weight, init_method, partition_dim=0, stride=1) + else: + set_tensor_model_parallel_attributes( + tensor=self.weight, is_parallel=True, dim=0, stride=1 + ) def forward(self, input_): """Forward. 
@@ -858,6 +866,10 @@ def __init__( rank=rank, world_size=world_size, ) + else: + set_tensor_model_parallel_attributes( + tensor=self.weight, is_parallel=True, dim=0, stride=stride + ) else: self.weight = Parameter( torch.empty( @@ -875,6 +887,10 @@ def __init__( stride=stride, is_expert=self.is_expert, ) + else: + set_tensor_model_parallel_attributes( + tensor=self.weight, is_parallel=True, dim=0, stride=stride + ) setattr(self.weight, "allreduce", not (self.is_expert and self.expert_parallel)) else: @@ -1170,6 +1186,10 @@ def __init__( rank=rank, world_size=world_size, ) + else: + set_tensor_model_parallel_attributes( + tensor=self.weight, is_parallel=True, dim=1, stride=stride + ) else: self.weight = Parameter( torch.empty( @@ -1187,6 +1207,10 @@ def __init__( stride=stride, is_expert=self.is_expert, ) + else: + set_tensor_model_parallel_attributes( + tensor=self.weight, is_parallel=True, dim=1, stride=stride + ) setattr(self.weight, "allreduce", not (self.is_expert and self.expert_parallel)) if bias: diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index e73864a50fa..d0ac20a7536 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -30,6 +30,7 @@ from megatron.core.tensor_parallel.layers import ( _initialize_affine_weight_cpu, _initialize_affine_weight_gpu, + set_tensor_model_parallel_attributes, ) from megatron.core.tensor_parallel.utils import divide from megatron.core.transformer.mlp import MLP, MLPSubmodules, apply_swiglu_sharded_factory @@ -208,6 +209,14 @@ def activation_func_with_probs(x, probs): rank=tp_rank, world_size=tp_size, ) + else: + # Ensure TP attrs are set even when not initializing + set_tensor_model_parallel_attributes( + tensor=self.weight1, is_parallel=True, dim=1, stride=1 + ) + set_tensor_model_parallel_attributes( + tensor=self.weight2, is_parallel=True, dim=0, stride=1 + ) else: self.weight1 = Parameter( torch.empty( @@ -232,6 +241,14 @@ 
def activation_func_with_probs(x, probs): _initialize_affine_weight_gpu( self.weight2, config.output_layer_init_method, partition_dim=0, is_expert=True ) + else: + # Ensure TP attrs are set even when not initializing + set_tensor_model_parallel_attributes( + tensor=self.weight1, is_parallel=True, dim=1, stride=1 + ) + set_tensor_model_parallel_attributes( + tensor=self.weight2, is_parallel=True, dim=0, stride=1 + ) setattr(self.weight1, 'allreduce', not self.expert_parallel) setattr(self.weight2, 'allreduce', not self.expert_parallel) diff --git a/tests/unit_tests/tensor_parallel/test_tp_attrs_without_init.py b/tests/unit_tests/tensor_parallel/test_tp_attrs_without_init.py new file mode 100644 index 00000000000..f7a518e8e88 --- /dev/null +++ b/tests/unit_tests/tensor_parallel/test_tp_attrs_without_init.py @@ -0,0 +1,87 @@ +import pytest +import torch + +from megatron.core.tensor_parallel.layers import ( + ColumnParallelLinear, + RowParallelLinear, + VocabParallelEmbedding, +) +from megatron.core.transformer.transformer_config import TransformerConfig +from tests.unit_tests.test_utilities import Utils + + +class TestTPAttributesWithoutInitialization: + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + @pytest.mark.parametrize("use_cpu_init", [True, False]) + def test_vocab_parallel_embedding_tp_attrs_no_init(self, use_cpu_init): + Utils.initialize_model_parallel(tensor_model_parallel_size=2) + cfg = TransformerConfig( + num_layers=1, + hidden_size=8, + num_attention_heads=4, + use_cpu_initialization=use_cpu_init, + perform_initialization=False, + ) + + emb = VocabParallelEmbedding( + num_embeddings=16, embedding_dim=8, init_method=cfg.init_method, config=cfg + ) + w = emb.weight + assert hasattr(w, "tensor_model_parallel") and w.tensor_model_parallel is True + assert hasattr(w, "partition_dim") and w.partition_dim == 0 + assert hasattr(w, "partition_stride") and 
w.partition_stride == 1 + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + @pytest.mark.parametrize("use_cpu_init", [True, False]) + def test_column_parallel_linear_tp_attrs_no_init(self, use_cpu_init): + Utils.initialize_model_parallel(tensor_model_parallel_size=2) + cfg = TransformerConfig( + num_layers=1, + hidden_size=8, + num_attention_heads=4, + use_cpu_initialization=use_cpu_init, + perform_initialization=False, + ) + + layer = ColumnParallelLinear( + input_size=8, + output_size=8, + init_method=cfg.init_method, + bias=True, + config=cfg, + skip_bias_add=False, + ) + w = layer.weight + assert hasattr(w, "tensor_model_parallel") and w.tensor_model_parallel is True + assert hasattr(w, "partition_dim") and w.partition_dim == 0 + assert hasattr(w, "partition_stride") and w.partition_stride == 1 + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + @pytest.mark.parametrize("use_cpu_init", [True, False]) + def test_row_parallel_linear_tp_attrs_no_init(self, use_cpu_init): + Utils.initialize_model_parallel(tensor_model_parallel_size=2) + cfg = TransformerConfig( + num_layers=1, + hidden_size=8, + num_attention_heads=4, + use_cpu_initialization=use_cpu_init, + perform_initialization=False, + ) + + layer = RowParallelLinear( + input_size=8, + output_size=8, + init_method=cfg.init_method, + bias=True, + input_is_parallel=True, + config=cfg, + skip_bias_add=False, + ) + w = layer.weight + assert hasattr(w, "tensor_model_parallel") and w.tensor_model_parallel is True + assert hasattr(w, "partition_dim") and w.partition_dim == 1 + assert hasattr(w, "partition_stride") and w.partition_stride == 1 From d9153a50ce14f5e4802a079526552dfbc476149f Mon Sep 17 00:00:00 2001 From: "Tong Liu (Engrg-Hardware 1)" Date: Tue, 21 Oct 2025 23:10:16 -0700 Subject: [PATCH 033/334] ADLR/megatron-lm!4237 - [Dev] perf(MoE): Add the Hybrid-EP backend to the Flex Dispatcher --- docker/Dockerfile.ci.dev | 7 +- 
.../common/model_chunk_schedule_plan.py | 6 +- .../core/models/gpt/fine_grained_callables.py | 5 +- megatron/core/transformer/moe/README.md | 13 +- megatron/core/transformer/moe/fused_a2a.py | 270 ++++++++++++++++++ .../core/transformer/moe/token_dispatcher.py | 201 +++++++++++-- .../core/transformer/transformer_config.py | 20 +- megatron/training/arguments.py | 8 +- .../a2a_overlap/test_schedule_chunk_1f1b.py | 2 +- .../a2a_overlap/test_schedule_layer_1f1b.py | 4 +- .../transformer/moe/test_token_dispatcher.py | 52 +++- .../transformer/test_submodule_callables.py | 2 +- 12 files changed, 542 insertions(+), 48 deletions(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index 92d7a129d0b..1ad8d76324b 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -58,8 +58,11 @@ RUN bash -ex <<"EOF" ln -s libnvshmem_host.so.3 libnvshmem_host.so popd - git clone --branch v1.2.1 https://github.com/deepseek-ai/DeepEP.git - TORCH_CUDA_ARCH_LIST="9.0" uv pip install --no-build-isolation -v DeepEP/. + git clone --branch hybrid-ep https://github.com/deepseek-ai/DeepEP.git + cd DeepEP + git checkout 3f601f7ac1c062c46502646ff04c535013bfca00 + TORCH_CUDA_ARCH_LIST="9.0" uv pip install --no-build-isolation -v . + cd .. 
rm -rf DeepEP EOF diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 6a411ccdcf6..d501c11a0a9 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -107,7 +107,11 @@ def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args): if is_mtp else isinstance(self.layer.mlp, MoELayer) ) - enable_deepep = self.layer.config.moe_enable_deepep + + enable_deepep = ( + self.layer.config.moe_token_dispatcher_type == "flex" + and self.layer.config.moe_flex_dispatcher_backend == "deepep" + ) extra_args["enable_deepep"] = enable_deepep extra_args["is_moe"] = is_moe extra_args["delay_wgrad_compute"] = self.layer.config.delay_wgrad_compute diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index fbecc047682..36298fed66b 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -325,7 +325,10 @@ def build_transformer_layer_callables(layer: TransformerLayer): """ is_moe = isinstance(layer.mlp, MoELayer) - enable_deepep = layer.config.moe_enable_deepep + enable_deepep = ( + layer.config.moe_token_dispatcher_type == "flex" + and layer.config.moe_flex_dispatcher_backend == "deepep" + ) def submodule_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): """ diff --git a/megatron/core/transformer/moe/README.md b/megatron/core/transformer/moe/README.md index 56be6fc2463..1ab325a939b 100644 --- a/megatron/core/transformer/moe/README.md +++ b/megatron/core/transformer/moe/README.md @@ -13,6 +13,7 @@ Megatron-Core MoE provides comprehensive parallelism strategies, seamlessly inte - Support Multi-Token Prediction (MTP) - Batch-level overlapping to hide EP-A2A communication - **Support DeepSeek's DeepEP for efficient token dispatching and combining** +- Support HybridEP for 
efficient token dispatching and combining within intra-node and MNNVL scenarios. - Add fusion for token permutation and unpermutation - Support Uneven virtual pipeline parallel split - Support output-discarding checkpointing on some submodules @@ -172,7 +173,13 @@ Note: The MoE model structure is defined through script arguments. All MoE-relat ### Leverage DeepSeek's DeepEP for High-Performance Cross-Node Token Dispatching - [DeepSeek-DeepEP](https://github.com/deepseek-ai/deepep) provides a highly optimized implementation for MoE token dispatching and combining operations, specifically designed for large-scale MoE training scenarios. - DeepEP is particularly recommended for training large-scale, fine-grained MoE architectures such as DeepSeek-V3 and other advanced MoE models. -- To enable DeepEP in your training configuration, simply set `--moe-token-dispatcher-type=flex` and `--moe-enable-deepep` in your command line arguments. +- To enable DeepEP in your training configuration, simply set `--moe-token-dispatcher-type=flex` and `--moe-flex-dispatcher-backend=deepep` in your command line arguments. + +### Integrate HybridEP for High-Performance Intra-Node Token Dispatching +- [HybridEP](https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep) is developed by NVIDIA as an optimized solution for large-scale MoE (Mixture of Experts) all-to-all communication. It is designed to leverage NVIDIA GPU hardware capabilities, significantly reducing Streaming Multiprocessor (SM) resource usage. +- HybridEP currently supports intra-node and multi-node NVLink scenarios. +- To enable HybridEP, set `--moe-token-dispatcher-type=flex` and + `--moe-flex-dispatcher-backend=hybridep` in your command line arguments. 
### CUDA Graph Support CUDA Graph functionality can be enabled through two options: @@ -240,7 +247,7 @@ Enable A2A overlap across different batches inspired by the DSv3 DualPipe implme | --moe-z-loss-coeff | Scaling coefficient for the z-loss: a starting value of 1e-3 is recommended. Default is None. | | --moe-input-jitter-eps | Add noise to the input tensor by applying jitter with a specified epsilon value. Default is None. | | --moe-token-dispatcher-type | Determines the token dispatcher type. Choices are "allgather", "alltoall". Default is "allgather". We recommend using 'alltoall' if expert parallelism is applied. We have upgraded the "alltoall" dispatcher in place during MCore v0.9, while the original implementation renamed as "alltoall_seq" is retained until MCore v0.13.| -| --moe-enable-deepep | (Experimental) Enable DeepSeek/DeepEP for efficient token dispatching and combine in MoE models. Only works with flex token dispatcher by setting --moe-token-dispatcher-type=flex. | +| --moe-flex-dispatcher-backend | (Experimental) Select the backend for the flex token dispatcher. Supported options: "deepep", "hybridep". Enables efficient token dispatching and combining for MoE models. | | --moe-per-layer-logging | Enable per-layer logging for MoE, currently supports auxiliary loss and z loss. | | --moe-expert-capacity-factor | The capacity factor for each expert, None means no token will be dropped. Default is None. | | --moe-pad-expert-input-to-capacity | Pads the input for each expert to match the expert capacity length, effective only after the --moe-expert-capacity-factor is set. | @@ -441,7 +448,7 @@ By setting `--expert-tensor-parallel-size`, we can set MoE-specific TP size. - Token Dispatcher sends tokens to the designated expert, involves tensor rearangement and communications. - Dispatcher `allgather` is the default option. It achieves better performance and efficiency when only tensor parallelism is used or when the Top-k value is very large. 
- Dispatcher `alltoall` is recommended if expert parallelism is applied. -- Dispatcher `flex` is a new dispatcher decouples communication group from model parallelism. Currently, only the DeepEP backend is supported for by setting `--moe-enable-deepep`. +- Dispatcher `flex` is a new dispatcher that decouples the communication group from model parallelism. It supports two backends (DeepEP and HybridEP) selectable via `--moe-flex-dispatcher-backend`. **Enable Communication Overlap** - Enable `--overlap-param-gather` and `--overlap-grad-reduce` with distributed optimizer. diff --git a/megatron/core/transformer/moe/fused_a2a.py b/megatron/core/transformer/moe/fused_a2a.py index 00a840f2b7f..60b0b11a32c 100644 --- a/megatron/core/transformer/moe/fused_a2a.py +++ b/megatron/core/transformer/moe/fused_a2a.py @@ -262,3 +262,273 @@ def set_deepep_num_sms(num_sms): fused_dispatch = None fused_combine = None set_deepep_num_sms = None + + +try: + from deep_ep import HybridEPBuffer + + HAVE_HYBRIDEP = True +except ImportError: + HAVE_HYBRIDEP = False + +_hybrid_ep_buffer = None + + +def init_hybrid_ep_buffer( + group: torch.distributed.ProcessGroup, + hidden_dim: int, + seq_len: int, + num_local_experts: int, + num_sms_dispatch_api: int, + num_sms_combine_api: int, + fp8_dispatch: bool, +) -> None: + ''' + Initialize the HybridEP buffer, including buffer allocation and metadata + initialization. + + If a runtime dispatch/combine requires a larger buffer than the one + initialized, the buffer will be reallocated at runtime, + incurring extra run-time overhead. + + Args: + group (torch.distributed.ProcessGroup): + Process group for HybridEP all-to-all communication. + hidden_dim (int): + Hidden dimension of the input tensor. + seq_len (int): + Maximum sequence length of the input tensor. + num_local_experts (int): + Number of local experts. + num_sms_dispatch_api (int): + Number of SMs used by the dispatch API. + num_sms_combine_api (int): + Number of SMs used by the combine API. 
+ fp8_dispatch (bool): + Whether to use FP8 communication during the dispatch phase. + ''' + assert not fp8_dispatch, "HybridEP dispatcher does not support fp8 dispatch now" + global _hybrid_ep_buffer + _hybrid_ep_buffer = HybridEPBuffer( + group=group, + hidden_dim=hidden_dim, + max_num_of_tokens_per_rank=seq_len, + num_local_experts=num_local_experts, + use_fp8=fp8_dispatch, + num_sms_dispatch_api=num_sms_dispatch_api, + num_sms_combine_api=num_sms_combine_api, + ) + + +class HybridEPDispatch(torch.autograd.Function): + ''' + Fused dispatch operation for permute + dispatch a2a + permute using the HybridEP backend + ''' + + @staticmethod + def forward( + ctx, + x, + routing_map, + probs, + group, + num_local_experts, + num_sms_dispatch_api=24, + num_sms_combine_api=24, + num_dispatched_tokens=None, + num_permuted_tokens=None, + pad_multiple=None, + ): + ''' + Forward pass of fused dispatch of the HybridEP backend + ''' + if _hybrid_ep_buffer is None: + seq_len, hidden_dim = x.shape[-2:] + fp8_dispatch = False # Currently, we do not support fp8 dispatch + init_hybrid_ep_buffer( + group, + hidden_dim, + seq_len, + num_local_experts, + num_sms_dispatch_api, + num_sms_combine_api, + fp8_dispatch, + ) + # By default, the output token_per_expert and num_dispatched_tokens_tensor + # will be put on the CPU to avoid the potential sync in combine/backward pass, + # but if we provide the num_dispatched_tokens and num_permuted_tokens on CPU, + # we do not need to do the D2H here. 
+ use_host_meta = num_dispatched_tokens is None or num_permuted_tokens is None + # Process the dispatch + ( + dispatched_hidden, + dispatched_probs, + dispatched_scaling_factor, + tokens_per_expert, + handle, + ) = _hybrid_ep_buffer.dispatch_with_permute( + hidden=x, + routing_map=routing_map, + probs=probs, + scaling_factor=None, + num_of_experts_per_rank=num_local_experts, + pad_multiple=pad_multiple, + num_dispatched_tokens=num_dispatched_tokens, + num_permuted_tokens=num_permuted_tokens, + use_host_meta=use_host_meta, + ) + + ctx.handle = handle + ctx.pad_multiple = pad_multiple + ctx.num_dispatched_tokens = num_dispatched_tokens + return ( + dispatched_hidden, + dispatched_probs, + dispatched_scaling_factor, + tokens_per_expert, + handle, + ) + + @staticmethod + def backward(ctx, grad_x, grad_probs, grad_scaling_factor, grad_tokens_per_expert, grad_handle): + ''' + Backward pass of fused dispatch of the HybridEP backend + ''' + handle = ctx.handle + combined_hidden, combined_probs = _hybrid_ep_buffer.combine_with_unpermute( + hidden=grad_x, + probs=grad_probs, + handle=handle, + pad_multiple=ctx.pad_multiple, + num_dispatched_tokens=ctx.num_dispatched_tokens, + ) + return combined_hidden, None, combined_probs, None, None, None, None, None, None, None + + +class HybridEPCombine(torch.autograd.Function): + ''' + Fused combine operation for permute + combine a2a + permute using the HybridEP backend + ''' + + @staticmethod + def forward( + ctx, x, handle, num_dispatched_tokens=None, num_permuted_tokens=None, pad_multiple=None + ): + ''' + Forward pass of fused combine of the HybridEP backend + ''' + combined_hidden, _ = _hybrid_ep_buffer.combine_with_unpermute( + hidden=x, + handle=handle, + pad_multiple=pad_multiple, + num_dispatched_tokens=num_dispatched_tokens, + ) + ctx.handle = handle + ctx.pad_multiple = pad_multiple + ctx.num_dispatched_tokens = num_dispatched_tokens + ctx.num_permuted_tokens = num_permuted_tokens + return combined_hidden + + @staticmethod 
+ def backward(ctx, grad_x): + ''' + Backward pass of fused combine of the HybridEP backend + ''' + handle = ctx.handle + dispatched_hidden, _, _, _, _ = _hybrid_ep_buffer.dispatch_with_permute( + hidden=grad_x, + scaling_factor=None, + handle=handle, + pad_multiple=ctx.pad_multiple, + num_dispatched_tokens=ctx.num_dispatched_tokens, + num_permuted_tokens=ctx.num_permuted_tokens, + ) + return dispatched_hidden, None, None, None, None + + +if HAVE_HYBRIDEP: + + def hybrid_ep_dispatch( + x, + routing_map, + probs, + group, + num_local_experts, + num_sms_dispatch_api=24, + num_sms_combine_api=24, + num_dispatched_tokens=None, + num_permuted_tokens=None, + pad_multiple=None, + ): + ''' + Perform fused dispatch for "permute + dispatch a2a + permute" using the + HybridEP backend. + + Args: + x (torch.Tensor): + Input hidden states to dispatch. + routing_map (torch.Tensor): + Map indicating which expert each token is routed to. + probs (torch.Tensor): + Routing probabilities for each token-expert pair. + group (torch.distributed.ProcessGroup): + Process group used for communication. + num_local_experts (int): + Number of local experts. + num_sms_dispatch_api (int): + Number of SMs used by the dispatch API. + num_sms_combine_api (int): + Number of SMs used by the combine API. + num_dispatched_tokens (int): + Number of tokens after dispatch but before permute. HybridEP uses this + to allocate buffers. If not provided, HybridEP obtains the size from + a GPU tensor, which causes a D2H synchronization. + num_permuted_tokens (int): + Number of tokens after permute. HybridEP uses this to allocate buffers. + If not provided, HybridEP obtains the size from a GPU tensor, + which causes a D2H synchronization. + pad_multiple (int): + Alignment multiple required for FP8 GEMM. If not provided, no padding + is performed. 
+ ''' + return HybridEPDispatch.apply( + x, + routing_map, + probs, + group, + num_local_experts, + num_sms_dispatch_api, + num_sms_combine_api, + num_dispatched_tokens, + num_permuted_tokens, + pad_multiple, + ) + + def hybrid_ep_combine(x, handle, num_dispatched_tokens, num_permuted_tokens, pad_multiple): + ''' + Perform fused combine operation for unpermute + combine a2a + unpermute + using the HybridEP backend + + args: + x (torch.Tensor): + Input hidden states to combine + handle (EventHandle): + Communication handle from dispatch operation + num_dispatched_tokens (int): + The number of tokens after unpermute but before combine. HybridEP uses this + to allocate buffers. If not provided, HybridEP obtains the size from a GPU tensor, + which causes a D2H synchronization. + num_permuted_tokens (int): The number of tokens before unpermute. HybridEP uses this + to allocate buffers. If not provided, HybridEP obtains the size from a GPU tensor, + which causes a D2H synchronization. + pad_multiple (int): + The alignment multiple required for FP8 GEMM. If not provided, no padding + is performed. 
+ ''' + return HybridEPCombine.apply( + x, handle, num_dispatched_tokens, num_permuted_tokens, pad_multiple + ) + +else: + hybrid_ep_dispatch = None + hybrid_ep_combine = None diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index 142aa74a19e..46f94ebe79a 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -20,6 +20,8 @@ from megatron.core.transformer.moe.fused_a2a import ( fused_combine, fused_dispatch, + hybrid_ep_combine, + hybrid_ep_dispatch, set_deepep_num_sms, ) from megatron.core.transformer.moe.moe_utils import ( @@ -899,11 +901,6 @@ def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: """Combine the hidden_states after expert processing.""" pass - @abstractmethod - def get_dispached_metadata(self) -> torch.Tensor: - """Get the metadata of the dispatched hidden_states.""" - pass - @abstractmethod def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor: """Get the permuted hidden states by instances.""" @@ -915,6 +912,161 @@ def get_restored_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> pass +class _HybridEPManager(_DispatchManager): + """ + A manager class to handle fused all-to-all communication processes for MoE models using + HybridEP backend. See https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep for more details. 
+ + The workflow of the HybridEP dispatcher is: + (1) setup_metadata(): Process routing map and probabilities to prepare dispatch metadata + (2) dispatch(): + - Permute tokens for communication, perform all-to-all communication, + and permute tokens for experts in single step + (3) combine(): + - Unpermute tokens for communication, perform all-to-all communication, + and unpermute tokens for attention in single step + """ + + def __init__( + self, + group: torch.distributed.ProcessGroup, + num_local_experts: int, + num_experts: int, + config: TransformerConfig, + ): + """ + Initialize the HybridEP dispatcher. + + Args: + group (torch.distributed.ProcessGroup): The process group to use for communication. + This should be the ETPxEP group. + num_local_experts (int): The number of local experts. + num_experts (int): The total number of experts in the group. + config (TransformerConfig): The configuration for the transformer model. + """ + self.group = group + self.num_local_experts = num_local_experts + self.num_experts = num_experts + self.config = config + self.permute_fusion = config.moe_permute_fusion + self.capacity_factor = config.moe_expert_capacity_factor + # Drop and pad the input to capacity. + self.drop_and_pad = self.config.moe_pad_expert_input_to_capacity + if self.drop_and_pad: + assert self.capacity_factor is not None + self.capacity = None + # The up-bound for the number of tokens after dispatch op, -1 means no up-bound, + # which will cause a CPU sync + self.num_dispatched_tokens = None + # Actually the sum of tokens_per_expert, the up-bound for the number of tokens + # after permute op, -1 means no up-bound, will cause a CPU sync + self.num_permuted_tokens = None + + # Metadata + self.token_probs: Optional[torch.Tensor] = None + # Handle used for combine operation + self.handle = None + # Used for padding the output for each expert + self.pad_multiple = None + + if hybrid_ep_dispatch is None: + raise ImportError( + "HybridEP is not installed. 
Please install HybridEP package from " + "https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep." + ) + + def setup_metadata(self, routing_map: torch.Tensor, probs: torch.Tensor): + num_tokens = routing_map.shape[0] + self.routing_map = routing_map.reshape(num_tokens, self.num_experts) + self.token_probs = probs.reshape(num_tokens, self.num_experts) + # Compute the capacity for each expert at the drop_and_pad mode + if self.drop_and_pad: + num_out_tokens = num_tokens * self.config.moe_router_topk + # Drop and pad the input to capacity. + self.capacity = get_capacity( + num_tokens=num_out_tokens, + num_experts=self.num_experts, + capacity_factor=self.capacity_factor, + ) + # We cannot predict the actual number of tokens after the dispatch op, + # so we set it to the worst case in drop_and_pad mode + self.num_dispatched_tokens = self.capacity * self.group.size() * self.num_local_experts + # In drop_and_pad mode, the number of tokens after the permute op + # can be computed on the CPU + self.num_permuted_tokens = self.num_dispatched_tokens + self.tokens_per_expert = torch.full( + (self.num_local_experts,), self.capacity * self.group.size(), dtype=torch.long + ) + + def dispatch( + self, + hidden_states: torch.Tensor, + async_finish: bool = True, + allocate_on_comm_stream: bool = True, + ) -> torch.Tensor: + # HybridEP only supports float32 probs + if self.token_probs.dtype != torch.float32: + if self.token_probs.dtype in [torch.bfloat16, torch.float16]: + logger.warning( + "HybridEP only supports float32 probs, please set --moe-router-dtype=fp32" + ) + self.token_probs = self.token_probs.float() # downcast or upcast + if self.config.fp8: + self.pad_multiple = get_fp8_align_size(self.config.fp8_recipe) + dispatched_hidden, self.dispatched_probs, _, tokens_per_expert, self.handle = ( + hybrid_ep_dispatch( + x=hidden_states, + routing_map=self.routing_map, + probs=self.token_probs, + group=self.group, + num_local_experts=self.num_local_experts, + 
num_sms_dispatch_api=self.config.moe_hybridep_num_sms, + num_sms_combine_api=self.config.moe_hybridep_num_sms, + num_dispatched_tokens=self.num_dispatched_tokens, + num_permuted_tokens=self.num_permuted_tokens, + pad_multiple=self.pad_multiple, + ) + ) + + if not self.drop_and_pad: + self.tokens_per_expert = tokens_per_expert + # self.num_permuted_tokens is necessary to allocate the output tensor for permute + self.num_permuted_tokens = self.tokens_per_expert.sum() + + return dispatched_hidden + + def combine( + self, + hidden_states: torch.Tensor, + async_finish: bool = True, + allocate_on_comm_stream: bool = True, + ) -> torch.Tensor: + hidden_states = hybrid_ep_combine( + x=hidden_states, + handle=self.handle, + num_dispatched_tokens=self.num_dispatched_tokens, + num_permuted_tokens=self.num_permuted_tokens, + pad_multiple=self.pad_multiple, + ) + # Release the used handle/num_permuted_tokens which could change in each iteration + self.handle = None + self.num_permuted_tokens = None + self.num_dispatched_tokens = None + return hidden_states + + def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor: + return hidden_states, self.dispatched_probs + + def get_restored_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor: + return hidden_states + + def get_number_of_tokens_per_expert(self) -> torch.Tensor: + ''' + Get the number of tokens per expert. 
+ ''' + return self.tokens_per_expert + + class _DeepepManager(_DispatchManager): """ A manager class to handle fused all-to-all communication processes for MoE models using @@ -1000,7 +1152,7 @@ def dispatch( # DeepEP only supports float32 probs if self.token_probs.dtype != torch.float32: if self.token_probs.dtype in [torch.bfloat16, torch.float16]: - logger.info( + logger.warning( "DeepEP only supports float32 probs, please set --moe-router-dtype=fp32" ) self.token_probs = self.token_probs.float() # downcast or upcast @@ -1052,9 +1204,6 @@ def _indices_to_multihot(self, indices, probs): multihot_probs[row_indices, valid_indices] = probs[mask] return multihot_routing_map.bool(), multihot_probs - def get_dispached_metadata(self) -> torch.Tensor: - return self.dispatched_indices, self.dispatched_probs - def get_number_of_tokens_per_expert(self) -> torch.Tensor: """ Get the number of tokens per expert. @@ -1183,19 +1332,27 @@ def __init__( self.num_local_experts = num_local_experts self.local_expert_indices = local_expert_indices assert self.tp_size * self.ep_size > 1, "Flex token dispatcher requires TPxEP > 1" - assert ( - self.config.moe_enable_deepep - ), "DeepEP is not enabled. Please set --moe-enable-deepep to use DeepEP backend." 
- assert ( - self.config.moe_pad_expert_input_to_capacity is False - ), "Flex token dispatcher does not support --moe-pad-expert-input-to-capacity" - self._comm_manager = _DeepepManager( - group=self.tp_ep_group, - num_local_experts=self.num_local_experts, - router_topk=self.tp_size * self.config.moe_router_topk, - num_experts=self.tp_size * self.config.num_moe_experts, - config=self.config, - ) + if self.config.moe_flex_dispatcher_backend == "deepep": + self._comm_manager = _DeepepManager( + group=self.tp_ep_group, + num_local_experts=self.num_local_experts, + router_topk=self.tp_size * self.config.moe_router_topk, + num_experts=self.tp_size * self.config.num_moe_experts, + config=self.config, + ) + elif self.config.moe_flex_dispatcher_backend == "hybridep": + self._comm_manager = _HybridEPManager( + group=self.tp_ep_group, + num_local_experts=self.num_local_experts, + num_experts=self.tp_size * self.config.num_moe_experts, + config=self.config, + ) + else: + raise ValueError( + f"Invalid backend: {self.config.moe_flex_dispatcher_backend}" + "Please set --moe-flex-dispatcher-backend=deepep or " + "--moe-flex-dispatcher-backend=hybridep" + ) def set_shared_experts(self, shared_experts): raise NotImplementedError( diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 8b36425ca2a..a597470e6dc 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -215,6 +215,10 @@ class TransformerConfig(ModelParallelConfig): moe_deepep_num_sms: int = 20 """Number of SMs to use for DeepEP.""" + moe_hybridep_num_sms: int = 16 + """Number of SMs to use for HybridEP. 
In pure NVL scenarios, + 16 SMs can generally achieve good bandwidth.""" + #################### # linear attention #################### @@ -590,6 +594,11 @@ class TransformerConfig(ModelParallelConfig): moe_enable_deepep: bool = False """[Experimental] Enable DeepEP for efficient token dispatching and combine in MoE models.""" + moe_flex_dispatcher_backend: str = "deepep" + """[Experimental] The backend to use for flex token dispatcher. The default is "deepep". + Options are "deepep" and "hybridep". Currently only "hybridep" backend supports + the MNNVL case.""" + moe_per_layer_logging: bool = False """Enable per-layer logging for MoE, currently supports auxiliary loss and z loss.""" @@ -892,11 +901,18 @@ def __post_init__(self): if self.moe_enable_deepep: if self.moe_token_dispatcher_type != "flex": raise ValueError("DeepEP backend is only supported with flex token dispatcher.") + logging.warning( + "moe_enable_deepep is deprecated." + "Please use --moe-flex-dispatcher-backend=deepep instead." + ) if self.moe_token_dispatcher_type == "flex": - if self.moe_pad_expert_input_to_capacity: + if self.moe_pad_expert_input_to_capacity and ( + self.moe_enable_deepep or self.moe_flex_dispatcher_backend == "deepep" + ): raise ValueError( - "Flex token dispatcher does not support moe_pad_expert_input_to_capacity" + "Flex token dispatcher with deepep backend does not support " + "moe_pad_expert_input_to_capacity" ) if self.moe_shared_expert_intermediate_size is not None: diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index fa9a0f6d751..fe7add9bd21 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -3166,9 +3166,15 @@ def _add_moe_args(parser): default='allgather', help="The type of token dispatcher to use. The default is 'allgather'. Options are 'allgather', 'alltoall'. We recommend using 'alltoall' when applying expert parallelism. 
For more information, please refer to the documentation in core/moe/README.") group.add_argument('--moe-enable-deepep', action='store_true', - help='[Experimental] Enable DeepSeek/DeepEP for efficient token dispatching and combine in MoE models. Only works with flex token dispatcher by setting --moe-token-dispatcher-type=flex.') + help='DEPRECATED: Please use --moe-flex-dispatcher-backend=deepep instead.') + group.add_argument('--moe-flex-dispatcher-backend', type=str, + choices=['deepep', 'hybridep'], + default='deepep', + help='The backend to use for flex token dispatcher. The default is "deepep". Options are "deepep" and "hybridep".') group.add_argument('--moe-deepep-num-sms', type=int, default=20, help='Number of SMs to use for DeepEP.') + group.add_argument('--moe-hybridep-num-sms', type=int, default=16, + help='Number of SMs to use for HybridEP.') group.add_argument('--moe-permute-fusion', action='store_true', help='Fuse token rearrangement ops during token dispatching.') # Token dropping arguments diff --git a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py index 2dd0f20fe2c..81e61a3404a 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py @@ -96,7 +96,7 @@ def test_1f1b_schedule_model_chunk(self, mtp_layers, dispatcher_type, fp8_flag, # create TransformerConfig extra_kwargs = {"moe_token_dispatcher_type": dispatcher_type} if dispatcher_type == "flex": - extra_kwargs["moe_enable_deepep"] = True + extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" extra_kwargs["moe_router_dtype"] = "fp32" if fp8_flag is not None: extra_kwargs["fp8"] = fp8_flag[0] diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py index 729a6e0f2f5..f39a10c5bf3 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py +++ 
b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py @@ -362,7 +362,7 @@ def test_transformer_layer_overlap(self, dispatcher_type, fp8_flag): extra_kwargs = {"moe_token_dispatcher_type": dispatcher_type} if dispatcher_type == "flex": - extra_kwargs["moe_enable_deepep"] = True + extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" extra_kwargs["moe_router_dtype"] = "fp32" if fp8_flag is not None: extra_kwargs["fp8"] = fp8_flag[0] @@ -415,7 +415,7 @@ def test_mtp_layer_overlap(self, dispatcher_type, fp8_flag): "mtp_loss_scaling_factor": 1.1, } if dispatcher_type == "flex": - extra_kwargs["moe_enable_deepep"] = True + extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" extra_kwargs["moe_router_dtype"] = "fp32" if fp8_flag is not None: extra_kwargs["fp8_recipe"] = fp8_flag[1] diff --git a/tests/unit_tests/transformer/moe/test_token_dispatcher.py b/tests/unit_tests/transformer/moe/test_token_dispatcher.py index 82138bc637d..c2462ef73ad 100644 --- a/tests/unit_tests/transformer/moe/test_token_dispatcher.py +++ b/tests/unit_tests/transformer/moe/test_token_dispatcher.py @@ -91,7 +91,7 @@ def __init__( sequence_parallel=tp_size > 1, add_bias_linear=kwargs.get("add_bias_linear", False), moe_permute_fusion=kwargs.get("moe_permute_fusion", False), - moe_enable_deepep=kwargs.get("moe_enable_deepep", False), + moe_flex_dispatcher_backend=kwargs.get("moe_flex_dispatcher_backend", None), ) # init moe layer @@ -411,7 +411,16 @@ def is_deep_ep_available(): return HAVE_DEEP_EP -@pytest.mark.skipif(not is_deep_ep_available(), reason="Deep EP is not available") +def is_hybrid_ep_available(): + from megatron.core.transformer.moe.fused_a2a import HAVE_HYBRIDEP + + return HAVE_HYBRIDEP + + +@pytest.mark.skipif( + not is_deep_ep_available() and not is_hybrid_ep_available(), + reason="Deep EP and Hybrid EP are not available", +) class TestFlexDispatcher: def setup_method(self, method): pass @@ -421,9 +430,14 @@ def teardown_method(self, method): @pytest.mark.skipif(not 
torch.cuda.is_available(), reason="CUDA not available") @pytest.mark.internal - @pytest.mark.parametrize("tp_size,ep_size", [(8, 1), (1, 8), (2, 4)]) + @pytest.mark.parametrize("tp_size,ep_size", [(1, 8), (8, 1), (4, 2)]) @pytest.mark.parametrize("permute_fusion", permute_fusion_params) - def test_forward_backward(self, tp_size, ep_size, permute_fusion): + @pytest.mark.parametrize("moe_flex_dispatcher_backend", ["deepep", "hybridep"]) + def test_forward_backward(self, tp_size, ep_size, permute_fusion, moe_flex_dispatcher_backend): + if moe_flex_dispatcher_backend == "deepep" and not is_deep_ep_available(): + pytest.skip("Deep EP is not available") + if moe_flex_dispatcher_backend == "hybridep" and not is_hybrid_ep_available(): + pytest.skip("Hybrid EP is not available") if permute_fusion: config.ENABLE_EXPERIMENTAL = True container = MoEModelTestContainer( @@ -435,8 +449,8 @@ def test_forward_backward(self, tp_size, ep_size, permute_fusion): moe_router_load_balancing_type="aux_loss", moe_token_dispatcher_type="flex", moe_permute_fusion=permute_fusion, - hidden_size=32, - moe_enable_deepep=True, + hidden_size=1024, + moe_flex_dispatcher_backend=moe_flex_dispatcher_backend, test_dtype=torch.bfloat16, ) container.dispatcher_dropless_test() @@ -448,7 +462,14 @@ def test_forward_backward(self, tp_size, ep_size, permute_fusion): @pytest.mark.timeout(120) @pytest.mark.parametrize("tp_size,ep_size", [(1, 8), (8, 1), (4, 2)]) @pytest.mark.parametrize("permute_fusion", permute_fusion_params) - def test_capacity_forward_backward(self, tp_size, ep_size, permute_fusion): + @pytest.mark.parametrize("moe_flex_dispatcher_backend", ["deepep", "hybridep"]) + def test_capacity_forward_backward( + self, tp_size, ep_size, permute_fusion, moe_flex_dispatcher_backend + ): + if moe_flex_dispatcher_backend == "deepep" and not is_deep_ep_available(): + pytest.skip("Deep EP is not available") + if moe_flex_dispatcher_backend == "hybridep" and not is_hybrid_ep_available(): + 
pytest.skip("Hybrid EP is not available") if permute_fusion: config.ENABLE_EXPERIMENTAL = True container = MoEModelTestContainer( @@ -463,8 +484,8 @@ def test_capacity_forward_backward(self, tp_size, ep_size, permute_fusion): moe_expert_capacity_factor=0.5, moe_pad_expert_input_to_capacity=False, moe_permute_fusion=permute_fusion, - hidden_size=32, - moe_enable_deepep=True, + hidden_size=1024, + moe_flex_dispatcher_backend=moe_flex_dispatcher_backend, test_dtype=torch.bfloat16, ) container.dispatcher_capacity_test() @@ -478,7 +499,14 @@ def test_capacity_forward_backward(self, tp_size, ep_size, permute_fusion): @pytest.mark.timeout(120) @pytest.mark.parametrize("tp_size,ep_size", [(1, 8), (8, 1), (4, 2)]) @pytest.mark.parametrize("permute_fusion", [True]) - def test_router_padding_for_fp8_forward_backward(self, tp_size, ep_size, permute_fusion): + @pytest.mark.parametrize("moe_flex_dispatcher_backend", ["deepep", "hybridep"]) + def test_router_padding_for_fp8_forward_backward( + self, tp_size, ep_size, permute_fusion, moe_flex_dispatcher_backend + ): + if moe_flex_dispatcher_backend == "deepep" and not is_deep_ep_available(): + pytest.skip("Deep EP is not available") + if moe_flex_dispatcher_backend == "hybridep" and not is_hybrid_ep_available(): + pytest.skip("Hybrid EP is not available") if permute_fusion: config.ENABLE_EXPERIMENTAL = True container = MoEModelTestContainer( @@ -491,8 +519,8 @@ def test_router_padding_for_fp8_forward_backward(self, tp_size, ep_size, permute moe_token_dispatcher_type="flex", moe_pad_expert_input_to_capacity=False, moe_permute_fusion=permute_fusion, - hidden_size=32, - moe_enable_deepep=True, + hidden_size=1024, + moe_flex_dispatcher_backend=moe_flex_dispatcher_backend, test_dtype=torch.bfloat16, ) container.dispatcher_router_padding_for_fp8_test() diff --git a/tests/unit_tests/transformer/test_submodule_callables.py b/tests/unit_tests/transformer/test_submodule_callables.py index d0f5ad12d3f..141982a17cf 100644 --- 
a/tests/unit_tests/transformer/test_submodule_callables.py +++ b/tests/unit_tests/transformer/test_submodule_callables.py @@ -137,7 +137,7 @@ def test_1f1b_overlap(self, dispatcher_type, grouped_gemm, permute_fusion): "moe_permute_fusion": permute_fusion, } if dispatcher_type == "flex": - extra_kwargs["moe_enable_deepep"] = True + extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" extra_kwargs["moe_router_dtype"] = "fp32" config = get_test_config(extra_kwargs=extra_kwargs, moe_grouped_gemm=grouped_gemm) microbatches = 4 From 2e565067015c92f58c217c5a6c2cc54271ce3a2e Mon Sep 17 00:00:00 2001 From: Shifang Xu Date: Wed, 22 Oct 2025 05:09:18 -0700 Subject: [PATCH 034/334] ADLR/megatron-lm!4235 - [dev] Support multimodule communication Co-authored-by: Mcore Bot --- .../pipeline_parallel/bridge_communicator.py | 3 - .../multimodule_communicator.py | 523 ++++++++++++ .../test_multimodule_communicator.py | 780 ++++++++++++++++++ 3 files changed, 1303 insertions(+), 3 deletions(-) create mode 100644 megatron/core/pipeline_parallel/multimodule_communicator.py create mode 100644 tests/unit_tests/pipeline_parallel/test_multimodule_communicator.py diff --git a/megatron/core/pipeline_parallel/bridge_communicator.py b/megatron/core/pipeline_parallel/bridge_communicator.py index a67ded6bf08..f1e74a2f16d 100644 --- a/megatron/core/pipeline_parallel/bridge_communicator.py +++ b/megatron/core/pipeline_parallel/bridge_communicator.py @@ -628,9 +628,6 @@ def send_forward_recv_backward( dist.broadcast( shape_tensor, src=self.current_rank, group=self.src_grid_broadcast_pg ) - dist.broadcast( - shape_tensor, src=self.current_rank, group=self.src_grid_broadcast_pg - ) # Broadcast the tensors to all ranks in the group dist.broadcast( diff --git a/megatron/core/pipeline_parallel/multimodule_communicator.py b/megatron/core/pipeline_parallel/multimodule_communicator.py new file mode 100644 index 00000000000..dfda270ef76 --- /dev/null +++ 
b/megatron/core/pipeline_parallel/multimodule_communicator.py @@ -0,0 +1,523 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +import logging +from dataclasses import dataclass +from typing import Dict, List, Optional, Union + +import torch +import torch.distributed as dist + +from megatron.core.hyper_comm_grid import HyperCommGrid +from megatron.core.model_parallel_config import ModelParallelConfig +from megatron.core.pipeline_parallel.bridge_communicator import BridgeCommunicator +from megatron.core.pipeline_parallel.p2p_communication import P2PCommunicator + +# Types +Shape = Union[List[int], torch.Size] + + +@dataclass +class RankModuleInfo: + """Information about a rank in a module.""" + + # the stage of the current rank in the current module's pipeline. + pp_rank: int # the stage of the current rank in the current module's pipeline + pp_size: int # the number of ranks in the current module's pipeline + p2p_communicator: Optional[P2PCommunicator] + # key is either the src or dst module name connected to the current module + # one module may have multiple bridge communicators if it has multiple + # incoming or outgoing connections. + bridge_comms_as_src_module: Optional[List[BridgeCommunicator]] + bridge_comms_as_dest_module: Optional[List[BridgeCommunicator]] + # the absolute first stage in the overall model + # no incoming connections + is_source_stage: Optional[bool] = True + # the absolute last stage in the overall model + # no outgoing connections + is_terminal_stage: Optional[bool] = True + + +class MultiModulePipelineCommunicator: + """Communicator for a multi-module pipeline.""" + + def __init__( + self, + module_to_grid_map: Dict[str, HyperCommGrid], + topology: Dict[str, List[str]], + config: ModelParallelConfig, + dim_mapping: Dict[str, List[int]] = None, + ): + """ + Initialize the MultiModulePipelineCommunicator. + + Args: + module_to_grid_map (dict): A dictionary mapping module names to HyperCommGrids. 
+ Example: + module_to_grid_map = { + 'image_encoder': image_encoder_grid, + 'audio_encoder': audio_encoder_grid, + 'llm': llm_grid, + 'generator': generator_grid + } + topology (dict): A dictionary mapping module names to lists of outgoing modules. + Example: + topology = { + 'image_encoder': ['llm'], + 'audio_encoder': ['llm'], + 'llm': ['generator'], + 'generator': [] + } + config (ModelParallelConfig): A ModelParallelConfig object. + dim_mapping (Dict[str, List[int]]): Dimension mapping for sequence, batch, hidden. + Example: + dim_mapping = {'s': 0, 'h': 2, 'b': 1} + Default: None + """ + self.module_to_grid_map = module_to_grid_map + self.topology = topology + self.config = config + self.dim_mapping = dim_mapping + self.current_rank = dist.get_rank() + + # Build bridge communicators for all modules + self.bridge_comms = [] + self._build_bridge_comms() + + self.rank_module_map = {} + self._build_rank_module_info_map() + + def _build_bridge_comms(self): + """Construct and store BridgeCommunicator objects that describe the outgoing + communication relationships for all of the modules. + """ + for src_module_name, src_grid in self.module_to_grid_map.items(): + for dest_module_name in self.topology[src_module_name]: + dest_grid = self.module_to_grid_map[dest_module_name] + bridge_comm = BridgeCommunicator( + src_grid=src_grid, + dest_grid=dest_grid, + dim_mapping=self.dim_mapping, + comm_dtype=self.config.pipeline_dtype, + src_module_name=src_module_name, + dest_module_name=dest_module_name, + ) + self.bridge_comms.append(bridge_comm) + + @property + def is_pp_first_stage(self): + """Return True if the current rank has the absolute first stage in the overall model. + + The absolute first stage is defined as: + 1. The current rank must be in the first PP stage (pp_rank == 0) of some module + 2. 
That module must be a source module (no incoming connections in topology) + """ + for module_name, rank_module_info in self.rank_module_map.items(): + # Check if this rank is at the first PP stage of this module + if rank_module_info.pp_rank == 0: + # Check if this module is a source module (no incoming connections) + if self._is_source_module(module_name): + return True + return False + + @property + def is_pp_last_stage(self): + """Return True if the current rank has the absolute last stage in the overall model. + + The absolute last stage is defined as: + 1. The current rank must be in the last PP stage of some module + 2. That module must be a sink module (no outgoing connections in topology) + """ + for module_name, rank_module_info in self.rank_module_map.items(): + # Check if this rank is at the last PP stage of this module + if rank_module_info.pp_rank == rank_module_info.pp_size - 1: + # Check if this module is a sink module (no outgoing connections) + if self._is_sink_module(module_name): + return True + return False + + def _is_source_module(self, module_name: str) -> bool: + """Check if a module is a source module (has no incoming connections).""" + # A module is a source if no other module lists it as a destination + for src_module, dest_modules in self.topology.items(): + if module_name in dest_modules: + return False + return True + + def _is_sink_module(self, module_name: str) -> bool: + """Check if a module is a sink module (has no outgoing connections).""" + return len(self.topology.get(module_name, [])) == 0 + + def is_current_rank_in_grid(self, grid: HyperCommGrid) -> bool: + """Check if the current rank is in the grid.""" + return grid.rank_offset <= self.current_rank < grid.rank_offset + grid.size + + @property + def num_warmup_microbatches(self): + """Calculate the number of warmup microbatches for the current rank. 
+ + Uses the same simple logic as P2PCommunicator: + total_pipeline_stages - current_rank_stage - 1 + + Returns: + int: Number of warmup microbatches for this rank + """ + # Get total pipeline depth across all modules + total_stages = self.compute_total_pipeline_stages(self.topology, self.module_to_grid_map) + + # Get current rank's position in the overall pipeline (0-indexed) + # Use compute_total_pipeline_stages with current rank to get cumulative position + if self.rank_module_map: + # Take the first module this rank belongs to + # TODO: ykarnati - improve this logic. + module_name = next(iter(self.rank_module_map.keys())) + current_stage = ( + self.compute_total_pipeline_stages( + self.topology, + self.module_to_grid_map, + rank=self.current_rank, + module_name=module_name, + ) + - 1 + ) # Convert from 1-indexed to 0-indexed + else: + current_stage = 0 + + assert ( + current_stage <= total_stages + ), f"current_stage: {current_stage} is greater than total_stages: {total_stages}" + logging.debug( + f"[Rank {dist.get_rank()} ][MultiModulePipelineCommunicator] " + f"current_stage: {current_stage} total_stages: {total_stages} " + f"num_warmup_microbatches: {total_stages - current_stage - 1}" + ) + return total_stages - current_stage - 1 + + def _build_rank_module_info_map(self): + """For each module in the current rank, initialize the P2P communicator + and build the bridge communicator info for the module. + Each rank may hold multiple modules when colocated. 
+ """ + for module_name, module_grid in self.module_to_grid_map.items(): + if self.is_current_rank_in_grid(module_grid): + # Initialize P2P communicator + pp_group = module_grid.get_pg('pp') + p2p_comm = P2PCommunicator(pp_group, self.config) + pp_size = dist.get_world_size(pp_group) + rank_in_pp_group = dist.get_group_rank(pp_group, self.current_rank) + pp_rank = rank_in_pp_group % pp_size + + bridge_comms_as_dest_module = [] + bridge_comms_as_src_module = [] + # If first stage, check if the module has any incoming modules + # If so, initialize bridge communicator + if pp_rank == 0: + for bridge_comm in self.bridge_comms: + if ( + bridge_comm.is_current_rank_in_grid(bridge_comm.dest_grid) + and bridge_comm.dest_module_name == module_name + ): + bridge_comms_as_dest_module.append(bridge_comm) + # If last stage, check if the module has any outgoing modules + # If so, initialize bridge communicator + if pp_rank == pp_size - 1: + for bridge_comm in self.bridge_comms: + if ( + bridge_comm.is_current_rank_in_grid(bridge_comm.src_grid) + and bridge_comm.src_module_name == module_name + ): + bridge_comms_as_src_module.append(bridge_comm) + # Build RankModuleInfo for the module + rank_module_info = RankModuleInfo( + pp_rank=pp_rank, + pp_size=pp_size, + p2p_communicator=p2p_comm, + bridge_comms_as_dest_module=bridge_comms_as_dest_module, + bridge_comms_as_src_module=bridge_comms_as_src_module, + ) + self.rank_module_map[module_name] = rank_module_info + + def recv_forward( + self, tensor_shape: Optional[Shape] = None, is_first_stage: bool = False + ) -> Dict[str, torch.Tensor]: + """Receive forward activation tensor. + + Args: + tensor_shape: Expected activation tensor shape + + Returns: + A dictionary mapping module names to tensors. 
+ """ + logging.debug( + f"[Rank {dist.get_rank()} ][MultiModulePipelineCommunicator] " + f"[receive_forward] tensors_shape: {tensor_shape}, is_first_stage: {is_first_stage}" + ) + input_dict = {} + for module_name, rank_module_info in self.rank_module_map.items(): + + if rank_module_info.pp_rank == 0: + # If first stage, and has incoming modules, receive forward activation + # from incoming modules. + for bridge_comm in rank_module_info.bridge_comms_as_dest_module: + input_dict[bridge_comm.src_module_name] = bridge_comm.recv_forward() + else: + # If not first stage, receive forward activation tensor from P2P communicator. + input_dict[module_name] = rank_module_info.p2p_communicator.recv_forward( + tensor_shapes=tensor_shape, is_first_stage=False + ) + return input_dict + + def send_forward(self, output_dict: Dict[str, torch.Tensor], is_last_stage: bool = False): + """Send forward activation tensor. + + Args: + output_dict: A dictionary mapping module names to tensors. + """ + logging.debug( + f"[Rank {dist.get_rank()} ][MultiModulePipelineCommunicator] " + f"[send_forward] output_dict keys: {output_dict.keys()}, is_last_stage: {is_last_stage}" + ) + for module_name, rank_module_info in self.rank_module_map.items(): + if rank_module_info.pp_rank == rank_module_info.pp_size - 1: + # If last stage, and has outgoing modules, send forward activation + # by using bridge communicator. + for bridge_comm in rank_module_info.bridge_comms_as_src_module: + bridge_comm.send_forward(output_dict[module_name]) + else: + # If not last stage, send forward activation by using P2P communicator. + rank_module_info.p2p_communicator.send_forward( + output_dict[module_name], is_last_stage=False + ) + + def send_forward_recv_backward( + self, + output_dict: Dict[str, torch.Tensor], + tensor_shape: Optional[Shape] = None, + is_last_stage: bool = False, + ) -> Dict[str, torch.Tensor]: + """Send forward activation tensor and receive backward activation tensor. 
+ + Args: + output_dict: A dictionary mapping module names to tensors. + tensor_shape: Expected gradient tensor shape + + Returns: + A dictionary mapping module names to tensors. + """ + logging.debug( + f"[Rank {dist.get_rank()} ][MultiModulePipelineCommunicator] " + f"[send_forward_recv_backward] output_dict keys: {output_dict.keys()}, " + f"tensor_shape: {tensor_shape}, is_last_stage: {is_last_stage}" + ) + grad_dict = {} + for module_name, rank_module_info in self.rank_module_map.items(): + if rank_module_info.pp_rank == rank_module_info.pp_size - 1: + # If last stage, and has outgoing modules, send forward activation and + # receive backward gradient by using bridge communicator. + for bridge_comm in rank_module_info.bridge_comms_as_src_module: + grad_dict[bridge_comm.src_module_name] = bridge_comm.send_forward_recv_backward( + output_dict[module_name] + ) + else: + # If not last stage, send forward activation and receive backward gradient + # by using P2P communicator. + grad_dict[module_name] = ( + rank_module_info.p2p_communicator.send_forward_recv_backward( + output_dict[module_name], tensor_shapes=tensor_shape, is_last_stage=False + ) + ) + return grad_dict + + def send_backward_recv_forward( + self, + grad_dict: Dict[str, torch.Tensor], + tensor_shape: Optional[Shape] = None, + is_first_stage: bool = False, + ) -> Dict[str, torch.Tensor]: + """Send backward activation tensor and receive forward activation tensor. + + Args: + grad_dict: A dictionary mapping module names to tensors. + tensor_shape: Expected gradient tensor shape + + Returns: + A dictionary mapping module names to tensors. 
+ """ + logging.debug( + f"[Rank {dist.get_rank()} ][MultiModulePipelineCommunicator] " + f"[send_backward_recv_forward] grad_dict keys: {grad_dict.keys()}, " + f"tensor_shape: {tensor_shape}, is_first_stage: {is_first_stage}" + ) + input_dict = {} + for module_name, rank_module_info in self.rank_module_map.items(): + if rank_module_info.pp_rank == 0: + for bridge_comm in rank_module_info.bridge_comms_as_dest_module: + # If first stage, and has incoming modules, send backward gradient and + # receive forward activation by using bridge communicator. + input_dict[bridge_comm.src_module_name] = ( + bridge_comm.send_backward_recv_forward( + grad_dict[bridge_comm.src_module_name] + ) + ) + else: + # If not first stage, send backward gradient and receive forward activation + # by using P2P communicator. + input_dict[module_name] = ( + rank_module_info.p2p_communicator.send_backward_recv_forward( + grad_dict[module_name], tensor_shapes=tensor_shape, is_first_stage=False + ) + ) + return input_dict + + def recv_backward( + self, tensor_shape: Optional[Shape] = None, is_last_stage: bool = False + ) -> Dict[str, torch.Tensor]: + """Receive backward activation tensor. + + Args: + tensor_shape: Expected gradient tensor shape + + Returns: + A dictionary mapping module names to tensors. + """ + logging.debug( + f"[Rank {dist.get_rank()} ][MultiModulePipelineCommunicator] " + f"[recv_backward] tensor_shape: {tensor_shape}, is_last_stage: {is_last_stage}" + ) + grad_dict = {} + for module_name, rank_module_info in self.rank_module_map.items(): + if rank_module_info.pp_rank == rank_module_info.pp_size - 1: + # If last stage, and has incoming modules, receive backward gradient + # by using bridge communicator. + for bridge_comm in rank_module_info.bridge_comms_as_src_module: + grad_dict[bridge_comm.src_module_name] = bridge_comm.recv_backward() + else: + # If not last stage, receive backward gradient by using P2P communicator. 
+ grad_dict[module_name] = rank_module_info.p2p_communicator.recv_backward( + tensor_shapes=tensor_shape, is_last_stage=False + ) + return grad_dict + + def send_backward(self, grad_dict: Dict[str, torch.Tensor], is_first_stage: bool = False): + """Send backward activation tensor. + + Args: + grad_dict: A dictionary mapping module names to tensors. + """ + logging.debug( + f"[Rank {dist.get_rank()} ][MultiModulePipelineCommunicator] " + f"[send_backward] grad_dict keys: {grad_dict.keys()}, is_first_stage: {is_first_stage}" + ) + for module_name, rank_module_info in self.rank_module_map.items(): + if rank_module_info.pp_rank == 0: + # If first stage, and has incoming modules, send backward activation + # by using bridge communicator. + for bridge_comm in rank_module_info.bridge_comms_as_dest_module: + bridge_comm.send_backward(grad_dict[bridge_comm.src_module_name]) + else: + # If not first stage, send backward activation by using P2P communicator. + rank_module_info.p2p_communicator.send_backward( + grad_dict[module_name], is_first_stage=False + ) + + @staticmethod + def compute_total_pipeline_stages( + topology: Dict[str, List[str]], + module_to_grid_map: Dict[str, HyperCommGrid], + rank: Optional[int] = None, + module_name: Optional[str] = None, + ) -> int: + """Compute the total number of pipeline stages across a multi-module chain. + + Interprets ``topology`` as a directed acyclic graph (DAG) where nodes are modules + and edges indicate forward data flow from source to destination modules. Each node + is assigned a weight equal to its pipeline parallel size (number of PP stages). + + The total number of stages is defined as the length of the longest path in this DAG + under node weights. + + If ``rank`` is None (default), returns the maximum over all terminal (sink) modules of + the sum of PP sizes along a path ending at that terminal. 
For example, given: + + image_encoder ->\ + -> llm -> generator + audio_encoder ->/ + + the total is: max(pp(image_encoder), pp(audio_encoder)) + pp(llm) + pp(generator). + + If ``rank`` is provided, the result is the total number of pipeline stages up to (and + including) the PP stage that ``rank`` occupies inside its module. In this case, the + weight of the target module equals (pp_rank_index(rank) + 1) instead of the module's + full PP size; other modules still contribute their full PP sizes. If the rank belongs to + multiple modules (colocation), pass ``module_name`` to disambiguate; otherwise the + maximum across all candidate modules containing the rank is returned. + + Args: + topology: Mapping from a module to its list of outgoing modules. + module_to_grid_map: Mapping from module name to its ``HyperCommGrid``. + + Returns: + The total number of pipeline stages along the longest path given the constraints. + + Raises: + ValueError: If the topology contains cycles; or has no terminal nodes when + ``rank`` is None + """ + nodes = set(module_to_grid_map.keys()) + # Build adjacency and reverse-adjacency (predecessors). + adj: Dict[str, List[str]] = {node: list(topology.get(node, [])) for node in nodes} + preds: Dict[str, List[str]] = {node: [] for node in nodes} + for src, outs in adj.items(): + for dst in outs: + preds[dst].append(src) + + # Identify terminal nodes (no outgoing edges) for the rank=None case. + sinks = [node for node, outs in adj.items() if not outs] + if rank is None and not sinks: + raise ValueError( + "Topology must be a DAG with at least one terminal (no outgoing) module." 
+ ) + + def pp_size(name: str) -> int: + grid = module_to_grid_map[name] + pp_dim_index = grid.dim_names.index('pp') + return grid.shape[pp_dim_index] + + def partial_weight_for_target(target: str) -> Optional[int]: + if rank is None: + return None + grid = module_to_grid_map.get(target) + rank_groups = grid._gen_rank_enum(['pp']) + stage_index: Optional[int] = None + for group in rank_groups: + if rank in group: + stage_index = group.index(rank) + break + return stage_index + 1 + + def longest_path_to(target: str) -> int: + visiting = set() + partial = partial_weight_for_target(target) + + def weight(name: str) -> int: + if partial is not None and name == target: + return partial + return pp_size(name) + + def dfs(node: str) -> int: + if node in visiting: + raise ValueError("Topology contains cycles; expected a DAG.") + visiting.add(node) + best = 0 + for p in preds.get(node, []): + val = dfs(p) + if val > best: + best = val + visiting.remove(node) + return weight(node) + best + + return dfs(target) + + if rank is None: + return max(longest_path_to(sink) for sink in sinks) + + return longest_path_to(module_name) diff --git a/tests/unit_tests/pipeline_parallel/test_multimodule_communicator.py b/tests/unit_tests/pipeline_parallel/test_multimodule_communicator.py new file mode 100644 index 00000000000..73739859f42 --- /dev/null +++ b/tests/unit_tests/pipeline_parallel/test_multimodule_communicator.py @@ -0,0 +1,780 @@ +import logging +import os +import sys + +import pytest +import torch +import torch.distributed as dist +from packaging import version + +from megatron.core import parallel_state +from megatron.core.hyper_comm_grid import HyperCommGrid +from megatron.core.model_parallel_config import ModelParallelConfig +from megatron.core.pipeline_parallel.multimodule_communicator import MultiModulePipelineCommunicator +from tests.unit_tests.pipeline_parallel.test_bridge_communicator import ( + _avg_params, + _create_transformer_block, + _get_pg_collection_from_grid, + 
create_hypercomm_grid, + get_transformer_block_and_grid, +) +from tests.unit_tests.test_utilities import Utils + + +class TestMultiModulePipelineCommunicator: + + @classmethod + def setup_class(cls): + """Set up distributed environment for the entire test class.""" + if not dist.is_initialized(): + dist.init_process_group(backend="nccl") + if torch.cuda.is_available(): + torch.cuda.set_device(int(os.environ["LOCAL_RANK"])) + + world_size = dist.get_world_size() + if world_size != 8: + pytest.skip( + f"These tests require 8 GPUs, but only {world_size} are available.", + allow_module_level=True, + ) + + def teardown_class(cls): + Utils.destroy_model_parallel() + + def test_multimodule_communicator_init(self): + """Test MultiModulePipelineCommunicator initialization.""" + + # Create process group grids for each module + image_encoder_grid = create_hypercomm_grid(offset=0, tp=1, cp=1, pp=1, dp=1) + audio_encoder_grid = create_hypercomm_grid(offset=1, tp=1, cp=1, pp=1, dp=1) + llm_grid = create_hypercomm_grid(offset=2, tp=2, cp=1, pp=2, dp=1) + generator_grid = create_hypercomm_grid(offset=6, tp=2, cp=1, pp=1, dp=1) + + # Define module-grid mapping + module_to_grid_map = { + 'image_encoder': image_encoder_grid, + 'audio_encoder': audio_encoder_grid, + 'llm': llm_grid, + 'generator': generator_grid, + } + # Define module computation topology + topology = { + 'image_encoder': ['llm'], + 'audio_encoder': ['llm'], + 'llm': ['generator'], + 'generator': [], + } + config = ModelParallelConfig(bf16=True) + # Initialize communicator + mllm_comm = MultiModulePipelineCommunicator(module_to_grid_map, topology, config) + # Test attributes match expectations + assert mllm_comm.module_to_grid_map == module_to_grid_map + assert mllm_comm.topology == topology + assert mllm_comm.config == config + assert mllm_comm.current_rank == dist.get_rank() + + def test_compute_total_pipeline_stages(self): + """Test compute_total_pipeline_stages for overall chain and until specific ranks.""" + + # 
Create process group grids for each module + image_encoder_grid = create_hypercomm_grid(offset=0, tp=1, cp=1, pp=1, dp=1) + audio_encoder_grid = create_hypercomm_grid(offset=1, tp=1, cp=1, pp=1, dp=1) + llm_grid = create_hypercomm_grid(offset=2, tp=2, cp=1, pp=2, dp=1) + generator_grid = create_hypercomm_grid(offset=6, tp=1, cp=1, pp=1, dp=2) + + # Define module-grid mapping and topology + module_to_grid_map = { + 'image_encoder': image_encoder_grid, + 'audio_encoder': audio_encoder_grid, + 'llm': llm_grid, + 'generator': generator_grid, + } + topology = { + 'image_encoder': ['llm'], + 'audio_encoder': ['llm'], + 'llm': ['generator'], + 'generator': [], + } + + # Overall total pipeline stages: max(1,1) + 2 + 1 = 4 + total = MultiModulePipelineCommunicator.compute_total_pipeline_stages( + topology, module_to_grid_map + ) + assert total == 4 + + llm_pp_rank = MultiModulePipelineCommunicator.compute_total_pipeline_stages( + topology, module_to_grid_map, rank=2, module_name='llm' + ) + assert llm_pp_rank == 2 + + def test_send_forward_recv_forward(self): + """Test send_forward and recv_forward operations.""" + if not dist.is_initialized(): + pytest.skip("Distributed not initialized") + + # Create process group grids for each module + image_encoder_grid = create_hypercomm_grid(offset=0, tp=1, cp=1, pp=1, dp=1) + audio_encoder_grid = create_hypercomm_grid(offset=1, tp=1, cp=1, pp=1, dp=1) + llm_grid = create_hypercomm_grid(offset=2, tp=2, cp=1, pp=2, dp=1) + generator_grid = create_hypercomm_grid(offset=6, tp=1, cp=1, pp=1, dp=2) + + # Set up module-grid mapping and topology + module_to_grid_map = { + 'image_encoder': image_encoder_grid, + 'audio_encoder': audio_encoder_grid, + 'llm': llm_grid, + 'generator': generator_grid, + } + topology = { + 'image_encoder': ['llm'], + 'audio_encoder': ['llm'], + 'llm': ['generator'], + 'generator': [], + } + config = ModelParallelConfig(pipeline_dtype=torch.float) + mllm_comm = MultiModulePipelineCommunicator(module_to_grid_map, 
topology, config) + + # Simulate forward communication for each module + if mllm_comm.is_current_rank_in_grid(image_encoder_grid): + # Image encoder sends output forward + output_dict = {'image_encoder': torch.randn(2, 8, 128).cuda()} + mllm_comm.send_forward(output_dict) + if mllm_comm.is_current_rank_in_grid(audio_encoder_grid): + # Audio encoder sends output forward + output_dict = {'audio_encoder': torch.randn(2, 16, 128).cuda()} + mllm_comm.send_forward(output_dict) + if mllm_comm.is_current_rank_in_grid(llm_grid): + output_dict = {'llm': torch.randn(2, 32, 128).cuda()} + if dist.get_rank() == 2 or dist.get_rank() == 3: + # LLM stage receives both image and audio outputs + input_dict = mllm_comm.recv_forward() + assert input_dict['image_encoder'].shape == (2, 8, 128) + assert input_dict['audio_encoder'].shape == (2, 16, 128) + mllm_comm.send_forward(output_dict) + else: + # LLM stage receives concatenated LLM outputs + input_dict = mllm_comm.recv_forward(tensor_shape=(2, 32, 128)) + assert input_dict['llm'].shape == (2, 32, 128) + mllm_comm.send_forward(output_dict) + if mllm_comm.is_current_rank_in_grid(generator_grid): + # Generator module receives final LLM output + input_dict = mllm_comm.recv_forward() + assert input_dict['llm'].shape == (1, 32, 128) + + def test_send_forward_recv_forward_with_different_pp_size(self): + """Test for the case when pp(image_encoder) != pp(audio_encoder).""" + if not dist.is_initialized(): + pytest.skip("Distributed not initialized") + + # Create process group grids for each module + image_encoder_grid = create_hypercomm_grid(offset=0, tp=1, cp=1, pp=2, dp=1) + audio_encoder_grid = create_hypercomm_grid(offset=2, tp=2, cp=1, pp=1, dp=1) + llm_grid = create_hypercomm_grid(offset=4, tp=1, cp=1, pp=4, dp=1) + + # Set up module-grid mapping and topology + module_to_grid_map = { + 'image_encoder': image_encoder_grid, + 'audio_encoder': audio_encoder_grid, + 'llm': llm_grid, + } + topology = {'image_encoder': ['llm'], 
'audio_encoder': ['llm'], 'llm': []} + config = ModelParallelConfig(pipeline_dtype=torch.float) + mllm_comm = MultiModulePipelineCommunicator(module_to_grid_map, topology, config) + + # Simulate forward communication for each module + if mllm_comm.is_current_rank_in_grid(image_encoder_grid): + output_dict = {'image_encoder': torch.randn(2, 8, 128).cuda()} + if dist.get_rank() == 0: + # Image encoder sends output forward + mllm_comm.send_forward(output_dict) + else: + # Image stage receives image outputs + input_dict = mllm_comm.recv_forward(tensor_shape=(2, 8, 128)) + assert input_dict['image_encoder'].shape == (2, 8, 128) + mllm_comm.send_forward(output_dict) + if mllm_comm.is_current_rank_in_grid(audio_encoder_grid): + # Audio encoder sends output forward + output_dict = {'audio_encoder': torch.randn(2, 16, 128).cuda()} + mllm_comm.send_forward(output_dict) + if mllm_comm.is_current_rank_in_grid(llm_grid): + output_dict = {'llm': torch.randn(2, 32, 128).cuda()} + if dist.get_rank() == 4: + # LLM stage receives both image and audio outputs + input_dict = mllm_comm.recv_forward() + assert input_dict['image_encoder'].shape == (2, 8, 128) + assert input_dict['audio_encoder'].shape == (2, 16, 128) + mllm_comm.send_forward(output_dict) + elif dist.get_rank() == 5 or dist.get_rank() == 6: + # LLM stage receives concatenated LLM outputs + input_dict = mllm_comm.recv_forward(tensor_shape=(2, 32, 128)) + assert input_dict['llm'].shape == (2, 32, 128) + mllm_comm.send_forward(output_dict) + elif dist.get_rank() == 7: + # LLM stage receives concatenated LLM outputs + input_dict = mllm_comm.recv_forward(tensor_shape=(2, 32, 128)) + assert input_dict['llm'].shape == (2, 32, 128) + + def test_send_backward_recv_backward(self): + """Test send_backward and recv_backward operations.""" + if not dist.is_initialized(): + pytest.skip("Distributed not initialized") + + # Create process group grids for each module + image_encoder_grid = create_hypercomm_grid(offset=0, tp=1, cp=1, pp=1, 
dp=1) + audio_encoder_grid = create_hypercomm_grid(offset=1, tp=1, cp=1, pp=1, dp=1) + llm_grid = create_hypercomm_grid(offset=2, tp=2, cp=1, pp=2, dp=1) + generator_grid = create_hypercomm_grid(offset=6, tp=1, cp=1, pp=1, dp=2) + + # Set up module-grid mapping and topology + module_to_grid_map = { + 'image_encoder': image_encoder_grid, + 'audio_encoder': audio_encoder_grid, + 'llm': llm_grid, + 'generator': generator_grid, + } + topology = { + 'image_encoder': ['llm'], + 'audio_encoder': ['llm'], + 'llm': ['generator'], + 'generator': [], + } + config = ModelParallelConfig(pipeline_dtype=torch.float) + mllm_comm = MultiModulePipelineCommunicator(module_to_grid_map, topology, config) + + # Simulate backward communication for each module + if mllm_comm.is_current_rank_in_grid(generator_grid): + # Generator sends gradient backward + grad_dict = {'llm': torch.randn(1, 32, 128).cuda()} + mllm_comm.send_backward(grad_dict) + if mllm_comm.is_current_rank_in_grid(llm_grid): + if dist.get_rank() == 4 or dist.get_rank() == 5: + # LLM receives expanded gradient and sends backward + received_grad = mllm_comm.recv_backward() + assert received_grad['llm'].shape == (2, 32, 128) + grad_dict = {'llm': torch.randn(2, 32, 128).cuda()} + mllm_comm.send_backward(grad_dict) + else: + # LLM receives gradient and sends backward to both image/audio encoders + received_grad = mllm_comm.recv_backward(tensor_shape=(2, 32, 128)) + assert received_grad['llm'].shape == (2, 32, 128) + grad_dict = { + 'image_encoder': torch.randn(2, 8, 128).cuda(), + 'audio_encoder': torch.randn(2, 16, 128).cuda(), + } + mllm_comm.send_backward(grad_dict) + if mllm_comm.is_current_rank_in_grid(image_encoder_grid): + # Image encoder receives its gradient + received_grad = mllm_comm.recv_backward() + assert received_grad['image_encoder'].shape == (2, 8, 128) + if mllm_comm.is_current_rank_in_grid(audio_encoder_grid): + # Audio encoder receives its gradient + received_grad = mllm_comm.recv_backward() + assert 
received_grad['audio_encoder'].shape == (2, 16, 128) + + @pytest.mark.skipif( + version.parse(torch.__version__) < version.parse('2.3.0'), + reason="Feature requires PyTorch 2.3 or later", + ) + def test_send_forward_recv_backward_send_backward_recv_forward(self): + """Test send_forward_recv_backward and send_backward_recv_forward operations.""" + if not dist.is_initialized(): + pytest.skip("Distributed not initialized") + + # Create process group grids for each module + image_encoder_grid = create_hypercomm_grid(offset=0, tp=1, cp=1, pp=1, dp=1) + audio_encoder_grid = create_hypercomm_grid(offset=1, tp=1, cp=1, pp=1, dp=1) + llm_grid = create_hypercomm_grid(offset=2, tp=2, cp=1, pp=2, dp=1) + generator_grid = create_hypercomm_grid(offset=6, tp=1, cp=1, pp=1, dp=2) + + # Set up module-grid mapping and topology + module_to_grid_map = { + 'image_encoder': image_encoder_grid, + 'audio_encoder': audio_encoder_grid, + 'llm': llm_grid, + 'generator': generator_grid, + } + topology = { + 'image_encoder': ['llm'], + 'audio_encoder': ['llm'], + 'llm': ['generator'], + 'generator': [], + } + config = ModelParallelConfig(pipeline_dtype=torch.float) + mllm_comm = MultiModulePipelineCommunicator(module_to_grid_map, topology, config) + + # Simulate bidirectional send/recv for forward and backward in pipeline + + # Encoder stages send forward to the first stage of LLM, and receive backward from the first stage of LLM + if mllm_comm.is_current_rank_in_grid(image_encoder_grid): + output_dict = {'image_encoder': torch.randn(2, 8, 128).cuda()} + received_grad = mllm_comm.send_forward_recv_backward(output_dict) + assert received_grad['image_encoder'].shape == (2, 8, 128) + if mllm_comm.is_current_rank_in_grid(audio_encoder_grid): + output_dict = {'audio_encoder': torch.randn(2, 16, 128).cuda()} + received_grad = mllm_comm.send_forward_recv_backward(output_dict) + assert received_grad['audio_encoder'].shape == (2, 16, 128) + if mllm_comm.is_current_rank_in_grid(llm_grid): + if 
dist.get_rank() == 2 or dist.get_rank() == 3: + grad_dict = { + 'image_encoder': torch.randn(2, 8, 128).cuda(), + 'audio_encoder': torch.randn(2, 16, 128).cuda(), + } + input_dict = mllm_comm.send_backward_recv_forward(grad_dict) + assert input_dict['image_encoder'].shape == (2, 8, 128) + assert input_dict['audio_encoder'].shape == (2, 16, 128) + + # First stage of LLM sends forward to the second stage of LLM, and receive backward from the second stage of LLM + if mllm_comm.is_current_rank_in_grid(llm_grid): + if dist.get_rank() == 2 or dist.get_rank() == 3: + output_dict = {'llm': torch.randn(2, 32, 128).cuda()} + received_grad = mllm_comm.send_forward_recv_backward( + output_dict, tensor_shape=(2, 32, 128) + ) + assert received_grad['llm'].shape == (2, 32, 128) + if dist.get_rank() == 4 or dist.get_rank() == 5: + grad_dict = {'llm': torch.randn(2, 32, 128).cuda()} + input_dict = mllm_comm.send_backward_recv_forward( + grad_dict, tensor_shape=(2, 32, 128) + ) + assert input_dict['llm'].shape == (2, 32, 128) + + # Second stage of LLM sends forward to generator, and receive backward from generator + if mllm_comm.is_current_rank_in_grid(llm_grid): + if dist.get_rank() == 4 or dist.get_rank() == 5: + output_dict = {'llm': torch.randn(2, 32, 128).cuda()} + received_grad = mllm_comm.send_forward_recv_backward(output_dict) + assert received_grad['llm'].shape == (2, 32, 128) + if mllm_comm.is_current_rank_in_grid(generator_grid): + grad_dict = {'llm': torch.randn(1, 32, 128).cuda()} + input_dict = mllm_comm.send_backward_recv_forward(grad_dict) + assert input_dict['llm'].shape == (1, 32, 128) + + @pytest.mark.skipif( + version.parse(torch.__version__) < version.parse('2.3.0'), + reason="Feature requires PyTorch 2.3 or later", + ) + def test_send_forward_recv_forward_with_transformer_blocks(self): + """Test send_forward and recv_forward operations.""" + + # Set model/test dimensions for easier debugging and output comparison + hidden_size = 16 + sequence_length = 2 + 
micro_batch_size = 2 + + # For reproducibility, set a fixed seed + torch.manual_seed(12345) + dtype = torch.float32 + + # Create random input hidden states tensor + hidden_states = torch.randn( + (sequence_length, micro_batch_size, hidden_size), device="cuda" + ).to(dtype) + current_rank = dist.get_rank() + + # ========== Initialize tensor model-parallel environment ========== + parallel_state_tp = 2 + Utils.initialize_model_parallel(tensor_model_parallel_size=2) + + # ========== Build reference 1D grid and transformer block for weight sharing ========== + ref_grid = create_hypercomm_grid(offset=0, tp=1, cp=1, pp=1, dp=8) + ref_pg_collection = _get_pg_collection_from_grid(ref_grid) + ref_block = _create_transformer_block( + dtype=dtype, hidden_size=hidden_size, pg_collection=ref_pg_collection + ) + _avg_params( + ref_block, ref_grid.get_pg("dp") + ) # Ensure parameters are averaged across data parallel (DP) + + # ========== Create different transformer blocks for each model stage ========== + # Image encoder + image_encoder_block, image_encoder_grid = get_transformer_block_and_grid( + ref_block, + tp_size=1, + cp_size=1, + pp_size=1, + dp_size=1, + grid_offset=0, + hidden_size=hidden_size, + dtype=dtype, + ) + # Audio encoder + audio_encoder_block, audio_encoder_grid = get_transformer_block_and_grid( + ref_block, + tp_size=1, + cp_size=1, + pp_size=1, + dp_size=1, + grid_offset=1, + hidden_size=hidden_size, + dtype=dtype, + ) + # LLM (Large Language Model) block with tensor & pipeline parallelism + llm_block, llm_grid = get_transformer_block_and_grid( + ref_block, + tp_size=2, + cp_size=1, + pp_size=2, + dp_size=1, + grid_offset=2, + hidden_size=hidden_size, + dtype=dtype, + ) + # Generator block (final stage) with DP=2 + generator_block, generator_grid = get_transformer_block_and_grid( + ref_block, + tp_size=1, + cp_size=1, + pp_size=1, + dp_size=2, + grid_offset=6, + hidden_size=hidden_size, + dtype=dtype, + ) + + # ========== Define module-to-grid correspondence 
and pipeline topology ========== + module_to_grid_map = { + 'image_encoder': image_encoder_grid, + 'audio_encoder': audio_encoder_grid, + 'llm': llm_grid, + 'generator': generator_grid, + } + topology = { + 'image_encoder': ['llm'], # image_encoder sends output to llm + 'audio_encoder': ['llm'], # audio_encoder sends output to llm + 'llm': ['generator'], # llm sends output to generator + 'generator': [], # generator is the final module + } + config = ModelParallelConfig(pipeline_dtype=torch.float) + # Define dimension mapping for sequence, batch, hidden + dim_mapping = {'s': 0, 'h': 2, 'b': 1} + seq_dim = dim_mapping['s'] + + # Communication handler for multi-module pipeline (send/recv abstraction) + mllm_comm = MultiModulePipelineCommunicator( + module_to_grid_map, topology, config, dim_mapping=dim_mapping + ) + + # ========== Run actual distributed pipeline blocks (per process, depending on role) ========== + if mllm_comm.is_current_rank_in_grid(image_encoder_grid): + # Image encoder rank: run forward and send output + image_encoder_output = image_encoder_block( + hidden_states=hidden_states, attention_mask=None + ) + output_dict = {'image_encoder': image_encoder_output} + mllm_comm.send_forward(output_dict) + if mllm_comm.is_current_rank_in_grid(audio_encoder_grid): + # Audio encoder rank: run forward and send output + audio_encoder_output = audio_encoder_block( + hidden_states=hidden_states, attention_mask=None + ) + output_dict = {'audio_encoder': audio_encoder_output} + mllm_comm.send_forward(output_dict) + if mllm_comm.is_current_rank_in_grid(llm_grid): + if dist.get_rank() == 2 or dist.get_rank() == 3: + # LLM stage 0 (receives both image and audio, concatenates along seq_dim) + input_dict = mllm_comm.recv_forward() + llm_output = llm_block( + hidden_states=torch.cat( + [input_dict['image_encoder'], input_dict['audio_encoder']], dim=seq_dim + ), + attention_mask=None, + ) + output_dict = {'llm': llm_output} + mllm_comm.send_forward(output_dict) + else: + # 
LLM stage 1 (receives output of previous LLM stage) + input_dict = mllm_comm.recv_forward( + tensor_shape=(sequence_length * 2, micro_batch_size, hidden_size) + ) + llm_output = llm_block(hidden_states=input_dict['llm'], attention_mask=None) + output_dict = {'llm': llm_output} + mllm_comm.send_forward(output_dict) + + if mllm_comm.is_current_rank_in_grid(generator_grid): + # Generator block: only receives from llm and runs forward + input_dict = mllm_comm.recv_forward() + generator_output = generator_block(hidden_states=input_dict['llm'], attention_mask=None) + + # ========== Build a reference (serial/global) pipeline for correctness checking ========== + global_image_encoder_block, _ = get_transformer_block_and_grid( + ref_block, + tp_size=parallel_state_tp, + use_global_parallel_state=True, + hidden_size=hidden_size, + dtype=dtype, + ) + global_audio_encoder_block, _ = get_transformer_block_and_grid( + ref_block, + tp_size=parallel_state_tp, + use_global_parallel_state=True, + hidden_size=hidden_size, + dtype=dtype, + ) + global_llm_block_pp_rank_0, _ = get_transformer_block_and_grid( + ref_block, + tp_size=parallel_state_tp, + use_global_parallel_state=True, + hidden_size=hidden_size, + dtype=dtype, + ) + global_llm_block_pp_rank_1, _ = get_transformer_block_and_grid( + ref_block, + tp_size=parallel_state_tp, + use_global_parallel_state=True, + hidden_size=hidden_size, + dtype=dtype, + ) + global_generator_block, _ = get_transformer_block_and_grid( + ref_block, + tp_size=parallel_state_tp, + use_global_parallel_state=True, + hidden_size=hidden_size, + dtype=dtype, + ) + + # Run each stage sequentially as a global pipeline (for truth) + global_image_encoder_output = global_image_encoder_block( + hidden_states=hidden_states, attention_mask=None + ) + global_audio_encoder_output = global_audio_encoder_block( + hidden_states=hidden_states, attention_mask=None + ) + # Compare output between global and distributed blocks for image/audio stage + if current_rank == 0: + 
torch.testing.assert_close( + global_image_encoder_output, image_encoder_output, rtol=1e-3, atol=1e-3 + ) + if current_rank == 1: + torch.testing.assert_close( + global_audio_encoder_output, audio_encoder_output, rtol=1e-3, atol=1e-3 + ) + + # Feed outputs to LLM stages (emulate pipeline cut with concatenation) + global_llm_input = torch.cat( + [global_image_encoder_output, global_audio_encoder_output], dim=seq_dim + ) + global_llm_pp_rank_0_output = global_llm_block_pp_rank_0( + hidden_states=global_llm_input, attention_mask=None + ) + if current_rank == 2 or current_rank == 3: + torch.testing.assert_close( + global_llm_pp_rank_0_output, llm_output, rtol=1e-3, atol=1e-3 + ) + global_llm_pp_rank_1_output = global_llm_block_pp_rank_1( + hidden_states=global_llm_pp_rank_0_output, attention_mask=None + ) + if current_rank == 4 or current_rank == 5: + torch.testing.assert_close( + global_llm_pp_rank_1_output, llm_output, rtol=1e-3, atol=1e-3 + ) + + # Generator output and comparison to distributed output (for each DP chunk) + global_generator_block_output = global_generator_block( + hidden_states=global_llm_pp_rank_1_output, attention_mask=None + ) + global_generator_block_chunks = torch.split( + global_generator_block_output, global_generator_block_output.shape[1] // 2, dim=1 + ) + if current_rank == 6: + torch.testing.assert_close( + global_generator_block_chunks[0], generator_output, rtol=1e-3, atol=1e-3 + ) + if current_rank == 7: + torch.testing.assert_close( + global_generator_block_chunks[1], generator_output, rtol=1e-3, atol=1e-3 + ) + + @pytest.mark.skipif( + version.parse(torch.__version__) < version.parse('2.3.0'), + reason="Feature requires PyTorch 2.3 or later", + ) + @pytest.mark.parametrize( + "grid1_tp, grid1_pp, grid1_dp, grid2_tp, grid2_pp, grid2_dp, parallel_state_tp", + [ + (2, 1, 1, 2, 1, 1, 2), # TP2PP1DP1 to TP2PP1DP1 + (2, 1, 1, 2, 2, 1, 2), # TP2PP1DP1 to TP2PP2DP1 + (2, 2, 1, 2, 2, 1, 2), # TP2PP2DP1 to TP2PP2DP1 + (4, 1, 1, 4, 1, 1, 4), # 
TP4DP1 to TP4DP1 + (2, 1, 2, 4, 1, 1, 2), # TP2DP2 to TP4DP1 + (4, 1, 1, 2, 1, 2, 2), # TP4DP1 to TP2DP2 + (2, 1, 2, 1, 1, 4, 2), # TP2DP2 to TP1DP4 + ], + ) + def test_send_forward_recv_forward_with_transformer_blocks_and_different_parallelisms( + self, grid1_tp, grid1_pp, grid1_dp, grid2_tp, grid2_pp, grid2_dp, parallel_state_tp + ): + """Test bridge communicator with two transformer blocks having different process group configurations.""" + # Model and input configuration + hidden_size = 16 + sequence_length = 2 + micro_batch_size = 8 + torch.manual_seed(12345) + dtype = torch.float32 + + # Create random input tensor on CUDA + hidden_states = torch.randn( + (sequence_length, micro_batch_size, hidden_size), device="cuda" + ).to(dtype) + hidden_states_ref = hidden_states.clone() + current_rank = dist.get_rank() + + # Initialize model parallel with desired TP + Utils.initialize_model_parallel(tensor_model_parallel_size=parallel_state_tp) + + # Build a reference grid and block for parameter sharing & DP averaging + ref_grid = create_hypercomm_grid(offset=0, tp=1, cp=1, pp=1, dp=8) + ref_pg_collection = _get_pg_collection_from_grid(ref_grid) + ref_block = _create_transformer_block( + dtype=dtype, hidden_size=hidden_size, pg_collection=ref_pg_collection + ) + _avg_params( + ref_block, ref_grid.get_pg("dp") + ) # Synchronize parameters across DP for reproducibility + + # ====== Create two transformer block+grid pairs with different TP/DP settings ====== + block_grid_1, grid_1 = get_transformer_block_and_grid( + ref_block, + tp_size=grid1_tp, + pp_size=grid1_pp, + dp_size=grid1_dp, + grid_offset=0, + hidden_size=hidden_size, + dtype=dtype, + ) + + block_grid_2, grid_2 = get_transformer_block_and_grid( + ref_block, + tp_size=grid2_tp, + pp_size=grid2_pp, + dp_size=grid2_dp, + grid_offset=grid_1.size, + hidden_size=hidden_size, + dtype=dtype, + ) + + dist.barrier() # Synchronize ranks before communication + + # Module-grid map and pipeline communication topology + 
module_to_grid_map = {'image_encoder': grid_1, 'llm': grid_2} + topology = { + 'image_encoder': ['llm'], # image_encoder sends forward results to llm + 'llm': [], # llm is the last stage here + } + config = ModelParallelConfig(pipeline_dtype=torch.float) + mllm_comm = MultiModulePipelineCommunicator( + module_to_grid_map, topology, config, dim_mapping={'s': 0, 'h': 2, 'b': 1} + ) + + output_grid_2 = None + # If current rank is in the first grid, run first block and send output + if grid_1 is not None and mllm_comm.is_current_rank_in_grid(grid_1): + rank_module_info = mllm_comm.rank_module_map['image_encoder'] + if rank_module_info.pp_rank == 0: + hidden_states = block_grid_1(hidden_states=hidden_states, attention_mask=None) + mllm_comm.send_forward({'image_encoder': hidden_states}) + else: + input_dict = mllm_comm.recv_forward( + tensor_shape=(sequence_length, micro_batch_size, hidden_size) + ) + hidden_states = input_dict['image_encoder'] + hidden_states = block_grid_1(hidden_states=hidden_states, attention_mask=None) + mllm_comm.send_forward({'image_encoder': hidden_states}) + + # If current rank is in second grid, receive and run the second block + if grid_2 is not None and mllm_comm.is_current_rank_in_grid(grid_2): + rank_module_info = mllm_comm.rank_module_map['llm'] + if rank_module_info.pp_rank == 0: + input_dict = mllm_comm.recv_forward() + hidden_states = input_dict['image_encoder'] + hidden_states = block_grid_2(hidden_states=hidden_states, attention_mask=None) + if rank_module_info.pp_rank == rank_module_info.pp_size - 1: + output_grid_2 = hidden_states + else: + mllm_comm.send_forward({'llm': hidden_states}) + elif rank_module_info.pp_rank < rank_module_info.pp_size - 1: + input_dict = mllm_comm.recv_forward( + tensor_shape=( + sequence_length, + (grid1_dp * micro_batch_size) // grid2_dp, + hidden_size, + ) + ) + hidden_states = input_dict['llm'] + hidden_states = block_grid_2(hidden_states=hidden_states, attention_mask=None) + 
mllm_comm.send_forward({'llm': hidden_states}) + else: + input_dict = mllm_comm.recv_forward( + tensor_shape=( + sequence_length, + (grid1_dp * micro_batch_size) // grid2_dp, + hidden_size, + ) + ) + hidden_states = input_dict['llm'] + output_grid_2 = block_grid_2(hidden_states=hidden_states, attention_mask=None) + + # Compute expected output shape based on change in DP size (chunk/expand batch dimension appropriately) + factor = max(grid1_dp, grid2_dp) // min(grid1_dp, grid2_dp) + expected_output_shape = ( + sequence_length, + ( + micro_batch_size * factor + if grid1_dp > grid2_dp + else micro_batch_size // factor + ), + hidden_size, + ) + assert ( + output_grid_2.shape == expected_output_shape + ), f"Output2 shape mismatch: {output_grid_2.shape}" + + # ====== Reference: global (replicated) pipeline forward for correctness checking ====== + global_block_1, _ = get_transformer_block_and_grid( + ref_block, + tp_size=parallel_state_tp, + use_global_parallel_state=True, + hidden_size=hidden_size, + dtype=dtype, + ) + global_block_2, _ = get_transformer_block_and_grid( + ref_block, + tp_size=parallel_state_tp, + use_global_parallel_state=True, + hidden_size=hidden_size, + dtype=dtype, + ) + + for i in range(grid1_pp): + hidden_states_ref = global_block_1(hidden_states=hidden_states_ref, attention_mask=None) + + for i in range(grid2_pp): + hidden_states_ref = global_block_2(hidden_states=hidden_states_ref, attention_mask=None) + + # Output comparison under different DP compositions between grids + if ( + grid_2 is not None + and mllm_comm.is_current_rank_in_grid(grid_2) + and rank_module_info.pp_rank == rank_module_info.pp_size - 1 + ): + if grid1_dp == grid2_dp: + # DP size matches: all outputs directly compared + torch.testing.assert_close(hidden_states_ref, output_grid_2, rtol=1e-3, atol=1e-3) + elif grid1_dp < grid2_dp: + # If grid2 expands DP: each output_grid_2 chunk corresponds to a split of the reference output + grid2_dp_ranks = grid_2._gen_rank_enum([x for x 
in grid_2.dim_names if x != "dp"]) + global_block_2_chunks = torch.split( + hidden_states_ref, hidden_states_ref.shape[1] // (grid2_dp // grid1_dp), dim=1 + ) + relevant_chunk = None + for i, dp_ranks in enumerate(grid2_dp_ranks): + if current_rank in dp_ranks: + relevant_chunk = global_block_2_chunks[i % len(global_block_2_chunks)] + torch.testing.assert_close(relevant_chunk, output_grid_2, rtol=1e-3, atol=1e-3) + else: + # If DP shrinks (grid1_dp > grid2_dp): just compare the relevant first chunk + output_grid_2_first_chunk = torch.chunk(output_grid_2, grid1_dp // grid2_dp, dim=1)[ + 0 + ] + torch.testing.assert_close( + hidden_states_ref, output_grid_2_first_chunk, rtol=1e-3, atol=1e-3 + ) From 97ef777c4277eb4d8ad4b2e2f0a8513c5e08caaa Mon Sep 17 00:00:00 2001 From: Zijie Yan Date: Thu, 23 Oct 2025 02:26:12 +0000 Subject: [PATCH 035/334] chore: Update golden values. --- docker/Dockerfile.ci.dev | 10 +- .../golden_values_dev_dgxh100_coreweave.json | 600 +++++++++--------- .../golden_values_dev_dgxh100_eos.json | 600 +++++++++--------- uv.lock | 595 +++++++++-------- 4 files changed, 896 insertions(+), 909 deletions(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index caa2b9e1b86..f5da7afada9 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -61,11 +61,11 @@ RUN bash -ex <<"EOF" ln -s libnvshmem_host.so.3 libnvshmem_host.so popd - git clone --branch hybrid-ep https://github.com/deepseek-ai/DeepEP.git - cd DeepEP - git checkout 3f601f7ac1c062c46502646ff04c535013bfca00 - TORCH_CUDA_ARCH_LIST="9.0" uv pip install --no-build-isolation -v . - cd .. + git clone --branch v1.2.1 https://github.com/deepseek-ai/DeepEP.git + pushd DeepEP + patch -p1 < /workspace/deepep.patch + popd + TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" uv pip install --no-build-isolation -v DeepEP/. 
rm -rf DeepEP EOF diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json index cdd69820131..0af1bff480e 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04747, - "2": 11.03489, - "3": 9.59197, - "4": 9.2607, - "5": 9.25316, - "6": 9.70587, - "7": 9.46635, - "8": 9.01114, - "9": 8.72173, - "10": 9.06704, - "11": 8.59397, - "12": 8.5643, - "13": 8.44846, - "14": 7.97921, - "15": 8.04905, - "16": 8.09886, - "17": 8.04172, - "18": 7.76126, - "19": 8.14014, - "20": 7.86027, - "21": 7.54995, - "22": 7.53872, - "23": 7.40693, - "24": 7.40435, - "25": 7.66065, - "26": 7.05772, - "27": 7.59552, - "28": 7.30627, - "29": 7.48007, - "30": 7.63012, - "31": 7.38325, - "32": 7.57843, - "33": 7.62828, - "34": 7.68919, - "35": 7.20168, - "36": 7.07506, - "37": 7.41935, - "38": 7.17961, - "39": 7.54005, - "40": 7.53821, - "41": 7.47888, - "42": 7.24055, - "43": 7.2256, - "44": 7.40803, - "45": 7.1775, - "46": 6.88877, - "47": 7.29436, - "48": 7.13581, - "49": 7.58407, - "50": 7.02865 + "1": 11.04624, + "2": 11.03476, + "3": 9.59903, + "4": 9.26301, + "5": 9.36373, + "6": 9.59608, + "7": 9.45214, + "8": 8.95198, + "9": 8.65952, + "10": 9.17778, + "11": 9.21306, + "12": 8.68184, + "13": 8.6038, + "14": 8.01576, + "15": 8.13595, + "16": 8.20124, + "17": 8.13602, + "18": 7.83369, + "19": 8.22974, + "20": 7.9452, + "21": 7.62338, + "22": 7.60791, + "23": 7.48374, + "24": 7.46559, + "25": 7.71274, + "26": 7.12081, + "27": 7.64626, + 
"28": 7.35234, + "29": 7.52084, + "30": 7.67784, + "31": 7.42246, + "32": 7.6137, + "33": 7.66159, + "34": 7.72817, + "35": 7.23134, + "36": 7.10612, + "37": 7.44953, + "38": 7.20946, + "39": 7.57073, + "40": 7.56124, + "41": 7.51119, + "42": 7.27048, + "43": 7.25633, + "44": 7.43634, + "45": 7.21132, + "46": 6.91913, + "47": 7.32211, + "48": 7.16551, + "49": 7.6155, + "50": 7.05648 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802648.0, - "2": 38543564.0, - "3": 38740428.0, - "4": 264349216.0, - "5": 224711328.0, - "6": 359592256.0, - "7": 683584064.0, - "8": 850747136.0, - "9": 781151872.0, - "10": 863934336.0, - "11": 784956928.0, - "12": 787741824.0, - "13": 906642432.0, - "14": 793413952.0, - "15": 724351360.0, - "16": 929182656.0, - "17": 728944832.0, - "18": 715233856.0, - "19": 894586752.0, - "20": 942182208.0, - "21": 712310464.0, - "22": 903670336.0, - "23": 882199552.0, - "24": 867334400.0, - "25": 874751488.0, - "26": 844191104.0, - "27": 813243648.0, - "28": 626785920.0, - "29": 808773120.0, - "30": 602759296.0, - "31": 793783168.0, - "32": 768613888.0, - "33": 721639040.0, - "34": 734472448.0, - "35": 734570880.0, - "36": 703058560.0, - "37": 692109824.0, - "38": 649260992.0, - "39": 620422656.0, - "40": 604143616.0, - "41": 598320448.0, - "42": 573424384.0, - "43": 576846912.0, - "44": 570038144.0, - "45": 540081024.0, - "46": 501251008.0, - "47": 497637664.0, - "48": 494691072.0, - "49": 490977312.0, - "50": 463542304.0 + "1": 38802568, + "2": 38543544, + "3": 41886704, + "4": 264367872, + "5": 224737792, + "6": 302994528, + "7": 645808768, + "8": 775291136, + "9": 765475328, + "10": 675259904, + "11": 615098624, + "12": 702764352, + "13": 934951360, + "14": 1060699008, + "15": 802967296, + "16": 1026771392, + "17": 756706880, + "18": 715253696, + "19": 929126208, + "20": 875969472, + "21": 665188032, + "22": 903854976, + "23": 747044352, + "24": 920777856, + "25": 733230528, + "26": 863183104, + 
"27": 879318336, + "28": 916219136, + "29": 909384256, + "30": 879622720, + "31": 866425152, + "32": 819074560, + "33": 589493056, + "34": 772011648, + "35": 778655488, + "36": 759651584, + "37": 761302144, + "38": 463804224, + "39": 543038400, + "40": 497278720, + "41": 658241792, + "42": 661600512, + "43": 495713632, + "44": 673788672, + "45": 470873536, + "46": 614455040, + "47": 554219584, + "48": 570200064, + "49": 557109312, + "50": 347212736 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 7321331200.0, - "2": 7321333248.0, - "3": 7321333248.0, - "4": 7321333248.0, - "5": 7321333248.0, - "6": 7321333248.0, - "7": 7321333248.0, - "8": 7321333248.0, - "9": 7321333248.0, - "10": 7321333248.0, - "11": 7321333248.0, - "12": 7321333248.0, - "13": 7321333248.0, - "14": 7321333248.0, - "15": 7321333248.0, - "16": 7321333248.0, - "17": 7321333248.0, - "18": 7321333248.0, - "19": 7321333248.0, - "20": 7321333248.0, - "21": 7321333248.0, - "22": 7321333248.0, - "23": 7321333248.0, - "24": 7321333248.0, - "25": 7321333248.0, - "26": 7321333248.0, - "27": 7321333248.0, - "28": 7321333248.0, - "29": 7321333248.0, - "30": 7321333248.0, - "31": 7321333248.0, - "32": 7321333248.0, - "33": 7321333248.0, - "34": 7321333248.0, - "35": 7321333248.0, - "36": 7321333248.0, - "37": 7321333248.0, - "38": 7321333248.0, - "39": 7321333248.0, - "40": 7321333248.0, - "41": 7321333248.0, - "42": 7321333248.0, - "43": 7321333248.0, - "44": 7321333248.0, - "45": 7321333248.0, - "46": 7321333248.0, - "47": 7321333248.0, - "48": 7321333248.0, - "49": 7321333248.0, - "50": 7321333248.0 + "1": 7321308672, + "2": 7321310720, + "3": 7321310720, + "4": 7321310720, + "5": 7321310720, + "6": 7321310720, + "7": 7321310720, + "8": 7321310720, + "9": 7321310720, + "10": 7321310720, + "11": 7321310720, + "12": 7321310720, + "13": 7321310720, + "14": 7321310720, + "15": 7321310720, + "16": 7321310720, + "17": 7321310720, + "18": 7321310720, + 
"19": 7321310720, + "20": 7321310720, + "21": 7321310720, + "22": 7321310720, + "23": 7321310720, + "24": 7321310720, + "25": 7321310720, + "26": 7321310720, + "27": 7321310720, + "28": 7321310720, + "29": 7321310720, + "30": 7321310720, + "31": 7321310720, + "32": 7321310720, + "33": 7321310720, + "34": 7321310720, + "35": 7321310720, + "36": 7321310720, + "37": 7321310720, + "38": 7321310720, + "39": 7321310720, + "40": 7321310720, + "41": 7321310720, + "42": 7321310720, + "43": 7321310720, + "44": 7321310720, + "45": 7321310720, + "46": 7321310720, + "47": 7321310720, + "48": 7321310720, + "49": 7321310720, + "50": 7321310720 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 22198937600.0, - "2": 24950007808.0, - "3": 24950007808.0, - "4": 24950007808.0, - "5": 24950007808.0, - "6": 24950007808.0, - "7": 24950007808.0, - "8": 24950007808.0, - "9": 24950007808.0, - "10": 24950007808.0, - "11": 24950007808.0, - "12": 24950007808.0, - "13": 24950007808.0, - "14": 24950007808.0, - "15": 24950007808.0, - "16": 24950007808.0, - "17": 24950007808.0, - "18": 24950007808.0, - "19": 24950007808.0, - "20": 24950007808.0, - "21": 24950007808.0, - "22": 24950007808.0, - "23": 24950007808.0, - "24": 24950007808.0, - "25": 24950007808.0, - "26": 24950007808.0, - "27": 25072799744.0, - "28": 25343600640.0, - "29": 25625788416.0, - "30": 25625788416.0, - "31": 25628155904.0, - "32": 25707937792.0, - "33": 25707937792.0, - "34": 25707937792.0, - "35": 25707937792.0, - "36": 25707937792.0, - "37": 25707937792.0, - "38": 25707937792.0, - "39": 25707937792.0, - "40": 25707937792.0, - "41": 25707937792.0, - "42": 25707937792.0, - "43": 25707937792.0, - "44": 25707937792.0, - "45": 25707937792.0, - "46": 25707937792.0, - "47": 25707937792.0, - "48": 25707937792.0, - "49": 25707937792.0, - "50": 25707937792.0 + "1": 54396813312, + "2": 57149165568, + "3": 57165475840, + "4": 57165475840, + "5": 57165475840, + "6": 
57165475840, + "7": 57165475840, + "8": 57165475840, + "9": 57165475840, + "10": 57165475840, + "11": 57165475840, + "12": 57165475840, + "13": 57165475840, + "14": 57165475840, + "15": 57165475840, + "16": 57165475840, + "17": 57165475840, + "18": 57165475840, + "19": 57165475840, + "20": 57165475840, + "21": 57165475840, + "22": 57165475840, + "23": 57165475840, + "24": 57165475840, + "25": 57165475840, + "26": 57165475840, + "27": 57165475840, + "28": 57165475840, + "29": 57165475840, + "30": 57165475840, + "31": 57165475840, + "32": 57165475840, + "33": 57165475840, + "34": 57165475840, + "35": 57165475840, + "36": 57165475840, + "37": 57165475840, + "38": 57165475840, + "39": 57165475840, + "40": 57295986688, + "41": 57295986688, + "42": 57331482624, + "43": 57360437248, + "44": 57561960448, + "45": 57561960448, + "46": 57561960448, + "47": 57585307648, + "48": 57602347008, + "49": 57823961088, + "50": 57823961088 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07742, - "2": 11.07559, - "3": 10.5272, - "4": 10.08877, - "5": 9.81119, - "6": 9.88673, - "7": 9.70278, - "8": 8.9944, - "9": 8.79002, - "10": 9.07171, - "11": 8.44594, - "12": 8.50226, - "13": 8.40983, - "14": 7.83955, - "15": 7.97902, - "16": 8.03361, - "17": 7.99642, - "18": 7.71928, - "19": 8.10116, - "20": 7.82113, - "21": 7.51112, - "22": 7.48906, - "23": 7.35335, - "24": 7.35884, - "25": 7.60836, - "26": 7.01391, - "27": 7.54721, - "28": 7.25644, - "29": 7.43129, - "30": 7.57524, - "31": 7.321, - "32": 7.50218, - "33": 7.56009, - "34": 7.62505, - "35": 7.14234, - "36": 7.0092, - "37": 7.34655, - "38": 7.11926, - "39": 7.4822, - "40": 7.46808, - "41": 7.41272, - "42": 7.1698, - "43": 7.15213, - "44": 7.33728, - "45": 7.11437, - "46": 6.81846, - "47": 7.2282, - "48": 7.07339, - "49": 7.50345, - "50": 6.96783 + "1": 11.07779, + "2": 11.07564, + "3": 10.52904, + "4": 10.08924, + "5": 9.81101, + "6": 9.88786, + "7": 9.72987, + "8": 9.02044, + "9": 
8.8145, + "10": 9.09362, + "11": 8.77612, + "12": 8.56714, + "13": 8.54777, + "14": 8.04338, + "15": 8.10946, + "16": 8.13231, + "17": 8.0853, + "18": 7.83475, + "19": 8.21923, + "20": 7.91097, + "21": 7.58489, + "22": 7.56231, + "23": 7.44204, + "24": 7.44303, + "25": 7.67594, + "26": 7.07138, + "27": 7.60696, + "28": 7.30925, + "29": 7.48219, + "30": 7.62699, + "31": 7.3655, + "32": 7.54203, + "33": 7.60199, + "34": 7.66716, + "35": 7.18385, + "36": 7.05252, + "37": 7.38377, + "38": 7.15521, + "39": 7.51639, + "40": 7.4929, + "41": 7.44762, + "42": 7.20298, + "43": 7.18681, + "44": 7.36683, + "45": 7.15506, + "46": 6.85064, + "47": 7.26072, + "48": 7.10489, + "49": 7.53477, + "50": 6.99715 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 71.2429, - "2": 1.39205, - "3": 1.3521, - "4": 1.31895, - "5": 0.86745, - "6": 0.86249, - "7": 1.0949, - "8": 1.03022, - "9": 0.80778, - "10": 0.82011, - "11": 0.81426, - "12": 0.8098, - "13": 0.81209, - "14": 0.81361, - "15": 0.80969, - "16": 0.81315, - "17": 0.85127, - "18": 0.80813, - "19": 0.81928, - "20": 0.81012, - "21": 0.8101, - "22": 0.81064, - "23": 0.80537, - "24": 0.81149, - "25": 0.81261, - "26": 0.81877, - "27": 0.80314, - "28": 0.80383, - "29": 0.83563, - "30": 0.80254, - "31": 0.80006, - "32": 0.80658, - "33": 0.81426, - "34": 0.81824, - "35": 0.81124, - "36": 0.80978, - "37": 0.80679, - "38": 0.80838, - "39": 0.81028, - "40": 0.81044, - "41": 0.81268, - "42": 0.81318, - "43": 0.79311, - "44": 0.80471, - "45": 0.80526, - "46": 0.79795, - "47": 0.80592, - "48": 0.80158, - "49": 0.80635, - "50": 0.79969 + "1": 98.46571, + "2": 1.63304, + "3": 1.32772, + "4": 1.63453, + "5": 1.11673, + "6": 1.14377, + "7": 1.33213, + "8": 1.32699, + "9": 1.07499, + "10": 1.12938, + "11": 1.07438, + "12": 1.11078, + "13": 1.06958, + "14": 1.08718, + "15": 1.10547, + "16": 1.07557, + "17": 1.08606, + "18": 1.0832, + "19": 1.08226, + "20": 1.126, + "21": 1.08645, + "22": 1.07978, + 
"23": 1.07859, + "24": 1.08221, + "25": 1.08192, + "26": 1.09185, + "27": 1.0923, + "28": 1.09562, + "29": 1.10486, + "30": 1.10038, + "31": 1.09094, + "32": 1.08693, + "33": 1.0883, + "34": 1.08169, + "35": 1.08611, + "36": 1.07758, + "37": 1.07933, + "38": 1.08289, + "39": 1.07885, + "40": 1.08075, + "41": 1.0781, + "42": 1.08028, + "43": 1.08035, + "44": 1.08973, + "45": 1.08944, + "46": 1.07483, + "47": 1.08306, + "48": 1.07701, + "49": 1.0768, + "50": 1.07022 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json index d4aa4cb5ee9..585139e83c9 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04747, - "2": 11.03489, - "3": 9.59197, - "4": 9.2607, - "5": 9.25316, - "6": 9.70587, - "7": 9.46635, - "8": 9.01114, - "9": 8.72173, - "10": 9.06704, - "11": 8.59397, - "12": 8.5643, - "13": 8.44846, - "14": 7.97921, - "15": 8.04905, - "16": 8.09886, - "17": 8.04172, - "18": 7.76126, - "19": 8.14014, - "20": 7.86027, - "21": 7.54995, - "22": 7.53872, - "23": 7.40693, - "24": 7.40435, - "25": 7.66065, - "26": 7.05772, - "27": 7.59552, - "28": 7.30627, - "29": 7.48007, - "30": 7.63012, - "31": 7.38325, - "32": 7.57843, - "33": 7.62828, - "34": 7.68919, - "35": 7.20168, - "36": 7.07506, - "37": 7.41935, - "38": 7.17961, - "39": 7.54005, - "40": 7.53821, - "41": 7.47888, - "42": 7.24055, - "43": 7.2256, - "44": 7.40803, - "45": 7.1775, - "46": 6.88877, - "47": 7.29436, - "48": 7.13581, - "49": 7.58407, - "50": 
7.02865 + "1": 11.04624, + "2": 11.03476, + "3": 9.59903, + "4": 9.26301, + "5": 9.36373, + "6": 9.59608, + "7": 9.45214, + "8": 8.95198, + "9": 8.65952, + "10": 9.17778, + "11": 9.21306, + "12": 8.68184, + "13": 8.6038, + "14": 8.01576, + "15": 8.13595, + "16": 8.20124, + "17": 8.13602, + "18": 7.83369, + "19": 8.22974, + "20": 7.9452, + "21": 7.62338, + "22": 7.60791, + "23": 7.48374, + "24": 7.46559, + "25": 7.71274, + "26": 7.12081, + "27": 7.64626, + "28": 7.35234, + "29": 7.52084, + "30": 7.67784, + "31": 7.42246, + "32": 7.6137, + "33": 7.66159, + "34": 7.72817, + "35": 7.23134, + "36": 7.10612, + "37": 7.44953, + "38": 7.20946, + "39": 7.57073, + "40": 7.56124, + "41": 7.51119, + "42": 7.27048, + "43": 7.25633, + "44": 7.43634, + "45": 7.21132, + "46": 6.91913, + "47": 7.32211, + "48": 7.16551, + "49": 7.6155, + "50": 7.05648 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802648.0, - "2": 38543564.0, - "3": 38740428.0, - "4": 264349216.0, - "5": 224711328.0, - "6": 359592256.0, - "7": 683584064.0, - "8": 850747136.0, - "9": 781151872.0, - "10": 863934336.0, - "11": 784956928.0, - "12": 787741824.0, - "13": 906642432.0, - "14": 793413952.0, - "15": 724351360.0, - "16": 929182656.0, - "17": 728944832.0, - "18": 715233856.0, - "19": 894586752.0, - "20": 942182208.0, - "21": 712310464.0, - "22": 903670336.0, - "23": 882199552.0, - "24": 867334400.0, - "25": 874751488.0, - "26": 844191104.0, - "27": 813243648.0, - "28": 626785920.0, - "29": 808773120.0, - "30": 602759296.0, - "31": 793783168.0, - "32": 768613888.0, - "33": 721639040.0, - "34": 734472448.0, - "35": 734570880.0, - "36": 703058560.0, - "37": 692109824.0, - "38": 649260992.0, - "39": 620422656.0, - "40": 604143616.0, - "41": 598320448.0, - "42": 573424384.0, - "43": 576846912.0, - "44": 570038144.0, - "45": 540081024.0, - "46": 501251008.0, - "47": 497637664.0, - "48": 494691072.0, - "49": 490977312.0, - "50": 463542304.0 + "1": 38802568, + "2": 
38543544, + "3": 41886704, + "4": 264367872, + "5": 224737792, + "6": 302994528, + "7": 645808768, + "8": 775291136, + "9": 765475328, + "10": 675259904, + "11": 615098624, + "12": 702764352, + "13": 934951360, + "14": 1060699008, + "15": 802967296, + "16": 1026771392, + "17": 756706880, + "18": 715253696, + "19": 929126208, + "20": 875969472, + "21": 665188032, + "22": 903854976, + "23": 747044352, + "24": 920777856, + "25": 733230528, + "26": 863183104, + "27": 879318336, + "28": 916219136, + "29": 909384256, + "30": 879622720, + "31": 866425152, + "32": 819074560, + "33": 589493056, + "34": 772011648, + "35": 778655488, + "36": 759651584, + "37": 761302144, + "38": 463804224, + "39": 543038400, + "40": 497278720, + "41": 658241792, + "42": 661600512, + "43": 495713632, + "44": 673788672, + "45": 470873536, + "46": 614455040, + "47": 554219584, + "48": 570200064, + "49": 557109312, + "50": 347212736 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 7321331200.0, - "2": 7321333248.0, - "3": 7321333248.0, - "4": 7321333248.0, - "5": 7321333248.0, - "6": 7321333248.0, - "7": 7321333248.0, - "8": 7321333248.0, - "9": 7321333248.0, - "10": 7321333248.0, - "11": 7321333248.0, - "12": 7321333248.0, - "13": 7321333248.0, - "14": 7321333248.0, - "15": 7321333248.0, - "16": 7321333248.0, - "17": 7321333248.0, - "18": 7321333248.0, - "19": 7321333248.0, - "20": 7321333248.0, - "21": 7321333248.0, - "22": 7321333248.0, - "23": 7321333248.0, - "24": 7321333248.0, - "25": 7321333248.0, - "26": 7321333248.0, - "27": 7321333248.0, - "28": 7321333248.0, - "29": 7321333248.0, - "30": 7321333248.0, - "31": 7321333248.0, - "32": 7321333248.0, - "33": 7321333248.0, - "34": 7321333248.0, - "35": 7321333248.0, - "36": 7321333248.0, - "37": 7321333248.0, - "38": 7321333248.0, - "39": 7321333248.0, - "40": 7321333248.0, - "41": 7321333248.0, - "42": 7321333248.0, - "43": 7321333248.0, - "44": 7321333248.0, - "45": 7321333248.0, - 
"46": 7321333248.0, - "47": 7321333248.0, - "48": 7321333248.0, - "49": 7321333248.0, - "50": 7321333248.0 + "1": 7321308672, + "2": 7321310720, + "3": 7321310720, + "4": 7321310720, + "5": 7321310720, + "6": 7321310720, + "7": 7321310720, + "8": 7321310720, + "9": 7321310720, + "10": 7321310720, + "11": 7321310720, + "12": 7321310720, + "13": 7321310720, + "14": 7321310720, + "15": 7321310720, + "16": 7321310720, + "17": 7321310720, + "18": 7321310720, + "19": 7321310720, + "20": 7321310720, + "21": 7321310720, + "22": 7321310720, + "23": 7321310720, + "24": 7321310720, + "25": 7321310720, + "26": 7321310720, + "27": 7321310720, + "28": 7321310720, + "29": 7321310720, + "30": 7321310720, + "31": 7321310720, + "32": 7321310720, + "33": 7321310720, + "34": 7321310720, + "35": 7321310720, + "36": 7321310720, + "37": 7321310720, + "38": 7321310720, + "39": 7321310720, + "40": 7321310720, + "41": 7321310720, + "42": 7321310720, + "43": 7321310720, + "44": 7321310720, + "45": 7321310720, + "46": 7321310720, + "47": 7321310720, + "48": 7321310720, + "49": 7321310720, + "50": 7321310720 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 22198937600.0, - "2": 24950007808.0, - "3": 24950007808.0, - "4": 24950007808.0, - "5": 24950007808.0, - "6": 24950007808.0, - "7": 24950007808.0, - "8": 24950007808.0, - "9": 24950007808.0, - "10": 24950007808.0, - "11": 24950007808.0, - "12": 24950007808.0, - "13": 24950007808.0, - "14": 24950007808.0, - "15": 24950007808.0, - "16": 24950007808.0, - "17": 24950007808.0, - "18": 24950007808.0, - "19": 24950007808.0, - "20": 24950007808.0, - "21": 24950007808.0, - "22": 24950007808.0, - "23": 24950007808.0, - "24": 24950007808.0, - "25": 24950007808.0, - "26": 24950007808.0, - "27": 25072799744.0, - "28": 25343600640.0, - "29": 25625788416.0, - "30": 25625788416.0, - "31": 25628155904.0, - "32": 25707937792.0, - "33": 25707937792.0, - "34": 25707937792.0, - "35": 25707937792.0, - 
"36": 25707937792.0, - "37": 25707937792.0, - "38": 25707937792.0, - "39": 25707937792.0, - "40": 25707937792.0, - "41": 25707937792.0, - "42": 25707937792.0, - "43": 25707937792.0, - "44": 25707937792.0, - "45": 25707937792.0, - "46": 25707937792.0, - "47": 25707937792.0, - "48": 25707937792.0, - "49": 25707937792.0, - "50": 25707937792.0 + "1": 54396813312, + "2": 57149165568, + "3": 57165475840, + "4": 57165475840, + "5": 57165475840, + "6": 57165475840, + "7": 57165475840, + "8": 57165475840, + "9": 57165475840, + "10": 57165475840, + "11": 57165475840, + "12": 57165475840, + "13": 57165475840, + "14": 57165475840, + "15": 57165475840, + "16": 57165475840, + "17": 57165475840, + "18": 57165475840, + "19": 57165475840, + "20": 57165475840, + "21": 57165475840, + "22": 57165475840, + "23": 57165475840, + "24": 57165475840, + "25": 57165475840, + "26": 57165475840, + "27": 57165475840, + "28": 57165475840, + "29": 57165475840, + "30": 57165475840, + "31": 57165475840, + "32": 57165475840, + "33": 57165475840, + "34": 57165475840, + "35": 57165475840, + "36": 57165475840, + "37": 57165475840, + "38": 57165475840, + "39": 57165475840, + "40": 57295986688, + "41": 57295986688, + "42": 57331482624, + "43": 57360437248, + "44": 57561960448, + "45": 57561960448, + "46": 57561960448, + "47": 57585307648, + "48": 57602347008, + "49": 57823961088, + "50": 57823961088 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07742, - "2": 11.07559, - "3": 10.5272, - "4": 10.08877, - "5": 9.81119, - "6": 9.88673, - "7": 9.70278, - "8": 8.9944, - "9": 8.79002, - "10": 9.07171, - "11": 8.44594, - "12": 8.50226, - "13": 8.40983, - "14": 7.83955, - "15": 7.97902, - "16": 8.03361, - "17": 7.99642, - "18": 7.71928, - "19": 8.10116, - "20": 7.82113, - "21": 7.51112, - "22": 7.48906, - "23": 7.35335, - "24": 7.35884, - "25": 7.60836, - "26": 7.01391, - "27": 7.54721, - "28": 7.25644, - "29": 7.43129, - "30": 7.57524, - "31": 7.321, - "32": 
7.50218, - "33": 7.56009, - "34": 7.62505, - "35": 7.14234, - "36": 7.0092, - "37": 7.34655, - "38": 7.11926, - "39": 7.4822, - "40": 7.46808, - "41": 7.41272, - "42": 7.1698, - "43": 7.15213, - "44": 7.33728, - "45": 7.11437, - "46": 6.81846, - "47": 7.2282, - "48": 7.07339, - "49": 7.50345, - "50": 6.96783 + "1": 11.07779, + "2": 11.07564, + "3": 10.52904, + "4": 10.08924, + "5": 9.81101, + "6": 9.88786, + "7": 9.72987, + "8": 9.02044, + "9": 8.8145, + "10": 9.09362, + "11": 8.77612, + "12": 8.56714, + "13": 8.54777, + "14": 8.04338, + "15": 8.10946, + "16": 8.13231, + "17": 8.0853, + "18": 7.83475, + "19": 8.21923, + "20": 7.91097, + "21": 7.58489, + "22": 7.56231, + "23": 7.44204, + "24": 7.44303, + "25": 7.67594, + "26": 7.07138, + "27": 7.60696, + "28": 7.30925, + "29": 7.48219, + "30": 7.62699, + "31": 7.3655, + "32": 7.54203, + "33": 7.60199, + "34": 7.66716, + "35": 7.18385, + "36": 7.05252, + "37": 7.38377, + "38": 7.15521, + "39": 7.51639, + "40": 7.4929, + "41": 7.44762, + "42": 7.20298, + "43": 7.18681, + "44": 7.36683, + "45": 7.15506, + "46": 6.85064, + "47": 7.26072, + "48": 7.10489, + "49": 7.53477, + "50": 6.99715 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 66.41406, - "2": 1.09711, - "3": 0.98871, - "4": 1.29382, - "5": 0.90133, - "6": 0.89235, - "7": 1.14675, - "8": 1.06393, - "9": 0.87141, - "10": 0.88489, - "11": 0.87653, - "12": 0.86844, - "13": 0.87292, - "14": 0.88542, - "15": 0.87413, - "16": 0.8658, - "17": 0.86683, - "18": 0.85604, - "19": 0.87144, - "20": 0.8739, - "21": 0.87412, - "22": 0.8842, - "23": 0.87866, - "24": 0.87817, - "25": 0.87219, - "26": 0.88191, - "27": 0.86283, - "28": 0.85644, - "29": 0.85444, - "30": 0.86821, - "31": 0.8659, - "32": 0.86683, - "33": 0.86547, - "34": 0.86171, - "35": 0.84405, - "36": 0.84744, - "37": 0.84896, - "38": 0.85314, - "39": 0.85693, - "40": 0.83956, - "41": 0.844, - "42": 0.84413, - "43": 0.83996, - "44": 0.84204, - "45": 0.84489, - 
"46": 0.83423, - "47": 0.83738, - "48": 0.85356, - "49": 0.86096, - "50": 0.85603 + "1": 89.12995, + "2": 1.33749, + "3": 1.24205, + "4": 1.63759, + "5": 1.13139, + "6": 1.12938, + "7": 1.37914, + "8": 1.3886, + "9": 1.10046, + "10": 1.11649, + "11": 1.11259, + "12": 1.10822, + "13": 1.10532, + "14": 1.11189, + "15": 1.1132, + "16": 1.10539, + "17": 1.11434, + "18": 1.11836, + "19": 1.11073, + "20": 1.11278, + "21": 1.11212, + "22": 1.10671, + "23": 1.11034, + "24": 1.11107, + "25": 1.11085, + "26": 1.10756, + "27": 1.10109, + "28": 1.1069, + "29": 1.11354, + "30": 1.11254, + "31": 1.10893, + "32": 1.11311, + "33": 1.10722, + "34": 1.10243, + "35": 1.10358, + "36": 1.09746, + "37": 1.09875, + "38": 1.10151, + "39": 1.10188, + "40": 1.10069, + "41": 1.10545, + "42": 1.10709, + "43": 1.1028, + "44": 1.10723, + "45": 1.10614, + "46": 1.09997, + "47": 1.1053, + "48": 1.10274, + "49": 1.09986, + "50": 1.10191 } } } \ No newline at end of file diff --git a/uv.lock b/uv.lock index 2d2e178241f..1046481f7ec 100644 --- a/uv.lock +++ b/uv.lock @@ -1637,63 +1637,63 @@ wheels = [ [[package]] name = "grpcio" -version = "1.75.1" +version = "1.76.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9d/f7/8963848164c7604efb3a3e6ee457fdb3a469653e19002bd24742473254f8/grpcio-1.75.1.tar.gz", hash = "sha256:3e81d89ece99b9ace23a6916880baca613c03a799925afb2857887efa8b1b3d2", size = 12731327, upload-time = "2025-09-26T09:03:36.887Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/51/57/89fd829fb00a6d0bee3fbcb2c8a7aa0252d908949b6ab58bfae99d39d77e/grpcio-1.75.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:1712b5890b22547dd29f3215c5788d8fc759ce6dd0b85a6ba6e2731f2d04c088", size = 5705534, upload-time = "2025-09-26T09:00:52.225Z" }, - { url = 
"https://files.pythonhosted.org/packages/76/dd/2f8536e092551cf804e96bcda79ecfbc51560b214a0f5b7ebc253f0d4664/grpcio-1.75.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8d04e101bba4b55cea9954e4aa71c24153ba6182481b487ff376da28d4ba46cf", size = 11484103, upload-time = "2025-09-26T09:00:59.457Z" }, - { url = "https://files.pythonhosted.org/packages/9a/3d/affe2fb897804c98d56361138e73786af8f4dd876b9d9851cfe6342b53c8/grpcio-1.75.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:683cfc70be0c1383449097cba637317e4737a357cfc185d887fd984206380403", size = 6289953, upload-time = "2025-09-26T09:01:03.699Z" }, - { url = "https://files.pythonhosted.org/packages/87/aa/0f40b7f47a0ff10d7e482bc3af22dac767c7ff27205915f08962d5ca87a2/grpcio-1.75.1-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:491444c081a54dcd5e6ada57314321ae526377f498d4aa09d975c3241c5b9e1c", size = 6949785, upload-time = "2025-09-26T09:01:07.504Z" }, - { url = "https://files.pythonhosted.org/packages/a5/45/b04407e44050781821c84f26df71b3f7bc469923f92f9f8bc27f1406dbcc/grpcio-1.75.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ce08d4e112d0d38487c2b631ec8723deac9bc404e9c7b1011426af50a79999e4", size = 6465708, upload-time = "2025-09-26T09:01:11.028Z" }, - { url = "https://files.pythonhosted.org/packages/09/3e/4ae3ec0a4d20dcaafbb6e597defcde06399ccdc5b342f607323f3b47f0a3/grpcio-1.75.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5a2acda37fc926ccc4547977ac3e56b1df48fe200de968e8c8421f6e3093df6c", size = 7100912, upload-time = "2025-09-26T09:01:14.393Z" }, - { url = "https://files.pythonhosted.org/packages/34/3f/a9085dab5c313bb0cb853f222d095e2477b9b8490a03634cdd8d19daa5c3/grpcio-1.75.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:745c5fe6bf05df6a04bf2d11552c7d867a2690759e7ab6b05c318a772739bd75", size = 8042497, upload-time = "2025-09-26T09:01:17.759Z" }, - { url = 
"https://files.pythonhosted.org/packages/c3/87/ea54eba931ab9ed3f999ba95f5d8d01a20221b664725bab2fe93e3dee848/grpcio-1.75.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:259526a7159d39e2db40d566fe3e8f8e034d0fb2db5bf9c00e09aace655a4c2b", size = 7493284, upload-time = "2025-09-26T09:01:20.896Z" }, - { url = "https://files.pythonhosted.org/packages/b7/5e/287f1bf1a998f4ac46ef45d518de3b5da08b4e86c7cb5e1108cee30b0282/grpcio-1.75.1-cp310-cp310-win32.whl", hash = "sha256:f4b29b9aabe33fed5df0a85e5f13b09ff25e2c05bd5946d25270a8bd5682dac9", size = 3950809, upload-time = "2025-09-26T09:01:23.695Z" }, - { url = "https://files.pythonhosted.org/packages/a4/a2/3cbfc06a4ec160dc77403b29ecb5cf76ae329eb63204fea6a7c715f1dfdb/grpcio-1.75.1-cp310-cp310-win_amd64.whl", hash = "sha256:cf2e760978dcce7ff7d465cbc7e276c3157eedc4c27aa6de7b594c7a295d3d61", size = 4644704, upload-time = "2025-09-26T09:01:25.763Z" }, - { url = "https://files.pythonhosted.org/packages/0c/3c/35ca9747473a306bfad0cee04504953f7098527cd112a4ab55c55af9e7bd/grpcio-1.75.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:573855ca2e58e35032aff30bfbd1ee103fbcf4472e4b28d4010757700918e326", size = 5709761, upload-time = "2025-09-26T09:01:28.528Z" }, - { url = "https://files.pythonhosted.org/packages/c9/2c/ecbcb4241e4edbe85ac2663f885726fea0e947767401288b50d8fdcb9200/grpcio-1.75.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:6a4996a2c8accc37976dc142d5991adf60733e223e5c9a2219e157dc6a8fd3a2", size = 11496691, upload-time = "2025-09-26T09:01:31.214Z" }, - { url = "https://files.pythonhosted.org/packages/81/40/bc07aee2911f0d426fa53fe636216100c31a8ea65a400894f280274cb023/grpcio-1.75.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b1ea1bbe77ecbc1be00af2769f4ae4a88ce93be57a4f3eebd91087898ed749f9", size = 6296084, upload-time = "2025-09-26T09:01:34.596Z" }, - { url = 
"https://files.pythonhosted.org/packages/b8/d1/10c067f6c67396cbf46448b80f27583b5e8c4b46cdfbe18a2a02c2c2f290/grpcio-1.75.1-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:e5b425aee54cc5e3e3c58f00731e8a33f5567965d478d516d35ef99fd648ab68", size = 6950403, upload-time = "2025-09-26T09:01:36.736Z" }, - { url = "https://files.pythonhosted.org/packages/3f/42/5f628abe360b84dfe8dd8f32be6b0606dc31dc04d3358eef27db791ea4d5/grpcio-1.75.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0049a7bf547dafaeeb1db17079ce79596c298bfe308fc084d023c8907a845b9a", size = 6470166, upload-time = "2025-09-26T09:01:39.474Z" }, - { url = "https://files.pythonhosted.org/packages/c3/93/a24035080251324019882ee2265cfde642d6476c0cf8eb207fc693fcebdc/grpcio-1.75.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5b8ea230c7f77c0a1a3208a04a1eda164633fb0767b4cefd65a01079b65e5b1f", size = 7107828, upload-time = "2025-09-26T09:01:41.782Z" }, - { url = "https://files.pythonhosted.org/packages/e4/f8/d18b984c1c9ba0318e3628dbbeb6af77a5007f02abc378c845070f2d3edd/grpcio-1.75.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:36990d629c3c9fb41e546414e5af52d0a7af37ce7113d9682c46d7e2919e4cca", size = 8045421, upload-time = "2025-09-26T09:01:45.835Z" }, - { url = "https://files.pythonhosted.org/packages/7e/b6/4bf9aacff45deca5eac5562547ed212556b831064da77971a4e632917da3/grpcio-1.75.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b10ad908118d38c2453ade7ff790e5bce36580c3742919007a2a78e3a1e521ca", size = 7503290, upload-time = "2025-09-26T09:01:49.28Z" }, - { url = "https://files.pythonhosted.org/packages/3b/15/d8d69d10223cb54c887a2180bd29fe5fa2aec1d4995c8821f7aa6eaf72e4/grpcio-1.75.1-cp311-cp311-win32.whl", hash = "sha256:d6be2b5ee7bea656c954dcf6aa8093c6f0e6a3ef9945c99d99fcbfc88c5c0bfe", size = 3950631, upload-time = "2025-09-26T09:01:51.23Z" }, - { url = 
"https://files.pythonhosted.org/packages/8a/40/7b8642d45fff6f83300c24eaac0380a840e5e7fe0e8d80afd31b99d7134e/grpcio-1.75.1-cp311-cp311-win_amd64.whl", hash = "sha256:61c692fb05956b17dd6d1ab480f7f10ad0536dba3bc8fd4e3c7263dc244ed772", size = 4646131, upload-time = "2025-09-26T09:01:53.266Z" }, - { url = "https://files.pythonhosted.org/packages/3a/81/42be79e73a50aaa20af66731c2defeb0e8c9008d9935a64dd8ea8e8c44eb/grpcio-1.75.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:7b888b33cd14085d86176b1628ad2fcbff94cfbbe7809465097aa0132e58b018", size = 5668314, upload-time = "2025-09-26T09:01:55.424Z" }, - { url = "https://files.pythonhosted.org/packages/c5/a7/3686ed15822fedc58c22f82b3a7403d9faf38d7c33de46d4de6f06e49426/grpcio-1.75.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:8775036efe4ad2085975531d221535329f5dac99b6c2a854a995456098f99546", size = 11476125, upload-time = "2025-09-26T09:01:57.927Z" }, - { url = "https://files.pythonhosted.org/packages/14/85/21c71d674f03345ab183c634ecd889d3330177e27baea8d5d247a89b6442/grpcio-1.75.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb658f703468d7fbb5dcc4037c65391b7dc34f808ac46ed9136c24fc5eeb041d", size = 6246335, upload-time = "2025-09-26T09:02:00.76Z" }, - { url = "https://files.pythonhosted.org/packages/fd/db/3beb661bc56a385ae4fa6b0e70f6b91ac99d47afb726fe76aaff87ebb116/grpcio-1.75.1-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4b7177a1cdb3c51b02b0c0a256b0a72fdab719600a693e0e9037949efffb200b", size = 6916309, upload-time = "2025-09-26T09:02:02.894Z" }, - { url = "https://files.pythonhosted.org/packages/1e/9c/eda9fe57f2b84343d44c1b66cf3831c973ba29b078b16a27d4587a1fdd47/grpcio-1.75.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7d4fa6ccc3ec2e68a04f7b883d354d7fea22a34c44ce535a2f0c0049cf626ddf", size = 6435419, upload-time = "2025-09-26T09:02:05.055Z" }, - { url = 
"https://files.pythonhosted.org/packages/c3/b8/090c98983e0a9d602e3f919a6e2d4e470a8b489452905f9a0fa472cac059/grpcio-1.75.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d86880ecaeb5b2f0a8afa63824de93adb8ebe4e49d0e51442532f4e08add7d6", size = 7064893, upload-time = "2025-09-26T09:02:07.275Z" }, - { url = "https://files.pythonhosted.org/packages/ec/c0/6d53d4dbbd00f8bd81571f5478d8a95528b716e0eddb4217cc7cb45aae5f/grpcio-1.75.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a8041d2f9e8a742aeae96f4b047ee44e73619f4f9d24565e84d5446c623673b6", size = 8011922, upload-time = "2025-09-26T09:02:09.527Z" }, - { url = "https://files.pythonhosted.org/packages/f2/7c/48455b2d0c5949678d6982c3e31ea4d89df4e16131b03f7d5c590811cbe9/grpcio-1.75.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3652516048bf4c314ce12be37423c79829f46efffb390ad64149a10c6071e8de", size = 7466181, upload-time = "2025-09-26T09:02:12.279Z" }, - { url = "https://files.pythonhosted.org/packages/fd/12/04a0e79081e3170b6124f8cba9b6275871276be06c156ef981033f691880/grpcio-1.75.1-cp312-cp312-win32.whl", hash = "sha256:44b62345d8403975513af88da2f3d5cc76f73ca538ba46596f92a127c2aea945", size = 3938543, upload-time = "2025-09-26T09:02:14.77Z" }, - { url = "https://files.pythonhosted.org/packages/5f/d7/11350d9d7fb5adc73d2b0ebf6ac1cc70135577701e607407fe6739a90021/grpcio-1.75.1-cp312-cp312-win_amd64.whl", hash = "sha256:b1e191c5c465fa777d4cafbaacf0c01e0d5278022082c0abbd2ee1d6454ed94d", size = 4641938, upload-time = "2025-09-26T09:02:16.927Z" }, - { url = "https://files.pythonhosted.org/packages/46/74/bac4ab9f7722164afdf263ae31ba97b8174c667153510322a5eba4194c32/grpcio-1.75.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:3bed22e750d91d53d9e31e0af35a7b0b51367e974e14a4ff229db5b207647884", size = 5672779, upload-time = "2025-09-26T09:02:19.11Z" }, - { url = 
"https://files.pythonhosted.org/packages/a6/52/d0483cfa667cddaa294e3ab88fd2c2a6e9dc1a1928c0e5911e2e54bd5b50/grpcio-1.75.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:5b8f381eadcd6ecaa143a21e9e80a26424c76a0a9b3d546febe6648f3a36a5ac", size = 11470623, upload-time = "2025-09-26T09:02:22.117Z" }, - { url = "https://files.pythonhosted.org/packages/cf/e4/d1954dce2972e32384db6a30273275e8c8ea5a44b80347f9055589333b3f/grpcio-1.75.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5bf4001d3293e3414d0cf99ff9b1139106e57c3a66dfff0c5f60b2a6286ec133", size = 6248838, upload-time = "2025-09-26T09:02:26.426Z" }, - { url = "https://files.pythonhosted.org/packages/06/43/073363bf63826ba8077c335d797a8d026f129dc0912b69c42feaf8f0cd26/grpcio-1.75.1-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f82ff474103e26351dacfe8d50214e7c9322960d8d07ba7fa1d05ff981c8b2d", size = 6922663, upload-time = "2025-09-26T09:02:28.724Z" }, - { url = "https://files.pythonhosted.org/packages/c2/6f/076ac0df6c359117676cacfa8a377e2abcecec6a6599a15a672d331f6680/grpcio-1.75.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ee119f4f88d9f75414217823d21d75bfe0e6ed40135b0cbbfc6376bc9f7757d", size = 6436149, upload-time = "2025-09-26T09:02:30.971Z" }, - { url = "https://files.pythonhosted.org/packages/6b/27/1d08824f1d573fcb1fa35ede40d6020e68a04391709939e1c6f4193b445f/grpcio-1.75.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:664eecc3abe6d916fa6cf8dd6b778e62fb264a70f3430a3180995bf2da935446", size = 7067989, upload-time = "2025-09-26T09:02:33.233Z" }, - { url = "https://files.pythonhosted.org/packages/c6/98/98594cf97b8713feb06a8cb04eeef60b4757e3e2fb91aa0d9161da769843/grpcio-1.75.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c32193fa08b2fbebf08fe08e84f8a0aad32d87c3ad42999c65e9449871b1c66e", size = 8010717, upload-time = "2025-09-26T09:02:36.011Z" }, - { url = 
"https://files.pythonhosted.org/packages/8c/7e/bb80b1bba03c12158f9254762cdf5cced4a9bc2e8ed51ed335915a5a06ef/grpcio-1.75.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5cebe13088b9254f6e615bcf1da9131d46cfa4e88039454aca9cb65f639bd3bc", size = 7463822, upload-time = "2025-09-26T09:02:38.26Z" }, - { url = "https://files.pythonhosted.org/packages/23/1c/1ea57fdc06927eb5640f6750c697f596f26183573069189eeaf6ef86ba2d/grpcio-1.75.1-cp313-cp313-win32.whl", hash = "sha256:4b4c678e7ed50f8ae8b8dbad15a865ee73ce12668b6aaf411bf3258b5bc3f970", size = 3938490, upload-time = "2025-09-26T09:02:40.268Z" }, - { url = "https://files.pythonhosted.org/packages/4b/24/fbb8ff1ccadfbf78ad2401c41aceaf02b0d782c084530d8871ddd69a2d49/grpcio-1.75.1-cp313-cp313-win_amd64.whl", hash = "sha256:5573f51e3f296a1bcf71e7a690c092845fb223072120f4bdb7a5b48e111def66", size = 4642538, upload-time = "2025-09-26T09:02:42.519Z" }, - { url = "https://files.pythonhosted.org/packages/f2/1b/9a0a5cecd24302b9fdbcd55d15ed6267e5f3d5b898ff9ac8cbe17ee76129/grpcio-1.75.1-cp314-cp314-linux_armv7l.whl", hash = "sha256:c05da79068dd96723793bffc8d0e64c45f316248417515f28d22204d9dae51c7", size = 5673319, upload-time = "2025-09-26T09:02:44.742Z" }, - { url = "https://files.pythonhosted.org/packages/c6/ec/9d6959429a83fbf5df8549c591a8a52bb313976f6646b79852c4884e3225/grpcio-1.75.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:06373a94fd16ec287116a825161dca179a0402d0c60674ceeec8c9fba344fe66", size = 11480347, upload-time = "2025-09-26T09:02:47.539Z" }, - { url = "https://files.pythonhosted.org/packages/09/7a/26da709e42c4565c3d7bf999a9569da96243ce34a8271a968dee810a7cf1/grpcio-1.75.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4484f4b7287bdaa7a5b3980f3c7224c3c622669405d20f69549f5fb956ad0421", size = 6254706, upload-time = "2025-09-26T09:02:50.4Z" }, - { url = 
"https://files.pythonhosted.org/packages/f1/08/dcb26a319d3725f199c97e671d904d84ee5680de57d74c566a991cfab632/grpcio-1.75.1-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:2720c239c1180eee69f7883c1d4c83fc1a495a2535b5fa322887c70bf02b16e8", size = 6922501, upload-time = "2025-09-26T09:02:52.711Z" }, - { url = "https://files.pythonhosted.org/packages/78/66/044d412c98408a5e23cb348845979a2d17a2e2b6c3c34c1ec91b920f49d0/grpcio-1.75.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:07a554fa31c668cf0e7a188678ceeca3cb8fead29bbe455352e712ec33ca701c", size = 6437492, upload-time = "2025-09-26T09:02:55.542Z" }, - { url = "https://files.pythonhosted.org/packages/4e/9d/5e3e362815152aa1afd8b26ea613effa005962f9da0eec6e0e4527e7a7d1/grpcio-1.75.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:3e71a2105210366bfc398eef7f57a664df99194f3520edb88b9c3a7e46ee0d64", size = 7081061, upload-time = "2025-09-26T09:02:58.261Z" }, - { url = "https://files.pythonhosted.org/packages/1e/1a/46615682a19e100f46e31ddba9ebc297c5a5ab9ddb47b35443ffadb8776c/grpcio-1.75.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8679aa8a5b67976776d3c6b0521e99d1c34db8a312a12bcfd78a7085cb9b604e", size = 8010849, upload-time = "2025-09-26T09:03:00.548Z" }, - { url = "https://files.pythonhosted.org/packages/67/8e/3204b94ac30b0f675ab1c06540ab5578660dc8b690db71854d3116f20d00/grpcio-1.75.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:aad1c774f4ebf0696a7f148a56d39a3432550612597331792528895258966dc0", size = 7464478, upload-time = "2025-09-26T09:03:03.096Z" }, - { url = "https://files.pythonhosted.org/packages/b7/97/2d90652b213863b2cf466d9c1260ca7e7b67a16780431b3eb1d0420e3d5b/grpcio-1.75.1-cp314-cp314-win32.whl", hash = "sha256:62ce42d9994446b307649cb2a23335fa8e927f7ab2cbf5fcb844d6acb4d85f9c", size = 4012672, upload-time = "2025-09-26T09:03:05.477Z" }, - { url = 
"https://files.pythonhosted.org/packages/f9/df/e2e6e9fc1c985cd1a59e6996a05647c720fe8a03b92f5ec2d60d366c531e/grpcio-1.75.1-cp314-cp314-win_amd64.whl", hash = "sha256:f86e92275710bea3000cb79feca1762dc0ad3b27830dd1a74e82ab321d4ee464", size = 4772475, upload-time = "2025-09-26T09:03:07.661Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/b6/e0/318c1ce3ae5a17894d5791e87aea147587c9e702f24122cc7a5c8bbaeeb1/grpcio-1.76.0.tar.gz", hash = "sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73", size = 12785182, upload-time = "2025-10-21T16:23:12.106Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/17/ff4795dc9a34b6aee6ec379f1b66438a3789cd1315aac0cbab60d92f74b3/grpcio-1.76.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:65a20de41e85648e00305c1bb09a3598f840422e522277641145a32d42dcefcc", size = 5840037, upload-time = "2025-10-21T16:20:25.069Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ff/35f9b96e3fa2f12e1dcd58a4513a2e2294a001d64dec81677361b7040c9a/grpcio-1.76.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:40ad3afe81676fd9ec6d9d406eda00933f218038433980aa19d401490e46ecde", size = 11836482, upload-time = "2025-10-21T16:20:30.113Z" }, + { url = "https://files.pythonhosted.org/packages/3e/1c/8374990f9545e99462caacea5413ed783014b3b66ace49e35c533f07507b/grpcio-1.76.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:035d90bc79eaa4bed83f524331d55e35820725c9fbb00ffa1904d5550ed7ede3", size = 6407178, upload-time = "2025-10-21T16:20:32.733Z" }, + { url = "https://files.pythonhosted.org/packages/1e/77/36fd7d7c75a6c12542c90a6d647a27935a1ecaad03e0ffdb7c42db6b04d2/grpcio-1.76.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4215d3a102bd95e2e11b5395c78562967959824156af11fa93d18fdd18050990", size = 7075684, upload-time = "2025-10-21T16:20:35.435Z" }, + { url = 
"https://files.pythonhosted.org/packages/38/f7/e3cdb252492278e004722306c5a8935eae91e64ea11f0af3437a7de2e2b7/grpcio-1.76.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:49ce47231818806067aea3324d4bf13825b658ad662d3b25fada0bdad9b8a6af", size = 6611133, upload-time = "2025-10-21T16:20:37.541Z" }, + { url = "https://files.pythonhosted.org/packages/7e/20/340db7af162ccd20a0893b5f3c4a5d676af7b71105517e62279b5b61d95a/grpcio-1.76.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8cc3309d8e08fd79089e13ed4819d0af72aa935dd8f435a195fd152796752ff2", size = 7195507, upload-time = "2025-10-21T16:20:39.643Z" }, + { url = "https://files.pythonhosted.org/packages/10/f0/b2160addc1487bd8fa4810857a27132fb4ce35c1b330c2f3ac45d697b106/grpcio-1.76.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:971fd5a1d6e62e00d945423a567e42eb1fa678ba89072832185ca836a94daaa6", size = 8160651, upload-time = "2025-10-21T16:20:42.492Z" }, + { url = "https://files.pythonhosted.org/packages/2c/2c/ac6f98aa113c6ef111b3f347854e99ebb7fb9d8f7bb3af1491d438f62af4/grpcio-1.76.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d9adda641db7207e800a7f089068f6f645959f2df27e870ee81d44701dd9db3", size = 7620568, upload-time = "2025-10-21T16:20:45.995Z" }, + { url = "https://files.pythonhosted.org/packages/90/84/7852f7e087285e3ac17a2703bc4129fafee52d77c6c82af97d905566857e/grpcio-1.76.0-cp310-cp310-win32.whl", hash = "sha256:063065249d9e7e0782d03d2bca50787f53bd0fb89a67de9a7b521c4a01f1989b", size = 3998879, upload-time = "2025-10-21T16:20:48.592Z" }, + { url = "https://files.pythonhosted.org/packages/10/30/d3d2adcbb6dd3ff59d6ac3df6ef830e02b437fb5c90990429fd180e52f30/grpcio-1.76.0-cp310-cp310-win_amd64.whl", hash = "sha256:a6ae758eb08088d36812dd5d9af7a9859c05b1e0f714470ea243694b49278e7b", size = 4706892, upload-time = "2025-10-21T16:20:50.697Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/00/8163a1beeb6971f66b4bbe6ac9457b97948beba8dd2fc8e1281dce7f79ec/grpcio-1.76.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:2e1743fbd7f5fa713a1b0a8ac8ebabf0ec980b5d8809ec358d488e273b9cf02a", size = 5843567, upload-time = "2025-10-21T16:20:52.829Z" }, + { url = "https://files.pythonhosted.org/packages/10/c1/934202f5cf335e6d852530ce14ddb0fef21be612ba9ecbbcbd4d748ca32d/grpcio-1.76.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:a8c2cf1209497cf659a667d7dea88985e834c24b7c3b605e6254cbb5076d985c", size = 11848017, upload-time = "2025-10-21T16:20:56.705Z" }, + { url = "https://files.pythonhosted.org/packages/11/0b/8dec16b1863d74af6eb3543928600ec2195af49ca58b16334972f6775663/grpcio-1.76.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:08caea849a9d3c71a542827d6df9d5a69067b0a1efbea8a855633ff5d9571465", size = 6412027, upload-time = "2025-10-21T16:20:59.3Z" }, + { url = "https://files.pythonhosted.org/packages/d7/64/7b9e6e7ab910bea9d46f2c090380bab274a0b91fb0a2fe9b0cd399fffa12/grpcio-1.76.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f0e34c2079d47ae9f6188211db9e777c619a21d4faba6977774e8fa43b085e48", size = 7075913, upload-time = "2025-10-21T16:21:01.645Z" }, + { url = "https://files.pythonhosted.org/packages/68/86/093c46e9546073cefa789bd76d44c5cb2abc824ca62af0c18be590ff13ba/grpcio-1.76.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8843114c0cfce61b40ad48df65abcfc00d4dba82eae8718fab5352390848c5da", size = 6615417, upload-time = "2025-10-21T16:21:03.844Z" }, + { url = "https://files.pythonhosted.org/packages/f7/b6/5709a3a68500a9c03da6fb71740dcdd5ef245e39266461a03f31a57036d8/grpcio-1.76.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8eddfb4d203a237da6f3cc8a540dad0517d274b5a1e9e636fd8d2c79b5c1d397", size = 7199683, upload-time = "2025-10-21T16:21:06.195Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/d3/4b1f2bf16ed52ce0b508161df3a2d186e4935379a159a834cb4a7d687429/grpcio-1.76.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:32483fe2aab2c3794101c2a159070584e5db11d0aa091b2c0ea9c4fc43d0d749", size = 8163109, upload-time = "2025-10-21T16:21:08.498Z" }, + { url = "https://files.pythonhosted.org/packages/5c/61/d9043f95f5f4cf085ac5dd6137b469d41befb04bd80280952ffa2a4c3f12/grpcio-1.76.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dcfe41187da8992c5f40aa8c5ec086fa3672834d2be57a32384c08d5a05b4c00", size = 7626676, upload-time = "2025-10-21T16:21:10.693Z" }, + { url = "https://files.pythonhosted.org/packages/36/95/fd9a5152ca02d8881e4dd419cdd790e11805979f499a2e5b96488b85cf27/grpcio-1.76.0-cp311-cp311-win32.whl", hash = "sha256:2107b0c024d1b35f4083f11245c0e23846ae64d02f40b2b226684840260ed054", size = 3997688, upload-time = "2025-10-21T16:21:12.746Z" }, + { url = "https://files.pythonhosted.org/packages/60/9c/5c359c8d4c9176cfa3c61ecd4efe5affe1f38d9bae81e81ac7186b4c9cc8/grpcio-1.76.0-cp311-cp311-win_amd64.whl", hash = "sha256:522175aba7af9113c48ec10cc471b9b9bd4f6ceb36aeb4544a8e2c80ed9d252d", size = 4709315, upload-time = "2025-10-21T16:21:15.26Z" }, + { url = "https://files.pythonhosted.org/packages/bf/05/8e29121994b8d959ffa0afd28996d452f291b48cfc0875619de0bde2c50c/grpcio-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8", size = 5799718, upload-time = "2025-10-21T16:21:17.939Z" }, + { url = "https://files.pythonhosted.org/packages/d9/75/11d0e66b3cdf998c996489581bdad8900db79ebd83513e45c19548f1cba4/grpcio-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280", size = 11825627, upload-time = "2025-10-21T16:21:20.466Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/50/2f0aa0498bc188048f5d9504dcc5c2c24f2eb1a9337cd0fa09a61a2e75f0/grpcio-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4", size = 6359167, upload-time = "2025-10-21T16:21:23.122Z" }, + { url = "https://files.pythonhosted.org/packages/66/e5/bbf0bb97d29ede1d59d6588af40018cfc345b17ce979b7b45424628dc8bb/grpcio-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11", size = 7044267, upload-time = "2025-10-21T16:21:25.995Z" }, + { url = "https://files.pythonhosted.org/packages/f5/86/f6ec2164f743d9609691115ae8ece098c76b894ebe4f7c94a655c6b03e98/grpcio-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6", size = 6573963, upload-time = "2025-10-21T16:21:28.631Z" }, + { url = "https://files.pythonhosted.org/packages/60/bc/8d9d0d8505feccfdf38a766d262c71e73639c165b311c9457208b56d92ae/grpcio-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8", size = 7164484, upload-time = "2025-10-21T16:21:30.837Z" }, + { url = "https://files.pythonhosted.org/packages/67/e6/5d6c2fc10b95edf6df9b8f19cf10a34263b7fd48493936fffd5085521292/grpcio-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980", size = 8127777, upload-time = "2025-10-21T16:21:33.577Z" }, + { url = "https://files.pythonhosted.org/packages/3f/c8/dce8ff21c86abe025efe304d9e31fdb0deaaa3b502b6a78141080f206da0/grpcio-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882", size = 7594014, upload-time = "2025-10-21T16:21:41.882Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/42/ad28191ebf983a5d0ecef90bab66baa5a6b18f2bfdef9d0a63b1973d9f75/grpcio-1.76.0-cp312-cp312-win32.whl", hash = "sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958", size = 3984750, upload-time = "2025-10-21T16:21:44.006Z" }, + { url = "https://files.pythonhosted.org/packages/9e/00/7bd478cbb851c04a48baccaa49b75abaa8e4122f7d86da797500cccdd771/grpcio-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347", size = 4704003, upload-time = "2025-10-21T16:21:46.244Z" }, + { url = "https://files.pythonhosted.org/packages/fc/ed/71467ab770effc9e8cef5f2e7388beb2be26ed642d567697bb103a790c72/grpcio-1.76.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:26ef06c73eb53267c2b319f43e6634c7556ea37672029241a056629af27c10e2", size = 5807716, upload-time = "2025-10-21T16:21:48.475Z" }, + { url = "https://files.pythonhosted.org/packages/2c/85/c6ed56f9817fab03fa8a111ca91469941fb514e3e3ce6d793cb8f1e1347b/grpcio-1.76.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:45e0111e73f43f735d70786557dc38141185072d7ff8dc1829d6a77ac1471468", size = 11821522, upload-time = "2025-10-21T16:21:51.142Z" }, + { url = "https://files.pythonhosted.org/packages/ac/31/2b8a235ab40c39cbc141ef647f8a6eb7b0028f023015a4842933bc0d6831/grpcio-1.76.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:83d57312a58dcfe2a3a0f9d1389b299438909a02db60e2f2ea2ae2d8034909d3", size = 6362558, upload-time = "2025-10-21T16:21:54.213Z" }, + { url = "https://files.pythonhosted.org/packages/bd/64/9784eab483358e08847498ee56faf8ff6ea8e0a4592568d9f68edc97e9e9/grpcio-1.76.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3e2a27c89eb9ac3d81ec8835e12414d73536c6e620355d65102503064a4ed6eb", size = 7049990, upload-time = "2025-10-21T16:21:56.476Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/94/8c12319a6369434e7a184b987e8e9f3b49a114c489b8315f029e24de4837/grpcio-1.76.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:61f69297cba3950a524f61c7c8ee12e55c486cb5f7db47ff9dcee33da6f0d3ae", size = 6575387, upload-time = "2025-10-21T16:21:59.051Z" }, + { url = "https://files.pythonhosted.org/packages/15/0f/f12c32b03f731f4a6242f771f63039df182c8b8e2cf8075b245b409259d4/grpcio-1.76.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6a15c17af8839b6801d554263c546c69c4d7718ad4321e3166175b37eaacca77", size = 7166668, upload-time = "2025-10-21T16:22:02.049Z" }, + { url = "https://files.pythonhosted.org/packages/ff/2d/3ec9ce0c2b1d92dd59d1c3264aaec9f0f7c817d6e8ac683b97198a36ed5a/grpcio-1.76.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:25a18e9810fbc7e7f03ec2516addc116a957f8cbb8cbc95ccc80faa072743d03", size = 8124928, upload-time = "2025-10-21T16:22:04.984Z" }, + { url = "https://files.pythonhosted.org/packages/1a/74/fd3317be5672f4856bcdd1a9e7b5e17554692d3db9a3b273879dc02d657d/grpcio-1.76.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:931091142fd8cc14edccc0845a79248bc155425eee9a98b2db2ea4f00a235a42", size = 7589983, upload-time = "2025-10-21T16:22:07.881Z" }, + { url = "https://files.pythonhosted.org/packages/45/bb/ca038cf420f405971f19821c8c15bcbc875505f6ffadafe9ffd77871dc4c/grpcio-1.76.0-cp313-cp313-win32.whl", hash = "sha256:5e8571632780e08526f118f74170ad8d50fb0a48c23a746bef2a6ebade3abd6f", size = 3984727, upload-time = "2025-10-21T16:22:10.032Z" }, + { url = "https://files.pythonhosted.org/packages/41/80/84087dc56437ced7cdd4b13d7875e7439a52a261e3ab4e06488ba6173b0a/grpcio-1.76.0-cp313-cp313-win_amd64.whl", hash = "sha256:f9f7bd5faab55f47231ad8dba7787866b69f5e93bc306e3915606779bbfb4ba8", size = 4702799, upload-time = "2025-10-21T16:22:12.709Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/46/39adac80de49d678e6e073b70204091e76631e03e94928b9ea4ecf0f6e0e/grpcio-1.76.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:ff8a59ea85a1f2191a0ffcc61298c571bc566332f82e5f5be1b83c9d8e668a62", size = 5808417, upload-time = "2025-10-21T16:22:15.02Z" }, + { url = "https://files.pythonhosted.org/packages/9c/f5/a4531f7fb8b4e2a60b94e39d5d924469b7a6988176b3422487be61fe2998/grpcio-1.76.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:06c3d6b076e7b593905d04fdba6a0525711b3466f43b3400266f04ff735de0cd", size = 11828219, upload-time = "2025-10-21T16:22:17.954Z" }, + { url = "https://files.pythonhosted.org/packages/4b/1c/de55d868ed7a8bd6acc6b1d6ddc4aa36d07a9f31d33c912c804adb1b971b/grpcio-1.76.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fd5ef5932f6475c436c4a55e4336ebbe47bd3272be04964a03d316bbf4afbcbc", size = 6367826, upload-time = "2025-10-21T16:22:20.721Z" }, + { url = "https://files.pythonhosted.org/packages/59/64/99e44c02b5adb0ad13ab3adc89cb33cb54bfa90c74770f2607eea629b86f/grpcio-1.76.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b331680e46239e090f5b3cead313cc772f6caa7d0fc8de349337563125361a4a", size = 7049550, upload-time = "2025-10-21T16:22:23.637Z" }, + { url = "https://files.pythonhosted.org/packages/43/28/40a5be3f9a86949b83e7d6a2ad6011d993cbe9b6bd27bea881f61c7788b6/grpcio-1.76.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2229ae655ec4e8999599469559e97630185fdd53ae1e8997d147b7c9b2b72cba", size = 6575564, upload-time = "2025-10-21T16:22:26.016Z" }, + { url = "https://files.pythonhosted.org/packages/4b/a9/1be18e6055b64467440208a8559afac243c66a8b904213af6f392dc2212f/grpcio-1.76.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:490fa6d203992c47c7b9e4a9d39003a0c2bcc1c9aa3c058730884bbbb0ee9f09", size = 7176236, upload-time = "2025-10-21T16:22:28.362Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/55/dba05d3fcc151ce6e81327541d2cc8394f442f6b350fead67401661bf041/grpcio-1.76.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:479496325ce554792dba6548fae3df31a72cef7bad71ca2e12b0e58f9b336bfc", size = 8125795, upload-time = "2025-10-21T16:22:31.075Z" }, + { url = "https://files.pythonhosted.org/packages/4a/45/122df922d05655f63930cf42c9e3f72ba20aadb26c100ee105cad4ce4257/grpcio-1.76.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1c9b93f79f48b03ada57ea24725d83a30284a012ec27eab2cf7e50a550cbbbcc", size = 7592214, upload-time = "2025-10-21T16:22:33.831Z" }, + { url = "https://files.pythonhosted.org/packages/4a/6e/0b899b7f6b66e5af39e377055fb4a6675c9ee28431df5708139df2e93233/grpcio-1.76.0-cp314-cp314-win32.whl", hash = "sha256:747fa73efa9b8b1488a95d0ba1039c8e2dca0f741612d80415b1e1c560febf4e", size = 4062961, upload-time = "2025-10-21T16:22:36.468Z" }, + { url = "https://files.pythonhosted.org/packages/19/41/0b430b01a2eb38ee887f88c1f07644a1df8e289353b78e82b37ef988fb64/grpcio-1.76.0-cp314-cp314-win_amd64.whl", hash = "sha256:922fa70ba549fce362d2e2871ab542082d66e2aaf0c19480ea453905b01f384e", size = 4834462, upload-time = "2025-10-21T16:22:39.772Z" }, ] [[package]] @@ -2666,7 +2666,7 @@ wheels = [ [[package]] name = "multi-storage-client" -version = "0.32.0" +version = "0.33.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -2683,22 +2683,22 @@ dependencies = [ { name = "xattr" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/a4/c5294930789d50ac9745d0f04a22c925278b9593add0d4c28c0633cc21d6/multi_storage_client-0.32.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c02be32131ea5d5dedf537a5985aaf318aafe8c361cf58796850eac9219f0966", size = 5274899, upload-time = "2025-10-10T21:36:42.846Z" }, - { url = 
"https://files.pythonhosted.org/packages/e5/2f/d09abbf037e87943de338bb578091125779fc3b3b4a5a58fd7d4b02bdd63/multi_storage_client-0.32.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:bbfd9a5bdff5337b7698755876bdb1ff1ea906a5c299c7ebb33f2e92cc23d55d", size = 5395977, upload-time = "2025-10-10T21:36:17.875Z" }, - { url = "https://files.pythonhosted.org/packages/62/89/3508d9cc0985da78d11e897e69296d5b88a7e6d59d5bfeee0ecdad2a1ee3/multi_storage_client-0.32.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc5206c6f86a43499bdebfcc1d21617d4263fc7b49fa14afc531098f956f7998", size = 3171327, upload-time = "2025-10-10T21:43:15.737Z" }, - { url = "https://files.pythonhosted.org/packages/ea/a9/e958250c52254e9a2a9944a3fd92521bc3a521a3ade4f36742ff61a8bb64/multi_storage_client-0.32.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6598970ea0b7355185aa92dca79e8dd01669c60060106d4ff60b5cfb183bf7e4", size = 3343998, upload-time = "2025-10-10T21:40:55.721Z" }, - { url = "https://files.pythonhosted.org/packages/13/6c/cbaa0bc8464e3b7c5ab826c008b60930733ebd4e7aa3f258d6d6ee989b65/multi_storage_client-0.32.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8cca798a817cee747d957176eeb716208dbe4cd4c66b4a4d4a24abb73dde6cd2", size = 5274417, upload-time = "2025-10-10T21:39:45.954Z" }, - { url = "https://files.pythonhosted.org/packages/3d/92/fa6cfdc40b39b1f7e92bbbc654d3d1c9882806b561a8e0498c17b5771375/multi_storage_client-0.32.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:2d25c8e42f289bce788606db3cebabe41ab35840a35fce0349c660d214dc3a00", size = 5396247, upload-time = "2025-10-10T21:41:42.428Z" }, - { url = "https://files.pythonhosted.org/packages/2c/4d/a6140ea6a2b1d2d180adeb424305fd97682975bbd0eb52d7ba841eb477d9/multi_storage_client-0.32.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f6e592a7532b986d46181f42952aeb334c781b83f0b6175c3efe998d01a646", size = 3172948, upload-time = 
"2025-10-10T21:41:18.508Z" }, - { url = "https://files.pythonhosted.org/packages/83/18/2c68bbcf1bedc943e51fc279cee70e474dab8cc42fef12ce0a4cb80d11df/multi_storage_client-0.32.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35cd768a19e24246dc8207e6812f23a688933a9a1f1dbced0ec7d0f25c0f086f", size = 3344283, upload-time = "2025-10-10T21:44:02.03Z" }, - { url = "https://files.pythonhosted.org/packages/30/fc/ab252dc0f9080706ec5cdce0ea17e76825885b163b4dd52c5b9909e8adf6/multi_storage_client-0.32.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7cdd9af98981430594c4a47a5283b4dac51d6cad7c983b00dd0fec9daaa0061e", size = 5266870, upload-time = "2025-10-10T21:37:53.421Z" }, - { url = "https://files.pythonhosted.org/packages/9d/c4/2ff90f2bc3bc9318b9158640e8cf92d57e96f1daa8c4222f2ff587615211/multi_storage_client-0.32.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:86b0a319cecefa3d9130a0f0976b5059b0234a4a9c01467151fa364350e6679e", size = 5393630, upload-time = "2025-10-10T21:35:30.693Z" }, - { url = "https://files.pythonhosted.org/packages/20/c0/385ab374dddaaad9588ab6eef3dd200bfa6adac4148b674dfae10bfdc1af/multi_storage_client-0.32.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5e481509ca3d09289069c68c519a09eef2c82684e6e50ba2628e043a611de5b", size = 3175520, upload-time = "2025-10-10T21:35:54.182Z" }, - { url = "https://files.pythonhosted.org/packages/15/fe/40663eb2fcca12a22523f39cb03eb00791cd198dbf3d5cd5e9279e354915/multi_storage_client-0.32.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33cfa3f50e54b0318c1488736e1cf8896a292a72e8282aa7793487fe78e8745a", size = 3344998, upload-time = "2025-10-10T21:42:05.781Z" }, - { url = "https://files.pythonhosted.org/packages/02/9f/071749072958d5ed00f728d5287e08a8bd46aadebbb60fcf63a84cdb908c/multi_storage_client-0.32.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c690e2f701bf00e2dc117f7c9b89f88ca7aa86f8335e293597bdada6adec11fc", size = 
5265048, upload-time = "2025-10-10T21:44:25.477Z" }, - { url = "https://files.pythonhosted.org/packages/2e/eb/76abc34996a960c7c23f61e9d07b2861ed96047ba0f768aa74e279fab76a/multi_storage_client-0.32.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:14b1bdc765d060b250335b495c9fca5bcc0957625244b1bc4803029b2755c7b4", size = 5392366, upload-time = "2025-10-10T21:40:32.831Z" }, - { url = "https://files.pythonhosted.org/packages/e9/35/56255ad4247d877d13accf35dde3e0ec8f2087290def6adbe787ddc952d9/multi_storage_client-0.32.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c1f139337d7320af3f15d725aee172893386ade01d89af0ae5aab19d501b354", size = 3174684, upload-time = "2025-10-10T21:40:09.993Z" }, - { url = "https://files.pythonhosted.org/packages/3d/a4/98761f87f30ec7f1afb730a648e58b386067c00c2d8736b18cf543fff57b/multi_storage_client-0.32.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:643bcf247be3bbaea0004c2e003af0aa8ae79258087ed2360670e685499698ed", size = 3344163, upload-time = "2025-10-10T21:43:39.164Z" }, + { url = "https://files.pythonhosted.org/packages/5c/c4/6279fb7d4b8b0a7af060047d592f00f8d49c547adfebe50bcd8d0d2dc8a5/multi_storage_client-0.33.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:df52b3040ef5698c6388fa589bd63812ae0d2f967d358a792abcad5638686590", size = 5282006, upload-time = "2025-10-23T03:45:37.761Z" }, + { url = "https://files.pythonhosted.org/packages/22/3b/23d8beccd73b887c4552bf884275611255b5028388fa3317365cd56c2a93/multi_storage_client-0.33.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:370da04b1e56a601ba505a29d42fcabc19b583e10d725a37bc0c11ba3573d211", size = 5403083, upload-time = "2025-10-23T03:53:11.998Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ad/dc355d05fd369da0d800e5f7de24da0393f542c5a6f775f6bcee7edcacb1/multi_storage_client-0.33.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c57749a28ec5d49440f465fd73e4e2feaab18ece9b6e57c73395308b41950f66", size = 3178432, upload-time = "2025-10-23T04:07:00.543Z" }, + { url = "https://files.pythonhosted.org/packages/e0/ad/97b54419d8a58f696b85504568391a627641152f80650d7d2697fc2702ed/multi_storage_client-0.33.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7d95f5fe094aab00a240bf6aa11dfe85bec293b76b3688ec3a9c33d86c751d2", size = 3351102, upload-time = "2025-10-23T03:47:47.622Z" }, + { url = "https://files.pythonhosted.org/packages/52/28/1038a68b9df1b179a61967ce9f7d2e80b9954cdb289801afecde5f7660db/multi_storage_client-0.33.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4b5a0f5a0b7684835be20ae6782070884982a86665e9bab317375a56a20294d1", size = 5281523, upload-time = "2025-10-23T04:06:36.671Z" }, + { url = "https://files.pythonhosted.org/packages/6c/c5/e18de5e2a2671efdc0a12383b8d63f523044ca453525725b3450d0179c0e/multi_storage_client-0.33.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:0db694311f90f44ee8f6f7734a14a0857738a467f2ae201649218a3ecf1f6ab2", size = 5403353, upload-time = "2025-10-23T04:07:25.941Z" }, + { url = "https://files.pythonhosted.org/packages/7e/c9/d9f65eb2370151dbbb06925f4216ee017e6cdbf7657263fd98e60944e52b/multi_storage_client-0.33.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cbe3a0b856f0b968f9fc693670a521b5a995b625351241ca008f866fdfff62a", size = 3180052, upload-time = "2025-10-23T03:57:32.797Z" }, + { url = "https://files.pythonhosted.org/packages/e7/38/08b9d84c93b19ae87caf542ae77f17dfa44a85281ba09de660ffcf3a7718/multi_storage_client-0.33.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:018e7e82255feeff973ff02563f11a30f5e507e4cbc87a2167a9568740144ef2", size = 3351389, upload-time = "2025-10-23T04:02:07.348Z" }, + { url = 
"https://files.pythonhosted.org/packages/6a/31/c95634a27723b5ba9d2d74158444cc5e40b151b51ae59ca196fc9993f039/multi_storage_client-0.33.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:030b3a592c6352605e9ebdb8d9303dd42daf5d171ffa684f3283d4a5c6e2edfe", size = 5273976, upload-time = "2025-10-23T04:04:35.99Z" }, + { url = "https://files.pythonhosted.org/packages/8c/cf/82d1778d73c3baaec331da4ae8d01fa7934bcd73336aa88a08d86d080347/multi_storage_client-0.33.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:14dc0ace16d3830917427d6376d14ef62bd053fb2509f893998555ca1e9c4dcb", size = 5400735, upload-time = "2025-10-23T03:58:37.149Z" }, + { url = "https://files.pythonhosted.org/packages/fc/34/a6194ec725ef80c02de58b5ed3520bb1711807df75a27f7214effd22df34/multi_storage_client-0.33.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2821765d5c6de365b5b1dcdc7cf2ebba719ff4061fd02975639629f8aa319f6", size = 3182623, upload-time = "2025-10-23T04:03:29.551Z" }, + { url = "https://files.pythonhosted.org/packages/8f/36/7ec85178fd1dd69c278407a82acaccfb806449deda13f3dbd41f653d73bd/multi_storage_client-0.33.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f92f89480c58067fa53c178785b86e7650e16f277a61a732a8a7019173b16129", size = 3352104, upload-time = "2025-10-23T04:08:51.005Z" }, + { url = "https://files.pythonhosted.org/packages/88/ef/f2eb2efefb0e0588b29ed573b8354ecd72c38e6143da7ed5ecf53e859bf8/multi_storage_client-0.33.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed9af7e77e3cbac1f614816062b36975dcbc610bd3f8c86741d48aa18c718781", size = 5272154, upload-time = "2025-10-23T04:07:49.572Z" }, + { url = "https://files.pythonhosted.org/packages/1e/49/050aa4fccb2579d2ef5bd0d27169ec98fe85c92bba7a2c31154c491a4f75/multi_storage_client-0.33.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:c9d75e95a266ee858cf20c88ed255021552de67a40af9c8884d2fc22037dcd2b", size = 5399474, upload-time = "2025-10-23T04:09:14.545Z" }, + { url = 
"https://files.pythonhosted.org/packages/f6/4b/70c2df3b60c28360f185188d351e9c3958b702614963a09ffb1dc251c1ca/multi_storage_client-0.33.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48195a2ab9e6e9a2763bde17184cad2bdef82684353e210d0d325f20cea18869", size = 3181788, upload-time = "2025-10-23T04:03:10.404Z" }, + { url = "https://files.pythonhosted.org/packages/9b/96/5008852677fdad10eb9d8dd08a6ea58c6f7e820199a3b2c56607186ac6d5/multi_storage_client-0.33.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd64403efdcee2a6efcf7bfdb01422dd174c146014563b09f44590346fd835e6", size = 3351269, upload-time = "2025-10-23T04:00:34.714Z" }, ] [[package]] @@ -4679,109 +4679,109 @@ wheels = [ [[package]] name = "regex" -version = "2025.10.22" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/90/f2/97d95db85e11cc85f97581cfc8b4a0405c7fb6099003c23ffaaa0cb4f31d/regex-2025.10.22.tar.gz", hash = "sha256:cc50db098b9d678ace33176a3ab4099616726ae4680fee6ac292302e8950fc4c", size = 400985, upload-time = "2025-10-21T00:48:37.365Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/42/2904bb22aaaebaa8348673cfbacd704dba2160d847bf17cc6209349a8b7d/regex-2025.10.22-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:afa5307263ef2883cff3c1055a58239d97c28a888b813489b04ff063f64610d6", size = 487959, upload-time = "2025-10-21T00:45:00.385Z" }, - { url = "https://files.pythonhosted.org/packages/28/87/ecc953aec36f3c79585d40d2ce3a90ae28aed434c681cfcbed19ce9b4bba/regex-2025.10.22-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cfd87258e5879cec2f02907a043d69d72c864723209565ae8cd905a823b94976", size = 290421, upload-time = "2025-10-21T00:45:02.122Z" }, - { url = "https://files.pythonhosted.org/packages/e5/81/aca223093854fb1e385580f6e7ef48fc895ecfe2a8d66133850b8cc12d49/regex-2025.10.22-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:53a184fa09354b02f18fe3c50de3b809386dbc1bbfa8e51598e300342cde5a11", size = 288284, upload-time = "2025-10-21T00:45:03.587Z" }, - { url = "https://files.pythonhosted.org/packages/42/36/08e03e31cc9dbf5951012a2188d5fd8c79ddc10c2e12849bf434158a1ae3/regex-2025.10.22-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:924a79f8e248271713bc0e1fdd7e48b4632a61152f448e446b8fd724f0715ae8", size = 781457, upload-time = "2025-10-21T00:45:05.105Z" }, - { url = "https://files.pythonhosted.org/packages/af/28/a1e08f43b850948044b3ab3169472c62e0d59be3e47049a27817a8b3c694/regex-2025.10.22-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:84cd327fd1f245e74a6fe0827e2775cd1de83c4a8cbce1da1627d07c233c5f58", size = 850605, upload-time = "2025-10-21T00:45:06.647Z" }, - { url = "https://files.pythonhosted.org/packages/5f/65/d864a9a4a3e0ba4ff3f8798481cc9bdc7304a337c999b69e148d0ad320ff/regex-2025.10.22-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:28c4fcf105ae1a09769110669280a3dfe84b291d856368c8b4d77ccf4345434e", size = 898563, upload-time = "2025-10-21T00:45:08.618Z" }, - { url = "https://files.pythonhosted.org/packages/cc/95/6ae15342e49b9fc1cd8aef350675b3b53446599114c190b3b9df5f4e0bce/regex-2025.10.22-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e32f91f414442d0d6fc6e0b7b58e05afd4deed92c852796f3122822f646fc42e", size = 791535, upload-time = "2025-10-21T00:45:09.888Z" }, - { url = "https://files.pythonhosted.org/packages/ff/f9/b557590b7ed1f5b8d2452ba8eda8959c4acacbad4ddd764df32438e74f2d/regex-2025.10.22-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:11d2a65fd118c1e409e27dab9aa0a65ebbcab1b836ed441e6e4f78dccc4bd6ef", size = 782461, upload-time = "2025-10-21T00:45:11.636Z" }, - { url = 
"https://files.pythonhosted.org/packages/94/dd/1cf6bb815f96137f500282ff209c4cfddfaebfe52cf7eb52ce183d389b41/regex-2025.10.22-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7ebde462d55fbbc96d888dad35bd413c8a3d53e3423aa23cc8f01c3398f39148", size = 774582, upload-time = "2025-10-21T00:45:14.192Z" }, - { url = "https://files.pythonhosted.org/packages/03/17/5d6777c93df720c755e4a3b85badaaece51dfe8161cbd1cf70b5a6522a5c/regex-2025.10.22-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1093a856ed0afdcfc89f65c97a143b1593538827701cc6519c6bc0f1c150e5f6", size = 845647, upload-time = "2025-10-21T00:45:15.486Z" }, - { url = "https://files.pythonhosted.org/packages/dd/65/431ae5c24c4db5a26b9d5a4c927381b351c6eaa031b61c91e2ed17857135/regex-2025.10.22-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:716a35741a61333c16d29e544685f3dbfa1df48593ad07e92f77b4a831b4c271", size = 836036, upload-time = "2025-10-21T00:45:16.869Z" }, - { url = "https://files.pythonhosted.org/packages/2f/0e/12c4dce8880364dfb0f31a46ee8dc896805fc6cef473b7491879f30ebd33/regex-2025.10.22-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4782376eb8dbeacaa69b34498e280e8e95947532f8938081e916bbce871bfbab", size = 779705, upload-time = "2025-10-21T00:45:18.472Z" }, - { url = "https://files.pythonhosted.org/packages/1d/6b/cd053d41840fd1e4a2cce4abab07248d4ca70c52ed6555490b56e077920c/regex-2025.10.22-cp310-cp310-win32.whl", hash = "sha256:086cc892b1f8e1d8fe7a060012268a21b96ec25b87b4618c12a853564261f63e", size = 265664, upload-time = "2025-10-21T00:45:20.163Z" }, - { url = "https://files.pythonhosted.org/packages/22/66/557b06253b10ea57198362fb4f6df8860f9d84ee25fcf9a7ca065c9c9984/regex-2025.10.22-cp310-cp310-win_amd64.whl", hash = "sha256:e25f9fb71b775a6d97096cb6c2ac26c675e8c99219afac7f9321f2f4daa46227", size = 277587, upload-time = "2025-10-21T00:45:21.579Z" }, - { url = 
"https://files.pythonhosted.org/packages/32/44/37a7cbcac47804b4ed34ffb03da494db7eef3992d42d4eb4fa4e0e840a11/regex-2025.10.22-cp310-cp310-win_arm64.whl", hash = "sha256:d0ecea4950b363a9bb1d01c35cff73c0bc762ebdf91109c806ca33a0cbc9ff03", size = 269980, upload-time = "2025-10-21T00:45:22.889Z" }, - { url = "https://files.pythonhosted.org/packages/4e/88/739a7c7dc641976fa3d66c0770f6bb2c6ef5cc3f6b44e039f58bffcfbff3/regex-2025.10.22-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e6b0c007a8b6a9500354eeab8478b18b1cca6ac3fd500f6c3ae017ed617de497", size = 487951, upload-time = "2025-10-21T00:45:24.675Z" }, - { url = "https://files.pythonhosted.org/packages/8d/6f/7157a845b79bfc68560f17268e8b6c2cd5757b5ca396608118a8209c3489/regex-2025.10.22-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:51170deaffec87e48004f9dab53ff0c4db8d10e2ff7630a78467ccd50f656328", size = 290421, upload-time = "2025-10-21T00:45:26.281Z" }, - { url = "https://files.pythonhosted.org/packages/bc/e4/a73127c12d6ed1ee97b81aed80b3a63499e409fe947cfcc491197312ebf0/regex-2025.10.22-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:333afc5e00f43598080ff1d00d5948462905ea514343fbdc5a889e7c3d7c23b6", size = 288282, upload-time = "2025-10-21T00:45:27.988Z" }, - { url = "https://files.pythonhosted.org/packages/67/69/10f1d84cd43ce52257cbc8b4af0e1a7b1b61988ee22e494eda7419702884/regex-2025.10.22-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:31221a2a095173e3121842c9f864a5902703dc5ff0d3298c0fe08f9a8a1d80b1", size = 793289, upload-time = "2025-10-21T00:45:30.192Z" }, - { url = "https://files.pythonhosted.org/packages/dd/30/cb4dd079787a76c96acddb15465bc1895ef67a02c4de60890b7b073328ad/regex-2025.10.22-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5de5505e5aac808e2a97515e1d74db99da23259da9dfaf833c1a10f8972d2096", size = 860320, upload-time = "2025-10-21T00:45:31.587Z" }, - { url = 
"https://files.pythonhosted.org/packages/ea/6f/25fd36431739dce27bdecb7c6a7e215a545a40577e683fc2708fa6235639/regex-2025.10.22-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:809c6f74840f18574da0ce8365d8635f0f1568552363b9a54adf0b41039a4406", size = 907011, upload-time = "2025-10-21T00:45:33.214Z" }, - { url = "https://files.pythonhosted.org/packages/0d/96/67fc321360de627c5406aed97be803240227770a29d09117157d56899c4d/regex-2025.10.22-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bd26a33cad0f24c045fe2d84e70a75f8bd82cb79121382c0ed6c035d247854c", size = 800313, upload-time = "2025-10-21T00:45:34.943Z" }, - { url = "https://files.pythonhosted.org/packages/17/e9/eff1e7cebb027130242b70b2c81a07d9a2d98414c67ea81fac5e32cda8d2/regex-2025.10.22-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:330b0cd6922f93cc0322002467f347b605555a4d64997f3598c06cf8c1303a7f", size = 782837, upload-time = "2025-10-21T00:45:36.335Z" }, - { url = "https://files.pythonhosted.org/packages/a5/64/d9eab04a6f3c043ef5d9cabc94d2d6b522c2bc57e68de8e6f88b080ff66a/regex-2025.10.22-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:6763d77bcca503aa1c24b675d05d44c764149f222b7eb6bb3423cebea5eec6e9", size = 854270, upload-time = "2025-10-21T00:45:43.158Z" }, - { url = "https://files.pythonhosted.org/packages/84/8f/a354bf4b41bfa157d731d3628ba677aff7f0c33603939459bba5ba2e4204/regex-2025.10.22-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:1eba7681913574c0a8025d435bbc6d10855b273d8f8c0e2d2fc9a981cd05704f", size = 845770, upload-time = "2025-10-21T00:45:44.776Z" }, - { url = "https://files.pythonhosted.org/packages/e7/9e/40a95cc48771d29a55e36d98e34be4f6a8d965fef99dff9056003e32273d/regex-2025.10.22-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:25b80a2ea85f6e06cecf5a3d3a51adb62d19072055bf39d9cabcb29462fffd1d", size = 788777, upload-time = "2025-10-21T00:45:46.551Z" }, - { url = 
"https://files.pythonhosted.org/packages/68/87/c9d542090675d014d36bece68d48c314a733ad59d3f4999103813a7bb020/regex-2025.10.22-cp311-cp311-win32.whl", hash = "sha256:c4d655be922039bb4ff8fd8363c71bc8da439f7c7260045e4ff10c774e80606b", size = 265667, upload-time = "2025-10-21T00:45:48.211Z" }, - { url = "https://files.pythonhosted.org/packages/47/89/98075b8c5a30b70f156af5caa833f57d0967cb0385fbcc1df37a9a0ca702/regex-2025.10.22-cp311-cp311-win_amd64.whl", hash = "sha256:b7ec554c0ed3aa93e0fb91c436b69654c11ab84a701ae3918dbe8fcd1b73984a", size = 277601, upload-time = "2025-10-21T00:45:49.844Z" }, - { url = "https://files.pythonhosted.org/packages/1f/b7/6664611fc6bdd38e8bf773e135954d10c0ee4326099114b0d00a52c85c96/regex-2025.10.22-cp311-cp311-win_arm64.whl", hash = "sha256:c4347ab5146bdd8b27fdb831f8cf882ec0238c7fdb6baddda1344d07ea8245b2", size = 269973, upload-time = "2025-10-21T00:45:51.535Z" }, - { url = "https://files.pythonhosted.org/packages/95/a8/3380a8cb20c255878a9f1165b33c4d6a31d8f5417650c22b73bdcaadd281/regex-2025.10.22-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8b66971471306def7e6baf18ead3f416347d56eb5e295f8a75014d13be92e9fd", size = 489185, upload-time = "2025-10-21T00:45:52.929Z" }, - { url = "https://files.pythonhosted.org/packages/b0/1c/e1eb33fc1f3a7851cc0f53b588790e14edeeb618e80fd5fd7ea987f9957d/regex-2025.10.22-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8c93b179960f4f2f517fe47da9984848d8342a6903b4d24649f4ee9bd22ccd3c", size = 291124, upload-time = "2025-10-21T00:45:54.934Z" }, - { url = "https://files.pythonhosted.org/packages/1b/21/6cc0fe9d4ebd7d6e19c08e77f41082103d52c671eb7eb01cc032e9bccbd4/regex-2025.10.22-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9b4fa8d221b5db3226029978c8c3f66f2e4c6d871e94b726bcd357e746b7a63", size = 288796, upload-time = "2025-10-21T00:45:56.248Z" }, - { url = 
"https://files.pythonhosted.org/packages/23/b0/d74069acbcc60b54977e693dd673099352b024f7f037cec201b0d96b7d99/regex-2025.10.22-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2a0d4e5f63c8de13fbab94d4a25cc6b02f1007b84e2d4c74f48c242eacb06f1", size = 798441, upload-time = "2025-10-21T00:45:57.896Z" }, - { url = "https://files.pythonhosted.org/packages/2c/f3/69cd09c226ce0fc6a5cf48b5dea716c0139abed41d02fa81fa774e56e713/regex-2025.10.22-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d8df6c82c544eed8314667a1fb8f705a9a802a9d6368045354319588ff56708d", size = 864038, upload-time = "2025-10-21T00:46:00.298Z" }, - { url = "https://files.pythonhosted.org/packages/8e/b0/77bd0e6838f579cc5a02b9e18bc0a759d0ed85b9a8d4d44ad6d3478a40ec/regex-2025.10.22-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a114c2735369334a755a844abd15d5a12716635cc4677fb4e6d793ce369310f6", size = 912054, upload-time = "2025-10-21T00:46:02.358Z" }, - { url = "https://files.pythonhosted.org/packages/2d/41/c320c3408050eefa516d352d9e05fd4d6af5da7ec0daea56d1e68bb9096c/regex-2025.10.22-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5d53115edada199723b831a49c7e1585ddda7940fb2ba7a78d12bf22e92f23e2", size = 803374, upload-time = "2025-10-21T00:46:03.837Z" }, - { url = "https://files.pythonhosted.org/packages/88/ed/0942c27223ce6bff95087f4859991634d995d6e186807e038fd1c2c3759c/regex-2025.10.22-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6b4a7d813fdffe99ae0ecc17c80f652c8946c05a6a090eb2560719d02dfdb4b0", size = 787714, upload-time = "2025-10-21T00:46:05.934Z" }, - { url = "https://files.pythonhosted.org/packages/1c/40/10e2657ed24966742efd68eeb566e26af1eea3925dfe761ce14260a69161/regex-2025.10.22-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = 
"sha256:81fb24976e3f71d765edec8a3175abb10359918d8997ca6a756fd68dd3c051f6", size = 858392, upload-time = "2025-10-21T00:46:07.801Z" }, - { url = "https://files.pythonhosted.org/packages/f3/48/bd382281e2f3bcfc2f355b5283ef16d8175b6df4cb6ed532529b715baf07/regex-2025.10.22-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d881e96a443528a83f46ab69714befeb35f4d0caf359c43a606b82cb717a5df9", size = 850482, upload-time = "2025-10-21T00:46:09.893Z" }, - { url = "https://files.pythonhosted.org/packages/2e/5c/fdc0ac5eb3f21a6f19158cce3150e57a65d9770709b8521e09fe9febe813/regex-2025.10.22-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:42abc81ee54e06bef4dbc8e7b8394a57882c718ed3c6aabfea47e429feb94ee9", size = 789633, upload-time = "2025-10-21T00:46:11.687Z" }, - { url = "https://files.pythonhosted.org/packages/a2/ef/c2e63968c9130a17d79431ba8aa98ada02962435436ef506fb4cef139760/regex-2025.10.22-cp312-cp312-win32.whl", hash = "sha256:db30ab87b3d745b7e95e69099e1c4bf544c3f3800b9376b935943e86f650705a", size = 266060, upload-time = "2025-10-21T00:46:13.577Z" }, - { url = "https://files.pythonhosted.org/packages/5d/9d/57bc04978add42a62391f8082e94ec3a8c3448d49e349ede8c2c66ca0a55/regex-2025.10.22-cp312-cp312-win_amd64.whl", hash = "sha256:64190fa0432ed254416898ff3b687648e025445bfa357988f20f1332f651f650", size = 276928, upload-time = "2025-10-21T00:46:15.18Z" }, - { url = "https://files.pythonhosted.org/packages/89/50/760700909a618de1c2405f3a0557a3ec9b4eba516a261aa85fe973d3a354/regex-2025.10.22-cp312-cp312-win_arm64.whl", hash = "sha256:cdfc74d0af9b0cb9bd442619489582b32efc348db651a44967ba5fb71b8d3dee", size = 270103, upload-time = "2025-10-21T00:46:16.903Z" }, - { url = "https://files.pythonhosted.org/packages/c9/25/4c056f41ae981b41e316e44e0ba76efe0b3655c8a070580c3c069765d4e8/regex-2025.10.22-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8d49aebe7cb99d80680ff55ff9475bf122c6e3e8a34aec7496aefc90196ac350", size = 488944, upload-time = "2025-10-21T00:46:18.67Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/4e/79e7882d35a613517a63d574d80e68c2e8e2d4c67aeaa0c564025cb9e3d6/regex-2025.10.22-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:45367f329e32988d33e5ebdb69b7fb9eb3fc1d9b789b00724e5ddabb75647064", size = 290995, upload-time = "2025-10-21T00:46:20.089Z" }, - { url = "https://files.pythonhosted.org/packages/e9/ed/228d94f8af1da578100822d7a3e8a82dc4f0ffbf07c626293deb0b0aff86/regex-2025.10.22-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fb449bc9d0f379c1064986621e6088a8d28cf628074700c18bd151855f4c9e2f", size = 288686, upload-time = "2025-10-21T00:46:21.769Z" }, - { url = "https://files.pythonhosted.org/packages/be/e9/203bff375a555b79d36fc707ad99584dc8847b4ef5182656a6e156946395/regex-2025.10.22-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:154919a381798a7ff07371bff86c6ca4cd9cee6110d163867ff12311ad18d7ac", size = 798465, upload-time = "2025-10-21T00:46:23.55Z" }, - { url = "https://files.pythonhosted.org/packages/fd/31/0660d5bbefcc0ecb0e4f654f69a28a47253da7997ae64fc24e86aff27971/regex-2025.10.22-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:29b4f447d8a514021011d24a50979d5aa1e7d2a99b150eea979221849bd9c77a", size = 863995, upload-time = "2025-10-21T00:46:25.129Z" }, - { url = "https://files.pythonhosted.org/packages/c8/45/a9e1b6fc5b91976ef5b7f456213da52fb4ce24a7846de7d8777a1c305ac5/regex-2025.10.22-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c0bd5398ca8b3f9c1f0d09719c195124e955c4677b55b9d5a728eca5f407eb03", size = 912144, upload-time = "2025-10-21T00:46:26.747Z" }, - { url = "https://files.pythonhosted.org/packages/6b/86/98813e259d8b791891b27c2a6e7ce4fc23bc4222fb46e55f473683ae586e/regex-2025.10.22-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecb0fbbd37ae701d12b90bacb03ad36c89b0d2d67eab02b5862ab3e1a50ea49e", size 
= 803370, upload-time = "2025-10-21T00:46:28.314Z" }, - { url = "https://files.pythonhosted.org/packages/fc/8e/53f27f735368896d777603cf76124b74949ce89123c2c99006834ee29924/regex-2025.10.22-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:419c5fff30240ed10ee55f2d7dd3b54dcc02502568e94be4522b54be63d59aff", size = 787763, upload-time = "2025-10-21T00:46:30.378Z" }, - { url = "https://files.pythonhosted.org/packages/c5/83/2759cdcdff775205871e10db4d1bf09afa7fbb55af850c5cfb0e9e699090/regex-2025.10.22-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b71b5c4a00467304ebfae0235b763129af2de074b02e78e959d8990c553c0a6e", size = 858336, upload-time = "2025-10-21T00:46:32.287Z" }, - { url = "https://files.pythonhosted.org/packages/6f/b5/6fe37d832e1e2cb4e82c444844e1eca88de9171d766f2f9cbe308409a2d8/regex-2025.10.22-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:aa800228137127de4cce1875f0ddeb4ce19d33fd0ac6450c3b00b942866748e7", size = 850401, upload-time = "2025-10-21T00:46:34.275Z" }, - { url = "https://files.pythonhosted.org/packages/30/57/b9c2b316a87dad82a8845b1854be743441ef375774497f11f13658d016b7/regex-2025.10.22-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:44c8c46b7160260e0cd8b0f7c20ff6269976278d8187646d3e741d8dfe5fcdbc", size = 789738, upload-time = "2025-10-21T00:46:36.421Z" }, - { url = "https://files.pythonhosted.org/packages/d1/5f/e8bb23662647d519d1ea24f9b30d19c291237aea721662b3d563af6326df/regex-2025.10.22-cp313-cp313-win32.whl", hash = "sha256:701c53e8cb0c73c39d72dc4be71ee88478904b4066bd31f95e2b6fdfac49102e", size = 266055, upload-time = "2025-10-21T00:46:38.062Z" }, - { url = "https://files.pythonhosted.org/packages/d9/12/035e5c09d1c5e64a640b3c0b2e4b01580e8a36cf0abb99d978422601158d/regex-2025.10.22-cp313-cp313-win_amd64.whl", hash = "sha256:4a3a6320015223d0a14fdc2706e65ca64e7e3d97016acef1349a39c3a0bbbd81", size = 276919, upload-time = "2025-10-21T00:46:39.636Z" }, - { url = 
"https://files.pythonhosted.org/packages/be/d3/44dfed03966d26942c53597951035cece3ecf4cb56945ee0bf15014ff092/regex-2025.10.22-cp313-cp313-win_arm64.whl", hash = "sha256:dbb3eb2433ad2158e9719369ea2184329145f50ffae2e6328985fc0de6a71984", size = 270104, upload-time = "2025-10-21T00:46:41.349Z" }, - { url = "https://files.pythonhosted.org/packages/9c/b9/ccd603c3ad0eead387eaa79203eca0c6846e065e10cb30a717ce2813a878/regex-2025.10.22-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:3fcce0c2b0b7a8f4a029154d7ae9040d2ff5bed77085cd3bf9a56b61a8cda009", size = 491846, upload-time = "2025-10-21T00:46:43.097Z" }, - { url = "https://files.pythonhosted.org/packages/06/f4/e96216c9faf36fbf42474702afe6efdaecf5b9e5fbce0a77ead5f00191d8/regex-2025.10.22-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:46338f1390c9ddf6c163949cd53558a89ab7c7edbb4713b9d2b7cdf71c87a75a", size = 292541, upload-time = "2025-10-21T00:46:44.996Z" }, - { url = "https://files.pythonhosted.org/packages/08/19/26b9fbd2daac8e783d3f008e5e18e99c9f31c880c9ba644511e3107e2f86/regex-2025.10.22-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ca58844dc33b4297ae24505db9528be6862a8b2b961f60f6acc0869ea1291d1a", size = 290899, upload-time = "2025-10-21T00:46:46.564Z" }, - { url = "https://files.pythonhosted.org/packages/9b/43/cd1512382caedfdb2f663948485ab001cb073631a0d94706db524385eaf5/regex-2025.10.22-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6c4d54ae939c325b8027277f998cc7dd175447745bd12d6a93c09ebebda1226a", size = 807309, upload-time = "2025-10-21T00:46:48.408Z" }, - { url = "https://files.pythonhosted.org/packages/13/69/6aaa805ed5b53a1a3d6115691745cfd20370f3dddc027f4fcdb8cb050251/regex-2025.10.22-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e8c311ee233a59483d6e3b78d669981f387ca2ce162b029895bddb74cbc37e53", size = 873241, upload-time = "2025-10-21T00:46:50.056Z" }, - { url = 
"https://files.pythonhosted.org/packages/75/21/224fe5b25fff1c6ac921246e51603785e688fc8e0d23dabc77d7e62b1b6b/regex-2025.10.22-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:64fc5557f8798a6ac439cabb80ea28c97e509e03ed1a1b23e16f6f7f95ee53fc", size = 914793, upload-time = "2025-10-21T00:46:51.648Z" }, - { url = "https://files.pythonhosted.org/packages/15/56/9349b5a283b3b05387ecd147962880ef1532827c073d5caf0d291048aaea/regex-2025.10.22-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e7957cab18a1148752372bd6acf23ecc54785d13439ef14024134d37e51e9b77", size = 812580, upload-time = "2025-10-21T00:46:53.585Z" }, - { url = "https://files.pythonhosted.org/packages/39/71/450cb85d91bc3c6e01589caa6de4b28445ae77fb8915895d9427996926d7/regex-2025.10.22-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9adaf0a0cefd826192045946bb8922e19d321934fa661efa3744d0aea130b667", size = 795344, upload-time = "2025-10-21T00:46:55.312Z" }, - { url = "https://files.pythonhosted.org/packages/75/b3/f8e6f2651a22662b00005f0b26f53438b89b33159469e8a279a07b9d951a/regex-2025.10.22-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:61e564ff5eb999e2ccf8311d7cb61ecb24c502ee5116b181b0348b4d882de480", size = 868213, upload-time = "2025-10-21T00:46:57.255Z" }, - { url = "https://files.pythonhosted.org/packages/37/aa/9dfa760dd368f2a9bc01d1a50edbc838b5ce330ca4142149420acde6d13d/regex-2025.10.22-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:1aa9a1ec0ab3f10210626795bcfe84b0ac20490d085ea4d7628fe381a98592be", size = 854538, upload-time = "2025-10-21T00:46:58.992Z" }, - { url = "https://files.pythonhosted.org/packages/55/62/e3ef2330f1b2e63fb1e096a53d3335a2dea5e77364cf8a17341e8acb24f1/regex-2025.10.22-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ffe59e0b0d93cf4999565236b5a36a7d22b10f5f7fed59f423bd5f7542453832", size = 799346, upload-time = "2025-10-21T00:47:00.738Z" }, - { url = 
"https://files.pythonhosted.org/packages/45/7e/ae3de5c8a26394be05ad1e2b252dd82425ab72ff7f4e79b03f8a431ecbfa/regex-2025.10.22-cp313-cp313t-win32.whl", hash = "sha256:36ba31e30b9c74a536a08635ca12cb0588ce39298b2cd7904194c2227c284d88", size = 268657, upload-time = "2025-10-21T00:47:02.958Z" }, - { url = "https://files.pythonhosted.org/packages/4e/1a/d6673cb4f28a368d51316b67c1067a246651731c8fbff50e99060b8ed483/regex-2025.10.22-cp313-cp313t-win_amd64.whl", hash = "sha256:d7d9992c44a5186c6539f9717b6a6e639d4f57f919d238e660f4ce42a22f0ced", size = 280076, upload-time = "2025-10-21T00:47:04.973Z" }, - { url = "https://files.pythonhosted.org/packages/26/40/30702d35b888a6cc1a290ec6b244109f827eddedb61af77b42c6c5f63928/regex-2025.10.22-cp313-cp313t-win_arm64.whl", hash = "sha256:28ce6c33b836c63ef0a4ec137fd0f136627b71075a5cfffb8c5aaef8ce4535b6", size = 271219, upload-time = "2025-10-21T00:47:06.678Z" }, - { url = "https://files.pythonhosted.org/packages/93/f2/9977dcdf246c79d906a0286b440a9cd40df04848044b7a269e9b4dcaf2dd/regex-2025.10.22-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:6f8d3d623d1bd4a8eb6eecc86e9ec80a130f071232f8e3d9d907693ca63ab5b6", size = 488962, upload-time = "2025-10-21T00:47:08.288Z" }, - { url = "https://files.pythonhosted.org/packages/b4/f0/1eff0e3a1d71cb81556b36320295f2970555de0b7d1378760aeb2deed132/regex-2025.10.22-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:f6d9cff7fc70884e3938ea0887dc06ee588647df9ce4b943a3f95b18f8479a58", size = 290936, upload-time = "2025-10-21T00:47:10.191Z" }, - { url = "https://files.pythonhosted.org/packages/37/fe/ca2f6f955f897ace6539ada97c9419d01b254686b24317c26d738dc641bd/regex-2025.10.22-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:6442d1cd67645854d04ba26ba47f697200b77fb6a11a43dccf38406113515c4f", size = 288767, upload-time = "2025-10-21T00:47:11.939Z" }, - { url = 
"https://files.pythonhosted.org/packages/9a/07/a10e2d7cca7b714d1be61cae05aaf3a44517f29b933e8113d490a1c5e908/regex-2025.10.22-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4385761deae1f5082f308267482530b9c286e005627d3afca80eb0bc6de97e70", size = 798885, upload-time = "2025-10-21T00:47:13.713Z" }, - { url = "https://files.pythonhosted.org/packages/ae/ba/e5f89ed297ab495c1545600ca3d67133e0a008bdea17af1f78e6ab0b8a2e/regex-2025.10.22-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c859b07e2ee607881e6ce7e9b99a02730408cfc3f7e9f5d407c015eb79dcb60b", size = 864767, upload-time = "2025-10-21T00:47:15.542Z" }, - { url = "https://files.pythonhosted.org/packages/6e/2e/2a4c50a4216c155dbb98b0243e6b918cfa4f19c293eff381363db657e5f0/regex-2025.10.22-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c4b2eeb15be534fd2499eab59696fada35a5cb2e45606e381d6a35f5dedc8fcf", size = 911393, upload-time = "2025-10-21T00:47:17.327Z" }, - { url = "https://files.pythonhosted.org/packages/2b/67/38d6f87b2fdef338fb6d1531abfeac61be5b14178ce0467fd87ca75bc7de/regex-2025.10.22-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d79c066145e1229c5733e4d774d17cbc20899681a9086f2a9f943eb4df18d8ec", size = 803144, upload-time = "2025-10-21T00:47:19.095Z" }, - { url = "https://files.pythonhosted.org/packages/3d/cd/24aa1da7beab4f98e637b56b5eac8aede966e27ac184e8d8462fc038ed01/regex-2025.10.22-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8ab1d067208191540ca9f38e9e7ae002da1b1fc31d1b21b818d1bd7a944a673e", size = 787831, upload-time = "2025-10-21T00:47:20.845Z" }, - { url = "https://files.pythonhosted.org/packages/bf/94/e46d13ec3cd6a0bce252b74a71ed711b6767c815967a16ce64b50db66a2b/regex-2025.10.22-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = 
"sha256:8f9c02832afb85e4eccde6a098da7e61942ddd9f2220406fd9c5efbbf0d774e8", size = 859160, upload-time = "2025-10-21T00:47:22.862Z" }, - { url = "https://files.pythonhosted.org/packages/f1/bd/5231cba2089e8be74d62907bea593b5c92b011890ee98d7a00bf02dd6174/regex-2025.10.22-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:a99dbe41ee88b9a1338ebd39eaf41dc33800265a44db7e2b2558bb416378cd04", size = 849897, upload-time = "2025-10-21T00:47:24.635Z" }, - { url = "https://files.pythonhosted.org/packages/cc/2b/38efccb6763321dfb3ca700d487dc897fc56f6d480c5f5f7bf28dc203820/regex-2025.10.22-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7aad963cffe1967ff78f37550b961146b59c3db1d06e70471e6a35767ffa2ddd", size = 789371, upload-time = "2025-10-21T00:47:26.627Z" }, - { url = "https://files.pythonhosted.org/packages/39/bb/37ca05e146ebf1da46a85aaed11bbece5990b9e889afde8d256139c8fc88/regex-2025.10.22-cp314-cp314-win32.whl", hash = "sha256:8fcea7bf64460d3a8dd7e8626f04cc93149f62367015fecbf72ed8a71e91ee60", size = 271452, upload-time = "2025-10-21T00:47:28.727Z" }, - { url = "https://files.pythonhosted.org/packages/bc/4d/a899b6ec14d7f174f6ed557223644d50b89331f36b2aa324b603f8289a05/regex-2025.10.22-cp314-cp314-win_amd64.whl", hash = "sha256:01a2679bb0286075b0488129b35fc2b1de88538d17f14dc15dd53ecbaaa7548a", size = 280173, upload-time = "2025-10-21T00:47:30.499Z" }, - { url = "https://files.pythonhosted.org/packages/94/9a/21496131abac3d68cc54d4d99bf97ff0385f66c63a1028172f2f6730ddd0/regex-2025.10.22-cp314-cp314-win_arm64.whl", hash = "sha256:6c79ee40c56db2f9090d3ba2cd730488184e522ccd53da6563f45e826fae03d0", size = 273203, upload-time = "2025-10-21T00:47:32.657Z" }, - { url = "https://files.pythonhosted.org/packages/28/40/2e5c9dab10e262f36bc0e1a8f7a9c4318618e9fcf7e7fa1d42f348ed43c9/regex-2025.10.22-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:fe200435c5f40efbfbc0591256f96c31e3709704906edc88817f631571682af6", size = 491858, upload-time = "2025-10-21T00:47:34.424Z" }, - { url = 
"https://files.pythonhosted.org/packages/40/af/9f4ed3a4ecd3a2bdb58e4190268fdcac934afe32898b9e091fe20f5f97ee/regex-2025.10.22-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:21b6eb4d8a1402aa6a05b98c0a5c353ee68cecfea6eca24542aa992aa2537405", size = 292535, upload-time = "2025-10-21T00:47:36.129Z" }, - { url = "https://files.pythonhosted.org/packages/8f/14/4025dd4cf7bf278d061de8ec8f8bb1105a9235294fb3d8437f0f38f498c7/regex-2025.10.22-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f811bb96131be670a59572caeebf2a94e60cd028f2fc2844e38bdb96f5bbbb14", size = 290907, upload-time = "2025-10-21T00:47:37.963Z" }, - { url = "https://files.pythonhosted.org/packages/a2/7b/a9675643093f800903e1617c3cb651d8684557607ace4af8a023d0fedb28/regex-2025.10.22-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:261a10c8d0dc918fdb3ba83b960f9745de07992696439a2d9b442bf48093b619", size = 807546, upload-time = "2025-10-21T00:47:40.075Z" }, - { url = "https://files.pythonhosted.org/packages/c7/ca/e8d0d9048676efcbd9f946dd03f5bdbd48040cc31d5a36048c7af8cfe076/regex-2025.10.22-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:18d073751341b9a9152d11ae92b468ffe1a1b16caa974a307c1beb117af6a478", size = 873323, upload-time = "2025-10-21T00:47:42.273Z" }, - { url = "https://files.pythonhosted.org/packages/b8/63/39d8352ca76cbb15affe6a48ddef3c6471adebe50cb0c6be626bb69d87a1/regex-2025.10.22-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:36878ced03cfe8e80d22af09fb564e2dddb736bf7c323d4467ff0d52fe6629fd", size = 914854, upload-time = "2025-10-21T00:47:44.379Z" }, - { url = "https://files.pythonhosted.org/packages/ab/fa/47d54acf73907018f92403414014d0728d31dbacaa86d39fdd7ddeffcb08/regex-2025.10.22-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:e76167ff542770dd2ffab2b869ef43ebbfc3a683a504e5c259ab64f13e6a17df", size = 812723, upload-time = "2025-10-21T00:47:46.368Z" }, - { url = "https://files.pythonhosted.org/packages/ff/a2/f814b9f762d4713fb55b4f9abc733c368b4f5b6d08dbda58bd72c4062ce4/regex-2025.10.22-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9bf8f164cdd1f1f9c9244eaf5f55573ddabb7bdc89541fcd0b9e931b37a46f87", size = 795438, upload-time = "2025-10-21T00:47:48.355Z" }, - { url = "https://files.pythonhosted.org/packages/89/82/5a78e32780e89eed8b64d8af06e654363131456b7121863072aea509a358/regex-2025.10.22-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:48361da216575aeffdff05fe902b4025f790f492336c33c455846960d151555e", size = 868337, upload-time = "2025-10-21T00:47:50.613Z" }, - { url = "https://files.pythonhosted.org/packages/c1/06/d533134280c1ee9ef40d586ce7f4b0fe598c284d8feef0c1c82e777df4fc/regex-2025.10.22-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:68afe6a9a856f48282df47301452654144e9be74f23cdce9e3d000b7f3050a07", size = 854565, upload-time = "2025-10-21T00:47:52.905Z" }, - { url = "https://files.pythonhosted.org/packages/12/c1/0954b6ae0d5da6a3362148bca5e80ce67281beca1b064fb06d3b05c0f19d/regex-2025.10.22-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:60a0251d6618d19c51799308511d7b6a63265bc425c7217a1b809eca927624a5", size = 799565, upload-time = "2025-10-21T00:47:55.127Z" }, - { url = "https://files.pythonhosted.org/packages/d3/d9/fbef87ba02d3668678b7a71b2d79a2ca092089dc530d83c609d83a82c9f8/regex-2025.10.22-cp314-cp314t-win32.whl", hash = "sha256:20ad0f712ff769003d90b442175779ad8ce7028e2640e10e0878b8a24e6373d1", size = 274427, upload-time = "2025-10-21T00:47:57.097Z" }, - { url = "https://files.pythonhosted.org/packages/db/df/58fd290ae0b5e223f42e25f1b3a1f445ceeee7d56016b615ab0207fd6552/regex-2025.10.22-cp314-cp314t-win_amd64.whl", hash = "sha256:94485cf318cd628f61dede6e1f9ab1956818ee7dcc59fb51d82e589c1c1a8f03", size = 284141, upload-time = 
"2025-10-21T00:47:59.661Z" }, - { url = "https://files.pythonhosted.org/packages/31/f2/01599f68ca68ded192f04209effb8630be4ff261b51b888000aea6f5a752/regex-2025.10.22-cp314-cp314t-win_arm64.whl", hash = "sha256:76bc9875244f1cf27e2e75dd9c8faf2c6dc8c9ff33afa98cf55e94969bea6fdd", size = 274499, upload-time = "2025-10-21T00:48:01.985Z" }, +version = "2025.10.23" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/c8/1d2160d36b11fbe0a61acb7c3c81ab032d9ec8ad888ac9e0a61b85ab99dd/regex-2025.10.23.tar.gz", hash = "sha256:8cbaf8ceb88f96ae2356d01b9adf5e6306fa42fa6f7eab6b97794e37c959ac26", size = 401266, upload-time = "2025-10-21T15:58:20.23Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/11/849d5d23633a77047465eaae4cc0cbf24ded7aa496c02e8b9710e28b1687/regex-2025.10.23-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:17bbcde374bef1c5fad9b131f0e28a6a24856dd90368d8c0201e2b5a69533daa", size = 487957, upload-time = "2025-10-21T15:54:26.151Z" }, + { url = "https://files.pythonhosted.org/packages/87/12/5985386e7e3200a0d6a6417026d2c758d783a932428a5efc0a42ca1ddf74/regex-2025.10.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b4e10434279cc8567f99ca6e018e9025d14f2fded2a603380b6be2090f476426", size = 290419, upload-time = "2025-10-21T15:54:28.804Z" }, + { url = "https://files.pythonhosted.org/packages/67/cf/a8615923f962f8fdc41a3a6093a48726955e8b1993f4614b26a41d249f9b/regex-2025.10.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9c9bb421cbe7012c744a5a56cf4d6c80829c72edb1a2991677299c988d6339c8", size = 288285, upload-time = "2025-10-21T15:54:30.47Z" }, + { url = "https://files.pythonhosted.org/packages/4e/3d/6a3a1e12c86354cd0b3cbf8c3dd6acbe853609ee3b39d47ecd3ce95caf84/regex-2025.10.23-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:275cd1c2ed8c4a78ebfa489618d7aee762e8b4732da73573c3e38236ec5f65de", size = 781458, upload-time = 
"2025-10-21T15:54:31.978Z" }, + { url = "https://files.pythonhosted.org/packages/46/47/76a8da004489f2700361754859e373b87a53d043de8c47f4d1583fd39d78/regex-2025.10.23-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7b426ae7952f3dc1e73a86056d520bd4e5f021397484a6835902fc5648bcacce", size = 850605, upload-time = "2025-10-21T15:54:33.753Z" }, + { url = "https://files.pythonhosted.org/packages/67/05/fa886461f97d45a6f4b209699cb994dc6d6212d6e219d29444dac5005775/regex-2025.10.23-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c5cdaf5b6d37c7da1967dbe729d819461aab6a98a072feef65bbcff0a6e60649", size = 898563, upload-time = "2025-10-21T15:54:35.431Z" }, + { url = "https://files.pythonhosted.org/packages/2d/db/3ddd8d01455f23cabad7499f4199de0df92f5e96d39633203ff9d0b592dc/regex-2025.10.23-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3bfeff0b08f296ab28b4332a7e03ca31c437ee78b541ebc874bbf540e5932f8d", size = 791535, upload-time = "2025-10-21T15:54:37.269Z" }, + { url = "https://files.pythonhosted.org/packages/7c/ae/0fa5cbf41ca92b6ec3370222fcb6c68b240d68ab10e803d086c03a19fd9e/regex-2025.10.23-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5f97236a67307b775f30a74ef722b64b38b7ab7ba3bb4a2508518a5de545459c", size = 782461, upload-time = "2025-10-21T15:54:39.187Z" }, + { url = "https://files.pythonhosted.org/packages/d4/23/70af22a016df11af4def27870eb175c2c7235b72d411ecf75a4b4a422cb6/regex-2025.10.23-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:be19e7de499940cd72475fb8e46ab2ecb1cf5906bebdd18a89f9329afb1df82f", size = 774583, upload-time = "2025-10-21T15:54:41.018Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ee/a54a6851f6905f33d3c4ed64e8737b1d85ed01b5724712530ddc0f9abdb1/regex-2025.10.23-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:883df76ee42d9ecb82b37ff8d01caea5895b3f49630a64d21111078bbf8ef64c", size = 845649, upload-time = "2025-10-21T15:54:42.615Z" }, + { url = "https://files.pythonhosted.org/packages/80/7d/c3ec1cae14e01fab00e38c41ed35f47a853359e95e9c023e9a4381bb122c/regex-2025.10.23-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2e9117d1d35fc2addae6281019ecc70dc21c30014b0004f657558b91c6a8f1a7", size = 836037, upload-time = "2025-10-21T15:54:44.63Z" }, + { url = "https://files.pythonhosted.org/packages/15/ae/45771140dd43c4d67c87b54d3728078ed6a96599d9fc7ba6825086236782/regex-2025.10.23-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0ff1307f531a5d8cf5c20ea517254551ff0a8dc722193aab66c656c5a900ea68", size = 779705, upload-time = "2025-10-21T15:54:46.08Z" }, + { url = "https://files.pythonhosted.org/packages/b8/95/074e2581760eafce7c816a352b7d3a322536e5b68c346d1a8bacd895545c/regex-2025.10.23-cp310-cp310-win32.whl", hash = "sha256:7888475787cbfee4a7cd32998eeffe9a28129fa44ae0f691b96cb3939183ef41", size = 265663, upload-time = "2025-10-21T15:54:47.854Z" }, + { url = "https://files.pythonhosted.org/packages/f7/c7/a25f56a718847e34d3f1608c72eadeb67653bff1a0411da023dd8f4c647b/regex-2025.10.23-cp310-cp310-win_amd64.whl", hash = "sha256:ec41a905908496ce4906dab20fb103c814558db1d69afc12c2f384549c17936a", size = 277587, upload-time = "2025-10-21T15:54:49.571Z" }, + { url = "https://files.pythonhosted.org/packages/d3/e5/63eb17c6b5deaefd93c2bbb1feae7c0a8d2157da25883a6ca2569cf7a663/regex-2025.10.23-cp310-cp310-win_arm64.whl", hash = "sha256:b2b7f19a764d5e966d5a62bf2c28a8b4093cc864c6734510bdb4aeb840aec5e6", size = 269979, upload-time = "2025-10-21T15:54:51.375Z" }, + { url = "https://files.pythonhosted.org/packages/82/e5/74b7cd5cd76b4171f9793042045bb1726f7856dd56e582fc3e058a7a8a5e/regex-2025.10.23-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6c531155bf9179345e85032052a1e5fe1a696a6abf9cea54b97e8baefff970fd", size = 487960, upload-time = "2025-10-21T15:54:53.253Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/08/854fa4b3b20471d1df1c71e831b6a1aa480281e37791e52a2df9641ec5c6/regex-2025.10.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:912e9df4e89d383681268d38ad8f5780d7cccd94ba0e9aa09ca7ab7ab4f8e7eb", size = 290425, upload-time = "2025-10-21T15:54:55.21Z" }, + { url = "https://files.pythonhosted.org/packages/ab/d3/6272b1dd3ca1271661e168762b234ad3e00dbdf4ef0c7b9b72d2d159efa7/regex-2025.10.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4f375c61bfc3138b13e762fe0ae76e3bdca92497816936534a0177201666f44f", size = 288278, upload-time = "2025-10-21T15:54:56.862Z" }, + { url = "https://files.pythonhosted.org/packages/14/8f/c7b365dd9d9bc0a36e018cb96f2ffb60d2ba8deb589a712b437f67de2920/regex-2025.10.23-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e248cc9446081119128ed002a3801f8031e0c219b5d3c64d3cc627da29ac0a33", size = 793289, upload-time = "2025-10-21T15:54:58.352Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fb/b8fbe9aa16cf0c21f45ec5a6c74b4cecbf1a1c0deb7089d4a6f83a9c1caa/regex-2025.10.23-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b52bf9282fdf401e4f4e721f0f61fc4b159b1307244517789702407dd74e38ca", size = 860321, upload-time = "2025-10-21T15:54:59.813Z" }, + { url = "https://files.pythonhosted.org/packages/b0/81/bf41405c772324926a9bd8a640dedaa42da0e929241834dfce0733070437/regex-2025.10.23-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5c084889ab2c59765a0d5ac602fd1c3c244f9b3fcc9a65fdc7ba6b74c5287490", size = 907011, upload-time = "2025-10-21T15:55:01.968Z" }, + { url = "https://files.pythonhosted.org/packages/a4/fb/5ad6a8b92d3f88f3797b51bb4ef47499acc2d0b53d2fbe4487a892f37a73/regex-2025.10.23-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d80e8eb79009bdb0936658c44ca06e2fbbca67792013e3818eea3f5f228971c2", size 
= 800312, upload-time = "2025-10-21T15:55:04.15Z" }, + { url = "https://files.pythonhosted.org/packages/42/48/b4efba0168a2b57f944205d823f8e8a3a1ae6211a34508f014ec2c712f4f/regex-2025.10.23-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b6f259118ba87b814a8ec475380aee5f5ae97a75852a3507cf31d055b01b5b40", size = 782839, upload-time = "2025-10-21T15:55:05.641Z" }, + { url = "https://files.pythonhosted.org/packages/13/2a/c9efb4c6c535b0559c1fa8e431e0574d229707c9ca718600366fcfef6801/regex-2025.10.23-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9b8c72a242683dcc72d37595c4f1278dfd7642b769e46700a8df11eab19dfd82", size = 854270, upload-time = "2025-10-21T15:55:07.27Z" }, + { url = "https://files.pythonhosted.org/packages/34/2d/68eecc1bdaee020e8ba549502291c9450d90d8590d0552247c9b543ebf7b/regex-2025.10.23-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a8d7b7a0a3df9952f9965342159e0c1f05384c0f056a47ce8b61034f8cecbe83", size = 845771, upload-time = "2025-10-21T15:55:09.477Z" }, + { url = "https://files.pythonhosted.org/packages/a5/cd/a1ae499cf9b87afb47a67316bbf1037a7c681ffe447c510ed98c0aa2c01c/regex-2025.10.23-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:413bfea20a484c524858125e92b9ce6ffdd0a4b97d4ff96b5859aa119b0f1bdd", size = 788778, upload-time = "2025-10-21T15:55:11.396Z" }, + { url = "https://files.pythonhosted.org/packages/38/f9/70765e63f5ea7d43b2b6cd4ee9d3323f16267e530fb2a420d92d991cf0fc/regex-2025.10.23-cp311-cp311-win32.whl", hash = "sha256:f76deef1f1019a17dad98f408b8f7afc4bd007cbe835ae77b737e8c7f19ae575", size = 265666, upload-time = "2025-10-21T15:55:13.306Z" }, + { url = "https://files.pythonhosted.org/packages/9c/1a/18e9476ee1b63aaec3844d8e1cb21842dc19272c7e86d879bfc0dcc60db3/regex-2025.10.23-cp311-cp311-win_amd64.whl", hash = "sha256:59bba9f7125536f23fdab5deeea08da0c287a64c1d3acc1c7e99515809824de8", size = 277600, upload-time = "2025-10-21T15:55:15.087Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/1b/c019167b1f7a8ec77251457e3ff0339ed74ca8bce1ea13138dc98309c923/regex-2025.10.23-cp311-cp311-win_arm64.whl", hash = "sha256:b103a752b6f1632ca420225718d6ed83f6a6ced3016dd0a4ab9a6825312de566", size = 269974, upload-time = "2025-10-21T15:55:16.841Z" }, + { url = "https://files.pythonhosted.org/packages/f6/57/eeb274d83ab189d02d778851b1ac478477522a92b52edfa6e2ae9ff84679/regex-2025.10.23-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:7a44d9c00f7a0a02d3b777429281376370f3d13d2c75ae74eb94e11ebcf4a7fc", size = 489187, upload-time = "2025-10-21T15:55:18.322Z" }, + { url = "https://files.pythonhosted.org/packages/55/5c/7dad43a9b6ea88bf77e0b8b7729a4c36978e1043165034212fd2702880c6/regex-2025.10.23-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b83601f84fde939ae3478bb32a3aef36f61b58c3208d825c7e8ce1a735f143f2", size = 291122, upload-time = "2025-10-21T15:55:20.2Z" }, + { url = "https://files.pythonhosted.org/packages/66/21/38b71e6f2818f0f4b281c8fba8d9d57cfca7b032a648fa59696e0a54376a/regex-2025.10.23-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec13647907bb9d15fd192bbfe89ff06612e098a5709e7d6ecabbdd8f7908fc45", size = 288797, upload-time = "2025-10-21T15:55:21.932Z" }, + { url = "https://files.pythonhosted.org/packages/be/95/888f069c89e7729732a6d7cca37f76b44bfb53a1e35dda8a2c7b65c1b992/regex-2025.10.23-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78d76dd2957d62501084e7012ddafc5fcd406dd982b7a9ca1ea76e8eaaf73e7e", size = 798442, upload-time = "2025-10-21T15:55:23.747Z" }, + { url = "https://files.pythonhosted.org/packages/76/70/4f903c608faf786627a8ee17c06e0067b5acade473678b69c8094b248705/regex-2025.10.23-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8668e5f067e31a47699ebb354f43aeb9c0ef136f915bd864243098524482ac43", size = 864039, upload-time = "2025-10-21T15:55:25.656Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/19/2df67b526bf25756c7f447dde554fc10a220fd839cc642f50857d01e4a7b/regex-2025.10.23-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a32433fe3deb4b2d8eda88790d2808fed0dc097e84f5e683b4cd4f42edef6cca", size = 912057, upload-time = "2025-10-21T15:55:27.309Z" }, + { url = "https://files.pythonhosted.org/packages/99/14/9a39b7c9e007968411bc3c843cc14cf15437510c0a9991f080cab654fd16/regex-2025.10.23-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d97d73818c642c938db14c0668167f8d39520ca9d983604575ade3fda193afcc", size = 803374, upload-time = "2025-10-21T15:55:28.9Z" }, + { url = "https://files.pythonhosted.org/packages/d4/f7/3495151dd3ca79949599b6d069b72a61a2c5e24fc441dccc79dcaf708fe6/regex-2025.10.23-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bca7feecc72ee33579e9f6ddf8babbe473045717a0e7dbc347099530f96e8b9a", size = 787714, upload-time = "2025-10-21T15:55:30.628Z" }, + { url = "https://files.pythonhosted.org/packages/28/65/ee882455e051131869957ee8597faea45188c9a98c0dad724cfb302d4580/regex-2025.10.23-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7e24af51e907d7457cc4a72691ec458320b9ae67dc492f63209f01eecb09de32", size = 858392, upload-time = "2025-10-21T15:55:32.322Z" }, + { url = "https://files.pythonhosted.org/packages/53/25/9287fef5be97529ebd3ac79d256159cb709a07eb58d4be780d1ca3885da8/regex-2025.10.23-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d10bcde58bbdf18146f3a69ec46dd03233b94a4a5632af97aa5378da3a47d288", size = 850484, upload-time = "2025-10-21T15:55:34.037Z" }, + { url = "https://files.pythonhosted.org/packages/f3/b4/b49b88b4fea2f14dc73e5b5842755e782fc2e52f74423d6f4adc130d5880/regex-2025.10.23-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:44383bc0c933388516c2692c9a7503e1f4a67e982f20b9a29d2fb70c6494f147", size = 789634, upload-time = "2025-10-21T15:55:35.958Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/3c/2f8d199d0e84e78bcd6bdc2be9b62410624f6b796e2893d1837ae738b160/regex-2025.10.23-cp312-cp312-win32.whl", hash = "sha256:6040a86f95438a0114bba16e51dfe27f1bc004fd29fe725f54a586f6d522b079", size = 266060, upload-time = "2025-10-21T15:55:37.902Z" }, + { url = "https://files.pythonhosted.org/packages/d7/67/c35e80969f6ded306ad70b0698863310bdf36aca57ad792f45ddc0e2271f/regex-2025.10.23-cp312-cp312-win_amd64.whl", hash = "sha256:436b4c4352fe0762e3bfa34a5567079baa2ef22aa9c37cf4d128979ccfcad842", size = 276931, upload-time = "2025-10-21T15:55:39.502Z" }, + { url = "https://files.pythonhosted.org/packages/f5/a1/4ed147de7d2b60174f758412c87fa51ada15cd3296a0ff047f4280aaa7ca/regex-2025.10.23-cp312-cp312-win_arm64.whl", hash = "sha256:f4b1b1991617055b46aff6f6db24888c1f05f4db9801349d23f09ed0714a9335", size = 270103, upload-time = "2025-10-21T15:55:41.24Z" }, + { url = "https://files.pythonhosted.org/packages/28/c6/195a6217a43719d5a6a12cc192a22d12c40290cecfa577f00f4fb822f07d/regex-2025.10.23-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:b7690f95404a1293923a296981fd943cca12c31a41af9c21ba3edd06398fc193", size = 488956, upload-time = "2025-10-21T15:55:42.887Z" }, + { url = "https://files.pythonhosted.org/packages/4c/93/181070cd1aa2fa541ff2d3afcf763ceecd4937b34c615fa92765020a6c90/regex-2025.10.23-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1a32d77aeaea58a13230100dd8797ac1a84c457f3af2fdf0d81ea689d5a9105b", size = 290997, upload-time = "2025-10-21T15:55:44.53Z" }, + { url = "https://files.pythonhosted.org/packages/b6/c5/9d37fbe3a40ed8dda78c23e1263002497540c0d1522ed75482ef6c2000f0/regex-2025.10.23-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b24b29402f264f70a3c81f45974323b41764ff7159655360543b7cabb73e7d2f", size = 288686, upload-time = "2025-10-21T15:55:46.186Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/e7/db610ff9f10c2921f9b6ac0c8d8be4681b28ddd40fc0549429366967e61f/regex-2025.10.23-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:563824a08c7c03d96856d84b46fdb3bbb7cfbdf79da7ef68725cda2ce169c72a", size = 798466, upload-time = "2025-10-21T15:55:48.24Z" }, + { url = "https://files.pythonhosted.org/packages/90/10/aab883e1fa7fe2feb15ac663026e70ca0ae1411efa0c7a4a0342d9545015/regex-2025.10.23-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a0ec8bdd88d2e2659c3518087ee34b37e20bd169419ffead4240a7004e8ed03b", size = 863996, upload-time = "2025-10-21T15:55:50.478Z" }, + { url = "https://files.pythonhosted.org/packages/a2/b0/8f686dd97a51f3b37d0238cd00a6d0f9ccabe701f05b56de1918571d0d61/regex-2025.10.23-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b577601bfe1d33913fcd9276d7607bbac827c4798d9e14d04bf37d417a6c41cb", size = 912145, upload-time = "2025-10-21T15:55:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/a3/ca/639f8cd5b08797bca38fc5e7e07f76641a428cf8c7fca05894caf045aa32/regex-2025.10.23-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7c9f2c68ac6cb3de94eea08a437a75eaa2bd33f9e97c84836ca0b610a5804368", size = 803370, upload-time = "2025-10-21T15:55:53.944Z" }, + { url = "https://files.pythonhosted.org/packages/0d/1e/a40725bb76959eddf8abc42a967bed6f4851b39f5ac4f20e9794d7832aa5/regex-2025.10.23-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:89f8b9ea3830c79468e26b0e21c3585f69f105157c2154a36f6b7839f8afb351", size = 787767, upload-time = "2025-10-21T15:55:56.004Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d8/8ee9858062936b0f99656dce390aa667c6e7fb0c357b1b9bf76fb5e2e708/regex-2025.10.23-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:98fd84c4e4ea185b3bb5bf065261ab45867d8875032f358a435647285c722673", size = 858335, upload-time = "2025-10-21T15:55:58.185Z" }, + { url = "https://files.pythonhosted.org/packages/d8/0a/ed5faaa63fa8e3064ab670e08061fbf09e3a10235b19630cf0cbb9e48c0a/regex-2025.10.23-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1e11d3e5887b8b096f96b4154dfb902f29c723a9556639586cd140e77e28b313", size = 850402, upload-time = "2025-10-21T15:56:00.023Z" }, + { url = "https://files.pythonhosted.org/packages/79/14/d05f617342f4b2b4a23561da500ca2beab062bfcc408d60680e77ecaf04d/regex-2025.10.23-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f13450328a6634348d47a88367e06b64c9d84980ef6a748f717b13f8ce64e87", size = 789739, upload-time = "2025-10-21T15:56:01.967Z" }, + { url = "https://files.pythonhosted.org/packages/f9/7b/e8ce8eef42a15f2c3461f8b3e6e924bbc86e9605cb534a393aadc8d3aff8/regex-2025.10.23-cp313-cp313-win32.whl", hash = "sha256:37be9296598a30c6a20236248cb8b2c07ffd54d095b75d3a2a2ee5babdc51df1", size = 266054, upload-time = "2025-10-21T15:56:05.291Z" }, + { url = "https://files.pythonhosted.org/packages/71/2d/55184ed6be6473187868d2f2e6a0708195fc58270e62a22cbf26028f2570/regex-2025.10.23-cp313-cp313-win_amd64.whl", hash = "sha256:ea7a3c283ce0f06fe789365841e9174ba05f8db16e2fd6ae00a02df9572c04c0", size = 276917, upload-time = "2025-10-21T15:56:07.303Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d4/927eced0e2bd45c45839e556f987f8c8f8683268dd3c00ad327deb3b0172/regex-2025.10.23-cp313-cp313-win_arm64.whl", hash = "sha256:d9a4953575f300a7bab71afa4cd4ac061c7697c89590a2902b536783eeb49a4f", size = 270105, upload-time = "2025-10-21T15:56:09.857Z" }, + { url = "https://files.pythonhosted.org/packages/3e/b3/95b310605285573341fc062d1d30b19a54f857530e86c805f942c4ff7941/regex-2025.10.23-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:7d6606524fa77b3912c9ef52a42ef63c6cfbfc1077e9dc6296cd5da0da286044", size = 491850, upload-time = "2025-10-21T15:56:11.685Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/8f/207c2cec01e34e56db1eff606eef46644a60cf1739ecd474627db90ad90b/regex-2025.10.23-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:c037aadf4d64bdc38af7db3dbd34877a057ce6524eefcb2914d6d41c56f968cc", size = 292537, upload-time = "2025-10-21T15:56:13.963Z" }, + { url = "https://files.pythonhosted.org/packages/98/3b/025240af4ada1dc0b5f10d73f3e5122d04ce7f8908ab8881e5d82b9d61b6/regex-2025.10.23-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:99018c331fb2529084a0c9b4c713dfa49fafb47c7712422e49467c13a636c656", size = 290904, upload-time = "2025-10-21T15:56:16.016Z" }, + { url = "https://files.pythonhosted.org/packages/81/8e/104ac14e2d3450c43db18ec03e1b96b445a94ae510b60138f00ce2cb7ca1/regex-2025.10.23-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd8aba965604d70306eb90a35528f776e59112a7114a5162824d43b76fa27f58", size = 807311, upload-time = "2025-10-21T15:56:17.818Z" }, + { url = "https://files.pythonhosted.org/packages/19/63/78aef90141b7ce0be8a18e1782f764f6997ad09de0e05251f0d2503a914a/regex-2025.10.23-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:238e67264b4013e74136c49f883734f68656adf8257bfa13b515626b31b20f8e", size = 873241, upload-time = "2025-10-21T15:56:19.941Z" }, + { url = "https://files.pythonhosted.org/packages/b3/a8/80eb1201bb49ae4dba68a1b284b4211ed9daa8e74dc600018a10a90399fb/regex-2025.10.23-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b2eb48bd9848d66fd04826382f5e8491ae633de3233a3d64d58ceb4ecfa2113a", size = 914794, upload-time = "2025-10-21T15:56:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/f0/d5/1984b6ee93281f360a119a5ca1af6a8ca7d8417861671388bf750becc29b/regex-2025.10.23-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:d36591ce06d047d0c0fe2fc5f14bfbd5b4525d08a7b6a279379085e13f0e3d0e", size = 812581, upload-time = "2025-10-21T15:56:24.319Z" }, + { url = "https://files.pythonhosted.org/packages/c4/39/11ebdc6d9927172a64ae237d16763145db6bd45ebb4055c17b88edab72a7/regex-2025.10.23-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b5d4ece8628d6e364302006366cea3ee887db397faebacc5dacf8ef19e064cf8", size = 795346, upload-time = "2025-10-21T15:56:26.232Z" }, + { url = "https://files.pythonhosted.org/packages/3b/b4/89a591bcc08b5e436af43315284bd233ba77daf0cf20e098d7af12f006c1/regex-2025.10.23-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:39a7e8083959cb1c4ff74e483eecb5a65d3b3e1d821b256e54baf61782c906c6", size = 868214, upload-time = "2025-10-21T15:56:28.597Z" }, + { url = "https://files.pythonhosted.org/packages/3d/ff/58ba98409c1dbc8316cdb20dafbc63ed267380a07780cafecaf5012dabc9/regex-2025.10.23-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:842d449a8fefe546f311656cf8c0d6729b08c09a185f1cad94c756210286d6a8", size = 854540, upload-time = "2025-10-21T15:56:30.875Z" }, + { url = "https://files.pythonhosted.org/packages/9a/f2/4a9e9338d67626e2071b643f828a482712ad15889d7268e11e9a63d6f7e9/regex-2025.10.23-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d614986dc68506be8f00474f4f6960e03e4ca9883f7df47744800e7d7c08a494", size = 799346, upload-time = "2025-10-21T15:56:32.725Z" }, + { url = "https://files.pythonhosted.org/packages/63/be/543d35c46bebf6f7bf2be538cca74d6585f25714700c36f37f01b92df551/regex-2025.10.23-cp313-cp313t-win32.whl", hash = "sha256:a5b7a26b51a9df473ec16a1934d117443a775ceb7b39b78670b2e21893c330c9", size = 268657, upload-time = "2025-10-21T15:56:34.577Z" }, + { url = "https://files.pythonhosted.org/packages/14/9f/4dd6b7b612037158bb2c9bcaa710e6fb3c40ad54af441b9c53b3a137a9f1/regex-2025.10.23-cp313-cp313t-win_amd64.whl", hash = "sha256:ce81c5544a5453f61cb6f548ed358cfb111e3b23f3cd42d250a4077a6be2a7b6", size = 280075, upload-time = 
"2025-10-21T15:56:36.767Z" }, + { url = "https://files.pythonhosted.org/packages/81/7a/5bd0672aa65d38c8da6747c17c8b441bdb53d816c569e3261013af8e83cf/regex-2025.10.23-cp313-cp313t-win_arm64.whl", hash = "sha256:e9bf7f6699f490e4e43c44757aa179dab24d1960999c84ab5c3d5377714ed473", size = 271219, upload-time = "2025-10-21T15:56:39.033Z" }, + { url = "https://files.pythonhosted.org/packages/73/f6/0caf29fec943f201fbc8822879c99d31e59c1d51a983d9843ee5cf398539/regex-2025.10.23-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:5b5cb5b6344c4c4c24b2dc87b0bfee78202b07ef7633385df70da7fcf6f7cec6", size = 488960, upload-time = "2025-10-21T15:56:40.849Z" }, + { url = "https://files.pythonhosted.org/packages/8e/7d/ebb7085b8fa31c24ce0355107cea2b92229d9050552a01c5d291c42aecea/regex-2025.10.23-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a6ce7973384c37bdf0f371a843f95a6e6f4e1489e10e0cf57330198df72959c5", size = 290932, upload-time = "2025-10-21T15:56:42.875Z" }, + { url = "https://files.pythonhosted.org/packages/27/41/43906867287cbb5ca4cee671c3cc8081e15deef86a8189c3aad9ac9f6b4d/regex-2025.10.23-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2ee3663f2c334959016b56e3bd0dd187cbc73f948e3a3af14c3caaa0c3035d10", size = 288766, upload-time = "2025-10-21T15:56:44.894Z" }, + { url = "https://files.pythonhosted.org/packages/ab/9e/ea66132776700fc77a39b1056e7a5f1308032fead94507e208dc6716b7cd/regex-2025.10.23-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2003cc82a579107e70d013482acce8ba773293f2db534fb532738395c557ff34", size = 798884, upload-time = "2025-10-21T15:56:47.178Z" }, + { url = "https://files.pythonhosted.org/packages/d5/99/aed1453687ab63819a443930770db972c5c8064421f0d9f5da9ad029f26b/regex-2025.10.23-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:182c452279365a93a9f45874f7f191ec1c51e1f1eb41bf2b16563f1a40c1da3a", size = 864768, upload-time = "2025-10-21T15:56:49.793Z" 
}, + { url = "https://files.pythonhosted.org/packages/99/5d/732fe747a1304805eb3853ce6337eea16b169f7105a0d0dd9c6a5ffa9948/regex-2025.10.23-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b1249e9ff581c5b658c8f0437f883b01f1edcf424a16388591e7c05e5e9e8b0c", size = 911394, upload-time = "2025-10-21T15:56:52.186Z" }, + { url = "https://files.pythonhosted.org/packages/5e/48/58a1f6623466522352a6efa153b9a3714fc559d9f930e9bc947b4a88a2c3/regex-2025.10.23-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2b841698f93db3ccc36caa1900d2a3be281d9539b822dc012f08fc80b46a3224", size = 803145, upload-time = "2025-10-21T15:56:55.142Z" }, + { url = "https://files.pythonhosted.org/packages/ea/f6/7dea79be2681a5574ab3fc237aa53b2c1dfd6bd2b44d4640b6c76f33f4c1/regex-2025.10.23-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:956d89e0c92d471e8f7eee73f73fdff5ed345886378c45a43175a77538a1ffe4", size = 787831, upload-time = "2025-10-21T15:56:57.203Z" }, + { url = "https://files.pythonhosted.org/packages/3a/ad/07b76950fbbe65f88120ca2d8d845047c401450f607c99ed38862904671d/regex-2025.10.23-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:5c259cb363299a0d90d63b5c0d7568ee98419861618a95ee9d91a41cb9954462", size = 859162, upload-time = "2025-10-21T15:56:59.195Z" }, + { url = "https://files.pythonhosted.org/packages/41/87/374f3b2021b22aa6a4fc0b750d63f9721e53d1631a238f7a1c343c1cd288/regex-2025.10.23-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:185d2b18c062820b3a40d8fefa223a83f10b20a674bf6e8c4a432e8dfd844627", size = 849899, upload-time = "2025-10-21T15:57:01.747Z" }, + { url = "https://files.pythonhosted.org/packages/12/4a/7f7bb17c5a5a9747249807210e348450dab9212a46ae6d23ebce86ba6a2b/regex-2025.10.23-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:281d87fa790049c2b7c1b4253121edd80b392b19b5a3d28dc2a77579cb2a58ec", size = 789372, upload-time = "2025-10-21T15:57:04.018Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/dd/9c7728ff544fea09bbc8635e4c9e7c423b11c24f1a7a14e6ac4831466709/regex-2025.10.23-cp314-cp314-win32.whl", hash = "sha256:63b81eef3656072e4ca87c58084c7a9c2b81d41a300b157be635a8a675aacfb8", size = 271451, upload-time = "2025-10-21T15:57:06.266Z" }, + { url = "https://files.pythonhosted.org/packages/48/f8/ef7837ff858eb74079c4804c10b0403c0b740762e6eedba41062225f7117/regex-2025.10.23-cp314-cp314-win_amd64.whl", hash = "sha256:0967c5b86f274800a34a4ed862dfab56928144d03cb18821c5153f8777947796", size = 280173, upload-time = "2025-10-21T15:57:08.206Z" }, + { url = "https://files.pythonhosted.org/packages/8e/d0/d576e1dbd9885bfcd83d0e90762beea48d9373a6f7ed39170f44ed22e336/regex-2025.10.23-cp314-cp314-win_arm64.whl", hash = "sha256:c70dfe58b0a00b36aa04cdb0f798bf3e0adc31747641f69e191109fd8572c9a9", size = 273206, upload-time = "2025-10-21T15:57:10.367Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d0/2025268315e8b2b7b660039824cb7765a41623e97d4cd421510925400487/regex-2025.10.23-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:1f5799ea1787aa6de6c150377d11afad39a38afd033f0c5247aecb997978c422", size = 491854, upload-time = "2025-10-21T15:57:12.526Z" }, + { url = "https://files.pythonhosted.org/packages/44/35/5681c2fec5e8b33454390af209c4353dfc44606bf06d714b0b8bd0454ffe/regex-2025.10.23-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a9639ab7540cfea45ef57d16dcbea2e22de351998d614c3ad2f9778fa3bdd788", size = 292542, upload-time = "2025-10-21T15:57:15.158Z" }, + { url = "https://files.pythonhosted.org/packages/5d/17/184eed05543b724132e4a18149e900f5189001fcfe2d64edaae4fbaf36b4/regex-2025.10.23-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:08f52122c352eb44c3421dab78b9b73a8a77a282cc8314ae576fcaa92b780d10", size = 290903, upload-time = "2025-10-21T15:57:17.108Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/d0/5e3347aa0db0de382dddfa133a7b0ae72f24b4344f3989398980b44a3924/regex-2025.10.23-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ebf1baebef1c4088ad5a5623decec6b52950f0e4d7a0ae4d48f0a99f8c9cb7d7", size = 807546, upload-time = "2025-10-21T15:57:19.179Z" }, + { url = "https://files.pythonhosted.org/packages/d2/bb/40c589bbdce1be0c55e9f8159789d58d47a22014f2f820cf2b517a5cd193/regex-2025.10.23-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:16b0f1c2e2d566c562d5c384c2b492646be0a19798532fdc1fdedacc66e3223f", size = 873322, upload-time = "2025-10-21T15:57:21.36Z" }, + { url = "https://files.pythonhosted.org/packages/fe/56/a7e40c01575ac93360e606278d359f91829781a9f7fb6e5aa435039edbda/regex-2025.10.23-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7ada5d9dceafaab92646aa00c10a9efd9b09942dd9b0d7c5a4b73db92cc7e61", size = 914855, upload-time = "2025-10-21T15:57:24.044Z" }, + { url = "https://files.pythonhosted.org/packages/5c/4b/d55587b192763db3163c3f508b3b67b31bb6f5e7a0e08b83013d0a59500a/regex-2025.10.23-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3a36b4005770044bf08edecc798f0e41a75795b9e7c9c12fe29da8d792ef870c", size = 812724, upload-time = "2025-10-21T15:57:26.123Z" }, + { url = "https://files.pythonhosted.org/packages/33/20/18bac334955fbe99d17229f4f8e98d05e4a501ac03a442be8facbb37c304/regex-2025.10.23-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:af7b2661dcc032da1fae82069b5ebf2ac1dfcd5359ef8b35e1367bfc92181432", size = 795439, upload-time = "2025-10-21T15:57:28.497Z" }, + { url = "https://files.pythonhosted.org/packages/67/46/c57266be9df8549c7d85deb4cb82280cb0019e46fff677534c5fa1badfa4/regex-2025.10.23-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = 
"sha256:1cb976810ac1416a67562c2e5ba0accf6f928932320fef302e08100ed681b38e", size = 868336, upload-time = "2025-10-21T15:57:30.867Z" }, + { url = "https://files.pythonhosted.org/packages/b8/f3/bd5879e41ef8187fec5e678e94b526a93f99e7bbe0437b0f2b47f9101694/regex-2025.10.23-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:1a56a54be3897d62f54290190fbcd754bff6932934529fbf5b29933da28fcd43", size = 854567, upload-time = "2025-10-21T15:57:33.062Z" }, + { url = "https://files.pythonhosted.org/packages/e6/57/2b6bbdbd2f24dfed5b028033aa17ad8f7d86bb28f1a892cac8b3bc89d059/regex-2025.10.23-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8f3e6d202fb52c2153f532043bbcf618fd177df47b0b306741eb9b60ba96edc3", size = 799565, upload-time = "2025-10-21T15:57:35.153Z" }, + { url = "https://files.pythonhosted.org/packages/c7/ba/a6168f542ba73b151ed81237adf6b869c7b2f7f8d51618111296674e20ee/regex-2025.10.23-cp314-cp314t-win32.whl", hash = "sha256:1fa1186966b2621b1769fd467c7b22e317e6ba2d2cdcecc42ea3089ef04a8521", size = 274428, upload-time = "2025-10-21T15:57:37.996Z" }, + { url = "https://files.pythonhosted.org/packages/ef/a0/c84475e14a2829e9b0864ebf77c3f7da909df9d8acfe2bb540ff0072047c/regex-2025.10.23-cp314-cp314t-win_amd64.whl", hash = "sha256:08a15d40ce28362eac3e78e83d75475147869c1ff86bc93285f43b4f4431a741", size = 284140, upload-time = "2025-10-21T15:57:40.027Z" }, + { url = "https://files.pythonhosted.org/packages/51/33/6a08ade0eee5b8ba79386869fa6f77afeb835b60510f3525db987e2fffc4/regex-2025.10.23-cp314-cp314t-win_arm64.whl", hash = "sha256:a93e97338e1c8ea2649e130dcfbe8cd69bba5e1e163834752ab64dcb4de6d5ed", size = 274497, upload-time = "2025-10-21T15:57:42.389Z" }, ] [[package]] @@ -4835,137 +4835,124 @@ wheels = [ [[package]] name = "rpds-py" -version = "0.27.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e9/dd/2c0cbe774744272b0ae725f44032c77bdcab6e8bcf544bffa3b6e70c8dba/rpds_py-0.27.1.tar.gz", hash = 
"sha256:26a1c73171d10b7acccbded82bf6a586ab8203601e565badc74bbbf8bc5a10f8", size = 27479, upload-time = "2025-08-27T12:16:36.024Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/ed/3aef893e2dd30e77e35d20d4ddb45ca459db59cead748cad9796ad479411/rpds_py-0.27.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:68afeec26d42ab3b47e541b272166a0b4400313946871cba3ed3a4fc0cab1cef", size = 371606, upload-time = "2025-08-27T12:12:25.189Z" }, - { url = "https://files.pythonhosted.org/packages/6d/82/9818b443e5d3eb4c83c3994561387f116aae9833b35c484474769c4a8faf/rpds_py-0.27.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74e5b2f7bb6fa38b1b10546d27acbacf2a022a8b5543efb06cfebc72a59c85be", size = 353452, upload-time = "2025-08-27T12:12:27.433Z" }, - { url = "https://files.pythonhosted.org/packages/99/c7/d2a110ffaaa397fc6793a83c7bd3545d9ab22658b7cdff05a24a4535cc45/rpds_py-0.27.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9024de74731df54546fab0bfbcdb49fae19159ecaecfc8f37c18d2c7e2c0bd61", size = 381519, upload-time = "2025-08-27T12:12:28.719Z" }, - { url = "https://files.pythonhosted.org/packages/5a/bc/e89581d1f9d1be7d0247eaef602566869fdc0d084008ba139e27e775366c/rpds_py-0.27.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:31d3ebadefcd73b73928ed0b2fd696f7fefda8629229f81929ac9c1854d0cffb", size = 394424, upload-time = "2025-08-27T12:12:30.207Z" }, - { url = "https://files.pythonhosted.org/packages/ac/2e/36a6861f797530e74bb6ed53495f8741f1ef95939eed01d761e73d559067/rpds_py-0.27.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2e7f8f169d775dd9092a1743768d771f1d1300453ddfe6325ae3ab5332b4657", size = 523467, upload-time = "2025-08-27T12:12:31.808Z" }, - { url = "https://files.pythonhosted.org/packages/c4/59/c1bc2be32564fa499f988f0a5c6505c2f4746ef96e58e4d7de5cf923d77e/rpds_py-0.27.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:3d905d16f77eb6ab2e324e09bfa277b4c8e5e6b8a78a3e7ff8f3cdf773b4c013", size = 402660, upload-time = "2025-08-27T12:12:33.444Z" }, - { url = "https://files.pythonhosted.org/packages/0a/ec/ef8bf895f0628dd0a59e54d81caed6891663cb9c54a0f4bb7da918cb88cf/rpds_py-0.27.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50c946f048209e6362e22576baea09193809f87687a95a8db24e5fbdb307b93a", size = 384062, upload-time = "2025-08-27T12:12:34.857Z" }, - { url = "https://files.pythonhosted.org/packages/69/f7/f47ff154be8d9a5e691c083a920bba89cef88d5247c241c10b9898f595a1/rpds_py-0.27.1-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:3deab27804d65cd8289eb814c2c0e807c4b9d9916c9225e363cb0cf875eb67c1", size = 401289, upload-time = "2025-08-27T12:12:36.085Z" }, - { url = "https://files.pythonhosted.org/packages/3b/d9/ca410363efd0615814ae579f6829cafb39225cd63e5ea5ed1404cb345293/rpds_py-0.27.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8b61097f7488de4be8244c89915da8ed212832ccf1e7c7753a25a394bf9b1f10", size = 417718, upload-time = "2025-08-27T12:12:37.401Z" }, - { url = "https://files.pythonhosted.org/packages/e3/a0/8cb5c2ff38340f221cc067cc093d1270e10658ba4e8d263df923daa18e86/rpds_py-0.27.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8a3f29aba6e2d7d90528d3c792555a93497fe6538aa65eb675b44505be747808", size = 558333, upload-time = "2025-08-27T12:12:38.672Z" }, - { url = "https://files.pythonhosted.org/packages/6f/8c/1b0de79177c5d5103843774ce12b84caa7164dfc6cd66378768d37db11bf/rpds_py-0.27.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:dd6cd0485b7d347304067153a6dc1d73f7d4fd995a396ef32a24d24b8ac63ac8", size = 589127, upload-time = "2025-08-27T12:12:41.48Z" }, - { url = "https://files.pythonhosted.org/packages/c8/5e/26abb098d5e01266b0f3a2488d299d19ccc26849735d9d2b95c39397e945/rpds_py-0.27.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6f4461bf931108c9fa226ffb0e257c1b18dc2d44cd72b125bec50ee0ab1248a9", size = 
554899, upload-time = "2025-08-27T12:12:42.925Z" }, - { url = "https://files.pythonhosted.org/packages/de/41/905cc90ced13550db017f8f20c6d8e8470066c5738ba480d7ba63e3d136b/rpds_py-0.27.1-cp310-cp310-win32.whl", hash = "sha256:ee5422d7fb21f6a00c1901bf6559c49fee13a5159d0288320737bbf6585bd3e4", size = 217450, upload-time = "2025-08-27T12:12:44.813Z" }, - { url = "https://files.pythonhosted.org/packages/75/3d/6bef47b0e253616ccdf67c283e25f2d16e18ccddd38f92af81d5a3420206/rpds_py-0.27.1-cp310-cp310-win_amd64.whl", hash = "sha256:3e039aabf6d5f83c745d5f9a0a381d031e9ed871967c0a5c38d201aca41f3ba1", size = 228447, upload-time = "2025-08-27T12:12:46.204Z" }, - { url = "https://files.pythonhosted.org/packages/b5/c1/7907329fbef97cbd49db6f7303893bd1dd5a4a3eae415839ffdfb0762cae/rpds_py-0.27.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:be898f271f851f68b318872ce6ebebbc62f303b654e43bf72683dbdc25b7c881", size = 371063, upload-time = "2025-08-27T12:12:47.856Z" }, - { url = "https://files.pythonhosted.org/packages/11/94/2aab4bc86228bcf7c48760990273653a4900de89c7537ffe1b0d6097ed39/rpds_py-0.27.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:62ac3d4e3e07b58ee0ddecd71d6ce3b1637de2d373501412df395a0ec5f9beb5", size = 353210, upload-time = "2025-08-27T12:12:49.187Z" }, - { url = "https://files.pythonhosted.org/packages/3a/57/f5eb3ecf434342f4f1a46009530e93fd201a0b5b83379034ebdb1d7c1a58/rpds_py-0.27.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4708c5c0ceb2d034f9991623631d3d23cb16e65c83736ea020cdbe28d57c0a0e", size = 381636, upload-time = "2025-08-27T12:12:50.492Z" }, - { url = "https://files.pythonhosted.org/packages/ae/f4/ef95c5945e2ceb5119571b184dd5a1cc4b8541bbdf67461998cfeac9cb1e/rpds_py-0.27.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:abfa1171a9952d2e0002aba2ad3780820b00cc3d9c98c6630f2e93271501f66c", size = 394341, upload-time = "2025-08-27T12:12:52.024Z" }, - { url = 
"https://files.pythonhosted.org/packages/5a/7e/4bd610754bf492d398b61725eb9598ddd5eb86b07d7d9483dbcd810e20bc/rpds_py-0.27.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b507d19f817ebaca79574b16eb2ae412e5c0835542c93fe9983f1e432aca195", size = 523428, upload-time = "2025-08-27T12:12:53.779Z" }, - { url = "https://files.pythonhosted.org/packages/9f/e5/059b9f65a8c9149361a8b75094864ab83b94718344db511fd6117936ed2a/rpds_py-0.27.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:168b025f8fd8d8d10957405f3fdcef3dc20f5982d398f90851f4abc58c566c52", size = 402923, upload-time = "2025-08-27T12:12:55.15Z" }, - { url = "https://files.pythonhosted.org/packages/f5/48/64cabb7daced2968dd08e8a1b7988bf358d7bd5bcd5dc89a652f4668543c/rpds_py-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb56c6210ef77caa58e16e8c17d35c63fe3f5b60fd9ba9d424470c3400bcf9ed", size = 384094, upload-time = "2025-08-27T12:12:57.194Z" }, - { url = "https://files.pythonhosted.org/packages/ae/e1/dc9094d6ff566bff87add8a510c89b9e158ad2ecd97ee26e677da29a9e1b/rpds_py-0.27.1-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:d252f2d8ca0195faa707f8eb9368955760880b2b42a8ee16d382bf5dd807f89a", size = 401093, upload-time = "2025-08-27T12:12:58.985Z" }, - { url = "https://files.pythonhosted.org/packages/37/8e/ac8577e3ecdd5593e283d46907d7011618994e1d7ab992711ae0f78b9937/rpds_py-0.27.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6e5e54da1e74b91dbc7996b56640f79b195d5925c2b78efaa8c5d53e1d88edde", size = 417969, upload-time = "2025-08-27T12:13:00.367Z" }, - { url = "https://files.pythonhosted.org/packages/66/6d/87507430a8f74a93556fe55c6485ba9c259949a853ce407b1e23fea5ba31/rpds_py-0.27.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ffce0481cc6e95e5b3f0a47ee17ffbd234399e6d532f394c8dce320c3b089c21", size = 558302, upload-time = "2025-08-27T12:13:01.737Z" }, - { url = 
"https://files.pythonhosted.org/packages/3a/bb/1db4781ce1dda3eecc735e3152659a27b90a02ca62bfeea17aee45cc0fbc/rpds_py-0.27.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a205fdfe55c90c2cd8e540ca9ceba65cbe6629b443bc05db1f590a3db8189ff9", size = 589259, upload-time = "2025-08-27T12:13:03.127Z" }, - { url = "https://files.pythonhosted.org/packages/7b/0e/ae1c8943d11a814d01b482e1f8da903f88047a962dff9bbdadf3bd6e6fd1/rpds_py-0.27.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:689fb5200a749db0415b092972e8eba85847c23885c8543a8b0f5c009b1a5948", size = 554983, upload-time = "2025-08-27T12:13:04.516Z" }, - { url = "https://files.pythonhosted.org/packages/b2/d5/0b2a55415931db4f112bdab072443ff76131b5ac4f4dc98d10d2d357eb03/rpds_py-0.27.1-cp311-cp311-win32.whl", hash = "sha256:3182af66048c00a075010bc7f4860f33913528a4b6fc09094a6e7598e462fe39", size = 217154, upload-time = "2025-08-27T12:13:06.278Z" }, - { url = "https://files.pythonhosted.org/packages/24/75/3b7ffe0d50dc86a6a964af0d1cc3a4a2cdf437cb7b099a4747bbb96d1819/rpds_py-0.27.1-cp311-cp311-win_amd64.whl", hash = "sha256:b4938466c6b257b2f5c4ff98acd8128ec36b5059e5c8f8372d79316b1c36bb15", size = 228627, upload-time = "2025-08-27T12:13:07.625Z" }, - { url = "https://files.pythonhosted.org/packages/8d/3f/4fd04c32abc02c710f09a72a30c9a55ea3cc154ef8099078fd50a0596f8e/rpds_py-0.27.1-cp311-cp311-win_arm64.whl", hash = "sha256:2f57af9b4d0793e53266ee4325535a31ba48e2f875da81a9177c9926dfa60746", size = 220998, upload-time = "2025-08-27T12:13:08.972Z" }, - { url = "https://files.pythonhosted.org/packages/bd/fe/38de28dee5df58b8198c743fe2bea0c785c6d40941b9950bac4cdb71a014/rpds_py-0.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ae2775c1973e3c30316892737b91f9283f9908e3cc7625b9331271eaaed7dc90", size = 361887, upload-time = "2025-08-27T12:13:10.233Z" }, - { url = "https://files.pythonhosted.org/packages/7c/9a/4b6c7eedc7dd90986bf0fab6ea2a091ec11c01b15f8ba0a14d3f80450468/rpds_py-0.27.1-cp312-cp312-macosx_11_0_arm64.whl", 
hash = "sha256:2643400120f55c8a96f7c9d858f7be0c88d383cd4653ae2cf0d0c88f668073e5", size = 345795, upload-time = "2025-08-27T12:13:11.65Z" }, - { url = "https://files.pythonhosted.org/packages/6f/0e/e650e1b81922847a09cca820237b0edee69416a01268b7754d506ade11ad/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16323f674c089b0360674a4abd28d5042947d54ba620f72514d69be4ff64845e", size = 385121, upload-time = "2025-08-27T12:13:13.008Z" }, - { url = "https://files.pythonhosted.org/packages/1b/ea/b306067a712988e2bff00dcc7c8f31d26c29b6d5931b461aa4b60a013e33/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a1f4814b65eacac94a00fc9a526e3fdafd78e439469644032032d0d63de4881", size = 398976, upload-time = "2025-08-27T12:13:14.368Z" }, - { url = "https://files.pythonhosted.org/packages/2c/0a/26dc43c8840cb8fe239fe12dbc8d8de40f2365e838f3d395835dde72f0e5/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ba32c16b064267b22f1850a34051121d423b6f7338a12b9459550eb2096e7ec", size = 525953, upload-time = "2025-08-27T12:13:15.774Z" }, - { url = "https://files.pythonhosted.org/packages/22/14/c85e8127b573aaf3a0cbd7fbb8c9c99e735a4a02180c84da2a463b766e9e/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5c20f33fd10485b80f65e800bbe5f6785af510b9f4056c5a3c612ebc83ba6cb", size = 407915, upload-time = "2025-08-27T12:13:17.379Z" }, - { url = "https://files.pythonhosted.org/packages/ed/7b/8f4fee9ba1fb5ec856eb22d725a4efa3deb47f769597c809e03578b0f9d9/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:466bfe65bd932da36ff279ddd92de56b042f2266d752719beb97b08526268ec5", size = 386883, upload-time = "2025-08-27T12:13:18.704Z" }, - { url = "https://files.pythonhosted.org/packages/86/47/28fa6d60f8b74fcdceba81b272f8d9836ac0340570f68f5df6b41838547b/rpds_py-0.27.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = 
"sha256:41e532bbdcb57c92ba3be62c42e9f096431b4cf478da9bc3bc6ce5c38ab7ba7a", size = 405699, upload-time = "2025-08-27T12:13:20.089Z" }, - { url = "https://files.pythonhosted.org/packages/d0/fd/c5987b5e054548df56953a21fe2ebed51fc1ec7c8f24fd41c067b68c4a0a/rpds_py-0.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f149826d742b406579466283769a8ea448eed82a789af0ed17b0cd5770433444", size = 423713, upload-time = "2025-08-27T12:13:21.436Z" }, - { url = "https://files.pythonhosted.org/packages/ac/ba/3c4978b54a73ed19a7d74531be37a8bcc542d917c770e14d372b8daea186/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:80c60cfb5310677bd67cb1e85a1e8eb52e12529545441b43e6f14d90b878775a", size = 562324, upload-time = "2025-08-27T12:13:22.789Z" }, - { url = "https://files.pythonhosted.org/packages/b5/6c/6943a91768fec16db09a42b08644b960cff540c66aab89b74be6d4a144ba/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7ee6521b9baf06085f62ba9c7a3e5becffbc32480d2f1b351559c001c38ce4c1", size = 593646, upload-time = "2025-08-27T12:13:24.122Z" }, - { url = "https://files.pythonhosted.org/packages/11/73/9d7a8f4be5f4396f011a6bb7a19fe26303a0dac9064462f5651ced2f572f/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a512c8263249a9d68cac08b05dd59d2b3f2061d99b322813cbcc14c3c7421998", size = 558137, upload-time = "2025-08-27T12:13:25.557Z" }, - { url = "https://files.pythonhosted.org/packages/6e/96/6772cbfa0e2485bcceef8071de7821f81aeac8bb45fbfd5542a3e8108165/rpds_py-0.27.1-cp312-cp312-win32.whl", hash = "sha256:819064fa048ba01b6dadc5116f3ac48610435ac9a0058bbde98e569f9e785c39", size = 221343, upload-time = "2025-08-27T12:13:26.967Z" }, - { url = "https://files.pythonhosted.org/packages/67/b6/c82f0faa9af1c6a64669f73a17ee0eeef25aff30bb9a1c318509efe45d84/rpds_py-0.27.1-cp312-cp312-win_amd64.whl", hash = "sha256:d9199717881f13c32c4046a15f024971a3b78ad4ea029e8da6b86e5aa9cf4594", size = 232497, upload-time = "2025-08-27T12:13:28.326Z" }, 
- { url = "https://files.pythonhosted.org/packages/e1/96/2817b44bd2ed11aebacc9251da03689d56109b9aba5e311297b6902136e2/rpds_py-0.27.1-cp312-cp312-win_arm64.whl", hash = "sha256:33aa65b97826a0e885ef6e278fbd934e98cdcfed80b63946025f01e2f5b29502", size = 222790, upload-time = "2025-08-27T12:13:29.71Z" }, - { url = "https://files.pythonhosted.org/packages/cc/77/610aeee8d41e39080c7e14afa5387138e3c9fa9756ab893d09d99e7d8e98/rpds_py-0.27.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e4b9fcfbc021633863a37e92571d6f91851fa656f0180246e84cbd8b3f6b329b", size = 361741, upload-time = "2025-08-27T12:13:31.039Z" }, - { url = "https://files.pythonhosted.org/packages/3a/fc/c43765f201c6a1c60be2043cbdb664013def52460a4c7adace89d6682bf4/rpds_py-0.27.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1441811a96eadca93c517d08df75de45e5ffe68aa3089924f963c782c4b898cf", size = 345574, upload-time = "2025-08-27T12:13:32.902Z" }, - { url = "https://files.pythonhosted.org/packages/20/42/ee2b2ca114294cd9847d0ef9c26d2b0851b2e7e00bf14cc4c0b581df0fc3/rpds_py-0.27.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55266dafa22e672f5a4f65019015f90336ed31c6383bd53f5e7826d21a0e0b83", size = 385051, upload-time = "2025-08-27T12:13:34.228Z" }, - { url = "https://files.pythonhosted.org/packages/fd/e8/1e430fe311e4799e02e2d1af7c765f024e95e17d651612425b226705f910/rpds_py-0.27.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d78827d7ac08627ea2c8e02c9e5b41180ea5ea1f747e9db0915e3adf36b62dcf", size = 398395, upload-time = "2025-08-27T12:13:36.132Z" }, - { url = "https://files.pythonhosted.org/packages/82/95/9dc227d441ff2670651c27a739acb2535ccaf8b351a88d78c088965e5996/rpds_py-0.27.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae92443798a40a92dc5f0b01d8a7c93adde0c4dc965310a29ae7c64d72b9fad2", size = 524334, upload-time = "2025-08-27T12:13:37.562Z" }, - { url = 
"https://files.pythonhosted.org/packages/87/01/a670c232f401d9ad461d9a332aa4080cd3cb1d1df18213dbd0d2a6a7ab51/rpds_py-0.27.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c46c9dd2403b66a2a3b9720ec4b74d4ab49d4fabf9f03dfdce2d42af913fe8d0", size = 407691, upload-time = "2025-08-27T12:13:38.94Z" }, - { url = "https://files.pythonhosted.org/packages/03/36/0a14aebbaa26fe7fab4780c76f2239e76cc95a0090bdb25e31d95c492fcd/rpds_py-0.27.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2efe4eb1d01b7f5f1939f4ef30ecea6c6b3521eec451fb93191bf84b2a522418", size = 386868, upload-time = "2025-08-27T12:13:40.192Z" }, - { url = "https://files.pythonhosted.org/packages/3b/03/8c897fb8b5347ff6c1cc31239b9611c5bf79d78c984430887a353e1409a1/rpds_py-0.27.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:15d3b4d83582d10c601f481eca29c3f138d44c92187d197aff663a269197c02d", size = 405469, upload-time = "2025-08-27T12:13:41.496Z" }, - { url = "https://files.pythonhosted.org/packages/da/07/88c60edc2df74850d496d78a1fdcdc7b54360a7f610a4d50008309d41b94/rpds_py-0.27.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4ed2e16abbc982a169d30d1a420274a709949e2cbdef119fe2ec9d870b42f274", size = 422125, upload-time = "2025-08-27T12:13:42.802Z" }, - { url = "https://files.pythonhosted.org/packages/6b/86/5f4c707603e41b05f191a749984f390dabcbc467cf833769b47bf14ba04f/rpds_py-0.27.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a75f305c9b013289121ec0f1181931975df78738cdf650093e6b86d74aa7d8dd", size = 562341, upload-time = "2025-08-27T12:13:44.472Z" }, - { url = "https://files.pythonhosted.org/packages/b2/92/3c0cb2492094e3cd9baf9e49bbb7befeceb584ea0c1a8b5939dca4da12e5/rpds_py-0.27.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:67ce7620704745881a3d4b0ada80ab4d99df390838839921f99e63c474f82cf2", size = 592511, upload-time = "2025-08-27T12:13:45.898Z" }, - { url = 
"https://files.pythonhosted.org/packages/10/bb/82e64fbb0047c46a168faa28d0d45a7851cd0582f850b966811d30f67ad8/rpds_py-0.27.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d992ac10eb86d9b6f369647b6a3f412fc0075cfd5d799530e84d335e440a002", size = 557736, upload-time = "2025-08-27T12:13:47.408Z" }, - { url = "https://files.pythonhosted.org/packages/00/95/3c863973d409210da7fb41958172c6b7dbe7fc34e04d3cc1f10bb85e979f/rpds_py-0.27.1-cp313-cp313-win32.whl", hash = "sha256:4f75e4bd8ab8db624e02c8e2fc4063021b58becdbe6df793a8111d9343aec1e3", size = 221462, upload-time = "2025-08-27T12:13:48.742Z" }, - { url = "https://files.pythonhosted.org/packages/ce/2c/5867b14a81dc217b56d95a9f2a40fdbc56a1ab0181b80132beeecbd4b2d6/rpds_py-0.27.1-cp313-cp313-win_amd64.whl", hash = "sha256:f9025faafc62ed0b75a53e541895ca272815bec18abe2249ff6501c8f2e12b83", size = 232034, upload-time = "2025-08-27T12:13:50.11Z" }, - { url = "https://files.pythonhosted.org/packages/c7/78/3958f3f018c01923823f1e47f1cc338e398814b92d83cd278364446fac66/rpds_py-0.27.1-cp313-cp313-win_arm64.whl", hash = "sha256:ed10dc32829e7d222b7d3b93136d25a406ba9788f6a7ebf6809092da1f4d279d", size = 222392, upload-time = "2025-08-27T12:13:52.587Z" }, - { url = "https://files.pythonhosted.org/packages/01/76/1cdf1f91aed5c3a7bf2eba1f1c4e4d6f57832d73003919a20118870ea659/rpds_py-0.27.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:92022bbbad0d4426e616815b16bc4127f83c9a74940e1ccf3cfe0b387aba0228", size = 358355, upload-time = "2025-08-27T12:13:54.012Z" }, - { url = "https://files.pythonhosted.org/packages/c3/6f/bf142541229374287604caf3bb2a4ae17f0a580798fd72d3b009b532db4e/rpds_py-0.27.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:47162fdab9407ec3f160805ac3e154df042e577dd53341745fc7fb3f625e6d92", size = 342138, upload-time = "2025-08-27T12:13:55.791Z" }, - { url = 
"https://files.pythonhosted.org/packages/1a/77/355b1c041d6be40886c44ff5e798b4e2769e497b790f0f7fd1e78d17e9a8/rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb89bec23fddc489e5d78b550a7b773557c9ab58b7946154a10a6f7a214a48b2", size = 380247, upload-time = "2025-08-27T12:13:57.683Z" }, - { url = "https://files.pythonhosted.org/packages/d6/a4/d9cef5c3946ea271ce2243c51481971cd6e34f21925af2783dd17b26e815/rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e48af21883ded2b3e9eb48cb7880ad8598b31ab752ff3be6457001d78f416723", size = 390699, upload-time = "2025-08-27T12:13:59.137Z" }, - { url = "https://files.pythonhosted.org/packages/3a/06/005106a7b8c6c1a7e91b73169e49870f4af5256119d34a361ae5240a0c1d/rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6f5b7bd8e219ed50299e58551a410b64daafb5017d54bbe822e003856f06a802", size = 521852, upload-time = "2025-08-27T12:14:00.583Z" }, - { url = "https://files.pythonhosted.org/packages/e5/3e/50fb1dac0948e17a02eb05c24510a8fe12d5ce8561c6b7b7d1339ab7ab9c/rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08f1e20bccf73b08d12d804d6e1c22ca5530e71659e6673bce31a6bb71c1e73f", size = 402582, upload-time = "2025-08-27T12:14:02.034Z" }, - { url = "https://files.pythonhosted.org/packages/cb/b0/f4e224090dc5b0ec15f31a02d746ab24101dd430847c4d99123798661bfc/rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dc5dceeaefcc96dc192e3a80bbe1d6c410c469e97bdd47494a7d930987f18b2", size = 384126, upload-time = "2025-08-27T12:14:03.437Z" }, - { url = "https://files.pythonhosted.org/packages/54/77/ac339d5f82b6afff1df8f0fe0d2145cc827992cb5f8eeb90fc9f31ef7a63/rpds_py-0.27.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d76f9cc8665acdc0c9177043746775aa7babbf479b5520b78ae4002d889f5c21", size = 399486, upload-time = "2025-08-27T12:14:05.443Z" }, - { url = 
"https://files.pythonhosted.org/packages/d6/29/3e1c255eee6ac358c056a57d6d6869baa00a62fa32eea5ee0632039c50a3/rpds_py-0.27.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:134fae0e36022edad8290a6661edf40c023562964efea0cc0ec7f5d392d2aaef", size = 414832, upload-time = "2025-08-27T12:14:06.902Z" }, - { url = "https://files.pythonhosted.org/packages/3f/db/6d498b844342deb3fa1d030598db93937a9964fcf5cb4da4feb5f17be34b/rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb11a4f1b2b63337cfd3b4d110af778a59aae51c81d195768e353d8b52f88081", size = 557249, upload-time = "2025-08-27T12:14:08.37Z" }, - { url = "https://files.pythonhosted.org/packages/60/f3/690dd38e2310b6f68858a331399b4d6dbb9132c3e8ef8b4333b96caf403d/rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:13e608ac9f50a0ed4faec0e90ece76ae33b34c0e8656e3dceb9a7db994c692cd", size = 587356, upload-time = "2025-08-27T12:14:10.034Z" }, - { url = "https://files.pythonhosted.org/packages/86/e3/84507781cccd0145f35b1dc32c72675200c5ce8d5b30f813e49424ef68fc/rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd2135527aa40f061350c3f8f89da2644de26cd73e4de458e79606384f4f68e7", size = 555300, upload-time = "2025-08-27T12:14:11.783Z" }, - { url = "https://files.pythonhosted.org/packages/e5/ee/375469849e6b429b3516206b4580a79e9ef3eb12920ddbd4492b56eaacbe/rpds_py-0.27.1-cp313-cp313t-win32.whl", hash = "sha256:3020724ade63fe320a972e2ffd93b5623227e684315adce194941167fee02688", size = 216714, upload-time = "2025-08-27T12:14:13.629Z" }, - { url = "https://files.pythonhosted.org/packages/21/87/3fc94e47c9bd0742660e84706c311a860dcae4374cf4a03c477e23ce605a/rpds_py-0.27.1-cp313-cp313t-win_amd64.whl", hash = "sha256:8ee50c3e41739886606388ba3ab3ee2aae9f35fb23f833091833255a31740797", size = 228943, upload-time = "2025-08-27T12:14:14.937Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/36/b6e6066520a07cf029d385de869729a895917b411e777ab1cde878100a1d/rpds_py-0.27.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:acb9aafccaae278f449d9c713b64a9e68662e7799dbd5859e2c6b3c67b56d334", size = 362472, upload-time = "2025-08-27T12:14:16.333Z" }, - { url = "https://files.pythonhosted.org/packages/af/07/b4646032e0dcec0df9c73a3bd52f63bc6c5f9cda992f06bd0e73fe3fbebd/rpds_py-0.27.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b7fb801aa7f845ddf601c49630deeeccde7ce10065561d92729bfe81bd21fb33", size = 345676, upload-time = "2025-08-27T12:14:17.764Z" }, - { url = "https://files.pythonhosted.org/packages/b0/16/2f1003ee5d0af4bcb13c0cf894957984c32a6751ed7206db2aee7379a55e/rpds_py-0.27.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0dd05afb46597b9a2e11c351e5e4283c741237e7f617ffb3252780cca9336a", size = 385313, upload-time = "2025-08-27T12:14:19.829Z" }, - { url = "https://files.pythonhosted.org/packages/05/cd/7eb6dd7b232e7f2654d03fa07f1414d7dfc980e82ba71e40a7c46fd95484/rpds_py-0.27.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b6dfb0e058adb12d8b1d1b25f686e94ffa65d9995a5157afe99743bf7369d62b", size = 399080, upload-time = "2025-08-27T12:14:21.531Z" }, - { url = "https://files.pythonhosted.org/packages/20/51/5829afd5000ec1cb60f304711f02572d619040aa3ec033d8226817d1e571/rpds_py-0.27.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed090ccd235f6fa8bb5861684567f0a83e04f52dfc2e5c05f2e4b1309fcf85e7", size = 523868, upload-time = "2025-08-27T12:14:23.485Z" }, - { url = "https://files.pythonhosted.org/packages/05/2c/30eebca20d5db95720ab4d2faec1b5e4c1025c473f703738c371241476a2/rpds_py-0.27.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf876e79763eecf3e7356f157540d6a093cef395b65514f17a356f62af6cc136", size = 408750, upload-time = "2025-08-27T12:14:24.924Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/1a/cdb5083f043597c4d4276eae4e4c70c55ab5accec078da8611f24575a367/rpds_py-0.27.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12ed005216a51b1d6e2b02a7bd31885fe317e45897de81d86dcce7d74618ffff", size = 387688, upload-time = "2025-08-27T12:14:27.537Z" }, - { url = "https://files.pythonhosted.org/packages/7c/92/cf786a15320e173f945d205ab31585cc43969743bb1a48b6888f7a2b0a2d/rpds_py-0.27.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:ee4308f409a40e50593c7e3bb8cbe0b4d4c66d1674a316324f0c2f5383b486f9", size = 407225, upload-time = "2025-08-27T12:14:28.981Z" }, - { url = "https://files.pythonhosted.org/packages/33/5c/85ee16df5b65063ef26017bef33096557a4c83fbe56218ac7cd8c235f16d/rpds_py-0.27.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b08d152555acf1f455154d498ca855618c1378ec810646fcd7c76416ac6dc60", size = 423361, upload-time = "2025-08-27T12:14:30.469Z" }, - { url = "https://files.pythonhosted.org/packages/4b/8e/1c2741307fcabd1a334ecf008e92c4f47bb6f848712cf15c923becfe82bb/rpds_py-0.27.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:dce51c828941973a5684d458214d3a36fcd28da3e1875d659388f4f9f12cc33e", size = 562493, upload-time = "2025-08-27T12:14:31.987Z" }, - { url = "https://files.pythonhosted.org/packages/04/03/5159321baae9b2222442a70c1f988cbbd66b9be0675dd3936461269be360/rpds_py-0.27.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:c1476d6f29eb81aa4151c9a31219b03f1f798dc43d8af1250a870735516a1212", size = 592623, upload-time = "2025-08-27T12:14:33.543Z" }, - { url = "https://files.pythonhosted.org/packages/ff/39/c09fd1ad28b85bc1d4554a8710233c9f4cefd03d7717a1b8fbfd171d1167/rpds_py-0.27.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3ce0cac322b0d69b63c9cdb895ee1b65805ec9ffad37639f291dd79467bee675", size = 558800, upload-time = "2025-08-27T12:14:35.436Z" }, - { url = 
"https://files.pythonhosted.org/packages/c5/d6/99228e6bbcf4baa764b18258f519a9035131d91b538d4e0e294313462a98/rpds_py-0.27.1-cp314-cp314-win32.whl", hash = "sha256:dfbfac137d2a3d0725758cd141f878bf4329ba25e34979797c89474a89a8a3a3", size = 221943, upload-time = "2025-08-27T12:14:36.898Z" }, - { url = "https://files.pythonhosted.org/packages/be/07/c802bc6b8e95be83b79bdf23d1aa61d68324cb1006e245d6c58e959e314d/rpds_py-0.27.1-cp314-cp314-win_amd64.whl", hash = "sha256:a6e57b0abfe7cc513450fcf529eb486b6e4d3f8aee83e92eb5f1ef848218d456", size = 233739, upload-time = "2025-08-27T12:14:38.386Z" }, - { url = "https://files.pythonhosted.org/packages/c8/89/3e1b1c16d4c2d547c5717377a8df99aee8099ff050f87c45cb4d5fa70891/rpds_py-0.27.1-cp314-cp314-win_arm64.whl", hash = "sha256:faf8d146f3d476abfee026c4ae3bdd9ca14236ae4e4c310cbd1cf75ba33d24a3", size = 223120, upload-time = "2025-08-27T12:14:39.82Z" }, - { url = "https://files.pythonhosted.org/packages/62/7e/dc7931dc2fa4a6e46b2a4fa744a9fe5c548efd70e0ba74f40b39fa4a8c10/rpds_py-0.27.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:ba81d2b56b6d4911ce735aad0a1d4495e808b8ee4dc58715998741a26874e7c2", size = 358944, upload-time = "2025-08-27T12:14:41.199Z" }, - { url = "https://files.pythonhosted.org/packages/e6/22/4af76ac4e9f336bfb1a5f240d18a33c6b2fcaadb7472ac7680576512b49a/rpds_py-0.27.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:84f7d509870098de0e864cad0102711c1e24e9b1a50ee713b65928adb22269e4", size = 342283, upload-time = "2025-08-27T12:14:42.699Z" }, - { url = "https://files.pythonhosted.org/packages/1c/15/2a7c619b3c2272ea9feb9ade67a45c40b3eeb500d503ad4c28c395dc51b4/rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e960fc78fecd1100539f14132425e1d5fe44ecb9239f8f27f079962021523e", size = 380320, upload-time = "2025-08-27T12:14:44.157Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/7d/4c6d243ba4a3057e994bb5bedd01b5c963c12fe38dde707a52acdb3849e7/rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62f85b665cedab1a503747617393573995dac4600ff51869d69ad2f39eb5e817", size = 391760, upload-time = "2025-08-27T12:14:45.845Z" }, - { url = "https://files.pythonhosted.org/packages/b4/71/b19401a909b83bcd67f90221330bc1ef11bc486fe4e04c24388d28a618ae/rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fed467af29776f6556250c9ed85ea5a4dd121ab56a5f8b206e3e7a4c551e48ec", size = 522476, upload-time = "2025-08-27T12:14:47.364Z" }, - { url = "https://files.pythonhosted.org/packages/e4/44/1a3b9715c0455d2e2f0f6df5ee6d6f5afdc423d0773a8a682ed2b43c566c/rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2729615f9d430af0ae6b36cf042cb55c0936408d543fb691e1a9e36648fd35a", size = 403418, upload-time = "2025-08-27T12:14:49.991Z" }, - { url = "https://files.pythonhosted.org/packages/1c/4b/fb6c4f14984eb56673bc868a66536f53417ddb13ed44b391998100a06a96/rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b207d881a9aef7ba753d69c123a35d96ca7cb808056998f6b9e8747321f03b8", size = 384771, upload-time = "2025-08-27T12:14:52.159Z" }, - { url = "https://files.pythonhosted.org/packages/c0/56/d5265d2d28b7420d7b4d4d85cad8ef891760f5135102e60d5c970b976e41/rpds_py-0.27.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:639fd5efec029f99b79ae47e5d7e00ad8a773da899b6309f6786ecaf22948c48", size = 400022, upload-time = "2025-08-27T12:14:53.859Z" }, - { url = "https://files.pythonhosted.org/packages/8f/e9/9f5fc70164a569bdd6ed9046486c3568d6926e3a49bdefeeccfb18655875/rpds_py-0.27.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fecc80cb2a90e28af8a9b366edacf33d7a91cbfe4c2c4544ea1246e949cfebeb", size = 416787, upload-time = "2025-08-27T12:14:55.673Z" }, - { url = 
"https://files.pythonhosted.org/packages/d4/64/56dd03430ba491db943a81dcdef115a985aac5f44f565cd39a00c766d45c/rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42a89282d711711d0a62d6f57d81aa43a1368686c45bc1c46b7f079d55692734", size = 557538, upload-time = "2025-08-27T12:14:57.245Z" }, - { url = "https://files.pythonhosted.org/packages/3f/36/92cc885a3129993b1d963a2a42ecf64e6a8e129d2c7cc980dbeba84e55fb/rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:cf9931f14223de59551ab9d38ed18d92f14f055a5f78c1d8ad6493f735021bbb", size = 588512, upload-time = "2025-08-27T12:14:58.728Z" }, - { url = "https://files.pythonhosted.org/packages/dd/10/6b283707780a81919f71625351182b4f98932ac89a09023cb61865136244/rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f39f58a27cc6e59f432b568ed8429c7e1641324fbe38131de852cd77b2d534b0", size = 555813, upload-time = "2025-08-27T12:15:00.334Z" }, - { url = "https://files.pythonhosted.org/packages/04/2e/30b5ea18c01379da6272a92825dd7e53dc9d15c88a19e97932d35d430ef7/rpds_py-0.27.1-cp314-cp314t-win32.whl", hash = "sha256:d5fa0ee122dc09e23607a28e6d7b150da16c662e66409bbe85230e4c85bb528a", size = 217385, upload-time = "2025-08-27T12:15:01.937Z" }, - { url = "https://files.pythonhosted.org/packages/32/7d/97119da51cb1dd3f2f3c0805f155a3aa4a95fa44fe7d78ae15e69edf4f34/rpds_py-0.27.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6567d2bb951e21232c2f660c24cf3470bb96de56cdcb3f071a83feeaff8a2772", size = 230097, upload-time = "2025-08-27T12:15:03.961Z" }, - { url = "https://files.pythonhosted.org/packages/d5/63/b7cc415c345625d5e62f694ea356c58fb964861409008118f1245f8c3347/rpds_py-0.27.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7ba22cb9693df986033b91ae1d7a979bc399237d45fccf875b76f62bb9e52ddf", size = 371360, upload-time = "2025-08-27T12:15:29.218Z" }, - { url = 
"https://files.pythonhosted.org/packages/e5/8c/12e1b24b560cf378b8ffbdb9dc73abd529e1adcfcf82727dfd29c4a7b88d/rpds_py-0.27.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b640501be9288c77738b5492b3fd3abc4ba95c50c2e41273c8a1459f08298d3", size = 353933, upload-time = "2025-08-27T12:15:30.837Z" }, - { url = "https://files.pythonhosted.org/packages/9b/85/1bb2210c1f7a1b99e91fea486b9f0f894aa5da3a5ec7097cbad7dec6d40f/rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb08b65b93e0c6dd70aac7f7890a9c0938d5ec71d5cb32d45cf844fb8ae47636", size = 382962, upload-time = "2025-08-27T12:15:32.348Z" }, - { url = "https://files.pythonhosted.org/packages/cc/c9/a839b9f219cf80ed65f27a7f5ddbb2809c1b85c966020ae2dff490e0b18e/rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d7ff07d696a7a38152ebdb8212ca9e5baab56656749f3d6004b34ab726b550b8", size = 394412, upload-time = "2025-08-27T12:15:33.839Z" }, - { url = "https://files.pythonhosted.org/packages/02/2d/b1d7f928b0b1f4fc2e0133e8051d199b01d7384875adc63b6ddadf3de7e5/rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fb7c72262deae25366e3b6c0c0ba46007967aea15d1eea746e44ddba8ec58dcc", size = 523972, upload-time = "2025-08-27T12:15:35.377Z" }, - { url = "https://files.pythonhosted.org/packages/a9/af/2cbf56edd2d07716df1aec8a726b3159deb47cb5c27e1e42b71d705a7c2f/rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b002cab05d6339716b03a4a3a2ce26737f6231d7b523f339fa061d53368c9d8", size = 403273, upload-time = "2025-08-27T12:15:37.051Z" }, - { url = "https://files.pythonhosted.org/packages/c0/93/425e32200158d44ff01da5d9612c3b6711fe69f606f06e3895511f17473b/rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23f6b69d1c26c4704fec01311963a41d7de3ee0570a84ebde4d544e5a1859ffc", size = 385278, upload-time = 
"2025-08-27T12:15:38.571Z" }, - { url = "https://files.pythonhosted.org/packages/eb/1a/1a04a915ecd0551bfa9e77b7672d1937b4b72a0fc204a17deef76001cfb2/rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:530064db9146b247351f2a0250b8f00b289accea4596a033e94be2389977de71", size = 402084, upload-time = "2025-08-27T12:15:40.529Z" }, - { url = "https://files.pythonhosted.org/packages/51/f7/66585c0fe5714368b62951d2513b684e5215beaceab2c6629549ddb15036/rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7b90b0496570bd6b0321724a330d8b545827c4df2034b6ddfc5f5275f55da2ad", size = 419041, upload-time = "2025-08-27T12:15:42.191Z" }, - { url = "https://files.pythonhosted.org/packages/8e/7e/83a508f6b8e219bba2d4af077c35ba0e0cdd35a751a3be6a7cba5a55ad71/rpds_py-0.27.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:879b0e14a2da6a1102a3fc8af580fc1ead37e6d6692a781bd8c83da37429b5ab", size = 560084, upload-time = "2025-08-27T12:15:43.839Z" }, - { url = "https://files.pythonhosted.org/packages/66/66/bb945683b958a1b19eb0fe715594630d0f36396ebdef4d9b89c2fa09aa56/rpds_py-0.27.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:0d807710df3b5faa66c731afa162ea29717ab3be17bdc15f90f2d9f183da4059", size = 590115, upload-time = "2025-08-27T12:15:46.647Z" }, - { url = "https://files.pythonhosted.org/packages/12/00/ccfaafaf7db7e7adace915e5c2f2c2410e16402561801e9c7f96683002d3/rpds_py-0.27.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:3adc388fc3afb6540aec081fa59e6e0d3908722771aa1e37ffe22b220a436f0b", size = 556561, upload-time = "2025-08-27T12:15:48.219Z" }, - { url = "https://files.pythonhosted.org/packages/e1/b7/92b6ed9aad103bfe1c45df98453dfae40969eef2cb6c6239c58d7e96f1b3/rpds_py-0.27.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c796c0c1cc68cb08b0284db4229f5af76168172670c74908fdbd4b7d7f515819", size = 229125, upload-time = "2025-08-27T12:15:49.956Z" }, - { url = 
"https://files.pythonhosted.org/packages/0c/ed/e1fba02de17f4f76318b834425257c8ea297e415e12c68b4361f63e8ae92/rpds_py-0.27.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cdfe4bb2f9fe7458b7453ad3c33e726d6d1c7c0a72960bcc23800d77384e42df", size = 371402, upload-time = "2025-08-27T12:15:51.561Z" }, - { url = "https://files.pythonhosted.org/packages/af/7c/e16b959b316048b55585a697e94add55a4ae0d984434d279ea83442e460d/rpds_py-0.27.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:8fabb8fd848a5f75a2324e4a84501ee3a5e3c78d8603f83475441866e60b94a3", size = 354084, upload-time = "2025-08-27T12:15:53.219Z" }, - { url = "https://files.pythonhosted.org/packages/de/c1/ade645f55de76799fdd08682d51ae6724cb46f318573f18be49b1e040428/rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eda8719d598f2f7f3e0f885cba8646644b55a187762bec091fa14a2b819746a9", size = 383090, upload-time = "2025-08-27T12:15:55.158Z" }, - { url = "https://files.pythonhosted.org/packages/1f/27/89070ca9b856e52960da1472efcb6c20ba27cfe902f4f23ed095b9cfc61d/rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c64d07e95606ec402a0a1c511fe003873fa6af630bda59bac77fac8b4318ebc", size = 394519, upload-time = "2025-08-27T12:15:57.238Z" }, - { url = "https://files.pythonhosted.org/packages/b3/28/be120586874ef906aa5aeeae95ae8df4184bc757e5b6bd1c729ccff45ed5/rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93a2ed40de81bcff59aabebb626562d48332f3d028ca2036f1d23cbb52750be4", size = 523817, upload-time = "2025-08-27T12:15:59.237Z" }, - { url = "https://files.pythonhosted.org/packages/a8/ef/70cc197bc11cfcde02a86f36ac1eed15c56667c2ebddbdb76a47e90306da/rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:387ce8c44ae94e0ec50532d9cb0edce17311024c9794eb196b90e1058aadeb66", size = 403240, upload-time = "2025-08-27T12:16:00.923Z" }, - { 
url = "https://files.pythonhosted.org/packages/cf/35/46936cca449f7f518f2f4996e0e8344db4b57e2081e752441154089d2a5f/rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaf94f812c95b5e60ebaf8bfb1898a7d7cb9c1af5744d4a67fa47796e0465d4e", size = 385194, upload-time = "2025-08-27T12:16:02.802Z" }, - { url = "https://files.pythonhosted.org/packages/e1/62/29c0d3e5125c3270b51415af7cbff1ec587379c84f55a5761cc9efa8cd06/rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:4848ca84d6ded9b58e474dfdbad4b8bfb450344c0551ddc8d958bf4b36aa837c", size = 402086, upload-time = "2025-08-27T12:16:04.806Z" }, - { url = "https://files.pythonhosted.org/packages/8f/66/03e1087679227785474466fdd04157fb793b3b76e3fcf01cbf4c693c1949/rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2bde09cbcf2248b73c7c323be49b280180ff39fadcfe04e7b6f54a678d02a7cf", size = 419272, upload-time = "2025-08-27T12:16:06.471Z" }, - { url = "https://files.pythonhosted.org/packages/6a/24/e3e72d265121e00b063aef3e3501e5b2473cf1b23511d56e529531acf01e/rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:94c44ee01fd21c9058f124d2d4f0c9dc7634bec93cd4b38eefc385dabe71acbf", size = 560003, upload-time = "2025-08-27T12:16:08.06Z" }, - { url = "https://files.pythonhosted.org/packages/26/ca/f5a344c534214cc2d41118c0699fffbdc2c1bc7046f2a2b9609765ab9c92/rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:df8b74962e35c9249425d90144e721eed198e6555a0e22a563d29fe4486b51f6", size = 590482, upload-time = "2025-08-27T12:16:10.137Z" }, - { url = "https://files.pythonhosted.org/packages/ce/08/4349bdd5c64d9d193c360aa9db89adeee6f6682ab8825dca0a3f535f434f/rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:dc23e6820e3b40847e2f4a7726462ba0cf53089512abe9ee16318c366494c17a", size = 556523, upload-time = "2025-08-27T12:16:12.188Z" }, +version = "0.28.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/48/dc/95f074d43452b3ef5d06276696ece4b3b5d696e7c9ad7173c54b1390cd70/rpds_py-0.28.0.tar.gz", hash = "sha256:abd4df20485a0983e2ca334a216249b6186d6e3c1627e106651943dbdb791aea", size = 27419, upload-time = "2025-10-22T22:24:29.327Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/f8/13bb772dc7cbf2c3c5b816febc34fa0cb2c64a08e0569869585684ce6631/rpds_py-0.28.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7b6013db815417eeb56b2d9d7324e64fcd4fa289caeee6e7a78b2e11fc9b438a", size = 362820, upload-time = "2025-10-22T22:21:15.074Z" }, + { url = "https://files.pythonhosted.org/packages/84/91/6acce964aab32469c3dbe792cb041a752d64739c534e9c493c701ef0c032/rpds_py-0.28.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a4c6b05c685c0c03f80dabaeb73e74218c49deea965ca63f76a752807397207", size = 348499, upload-time = "2025-10-22T22:21:17.658Z" }, + { url = "https://files.pythonhosted.org/packages/f1/93/c05bb1f4f5e0234db7c4917cb8dd5e2e0a9a7b26dc74b1b7bee3c9cfd477/rpds_py-0.28.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4794c6c3fbe8f9ac87699b131a1f26e7b4abcf6d828da46a3a52648c7930eba", size = 379356, upload-time = "2025-10-22T22:21:19.847Z" }, + { url = "https://files.pythonhosted.org/packages/5c/37/e292da436f0773e319753c567263427cdf6c645d30b44f09463ff8216cda/rpds_py-0.28.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e8456b6ee5527112ff2354dd9087b030e3429e43a74f480d4a5ca79d269fd85", size = 390151, upload-time = "2025-10-22T22:21:21.569Z" }, + { url = "https://files.pythonhosted.org/packages/76/87/a4e3267131616e8faf10486dc00eaedf09bd61c87f01e5ef98e782ee06c9/rpds_py-0.28.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:beb880a9ca0a117415f241f66d56025c02037f7c4efc6fe59b5b8454f1eaa50d", size = 524831, upload-time = "2025-10-22T22:21:23.394Z" }, + { url = 
"https://files.pythonhosted.org/packages/e1/c8/4a4ca76f0befae9515da3fad11038f0fce44f6bb60b21fe9d9364dd51fb0/rpds_py-0.28.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6897bebb118c44b38c9cb62a178e09f1593c949391b9a1a6fe777ccab5934ee7", size = 404687, upload-time = "2025-10-22T22:21:25.201Z" }, + { url = "https://files.pythonhosted.org/packages/6a/65/118afe854424456beafbbebc6b34dcf6d72eae3a08b4632bc4220f8240d9/rpds_py-0.28.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1b553dd06e875249fd43efd727785efb57a53180e0fde321468222eabbeaafa", size = 382683, upload-time = "2025-10-22T22:21:26.536Z" }, + { url = "https://files.pythonhosted.org/packages/f7/bc/0625064041fb3a0c77ecc8878c0e8341b0ae27ad0f00cf8f2b57337a1e63/rpds_py-0.28.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:f0b2044fdddeea5b05df832e50d2a06fe61023acb44d76978e1b060206a8a476", size = 398927, upload-time = "2025-10-22T22:21:27.864Z" }, + { url = "https://files.pythonhosted.org/packages/5d/1a/fed7cf2f1ee8a5e4778f2054153f2cfcf517748875e2f5b21cf8907cd77d/rpds_py-0.28.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05cf1e74900e8da73fa08cc76c74a03345e5a3e37691d07cfe2092d7d8e27b04", size = 411590, upload-time = "2025-10-22T22:21:29.474Z" }, + { url = "https://files.pythonhosted.org/packages/c1/64/a8e0f67fa374a6c472dbb0afdaf1ef744724f165abb6899f20e2f1563137/rpds_py-0.28.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:efd489fec7c311dae25e94fe7eeda4b3d06be71c68f2cf2e8ef990ffcd2cd7e8", size = 559843, upload-time = "2025-10-22T22:21:30.917Z" }, + { url = "https://files.pythonhosted.org/packages/a9/ea/e10353f6d7c105be09b8135b72787a65919971ae0330ad97d87e4e199880/rpds_py-0.28.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ada7754a10faacd4f26067e62de52d6af93b6d9542f0df73c57b9771eb3ba9c4", size = 584188, upload-time = "2025-10-22T22:21:32.827Z" }, + { url = 
"https://files.pythonhosted.org/packages/18/b0/a19743e0763caf0c89f6fc6ba6fbd9a353b24ffb4256a492420c5517da5a/rpds_py-0.28.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c2a34fd26588949e1e7977cfcbb17a9a42c948c100cab890c6d8d823f0586457", size = 550052, upload-time = "2025-10-22T22:21:34.702Z" }, + { url = "https://files.pythonhosted.org/packages/de/bc/ec2c004f6c7d6ab1e25dae875cdb1aee087c3ebed5b73712ed3000e3851a/rpds_py-0.28.0-cp310-cp310-win32.whl", hash = "sha256:f9174471d6920cbc5e82a7822de8dfd4dcea86eb828b04fc8c6519a77b0ee51e", size = 215110, upload-time = "2025-10-22T22:21:36.645Z" }, + { url = "https://files.pythonhosted.org/packages/6c/de/4ce8abf59674e17187023933547d2018363e8fc76ada4f1d4d22871ccb6e/rpds_py-0.28.0-cp310-cp310-win_amd64.whl", hash = "sha256:6e32dd207e2c4f8475257a3540ab8a93eff997abfa0a3fdb287cae0d6cd874b8", size = 223850, upload-time = "2025-10-22T22:21:38.006Z" }, + { url = "https://files.pythonhosted.org/packages/a6/34/058d0db5471c6be7bef82487ad5021ff8d1d1d27794be8730aad938649cf/rpds_py-0.28.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:03065002fd2e287725d95fbc69688e0c6daf6c6314ba38bdbaa3895418e09296", size = 362344, upload-time = "2025-10-22T22:21:39.713Z" }, + { url = "https://files.pythonhosted.org/packages/5d/67/9503f0ec8c055a0782880f300c50a2b8e5e72eb1f94dfc2053da527444dd/rpds_py-0.28.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:28ea02215f262b6d078daec0b45344c89e161eab9526b0d898221d96fdda5f27", size = 348440, upload-time = "2025-10-22T22:21:41.056Z" }, + { url = "https://files.pythonhosted.org/packages/68/2e/94223ee9b32332a41d75b6f94b37b4ce3e93878a556fc5f152cbd856a81f/rpds_py-0.28.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25dbade8fbf30bcc551cb352376c0ad64b067e4fc56f90e22ba70c3ce205988c", size = 379068, upload-time = "2025-10-22T22:21:42.593Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/25/54fd48f9f680cfc44e6a7f39a5fadf1d4a4a1fd0848076af4a43e79f998c/rpds_py-0.28.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c03002f54cc855860bfdc3442928ffdca9081e73b5b382ed0b9e8efe6e5e205", size = 390518, upload-time = "2025-10-22T22:21:43.998Z" }, + { url = "https://files.pythonhosted.org/packages/1b/85/ac258c9c27f2ccb1bd5d0697e53a82ebcf8088e3186d5d2bf8498ee7ed44/rpds_py-0.28.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b9699fa7990368b22032baf2b2dce1f634388e4ffc03dfefaaac79f4695edc95", size = 525319, upload-time = "2025-10-22T22:21:45.645Z" }, + { url = "https://files.pythonhosted.org/packages/40/cb/c6734774789566d46775f193964b76627cd5f42ecf246d257ce84d1912ed/rpds_py-0.28.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9b06fe1a75e05e0713f06ea0c89ecb6452210fd60e2f1b6ddc1067b990e08d9", size = 404896, upload-time = "2025-10-22T22:21:47.544Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/14e37ce83202c632c89b0691185dca9532288ff9d390eacae3d2ff771bae/rpds_py-0.28.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac9f83e7b326a3f9ec3ef84cda98fb0a74c7159f33e692032233046e7fd15da2", size = 382862, upload-time = "2025-10-22T22:21:49.176Z" }, + { url = "https://files.pythonhosted.org/packages/6a/83/f3642483ca971a54d60caa4449f9d6d4dbb56a53e0072d0deff51b38af74/rpds_py-0.28.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:0d3259ea9ad8743a75a43eb7819324cdab393263c91be86e2d1901ee65c314e0", size = 398848, upload-time = "2025-10-22T22:21:51.024Z" }, + { url = "https://files.pythonhosted.org/packages/44/09/2d9c8b2f88e399b4cfe86efdf2935feaf0394e4f14ab30c6c5945d60af7d/rpds_py-0.28.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a7548b345f66f6695943b4ef6afe33ccd3f1b638bd9afd0f730dd255c249c9e", size = 412030, upload-time = "2025-10-22T22:21:52.665Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/f5/e1cec473d4bde6df1fd3738be8e82d64dd0600868e76e92dfeaebbc2d18f/rpds_py-0.28.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9a40040aa388b037eb39416710fbcce9443498d2eaab0b9b45ae988b53f5c67", size = 559700, upload-time = "2025-10-22T22:21:54.123Z" }, + { url = "https://files.pythonhosted.org/packages/8d/be/73bb241c1649edbf14e98e9e78899c2c5e52bbe47cb64811f44d2cc11808/rpds_py-0.28.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8f60c7ea34e78c199acd0d3cda37a99be2c861dd2b8cf67399784f70c9f8e57d", size = 584581, upload-time = "2025-10-22T22:21:56.102Z" }, + { url = "https://files.pythonhosted.org/packages/9c/9c/ffc6e9218cd1eb5c2c7dbd276c87cd10e8c2232c456b554169eb363381df/rpds_py-0.28.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1571ae4292649100d743b26d5f9c63503bb1fedf538a8f29a98dce2d5ba6b4e6", size = 549981, upload-time = "2025-10-22T22:21:58.253Z" }, + { url = "https://files.pythonhosted.org/packages/5f/50/da8b6d33803a94df0149345ee33e5d91ed4d25fc6517de6a25587eae4133/rpds_py-0.28.0-cp311-cp311-win32.whl", hash = "sha256:5cfa9af45e7c1140af7321fa0bef25b386ee9faa8928c80dc3a5360971a29e8c", size = 214729, upload-time = "2025-10-22T22:21:59.625Z" }, + { url = "https://files.pythonhosted.org/packages/12/fd/b0f48c4c320ee24c8c20df8b44acffb7353991ddf688af01eef5f93d7018/rpds_py-0.28.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd8d86b5d29d1b74100982424ba53e56033dc47720a6de9ba0259cf81d7cecaa", size = 223977, upload-time = "2025-10-22T22:22:01.092Z" }, + { url = "https://files.pythonhosted.org/packages/b4/21/c8e77a2ac66e2ec4e21f18a04b4e9a0417ecf8e61b5eaeaa9360a91713b4/rpds_py-0.28.0-cp311-cp311-win_arm64.whl", hash = "sha256:4e27d3a5709cc2b3e013bf93679a849213c79ae0573f9b894b284b55e729e120", size = 217326, upload-time = "2025-10-22T22:22:02.944Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/5c/6c3936495003875fe7b14f90ea812841a08fca50ab26bd840e924097d9c8/rpds_py-0.28.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6b4f28583a4f247ff60cd7bdda83db8c3f5b05a7a82ff20dd4b078571747708f", size = 366439, upload-time = "2025-10-22T22:22:04.525Z" }, + { url = "https://files.pythonhosted.org/packages/56/f9/a0f1ca194c50aa29895b442771f036a25b6c41a35e4f35b1a0ea713bedae/rpds_py-0.28.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d678e91b610c29c4b3d52a2c148b641df2b4676ffe47c59f6388d58b99cdc424", size = 348170, upload-time = "2025-10-22T22:22:06.397Z" }, + { url = "https://files.pythonhosted.org/packages/18/ea/42d243d3a586beb72c77fa5def0487daf827210069a95f36328e869599ea/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e819e0e37a44a78e1383bf1970076e2ccc4dc8c2bbaa2f9bd1dc987e9afff628", size = 378838, upload-time = "2025-10-22T22:22:07.932Z" }, + { url = "https://files.pythonhosted.org/packages/e7/78/3de32e18a94791af8f33601402d9d4f39613136398658412a4e0b3047327/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5ee514e0f0523db5d3fb171f397c54875dbbd69760a414dccf9d4d7ad628b5bd", size = 393299, upload-time = "2025-10-22T22:22:09.435Z" }, + { url = "https://files.pythonhosted.org/packages/13/7e/4bdb435afb18acea2eb8a25ad56b956f28de7c59f8a1d32827effa0d4514/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3fa06d27fdcee47f07a39e02862da0100cb4982508f5ead53ec533cd5fe55e", size = 518000, upload-time = "2025-10-22T22:22:11.326Z" }, + { url = "https://files.pythonhosted.org/packages/31/d0/5f52a656875cdc60498ab035a7a0ac8f399890cc1ee73ebd567bac4e39ae/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46959ef2e64f9e4a41fc89aa20dbca2b85531f9a72c21099a3360f35d10b0d5a", size = 408746, upload-time = "2025-10-22T22:22:13.143Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/cd/49ce51767b879cde77e7ad9fae164ea15dce3616fe591d9ea1df51152706/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8455933b4bcd6e83fde3fefc987a023389c4b13f9a58c8d23e4b3f6d13f78c84", size = 386379, upload-time = "2025-10-22T22:22:14.602Z" }, + { url = "https://files.pythonhosted.org/packages/6a/99/e4e1e1ee93a98f72fc450e36c0e4d99c35370220e815288e3ecd2ec36a2a/rpds_py-0.28.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:ad50614a02c8c2962feebe6012b52f9802deec4263946cddea37aaf28dd25a66", size = 401280, upload-time = "2025-10-22T22:22:16.063Z" }, + { url = "https://files.pythonhosted.org/packages/61/35/e0c6a57488392a8b319d2200d03dad2b29c0db9996f5662c3b02d0b86c02/rpds_py-0.28.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5deca01b271492553fdb6c7fd974659dce736a15bae5dad7ab8b93555bceb28", size = 412365, upload-time = "2025-10-22T22:22:17.504Z" }, + { url = "https://files.pythonhosted.org/packages/ff/6a/841337980ea253ec797eb084665436007a1aad0faac1ba097fb906c5f69c/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:735f8495a13159ce6a0d533f01e8674cec0c57038c920495f87dcb20b3ddb48a", size = 559573, upload-time = "2025-10-22T22:22:19.108Z" }, + { url = "https://files.pythonhosted.org/packages/e7/5e/64826ec58afd4c489731f8b00729c5f6afdb86f1df1df60bfede55d650bb/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:961ca621ff10d198bbe6ba4957decca61aa2a0c56695384c1d6b79bf61436df5", size = 583973, upload-time = "2025-10-22T22:22:20.768Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ee/44d024b4843f8386a4eeaa4c171b3d31d55f7177c415545fd1a24c249b5d/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2374e16cc9131022e7d9a8f8d65d261d9ba55048c78f3b6e017971a4f5e6353c", size = 553800, upload-time = "2025-10-22T22:22:22.25Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/89/33e675dccff11a06d4d85dbb4d1865f878d5020cbb69b2c1e7b2d3f82562/rpds_py-0.28.0-cp312-cp312-win32.whl", hash = "sha256:d15431e334fba488b081d47f30f091e5d03c18527c325386091f31718952fe08", size = 216954, upload-time = "2025-10-22T22:22:24.105Z" }, + { url = "https://files.pythonhosted.org/packages/af/36/45f6ebb3210887e8ee6dbf1bc710ae8400bb417ce165aaf3024b8360d999/rpds_py-0.28.0-cp312-cp312-win_amd64.whl", hash = "sha256:a410542d61fc54710f750d3764380b53bf09e8c4edbf2f9141a82aa774a04f7c", size = 227844, upload-time = "2025-10-22T22:22:25.551Z" }, + { url = "https://files.pythonhosted.org/packages/57/91/f3fb250d7e73de71080f9a221d19bd6a1c1eb0d12a1ea26513f6c1052ad6/rpds_py-0.28.0-cp312-cp312-win_arm64.whl", hash = "sha256:1f0cfd1c69e2d14f8c892b893997fa9a60d890a0c8a603e88dca4955f26d1edd", size = 217624, upload-time = "2025-10-22T22:22:26.914Z" }, + { url = "https://files.pythonhosted.org/packages/d3/03/ce566d92611dfac0085c2f4b048cd53ed7c274a5c05974b882a908d540a2/rpds_py-0.28.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e9e184408a0297086f880556b6168fa927d677716f83d3472ea333b42171ee3b", size = 366235, upload-time = "2025-10-22T22:22:28.397Z" }, + { url = "https://files.pythonhosted.org/packages/00/34/1c61da1b25592b86fd285bd7bd8422f4c9d748a7373b46126f9ae792a004/rpds_py-0.28.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:edd267266a9b0448f33dc465a97cfc5d467594b600fe28e7fa2f36450e03053a", size = 348241, upload-time = "2025-10-22T22:22:30.171Z" }, + { url = "https://files.pythonhosted.org/packages/fc/00/ed1e28616848c61c493a067779633ebf4b569eccaacf9ccbdc0e7cba2b9d/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85beb8b3f45e4e32f6802fb6cd6b17f615ef6c6a52f265371fb916fae02814aa", size = 378079, upload-time = "2025-10-22T22:22:31.644Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/b2/ccb30333a16a470091b6e50289adb4d3ec656fd9951ba8c5e3aaa0746a67/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d2412be8d00a1b895f8ad827cc2116455196e20ed994bb704bf138fe91a42724", size = 393151, upload-time = "2025-10-22T22:22:33.453Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d0/73e2217c3ee486d555cb84920597480627d8c0240ff3062005c6cc47773e/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cf128350d384b777da0e68796afdcebc2e9f63f0e9f242217754e647f6d32491", size = 517520, upload-time = "2025-10-22T22:22:34.949Z" }, + { url = "https://files.pythonhosted.org/packages/c4/91/23efe81c700427d0841a4ae7ea23e305654381831e6029499fe80be8a071/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a2036d09b363aa36695d1cc1a97b36865597f4478470b0697b5ee9403f4fe399", size = 408699, upload-time = "2025-10-22T22:22:36.584Z" }, + { url = "https://files.pythonhosted.org/packages/ca/ee/a324d3198da151820a326c1f988caaa4f37fc27955148a76fff7a2d787a9/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8e1e9be4fa6305a16be628959188e4fd5cd6f1b0e724d63c6d8b2a8adf74ea6", size = 385720, upload-time = "2025-10-22T22:22:38.014Z" }, + { url = "https://files.pythonhosted.org/packages/19/ad/e68120dc05af8b7cab4a789fccd8cdcf0fe7e6581461038cc5c164cd97d2/rpds_py-0.28.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0a403460c9dd91a7f23fc3188de6d8977f1d9603a351d5db6cf20aaea95b538d", size = 401096, upload-time = "2025-10-22T22:22:39.869Z" }, + { url = "https://files.pythonhosted.org/packages/99/90/c1e070620042459d60df6356b666bb1f62198a89d68881816a7ed121595a/rpds_py-0.28.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d7366b6553cdc805abcc512b849a519167db8f5e5c3472010cd1228b224265cb", size = 411465, upload-time = "2025-10-22T22:22:41.395Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/61/7c195b30d57f1b8d5970f600efee72a4fad79ec829057972e13a0370fd24/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b43c6a3726efd50f18d8120ec0551241c38785b68952d240c45ea553912ac41", size = 558832, upload-time = "2025-10-22T22:22:42.871Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3d/06f3a718864773f69941d4deccdf18e5e47dd298b4628062f004c10f3b34/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0cb7203c7bc69d7c1585ebb33a2e6074492d2fc21ad28a7b9d40457ac2a51ab7", size = 583230, upload-time = "2025-10-22T22:22:44.877Z" }, + { url = "https://files.pythonhosted.org/packages/66/df/62fc783781a121e77fee9a21ead0a926f1b652280a33f5956a5e7833ed30/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7a52a5169c664dfb495882adc75c304ae1d50df552fbd68e100fdc719dee4ff9", size = 553268, upload-time = "2025-10-22T22:22:46.441Z" }, + { url = "https://files.pythonhosted.org/packages/84/85/d34366e335140a4837902d3dea89b51f087bd6a63c993ebdff59e93ee61d/rpds_py-0.28.0-cp313-cp313-win32.whl", hash = "sha256:2e42456917b6687215b3e606ab46aa6bca040c77af7df9a08a6dcfe8a4d10ca5", size = 217100, upload-time = "2025-10-22T22:22:48.342Z" }, + { url = "https://files.pythonhosted.org/packages/3c/1c/f25a3f3752ad7601476e3eff395fe075e0f7813fbb9862bd67c82440e880/rpds_py-0.28.0-cp313-cp313-win_amd64.whl", hash = "sha256:e0a0311caedc8069d68fc2bf4c9019b58a2d5ce3cd7cb656c845f1615b577e1e", size = 227759, upload-time = "2025-10-22T22:22:50.219Z" }, + { url = "https://files.pythonhosted.org/packages/e0/d6/5f39b42b99615b5bc2f36ab90423ea404830bdfee1c706820943e9a645eb/rpds_py-0.28.0-cp313-cp313-win_arm64.whl", hash = "sha256:04c1b207ab8b581108801528d59ad80aa83bb170b35b0ddffb29c20e411acdc1", size = 217326, upload-time = "2025-10-22T22:22:51.647Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/8b/0c69b72d1cee20a63db534be0df271effe715ef6c744fdf1ff23bb2b0b1c/rpds_py-0.28.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:f296ea3054e11fc58ad42e850e8b75c62d9a93a9f981ad04b2e5ae7d2186ff9c", size = 355736, upload-time = "2025-10-22T22:22:53.211Z" }, + { url = "https://files.pythonhosted.org/packages/f7/6d/0c2ee773cfb55c31a8514d2cece856dd299170a49babd50dcffb15ddc749/rpds_py-0.28.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5a7306c19b19005ad98468fcefeb7100b19c79fc23a5f24a12e06d91181193fa", size = 342677, upload-time = "2025-10-22T22:22:54.723Z" }, + { url = "https://files.pythonhosted.org/packages/e2/1c/22513ab25a27ea205144414724743e305e8153e6abe81833b5e678650f5a/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5d9b86aa501fed9862a443c5c3116f6ead8bc9296185f369277c42542bd646b", size = 371847, upload-time = "2025-10-22T22:22:56.295Z" }, + { url = "https://files.pythonhosted.org/packages/60/07/68e6ccdb4b05115ffe61d31afc94adef1833d3a72f76c9632d4d90d67954/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e5bbc701eff140ba0e872691d573b3d5d30059ea26e5785acba9132d10c8c31d", size = 381800, upload-time = "2025-10-22T22:22:57.808Z" }, + { url = "https://files.pythonhosted.org/packages/73/bf/6d6d15df80781d7f9f368e7c1a00caf764436518c4877fb28b029c4624af/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a5690671cd672a45aa8616d7374fdf334a1b9c04a0cac3c854b1136e92374fe", size = 518827, upload-time = "2025-10-22T22:22:59.826Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d3/2decbb2976cc452cbf12a2b0aaac5f1b9dc5dd9d1f7e2509a3ee00421249/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9f1d92ecea4fa12f978a367c32a5375a1982834649cdb96539dcdc12e609ab1a", size = 399471, upload-time = "2025-10-22T22:23:01.968Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/2c/f30892f9e54bd02e5faca3f6a26d6933c51055e67d54818af90abed9748e/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d252db6b1a78d0a3928b6190156042d54c93660ce4d98290d7b16b5296fb7cc", size = 377578, upload-time = "2025-10-22T22:23:03.52Z" }, + { url = "https://files.pythonhosted.org/packages/f0/5d/3bce97e5534157318f29ac06bf2d279dae2674ec12f7cb9c12739cee64d8/rpds_py-0.28.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d61b355c3275acb825f8777d6c4505f42b5007e357af500939d4a35b19177259", size = 390482, upload-time = "2025-10-22T22:23:05.391Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f0/886bd515ed457b5bd93b166175edb80a0b21a210c10e993392127f1e3931/rpds_py-0.28.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:acbe5e8b1026c0c580d0321c8aae4b0a1e1676861d48d6e8c6586625055b606a", size = 402447, upload-time = "2025-10-22T22:23:06.93Z" }, + { url = "https://files.pythonhosted.org/packages/42/b5/71e8777ac55e6af1f4f1c05b47542a1eaa6c33c1cf0d300dca6a1c6e159a/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8aa23b6f0fc59b85b4c7d89ba2965af274346f738e8d9fc2455763602e62fd5f", size = 552385, upload-time = "2025-10-22T22:23:08.557Z" }, + { url = "https://files.pythonhosted.org/packages/5d/cb/6ca2d70cbda5a8e36605e7788c4aa3bea7c17d71d213465a5a675079b98d/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7b14b0c680286958817c22d76fcbca4800ddacef6f678f3a7c79a1fe7067fe37", size = 575642, upload-time = "2025-10-22T22:23:10.348Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d4/407ad9960ca7856d7b25c96dcbe019270b5ffdd83a561787bc682c797086/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bcf1d210dfee61a6c86551d67ee1031899c0fdbae88b2d44a569995d43797712", size = 544507, upload-time = "2025-10-22T22:23:12.434Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/31/2f46fe0efcac23fbf5797c6b6b7e1c76f7d60773e525cb65fcbc582ee0f2/rpds_py-0.28.0-cp313-cp313t-win32.whl", hash = "sha256:3aa4dc0fdab4a7029ac63959a3ccf4ed605fee048ba67ce89ca3168da34a1342", size = 205376, upload-time = "2025-10-22T22:23:13.979Z" }, + { url = "https://files.pythonhosted.org/packages/92/e4/15947bda33cbedfc134490a41841ab8870a72a867a03d4969d886f6594a2/rpds_py-0.28.0-cp313-cp313t-win_amd64.whl", hash = "sha256:7b7d9d83c942855e4fdcfa75d4f96f6b9e272d42fffcb72cd4bb2577db2e2907", size = 215907, upload-time = "2025-10-22T22:23:15.5Z" }, + { url = "https://files.pythonhosted.org/packages/08/47/ffe8cd7a6a02833b10623bf765fbb57ce977e9a4318ca0e8cf97e9c3d2b3/rpds_py-0.28.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:dcdcb890b3ada98a03f9f2bb108489cdc7580176cb73b4f2d789e9a1dac1d472", size = 353830, upload-time = "2025-10-22T22:23:17.03Z" }, + { url = "https://files.pythonhosted.org/packages/f9/9f/890f36cbd83a58491d0d91ae0db1702639edb33fb48eeb356f80ecc6b000/rpds_py-0.28.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f274f56a926ba2dc02976ca5b11c32855cbd5925534e57cfe1fda64e04d1add2", size = 341819, upload-time = "2025-10-22T22:23:18.57Z" }, + { url = "https://files.pythonhosted.org/packages/09/e3/921eb109f682aa24fb76207698fbbcf9418738f35a40c21652c29053f23d/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fe0438ac4a29a520ea94c8c7f1754cdd8feb1bc490dfda1bfd990072363d527", size = 373127, upload-time = "2025-10-22T22:23:20.216Z" }, + { url = "https://files.pythonhosted.org/packages/23/13/bce4384d9f8f4989f1a9599c71b7a2d877462e5fd7175e1f69b398f729f4/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8a358a32dd3ae50e933347889b6af9a1bdf207ba5d1a3f34e1a38cd3540e6733", size = 382767, upload-time = "2025-10-22T22:23:21.787Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/e1/579512b2d89a77c64ccef5a0bc46a6ef7f72ae0cf03d4b26dcd52e57ee0a/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e80848a71c78aa328fefaba9c244d588a342c8e03bda518447b624ea64d1ff56", size = 517585, upload-time = "2025-10-22T22:23:23.699Z" }, + { url = "https://files.pythonhosted.org/packages/62/3c/ca704b8d324a2591b0b0adcfcaadf9c862375b11f2f667ac03c61b4fd0a6/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f586db2e209d54fe177e58e0bc4946bea5fb0102f150b1b2f13de03e1f0976f8", size = 399828, upload-time = "2025-10-22T22:23:25.713Z" }, + { url = "https://files.pythonhosted.org/packages/da/37/e84283b9e897e3adc46b4c88bb3f6ec92a43bd4d2f7ef5b13459963b2e9c/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ae8ee156d6b586e4292491e885d41483136ab994e719a13458055bec14cf370", size = 375509, upload-time = "2025-10-22T22:23:27.32Z" }, + { url = "https://files.pythonhosted.org/packages/1a/c2/a980beab869d86258bf76ec42dec778ba98151f253a952b02fe36d72b29c/rpds_py-0.28.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:a805e9b3973f7e27f7cab63a6b4f61d90f2e5557cff73b6e97cd5b8540276d3d", size = 392014, upload-time = "2025-10-22T22:23:29.332Z" }, + { url = "https://files.pythonhosted.org/packages/da/b5/b1d3c5f9d3fa5aeef74265f9c64de3c34a0d6d5cd3c81c8b17d5c8f10ed4/rpds_py-0.28.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5d3fd16b6dc89c73a4da0b4ac8b12a7ecc75b2864b95c9e5afed8003cb50a728", size = 402410, upload-time = "2025-10-22T22:23:31.14Z" }, + { url = "https://files.pythonhosted.org/packages/74/ae/cab05ff08dfcc052afc73dcb38cbc765ffc86f94e966f3924cd17492293c/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6796079e5d24fdaba6d49bda28e2c47347e89834678f2bc2c1b4fc1489c0fb01", size = 553593, upload-time = "2025-10-22T22:23:32.834Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/80/50d5706ea2a9bfc9e9c5f401d91879e7c790c619969369800cde202da214/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:76500820c2af232435cbe215e3324c75b950a027134e044423f59f5b9a1ba515", size = 576925, upload-time = "2025-10-22T22:23:34.47Z" }, + { url = "https://files.pythonhosted.org/packages/ab/12/85a57d7a5855a3b188d024b099fd09c90db55d32a03626d0ed16352413ff/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bbdc5640900a7dbf9dd707fe6388972f5bbd883633eb68b76591044cfe346f7e", size = 542444, upload-time = "2025-10-22T22:23:36.093Z" }, + { url = "https://files.pythonhosted.org/packages/6c/65/10643fb50179509150eb94d558e8837c57ca8b9adc04bd07b98e57b48f8c/rpds_py-0.28.0-cp314-cp314-win32.whl", hash = "sha256:adc8aa88486857d2b35d75f0640b949759f79dc105f50aa2c27816b2e0dd749f", size = 207968, upload-time = "2025-10-22T22:23:37.638Z" }, + { url = "https://files.pythonhosted.org/packages/b4/84/0c11fe4d9aaea784ff4652499e365963222481ac647bcd0251c88af646eb/rpds_py-0.28.0-cp314-cp314-win_amd64.whl", hash = "sha256:66e6fa8e075b58946e76a78e69e1a124a21d9a48a5b4766d15ba5b06869d1fa1", size = 218876, upload-time = "2025-10-22T22:23:39.179Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e0/3ab3b86ded7bb18478392dc3e835f7b754cd446f62f3fc96f4fe2aca78f6/rpds_py-0.28.0-cp314-cp314-win_arm64.whl", hash = "sha256:a6fe887c2c5c59413353b7c0caff25d0e566623501ccfff88957fa438a69377d", size = 212506, upload-time = "2025-10-22T22:23:40.755Z" }, + { url = "https://files.pythonhosted.org/packages/51/ec/d5681bb425226c3501eab50fc30e9d275de20c131869322c8a1729c7b61c/rpds_py-0.28.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7a69df082db13c7070f7b8b1f155fa9e687f1d6aefb7b0e3f7231653b79a067b", size = 355433, upload-time = "2025-10-22T22:23:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/be/ec/568c5e689e1cfb1ea8b875cffea3649260955f677fdd7ddc6176902d04cd/rpds_py-0.28.0-cp314-cp314t-macosx_11_0_arm64.whl", 
hash = "sha256:b1cde22f2c30ebb049a9e74c5374994157b9b70a16147d332f89c99c5960737a", size = 342601, upload-time = "2025-10-22T22:23:44.372Z" }, + { url = "https://files.pythonhosted.org/packages/32/fe/51ada84d1d2a1d9d8f2c902cfddd0133b4a5eb543196ab5161d1c07ed2ad/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5338742f6ba7a51012ea470bd4dc600a8c713c0c72adaa0977a1b1f4327d6592", size = 372039, upload-time = "2025-10-22T22:23:46.025Z" }, + { url = "https://files.pythonhosted.org/packages/07/c1/60144a2f2620abade1a78e0d91b298ac2d9b91bc08864493fa00451ef06e/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e1460ebde1bcf6d496d80b191d854adedcc619f84ff17dc1c6d550f58c9efbba", size = 382407, upload-time = "2025-10-22T22:23:48.098Z" }, + { url = "https://files.pythonhosted.org/packages/45/ed/091a7bbdcf4038a60a461df50bc4c82a7ed6d5d5e27649aab61771c17585/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e3eb248f2feba84c692579257a043a7699e28a77d86c77b032c1d9fbb3f0219c", size = 518172, upload-time = "2025-10-22T22:23:50.16Z" }, + { url = "https://files.pythonhosted.org/packages/54/dd/02cc90c2fd9c2ef8016fd7813bfacd1c3a1325633ec8f244c47b449fc868/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3bbba5def70b16cd1c1d7255666aad3b290fbf8d0fe7f9f91abafb73611a91", size = 399020, upload-time = "2025-10-22T22:23:51.81Z" }, + { url = "https://files.pythonhosted.org/packages/ab/81/5d98cc0329bbb911ccecd0b9e19fbf7f3a5de8094b4cda5e71013b2dd77e/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3114f4db69ac5a1f32e7e4d1cbbe7c8f9cf8217f78e6e002cedf2d54c2a548ed", size = 377451, upload-time = "2025-10-22T22:23:53.711Z" }, + { url = "https://files.pythonhosted.org/packages/b4/07/4d5bcd49e3dfed2d38e2dcb49ab6615f2ceb9f89f5a372c46dbdebb4e028/rpds_py-0.28.0-cp314-cp314t-manylinux_2_31_riscv64.whl", 
hash = "sha256:4b0cb8a906b1a0196b863d460c0222fb8ad0f34041568da5620f9799b83ccf0b", size = 390355, upload-time = "2025-10-22T22:23:55.299Z" }, + { url = "https://files.pythonhosted.org/packages/3f/79/9f14ba9010fee74e4f40bf578735cfcbb91d2e642ffd1abe429bb0b96364/rpds_py-0.28.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf681ac76a60b667106141e11a92a3330890257e6f559ca995fbb5265160b56e", size = 403146, upload-time = "2025-10-22T22:23:56.929Z" }, + { url = "https://files.pythonhosted.org/packages/39/4c/f08283a82ac141331a83a40652830edd3a4a92c34e07e2bbe00baaea2f5f/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1e8ee6413cfc677ce8898d9cde18cc3a60fc2ba756b0dec5b71eb6eb21c49fa1", size = 552656, upload-time = "2025-10-22T22:23:58.62Z" }, + { url = "https://files.pythonhosted.org/packages/61/47/d922fc0666f0dd8e40c33990d055f4cc6ecff6f502c2d01569dbed830f9b/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b3072b16904d0b5572a15eb9d31c1954e0d3227a585fc1351aa9878729099d6c", size = 576782, upload-time = "2025-10-22T22:24:00.312Z" }, + { url = "https://files.pythonhosted.org/packages/d3/0c/5bafdd8ccf6aa9d3bfc630cfece457ff5b581af24f46a9f3590f790e3df2/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b670c30fd87a6aec281c3c9896d3bae4b205fd75d79d06dc87c2503717e46092", size = 544671, upload-time = "2025-10-22T22:24:02.297Z" }, + { url = "https://files.pythonhosted.org/packages/2c/37/dcc5d8397caa924988693519069d0beea077a866128719351a4ad95e82fc/rpds_py-0.28.0-cp314-cp314t-win32.whl", hash = "sha256:8014045a15b4d2b3476f0a287fcc93d4f823472d7d1308d47884ecac9e612be3", size = 205749, upload-time = "2025-10-22T22:24:03.848Z" }, + { url = "https://files.pythonhosted.org/packages/d7/69/64d43b21a10d72b45939a28961216baeb721cc2a430f5f7c3bfa21659a53/rpds_py-0.28.0-cp314-cp314t-win_amd64.whl", hash = "sha256:7a4e59c90d9c27c561eb3160323634a9ff50b04e4f7820600a2beb0ac90db578", size = 216233, upload-time = 
"2025-10-22T22:24:05.471Z" }, + { url = "https://files.pythonhosted.org/packages/ae/bc/b43f2ea505f28119bd551ae75f70be0c803d2dbcd37c1b3734909e40620b/rpds_py-0.28.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f5e7101145427087e493b9c9b959da68d357c28c562792300dd21a095118ed16", size = 363913, upload-time = "2025-10-22T22:24:07.129Z" }, + { url = "https://files.pythonhosted.org/packages/28/f2/db318195d324c89a2c57dc5195058cbadd71b20d220685c5bd1da79ee7fe/rpds_py-0.28.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:31eb671150b9c62409a888850aaa8e6533635704fe2b78335f9aaf7ff81eec4d", size = 350452, upload-time = "2025-10-22T22:24:08.754Z" }, + { url = "https://files.pythonhosted.org/packages/ae/f2/1391c819b8573a4898cedd6b6c5ec5bc370ce59e5d6bdcebe3c9c1db4588/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48b55c1f64482f7d8bd39942f376bfdf2f6aec637ee8c805b5041e14eeb771db", size = 380957, upload-time = "2025-10-22T22:24:10.826Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5c/e5de68ee7eb7248fce93269833d1b329a196d736aefb1a7481d1e99d1222/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:24743a7b372e9a76171f6b69c01aedf927e8ac3e16c474d9fe20d552a8cb45c7", size = 391919, upload-time = "2025-10-22T22:24:12.559Z" }, + { url = "https://files.pythonhosted.org/packages/fb/4f/2376336112cbfeb122fd435d608ad8d5041b3aed176f85a3cb32c262eb80/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:389c29045ee8bbb1627ea190b4976a310a295559eaf9f1464a1a6f2bf84dde78", size = 528541, upload-time = "2025-10-22T22:24:14.197Z" }, + { url = "https://files.pythonhosted.org/packages/68/53/5ae232e795853dd20da7225c5dd13a09c0a905b1a655e92bdf8d78a99fd9/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23690b5827e643150cf7b49569679ec13fe9a610a15949ed48b85eb7f98f34ec", size = 405629, 
upload-time = "2025-10-22T22:24:16.001Z" }, + { url = "https://files.pythonhosted.org/packages/b9/2d/351a3b852b683ca9b6b8b38ed9efb2347596973849ba6c3a0e99877c10aa/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f0c9266c26580e7243ad0d72fc3e01d6b33866cfab5084a6da7576bcf1c4f72", size = 384123, upload-time = "2025-10-22T22:24:17.585Z" }, + { url = "https://files.pythonhosted.org/packages/e0/15/870804daa00202728cc91cb8e2385fa9f1f4eb49857c49cfce89e304eae6/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:4c6c4db5d73d179746951486df97fd25e92396be07fc29ee8ff9a8f5afbdfb27", size = 400923, upload-time = "2025-10-22T22:24:19.512Z" }, + { url = "https://files.pythonhosted.org/packages/53/25/3706b83c125fa2a0bccceac951de3f76631f6bd0ee4d02a0ed780712ef1b/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3b695a8fa799dd2cfdb4804b37096c5f6dba1ac7f48a7fbf6d0485bcd060316", size = 413767, upload-time = "2025-10-22T22:24:21.316Z" }, + { url = "https://files.pythonhosted.org/packages/ef/f9/ce43dbe62767432273ed2584cef71fef8411bddfb64125d4c19128015018/rpds_py-0.28.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:6aa1bfce3f83baf00d9c5fcdbba93a3ab79958b4c7d7d1f55e7fe68c20e63912", size = 561530, upload-time = "2025-10-22T22:24:22.958Z" }, + { url = "https://files.pythonhosted.org/packages/46/c9/ffe77999ed8f81e30713dd38fd9ecaa161f28ec48bb80fa1cd9118399c27/rpds_py-0.28.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:7b0f9dceb221792b3ee6acb5438eb1f02b0cb2c247796a72b016dcc92c6de829", size = 585453, upload-time = "2025-10-22T22:24:24.779Z" }, + { url = "https://files.pythonhosted.org/packages/ed/d2/4a73b18821fd4669762c855fd1f4e80ceb66fb72d71162d14da58444a763/rpds_py-0.28.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:5d0145edba8abd3db0ab22b5300c99dc152f5c9021fab861be0f0544dc3cbc5f", size = 552199, upload-time = "2025-10-22T22:24:26.54Z" }, 
] [[package]] From 3bf9b874c32ebbbaa6f895be988e04a19fdce7ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 12:41:25 +0000 Subject: [PATCH 036/334] cp: !4298 - ci: Refactor testsytem - Removal of JET Artifacts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/actions/action.yml | 212 +++++------------- .github/workflows/cicd-main.yml | 128 +++++++++-- pyproject.toml | 6 +- .../shell_test_utils/run_ci_test.sh | 8 +- .../shell_test_utils/start_interactive_job.sh | 50 +---- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 4 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 4 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 4 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 4 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 4 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 
.../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 4 +- .../bert/bert_release/model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 1 + .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 3 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- 
.../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 
+- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 5 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../model_config.yaml | 4 +- .../t5/t5_release/model_config.yaml | 2 +- .../generate_jet_trigger_job.py | 6 +- .../python_scripts/generate_local_jobs.py | 6 +- .../python_scripts/launch_jet_workload.py | 8 +- .../launch_nemo_run_workload.py | 52 +++-- tests/test_utils/python_scripts/notify.py | 11 - .../{common.py => recipe_parser.py} | 39 +++- .../{common.yaml => ckpt_converter.yaml} | 0 
.../gpt-dynamic-inference-cuda-graphs.yaml | 5 +- ...pt-dynamic-inference-with-coordinator.yaml | 11 +- .../recipes/gpt-dynamic-inference.yaml | 18 +- tests/test_utils/recipes/gpt-grads.yaml | 11 +- tests/test_utils/recipes/gpt-nemo.yaml | 14 +- .../recipes/gpt-static-inference.yaml | 21 +- tests/test_utils/recipes/gpt.yaml | 59 ++--- .../recipes/mamba-static-inference.yaml | 10 +- tests/test_utils/recipes/mamba.yaml | 12 +- tests/test_utils/recipes/mimo.yaml | 8 +- .../recipes/moe-dynamic-inference.yaml | 11 +- .../recipes/moe-static-inference.yaml | 8 +- tests/test_utils/recipes/moe.yaml | 14 +- .../test_utils/recipes/multimodal-llava.yaml | 6 +- uv.lock | 68 +++--- 252 files changed, 698 insertions(+), 751 deletions(-) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => bert_mcore_tp1_pp2}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => bert_mcore_tp1_pp2}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => bert_mcore_tp1_pp2}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => bert_mcore_tp1_pp2}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => bert_mcore_tp1_pp2}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => bert_mcore_tp1_pp2}/model_config.yaml (88%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2 => bert_mcore_tp1_pp4_vp2}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2 => bert_mcore_tp1_pp4_vp2}/golden_values_dev_dgx_h100.json (100%) rename 
tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2 => bert_mcore_tp1_pp4_vp2}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2 => bert_mcore_tp1_pp4_vp2}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2 => bert_mcore_tp1_pp4_vp2}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2 => bert_mcore_tp1_pp4_vp2}/model_config.yaml (89%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_dgx_a100_1N8G => bert_mcore_tp2_pp2}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_dgx_a100_1N8G => bert_mcore_tp2_pp2}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_dgx_a100_1N8G => bert_mcore_tp2_pp2}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_dgx_a100_1N8G => bert_mcore_tp2_pp2}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_dgx_a100_1N8G => bert_mcore_tp2_pp2}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_dgx_a100_1N8G => bert_mcore_tp2_pp2}/model_config.yaml (88%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G => bert_mcore_tp2_pp2_frozen_resume_torch_dist}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G => bert_mcore_tp2_pp2_frozen_resume_torch_dist}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G => 
bert_mcore_tp2_pp2_frozen_resume_torch_dist}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G => bert_mcore_tp2_pp2_frozen_resume_torch_dist}/model_config.yaml (89%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_local_spec}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_local_spec}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_local_spec}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_local_spec}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_local_spec}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_local_spec}/model_config.yaml (88%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist}/golden_values_dev_dgxh100_eos.json (100%) 
rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist}/model_config.yaml (84%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist_local_spec}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist_local_spec}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist_local_spec}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist_local_spec}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist_local_spec}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G => bert_mcore_tp2_pp2_resume_torch_dist_local_spec}/model_config.yaml (84%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1 => bert_mcore_tp4_pp1}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1 => bert_mcore_tp4_pp1}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1 => 
bert_mcore_tp4_pp1}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1 => bert_mcore_tp4_pp1}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1 => bert_mcore_tp4_pp1}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/bert/{bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1 => bert_mcore_tp4_pp1}/model_config.yaml (88%) rename tests/test_utils/python_scripts/{common.py => recipe_parser.py} (89%) rename tests/test_utils/recipes/{common.yaml => ckpt_converter.yaml} (100%) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index b9a02e1e3f5..8b7fd373a98 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -15,6 +15,9 @@ name: "Test Template" description: "Template for running NeMo tests in a containerized environment" inputs: + container-image: + description: "Container image to use for test" + required: true timeout: description: "Max runtime of test in minutes" required: false @@ -46,83 +49,44 @@ inputs: runs: using: "composite" steps: - - name: Copy data - shell: bash - if: inputs.is_unit_test == 'false' - env: - SOURCE_DIR: /mnt/datadrive/TestData/megatron-lm/artifacts - TARGET_DIR: /home/runner/_work/TestData/megatron-lm/artifacts - MODEL: ${{ inputs.model }} - run: | - mkdir -p $TARGET_DIR/text/data/ - - if [[ "$MODEL" == "bert" ]]; then - mkdir -p $TARGET_DIR/text/the_pile/bert_shard00/ - cp -a $SOURCE_DIR/text/the_pile/bert_shard00/. $TARGET_DIR/text/data/ - elif [[ "$MODEL" == "gpt" ]] || [[ "$MODEL" == "moe" ]]; then - cp -a $SOURCE_DIR/text/the_pile/shard00/. 
$TARGET_DIR/text/data/ - fi - - - name: Install curl, sudo - shell: bash - run: | - sudo apt-get update - sudo apt-get install -y curl uuid-runtime - - name: Checkout repository uses: actions/checkout@v2 - with: - path: ${{ github.workspace }}/Megatron-LM - - - name: Cache uv - uses: actions/cache@v4 - id: cache - with: - path: cache-mount - key: ${{ runner.os }}-uv-${{ hashFiles('**/uv.lock') }} - restore-keys: | - ${{ runner.os }}-uv- - - name: Restore Docker cache mounts - uses: reproducible-containers/buildkit-cache-dance@5b81f4d29dc8397a7d341dba3aeecc7ec54d6361 - with: - cache-dir: cache-mount - dockerfile: docker/Dockerfile.ci.dev - skip-extraction: ${{ steps.cache.outputs.cache-hit }} + - name: Change ownership of /home/runner/ + shell: bash + run: sudo chown -R $(whoami) /home/runner/ - name: Setup python uses: actions/setup-python@v5 with: python-version: 3.12 - - name: Download test data - shell: bash - env: - GH_TOKEN: ${{ inputs.PAT }} - TIMEOUT: ${{ inputs.timeout }} - IS_UNIT_TEST: ${{ inputs.is_unit_test == 'true' }} + - name: Install uuidgen + shell: bash -x -e -u -o pipefail {0} run: | - echo "::group::Download test data" - pip install --no-cache-dir pygithub click - python tests/test_utils/python_scripts/download_unit_tests_dataset.py --assets-dir ./assets - echo "::endgroup::" + apt-get update + apt-get install -y uuid-runtime - name: Create run-script (unit test) - shell: bash + shell: bash -x -e -u -o pipefail {0} if: inputs.is_unit_test == 'true' run: | echo "::group::Create run-script" cmd=$(cat <<'RUN_TEST_EOF' #!/bin/bash - docker exec -t test_container_${{ github.run_id }} bash -c ' - set -e - bash /opt/megatron-lm/tests/unit_tests/run_ci_test.sh \ - --tag ${{ inputs.tag }} \ - --environment dev \ - --bucket '\''${{ inputs.test_case }}'\'' \ - --log-dir /opt/megatron-lm/outputs/logs - ' + export PYTHONPATH=$(pwd) + export NEMORUN_HOME=$(pwd) + pip install --no-cache-dir uv + uv sync --only-group test + uv run python 
tests/test_utils/python_scripts/launch_nemo_run_workload.py \ + --scope unit-tests \ + --model unit-tests \ + --test-case '${{ inputs.test_case }}' \ + --environment dev \ + --platform dgx_h100 \ + --tag ${{ inputs.tag }} \ + --container-image ${{ inputs.container-image }} RUN_TEST_EOF ) @@ -130,7 +94,7 @@ runs: echo "::endgroup::" - name: Create run-script (e2e test) - shell: bash + shell: bash -x -e -u -o pipefail {0} if: inputs.is_unit_test == 'false' env: MODEL: ${{ inputs.model }} @@ -138,118 +102,64 @@ runs: echo "::group::Create run-script" cmd=$(cat <<'RUN_TEST_EOF' #!/bin/bash - - - - docker exec -t test_container_${{ github.run_id }} bash -c ' - - set -e - ls -al /workspace/data - - if [[ "${{ inputs.model }}" == "bert" ]]; then - TRAINING_SCRIPT_PATH=pretrain_bert.py - elif [[ "${{ inputs.model }}" == "gpt" ]] || [[ "${{ inputs.model }}" == "moe" ]]; then - TRAINING_SCRIPT_PATH=pretrain_gpt.py - fi - - ARGUMENTS=( - "DATA_PATH=/workspace/data" - "DATA_CACHE_PATH=/workspace/data/cache" - "OUTPUT_PATH=$(pwd)/outputs/" - "TENSORBOARD_PATH=$(pwd)/tensorboard" - "CHECKPOINT_SAVE_PATH=$(pwd)/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" - "TRAINING_SCRIPT_PATH=$TRAINING_SCRIPT_PATH" - "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/${{inputs.model}}/${{inputs.test_case}}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/${{inputs.model}}/${{inputs.test_case}}/golden_values_dev_dgx_h100.json" - "N_REPEAT=5" - "ENABLE_LIGHTWEIGHT_MODE=false" - "RECORD_CHECKPOINTS=false" - ) - - bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${ARGUMENTS[@]} - ' + set -euxo pipefail + + export PYTHONPATH=$(pwd) + export NEMORUN_HOME=$(pwd) + pip install --no-cache-dir uv + uv sync --only-group test + uv run python tests/test_utils/python_scripts/launch_nemo_run_workload.py \ + --scope mr \ + --model ${{ inputs.model }} \ + --test-case ${{ inputs.test_case }} \ + --environment dev \ + --platform dgx_h100 \ + 
--container-image ${{ inputs.container-image }} \ + --data-dir /mnt/datadrive/TestData/megatron-lm/artifacts RUN_TEST_EOF ) echo "$cmd" | tee "job.sh" echo "::endgroup::" - - name: Build container - shell: bash - env: - GH_TOKEN: ${{ inputs.PAT }} - run: | - echo "::group::Build test container" - docker build -f docker/Dockerfile.ci.dev --build-arg FROM_IMAGE_NAME="nvcr.io/nvidia/pytorch:25.06-py3" --target=main -t megatron-core . - echo "::endgroup::" - - - name: Start container - shell: bash - run: | - echo "::group::Start test container" - set -x - - cmd=$(cat < functional-tests.json + + echo "functional-tests=$(cat functional-tests.json)" | tee -a "$GITHUB_OUTPUT" cicd-functional-tests-latest: strategy: fail-fast: false matrix: - include: - - model: "gpt" - test_case: "gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G" - - model: "gpt" - test_case: "gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G" - - model: "moe" - test_case: "gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer" - - model: "moe" - test_case: "gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed" + include: ${{ fromJson(needs.cicd-parse-functional-tests.outputs.functional-tests) }} needs: - pre-flight - cicd-wait-in-queue - - cicd-unit-tests-latest + - cicd-parse-functional-tests + # - cicd-unit-tests-latest runs-on: nvidia-ci-aws-gpu-x8 name: "${{ matrix.model }}/${{ matrix.test_case }} - latest" environment: nemo-ci @@ -149,7 +246,7 @@ jobs: || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' ) - && !needs.pre-flight.outputs.is_merge_group == 'true' + && needs.pre-flight.outputs.is_merge_group == 'false' && !cancelled() steps: - name: Checkout @@ -163,6 +260,7 @@ jobs: timeout: ${{ matrix.timeout || 30 }} is_unit_test: "false" PAT: ${{ secrets.PAT }} + container-image: 
766267172432.dkr.ecr.us-east-1.amazonaws.com/megatron-lm:1864 # ${{ github.sha }} Nemo_CICD_Test: needs: @@ -243,7 +341,7 @@ jobs: && !cancelled() strategy: matrix: - flag: [unit-test, e2e] + flag: [unit-test] steps: - name: Checkout uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index 91d66de7efe..aaabab3875c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ dev = [ "wget", "onnxscript", "flash-linear-attention~=0.3.2", - "emerging_optimizers" + "emerging_optimizers", ] lts = [ @@ -170,8 +170,8 @@ flash_mla = [ ] transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "release_v2.8" } # on `release_v2.8` -emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev= "fb1add873e7851ec34b48581ea1b15761b73d189"} - +emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "fb1add873e7851ec34b48581ea1b15761b73d189" } +nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "8ca8f7952a597f944985f1f1368a7acb9aa3a6c2" } [tool.isort] profile = "black" # black-compatible line_length = 100 # should match black parameters diff --git a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh index b24423773e5..75cb4e619e7 100644 --- a/tests/functional_tests/shell_test_utils/run_ci_test.sh +++ b/tests/functional_tests/shell_test_utils/run_ci_test.sh @@ -48,6 +48,8 @@ for mandatory_var in "${MANDATORY_VARS[@]}"; do fi done +set -exo pipefail + # Extract settings from params file TEST_TYPE=$(cat $TRAINING_PARAMS_PATH | /usr/local/bin/yq '.TEST_TYPE') @@ -64,7 +66,7 @@ else fi mkdir -p $CHECKPOINT_SAVE_PATH -mkdir -p $CHECKPOINT_LOAD_PATH +mkdir -p $CHECKPOINT_LOAD_PATH || true _CHECKPOINT_LOAD_PATH=$CHECKPOINT_LOAD_PATH _CHECKPOINT_SAVE_PATH=$CHECKPOINT_SAVE_PATH @@ -103,6 +105,10 @@ if [[ "$MODE" == "pretraining" && "$TEST_TYPE" != "release" ]]; then TRAIN_ITERS=$(cat 
$TRAINING_PARAMS_PATH | /usr/local/bin/yq '.MODEL_ARGS."--exit-interval" // "100"') fi +elif [[ "$MODE" == "inference" && "$TEST_TYPE" != "release" ]]; then + if [[ "$ENABLE_LIGHTWEIGHT_MODE" == "true" && "$IS_NEMO_TEST" == "false" ]]; then + /usr/local/bin/yq -i '.ENV_VARS."SKIP_PYTEST" = 1' $TRAINING_PARAMS_PATH + fi fi if [[ "$MODE" == "pretraining" && "$TEST_TYPE" = "release" ]]; then diff --git a/tests/functional_tests/shell_test_utils/start_interactive_job.sh b/tests/functional_tests/shell_test_utils/start_interactive_job.sh index d3b6055e55b..0b30fc01283 100644 --- a/tests/functional_tests/shell_test_utils/start_interactive_job.sh +++ b/tests/functional_tests/shell_test_utils/start_interactive_job.sh @@ -78,56 +78,8 @@ if [ -z "$PARTITION" ] || [ -z "$SLURM_ACCOUNT" ] || [ -z "$IMAGE" ] || [ -z "$D exit 1 fi -# Check if recipes directory exists -if [ ! -d "$RECIPES_DIR" ]; then - echo "Error: Recipes directory '$RECIPES_DIR' does not exist" - exit 1 -fi - -# Create copy of recipes with interpolated artifacts -python -m tests.test_utils.python_scripts.common --recipes-dir $RECIPES_DIR --output-dir $RECIPES_DIR/interpolated - # Add current directory to container mounts -CONTAINER_MOUNTS="$(pwd):/opt/megatron-lm" - -# Process each YAML file in the recipes directory -if [ ! -f "$YAML_FILE" ]; then - continue -fi - -echo "Processing $(basename "$YAML_FILE")..." -YAML_FILE=workflows.yaml -# Extract artifacts from YAML file -while IFS=: read -r value key; do - # Skip empty or malformed entries - if [ -z "$value" ] || [ -z "$key" ] || [ "$value" = "/data/" ] || [ "$key" = "/data/" ]; then - continue - fi - - # Skip entries that don't start with a forward slash - if [[ ! 
"$key" =~ ^/ ]]; then - continue - fi - - # Create the mount string - mount="${DATASET_DIR}/${value}:${key}" - - # Skip if we've seen this mount before - if [ "${seen_mounts[$mount]}" = "1" ]; then - echo "Skipping duplicate mount: $mount" - continue - fi - - # Mark this mount as seen - seen_mounts[$mount]=1 - - if [ -z "$CONTAINER_MOUNTS" ]; then - CONTAINER_MOUNTS="$mount" - else - CONTAINER_MOUNTS="${CONTAINER_MOUNTS},$mount" - fi -done < <(yq eval '.[].spec.artifacts | to_entries | .[] | "\(.value):\(.key)"' "$YAML_FILE") -rm $YAML_FILE +CONTAINER_MOUNTS="$DATASET_DIR:/mnt/artifacts,$(pwd):/opt/megatron-lm" # Build the final srun command SRUN_CMD="srun \ diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml index 7ccfd215dcc..ede505eb2f4 100644 --- a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: 
${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json rename to 
tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml similarity index 89% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml index b4c5decf82e..e606d04a88c 100644 --- a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml index 11909062fb8..e7bb67a9ed8 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml similarity index 89% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml index 09864ee106a..6f38457cdd0 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff 
--git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to 
tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml index 7eeac331ad3..def6878c889 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_eos.json diff --git 
a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml index 94d9cbfd83f..8b993bfaec3 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 @@ -42,6 +42,6 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --ckpt-format: torch - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --attention-backend: unfused TEST_TYPE: ckpt-resume diff --git 
a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml index c496f84f196..05a3d0730c8 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + 
--data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 @@ -40,7 +40,7 @@ MODEL_ARGS: --use-checkpoint-args: true --use-checkpoint-opt_param-scheduler: true --no-gradient-accumulation-fusion: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --ckpt-format: torch diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml index 59607ba28d4..777be078e4d 100644 --- a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git 
a/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml index c4b80767c63..68cbb230996 100644 --- a/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml @@ -27,7 +27,7 @@ MODEL_ARGS: --pipeline-model-parallel-size: 8 # Data args --data-path: ${DATA_BLEND} - --vocab-file: ${DATA_PATH}/vocab.txt + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --data-cache-path: ${DATA_CACHE_PATH} # EVAL_AND_LOGGING_ARGS diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index c2d14870924..208827c9aea 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --apply-query-key-layer-scaling: true diff 
--git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml index 3b8c3563f41..15fbeb4f986 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 4c7132e2d1c..573cddceff0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml index 0be73f09e67..f897d2b9a8e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -44,7 +44,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: 
${DATA_CACHE_PATH} --bf16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml index eac35eeb2ab..7345237d672 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -43,7 +43,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml index d5960cff7ac..e15844bafb7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + 
--data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -44,7 +44,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml index ee577dda37a..c7dfcfe48e3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -44,7 +44,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml index 
60bf33c7e78..e829340190e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -45,7 +45,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml index 33da65bd2b7..863cf9cac25 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -43,7 +43,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for 
TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml index b57638bcd80..fcb9fa2884f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml index 6070ad5e039..0e32dbd913a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -44,7 +44,7 @@ MODEL_ARGS: 
--use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml index 387f03d450d..246fb33da57 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml index 967567958f0..196492f1ec7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 
320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index 1b5de4373f6..665388ce7a1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml index ccff1cf44fd..f4cbb87d27d 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index 7fe999b2a6a..80218da886d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -20,7 +20,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml index 0e243b61138..96b4a6c0ccc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -44,7 +44,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/model_config.yaml index 453c506742b..c46be1c819b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -42,7 +42,7 @@ MODEL_ARGS: --deterministic-mode: true --no-gradient-accumulation-fusion: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml index 8211c7f40f6..c151135828d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --fp8-amax-compute-algo: max --attention-softmax-in-fp32: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/model_config.yaml index cf4fe01721c..40dea9779c9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml index 51475b1a653..fb47009a77d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document 
--vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml index 02db21e9477..32dd88dfb72 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml index 3f650edfa8a..21c6ac25e83 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --fp8-amax-compute-algo: max --attention-softmax-in-fp32: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml index 95e4fd5b48e..59707f588c0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -51,7 +51,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml index a38d289752f..0e62673a628 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml @@ -20,7 +20,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index bbbcf96b674..4361bf233cd 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward 
compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index 01736c68999..ed56bc7cfad 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml index 9bd15f98877..fe4a6575953 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 
+47,7 @@ MODEL_ARGS: --fp8-amax-compute-algo: max --attention-softmax-in-fp32: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/model_config.yaml index 48cf5e1cfac..c2a26a070fb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/model_config.yaml @@ -58,6 +58,7 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true + --exit-interval: 25 TEST_TYPE: regular METRICS: - "iteration-time" diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_disable/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_disable/model_config.yaml index 9b641b68d75..14d585d84a7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_disable/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_disable/model_config.yaml @@ -63,7 +63,7 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt # logging settings diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_enable/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_enable/model_config.yaml index d18a37d7823..df91f9a95eb 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_enable/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_enable/model_config.yaml @@ -62,7 +62,7 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt # logging settings diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_1/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_1/model_config.yaml index 3258e398b1e..849df09f27f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_1/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_1/model_config.yaml @@ -63,7 +63,7 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt # logging settings diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_2/model_config.yaml index 5fd21f6175a..3316142031f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_2/model_config.yaml @@ -62,7 +62,7 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: 
${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt # logging settings diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_reshard/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_reshard/model_config.yaml index 65bdc723480..4b8d6a47b9c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_reshard/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_reshard/model_config.yaml @@ -63,7 +63,7 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt # logging settings diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume/model_config.yaml index fd313d7a959..43937abe664 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume/model_config.yaml @@ -62,7 +62,7 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt # logging settings diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/model_config.yaml index 476d0e08cf1..e9c35d0e86d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/model_config.yaml @@ -59,7 +59,6 @@ BASE_MODEL_ARGS: &BASE_MODEL_ARGS --num-query-groups: 8 --seq-length: 512 --kv-channels: 128 - --ffn-hidden-size: 8192 --group-query-attention: true --normalization: RMSNorm --swiglu: true @@ -90,7 +89,7 @@ BASE_MODEL_ARGS: &BASE_MODEL_ARGS --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt # logging settings diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_transient/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_transient/model_config.yaml index 48d188d81c7..5021a029d3b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_transient/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_transient/model_config.yaml @@ -64,7 +64,7 @@ MODEL_ARGS: --exit-interval: 4 # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt # logging settings diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml index fd43e992119..8031bf55d8d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml 
@@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml index 1e11b3ff94a..5ed4553ad1d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml index efe469636e9..6eac7d0da72 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml index a0785630f36..750986482c7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: 
--use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml index ff347789ff1..f34c980d821 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml index e09ac1ce49e..7c880daf577 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml index af2f93042ea..7f0958f94f2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: 
log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml index 3f6379f90ff..7271fe996d6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml index c49288bf939..7c5a764ccb9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml index ef2d6010e6f..2491fd02e96 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml index 4f3560b8c35..58d4628f72d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml index cb4e11e3d3c..5fcf15a2c3e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml index 388afdaed4a..6b66183c1dc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml 
index 4defebeac39..089fd7808ff 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/model_config.yaml index 47ec5c2bddf..3d8843214a3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml index 89ff19ad1e8..4dc43353c9f 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/model_config.yaml index 58554cc1121..7133af75b8f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml index a63a24f6aa0..1e29b79848b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml index 7281f21ce90..27d8203d307 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml index b6527f0f7c7..bc0da950ac8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml index f7822d5c5dc..962e08d5e73 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml index d4fb79b2bea..8942fa94b55 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: 
${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml index ac8332843f7..7f6ae92394d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml index 2a13801a9d1..65ea19f9bd8 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml index 4a235266b14..99a04b44fe3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml index 3dece98a527..aa041fec6de 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml index fbb85c1a7d2..a1150d0db09 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff 
--git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml index b0fd77bb767..907c86da3b1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -52,7 +52,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml index 170c1397ba1..503e702c4f5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml index 9473172d43a..c8d15bbf005 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml index e64e70ae046..8db3c6529df 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml index e28ce4aea78..243a52e84bd 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward 
compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml index efbe0f3d7cc..699ca43cc7b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml index 835e017ccce..b3a950dcb5e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt 
--split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml index f9b74000068..0e71ea6c268 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml index 13a92a6133d..6aa5a991e90 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/model_config.yaml index 89d3d84146e..4907dfb7f4c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/model_config.yaml index 4fba5fca3a8..b894bf3bd20 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/model_config.yaml @@ -22,7 +22,7 @@ 
MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/model_config.yaml index 9e8d9b87466..cfdbe747764 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --decoder-first-pipeline-num-layers: 2 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/model_config.yaml index dd5d83e0603..f9f58db94f9 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --account-for-embedding-in-pipeline-split: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml index 325268c5a9d..db560c8aac5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist 
--dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml index 85ff6feb92d..c6a2379b571 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml index 0ef2b566008..1ad10c02caa 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml index b267aa17fd2..364a41d2fe1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml @@ -37,7 +37,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -62,7 +62,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml index 5f76e8f8b18..ac70eb6bd1e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: 
${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml index c03a621f91d..585aea5c26e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml index d853b772bb9..f8f7bded190 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -50,7 +50,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml index 8af4e996340..6234292f5ff 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: 
${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/model_config.yaml index a168bf941f9..d510bd15c0f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/model_config.yaml index c28625ec1f0..ccc411e5879 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: 
${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/model_config.yaml index 3a1f90a9273..5a9f0ea8a89 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml index d2e2e266ff6..920ad6832d8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/model_config.yaml index 683a855ab88..78e7e3a45ca 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml index 
f35f4f3d99f..36a000292f5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml index 5a5d023dbf5..ddbc04621a6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - 
--dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml index 98fca77b1b8..31e5bb16ad5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -42,7 +42,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml index f68e6657c26..76cfaf020af 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml index e800a1bb0e3..3488b4d1585 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes 
--data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml index e97bc5217c8..3a9b912ed0c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -45,7 +45,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/model_config.yaml index 8fa925d715d..586f90f1cf6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/model_config.yaml @@ -27,7 +27,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: 
${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml index b0aa1f66235..dd928979546 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml index 8d7abbe27d4..bf6520edcd6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml index b31c1bc3ef9..f7c1c7ee725 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml index aac3d65eb87..deaadae81a3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -44,7 +44,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml index a12763a2117..fbbe2255a82 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: 
${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml index 9d8400459f1..383ec818661 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml index fcc217aa470..14cefe1e409 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -45,7 +45,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml index b9d5f466afc..3cf39c93e9c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: 
${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml index 5d78d653aae..4fd3ccba030 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml index b19f7ffcb9c..e8f7fee1215 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml 
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -43,7 +43,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/model_config.yaml index ac68729bd5e..d6a183799fd 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml 
index 6fee9172272..8df2e496bb1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml index dea5ced0081..7cd304fc880 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 
+48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml index 1c2e8ff6304..72f029c9044 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml index 73f311df459..75a0ffc2adc 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml index 83a671b2c26..de4164176bb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: 
${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml index eee1bb896f2..2ee48e8111c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml index 1c83796b116..8f09dae5fec 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml index 8543a37af49..1ac8ec45c24 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward 
compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml index 46dfa985920..37fb8b1cccd 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml index 6f776fc09b1..1406468fadf 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml @@ -25,7 +25,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml index 363f31519f9..2ec2c402230 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -19,7 +19,7 @@ MODEL_ARGS: --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1 --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git 
a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml index c0b563c663b..13e56a13c85 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -19,7 +19,7 @@ MODEL_ARGS: --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1 --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine @@ -49,7 +49,7 @@ MODEL_ARGS: --inference-ckpt-non-strict: true # To handle the extra_state errors --output-path: ${TENSORBOARD_PATH} --output-every-n-results: 32 - --prompt-file: ${DATA_PATH}/sharegpt/filtered-benchmark/processed.jsonl + --prompt-file: ${DATA_PATH}/text/sharegpt-vicuna/filtered/processed.jsonl --prompt-file-num-truncate: 128 # originally 1024 --num-tokens-to-generate: 128 # originally 512 --incoming-requests-per-step: 32 diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml index 024d2ede3da..b99100f65eb 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -19,7 +19,7 @@ MODEL_ARGS: --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine @@ -51,7 +51,7 @@ MODEL_ARGS: --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." 
--incoming-requests-per-step: 32 --use-flashinfer-fused-rope: true - + METRICS: - "generated_tokens" - "logprobs" diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml index f2d3dee3904..7a2cc9b0c78 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -19,7 +19,7 @@ MODEL_ARGS: --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml index 5fe1ecf5c8f..0b31d16af75 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - 
--tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -19,7 +19,7 @@ MODEL_ARGS: --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml index 90e93dfdcd8..3b10336138d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml @@ -12,8 +12,8 @@ MODEL_ARGS: --log-memory-to-tensorboard: true --timing-log-level: 2 # See the mount paths defined in the top level tests/test_utils/recipes/gpt-static-inference.yaml - --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml index 18fe5beff99..04e6caa3303 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -19,7 +19,7 @@ MODEL_ARGS: --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml index d03c69f8325..9aa1a6e1c96 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true 
--max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -19,7 +19,7 @@ MODEL_ARGS: --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml index d78c45e380c..b3564f8226a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -19,7 +19,7 @@ MODEL_ARGS: --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml index 3de471e8f8b..4350c4a6f50 100644 --- 
a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml @@ -26,7 +26,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml index 21fa690e66d..b571dca2dd0 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml @@ -26,7 +26,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml index f3942d7ae4a..941d3f6f829 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml @@ -26,7 +26,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - 
--data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml index 76891deaa85..588cfe3e80a 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml @@ -26,7 +26,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml index 4e55935511c..75e4d3123bd 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml @@ -11,8 +11,8 @@ MODEL_ARGS: --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mamba_hybrid_2b/checkpoint - --tokenizer-model: ${DATA_PATH}/mamba_hybrid_2b/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --load: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/checkpoint + --tokenizer-model: 
${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml index 2af1fa222c1..301b68e7382 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml @@ -11,8 +11,8 @@ MODEL_ARGS: --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mamba_hybrid_2b/checkpoint - --tokenizer-model: ${DATA_PATH}/mamba_hybrid_2b/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --load: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/checkpoint + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml b/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml index 447b5a094e8..ced98a352b1 100644 --- a/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml +++ b/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml @@ -3,7 +3,6 @@ ENV_VARS: NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 NCCL_ALGO: Ring CUBLAS_WORKSPACE_CONFIG: :4096:8 - ARTIFACTS_ROOT: /workspace/checkpoints MODEL_ARGS: --num-layers: 32 --hidden-size: 4096 @@ -48,8 +47,8 @@ MODEL_ARGS: --deterministic-mode: true 
--log-memory-to-tensorboard: true --dataloader-type: external - --data-path: ${DATA_PATH} - --language-model-checkpoint: ${ARTIFACTS_ROOT}/vicuna_7b_pyt/dcp/mcore-v1.5_fp32/weights + --data-path: ${DATA_PATH}/mixed/mcore_mimo_vlm/llava_pretrain_energon + --language-model-checkpoint: ${CHECKPOINT_LOAD_PATH}/model/vicuna_7b_pyt/dcp/mcore-v1.5_fp32/weights --auto-detect-ckpt-format: true --accumulate-allreduce-grads-in-fp32: true --position-embedding-type: rope diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml index f955dbf17a7..6bdb19e1001 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml index f5014a23b5c..97db543f73c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml index 7cb050257a9..45ae64df053 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -50,7 +50,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml index 2354ecd7fd9..bb3f5df251d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml index 7c0a103200a..5ce2939b05d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml index a01439c83cc..60652f0ded9 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -50,7 +50,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml index 984e8bd51f3..8411f00055e 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --disable-bias-linear: true diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml index 617d2a70b58..ac03efd36a5 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml index 34070006ad7..989a24acaf7 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml index 5390afcd09b..52eb433afd5 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml @@ -38,7 +38,7 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml index 8dcf744be8f..b95d5c04a1a 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml index 2dd0fda1c25..5268bf68b33 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml index 8e98f65315b..8f4f022345a 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -50,7 +50,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --disable-bias-linear: true diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml index 27b2db92ca9..aa83c79ceb2 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml index e5dd41580d0..758f7af8f0f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json 
--merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml index f78250b86e2..2ef041c07af 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -53,7 +53,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --ckpt-assume-constant-structure: true --data-cache-path: ${DATA_CACHE_PATH} --bf16: true diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml index e970e1e0209..29a63c7d148 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml index be2a2cb6a6f..a15bbf77196 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -52,7 +52,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --ckpt-assume-constant-structure: true --data-cache-path: ${DATA_CACHE_PATH} --bf16: true diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml index 0888531f330..a7e85122831 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -54,7 +54,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --no-bias-gelu-fusion: true diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml index 19a8b4fc639..a5f390a463d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: 
--lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml index 12c43095c41..7ffcd448b37 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -51,7 +51,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml index a88a8b74b97..e7aa73ba6b1 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -56,7 +56,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml index b22cd9ba9ba..3806ae26529 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -52,7 +52,7 @@ MODEL_ARGS: 
--attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml index 91a908a4fcd..4820a43bf3f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -55,7 +55,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml index f27db4a8021..488b8ad92d2 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml @@ -22,7 +22,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml index 7ebd9f0d1af..e8c45375110 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml @@ -38,7 +38,7 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml index 11d62eb1490..c7f0bde3e82 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml @@ -38,7 +38,7 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - 
--data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml index 0a37ee08498..bf1c5a45cc9 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml @@ -39,7 +39,7 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml index e46fc9246b7..e593e94f5ac 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document --vocab-file: ${DATA_PATH}/bpe/vocab.json --merge-file: ${DATA_PATH}/bpe/merges.txt --split: 949,50,1 @@ -58,7 +58,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --no-bias-gelu-fusion: true diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index df6ca00d00e..d94b06f5ac8 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -11,8 +11,8 @@ MODEL_ARGS: --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml index 3f09b79d8e7..a9171008b7c 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml @@ -11,8 +11,8 @@ MODEL_ARGS: --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index 26a9f7afc1e..116992b2d7f 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -11,8 +11,8 @@ MODEL_ARGS: --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml index 
e9556f5f36e..234236c7d26 100644 --- a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/model_config.yaml index 48f79ab9977..54ad28a8e8a 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -50,7 +50,7 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --deterministic-mode: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --attention-backend: unfused --log-memory-to-tensorboard: true TEST_TYPE: regular diff --git 
a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml index 941f616134e..9cc675a35f6 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -50,7 +50,7 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --deterministic-mode: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --attention-backend: unfused --log-memory-to-tensorboard: true TEST_TYPE: ckpt-resume diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml index 4a1f05c07ab..5dc3478de12 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + 
--data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -50,6 +50,6 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --deterministic-mode: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --log-memory-to-tensorboard: true TEST_TYPE: regular diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml index 9bd3c8b887e..1bf1e028390 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -50,6 +50,6 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --deterministic-mode: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --log-memory-to-tensorboard: true TEST_TYPE: ckpt-resume diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml 
b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml index ae465aecc67..76afded197d 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/model_config.yaml index 4df31e32ed9..2ab4e9730d7 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml 
b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml index 6a5a701a776..37085e01771 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/model_config.yaml index 268cd275db5..46e7209823f 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml 
b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml index 8d871796477..0b11a3c137c 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/model_config.yaml index d315b91295e..c305e4a86dd 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml index de1e2d982ec..d30207b5b51 100644 --- 
a/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml @@ -37,7 +37,7 @@ MODEL_ARGS: --pipeline-model-parallel-size: 1 # Data args --data-path: ${DATA_BLEND} - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --split: 99982,9,9 --data-cache-path: ${DATA_CACHE_PATH} diff --git a/tests/test_utils/python_scripts/generate_jet_trigger_job.py b/tests/test_utils/python_scripts/generate_jet_trigger_job.py index 9c6edc05657..50d8598ae66 100644 --- a/tests/test_utils/python_scripts/generate_jet_trigger_job.py +++ b/tests/test_utils/python_scripts/generate_jet_trigger_job.py @@ -4,7 +4,7 @@ import click import yaml -from tests.test_utils.python_scripts import common +from tests.test_utils.python_scripts import recipe_parser BASE_PATH = pathlib.Path(__file__).parent.resolve() @@ -81,7 +81,7 @@ def main( ): list_of_test_cases = [ test_case - for test_case in common.load_workloads( + for test_case in recipe_parser.load_workloads( scope=scope, container_tag=container_tag, environment=environment, @@ -158,7 +158,7 @@ def main( for test_idx, test_case in enumerate(list_of_test_cases): job_tags = list(tags) - job_tags.append(f"cluster/{common.resolve_cluster_config(cluster)}") + job_tags.append(f"cluster/{recipe_parser.resolve_cluster_config(cluster)}") script = [ "export PYTHONPATH=$(pwd); " diff --git a/tests/test_utils/python_scripts/generate_local_jobs.py b/tests/test_utils/python_scripts/generate_local_jobs.py index 6a16af24a30..4a7cf2d7c13 100644 --- a/tests/test_utils/python_scripts/generate_local_jobs.py +++ b/tests/test_utils/python_scripts/generate_local_jobs.py @@ -11,7 +11,7 @@ import click import yaml -from tests.test_utils.python_scripts import common +from tests.test_utils.python_scripts import recipe_parser def load_script(config_path: str) -> str: @@ -68,7 +68,7 
@@ def main( enable_lightweight_mode: bool = False, record_checkpoints: bool = False, ): - workloads = common.load_workloads( + workloads = recipe_parser.load_workloads( container_image="none", scope=scope, model=model, @@ -77,6 +77,8 @@ def main( container_tag="none", ) + print(workloads) + for workload in workloads: if workload.type == "build": continue diff --git a/tests/test_utils/python_scripts/launch_jet_workload.py b/tests/test_utils/python_scripts/launch_jet_workload.py index 254f522c6fb..0e3ed179f4a 100644 --- a/tests/test_utils/python_scripts/launch_jet_workload.py +++ b/tests/test_utils/python_scripts/launch_jet_workload.py @@ -17,7 +17,7 @@ from jetclient.facades.objects import log as jet_log from jetclient.services.dtos.pipeline import PipelineStatus -from tests.test_utils.python_scripts import common +from tests.test_utils.python_scripts import recipe_parser BASE_PATH = pathlib.Path(__file__).parent.resolve() DASHBOARD_ENDPOINT = os.getenv("DASHBOARD_ENDPOINT") @@ -70,7 +70,7 @@ def launch_and_wait_for_completion( ).workloads.submit( workloads=[ jetclient.JETWorkloadManifest(**workload) - for workload in common.load_workloads( + for workload in recipe_parser.load_workloads( test_case=test_case, n_repeat=n_repeat, time_limit=(1200 if enable_lightweight_mode else time_limit), @@ -83,7 +83,7 @@ def launch_and_wait_for_completion( record_checkpoints=record_checkpoints, ) ], - config_id=f"mcore/{common.resolve_cluster_config(cluster)}", + config_id=f"mcore/{recipe_parser.resolve_cluster_config(cluster)}", custom_config={ "launchers": {cluster: cluster_config}, "executors": { @@ -116,7 +116,7 @@ def launch_and_wait_for_completion( }, "outputs": { "enabled": True, - "artifacts_storages": [common.resolve_artifact_config(cluster)], + "artifacts_storages": [recipe_parser.resolve_artifact_config(cluster)], }, }, wait_for_validation=True, diff --git a/tests/test_utils/python_scripts/launch_nemo_run_workload.py 
b/tests/test_utils/python_scripts/launch_nemo_run_workload.py index d0ba6c4fe85..1aa1c560052 100644 --- a/tests/test_utils/python_scripts/launch_nemo_run_workload.py +++ b/tests/test_utils/python_scripts/launch_nemo_run_workload.py @@ -1,10 +1,16 @@ +import logging import os import pathlib +import sys +from typing import Optional import click import nemo_run as run -from tests.test_utils.python_scripts import common +from tests.test_utils.python_scripts import recipe_parser + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) @click.command() @@ -13,8 +19,20 @@ @click.option("--test-case", required=True, type=str, help="Test case of the workload") @click.option("--environment", required=True, type=str, help="Environment of the workload") @click.option("--platform", required=True, type=str, help="Platform of the workload") -def main(scope, model, test_case, environment, platform): - workloads = common.load_workloads( +@click.option("--container-image", required=True, type=str, help="Container image of the workload") +@click.option("--data-dir", required=False, type=str, help="Data directory of the workload") +@click.option("--tag", required=False, type=str, help="Tag of the workload") +def main( + scope, + model, + test_case, + environment, + platform, + container_image, + data_dir: Optional[str] = None, + tag: Optional[str] = None, +): + workloads = recipe_parser.load_workloads( container_image="none", scope=scope, model=model, @@ -22,17 +40,17 @@ def main(scope, model, test_case, environment, platform): environment=environment, container_tag="none", platform=platform, + tag=tag, ) workloads = [workload for workload in workloads if workload.type != "build"] - print(workloads) assert len(workloads) == 1, f"Expected exactly one workload, got {len(workloads)}" workload = workloads[0] magic_values = dict(workload.spec) - magic_values["assets_dir"] = "$OUTPUT_PATH" - magic_values["artifacts_dir"] = "$OUTPUT_PATH" + magic_values["assets_dir"] 
= "/opt/megatron-lm/assets_dir" + magic_values["artifacts_dir"] = "/opt/megatron-lm/artifacts_dir" magic_values["environment"] = environment magic_values["test_case"] = workload.spec["test_case"] magic_values["name"] = workload.spec["name"].format(**magic_values) @@ -40,17 +58,13 @@ def main(scope, model, test_case, environment, platform): inline_script = run.Script(inline=workload.spec["script"]) - artifacts = [ - "{host_path}:{mount_path}".format( - mount_path=mount_path, host_path=str(pathlib.Path("/root") / host_path) - ) - for mount_path, host_path in workload.spec["artifacts"].items() - ] + artifacts = [] artifacts.append(f"{os.getcwd()}:/opt/megatron-lm") - print(artifacts) + if data_dir: + artifacts.append(f"{pathlib.Path(data_dir)}:/mnt/artifacts") executor = run.DockerExecutor( - container_image="megatron-core", + container_image=container_image, num_gpus=-1, runtime="nvidia", ipc_mode="host", @@ -59,15 +73,23 @@ def main(scope, model, test_case, environment, platform): "PYTHONUNBUFFERED": "1", "OUTPUT_PATH": os.getcwd(), "ENABLE_LIGHTWEIGHT_MODE": "true", + "N_REPEAT": "1", }, packager=run.Packager(), volumes=artifacts, ) - with run.Experiment("docker-experiment", executor=executor, log_level="INFO") as exp: + with run.Experiment("mcore-ci-test", executor=executor, log_level="INFO") as exp: _ = exp.add([inline_script], tail_logs=False, name="task-1") + exp.dryrun(log=True) exp.run(detach=False, tail_logs=True, sequential=False) + result_dict = exp.status(return_dict=True) + _, job_dict = list(result_dict.items())[0] + + logger.info(f"Job status: {job_dict['status']}") + sys.exit(0 if str(job_dict["status"]) == "SUCCEEDED" else 1) + if __name__ == "__main__": main() diff --git a/tests/test_utils/python_scripts/notify.py b/tests/test_utils/python_scripts/notify.py index 4cff0db7f6e..7da00dc401a 100644 --- a/tests/test_utils/python_scripts/notify.py +++ b/tests/test_utils/python_scripts/notify.py @@ -22,17 +22,6 @@ def get_gitlab_handle(): return
gitlab.Gitlab(f"https://{GITLAB_ENDPOINT}", private_token=os.getenv("RO_API_TOKEN")) -def extract_surrounding_text(text, keyword="error", context=400, fallback_length=800): - index = text.rfind(keyword) # Find the last occurrence - if index == -1: - return text[-fallback_length:] # Return last 800 chars if keyword is not found - - start = max(0, index - context) # Ensure we don't go below 0 - end = min(len(text), index + len(keyword)) # Ensure we don't exceed the text length - - return text[start:end] - - def get_jobs_per_bridge(pipeline_id: int, type_of_job: str): bridge = {} for pipeline_bridge in ( diff --git a/tests/test_utils/python_scripts/common.py b/tests/test_utils/python_scripts/recipe_parser.py similarity index 89% rename from tests/test_utils/python_scripts/common.py rename to tests/test_utils/python_scripts/recipe_parser.py index 23c191cc399..e26d04d6f20 100644 --- a/tests/test_utils/python_scripts/common.py +++ b/tests/test_utils/python_scripts/recipe_parser.py @@ -1,12 +1,16 @@ import copy import itertools +import logging import pathlib from typing import List, Optional +import click import yaml BASE_PATH = pathlib.Path(__file__).parent.resolve() +logger = logging.getLogger(__name__) + class dotdict(dict): """dot.notation access to dictionary attributes""" @@ -25,6 +29,8 @@ def resolve_cluster_config(cluster: str) -> str: return "draco-oci-ord" if cluster == "dgxh100_coreweave": return "coreweave" + if cluster == "ghci": + return "ghci" raise ValueError(f"Unknown cluster {cluster} provided.") @@ -95,15 +101,15 @@ def filter_by_test_case(workload_manifests: List[dotdict], test_case: str) -> Op workload_manifests = list( workload_manifest for workload_manifest in workload_manifests - if workload_manifest.spec["test_case"] == test_case + if workload_manifest["spec"]["test_case"] == test_case ) if len(workload_manifests) > 1: - print("Duplicate test_case found!") + logger.info("Duplicate test_case found!") return None if len(workload_manifests) == 0: - 
print("No test_case found!") + logger.info("No test_case found!") return None return workload_manifests[0] @@ -118,7 +124,7 @@ def filter_by_scope(workload_manifests: List[dotdict], scope: str) -> List[dotdi ) if len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests @@ -136,7 +142,7 @@ def filter_by_environment(workload_manifests: List[dotdict], environment: str) - ) if len(workload_manifests_copy) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests_copy @@ -153,7 +159,7 @@ def filter_by_platform(workload_manifests: List[dotdict], platform: str) -> List ) if len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests @@ -168,7 +174,7 @@ def filter_by_model(workload_manifests: List[dotdict], model: str) -> List[dotdi ) if len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests @@ -184,7 +190,7 @@ def filter_by_tag(workload_manifests: List[dotdict], tag: str) -> List[dotdict]: ) if len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests @@ -200,7 +206,7 @@ def filter_by_test_cases(workload_manifests: List[dotdict], test_cases: str) -> ) if len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests @@ -269,7 +275,9 @@ def load_workloads( workload.spec["artifacts"] = { key: value.replace(r"{platforms}", workload.spec["platforms"]) for key, value in ( - workload.spec["artifacts"].items() if "artifacts" in workload.spec else {} + workload.spec["artifacts"].items() + if "artifacts" in workload.spec and workload.spec["artifacts"] is not None + else {} ) } @@ -288,9 +296,16 @@ def load_workloads( return workloads -if __name__ == 
"__main__": - workflows = load_workloads(container_tag="main") +@click.command() +@click.option("--model", required=False, type=str, default=None, help="Model to select") +@click.option("--test-case", required=False, type=str, default=None, help="Test case to select") +def main(model: Optional[str], test_case: Optional[str]): + workflows = load_workloads(container_tag="main", model=model, test_case=test_case) # Save workflows to YAML file output_file = "workflows.yaml" with open(output_file, "w") as f: yaml.dump([dict(workflow) for workflow in workflows], f) + + +if __name__ == "__main__": + main() diff --git a/tests/test_utils/recipes/common.yaml b/tests/test_utils/recipes/ckpt_converter.yaml similarity index 100% rename from tests/test_utils/recipes/common.yaml rename to tests/test_utils/recipes/ckpt_converter.yaml diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml index e96bcaa4ee7..dd90bc38e88 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml @@ -11,8 +11,7 @@ spec: n_repeat: 1 platforms: dgx_a100 artifacts: - /workspace/data/mcore_mistral/model: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/mcore_mistral/tokenizer: model/mcore_mistral/nemo_minitron-0.5b/v1 + /workspace/data/model/mcore_mistral: model/mcore_mistral/nemo_minitron-0.5b/v1 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -44,7 +43,7 @@ spec: --tee "0:3,7:3" \ --redirects "3" \ --nproc_per_node 1 \ - tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation/cuda_graphs.py --checkpoint-dir /workspace/data/mcore_mistral/model --tokenizer-model /workspace/data/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + 
tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation/cuda_graphs.py --checkpoint-dir /workspace/data/model/mcore_mistral --tokenizer-model /workspace/data/model/mcore_mistral/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json products: - test_case: [gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation] diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml index b276ac66d85..56ecdabcded 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml @@ -11,8 +11,7 @@ spec: n_repeat: 1 platforms: dgx_a100 artifacts: - /workspace/data/mcore_mistral/model: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/mcore_mistral/tokenizer: model/mcore_mistral/nemo_minitron-0.5b/v1 + /workspace/data/model/mcore_mistral: model/mcore_mistral/nemo_minitron-0.5b/v1 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -40,9 +39,9 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" + "DATA_PATH=null" "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" @@ -50,7 +49,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) @@ -65,5 +64,5 @@ products: - test_case: [gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq] products: - environment: [dev] - 
scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/gpt-dynamic-inference.yaml b/tests/test_utils/recipes/gpt-dynamic-inference.yaml index cd7bfd3fbec..914d3c0a757 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference.yaml @@ -10,10 +10,6 @@ spec: gpus: 1 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/mcore_mistral/model: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/mcore_mistral/tokenizer: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/sharegpt/filtered-benchmark: text/sharegpt-vicuna/filtered script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -41,17 +37,17 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" - "DATA_CACHE_PATH=/workspace/data/cache" + "DATA_PATH=/mnt/artifacts/" + "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) @@ -61,17 +57,17 @@ products: - test_case: [gpt_dynamic_inference_tp1_pp1_583m_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_tp8_pp1_583m_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - 
test_case: [gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only] products: diff --git a/tests/test_utils/recipes/gpt-grads.yaml b/tests/test_utils/recipes/gpt-grads.yaml index ea569362311..205985d5e13 100644 --- a/tests/test_utils/recipes/gpt-grads.yaml +++ b/tests/test_utils/recipes/gpt-grads.yaml @@ -11,10 +11,7 @@ spec: n_repeat: 1 platforms: dgx_h100 artifacts: - /workspace/data/gpt3_data: text/the_pile/shard00 - /workspace/checkpoints/gpt3_mr_mcore_reruns_resume_check_grads_dev: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_resume_check_grads_lts: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt_teacher: model/gpt_dummy_pyt/ckpt/24.10.0_bf16_teacher + /mnt/artifacts/text/the_pile/shard00: text/the_pile/shard00 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -47,12 +44,12 @@ spec: # Note: This test is very expensive, so we hardcode N_REPEAT=1 ARGUMENTS=( - "DATA_PATH=/workspace/data/gpt3_data" + "DATA_PATH=/mnt/artifacts" "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "TRAINING_SCRIPT_PATH=pretrain_gpt.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -67,5 +64,5 @@ products: - test_case: [gpt3_mr_mcore_reruns_resume_check_grads] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: 
[dgx_h100] diff --git a/tests/test_utils/recipes/gpt-nemo.yaml b/tests/test_utils/recipes/gpt-nemo.yaml index 848c1a56071..14c2106ed31 100644 --- a/tests/test_utils/recipes/gpt-nemo.yaml +++ b/tests/test_utils/recipes/gpt-nemo.yaml @@ -44,7 +44,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/model/{name}" "TRAINING_SCRIPT_PATH=\"nemo llm pretrain -y --factory {nemo_model}\"" "TRAINING_PARAMS_PATH=/opt/megatron-lm/tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=/opt/megatron-lm/tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -57,36 +57,36 @@ products: - test_case: [llama3-nemo_8b_mr_mbs1_gbs8_mcore_te_8experts_tp2_ep2_pp2_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [llama3_8b] - test_case: [llama3-nemo_8b_mr_mbs4_gbs64_mcore_te_tp1_pp1_cp2_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [llama3_8b] - test_case: [mixtral-nemo_8x7b_mr_mbs1_gbs8_mcore_te_tp2_pp1_ep2_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [mixtral_8x7b] - test_case: [gemma2-nemo_2b_mr_mbs1_gbs8_mcore_te_tp4_pp1_cp1_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [gemma2_2b] - test_case: [bert-nemo_340m_mr_mbs2_gbs32_mcore_te_tp2_pp2_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [bert_340m] - test_case: [t5-nemo_220m_mr_mbs4_gbs64_te_tp1_pp1_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [t5_220m] diff --git 
a/tests/test_utils/recipes/gpt-static-inference.yaml b/tests/test_utils/recipes/gpt-static-inference.yaml index 424c424bbbf..9ed7f6c09f9 100644 --- a/tests/test_utils/recipes/gpt-static-inference.yaml +++ b/tests/test_utils/recipes/gpt-static-inference.yaml @@ -10,11 +10,6 @@ spec: gpus: 1 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/mcore_mistral/model: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/mcore_mistral/tokenizer: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/deepseek_16b_pyt/model: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 - /workspace/data/deepseek_16b_pyt/tokenizer: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -42,17 +37,17 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" - "DATA_CACHE_PATH=/workspace/data/cache" + "DATA_PATH=null" + "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) @@ -62,20 +57,20 @@ products: - test_case: [gpt_static_inference_tp1_pp1_583m_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_static_inference_tp1_pp1_583m_cudagraphs] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - 
test_case: [gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index b29fc21e877..5eb29ac2605 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -10,19 +10,6 @@ spec: gpus: 8 n_repeat: 5 platforms: dgx_a100 - artifacts: - /workspace/data/gpt3_data: text/the_pile/shard00 - /workspace/checkpoints/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_{platforms}_1N8G_dev/24475828 - /workspace/checkpoints/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_{platforms}_1N8G_dev/28359448 - /workspace/checkpoints/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_{platforms}_1N8G_dev/28359448 - /workspace/checkpoints/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_{platforms}_1N8G_dev/28359448 - /workspace/checkpoints/gpt3_mr_mcore_reruns_resume_dev: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_resume_lts: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_reshard_dev: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_reshard_lts: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - 
/workspace/checkpoints/gpt3_mr_mcore_reruns_persistent_2_dev: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-persistent_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_persistent_2_lts: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-persistent_v2 - /workspace/checkpoints/gpt_teacher: model/gpt_dummy_pyt/ckpt/24.10.0_bf16_teacher script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -53,12 +40,12 @@ spec: NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') ARGUMENTS=( - "DATA_PATH=/workspace/data/gpt3_data" + "DATA_PATH=/mnt/artifacts" "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/" "TRAINING_SCRIPT_PATH=pretrain_gpt.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -192,7 +179,7 @@ products: - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -248,21 +235,21 @@ products: - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: 
[dgx_h100] - environment: [lts] scope: [nightly] @@ -283,55 +270,55 @@ products: - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -353,7 +340,7 @@ products: - test_case: [gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + 
scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G] products: @@ -407,21 +394,21 @@ products: - test_case: [gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # OOM: #434 - test_case: [gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # OOM: #434 @@ -451,7 +438,7 @@ products: - test_case: [gpt3_mr_mcore_reruns_persistent_1] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] @@ -467,7 +454,7 @@ products: - environment: [lts] scope: [mr] - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [dev] scope: [mr-slim] @@ -475,7 +462,7 @@ products: - test_case: [gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [mr] @@ -484,7 +471,7 @@ products: - environment: [lts] scope: [mr] - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [dev] scope: [mr-slim] diff --git a/tests/test_utils/recipes/mamba-static-inference.yaml b/tests/test_utils/recipes/mamba-static-inference.yaml index f0e29999d43..a4eaecaa53e 100644 --- a/tests/test_utils/recipes/mamba-static-inference.yaml +++ b/tests/test_utils/recipes/mamba-static-inference.yaml @@ -39,9 +39,9 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - 
"CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" + "DATA_PATH=null" "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" @@ -49,7 +49,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) @@ -59,10 +59,10 @@ products: - test_case: [hybrid_static_inference_tp1_pp1_2B_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [hybrid_static_inference_tp1_pp1_2B_cudagraphs] products: - environment: [dev] scope: [mr] - platforms: [dgx_h100] + platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/mamba.yaml b/tests/test_utils/recipes/mamba.yaml index 7c1f9a3627f..0f8a4085ea5 100644 --- a/tests/test_utils/recipes/mamba.yaml +++ b/tests/test_utils/recipes/mamba.yaml @@ -10,8 +10,6 @@ spec: gpus: 8 n_repeat: 5 platforms: dgx_a100 - artifacts: - /workspace/data/gpt3_data: text/the_pile/shard00 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -40,12 +38,12 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "DATA_PATH=/workspace/data/gpt3_data" + "DATA_PATH=/mnt/artifacts" "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/model/{name}" "TRAINING_SCRIPT_PATH=pretrain_mamba.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml"
"GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -60,7 +58,7 @@ products: - test_case: [hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] @@ -76,7 +74,7 @@ products: - test_case: [hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] @@ -84,7 +82,7 @@ products: - test_case: [hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] diff --git a/tests/test_utils/recipes/mimo.yaml b/tests/test_utils/recipes/mimo.yaml index dfde82656dc..41e735776f9 100644 --- a/tests/test_utils/recipes/mimo.yaml +++ b/tests/test_utils/recipes/mimo.yaml @@ -11,7 +11,7 @@ spec: platforms: dgx_h100 artifacts: /workspace/data/llava_pretrain_energon: mixed/mcore_mimo_vlm/llava_pretrain_energon - /workspace/checkpoints/vicuna_7b_pyt/dcp/mcore-v1.5_fp32: model/vicuna_7b_pyt/dcp/mcore-v1.5_fp32 + /mnt/artifacts/model/vicuna_7b_pyt/dcp/mcore-v1.5_fp32: model/vicuna_7b_pyt/dcp/mcore-v1.5_fp32 time_limit: n_repeat: test_case: @@ -44,12 +44,12 @@ spec: cd /opt/megatron-lm NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') ARGUMENTS=( - "DATA_PATH='/workspace/data/llava_pretrain_energon/'" - "DATA_CACHE_PATH='-'" + "DATA_PATH=/mnt/artifacts" + "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}/checkpoints" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" 
"TRAINING_SCRIPT_PATH=./examples/mimo/train.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" diff --git a/tests/test_utils/recipes/moe-dynamic-inference.yaml b/tests/test_utils/recipes/moe-dynamic-inference.yaml index 36d09cb36c4..c9d1be57add 100644 --- a/tests/test_utils/recipes/moe-dynamic-inference.yaml +++ b/tests/test_utils/recipes/moe-dynamic-inference.yaml @@ -10,9 +10,6 @@ spec: gpus: 8 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/deepseek_16b_pyt/model: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 - /workspace/data/deepseek_16b_pyt/tokenizer: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -40,9 +37,9 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" + "DATA_PATH=null" "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" @@ -50,7 +47,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) @@ -60,7 +57,7 @@ products: - test_case: [gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch] products: diff --git a/tests/test_utils/recipes/moe-static-inference.yaml 
b/tests/test_utils/recipes/moe-static-inference.yaml index c1411283ad9..f2f98fbc146 100644 --- a/tests/test_utils/recipes/moe-static-inference.yaml +++ b/tests/test_utils/recipes/moe-static-inference.yaml @@ -11,8 +11,6 @@ spec: n_repeat: 1 platforms: dgx_a100 artifacts: - /workspace/data/deepseek_16b_pyt/model: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 - /workspace/data/deepseek_16b_pyt/tokenizer: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -40,9 +38,9 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" + "DATA_PATH=null" "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" @@ -50,7 +48,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index ddfb8d1980b..fd8f00c242f 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -10,10 +10,6 @@ spec: gpus: 8 n_repeat: 5 platforms: dgx_a100 - artifacts: - /workspace/data/gpt3_data: text/the_pile/shard00 - /workspace/checkpoints/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_{platforms}_1N8G_dev/28359448 - 
/workspace/checkpoints/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_{platforms}_1N8G_dev/28359448 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -44,12 +40,12 @@ spec: NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') ARGUMENTS=( - "DATA_PATH=/workspace/data/gpt3_data" + "DATA_PATH=/mnt/artifacts" "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "TRAINING_SCRIPT_PATH=pretrain_gpt.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -108,7 +104,7 @@ products: - test_case: [gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # TODO: The migration of custom fsdp causes EP + FSDP to be temporarily unavailable, which will be fixed in a subsequent MR. 
# - test_case: [gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G] @@ -121,7 +117,7 @@ products: - test_case: [gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer] products: @@ -155,7 +151,7 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [dev] scope: [mr-slim] diff --git a/tests/test_utils/recipes/multimodal-llava.yaml b/tests/test_utils/recipes/multimodal-llava.yaml index 4de7f0a9c0f..65393f14f50 100644 --- a/tests/test_utils/recipes/multimodal-llava.yaml +++ b/tests/test_utils/recipes/multimodal-llava.yaml @@ -46,7 +46,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}/checkpoints" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/model/{name}/checkpoints" "TRAINING_SCRIPT_PATH=pretrain_vlm.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -61,10 +61,10 @@ products: - test_case: [multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] diff --git a/uv.lock b/uv.lock index 1046481f7ec..28110f38852 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= 
'3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", @@ -281,10 +281,10 @@ name = "anyio" version = "4.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, { name = "idna" }, { name = "sniffio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949, upload-time = "2025-03-17T00:02:54.77Z" } wheels = [ @@ -668,7 +668,7 @@ name = "cffi" version = "2.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pycparser", marker = "implementation_name != 'PyPy' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } wheels = [ @@ -839,7 +839,7 @@ name = "click" version = "8.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { 
name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/46/61/de6cd827efad202d7057d93e0fed9294b96952e188f7384832791c7b2254/click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4", size = 276943, upload-time = "2025-09-18T17:32:23.696Z" } wheels = [ @@ -1291,7 +1291,7 @@ name = "exceptiongroup" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } wheels = [ @@ -1799,7 +1799,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "fsspec" }, - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, { name = "packaging" }, { name = "pyyaml" }, { name = "requests" }, @@ -2469,7 +2469,7 @@ linting = [ ] test = [ { name = "coverage" }, - { name = "nemo-run" }, + { name = "nemo-run", git = "https://github.com/NVIDIA-NeMo/Run.git?rev=8ca8f7952a597f944985f1f1368a7acb9aa3a6c2" }, { name = "nltk" }, { name = "pydantic" }, { name = "pygithub" }, @@ -2886,8 +2886,8 @@ wheels = [ [[package]] name = "nemo-run" -version = "0.6.0" -source = 
{ registry = "https://pypi.org/simple" } +version = "0.7.0rc0.dev0" +source = { git = "https://github.com/NVIDIA-NeMo/Run.git?rev=8ca8f7952a597f944985f1f1368a7acb9aa3a6c2#8ca8f7952a597f944985f1f1368a7acb9aa3a6c2" } dependencies = [ { name = "catalogue" }, { name = "cryptography" }, @@ -2905,10 +2905,6 @@ dependencies = [ { name = "torchx" }, { name = "typer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8b/0a/161c5f9534946f096d7ba16e40874cf9ebbff17d57c1f88173b4b32cf067/nemo_run-0.6.0.tar.gz", hash = "sha256:8c2ec0a87a0e4df799ee527422fd2df366926cdc4cc8e0b666df98b550cd9bb7", size = 2284395, upload-time = "2025-10-09T16:07:25.718Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/2e/56750d75ec35a692e9eb0ac0f780da9f12c8e599b8273b9eabc33ae0ca30/nemo_run-0.6.0-py3-none-any.whl", hash = "sha256:7b6473aded379e9c793b7f1f64c7f44ce3ef70b4ea27dad95fd84523531ac403", size = 235439, upload-time = "2025-10-09T16:07:24.46Z" }, -] [[package]] name = "networkx" @@ -4410,12 +4406,12 @@ name = "pytest" version = "8.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, - { name = "tomli", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = 
"sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891, upload-time = "2025-03-02T12:54:54.503Z" } wheels = [ @@ -4670,7 +4666,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } wheels = [ @@ -5890,24 +5886,24 @@ dependencies = [ { name = "jinja2" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cublas-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cuda-cupti-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { 
name = "nvidia-cuda-nvrtc-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cuda-runtime-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cudnn-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cufft-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cufile-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-curand-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 
'extra-13-megatron-core-lts')" }, - { name = "nvidia-cusolver-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cusparse-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cusparselt-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-nccl-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-nvshmem-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-nvtx-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "setuptools", marker = "python_full_version >= '3.12' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "platform_machine 
== 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "setuptools", marker = "python_full_version >= '3.12'" }, { name = "sympy" }, - { name = "triton", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "triton", marker = "sys_platform == 'never'" }, { name = "typing-extensions" }, ] wheels = [ @@ -6021,7 +6017,7 @@ name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } wheels = [ From 265f4ee482a0b60a59b088a59e4eaed35e26ffef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 15:28:18 +0000 Subject: [PATCH 037/334] ci: Add copyright-checker for GitHub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/actions/action.yml | 26 +- .github/workflows/cicd-main.yml | 158 +++++- .github/workflows/copyright-check.yml | 19 +- megatron/core/config_logger.py | 14 +- .../golden_values_dev_dgxh100_dgxc.json | 287 ++++++++++ .../golden_values_dev_dgxh100_dgxc.json | 537 ++++++++++++++++++ .../golden_values_dev_dgxh100_dgxc.json | 344 +++++++++++ .../golden_values_dev_dgxh100_dgxc.json | 537 ++++++++++++++++++ .../launch_nemo_run_workload.py | 13 +- 9 files changed, 1916 insertions(+), 19 deletions(-) create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_dgxc.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_dgxc.json diff --git a/.github/actions/action.yml b/.github/actions/action.yml index 8b7fd373a98..d726fcabc9f 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -93,6 +93,27 @@ runs: echo "$cmd" | tee "job.sh" echo "::endgroup::" + - name: Get PR info + id: get-pr-info + if: startsWith(github.ref, 'refs/heads/pull-request/') + uses: nv-gha-runners/get-pr-info@main + + - name: Install GH CLI + shell: bash -x -e -u -o pipefail {0} + run: | + apt-get update + apt-get install -y gh + + - name: Has Run tests label + shell: bash -x -e -u -o pipefail {0} + id: has-run-tests-label + env: + GH_TOKEN: ${{ github.token }} + run: | + PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} + HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. 
== "Run tests")') + echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT + - name: Create run-script (e2e test) shell: bash -x -e -u -o pipefail {0} if: inputs.is_unit_test == 'false' @@ -115,7 +136,8 @@ runs: --environment dev \ --platform dgx_h100 \ --container-image ${{ inputs.container-image }} \ - --data-dir /mnt/datadrive/TestData/megatron-lm/artifacts + --data-dir /mnt/datadrive/TestData/megatron-lm/artifacts \ + --enable-lightweight-mode RUN_TEST_EOF ) @@ -200,5 +222,5 @@ runs: uses: actions/upload-artifact@v4 with: name: ${{ steps.check.outputs.logs_report }} - path: logs + path: ${{ inputs.is_unit_test == 'true' && 'logs' || 'assets_dir' }} include-hidden-files: true diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 7f030bfb641..a56afb74c71 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -32,6 +32,113 @@ permissions: contents: read jobs: + is-not-external-contributor: + runs-on: ubuntu-latest + environment: nemo-ci + outputs: + is_external_contributor: ${{ github.event.pull_request.user.type == 'User' }} + permissions: + issues: write + pull-requests: write + env: + GITHUB_TOKEN: ${{ secrets.PAT }} + REPO: ${{ github.repository }} + SCHEDULED_JOB: ${{ github.event_name == 'schedule' }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + token: ${{ env.GITHUB_TOKEN }} + + - name: Get PR info + id: get-pr-info + if: startsWith(github.ref, 'refs/heads/pull-request/') + uses: nv-gha-runners/get-pr-info@main + + - name: Check membership + id: check-membership + run: | + PR_AUTHOR=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }} + + if [ "${{ env.SCHEDULED_JOB }}" == "true" ]; then + echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT + exit 0 + fi + + echo "Checking if $PR_AUTHOR is a repo collaborator..." 
+ API_URL="https://api.github.com/repos/$REPO/collaborators/$PR_AUTHOR" + REPO_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + $API_URL) + + echo "Checking if $PR_AUTHOR is an org collaborator to NVIDIA-NeMo..." + API_URL="https://api.github.com/orgs/NVIDIA-NeMo/members/$PR_AUTHOR" + ORG_NVIDIA_NEMO_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + $API_URL) + + echo "Checking if $PR_AUTHOR is an org collaborator to NVIDIA..." + API_URL="https://api.github.com/orgs/NVIDIA/members/$PR_AUTHOR" + ORG_NVIDIA_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + $API_URL) + + if [ "$REPO_MEMBERSHIP_RESPONSE" -eq 204 ] || [ "$ORG_NVIDIA_NEMO_MEMBERSHIP_RESPONSE" -eq 204 ] || [ "$ORG_NVIDIA_MEMBERSHIP_RESPONSE" -eq 204 ]; then + echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT + else + echo "is_maintainer=false" | tee -a $GITHUB_OUTPUT + fi + + - name: Find Comment + uses: peter-evans/find-comment@v4 + if: startsWith(github.ref, 'refs/heads/pull-request/') + id: fc + with: + issue-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} + repository: ${{ github.repository }} + body-includes: "" + + - name: Delete comment + uses: actions/github-script@v7 + if: startsWith(github.ref, 'refs/heads/pull-request/') && steps.fc.outputs.comment-id != '' + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + await github.rest.issues.deleteComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: ${{ steps.fc.outputs.comment-id }} + }) + + - name: Write pull request comment + if: 
startsWith(github.ref, 'refs/heads/pull-request/') && steps.check-membership.outputs.is_maintainer == 'false' + uses: peter-evans/create-or-update-comment@v5 + with: + issue-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} + repository: ${{ github.repository }} + body: | + + + Thank you for your contribution! + + NVIDIA Megatron-LM is currently transitioning to development on Github. We will aim to review your PR after we complete our transition and stabilize our Github development process. + + Thank you for your understanding. + + - name: exit + run: | + if [ "${{ steps.check-membership.outputs.is_maintainer }}" == "true" ]; then + exit 0 + else + exit 1 + fi + pre-flight: uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.0 @@ -188,7 +295,7 @@ jobs: PAT: ${{ secrets.PAT }} container-image: 766267172432.dkr.ecr.us-east-1.amazonaws.com/megatron-lm:1864 #${{ github.sha }} - cicd-parse-functional-tests: + cicd-parse-integration-tests: runs-on: ubuntu-latest needs: - pre-flight @@ -196,17 +303,44 @@ jobs: # - cicd-container-build # - cicd-unit-tests-latest outputs: - functional-tests: ${{ steps.main.outputs.functional-tests }} + integration-tests: ${{ steps.main.outputs.integration-tests }} steps: - name: Checkout uses: actions/checkout@v4 + - name: Get PR info + id: get-pr-info + if: startsWith(github.ref, 'refs/heads/pull-request/') + uses: nv-gha-runners/get-pr-info@main + + - name: Has Run tests label + id: has-run-tests-label + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} + HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. 
== "Run tests")') + echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT + - name: Parse functional tests id: main + env: + HAS_RUN_TESTS_LABEL: ${{ steps.has-run-tests-label.outputs.HAS_RUN_TESTS_LABEL }} run: | export PYTHONPATH=$(pwd) + + if [ "$HAS_RUN_TESTS_LABEL" == "true" ]; then + ARGS=( + --scope mr + --enable-lightweight-mode + ) + else + ARGS=( + --scope mr-slim + ) + fi + python tests/test_utils/python_scripts/generate_jet_trigger_job.py \ - --scope mr \ --n-repeat 5 \ --time-limit 2700 \ --test-cases all \ @@ -218,24 +352,24 @@ jobs: --no-enable-warmup \ --environment dev \ --platform dgx_h100 \ - --enable-lightweight-mode \ --cluster ghci \ - --output-path functional-tests.yaml + ${ARGS[@]} \ + --output-path integration-tests.yaml - cat functional-tests.yaml | \ - yq -o json 'del(.default, .stages, .workflow) | to_entries | map({"model": .value.stage, "test_case": .key})' | jq -c > functional-tests.json + cat integration-tests.yaml | \ + yq -o json 'del(.default, .stages, .workflow) | to_entries | map({"model": .value.stage, "test_case": .key}) | sort_by(.model, .test_case)' | jq -c > integration-tests.json - echo "functional-tests=$(cat functional-tests.json)" | tee -a "$GITHUB_OUTPUT" + echo "integration-tests=$(cat integration-tests.json)" | tee -a "$GITHUB_OUTPUT" - cicd-functional-tests-latest: + cicd-integration-tests-latest: strategy: fail-fast: false matrix: - include: ${{ fromJson(needs.cicd-parse-functional-tests.outputs.functional-tests) }} + include: ${{ fromJson(needs.cicd-parse-integration-tests.outputs.integration-tests) }} needs: - pre-flight - cicd-wait-in-queue - - cicd-parse-functional-tests + - cicd-parse-integration-tests # - cicd-unit-tests-latest runs-on: nvidia-ci-aws-gpu-x8 name: "${{ matrix.model }}/${{ matrix.test_case }} - latest" @@ -266,7 +400,7 @@ jobs: needs: - pre-flight - cicd-unit-tests-latest - - cicd-functional-tests-latest + - cicd-integration-tests-latest if: | ( needs.pre-flight.outputs.docs_only == 'true' 
diff --git a/.github/workflows/copyright-check.yml b/.github/workflows/copyright-check.yml index c65bb402a26..8b075448833 100644 --- a/.github/workflows/copyright-check.yml +++ b/.github/workflows/copyright-check.yml @@ -10,7 +10,7 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. +# limitations under the License.. name: Copyright check @@ -30,7 +30,9 @@ jobs: if: | !(needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true') - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.2.0 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.65.2 + with: + from-year: 2019 copyright-check-summary: needs: [pre-flight, copyright-check] @@ -44,4 +46,15 @@ jobs: runs-on: ubuntu-latest steps: - name: Result - run: echo Copyright check successful + run: | + FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0 + + if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then + echo "✅ All previous jobs completed successfully" + exit 0 + else + echo "❌ Found $FAILED_JOBS failed job(s)" + # Show which jobs failed + gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name' + exit 1 + fi diff --git a/megatron/core/config_logger.py b/megatron/core/config_logger.py index 4e666bb274e..bee2be09205 100644 --- a/megatron/core/config_logger.py +++ b/megatron/core/config_logger.py @@ -1,4 +1,16 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import dataclasses import json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json new file mode 100644 index 00000000000..737ecfb1b9d --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84523, + "2": 10.85412, + "3": 10.85365, + "4": 10.83867, + "5": 10.87428, + "6": 10.89334, + "7": 10.8541, + "8": 10.86235, + "9": 10.86352, + "10": 10.82859, + "11": 10.88772, + "12": 10.87148, + "13": 10.87938, + "14": 10.89123, + "15": 10.81927, + "16": 10.83063, + "17": 10.79878, + "18": 10.81771, + "19": 10.81957, + "20": 10.72749, + "21": 10.70552, + "22": 10.56396, + "23": 10.72823, + "24": 10.60839, + "25": 10.55198, + "26": 10.60868, + "27": 10.62879, + "28": 10.58271, + "29": 10.59982, + "30": 10.36511, + "31": 10.12096, + "32": 10.47628, + "33": 10.46906, + "34": 10.22326, + "35": 10.27848, + "36": 10.22883, + "37": 10.35947, + "38": 
10.19331, + "39": 10.41586, + "40": 10.09773, + "41": 10.15718, + "42": 10.22441, + "43": 9.83281, + "44": 9.96935, + "45": 9.84205, + "46": 9.83017, + "47": 10.15602, + "48": 9.85503, + "49": 9.54049, + "50": 9.91258 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1725.0, + "2": 1664.0, + "3": 1710.0, + "4": 1712.0, + "5": 1834.0, + "6": 1743.0, + "7": 1803.0, + "8": 1737.0, + "9": 1779.0, + "10": 1459.0, + "11": 1898.0, + "12": 1661.0, + "13": 1860.0, + "14": 1764.0, + "15": 1886.0, + "16": 1916.0, + "17": 1773.0, + "18": 1702.0, + "19": 1742.0, + "20": 1649.0, + "21": 1899.0, + "22": 1631.0, + "23": 1960.0, + "24": 1570.0, + "25": 1647.0, + "26": 1649.0, + "27": 1811.0, + "28": 1930.0, + "29": 1910.0, + "30": 1964.0, + "31": 1536.0, + "32": 1873.0, + "33": 2191.0, + "34": 1838.0, + "35": 2017.0, + "36": 1916.0, + "37": 2345.0, + "38": 2247.0, + "39": 2374.0, + "40": 2207.0, + "41": 2246.0, + "42": 2291.0, + "43": 2027.0, + "44": 2147.0, + "45": 2164.0, + "46": 2300.0, + "47": 2418.0, + "48": 2467.0, + "49": 2255.0, + "50": 2224.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 552328704.0, + "2": 552328704.0, + "3": 552328704.0, + "4": 552328704.0, + "5": 552328704.0, + "6": 552328704.0, + "7": 552328704.0, + "8": 552328704.0, + "9": 552328704.0, + "10": 552328704.0, + "11": 552328704.0, + "12": 552328704.0, + "13": 552328704.0, + "14": 552328704.0, + "15": 552328704.0, + "16": 552328704.0, + "17": 552328704.0, + "18": 552328704.0, + "19": 552328704.0, + "20": 552328704.0, + "21": 552328704.0, + "22": 552328704.0, + "23": 552328704.0, + "24": 552328704.0, + "25": 552328704.0, + "26": 552328704.0, + "27": 552328704.0, + "28": 552328704.0, + "29": 552328704.0, + "30": 552328704.0, + "31": 552328704.0, + "32": 552328704.0, + "33": 552328704.0, + "34": 552328704.0, + "35": 552328704.0, + "36": 552328704.0, + "37": 552328704.0, + "38": 
552328704.0, + "39": 552328704.0, + "40": 552328704.0, + "41": 552328704.0, + "42": 552328704.0, + "43": 552328704.0, + "44": 552328704.0, + "45": 552328704.0, + "46": 552328704.0, + "47": 552328704.0, + "48": 552328704.0, + "49": 552328704.0, + "50": 552328704.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3798208000.0, + "2": 3943007744.0, + "3": 3943007744.0, + "4": 3943007744.0, + "5": 3943007744.0, + "6": 3943007744.0, + "7": 3943007744.0, + "8": 3943007744.0, + "9": 3943007744.0, + "10": 3943007744.0, + "11": 3943007744.0, + "12": 3943007744.0, + "13": 3943007744.0, + "14": 3943007744.0, + "15": 3943007744.0, + "16": 3943007744.0, + "17": 3943007744.0, + "18": 3943007744.0, + "19": 3943007744.0, + "20": 3943007744.0, + "21": 3943007744.0, + "22": 3943007744.0, + "23": 3943007744.0, + "24": 3943007744.0, + "25": 3943007744.0, + "26": 3943007744.0, + "27": 3943007744.0, + "28": 3943007744.0, + "29": 3943007744.0, + "30": 3943007744.0, + "31": 3943007744.0, + "32": 3943007744.0, + "33": 3943007744.0, + "34": 3943007744.0, + "35": 3943007744.0, + "36": 3943007744.0, + "37": 3943007744.0, + "38": 3943007744.0, + "39": 3943007744.0, + "40": 3943007744.0, + "41": 3943007744.0, + "42": 3943007744.0, + "43": 3943007744.0, + "44": 3943007744.0, + "45": 3943007744.0, + "46": 3943007744.0, + "47": 3943007744.0, + "48": 3943007744.0, + "49": 3943007744.0, + "50": 3943007744.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.33022, + "2": 0.14078, + "3": 0.13198, + "4": 0.12852, + "5": 0.13083, + "6": 0.13237, + "7": 0.13228, + "8": 0.1313, + "9": 0.12811, + "10": 0.1288, + "11": 0.33424, + "12": 0.13269, + "13": 0.12918, + "14": 0.12679, + "15": 0.12826, + "16": 0.12904, + "17": 0.12886, + "18": 0.12955, + "19": 0.1304, + "20": 0.13345, + "21": 0.33748, + "22": 0.12668, + "23": 0.13016, + "24": 0.13048, + "25": 0.13063, + "26": 
0.12607, + "27": 0.12969, + "28": 0.12911, + "29": 0.12982, + "30": 0.12875, + "31": 0.33159, + "32": 0.13001, + "33": 0.12965, + "34": 0.12637, + "35": 0.12796, + "36": 0.12613, + "37": 0.13026, + "38": 0.1296, + "39": 0.12924, + "40": 0.12739, + "41": 0.33311, + "42": 0.12916, + "43": 0.12923, + "44": 0.12827, + "45": 0.12448, + "46": 0.12337, + "47": 0.12316, + "48": 0.12962, + "49": 0.12832, + "50": 0.12865 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json new file mode 100644 index 00000000000..8bf73ebcf59 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84163, + "2": 10.85598, + "3": 10.84413, + "4": 10.84124, + "5": 10.85872, + "6": 10.86316, + "7": 10.85184, + "8": 10.84645, + "9": 10.85647, + "10": 10.81849, + "11": 10.85923, + "12": 10.84285, + "13": 10.86432, + "14": 10.85423, + "15": 10.81015, + "16": 10.81588, + "17": 10.78949, + "18": 10.79683, + "19": 10.79073, + "20": 10.70819, + "21": 10.69322, + "22": 10.58504, + "23": 10.70217, + "24": 10.60546, + "25": 10.57102, + "26": 10.61967, + "27": 10.61501, + "28": 10.56369, + "29": 10.56725, + "30": 10.39695, + "31": 10.16591, + "32": 10.4573, + "33": 10.45199, + "34": 10.2392, + "35": 10.28351, + "36": 10.24677, + "37": 10.3427, + "38": 10.20546, + "39": 10.39187, + "40": 10.09767, + "41": 10.1526, + "42": 10.21051, + "43": 9.87726, + "44": 9.98291, + "45": 9.86165, + "46": 9.83587, 
+ "47": 10.13369, + "48": 9.87212, + "49": 9.56121, + "50": 9.91045, + "51": 9.85839, + "52": 9.7506, + "53": 10.05817, + "54": 9.96076, + "55": 9.88738, + "56": 9.6344, + "57": 9.4967, + "58": 9.83343, + "59": 9.59391, + "60": 9.51376, + "61": 9.69928, + "62": 9.98089, + "63": 9.39065, + "64": 9.77599, + "65": 8.9571, + "66": 9.70054, + "67": 9.37, + "68": 9.78529, + "69": 9.78966, + "70": 9.74676, + "71": 9.61906, + "72": 9.58963, + "73": 9.49629, + "74": 8.94963, + "75": 9.42381, + "76": 9.07799, + "77": 10.07105, + "78": 9.72632, + "79": 9.37966, + "80": 9.40721, + "81": 9.48238, + "82": 9.70152, + "83": 9.30657, + "84": 9.41464, + "85": 9.61784, + "86": 9.08212, + "87": 9.59511, + "88": 9.75008, + "89": 9.60356, + "90": 9.82256, + "91": 9.33721, + "92": 9.35861, + "93": 9.07956, + "94": 8.83268, + "95": 9.51351, + "96": 9.52947, + "97": 9.31813, + "98": 9.67451, + "99": 8.88607, + "100": 9.40106 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1736.0, + "2": 1692.0, + "3": 1695.0, + "4": 1761.0, + "5": 1955.0, + "6": 1791.0, + "7": 1943.0, + "8": 1681.0, + "9": 1884.0, + "10": 1441.0, + "11": 1942.0, + "12": 1786.0, + "13": 1940.0, + "14": 1862.0, + "15": 1907.0, + "16": 1947.0, + "17": 1827.0, + "18": 1907.0, + "19": 1818.0, + "20": 1700.0, + "21": 1911.0, + "22": 1720.0, + "23": 1938.0, + "24": 1707.0, + "25": 1686.0, + "26": 1792.0, + "27": 1891.0, + "28": 1976.0, + "29": 1958.0, + "30": 1941.0, + "31": 1622.0, + "32": 1970.0, + "33": 2129.0, + "34": 1830.0, + "35": 1907.0, + "36": 1892.0, + "37": 2395.0, + "38": 2161.0, + "39": 2493.0, + "40": 2224.0, + "41": 2201.0, + "42": 2175.0, + "43": 1920.0, + "44": 1955.0, + "45": 1956.0, + "46": 2166.0, + "47": 2517.0, + "48": 2272.0, + "49": 2211.0, + "50": 2232.0, + "51": 2621.0, + "52": 2597.0, + "53": 2926.0, + "54": 2633.0, + "55": 2206.0, + "56": 2627.0, + "57": 2328.0, + "58": 2886.0, + "59": 2639.0, + "60": 2157.0, + "61": 2736.0, + "62": 2544.0, + 
"63": 2332.0, + "64": 2948.0, + "65": 2630.0, + "66": 2931.0, + "67": 2717.0, + "68": 2643.0, + "69": 2955.0, + "70": 3040.0, + "71": 2882.0, + "72": 2390.0, + "73": 2812.0, + "74": 1844.0, + "75": 2461.0, + "76": 3067.0, + "77": 3152.0, + "78": 3018.0, + "79": 3008.0, + "80": 3104.0, + "81": 3589.0, + "82": 3218.0, + "83": 2748.0, + "84": 3217.0, + "85": 3167.0, + "86": 2876.0, + "87": 3604.0, + "88": 3017.0, + "89": 3249.0, + "90": 3069.0, + "91": 2865.0, + "92": 3074.0, + "93": 2680.0, + "94": 3392.0, + "95": 3206.0, + "96": 3401.0, + "97": 3107.0, + "98": 3624.0, + "99": 3007.0, + "100": 3111.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 299204096.0, + "2": 299204096.0, + "3": 299204096.0, + "4": 299204096.0, + "5": 299204096.0, + "6": 299204096.0, + "7": 299204096.0, + "8": 299204096.0, + "9": 299204096.0, + "10": 299204096.0, + "11": 299204096.0, + "12": 299204096.0, + "13": 299204096.0, + "14": 299204096.0, + "15": 299204096.0, + "16": 299204096.0, + "17": 299204096.0, + "18": 299204096.0, + "19": 299204096.0, + "20": 299204096.0, + "21": 299204096.0, + "22": 299204096.0, + "23": 299204096.0, + "24": 299204096.0, + "25": 299204096.0, + "26": 299204096.0, + "27": 299204096.0, + "28": 299204096.0, + "29": 299204096.0, + "30": 299204096.0, + "31": 299204096.0, + "32": 299204096.0, + "33": 299204096.0, + "34": 299204096.0, + "35": 299204096.0, + "36": 299204096.0, + "37": 299204096.0, + "38": 299204096.0, + "39": 299204096.0, + "40": 299204096.0, + "41": 299204096.0, + "42": 299204096.0, + "43": 299204096.0, + "44": 299204096.0, + "45": 299204096.0, + "46": 299204096.0, + "47": 299204096.0, + "48": 299204096.0, + "49": 299204096.0, + "50": 299204096.0, + "51": 299204096.0, + "52": 299204096.0, + "53": 299204096.0, + "54": 299204096.0, + "55": 299204096.0, + "56": 299204096.0, + "57": 299204096.0, + "58": 299204096.0, + "59": 299204096.0, + "60": 299204096.0, + "61": 299204096.0, + "62": 
299204096.0, + "63": 299204096.0, + "64": 299204096.0, + "65": 299204096.0, + "66": 299204096.0, + "67": 299204096.0, + "68": 299204096.0, + "69": 299204096.0, + "70": 299204096.0, + "71": 299204096.0, + "72": 299204096.0, + "73": 299204096.0, + "74": 299204096.0, + "75": 299204096.0, + "76": 299204096.0, + "77": 299204096.0, + "78": 299204096.0, + "79": 299204096.0, + "80": 299204096.0, + "81": 299204096.0, + "82": 299204096.0, + "83": 299204096.0, + "84": 299204096.0, + "85": 299204096.0, + "86": 299204096.0, + "87": 299204096.0, + "88": 299204096.0, + "89": 299204096.0, + "90": 299204096.0, + "91": 299204096.0, + "92": 299204096.0, + "93": 299204096.0, + "94": 299204096.0, + "95": 299204096.0, + "96": 299204096.0, + "97": 299204096.0, + "98": 299204096.0, + "99": 299204096.0, + "100": 299204096.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 977520128.0, + "2": 1042465280.0, + "3": 1042465280.0, + "4": 1042465280.0, + "5": 1042465280.0, + "6": 1042465280.0, + "7": 1042465280.0, + "8": 1042465280.0, + "9": 1042465280.0, + "10": 1042465280.0, + "11": 1042465280.0, + "12": 1042465280.0, + "13": 1042465280.0, + "14": 1042465280.0, + "15": 1042465280.0, + "16": 1042465280.0, + "17": 1042465280.0, + "18": 1042465280.0, + "19": 1042465280.0, + "20": 1042465280.0, + "21": 1042465280.0, + "22": 1042465280.0, + "23": 1042465280.0, + "24": 1042465280.0, + "25": 1042465280.0, + "26": 1042465280.0, + "27": 1042465280.0, + "28": 1042465280.0, + "29": 1042465280.0, + "30": 1042465280.0, + "31": 1042465280.0, + "32": 1042465280.0, + "33": 1042465280.0, + "34": 1042465280.0, + "35": 1042465280.0, + "36": 1042465280.0, + "37": 1042465280.0, + "38": 1042465280.0, + "39": 1042465280.0, + "40": 1042465280.0, + "41": 1042465280.0, + "42": 1042465280.0, + "43": 1042465280.0, + "44": 1042465280.0, + "45": 1042465280.0, + "46": 1042465280.0, + "47": 1042465280.0, + "48": 1042465280.0, + "49": 1042465280.0, + 
"50": 1042465280.0, + "51": 1042465280.0, + "52": 1042465280.0, + "53": 1042465280.0, + "54": 1042465280.0, + "55": 1042465280.0, + "56": 1042465280.0, + "57": 1042465280.0, + "58": 1042465280.0, + "59": 1042465280.0, + "60": 1042465280.0, + "61": 1042465280.0, + "62": 1042465280.0, + "63": 1042465280.0, + "64": 1042465280.0, + "65": 1042465280.0, + "66": 1042465280.0, + "67": 1042465280.0, + "68": 1042465280.0, + "69": 1042465280.0, + "70": 1042465280.0, + "71": 1042465280.0, + "72": 1042465280.0, + "73": 1042465280.0, + "74": 1042465280.0, + "75": 1042465280.0, + "76": 1042465280.0, + "77": 1042465280.0, + "78": 1042465280.0, + "79": 1042465280.0, + "80": 1042465280.0, + "81": 1042465280.0, + "82": 1042465280.0, + "83": 1042465280.0, + "84": 1042465280.0, + "85": 1042465280.0, + "86": 1042465280.0, + "87": 1042465280.0, + "88": 1042465280.0, + "89": 1042465280.0, + "90": 1042465280.0, + "91": 1042465280.0, + "92": 1042465280.0, + "93": 1042465280.0, + "94": 1042465280.0, + "95": 1042465280.0, + "96": 1042465280.0, + "97": 1042465280.0, + "98": 1042465280.0, + "99": 1042465280.0, + "100": 1042465280.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.4943, + "2": 0.30777, + "3": 0.28744, + "4": 0.28478, + "5": 0.28355, + "6": 0.28205, + "7": 0.28648, + "8": 0.28145, + "9": 0.28384, + "10": 0.28181, + "11": 0.28279, + "12": 0.29109, + "13": 0.28824, + "14": 0.28545, + "15": 0.28902, + "16": 0.28736, + "17": 0.28857, + "18": 0.28805, + "19": 0.28819, + "20": 0.28484, + "21": 0.28898, + "22": 0.28201, + "23": 0.29011, + "24": 0.28393, + "25": 0.29706, + "26": 0.30988, + "27": 0.2925, + "28": 0.28946, + "29": 0.29323, + "30": 0.29381, + "31": 0.29538, + "32": 0.28808, + "33": 0.30043, + "34": 0.29302, + "35": 0.2845, + "36": 0.28795, + "37": 0.28827, + "38": 0.2899, + "39": 0.29094, + "40": 0.28938, + "41": 0.28856, + "42": 0.29185, + "43": 0.28692, + "44": 0.28562, + "45": 0.28753, + "46": 0.29142, + 
"47": 0.29037, + "48": 0.28879, + "49": 0.28294, + "50": 0.28321, + "51": 0.30977, + "52": 8.12602, + "53": 5.69198, + "54": 4.43736, + "55": 5.06277, + "56": 5.45623, + "57": 5.46825, + "58": 7.06638, + "59": 4.24603, + "60": 8.21666, + "61": 4.4828, + "62": 6.62355, + "63": 5.55937, + "64": 3.34027, + "65": 5.0081, + "66": 4.41115, + "67": 4.97292, + "68": 4.81, + "69": 5.36112, + "70": 5.8305, + "71": 3.63336, + "72": 8.33029, + "73": 3.31876, + "74": 4.77939, + "75": 5.56427, + "76": 6.70233, + "77": 4.87125, + "78": 3.17949, + "79": 4.79331, + "80": 5.00405, + "81": 4.17384, + "82": 5.59422, + "83": 6.29678, + "84": 3.92285, + "85": 4.83815, + "86": 3.89693, + "87": 3.12272, + "88": 4.27964, + "89": 4.13974, + "90": 3.51718, + "91": 3.66628, + "92": 4.80546, + "93": 4.94171, + "94": 2.69087, + "95": 4.90083, + "96": 5.10401, + "97": 4.90487, + "98": 3.9353, + "99": 3.9083, + "100": 3.6134 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_dgxc.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_dgxc.json new file mode 100644 index 00000000000..13b71c1d7f0 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_dgxc.json @@ -0,0 +1,344 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.04722, + "2": 11.03572, + "3": 9.58802, + "4": 9.25807, + "5": 9.46595, + "6": 9.99646, + "7": 9.50952, + "8": 8.97596, + "9": 8.64768, + "10": 9.40103, + "11": 8.86556, + "12": 8.63563, + "13": 8.52125, + "14": 8.08824, + "15": 8.1958, + "16": 8.22112, + "17": 8.14098, + "18": 7.8386, + "19": 8.23438, + "20": 7.95361, + "21": 7.62549, + "22": 7.60352, + "23": 7.47957, + "24": 7.46573, + "25": 7.70343, + "26": 7.10719, + "27": 7.64313, + "28": 7.34582, + "29": 7.5169, + "30": 7.67511, + 
"31": 7.41799, + "32": 7.61213, + "33": 7.66582, + "34": 7.73101, + "35": 7.23081, + "36": 7.10765, + "37": 7.4476, + "38": 7.21053, + "39": 7.57508, + "40": 7.5662, + "41": 7.51605, + "42": 7.27243, + "43": 7.25706, + "44": 7.44, + "45": 7.21244, + "46": 6.92421, + "47": 7.32604, + "48": 7.17147, + "49": 7.62154, + "50": 7.0624 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 38802612.0, + "2": 38543656.0, + "3": 38739356.0, + "4": 273649600.0, + "5": 252887040.0, + "6": 255692384.0, + "7": 598483264.0, + "8": 787737984.0, + "9": 696133120.0, + "10": 505146368.0, + "11": 718888640.0, + "12": 872597184.0, + "13": 947495104.0, + "14": 1076398976.0, + "15": 856390592.0, + "16": 1048635648.0, + "17": 831370688.0, + "18": 963679552.0, + "19": 970018240.0, + "20": 935737344.0, + "21": 904189312.0, + "22": 887937280.0, + "23": 894777856.0, + "24": 703744192.0, + "25": 909232512.0, + "26": 875633216.0, + "27": 894981376.0, + "28": 919242816.0, + "29": 931351552.0, + "30": 929784768.0, + "31": 941621376.0, + "32": 885000768.0, + "33": 828484096.0, + "34": 822284800.0, + "35": 832032128.0, + "36": 787939392.0, + "37": 770719808.0, + "38": 561204672.0, + "39": 617201536.0, + "40": 695374592.0, + "41": 698978816.0, + "42": 692913728.0, + "43": 668003776.0, + "44": 673780992.0, + "45": 631182912.0, + "46": 444613312.0, + "47": 591957824.0, + "48": 617363968.0, + "49": 585295808.0, + "50": 570423872.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6637272576.0, + "2": 6637274624.0, + "3": 6637274624.0, + "4": 6637274624.0, + "5": 6637274624.0, + "6": 6637274624.0, + "7": 6637274624.0, + "8": 6637274624.0, + "9": 6637274624.0, + "10": 6637274624.0, + "11": 6637274624.0, + "12": 6637274624.0, + "13": 6637274624.0, + "14": 6637274624.0, + "15": 6637274624.0, + "16": 6637274624.0, + "17": 6637274624.0, + "18": 6637274624.0, + "19": 6637274624.0, + "20": 
6637274624.0, + "21": 6637274624.0, + "22": 6637274624.0, + "23": 6637274624.0, + "24": 6637274624.0, + "25": 6637274624.0, + "26": 6637274624.0, + "27": 6637274624.0, + "28": 6637274624.0, + "29": 6637274624.0, + "30": 6637274624.0, + "31": 6637274624.0, + "32": 6637274624.0, + "33": 6637274624.0, + "34": 6637274624.0, + "35": 6637274624.0, + "36": 6637274624.0, + "37": 6637274624.0, + "38": 6637274624.0, + "39": 6637274624.0, + "40": 6637274624.0, + "41": 6637274624.0, + "42": 6637274624.0, + "43": 6637274624.0, + "44": 6637274624.0, + "45": 6637274624.0, + "46": 6637274624.0, + "47": 6637274624.0, + "48": 6637274624.0, + "49": 6637274624.0, + "50": 6637274624.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 55056003072.0, + "2": 57810763776.0, + "3": 57920647168.0, + "4": 57920647168.0, + "5": 57920647168.0, + "6": 57920647168.0, + "7": 57920647168.0, + "8": 57920647168.0, + "9": 57920647168.0, + "10": 57920647168.0, + "11": 57920647168.0, + "12": 57920647168.0, + "13": 57920647168.0, + "14": 57920647168.0, + "15": 57920647168.0, + "16": 57920647168.0, + "17": 57920647168.0, + "18": 57920647168.0, + "19": 57920647168.0, + "20": 57920647168.0, + "21": 57920647168.0, + "22": 57920647168.0, + "23": 57920647168.0, + "24": 57920647168.0, + "25": 57920647168.0, + "26": 57920647168.0, + "27": 57920647168.0, + "28": 57920647168.0, + "29": 57920647168.0, + "30": 57920647168.0, + "31": 57920647168.0, + "32": 57920647168.0, + "33": 57920647168.0, + "34": 57961472000.0, + "35": 57961472000.0, + "36": 57961472000.0, + "37": 57961472000.0, + "38": 57961472000.0, + "39": 57961472000.0, + "40": 57961472000.0, + "41": 57961472000.0, + "42": 57961472000.0, + "43": 57961472000.0, + "44": 57961472000.0, + "45": 57961472000.0, + "46": 57961472000.0, + "47": 57961472000.0, + "48": 57961472000.0, + "49": 57961472000.0, + "50": 57961472000.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + 
"step_interval": 1, + "values": { + "1": 11.07648, + "2": 11.07404, + "3": 10.53854, + "4": 10.09813, + "5": 9.81166, + "6": 10.09741, + "7": 9.79481, + "8": 9.0642, + "9": 8.86016, + "10": 9.34039, + "11": 8.51318, + "12": 8.59467, + "13": 8.5292, + "14": 7.95757, + "15": 8.06962, + "16": 8.11802, + "17": 8.06993, + "18": 7.80587, + "19": 8.19192, + "20": 7.8906, + "21": 7.57063, + "22": 7.55091, + "23": 7.41606, + "24": 7.42454, + "25": 7.65274, + "26": 7.05583, + "27": 7.59747, + "28": 7.29984, + "29": 7.472, + "30": 7.61908, + "31": 7.35179, + "32": 7.52979, + "33": 7.59161, + "34": 7.66287, + "35": 7.17383, + "36": 7.04133, + "37": 7.37081, + "38": 7.1443, + "39": 7.50879, + "40": 7.48921, + "41": 7.43802, + "42": 7.19405, + "43": 7.17581, + "44": 7.35785, + "45": 7.13985, + "46": 6.84014, + "47": 7.25094, + "48": 7.09407, + "49": 7.52321, + "50": 6.98987 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 86.39826, + "2": 1.422, + "3": 10.22559, + "4": 14.42033, + "5": 8.84175, + "6": 7.82667, + "7": 11.39742, + "8": 6.95883, + "9": 9.03821, + "10": 10.04724, + "11": 6.73176, + "12": 10.40096, + "13": 8.09212, + "14": 12.48417, + "15": 10.47434, + "16": 5.38933, + "17": 9.91136, + "18": 12.5031, + "19": 3.69959, + "20": 6.47676, + "21": 8.9867, + "22": 6.26614, + "23": 14.73195, + "24": 5.95294, + "25": 7.82357, + "26": 1.13211, + "27": 10.86033, + "28": 5.6863, + "29": 8.4589, + "30": 11.41315, + "31": 8.85024, + "32": 4.72753, + "33": 8.44604, + "34": 10.74723, + "35": 6.95053, + "36": 6.82478, + "37": 7.84389, + "38": 9.46014, + "39": 8.6244, + "40": 5.78378, + "41": 6.9593, + "42": 5.09864, + "43": 8.81575, + "44": 6.08546, + "45": 10.08201, + "46": 6.04881, + "47": 7.73914, + "48": 7.18802, + "49": 7.82111, + "50": 7.94794 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_dgxc.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_dgxc.json new file mode 100644 index 00000000000..3f2294f2670 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_dgxc.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.81442, + "2": 10.81882, + "3": 10.81551, + "4": 10.80292, + "5": 10.85144, + "6": 10.85011, + "7": 10.83867, + "8": 10.83952, + "9": 10.82213, + "10": 10.77746, + "11": 10.86426, + "12": 10.83689, + "13": 10.85831, + "14": 10.86354, + "15": 10.79774, + "16": 10.79537, + "17": 10.77155, + "18": 10.78908, + "19": 10.78343, + "20": 10.71629, + "21": 10.6835, + "22": 10.53061, + "23": 10.69849, + "24": 10.58571, + "25": 10.52397, + "26": 10.58327, + "27": 10.60963, + "28": 10.57207, + "29": 10.59012, + "30": 10.35613, + "31": 10.09392, + "32": 10.45887, + "33": 10.45644, + "34": 10.20494, + "35": 10.26735, + "36": 10.22333, + "37": 10.35299, + "38": 10.19476, + "39": 10.41731, + "40": 10.08948, + "41": 10.12721, + "42": 10.21207, + "43": 9.8313, + "44": 9.96936, + "45": 9.83601, + "46": 9.81666, + "47": 10.1539, + "48": 9.85279, + "49": 9.53447, + "50": 9.91909, + "51": 9.85364, + "52": 9.74286, + "53": 10.07155, + "54": 9.96279, + "55": 9.88223, + "56": 9.63465, + "57": 9.48633, + "58": 9.84878, + "59": 9.58904, + "60": 9.51094, + "61": 9.7032, + "62": 9.99637, + "63": 9.40044, + "64": 9.78465, + "65": 8.95366, + "66": 9.71808, + "67": 9.36931, + "68": 9.79818, + "69": 9.79667, + "70": 9.74899, + "71": 9.63213, + "72": 9.59956, + "73": 9.50308, + "74": 8.95202, + "75": 9.43084, + "76": 9.09067, + "77": 10.08102, + "78": 9.73521, + "79": 9.38853, + 
"80": 9.41418, + "81": 9.48403, + "82": 9.70907, + "83": 9.3152, + "84": 9.41838, + "85": 9.62222, + "86": 9.07945, + "87": 9.59202, + "88": 9.74953, + "89": 9.60441, + "90": 9.82577, + "91": 9.34232, + "92": 9.35837, + "93": 9.07969, + "94": 8.82793, + "95": 9.50864, + "96": 9.52117, + "97": 9.30605, + "98": 9.6658, + "99": 8.87716, + "100": 9.38997 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5488.0, + "2": 5704.0, + "3": 5788.0, + "4": 5853.0, + "5": 6401.0, + "6": 6686.0, + "7": 5949.0, + "8": 5811.0, + "9": 6280.0, + "10": 5192.0, + "11": 6645.0, + "12": 6193.0, + "13": 6525.0, + "14": 6487.0, + "15": 6258.0, + "16": 6261.0, + "17": 6080.0, + "18": 5901.0, + "19": 6228.0, + "20": 5713.0, + "21": 6265.0, + "22": 5788.0, + "23": 6618.0, + "24": 6159.0, + "25": 5674.0, + "26": 6218.0, + "27": 6180.0, + "28": 6802.0, + "29": 7006.0, + "30": 6195.0, + "31": 5847.0, + "32": 6680.0, + "33": 7327.0, + "34": 6433.0, + "35": 6593.0, + "36": 6717.0, + "37": 7545.0, + "38": 7130.0, + "39": 7928.0, + "40": 7233.0, + "41": 7093.0, + "42": 7653.0, + "43": 7136.0, + "44": 7113.0, + "45": 7167.0, + "46": 7435.0, + "47": 7501.0, + "48": 7648.0, + "49": 7520.0, + "50": 7701.0, + "51": 7847.0, + "52": 7828.0, + "53": 8765.0, + "54": 8799.0, + "55": 7683.0, + "56": 7972.0, + "57": 7642.0, + "58": 8419.0, + "59": 8276.0, + "60": 7917.0, + "61": 8598.0, + "62": 8394.0, + "63": 7896.0, + "64": 9047.0, + "65": 8280.0, + "66": 9315.0, + "67": 8277.0, + "68": 8341.0, + "69": 8737.0, + "70": 9764.0, + "71": 9050.0, + "72": 9036.0, + "73": 9076.0, + "74": 6969.0, + "75": 7833.0, + "76": 8450.0, + "77": 13505.0, + "78": 9634.0, + "79": 13982.0, + "80": 11548.0, + "81": 10035.0, + "82": 9732.0, + "83": 9037.0, + "84": 9522.0, + "85": 46479.0, + "86": 8626.0, + "87": 11964.0, + "88": 9637.0, + "89": 10273.0, + "90": 11256.0, + "91": 8811.0, + "92": 9218.0, + "93": 8281.0, + "94": 9390.0, + "95": 9376.0, + "96": 13248.0, + "97": 
8945.0, + "98": 10682.0, + "99": 15485.0, + "100": 9101.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 628643840.0, + "2": 628644864.0, + "3": 628644864.0, + "4": 628644864.0, + "5": 628644864.0, + "6": 628644864.0, + "7": 628644864.0, + "8": 628644864.0, + "9": 628644864.0, + "10": 628644864.0, + "11": 628644864.0, + "12": 628644864.0, + "13": 628644864.0, + "14": 628644864.0, + "15": 628644864.0, + "16": 628644864.0, + "17": 628644864.0, + "18": 628644864.0, + "19": 628644864.0, + "20": 628644864.0, + "21": 628644864.0, + "22": 628644864.0, + "23": 628644864.0, + "24": 628644864.0, + "25": 628644864.0, + "26": 628644864.0, + "27": 628644864.0, + "28": 628644864.0, + "29": 628644864.0, + "30": 628644864.0, + "31": 628644864.0, + "32": 628644864.0, + "33": 628644864.0, + "34": 628644864.0, + "35": 628644864.0, + "36": 628644864.0, + "37": 628644864.0, + "38": 628644864.0, + "39": 628644864.0, + "40": 628644864.0, + "41": 628644864.0, + "42": 628644864.0, + "43": 628644864.0, + "44": 628644864.0, + "45": 628644864.0, + "46": 628644864.0, + "47": 628644864.0, + "48": 628644864.0, + "49": 628644864.0, + "50": 628644864.0, + "51": 628644864.0, + "52": 628644864.0, + "53": 628644864.0, + "54": 628644864.0, + "55": 628644864.0, + "56": 628644864.0, + "57": 628644864.0, + "58": 628644864.0, + "59": 628644864.0, + "60": 628644864.0, + "61": 628644864.0, + "62": 628644864.0, + "63": 628644864.0, + "64": 628644864.0, + "65": 628644864.0, + "66": 628644864.0, + "67": 628644864.0, + "68": 628644864.0, + "69": 628644864.0, + "70": 628644864.0, + "71": 628644864.0, + "72": 628644864.0, + "73": 628644864.0, + "74": 628644864.0, + "75": 628644864.0, + "76": 628644864.0, + "77": 628644864.0, + "78": 628644864.0, + "79": 628644864.0, + "80": 628644864.0, + "81": 628644864.0, + "82": 628644864.0, + "83": 628644864.0, + "84": 628644864.0, + "85": 628644864.0, + "86": 628644864.0, + "87": 628644864.0, + "88": 
628644864.0, + "89": 628644864.0, + "90": 628644864.0, + "91": 628644864.0, + "92": 628644864.0, + "93": 628644864.0, + "94": 628644864.0, + "95": 628644864.0, + "96": 628644864.0, + "97": 628644864.0, + "98": 628644864.0, + "99": 628644864.0, + "100": 628644864.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 982153216.0, + "2": 1149395968.0, + "3": 1149395968.0, + "4": 1155440128.0, + "5": 1155440128.0, + "6": 1155440128.0, + "7": 1155440128.0, + "8": 1155440128.0, + "9": 1155440128.0, + "10": 1155440128.0, + "11": 1155440128.0, + "12": 1155440128.0, + "13": 1155440128.0, + "14": 1155440128.0, + "15": 1155440128.0, + "16": 1155440128.0, + "17": 1155440128.0, + "18": 1155440128.0, + "19": 1155440128.0, + "20": 1155440128.0, + "21": 1155440128.0, + "22": 1155440128.0, + "23": 1155440128.0, + "24": 1155440128.0, + "25": 1155440128.0, + "26": 1155440128.0, + "27": 1155440128.0, + "28": 1155440128.0, + "29": 1155440128.0, + "30": 1155440128.0, + "31": 1155440128.0, + "32": 1155440128.0, + "33": 1155440128.0, + "34": 1155440128.0, + "35": 1155440128.0, + "36": 1155440128.0, + "37": 1155440128.0, + "38": 1155440128.0, + "39": 1155440128.0, + "40": 1155440128.0, + "41": 1155440128.0, + "42": 1155440128.0, + "43": 1155440128.0, + "44": 1155440128.0, + "45": 1155440128.0, + "46": 1155440128.0, + "47": 1155440128.0, + "48": 1155440128.0, + "49": 1155440128.0, + "50": 1155440128.0, + "51": 1155440128.0, + "52": 1155440128.0, + "53": 1155440128.0, + "54": 1155440128.0, + "55": 1155440128.0, + "56": 1155440128.0, + "57": 1155440128.0, + "58": 1155440128.0, + "59": 1155440128.0, + "60": 1155999232.0, + "61": 1159285760.0, + "62": 1159285760.0, + "63": 1159285760.0, + "64": 1159285760.0, + "65": 1159285760.0, + "66": 1159285760.0, + "67": 1159285760.0, + "68": 1159285760.0, + "69": 1159285760.0, + "70": 1159285760.0, + "71": 1159285760.0, + "72": 1159285760.0, + "73": 1159285760.0, + "74": 1159285760.0, + 
"75": 1159285760.0, + "76": 1164709376.0, + "77": 1164709376.0, + "78": 1164709376.0, + "79": 1164709376.0, + "80": 1164709376.0, + "81": 1164709376.0, + "82": 1164709376.0, + "83": 1164709376.0, + "84": 1164709376.0, + "85": 1164709376.0, + "86": 1164709376.0, + "87": 1164709376.0, + "88": 1164709376.0, + "89": 1164709376.0, + "90": 1164709376.0, + "91": 1164709376.0, + "92": 1164709376.0, + "93": 1164709376.0, + "94": 1164709376.0, + "95": 1164709376.0, + "96": 1164709376.0, + "97": 1164709376.0, + "98": 1164709376.0, + "99": 1164709376.0, + "100": 1164709376.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.68355, + "2": 0.90574, + "3": 0.83204, + "4": 0.80726, + "5": 0.81604, + "6": 0.80698, + "7": 0.79149, + "8": 0.78879, + "9": 5.49279, + "10": 7.0174, + "11": 5.92452, + "12": 5.81078, + "13": 7.25845, + "14": 4.93946, + "15": 4.83531, + "16": 4.47779, + "17": 6.18406, + "18": 6.12945, + "19": 10.25032, + "20": 7.44996, + "21": 6.16308, + "22": 9.83266, + "23": 6.97961, + "24": 8.73643, + "25": 7.58409, + "26": 6.5798, + "27": 9.71829, + "28": 7.38708, + "29": 8.61355, + "30": 7.20245, + "31": 7.15976, + "32": 10.8435, + "33": 7.30066, + "34": 4.75923, + "35": 7.80722, + "36": 7.65565, + "37": 8.21042, + "38": 7.29325, + "39": 8.35765, + "40": 9.13683, + "41": 9.17388, + "42": 8.76786, + "43": 6.60222, + "44": 9.37932, + "45": 8.70546, + "46": 7.2996, + "47": 7.24309, + "48": 8.69252, + "49": 6.05433, + "50": 8.17077, + "51": 5.49966, + "52": 8.23075, + "53": 7.32789, + "54": 8.08693, + "55": 6.13482, + "56": 7.89454, + "57": 6.91153, + "58": 10.68201, + "59": 4.20298, + "60": 10.28771, + "61": 9.10505, + "62": 8.665, + "63": 7.47889, + "64": 6.00947, + "65": 6.44144, + "66": 7.43135, + "67": 6.56432, + "68": 8.03943, + "69": 7.40803, + "70": 8.51347, + "71": 7.69153, + "72": 8.48279, + "73": 5.96062, + "74": 6.63264, + "75": 8.55139, + "76": 8.45504, + "77": 6.34534, + "78": 4.89292, + "79": 
8.63417, + "80": 6.91863, + "81": 6.90981, + "82": 9.79368, + "83": 10.43847, + "84": 6.26228, + "85": 5.61723, + "86": 6.31752, + "87": 5.27251, + "88": 7.88452, + "89": 6.17004, + "90": 7.59884, + "91": 8.09035, + "92": 5.87686, + "93": 6.89489, + "94": 4.69639, + "95": 6.85708, + "96": 7.35569, + "97": 6.66015, + "98": 7.07087, + "99": 6.85994, + "100": 5.88721 + } + } +} \ No newline at end of file diff --git a/tests/test_utils/python_scripts/launch_nemo_run_workload.py b/tests/test_utils/python_scripts/launch_nemo_run_workload.py index 1aa1c560052..b3032eb15c4 100644 --- a/tests/test_utils/python_scripts/launch_nemo_run_workload.py +++ b/tests/test_utils/python_scripts/launch_nemo_run_workload.py @@ -22,6 +22,15 @@ @click.option("--container-image", required=True, type=str, help="Container image of the workload") @click.option("--data-dir", required=False, type=str, help="Data directory of the workload") @click.option("--tag", required=False, type=str, help="Tag of the workload") +@click.option( + "--enable-lightweight-mode", + is_flag=True, + show_default=True, + required=False, + type=bool, + default=False, + help="To enable lightweight mode", +) def main( scope, model, @@ -31,6 +40,7 @@ def main( container_image, data_dir: Optional[str] = None, tag: Optional[str] = None, + enable_lightweight_mode: Optional[bool] = False, ): workloads = recipe_parser.load_workloads( container_image="none", @@ -72,8 +82,9 @@ def main( env_vars={ "PYTHONUNBUFFERED": "1", "OUTPUT_PATH": os.getcwd(), - "ENABLE_LIGHTWEIGHT_MODE": "true", + "ENABLE_LIGHTWEIGHT_MODE": str(enable_lightweight_mode).lower(), "N_REPEAT": "1", + "CLUSTER": "dgxh100_dgxc", }, packager=run.Packager(), volumes=artifacts, From bec65822072a298c89937de67a778e1b76b54015 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 16:04:53 +0000 Subject: [PATCH 038/334] ADLR/megatron-lm!4298 - ci: Refactor testsytem - Removal of JET Artifacts MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/actions/action.yml | 4 +- .../workflows/build-test-publish-wheel.yml | 1 + .github/workflows/cicd-main.yml | 66 +- .github/workflows/copyright-check.yml | 3 +- .github/workflows/install-test.yml | 1 + .gitlab/stages/04.functional-tests.yml | 2 +- pyproject.toml | 1 - .../python_test_utils/common.py | 22 +- .../test_pretraining_regular_pipeline.py | 37 +- .../shell_test_utils/_run_training.sh | 6 +- .../shell_test_utils/run_ci_test.sh | 8 +- .../bert/bert_mcore_tp1_pp2/model_config.yaml | 2 +- .../bert_mcore_tp1_pp4_vp2/model_config.yaml | 2 +- .../bert/bert_mcore_tp2_pp2/model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../bert/bert_mcore_tp4_pp1/model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 0 .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 0 .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../README.md | 0 .../model_config.yaml | 8 +- .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json 
| 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 
.../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json 
| 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_dgxc.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 
.../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 287 + .../golden_values_dev_dgxh100_eos.json | 287 + .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 
.../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 8 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 
.../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- 
.../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- 
.../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json 
| 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_dgxc.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 
.../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 8 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 
.../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgxh100_coreweave.json | 287 - .../golden_values_dev_dgxh100_eos.json | 287 - .../model_config.yaml | 6 +- .../tp_comm_overlap_cfg.yaml | 0 .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgxh100_coreweave.json | 0 .../model_config.yaml | 6 +- .../tp_comm_overlap_cfg.yaml | 0 .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../golden_values_dev_dgx_h100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgxh100_coreweave.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_h100.json | 342 +- .../model_config.yaml | 2 +- .../model_config.yaml | 6 +- .../golden_values_dev_dgx_h100.json | 5398 +++++++++-------- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- 
.../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 6 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 
+- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 6 +- .../model_config.yaml | 4 +- .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 4 +- .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 
.../model_config.yaml | 4 +- .../golden_values_dev_dgx_h100.json | 0 .../model_config.yaml | 4 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 
.../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 ...olden_values_lts_dgxa100_dracooci-ord.json | 0 .../golden_values_lts_dgxa100_dracooci.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev.json | 0 .../golden_values_lts.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 6 +- .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_dgxc.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 6 +- .../model_config.yaml | 8 +- .../model_config.yaml | 2 +- .../model_config.yaml | 6 +- .../model_config.yaml | 2 +- .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json 
| 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 ...olden_values_dev_dgxa100_dracooci-ord.json | 0 .../golden_values_dev_dgxa100_dracooci.json | 0 
.../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../golden_values_dev_dgx_a100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgxh100_coreweave.json | 0 .../golden_values_dev_dgxh100_eos.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../model_config.yaml | 2 +- .../t5/t5_release/model_config.yaml | 2 +- .../golden_values_lts_dgx_a100.json | 0 .../golden_values_lts_dgx_a100.json | 0 .../python_scripts/launch_jet_workload.py | 1 + .../launch_nemo_run_workload.py | 64 +- tests/test_utils/recipes/ckpt_converter.yaml | 1 + .../gpt-dynamic-inference-cuda-graphs.yaml | 2 - ...pt-dynamic-inference-with-coordinator.yaml | 2 - tests/test_utils/recipes/gpt-grads.yaml | 4 +- tests/test_utils/recipes/gpt.yaml | 156 +- .../recipes/mamba-static-inference.yaml | 2 - .../recipes/moe-static-inference.yaml | 1 - tests/test_utils/recipes/moe.yaml | 30 +- .../test_utils/recipes/multimodal-llava.yaml | 4 +- uv.lock | 116 +- 870 files changed, 4255 insertions(+), 4159 deletions(-) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed => gpt3_7b_tp1_pp4_memory_speed}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed => gpt3_7b_tp1_pp4_memory_speed}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed => gpt3_7b_tp1_pp4_memory_speed}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed => 
gpt3_7b_tp1_pp4_memory_speed}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed => gpt3_7b_tp1_pp4_memory_speed}/model_config.yaml (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed => gpt3_7b_tp4_pp1_memory_speed}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed => gpt3_7b_tp4_pp1_memory_speed}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed => gpt3_7b_tp4_pp1_memory_speed}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed => gpt3_7b_tp4_pp1_memory_speed}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed => gpt3_7b_tp4_pp1_memory_speed}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed => gpt3_7b_tp4_pp1_memory_speed}/model_config.yaml (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_reruns_disable => gpt3_mcore_reruns_disable}/model_config.yaml (94%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_reruns_enable => gpt3_mcore_reruns_enable}/model_config.yaml (93%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_reruns_persistent_1 => gpt3_mcore_reruns_persistent_1}/model_config.yaml (94%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_reruns_persistent_2 => gpt3_mcore_reruns_persistent_2}/model_config.yaml (94%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_reruns_reshard => gpt3_mcore_reruns_reshard}/model_config.yaml (94%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_reruns_resume => gpt3_mcore_reruns_resume}/model_config.yaml (93%) 
rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_reruns_resume_check_grads => gpt3_mcore_reruns_resume_check_grads}/README.md (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_reruns_resume_check_grads => gpt3_mcore_reruns_resume_check_grads}/model_config.yaml (94%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_reruns_transient => gpt3_mcore_reruns_transient}/model_config.yaml (94%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files}/golden_values_lts_dgxa100_dracooci.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer}/golden_values_dev_dgxh100_eos.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G => 
gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_uniform_full_recompute}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_uniform_full_recompute}/golden_values_lts_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp1_uniform_full_recompute}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings}/golden_values_dev_dgx_h100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion}/golden_values_dev_dgxh100_eos.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_rope_embeddings}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_rope_embeddings}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_rope_embeddings}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => 
gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_disable_bias_linear}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_disable_bias_linear}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_disable_bias_linear}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear}/golden_values_dev_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G => 
gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G => 
gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs}/golden_values_dev_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_sequence_parallel}/golden_values_dev_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_sequence_parallel}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_sequence_parallel}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_swiglu}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_swiglu}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_swiglu}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1}/golden_values_dev_dgxh100_coreweave.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr}/model_config.yaml (90%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer}/golden_values_dev_dgx_h100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer}/golden_values_dev_dgxh100_dgxc.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G => 
gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr}/golden_values_lts_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss}/golden_values_dev_dgx_h100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_dev_dgx_h100.json 
(100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/model_config.yaml (92%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied}/golden_values_dev_dgx_h100.json 
(100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G => 
gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap}/golden_values_dev_dgx_h100.json (100%) create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_eos.json rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap}/model_config.yaml (91%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G => 
gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_cp2_nondeterministic}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_cp2_nondeterministic}/golden_values_lts_dgx_a100.json (100%) 
rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_cp2_nondeterministic}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume => gpt3_mcore_te_tp2_pp1_modelopt_distill_resume}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume => gpt3_mcore_te_tp2_pp1_modelopt_distill_resume}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume => gpt3_mcore_te_tp2_pp1_modelopt_distill_resume}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G => 
gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss}/golden_values_dev_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G => 
gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last}/model_config.yaml 
(90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last}/golden_values_lts_dgxa100_dracooci.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G => 
gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_nondeterministic}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_nondeterministic}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_nondeterministic}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_nondeterministic}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_nondeterministic}/golden_values_lts_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_nondeterministic}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_nondeterministic}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cp2_nondeterministic}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G => 
gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion}/model_config.yaml (89%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_ddp_average_in_collective}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_ddp_average_in_collective}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_ddp_average_in_collective}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_mla}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_mla}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_mla}/golden_values_dev_dgxh100_coreweave.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_mla}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_mla}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_no_mmap_bin_files}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_no_mmap_bin_files}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_no_mmap_bin_files}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => 
gpt3_mcore_te_tp2_pp2_resume_torch_dist}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic}/golden_values_dev_dgxh100_eos.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion}/golden_values_dev_dgxh100_eos.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective}/golden_values_dev_dgxh100_eos.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute}/golden_values_dev_dgxh100_eos.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G => 
gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone}/golden_values_dev_dgxh100_coreweave.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G => gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G => gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G => gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G => gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G => 
gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G => gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgxh100_eos.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => 
gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgxh100_dgxc.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgxa100_dracooci.json 
(100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode}/golden_values_lts_dgxa100_dracooci.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => 
gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G => gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather => gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather => gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather => gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te => gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te}/model_config.yaml (90%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather => gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather => gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather => gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather => gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather => gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather => gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather => 
gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather => gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => gpt3_mcore_tp1_pp2}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => gpt3_mcore_tp1_pp2}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => gpt3_mcore_tp1_pp2}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => gpt3_mcore_tp1_pp2}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => gpt3_mcore_tp1_pp2}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => gpt3_mcore_tp1_pp2}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => gpt3_mcore_tp1_pp2}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2 => gpt3_mcore_tp1_pp2}/model_config.yaml (89%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16 => gpt3_mcore_tp1_pp2_fp16}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16 => gpt3_mcore_tp1_pp2_fp16}/golden_values_lts_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16 => gpt3_mcore_tp1_pp2_fp16}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist => gpt3_mcore_tp1_pp2_resume_torch_dist}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist => gpt3_mcore_tp1_pp2_resume_torch_dist}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist => gpt3_mcore_tp1_pp2_resume_torch_dist}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist => gpt3_mcore_tp1_pp2_resume_torch_dist}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist => gpt3_mcore_tp1_pp2_resume_torch_dist}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist => gpt3_mcore_tp1_pp2_resume_torch_dist}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist => gpt3_mcore_tp1_pp2_resume_torch_dist}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4 => gpt3_mcore_tp1_pp4}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4 => gpt3_mcore_tp1_pp4}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4 => gpt3_mcore_tp1_pp4}/golden_values_dev_dgxh100_coreweave.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4 => gpt3_mcore_tp1_pp4}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4 => gpt3_mcore_tp1_pp4}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4 => gpt3_mcore_tp1_pp4}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4 => gpt3_mcore_tp1_pp4}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4 => gpt3_mcore_tp1_pp4}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist => gpt3_mcore_tp1_pp4_resume_torch_dist}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist => gpt3_mcore_tp1_pp4_resume_torch_dist}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist => gpt3_mcore_tp1_pp4_resume_torch_dist}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist => gpt3_mcore_tp1_pp4_resume_torch_dist}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist => gpt3_mcore_tp1_pp4_resume_torch_dist}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist => gpt3_mcore_tp1_pp4_resume_torch_dist}/golden_values_lts_dgxa100_dracooci.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist => gpt3_mcore_tp1_pp4_resume_torch_dist}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G => gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G => gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G => gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G => gpt3_mcore_tp2_pp2_uninstall_te}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G => gpt3_mcore_tp2_pp2_uninstall_te}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G => gpt3_mcore_tp2_pp2_uninstall_te}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1 => gpt3_mcore_tp4_pp1}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1 => gpt3_mcore_tp4_pp1}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1 => gpt3_mcore_tp4_pp1}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch => gpt3_mcore_tp4_pp1_resume_torch}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch 
=> gpt3_mcore_tp4_pp1_resume_torch}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch => gpt3_mcore_tp4_pp1_resume_torch}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch => gpt3_mcore_tp4_pp1_resume_torch}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch => gpt3_mcore_tp4_pp1_resume_torch}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch => gpt3_mcore_tp4_pp1_resume_torch}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch => gpt3_mcore_tp4_pp1_resume_torch}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch => gpt3_mcore_tp4_pp1_resume_torch}/model_config.yaml (89%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist => gpt3_mcore_tp4_pp1_resume_torch_dist}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist => gpt3_mcore_tp4_pp1_resume_torch_dist}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist => gpt3_mcore_tp4_pp1_resume_torch_dist}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist => gpt3_mcore_tp4_pp1_resume_torch_dist}/golden_values_dev_dgxh100_eos.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist => gpt3_mcore_tp4_pp1_resume_torch_dist}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist => gpt3_mcore_tp4_pp1_resume_torch_dist}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist => gpt3_mcore_tp4_pp1_resume_torch_dist}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist => gpt3_mcore_tp4_pp1_resume_torch_dist}/model_config.yaml (90%) delete mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json delete mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap => gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap => gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap}/tp_comm_overlap_cfg.yaml (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp => gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap => 
gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp => gpt3_weekly_dgx_b200_mcore_tp4_cp2_mxfp8_tp_sp_cp}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap => gpt3_weekly_dgx_b200_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel => gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel => gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel => gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline => gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline => gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline => gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline}/model_config.yaml (89%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel => gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel}/golden_values_dev_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel => gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel => gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp => gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp => gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp => gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap => gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap => gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap}/model_config.yaml (92%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap => gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap}/tp_comm_overlap_cfg.yaml (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp => gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp}/golden_values_dev_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp => gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp => gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp => gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp => gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp => gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp}/model_config.yaml (90%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp => gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp => gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp => gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp => gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap => 
gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap => gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap => gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap => gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap}/model_config.yaml (91%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp => gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp => gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/gpt/{gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp => gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp}/model_config.yaml (90%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic => 
gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last => 
gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename 
tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last => gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer}/golden_values_lts_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer}/model_config.yaml (92%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM}/golden_values_dev_dgx_h100.json (100%) rename 
tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => 
gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer}/model_config.yaml (92%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G => 
gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances}/model_config.yaml (92%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective}/golden_values_dev_dgx_h100.json (100%) rename 
tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective}/model_config.yaml (92%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer}/golden_values_lts_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM}/model_config.yaml (92%) rename 
tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4}/model_config.yaml (92%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4}/golden_values_dev_dgx_h100.json (100%) rename 
tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G => gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4}/model_config.yaml (92%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8 => gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8 => gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8 => gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8 => gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8}/model_config.yaml (96%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph => gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph => gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph}/golden_values_dev_dgxh100_coreweave.json (100%) rename 
tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph => gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph => gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph}/model_config.yaml (96%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental => gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental => gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental}/model_config.yaml (96%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G => gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G => gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G => gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G => gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router}/model_config.yaml (92%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel => 
gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last => 
gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel}/golden_values_dev_dgxa100_dracooci.json (100%) rename 
tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last}/golden_values_dev_dgxa100_dracooci-ord.json (100%) 
rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last => gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel}/golden_values_lts_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel}/golden_values_lts_dgxa100_dracooci-ord.json (100%) rename 
tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel}/golden_values_lts_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel => gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts => gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts}/golden_values_dev.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts => gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts}/golden_values_lts.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts => gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts}/model_config.yaml (91%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer => gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer => gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer => gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer => gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer => 
gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer => gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer}/model_config.yaml (92%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer => gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer => gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer => gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer}/golden_values_dev_dgxh100_dgxc.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer => gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/moe/{gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer => gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer}/model_config.yaml (92%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G => multimodal_llava_mcore_te_tp1_pp1}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G => multimodal_llava_mcore_te_tp1_pp1}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G => multimodal_llava_mcore_te_tp1_pp1}/golden_values_dev_dgxh100_coreweave.json (100%) rename 
tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G => multimodal_llava_mcore_te_tp1_pp1}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G => multimodal_llava_mcore_te_tp1_pp1}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G => multimodal_llava_mcore_te_tp1_pp1}/model_config.yaml (98%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G => multimodal_llava_mcore_te_tp4_sp_cp2}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G => multimodal_llava_mcore_te_tp4_sp_cp2}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G => multimodal_llava_mcore_te_tp4_sp_cp2}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G => multimodal_llava_mcore_te_tp4_sp_cp2}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G => multimodal_llava_mcore_te_tp4_sp_cp2}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/multimodal-llava/{multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G => multimodal_llava_mcore_te_tp4_sp_cp2}/model_config.yaml (98%) rename tests/functional_tests/test_cases/t5/{t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_11b_mcore_tp4_pp1}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_11b_mcore_tp4_pp1}/golden_values_dev_dgx_h100.json (100%) rename 
tests/functional_tests/test_cases/t5/{t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_11b_mcore_tp4_pp1}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_11b_mcore_tp4_pp1}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/t5/{t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_11b_mcore_tp4_pp1}/model_config.yaml (98%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch => t5_mcore_te_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch => t5_mcore_te_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch => t5_mcore_te_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch => t5_mcore_te_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch => t5_mcore_te_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch => t5_mcore_te_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch => t5_mcore_te_tp1_pp1_vp1_resume_torch}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch => t5_mcore_te_tp1_pp1_vp1_resume_torch}/model_config.yaml (98%) rename 
tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 => t5_mcore_te_tp2_pp1_vp1}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 => t5_mcore_te_tp2_pp1_vp1}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 => t5_mcore_te_tp2_pp1_vp1}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 => t5_mcore_te_tp2_pp1_vp1}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 => t5_mcore_te_tp2_pp1_vp1}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 => t5_mcore_te_tp2_pp1_vp1}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 => t5_mcore_te_tp2_pp1_vp1}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 => t5_mcore_te_tp2_pp1_vp1}/model_config.yaml (98%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel => t5_mcore_te_tp2_pp1_vp1_sequence_parallel}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel => t5_mcore_te_tp2_pp1_vp1_sequence_parallel}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel => t5_mcore_te_tp2_pp1_vp1_sequence_parallel}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename 
tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel => t5_mcore_te_tp2_pp1_vp1_sequence_parallel}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel => t5_mcore_te_tp2_pp1_vp1_sequence_parallel}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel => t5_mcore_te_tp2_pp1_vp1_sequence_parallel}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel => t5_mcore_te_tp2_pp1_vp1_sequence_parallel}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel => t5_mcore_te_tp2_pp1_vp1_sequence_parallel}/model_config.yaml (98%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G => t5_mcore_te_tp4_pp1}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G => t5_mcore_te_tp4_pp1}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G => t5_mcore_te_tp4_pp1}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G => t5_mcore_te_tp4_pp1}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G => t5_mcore_te_tp4_pp1}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G => t5_mcore_te_tp4_pp1}/model_config.yaml (98%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => 
t5_mcore_te_tp4_pp1_resume_torch_dist}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_te_tp4_pp1_resume_torch_dist}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_te_tp4_pp1_resume_torch_dist}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_te_tp4_pp1_resume_torch_dist}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_te_tp4_pp1_resume_torch_dist}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_te_tp4_pp1_resume_torch_dist}/model_config.yaml (98%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1 => t5_mcore_tp1_pp1_vp1}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1 => t5_mcore_tp1_pp1_vp1}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1 => t5_mcore_tp1_pp1_vp1}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1 => t5_mcore_tp1_pp1_vp1}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1 => t5_mcore_tp1_pp1_vp1}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1 => t5_mcore_tp1_pp1_vp1}/golden_values_dev_dgxh100_eos.json (100%) rename 
tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1 => t5_mcore_tp1_pp1_vp1}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1 => t5_mcore_tp1_pp1_vp1}/model_config.yaml (98%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch => t5_mcore_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch => t5_mcore_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch => t5_mcore_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch => t5_mcore_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch => t5_mcore_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch => t5_mcore_tp1_pp1_vp1_resume_torch}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch => t5_mcore_tp1_pp1_vp1_resume_torch}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch => t5_mcore_tp1_pp1_vp1_resume_torch}/model_config.yaml (98%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1 => t5_mcore_tp2_pp1_vp1}/golden_values_dev_dgx_a100.json (100%) rename 
tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1 => t5_mcore_tp2_pp1_vp1}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1 => t5_mcore_tp2_pp1_vp1}/golden_values_dev_dgxa100_dracooci-ord.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1 => t5_mcore_tp2_pp1_vp1}/golden_values_dev_dgxa100_dracooci.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1 => t5_mcore_tp2_pp1_vp1}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1 => t5_mcore_tp2_pp1_vp1}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1 => t5_mcore_tp2_pp1_vp1}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1 => t5_mcore_tp2_pp1_vp1}/model_config.yaml (98%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_mcore_tp4_pp1}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_mcore_tp4_pp1}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_mcore_tp4_pp1}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_mcore_tp4_pp1}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_mcore_tp4_pp1}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G => t5_mcore_tp4_pp1}/model_config.yaml (98%) rename 
tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_tp4_pp1_resume_torch_dist}/golden_values_dev_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_tp4_pp1_resume_torch_dist}/golden_values_dev_dgx_h100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_tp4_pp1_resume_torch_dist}/golden_values_dev_dgxh100_coreweave.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_tp4_pp1_resume_torch_dist}/golden_values_dev_dgxh100_eos.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_tp4_pp1_resume_torch_dist}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G => t5_mcore_tp4_pp1_resume_torch_dist}/model_config.yaml (98%) rename tests/functional_tests/test_cases/t5/{t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 => t5_weekly_mcore_te_tp2_pp1_vp1}/golden_values_lts_dgx_a100.json (100%) rename tests/functional_tests/test_cases/t5/{t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel => t5_weekly_mcore_te_tp2_pp1_vp1_sequence_parallel}/golden_values_lts_dgx_a100.json (100%) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index d726fcabc9f..d2f43599182 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -173,6 +173,7 @@ runs: - name: Check result id: check shell: bash -x -e -u -o pipefail {0} + if: always() env: IS_UNIT_TEST: ${{ inputs.is_unit_test == 'true' }} run: | @@ -210,7 +211,7 @@ runs: - name: Upload coverage uses: actions/upload-artifact@v4 - if: ${{ steps.check.outputs.coverage_report != 'none' }} + if: ${{ always() && steps.check.outputs.coverage_report != 'none' 
}} with: name: ${{ steps.check.outputs.coverage_report }} path: | @@ -220,6 +221,7 @@ runs: - name: Upload logs uses: actions/upload-artifact@v4 + if: always() with: name: ${{ steps.check.outputs.logs_report }} path: ${{ inputs.is_unit_test == 'true' && 'logs' || 'assets_dir' }} diff --git a/.github/workflows/build-test-publish-wheel.yml b/.github/workflows/build-test-publish-wheel.yml index 95795e67f60..0b6cdd7efdb 100644 --- a/.github/workflows/build-test-publish-wheel.yml +++ b/.github/workflows/build-test-publish-wheel.yml @@ -17,6 +17,7 @@ name: Build, test, and publish a PyPi wheel (to testpypi). on: push: branches: + - dev - main - "pull-request/[0-9]+" - "deploy-release/*" diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index a56afb74c71..94d486f2fb5 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -17,6 +17,7 @@ on: - cron: "0 */2 * * *" push: branches: + - dev - main - "pull-request/[0-9]+" - "deploy-release/*" @@ -31,6 +32,9 @@ permissions: id-token: write contents: read +env: + container-registry: 766267172432.dkr.ecr.us-east-1.amazonaws.com + jobs: is-not-external-contributor: runs-on: ubuntu-latest @@ -140,6 +144,7 @@ jobs: fi pre-flight: + needs: [is-not-external-contributor] uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.0 linting: @@ -177,6 +182,8 @@ jobs: - name: Run linting run: | export PATH=".venv/bin:$PATH" + export GITLAB_ENDPOINT=github.com + export CI_PROJECT_NAMESPACE=NVIDIA export BASE_REF="${{ startsWith(github.ref, 'refs/heads/pull-request/') && fromJSON(steps.get-pr-info.outputs.pr-info).base.ref || 'HEAD~1' }}" export CHECK_ONLY=true export SKIP_DOCS=false @@ -232,10 +239,38 @@ jobs: python tests/test_utils/python_scripts/download_unit_tests_dataset.py --assets-dir ./assets echo "::endgroup::" + - name: Install GH CLI + shell: bash + run: | + apt-get update + apt-get install -y gh + - name: Pull cache run: | - docker pull 
766267172432.dkr.ecr.us-east-1.amazonaws.com/megatron-lm:main || true - docker pull 766267172432.dkr.ecr.us-east-1.amazonaws.com/megatron-lm:${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} || true + docker pull ${{ env.container-registry }}/megatron-lm:main || true + docker pull ${{ env.container-registry }}/megatron-lm:${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} || true + + - name: Get last merged PR + id: cache_from + env: + GH_TOKEN: ${{ github.token }} + run: | + LAST_PRS=$(gh api graphql -f query=' + query { + repository(owner: "NVIDIA", name: "Megatron-LM") { + pullRequests(states: MERGED, first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) { + nodes { + number + } + } + } + }' | jq -r '.data.repository.pullRequests.nodes[].number' | while read -r number; do + echo "${{ env.container-registry }}/megatron-lm:$number" + done) + + echo "LAST_PRS< Callable: - def _func(y_pred: List[Union[float, int]], y_true: List[Union[float, int]]): - - return np.mean([np.mean(y_pred), np.mean(y_true)]) * rtol - - return _func - - class TypeOfTestResult(enum.Enum): APPROXIMATE = 1 DETERMINISTIC = 2 @@ -46,7 +38,6 @@ class NotDeterminsticError(Exception): class ApproximateTest(Test): atol: Union[int, float] = 0 - atol_func: Optional[Callable] = None rtol: float = 1e-5 @property @@ -58,16 +49,14 @@ def error_message(self, metric_name: str) -> NotApproximateError: class DeterministicTest(Test): - @property - def atol(self) -> Union[int, float]: - return 0 - - atol_func: Optional[Callable] = None - @property def rtol(self) -> float: return 0.0 + @property + def atol(self) -> Union[int, float]: + return 0 + @property def type_of_test_result(self) -> TypeOfTestResult: return TypeOfTestResult.DETERMINISTIC @@ -235,8 +224,7 @@ def pipeline( golden = np.array(golden_value_list) # Tolerance check - rtol = 0 if test.type_of_test_result == TypeOfTestResult.DETERMINISTIC else 0.10 - is_close = np.isclose(actual, golden, rtol=rtol, atol=0) + 
is_close = np.isclose(actual, golden, rtol=test.rtol, atol=test.atol) num_failing_steps_allowed = min(max(total_steps_evaluated // 100, 1), 50) passing = np.mean(is_close) >= (num_failing_steps_allowed / total_steps_evaluated) diff --git a/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py b/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py index a38016d1e50..db03d30f65a 100644 --- a/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py +++ b/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py @@ -9,35 +9,14 @@ logger = logging.getLogger(__name__) CHECK_THRESHOLDS = { - "iteration-time": [ - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0) - ], - "mem-allocated-bytes": [ - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0) - ], - "mem-max-allocated-bytes": [ - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0) - ], - "lm loss": [ - common.DeterministicTest(), - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0), - ], - "mtp_1 loss": [ - common.DeterministicTest(), - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0), - ], - "num-zeros": [ - common.DeterministicTest(), - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.20), rtol=0), - ], - "generated_tokens": [ - common.DeterministicTest(), - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0), - ], - "logprobs": [ - common.DeterministicTest(), - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0), - ], + "iteration-time": [common.ApproximateTest(atol=0, rtol=0.25)], + "mem-allocated-bytes": [common.ApproximateTest(atol=0, rtol=0.05)], + "mem-max-allocated-bytes": [common.ApproximateTest(atol=0, rtol=0.05)], + "lm loss": [common.DeterministicTest(), common.ApproximateTest(atol=0, 
rtol=0.05)], + "mtp_1 loss": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], + "num-zeros": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], + "generated_tokens": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], + "logprobs": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], } diff --git a/tests/functional_tests/shell_test_utils/_run_training.sh b/tests/functional_tests/shell_test_utils/_run_training.sh index 5179c02c3b5..1d0e77a3477 100644 --- a/tests/functional_tests/shell_test_utils/_run_training.sh +++ b/tests/functional_tests/shell_test_utils/_run_training.sh @@ -8,7 +8,7 @@ set -euxo pipefail -echo "------ARGUMENTS LIST --------" +set +x for ARGUMENT in "$@"; do KEY=$(echo $ARGUMENT | cut -f1 -d=) @@ -18,7 +18,7 @@ for ARGUMENT in "$@"; do export "$KEY"="$VALUE" echo "$KEY=$VALUE" done -echo "---------------------------------" +set -x # Check that mandatory vars are set MANDATORY_VARS=( @@ -39,9 +39,11 @@ for mandatory_var in "${MANDATORY_VARS[@]}"; do fi done +set +x # Envsubst model_params cat $TRAINING_PARAMS_PATH | envsubst "$(env | cut -d= -f1 | sed -e 's/^/$/')" >$TRAINING_PARAMS_PATH.tmp TRAINING_PARAMS_PATH="$TRAINING_PARAMS_PATH.tmp" +set -x # Pull env vars to export ENV_VARS=$(/usr/local/bin/yq '... 
comments="" | .ENV_VARS | to_entries | .[] | [.key + "=" + .value] | join(" ")' "$TRAINING_PARAMS_PATH") diff --git a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh index 75cb4e619e7..5a6ea64f42d 100644 --- a/tests/functional_tests/shell_test_utils/run_ci_test.sh +++ b/tests/functional_tests/shell_test_utils/run_ci_test.sh @@ -8,9 +8,7 @@ ulimit -Sn $(ulimit -Hn) # Increase soft limit for number of processes to match hard limit ulimit -Su $(ulimit -Hu) -echo "------ARGUMENTS LIST --------" -# Use eval to properly handle quoted arguments -eval "set -- $@" +set +x for ARGUMENT in "$@"; do # Split on first = only, preserving any subsequent = signs in the value KEY="${ARGUMENT%%=*}" @@ -26,7 +24,7 @@ for ARGUMENT in "$@"; do export "$KEY"="$(eval echo $VALUE)" echo "$KEY=$VALUE" done -echo "---------------------------------" +set -x # Check that mandatory vars are set MANDATORY_VARS=( @@ -306,7 +304,7 @@ for i in $(seq 1 $N_REPEAT); do fi # For inference jobs - if [[ "$MODE" == "inference" ]]; then + if [[ "$MODE" == "inference" && ("$TRAINING_EXIT_CODE" -eq 0 || "$TEST_TYPE" == "release") ]]; then if [[ "$TEST_TYPE" == "frozen-start" ]]; then uv run --no-sync pytest -s -o log_cli=true --log-cli-level=info $ROOT_DIR/tests/functional_tests/python_test_utils/test_inference_regular_pipeline.py \ --golden-values-path $GOLDEN_VALUES_PATH \ diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml index ede505eb2f4..60537ce8776 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: 
${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml index e606d04a88c..0e908381456 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml index e7bb67a9ed8..f965ee1d9ef 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml index 6f38457cdd0..fc4c836c98a 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff --git 
a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml index def6878c889..8974bc1ea24 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml index 8b993bfaec3..49135684124 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml index 05a3d0730c8..6c0dc550515 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} 
--load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml index 777be078e4d..e001ea4dc08 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/model_config.yaml similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/model_config.yaml diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/model_config.yaml similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/model_config.yaml diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_disable/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml similarity index 94% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_disable/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml index 14d585d84a7..2026f11ade2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_disable/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml @@ -64,11 +64,11 @@ MODEL_ARGS: # data settings --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_enable/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml similarity index 93% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_enable/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml index df91f9a95eb..41cb6561429 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_enable/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml @@ -63,11 +63,11 @@ MODEL_ARGS: # 
data settings --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_1/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml similarity index 94% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_1/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml index 849df09f27f..9cd921e9833 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_1/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml @@ -64,11 +64,11 @@ MODEL_ARGS: # data settings --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml similarity index 94% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_2/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml index 3316142031f..f902393d049 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml @@ -63,11 +63,11 @@ MODEL_ARGS: # data settings --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_reshard/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml similarity index 94% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_reshard/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml index 4b8d6a47b9c..2e82cad10a8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_reshard/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml @@ -64,11 +64,11 @@ MODEL_ARGS: # data settings --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: 
${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml similarity index 93% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml index 43937abe664..0abd4db698e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml @@ -63,11 +63,11 @@ MODEL_ARGS: # data settings --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/README.md b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/README.md similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/README.md rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/README.md diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml similarity index 94% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml index e9c35d0e86d..582c9523f73 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml @@ -86,15 +86,15 @@ BASE_MODEL_ARGS: &BASE_MODEL_ARGS --ckpt-format: torch_dist --dist-ckpt-strictness: log_all # backward compatibility for TE changes --save: ${CHECKPOINT_SAVE_PATH} - --load: ${CHECKPOINT_LOAD_PATH} + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 # data settings --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_transient/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml similarity index 94% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_transient/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml index 5021a029d3b..59a57e2212b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_transient/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml @@ -65,11 +65,11 @@ MODEL_ARGS: # data settings --data-cache-path: ${DATA_CACHE_PATH} --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml index 8031bf55d8d..2d5e340fa6d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + 
--vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml index 5ed4553ad1d..c7b46ff9b8d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgxa100_dracooci.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml index 6eac7d0da72..82506115963 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml index 750986482c7..4a5bf3d8fc7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml index f34c980d821..bb0708b11ef 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 
--lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml index 7c880daf577..a5dbe2157e5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml index 7f0958f94f2..4aeea406fb9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml index 7271fe996d6..6d6bf2b5b94 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + 
--timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml index 7c5a764ccb9..5e4131a43ca 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgxh100_eos.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml index 2491fd02e96..c75d099790f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml index 58d4628f72d..ffabf9583f6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml index 5fcf15a2c3e..b391387f9ff 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml index 6b66183c1dc..5415e3de96d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml index 089fd7808ff..8d372f5539d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml @@ -17,13 
+17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml index 3d8843214a3..d91e9be4f54 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgxh100_eos.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml index 4dc43353c9f..7d069ce9ec6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 
949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml index 7133af75b8f..ea882318c7e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 
--max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml index 1e29b79848b..d67dd6af765 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_lts_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml index 27d8203d307..1e25f4bd4e1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgxh100_eos.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgxa100_dracooci.json similarity index 
100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml index bc0da950ac8..2d734908089 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml index 962e08d5e73..319164782fc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: 
${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml index 8942fa94b55..a3a1a458739 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 
--train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml similarity index 90% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml index 7f6ae92394d..ea8f4bb71d0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml index aa041fec6de..ea869246a7c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml index 65ea19f9bd8..767283cf2a1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 
--max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml index 99a04b44fe3..46ff13cb9a8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml index a1150d0db09..5a1b1ce289d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 907c86da3b1..31ffc9c8111 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_dgxc.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_dgxc.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml index 503e702c4f5..0bd25e79735 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml index c8d15bbf005..778e7d361b3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgxh100_eos.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml index 8db3c6529df..d502c3e1fef 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml index 699ca43cc7b..edc9bc1ff2a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml @@ -18,13 +18,13 @@ 
MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml index 243a52e84bd..1b9c96b3f7d 100644 
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml index b3a950dcb5e..fed75814df5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 92% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 0e71ea6c268..af06fe06903 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_a100.json diff 
--git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml index 6aa5a991e90..035549f8fb6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_h100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml index 4907dfb7f4c..ef758e5639f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..0c1ce6fced4 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.83936, + "2": 10.8442, + "3": 10.86813, + "4": 10.86022, + "5": 10.87939, + "6": 10.85969, + "7": 10.86386, + "8": 10.8444, + "9": 10.88995, + "10": 10.8926, + "11": 10.89136, + "12": 10.85312, + "13": 10.87319, + "14": 10.83805, + "15": 10.83088, + "16": 10.82011, + "17": 10.79138, + "18": 10.81055, + "19": 10.77977, + "20": 10.6635, + "21": 10.69765, + "22": 10.67421, + "23": 10.77344, + "24": 10.63919, + "25": 10.50497, + "26": 10.61911, + "27": 10.56921, + "28": 10.46859, + "29": 10.41119, + "30": 10.42916, + "31": 10.52553, + "32": 10.34942, + "33": 10.2967, + "34": 10.46909, + "35": 9.99632, + "36": 10.13945, + "37": 10.0434, + "38": 10.4139, + "39": 9.80941, + "40": 10.12495, + "41": 10.14883, + "42": 10.04042, + "43": 10.22142, + "44": 10.07348, + "45": 9.71369, + "46": 10.00449, + "47": 9.94758, + "48": 9.68856, + "49": 9.93637, + "50": 9.96042 + } + }, + "num-zeros": { + 
"start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1026.0, + "2": 1184.0, + "3": 1226.0, + "4": 1248.0, + "5": 1259.0, + "6": 1421.0, + "7": 1182.0, + "8": 1036.0, + "9": 1293.0, + "10": 1319.0, + "11": 1212.0, + "12": 1373.0, + "13": 1327.0, + "14": 1121.0, + "15": 1217.0, + "16": 1163.0, + "17": 1246.0, + "18": 1280.0, + "19": 1128.0, + "20": 1019.0, + "21": 1147.0, + "22": 1156.0, + "23": 1341.0, + "24": 1312.0, + "25": 1066.0, + "26": 1138.0, + "27": 1270.0, + "28": 1260.0, + "29": 1292.0, + "30": 1532.0, + "31": 1477.0, + "32": 1460.0, + "33": 1537.0, + "34": 1513.0, + "35": 1235.0, + "36": 1316.0, + "37": 1466.0, + "38": 1564.0, + "39": 1380.0, + "40": 1513.0, + "41": 1633.0, + "42": 1509.0, + "43": 1731.0, + "44": 1636.0, + "45": 1501.0, + "46": 1884.0, + "47": 1567.0, + "48": 1631.0, + "49": 1825.0, + "50": 1639.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759682560.0, + "2": 759682560.0, + "3": 759682560.0, + "4": 759682560.0, + "5": 759682560.0, + "6": 759682560.0, + "7": 759682560.0, + "8": 759682560.0, + "9": 759682560.0, + "10": 759682560.0, + "11": 759682560.0, + "12": 759682560.0, + "13": 759682560.0, + "14": 759682560.0, + "15": 759682560.0, + "16": 759682560.0, + "17": 759682560.0, + "18": 759682560.0, + "19": 759682560.0, + "20": 759682560.0, + "21": 759682560.0, + "22": 759682560.0, + "23": 759682560.0, + "24": 759682560.0, + "25": 759682560.0, + "26": 759682560.0, + "27": 759682560.0, + "28": 759682560.0, + "29": 759682560.0, + "30": 759682560.0, + "31": 759682560.0, + "32": 759682560.0, + "33": 759682560.0, + "34": 759682560.0, + "35": 759682560.0, + "36": 759682560.0, + "37": 759682560.0, + "38": 759682560.0, + "39": 759682560.0, + "40": 759682560.0, + "41": 759682560.0, + "42": 759682560.0, + "43": 759682560.0, + "44": 759682560.0, + "45": 759682560.0, + "46": 759682560.0, + "47": 759682560.0, + "48": 759682560.0, + "49": 759682560.0, + "50": 
759682560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4340903936.0, + "2": 4622615552.0, + "3": 4622615552.0, + "4": 4622615552.0, + "5": 4622615552.0, + "6": 4622615552.0, + "7": 4622615552.0, + "8": 4622615552.0, + "9": 4622615552.0, + "10": 4622615552.0, + "11": 4622615552.0, + "12": 4622615552.0, + "13": 4622615552.0, + "14": 4622615552.0, + "15": 4622615552.0, + "16": 4622615552.0, + "17": 4622615552.0, + "18": 4622615552.0, + "19": 4622615552.0, + "20": 4622615552.0, + "21": 4622615552.0, + "22": 4622615552.0, + "23": 4622615552.0, + "24": 4622615552.0, + "25": 4622615552.0, + "26": 4622615552.0, + "27": 4622615552.0, + "28": 4622615552.0, + "29": 4622615552.0, + "30": 4622615552.0, + "31": 4622615552.0, + "32": 4622615552.0, + "33": 4622615552.0, + "34": 4622615552.0, + "35": 4622615552.0, + "36": 4622615552.0, + "37": 4622615552.0, + "38": 4622615552.0, + "39": 4622615552.0, + "40": 4622615552.0, + "41": 4622615552.0, + "42": 4622615552.0, + "43": 4622615552.0, + "44": 4622615552.0, + "45": 4622615552.0, + "46": 4622615552.0, + "47": 4622615552.0, + "48": 4622615552.0, + "49": 4622615552.0, + "50": 4622615552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.98171, + "2": 0.13344, + "3": 0.10755, + "4": 0.10562, + "5": 0.10638, + "6": 0.10549, + "7": 0.10612, + "8": 0.10814, + "9": 0.10654, + "10": 0.10633, + "11": 0.10725, + "12": 0.10667, + "13": 0.10769, + "14": 0.10593, + "15": 0.10694, + "16": 0.10715, + "17": 0.1064, + "18": 0.10706, + "19": 0.10964, + "20": 0.1054, + "21": 0.10752, + "22": 0.10979, + "23": 0.10834, + "24": 0.10667, + "25": 0.10762, + "26": 0.10605, + "27": 0.10756, + "28": 0.1059, + "29": 0.10662, + "30": 0.10738, + "31": 0.1065, + "32": 0.1074, + "33": 0.10712, + "34": 0.10631, + "35": 0.10672, + "36": 0.10785, + "37": 0.10664, + "38": 0.1064, + "39": 0.10666, + "40": 0.10518, + "41": 
0.10655, + "42": 0.10605, + "43": 0.10563, + "44": 0.1064, + "45": 0.10629, + "46": 0.10691, + "47": 0.10711, + "48": 0.10618, + "49": 0.10991, + "50": 0.10529 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..2a87d7e4de5 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.83936, + "2": 10.8442, + "3": 10.86813, + "4": 10.86022, + "5": 10.87939, + "6": 10.85969, + "7": 10.86386, + "8": 10.8444, + "9": 10.88995, + "10": 10.8926, + "11": 10.89136, + "12": 10.85312, + "13": 10.87319, + "14": 10.83805, + "15": 10.83088, + "16": 10.82011, + "17": 10.79138, + "18": 10.81055, + "19": 10.77977, + "20": 10.6635, + "21": 10.69765, + "22": 10.67421, + "23": 10.77344, + "24": 10.63919, + "25": 10.50497, + "26": 10.61911, + "27": 10.56921, + "28": 10.46859, + "29": 10.41119, + "30": 10.42916, + "31": 10.52553, + "32": 10.34942, + "33": 10.2967, + "34": 10.46909, + "35": 9.99632, + "36": 10.13945, + "37": 10.0434, + "38": 10.4139, + "39": 9.80941, + "40": 10.12495, + "41": 10.14883, + "42": 10.04042, + "43": 10.22142, + "44": 10.07348, + "45": 9.71369, + "46": 10.00449, + "47": 9.94758, + "48": 9.68856, + "49": 9.93637, + "50": 9.96042 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1026.0, + "2": 1184.0, + "3": 1226.0, + "4": 1248.0, + "5": 1259.0, + "6": 1421.0, + "7": 1182.0, + "8": 1036.0, + "9": 1293.0, + "10": 1319.0, + "11": 1212.0, + "12": 1373.0, + "13": 1327.0, + "14": 1121.0, + "15": 1217.0, + "16": 1163.0, + "17": 1246.0, + "18": 1280.0, + 
"19": 1128.0, + "20": 1019.0, + "21": 1147.0, + "22": 1156.0, + "23": 1341.0, + "24": 1312.0, + "25": 1066.0, + "26": 1138.0, + "27": 1270.0, + "28": 1260.0, + "29": 1292.0, + "30": 1532.0, + "31": 1477.0, + "32": 1460.0, + "33": 1537.0, + "34": 1513.0, + "35": 1235.0, + "36": 1316.0, + "37": 1466.0, + "38": 1564.0, + "39": 1380.0, + "40": 1513.0, + "41": 1633.0, + "42": 1509.0, + "43": 1731.0, + "44": 1636.0, + "45": 1501.0, + "46": 1884.0, + "47": 1567.0, + "48": 1631.0, + "49": 1825.0, + "50": 1639.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759682560.0, + "2": 759682560.0, + "3": 759682560.0, + "4": 759682560.0, + "5": 759682560.0, + "6": 759682560.0, + "7": 759682560.0, + "8": 759682560.0, + "9": 759682560.0, + "10": 759682560.0, + "11": 759682560.0, + "12": 759682560.0, + "13": 759682560.0, + "14": 759682560.0, + "15": 759682560.0, + "16": 759682560.0, + "17": 759682560.0, + "18": 759682560.0, + "19": 759682560.0, + "20": 759682560.0, + "21": 759682560.0, + "22": 759682560.0, + "23": 759682560.0, + "24": 759682560.0, + "25": 759682560.0, + "26": 759682560.0, + "27": 759682560.0, + "28": 759682560.0, + "29": 759682560.0, + "30": 759682560.0, + "31": 759682560.0, + "32": 759682560.0, + "33": 759682560.0, + "34": 759682560.0, + "35": 759682560.0, + "36": 759682560.0, + "37": 759682560.0, + "38": 759682560.0, + "39": 759682560.0, + "40": 759682560.0, + "41": 759682560.0, + "42": 759682560.0, + "43": 759682560.0, + "44": 759682560.0, + "45": 759682560.0, + "46": 759682560.0, + "47": 759682560.0, + "48": 759682560.0, + "49": 759682560.0, + "50": 759682560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4340903936.0, + "2": 4622615552.0, + "3": 4622615552.0, + "4": 4622615552.0, + "5": 4622615552.0, + "6": 4622615552.0, + "7": 4622615552.0, + "8": 4622615552.0, + "9": 4622615552.0, + "10": 4622615552.0, + "11": 
4622615552.0, + "12": 4622615552.0, + "13": 4622615552.0, + "14": 4622615552.0, + "15": 4622615552.0, + "16": 4622615552.0, + "17": 4622615552.0, + "18": 4622615552.0, + "19": 4622615552.0, + "20": 4622615552.0, + "21": 4622615552.0, + "22": 4622615552.0, + "23": 4622615552.0, + "24": 4622615552.0, + "25": 4622615552.0, + "26": 4622615552.0, + "27": 4622615552.0, + "28": 4622615552.0, + "29": 4622615552.0, + "30": 4622615552.0, + "31": 4622615552.0, + "32": 4622615552.0, + "33": 4622615552.0, + "34": 4622615552.0, + "35": 4622615552.0, + "36": 4622615552.0, + "37": 4622615552.0, + "38": 4622615552.0, + "39": 4622615552.0, + "40": 4622615552.0, + "41": 4622615552.0, + "42": 4622615552.0, + "43": 4622615552.0, + "44": 4622615552.0, + "45": 4622615552.0, + "46": 4622615552.0, + "47": 4622615552.0, + "48": 4622615552.0, + "49": 4622615552.0, + "50": 4622615552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.61511, + "2": 0.1778, + "3": 0.1277, + "4": 0.12936, + "5": 0.13227, + "6": 0.12879, + "7": 0.12864, + "8": 0.12608, + "9": 0.12256, + "10": 0.12099, + "11": 0.12182, + "12": 0.12459, + "13": 0.12256, + "14": 0.12133, + "15": 0.12193, + "16": 0.12162, + "17": 0.12333, + "18": 0.12123, + "19": 0.1213, + "20": 0.12425, + "21": 0.12132, + "22": 0.12275, + "23": 0.12087, + "24": 0.12024, + "25": 0.12097, + "26": 0.12149, + "27": 0.1222, + "28": 0.1211, + "29": 0.12079, + "30": 0.12068, + "31": 0.12272, + "32": 0.12225, + "33": 0.12154, + "34": 0.11969, + "35": 0.12134, + "36": 0.12208, + "37": 0.12324, + "38": 0.13559, + "39": 0.13696, + "40": 0.12255, + "41": 0.12095, + "42": 0.12133, + "43": 0.12263, + "44": 0.1226, + "45": 0.12131, + "46": 0.12049, + "47": 0.12042, + "48": 0.12231, + "49": 0.12137, + "50": 0.12131 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml index b894bf3bd20..06545179645 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml index cfdbe747764..8710e92a138 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} 
--data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgxh100_coreweave.json 
similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml index f9f58db94f9..dea9b4aad98 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/model_config.yaml 
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml index db560c8aac5..5394f9d0070 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml index c6a2379b571..4bd321b43da 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml index 1ad10c02caa..1229288b9be 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - 
--merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml index 364a41d2fe1..556fcfbcf11 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml @@ -10,7 +10,7 @@ BEFORE_SCRIPT: | mkdir -p ${DATA_CACHE_PATH}/distill && echo $DISTILL_CONFIG | yq -P > ${DATA_CACHE_PATH}/distill/distill_config.yaml MODEL_ARGS: --export-te-mcore-model: true - --export-kd-teacher-load: ${ARTIFACTS_ROOT}/gpt_teacher + --export-kd-teacher-load: ${DATA_PATH}/model/gpt_dummy_pyt/ckpt/24.10.0_bf16_teacher --export-kd-cfg: ${DATA_CACHE_PATH}/distill/distill_config.yaml --auto-detect-ckpt-format: true --num-layers: 12 @@ -33,13 +33,13 @@ MODEL_ARGS: --untie-embeddings-and-output-weights: true --disable-bias-linear: true --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml index ac70eb6bd1e..3175a07cc88 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml index 585aea5c26e..3f427a04f9d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgxh100_coreweave.json 
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml index f8f7bded190..d3446e92c2e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: 
${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml index 3a9b912ed0c..05b166f0a7b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 
--max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml index ccc411e5879..70155c2ff81 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgxh100_eos.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml index 6234292f5ff..92f4bfb1cdf 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% 
rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgxh100_eos.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename 
from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml index d510bd15c0f..b4d63762604 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml index 5a9f0ea8a89..880d7fc7ce0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml index 920ad6832d8..013569c5882 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml index 78e7e3a45ca..6aad7304c19 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - 
--vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml index 36a000292f5..8866fa67175 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 
--distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json similarity index 100% 
rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml index ddbc04621a6..f4649e2d303 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json 
similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml similarity index 89% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml index 31e5bb16ad5..a77cd637800 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml index 76cfaf020af..9f416e74884 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml index 3488b4d1585..2622612205a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml index 586f90f1cf6..00f01d3bac0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml @@ -23,13 +23,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml index dd928979546..3d1b350ced0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt 
--split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml index bf6520edcd6..d4939a8c2cf 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + 
--timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml index 14cefe1e409..af4aa0bf4fc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml index f7c1c7ee725..9fbe95431e0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci-ord.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml index deaadae81a3..54d49da6c14 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml index fbbe2255a82..f906e5f8439 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgxh100_coreweave.json similarity index 100% 
rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgxa100_dracooci-ord.json 
similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml index 383ec818661..7e2261ae518 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: 
--seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_h100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml index 3cf39c93e9c..ea5523e1d2a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml index 4fd3ccba030..afbc17a0301 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - 
--timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgxh100_coreweave.json 
similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml index e8f7fee1215..bcbfdad6616 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: 
${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml index d6a183799fd..ecc62315f9f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml @@ 
-17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml index 8df2e496bb1..89c6943100e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json 
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 7cd304fc880..9d8de380f83 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml index 72f029c9044..18a7195b436 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml index 75a0ffc2adc..fe8e0f493d1 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_dgxc.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_dgxc.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_dgxc.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index de4164176bb..136c696ef2f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml 
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100.json diff 
--git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml index 2ee48e8111c..0f842738f62 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml index 8f09dae5fec..4aa0b36a84b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - 
--merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgxa100_dracooci-ord.json 
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml index 1ac8ec45c24..620eeaeff46 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 208827c9aea..b8a79c7a083 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml index 15fbeb4f986..4febeeb3aca 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 
--lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 573cddceff0..8793230c3c9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml similarity index 89% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml index f897d2b9a8e..84da70b66c7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml @@ -1,4 +1,4 @@ -ENV_VARS: +ENV_VARS: CUDA_DEVICE_MAX_CONNECTIONS: 1 NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 NCCL_ALGO: Ring @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + 
--timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml index 7345237d672..f4c058fb0a0 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml index e15844bafb7..e2a0f1f1f69 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml index c7dfcfe48e3..b9b786ee247 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml index e829340190e..b4991e3621e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml index 37fb8b1cccd..cc6a76a97d9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml index 1406468fadf..7601d0188ae 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml @@ -21,13 +21,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml index 863cf9cac25..a365aae9089 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} 
--data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml similarity index 89% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml index fcb9fa2884f..c9473f99f96 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml index 0e32dbd913a..23b58cdc782 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json deleted file mode 100644 index 67c8ef8abff..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json +++ /dev/null @@ -1,287 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 10.83936, - "2": 10.8442, - "3": 10.86813, - "4": 10.86022, - "5": 10.87939, - "6": 10.85969, - "7": 10.86386, - "8": 10.8444, - "9": 10.88995, - "10": 10.8926, - "11": 10.89136, - "12": 10.85312, - "13": 10.87319, - "14": 10.83805, - "15": 10.83088, - "16": 10.82011, - "17": 10.79138, - "18": 10.81055, - "19": 10.77977, - "20": 10.6635, - "21": 10.69765, - "22": 10.67421, - "23": 10.77344, - "24": 10.63919, - "25": 10.50497, 
- "26": 10.61911, - "27": 10.56921, - "28": 10.46859, - "29": 10.41119, - "30": 10.42916, - "31": 10.52553, - "32": 10.34942, - "33": 10.2967, - "34": 10.46909, - "35": 9.99632, - "36": 10.13945, - "37": 10.0434, - "38": 10.4139, - "39": 9.80941, - "40": 10.12495, - "41": 10.14883, - "42": 10.04042, - "43": 10.22142, - "44": 10.07348, - "45": 9.71369, - "46": 10.00449, - "47": 9.94758, - "48": 9.68856, - "49": 9.93637, - "50": 9.96042 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 1026.0, - "2": 1184.0, - "3": 1226.0, - "4": 1248.0, - "5": 1259.0, - "6": 1421.0, - "7": 1182.0, - "8": 1036.0, - "9": 1293.0, - "10": 1319.0, - "11": 1212.0, - "12": 1373.0, - "13": 1327.0, - "14": 1121.0, - "15": 1217.0, - "16": 1163.0, - "17": 1246.0, - "18": 1280.0, - "19": 1128.0, - "20": 1019.0, - "21": 1147.0, - "22": 1156.0, - "23": 1341.0, - "24": 1312.0, - "25": 1066.0, - "26": 1138.0, - "27": 1270.0, - "28": 1260.0, - "29": 1292.0, - "30": 1532.0, - "31": 1477.0, - "32": 1460.0, - "33": 1537.0, - "34": 1513.0, - "35": 1235.0, - "36": 1316.0, - "37": 1466.0, - "38": 1564.0, - "39": 1380.0, - "40": 1513.0, - "41": 1633.0, - "42": 1509.0, - "43": 1731.0, - "44": 1636.0, - "45": 1501.0, - "46": 1884.0, - "47": 1567.0, - "48": 1631.0, - "49": 1825.0, - "50": 1639.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 759681536.0, - "2": 759681536.0, - "3": 759681536.0, - "4": 759681536.0, - "5": 759681536.0, - "6": 759681536.0, - "7": 759681536.0, - "8": 759681536.0, - "9": 759681536.0, - "10": 759681536.0, - "11": 759681536.0, - "12": 759681536.0, - "13": 759681536.0, - "14": 759681536.0, - "15": 759681536.0, - "16": 759681536.0, - "17": 759681536.0, - "18": 759681536.0, - "19": 759681536.0, - "20": 759681536.0, - "21": 759681536.0, - "22": 759681536.0, - "23": 759681536.0, - "24": 759681536.0, - "25": 759681536.0, - "26": 759681536.0, - "27": 759681536.0, - 
"28": 759681536.0, - "29": 759681536.0, - "30": 759681536.0, - "31": 759681536.0, - "32": 759681536.0, - "33": 759681536.0, - "34": 759681536.0, - "35": 759681536.0, - "36": 759681536.0, - "37": 759681536.0, - "38": 759681536.0, - "39": 759681536.0, - "40": 759681536.0, - "41": 759681536.0, - "42": 759681536.0, - "43": 759681536.0, - "44": 759681536.0, - "45": 759681536.0, - "46": 759681536.0, - "47": 759681536.0, - "48": 759681536.0, - "49": 759681536.0, - "50": 759681536.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 4340902912.0, - "2": 4622614528.0, - "3": 4622614528.0, - "4": 4622614528.0, - "5": 4622614528.0, - "6": 4622614528.0, - "7": 4622614528.0, - "8": 4622614528.0, - "9": 4622614528.0, - "10": 4622614528.0, - "11": 4622614528.0, - "12": 4622614528.0, - "13": 4622614528.0, - "14": 4622614528.0, - "15": 4622614528.0, - "16": 4622614528.0, - "17": 4622614528.0, - "18": 4622614528.0, - "19": 4622614528.0, - "20": 4622614528.0, - "21": 4622614528.0, - "22": 4622614528.0, - "23": 4622614528.0, - "24": 4622614528.0, - "25": 4622614528.0, - "26": 4622614528.0, - "27": 4622614528.0, - "28": 4622614528.0, - "29": 4622614528.0, - "30": 4622614528.0, - "31": 4622614528.0, - "32": 4622614528.0, - "33": 4622614528.0, - "34": 4622614528.0, - "35": 4622614528.0, - "36": 4622614528.0, - "37": 4622614528.0, - "38": 4622614528.0, - "39": 4622614528.0, - "40": 4622614528.0, - "41": 4622614528.0, - "42": 4622614528.0, - "43": 4622614528.0, - "44": 4622614528.0, - "45": 4622614528.0, - "46": 4622614528.0, - "47": 4622614528.0, - "48": 4622614528.0, - "49": 4622614528.0, - "50": 4622614528.0 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 13.91724, - "2": 0.27573, - "3": 0.23467, - "4": 0.23594, - "5": 0.23302, - "6": 0.23216, - "7": 0.23399, - "8": 0.23423, - "9": 0.23365, - "10": 0.23211, - "11": 0.2332, - "12": 0.23283, - "13": 0.23445, 
- "14": 0.23405, - "15": 0.23349, - "16": 0.23298, - "17": 0.23305, - "18": 0.23251, - "19": 0.23322, - "20": 0.23348, - "21": 0.23189, - "22": 0.23316, - "23": 0.2316, - "24": 0.23233, - "25": 0.23512, - "26": 0.23232, - "27": 0.23306, - "28": 0.23244, - "29": 0.23331, - "30": 0.23258, - "31": 0.23311, - "32": 0.23326, - "33": 0.23418, - "34": 0.23411, - "35": 0.23489, - "36": 0.2317, - "37": 0.23483, - "38": 0.23235, - "39": 0.23511, - "40": 0.23413, - "41": 0.23395, - "42": 0.23405, - "43": 0.23331, - "44": 0.23297, - "45": 0.23473, - "46": 0.23192, - "47": 0.23377, - "48": 0.23322, - "49": 0.23042, - "50": 0.23263 - } - } -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json deleted file mode 100644 index 5e0ca24c497..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json +++ /dev/null @@ -1,287 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 10.83936, - "2": 10.8442, - "3": 10.86813, - "4": 10.86022, - "5": 10.87939, - "6": 10.85969, - "7": 10.86386, - "8": 10.8444, - "9": 10.88995, - "10": 10.8926, - "11": 10.89136, - "12": 10.85312, - "13": 10.87319, - "14": 10.83805, - "15": 10.83088, - "16": 10.82011, - "17": 10.79138, - "18": 10.81055, - "19": 10.77977, - "20": 10.6635, - "21": 10.69765, - "22": 10.67421, - "23": 10.77344, - "24": 10.63919, - "25": 10.50497, - "26": 10.61911, - "27": 10.56921, - "28": 10.46859, - "29": 10.41119, - "30": 10.42916, - "31": 10.52553, - "32": 10.34942, - "33": 10.2967, - "34": 10.46909, - "35": 9.99632, - "36": 10.13945, - "37": 10.0434, - "38": 10.4139, - "39": 9.80941, - "40": 10.12495, - "41": 10.14883, - "42": 10.04042, 
- "43": 10.22142, - "44": 10.07348, - "45": 9.71369, - "46": 10.00449, - "47": 9.94758, - "48": 9.68856, - "49": 9.93637, - "50": 9.96042 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 1026.0, - "2": 1184.0, - "3": 1226.0, - "4": 1248.0, - "5": 1259.0, - "6": 1421.0, - "7": 1182.0, - "8": 1036.0, - "9": 1293.0, - "10": 1319.0, - "11": 1212.0, - "12": 1373.0, - "13": 1327.0, - "14": 1121.0, - "15": 1217.0, - "16": 1163.0, - "17": 1246.0, - "18": 1280.0, - "19": 1128.0, - "20": 1019.0, - "21": 1147.0, - "22": 1156.0, - "23": 1341.0, - "24": 1312.0, - "25": 1066.0, - "26": 1138.0, - "27": 1270.0, - "28": 1260.0, - "29": 1292.0, - "30": 1532.0, - "31": 1477.0, - "32": 1460.0, - "33": 1537.0, - "34": 1513.0, - "35": 1235.0, - "36": 1316.0, - "37": 1466.0, - "38": 1564.0, - "39": 1380.0, - "40": 1513.0, - "41": 1633.0, - "42": 1509.0, - "43": 1731.0, - "44": 1636.0, - "45": 1501.0, - "46": 1884.0, - "47": 1567.0, - "48": 1631.0, - "49": 1825.0, - "50": 1639.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 759681536.0, - "2": 759681536.0, - "3": 759681536.0, - "4": 759681536.0, - "5": 759681536.0, - "6": 759681536.0, - "7": 759681536.0, - "8": 759681536.0, - "9": 759681536.0, - "10": 759681536.0, - "11": 759681536.0, - "12": 759681536.0, - "13": 759681536.0, - "14": 759681536.0, - "15": 759681536.0, - "16": 759681536.0, - "17": 759681536.0, - "18": 759681536.0, - "19": 759681536.0, - "20": 759681536.0, - "21": 759681536.0, - "22": 759681536.0, - "23": 759681536.0, - "24": 759681536.0, - "25": 759681536.0, - "26": 759681536.0, - "27": 759681536.0, - "28": 759681536.0, - "29": 759681536.0, - "30": 759681536.0, - "31": 759681536.0, - "32": 759681536.0, - "33": 759681536.0, - "34": 759681536.0, - "35": 759681536.0, - "36": 759681536.0, - "37": 759681536.0, - "38": 759681536.0, - "39": 759681536.0, - "40": 759681536.0, - "41": 759681536.0, - "42": 
759681536.0, - "43": 759681536.0, - "44": 759681536.0, - "45": 759681536.0, - "46": 759681536.0, - "47": 759681536.0, - "48": 759681536.0, - "49": 759681536.0, - "50": 759681536.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 4340902912.0, - "2": 4622614528.0, - "3": 4622614528.0, - "4": 4622614528.0, - "5": 4622614528.0, - "6": 4622614528.0, - "7": 4622614528.0, - "8": 4622614528.0, - "9": 4622614528.0, - "10": 4622614528.0, - "11": 4622614528.0, - "12": 4622614528.0, - "13": 4622614528.0, - "14": 4622614528.0, - "15": 4622614528.0, - "16": 4622614528.0, - "17": 4622614528.0, - "18": 4622614528.0, - "19": 4622614528.0, - "20": 4622614528.0, - "21": 4622614528.0, - "22": 4622614528.0, - "23": 4622614528.0, - "24": 4622614528.0, - "25": 4622614528.0, - "26": 4622614528.0, - "27": 4622614528.0, - "28": 4622614528.0, - "29": 4622614528.0, - "30": 4622614528.0, - "31": 4622614528.0, - "32": 4622614528.0, - "33": 4622614528.0, - "34": 4622614528.0, - "35": 4622614528.0, - "36": 4622614528.0, - "37": 4622614528.0, - "38": 4622614528.0, - "39": 4622614528.0, - "40": 4622614528.0, - "41": 4622614528.0, - "42": 4622614528.0, - "43": 4622614528.0, - "44": 4622614528.0, - "45": 4622614528.0, - "46": 4622614528.0, - "47": 4622614528.0, - "48": 4622614528.0, - "49": 4622614528.0, - "50": 4622614528.0 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 12.785, - "2": 0.28429, - "3": 0.25654, - "4": 0.25675, - "5": 0.25763, - "6": 0.25556, - "7": 0.25403, - "8": 0.25276, - "9": 0.25351, - "10": 0.25546, - "11": 0.25488, - "12": 0.25607, - "13": 0.25404, - "14": 0.25256, - "15": 0.25733, - "16": 0.25987, - "17": 0.25778, - "18": 0.25053, - "19": 0.25288, - "20": 0.258, - "21": 0.25606, - "22": 0.25231, - "23": 0.25223, - "24": 0.26464, - "25": 0.26469, - "26": 0.25015, - "27": 0.25378, - "28": 0.25459, - "29": 0.26134, - "30": 0.26129, - "31": 
0.2595, - "32": 0.26444, - "33": 0.25568, - "34": 0.25514, - "35": 0.25087, - "36": 0.25275, - "37": 0.25383, - "38": 0.24953, - "39": 0.24996, - "40": 0.25393, - "41": 0.25556, - "42": 0.25158, - "43": 0.25124, - "44": 0.25, - "45": 0.25586, - "46": 0.26057, - "47": 0.25868, - "48": 0.26304, - "49": 0.2615, - "50": 0.26261 - } - } -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml index 246fb33da57..f6892ae5c24 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml index 196492f1ec7..9c23cb7938f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index 665388ce7a1..4727007ffe2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml similarity index 90% rename 
from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml index f4cbb87d27d..bba1f1ad19e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index 80218da886d..5ac9b7b4701 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -16,13 +16,13 @@ 
MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml index 96b4a6c0ccc..0e70965cb2b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/model_config.yaml similarity index 89% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/model_config.yaml index c46be1c819b..db5dea3ae6e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml index c151135828d..12063418adf 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml @@ -18,13 +18,13 @@ 
MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/model_config.yaml index 40dea9779c9..51a2f6cfc7c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml similarity index 92% rename 
from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml index fb47009a77d..5668a7575e2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml index 32dd88dfb72..66c9f171be5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: 
${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml index 21c6ac25e83..54237309642 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml index 59707f588c0..a5903e51b6c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml index 0e62673a628..ac5ff6cfbbf 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml @@ -16,13 +16,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index 4361bf233cd..3963a359ea9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index ed56bc7cfad..ddb34ad850b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml index fe4a6575953..cf9f6b6ceb8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json 
+ --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json index b8f1a38fa0f..8776674df82 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json @@ -1,159 +1,187 @@ { - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", - "generated_tokens": [ - 3060, - 2430, - 1636, - 2012, - 1317, - 1278, - 2362, - 1307, - 1278, - 16070, - 1044, - 1321, - 1636, - 23067, - 1455, - 1593, - 1395, - 1605, - 3140, - 5152, - 1513, - 1747, - 1046, - 2409, - 1395, - 3140, - 5152, - 1513, - 1278, - 2362 - ], - "latency": 2.491823673248291, - "logprobs": [ - -9.362494468688965, - -2.827894449234009, - -4.557381629943848, - -1.4968647956848145, - -0.717312216758728, - -1.7262351512908936, - -2.522736072540283, - -2.1782360076904297, - -2.3603432178497314, - -6.136383533477783, - -1.4676916599273682, - -3.468963384628296, - -4.424870491027832, - -3.7345848083496094, - -2.012619972229004, - -1.8833301067352295, - -3.5708768367767334, - -6.8197832107543945, - -0.3122292757034302, - -0.9820290207862854, - -6.532033443450928, - -7.498172760009766, - -12.615165710449219, - -2.409003496170044, - -3.8550546169281006, - -0.5105050802230835, - -4.2802581787109375, - -0.06971167027950287, - -0.054025799036026, - -3.319596767425537, - -9.703240394592285, - -1.0997297763824463, - -6.224854469299316, - -5.234503269195557, - -3.934987783432007, - -2.5263679027557373, - -3.1843955516815186, - -5.880871295928955, - -1.8436813354492188, - -5.906496047973633, - -12.15787410736084, - -12.5841064453125, - -0.0819428563117981, - -2.6212656497955322, - -1.4329369068145752, - -2.885145425796509, - -1.2901865243911743, - -0.006647023372352123, - -3.5115818977355957, - -12.945953369140625, - -3.793078899383545, - -3.0094375610351562, - -5.966838836669922, - -0.8998424410820007, - -0.040962252765893936, - -1.5467679500579834, - -1.0785343647003174, - -5.73494815826416, - -0.38491737842559814, - -5.017007827758789, - -0.5568072199821472, - -0.5968841910362244, - -2.3609962463378906, - -13.582086563110352, - -0.09050048142671585, - -3.7264108657836914, - -1.1208789348602295, - -6.052675247192383, - -0.5848909616470337, - -3.5906238555908203, - -0.9494907855987549, - -1.5676641464233398, - 
-5.127577781677246, - -17.19189453125, - -6.698403835296631, - -1.0449178218841553, - -4.365664958953857, - -1.1243419647216797, - -2.2092156410217285, - -1.8081634044647217, - -0.23330983519554138, - -9.439546585083008, - -0.2947109341621399, - -7.253565788269043, - -2.3855936527252197, - -4.629369258880615, - -3.4186267852783203, - -1.9727531671524048, - -2.331681251525879, - -1.5606917142868042, - -2.454296588897705, - -1.5334703922271729, - -1.2631131410598755, - -2.657367706298828, - -0.6480202078819275, - -0.4550393521785736, - -1.3625166416168213, - -0.8142069578170776, - -0.4496593475341797, - -0.9312890768051147, - -1.732723355293274, - -0.44613128900527954, - -1.6895122528076172, - -0.6082233190536499, - -1.0978344678878784, - -1.1122435331344604, - -0.002520838286727667, - -1.4072327613830566, - -0.007462364621460438, - -0.7548662424087524, - -0.9937503337860107, - -0.0675487294793129, - -0.9595617055892944, - -0.029961343854665756, - -2.205785036087036, - -1.2615025043487549, - -0.7878209352493286 - ] - }, - "throughput": 109.3571928299837 + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.290968656539917, + "cuda_graph_request_count_map": { + "372": 0, + "360": 0, + "336": 0, + "312": 0, + "288": 0, + "264": 0, + "240": 0, + "216": 0, + "192": 0, + "168": 0, + "144": 0, + "120": 0, + "96": 0, + "72": 0, + "48": 0, + "24": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + -5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + -2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + 
-0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.331681251525879, + -1.5606917142868042, + -2.454296588897705, + -1.5334703922271729, + -1.2631131410598755, + -2.657367706298828, + -0.6480202078819275, + -0.4550393521785736, + -1.3625166416168213, + -0.8142069578170776, + -0.4496593475341797, + -0.9312890768051147, + -1.732723355293274, + -0.44613128900527954, + -1.6895122528076172, + -0.6082233190536499, + -1.0978344678878784, + -1.1122435331344604, + -0.002520838286727667, + -1.4072327613830566, + -0.007462364621460438, + -0.7548662424087524, + -0.9937503337860107, + -0.0675487294793129, + -0.9595617055892944, + -0.029961343854665756, + -2.205785036087036, + -1.2615025043487549, + -0.7878209352493286 + ] + }, + "throughput": [ + 2.3393335747358535, + 102.34586197079994, + 103.58898028807208, + 104.45258510126983, + 103.72620640365217, + 104.56994550823111, + 105.82297004422847, + 102.44643771631509 + ] } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml index 2ec2c402230..0675b047464 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1 --distributed-backend: nccl --log-interval: 1 diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml index 67c9de20806..2ba9050ceaf 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -18,8 +18,8 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1 --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json index d76a889a3fa..31b66789d94 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json @@ -1,2699 +1,2703 @@ { - "0": { - "input_prompt": "The $500 Cup of coffee?\nConsider this, most Americans spend an average of $1,500-2,000 a year on this bean water.\nI have a few question for you: \nHow has business been the past few months?\nDo you ever feel like your business is stuck?\nDon't feel like you're able to improve performance and make changes required to achieve success ?\nAre your customers spneding less and less and wanting more?\nHave the gas prices affected your business?\nDo you have employees and do they hate you or wish they could quit?\n\nNow, before you and I can decide wheter or not I will be a good fit for your business we should talk this over with coffee.\nAnd, just to warn you this isn't some casual thing. This is not a date or time to be personal or social (but by all means share what you will coz I'll gladly listen).\nTher eare two major talking points and stratagies we will focios on in our lil coffee social\nFor one, we will find your unique selling Proposition (USP).\nDo have the best price? Are you the cheapest in town? Are your customers jerks? Do you haVE REGULARS? Why do people come back?\nwe'll also look for the holes in your business bucket. 
I'm willing to bet there's a hole or two in your business we'll find together that'll make this 500 dollar cup of Joe pay for itse;f immedietly.\nMany find themselves to be more profitable by just finding out where the dollars are escaping in their business and I like to think of myself as a guy that comes along with some spakel or putty and patch those holes up for you.\nBeleive me, just fixing one hole can mean a lot...just think about a sinking boat that has a hole in it that's about 3\u201d in diameter... it doesn't take long to sink.\nI have no agenda, besides f=getting to know your business and seeing wher I can patch the holes and find what makes you do darn unique (I know this won't take long.)\nMany folks, I bet, will find what they need to get off their chest with a quick phone call and they just paypal me the money and make a coffee at home. Look, that's fine too.\nI just to get you ot of your comfort zone, because this is where it all starts my frind.\nSome smart GOAT entrepreneur will probably get everything they need just out of our lil mini consulatant for the more extensive business consukting I offer, and look, that's fine, too.\nMaybe this cup of coffee will be all you need to gtet back on your feet, but not only surive, but thrive!\nI'm not trying to make frineds, or make a bunch of money, or look down your shirt\nBut this is only going to be a 45 minute (max) coffee chat\nAnd, I know you (and me) both have a lot on our plates. 
So no messing around\nAfter our consultation I will follow up with you in a few days and see how things are going, then I will be emailing you about once or twice every two weeks, just to make sure you're staying on task and implementing what we went over.\nTghere is no obligation to go any further and will gladly give you back your money if this pre-consultation doesn't put you on the right path or you don't get any value out of it...", - "generated_text": " $ is a$ is a $ is a $ is a $ is a $ is a $$1, you\n$ $$ $\n$ $- $\n$\n$\n$ $$$\n$\n$\n$\n$\n$\n$\n$\n$???????, $\n-1\n$\n1.5.\n$\n$, you\n$.\n$\n1,1.1\nI\n$.\nI\n\n\nHow\n$,,,0,1,0,0.0\nIn\nThe\nThe\nThe\n", - "generated_tokens": [ - 1659, - 1395, - 1261, - 1036, - 1395, - 1261, - 1659, - 1395, - 1261, - 1659, - 1395, - 1261, - 1659, - 1395, - 1261, - 1659, - 1395, - 1261, - 1659, - 1036, - 1049, - 1044, - 1636, - 1010, - 1036, - 1659, - 1036, - 1659, - 1010, - 1036, - 1659, - 1045, - 1659, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1659, - 1036, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1063, - 1063, - 1063, - 1063, - 1063, - 1063, - 1063, - 1044, - 1659, - 1010, - 1045, - 1049, - 1010, - 1036, - 1010, - 1049, - 1046, - 1053, - 1046, - 1010, - 1036, - 1010, - 1036, - 1044, - 1636, - 1010, - 1036, - 1046, - 1010, - 1036, - 1010, - 1049, - 1044, - 1049, - 1046, - 1049, - 1010, - 1073, - 1010, - 1036, - 1046, - 1010, - 1073, - 1010, - 1010, - 1010, - 7801, - 1010, - 1036, - 1044, - 1044, - 1044, - 1048, - 1044, - 1049, - 1044, - 1048, - 1044, - 1048, - 1046, - 1048, - 1010, - 1785, - 1010, - 1784, - 1010, - 1784, - 1010, - 1784, - 1010 - ], - "latency": 6.757228374481201, - "cuda_graph_request_count_map": null, - "step_count": 2048, - "logprobs": [ - -7.7319135665893555, - -2.188307285308838, - -0.7547445297241211, - -0.7294313311576843, - -10.238386154174805, - -3.3775341510772705, - -6.394498825073242, - 
-7.354557037353516, - -9.018157958984375, - -3.012073040008545, - -3.2584073543548584, - -5.220732688903809, - -4.620487213134766, - -2.5078930854797363, - -3.752683162689209, - -0.13360372185707092, - -0.05705544352531433, - -0.41462242603302, - -1.585279941558838, - -1.6438164710998535, - -1.9557222127914429, - -0.3989897072315216, - -0.0365302674472332, - -6.368816375732422, - -0.8731719255447388, - -0.022585075348615646, - -0.2775891423225403, - -0.0027362785767763853, - -0.0006812873762100935, - -1.581446647644043, - -0.008688976056873798, - -0.3532317280769348, - -6.071163177490234, - -9.162371635437012, - -9.965556144714355, - -2.400461196899414, - -2.9898362159729004, - -2.9803032875061035, - -2.12601900100708, - -3.500912666320801, - -7.015069007873535, - -2.278961420059204, - -0.46380555629730225, - -4.078739166259766, - -1.9430254697799683, - -3.5642244815826416, - -3.689701795578003, - -6.201474189758301, - -6.580833911895752, - -2.3081111907958984, - -5.42717170715332, - -1.1886008977890015, - -1.172760248184204, - -1.3571951389312744, - -1.3551844358444214, - -3.376784324645996, - -0.05118789151310921, - -4.064360618591309, - -2.575554847717285, - -0.6994737386703491, - -2.56724214553833, - -2.1888976097106934, - -0.4816131591796875, - -4.070178985595703, - -2.0060782432556152, - -6.858033180236816, - -0.059200502932071686, - -3.214278221130371, - -0.9671833515167236, - -0.823198676109314, - -1.0130078792572021, - -4.595561981201172, - -0.012724989093840122, - -5.214311599731445, - -8.246870040893555, - -3.1476030349731445, - -3.299684524536133, - -4.218191146850586, - -7.318399429321289, - -0.8580498695373535, - -3.0894036293029785, - -1.886361002922058, - -7.217658996582031, - -3.271679639816284, - -3.9717154502868652, - -1.8835484981536865, - -10.034332275390625, - -11.382490158081055, - -5.417011260986328, - -7.505967140197754, - -2.33837890625, - -0.07904055714607239, - -3.294971227645874, - -7.813640594482422, - -1.7646901607513428, - 
-4.025320053100586, - -3.5977325439453125, - -4.390352249145508, - -9.147806167602539, - -0.5303041934967041, - -7.721246242523193, - -0.6311959028244019, - -0.8119025230407715, - -0.7227814197540283, - -1.8369406461715698, - -0.20933297276496887, - -1.5395950078964233, - -4.424448490142822, - -4.084965705871582, - -3.355497360229492, - -1.0475609302520752, - -6.479413986206055, - -0.7810530662536621, - -2.132437229156494, - -6.648703098297119, - -2.9522438049316406, - -1.2485712766647339, - -4.040503025054932, - -2.3415768146514893, - -5.358206748962402, - -1.6258506774902344, - -3.956300973892212, - -0.732298731803894, - -7.441117286682129, - -1.5242161750793457, - -2.4555861949920654, - -4.295163154602051, - -9.687600135803223, - -0.8213484883308411, - -1.2446978092193604, - -0.01942702941596508, - -4.619411468505859, - -3.3297007083892822, - -2.2139487266540527, - -3.691431999206543, - -2.6574106216430664, - -6.075929641723633, - -0.6123450994491577, - -1.2942559719085693, - -0.6262839436531067, - -7.398006439208984, - -4.4869890213012695, - -4.202048301696777, - -4.982994079589844, - -0.637227475643158, - -3.061023235321045, - -10.117584228515625, - -3.8567495346069336, - -4.0480828285217285, - -2.472019672393799, - -4.246374607086182, - -1.3939155340194702, - -7.132441520690918, - -0.20108745992183685, - -4.986658573150635, - -4.387957572937012, - -0.01108358334749937, - -4.209756851196289, - -7.271108627319336, - -4.047314643859863, - -2.6497321128845215, - -1.4763175249099731, - -0.28365400433540344, - -3.5247769355773926, - -1.4226995706558228, - -4.327237129211426, - -2.0407187938690186, - -6.1437907218933105, - -1.5190880298614502, - -2.5511486530303955, - -7.504094123840332, - -2.152172565460205, - -6.708334922790527, - -6.913146495819092, - -3.6959621906280518, - -6.752341270446777, - -0.63083815574646, - -0.12433214485645294, - -5.0525641441345215, - -4.435934066772461, - -0.45601028203964233, - -6.3459577560424805, - -9.882917404174805, - 
-3.1422882080078125, - -2.550520658493042, - -3.2099051475524902, - -6.278127193450928, - -0.07764133810997009, - -3.155696153640747, - -1.933587670326233, - -9.61027717590332, - -6.211391925811768, - -4.664543151855469, - -6.783782005310059, - -5.676271438598633, - -8.605900764465332, - -0.0824289619922638, - -3.5463995933532715, - -13.374168395996094, - -1.2401021718978882, - -1.8734056949615479, - -3.4154422283172607, - -1.6733763217926025, - -17.633970260620117, - -9.345113754272461, - -0.6277351975440979, - -2.9617538452148438, - -2.5565333366394043, - -10.10580825805664, - -7.130337715148926, - -7.36820125579834, - -4.098911285400391, - -5.747079372406006, - -2.945054769515991, - -0.7887389063835144, - -1.6583149433135986, - -1.0165244340896606, - -6.581666946411133, - -5.926386833190918, - -5.845194339752197, - -0.9657630920410156, - -7.868755340576172, - -1.3244551420211792, - -0.2657390236854553, - -0.06403665244579315, - -2.983020782470703, - -5.943899631500244, - -7.877285957336426, - -3.593116283416748, - -3.819509506225586, - -7.226177215576172, - -2.5206997394561768, - -3.385587215423584, - -0.37499159574508667, - -1.4698283672332764, - -3.1460342407226562, - -0.0077166082337498665, - -4.350916862487793, - -3.2183218002319336, - -0.6242184638977051, - -1.4782464504241943, - -2.8054311275482178, - -3.0831401348114014, - -12.17662525177002, - -2.113419532775879, - -1.6448111534118652, - -2.1834323406219482, - -0.7630388140678406, - -10.1896390914917, - -6.234405517578125, - -11.46288776397705, - -1.003785490989685, - -4.211658477783203, - -1.5010679960250854, - -5.859302043914795, - -2.0465080738067627, - -3.7468819618225098, - -4.684195518493652, - -4.318704128265381, - -2.7234389781951904, - -9.00437068939209, - -3.043811321258545, - -3.1384406089782715, - -2.713779926300049, - -2.095993995666504, - -2.1484954357147217, - -10.274479866027832, - -0.682350754737854, - -0.25973302125930786, - -3.6964316368103027, - -13.434456825256348, - 
-2.3368239402770996, - -5.382724761962891, - -1.9073458909988403, - -5.905669212341309, - -0.032165709882974625, - -1.6530004739761353, - -2.728893280029297, - -1.640552043914795, - -1.1391171216964722, - -1.4353511333465576, - -4.003787994384766, - -0.3450564742088318, - -0.7168521285057068, - -0.34650325775146484, - -0.3616408705711365, - -7.062709331512451, - -1.2851682901382446, - -2.299129009246826, - -8.800156593322754, - -5.208735466003418, - -4.780910491943359, - -2.78342342376709, - -4.469717979431152, - -6.909726619720459, - -2.5114197731018066, - -0.659822404384613, - -0.6915416121482849, - -3.2363741397857666, - -0.5283617377281189, - -0.10473938286304474, - -6.215325832366943, - -7.283237934112549, - -1.6797031164169312, - -11.50100040435791, - -7.5822978019714355, - -3.387317657470703, - -11.407575607299805, - -5.441976547241211, - -3.3264851570129395, - -0.7265786528587341, - -1.382750153541565, - -7.841699600219727, - -8.105277061462402, - -3.9569506645202637, - -4.963083267211914, - -0.5492897629737854, - -4.6081390380859375, - -5.870400905609131, - -3.957930088043213, - -5.275494575500488, - -4.105091094970703, - -2.15435528755188, - -2.8472700119018555, - -1.1278448104858398, - -8.226571083068848, - -0.40629008412361145, - -9.916461944580078, - -4.616743087768555, - -1.691868543624878, - -0.6639478802680969, - -2.5716753005981445, - -6.676954746246338, - -6.535329818725586, - -0.4170510768890381, - -1.443942904472351, - -3.145481824874878, - -1.440589427947998, - -0.26935356855392456, - -0.9647155404090881, - -4.335958957672119, - -1.5647850036621094, - -5.890466690063477, - -3.01654052734375, - -1.9168468713760376, - -3.7365682125091553, - -8.001864433288574, - -10.680083274841309, - -4.489352226257324, - -4.6058149337768555, - -7.69011116027832, - -3.6247005462646484, - -1.5600426197052002, - -10.2160062789917, - -5.004643440246582, - -0.19602319598197937, - -3.375545024871826, - -2.669325590133667, - -1.3932737112045288, - -1.6410658359527588, 
- -6.847603797912598, - -6.744344711303711, - -0.5215591192245483, - -0.25840020179748535, - -1.1448237895965576, - -5.57253885269165, - -7.251138687133789, - -4.221924781799316, - -0.7688062787055969, - -2.504502534866333, - -3.146519660949707, - -2.206653356552124, - -1.4295082092285156, - -7.96943998336792, - -4.332189083099365, - -2.5750505924224854, - -1.7102608680725098, - -5.311381816864014, - -8.897522926330566, - -2.994919538497925, - -3.3397974967956543, - -2.1794328689575195, - -2.437566041946411, - -0.3181810975074768, - -0.27412793040275574, - -0.7914466857910156, - -2.3470635414123535, - -2.4099245071411133, - -2.491870880126953, - -3.024170160293579, - -1.9719040393829346, - -11.373910903930664, - -1.4279751777648926, - -0.14573107659816742, - -2.055763006210327, - -6.366893291473389, - -4.24091911315918, - -0.00709194503724575, - -2.0199716091156006, - -2.524750232696533, - -1.4272525310516357, - -0.5185190439224243, - -2.927150011062622, - -2.7070627212524414, - -3.365638017654419, - -4.318085193634033, - -7.773144721984863, - -1.7947180271148682, - -7.657534599304199, - -8.767786026000977, - -14.74280071258545, - -1.8042558431625366, - -3.2712037563323975, - -1.4002125263214111, - -4.887944221496582, - -1.4821010828018188, - -1.5255622863769531, - -5.879070281982422, - -4.463839530944824, - -5.1955976486206055, - -5.665647506713867, - -0.3775045573711395, - -5.9350481033325195, - -2.800539255142212, - -0.13162286579608917, - -3.034379720687866, - -4.729524612426758, - -4.6252641677856445, - -3.850942611694336, - -2.4760568141937256, - -6.059760093688965, - -10.12075138092041, - -0.9469369649887085, - -11.595907211303711, - -6.875324726104736, - -4.268826007843018, - -2.835529088973999, - -3.8626279830932617, - -4.876199245452881, - -0.013071090914309025, - -4.964417934417725, - -0.7445687055587769, - -5.707155227661133, - -6.10660457611084, - -4.317755699157715, - -4.440443992614746, - -2.9202542304992676, - -4.743522644042969, - 
-1.2569392919540405, - -2.8675737380981445, - -2.3151841163635254, - -4.318130970001221, - -1.9054772853851318, - -1.1808521747589111, - -0.765956461429596, - -2.768916606903076, - -6.237791061401367, - -1.7224305868148804, - -7.137521743774414, - -4.512486457824707, - -1.9069950580596924, - -4.145983695983887, - -5.365190505981445, - -0.059828490018844604, - -2.273892879486084, - -3.4013004302978516, - -5.035730361938477, - -6.501443386077881, - -9.903446197509766, - -1.6332892179489136, - -2.1572084426879883, - -1.6149548292160034, - -1.4698481559753418, - -6.01010799407959, - -2.2243528366088867, - -6.900836944580078, - -6.0930986404418945, - -2.974020481109619, - -3.225423574447632, - -8.423272132873535, - -1.3423724174499512, - -3.626147508621216, - -0.4862469434738159, - -6.860866546630859, - -3.8910953998565674, - -2.33319354057312, - -1.7229185104370117, - -2.215972423553467, - -8.99046516418457, - -4.099084854125977, - -2.4191012382507324, - -8.288970947265625, - -2.9641928672790527, - -1.5036451816558838, - -3.0544614791870117, - -0.0715634673833847, - -2.444031238555908, - -4.520998954772949, - -3.972568988800049, - -0.4985870122909546, - -2.1651363372802734, - -3.4427435398101807, - -1.730639100074768, - -0.9458961486816406, - -7.740211009979248, - -9.39163875579834, - -3.895984172821045, - -1.7523534297943115, - -5.41331672668457, - -8.910720825195312, - -12.971094131469727, - -3.0455880165100098, - -10.501265525817871, - -3.3864927291870117, - -4.842309951782227, - -3.9964733123779297, - -7.3046793937683105, - -2.6607093811035156, - -1.3541781902313232, - -5.003270626068115, - -3.944551944732666, - -0.11356143653392792, - -5.174440383911133, - -9.628616333007812, - -8.654989242553711, - -8.980416297912598, - -6.670101642608643, - -5.488286018371582, - -5.943419933319092, - -2.126483201980591, - -8.054739952087402, - -7.458671569824219, - -2.5267202854156494, - -6.455472946166992, - -8.655346870422363, - -7.903901100158691, - -6.221062660217285, - 
-7.129237174987793, - -4.2345380783081055, - -2.5375306606292725, - -7.697700500488281, - -1.567080020904541, - -2.084331750869751, - -0.25020831823349, - -1.5145041942596436, - -4.619244575500488, - -0.2970108985900879, - -0.4977554678916931, - -6.197869300842285, - -4.030620098114014, - -7.232107639312744, - -0.21076253056526184, - -1.563366174697876, - -1.133756160736084, - -2.708237648010254, - -4.080535888671875, - -0.6818401217460632, - -0.1864331066608429, - -0.49012088775634766, - -8.732468605041504, - -11.945040702819824, - -5.243098735809326, - -1.5294703245162964, - -0.8935543298721313, - -0.6174070835113525, - -1.5068217515945435, - -3.5766501426696777, - -5.393096923828125, - -4.202867031097412, - -14.765748023986816, - -5.2513813972473145, - -0.7597705721855164, - -0.2502063810825348, - -1.7403976917266846, - -2.8000779151916504, - -1.9808133840560913, - -2.1654744148254395, - -1.8629226684570312, - -3.222038745880127, - -0.040942225605249405, - -2.3384013175964355, - -10.210381507873535, - -4.5859761238098145, - -0.5805734395980835, - -3.7019288539886475, - -2.001936674118042, - -2.7876083850860596, - -2.9799084663391113, - -4.349887371063232, - -0.0792960673570633, - -1.4366114139556885, - -1.0813264846801758, - -1.3510822057724, - -6.7060699462890625, - -5.436615943908691, - -3.978389263153076, - -6.785447597503662, - -6.147171497344971, - -3.97414231300354, - -4.332991600036621, - -0.9269428253173828, - -5.1237101554870605, - -4.486598968505859, - -0.04678357392549515, - -1.0307552814483643, - -1.4249452352523804, - -4.517682075500488, - -3.561821699142456, - -2.0815205574035645, - -0.6041194200515747, - -5.992964744567871, - -7.092092514038086, - -0.48916709423065186, - -2.6405677795410156, - -4.3345723152160645, - -3.533582925796509, - -3.1233346462249756, - -3.107872486114502, - -1.9901115894317627, - -3.1052846908569336, - -1.8440347909927368, - -6.21368408203125, - -1.8796799182891846, - -2.705214738845825, - -0.2987763583660126, - 
-4.070865154266357, - -1.6675832271575928, - -1.3896636962890625, - -1.5731089115142822, - -3.526170015335083, - -2.5088443756103516, - -1.208929419517517, - -3.673125743865967, - -2.501532554626465, - -6.875064373016357, - -8.512459754943848, - -1.042314052581787, - -3.657850980758667, - -7.0950798988342285, - -4.974049091339111, - -8.14085578918457, - -3.529888153076172, - -1.9389504194259644, - -7.0902204513549805, - -2.409292459487915, - -2.9428021907806396, - -1.688283085823059, - -3.622368335723877, - -2.0903351306915283, - -4.160663604736328, - -3.1683764457702637, - -1.2135626077651978, - -7.566033363342285, - -3.1186251640319824, - -5.899919509887695, - -0.9518840312957764, - -2.656729221343994, - -2.2994377613067627, - -6.806836128234863, - -1.280236840248108, - -2.838846206665039, - -1.3598848581314087, - -11.707776069641113, - -3.134333372116089, - -0.6230669617652893, - -8.219222068786621, - -7.562507152557373, - -7.489459037780762, - -1.5368008613586426, - -7.149652481079102, - -5.749268054962158, - -3.162869691848755, - -2.7235195636749268, - -6.128931999206543, - -1.1934199333190918, - -3.986410617828369, - -3.76609468460083, - -1.712721586227417, - -3.195504903793335, - -8.397743225097656, - -3.1260581016540527, - -9.792022705078125, - -4.217884540557861, - -11.583260536193848, - -5.987588882446289, - -5.178754806518555, - -6.994749069213867, - -5.167606353759766, - -7.124668121337891, - -6.201416015625, - -10.203682899475098, - -6.858526229858398, - -2.733592987060547, - -5.078882217407227, - -9.003358840942383, - -4.704894542694092, - -3.9085562229156494, - -7.247268199920654, - -7.091092109680176, - -4.4150166511535645, - -7.56699275970459, - -9.485116004943848, - -1.9977033138275146, - -6.65272331237793, - -2.236643075942993, - -7.518955707550049, - -5.525973320007324, - -4.67877721786499, - -6.608670234680176, - -5.536133766174316, - -10.772479057312012, - -10.8853178024292, - -3.6156129837036133, - -6.751470565795898, - -6.4537434577941895, - 
-3.4220399856567383, - -8.251005172729492, - -3.2146153450012207, - -6.330069541931152, - -1.5551663637161255, - -6.520583629608154, - -10.450878143310547, - -5.8788957595825195, - -3.7398200035095215, - -3.9084208011627197, - -0.3640081584453583, - -6.961522102355957, - -6.066243648529053, - -7.270624160766602, - -5.098455429077148, - -2.7642822265625, - -5.460171699523926, - -7.362828731536865, - -2.558631658554077, - -2.186410427093506, - -2.5309929847717285, - -2.46756649017334, - -2.0306026935577393, - -1.8713470697402954, - -2.108008623123169, - -1.2698389291763306, - -2.1712756156921387, - -2.4432802200317383, - -1.1477653980255127, - -1.8417484760284424, - -2.5971946716308594, - -1.8250831365585327, - -2.103092670440674, - -2.5183165073394775, - -2.9367291927337646, - -1.9412965774536133, - -1.7692793607711792, - -2.864521026611328, - -3.1332175731658936, - -1.098311185836792, - -2.946441173553467, - -2.2800471782684326, - -3.1929852962493896, - -2.754260778427124, - -3.485616445541382, - -3.3010287284851074, - -2.5537776947021484, - -2.6752865314483643, - -3.1617612838745117, - -2.4571690559387207, - -2.060081958770752, - -2.425969362258911, - -2.212725877761841, - -2.4232254028320312, - -3.0587053298950195, - -2.4074010848999023, - -2.457937479019165, - -2.319617986679077, - -2.6340954303741455, - -2.599524736404419, - -2.5302212238311768, - -1.6849274635314941, - -2.2609786987304688, - -2.039928674697876, - -1.9474098682403564, - -2.3550753593444824, - -1.718749761581421, - -2.413884162902832, - -1.6247628927230835, - -2.4784040451049805, - -1.828325629234314, - -1.3880831003189087, - -1.4448199272155762, - -1.1477117538452148, - -1.1669728755950928, - -1.8787822723388672, - -1.5565840005874634, - -1.6666553020477295, - -1.747725248336792, - -1.959598422050476, - -2.0376486778259277, - -2.345367431640625, - -2.055098533630371, - -1.3940613269805908, - -3.4385242462158203, - -2.7489635944366455, - -3.2590157985687256, - -3.1128957271575928, - 
-1.7070379257202148, - -3.9010369777679443, - -3.21574068069458, - -3.3850393295288086, - -1.8778185844421387, - -2.698211908340454, - -1.8060741424560547, - -2.0845324993133545, - -3.4797585010528564, - -2.263254404067993, - -3.083108901977539, - -1.6589758396148682, - -2.687279224395752, - -1.77505624294281, - -2.6142921447753906, - -1.934045672416687, - -1.8834377527236938, - -2.8038980960845947, - -1.550542950630188, - -3.4054152965545654, - -1.724036693572998, - -2.3146564960479736, - -1.5134503841400146, - -2.9289023876190186, - -1.5285141468048096, - -3.421035051345825, - -1.3757282495498657, - -3.441431760787964, - -1.5286564826965332, - -3.4372904300689697, - -3.173043966293335, - -1.1313854455947876, - -1.486415147781372, - -2.506413221359253, - -3.5165903568267822, - -1.4112176895141602, - -3.7175354957580566, - -2.2170844078063965, - -0.704839289188385, - -0.6626103520393372, - -2.5483946800231934, - -0.949668288230896, - -3.1339564323425293, - -1.3326977491378784, - -3.5493476390838623, - -0.6785370111465454, - -3.236161470413208, - -2.1347084045410156, - -3.802447557449341, - -2.585503339767456, - -3.9361765384674072, - -2.290905714035034, - -3.714280605316162, - -1.793616533279419, - -3.7252492904663086, - -1.450188159942627, - -3.11938738822937, - -1.25174880027771 - ] - }, - "32": { - "input_prompt": "create a conversational article", - "generated_text": " about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. 
The article should be about the topic of the article.", - "generated_tokens": [ - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046 - ], - "latency": 20.777740478515625, - "cuda_graph_request_count_map": null, - "step_count": 2048, - "logprobs": [ - -4.4165568351745605, - -11.358176231384277, - -0.0701780766248703, - -7.797665119171143, - -2.6805992126464844, - -1.4707680940628052, - -3.0390255451202393, - -1.6902849674224854, - -1.270594835281372, - -1.1936196088790894, - -1.2523558139801025, - -2.7270259857177734, - -1.2371309995651245, - -0.9618493318557739, - -0.4379909038543701, - -1.3917063474655151, - -1.1055524349212646, - -0.9122569561004639, - -0.9911308288574219, - -0.08436793833971024, - -0.5424078106880188, - -0.9181017279624939, - -0.5873759388923645, - -0.19014373421669006, - -0.06655456870794296, - -0.15252672135829926, - -0.09415211528539658, - -0.009787309914827347, - -0.013910251669585705, - -0.005296128336340189, - -0.005677408073097467, - -0.02013739012181759, - 
-0.21594694256782532, - -0.07153760641813278, - -0.0066444179974496365, - -0.010198505595326424, - -0.011980246752500534, - -0.003686776151880622, - -0.0037619550712406635, - -0.0022467151284217834, - -0.004088377580046654, - -0.021828632801771164, - -0.0012669878778979182, - -0.09768074005842209, - -0.02652405947446823, - -0.0019286142196506262, - -0.002283824374899268, - -0.0032225127797573805, - -0.0009741804678924382, - -0.0009415484382770956, - -0.001211624126881361, - -0.001135300612077117, - -0.002340436913073063, - -0.0010846928926184773, - -0.0509282611310482, - -0.03832047060132027, - -0.00257422705180943, - -0.0022806129418313503, - -0.00262785074301064, - -0.0008195855189114809, - -0.0010239601833745837, - -0.0013777059502899647, - -0.0009899006690829992, - -0.0018756669014692307, - -0.0015304292319342494, - -0.08506463468074799, - -0.01893703266978264, - -0.0013797297142446041, - -0.0014461545506492257, - -0.0013971101725474, - -0.0005869334563612938, - -0.0005212855176068842, - -0.000876757490914315, - -0.0005256939912214875, - -0.0012863941956311464, - -0.0015691122971475124, - -0.051276568323373795, - -0.00973513163626194, - -0.0010469438275322318, - -0.0011531615164130926, - -0.0009969270322471857, - -0.00038342276820912957, - -0.0004032037395518273, - -0.000730247818864882, - -0.0003275334893260151, - -0.0008700875914655626, - -0.0017572689102962613, - -0.044393111020326614, - -0.013102858327329159, - -0.0011463745031505823, - -0.0012070996453985572, - -0.0012325793504714966, - -0.0005048430757597089, - -0.0004876854654867202, - -0.0007901645149104297, - -0.00041500062798149884, - -0.0009869233472272754, - -0.0018687656847760081, - -0.03943866863846779, - -0.014425630681216717, - -0.0014756753807887435, - -0.001423775334842503, - -0.001209719106554985, - -0.0005046047735959291, - -0.00042167355422861874, - -0.0007688426994718611, - -0.0002699726028367877, - -0.0006598440813831985, - -0.0017849955474957824, - -0.038999658077955246, - 
-0.012665312737226486, - -0.0014427024871110916, - -0.0014492495683953166, - -0.001016576774418354, - -0.00042083943844772875, - -0.00033241944038309157, - -0.0006403064471669495, - -0.00022373080719262362, - -0.0007053509471006691, - -0.0018597226589918137, - -0.030997740104794502, - -0.011259939521551132, - -0.0012655591126531363, - -0.00134151556994766, - -0.0008106521563604474, - -0.0003513672563713044, - -0.0002964295563288033, - -0.0006368515896610916, - -0.00020180096908006817, - -0.0005779979983344674, - -0.0016014858847483993, - -0.0271126888692379 - ] - }, - "64": { - "input_prompt": "App Concept: \"Eggy's Interactive Adventure World\"\n\nEggy's Interactive Adventure World is a cutting-edge app designed for phones and tablets that combines immersive storytelling with engaging, interactive experiences for both parents and children. This innovative app allows users to explore the world of Eggy and participate in a series of adventures that teach essential life skills, foster personal growth, and strengthen parent-child bonds.\n\nKey Features:\n\n1. Interactive Stories: The app brings the 20 Eggy stories to life through interactive, animated storytelling. Users can read, listen, and actively participate in the stories, making choices that guide Eggy through various adventures, each with unique lessons and experiences.\n2. Augmented Reality (AR) Integration: The app utilizes AR technology, allowing users to interact with Eggy and the story characters in their real-world environment. This immersive experience creates a deeper connection with the stories and encourages engagement.\n3. Personalized Adventures: The app customizes each story based on the user's interests, needs, and personal growth areas. By collecting user input, the app tailors the storylines and learning outcomes to ensure that each adventure resonates with the child's unique experiences.\n4. 
Parent-Child Cooperative Play: The app encourages parents and children to collaborate and engage in the stories together. Through a split-screen mode or multi-device connectivity, parents and children can actively participate in the stories and learn essential life skills together.\n5. Skill-Building Mini-Games: Interspersed throughout the stories are educational mini-games that reinforce the lessons and themes. These games are designed to be fun, engaging, and accessible to children, encouraging them to practice and develop various skills.\n6. Progress Tracking and Rewards: The app tracks the user's progress through the stories and rewards them with badges, trophies, and other collectibles. This gamification aspect encourages continued engagement and fosters a sense of accomplishment and growth.\n7. Community Features: The app provides a safe, moderated online community where parents can share their experiences, discuss the stories and lessons, and connect with other families who are also using the app. This community fosters support, inspiration, and learning.\n8. Expert Resources: The app includes access to resources from experts in child development, psychology, and education, providing parents with additional tools and guidance on fostering the growth and well-being of their children.\n\nEggy's Interactive Adventure World is a one-of-a-kind, mind-blowing app that revolutionizes the way parents and children engage with stories and learn essential life skills. Combining the power of storytelling, cutting-edge technology, and expert resources, this app is a must-have for families looking to strengthen their bonds and support their children's personal growth.", - "generated_text": "The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). 
The 1999\u20132000 season was the 10th season was the 10th season was the 10 season was the 10 season was the 10", - "generated_tokens": [ - 2, - 1784, - 1032, - 1049, - 1057, - 1057, - 1057, - 1882, - 1050, - 1048, - 1048, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 1411, - 5526, - 1307, - 1278, - 5805, - 46982, - 8525, - 1319, - 19644, - 1076, - 1577, - 1531, - 1032, - 1049, - 1057, - 1057, - 1057, - 1882, - 1050, - 1048, - 1048, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 1411, - 5526, - 1307, - 1278, - 5805, - 46982, - 8525, - 1319, - 19644, - 1076, - 1577, - 1531, - 1032, - 1049, - 1057, - 1057, - 1057, - 1882, - 1050, - 1048, - 1048, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 1411, - 5526, - 1307, - 1278, - 5805, - 46982, - 8525, - 1319, - 19644, - 1076, - 1577, - 1531, - 1032, - 1049, - 1057, - 1057, - 1057, - 1882, - 1050, - 1048, - 1048, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 1411, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 1411, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048 - ], - "latency": 35.2662193775177, - "cuda_graph_request_count_map": null, - "step_count": 2048, - "logprobs": [ - -12.107745170593262, - -2.9727728366851807, - -3.720092535018921, - -5.592433929443359, - -10.964235305786133, - -3.654498338699341, - -9.33439826965332, - -4.833785057067871, - -5.187321662902832, - -2.6944785118103027, - -6.9262237548828125, - -0.654232919216156, - -0.5550781488418579, - -0.21346639096736908, - -0.0134271876886487, - -0.010840100236237049, - -1.3878544569015503, - -0.6296291351318359, - -7.9766011238098145, - -0.4393192231655121, - -5.639142036437988, - -3.277270793914795, - -1.0206468105316162, - -11.703084945678711, - -0.7100943922996521, - -0.2809169888496399, - -2.771284818649292, - -7.190817832946777, - -4.048691749572754, - -0.012056218460202217, - -3.3802318572998047, - 
-0.6807184815406799, - -3.4844107627868652, - -3.312331199645996, - -0.5001641511917114, - -2.61255145072937, - -4.243694305419922, - -4.333778381347656, - -6.0625810623168945, - -0.011777156963944435, - -0.37577226758003235, - -0.9490834474563599, - -3.5450198650360107, - -2.1778035163879395, - -0.45957911014556885, - -3.00771164894104, - -1.7600425481796265, - -0.09766030311584473, - -2.467618942260742, - -1.329679012298584, - -0.8384320735931396, - -1.1864604949951172, - -3.628342866897583, - -0.2470003068447113, - -1.8938640356063843, - -5.168431282043457, - -0.05005566030740738, - -2.258014678955078, - -2.449028968811035, - -0.0034086955711245537, - -3.9485883712768555, - -1.6201664209365845, - -5.139942646026611, - -4.859354496002197, - -0.23686674237251282, - -0.5541543364524841, - -2.5826025009155273, - -6.114635467529297, - -4.3380208015441895, - -0.7412900924682617, - -0.3221715986728668, - -0.13805493712425232, - -4.1797332763671875, - -7.3456268310546875, - -0.13762745261192322, - -2.0905232429504395, - -1.0178627967834473, - -4.108260631561279, - -0.6007124185562134, - -1.0410642623901367, - -4.122039794921875, - -0.35905471444129944, - -1.4274661540985107, - -4.139932155609131, - -0.4237431585788727, - -1.6294409036636353, - -0.9811424016952515, - -4.132790565490723, - -1.1318120956420898, - -6.8258256912231445, - -1.5455098152160645, - -0.6984409093856812, - -13.664215087890625, - -0.1166313961148262, - -1.6347849369049072, - -0.28875046968460083, - -0.03130083531141281, - -1.5293006896972656, - -1.6488375663757324, - -4.224111557006836, - -4.760683059692383, - -1.9758747816085815, - -1.5828256607055664, - -2.8463857173919678, - -0.2620386481285095, - -1.7243889570236206, - -1.7945923805236816, - -0.8884308338165283, - -0.3766394555568695, - -0.34033581614494324, - -9.05566692352295, - -0.22754782438278198, - -0.033802058547735214, - -0.34108465909957886, - -0.5644669532775879, - -2.0925779342651367, - -4.547505855560303, - -10.870464324951172, - 
-1.1072022914886475, - -5.503787994384766, - -3.259672164916992, - -0.007964519783854485, - -3.0111639499664307, - -4.246737480163574, - -0.7813188433647156, - -3.331031322479248, - -4.485962867736816, - -0.9492117166519165, - -2.6757047176361084, - -1.1591349840164185, - -1.122117519378662, - -2.629878044128418, - -5.986321926116943, - -0.2146703153848648, - -0.002392764901742339, - -7.372479438781738, - -0.007077385671436787, - -0.06599216908216476, - -0.0970711037516594, - -3.2874932289123535, - -0.0019583588000386953, - -0.9122000336647034, - -4.930907249450684, - -0.019508399069309235, - -0.308611661195755, - -0.07778516411781311, - -3.8497893810272217, - -0.46124517917633057, - -0.38821348547935486, - -2.668412208557129, - -1.845987319946289, - -0.06470083445310593, - -0.006619549356400967, - -1.2610487937927246, - -0.13015533983707428, - -3.365312099456787, - -0.0014690094394609332, - -1.6789823770523071, - -1.2499005794525146, - -3.3992111682891846, - -5.563300132751465, - -0.823418140411377, - -4.24124813079834, - -1.6597849130630493, - -0.6941139698028564, - -1.5637556314468384, - -0.5482053756713867, - -0.9507225751876831, - -3.764758586883545, - -0.0006518622976727784, - -0.7540555000305176, - -5.058262825012207, - -0.3302401602268219, - -2.8130555152893066, - -0.17079885303974152, - -2.871047019958496, - -0.3991694450378418, - -3.1476998329162598, - -0.3488404452800751, - -2.0545666217803955, - -4.201597690582275, - -5.164614677429199, - -0.0271432027220726, - -0.0009785869624465704, - -3.3444161415100098, - -1.3117046356201172, - -6.375423431396484, - -0.05535568296909332, - -0.3919340968132019, - -0.060594215989112854, - -6.507473468780518, - -0.0023910999298095703, - -2.143423318862915, - -3.335618257522583, - -2.953970432281494, - -0.0013383012264966965, - -0.8080525398254395, - -0.29526084661483765, - -0.04036511853337288, - -3.231475353240967, - -1.0585589408874512, - -6.136373043060303, - -0.006182829383760691, - -0.035548023879528046, - 
-5.509808540344238, - -1.8490750789642334, - -9.83314037322998, - -0.07037576287984848, - -3.1621387004852295, - -6.762360095977783, - -1.3490527868270874, - -3.601043462753296, - -1.176393985748291, - -0.4342959523200989, - -0.06266004592180252, - -5.464046001434326, - -0.017946599051356316, - -1.0416009426116943, - -1.6117159128189087, - -12.289417266845703, - -1.5004339218139648, - -5.76563835144043, - -4.038386821746826, - -0.20812086760997772, - -3.6306562423706055, - -1.3901070356369019, - -1.087137222290039, - -2.423213243484497, - -4.503086090087891, - -0.0008031480247154832, - -0.03627370297908783, - -0.1653430461883545, - -7.958648681640625, - -1.1018548011779785, - -1.290948748588562, - -3.8049263954162598, - -1.8253734111785889, - -0.059022851288318634, - -0.0013984196120873094, - -4.698851585388184, - -2.5421664714813232, - -0.024493809789419174, - -4.828659534454346, - -3.0295286178588867, - -3.550312042236328, - -0.1185273677110672, - -0.22595760226249695, - -0.10782183706760406, - -1.4033282995224, - -0.4485701024532318, - -0.2889708876609802, - -0.05471855774521828, - -0.007632025051862001, - -2.1156554222106934, - -0.6249589323997498, - -4.198577404022217, - -0.14178156852722168, - -4.284021377563477, - -2.227515935897827, - -3.5022120475769043, - -0.19575819373130798, - -15.964509963989258, - -4.055960655212402, - -11.125024795532227, - -0.7681724429130554, - -3.0436902046203613, - -7.030262470245361, - -4.376729488372803, - -5.476145267486572, - -0.4219042658805847, - -3.7689766883850098, - -0.060010604560375214, - -0.8134393692016602, - -0.11386934667825699, - -0.025473715737462044, - -0.09736856073141098, - -4.357361793518066, - -0.3670865297317505, - -0.08063744008541107, - -0.1311480849981308, - -1.0903867483139038, - -1.2705107927322388, - -1.5076212882995605, - -4.295275688171387, - -0.04185756668448448, - -0.19810955226421356, - -1.9645220041275024, - -0.9597910642623901, - -0.13429655134677887, - -0.002283110748976469, - 
-7.066074371337891, - -3.639211654663086, - -1.0263917446136475, - -8.124760627746582, - -1.132537841796875, - -0.09160765260457993, - -0.08996370434761047, - -10.165366172790527, - -3.501585006713867, - -0.0019847711082547903, - -0.05309417471289635, - -0.31209683418273926, - -0.15089339017868042, - -1.23564875125885, - -1.2685208320617676, - -7.832758903503418, - -0.19271136820316315, - -0.014305183663964272, - -0.0007532381569035351, - -0.44688940048217773, - -2.6239724159240723, - -1.738666296005249, - -1.6480977535247803, - -0.46753185987472534, - -8.656959533691406, - -3.79868483543396, - -0.9281394481658936, - -2.2381181716918945, - -1.7654449939727783, - -0.4948798418045044, - -0.025028761476278305, - -1.5435361862182617, - -1.6390818357467651, - -1.4962153434753418, - -0.3425217270851135, - -0.013077914714813232, - -0.038474079221487045, - -5.3364362716674805, - -0.42365288734436035, - -1.884093999862671, - -3.510357618331909, - -6.198029518127441, - -0.44375038146972656, - -0.0008789013954810798, - -3.6025230884552, - -1.419615626335144, - -2.6723289489746094, - -5.775190830230713, - -1.1380761861801147, - -2.6683366298675537, - -0.43395891785621643, - -0.003145867260172963, - -8.63144302368164, - -1.646262764930725, - -1.732487678527832, - -4.561546802520752, - -0.5277953147888184, - -0.07333153486251831, - -0.5624169707298279, - -0.12201295047998428, - -2.6561455726623535, - -1.1071691513061523, - -2.6895060539245605, - -0.040864069014787674, - -0.04126371443271637, - -1.8294739723205566, - -0.09022177755832672, - -0.3154001832008362, - -0.46215569972991943, - -2.2462844848632812, - -0.30149081349372864, - -0.52588951587677, - -8.288043975830078, - -0.0002057340752799064, - -0.8021711707115173, - -4.4546098709106445, - -0.0001565095444675535, - -0.0015961299650371075, - -0.15216240286827087, - -0.3677564561367035, - -5.018707275390625, - -0.7850045561790466, - -1.9582659006118774, - -1.0046892166137695, - -10.0401029586792, - -0.16878114640712738, - 
-5.944240570068359, - -1.5523078441619873, - -5.7253522872924805, - -0.47948503494262695, - -0.44009655714035034, - -5.671053886413574, - -0.003280022880062461, - -0.7937742471694946, - -0.9639376401901245, - -0.00030048147891648114, - -1.0747740268707275, - -0.8839919567108154, - -3.416811466217041, - -1.6602673530578613, - -0.2706959843635559, - -0.0024333172477781773, - -4.478696823120117, - -6.20179557800293, - -0.11359559744596481, - -0.202009916305542, - -0.022310219705104828, - -2.367263078689575, - -1.0405994653701782, - -5.984308242797852, - -2.105138063430786, - -9.583202362060547, - -0.0004957877099514008, - -3.0655455589294434, - -0.0669412910938263, - -0.8977450728416443, - -2.2271294593811035, - -2.6617536544799805, - -1.8184051513671875, - -0.8291114568710327, - -0.4864235818386078, - -0.7993525862693787, - -3.51106858253479, - -2.1530935764312744, - -0.257144957780838, - -1.3934082984924316, - -1.3137131929397583, - -0.3384077548980713, - -0.1697217971086502, - -2.353395938873291, - -0.03406282886862755, - -0.39059701561927795, - -3.422821044921875, - -1.7117210626602173, - -0.7018465399742126, - -1.5995906591415405, - -3.6218395233154297, - -0.12497704476118088, - -0.16966234147548676, - -0.7313685417175293, - -0.4956285357475281, - -1.0840849876403809, - -5.042126655578613, - -0.00031704644788987935, - -7.683258056640625, - -0.9210801720619202, - -4.687852382659912, - -0.0028814247343689203, - -0.043382611125707626, - -4.1948652267456055, - -2.66593337059021, - -0.06153333932161331, - -0.0023110604379326105, - -6.729236602783203, - -5.777127742767334, - -0.08932067453861237, - -0.09890018403530121, - -0.009886111132800579, - -3.1145148277282715, - -3.725565195083618, - -0.0021998509764671326, - -3.9927196502685547, - -2.753793239593506, - -1.6037236452102661, - -0.17461130023002625, - -4.804804801940918, - -0.2311229705810547, - -0.30256444215774536, - -2.235363006591797, - -0.006614102050662041, - -0.34757524728775024, - -1.4946835041046143, - 
-1.222062587738037, - -3.658839225769043, - -1.356170892715454, - -0.5371109843254089, - -3.7580835819244385, - -4.54621696472168, - -0.31577637791633606, - -3.677156925201416, - -2.7181396484375, - -7.4674882888793945, - -0.00019369633810129017, - -2.3798398971557617, - -2.5452184677124023, - -0.2858496308326721, - -4.315659523010254, - -0.025835415348410606, - -0.000603493710514158, - -0.2546294331550598, - -0.12032663822174072, - -2.006908655166626, - -5.990736961364746, - -7.146596908569336, - -0.23356498777866364, - -0.2201036810874939, - -0.01235415879637003, - -0.011248741298913956, - -1.4155778884887695, - -0.40242519974708557, - -5.877886772155762, - -0.7865053415298462, - -0.03231288120150566, - -0.004864405374974012, - -0.0050629740580916405, - -2.7049152851104736, - -6.822089195251465, - -0.39252761006355286, - -1.2290617227554321, - -0.007630132604390383, - -3.485461711883545, - -0.47985684871673584, - -6.1813530921936035, - -0.03757825121283531, - -0.37834712862968445, - -0.22192610800266266, - -1.165318489074707, - -0.5220151543617249, - -0.1289423257112503, - -3.216222047805786, - -1.0787583589553833, - -3.0716826915740967, - -0.6023419499397278, - -2.558605194091797, - -0.927433431148529, - -0.00364841241389513, - -0.14910078048706055, - -0.7318926453590393, - -6.159773826599121, - -0.0015301911626011133, - -1.8908276557922363, - -1.9641315937042236, - -0.021651331335306168, - -2.1648828983306885, - -2.2700207233428955, - -7.833290100097656, - -0.03397307172417641, - -0.8344621658325195, - -0.02225659228861332, - -0.06639260798692703, - -2.3780317306518555, - -3.180129051208496, - -0.09030630439519882, - -2.4138312339782715, - -1.3445552587509155, - -1.848326325416565, - -0.9726964831352234, - -2.851792335510254, - -0.0630769282579422, - -0.0011394681641831994, - -0.05843213573098183, - -2.6616668701171875, - -1.575437068939209, - -0.180197611451149, - -5.552371501922607, - -0.26108410954475403, - -2.529611587524414, - -0.37780019640922546, - 
-5.141795635223389, - -0.5921107530593872, - -0.2474975287914276, - -0.10687454044818878, - -4.891775131225586, - -0.25011152029037476, - -2.4100728034973145, - -1.358667016029358, - -2.790961503982544, - -3.8654675483703613, - -1.0076243877410889, - -0.7456949949264526, - -1.5575554370880127, - -2.05328631401062, - -1.6538066864013672, - -0.0558217354118824, - -0.0001817776501411572, - -0.0011643542675301433, - -0.038359593600034714, - -1.4208931922912598, - -0.542127251625061, - -0.3162364959716797, - -0.3966117799282074, - -1.1765563488006592, - -1.7920958995819092, - -0.18425509333610535, - -0.1092008650302887, - -0.46676987409591675, - -0.24977745115756989, - -1.0375996828079224, - -0.5268858671188354, - -0.008942908607423306, - -0.6404479146003723, - -0.0033111530356109142, - -5.3165931603871286e-05, - -0.5154370665550232, - -0.39286962151527405, - -1.401839256286621, - -0.6232213973999023, - -0.02168831042945385, - -0.004282470792531967, - -0.005199837032705545, - -0.09748794883489609, - -0.040823787450790405, - -0.00014852374442853034, - -0.0005832401220686734, - -0.005303124897181988, - -0.6537013053894043, - -0.38026049733161926, - -0.04189129173755646, - -0.010385753586888313, - -0.008756335824728012, - -0.013362848199903965, - -0.000504723924677819, - -0.002797620603814721, - -0.0014512732159346342, - -0.0013321106089279056, - -0.010883613489568233, - -0.005159396678209305, - -0.004701037425547838, - -0.01591104455292225, - -0.001474246964789927, - -1.2278481335670222e-05, - -0.010548785328865051, - -0.08341525495052338, - -0.03858809545636177, - -0.056062061339616776, - -0.0009532198309898376, - -0.0005789510905742645, - -0.0008986725588329136, - -0.00710969977080822, - -0.0006561510381288826, - -1.4781842764932662e-05, - -5.578839045483619e-05, - -0.0006398299592547119, - -0.0028786908369511366, - -0.0034092895220965147, - -0.008268529549241066, - -0.006602259818464518, - -0.004517706111073494, - -0.02233586646616459, - -0.0006323245470412076, - 
-0.009195122867822647, - -0.0029284947086125612, - -0.004457537550479174, - -0.017873765900731087, - -0.008801711723208427, - -0.0036383166443556547, - -0.08078611642122269, - -0.006347495596855879, - -0.0002177716523874551, - -0.04688572511076927, - -0.2718890309333801, - -0.07996802777051926, - -0.04674842208623886, - -0.009984076954424381, - -0.010000954382121563, - -0.050126753747463226, - -0.5864179730415344, - -0.2915390133857727, - -0.008090462535619736, - -0.032190412282943726, - -0.03461571782827377, - -0.2785419523715973, - -0.05830562859773636, - -0.02893950417637825, - -0.12241066247224808, - -0.02711048536002636, - -0.16450686752796173, - -0.09852994978427887, - -0.2651047706604004, - -0.35559725761413574, - -0.12606258690357208, - -0.32793670892715454, - -0.20878805220127106, - -0.7587923407554626, - -0.4730657637119293, - -1.496794581413269, - -0.2486363798379898, - -0.20723387598991394, - -0.1872958242893219, - -0.19151091575622559, - -0.3350580036640167, - -1.3085839748382568, - -0.6109102964401245, - -0.2947172224521637, - -0.37130236625671387, - -0.19888469576835632, - -0.18297068774700165, - -0.9978674054145813, - -0.5471905469894409, - -0.4379975199699402, - -0.407988041639328, - -0.17325688898563385, - -0.16938896477222443 - ] - }, - "96": { - "input_prompt": "what is the concept of double materiality in sustainability?", - "generated_text": " What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? 
What is the concept of double materiality in sustainability? What is the concept of double material", - "generated_tokens": [ - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318 - ], - "latency": 50.88405132293701, - "cuda_graph_request_count_map": null, - "step_count": 2048, - "logprobs": [ - -4.917365074157715, - -0.9960631132125854, - -7.875392913818359, - -0.2993181347846985, - -7.760880470275879, - -10.308395385742188, - -2.1807961463928223, - -1.6412583589553833, - -9.521512985229492, - -1.627489447593689, - -1.8410861492156982, - -0.9285702705383301, - -0.2576955556869507, - -0.9641067981719971, - -0.02314644306898117, - -0.6696561574935913, - -0.07035009562969208, - -0.004622488282620907, - -0.025748632848262787, - -0.06276137381792068, - -0.17385317385196686, - -0.3285445272922516, - -0.0592009499669075, - -0.007940039038658142, - -0.22664028406143188, - -0.0017957051750272512, - -0.022929180413484573, - -0.005733947269618511, - -0.0012996093137189746, - -0.006419987417757511, - 
-0.02376849390566349, - -0.27800270915031433, - -0.4650723934173584, - -0.04936715215444565, - -0.003972141072154045, - -0.01477995328605175, - -0.0012044801842421293, - -0.014891182072460651, - -0.002709767082706094, - -0.0009939497103914618, - -0.0028436246793717146, - -0.006759870797395706, - -0.15416178107261658, - -0.20121537148952484, - -0.016414370387792587, - -0.0015769677702337503, - -0.008138825185596943, - -0.0007713441736996174, - -0.013819841668009758, - -0.003826678032055497, - -0.0005918181850574911, - -0.0014938872773200274, - -0.00485716899856925, - -0.081083282828331, - -0.09642580896615982, - -0.009630884043872356, - -0.0010948146227747202, - -0.007085552904754877, - -0.0006310140597634017, - -0.013073914684355259, - -0.0039152647368609905, - -0.000364713923772797, - -0.001292108790948987, - -0.004158303141593933, - -0.044283974915742874, - -0.05722038820385933, - -0.006369172595441341, - -0.0007976687629707158, - -0.005993015132844448, - -0.0004935238393954933, - -0.011310506612062454, - -0.002951553324237466, - -0.000387831823900342, - -0.000977038755081594, - -0.0036971091758459806, - -0.030511993914842606, - -0.04246694967150688, - -0.004863100592046976, - -0.0006927236099727452, - -0.005206122528761625, - -0.0005129451747052372, - -0.00894621666520834, - -0.0028565814718604088, - -0.00041333239641971886, - -0.0009002208826132119, - -0.0033131728414446115, - -0.021188799291849136, - -0.03330245241522789, - -0.0038543473929166794, - -0.0006504327175207436, - -0.004474864806979895, - -0.00048029806930571795, - -0.009718249551951885, - -0.0030443770810961723, - -0.0003743662964552641, - -0.0009439303539693356, - -0.003729770192876458, - -0.016505014151334763, - -0.0290373582392931, - -0.003315192647278309, - -0.0005821678787469864, - -0.004148805979639292, - -0.00042489083716645837, - -0.006856840569525957, - -0.0028660909738391638, - -0.00032574593205936253, - -0.0006986799417063594, - -0.003671098267659545, - -0.012792548164725304, - 
-0.02553274855017662, - -0.002730690874159336, - -0.0005067494930699468, - -0.0036923582665622234, - -0.0004451475979294628, - -0.007225453853607178, - -0.002746859099715948, - -0.0003323002893012017, - -0.0008517451351508498, - -0.003630714723840356, - -0.011284693144261837, - -0.02186688780784607, - -0.0025819556321948767, - -0.0004931663861498237, - -0.0031721293926239014, - -0.00040725519647821784, - -0.0062383925542235374, - -0.00238693761639297, - -0.00040749352774582803, - -0.0007970731821842492, - -0.003604583442211151, - -0.010075542144477367, - -0.022386692464351654, - -0.0025295186787843704, - -0.00046302087139338255, - -0.003138143103569746, - -0.0004233417857903987, - -0.006178564392030239, - -0.0021037368569523096 - ] - }, - "127": { - "input_prompt": "as before we talk about using , i usually do if else() to in case the property would be null before.\nif today i public a property and i want to use using to instead , will code be like this?\n\npublic Material Mat;\n\nusing(Mat){\n// Do something;\n}\n\nis that right?", - "generated_text": " i am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using", - "generated_tokens": [ - 1623, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 8462, - 22692, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1809, - 1623, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 73751, - 1338, - 1073, 
- 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 8462, - 22692, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1809, - 1623, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 73751, - 1338, - 1073, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 8462, - 22692, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1809, - 1623, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 73751, - 1338, - 1073, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505 - ], - "latency": 62.613630533218384, - "cuda_graph_request_count_map": null, - "step_count": 2048, - "logprobs": [ - -7.793755531311035, - -4.224940776824951, - -5.446587562561035, - -0.267395555973053, - -6.118141174316406, - -11.105525970458984, - -6.67517614364624, - -6.902530670166016, - -3.2218151092529297, - -5.99644660949707, - -8.431673049926758, - -8.04836368560791, - -3.992594003677368, - -7.550079345703125, - -2.873685836791992, - -3.3877997398376465, - -7.9306745529174805, - -7.005484580993652, - -1.0481306314468384, - -2.686237335205078, - -6.131283760070801, - -6.2994704246521, - -7.931419372558594, - -11.3147554397583, - -8.544670104980469, - -12.01729679107666, - -3.89847469329834, - -1.7964364290237427, - -2.952878952026367, - -1.9217232465744019, - -2.272329330444336, - -0.37552154064178467, - -1.667820692062378, - -7.510344505310059, - -3.498040199279785, - -7.980632305145264, - -7.672002792358398, - -4.4999470710754395, - -7.155375003814697, - -2.4486124515533447, - -4.785946846008301, - -1.153855800628662, - -2.3994438648223877, - -4.250652313232422, - -12.24446964263916, - -8.344388008117676, - -2.608186721801758, - -5.200589179992676, - -8.25888442993164, - -3.6245617866516113, - -7.689338207244873, - -7.345355033874512, - -1.2661759853363037, - 
-7.265620231628418, - -1.9884108304977417, - -6.269482612609863, - -2.41705584526062, - -1.8929681777954102, - -1.8259913921356201, - -2.0997350215911865, - -2.323200225830078, - -1.3998825550079346, - -0.8789899945259094, - -1.082053542137146, - -1.1831339597702026, - -1.4462857246398926, - -1.6481035947799683, - -1.4408715963363647, - -1.2603964805603027, - -1.5267670154571533, - -1.6345772743225098, - -1.3796477317810059, - -0.7609691023826599, - -0.3548354506492615, - -0.7552334666252136, - -0.44776833057403564, - -1.1078286170959473, - -1.3036658763885498, - -0.5214896202087402, - -0.8486822843551636, - -0.22470997273921967, - -0.4705755412578583, - -0.5639711022377014, - -0.5388108491897583, - -0.6052999496459961, - -0.1002030223608017, - -0.286334365606308, - -0.45798981189727783, - -1.0107953548431396, - -0.11875647306442261, - -0.6969441771507263, - -0.4609107971191406, - -0.07614769786596298, - -0.5035472512245178, - -0.1682187020778656, - -0.10476160794496536, - -0.6586751341819763, - -0.35806939005851746, - -1.5364394187927246, - -2.4093759059906006, - -1.977368950843811, - -1.6216907501220703, - -0.27647316455841064, - -0.2991848587989807, - -0.2783535420894623, - -0.05913994088768959, - -0.03023873083293438, - -0.043339803814888, - -0.7320341467857361, - -0.0030677898321300745, - -0.0332595594227314, - -0.012804670259356499, - -0.004041599575430155, - -0.0014899593079462647, - -0.001948602613992989, - -0.0029070996679365635, - -0.040939707309007645, - -0.013942227698862553, - -0.04897322878241539, - -0.011005887761712074, - -0.0044113704934716225, - -0.0013179434463381767, - -0.003658389439806342, - -0.009758152067661285, - -0.0014104428701102734, - -0.0016671819612383842, - -0.000771939754486084, - -0.0015519729349762201, - -0.003720743814483285, - -0.004249115474522114, - -0.00485657574608922, - -0.005053604021668434, - -0.002336274366825819, - -0.0009155849111266434, - -0.0004978132783435285, - -0.0005953923100605607, - -0.0011395872570574284, - 
-0.001485078944824636, - -0.3072909712791443, - -1.7295066118240356, - -0.4807289242744446, - -0.1245415136218071, - -0.011858444660902023, - -0.020613837987184525, - -0.011020978912711143, - -0.003106294432654977, - -0.0009966888464987278, - -0.0019349202048033476, - -0.037407051771879196, - -0.0003496989083942026, - -0.005922981072217226, - -0.007394562941044569, - -0.0006037319544702768, - -0.0008836655179038644, - -0.0002884448622353375, - -0.00047600860125385225, - -0.0024947968777269125, - -0.00442774873226881, - -0.004059052560478449, - -0.0018594847060739994, - -0.0006179092451930046, - -0.00022635281493421644, - -0.0006730675231665373, - -0.003022746881470084, - -0.0002343380037928, - -0.00047791501856409013, - -9.440929716220126e-05, - -0.00021550717065110803, - -0.0013523490633815527, - -0.0032202552538365126, - -0.001157686347141862, - -0.004449942149221897, - -0.0016590891173109412, - -0.00101062236353755, - -0.0003079893649555743, - -0.00048375347978435457, - -0.0021734442561864853, - -0.00423036003485322, - -0.11514264345169067, - -0.8658493757247925, - -0.084366075694561, - -0.02140468917787075, - -0.0060798698104918, - -0.008638513274490833, - -0.003212531330063939, - -0.0009598892065696418, - -0.00032085992279462516 - ] - }, - "throughput": 120.8737525217505 + "0": { + "input_prompt": "The $500 Cup of coffee?\nConsider this, most Americans spend an average of $1,500-2,000 a year on this bean water.\nI have a few question for you: \nHow has business been the past few months?\nDo you ever feel like your business is stuck?\nDon't feel like you're able to improve performance and make changes required to achieve success ?\nAre your customers spneding less and less and wanting more?\nHave the gas prices affected your business?\nDo you have employees and do they hate you or wish they could quit?\n\nNow, before you and I can decide wheter or not I will be a good fit for your business we should talk this over with coffee.\nAnd, just to warn you this isn't 
some casual thing. This is not a date or time to be personal or social (but by all means share what you will coz I'll gladly listen).\nTher eare two major talking points and stratagies we will focios on in our lil coffee social\nFor one, we will find your unique selling Proposition (USP).\nDo have the best price? Are you the cheapest in town? Are your customers jerks? Do you haVE REGULARS? Why do people come back?\nwe'll also look for the holes in your business bucket. I'm willing to bet there's a hole or two in your business we'll find together that'll make this 500 dollar cup of Joe pay for itse;f immedietly.\nMany find themselves to be more profitable by just finding out where the dollars are escaping in their business and I like to think of myself as a guy that comes along with some spakel or putty and patch those holes up for you.\nBeleive me, just fixing one hole can mean a lot...just think about a sinking boat that has a hole in it that's about 3\u201d in diameter... it doesn't take long to sink.\nI have no agenda, besides f=getting to know your business and seeing wher I can patch the holes and find what makes you do darn unique (I know this won't take long.)\nMany folks, I bet, will find what they need to get off their chest with a quick phone call and they just paypal me the money and make a coffee at home. Look, that's fine too.\nI just to get you ot of your comfort zone, because this is where it all starts my frind.\nSome smart GOAT entrepreneur will probably get everything they need just out of our lil mini consulatant for the more extensive business consukting I offer, and look, that's fine, too.\nMaybe this cup of coffee will be all you need to gtet back on your feet, but not only surive, but thrive!\nI'm not trying to make frineds, or make a bunch of money, or look down your shirt\nBut this is only going to be a 45 minute (max) coffee chat\nAnd, I know you (and me) both have a lot on our plates. 
So no messing around\nAfter our consultation I will follow up with you in a few days and see how things are going, then I will be emailing you about once or twice every two weeks, just to make sure you're staying on task and implementing what we went over.\nTghere is no obligation to go any further and will gladly give you back your money if this pre-consultation doesn't put you on the right path or you don't get any value out of it...", + "generated_text": " $ is a$ is a $ is a $ is a $ is a $ is a $$1, you\n$ $$ $\n$ $- $\n$\n$\n$ $$$\n$\n$\n$\n$\n$\n$\n$\n$???????, $\n-1\n$\n1.5.\n$\n$, you\n$.\n$\n1,1.1\nI\n$.\nI\n\n\nHow\n$,,,0,1,0,0.0\nIn\nThe\nThe\nThe\n", + "generated_tokens": [ + 1659, + 1395, + 1261, + 1036, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1036, + 1049, + 1044, + 1636, + 1010, + 1036, + 1659, + 1036, + 1659, + 1010, + 1036, + 1659, + 1045, + 1659, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1659, + 1036, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1063, + 1063, + 1063, + 1063, + 1063, + 1063, + 1063, + 1044, + 1659, + 1010, + 1045, + 1049, + 1010, + 1036, + 1010, + 1049, + 1046, + 1053, + 1046, + 1010, + 1036, + 1010, + 1036, + 1044, + 1636, + 1010, + 1036, + 1046, + 1010, + 1036, + 1010, + 1049, + 1044, + 1049, + 1046, + 1049, + 1010, + 1073, + 1010, + 1036, + 1046, + 1010, + 1073, + 1010, + 1010, + 1010, + 7801, + 1010, + 1036, + 1044, + 1044, + 1044, + 1048, + 1044, + 1049, + 1044, + 1048, + 1044, + 1048, + 1046, + 1048, + 1010, + 1785, + 1010, + 1784, + 1010, + 1784, + 1010, + 1784, + 1010 + ], + "latency": 9.833553552627563, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -7.7319135665893555, + -2.188307285308838, + -0.7547445297241211, + -0.7294313311576843, + -10.238386154174805, + -3.3775341510772705, + -6.394498825073242, + 
-7.354557037353516, + -9.018157958984375, + -3.012073040008545, + -3.2584073543548584, + -5.220732688903809, + -4.620487213134766, + -2.5078930854797363, + -3.752683162689209, + -0.13360372185707092, + -0.05705544352531433, + -0.41462242603302, + -1.585279941558838, + -1.6438164710998535, + -1.9557222127914429, + -0.3989897072315216, + -0.0365302674472332, + -6.368816375732422, + -0.8731719255447388, + -0.022585075348615646, + -0.2775891423225403, + -0.0027362785767763853, + -0.0006812873762100935, + -1.581446647644043, + -0.008688976056873798, + -0.3532317280769348, + -6.071163177490234, + -9.162371635437012, + -9.965556144714355, + -2.400461196899414, + -2.9898362159729004, + -2.9803032875061035, + -2.12601900100708, + -3.500912666320801, + -7.015069007873535, + -2.278961420059204, + -0.46380555629730225, + -4.078739166259766, + -1.9430254697799683, + -3.5642244815826416, + -3.689701795578003, + -6.201474189758301, + -6.580833911895752, + -2.3081111907958984, + -5.42717170715332, + -1.1886008977890015, + -1.172760248184204, + -1.3571951389312744, + -1.3551844358444214, + -3.376784324645996, + -0.05118789151310921, + -4.064360618591309, + -2.575554847717285, + -0.6994737386703491, + -2.56724214553833, + -2.1888976097106934, + -0.4816131591796875, + -4.070178985595703, + -2.0060782432556152, + -6.858033180236816, + -0.059200502932071686, + -3.214278221130371, + -0.9671833515167236, + -0.823198676109314, + -1.0130078792572021, + -4.595561981201172, + -0.012724989093840122, + -5.214311599731445, + -8.246870040893555, + -3.1476030349731445, + -3.299684524536133, + -4.218191146850586, + -7.318399429321289, + -0.8580498695373535, + -3.0894036293029785, + -1.886361002922058, + -7.217658996582031, + -3.271679639816284, + -3.9717154502868652, + -1.8835484981536865, + -10.034332275390625, + -11.382490158081055, + -5.417011260986328, + -7.505967140197754, + -2.33837890625, + -0.07904055714607239, + -3.294971227645874, + -7.813640594482422, + -1.7646901607513428, + 
-4.025320053100586, + -3.5977325439453125, + -4.390352249145508, + -9.147806167602539, + -0.5303041934967041, + -7.721246242523193, + -0.6311959028244019, + -0.8119025230407715, + -0.7227814197540283, + -1.8369406461715698, + -0.20933297276496887, + -1.5395950078964233, + -4.424448490142822, + -4.084965705871582, + -3.355497360229492, + -1.0475609302520752, + -6.479413986206055, + -0.7810530662536621, + -2.132437229156494, + -6.648703098297119, + -2.9522438049316406, + -1.2485712766647339, + -4.040503025054932, + -2.3415768146514893, + -5.358206748962402, + -1.6258506774902344, + -3.956300973892212, + -0.732298731803894, + -7.441117286682129, + -1.5242161750793457, + -2.4555861949920654, + -4.295163154602051, + -9.687600135803223, + -0.8213484883308411, + -1.2446978092193604, + -0.01942702941596508, + -4.619411468505859, + -3.3297007083892822, + -2.2139487266540527, + -3.691431999206543, + -2.6574106216430664, + -6.075929641723633, + -0.6123450994491577, + -1.2942559719085693, + -0.6262839436531067, + -7.398006439208984, + -4.4869890213012695, + -4.202048301696777, + -4.982994079589844, + -0.637227475643158, + -3.061023235321045, + -10.117584228515625, + -3.8567495346069336, + -4.0480828285217285, + -2.472019672393799, + -4.246374607086182, + -1.3939155340194702, + -7.132441520690918, + -0.20108745992183685, + -4.986658573150635, + -4.387957572937012, + -0.01108358334749937, + -4.209756851196289, + -7.271108627319336, + -4.047314643859863, + -2.6497321128845215, + -1.4763175249099731, + -0.28365400433540344, + -3.5247769355773926, + -1.4226995706558228, + -4.327237129211426, + -2.0407187938690186, + -6.1437907218933105, + -1.5190880298614502, + -2.5511486530303955, + -7.504094123840332, + -2.152172565460205, + -6.708334922790527, + -6.913146495819092, + -3.6959621906280518, + -6.752341270446777, + -0.63083815574646, + -0.12433214485645294, + -5.0525641441345215, + -4.435934066772461, + -0.45601028203964233, + -6.3459577560424805, + -9.882917404174805, + 
-3.1422882080078125, + -2.550520658493042, + -3.2099051475524902, + -6.278127193450928, + -0.07764133810997009, + -3.155696153640747, + -1.933587670326233, + -9.61027717590332, + -6.211391925811768, + -4.664543151855469, + -6.783782005310059, + -5.676271438598633, + -8.605900764465332, + -0.0824289619922638, + -3.5463995933532715, + -13.374168395996094, + -1.2401021718978882, + -1.8734056949615479, + -3.4154422283172607, + -1.6733763217926025, + -17.633970260620117, + -9.345113754272461, + -0.6277351975440979, + -2.9617538452148438, + -2.5565333366394043, + -10.10580825805664, + -7.130337715148926, + -7.36820125579834, + -4.098911285400391, + -5.747079372406006, + -2.945054769515991, + -0.7887389063835144, + -1.6583149433135986, + -1.0165244340896606, + -6.581666946411133, + -5.926386833190918, + -5.845194339752197, + -0.9657630920410156, + -7.868755340576172, + -1.3244551420211792, + -0.2657390236854553, + -0.06403665244579315, + -2.983020782470703, + -5.943899631500244, + -7.877285957336426, + -3.593116283416748, + -3.819509506225586, + -7.226177215576172, + -2.5206997394561768, + -3.385587215423584, + -0.37499159574508667, + -1.4698283672332764, + -3.1460342407226562, + -0.0077166082337498665, + -4.350916862487793, + -3.2183218002319336, + -0.6242184638977051, + -1.4782464504241943, + -2.8054311275482178, + -3.0831401348114014, + -12.17662525177002, + -2.113419532775879, + -1.6448111534118652, + -2.1834323406219482, + -0.7630388140678406, + -10.1896390914917, + -6.234405517578125, + -11.46288776397705, + -1.003785490989685, + -4.211658477783203, + -1.5010679960250854, + -5.859302043914795, + -2.0465080738067627, + -3.7468819618225098, + -4.684195518493652, + -4.318704128265381, + -2.7234389781951904, + -9.00437068939209, + -3.043811321258545, + -3.1384406089782715, + -2.713779926300049, + -2.095993995666504, + -2.1484954357147217, + -10.274479866027832, + -0.682350754737854, + -0.25973302125930786, + -3.6964316368103027, + -13.434456825256348, + 
-2.3368239402770996, + -5.382724761962891, + -1.9073458909988403, + -5.905669212341309, + -0.032165709882974625, + -1.6530004739761353, + -2.728893280029297, + -1.640552043914795, + -1.1391171216964722, + -1.4353511333465576, + -4.003787994384766, + -0.3450564742088318, + -0.7168521285057068, + -0.34650325775146484, + -0.3616408705711365, + -7.062709331512451, + -1.2851682901382446, + -2.299129009246826, + -8.800156593322754, + -5.208735466003418, + -4.780910491943359, + -2.78342342376709, + -4.469717979431152, + -6.909726619720459, + -2.5114197731018066, + -0.659822404384613, + -0.6915416121482849, + -3.2363741397857666, + -0.5283617377281189, + -0.10473938286304474, + -6.215325832366943, + -7.283237934112549, + -1.6797031164169312, + -11.50100040435791, + -7.5822978019714355, + -3.387317657470703, + -11.407575607299805, + -5.441976547241211, + -3.3264851570129395, + -0.7265786528587341, + -1.382750153541565, + -7.841699600219727, + -8.105277061462402, + -3.9569506645202637, + -4.963083267211914, + -0.5492897629737854, + -4.6081390380859375, + -5.870400905609131, + -3.957930088043213, + -5.275494575500488, + -4.105091094970703, + -2.15435528755188, + -2.8472700119018555, + -1.1278448104858398, + -8.226571083068848, + -0.40629008412361145, + -9.916461944580078, + -4.616743087768555, + -1.691868543624878, + -0.6639478802680969, + -2.5716753005981445, + -6.676954746246338, + -6.535329818725586, + -0.4170510768890381, + -1.443942904472351, + -3.145481824874878, + -1.440589427947998, + -0.26935356855392456, + -0.9647155404090881, + -4.335958957672119, + -1.5647850036621094, + -5.890466690063477, + -3.01654052734375, + -1.9168468713760376, + -3.7365682125091553, + -8.001864433288574, + -10.680083274841309, + -4.489352226257324, + -4.6058149337768555, + -7.69011116027832, + -3.6247005462646484, + -1.5600426197052002, + -10.2160062789917, + -5.004643440246582, + -0.19602319598197937, + -3.375545024871826, + -2.669325590133667, + -1.3932737112045288, + -1.6410658359527588, 
+ -6.847603797912598, + -6.744344711303711, + -0.5215591192245483, + -0.25840020179748535, + -1.1448237895965576, + -5.57253885269165, + -7.251138687133789, + -4.221924781799316, + -0.7688062787055969, + -2.504502534866333, + -3.146519660949707, + -2.206653356552124, + -1.4295082092285156, + -7.96943998336792, + -4.332189083099365, + -2.5750505924224854, + -1.7102608680725098, + -5.311381816864014, + -8.897522926330566, + -2.994919538497925, + -3.3397974967956543, + -2.1794328689575195, + -2.437566041946411, + -0.3181810975074768, + -0.27412793040275574, + -0.7914466857910156, + -2.3470635414123535, + -2.4099245071411133, + -2.491870880126953, + -3.024170160293579, + -1.9719040393829346, + -11.373910903930664, + -1.4279751777648926, + -0.14573107659816742, + -2.055763006210327, + -6.366893291473389, + -4.24091911315918, + -0.00709194503724575, + -2.0199716091156006, + -2.524750232696533, + -1.4272525310516357, + -0.5185190439224243, + -2.927150011062622, + -2.7070627212524414, + -3.365638017654419, + -4.318085193634033, + -7.773144721984863, + -1.7947180271148682, + -7.657534599304199, + -8.767786026000977, + -14.74280071258545, + -1.8042558431625366, + -3.2712037563323975, + -1.4002125263214111, + -4.887944221496582, + -1.4821010828018188, + -1.5255622863769531, + -5.879070281982422, + -4.463839530944824, + -5.1955976486206055, + -5.665647506713867, + -0.3775045573711395, + -5.9350481033325195, + -2.800539255142212, + -0.13162286579608917, + -3.034379720687866, + -4.729524612426758, + -4.6252641677856445, + -3.850942611694336, + -2.4760568141937256, + -6.059760093688965, + -10.12075138092041, + -0.9469369649887085, + -11.595907211303711, + -6.875324726104736, + -4.268826007843018, + -2.835529088973999, + -3.8626279830932617, + -4.876199245452881, + -0.013071090914309025, + -4.964417934417725, + -0.7445687055587769, + -5.707155227661133, + -6.10660457611084, + -4.317755699157715, + -4.440443992614746, + -2.9202542304992676, + -4.743522644042969, + 
-1.2569392919540405, + -2.8675737380981445, + -2.3151841163635254, + -4.318130970001221, + -1.9054772853851318, + -1.1808521747589111, + -0.765956461429596, + -2.768916606903076, + -6.237791061401367, + -1.7224305868148804, + -7.137521743774414, + -4.512486457824707, + -1.9069950580596924, + -4.145983695983887, + -5.365190505981445, + -0.059828490018844604, + -2.273892879486084, + -3.4013004302978516, + -5.035730361938477, + -6.501443386077881, + -9.903446197509766, + -1.6332892179489136, + -2.1572084426879883, + -1.6149548292160034, + -1.4698481559753418, + -6.01010799407959, + -2.2243528366088867, + -6.900836944580078, + -6.0930986404418945, + -2.974020481109619, + -3.225423574447632, + -8.423272132873535, + -1.3423724174499512, + -3.626147508621216, + -0.4862469434738159, + -6.860866546630859, + -3.8910953998565674, + -2.33319354057312, + -1.7229185104370117, + -2.215972423553467, + -8.99046516418457, + -4.099084854125977, + -2.4191012382507324, + -8.288970947265625, + -2.9641928672790527, + -1.5036451816558838, + -3.0544614791870117, + -0.0715634673833847, + -2.444031238555908, + -4.520998954772949, + -3.972568988800049, + -0.4985870122909546, + -2.1651363372802734, + -3.4427435398101807, + -1.730639100074768, + -0.9458961486816406, + -7.740211009979248, + -9.39163875579834, + -3.895984172821045, + -1.7523534297943115, + -5.41331672668457, + -8.910720825195312, + -12.971094131469727, + -3.0455880165100098, + -10.501265525817871, + -3.3864927291870117, + -4.842309951782227, + -3.9964733123779297, + -7.3046793937683105, + -2.6607093811035156, + -1.3541781902313232, + -5.003270626068115, + -3.944551944732666, + -0.11356143653392792, + -5.174440383911133, + -9.628616333007812, + -8.654989242553711, + -8.980416297912598, + -6.670101642608643, + -5.488286018371582, + -5.943419933319092, + -2.126483201980591, + -8.054739952087402, + -7.458671569824219, + -2.5267202854156494, + -6.455472946166992, + -8.655346870422363, + -7.903901100158691, + -6.221062660217285, + 
-7.129237174987793, + -4.2345380783081055, + -2.5375306606292725, + -7.697700500488281, + -1.567080020904541, + -2.084331750869751, + -0.25020831823349, + -1.5145041942596436, + -4.619244575500488, + -0.2970108985900879, + -0.4977554678916931, + -6.197869300842285, + -4.030620098114014, + -7.232107639312744, + -0.21076253056526184, + -1.563366174697876, + -1.133756160736084, + -2.708237648010254, + -4.080535888671875, + -0.6818401217460632, + -0.1864331066608429, + -0.49012088775634766, + -8.732468605041504, + -11.945040702819824, + -5.243098735809326, + -1.5294703245162964, + -0.8935543298721313, + -0.6174070835113525, + -1.5068217515945435, + -3.5766501426696777, + -5.393096923828125, + -4.202867031097412, + -14.765748023986816, + -5.2513813972473145, + -0.7597705721855164, + -0.2502063810825348, + -1.7403976917266846, + -2.8000779151916504, + -1.9808133840560913, + -2.1654744148254395, + -1.8629226684570312, + -3.222038745880127, + -0.040942225605249405, + -2.3384013175964355, + -10.210381507873535, + -4.5859761238098145, + -0.5805734395980835, + -3.7019288539886475, + -2.001936674118042, + -2.7876083850860596, + -2.9799084663391113, + -4.349887371063232, + -0.0792960673570633, + -1.4366114139556885, + -1.0813264846801758, + -1.3510822057724, + -6.7060699462890625, + -5.436615943908691, + -3.978389263153076, + -6.785447597503662, + -6.147171497344971, + -3.97414231300354, + -4.332991600036621, + -0.9269428253173828, + -5.1237101554870605, + -4.486598968505859, + -0.04678357392549515, + -1.0307552814483643, + -1.4249452352523804, + -4.517682075500488, + -3.561821699142456, + -2.0815205574035645, + -0.6041194200515747, + -5.992964744567871, + -7.092092514038086, + -0.48916709423065186, + -2.6405677795410156, + -4.3345723152160645, + -3.533582925796509, + -3.1233346462249756, + -3.107872486114502, + -1.9901115894317627, + -3.1052846908569336, + -1.8440347909927368, + -6.21368408203125, + -1.8796799182891846, + -2.705214738845825, + -0.2987763583660126, + 
-4.070865154266357, + -1.6675832271575928, + -1.3896636962890625, + -1.5731089115142822, + -3.526170015335083, + -2.5088443756103516, + -1.208929419517517, + -3.673125743865967, + -2.501532554626465, + -6.875064373016357, + -8.512459754943848, + -1.042314052581787, + -3.657850980758667, + -7.0950798988342285, + -4.974049091339111, + -8.14085578918457, + -3.529888153076172, + -1.9389504194259644, + -7.0902204513549805, + -2.409292459487915, + -2.9428021907806396, + -1.688283085823059, + -3.622368335723877, + -2.0903351306915283, + -4.160663604736328, + -3.1683764457702637, + -1.2135626077651978, + -7.566033363342285, + -3.1186251640319824, + -5.899919509887695, + -0.9518840312957764, + -2.656729221343994, + -2.2994377613067627, + -6.806836128234863, + -1.280236840248108, + -2.838846206665039, + -1.3598848581314087, + -11.707776069641113, + -3.134333372116089, + -0.6230669617652893, + -8.219222068786621, + -7.562507152557373, + -7.489459037780762, + -1.5368008613586426, + -7.149652481079102, + -5.749268054962158, + -3.162869691848755, + -2.7235195636749268, + -6.128931999206543, + -1.1934199333190918, + -3.986410617828369, + -3.76609468460083, + -1.712721586227417, + -3.195504903793335, + -8.397743225097656, + -3.1260581016540527, + -9.792022705078125, + -4.217884540557861, + -11.583260536193848, + -5.987588882446289, + -5.178754806518555, + -6.994749069213867, + -5.167606353759766, + -7.124668121337891, + -6.201416015625, + -10.203682899475098, + -6.858526229858398, + -2.733592987060547, + -5.078882217407227, + -9.003358840942383, + -4.704894542694092, + -3.9085562229156494, + -7.247268199920654, + -7.091092109680176, + -4.4150166511535645, + -7.56699275970459, + -9.485116004943848, + -1.9977033138275146, + -6.65272331237793, + -2.236643075942993, + -7.518955707550049, + -5.525973320007324, + -4.67877721786499, + -6.608670234680176, + -5.536133766174316, + -10.772479057312012, + -10.8853178024292, + -3.6156129837036133, + -6.751470565795898, + -6.4537434577941895, + 
-3.4220399856567383, + -8.251005172729492, + -3.2146153450012207, + -6.330069541931152, + -1.5551663637161255, + -6.520583629608154, + -10.450878143310547, + -5.8788957595825195, + -3.7398200035095215, + -3.9084208011627197, + -0.3640081584453583, + -6.961522102355957, + -6.066243648529053, + -7.270624160766602, + -5.098455429077148, + -2.7642822265625, + -5.460171699523926, + -7.362828731536865, + -2.558631658554077, + -2.186410427093506, + -2.5309929847717285, + -2.46756649017334, + -2.0306026935577393, + -1.8713470697402954, + -2.108008623123169, + -1.2698389291763306, + -2.1712756156921387, + -2.4432802200317383, + -1.1477653980255127, + -1.8417484760284424, + -2.5971946716308594, + -1.8250831365585327, + -2.103092670440674, + -2.5183165073394775, + -2.9367291927337646, + -1.9412965774536133, + -1.7692793607711792, + -2.864521026611328, + -3.1332175731658936, + -1.098311185836792, + -2.946441173553467, + -2.2800471782684326, + -3.1929852962493896, + -2.754260778427124, + -3.485616445541382, + -3.3010287284851074, + -2.5537776947021484, + -2.6752865314483643, + -3.1617612838745117, + -2.4571690559387207, + -2.060081958770752, + -2.425969362258911, + -2.212725877761841, + -2.4232254028320312, + -3.0587053298950195, + -2.4074010848999023, + -2.457937479019165, + -2.319617986679077, + -2.6340954303741455, + -2.599524736404419, + -2.5302212238311768, + -1.6849274635314941, + -2.2609786987304688, + -2.039928674697876, + -1.9474098682403564, + -2.3550753593444824, + -1.718749761581421, + -2.413884162902832, + -1.6247628927230835, + -2.4784040451049805, + -1.828325629234314, + -1.3880831003189087, + -1.4448199272155762, + -1.1477117538452148, + -1.1669728755950928, + -1.8787822723388672, + -1.5565840005874634, + -1.6666553020477295, + -1.747725248336792, + -1.959598422050476, + -2.0376486778259277, + -2.345367431640625, + -2.055098533630371, + -1.3940613269805908, + -3.4385242462158203, + -2.7489635944366455, + -3.2590157985687256, + -3.1128957271575928, + 
-1.7070379257202148, + -3.9010369777679443, + -3.21574068069458, + -3.3850393295288086, + -1.8778185844421387, + -2.698211908340454, + -1.8060741424560547, + -2.0845324993133545, + -3.4797585010528564, + -2.263254404067993, + -3.083108901977539, + -1.6589758396148682, + -2.687279224395752, + -1.77505624294281, + -2.6142921447753906, + -1.934045672416687, + -1.8834377527236938, + -2.8038980960845947, + -1.550542950630188, + -3.4054152965545654, + -1.724036693572998, + -2.3146564960479736, + -1.5134503841400146, + -2.9289023876190186, + -1.5285141468048096, + -3.421035051345825, + -1.3757282495498657, + -3.441431760787964, + -1.5286564826965332, + -3.4372904300689697, + -3.173043966293335, + -1.1313854455947876, + -1.486415147781372, + -2.506413221359253, + -3.5165903568267822, + -1.4112176895141602, + -3.7175354957580566, + -2.2170844078063965, + -0.704839289188385, + -0.6626103520393372, + -2.5483946800231934, + -0.949668288230896, + -3.1339564323425293, + -1.3326977491378784, + -3.5493476390838623, + -0.6785370111465454, + -3.236161470413208, + -2.1347084045410156, + -3.802447557449341, + -2.585503339767456, + -3.9361765384674072, + -2.290905714035034, + -3.714280605316162, + -1.793616533279419, + -3.7252492904663086, + -1.450188159942627, + -3.11938738822937, + -1.25174880027771 + ] + }, + "32": { + "input_prompt": "create a conversational article", + "generated_text": " about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. 
The article should be about the topic of the article.", + "generated_tokens": [ + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046 + ], + "latency": 49.05716586112976, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -4.4165568351745605, + -11.358176231384277, + -0.0701780766248703, + -7.797665119171143, + -2.6805992126464844, + -1.4707680940628052, + -3.0390255451202393, + -1.6902849674224854, + -1.270594835281372, + -1.1936196088790894, + -1.2523558139801025, + -2.7270259857177734, + -1.2371309995651245, + -0.9618493318557739, + -0.4379909038543701, + -1.3917063474655151, + -1.1055524349212646, + -0.9122569561004639, + -0.9911308288574219, + -0.08436793833971024, + -0.5424078106880188, + -0.9181017279624939, + -0.5873759388923645, + -0.19014373421669006, + -0.06655456870794296, + -0.15252672135829926, + -0.09415211528539658, + -0.009787309914827347, + -0.013910251669585705, + -0.005296128336340189, + -0.005677408073097467, + -0.02013739012181759, + 
-0.21594694256782532, + -0.07153760641813278, + -0.0066444179974496365, + -0.010198505595326424, + -0.011980246752500534, + -0.003686776151880622, + -0.0037619550712406635, + -0.0022467151284217834, + -0.004088377580046654, + -0.021828632801771164, + -0.0012669878778979182, + -0.09768074005842209, + -0.02652405947446823, + -0.0019286142196506262, + -0.002283824374899268, + -0.0032225127797573805, + -0.0009741804678924382, + -0.0009415484382770956, + -0.001211624126881361, + -0.001135300612077117, + -0.002340436913073063, + -0.0010846928926184773, + -0.0509282611310482, + -0.03832047060132027, + -0.00257422705180943, + -0.0022806129418313503, + -0.00262785074301064, + -0.0008195855189114809, + -0.0010239601833745837, + -0.0013777059502899647, + -0.0009899006690829992, + -0.0018756669014692307, + -0.0015304292319342494, + -0.08506463468074799, + -0.01893703266978264, + -0.0013797297142446041, + -0.0014461545506492257, + -0.0013971101725474, + -0.0005869334563612938, + -0.0005212855176068842, + -0.000876757490914315, + -0.0005256939912214875, + -0.0012863941956311464, + -0.0015691122971475124, + -0.051276568323373795, + -0.00973513163626194, + -0.0010469438275322318, + -0.0011531615164130926, + -0.0009969270322471857, + -0.00038342276820912957, + -0.0004032037395518273, + -0.000730247818864882, + -0.0003275334893260151, + -0.0008700875914655626, + -0.0017572689102962613, + -0.044393111020326614, + -0.013102858327329159, + -0.0011463745031505823, + -0.0012070996453985572, + -0.0012325793504714966, + -0.0005048430757597089, + -0.0004876854654867202, + -0.0007901645149104297, + -0.00041500062798149884, + -0.0009869233472272754, + -0.0018687656847760081, + -0.03943866863846779, + -0.014425630681216717, + -0.0014756753807887435, + -0.001423775334842503, + -0.001209719106554985, + -0.0005046047735959291, + -0.00042167355422861874, + -0.0007688426994718611, + -0.0002699726028367877, + -0.0006598440813831985, + -0.0017849955474957824, + -0.038999658077955246, + 
-0.012665312737226486, + -0.0014427024871110916, + -0.0014492495683953166, + -0.001016576774418354, + -0.00042083943844772875, + -0.00033241944038309157, + -0.0006403064471669495, + -0.00022373080719262362, + -0.0007053509471006691, + -0.0018597226589918137, + -0.030997740104794502, + -0.011259939521551132, + -0.0012655591126531363, + -0.00134151556994766, + -0.0008106521563604474, + -0.0003513672563713044, + -0.0002964295563288033, + -0.0006368515896610916, + -0.00020180096908006817, + -0.0005779979983344674, + -0.0016014858847483993, + -0.0271126888692379 + ] + }, + "64": { + "input_prompt": "App Concept: \"Eggy's Interactive Adventure World\"\n\nEggy's Interactive Adventure World is a cutting-edge app designed for phones and tablets that combines immersive storytelling with engaging, interactive experiences for both parents and children. This innovative app allows users to explore the world of Eggy and participate in a series of adventures that teach essential life skills, foster personal growth, and strengthen parent-child bonds.\n\nKey Features:\n\n1. Interactive Stories: The app brings the 20 Eggy stories to life through interactive, animated storytelling. Users can read, listen, and actively participate in the stories, making choices that guide Eggy through various adventures, each with unique lessons and experiences.\n2. Augmented Reality (AR) Integration: The app utilizes AR technology, allowing users to interact with Eggy and the story characters in their real-world environment. This immersive experience creates a deeper connection with the stories and encourages engagement.\n3. Personalized Adventures: The app customizes each story based on the user's interests, needs, and personal growth areas. By collecting user input, the app tailors the storylines and learning outcomes to ensure that each adventure resonates with the child's unique experiences.\n4. 
Parent-Child Cooperative Play: The app encourages parents and children to collaborate and engage in the stories together. Through a split-screen mode or multi-device connectivity, parents and children can actively participate in the stories and learn essential life skills together.\n5. Skill-Building Mini-Games: Interspersed throughout the stories are educational mini-games that reinforce the lessons and themes. These games are designed to be fun, engaging, and accessible to children, encouraging them to practice and develop various skills.\n6. Progress Tracking and Rewards: The app tracks the user's progress through the stories and rewards them with badges, trophies, and other collectibles. This gamification aspect encourages continued engagement and fosters a sense of accomplishment and growth.\n7. Community Features: The app provides a safe, moderated online community where parents can share their experiences, discuss the stories and lessons, and connect with other families who are also using the app. This community fosters support, inspiration, and learning.\n8. Expert Resources: The app includes access to resources from experts in child development, psychology, and education, providing parents with additional tools and guidance on fostering the growth and well-being of their children.\n\nEggy's Interactive Adventure World is a one-of-a-kind, mind-blowing app that revolutionizes the way parents and children engage with stories and learn essential life skills. Combining the power of storytelling, cutting-edge technology, and expert resources, this app is a must-have for families looking to strengthen their bonds and support their children's personal growth.", + "generated_text": "The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). 
The 1999\u20132000 season was the 10th season was the 10th season was the 10 season was the 10 season was the 10", + "generated_tokens": [ + 2, + 1784, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048 + ], + "latency": 87.92628955841064, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -12.107745170593262, + -2.9727728366851807, + -3.720092535018921, + -5.592433929443359, + -10.964235305786133, + -3.654498338699341, + -9.33439826965332, + -4.833785057067871, + -5.187321662902832, + -2.6944785118103027, + -6.9262237548828125, + -0.654232919216156, + -0.5550781488418579, + -0.21346639096736908, + -0.0134271876886487, + -0.010840100236237049, + -1.3878544569015503, + -0.6296291351318359, + -7.9766011238098145, + -0.4393192231655121, + -5.639142036437988, + -3.277270793914795, + -1.0206468105316162, + -11.703084945678711, + -0.7100943922996521, + -0.2809169888496399, + -2.771284818649292, + -7.190817832946777, + -4.048691749572754, + -0.012056218460202217, + -3.3802318572998047, + 
-0.6807184815406799, + -3.4844107627868652, + -3.312331199645996, + -0.5001641511917114, + -2.61255145072937, + -4.243694305419922, + -4.333778381347656, + -6.0625810623168945, + -0.011777156963944435, + -0.37577226758003235, + -0.9490834474563599, + -3.5450198650360107, + -2.1778035163879395, + -0.45957911014556885, + -3.00771164894104, + -1.7600425481796265, + -0.09766030311584473, + -2.467618942260742, + -1.329679012298584, + -0.8384320735931396, + -1.1864604949951172, + -3.628342866897583, + -0.2470003068447113, + -1.8938640356063843, + -5.168431282043457, + -0.05005566030740738, + -2.258014678955078, + -2.449028968811035, + -0.0034086955711245537, + -3.9485883712768555, + -1.6201664209365845, + -5.139942646026611, + -4.859354496002197, + -0.23686674237251282, + -0.5541543364524841, + -2.5826025009155273, + -6.114635467529297, + -4.3380208015441895, + -0.7412900924682617, + -0.3221715986728668, + -0.13805493712425232, + -4.1797332763671875, + -7.3456268310546875, + -0.13762745261192322, + -2.0905232429504395, + -1.0178627967834473, + -4.108260631561279, + -0.6007124185562134, + -1.0410642623901367, + -4.122039794921875, + -0.35905471444129944, + -1.4274661540985107, + -4.139932155609131, + -0.4237431585788727, + -1.6294409036636353, + -0.9811424016952515, + -4.132790565490723, + -1.1318120956420898, + -6.8258256912231445, + -1.5455098152160645, + -0.6984409093856812, + -13.664215087890625, + -0.1166313961148262, + -1.6347849369049072, + -0.28875046968460083, + -0.03130083531141281, + -1.5293006896972656, + -1.6488375663757324, + -4.224111557006836, + -4.760683059692383, + -1.9758747816085815, + -1.5828256607055664, + -2.8463857173919678, + -0.2620386481285095, + -1.7243889570236206, + -1.7945923805236816, + -0.8884308338165283, + -0.3766394555568695, + -0.34033581614494324, + -9.05566692352295, + -0.22754782438278198, + -0.033802058547735214, + -0.34108465909957886, + -0.5644669532775879, + -2.0925779342651367, + -4.547505855560303, + -10.870464324951172, + 
-1.1072022914886475, + -5.503787994384766, + -3.259672164916992, + -0.007964519783854485, + -3.0111639499664307, + -4.246737480163574, + -0.7813188433647156, + -3.331031322479248, + -4.485962867736816, + -0.9492117166519165, + -2.6757047176361084, + -1.1591349840164185, + -1.122117519378662, + -2.629878044128418, + -5.986321926116943, + -0.2146703153848648, + -0.002392764901742339, + -7.372479438781738, + -0.007077385671436787, + -0.06599216908216476, + -0.0970711037516594, + -3.2874932289123535, + -0.0019583588000386953, + -0.9122000336647034, + -4.930907249450684, + -0.019508399069309235, + -0.308611661195755, + -0.07778516411781311, + -3.8497893810272217, + -0.46124517917633057, + -0.38821348547935486, + -2.668412208557129, + -1.845987319946289, + -0.06470083445310593, + -0.006619549356400967, + -1.2610487937927246, + -0.13015533983707428, + -3.365312099456787, + -0.0014690094394609332, + -1.6789823770523071, + -1.2499005794525146, + -3.3992111682891846, + -5.563300132751465, + -0.823418140411377, + -4.24124813079834, + -1.6597849130630493, + -0.6941139698028564, + -1.5637556314468384, + -0.5482053756713867, + -0.9507225751876831, + -3.764758586883545, + -0.0006518622976727784, + -0.7540555000305176, + -5.058262825012207, + -0.3302401602268219, + -2.8130555152893066, + -0.17079885303974152, + -2.871047019958496, + -0.3991694450378418, + -3.1476998329162598, + -0.3488404452800751, + -2.0545666217803955, + -4.201597690582275, + -5.164614677429199, + -0.0271432027220726, + -0.0009785869624465704, + -3.3444161415100098, + -1.3117046356201172, + -6.375423431396484, + -0.05535568296909332, + -0.3919340968132019, + -0.060594215989112854, + -6.507473468780518, + -0.0023910999298095703, + -2.143423318862915, + -3.335618257522583, + -2.953970432281494, + -0.0013383012264966965, + -0.8080525398254395, + -0.29526084661483765, + -0.04036511853337288, + -3.231475353240967, + -1.0585589408874512, + -6.136373043060303, + -0.006182829383760691, + -0.035548023879528046, + 
-5.509808540344238, + -1.8490750789642334, + -9.83314037322998, + -0.07037576287984848, + -3.1621387004852295, + -6.762360095977783, + -1.3490527868270874, + -3.601043462753296, + -1.176393985748291, + -0.4342959523200989, + -0.06266004592180252, + -5.464046001434326, + -0.017946599051356316, + -1.0416009426116943, + -1.6117159128189087, + -12.289417266845703, + -1.5004339218139648, + -5.76563835144043, + -4.038386821746826, + -0.20812086760997772, + -3.6306562423706055, + -1.3901070356369019, + -1.087137222290039, + -2.423213243484497, + -4.503086090087891, + -0.0008031480247154832, + -0.03627370297908783, + -0.1653430461883545, + -7.958648681640625, + -1.1018548011779785, + -1.290948748588562, + -3.8049263954162598, + -1.8253734111785889, + -0.059022851288318634, + -0.0013984196120873094, + -4.698851585388184, + -2.5421664714813232, + -0.024493809789419174, + -4.828659534454346, + -3.0295286178588867, + -3.550312042236328, + -0.1185273677110672, + -0.22595760226249695, + -0.10782183706760406, + -1.4033282995224, + -0.4485701024532318, + -0.2889708876609802, + -0.05471855774521828, + -0.007632025051862001, + -2.1156554222106934, + -0.6249589323997498, + -4.198577404022217, + -0.14178156852722168, + -4.284021377563477, + -2.227515935897827, + -3.5022120475769043, + -0.19575819373130798, + -15.964509963989258, + -4.055960655212402, + -11.125024795532227, + -0.7681724429130554, + -3.0436902046203613, + -7.030262470245361, + -4.376729488372803, + -5.476145267486572, + -0.4219042658805847, + -3.7689766883850098, + -0.060010604560375214, + -0.8134393692016602, + -0.11386934667825699, + -0.025473715737462044, + -0.09736856073141098, + -4.357361793518066, + -0.3670865297317505, + -0.08063744008541107, + -0.1311480849981308, + -1.0903867483139038, + -1.2705107927322388, + -1.5076212882995605, + -4.295275688171387, + -0.04185756668448448, + -0.19810955226421356, + -1.9645220041275024, + -0.9597910642623901, + -0.13429655134677887, + -0.002283110748976469, + 
-7.066074371337891, + -3.639211654663086, + -1.0263917446136475, + -8.124760627746582, + -1.132537841796875, + -0.09160765260457993, + -0.08996370434761047, + -10.165366172790527, + -3.501585006713867, + -0.0019847711082547903, + -0.05309417471289635, + -0.31209683418273926, + -0.15089339017868042, + -1.23564875125885, + -1.2685208320617676, + -7.832758903503418, + -0.19271136820316315, + -0.014305183663964272, + -0.0007532381569035351, + -0.44688940048217773, + -2.6239724159240723, + -1.738666296005249, + -1.6480977535247803, + -0.46753185987472534, + -8.656959533691406, + -3.79868483543396, + -0.9281394481658936, + -2.2381181716918945, + -1.7654449939727783, + -0.4948798418045044, + -0.025028761476278305, + -1.5435361862182617, + -1.6390818357467651, + -1.4962153434753418, + -0.3425217270851135, + -0.013077914714813232, + -0.038474079221487045, + -5.3364362716674805, + -0.42365288734436035, + -1.884093999862671, + -3.510357618331909, + -6.198029518127441, + -0.44375038146972656, + -0.0008789013954810798, + -3.6025230884552, + -1.419615626335144, + -2.6723289489746094, + -5.775190830230713, + -1.1380761861801147, + -2.6683366298675537, + -0.43395891785621643, + -0.003145867260172963, + -8.63144302368164, + -1.646262764930725, + -1.732487678527832, + -4.561546802520752, + -0.5277953147888184, + -0.07333153486251831, + -0.5624169707298279, + -0.12201295047998428, + -2.6561455726623535, + -1.1071691513061523, + -2.6895060539245605, + -0.040864069014787674, + -0.04126371443271637, + -1.8294739723205566, + -0.09022177755832672, + -0.3154001832008362, + -0.46215569972991943, + -2.2462844848632812, + -0.30149081349372864, + -0.52588951587677, + -8.288043975830078, + -0.0002057340752799064, + -0.8021711707115173, + -4.4546098709106445, + -0.0001565095444675535, + -0.0015961299650371075, + -0.15216240286827087, + -0.3677564561367035, + -5.018707275390625, + -0.7850045561790466, + -1.9582659006118774, + -1.0046892166137695, + -10.0401029586792, + -0.16878114640712738, + 
-5.944240570068359, + -1.5523078441619873, + -5.7253522872924805, + -0.47948503494262695, + -0.44009655714035034, + -5.671053886413574, + -0.003280022880062461, + -0.7937742471694946, + -0.9639376401901245, + -0.00030048147891648114, + -1.0747740268707275, + -0.8839919567108154, + -3.416811466217041, + -1.6602673530578613, + -0.2706959843635559, + -0.0024333172477781773, + -4.478696823120117, + -6.20179557800293, + -0.11359559744596481, + -0.202009916305542, + -0.022310219705104828, + -2.367263078689575, + -1.0405994653701782, + -5.984308242797852, + -2.105138063430786, + -9.583202362060547, + -0.0004957877099514008, + -3.0655455589294434, + -0.0669412910938263, + -0.8977450728416443, + -2.2271294593811035, + -2.6617536544799805, + -1.8184051513671875, + -0.8291114568710327, + -0.4864235818386078, + -0.7993525862693787, + -3.51106858253479, + -2.1530935764312744, + -0.257144957780838, + -1.3934082984924316, + -1.3137131929397583, + -0.3384077548980713, + -0.1697217971086502, + -2.353395938873291, + -0.03406282886862755, + -0.39059701561927795, + -3.422821044921875, + -1.7117210626602173, + -0.7018465399742126, + -1.5995906591415405, + -3.6218395233154297, + -0.12497704476118088, + -0.16966234147548676, + -0.7313685417175293, + -0.4956285357475281, + -1.0840849876403809, + -5.042126655578613, + -0.00031704644788987935, + -7.683258056640625, + -0.9210801720619202, + -4.687852382659912, + -0.0028814247343689203, + -0.043382611125707626, + -4.1948652267456055, + -2.66593337059021, + -0.06153333932161331, + -0.0023110604379326105, + -6.729236602783203, + -5.777127742767334, + -0.08932067453861237, + -0.09890018403530121, + -0.009886111132800579, + -3.1145148277282715, + -3.725565195083618, + -0.0021998509764671326, + -3.9927196502685547, + -2.753793239593506, + -1.6037236452102661, + -0.17461130023002625, + -4.804804801940918, + -0.2311229705810547, + -0.30256444215774536, + -2.235363006591797, + -0.006614102050662041, + -0.34757524728775024, + -1.4946835041046143, + 
-1.222062587738037, + -3.658839225769043, + -1.356170892715454, + -0.5371109843254089, + -3.7580835819244385, + -4.54621696472168, + -0.31577637791633606, + -3.677156925201416, + -2.7181396484375, + -7.4674882888793945, + -0.00019369633810129017, + -2.3798398971557617, + -2.5452184677124023, + -0.2858496308326721, + -4.315659523010254, + -0.025835415348410606, + -0.000603493710514158, + -0.2546294331550598, + -0.12032663822174072, + -2.006908655166626, + -5.990736961364746, + -7.146596908569336, + -0.23356498777866364, + -0.2201036810874939, + -0.01235415879637003, + -0.011248741298913956, + -1.4155778884887695, + -0.40242519974708557, + -5.877886772155762, + -0.7865053415298462, + -0.03231288120150566, + -0.004864405374974012, + -0.0050629740580916405, + -2.7049152851104736, + -6.822089195251465, + -0.39252761006355286, + -1.2290617227554321, + -0.007630132604390383, + -3.485461711883545, + -0.47985684871673584, + -6.1813530921936035, + -0.03757825121283531, + -0.37834712862968445, + -0.22192610800266266, + -1.165318489074707, + -0.5220151543617249, + -0.1289423257112503, + -3.216222047805786, + -1.0787583589553833, + -3.0716826915740967, + -0.6023419499397278, + -2.558605194091797, + -0.927433431148529, + -0.00364841241389513, + -0.14910078048706055, + -0.7318926453590393, + -6.159773826599121, + -0.0015301911626011133, + -1.8908276557922363, + -1.9641315937042236, + -0.021651331335306168, + -2.1648828983306885, + -2.2700207233428955, + -7.833290100097656, + -0.03397307172417641, + -0.8344621658325195, + -0.02225659228861332, + -0.06639260798692703, + -2.3780317306518555, + -3.180129051208496, + -0.09030630439519882, + -2.4138312339782715, + -1.3445552587509155, + -1.848326325416565, + -0.9726964831352234, + -2.851792335510254, + -0.0630769282579422, + -0.0011394681641831994, + -0.05843213573098183, + -2.6616668701171875, + -1.575437068939209, + -0.180197611451149, + -5.552371501922607, + -0.26108410954475403, + -2.529611587524414, + -0.37780019640922546, + 
-5.141795635223389, + -0.5921107530593872, + -0.2474975287914276, + -0.10687454044818878, + -4.891775131225586, + -0.25011152029037476, + -2.4100728034973145, + -1.358667016029358, + -2.790961503982544, + -3.8654675483703613, + -1.0076243877410889, + -0.7456949949264526, + -1.5575554370880127, + -2.05328631401062, + -1.6538066864013672, + -0.0558217354118824, + -0.0001817776501411572, + -0.0011643542675301433, + -0.038359593600034714, + -1.4208931922912598, + -0.542127251625061, + -0.3162364959716797, + -0.3966117799282074, + -1.1765563488006592, + -1.7920958995819092, + -0.18425509333610535, + -0.1092008650302887, + -0.46676987409591675, + -0.24977745115756989, + -1.0375996828079224, + -0.5268858671188354, + -0.008942908607423306, + -0.6404479146003723, + -0.0033111530356109142, + -5.3165931603871286e-05, + -0.5154370665550232, + -0.39286962151527405, + -1.401839256286621, + -0.6232213973999023, + -0.02168831042945385, + -0.004282470792531967, + -0.005199837032705545, + -0.09748794883489609, + -0.040823787450790405, + -0.00014852374442853034, + -0.0005832401220686734, + -0.005303124897181988, + -0.6537013053894043, + -0.38026049733161926, + -0.04189129173755646, + -0.010385753586888313, + -0.008756335824728012, + -0.013362848199903965, + -0.000504723924677819, + -0.002797620603814721, + -0.0014512732159346342, + -0.0013321106089279056, + -0.010883613489568233, + -0.005159396678209305, + -0.004701037425547838, + -0.01591104455292225, + -0.001474246964789927, + -1.2278481335670222e-05, + -0.010548785328865051, + -0.08341525495052338, + -0.03858809545636177, + -0.056062061339616776, + -0.0009532198309898376, + -0.0005789510905742645, + -0.0008986725588329136, + -0.00710969977080822, + -0.0006561510381288826, + -1.4781842764932662e-05, + -5.578839045483619e-05, + -0.0006398299592547119, + -0.0028786908369511366, + -0.0034092895220965147, + -0.008268529549241066, + -0.006602259818464518, + -0.004517706111073494, + -0.02233586646616459, + -0.0006323245470412076, + 
-0.009195122867822647, + -0.0029284947086125612, + -0.004457537550479174, + -0.017873765900731087, + -0.008801711723208427, + -0.0036383166443556547, + -0.08078611642122269, + -0.006347495596855879, + -0.0002177716523874551, + -0.04688572511076927, + -0.2718890309333801, + -0.07996802777051926, + -0.04674842208623886, + -0.009984076954424381, + -0.010000954382121563, + -0.050126753747463226, + -0.5864179730415344, + -0.2915390133857727, + -0.008090462535619736, + -0.032190412282943726, + -0.03461571782827377, + -0.2785419523715973, + -0.05830562859773636, + -0.02893950417637825, + -0.12241066247224808, + -0.02711048536002636, + -0.16450686752796173, + -0.09852994978427887, + -0.2651047706604004, + -0.35559725761413574, + -0.12606258690357208, + -0.32793670892715454, + -0.20878805220127106, + -0.7587923407554626, + -0.4730657637119293, + -1.496794581413269, + -0.2486363798379898, + -0.20723387598991394, + -0.1872958242893219, + -0.19151091575622559, + -0.3350580036640167, + -1.3085839748382568, + -0.6109102964401245, + -0.2947172224521637, + -0.37130236625671387, + -0.19888469576835632, + -0.18297068774700165, + -0.9978674054145813, + -0.5471905469894409, + -0.4379975199699402, + -0.407988041639328, + -0.17325688898563385, + -0.16938896477222443 + ] + }, + "96": { + "input_prompt": "what is the concept of double materiality in sustainability?", + "generated_text": " What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? 
What is the concept of double materiality in sustainability? What is the concept of double material", + "generated_tokens": [ + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318 + ], + "latency": 126.90091466903687, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -4.917365074157715, + -0.9960631132125854, + -7.875392913818359, + -0.2993181347846985, + -7.760880470275879, + -10.308395385742188, + -2.1807961463928223, + -1.6412583589553833, + -9.521512985229492, + -1.627489447593689, + -1.8410861492156982, + -0.9285702705383301, + -0.2576955556869507, + -0.9641067981719971, + -0.02314644306898117, + -0.6696561574935913, + -0.07035009562969208, + -0.004622488282620907, + -0.025748632848262787, + -0.06276137381792068, + -0.17385317385196686, + -0.3285445272922516, + -0.0592009499669075, + -0.007940039038658142, + -0.22664028406143188, + -0.0017957051750272512, + -0.022929180413484573, + -0.005733947269618511, + -0.0012996093137189746, + -0.006419987417757511, + 
-0.02376849390566349, + -0.27800270915031433, + -0.4650723934173584, + -0.04936715215444565, + -0.003972141072154045, + -0.01477995328605175, + -0.0012044801842421293, + -0.014891182072460651, + -0.002709767082706094, + -0.0009939497103914618, + -0.0028436246793717146, + -0.006759870797395706, + -0.15416178107261658, + -0.20121537148952484, + -0.016414370387792587, + -0.0015769677702337503, + -0.008138825185596943, + -0.0007713441736996174, + -0.013819841668009758, + -0.003826678032055497, + -0.0005918181850574911, + -0.0014938872773200274, + -0.00485716899856925, + -0.081083282828331, + -0.09642580896615982, + -0.009630884043872356, + -0.0010948146227747202, + -0.007085552904754877, + -0.0006310140597634017, + -0.013073914684355259, + -0.0039152647368609905, + -0.000364713923772797, + -0.001292108790948987, + -0.004158303141593933, + -0.044283974915742874, + -0.05722038820385933, + -0.006369172595441341, + -0.0007976687629707158, + -0.005993015132844448, + -0.0004935238393954933, + -0.011310506612062454, + -0.002951553324237466, + -0.000387831823900342, + -0.000977038755081594, + -0.0036971091758459806, + -0.030511993914842606, + -0.04246694967150688, + -0.004863100592046976, + -0.0006927236099727452, + -0.005206122528761625, + -0.0005129451747052372, + -0.00894621666520834, + -0.0028565814718604088, + -0.00041333239641971886, + -0.0009002208826132119, + -0.0033131728414446115, + -0.021188799291849136, + -0.03330245241522789, + -0.0038543473929166794, + -0.0006504327175207436, + -0.004474864806979895, + -0.00048029806930571795, + -0.009718249551951885, + -0.0030443770810961723, + -0.0003743662964552641, + -0.0009439303539693356, + -0.003729770192876458, + -0.016505014151334763, + -0.0290373582392931, + -0.003315192647278309, + -0.0005821678787469864, + -0.004148805979639292, + -0.00042489083716645837, + -0.006856840569525957, + -0.0028660909738391638, + -0.00032574593205936253, + -0.0006986799417063594, + -0.003671098267659545, + -0.012792548164725304, + 
-0.02553274855017662, + -0.002730690874159336, + -0.0005067494930699468, + -0.0036923582665622234, + -0.0004451475979294628, + -0.007225453853607178, + -0.002746859099715948, + -0.0003323002893012017, + -0.0008517451351508498, + -0.003630714723840356, + -0.011284693144261837, + -0.02186688780784607, + -0.0025819556321948767, + -0.0004931663861498237, + -0.0031721293926239014, + -0.00040725519647821784, + -0.0062383925542235374, + -0.00238693761639297, + -0.00040749352774582803, + -0.0007970731821842492, + -0.003604583442211151, + -0.010075542144477367, + -0.022386692464351654, + -0.0025295186787843704, + -0.00046302087139338255, + -0.003138143103569746, + -0.0004233417857903987, + -0.006178564392030239, + -0.0021037368569523096 + ] + }, + "127": { + "input_prompt": "as before we talk about using , i usually do if else() to in case the property would be null before.\nif today i public a property and i want to use using to instead , will code be like this?\n\npublic Material Mat;\n\nusing(Mat){\n// Do something;\n}\n\nis that right?", + "generated_text": " i am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using", + "generated_tokens": [ + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, 
+ 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505 + ], + "latency": 156.19056010246277, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -7.793755531311035, + -4.224940776824951, + -5.446587562561035, + -0.267395555973053, + -6.118141174316406, + -11.105525970458984, + -6.67517614364624, + -6.902530670166016, + -3.2218151092529297, + -5.99644660949707, + -8.431673049926758, + -8.04836368560791, + -3.992594003677368, + -7.550079345703125, + -2.873685836791992, + -3.3877997398376465, + -7.9306745529174805, + -7.005484580993652, + -1.0481306314468384, + -2.686237335205078, + -6.131283760070801, + -6.2994704246521, + -7.931419372558594, + -11.3147554397583, + -8.544670104980469, + -12.01729679107666, + -3.89847469329834, + -1.7964364290237427, + -2.952878952026367, + -1.9217232465744019, + -2.272329330444336, + -0.37552154064178467, + -1.667820692062378, + -7.510344505310059, + -3.498040199279785, + -7.980632305145264, + -7.672002792358398, + -4.4999470710754395, + -7.155375003814697, + -2.4486124515533447, + -4.785946846008301, + -1.153855800628662, + -2.3994438648223877, + -4.250652313232422, + -12.24446964263916, + -8.344388008117676, + -2.608186721801758, + -5.200589179992676, + -8.25888442993164, + -3.6245617866516113, + -7.689338207244873, + -7.345355033874512, + -1.2661759853363037, + 
-7.265620231628418, + -1.9884108304977417, + -6.269482612609863, + -2.41705584526062, + -1.8929681777954102, + -1.8259913921356201, + -2.0997350215911865, + -2.323200225830078, + -1.3998825550079346, + -0.8789899945259094, + -1.082053542137146, + -1.1831339597702026, + -1.4462857246398926, + -1.6481035947799683, + -1.4408715963363647, + -1.2603964805603027, + -1.5267670154571533, + -1.6345772743225098, + -1.3796477317810059, + -0.7609691023826599, + -0.3548354506492615, + -0.7552334666252136, + -0.44776833057403564, + -1.1078286170959473, + -1.3036658763885498, + -0.5214896202087402, + -0.8486822843551636, + -0.22470997273921967, + -0.4705755412578583, + -0.5639711022377014, + -0.5388108491897583, + -0.6052999496459961, + -0.1002030223608017, + -0.286334365606308, + -0.45798981189727783, + -1.0107953548431396, + -0.11875647306442261, + -0.6969441771507263, + -0.4609107971191406, + -0.07614769786596298, + -0.5035472512245178, + -0.1682187020778656, + -0.10476160794496536, + -0.6586751341819763, + -0.35806939005851746, + -1.5364394187927246, + -2.4093759059906006, + -1.977368950843811, + -1.6216907501220703, + -0.27647316455841064, + -0.2991848587989807, + -0.2783535420894623, + -0.05913994088768959, + -0.03023873083293438, + -0.043339803814888, + -0.7320341467857361, + -0.0030677898321300745, + -0.0332595594227314, + -0.012804670259356499, + -0.004041599575430155, + -0.0014899593079462647, + -0.001948602613992989, + -0.0029070996679365635, + -0.040939707309007645, + -0.013942227698862553, + -0.04897322878241539, + -0.011005887761712074, + -0.0044113704934716225, + -0.0013179434463381767, + -0.003658389439806342, + -0.009758152067661285, + -0.0014104428701102734, + -0.0016671819612383842, + -0.000771939754486084, + -0.0015519729349762201, + -0.003720743814483285, + -0.004249115474522114, + -0.00485657574608922, + -0.005053604021668434, + -0.002336274366825819, + -0.0009155849111266434, + -0.0004978132783435285, + -0.0005953923100605607, + -0.0011395872570574284, + 
-0.001485078944824636, + -0.3072909712791443, + -1.7295066118240356, + -0.4807289242744446, + -0.1245415136218071, + -0.011858444660902023, + -0.020613837987184525, + -0.011020978912711143, + -0.003106294432654977, + -0.0009966888464987278, + -0.0019349202048033476, + -0.037407051771879196, + -0.0003496989083942026, + -0.005922981072217226, + -0.007394562941044569, + -0.0006037319544702768, + -0.0008836655179038644, + -0.0002884448622353375, + -0.00047600860125385225, + -0.0024947968777269125, + -0.00442774873226881, + -0.004059052560478449, + -0.0018594847060739994, + -0.0006179092451930046, + -0.00022635281493421644, + -0.0006730675231665373, + -0.003022746881470084, + -0.0002343380037928, + -0.00047791501856409013, + -9.440929716220126e-05, + -0.00021550717065110803, + -0.0013523490633815527, + -0.0032202552538365126, + -0.001157686347141862, + -0.004449942149221897, + -0.0016590891173109412, + -0.00101062236353755, + -0.0003079893649555743, + -0.00048375347978435457, + -0.0021734442561864853, + -0.00423036003485322, + -0.11514264345169067, + -0.8658493757247925, + -0.084366075694561, + -0.02140468917787075, + -0.0060798698104918, + -0.008638513274490833, + -0.003212531330063939, + -0.0009598892065696418, + -0.00032085992279462516 + ] + }, + "throughput": [ + 92.14086318169623, + 104.14077061259405, + 104.70701879377005 + ] } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml index 13e56a13c85..96ada2bf1e9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - 
--timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1 --distributed-backend: nccl --log-interval: 1 diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml index b99100f65eb..a4f47d3705f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml index 7a2cc9b0c78..59186f8d532 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml index 
0b31d16af75..612e621534d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml index 3b10336138d..cb06eae2e7e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml @@ -10,7 +10,7 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 # See the mount paths defined in the top level tests/test_utils/recipes/gpt-static-inference.yaml --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml index 04e6caa3303..c080cd5f5a7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml index 9aa1a6e1c96..e3a4d695ead 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml index b3564f8226a..90a1836347e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 diff --git 
a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml index 4350c4a6f50..199cf809ba2 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml @@ -22,13 +22,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml index b571dca2dd0..0983337becc 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml @@ -22,13 +22,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml index 941d3f6f829..7f7aac5d78b 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml @@ -22,13 +22,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml index 588cfe3e80a..93418f580fc 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml @@ -22,13 +22,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml index 75e4d3123bd..7702274db5f 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml @@ -10,7 +10,7 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/checkpoint --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml index 301b68e7382..9a7769eb432 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml @@ -10,7 +10,7 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --load: 
${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/checkpoint --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer diff --git a/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml b/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml index ced98a352b1..2daf74b89a7 100644 --- a/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml +++ b/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --seq-length: 4096 --max-position-embeddings: 4096 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 2200 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json similarity index 100% 
rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml index 6bdb19e1001..cdabc4b6225 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - 
--timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml index 97db543f73c..731ff82d8d4 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml @@ -17,13 +17,13 @@ 
MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml index 8f4f022345a..f7fd8b2963d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml index aa83c79ceb2..61b5c9339ba 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml index 758f7af8f0f..a3995df9627 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml index 
2ef041c07af..8672163186c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_lts_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml index 29a63c7d148..4ed0bb89001 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml index a15bbf77196..8e267b178b4 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml index a7e85122831..9490d832f7d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 
100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml index a5f390a463d..b84bf45b890 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml index 7ffcd448b37..b5c774d4d3c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgxh100_eos.json 
similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml index e7aa73ba6b1..d02951177b0 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml index 3806ae26529..8c75b0a2e76 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml index 4820a43bf3f..978babb72ff 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml index 488b8ad92d2..b6a7c223acc 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml index 52eb433afd5..4c991767ca3 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml @@ -39,8 +39,8 @@ MODEL_ARGS: --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml 
similarity index 96% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml index e8c45375110..a1a5219ecb4 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml @@ -39,8 +39,8 @@ MODEL_ARGS: --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml similarity index 96% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml index c7f0bde3e82..bd565830970 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml @@ -39,8 +39,8 @@ MODEL_ARGS: --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + 
--vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml similarity index 96% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml index bf1c5a45cc9..efb1fedf93c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml @@ -40,8 +40,8 @@ MODEL_ARGS: --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml index e593e94f5ac..3ecd68b9841 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml index 45ae64df053..c147b689e71 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml index bb3f5df251d..f77c2a41f68 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: 
${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml index 5ce2939b05d..12e6698a5f4 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml index 60652f0ded9..c714e058651 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml index 8411f00055e..86a05a93562 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml index ac03efd36a5..5020d9d9397 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_dev.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_dev.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_dev.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_dev.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_lts.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_lts.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_lts.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_lts.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml index 989a24acaf7..d763069b566 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 
--train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml index b95d5c04a1a..cd7656d240f 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --max-position-embeddings: 1024 --disable-bias-linear: true --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_dgxc.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_dgxc.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_dgxc.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_dgxc.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml index 5268bf68b33..fb438f0edda 
100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --max-position-embeddings: 1024 --disable-bias-linear: true --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index c6e7916ea72..0e1f9110793 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -10,9 +10,9 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: 
${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl @@ -82,4 +82,4 @@ MODEL_ARGS: --inference-repeat-n: 8 METRICS: - "generated_tokens" - - "logprobs" \ No newline at end of file + - "logprobs" diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index d94b06f5ac8..1b9eaaf1f65 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -10,7 +10,7 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index 2289078dd5b..3ba12056190 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -10,9 +10,9 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 - --load: 
${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml index a9171008b7c..569eb969d72 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml @@ -10,7 +10,7 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index 116992b2d7f..366d2f23575 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -10,7 +10,7 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: 
true --log-memory-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/model_config.yaml index 4b59ffaca86..2898070f957 100644 --- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/model_config.yaml index a13b09397eb..23bdaac5010 100644 --- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --disable-vision-class-token: true --max-position-embeddings: 4096 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json similarity index 100% 
rename from tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml index 234236c7d26..c2798ecf6af 100644 --- a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml index 76afded197d..aa0f67ff311 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 
--log-interval: 1 --save-interval: 50 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/model_config.yaml rename to 
tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml index 2ab4e9730d7..59c1d0f280f 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json rename to 
tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml index 37085e01771..80a84a26e0c 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml index 54ad28a8e8a..047280dec39 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml similarity index 98% rename 
from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml index 9cc675a35f6..1611c02251b 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 50 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_lts_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml index 46e7209823f..12ccecb5883 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml index 0b11a3c137c..8559fd587d1 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 50 
--eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json diff 
--git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml index c305e4a86dd..9c6a835571c 100644 --- 
a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml index 5dc3478de12..dd3896ad88a 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml index 1bf1e028390..4c955dd5441 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 50 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml index d30207b5b51..964acdba5cf 100644 --- a/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml @@ -55,7 +55,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-params-norm: true --log-validation-ppl-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --wandb-project: megatron-core-release-runs --wandb-exp-name: ${WANDB_EXPERIMENT} METRICS: diff --git 
a/tests/functional_tests/test_cases/t5/t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_weekly_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_weekly_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_weekly_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_weekly_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/test_utils/python_scripts/launch_jet_workload.py b/tests/test_utils/python_scripts/launch_jet_workload.py index 0e3ed179f4a..7dc4a7357a7 100644 --- a/tests/test_utils/python_scripts/launch_jet_workload.py +++ b/tests/test_utils/python_scripts/launch_jet_workload.py @@ -288,6 +288,7 @@ def is_flaky_failure(concat_allranks_logs: str) -> bool: or "Unpack failed: incomplete input" in concat_allranks_logs or "unspecified launch failure" in concat_allranks_logs or "free(): corrupted unsorted chunks" in concat_allranks_logs + or "Segfault encountered" in concat_allranks_logs ) diff --git a/tests/test_utils/python_scripts/launch_nemo_run_workload.py b/tests/test_utils/python_scripts/launch_nemo_run_workload.py index b3032eb15c4..648ac28d19a 100644 --- a/tests/test_utils/python_scripts/launch_nemo_run_workload.py +++ b/tests/test_utils/python_scripts/launch_nemo_run_workload.py @@ -13,6 +13,34 @@ logger = 
logging.getLogger(__name__) +def is_flaky_failure(concat_allranks_logs: str) -> bool: + """Assumes that certain keywords hint towards intermittent failures""" + + return ( + "The server socket has failed to listen on any local network address." + in concat_allranks_logs + or "Some NCCL operations have failed or timed out." in concat_allranks_logs + or "uncorrectable ECC error encountered" in concat_allranks_logs + or "illegal memory access" in concat_allranks_logs + or "illegal instruction" in concat_allranks_logs + or "torch.distributed.DistNetworkError" in concat_allranks_logs + or "Segmentation fault" in concat_allranks_logs + or "found NaN in" in concat_allranks_logs + or "For debugging consider passing CUDA_LAUNCH_BLOCKING=1" in concat_allranks_logs + or "double free or corruption" in concat_allranks_logs + or "Call to CUDA function failed." in concat_allranks_logs + or "Connection reset by peer" in concat_allranks_logs + or "invalid pointer" in concat_allranks_logs + or "malloc(): unaligned tcache chunk detected" in concat_allranks_logs + or "zmq.error.ZMQError: Address already in use" in concat_allranks_logs + or "We couldn't connect to 'https://huggingface.co'" in concat_allranks_logs + or "Unpack failed: incomplete input" in concat_allranks_logs + or "unspecified launch failure" in concat_allranks_logs + or "free(): corrupted unsorted chunks" in concat_allranks_logs + or "Segfault encountered" in concat_allranks_logs + ) + + @click.command() @click.option("--scope", required=True, type=str, help="Scope of the workload") @click.option("--model", required=True, type=str, help="Model of the workload") @@ -89,11 +117,39 @@ def main( packager=run.Packager(), volumes=artifacts, ) - with run.Experiment("mcore-ci-test", executor=executor, log_level="INFO") as exp: - _ = exp.add([inline_script], tail_logs=False, name="task-1") - exp.dryrun(log=True) - exp.run(detach=False, tail_logs=True, sequential=False) + n_attempts = 0 + while n_attempts < 3: + with 
run.Experiment("mcore-ci-test", executor=executor, log_level="INFO") as exp: + _ = exp.add([inline_script], tail_logs=False, name="task-1") + + exp.dryrun(log=True) + exp.run(detach=False, tail_logs=True, sequential=False) + + result_dict = exp.status(return_dict=True) + _, job_dict = list(result_dict.items())[0] + succeeded = str(job_dict["status"]) == "SUCCEEDED" + + if succeeded: + logger.info(f"Job succeeded with status: {job_dict["status"]}") + sys.exit(0) + + logger.error(f"Job failed with status: {job_dict["status"]}") + log_file_paths = pathlib.Path(os.getcwd()).glob("assets_dir/logs/*/*/attempt_0/*/std*.log") + all_ranks_all_logs = [] + for log_file_path in log_file_paths: + with open(log_file_path, "r") as f: + all_logs = f.readlines() + all_ranks_all_logs.extend(all_logs) + all_ranks_all_logs_string = "\n".join(all_ranks_all_logs) + if is_flaky_failure(all_ranks_all_logs_string): + logger.warning("Detected flaky failure, attempt restart.") + n_attempts += 1 + continue + + sys.exit(1) + + sys.exit(1) result_dict = exp.status(return_dict=True) _, job_dict = list(result_dict.items())[0] diff --git a/tests/test_utils/recipes/ckpt_converter.yaml b/tests/test_utils/recipes/ckpt_converter.yaml index 5d705869958..f78f184a326 100644 --- a/tests/test_utils/recipes/ckpt_converter.yaml +++ b/tests/test_utils/recipes/ckpt_converter.yaml @@ -34,6 +34,7 @@ spec: rm -rf megatron; cp -a /opt/megatron-lm/megatron ./ script: |- ls + cd /opt/megatron-lm torchrun \ diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml index dd90bc38e88..47b8d346150 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml @@ -10,8 +10,6 @@ spec: gpus: 1 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/model/mcore_mistral: model/mcore_mistral/nemo_minitron-0.5b/v1 script_setup: | unset https_proxy echo 
"machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml index 56ecdabcded..dd8cf6b945d 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml @@ -10,8 +10,6 @@ spec: gpus: 1 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/model/mcore_mistral: model/mcore_mistral/nemo_minitron-0.5b/v1 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc diff --git a/tests/test_utils/recipes/gpt-grads.yaml b/tests/test_utils/recipes/gpt-grads.yaml index 205985d5e13..cdd3a050ff2 100644 --- a/tests/test_utils/recipes/gpt-grads.yaml +++ b/tests/test_utils/recipes/gpt-grads.yaml @@ -10,8 +10,6 @@ spec: gpus: 8 n_repeat: 1 platforms: dgx_h100 - artifacts: - /mnt/artifacts/text/the_pile/shard00: text/the_pile/shard00 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -61,7 +59,7 @@ spec: bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} products: - - test_case: [gpt3_mr_mcore_reruns_resume_check_grads] + - test_case: [gpt3_mcore_reruns_resume_check_grads] products: - environment: [dev] scope: [mr, mr-github] diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index 5eb29ac2605..0dafb8685c2 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -60,43 +60,43 @@ products: ####################################################################### # Nightly tests: Run both DEV and LTS unless something is flaky # ####################################################################### - - test_case: 
[gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] + - test_case: [gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: - environment: [dev] scope: [nightly] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2] + - test_case: [gpt3_mcore_tp1_pp2] products: - environment: [lts] scope: [nightly] - environment: [dev] scope: [nightly] platforms: [dgx_h100] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist] + - test_case: [gpt3_mcore_tp1_pp2_resume_torch_dist] products: - environment: [dev, lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4] + - test_case: [gpt3_mcore_tp1_pp4] products: - environment: [lts] scope: [nightly] - environment: [dev] scope: [nightly] platforms: [dgx_h100] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist] + - test_case: [gpt3_mcore_tp1_pp4_resume_torch_dist] products: - environment: [dev, lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch] + - test_case: [gpt3_mcore_tp4_pp1_resume_torch] products: - environment: [lts] scope: [nightly] - environment: [dev] scope: [nightly] platforms: [dgx_h100] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist] + - test_case: [gpt3_mcore_tp4_pp1_resume_torch_dist] products: - environment: [lts] scope: [nightly] @@ -107,215 +107,215 @@ products: # MR tests: Mostly DEV on MR, and LTS on nightly cadence, except for # # some very important tests. 
# ####################################################################### - - test_case: [gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - # - test_case: [gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic] # products: # - environment: [dev] # scope: [mr] # - environment: [lts] # scope: [nightly] # Non-deterministic: #487 - - test_case: [gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # outdated TE: #501 - - test_case: [gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear] products: - environment: [dev] 
scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # non-determinism: #436 - - test_case: [gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # non-determinism: #437 - - test_case: [gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - # - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # Hangs: #513 # - 
environment: [lts] # scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # Hangs: #513 - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied] products: # - environment: [dev] # scope: [mr] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap] products: # - environment: [dev] # scope: [mr] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G] + - test_case: 
[gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_nondeterministic] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: 
[gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion] products: - environment: [dev] scope: [mr, mr-github] @@ -323,110 +323,110 @@ products: - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_mla] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: 
[gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader] products: # - environment: [dev] # scope: [mr] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: 
[gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G] + - test_case: [gpt3_mcore_tp2_pp2_uninstall_te] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed] + - test_case: [gpt3_7b_tp1_pp4_memory_speed] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # OOM: #434 - - test_case: [gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed] + - test_case: [gpt3_7b_tp4_pp1_memory_speed] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # OOM: #434 - - test_case: [gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume] + - test_case: [gpt3_mcore_te_tp2_pp1_modelopt_distill_resume] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # Outdated: #502 - # - test_case: [gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist] # products: # - environment: [dev] # scope: [mr] # Broken: #484 @@ -435,21 +435,21 @@ products: ####################################################################### # Super important MR tests that run for both DEV and LTS per MR # ####################################################################### - - test_case: [gpt3_mr_mcore_reruns_persistent_1] + - test_case: [gpt3_mcore_reruns_persistent_1] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - # - test_case: [gpt3_mr_mcore_reruns_persistent_2] + # - test_case: [gpt3_mcore_reruns_persistent_2] # products: # - environment: [dev] 
# scope: [mr] # platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] products: - environment: [lts] scope: [mr] @@ -459,14 +459,14 @@ products: - environment: [dev] scope: [mr-slim] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [mr] - - test_case: [gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: - environment: [lts] scope: [mr] @@ -476,79 +476,79 @@ products: - environment: [dev] scope: [mr-slim] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [mr] - # - test_case: [gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] - # - test_case: [gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] - # - test_case: [gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G] + # - test_case: 
[gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] - # - test_case: [gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_a100, dgx_h100] - # - test_case: [gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap] + # - test_case: [gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_b200] - # - test_case: [gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp] + # - test_case: [gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_b200] - # - test_case: [gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap] + # - test_case: [gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_b200] - # - test_case: [gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp] + # - test_case: [gpt3_weekly_dgx_b200_mcore_tp4_cp2_mxfp8_tp_sp_cp] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_b200] - # - test_case: [gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap] + # - test_case: [gpt3_weekly_dgx_b200_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_b200] - - test_case: [gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap] + - test_case: [gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap] products: - environment: [dev] scope: [weekly] platforms: 
[dgx_h100] - # - test_case: [gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp] + # - test_case: [gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_h100] - # - test_case: [gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap] + # - test_case: [gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_h100] - - test_case: [gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap] + - test_case: [gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap] products: - environment: [dev] scope: [weekly] platforms: [dgx_h100] - # - test_case: [gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te] # products: # - environment: [dev, lts] # scope: [mr] # Non-deterministic: #483 diff --git a/tests/test_utils/recipes/mamba-static-inference.yaml b/tests/test_utils/recipes/mamba-static-inference.yaml index a4eaecaa53e..e727c4db5ee 100644 --- a/tests/test_utils/recipes/mamba-static-inference.yaml +++ b/tests/test_utils/recipes/mamba-static-inference.yaml @@ -10,8 +10,6 @@ spec: gpus: 1 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/mamba_hybrid_2b: model/mamba_hybrid_2b/dcp/mcore-v1_bf16 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc diff --git a/tests/test_utils/recipes/moe-static-inference.yaml b/tests/test_utils/recipes/moe-static-inference.yaml index f2f98fbc146..c11cd294592 100644 --- a/tests/test_utils/recipes/moe-static-inference.yaml +++ b/tests/test_utils/recipes/moe-static-inference.yaml @@ -10,7 +10,6 @@ spec: gpus: 8 n_repeat: 1 platforms: dgx_a100 - artifacts: script_setup: | unset https_proxy echo 
"machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index fd8f00c242f..8164ca37df8 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -60,17 +60,17 @@ products: ####################################################################### # Nightly tests: Run both DEV and LTS unless something is flaky # ####################################################################### - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel] + - test_case: [gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last] + - test_case: [gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - # - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts] + # - test_case: [gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts] # products: # non-determinism: #478 # - environment: [dev, lts] # scope: [nightly] @@ -81,45 +81,45 @@ products: # MR tests: Mostly DEV on MR, and LTS on nightly cadence, except for # # some very important tests. 
# ####################################################################### - - test_case: [gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph] + - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # hang: #513 - # - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] + # - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # hang: #513 - - test_case: [gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] # TODO: The migration of custom fsdp causes EP + FSDP to be temporarily unavailable, which will be fixed in a subsequent MR. 
- # - test_case: [gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer] + - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer] products: - environment: [dev] scope: [mr] @@ -127,12 +127,12 @@ products: ####################################################################### # Super important MR tests that run for both DEV and LTS per MR # ####################################################################### - # - test_case: [gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] - # - test_case: [gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM] # products: # - environment: [dev] # scope: [mr] @@ -140,7 +140,7 @@ products: ########################### # Merge train tests # ########################### - - test_case: [gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer] + - test_case: [gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer] products: - environment: [dev] scope: [mr] @@ -156,7 +156,7 @@ products: - environment: [dev] scope: [mr-slim] platforms: [dgx_h100] - - 
test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] + - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] products: - environment: [dev] scope: [mr] diff --git a/tests/test_utils/recipes/multimodal-llava.yaml b/tests/test_utils/recipes/multimodal-llava.yaml index 65393f14f50..80a30f050bc 100644 --- a/tests/test_utils/recipes/multimodal-llava.yaml +++ b/tests/test_utils/recipes/multimodal-llava.yaml @@ -58,12 +58,12 @@ spec: bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} products: - - test_case: [multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G] + - test_case: [multimodal_llava_mcore_te_tp1_pp1] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G] + - test_case: [multimodal_llava_mcore_te_tp4_sp_cp2] products: - environment: [dev] scope: [mr, mr-github] diff --git a/uv.lock b/uv.lock index 28110f38852..f7c8916166b 100644 --- a/uv.lock +++ b/uv.lock @@ -1093,61 +1093,61 @@ wheels = [ [[package]] name = "cython" -version = "3.1.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4d/ab/4e980fbfbc894f95854aabff68a029dd6044a9550c480a1049a65263c72b/cython-3.1.5.tar.gz", hash = "sha256:7e73c7e6da755a8dffb9e0e5c4398e364e37671778624188444f1ff0d9458112", size = 3192050, upload-time = "2025-10-20T06:06:51.928Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/9f/677707b1734285632a71a3b644b36e77801ce36a7a34af2e64f516b451f0/cython-3.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d27f08ea53099f0101a0c582f1000fcae51cae177bbd4f6f95adfd8adb7a5271", size = 2993670, upload-time = "2025-10-20T06:08:47.301Z" }, - { url = "https://files.pythonhosted.org/packages/40/28/6fa54e679b33eb8640f1fe0a222096c5f8080d25035a923f444d56ea3046/cython-3.1.5-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:68cf7d059fd673adf3486e34950612069ec0c235e3ae8455424dfb6fdf85cffd", size = 2918339, upload-time = "2025-10-20T06:08:49.029Z" }, - { url = "https://files.pythonhosted.org/packages/78/7e/f3a5979b16efa916a3494986bb234b2ae66ba81ab2e4e358a0b991eaa288/cython-3.1.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8e9e35cad5ae781abef944ce8a8395e098d6e042e5269cc4bcbc1fc177b1e3e3", size = 3511124, upload-time = "2025-10-20T06:08:51.353Z" }, - { url = "https://files.pythonhosted.org/packages/0c/15/a44cc4b6e2482e5453b2eaac00a52b79d2dd71a5fe8c2000dfc7f06c4d32/cython-3.1.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51798e2a76559dff79faee263c971006ce5ae2ee6ecd2fbf108fce3cc0acbac7", size = 3265544, upload-time = "2025-10-20T06:08:53.564Z" }, - { url = "https://files.pythonhosted.org/packages/13/d0/8fe7ad4115f5b4f9b2643a2efd22bfb301e81b6be618fdbc7d560a5edb7c/cython-3.1.5-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d4d6054f65626d4bb1846da686370394ee83e66a8a752fad7ca362ed8de1cf8c", size = 3427201, upload-time = "2025-10-20T06:08:55.455Z" }, - { url = "https://files.pythonhosted.org/packages/1a/24/b00761f82f323a4c0a2fc0877c5a4ceeb0f9dbc1626b3aed124593edc7c9/cython-3.1.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e9744f8c701365bc8081946c68de2f106c5aa70b08c3b989f482d469b9d6fd77", size = 3280702, upload-time = "2025-10-20T06:08:57.669Z" }, - { url = "https://files.pythonhosted.org/packages/e5/d1/c4b151f8ac86a7444a9a73693f51e36956fb106b55358f809870e49f66e0/cython-3.1.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8396663f6c38fa392de2fb5ea7efd7749334d5bb6b95cd58f9d1bd566924a593", size = 3525363, upload-time = "2025-10-20T06:08:59.873Z" }, - { url = "https://files.pythonhosted.org/packages/a9/2f/e8158f27b34b121975f87db2a7ea7d0e8091a30be5602a5a36f28b7c1944/cython-3.1.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:e069c5af8f646faaacca1a693f74fb27254f7d8ddec2045301d39a8df552c777", size = 3441442, upload-time = "2025-10-20T06:09:01.649Z" }, - { url = "https://files.pythonhosted.org/packages/27/65/9c74b2bd719b563732a0fc5b0162db2d4eac5289bc3452e15b2534dda5d4/cython-3.1.5-cp310-cp310-win32.whl", hash = "sha256:ed0dfaad3a5ca8bf6f3546d40a55f3b879d1f835ca19382d8ca582318de09d49", size = 2484767, upload-time = "2025-10-20T06:09:03.447Z" }, - { url = "https://files.pythonhosted.org/packages/f9/f3/147d524a623f9a1c3269ece074c5a6b9ded38994fddbe57cb4f77d8d3be3/cython-3.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:7af877689440cda31e455003d6f615e0ffca658c7f7dcbf17573bfb469848cdf", size = 2709618, upload-time = "2025-10-20T06:09:05.471Z" }, - { url = "https://files.pythonhosted.org/packages/4b/f3/fcd5a3c43db19884dfafe7794b463728c70147aa1876223f431916d44984/cython-3.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1aad56376c6ff10deee50f3a9ff5a1fddbe24c6debad7041b86cc618f127836a", size = 3026477, upload-time = "2025-10-20T06:09:07.712Z" }, - { url = "https://files.pythonhosted.org/packages/3d/19/81fa80bdeca5cee456ac52728c993e62eaf58407d19232db55536cf66c4b/cython-3.1.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ef1df5201bf6eef6224e04584b0032874bd1e10e9f4e5701bfa502fca2f301bb", size = 2956078, upload-time = "2025-10-20T06:09:09.781Z" }, - { url = "https://files.pythonhosted.org/packages/a1/40/002d72dc5914a8043dc9fed9b05b10fb4d365c5182733af3e0768a388cb7/cython-3.1.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dce715a5b4279b82354855609d96e49a1bdc8a23499fb03d707df3865df3c565", size = 3412101, upload-time = "2025-10-20T06:09:11.762Z" }, - { url = "https://files.pythonhosted.org/packages/ab/3f/8913ffad4f025446a3fa1662675277e340aef3ddb583704b5569698c28dc/cython-3.1.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3b185ac9ff4170a584decffb6616457208f5a4148c78613f3118f70603b3759c", 
size = 3191171, upload-time = "2025-10-20T06:09:16.924Z" }, - { url = "https://files.pythonhosted.org/packages/63/fb/66e72c2e4b88f7f221d6226ab7ada1c572924bd73c3c66f899313c4e33d3/cython-3.1.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e3f86927811923958af0a4c68c6b978438cec0070b56dd68f968b2a070e4dc4d", size = 3313920, upload-time = "2025-10-20T06:09:18.856Z" }, - { url = "https://files.pythonhosted.org/packages/bb/40/0858cb88f7cd8b7d1627cefff67fcc0d50c3bd9303a3687f4dbc5d2790cf/cython-3.1.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:61b19977f4af6632413cf89e9126fc9935b33d3d42699ee4370e74ac0ad38fc8", size = 3205839, upload-time = "2025-10-20T06:09:21.473Z" }, - { url = "https://files.pythonhosted.org/packages/d7/e4/8edaf492b365720a553a83d5a1289f4f3198ae2ffd7333142f1b175b3012/cython-3.1.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:44ae7765f5d1082efd7a6cc9beedc7499a70e3cac528faad6cfca9d68b879253", size = 3428501, upload-time = "2025-10-20T06:09:23.756Z" }, - { url = "https://files.pythonhosted.org/packages/22/8c/db66aeba98f0374cc18f6311679d1fa984852e0c737815b35df37ffd5be6/cython-3.1.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d5e7a836c18638d7c383e438306c36acd7ea3f5feb78d32796efab506626567a", size = 3330574, upload-time = "2025-10-20T06:09:25.827Z" }, - { url = "https://files.pythonhosted.org/packages/83/4b/5e01ab06d625496e0d0c5cd34d8b1793833fafb4ebde439595fb289bf77e/cython-3.1.5-cp311-cp311-win32.whl", hash = "sha256:f7991ef8da0132962c4a79636e01792cc96e0ede333d8b5d772be8bf218f6549", size = 2482452, upload-time = "2025-10-20T06:09:27.455Z" }, - { url = "https://files.pythonhosted.org/packages/2c/67/71d858413f1753399b303bec74b4322001e1af8215edf7cc34e6e6d7e3ff/cython-3.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:d31861678d88a7c6e69e022e37ed2a7d378fdd6b7843d63f3a2e97fc3fc88d63", size = 2713943, upload-time = "2025-10-20T06:09:29.571Z" }, - { url = 
"https://files.pythonhosted.org/packages/54/3c/beb8bd4b94ae08cc9b90aac152e917e2fcab1d3189fb5143bc5f1622dc59/cython-3.1.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:38bf7bbe29e8508645d2c3d6313f7fb6872c22f54980f68819422d0812c95f69", size = 3063044, upload-time = "2025-10-20T06:09:32.361Z" }, - { url = "https://files.pythonhosted.org/packages/3b/88/1e0df92588704503a863230fed61d95fc6e38c0db2537eaf6e5c140e5055/cython-3.1.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:61c42f881320a2b34a88806ddee6b424b3caa6fa193b008123704a2896b5bc37", size = 2970800, upload-time = "2025-10-20T06:09:34.58Z" }, - { url = "https://files.pythonhosted.org/packages/5c/27/51854d64c058265ea216cf04239d5818ffb72e200875273acae77e96821f/cython-3.1.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dde94e825ed23d0189a43c7714143de6ab35c7d6ca6dca4b2b2fcd2db418400d", size = 3387292, upload-time = "2025-10-20T06:09:36.218Z" }, - { url = "https://files.pythonhosted.org/packages/86/03/37274f84d775e19234c8ba3b7b9ffee55d038d39312446e1123f9f9e8167/cython-3.1.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51e8f773a90a61179ebf5eb2f0f711607a39d7c87ba254d9a7693b8dc62b5c8c", size = 3168510, upload-time = "2025-10-20T06:09:38.312Z" }, - { url = "https://files.pythonhosted.org/packages/d2/d2/52bf6d5b18d6faa9c3655c2c2854dd4cc3630e0af7ff89e415fbba713c37/cython-3.1.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:326633ca2aa0233098e75198f955b5836c2dc12b19e1b1aa10877e96b9aee37d", size = 3319825, upload-time = "2025-10-20T06:09:40.229Z" }, - { url = "https://files.pythonhosted.org/packages/93/05/4935c5aff6bc95155168b59990ce364877ae3d97b7cc58b20e93be9c0803/cython-3.1.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7002d9ae1c8863f089195b539c72c927e0f41cc4787e8e369db6e8f22e12b7b8", size = 3181070, upload-time = "2025-10-20T06:09:42.481Z" }, - { url = 
"https://files.pythonhosted.org/packages/10/c8/65650a07facc6e7aeec9e94358715a1a0f18960f8c5a30f60291c5e911b5/cython-3.1.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6a0905a967bc4eaf6186837efbd023061bc25b5f80599203bad5db858527d9da", size = 3400149, upload-time = "2025-10-20T06:09:47.86Z" }, - { url = "https://files.pythonhosted.org/packages/f7/78/ac690c772d2942ae16498d7cc182f056d3cf42788153685334b78904b087/cython-3.1.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:531e431e23bbd3e658b41a1240d641131a11d5b5689062e9b811a6b4eab4ecf7", size = 3330840, upload-time = "2025-10-20T06:09:49.574Z" }, - { url = "https://files.pythonhosted.org/packages/ac/53/ea4aaf1a80c537b53c8cad6f99980ea7cf80e1be2a3c7db790c58af34b42/cython-3.1.5-cp312-cp312-win32.whl", hash = "sha256:920e2579858b3b47aa9026667d7adbd22a6cccf1e8da1bf3ea01a1c451a4ef0f", size = 2487776, upload-time = "2025-10-20T06:09:51.437Z" }, - { url = "https://files.pythonhosted.org/packages/2a/89/195d56054f8936b38c046fab904aaec4d7e221db2a45b4016d11e909cf2e/cython-3.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:b230b4ef06752c186ebd071989aac6ea60c79078a5430d3d33712cec0dc19ffd", size = 2705869, upload-time = "2025-10-20T06:09:53.08Z" }, - { url = "https://files.pythonhosted.org/packages/89/7e/9b4e099076e6a56939ef7def0ebf7f31f204fc2383be57f31fd0d8c91659/cython-3.1.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3c9b6d424f8b4f621b2d08ee5c344970311df0dac5c259667786b21b77657460", size = 3051579, upload-time = "2025-10-20T06:09:54.733Z" }, - { url = "https://files.pythonhosted.org/packages/a4/4d/4f5d2ab95ed507f8c510bf8044d9d07b44ad1e0a684b3b8796c9003e39ef/cython-3.1.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:08e998a4d5049ea75932674701fa283397477330d1583bc9f63b693a380a38c6", size = 2958963, upload-time = "2025-10-20T06:09:56.45Z" }, - { url = 
"https://files.pythonhosted.org/packages/f7/0c/c5eb8d2a2f1bbf7b23656609fb4cfc34a0812fca969614c5fbf011bcf122/cython-3.1.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a89cba730a2fd93eb057f0d1f0e0f1d5377f263333ae34038e31df561f77a923", size = 3359452, upload-time = "2025-10-20T06:09:58.617Z" }, - { url = "https://files.pythonhosted.org/packages/b4/b1/8b02f05928e5e5beadafbf6d8c34117f3fb9d5532fd266a9ad80749b50ef/cython-3.1.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2f7994fd7486020cb3a4022121534489d984a42aac773a2eeada1b2e1f057cf9", size = 3154975, upload-time = "2025-10-20T06:10:00.827Z" }, - { url = "https://files.pythonhosted.org/packages/8e/53/a8018e50b64207847ac1de0aa007ca1a3a775ca388f265e85f5d70bcb754/cython-3.1.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b92ed80e3be2b35f594587389d9f7399860c8f17d9e4f23b7046f022f254b10b", size = 3307804, upload-time = "2025-10-20T06:10:02.559Z" }, - { url = "https://files.pythonhosted.org/packages/32/c5/c761968122169696648a5a8a4c228a34e6de2a62b98d27c18c57235f8303/cython-3.1.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ada0c4eb7a98948a2a45444062a07995c8d3fa6fc5bc5a14a0e57ef793d0d8b7", size = 3170533, upload-time = "2025-10-20T06:10:04.952Z" }, - { url = "https://files.pythonhosted.org/packages/47/af/c6e585912d19360bf02408368322a6c458dc1c0e867f75baa8b4f0f6bcdc/cython-3.1.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5a3b6e75c8ffa5a06824be6e3858990ed1e88d432dcfc4ec865d419c44eaa29d", size = 3372608, upload-time = "2025-10-20T06:10:06.622Z" }, - { url = "https://files.pythonhosted.org/packages/95/0f/34aa595446a485333b09398de8a769a9f80e58c2b07918b6268cba5ebe71/cython-3.1.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:834378e535e524168f9e54ae6bb4bbd3e414bbc7e4532945b715bd867a2be0ce", size = 3319976, upload-time = "2025-10-20T06:10:08.303Z" }, - { url = 
"https://files.pythonhosted.org/packages/8f/e3/620258785bd382c19283f37c65bcaa5d6b2437247b4bb4b40128ca96638a/cython-3.1.5-cp313-cp313-win32.whl", hash = "sha256:18e6049138f4ad45fa3947437fe74126c5d932a36cdb93cb3a70715712021c2d", size = 2481579, upload-time = "2025-10-20T06:10:10.159Z" }, - { url = "https://files.pythonhosted.org/packages/71/98/bd2cd37ee7f2420e73d21082e137ba949186e293044f24c0954a9595d018/cython-3.1.5-cp313-cp313-win_amd64.whl", hash = "sha256:fcebc7112872828f8815eb73e0c1572975f982af8febc56cfa369aa996e24142", size = 2703469, upload-time = "2025-10-20T06:10:11.799Z" }, - { url = "https://files.pythonhosted.org/packages/7c/52/a44f5b3e7988ef3a55ea297cd5b56204ff5d0caaf7df048bcb78efe595ab/cython-3.1.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:888bf3f12aadfb2dc2c41e83932f40fc2ac519933c809aae16e901c4413d6966", size = 3046849, upload-time = "2025-10-20T06:10:14.087Z" }, - { url = "https://files.pythonhosted.org/packages/d2/a8/fb84d9b6cc933b65f4e3cedc4e69a1baa7987f6dfb5165f89298521c2073/cython-3.1.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:85ffc5aa27d2e175bab4c649299aa4ae2b4c559040a5bf50b0ad141e76e17032", size = 2967186, upload-time = "2025-10-20T06:10:16.286Z" }, - { url = "https://files.pythonhosted.org/packages/74/ee/a5aba9d36dacbda936335186a6ee3195bf780fd8a8a98e1a6e17351ca9a4/cython-3.1.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e4d7f37e4217e1e93c944a175865deffbf16c9901eaba48fc35473afbfb658d4", size = 3359989, upload-time = "2025-10-20T06:10:18.384Z" }, - { url = "https://files.pythonhosted.org/packages/08/64/1a058f052c71390b4440c8e1dc93bc09cdf04ec4d49e9fde0524b38e0678/cython-3.1.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5503aa6eec0faeba03428058a4911994cdf1f668baf84c87fad8c862415c5f3d", size = 3193017, upload-time = "2025-10-20T06:10:20.3Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/fd/de9461718977b59560630bd0ad07dcb77209df7f4e7774ef0ec8f787433d/cython-3.1.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:99943633ea61dfb53093e16827cc66c376b1513fb37f5ce8e052e49f4852ae85", size = 3312092, upload-time = "2025-10-20T06:10:21.998Z" }, - { url = "https://files.pythonhosted.org/packages/c0/e3/5b57fa9a72b24b80ba23225d53886d07b714920e6bb19fc83a09977799b6/cython-3.1.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a82183bbbc8591de7ca902f2a22e2ffc82e31fd1a66f1180931f522050db5eb2", size = 3209437, upload-time = "2025-10-20T06:10:23.784Z" }, - { url = "https://files.pythonhosted.org/packages/fd/14/ebe6d9172d0ed6bca68bb21c384694922d7a8eef6dcf8d4c843be7128f0a/cython-3.1.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9daa08ff24ef526ae2aa5560430a3121f1584b406945a17d7e0bbf9c18bf161a", size = 3375201, upload-time = "2025-10-20T06:10:25.703Z" }, - { url = "https://files.pythonhosted.org/packages/25/30/9e28256ceb70511636f5e5340dfa36a4310a41bc0e190734b62b75a7993b/cython-3.1.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d6d13320e01e719cf9668daa88ccd9f84bae74f26ac1a3779b4ec32bc40feaeb", size = 3323425, upload-time = "2025-10-20T06:10:27.484Z" }, - { url = "https://files.pythonhosted.org/packages/13/ff/0f4dc479c6d4fec80a48613141c8ce8de98d75dc549d01cc87364057c4de/cython-3.1.5-cp314-cp314-win32.whl", hash = "sha256:51a7ef5688d3d37d762ee6df83a567b0a67bde7528a467e9dc82df9d9fc23c46", size = 2503714, upload-time = "2025-10-20T06:10:29.144Z" }, - { url = "https://files.pythonhosted.org/packages/19/75/0cd7a00833496aa4c5eb76e6fa118fc51faf92947e090af799fa6ff30c16/cython-3.1.5-cp314-cp314-win_amd64.whl", hash = "sha256:8ac9324feb0694a941794222444600536f9c44b120b5745e1aa7042504281aa1", size = 2735084, upload-time = "2025-10-20T06:10:30.921Z" }, - { url = 
"https://files.pythonhosted.org/packages/1b/33/8af1a1d424176a5f8710b687b84dd2f403e41b87b0e0acf569d39723f257/cython-3.1.5-py3-none-any.whl", hash = "sha256:1bef4a168f4f650d17d67b43792ed045829b570f1e4108c6c37a56fe268aa728", size = 1227619, upload-time = "2025-10-20T06:06:48.387Z" }, +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/58/6a8321cc0791876dc2509d7a22fc75535a1a7aa770b3496772f58b0a53a4/cython-3.1.6.tar.gz", hash = "sha256:ff4ccffcf98f30ab5723fc45a39c0548a3f6ab14f01d73930c5bfaea455ff01c", size = 3192329, upload-time = "2025-10-23T12:38:20.786Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/bb/23e917f1d2a11834730ff07cdb7e7c87ab72c16090b3d61b86477a38cc68/cython-3.1.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c4027b4d1bf7781fdfb2dbe1c1d81ccac9b910831511747e2c9fc8452fb3ea6b", size = 2989648, upload-time = "2025-10-23T12:38:38.272Z" }, + { url = "https://files.pythonhosted.org/packages/cd/72/9ec7797714c65bf45d11fb33361fd5cb522556d8a2a2e808f17db6a3aaf6/cython-3.1.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:141dea9df09f9c711af3b95510bd417c58b2abd33676eef1cb61f25581f7090a", size = 2914302, upload-time = "2025-10-23T12:38:39.888Z" }, + { url = "https://files.pythonhosted.org/packages/30/cd/63d551eb65273e144e9ee84bf697190586201dd02d2fd719b68e7da724e2/cython-3.1.6-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:486376a988268408b7e8ea7b4cccffb914aa497c498b41589fb4a862ba47e050", size = 3507159, upload-time = "2025-10-23T12:38:41.988Z" }, + { url = "https://files.pythonhosted.org/packages/44/bd/c451e15cd89ee98fa5207689505f9a211f79cdb4d18f2f96a7c9c6e7f3f6/cython-3.1.6-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdc6e63a04ead11812752a5198b85b7fc079688c76712348d072403f18fdeb49", size = 3261427, upload-time = "2025-10-23T12:38:43.838Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/dc/a4102de1a15a2ef56fc46e4486da112a8701b63ff98077d0ebaa39792e44/cython-3.1.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47e79f0bfbf403a5d6008bc9e7214e81e647794ca95cae6716399ba21abcc706", size = 3423208, upload-time = "2025-10-23T12:38:45.953Z" }, + { url = "https://files.pythonhosted.org/packages/e0/d6/dff399500588611e2bf189f191cc03bc985c80aaa263242c3abcd93122f7/cython-3.1.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2379f729f1d5a445adb4621f279f7c23aeb6245f036f96cce14b5b2fd1f5ff0a", size = 3276605, upload-time = "2025-10-23T12:38:47.825Z" }, + { url = "https://files.pythonhosted.org/packages/09/b1/af3d75e6b4363abd8efbe18cf90709b7dee38108846f3c7377ee50b8adcb/cython-3.1.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1089e18d938b6e742f077e398d52e1701080213c4f203755afde6f1b33d9e051", size = 3521386, upload-time = "2025-10-23T12:38:49.929Z" }, + { url = "https://files.pythonhosted.org/packages/0c/58/6fc30fba52c9cf35bb5d02effc7b16cdc9aa3d3aa56b07e47429c59ee657/cython-3.1.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:73576246abbc62397db85cbdde74d2e5d73dabfdb7e593fdbb3671275ffb50ce", size = 3437394, upload-time = "2025-10-23T12:38:52.145Z" }, + { url = "https://files.pythonhosted.org/packages/69/c9/10bde13a679d1dc90b86bba754d94b126637686f4bba7637e14a923b8962/cython-3.1.6-cp310-cp310-win32.whl", hash = "sha256:f48eae3275b3352ba7eb550fc5321b0fb1ba8d916fa9985fb2f02ce42ae69ddd", size = 2480812, upload-time = "2025-10-23T12:38:54.126Z" }, + { url = "https://files.pythonhosted.org/packages/c9/60/c5dd9af41c9ec6ee406b423458065d2d3427422e0eb1bb91794c8ab3b787/cython-3.1.6-cp310-cp310-win_amd64.whl", hash = "sha256:4066908ee24a18572880966de1d0865d178f5ab9828a9249faa97e1ffdfbed9f", size = 2705655, upload-time = "2025-10-23T12:38:56.064Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/44/631939fd36577fccf0c47c9cd14fdc3d8125cde166ed2b2f1abdf9a505cc/cython-3.1.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a1aedd8990f470d108b76ca768d9f1766d6610cf2546b73075dbe1e523daebe", size = 3022464, upload-time = "2025-10-23T12:38:57.677Z" }, + { url = "https://files.pythonhosted.org/packages/ec/68/700aef24fcf73f77940fec7efa27c18da68f6a5446dfce5e3a253ab707e3/cython-3.1.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f75c33e83e224737b1a68b2868bc08bddaabc6f04aef74864ff6069fe2e68341", size = 2952046, upload-time = "2025-10-23T12:38:59.684Z" }, + { url = "https://files.pythonhosted.org/packages/fd/9e/5dba03cc21190bd6756bb4717038a16cc87930ef32399c6d0e6bbbe538b3/cython-3.1.6-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:91b8fb3e961b3344bf257b851f2ce679727f44857fec94d643bcc458601dab54", size = 3408110, upload-time = "2025-10-23T12:39:01.442Z" }, + { url = "https://files.pythonhosted.org/packages/cb/45/81897d8802666d10086639b0f70702d2f9d03bb5358b012bb109b08b4dd1/cython-3.1.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1cfeb04d43464f5ff8398b499ba46c6eef22093da0e74b25f972576e768880e7", size = 3187425, upload-time = "2025-10-23T12:39:03.661Z" }, + { url = "https://files.pythonhosted.org/packages/3b/ed/1a1e93703edf37ee822c03013246d2b4c05a8ea689105051205150dadf07/cython-3.1.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f32366c198ac663a540ff4fa6ed55801d113183616c51100f4cc533568d2c4cf", size = 3309991, upload-time = "2025-10-23T12:39:05.801Z" }, + { url = "https://files.pythonhosted.org/packages/6e/11/147aefe4bdc5aa4f273283ea62949001d877808f4ad8a3b4774baf05f0ac/cython-3.1.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9856e8cd7f7a95a3f10a8f15fef4d17e5a4a57fb5185fe3482cec4adb0536635", size = 3202048, upload-time = "2025-10-23T12:39:07.52Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/82/6a43a68a1c9e22bef7476eb5a4fd8987812972b6746991b7b16b599aa872/cython-3.1.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6966f4d4ee13eceade2d952dc63bdf313f413c0c3f165aef0d6f62e6f27dab02", size = 3424512, upload-time = "2025-10-23T12:39:09.241Z" }, + { url = "https://files.pythonhosted.org/packages/2e/d1/40dfa6c02bde72669525a2666aff5b0c75b0ec6f9d965b4beb1582ad4b6c/cython-3.1.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dffb14bc986626be50003f4edc614a2c0a56cbaaf87259f6c763a6d21da14921", size = 3326637, upload-time = "2025-10-23T12:39:11.376Z" }, + { url = "https://files.pythonhosted.org/packages/58/7c/c8dab163f2c9f8e3c4972aee31a45307f2b96733f799aa036ba05292efa8/cython-3.1.6-cp311-cp311-win32.whl", hash = "sha256:cde4748d37483b6c91df9f4327768e2828b1e374cb61bcee06d618958de59b7b", size = 2478500, upload-time = "2025-10-23T12:39:12.958Z" }, + { url = "https://files.pythonhosted.org/packages/e0/34/895cda4ac7e93460cedb28f609a7c056f09c1db5694ed38058f680c56386/cython-3.1.6-cp311-cp311-win_amd64.whl", hash = "sha256:29d6141b0c9697dfcaf5940eceb06353bec76f51f0579658964c0d29418000df", size = 2709986, upload-time = "2025-10-23T12:39:15.042Z" }, + { url = "https://files.pythonhosted.org/packages/70/cd/6e7bb9ef074d35c1b62af91c9f92126fae992d5a8fb6b47fdd1ade67bf56/cython-3.1.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0d2c32e8f6c65854e8203b381ff7ab540820763756b7c326e2c8dc18c9bbb44e", size = 3059014, upload-time = "2025-10-23T12:39:16.823Z" }, + { url = "https://files.pythonhosted.org/packages/13/04/a1b4fe2a4c72eb8fdcdf6b680908328f920f813caeb72f1b5d2cea40e45c/cython-3.1.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be24fcde7300a81712af279467ebc79baafc8483eb4dfa4daebf8ee90a826d39", size = 2966746, upload-time = "2025-10-23T12:39:18.56Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/44/347f48b0ccfaa8233860a64b88a9df851138058ea923583e68625528710f/cython-3.1.6-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5012025af433bd7188fe1f7705df1c4a67e7add80c71658f6c6bc35ea876cc68", size = 3383297, upload-time = "2025-10-23T12:39:20.231Z" }, + { url = "https://files.pythonhosted.org/packages/98/80/e065d0725614ce9ff43624ae1d9f81647c5fd2d88ecffc2614dde703482d/cython-3.1.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3b3520e2d4484f927c3ec00d32ffda75ec72cfd6a2ee07adac721cce339fa26f", size = 3164391, upload-time = "2025-10-23T12:39:22.036Z" }, + { url = "https://files.pythonhosted.org/packages/95/e1/3f86f321ff6bfd31310a5478f5ac56eaac3ea0743f6b76543ff5fbcb2b4e/cython-3.1.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c8a01d241d775319bcd7adb4144b070e1c4b01cdf841a62032492f07fad9efdc", size = 3316085, upload-time = "2025-10-23T12:39:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/94/b5/677a2f4faa1c036cedbb715edc933b09de3e235891f1fcdaa82f8c3fdc85/cython-3.1.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fd88799fa7bb177182423e0745c9197c50938c6839ebfbe6fd01539582ed488e", size = 3176911, upload-time = "2025-10-23T12:39:25.749Z" }, + { url = "https://files.pythonhosted.org/packages/f8/e4/21117a7768ab19fcd766f2dd81f0a61d2d24e7a3649eff306349c2ab99a8/cython-3.1.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f809bae2e00b79c01ff5daf9a260df7c1bc9fda087b9d625592fa28c1a2248a9", size = 3396231, upload-time = "2025-10-23T12:39:28.168Z" }, + { url = "https://files.pythonhosted.org/packages/b5/4e/1152e9bfa0357d2237449fad94673c273f72c011a54c7227bb1291dd4423/cython-3.1.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f657e7a4b2242d159de603f280928d8e458dfba48144714774ad76c08f5a530", size = 3327101, upload-time = "2025-10-23T12:39:30.361Z" }, + { url = 
"https://files.pythonhosted.org/packages/39/fe/b7f9dc5ba8ce221aa7d40587d1d7175871b2ea61917c7fa4d5e85a7c042f/cython-3.1.6-cp312-cp312-win32.whl", hash = "sha256:6502f3e58db0ab3e2c983bec2c8c9e45d602e2c7ff921a5a8515b0008d918102", size = 2483823, upload-time = "2025-10-23T12:39:31.986Z" }, + { url = "https://files.pythonhosted.org/packages/40/d5/60261f023b0bdb28f0b9e8f00690b8bdbef692995184bc57f33811f8a936/cython-3.1.6-cp312-cp312-win_amd64.whl", hash = "sha256:71d099d8d6094c5de63a32e67b29964565aed889a218e8d16a94083f4239b904", size = 2701846, upload-time = "2025-10-23T12:39:33.769Z" }, + { url = "https://files.pythonhosted.org/packages/cc/96/22b43125180d9b2814da4271d9450a5cc4623a6c6439b6b1d8faa7675c81/cython-3.1.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:14f0d6b9f803eacf48e9e80ea12a03f54e5f5ac48914341b0a6b81554b3b3154", size = 3047517, upload-time = "2025-10-23T12:39:35.641Z" }, + { url = "https://files.pythonhosted.org/packages/db/09/8abf6ccb13d1e2589e60320423f861952cf4c4ec092cd8536e1beb018e9c/cython-3.1.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ec79615d0e29fa29fd4283bc7a2ed9c3d00532086a0031532d64b724db8c3e8e", size = 2954975, upload-time = "2025-10-23T12:39:37.568Z" }, + { url = "https://files.pythonhosted.org/packages/a6/4d/c3455fb738f52d536e7a113749c0a2242943251ce2d0dfac0e42ebba2fc0/cython-3.1.6-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:037d457738cf4fc12260946c6524b745f488cf413428099f2a064af7612d181f", size = 3355462, upload-time = "2025-10-23T12:39:39.462Z" }, + { url = "https://files.pythonhosted.org/packages/6b/b4/923f4d7ca7d987573aa2df0ca48fa9a103a48ddf1aec9cd8fcef9618b787/cython-3.1.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b036cb4ed7abcbc89cc04311832b22ad386c532fdd1fe690e1364aa992a54c7", size = 3150852, upload-time = "2025-10-23T12:39:41.416Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/2c/985dd11b6cc3ac2e460c5e0b59030aebca66a85f9423db90e5186e8e9087/cython-3.1.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e0fb2694327834c5bda7c5a07605f76437354d0ff76bb8739e77b479d176cf52", size = 3304059, upload-time = "2025-10-23T12:39:43.154Z" }, + { url = "https://files.pythonhosted.org/packages/69/af/b3af74d1d10a0f6d4d9fcdd836959ae54dabb36f84f316b09ccb84dbd8e0/cython-3.1.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:92eb7a39e60426165a5b2a219af181e5695c4dedd598e317a7a4d9086bd66b91", size = 3166353, upload-time = "2025-10-23T12:39:45.146Z" }, + { url = "https://files.pythonhosted.org/packages/f1/2d/48130ecef876f141aaded34a961f32be45d2f36aa285de08d2e81aa5fec3/cython-3.1.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c475018b28f4e7111148bd02b600595090e0aac6cc49615c4586bb4e7f164a22", size = 3368659, upload-time = "2025-10-23T12:39:46.908Z" }, + { url = "https://files.pythonhosted.org/packages/2f/b2/0cd9ff5be3f0d224bc139eea8a8e83066d61ad424cf7fd0f43c3c4b791d4/cython-3.1.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b1b4bb661103cb95c6ca70daf5d39992b2d89fd260b02a54d92e365095ed37eb", size = 3316247, upload-time = "2025-10-23T12:39:48.699Z" }, + { url = "https://files.pythonhosted.org/packages/f9/0f/55f95e166c591fb8fd8caeb1f2c86cf86ef6f7f929a56094615ab757dc11/cython-3.1.6-cp313-cp313-win32.whl", hash = "sha256:69b1bea23b51628b8c9f14c3e0bb4c7dd5be63781bfbaa581b1c683b473c728a", size = 2477610, upload-time = "2025-10-23T12:39:51.014Z" }, + { url = "https://files.pythonhosted.org/packages/2e/07/23aa4577513a5e918c0deaf8a2ab8a9a5e6703e3fe554e3bc2c3bda1ef58/cython-3.1.6-cp313-cp313-win_amd64.whl", hash = "sha256:c844004712a9fe2a6f2ed4d6fe02aabb2e0e34f88c150724aad1afec7caff37a", size = 2699460, upload-time = "2025-10-23T12:39:54.146Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/16/e399f6fd33912116aba8bcdfeadd6093ff14996d7b5b72212fe4301e9f96/cython-3.1.6-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8893619efa77fc83934c1255c619d522711a5cf5933cef0d5c2b9755e8e5fabc", size = 3042822, upload-time = "2025-10-23T12:39:56.081Z" }, + { url = "https://files.pythonhosted.org/packages/94/aa/5500ff58f8972431c0e74783546b8cdc39511493aa44b74a7fde1ec4e654/cython-3.1.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:bb49c74220af0b098f406701f0b87876b1c7614716d39786306986b9feea774b", size = 2963154, upload-time = "2025-10-23T12:39:57.933Z" }, + { url = "https://files.pythonhosted.org/packages/cb/04/caa7893a4259e4bdb333a40a2105d58b53294445d9d2cf948eac9f0346b5/cython-3.1.6-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:defbf9571fca78e8a6e21b93d35c0a491d6af77a8e6180a0146da1b3c8eb8ce6", size = 3356015, upload-time = "2025-10-23T12:39:59.856Z" }, + { url = "https://files.pythonhosted.org/packages/df/da/6736caaf38a4d9f09db4b8dd76d0c8f7937820c2eef4d899f80259566298/cython-3.1.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cd7ea8c6ce0adf52d142bf37c4d54b8d0356818144a4584a24f2a0b9cdae6b8", size = 3188923, upload-time = "2025-10-23T12:40:01.926Z" }, + { url = "https://files.pythonhosted.org/packages/e8/ba/5dbee7f80c11c57a68b1e26d285e106ab259e7cf50536369b28f952b5809/cython-3.1.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c47fcc47553214e0a139fd33199d825c5d13970cd6c1039d2594af855ffb338", size = 3308343, upload-time = "2025-10-23T12:40:03.673Z" }, + { url = "https://files.pythonhosted.org/packages/81/c0/2759f4e2ec2f10ac941b2963de217f0ee6c0f6b2767ddcbaeba799c77dec/cython-3.1.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:92489385bca6d1935913540e35701a979618fdfeed4dbec6cad1be924fb487bf", size = 3205352, upload-time = "2025-10-23T12:40:05.431Z" }, + { url = 
"https://files.pythonhosted.org/packages/c7/fc/077b0084300d42bc69f4c9468c1946882884db859daa48b2b98b8f194fad/cython-3.1.6-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:926a3efd9b7012cdb3df0d1886e6f0e32e0b72a5d311ac2d3f48c0716fd91c6d", size = 3371256, upload-time = "2025-10-23T12:40:07.174Z" }, + { url = "https://files.pythonhosted.org/packages/60/71/4461521017e51b66a2d8dd443a596d636c87149e2d6ae95d664cbfdb1303/cython-3.1.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e35118eedfa0138154a43fb6b14e83703dae93193ba9940c747c170ed845cca7", size = 3319689, upload-time = "2025-10-23T12:40:09.181Z" }, + { url = "https://files.pythonhosted.org/packages/5b/53/f8dfff20e06dd3a6a39ed7b5ba784a9797eb206ec7df56f35c0e0ca31a49/cython-3.1.6-cp314-cp314-win32.whl", hash = "sha256:27f2b26442737d6e080900284883e078aae0276dfd7715a49b338f1a9481f7b9", size = 2499779, upload-time = "2025-10-23T12:40:11.306Z" }, + { url = "https://files.pythonhosted.org/packages/0a/cd/fef529bcc8eb6b55caf8bda524ee6194593137579fdc4ee616ff2a40dd2a/cython-3.1.6-cp314-cp314-win_amd64.whl", hash = "sha256:7f75ead2a7cad5ee719427b915711c70e40a114f045b2a9b5bd983484a0b83a7", size = 2731204, upload-time = "2025-10-23T12:40:13.878Z" }, + { url = "https://files.pythonhosted.org/packages/18/d5/7a04640bf559bb890455ffb28978daf7d44f667c3f04a4d422c655c1ba92/cython-3.1.6-py3-none-any.whl", hash = "sha256:91dcf7eb9b6a089ce4e9e1140e571d84c3bca834afb77ec269be7aa9d31a8157", size = 1223550, upload-time = "2025-10-23T12:38:16.732Z" }, ] [[package]] @@ -1794,7 +1794,7 @@ http2 = [ [[package]] name = "huggingface-hub" -version = "0.35.3" +version = "0.36.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -1806,9 +1806,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/10/7e/a0a97de7c73671863ca6b3f61fa12518caf35db37825e43d63a70956738c/huggingface_hub-0.35.3.tar.gz", hash = 
"sha256:350932eaa5cc6a4747efae85126ee220e4ef1b54e29d31c3b45c5612ddf0b32a", size = 461798, upload-time = "2025-09-29T14:29:58.625Z" } +sdist = { url = "https://files.pythonhosted.org/packages/98/63/4910c5fa9128fdadf6a9c5ac138e8b1b6cee4ca44bf7915bbfbce4e355ee/huggingface_hub-0.36.0.tar.gz", hash = "sha256:47b3f0e2539c39bf5cde015d63b72ec49baff67b6931c3d97f3f84532e2b8d25", size = 463358, upload-time = "2025-10-23T12:12:01.413Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/31/a0/651f93d154cb72323358bf2bbae3e642bdb5d2f1bfc874d096f7cb159fa0/huggingface_hub-0.35.3-py3-none-any.whl", hash = "sha256:0e3a01829c19d86d03793e4577816fe3bdfc1602ac62c7fb220d593d351224ba", size = 564262, upload-time = "2025-09-29T14:29:55.813Z" }, + { url = "https://files.pythonhosted.org/packages/cb/bd/1a875e0d592d447cbc02805fd3fe0f497714d6a2583f59d14fa9ebad96eb/huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d", size = 566094, upload-time = "2025-10-23T12:11:59.557Z" }, ] [[package]] From 855aa49176f6ff44f24b8f389ad66fcddc54217c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 17:44:55 +0200 Subject: [PATCH 039/334] ci: Fix copyright checker (#1889) (#1890) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cicd-main.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 94d486f2fb5..047926a188e 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -61,10 +61,12 @@ jobs: - name: Check membership id: check-membership + env: + IS_MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }} run: | PR_AUTHOR=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }} - if [ "${{ env.SCHEDULED_JOB }}" == "true" ]; then + if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ 
"${IS_MAIN_BRANCH}" == "true" ]; then echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT exit 0 fi From 6068fa9edf1bbb4cbdf6b33d3a4753642af18fb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 15:50:15 +0000 Subject: [PATCH 040/334] ci: Run on dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cicd-main.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 047926a188e..94d486f2fb5 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -61,12 +61,10 @@ jobs: - name: Check membership id: check-membership - env: - IS_MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }} run: | PR_AUTHOR=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }} - if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ]; then + if [ "${{ env.SCHEDULED_JOB }}" == "true" ]; then echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT exit 0 fi From 885679487613c17360c30dcec2b6d802dc5d9cde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 18:18:55 +0200 Subject: [PATCH 041/334] ci: Bump copyright header (#1894) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/copyright-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/copyright-check.yml b/.github/workflows/copyright-check.yml index 0584b2692c2..ff135c6c958 100644 --- a/.github/workflows/copyright-check.yml +++ b/.github/workflows/copyright-check.yml @@ -31,7 +31,7 @@ jobs: if: | !(needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true') - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.65.3 + uses: 
NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.65.4 with: from-year: 2019 From beceec698486a033262116199f90318ec3df865e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 16:23:49 +0000 Subject: [PATCH 042/334] ci: Allow runs on dev branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cicd-main.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 94d486f2fb5..f21fdfed446 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -61,10 +61,13 @@ jobs: - name: Check membership id: check-membership + env: + IS_MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }} + IS_DEV_BRANCH: ${{ github.ref == 'refs/heads/dev' }} run: | PR_AUTHOR=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }} - if [ "${{ env.SCHEDULED_JOB }}" == "true" ]; then + if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] || [ "${IS_DEV_BRANCH}" == "true" ]; then echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT exit 0 fi From 13b6a3675b86a395174d7cca4cc8b636a7cb2704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 16:29:20 +0000 Subject: [PATCH 043/334] ci: Linting on push MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cicd-main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index f21fdfed446..e70677e19fb 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -187,7 +187,7 @@ jobs: export PATH=".venv/bin:$PATH" export GITLAB_ENDPOINT=github.com export CI_PROJECT_NAMESPACE=NVIDIA - export BASE_REF="${{ startsWith(github.ref, 
'refs/heads/pull-request/') && fromJSON(steps.get-pr-info.outputs.pr-info).base.ref || 'HEAD~1' }}" + export BASE_REF="${{ startsWith(github.ref, 'refs/heads/pull-request/') && fromJSON(steps.get-pr-info.outputs.pr-info).base.ref || github.sha }}" export CHECK_ONLY=true export SKIP_DOCS=false bash tools/autoformat.sh From 14a0a23f4a0e8b2b3c67051c2bd9fbdd4775b62e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 16:31:21 +0000 Subject: [PATCH 044/334] ci: Run linting only on PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cicd-main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index e70677e19fb..89d33506082 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -183,11 +183,12 @@ jobs: uses: nv-gha-runners/get-pr-info@main - name: Run linting + if: startsWith(github.ref, 'refs/heads/pull-request/') run: | export PATH=".venv/bin:$PATH" export GITLAB_ENDPOINT=github.com export CI_PROJECT_NAMESPACE=NVIDIA - export BASE_REF="${{ startsWith(github.ref, 'refs/heads/pull-request/') && fromJSON(steps.get-pr-info.outputs.pr-info).base.ref || github.sha }}" + export BASE_REF="${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }}" export CHECK_ONLY=true export SKIP_DOCS=false bash tools/autoformat.sh From 8e035496979cd6eb37595975ab725d93c69a8143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 17:09:16 +0000 Subject: [PATCH 045/334] ci(fix): HAS_RUN_TESTS_LABEL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/actions/action.yml | 2 +- .github/workflows/cicd-main.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index 
d2f43599182..831f840d22b 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -111,7 +111,7 @@ runs: GH_TOKEN: ${{ github.token }} run: | PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} - HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run tests")') + HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run tests")') || echo "false" echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT - name: Create run-script (e2e test) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 89d33506082..38739c07b1f 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -363,7 +363,7 @@ jobs: GH_TOKEN: ${{ secrets.PAT }} run: | PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} - HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run tests")') + HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. 
== "Run tests")') || echo "false" echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT - name: Parse functional tests From da842988caa8fcf68ff6e153f446244f06eb629e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 17:50:57 +0000 Subject: [PATCH 046/334] ci: Fix linting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 18 +------- tools/check_copyright.py | 94 ++++++++++++++++++++++++++++++++++++++ tools/copyright.sh | 50 +++++++++----------- 3 files changed, 118 insertions(+), 44 deletions(-) create mode 100644 tools/check_copyright.py diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index af972c8d0cf..db10271da15 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -286,23 +286,8 @@ test:linting_formatting: exit 0 fi - set +e - - git fetch origin main:main - - echo -e "machine gitlab-master.nvidia.com\n login gitlab-ci-token\n password $CI_JOB_TOKEN" >~/.netrc - - chmod 600 ~/.netrc - - | - if [[ "$CI_MERGE_REQUEST_PROJECT_PATH" == "$CI_MERGE_REQUEST_SOURCE_PROJECT_PATH" ]]; then - BASE_REF="$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" bash tools/autoformat.sh - set -e - git fetch origin $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME - git checkout $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME - git config --global user.email "mcore-bot@nvidia.com" - git config --global user.name "Mcore Bot" - git remote set-url origin "https://gitlab-ci-token:${PAT}@${GITLAB_ENDPOINT}/$CI_PROJECT_NAMESPACE/megatron-lm.git" - git add -A . 
- git commit -m "chore: Format files" || true - git push -u origin $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME - fi - env + - export GITLAB_ENDPOINT=gitlab-ci-token:${PAT}@${GITLAB_ENDPOINT} - BASE_REF="$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" CHECK_ONLY=true SKIP_DOCS=$([[ "$CI_MERGE_REQUEST_LABELS" == *"Skip docs"* ]] && echo "true" || echo "false") bash tools/autoformat.sh test:linting_copyright: @@ -318,6 +303,7 @@ test:linting_copyright: needs: [test:build_image] script: - git fetch origin main + - export GITLAB_ENDPOINT=gitlab-ci-token:${PAT}@${GITLAB_ENDPOINT} - bash tools/copyright.sh # Override from template diff --git a/tools/check_copyright.py b/tools/check_copyright.py new file mode 100644 index 00000000000..a62334d2421 --- /dev/null +++ b/tools/check_copyright.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +""" +Script to check and optionally add NVIDIA copyright headers to files. +""" + +import sys +import argparse +from pathlib import Path +from datetime import datetime + +EXPECTED_HEADER = """# Copyright (c) {}-{}, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + + +def has_correct_header(file_path, from_year: int): + """Check if file has the correct copyright header.""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Check if the expected header is at the start of the file + return content.startswith(EXPECTED_HEADER.format(from_year, str(datetime.now().year))) + except Exception as e: + print(f"Error reading {file_path}: {e}") + return False + + +def main(): + parser = argparse.ArgumentParser( + description='Check and add NVIDIA copyright headers to files.' + ) + parser.add_argument( + 'files', + nargs='+', + help='Files to check/modify' + ) + parser.add_argument( + '--from-year', + type=int, + required=True, + help='Project creation year' + ) + + args = parser.parse_args() + + missing_headers = [] + + for file_path in args.files: + path = Path(file_path) + + if not path.exists(): + print(f"File not found: {file_path}") + continue + + if not path.is_file(): + print(f"Not a file: {file_path}") + continue + + if has_correct_header(path, args.from_year): + print(f"✓ Header present: {file_path}") + else: + print(f"✗ Header missing: {file_path}") + missing_headers.append(path) + + # Exit with error code if headers are missing and not added + if missing_headers: + print(f"\n{len(missing_headers)} file(s) missing copyright header.") + print("\n") + print("Add or replace the header in those files with the following content:") + print(EXPECTED_HEADER) + print("\n") + print( + "Disclaimer: This must done irrespective of the magnitude of the change " + "or whether your are the file/module author." + ) + sys.exit(1) + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/tools/copyright.sh b/tools/copyright.sh index 66098f84d2b..3223733647e 100644 --- a/tools/copyright.sh +++ b/tools/copyright.sh @@ -1,34 +1,28 @@ #!/bin/bash +set -euox pipefail -# Files ending with .py should have Copyright notice in the first line. 
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +GIT_VERSION=$(git version | awk '{print $3}') +GIT_MAJOR=$(echo $GIT_VERSION | awk -F. '{print $1}') +GIT_MINOR=$(echo $GIT_VERSION | awk -F. '{print $2}') -# Move to the project root -cd $SCRIPT_DIR/.. -find_files_with_missing_copyright() { -find ./megatron/ -type f -name '*.py' | while read path; do - echo -en $path"\t" - head -2 $path | grep -iv 'coding=' | head -1 -done \ - | egrep -iv 'Copyright.*NVIDIA CORPORATION.*All rights reserved.' \ - | grep -iv 'BSD 3-Clause License' \ - | grep -iv 'Copyright.*Microsoft' \ - | grep -iv 'Copyright.*The Open AI Team' \ - | grep -iv 'Copyright.*The Google AI' \ - | grep -iv 'Copyright.*Facebook' | while read line; do - echo $line | cut -d' ' -f1 - done -} +if [[ $GIT_MAJOR -eq 2 && $GIT_MINOR -lt 31 ]]; then + echo "Git version must be at least 2.31.0. Found $GIT_VERSION" + exit 1 +fi +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) -declare RESULT=($(find_files_with_missing_copyright)) # (..) 
= array +BASE_REF=${BASE_REF:-main} +git remote set-url origin "https://${GITLAB_ENDPOINT}/$CI_PROJECT_NAMESPACE/megatron-lm.git" +git fetch origin ${BASE_REF} +CHANGED_FILES=$(git diff --name-only --diff-filter=d --merge-base origin/${BASE_REF} megatron/core tests/ | grep '\.py$' || true) -if [ "${#RESULT[@]}" -gt 0 ]; then - echo "Error: Found files with missing copyright:" - for (( i=0; i<"${#RESULT[@]}"; i++ )); do - echo "path= ${RESULT[$i]}" - done - exit 1; -else - echo "Ok: All files start with copyright notice" -fi +if [[ -n "$CHANGED_FILES" ]]; then + CMD="python ${SCRIPT_DIR}/check_copyright.py" + + # Add the files + CMD="$CMD --from-year 2019 $CHANGED_FILES" + + # Run the check + eval $CMD +fi \ No newline at end of file From 38166a61514d121bac99341763238fe2c984d969 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 20:46:29 +0200 Subject: [PATCH 047/334] ci: Add codeowners to dev branch (#1898) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/CODEOWNERS | 15 +++++++++++++++ .gitlab/stages/02.test.yml | 24 ++++++++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000000..cc3cb0dbc58 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,15 @@ +megatron/core @NVIDIA/core-nemo @NVIDIA/core-devtech + +.gitlab/ @NVIDIA/ci +.github/ @NVIDIA/ci +.gitlab-ci.yml @NVIDIA/ci +docker/ @NVIDIA/ci +tests/unit_tests/run_ci_test.sh @NVIDIA/ci +tests/test_utils/python_scripts/ +tests/functional_tests/python_test_utils/ @NVIDIA/ci +tests/functional_tests/shell_test_utils/ @NVIDIA/ci +megatron/core/transformer/transformer_block.py @NVIDIA/ci +megatron/core/transformer/transformer_layer.py @NVIDIA/ci +tests/functional_tests/test_cases/ @NVIDIA/ci +tests/functional_tests/recipes/ @NVIDIA/ci +tests/unit_tests/ @NVIDIA/ci diff 
--git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index db10271da15..f4f06fbca9d 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -11,8 +11,10 @@ include: wait_for_resources: extends: [.test_rules] needs: - - test:linting_formatting - - test:linting_copyright + - job: test:linting_formatting + optional: true + - job: test:linting_copyright + optional: true - job: test:linting_secret_detection optional: true - test:build_image @@ -127,8 +129,10 @@ test:unit_tests_configure: .unit_tests_run: needs: - - test:linting_formatting - - test:linting_copyright + - job: test:linting_formatting + optional: true + - job: test:linting_copyright + optional: true - job: test:linting_secret_detection optional: true - test:unit_tests_configure @@ -280,6 +284,12 @@ test:linting_formatting: needs: [test:build_image] variables: GIT_STRATEGY: "clone" + rules: + - if: $PUBLISH == "yes" + when: never + - if: $CI_PIPELINE_SOURCE == 'push' + when: never + - when: on_success script: - | if [[ "$CI_PIPELINE_SOURCE" != "merge_request_event" ]]; then @@ -301,6 +311,12 @@ test:linting_copyright: - team/megatron image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID} needs: [test:build_image] + rules: + - if: $PUBLISH == "yes" + when: never + - if: $CI_PIPELINE_SOURCE == 'push' + when: never + - when: on_success script: - git fetch origin main - export GITLAB_ENDPOINT=gitlab-ci-token:${PAT}@${GITLAB_ENDPOINT} From 620826b0f7d7e2c588d0584f3e491c4b04fc7694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 19:58:29 +0000 Subject: [PATCH 048/334] ci(fix): dynamic inference tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- tests/test_utils/recipes/gpt-dynamic-inference.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_utils/recipes/gpt-dynamic-inference.yaml b/tests/test_utils/recipes/gpt-dynamic-inference.yaml index 
914d3c0a757..748e4734a6d 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference.yaml @@ -43,7 +43,7 @@ spec: "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" @@ -74,4 +74,3 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - From 829ae2fa40d4e68c22eb4338cbd7bfc4216ac007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 23 Oct 2025 21:01:12 +0000 Subject: [PATCH 049/334] ci(fix): No copyright on push MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index f4f06fbca9d..98bcaeefc7d 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -287,7 +287,7 @@ test:linting_formatting: rules: - if: $PUBLISH == "yes" when: never - - if: $CI_PIPELINE_SOURCE == 'push' + - if: $CI_PIPELINE_SOURCE == 'push' || $CI_PIPELINE_SOURCE == 'schedule' when: never - when: on_success script: @@ -318,6 +318,10 @@ test:linting_copyright: when: never - when: on_success script: + - | + if [[ "$CI_PIPELINE_SOURCE" != "merge_request_event" ]]; then + exit 0 + fi - git fetch origin main - export GITLAB_ENDPOINT=gitlab-ci-token:${PAT}@${GITLAB_ENDPOINT} - bash tools/copyright.sh From f73769735d423a1adcdceb2aa81f3ce71febc65e Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 24 Oct 2025 05:46:02 +0200 Subject: [PATCH 050/334] ci: Move test optimizer into its own bucket (#1909) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/actions/action.yml | 2 +- .github/workflows/cicd-approve-test-queue.yml | 45 +++++++++--- .github/workflows/cicd-main.yml | 40 ++++++++--- .github/workflows/copyright-check.yml | 6 +- .gitlab/stages/02.test.yml | 1 + tests/test_utils/recipes/unit-tests.yaml | 28 ++++++-- tests/unit_tests/find_test_cases.py | 70 +++++++++++++++++++ tests/unit_tests/run_ci_test.sh | 27 ++----- tools/check_copyright.py | 29 ++------ 9 files changed, 170 insertions(+), 78 deletions(-) create mode 100644 tests/unit_tests/find_test_cases.py diff --git a/.github/actions/action.yml b/.github/actions/action.yml index 831f840d22b..157cb8ec5d1 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -82,7 +82,7 @@ runs: uv run python tests/test_utils/python_scripts/launch_nemo_run_workload.py \ --scope unit-tests \ --model unit-tests \ - --test-case '${{ inputs.test_case }}' \ + --test-case "${{ inputs.test_case }}" \ --environment dev \ --platform dgx_h100 \ --tag ${{ inputs.tag }} \ diff --git a/.github/workflows/cicd-approve-test-queue.yml b/.github/workflows/cicd-approve-test-queue.yml index 3e8052c6777..bd87e1d725d 100644 --- a/.github/workflows/cicd-approve-test-queue.yml +++ b/.github/workflows/cicd-approve-test-queue.yml @@ -41,8 +41,8 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.PAT }} MAX_CONCURRENCY: ${{ vars.MAX_CONCURRENCY || 1 }} + shell: python run: | - python - <= MAX_CONCURRENCY: print("Maximum concurrency reached, stopping approvals") @@ -113,7 +138,9 @@ jobs: workflow_id = workflow["id"] workflow_name = workflow["display_title"] - print(f"Approving workflow {workflow_name} with Run Id: {workflow_id}") + pr_info = workflow.get("pull_requests", [{}])[0] + pr_number = 
pr_info.get("number", "unknown") + print(f"Approving workflow {workflow_name} (PR #{pr_number}) with Run Id: {workflow_id}") deployment_url = f"actions/runs/{workflow_id}/pending_deployments" deployment = make_request(deployment_url)[0] @@ -132,8 +159,6 @@ jobs: else: print(f"Failed to approve deployment {deployment['id']}") exit(1) - - EOF notify: if: failure() runs-on: ubuntu-latest diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 38739c07b1f..4a1ae76b081 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -296,21 +296,41 @@ jobs: secrets: | GH_TOKEN=${{ secrets.PAT }} + cicd-parse-unit-tests: + runs-on: ubuntu-latest + outputs: + unit-tests: ${{ steps.parse-unit-tests.outputs.unit-tests }} + needs: + - pre-flight + - cicd-wait-in-queue + - cicd-container-build + if: | + ( + success() + || needs.pre-flight.outputs.is_ci_workload == 'true' + || needs.pre-flight.outputs.force_run_all == 'true' + ) + && needs.pre-flight.outputs.is_merge_group == 'false' + && !cancelled() + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Parse unit tests + id: parse-unit-tests + run: | + cat tests/test_utils/recipes/unit-tests.yaml | yq -o json '[.products[].test_case[] | { "bucket": .}]' | jq -c > unit-tests.json + echo "unit-tests=$(cat unit-tests.json)" | tee -a $GITHUB_OUTPUT + cicd-unit-tests-latest: strategy: fail-fast: false matrix: - include: - - bucket: "unit_tests" - - bucket: "unit_tests/data/" - - bucket: "unit_tests/dist_checkpointing/*.py" - - bucket: "unit_tests/dist_checkpointing/models/" - - bucket: "unit_tests/transformer/*.py" - - bucket: "unit_tests/transformer/moe" + include: ${{ fromJson(needs.cicd-parse-unit-tests.outputs.unit-tests) }} needs: - pre-flight - cicd-wait-in-queue - cicd-container-build + - cicd-parse-unit-tests runs-on: nvidia-ci-aws-gpu-x8 name: "${{ matrix.bucket }} - latest" environment: nemo-ci @@ -332,12 +352,12 @@ jobs: - name: main uses: ./.github/actions 
with: - test_case: tests/${{ matrix.bucket }} + test_case: ${{ matrix.bucket }} tag: latest timeout: ${{ matrix.timeout || 30 }} is_unit_test: "true" PAT: ${{ secrets.PAT }} - container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }} + container-image: ${{ env.container-registry }}/megatron-lm:1909 #${{ github.sha }} cicd-parse-integration-tests: runs-on: ubuntu-latest @@ -414,7 +434,7 @@ jobs: - pre-flight - cicd-wait-in-queue - cicd-parse-integration-tests - # - cicd-unit-tests-latest + - cicd-unit-tests-latest runs-on: nvidia-ci-aws-gpu-x8 name: "${{ matrix.model }}/${{ matrix.test_case }} - latest" environment: nemo-ci diff --git a/.github/workflows/copyright-check.yml b/.github/workflows/copyright-check.yml index ff135c6c958..0463e1dd962 100644 --- a/.github/workflows/copyright-check.yml +++ b/.github/workflows/copyright-check.yml @@ -10,7 +10,7 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License.. +# limitations under the License. 
name: Copyright check @@ -31,9 +31,7 @@ jobs: if: | !(needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true') - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.65.4 - with: - from-year: 2019 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.65.9 copyright-check-summary: needs: [pre-flight, copyright-check] diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 98bcaeefc7d..699bef68181 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -323,6 +323,7 @@ test:linting_copyright: exit 0 fi - git fetch origin main + - BASE_REF="$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" - export GITLAB_ENDPOINT=gitlab-ci-token:${PAT}@${GITLAB_ENDPOINT} - bash tools/copyright.sh diff --git a/tests/test_utils/recipes/unit-tests.yaml b/tests/test_utils/recipes/unit-tests.yaml index aef67781168..d84e507c6df 100644 --- a/tests/test_utils/recipes/unit-tests.yaml +++ b/tests/test_utils/recipes/unit-tests.yaml @@ -59,49 +59,63 @@ spec: cp coverage.xml {assets_dir} products: - - test_case: [tests/unit_tests/data/] + - test_case: [tests/unit_tests/data/**/*.py] products: - environment: [lts, dev] tag: [latest, legacy] scope: [unit-tests] n_repeat: [1] time_limit: [1800] - - test_case: [tests/unit_tests/dist_checkpointing/*.py] + - test_case: [tests/unit_tests/dist_checkpointing/test_optimizer.py] products: - environment: [lts, dev] tag: [latest, legacy] scope: [unit-tests] n_repeat: [1] time_limit: [1800] - - test_case: [tests/unit_tests/dist_checkpointing/models/] + - test_case: [tests/unit_tests/dist_checkpointing/**/*.py] products: - environment: [lts, dev] tag: [latest, legacy] scope: [unit-tests] n_repeat: [1] time_limit: [1800] - - test_case: [tests/unit_tests/transformer/*.py] + - test_case: [tests/unit_tests/dist_checkpointing/models/**/*.py] products: - environment: [lts, dev] tag: [latest, legacy] scope: [unit-tests] n_repeat: [1] time_limit: 
[1800] - - test_case: [tests/unit_tests/transformer/moe] + - test_case: [tests/unit_tests/dist_checkpointing/models/test_moe_experts.py] products: - environment: [lts, dev] tag: [latest, legacy] scope: [unit-tests] n_repeat: [1] time_limit: [1800] - - test_case: [tests/unit_tests/distributed/fsdp] + - test_case: [tests/unit_tests/transformer/**/*.py] + products: + - environment: [lts, dev] + tag: [latest, legacy] + scope: [unit-tests] + n_repeat: [1] + time_limit: [1800] + - test_case: [tests/unit_tests/transformer/moe/**/*.py] + products: + - environment: [lts, dev] + tag: [latest, legacy] + scope: [unit-tests] + n_repeat: [1] + time_limit: [1800] + - test_case: [tests/unit_tests/distributed/fsdp/**/*.py] products: - environment: [lts, dev] tag: [latest] scope: [unit-tests] n_repeat: [1] time_limit: [1800] - - test_case: [tests/unit_tests] + - test_case: [tests/unit_tests/**/*.py] products: - environment: [lts, dev] tag: [latest, legacy] diff --git a/tests/unit_tests/find_test_cases.py b/tests/unit_tests/find_test_cases.py new file mode 100644 index 00000000000..2e9f5515b7d --- /dev/null +++ b/tests/unit_tests/find_test_cases.py @@ -0,0 +1,70 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+#!/usr/bin/env python3 + +import subprocess +import sys +from pathlib import Path + + +def get_test_cases(yaml_file): + result = subprocess.run( + ['yq', 'eval', '.products[].test_case[]', yaml_file], + capture_output=True, + text=True, + check=True, + ) + return [line.strip() for line in result.stdout.strip().split('\n') if line.strip()] + + +def get_base_path(pattern): + if '**' in pattern: + return pattern.split('/**')[0] + elif '*' in pattern: + return pattern.rsplit('/', 1)[0] + return pattern.rstrip('/') + + +def is_child_of_bucket(test_case, bucket): + test_base = get_base_path(test_case) + bucket_base = get_base_path(bucket) + return test_base.startswith(bucket_base + '/') + + +def expand_pattern(pattern): + if '**' in pattern: + parts = pattern.split('/**/') + if len(parts) == 2: + base_dir, file_pattern = parts + else: + # Handle case like 'dir/**' + base_dir = pattern.split('/**')[0] + file_pattern = '*.py' + return [str(f) for f in Path(base_dir).rglob(file_pattern) if f.is_file()] + elif '*' in pattern: + base_dir, file_pattern = pattern.rsplit('/', 1) + return [str(f) for f in Path(base_dir).glob(file_pattern) if f.is_file()] + elif Path(pattern).is_file(): + return [pattern] + return [] + + +def main(): + BUCKET = sys.argv[1] + YAML_FILE = 'tests/test_utils/recipes/unit-tests.yaml' + + all_test_cases = get_test_cases(YAML_FILE) + bucket_files = set(expand_pattern(BUCKET)) + + # Collect files from child test cases to ignore + files_to_ignore = set() + for test_case in all_test_cases: + if test_case != BUCKET and is_child_of_bucket(test_case, BUCKET): + files_to_ignore.update(expand_pattern(test_case)) + + # Output files to ignore + for file in sorted(files_to_ignore & bucket_files): + print(f"--ignore={file}") + + +if __name__ == '__main__': + main() diff --git a/tests/unit_tests/run_ci_test.sh b/tests/unit_tests/run_ci_test.sh index 7e12ebbab1e..81dd3ae2a14 100755 --- a/tests/unit_tests/run_ci_test.sh +++ b/tests/unit_tests/run_ci_test.sh @@ -114,27 
+114,10 @@ for element in "${MARKER[@]:1}"; do done export BUCKET -IGNORE_TEST_CASES=$( - cat $SCRIPT_PATH/../test_utils/recipes/unit-tests.yaml | - yq eval ' - with(.products[].test_case; del(.[] | select(. == env(BUCKET)))) - | .products[].test_case[] - ' | - tr " " "\n" -) - IGNORE_ARGS=() -while IFS= read -r test_case; do - if [[ $test_case == *\** ]]; then - FILES=($(ls $test_case)) - echo ${FILES[@]} - for file in "${FILES[@]}"; do - IGNORE_ARGS+=("--ignore='$file'") - done - else - IGNORE_ARGS+=("--ignore=$test_case") - fi -done <<<"$IGNORE_TEST_CASES" +while IFS= read -r line; do + [[ -n "$line" ]] && IGNORE_ARGS+=("$line") +done < <(python tests/unit_tests/find_test_cases.py "$BUCKET") echo "------ARGUMENTS for SLURM ---" MASTER_ADDR=${MASTER_ADDR:-localhost} @@ -167,7 +150,7 @@ for i in $(seq $UNIT_TEST_REPEAT); do -m pytest \ -xvs \ ${IGNORE_ARGS[@]} \ - -m "'not experimental and ${MARKER_ARG}'" $BUCKET) + -m "'not experimental and ${MARKER_ARG}'" $(echo "$BUCKET" | sed 's|/\*\*/\*\.py$||')) eval "$CMD" if [[ "$TAG" == "latest" ]]; then @@ -175,7 +158,7 @@ for i in $(seq $UNIT_TEST_REPEAT); do -xvs \ --experimental \ ${IGNORE_ARGS[@]} \ - -m "'experimental and ${MARKER_ARG}'" $BUCKET) + -m "'experimental and ${MARKER_ARG}'" $(echo "$BUCKET" | sed 's|/\*\*/\*\.py$||')) eval "$CMD" fi diff --git a/tools/check_copyright.py b/tools/check_copyright.py index a62334d2421..d63cd906eab 100644 --- a/tools/check_copyright.py +++ b/tools/check_copyright.py @@ -8,30 +8,17 @@ from pathlib import Path from datetime import datetime -EXPECTED_HEADER = """# Copyright (c) {}-{}, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" +EXPECTED_HEADER = """# Copyright (c) {} NVIDIA CORPORATION & AFFILIATES. All rights reserved.""" -def has_correct_header(file_path, from_year: int): +def has_correct_header(file_path): """Check if file has the correct copyright header.""" try: with open(file_path, 'r', encoding='utf-8') as f: content = f.read() # Check if the expected header is at the start of the file - return content.startswith(EXPECTED_HEADER.format(from_year, str(datetime.now().year))) + return content.startswith(EXPECTED_HEADER.format(str(datetime.now().year))) except Exception as e: print(f"Error reading {file_path}: {e}") return False @@ -46,12 +33,6 @@ def main(): nargs='+', help='Files to check/modify' ) - parser.add_argument( - '--from-year', - type=int, - required=True, - help='Project creation year' - ) args = parser.parse_args() @@ -68,7 +49,7 @@ def main(): print(f"Not a file: {file_path}") continue - if has_correct_header(path, args.from_year): + if has_correct_header(path): print(f"✓ Header present: {file_path}") else: print(f"✗ Header missing: {file_path}") @@ -79,7 +60,7 @@ def main(): print(f"\n{len(missing_headers)} file(s) missing copyright header.") print("\n") print("Add or replace the header in those files with the following content:") - print(EXPECTED_HEADER) + print(EXPECTED_HEADER.format(str(datetime.now().year))) print("\n") print( "Disclaimer: This must done irrespective of the magnitude of the change " From 176a2ed5787819cbf6da4ee0a549d2108fd59b66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 24 Oct 2025 10:49:13 +0200 Subject: 
[PATCH 051/334] ci: Update container image tags to use github.sha --- .github/workflows/cicd-main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 4a1ae76b081..9c2f8ae6f5f 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -357,7 +357,7 @@ jobs: timeout: ${{ matrix.timeout || 30 }} is_unit_test: "true" PAT: ${{ secrets.PAT }} - container-image: ${{ env.container-registry }}/megatron-lm:1909 #${{ github.sha }} + container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }} cicd-parse-integration-tests: runs-on: ubuntu-latest @@ -462,7 +462,7 @@ jobs: timeout: ${{ matrix.timeout || 30 }} is_unit_test: "false" PAT: ${{ secrets.PAT }} - container-image: ${{ env.container-registry }}/megatron-lm:1864 # ${{ github.sha }} + container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }} Nemo_CICD_Test: needs: From d3d204881762dcf25186a9d0a88df8fd91ef46ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 24 Oct 2025 20:35:05 +0200 Subject: [PATCH 052/334] Ko3n1g/chore/merge main into dev (#1903) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig Co-authored-by: James Shen Co-authored-by: Chen-Han Yu Co-authored-by: Shanmugam Ramasamy Co-authored-by: Shanmugam Ramasamy Co-authored-by: Mcore Bot Co-authored-by: Shanmugam Ramasamy Co-authored-by: Siddharth Singh Co-authored-by: Shanmugam Ramasamy Co-authored-by: Youngeun Kwon Co-authored-by: Shunjia Ding Co-authored-by: Maanu Grover Co-authored-by: Jack Chang Co-authored-by: jianbinc Co-authored-by: xuwenc --- .../workflows/build-test-publish-wheel.yml | 6 +- .github/workflows/cicd-approve-test-queue.yml | 60 +++++++++++-------- .github/workflows/cicd-main.yml | 8 +-- .github/workflows/install-test.yml | 6 +- 4 files changed, 50 insertions(+), 30 deletions(-) diff --git 
a/.github/workflows/build-test-publish-wheel.yml b/.github/workflows/build-test-publish-wheel.yml index 0b6cdd7efdb..1ff9f53202b 100644 --- a/.github/workflows/build-test-publish-wheel.yml +++ b/.github/workflows/build-test-publish-wheel.yml @@ -21,6 +21,8 @@ on: - main - "pull-request/[0-9]+" - "deploy-release/*" + merge_group: + types: [checks_requested] defaults: run: @@ -32,12 +34,13 @@ permissions: jobs: pre-flight: - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.64.2 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.5 build-test-publish-wheel: needs: [pre-flight] if: | !(needs.pre-flight.outputs.docs_only == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true') uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.63.1 with: @@ -61,6 +64,7 @@ jobs: if: | ( needs.pre-flight.outputs.docs_only == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || always() ) diff --git a/.github/workflows/cicd-approve-test-queue.yml b/.github/workflows/cicd-approve-test-queue.yml index bd87e1d725d..1f23905d5d8 100644 --- a/.github/workflows/cicd-approve-test-queue.yml +++ b/.github/workflows/cicd-approve-test-queue.yml @@ -23,6 +23,9 @@ jobs: approve-queue: runs-on: ubuntu-latest environment: main + strategy: + matrix: + branch: [main, dev] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -45,13 +48,13 @@ jobs: run: | import os import requests - + import re # GitHub API configuration GITHUB_TOKEN = os.environ["GITHUB_TOKEN"] REPO = os.environ["GITHUB_REPOSITORY"] - MAX_CONCURRENCY = int(os.environ["MAX_CONCURRENCY"]) - API_BASE = f"https://api.github.com/repos/{REPO}" + MAX_CONCURRENCY = int(os.environ["MAX_CONCURRENCY"]) // 2 + API_BASE = f"https://api.github.com/repos/NVIDIA/Megatron-LM" # Headers for GitHub API headers = { @@ 
-76,22 +79,30 @@ jobs: print(f"Response: {e.response.text}") return None - def is_pr_targeting_main(workflow_run): - """Check if a workflow run belongs to a PR targeting main branch.""" - # Check if it's a pull_request event - if workflow_run.get("event") != "pull_request": - return False + def is_pr_targeting_branch(workflow_run, target_branch): + """ + Check if a workflow run belongs to a PR targeting the given branch. + Extract PR number from head branch like 'pull-request/1913' and verify base branch. + """ + print(workflow_run.get("head_branch", "")) + head_branch = workflow_run.get("head_branch", "") + match = re.match(r"pull-request/(\d+)", head_branch) + if not match: + return False # Not a PR branch pattern + + pr_number = int(match.group(1)) - # Get the head branch and base branch from pull_requests - pull_requests = workflow_run.get("pull_requests", []) - if not pull_requests: + # Fetch PR info from GitHub API + pr_info = make_request(f"pulls/{pr_number}") + if not pr_info: + print(f"Failed to fetch PR #{pr_number}") return False - - # Check if any PR is targeting main - for pr in pull_requests: - if pr.get("base", {}).get("ref") == "main": - return True - + + base_branch = pr_info.get("base", {}).get("ref") + if base_branch == target_branch: + print(f"PR #{pr_number} targets {target_branch}") + return True + return False # Get current running and queued workflows @@ -99,19 +110,19 @@ jobs: queued_workflow_runs = make_request("actions/runs?status=queued").get("workflow_runs", []) in_progress_workflow_runs = make_request("actions/runs?status=in_progress").get("workflow_runs", []) - # Filter for workflows belonging to PRs targeting main + # Filter for workflows belonging to PRs targeting ${{ matrix.branch }} queued_workflow_runs = [run for run in queued_workflow_runs - if run["name"] == "CICD Megatron-LM" and is_pr_targeting_main(run)] + if run["name"] == "CICD Megatron-LM" and is_pr_targeting_branch(run, "${{ matrix.branch }}")] in_progress_workflow_runs 
= [run for run in in_progress_workflow_runs - if run["name"] == "CICD Megatron-LM" and is_pr_targeting_main(run)] + if run["name"] == "CICD Megatron-LM" and is_pr_targeting_branch(run, "${{ matrix.branch }}")] # Count running and queued workflows queued_workflows = len(queued_workflow_runs) in_progress_workflows = len(in_progress_workflow_runs) total_workflows = queued_workflows + in_progress_workflows - print(f"Current queued workflows (PRs targeting main): {queued_workflows}") - print(f"Current running workflows (PRs targeting main): {in_progress_workflows}") + print(f"Current queued workflows (PRs targeting ${{ matrix.branch }}): {queued_workflows}") + print(f"Current running workflows (PRs targeting ${{ matrix.branch }}): {in_progress_workflows}") print(f"Total workflows: {total_workflows}") print(f"Max concurrency: {MAX_CONCURRENCY}") @@ -122,8 +133,9 @@ jobs: # Get waiting CI workflows for test environment print("Fetching deployments...") pending_workflows = make_request("actions/runs?status=waiting").get("workflow_runs", []) + print("Pending workflows:", len(pending_workflows)) pending_workflows = [run for run in pending_workflows - if run["name"] == "CICD Megatron-LM" and is_pr_targeting_main(run)] + if run["name"] == "CICD Megatron-LM" and is_pr_targeting_branch(run, "${{ matrix.branch }}")] # Sort deployments by creation date (oldest first) print("Sorting workflows...") @@ -140,7 +152,7 @@ jobs: workflow_name = workflow["display_title"] pr_info = workflow.get("pull_requests", [{}])[0] pr_number = pr_info.get("number", "unknown") - print(f"Approving workflow {workflow_name} (PR #{pr_number}) with Run Id: {workflow_id}") + print(f"Approving workflow {workflow_name} with Run Id: {workflow_id}") deployment_url = f"actions/runs/{workflow_id}/pending_deployments" deployment = make_request(deployment_url)[0] diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 9c2f8ae6f5f..88be3d5bcc3 100644 --- a/.github/workflows/cicd-main.yml 
+++ b/.github/workflows/cicd-main.yml @@ -47,7 +47,6 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.PAT }} REPO: ${{ github.repository }} - SCHEDULED_JOB: ${{ github.event_name == 'schedule' }} steps: - name: Checkout repository uses: actions/checkout@v4 @@ -63,11 +62,12 @@ jobs: id: check-membership env: IS_MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }} - IS_DEV_BRANCH: ${{ github.ref == 'refs/heads/dev' }} + IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }} + SCHEDULED_JOB: ${{ github.event_name == 'schedule' }} run: | PR_AUTHOR=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }} - if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] || [ "${IS_DEV_BRANCH}" == "true" ]; then + if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] [ "${IS_DEV_BRANCH}" == "true" ] || [ "${IS_MERGE_GROUP}" == "true" ]; then echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT exit 0 fi @@ -148,7 +148,7 @@ jobs: pre-flight: needs: [is-not-external-contributor] - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.0 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.5 linting: runs-on: ubuntu-latest diff --git a/.github/workflows/install-test.yml b/.github/workflows/install-test.yml index 8e409ef2207..419202dbc2c 100644 --- a/.github/workflows/install-test.yml +++ b/.github/workflows/install-test.yml @@ -24,15 +24,18 @@ on: - main - "pull-request/[0-9]+" - "deploy-release/*" + merge_group: + types: [checks_requested] jobs: pre-flight: - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.64.2 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.5 pip-test-pytorch: needs: [pre-flight] if: | !(needs.pre-flight.outputs.docs_only == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true') runs-on: linux-amd64-cpu16 name: Pip - Python${{ 
matrix.python-version }} - AMD64/Linux - NGC PyTorch @@ -77,6 +80,7 @@ jobs: needs: [pre-flight] if: | !(needs.pre-flight.outputs.docs_only == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true') runs-on: linux-amd64-cpu16 name: UV - Python${{ matrix.python-version }} - AMD64/Linux - NGC PyTorch From 1ef95d9cc965be5b2373a490eee4f6badda30a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 24 Oct 2025 18:43:37 +0000 Subject: [PATCH 053/334] ci: Fix approval bot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/00.pre.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/stages/00.pre.yml b/.gitlab/stages/00.pre.yml index 35ebef1ecb8..5c74073ff14 100644 --- a/.gitlab/stages/00.pre.yml +++ b/.gitlab/stages/00.pre.yml @@ -379,10 +379,10 @@ pre:approve_merge_gate: gh api "repos/$REPO/actions/runs?status=waiting" --jq '.workflow_runs[].id' \ | while read run_id; do - HEAD_BRANCH=$(gh api "repos/$REPO/actions/runs/$run_id" --jq '.head_branch') + HEAD_BRANCH=$(gh api "repos/$REPO/actions/runs/$run_id" --jq '.head_branch' 2>/dev/null) || continue PR_NUMBER="${HEAD_BRANCH##*/}" if [ -n "$PR_NUMBER" ]; then - PR_BASE=$(gh api "repos/$REPO/pulls/$PR_NUMBER" --jq '.base.ref') + PR_BASE=$(gh api "repos/$REPO/pulls/$PR_NUMBER" --jq '.base.ref' 2>/dev/null) || continue if [ "$PR_BASE" = "$TARGET_BRANCH" ]; then gh api \ --method POST "repos/$REPO/actions/runs/$run_id/pending_deployments" \ From 9b8d7033349d38d57b40dff8aeb4deeb5230d6b8 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Fri, 24 Oct 2025 14:28:40 -0500 Subject: [PATCH 054/334] ci: Fix dev branch CI (#1922) Fix dev branch CI For some reason, on the dev branch, the call to `energy_monitor.pause()` fails in the training script. 
It does not seem to be related to the dependencies because this still fails when using the same docker image with same pyproject.toml and uv.lock file. I recommend we merge this to unblock the dev branch and allow us more time to dig deeper into the root cause. --------- Signed-off-by: Charlie Truong Co-authored-by: Oliver Koenig --- megatron/training/training.py | 17 ++++++++++------- .../gpt/gpt3_mcore_tp1_pp2/model_config.yaml | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/megatron/training/training.py b/megatron/training/training.py index fec4c1a3dc7..f805dab0f15 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1241,7 +1241,7 @@ def setup_model_and_optimizer( # set dense model related args in to global args before getting dense model args.num_experts = None args.expert_model_parallel_size = 1 - args.ffn_hidden_size = moe_ffn_hidden_size * args.moe_upcycling_granularity + args.ffn_hidden_size = moe_ffn_hidden_size * args.moe_upcycling_granularity # get dense model dense_model_for_upcycling = get_model(model_provider_func, model_type) @@ -1838,7 +1838,8 @@ def save_checkpoint_and_time( # Stop timer to get accurate train interval time and exclude checkpointing duration timers('interval-time').stop() - energy_monitor.pause() + if args.log_energy: + energy_monitor.pause() # Extra barrier is added to make sure all ranks report the max time. 
timer_key = 'save-checkpoint-non-persistent' if non_persistent_ckpt else 'save-checkpoint' @@ -1880,7 +1881,9 @@ def save_checkpoint_and_time( ) # Recover timing - energy_monitor.resume() + if args.log_energy: + energy_monitor.resume() + timers('interval-time', log_level=0).start(barrier=True) @@ -2791,7 +2794,7 @@ def evaluate_and_print_results( eval_iters = [args.eval_iters] else: eval_iters = args.eval_iters - + if args.full_validation: assert len(eval_iters) == len(data_iterators) @@ -2807,7 +2810,7 @@ def evaluate_and_print_results( eval_iters = [args.eval_iters] else: eval_iters = args.eval_iters - + for index, (iterator, iterations) in enumerate(zip(data_iterators, eval_iters)): suffix = "" if args.multiple_validation_sets: @@ -2925,7 +2928,7 @@ def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider build_train_valid_test_datasets_provider, (1, 1, 1) if getattr(args, 'perform_rl_step', False) else None ) valid_ds = [valid_ds] if not isinstance(valid_ds, list) else valid_ds - + # Build dataloders. 
train_dataloader = build_pretraining_data_loader(train_ds, args.consumed_train_samples) @@ -3000,7 +3003,7 @@ def _get_iterator(dataloader_type, dataloader): if valid_dataloaders is not None: # when using full validation, we need to override eval iters with the correct - # number of iterations on tp rank 0 so that it can be distributed to the other + # number of iterations on tp rank 0 so that it can be distributed to the other # ranks later if args.full_validation: if args.multiple_validation_sets: diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml index 84da70b66c7..4cc6e53b8c8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml @@ -1,4 +1,4 @@ -s`ENV_VARS: +ENV_VARS: CUDA_DEVICE_MAX_CONNECTIONS: 1 NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 NCCL_ALGO: Ring From 10d280ada1df76241435f47a24b37869354f65ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 24 Oct 2025 21:47:25 +0200 Subject: [PATCH 055/334] Ko3n1g/ci/cherrypick automation dev (#1926) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cherry-pick-release-commit.yml | 5 ++++- .github/workflows/cicd-main.yml | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cherry-pick-release-commit.yml b/.github/workflows/cherry-pick-release-commit.yml index 0fc1da80015..9cf8ed98660 100644 --- a/.github/workflows/cherry-pick-release-commit.yml +++ b/.github/workflows/cherry-pick-release-commit.yml @@ -17,10 +17,13 @@ on: push: branches: - main + - dev jobs: cherry-pick: - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.31.0 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.65.9 + with: + 
target-branches-pattern: 'core_(*dev_)?r[0-9]+\.[0-9]+\.[0-9]+' secrets: PAT: ${{ secrets.PAT }} SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 88be3d5bcc3..f5a999858dd 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -62,6 +62,7 @@ jobs: id: check-membership env: IS_MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }} + IS_DEV_BRANCH: ${{ github.ref == 'refs/heads/dev' }} IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }} SCHEDULED_JOB: ${{ github.event_name == 'schedule' }} run: | From 017c7b3a3c1f31d25f687b419930e11e46b09d8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 24 Oct 2025 19:52:49 +0000 Subject: [PATCH 056/334] ci: Fix dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cicd-main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index f5a999858dd..96deabcf9f3 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -68,7 +68,7 @@ jobs: run: | PR_AUTHOR=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }} - if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] [ "${IS_DEV_BRANCH}" == "true" ] || [ "${IS_MERGE_GROUP}" == "true" ]; then + if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] || [ "${IS_DEV_BRANCH}" == "true" ] || [ "${IS_MERGE_GROUP}" == "true" ]; then echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT exit 0 fi From 598d41f2b987ffe2f9f9598d2e41e5ef99e4e4ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sat, 25 Oct 2025 16:20:19 +0200 Subject: [PATCH 057/334] Ko3n1g/chore/merge main into dev20251025 (#1943) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/pull_request_template.md | 64 + .github/workflows/cicd-main.yml | 17 +- .github/workflows/community-bot.yml | 3 +- .gitlab-ci.yml | 4 + .gitlab/stages/00.pre.yml | 51 +- .gitlab/stages/01.build.yml | 2 + .gitlab/stages/02.test.yml | 104 +- .gitlab/stages/03.integration-tests.yml | 2 + .gitlab/stages/04.functional-tests.yml | 2 + .gitlab/stages/05.publish.yml | 48 +- .../golden_values_dev_dgxh100_eos.json | 178 ++ .../golden_values_dev_dgxh100_eos.json | 178 ++ .../golden_values_dev_dgxh100_eos.json | 2699 +++++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 161 + .../python_scripts/approve_merge_gate.py | 117 + tests/test_utils/recipes/unit-tests.yaml | 21 + 16 files changed, 3491 insertions(+), 160 deletions(-) create mode 100644 .github/pull_request_template.md create mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json create mode 100644 tests/test_utils/python_scripts/approve_merge_gate.py diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000000..7f7dedd27ad --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,64 @@ +# What does this PR do ? + + +:warning: For major changes (either in lines of code or in its impact), please make sure to first share discuss a design-doc with the team. 
+ +## Contribution process + +```mermaid +flowchart LR + A[Pre-checks] --> B[PR Tests] + subgraph Code Review/Approval + C1[Expert Review] --> C2[Final Review] + end + B --> C1 + C2 --> D[Merge] +``` + +### Pre-checks + +- [ ] I want this PR in a versioned release and have added the appropriate Milestone (e.g., `Core 0.8`) +- [ ] I have added relevant unit tests +- [ ] I have added relevant functional tests +- [ ] I have added proper typing to my code [Typing guidelines](https://docs.python.org/3/library/typing.html) +- [ ] I have added relevant documentation +- [ ] I have run the [autoformatter.sh](https://github.com/NVIDIA/Megatron-LM/blob/main/tools/autoformat.sh) on my PR + +### Code review + +The following process is enforced via the CODEOWNERS file for changes into `megatron/core`. For changes outside of `megatron/core`, it is up to the PR author whether or not to tag the Final Reviewer team. + +
+For MRs into `main` branch + +#### (Step 1): Add PR label `Expert Review` + +#### (Step 2): Collect the expert reviewers reviews + +1. Attach the `Expert Review` label when your PR is ready for review. +2. GitHub auto-assigns expert reviewers based on your changes. They will get notified and pick up your PR soon. + +:warning: Only proceed to the next step once all reviewers have approved, merge-conflict are resolved and the CI is passing. +Final Review might get declined if these requirements are not fulfilled. + +#### (Step 3): Final Review + +1. Add `Final Review` label +2. GitHub auto-assigns final reviewers based on your changes. They will get notified and pick up your PR soon. + +#### (Optional Step 4): Cherry-pick into release branch + +If this PR also needs to be merged into `core_r*` release branches, after this PR has been merged, select `Cherry-pick` to open a new PR into the release branch. + +
+ +
+For MRs into `dev` branch +The proposed review process for `dev` branch is under active discussion. + +MRs are mergeable after one approval by either `eharper@nvidia.com` or `zijiey@nvidia.com`. +</details>
+ +### Merging your PR + +Any member of [core-adlr](https://github.com/orgs/teams/NVIDIA/core-adlr) and [`core-nemo`](https://github.com/orgs/teams/NVIDIA/core-nemo) will be able to merge your PR. diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 96deabcf9f3..d1e411be98f 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -14,7 +14,7 @@ name: CICD Megatron-LM on: schedule: - - cron: "0 */2 * * *" + - cron: 0 0 * * * push: branches: - dev @@ -23,6 +23,7 @@ on: - "deploy-release/*" merge_group: types: [checks_requested] + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }} @@ -149,7 +150,7 @@ jobs: pre-flight: needs: [is-not-external-contributor] - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.5 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.10 linting: runs-on: ubuntu-latest @@ -319,7 +320,7 @@ jobs: - name: Parse unit tests id: parse-unit-tests run: | - cat tests/test_utils/recipes/unit-tests.yaml | yq -o json '[.products[].test_case[] | { "bucket": .}]' | jq -c > unit-tests.json + cat tests/test_utils/recipes/unit-tests.yaml | yq -o json '[.products[].test_case[] | { "bucket": .}] | sort_by(.model, .test_case)' | jq -c > unit-tests.json echo "unit-tests=$(cat unit-tests.json)" | tee -a $GITHUB_OUTPUT cicd-unit-tests-latest: @@ -367,6 +368,14 @@ jobs: - cicd-wait-in-queue - cicd-container-build - cicd-unit-tests-latest + if: | + ( + success() + || needs.pre-flight.outputs.is_ci_workload == 'true' + || needs.pre-flight.outputs.force_run_all == 'true' + ) + && needs.pre-flight.outputs.is_merge_group == 'false' + && !cancelled() outputs: integration-tests: ${{ steps.main.outputs.integration-tests }} steps: @@ -491,7 +500,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} RUN_ID: ${{ github.run_id }} - 
SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' }} + SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' }} run: | FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "failure")] | length') || echo 0 SKIPPED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "skipped")] | length') || echo 0 diff --git a/.github/workflows/community-bot.yml b/.github/workflows/community-bot.yml index 57d482afa34..9f939510ed1 100644 --- a/.github/workflows/community-bot.yml +++ b/.github/workflows/community-bot.yml @@ -21,6 +21,7 @@ on: jobs: community-bot: - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_community_bot.yml@v0.49.1 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_community_bot.yml@v0.65.10 secrets: GH_TOKEN: ${{ secrets.PAT }} + environment: main diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6b46d92aacb..5ddf5f094c2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -18,6 +18,8 @@ workflow: - if: $CI_PROJECT_NAMESPACE != "ADLR" || ($CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_PROJECT_PATH != "ADLR/megatron-lm") when: never + - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == 'ci-approve-dev' || $CI_COMMIT_BRANCH == 'ci-approve-main') + # ci-branches only for schedule - if: $CI_COMMIT_BRANCH =~ /ci-/ && $CI_PIPELINE_SOURCE != "schedule" when: never @@ -154,6 +156,8 @@ default: when: runner_system_failure variables: + BUILD: + value: "yes" UNIT_TEST: value: "yes" options: diff --git a/.gitlab/stages/00.pre.yml b/.gitlab/stages/00.pre.yml index 5c74073ff14..dca3a7b47ae 100644 --- 
a/.gitlab/stages/00.pre.yml +++ b/.gitlab/stages/00.pre.yml @@ -8,6 +8,7 @@ include: when: always - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' when: always + - when: never stage: .pre @@ -348,53 +349,3 @@ pre:check_status_of_main: - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train' when: always - when: never - -pre:approve_merge_gate: - extends: [.pre_rules] - image: maniator/gh - tags: - - arch/amd64 - - env/prod - - origin/jet-fleet - - owner/jet-core - - purpose/utility - - team/megatron - script: - - | - set -eoux pipefail - EXIT_CODE=0 - python tests/test_utils/python_scripts/check_status_of_main.py --target-branch "$CI_COMMIT_BRANCH" --once || EXIT_CODE=$? - - export GH_TOKEN=$GH_TOKEN - export REPO=NVIDIA/Megatron-LM - export TARGET_BRANCH="$CI_COMMIT_BRANCH" - - if [[ $EXIT_CODE -eq 0 ]]; then - STATUS="approved" - COMMENT="Main is healthy. Submitting PR." - else - STATUS="rejected" - COMMENT="Main is not healthy. An automation engineer is investigating. No need to take any action." 
- fi - - gh api "repos/$REPO/actions/runs?status=waiting" --jq '.workflow_runs[].id' \ - | while read run_id; do - HEAD_BRANCH=$(gh api "repos/$REPO/actions/runs/$run_id" --jq '.head_branch' 2>/dev/null) || continue - PR_NUMBER="${HEAD_BRANCH##*/}" - if [ -n "$PR_NUMBER" ]; then - PR_BASE=$(gh api "repos/$REPO/pulls/$PR_NUMBER" --jq '.base.ref' 2>/dev/null) || continue - if [ "$PR_BASE" = "$TARGET_BRANCH" ]; then - gh api \ - --method POST "repos/$REPO/actions/runs/$run_id/pending_deployments" \ - -F "environment_ids[]=$(gh api "repos/$REPO/environments" --jq '.environments[] | select(.name=="merge-gate") | .id')" \ - -f state="$STATUS" \ - -f comment="$COMMENT"; - fi - fi - done - retry: - max: 2 - rules: - - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == 'ci-approve-dev' || $CI_COMMIT_BRANCH == 'ci-approve-main') - when: always - - when: never diff --git a/.gitlab/stages/01.build.yml b/.gitlab/stages/01.build.yml index 2fd9e1f32e6..0658daaa9ec 100644 --- a/.gitlab/stages/01.build.yml +++ b/.gitlab/stages/01.build.yml @@ -1,5 +1,7 @@ .build_rules: rules: + - if: $BUILD == "no" + when: never - when: on_success stage: test diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 699bef68181..2f018f94e66 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -2,6 +2,8 @@ rules: - if: $PUBLISH == "yes" when: never + - if: $BUILD == "no" + when: never - when: on_success stage: test @@ -11,10 +13,6 @@ include: wait_for_resources: extends: [.test_rules] needs: - - job: test:linting_formatting - optional: true - - job: test:linting_copyright - optional: true - job: test:linting_secret_detection optional: true - test:build_image @@ -76,7 +74,7 @@ test:unit_tests_configure: "--n-repeat ${UNIT_TEST_REPEAT}" "--time-limit $(( UNIT_TEST_TIMEOUT * 60 ))" "--test-cases all" - "--cluster dgxh100_coreweave" + "--cluster $H100_CLUSTER" "--platform dgx_h100" "--partition batch" "--container-image ${UTILITY_IMAGE}" @@ -161,46 
+159,6 @@ test:unit_tests_configure: - if: $UNIT_TEST == 'yes' && $UNIT_TEST_REPEAT != '0' when: on_success -test:unit_tests_pyt(DEV)_mcore(legacy): - extends: [.unit_tests_run] - variables: - ENVIRONMENT: dev - TAG: legacy - rules: - - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'dev' - when: never - - if: $CI_COMMIT_BRANCH == 'ci-dev-unit-test-extended' - when: never - - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /^core_r/ - when: never - - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'main' - when: never - - if: $UNIT_TEST == 'yes' && $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true" - allow_failure: true - when: on_success - - if: $UNIT_TEST == 'yes' && $UNIT_TEST_REPEAT != '0' - when: on_success - -test:unit_tests_pyt(LTS)_mcore(legacy): - extends: [.unit_tests_run] - variables: - ENVIRONMENT: lts - TAG: legacy - rules: - - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'dev' - when: never - - if: $CI_COMMIT_BRANCH == 'ci-dev-unit-test-extended' - when: never - - if: $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /^core_r/ - when: never - - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'main' - when: never - - if: $UNIT_TEST == 'yes' && $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true" - allow_failure: true - when: on_success - - if: $UNIT_TEST == 'yes' && $UNIT_TEST_REPEAT != '0' - when: on_success - test:unit_tests_pyt(DEV)_mcore(latest): extends: [.unit_tests_run] variables: @@ -271,62 +229,6 @@ test:linting_docs_build: - cd documentation/ - ./repo docs -test:linting_formatting: - extends: [.test_rules] - image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID} - tags: - - arch/amd64 - - env/prod - - origin/jet-fleet - - owner/jet-core - - purpose/utility - - team/megatron - needs: [test:build_image] - variables: - GIT_STRATEGY: "clone" - rules: - - if: $PUBLISH == "yes" - when: 
never - - if: $CI_PIPELINE_SOURCE == 'push' || $CI_PIPELINE_SOURCE == 'schedule' - when: never - - when: on_success - script: - - | - if [[ "$CI_PIPELINE_SOURCE" != "merge_request_event" ]]; then - exit 0 - fi - - set +e - - env - - export GITLAB_ENDPOINT=gitlab-ci-token:${PAT}@${GITLAB_ENDPOINT} - - BASE_REF="$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" CHECK_ONLY=true SKIP_DOCS=$([[ "$CI_MERGE_REQUEST_LABELS" == *"Skip docs"* ]] && echo "true" || echo "false") bash tools/autoformat.sh - -test:linting_copyright: - extends: [.test_rules] - tags: - - arch/amd64 - - env/prod - - origin/jet-fleet - - owner/jet-core - - purpose/utility - - team/megatron - image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID} - needs: [test:build_image] - rules: - - if: $PUBLISH == "yes" - when: never - - if: $CI_PIPELINE_SOURCE == 'push' - when: never - - when: on_success - script: - - | - if [[ "$CI_PIPELINE_SOURCE" != "merge_request_event" ]]; then - exit 0 - fi - - git fetch origin main - - BASE_REF="$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" - - export GITLAB_ENDPOINT=gitlab-ci-token:${PAT}@${GITLAB_ENDPOINT} - - bash tools/copyright.sh - # Override from template secret_detection: rules: diff --git a/.gitlab/stages/03.integration-tests.yml b/.gitlab/stages/03.integration-tests.yml index df4d84234bb..824721b9fb1 100644 --- a/.gitlab/stages/03.integration-tests.yml +++ b/.gitlab/stages/03.integration-tests.yml @@ -1,6 +1,8 @@ .integration_tests_rules: stage: integration_tests rules: + - if: $BUILD == "no" + when: never - if: $INTEGRATION_TEST == "yes" when: on_success - when: never diff --git a/.gitlab/stages/04.functional-tests.yml b/.gitlab/stages/04.functional-tests.yml index ea2f1bcef8c..dbdef4484f2 100644 --- a/.gitlab/stages/04.functional-tests.yml +++ b/.gitlab/stages/04.functional-tests.yml @@ -1,6 +1,8 @@ .functional_tests_rules: stage: functional_tests rules: + - if: $BUILD == "no" + when: never - if: $FUNCTIONAL_TEST == "yes" when: on_success - when: never diff --git 
a/.gitlab/stages/05.publish.yml b/.gitlab/stages/05.publish.yml index 695479179c5..20495434f6b 100644 --- a/.gitlab/stages/05.publish.yml +++ b/.gitlab/stages/05.publish.yml @@ -1,6 +1,8 @@ .publish_common_freeze: stage: publish rules: + - if: $BUILD == "no" + when: never - if: ($CI_COMMIT_BRANCH == "main") && $PUBLISH == "yes" && $PUBLISH_SCOPE == "code-freeze" when: manual - when: never @@ -538,10 +540,6 @@ publish:upload_statistics: stage: publish image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID} needs: - - job: test:unit_tests_pyt(DEV)_mcore(legacy) - optional: true - - job: test:unit_tests_pyt(LTS)_mcore(legacy) - optional: true - job: test:unit_tests_pyt(DEV)_mcore(latest) - job: test:unit_tests_pyt(LTS)_mcore(latest) - job: functional:run_lts_dgx_a100 @@ -749,3 +747,45 @@ publish:merge_into_dev: - if: $CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push" allow_failure: true - when: never + +publish:approve_merge_gate: + stage: publish + image: maniator/gh + tags: + - arch/amd64 + - env/prod + - origin/jet-fleet + - owner/jet-core + - purpose/utility + - team/megatron + script: + - | + set -eoux pipefail + EXIT_CODE=0 + apk add python3 + python -m venv .venv + source .venv/bin/activate + pip install --no-cache-dir python-gitlab click pygithub + export GITLAB_ENDPOINT + export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE} + python tests/test_utils/python_scripts/check_status_of_main.py --target-branch "$CI_COMMIT_BRANCH" --once || EXIT_CODE=$? + + export GH_TOKEN=$GH_TOKEN + export REPO=NVIDIA/Megatron-LM + export TARGET_BRANCH="$CI_COMMIT_BRANCH" + + if [[ $EXIT_CODE -eq 0 ]]; then + export STATUS="approved" + export COMMENT="Main is healthy. Submitting PR." + else + export STATUS="rejected" + export COMMENT="Main is not healthy. An automation engineer is investigating. No need to take any action." 
+ fi + + python tests/test_utils/python_scripts/approve_merge_gate.py + retry: + max: 2 + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" || ($CI_COMMIT_BRANCH == 'ci-approve-dev' || $CI_COMMIT_BRANCH == 'ci-approve-main') + when: always + - when: never diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..5decbad6a1a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,178 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.2756917476654053, + "cuda_graph_request_count_map": { + "372": 0, + "360": 0, + "336": 0, + "312": 0, + "288": 0, + "264": 0, + "240": 0, + "216": 0, + "192": 0, + "168": 0, + "144": 0, + "120": 0, + "96": 0, + "72": 0, + "48": 0, + "24": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + -5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + -2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + 
-0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.331681251525879, + -1.5606917142868042, + -2.454296588897705, + -1.5334703922271729, + -1.2631131410598755, + -2.657367706298828, + -0.6480202078819275, + -0.4550393521785736, + -1.3625166416168213, + -0.8142069578170776, + -0.4496593475341797, + -0.9312890768051147, + -1.732723355293274, + -0.44613128900527954, + -1.6895122528076172, + -0.6082233190536499, + -1.0978344678878784, + -1.1122435331344604, + -0.002520838286727667, + -1.4072327613830566, + -0.007462364621460438, + -0.7548662424087524, + -0.9937503337860107, + -0.0675487294793129, + -0.9595617055892944, + -0.029961343854665756, + -2.205785036087036, + -1.2615025043487549, + -0.7878209352493286 + ] + }, + "throughput": 105.62266013491053 +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..20da149d1f1 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,178 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of 
the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.3700687885284424, + "cuda_graph_request_count_map": { + "372": 0, + "360": 0, + "336": 0, + "312": 0, + "288": 0, + "264": 0, + "240": 0, + "216": 0, + "192": 0, + "168": 0, + "144": 0, + "120": 0, + "96": 0, + "72": 0, + "48": 0, + "24": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + -5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + 
-2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + -0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.331681251525879, + -1.5606917142868042, + -2.454296588897705, + -1.5334703922271729, + -1.2631131410598755, + -2.657367706298828, + -0.6480202078819275, + -0.4550393521785736, + -1.3625166416168213, + -0.8142069578170776, + -0.4496593475341797, + -0.9312890768051147, + -1.732723355293274, + -0.44613128900527954, + -1.6895122528076172, + -0.6082233190536499, + -1.0978344678878784, + -1.1122435331344604, + -0.002520838286727667, + -1.4072327613830566, + -0.007462364621460438, + -0.7548662424087524, + -0.9937503337860107, + -0.0675487294793129, + -0.9595617055892944, + -0.029961343854665756, + -2.205785036087036, + -1.2615025043487549, + -0.7878209352493286 + ] + }, + "throughput": 79.31454807788677 +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..ad16c16b924 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,2699 @@ +{ + "0": { + "input_prompt": "The $500 Cup of coffee?\nConsider this, most Americans spend an average of $1,500-2,000 a year on this bean water.\nI have a few question for you: \nHow has business been the past few months?\nDo you ever feel like your business is stuck?\nDon't feel like you're able to improve performance and make changes required to achieve success ?\nAre your customers spneding less and less and wanting more?\nHave the gas prices affected your business?\nDo you have employees and do they hate you or wish they could quit?\n\nNow, before you and I can decide wheter or not I will be a good fit for your business we should talk this over with coffee.\nAnd, just to warn you this isn't some casual thing. This is not a date or time to be personal or social (but by all means share what you will coz I'll gladly listen).\nTher eare two major talking points and stratagies we will focios on in our lil coffee social\nFor one, we will find your unique selling Proposition (USP).\nDo have the best price? Are you the cheapest in town? Are your customers jerks? Do you haVE REGULARS? Why do people come back?\nwe'll also look for the holes in your business bucket. 
I'm willing to bet there's a hole or two in your business we'll find together that'll make this 500 dollar cup of Joe pay for itse;f immedietly.\nMany find themselves to be more profitable by just finding out where the dollars are escaping in their business and I like to think of myself as a guy that comes along with some spakel or putty and patch those holes up for you.\nBeleive me, just fixing one hole can mean a lot...just think about a sinking boat that has a hole in it that's about 3\u201d in diameter... it doesn't take long to sink.\nI have no agenda, besides f=getting to know your business and seeing wher I can patch the holes and find what makes you do darn unique (I know this won't take long.)\nMany folks, I bet, will find what they need to get off their chest with a quick phone call and they just paypal me the money and make a coffee at home. Look, that's fine too.\nI just to get you ot of your comfort zone, because this is where it all starts my frind.\nSome smart GOAT entrepreneur will probably get everything they need just out of our lil mini consulatant for the more extensive business consukting I offer, and look, that's fine, too.\nMaybe this cup of coffee will be all you need to gtet back on your feet, but not only surive, but thrive!\nI'm not trying to make frineds, or make a bunch of money, or look down your shirt\nBut this is only going to be a 45 minute (max) coffee chat\nAnd, I know you (and me) both have a lot on our plates. 
So no messing around\nAfter our consultation I will follow up with you in a few days and see how things are going, then I will be emailing you about once or twice every two weeks, just to make sure you're staying on task and implementing what we went over.\nTghere is no obligation to go any further and will gladly give you back your money if this pre-consultation doesn't put you on the right path or you don't get any value out of it...", + "generated_text": " $ is a$ is a $ is a $ is a $ is a $ is a $$1, you\n$ $$ $\n$ $- $\n$\n$\n$ $$$\n$\n$\n$\n$\n$\n$\n$\n$???????, $\n-1\n$\n1.5.\n$\n$, you\n$.\n$\n1,1.1\nI\n$.\nI\n\n\nHow\n$,,,0,1,0,0.0\nIn\nThe\nThe\nThe\n", + "generated_tokens": [ + 1659, + 1395, + 1261, + 1036, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1036, + 1049, + 1044, + 1636, + 1010, + 1036, + 1659, + 1036, + 1659, + 1010, + 1036, + 1659, + 1045, + 1659, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1659, + 1036, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1063, + 1063, + 1063, + 1063, + 1063, + 1063, + 1063, + 1044, + 1659, + 1010, + 1045, + 1049, + 1010, + 1036, + 1010, + 1049, + 1046, + 1053, + 1046, + 1010, + 1036, + 1010, + 1036, + 1044, + 1636, + 1010, + 1036, + 1046, + 1010, + 1036, + 1010, + 1049, + 1044, + 1049, + 1046, + 1049, + 1010, + 1073, + 1010, + 1036, + 1046, + 1010, + 1073, + 1010, + 1010, + 1010, + 7801, + 1010, + 1036, + 1044, + 1044, + 1044, + 1048, + 1044, + 1049, + 1044, + 1048, + 1044, + 1048, + 1046, + 1048, + 1010, + 1785, + 1010, + 1784, + 1010, + 1784, + 1010, + 1784, + 1010 + ], + "latency": 10.056535482406616, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -7.7319135665893555, + -2.188307285308838, + -0.7547445297241211, + -0.7294313311576843, + -10.238386154174805, + -3.3775341510772705, + -6.394498825073242, + 
-7.354557037353516, + -9.018157958984375, + -3.012073040008545, + -3.2584073543548584, + -5.220732688903809, + -4.620487213134766, + -2.5078930854797363, + -3.752683162689209, + -0.13360372185707092, + -0.05705544352531433, + -0.41462242603302, + -1.585279941558838, + -1.6438164710998535, + -1.9557222127914429, + -0.3989897072315216, + -0.0365302674472332, + -6.368816375732422, + -0.8731719255447388, + -0.022585075348615646, + -0.2775891423225403, + -0.0027362785767763853, + -0.0006812873762100935, + -1.581446647644043, + -0.008688976056873798, + -0.3532317280769348, + -6.071163177490234, + -9.162371635437012, + -9.965556144714355, + -2.400461196899414, + -2.9898362159729004, + -2.9803032875061035, + -2.12601900100708, + -3.500912666320801, + -7.015069007873535, + -2.278961420059204, + -0.46380555629730225, + -4.078739166259766, + -1.9430254697799683, + -3.5642244815826416, + -3.689701795578003, + -6.201474189758301, + -6.580833911895752, + -2.3081111907958984, + -5.42717170715332, + -1.1886008977890015, + -1.172760248184204, + -1.3571951389312744, + -1.3551844358444214, + -3.376784324645996, + -0.05118789151310921, + -4.064360618591309, + -2.575554847717285, + -0.6994737386703491, + -2.56724214553833, + -2.1888976097106934, + -0.4816131591796875, + -4.070178985595703, + -2.0060782432556152, + -6.858033180236816, + -0.059200502932071686, + -3.214278221130371, + -0.9671833515167236, + -0.823198676109314, + -1.0130078792572021, + -4.595561981201172, + -0.012724989093840122, + -5.214311599731445, + -8.246870040893555, + -3.1476030349731445, + -3.299684524536133, + -4.218191146850586, + -7.318399429321289, + -0.8580498695373535, + -3.0894036293029785, + -1.886361002922058, + -7.217658996582031, + -3.271679639816284, + -3.9717154502868652, + -1.8835484981536865, + -10.034332275390625, + -11.382490158081055, + -5.417011260986328, + -7.505967140197754, + -2.33837890625, + -0.07904055714607239, + -3.294971227645874, + -7.813640594482422, + -1.7646901607513428, + 
-4.025320053100586, + -3.5977325439453125, + -4.390352249145508, + -9.147806167602539, + -0.5303041934967041, + -7.721246242523193, + -0.6311959028244019, + -0.8119025230407715, + -0.7227814197540283, + -1.8369406461715698, + -0.20933297276496887, + -1.5395950078964233, + -4.424448490142822, + -4.084965705871582, + -3.355497360229492, + -1.0475609302520752, + -6.479413986206055, + -0.7810530662536621, + -2.132437229156494, + -6.648703098297119, + -2.9522438049316406, + -1.2485712766647339, + -4.040503025054932, + -2.3415768146514893, + -5.358206748962402, + -1.6258506774902344, + -3.956300973892212, + -0.732298731803894, + -7.441117286682129, + -1.5242161750793457, + -2.4555861949920654, + -4.295163154602051, + -9.687600135803223, + -0.8213484883308411, + -1.2446978092193604, + -0.01942702941596508, + -4.619411468505859, + -3.3297007083892822, + -2.2139487266540527, + -3.691431999206543, + -2.6574106216430664, + -6.075929641723633, + -0.6123450994491577, + -1.2942559719085693, + -0.6262839436531067, + -7.398006439208984, + -4.4869890213012695, + -4.202048301696777, + -4.982994079589844, + -0.637227475643158, + -3.061023235321045, + -10.117584228515625, + -3.8567495346069336, + -4.0480828285217285, + -2.472019672393799, + -4.246374607086182, + -1.3939155340194702, + -7.132441520690918, + -0.20108745992183685, + -4.986658573150635, + -4.387957572937012, + -0.01108358334749937, + -4.209756851196289, + -7.271108627319336, + -4.047314643859863, + -2.6497321128845215, + -1.4763175249099731, + -0.28365400433540344, + -3.5247769355773926, + -1.4226995706558228, + -4.327237129211426, + -2.0407187938690186, + -6.1437907218933105, + -1.5190880298614502, + -2.5511486530303955, + -7.504094123840332, + -2.152172565460205, + -6.708334922790527, + -6.913146495819092, + -3.6959621906280518, + -6.752341270446777, + -0.63083815574646, + -0.12433214485645294, + -5.0525641441345215, + -4.435934066772461, + -0.45601028203964233, + -6.3459577560424805, + -9.882917404174805, + 
-3.1422882080078125, + -2.550520658493042, + -3.2099051475524902, + -6.278127193450928, + -0.07764133810997009, + -3.155696153640747, + -1.933587670326233, + -9.61027717590332, + -6.211391925811768, + -4.664543151855469, + -6.783782005310059, + -5.676271438598633, + -8.605900764465332, + -0.0824289619922638, + -3.5463995933532715, + -13.374168395996094, + -1.2401021718978882, + -1.8734056949615479, + -3.4154422283172607, + -1.6733763217926025, + -17.633970260620117, + -9.345113754272461, + -0.6277351975440979, + -2.9617538452148438, + -2.5565333366394043, + -10.10580825805664, + -7.130337715148926, + -7.36820125579834, + -4.098911285400391, + -5.747079372406006, + -2.945054769515991, + -0.7887389063835144, + -1.6583149433135986, + -1.0165244340896606, + -6.581666946411133, + -5.926386833190918, + -5.845194339752197, + -0.9657630920410156, + -7.868755340576172, + -1.3244551420211792, + -0.2657390236854553, + -0.06403665244579315, + -2.983020782470703, + -5.943899631500244, + -7.877285957336426, + -3.593116283416748, + -3.819509506225586, + -7.226177215576172, + -2.5206997394561768, + -3.385587215423584, + -0.37499159574508667, + -1.4698283672332764, + -3.1460342407226562, + -0.0077166082337498665, + -4.350916862487793, + -3.2183218002319336, + -0.6242184638977051, + -1.4782464504241943, + -2.8054311275482178, + -3.0831401348114014, + -12.17662525177002, + -2.113419532775879, + -1.6448111534118652, + -2.1834323406219482, + -0.7630388140678406, + -10.1896390914917, + -6.234405517578125, + -11.46288776397705, + -1.003785490989685, + -4.211658477783203, + -1.5010679960250854, + -5.859302043914795, + -2.0465080738067627, + -3.7468819618225098, + -4.684195518493652, + -4.318704128265381, + -2.7234389781951904, + -9.00437068939209, + -3.043811321258545, + -3.1384406089782715, + -2.713779926300049, + -2.095993995666504, + -2.1484954357147217, + -10.274479866027832, + -0.682350754737854, + -0.25973302125930786, + -3.6964316368103027, + -13.434456825256348, + 
-2.3368239402770996, + -5.382724761962891, + -1.9073458909988403, + -5.905669212341309, + -0.032165709882974625, + -1.6530004739761353, + -2.728893280029297, + -1.640552043914795, + -1.1391171216964722, + -1.4353511333465576, + -4.003787994384766, + -0.3450564742088318, + -0.7168521285057068, + -0.34650325775146484, + -0.3616408705711365, + -7.062709331512451, + -1.2851682901382446, + -2.299129009246826, + -8.800156593322754, + -5.208735466003418, + -4.780910491943359, + -2.78342342376709, + -4.469717979431152, + -6.909726619720459, + -2.5114197731018066, + -0.659822404384613, + -0.6915416121482849, + -3.2363741397857666, + -0.5283617377281189, + -0.10473938286304474, + -6.215325832366943, + -7.283237934112549, + -1.6797031164169312, + -11.50100040435791, + -7.5822978019714355, + -3.387317657470703, + -11.407575607299805, + -5.441976547241211, + -3.3264851570129395, + -0.7265786528587341, + -1.382750153541565, + -7.841699600219727, + -8.105277061462402, + -3.9569506645202637, + -4.963083267211914, + -0.5492897629737854, + -4.6081390380859375, + -5.870400905609131, + -3.957930088043213, + -5.275494575500488, + -4.105091094970703, + -2.15435528755188, + -2.8472700119018555, + -1.1278448104858398, + -8.226571083068848, + -0.40629008412361145, + -9.916461944580078, + -4.616743087768555, + -1.691868543624878, + -0.6639478802680969, + -2.5716753005981445, + -6.676954746246338, + -6.535329818725586, + -0.4170510768890381, + -1.443942904472351, + -3.145481824874878, + -1.440589427947998, + -0.26935356855392456, + -0.9647155404090881, + -4.335958957672119, + -1.5647850036621094, + -5.890466690063477, + -3.01654052734375, + -1.9168468713760376, + -3.7365682125091553, + -8.001864433288574, + -10.680083274841309, + -4.489352226257324, + -4.6058149337768555, + -7.69011116027832, + -3.6247005462646484, + -1.5600426197052002, + -10.2160062789917, + -5.004643440246582, + -0.19602319598197937, + -3.375545024871826, + -2.669325590133667, + -1.3932737112045288, + -1.6410658359527588, 
+ -6.847603797912598, + -6.744344711303711, + -0.5215591192245483, + -0.25840020179748535, + -1.1448237895965576, + -5.57253885269165, + -7.251138687133789, + -4.221924781799316, + -0.7688062787055969, + -2.504502534866333, + -3.146519660949707, + -2.206653356552124, + -1.4295082092285156, + -7.96943998336792, + -4.332189083099365, + -2.5750505924224854, + -1.7102608680725098, + -5.311381816864014, + -8.897522926330566, + -2.994919538497925, + -3.3397974967956543, + -2.1794328689575195, + -2.437566041946411, + -0.3181810975074768, + -0.27412793040275574, + -0.7914466857910156, + -2.3470635414123535, + -2.4099245071411133, + -2.491870880126953, + -3.024170160293579, + -1.9719040393829346, + -11.373910903930664, + -1.4279751777648926, + -0.14573107659816742, + -2.055763006210327, + -6.366893291473389, + -4.24091911315918, + -0.00709194503724575, + -2.0199716091156006, + -2.524750232696533, + -1.4272525310516357, + -0.5185190439224243, + -2.927150011062622, + -2.7070627212524414, + -3.365638017654419, + -4.318085193634033, + -7.773144721984863, + -1.7947180271148682, + -7.657534599304199, + -8.767786026000977, + -14.74280071258545, + -1.8042558431625366, + -3.2712037563323975, + -1.4002125263214111, + -4.887944221496582, + -1.4821010828018188, + -1.5255622863769531, + -5.879070281982422, + -4.463839530944824, + -5.1955976486206055, + -5.665647506713867, + -0.3775045573711395, + -5.9350481033325195, + -2.800539255142212, + -0.13162286579608917, + -3.034379720687866, + -4.729524612426758, + -4.6252641677856445, + -3.850942611694336, + -2.4760568141937256, + -6.059760093688965, + -10.12075138092041, + -0.9469369649887085, + -11.595907211303711, + -6.875324726104736, + -4.268826007843018, + -2.835529088973999, + -3.8626279830932617, + -4.876199245452881, + -0.013071090914309025, + -4.964417934417725, + -0.7445687055587769, + -5.707155227661133, + -6.10660457611084, + -4.317755699157715, + -4.440443992614746, + -2.9202542304992676, + -4.743522644042969, + 
-1.2569392919540405, + -2.8675737380981445, + -2.3151841163635254, + -4.318130970001221, + -1.9054772853851318, + -1.1808521747589111, + -0.765956461429596, + -2.768916606903076, + -6.237791061401367, + -1.7224305868148804, + -7.137521743774414, + -4.512486457824707, + -1.9069950580596924, + -4.145983695983887, + -5.365190505981445, + -0.059828490018844604, + -2.273892879486084, + -3.4013004302978516, + -5.035730361938477, + -6.501443386077881, + -9.903446197509766, + -1.6332892179489136, + -2.1572084426879883, + -1.6149548292160034, + -1.4698481559753418, + -6.01010799407959, + -2.2243528366088867, + -6.900836944580078, + -6.0930986404418945, + -2.974020481109619, + -3.225423574447632, + -8.423272132873535, + -1.3423724174499512, + -3.626147508621216, + -0.4862469434738159, + -6.860866546630859, + -3.8910953998565674, + -2.33319354057312, + -1.7229185104370117, + -2.215972423553467, + -8.99046516418457, + -4.099084854125977, + -2.4191012382507324, + -8.288970947265625, + -2.9641928672790527, + -1.5036451816558838, + -3.0544614791870117, + -0.0715634673833847, + -2.444031238555908, + -4.520998954772949, + -3.972568988800049, + -0.4985870122909546, + -2.1651363372802734, + -3.4427435398101807, + -1.730639100074768, + -0.9458961486816406, + -7.740211009979248, + -9.39163875579834, + -3.895984172821045, + -1.7523534297943115, + -5.41331672668457, + -8.910720825195312, + -12.971094131469727, + -3.0455880165100098, + -10.501265525817871, + -3.3864927291870117, + -4.842309951782227, + -3.9964733123779297, + -7.3046793937683105, + -2.6607093811035156, + -1.3541781902313232, + -5.003270626068115, + -3.944551944732666, + -0.11356143653392792, + -5.174440383911133, + -9.628616333007812, + -8.654989242553711, + -8.980416297912598, + -6.670101642608643, + -5.488286018371582, + -5.943419933319092, + -2.126483201980591, + -8.054739952087402, + -7.458671569824219, + -2.5267202854156494, + -6.455472946166992, + -8.655346870422363, + -7.903901100158691, + -6.221062660217285, + 
-7.129237174987793, + -4.2345380783081055, + -2.5375306606292725, + -7.697700500488281, + -1.567080020904541, + -2.084331750869751, + -0.25020831823349, + -1.5145041942596436, + -4.619244575500488, + -0.2970108985900879, + -0.4977554678916931, + -6.197869300842285, + -4.030620098114014, + -7.232107639312744, + -0.21076253056526184, + -1.563366174697876, + -1.133756160736084, + -2.708237648010254, + -4.080535888671875, + -0.6818401217460632, + -0.1864331066608429, + -0.49012088775634766, + -8.732468605041504, + -11.945040702819824, + -5.243098735809326, + -1.5294703245162964, + -0.8935543298721313, + -0.6174070835113525, + -1.5068217515945435, + -3.5766501426696777, + -5.393096923828125, + -4.202867031097412, + -14.765748023986816, + -5.2513813972473145, + -0.7597705721855164, + -0.2502063810825348, + -1.7403976917266846, + -2.8000779151916504, + -1.9808133840560913, + -2.1654744148254395, + -1.8629226684570312, + -3.222038745880127, + -0.040942225605249405, + -2.3384013175964355, + -10.210381507873535, + -4.5859761238098145, + -0.5805734395980835, + -3.7019288539886475, + -2.001936674118042, + -2.7876083850860596, + -2.9799084663391113, + -4.349887371063232, + -0.0792960673570633, + -1.4366114139556885, + -1.0813264846801758, + -1.3510822057724, + -6.7060699462890625, + -5.436615943908691, + -3.978389263153076, + -6.785447597503662, + -6.147171497344971, + -3.97414231300354, + -4.332991600036621, + -0.9269428253173828, + -5.1237101554870605, + -4.486598968505859, + -0.04678357392549515, + -1.0307552814483643, + -1.4249452352523804, + -4.517682075500488, + -3.561821699142456, + -2.0815205574035645, + -0.6041194200515747, + -5.992964744567871, + -7.092092514038086, + -0.48916709423065186, + -2.6405677795410156, + -4.3345723152160645, + -3.533582925796509, + -3.1233346462249756, + -3.107872486114502, + -1.9901115894317627, + -3.1052846908569336, + -1.8440347909927368, + -6.21368408203125, + -1.8796799182891846, + -2.705214738845825, + -0.2987763583660126, + 
-4.070865154266357, + -1.6675832271575928, + -1.3896636962890625, + -1.5731089115142822, + -3.526170015335083, + -2.5088443756103516, + -1.208929419517517, + -3.673125743865967, + -2.501532554626465, + -6.875064373016357, + -8.512459754943848, + -1.042314052581787, + -3.657850980758667, + -7.0950798988342285, + -4.974049091339111, + -8.14085578918457, + -3.529888153076172, + -1.9389504194259644, + -7.0902204513549805, + -2.409292459487915, + -2.9428021907806396, + -1.688283085823059, + -3.622368335723877, + -2.0903351306915283, + -4.160663604736328, + -3.1683764457702637, + -1.2135626077651978, + -7.566033363342285, + -3.1186251640319824, + -5.899919509887695, + -0.9518840312957764, + -2.656729221343994, + -2.2994377613067627, + -6.806836128234863, + -1.280236840248108, + -2.838846206665039, + -1.3598848581314087, + -11.707776069641113, + -3.134333372116089, + -0.6230669617652893, + -8.219222068786621, + -7.562507152557373, + -7.489459037780762, + -1.5368008613586426, + -7.149652481079102, + -5.749268054962158, + -3.162869691848755, + -2.7235195636749268, + -6.128931999206543, + -1.1934199333190918, + -3.986410617828369, + -3.76609468460083, + -1.712721586227417, + -3.195504903793335, + -8.397743225097656, + -3.1260581016540527, + -9.792022705078125, + -4.217884540557861, + -11.583260536193848, + -5.987588882446289, + -5.178754806518555, + -6.994749069213867, + -5.167606353759766, + -7.124668121337891, + -6.201416015625, + -10.203682899475098, + -6.858526229858398, + -2.733592987060547, + -5.078882217407227, + -9.003358840942383, + -4.704894542694092, + -3.9085562229156494, + -7.247268199920654, + -7.091092109680176, + -4.4150166511535645, + -7.56699275970459, + -9.485116004943848, + -1.9977033138275146, + -6.65272331237793, + -2.236643075942993, + -7.518955707550049, + -5.525973320007324, + -4.67877721786499, + -6.608670234680176, + -5.536133766174316, + -10.772479057312012, + -10.8853178024292, + -3.6156129837036133, + -6.751470565795898, + -6.4537434577941895, + 
-3.4220399856567383, + -8.251005172729492, + -3.2146153450012207, + -6.330069541931152, + -1.5551663637161255, + -6.520583629608154, + -10.450878143310547, + -5.8788957595825195, + -3.7398200035095215, + -3.9084208011627197, + -0.3640081584453583, + -6.961522102355957, + -6.066243648529053, + -7.270624160766602, + -5.098455429077148, + -2.7642822265625, + -5.460171699523926, + -7.362828731536865, + -2.558631658554077, + -2.186410427093506, + -2.5309929847717285, + -2.46756649017334, + -2.0306026935577393, + -1.8713470697402954, + -2.108008623123169, + -1.2698389291763306, + -2.1712756156921387, + -2.4432802200317383, + -1.1477653980255127, + -1.8417484760284424, + -2.5971946716308594, + -1.8250831365585327, + -2.103092670440674, + -2.5183165073394775, + -2.9367291927337646, + -1.9412965774536133, + -1.7692793607711792, + -2.864521026611328, + -3.1332175731658936, + -1.098311185836792, + -2.946441173553467, + -2.2800471782684326, + -3.1929852962493896, + -2.754260778427124, + -3.485616445541382, + -3.3010287284851074, + -2.5537776947021484, + -2.6752865314483643, + -3.1617612838745117, + -2.4571690559387207, + -2.060081958770752, + -2.425969362258911, + -2.212725877761841, + -2.4232254028320312, + -3.0587053298950195, + -2.4074010848999023, + -2.457937479019165, + -2.319617986679077, + -2.6340954303741455, + -2.599524736404419, + -2.5302212238311768, + -1.6849274635314941, + -2.2609786987304688, + -2.039928674697876, + -1.9474098682403564, + -2.3550753593444824, + -1.718749761581421, + -2.413884162902832, + -1.6247628927230835, + -2.4784040451049805, + -1.828325629234314, + -1.3880831003189087, + -1.4448199272155762, + -1.1477117538452148, + -1.1669728755950928, + -1.8787822723388672, + -1.5565840005874634, + -1.6666553020477295, + -1.747725248336792, + -1.959598422050476, + -2.0376486778259277, + -2.345367431640625, + -2.055098533630371, + -1.3940613269805908, + -3.4385242462158203, + -2.7489635944366455, + -3.2590157985687256, + -3.1128957271575928, + 
-1.7070379257202148, + -3.9010369777679443, + -3.21574068069458, + -3.3850393295288086, + -1.8778185844421387, + -2.698211908340454, + -1.8060741424560547, + -2.0845324993133545, + -3.4797585010528564, + -2.263254404067993, + -3.083108901977539, + -1.6589758396148682, + -2.687279224395752, + -1.77505624294281, + -2.6142921447753906, + -1.934045672416687, + -1.8834377527236938, + -2.8038980960845947, + -1.550542950630188, + -3.4054152965545654, + -1.724036693572998, + -2.3146564960479736, + -1.5134503841400146, + -2.9289023876190186, + -1.5285141468048096, + -3.421035051345825, + -1.3757282495498657, + -3.441431760787964, + -1.5286564826965332, + -3.4372904300689697, + -3.173043966293335, + -1.1313854455947876, + -1.486415147781372, + -2.506413221359253, + -3.5165903568267822, + -1.4112176895141602, + -3.7175354957580566, + -2.2170844078063965, + -0.704839289188385, + -0.6626103520393372, + -2.5483946800231934, + -0.949668288230896, + -3.1339564323425293, + -1.3326977491378784, + -3.5493476390838623, + -0.6785370111465454, + -3.236161470413208, + -2.1347084045410156, + -3.802447557449341, + -2.585503339767456, + -3.9361765384674072, + -2.290905714035034, + -3.714280605316162, + -1.793616533279419, + -3.7252492904663086, + -1.450188159942627, + -3.11938738822937, + -1.25174880027771 + ] + }, + "32": { + "input_prompt": "create a conversational article", + "generated_text": " about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. 
The article should be about the topic of the article.", + "generated_tokens": [ + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046 + ], + "latency": 48.19877076148987, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -4.4165568351745605, + -11.358176231384277, + -0.0701780766248703, + -7.797665119171143, + -2.6805992126464844, + -1.4707680940628052, + -3.0390255451202393, + -1.6902849674224854, + -1.270594835281372, + -1.1936196088790894, + -1.2523558139801025, + -2.7270259857177734, + -1.2371309995651245, + -0.9618493318557739, + -0.4379909038543701, + -1.3917063474655151, + -1.1055524349212646, + -0.9122569561004639, + -0.9911308288574219, + -0.08436793833971024, + -0.5424078106880188, + -0.9181017279624939, + -0.5873759388923645, + -0.19014373421669006, + -0.06655456870794296, + -0.15252672135829926, + -0.09415211528539658, + -0.009787309914827347, + -0.013910251669585705, + -0.005296128336340189, + -0.005677408073097467, + -0.02013739012181759, + 
-0.21594694256782532, + -0.07153760641813278, + -0.0066444179974496365, + -0.010198505595326424, + -0.011980246752500534, + -0.003686776151880622, + -0.0037619550712406635, + -0.0022467151284217834, + -0.004088377580046654, + -0.021828632801771164, + -0.0012669878778979182, + -0.09768074005842209, + -0.02652405947446823, + -0.0019286142196506262, + -0.002283824374899268, + -0.0032225127797573805, + -0.0009741804678924382, + -0.0009415484382770956, + -0.001211624126881361, + -0.001135300612077117, + -0.002340436913073063, + -0.0010846928926184773, + -0.0509282611310482, + -0.03832047060132027, + -0.00257422705180943, + -0.0022806129418313503, + -0.00262785074301064, + -0.0008195855189114809, + -0.0010239601833745837, + -0.0013777059502899647, + -0.0009899006690829992, + -0.0018756669014692307, + -0.0015304292319342494, + -0.08506463468074799, + -0.01893703266978264, + -0.0013797297142446041, + -0.0014461545506492257, + -0.0013971101725474, + -0.0005869334563612938, + -0.0005212855176068842, + -0.000876757490914315, + -0.0005256939912214875, + -0.0012863941956311464, + -0.0015691122971475124, + -0.051276568323373795, + -0.00973513163626194, + -0.0010469438275322318, + -0.0011531615164130926, + -0.0009969270322471857, + -0.00038342276820912957, + -0.0004032037395518273, + -0.000730247818864882, + -0.0003275334893260151, + -0.0008700875914655626, + -0.0017572689102962613, + -0.044393111020326614, + -0.013102858327329159, + -0.0011463745031505823, + -0.0012070996453985572, + -0.0012325793504714966, + -0.0005048430757597089, + -0.0004876854654867202, + -0.0007901645149104297, + -0.00041500062798149884, + -0.0009869233472272754, + -0.0018687656847760081, + -0.03943866863846779, + -0.014425630681216717, + -0.0014756753807887435, + -0.001423775334842503, + -0.001209719106554985, + -0.0005046047735959291, + -0.00042167355422861874, + -0.0007688426994718611, + -0.0002699726028367877, + -0.0006598440813831985, + -0.0017849955474957824, + -0.038999658077955246, + 
-0.012665312737226486, + -0.0014427024871110916, + -0.0014492495683953166, + -0.001016576774418354, + -0.00042083943844772875, + -0.00033241944038309157, + -0.0006403064471669495, + -0.00022373080719262362, + -0.0007053509471006691, + -0.0018597226589918137, + -0.030997740104794502, + -0.011259939521551132, + -0.0012655591126531363, + -0.00134151556994766, + -0.0008106521563604474, + -0.0003513672563713044, + -0.0002964295563288033, + -0.0006368515896610916, + -0.00020180096908006817, + -0.0005779979983344674, + -0.0016014858847483993, + -0.0271126888692379 + ] + }, + "64": { + "input_prompt": "App Concept: \"Eggy's Interactive Adventure World\"\n\nEggy's Interactive Adventure World is a cutting-edge app designed for phones and tablets that combines immersive storytelling with engaging, interactive experiences for both parents and children. This innovative app allows users to explore the world of Eggy and participate in a series of adventures that teach essential life skills, foster personal growth, and strengthen parent-child bonds.\n\nKey Features:\n\n1. Interactive Stories: The app brings the 20 Eggy stories to life through interactive, animated storytelling. Users can read, listen, and actively participate in the stories, making choices that guide Eggy through various adventures, each with unique lessons and experiences.\n2. Augmented Reality (AR) Integration: The app utilizes AR technology, allowing users to interact with Eggy and the story characters in their real-world environment. This immersive experience creates a deeper connection with the stories and encourages engagement.\n3. Personalized Adventures: The app customizes each story based on the user's interests, needs, and personal growth areas. By collecting user input, the app tailors the storylines and learning outcomes to ensure that each adventure resonates with the child's unique experiences.\n4. 
Parent-Child Cooperative Play: The app encourages parents and children to collaborate and engage in the stories together. Through a split-screen mode or multi-device connectivity, parents and children can actively participate in the stories and learn essential life skills together.\n5. Skill-Building Mini-Games: Interspersed throughout the stories are educational mini-games that reinforce the lessons and themes. These games are designed to be fun, engaging, and accessible to children, encouraging them to practice and develop various skills.\n6. Progress Tracking and Rewards: The app tracks the user's progress through the stories and rewards them with badges, trophies, and other collectibles. This gamification aspect encourages continued engagement and fosters a sense of accomplishment and growth.\n7. Community Features: The app provides a safe, moderated online community where parents can share their experiences, discuss the stories and lessons, and connect with other families who are also using the app. This community fosters support, inspiration, and learning.\n8. Expert Resources: The app includes access to resources from experts in child development, psychology, and education, providing parents with additional tools and guidance on fostering the growth and well-being of their children.\n\nEggy's Interactive Adventure World is a one-of-a-kind, mind-blowing app that revolutionizes the way parents and children engage with stories and learn essential life skills. Combining the power of storytelling, cutting-edge technology, and expert resources, this app is a must-have for families looking to strengthen their bonds and support their children's personal growth.", + "generated_text": "The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). 
The 1999\u20132000 season was the 10th season was the 10th season was the 10 season was the 10 season was the 10", + "generated_tokens": [ + 2, + 1784, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048 + ], + "latency": 86.85381531715393, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -12.107745170593262, + -2.9727728366851807, + -3.720092535018921, + -5.592433929443359, + -10.964235305786133, + -3.654498338699341, + -9.33439826965332, + -4.833785057067871, + -5.187321662902832, + -2.6944785118103027, + -6.9262237548828125, + -0.654232919216156, + -0.5550781488418579, + -0.21346639096736908, + -0.0134271876886487, + -0.010840100236237049, + -1.3878544569015503, + -0.6296291351318359, + -7.9766011238098145, + -0.4393192231655121, + -5.639142036437988, + -3.277270793914795, + -1.0206468105316162, + -11.703084945678711, + -0.7100943922996521, + -0.2809169888496399, + -2.771284818649292, + -7.190817832946777, + -4.048691749572754, + -0.012056218460202217, + -3.3802318572998047, + 
-0.6807184815406799, + -3.4844107627868652, + -3.312331199645996, + -0.5001641511917114, + -2.61255145072937, + -4.243694305419922, + -4.333778381347656, + -6.0625810623168945, + -0.011777156963944435, + -0.37577226758003235, + -0.9490834474563599, + -3.5450198650360107, + -2.1778035163879395, + -0.45957911014556885, + -3.00771164894104, + -1.7600425481796265, + -0.09766030311584473, + -2.467618942260742, + -1.329679012298584, + -0.8384320735931396, + -1.1864604949951172, + -3.628342866897583, + -0.2470003068447113, + -1.8938640356063843, + -5.168431282043457, + -0.05005566030740738, + -2.258014678955078, + -2.449028968811035, + -0.0034086955711245537, + -3.9485883712768555, + -1.6201664209365845, + -5.139942646026611, + -4.859354496002197, + -0.23686674237251282, + -0.5541543364524841, + -2.5826025009155273, + -6.114635467529297, + -4.3380208015441895, + -0.7412900924682617, + -0.3221715986728668, + -0.13805493712425232, + -4.1797332763671875, + -7.3456268310546875, + -0.13762745261192322, + -2.0905232429504395, + -1.0178627967834473, + -4.108260631561279, + -0.6007124185562134, + -1.0410642623901367, + -4.122039794921875, + -0.35905471444129944, + -1.4274661540985107, + -4.139932155609131, + -0.4237431585788727, + -1.6294409036636353, + -0.9811424016952515, + -4.132790565490723, + -1.1318120956420898, + -6.8258256912231445, + -1.5455098152160645, + -0.6984409093856812, + -13.664215087890625, + -0.1166313961148262, + -1.6347849369049072, + -0.28875046968460083, + -0.03130083531141281, + -1.5293006896972656, + -1.6488375663757324, + -4.224111557006836, + -4.760683059692383, + -1.9758747816085815, + -1.5828256607055664, + -2.8463857173919678, + -0.2620386481285095, + -1.7243889570236206, + -1.7945923805236816, + -0.8884308338165283, + -0.3766394555568695, + -0.34033581614494324, + -9.05566692352295, + -0.22754782438278198, + -0.033802058547735214, + -0.34108465909957886, + -0.5644669532775879, + -2.0925779342651367, + -4.547505855560303, + -10.870464324951172, + 
-1.1072022914886475, + -5.503787994384766, + -3.259672164916992, + -0.007964519783854485, + -3.0111639499664307, + -4.246737480163574, + -0.7813188433647156, + -3.331031322479248, + -4.485962867736816, + -0.9492117166519165, + -2.6757047176361084, + -1.1591349840164185, + -1.122117519378662, + -2.629878044128418, + -5.986321926116943, + -0.2146703153848648, + -0.002392764901742339, + -7.372479438781738, + -0.007077385671436787, + -0.06599216908216476, + -0.0970711037516594, + -3.2874932289123535, + -0.0019583588000386953, + -0.9122000336647034, + -4.930907249450684, + -0.019508399069309235, + -0.308611661195755, + -0.07778516411781311, + -3.8497893810272217, + -0.46124517917633057, + -0.38821348547935486, + -2.668412208557129, + -1.845987319946289, + -0.06470083445310593, + -0.006619549356400967, + -1.2610487937927246, + -0.13015533983707428, + -3.365312099456787, + -0.0014690094394609332, + -1.6789823770523071, + -1.2499005794525146, + -3.3992111682891846, + -5.563300132751465, + -0.823418140411377, + -4.24124813079834, + -1.6597849130630493, + -0.6941139698028564, + -1.5637556314468384, + -0.5482053756713867, + -0.9507225751876831, + -3.764758586883545, + -0.0006518622976727784, + -0.7540555000305176, + -5.058262825012207, + -0.3302401602268219, + -2.8130555152893066, + -0.17079885303974152, + -2.871047019958496, + -0.3991694450378418, + -3.1476998329162598, + -0.3488404452800751, + -2.0545666217803955, + -4.201597690582275, + -5.164614677429199, + -0.0271432027220726, + -0.0009785869624465704, + -3.3444161415100098, + -1.3117046356201172, + -6.375423431396484, + -0.05535568296909332, + -0.3919340968132019, + -0.060594215989112854, + -6.507473468780518, + -0.0023910999298095703, + -2.143423318862915, + -3.335618257522583, + -2.953970432281494, + -0.0013383012264966965, + -0.8080525398254395, + -0.29526084661483765, + -0.04036511853337288, + -3.231475353240967, + -1.0585589408874512, + -6.136373043060303, + -0.006182829383760691, + -0.035548023879528046, + 
-5.509808540344238, + -1.8490750789642334, + -9.83314037322998, + -0.07037576287984848, + -3.1621387004852295, + -6.762360095977783, + -1.3490527868270874, + -3.601043462753296, + -1.176393985748291, + -0.4342959523200989, + -0.06266004592180252, + -5.464046001434326, + -0.017946599051356316, + -1.0416009426116943, + -1.6117159128189087, + -12.289417266845703, + -1.5004339218139648, + -5.76563835144043, + -4.038386821746826, + -0.20812086760997772, + -3.6306562423706055, + -1.3901070356369019, + -1.087137222290039, + -2.423213243484497, + -4.503086090087891, + -0.0008031480247154832, + -0.03627370297908783, + -0.1653430461883545, + -7.958648681640625, + -1.1018548011779785, + -1.290948748588562, + -3.8049263954162598, + -1.8253734111785889, + -0.059022851288318634, + -0.0013984196120873094, + -4.698851585388184, + -2.5421664714813232, + -0.024493809789419174, + -4.828659534454346, + -3.0295286178588867, + -3.550312042236328, + -0.1185273677110672, + -0.22595760226249695, + -0.10782183706760406, + -1.4033282995224, + -0.4485701024532318, + -0.2889708876609802, + -0.05471855774521828, + -0.007632025051862001, + -2.1156554222106934, + -0.6249589323997498, + -4.198577404022217, + -0.14178156852722168, + -4.284021377563477, + -2.227515935897827, + -3.5022120475769043, + -0.19575819373130798, + -15.964509963989258, + -4.055960655212402, + -11.125024795532227, + -0.7681724429130554, + -3.0436902046203613, + -7.030262470245361, + -4.376729488372803, + -5.476145267486572, + -0.4219042658805847, + -3.7689766883850098, + -0.060010604560375214, + -0.8134393692016602, + -0.11386934667825699, + -0.025473715737462044, + -0.09736856073141098, + -4.357361793518066, + -0.3670865297317505, + -0.08063744008541107, + -0.1311480849981308, + -1.0903867483139038, + -1.2705107927322388, + -1.5076212882995605, + -4.295275688171387, + -0.04185756668448448, + -0.19810955226421356, + -1.9645220041275024, + -0.9597910642623901, + -0.13429655134677887, + -0.002283110748976469, + 
-7.066074371337891, + -3.639211654663086, + -1.0263917446136475, + -8.124760627746582, + -1.132537841796875, + -0.09160765260457993, + -0.08996370434761047, + -10.165366172790527, + -3.501585006713867, + -0.0019847711082547903, + -0.05309417471289635, + -0.31209683418273926, + -0.15089339017868042, + -1.23564875125885, + -1.2685208320617676, + -7.832758903503418, + -0.19271136820316315, + -0.014305183663964272, + -0.0007532381569035351, + -0.44688940048217773, + -2.6239724159240723, + -1.738666296005249, + -1.6480977535247803, + -0.46753185987472534, + -8.656959533691406, + -3.79868483543396, + -0.9281394481658936, + -2.2381181716918945, + -1.7654449939727783, + -0.4948798418045044, + -0.025028761476278305, + -1.5435361862182617, + -1.6390818357467651, + -1.4962153434753418, + -0.3425217270851135, + -0.013077914714813232, + -0.038474079221487045, + -5.3364362716674805, + -0.42365288734436035, + -1.884093999862671, + -3.510357618331909, + -6.198029518127441, + -0.44375038146972656, + -0.0008789013954810798, + -3.6025230884552, + -1.419615626335144, + -2.6723289489746094, + -5.775190830230713, + -1.1380761861801147, + -2.6683366298675537, + -0.43395891785621643, + -0.003145867260172963, + -8.63144302368164, + -1.646262764930725, + -1.732487678527832, + -4.561546802520752, + -0.5277953147888184, + -0.07333153486251831, + -0.5624169707298279, + -0.12201295047998428, + -2.6561455726623535, + -1.1071691513061523, + -2.6895060539245605, + -0.040864069014787674, + -0.04126371443271637, + -1.8294739723205566, + -0.09022177755832672, + -0.3154001832008362, + -0.46215569972991943, + -2.2462844848632812, + -0.30149081349372864, + -0.52588951587677, + -8.288043975830078, + -0.0002057340752799064, + -0.8021711707115173, + -4.4546098709106445, + -0.0001565095444675535, + -0.0015961299650371075, + -0.15216240286827087, + -0.3677564561367035, + -5.018707275390625, + -0.7850045561790466, + -1.9582659006118774, + -1.0046892166137695, + -10.0401029586792, + -0.16878114640712738, + 
-5.944240570068359, + -1.5523078441619873, + -5.7253522872924805, + -0.47948503494262695, + -0.44009655714035034, + -5.671053886413574, + -0.003280022880062461, + -0.7937742471694946, + -0.9639376401901245, + -0.00030048147891648114, + -1.0747740268707275, + -0.8839919567108154, + -3.416811466217041, + -1.6602673530578613, + -0.2706959843635559, + -0.0024333172477781773, + -4.478696823120117, + -6.20179557800293, + -0.11359559744596481, + -0.202009916305542, + -0.022310219705104828, + -2.367263078689575, + -1.0405994653701782, + -5.984308242797852, + -2.105138063430786, + -9.583202362060547, + -0.0004957877099514008, + -3.0655455589294434, + -0.0669412910938263, + -0.8977450728416443, + -2.2271294593811035, + -2.6617536544799805, + -1.8184051513671875, + -0.8291114568710327, + -0.4864235818386078, + -0.7993525862693787, + -3.51106858253479, + -2.1530935764312744, + -0.257144957780838, + -1.3934082984924316, + -1.3137131929397583, + -0.3384077548980713, + -0.1697217971086502, + -2.353395938873291, + -0.03406282886862755, + -0.39059701561927795, + -3.422821044921875, + -1.7117210626602173, + -0.7018465399742126, + -1.5995906591415405, + -3.6218395233154297, + -0.12497704476118088, + -0.16966234147548676, + -0.7313685417175293, + -0.4956285357475281, + -1.0840849876403809, + -5.042126655578613, + -0.00031704644788987935, + -7.683258056640625, + -0.9210801720619202, + -4.687852382659912, + -0.0028814247343689203, + -0.043382611125707626, + -4.1948652267456055, + -2.66593337059021, + -0.06153333932161331, + -0.0023110604379326105, + -6.729236602783203, + -5.777127742767334, + -0.08932067453861237, + -0.09890018403530121, + -0.009886111132800579, + -3.1145148277282715, + -3.725565195083618, + -0.0021998509764671326, + -3.9927196502685547, + -2.753793239593506, + -1.6037236452102661, + -0.17461130023002625, + -4.804804801940918, + -0.2311229705810547, + -0.30256444215774536, + -2.235363006591797, + -0.006614102050662041, + -0.34757524728775024, + -1.4946835041046143, + 
-1.222062587738037, + -3.658839225769043, + -1.356170892715454, + -0.5371109843254089, + -3.7580835819244385, + -4.54621696472168, + -0.31577637791633606, + -3.677156925201416, + -2.7181396484375, + -7.4674882888793945, + -0.00019369633810129017, + -2.3798398971557617, + -2.5452184677124023, + -0.2858496308326721, + -4.315659523010254, + -0.025835415348410606, + -0.000603493710514158, + -0.2546294331550598, + -0.12032663822174072, + -2.006908655166626, + -5.990736961364746, + -7.146596908569336, + -0.23356498777866364, + -0.2201036810874939, + -0.01235415879637003, + -0.011248741298913956, + -1.4155778884887695, + -0.40242519974708557, + -5.877886772155762, + -0.7865053415298462, + -0.03231288120150566, + -0.004864405374974012, + -0.0050629740580916405, + -2.7049152851104736, + -6.822089195251465, + -0.39252761006355286, + -1.2290617227554321, + -0.007630132604390383, + -3.485461711883545, + -0.47985684871673584, + -6.1813530921936035, + -0.03757825121283531, + -0.37834712862968445, + -0.22192610800266266, + -1.165318489074707, + -0.5220151543617249, + -0.1289423257112503, + -3.216222047805786, + -1.0787583589553833, + -3.0716826915740967, + -0.6023419499397278, + -2.558605194091797, + -0.927433431148529, + -0.00364841241389513, + -0.14910078048706055, + -0.7318926453590393, + -6.159773826599121, + -0.0015301911626011133, + -1.8908276557922363, + -1.9641315937042236, + -0.021651331335306168, + -2.1648828983306885, + -2.2700207233428955, + -7.833290100097656, + -0.03397307172417641, + -0.8344621658325195, + -0.02225659228861332, + -0.06639260798692703, + -2.3780317306518555, + -3.180129051208496, + -0.09030630439519882, + -2.4138312339782715, + -1.3445552587509155, + -1.848326325416565, + -0.9726964831352234, + -2.851792335510254, + -0.0630769282579422, + -0.0011394681641831994, + -0.05843213573098183, + -2.6616668701171875, + -1.575437068939209, + -0.180197611451149, + -5.552371501922607, + -0.26108410954475403, + -2.529611587524414, + -0.37780019640922546, + 
-5.141795635223389, + -0.5921107530593872, + -0.2474975287914276, + -0.10687454044818878, + -4.891775131225586, + -0.25011152029037476, + -2.4100728034973145, + -1.358667016029358, + -2.790961503982544, + -3.8654675483703613, + -1.0076243877410889, + -0.7456949949264526, + -1.5575554370880127, + -2.05328631401062, + -1.6538066864013672, + -0.0558217354118824, + -0.0001817776501411572, + -0.0011643542675301433, + -0.038359593600034714, + -1.4208931922912598, + -0.542127251625061, + -0.3162364959716797, + -0.3966117799282074, + -1.1765563488006592, + -1.7920958995819092, + -0.18425509333610535, + -0.1092008650302887, + -0.46676987409591675, + -0.24977745115756989, + -1.0375996828079224, + -0.5268858671188354, + -0.008942908607423306, + -0.6404479146003723, + -0.0033111530356109142, + -5.3165931603871286e-05, + -0.5154370665550232, + -0.39286962151527405, + -1.401839256286621, + -0.6232213973999023, + -0.02168831042945385, + -0.004282470792531967, + -0.005199837032705545, + -0.09748794883489609, + -0.040823787450790405, + -0.00014852374442853034, + -0.0005832401220686734, + -0.005303124897181988, + -0.6537013053894043, + -0.38026049733161926, + -0.04189129173755646, + -0.010385753586888313, + -0.008756335824728012, + -0.013362848199903965, + -0.000504723924677819, + -0.002797620603814721, + -0.0014512732159346342, + -0.0013321106089279056, + -0.010883613489568233, + -0.005159396678209305, + -0.004701037425547838, + -0.01591104455292225, + -0.001474246964789927, + -1.2278481335670222e-05, + -0.010548785328865051, + -0.08341525495052338, + -0.03858809545636177, + -0.056062061339616776, + -0.0009532198309898376, + -0.0005789510905742645, + -0.0008986725588329136, + -0.00710969977080822, + -0.0006561510381288826, + -1.4781842764932662e-05, + -5.578839045483619e-05, + -0.0006398299592547119, + -0.0028786908369511366, + -0.0034092895220965147, + -0.008268529549241066, + -0.006602259818464518, + -0.004517706111073494, + -0.02233586646616459, + -0.0006323245470412076, + 
-0.009195122867822647, + -0.0029284947086125612, + -0.004457537550479174, + -0.017873765900731087, + -0.008801711723208427, + -0.0036383166443556547, + -0.08078611642122269, + -0.006347495596855879, + -0.0002177716523874551, + -0.04688572511076927, + -0.2718890309333801, + -0.07996802777051926, + -0.04674842208623886, + -0.009984076954424381, + -0.010000954382121563, + -0.050126753747463226, + -0.5864179730415344, + -0.2915390133857727, + -0.008090462535619736, + -0.032190412282943726, + -0.03461571782827377, + -0.2785419523715973, + -0.05830562859773636, + -0.02893950417637825, + -0.12241066247224808, + -0.02711048536002636, + -0.16450686752796173, + -0.09852994978427887, + -0.2651047706604004, + -0.35559725761413574, + -0.12606258690357208, + -0.32793670892715454, + -0.20878805220127106, + -0.7587923407554626, + -0.4730657637119293, + -1.496794581413269, + -0.2486363798379898, + -0.20723387598991394, + -0.1872958242893219, + -0.19151091575622559, + -0.3350580036640167, + -1.3085839748382568, + -0.6109102964401245, + -0.2947172224521637, + -0.37130236625671387, + -0.19888469576835632, + -0.18297068774700165, + -0.9978674054145813, + -0.5471905469894409, + -0.4379975199699402, + -0.407988041639328, + -0.17325688898563385, + -0.16938896477222443 + ] + }, + "96": { + "input_prompt": "what is the concept of double materiality in sustainability?", + "generated_text": " What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? 
What is the concept of double materiality in sustainability? What is the concept of double material", + "generated_tokens": [ + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318 + ], + "latency": 125.58511328697205, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -4.917365074157715, + -0.9960631132125854, + -7.875392913818359, + -0.2993181347846985, + -7.760880470275879, + -10.308395385742188, + -2.1807961463928223, + -1.6412583589553833, + -9.521512985229492, + -1.627489447593689, + -1.8410861492156982, + -0.9285702705383301, + -0.2576955556869507, + -0.9641067981719971, + -0.02314644306898117, + -0.6696561574935913, + -0.07035009562969208, + -0.004622488282620907, + -0.025748632848262787, + -0.06276137381792068, + -0.17385317385196686, + -0.3285445272922516, + -0.0592009499669075, + -0.007940039038658142, + -0.22664028406143188, + -0.0017957051750272512, + -0.022929180413484573, + -0.005733947269618511, + -0.0012996093137189746, + -0.006419987417757511, + 
-0.02376849390566349, + -0.27800270915031433, + -0.4650723934173584, + -0.04936715215444565, + -0.003972141072154045, + -0.01477995328605175, + -0.0012044801842421293, + -0.014891182072460651, + -0.002709767082706094, + -0.0009939497103914618, + -0.0028436246793717146, + -0.006759870797395706, + -0.15416178107261658, + -0.20121537148952484, + -0.016414370387792587, + -0.0015769677702337503, + -0.008138825185596943, + -0.0007713441736996174, + -0.013819841668009758, + -0.003826678032055497, + -0.0005918181850574911, + -0.0014938872773200274, + -0.00485716899856925, + -0.081083282828331, + -0.09642580896615982, + -0.009630884043872356, + -0.0010948146227747202, + -0.007085552904754877, + -0.0006310140597634017, + -0.013073914684355259, + -0.0039152647368609905, + -0.000364713923772797, + -0.001292108790948987, + -0.004158303141593933, + -0.044283974915742874, + -0.05722038820385933, + -0.006369172595441341, + -0.0007976687629707158, + -0.005993015132844448, + -0.0004935238393954933, + -0.011310506612062454, + -0.002951553324237466, + -0.000387831823900342, + -0.000977038755081594, + -0.0036971091758459806, + -0.030511993914842606, + -0.04246694967150688, + -0.004863100592046976, + -0.0006927236099727452, + -0.005206122528761625, + -0.0005129451747052372, + -0.00894621666520834, + -0.0028565814718604088, + -0.00041333239641971886, + -0.0009002208826132119, + -0.0033131728414446115, + -0.021188799291849136, + -0.03330245241522789, + -0.0038543473929166794, + -0.0006504327175207436, + -0.004474864806979895, + -0.00048029806930571795, + -0.009718249551951885, + -0.0030443770810961723, + -0.0003743662964552641, + -0.0009439303539693356, + -0.003729770192876458, + -0.016505014151334763, + -0.0290373582392931, + -0.003315192647278309, + -0.0005821678787469864, + -0.004148805979639292, + -0.00042489083716645837, + -0.006856840569525957, + -0.0028660909738391638, + -0.00032574593205936253, + -0.0006986799417063594, + -0.003671098267659545, + -0.012792548164725304, + 
-0.02553274855017662, + -0.002730690874159336, + -0.0005067494930699468, + -0.0036923582665622234, + -0.0004451475979294628, + -0.007225453853607178, + -0.002746859099715948, + -0.0003323002893012017, + -0.0008517451351508498, + -0.003630714723840356, + -0.011284693144261837, + -0.02186688780784607, + -0.0025819556321948767, + -0.0004931663861498237, + -0.0031721293926239014, + -0.00040725519647821784, + -0.0062383925542235374, + -0.00238693761639297, + -0.00040749352774582803, + -0.0007970731821842492, + -0.003604583442211151, + -0.010075542144477367, + -0.022386692464351654, + -0.0025295186787843704, + -0.00046302087139338255, + -0.003138143103569746, + -0.0004233417857903987, + -0.006178564392030239, + -0.0021037368569523096 + ] + }, + "127": { + "input_prompt": "as before we talk about using , i usually do if else() to in case the property would be null before.\nif today i public a property and i want to use using to instead , will code be like this?\n\npublic Material Mat;\n\nusing(Mat){\n// Do something;\n}\n\nis that right?", + "generated_text": " i am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using", + "generated_tokens": [ + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, 
+ 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505 + ], + "latency": 154.75680470466614, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -7.793755531311035, + -4.224940776824951, + -5.446587562561035, + -0.267395555973053, + -6.118141174316406, + -11.105525970458984, + -6.67517614364624, + -6.902530670166016, + -3.2218151092529297, + -5.99644660949707, + -8.431673049926758, + -8.04836368560791, + -3.992594003677368, + -7.550079345703125, + -2.873685836791992, + -3.3877997398376465, + -7.9306745529174805, + -7.005484580993652, + -1.0481306314468384, + -2.686237335205078, + -6.131283760070801, + -6.2994704246521, + -7.931419372558594, + -11.3147554397583, + -8.544670104980469, + -12.01729679107666, + -3.89847469329834, + -1.7964364290237427, + -2.952878952026367, + -1.9217232465744019, + -2.272329330444336, + -0.37552154064178467, + -1.667820692062378, + -7.510344505310059, + -3.498040199279785, + -7.980632305145264, + -7.672002792358398, + -4.4999470710754395, + -7.155375003814697, + -2.4486124515533447, + -4.785946846008301, + -1.153855800628662, + -2.3994438648223877, + -4.250652313232422, + -12.24446964263916, + -8.344388008117676, + -2.608186721801758, + -5.200589179992676, + -8.25888442993164, + -3.6245617866516113, + -7.689338207244873, + -7.345355033874512, + -1.2661759853363037, + 
-7.265620231628418, + -1.9884108304977417, + -6.269482612609863, + -2.41705584526062, + -1.8929681777954102, + -1.8259913921356201, + -2.0997350215911865, + -2.323200225830078, + -1.3998825550079346, + -0.8789899945259094, + -1.082053542137146, + -1.1831339597702026, + -1.4462857246398926, + -1.6481035947799683, + -1.4408715963363647, + -1.2603964805603027, + -1.5267670154571533, + -1.6345772743225098, + -1.3796477317810059, + -0.7609691023826599, + -0.3548354506492615, + -0.7552334666252136, + -0.44776833057403564, + -1.1078286170959473, + -1.3036658763885498, + -0.5214896202087402, + -0.8486822843551636, + -0.22470997273921967, + -0.4705755412578583, + -0.5639711022377014, + -0.5388108491897583, + -0.6052999496459961, + -0.1002030223608017, + -0.286334365606308, + -0.45798981189727783, + -1.0107953548431396, + -0.11875647306442261, + -0.6969441771507263, + -0.4609107971191406, + -0.07614769786596298, + -0.5035472512245178, + -0.1682187020778656, + -0.10476160794496536, + -0.6586751341819763, + -0.35806939005851746, + -1.5364394187927246, + -2.4093759059906006, + -1.977368950843811, + -1.6216907501220703, + -0.27647316455841064, + -0.2991848587989807, + -0.2783535420894623, + -0.05913994088768959, + -0.03023873083293438, + -0.043339803814888, + -0.7320341467857361, + -0.0030677898321300745, + -0.0332595594227314, + -0.012804670259356499, + -0.004041599575430155, + -0.0014899593079462647, + -0.001948602613992989, + -0.0029070996679365635, + -0.040939707309007645, + -0.013942227698862553, + -0.04897322878241539, + -0.011005887761712074, + -0.0044113704934716225, + -0.0013179434463381767, + -0.003658389439806342, + -0.009758152067661285, + -0.0014104428701102734, + -0.0016671819612383842, + -0.000771939754486084, + -0.0015519729349762201, + -0.003720743814483285, + -0.004249115474522114, + -0.00485657574608922, + -0.005053604021668434, + -0.002336274366825819, + -0.0009155849111266434, + -0.0004978132783435285, + -0.0005953923100605607, + -0.0011395872570574284, + 
-0.001485078944824636, + -0.3072909712791443, + -1.7295066118240356, + -0.4807289242744446, + -0.1245415136218071, + -0.011858444660902023, + -0.020613837987184525, + -0.011020978912711143, + -0.003106294432654977, + -0.0009966888464987278, + -0.0019349202048033476, + -0.037407051771879196, + -0.0003496989083942026, + -0.005922981072217226, + -0.007394562941044569, + -0.0006037319544702768, + -0.0008836655179038644, + -0.0002884448622353375, + -0.00047600860125385225, + -0.0024947968777269125, + -0.00442774873226881, + -0.004059052560478449, + -0.0018594847060739994, + -0.0006179092451930046, + -0.00022635281493421644, + -0.0006730675231665373, + -0.003022746881470084, + -0.0002343380037928, + -0.00047791501856409013, + -9.440929716220126e-05, + -0.00021550717065110803, + -0.0013523490633815527, + -0.0032202552538365126, + -0.001157686347141862, + -0.004449942149221897, + -0.0016590891173109412, + -0.00101062236353755, + -0.0003079893649555743, + -0.00048375347978435457, + -0.0021734442561864853, + -0.00423036003485322, + -0.11514264345169067, + -0.8658493757247925, + -0.084366075694561, + -0.02140468917787075, + -0.0060798698104918, + -0.008638513274490833, + -0.003212531330063939, + -0.0009598892065696418, + -0.00032085992279462516 + ] + }, + "throughput": 105.49771806099545 +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..1491284644d --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,161 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And that this is the place where you can be yourself, and be yourself in the most beautiful way. And that this is the place where you can", + "generated_tokens": [ + 3060, + 1455, + 1593, + 1395, + 1278, + 3535, + 2478, + 1636, + 1710, + 1402, + 14019, + 1044, + 1321, + 1402, + 14019, + 1294, + 1278, + 2725, + 15568, + 3039, + 1046, + 3060, + 1455, + 1593, + 1395, + 1278, + 3535, + 2478, + 1636, + 1710 + ], + "latency": 2.2049803733825684, + "cuda_graph_request_count_map": null, + "step_count": 240, + "logprobs": [ + -9.358587265014648, + -2.7594826221466064, + -4.608366012573242, + -1.4093360900878906, + -0.6152952313423157, + -1.7217562198638916, + -2.496668815612793, + -2.0547454357147217, + -2.441960573196411, + -6.280838966369629, + -1.5643692016601562, + -3.462346076965332, + -4.428728103637695, + -3.8633861541748047, + -1.9936373233795166, + -1.8929449319839478, + -3.796365737915039, + -6.8360137939453125, + -0.2901247441768646, + -0.9246833324432373, + -6.633338928222656, + -7.166708469390869, + -12.771251678466797, + -2.198296308517456, + -3.7778120040893555, + -0.4983733296394348, + -4.381269454956055, + -0.0666784718632698, + -0.09580295532941818, + -3.2437636852264404, + -10.079947471618652, + -1.172220230102539, + -5.977442741394043, + -5.046236038208008, + -3.855658531188965, + -2.5585858821868896, + -3.356245994567871, + -5.557229518890381, + -1.6787731647491455, + -5.483290672302246, + -12.218501091003418, + -12.61402702331543, + -0.09662941098213196, + -2.5431432723999023, + -1.4071024656295776, + -2.9154715538024902, + -1.1964417695999146, + -0.006458481773734093, + -3.3625335693359375, + -13.262511253356934, + -4.314079761505127, + -2.617699146270752, + -5.987792015075684, + -0.778266429901123, + -0.048888545483350754, + 
#!/usr/bin/env python3
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
"""
Approve or reject pending ``merge-gate`` deployments for workflow runs that
originate from merge-queue PRs targeting a specific branch.

Requirements:
    pip install PyGithub

Usage:
    export GH_TOKEN="ghp_..."
    export REPO="NVIDIA/Megatron-LM"
    export TARGET_BRANCH="main"
    export STATUS="approved"
    export COMMENT="Auto-approved by CI"

    python approve_merge_gate.py
"""

import logging
import os
import re
import sys

from github import Github, GithubException

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Approve/reject the pending merge-gate deployment of each waiting run.

    Reads configuration from the environment (see module docstring) and exits
    with status 1 on any configuration or GitHub API error.
    """
    # Required configuration comes from the environment.
    github_token = os.environ.get("GH_TOKEN")
    repo_name = os.environ.get("REPO")
    target_branch = os.environ.get("TARGET_BRANCH")
    status = os.environ.get("STATUS")
    comment = os.environ.get("COMMENT", "")

    if not all([github_token, repo_name, target_branch, status]):
        # Message names the variables actually read above (GH_TOKEN, not GITHUB_TOKEN).
        logger.error(
            "Error: GH_TOKEN, REPO, TARGET_BRANCH, and STATUS environment variables must be set"
        )
        sys.exit(1)

    # Initialize GitHub client
    g = Github(github_token)

    try:
        repo = g.get_repo(repo_name)
    except GithubException as e:
        logger.error(f"Error accessing repository: {e}")
        sys.exit(1)

    # Resolve the numeric ID of the "merge-gate" environment; the pending
    # deployments API below requires environment IDs, not names.
    env_id = None
    try:
        # Note: PyGithub doesn't have direct environment support yet,
        # so we use the underlying requester
        response = repo._requester.requestJsonAndCheck("GET", f"{repo.url}/environments")
        for env in response[1].get("environments", []):
            if env.get("name") == "merge-gate":
                env_id = env.get("id")
                break

        if not env_id:
            logger.error("Error: merge-gate environment not found")
            sys.exit(1)
    except GithubException as e:
        logger.error(f"Error fetching environments: {e}")
        sys.exit(1)

    logger.info(f"merge-gate environment ID: {env_id}")

    # Runs blocked on an environment approval report status "waiting".
    try:
        workflow_runs = repo.get_workflow_runs(status="waiting")
    except GithubException as e:
        logger.error(f"Error fetching workflow runs: {e}")
        sys.exit(1)

    logger.info(f"Found {workflow_runs.totalCount} waiting workflow runs")

    # Process each workflow run
    for run in workflow_runs:
        head_branch = run.head_branch

        # Merge-queue branches look like gh-readonly-queue/<target>/pr-<number>-<sha>;
        # capture the target branch and the PR number.
        match = re.search(r"gh-readonly-queue/([^/]+)/pr-(\d+)-", head_branch)
        if not match:
            logger.info(f"Skipping Run #{run.id} on {head_branch}: not a PR branch")
            continue

        branch_name = match.group(1)
        pr_number = int(match.group(2))

        if branch_name != target_branch:
            logger.info(f"Skipping run {run.id}: targets {branch_name}, not {target_branch}")
            continue

        logger.info(f"Processing PR #{pr_number} from run {run.id} (branch: {branch_name})")

        # Approve/reject the pending deployment
        try:
            # PyGithub doesn't have direct support for pending deployments API
            # Use the underlying requester
            repo._requester.requestJsonAndCheck(
                "POST",
                f"{repo.url}/actions/runs/{run.id}/pending_deployments",
                input={"environment_ids": [env_id], "state": status, "comment": comment},
            )
            logger.info(f"✓ Successfully updated deployment for run {run.id} (PR #{pr_number})")
        except GithubException as e:
            # logging calls take no file= kwarg (that is a print() parameter);
            # report failures at ERROR level instead.
            logger.error(f"✗ Failed to update deployment for run {run.id}: {e}")


if __name__ == "__main__":
    main()
[unit-tests] + n_repeat: [1] + time_limit: [1800] - test_case: [tests/unit_tests/data/**/*.py] products: - environment: [lts, dev] From 4fc8520d913fc63de37320c2c142f4d8462bdcbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sat, 25 Oct 2025 16:29:53 +0200 Subject: [PATCH 058/334] ci: Fix branch of approval bot (#1945) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/05.publish.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitlab/stages/05.publish.yml b/.gitlab/stages/05.publish.yml index 20495434f6b..f4f1c153ad3 100644 --- a/.gitlab/stages/05.publish.yml +++ b/.gitlab/stages/05.publish.yml @@ -772,7 +772,11 @@ publish:approve_merge_gate: export GH_TOKEN=$GH_TOKEN export REPO=NVIDIA/Megatron-LM - export TARGET_BRANCH="$CI_COMMIT_BRANCH" + if [[ "$CI_COMMIT_BRANCH" == *main* ]]; then + export TARGET_BRANCH="main" + elif [[ "$CI_COMMIT_BRANCH" == *dev* ]]; then + export TARGET_BRANCH="dev" + fi if [[ $EXIT_CODE -eq 0 ]]; then export STATUS="approved" From 574a0095b44d0a2a3e87ad85d0477fd3618bed24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 26 Oct 2025 08:47:16 +0000 Subject: [PATCH 059/334] ci(fix): Approval gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/05.publish.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitlab/stages/05.publish.yml b/.gitlab/stages/05.publish.yml index f4f1c153ad3..d97f457621a 100644 --- a/.gitlab/stages/05.publish.yml +++ b/.gitlab/stages/05.publish.yml @@ -768,6 +768,12 @@ publish:approve_merge_gate: pip install --no-cache-dir python-gitlab click pygithub export GITLAB_ENDPOINT export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE} + if [[ "$CI_COMMIT_BRANCH" == *main* ]]; then + export TARGET_BRANCH="main" + elif [[ "$CI_COMMIT_BRANCH" == *dev* ]]; then + export 
TARGET_BRANCH="dev" + fi + python tests/test_utils/python_scripts/check_status_of_main.py --target-branch "$CI_COMMIT_BRANCH" --once || EXIT_CODE=$? export GH_TOKEN=$GH_TOKEN From 8243834d39bbd641db38581b0e335a127b808743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 26 Oct 2025 08:56:37 +0000 Subject: [PATCH 060/334] ci: Approval gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/05.publish.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.gitlab/stages/05.publish.yml b/.gitlab/stages/05.publish.yml index d97f457621a..cf561727a49 100644 --- a/.gitlab/stages/05.publish.yml +++ b/.gitlab/stages/05.publish.yml @@ -778,11 +778,6 @@ publish:approve_merge_gate: export GH_TOKEN=$GH_TOKEN export REPO=NVIDIA/Megatron-LM - if [[ "$CI_COMMIT_BRANCH" == *main* ]]; then - export TARGET_BRANCH="main" - elif [[ "$CI_COMMIT_BRANCH" == *dev* ]]; then - export TARGET_BRANCH="dev" - fi if [[ $EXIT_CODE -eq 0 ]]; then export STATUS="approved" From 106516c91ad9229e66417ad5193c98970cd33275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 26 Oct 2025 08:58:47 +0000 Subject: [PATCH 061/334] ci: Approval gate rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/05.publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/stages/05.publish.yml b/.gitlab/stages/05.publish.yml index cf561727a49..f2d229f1cc5 100644 --- a/.gitlab/stages/05.publish.yml +++ b/.gitlab/stages/05.publish.yml @@ -791,6 +791,6 @@ publish:approve_merge_gate: retry: max: 2 rules: - - if: $CI_PIPELINE_SOURCE == "schedule" || ($CI_COMMIT_BRANCH == 'ci-approve-dev' || $CI_COMMIT_BRANCH == 'ci-approve-main') + - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == 'ci-approve-dev' || $CI_COMMIT_BRANCH == 'ci-approve-main') when: always - when: never 
From ef48a1309f2b8889373823a5346e0fbad74ea94d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 26 Oct 2025 09:01:44 +0000 Subject: [PATCH 062/334] ci: Update golden values dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_lts_dgxa100_dracooci.json | 200 +++++++++--------- .../golden_values_lts_dgxa100_dracooci.json | 100 ++++----- 2 files changed, 150 insertions(+), 150 deletions(-) diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json index ec432ff7884..56a53cbf6ca 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 21.09115, - "2": 0.41164, - "3": 0.38182, - "4": 0.38049, - "5": 0.60969, - "6": 0.36583, - "7": 0.36416, - "8": 0.37604, - "9": 0.3679, - "10": 0.36785, - "11": 0.36954, - "12": 0.36975, - "13": 0.36874, - "14": 0.36917, - "15": 0.37218, - "16": 0.37039, - "17": 0.36749, - "18": 0.36956, - "19": 0.37349, - "20": 0.37202, - "21": 0.36788, - "22": 0.37092, - "23": 0.36616, - "24": 0.36575, - "25": 0.36576, - "26": 0.36657, - "27": 0.36754, - "28": 0.36677, - "29": 0.36466, - "30": 0.36792, - "31": 0.36536, - "32": 0.36562, - "33": 0.36872, - "34": 0.36339, - "35": 0.36568, - "36": 0.36568, - "37": 0.36366, - "38": 0.36485, - "39": 0.36421, - "40": 0.35995, - "41": 0.36131, - "42": 0.36351, - "43": 0.36398, - "44": 0.3645, - "45": 0.359, - "46": 0.3614, - "47": 0.35954, - "48": 0.36106, - 
"49": 0.36508, - "50": 0.36162, - "51": 0.36692, - "52": 0.36519, - "53": 0.3602, - "54": 0.36089, - "55": 0.36195, - "56": 0.35943, - "57": 0.36048, - "58": 0.36032, - "59": 0.36446, - "60": 0.36455, - "61": 0.36016, - "62": 0.36345, - "63": 0.3602, - "64": 0.36067, - "65": 0.36076, - "66": 0.36538, - "67": 0.57124, - "68": 0.36375, - "69": 0.36298, - "70": 0.3623, - "71": 0.36583, - "72": 0.36199, - "73": 0.36503, - "74": 0.3612, - "75": 0.36467, - "76": 0.36386, - "77": 0.36345, - "78": 0.36764, - "79": 0.36585, - "80": 0.36636, - "81": 0.36354, - "82": 0.36426, - "83": 0.36781, - "84": 0.58958, - "85": 0.36576, - "86": 0.36705, - "87": 0.36285, - "88": 0.3685, - "89": 0.36603, - "90": 0.36553, - "91": 0.36328, - "92": 0.36279, - "93": 0.36243, - "94": 0.3647, - "95": 0.3673, - "96": 0.36551, - "97": 0.36297, - "98": 0.36326, - "99": 0.3621, - "100": 0.36226 + "1": 20.13148, + "2": 0.19658, + "3": 0.16932, + "4": 0.16925, + "5": 0.16695, + "6": 0.16969, + "7": 0.4281, + "8": 0.16351, + "9": 0.16208, + "10": 0.37746, + "11": 0.16397, + "12": 0.16616, + "13": 0.16752, + "14": 0.16658, + "15": 0.16626, + "16": 0.16687, + "17": 0.16684, + "18": 0.16721, + "19": 0.16647, + "20": 0.16786, + "21": 0.16027, + "22": 0.16375, + "23": 0.15995, + "24": 0.16197, + "25": 0.16052, + "26": 0.16097, + "27": 0.16002, + "28": 0.16159, + "29": 0.15911, + "30": 0.16097, + "31": 0.15974, + "32": 0.162, + "33": 0.15978, + "34": 0.16068, + "35": 0.16093, + "36": 0.16084, + "37": 0.16071, + "38": 0.16241, + "39": 0.15964, + "40": 0.16151, + "41": 0.16012, + "42": 0.16055, + "43": 0.15998, + "44": 0.16159, + "45": 0.16019, + "46": 0.16043, + "47": 0.16108, + "48": 0.16025, + "49": 0.15985, + "50": 0.16067, + "51": 0.17029, + "52": 0.16714, + "53": 0.16748, + "54": 0.16511, + "55": 0.1671, + "56": 0.1665, + "57": 0.16873, + "58": 0.16673, + "59": 0.16609, + "60": 0.16583, + "61": 0.1659, + "62": 0.16564, + "63": 0.16874, + "64": 0.16698, + "65": 0.1663, + "66": 0.16574, + "67": 0.16591, + 
"68": 0.16649, + "69": 0.16691, + "70": 0.16656, + "71": 0.16678, + "72": 0.16455, + "73": 0.16685, + "74": 0.16559, + "75": 0.1703, + "76": 0.1649, + "77": 0.16778, + "78": 0.16543, + "79": 0.16601, + "80": 0.1648, + "81": 0.17029, + "82": 0.16906, + "83": 0.17088, + "84": 0.16504, + "85": 0.16803, + "86": 0.16513, + "87": 0.16682, + "88": 0.16712, + "89": 0.16999, + "90": 0.1666, + "91": 0.16704, + "92": 0.16919, + "93": 0.17188, + "94": 0.17115, + "95": 0.16965, + "96": 0.1671, + "97": 0.16712, + "98": 0.17096, + "99": 0.16958, + "100": 0.16893 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json index 516c7e99194..9e89b4bc3ee 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19.94048, - "2": 0.39367, - "3": 0.37589, - "4": 0.37388, - "5": 0.66307, - "6": 0.36351, - "7": 0.3595, - "8": 0.36116, - "9": 0.36043, - "10": 0.35758, - "11": 0.36057, - "12": 0.35963, - "13": 0.36072, - "14": 0.35903, - "15": 0.35994, - "16": 0.35763, - "17": 0.36245, - "18": 0.35747, - "19": 0.35878, - "20": 0.35982, - "21": 0.35849, - "22": 0.35936, - "23": 0.35823, - "24": 0.35778, - "25": 0.3606, - "26": 0.35907, - "27": 0.35852, - "28": 0.35911, - "29": 0.35837, - "30": 0.35815, - "31": 0.35909, - "32": 0.35701, - "33": 0.3602, - "34": 0.35976, - "35": 0.36009, - "36": 0.35943, - "37": 0.35776, - "38": 0.35664, - "39": 0.36098, - "40": 0.35836, - "41": 0.35857, - "42": 0.35915, - "43": 0.3572, - "44": 0.35779, - "45": 0.36243, - "46": 0.35772, - "47": 0.35984, - "48": 
0.35743, - "49": 0.35726, - "50": 0.35872 + "1": 19.01834, + "2": 0.19131, + "3": 0.16463, + "4": 0.17624, + "5": 0.16919, + "6": 0.16375, + "7": 0.16414, + "8": 0.16407, + "9": 0.16499, + "10": 0.16212, + "11": 0.16324, + "12": 0.16316, + "13": 0.16134, + "14": 0.16068, + "15": 0.16212, + "16": 0.16071, + "17": 0.1623, + "18": 0.16066, + "19": 0.16307, + "20": 0.16502, + "21": 0.16536, + "22": 0.16572, + "23": 0.16545, + "24": 0.16393, + "25": 0.16484, + "26": 0.16386, + "27": 0.16204, + "28": 0.16264, + "29": 0.16076, + "30": 0.16134, + "31": 0.15999, + "32": 0.1604, + "33": 0.16121, + "34": 0.16175, + "35": 0.16122, + "36": 0.16498, + "37": 0.16166, + "38": 0.1626, + "39": 0.16244, + "40": 0.16183, + "41": 0.16437, + "42": 0.16175, + "43": 0.1628, + "44": 0.16269, + "45": 0.16111, + "46": 0.16288, + "47": 0.16257, + "48": 0.16123, + "49": 0.15922, + "50": 0.16035 } } } \ No newline at end of file From 3281c010a2f7829d8274d13abade26632edda13c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 26 Oct 2025 10:51:02 +0000 Subject: [PATCH 063/334] ci: Approval gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/05.publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/stages/05.publish.yml b/.gitlab/stages/05.publish.yml index f2d229f1cc5..68388419a6e 100644 --- a/.gitlab/stages/05.publish.yml +++ b/.gitlab/stages/05.publish.yml @@ -774,7 +774,7 @@ publish:approve_merge_gate: export TARGET_BRANCH="dev" fi - python tests/test_utils/python_scripts/check_status_of_main.py --target-branch "$CI_COMMIT_BRANCH" --once || EXIT_CODE=$? + python tests/test_utils/python_scripts/check_status_of_main.py --target-branch "$TARGET_BRANCH" --once || EXIT_CODE=$? 
export GH_TOKEN=$GH_TOKEN export REPO=NVIDIA/Megatron-LM From 8fe0c3a563a1b1d76f92914bf7242c5f5529e90b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 26 Oct 2025 12:19:29 +0000 Subject: [PATCH 064/334] ci: Approval bot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/05.publish.yml | 10 ++++-- .../python_scripts/check_status_of_main.py | 32 ++++++++++++------- tools/autoformat.sh | 2 +- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/.gitlab/stages/05.publish.yml b/.gitlab/stages/05.publish.yml index 68388419a6e..024ec2aa490 100644 --- a/.gitlab/stages/05.publish.yml +++ b/.gitlab/stages/05.publish.yml @@ -782,12 +782,18 @@ publish:approve_merge_gate: if [[ $EXIT_CODE -eq 0 ]]; then export STATUS="approved" export COMMENT="Main is healthy. Submitting PR." - else + elif [[ $EXIT_CODE -eq 1 ]]; then export STATUS="rejected" export COMMENT="Main is not healthy. An automation engineer is investigating. No need to take any action." + elif [[ $EXIT_CODE -eq 2 ]]; then + echo "Main is running. We won't cancel the deployment." 
+ exit 0 + fi + + if [[ $EXIT_CODE -lt 2 ]]; then + python tests/test_utils/python_scripts/approve_merge_gate.py fi - python tests/test_utils/python_scripts/approve_merge_gate.py retry: max: 2 rules: diff --git a/tests/test_utils/python_scripts/check_status_of_main.py b/tests/test_utils/python_scripts/check_status_of_main.py index 16f80e6dcf6..a1cae393bfb 100644 --- a/tests/test_utils/python_scripts/check_status_of_main.py +++ b/tests/test_utils/python_scripts/check_status_of_main.py @@ -43,22 +43,26 @@ def most_recent_pipeline(target_branch: str): def is_pending(target_branch: str): pipeline = most_recent_pipeline(target_branch) - is_pending = ( - pipeline.attributes['status'] == 'pending' or pipeline.attributes['status'] == 'running' - ) - is_canceled = pipeline.attributes['status'] == 'canceled' + PENDING_STATUSES = [ + "created", + "waiting_for_resource", + "preparing", + "pending", + "running", + "canceled", + "skipped", + "manual", + "scheduled", + ] + + is_pending = pipeline.attributes['status'] in PENDING_STATUSES if not is_pending: logger.info( f"Main pipeline {pipeline.id} finished with status {pipeline.attributes['status']}" ) - return is_pending or is_canceled - - -def is_sucess(target_branch: str): - pipeline = most_recent_pipeline(target_branch) - return pipeline.attributes['status'] == 'success' + return is_pending @click.command() @@ -71,12 +75,18 @@ def main(target_branch: str, continuous: bool): break time.sleep(60) - if not is_sucess(target_branch=target_branch): + pipeline = most_recent_pipeline(target_branch) + + if pipeline.attributes['status'] == 'failed': logger.error( "Main is broken, we're therefore blocking your merge. Please wait until main is fixed again by checking the repo's front page. If the status is green again, you can re-attempt the merge. Feel free to ping the team if you have any questions." 
) sys.exit(1) + if pipeline.attributes['status'] == 'running': + logger.info("Main is running, we won't cancel the deployment.") + sys.exit(2) + sys.exit(0) diff --git a/tools/autoformat.sh b/tools/autoformat.sh index 85d1d19c7cb..fffc7725eb4 100755 --- a/tools/autoformat.sh +++ b/tools/autoformat.sh @@ -15,7 +15,7 @@ CHECK_ONLY=${CHECK_ONLY:-false} SKIP_DOCS=${SKIP_DOCS:-false} BASE_REF=${BASE_REF:-main} -git remote set-url origin "https://${GITLAB_ENDPOINT}/$CI_PROJECT_NAMESPACE/megatron-lm.git" +git remote set-url origin "https://github.com/NVIDIA/Megatron-LM.git" git fetch origin ${BASE_REF} CHANGED_FILES=$(git diff --name-only --diff-filter=d --merge-base origin/${BASE_REF} megatron/core tests/ | grep '\.py$' || true) ADDITIONAL_ARGS="" From a33936d0b169c72f27e2b66680c869ae83d48015 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 26 Oct 2025 14:51:06 +0000 Subject: [PATCH 065/334] ci: Increase time limit for main tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5ddf5f094c2..6523c4a1973 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -41,7 +41,7 @@ workflow: FUNCTIONAL_TEST_SCOPE: mr FUNCTIONAL_TEST_REPEAT: 5 FUNCTIONAL_TEST_RECORD_CHECKPOINTS: "no" - FUNCTIONAL_TEST_TIME_LIMIT: 2700 + FUNCTIONAL_TEST_TIME_LIMIT: 3600 CLUSTER_A100: "" CLUSTER_H100: "" PUBLISH: "no" From 51768236aad5e2dccbdbae68ef2032bc8ae44604 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 26 Oct 2025 14:04:08 +0100 Subject: [PATCH 066/334] ci: Auto-assign milestone (#1952) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/auto-assign-milestone.yml | 74 +++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 
.github/workflows/auto-assign-milestone.yml diff --git a/.github/workflows/auto-assign-milestone.yml b/.github/workflows/auto-assign-milestone.yml new file mode 100644 index 00000000000..7eae6838332 --- /dev/null +++ b/.github/workflows/auto-assign-milestone.yml @@ -0,0 +1,74 @@ +name: Auto-assign Milestone to PR + +on: + push: + branches: + - "pull-request/[0-9]+" + +permissions: + contents: read + pull-requests: write + issues: write + +jobs: + assign-milestone: + runs-on: ubuntu-latest + environment: nemo-ci + steps: + - name: Get PR info + id: get-pr-info + if: startsWith(github.ref, 'refs/heads/pull-request/') + uses: nv-gha-runners/get-pr-info@main + + - name: Check if PR has milestone + id: check_milestone + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + MILESTONE=$(gh pr view ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} \ + --repo ${{ github.repository }} \ + --json milestone \ + --jq '.milestone.title') + + if [ "$MILESTONE" = "null" ] || [ -z "$MILESTONE" ]; then + echo "has_milestone=false" >> $GITHUB_OUTPUT + else + echo "has_milestone=true" >> $GITHUB_OUTPUT + echo "PR already has milestone: $MILESTONE" + fi + + - name: Get most recent open milestone + if: steps.check_milestone.outputs.has_milestone == 'false' + id: get_milestone + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + # Get the most recent open milestone (sorted by due date, then by creation date) + MILESTONE_NUMBER=$(gh api \ + "repos/${{ github.repository }}/milestones?state=open&sort=due_on&direction=desc" \ + --jq '.[0].number') + + MILESTONE_TITLE=$(gh api \ + "repos/${{ github.repository }}/milestones?state=open&sort=due_on&direction=desc" \ + --jq '.[0].title') + + if [ -z "$MILESTONE_NUMBER" ] || [ "$MILESTONE_NUMBER" = "null" ]; then + echo "No open milestones found" + echo "milestone_found=false" >> $GITHUB_OUTPUT + else + echo "milestone_found=true" >> $GITHUB_OUTPUT + echo "milestone_number=$MILESTONE_NUMBER" >> $GITHUB_OUTPUT + echo 
"milestone_title=$MILESTONE_TITLE" >> $GITHUB_OUTPUT + echo "Found milestone: $MILESTONE_TITLE (number: $MILESTONE_NUMBER)" + fi + + - name: Assign milestone to PR + if: steps.check_milestone.outputs.has_milestone == 'false' && steps.get_milestone.outputs.milestone_found == 'true' + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + gh pr edit ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} \ + --repo ${{ github.repository }} \ + --milestone "${{ steps.get_milestone.outputs.milestone_title }}" + + echo "✅ Assigned milestone '${{ steps.get_milestone.outputs.milestone_title }}' to PR #${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}" From 4b6ba6019a677f3f806c4f2eb4de3ea46fc83de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 26 Oct 2025 19:01:05 +0100 Subject: [PATCH 067/334] ci: Run on push to release branch (#1960) (#1962) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6523c4a1973..53574fdea22 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -33,7 +33,7 @@ workflow: - if: $CI_PIPELINE_SOURCE == "web" # For push to main - - if: $CI_PIPELINE_SOURCE == 'push' && ($CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "dev") + - if: $CI_PIPELINE_SOURCE == 'push' && ($CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "dev" || $CI_COMMIT_BRANCH =~ /^core_/) variables: UNIT_TEST: "no" INTEGRATION_TEST: "no" From 221747d02b827ff71858e69c687665198b45debc Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Mon, 27 Oct 2025 12:20:00 +0800 Subject: [PATCH 068/334] [DEV] support split qkv in muon (#1915) --- megatron/core/optimizer/muon.py | 118 +++++++++++--------- megatron/core/optimizer/optimizer_config.py | 7 +- megatron/core/tensor_parallel/layers.py | 3 +- megatron/training/arguments.py | 10 +- pyproject.toml | 2 +- 
tests/unit_tests/test_muon_optimizer.py | 3 +- tests/unit_tests/test_optimizer.py | 19 +--- uv.lock | 4 +- 8 files changed, 91 insertions(+), 75 deletions(-) diff --git a/megatron/core/optimizer/muon.py b/megatron/core/optimizer/muon.py index d2dc7533bf9..700ad17e630 100644 --- a/megatron/core/optimizer/muon.py +++ b/megatron/core/optimizer/muon.py @@ -1,10 +1,9 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. """Megatron muon optimizer wrapper to handle tensor-parallel.""" import logging -from functools import partial -from typing import Callable, List, Literal, Optional +from typing import Any, Callable, List, Literal, Optional import torch from torch.optim.optimizer import ParamsT @@ -65,35 +64,36 @@ def __init__( if num_ns_steps < 1: raise ValueError(f"num_ns_steps must be at least 1, got {num_ns_steps}") - orthogonalize_fn = partial( - newton_schulz_tp, - steps=num_ns_steps, - coefficient_type=coefficient_type, - mode="duplicated" if mode == "blockwise" else mode, - ) - scale_factor_fn = partial( - get_muon_scale_factor, mode=scale_mode, extra_scale_factor=extra_scale_factor - ) - - def orthogonalize_fn_tp( - x: torch.Tensor, + def scaled_orthogonalize_fn( + grad: torch.Tensor, tp_group: torch.distributed.ProcessGroup, partition_dim: int | None = None, ) -> torch.Tensor: - return orthogonalize_fn(x, tp_group=tp_group, partition_dim=partition_dim) - - def scale_factor_fn_tp( - size_out: int, size_in: int, partition_dim: int | None = None - ) -> float: - if partition_dim is None: - return scale_factor_fn(size_out, size_in) - - size = [size_out, size_in] - size[partition_dim] *= get_pg_size(pg_collection.tp) if pg_collection else 1 - return scale_factor_fn(*size) + log_single_rank( + logger, + logging.DEBUG, + f'Orthogonalizing grad with {num_ns_steps} steps, {coefficient_type} coefficient, ' + f'{scale_mode} scale mode, extra_scale_factor={extra_scale_factor}', + ) + 
size = [grad.size(-2), grad.size(-1)] + if partition_dim: + size[partition_dim] *= get_pg_size(tp_group) + orth_grad = newton_schulz_tp( + grad, + steps=num_ns_steps, + coefficient_type=coefficient_type, + tp_group=tp_group, + partition_dim=partition_dim, + mode="duplicated" if mode == "blockwise" else mode, + ) + scale_factor = get_muon_scale_factor(size[0], size[1], mode=scale_mode) + return orth_grad * scale_factor * extra_scale_factor self.pg_collection = pg_collection self.mode = mode + self.split_qkv = split_qkv + self.is_qkv_fn = is_qkv_fn + self.qkv_split_shapes = qkv_split_shapes super().__init__( params, @@ -102,15 +102,11 @@ def scale_factor_fn_tp( use_nesterov, weight_decay, use_decoupled_weight_decay, - split_qkv, - is_qkv_fn, - qkv_split_shapes, fp32_matmul_prec, - orthogonalize_fn_tp, - scale_factor_fn_tp, + scaled_orthogonalize_fn, ) - def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor) -> torch.Tensor: + def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> torch.Tensor: """Orthogonalize the momentum. Args: @@ -122,6 +118,7 @@ def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor) -> torch.Tensor: Returns: The orthogonalized gradient tensor. """ + # TODO(deyuf): switch to group if self.pg_collection: tp_group = ( self.pg_collection.expt_tp @@ -135,27 +132,33 @@ def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor) -> torch.Tensor: # llm-shower use different default value for partition_dim than TE. # Because -1 is a valid index for ndarray, we decided to not overload it. partition_dim = None + if self.split_qkv and self.is_qkv_fn(p): # type: ignore[misc] # split grouped attention parameters (e.g., QKV, GQA, etc.) 
- qkv_grads = torch.split(grad, self.qkv_split_shapes, dim=0) + grad_shape = grad.shape + log_single_rank( + logger, + logging.DEBUG, + f'qkv split grad shape {grad_shape}, split shapes {self.qkv_split_shapes}', + ) + num_query_groups = grad_shape[0] // sum(self.qkv_split_shapes) + qkv_grads = torch.split( + grad.view(num_query_groups, sum(self.qkv_split_shapes), -1), + self.qkv_split_shapes, + dim=1, + ) + qkv_grads = [g.reshape(-1, grad_shape[-1]) for g in qkv_grads] - # Apply Newton-Schulz to each component - qkv_whitened = [ - self.orthogonalize_fn(g, tp_group=tp_group, partition_dim=partition_dim) + # Apply Newton-Schulz and scales to each component, concat back + qkv_grads = [ + self.scaled_orthogonalize_fn(g, tp_group, partition_dim).view( + num_query_groups, -1, grad_shape[-1] + ) for g in qkv_grads ] - qkv_scales = [ - self.scale_factor_fn(g.size(0), g.size(1), partition_dim) for g in qkv_grads - ] - - # Apply individual scales to each component and concatenate - grad = torch.cat( - [whitened * scale for whitened, scale in zip(qkv_whitened, qkv_scales)] - ) + grad = torch.cat(qkv_grads, dim=1).view(grad_shape) else: - grad = self.orthogonalize_fn( - grad, tp_group=tp_group, partition_dim=partition_dim - ) * self.scale_factor_fn(grad.size(0), grad.size(1), partition_dim) + grad = self.scaled_orthogonalize_fn(grad, tp_group, partition_dim) return grad @@ -206,7 +209,18 @@ def get_megatron_muon_optimizer( # record list of non/linear params linear_params = [] nonlinear_params = [] + for model_chunk in model_chunks: + # use config to determine qkv split shapes. 
+ # no need to check tp since tp splits by head and this is per head(group) dimension + num_attention_heads = model_chunk.config.num_attention_heads + num_query_groups = model_chunk.config.num_query_groups + kv_channels = model_chunk.config.kv_channels + qkv_split_shapes = [ + num_attention_heads // num_query_groups * kv_channels, + kv_channels, + kv_channels, + ] for name, param in model_chunk.named_parameters(): if not param.requires_grad: continue @@ -215,6 +229,10 @@ def get_megatron_muon_optimizer( # change in optimizer if 'experts' in name and 'shared' not in name: param.expert_tp = True + # add flag for qkv parameter + # TODO(deyuf): support MLA + if 'linear_qkv.weight' in name and len(param.shape) == 2: + param.is_qkv = True # TODO(deyuf): might not be sufficient for future algorithm. revisit this conditioning if not getattr(param, 'is_embedding_or_output_parameter', False) and not ( len(param.shape) == 1 @@ -238,7 +256,6 @@ def get_megatron_muon_optimizer( decoupled_min_lr=config.decoupled_min_lr, ) - # TODO(deyuf): support qkv split optimizer = TensorParallelMuon( linear_param_groups, lr=config.lr, @@ -248,8 +265,9 @@ def get_megatron_muon_optimizer( fp32_matmul_prec=config.muon_fp32_matmul_prec, num_ns_steps=config.muon_num_ns_steps, scale_mode=config.muon_scale_mode, - split_qkv=False, - qkv_split_shapes=None, + split_qkv=config.muon_split_qkv, + is_qkv_fn=lambda p: getattr(p, 'is_qkv', False), + qkv_split_shapes=qkv_split_shapes, extra_scale_factor=config.muon_extra_scale_factor, pg_collection=pg_collection, mode=config.muon_tp_mode, diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index ced3845804f..8692d1e9b52 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from dataclasses import dataclass from typing import Callable, Optional @@ -128,7 +128,10 @@ class OptimizerConfig: muon_momentum: float = 0.95 """The momentum used by the internal SGD.""" - muon_use_nesterov: bool = True + muon_split_qkv: bool = True + """Whether to split QKV parameters for Muon optimizer.""" + + muon_use_nesterov: bool = False """Whether to use Nesterov-style momentum in the internal SGD.""" muon_scale_mode: str = "spectral" diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 5ca290ff680..e79d55b9fa3 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # Parts of the code here are adapted from PyTorch # repo: https://github.com/pytorch/pytorch @@ -57,6 +57,7 @@ _MODEL_PARALLEL_ATTRIBUTE_DEFAULTS = { "expert_tp": False, + "is_qkv": False, "tensor_model_parallel": False, "partition_dim": -1, "partition_stride": 1, diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index d1e062edd02..bdf915a8ae1 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
"""Megatron arguments.""" @@ -1940,10 +1940,12 @@ def _add_regularization_args(parser): 'numerical stability') group.add_argument('--sgd-momentum', type=float, default=0.9, help='Momentum factor for sgd') - group.add_argument('--muon-momentum', type=float, default=0.95, + group.add_argument('--muon-momentum', type=float, default=0.9, help='Momentum factor for Muon optimizer') - group.add_argument('--muon-no-use-nesterov', action='store_false', default=True, - dest='muon_use_nesterov', + group.add_argument('--muon-no-split-qkv', action='store_false', default=True, + dest='muon_split_qkv', + help='Whether to split QKV parameters for Muon optimizer') + group.add_argument('--muon-use-nesterov', action='store_true', help='Whether to use Nesterov-style momentum in the internal SGD') group.add_argument('--muon-scale-mode', type=str, default='spectral', choices=['spectral', 'unit_rms_norm', 'shape_scaling'], diff --git a/pyproject.toml b/pyproject.toml index d02b873d1d9..db91ce393e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,7 +169,7 @@ flash_mla = [ { git = "https://github.com/deepseek-ai/FlashMLA", rev = "9edee0c022cd0938148a18e334203b0aab43aa19" }, ] transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "release_v2.8" } # on `release_v2.8` -emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "fb1add873e7851ec34b48581ea1b15761b73d189" } +emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "cf9909b777ffac18e05b67a6708282cadc000942" } nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "8ca8f7952a597f944985f1f1368a7acb9aa3a6c2" } [tool.isort] profile = "black" # black-compatible diff --git a/tests/unit_tests/test_muon_optimizer.py b/tests/unit_tests/test_muon_optimizer.py index 71d77dc6ecc..cc99f7a16e6 100644 --- a/tests/unit_tests/test_muon_optimizer.py +++ b/tests/unit_tests/test_muon_optimizer.py @@ -1,3 +1,5 @@ +# Copyright 
(c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + import os import pytest @@ -543,7 +545,6 @@ def test_muon_optimizer_multiple_steps(): ), f"Weight should change at step {i}" -@pytest.mark.skip(reason="split qkv is not implemented yet") def test_muon_optimizer_qkv_split(): """Test TensorParallelMuon optimizer with QKV splitting.""" # Create a model with QKV-like parameter diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py index d8f6e3a2eeb..3d6b4b3c15e 100644 --- a/tests/unit_tests/test_optimizer.py +++ b/tests/unit_tests/test_optimizer.py @@ -1,3 +1,5 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + import os import pytest @@ -244,24 +246,13 @@ def run_model(model, input, optim, fp8_recipe, fp8_recipe_settings): test_model, input, test_optim, fp8_recipe, fp8_recipe_settings ) - rtol = 1e-3 # relative tolerance - atol = 1e-5 # absolute tolerance + rtol, atol = 1.6e-2, 1e-5 # Compare grad norms - allow small difference due to precision - rel_diff = abs(test_grad_norm - baseline_grad_norm) / ( - abs(baseline_grad_norm) + 1e-7 # avoid div by 0 - ) - abs_diff = abs(test_grad_norm - baseline_grad_norm) - assert ( - rel_diff <= rtol or abs_diff <= atol - ), f"Grad norm mismatch: baseline={baseline_grad_norm}, test={test_grad_norm}, rel_diff={rel_diff}, abs_diff={abs_diff}" + torch.testing.assert_close(test_grad_norm, baseline_grad_norm, atol=atol, rtol=rtol) # Compare losses - allow small difference due to precision - loss_rel_diff = abs(test_loss - baseline_loss) / (abs(baseline_loss) + 1e-7) - loss_abs_diff = abs(test_loss - baseline_loss) - assert ( - loss_rel_diff <= rtol or loss_abs_diff <= atol - ), f"Loss mismatch: baseline={baseline_loss}, test={test_loss}, rel_diff={loss_rel_diff}, abs_diff={loss_abs_diff}" + torch.testing.assert_close(test_loss, baseline_loss, atol=atol, rtol=rtol) # Save and reload state dict for the test model state_dict = test_optim.state_dict() diff --git 
a/uv.lock b/uv.lock index f7c8916166b..c20d3f55dfe 100644 --- a/uv.lock +++ b/uv.lock @@ -1279,7 +1279,7 @@ wheels = [ [[package]] name = "emerging-optimizers" version = "0.1.0" -source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=fb1add873e7851ec34b48581ea1b15761b73d189#fb1add873e7851ec34b48581ea1b15761b73d189" } +source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=cf9909b777ffac18e05b67a6708282cadc000942#cf9909b777ffac18e05b67a6708282cadc000942" } dependencies = [ { name = "absl-py" }, { name = "torch", marker = "sys_platform == 'never'" }, @@ -2401,7 +2401,7 @@ requires-dist = [ { name = "causal-conv1d", marker = "extra == 'dev'", specifier = "~=1.5" }, { name = "einops", marker = "extra == 'dev'", specifier = "~=0.8" }, { name = "einops", marker = "extra == 'lts'" }, - { name = "emerging-optimizers", marker = "extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=fb1add873e7851ec34b48581ea1b15761b73d189" }, + { name = "emerging-optimizers", marker = "extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=cf9909b777ffac18e05b67a6708282cadc000942" }, { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.3.2" }, { name = "flashinfer-python", marker = "extra == 'dev'" }, { name = "flask-restful", marker = "extra == 'mlm'" }, From a0a1866ff56fa079aa6fe9cbb2775bbab58170b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 27 Oct 2025 09:27:23 +0000 Subject: [PATCH 069/334] ci: Add golden values for gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_dev_dgxh100_coreweave.json | 187 ++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 
tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..ddc6cacf3a8 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,187 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.3733036518096924, + "cuda_graph_request_count_map": { + "372": 0, + "360": 0, + "336": 0, + "312": 0, + "288": 0, + "264": 0, + "240": 0, + "216": 0, + "192": 0, + "168": 0, + "144": 0, + "120": 0, + "96": 0, + "72": 0, + "48": 0, + "24": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + -5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + -2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + 
-0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.331681251525879, + -1.5606917142868042, + -2.454296588897705, + -1.5334703922271729, + -1.2631131410598755, + -2.657367706298828, + -0.6480202078819275, + -0.4550393521785736, + -1.3625166416168213, + -0.8142069578170776, + -0.4496593475341797, + -0.9312890768051147, + -1.732723355293274, + -0.44613128900527954, + -1.6895122528076172, + -0.6082233190536499, + -1.0978344678878784, + -1.1122435331344604, + -0.002520838286727667, + -1.4072327613830566, + -0.007462364621460438, + -0.7548662424087524, + -0.9937503337860107, + -0.0675487294793129, + -0.9595617055892944, + -0.029961343854665756, + -2.205785036087036, + -1.2615025043487549, + -0.7878209352493286 + ] + }, + "throughput": [ + 14.167753773233736, + 78.68224606460956, + 79.61636072923858, + 79.54665108975186, + 79.62008872611396, + 79.57034369848175, + 79.0717192987748, + 79.63717144611178 + ] +} \ No newline at end of file From c9fb78b85e291e4869df2809e6ee99d257af4fa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 27 Oct 2025 09:29:47 +0000 Subject: [PATCH 070/334] ci: Add more golden values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_dev_dgxh100_coreweave.json | 187 ++ .../golden_values_dev_dgxh100_coreweave.json | 2703 +++++++++++++++++ .../golden_values_dev_dgxh100_coreweave.json | 
170 ++ 3 files changed, 3060 insertions(+) create mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..8076bdc9a25 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,187 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.2859375476837158, + "cuda_graph_request_count_map": { + "372": 0, + "360": 0, + "336": 0, + "312": 0, + "288": 0, + "264": 0, + "240": 0, + "216": 0, + "192": 0, + "168": 0, + "144": 0, + "120": 0, + "96": 0, + "72": 0, + "48": 0, + "24": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + -5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + -2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + 
-0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.331681251525879, + -1.5606917142868042, + -2.454296588897705, + -1.5334703922271729, + -1.2631131410598755, + -2.657367706298828, + -0.6480202078819275, + -0.4550393521785736, + -1.3625166416168213, + -0.8142069578170776, + -0.4496593475341797, + -0.9312890768051147, + -1.732723355293274, + -0.44613128900527954, + -1.6895122528076172, + -0.6082233190536499, + -1.0978344678878784, + -1.1122435331344604, + -0.002520838286727667, + -1.4072327613830566, + -0.007462364621460438, + -0.7548662424087524, + -0.9937503337860107, + -0.0675487294793129, + -0.9595617055892944, + -0.029961343854665756, + -2.205785036087036, + -1.2615025043487549, + -0.7878209352493286 + ] + }, + "throughput": [ + 4.17304871546938, + 103.09983375107234, + 103.84588149949121, + 103.54772132523577, + 103.90874002236247, + 103.06242433872661, + 103.53792289114989, + 103.82591647661074 + ] +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..7184e0e35c1 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,2703 @@ +{ 
+ "0": { + "input_prompt": "The $500 Cup of coffee?\nConsider this, most Americans spend an average of $1,500-2,000 a year on this bean water.\nI have a few question for you: \nHow has business been the past few months?\nDo you ever feel like your business is stuck?\nDon't feel like you're able to improve performance and make changes required to achieve success ?\nAre your customers spneding less and less and wanting more?\nHave the gas prices affected your business?\nDo you have employees and do they hate you or wish they could quit?\n\nNow, before you and I can decide wheter or not I will be a good fit for your business we should talk this over with coffee.\nAnd, just to warn you this isn't some casual thing. This is not a date or time to be personal or social (but by all means share what you will coz I'll gladly listen).\nTher eare two major talking points and stratagies we will focios on in our lil coffee social\nFor one, we will find your unique selling Proposition (USP).\nDo have the best price? Are you the cheapest in town? Are your customers jerks? Do you haVE REGULARS? Why do people come back?\nwe'll also look for the holes in your business bucket. I'm willing to bet there's a hole or two in your business we'll find together that'll make this 500 dollar cup of Joe pay for itse;f immedietly.\nMany find themselves to be more profitable by just finding out where the dollars are escaping in their business and I like to think of myself as a guy that comes along with some spakel or putty and patch those holes up for you.\nBeleive me, just fixing one hole can mean a lot...just think about a sinking boat that has a hole in it that's about 3\u201d in diameter... 
it doesn't take long to sink.\nI have no agenda, besides f=getting to know your business and seeing wher I can patch the holes and find what makes you do darn unique (I know this won't take long.)\nMany folks, I bet, will find what they need to get off their chest with a quick phone call and they just paypal me the money and make a coffee at home. Look, that's fine too.\nI just to get you ot of your comfort zone, because this is where it all starts my frind.\nSome smart GOAT entrepreneur will probably get everything they need just out of our lil mini consulatant for the more extensive business consukting I offer, and look, that's fine, too.\nMaybe this cup of coffee will be all you need to gtet back on your feet, but not only surive, but thrive!\nI'm not trying to make frineds, or make a bunch of money, or look down your shirt\nBut this is only going to be a 45 minute (max) coffee chat\nAnd, I know you (and me) both have a lot on our plates. So no messing around\nAfter our consultation I will follow up with you in a few days and see how things are going, then I will be emailing you about once or twice every two weeks, just to make sure you're staying on task and implementing what we went over.\nTghere is no obligation to go any further and will gladly give you back your money if this pre-consultation doesn't put you on the right path or you don't get any value out of it...", + "generated_text": " $ is a$ is a $ is a $ is a $ is a $ is a $$1, you\n$ $$ $\n$ $- $\n$\n$\n$ $$$\n$\n$\n$\n$\n$\n$\n$\n$???????, $\n-1\n$\n1.5.\n$\n$, you\n$.\n$\n1,1.1\nI\n$.\nI\n\n\nHow\n$,,,0,1,0,0.0\nIn\nThe\nThe\nThe\n", + "generated_tokens": [ + 1659, + 1395, + 1261, + 1036, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1036, + 1049, + 1044, + 1636, + 1010, + 1036, + 1659, + 1036, + 1659, + 1010, + 1036, + 1659, + 1045, + 1659, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1659, + 1036, + 1036, + 1010, + 
1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1063, + 1063, + 1063, + 1063, + 1063, + 1063, + 1063, + 1044, + 1659, + 1010, + 1045, + 1049, + 1010, + 1036, + 1010, + 1049, + 1046, + 1053, + 1046, + 1010, + 1036, + 1010, + 1036, + 1044, + 1636, + 1010, + 1036, + 1046, + 1010, + 1036, + 1010, + 1049, + 1044, + 1049, + 1046, + 1049, + 1010, + 1073, + 1010, + 1036, + 1046, + 1010, + 1073, + 1010, + 1010, + 1010, + 7801, + 1010, + 1036, + 1044, + 1044, + 1044, + 1048, + 1044, + 1049, + 1044, + 1048, + 1044, + 1048, + 1046, + 1048, + 1010, + 1785, + 1010, + 1784, + 1010, + 1784, + 1010, + 1784, + 1010 + ], + "latency": 9.77891230583191, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -7.7319135665893555, + -2.188307285308838, + -0.7547445297241211, + -0.7294313311576843, + -10.238386154174805, + -3.3775341510772705, + -6.394498825073242, + -7.354557037353516, + -9.018157958984375, + -3.012073040008545, + -3.2584073543548584, + -5.220732688903809, + -4.620487213134766, + -2.5078930854797363, + -3.752683162689209, + -0.13360372185707092, + -0.05705544352531433, + -0.41462242603302, + -1.585279941558838, + -1.6438164710998535, + -1.9557222127914429, + -0.3989897072315216, + -0.0365302674472332, + -6.368816375732422, + -0.8731719255447388, + -0.022585075348615646, + -0.2775891423225403, + -0.0027362785767763853, + -0.0006812873762100935, + -1.581446647644043, + -0.008688976056873798, + -0.3532317280769348, + -6.071163177490234, + -9.162371635437012, + -9.965556144714355, + -2.400461196899414, + -2.9898362159729004, + -2.9803032875061035, + -2.12601900100708, + -3.500912666320801, + -7.015069007873535, + -2.278961420059204, + -0.46380555629730225, + -4.078739166259766, + -1.9430254697799683, + -3.5642244815826416, + -3.689701795578003, + -6.201474189758301, + -6.580833911895752, + -2.3081111907958984, + -5.42717170715332, + -1.1886008977890015, + -1.172760248184204, + 
-1.3571951389312744, + -1.3551844358444214, + -3.376784324645996, + -0.05118789151310921, + -4.064360618591309, + -2.575554847717285, + -0.6994737386703491, + -2.56724214553833, + -2.1888976097106934, + -0.4816131591796875, + -4.070178985595703, + -2.0060782432556152, + -6.858033180236816, + -0.059200502932071686, + -3.214278221130371, + -0.9671833515167236, + -0.823198676109314, + -1.0130078792572021, + -4.595561981201172, + -0.012724989093840122, + -5.214311599731445, + -8.246870040893555, + -3.1476030349731445, + -3.299684524536133, + -4.218191146850586, + -7.318399429321289, + -0.8580498695373535, + -3.0894036293029785, + -1.886361002922058, + -7.217658996582031, + -3.271679639816284, + -3.9717154502868652, + -1.8835484981536865, + -10.034332275390625, + -11.382490158081055, + -5.417011260986328, + -7.505967140197754, + -2.33837890625, + -0.07904055714607239, + -3.294971227645874, + -7.813640594482422, + -1.7646901607513428, + -4.025320053100586, + -3.5977325439453125, + -4.390352249145508, + -9.147806167602539, + -0.5303041934967041, + -7.721246242523193, + -0.6311959028244019, + -0.8119025230407715, + -0.7227814197540283, + -1.8369406461715698, + -0.20933297276496887, + -1.5395950078964233, + -4.424448490142822, + -4.084965705871582, + -3.355497360229492, + -1.0475609302520752, + -6.479413986206055, + -0.7810530662536621, + -2.132437229156494, + -6.648703098297119, + -2.9522438049316406, + -1.2485712766647339, + -4.040503025054932, + -2.3415768146514893, + -5.358206748962402, + -1.6258506774902344, + -3.956300973892212, + -0.732298731803894, + -7.441117286682129, + -1.5242161750793457, + -2.4555861949920654, + -4.295163154602051, + -9.687600135803223, + -0.8213484883308411, + -1.2446978092193604, + -0.01942702941596508, + -4.619411468505859, + -3.3297007083892822, + -2.2139487266540527, + -3.691431999206543, + -2.6574106216430664, + -6.075929641723633, + -0.6123450994491577, + -1.2942559719085693, + -0.6262839436531067, + -7.398006439208984, + 
-4.4869890213012695, + -4.202048301696777, + -4.982994079589844, + -0.637227475643158, + -3.061023235321045, + -10.117584228515625, + -3.8567495346069336, + -4.0480828285217285, + -2.472019672393799, + -4.246374607086182, + -1.3939155340194702, + -7.132441520690918, + -0.20108745992183685, + -4.986658573150635, + -4.387957572937012, + -0.01108358334749937, + -4.209756851196289, + -7.271108627319336, + -4.047314643859863, + -2.6497321128845215, + -1.4763175249099731, + -0.28365400433540344, + -3.5247769355773926, + -1.4226995706558228, + -4.327237129211426, + -2.0407187938690186, + -6.1437907218933105, + -1.5190880298614502, + -2.5511486530303955, + -7.504094123840332, + -2.152172565460205, + -6.708334922790527, + -6.913146495819092, + -3.6959621906280518, + -6.752341270446777, + -0.63083815574646, + -0.12433214485645294, + -5.0525641441345215, + -4.435934066772461, + -0.45601028203964233, + -6.3459577560424805, + -9.882917404174805, + -3.1422882080078125, + -2.550520658493042, + -3.2099051475524902, + -6.278127193450928, + -0.07764133810997009, + -3.155696153640747, + -1.933587670326233, + -9.61027717590332, + -6.211391925811768, + -4.664543151855469, + -6.783782005310059, + -5.676271438598633, + -8.605900764465332, + -0.0824289619922638, + -3.5463995933532715, + -13.374168395996094, + -1.2401021718978882, + -1.8734056949615479, + -3.4154422283172607, + -1.6733763217926025, + -17.633970260620117, + -9.345113754272461, + -0.6277351975440979, + -2.9617538452148438, + -2.5565333366394043, + -10.10580825805664, + -7.130337715148926, + -7.36820125579834, + -4.098911285400391, + -5.747079372406006, + -2.945054769515991, + -0.7887389063835144, + -1.6583149433135986, + -1.0165244340896606, + -6.581666946411133, + -5.926386833190918, + -5.845194339752197, + -0.9657630920410156, + -7.868755340576172, + -1.3244551420211792, + -0.2657390236854553, + -0.06403665244579315, + -2.983020782470703, + -5.943899631500244, + -7.877285957336426, + -3.593116283416748, + 
-3.819509506225586, + -7.226177215576172, + -2.5206997394561768, + -3.385587215423584, + -0.37499159574508667, + -1.4698283672332764, + -3.1460342407226562, + -0.0077166082337498665, + -4.350916862487793, + -3.2183218002319336, + -0.6242184638977051, + -1.4782464504241943, + -2.8054311275482178, + -3.0831401348114014, + -12.17662525177002, + -2.113419532775879, + -1.6448111534118652, + -2.1834323406219482, + -0.7630388140678406, + -10.1896390914917, + -6.234405517578125, + -11.46288776397705, + -1.003785490989685, + -4.211658477783203, + -1.5010679960250854, + -5.859302043914795, + -2.0465080738067627, + -3.7468819618225098, + -4.684195518493652, + -4.318704128265381, + -2.7234389781951904, + -9.00437068939209, + -3.043811321258545, + -3.1384406089782715, + -2.713779926300049, + -2.095993995666504, + -2.1484954357147217, + -10.274479866027832, + -0.682350754737854, + -0.25973302125930786, + -3.6964316368103027, + -13.434456825256348, + -2.3368239402770996, + -5.382724761962891, + -1.9073458909988403, + -5.905669212341309, + -0.032165709882974625, + -1.6530004739761353, + -2.728893280029297, + -1.640552043914795, + -1.1391171216964722, + -1.4353511333465576, + -4.003787994384766, + -0.3450564742088318, + -0.7168521285057068, + -0.34650325775146484, + -0.3616408705711365, + -7.062709331512451, + -1.2851682901382446, + -2.299129009246826, + -8.800156593322754, + -5.208735466003418, + -4.780910491943359, + -2.78342342376709, + -4.469717979431152, + -6.909726619720459, + -2.5114197731018066, + -0.659822404384613, + -0.6915416121482849, + -3.2363741397857666, + -0.5283617377281189, + -0.10473938286304474, + -6.215325832366943, + -7.283237934112549, + -1.6797031164169312, + -11.50100040435791, + -7.5822978019714355, + -3.387317657470703, + -11.407575607299805, + -5.441976547241211, + -3.3264851570129395, + -0.7265786528587341, + -1.382750153541565, + -7.841699600219727, + -8.105277061462402, + -3.9569506645202637, + -4.963083267211914, + -0.5492897629737854, + 
-4.6081390380859375, + -5.870400905609131, + -3.957930088043213, + -5.275494575500488, + -4.105091094970703, + -2.15435528755188, + -2.8472700119018555, + -1.1278448104858398, + -8.226571083068848, + -0.40629008412361145, + -9.916461944580078, + -4.616743087768555, + -1.691868543624878, + -0.6639478802680969, + -2.5716753005981445, + -6.676954746246338, + -6.535329818725586, + -0.4170510768890381, + -1.443942904472351, + -3.145481824874878, + -1.440589427947998, + -0.26935356855392456, + -0.9647155404090881, + -4.335958957672119, + -1.5647850036621094, + -5.890466690063477, + -3.01654052734375, + -1.9168468713760376, + -3.7365682125091553, + -8.001864433288574, + -10.680083274841309, + -4.489352226257324, + -4.6058149337768555, + -7.69011116027832, + -3.6247005462646484, + -1.5600426197052002, + -10.2160062789917, + -5.004643440246582, + -0.19602319598197937, + -3.375545024871826, + -2.669325590133667, + -1.3932737112045288, + -1.6410658359527588, + -6.847603797912598, + -6.744344711303711, + -0.5215591192245483, + -0.25840020179748535, + -1.1448237895965576, + -5.57253885269165, + -7.251138687133789, + -4.221924781799316, + -0.7688062787055969, + -2.504502534866333, + -3.146519660949707, + -2.206653356552124, + -1.4295082092285156, + -7.96943998336792, + -4.332189083099365, + -2.5750505924224854, + -1.7102608680725098, + -5.311381816864014, + -8.897522926330566, + -2.994919538497925, + -3.3397974967956543, + -2.1794328689575195, + -2.437566041946411, + -0.3181810975074768, + -0.27412793040275574, + -0.7914466857910156, + -2.3470635414123535, + -2.4099245071411133, + -2.491870880126953, + -3.024170160293579, + -1.9719040393829346, + -11.373910903930664, + -1.4279751777648926, + -0.14573107659816742, + -2.055763006210327, + -6.366893291473389, + -4.24091911315918, + -0.00709194503724575, + -2.0199716091156006, + -2.524750232696533, + -1.4272525310516357, + -0.5185190439224243, + -2.927150011062622, + -2.7070627212524414, + -3.365638017654419, + -4.318085193634033, + 
-7.773144721984863, + -1.7947180271148682, + -7.657534599304199, + -8.767786026000977, + -14.74280071258545, + -1.8042558431625366, + -3.2712037563323975, + -1.4002125263214111, + -4.887944221496582, + -1.4821010828018188, + -1.5255622863769531, + -5.879070281982422, + -4.463839530944824, + -5.1955976486206055, + -5.665647506713867, + -0.3775045573711395, + -5.9350481033325195, + -2.800539255142212, + -0.13162286579608917, + -3.034379720687866, + -4.729524612426758, + -4.6252641677856445, + -3.850942611694336, + -2.4760568141937256, + -6.059760093688965, + -10.12075138092041, + -0.9469369649887085, + -11.595907211303711, + -6.875324726104736, + -4.268826007843018, + -2.835529088973999, + -3.8626279830932617, + -4.876199245452881, + -0.013071090914309025, + -4.964417934417725, + -0.7445687055587769, + -5.707155227661133, + -6.10660457611084, + -4.317755699157715, + -4.440443992614746, + -2.9202542304992676, + -4.743522644042969, + -1.2569392919540405, + -2.8675737380981445, + -2.3151841163635254, + -4.318130970001221, + -1.9054772853851318, + -1.1808521747589111, + -0.765956461429596, + -2.768916606903076, + -6.237791061401367, + -1.7224305868148804, + -7.137521743774414, + -4.512486457824707, + -1.9069950580596924, + -4.145983695983887, + -5.365190505981445, + -0.059828490018844604, + -2.273892879486084, + -3.4013004302978516, + -5.035730361938477, + -6.501443386077881, + -9.903446197509766, + -1.6332892179489136, + -2.1572084426879883, + -1.6149548292160034, + -1.4698481559753418, + -6.01010799407959, + -2.2243528366088867, + -6.900836944580078, + -6.0930986404418945, + -2.974020481109619, + -3.225423574447632, + -8.423272132873535, + -1.3423724174499512, + -3.626147508621216, + -0.4862469434738159, + -6.860866546630859, + -3.8910953998565674, + -2.33319354057312, + -1.7229185104370117, + -2.215972423553467, + -8.99046516418457, + -4.099084854125977, + -2.4191012382507324, + -8.288970947265625, + -2.9641928672790527, + -1.5036451816558838, + -3.0544614791870117, + 
-0.0715634673833847, + -2.444031238555908, + -4.520998954772949, + -3.972568988800049, + -0.4985870122909546, + -2.1651363372802734, + -3.4427435398101807, + -1.730639100074768, + -0.9458961486816406, + -7.740211009979248, + -9.39163875579834, + -3.895984172821045, + -1.7523534297943115, + -5.41331672668457, + -8.910720825195312, + -12.971094131469727, + -3.0455880165100098, + -10.501265525817871, + -3.3864927291870117, + -4.842309951782227, + -3.9964733123779297, + -7.3046793937683105, + -2.6607093811035156, + -1.3541781902313232, + -5.003270626068115, + -3.944551944732666, + -0.11356143653392792, + -5.174440383911133, + -9.628616333007812, + -8.654989242553711, + -8.980416297912598, + -6.670101642608643, + -5.488286018371582, + -5.943419933319092, + -2.126483201980591, + -8.054739952087402, + -7.458671569824219, + -2.5267202854156494, + -6.455472946166992, + -8.655346870422363, + -7.903901100158691, + -6.221062660217285, + -7.129237174987793, + -4.2345380783081055, + -2.5375306606292725, + -7.697700500488281, + -1.567080020904541, + -2.084331750869751, + -0.25020831823349, + -1.5145041942596436, + -4.619244575500488, + -0.2970108985900879, + -0.4977554678916931, + -6.197869300842285, + -4.030620098114014, + -7.232107639312744, + -0.21076253056526184, + -1.563366174697876, + -1.133756160736084, + -2.708237648010254, + -4.080535888671875, + -0.6818401217460632, + -0.1864331066608429, + -0.49012088775634766, + -8.732468605041504, + -11.945040702819824, + -5.243098735809326, + -1.5294703245162964, + -0.8935543298721313, + -0.6174070835113525, + -1.5068217515945435, + -3.5766501426696777, + -5.393096923828125, + -4.202867031097412, + -14.765748023986816, + -5.2513813972473145, + -0.7597705721855164, + -0.2502063810825348, + -1.7403976917266846, + -2.8000779151916504, + -1.9808133840560913, + -2.1654744148254395, + -1.8629226684570312, + -3.222038745880127, + -0.040942225605249405, + -2.3384013175964355, + -10.210381507873535, + -4.5859761238098145, + 
-0.5805734395980835, + -3.7019288539886475, + -2.001936674118042, + -2.7876083850860596, + -2.9799084663391113, + -4.349887371063232, + -0.0792960673570633, + -1.4366114139556885, + -1.0813264846801758, + -1.3510822057724, + -6.7060699462890625, + -5.436615943908691, + -3.978389263153076, + -6.785447597503662, + -6.147171497344971, + -3.97414231300354, + -4.332991600036621, + -0.9269428253173828, + -5.1237101554870605, + -4.486598968505859, + -0.04678357392549515, + -1.0307552814483643, + -1.4249452352523804, + -4.517682075500488, + -3.561821699142456, + -2.0815205574035645, + -0.6041194200515747, + -5.992964744567871, + -7.092092514038086, + -0.48916709423065186, + -2.6405677795410156, + -4.3345723152160645, + -3.533582925796509, + -3.1233346462249756, + -3.107872486114502, + -1.9901115894317627, + -3.1052846908569336, + -1.8440347909927368, + -6.21368408203125, + -1.8796799182891846, + -2.705214738845825, + -0.2987763583660126, + -4.070865154266357, + -1.6675832271575928, + -1.3896636962890625, + -1.5731089115142822, + -3.526170015335083, + -2.5088443756103516, + -1.208929419517517, + -3.673125743865967, + -2.501532554626465, + -6.875064373016357, + -8.512459754943848, + -1.042314052581787, + -3.657850980758667, + -7.0950798988342285, + -4.974049091339111, + -8.14085578918457, + -3.529888153076172, + -1.9389504194259644, + -7.0902204513549805, + -2.409292459487915, + -2.9428021907806396, + -1.688283085823059, + -3.622368335723877, + -2.0903351306915283, + -4.160663604736328, + -3.1683764457702637, + -1.2135626077651978, + -7.566033363342285, + -3.1186251640319824, + -5.899919509887695, + -0.9518840312957764, + -2.656729221343994, + -2.2994377613067627, + -6.806836128234863, + -1.280236840248108, + -2.838846206665039, + -1.3598848581314087, + -11.707776069641113, + -3.134333372116089, + -0.6230669617652893, + -8.219222068786621, + -7.562507152557373, + -7.489459037780762, + -1.5368008613586426, + -7.149652481079102, + -5.749268054962158, + -3.162869691848755, + 
-2.7235195636749268, + -6.128931999206543, + -1.1934199333190918, + -3.986410617828369, + -3.76609468460083, + -1.712721586227417, + -3.195504903793335, + -8.397743225097656, + -3.1260581016540527, + -9.792022705078125, + -4.217884540557861, + -11.583260536193848, + -5.987588882446289, + -5.178754806518555, + -6.994749069213867, + -5.167606353759766, + -7.124668121337891, + -6.201416015625, + -10.203682899475098, + -6.858526229858398, + -2.733592987060547, + -5.078882217407227, + -9.003358840942383, + -4.704894542694092, + -3.9085562229156494, + -7.247268199920654, + -7.091092109680176, + -4.4150166511535645, + -7.56699275970459, + -9.485116004943848, + -1.9977033138275146, + -6.65272331237793, + -2.236643075942993, + -7.518955707550049, + -5.525973320007324, + -4.67877721786499, + -6.608670234680176, + -5.536133766174316, + -10.772479057312012, + -10.8853178024292, + -3.6156129837036133, + -6.751470565795898, + -6.4537434577941895, + -3.4220399856567383, + -8.251005172729492, + -3.2146153450012207, + -6.330069541931152, + -1.5551663637161255, + -6.520583629608154, + -10.450878143310547, + -5.8788957595825195, + -3.7398200035095215, + -3.9084208011627197, + -0.3640081584453583, + -6.961522102355957, + -6.066243648529053, + -7.270624160766602, + -5.098455429077148, + -2.7642822265625, + -5.460171699523926, + -7.362828731536865, + -2.558631658554077, + -2.186410427093506, + -2.5309929847717285, + -2.46756649017334, + -2.0306026935577393, + -1.8713470697402954, + -2.108008623123169, + -1.2698389291763306, + -2.1712756156921387, + -2.4432802200317383, + -1.1477653980255127, + -1.8417484760284424, + -2.5971946716308594, + -1.8250831365585327, + -2.103092670440674, + -2.5183165073394775, + -2.9367291927337646, + -1.9412965774536133, + -1.7692793607711792, + -2.864521026611328, + -3.1332175731658936, + -1.098311185836792, + -2.946441173553467, + -2.2800471782684326, + -3.1929852962493896, + -2.754260778427124, + -3.485616445541382, + -3.3010287284851074, + 
-2.5537776947021484, + -2.6752865314483643, + -3.1617612838745117, + -2.4571690559387207, + -2.060081958770752, + -2.425969362258911, + -2.212725877761841, + -2.4232254028320312, + -3.0587053298950195, + -2.4074010848999023, + -2.457937479019165, + -2.319617986679077, + -2.6340954303741455, + -2.599524736404419, + -2.5302212238311768, + -1.6849274635314941, + -2.2609786987304688, + -2.039928674697876, + -1.9474098682403564, + -2.3550753593444824, + -1.718749761581421, + -2.413884162902832, + -1.6247628927230835, + -2.4784040451049805, + -1.828325629234314, + -1.3880831003189087, + -1.4448199272155762, + -1.1477117538452148, + -1.1669728755950928, + -1.8787822723388672, + -1.5565840005874634, + -1.6666553020477295, + -1.747725248336792, + -1.959598422050476, + -2.0376486778259277, + -2.345367431640625, + -2.055098533630371, + -1.3940613269805908, + -3.4385242462158203, + -2.7489635944366455, + -3.2590157985687256, + -3.1128957271575928, + -1.7070379257202148, + -3.9010369777679443, + -3.21574068069458, + -3.3850393295288086, + -1.8778185844421387, + -2.698211908340454, + -1.8060741424560547, + -2.0845324993133545, + -3.4797585010528564, + -2.263254404067993, + -3.083108901977539, + -1.6589758396148682, + -2.687279224395752, + -1.77505624294281, + -2.6142921447753906, + -1.934045672416687, + -1.8834377527236938, + -2.8038980960845947, + -1.550542950630188, + -3.4054152965545654, + -1.724036693572998, + -2.3146564960479736, + -1.5134503841400146, + -2.9289023876190186, + -1.5285141468048096, + -3.421035051345825, + -1.3757282495498657, + -3.441431760787964, + -1.5286564826965332, + -3.4372904300689697, + -3.173043966293335, + -1.1313854455947876, + -1.486415147781372, + -2.506413221359253, + -3.5165903568267822, + -1.4112176895141602, + -3.7175354957580566, + -2.2170844078063965, + -0.704839289188385, + -0.6626103520393372, + -2.5483946800231934, + -0.949668288230896, + -3.1339564323425293, + -1.3326977491378784, + -3.5493476390838623, + -0.6785370111465454, + 
-3.236161470413208, + -2.1347084045410156, + -3.802447557449341, + -2.585503339767456, + -3.9361765384674072, + -2.290905714035034, + -3.714280605316162, + -1.793616533279419, + -3.7252492904663086, + -1.450188159942627, + -3.11938738822937, + -1.25174880027771 + ] + }, + "32": { + "input_prompt": "create a conversational article", + "generated_text": " about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article.", + "generated_tokens": [ + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 
1278, + 17915, + 1307, + 1278, + 9369, + 1046 + ], + "latency": 48.63822364807129, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -4.4165568351745605, + -11.358176231384277, + -0.0701780766248703, + -7.797665119171143, + -2.6805992126464844, + -1.4707680940628052, + -3.0390255451202393, + -1.6902849674224854, + -1.270594835281372, + -1.1936196088790894, + -1.2523558139801025, + -2.7270259857177734, + -1.2371309995651245, + -0.9618493318557739, + -0.4379909038543701, + -1.3917063474655151, + -1.1055524349212646, + -0.9122569561004639, + -0.9911308288574219, + -0.08436793833971024, + -0.5424078106880188, + -0.9181017279624939, + -0.5873759388923645, + -0.19014373421669006, + -0.06655456870794296, + -0.15252672135829926, + -0.09415211528539658, + -0.009787309914827347, + -0.013910251669585705, + -0.005296128336340189, + -0.005677408073097467, + -0.02013739012181759, + -0.21594694256782532, + -0.07153760641813278, + -0.0066444179974496365, + -0.010198505595326424, + -0.011980246752500534, + -0.003686776151880622, + -0.0037619550712406635, + -0.0022467151284217834, + -0.004088377580046654, + -0.021828632801771164, + -0.0012669878778979182, + -0.09768074005842209, + -0.02652405947446823, + -0.0019286142196506262, + -0.002283824374899268, + -0.0032225127797573805, + -0.0009741804678924382, + -0.0009415484382770956, + -0.001211624126881361, + -0.001135300612077117, + -0.002340436913073063, + -0.0010846928926184773, + -0.0509282611310482, + -0.03832047060132027, + -0.00257422705180943, + -0.0022806129418313503, + -0.00262785074301064, + -0.0008195855189114809, + -0.0010239601833745837, + -0.0013777059502899647, + -0.0009899006690829992, + -0.0018756669014692307, + -0.0015304292319342494, + -0.08506463468074799, + -0.01893703266978264, + -0.0013797297142446041, + -0.0014461545506492257, + -0.0013971101725474, + -0.0005869334563612938, + -0.0005212855176068842, + -0.000876757490914315, + -0.0005256939912214875, + -0.0012863941956311464, + 
-0.0015691122971475124, + -0.051276568323373795, + -0.00973513163626194, + -0.0010469438275322318, + -0.0011531615164130926, + -0.0009969270322471857, + -0.00038342276820912957, + -0.0004032037395518273, + -0.000730247818864882, + -0.0003275334893260151, + -0.0008700875914655626, + -0.0017572689102962613, + -0.044393111020326614, + -0.013102858327329159, + -0.0011463745031505823, + -0.0012070996453985572, + -0.0012325793504714966, + -0.0005048430757597089, + -0.0004876854654867202, + -0.0007901645149104297, + -0.00041500062798149884, + -0.0009869233472272754, + -0.0018687656847760081, + -0.03943866863846779, + -0.014425630681216717, + -0.0014756753807887435, + -0.001423775334842503, + -0.001209719106554985, + -0.0005046047735959291, + -0.00042167355422861874, + -0.0007688426994718611, + -0.0002699726028367877, + -0.0006598440813831985, + -0.0017849955474957824, + -0.038999658077955246, + -0.012665312737226486, + -0.0014427024871110916, + -0.0014492495683953166, + -0.001016576774418354, + -0.00042083943844772875, + -0.00033241944038309157, + -0.0006403064471669495, + -0.00022373080719262362, + -0.0007053509471006691, + -0.0018597226589918137, + -0.030997740104794502, + -0.011259939521551132, + -0.0012655591126531363, + -0.00134151556994766, + -0.0008106521563604474, + -0.0003513672563713044, + -0.0002964295563288033, + -0.0006368515896610916, + -0.00020180096908006817, + -0.0005779979983344674, + -0.0016014858847483993, + -0.0271126888692379 + ] + }, + "64": { + "input_prompt": "App Concept: \"Eggy's Interactive Adventure World\"\n\nEggy's Interactive Adventure World is a cutting-edge app designed for phones and tablets that combines immersive storytelling with engaging, interactive experiences for both parents and children. This innovative app allows users to explore the world of Eggy and participate in a series of adventures that teach essential life skills, foster personal growth, and strengthen parent-child bonds.\n\nKey Features:\n\n1. 
Interactive Stories: The app brings the 20 Eggy stories to life through interactive, animated storytelling. Users can read, listen, and actively participate in the stories, making choices that guide Eggy through various adventures, each with unique lessons and experiences.\n2. Augmented Reality (AR) Integration: The app utilizes AR technology, allowing users to interact with Eggy and the story characters in their real-world environment. This immersive experience creates a deeper connection with the stories and encourages engagement.\n3. Personalized Adventures: The app customizes each story based on the user's interests, needs, and personal growth areas. By collecting user input, the app tailors the storylines and learning outcomes to ensure that each adventure resonates with the child's unique experiences.\n4. Parent-Child Cooperative Play: The app encourages parents and children to collaborate and engage in the stories together. Through a split-screen mode or multi-device connectivity, parents and children can actively participate in the stories and learn essential life skills together.\n5. Skill-Building Mini-Games: Interspersed throughout the stories are educational mini-games that reinforce the lessons and themes. These games are designed to be fun, engaging, and accessible to children, encouraging them to practice and develop various skills.\n6. Progress Tracking and Rewards: The app tracks the user's progress through the stories and rewards them with badges, trophies, and other collectibles. This gamification aspect encourages continued engagement and fosters a sense of accomplishment and growth.\n7. Community Features: The app provides a safe, moderated online community where parents can share their experiences, discuss the stories and lessons, and connect with other families who are also using the app. This community fosters support, inspiration, and learning.\n8. 
Expert Resources: The app includes access to resources from experts in child development, psychology, and education, providing parents with additional tools and guidance on fostering the growth and well-being of their children.\n\nEggy's Interactive Adventure World is a one-of-a-kind, mind-blowing app that revolutionizes the way parents and children engage with stories and learn essential life skills. Combining the power of storytelling, cutting-edge technology, and expert resources, this app is a must-have for families looking to strengthen their bonds and support their children's personal growth.", + "generated_text": "The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season was the 10th season was the 10 season was the 10 season was the 10", + "generated_tokens": [ + 2, + 1784, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 
5526, + 1486, + 1278, + 1032, + 1049, + 1048 + ], + "latency": 87.50420427322388, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -12.107745170593262, + -2.9727728366851807, + -3.720092535018921, + -5.592433929443359, + -10.964235305786133, + -3.654498338699341, + -9.33439826965332, + -4.833785057067871, + -5.187321662902832, + -2.6944785118103027, + -6.9262237548828125, + -0.654232919216156, + -0.5550781488418579, + -0.21346639096736908, + -0.0134271876886487, + -0.010840100236237049, + -1.3878544569015503, + -0.6296291351318359, + -7.9766011238098145, + -0.4393192231655121, + -5.639142036437988, + -3.277270793914795, + -1.0206468105316162, + -11.703084945678711, + -0.7100943922996521, + -0.2809169888496399, + -2.771284818649292, + -7.190817832946777, + -4.048691749572754, + -0.012056218460202217, + -3.3802318572998047, + -0.6807184815406799, + -3.4844107627868652, + -3.312331199645996, + -0.5001641511917114, + -2.61255145072937, + -4.243694305419922, + -4.333778381347656, + -6.0625810623168945, + -0.011777156963944435, + -0.37577226758003235, + -0.9490834474563599, + -3.5450198650360107, + -2.1778035163879395, + -0.45957911014556885, + -3.00771164894104, + -1.7600425481796265, + -0.09766030311584473, + -2.467618942260742, + -1.329679012298584, + -0.8384320735931396, + -1.1864604949951172, + -3.628342866897583, + -0.2470003068447113, + -1.8938640356063843, + -5.168431282043457, + -0.05005566030740738, + -2.258014678955078, + -2.449028968811035, + -0.0034086955711245537, + -3.9485883712768555, + -1.6201664209365845, + -5.139942646026611, + -4.859354496002197, + -0.23686674237251282, + -0.5541543364524841, + -2.5826025009155273, + -6.114635467529297, + -4.3380208015441895, + -0.7412900924682617, + -0.3221715986728668, + -0.13805493712425232, + -4.1797332763671875, + -7.3456268310546875, + -0.13762745261192322, + -2.0905232429504395, + -1.0178627967834473, + -4.108260631561279, + -0.6007124185562134, + -1.0410642623901367, + 
-4.122039794921875, + -0.35905471444129944, + -1.4274661540985107, + -4.139932155609131, + -0.4237431585788727, + -1.6294409036636353, + -0.9811424016952515, + -4.132790565490723, + -1.1318120956420898, + -6.8258256912231445, + -1.5455098152160645, + -0.6984409093856812, + -13.664215087890625, + -0.1166313961148262, + -1.6347849369049072, + -0.28875046968460083, + -0.03130083531141281, + -1.5293006896972656, + -1.6488375663757324, + -4.224111557006836, + -4.760683059692383, + -1.9758747816085815, + -1.5828256607055664, + -2.8463857173919678, + -0.2620386481285095, + -1.7243889570236206, + -1.7945923805236816, + -0.8884308338165283, + -0.3766394555568695, + -0.34033581614494324, + -9.05566692352295, + -0.22754782438278198, + -0.033802058547735214, + -0.34108465909957886, + -0.5644669532775879, + -2.0925779342651367, + -4.547505855560303, + -10.870464324951172, + -1.1072022914886475, + -5.503787994384766, + -3.259672164916992, + -0.007964519783854485, + -3.0111639499664307, + -4.246737480163574, + -0.7813188433647156, + -3.331031322479248, + -4.485962867736816, + -0.9492117166519165, + -2.6757047176361084, + -1.1591349840164185, + -1.122117519378662, + -2.629878044128418, + -5.986321926116943, + -0.2146703153848648, + -0.002392764901742339, + -7.372479438781738, + -0.007077385671436787, + -0.06599216908216476, + -0.0970711037516594, + -3.2874932289123535, + -0.0019583588000386953, + -0.9122000336647034, + -4.930907249450684, + -0.019508399069309235, + -0.308611661195755, + -0.07778516411781311, + -3.8497893810272217, + -0.46124517917633057, + -0.38821348547935486, + -2.668412208557129, + -1.845987319946289, + -0.06470083445310593, + -0.006619549356400967, + -1.2610487937927246, + -0.13015533983707428, + -3.365312099456787, + -0.0014690094394609332, + -1.6789823770523071, + -1.2499005794525146, + -3.3992111682891846, + -5.563300132751465, + -0.823418140411377, + -4.24124813079834, + -1.6597849130630493, + -0.6941139698028564, + -1.5637556314468384, + 
-0.5482053756713867, + -0.9507225751876831, + -3.764758586883545, + -0.0006518622976727784, + -0.7540555000305176, + -5.058262825012207, + -0.3302401602268219, + -2.8130555152893066, + -0.17079885303974152, + -2.871047019958496, + -0.3991694450378418, + -3.1476998329162598, + -0.3488404452800751, + -2.0545666217803955, + -4.201597690582275, + -5.164614677429199, + -0.0271432027220726, + -0.0009785869624465704, + -3.3444161415100098, + -1.3117046356201172, + -6.375423431396484, + -0.05535568296909332, + -0.3919340968132019, + -0.060594215989112854, + -6.507473468780518, + -0.0023910999298095703, + -2.143423318862915, + -3.335618257522583, + -2.953970432281494, + -0.0013383012264966965, + -0.8080525398254395, + -0.29526084661483765, + -0.04036511853337288, + -3.231475353240967, + -1.0585589408874512, + -6.136373043060303, + -0.006182829383760691, + -0.035548023879528046, + -5.509808540344238, + -1.8490750789642334, + -9.83314037322998, + -0.07037576287984848, + -3.1621387004852295, + -6.762360095977783, + -1.3490527868270874, + -3.601043462753296, + -1.176393985748291, + -0.4342959523200989, + -0.06266004592180252, + -5.464046001434326, + -0.017946599051356316, + -1.0416009426116943, + -1.6117159128189087, + -12.289417266845703, + -1.5004339218139648, + -5.76563835144043, + -4.038386821746826, + -0.20812086760997772, + -3.6306562423706055, + -1.3901070356369019, + -1.087137222290039, + -2.423213243484497, + -4.503086090087891, + -0.0008031480247154832, + -0.03627370297908783, + -0.1653430461883545, + -7.958648681640625, + -1.1018548011779785, + -1.290948748588562, + -3.8049263954162598, + -1.8253734111785889, + -0.059022851288318634, + -0.0013984196120873094, + -4.698851585388184, + -2.5421664714813232, + -0.024493809789419174, + -4.828659534454346, + -3.0295286178588867, + -3.550312042236328, + -0.1185273677110672, + -0.22595760226249695, + -0.10782183706760406, + -1.4033282995224, + -0.4485701024532318, + -0.2889708876609802, + -0.05471855774521828, + 
-0.007632025051862001, + -2.1156554222106934, + -0.6249589323997498, + -4.198577404022217, + -0.14178156852722168, + -4.284021377563477, + -2.227515935897827, + -3.5022120475769043, + -0.19575819373130798, + -15.964509963989258, + -4.055960655212402, + -11.125024795532227, + -0.7681724429130554, + -3.0436902046203613, + -7.030262470245361, + -4.376729488372803, + -5.476145267486572, + -0.4219042658805847, + -3.7689766883850098, + -0.060010604560375214, + -0.8134393692016602, + -0.11386934667825699, + -0.025473715737462044, + -0.09736856073141098, + -4.357361793518066, + -0.3670865297317505, + -0.08063744008541107, + -0.1311480849981308, + -1.0903867483139038, + -1.2705107927322388, + -1.5076212882995605, + -4.295275688171387, + -0.04185756668448448, + -0.19810955226421356, + -1.9645220041275024, + -0.9597910642623901, + -0.13429655134677887, + -0.002283110748976469, + -7.066074371337891, + -3.639211654663086, + -1.0263917446136475, + -8.124760627746582, + -1.132537841796875, + -0.09160765260457993, + -0.08996370434761047, + -10.165366172790527, + -3.501585006713867, + -0.0019847711082547903, + -0.05309417471289635, + -0.31209683418273926, + -0.15089339017868042, + -1.23564875125885, + -1.2685208320617676, + -7.832758903503418, + -0.19271136820316315, + -0.014305183663964272, + -0.0007532381569035351, + -0.44688940048217773, + -2.6239724159240723, + -1.738666296005249, + -1.6480977535247803, + -0.46753185987472534, + -8.656959533691406, + -3.79868483543396, + -0.9281394481658936, + -2.2381181716918945, + -1.7654449939727783, + -0.4948798418045044, + -0.025028761476278305, + -1.5435361862182617, + -1.6390818357467651, + -1.4962153434753418, + -0.3425217270851135, + -0.013077914714813232, + -0.038474079221487045, + -5.3364362716674805, + -0.42365288734436035, + -1.884093999862671, + -3.510357618331909, + -6.198029518127441, + -0.44375038146972656, + -0.0008789013954810798, + -3.6025230884552, + -1.419615626335144, + -2.6723289489746094, + -5.775190830230713, + 
-1.1380761861801147, + -2.6683366298675537, + -0.43395891785621643, + -0.003145867260172963, + -8.63144302368164, + -1.646262764930725, + -1.732487678527832, + -4.561546802520752, + -0.5277953147888184, + -0.07333153486251831, + -0.5624169707298279, + -0.12201295047998428, + -2.6561455726623535, + -1.1071691513061523, + -2.6895060539245605, + -0.040864069014787674, + -0.04126371443271637, + -1.8294739723205566, + -0.09022177755832672, + -0.3154001832008362, + -0.46215569972991943, + -2.2462844848632812, + -0.30149081349372864, + -0.52588951587677, + -8.288043975830078, + -0.0002057340752799064, + -0.8021711707115173, + -4.4546098709106445, + -0.0001565095444675535, + -0.0015961299650371075, + -0.15216240286827087, + -0.3677564561367035, + -5.018707275390625, + -0.7850045561790466, + -1.9582659006118774, + -1.0046892166137695, + -10.0401029586792, + -0.16878114640712738, + -5.944240570068359, + -1.5523078441619873, + -5.7253522872924805, + -0.47948503494262695, + -0.44009655714035034, + -5.671053886413574, + -0.003280022880062461, + -0.7937742471694946, + -0.9639376401901245, + -0.00030048147891648114, + -1.0747740268707275, + -0.8839919567108154, + -3.416811466217041, + -1.6602673530578613, + -0.2706959843635559, + -0.0024333172477781773, + -4.478696823120117, + -6.20179557800293, + -0.11359559744596481, + -0.202009916305542, + -0.022310219705104828, + -2.367263078689575, + -1.0405994653701782, + -5.984308242797852, + -2.105138063430786, + -9.583202362060547, + -0.0004957877099514008, + -3.0655455589294434, + -0.0669412910938263, + -0.8977450728416443, + -2.2271294593811035, + -2.6617536544799805, + -1.8184051513671875, + -0.8291114568710327, + -0.4864235818386078, + -0.7993525862693787, + -3.51106858253479, + -2.1530935764312744, + -0.257144957780838, + -1.3934082984924316, + -1.3137131929397583, + -0.3384077548980713, + -0.1697217971086502, + -2.353395938873291, + -0.03406282886862755, + -0.39059701561927795, + -3.422821044921875, + -1.7117210626602173, + 
-0.7018465399742126, + -1.5995906591415405, + -3.6218395233154297, + -0.12497704476118088, + -0.16966234147548676, + -0.7313685417175293, + -0.4956285357475281, + -1.0840849876403809, + -5.042126655578613, + -0.00031704644788987935, + -7.683258056640625, + -0.9210801720619202, + -4.687852382659912, + -0.0028814247343689203, + -0.043382611125707626, + -4.1948652267456055, + -2.66593337059021, + -0.06153333932161331, + -0.0023110604379326105, + -6.729236602783203, + -5.777127742767334, + -0.08932067453861237, + -0.09890018403530121, + -0.009886111132800579, + -3.1145148277282715, + -3.725565195083618, + -0.0021998509764671326, + -3.9927196502685547, + -2.753793239593506, + -1.6037236452102661, + -0.17461130023002625, + -4.804804801940918, + -0.2311229705810547, + -0.30256444215774536, + -2.235363006591797, + -0.006614102050662041, + -0.34757524728775024, + -1.4946835041046143, + -1.222062587738037, + -3.658839225769043, + -1.356170892715454, + -0.5371109843254089, + -3.7580835819244385, + -4.54621696472168, + -0.31577637791633606, + -3.677156925201416, + -2.7181396484375, + -7.4674882888793945, + -0.00019369633810129017, + -2.3798398971557617, + -2.5452184677124023, + -0.2858496308326721, + -4.315659523010254, + -0.025835415348410606, + -0.000603493710514158, + -0.2546294331550598, + -0.12032663822174072, + -2.006908655166626, + -5.990736961364746, + -7.146596908569336, + -0.23356498777866364, + -0.2201036810874939, + -0.01235415879637003, + -0.011248741298913956, + -1.4155778884887695, + -0.40242519974708557, + -5.877886772155762, + -0.7865053415298462, + -0.03231288120150566, + -0.004864405374974012, + -0.0050629740580916405, + -2.7049152851104736, + -6.822089195251465, + -0.39252761006355286, + -1.2290617227554321, + -0.007630132604390383, + -3.485461711883545, + -0.47985684871673584, + -6.1813530921936035, + -0.03757825121283531, + -0.37834712862968445, + -0.22192610800266266, + -1.165318489074707, + -0.5220151543617249, + -0.1289423257112503, + 
-3.216222047805786, + -1.0787583589553833, + -3.0716826915740967, + -0.6023419499397278, + -2.558605194091797, + -0.927433431148529, + -0.00364841241389513, + -0.14910078048706055, + -0.7318926453590393, + -6.159773826599121, + -0.0015301911626011133, + -1.8908276557922363, + -1.9641315937042236, + -0.021651331335306168, + -2.1648828983306885, + -2.2700207233428955, + -7.833290100097656, + -0.03397307172417641, + -0.8344621658325195, + -0.02225659228861332, + -0.06639260798692703, + -2.3780317306518555, + -3.180129051208496, + -0.09030630439519882, + -2.4138312339782715, + -1.3445552587509155, + -1.848326325416565, + -0.9726964831352234, + -2.851792335510254, + -0.0630769282579422, + -0.0011394681641831994, + -0.05843213573098183, + -2.6616668701171875, + -1.575437068939209, + -0.180197611451149, + -5.552371501922607, + -0.26108410954475403, + -2.529611587524414, + -0.37780019640922546, + -5.141795635223389, + -0.5921107530593872, + -0.2474975287914276, + -0.10687454044818878, + -4.891775131225586, + -0.25011152029037476, + -2.4100728034973145, + -1.358667016029358, + -2.790961503982544, + -3.8654675483703613, + -1.0076243877410889, + -0.7456949949264526, + -1.5575554370880127, + -2.05328631401062, + -1.6538066864013672, + -0.0558217354118824, + -0.0001817776501411572, + -0.0011643542675301433, + -0.038359593600034714, + -1.4208931922912598, + -0.542127251625061, + -0.3162364959716797, + -0.3966117799282074, + -1.1765563488006592, + -1.7920958995819092, + -0.18425509333610535, + -0.1092008650302887, + -0.46676987409591675, + -0.24977745115756989, + -1.0375996828079224, + -0.5268858671188354, + -0.008942908607423306, + -0.6404479146003723, + -0.0033111530356109142, + -5.3165931603871286e-05, + -0.5154370665550232, + -0.39286962151527405, + -1.401839256286621, + -0.6232213973999023, + -0.02168831042945385, + -0.004282470792531967, + -0.005199837032705545, + -0.09748794883489609, + -0.040823787450790405, + -0.00014852374442853034, + -0.0005832401220686734, + 
-0.005303124897181988, + -0.6537013053894043, + -0.38026049733161926, + -0.04189129173755646, + -0.010385753586888313, + -0.008756335824728012, + -0.013362848199903965, + -0.000504723924677819, + -0.002797620603814721, + -0.0014512732159346342, + -0.0013321106089279056, + -0.010883613489568233, + -0.005159396678209305, + -0.004701037425547838, + -0.01591104455292225, + -0.001474246964789927, + -1.2278481335670222e-05, + -0.010548785328865051, + -0.08341525495052338, + -0.03858809545636177, + -0.056062061339616776, + -0.0009532198309898376, + -0.0005789510905742645, + -0.0008986725588329136, + -0.00710969977080822, + -0.0006561510381288826, + -1.4781842764932662e-05, + -5.578839045483619e-05, + -0.0006398299592547119, + -0.0028786908369511366, + -0.0034092895220965147, + -0.008268529549241066, + -0.006602259818464518, + -0.004517706111073494, + -0.02233586646616459, + -0.0006323245470412076, + -0.009195122867822647, + -0.0029284947086125612, + -0.004457537550479174, + -0.017873765900731087, + -0.008801711723208427, + -0.0036383166443556547, + -0.08078611642122269, + -0.006347495596855879, + -0.0002177716523874551, + -0.04688572511076927, + -0.2718890309333801, + -0.07996802777051926, + -0.04674842208623886, + -0.009984076954424381, + -0.010000954382121563, + -0.050126753747463226, + -0.5864179730415344, + -0.2915390133857727, + -0.008090462535619736, + -0.032190412282943726, + -0.03461571782827377, + -0.2785419523715973, + -0.05830562859773636, + -0.02893950417637825, + -0.12241066247224808, + -0.02711048536002636, + -0.16450686752796173, + -0.09852994978427887, + -0.2651047706604004, + -0.35559725761413574, + -0.12606258690357208, + -0.32793670892715454, + -0.20878805220127106, + -0.7587923407554626, + -0.4730657637119293, + -1.496794581413269, + -0.2486363798379898, + -0.20723387598991394, + -0.1872958242893219, + -0.19151091575622559, + -0.3350580036640167, + -1.3085839748382568, + -0.6109102964401245, + -0.2947172224521637, + -0.37130236625671387, + 
-0.19888469576835632, + -0.18297068774700165, + -0.9978674054145813, + -0.5471905469894409, + -0.4379975199699402, + -0.407988041639328, + -0.17325688898563385, + -0.16938896477222443 + ] + }, + "96": { + "input_prompt": "what is the concept of double materiality in sustainability?", + "generated_text": " What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double material", + "generated_tokens": [ + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 
1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318 + ], + "latency": 126.4328100681305, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -4.917365074157715, + -0.9960631132125854, + -7.875392913818359, + -0.2993181347846985, + -7.760880470275879, + -10.308395385742188, + -2.1807961463928223, + -1.6412583589553833, + -9.521512985229492, + -1.627489447593689, + -1.8410861492156982, + -0.9285702705383301, + -0.2576955556869507, + -0.9641067981719971, + -0.02314644306898117, + -0.6696561574935913, + -0.07035009562969208, + -0.004622488282620907, + -0.025748632848262787, + -0.06276137381792068, + -0.17385317385196686, + -0.3285445272922516, + -0.0592009499669075, + -0.007940039038658142, + -0.22664028406143188, + -0.0017957051750272512, + -0.022929180413484573, + -0.005733947269618511, + -0.0012996093137189746, + -0.006419987417757511, + -0.02376849390566349, + -0.27800270915031433, + -0.4650723934173584, + -0.04936715215444565, + -0.003972141072154045, + -0.01477995328605175, + -0.0012044801842421293, + -0.014891182072460651, + -0.002709767082706094, + -0.0009939497103914618, + -0.0028436246793717146, + -0.006759870797395706, + -0.15416178107261658, + -0.20121537148952484, + -0.016414370387792587, + -0.0015769677702337503, + -0.008138825185596943, + -0.0007713441736996174, + -0.013819841668009758, + -0.003826678032055497, + -0.0005918181850574911, + -0.0014938872773200274, + -0.00485716899856925, + -0.081083282828331, + -0.09642580896615982, + -0.009630884043872356, + -0.0010948146227747202, + -0.007085552904754877, + -0.0006310140597634017, + -0.013073914684355259, + -0.0039152647368609905, + -0.000364713923772797, + -0.001292108790948987, + -0.004158303141593933, + -0.044283974915742874, + -0.05722038820385933, + -0.006369172595441341, + -0.0007976687629707158, + -0.005993015132844448, + -0.0004935238393954933, + -0.011310506612062454, + -0.002951553324237466, + -0.000387831823900342, + -0.000977038755081594, 
+ -0.0036971091758459806, + -0.030511993914842606, + -0.04246694967150688, + -0.004863100592046976, + -0.0006927236099727452, + -0.005206122528761625, + -0.0005129451747052372, + -0.00894621666520834, + -0.0028565814718604088, + -0.00041333239641971886, + -0.0009002208826132119, + -0.0033131728414446115, + -0.021188799291849136, + -0.03330245241522789, + -0.0038543473929166794, + -0.0006504327175207436, + -0.004474864806979895, + -0.00048029806930571795, + -0.009718249551951885, + -0.0030443770810961723, + -0.0003743662964552641, + -0.0009439303539693356, + -0.003729770192876458, + -0.016505014151334763, + -0.0290373582392931, + -0.003315192647278309, + -0.0005821678787469864, + -0.004148805979639292, + -0.00042489083716645837, + -0.006856840569525957, + -0.0028660909738391638, + -0.00032574593205936253, + -0.0006986799417063594, + -0.003671098267659545, + -0.012792548164725304, + -0.02553274855017662, + -0.002730690874159336, + -0.0005067494930699468, + -0.0036923582665622234, + -0.0004451475979294628, + -0.007225453853607178, + -0.002746859099715948, + -0.0003323002893012017, + -0.0008517451351508498, + -0.003630714723840356, + -0.011284693144261837, + -0.02186688780784607, + -0.0025819556321948767, + -0.0004931663861498237, + -0.0031721293926239014, + -0.00040725519647821784, + -0.0062383925542235374, + -0.00238693761639297, + -0.00040749352774582803, + -0.0007970731821842492, + -0.003604583442211151, + -0.010075542144477367, + -0.022386692464351654, + -0.0025295186787843704, + -0.00046302087139338255, + -0.003138143103569746, + -0.0004233417857903987, + -0.006178564392030239, + -0.0021037368569523096 + ] + }, + "127": { + "input_prompt": "as before we talk about using , i usually do if else() to in case the property would be null before.\nif today i public a property and i want to use using to instead , will code be like this?\n\npublic Material Mat;\n\nusing(Mat){\n// Do something;\n}\n\nis that right?", + "generated_text": " i am not sure if i am using the 
wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using", + "generated_tokens": [ + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505 + ], + "latency": 155.6906189918518, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -7.793755531311035, + -4.224940776824951, + -5.446587562561035, + -0.267395555973053, + -6.118141174316406, + -11.105525970458984, + -6.67517614364624, + -6.902530670166016, + -3.2218151092529297, + -5.99644660949707, + -8.431673049926758, + -8.04836368560791, + 
-3.992594003677368, + -7.550079345703125, + -2.873685836791992, + -3.3877997398376465, + -7.9306745529174805, + -7.005484580993652, + -1.0481306314468384, + -2.686237335205078, + -6.131283760070801, + -6.2994704246521, + -7.931419372558594, + -11.3147554397583, + -8.544670104980469, + -12.01729679107666, + -3.89847469329834, + -1.7964364290237427, + -2.952878952026367, + -1.9217232465744019, + -2.272329330444336, + -0.37552154064178467, + -1.667820692062378, + -7.510344505310059, + -3.498040199279785, + -7.980632305145264, + -7.672002792358398, + -4.4999470710754395, + -7.155375003814697, + -2.4486124515533447, + -4.785946846008301, + -1.153855800628662, + -2.3994438648223877, + -4.250652313232422, + -12.24446964263916, + -8.344388008117676, + -2.608186721801758, + -5.200589179992676, + -8.25888442993164, + -3.6245617866516113, + -7.689338207244873, + -7.345355033874512, + -1.2661759853363037, + -7.265620231628418, + -1.9884108304977417, + -6.269482612609863, + -2.41705584526062, + -1.8929681777954102, + -1.8259913921356201, + -2.0997350215911865, + -2.323200225830078, + -1.3998825550079346, + -0.8789899945259094, + -1.082053542137146, + -1.1831339597702026, + -1.4462857246398926, + -1.6481035947799683, + -1.4408715963363647, + -1.2603964805603027, + -1.5267670154571533, + -1.6345772743225098, + -1.3796477317810059, + -0.7609691023826599, + -0.3548354506492615, + -0.7552334666252136, + -0.44776833057403564, + -1.1078286170959473, + -1.3036658763885498, + -0.5214896202087402, + -0.8486822843551636, + -0.22470997273921967, + -0.4705755412578583, + -0.5639711022377014, + -0.5388108491897583, + -0.6052999496459961, + -0.1002030223608017, + -0.286334365606308, + -0.45798981189727783, + -1.0107953548431396, + -0.11875647306442261, + -0.6969441771507263, + -0.4609107971191406, + -0.07614769786596298, + -0.5035472512245178, + -0.1682187020778656, + -0.10476160794496536, + -0.6586751341819763, + -0.35806939005851746, + -1.5364394187927246, + -2.4093759059906006, + 
-1.977368950843811, + -1.6216907501220703, + -0.27647316455841064, + -0.2991848587989807, + -0.2783535420894623, + -0.05913994088768959, + -0.03023873083293438, + -0.043339803814888, + -0.7320341467857361, + -0.0030677898321300745, + -0.0332595594227314, + -0.012804670259356499, + -0.004041599575430155, + -0.0014899593079462647, + -0.001948602613992989, + -0.0029070996679365635, + -0.040939707309007645, + -0.013942227698862553, + -0.04897322878241539, + -0.011005887761712074, + -0.0044113704934716225, + -0.0013179434463381767, + -0.003658389439806342, + -0.009758152067661285, + -0.0014104428701102734, + -0.0016671819612383842, + -0.000771939754486084, + -0.0015519729349762201, + -0.003720743814483285, + -0.004249115474522114, + -0.00485657574608922, + -0.005053604021668434, + -0.002336274366825819, + -0.0009155849111266434, + -0.0004978132783435285, + -0.0005953923100605607, + -0.0011395872570574284, + -0.001485078944824636, + -0.3072909712791443, + -1.7295066118240356, + -0.4807289242744446, + -0.1245415136218071, + -0.011858444660902023, + -0.020613837987184525, + -0.011020978912711143, + -0.003106294432654977, + -0.0009966888464987278, + -0.0019349202048033476, + -0.037407051771879196, + -0.0003496989083942026, + -0.005922981072217226, + -0.007394562941044569, + -0.0006037319544702768, + -0.0008836655179038644, + -0.0002884448622353375, + -0.00047600860125385225, + -0.0024947968777269125, + -0.00442774873226881, + -0.004059052560478449, + -0.0018594847060739994, + -0.0006179092451930046, + -0.00022635281493421644, + -0.0006730675231665373, + -0.003022746881470084, + -0.0002343380037928, + -0.00047791501856409013, + -9.440929716220126e-05, + -0.00021550717065110803, + -0.0013523490633815527, + -0.0032202552538365126, + -0.001157686347141862, + -0.004449942149221897, + -0.0016590891173109412, + -0.00101062236353755, + -0.0003079893649555743, + -0.00048375347978435457, + -0.0021734442561864853, + -0.00423036003485322, + -0.11514264345169067, + -0.8658493757247925, 
+ -0.084366075694561, + -0.02140468917787075, + -0.0060798698104918, + -0.008638513274490833, + -0.003212531330063939, + -0.0009598892065696418, + -0.00032085992279462516 + ] + }, + "throughput": [ + 93.24123994187065, + 104.94118337233992, + 105.03843789693171 + ] +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..066995bd666 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,170 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And that this is the place where you can be yourself, and be yourself in the most beautiful way. 
And that this is the place where you can", + "generated_tokens": [ + 3060, + 1455, + 1593, + 1395, + 1278, + 3535, + 2478, + 1636, + 1710, + 1402, + 14019, + 1044, + 1321, + 1402, + 14019, + 1294, + 1278, + 2725, + 15568, + 3039, + 1046, + 3060, + 1455, + 1593, + 1395, + 1278, + 3535, + 2478, + 1636, + 1710 + ], + "latency": 2.1998238563537598, + "cuda_graph_request_count_map": null, + "step_count": 240, + "logprobs": [ + -9.358587265014648, + -2.7594826221466064, + -4.608366012573242, + -1.4093360900878906, + -0.6152952313423157, + -1.7217562198638916, + -2.496668815612793, + -2.0547454357147217, + -2.441960573196411, + -6.280838966369629, + -1.5643692016601562, + -3.462346076965332, + -4.428728103637695, + -3.8633861541748047, + -1.9936373233795166, + -1.8929449319839478, + -3.796365737915039, + -6.8360137939453125, + -0.2901247441768646, + -0.9246833324432373, + -6.633338928222656, + -7.166708469390869, + -12.771251678466797, + -2.198296308517456, + -3.7778120040893555, + -0.4983733296394348, + -4.381269454956055, + -0.0666784718632698, + -0.09580295532941818, + -3.2437636852264404, + -10.079947471618652, + -1.172220230102539, + -5.977442741394043, + -5.046236038208008, + -3.855658531188965, + -2.5585858821868896, + -3.356245994567871, + -5.557229518890381, + -1.6787731647491455, + -5.483290672302246, + -12.218501091003418, + -12.61402702331543, + -0.09662941098213196, + -2.5431432723999023, + -1.4071024656295776, + -2.9154715538024902, + -1.1964417695999146, + -0.006458481773734093, + -3.3625335693359375, + -13.262511253356934, + -4.314079761505127, + -2.617699146270752, + -5.987792015075684, + -0.778266429901123, + -0.048888545483350754, + -1.548882007598877, + -1.1381981372833252, + -5.627166748046875, + -0.4078553318977356, + -4.958505630493164, + -0.6187160611152649, + -0.7174848914146423, + -2.469533920288086, + -13.620073318481445, + -0.09088654816150665, + -3.526974678039551, + -1.4195809364318848, + -6.402483940124512, + -0.5898402333259583, + 
-3.565917491912842, + -0.8561318516731262, + -1.6140165328979492, + -5.370549201965332, + -17.159223556518555, + -6.583524703979492, + -0.8855001926422119, + -4.19431209564209, + -1.2012220621109009, + -2.2563133239746094, + -1.7674944400787354, + -0.22064533829689026, + -9.292220115661621, + -0.12445646524429321, + -7.29617977142334, + -2.526529312133789, + -4.071560859680176, + -3.5568013191223145, + -1.926215410232544, + -2.349026918411255, + -2.2132363319396973, + -0.3125414550304413, + -1.4718132019042969, + -2.149106740951538, + -1.0855519771575928, + -1.631832242012024, + -1.3751734495162964, + -1.9396103620529175, + -1.5293723344802856, + -0.8444125056266785, + -1.2414811849594116, + -1.9522171020507812, + -2.4338042736053467, + -1.5651824474334717, + -0.9498789310455322, + -1.8044980764389038, + -2.356677770614624, + -1.247452974319458, + -1.550165057182312, + -0.5635553598403931, + -0.6177330017089844, + -0.4778785705566406, + -0.020452087745070457, + -0.48500269651412964, + -0.23854275047779083, + -0.06543659418821335, + -0.11837350577116013, + -0.0585334412753582 + ] + }, + "throughput": [ + 0.7170174223459943, + 12.998776662244524, + 13.163004282426089, + 13.581765270525981, + 13.619124445335821, + 13.655332144429561, + 13.608264815678803, + 13.614656540485411 + ] +} \ No newline at end of file From 6f5128440a5cd80c073a1b6804f908cf53c2523e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 27 Oct 2025 10:23:00 +0000 Subject: [PATCH 071/334] ci: Aggregate throughput MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_dev_dgxh100_coreweave.json | 361 +- .../golden_values_dev_dgxh100_coreweave.json | 361 +- .../golden_values_dev_dgxh100_coreweave.json | 5398 ++++++++--------- .../golden_values_dev_dgxh100_coreweave.json | 327 +- 4 files changed, 3208 insertions(+), 3239 deletions(-) diff --git 
a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_coreweave.json index 8076bdc9a25..0e953af50e7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -1,187 +1,178 @@ { - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", - "generated_tokens": [ - 3060, - 2430, - 1636, - 2012, - 1317, - 1278, - 2362, - 1307, - 1278, - 16070, - 1044, - 1321, - 1636, - 23067, - 1455, - 1593, - 1395, - 1605, - 3140, - 5152, - 1513, - 1747, - 1046, - 2409, - 1395, - 3140, - 5152, - 1513, - 1278, - 2362 - ], - "latency": 0.2859375476837158, - "cuda_graph_request_count_map": { - "372": 0, - "360": 0, - "336": 0, - "312": 0, - "288": 0, - "264": 0, - "240": 0, - "216": 0, - "192": 0, - "168": 0, - "144": 0, - "120": 0, - "96": 0, - "72": 0, - "48": 0, - "24": 29 - }, - "step_count": 240, - "logprobs": [ - -9.362494468688965, - -2.827894449234009, - -4.557381629943848, - -1.4968647956848145, - -0.717312216758728, - -1.7262351512908936, - -2.522736072540283, - -2.1782360076904297, - -2.3603432178497314, - -6.136383533477783, - -1.4676916599273682, - -3.468963384628296, - -4.424870491027832, - -3.7345848083496094, - -2.012619972229004, - -1.8833301067352295, - -3.5708768367767334, - -6.8197832107543945, - -0.3122292757034302, - -0.9820290207862854, - -6.532033443450928, - -7.498172760009766, - -12.615165710449219, - -2.409003496170044, - -3.8550546169281006, - -0.5105050802230835, - -4.2802581787109375, - -0.06971167027950287, - -0.054025799036026, - -3.319596767425537, - -9.703240394592285, - -1.0997297763824463, - -6.224854469299316, - -5.234503269195557, - -3.934987783432007, - -2.5263679027557373, - -3.1843955516815186, - -5.880871295928955, - -1.8436813354492188, - -5.906496047973633, - -12.15787410736084, - -12.5841064453125, - -0.0819428563117981, - -2.6212656497955322, - -1.4329369068145752, - -2.885145425796509, - -1.2901865243911743, - -0.006647023372352123, - -3.5115818977355957, - -12.945953369140625, - -3.793078899383545, - -3.0094375610351562, - -5.966838836669922, - -0.8998424410820007, - -0.040962252765893936, - -1.5467679500579834, - -1.0785343647003174, - -5.73494815826416, - -0.38491737842559814, - -5.017007827758789, - -0.5568072199821472, - 
-0.5968841910362244, - -2.3609962463378906, - -13.582086563110352, - -0.09050048142671585, - -3.7264108657836914, - -1.1208789348602295, - -6.052675247192383, - -0.5848909616470337, - -3.5906238555908203, - -0.9494907855987549, - -1.5676641464233398, - -5.127577781677246, - -17.19189453125, - -6.698403835296631, - -1.0449178218841553, - -4.365664958953857, - -1.1243419647216797, - -2.2092156410217285, - -1.8081634044647217, - -0.23330983519554138, - -9.439546585083008, - -0.2947109341621399, - -7.253565788269043, - -2.3855936527252197, - -4.629369258880615, - -3.4186267852783203, - -1.9727531671524048, - -2.331681251525879, - -1.5606917142868042, - -2.454296588897705, - -1.5334703922271729, - -1.2631131410598755, - -2.657367706298828, - -0.6480202078819275, - -0.4550393521785736, - -1.3625166416168213, - -0.8142069578170776, - -0.4496593475341797, - -0.9312890768051147, - -1.732723355293274, - -0.44613128900527954, - -1.6895122528076172, - -0.6082233190536499, - -1.0978344678878784, - -1.1122435331344604, - -0.002520838286727667, - -1.4072327613830566, - -0.007462364621460438, - -0.7548662424087524, - -0.9937503337860107, - -0.0675487294793129, - -0.9595617055892944, - -0.029961343854665756, - -2.205785036087036, - -1.2615025043487549, - -0.7878209352493286 - ] - }, - "throughput": [ - 4.17304871546938, - 103.09983375107234, - 103.84588149949121, - 103.54772132523577, - 103.90874002236247, - 103.06242433872661, - 103.53792289114989, - 103.82591647661074 - ] + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.2859375476837158, + "cuda_graph_request_count_map": { + "372": 0, + "360": 0, + "336": 0, + "312": 0, + "288": 0, + "264": 0, + "240": 0, + "216": 0, + "192": 0, + "168": 0, + "144": 0, + "120": 0, + "96": 0, + "72": 0, + "48": 0, + "24": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + -5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + -2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + 
-0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.331681251525879, + -1.5606917142868042, + -2.454296588897705, + -1.5334703922271729, + -1.2631131410598755, + -2.657367706298828, + -0.6480202078819275, + -0.4550393521785736, + -1.3625166416168213, + -0.8142069578170776, + -0.4496593475341797, + -0.9312890768051147, + -1.732723355293274, + -0.44613128900527954, + -1.6895122528076172, + -0.6082233190536499, + -1.0978344678878784, + -1.1122435331344604, + -0.002520838286727667, + -1.4072327613830566, + -0.007462364621460438, + -0.7548662424087524, + -0.9937503337860107, + -0.0675487294793129, + -0.9595617055892944, + -0.029961343854665756, + -2.205785036087036, + -1.2615025043487549, + -0.7878209352493286 + ] + }, + "throughput": 103.54772132523577 } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_coreweave.json index ddc6cacf3a8..771d0c18307 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_coreweave.json +++ 
b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgxh100_coreweave.json @@ -1,187 +1,178 @@ { - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. This is New York at the end", - "generated_tokens": [ - 3060, - 2430, - 1636, - 2012, - 1317, - 1278, - 2362, - 1307, - 1278, - 16070, - 1044, - 1321, - 1636, - 23067, - 1455, - 1593, - 1395, - 1605, - 3140, - 5152, - 1513, - 1747, - 1046, - 2409, - 1395, - 3140, - 5152, - 1513, - 1278, - 2362 - ], - "latency": 0.3733036518096924, - "cuda_graph_request_count_map": { - "372": 0, - "360": 0, - "336": 0, - "312": 0, - "288": 0, - "264": 0, - "240": 0, - "216": 0, - "192": 0, - "168": 0, - "144": 0, - "120": 0, - "96": 0, - "72": 0, - "48": 0, - "24": 29 - }, - "step_count": 240, - "logprobs": [ - -9.362494468688965, - -2.827894449234009, - -4.557381629943848, - -1.4968647956848145, - -0.717312216758728, - -1.7262351512908936, - -2.522736072540283, - -2.1782360076904297, - -2.3603432178497314, - -6.136383533477783, - -1.4676916599273682, - -3.468963384628296, - -4.424870491027832, - -3.7345848083496094, - -2.012619972229004, - -1.8833301067352295, - -3.5708768367767334, - -6.8197832107543945, - -0.3122292757034302, - -0.9820290207862854, - -6.532033443450928, - -7.498172760009766, - -12.615165710449219, - -2.409003496170044, - -3.8550546169281006, - -0.5105050802230835, - -4.2802581787109375, - -0.06971167027950287, - -0.054025799036026, - -3.319596767425537, - -9.703240394592285, - -1.0997297763824463, 
- -6.224854469299316, - -5.234503269195557, - -3.934987783432007, - -2.5263679027557373, - -3.1843955516815186, - -5.880871295928955, - -1.8436813354492188, - -5.906496047973633, - -12.15787410736084, - -12.5841064453125, - -0.0819428563117981, - -2.6212656497955322, - -1.4329369068145752, - -2.885145425796509, - -1.2901865243911743, - -0.006647023372352123, - -3.5115818977355957, - -12.945953369140625, - -3.793078899383545, - -3.0094375610351562, - -5.966838836669922, - -0.8998424410820007, - -0.040962252765893936, - -1.5467679500579834, - -1.0785343647003174, - -5.73494815826416, - -0.38491737842559814, - -5.017007827758789, - -0.5568072199821472, - -0.5968841910362244, - -2.3609962463378906, - -13.582086563110352, - -0.09050048142671585, - -3.7264108657836914, - -1.1208789348602295, - -6.052675247192383, - -0.5848909616470337, - -3.5906238555908203, - -0.9494907855987549, - -1.5676641464233398, - -5.127577781677246, - -17.19189453125, - -6.698403835296631, - -1.0449178218841553, - -4.365664958953857, - -1.1243419647216797, - -2.2092156410217285, - -1.8081634044647217, - -0.23330983519554138, - -9.439546585083008, - -0.2947109341621399, - -7.253565788269043, - -2.3855936527252197, - -4.629369258880615, - -3.4186267852783203, - -1.9727531671524048, - -2.331681251525879, - -1.5606917142868042, - -2.454296588897705, - -1.5334703922271729, - -1.2631131410598755, - -2.657367706298828, - -0.6480202078819275, - -0.4550393521785736, - -1.3625166416168213, - -0.8142069578170776, - -0.4496593475341797, - -0.9312890768051147, - -1.732723355293274, - -0.44613128900527954, - -1.6895122528076172, - -0.6082233190536499, - -1.0978344678878784, - -1.1122435331344604, - -0.002520838286727667, - -1.4072327613830566, - -0.007462364621460438, - -0.7548662424087524, - -0.9937503337860107, - -0.0675487294793129, - -0.9595617055892944, - -0.029961343854665756, - -2.205785036087036, - -1.2615025043487549, - -0.7878209352493286 - ] - }, - "throughput": [ - 14.167753773233736, - 
78.68224606460956, - 79.61636072923858, - 79.54665108975186, - 79.62008872611396, - 79.57034369848175, - 79.0717192987748, - 79.63717144611178 - ] + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.3733036518096924, + "cuda_graph_request_count_map": { + "372": 0, + "360": 0, + "336": 0, + "312": 0, + "288": 0, + "264": 0, + "240": 0, + "216": 0, + "192": 0, + "168": 0, + "144": 0, + "120": 0, + "96": 0, + "72": 0, + "48": 0, + "24": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + 
-5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + -2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + -0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.331681251525879, + -1.5606917142868042, + -2.454296588897705, + -1.5334703922271729, + -1.2631131410598755, + -2.657367706298828, + -0.6480202078819275, + -0.4550393521785736, + -1.3625166416168213, + -0.8142069578170776, + -0.4496593475341797, + -0.9312890768051147, + -1.732723355293274, + -0.44613128900527954, + -1.6895122528076172, + -0.6082233190536499, + -1.0978344678878784, + -1.1122435331344604, + -0.002520838286727667, + -1.4072327613830566, + -0.007462364621460438, + -0.7548662424087524, + -0.9937503337860107, + -0.0675487294793129, + -0.9595617055892944, + -0.029961343854665756, + -2.205785036087036, + -1.2615025043487549, + -0.7878209352493286 + ] + }, + "throughput": 79.54665108975186 } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json index 7184e0e35c1..a76d4f44413 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -1,2703 +1,2699 @@ { - "0": { - "input_prompt": "The $500 Cup of coffee?\nConsider this, most Americans spend an average of $1,500-2,000 a year on this bean water.\nI have a few question for you: \nHow has business been the past few months?\nDo you ever feel like your business is stuck?\nDon't feel like you're able to improve performance and make changes required to achieve success ?\nAre your customers spneding less and less and wanting more?\nHave the gas prices affected your business?\nDo you have employees and do they hate you or wish they could quit?\n\nNow, before you and I can decide wheter or not I will be a good fit for your business we should talk this over with coffee.\nAnd, just to warn you this isn't some casual thing. This is not a date or time to be personal or social (but by all means share what you will coz I'll gladly listen).\nTher eare two major talking points and stratagies we will focios on in our lil coffee social\nFor one, we will find your unique selling Proposition (USP).\nDo have the best price? Are you the cheapest in town? Are your customers jerks? Do you haVE REGULARS? Why do people come back?\nwe'll also look for the holes in your business bucket. 
I'm willing to bet there's a hole or two in your business we'll find together that'll make this 500 dollar cup of Joe pay for itse;f immedietly.\nMany find themselves to be more profitable by just finding out where the dollars are escaping in their business and I like to think of myself as a guy that comes along with some spakel or putty and patch those holes up for you.\nBeleive me, just fixing one hole can mean a lot...just think about a sinking boat that has a hole in it that's about 3\u201d in diameter... it doesn't take long to sink.\nI have no agenda, besides f=getting to know your business and seeing wher I can patch the holes and find what makes you do darn unique (I know this won't take long.)\nMany folks, I bet, will find what they need to get off their chest with a quick phone call and they just paypal me the money and make a coffee at home. Look, that's fine too.\nI just to get you ot of your comfort zone, because this is where it all starts my frind.\nSome smart GOAT entrepreneur will probably get everything they need just out of our lil mini consulatant for the more extensive business consukting I offer, and look, that's fine, too.\nMaybe this cup of coffee will be all you need to gtet back on your feet, but not only surive, but thrive!\nI'm not trying to make frineds, or make a bunch of money, or look down your shirt\nBut this is only going to be a 45 minute (max) coffee chat\nAnd, I know you (and me) both have a lot on our plates. 
So no messing around\nAfter our consultation I will follow up with you in a few days and see how things are going, then I will be emailing you about once or twice every two weeks, just to make sure you're staying on task and implementing what we went over.\nTghere is no obligation to go any further and will gladly give you back your money if this pre-consultation doesn't put you on the right path or you don't get any value out of it...", - "generated_text": " $ is a$ is a $ is a $ is a $ is a $ is a $$1, you\n$ $$ $\n$ $- $\n$\n$\n$ $$$\n$\n$\n$\n$\n$\n$\n$\n$???????, $\n-1\n$\n1.5.\n$\n$, you\n$.\n$\n1,1.1\nI\n$.\nI\n\n\nHow\n$,,,0,1,0,0.0\nIn\nThe\nThe\nThe\n", - "generated_tokens": [ - 1659, - 1395, - 1261, - 1036, - 1395, - 1261, - 1659, - 1395, - 1261, - 1659, - 1395, - 1261, - 1659, - 1395, - 1261, - 1659, - 1395, - 1261, - 1659, - 1036, - 1049, - 1044, - 1636, - 1010, - 1036, - 1659, - 1036, - 1659, - 1010, - 1036, - 1659, - 1045, - 1659, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1659, - 1036, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1010, - 1036, - 1063, - 1063, - 1063, - 1063, - 1063, - 1063, - 1063, - 1044, - 1659, - 1010, - 1045, - 1049, - 1010, - 1036, - 1010, - 1049, - 1046, - 1053, - 1046, - 1010, - 1036, - 1010, - 1036, - 1044, - 1636, - 1010, - 1036, - 1046, - 1010, - 1036, - 1010, - 1049, - 1044, - 1049, - 1046, - 1049, - 1010, - 1073, - 1010, - 1036, - 1046, - 1010, - 1073, - 1010, - 1010, - 1010, - 7801, - 1010, - 1036, - 1044, - 1044, - 1044, - 1048, - 1044, - 1049, - 1044, - 1048, - 1044, - 1048, - 1046, - 1048, - 1010, - 1785, - 1010, - 1784, - 1010, - 1784, - 1010, - 1784, - 1010 - ], - "latency": 9.77891230583191, - "cuda_graph_request_count_map": null, - "step_count": 6144, - "logprobs": [ - -7.7319135665893555, - -2.188307285308838, - -0.7547445297241211, - -0.7294313311576843, - -10.238386154174805, - -3.3775341510772705, - -6.394498825073242, - 
-7.354557037353516, - -9.018157958984375, - -3.012073040008545, - -3.2584073543548584, - -5.220732688903809, - -4.620487213134766, - -2.5078930854797363, - -3.752683162689209, - -0.13360372185707092, - -0.05705544352531433, - -0.41462242603302, - -1.585279941558838, - -1.6438164710998535, - -1.9557222127914429, - -0.3989897072315216, - -0.0365302674472332, - -6.368816375732422, - -0.8731719255447388, - -0.022585075348615646, - -0.2775891423225403, - -0.0027362785767763853, - -0.0006812873762100935, - -1.581446647644043, - -0.008688976056873798, - -0.3532317280769348, - -6.071163177490234, - -9.162371635437012, - -9.965556144714355, - -2.400461196899414, - -2.9898362159729004, - -2.9803032875061035, - -2.12601900100708, - -3.500912666320801, - -7.015069007873535, - -2.278961420059204, - -0.46380555629730225, - -4.078739166259766, - -1.9430254697799683, - -3.5642244815826416, - -3.689701795578003, - -6.201474189758301, - -6.580833911895752, - -2.3081111907958984, - -5.42717170715332, - -1.1886008977890015, - -1.172760248184204, - -1.3571951389312744, - -1.3551844358444214, - -3.376784324645996, - -0.05118789151310921, - -4.064360618591309, - -2.575554847717285, - -0.6994737386703491, - -2.56724214553833, - -2.1888976097106934, - -0.4816131591796875, - -4.070178985595703, - -2.0060782432556152, - -6.858033180236816, - -0.059200502932071686, - -3.214278221130371, - -0.9671833515167236, - -0.823198676109314, - -1.0130078792572021, - -4.595561981201172, - -0.012724989093840122, - -5.214311599731445, - -8.246870040893555, - -3.1476030349731445, - -3.299684524536133, - -4.218191146850586, - -7.318399429321289, - -0.8580498695373535, - -3.0894036293029785, - -1.886361002922058, - -7.217658996582031, - -3.271679639816284, - -3.9717154502868652, - -1.8835484981536865, - -10.034332275390625, - -11.382490158081055, - -5.417011260986328, - -7.505967140197754, - -2.33837890625, - -0.07904055714607239, - -3.294971227645874, - -7.813640594482422, - -1.7646901607513428, - 
-4.025320053100586, - -3.5977325439453125, - -4.390352249145508, - -9.147806167602539, - -0.5303041934967041, - -7.721246242523193, - -0.6311959028244019, - -0.8119025230407715, - -0.7227814197540283, - -1.8369406461715698, - -0.20933297276496887, - -1.5395950078964233, - -4.424448490142822, - -4.084965705871582, - -3.355497360229492, - -1.0475609302520752, - -6.479413986206055, - -0.7810530662536621, - -2.132437229156494, - -6.648703098297119, - -2.9522438049316406, - -1.2485712766647339, - -4.040503025054932, - -2.3415768146514893, - -5.358206748962402, - -1.6258506774902344, - -3.956300973892212, - -0.732298731803894, - -7.441117286682129, - -1.5242161750793457, - -2.4555861949920654, - -4.295163154602051, - -9.687600135803223, - -0.8213484883308411, - -1.2446978092193604, - -0.01942702941596508, - -4.619411468505859, - -3.3297007083892822, - -2.2139487266540527, - -3.691431999206543, - -2.6574106216430664, - -6.075929641723633, - -0.6123450994491577, - -1.2942559719085693, - -0.6262839436531067, - -7.398006439208984, - -4.4869890213012695, - -4.202048301696777, - -4.982994079589844, - -0.637227475643158, - -3.061023235321045, - -10.117584228515625, - -3.8567495346069336, - -4.0480828285217285, - -2.472019672393799, - -4.246374607086182, - -1.3939155340194702, - -7.132441520690918, - -0.20108745992183685, - -4.986658573150635, - -4.387957572937012, - -0.01108358334749937, - -4.209756851196289, - -7.271108627319336, - -4.047314643859863, - -2.6497321128845215, - -1.4763175249099731, - -0.28365400433540344, - -3.5247769355773926, - -1.4226995706558228, - -4.327237129211426, - -2.0407187938690186, - -6.1437907218933105, - -1.5190880298614502, - -2.5511486530303955, - -7.504094123840332, - -2.152172565460205, - -6.708334922790527, - -6.913146495819092, - -3.6959621906280518, - -6.752341270446777, - -0.63083815574646, - -0.12433214485645294, - -5.0525641441345215, - -4.435934066772461, - -0.45601028203964233, - -6.3459577560424805, - -9.882917404174805, - 
-3.1422882080078125, - -2.550520658493042, - -3.2099051475524902, - -6.278127193450928, - -0.07764133810997009, - -3.155696153640747, - -1.933587670326233, - -9.61027717590332, - -6.211391925811768, - -4.664543151855469, - -6.783782005310059, - -5.676271438598633, - -8.605900764465332, - -0.0824289619922638, - -3.5463995933532715, - -13.374168395996094, - -1.2401021718978882, - -1.8734056949615479, - -3.4154422283172607, - -1.6733763217926025, - -17.633970260620117, - -9.345113754272461, - -0.6277351975440979, - -2.9617538452148438, - -2.5565333366394043, - -10.10580825805664, - -7.130337715148926, - -7.36820125579834, - -4.098911285400391, - -5.747079372406006, - -2.945054769515991, - -0.7887389063835144, - -1.6583149433135986, - -1.0165244340896606, - -6.581666946411133, - -5.926386833190918, - -5.845194339752197, - -0.9657630920410156, - -7.868755340576172, - -1.3244551420211792, - -0.2657390236854553, - -0.06403665244579315, - -2.983020782470703, - -5.943899631500244, - -7.877285957336426, - -3.593116283416748, - -3.819509506225586, - -7.226177215576172, - -2.5206997394561768, - -3.385587215423584, - -0.37499159574508667, - -1.4698283672332764, - -3.1460342407226562, - -0.0077166082337498665, - -4.350916862487793, - -3.2183218002319336, - -0.6242184638977051, - -1.4782464504241943, - -2.8054311275482178, - -3.0831401348114014, - -12.17662525177002, - -2.113419532775879, - -1.6448111534118652, - -2.1834323406219482, - -0.7630388140678406, - -10.1896390914917, - -6.234405517578125, - -11.46288776397705, - -1.003785490989685, - -4.211658477783203, - -1.5010679960250854, - -5.859302043914795, - -2.0465080738067627, - -3.7468819618225098, - -4.684195518493652, - -4.318704128265381, - -2.7234389781951904, - -9.00437068939209, - -3.043811321258545, - -3.1384406089782715, - -2.713779926300049, - -2.095993995666504, - -2.1484954357147217, - -10.274479866027832, - -0.682350754737854, - -0.25973302125930786, - -3.6964316368103027, - -13.434456825256348, - 
-2.3368239402770996, - -5.382724761962891, - -1.9073458909988403, - -5.905669212341309, - -0.032165709882974625, - -1.6530004739761353, - -2.728893280029297, - -1.640552043914795, - -1.1391171216964722, - -1.4353511333465576, - -4.003787994384766, - -0.3450564742088318, - -0.7168521285057068, - -0.34650325775146484, - -0.3616408705711365, - -7.062709331512451, - -1.2851682901382446, - -2.299129009246826, - -8.800156593322754, - -5.208735466003418, - -4.780910491943359, - -2.78342342376709, - -4.469717979431152, - -6.909726619720459, - -2.5114197731018066, - -0.659822404384613, - -0.6915416121482849, - -3.2363741397857666, - -0.5283617377281189, - -0.10473938286304474, - -6.215325832366943, - -7.283237934112549, - -1.6797031164169312, - -11.50100040435791, - -7.5822978019714355, - -3.387317657470703, - -11.407575607299805, - -5.441976547241211, - -3.3264851570129395, - -0.7265786528587341, - -1.382750153541565, - -7.841699600219727, - -8.105277061462402, - -3.9569506645202637, - -4.963083267211914, - -0.5492897629737854, - -4.6081390380859375, - -5.870400905609131, - -3.957930088043213, - -5.275494575500488, - -4.105091094970703, - -2.15435528755188, - -2.8472700119018555, - -1.1278448104858398, - -8.226571083068848, - -0.40629008412361145, - -9.916461944580078, - -4.616743087768555, - -1.691868543624878, - -0.6639478802680969, - -2.5716753005981445, - -6.676954746246338, - -6.535329818725586, - -0.4170510768890381, - -1.443942904472351, - -3.145481824874878, - -1.440589427947998, - -0.26935356855392456, - -0.9647155404090881, - -4.335958957672119, - -1.5647850036621094, - -5.890466690063477, - -3.01654052734375, - -1.9168468713760376, - -3.7365682125091553, - -8.001864433288574, - -10.680083274841309, - -4.489352226257324, - -4.6058149337768555, - -7.69011116027832, - -3.6247005462646484, - -1.5600426197052002, - -10.2160062789917, - -5.004643440246582, - -0.19602319598197937, - -3.375545024871826, - -2.669325590133667, - -1.3932737112045288, - -1.6410658359527588, 
- -6.847603797912598, - -6.744344711303711, - -0.5215591192245483, - -0.25840020179748535, - -1.1448237895965576, - -5.57253885269165, - -7.251138687133789, - -4.221924781799316, - -0.7688062787055969, - -2.504502534866333, - -3.146519660949707, - -2.206653356552124, - -1.4295082092285156, - -7.96943998336792, - -4.332189083099365, - -2.5750505924224854, - -1.7102608680725098, - -5.311381816864014, - -8.897522926330566, - -2.994919538497925, - -3.3397974967956543, - -2.1794328689575195, - -2.437566041946411, - -0.3181810975074768, - -0.27412793040275574, - -0.7914466857910156, - -2.3470635414123535, - -2.4099245071411133, - -2.491870880126953, - -3.024170160293579, - -1.9719040393829346, - -11.373910903930664, - -1.4279751777648926, - -0.14573107659816742, - -2.055763006210327, - -6.366893291473389, - -4.24091911315918, - -0.00709194503724575, - -2.0199716091156006, - -2.524750232696533, - -1.4272525310516357, - -0.5185190439224243, - -2.927150011062622, - -2.7070627212524414, - -3.365638017654419, - -4.318085193634033, - -7.773144721984863, - -1.7947180271148682, - -7.657534599304199, - -8.767786026000977, - -14.74280071258545, - -1.8042558431625366, - -3.2712037563323975, - -1.4002125263214111, - -4.887944221496582, - -1.4821010828018188, - -1.5255622863769531, - -5.879070281982422, - -4.463839530944824, - -5.1955976486206055, - -5.665647506713867, - -0.3775045573711395, - -5.9350481033325195, - -2.800539255142212, - -0.13162286579608917, - -3.034379720687866, - -4.729524612426758, - -4.6252641677856445, - -3.850942611694336, - -2.4760568141937256, - -6.059760093688965, - -10.12075138092041, - -0.9469369649887085, - -11.595907211303711, - -6.875324726104736, - -4.268826007843018, - -2.835529088973999, - -3.8626279830932617, - -4.876199245452881, - -0.013071090914309025, - -4.964417934417725, - -0.7445687055587769, - -5.707155227661133, - -6.10660457611084, - -4.317755699157715, - -4.440443992614746, - -2.9202542304992676, - -4.743522644042969, - 
-1.2569392919540405, - -2.8675737380981445, - -2.3151841163635254, - -4.318130970001221, - -1.9054772853851318, - -1.1808521747589111, - -0.765956461429596, - -2.768916606903076, - -6.237791061401367, - -1.7224305868148804, - -7.137521743774414, - -4.512486457824707, - -1.9069950580596924, - -4.145983695983887, - -5.365190505981445, - -0.059828490018844604, - -2.273892879486084, - -3.4013004302978516, - -5.035730361938477, - -6.501443386077881, - -9.903446197509766, - -1.6332892179489136, - -2.1572084426879883, - -1.6149548292160034, - -1.4698481559753418, - -6.01010799407959, - -2.2243528366088867, - -6.900836944580078, - -6.0930986404418945, - -2.974020481109619, - -3.225423574447632, - -8.423272132873535, - -1.3423724174499512, - -3.626147508621216, - -0.4862469434738159, - -6.860866546630859, - -3.8910953998565674, - -2.33319354057312, - -1.7229185104370117, - -2.215972423553467, - -8.99046516418457, - -4.099084854125977, - -2.4191012382507324, - -8.288970947265625, - -2.9641928672790527, - -1.5036451816558838, - -3.0544614791870117, - -0.0715634673833847, - -2.444031238555908, - -4.520998954772949, - -3.972568988800049, - -0.4985870122909546, - -2.1651363372802734, - -3.4427435398101807, - -1.730639100074768, - -0.9458961486816406, - -7.740211009979248, - -9.39163875579834, - -3.895984172821045, - -1.7523534297943115, - -5.41331672668457, - -8.910720825195312, - -12.971094131469727, - -3.0455880165100098, - -10.501265525817871, - -3.3864927291870117, - -4.842309951782227, - -3.9964733123779297, - -7.3046793937683105, - -2.6607093811035156, - -1.3541781902313232, - -5.003270626068115, - -3.944551944732666, - -0.11356143653392792, - -5.174440383911133, - -9.628616333007812, - -8.654989242553711, - -8.980416297912598, - -6.670101642608643, - -5.488286018371582, - -5.943419933319092, - -2.126483201980591, - -8.054739952087402, - -7.458671569824219, - -2.5267202854156494, - -6.455472946166992, - -8.655346870422363, - -7.903901100158691, - -6.221062660217285, - 
-7.129237174987793, - -4.2345380783081055, - -2.5375306606292725, - -7.697700500488281, - -1.567080020904541, - -2.084331750869751, - -0.25020831823349, - -1.5145041942596436, - -4.619244575500488, - -0.2970108985900879, - -0.4977554678916931, - -6.197869300842285, - -4.030620098114014, - -7.232107639312744, - -0.21076253056526184, - -1.563366174697876, - -1.133756160736084, - -2.708237648010254, - -4.080535888671875, - -0.6818401217460632, - -0.1864331066608429, - -0.49012088775634766, - -8.732468605041504, - -11.945040702819824, - -5.243098735809326, - -1.5294703245162964, - -0.8935543298721313, - -0.6174070835113525, - -1.5068217515945435, - -3.5766501426696777, - -5.393096923828125, - -4.202867031097412, - -14.765748023986816, - -5.2513813972473145, - -0.7597705721855164, - -0.2502063810825348, - -1.7403976917266846, - -2.8000779151916504, - -1.9808133840560913, - -2.1654744148254395, - -1.8629226684570312, - -3.222038745880127, - -0.040942225605249405, - -2.3384013175964355, - -10.210381507873535, - -4.5859761238098145, - -0.5805734395980835, - -3.7019288539886475, - -2.001936674118042, - -2.7876083850860596, - -2.9799084663391113, - -4.349887371063232, - -0.0792960673570633, - -1.4366114139556885, - -1.0813264846801758, - -1.3510822057724, - -6.7060699462890625, - -5.436615943908691, - -3.978389263153076, - -6.785447597503662, - -6.147171497344971, - -3.97414231300354, - -4.332991600036621, - -0.9269428253173828, - -5.1237101554870605, - -4.486598968505859, - -0.04678357392549515, - -1.0307552814483643, - -1.4249452352523804, - -4.517682075500488, - -3.561821699142456, - -2.0815205574035645, - -0.6041194200515747, - -5.992964744567871, - -7.092092514038086, - -0.48916709423065186, - -2.6405677795410156, - -4.3345723152160645, - -3.533582925796509, - -3.1233346462249756, - -3.107872486114502, - -1.9901115894317627, - -3.1052846908569336, - -1.8440347909927368, - -6.21368408203125, - -1.8796799182891846, - -2.705214738845825, - -0.2987763583660126, - 
-4.070865154266357, - -1.6675832271575928, - -1.3896636962890625, - -1.5731089115142822, - -3.526170015335083, - -2.5088443756103516, - -1.208929419517517, - -3.673125743865967, - -2.501532554626465, - -6.875064373016357, - -8.512459754943848, - -1.042314052581787, - -3.657850980758667, - -7.0950798988342285, - -4.974049091339111, - -8.14085578918457, - -3.529888153076172, - -1.9389504194259644, - -7.0902204513549805, - -2.409292459487915, - -2.9428021907806396, - -1.688283085823059, - -3.622368335723877, - -2.0903351306915283, - -4.160663604736328, - -3.1683764457702637, - -1.2135626077651978, - -7.566033363342285, - -3.1186251640319824, - -5.899919509887695, - -0.9518840312957764, - -2.656729221343994, - -2.2994377613067627, - -6.806836128234863, - -1.280236840248108, - -2.838846206665039, - -1.3598848581314087, - -11.707776069641113, - -3.134333372116089, - -0.6230669617652893, - -8.219222068786621, - -7.562507152557373, - -7.489459037780762, - -1.5368008613586426, - -7.149652481079102, - -5.749268054962158, - -3.162869691848755, - -2.7235195636749268, - -6.128931999206543, - -1.1934199333190918, - -3.986410617828369, - -3.76609468460083, - -1.712721586227417, - -3.195504903793335, - -8.397743225097656, - -3.1260581016540527, - -9.792022705078125, - -4.217884540557861, - -11.583260536193848, - -5.987588882446289, - -5.178754806518555, - -6.994749069213867, - -5.167606353759766, - -7.124668121337891, - -6.201416015625, - -10.203682899475098, - -6.858526229858398, - -2.733592987060547, - -5.078882217407227, - -9.003358840942383, - -4.704894542694092, - -3.9085562229156494, - -7.247268199920654, - -7.091092109680176, - -4.4150166511535645, - -7.56699275970459, - -9.485116004943848, - -1.9977033138275146, - -6.65272331237793, - -2.236643075942993, - -7.518955707550049, - -5.525973320007324, - -4.67877721786499, - -6.608670234680176, - -5.536133766174316, - -10.772479057312012, - -10.8853178024292, - -3.6156129837036133, - -6.751470565795898, - -6.4537434577941895, - 
-3.4220399856567383, - -8.251005172729492, - -3.2146153450012207, - -6.330069541931152, - -1.5551663637161255, - -6.520583629608154, - -10.450878143310547, - -5.8788957595825195, - -3.7398200035095215, - -3.9084208011627197, - -0.3640081584453583, - -6.961522102355957, - -6.066243648529053, - -7.270624160766602, - -5.098455429077148, - -2.7642822265625, - -5.460171699523926, - -7.362828731536865, - -2.558631658554077, - -2.186410427093506, - -2.5309929847717285, - -2.46756649017334, - -2.0306026935577393, - -1.8713470697402954, - -2.108008623123169, - -1.2698389291763306, - -2.1712756156921387, - -2.4432802200317383, - -1.1477653980255127, - -1.8417484760284424, - -2.5971946716308594, - -1.8250831365585327, - -2.103092670440674, - -2.5183165073394775, - -2.9367291927337646, - -1.9412965774536133, - -1.7692793607711792, - -2.864521026611328, - -3.1332175731658936, - -1.098311185836792, - -2.946441173553467, - -2.2800471782684326, - -3.1929852962493896, - -2.754260778427124, - -3.485616445541382, - -3.3010287284851074, - -2.5537776947021484, - -2.6752865314483643, - -3.1617612838745117, - -2.4571690559387207, - -2.060081958770752, - -2.425969362258911, - -2.212725877761841, - -2.4232254028320312, - -3.0587053298950195, - -2.4074010848999023, - -2.457937479019165, - -2.319617986679077, - -2.6340954303741455, - -2.599524736404419, - -2.5302212238311768, - -1.6849274635314941, - -2.2609786987304688, - -2.039928674697876, - -1.9474098682403564, - -2.3550753593444824, - -1.718749761581421, - -2.413884162902832, - -1.6247628927230835, - -2.4784040451049805, - -1.828325629234314, - -1.3880831003189087, - -1.4448199272155762, - -1.1477117538452148, - -1.1669728755950928, - -1.8787822723388672, - -1.5565840005874634, - -1.6666553020477295, - -1.747725248336792, - -1.959598422050476, - -2.0376486778259277, - -2.345367431640625, - -2.055098533630371, - -1.3940613269805908, - -3.4385242462158203, - -2.7489635944366455, - -3.2590157985687256, - -3.1128957271575928, - 
-1.7070379257202148, - -3.9010369777679443, - -3.21574068069458, - -3.3850393295288086, - -1.8778185844421387, - -2.698211908340454, - -1.8060741424560547, - -2.0845324993133545, - -3.4797585010528564, - -2.263254404067993, - -3.083108901977539, - -1.6589758396148682, - -2.687279224395752, - -1.77505624294281, - -2.6142921447753906, - -1.934045672416687, - -1.8834377527236938, - -2.8038980960845947, - -1.550542950630188, - -3.4054152965545654, - -1.724036693572998, - -2.3146564960479736, - -1.5134503841400146, - -2.9289023876190186, - -1.5285141468048096, - -3.421035051345825, - -1.3757282495498657, - -3.441431760787964, - -1.5286564826965332, - -3.4372904300689697, - -3.173043966293335, - -1.1313854455947876, - -1.486415147781372, - -2.506413221359253, - -3.5165903568267822, - -1.4112176895141602, - -3.7175354957580566, - -2.2170844078063965, - -0.704839289188385, - -0.6626103520393372, - -2.5483946800231934, - -0.949668288230896, - -3.1339564323425293, - -1.3326977491378784, - -3.5493476390838623, - -0.6785370111465454, - -3.236161470413208, - -2.1347084045410156, - -3.802447557449341, - -2.585503339767456, - -3.9361765384674072, - -2.290905714035034, - -3.714280605316162, - -1.793616533279419, - -3.7252492904663086, - -1.450188159942627, - -3.11938738822937, - -1.25174880027771 - ] - }, - "32": { - "input_prompt": "create a conversational article", - "generated_text": " about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. 
The article should be about the topic of the article.", - "generated_tokens": [ - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046, - 1531, - 9369, - 2715, - 1402, - 2314, - 1278, - 17915, - 1307, - 1278, - 9369, - 1046 - ], - "latency": 48.63822364807129, - "cuda_graph_request_count_map": null, - "step_count": 6144, - "logprobs": [ - -4.4165568351745605, - -11.358176231384277, - -0.0701780766248703, - -7.797665119171143, - -2.6805992126464844, - -1.4707680940628052, - -3.0390255451202393, - -1.6902849674224854, - -1.270594835281372, - -1.1936196088790894, - -1.2523558139801025, - -2.7270259857177734, - -1.2371309995651245, - -0.9618493318557739, - -0.4379909038543701, - -1.3917063474655151, - -1.1055524349212646, - -0.9122569561004639, - -0.9911308288574219, - -0.08436793833971024, - -0.5424078106880188, - -0.9181017279624939, - -0.5873759388923645, - -0.19014373421669006, - -0.06655456870794296, - -0.15252672135829926, - -0.09415211528539658, - -0.009787309914827347, - -0.013910251669585705, - -0.005296128336340189, - -0.005677408073097467, - -0.02013739012181759, - 
-0.21594694256782532, - -0.07153760641813278, - -0.0066444179974496365, - -0.010198505595326424, - -0.011980246752500534, - -0.003686776151880622, - -0.0037619550712406635, - -0.0022467151284217834, - -0.004088377580046654, - -0.021828632801771164, - -0.0012669878778979182, - -0.09768074005842209, - -0.02652405947446823, - -0.0019286142196506262, - -0.002283824374899268, - -0.0032225127797573805, - -0.0009741804678924382, - -0.0009415484382770956, - -0.001211624126881361, - -0.001135300612077117, - -0.002340436913073063, - -0.0010846928926184773, - -0.0509282611310482, - -0.03832047060132027, - -0.00257422705180943, - -0.0022806129418313503, - -0.00262785074301064, - -0.0008195855189114809, - -0.0010239601833745837, - -0.0013777059502899647, - -0.0009899006690829992, - -0.0018756669014692307, - -0.0015304292319342494, - -0.08506463468074799, - -0.01893703266978264, - -0.0013797297142446041, - -0.0014461545506492257, - -0.0013971101725474, - -0.0005869334563612938, - -0.0005212855176068842, - -0.000876757490914315, - -0.0005256939912214875, - -0.0012863941956311464, - -0.0015691122971475124, - -0.051276568323373795, - -0.00973513163626194, - -0.0010469438275322318, - -0.0011531615164130926, - -0.0009969270322471857, - -0.00038342276820912957, - -0.0004032037395518273, - -0.000730247818864882, - -0.0003275334893260151, - -0.0008700875914655626, - -0.0017572689102962613, - -0.044393111020326614, - -0.013102858327329159, - -0.0011463745031505823, - -0.0012070996453985572, - -0.0012325793504714966, - -0.0005048430757597089, - -0.0004876854654867202, - -0.0007901645149104297, - -0.00041500062798149884, - -0.0009869233472272754, - -0.0018687656847760081, - -0.03943866863846779, - -0.014425630681216717, - -0.0014756753807887435, - -0.001423775334842503, - -0.001209719106554985, - -0.0005046047735959291, - -0.00042167355422861874, - -0.0007688426994718611, - -0.0002699726028367877, - -0.0006598440813831985, - -0.0017849955474957824, - -0.038999658077955246, - 
-0.012665312737226486, - -0.0014427024871110916, - -0.0014492495683953166, - -0.001016576774418354, - -0.00042083943844772875, - -0.00033241944038309157, - -0.0006403064471669495, - -0.00022373080719262362, - -0.0007053509471006691, - -0.0018597226589918137, - -0.030997740104794502, - -0.011259939521551132, - -0.0012655591126531363, - -0.00134151556994766, - -0.0008106521563604474, - -0.0003513672563713044, - -0.0002964295563288033, - -0.0006368515896610916, - -0.00020180096908006817, - -0.0005779979983344674, - -0.0016014858847483993, - -0.0271126888692379 - ] - }, - "64": { - "input_prompt": "App Concept: \"Eggy's Interactive Adventure World\"\n\nEggy's Interactive Adventure World is a cutting-edge app designed for phones and tablets that combines immersive storytelling with engaging, interactive experiences for both parents and children. This innovative app allows users to explore the world of Eggy and participate in a series of adventures that teach essential life skills, foster personal growth, and strengthen parent-child bonds.\n\nKey Features:\n\n1. Interactive Stories: The app brings the 20 Eggy stories to life through interactive, animated storytelling. Users can read, listen, and actively participate in the stories, making choices that guide Eggy through various adventures, each with unique lessons and experiences.\n2. Augmented Reality (AR) Integration: The app utilizes AR technology, allowing users to interact with Eggy and the story characters in their real-world environment. This immersive experience creates a deeper connection with the stories and encourages engagement.\n3. Personalized Adventures: The app customizes each story based on the user's interests, needs, and personal growth areas. By collecting user input, the app tailors the storylines and learning outcomes to ensure that each adventure resonates with the child's unique experiences.\n4. 
Parent-Child Cooperative Play: The app encourages parents and children to collaborate and engage in the stories together. Through a split-screen mode or multi-device connectivity, parents and children can actively participate in the stories and learn essential life skills together.\n5. Skill-Building Mini-Games: Interspersed throughout the stories are educational mini-games that reinforce the lessons and themes. These games are designed to be fun, engaging, and accessible to children, encouraging them to practice and develop various skills.\n6. Progress Tracking and Rewards: The app tracks the user's progress through the stories and rewards them with badges, trophies, and other collectibles. This gamification aspect encourages continued engagement and fosters a sense of accomplishment and growth.\n7. Community Features: The app provides a safe, moderated online community where parents can share their experiences, discuss the stories and lessons, and connect with other families who are also using the app. This community fosters support, inspiration, and learning.\n8. Expert Resources: The app includes access to resources from experts in child development, psychology, and education, providing parents with additional tools and guidance on fostering the growth and well-being of their children.\n\nEggy's Interactive Adventure World is a one-of-a-kind, mind-blowing app that revolutionizes the way parents and children engage with stories and learn essential life skills. Combining the power of storytelling, cutting-edge technology, and expert resources, this app is a must-have for families looking to strengthen their bonds and support their children's personal growth.", - "generated_text": "The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). 
The 1999\u20132000 season was the 10th season was the 10th season was the 10 season was the 10 season was the 10", - "generated_tokens": [ - 2, - 1784, - 1032, - 1049, - 1057, - 1057, - 1057, - 1882, - 1050, - 1048, - 1048, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 1411, - 5526, - 1307, - 1278, - 5805, - 46982, - 8525, - 1319, - 19644, - 1076, - 1577, - 1531, - 1032, - 1049, - 1057, - 1057, - 1057, - 1882, - 1050, - 1048, - 1048, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 1411, - 5526, - 1307, - 1278, - 5805, - 46982, - 8525, - 1319, - 19644, - 1076, - 1577, - 1531, - 1032, - 1049, - 1057, - 1057, - 1057, - 1882, - 1050, - 1048, - 1048, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 1411, - 5526, - 1307, - 1278, - 5805, - 46982, - 8525, - 1319, - 19644, - 1076, - 1577, - 1531, - 1032, - 1049, - 1057, - 1057, - 1057, - 1882, - 1050, - 1048, - 1048, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 1411, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 1411, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048, - 5526, - 1486, - 1278, - 1032, - 1049, - 1048 - ], - "latency": 87.50420427322388, - "cuda_graph_request_count_map": null, - "step_count": 6144, - "logprobs": [ - -12.107745170593262, - -2.9727728366851807, - -3.720092535018921, - -5.592433929443359, - -10.964235305786133, - -3.654498338699341, - -9.33439826965332, - -4.833785057067871, - -5.187321662902832, - -2.6944785118103027, - -6.9262237548828125, - -0.654232919216156, - -0.5550781488418579, - -0.21346639096736908, - -0.0134271876886487, - -0.010840100236237049, - -1.3878544569015503, - -0.6296291351318359, - -7.9766011238098145, - -0.4393192231655121, - -5.639142036437988, - -3.277270793914795, - -1.0206468105316162, - -11.703084945678711, - -0.7100943922996521, - -0.2809169888496399, - -2.771284818649292, - -7.190817832946777, - -4.048691749572754, - -0.012056218460202217, - -3.3802318572998047, - 
-0.6807184815406799, - -3.4844107627868652, - -3.312331199645996, - -0.5001641511917114, - -2.61255145072937, - -4.243694305419922, - -4.333778381347656, - -6.0625810623168945, - -0.011777156963944435, - -0.37577226758003235, - -0.9490834474563599, - -3.5450198650360107, - -2.1778035163879395, - -0.45957911014556885, - -3.00771164894104, - -1.7600425481796265, - -0.09766030311584473, - -2.467618942260742, - -1.329679012298584, - -0.8384320735931396, - -1.1864604949951172, - -3.628342866897583, - -0.2470003068447113, - -1.8938640356063843, - -5.168431282043457, - -0.05005566030740738, - -2.258014678955078, - -2.449028968811035, - -0.0034086955711245537, - -3.9485883712768555, - -1.6201664209365845, - -5.139942646026611, - -4.859354496002197, - -0.23686674237251282, - -0.5541543364524841, - -2.5826025009155273, - -6.114635467529297, - -4.3380208015441895, - -0.7412900924682617, - -0.3221715986728668, - -0.13805493712425232, - -4.1797332763671875, - -7.3456268310546875, - -0.13762745261192322, - -2.0905232429504395, - -1.0178627967834473, - -4.108260631561279, - -0.6007124185562134, - -1.0410642623901367, - -4.122039794921875, - -0.35905471444129944, - -1.4274661540985107, - -4.139932155609131, - -0.4237431585788727, - -1.6294409036636353, - -0.9811424016952515, - -4.132790565490723, - -1.1318120956420898, - -6.8258256912231445, - -1.5455098152160645, - -0.6984409093856812, - -13.664215087890625, - -0.1166313961148262, - -1.6347849369049072, - -0.28875046968460083, - -0.03130083531141281, - -1.5293006896972656, - -1.6488375663757324, - -4.224111557006836, - -4.760683059692383, - -1.9758747816085815, - -1.5828256607055664, - -2.8463857173919678, - -0.2620386481285095, - -1.7243889570236206, - -1.7945923805236816, - -0.8884308338165283, - -0.3766394555568695, - -0.34033581614494324, - -9.05566692352295, - -0.22754782438278198, - -0.033802058547735214, - -0.34108465909957886, - -0.5644669532775879, - -2.0925779342651367, - -4.547505855560303, - -10.870464324951172, - 
-1.1072022914886475, - -5.503787994384766, - -3.259672164916992, - -0.007964519783854485, - -3.0111639499664307, - -4.246737480163574, - -0.7813188433647156, - -3.331031322479248, - -4.485962867736816, - -0.9492117166519165, - -2.6757047176361084, - -1.1591349840164185, - -1.122117519378662, - -2.629878044128418, - -5.986321926116943, - -0.2146703153848648, - -0.002392764901742339, - -7.372479438781738, - -0.007077385671436787, - -0.06599216908216476, - -0.0970711037516594, - -3.2874932289123535, - -0.0019583588000386953, - -0.9122000336647034, - -4.930907249450684, - -0.019508399069309235, - -0.308611661195755, - -0.07778516411781311, - -3.8497893810272217, - -0.46124517917633057, - -0.38821348547935486, - -2.668412208557129, - -1.845987319946289, - -0.06470083445310593, - -0.006619549356400967, - -1.2610487937927246, - -0.13015533983707428, - -3.365312099456787, - -0.0014690094394609332, - -1.6789823770523071, - -1.2499005794525146, - -3.3992111682891846, - -5.563300132751465, - -0.823418140411377, - -4.24124813079834, - -1.6597849130630493, - -0.6941139698028564, - -1.5637556314468384, - -0.5482053756713867, - -0.9507225751876831, - -3.764758586883545, - -0.0006518622976727784, - -0.7540555000305176, - -5.058262825012207, - -0.3302401602268219, - -2.8130555152893066, - -0.17079885303974152, - -2.871047019958496, - -0.3991694450378418, - -3.1476998329162598, - -0.3488404452800751, - -2.0545666217803955, - -4.201597690582275, - -5.164614677429199, - -0.0271432027220726, - -0.0009785869624465704, - -3.3444161415100098, - -1.3117046356201172, - -6.375423431396484, - -0.05535568296909332, - -0.3919340968132019, - -0.060594215989112854, - -6.507473468780518, - -0.0023910999298095703, - -2.143423318862915, - -3.335618257522583, - -2.953970432281494, - -0.0013383012264966965, - -0.8080525398254395, - -0.29526084661483765, - -0.04036511853337288, - -3.231475353240967, - -1.0585589408874512, - -6.136373043060303, - -0.006182829383760691, - -0.035548023879528046, - 
-5.509808540344238, - -1.8490750789642334, - -9.83314037322998, - -0.07037576287984848, - -3.1621387004852295, - -6.762360095977783, - -1.3490527868270874, - -3.601043462753296, - -1.176393985748291, - -0.4342959523200989, - -0.06266004592180252, - -5.464046001434326, - -0.017946599051356316, - -1.0416009426116943, - -1.6117159128189087, - -12.289417266845703, - -1.5004339218139648, - -5.76563835144043, - -4.038386821746826, - -0.20812086760997772, - -3.6306562423706055, - -1.3901070356369019, - -1.087137222290039, - -2.423213243484497, - -4.503086090087891, - -0.0008031480247154832, - -0.03627370297908783, - -0.1653430461883545, - -7.958648681640625, - -1.1018548011779785, - -1.290948748588562, - -3.8049263954162598, - -1.8253734111785889, - -0.059022851288318634, - -0.0013984196120873094, - -4.698851585388184, - -2.5421664714813232, - -0.024493809789419174, - -4.828659534454346, - -3.0295286178588867, - -3.550312042236328, - -0.1185273677110672, - -0.22595760226249695, - -0.10782183706760406, - -1.4033282995224, - -0.4485701024532318, - -0.2889708876609802, - -0.05471855774521828, - -0.007632025051862001, - -2.1156554222106934, - -0.6249589323997498, - -4.198577404022217, - -0.14178156852722168, - -4.284021377563477, - -2.227515935897827, - -3.5022120475769043, - -0.19575819373130798, - -15.964509963989258, - -4.055960655212402, - -11.125024795532227, - -0.7681724429130554, - -3.0436902046203613, - -7.030262470245361, - -4.376729488372803, - -5.476145267486572, - -0.4219042658805847, - -3.7689766883850098, - -0.060010604560375214, - -0.8134393692016602, - -0.11386934667825699, - -0.025473715737462044, - -0.09736856073141098, - -4.357361793518066, - -0.3670865297317505, - -0.08063744008541107, - -0.1311480849981308, - -1.0903867483139038, - -1.2705107927322388, - -1.5076212882995605, - -4.295275688171387, - -0.04185756668448448, - -0.19810955226421356, - -1.9645220041275024, - -0.9597910642623901, - -0.13429655134677887, - -0.002283110748976469, - 
-7.066074371337891, - -3.639211654663086, - -1.0263917446136475, - -8.124760627746582, - -1.132537841796875, - -0.09160765260457993, - -0.08996370434761047, - -10.165366172790527, - -3.501585006713867, - -0.0019847711082547903, - -0.05309417471289635, - -0.31209683418273926, - -0.15089339017868042, - -1.23564875125885, - -1.2685208320617676, - -7.832758903503418, - -0.19271136820316315, - -0.014305183663964272, - -0.0007532381569035351, - -0.44688940048217773, - -2.6239724159240723, - -1.738666296005249, - -1.6480977535247803, - -0.46753185987472534, - -8.656959533691406, - -3.79868483543396, - -0.9281394481658936, - -2.2381181716918945, - -1.7654449939727783, - -0.4948798418045044, - -0.025028761476278305, - -1.5435361862182617, - -1.6390818357467651, - -1.4962153434753418, - -0.3425217270851135, - -0.013077914714813232, - -0.038474079221487045, - -5.3364362716674805, - -0.42365288734436035, - -1.884093999862671, - -3.510357618331909, - -6.198029518127441, - -0.44375038146972656, - -0.0008789013954810798, - -3.6025230884552, - -1.419615626335144, - -2.6723289489746094, - -5.775190830230713, - -1.1380761861801147, - -2.6683366298675537, - -0.43395891785621643, - -0.003145867260172963, - -8.63144302368164, - -1.646262764930725, - -1.732487678527832, - -4.561546802520752, - -0.5277953147888184, - -0.07333153486251831, - -0.5624169707298279, - -0.12201295047998428, - -2.6561455726623535, - -1.1071691513061523, - -2.6895060539245605, - -0.040864069014787674, - -0.04126371443271637, - -1.8294739723205566, - -0.09022177755832672, - -0.3154001832008362, - -0.46215569972991943, - -2.2462844848632812, - -0.30149081349372864, - -0.52588951587677, - -8.288043975830078, - -0.0002057340752799064, - -0.8021711707115173, - -4.4546098709106445, - -0.0001565095444675535, - -0.0015961299650371075, - -0.15216240286827087, - -0.3677564561367035, - -5.018707275390625, - -0.7850045561790466, - -1.9582659006118774, - -1.0046892166137695, - -10.0401029586792, - -0.16878114640712738, - 
-5.944240570068359, - -1.5523078441619873, - -5.7253522872924805, - -0.47948503494262695, - -0.44009655714035034, - -5.671053886413574, - -0.003280022880062461, - -0.7937742471694946, - -0.9639376401901245, - -0.00030048147891648114, - -1.0747740268707275, - -0.8839919567108154, - -3.416811466217041, - -1.6602673530578613, - -0.2706959843635559, - -0.0024333172477781773, - -4.478696823120117, - -6.20179557800293, - -0.11359559744596481, - -0.202009916305542, - -0.022310219705104828, - -2.367263078689575, - -1.0405994653701782, - -5.984308242797852, - -2.105138063430786, - -9.583202362060547, - -0.0004957877099514008, - -3.0655455589294434, - -0.0669412910938263, - -0.8977450728416443, - -2.2271294593811035, - -2.6617536544799805, - -1.8184051513671875, - -0.8291114568710327, - -0.4864235818386078, - -0.7993525862693787, - -3.51106858253479, - -2.1530935764312744, - -0.257144957780838, - -1.3934082984924316, - -1.3137131929397583, - -0.3384077548980713, - -0.1697217971086502, - -2.353395938873291, - -0.03406282886862755, - -0.39059701561927795, - -3.422821044921875, - -1.7117210626602173, - -0.7018465399742126, - -1.5995906591415405, - -3.6218395233154297, - -0.12497704476118088, - -0.16966234147548676, - -0.7313685417175293, - -0.4956285357475281, - -1.0840849876403809, - -5.042126655578613, - -0.00031704644788987935, - -7.683258056640625, - -0.9210801720619202, - -4.687852382659912, - -0.0028814247343689203, - -0.043382611125707626, - -4.1948652267456055, - -2.66593337059021, - -0.06153333932161331, - -0.0023110604379326105, - -6.729236602783203, - -5.777127742767334, - -0.08932067453861237, - -0.09890018403530121, - -0.009886111132800579, - -3.1145148277282715, - -3.725565195083618, - -0.0021998509764671326, - -3.9927196502685547, - -2.753793239593506, - -1.6037236452102661, - -0.17461130023002625, - -4.804804801940918, - -0.2311229705810547, - -0.30256444215774536, - -2.235363006591797, - -0.006614102050662041, - -0.34757524728775024, - -1.4946835041046143, - 
-1.222062587738037, - -3.658839225769043, - -1.356170892715454, - -0.5371109843254089, - -3.7580835819244385, - -4.54621696472168, - -0.31577637791633606, - -3.677156925201416, - -2.7181396484375, - -7.4674882888793945, - -0.00019369633810129017, - -2.3798398971557617, - -2.5452184677124023, - -0.2858496308326721, - -4.315659523010254, - -0.025835415348410606, - -0.000603493710514158, - -0.2546294331550598, - -0.12032663822174072, - -2.006908655166626, - -5.990736961364746, - -7.146596908569336, - -0.23356498777866364, - -0.2201036810874939, - -0.01235415879637003, - -0.011248741298913956, - -1.4155778884887695, - -0.40242519974708557, - -5.877886772155762, - -0.7865053415298462, - -0.03231288120150566, - -0.004864405374974012, - -0.0050629740580916405, - -2.7049152851104736, - -6.822089195251465, - -0.39252761006355286, - -1.2290617227554321, - -0.007630132604390383, - -3.485461711883545, - -0.47985684871673584, - -6.1813530921936035, - -0.03757825121283531, - -0.37834712862968445, - -0.22192610800266266, - -1.165318489074707, - -0.5220151543617249, - -0.1289423257112503, - -3.216222047805786, - -1.0787583589553833, - -3.0716826915740967, - -0.6023419499397278, - -2.558605194091797, - -0.927433431148529, - -0.00364841241389513, - -0.14910078048706055, - -0.7318926453590393, - -6.159773826599121, - -0.0015301911626011133, - -1.8908276557922363, - -1.9641315937042236, - -0.021651331335306168, - -2.1648828983306885, - -2.2700207233428955, - -7.833290100097656, - -0.03397307172417641, - -0.8344621658325195, - -0.02225659228861332, - -0.06639260798692703, - -2.3780317306518555, - -3.180129051208496, - -0.09030630439519882, - -2.4138312339782715, - -1.3445552587509155, - -1.848326325416565, - -0.9726964831352234, - -2.851792335510254, - -0.0630769282579422, - -0.0011394681641831994, - -0.05843213573098183, - -2.6616668701171875, - -1.575437068939209, - -0.180197611451149, - -5.552371501922607, - -0.26108410954475403, - -2.529611587524414, - -0.37780019640922546, - 
-5.141795635223389, - -0.5921107530593872, - -0.2474975287914276, - -0.10687454044818878, - -4.891775131225586, - -0.25011152029037476, - -2.4100728034973145, - -1.358667016029358, - -2.790961503982544, - -3.8654675483703613, - -1.0076243877410889, - -0.7456949949264526, - -1.5575554370880127, - -2.05328631401062, - -1.6538066864013672, - -0.0558217354118824, - -0.0001817776501411572, - -0.0011643542675301433, - -0.038359593600034714, - -1.4208931922912598, - -0.542127251625061, - -0.3162364959716797, - -0.3966117799282074, - -1.1765563488006592, - -1.7920958995819092, - -0.18425509333610535, - -0.1092008650302887, - -0.46676987409591675, - -0.24977745115756989, - -1.0375996828079224, - -0.5268858671188354, - -0.008942908607423306, - -0.6404479146003723, - -0.0033111530356109142, - -5.3165931603871286e-05, - -0.5154370665550232, - -0.39286962151527405, - -1.401839256286621, - -0.6232213973999023, - -0.02168831042945385, - -0.004282470792531967, - -0.005199837032705545, - -0.09748794883489609, - -0.040823787450790405, - -0.00014852374442853034, - -0.0005832401220686734, - -0.005303124897181988, - -0.6537013053894043, - -0.38026049733161926, - -0.04189129173755646, - -0.010385753586888313, - -0.008756335824728012, - -0.013362848199903965, - -0.000504723924677819, - -0.002797620603814721, - -0.0014512732159346342, - -0.0013321106089279056, - -0.010883613489568233, - -0.005159396678209305, - -0.004701037425547838, - -0.01591104455292225, - -0.001474246964789927, - -1.2278481335670222e-05, - -0.010548785328865051, - -0.08341525495052338, - -0.03858809545636177, - -0.056062061339616776, - -0.0009532198309898376, - -0.0005789510905742645, - -0.0008986725588329136, - -0.00710969977080822, - -0.0006561510381288826, - -1.4781842764932662e-05, - -5.578839045483619e-05, - -0.0006398299592547119, - -0.0028786908369511366, - -0.0034092895220965147, - -0.008268529549241066, - -0.006602259818464518, - -0.004517706111073494, - -0.02233586646616459, - -0.0006323245470412076, - 
-0.009195122867822647, - -0.0029284947086125612, - -0.004457537550479174, - -0.017873765900731087, - -0.008801711723208427, - -0.0036383166443556547, - -0.08078611642122269, - -0.006347495596855879, - -0.0002177716523874551, - -0.04688572511076927, - -0.2718890309333801, - -0.07996802777051926, - -0.04674842208623886, - -0.009984076954424381, - -0.010000954382121563, - -0.050126753747463226, - -0.5864179730415344, - -0.2915390133857727, - -0.008090462535619736, - -0.032190412282943726, - -0.03461571782827377, - -0.2785419523715973, - -0.05830562859773636, - -0.02893950417637825, - -0.12241066247224808, - -0.02711048536002636, - -0.16450686752796173, - -0.09852994978427887, - -0.2651047706604004, - -0.35559725761413574, - -0.12606258690357208, - -0.32793670892715454, - -0.20878805220127106, - -0.7587923407554626, - -0.4730657637119293, - -1.496794581413269, - -0.2486363798379898, - -0.20723387598991394, - -0.1872958242893219, - -0.19151091575622559, - -0.3350580036640167, - -1.3085839748382568, - -0.6109102964401245, - -0.2947172224521637, - -0.37130236625671387, - -0.19888469576835632, - -0.18297068774700165, - -0.9978674054145813, - -0.5471905469894409, - -0.4379975199699402, - -0.407988041639328, - -0.17325688898563385, - -0.16938896477222443 - ] - }, - "96": { - "input_prompt": "what is the concept of double materiality in sustainability?", - "generated_text": " What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? 
What is the concept of double materiality in sustainability? What is the concept of double material", - "generated_tokens": [ - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318, - 1532, - 1294, - 60187, - 1063, - 5675, - 1395, - 1278, - 7401, - 1307, - 5558, - 4318 - ], - "latency": 126.4328100681305, - "cuda_graph_request_count_map": null, - "step_count": 6144, - "logprobs": [ - -4.917365074157715, - -0.9960631132125854, - -7.875392913818359, - -0.2993181347846985, - -7.760880470275879, - -10.308395385742188, - -2.1807961463928223, - -1.6412583589553833, - -9.521512985229492, - -1.627489447593689, - -1.8410861492156982, - -0.9285702705383301, - -0.2576955556869507, - -0.9641067981719971, - -0.02314644306898117, - -0.6696561574935913, - -0.07035009562969208, - -0.004622488282620907, - -0.025748632848262787, - -0.06276137381792068, - -0.17385317385196686, - -0.3285445272922516, - -0.0592009499669075, - -0.007940039038658142, - -0.22664028406143188, - -0.0017957051750272512, - -0.022929180413484573, - -0.005733947269618511, - -0.0012996093137189746, - -0.006419987417757511, - 
-0.02376849390566349, - -0.27800270915031433, - -0.4650723934173584, - -0.04936715215444565, - -0.003972141072154045, - -0.01477995328605175, - -0.0012044801842421293, - -0.014891182072460651, - -0.002709767082706094, - -0.0009939497103914618, - -0.0028436246793717146, - -0.006759870797395706, - -0.15416178107261658, - -0.20121537148952484, - -0.016414370387792587, - -0.0015769677702337503, - -0.008138825185596943, - -0.0007713441736996174, - -0.013819841668009758, - -0.003826678032055497, - -0.0005918181850574911, - -0.0014938872773200274, - -0.00485716899856925, - -0.081083282828331, - -0.09642580896615982, - -0.009630884043872356, - -0.0010948146227747202, - -0.007085552904754877, - -0.0006310140597634017, - -0.013073914684355259, - -0.0039152647368609905, - -0.000364713923772797, - -0.001292108790948987, - -0.004158303141593933, - -0.044283974915742874, - -0.05722038820385933, - -0.006369172595441341, - -0.0007976687629707158, - -0.005993015132844448, - -0.0004935238393954933, - -0.011310506612062454, - -0.002951553324237466, - -0.000387831823900342, - -0.000977038755081594, - -0.0036971091758459806, - -0.030511993914842606, - -0.04246694967150688, - -0.004863100592046976, - -0.0006927236099727452, - -0.005206122528761625, - -0.0005129451747052372, - -0.00894621666520834, - -0.0028565814718604088, - -0.00041333239641971886, - -0.0009002208826132119, - -0.0033131728414446115, - -0.021188799291849136, - -0.03330245241522789, - -0.0038543473929166794, - -0.0006504327175207436, - -0.004474864806979895, - -0.00048029806930571795, - -0.009718249551951885, - -0.0030443770810961723, - -0.0003743662964552641, - -0.0009439303539693356, - -0.003729770192876458, - -0.016505014151334763, - -0.0290373582392931, - -0.003315192647278309, - -0.0005821678787469864, - -0.004148805979639292, - -0.00042489083716645837, - -0.006856840569525957, - -0.0028660909738391638, - -0.00032574593205936253, - -0.0006986799417063594, - -0.003671098267659545, - -0.012792548164725304, - 
-0.02553274855017662, - -0.002730690874159336, - -0.0005067494930699468, - -0.0036923582665622234, - -0.0004451475979294628, - -0.007225453853607178, - -0.002746859099715948, - -0.0003323002893012017, - -0.0008517451351508498, - -0.003630714723840356, - -0.011284693144261837, - -0.02186688780784607, - -0.0025819556321948767, - -0.0004931663861498237, - -0.0031721293926239014, - -0.00040725519647821784, - -0.0062383925542235374, - -0.00238693761639297, - -0.00040749352774582803, - -0.0007970731821842492, - -0.003604583442211151, - -0.010075542144477367, - -0.022386692464351654, - -0.0025295186787843704, - -0.00046302087139338255, - -0.003138143103569746, - -0.0004233417857903987, - -0.006178564392030239, - -0.0021037368569523096 - ] - }, - "127": { - "input_prompt": "as before we talk about using , i usually do if else() to in case the property would be null before.\nif today i public a property and i want to use using to instead , will code be like this?\n\npublic Material Mat;\n\nusing(Mat){\n// Do something;\n}\n\nis that right?", - "generated_text": " i am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using", - "generated_tokens": [ - 1623, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 8462, - 22692, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1809, - 1623, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 73751, - 1338, - 1073, 
- 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 8462, - 22692, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1809, - 1623, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 73751, - 1338, - 1073, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 8462, - 22692, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1809, - 1623, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 17047, - 1505, - 1693, - 1623, - 1855, - 2505, - 1278, - 22692, - 73751, - 1338, - 1073, - 1855, - 1605, - 5257, - 1693, - 1623, - 1855, - 2505 - ], - "latency": 155.6906189918518, - "cuda_graph_request_count_map": null, - "step_count": 6144, - "logprobs": [ - -7.793755531311035, - -4.224940776824951, - -5.446587562561035, - -0.267395555973053, - -6.118141174316406, - -11.105525970458984, - -6.67517614364624, - -6.902530670166016, - -3.2218151092529297, - -5.99644660949707, - -8.431673049926758, - -8.04836368560791, - -3.992594003677368, - -7.550079345703125, - -2.873685836791992, - -3.3877997398376465, - -7.9306745529174805, - -7.005484580993652, - -1.0481306314468384, - -2.686237335205078, - -6.131283760070801, - -6.2994704246521, - -7.931419372558594, - -11.3147554397583, - -8.544670104980469, - -12.01729679107666, - -3.89847469329834, - -1.7964364290237427, - -2.952878952026367, - -1.9217232465744019, - -2.272329330444336, - -0.37552154064178467, - -1.667820692062378, - -7.510344505310059, - -3.498040199279785, - -7.980632305145264, - -7.672002792358398, - -4.4999470710754395, - -7.155375003814697, - -2.4486124515533447, - -4.785946846008301, - -1.153855800628662, - -2.3994438648223877, - -4.250652313232422, - -12.24446964263916, - -8.344388008117676, - -2.608186721801758, - -5.200589179992676, - -8.25888442993164, - -3.6245617866516113, - -7.689338207244873, - -7.345355033874512, - -1.2661759853363037, - 
-7.265620231628418, - -1.9884108304977417, - -6.269482612609863, - -2.41705584526062, - -1.8929681777954102, - -1.8259913921356201, - -2.0997350215911865, - -2.323200225830078, - -1.3998825550079346, - -0.8789899945259094, - -1.082053542137146, - -1.1831339597702026, - -1.4462857246398926, - -1.6481035947799683, - -1.4408715963363647, - -1.2603964805603027, - -1.5267670154571533, - -1.6345772743225098, - -1.3796477317810059, - -0.7609691023826599, - -0.3548354506492615, - -0.7552334666252136, - -0.44776833057403564, - -1.1078286170959473, - -1.3036658763885498, - -0.5214896202087402, - -0.8486822843551636, - -0.22470997273921967, - -0.4705755412578583, - -0.5639711022377014, - -0.5388108491897583, - -0.6052999496459961, - -0.1002030223608017, - -0.286334365606308, - -0.45798981189727783, - -1.0107953548431396, - -0.11875647306442261, - -0.6969441771507263, - -0.4609107971191406, - -0.07614769786596298, - -0.5035472512245178, - -0.1682187020778656, - -0.10476160794496536, - -0.6586751341819763, - -0.35806939005851746, - -1.5364394187927246, - -2.4093759059906006, - -1.977368950843811, - -1.6216907501220703, - -0.27647316455841064, - -0.2991848587989807, - -0.2783535420894623, - -0.05913994088768959, - -0.03023873083293438, - -0.043339803814888, - -0.7320341467857361, - -0.0030677898321300745, - -0.0332595594227314, - -0.012804670259356499, - -0.004041599575430155, - -0.0014899593079462647, - -0.001948602613992989, - -0.0029070996679365635, - -0.040939707309007645, - -0.013942227698862553, - -0.04897322878241539, - -0.011005887761712074, - -0.0044113704934716225, - -0.0013179434463381767, - -0.003658389439806342, - -0.009758152067661285, - -0.0014104428701102734, - -0.0016671819612383842, - -0.000771939754486084, - -0.0015519729349762201, - -0.003720743814483285, - -0.004249115474522114, - -0.00485657574608922, - -0.005053604021668434, - -0.002336274366825819, - -0.0009155849111266434, - -0.0004978132783435285, - -0.0005953923100605607, - -0.0011395872570574284, - 
-0.001485078944824636, - -0.3072909712791443, - -1.7295066118240356, - -0.4807289242744446, - -0.1245415136218071, - -0.011858444660902023, - -0.020613837987184525, - -0.011020978912711143, - -0.003106294432654977, - -0.0009966888464987278, - -0.0019349202048033476, - -0.037407051771879196, - -0.0003496989083942026, - -0.005922981072217226, - -0.007394562941044569, - -0.0006037319544702768, - -0.0008836655179038644, - -0.0002884448622353375, - -0.00047600860125385225, - -0.0024947968777269125, - -0.00442774873226881, - -0.004059052560478449, - -0.0018594847060739994, - -0.0006179092451930046, - -0.00022635281493421644, - -0.0006730675231665373, - -0.003022746881470084, - -0.0002343380037928, - -0.00047791501856409013, - -9.440929716220126e-05, - -0.00021550717065110803, - -0.0013523490633815527, - -0.0032202552538365126, - -0.001157686347141862, - -0.004449942149221897, - -0.0016590891173109412, - -0.00101062236353755, - -0.0003079893649555743, - -0.00048375347978435457, - -0.0021734442561864853, - -0.00423036003485322, - -0.11514264345169067, - -0.8658493757247925, - -0.084366075694561, - -0.02140468917787075, - -0.0060798698104918, - -0.008638513274490833, - -0.003212531330063939, - -0.0009598892065696418, - -0.00032085992279462516 - ] - }, - "throughput": [ - 93.24123994187065, - 104.94118337233992, - 105.03843789693171 - ] + "0": { + "input_prompt": "The $500 Cup of coffee?\nConsider this, most Americans spend an average of $1,500-2,000 a year on this bean water.\nI have a few question for you: \nHow has business been the past few months?\nDo you ever feel like your business is stuck?\nDon't feel like you're able to improve performance and make changes required to achieve success ?\nAre your customers spneding less and less and wanting more?\nHave the gas prices affected your business?\nDo you have employees and do they hate you or wish they could quit?\n\nNow, before you and I can decide wheter or not I will be a good fit for your business we should talk this 
over with coffee.\nAnd, just to warn you this isn't some casual thing. This is not a date or time to be personal or social (but by all means share what you will coz I'll gladly listen).\nTher eare two major talking points and stratagies we will focios on in our lil coffee social\nFor one, we will find your unique selling Proposition (USP).\nDo have the best price? Are you the cheapest in town? Are your customers jerks? Do you haVE REGULARS? Why do people come back?\nwe'll also look for the holes in your business bucket. I'm willing to bet there's a hole or two in your business we'll find together that'll make this 500 dollar cup of Joe pay for itse;f immedietly.\nMany find themselves to be more profitable by just finding out where the dollars are escaping in their business and I like to think of myself as a guy that comes along with some spakel or putty and patch those holes up for you.\nBeleive me, just fixing one hole can mean a lot...just think about a sinking boat that has a hole in it that's about 3\u201d in diameter... it doesn't take long to sink.\nI have no agenda, besides f=getting to know your business and seeing wher I can patch the holes and find what makes you do darn unique (I know this won't take long.)\nMany folks, I bet, will find what they need to get off their chest with a quick phone call and they just paypal me the money and make a coffee at home. Look, that's fine too.\nI just to get you ot of your comfort zone, because this is where it all starts my frind.\nSome smart GOAT entrepreneur will probably get everything they need just out of our lil mini consulatant for the more extensive business consukting I offer, and look, that's fine, too.\nMaybe this cup of coffee will be all you need to gtet back on your feet, but not only surive, but thrive!\nI'm not trying to make frineds, or make a bunch of money, or look down your shirt\nBut this is only going to be a 45 minute (max) coffee chat\nAnd, I know you (and me) both have a lot on our plates. 
So no messing around\nAfter our consultation I will follow up with you in a few days and see how things are going, then I will be emailing you about once or twice every two weeks, just to make sure you're staying on task and implementing what we went over.\nTghere is no obligation to go any further and will gladly give you back your money if this pre-consultation doesn't put you on the right path or you don't get any value out of it...", + "generated_text": " $ is a$ is a $ is a $ is a $ is a $ is a $$1, you\n$ $$ $\n$ $- $\n$\n$\n$ $$$\n$\n$\n$\n$\n$\n$\n$\n$???????, $\n-1\n$\n1.5.\n$\n$, you\n$.\n$\n1,1.1\nI\n$.\nI\n\n\nHow\n$,,,0,1,0,0.0\nIn\nThe\nThe\nThe\n", + "generated_tokens": [ + 1659, + 1395, + 1261, + 1036, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1395, + 1261, + 1659, + 1036, + 1049, + 1044, + 1636, + 1010, + 1036, + 1659, + 1036, + 1659, + 1010, + 1036, + 1659, + 1045, + 1659, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1659, + 1036, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1010, + 1036, + 1063, + 1063, + 1063, + 1063, + 1063, + 1063, + 1063, + 1044, + 1659, + 1010, + 1045, + 1049, + 1010, + 1036, + 1010, + 1049, + 1046, + 1053, + 1046, + 1010, + 1036, + 1010, + 1036, + 1044, + 1636, + 1010, + 1036, + 1046, + 1010, + 1036, + 1010, + 1049, + 1044, + 1049, + 1046, + 1049, + 1010, + 1073, + 1010, + 1036, + 1046, + 1010, + 1073, + 1010, + 1010, + 1010, + 7801, + 1010, + 1036, + 1044, + 1044, + 1044, + 1048, + 1044, + 1049, + 1044, + 1048, + 1044, + 1048, + 1046, + 1048, + 1010, + 1785, + 1010, + 1784, + 1010, + 1784, + 1010, + 1784, + 1010 + ], + "latency": 9.77891230583191, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -7.7319135665893555, + -2.188307285308838, + -0.7547445297241211, + -0.7294313311576843, + -10.238386154174805, + -3.3775341510772705, + -6.394498825073242, + 
-7.354557037353516, + -9.018157958984375, + -3.012073040008545, + -3.2584073543548584, + -5.220732688903809, + -4.620487213134766, + -2.5078930854797363, + -3.752683162689209, + -0.13360372185707092, + -0.05705544352531433, + -0.41462242603302, + -1.585279941558838, + -1.6438164710998535, + -1.9557222127914429, + -0.3989897072315216, + -0.0365302674472332, + -6.368816375732422, + -0.8731719255447388, + -0.022585075348615646, + -0.2775891423225403, + -0.0027362785767763853, + -0.0006812873762100935, + -1.581446647644043, + -0.008688976056873798, + -0.3532317280769348, + -6.071163177490234, + -9.162371635437012, + -9.965556144714355, + -2.400461196899414, + -2.9898362159729004, + -2.9803032875061035, + -2.12601900100708, + -3.500912666320801, + -7.015069007873535, + -2.278961420059204, + -0.46380555629730225, + -4.078739166259766, + -1.9430254697799683, + -3.5642244815826416, + -3.689701795578003, + -6.201474189758301, + -6.580833911895752, + -2.3081111907958984, + -5.42717170715332, + -1.1886008977890015, + -1.172760248184204, + -1.3571951389312744, + -1.3551844358444214, + -3.376784324645996, + -0.05118789151310921, + -4.064360618591309, + -2.575554847717285, + -0.6994737386703491, + -2.56724214553833, + -2.1888976097106934, + -0.4816131591796875, + -4.070178985595703, + -2.0060782432556152, + -6.858033180236816, + -0.059200502932071686, + -3.214278221130371, + -0.9671833515167236, + -0.823198676109314, + -1.0130078792572021, + -4.595561981201172, + -0.012724989093840122, + -5.214311599731445, + -8.246870040893555, + -3.1476030349731445, + -3.299684524536133, + -4.218191146850586, + -7.318399429321289, + -0.8580498695373535, + -3.0894036293029785, + -1.886361002922058, + -7.217658996582031, + -3.271679639816284, + -3.9717154502868652, + -1.8835484981536865, + -10.034332275390625, + -11.382490158081055, + -5.417011260986328, + -7.505967140197754, + -2.33837890625, + -0.07904055714607239, + -3.294971227645874, + -7.813640594482422, + -1.7646901607513428, + 
-4.025320053100586, + -3.5977325439453125, + -4.390352249145508, + -9.147806167602539, + -0.5303041934967041, + -7.721246242523193, + -0.6311959028244019, + -0.8119025230407715, + -0.7227814197540283, + -1.8369406461715698, + -0.20933297276496887, + -1.5395950078964233, + -4.424448490142822, + -4.084965705871582, + -3.355497360229492, + -1.0475609302520752, + -6.479413986206055, + -0.7810530662536621, + -2.132437229156494, + -6.648703098297119, + -2.9522438049316406, + -1.2485712766647339, + -4.040503025054932, + -2.3415768146514893, + -5.358206748962402, + -1.6258506774902344, + -3.956300973892212, + -0.732298731803894, + -7.441117286682129, + -1.5242161750793457, + -2.4555861949920654, + -4.295163154602051, + -9.687600135803223, + -0.8213484883308411, + -1.2446978092193604, + -0.01942702941596508, + -4.619411468505859, + -3.3297007083892822, + -2.2139487266540527, + -3.691431999206543, + -2.6574106216430664, + -6.075929641723633, + -0.6123450994491577, + -1.2942559719085693, + -0.6262839436531067, + -7.398006439208984, + -4.4869890213012695, + -4.202048301696777, + -4.982994079589844, + -0.637227475643158, + -3.061023235321045, + -10.117584228515625, + -3.8567495346069336, + -4.0480828285217285, + -2.472019672393799, + -4.246374607086182, + -1.3939155340194702, + -7.132441520690918, + -0.20108745992183685, + -4.986658573150635, + -4.387957572937012, + -0.01108358334749937, + -4.209756851196289, + -7.271108627319336, + -4.047314643859863, + -2.6497321128845215, + -1.4763175249099731, + -0.28365400433540344, + -3.5247769355773926, + -1.4226995706558228, + -4.327237129211426, + -2.0407187938690186, + -6.1437907218933105, + -1.5190880298614502, + -2.5511486530303955, + -7.504094123840332, + -2.152172565460205, + -6.708334922790527, + -6.913146495819092, + -3.6959621906280518, + -6.752341270446777, + -0.63083815574646, + -0.12433214485645294, + -5.0525641441345215, + -4.435934066772461, + -0.45601028203964233, + -6.3459577560424805, + -9.882917404174805, + 
-3.1422882080078125, + -2.550520658493042, + -3.2099051475524902, + -6.278127193450928, + -0.07764133810997009, + -3.155696153640747, + -1.933587670326233, + -9.61027717590332, + -6.211391925811768, + -4.664543151855469, + -6.783782005310059, + -5.676271438598633, + -8.605900764465332, + -0.0824289619922638, + -3.5463995933532715, + -13.374168395996094, + -1.2401021718978882, + -1.8734056949615479, + -3.4154422283172607, + -1.6733763217926025, + -17.633970260620117, + -9.345113754272461, + -0.6277351975440979, + -2.9617538452148438, + -2.5565333366394043, + -10.10580825805664, + -7.130337715148926, + -7.36820125579834, + -4.098911285400391, + -5.747079372406006, + -2.945054769515991, + -0.7887389063835144, + -1.6583149433135986, + -1.0165244340896606, + -6.581666946411133, + -5.926386833190918, + -5.845194339752197, + -0.9657630920410156, + -7.868755340576172, + -1.3244551420211792, + -0.2657390236854553, + -0.06403665244579315, + -2.983020782470703, + -5.943899631500244, + -7.877285957336426, + -3.593116283416748, + -3.819509506225586, + -7.226177215576172, + -2.5206997394561768, + -3.385587215423584, + -0.37499159574508667, + -1.4698283672332764, + -3.1460342407226562, + -0.0077166082337498665, + -4.350916862487793, + -3.2183218002319336, + -0.6242184638977051, + -1.4782464504241943, + -2.8054311275482178, + -3.0831401348114014, + -12.17662525177002, + -2.113419532775879, + -1.6448111534118652, + -2.1834323406219482, + -0.7630388140678406, + -10.1896390914917, + -6.234405517578125, + -11.46288776397705, + -1.003785490989685, + -4.211658477783203, + -1.5010679960250854, + -5.859302043914795, + -2.0465080738067627, + -3.7468819618225098, + -4.684195518493652, + -4.318704128265381, + -2.7234389781951904, + -9.00437068939209, + -3.043811321258545, + -3.1384406089782715, + -2.713779926300049, + -2.095993995666504, + -2.1484954357147217, + -10.274479866027832, + -0.682350754737854, + -0.25973302125930786, + -3.6964316368103027, + -13.434456825256348, + 
-2.3368239402770996, + -5.382724761962891, + -1.9073458909988403, + -5.905669212341309, + -0.032165709882974625, + -1.6530004739761353, + -2.728893280029297, + -1.640552043914795, + -1.1391171216964722, + -1.4353511333465576, + -4.003787994384766, + -0.3450564742088318, + -0.7168521285057068, + -0.34650325775146484, + -0.3616408705711365, + -7.062709331512451, + -1.2851682901382446, + -2.299129009246826, + -8.800156593322754, + -5.208735466003418, + -4.780910491943359, + -2.78342342376709, + -4.469717979431152, + -6.909726619720459, + -2.5114197731018066, + -0.659822404384613, + -0.6915416121482849, + -3.2363741397857666, + -0.5283617377281189, + -0.10473938286304474, + -6.215325832366943, + -7.283237934112549, + -1.6797031164169312, + -11.50100040435791, + -7.5822978019714355, + -3.387317657470703, + -11.407575607299805, + -5.441976547241211, + -3.3264851570129395, + -0.7265786528587341, + -1.382750153541565, + -7.841699600219727, + -8.105277061462402, + -3.9569506645202637, + -4.963083267211914, + -0.5492897629737854, + -4.6081390380859375, + -5.870400905609131, + -3.957930088043213, + -5.275494575500488, + -4.105091094970703, + -2.15435528755188, + -2.8472700119018555, + -1.1278448104858398, + -8.226571083068848, + -0.40629008412361145, + -9.916461944580078, + -4.616743087768555, + -1.691868543624878, + -0.6639478802680969, + -2.5716753005981445, + -6.676954746246338, + -6.535329818725586, + -0.4170510768890381, + -1.443942904472351, + -3.145481824874878, + -1.440589427947998, + -0.26935356855392456, + -0.9647155404090881, + -4.335958957672119, + -1.5647850036621094, + -5.890466690063477, + -3.01654052734375, + -1.9168468713760376, + -3.7365682125091553, + -8.001864433288574, + -10.680083274841309, + -4.489352226257324, + -4.6058149337768555, + -7.69011116027832, + -3.6247005462646484, + -1.5600426197052002, + -10.2160062789917, + -5.004643440246582, + -0.19602319598197937, + -3.375545024871826, + -2.669325590133667, + -1.3932737112045288, + -1.6410658359527588, 
+ -6.847603797912598, + -6.744344711303711, + -0.5215591192245483, + -0.25840020179748535, + -1.1448237895965576, + -5.57253885269165, + -7.251138687133789, + -4.221924781799316, + -0.7688062787055969, + -2.504502534866333, + -3.146519660949707, + -2.206653356552124, + -1.4295082092285156, + -7.96943998336792, + -4.332189083099365, + -2.5750505924224854, + -1.7102608680725098, + -5.311381816864014, + -8.897522926330566, + -2.994919538497925, + -3.3397974967956543, + -2.1794328689575195, + -2.437566041946411, + -0.3181810975074768, + -0.27412793040275574, + -0.7914466857910156, + -2.3470635414123535, + -2.4099245071411133, + -2.491870880126953, + -3.024170160293579, + -1.9719040393829346, + -11.373910903930664, + -1.4279751777648926, + -0.14573107659816742, + -2.055763006210327, + -6.366893291473389, + -4.24091911315918, + -0.00709194503724575, + -2.0199716091156006, + -2.524750232696533, + -1.4272525310516357, + -0.5185190439224243, + -2.927150011062622, + -2.7070627212524414, + -3.365638017654419, + -4.318085193634033, + -7.773144721984863, + -1.7947180271148682, + -7.657534599304199, + -8.767786026000977, + -14.74280071258545, + -1.8042558431625366, + -3.2712037563323975, + -1.4002125263214111, + -4.887944221496582, + -1.4821010828018188, + -1.5255622863769531, + -5.879070281982422, + -4.463839530944824, + -5.1955976486206055, + -5.665647506713867, + -0.3775045573711395, + -5.9350481033325195, + -2.800539255142212, + -0.13162286579608917, + -3.034379720687866, + -4.729524612426758, + -4.6252641677856445, + -3.850942611694336, + -2.4760568141937256, + -6.059760093688965, + -10.12075138092041, + -0.9469369649887085, + -11.595907211303711, + -6.875324726104736, + -4.268826007843018, + -2.835529088973999, + -3.8626279830932617, + -4.876199245452881, + -0.013071090914309025, + -4.964417934417725, + -0.7445687055587769, + -5.707155227661133, + -6.10660457611084, + -4.317755699157715, + -4.440443992614746, + -2.9202542304992676, + -4.743522644042969, + 
-1.2569392919540405, + -2.8675737380981445, + -2.3151841163635254, + -4.318130970001221, + -1.9054772853851318, + -1.1808521747589111, + -0.765956461429596, + -2.768916606903076, + -6.237791061401367, + -1.7224305868148804, + -7.137521743774414, + -4.512486457824707, + -1.9069950580596924, + -4.145983695983887, + -5.365190505981445, + -0.059828490018844604, + -2.273892879486084, + -3.4013004302978516, + -5.035730361938477, + -6.501443386077881, + -9.903446197509766, + -1.6332892179489136, + -2.1572084426879883, + -1.6149548292160034, + -1.4698481559753418, + -6.01010799407959, + -2.2243528366088867, + -6.900836944580078, + -6.0930986404418945, + -2.974020481109619, + -3.225423574447632, + -8.423272132873535, + -1.3423724174499512, + -3.626147508621216, + -0.4862469434738159, + -6.860866546630859, + -3.8910953998565674, + -2.33319354057312, + -1.7229185104370117, + -2.215972423553467, + -8.99046516418457, + -4.099084854125977, + -2.4191012382507324, + -8.288970947265625, + -2.9641928672790527, + -1.5036451816558838, + -3.0544614791870117, + -0.0715634673833847, + -2.444031238555908, + -4.520998954772949, + -3.972568988800049, + -0.4985870122909546, + -2.1651363372802734, + -3.4427435398101807, + -1.730639100074768, + -0.9458961486816406, + -7.740211009979248, + -9.39163875579834, + -3.895984172821045, + -1.7523534297943115, + -5.41331672668457, + -8.910720825195312, + -12.971094131469727, + -3.0455880165100098, + -10.501265525817871, + -3.3864927291870117, + -4.842309951782227, + -3.9964733123779297, + -7.3046793937683105, + -2.6607093811035156, + -1.3541781902313232, + -5.003270626068115, + -3.944551944732666, + -0.11356143653392792, + -5.174440383911133, + -9.628616333007812, + -8.654989242553711, + -8.980416297912598, + -6.670101642608643, + -5.488286018371582, + -5.943419933319092, + -2.126483201980591, + -8.054739952087402, + -7.458671569824219, + -2.5267202854156494, + -6.455472946166992, + -8.655346870422363, + -7.903901100158691, + -6.221062660217285, + 
-7.129237174987793, + -4.2345380783081055, + -2.5375306606292725, + -7.697700500488281, + -1.567080020904541, + -2.084331750869751, + -0.25020831823349, + -1.5145041942596436, + -4.619244575500488, + -0.2970108985900879, + -0.4977554678916931, + -6.197869300842285, + -4.030620098114014, + -7.232107639312744, + -0.21076253056526184, + -1.563366174697876, + -1.133756160736084, + -2.708237648010254, + -4.080535888671875, + -0.6818401217460632, + -0.1864331066608429, + -0.49012088775634766, + -8.732468605041504, + -11.945040702819824, + -5.243098735809326, + -1.5294703245162964, + -0.8935543298721313, + -0.6174070835113525, + -1.5068217515945435, + -3.5766501426696777, + -5.393096923828125, + -4.202867031097412, + -14.765748023986816, + -5.2513813972473145, + -0.7597705721855164, + -0.2502063810825348, + -1.7403976917266846, + -2.8000779151916504, + -1.9808133840560913, + -2.1654744148254395, + -1.8629226684570312, + -3.222038745880127, + -0.040942225605249405, + -2.3384013175964355, + -10.210381507873535, + -4.5859761238098145, + -0.5805734395980835, + -3.7019288539886475, + -2.001936674118042, + -2.7876083850860596, + -2.9799084663391113, + -4.349887371063232, + -0.0792960673570633, + -1.4366114139556885, + -1.0813264846801758, + -1.3510822057724, + -6.7060699462890625, + -5.436615943908691, + -3.978389263153076, + -6.785447597503662, + -6.147171497344971, + -3.97414231300354, + -4.332991600036621, + -0.9269428253173828, + -5.1237101554870605, + -4.486598968505859, + -0.04678357392549515, + -1.0307552814483643, + -1.4249452352523804, + -4.517682075500488, + -3.561821699142456, + -2.0815205574035645, + -0.6041194200515747, + -5.992964744567871, + -7.092092514038086, + -0.48916709423065186, + -2.6405677795410156, + -4.3345723152160645, + -3.533582925796509, + -3.1233346462249756, + -3.107872486114502, + -1.9901115894317627, + -3.1052846908569336, + -1.8440347909927368, + -6.21368408203125, + -1.8796799182891846, + -2.705214738845825, + -0.2987763583660126, + 
-4.070865154266357, + -1.6675832271575928, + -1.3896636962890625, + -1.5731089115142822, + -3.526170015335083, + -2.5088443756103516, + -1.208929419517517, + -3.673125743865967, + -2.501532554626465, + -6.875064373016357, + -8.512459754943848, + -1.042314052581787, + -3.657850980758667, + -7.0950798988342285, + -4.974049091339111, + -8.14085578918457, + -3.529888153076172, + -1.9389504194259644, + -7.0902204513549805, + -2.409292459487915, + -2.9428021907806396, + -1.688283085823059, + -3.622368335723877, + -2.0903351306915283, + -4.160663604736328, + -3.1683764457702637, + -1.2135626077651978, + -7.566033363342285, + -3.1186251640319824, + -5.899919509887695, + -0.9518840312957764, + -2.656729221343994, + -2.2994377613067627, + -6.806836128234863, + -1.280236840248108, + -2.838846206665039, + -1.3598848581314087, + -11.707776069641113, + -3.134333372116089, + -0.6230669617652893, + -8.219222068786621, + -7.562507152557373, + -7.489459037780762, + -1.5368008613586426, + -7.149652481079102, + -5.749268054962158, + -3.162869691848755, + -2.7235195636749268, + -6.128931999206543, + -1.1934199333190918, + -3.986410617828369, + -3.76609468460083, + -1.712721586227417, + -3.195504903793335, + -8.397743225097656, + -3.1260581016540527, + -9.792022705078125, + -4.217884540557861, + -11.583260536193848, + -5.987588882446289, + -5.178754806518555, + -6.994749069213867, + -5.167606353759766, + -7.124668121337891, + -6.201416015625, + -10.203682899475098, + -6.858526229858398, + -2.733592987060547, + -5.078882217407227, + -9.003358840942383, + -4.704894542694092, + -3.9085562229156494, + -7.247268199920654, + -7.091092109680176, + -4.4150166511535645, + -7.56699275970459, + -9.485116004943848, + -1.9977033138275146, + -6.65272331237793, + -2.236643075942993, + -7.518955707550049, + -5.525973320007324, + -4.67877721786499, + -6.608670234680176, + -5.536133766174316, + -10.772479057312012, + -10.8853178024292, + -3.6156129837036133, + -6.751470565795898, + -6.4537434577941895, + 
-3.4220399856567383, + -8.251005172729492, + -3.2146153450012207, + -6.330069541931152, + -1.5551663637161255, + -6.520583629608154, + -10.450878143310547, + -5.8788957595825195, + -3.7398200035095215, + -3.9084208011627197, + -0.3640081584453583, + -6.961522102355957, + -6.066243648529053, + -7.270624160766602, + -5.098455429077148, + -2.7642822265625, + -5.460171699523926, + -7.362828731536865, + -2.558631658554077, + -2.186410427093506, + -2.5309929847717285, + -2.46756649017334, + -2.0306026935577393, + -1.8713470697402954, + -2.108008623123169, + -1.2698389291763306, + -2.1712756156921387, + -2.4432802200317383, + -1.1477653980255127, + -1.8417484760284424, + -2.5971946716308594, + -1.8250831365585327, + -2.103092670440674, + -2.5183165073394775, + -2.9367291927337646, + -1.9412965774536133, + -1.7692793607711792, + -2.864521026611328, + -3.1332175731658936, + -1.098311185836792, + -2.946441173553467, + -2.2800471782684326, + -3.1929852962493896, + -2.754260778427124, + -3.485616445541382, + -3.3010287284851074, + -2.5537776947021484, + -2.6752865314483643, + -3.1617612838745117, + -2.4571690559387207, + -2.060081958770752, + -2.425969362258911, + -2.212725877761841, + -2.4232254028320312, + -3.0587053298950195, + -2.4074010848999023, + -2.457937479019165, + -2.319617986679077, + -2.6340954303741455, + -2.599524736404419, + -2.5302212238311768, + -1.6849274635314941, + -2.2609786987304688, + -2.039928674697876, + -1.9474098682403564, + -2.3550753593444824, + -1.718749761581421, + -2.413884162902832, + -1.6247628927230835, + -2.4784040451049805, + -1.828325629234314, + -1.3880831003189087, + -1.4448199272155762, + -1.1477117538452148, + -1.1669728755950928, + -1.8787822723388672, + -1.5565840005874634, + -1.6666553020477295, + -1.747725248336792, + -1.959598422050476, + -2.0376486778259277, + -2.345367431640625, + -2.055098533630371, + -1.3940613269805908, + -3.4385242462158203, + -2.7489635944366455, + -3.2590157985687256, + -3.1128957271575928, + 
-1.7070379257202148, + -3.9010369777679443, + -3.21574068069458, + -3.3850393295288086, + -1.8778185844421387, + -2.698211908340454, + -1.8060741424560547, + -2.0845324993133545, + -3.4797585010528564, + -2.263254404067993, + -3.083108901977539, + -1.6589758396148682, + -2.687279224395752, + -1.77505624294281, + -2.6142921447753906, + -1.934045672416687, + -1.8834377527236938, + -2.8038980960845947, + -1.550542950630188, + -3.4054152965545654, + -1.724036693572998, + -2.3146564960479736, + -1.5134503841400146, + -2.9289023876190186, + -1.5285141468048096, + -3.421035051345825, + -1.3757282495498657, + -3.441431760787964, + -1.5286564826965332, + -3.4372904300689697, + -3.173043966293335, + -1.1313854455947876, + -1.486415147781372, + -2.506413221359253, + -3.5165903568267822, + -1.4112176895141602, + -3.7175354957580566, + -2.2170844078063965, + -0.704839289188385, + -0.6626103520393372, + -2.5483946800231934, + -0.949668288230896, + -3.1339564323425293, + -1.3326977491378784, + -3.5493476390838623, + -0.6785370111465454, + -3.236161470413208, + -2.1347084045410156, + -3.802447557449341, + -2.585503339767456, + -3.9361765384674072, + -2.290905714035034, + -3.714280605316162, + -1.793616533279419, + -3.7252492904663086, + -1.450188159942627, + -3.11938738822937, + -1.25174880027771 + ] + }, + "32": { + "input_prompt": "create a conversational article", + "generated_text": " about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. The article should be about the topic of the article. 
The article should be about the topic of the article.", + "generated_tokens": [ + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046, + 1531, + 9369, + 2715, + 1402, + 2314, + 1278, + 17915, + 1307, + 1278, + 9369, + 1046 + ], + "latency": 48.63822364807129, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -4.4165568351745605, + -11.358176231384277, + -0.0701780766248703, + -7.797665119171143, + -2.6805992126464844, + -1.4707680940628052, + -3.0390255451202393, + -1.6902849674224854, + -1.270594835281372, + -1.1936196088790894, + -1.2523558139801025, + -2.7270259857177734, + -1.2371309995651245, + -0.9618493318557739, + -0.4379909038543701, + -1.3917063474655151, + -1.1055524349212646, + -0.9122569561004639, + -0.9911308288574219, + -0.08436793833971024, + -0.5424078106880188, + -0.9181017279624939, + -0.5873759388923645, + -0.19014373421669006, + -0.06655456870794296, + -0.15252672135829926, + -0.09415211528539658, + -0.009787309914827347, + -0.013910251669585705, + -0.005296128336340189, + -0.005677408073097467, + -0.02013739012181759, + 
-0.21594694256782532, + -0.07153760641813278, + -0.0066444179974496365, + -0.010198505595326424, + -0.011980246752500534, + -0.003686776151880622, + -0.0037619550712406635, + -0.0022467151284217834, + -0.004088377580046654, + -0.021828632801771164, + -0.0012669878778979182, + -0.09768074005842209, + -0.02652405947446823, + -0.0019286142196506262, + -0.002283824374899268, + -0.0032225127797573805, + -0.0009741804678924382, + -0.0009415484382770956, + -0.001211624126881361, + -0.001135300612077117, + -0.002340436913073063, + -0.0010846928926184773, + -0.0509282611310482, + -0.03832047060132027, + -0.00257422705180943, + -0.0022806129418313503, + -0.00262785074301064, + -0.0008195855189114809, + -0.0010239601833745837, + -0.0013777059502899647, + -0.0009899006690829992, + -0.0018756669014692307, + -0.0015304292319342494, + -0.08506463468074799, + -0.01893703266978264, + -0.0013797297142446041, + -0.0014461545506492257, + -0.0013971101725474, + -0.0005869334563612938, + -0.0005212855176068842, + -0.000876757490914315, + -0.0005256939912214875, + -0.0012863941956311464, + -0.0015691122971475124, + -0.051276568323373795, + -0.00973513163626194, + -0.0010469438275322318, + -0.0011531615164130926, + -0.0009969270322471857, + -0.00038342276820912957, + -0.0004032037395518273, + -0.000730247818864882, + -0.0003275334893260151, + -0.0008700875914655626, + -0.0017572689102962613, + -0.044393111020326614, + -0.013102858327329159, + -0.0011463745031505823, + -0.0012070996453985572, + -0.0012325793504714966, + -0.0005048430757597089, + -0.0004876854654867202, + -0.0007901645149104297, + -0.00041500062798149884, + -0.0009869233472272754, + -0.0018687656847760081, + -0.03943866863846779, + -0.014425630681216717, + -0.0014756753807887435, + -0.001423775334842503, + -0.001209719106554985, + -0.0005046047735959291, + -0.00042167355422861874, + -0.0007688426994718611, + -0.0002699726028367877, + -0.0006598440813831985, + -0.0017849955474957824, + -0.038999658077955246, + 
-0.012665312737226486, + -0.0014427024871110916, + -0.0014492495683953166, + -0.001016576774418354, + -0.00042083943844772875, + -0.00033241944038309157, + -0.0006403064471669495, + -0.00022373080719262362, + -0.0007053509471006691, + -0.0018597226589918137, + -0.030997740104794502, + -0.011259939521551132, + -0.0012655591126531363, + -0.00134151556994766, + -0.0008106521563604474, + -0.0003513672563713044, + -0.0002964295563288033, + -0.0006368515896610916, + -0.00020180096908006817, + -0.0005779979983344674, + -0.0016014858847483993, + -0.0271126888692379 + ] + }, + "64": { + "input_prompt": "App Concept: \"Eggy's Interactive Adventure World\"\n\nEggy's Interactive Adventure World is a cutting-edge app designed for phones and tablets that combines immersive storytelling with engaging, interactive experiences for both parents and children. This innovative app allows users to explore the world of Eggy and participate in a series of adventures that teach essential life skills, foster personal growth, and strengthen parent-child bonds.\n\nKey Features:\n\n1. Interactive Stories: The app brings the 20 Eggy stories to life through interactive, animated storytelling. Users can read, listen, and actively participate in the stories, making choices that guide Eggy through various adventures, each with unique lessons and experiences.\n2. Augmented Reality (AR) Integration: The app utilizes AR technology, allowing users to interact with Eggy and the story characters in their real-world environment. This immersive experience creates a deeper connection with the stories and encourages engagement.\n3. Personalized Adventures: The app customizes each story based on the user's interests, needs, and personal growth areas. By collecting user input, the app tailors the storylines and learning outcomes to ensure that each adventure resonates with the child's unique experiences.\n4. 
Parent-Child Cooperative Play: The app encourages parents and children to collaborate and engage in the stories together. Through a split-screen mode or multi-device connectivity, parents and children can actively participate in the stories and learn essential life skills together.\n5. Skill-Building Mini-Games: Interspersed throughout the stories are educational mini-games that reinforce the lessons and themes. These games are designed to be fun, engaging, and accessible to children, encouraging them to practice and develop various skills.\n6. Progress Tracking and Rewards: The app tracks the user's progress through the stories and rewards them with badges, trophies, and other collectibles. This gamification aspect encourages continued engagement and fosters a sense of accomplishment and growth.\n7. Community Features: The app provides a safe, moderated online community where parents can share their experiences, discuss the stories and lessons, and connect with other families who are also using the app. This community fosters support, inspiration, and learning.\n8. Expert Resources: The app includes access to resources from experts in child development, psychology, and education, providing parents with additional tools and guidance on fostering the growth and well-being of their children.\n\nEggy's Interactive Adventure World is a one-of-a-kind, mind-blowing app that revolutionizes the way parents and children engage with stories and learn essential life skills. Combining the power of storytelling, cutting-edge technology, and expert resources, this app is a must-have for families looking to strengthen their bonds and support their children's personal growth.", + "generated_text": "The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). The 1999\u20132000 season was the 10th season of the National Hockey League (NHL). 
The 1999\u20132000 season was the 10th season was the 10th season was the 10 season was the 10 season was the 10", + "generated_tokens": [ + 2, + 1784, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1307, + 1278, + 5805, + 46982, + 8525, + 1319, + 19644, + 1076, + 1577, + 1531, + 1032, + 1049, + 1057, + 1057, + 1057, + 1882, + 1050, + 1048, + 1048, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 1411, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048, + 5526, + 1486, + 1278, + 1032, + 1049, + 1048 + ], + "latency": 87.50420427322388, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -12.107745170593262, + -2.9727728366851807, + -3.720092535018921, + -5.592433929443359, + -10.964235305786133, + -3.654498338699341, + -9.33439826965332, + -4.833785057067871, + -5.187321662902832, + -2.6944785118103027, + -6.9262237548828125, + -0.654232919216156, + -0.5550781488418579, + -0.21346639096736908, + -0.0134271876886487, + -0.010840100236237049, + -1.3878544569015503, + -0.6296291351318359, + -7.9766011238098145, + -0.4393192231655121, + -5.639142036437988, + -3.277270793914795, + -1.0206468105316162, + -11.703084945678711, + -0.7100943922996521, + -0.2809169888496399, + -2.771284818649292, + -7.190817832946777, + -4.048691749572754, + -0.012056218460202217, + -3.3802318572998047, + 
-0.6807184815406799, + -3.4844107627868652, + -3.312331199645996, + -0.5001641511917114, + -2.61255145072937, + -4.243694305419922, + -4.333778381347656, + -6.0625810623168945, + -0.011777156963944435, + -0.37577226758003235, + -0.9490834474563599, + -3.5450198650360107, + -2.1778035163879395, + -0.45957911014556885, + -3.00771164894104, + -1.7600425481796265, + -0.09766030311584473, + -2.467618942260742, + -1.329679012298584, + -0.8384320735931396, + -1.1864604949951172, + -3.628342866897583, + -0.2470003068447113, + -1.8938640356063843, + -5.168431282043457, + -0.05005566030740738, + -2.258014678955078, + -2.449028968811035, + -0.0034086955711245537, + -3.9485883712768555, + -1.6201664209365845, + -5.139942646026611, + -4.859354496002197, + -0.23686674237251282, + -0.5541543364524841, + -2.5826025009155273, + -6.114635467529297, + -4.3380208015441895, + -0.7412900924682617, + -0.3221715986728668, + -0.13805493712425232, + -4.1797332763671875, + -7.3456268310546875, + -0.13762745261192322, + -2.0905232429504395, + -1.0178627967834473, + -4.108260631561279, + -0.6007124185562134, + -1.0410642623901367, + -4.122039794921875, + -0.35905471444129944, + -1.4274661540985107, + -4.139932155609131, + -0.4237431585788727, + -1.6294409036636353, + -0.9811424016952515, + -4.132790565490723, + -1.1318120956420898, + -6.8258256912231445, + -1.5455098152160645, + -0.6984409093856812, + -13.664215087890625, + -0.1166313961148262, + -1.6347849369049072, + -0.28875046968460083, + -0.03130083531141281, + -1.5293006896972656, + -1.6488375663757324, + -4.224111557006836, + -4.760683059692383, + -1.9758747816085815, + -1.5828256607055664, + -2.8463857173919678, + -0.2620386481285095, + -1.7243889570236206, + -1.7945923805236816, + -0.8884308338165283, + -0.3766394555568695, + -0.34033581614494324, + -9.05566692352295, + -0.22754782438278198, + -0.033802058547735214, + -0.34108465909957886, + -0.5644669532775879, + -2.0925779342651367, + -4.547505855560303, + -10.870464324951172, + 
-1.1072022914886475, + -5.503787994384766, + -3.259672164916992, + -0.007964519783854485, + -3.0111639499664307, + -4.246737480163574, + -0.7813188433647156, + -3.331031322479248, + -4.485962867736816, + -0.9492117166519165, + -2.6757047176361084, + -1.1591349840164185, + -1.122117519378662, + -2.629878044128418, + -5.986321926116943, + -0.2146703153848648, + -0.002392764901742339, + -7.372479438781738, + -0.007077385671436787, + -0.06599216908216476, + -0.0970711037516594, + -3.2874932289123535, + -0.0019583588000386953, + -0.9122000336647034, + -4.930907249450684, + -0.019508399069309235, + -0.308611661195755, + -0.07778516411781311, + -3.8497893810272217, + -0.46124517917633057, + -0.38821348547935486, + -2.668412208557129, + -1.845987319946289, + -0.06470083445310593, + -0.006619549356400967, + -1.2610487937927246, + -0.13015533983707428, + -3.365312099456787, + -0.0014690094394609332, + -1.6789823770523071, + -1.2499005794525146, + -3.3992111682891846, + -5.563300132751465, + -0.823418140411377, + -4.24124813079834, + -1.6597849130630493, + -0.6941139698028564, + -1.5637556314468384, + -0.5482053756713867, + -0.9507225751876831, + -3.764758586883545, + -0.0006518622976727784, + -0.7540555000305176, + -5.058262825012207, + -0.3302401602268219, + -2.8130555152893066, + -0.17079885303974152, + -2.871047019958496, + -0.3991694450378418, + -3.1476998329162598, + -0.3488404452800751, + -2.0545666217803955, + -4.201597690582275, + -5.164614677429199, + -0.0271432027220726, + -0.0009785869624465704, + -3.3444161415100098, + -1.3117046356201172, + -6.375423431396484, + -0.05535568296909332, + -0.3919340968132019, + -0.060594215989112854, + -6.507473468780518, + -0.0023910999298095703, + -2.143423318862915, + -3.335618257522583, + -2.953970432281494, + -0.0013383012264966965, + -0.8080525398254395, + -0.29526084661483765, + -0.04036511853337288, + -3.231475353240967, + -1.0585589408874512, + -6.136373043060303, + -0.006182829383760691, + -0.035548023879528046, + 
-5.509808540344238, + -1.8490750789642334, + -9.83314037322998, + -0.07037576287984848, + -3.1621387004852295, + -6.762360095977783, + -1.3490527868270874, + -3.601043462753296, + -1.176393985748291, + -0.4342959523200989, + -0.06266004592180252, + -5.464046001434326, + -0.017946599051356316, + -1.0416009426116943, + -1.6117159128189087, + -12.289417266845703, + -1.5004339218139648, + -5.76563835144043, + -4.038386821746826, + -0.20812086760997772, + -3.6306562423706055, + -1.3901070356369019, + -1.087137222290039, + -2.423213243484497, + -4.503086090087891, + -0.0008031480247154832, + -0.03627370297908783, + -0.1653430461883545, + -7.958648681640625, + -1.1018548011779785, + -1.290948748588562, + -3.8049263954162598, + -1.8253734111785889, + -0.059022851288318634, + -0.0013984196120873094, + -4.698851585388184, + -2.5421664714813232, + -0.024493809789419174, + -4.828659534454346, + -3.0295286178588867, + -3.550312042236328, + -0.1185273677110672, + -0.22595760226249695, + -0.10782183706760406, + -1.4033282995224, + -0.4485701024532318, + -0.2889708876609802, + -0.05471855774521828, + -0.007632025051862001, + -2.1156554222106934, + -0.6249589323997498, + -4.198577404022217, + -0.14178156852722168, + -4.284021377563477, + -2.227515935897827, + -3.5022120475769043, + -0.19575819373130798, + -15.964509963989258, + -4.055960655212402, + -11.125024795532227, + -0.7681724429130554, + -3.0436902046203613, + -7.030262470245361, + -4.376729488372803, + -5.476145267486572, + -0.4219042658805847, + -3.7689766883850098, + -0.060010604560375214, + -0.8134393692016602, + -0.11386934667825699, + -0.025473715737462044, + -0.09736856073141098, + -4.357361793518066, + -0.3670865297317505, + -0.08063744008541107, + -0.1311480849981308, + -1.0903867483139038, + -1.2705107927322388, + -1.5076212882995605, + -4.295275688171387, + -0.04185756668448448, + -0.19810955226421356, + -1.9645220041275024, + -0.9597910642623901, + -0.13429655134677887, + -0.002283110748976469, + 
-7.066074371337891, + -3.639211654663086, + -1.0263917446136475, + -8.124760627746582, + -1.132537841796875, + -0.09160765260457993, + -0.08996370434761047, + -10.165366172790527, + -3.501585006713867, + -0.0019847711082547903, + -0.05309417471289635, + -0.31209683418273926, + -0.15089339017868042, + -1.23564875125885, + -1.2685208320617676, + -7.832758903503418, + -0.19271136820316315, + -0.014305183663964272, + -0.0007532381569035351, + -0.44688940048217773, + -2.6239724159240723, + -1.738666296005249, + -1.6480977535247803, + -0.46753185987472534, + -8.656959533691406, + -3.79868483543396, + -0.9281394481658936, + -2.2381181716918945, + -1.7654449939727783, + -0.4948798418045044, + -0.025028761476278305, + -1.5435361862182617, + -1.6390818357467651, + -1.4962153434753418, + -0.3425217270851135, + -0.013077914714813232, + -0.038474079221487045, + -5.3364362716674805, + -0.42365288734436035, + -1.884093999862671, + -3.510357618331909, + -6.198029518127441, + -0.44375038146972656, + -0.0008789013954810798, + -3.6025230884552, + -1.419615626335144, + -2.6723289489746094, + -5.775190830230713, + -1.1380761861801147, + -2.6683366298675537, + -0.43395891785621643, + -0.003145867260172963, + -8.63144302368164, + -1.646262764930725, + -1.732487678527832, + -4.561546802520752, + -0.5277953147888184, + -0.07333153486251831, + -0.5624169707298279, + -0.12201295047998428, + -2.6561455726623535, + -1.1071691513061523, + -2.6895060539245605, + -0.040864069014787674, + -0.04126371443271637, + -1.8294739723205566, + -0.09022177755832672, + -0.3154001832008362, + -0.46215569972991943, + -2.2462844848632812, + -0.30149081349372864, + -0.52588951587677, + -8.288043975830078, + -0.0002057340752799064, + -0.8021711707115173, + -4.4546098709106445, + -0.0001565095444675535, + -0.0015961299650371075, + -0.15216240286827087, + -0.3677564561367035, + -5.018707275390625, + -0.7850045561790466, + -1.9582659006118774, + -1.0046892166137695, + -10.0401029586792, + -0.16878114640712738, + 
-5.944240570068359, + -1.5523078441619873, + -5.7253522872924805, + -0.47948503494262695, + -0.44009655714035034, + -5.671053886413574, + -0.003280022880062461, + -0.7937742471694946, + -0.9639376401901245, + -0.00030048147891648114, + -1.0747740268707275, + -0.8839919567108154, + -3.416811466217041, + -1.6602673530578613, + -0.2706959843635559, + -0.0024333172477781773, + -4.478696823120117, + -6.20179557800293, + -0.11359559744596481, + -0.202009916305542, + -0.022310219705104828, + -2.367263078689575, + -1.0405994653701782, + -5.984308242797852, + -2.105138063430786, + -9.583202362060547, + -0.0004957877099514008, + -3.0655455589294434, + -0.0669412910938263, + -0.8977450728416443, + -2.2271294593811035, + -2.6617536544799805, + -1.8184051513671875, + -0.8291114568710327, + -0.4864235818386078, + -0.7993525862693787, + -3.51106858253479, + -2.1530935764312744, + -0.257144957780838, + -1.3934082984924316, + -1.3137131929397583, + -0.3384077548980713, + -0.1697217971086502, + -2.353395938873291, + -0.03406282886862755, + -0.39059701561927795, + -3.422821044921875, + -1.7117210626602173, + -0.7018465399742126, + -1.5995906591415405, + -3.6218395233154297, + -0.12497704476118088, + -0.16966234147548676, + -0.7313685417175293, + -0.4956285357475281, + -1.0840849876403809, + -5.042126655578613, + -0.00031704644788987935, + -7.683258056640625, + -0.9210801720619202, + -4.687852382659912, + -0.0028814247343689203, + -0.043382611125707626, + -4.1948652267456055, + -2.66593337059021, + -0.06153333932161331, + -0.0023110604379326105, + -6.729236602783203, + -5.777127742767334, + -0.08932067453861237, + -0.09890018403530121, + -0.009886111132800579, + -3.1145148277282715, + -3.725565195083618, + -0.0021998509764671326, + -3.9927196502685547, + -2.753793239593506, + -1.6037236452102661, + -0.17461130023002625, + -4.804804801940918, + -0.2311229705810547, + -0.30256444215774536, + -2.235363006591797, + -0.006614102050662041, + -0.34757524728775024, + -1.4946835041046143, + 
-1.222062587738037, + -3.658839225769043, + -1.356170892715454, + -0.5371109843254089, + -3.7580835819244385, + -4.54621696472168, + -0.31577637791633606, + -3.677156925201416, + -2.7181396484375, + -7.4674882888793945, + -0.00019369633810129017, + -2.3798398971557617, + -2.5452184677124023, + -0.2858496308326721, + -4.315659523010254, + -0.025835415348410606, + -0.000603493710514158, + -0.2546294331550598, + -0.12032663822174072, + -2.006908655166626, + -5.990736961364746, + -7.146596908569336, + -0.23356498777866364, + -0.2201036810874939, + -0.01235415879637003, + -0.011248741298913956, + -1.4155778884887695, + -0.40242519974708557, + -5.877886772155762, + -0.7865053415298462, + -0.03231288120150566, + -0.004864405374974012, + -0.0050629740580916405, + -2.7049152851104736, + -6.822089195251465, + -0.39252761006355286, + -1.2290617227554321, + -0.007630132604390383, + -3.485461711883545, + -0.47985684871673584, + -6.1813530921936035, + -0.03757825121283531, + -0.37834712862968445, + -0.22192610800266266, + -1.165318489074707, + -0.5220151543617249, + -0.1289423257112503, + -3.216222047805786, + -1.0787583589553833, + -3.0716826915740967, + -0.6023419499397278, + -2.558605194091797, + -0.927433431148529, + -0.00364841241389513, + -0.14910078048706055, + -0.7318926453590393, + -6.159773826599121, + -0.0015301911626011133, + -1.8908276557922363, + -1.9641315937042236, + -0.021651331335306168, + -2.1648828983306885, + -2.2700207233428955, + -7.833290100097656, + -0.03397307172417641, + -0.8344621658325195, + -0.02225659228861332, + -0.06639260798692703, + -2.3780317306518555, + -3.180129051208496, + -0.09030630439519882, + -2.4138312339782715, + -1.3445552587509155, + -1.848326325416565, + -0.9726964831352234, + -2.851792335510254, + -0.0630769282579422, + -0.0011394681641831994, + -0.05843213573098183, + -2.6616668701171875, + -1.575437068939209, + -0.180197611451149, + -5.552371501922607, + -0.26108410954475403, + -2.529611587524414, + -0.37780019640922546, + 
-5.141795635223389, + -0.5921107530593872, + -0.2474975287914276, + -0.10687454044818878, + -4.891775131225586, + -0.25011152029037476, + -2.4100728034973145, + -1.358667016029358, + -2.790961503982544, + -3.8654675483703613, + -1.0076243877410889, + -0.7456949949264526, + -1.5575554370880127, + -2.05328631401062, + -1.6538066864013672, + -0.0558217354118824, + -0.0001817776501411572, + -0.0011643542675301433, + -0.038359593600034714, + -1.4208931922912598, + -0.542127251625061, + -0.3162364959716797, + -0.3966117799282074, + -1.1765563488006592, + -1.7920958995819092, + -0.18425509333610535, + -0.1092008650302887, + -0.46676987409591675, + -0.24977745115756989, + -1.0375996828079224, + -0.5268858671188354, + -0.008942908607423306, + -0.6404479146003723, + -0.0033111530356109142, + -5.3165931603871286e-05, + -0.5154370665550232, + -0.39286962151527405, + -1.401839256286621, + -0.6232213973999023, + -0.02168831042945385, + -0.004282470792531967, + -0.005199837032705545, + -0.09748794883489609, + -0.040823787450790405, + -0.00014852374442853034, + -0.0005832401220686734, + -0.005303124897181988, + -0.6537013053894043, + -0.38026049733161926, + -0.04189129173755646, + -0.010385753586888313, + -0.008756335824728012, + -0.013362848199903965, + -0.000504723924677819, + -0.002797620603814721, + -0.0014512732159346342, + -0.0013321106089279056, + -0.010883613489568233, + -0.005159396678209305, + -0.004701037425547838, + -0.01591104455292225, + -0.001474246964789927, + -1.2278481335670222e-05, + -0.010548785328865051, + -0.08341525495052338, + -0.03858809545636177, + -0.056062061339616776, + -0.0009532198309898376, + -0.0005789510905742645, + -0.0008986725588329136, + -0.00710969977080822, + -0.0006561510381288826, + -1.4781842764932662e-05, + -5.578839045483619e-05, + -0.0006398299592547119, + -0.0028786908369511366, + -0.0034092895220965147, + -0.008268529549241066, + -0.006602259818464518, + -0.004517706111073494, + -0.02233586646616459, + -0.0006323245470412076, + 
-0.009195122867822647, + -0.0029284947086125612, + -0.004457537550479174, + -0.017873765900731087, + -0.008801711723208427, + -0.0036383166443556547, + -0.08078611642122269, + -0.006347495596855879, + -0.0002177716523874551, + -0.04688572511076927, + -0.2718890309333801, + -0.07996802777051926, + -0.04674842208623886, + -0.009984076954424381, + -0.010000954382121563, + -0.050126753747463226, + -0.5864179730415344, + -0.2915390133857727, + -0.008090462535619736, + -0.032190412282943726, + -0.03461571782827377, + -0.2785419523715973, + -0.05830562859773636, + -0.02893950417637825, + -0.12241066247224808, + -0.02711048536002636, + -0.16450686752796173, + -0.09852994978427887, + -0.2651047706604004, + -0.35559725761413574, + -0.12606258690357208, + -0.32793670892715454, + -0.20878805220127106, + -0.7587923407554626, + -0.4730657637119293, + -1.496794581413269, + -0.2486363798379898, + -0.20723387598991394, + -0.1872958242893219, + -0.19151091575622559, + -0.3350580036640167, + -1.3085839748382568, + -0.6109102964401245, + -0.2947172224521637, + -0.37130236625671387, + -0.19888469576835632, + -0.18297068774700165, + -0.9978674054145813, + -0.5471905469894409, + -0.4379975199699402, + -0.407988041639328, + -0.17325688898563385, + -0.16938896477222443 + ] + }, + "96": { + "input_prompt": "what is the concept of double materiality in sustainability?", + "generated_text": " What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? What is the concept of double materiality in sustainability? 
What is the concept of double materiality in sustainability? What is the concept of double material", + "generated_tokens": [ + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318, + 1532, + 1294, + 60187, + 1063, + 5675, + 1395, + 1278, + 7401, + 1307, + 5558, + 4318 + ], + "latency": 126.4328100681305, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -4.917365074157715, + -0.9960631132125854, + -7.875392913818359, + -0.2993181347846985, + -7.760880470275879, + -10.308395385742188, + -2.1807961463928223, + -1.6412583589553833, + -9.521512985229492, + -1.627489447593689, + -1.8410861492156982, + -0.9285702705383301, + -0.2576955556869507, + -0.9641067981719971, + -0.02314644306898117, + -0.6696561574935913, + -0.07035009562969208, + -0.004622488282620907, + -0.025748632848262787, + -0.06276137381792068, + -0.17385317385196686, + -0.3285445272922516, + -0.0592009499669075, + -0.007940039038658142, + -0.22664028406143188, + -0.0017957051750272512, + -0.022929180413484573, + -0.005733947269618511, + -0.0012996093137189746, + -0.006419987417757511, + 
-0.02376849390566349, + -0.27800270915031433, + -0.4650723934173584, + -0.04936715215444565, + -0.003972141072154045, + -0.01477995328605175, + -0.0012044801842421293, + -0.014891182072460651, + -0.002709767082706094, + -0.0009939497103914618, + -0.0028436246793717146, + -0.006759870797395706, + -0.15416178107261658, + -0.20121537148952484, + -0.016414370387792587, + -0.0015769677702337503, + -0.008138825185596943, + -0.0007713441736996174, + -0.013819841668009758, + -0.003826678032055497, + -0.0005918181850574911, + -0.0014938872773200274, + -0.00485716899856925, + -0.081083282828331, + -0.09642580896615982, + -0.009630884043872356, + -0.0010948146227747202, + -0.007085552904754877, + -0.0006310140597634017, + -0.013073914684355259, + -0.0039152647368609905, + -0.000364713923772797, + -0.001292108790948987, + -0.004158303141593933, + -0.044283974915742874, + -0.05722038820385933, + -0.006369172595441341, + -0.0007976687629707158, + -0.005993015132844448, + -0.0004935238393954933, + -0.011310506612062454, + -0.002951553324237466, + -0.000387831823900342, + -0.000977038755081594, + -0.0036971091758459806, + -0.030511993914842606, + -0.04246694967150688, + -0.004863100592046976, + -0.0006927236099727452, + -0.005206122528761625, + -0.0005129451747052372, + -0.00894621666520834, + -0.0028565814718604088, + -0.00041333239641971886, + -0.0009002208826132119, + -0.0033131728414446115, + -0.021188799291849136, + -0.03330245241522789, + -0.0038543473929166794, + -0.0006504327175207436, + -0.004474864806979895, + -0.00048029806930571795, + -0.009718249551951885, + -0.0030443770810961723, + -0.0003743662964552641, + -0.0009439303539693356, + -0.003729770192876458, + -0.016505014151334763, + -0.0290373582392931, + -0.003315192647278309, + -0.0005821678787469864, + -0.004148805979639292, + -0.00042489083716645837, + -0.006856840569525957, + -0.0028660909738391638, + -0.00032574593205936253, + -0.0006986799417063594, + -0.003671098267659545, + -0.012792548164725304, + 
-0.02553274855017662, + -0.002730690874159336, + -0.0005067494930699468, + -0.0036923582665622234, + -0.0004451475979294628, + -0.007225453853607178, + -0.002746859099715948, + -0.0003323002893012017, + -0.0008517451351508498, + -0.003630714723840356, + -0.011284693144261837, + -0.02186688780784607, + -0.0025819556321948767, + -0.0004931663861498237, + -0.0031721293926239014, + -0.00040725519647821784, + -0.0062383925542235374, + -0.00238693761639297, + -0.00040749352774582803, + -0.0007970731821842492, + -0.003604583442211151, + -0.010075542144477367, + -0.022386692464351654, + -0.0025295186787843704, + -0.00046302087139338255, + -0.003138143103569746, + -0.0004233417857903987, + -0.006178564392030239, + -0.0021037368569523096 + ] + }, + "127": { + "input_prompt": "as before we talk about using , i usually do if else() to in case the property would be null before.\nif today i public a property and i want to use using to instead , will code be like this?\n\npublic Material Mat;\n\nusing(Mat){\n// Do something;\n}\n\nis that right?", + "generated_text": " i am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using the wrong syntax or if i am using the syntax correctly but i am not sure if i am using the syntax correctly or if i am using the syntax incorrectly.\n\nI am not sure if i am using", + "generated_tokens": [ + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, 
+ 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 8462, + 22692, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1809, + 1623, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 17047, + 1505, + 1693, + 1623, + 1855, + 2505, + 1278, + 22692, + 73751, + 1338, + 1073, + 1855, + 1605, + 5257, + 1693, + 1623, + 1855, + 2505 + ], + "latency": 155.6906189918518, + "cuda_graph_request_count_map": null, + "step_count": 6144, + "logprobs": [ + -7.793755531311035, + -4.224940776824951, + -5.446587562561035, + -0.267395555973053, + -6.118141174316406, + -11.105525970458984, + -6.67517614364624, + -6.902530670166016, + -3.2218151092529297, + -5.99644660949707, + -8.431673049926758, + -8.04836368560791, + -3.992594003677368, + -7.550079345703125, + -2.873685836791992, + -3.3877997398376465, + -7.9306745529174805, + -7.005484580993652, + -1.0481306314468384, + -2.686237335205078, + -6.131283760070801, + -6.2994704246521, + -7.931419372558594, + -11.3147554397583, + -8.544670104980469, + -12.01729679107666, + -3.89847469329834, + -1.7964364290237427, + -2.952878952026367, + -1.9217232465744019, + -2.272329330444336, + -0.37552154064178467, + -1.667820692062378, + -7.510344505310059, + -3.498040199279785, + -7.980632305145264, + -7.672002792358398, + -4.4999470710754395, + -7.155375003814697, + -2.4486124515533447, + -4.785946846008301, + -1.153855800628662, + -2.3994438648223877, + -4.250652313232422, + -12.24446964263916, + -8.344388008117676, + -2.608186721801758, + -5.200589179992676, + -8.25888442993164, + -3.6245617866516113, + -7.689338207244873, + -7.345355033874512, + -1.2661759853363037, + 
-7.265620231628418, + -1.9884108304977417, + -6.269482612609863, + -2.41705584526062, + -1.8929681777954102, + -1.8259913921356201, + -2.0997350215911865, + -2.323200225830078, + -1.3998825550079346, + -0.8789899945259094, + -1.082053542137146, + -1.1831339597702026, + -1.4462857246398926, + -1.6481035947799683, + -1.4408715963363647, + -1.2603964805603027, + -1.5267670154571533, + -1.6345772743225098, + -1.3796477317810059, + -0.7609691023826599, + -0.3548354506492615, + -0.7552334666252136, + -0.44776833057403564, + -1.1078286170959473, + -1.3036658763885498, + -0.5214896202087402, + -0.8486822843551636, + -0.22470997273921967, + -0.4705755412578583, + -0.5639711022377014, + -0.5388108491897583, + -0.6052999496459961, + -0.1002030223608017, + -0.286334365606308, + -0.45798981189727783, + -1.0107953548431396, + -0.11875647306442261, + -0.6969441771507263, + -0.4609107971191406, + -0.07614769786596298, + -0.5035472512245178, + -0.1682187020778656, + -0.10476160794496536, + -0.6586751341819763, + -0.35806939005851746, + -1.5364394187927246, + -2.4093759059906006, + -1.977368950843811, + -1.6216907501220703, + -0.27647316455841064, + -0.2991848587989807, + -0.2783535420894623, + -0.05913994088768959, + -0.03023873083293438, + -0.043339803814888, + -0.7320341467857361, + -0.0030677898321300745, + -0.0332595594227314, + -0.012804670259356499, + -0.004041599575430155, + -0.0014899593079462647, + -0.001948602613992989, + -0.0029070996679365635, + -0.040939707309007645, + -0.013942227698862553, + -0.04897322878241539, + -0.011005887761712074, + -0.0044113704934716225, + -0.0013179434463381767, + -0.003658389439806342, + -0.009758152067661285, + -0.0014104428701102734, + -0.0016671819612383842, + -0.000771939754486084, + -0.0015519729349762201, + -0.003720743814483285, + -0.004249115474522114, + -0.00485657574608922, + -0.005053604021668434, + -0.002336274366825819, + -0.0009155849111266434, + -0.0004978132783435285, + -0.0005953923100605607, + -0.0011395872570574284, + 
-0.001485078944824636, + -0.3072909712791443, + -1.7295066118240356, + -0.4807289242744446, + -0.1245415136218071, + -0.011858444660902023, + -0.020613837987184525, + -0.011020978912711143, + -0.003106294432654977, + -0.0009966888464987278, + -0.0019349202048033476, + -0.037407051771879196, + -0.0003496989083942026, + -0.005922981072217226, + -0.007394562941044569, + -0.0006037319544702768, + -0.0008836655179038644, + -0.0002884448622353375, + -0.00047600860125385225, + -0.0024947968777269125, + -0.00442774873226881, + -0.004059052560478449, + -0.0018594847060739994, + -0.0006179092451930046, + -0.00022635281493421644, + -0.0006730675231665373, + -0.003022746881470084, + -0.0002343380037928, + -0.00047791501856409013, + -9.440929716220126e-05, + -0.00021550717065110803, + -0.0013523490633815527, + -0.0032202552538365126, + -0.001157686347141862, + -0.004449942149221897, + -0.0016590891173109412, + -0.00101062236353755, + -0.0003079893649555743, + -0.00048375347978435457, + -0.0021734442561864853, + -0.00423036003485322, + -0.11514264345169067, + -0.8658493757247925, + -0.084366075694561, + -0.02140468917787075, + -0.0060798698104918, + -0.008638513274490833, + -0.003212531330063939, + -0.0009598892065696418, + -0.00032085992279462516 + ] + }, + "throughput": 104.94118337233992 } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json index 066995bd666..e1cada771ca 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgxh100_coreweave.json @@ -1,170 +1,161 @@ { - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or 
one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And that this is the place where you can be yourself, and be yourself in the most beautiful way. And that this is the place where you can", - "generated_tokens": [ - 3060, - 1455, - 1593, - 1395, - 1278, - 3535, - 2478, - 1636, - 1710, - 1402, - 14019, - 1044, - 1321, - 1402, - 14019, - 1294, - 1278, - 2725, - 15568, - 3039, - 1046, - 3060, - 1455, - 1593, - 1395, - 1278, - 3535, - 2478, - 1636, - 1710 - ], - "latency": 2.1998238563537598, - "cuda_graph_request_count_map": null, - "step_count": 240, - "logprobs": [ - -9.358587265014648, - -2.7594826221466064, - -4.608366012573242, - -1.4093360900878906, - -0.6152952313423157, - -1.7217562198638916, - -2.496668815612793, - -2.0547454357147217, - -2.441960573196411, - -6.280838966369629, - -1.5643692016601562, - -3.462346076965332, - -4.428728103637695, - -3.8633861541748047, - -1.9936373233795166, - -1.8929449319839478, - -3.796365737915039, - -6.8360137939453125, - -0.2901247441768646, - -0.9246833324432373, - -6.633338928222656, - -7.166708469390869, - -12.771251678466797, - -2.198296308517456, - -3.7778120040893555, - -0.4983733296394348, - -4.381269454956055, - -0.0666784718632698, - -0.09580295532941818, - -3.2437636852264404, - -10.079947471618652, - -1.172220230102539, - -5.977442741394043, - -5.046236038208008, - -3.855658531188965, - -2.5585858821868896, - -3.356245994567871, - -5.557229518890381, - -1.6787731647491455, - -5.483290672302246, - -12.218501091003418, - -12.61402702331543, - -0.09662941098213196, - -2.5431432723999023, - -1.4071024656295776, - -2.9154715538024902, - -1.1964417695999146, - -0.006458481773734093, - -3.3625335693359375, - -13.262511253356934, - 
-4.314079761505127, - -2.617699146270752, - -5.987792015075684, - -0.778266429901123, - -0.048888545483350754, - -1.548882007598877, - -1.1381981372833252, - -5.627166748046875, - -0.4078553318977356, - -4.958505630493164, - -0.6187160611152649, - -0.7174848914146423, - -2.469533920288086, - -13.620073318481445, - -0.09088654816150665, - -3.526974678039551, - -1.4195809364318848, - -6.402483940124512, - -0.5898402333259583, - -3.565917491912842, - -0.8561318516731262, - -1.6140165328979492, - -5.370549201965332, - -17.159223556518555, - -6.583524703979492, - -0.8855001926422119, - -4.19431209564209, - -1.2012220621109009, - -2.2563133239746094, - -1.7674944400787354, - -0.22064533829689026, - -9.292220115661621, - -0.12445646524429321, - -7.29617977142334, - -2.526529312133789, - -4.071560859680176, - -3.5568013191223145, - -1.926215410232544, - -2.349026918411255, - -2.2132363319396973, - -0.3125414550304413, - -1.4718132019042969, - -2.149106740951538, - -1.0855519771575928, - -1.631832242012024, - -1.3751734495162964, - -1.9396103620529175, - -1.5293723344802856, - -0.8444125056266785, - -1.2414811849594116, - -1.9522171020507812, - -2.4338042736053467, - -1.5651824474334717, - -0.9498789310455322, - -1.8044980764389038, - -2.356677770614624, - -1.247452974319458, - -1.550165057182312, - -0.5635553598403931, - -0.6177330017089844, - -0.4778785705566406, - -0.020452087745070457, - -0.48500269651412964, - -0.23854275047779083, - -0.06543659418821335, - -0.11837350577116013, - -0.0585334412753582 - ] - }, - "throughput": [ - 0.7170174223459943, - 12.998776662244524, - 13.163004282426089, - 13.581765270525981, - 13.619124445335821, - 13.655332144429561, - 13.608264815678803, - 13.614656540485411 - ] + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And that this is the place where you can be yourself, and be yourself in the most beautiful way. And that this is the place where you can", + "generated_tokens": [ + 3060, + 1455, + 1593, + 1395, + 1278, + 3535, + 2478, + 1636, + 1710, + 1402, + 14019, + 1044, + 1321, + 1402, + 14019, + 1294, + 1278, + 2725, + 15568, + 3039, + 1046, + 3060, + 1455, + 1593, + 1395, + 1278, + 3535, + 2478, + 1636, + 1710 + ], + "latency": 2.1998238563537598, + "cuda_graph_request_count_map": null, + "step_count": 240, + "logprobs": [ + -9.358587265014648, + -2.7594826221466064, + -4.608366012573242, + -1.4093360900878906, + -0.6152952313423157, + -1.7217562198638916, + -2.496668815612793, + -2.0547454357147217, + -2.441960573196411, + -6.280838966369629, + -1.5643692016601562, + -3.462346076965332, + -4.428728103637695, + -3.8633861541748047, + -1.9936373233795166, + -1.8929449319839478, + -3.796365737915039, + -6.8360137939453125, + -0.2901247441768646, + -0.9246833324432373, + -6.633338928222656, + -7.166708469390869, + -12.771251678466797, + -2.198296308517456, + -3.7778120040893555, + -0.4983733296394348, + -4.381269454956055, + -0.0666784718632698, + -0.09580295532941818, + -3.2437636852264404, + -10.079947471618652, + -1.172220230102539, + -5.977442741394043, + -5.046236038208008, + -3.855658531188965, + -2.5585858821868896, + -3.356245994567871, + -5.557229518890381, + -1.6787731647491455, + -5.483290672302246, + -12.218501091003418, + -12.61402702331543, + -0.09662941098213196, + -2.5431432723999023, + -1.4071024656295776, + -2.9154715538024902, + -1.1964417695999146, + -0.006458481773734093, + -3.3625335693359375, + -13.262511253356934, + -4.314079761505127, + -2.617699146270752, + -5.987792015075684, + -0.778266429901123, + -0.048888545483350754, + 
-1.548882007598877, + -1.1381981372833252, + -5.627166748046875, + -0.4078553318977356, + -4.958505630493164, + -0.6187160611152649, + -0.7174848914146423, + -2.469533920288086, + -13.620073318481445, + -0.09088654816150665, + -3.526974678039551, + -1.4195809364318848, + -6.402483940124512, + -0.5898402333259583, + -3.565917491912842, + -0.8561318516731262, + -1.6140165328979492, + -5.370549201965332, + -17.159223556518555, + -6.583524703979492, + -0.8855001926422119, + -4.19431209564209, + -1.2012220621109009, + -2.2563133239746094, + -1.7674944400787354, + -0.22064533829689026, + -9.292220115661621, + -0.12445646524429321, + -7.29617977142334, + -2.526529312133789, + -4.071560859680176, + -3.5568013191223145, + -1.926215410232544, + -2.349026918411255, + -2.2132363319396973, + -0.3125414550304413, + -1.4718132019042969, + -2.149106740951538, + -1.0855519771575928, + -1.631832242012024, + -1.3751734495162964, + -1.9396103620529175, + -1.5293723344802856, + -0.8444125056266785, + -1.2414811849594116, + -1.9522171020507812, + -2.4338042736053467, + -1.5651824474334717, + -0.9498789310455322, + -1.8044980764389038, + -2.356677770614624, + -1.247452974319458, + -1.550165057182312, + -0.5635553598403931, + -0.6177330017089844, + -0.4778785705566406, + -0.020452087745070457, + -0.48500269651412964, + -0.23854275047779083, + -0.06543659418821335, + -0.11837350577116013, + -0.0585334412753582 + ] + }, + "throughput": 13.581765270525981 } \ No newline at end of file From eb07b693b4aa7c3267b44dce7b55365c8dcc1258 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Mon, 27 Oct 2025 06:22:31 -0500 Subject: [PATCH 072/334] Update dev branch codeowners (#1963) Signed-off-by: Charlie Truong --- .github/CODEOWNERS | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index cc3cb0dbc58..7613dc59da5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,15 +1,12 @@ -megatron/core @NVIDIA/core-nemo 
@NVIDIA/core-devtech +* @NVIDIA/core-nemo @NVIDIA/core-devtech .gitlab/ @NVIDIA/ci .github/ @NVIDIA/ci .gitlab-ci.yml @NVIDIA/ci docker/ @NVIDIA/ci tests/unit_tests/run_ci_test.sh @NVIDIA/ci -tests/test_utils/python_scripts/ +tests/test_utils/python_scripts/ tests/functional_tests/python_test_utils/ @NVIDIA/ci tests/functional_tests/shell_test_utils/ @NVIDIA/ci -megatron/core/transformer/transformer_block.py @NVIDIA/ci -megatron/core/transformer/transformer_layer.py @NVIDIA/ci -tests/functional_tests/test_cases/ @NVIDIA/ci -tests/functional_tests/recipes/ @NVIDIA/ci -tests/unit_tests/ @NVIDIA/ci +pyproject.toml @NVIDIA/ci +uv.lock @NVIDIA/ci From fa384d200e4571d0f60ce954eef7d029a0d9cbb6 Mon Sep 17 00:00:00 2001 From: Xin Yao Date: Mon, 27 Oct 2025 16:56:51 +0800 Subject: [PATCH 073/334] [Dev] JIT for MoE router and preprocess (#1918) Signed-off-by: Xin Yao --- .../core/fusions/fused_pad_routing_map.py | 5 ++++- megatron/core/transformer/moe/moe_utils.py | 11 +++++++--- megatron/core/transformer/moe/router.py | 20 +++++++++++++------ .../core/transformer/moe/token_dispatcher.py | 4 +++- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/megatron/core/fusions/fused_pad_routing_map.py b/megatron/core/fusions/fused_pad_routing_map.py index e7c3a7e48c9..8e4d1763270 100644 --- a/megatron/core/fusions/fused_pad_routing_map.py +++ b/megatron/core/fusions/fused_pad_routing_map.py @@ -1,9 +1,11 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + from unittest.mock import MagicMock import torch from packaging import version +from megatron.core.jit import jit_fuser from megatron.core.utils import experimental_fn, null_decorator try: @@ -69,6 +71,7 @@ def _pad_routing_map_kernel( @experimental_fn(introduced_with_version="0.13.0") +@jit_fuser def fused_pad_routing_map(routing_map: torch.Tensor, pad_multiple: int) -> torch.Tensor: """Fused version of pad_routing_map. 
Args: diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index dc857129834..17942fa5a3e 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import math from typing import List, Optional, Union @@ -7,6 +7,7 @@ from megatron.core import parallel_state from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.transformer.cuda_graphs import is_graph_capturing try: import transformer_engine as te # pylint: disable=unused-import @@ -905,12 +906,16 @@ class RandomSTE(torch.autograd.Function): """ generator = None + random_logits = None @staticmethod def forward(ctx, logits): """ Forward pass returns random logits with rank-specific seed. """ + if is_graph_capturing() and RandomSTE.random_logits is not None: + return RandomSTE.random_logits + if RandomSTE.generator is None: global_rank = torch.distributed.get_rank() base_seed = 42 @@ -918,8 +923,8 @@ def forward(ctx, logits): RandomSTE.generator = torch.Generator(device=logits.device) RandomSTE.generator.manual_seed(seed) - random_logits = logits.clone().normal_(generator=RandomSTE.generator) - return random_logits + RandomSTE.random_logits = logits.clone().normal_(generator=RandomSTE.generator) + return RandomSTE.random_logits @staticmethod def backward(ctx, grad_output): diff --git a/megatron/core/transformer/moe/router.py b/megatron/core/transformer/moe/router.py index 7fa4692ef2f..16fc9d9af8f 100644 --- a/megatron/core/transformer/moe/router.py +++ b/megatron/core/transformer/moe/router.py @@ -1,10 +1,11 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from abc import ABC, abstractmethod from typing import Optional import torch +from megatron.core.jit import jit_fuser from megatron.core.tensor_parallel import reduce_from_tensor_model_parallel_region from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.moe.moe_utils import ( @@ -468,6 +469,16 @@ def apply_input_jitter(self, input: torch.Tensor): else: return input + @jit_fuser + def _apply_expert_bias(self, routing_map: torch.Tensor): + """ + Update expert bias and tokens_per_expert + Prevent extra local tokens accumulation on evaluation or activation recomputation + """ + if self.enable_expert_bias and torch.is_grad_enabled(): + with torch.no_grad(): + self.local_tokens_per_expert += routing_map.sum(dim=0) + def routing(self, logits: torch.Tensor): """Top-k routing function @@ -526,11 +537,8 @@ def routing(self, logits: torch.Tensor): probs, scores_for_aux_loss, routing_map_for_aux_loss ) - # Update expert bias and tokens_per_expert - # Prevent extra local tokens accumulation on evaluation or activation recomputation - if self.enable_expert_bias and torch.is_grad_enabled(): - with torch.no_grad(): - self.local_tokens_per_expert += routing_map.sum(dim=0) + # Optionally apply expert bias + self._apply_expert_bias(routing_map) return probs, routing_map diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index 46f94ebe79a..bb034292715 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import logging from abc import ABC, abstractmethod @@ -12,6 +12,7 @@ from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.fusions.fused_indices_converter import fused_indices_to_multihot from megatron.core.fusions.fused_pad_routing_map import fused_pad_routing_map +from megatron.core.jit import jit_fuser from megatron.core.tensor_parallel import ( all_to_all, gather_from_sequence_parallel_region, @@ -1386,6 +1387,7 @@ def _initialize_metadata(self, routing_map: torch.Tensor, probs: torch.Tensor) - ).contiguous() return routing_map, probs + @jit_fuser def dispatch_preprocess( self, hidden_states: torch.Tensor, routing_map: torch.Tensor, probs: torch.Tensor ): From 9069e1268f495407598d9f6771e363737505dab7 Mon Sep 17 00:00:00 2001 From: Hongbin Liu Date: Mon, 27 Oct 2025 16:57:51 +0800 Subject: [PATCH 074/334] [Dev] feat(moe): Fine-grained activation offloading (#1912) Signed-off-by: Hongbin Liu --- .../fine_grained_activation_offloading.md | 29 + docs/source/api-guide/index.rst | 1 + .../offloading_and_recomputing.png | Bin 0 -> 332427 bytes .../core/extensions/transformer_engine.py | 12 +- .../common/model_chunk_schedule_plan.py | 9 +- .../core/models/gpt/fine_grained_callables.py | 23 +- megatron/core/models/gpt/gpt_model.py | 27 +- .../fine_grained_activation_offload.py | 603 ++++++++++++++++++ megatron/core/pipeline_parallel/schedules.py | 14 +- megatron/core/tensor_parallel/random.py | 9 +- megatron/core/transformer/attention.py | 70 +- megatron/core/transformer/moe/README.md | 14 + megatron/core/transformer/moe/experts.py | 65 +- .../transformer/multi_latent_attention.py | 40 +- .../transformer/multi_token_prediction.py | 7 +- .../core/transformer/transformer_block.py | 10 +- .../core/transformer/transformer_config.py | 43 +- .../core/transformer/transformer_layer.py | 56 +- megatron/training/arguments.py | 11 +- .../golden_values_dev_coreweave.json | 110 ++++ .../golden_values_dev_eos.json | 110 ++++ .../model_config.yaml | 139 ++++ 
.../golden_values_dev_coreweave.json | 92 +++ .../golden_values_dev_eos.json | 92 +++ .../model_config.yaml | 134 ++++ tests/test_utils/recipes/moe.yaml | 10 + ...test_fine_grained_activation_offloading.py | 187 ++++++ 27 files changed, 1856 insertions(+), 61 deletions(-) create mode 100644 docs/source/api-guide/fine_grained_activation_offloading.md create mode 100644 docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png create mode 100644 megatron/core/pipeline_parallel/fine_grained_activation_offload.py create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml create mode 100644 tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py diff --git a/docs/source/api-guide/fine_grained_activation_offloading.md b/docs/source/api-guide/fine_grained_activation_offloading.md new file mode 100644 index 00000000000..b4c2ea753fa --- /dev/null +++ b/docs/source/api-guide/fine_grained_activation_offloading.md @@ -0,0 +1,29 @@ +# Fine-grained Activation Offloading (collaborated with rednote) + +Memory capacity is more and more important with the rising of extreme sparse MoE models like DeepSeek-V3 
and Qwen3-235B. Fine-grained recomputing reduces the memory footprint at the cost of extra recomputation, while offloading could utilize the host-device bandwidth to achieve nearly zero-overhead. Fine-grained Activation Offloading targets at offloading the activation at the granularity of specific modules, so that we can calibrate the amount of offloading activation to maximize the training throughput. + +**Features** +* Support PP=1/PP/Interleaved PP +* Compatible with fine-grained recomputation +* Support FP8 +* Support MTP +* Support mixed dense & moe layer +* Support A2A Overlap +* Support CUDA Graph + * (Temporary) cuda graph scope cannot contains the offloading modules + +**Usage** +```bash +# Enable fine-grained activation offloading +--fine-grained-activation-offloading + +# Specify which modules are going to offload its input +# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". +--offload-modules expert_fc1 +``` +**Compatible with Fine-grained Recomputation** +- For modules with minor perf overhead like layernorm or moe_act, use recomputing to reduce memory footprint; +- For other modules, use offloading to reduce memory footprint; +- Make sure the offloading/reloading could be overlapped with computing; + +![Fine-grained Activation Offloading and Fine-grained Recomputation](../images/fine_grained_activation_offloading/offloading_and_recomputing.png) diff --git a/docs/source/api-guide/index.rst b/docs/source/api-guide/index.rst index 710a7caf4de..ac6d7cb0b2d 100644 --- a/docs/source/api-guide/index.rst +++ b/docs/source/api-guide/index.rst @@ -22,3 +22,4 @@ API Guide optimizer_cpu_offload multi_token_prediction tokenizers + fine_grained_activation_offloading diff --git a/docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png b/docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png new file mode 100644 index 
0000000000000000000000000000000000000000..6c8afa78bb180a0815aff02693690b864e9b01f8 GIT binary patch literal 332427 zcmeFZXH-*bw+4z>P!Lfeq7L@*-C?8~5C;#x)#z@6hLxX~se14JQ{F&<%XMbHn z{-QX;L~;Jla|#O8GtB>->zv{K>l#W5iZBO?bAMf9Og{cPRsHqy&+%;bng8A~oAU3g zFBfK?{rmjFujimh(_@$-A1=L8e(p&@A>8)s`wX|W@iYa+BMOzLkM;b{Y|c{E^DAT! zcOzcEet(CTdP)Ck)tyGC_Sct|nI?BX{XH#RoPUn2RrZz8KNTrPak z{?_uwgRA#0UI#ore?V%T8BLe-Y$_=ZEJk?^1fl&QcD9YhB_$x%g2uqXRMC6qu0EnT z^Y6>QGWZt<|C+(SRPZko{>y~_GU2~W_%9Rw%Y^?j;lE7y|6?W$b~q=NX=~l3@V{{y zTuPZ%SnI?ssCy#nG*DHsshXO*>~>Z?pu3UEUQnOyH3j9xJO7_A`$AtvPKK6d26K`! z+`4Z$?a?a3(u!t?!%{z)E0r(axeuq@JiF%mV=c967$&$X@v($e9!WoaLk0}g4XbbP z{pruwLY%BzG|fBUU~vWYfi~s0y(p*c4_X*}wN34K%F7H9qJVLHZo~nRveQ32i?6jn zLesr;Nxd`q`=Db#J%hNusTQooQaS{BbJ;h1l%YVg8KRRl^z7Sr5pM(}`3=}#9ZKBy z%=GlwOeSGNB%iiXO5&O2BSNR74ytuGw2oYdT16G&gfI42P!L`pnbkI|@q3l3Ne-?! zDyhoCu#$PvW5Neq1**@k{8ICsf=8Q1N*2zVE*nq&he$76l`!&dwDK-pWd0EJP=_0= zmXd=bgmPY|{O!)06#8R6={x6Zsn$}5d6E0GZ9xY6q*#5|T4kFfg#dls9k zpl>snd{v9w!^R~Yg_;=5{>E}P+COxuF??JdKk^M!Q0NL-7X~#cF=c*swK#Bg8od>} z{;}mIrR0#Lqz`|WREH_s2T%BeYEE#sJdG}h^5SKIwxjXk_En_CclV>4?sZ@gtL0&q zNXtF|u79GstKUfgR2ax;x`WSO`V-E<>6GbBt7Ex>zfnB(jh#{Sy>p>gCk+z1ZfoDN zY#e+43npJ~QW9;N(}up5ZWsH%n@vamIH3xGMrDp^yr2PC6x2J_jd06yQSy@{pMUVY z&kV`lQ=%rJJ|^*P=e|2Nqyt>y1X7s|p#cTFayiP9nT19Z2QuTh=OMs*?rDwpK*JtE zFW%loUr|rwKWA{UzfTesp0ae7yd>T>$Y*9DaQAF8chs-v7}j_(64RIV-bpQlaE#l}DBJJHx)9K66_;g)-*ZwZEc|V4ia)G(H5Mna3>+g~ zTD^SolekVFsBA}LN+wxn({gk6RU*_)O0l2&OUuUbm!-7u=OijFi>ERMyVji_dPome zE%MXbVobV#mD`Cb!Z-gHodt(5`n^4O@&+WN{fh2ud&L!sJNJ>4rbf zCcHo=sN9lWe7t(Tk^zz>ee|*iX;WCeekS2AdT`kKzc>6#7B9-rmlrn#hP0C&B)lcd zvp*D8dmo#SAF#s4){Wk$az0V8~>w+nrVLw9l$D1Cc ziD5NE&&w~fl=%JpUSgiFS^khCBlihV{}5Y84XUr2)iG78m^V<*zl+XHzmfGfeEv~( zw%Ni!?{VHz-cr z*y+ho_a0Ebipj=X=x>%C!l!mx-Q2S7FRAIRjfaCc;>#Gx;y(8Bi}_1deUt}xg-mw- zBx8!;afZW8PU25>+DTKasTGfFp&_3Aq-7~Fe5|^Nnpm-PAv{7?=@ZNER{oDulgSo_ zqua7Xomzvv``53FZVL&}dMOpSO|g;9?}YjbUGlt5UwX*-lMg-1XBFg%nhD%N>>j#% z^;CR8rChXpZ4gJJc^GnV4@kJqq(s;*vORGM%QwxfbkjRrjPjI@M17jW09zRXH^%9T 
zWgaaqtMVUp#mfWgbkd8HIky;Y9VxHE`E~LI0=pZh^Lt*M^PqmU*xWX-v#e=P+<)h?U!T5ayr7iY6v(5gmfUB;pP<35dhLH- z@~7m#L)QU(|GX2te!f-@Ku4qJS-TfhwadkjbiQ|1#lP;#%EypD?V~`^v(8ntYkcx0j#STyd@)@5ZhrqK$oqBO9nLnW_WMh*e;d%n75bg?Z~@A3u!28S_e67d^^v5V z#y-tqY;lj`FYsKvxLMeYI=@#^x{vLmg9Vi^-CscG6%!~rY3TFaMH3{Sjue1id{jQ= zfj3VXbbj!z)A+dff5@=%HQ}>GD#fe5x55m+=;SZn{_oj>N3fiD?xTM=@XoN-!ll>| zIfFh>Gp==gtCi_zTBORK_pJi>MRPI+Cb!cu%PH=b{19^);1g}ZbEuD!##e7$T5Y^> zB#*o%6;JkGR|c}V|F^Y!a_nTKrok20?92AV!{hHV|GzxVFMRnu3IUw?`@Q6-wla~F z>GPae##L}-BI#NSydLMjw>2)>MpyLV&(+v+eW~H}e$>8lg8r;@FP`~Rca{FS|0Mui z*+xu_Tj~EICD|gFM%0PxsqGL65Y(i6tMdPl@GtSyo8G~dRNTLNA43UIb~5a|626tR zBo%HwFYRq{8U!!MAR8 z$g?A?Rg{@rj-TF(<^pbvK=8?PTmK*CS}=pfQ&=j*f z_(sX17HT3PS8x;342c4UJ?6gemmK&T3_kxiow`l}4rWAxCs@U*{B6iC5q{vhxMk>%Jb( zBvle&^Myj9a|P7#m5@NlPo*nD|?d8O&)KS1V6__cx4Fmpu$& zFMFGYaQU&tZn;4g4+k<-WnB}7f##0$Pr zHH;U1)VlY~=&Snqex&Nq^YG<^4fJ!XpAjulQ!s;Pv(pQ;nPoeoJG?=((N6cD+Ub++24%9o$uXvtY;=qR2 zm(g|yg#cAmRN#g=Lws^&uhNF}`>pomoZZNNnD}X1XcM+F*n#Jh%a#Ct+G#mBa;NVo zjv@X*I@pk3!S?jdf6nXY6PcmQ&|4P_*w^XI9Df~Z=qr4Z)GnSA_`^qIySXJ$z%>ug z%tH>_)0bQpE_N=4R{=&jacA7WmJ}&F5GX(*C~!%g8t5Xx1UZv~yViL({4)box8KU6 z;HMSaL));Pm9$WxsN@&i4??Rn@65FZrXIpfG9Jch5)bPV>n zE+SyB@rEiDuV+OfCBoqlZM=89Sy29|k!S|fSIfA45)>koO&hzNuS$1OFQ|CW{A#yb zA5$O9db1BcAS4zf4VbiW*Xqn*FptrR8`5gcK8#$Vrv|mf@%dZGRiGxpQDVq<2W+!ig}))>)`>$Xfg?LEQA&Tq7*6jC&>;|Tu{xy$mvXo&jf&b#O~A z1^!co^QMRr28$@b_6L$_0v3Oi?-hJOu7&3_>PwSTGuMT#)8sw5sW?y{`l5hFz2(l$ zL744&EP7^(x~)6yJI+6E9?tNb#hU%Mh_X%QB1>t?j)s=YZULq3o=Gq+u(ehzJq$>- z;lJ{fReZ$GSG%Sm@3L>f={^9KR;8Eg>w#*Pel@6*Tg{`_mGM|e+c_sWX~m)plDK58 zU_U4C-^w%BH^RtOvch~Y)pg{e&%HQqGrjSnN{k2zfIiROv}Q$>Z1MrjK{|2L+$0nC zSj)d`sRwW_gn){YkG6Hhw@#toP&V6x`U!=f^CHKDwo>@ta8?~*!_3CT-L7?_3e9+ z58V1(s>Etf15Kp4!eUO-tA-jnR%DL%o44K|h^(y+w=T;%_mh!V2DO@ZyWCRFT6*bi z^sF2mYhZ7U8#ZWg`gqHq7e?2!m8s7}cwm3|wA)IK(_glyhVBJ{4oGL~&FVnTwh{Ij zS6Gw4RanP7q*HrNtt2M%7L(zIyOSuPL-z+_M*P|o73hWKg_g`y(`zQc={+rEeN1Yh z=iYJ`(~fWmL3euNJuRuU3mY&Z9-^LUTr%czYN${M<(rtOa5|ylIawI0<{67NJ$2S+ 
zwnAP+>YryexASMsdAXNFH>Cnacw|5lZ#N9iPMMZ0=56n*wm^$KP=LR^_^%l5#-rB} z5q&h0W5K$lKTw0|5ezWR)UgwKeWXiLTI|;B8~srshN3NH0pIYA0}b!If)iDw3$b}xj%QZ`dC*fx0NjZ~Z&J^D5pmiu-!>q(Fe zARy|~P{SkpBdH<6GW3~x;8*v#=nA$JsTKv%Zzox@SiL=io$jL=z&hP4MQVWULSzNcajlz2vz2}l3YyAbEH{d)(x~P0iF!4= za#^ZjE zLSMw5tFNzbu9jz&ApU{6QWnE~42h+QLNQbL8m_1kYZ8YKJXTtCMw4-&ou>QKHMc=8 z=$m$HV@7~_7*;@4z@t^4RD=02r7Ot#94|f_qTUE1XGmgu1^lw`ad5LKtKR0>MghzW zfx;~bYH;uz`eeD;z`IK-i;lX~!m4-banvR6sp*$lkKVKlU3@M%B>tH$Lu~xqiIWzr zR{bM1X9KQet*H7_8Lx5wL70QkPq7CL52Z!SKon_lT|xXjFOLwBZl5UUb>AlHM>oef zG8smRa()15A&_m(KE2@?R!5JQoV<$N58LJBzD*Q-W=>t)51I4>?}wEJCejtgR|+5* zG0Aug)<>*GXf^#KMuIQNoM(Q~ur{DxE6@4OzQ+~qUWiq_s4IBj;BEy&W)gPVQvtNs zYa@MB6mO-yc_v}np)2&KlHh9c%4+Y?_{x;o8g9tl|I~v8eU_IPGt9_qIlo^~wYZTi zllyIX!sxg|zB}(gBxnu z&kRA?6}D>wNYvL~ig)WW8v2iSDcd$~$qECTJK%-% zam~LMuiGduy>fT=e;FqA>g2rJ&oqaj5fRrfGwJ4-nVuupP7&4@W?kTl{8fu2Zl0sX zSnUGCq$4+8H1s9V_(H2ua_SSt8<;ZHXkC$-yEe%PH?=z5P++Y}5tna@Qne`WeHlY4 z0O9p^pA1rN;g)eLM41PFtT&GDQ=ZL}$=>_a9dt<9bt7E+I98Z1DIZB64_fI6BkGq` zC`=8HmCJPqPIzsX*sF1P2Gv6uhXKDbrdw^{&Mp{c+4u6%cSVd(CxXa^ zpOH)ly$lYM_?#2en;loX>Ims-g;q@cI57S$$u;kunDjzrelf{JcBUz=_N%`H6}JQY z!HjWTtL^azL+YW9@p0ThWK367u{mSKX6VxmxwK!Pc7<>jcXu(!up~6KIsxR{$f%seBLdrx==r6KNT>2j^}-nUB(cvCHh5HkOjPP>-s_z zHembF=uz~>R=}*R^^)h|GL>Tq>nza6Ng+$*Dagb`S~c*IO+U&g&%$t5N?3@{tg~(5 zI4yW9`tQQoui%*GPFrW^Y6#0lTgv!AHO9hK6Xo0b9(z$CE4s#^>nJy0a%G@D<1Qt; z3=|N==>mq`7Yz~Q*BtHHNXuwx72zPYkEew9_zZ3AJEY=7Q<-ku6pM(6*gQLheD8oi z!^2n>YEDx1d(=tzG5JQ!jnJ38aNwosgpWmJmRy<1N zl6(exeABUN<<*#-@nf+uCx_`5>nMu6E%8ueOmV=veEGczcw#GLG^W|ZUkuEExiy3c@;1EOfkE<0H+NUNw(Lg^bNb)B&>XclmLz#9uM37c&B`XpwtF=61 zFe19TZAYUqpN|#OYPDdz>7R(aHWLZUvKsi#j%+Xi?+%X2RlE&k+`u&}{kJNRUDw9h zg0OyiuJ>xaXKo6yW+97j=F$Ou=MQQUZ?xOq303Df1JC+1-h-@gw*rPt?&T}TJgjyr2hozW=Dx6Hu`mc8!blM_(WPv~N)_ z_}T8`PC5Yb{7sT&6jbu0oLZ>B3{aASOSN1Ib224OY3Uvt(jJ7ojrWS9K1#X)E&DRE z9_|U4<_(fRoXSH6u4Bn8<|PXVC|=U%39&N@1wWF;i4b0!6i#_>=~r~`LM8~MbUct^ z30-&n5WFrV+Y;Dm+}WJpW$qP)APbsxvt z`qCRMQ#j`G#X%9#O3umcjU6wm0d5kGw~qW^OOW>uy>fbwq(g>wPgsxnJF@@>>@<(p 
zbwc1=P;Hpo^^TGx|FbVCwtDIbmgO$AK3%?Ulk$BVAwFN?@3eQ z44|C-bPu<&}nl)|gL&CXuf}%L$PlltX(P4U=6<^g> z51tRL*v)_Q4D3uEO&YK7@ev0Tn|m8{g%fA49eFC~l7r&b2O^>k&V}Xq&1xcpuy8z2 z(946S@CXKMrT921-^+V{}pw>E=RHCms^t}LA@Z@?aU zr{dqKS2@NENt@@?8$ao6Eh2J-JI)NFKhke_#!mW=kQrJ1Mw$-@3(&ur3is92{DQJ` zQ-b`wFH#0(^G*uj-*t}iJ~CX9%RCPKGQ3x`M=bOZGB~{Z+Nd#~x>?^nP<~)2^#%vzEchhCMdy-RZm)EWy>0 zlovkSki1VU^~?AJvf@*su%#rpo1Clf&q;cm=hCz!_5FCO5aWF>y3hQLVZPCPJHp@w z@Zcl-4KUB%goN&hF>Dw^tLlHo$y5;))Wp zdD7!$V4=MUd=>)%DU@PbQny+4#6+-x2FD+F%4L@~oXRqe9O3(#I@Pj|3QD_HPDjE3utqKI zlz1zr&E`T(NQeVom!Pz^8?;&=^Df)+Yjv21d?L}NVbV)gHu0uyY2UnyXFxSubx?8h zvfdygT@$Cpp^Wac@lo6ccz;$(&wOLIri0&y_n0riwF zCbROvSZj18w3tx&C~e5zQMvDnPTi^w9+Q{psTsJfT?Wb9euKZ{>^9!of%psqhgHmk z`3bl)^LwB2dBY!k3oxvB=u_UT4DR;f@jlH5HnbXu!-cTFe6yx^WMs?nO(_kK$9r;u zwWR=p^ZxSAn>D^2BB4}z<;AwApiT?`cB+|x!E~7E6rs0nr1rq@VPw`DS!L;`6O%3K zpvIp|z&@xnw$QbR`7R-@o*qpoJ<0lLv8y6AXA!K%bQ>d@F-y!XrxdEC^r%6>^knlw57_a^t6uDNv=i@I+B4AS zcN&NcUn?~t05boR(Xk1+kH9CYE$dXQ&$!mI9p!l zz1LEAZJrzCBBcsYpO8KJAwFF2tF|}<+d3;Y+J2f~>Cu1$5uUVx^2*QAuQq2QCL)fj zyNIuQKZ=5|IL%hPoO9TvH^T@%zK6JZSo6Fyp{7E6R>QwyxUEM_3qnlk^-2jl_G-@3 zk_-h^%jxtH-iQWOx?qP0wULa>EGEFZ7`+9WRwAG_z=U%iB%`CGiZfv!_4CY)i;LmN ztbqwa>jtKmFNOE4${ih*@Q(VF8XAbq8gXbEs82{A9np-WCeq|OXO)w5I+F)sNvJ!R zrhUEHT)WeK!#zzwCl|S-NV7sMStRSm-Rna;+TH{|6AJ?@zKEQ$H3}p(LWPdfJ*PaW z+-Nj8w%@3c>)htDqqOn6IUv*}qD zTA3ruigt04gIq0ixrXzP@NnU9p~++Ttpt8CQjv@}_dS~9;ViA6cR!;092}_zs{*TG za=rA}yK8V>~Mx23JoE$xKw0X#nvPgOF-&Ex{b>OalH7$By z*KNu(P)CTrwL7FhJqCG5Zr8E>W|p6>D0-}Mur?Gvd_ZjAP2cI2yW{MDA_GhZ_r^`t z2$#bKxpUu}nTc?g7v_DkfA?Ktb37GV=k`zgB4YU0))Yw}*dANLuEy?YZicAGWQO{nndT-~_-v_cqdv~B5Aju9> zD_gry6zsvCb6eNB^p+%jmP)+^*CJlRsAS}RG)(-6-jUW2tad%53{+(zLq#jFuu({N z)@2`}*sETmUNr4dI%{=WuDvH|7wt(^k@z35WYzQiG%Y1ZVBfG6E2oMmK*{n&nCEJ( z)rxk9VHMpEFOKs019pVraa_#fBJhh*9D7oL`rdbomHm=G^SdoVnN>VMy6YG&qD~JQ zJLKXUG<@4ihnQMC(FG98=Qt7?=Msm3x2KZ=QxfKe4rc`UOv*N8v2r#UJ}E-HjdEW4 z8XGmS>nDx8$vde5=2NRVN%=jl#lfv~cUC;#@71|5HM}&khsR9~O=!C;XP*@j8Qb=b 
zvZ|qr1dDHYvhviLMb}^kROS8Z=g6g=2PPHFd6@k!FH&3*>;Gu__;U{}p-@a_66SKg;viSNC~njQ|Y&W`4z z3;&p;4K%;q0MwRL@owx0oMH%UTkCexaO=v zQ};Yi>N4YTjjU9n{b-{LvrlL*J(t9c$TBsVJZT~NE{*2P>OU?$9rZ~!a0$}4Hb!xS ze7oFB6H^mFrQZJQq-k&*-_$8>0Ck%0enz*7_3_MKBUn=4T(p8W4AG-6vKx@LKV ziD?N?^<4F&&AQ?pLpFAqr$=+;9=YPyh4)aBKA~$O28&dBmPLK%e;S#|uMpD36U%S& zeaSu|N4$C4^I+4J^oK51%%gIC*IGLY1QJ$t@?78ad0vHx=VN0|75$#aPMf_@N7tEX zfjsYlMoTisBTb&YBPHIKEv7c3e6 z%DV~l+(^@zKn@k}X=?yqY&2B)FNE9yiJyPE9ns@v!^IZUnoqKWx^2?OAG& zTJ-S7%PjHQ{lOweCcr;?o8*=ko)7ypd)Bi-iZA#niQd;h*|F?kS)CqzwxdBDt?TwA zZC(=hK0b$%bw6l13w=4SDOA`l9i{nYnAijVv>y`b5>T?E$C02*pj%@o3{~CEJ8w)i!odBJsIv+YMW;#NS{Wl39p;%znTktc}2=?HYFHq zt+@m{M9KC|e{GFboZ47YPO06LpAt?CC4eADaBlUFmW(S`^jD!7wv;vQ&|a)%Tad`k?lZ;MI9wGSjc}@`*J!Omfl+1 z@2di0Kw-WX;FAa9Voak-LqNsx&&H8I+jWfW zgEejhY^NM4Tsy3_6+u9!z98RU6CpWT@z1K9pLq;WwYX%1>nBupc~Z?M7Txrfs2Ve zuK=MYFQjg;dfX(u#x{;_pS^wcxpJ%DhyAq~k}epk9e?PwO{de;qD7l0FP{?IjOUqW zq?Tu-!5I=G0ASpix-ma-ooWtDpojU*T`w6Bo&Mp?1JBM~HH^?0S|%57&q>rD!DSC* zB-9zW$9+-@h^G#lI7?zP-DI4QNc6gfF~Hr9U9x!Aip0+J5QZKyAZhi2#8(?;L>!zE zNQX5JG1uN!ug&=~-Bvm?o(r(Qtx^)DK0Vx3OP7hq>^P{|*c6eKncz8c;^2%15~9{t zzYRkutxVwSO3^|Hf6#^1pGui1E1e)(6``*<_*}T$%7M3SrfTC*8#47op4TG7vg^MM z3L6^-R)FR^HLuhfMvX&}RM0A74l2;1YNx}M{sisrT^e@?2>q_uvrxtW&`Y0V}MYg0;wyi_Tz%wH@n;VopYNJ=k}&6 zaZY_kOc3RJp!fmUyPg;Kvha`c~k(*R{`5IQzt(wxHoZY%BHiE0z z(+ZHUtaIDzV<7oZa1YyLPV3$AR>!mt0EZ zJCTQ)l4qTN#@iX9i*UCV;;vbv^N>)o&Z?!)$G5c?IrY&g45Yzmm$g>i1}MFt+Qm z{|JxmqJ_BemKRv`MZ_a^;wPDTmJI3D&5Q49T)J&o(tNUlNK_F7-nl|Qjod#T+%~jC zgRK({tc5n@+A@NrD?Uu_bkK>jh+scFcTLh2_bK`Vy)NqW7(IK7)5rb^D-~zF18=3( zXxr46Wk-yEn&3{k@<=&d;pV4fS7713^D3D40|;IEJ%+=q#WnX@ZB*nzhB8s)$9#BX zbK0JGMErZ3n5{!5CydAu51nh|DG0LrS`{w(;#%!0M7Ymtbq4M^*s#eID4q}AVG9b2 z5r`S@A8Me@A4HH9fnzRGi}EO6M) z#rSdO-Im`v%(Cfp^|=fwr;xGe9j_hk-5%OCkWgoq%SP^<3Dff^@YGvW39kvy8NEvS zanCRf{BTPEV6FuYUb>=&smQf{HfHko{{lej&=X#-av*(yU!saHW;{GZzJ@f)q&Q9;( zmzf$TbP-doyp=pxQ4}}z1;256711|!&v<@t8J6p9Ya(1eF5JwtX*23QUD~DLA7wU% zb)W6)AgWn{x?%}9t1!}+irJQp4v%nrySNy0Rk8Dl}3@@AVK`K0tYFa 
z%U<-!2+vYiN~a0_b~z!`0|}okHm`w3bY+grZ&_)^xhIT7Y5k{*BbN>@7*RP4q)sMP z3M#^)kBrQ=4`+pSe#-UwmMS_nxo<5x_E%kS9F`gXH2?M7S;*%8vW)AXT zl<4KO@$4v(99D)_dw2QHs6@+xJVW200TWB$ht{XlxHB-#b$ZeF5u;omP4~ijI1su! zmtyl-@=kQwY;-EYGE%0y>B@^|6EHvHEEz?h-}8if&F?Ox!B2;~K^70JJH!c&+P!Ca z=aQP7xmmCGrVG1WBU@ol`K?=DQ7rOuj}B>^ZK2f zQ|uhN84{{lmrHnc*I>L&$`!CxDY|J7uzxv@nBh5i{QV+)sA1r^*(0@@tjmJkb>VGC zw>L#DtfRkz6z`&%B(kFN29hT**9oz9Kf*Tuj8(=rer+IEd(dQV(@#SiEvE5m(S7To zE*zlpNUmC&+ROOerC@4{j_ImAb61PX*^%#@!=9m^x(6wVu!NMC| zuPdsO70cHOOxb@vESr2&0rM=88?{bPpp5W~uF<(ebRZk5l@umY=$h(pPHkBCHI-gK z`vc==l#=iT4e4c*B1&JGNKu#3?_ZQ04>L^m@>yk$9z8AHcj~_KZNDHR<=gHl&2g2g zIqG(C{bx(xP7Od#ntp<;?|bm0V>Xv*s&9hBmuoZ$ptpzl#DVX2FZI>t^mXwa7CBy8 zHbeovJDzh^dh?INL0*O8J`em0S7&qHcizCtv*%Fh;S7lT48)43In`mp$P3hmNyCt< z-I&x)SZ>K=swgai8;&-&DR|JFwF?cSVcoZKiPU(djzAij6b#^nQcqPDEx26NKBB5! zRt>BUZ=~8g&lAz(_C{w$BVD-i-1@Ah$<&$B`ixx4{bg^XN$PcRR?`=BO+bHU0~@?P&7)Aw5-Gmo^yPb^6$FD~5P4ATI? z7@F3x-r=%m^v)Os9Zd)T%KK1ZO3InFkoPWOgX10#Z@XIGI zsXzerfEAg>olP!|YnWG&BaZ>WoSe=5CsHfFjT>lxpr9*csQzN_=oSn*xc!jy&V?0! 
zR4`W2RYr(P^e!E5{&EwY+$Z|E>TtD*b$rr}=ZdzetYHUDSr}Oze>nY8hu{^5%|*|%EJa8kQF%ReY-Ir^>!23xENmft2geZ19aGQYX|u+xRIY}coyXesVd%6M<% ztj_e4QIZ3h*I$=TK@XCBOtHwj; z(&-l)GEokvR`A%yG43SZ`}x>3$jYIP$y=-8;nVIs;KwB=m4HjNB(IV(lW-4H*Wi@J z1&ot1{#mp6?pU`MnEsTHfsw})GNrdi?z}?IGT@yQsXLEp**Vp~4ZT3eHS0v92|_T7j-EF5rLbyuobePB1?b*-_)YP%#lN1kFRr-L4yo6 z%!=bn&UIBo5QFl?ViglkCcy9f(+@F-I~6hVVBhp4E1f}X6dCPRE`%7Dk1R=}Q3X8y zMT(HrrJDB%Ck4H-axmvYBl5Q6_2Uk!T6tE!jlRI2kgn(b@?c08k@!b2(@)aO?ZCqo z6^7~Iqyzs0xZ7W!7}c2utVPE3dpM8%JE)* z+>4K*a9RJ@D^VcuP&nO6&nDYaBI^cI@z0y2QH1t>>gPEihn@%Wt#HY+d#g9e zapi!Tt$FRL4i1y@sRNwe4MsQc1CHIUCLCh{`?E3ef%xeed$>7h!&31R*R_3tfmDh0`gbk^YTUSqa z-{|Ms1UvD=2Z(|`r^#~5kd<^Rtfv!`6Pe95_6P3&HdwHx*=7;{Yv@!un7q5Z6=7Zi zUUuI+^X{(nYG`R-cTGR*PoRMe-~Q8P!61uWP&gHNymUv%E#&)EMosxqK7ooca2tkg zm{Do3peNcpQA^i%&^_xzRpprt)l(r}=vcMYG>kMLT?B8FWE!W7t>z%e)eGo$;AvJQ z(I1o{CSr&~`bk2B_E1jJix~ z&Mkd>CRhK~23Ysfd(YBlHJ<9|098|y9<+wbf4j>sUgp&9)yR5tM*9Kf!yE6+L_9rl zDeh?-ADKJg^s;qx*`at?ngFw+%54$r*`BDM&vs~;P&u+)B{56T^`Pcp`(MTFP$iXve%KB1-3!X`aG#0`6-?UsHuhtTt;0pC629@bjn3 zAk(+Ye}v8w;KGzo^kg5u;x9+;k&6AHIw64 z{x%nR0WyZCo|2r{x-@KZo-2tZ$*v{;TLGR6@WW&g$QH3x7xV^8#Q-hX@iWscLc3f# zU3@St&?H!Td(~?69aiHYdF4sW^=Wg$(zgv+6M)wKyz>b##UtM+&{NYXwNV0*s_53? 
z{sZM=-+uKsNQeI#No8YXjuJFln0Z?ZRk7(hK;OHhF=%|xwpxCosH}9ekq>lZcm1y8 z>KRs(W3>cQ=8{r4P_QL47dZ*l(2iZuKo1Vh9{rR;@pMkO4WV!1&%h!gU=j4A{2k;e zt$Z`AJAyc`J2No=h%r0~13itChLY#7{t&Ujp+t!5;pm#Ns-#dt=w6RSW>9>n%am|5 z6)=AX`s8OSxdsA=NQ&9Q$WI$JGZ#Ztdp|aW@OOs1fOuwG5gY%i4G^ zLk-$RVwAb73Kdl66V-kNBG1#;@8gAZB&5wr&R2{PO9svzG?|aTAjg{Y`7xg8|MP# z$UE(~t7aWNUS8fwfpneGKC5re{y|*Bz*({jw+wTz;4#RyBJxjcMQcoDOQ4LHKd8&T z2G5PX)%(3AUE-YIBO8{!U<3Z>wxd-2`I#Gy?@1SWfzxMshn}TP@PsNCXzDSg;m!2t zAU^#mGY&rJbiiPC`G81N9j7J=$%_YtYjHzjB{X$mMkpKmS=VB>kyd=t*J zvI7Dd~-6fKojJZ4DPJGSSyuq9lY>d^pkbloc_!H(#6q} zP-!WF4zC+!;W3V(Kwt{F7o7xWfWD7UQ@{zPcAsA7JXn7#9ATWh^4AQFjjKaYn47sP zhwGaGPvPwxG99%9+0S}Wa^UU`RMP!uB-7EvgJ=~8O)FmgC7c^5y3fiu!k~8b&9R?b zC%xen*OLPVT3MY%xetOxW7vs(2^?`Jj=#b60}#Z!=NJ4X@oG?Br?8&2vHW#nKM);zyYL6HI@(ii&P((7SsM zxtSr+QIV)L0kG#m1>0()?Yt5Dp<#uZD%~@yij3g}!=ipQ0GKF1u*OcSUr9a;%6mT) zWXhwxA-CW>(3KZnyMMvAea>|EtioV*2`@82UW-nf*=l-mk#`@NexKHx_0vlsowv04!HpjQP=kI?DZPhC-0N@@At(I!EU!5Piiu1-5TGSJ zS2MRK$B!R0NzL;LirC$Q&~SS8)nnUvG2u`6^@69pA6jzSt}(5g>i%Sdksf~k3<%>j zke1b@y~xTq(~9AN{1nes434W?y^jCoW6MK71G{|>*A(=Y#3p69mT!}rb7%U(CN#LT zErH*QMh84lFv$yfvF_@so=mz$*Xy7JchlRW#?>!PGV>newd+B=iE}xW7KZ7!_>B65 zyj!sRk>pDLO6&V6-uCG$luh2;(|Pu#a}Gf5-J14vhm!OJT5Y7fp55kgHdmkbvFqXH zexv_0rr+Yi5$ad$yY`H}A?7Rx``m|1M#n$pe! 
zZb&q~JY2~>%S9y;M1*vFInKU&Tf3o>a|h6A#{e=kB~LdASm-yKz%*kC$U42%V$LDd zgWwx8X8u^3`-F47QhjthSQQqv}|5-<5xko80vJ-#Di+9gnC-Bp`QA%%K4l4 zy&UgRn)t~ukyFzGWZMJofp9CG)X_=Rgg@zN+PJz>mT}`Vp2}PkR$W8g{J@~_F8mvI zqUyna%z?>({sPCv!^f3(iGkg&u?_?&UH%ZF(}BJvyKg@ng4nAP9F&~6zqGj@>Yiut zn@6w}BnMv)x$OS9SbAjqZ6UK4xWPG-R=f?>kx^b`j;zSJX+=Hz1&x!leXWR*ev~0^ zuoqXPE=-9?xcH$`ZTA0R??1zu+P1%8cw4rJ4G|TkDhNmyq*s-u z^xlgS0qFvU4iO8zNbf{IdXpMzQ0cwb&>@5t0tr2kJPY0DZ1+CpKJWi}Kl;ThxYn9; zuF>Y0WBkTgB%VCH*1LfVyKQv%x1fgrw5WDF#M716dCA^=^5wUzpuTUcaq8i4NTeSmVt&wSx%<^%)B+y0J=~2{03x%$qe56jz*VF+P?OlyPf_Wy7 zoP@~R_mBG|e#Ic~Y=&;)qj@fQ;~e~N7|BP=&)V~eFhA9?D6wfn(MI?^ zv+*1=y1X~*?=EFDNX5tFBytaGO~{z+)4f%BqTuB1np6DOc0;0;6VONQcdL{j=da!6 z!$?`}(x0-0EUBkJ#?~@@93^~PV!Q^HXcQ7Ou@~{~pK|M>A=}46`x@&{2x(a{wg?sH znlEGRSYd6B{Tn;^x~V?c=}+!1PqGn6&#z%uuiter;Bq@YRsrWcYc8NrfGa-YIB%ie z%(6pm#=)XL1{7jSeDZce*mHzFd?pzB6gQ8NtThH8qEn|?cn7aHgFwh zs$^{=JfLwX*p%}mzWf44bkr+4)!MaQjZjP)?MJ7&xiq4j&6v?jtJzAH6U^@r%D)x? zqz?1B%YTY--=jEghe3Lu;^EWxuSG?;4S*;Y8TZ7?-a#o(+#V;D)tG*R=lrVFRZ+o( zqUMieFVZM~XRa;ArceNJm2JZi)p0^E*-&G@3+Q)|Mag@qhtU-69PZ)jyC>wVz8>BF zyfL9xt8%N}ItG$j(9u*>xQ)6?eD|0s_U<&kRF8>B83I%xul*Li`IgOg8P+aT%;{w5 zx*o*18>)!04nG`iU{Bo-(G-7C0m}(Ho%uNyTbQ=7b6MW+9q%bOoFm80Rcu%R%=f+%kt!V-&IOP*>kV6vB(_FYZ;g)#U3$&y~H9+%vuPgTise; zSxx+wG>!~DDJ4o{SI2NKL!$@M?IP#J@wGI8gFb&|;lHq*k8+mUVRPbfJ6&>i&wOyS zUbw!Xea|P{(2`MWYwoex`n$*b{7aAd^-7M|H;NPUp#{8<*k@KQTC^%1sy+F6kC&si z-ilR4Ku8z&Nuz<-KTXb{*K5eNWsUGL4<7(yfnQhB3WII(WQh+}8Zlfv%*XbGMztj$ z6}~$M(}^dMY4_rdz?dMa9X=^q?I>x!+GwE9E<;69ydayo1ozA*T zR1mj#Y8=W0G9t)XSM5m-z3>0pJ|Uv{5j0hxNgxe(P}U%jF_^VpI)7+9R_tUwGBZDB zaaEXp=2e~TGLf~R-Yy{`CF7Cz=kTkPLi#fDh#z+ArU^@n?%YZKK+HymLgs?)OE<1| zw-)d8Br_R>n)6ZPeuG}h_^_`$cWqjDSl8|^PJD;gfQ<8E5R2APsS<6-2*%0RqkBKYa*+#H`X-~@e8*<-=$KGBz0+uCpt=LayI+NFHFc zGio1In*YMm#V5-9I&cmw44Me*1hPnW!Q!xE+zWPeB!3(2kz(8}{~{L;nIxlGwa?83 zEE}x{MCd4Zh^j)jhR(~OhG+|U{jzJ%_@`K*H zFQ95e?~$Q&g~i=DLTB!-;ogWn|0$6KoJ<&b?o(!g(1h@I z!z{T5`;QLZ;i?dRFJcg8aG_+#HL|GKgz-so^TksG*_r#BgY68+l*D@lRtd%y8bEq! 
zXZzE-a3-LM#chK9I%sQ;NkK5vi@PWj5RMyNK4e5MvCsv-s8nOxJG8a_?J;$a4!lmA zQCx$fKon@!(6w9)DPK>FO>8b8OLV?jD>lHFLYIR=w__q5(OISg-o@tzjWnh`gx9a; ziG7eHYq`=h6&Fi%wlZm#KtoS;urhU!x$S?A9IeJtMmr>`tLLCbS#dFWo#k46Ie~E= z(ZqVT8tw4==)(5H%O?CGqpPfJtIoShbb@>#McW0s^B1*t&3pu);iFUi70CWw3=`C8 zQN-mgnBH*3Wv|ibp<{9XKVXz??AkqVfTFi|><&jeG`&Fij4)6>^BtnAh}Z{*dN1?? z0I1<0Z*SlP)Z!gvk~%MMXfN(@jJGJTPlLC2 z&e7PYD1C-^E5jx8r#`YywH(Z<7wNSq=I(fSjF(t=Oj;b;$!O^y@`_YGWya<_H4q5(;YH43qGr0#FHe-C$1ckSrm;NDZb~cElGiDlY?Y$)%w_#hjog~}gbnwbjFtPR za|h;Sx^>&0CLD@NQYN~uL=UaCAgY6i(Jg`l*Tnt(N9yfeh1<)7vWBwn^{y}E=uPQA z$1>}fEGl8V7?(hZ#GwQy9ptC{ks&@Zs{+f>V3yp2+>5qFm>U(OU&oI+)L8`^>zQ>s z68JsX>TL-t4R|g;j~wr58?_>9>`_zuv^Kc{KKIK)<#x{$_pWfv;k=NM|k9t#@CQV)A5waXamt%xl#-ueS1spOH%Zynk<)9bRL zC;4$s`=CDGcU%)$Rljdecl-7agru=L&`LrpzDZcuWENuhJ%KQqp@6^pHqGk-L~0>)rl@} zNl#vfK>f7LTPZ|`T30&H)jy3q@aqNT%-mc_7l%lBZVvMB4JtIIhy9VsjlA0Xn%&8# zsab(Cc$rI@dnQCS8G{t=>zSFU z+#K5Iz0Xr+N0I+c;4GUuP?q!QkYXyiJ$E%PaSLr z=Gl$bUi7uezulv5`026Hh+!e-Ejbl9cMBXsYmJ)BO!&6N2)UOUVv*Om82-fTZYrHY zt-)+!j79|ar$ZP6(kGbmlV&#Wu1;f20C9d;{Mxn&HLm#CeHnxHa;Q~ay@c3y(nkp> zeuRB#AHD@ZYCYCErt7zmPE`jb)*eKMXoI}RN-(Xjqwe}jt1qIr_bsg0w)YZ5Ai^E! 
zf_5%7&0$uc8VHg#cRMieu6hLhg9(+*`g73v48<(Ad9QWGs{_oK)jW|zW7PhMXuyiN zjn>DycHcy>*h4GHV`|B`Hbp(>OqVtD7yhU!k&%~srX9+ZlpfbA zADZ6RDAb7YJjRK*0*-*b8J9}wCn`;)PCB6H6eJlflKQqG|@KIpQ&dgJn&$T6*JPrmeSheMVq?ils`dt&85&uaHK_| zNZl4PDWjK)MrQA*wf(huPr~m&&GZ^?kaB^QRaO0rk-{}-=2=4-4zKoz<8# zD{~R@p*W9->1oo~>c9&L) z{o_|HtNJo`jr8IR2a;c}N98wTuq&1sg(_7d*`p+9y;!@f=w}i&H0WQ8J~4x4ykwbs zjxB1DRNgd_iQUd;tDz#xp5RL@V98Nze=l;4nggAYz@F8+>K%T4?^!4X#j1!3lAu+b zXv({v)}$PG>T0Uj^SXkmu&i|zyVR0x2PsS)1lMqqk|SiEBy?Q8@JQ3(XnWRURVsTj z(hdGH3HzI_7nKr5K{CcQMP8Qxf3Ond7}-(0jFVy8m3_~B$sG}|C}Ax5z@hS6%Veeq zv!5HZ?rj=St+Y38CgBwA^_lIU)U;m$iEEDcEyqN)0SxRUb)h zvpf(JYH482UBm))imOEsXHk4+DxEi}EsF%*t0zph9k|aB?#-n8BUMsu_!*@#H7^ad zX~D*+q8*n#F1*iR$?++<2*R{3s6LVdM_wBCPqpnuk%CmJb_!B@B1QmY(!8}*K+8u} z`hXrHP_q#BB!&3Bn+USp(x7e|oEdp76(lWQ)N@?b)Uh~J17cXr+k72}*y(&+v$Fb4 zL;TSrw(ZLDj77Axj8_U`3Yz7Z*GGSB&dPH;P3di7E6k9yep(^%*zLUVWkZ%?Lfn&q z+P(2+kE<>^eB0F1vJEDAm5=-Q2n(NGatxtc%prVwItYv1I&l>TlJ~pkmA&ig=2!#g zuiDNx`xXsU8mX_Zp3Q)QkUQBiGKEgCT!|gGXldLk7ZuI8cBmCyNa|0=^INYU)ci(C z8EXq{ZU&r`$p@2{O*Yokw709q)=r1Gl+95Dg07(e!zYo54ILIrWN%KXP&;+>;mLql zr`6~(?FVy*XMJ4`C{QfXaW0X0(qW~$BbF^C(=}i&`P}qXw+aS7RF{^DeEV@7(^K%y ziLjwZ>m_hk$z6)~^i-mcW|dJv*Nj%;I6OWyL&5Il%tB+KZ9PPRRat(;9=xB%m2r2) zhRVpdp0hI%@2XcWdGGauA@Z{%AJn=1HNl424xT(r{I;2Ij1am?t6@|w=`p0pag}n- zdehHgE%0o;a`v6^-MPNNfEB4_z&}!9?I1Q;S=&@8FXF#OVD8p(YT>)5ce7Eqsj{ru zj`qgE-cC{VVQf#1hWgsf#0M=Pa+MyT@dml`-}m$z;0o`aBV zK}LJ9?Z*1TH6S)-oAF*)?>R(xg%%qA09m18qxT~v0+j!2=CyNlf4vT1x~)XFR`t$d zG$c~@0_U#Eh)eDeUPaeurm0}1gi4oAZG)S7tNN=$L;h)>hUS2JB<&%T&W;;6BpTz= z*V4P=J=i|y`Db&-V+iWpNkcZT|99_zXIobdU19lnI>^YTx%6arX_d*VT9z zwxVYaJT-7Q1~4@HaPaC1hm(bV_aN+TebIc&&>k7CAOUtv&g=Re8;s@ zmwMl<<5eFR)T}f>at?@T*<^Y3&~2dE0aJJHj!`)}lTFZfiN@ z;qr@+UYLvck(F*V~W-6XtRo&&b6=L%J$(`+0`=YWP33Nl; z{(Y*5xpKa<-kCRO%SmYgevWw3g>_kxe^8JE!sPVU>oy_$oa8HVdH7eS;5H(86YVQ0 z7RfssitiylM+)Hqlk5>HJH97@RQXh@p4;bdL>~qhY!P?Wx|5@hn2gu%NF}`OcGMw# zK`7a^xM#*g4)lo&8m7NT5slx6Q4^R5w>lLY9q$riR5uPZWFdHl0RBelrIKk~T9-@_>f@BKd3vcg8_ipR 
z!IW6sw~Hv7V&ACaoG=$i{KzL^pFTk8h$PyH-cJ6 z6*EH?4env$OG0~>iZn7Iz3eWIHTcZT)yLoM?2lyVEUJ@xg92r zm1w)CLyW=s|p|e0s0MWbhN`&_ph*uV?0xiWuHE9L4h0ytD zhfv-7gM4`=@H`mGWNTJk<^|osqL<0`x^dztnqgqGK@2g}4y!Yx|D4*Xf050Be#6BI z#!c8YYGG~>H2`aoi6oO}@7_Xo@$SdYG9yIx$H*K*SocjEo>Q|(;-59;u?URQZg{sk zl!Veiy~OIZOhv~7_usfcE5u+K-wg3NF)P$0#JkVv4p*N`eYz6H1on%FAsSn`b{{P+ zxD(Ze>a}RITJUZnE$cjZJ=skOTti?z8*^;mWvCa|p2T|yzioCSL{*Q~;9JGo^;n>L z#-M@_suR3^PX#}71>T^uI;vWfVfz+WMu`4AeZ#6QZzY4QDngeiqO!WH-=!Yl^_V@j z_49G_oUUs6sxkmcb-BApU_yD=N;TlvbRHiVO3rVS08yMK@qyb0FRbeQ%Zjb)@#vZ@ z@dB;=D6p1_6IuLV+EHR6QlInWhkvrJ#5&233$&9NM&yq@Pgsb{aO{%$c{VTw#^q~WSD z3ya3u88COE?)M$_mTf#)9a&|{^*sn1m*y+I9_Ee%pK~KxOeRYS92#CJ$Wv_a?3YhI za@s}W>g=gBu<%0cxjMSh)IRv@O}?i10I*-?YHiT9A}yQ3I04(VbH+fGsS|HKS~7NF zTm>9oWFa>WiN_XAXM77aQxJ_Av?wY=LiqU!O@X*;k*m+AdyFmU^IF3f(R&FEpX_OC ztfeIvZit3%8Ldx+!Sx&%n|c8&D{z$#Utw5Yxp3scPv%XF|Gsg$O^}2EiEJ-g>J_gi z|Lmz%-mSIW?`QZVACe3^26RPtz|aG6VGr{Fr@x=4ts@GTx5!^Jh=zRcqvhZW#~1AY zDMZ9Nxh1IX2%b}=aID2n5Lcw+4m?0F=`ETy_~1ZzA6`bHnTw~U`?xQ{&ubrv6WQNS zq$1K^lVT$3WSHD#&O&UL7EMn-PYd@uy<#M&Ca?_xyr`j93ZgB!)pI}>1c2U}?+ z#L~^GR_f0V9KZRzzw8>Z$h(z6SU7q0k%=<5I=(1Hh%8mP;JjvwdBFCks|fA)gtQl> zl580fjl9%(6W)P6cMq*^+YVf~xLd>zc~}f%$aUIVUC+#mXI8|z8o_SUVt@rISNwP-tzK_<1p}&$TogC zvrBfaEZwRni^NU}x^mRp;znvqd=S z*xse8Ul)cvIE)5tNl%AvkFZm+C{Rx0YBN2%x;p#;qcC;`1wNu)^#-+qyTJo~wd$_M zBN)K^YjY)!c~T*GUX%kpT!fk33T6)3YHfinfP2MZjO=5Xm*n;a>>cH9rz2TFS!sfw zf~H*uUBMq>RDPow=#CNO^w~gq7}3D?iPdD1UdQ&7Qqjnhe97l??y%gUdGy+~b0#3W z1oMG=yW$5VPU~rLGSwx_*b7a?ftfpH7oF;pi;!Znvm(p`d3+yjC%}>KMZLShw5bwf z5#7Zr=pDS_aXU6<#K@Lg_^w-YONBc{b*8JP)(4%R_7ii?%hmg?!?w@dfv)XbpW{n* zR3{W>NjMH=1_UdGCs@JZ-vn7wvV;_76W`YZx%XC?!1EI-GNEU$dc8B4>HOn{uQee+;IVm$#QO1u=FA zf1?@mx~HQSGH&Ex&=hiYN=0kd={`NsLp`ZTNN$s&X*c#8UKQFC-ps1{?KqWF2Okth zL^l8#} z*G&vV%F>w+RGVn88k6PqB07rB6J(3B*g|z`O=f%Y;(e<_RabnV`;`hJD=BtGwQ%g3v%?SI_& z0La0sk|wLz3<)qa`~>~@UQ2fO;fy>JUxWYy5Y0TdH)3oae`rGYXgOGw+3j!hoaOlZ z+w?4@>+Jni;0lm~dU-(~`TLhY8k2t?>~H^MpSMvbe(?Zg^68zEh+JApH2_agPoEMT 
z$-l-c)BOti*o?qL{d51-A5-REoe@3!M@#fh<=3Fzwfy*hz3s1LXgX)8!bx^yFi&Qd zQYy|cfX~AgrMM;h*#Q2J+ksA)8?@eZF$}n4>PE4C;mPhq%I5u`i|(rv#CxVo`g__z zml_X$%7OlgTl!$|)Jf~cN>t5*{Y3MmB|pXiQHlAo>~nvK1p9x~b{d`hnITP#}iR;OOADJ7IQOq)<~t7nGsfQk2m&+!T&J3fWBrF{83XUpw)*5}kmH|hxkD-@m+^=! zTL^;-%)2vG@hsJde~AOYK${?5YW{rm_(9zVO0O{+{1>La(+Ge8fO-|H;&1#R?$0j+ zD7_2*nm?sM|I~yBIzGw#-^>C>Pu;24h^J()(EjvEG`Cmo8Y`Ii5IJ$_Utg;1_@yR& zX^!os&$Hi!&RkqoK>tCi4<q zv(){$rhhzlHaO_Z6CNwVqfXa5sby3+qN!I=B$gLr}`NZAI^{s@zwN&0;PpC_!JO>Bm$$U)J; z{bJoAJUfhxSO0d`(^n@6WRfD13IOR}e`y71A2QJ$@q@+>EKc9ae$_mt|8K}r#; z2qR))tMnJ1-aB#9h~l$y-W}k6-Lq@dZ=ajCEt9iIa|FG}vhi2ek@FN`z_dClynFOl z9-Iw^0z3|b7K9%Y`svFPn1^riaSA_fdSyIgR@`2vpYLp2R$tU4*7cVuA3T*m`&c#p zieH=Tm0P{OU$9HK*Zwl(L3lbG|9Ysu0ed+83hQ5=c>Sf3bWifIam?U8z^%-h*SjB= zz0Zt7{zl)wE%8sf{EI03y7Y^5zog}t7W~qJUs~`>3w~+A|4|FFve+2^(F@>juGlY^ z<8Ln9FZ%oviC-e|OACH!!7nZNr3JsV;FlKs(t=-F@JkDRX~8co_@xEEwBVN({L+G7 zTJTE?{+kvw%YU@aKf`Uq7ZPvxW>(?;9d&nlC;i(162OOWax%kkD|f!$ zRgI|4|F@9$(=;Iat{7FjedS*U`#C-EQFi7?QhYwCu+Y)Di{pDHz4uPF2pQ_|+r+kBs_17ZD75FoD)Q zexd(D1E02AGu^C_vv)_&0NL_{2P&*to-3RMCMDxoZ zKF-nnO?v-FvAYosuuE-IFYf;$;}0IZ33U3Ogz8^Lf2s+nhisF@v;PyDjv4&l>G_|y z@c#z}`!e2h5=zD&gg%ca&u?b<9|`?4d%j4MKBkMiyPs1$l@7XZLg!%B=uIBPC?Gv? z#lfi&#VBz@jT6m*3(=hkUU|fGl;a<*WSg7OdG_6iz(Y@l6_`MIj&jnM=OVIlXWpWQ z8B{ip{LJ0Po{6@z{jW@z@7oKrx;MYx}w-?MT;SQ1JnQig^@l! 
z0Qb)%ykJF=`fSV!+$zl8y4l7X-K2C$>ZA1yFL3gW6D==HCkmqfMQ;A9wE2);SxWi1 zt&d)~?kbm^R4N;U9ZU*iXx?(qUr&gg`QjujDdF3htefn41lNEpu@dYjHWVV2vG_zzQ| z$m)~X7_~lC|IR&gkZV-;e`dtJ%;PO8$|?4S&BHF^B!dBMTdFqRA*9kVpzoOSneQGj zXLcs(O$};QcL!c1D4DiLjXF|4`5#vQV}NEIz>x19C^z+?25o?ESK1C`hI42bP4WF9 ziBHwfCmqCqTckzWsXZt(s!iqpAHKjJ&wXFO0br_^TT=TUJ@pFO`I;bC()n~gUha>z zEmgj5j0%l3xc&1J|Bc&XzrcAn0W`|uSw z%nwr&kq|X?zBHxN9n^xLihf8}1DZOYxJYDJiRCX3sW@o8aVu;)fE+KqcC%QR!9=AH z_>1!@+k5bV;+%Nz~LnsvD7K!)M^}Il%NVP>gu`_ zd9*De;AV9_fK>xmIcD|0N_Cu&Pi?DOe8S0=7V~;Z28!Q&+2U2Pv;Hud38MjFZtpJX zVCRmNC0CEem$75%_P<*+gPcqs8S8qgO&@BLfvS4ItGbs%?EPJpr1=9r7TTwCQgC6@ z=0;>E`d-j1I6Uk0@MsKTaHjrbV*HWV@2?_jW3{sDqDDkyR`hAp)1F#rLWD$+o^nfd zH@=ljDT|taimc(m1^as(h?w!VXEE^hJ^PFeiQ6ynSM+|76}&6_0ddc`<*paJw=5E! zv|H7h_1HwxmV;SIov2hDJLd0H{lOrIKy5wO{CfsBu6LJgcLW=gHc;I#7u}($e1`GAqEp% zPhd9tJ|X!`2Set!Rq&*QVnD@IW7JKgQuz5-+9UQ?-i%MLr=$?8W-gvj=_8zU|Eq8Q zT@jbKH+-Ne4)Q8GRi7c&M14=YA^dsZE6_ukrC;9Yb+q@pkwj0La|P-en)i&>*^I%9 zZ6-=GCtzSDy?W%jBdYmOq|id-{@AYNt;@M;`ishB2xPnB*!3L z|71x_VVnmje=omsvy4uELQhER z`9pe&aLSu7HjkFzBH;FJspwkQ!*p^vvLT1yMU*#RZ0)Eh_eZ;yI}{nbPnaSvhX%Et zKZ=#UphZVd3#WIGmNvGT%6&CQ(+Unl@#NmOE3Z$sA$I7Fi85mG`OflFo42_)BpK&m zfaaI6#S49CyBL2sWG5^nVjl6>*dN-a0G^Jjy)_P{;Tc8_C!AOfMLc)68??c`&aC!w z1#P)^yiVD^sN%n}oFM~GGg)+4-xA^>$QoZX=_$9r zptY~Hy3`?2BmObTVKmX2q}q9Vw_Q-j`=hh5(R|t%J_-~g0jH^*Nv&&R1$*V%Qo{;4 z*@url8P+e7AglTJw+-<~hYl0HP~9Yx_v;+g75QvX_(6I2*?~i9Yf*gJ{q1k*lIn}n zkCe&}5!WE96}t-<<-en?0lCmPJ{D?lx9G(^_t}3oYEkt01&gu1Eu-t_ zA)*Yhn#qVZzId_d={seSN&PnjN1`b^$SR8Fy)G?OD3*hw5Mtww&KF*GpCu7yi>cvb zpoq{O3U+u}W|r3fGD0J_q4o}&j&vDONfErfnV4eJ6B52=3Ev%I%32TPsdtNw zE|M1GPjg3wWh?9IU9~1iON+L#4uy0+PmGnT%^TOfKN&ER+-$WQ7BC{f#z`khji$C* zM4v-F@3I3IJ~5R?awchR6x<`?j*T9uJV|_Sa99GMm$nXAMNC#V8!xRH3pU%cX$##aILamH1SQ5gA$F+Nzm7+e9!ky=VP;zVEOv7o)$IFRy2(9T$okMxDm6~GP69sz zJ&QuT&e(?<(}N`c`wYC{i$x6GVM_roxc1$LCnt1sTvotYCG96LQh6YA{@cj;p>Cyt znsMQ=GLLzUuEr>hY54m6&8`*hYWb1-)#LS%gL^g~)Q-ztoKNl(;sTQ74LvB%awL)d zY$oD-PoB5HXr*|zPDc&I)>|p=5d3BK%iCTk=uEtS{%U?rz{x=vx8V}y$KlcbIYJ`= 
z<_VpoS50wftZ9F{>nV@Re>N0maL`!dNqFm99^C!&*gqW5nu-S=k_{!+)QZt{qGAGN z!o_!BMgRmxm=>A19K#U_ln>#)N?$H9gF@WTgvD| zlpoLjQ1TG12BV&N%QsuaV3dTQHWXSdQK`1u1Y&6sk2I;AoVhYrQLaSW*bmoeR zTug{@mrF{k(}5fLh_-AiN-8B58d7;{?k)My>GxtVToTbux;#m}pz#%)842TxKh)yf z7l}YbppMMQhd#yBrKR*V*~@BnyjQD&jo*_UFri-*qS&9fv6xx0=55^B)>sJ???klR z+2w>zyBWqv^R?gR*&O$G;iF=1o3C$iuH|Bg6ro}=NuRnFUS!gR@OW8vq%6H-$(8Ir zX=!YqPS}rY-Hr21?u=*?s4AH5Zoh1%BgEo2+SJ!0flLCoIQMYauPm<|9Jxj&U%-^6 zPqQ)tVUnj=U*dVmgA5p zHgw0c$Vxoms3NYnL`_;~)$7wty>vrGWzp|cl(bA7`N5tBO&J+YA{UN~H3e*qbPDw+ zyXAVfPl?;@vv*YK)Kvo>Lk|3{_Rmv&58L=yGTn31N23R)#!5jrNkZG`JSAJ;KP-x^+@|`ckqMhcUPE-ePb__#MD06^&yY<)Z%23&ER^Np;06{ zWNWEt!7x(F$g5YT)?dGWgKtrb-sQbhNwp+M!bnLm;A>4Y<9(*7FJThfwyRT`lY5n- z9sHEF&j?w$wgs6gZushv7H9BC7iMKS_HK20$4-AKHcgP@a4*#NA+xd<;y@C^CSEfH z9;3UzG7j=suin@`pDztHz4&DYB>a1GZ$@>aAJzv4|76y4>!^&-OqAJi-*dq@~h0cQ8qA|@| zr>M5Q!#F)ldNON94sjV6BY3e1F`?bk-stFEKmYwmJ3p-o`$;U3OT;4VHc{o}IAU1b z*+XTdVf9QHiza$M(hIZ7h9b;t<1pC<%80mgH00?>r{w^U$jTs1aMPbvbjR}bC5l35 zxog$9x-JWah)pTo7}2nsXW2XhctI9bhlSkbL1&X>DS)vfR3ZqmdKBHf$6h&l#yPYzxmbEM7S* zIUpEx9B}5fdl`&GjZHc=nCsau&_3NsglxSAgPTZ1v*`D79d|RSJ93rS&ve>n-5%MD zJ2KYgs&%U-YRii1AN)%s)7k6z@Qb=3Sz)e06d2cM%F?WX++`~Y7OL%!b76K>mhMi> z&!E72$`5p-jBaAN=+RNT!gNL+tgOgFC40&XMqA#jhH!g|x?S<&)SV$iV z>+h$j9ar>(Mb;D6FQ68Q05$Cs1T@>uJ3~hXb9NYZmj(bIDN|{=U=h9`U{{zZER861 zRK8T4$e1`8&=}Pm(-d3P(zumKz3sWS%(`Daru|0#PH7S?cS>n)2PV$WTML~?{)8g7 zaiPU!i`phFk0nC6CT0kn(-pXk9(6JkJ2aT&JvCn44s9ypT6{l?j$L=Oe$|ay`jAoc z%EHm!_D$n!mt=UKY5jUoTg4U4>YDZ-M#-g3PQg!2Jg!RxDsGstj$TqW2Tv$i)|qAfz|g6}Wd zA!0C;S2R~%HWzYE*I+TF!XUwgDB6IgDDhVUN&j>Jzg)e#wCtwuvV73{1eK3}b?Dz} zz?2)@#Z&8c#NFPrUWm6%{n`Q@5-*~uU$d;^@sK4>zi7J?JRd7w?rAGyKu=L_*he0u zLgnyA-sLc6@>vkwkmFja`E%ZG9)&aBufW8Li%5hZ(WAU2=1M^JiP(i)cY+a(*aq(PxrT45J>2wnWRqOc13kuse` z$126=IWgjS(Q)g8!uO)X)F||T55?8)RA#2cpT%ZmR(#C`OZtE{&3qEs2143UEscJR zD-igaU>%lW&QE#)$HV=o+dj}&BsNW<<%e=S>EL{r9OaR<+r8idu5rwm1UbR z6sCu>R-Tlp1@_z=#KJY(b%n{2ns35-FcZ+o_4pGY!MJcBa&>R{H1BfBx|eOg?F$kC 
ze;JYw1#kYz)!%%QPloUI(gI)Z)7&?^lbeKJavD}%#7sHc?Qsc}o8g-s$7rFf(7GM_ zj$!mE2+7iCnJ})JuKfCa`x+WQZjs{2!czyFCL1%n_KV+6pwR-r{yfNQ-8|tv3EQ<{ zH>um4MCJ}n7b+<)j|j8HAL8mq>Z=wewG%pKgH$*GKX70|EdO9WratX?+K0ba*8+>L z1HRg8W<^7+#*0H7;}QvDOV9weQnn7IMY1;0@*Yb^TNBL}XOaysJ2~N%^OpJ3a|+h8 zKEV42Ek3WXm@X!gtlkU7aan@as!bZYPe-T98{bW7JNuT2{nZLhTzd=9LAZJ!F9*WG zPPv5>U&_xbvksZ@_T{MwGy>5#OMn%5eK>OO z%)YkLwA^5slJr1`Z6P1#Uc?i~{=wwkF4Fg7Q#rulNp(lQOZY=^dArZ7TyOp)7vC?{PI=Zvta z_HbW`IBAfdgQ`P#$P=Mq!6}o);hfj+7Eg&9bXnVlJ(Hk;1?!DZOc`JT6!@*=bw+l| z`Mb)M5U^cOu^Vtb!P00Io#mUZ{EsGo>$^S=Vdi3P+buegF|;?f*@E|SL%jaoimJjM zb5LKn?kLrjub7&*-3GU=uxYK2_vLU{Po_upB-|Di0544qN4s36DQV<>>CGZgLO7&( zj`ygBO-U@wi2POUd|0_Y1h5sK?^#}7c8?LwRb2aOSZ*m(wKp6Vt#QfqN&UsPns*$D zE|2w!3k$*?9dIJDITA`$f*o`xWRTUe8i+7Cu0s4?6}qV-qKf3=l4nzHQ46htx4=iQ z&%R2C;b@$lr;hNde|^|mZnBB>;+7ANUbC>$Vu2pX5m#p+>{d@1ecp~R^EB}(-M<8L zzi@E@NsOaYhjU9J6VQ;hnhO&Hu)sbJVq8L3ZE>dU=giudT-;B^^81%hL@&1q2;bZh zhIk2bSN90B)Stq-T6*tj$F4+{^{7CrL z;X{ji)AtpqI0s*PHFx9U1U|sJ4F{__e8|UPZ%1QZ=QaWHM@s75mt{ViG(;&Z%1vW` zC*M;vbG18XO)$Rw*g5-_Hi@zuSI_B<{G`nAclBSh zUZFzRAg*<{3Alh4CPgB$`!&4XW~r|yiT4-O&V-!rzPVh2=5U!^r6d@M9?6cQz6o|h zdLGBxT2uv1RX#PWAA>K2oR{qHb`CDYp)oRpZwA(s+U z|H|bPCpk~w$t`=m{1R7$sr!65On?|xCwSjCn_ZjrD)Z`E!9+7&hYs4WhWU$zTo8O{ zCVS}Eof>pNTpuu^wqH_;TNd2A72V{7weBDwW43j6>|c~RqnfQd(A0%&_%6xn=_?A0 zi%QLMI-z>*?p4lROvp?ydg9RY)hdH9hV-Npa%^7MFJtG5tnJ=e>GT-5pSp38cDI)w3| z8D9szYUvcU^|-H9`}v4GQZ~1T?u0;RgZg%{!5NSkgkeV*O4bwXYAj8+YDS*U35LG1 z=zmhf6e)y_?O1KL4CYC2%Hxrpoc|2*sj>f1vvP*|p+Q!+j|iezLY8s#$id)FWd2IK z{Sg19^ka8iqu3^NzyEf#kieqFp^P-aW-OLLg3jTgbY280*S;a`s-y9Ry*`06$xA3% z>O`UEv9oua7oNt?=mO~~yKjphsQJEKY8VSc+vcYOo;f(t(bfo*XbWrzV~d5Ma09t3 zTC(-=vVWv#eoAS?eLHzYi+N`TDMMJTn9#c3J(V}@+X>Zz;HJHk*`lp=O!O}qY-M}9 zPzihX1t233guH(1rM9<}5@nq!;U;bC93%N~S6x!J_}X5>xlmY_lI@6nplA^vOZFNM zY@HE1EM*M1pN?z&EEkqGDo+_3ch{|`01inq{+v5>l1?>-+f`MM*TSvsXcW{?lU7Zu`ht zGM9$J8^7lKd~2+fJJs9?a|k%mCf*LaBki2jg`>qbAO;HcOL}`7j<4>VLJRNCl;HEl 
z-Fy1B4c1wA?W(ILN-fRymyL)BAY#KYAO7dQ`orS%J^}pW+%O(yVjB$p&N*{8*nB^rDKr6gXtMnFC+>Ux#)3ck|)RHgK-x#b--3x44 z)!WmmBctYs!k5YwRe7M1g79qYC@f#nZZSATa678-{`@Dpl{(8cfiI6wpH(#XY1^O^ zjQ4a^%uIh~@Uu**AIsGG%PxOWdA?XYtiaxmsw^eo0xX|sW^W6w-YgRA9@}4(&Xg6HDyR~>WfNJ@L%tlg4vvl1Z;nd zlEKJDk3{YKBL~lqmw3$(TZ!Xo!&iRqLW*KcKPg) za*iIn04qOF!6j?(wb*n~3DM^#yZS5BJ99dSpLjGe^Gep-+vdyD8KS zdwJ|rb@R&iJUWFx9(I2%VpmwabSN)M@r;*O4Yl+*c%MKEWs|H~?GTMHOBl&e{!0b| zh-Tg3_VIyshKwWxi&FWlC%ng5IInO`6fRe%Th4~&BvXLA95CJCa)Lu`Ufy0dD+u4> zs`#glPk?Bn#C0Cs98p=fwQ-_lih9L`iZ>2PFk6}yJBuadDL5`o_5 zoRVhA(uCRPXRaEGYnD7>8!(=9DS~!_NHiG zMS_=uG~7k}Q~y+4r&q#AcH?{zV|U&lly(5Gy#ftere-vk5#yXAV+;)#l{9s3Z1-Pf z!1CkXl8?xiX=J{U5yhwe1^wnl%eHc=ZuX2(47uVq3f<`*g(nz=i!<-sMMPN`npwW-eW z1R?Q$D*_9LLH0*sjx%rkqJ9tmO`S9TmIHc4p*;{RnIjOk{A%j!oydpg&OMeM6KnY1 zI`1Ny>J4(Ff?#bOFf`&;2Ou5j*T1(!%V8o-+{(FD8RC}&&1SHrrbjB`3)^+(1#+Xp zEEu>`b1EvT#|VEQL`F~3#tPAL3{Gv&zvYJs%XWN1Fj*0x1gBmXxM=8N3)m)E25bwpW@{$ zYp2`LD`SnC_s_wauTF=#>P$~C%;A&uiZpS}vdkIokQa2J*R})&%^C&9nluKalCm)q z1r5FdLw!jB9GAW2B$_U4e{?Ote~Vjf_iwA3PlL5kooJ2FJ!3Yv2)X0dn>fFdCx!w< zQQmyHvSYPuv@z-1m?)JS6;Zf-ob7J-^+<^XWs{E)tIa^F`-v%~o(;{&H;CzHmrbS6 zso$YojCQBdIv3PL(K8mwrbwkHq~V|E;;Je*EL($v<%zB;Nl4xH%*Y+a0t4Sm(G7Kz z{2=P0p09~gBFy}ES7(MmsX?B`Eg_&6(d`a=BUOoiRtD@TPCq`nH@SCbGX;Nkk5*V( zOG+SU=-3;nk4x#9q2FSoem2pltG8JE>b{ju(|MzM7Z*x;MDv=<#}jchFePPaCSiy! 
z7Utd?c4dr57qz{Fe`5}2DrwF;(ZF@#5wi(*n7H{nBT9K3hCi*>Q^*eO3#t7wOC5ue zTwgp=AZkzS?5sujZoKi}xLopx@hmlZ?e?887sjAi$=PKH5FHoq1jeZ&op5o6^j$UU>d|`m?^=v-rILjU1$JcwJX*OXdqV99k<4S6Ip_`u@Q%s5)aDXD6IIH+W z4TY;x8Y(D5yizg}Dv6+B6t<;jjH9#FjXW=3?t?lubE0ianyHoW1oBEA$w{>5O4grpB@bWtkT8Yai{ak?ZZ#Pamjy8*54_oTz^$ z9ecUER=c{Vy?;2s+wltX(1sUGi#33k*q27d?T2PZVQ2Otrf1OOlEcOkMFOddL|paDtZE- zhM2bH&8Y~FWGik!UWjt{vygR9Csq}5aw2ydu2g*6V!F?rQ0_5mi+()QBeb0EjGfv^ zFte;R{}9clMEl7NRYnfn%Zz%Q8y1eiXm2Xadmj#5nAx2G=_@9Sn)xkWcwy)`<^+jnq!IDQ_&T5#mP{%K(@M;Z{?G} zK1ez=ms2{dHP}NA%j1k&Fj)I7JibLk^&u2I7~Oa3ics*!HNgU6W#O_K+Xs5*e)lDS zqB4(_JQhEeAHcz1=iS3?Yzz)#|8>pfF^(!5$VPHd@r0g*QEmwd(OB{XdAha)st5Cr z(spAL>-uRz3Qe}}wiCrcIkH%c%P$AK)iauD-0E3Tq|+;}Mb|6lo1XOG(JdiBRIQLJ zArcZm;E|M+bn+v=-WEQW5#u3|cQY58{+dV18VK}>^1Jh#;QkbL!9Av6Wp}x!gEyxK zF}K_3u{#1Aju^Xv>o(E~HSzLteDc~`7^UGb`mNJ>fy6nz8d&DrnLjqun!Z|JO;%W3 zST%pYD7k&sV?AP^!>xelws4W{hM+oP`M!TVNxbot00QW5ThsxItpyH5!`yC7vLX zINNd%eg010Lnil0VzS7Ih+@sQ+~u5De0r06N8XaFO*t5y;{$8iQKAf(k(&JzIqYAk z&Jv71Bdnm|y$=|?x(Jk46R6U3N4Yw0pI$~2vW z@tWqEMc8`TlLxe#2DLES`GB)Os@;(N(Rcku{tnp;Q$`jE;24;CGv01&_$7Cw5pGi& zgb5;0Lbe{uniM)2wXm__S5{H1`8OyTng0q%b5)F|F?UquA5}J&iGD5{tLuzfl}PVWC#Il|ZJ`p_471hQX8B41xQd zoeioX*=QcvG>lGy(6_>Mnhv{iUlrlJ9?`ae-Y_kZTnWTz#EnYUccqjX#*3SgpNX3Z z@OaNiVz{d1(_0lgH*A3-412gfBGdl^U9!j%jSp{u_^@NV%99$0?kqM`up)15z0cgM z>e!qsc#`6;xnPaFb4@^M&zt6Ms4?j!dbS~`E5sJA<)G)S?^&_!X64p0JOD%hf-?iC zWBAXBd&}f##v&S$wik4xyT`|gHm=Q6y9{c21aI?IAU&21CyoWe{l&wL`VplXowCur zMr(94o9Dcht<^?E9Dw)f!&Y{TYT<_Ukos6W%23(>-(V93h|#(v_`p!ed|KGJcU3fx z(PnFzjV21|EIHFZF0Ia7gN=$frPgiUy;~hkX(V@>dnYM(&omWo#_(XidCbHsG>H2; zTH{tiuqf@rV-laquphH|66fjGf)Qjm4(+Ociv9Shdj_7s zcvHQRW_zbOVb^{$7QTjqn`o5pEM`|V=x)h@;(z*l%lo73P5K$RofpwY3e3}U24YU< zGVOCh`{omVhB{Zk(y|-JDnvMX3x$H;ubcC=T1PwK(>r%=tYQi(&I-jpxE>OL+j$Iv z9x=;!GS&SyL`h;m?{K`qHZDl1Hkf6@N!KM z3uom@2`&yI+fR-~Lhet+iu0|F^vk8PQwJ-SRceTenug*-clzecK`e!?=!*jsy6_bW zOQ`2#*-9)13a4L99cn$N78Waa!sC7ZUi@2)3YRnhGexF;wEP7%M9Byn=s5F7LrHe3 zjP6`}FSxDZNqPs2F*b!RtVzt!slM7`t(kCfK4`#i+&_ 
zH8b-@jvB+)l`iTX+c?X_?4e%ku3)&<5)L@B#nv-*snLLox?#)cFjEwQ+`Q?%7ZjUF zY~f9siS3fSx=imZBl@_QU+JdU&edfh=b?E=YD@on+eJojXsziGF)RH1z}KX9fiM&$ z45)dkBB9>M_r7JJj)N~gQv<99pU>E+Rw{tBIq2GwO?o7Dm_oAAZKM3N(I=LjY>1;M z$TAY0?>s%mvZ<_QJyfi{4LB@lR?!(@WvshNHL0J;anHbJX3-1 z2BVg(ye(UH5}*Tl)(yRWuguv)`_6_$Ycu!y&z+Fpsq5EC?m~X`ArB|p4*7maCU@$6 zc@2W52r4-!o9JypLSE5^)h_0#uHk=erPdz9v-Z^dlnrSovdh^mBF7N@mxe4i9p?)x zHzPF#PunnPUFB)%c1&5e>*M74vMbXPoK1uTlVAXuYJ4Zth|!8MxS_3}97H`?tI(J{!y@OG(gczH67 zJ>~NEKxy;!3nHZpy{Q=Ex>uwm+Y9!ks()D7nyOc&`Ot2WEwKcY-*W<4z)As+`G&9y4m9)`F9P0XFzBKCxK|L`au5e5 zd)R2&aRxfS-nLw*x>hAus1o_^GG{?Z^jwCi7|*Y8=d40LVl|)o?r&e@x1V@jS%HzQ zyL+0hkEQ4gpshL}AL>M=Zil?eZj)2pcsKi-0Z^dK6GF-YLGKy4=wCP+-w;6uR|OuS z$H{!CrfK^csyF&u$R&bgPhi^`8}ry;%Fxd6R3fOiqk}#+wZ)TS!fzq=WE-G9=(VW@ zHncYXN_n5VZI&e5*xNoUC6bcbxcw}0frI*(uZ3;_+PuArt5)f_QOVhnFAQ{7UC+Es zk-2s4l;2O2N8X-RVgE+;O6K<$pZShnF!+gaqQv~g?JB1uhRYEJTm>FWJ=;BAx^?{G z_2T^XlG7$J5%UhI$$dHEgG+z-Oa?vEs7t z@E#yy_V~Q>Ljf#?2W}#0c7G6KFjSSJtuPO>1iOqcgtD{e8|@TX!}~_#LvP1+VmNay zQh<8KU{k+xn?*x%qInuN3N7bM)gis1^Gh*tg%C?SZw9+dSzh50&GNKUPVd*Z^Pk%h zHkmON)@IWBKFt<;OlhXV3R`Ux^p6TFKNR~zZjY>O;XBH7JQO3+R7f#>6*kI8wz{yM zW9e&0ufvEoZLb7WH_OJRFPKKtFb`(A5L#5|c7txwC@nNVBBA5A&R$hW&E(A7>*{2h zQ?!&s=COXg^!6i~VT!$wop1q<{>N^1PT#o4>xJ5+U{w}v{B%38gwIl0vkhJhYoElH z?1=@H#xp5rh22xvjPJYv8}G_s!d}dRMV-GL^Y(g&(%DgZxBRhAeAd=#Cfo3YvDJsr z*Q9m*dO^6kkY_ti<_8~lQO(b(-etN9Rnp988eq#7nA#8$=H>hQt}Pp$O%ty=X~>E{ z+eSQvAZFv|@yS;7oM5os3?D`4ijExiNjN%!#Uolex+_eRWFkwxC>$2&{Iq2a@DBTE z`ylPYZo08aC!=T}ctW7Gi|DDVn32?8!=}r2&a&v(0k&DWThym3@C;uCne$xM9ZTQy zdkK%N)$y9FRXRP8Wmad0u(io*Nxv0OcvXpg5{Xj)m_2A_NF6HQG6QrMU>BqjVGUPJ zeP!Kh#Tm%8`A)Wj(#%e3+S>yeWUPPEPv6@78|>67I)w#hbKL0Qv>9W@z$-$jn1QI_ zlU)8ovo5=x{yt2zYvIN11FYF{TCpAq9|GrFB}=3(tcZJG9Y;eI8`<)c>YjYa7#Htg zUrhcbG@&_{j-Jk2Z?{@ojiVDzJ!I&6iD?Z1>M1h#(v?e5&>q(-Y^v^ILWq)J;!`)Y zch@j)klK}bdo|t7w?MaI2jsE_-9~SXGS%PvZj=+w_-WiG8*3R%B)pg#D^O6=Zz5A) zdY7ePXu}httSM%ZGdI0W`DF>$DV^+)Z{`{p7>(3#7v9Cxk~il{=nFHev9M8({wY_= zta+oVIE1cO!VTw|@+rgJZpEe>$z(jkgxZeTBn!e(^pgvdN=UynnusSdjQOdWNw`6 
z6)d>S1D1diZ?i3y;Vkw;+hdX8A_~Op)^WB#qzc-dJ7-firHETJoCCAy;k6NbZE#Ht z0&+5G$;{taq}xDz4anEOG}I8vE;pgwM|wK*V%1qoR!!n|OlYoF^(7o#4uzc~l|v}b zAwb3#>}3i*Q{mU!(gmg-d`z7ntO1bF`S|;_cI|^Zh0t@q{p8oj{)Qq|>??u0(MR}Z z%($_#f{;bd!`XK9cZ0&_Q;53@zaJ-j;9@`Ap6zTliSX z3R|0f7ej48fDDk8MX8wh`)wie3j)v|y zxh~!1ktd0`FPHaE0^!*}nulCqsc#|HnD(Fno)t3u6@0|XrS1X+Ze7oT&>`#5gwA@lmFdBe)EC*3LWGS|){}!~+ch&%k*%s=_7w_# zeOmk#4&0ix=_6VNHA;P%uFxYUi7J~7R2(6yqV4XR6Iylz?acKNOMV@%#B4^{*`0!I zIepC;!M2RTckIWjteXYj1ucCHn|lU3o={up@@0d%Vv)4Oc_N|F#bopmM*NP-;m}FF zLYn=i-LTH!4{L}M{G)K-b;%g@#7JsYt$QEU@cr!M# zP6Gz81w*?= zkw{;eu0F0`5;{sGNlT`O@6H+9GaJv`1`*}S1~(vxV*jlNh4C*cX3P=vtLgrB*=G>V z5qjpnB&g&KgmlyGsSQv_lkRXixo^xe6YU|tusY5IWMdZ}P(_Dv!L~k;2V0H|8{A|k z=&)}KmCmD@K82iJe$OOGqTUf1W;kudy1&NT@AWZfQ>7N+dJWp5tF+PPNl;RnhVi*nsI%OKPs2SS zbD#~{cF!utW1*x5lvs=QI?n^p;fF1b$W z#15y&y0QGU-?dX^9rUx8!uU%mnDg(7{8_`!8;b)8_M1ZGwaJ}lZrsZ%kl6afDK7kE zU){|gF`g1eAR4hKzjMt_45Q_1C9C$X#`4X^yHFxd{tC@1+W)-+cTx0Kgu(;*T!C_x z4U$b^#z;t5fZu9EAx`c%*`m!o|+pAC-pI<1@CIKD9IzznGyRoK;n=)SNojz-G#Rk!H(M#X`ZSX(>`8z6u~h+|!BX;+w5$M2DFsUbNENzCDJ9g^>9 zP{?l)#aS$}SVtEVrqVm60x|DZ)}+F)z4CpzDX`AnwX;=WVOwglaEuk!Z0pw+5QxIS z$DrKUmblp%-qhG81_%tOcQ-j0XTBg>g>qA!-PHu#x03o}c+8!lnb7%HV)MjCFIb3g zxot&AWwty`Oe|8Q8D?8yeULJa91RS(tsJZ3Jr}K%JfYbHg>{xq#M7`bd1F=4Ux6>Z`Ii2Ns?AOQo zMq$#^11uqC5*8Fk7w#Bp0Q`ryW3}G@4rn6=qtGTVJ!Iz^zam%~(m*K`n{8OOWlNzi z1*|@QWwH|4atL~H~u?uIrVI1xFJyE%py8~ zw#kHf!C}1thiY$=z5y0cFLTMr0_egCVgL&jYHfA?T~AG;J#`)TGJ2P7j14dN)Vt*) zi1-SR#uMT`@jbLASaQM{d>z17=!>DL__MFJWwDtN4bs&cErBUjo_X~~43glWPhlTA z+Pt({-h-xrex@N9h=^M#^G_PQo6uoJ9I+CfQt9`4SZF&%AVcwZq2kM%H?|2 z`U@?EwL7-4*%*nZkHj8h=+@_Uw_uQ{<y?mnDV zR>>PDu}9w)BL~z|mob&z<12lAxcp^mWrFf0JJCejS;VehgdTT`%zjT;`X9v@Q zZ)%mBgWB{s`;yTlppmn)HhpXBeLc1DN@`wuAhrMj!`A5596A0qmyhnz@iYW5&3MjP zVvU2I+r8AI(R0&lYn`u(?N-JjxQ)p1u7tNM%FvfFrlMx=F*)82B(qZM=D9n`MBD6+ zyM-L9S`xObHVeq(Hrg2L!dlC(x{A3vSFp=LVAkABE!32_n`L#%mMdU zfa?v7CTr7Ww(KjluI&}E41!sYfA^*P^rFw`DUPzF#*nuxP-*5a278au5vI;V>!V{F 
zAe)JSZrHwiv#MU6gHa#(!Fh4jcHNiwv|!AL;k>8I{?X1q@V?Aq;$O=Jnw8?&1%)kK;OAjhDGOgQt*ZE>wz*HEPek+%=m zOa0`ReT<>;RMy;C{Wtp~zkP@Rv+G-I5c5C&!B2{cm{=UH=(-fVv~xWjZfeH&)07}iL(Ud|zxJ%+3bDtqDuNgGIkhA_7!N6zS_un-$^qshw3 zcsU2%O3`k%{J364t`mL2ewKTfDx;Y!w6jAo?8y}rNS5*nIm|fZe%w6Aieh!)uAA53BELtGdF#vFf$2@}z8`p`a2 zychY~7w~`?(9Pvm0qz<~o{1tBD<+rnAPY%zJyl*eot-=MPeWqyQgq5{%(ZSy#eu3H z#2QUMxRn-52fd8Q5voKl87B_xY5fyw;#H=^wjOVXI^Q9pwIMP)kYErEEX}FPAl%sd z&TyD(X(0O#(Cjzc(?o%)u*`zmiqB((R*w`TXhcFjl`?jEGr69YC zL7b4^6$GUQHGhEekbGWGfM=4iw)k3XOOe}-?J3Q_uFP*cJgOjHtGe( zm~;CGaaQ8Drg|9G0#!w=v(Zmt&@)x*Y8OosAF#r}ToeeX@`A}P1WV! zxP;bK!nA`JUZ)>uj)fh&p1m$Oin^m)yz^MhXV}`>;kxNXWwlum+ezt6z?T5u4tsWY zu*_$-(z-Kr!+NU5T2OZ=dA)>4NLWj--dWnksJ>Q#8zqRoU1_5^p(4rJ`1Kq70kyq; z_@1P9#k5Lig@_MlE+r=ptgo*1F9w`uJE!8pI;$ooaKReC#>b%y#2;LTb~hxd)17PO zZO7}YGEc|CZ%X1yHjxuy?}t5*B-?%zqk{@DF6ajPM8noMaQ5kC5;97`25{)B0_FyZ z#VQItP>s@B?>eJpB`B8ZUzinJz^tM(sab-1Piy_;SIYXZH>BnTld_2dzcAulmC4$7 z%_zfpR0_mdsP=G_XRPVcYMDQI5AdUWlt$3d49TkIZ5`iy0gyudw6JoA2+C&(@pO`f zpSlXgMuo`<7})HZNSF0R%TgLt(ByH}I#!!#OZjO-o35>2D~1d#-Wd#v$R2pUEBC^! 
zXx|ZSZUnL8#sEc2P{B?3m#FE~9J)IcJ1_E7Say|$_s0~hY)D(Tx5;%=pZfI1cqQF= zmFl8h?e5Ii4Na2_l35;@D%T>gbj=cMju<4BaMPn}(Z0wtS<>m=^>nUWtAd<}NC9Wl zwvO)Vp#j(@*fQD^R_8DT9>~~)8fY$vQF_3cbkb9ifSK2Uvs9UjLnUuT(7XoRQRU$5 zGN~p0)ieQ4mHgu0&}S*Lek-DBIpgN5FsdIqsqzf_36~rPVX^%=MhkG(5G?hSG2qkcoY7@B#N>@Q)=MO==izQmZH-Igym^f-6_u!b~^M|0je zyWZwOV^$!~M{9+1W6{!T0My-6jXO?pbPjW=7E~8~Z3Zz2&-zg*HZB{}3B(>OM^hFvIbh>9^|{Ci7JwC@Hu{z1UUOc*#3V(Fxw*Y`7)gh8SKYzQfOCs)~~$j7F#g%LD@F~bRc z12K*o9RatZ!)#!lN276a+m#?rHbU5^afS{=x9yFTDAVMXr2x-4sUjM@EYr;GKK8Jm zA`8jqogTH&uv*9taukQOc2`=gSA>`c9XZ#@b$}M=yQvU$6vMd^!1|q=#%V8FcpEB+ z?Magr!0!qqSa5I(8L?!zzFUgCUg|txZ{0mS?DLZRWp;c(*ue|7smL3ayg3mSmNV+S z`HHdsBd3%!SG!oq@&_OyKll4gY5)k_^B_xuOrKHeOvyH%8y800klgrEoe5*tmCmsJ z;%2`vkpyVl(Ph~zKktOYBlyl={T$r<(BZkYg?G%MpnCoNtL(-S{6Jsx&;t%AoR%P) z!knAL$iMc9;xDL8e{~vT8Ly)gc!uS+yol zz}%lJ-yv%<#}kq(L{JDQlAlFr>1#bI$>;9F+ify=)$eLqnHqaFIqOgjvt@x^g&ngu ztC006J$M434SSV^rCj%fc;4Kbxwf3$n=H%$%=V4OBp7O}mJiiL5Vjs2%bE*yLo8K8 zHi8XsS;NEv6NfYsiqTOKuD22C+9d@p{#pfXs~gnrd4W7^GZD zPM%dY5O`G9nNSF{Tk7me>^g18p#tQ7E^Z`D7$6@krBoW)iVn}~J0cP~b&L&1on3Gw z1LMl?LU+IAXu$e;Yg*Buj8W&%! 
zOs$6rPicdI0WLSwt9`EDJnmJoz-H>(w0(oUYF!ua#1r-@Kt$9!Q;#gpQM(lKvGqvy znz1W-rfOsP!aG@@uGCly)v3^m_0gtcjS;96Kc z=o8!9^_UlCtn9gi9OCJ#sz4PoHZgWHELVFus8!;57mig2IsX|vu2=RIcV6JnIaz-k z#;=dF3(e!B56Ph2b@t^l54Jx)t91s>Gn#b_D*F*^c zB9c?j{GlLdMVL41a&V2{RD%E8D2XYh&=0I?L2)d6?5ymb@BC}T(!Chr%=RHMm^Lct zEz^Z0V$3g@Yy6%i|;&t%oy+xzCrZVG)1x@MQjAF~SupLs0!Gq;!y z*KvuH{QDj3{bAqz#LsbTjHAHj!0LbX)gO-s_*`lIak1$C-Osgw0ciO$-_xWAq|x76 z?fb@?H8cAoym z-bBdufD*=o(0_FF=rHq|Fr6=3)j)FEg<_=K^9~|giE-kOOZ@oB$cJNNgm2R>o(I?b zyU4yn3s9Hoqk_M4{vS49oC7XISm$5d`jKq^#wY*X`|o0DA8!8(^S?n^W_oD?uQpM8 zPiF4}v0vi@U`Q6)wTg*zzjXd#Ju`zeiR?{u&t9lr^vDYAp8=46hDL%bfC{s4+G@!I zs_npLf4xI9^G#q9F17gEbFUAECI7knzx{)^e@}`SOb~uJ_dY85%N4xinrkE%J+J(y zk=#HvVlY+biqth@;yBWy*y;TaBquVI(#2bZ%0GD~3>YYFEGA)FMtcYVY5IWqBEV@W zdg$|gSl9#1nT+Ozux;{<4>e*12U22xLao{V7r=Hr5S&zb=ii$Ik7WFOUQ}L>J`&IcOx%IH()rMYrJ?WTO`XjSMkQ81fd_(hQ!7^JiPHtG~d0g~A{!`9{chJDasF!H(|o$|YIu$KSu-hX*ZzuCPQ zfTQJue}AaISkG^7uJhf$$v212O&{&_s-#WZO@#(@V_72U32^(3IBPa|Fr$9>wvgY{C^nS&G26Y{~eV77s3CD5)aTX zFq!?o9Q^mB?teM>@4@LG^mU5k{~HnAdhI_8N&>SV4W^OYJ2uwYTSW2ie4V2;KW8FE zz}{Gp(qAc?(Q29wyumMJ>-|(E?{|P zJT?Zt8GVzs;^YtCZl=G;`@Dl*!rDFlpt@TBC8^@cu634{RDHG7mM!sH)D$rnmF2V8 zt~?BL{$}pwm4j!}<%(aO^V(k6-x$5OMX-q<)3^}ab>d|^9651|V}nUoG>ba>hUH>z zaka^hwvL%GV0r0DzaXR@I0V7Qug5;o!G>%R(LbeeyD{%bv##)G-NT8JM;A|H)wQPH zUrGgMX|Sc96?iHu54QeE`SACiH8WrPT%~a3w-<-Vz&lT-DL&JPn_{>?jZwW(C|q^y zAU*SzQ9&<8)6YC{;jFS*kJ8CCU_T7WN3N#<>j8ypLmxDM{q7wTtF|Y57C79TL)9cI zdEIE(pC92IagDRxnaLGeMs(=2a!uR4O z;pj7-RD0I+e&_)%+NF$K&(9y}o31X3{czlqdq&nXOZ9_W@fsX85Dg4WRFd!1uVOif zC?G`o3V!htbsasq{7J~cf7?p{UIVNcyCin+L$m#lU$jEq;&9-&d>*ZGaI=yVC(qz+ zaWK1;IO{a0asZY`TT7as2JbCwwnbI4@Wb67 z17`N!GyXb%)Up3lF31Np6YLYEX@y_dF^BE~M}B(C;KV^p&@)oqXOa8(g1fafzAE;I zg@*x?)H=CP@Q|MkqN8t8*P76}JH_66{Lp0_Xz4q?12qwx(tYrEz}E7908rJRURb0$ zbPhGYNqV`Dsy9AB<=-9hV1m#3%_^v`rUjKcy;{0)@OOX*UkOl>!Gi6fYLQ1R$6iT& zr=*?C(W}SS#)AHw1#oZyAZFM79kLl%?C9Bp1OgDg0}wtdO9VeVbQ3?FlV{BL2)|+~ zSnm)o0w4_u`;M#Wt}*4qE6OYY{!|5eg!Z3U`u<0=|7ii~KaTrk^y;A_np1`Ral}RJ 
zDAVBun;k9!M`T>JPuS0WM^9F0q{Z*ysK0~X6m<}>kL*qk0-K#M4i4eB;W>yuU=cPp zfRmmi+5eH&{ioLyTq84`kz14Zqy-#t_K*xw=Xlt=hjczAa0==Nhkx-V5P+{gg|;}j zRkPwrfxF-DJQX~0sQeTF;){O(QRPP0cl^*L3@YT1{ z^~*#B*qZU&sO6>*^hSx{(W%=Uu?^&!6-b$(kVD1+Wb&rT_q1tHM~|P66WvXz*m`rizq?wh zvK|@LPMv8MDzT!UrQtRFQ&7CzBiW-%CE{C5TJ7~=aTz{jWK1VOT zT5a8aS)9S(q2(b20y?Tu{wlLt-ak2N?+>6r#o1YDVhfX;)E~HE$nIxxl)3h6TVA8R z;bwZ6*v{*`!aJ=5^^?ID{B}~H%Yv2=2_^S7hl6X}yBe16OpLgU;Sz}d5e!!!0;)0V z`TYa@{DJ+dsQDIGq4n0$n`%a}=fttUM=5|sJtxgSRGZqe@~TkrMRd%4N8N`Cv4TT+ z^#kbmL+{HgKAJdb-)6C&-b-{(VG|3cZ`%IWoO|(8Ms8OEw7ySMerB{(6GcTGCgpbO z2Z?)_k7OP#o}|BLzL>=6eJHW~N^bAgpPzHSHaFJ!abD*^YvlWZuL35L6Vn0E{jl9Z zz573xIkOj9ezV1ONHPD<7yj+CSMPlk{1*rMtIhVU#d{r)o%g58c@N8jzwuj{XU~pa zLLRuHd!&rDus!mSBtGz!{a50h*fY}%2TA)s{M#4tjt7)|7GUF zp#FdNB-o#(^`bd&azYu+@*lM|e|f{dxXgz;fNZRhD-8R;{ox*=0fjw^4@W!IYChcf zi{<|X|NIYx&r~aerP#+9XxaxkcN zqRlRq%0hL2B*2P*wa5VuNcOPQLzEb};p-A<7 zE`cVP(ipl@)E?a=WIt9aWIsZFm74vqzZ`D?$BEYxxRXMHJqSnVyO)m-}NtA&u$% zu`;v!UQaujtUtb5=-*{@R5Q*iUyhbsd*iX!si|E~sj~tl0?m&?6vX&OMR+?+2L2lan+Gz@_$Up>A_M8wl`T8F1ZR32*s) zf|Tv#LjP2Jj@CM=2Bs~xGApcbw<{))AkSGWG8JU~>-C|Fop2*cY+gY_#>kUHX5F6< zAej2`OP7$18HHk`J%(QA6psa6z5=*eA*Us-(GtB{9*6MQoce?99$nwF95Soy_Y>43 zY8orf%AC-uQ}@O4SC=9=V<1s#!K9#pI(zEO&SkP`oxL&h5$LymlliY6kXaQ8UZBSP z%{oxibpq0NRnqyn0=#WPCx0b%R5X10rQ))vS87fsK`getUeJ1Cef^Vujz=4e(NQTD zz55%z_RR1-f+upZp>Q6SD*D)03`&ONDC_w6>bBj?2D&FU^*v|`N)PJOKQDT6@jVrv z{MLB3%I(IbU>BjJdM8aJ`}O?A_w3_uLG&hm>}~VcbIug!{ZW(T7jv?r@=4+0Dx_Dp zcwsvY>+AQ?DJeBiJ>_vG64Tgg%jzZ#7unvNv6qZ&>sIMCTz~7jIAJLem88+Qwyg@4 z^?E-QmSskO%5S`TwV=>TT&s9GpTTI-^H<_5P2Eo;PoQ!z2F)D6;USye@mkGuyG}Hu z=stYkPvmm|vDgPSS-fin)O{Gk=R|{FBPt2%uhnj9z2-xpPf!6vb9=y&>YF00h@Ws$& z)el1=mocs`g`;kj-Ek#wyXnsr&TIB=8^uF&-}G~aZHnrbUcw-87OE0y7L<)~o$Li& z<2S>0jqLU#O&oiS9uoF4+^LO}7X@C*INP zKXH@uW{Z~NS5wlR*sheP9Ub+UtT5}(bQL9VJ249~mGQ`SL8g8K*ZI{C!rwaI%^(ho=TtUYiO7;S_vso2K#n!pyRk3Ubl_ zUKrh$wF~mR%1UicDfegD7@W?^bGGB!==cA?b}lfEB^+_RE{nY=6zM3BT@o_r2|k7= zt$p4I3;fiE5M*7UEP?1`u{otfYgx*-NsU)cZDhGe@2u!@kBW$!QyM~!fyUWl6u5%6 
zoLg>yDm(J}wvZqw3w7|<;F{Z0VBlRHA7RrHjYdb}18d4?`9mp7;0-qr6frA@4G(e% z`LsNJ=wBNV?LPLiwk3K+K^@wXXGaCbXQw??z=o?c#pxOi zY6zwcA0}!o`Kd7`tL=}_A)KXvvG~W`#boB5)I_u!Ed(b!45QG>XRf?`- zRw{C`=_cOWo-8@og*h^`qDk~vMtTv%i#LP)8!7vhvrG~+8oufujM!P?TsCnJ8hy_3Wi3#sf|4Mz@x}f z>fTWGsFAb=%v_>jCfxq!`+B+rrrrbc%VO#b7 zWeYd^8X#+HlyoeH znrOKb>c1)HBbttgz#t(bn67ITxR+HsO#*{=@94ZK4i}aWUa>SMdWiwAF%A?5ehvP# zf^iJ3kxti3xfDIWnX{rW?h=@MJU->pn#?oZW~o(P40|0Fb&k`ju)3a*3r7UQ@H(AL>X`*qG($vzJjM|hjZRC4}+0r02ABVQ_`rgghAs~QYw>|Ma* z@%j3)x;T+P(8EJu*}yv_xkLDcG(z6nQIp)fjRyKC77V90%KT{x#;4G?L6k*3d5pRw=x);d9$fKOoIm132E%M|9-aeuOdXQ7D@{9bsZKvAej zm6yd^lTKkwNn+tS&h-VyyDQ-3M8__;k1X0m5@I2Fo3zVeDlZGq8g6P77lR9?i-~3p zi=|u%fns{{1_!5IlEPrq#qh}1`P-E~3;GV%Nfr16iJRxHvfmrrmP!cQ{NkT7d)hUNIQkG(IF71l4gVK%-Ujs7it>;Ib=!%p3}O-XiBmMmFJHg&!2snGS5@L zw(VHh-(j0R-1{pTw!LE@-ajzB?IBZWUcRXfBJSFh+~pSAx!=;udVITapkp)1piRB_ zo>Z$rCvFa zyG%mUr!KLDa3wft)J>s_wcmM^bO3`J5?iZveQ(wQeWNJ0x-(UFD?0}5r)LI-#Oo)K z^+HC8vtY?|(^`ERI<3KlJR`*XbJxM7o!wu}MQz{P?ln@}9~;)*tu-`9OukzZCW5Rd zD-?1@X=t;3xXN!{Ie&^{T61si3$$&l!nu=auE!1VI)JvKu?jOc<3uSNLgYYZs4=E{ zqGq1X+KkKlb46R`^d`(VfsSpyqS`G}bsU-bx{4dT*Kkk=-7x2?+W3H*RWI*V(iFE? 
znHXkask;SOZu#|6K$&}g_~U8}_CrRm6bcxXYHPT;18|5kQgW*tXi8u3;2swb2Zq(h z`}&BnUhpHtdp_N>Vz(JkCA)7cygCEdKgZ0>%|}arC13t(-1=j(anFt+xF2E5KQn&m zjpOg#;2@S}-S6`tFJjPX8Nu1$BRzWTqGepKA(O2*ILsI)vgmf#q(tJpG5 znj?-{!J*P?$8CEcO-9U4zqW+Da0?~6uyKc2VU-Jae(Pmt>j@p%$dc7Mej92iI*Fy6 z=`Z$hZ-T7vHb(z$3(Q=q^EVjp5xz)qY~g3~W#$|1P&BIlV19S3D7m-QuM&^DNeSi( z$;Hhsr#m$wy`AFw1W+Beccr_(v4aK@B)%Y9oQa*?E{R6I9$fB9cI)DxrTZgW?@JdA z+0YO5+DLI^lNQLOF=u?|X;x=euw+(^-Eu0%l2YILys+V>ja`xWWgO6tb8#%6a_(z# zG{TeOCCe5d107BBB#!xq%_vD{4XucN>=s(9e&|H(BlsFmRIBqPQ@%Xh@A3jZaxRS( z&&DqM!f?;ovrCq0rZwIl&Y~VrH=gc^D}h{(xe}w}&{SD%{Tca@e+_3)-?5t*lO^Spcb&UJm6pAX7odS<@|rNCID_OVHQ zRGi7_AU+srQ39AX5CqIrek?jLB#bW#sGRLPg&*j2A$EM=K~W%AC~$?hXz^Onn^ISt z9=EuGh zrqSKkA6w(do&E;Gqs5_wFa<0-4#LPMJ+1|GVG}<%ejQblLcnz@yBmt32=C6_MjFXt zHm4;Wd*@F>ea=h4ExoF~V3BvPMrQhAig!8L++~yK!#8o4L1v8=q=YW%USi`rJ%>Og z-!KvBsV4uO|HIyUhc%gg?V@8J#z90BLDsqJp55(7S+w^b+Zv48uq-(mP1+ zAiWcmUPBK(N>3nQLJ295{W9~Lv&EUS=ljmSuJhOTZ^~PqwVw5?df#zvq^IVf;DP79 zlRKO{p>_Onb|E*>c~WloApkxvFbMja59bmC;M_`HuY}0?MGZhRjTzN_qQY#vv;it| zQgc>4$x@l54bEGCXc9&?p42I6x_3*pI0chl2rjod!2zyqOrNx=_CLNd-KYkWJ9c*l z>(kR@M)X3L#R2TuwQ81(>*}HR5};B2P`MS`4^6M((Z@zo*8^I}IjcuavrbwQC6IRc zF3`+>GAI9uyQZ5joD8?h$8QkdhoG6 zKPmzNoePSjn;SXy`(Em$W7`kgYu^+{fOMt5#f1#I%g2adDg~eNnfR?c1C`UVpxh21 zbGJhcc*{KN;xc_lCaehurRZb~p(a?#8--^hm#H1)?s4a!&8h3<_7e659CbV0S@}YH zMM`O9u0m8Mc2(9PfPy2{`$TT)Ci+y{f*$GVJX|KKlN~$Ba$1NF8TIPQYJDOMs7DoSfv^)bX|e>P(b;PrX9i<;yRwjeC+I3<6@= zI(lVuk8vfM(UU}Yo{_yp|)Gxbk zAV2iDJZtCt0i zrh*^7ts@c9X4H6hll`5rfE=_nb)#x!c&9hW(R*@SR-kutHhl*^+4H~#wJblBF-?6c z`}4~PubQIj^bXrYULE_kY763{2+!YR#)0Ab+ITmqAIwe$2XElkcIYnh2Pw<6v3OzGeK?ju&Ln}hqHSTGDpk5vc~V}7Y+xWq}-tEO&QvQDISkbl5qLtrqxWdy(Djx z{@wH%NEz%bH{H=ViiEd8YYY^+!Hmj?J+)3_$NWkPL!KH&eVok+%F)5Q^DfJ>&}R&0 zLcjb~`%;T)4^f(u&eO8-_AzX<<$>s%z_L)XKx+<{8egp|pYKkM^T9|5gGx}XsN33n zune<`L~ZJUc~qt7^0`eDL#_fJS^b5)d7VWMj6?yXCF8>I;pZmZgeJvHQ{(mZs*Sv^ z#-w3_Up~x(9GDoVxZ2^JScY>&V^gIMo5>z8FznqaVHu9Yg!OLbXLyU=!2YRyL+Q#r zGw0n1hjR&n!}`RUn?;AepOKhMYV_44Y?(7>)KztthUN>x^dS~PX*#2N3g!ydb94}V 
zx!Ay^O|P{}Q|WR2c^p(z6<)`Y6Z;V+ZQk8gr~?$Ci222CQe^porFK1)4niGxx5S-f z1K)sGnOz{Gs^j3ugKdTx;vhp|R`vWoG_lRH8+;KJyPjV4dMImoZq7tl4~oL4uVUW< zUwO4rJGDDuW8!x?YE*rOeoeO-+}h&eZQX0tgczIYJk=ygygJA|sDqSRmFA4;bCI{l z+*QbX0RB#7;RD@jUPSeP8fVt$FV zv-k08pKBk}maP!u#%HMEWyfMk2tvVOf@Hw{SA7{j*xpnOaN zg((T|9~6Uv(!Y&w+8238bZ#YrQpv?}3h9M>0Qv*92$E{gNMZjP6}%q6Cm4W}?=T7- zHKa3IC{K$FrMl<=MVm5-IYO$a&sx!qY5?maGu&mhd>@{BGK0)W|NMH_gIY6?`2MfDd6yG24egk) zd-Gpq#A1YP;)%kGaVHr~7jnL#hBU;Po#{Ho)K#XQ|Yu&~^1t>%Drg&-?s4d@?u~@9;o@#7$Rw9_) z{Uq6dKM(@S7(!+wKgS4ZVfia5+N1nYxlW(__!a3Yda1%S{&KU=T~0gPp6sbjob{Vs zD47s`W9ba!818#rz()MG9=^CR$9k|;BN}>{g?)=!L9ixZI)_IwuOYb>$jUs9@!3W9 zY#QXh@Y0sNz>ZLxcveKVY~pp;s*pHF@h@=ju%!yeYWl%;AnW z-_q8d)}OC2H*Q8YF`Us;uz!M=80F3kI-}GWH_R2UxO4M^Cg7*{Gve2MJtOpQs8k-u znjPWSJ_IUy>*6%)6DjM|6m;jzmI>BAN9o8lJ)CPmIpl@-0}_KV#IOkfR8cM1rj6#p55IW| z9K{4fxN9a;21Q1ptyAGo=E$8cuD>aY&0>d^&c$a=CV5&#)cL1iiTdl07)=+cSj$61 z>qhtyKR`4(-|yJIHFtxr#N1+F+P1ClW)GYfx>cU8L!Mtr#bs)+#aA5pI~?wykC90p zz)q*_G0@jA3kupznHQPLv=3WNGR@0Grakkp@ARdv2beiV_(N`9UfnBBf5EuFTRclB zF@9mzJ#ow4B&3d%p|?+*0EOlU58Nh)KLC_V}UMJ9kf^7L)a(oAJN^X`v*q+bIYr z@5J^kip$M3A}7TQA+t|@zE|2_@bypKeVNJ26k;%k8 zqX=zHZWeV;4pM2D{f&VY$Lg`mRAi6-=a0Lpc2>H&imm_S?mU8?MK5>X0H+}9GwJ`R z+ruxfFg&?iAn8N@VR{pC`v;(uukcfxCTC1}s7TCqbdXqQK$Bns)a#uqfJ0nDSYb%K zq+-Cu6Pww#xAH_vRhlaspq126mf;kk?OzZ$+IdTO6l)Td?0HoGbpAS~Ab7Drjq&EV zSmF0N| zDQy^u^JVgDc$!t2*@sggLUf0`pm%Zn5Zz95GyfGrair&Jn@m96|@OBVpOYql;vceKCp&(i=W=DJV-#nSmc+a3f$)=D^9%5TSLG`j&kkXT$c9< zJ5_-LV(OZ9>S`E@31j*`U=VS+i{{JN;X$9ZDRDwN$5e~o`^1sM$z{t|uhzLjLFq;i z-RP^8R^877*oAJ31L?Mb6}dn|_!@vJDdUYh!``RP^mBtn$7*h@%eDS5cVo`S{K#VX zB+2SQ(#v+?kD^Nm(SS4lA8(%Pmb;`ZG#Y%HP3`_qzfjz{e#RUbr~1WdT8OM$k~*%e z3fmscpPTXC^lVElN1?V+?q2I|mW0$4y%`}7UBbcsjv~}`BbAjROY(W1A54szwMX51 z5jUff@pLVaQbq|5Zumv=77Z(^K6L~8xp(SW+M$)#;uo;(Y*)yoV@Cq1;+Tgrd~!e*Wg+uD_utO8^vCP%tRd=QlNMyzn;;w}=IEdfobqpJ4; z*E%RvdK{ExD&w1q<0a}|#DkiV#Rjf}J<+H)1VSzNxjUHKJF`B;&xyD`7NC>y&gnPJ z%2I@%Az{rGtjg-hdt)FW%X{3t9OCZPN4*ol@7+T7ZIWyb{i|xXq4(aP)hSn_57B)x 
zuK2#>{8B|$bos3hAFm}nJ&RdjW-iky_I7`et5{q|wouI@gdCo+$K@}MzH-?Ul!}p- z88d8sAB%}PXx*Q3Yr7x=ZSrx=EifQlRD(V1P|=K#osJO}7OpAOR%lBtma;W0wO?rQ zCCzAJyW?%drL;@X>9z}_fm{K$sA66^GxB=(F=DYwHtYswpQu}}H#s6XE$F09erA4_ zSn*=&Lk?~H$1Inp5b*jkb5Z1rY12f?Abas8yT+$&RTh?K(5XgQ;Vv$x zm8wI=-z>K2Wat?ddXt_kI7x&R01Sh(b0+Y{^z-pk%x?409Eoj{`9_z8;k>EKcnbuH z?7kaxwXD9#WLIWC-Z5d2F}f*x1-?!q>r0ReCP7x#JS&?m zTja<^gMv@X1&RbH%WS`&s=-w>Kc5{$ej<#S2e_8{O?lZ`n(9Ex94Fbztf#*BW}P+k zg)8Ehc77Gaq-aXGVH_V^^fYNE>^FHxR~G|BRytCQ7iGUKSvTJK0Lz}#bYp>+L0<|b zzkDDu`24bnJ?W{q3qHqf+X7GW9V>oYVn!Q3wIgx6afw2Lxc>Gb1OM~6z&WC16F|+` zKU)TmVD}<*A6GD_m=+p7++OOxa4Gk8Pnnl-f5?^f9%WXjF%Giq0Dm6zFx4*_V4}g0;@oV@V>^*Mjwye9{IdLV z8lFl?MmT-lV=Xn&*U11!o?#egs0OfS~(FG>AqF|FTl)+EX$ox0nDMy9&NHVQ3OJG}ajh@9c9UsK|M) zMUz@oEaoc1E0}CoBVG4H?$&Ns1iw9J8&4rUwNoz|_GEE2g<`wDL4P?&5?9umHEo?n zcvA`!xxs7<-|=@HD5ISNvH*pzmp7GW`%^AvJ7!aGp_y2j-DNwQf^Rfd<-|!@*1A@& zRvzCfiLfrNSU?hH<|pC9%B1-?BUur6Mxij1Ga{`}+d5~*4*7rv{#qIi6c@8ra-DyB z1WGAn1HE7xsQo>j^vZc=Q(eg2wGWYywBUT3KH8*t*Q^}()J3C>HeKdcWvN3>VmH?0 z=?a+xHQ=06)zWq+K}NH19i?nU!xqG3Z?lg3RcCH-+UDtVZ-K{o1tXU1#?C(l&FiO! 
zKVIzPmB3ASWo=oOERcgZb|Jo-)I%dFAV{x;mEM(a^&IFTZ*Kqmwa(=uqX|gHgc!k@ z)!?7rALkxBl04jGJok@LH#yCLdO?K0czV`^qc|+;5~^WrQ(YQ);T2bb0i=fM5M>zYUiVw?qM&d2+VyB+^hZYPs&%q=(xVK49V;WG*Etr)F<#S17N3xB*V_Hk)3 z7_6{zH?%S)%h_o_i6y_uhu`h6k8|3(jafQs2dMm!=ewzza$a3ygDyhEmRwzly%ldc zo~G%*6X;Cz3)jI=l1&z`t|cnvp4gt1rUdj-&1_vPSwY?a{aS&j+_F}4>(lF>vC{7Q zo316hDL2s9v@E*DUtaysI{jHhcz23d-5|BcZTz`qS5vMPs#lIh_{ld{YYEswBZ&!V%%Se zDdCE-+x*6I{{?(?#6kbD5>t$^Pp3>lg%NQo&JvLn`arbmVfX&%UQ36n!|XyV)nU#a z-UNDQRo|Lj1G^>ODs@Dey4Zy5cj-wOZhOD7Bmtd?dtZfE3KHI`*ETnsgM%$U)Bxyr$_LE^5{*k94 zoYnZo{^xYd@r2xP48g7H_+8VmxyaS}-G~*}W~)ZM0}G;Dj7jKnA9Q^MyvDrQ@tcn< zR!*@Qs=6kiTr)rYCD8pkVTXEu=D2wgIO{GXaF%SNY%SUS91gffQF|TD4=ba56%Par zQ!TJ5q~%8ql1rH^*g?+ac#>dxthkfU0~Rn;DqGJ_hVIXU9o z=I4d!16q#C{aT03l4^n2tJK%#=kRZL$O88Vi{(7ZfC?=y4R9Vm@3u*D!jR$Qhw?UT z$3tdCsDqhXD8YW#Xc%A%X~3)_zb2LC>R|N_^gZO7%xs;VI;puc^frYFNtRnl{l6y@ z(xbG30b`FX>sQB|9 zrt&>cT=2CK_kwlfMzGF@xko0cejekOQoBb%9e!cVV-x7bB!S#<4Y!QTgoIpAsW6R< ztbA>HCVFLtOWQWJUh@b0=jsch^OI9jMFuETx~g8iKcoj$_W16nozMBJJK>MSM|&x) zhhF%0ZZJt^Y{ z^5wU}Oig$KSlLTOPPJ3O6dC^IH4SXX`p z9MubcJs74(6Ahkk;aSR|M*B!u*Xwm6@11P8`{QS?Y36JB6h^eJk`oJZRuBp{d0VOd zCLQ(F{+i&`WtMiHm%}XmMVHq(*`_RJ3+Q&dL zRbpfAIUKM#67;GZzV7?Anr^&2mqT=Oc&CUzt|1FdH<^AGX@owC{--f9q8y4(K2{X+H{0Vp2muni2-e2a5cLtREiq-P%II zH+9w@a?ggz{8(+OcJcJA-valWRjGoWEe5<)1-8oO%NJx)Yi4X@`1yB=c^Kcs0;WCk zm~>d=Gnuwk=zvJZ>&?l{VSAu{)oj?h@b%AmQNfkc>=t@r`F*@N{porhspB)y)Hdf1nN?I=oHKAL-NsGl=Vo6* zJ3eW$Snxy=DCyjM(&8^)v6;14ZckUSUw-&HXDM9NGxdV6(s=tLv{d_At6i{}Hfz`G zZ1a~5`)xgQ>)jt~mtEZ1$a==hWcfLz_;79H4>4E&)$VDs4}|v;3{EO{>so zb`us}Nh15~452$Eo7@r!Cd^R?UhAFQqGJ_8+Mof?rRxmrdeHpW&9d2Lv8kVT`K}C; zC*pl%QBtn^4u*rw6E7A&z7J#Gsu48uX~yLxu#sKv*CZHO0x4wAuL3&Cld!d$K}mbX z$%l=c&4F0h3@>B2Uv>peGirAJ27IOI>mc*&y50>;s%xbQPd!&u=nXs=%2A8DA2th- z>BbJzF4Kn}aI^3&U_4eMS5ub+64TCOkS!yR@mhOB5LC!M=cOLm=UkJ+(I z`SA{xFxkA6QYdq5r=Jl5Pn&TnZ=MvL^{reeQ%mDHRT5XuY)?0=P$^j0^LIfLGII+U z_qr$M#u0Bm1numXUS}50s|Fv*$r81+oSt;O9JCun#O|awRIHqjcLo!c%ftyDE1^9i zcm3^{7tLchxR-ooBtf`?zD6U66bKV&p&?fMTHSvz1UXw3Un*^S>IgQi;=_QUD6^+ 
z`ZFW1d%eTwx`fSHeSu`4+0Ev7Q1RO=_YeMjH+m^(cY^g6KOQD4U+j$AR!%tdpg4vPpT9Oct*GNZujOWDv&=wV z-g&^GRvO+Qo$Y54Ta^l-3X!H$aM1__PAP*yn0kG}IkV4YestU!n#YfL5!1#d{V|tO z@by*Cu0Y!Tr7a%bCf9jM8u)fOX=dwYZUCjl(t7^Zt$rEH^;)rEAH=R61+5v{Lf0r& z&`~AQbrIh&4ciHx-+4u?8r2KQk$M31;N7J3TpKc(C=+E(C}0)anP@U|sY)W3dqesz zGGz0TkB)1DBiJ@=Rq*5pQ~Din7deD-@y4iO{i}?pt6|jm+I?zQv?r#Tdh&^3SyM?x z$gHT_{xaY&ZK0g!d87Cro7elyrk`TMb!%;}-Gtl<;bK-+l{w^3jbVjIF;1;BuuK2V=nlGg8Up@|Bm{i2`z3X z+Ruy(gqI8p2=U{wsjM~J86xUhU^k-UNUA?07A5-pm$@L}1!w>T52f}c_7CwYeX6n65DMpu;qyyZM5M5(%;s!}X9vr{G%S1(fA;4* zifzBGs9cn8I5RX|>$)rjvY5jhNG%eoQZ1tx!8tMeLGp%v&Qg#7ErH7AK&t`f_6bCf zhu(w)r=`MrT5gO|nj16$9d| zp;wQ2Zqtu4JF1seeoZ3N4q*7HIi9Y-yYc{o`bxb>RSgd(#8Z`|x)dh587Il!T? zG9%r?eJRo9Ufbf0=J{Kc1y3b&9G)5v_xZqzYzM;dJ4-OySyYLjr^*DgdmS&V({|WS zVtv6>QmVO2U;O#08gx?pS$L_8h>?e-=QTx8`!ocjt|`4;0#6^WflX3Ua1--=X&<&R z`%Sh2o^VKfhN6jA_eV&&aSK5TsZB+Q+6z*EaJFI}GpA1vqw2)Tk^Fgv!mG)_3hXj(_Zf48YW4cNSkY+RJx;@3K-`=a0Sh~b^t{0JSXqd@N zF3E*?sF3LytGcp-TUjy~q9a1t5OQ%inZ+TQE_8meG!3ohLmeVuo7x@N00NF;R^&pe zh%S}bF0ay}%g5%#g;v8egKe=xB1^Q;eI)0oj?zVI%TBy|z$S}BeBUD1MOXv>HTGW`h8ak@kBsV3Mw5ik^5*#lF{>m5QvplRkhK`UXbIU z8-$8D5QFh;9;s_pgH0YO0?|5lUm$r5C26rVn?BfJxmyQ>U(N&UQH+E|K7B`j7|yOEzXS9IXO0@X?!aksYz zMyEhdii>;JY;(Bynj5Lo+E#kk_S050mDR+!XB~pAv@MoWtX#KdCZuU9#Hx`#?x~f) zD4l(V4ac|BpZ+tPZ$6?;K=lp|ewl?D$mgn3hx}u|EiNV2DsM|VOn==Kx}PRBPHCtKM&(dzt#1%NCWR02YCK6LY8Z;j8fbdik3Wqkqi8BHf+ z-T8C`)ShQx4Lv@6|3dA@XnW+|L+0n0FE7UzL6V=N#KjZsJGx+1OPe01w5cE^2<*3- z_Z9frY4MiIaU-PaHwiG{XfU8y0cQ+}G+YPA-3qCTWzDz_aB5Yv@Pz^NQ<(|?QI(Zn zHEW)Ny!VVsUw@yni@!nl>7I?^s9f z_+WO4QgSpdGRni;c>035IIr7Br08AXhD|1qO#4gyuJGRr=OY{MeS9*fH(5VXmPR)a zq)=K0>_<9URHF&#tV2OdcCMyve-CYQAueDP04eir@o;b!B;}7GUh=TH>eOO{3%Hl5~xPs|IWE z;Wmq6Sb*Y{iR~Z)UYrmRgWe^}0MVVhLtEhHKuNaK6JJ>#NWL0>2?+O4-EDAYO87)q z&}r^rm$_1d&&mBRpUzZdjyZ4S#6>tqx^;vS1V0OP_!S-x0)@EioR#>ia<&6~hhd*bTI7kBZ$umMB7P7n3? 
zU-Ku^*()U0K!2HCWEw?L2BiIfDGbg8YJ_dcHjb5*1DpII7)Vu@*w1pr64o}os^gUy z;XeomJ}^)`Q$~hz5G%~$x)7cjboS`GDQotsJZa{j2ZwrDXnS=SRB$2@{&aPk5E!#s zwSrI?-BED$9kBSJuaDnzu2S*X7mZ+DqZ41C-e;zpZK@9L=6 z#oHjE(q@~58Or?-t?d~%=Wz=lpX{e;SU~ALh=5why3T=ZZBmN^Nw@89HdI@?Q_U|w z7f5~Xy>kIkxsS?6;|xjf;w)2;xwotK8uGVKf@quZzcM8M8O3A2^G2hu4nN1xraxwE zH+!2B%p)uh%;@aI)wtVgW_acu@UEJrcl1>{HH>^2DNJPh;YQP+6s0d{3iDCjsApcd zwV_GbVV{8*5u>l$=H`fbX*j6a5VpQ9@HHy&XK6@b?A(C#ZM~~#aO7bkFb*QJwP;|~ zB90QH&u&}sO{2r45W1UI3wp3{7Q*H%c4?gjJJeAdLnM9BnZY!$FzxQ#hZtB@Xfxlw zj87fT(6%k~_R-{_qj5v&xC51Rvs}Q>@cqI4O^sz{!PALEpAJ{hFZU88vcC*IG4nG9 zg?Y$jN$KwHK*T~e@*2k;&uEO{&W*YO^qlu*xwECSEC7!~bR~@kI6-1fWJ9YJw&?9; zLV8VpRO7+#6z)82X=*SHCjg3K_H)*&Nf(n?Jb03lQlW)^l%Jz~1=`x*KW0Tz>lD1q z4UY2Ky@YxTYHsjy?8A_Mna$7~Gqx3Hp~ZS0>6B%CZD>&gYY$QInZ=VJMt-Uwk@nFr{6mj)C}7AOG@;hR-;rd@%ieFi>gkOsm6v<`BVc~ z4O6F7a=&y(&=p9|bps)V-@dOokf+Q2_@(=3{r8|bVRF@!`KPFQyS5f&%Ym_nmAFNP zWx+`@mjYdXty)mVr(z8zKV{GFS1y>fkmw*JTE&~*w#JI|3~H6q=E4uSi1P#JOs>V< z98vL#u;GYZ9v-SNXFPt^ZQC+LHPwqYx9WW6(x9jW)ywYyid^{K$tXeWURB#n><%TM z6Efv{NW9~ChTP48*c`0MKBZ9$sZO{j1L}(#pyub_gNa-;(y`J!nMn5+O^g|U_490-!G#9nsl_T+>RAuM?j;yzf7uC*kU)`d zMpcAPH#_()!XXMI>rOn7utB8deoYE4kh?y>Hu|;O)NFz*$x<=!oTl~xKdtm_Ob@ju z0-wKaPYq1sLC>w3(G8I0WlQvzLw%dG{2u-MK*n7r@=#w6G%t34={dkJy>IHNBsBlR zZeE?NCz+wAmxznUdFuc~!@eg$`f((qihVqTlGl-Z&7@!AVBbiipMw$f&r_#>wt%?K z&f&HC`Zu45726!0gw9U@E$I3KnZz)bgK*J1rrL4^=IrG|pl+db_-oLxH(1V3s*E3h zZnZ#u^o5Q;kB5GbW*e2(cv;Ye3U<~7ZL<{XbQMo zP~^K{I(bR9rDLk19F2RI=ulvk?&as6fT8nl&%`70m zqT3$vX=zSl{*V-G(xy_afeO5wZ0$Q9$B$@1R^8m8V#!*erun)u3vsb?(_794_U-;zJ667^A0SB z%w1vd&d{Cy1tBUzr`UF9eeVirMR2q}>AjOiO>1+2@5Ucqoe|%gZW$h5%f!r2+oB|& zb6zi-ZaD1Qj1aKVq~-9u$O*{VCIsO&6;P} zr<1eety#BeyF-kiW)8o%m_+()H6UOt#wxy&id;8HrsIeMPY(;|;GdO}Ms6~;QE!$< ziPq%8C`J=?_QfcdF@9okvGaKcMDNLH&3I^u1R+yq2Vhz9O}K6FMq0tslRrRfqD zX~yV^E`y@ZPiAg%W%k)of{ylX59$DC25}=mjBAPQ?c>$VR8zyOfngNfj1y`+d{A}g z;vp&+obdCW=V*diX_d6jB8@t^$yIG4DxyKuv;|wV@zp;Dgs=VD!-g(P$tMCH@chiw zA099|ZGF$fYi(~v>@?!?2A4l2kgMGV!!p4_i8(kQuUYrebE&$@A68K@uLBACQdoxq 
zuE*xYulX)`se*?QLa--JpUyzu8NWGf9$UOj1vW65`k*`5f}QVJr0sjiAzaqx*Kq;4 zh!zp4RdwwEiW0(Q2)~}+-MulhYd8wJJ>KUq$rUgw<~2Tv=sUv1k*gs8K&f)R8J*w) zuVV-uLY>m)M=!I_s`AWy)nx;+e)0CP;1!<5d9g)5i)8}0#O(=6wj&(B^|;)hPV%6% zKe+21D7eA#tn5H_iKG+@!be=EuctTUT7AT1(QBh)+9O1GL-}i?GENRXNf{PI`22JF zkl~%+ULs6H5cbx|DXml6KI54*85(?89o#Zs{Bu{R-9+DhbY?}t(-(}RAbq-Y;~N4~ zqE5dlRt|R78cMFcD1vpuhUm_1(o>_4eYA6S41tj#s{-K5nPE{BQF;Agvag@alK1{4 zJ7+O4F(f!q{&l^>y%}seFyO!!m)=8ux-39l!LN@Gjj5~3hbfqWQn(Xf^0IYXxxk-C z`$XMb5y>7yGn4BLbGJGw$S(~ht@Xh4`Rv7x#!4t_q7Nqz>)K6 zs9R~QE-Qjpgi*!g9=*ZgWZz&4of=e ziT|1mNVDnHSt1M(XmD_`v!7ziw$6U6k22VE%T^KeGV?!fD-hrAU0y+`i}TWTUR028 zxw8bfxAXDb^L#FM+{(ofU86Z10s@#m7)HSKmH~5SN46|XjEzPouNL>jK=)JTw3#3r!g)NWdTBWZf z3x^NM@Rd;sso zK}YlyGx)&(S~5}46Fms13;me{6T`(D(KOKp@uZZP*o9+(Y{r6FKOjslWh^-mT|CSO zS_E$ec<6LeP>V%dwotk3ZjOkYkebW}6Ecemhf~J-_jm<9Ewg}_Epzd+;=D^m)Eq#n zIoh{M*VprjuX*??N!;Dym2cg#a#q;_3OGH?%{EqQ>l=Ja*0ZPi;S$JHB9J=)NrU)e z1$yV|ogD=6UghCR13QzN+OxXGnzIM2;mk1-`%GA0=f#ICu9PeJvINe!Rg}} zPK5$C4y!b`ap+T1m}|O&_?KhA{gE_s!{Np|4!qWWKX61v?5Og4e})PVB~>**%)O4k z?A%mgYI5FuXl;9!8eRMObB#j7^@^ZbfBu%;IG4>e(gnNkRqBC%7n?Uq=uS>u*rwT+ zSGE0-J@FXPMU}+2Di%ijfmambz@$cQ(~2haLGY?4b@e`bVN;gDpHYAj_Zyq=mIqIB zU}Ven&WPKWo=WnWW(JeU-faRR#$j47fFew+z`X=sNuVpfx0lRK(dv{ z@7}3`nMNM(@=yZmcH0xtBR_G_dM9Cb$6L&4sk1KFtzT@^nx)N?Yz}p#mfp&l^ct!|j<>c5Y=P*A6TbN^6q>iqL2$Gllg$43C5rP|n_ap_ zqF(Q)176u>&{0ngWYZg>xXX&2-gPPgdL0}}%<64+5^OES{TxEs-Q_!E$qxot_~xkbFT&WMSNOSv-PHK~K9HBEg`vYL zHOlaZjy6AAvB0gQqe9w1lRsn0wdn~+kD=u7$4q27Mz>Y+etMItHMxy*AvOKG_?Gi( zgY#xBGNu;syrvSbV;t5jd0>TlN}66G`}|mgc)7Qph(Q~rx+#)hxJDmgJO*yaEt^YW zcM4FN@x-~zg)s2{86Di`dPj5CN!hYd$SuQ6yx(QpTfAF>kvJ$)2ywXOmIhfL5NTac zD%Vi@J*D>;sNbabHZ;9TMIJZKT>o+W#9Y*hMXx)GSM&kQf00SO(@oZ?X}`y?a(q&8 z_td)m`Oez_l6#hP^2rX5rH3nnB^D~g1bwBOhcI4lse-QAr_R$6?Xxni%V9Ybj9rg$ z2YUsvS4gp@yT7AEz)g}C8@(p&qk2;gjeKQuezs=I*C)4Uy?lY@Lb9Duk*UXB2olyg zz?)kB%!IUW0(f*{_Dd4Uoe8q6bK^jFK}_)^>h}Yuo`jO|#0tV@9J2)Pxemi|DhD4E z@y!HsXoUg;(#84cyUBTvv*vyGF89n8b`h59i`N`%4fo0(y2p?NlEFS@j9`|i^hNMH 
z6&&AfE7^%%k=~6E-0b|maqn^3jwaQe!8c^GH9EedxxhK#t zlpg~3paGj!yH0a4~ zk?4-p@%>GHW!(xHk%bRmENhMG_Vo_tqzmJ88nFBHbA&nIS}oUq0NvaKhGWdyuPOq-r(|h zBaWd~gZeo7gnfs46J*_6TW$#l#$iDd{&D7g!p$_*2o!Okp<>`}Pxi z@%gnAoajd4ytv@ES$E~;b~blkj@6`cr}6Hv@gROrKf`7jbUsz??eql6a|)55fwEM{ z%)zt)T7tv!h08iz-t6(tSdG(1e`}y`D5tUz5;b9nR0Vo0tp`S=tex*3+l3z2gWI^q zY`RrR_axTD>V3smlTg@Y!()lnB;`hnBTrzqchUT$Cn~S&hke*q)PRi!iW)b=O&pbh1+xzxw-Z& z`DvZ%B=*3xw3t{en>;(4u`2o(Hb-Gm++mc0T026ISKDJ8Z_sEpO=ho52%R!pj}kBQ zgkY$)f=L&ytA_V7ukZyI#*=VImuznU{d~E1A9rbgm*cVUZ?8+bvphAVJN#Sp*6bZS zhQ<3Sg;ll+Up7w3^U*IxYNlU*w;HJSTuIfOvBe^sO4w3z8^$z54cTokQ)tv_P)YDT_1yJ3V5O!Yq)+Pm9#%f<777&~Q1RA7blj zFPN6|^j%M9qW|pQI6AxDvovxvIhrSF*2{c-w!4@wJRZ!2{EB`ucko0?cP>5sE^HXP z5`*rs;Y!VW&4RmfoBERHsNSVw!+OaCa!a|fA<4`BRgYP{!F7;q{I|_8=huF1uM-Vg z3_fD*kI@vCCz>b#Pc}iJyrPodG{k!vI`-6S{n2CMFUIkKZFplMrrOK7hL_8l-9N}+!3n^(>A^Z;Jl;I_uH@txtVtczm=i|4)Z`IMwqRK;t5gTY&{wRi#HHnG zm~034tD+c88&i3ID#-q}esJb25OR*+T!xt(zawumm%whO8a{ow4N%e~H^a&V|IeH> zKlq1SZh^iY_sv0vQFpx#Glj1|?U)OVP=`t`g+3v+szX~x5@ZG-s-1d$%&L+5WR2F zAjs*-vm&)O+XDeWgU)q>Qn>QVdfJV-GT36@>ABs1SV$*-!5n;eaN3U=CK)vGnF=j; zmn`?WO+Ae}n^}&FM>0w}s3Ce|8L86rnEX#oE#$OzOg*(NY6{If0J#+%B@hiGiNMh% zb%)KBZ_GONsl{gQGVh>*-4vO6)T!}nX=yJtxIJYc%$UgFmKP=f!ja6R$ONx@_0t7w zy}$CYTTcF@W)$^X^%o|EB`2bPWb023*M`gArg3~Dmq8%}L3XaQI>i7j=Z`|#^Hn!S z6Tn`5A@^F}?(>HcF0=B5=rLe^4faXv^O-V9@QAUr@tlhc4jOg#axu25mXso_>c6a- z%%cX}MRrq9Iek2!b_R7UvrP-xUDnn>gWDf>A8Y`Wd+DK|pVlSd9oy%rdm-z74hn%ncbnbt89+1i;O^)Q5B zO+(k0son$&z&${9D2Mt|xl{$X{KYT99@RAQK=0-EXZ!0EN8KtIe) zCQBl`lK_qwFK8gdS$X)$fk+#(ma5tApSUg!+C+Jg?t2)H1v*0JR z`J_YjJ&8;9-Ty)xSu3L+uwB3Nsy?Htp!w`K(iKwO9`?LJgEc#C3X_ zT<6U2jX*578q;)8)uoh!(Wd5)?0tUUQzs78UyAi0J< zW?90qI@peKh#;6SYr!2XSUY}noI|zMdfMKp&XAK;8uavU6N3bTzN7F2*6iifk6Na8 zC-ntyo%{A*j{pkGsdu-%pQc@{1hQv9`m|nt{-&cn&=<#U;PgZ(vI8jmRho2WP~+t1 zZ#z25Ig(Yw>Occ9GXR3%26a?;0|!ecJS8~oUA0(tIu^OHCqu0o>G4dC37j#yiehwB zmt6ZjHUxOyq+YSvz*9!|q!rE*z`YzyE*J~>MywnsO7D6S^`bf&^rGt25* zWc|Uj{*q>Woa2&_X&W(SO#@A$?NQz0y_Jx?SWDhTT;V96yAfu8uY^1lTQ3y%rSRyD 
zf>3Evc7JJQQub~o!}kJgPrJU69Ba9ndnyY3ha>3whyQbXSttXV5w0%r&&Wvsa?O_x zCl)*vR#N@nLN@)6pZVip(j!DTW1s(bckwsk`>>XvP>@O5g30$dw)?FY^hlvtn|+io zK6-!ez6Jo5W~ce@^okbsztU@2;C9VizS#~gMeUi4#E9x9myJM`|Jow?M*?V`F3J|? zzFqkm4z;rU`g*t~|H-F?6j~bMo3N;(?!MY4g>QfNjb8k3ALhLTpf)Z&yRYNEeX^`a zZ66I{6zQZL1C@Vm=cDQE<9zOf;M;Y7`EM7zPw(H>XJ==4>UTE{U-t0)zybNXw4N^M zrfN$u7~W2ZAj;g0+*|Ug8A{bPqJFMbFgO^^_!c-J4ll-6V`pU~4j6KxJoUfbVn6uL zu#P~jR;OmI)+$6dbSp_^!GbpSOsB(>Ka}e=fSg=@YMBOqZI6_Trx%Tpy?WE|GX{ zT<}wZra1wmbd)gsH@E1!C;N~1eg_bq6VLqfcmHzl_FO>h%Av6CZ>0V&fBAOLUekJU z?*H3AW3K^k%qh+AZ~n_azZC%CP5Iw=%x@l1SrM=;xrcWDze9L)0*V;BR4wq&FZ3@j z^EU|Z@66Nx`$hjp!2jFEo!WBW=)^aR|9^qe{yzf#uiLxxKmN}S{zmKmk1G0qxr2Rw z{8wAyzo78B59 z`%wOLKI`ufasEdamN&;TXlI3D9db^rJ^Pf1g=&%Uo(#?0l+NFOz5uZE=FMdYEdMJw zf9&;}x26dhNeizx@45@Ft+8?*nTg{&vjimczayCf*7+Zu?|+o8|H1iu2-ZL@z03{qzj+XAfOilth7l4!4QKWbdvzqa zeQzjRqxCq=Wztp%=ok1-mM!z~^1q6j=6X_sKG2Zx;G<=S#rNy)zkNbTz5vS{ z{kP-apW?Fe5f7JzYVz0ESEm8Wfj0oMbkEd~SdNje|KRzZ+F$Lj&*!_GEVuGn(TDFH zJYb~##GT$BxBUA3lYW1&zH~aPDH0Ysqsfj_p8gvu|HuL$wk9H8(Cd`j`wwe<-`bxu z`Dxb{XRII7*5A&hk)f08;AaHg{`EQ+&c0g5#7l;13X_N|GLgv6v{q-tn;MvOPTLi)Bl7?YitOXQTeZO=e(XY@`v0%S+<0O^znl2>y@jjB&IN|Lrte1F;dyl3r)vP}=-tSjp^ zzyI;0dLRNuPFW5&{HwHtD8gm_kb20X!{2^vbYj*9=a+@Ip%nv7eF2T40x z5h5L6e}DJ?y2TazWyFg%9<{%&!%rCe`R9*!{wB8bP0!v}gmv2#{{1%JelpE_e%sew z{MDYEf}7+vttI-dWlp=wqyi=M)nP$EM^Ob-xNUvUXRiv3BOS?f-sbwr--vr8c6fGq zXI#J8?P?_GyPY`!1WOZlCI4n)Pd(sE+8^hMhJX8=8c;wprWm3pjS^`Cj8j)Vci6n;B$2mf-VpjCk*K`*jNRBK6~eCMY6!= zL^98*rv@4VjZUb*;4+uw|M9^3e{73C@%-I_hHDmC?M(5tNP4NvXPyUfv0Ry%Z&`(M z7mVMhfP(MF)3|wG`r-6jyrjFlhhAEi0I*X}fAQTS|K7d<06bqmZ&C0lN;Tj z(^aF?7dXDwd5B*Ajw9|DfC&-c_Y(J3_$xPi%I<%=Kt^}oS*nX={V&qqJD{m_+Z$Fy zR1_VhN*|TpRGM@U>C$@_PfWK;KlA3-WBX`!6-cHHAu5Z<*Mt6&(4T`5QUM^{=W4^}pLz1v z_hs6Is7km?ys`@Pmi*F@S|LK zTjJjw9rM>Q{7n4cZutCKj$MBMp8vX)&u{;qwjX-zthIhI>5U>S(mQqIktxV?>_4GO+aUCUyRB9yXXI!UH_1Ury(G*36N|0f9*s3 ztgS%ZU6FL(c8>G%Kldm4)jfZ5FW2bNv|BJ|ci^s*IBK;`tMKxbY$2F%jI(iSVXCX< zXWyf{6y`^tg}gp5bLmHlqb#>edBzffhVTK_*E_(}|^y7LzaiTsN(2dFYWFN?X=IT2qlEy8aoDl6+ 
zJN>b8h2DxYN=f^MGJ-j?M|WeK5rPyJdo3_vWlQ!-b>KMKEoVUJl`8fCS2J2EAF7&y zFGH(c)Ckf9CW}i9e(aSVn#So4HO~2KLhxM8RVPdHnG8TW<=*%8zm~spO)~8^3Wbie zLm-L7PYDG_p#mu@lPEOR`R!71c0T`kF^B$2>Er;KHL0_Ku!}#be`vvv0-t^??!?-; zpSa|CA9#_p!^TBK>;lV?q32A{LWjyW(>@F~hYv~Izw9?*CdT%&V|%Ad0`gpOPP_CU z6%FWxkB|(9syi%22zF2e5OkkR4a6_-sU+<2==5%e>E`HgVHrv;Y8)g9RqXW$`R&=6 z;>xWTNV)IWsOIGPE?izx{#kZRz`<_If2jR@wBvD7-gT3p1c<$`wU;2?+FMuL=h zw*+p-Vd{)!pO=Z4{}X_LZ$;c1#3&|_H zpv=|o$s_sk;B}exVv}E3Qkew#Hs%$_TG~(W@WeZ18$b35qcu_yemaG&rj}?)_(VNk zyWr?|0T;*7Um=Q*hky%NuPOV-{NI1;rt8Z;wn-W0bfbfe3k_23RE@uXsmaq@PCuQ# zOV<>l!kBmWu(fbIXQ<}s{_D72whUeD!S#D1q?hmLULJ$zg^-+qT{tcnR#~9gF*C`2 zlD|s^p{;2_*x#iJu$exsEH$H&**F*KVaIWTJBM44PCY7j`fbVL;q2x`)H?L$@nyEVE;+EC@jKq(=88mMZ5swVxIhar6(eq1PMbl%~c z9gE6mU!wJ5bvnNu&3eNA}eZy>fJ9kR|irdO)2y#%DZGWhv&AjPhSd(;)0Uros?Eg3g|9$k; zE2!Px?^y;3$fLHzs;tX5@fL9ru97z?%y47zg`aZ>_Ic&LXr(V*5&~+6)4+7djs~{3 z2YTwme7>)wp?eeO1dcLM8pM9!SHI~}inWV0QuJ}DrZ9RH&>y24*HDo7O$ZJQ` zX)=Y`^Lgh7JJ5&spxqud9X8wB#@h`7Ia3Khi?lNVq7WFPh$gn??Oh2`r*FWWb68z= zdwA3LsG`8FGveL#$HbfHH#CydC2wz66hEoy-!ci-N}+wrd@uw1){a#VGYsZ(Ih?%IOVjrL~Bso^0Dr?z$$F~=w03@u%fGY9H>-5oxuU^NfVWoWHVeMeT>W1k`= zat>MI78I`a9<&~qkNe5(YUH_WCAtt?dNPMAe4hb&VS|C82~bTl;OW zZ&4h1YqSSW<31y3rS<+t<@Gx8jSZR z1z{ zDc+sZd$9CU3ef)L{TQL6@;lMM6r2;dd&xQ!!OvX({$$DKM>geu(9VFMMc3|iEuX#< z*s)zZe*7GlF5A#)34RB9i<}x@Q2w&?#=ho*a36*R7JO@gFE)!FZm1M7_)QP>o? 
zc3mEb13+9_z^0hgeoT%EU1i;YMv8Bj zlaifJ!A|Gviy51t{hrqm^9}n8?n#lO77#N2Lp^!~!SIe;$3g!_m(2PVT}{R!BN|B@ zb^y19P96A;o8k68iK}D{fzHV?IIo@ccs%}yB=M(7m=Xfqnyuz_2DHhse|#55ax>LU zKnmK?+^CcC;JGSPV&gomCWTHu{A(vZ^=n+!004ZG!UBJ&thncw;=9e`TiSWC zvFMe?)+-Q*es`~xe%v$14r_$Zq!p>UVz!?Qo3C$TSsRY<&a!FTcq#OBBcx@ofsN1a zKt`w-Yg~I5ho2F8I+o{K{kpi3m*T;kPRy4W>*uN(71wLGDJ2M^{CWc)?1Ck)$u9_t zD7m1{vM8f^4p8xmWLV1@;y%o~`?V6K>L%t+0#QL?RR6{&+u(c74uWzy;`8i{~uge2_j+J>{uC~Xsf_Ic8!!G!*FPYA$ zFWwaoQ`L0}q)1@9qMYEbMlZfty`d&dVMg5RcO71180cH01wtK{!{gcRSvPoAYhMX% z_9x3={|O?hC)l*35skY_>ZXT`FdF zc}^z82Gdnshu?VR2_%2tXhprSmWQSeyVV!z$EzVk+5%r_ zWG+PNvZfTz4H_u;OY%Z?Ttv(Y_vh8pqM|h;%(9zn>LWFPDK6t4ggUIVU?_V|Xs2{4 zjpSZq0em`FCP_S<9Hlroa9cbIrl6`FF|%yb}_P%mWR}RBZWriI}7S!%|md2;{2Ww~AgNSd9Ky z@N`p)GY^Z3gCD_Yl71uxIGL6L?d~pbTGXN3wV=bBu%{~;_Rt-DVqqgtEnNm$8Fwd} zaVHv+wpx!?Y_$tqrz#pZr>--?<&T%NLdV#C6U@;ISQL&a01j8yHRYf?tjy#1lSZD$%l|6zt8DT;%Y zc%ceh#l(>Ib>k2lyF;ZoUV71G0FBznJCtzjFyV%V>d_koz8N-(cFN1f?#d1yG?y22 zkw(U_LI*cI+iM|VO$T|q$_ZFf8?Z?f?AStvA8Pthh1AXc;u7Pn=`bv3cHT|4SaN}wK z0EHw47%)!Zs_SOkB6_ar4SbByn?$^yrUN1n*N%XLJ<2$0%yxl^5b3bI8j_IMFh_8h zrql!iWRS(Je8xJpSl!Lqj%$vEY*C^Ty;g-Z9C^-FwtqHW~G z3E;5hr*Y|%-H?j7W|CtA!F(lYJSSPbj8Uqdic=ghA03)4}JiSlH6?5hTEn**yd; zl2|B@8ebCr#4q+fSF140l$=l3lFvJJ|1Sr%!x$0f2*g!}}BfFFhXLjf%{nV384bIov=E06`(w=Ne-G&H7uK zBb@J#4A^n&`>+UMZE2+G-VU!(e_6NvU6rol0j&>~%t{hbTsKXz>hhu)sntiA&kkFn zmXMK}>=UJbf`s799*1w+g1$d2>zhGD$=!QA zmg!P*A>e+8gWz}#bN03C+u4uLTpdR3ym-eE2)P!QPVmX_ndS!JDVobP>piX|FAMC9 z@d-D(jphA*;a{Zvg~$rhcbPgg3=+Q|A04aK5ebeefl0!4@>+xnN5S{)`27(5<=_9x z0oG@-e54W11bwVS0DG8U&9>#;`bPnMd(9M5cPt~-`I;PV+ecG|eKJjBd=aL!Ew1hO z#ZfMxYAtMCbbUR+m&dArQcn?=o*A2t3Vg2WxGgS6XqBFYaJO~&OS}vG94d!LUD+91 zV0XUz%G7yAJtV%X%P;_eV}!jL75^e5fD!f=LUkY?ng?}FW)i>MxcXNKo+bsK&o)Heu}5#` zOP9cNCutwsHXu1HRc|oD&EA-}X0eti^VL|rp|3KyjfJ+7$&EHNM3(0j@z%~d$t_kI zp8fnfZNfLUW2q`yL(9R`Kb7^oup}!g0WY~hf!n63@oGAJ3e`g)rMAoBrZgqmZ3RDJ zI%}>}R9dVKi&(NXg7@bWgHtlEc?oFA_bvCR< zJZH4J)R+0d+CYrHl+_2X%>3`u~;v|9l0fEz8S)TsdY6_STEj 
zUFbHYBW4;&RNp+I4}D!E(|y#>pInCXpWpd%sfIUbj@GqsZoll1|B51`r{-P+<5rhnk=kFx=ORNht_o^ zp|V}rbvbx-`b(UybMr$2oB)sP8&n-9ERa8*(NuO%_3rg6FzjF-x->;ZHF|_|1J53_ z^BnC|I1*IL04bj^%O95|H$IXk82IPZX&l%1&_rrrtW-_A7=_m%zF!C}sD6@@?69Re zFC03%`{+x%=GlU)MLTsLOC9j%(t9mY9F~qB2$QV@%w7B0n}oSdwr07T`_3I*0a=AxMrSfjfSYg4;rI zIhvxz1N>6{dTF z$s&p7SX9eyEvCFQx%%O`CZ_{>F8|T@)}{$Nyn^PTMWrS@aK0WL2sfPMb}7Qry|`(B zjq{~kV_;&wG`1|^3eRJ%7MSfey^9it_EvAJnr^o%TnJoI$9pYuai^RTbUxyF{&dMh zTM+B9G}YAqI7dIwr88*Tr0;OurqSaUdVZQTk$fA(zcJuaeA1silZ`u%)YTi1t9^as zfm!Cd=^p@QiR3-*C}hJfTpFLbh1Jyx;J-PXn-_NJO7o|iVNa1@Ima%z99_g&=+h~_ zjMW;I3(5G5%W#62-}dUYBwbs@jPbZ7oV{{btNDubIB)(8ejjyCv*+_xJ)$_SWv4=Oe^?d=F(rsXQ~DN%!qQG6r( zf`?76Taf)XYY2K$8tzG~6m=Kza3HVHt8q6eb89%{?3xpHs#0n{V<=xZ>}u;2%j1Ox zCcww%vy*i%CEtp-@aIVn)*5nd_vrT=JsZuuHx?_-^Rq3VA_1L!mpL%{;?hY0sB?V_ z0juIZ5F`wOp0|AzPyDKC3FcN=quY zUN030$p==IUtkUsk%Alc7%m48-m(fihqDShmawJj$|q5be_dzxY7AJ7HG{>b7q#CS zw7fLXS;~^L&#T)RT--~Vw0CrmO_|9(6=w={3si@wlr&jp1WEQE5-UG7m}-FYjFe6c z<;Ugs^Sp;DG`)&CKQXCK@&`I`770}IQjnBli+vPzT+5z)v=03L#>v zz1?b#45s3(J*p2-Rke#(ZRp}pq3o((>-^Mwt|`ecqlfm0hBs*Y!v4PE#ndP68EdK}>z1Y0-$6BU8h!tE<++$4CO_C5<9eCQ#X~Q{EdCuKFYI|& z!<8(@j2x*8?7{)*7JFSv&=oV(xpZADCP7ojcpEePDa{G@U0Q*?od}crdB`Rz&p7g7 z14Z3cCtNkGK^6;yX@P}hF1`@8Q^)RN zb;y_m9US#KY}*lqPZL0iHI3?;VWN1GuM6PDMJ0~H9<2-4pLJPXW4Sao{gSnD#Yc{A z;Yhu=w1$OZ0ZmNEByyxlLC!i{cHWK>{uskh8lKtWgs)*_9BV1Bat6ch_ba+Wr_^_U%gZ@CX*q?j?U=#Bf=0Yr7{9A1V zjsrhH0rEXV&xtEaZQFjI9`l}3hew_u=HFfR$^OV2GHIhrPgo>tpCyT|#E3>|ovsZ!ImmzV{jYo_zc7(1#))k5^wA80u7dBYk41d^ zc;WH$db0I7VD0;@&qOohi0^sN>uGEe*a*#qi^_Ym(7&!{cQUW05(V6GP=ii9~uJTwP;1IdfCTr*pAZ)D&m%t@&py z4}DHj@Z59^;C(mycNOzKLGa~pb&dD`FdNzEi!WDqpYCi>E;5eITr!jXHv|8t1db6W z8%`oAmvdj%M=A3YDNZkB)TK;%(EK0Tng9GF|6)k3qBx^hz&i@ffoV5=GxPAlr^+X_ z_k_rfU53(J1-wgiL!TM@UkGC_({0@rK37$h!Av?GRd|`anYE2#7wOF^VhTg{oMqEzAFf9j8fugtQ0Oc>f}1`x~L@;$D)Pht3^E2*z zPy?jD8XqMrj48yXMUlLo7)}o8I2c70qT7(Ath1dp^~nl6UH8@24peE~WBUXenr`Q< zkT*lo5)JO8(xpOFRK~XQ6lXYb!>I*0iCML3?qMIa+zAQVvud6aNGDqafwNeXhg({& 
zp|~vA{Jq~p?PBB($lwm=$ak8OCZ?iA)2hJ1^-q5OCJB)q7q)~9>tB_4*6q&F>CfPo zy$ewwv;3i%DLyaat_XgoAc?>})w&xH(&RSNTD>z*m7$Ata=|7DQ&mz8Qu%W88#d%B zn-u!Hv9HO;`V2R4bPq^}jC8gb@l}!sLYts|MjHYx=Hvu3$B2qp{^s>zcQ6HYCpTuQ zg767(gbDm2jbhfDcwg3g5Y&Q`Y8$gBNUhF+_3?&h2rpy6lcOOox&0Fl{VkDpC7^Q& zTfWZ-Vo%U@A#FQjuOuw2TC1@G!&LHA3pQVA@H`fV@w)8xdBlU#befC{VskEY7%J`afQ*}G(!J-*LCRXBW@;@V`>PFy*Y7Eqyv=sbQS{~caCniAHh)r+ zO5|;p(U=9rOlyEZO&CQ*%?05N7lij}9L0-QV0Fbh&1olw-mwQM-XGE>)oLvJPkE&x z@0ebdNpw3js4D;TDZC?ed}&>xwC{4cOPCaac)R-039`0cH?vb-4w@V`nW$eEiyodF z&9hi%s6h`0pdTz=FG$GXao&_Y*zp>5d311r91XI`5OytCXuK{n`l+xM#6;`%#;!31 zKgZ$Slbw+@f;XgY?ypbV=k_|B7Ek$LJ$iY z4#FPX*X}@*4DWX>1Clyypz*dzbZ25I5uH8|OL`>}qOtfw#j}E~Wv|lBxvMC5^nVR? z??MCu`mBxT{Jua<6FaiOWW^?m{`8Ghyl}{`cQPcVCKX&!VV{=1^RT zXya8pXSKd_ZW+g$FC4}BD|~^RPE89Y~yMlD-QagC-uRb$eK)uKoOI~?NJ+TmtI%R<}zI0M=s5&>A;KQ+vw^bIUgnV3m zh4&>MJ@E}&SpdaYzp+bAt!Nr0Dp_GS8mT;~_Ef86Joiynl;&-+A-#*0&hHNR7eTOK zaper^+<>X*J^)fl(j<7Q3ZlOZcCn{D?LC{2hNRIV3z@FI((w#hVNE@ zmpLNbFoM(43bE~6?z7NV9*SCv?!W;E;xXlfYI52&4%jF`ZR5e2V$_$|RqC9eDR`2w zL6jR@W!R3#Iaaj+sK8Cf=Y_@0~nX3%f*c zdE4b-Vwbz7Wt4m)2-p(O0Kc=b*u9T@p;AfgOH`GCwsW->enwn)5LJ4UA%J6;Z9GZw zf34&1{Pj*&jFRGwDfhR_Bxwceo=L8k`Cpenq28(YW)k;Q(@K!qcXY}VXS7;|$!m}w zr?(~^7YDx?dHr%YOP_(!d67AWcxS&$WNI)(E&89P7PMbwpDkZ~1z7-e72XPyd3 z$2EhyWV92c8;y?vu@|qNuDoF76O6LW;8Efv=GsT-tOX883mAB3T4+tOL*C^yMxv;3 z7Dp+~pk7Yuw4Aa$JE8k3MR849$h#KK*Zu%mxE2{{Tg7dSXkk9=CNv+u)aAaXa_6f- znn!_dP2psQN=JYrENrlkeGrMaYtSPg{>b1l@$Du*WcqE5k}pz`#{>ag(qhaqW0eo41sMZ^Z=r=I2 zUE_7rW?b!sN^G>8+*D&jd{}zvQr1gd&$6Z>#{}Lb=nO}24lFcpF<{WR)s zCCp);vBjh(0Bb_kTe^&<5GVw+3@e2vSQmQ$xJIg|-%7SWK06Pi zXfsY-%@%pOR|!@q?}Qt#(rAyF^57nW{rr`v8K|5o#WxesPGRbayMe`_hxe-~$RWRV zj;;I--UT+-A47*G7gUKxZ^Us&mNa)`4+d>adfdK}HRo35y|7*aQt zs}>g*?VxOS?iXEgnmtWkZ(vy`-%d%j5%_65J+~~hb`Mu6vZLeZjx|^{Ik%4}Xdx^( zHCOh&@T)UP5co=2zEeEH!HsE@;YxJN9(F{>iM)5#JvVo4c(R>qgWaug^N4D+HO_=3 zZ#OB9@KzTZxe^M|IDmR+25xQ)T`WW{HmGO6Oe)_{b)8&q?4)L@gY(^Q(FWhgf2a~u z1P;_F@UWD8zvGCe?O6`ck2_%MbBZ*Y4=y3FCo8BsKi!A`+hjd(tqV&G@${(L?Q+)` 
zD>8}tEE{5hhrbCZewlGEkWyW#Jc=vQz|C5<76R$moY&ISX@607;ggBx*~z1ZEr~2k z{)M}xvspXOaZke+#tRmacO!v?5Syg;j{BdPk&r*cBNb{;S(2A76xQqhLK!8 z)E}>u_vx@l-yUgt`N~d0u@y5$!(_QOFxKVSzAt0r#m(e8n(nYVd~~~NcXzVRsocrb zgTKTM4!6h{xMS=1smEy+X9==ddUNNdD0V=)fUPxF!L^a65qS&KW2>t+5{7xMhd*~E zt*QakOT{}Y&R!F~>@rfNm_8W|9AUC*RKBC2HsXo$=0CiC$4<4iZOM&zfAM^)cSB9A z@H@10ajWEHa(6B*zqCQXQxOS)YS*=aixp$W<`-$Cc0MO8nxyM^hwgYsWbk0n9wioK z_B#^KR)5vC`KbgKdiqB>?aB7_HP6hAcm#jwl66_zoKbvrc0aS2R?gQexYfmN+!^zF=vy z3eui^MZao^G$E7k@WL7%l_TK1Ini%F4?xVMugaMh0Wd_g%({yF^8{z;sZJf6AwdiH4=tP}(O=58jRN?FHvF z6v=;mX~FR@@=7&oq=m_Jg1=@e4)4rx4Q8J0)!z#acS$V~5_)*pdihR%udvZbyXpP4 ziZP77iNttec*lZeh@DX(2#Ff8P0qKfcTV(@yc8i|+$*dFs>3`b1WyQzhd&u6#YlV; zY57)|{@(raONH46gKeO-@;CCn6fIWipn?km(3Pa^l%DT?Z!3}c)!k_#B5hcxMyb|L z5+sJ9#Y4Udp;YWzc$I8rcxGy+ zZ(GeV^UnxZ{Z_A6Ai4Dmv3O_EdMBvJ6E>ZMXeuU$*bSlJnBk zA5^Ny<473-byUlHS%nLtD?$Ux5zYIqn&}l$Oc$wDI*K5QVUl8M>iS4u^#-JG@?_^; zM|lq!p$pO6K0JeXs!^8>oKgWkWGc%puiTVv2CZ#%l}cDloNfa=T~r3z9Qws7QwBQe zR8SPaKD;IQ#>?Qf#`XMHJKL=7?SfBmckrZdt@HY9%scLuage z;Al@=)t#&DU-)D`zlWrB`Cvn3!Y|PFh`9G3y3)OaTskl=r`sKU#{8d) z*#E56UAzdZ`d)=^&7nQ4vP*v~9Inn9MavLxqU!%;5J^U<)WRNkDVn=(p2>ur>5(&~ zF#A1%(r|iK0&;T&84x?nFx19g2y3P1NYC^Lan1-T!Ul*SNfWGF$mYf}H}8k_Y%$GV z*%R^G#F^ASkr366+yRj1FdP$#aC6*1>}8+p4bG8tsMDe7RUfa_7$2U2q!l@V9+p>m zc4rtKDu+w?x6c?Tl(&024*b?4ov}vXZTB_tlDi4B#@Au%ZxWrK<%3`iLzTV#9eeCs zm$Zp}_N^0}8hn!~52|Vf(p-5960NHr=S_6ziN4L7%4tyN7Tn6OjeF*}kxZG6sww_c z9-vWO(YpFxW+F`7v#p28Q^d`w7m}6d&zNmY^+Y|JGeCO`6TSa=iA5Z~z{@Kgss6G( z!d!rMa+E$BZaKjF8tq`XYB{7-v|YT4jP~@wOuR_FJyaa${BDVOp)$E9%z0M{WvI(d z!=kpTZJA|*bB=zMQ8j26yVJyzmh5uTweoVyYv?kc)Z)O~fW(biymO~*-uOH%Pk|Ty zfE9()(v;{e_ZThTWqg~Em~vztlE=Y0X&yk|adG7KO3{v6`y6|T(w_6{XkkoUE}oK6 z@|`qCoVlx4uWs|D$=rF~b*FWcVrC|pV2gQM4l!w$zhTM$xFu7Jl-}Wahsq7hx(7ai zn{8eSYy=wd`RH(wh3XcL*azLOM>^=WbGn`!)UNANVcxjK`a3Dj9>gf`xPd_Dt`yBq zK@)b;u~GDYkjq!!D{PEk#nYebX+yrgEu31X66SAUxGPc z!F9SdjwS0CdkeyDg`C^PRZckl(A9O)g!sW+nhdcXu0VYrcfZQfw+GbdK%Qm%(>nAP 
z5pqcj2YEHExjc04b;doLXx7P*_jRW-Mlh!iEsvYyj?3RWHdXMK5(a5dJtbYvVlG5u}|gPigIKMQEdfdmyiqwtGk+qPv`5zt3e!E ze_u?eLK{ftFY$K=mT|l7vM_}dUF&sk97h3_XdItcM-sV9mrG@61EQix!1zADG^?*d zZ}7T6S76F5)sl4w;o&%*#EQ`DjSJi&a;=bkYt7qSdeYpm@{iRIn#jx_9a*QUUN>@X ztr)#2EtfOYZqTx?8?#nb>sz!l#kfSsrNO4Ea^xd3*YH$`S-sidmDe^SGZ0v?MOInw zQm<)_?3f;k*V&`xQi~;Z5)X!pWe=?GDkh2kO1@cRk5=PG%8U;a`OAn*MP=9tYN|tH zjNp-Y%cAwUs@~6&R5dLg)d4ScYb>Xa;()yWPg$u2th}rb;X>cM?rWE45#7tU7~fc^ zLS$a4k}tm7moZviI&`!$SJ^6AV8IOQ{BqqiM_60o3iMtkIv{;58HRL=oR$4M$^O3_ zXbDH3GNsg0yX^+SIO8=G@m6s#JX(R1bUbLQ#TF`b?{+8P;L5gNzSg>xDR z2U95X=RKxee|*T97k{@5`Rb_DGxkm)sxA&UIM&Giqu4kMfAc?X(0D2gpU7dzxf3B$ zs+TX9?ou=_wWnZSf0#l%OCIQsxeRBmW-hrLW58|pwU?5q0}?dD;K$^G z=Ti!ZRUupS3B(me#`orEHO&CiEqy$x#_mhRm+_h3N9m-D0Cn{onH(6?65*?F$Tptv z70(RwM8-+RCiY}gbUKfs@wR+el|btj0V+do=gluxU?ytBsKK(u{DWZ;-5|v~C8LS^ zC20kv>6Z8L&Uc3n!A)HJy74;9TC=oNEYWLTZ${RLG3=Q^2CM66&Lplyo~j0ci7k#j ziI0EzgzH?v=;808FpTe zv#bt>RxdBN&YBn?`qVTPOzBF#-_4s50B48TjrqnMO5Ikyn$TKxSFDiAxU_T)SEJM* zncdM?{z5%dO2rDuroV3X7n&N1X}FT!A;{(j4H#ds=6%cZUKXy<>>I z(5N^cd6TBdQOa`6fT7d?6AP7Nn%C5_?CBlM(3sEAmrO8~PBINKSK|@6Fmu(kIq;D# z6(P{escJ9%hDC6$>c9o=)xx#cGZIdd2+>(|cx#a>sNL}L2w8N#Sy6(iN5U`+)}%W# z@2^kBQ>yEH75&DPr6?y{J!r3DMicDDoy$$05KzN8-zI`%#PLw=N{DsdcynjA=7hqx#Wm`*`L;K5%|ATVBu^T|@2gcA>6~BK4AT{eVjn1dc8X72}bH+86+!{I4WoKJ?y=HO=@xf~gjGXw=>Sy$h%EXPvR?2_%lU5J@q zZYhXYe}a6lrqsi1d8Zmp89=ojr<>^kwtZuzRhHjrBU$jxXlorM`GhwvKXTX$o!)z7 zUti7YqN!Kj%L()$x2empwNYSgkm!835o(J%k`rh6P}T`Y(&{@_|_ z+T9y+=bS?b1Usb`3-J9T{>^l?`faCz7P0-R&y1J;R$lX)y+NpAO6Cm1%k*vUg2(9W z#kSEN(WgE|K`|HSMn|d@z%|tCUtSe-cyrA_7sU&cbk?@ytwR^U;Tb?DqZg}G{ZCk9woW6vpL73J~}hS)P}dskq*Z} zfzpkz<~fXonu+r@*&q*D#No>FRB?;cb+JqR9b{mimMrZ7W9b}t3!b{33**_(G*nmT z0Yq{cgGNY)?MfG2o;(hbmRV-0BVaJm9yu~~*Z=@>J~lU93=_(JRpdSPnNg2{i**OJ z2ERqgJm{*7n^ei@j9?z5GzmdkfSyL3}-;KC@{BImL zR-rBPDAozFo+uwfsgz4I&YK3UJOr?HiZ!B{UG%`@Dm8kU{f2GoZmyUF6mmn7a55F_fd>QM)42->zpUJtRAeCh zfVa10BJV(y8dp=Lep^b_%4Lyjq5^zPl#{HU!O`d_spm;Zukpd4r$yk3DQq&aX#>U; 
zUlHP#5`O4X*Dy0kF6a@h?p0;Ou@~4rS<|#KT)hMh_F()w$viMMbNR;{rPL&3u$8EV z^BYaBrf~+AZAoQ9X_22yuxyD8W~d~cqD)oi6;22Yd9ztvt?pe7&E8K+H$$w@k3R}q znA}tn?SHhRK4cl#{II+NLC?4>RH34{&RSuAU#nrcXG0aT)8B#mR+pVKI9S&A5e}dt z2)}SpApk0uby8-OOY-6BtULzJZ>$FJPfOOjtwFD<4G9o4eGU_)t00;2#Yv72akR$4 zOJnO>TS5$FLlF=`j>EHF^{+KHFm7lWl@80Hw)I&*F@ps6jTa*VzAFgFD(pqH*eaJ{ zbZ=S!++ttn!$)(w*FoQU1cCFH9}7y@RvI(3smoakM@D= z9fb|pjAM%-6^aXDSUW$fy30OOzgDOpVsMTK;^*H=`@$ag0OcY!HuyEEB18bGYl}fY z)up;fg%f=Ra?{kRrF@vMvS+$XXL>zThcEgxoKYkCzs@Q7GY|-s$_kyTaffE!2_RB< z50Xi@bcPn*y8gw6FfK6qHJXAReh<>OF9uf7SobV+o1dE~bbB<+C=9-0u`PA_3wsk1 z5#Dikd{-@8;`EI9{5n6(Cd^qv^AOkuXb(&gOsKoDCbV0*vXwSbq%m3O^Lm%;wURmt zW{u!ch$8*6e?{z{djWLTK}1AYWh@I?8dI?oF_0e~iu+1zBd`{CE-N0iZkb<}r+C6fNZNn>WvneOtQmpMbjBOustY;p=LpnWcpZ7oTo0X&h?BQC_|>D_=C;Z zpuh8Xw+6=2j!dIw#A^)gN^}BHMQ-V_yn+Jaf$o)26&CK_YPX8t;x(n6zAz^{+uEt? z)d>dY-|n)V^&Yuue?Rmn&uP5N(>z0a-7K7dt;%1nG#NA4kd6O?&!ri9n3|Ui2GLr^ zEOPMR9(eP)Cl@h0%Cjl@V5rc1{el5 z>rO!!#O;((vGF7{_@Pvgp4FL=iHgK)3zbQgpU6y6q$D*BMPD9sI6ymHz%FSG`Yun? z)o+M5bF;o$GZ@R$JVc%ikeR}t7_+%* zBR@80=NRB^2Z(@JV0jGP9^y8Xhs1m%RJ+nJkQgUhXK=nXMP$u4YLreDz77=k0(+eE z+thXhl_0&>XxY-{IrA*?9plW%sR@Ww(|kEs(<*EzHuC4p&Giu-v9_ObC>N>mG*1qAU5WQy6-4&ky z$JuuWG?i_8kEkFh;sg+tW}#P+-cbZ8(wlT?(m{F+h=2@LX+c^50qF_7Lo5^l=_T|i zy+=wCNC?(va20$!BEREZ- z6q0quFoT;Y{+&i=`kdBA348d=Ow$`sY#pG56-G-Z-1i z%s0M@gKx+0S+p)DA*CcYi>HShnkj{<8Y$JIRe*D6#iX~k6iZ;cbvymxW85Dz^jcS8 z$@DbX)@r2t>kn+O7@g8Wjv4?`#FMW90EC>~XQ^nge%JU#CR{Ya)ku8JEVA0ML3Qt6 zV^!yu)*;MX7Fm%Pla1K}&EL_%g^iSB#45n`6s5t=2SOYI>78{uXl}z41DnlzE?>b-g5i4{ z%-#?zJC(;n*itfL(v>H;NDWL_JYPe&xyKqwvs-i?mv|#N<2)zP1^8k0xL@YsdGadsa)53V$bYj#I zQKJD=~>q}#^! 
zaBMlg_S}i3R40Uefa(utE;oaZ)lrw688@N7@x>vvp0?w0v!N zmnS^-K0UxIVle)0o|aa`cg?hYx53v`RnnHT9Xj+lG?jN_+?ex?RsT>UrAwu+ zZiQTY^>oRL#Vb?d5M_T*!HmCpOHwiAh>VE^BCWZ}lCoVXJav*$S8#&_l9dmrMczGlpZA8oSnm^Ge2x)cG7s8D=xmXWBpp0hFi)SiEy7i2EZ6-ZG^Yh z1%vSbH=XuJ?bpM>vlE1NQoGsQ*YLsu?c&wMwZJ{YCt0!UTpe6a*SCfn${oF;!K73k ziL7G=K9i`jM(|j&aj~2Eba7vj$ID6KB!W1tDFTdy7!_ku1utDZjgdAl7T*Lwfk9^j zkM(`aK5tFp5(y1X#~qc3nrjUYc`J6Yz*I&Gf~b>jhh6)TVkO$*S24kI=P9adz6c|y zSf|2%i*|q%cs`AGNOE9mx<)MP5O98mPu3HAS{G*@I?G$E_a;(vkpTe+wK&ls3=9;7 zQ*VVi%u3eOi}v_2miTF;ts*?M)KS;$nbX58EvUabXw{A?i+HQ=NfwwX*QDVTSc%1p ztpFITXtCxgA7B=<8BPV@vyCTb?tj4F(~u-g6%MM3C*B4lGaWq7;a`5|gv*tj+ZYfC zNV)w9^3CdrA#ufYFY8jJ@fOfu+o@budDnZ5$e0@hsg~aSSz7Iy0ni{2OPCz=_{$!r z-Th*{d~;(}S5!>A_^&Wn_vV-Mm^;;}i3>X(wm%3uR6X8$%dqGq#)Lzra(8n5(huUj1H@k!fNWV*|guI~J z$DQSFN$-8a4?7CCHgDl4tSpYoQrZ3hQ~>@HfNuH7DNFA~%KW21xl4Wl$*OQ#+xZ!K zE2;^=D{EEUZIzAX`Wvsl!XAQ?>b$xRnEDBpi?~&05%Km`#2hOZ3sr{H znCsFw=sflj`70XJ)O-j1;#E@6p4v^hy8z}SK#fML-b0TTAvmD8Xv{sfvoMpSmZmI` z$~uW;3u$a;8%Z+i8@niS_wA#KE>00;RC>)*R!5cy665Wj4X7YL=UVX7v1vax)&neu>Q3q>-ob^ zRh`}4rnyhqjqBB@bmtgEi|~R`w$cYzyIbZLHc_*5Toh`*rDkuUe6Xt87~+(5B8#Mi zvlFvqaL%!MIii*T^~D-53R2Vw6^92fj(jh0b`Bv9QYSMcgfP4x+kXz3x!>)-vW^*K9@B+=`e zq%^yjTtg=fGk_w=o2H@>Dt9deD@;2~Z= z$chm#3IOz^3bBERURdQM+IGeNXiY;pKz5F$TSLZYNbf%m>BA2_%ScT;Y31s@^%%k8 zs-gT75@#$m94ZQ0A3dsApMu$z?H>oKsBHvHl|r>k zi+~6*>4SmOht)vT3lms8FNMkF^zXN8#~0G4u!}&6_>LFhUsA|(w^TY^dPpJB;iKoz zfc+U(%TDSPfmZj2Bea+AaUvhqV0_46ql+y|E91?$nZW(8+_DDl39j-yxW+Mf?C5+6 z)*9kWOW<$<#Yl=TsxLzIS$IWqmL_h-it<#>%s7D$!{$7qT4(?$S7vusVZEMKp-`hj zj-wU*zw5$pBx<(?g8$T7H4mMqH~vjg{^|#~deXsnI@a{N7rv(b<#)^^o0-%jMYOGb zmOPEQz9;+mF-y&X>T~aHHqk=&S0C7dle9OAc(C^&fi__PG$bQ%QPFu{xiuDg_;67s zzkZ~uH^qVFy!mn0_kukD0%i=*N)z)261AfxfB{Hv?hF0X0RPWU(u@F8tVY~%cl&PR zanGa&KSEzwCQ2NYed3@JgA=vlmX_zDw@H$*Vsyn|l&w7>P|xl8oO3^x0~4p;rwe#=>@6(3vDWc>X?EW? 
zGRqB<>mJe2LZL&RqG##CzB4?)$0UvceanEf@3E)@7f7%pm!cN|kGzCaU-yE3bH14&7;ziafOOvOL3r%F4R6xfnx~ZG;r{HC^^3UfLjLk8dmNYAa9ZAzQh%k z+%xipDvCTRmUhe_Agw35!B!TSd?Rwum z`1-SF8jlauE>d_)_;_nL`Hs`JvMqf7esbC2CyxQ(GKbpdfG2PoRV#eA5#VFmiO<$c zm~^-KPKti|F^T1|h8B6%ggF~3vY>#a?jL8tzy9&SG@vy0Tga3W`5zzm<0e`Do`R+F zCSTxt{RF0GjhrmDmXZ?vON$AGEc~g@`CUWb&zm}7q{|5-aqB%lG}Znr-qNg;ODHG= z)Gxts;?z&1!r^2wOi$0x4yJ#l$DeC!Hp*@Px!922#Oa?b_TTLcBA4^$VwFl08$*R%C?=|kv%zf+cN+5D1bpy zlVp^BEGt(~i!Qlw|Guxk{^c2vwh1TvJI8)S+`5DuG?K`>l6NJ+1 zSEv6DH-ApF|9!18ULaN&3nDi7|3vYAv`asI<7`WSx4I8Mo%-$_|BH_yV<#f!h&VEl z{xv=N8M`$r0N!%3E&TSszw-a~TRF!Q5vwr|hW*5e`0304L?;R#0lcN9cK_|aBq@hh zc3JnPs+3gkU$SXu6rzprNWc92d`atXqQ;n5@zehuamx70D}HM&yaIouCtxICu>ICD zZB7wr{VHU!4{o}~d}Z^Mv03gSy?Wf5Wk~hq0`V&b#Yd3DT#4Hxm_5AHb&^@*(;m!E z_0QntuXp*_9ZJ^8_8D#oY*e9}qT+!jH#{ch`B3)B;XXhY=knj_^}uzKECX5RcSV%< zVpD8%D?+|qe%T?y;*3beHr%ewDwE0c1;$^P@;tUuG=*z*XOe50o*Nln{}@oh=Rz(q zc%5XuFpDutht`Akv}r<6#6?fp+}&V3Zqy%KMlv}kwmol9Unh3V0{@FO>VK*b<*o7p ze>``%qsUQikB}oLksrY_v{yx0(O|xP`9qO# zv8LpxqLpu^atj<0^w6%3d#wv@X4d&)>w)y=5eGXtiSJW_|Epop zgvzne#@MSyAk~zzuqI1~!))rS_(5i2Lsp&H!SRxdXBA5{j05n(h7GDTXzmUUeNCwX zjvC_RWW?Vc*JWonlv&ru@OcV-25UZc@gMcPcKs#rj8VzO{+BPxTn8lN>LqXWsQ+~v z-zjvt?)7OgT2iFCo#bZ5mqU1#k1di4ZuO>HJCoFmzN9rm{g_z`421c`^bYw+*ukK` zQ)+fzAr&NcVdw$kXnzl_$lc_jFcPWD73Ai+d_Qrs{-Z|Qoe9yE_=H`vz2cIWlWg$P zRdTc8WF*K{27hoP(wi%m?e>S^uO(6q9C{7}J6XtIHl?~zt|d5PGE|sPce?bky9za` z3{Z|ax)jx4@qGVbNv>&Kzwq&8`0AHpa^o%HbbX{p+>VF#ec7ulX1Rjt?nb`L>2iO1_A_x^qgaQJi5&V zzItxybFNW7G~wOvC&eU?@8T9A(#hqgO0+6G?j!KfYGU$WA1xnYJ#E*^iq*N?BSvTZ z-LwsD52w?{k_ixkT1Ao3`fz-1$gu1@%&>F}Z*g(tc*6OXb4%9*=BuU^ZHb5per@Ty zWxZR|EK%ozY{{BC7mo4xaTLb}lcCOFneW|g=62I4dYVOu1B#4D#J`@mr46;D3ydB^ z#f|Qky{-Ow#&&+VSax9npNl7FY%@bM(r|M_40yDGmr>Z(XgBxp;#J}DhEG)>|D2x@ zMMaC|X3{@YwZOfYvBH?)KX#iL2e?vt@r}utL%rNvi)RYRfL0Y-M9YJ+dNex5Jq7Dz zc!+1p$AD?Dvu0Wr@Xj8JfM9cAx<-?`YVZ|>W$&_$-F>6*mbwk1TgC^OTqpO=-6&() z6pkSZLZBd76WNS=3~1M?YrWcRll&&xIzxWYvf0)3T-VW~%jHOSV2%j|p|2KQlH$Hy 
z4*`HL=w-|HR)Tl$@QU{$zvjcfs!Y#NigEg|AAN1!o8`j|RaB$-W?4~uHI{e#LVtNr_me8yS){FFu5wc1Wh=lGE%g zla%4w;wr+gIfJrlnJn|AS=aq!Z4AB|rIMP(^0lz7%DyO9!Gdv~^$ zM|W$m<23ECN37GGZs90KcTC`U`jx}80FV}#mI7whUB?JyrDsGxbVp6`h0<OyaoKaGPhT0Ur}qVeXZ;TCy)%udlnWlQH+x|yYW_^`tHdF_~W@tjG!O;q6GC1(EI6%eM=(+Wyh@Y@090| z#dW258?rPaU2h&$oLa!HAJfx0j#Eu-N=#Qqf<&%JMRxT$*e2>aJ==RiLBD{$SZ`3Q zF(IEyM!U}K^vxqjJDQN427lRKKY$r?pAz+~JWp%!A#rk8M%`dMFTI5p&bdRIZf!x$ zpt#`prR$|$A6ujoy!3r$ES7r=K6hkh zSUNlJOr_TXokRisY(nGPiAmC4E1429AD23MZ zLlaH@>v6D6Vu6c3OKQfOC(`YGHld3rpgFSGJv>bnmkmMO;Sj7abZTqc$~CY1G)2`e zZ9qbtnm(6}yUL*$))1HPYxxXWVuN8fMcP0e3B~SHemjs!O$)SW-JuD20M%tKZ1x${ zLK5F%!SaM#{Z6pKM!ao65)7i{dARQ`tfwB$F15bZ#;lQeN*UAXv&B5JD@e20?BlGx zgrhKNP`iJiX+ixMC`W^07CF>-H2at#YMqR9nmR;`jGwKC~(G;m`0$7b+ z8g{NN{rYz3J-3Q<-5Ju7$>DQ6L2sT$Q%c0uPVJOGlC}@Py;fcuZGIw&AXc_Yi?>6z z@2Ju?!JKKi0XPBfT9hdbLVH}!r7k0{?TYBtk|{Zqmleitl#O!2YqGY6=44vNl@(By z=eHOMk7CP!cHeeUS`QkRgJuwxCZVu7FB#5_)e6+Nc#Gzm-mzh&P7mc~j z{#I;NQeDC03!X)_$t`4NZhUvj@T^gH)o0g`92=;6+dE-gs6W_2FxfKi08T~j4X`V` zvyRpjFeu1%B!w|vr6x9TSt!>xwWxdg(VTnP)S2uR!a8ic)=u~c{A4o_XE9QXfGh?s zcl3i_aXsjuZoo_!O$PX9d^k!E0S6mSEz`JE^lzV@wuT)gLAbd$JRbY=b-w93yFtbH zLoMUy)(QIVedEHhPN2bd&sfu;nL&&K|3xm~Pvzz*!m#VpMI)|?u!gUx^X$Dx#~U+b z8T9R#ciV=_Nc8m6)1l7qURw@Jz8#n2$O#KGBgr|Na0!$UOp(JF2D%=5=^Sf&*sDtp z_gpK~>NrWQ8Y!=wCUr7-+Q4Ohj9#r?9(5SqV#!p;_bwj~^lfZx^!a$%njR5^Quwz_ z#4xc-Ntr-m~Ga;}uWW3d05nnH&f{agkP~w5&HONDHlUdbt-aZng zD#ia*UCI7cyyyt~ZZxXVN--04VlWL$V<8F%AK@y%Y)Rsj3xn-uQFm}Ql5 zm|*{_O)?g-1wl;h(%))N!OIkB2d3dVTw+%wbh%p0`<`Knk(qgqrhMjU2)en z6z%K+8Q46FLjWb5cJy$Cu)`N=4?-%BD2XYcrtq!Eh{+k7%BGuB_yFZZGkyO_jL%E6 z=vK7(t4A6iI6vCR2Qy_oy*39QUi4}x;AI|tpmRanj$3#P)uW+YpUB6iL<}TXqj}oB zSXmLP6NzxpesOeUkE^yVmSJIxiyzpPR6s`@O|SO@E$TNw>JyqGK|DFgFTCL*JJ5bp z!~-sTfMIZ*o%sE^5|D<<3L1uEt=_m^Vc_Jr8O)T}%a7ELuFUVaX5>y3^?b&#+c_Q| zdF-cG{6P7$s*}iZeB)7TtHIpkei`CKOaJha?#Dh-c=C-)#WD7G{`Twt50m}Jd8eCj zq{_z2Ki)qVy=#8XQ<`CV+9`Eu$xejsc7vaPA+g5||E_HH%9R*KRD;B&0LGy!2#yW*Ka$N^pI{|ce1fa|LUrBizT~4M{~9M 
zb>VAw^shI&cf~AGP7TGD5xiZ@B(nR$_Hs0-DLXo&J!5rPy(B@d=e=vhF_5u74YO8v zrH+V0Hc@_N8p_v{oBP{2`@Y_C9dR9SgIdgCjvcaO%dzG;^JfXTz`Uq2`e>Inv}7$5 z|E{8|mu;X~Z8_be?`0e{szrV{e#MZ8X*k|1{qXHuy=Hlq5c^!WlV!Dl^n(Hr4y+ z*|LfJfyqFp?f_FjR{M*&tDiTLRs+OZrwt8@OeVFZQOeBD5Xh zc7OEs?hDh#wJ6rF;lZq0`NrRFPB^T=yqueP_yd&c6X~Z-_k1E{#+&coW1jX(F;`s- zilp7@08R0AWKiv*wJu*pUKPn$li&i58$Ml22R>(%FO`dX9SS!a>~9&Rc8j z+6OsdYzp-~)`7I?w#ZgY2@CJ{`c{`k{g~PIu9`yQnmjS6h1ChpzSpx6lU7J( zS#z9Bs>9UKBE9r+6QvQD%u6_`U(LM+{~II-?UtfExsG;^_O6XdACxj-E`Ml@ZS@?j zxwDblhml-<$~2Th?QQ6%xaxGY9afN@KD#Hxu2=RYE@gx=7Chfv@*&=|HxEwY|JZ*k>L$huFOaa zf4d@B4^(yG^D#sdp^bSeE~m3@o`2@0 z=X$IE)EL=qI9i6UUC}FaV(?7q11}$3>_+Q&4H7+LI~B6Ka0x6XWX)lHwA}P|Qs^xKC1zz#d7Vv&Gxg$GO>k5&-(;8eF z5}HPGYW>C5`B$gFot~!8wx}jUBo`Mg8jkVb0&n!JJhLI%``UfxS3Gndr9DpBwLYV5>1%YwUg? zV$-NmpqM2>+!lW~43m{l@tZs>4t}KwzGA|R<@X*WOY84_s!dcp5|jpex};gZf=SI) zdECkpapu}1WFuy?XiN-6MG1R{wbl8m>TVbPltlK>iNp+2zs#Ezp>3+$wZHEa*KSqT zG$mLXQ=2a-t=Kl;SZ5Po-xy5EpIBZvsCK6cj?@*hf6Q)>8&o+QTtyZCn(M3AD1y@^ z{!@(!3t4^Q!^1d)#x>z03woqF5nP?#HVmtMf~>7F0}Q{$%;XP!=W6d{65D;vEPN)0~RDl5qV}vl9Nf&6rHYyofUR z5jH?*@l)(8V*CUqX(M+-0$Z9wo4k%Tn$EX9Z7o(jKz=da?h|BGn?m2Z1|(_E8zu$i zy)=T;_^MKF3NvT+*|Z?zL9}MVRe20o9DKj={May|#9thGq(y5zFCbJkkS(_pPeuOeTx|J+CL&;fdph^%1 z(G`rFe|iGuASPa5-ygR?uUXB#Zt(#w=uk?3~~mgdDbX?fUn47x2(@0WPmFNUnB>}^r_In zm8?P?J6R^ivO2e#m-k!!cJP_|-hBL3dAlDCnX;7yalUva9xwL0`QwUPst1}}4tksl z=wkz^#8F&08`N6o=tkz=vVO1z;?{O;SSuaJx%#8qq(3P)|1^xtc0V)69ZBQ#A)Lva zui3-vR27r;p&bXO-dm)}&Y3cq``kU#o-I$rA3wV2peDu#)xB9_Qs_DNy05h`b2Yux zs-P}cKf1KAYW!+v?c!Qv6fcSC)b@I^o<-w2gnT75=W28eJF3#D9~n2Z>kZTDjCBpb zr9Re;>oIv#TVt~GU@zpucnzyb?z+EMUJb!gnLf*w$rL7j#f>wvRe<}zuT)<#;zfM<(zio}%gSntng$z!1?KntnsMG`!A^&k(q|f#-1A`j`@8)v zPZpEL8D~B&w|ck8272e&HL6YuYvp&>Ad;wcql9gR2T@)XC?8-FWV%|V4&2o>OFgK_ zxM_cHSnp;fFJ5lDeZsrX3gHx#wNF$ZBg+y5=4rl-Jhw&mjTpz-abiQ5W#LX!ipmeb z12d*jAA`zqZ;;lssi>Qt&?%eRHgA~7jFkMGSmcAb?ZW1@)7X5YD2v5Rf|UJ!e3!ZWsV;3LZ^m^~h{=t(-{b3h- z_iPlx)Sk-H*5+IomsQ$l(Eoj8o}_j;%tM~C1m5GgR2sdaazVRP4taV@znSTxXU)0_ zr6X=4b0X@I_d?rKAzkKI1+|7nS8fvz-$q-f 
zDBkUkIiChg3=yW#errDN8F8Uh>(!n4=N;_UkJ0iaev-(TU9#2Cs>EH9Y}@PtzAJ@P zFPY}-`;5KD{9^F~xNSfB0l|TorLXwg>#5Ks8F;tGZoy72j$87uT}?1e)Mvl_{oau;@4x6_QX;-2Lz{ke9c$FPB-KPTR=G$CqgCKdMp z2qw<$DPB*Z5@`{q>ipH*8;#PJqVr^Qo~+O9=`Q%g+V_X`A3$5$OI{&%S5n%Em&mNE|Qvc&x|VZ z(}V4RY_XIxv}y|j!5+j8HyB(L&2M`F?WoUf=|`1-2W8blMQG>2UzXF*gyG=>x-~Ji z>K*QY1L38CB{kjs6owKjXuL&CWTpB1F$+X>n%^zDO(;@Vf!btmSs1l=~6Hp zqJ#}@$-Xk58-CF803!ik-AW~NM2IuJA&&#*!1;W5Vt%c+-;IgbLU@`*zS5F&eZcV) z@zVXDT)*>UycPmIzoJv)-A~2ebZeWV$+$ZV_?wU+x`8&D1LS^jtxb=s?~xCEHROmE zH$jV^*jor0(U1}x2tM{oe)XJ9$iFYuxn|sB?99t7y5b0{CxolUsNmES2_s^TtSeRPP??1gm^qBlfoJ$;YClY?&6hHetaB&%{m|XS zHX$Lc`!8KnzCjZ6zFv2=Vp^L;CAL@Zn66C@Jqmz(LlWV6YSAHYpH*yPc&g9#$9dL`0;y|yZa?U zgpwdhco03N1oTpAnmxxkHA?0WkEk{|gH8kMfW{K2iw=Su&!G`PmvFeofm05I#FGNEPh` znh2zK4z75$`d2|}hxKt6h^vLZ=V-dNtYi|xcT640X{fxme9E;bg%lA{+d`bioZ3QV2n#^YU`zTbdbX0{othih&FN%-e-V?9ML+Ip{X7}2?3#A?TcAxY;yrL z?^DTa4i=RF3l(i$B*&p%^|d;gRc-scoHvtJm@-fm%Q}l6;@D{YL^1BNo0>}qru#Ob zMolt79#*-o_~R`#Kk*2-j2_p2KDt8=F7HC0%D6EEt4M7V0O6l4je-z~cJWGjwTXwZCDoz=CQn3_R`4B%uLygRPCCXwJjcC1! 
zGUI?733XW)yI6B%9c&;2wj(tHir->%Hdhpyc8}KDFP)XG2GT~Y&9W0RR2wNKWAul9 zGGtjdoT>2D%R_lx97Wv}6FHGw*FLhg&P8FYCG{q>yjM`d0dLndzhGO-hd5X1E>CmC zbBQY-w-TGef>PhBM4I&2iGRT`TpT*unmcM(<-Uc(=yulz<%HGI%%RB8;)Mp!-~0C=O^&EA56hJ1379QzW6IEO-O_mVeNF(10{x=+ZAEvgApMVrv3pJ5%a>S_Y-yY zED9bfyp+|pOlUg-6)~CZNWS#!cjW94-|>Hc;p8&Bq$WArw((K%L+{$ymiw)C#I@wF zqe#qrTN1n*T-N<{b}e*r>rB`z0~+@Y{pjj(G6U>ElRvdCL?zwKmJ?$6*$~x^*iLPh z*(l3mMaMr{tU}pen{%o+BL>}>rgE1w#h9&d@l4Qwp9syIK9o>&oi(2@kvGSjcso~c z?8)X{#f(sI^cpR6M%(~}5xSuWs+FY2azeQpqM3D{`=Zt3=4M#*@$rK%R>@va3-LsVy{*~b7 zLuN2?t~95-;~?;Lt>ThGy>22;kzYc#wsu`;^e-4;4uKFI;C`#scHz~J8U)57kAKVp z;8)t<+P2{D;H;4sKInjt57`}Tz7hdzZgNI;f#Ax2*RVesy{p~i7ZR{pje@M35ip;G z4VfRMG0srUV;3j_JepDRcS(5!E827;#rlYNn0U>e+%8R?o$T zhwbU#a*0G4odYm*nrHY6jEKMPSFA^q;nLz4EN+!F=lGX%;6RT(1>3dXb-cEW9FG=3 zggti-xIO#A{b58~0jMZp#=!>VORn}Uzz_&LP*<-RC~w@#K}@BwOe-WlnkMH$c%gZ| zcng$e*ssvBWi05uhl6un0`CJA*L}?@6 ztBDmf2p8};m%(^_MO|DLNyP!WgVnv1@35T+~KG z4t}@lOTIaE^DFPVN~p|2F6#0W-6Fa8s{OWa+cL=>eMBY^@1LlV^-xVzg{-D9 zS&5$K5lHk6V%0)5rFaH@Sq6-`F{;hm{(9Fst83lCV}hk%C8x1FqpqI$nm1OOjUF#o z(UieSOS}M48|?Q_HUg|S#~9Hb*ATgWP<+r946z4ucO#vSp!->SO>!TM$8aJvMP?Vcak{{;u+5c?CS4Ji zcufP>!6SbYK80X18xL8}&&-4ifryf_aYdNUvW=i|1kmrF61sw5JuhX&z=ClKmJu@i zO!Sbszs4;!uT`db3Z%=m*CFWqe^>BAs!8yh&3MfmoCTt=T3B_1)2*iMLSzHzIB7qm zO&t_Ob#v?bW)PK!ipTpNdiNKIHml>UMr9D=`~;@+1h6|@@pfHZca`}cuyXj4pOjGncZrFsh7VK zoIF>QwP~-aHzVi0f*{Pg7u4m1tpTifBC`wbE$D?SCk7xkt8ym`)DSsAFABi*Bf?!A zI)l5`0a=2vqs%zEd9Y?FVIFsBpUH4K>F&_-J`a9za`9~cDZJ>2FtpZl^t_2A1`OwR z=W(mC--(57e39BMMO5v%Mma?tInVJ11 z;&e2Oi`U3yB;5^TdzYW4bt}9(urcM^wP5;bb?7_iXF>i;#GOwNnbTnTNj}KHeOkmD z@=KB`Qs^oj=5tQp?9u&d4|>{(|$e9GXn9aC!~i6Mow4Z^h?Nv7@d>8T}=VV9j8eY!qNbxP-I+~)dBICM8K zi+pQgc*$8?6?`Qy%qL=GJ#eymRV2&1C72pKnaKv$-&2WHEXcWMex4|!Ic7PXMHRS( zvKU%Q;aK#f?0Ru@h;Kn=$Vh{Ukqj$Df)boPJU+DTff1bB z8)S*L%8Xjam?FI^M9(`fetv+EvM?_I1?wSQl~=m28cnjQL;kR9*K55wat1rb42>%rj37Bzf!_siy3ZKP{1Z=2|4LPjx% zT7S1;<-v=QC#%cg6?=L6UDe+bp8Qz|lw&17^W2-~Zo3H9-D--i8tz0dNjyB2A{#JT 
zu9v)^xzI3sYdK)rqwlUZGI&a-tgWJ9W7yS5R_TLr&<)Djh~XA6zR_$&95E;O-c@xf zM`lxfn5}X+jPiC_}3~e)zmIR6eN7EQR&g$eo$2V>8KsMjh>fY_@b#wyC zf3n&?JS;BJc|upBm8T-N|CCjp+A%ORuu6maTF{#Pu!BHMpnAGTqG*L8*?wkNqp;G2 z{Dcy-5e6cecVpDSVYU@)%D<3dp3udc{HRk{*|gkNn)s5!{D#VlK&wVNv_L0aK)5=fScGED!KpKo!czv;Q+WhTo)58Ve!_)7a~|QA{l$?< zFix*ypVJdVxb_Q?NY*`KMNJsamEOKPU=WcUmvm+B>VZ%ZGY=pZ^dP)oHbONn0jOUZ z%f!FP3b=;J!IiKEt=c2Q8i*`=76)}Kdymv=hx3wiS*K-RYR4v<_XJ~bM&^)vMn!06 z5PpphuCnOv6Y5B~HzA{Isbi2ndyDZw@Uc(*{<%s~>LJ)X&A zYhPRxPHj7OfBlJf2$`N;`LfzH{^J$TKl`0Hmht0~Et9(3(fMz*!ZRBNBTIYo7 z&6Ypa%4kaIy)pX032|CE-FA&8IOJX{Rfgc@`J=-J>e!Glg7kbwaHW&oKCN2b5ZMYl zVRJ(vJuaZKZ4=}LbyHL!{^9HE&8q7a16SgCXt!Bx!p2X{Dzx2ZTAVlM?1A=iMR6xP zPE&%bO_jL4Cc6dU>P5k-_soeZ=|XF0cmXR3?4m>*Tg_os3GeXRBblKyqzD#LTt#qF zH}fdfI;>^J+MsRCY|M3eyjDFhc%_HaIF?nu<5x$vwE z@xej$;6M8R+NlS~!Vr8pk-s-26?(SbpW_DMA3Fwfg=g%7N!*IU@C(||q(1y_->%A# zWqui65+bUKTul0eWf~vC-6IE|YZ+3#fg+1@&OvKDQqPkOWtL$7Jcj|VRw3G&dGOf0 z=%Wh{A)gl0GIm$ixX6_3NaD>0ad5VGQVVs0^65+jx=1UQkun;U8>LuCB=M82dy4WD zLzcJCu!r_RXw!|>vU#;_dmm9@)kRYu`7~!D(KoIQ$9jhd;|bGBQ|<9XQi#vBUDzEs zlYFE(Vw4yq)4OfymCrzw&O;b;obx>YDtk!nJ?XCquRsVdnBnq%wT;7&E7){#*!~X< z<6y^bOitCyKsK=t1+i>@lb!?iOeU!@K8#JDr~9b`5whpVS_`RHx;gx@eW+dZKtc9a zK37oJomD&`c3$*FllhJ1z>o|($;gM?y{n}i!r0<`5g|c7_NoSNyZW|4 z_}?1iQj44UB=^1^o2yvwC<`mPPF9KAHf3bzN3d_VKByEo2PoYqL;(h_8=zKuRe#?y z(nA{Vtvsc5zzMY4f)H>DD>F!X^H1ulWyy;i_U42aSRd2`u zt=P@k%QVk zI(K?vyt}B)JmQn%TQV7`h$IuQyKx42RIdYX2#k15Kc0X=p31(PpX~j|J%%*OUsT0k zvgzBcEJK`GU#PCwLXtsvaV2@?d(MfU@;^iR@XtJIS&>IRJ?|P~&?XOVEK5C5uY|Xe zg63I7AXm6^B6~dMx8s}rqbJ7#=&K6(Au`LVhbs5bnKF-?bM6;}1uf|g8?a=!zpm2C zHUNKWV8`8GM&rb#(tC85p-Cr}XPtA8Xn2c#C`>?2d+>e?IcQzw4C~&}I_lOitI*wd zoXg!W6dQ+zvv?VGWKoWH(mU+PTVTI~@O964bC7#MQZjLMU8-Y?%Bu|p_jAMgeBQJ@ zA}3DW3$|VTxQiFWR^Km@U4d$pm|C~CaHb99B4quwh3;;vz>{^ zSgrepoi$6b?|?ZO=yHa-$N3;;Z_>ubvUI4f5a5dUmSI!vJPk+mYb_!~@J^4f6Dxd5 zSuBf8CS|j3F2`dyYcY^UO}hu#@~IPZTaoF^J-;iVH-5;6`~&L#FTdbmKSg|D;aapK zVr}Rndv1vVvocfDip(FkYKzPuM>z9DelUaz#l?p}v1g>R*{hSlcT`NN;QrvOdB}csL%O^2(MngHQ&a!w 
z@TN(YsOGQ*G_fqHF-XU|5lD*-naUFW5c52!$>lBZ)H}T)#2?>APT`#I zac5T4YoWwnNnKr!eP@{kwgOMy5w-_K_?*3@ras6*TQJ`DVtv&0EFE$ytoH_a&77V) zkU)I5Jk(6iMI8NL>mc+Km}D~hrQ&8zL91Umo`xuzzf}47*bk3NNd#i^9VYv2JMU?v zqkKc9z(aaI59_L)5o%K^FscGGT_UUMbuat-=%k&2M+;K^J*MWLffy~{(?32VuAEoC zzaT}WY)z>13rAgkYkmO;t)p^2i~q=%A4+QY#CJRmi$LSKm!OFxiPgRH zS#h|4$w_IivL{Uvk+8nXy8i9ev_pN))h>C#^iRrhuj+5jbj=Y+yVt1#@iXi8>+s7^ zd(H1!xC@_#1os?bg6TLE#TC`xEazU=1d&J|FsG`_eP}rME5xE;8MrwwvnGi38`4J) zZz14;2!1gp}J& zWm39#w@4?p`Y%I^zTco+HU~-LB=^~ngx`UjzqQL*x*uWwKggNpJrcxr10;S3%TMlF z8D6(;G4wRPPHPRW+WghtoDb74<%~b>L1^DJ-RYdIJm%_mo6rR^amH-E^p&OSZvK?3 zP-(II*yA?a=Jb5tH6T#_ac5lj-l1JTN4@rap*)vx>W`U4?k{F>^?|lspMQa=mp-XI zZuHDz%Za?c{^)6Fn>D#kMzD1}UeyGyuM4pMuQVv|0 z6jiD1PZ;pnTR2`ubO=ML{1Ti3IRxbIC93TO?y^*J`ZH3q#w8A%p)ObvTUIf-nKQMu z)2hqoBU)ces0w$y=JfC7I2-ot;M1;isiS02iBZ0gvW5Isr7Mm1I8Ug((sleu?+#sm z_&;U&2QvRp+mxnU$h^&l0gLA%jn9M<7r%<#&4|{Z8x{F&wV)bMkOH+Z6f-*8l)Q~g&`aik=CKRYle{SP#WoOq`Mh9zJrRdqVIFP zzwaNenPKLfz3)}`T6^zx%&fbU8@3)|NvSm?88SDM;Nf>zkI*-lE0k<<+4+hg_#(3> zE_sN_+q~bh{P={0*Y+>YXo~LY{n1n=mj7j74h*Ys?-&n47U;tcan9+B9yyeEZ~F3s zw?>0w{p_~YJ63J5qz>v6XA$(pcO?)V6x^)4RO&@ywnu#Gn62dma{L107E!hp#x6Ol z;pfc$6rw;47EsZ6Q4oRkKU6d(@5dk=7HMNWNOLIS@a7szvNw_eBC;&q+n$%Ho?k%~ zi^X82w&Wb137YW06B9SY^Um;uOW2EXnPk^@;*IUVNY@}UdTBWy#=(BvV&l3VGV+`4 z)2$RO{(VmbA<%}TO%5;9Y2m*-i8*HRyr3Sx5Y@K_m;Xog{`yM58N<5!s!uGQKcy(> zQgF%#C5pA!YA9Z&_-3x5pjT}z9#lMyI?K1((!-BZbj^gGefJyxsDLB=c7IOGOMg;#S$1@alz`@R z-MfE+{l|WN5xh16iDlZ=BGz`1_%=Pah_4wFG(tp2Yn-I*bV&mZP<$Kh%zDMIe(B?tAh`y>1kN(!jJtpEj`VK3zkd3+N&olH1jf!kk)0`WOxWda2PU44Xh5bXSCn#XDz^)=^7qG10^7J4Giy*vPq!@aE+YiD(p zNPZTqpv3ED_v1A8+5i7x=mJzQ*`zCre>C4Z*+T+WaIM`lewGo?Ess6Xm~wh80SDOd z12vmMf?Ny9QIjQ&!M^UGEbYs1-Ge+3VdjDM(#R#%eEZe@X*6oR*&6fz0?oJl>~`Wm zbSVi5&oIvde<&6si%j|GAbxC*`#kZ-on@WTTgyxU=9xc?YUU4-&DxMrSamqSnZbilVIVBCJ5^{)4W z_4`W*hZixU_opfep-lg{|Eu1<#UL2LIK}Ez>H!`C<&dY(Pyx*LIu9@ZxWjBOdTaSn zJ-_K-lx96hxXSH`^@B+Q7WT7X{Bp5?I6(4=FCm4eRiK}q#^L_^(G$@px);EzPG<-^ zz`E{8K%gkoFxsq5aXPVY(Es*_R`q0ieED(HnU)kWO&IHWp4iCbfPRwKb3c?vpyI~; 
zNm|;psE*80Vl|%sK3Ymjz$=4>uRQpp=j8~_q`}Fs<_;sZes&1DErWhuRm81Ze%;!g z=i1wgIGYaNVBH{n_UQiu)*JPD!0j=K9E9 z^k<(qY>M|A-#(1t{`B$BiT_lAtAw{7yA`0H>dk4WB)I+F_yp9{SK+PKe@H6E^t|Qw zk#3n)R{z^bVX_TsSZ9*?fYi!$x~9p#uDG>c=1A&JS|?cYqC8&_*LuMloZ$wC$!;7W zG3Q>jpf<_#uyGuWCht!(VK-~|nGC*HlVZqmW@%~hAsVzSAz{up(xTua?q&ZnvG=%u z7OuzE6aDrb0c_Y*aHzgcxR-WG`t?Xd(U>RbP!+pSqjsI9FIz`hEIlDNtZZkwOKyHy zR7{Zg%6c*3#{1F0b?%MvPcscGQl}?5bs1uiDUxB&D|dtRd?akHB9jx~v487XcIt80 zX|C4KXnCJ8#gB@&gCewR{N;PtbKC@?v2PI;#_bNpsZ=#+bEONi(?Spe@!LQ7yB;bv@5S;KI;Jtq=0C}2a1})d*P?TAdkG=e> zXFj6|*mr;ca7W=L{;%Jm)p+HOq&s4e?f9qIv+*%39Ee#7D{?iOLm~u>|NPL#qKgS? zCzfFOEP?T?LjEGfP3hZRB~_e%Hu2jFf0Yr(+~ZtUq0Zj>y+4lt2-}*PN=*4MmBK>y zyt+aknRX;mXoYE@vcWr+rDzOk4eMjQr2fol#x{c)XKB5x#7&L zuin25Ge(xNK6r!tAtiDg`d-Xqv(CJE|876(&xyfq0`=J8^39A*EB82Rr*mGT-u1J* z+@8~8;-qS?I6t|aa8O?Qa@C$f0<}WY&>b9JnL(H~KzB8_BIvMmajo_d`O8H!Dz?l6 zv;4Y&T8&k;jY@vq!D#1SkoltyfL7>iz! z5NE1YfkA(xwhffWO@K!GAXpSoyWvoQW1goF*!;7y>pi z%uMEDhsK@C#ayR;4}tdcrYy>E1uDYB(uk{Q7>?&POv#QyvzAC4?WXhEkG?AJPpPhR zQM~?L$-e^=kaiN}p(ce7|0@)7*T14I$d%Hm`QHPqI^qUTZ|b_IKq{7+f-zw=>b7hH z`j~GIotcgbj?wK2P5rRsH#qiJYJa-gQ%C`QzfXD+`ernqpS0fn>0@P$&=nJiy&(u9W#>H=L`+^02(tob-^VK8;U|iB_)_=SCAK``v zgG+J{?56T9x=iqXrvGK6KfVl8!vOq)h*$ZqS~S%62SS929VUDLM_QdwL} zg8iK1&)@u?M(Bv~nKT_T4gD6sqqi|x&W2$?Z7qS~FeM?w-mmEWdE4(pl@&$Lc|g8q z@BEz;IzS&1{3-R@)PRsI$K!slGya^w$(58ZDk*Qq;+#4m*}Lew-#nA}WRm60KcNyJ z2WSManji6RoZu9EnB2Ac(QgRSZU-!!6&f(%Jwd>D%VlRG{=Ctj^A30~z^8bUW8f9X zs+~rsU*y(?d6NI2aV7sRYceqb7!DQRn19FNq{``l|KrDDCPm%czwKL6T zP(U)^8>Z|vLw|1^FI$ctq?d-+d$X(jDeMhbVBsr*C)v!Oj9tjo4%H3MR$5{{pWIDW z>M%5j$);aAvRghoFpm6BfMiF|Ve3(3Isa3|3uX$q2Lhcz4&A{~A|R6+@r*iyF7Q^s z9!NCgMz(|d|K|99QG#z$dhZWJ$Ggb5TzxBmuIR@9wGPXSAsEOcrT=e-FQ#aY!niKmH_{#qKz}`cETw70Wqmjg0 zFVf#xOHl^2Ql;^yWyWL!-Y7L~zuEMxPOK7q9E61RBtMYsBi!BTf5Y|Dt^u7ze_aO%;WE$H^E!z5?8InrWmgJTE7I6rCBPe1sW9# zU|rPxmXC;kWq*Nn_SS%cfUq#c=Zy>hEm;+~?e?nsVFzo}YHn)(+F}G;zO>3s6tJ`k zHi_EZSUWpAm<&YM`1om;`72BRw>@C7RN}pTj?|;KH+BM>I+4P4y4@EDw6rWg=3H$mH%O35c 
zF4$+l$=L)!74bfJjp%!}470or)U3SCM&I)Q+!Vwf`?sz{0#R4u&f0_QKQP1v`LgbY zBx8ReDXq3zs)x^Wv(WG86gU~hX+C&Jx-w$Ir4eqt7)V|#tx+~?=L0T~-gXn_3pM(T zLk9Hau&n}m1R@G}JBcLlF7YJlY@uoq>9#k$$(IbQ{Q4rv*h>e$ zSOnsz#WlJkCa6lIh{)$@z_{SOT%J3dZJ)G}K3KBQB8pN>CPEqo$g@QPHI|a(fvdNQ zUZ+OAbj9)+?!^e};ZR(vRH`ssu*6sA8?`Rqj!`<0&!4d&k6sXcUY(z0eDI7aw;gT0 zoS1IgJY}foRo1=8T61O7yIz_sksIU>BP~V=kTYE6otmR8u|Bx0vF-j+hGmgH-4E5K zSsrci_3+uUnl6I9Bwx0t_3bBEjT|bsA|1k6v?n-c=0i3Jbuu+5X7&-O!%I9bhDMZP ziSgC%ZS0O;-c{ZWFFcfxCbqpY_TwQbp7<%D*0ER_EF*R!?TmzBTM!H8cDfZ3{;KlKIeI zXU4BqbAMg?k(~$*eXgB%1ZimhO{Q2{%<;BL8>7Xp3Z$H25}S&*wT!7E4UBDh>M-UnP+v~M!ud3iE;alved_UnW=Q@V zKgSZ|(MZ?g^L(D?g+_MA`N|ays;-`3?rK)D{Wn;RoMnj{W$se@O-RY+7JjC@k|X+O zb~7o%O<~+uBn`hP0A>zNvP~wZvNnl6OojuGA}1CLefv6@O3iM@5k$2{k%Do-+;ARu zPeQPwzOI$H#3?bAdsS4vtp6I;euQT^A-WB>cwC)tl5NLslFruUB%3`MAFKi{b>{L5 zZ%vKolhkM$Xf!vLOwJ> z_r-Z4=PlYDLPM3kP^918tV*$%{G(52;zh%|R?Y0m)KU=tXjL+9x-E;37I$aHAv$%| zrH<)XKKjCcwpbIh6?1tdktQlf;UT&aZ5}6tMOOD`N}eor6yjF0f*<}Sf= zv@fTiZ3re%1s%y#K!@|7{|K-bc~s{D3$xE|zLRAD;-Be5K1a_PQi&U8kV zw?}TcZ&ATZ#@09N2jZrBfeECk#A)wSquw8A#b-D_U9U0FyE$sIHAhJFsFpR7i&_u3 z<}Ig-x%%!ol2sqJzrgA8Muy8#ZG}RToisV}RLMJ}x?qlwV%{*Ri8Yo zfy2cgb&-hkh=PNlw09lvABw0qn>Lq;*;1k#WHl=7R2k_vFYSM6v}ia^QZ%#GHSMl= zgD-o6*4~pJb%(uK;ztbV8nCK!6>Qx}3ldbm*cxW8iq(T|TM(z?9d`^ z{$WhF02X}$1qj=HhDW-&C%JK+X2Oc^K_{AG^X<;(O*&MpmV*fy zq!z0|2CUjRcf%aIm;dMp4UuR zC0BS@+iaQS(lQ|xr4+-rk;Hi`aiDe8E8 z5E-6N)}_``f~p8{R1h`Q(tHRvTWb0c$>->1_OV^uuy+)PN_j$(Peur1+vO!=oFfxb zdrgmdJJ%X7gO;R)wCeSfjw-`__*mx|J}{0jFXd!%d}y5^f;K0wj|>bMGK&LIDny{u z0c-fR5c`R%QycLjq{=O!v)MnWwd)S32jbb`6u)>}yF{(VVJdwf-os*JNv+u%N&3m; zpGH|qsUmenZt}XPT~I2#J8E?UPO$Qs zftTNnGJ2vb!OEt!u73K7a&sF7a#E$Fe3(C6X!f-L4R$qIM!v*aBe1IC-40f$RnAf7 zb>k4C!2ynlaKr~NzBf6`)MJKDR(iL&n+|@HA?)r&C7UYAC0%=>QTaGJes+^(z9wNl zXc1Iq(~CpWT{xNIAuE}E1=f|(Y^df4J0e^%o+|lCwrfFd>n_sBY1Z1v*TF8-x8TTZ ze(SI-t?LeMTa6c0AqL>u_g4Vi1arN61W;*A%tuGG1$X#Q&+G=K9&C^A`3A!CIQKczt+jO zFt0T3zYlAgUEhThSydRh%=smak=Qi~B|{5+nUnN`qeHehU6{5Saq$Q8@QCsgSLXRi zGl#WRfF7w16nuAp{X9`jl94iORmkF23|C}} 
zfO_$$^_#oiD_~*cb{rM12r!v`_C?b1``Y7u9W*3@$=-Hrpp~S7yR*RBdG)27t0z^- zRsx2YIU{nJ)C9rKC#x7p!Vs-a>|-`~!e;=4nIkurIRKJGdmQD`=0D~G=IDB(yVzz3 z+EceM&|$k2O!4&-oMjMoJl@_Kv4s$!Dm4u9pbxvrt!cemn$`j?YrdJa7STewr)-3} z%Q*^>S~8z$F!-?KLx5mvXY-O0=C6|ww3{J0hWm-aU-`#jh?$xoYRUBlNl0&l%5;3d z{0fSmI8V%98Z=R4@38Hvensot-BQnT@O%a=NkilG2Q9+e-tLD3)X(xk79oDgk_7&T zR^!NQ?KtP84&fiifPj501c41bk-Im&pZ=y$Kio#698d?yE=`yG{ZU-^GXzjQ+1hSA z?^hn|{WCD{vKYGlm`4gv$!39~)4GEm|JFc`JKFHf!blKiH?g1dBO}V|!=rrr7drQ5 z?eyrF`eXL{wiu;mxa~;P*=p1g)WDYasmn{MW&LAV9t;CFoL8RN&n{aH0};jYm_x4x zcekb6T=QAa(2MsJsZ9Za3U4VlQvnxj0SGGNjp$ND8-&hGM%{2Ax`HQl*hqGxyI{#| z9=C+|&RjLz;y43{YDrTg_aZZfGrdb_4tK4!vS=Z zoRmyPgL8%r^HhqR_m$_&73f3jqAUZA{1Ya&LLpN@@95~VyQGUN6;^i6;ir&JUK%|# z?!Ll$2ejVZhB3uvNA1$Gu-x=-X9y<20zLZ4XOo*YZnTxgJ6tMt3obID5gktXzTl*# zeob2lwCrj3?FJfLP+Vm~LhOKMsVH2&{N9og{IYJQGhJ?h0?1W7NsetT9Hf3wD?YD4 zbvbf_>`7?ExFl2YxKk(7zKnFi@BF`DjO zeQdH zpx!FLL&|j~3>F_|N*LR52tyuNLv#jXU$}=P$?GReN`trrh^+%lxj;ikJkD>^vYCf6 zB`j4xDk?UXsNy+4SeKGs1G+A48b>wH*so;$@p!tQDaTcT+jb}AMvjTIJ8pwtf%!jU z@FLavY!ws>>+gC9)B-$wlq!3ZwD%qF(53G1xA({&#$MifLL4KNk%;mjr^?%Bd&3`L zewXeB#Ue2GC(|K?TyINWvuWjG`9v+6Gq(p(CRrVw!!nyvGlK4YPD>)xV5Liq27{x! 
zs761LLIpZX;>~J{;5ygW*!O#uz~rE~u5{3T>1cTdsmigPuaW(%b?4SWrevL}UD)bp zYAJV_4p4{I+S_*&qZ9reGb9DOEd@uf_(R{uQ#`h>I@EPp zJJKZkG^TnI>zy)b)O8FuHIhUXZ-XFdXgU;gd)M&Q3_7icnU`^{+%z!O4g8=c4p@pd zvLb;yNoSb>s|1<~`pbSfmAI?uc~KT6pUdTgkT*>FCo~zwH~`CAc_c_ek_iFi&BHy>~3hahAv_0Wx~MxGytB;T@(D%wN1JIQE`D z^Yh}l)cbt$Fv+-SGU3iZR8e&!wKJeRc!8Xk%rOM3n!`kks&Cu}lTQ zpgNaEuscp&e(Py3fcrTrPE}3{$HTt#PTWYZf?mg{AsI5_&7jJUSfEi)GqEbFg9b^9 z-dt3Don>o2CvU9G^Hr+ak_Y0G{xd$JGM>&X_Rk|3;JD39k131k2cttL<=cPcTtzql z!=MQgKwy!L$mOlUvWW*^>R6wFxHyJO=7q}Zb(~&O`RJhAu#gqFv&r&-AdLxT!_L7r z7-@ulj{OOj!BPg0)#hZdQ*}h;%8<-&?fR^#vQgfRCK%8>iuZ8bgJ(fpXhR9n@PX6@ zH3iFmL>!zV<%hlK>D!z6r+v91lHK?_Fe9rE8$*|hG)IokR3hDA(7Gi1N zoNw?>NB*;Y%2}88OQ~7l*$EL#V~qcOSF0YQ7MrhRn2z<7JIHK~_^xJYsJUIO6IdrH zVXYJaiE=M6kEMyUjz$84Gt$C&cpTsR2)@pL3O!%ZD15W(3tla;>9%9la^#) zmnJ@Mpxl7VI$r>XdJ$XGy)Y2yuBg7uaR4XCZs85x9 zM1fDl&NNom;h(ki#N8u9t)pSq4vqh2wKs$%MS~3uP_6uMeYW(P|DCr70^gwgM!5i$UbKi{_un zj~HXs>0r-P!{(c>$QZA@T)NS@{E_#r!B3?KQa4n$tu~Y8hOcTAK9=$)08)aXWMMiQzqJn5}#sUjz`!02OLVMdtd$7Gn4+!`1c=YjJGf(iE z%d+0YzP)!u*T6Ud_l-w`IWuZCT*7l9-Q1^P`XfF1&VufrtlT$F@G;~3Rw0gQ#X!I8 zj73X60~{7dBX8<;pH>{rM)H*6x9o)h1-tj|LrI~CtbJ>8{1nZ)H|$)uw`A4r;!T0s zS(JjVj#OSptN;O+ESE%tGcX)}a;C5d-MiuHw^p@Au21u`&vB zuzdTd@&(1U+LU%hHz&I_nEe8a#H?#D*U5Zzm!IH%9~!Na06$Ac(ZtujM*9(hM+Zu- z3#PQkZ!L_C$FVYsi$!Njf%X_IAN-;}2svz97nq4P&kt^U`Au1!hld~==O2T+}Yg-cMoE{R<`#%Q<8Xt!{1*@F4)V7iG`AsEnB%pDS@?h>@o z=1do(Aj5OV=+<$!d=eFkZsr2CJFZlwb)Dx`NGm=@XL<}Cf~9*f$|$Vmw(-g_eYmLeoP(NyAb=sim zw4UaXXS=hT6FKufx7#;3QzkNE{1y#}c8T$zCDo=v1=SE8Yh%z|tGTp1VG3l;fL701 zBaAf605%^9+D{*NeBwvAX7~f%N5Byl&AtZIOW2GKpK@DEnNxXFAH(a|uxhUqP%a`y z7B4D`P}x(KAzoloZ+42;4OIXn;sw{B*%v9*Fx{KU{9z0WO(7w$w8QQ-v4 zVAWgUZDJJ-MH@bYcms`P+JstDxJX2~mZ_r8@qQaOaVS7Fq_|9E@hZJbbaBQ3zHL9%j}$yQ~jxm?sW* z7AjA-H%?>7Z9k&=s{A;~YQzcNCz$=1Lj{*34he^hc{uOJ@+9mk;EJh1Lo5WPz^{!S z&3{V6laGT-5~%Mbc{s8UzTvkYdBg88fPrZNtY}8(Z&8^3Siuwcbl#dRum@9yJsmgt znIOqBkN|VFy{TQ+aTVfd-5Ubv;6o{DJ_E&LD~sKN!>KesFL6tw$qh*MJUI&F`_cFS z!sbzLfa4Q&T`K=_MvlmOEGJcpaB#B(Syd+<5&I_$9=e>q_K%M2VRLh#<;1%?oH{4O 
z!{0f>GLsOWn73akD6mZo-|pb(3TSpxVRFNzZgD|!cm%@DPoyRvD;r zM|?r?{U4-3o}te^lk=G01wu~~8^ApL_mmf?$mwL{s8I2z`!TU^UtM}QwoT^g=4;ND zW+E8B4JY8E`m%5xG(Z%+l~7!)Qq}Gk(p|9p3@E-x5BB!oR4ekq>Uq2Ff zSi~@R1GL=JItaTHF76$?Yo;%_Aj1rMhVPHZnN8X;myATSZEiMNbGdwFl-9`IIVy9D zBAmizoppy2|NL81cpF_CD{X8>;A}9?Hy0VCZ>QPm^NH_w|N5J|t*39>NOocNlll>O zPCYRU`|7&~GDmNxDqpp8HuLnpxv}zfY@2*Rnz-F*i6xU6;5bHt_wqrrDkb~pMInzo z9@sO)@`5R|fr5zRT9PBq_JBp)6`#<%$g)d8Zb}1E-GxGUS9}0t{=C8;)+U;0fDSC& zP>~YdDl(H(`R4{XlIe@wprsb{^j8gat9Jcjglb9@vA2{iM)@5+e?Jif*qr_@FS^`j*Ap)~dcjZ8{udoyX{`KMSEx4SG7>u|8ke!7s7 z6^D*a`|?BFCeBc{lfjX}1-t(Ke(1{b(iBH@tNsj+RZA$#XuqGSR+JNVwU>l0%|lr? z>ouPv1QEB)4I!t3Ji3HgCs9xX0S=DLvPYOIB&9SwmpY34{^oGqGCtoPMsRM=-3=#ae!6pRhaglk_fbyE2Uj-#$a?&a`TVO+W6sn@p+L{4%F3 zZ+DRK^Wvd%wju%9_beLUE^5Br>i3{&cxMP(Hys)QIqqb}k^SGKbXhI^4?oZQaSfTo;@I)#=3AI!XQx0Olew#Ecj zo^d54sl3Wj-WBsm$-C6^Zn{HMX+smkmyNq_;VPjVoCVKYJK}eHk#3k!DkcfZM@-!Y zul{XI;ef`g=yg-5PRs8=TU(&X^X_Dl?X(UrR{MN?%-?*Ot<>TT#iOG7^2@E`oNx^e z&2fw4cKq#B+FPGqyNr2oTz^8p7c5@+f;wl%j$7)W#HE>qB5RdQ##?~pyh6v*qnXC2 z)Wy{WiLi&PC>1QybqUn%c^f(BjxxDjpCY*z)0cX0xzX+m};G@6tpmAs81@S3L916$9 zbV}-ErBo%aAUzSJ7jfopl?cj2nwp{S3!CJIo0?fk?0GugtXgzMYpd{)oZ6OVrr(t4 zH%8_F|A#=qp<{Sj<+r#8W5;_}v2Tavg0jfyf4%Q4Gj4Gdjfl%`Cup_6*eX)nY>eEf zO7HM3<5EB5+M1jHRL+#KyZFX!GCR3?=wl@TW7o&K3zT^W%51F-{Z+6R$q3A$G}FNm zP`bTP)4ZP=6EOq+6X*2+B6(XoDQ$?52VO}b7;8NGx$lguc02E~k~Av1 z3DEgkZ4T}+&DU9w-wkBNtt9C$&4%GZs+?j@Y&g zXB zxluG=Tg9<>wVl>I)m^sdrSH^*gNatf=rI#Zp?)VRUaiK{1A+|KWxH9{kzsuebt=j( zDAJsrHywh=cNH$GFpR)(~cE^A_OD@&^P#^G5Dy3kZ?6Gej!uH3M0#0nVTH@IK&4a9VstBVX_ z2zHf+yC2zF<=551FuMIhF}9CmXUC2%`({9AiBJY>ix>O~wqb2!6(8CHqPdgvkR{zC z?2iv$Hr6ebU1~Y|{CU7D7h+gLy#6SpHedTFhvzis!h3H?Ga;jNR-K6lw{Z?nmOzoV z{3vc#?~rFgg++_MK6vM=ZEHaJC#b~iS0wM6AZ#+3r6z4ucJ@7f4*V&I-ah5i#4W=l zfo$nG#^WxQzFBS+r;L#gUv+JoHWvEB-!XQ&5_i*ReeUsOD>G-Q{CJyBgjUpzjU_{5W~6d>>3vjsoDL_ zBcWjwM+hM1IYG{Q5j%FgEqS(O+ZR(ySIwAAdY%|BamVde%|l{w<#1b3wkVS?qML!y z9xhE-2)sJkUL#k`V?lT{PpUI)?;K>PQx>dCw9EEpGhH#?8+TuNZ;?%qiO0M4U 
z0Nvx_ltZu4+8B?}jFu9MS>4pq10B!jqQQBo{V^T+-7$GG)Abd~0Ud^jzdR72p3v~n zC9t0V+?igHFtU4BLtn%u^pt6?I-oSFC_WAvjgvb`mQW=>@_JfPN*apdZ7pW4VjRO? zc`8%8%Uid8&cn@a#yxwf)>n@^tJ4&1->SPUGk^4jywp}~kPbI~7%NHCl*{(eujg}020D+O0yGd@^Qg?Sp_zk4<;5G?qTP!pp8eR6|d9o0yhA9@DTMW0dU9NGiFY3AB=4-R&Ez*Ee#EEvp51;GI zzBgH~ipnpV69S`6t6Qj1QYI*tFM8k`@}gM{VyBi^cv1xOcBX;+!8wV`pWtQ@uX*oy6%Lb;>PIhesWf zPmF687uTA6nX30=B&L;?p;{|HdR=RLU|&&1{%Oed%VE`dl|ePcRyiO!i>7 z)v8pslBm=#Aeu#DQP}f9cIM#bhsXP&WXp_G z$vi1=1&1l=rGWxuzgv>poM-#@D&uXst7+GpHG|9sxVJ`?yZBKyC_9yv{>w9Z-A;ED zhi5XJmxfn;_!Mawoi)85;}7n4Md1D}qZ6aK52}R7S#X^#7s)0Ftou_}JI@`~dRX)+ zptOc%+XbD#1YXUwccxfCVBVEQLQeJkaBzue{*lI)e2M-ny3($LL>C$%H7ER)QQ9pe z(sZu%STz@%l`-9eh?}9G)TItHEum4Em3^AEId4xprW^Inp{;}CK(>9~`$oFyt36ZH z)97SlrUGY#23XrXRc^^gq&al3!80v*)+!ie+5&DdfUM*o_iRv1Etyt>xOFNhk}e_I z6$yt(2U(G(p;-9H>f@*E)3t6QC@U%H@4gyF_TcLA3Tm9(Dh>!{4it+k7=eUkbaTz* zsh%b>5)REIwC%Q$h+;V;t*1AhP9>T4r6D0osYM!FSI@{kctt;Me67a5jdP*;;9!Mj zp-ifqgT$_pA80bwDq^K=IewfX^Nqq(06r=k%QA6!b3IQD!M zwwRTl(aXNTULTeW=Y{Ts$lGIj~4~?)5Sq zhu|lye=bn4Jh*A!AX1D$eJm^}cF^LFF%kCa8%Y!JyU~WNFfMMP66ZXrEj9Ck$yItv zvqVlMcI9EDL2TM-w)IO=-SQ(S8^%uX5Xt}HjYHO*HUxTu19d|xc%~_| zNjgE?<>>8(_;hvuj}84W-)kR_O*33(iGYNg1V}+16kd!!Ue>MixOiQn=8;Ky_!T}AQa(BDGFw<3kW4BqOIkHm}$Ir5JUl)u`M!ubyb z2>8GYJifyhsF>*o>36jg{qZ6Xoj}f;hRCMgIFLf(hdt`n7l==WN6ftnwx7kcS}h8- z-2hg#;z>p}D?Z-#WzWN+3qp~ntr&ZU!B<3I?f#Sje=?qk?M74LZk0B5UteZz{^qn_ zcNwo@plqsHVYs+#hCE2Tr3ib-rNq+fkTzF2%M3BDu_EaH4%qi@K==`V(Q&@Q17R#* z(8r2u-FU01HzhK-WjW9+1=$t@gQP?Twj2gt*dp1Mr6g8z_L?9O9Ab{#u{R5J=o51mdxl491wqN7xnTn2pKl&VMMPkVepTc?-jK?!8-tPE`s*bhmPM&d; zaMW>r2%=cbpueeEf?|EKfWmQzKhpDFO~w~-YKo&4O-lCdV1SCaD4^gZYC`9r5xx3A z8aFOG>?BWdl1Gt4;R~fTtZ;Bnq)C2Uc-W#jBs8^TMT>^t>Ma7Et?7TbbVc9VOBh(7 zX>1Mc*3Aozqe{R>znGi)is>c!njbgVG2Rc{U>h%tgDZ|FwD~NHuF;c`9=2xSp-Fj@_}tdjbjSBry!yKS#K)-!Z&WPBNSU&FFJ^yMbH5R| zAmTj6R{4;!6;*)J^JFC(&36{P6W!4@5U;FG=a%Ri{jy??@Ea1^W5aN@tz3-l!>D)W zj{`X)S<2V+V&A1F;f53j)N8ek19HL_4i$((VleT9X`Gf52LDcfTM zr90FRd@-9ntOaRx6etKbOfE&MqLkaZP~jhZTAcFv=~4q3k*&i{B4E}|1y^yQ2AZvC 
zllGYhV3+BryD*QLnFp_({;a6P$e*0%qA@?`_3LuiuOlS?O?WsaPzM!uTBvsei5roEM|G^3G*_D3Cp0_vkqBV@^Aw@$wx;Eo+K$S03Wb^OWRNN@hJCdzAO z>-86hsfX>fZ&NTlVw6%-_7b=Av=Nw`&1T4aYN3z@c#oNRIy}9iSDbiaKEks;pI@<9 zP>L)-{$Wz=)(hc)MRQbV~v?!BxFrqBWqo0`5G_gi{ zGK-Fg=rxrYhaB!J6TL%H5_!sn1jW)A1l{5j3Pe$nQ-wcAw!;;`4@bSSXadP);Bm6L zJX(2DCB-27K+jzo;&G);;xL|?AT3E-ghN7ino{mX5ci#@ovbw6M^{qIgeVJBk3|Vb ztZDe?G@~Z3a~DyrH|=H|m#16;wO%iO<}UvHc!f}X?n}i&mi8r1fq8OHjSiF1ttQC* zHbryp7Vy_%Q8Jp?d(H*%5-?tQ%Jb|&F>+c@f-miEmuf+4STs{+Q6z_ToYAPlJr-uJ zeb2Z$N=W(LwO;3RxbF0I-nYJ|68evC7Z3wFrFA@Ak0AWr&|?fP&Z5m79Asef)K;*zWMG{@uSygU5C%kVY8+91jNE>-q{L5iIhylC_qb;}YEQH*-R zEc+@5@lTj7$WZ);K!(nxRLMWKfT!qjTjgGN9I{wUs*w@Z-4@Z<_akqm# ztu*1qn11Hj#R{q=Y`HDjMimybvg}cNiy~`&d{l$W*4&g-h#M*{&(~WM7VAl{Noi?s z46Q)n8Arl+D@W<=M-7O6Jm?_@Tw_Be{_xRh0qPgftf_znYp%{=Erwq{{1!BGU0;nr zb?KTuU;pDjk*`K9?7*<`E!PH`IVlOSb^`-Kr3)Q=(MFN z{Fo*IPh30msF9$RHIDI6*zsPnN4v|mWW7CZ3O2PzA}%-RRS~B59Zd1rj|-MUEza{B z1r$JVje5xh+rhMZV!3t>E(dZCL%f|Lv3-Y;tVUBFi&^y61ct3=8QwLL=l2eA18iu* zSRgoKPOD#ILZQqi0d5!=w??hJ$k-7hog)rXT7NITZqG!LLl>7fqj)^YIiJJY&zU#^ z4R~{Alaf7GFeRY#jyB z=Ej%o1vyQ%B2-#R<~O!AAw%X$Yi4pAt;5v{Ao{cnVnj)A@lwnVJF%mCYehG)d$blDp7 zM@|WEWSpL$mqdTyP1+Tdv+X(tO?t58WDP}b7Vpx|O{vguf95Sv2UA+tv*Cpolrf9< zx4RI;kA@7zwm44~eNkAi!ImjaeHBUMuK-rTHm9|}$2@_4Eim^zl)-ki?M}r(wx|0P z@xX|AhW`v;CPz>3aF**uJ2XpE;jc!T@=F$cWAWk_=Q(!6m#(?YFz$|-4zAg6smeJw zp6d@On;^Ffd$%%3+dw_<+4&c)Q=AQyKGb)Yvuu9@GN$Arg>n9Iqpn?1RO!OIiYe*x z25w1wSQKSU>K7Y+o$j2BXyk9Fq=6`X&d>F5f?$oqVzrtVO#)2$6btQ$iTYh+2)rM? 
z#6?Q`+_oG6&kcRSjO`r~j(0A-6W9uxyP*^}_-{K@WZ^|vXPI~pzjJwBLc#A)jc-A# zr782efrj(-QuWPa^JK52{$m59{UpexYdZC4aE*Ois>xEjH{m8|h!1K^>F$1m;7 z_v@k-i%K_pS}FLB$$`4jZGh*3WOj=X?{gqq?{B0jL{IpKp>`XkKVSL1!ggYCZ4DTk z()naTzn^2@yoE;Aw#>O5Mkn$Qj1>K#z{36fuc_AN6Et{^ZY-MT#arAI+zk_OjXc{XplXih$^~H*fq#pI)Qryi@ifOIUN~S(0)G zbg_F*1jWI@pooR-XTP^s7ZF0ZaGztFd3@!ln{@Uu!*QQH4}`?!u;8wK2R}7YOgw{_JZzaAdnp10EwutX%ho( zpAw8xV^WXg^T1u%*hKk6OncfSs4fdO&Fd8bt|=3|NdNy>d+WHUw(oyf5CajBR*;Zz zfB{B8L>iP6vtKi}8$`|mL4aL(Rq zuYRw!w{SI6&U|4PI-n`~@J+LjjQf5ug*m-cKct7($p`AP=avz)E~zW<;0H)`LMpA< zQMAIL$e909)C>LzQb!yWyWK?qth1)Pz90tk*^5wc3!)^X%D2@|?*kNa2iO6{Saajo z1@^+3aSUyt&7k}%=l+k^j+_)J)mJLbg3e8o51K;l3uMmfFP#PCa*bD%nJ(QM(UM?O zB5P*WFiVYJ+?hUT`)VMRS!udJi_Q6%#v(r`u;TKgQ{(^^lkE51{>-Sf8*LIA3(f9v zb|Z_RM&$T{*_~Z)cb8AsJySTrq2$`VE=&A(i?c&|GVI}Y=_t)mLe|>i`Bj|Z<#Nm| z%%y!2AXdMfE){F`PHOy8Rz21<*=D#m>D;)NYI;O}r~8ya>SQrOVuIUZejv4z4{AqJ2kX|_ii{m-*WDx7VYuMP-?6+_CU`z>>m@J!%Wdz~|==mrN;c2KEfw5Px9C*P5R3Jdb$aEnRP zsm{OM@eMUIdlk2bqOE}Ny(UbBK8&mPo3{?iEReNOU|)a`J-&@v~*MQFHC z;1B9mbX!@q_0Z%gX`Ef;`8IA5zSkgS_TIwfQg#nosOBgdekKnuf3oh#T5euyVQaR6 zj!KGA6ui>9vc|cYl)fPGrYx+5fuc;RB^%t62?Yuss4LA~;zw(LVX*HJgBv3)FqiNO zGx}dPrYei73HPMaFdN7!CcYw3xkSgwUMD84V!l}E-fqBF(Ws8|i%RbBywGA6>lrHP zVSx5Nvj_afD;ijCc#L4j=UZ-K;a;x8jqr5J{2b}| zRq+9(qXmvBpe(0f7YD=Kwl3Oa2R10B(4%mSXOSE=H+O5>m#4lPQn^K5;ueAgv8iA<>GB!`i>R0JIWy&ZHue@3jm?G0d9FQILvs) zxxo%cdO^1_)QV~qo?&u3YR8sW-04$_-s=a;ur8>{7qE`G%90mbZ-)ZT%NVX;WjiI^ zk+-_41AOR!@JLky08@!B$;6bVV6$2&zNjjX+J0MrVl2x3sKNb5-Bb@Ir*WK*h9adC zm#Z|Y=GftT8!A%Y!S55O+IO2DdI8oGyV#`kVel*g`;7<6+yO`-m218Lm&51V8~d?W zs3!|x;1RgeUnD+)Mj)zj!#2av9`TIV2hGUi-cAZVqs_@EJ zpd1L7YX$^>;W>=}pd*$IpY|H?h1srW#ntk`lUW9*t9o$^7dn>Gr3@X~$ol6gfSiKE z*e(h~kJJmt^32qJXD(0C-SfsLh5zqG(U1T#8H}(`%8bA8-fy%2{%1$%1)bbu#g;(` zSNL_FZ|0qsmL@%?FD~dkU&}Y}N+B+NoHtg8qa|IUBW5(mI!4Sf@C9KmIDIgjJ{82S zt>{XXXWwTKH2?%f+#>q6T@X-!)6IeOAB+l=4!M+Dz;ST-ft@b z{OOj1)GSKXFSGu8*WMW@Y9ohd)qz~Q*J6@illS+s4GAkyT-tL0F=xb|ZpBV)r)gJC zb$}++PjJ*SBRHt 
z(U@M~M8SeXVzV;Jb{^)?E+)O|H?4-%z4wJ#H^cKUaKc_i9WdNIqDLp$1g-kA%T!9@ zIM={;H8?DWB+}X!j~#la$!wDLhO0pJ9kg&=2SlE%%N9NQg3Ie^<%LDwv-N5YA!57D^O8EvFC~2xNoKbj${gz7y?=4)-ERyRO?o#kA*Zv)Ou2)^u9jdg)8# zl%K8ZEB8gJfCCdS-7^a2b)7HmgPm;osdV9j2AWi%gw?5*dZ>|UEN{4#7GoT}(*y%` z*bpn}UGrY7I_9brv%cPf%d8IpbXLxj_QMM~K?AM$sqH&3J&s~;wJALYC$>yp+6MwN zHDsRskiuj(difNM)C{P}KzES(YQm1~uGjqF#wcad+KXc^m14!NxpkwuB9Bpvw`G0% zB|IXL{GQZU;$tJ#`cV~UNWh5j^DA^CQ;L?iPnwRi1mHPeAGSwJi8OVblls#SQM~j3 z%;= zeB|CETp&k@6Yo+`o3Ge`-_Tk zs*;HU90%GPrOiDy43Ls*fN#0e0H*=+)||13VXD-Y=Nt8PbDCN~#%sU0V1G@?8Vsap z%Dya4=$w|0lS=%%u-w)*SG#Sq{U~tayPta9>i#f2`{sheHf2fW>}O*$4)KPbndj5 z5QmG*%z2t$6#pYkNP)az;X9i+7Jh5M1CxdY_$6DN3^h)Org) z16rS~)0^GQ4V=<3&n9X;nwvD$+Zm+FG-Q*NF059{3e(JYan^l2pVs^bYhrW9-T0gzrx@mE6a9aC}%J8@wC|XQ0GrxwTry@AMp({o}R_!7BqjG+v z9!IA4%%mC?fOs|?4Jgx0cMrCwII>*Dm-N%v3C&zEyvxPt$hW z<>*jZcoaYI^>Hl`hy6DZ+pJl_OR#))VUf#u3FbdL5I>X{9e85OEXoad6h{6 z(JieOw_aHD+wS2KX(WbzPi+IPx$oCA;+j?#?p)Vz?5ZH6{}PB$>atW8E2vcZ5_Y3G z`z6-~U~*}tpJJY+PqvH3I2h}nuVY)g+zEc;smLQL$Wnxuek5KuTv4DM8XNZZ0gw*K z(>g1AW{Aw5<&R+Z{*gi(cTsdZMgbp>x0Y?;D=BPJ9WaS@XhEAn84jbkOYcz>JnM4& zWKCX$1mzZElnkKXPH)$I%9y?CKuN9T{08e?a7qvv_u#jA?ccYWLHax*$P?f?ZJ)27 zp@IIGm*m1sVf8SJU@wbqhMu-oR z%;NkfljtT1in~d1POrxmwpq7) z@5JpeLj%lEd28Y6-L3%+FvN>{0X3`uXMI0-_pHbs_>Bnf?!{%wqi3mJ78`j9+oaU0 zcF&EX7!^5a74=>M7+T20+RdPY8K%^cuYy}Bp-I^N%U3#IRH~0;MSTfUw)O}2N`GS0 ziMsJA)!`Z_oQnp?`3~4==Xwie-ik5}@x* zoQ_ato%}X)p@|;=Eq$#JY#IKx@CP~J10$^fG(h4{@@xh^31`|-w61WXlb1nJSAT^w zg=9NF>OORqr>Ii^$~<5s#q{DWW?%)KPv9Io7lC=!haU|OM66k*mJC3807EelzYo3x z6f;Gd4ROw5VhGBG@_bC?p%Sjn%h81idyRUq2;{L+EVKuYbSYiV= zI$ar56rIfV4d5%hot<*%vyKhs(v9t7)8h?nJkAW#3lp_&gHQq0eM(wHNL6EXA*)}} zbHKH2;t8Ok+Klst;+{ta%rq-y2zDP~!i*Qsrz=>gIYOs?;#McTg3rhuRxiCgo%Z=h zf@a>L0(~B0rO=6vgKr6hY3Xz%{{r;&UhS&@`W+&nE@_pz2~cy-62C>G8K8xtnyZl( zW+^@qx&CFhcX=jbsu|(*>00R5BZO^+^8^{VXKUFZ>xRI!fIShv@QuM@44c*3!6vhP z&nxQPpDC%WYnQp^KVt%kHu*`S<^ExwM~kMRpOVJrAs#V4@=XSS6~fnYl;ysD$l{qf zL46-^B;t}$6|+=za@7aAj2#wscGQ+8;G`HIUa>J~yV|^qS`z-PnF>%pGs-BHx>D@} 
zf7BB77Y=Z%fCdCE8Cgr{l226(R*2WS7Vav(q`80)oEamI5ZmF7@29@E}OTL%b6JLUZUUrVS|3=qF6KN4;wO zA$EaSnTmc_!>Jfd3 zTuZis$&yKD)1J2m6ZM1L2ESy}=O+E3nsbURO<3K(rV>vG1`tOYAkT7<&t_J%6aP3?Mq!vcwJhamcbRVkJH3(5HGk>M*b>UCLy zF@n?T)RKiE^6Eg;6&eTKVk)NHmzQtx^`?r~+%iaajEIfX0nJhojLzO_KPE$XeA2kx z{5}NWwu2dLTdp!0ApIpj*5pcMJ!69tH76;#md-ls3{ya7h%Kw{{6|-ux^kOBpJ^qo zr#bE0|6>v3%q{>~lgzo>!Q3Fv*8R|0`85G^OQ<>MZy*gQr{Q9wkAhI?iyX#+!-hnW zMR1xqt(fflFon>L+kJZD`Xa{)J}$cN7h5Ee*cocQmqh-IEhp0*HugGJX=)2W1NE<^ z1m#jQ*UM$Sr{>Jr28UYn&*6Ji?N9=NjS7`aCJqcI=xFRw=s9b62IIQ-!x3+PV;(71 zyz;U~E|$}+yWJah<6(=9V#-CLm0Zy32V2C=PMCkC+O#vRDa3H8H zq$jm~$~Ms#sLt7_gicw{pg#Dw1I9a0ff!o#d4u#HIM;|6#rZ2~!kfL4JH`JuOiVFZ zIw8l7$#)|q83;FWB61{SYu}7&Wo-kX7!e}SJn|Kgd}l7tJ~qEon{`ynV)1lYJh?B~ zo7#G3Q|BtTvLFkWbFG9I{?M2&E`MA;y0WE)p&HISGD_yCe16AjPm`gW^N>S@{P!&1 zh?y%*9tAj`{)zGoUVF%9*m^EFa4?RjnDe z7dR{j;&AK`wl({hh4vUiGSK2wg0dVmvj$n=M}Ps&LftywtCpjb**!7XRwmqwJTb$X z4hc~73Rr5TmeJDxc1loODpEzuEpzq#=)gH`-S|KR9S;w$lH#QEftpLBbnbEs0sKKG=vJ!$8%^Jicw5M5kSSgp~DRjaT ze~-I5J^}s1v){{NJw5fuU)sUNK}wIUH0uk#Pg1B91`8%{B1c zJ)jdxr~09sD|I@ng(%><)eY|%%jnL!^NCz>icISY6#qJW`2nh2Zh4cWBlda8q`BnUacm9Ve9oN}IZZ8BKvtSVdXY*$~F; z^QV&yI^2Pt*{E}4cYl|V6C6}f{aj_gbUFzrR0&dLZO4VMYOBm9yxxIvYrfcbF8)yU z!MU0e=)9rYoXyAFGCp|zW#`*L;*n*ifU~*`7$3L%J=9?Ss(EB#!C^TyppJm<=^8k) zQ*YIuUB>73zGql)ft(SEgNha{uW1Tkqo^9Omcc!*&VcHF2I3(!Kyuqt`^*6PhX5B5 z-pvfRmqaFUsqxgmdKL@ufr)8=${rI-lX9uWMO>i2=Lr%CtL*bQkWMtKv@Dq}Q7LZ2 zpEqLb(;3WrY`!Nwwl%-04*s~!Qu2wUhqJOSvJ0;FnF&` z_TmJ=e&=B6iWPUr1KF)m-g3X)h8T*ih}sujI0{Zc5S@_j znvo})vrA;skI%nWyfJ929`9#u#gODn1oYT;?)wu8RBjxVdjp9>LAZ>9jcnk>b|WPv z&&my;7xAcA{_Vb((FdrKR%JV5kS2FKe&QL&aQvK5GV+XovYV8@xV67#XyURjWEDiv z1=dQ7T`pA{_|_+`SI=f8;JfrDMHB4C-EOrhma9cxXuG)RrBCF5V1lFJ?JHYO-V&>@ z^&af5t=?A|Bc94Bdko4>Qmmu^(hrg8QBHl1{!fDpaWIxm-sdXbmj;pq4c(C4Roo@z zOQofRooit!vll?Gh%q=0;)8nt&kh1Ov($HwB>En6_w;=^>y(k7 z(`&`0IVj$5jrNxLv0gv(U?ItDy&jY&2iB>UV7UnG260BU7Zq~c zIeiIm+Py7$WOg3|{V#K8)*<|0Jhv|7P8cIT*ne244$>b`T;#(4ODN7;F)!aS0z0Im zAUf$lPvJ`)p(P0mP=WEkKyi|UR^t?67DqJIN;_9P>0qPb 
zOG91EHe&a_AgROQr4u*z+_XIrrkrg{Q$n^01d@x%on4gG7TB8V1v zhB{89eW1MN0UYXZSudgO)|Psq?Gi%*j*!=g)2CEuF1XjD)Yvmg0q6CosXLJ4OU03U zaUs>CrRrhp@}q^%*5-u8Esd%>=8CSePhWSq@#5rZA$tJRB#M<3JgYXUWnHFN5{x-r z-n3R-il)d&kEA2LUibV$=23 zXO(vSlz9?HWaDv2?BUxRKwEW|(E6J?AOW|&cB=(FiOrRM6N?NSl-BBLj$q*2gXnCG ze8Kf^D}_>X$lH%aJ}))SldDTwSV0&(Y%Avu=Qi9JuqYjS#QGOL*OnPy6&4vZvyKSu zS-Xh|bW;uuGB_p>EGciAQfpLZeW;~ybf}Ci+lCe13cV2mR1SDn9ngB0Dp}b7{`0MTJjoNHIK?;_|B>ZNp7TkBT8OuT!aH(I&<*r({V=5cS zCgkS^e9jLC@L$I508>O3g1pndIw(p_L>%a#>B~zYGMnt}uy5Vl#3V7fXRef(?mF>6 z4Cn-=XIb1R43g=)QMoOu#Cd<4P+(QZw)|QrLc{8hX^k+9 z9aWwSEI;}3EV?XG2553iQNL}$T@ClP*ol`^zyw-Bjn2T?2o?SXXG0Iw`$lY&bmROT z4?vB1Kx=^JtZ?g1W-%ZIGMIO~ruj2xDV7oDYry@X%i2!f!hNmMq={RmxUC9`~k%L#`$eL=Nh|C)Jl0Rar)3yoksr14<~qJ;~tP*a2O<+cKB1a`7FBs^P8I@@*wq4{>W?p z{PusnnlVNm5mvz2w)yMkZ6}8M-&4w_2SO6?OJw5xoRXGLLy@PUS5|Kl4 zjG^;qbj^Jyn?nLp;#PC4TK|=&S_zN^yvAGF7yp9^J02kuA}MTP&HqYtBLNig9Mew& zk;qES85h*3mRZL_iDehUdi;M)MK@?p%qmbrruH_3vZ4>CFO2)Jx3%QW9+XE zDW@k8Cj~nb2$Oesp_T5*j-3b=q1K|PM0CyBG?8l~Ur|8{bgH|OW zKg)q(kUXNOJ!hjd&`2x!qRG;9ncC@o`C|6eM_{yW1Q@rtDuJCQv*f&EKNEoxoCih* z_XB2GUFl$1)V+qsF{Aqybz7-tY?lGlOW^}!m>=a-{C8UZWZUO_ji|GN6S`V=56nU> zWfj-7JL^rN4$|D8A?V^E8fWRnvUkr%U-h+hoptXNq2mPIie1?3D)m*0Lmp(^9LdJd zRa;*+tLJ-t){sH2_Y$SjpF;H?U*+pWHPaG$HAgFixXN*cIE(;ip@*7VQkGu*7p?S*(t%rk!v>H5WN;FX2?m)ApzX6RF@{vxSQPtAabJwDH_}(-IzThRD{= zo>4Pn&5LAd1ox!ZDcLxKI5c8$lM3=_a9NxOF{kCr%^aC_WUEO3t!DiU*+7yo`AR|3 z_r?8^!h6On50Pz0F54mC|7n(bZc|D~wrlLadSdcFrcj2o?jtKHmSX=4qudjTV@J-i z>y~l-iM9SQjS(q8x|VFEN-6%EgHh0;sHK6!>oZdCVMOtt{x}~SQXvzM$}|3{0O-fk zxJ_LNkwwaVtAfF;|KB7VK&|^uvi!iCXLaxjAQ|J^qsgEjU;h*58hP?DeJ2^t_k&jDlw{B^(SeWqake`fnf}AKUc~L9!%M=3@5n1Y=A0 zZ^7(B{)Gj^3R*bwUKbNj37&>pO-{lX#_Q@t&yYzi^e=L2LPjTI_m$%TPAoeyFH7h4 zxB23l2@?gi;L8+rbV0E2IKrN0eE!;VFA@@z4!v(GUK6`&hv_k^iH(nqP1&SyJ&YOy z6)KHQWk|L7zIKG=c$=HEvZuGVWuql>iN&gI{cvj}XOGVMOZOLUZo`bS+viX&^PNMz z{+w^CLtgaz)j2fsA3p+@9dFPW$y8^XRI_1{tB-P9qqikCYsa=fk)C|$@8@UmQC+;D zg^K%AL5trOX~mg8od5gnzrM2u6~F%1Uyv_|Ye&DTWFkU1`bu?j!c;0^P5RsZ<4f0{ 
zpL!Cgx2+ZuvJkhYdxB>ng>HQP^{GdY6Zw4k9UnF22LfPnzuxaiMe}m?zG5LY+a1rJ z|C0hgA44A;Pk)~Tbv@R3Zhb&a-p&zf8zGDF3&sB?<_&)|&$Ezke8AB(CK9`_b7V zX6~N)XL5>_gYU0)e@%1h=J&PcdwziqT^z54ro2|d)2@3fEnJnmT~fE>*)vwH||P!qKP zIEHlZ^69(Zd5)Y4czM9Z$EOyBFBW%ZN)1@JCbk^z$4vsxe(pd8+V5XDgKz&HLI{#9 z)O-enzm19foZI`$*2@fZh4xZBa)&x9iw3->9yvv;f97@g2zc+I^MMif@6_PaMn@g7 zL$aa@D)`!|3H?HuU&ifr2Vjmav}k6m&Oji-fdr^bI$mb;GChFe`sAw)bq|gicwck zE>D2o`Mf8k9-*n!&|!)q*QP|vczS}A5z1%>1fdn&Q;W3rVu4uyJ8$rj-vMMU;_|c0 z8@z5x=@@LH0zrfs+P^G!4H{By_Lvk}sow4w3jxJCrR>!y?ux$ocP>87Nqk-aX=UYC z_~WY47viTZx)2(1o4mxW;47T5UY_J4c~_YLs=J8gL>sVDxcQt0b5 z*6A)$wrxdRV@x#iyPB5^9FRhJ6Cx7j^6!K{LD}z@Zh4e$P)Bl1)Bp&#kZid@|(G^XRiX2hOIY;`SHjz0D%U6ao$uq4q3K7;#h)_8mcyK~oO7}ut$|Bni~&`Uk=v7n6b%NW_a_>a zCL->MYq{RfHq9tvZJgc{=u+q+OBx8u6|dAegqcrF7Ns!naYfy!lvY((E7?}wG|cB6 zcN(dD$f311-Kng0b6Uo!!h0u+&Rh4U^QvL$GHsc?R^Q@$IeUq3vsmP=s+9=AW2C~5 z8$xOhlGjsj<+2qI4^zg9Q)dvKBcoWG{Qu6Y-*xnT^a_#P${oGCbj1~8(S0^HAz{TW zy>#2VX<5@6_L@XR4^Y}&Jyf!(5ZK;^l=okwIn3U8X72lqPrL6rO<+y^U|+xnPviRb zU0@KHsO7Y|AS%N$wa2azu{U?#7d)BwY>tNJEs{6r%ZB9+Y>7?oXWbJrY0%7Vsgf~Q zv<-truaVHyv8`BAo2l7Dg5IyS7D<5-N>q>uBmE(;fO6lQYO4o?gPG=37 zzHJR0vt6lOfPUvtU*Yt-%&aOc*li3)CD+ku(OVnJ|7Co2h57n+#$V1KrZ!~VUbumJ zohI~o+rD$Ym6n?8rTX8^_c_!^-!$I2aS+3XGwzzj9Y^yrmQMz7| zEGFyLmVSA8-u(`LoHtsSjE0DqOIihQbfq@-%X4UhR4YW2SQh5oft|Se0$R7=nH!VI z9Xu%>u_mO`X86aMO;2tp_K8PMCv-DH$r zjlj5Bcty&r<8wlT`OA?a>xPC@rTRy`D+%qy%MHQHhxHzc4fiFEbtqg-+@OKFa*vet zVir2yB%ludL~F7bIf zrctrj{HemSdc)nZagAC-g`(VqV*QO|;%yrtEkr7YHCLEQob1L-q6^>-xj(iyosf%K#hCuLNuAjM+tkO=Es!baSbsu=iL}=Z=QQzAzAQ zs^keIOV-yr(AjzxB!~1VD%B5k4ccSS#LgdE%qn$d$ZD=vRm@QN@1oB)P%*cKF9*6K za`2N7&~nbMH;VQBiv(es#B88UT_HGI)t#M3{6j_PD~V=P%Y_RW0v3Iw&l22Pwf`^CG+VwvED|ND-{|J z*&772FN4U=nawA)d<;6k`rC8iBSm$j>Aqvn9CQhl$xV{rcCC%i_KtSXhX%Y@!ksEI zcQ(0x3=y66OkW<#Me=MKGnz$k3}f9?V`)Qu8~IYI3EB19{IBX4Dwd6dl+t!-OWLA= z(BjEm{4_ACYMeO}`O;=FtzF7*Jbmupp0 zgPLpM(Be&W+M?~x-`JpfFmF`2Pj%`9gi0%rG(cya5TC`oFGtC(PxQ!ge9N5PS$6G3 zXh#Ld?od#%eiXtdUr8e$mva;%YFicY2vA?0Su&-7@(Ou2x)T+!7q|W5?lQu-DrW(5 
zC`^Q}+e6H|6Vd#YbP? z7s1wQE)MPusCfR^+1PMaP1fl!G}F?f!0SA2kHcYpM$9K!d9AT$y$e-2KJ$*85}&`@ zo6Ntm4vutK`;K%MPdkeL_<1wVsOP_!T?8pWB3y=t2k7Nz4?le|z(}QYWu+T0PHYK~ z40eIU+>W8*W*A6;E`~j0|HtD2RAMsD3aY#6d3%%^$u2&PB+*m^Y}HTn7!9?ddMxN7#$W2_Ghh! z7Iu6lOK8U{RtszT+;<`zZbuMG*G^e|)+M*mEc0uxzkmlZeNvPko6s;}GiUpG!=cXE z*G$i4{o&kRxmrTax3$A}=j>55pHw=S7g%K-+7pptyg9Tluo)hpIkNELrKr_OiT&6y+;?s(hx9t5rTA89v6eaH0OjNx>3+r2QtIy0tHmBe6!TJ$oYf84b>Di8hW#{wN^747#d}6yJLE4Hh{Xk$el}Zrv2Z=& z1{%>Ly$a0{t&9sVT$LQ05}jh5GK*-Y^`tobnl%GxY@bpfD%X+;4BhK0AABoHTd?w~ zCjUIq*z=6I0*9Lwg@@)o&Vh777UKTbV&B?cqla@?V?Htm(OwOURtIA=TDP2S6Vy%C zEE8cG>1!{aCCeOOdOF*6Led?)9Nk=e;)L^&%vY1ZQk_2LJ~jQZsC~b z62BCeXEU$~$VcVg{+4oJZIW`ES5e`|;V);a&{H&%*ur`Ie9aJ|vw<{P5^Q{-lw zFSGr}v1ZCx!iTj}o+VQCZ8S$EV=hDDM!|Tia`P-h>=%J}Kya*+>q-x}A`z28X6 z^=u;OMyL)o_7e4m;&K8-534Z>eFRR&Q4 z9mF?Ja>et$^*MTRS0MP^?txbD#@*Q)LN25ncrG#4&nD&z(@OEUVcv7=%Kp*JZPDTA za6&ovEpeeEA`T+=!{q(Uu1?G@$1fW75fv<*yXPkzRXNn7!bOXly-~Ef$g{(Tb)C%% znsu!3`=tkyaxN7*3peQ&Q!_J!uJ{`E+3jV~B|>bC?iQ5gg2(rG;0;vHfD?GytNbOL zYmsd7Xk0W zI??QvStnXnt%pk~Q+uNfmGWy$YFbg0_AVnAy0)p0Q#8Zu{W@G^x5#D55Z&{fw$dKR%ljV{zl0z1$rhx-GLtT%x$o1E$4Z%)fJ)Zd77l%{K$tbOyyz$zS z*+o|OlN9gAU0wEJ*2vun4YVleRLVdlGv&SN#L#HP85`=vU8=AbV-4fZd89U#w^YXV zJ*=C3JbQbVZWb%3K?r`S6s$(uYcc&gKIEAT9=jrx{oSD0&Az=X$kAJu%!1N__=$FT z*RWyP@M)>tsQGn<0(`vH!h!LQY3U~x;h*~ETU-*JPiN>{SN3_%`yAjuC+4E}HldFP zXu2n5Dik)PJg-Jy;6d0i64sJ%X|1)|l9sDQ9^4LKMugN&VMuYp!kG6394w_jo3-hN zPk!Fuqwu%jQ;oEs;YxSfRlJEi%PX-%RO&>p<0!N+^TF<>ZgHBq|0dE%*(;#J5o3XU zl5+Kl>jq1KPWnD#uFtl^w{|Yhaj%h(?|S=gI?j@&Bc5cn`@-yZ^C`<$Th(A*%zL*a zoO3Xzvi06h5KF}>=x;8j<@WQKFH5X~FJ$}O%OyrHDtwoRA6xT_eR z0yS^G-Gw@Pb@nMOYpUc@-4mfYzwm2eW4mv19C3{a8y8oGBfFECThbgd|D1%zZmm7k*b*)NAlCVUl#g;?YTDawlu$M-NwsRo*LL`ZO z#)iBeum>8e`xIJqbM|q^#MDk=tAIHf;J0y>8RQuhA<&RQX zkE_ORj&ELR0YXtk-vOY421%(g#g4Kz480en8>by8IccE}w4M>92}*k~sy`kQzjaMU zx7Q?}A0h98(DO-olCLp3OrHlYQlV%cnppR3HKNb5Z50%3uD={_vRY}*EF~5igYxD9E=qk* z%;fqgN*$Z(_l9Tg_!KHcHn>kl>$>LH&F@g5n=Yr*R351))K8nkzuQY6jZ+z})7mJG 
z4;Anw>D1fH_bg^Ly7=0n!WFBq#P&tu^!tYCu~|Mm)iQiljP~Y&ocGgOEJC@AhpH{u zqCsP&BF7+m9D8td*40!|wW6l}Wb19pBErUJ@Q^o+l1;XG6Jq^p;_#X3jGiW#(rYkP zJGXs>*c5a9b6c$mCMc76*JM`^PYSoQW)o*>E0+IpNI~(&au588lt)8RNj9rNH@2C;MGp0Cx>i9c6b{mTWQvM;^({|J}r+-AJaR7OS zdv`|0p=RC-n*JU^tS3V0{#!4Qciw0loxZrm9T(?u+f)=s8bQZ@IGC-Um)l!IW_H(bMY0||fZ1wbd06I=&*=9rPu zml=5+nZ|3~ib-kaZpyx1MiQf3j;l}R0ekN?HflQx6Rs2Y3}XVkfX{z<0fbavN1|6) ztwxe-S)=u5@(P$6CLYg~SIavoHm%pyP8SeLDyNDv9{b=8xmhmYbCzHC&&~7AxHWH; zC{wPwTN%;HR;oGdF$ua02NF~>;(tlsuK!*3lNs{AVT`}C>3C8#QdFAIxsBV(b;a*0 zxN8!)dKb`v95r7$nsQnf(^7GI(lmeV)Eg%knYCz|BZ_QFGY|>YBK@nP3JOF($^}WO z*PD7)LrYz6L>{&u+lIVwb>LC8m;9!d1Zzf9n7BspWjYJx1qOU$b($iC_Q$3BnH(VxB zrl_jAHdO2vUQq)n@CRF_Z?MePJG^i%@A))fzzXNKE0=LtD!($ zCwxV8DZ@8dIF$K%h@|t^U4Pw?cTag$oSSJ^BnTQyHJAdISKT2n1`iT$9I0oCT;% zdyQ;uRrYI`%5(iql@TOa{k=iQ+Lqfow7O(B0mi{a6-n!OW`?)%kOA@FSeQd$&X%~< zKE$x%wUgID)|KsUu@axEiPBv0!o;DzZOI+!i$C!TAjbUFfy74Jl1bJI<1(I%7Fvk& zpK})?BcVs)WPEXQfb<)C52rL8`U-mTHfve%QYIAF z3SB$>4B{xn^Hp{89}>>?WVUE^CDoE4Eq#hi(> z#7kfvx0o%`+2Y43*FM6zVg`8g`QB+jF8klP;ZkC_DWMQIg4KhW|8X-LB(~oyQayOl zq{KpYEo%ME6n=3w{WALuRv<-|Pu`GNdAW@_?EP|uJ`hIB>VBRc;I=1DcZ$&j)tfx= ztBY`wcg_LVZYCzU+baS z(aTTi*EZ5ZpOJ>HswfbQYR-xS{^a|J_hkR>75H9$lHwkm)OWAxuG_)82);|vImloe zC^3!O@8U2c->qvz4A3 z;uDUpA1(T{l8!;n4S_ZHWp*mamjIxlW`A~CcVes9^f(9AQRo#HTU+qA=vJn~oUY|y z7qI?R-r{_-PfPq_MwcT0@CUgh_CScwXyWHL)DU@evnUGJ%OqD^X}$b~th3rNw@U%T zGXof&Jis|zuBs68ntrhNc7WgH9#&u9Ua^>KSPC?9clkUd;3$;6{~;hj_8>`^s%!b= zmb_b|86J_MrmJYbupI0$X3X)izo1O)ipmg9qglDRScozN=?&IOxn4)yc?Iq_m&A*r zLlmOBvh=-{8T;h5e`7NE`q!ldHR2W$UUB&~^+=u^npw%?hUc_Ol-Nyi8I%to^) z6%TZ}fr_cwhIaN64)qaosZn|^+-Z4ncez2 zki@FsQp)}D{!*6LmxS-UVrwM?UF(B$PbN?*m4mk$R&O@6+HXEBEOT72z$YUmF(@%{rRl6ohx~st^lHG2Y&Txar zaLZ7!Y|dVaqhP#4msl!_YiG;aMAyQMfAPgYtjg+GJv%|l!rhGyj$jaEeMxb5Y-8|X zPCwsY^;<^N4-|r;c8&4gtyL%jM^1g`1|jrXIheNudJZ)&)QRI>0%MALifhKRYD|75*Ccp7_)) z7Ou1x`YBJj#{0x1Q~x}S_qL0C{ucD14At&koI8+%I3mOSm|D+Fj#!5;&>H6yF%y$*!VTos^=v}G?^JkFm*Im3`uPCWZc~=FR=!azSPY!OZ 
zrREGVT?5~5abLnsm>@ryz>Y#VXWR!gCmgm&;Sz|?^=#bOd78<}itL9nrEm;%;`Lt}fzK9ttYuCvaUkia&p?}M+ zX=>g74dQ3rCUPz|pw8!2k}fmSEB1Htt!c1L&F&;+MZ7E4DD_vf%{pE+^UFjnsP}cr zF}ae{6*TE@99+CTS>-jd?d~@N@}MtR&(7St<&eP@-!RoZII_&HL+zt~WuLA<@0RJ* zxK!o=X4PkUv<@ldr?aRmg;4w%q$YYc`%@MMUUC7w?^C>JtF;P9j;9K&Jl#(~d!Q}b zc5mE6Uk3BJCEoNF0Hx-)UaORs%Yx~pJg_8%Nu(q6{{aR7~2RKQJTF z^E*v8H9=FsjRf@U@ffp0S*c@#*E?`jjh{FZz|Yqmr0mWbYC$ zz1#p%eyw`Yp$w*g^}d8wD)dZNNX0dr8S4wzc6g8S1aZwDBaFHt{@H&1FQh==KprYY zofVn%wWl=za50+sb$+m~I>Cf$yb)@d)72);HleGO*}p)&a)6e&@8M({{BAV^{VXE- zRXkQ-drBJ>)8m}K*l(nO=zTtPL_pt`&1;1Pv>f+TLMl5IfB9xP#GHaaGzL(90!isg89g^W9v~nkf>ne}CEN)x?rQnr+N8uH~| zIl|>GRY*cye0qfx{gW1yQ5id@S+5^H86swnX}HYYebub+1!$-*5FH@44Y7-*o=kpt ztaB~EYxhnF>T_|9=EcmNAhY8q-<>G~O*C`QV^a3{cL$$nHdYqIw|M90g@MM2y# z8~GL4s5t?hfU~erEN^Vp0qB4z^l-Nx!zqL z8*MZM7!o5?K8I6~a0N%Wd?F_fgx*cX7(u&aI&W#(?Ap&889Cjhd=?XTGO~dx`h*vr z|Gn-%x3&1&IPv%M6nuDf$O#TDu1SG_`rI#yftGshhbMe^+DkD*AB}7;!Y@U2q$-E; z$QS838;bQNXfz(o47o1=?HI~VHEwCO`1QP|&fwtjFkLWuXGV{A%@xOq$C{1P*!t(~ zP3orAnG~m46vi~gSZfc+gd7boZz7?fJ&U5_xUW!&zU+%>Q7ax7+YLdCW!{e=d&f^9 zF>R>+@~H=YAJ6u0jfQ!|)L*JOI=_3D(WUXMy=KH6^el7fmV5x}iC6Oe8dDi~;*I(b zh}*-(S#|i$L&7Hv-xIwiU%xx$$8!VZ*m&j-TZoBSy?$m`zLR=Mg!^0^Xy3N`4Kw|8 z!^(U*nu>7t37+AB2^C9|nW>=AqI01WfYk}B#9a$;36LI$}!#pTCnW_O-= z$^YpjX%N`dOopa^!c17eodG31I=J)1s!`foG(y__G%xnIUZpN3`fw6zI?$ zc&Jlmzi1{0e|!KJyNrCGqS;{VpU~y*;tsVw8ymNiDLYVt%gbtq)A(+fCPCfb%G3>} z@a1;GreuF&M6@b|@iUxyAro>#UjC0GR_f>3U-;+^^aY<7TRDj>g5=w}jY^ zfNKhGZQVl&!=4}^j`-;zAvmVT2r^qaz;;m1Q`|-))p>?z=}@`Rw0>p_AZ}s`q zc5K~ewbUW7tX?>rZ>)m(cofx-PR>T`0T!cJEaX+<^s|E~a8F)!3UHjOqo2U=z>du5 zz(RViDi0X6TA~BFD_vD2B*@N5M{epQ;GxOLOM7gKe6GJo%?-7GF6D zRD57nny^KY{GbN2;=?aj$O~ z$`RGwuWWvV&?U!zTSVQ3?B|xnY*Wy*Jy#Cv!pW>I+3Uwg23JU8{(SGdiNvccHSPxutnj%+1K1^IYG*=Nqz)+W0(Iu^3}&bCz_gbXP5Y z&qB8PD^z*J;tm~M83W{y_soF_RgQL3b(XCcWLq0Lp&Ap9TA0cx43wK8#F~ZsAF&ol zcAkh(e>+Tpmc<~g^G02GkJsxh-4kc-9T}DL$BgKBaBaF}>j=zknbqsb8jEG$F*Rl= 
z+?eOGzll+l#4YsbjL@o8Z0rjm2Qig6mzgK3jiOu2UR_alf=gbO?a1`d={RY&qTPl_T)T6y(lu7SqMTj=j!kLE73M1xvIXAAZkVZVW?^zbed_P_ChDP!Ak`zDmCQ16k!BiGGjC&bG_tlx`n+@_C zn8$KEewwhQ4tb`yz*zr~?vifo{PVKi%5LnB5rQc2jKQ&8$KpGp1cxXz0sDuDa@rE~ zH~sr!Gqr?hLxh>#2{F~&?#-1@305_P2hRI{WR)x}sYtGpWQD+ZasI2rSdR~#hd?|i zH--g5y$3ihR}Zj@J>Cy?6^IB_+M!V6E0fXdz?btV<<4*Ib1&mROk4XgdhJvd zIcSxxV&Mqbvmp15m!AJMXlJ!tb@<9lefx##FRz$?1#x#-E(mB8ZuoyD)Aio0sLR{E zR+3MCqh4gzd9i^5`8|MGUJ^l5x-QpZaoY_p6X$-bkUmORufKar&|NWV`Cgb~;g&g< zJqec=e{jH zr`_UNNkXG?8t+5gwBY;k-q6cADz%o`ugOn;4+mmBO`icm{y9FLp(Kj((NZoS5CM|~XH#+fyi1M}70 zp{KHrSGeGYn;q*evbhx)Wr> zbX$%59c*`Q9rl7_9HA?v7zx7FEClG9Pl$?+Ry2Crzq*Tf2ubqHqbwiUWg^#wXutyE zk5mSkvI>WzGv1z&trY8=TF%Lrn3S#)**>V&&y&+QV0!FRTx`4((Ts4BYvY>*@H`sB`Rww?x@XLf(LoRrnD8cE+gW zqp)>-FI$8b0ix{po*;6jhV_7 zC(D+7QO(&(X$YB{1mI`OTuyxx77xB z(BtFe-DfM_h4FhAqf}hGeiH)9YCdV>)o}Di#V=pAUDPE7<`=zG-O(doh+q<4Q6Isl zx6qW9$hGLE%a>jF1$ES*-B5-&bCwOvuN4PfmLCpb_lB_acg<$JZi#npRw2pWvbT1o z)Tabjmk0?ab~)&1MHY70N=EU=Z!EFNz>xt9C$4Rn*xo2@x^Zj3mYJ6G4aWn8!f+&y z-;$U6N5R=rrf z8~XgDeMaJ|DzIPfP}eqKSRyBkf!Q@9J7jZb7Hw#0d!ch~85F^98Pd%39;bx-M=U+z zz3Lm%A*7Rmaje{zEI+@DXJ6SvPwOBjj!t}w0vRz52l3qJIvZnAx~Bm^9*?7-T!x%c zH3>B?;06cWT@)d_FO;&c{oDFsgIbnBvAJHUek|h25{$IL$kkZiPp*AI+*`b)>45l1 zRaZ_--M47f?V#@P#;g4YhaVTb_dmb~>@neiTmwgvxTD~~otI1OXpzw(0uS4HARa?4 zZ$=Gsq)yBE;+10ic!JPBM-_F6)<+9{znSxk?yVPfII z+!e>a?q>#3oiF z!!#9Knr^5qFj}4G`615Iw;g5=DO!6x6bC{ri*F7{a5b&I5+|I+NVt&Hx< zJy0(Oa*3FZ%Ie~1;=DWO?jT#Lce%LeAt>&&=_gwH4{U9tr-y>2w-+U2<>YK*2=XR+ z%#Y&<+{2#H#HVIF4c&qK{-1~H(Y@4urdJ5%?ir5>Ll;Zeg052^7|owh{#hE3az6D< zCh;p&j>CLFaCD5L+G>?L(a+6BDW#@&w7%-T1<U54g%=+^L}VPv?cBe-YNgl{y6 z_xgX3qmPM06!lgNRkgQ)dLt0b47kCXa&OmUV4hC2yG32%xWA)C*5bYQNJb5g%Z;-8f(v`D15f^^}|J7pD9> zI>q58jbmYWfyzS!k5|i4TLHn>`WT?m4<+r5wsQ$3{Kij&7?VqS7TuJ2M0nZLU`<%H zk>_BCRd&=Vt9Ibz>vFg=tL@g^?vor9(&7 zNfnM7WPhZKTRy1r_yjV2g9ba%t4?uclnlrC!tK2abb@{koghYq%?Oz+AB$ zeKt(rN#1aSE>3PzsQmTLYs+4*lQ8DufY>&KdgbXod^K#8U%OL_Hh`&jd0HLjKFB$v 
zH3WT+;XNW9bW3n=HH7sIF?|{Z+yZW-#7%3Xg%Y~LT3So_j+SP4UdhVYsb?r+3l|vg=agZ0~rfXL-&5t=B`^5ZJtP^gL-HwIV+UTnUzy zkdhpMkd3WNH5Rs|s_Ev|qxZay9(-9sg)dqy({@G4#OavxtX6T?lz#}CUhl;Yk7<=j z7Rph31z``Ne2+A&uVGagDEQQnzZ>&$JJC-) zL^so8Qv~mk#J0?Sj}kko`G-DFHfoSO{X8NtV?L=14DA->!lf4<26FAT7;dA0!4I5y zdN=QwF4D)X5U5fQo;BKBgii@{r&SY`EC8wvfUl7BcvJ~bYKGjnkFxEvc0*m-i$*!E zNh&P^v4tm1V>V2VMWW1p`1J{$Vk~`GyN68+OK2^a_MXh6ECN3kU7^!Co0d4HURS<1 z6I#Ak2TTPdZ{)t{!cHe=_l)dB1~J+#)h}l>4)?uV8;ZW^7nCFno;$~z zOb*sNNnIrN(m_s}tI(ukMsV@Sj76zIyF7r;>b?7?QvB0uPJzEGu>WMwJtJC$i@Km{ za~Kg!V2&W5e?Daj4X(-j_~8#;F0=bl2@S1ozhVxAwbC}&yY7Pd zgTk=M+(I2YXYQ?sE3f+oY#I^hK*;S>Y`p)^>_keDe&GJiZ~YR|(}87PCmu`IAwYot zPJ;z&XlVV&AtF0+fzcdAaG}e6wI$ZD6bA%!AEFa=v8fV`nP34m_81nmMrXZku@2Hd zMzxOeJ`hpceTPw!V%_qa?eBazHOd27{=~8+cJ0{mVnyy)habC16G1~oU_(2f{U(*; zz9inyGI4ym^@;^&){CJgAi=@ekL~bfOq1veLPMoPd|5{;@Na{R!Yr`?VEx`+c$`x0 z_N@QnHmF2${!o%b3ykn@%&^2os6^>bnU-oG(W5*{kX4WZA}WPyJ*4`> zNg(h;T)EnzRg!Tq2$yp*mKehjd9vYKaWv*iCH?BtazZc|Aop0XTVm#wa$HEX#htDn zwJ!Fbz1>4~q03dgP|B^NR61wl46Rxw-S>wl(8)RFwbDM@eAQ%`XL$M$ftthqmBJlX zPJ=>jT;ezFTj#8^?fQj3MXj=gxhK}vd~G@@`#uDFb7MNvg5|Vgwf$qKR`)($p zt|sQROU<;`Unu~Icpp1Pyt)3@$IkisMJ65PWQ$doZ#4EN5h)UzdSWX}t_-vq(29y6ssr`0|sF8hji~eCSe5;Eev~Z}lt{13ZI0<6aTF(9v0YHChvh+XzLMAXawnx$8@yJC=*)n+h`ER0qo~n9lf0 zzJ=YvsRO|MU=sxRyubX1oJH#Q=LLc8YKJ1DcGt{Un)yAHCWl^;L9%3mNgp~CbIn~E z0=qDzVzp86H)AQ-pEUgtdV7y=o>n$1y<@bi2ba#%%wQosZXzaqE=+XcaLS^mABoQ$`-{h;&RKcyid>T773Z}Asj(VNvi=G zmw;&yBd|M0FJBl+TmAJQUMSQPU#@N(Dt2Pxue-Cj={oVPeeH&Z$tpz+V9(qHRvFEt zj1C)~=mr}62OJcg_yPjN$Q6BoWaEeno4iAX@j5tYh+^vdZl6c0OyP0J@qGv<29$I0 z!)bUPiYS!jX9{blIn)h=!F@<(6_bgDCBakowI~OjgoYxpq96wcuqf^!$Gun z0ya}dNW4~L@d2CnzC%;D+_`$tzI$8orJw^;9{87ylg>c{Q0%poI~t`Z5b(x$Yqsg)P5+=^__I=!$;Pwun~r;Z z?@Lkd&*5^+M=A0k+)JpTdZT*9Q5BvwekYc5^9MhnOK4$wH2X;&BzToqrIm^I&&3^D zcVck)O|g@Unsvu(^cx=c&u6nTOI^TceNLjT2e)c;bER|Bb2f6mx#@qgc-)n~@nE<6 zGL!>^x2Z9ZRU*|*Eb{)=Zc<~gmPM=lA9(UNt`JxKZy@3u8;_JvMMu5+ps910xHDRX z7}uq|=1#?>a6yYPGzIJ27!uc^hrdO|y8_}cXNa+&S@P#L82ZW!4$pwLRm0pvi9^1H 
zDwFX`$nPEGXI9GlezDAVdHo&&ToCrrSo(RHxZdqI)!J`AhO{Akn#H#y4{25y(1mCn z9Xove2nCFfS2L?y==NqswDm4eg0(U81`i=@Tle;a3!C4px&xFdBFNI879YxDaf9J?67%m@=!34gH_Wfp zXOJKm{ni`xu(_3|I*HRttV$JrsIpSXIjA2g&vA4YFrJlW3P{Q$-_~>?WU}P3uFRR= z{YpA{LNW`ix0A*{95=LZ@8gfhkxO16`F1+x$y?gvd#SKC zDPvB{bFGm%SgfXUxHP}U!~eKc)u7f}$q^ga3RYDa{Z454sl>W3PBnJbpS#y$(xrbR z;&hEY@YC-n2)zC7C*UG5`$d~Nq}*9G<2qi}aai1Y6>|^^jZHk>s6()vtXH`3XC_|G zX%aWRb1OlduulIdAeCjCh${cw$X@|B$!d^blFUTs3#)E~H88p92g~HrNM3%VrOcFl zG?5&aPm}0QyZK4|&>;6{gAE&>r#>wjckYm3@Fn-?e?M4x2+#g^!8oWfyFd>|XUcUc6oSH4o*H0^mNx4_| zQEG*0*lBOt#7x+4D5zNf^vXTL5}|`;E7TNs+WM^_50APKg_jT8{j-Pql?`q+A|I2g z*qNk281Tyf-?bEf%w0BY!fHyg0CHp=9NRz#OPZR2v#Gg19`c)b^ z+RgvCZ3(@du;y$d!8u!luF&BTk6EQHl}80<4rpbpKULlS;Z=7gpMArO1sqcr zJaUI+F2?0rsFx?su*vf5eW{09JQxJdw zJ5q`&0)HPaMeME$9WVjtlF#4G$-gD6DJr%c`rJ*aI$HP~!diVDUupsq9_#lWE#f05Xc%%c@+7%G{12l3T`O@SXt#O#(j8-}I7^T)zk_3`J7VW+%5;Gc_%8ZZTn)_?fbO~7J zB{hU$C1I7?k*nN1JNnqKg3r0G4<$7gPOqnzX?|dlGo7=StC_2rQR~r>mdhokT(gG!W#JF-n*%(l1?`@@MgC?3(U>cCLG{s^x6EG*w0= zdAY5YGAMe<(H<)EKUDQjGlbA3MPvN&aI^64=pQ)6#3@=Io)^2&7h>$d@UthUG z%+7wHyksxN#{tpv`PY1sB~NmAxp7*)+jBw})0AYX9BcsZ?}aR+IAk^&7NM@F<16is zfHPhR=ihunhek;~z2&?&sb38v$^1=`_|@L5A+O?*qP6}9n-O&cr?7VYGed@gA!6-& z(oQuY%Ci91EE9T0^+LV#!qH~TP?6B&c}ruWxf2`?)?p|ZHaqyZifkjLWS||wQyM)^ zmXQ%Gk(oFMT%<3N+3OBfbgsr%%8H7999%dkKXS1hpZ&b-RP))MTkMRU(adBv-?>J; zcgP8(9~RCbD`}S6WL7F(Ck*i|?4lXUNIz&jCCK)kX9~~~n~-gZ4MSRkG>fAtm>H-k zzB3Oh|05T(k!=mWvQE7#{k*eX9c&zb$v7*nZusUz#%dkDDf|yS@|!*JscF7gd`W>E zhV9Kr3HcMb_d#Z6h9r&jK8>7C*I#Ev%Bdijur)$Y^CxVmESHYD8X^lar7}5wmZ1eB zKLkg|Yh=@iz3h*Dp&sfO+r6q#1;w8&^P8~OBr2^u#qP;Zg7O-FHf^0EApu>M%LIir z9VwV%!{dM~MqOU>iP`wB^%6qO9hhufs&_D4$Cs@L4f7tl&JGv1$xA18(CgF|}8+h45e(y1_RSE1VgD&qe9eZs}11jgv5gU+5By88WO z;+=O*$JQnly1F&~;~L7n4wR;pH_sj#m0!>F`U0z~EyAo2y8ShIGmYozGjRtMf+e|% zH%Ys?P2E-=+~$A18HPWBbl=p2l-T!bUOuL})yC|34$D!vH=J{Mw0YUU9|At!Oew!BEQM_q=m^*SX zWA*#X{M{K?uj-8C2Q7zZ9j$F!-NBdjGDl50-`9S0+b~T|!y;t%ZfP+3t#t=+%YQ#e z`mw-|- zOFc(km@co=sdrdH))mee(rDMMH66YgZhmW<{$aIc^GnT=>#>D|K-G*BvNvV4;Bg+8@L=XTTn 
za>v=|8F3Ny-j}EQX%ipYQCuaLWM2)h-j}Q)w!iP1fL;jxP#~xKug+A2brE<(&^l)( zW@+1nr8M3-PN^+DUO?Ysvkp^Kw(;Q@8Hv!4#?foHb%^)xH#@Uz7j=)8^L}u<9r||( z6~VxbWy`;YEjZhrtbdR)5-F)OgY8kS4w1Sixb)Nfa4G!7<-Zo|CxN#=9ln8=YlFaq zFR_kumk<>E5z1Y@VS@6dSJj2prB#oM)QR>BHGDA^G3?tDPZ`(wOD<~;)l_itE=jJv+7Kx*_@jLjgU%pRCQ1I z>QbYaTYf)?GagG5Uv6QnG|l|`#QDb?6I`p5zH>N5wPYt*o;Y*NFkKEI0cJtvg*AjFp;dnqs^3w(7e-VS z^FRC&ej%cWYK5+DVkyk!9w8RV@vt>P$x-$DX;epy(Q~l~y$QkDICHQd)JF(tu)jhO zA4{hf?A#fSU&dYmr_CvIzn9JGUVN`O8*E4B75YE8uZ99q@D4ZO(`zyZPV~OM{BZei zP)W!p`Ma}Zs^38QG0q)?tNl9a^DIaZ+I=Nw!qzRWM8`n zdhZYBo^j`O^sC&bZ5=g{D5S2D-w1R+3I{GE(6H{2ZQ*LcKfVBoAk?0jZR%_g!>Mwz zuj_o*O4609&+0uJMUDMm?Ru{$qZ<;=eZ7-*K`Z_%He6WJfa8$RhF5=7&Oh%n9rk5w`ODM?vif%|#h1{w{@aa~0`711Z_CAE;_JkvP-jX=bx@z zRSW%JS+?K>NM?%y?$O(Cr!T^tb0mO}YZR3G?Tn{B;c*0U5RDpE^Ke5-`PdV-blm)+ zJxc!cmx#X8GyKJbgMl7*{7z-$%Z-q0n0w}&M_9HQOur7JhjeYi&9_ah_eDl`xK?O; zW(gp|Le+cLs|O-cS@gtzi>#l^S*ll?j5dtQa%yEBql3q85c?-8a6n2{$G?92o!R<5 zk7W6f^d0xX#+|152jhaRMv*xa1H%BX@Tt;L@j!shCPA$lB6UIF<5tca_yqG@Q4DLNfpVz z9^ub?M9V}xQ{-mG6Y4JU3?-gemeBOJeG;N>3Y-M=s&PrHdP}006+a zrAlp&gz2A)tkDdFKFw`({SFbKyiiE~PkEu$C8~-wnLRaD)flf3mN^%zG1-Vri`j;< z2wja%Y`s_Xy;vtZf~<5E5+Q8H-SFsr;Q`zf73bjxGR|kQhq=h(3RK;%{i)0$e{(eb}%Q#1WsL+za9oi(1T~Q9Hkv ztnE_VJ=<{hdbpzUgK@Ne%h>;v!d>bR`9evs?=$5q{VCbP!92y7)rKr(KY6qHQ9j(4 z9=vo11OPr9h&?Lh4Q2(h?zHnZ9p)vUnMGJQCf)-CXl#!HFj1$44lNaGdl&xaq<_ZQ zyQjoPV}kqPfk3fY!5%2&lz0N3>xY&_PbZug_*7DwWC-_$U!ufUcp5ISHoR@k5)W4u z)Y45e^jCh|$vNTNW$*E9Oxo)65!Hoze?X(*t?R4?$MTEkE#|SQJI9}qtph2moB0Zz zoj(mex4p0OX7|a*H+^RjZAo6HT`}Sc1jc1P@NZb?ppl0+ePXLl;=!q}{P<>UH3oXn zpYiYV!it zi5maFJpO2xLgem!uk$~=!Q~D+&5GK{vAgy9G#q_yD5dXIdt0d|YW+^=Y1UN%#$+y6 zR?wLmW{Z6NMog%m9z!ooa0RSxTLxk zR(Y~0qkf#X>%gtpHtNnxZr*z7-&tQ^RnQYs-8) z6$Ml9NQskqOIXk&5U3iHV(I+{6 zI*e;M8X5jQ1h~mFgz(%AWlV?DWH-hn0}P8VDh@jxUJT?jlld0ftldG(x@EaXaJ8uL zd(=~IWq-Rxdv+sd=RRoYYClMJ@BZ7S%!iia;2SN*H%^pab^5(TentzUO+UyeMxHa{ z$o|<_UAP);NEFv;_{@1a z_F7aB-WzmOPtSOO!OUH(2`7sfI2MM(PtWTup1l`U#I;jTgXOQzFEx~njF4(cRRmiP 
zs55c>1xNd{=62q=06OjL3~ObaD9x0cM7muS_S9kL)cSaXXjmMVP!mXWHxrj!rFwOj z`~oQY?DejC&+K`Ei7|(*$*{T@5C5uc7-EtqeKjwOK zKwRiOsw97=^42a&w~{5}%{JKzjXx%L zg5C>>`%O;zw{EhDVv8R5mHQJE{KJaMHO}!0gmzJjy5Q8$E2gr@W;@s_e*G7%f9C$5 zot_}s)$1Cswe}sDhV%7WOY4hB>|%j zC8MRILwpp5rChfd<1h4`A{#T1HIZ8#4vn;+I5JTN>4i0CYcwit2giW(!!j`{^DX5~ zSCo#<5>qHxn0}|q{xJ>w+npr%_>m|PIw@<9kNHk@!w#bmou=_YpGmGNkeL&^DB~Y~>mdG7{d*AD)e*!j;@^y+ohAWq2Rg%l-gmWMOD` z$~ot`BL%3A-u&Be`|pqV!c`4s0xYzW{_0HWisnj=#+AnRSO4C5gdo_q96l^UDG)Mc z_TvIVZU)wQ=OXnK-%oD+d33l1wnQ#p#9{ zLS@*O3F;f~7|&2o1k7KjVoI}rui4bUaKXA3A*$M7+MsZZ7+Qfs^{ZWXa>~zgs8-p6 z0zu{B)7Vk<6QJ9gKiUIrj_xOrL!mlI?}Q!Zo_VK>Jro^o33&eg#IFfb6(71D*2Tr$ zK`39#oCu7aJ6Qa2`U2M^g7j&gGgLviDOnT(0!u9TO4*tf{0(q>3x7BeyC!!vtj$!H z6;6!ggy)9m6z9mRG14$vG`k@-5&9Em6OVCX=+Q9K)dmC8OvRdafjRa4(?WH4vIY|ZeopM`vJLYE)G z;fZzO2=V6`J@a4xIw~n(Z>#aC z^vKmsy5?dBt#^V9cy^Y7U{ikgPX47GdoyV}0@8Kq)O>BFJ1KHsDoQVP`+rBQ|JMx~ zBCgfHL8G4WeHxJQW{0W=+ONAWA!#~HmKQ+o&0*Vw4I)2DS$a)9(vKIP7^Y+k1mlR( zKDP9??Z6L2dnnR(#v+lP2@C3T{i}O9^F9C|2QJ6};WoKnB$F!ukjWNJ+*pOnVw-GT_6WCyA`LI zy#+-s3;oz(i)#iXH@(Z#4yrg8UzAmIvUAs&1ChQ()f?>l+jdVf9V{w1e$>z1+hyrI za4V9w39Op~r#-h@xV7WUvX-ZVBv>jHo>9%N?bqO0oou?x^?i$QK02MebIyC#-pJZ` zx3?x6Cz6Xt4X}e#eS_cJC$F(@u?;g>w8UYbWFqYE#;)S#wtSy;d22IbPfT^dlNNX8 z_cOlbY^72hVMgztA4iVttXUqxnDwdA5lCj+?6vuxdB#>snVuHgpS9wz z^ybU+Ks z@q+rKG~hrjrZ36}55nEwH6SclAM;)Q)dXpGi1?VMaC0X|M#EbN5yb`~EGtW?UZW}D zOsSq;+)H&3hcbeejDGN1-=*{m)Co-WtQ*RSb9BeE2F>Iv3lukHCN%w6!sJTls4S`P zfL{-NwK*(Ap36E0tTZQZ&~$pgy&KT+mH!LYRVTc>t_!dk05;HtALy;zNT3Ng&=}!AO0*?6x7<=rQHIK@2u0dQAiFkr~X(w|Jee@4x_? 
zbSk;Pw3PAox4Dznl;{?_x4al(`2gs}dU`0VIF-clU zFeA5Do4qW&Q7Mu}_V5PJh6#u5#B}%$%>d>T1K4OTzh=?+4OvqEoG#U&FSWXhArz~_ zg_?tXJXCF0iJS#I7d5 z6~n5HN+-cokzkM{7ObP*G33{FTQt1tcFuf8l}yV3hsW{CAL##Jr2jou|M64YZ3lZI zK*n*WVn$sd#T9lbTtQ&Q16f6!&9j!zbH6$07M|0`4?^<~iq?3vQ?fJOrR4@RW)7*} zp>pnxTy-D2DjzfBt`(69kXMzf+9ECyyD#rG1DcyQK28+1h}^g>X%Io!v#FSHOHkW+ zdGBD*k)E(<&=T+exh?dHM8QKW^wc0~g`iI6$Qheq4aX=YjPqR!)SJ-tkFv?TrB)I2 zI94Uf5=ndI%s0>~Y@<^`_kx9lRRACsr54QdEi$2N5rJOOagnpLOA=-}@cQsNl~P{s zc{-4wf$I+;PMA_dbZCQMWlS3#BWw*wm{fX$(D+h`mw`SicfUw z^j39);cz0k;gj`~Ho?}9*BjTFudTi)D2Vv}lD+ds6uS`W%VA}qp9+K3xBDpzcgU-{ z*rEcyC`G7TqI^hp!PtiWiBb6YJInptB>!bmUB$gp)9GQLzM&6fIZ1K)q-8lRVABUX zwEOM^tN7xV3q-_MZ!-%9hFDuA?XtM~f6i~h3vM!zIfK;&=J47jK^2Jy>XR+B&ft2^ z%T=xx8KWP0#na@<`|m*X$BlCPRm`z$k~`{Oxq9qBJ~xy3#5PhgT)yZj#ny=i+K<;} zzNy%K8C;t~)1vcIVI~M@9r$3<^7=k1&e~?#a00|5pCf78OCt0Z>?Bar8;tjo5tp6h z+RUU|`)X9RVUaLcONM&Lx(O;p1>JlIVNo4fwf4JV?ebvx$iyX+;h>WixHOEMoC~S; z*YRuFb>V1tsb{^6oSeRMU~f+k-^ho{X1u9s0!?UP1jcdFLR$h}1zhpmrs%YtB`?$H z@;-67j~HtCbDoj)n)^FA^qb`q+wIrmez1MrgfFC4ngS7dDTrI={3Gj`2mbOr+b}PI zn8PvR1!Mf6IK6+sb(@pCvp5_2#Elm2d=xg30p*rw zaqX>7Y@?OTi3p(9eFAk7;1v|T%+?{=s64Y5LB_1`1bsEF&i{mJ*#eRM&S~l^usTh2 zboJ5I+r9?O_~aIXrJ;Oq@a#BiE#Y_-UbOxoK4lP*l5ZyY`M=*!{kMPQw06lldafP(qwI_8h z*=c9H=V_>Y;7N?e<0Fl)O-MzEu5IjSk_2)b4^IpB>yeG|y(tG5Nvc4|m-kC{710*w zdUmReFF&a!;a)*`diU*;FPze`!xA1;29@{h0_S&aC(aD=G-upQ3fZLhnHI%do~)Nj zdYI5yD>e(JB#@8T9ea13g;pd2STnFm9Z*}%L410JH;i4l+rBvy{!mLcFI?`9YOhpN zZl}o5v3X`#Zva;<840(<&{Mw5Sl7zSdxs}EVTBHlem;jgA*6G``_fVy5V4W>ct-VN zyIkTEm-Ahppqhl>&EDE`v*;U%zdW~;LGa~U&@Ho881VR=BTj86Dwqk_Z z2hfRsJBgnc(d*LXfNdM@q9(!R54uE({atG(cx?6|X@*&@kmsr<$AP9r|lS}mgs7LD2PJry|jFJGvR5oYvd;IJ6_IHOug9SzO5V6ng=@)P7mhclCC#xIJnC3FiOC7OJcqBOK3V9ghxyO$K!%$O%DVHADp>g z!A9*B8C9J#25@#>26m?vP;XaK{+b5=aWj6sIe(r|$n4b|8b7!_@y$4hAHv5Fz!X6`i`NjxY`X^G+~IT}$rzUp@TD<~!L?fZiZ-#1%2OBY~xjVYS7=873gXYLqZM5d(fB7`a8 
z{UmogRlJG}4;4qLZq^K8Z#KnSvmMV|TeKLQhuvL2aI@F6|MuqRGoib;MY?@|)r%FgZ9;T6Uz&Xa~vE`Td69m78DCC}C_wjo9GQ@;u(zx>9?2mi=h+2&nu{OqTgyne%>JY5QnE1vc3%*D5&K_)q2`KHcVVOb)E zI*wV}ETy|;8dX`-NPVyz_j@Bl@Eot5yBW!x?Pu#~!&at(toW<}qRV{etVe#A$epEg z5-wKaNtd`g6udt93TMZ>P@A$eA61r@mV?4kVP!gFKHf#v^EvkPdDCI(>@#n56_61r zX^9i=vzP|OqOtY4E$J!5y(iNz7Hh}qo}e4R&E;yg5oQdf2zRs&%w2d+!oX}UZR8e< zWDc}RKKpIOO!P7LTR~S@Ku0!3?A9LEJvO8-AsWFP+O6lbFZ3<-c^o-IFn5(}cio#t zMYhzIlHNmZ48^=xQ;;JRkP6?+x^smLF`aE40h3TIBuU1st4Mk0qze&nQcRk@13QQP z3HZNc?J@f^6f;IIX-rWq#v$IQj>d2b4se{&H)T+o=3hEomP*`qu<*~DoEo`W@8j&E z+ceivm26$ETY`A?BFeBvWp6|$S{tc|s+3GuFdes%<1QZ7QyX8iuY>I~z;|L9N-MDU ztj%7*b{n0min`AgtZhVYeF8q-y+zMIi_!RS=ODaE4*_@aL>;6Kmu5|?&s}zqNCUow zP1hhsHN8iY?lMZq#6HgWgfBUx|F#A}+p(L>$i!nOJoo_dlN@pm_FRBN(E4U*r*L#X z^jbg6ytDw~`(!^`M;!_t=;4vqi=9X6sGBMn%I!RqdZ~pHS(TcF?AniFDZpIw%Of{Z z`B(#p78 zLIO^Ui6D!GX@#W{<9H`DiCE zo*5*4JR9z%bvw5hWhc?Fx+=MRN0}$(Y;41wR>$uqBuVBfx@fCT$B1IJ)=WZdC{Cv^ z7Bv^(AGW6sAzfVD^o5{VAUXFU5N%#hiUvCvwm!C*qT|ny2x^0MSp1B_jObUfF9ksQ z$Xa2vSnl;9HC=L{0+zhpW-}<|D&Vn%hh$8Is>rS%D9lnk5&b&W&h;! 
z-=h5$`9tpF8?f_h?Ey6H3jHdNTiKh@yH2oYi3c7V5p8=p%#n!Zq$s!6vYTDBHXy*l zsCqpDw;PkJxY)FvZ{r#o;g;}!7W+V)fZ`By!9r*RvdvuiX^aFh5}HD~@d8o{gqN3c zro$vO5~=C3&O$WQ-#lDNH>Bx`t8V_tS^kN41m-8Zz>;r19=2NnSm?rTTiePri5}O} zW4rQKtMp4ZkTpv0g92oe6m^<kHSP*nlfriQl%xKd%Zg9+cuU(%hMF>cX-ia z>uivSoGeOhH6{NID9?~N|SK;`d-66L}0_Rjiw}Dk@;oPt%ukDiTkv~XU-D` z_ie8&d@5dc1i$B=?vflqjfPwJKq~u&Q4OEBWMNSkKExWHujL&oZ9(Si5O8`TZcNtB z96gxc?H}}rnc5Pz9u=Y6qE`$=4d_|irb?D!ITyZ5?};enCf(PDV8BK8p?iCfG-|Nn zI1QsJ5G18hfRXT-<@wKBH@uydMY-4nqg2!GylogBu)kKC&A+XiZ!Y=PRvos1?CVkT z|4xRg($!+E?d&H6Z9HjFAhlw5Ptmlm<8OuKkU|~Rq+cFg;IkSIG38ylFZ}Qy;D|@n z_%?Ii8l#6?(-`LlN?I94$t#$`9L(QLzAR8NVy5 z3YN!i10^SBE4-f%i4|gMjksnvc3Pl zkDE%Ibj{M)(#72N+WSQ+-zXhN_DwFN8V=oJxfYgVFKRxe(8y_eRORA8!;+k_t+6IH zqA=e?I=&eFH{&Rl+QLqcEk(?8+F8EUNm90Ev%$ z5p|oFZjoh65*PQ2fB{pq+-LxJc0aBR4H-MKu*j%mj+$Z3qi1YWoTdbuaT!4h$YJga zPC@lzQ?QPAYzEmf*Be#~Q}h!4)^Om2BH#Tb?b#$CS=hM0ub28@Kq&S4#{Gq8(<9DG z<*QZtS5}`YqDT_a7NfRb_9TJ5u)41Mx!$fOMTp00feQSsy&Kkb*O&QxsB( z-ej~YcH5Npselb%Y>FJ80W3`Tdnv+qY_g7e$~U=P6b&6NL+Y}g4ao9Ly@tRrI_Y7R zdT;c##od5#;loCp&W^lZ=8Zuh5@>(9EV8DTTBt43KAj$bQY3X7r-4;ly`7%B0Ul9DadO@QSuqJ7nGb$z>J2%+g^Emch-e34oEc%QeI+x!Um&KqG}p$=K3GLTwC76HYX`5 zA5_vLr;!e+`h5rzjINXeeAa`>bYBr6Yu#h2?;KO@3fbsU#J-&MV-NY&`G&ht_j0BV zQ>3d(5m__R9Y`4E1wDC&K#(cM)nPNPW>JkpWt&Y*%7ad4)9~}zQnh53%DWx!jaGfs zoRmL+_2lN4&&&zT9!feCko)t`>e6m~3+i*!R33+5N?;-_pH#VEm`|+)f)D!H#DaYP zfd%}J+YdN{cS$47_i{fGKABYQIJA-(gAn3_JgK#&jEK4Xn#-u`T+(CcqhW8Kp1qt{ z`X}tfW^awUtw&ymWK&X$c>3n^i1>UXG}^gknPs`$X?8x7)^X7uNTaUSUTba8o_g0_Mdw&g z_RU~NL)pu)@~}0jM^v8`{1n=DFg`J^ zt48MYi=Af(sy1LD0vx-aJl>!aqtWscQW6xM>p2?>YaN|M-@A1t)Wl2WTZ)alzZez*ap#nUY`&WpBn)Pf2 zA}D|WavWA!6Nv4t%qI@~ilo$u9!oAh-XB=E zH0>_A`|fJ1-^Eq1X?NTquY+rmeK%vmQxxDQt~mM&Hha6ET z=S^qpT+;a=;RUI0VaYeu#JPD3I9sQwSJPXQxQs#rT+Kh(H^brPC~H0zm#pU4>jTQl ziIiskw5y!A+lJcEyOwr!9Z4RuoP2C5R6?!DD#f}AZ^wy;FuR(z`FW$co`y1Mz;u)i zgjr;&2Lj@Rvqg1^(Izq=jRG|KHgV0d9nXh5d^8FQ?!@C!boCQY$QF{$jOl@F4 zj-i;FY8=nsZw=~{hVFGjB&oo9yyo|dO2hZw=;)gkYejq(d37m3ZB{|l=+Ie$Z{44L 
z$Yt8@g{xPQ^{qlv5Pjixm~?&mu^Ccn?rbj+chU*VUYxDb*$5SoV9ISx!mJI|W<8GX zG*u#eG(R6d*8^)tqILYQfW0sSh|jBL-TH*HAvCa(%TZB0l2=xPO}V()b=R{YIn80- zrb@o*Z$R>To-#%A{#0{z{)ak8_DE$qSX9YeH{=#a^1a)KZ94wfhjuMJw2RpLzE0?H zhYINTyCdu^pE*K-B1{iEptFmB&bEhT$RPQKUTh%wD`lM)5N~K|ccklt{BqE{sYV>v zm(^ERzvq(sUC7a;XH%iPdvGd zzZlt;q!cB5tyPVoz<`1->v2ek>WXhYd*Wg-B!@bqY`&l7q3gRcF2;#VV6P7i8l;hQ zxouwh><(Xd_74&b9k3cj41mSHW|u`hrmne-zk;UEDi@r^WM$=6LgJ*jmnZUzL{$ge zfYS2nKqtJgjNrkC^j37XjuiE3MVF4k9A=gDOIPMEwbo(r&B&$SmiWZ9newCGSPmkioBh!01lu>1pe+W zpzA>biRAot-`vsNj((GnoJH}{tU-0AR(~myH;}@?kW}%P+&RJVWu#NrqKvH?pUO zQR(Efk)}$LP3+U*;R6cG_ORvMoD>}*U%{dHHoZcB-YT?o(QupSpaKBTZ~7ak0See_ z%#lQtvX+_WjVf>5BJdL%sR@haLnF#nhXoB$Sv}DFtd9EA`-XBbd->l!h;Dqn2>F6Y zH${KYiPqMm#0? zAj;1hW6Z*>Ymp~8KR3U}EoBH>Ra*4)FS2Y#rdIGhl;bAHsuXdD1GPW0wxlT8vcg}1 z6QKq>1hzgxAco}NE2VlLI2j*uWBqRoW%?9^^Nx9NElyIalz9iTbx2;9NiCg%-Kh!l znocK}6HyM`BQu-446cmAO!$cp4Vcyg8yuP!1`UFo(2wZZC8D9UtG)tZNon0e&BzoW z}owi~lZC>VwB9YS&_753Z46z5Tnw}Et2x=7^$FJy1)VBA!K zZODZz__zqC;7qf;qXDzb=CAhh6I`j4ESBE|ilA8WSgB4vJSm9 z1Cg&r=xm6qGQCVU)3s>*)bFdnUB^TV$eL%{-YMKx8uoT7;xq;7 z<-SJQk0W*$SCKm9ghsqyDp(|*SvKn-*C#JlZ-}Teg{QaMWo1vn5DynMr%Kac@`s)) zkVyb)Z)d-$V=$~Z`{&*O_@&2Y^J2k06H#HRdNw+ke zUVDM#dn9h~wQg{{^u}c(tWZm6%Ph)HM*`I}-v;;`9ufSbA|c=-x`n?2g=oI>2Vyz2 z>rX{ag*8b=8O4XWBm8`I*L#=;^VL>7>z7#$HA-#&wb0`cv3u9Apkot%<&ZvW^a~2J z+v?0ehgRY4&DD#HQnVLUUFG$A+g7y5rvrnub>yePI&``rW5Ubcl7n9U!||7D3%aj! 
zOGZh0&b>IUhyPV*x^8>-?gSWZBQDOfBp8j}P`c=m4wf|+Me@;yk zD?Og)wW|30viS=?!dgQFekHFIN?TmfK_yW(*8S4}>iY;Q+*j{pV_JKD=US`G*PX4b zfh#Vv=`R=M7S;c!p>pzofQ;R14kY-UOd6Sh2wwV5)xTqpbSi!t0>i-l#G1=c4P_nL z7NjcNTjw$6Sy+wrEcSi*e0GdZN_eqDZybEO^2Fl^ltmOLye${L4{yv9Fu7&?R!f!E zhB4xAA)v(CJ6wyJimO`2GN>y**u*N_D7tyM(SFcNuI1vC9#|iKnqJl_fT$2(Y**TM zRB2EMvoIzY!oSkA359KW1YQ3-w5{B57zmEHc~O{gWP9WC6J>tdQJ2{j_7;7N`D5Qb zKv!@;nqURsiFJ&D&*(dzRFgl}j3rs$$9?P%%qZRuNJD#LvWlINrQtS|u~%9`XCfO{ zh`pZJ6x05Jvj0r+@hr7f;@2IapQl^K33%4Pk#09JaGqL{x1Nr0L=#s!?E?a((>tK7 z?*Yyb`khfq|Ia*s^H12ltHp2BE||a`B1uQvR@h}OS)Zog=9vC*23Lu{!^G`9H16A9y->C18dqtet~HONI7U?LO|D0dg!? z@0hDiDpUWb5#p3I|2=FFHxE)%qbTjRkE_Hf)rwY-e%kS^CcPvGhoz_16!y?qqcBfL z!u;NX=xH8DAM@Bn2tmPJN{lj zTb09|BjMh$sQS8H*m2p0yE?#uG#TA*Sz&*>H58A_0iMLOfO*TQUDi9rHk@%k-oOv4 zJeK%_{9>_FbNsoHeu}9$5`RSerQ!Q9;kN(6<-kYgORq9+huGyPoYJ?Gqx^zrc1^XD zgQMkQ$@xO=CEAxew9jx!lQy0u24HxA=%vK{F%D_}3gD%r@uG|c{6J{6kC&p#6CT`^ z0Uh6502uF1OGdN(9SHy6;2t@R4JBn^?w7WSyi+rU+ysXCby~vtr^C2j%KOf!m3wwN zxXbw%NM)7WTQ1~@d*dV%6z>Lb+Sj^h))X~dL`n@*-8B|JTLt&9->7at&8(XmfxVZH zI3tm*sqv{t8~zt>eWz`+6!jpuAxO3WE~lqtAPid%s*=eusRnIlXA)EXhTKWI_NOrOaQaP5w8((F&7HO zt1P@PC8MBVFfMzomlHRLTk1j ziI0-j9^QA@eLYQRy%$3n3^F1@G57N0IkLoRYEQW>7vvYJ4eTFjEOdjwdoek6VyuJQn_VmF!+p3K*Q)Z7h-1!jqYD>&r?mF& zd0@Z@9n3RAUhQb+Q@EyK0wg^*w_fzKfpCq}@rnZEt^-K#pOB~iJ?=lr-;+2!$(ZlU z{Qtq+G*!|jBN8sS?Eg-vv>9Mm`CCVI|3|WaA1qo6@K{;(mAC%MZvQFf_tF4j3wLy* z!cm@651tY@F#E?WN&xuQ_Qk}fJl9`H=@jk!4}c*H8^Ond!CG?tb>^&Ni&XGOhZx4b`8bK3Ods?A^U@&7Y%Te$^9#4TqQx@wiH3>=+e zlMK;ARobYBXMQgEk0Zn>JR6Oh3lx&%5y^-~2-Cz1T)qUHBd!oQ@KHXU6`ev(LO&jH ztyZ9B=QJZ}qT^AFo5L2)Xdk~F`}Pm(xrINMPm<2pP2Jq*=@Q#ycNH=#LoBVI;EeXK zfEnV73(%L0xYz$mSl7uDhD2qZIv#%A#hp9J5Wwre^sf}Bt>0e&6qjuNY9J(BXppk_ z2OK&*oRt>im(1J%F;Ul1b|5>OajASk^BQ&!ZM}36^36t;0%oG)mx%@G(9Ft zc~@V-(pE`!lmpcHkl}&nN{ze9@SvJiA8bw_D1d_1La&cV<-oY$Ef0&PqtDQYQ3vS!D}{i z&j%td$in>WgnjmxP25nkJjJ$dH!kblwTI+l4U!S!^o&!Oxy_GNOuHMFD4{t&?bRZO z<~6>?ZCbWq7QP6Qf$@dFo}olE;zoCM=?I-$i+80T0*!WsS0y)9p;aOd)E3-s5wrC` 
z^ROaGlMrD^vKA#}&3LXj-@rjI8S)mUv=_H05cTJc8Q!nk@|&;58Lsm8TRSUm_}~Yak9GF5K}X4 ziOnpDERqVxxv52UO!UYgS35o|wtNfIZ}gYi?n`)IRtkmJ^~}#ua!xTUxHD+=39&S0 zB}FMHhlTJ;O>B-jpmIv*Sy?n(A_8KVoiSzAfw9gom>9jD{#a92#B=oC=TXbO>U!Uu z!I?I?hRgd1cn@5iD#h&I>@9R`;dDf&sNFFrD+sCi23YCuWJD8%t((LcAe-VUbP3es3 zyjZ)`pql5`Wu}L!>?Q}eXl4xqbehn)P%-8wTFHS=M47oC*RxqJ+rUjPxmpow<{1z} zn!)Gw%&hOFwVOI<$xI3^k2j#|(K{MTxXQOX zlKn|-oR1nRN-uy)*)#%|=6O=SqU8NBKYQJ;I%oApDkli$9FH-;b}$?1qh@mSl~{^J zo%>8S!Z>^A^^%;r)2&+)<`(LCZYDO|_Hpd?HjC=mzlH+74NDw{-M|FqT%%*MdbJQY z5#KG$>w^gKzp<)ym=ecOaXPEKjK>u{%VBW#C+NB;eAS`-5mS+fNOqvx=dih^ow+C1 zxtT1~WLMI)x)b^YOZhd{7xRhzSyO6v!<*!l*_m#zNY?qyr#EXF#l;3YS4XCzo^Ezf zZZK#$$sbA|+XO@Ru3#Dc%(vp=CFA8|)XxoSiaNI62BMc+rMH5FCx6s+IolzEIINK(=a{igCnEW@NIQE9+jc z*@C3h&2&#E{0`ce+_V>0*C*-LN4;!E>MU&&h6KgBmUXUZQ;YC*SurJqMQJgLumL-Z z@Hv0J41a6OzKXX^p1g-FrA5=OMjj61WKBVnWX+lqZh`n9*}!bTda!jv^L|atF0|G1 zpKV_pJ~CWd9*-9?NEr6Cdhc*3cI@tE58~=0pNk3kU$>d66!A=`ts-wr(BDNNc0%T}p|j>ud@F|oT%L(R0kJ8a0Kj-_i|=kl>( zqouBOkcKIQLT2ruZkId&td^o^i6)s*P!Y*mJm` zF@AI0UVp>AL;RF@e(+B~SP4{eJ`B4fd-XR^F^|8i+US zlq9G+n15y^QjFxxuA3Q&Y;6w2KhtV(+jssqbicBMIibdBLEehbFw+u<)sQQfaD zS#&5EL4+o1wh9t#wmpwmAr+3TvkFa<`V9BYcbQ0aDp^^IQtG~% zUt`G=uSE1=l0^6S_cWOEvIMO!oC>sgUgMv`AR|J2SQeB|d2yTohV*<^R#>U{0VecY zj3p^`6WI_J41AoBa~M3{z}BSAR8#k>2#bJnp^J|#T=uR!4?DNDYiylU>l^Lh52?W# z^vKrhOvFOP*tOrj#cV%de>A@X_7f`k2+0wjfTn8wYuP6C5xJ4zToz_fyNsaUvwts+ zEjOlmltMdBPK!(&{cPL8;8XT>*cKDlA7=riK-c_9UWB*xJaXfyR!*B?(DQKC;HB&H3X8T+K2I0IZ{8s)Tod?RA#rYFhQUlCH+B+E7Ovq zJ@u8nkI1P{MP7;zj${3;k{Z^Z*apXd+BD?CCrZxk8#So?AOb!@+HALJBgAq+JAHn4 zjs#gyF)Hnbzy@~BDwV26&ywUtcIrHzO34d}s<{XtgrMV`9hFWplMnqI=Tb7a2zo^l ztWK>}a=u)WRe7Rpi<8o}JfI@9b8i*HzPk%&a#%|E03#!=iJ-X%-t7t1wbV-4j_5Wj zO}ZInTeq-GSC%<(HvP-e=g~!t3Z=rVi4TWRV1kcm6(c6W4Nnhg;mN}5mgy2#ErM}f z7mx07W;G&_Y~vm2H$f$uYE^Oq|6um=Kxm|EXDfeHpZsHs;L?(QZ-kTr=dmD)^A~J{|wI zQ|qVD1&RXq8oYn1)|7bT`=zDUEicm=d?|gwAo#B=pDt*|bqhn@v=61kYI9sM$J0kh zybcNM9Tem{4na{UHw2S@Z{?deO7e}u?9n!AeK#l)hAYJ>2r1?0C`pp8s-EXE(Q13c 
zQ@YbDJZ+9n5AG7ac5_Ko{4&w=Bn#ns;Ixj~iULJmNu|oTFI754^}fY(PfjI83FnV% z){UUsvj1K0F6QYP`)}~c(FwTU0Kb3htv+hF*KMk(PgBqp*-k!MW`=FGvHFajl+c%v ze_;(?rs=H${OJf3n+R4S9^AmiEncJ}e_&Ojc0L#;%5^i<&qhZW{+S2r?3pkL zd2d>oG}GhhHJ%cBORPpnyZM9?{B9(-0gd@q`{L3m>v5tC^6@MsjnY$jBZzxfNlkrr zxoHvn`a@N@XIXwbe1*v_RXJHPB=_kQV$ z5sUfAnG1c_gzlL8+Y&0sT!8bOa-{Odj`ULX=>7k`9OISqyO$9{EiLQIqmOtat5ULJ zLm!9)G?=Q~uh5B8??`bR4}?{ggFh7MlFy z1E@73E|m@Mve^T5Z$`ptueCYKtq>}Yc9RVvg`Co&1UGvF&4rLu_vdYH6O4jzPhr3j zzkXM13BF&;G7ypf;<;+xi+S1wmUEgemrpNni5l33zjOLJ&K1{-BLnC)BDU-qOZS@` zwnImW@n6bH-L;oq5?;wJbDgf~U-$&OKM*PUUgD9Eh5JoHS(r$*DYCYvIDy==P_Apm#^ttH z$#+@L1PI6jt_Z$rKiq+y#dQ*4gpV z)O9W&4+Yo#czpc2H%MQMp3bxXIz&jgjki_pbjyxCa3kW|)45uP!r4^@0R&eSl_VAH z`Wm#1+FGLV)NqNZ5b35 z3_JvN5>)t3^&6d^_FLm70A3;VT<;eO1X4RI>1ZYIp&-~KcKnQK_!zqi6?>|4(>lQu zWg0ooq@o00t^CzbumfT#QoDG2@%oK+Ai`mPJ>SJrN-`{gsh;mHumg$kO75)>{T<>}ls1UVXqopG#;o!r7oincag zi8wim;*(<^Is-{{*D#n%d}%}>s+XhPyn~m^<)sNEP_?tYXH=$D$@Oc|vXl^t1*41o z1zeReo~fNpKrup_zuvM_fD-ZmpXm8es5%@z`G~xbN*LU}dX70IW~{R68(ZEtmgp4W zjYgigwto5GK^RbrnaS!}Vb9-^8MnLA%Vxiq0%YaC*sh(B#PJ$g-vFt7h{teO;>i`q z5bi#ooj0#_9lPl_+d8NqG-U12bkfoW$ZpwwP)}yrG_ycrbV01PTBDg;bS>57&M-OU*0!|#M~}fr8)RIGfv?c@-i<)w zpgskh9<2iR%$oH9+^J-jm60kr?3`AQ1b0gLx4lsZtY2l|D=|SMhJFr?`RvMUiMnr_ z?$1kxbmwP6;l%OK>s>ZX37^Y+v90f{VKQgN_IKfQ@6z4A$66;8_B*wT{Ku6ectj#j zBhlycU%ZcZD<6M^el2U^{ zq2jbCt*5l5Wsb0Cl)39RspN)^&9zNM`}Zl^)UE>gv*@mNQ;Y18-Tg+Td>K7BbLlRn zcOzohw`^9m#z@M2E&X}+KxCn?VD8+j<4VN&wk}`876=eS!9sQ1m7mJ%eONd$hhEm| zk}QPwY>3vqIo&*xF^a!ETx7eio6LmcaCxMGK9c?`M5Qzr>B|`kbHiw}15U(a#_~iy zJFlgPrTiz-wTlvNUoWZJT6v4*0h}1Q;JWA4wUBh;IeE67nmBExqW7M2@r^Ygsp1t6 z?7(m^V0>=Ui|%CoV%yJgb&<M_BM^cFUh($fow1%_Pd>A!j{cVW_E8#5m{$SHBea zWxq@qfK*87=F%YLGoPh{tEcH1Cmp?o#Us=WEnIIB#TOsVz2huVi?-R{TVJWx=ZAlO z)w#Rr0(ra7kYLhDj3{ck>Ky%>T-i)N__1NnEoKw4b2|ReQ&@1MRij((n z()@rZ>H6=TFLEPnCpk>>;bTb}*9>&XkeoubYM!)zP>_>VCWY~MC*s{zR?~{hdd}mM zKz`E{Ipi;i(DB?Hk3^d*j(NNhcX6ndDES-}C8nI>Z(^`Dcc>Yyu7)j@bn(O%+$0c- zYGrmDaVi9+d z^mcNE&$5`qSJMtquc~Kd-Rfe(wps|LD2D86MZEKJ(6a6mR3%4yBW-;$`QCmN 
z(WpsQ(xX(s9=^3#=}%7)R~7HZDXHGyxfa1f>3C9JK%gSc2W$rIeCrB7X*)0GfdE}` znp%NtQxiSc{xVhQrBIlwQSErrw=wc|L=$(|SZSnA(r<-MiZsv92^j z44z_$NZdJcI4-HoTDJurY#h|hpv;DV9{wc?$IDbNg<%wPC8gb)UA ztu}5avIW=0N_L>6%FtB~13|DB<|I^SlM=-DSk>CxrpS9h$kzAB&X=58a_s4k<&SiB zc1eKhG?d=WRcg?GsgFU5aPfI{j1ck5TqX%)soLmEE^2RjLPss1m%p6>Qr$Up0mfTe z=r~r`2=Tw5vfdyhrqO}{l=V5*XGvxTztfMsq|wO$yO*VDIyOrrlzqF_?9oV-ls{#> z8eRIy00`I?x44vtfw~9`-0a3J-Q>xA1zHkQ2lT8ym|RH z*4uOy8`m9Vl`} zdbA|EaI2N|r={6CPJA%2L@Z#&Cac{qROIwr+(K>DxTm=d;;S!)g4j>u(Ms|c&OH8U z30|BxTX{?aAbQ8WPS+doMRD9&6298q&u7N%iawyNuH+1g0OoMqD#$9S!P-1}uXba# zG(1J4eqCD+z5q$B7TQ)zd5MGctE$uiD`42ryvsQjoI=_PS1)yFd?bdxMP=f>*K%uL zq}f@V0P3VY_}!ZX+yciBQGRX1D#i#^k`Fl?HYV$k**4=u(pXcPVGHCIE=&Tafxx!wlZ+&V% z1t-Bf{;le*UabbRWE^Y(otHp6?2eW_v_|zto#{Kya<2|!(Ni79zAb4EO*JaPB~_y& zO&+F;Ds*nQ*iNVAH1z-!`sl6%oScQfQd;kRw!nq7j7v`k)sv8{N%S~Uzzfh90u7!m z=XsugL-hqUC?%A7J?SJ8KAxqpN;KXP0O2t2_l90mh=*&X9l%}I>pxbX0~RPYpStZ# zQti0*X@4P~r-XB?9v?3htMxh-*e2OjfCm3X$@zG2rg~D%h&V8RCp1YP^9J>a+GV+lz zfc4@MXV8Y=Cw@g77${l;nSeTSzt-ai5w38(6R1Bhy1=w1rII;i$tM|*i9a>C9kD4^ zKfEX8quI$e7Ee8NG}T*Qakckf0S6#jC?9t^LXuFOq}Pq;kAVVMDLodKwLyq@sSg!$ zmX>;d$!HotKR25qPT1#<5Al)F>O=%l>JLO;_?>Ol`8pD(-`U^2(VH>X2B{ z|3T_6coa%k>y5eO%->83?6621i{q;~$s4rjIZb;^on6FvwTj^E*uxWm1p04wp#~s& zlw&Te)*4V+IQ+HsPdC%MhDV_Fiw=5g{1#|(%~=4Nf9;)Z zTP9m8@SRN%yae>p|4MXT1HVAYgr;D*^ig=xaQ0#S&TirNu1_48hLr-=VQbIb?N3sU z#l&+9kKwuRoRcBjpVI$0)~ z^NtNrsEXxDw%~aBza5T{BkjbrVe!%?b;@gXf2jdDGy~7j=mSb=wrc$kJoE{Izpv+G z3pk^ zUBOK&PIKrluJFtscdviYUw`_4LW>zE5PjK5gg^Liu=s;c+-DM;04tI+!R7wxpE3C) zF9IOPZI)GO5B^VGmWm?B7JNOf*7zh=dF1d#7XuqBo;TILf4xgPMO*O?@SsR|JlL#Y z?L(Zw67h*L=VFYA%KdGXtfZF11q_!SBEJY%Sp1ye0QMWbg7~GQEV!76zL(*;Wof^7rf3=8uvTAS=1{bL1I^0=ud}g_LsP2jU$bV$O6-3O9)XL zM~dfLrpT2l_IekCt0=iii-}b(6ca0-=OT_EeqL3gx;9E3x+PvuXHPashH+nN*pRyG zI2*}rvdU(Q62D9DUIqL!ND^)&%{1!uF}+di~H1gFSrm=5%o8W0L-l&2ZL zdJ&JU3BRYOm6(qvRzlh0d5Py1qtuR+++~#4UB|YX-tzUF-rDuxOz&?07Vm}}B#TQA zVXEbkm*T&yNs3>p6k825P7*2V z@g{U_u>y>Ufep$oX-54#PPcPzPr@0~Afb;+pI#?`GH#%uWPYIR0N)>D99#?bZ-dHu 
z{5B<_*PP;soP+|D;o44{x!U4SH5RE8>&?zJK>b{!DRXfjdNl31?%g!CDs{1D0*>~Hs80;70KFSLxfO>=1n)eHFv2l|dXp|XrS z-z)|$b7RMUY4c`8T3CTRWUo8Em;0GBnm=#`E@nY2lz2!Hjk&Nat4nvCK5 zUy4@0#s2)950&0Hih^0s6i>xl@!*ExX0FMGl4FC^(ZEHo{R>v(m&Lf?nL9<(7eI^t zS6RclN<_0`^3)QXJ(gYwq&Vm#*k%)5vbsi5-pm(4K@Ye%nDx$}P34qgi?! zs13A};;xA4h;L2jB@Mj=dfFW5cDlUGzW=_YpUjQ*amJkbqm8?5TcvakMHK_LZ4Ipw z-P{zrM8sTD5dp+tV-TjAVIGKf!EprFsjQ?9yG<67?emgS zqJ$h)pPDs=DM(T0)bDoGH6`j?a{=CI;XvvFs0%VnxUW2zoEj%HeqBnkUP=p+ZIW{$$7MG`S-(hBnvy*&0{Kgw@05y~h!+d=;6Mm1 zex+jV*Xx8O17U;s1T&d_kFNG+rRE-7HFRgM~l4f;$-(hLI-f_g2lvH)@0Su`x13P0xR|5F$OOr0 zdiw!bwxtMxS>kS-0#8%iHA0lPRTYzXGolPf{M*=?x`g~JKN4MMH_6`Ftqz_XJsTUL zYl37+FBFyq8VgZ&b-%JY6p?0*cyH4CCa<5L#HQDyev8iLpa=#{@(DH0Ut-Qx;Q!50 z>HM$8hD`;W7LMJu>jmjGHHA+j&Rc+n|5dwgMEym;l$5ii@xI2rvl?gby}EkmX;yNR zA_9??xSW|&lap(??lQfcxOn8p4O*1QEl`#KNeoD*io|17s#1rL6@_1d)~5 z;SCAmkb@WQts_&nZ%r4dO;@n$L$`}ZH=nRSqbqNiWQ^kSS^}4kO%AtQNZgxq10lZ* zlyx-Pw$86qX4=dhGSSnn_O?Q*#!|Ig&c4`qOy|EBVv}s?E3i6^>NB&oHtV}Sonm{` zoonW#l48jjyV8SzQB5t{C-*cgSLa%;HX!;x54Tkgt|QIjK<-+KLkAkd>94pXj}9E+ z)=|BUD0;fM-0*pDH7Lq9^kFumWD6n3GodtUL|1MbQb;hTT*Yi!tF&^@{V`oX3cZS@ zo_t~k5(4eEC*+m;7BxE?PG}=&J9EPuQpviJ`+kaoE@+XtZH}LZIst}MG*~t(??d^p z)DxJosFXHR%v+jX-OS-F!S>OWkx2Q?9kjE5Fq9EO|EiBbYk#|aJ(-x)&mwM*n>OC) z=!KA_uNb57t$2>rhQkS)CKY8GArY2ob|np6-!Yl;Psz7F=LS~;-#*wdyC24ov`xX% zzh@H~d$&xfa4t#EylD=*Ph3v2SZ6zN%lheEY+58uyh1FAlxN#!VDQc3k@6eX<7~RI z^wG9R^zim_Q9o2ze|v;1ywkPz_EFkpaCT?Di=Yz%B*Q z5##Qh+C#pJExaQ0>b#-ethxL_8q*%yRr{POA(RGo;H*|d>LQP|kt&jt@%>*VwjT{H z^4u}MRrj*wM9oOz>btKQxkBMiOC{j)aB9CYb|XcbhP9WYy~R+5_>!A?aa_Wi)^kp1 z6r)5*8b@-e1n2k>s#I`3`4VsDV|3*7ml#G#SZ15ReCMU)QlwwivXC`PczfrY;QEsM zfv-x?Ef$X54~ z_y(2Omhs39!Iw{xOONaX585`W=^w#M3d@zKQ^&sKmf65^QhwUR4xAwykFTh@Jns%0 z`O#4H%HR=`B9n9a?2bm|Y?TdP;LF`Qw9zUZKGq`h}Y=wch%_~(U+d{zSs z>n@88Ya6B8A}1F^my7I&2?XbBVs)@{Ld{QvfTQnI5Dhj1^G4n0urA6)yZx=-GWvcY zZFwA#qXz1YN#Y72S!{S)oOxl&#ZMjD@ibdWgz)F$mF6biMQnc!KdBF1G8tuXM&0QM zf(SYrUn_M*Q4wE*5>4|te5UY0Ey>sBGIR>H+3oDHnEP{M=Ss2Gf*Q9)j16+{ngiA& 
zB33u;_O0=1#Ot{g3e6|(1|p9J-ut*N%9K4ZF@NP{%R^VUMLma9(xR=IS=8On(2iIg zs&2Y7y5N|&t{G$}&aJ?ks-|1!JXAe^PI6Ceq0k7j!}{)cDBRwwtmd~wQ3)EYB8?CNyymUKriLQWr~$pbtuK%NOn;L zmC3|$=`ku~O-RhFTQ@4%4yLo(z=U?;+rRm^deo}HVN_JC$^5Py%*b`u>AS3*1!9a} zVv;pOQ{)6~Suv=`Xm5rzO?OHDLX9hAHC+Yd@uWFb>s2@D42;GABPvvU zD`!>@mE?5kq@l~u$CF7O0`)4^b5l(^@Xd~2w2vu)N8h3HYKzF~hML8$zimYJ@2iM?yP zvTkw9+1N4AaC2aC2wU$rGA-RYvj)+s`rR!ihViuZ2Lf8Ei4xgKdmGrmBKt_$$G)|a zRvyn|R`xl|I_PyIvfL1^aK5&0EI4Uj7i+-IbonY*kfVG)m5204Qo2k6Rs?GWL7@8X!3RlhDOVrqzEe_HfUkp-oub)R zbjP@h*IskSwm?GVU#iva6i^kdwqEor&Ttr&qr5ZScHusO~`X0yz zjIyhyrZ(N?IB_RI9gjFp5Txw8Q}=LVK&4w2rD;f%z)tN6Y#X zb>7pB3kGzF9cA@NUV}rEV=>8gZ>o^3IUumRQ9KD|nN8_1pS5bGIYg($B6-oLv^azv zaOxJ7Cs&?SKECu&J(rz%g>Kzu1q5n%!!BfPI#X6u^SCX?ZqK!w zvx49C3E!jIxsIGPnQFtRV$`~$(xIXJP2ZJeT`VIR_==OSm9t`lZX$`{j{mV|3wh66 zSc-NA%5xiFWXx$acjy$}r`cV~Iht@uYk zjmYCw)|T|F?CEt0nyzLS(tT3{+L7ENaktd99e7+@HECEoN2cV9a`SdjFNwc22>*ZVy?0boX%{y- zqK;jRsGu|j=}i>r0UQEImo6}5c`%p@Kk$uBns+1`0@w*Ts`ZJvTJ!o zHP`aE(H1sSQ5m&nmd_)AlQLvl&lb0|`qifQnbGaSjxSx*=GLU0q8Comz756(`nh|y zb*mK#n7qNyf@&u352@N&K&o(Pls;hWp%zxOFQ`Js$Y^N4M)68g-m`> z2WvGVIx$ip)o*@Cbk5==zw)`>4D8lafVq6n3%CeqPqNAJEeE5Q4D4&$c^_6_#3%HJi5{F13?b|>-iEp=ZH@pW;I zETba%SUH2-Ixm4A?E;uf#0*!1gX*AiUdelFUvE{Iv$gw&w$y3m?UE+Ixx1FfJC>7U zxjt=V+iq)Vvd-Ah?+h=yqJSQk69inr{-os?o+|Wue7vBcqn<_~SAdmKpXL05y?wiK z`3f()pnO9vl- z0nxXcJN~`4?FEQ_;}>t-$#6r9F@K-*l~O%nv)j*B@rILCEg z2gT&woctP_P;bt>F_@vCbBD~NBQ1jbwaPPBGiLFX?RtKZ1t9YR<KrqoB&$CMNG;DG59hm_JQCtDwNKW+}n4@N6M|nBWRC*5FNv z?UvZyKakb0n)3$Lw<$)2)B6mIILogNeSDY7hP77V>iDB`yh)m+3%u@ z_C{cdcv@F+|Deu*h)9i4Oso2q)z^mnC_ArVe($*&lWZGi~kX=J0V0R~{Zv=|8)Z9cd8 z`o5@}fERHbE$+L`Co*#wg^{Cdn&*iDWX%YmxE<{z%y^)3^3Ehdkn`C@1u3yoDA&-p zU@S~KzS?X^z;}%VW(?!;+PH|C>1Yf;L!4fV)hyvuT#-Q+QnnHBUb}y82Acx*)z~am z1(qFl#j9X!)xUUYw*}+hc&$Sna=B)s5;d=iF3oLs-fUwW`D^^%KY5qfjy((uNk)~T6BwN%}EdGXAyhYPfWRCmr~N+5~R{4 zn7C6|l6kocIO5UGoc!d3jV0%m%t`G_++D~cp%)6;_w>`caW_4-y0k{Cvb&=BNI1Y8 zXg+BzFEi>9^3MdIgXj@*owB9rjX-gBjW@cu0ZW`WTCL(P2+`o)DCd8J4dG%-EQ=uM zxSbv>JKr^TBnG>vAzmcNht*)OB1Ls%G70x 
zIs^C7Ri5st&4gPwq` zy%3kL2Omy2UT~cVrsi$kKwd%4yx-i*Jj&L!tpo@)fiTBrjxWXnaK8}`O()3$Z}4=_ zTibP70Zx1Wa*Zf$&GcaD>>1!>8EC|(`RkCfxvO=6@D4^Z3Yy%V{CeNo4NAW;@{q`L zPq#!vdu;VfRmkBIz=!YLGxvbOGYRi4n1DG5IIds$p zns5e6e0EuEbU^eTjJSmA0lc`@8Q#-O1&~|rOXM>tIb)W50H;sIU@rWymXvYc-iWRc zn)@zR+BtQ_8&dhn$YHqyvpS4@PH4^!PS@z|KYaeH{eK%0zUxu6A1eEgdH(0aBgH)&WX_;!QdMcc25Uv{zVq?;x!4p6 z*6dCMeoX6tsrp`h_qY&EQSc4ouPC}NR0h8D;OLR$D;k#haS=Ze?t3U#L?P4=))j0B zPB{SJ9Q(&9&sX=D+`0uA%{u>s*c7SrH|{jUzPFeF&1S--Tpm>HmJ2y=1P&0t+(u;v zsV>cnB<%;H&V}qjA$@z1zS(A|@RrL!eEjO`fq9r$Ur}eza2;$j*0`9>i+0Pco!gH9 z#KHG0VcsBlGI>G|Jk4-Q|3~@QGw&XNC0x65aMAt0^mCwimN3vLsjaM=5)MqrluJ9n zr>n21<`A|cKZN}N5j+iFS8(Y24m6rYWr(>fDeR>X2fL^D!vm!7d)QTf%6vj-emHp2 zV&jzR3h8)a*a0DYO??E2yq~+Z(R?3y1GB;|91%<=`*Gj~qNHMuaym3@In|#0;RM7* z1N1O||Dn0D{r3=t#ey)}Z|_(9A-s~R+*>`mY<=&qKwo&*e`W#vXCJTGzZ_ahg#%~n z`ki-(hV8GswX)yI|IF(>n)5E4a{7ui0Fdze==kpR2~CSR;j_P-A-Yep|61sVDv)%D z%d)TQIFLK};UWIB*YD2ny#+cW0cZOb3s!jFrvAsze_YbfgA-6`aoB(69REjwJ5~Ks z29o2zw(Ii~?&vQTvuscEl8F@Mi-{=7yUhEA;oxJf0m?mf+va<%G=6c!)!n+n@x|Hc zeE{cu5A@$1#%}D{2twtj-@X%yIbA=`&BA2*T0dZQW9t)|B~fB^VA>(QEH~(W#vGu5 zu_v&i5~2(GJ+WdnjkgY2M`*ITAoofVrWE`lEEUSjw1ha56T_7W)#uXx_Jw#c@O|b%wS(s5&2#w8e{G7i@vjW2 z+x$EPykzu%WbOM4%_YDd1%JN>Ki=anRWz|5;u4=qS^F`MZwv^Cn7co1*V;H{I5Tdu zlo7RWiT`l;zf=a&USK~fuQA*YuHmn?^51pt@d818brAQzzv8z)$An_cHNowznSr zQWw6Xu%F=3%}}4iAo96l!9U`RD6k$_JT9^xc^K7R9Cb7b%>Y}Q=VOFn z=B4#E1_4;7U*bC|laZ-NI=J#4$>*xx)se?cpDMJj@U#6}?)q7tXgVELX0)4jJ^_)V zX`0$DpV#O-RjY$`(`zlI`i=nfR6uUYwCwEPBlCYPKessVwdF=BOji@5+E+|c+o?l< zN=V9OaKp3jP=q&Z0jbGUw)oZ1jDMbIE?uf#q68RgC?d<6E8aM4RF9jB&>Jq#Joce>wyoESkBZG;+V5WRR5$=KinZ3^#w zQNBvZfaB^W6q%vt9i*D#;T_D~FSnGM2ypFbQ18}fxSY)5C5IxoGN@A4fPVEEeC6f9 zq`BVoo#Me$%t%NEjCm7YbqkZ*TO|x_JQvn2*6*ma_$tJ=5xygP{aG-`a(582djnGo z5|UHWM#6}Ra9cSs)>?r=&2T42g_S#^tx!+`b7AORJkAot#oXSW#=PwYvV6G%IW1!V zTHS#=F?GWqX$4N^%FL1(#;(;HwQUwLBV%0ys=$~~kflLNa20!MQh4F&Ht3X7QX}Jt zZ+Ac2uoB*)hH`y6JbTwsAWSL3wkF#D<;)r%Ux)$;N_70(S1+!HZGnmPRtIfkc5dk} zCt!rvH{y0jr$R@D#yJF`m&3*va2_Zz6RoB*APbp91_wh(ZAJwCVXbLwzERdnn^~OC 
z@ASe~xG?1mVG=8qG6v^X(}2Aq5t%4;&!zQVEJD`PNEBA&yikw6trA%m5?H0$6avDQ zrbrZF#S{qO6d&>~6Q&h}bz5_8kM{PD#iGG|mm6UR4CNTLFvc)1G&no7MW;rR!s1)F zW_O$psL)QeAa;g)^o6>ybVrWczz(4|Do9?&N4~T2optZ{*$N~!VhiGf0yZ1g6Kt0B`rnyo}aeAs8)1if24Ak&D4a0kUI zc<4E+P+eGSjeoQgKZg&8H^5!npQZJwGO>z6B^s|0U;<3aGmo^+2DR+^RED^!P!46a z32axCED0pzp4g?Ox3NnGu`eVbGUNqzgj_j2ctY}4zk0-fE*53-%`aKHm8Rr#lk^MS=P%Ef`Uc(3|vrTTj^@4CJ71#PKxLD4q?js@Kel_qe~<@<;g}? z%nFlUl!Y8n-Z+N^^F%|py`2fAVFm!hE-Z;@siKdLGZyowZpFoAOVGe18N*o1%k&>& z#F~_d%a(4Pr8PzbWcIHu(%+2m(`CC?WFDnZ+jqPrw4q47Dvk@r19Yzhy-@_k0Dg{$V+GB;3%v3|sv>{%ua-qO{EZ8HM{fV5bP4~V@h!`(=; z-WjbGVldUri?PV-l{rc(ZB&`~os_iz-4gJK1Y;7(=K7`znrqC{y00m`2%?G+*Ijg5D6~vsch*Hc>DT1w4t@ zUKAViWvmPWXWpu}5tyMhuN%pMvDs@_W;ATjM>*-b-iZt3n_Gsx9=3M=eNH2m>#yMg z4(ay?b+LU@F~|tc9*r}euMKK5AjTCNZh{}WO!ZZV2HvG|a0|E#;|l`5 z{yN_m)Rx`Ij$vWh7GsVHfhj>e?@1Iu*Gk|ikPC_g>+P%y5j^u)_u#eKp)G&-N2ZXF zyORb~YTQL35IyBowZgz)+*Oj1oVX+=4O9ua(6PW2=AEU-8$h2-Qw)P#Ss{VQSHf8H zrxf+%XC#@C)|DNDm_IF>UtF+6suqj0oe$o`#p3~n=+hyh2Q2zveZdH2Lfm7i6MFII zqPtUuk%*{S8E>J~K}DhEHry@KP>RnN)l{ie!_5hFeDJ#|cp=#-$y;jlysfR0eF}P| z1)b3dIUxz!u_~RKRnlg&yaXr=x^AweiCii3msva?#(Hm=-odQN7jjzmLPrO4u{6Jk zUJ7>Qoc>y^#nWq>cB1tnKV}8?b8XM)-0(eue8$BXdY6*A8-LS+J4C{A!y407H~5uS zuWcS%uFmLD5Zu4;aohc~*Hk%F*K2dNLqf0)ah)tY-_(!D)8wojY~7>>%b8EbHap2! 
zi)$qJcZ)til&5jO7Jj$Gne%TPT z^BHDY>2gw7@oeGJX1}%1>ImghwC}sxskzlZH{K<2m5>_h)lD-V@4hJ}%z5$aF}?)m z57^b&Ouyr`R-lYml&~`1;0?-sR!cc>=_Rdcxu9ofFF&PVcfXK{HocxNOsrLMm#2~^ zkK6FAlZ#Db*rfK3Tu7y&xE} zI8ym&>OPEC7P>k+0USpUjzy4j;z4n29ZAD;d4MBv~F9%C16ww7PD(<<}CkVNPtN zcS6qhSv`BkRzx>6G0Y=8qz$-Sii@C(fQd7ImCX}@mR|yN42Nv`tFdsU6V^Nq;4FvS_?%;Ca zUz~?EgkFvB=;npTcynn+2By&V`W;WXmlS0G&%ac?D~s7Z0h* zeWjX$7f7BWbN~d1Hca(_G=(y)O)qAT#;JDpY{qGbcMt4(J5QE|3D-CjDTTrG#`;#~ zj$PVRYIH*qUF5I%N`KZ!qz!9GNME_UeS!mvhZ5?O8sw%F*&3v6+E$Wxhh~~w$%$1b zsehR)IbO#6*vwgha(bF^*T$l;ddNaWx9RX8nFcX7W#8*GZo5dzE@r+U6yJS&SJT$>?hWt_VUHYqeW6~!vBcKj#cDbp!MMS$<6)ax zsCopv;C-745IY#@WAo~+qWQXo8eR_UTtV>3!X|qvykxTyTHLsEv`PnY=CGp6c9&@v zA2s%R8p-=?SB%h|ujl0w!BlBxG?JtE@66eLZ2Rzx0DJB^d546Xau{e71{-XxPZ0HX zO?#gb6gg2(DY+ks4!EbAc-7aigQYg>FZnar>6O{^u9sMCXqXaDUOMG}Ol&}OO{gZV z#@%0r?_~_%l%9eR9_3P*)tdhd_tvap=}4>9VwLz{LsM+ZPdMX`nc+MGm5dnH_;{@Y|i zi)Q@9pltn6a{oMIu)J}rs`IEU3GP?BX*7g(yE7>qL0`!rN%sPgVxD5;v0H(CfRCPj z>sRY$Q1^P}!-R?{Rii8`MY*U>8SyG>TYbayd$A$mn`#+rf$dL^@&(BIWNDSWk3fj} z_%DdMw^u)&!}YAKGYd}h&gd5>F&QP(MP*G<6~mk$gt#zPXC+Mu`ZJavAf#CZLR%z* zD3}%a34IiJEnrAxCo{!s`^1%PUM#*jh{=b(m?>2)nT8f0xYh9ple8)-jQ7q?#er z<8S)a@*B60zC4;rw&B_Bg-o;sn`}CQ)YjEiD$y70byJyBtnBJ)TsAdZ)>Q2%O%$2W zWUE%Ea3Xt%q*jJ$7{E@yW9dq7Q8>+ms84VH9Rz@r{hFX|IpW9V9 zFgExrA^9avEKI|$mw^w~Wh+-^M{R{m2y60`4Dy z5;*5|)H2bPHQAe+P`BP6FmV$v@Eg1{ID=*N?(`yOO1AejkH6gW)#qbxlijGOUXVn~Qwqr_w z>X{e9@W9K=4kVv$v>vf4UyM}9NM?AVtQd1z(o8!v>%B&~Ube-CySKPY-b3p?YcZAq zjV<+hb+N{qHTHBKQ#5I|E&-5S*UZBKnuCi@>X&rozUjp zGlP@F%hfMNx93qax0!2I*ANP>hV@XgrD^yzh#EWp&PYZm0RiT7!6ODmpFaS)$P0*Cu@n{l74fuUeYwN z&M}RtE^GzI;(ceI@0}hSRBPl=e~!Y$V4z8HgK& zB!qk~KB>KQuJQ6l$I0pvD?-EC3TTDXIx(Z#ZY;u=vzinGv6F5}U}7cUpX2D4?tn_` zZI=FWpKEk#HlU1jY{NpBC%sj-p(BKD24Z0Vd-U_?5Vl6?3o5sL^FER>U|&9T)W67n zdKwu|pXiW4wdDyf{Atd(GPR13@X;l74A66>O|A^xZrV}}c%L53dOt&5_9}2JXhVy*Z8Ffx{>#s zcJuD3j3z4gN8i(&}W5nq@$P;;j4P-`_%orPVX!*=ky46?Wt@ij%jN>?}<~ zw@0UQ1^Ijsb1@90&Y1PJD8UL^Dz!C3%62WJS!hPJ=kTrr5Y@iTY`fB$o}|#8rs!zI 
z-d3)V1PaEkS6?0E2=*vhTW=GEc%fkNK=nx*ITeYM+7^s(;kG0j$lIlAdNKx1Pc%1i z=etcdEPXadyx7U4O-Yc8{w-5We3&OCG-So9a-Dl_umCuGq^j95*PdJZxO_G`7p85b z;P9}?jA|~mBcDruXT3KVAzQf|WmjK~#aj@%dzbMc0)^(FwwgsZuT_0BKm^i@*{>Nt z4IDXOa=xp{e4m%qSAFd>k;M?t&_b(|xu!W@MIRL9=Qr$udPm=kd)Ys^#Z~HOZL#P+ zCekyk2NrYbGbTQhWE|opnlGcYp$vzIZ3CYvIw94!9mXL*3jm z9^5WqwBE{7nEL9h2T>qYe{>0M!Zbe^J*AiNB9*^D;jvUrHdLOA+N5(94&U&TJJNSN~`h&cg$oeZ?833Ac6N49~es zB~BKD@Ih58bJm`}@qu4QFt`(3AcVXUbjaQ6Hih`k)e`u0bqICqCaT&quM79C!|5cw z&*hDj8AmK1I?932T=+FaP9 z>36QasBqJ+Nm$>XL9XqGmYm~dQ!ZA++8OTE^A9$#BrGVpCU;N8VBC?mi4BSwD{@p( z{ZzM=!i!XqbG&taO)@q za!84F-r3UZf)V_bHDyu?I3E)+V}Me2SHGzS$T&);PLMBNk-W4~da61g7yBTRlKKjo zdSmxURAZ3ml~%~KecCmdG;a>UH9*SJBlrZa$Qkj44$6RxLfpSR;w6_badI%wuGYYd zWa?Nj!|(tFzWYi7uHW7|8T*{V(nJBA=gmZ$@4L0=XU>{wgYps>npy5kI>L{dSVuPs zMml$^R`6KbiA71Onr3Abt8IR9NvmIUsSP(H_zdNqF1H#Dv^8ffcoAU)X4%lE;*W^X zAm}A|$J+t}Dzik&l+hMVuV#d$0~pphv~Gd6kaf&3GUZpw(?S zwTw6$<}17eFNmCM??`ne!!2Y+Zxos@048EKtW z(A|Mj!n`{jY0`RdKys}E&s$sX{z*j7{`R$sB*k$gk#@GKq&UnA=3?W^r<2nC(9^v} zjWjg$seU#bTfRuu0*Jq2;sh(TW^DVQPixLVcrLH>2yVPQ1DOCECd|K<^L>lrbd;Y~q!=_V&?T-_Kt@1&DU`c>{?-{RJrYvpxYS{Fq?RVQ%hpoiBK6 zX>P$hZNxWlP!l(p(I5OH9wvy(b&03ys4gn29 zrgQ{3yn5VZaMi*loEgG*Sd=3bRy6pc>rH!>RPM=hEoS%+uV>hp+-ayUu76w1-)8Ob zJX*e{vVSyqNAQf9wU6M))7Xbue;sZ(5yd>#G7aQ95^K@x+&)kxRO_Nzu~v2a3fV$H zz3~`CoP&wD>gNw6PScpfNJRW#^@~!ITB^oSL}!wzpuOv4DL?IURGcRzk}-1Yaoc-s znPnG{QRsa`}Tb|&MKi& z@~485QUDuSQn2db9J3@0y{Tb{H4)?-uY3M%7)h+0!kJUHOSoS-Hp;h&y0AJ6$25gl zGYip#`H~#S*{T_dv`P5OW7ojqNe!nmR^-hosTj|1+e6CqU4ufzE{dYnv+?R1PdH9* zO7vC=>u%?U6Z>{_xD^8fPRt)ud4&#l1}bp^4F%h%(eo*7n@cUTGd1CN(sv5R!ddl{ z*E-C$?v+lDFAjiFU-ju|kI8YvtKNoj*O3J0IV8@l4#9Cw4!V#A@vEG`QO?*V)ctYE z&RehssFkkSDv=FYh(gc2@sXU+tO0GHM@RIU-Xft&yTAuU9H)s6YW7OMa|lFfhlVWn zgM=XYp?9h1kY+=&@|4lgk?fZZ5-dXF_!8Q`4O*39G7Coj;W#os@!?9VJw^xz!;tkZ2fe`p~9 zFzQ%IF8&2mq_U2X8UfR`kkq`Ef47b+RojN9AnU|A${^vQbfQaGLi@*H7XMh6oY@w* z#I5()f-)p?kP){fBYT_tHx!2-tDk4a_8nAgq@#{@USnQ`$Kz&HE1y?Z0Ee=TzSwHV zhu@Hgr^wJx<+(GAo=_TkN_bVl+mA<|d(Ovb+goSVWv6SHK97`?8!3OjX_%JW&|0ps 
z8KQ!Hvu(0%#osHvA#IXx=@Qr0ey4Zh=yMgzr@fRXt2J$vj<*YeMUCQ&y>>n|K7LwJ zf}07SN)0zqaG7jK`tli^vJ5Ce$UQyXSHmpxmJ}$nX=v}8F~sA_<*v4t#Nk>8)y#kg zJ8}ZaY^stg44_`1_80s5e*119Y(}G=HYIop1?10YS4Ln8D&g8omRaeD?K}n1%#~x! z$efjP!RtUFPt`OC?`9Tnd-DN1v=27m$8*8mrj3N1sIiZl3d+F^TA=SM$%2V4 z%Q@orbR6NjQ3_l|0t_=Yx8M3bqT_TP;+r?8dUA41^BepSkY>OeX%f>Z7_Y@EIlXJ&y=t8rk2M+&H#x|07>*-GzoAM723#>r|oS+Mept{5HJ>G#`^woPef z9fJuoHM>9hkE5W%OYXQYn`0g*z9WJdau zZ5Wrz5C>K;PXiy)Ye=*DHe79)w%TmH=@0$*tq4}i$S8d#57#%>>iHyJZsq+Uq1D$k zBM>m>k#qBTNn!d*qs(#-T;R}$%Td2&g%bV4iY;xHv z4f^f+J}jE!TU8VaHE;|s<81F!Tnqkur#Ni6HcRmQ^TFxop1H1TVh~Z1rVl=yBX`zACel2T?C7xQ|3ntxk?(w;2XLYm}wNyg3X0JB6| zWr_9A6FI+en}+RP*9+~!F#~-E(mR#rV~<)lG~!$+>_b8Hw8f?m%!=Tio^S!MlBZ=; zDqNs=N(AHul$V3U*bCi}bL-1P!n`K4vP{svVvtJCA$hksHXmOO*j<0v&E9O z{l;B+BPDpC)&U)oY^BoROmK=|^~~_^?*oS~2g{-a1-pSdbq5a08+?~aCnRu)#UwX_ zb`R+^j|0cC&cbdm5yiV=vLMqZ0AHy}oQcS-lCmveIb98EtGq99Apmzv&GOP5&Vo6q zxDT(euGQtu3ahXW{e(7(%J${LB!G;9sw9|TZudc}m7vg|Y*{7E&7{|Ffb%1h@OMp{ ze7MG~=)DpH6h6BwQ*jB5YV(U;!q_Sy5H4`y zIygU`k1|jpP8s)eR!juJMd#4d;!|u z*iu(b3q&FMoB!(6 z*C{2Z^^~!#(ewb*R++?0Vau(uIZZ9ha%o`+%hp26U9PPI!Cq@qG@Wli2%giNFuTC& zKVpjKg-p1Jb(-x*YgSQnE%oX?F1KQosU(dqtrmn!OE5cf=tb40t%ksNSE694q%ZIw z<~5d$rO$5g9MW=IsI`g>b)#@sK#(KoXCd(Ss$_iX$g)w%N;yk)n>zMTSU*dWD|g$5 z-h2(mSz%&Sp9!iw_TJwuzcZ2VDkrT%0%Pa#FaCU`t>d8%@yxE&lApaB!9p6%)sWXG za4m&~R@I~zgegRDCtnU@a7|Lkk(s4pMy~c9ep?Uy9YaCtnUP7i_LXk3W=6W^-J}U_ zd&G&C^K49}aWIF7O&88N8$j}k(@$`21H=>bZK$po3r2KNA6{w$E%`Le_u}b6v$ACx zH4gR_w?br7RL~J^vl$??ed6PjMV4q)IUtg+=Ab&YSEs^ynPc)2s-vCHu*sAWstsK$ zoS3#>!*yZ=8nUm)sACP1w$Ab=p}|zAi-r1FvvD&!4?q8cTs-P=eHAI4QUyiC389#Ev8Box5~P?Krq^sJwV{A>P@ z^b_Dp#+CbO`Ujq~fapA1B^d2^c`8-C zD4f%rJ}5NMT?RtBb+KPpgVjm-@Y2j|8sPQpA(h;nRN)cg*D=4Df*Zw#PJR^j5TYyxitw|2wMsf8=&WnO-qG;fg-m3`y9)y;Uqe zeG%YCk&z%b!qGS4$pBgoC{;H8DlYNh3hl%G6qXFFPKq6 z#X8Nd@t+czloN-}0gfJJlS->FeWT4y7qMCKw@E5g04m~;q>xD^a1<=k7aKy>l=9*X z==h`LnR{XR>GY%hN^31q1*9iLss z&J-+Z8xsXobj@dNqZTrDGO1D$Qo9Pmge5RA5Z1}#UXu@iu0_^6sX=OlXs0g+2MZdB 
zx!C*gbW0{7d;#=?lZlO-tPO3LM3BF1KKwpSn9q0yqGJQlrVDI`AXa(bkdc2?cl+!& z?J-A!(Lw-Z$)K>sQ(>GbikqID`!ryGlI{Dh4?qmOz zO_v%x($1R39XEr*bD~x%!VM&}+H_b^$@9;sBj^g3W~@fQa-yt#NvC$wk0r8t!Aq%u z*X?Bd-CS07?k*LDx^u~{;MwFVYD+Q|^kz9-h~e^hn9YhEU1Z13-7L$Y-rYZ5FwDg8QyV zUa(9x5mI0qkHC_au`L^}Q*`|ybp88Z>2SbB1g0ZJ7Q8Moxe^mW;&dW#^=+#MwVU}G ziV|IhT|3rX#kwCi!ZO|K1k|#&UVec17Kr-JLEnA~9o*Ll4q*AmPhKETBI=$$p7@0& zk}}r(GH^cN;-1~MJTK>P^p$`Ji?X_9q^Bt}Ua#V*VxhD#yz&OKM~#h*e5{9joY~8WQ{RkL7_sz*>yw z0TiT54l*_U#H}Tjhn92?r2!trv-T-v)iJvgtRSfLnB7&NfF>Z%Vq_Q4?y(KA!I#KP zsQ(qB{X-)u)l=RgvXWzD=#BnUzR<;2-C4lo4xlGF{;(474|C^!B0Dp2@B9@D-6}u9 z_)~>ZsY4r=!`v#sFK;BAs>YGm(2QZ)cjL0@sLn9PW*5se*f@4w;~`X9 zDePqMmw9epB{ug9#n4SPGU@CtWY^oMIR>cjyO$I##i^0GG{w;4WrR=&*zv@R16|tt zx%|-J%c3wwmqiZdImH|3{zzWm^EKuf2o1ll{fIG;fTnf~p0~I{TRk}2vPp(4CUsOU{tC}4P=SwxB~)Egdrj5prJQ!r9Bz06$r0)!EI03>c|nn z&kfRsJ1zxV|I@D;t4Lu^l{kyssJ^t<<-$(=b-i&^#`U}(Nq+eDFVY*o`E7H4{ZREa zh#A?=v0dV;$qTRY!ENxZdXnKl9xQ~Ni%oXT!u(9w4g&5QFvnFUDLL;;yP5zWVR8+< zJBtb@5Bi!f0{M=N=rW9=X@88fEhV3;m71@5S%#Kzc)vaKW`Mg{(;B>FE&gsHpEcZ| zNMGr(F7j`3`z4>-(t23}r0n98X{zt+$@M#>Q{q?8Ryk)hDhp=_;VWE zmRiyN7bB_(y2bTQkoYP9+8klY8!VhIGrLpvC?T68cpa3+ua?r)`*j|Ei*5egcC~RY zW=-HfeK`-y-@Tw2(<=4+^Y?GBxg9%Sm7~w*m|V?rFG)XU(rRCF4 z$T9Xd^u=m6nO!$|eg1yyOUQ|QA`T%5U(z*>Q z2byRod$*$DhnX7&a!m_uO>M}^kocap1cL^f^~x}DUd#{>cCZ!%-i&gTaaM_t=?JIh4rX=@xPQSxqsav*4sN+LIV;BZgkzon>C#R35C^6k zPRqm_!p=Yr2urZ4W_P{Yik85<0l7ia7iuJ!lt1ax_Ns{Rh{AX=vG5(4^wrFZhJ|#& zrzUSJ3&-v46Kjxmu&2dI7U+}`S=X*PxBn6Tv1@;Qme=BR>Ar8%-?QY0uSEHP{?Xasq+glkpRRr* zYp;2_&%P*V-*D@nJkw7WaE1aT@1_+yyq{G6!-xK131gL006DYJQ0fD(q5hu_``rR_ zb%6FH3Az4$fS~_kGw%{F&876|%woP<@aH%6<2ir-t#j)}@L}#@pA>ZKW+ijC)zrc9 zamUZy1v*-*D1W`+iKCb7DqCx0CriNO`S2l9jO^Hf@e@E_kW~2Q7)xGx1jIk1MHUCN4e?_kYckcrMGkQg>HV=$d`d5!a z12n0czx?p~{yqJK(FdRH|0~x2>`DHAcK<6D?PJ>iTXqw&N+b3wx*zJ=*H3<8I+~>< z*f6~^Psj4$mi5IA&x-Z5-?bT0%MGY|wnPNFe)nm`&ccsxHvGB7->~1aC_?t-r+x^r z{Hoc3{^(jwE|f@Lb%g?pSZ=uT19U_CF^o6>)jKvK@$=Qb*Iqvn-kz6>=|ydIRKUQnJp@8a9wI?_ZRhLu=u&9{_)~D8NhgWt{UEY_~ZHTiYnWD!)-b@ 
z36*b=Qnyv#VYkrvp8nt1+1G=6(@i(rdbWP>gp9{eIs5F5MD5&3IK7XxKijDBd0=C# zhmiZw>YjIAr|pdd%%HYAWcup~A;)dBfMqH;8zX*<9{l1!bEBM`+NyDJZlBZ+7^Z{l zaMA}1T{V$ZBl-iKV&y34b@yJ=xjuK+G3VfA`~TvH-8;--VqW{=UR{)$3FYS+L2 z{a*}tiERc>(N2s|YtN5+f2Vo$q2t9UPq5fD27un!=9=~#d|mWG?K38w0JcRQciPt+ z;B?=32AI5CWLR0e_FhD38u{bhe$K{+_W)8|ZrUZIYAJ<9+6kePh()Nf>Ks@TTd#0R z=1xuO{d*Vgy<5{EU%GJU1O-8EkwQE0?lCn_)ju@!q>jI#I`64`x3;4^iHvuvu1`ox z8cP}*85^nJ5$E$+6i0}!civiwOBh}1E~Qky_T)E;<5&Ot&%IL=)3GwoQ5segO2}hJ z&Pn}t=$HSz_BnE8J}VD*;lNKZ$A`E%YgG)LgJwOBHRYrj`lB>Tl-cj>Thw^#_@d)H z223SS?%9%?vY$#E)EPtTt%&V%E>|?nHtw$UwU8z3VDtNTySMMpMZX=^oX3y$I_=-6 z<4NG|@;bx6U3{q8*4BP<|MxGROXX%Qp&%c#^>0t5mv0?j6U2$*8yzawz1?g^Yd%Zw z+veVXv+eyLFt?O(=KqrHuTN5uPr6buJ0u>pg|!%dF9jdQ0j7Pwxksj;Bsxb`CasW* z*W@sb-i-+!^3(W_3_qi?)tx7YILjYjnmhahe}E+~o_njS8E6}g(g?hmCb9q94UV2M zluyNqftQTeO6{$&&-FE2HI!H6e)cSv5B;9~xijZA^Y;?4x9Lco*F(CR0>T*u1s}z- z>gr$L-cP%8*)o3Zh|~{@9OdrgSgQTIIDYk>Jg~T?dQ1gqp9s;I_+cKZts=a3Elkn$ z7~WE^gKLLN$ylg8*o@>%l5u_Zqk!&h?{f&ia;sT`9;*G9w)38Iy9BVCLkxW80QuJ` zq7uG|HfgHw&i*UUNxi0^IDIeYH%{dP7b;S$KHS@mo~Q-x=sqF$vpS(^ws*^%0j#Oy zenMV-BBgxdhEztT%CV&VQvZ6F;`z5_*An`e_7iY}=^jZ}GZvfy?FJJe2btH-^X|7O z#~>ub{tIujzw-UIo%b5DG5ZB8J6=ln8)0*SfacGi5H@~~uuCs{826Kh^Xe<>OBWNs zGGt;ZMQytL{_oFaQ~9lR?a70SV>_00|E>Ue0PV@cpHZ^=az)yG{cCAIjb{ ztjTBF162`F6cHt$BA_Tr5m1oO1*IsU^xk_5y+a^~NHHKFy;p(ILhl`oNbdv)y+ud> zDM{$u=-&Ie|Baq=?)}D_gqe5NthIit&bW@aTNhXFTsTGHv*+{K6k~rABQZDB=Px>d z?;(*GiNY){r~lQj^TPYz>+KX6Vn6*(IM&Czzei>46ZVn*3yIl&k=S~W_VhpUaYYD* zKz*N;=ML{)%mSa@b5$in#d6qVuq>dDrvAmB<65iyl59`?AAr*X1E^p1We)O#I|Tvb z7eAh6!?)sk6w%K@D2UT8UCbRiJuQ*d5=d{PE1N4Oo>pAxqolEYne%DwL}`WO{9rmg z?+6{M)o-3T{$L#bmvJd6h%8K|K%sYfHR+M(fFYfj^&=(?C#H&ze=rn|P4Vy%dv~gG znnj}iA1d?Axg|Qox3YV0tf)_IxnCk(ys+z(8Q+YIQ1Bn4N-!&SKVolkwy4bmyHd%# z9A`VdAmSSSXSw@xN3Mz+T9VEaDb#%UR&3i|0;Piw`(suj3v>KZ5HY;Z#ZJrBDHZqo z5k~DN@S6=jCO@k4`*XPA;9s(~DA`Z>hb;YNIBTi&pT8-|frdi0e0TlKSJ)pIbpB z8)fAn_5NRIaC#vnx`wTNlhM=JsxusqFI@iA!f|{>bO!5LQqa!#G^gI-)qH~%{{wj; zL`n?T9Ki4HwO2JauPZq523l@lY5!R13*vTN^ox-C+lK$wXMxIwty*^Z+f!DJK0gD? 
z9%AXwqDS$m&g6T(F38-bfv!?$5++P0*(rE?^6u`P$BXBPXUC>`?thJPow@&qK5B`$ zbU(0x+~vQf_=9S{zfIRjcXxmy`2|R?pSPBr{rR<6Hwv_|)g>^wj-DKNAm6 zYH?S2`pT>`*!|4uclNtWYk6b-=r0)kCudyTd8WH2;U{kU%j}}}2qzjrwyBI>mAbP} z8@tc^$(MohL_H{B(U*D}5&j&SXm?#Yn2z}@X;5v;Q zpqN8nMK@PKf%>&$)lG&;@h%Nkeae_mdR!61UG5S$;@D55VWHUm3&Rt8o4Q_Ye`QF4_A z&2pNPX;=!b181mg2_;)ZdCAx@TeDci_UCVQrrl=0C;aqWr5mK`s>H6>92#T^|JaIk z6}1&n+f)mnqzH|Y9e*HSKb)uMha0K<*;h{~Z}e*tUa!Fa{cZmE7C`e#wZt}&OK&!7 zVkaE>b2i!`M1hooIDCmk%)A!y3j@ zU<()ORE^4wzrM+~xn6C%diU|8^KL2o>#J`%VpDR{gz$Anj^ek^{~@0LB>x`^zOYC` z+zFAq}gX$ z(uQLcV=d9k-(s)(%m`0AAAb~5B!gW>mG}7lp^E;T_8%`*qWjAZac6n09@lo=SuDS- zaoeRb-u6o0(3OIJbM>9nmKSQ^JF2(UrSJrb=G291kB7mB5WT zC)59B!2clNKYw!dE)fkcMBnAI4jVhcP@A=8Dpy^!>*M*)9w}CwAyI)oF!{d~=YJ{0 zz@lIJVPTkyEh8!0My1Ve?EJskN}6amt4L4WM0@=I(?6__h%eeE9bk15C;6ynxeI^!h?APUF%5CP)lOhn%n^0IqJw@~95{$L2RQS9gq8tjQL9)xr z#59q0`arsWmP2z|TBkFY_(}G3LoOBx2vs=6Zcznwc7B?Y`8Za4Tw<(-fI{U1rWBT0 z?Nlbm;*fGm9dK6=*uN>hP{UxeiOA~rp!GCvdSf`Xt~^~r@Drg|2esvCV%ryUxnV63 z+M$*FrU2vW(zE_m;j?`;Dad9*r3G?KpW@(VweQPO#R^t)4y?Y#ragiKs{LxtL?%y# z4(HkpAHN46XT|DQmbJl4<7nKP>m-fZRag@~cvj%mD!?Z(6bD+*v^S*!rl@-GHxX;l zOMq5l50CnU4vYU0zwi`HP?PCp_N5(bF^TPSdq&mYTcVBJWd-m2&l=`$0MPlskV+#` z_WGD~(vi9m$r%b3dd01mhy*BsnH}||5_t8V-M%dq(?>8R}qdxI(3 zp6}Yo#J>L8Pj**G_i|bWNho-VA5{O7@26ah5BG4YncUIdgc>R}s|1s-SVsQSm-vT+ zV@9WTbNReGl)-qolHD1Q%fY3-D4RA6RX}SC25;$9*m|W)sPZl~z4U%5T!lhsDHlU!WlC$=}p zpuICrYa##O)zH-|5 zYzs{vhCv)~BJO>XXq6i(%6v>V{7>K6&*=hI$aO#~6^&~OS~@uHpnAtRqbC~o zsB`rxO=Hu-wROdD$VuN@zEMd0%`Eo|rk5V=CM0DR$0epVK2V&u8mC&+`aP&7*K94( z`dmFS7Lfdd2@HeJW0StiATEZg;|$oE3QO!g!lSTf3HEH*Ih;RWkFj%PHBoLvz#I%C z=pwA87=4s+;i_XK$MtO3yUV&(fT9{TBPw<}S$s*Q5B*6_0DG)eBg0F9aS{yvt=sME z^sHw7sy0{KXeq$`zLA-V`@!rYurWm=s&2IGNDUdgCnVF`nE_PU)m+SwT2vAqHGe-z zY5qQf*kH)E%;v;Ge`x4`_SBlePou%**jCFzYiI*tW#&} z(q-G21OX~j7={4Q2L+0E#+#7_8e>Nwv4Ec2JgRJ~n(}HEkZa`Q-W0= zCfc>~lRyqozM)KyA=-0!kpy^6Gs*cn72l9nKejr#N=pZdi+{&sA|3ta*dlBkDWeFE zA2u;HUvKzGVp#O7JLr224(sGNAw9STVN~sURfZT|GHP462 
zFqLIC#D;%>M1}50uw*5bZG)Rf9&_((wBd}6VLOsWjEeTkX4w%JHJ?DX&Qb`C65(Ew z*^)V;6*~Kxty4OOPj!FlJ0D2^Pkib++buDpO5W0rQJJ*LKc^JvS%0bp^jMe1%}XM#NeM6%pG*31KSh7IjX*=r z`mEsbP&Hs~=MCTo=q&f~T^@S(c=)8@>IzmE-@6>E{ zuVX5~wkMuvnDBQ0<ks&`L-vXTNJBJ5N>vNi&@1*ivGnuO*n_|7h_9pAa^5Lw3$^DY6R@7LJ|trR%DpKlVH-o@dpjq+4B*QT@mPDC>iI^<o*r$UMg%*?xN$c z0p8q}eMk>0sh#g}c|HT;gZp`jiQO$58#gF0_rPDS&k0v-GEn!riCc3S#wLS_lN ze4}^*KY*W{Vr`=8pljNXhyF~7=NRj ziy|!$%kH)Sq!9fA?n!gLbv-|>;SJ-BmgsMzO#8j%n34t(l^2hMQz}hkMx&W|j)AjANxP~Jr**O( zzPVwGbWmcOBZ!Et4PSJpnXv2}UZK?gg0F?}uS`uMFJw9hr^Y1;A4NOPrTIBjP9N%9 za2@CLx-vHosyt{+#*7e-_$>}S3Mm~pD5Y%(_ge@FDnSUPD=S@CWol> zYVVl2dKmXotN?DdFL5S7I>)w$3bq>8T}RupDpsspg+7E_~^&^;Tbc~vzL?s@KfSB1kjv+Z!@vRIa6|J`an}1sY^_JF>Z@>Z~Gqce<52`I& z8C{Mo>%@z}Y*iW}6B<=e{T^U^6@|IeBUDS=LmGRwWmbLVZpF)HiPt7PYd5GfmJ1T! zSP@$+Br;5?k{AK`(VJ&nEvFRC6o$esQS#J2cRel`6Y7_#Z-;0)8l*q69%$&!;I%e` z*@P=jmx(D~M*EiEyCUeWBxUisqZAdJI_&kqbUs{A9sLNeEJF`db9gIj?Bb*r_tVR_ zm}2IPGY}4F+O`e!dq<-BP2qKzoIoFhF+{dWPCtVZ+myoZ`e-t})KzMudXL?VNzT``HD0AhbyKX_PVcS)_Ib5&y3AJ874@h^^3DYq9Rxvco1jm;q8 zsNRB*u)M{mRn2LnRGk|Zb}FQD`Q=#!5|)PdK1l7NocD5YYo=1OoOO*2_ceD&7p`I3 zbc4cvr_+mbM_$vMK6{x6(ev8)U75vYy}p@Qp|qw<_ma>Ve5%}QU4x^U-b(M?`*AlK zWxoOSB|_+Zg30o93#&y`0%r8P$Ko&lX)Zp#;CF1V$S6sF&uFO9HG;PFsYJp%Dtf%F zA=TFj8>YDm$C~uOtq~z}@b~_vIXB;co?RoVMpxGP&vEzI`2sqtRGw{5380U&%W@@f znPqaS@r36Cu#cUZ=2VW&`ss~G=FvW-@$TR)!%w!k==;2b(3`bONbl_81FvUdcj04N zV1ieOqAmReCllBs13VQq2mi(B#%r4L+z_Qpc*ETvF3-Ofh$dT;kYmvUO^A~?=nG_I zzUou8eMY5$UOLN3#(hYGID?KY;oZ+ZP?6}{{fK>!0#~3kbYFET{RXN>B}G7gWS_tx z9S5qBEps)|Qx#qf9M=BQ_!;yY*EY}F%vu*6*iIFjKI&kK z!EObY<|=A0mZM;?Ruf^~RUKRry_pw$N{KIM^rny*1d1l68i>a&i zYg6~?d_5(#nleF^gJk(n$=FA>&cs+VlqWQd(Vb0f2r;P~-LH0+Ec&VO6MI{!-TXsv z5|PdSQa#8`}jpuD+SSyT-`gQ;pwe}P!@gn&IRvP525p! 
z)Q&ABGbndtbF;Pk>zD0Nzl@emucPGos-_yfVl@wbJusW-FXDUkku@((#=jY}kp&v| zuKTReFowo)G36th=}H&s@2#$4>M0%^R5Q!xe{1BB7qhPG4OMCHRU~(x+TLMdlDU7B zmk5~q60`20z>afcUO8ezLPXw}U90WgW`aJV0zB}Vpxga0xdifxj#?`6$eR(K!JO!I z5S})_%2)(DUs97P)-dti(Pf|6pk2v7Dqi&+-rzaH_+KhJERgf$Anjs~Bas*SCiTgc znL?$>SEh}SL#~m1&X%-msEfJ6H`kb@PeE@*n0pj=)y4sj9T#o&hMT4IU`%(s;keeV z**;Vxi`*I^@EzOIVbV3o#v{;8SAScx!XI5OZp4&v^d+-Xrqu683GP`wQZX;B>9kZr zs@t*g0lazjVG+ z9I|GG{zbMGW&FHur8^bgQRlq&g;6=V_(peCRo4{OH{&UPGK zlGCEFTI$B#L2F%-5dks5E56^MYx|VBeFcU^4o08$9O|3<@o_IJj5aEK^SNzsD$t#` zG2n{Zu5Yv&wO=pQyu7^R*u!Cro;%C_fw`(kdZdQHfC&+ObnIvR&TLnHvLOA`$$uz@YH(qE&4oh(;V^S(~t=-T&!t8_)b+vFOpu_cgpx;}H) zn{H(zq5nKNNFVOBAn(!Y*^zTR|MkAl`!NA&gKR4%zE6KKd^<0F_4(esCCb(iR9w&r z3@9)guF)2xPv?oZEo$Xk`@Lo1QI+H#{v@hcza1rtq>jwdLB-qUdf{p6x3f!}^>*pY z)bhlkUO#H%&$0u97Z{{eg;(^b@cXjW*CCbUMshM6v1LkMfkeU2)a~+tFki8K6=D;i zraHl!UlLsQ^Gpm!MNy?ydCsC-5-2-Bw4y=Wh$?mB_UsT%Z6u{bY1)WWuFmjmfak5E zucFL->+DVW#EJ5t1ak*r)@4l_1)w3XfH+I2quem$xxw=m*a z0JOngLH&$yPGCT@9YTc!NYWyQ0ICM;k32J{U)&@?7+QR~FjRPhOY!()aGAGMvGo%B z9=odL)ciFR&cx0FY0#wl5tN?Vr|_)a8}rV8cx!~iZE7)?(&m9*E{&L)X`fPRW2+=! 
z{LGU}^sL4xch0Zo-raxe+8p$875Y%|3sg2skfI{U;YuJ-t|$+E~S(MtIBjD$Ap;Sc(L##a7l~RycI6TU!Y1e72sU+uz^=2E~|!0$uCl?OP2` zI{9p!-h(C&x4CNP&3ow^oi;WodZDiC!ohYPA=Oc@PY;z=yz?Szf4NeAb?PJ1m`TAjXAzoIx2&ho!;1C9L6AqC zJE3KU51v@vLspE$>3vxh=4`fe<@6w)&4H?KM32K`);)(@rt~ruUUG z>NO4aN|z7Kp2l!J7DoU}O^n~lUKYv!M?US}AxEwAH`1?#D;XR`#@H-J!N{x2*~ut) zW{z~5GS!(B;mc3b9zl6Y6-IKzZ)tn?T_!IBjUh!jB>D4SNj{BL* zlO^j28J0fdS9fhpxpC@cX`XRZlg5F2=dr%WOCyD#$()+!)CBtr2R?m}xLY2BZME$& zeMdC@>STk7+Ep{CcPmC_>r^yjvkN8w{KoXC-_}khLsXM~m6&Kjw$8qDuaA0H0qGXf z$`k&=U~ct_lK1n{1Y=66sChMn98?RsO+%tE%Nq4?#9u0Py`18|Bg5(1=RU|VITe9Z zidxx%q}K!JzHNCD1itRfR`y#CTkp4Twbv*G>8(UY0lPIV z65f+dYd$S3jRH-tY9euiw(z6%jhf+uKGTsN4aJMqJIB{JZ0DqXu;D8<8z4e5O3&$w z#1!nPua8y1a`vOtf)Lm$y_5IWRU`Xy$%n$ev@64VSh0K|@odi^d#ThY7vD$@+huKy zeHkItK^@kwoVhq|ksVbR>jTMKNR3nyOOXlHmO*>6G=3>72MylIUQY4-5j#aNLE^ec z@~`b{N|Kbuih6{>H%Ii0yH^{L^||BzhX{Pt>%+y57n_#Ky@i{>T8Ct?4}WHVFDs`T zMWB%T%eMYM3>{FgAo~XeYc*7?+4ZIJUKtyeFN?P&%qk;`MxQVhkv!vVe~xO27>7PQ z&`(>}fk+j{O0#iQ1&FK^1-rU2>W8!TshLs*#|V=a7dut!!SVTyl$V$GK7dc?HMshl z-1LJjH#}mdZY$}({zNI=5%a8{f*xjES~h>rPgAd}cJPX}W36$xE+Ah~e+4WFxMC!U zHMV6~QjxlyH$s&I13!20dD1mf;|>@~tAh(K3_o#SIkRq@E~}R&Efjv{+`u`!@PHNt z{VQXCk+zN|=^;e!!q%k?*5hQN6;5co50tAm~b1@*9M`?@Z;CcOkv= z*EcsmdZnVZsdfu=4mErfG~D50+t=-OT7CO&B^&$eOxPX1!xQl0N!HxSw?-$Y`0P)_ z8p}i8Bc+z;kux(a8mLyz#{DzM?d2~@YGs}5OTdz>;00|9dnC_NE{Pg*ta6Bchp;${ zaa7$S>&Zh_%GtfBUPn3X@k5O<7(si3cz-hz($3FQE$3PrM$SY2%~U=3Q@NI=dS$f< zPgIYW#OdNbVq_wp4CG3G=QnS_3k=q1ZRgJWRvS={w5Z}Ut)*~OC$qB0 z+s=cD(S5Cuyj$%yRPO?q8hApaD8N5fgb>D3@TY9XQoD0y)y%77p1bano~oaYLKner zXR2-zt;Nb2ZPtQse2O2pqq@r2kWfX>Wu$R{dThjq_qtXGymaS?9b}epX<7*!hYR3q z%r^E#I}U|P9;N5TRQi*P|?FWCrdFU z!aK#7v0s2`>~3D8YMS2ZAQ*L<{M?0|i(@ml@Kz_g%#wxc2~Y+0E9wn7-(%W*qZae| zP}30iNwv~6=9!H%N(X}v%-)QQz0?wZZF0(BAvyOk2{QU| zQ2Z+Qr`xQLjPs0e3{&yklN+=K>ES%gWJxZ~F69^xEY+`Ir+GzUa!o2|L_7pv-;|=1 z+WT~4O9EMfXI!y40R@@cu0Zy}j=kXlu_?`QAhq7DuQ0%5eBabstnMZ?a{HUVAZ8pY zTfa2niP0D9s?2Y00I%!89Yw)i>C#bT?=A!RHe516QwP|{B&+Fl4BX;)8@@d-6Ei72 z9QY>jV>^g3uC$Y&0!ObsEZeqmFPSC1)h|5m?o8+tfuB3 
zpm@}xfq6fA*{AAOdqcn#XX77=Yr1Pi!Mwh%9xdT~fMfPFPxTwJ^g&yDW%z#A4EL|( zV4o$_i6Z=8+iem!MIBK@ZKfnePZa*lN7ipUfh-l(@TwKnhP>~5{Y6q3pz_#kQvwvW z^O=GZ0xz-uT6Lhk+bT)_l~|>-#j#ZIDc><1_RTT$y-%UR*XI+W0^Y@8iMY?f4|wy{ z7p7xMzkG`BxB;8r%Uww#bElVS?<-|~+fs5>K>g(=>iO*tscw!`9BxXIM_j_eIYBNe z^xoHFya9nHQ}W32=c5Gn99Aqr=Gy6ih}6aV4{Qr>vL0U(6v#$5T0AXTJe(%S9A(?r zeWgeepbs6aSbDbM%`Z`)lRxP)gaB>O~xixoRVnDn+P7WTiM{N{kF0kUg z%#PgN_dQpMYn`GIKPLI~^2t|) zL&%lULmJi*eR@Zvc>&G`aRsF%I|gY3l@%%*@M{YX>tpp4ddF-DNxXWwWPGG5eZnM6 zQR9_kW$2r&BS&3Tw#Xr&J}(?!R4Bh4C$eJv6Wj{JE!-!!gDeji_30NmIqCNXGs>5B~aRET^P|_$VH5e%0!&Y`_I$K6+e9K~L=g2r;P1DM0uV z>_w1@9+6ez3-gry+Nz&jMoh2T48e-0FB%NF!}P0}H6ib|@G-#JW$bAF@y^n5_NQt^ zqN${f&b>FkV+Qe}R>~;0ES@Zo)!q)Q($2hbeay>%v$)8EMKRWXVE<`R@rzu21GUK_ zfu`bavZUMa_`(wRdfUg7))e5D#|C9Ud9}j_s#ZOUP1@sZ)WYlpQD$ShBPsn8<--8w zst+?>YoiqhtDeL#Q#?bfJJIoSF<}*pw;}qU{wp&jV?a?wv*MvR>_X(RDp~&JkfF}! zcOG9m4;*;DyszC!Ua{^)EPWvWfQqB3swK_j(roWKAjNg+rAf@Igl@wrSx%J7?UiOV z8}+y&3E{Gv+d1T&%jaVk9v#cXz*R+HH;Ts89F1oR^!JA(IghkTLzsIv&K}XnI%W*g znqk>bcl4Ymm1b}$B?FD6rI2(e-~^JgO~-vyQZP4n%Y`dvrI1JSAK?oF&Qm`WbQf-o zPWTw!D2;W9&ov$B$QAF2(gVMNVsE zkp_wQGIVbPcx zEa z&8SW$IFpY3yFsRNUGSq3@W`}OWBG!|M-qG9aU8O6ds02;EPD|+b?I7`M(=7|&mGCX zp&(nt9v{QVq_fW&=8dbn9jvQ>^`u8p3y#&+Hlo@H9O9dMcdJulA)Y@>V7g|ep+2)p zl*{$^>Wf#7gpq@Yw(*_(f+~LBlbF^BuN2p6mQ>bv{uZ< z7ol2BH>Byd-%RI{0jod|A4EMdcjR53%Ip(feJ~>LhufoF^XqniZe2p=NjBXg1iLh6 zJyWulFYeyd?gR=Z<{uE#lIQUbHW?Ke?A~0ed3t*&VqH)NQ62KdA5#|C=B5Fc*H>~w zr=~XC4)&XTPfCOYu8AuVn#R~;~Z)Unw^bK51YYP z={Su~!?(wM&xfQS`{%U@ds5O`kB7ELUsHMsa%v9O+mz>5V7;t;JuxpK7^3<9qc4|k z!cJ7)pIcwv!lZekC7w7^C?%gq)>=+hSr}rK_-p8K+EI$;h+EeW`#VDrYJkk1y~gGkmy)$a@kmJpxut6 zPmeK3-LsSwSgyE@A7}D)2L;A*^=SUpkYbB~c+@5y6e4|m_%jzC=65r`?u?TLEiuu| zk*Cb4k2mfC3;kSUo-Nbe3mafO!x7y*$QY{4l4(3F>=7q5T<%=$Dd}1F9RloC50#+{ z?MKQQkbKkrxP2Y>q19-@efUi9PM%%=yCbZ$p2dn#b`!*ae&S0+tLf2)ZNoI%dJP8& zPL%(9G+mtKvH6Au~o_lpa6ywzn~De_&zOR}_XIlcn+KQtf~CwQ-o z9Mti;oZ&zVUI5?OB|uGKz0J|~mtrV^Iv>GE!vlfBDvR{a5}}mam(P7sn?(sdP6Dtb 
zKg)PG1elxY8N```jK~qbgQR{M^xby88_Rt((%f>&TFOYN-iU`;W;%uwdjp))6n zryCJR431OwB3L2Dif}Z=eIJuB$_qd1*T4WW<)Lpp1=1t`ISU}oZPp8%u7_wF zv~AwhGWJHcrTeOVm<0oYx51?&gfJg4{BY=*BZffu4Brvs zt@a?XA&J}qr9EL9R6r*uh>CkUdpFdKJ%Ujs$@haL@=>9PwPCamhR^{`<7iva1~gE! zHoWNyIp?V}DQ!feFN1%{ zN;0Euhr3JOFb#g{Q8o27UY-?o4TlWBzL^!=FhHq<4$i7?sC_ z6Y;L1__ym^6kwQ0GF<&R+&-HU$*}6p627OzFnK^h|0q%(=kCelVQn}&cJ9c6rmBdtqMTU#fGYDB3+cq?82mVKQ=o^MqH}H$ zE(lk7Qj^!@cADd2P!Ek`tHDx2#1e2X)Euz@`ARo@3Jh>N#_pG;m5b7t0OnlsDfBPV z#j+W!fHe~81Vxfc{n&-d>IuXeg6ZDvDu&dT9ZGlKiZfh%3M-zQ+3=1T-!O3;^3S*Q z&ALrH(UO|8=L{>8Cj1# zmSakDskSKMQ0?EI=7ocMivsd-#T(wM*N_z-d~BXZ+!`Hd*CtRd1`XP#)9oADA75(4 z25aHJ6|NKE>pSrp^4uZU-hCZut)|!Q_nHT6IMAyFV%V&P*6bY4@PTeN-_hA%MlN0UJ`?oPuT`LhT3|4K5;GZ zsia3D#k~}WdgWxWS6}^naoo;vu4OAT`}moQ;C?&V;m+sLTJ__9i)+^OVwX58!+l-sRSBm9~o+))qPtB-q ziW{1-q0R%DszThtr>eQP7(PGqTS=*WXpqucSn#b8^!SLL8YM66T6i>V^8A-r1uC~3 zKfA%O+H!d*HLA$1*Yv7GXriEFBH48o z?D868PNLbejN6p&9>+1UU6V?m%&Id`9;@$mXMMM_-T9szHnz_}W6xoO58i~n=+Fol zqG3|P>ch4k8yIwe?!3`h**4j@f~u_`-x!tIsA$35E~-wD3)6V56fHcC6O%G3XGg3> z<>Mb5NG{4zfG>!#kK;fjhBl*6l1%nRwsKo;Rqr=SU+Y&rwIBJYRjyMr!9s|fqOw>= zH-09@kZ3u~;=fpV=3n{7s655`?fH1KAHL(ez40)EO$;#zLxy@;^}8D#)*wE-vDFt_ zf`ZxF7?x53?YJ`E^O%U2!?DfvGm+V?94_LnMWWA|sx=(8gS zia$keN*5)&!j#aQ1R{~W3I2^gUc9iqV-+BKQ-zIz|l6B)rxRBR=Uur+^-ZEa+`cmBi- zu^E7Tiv@gRk}S(x^@-FI4aYR*!^dNG>|HTARso{oKCx{RT`{SH&?A5a>-_GbwZ-El zM?7d-tU9J9>ZhxkrEg3F*LLdh z4qZY-cJCu|hF@`#mVRQ5>6^Ky{JK}~*`LsIhQavRG7G1nP_Xs4T8(>G9Abibpr;kq z_iI13IS9EfJl`&}kKXkiKZneD&R(m0kTTO*$*%5ct@O#)+MUY@(w-Sw)%Z;*k>K;% zSBtYIo3>$WChb$zRmR7r?Hd}7z?<~xPnXNx7jGvILN>zr1k4pjfdW6)b}qn9x7%S`nY8nG>AEs3OH}x76f?u-Oj3Hlg62 ztK4q0CYCX#Q5tJZg4l<45X+Bk)D0v>&zn_wxmUq*MfBw%^3 zhB@Vm9JlIE9HdeY>sh@%wi#X(a4k}hE{WZf^7Dc6%67S`a9OJ{@D+m0cK9!@_|W&lyhJ z-08*6BxoxDe{2jv)c^)#^4 zLLqt7BvSmf3QH*a+1!QaPd{SR>a()@lu|u&b4F!sqhRGJu`=ht%;BxQ7enzWZF^zo z($1BJAAK?It)Qyv?1k)axM#8D#$9%~b$C>Btg}xBKtHooF{TjLl-3C`nIs073P|~z zeIUNR(v$BfZ&0u_7YuJuU*=Q2lN4W)O7HWPldHBZhEh5*W|Fcob!Ngq|%3>Iq->VMV 
z<7EHTy_3-dlROx)99pNMjcFp@Q+6KU{F>MR_WA&)j3mS9YJW-LL^g7)8ll&3J+rI( z)%+N0>PJdVZg8l=+(m!!im^ODs99qa%x*SoHta|x!l$yqyz{%e@Bc0TOE*53hZvvP zj?fn9b8U9oNGGfuDEY$8`BA^Jp`P`VB#MdENBJ19%?HS%LC$gdcJ@t^k=5d1fe9Av z2RjzFtsA3+VJq_(mweJ{7X#^eko6`=D`@Rw=>|66MiyR`MSqk>uV-B0qhW5oB}`kN zjbvD_Lr*8vzPtU0gxq=jp4dEn^M$wDf13JTIUYaDL_1mpe;AAY?X>w(P)JfrTX8!2 zf~0K-c9L2(u=ulg#tqtU+7%t{8I#AxugGTDja}yjelf{V#n1-q_j&!dL*)0I;`1`< zGb$kuouUONdgwftIHI?;HJq}pG?W-()Gam*svIETWU(XOB)L!>;qr8Gt|x9qg}3rp z@IHzpRBz%EKn zJ@1_o`Jr05QlWB*X94f(+vb|=6M;g*DF@Qxv9s(8Wiw9rIP>E?(P6teliOB%!{pA% z8@b77$_AFmU;%qiy@t=AsSWSOg27p3riQ}!1?BBDXHol}?ukO96raf-Uy7aeCFDly z)~gxa<%$Kr@5xPn*M52SFMB-vYmZlXTaN$JeK~y|JCqy#(nYA3q?0;H={NQ&8u__1 zPo&*eQipvfb9`@cQUY$KRKEB?EPh2NED^zysck>-S(k%dQy+UK21}$ym$$XKrv+vp z^IiAT|8UR0NpEa8Cq#rY;nJF{~LTxq(^#=8^Q@xHl7cJTZLo|t83UNjoNM8 zc_(7~)D{Oo)(qU$I)kUbq5hKE zn%+#wG(-#MHSK*r%Z7D|M~Fw6SF|g+`3E2RBa;U`g1)j$E;MZte1tHPfj4D=rk86% z^QY`Zmt%E5V^|_0LF>!XzFYg>TiNuuCl_P&Pi~^?V#%ruPoGcm%Q|EDb$Cv1ZTiE1 ztu30_p#*qqw+^nX9bEk;Jb&t1@oGt-Yhsvn**pNT4rQQeefshy8SK22=C5L6>zA{0 z!EkI2511l{*H{mp87^H|;!D@Ymfv%6PS$g4&evp1l@rAdrf7qtU7)J!sPy+?RyTdd zSKjbKR9o`)EvZx7YzFuV_9BlYIwJyEc0)Y5A!j;mH~`3!)*e}EV&hoeWpm)5yT z+j8KJD&Q_5Uf13_f$}>kLpBext~3pMW&|A{^=8A5Xu$`_;|9DQs;9lCwEbD?6XyV1 z6BO58ebWZ>awuyRYMCkxD`cUk?2xpa(u{8Src@>4D_hyPQFrfl{`%-l)-}4iI5YdP z2+zWCfOYlfgX|=|g~je_yI;LKob@M{(cB#;n;(!o9enn-_AkwHy8T{V)C0xUe1*mE zaIhgaf7HT{dhM;*R*TXLS7}bEsb7y04-~Ny91^2SZlV9KACTbcf4V4oBk@XM#?&%> z@>b5M!Q-cTCjY|qC+83zde(eTRSp)8 zXKA$k8pX&$see>@tKuwAKqRGn>2vnhohqJ_i?lnO$O5Ua8K-T1MR`9ns^k^rkeTn+ zPQ!k5wO?g*%wxzeH9P^;47{AgHc0Xi8%e?tw`QG;q_u>G%20W5qmLrhB?oxuV(-nfvc{{6PKLxYc)4#r{@mW^l#Z*_t9e3Z)=V-YPfXKl<6ir4Ij!CP|RFxi` zh<{R+%=TB?JqiZ+DrpS^an{t1!8E75ooKam&<9W&-iLp`zq=#PRI`bzU3-VmL@KU&?8d90Q|0UCpfE~66zZfQ8sRz;N%tUr(juu z@$>*Q!pVKz1`z|P>K|8VG^}}t->VEgcxmY{h=GHrpB8OB75`xt=hZiDp64IEI_!F& z$G;qfv~U?USGx(`G5J-C?I>Do0SS8f3t@0s=sKdknOOSKEOwkPRX%a49bWolu~sP~ zE#`;UKsEz0VT5bgbJ&rU{jch|-`=?&&n4oqINj+_gMLefcnoRP@UgU-RT_TnhaQwt 
z;s7id&19!_FuW`W#qGB-K7uG48Gy{wB&9`f6dM0dB#TpCe63-C%_FBjf9-V;s|jIS zfrF6}-tQDO|40fS#oUX79iEqhyJV;zvQ`Y@jr;{{&U`QMa|K!M(Nmxl*nG1r#8&1e z=|)CH5SOK17#)D0E3s-Z0X4kkjr`>XJ0=8VHm?V8k`LWBgWP&_;g?+fc>Mj;c`4C4 z>6n!+Jp!=3KCqJT-EzYGv*pwQ8g53&ga5L`&_@=@$fw^}G^RUQ9tO;$>Ni>a$_gKd zQd1H5!yL1&)IJ4Q0VtC#iIv8-Hui+TNOLZ$`aMg2e0+N{jwQwB8!ZU5NveZM*?}QSCgkBRuhuniY^9ne3=9}-=eeNHgkdw3b+N-a%_g-s* zfagPA^m0qwEd3pCYo7H?pNhUP9+L` zD$#w|zM6l#)P0uadpWiy#>a041oO^ySQ$nxhT|l!azpF%fZ%6AbA&Wy3Y#bQCZ)Rfa^PrE5$@! zY|_y#cyCxq%j6~Zd{L2kyJc>ew)0`Z(PMFxcz}U#4%%PK{TBViVaO%RfI|UuL2n~{ zpC)Z2JILhsB)?A#I0|uXJ%B2KRf1ulRtc zVOU31MyUel?MywTo!rKM1Zk6bx8i;7PSnp>tv&j zl)>7BE)QvZm*KyT$lZS6+xIDYEKJ7P4siW6lw@=rmLALGZ9*|4i=C6eUO;!FE=Dc( z*>7(pHkda)0DFceYQat4ww-cg!JnGpR--HL9@|M&&p(gXX>7yXZ9yCF28-Lw_Ma{9 z0Plz8YvIbJ%dyLEWC>(&t9H>A9wMfe+~Kl@pWOKd2Pgc!ab@e=5W6ggbyU@$m1|T2 zaAuWw@N)4lv=BM*gFdsGnPX+?P$aG@7vZ%5XJyP>Lrb>aOvL;1>d%_X*N&zz+|tcy zp-~_D5@Oi%HQS1#5Gid`{OQaWr1YTqXIuN+Kn9$s@eVVazXqwQiad$pUdEAnIVn%^ z_6twj%oe}=@Hu*EwtQ+d&ySOzV5xnMb$^5{j?Y+Rn%$eL&>NGN@hEaf*E(;}WQU`_ zQ!85ai%ZT@_niEC@@#uWVw|dw#+Qt8ogFGf7YmubUDi$8)9S`78Bx`f@a4S%?X{O! zosCwPM{k)UvLdvr_tK-P6K$Q4{P961v;8y&kHX5aL9_7V);&)_1Rxhr`v!cYFx0{45w^AexZr3mYr*|n%rS_9G zC)x0-JbWg(kQJE=6m1FDS|#|5*kP)S&M^!|PpjCRl(2f`M-{)9Yb%c#l4Y3X)FkMZ zUzlk%Dqiv-mS^zJ){p~5F8IYEnDlBTpR3x}h0B0?R9Y4x;%k+E+$h725Qs4*`$g(o`@r*G%u1U) zLWgHS?bG`#YAUjdZ(n!yp3Ku+ePNh1ar3he@iU-yYy-(W^SZ)})YVd;x`}ILx~mRo zywRJ9nOi9R;@drZ|3zX9v9lF71B&>wlK5-K0;ZwsWFEAJ%FnU=)wkB4S9d9MbexHg zh?{Fp?K65La3ovt@w_9e)0l$|g%xx0`zjN|f?-Af#2c`glbI)ZC@%L1ZFG#^Zxum2 z=#~^gQVgYJ0m)M`w0GC>)rlHp0yU8C_L)A}J9^O%_M~0S-kO--;F&H9Q=)L!r6uxu zhs2t;*Rsz-76~@;mhrD10BXyqhqgaiO5G zlUfE<^Xw!q4d;ZAH%PX=KH>%b?6&@;Ex+BBkh!jyOI2{JQs^(i_;tlKpwvh01*SSo zrM=1#I{)(DzM>~QCPCTjo%(2Z)s4oE<1_h&-U8|=g*rFz=^ZBCk>&sR_l4H=Ze#G9cXA6$75_~sc(1^{^PCq3PfmXL_2d5~6%RgTzIjr!aI$paW3uT&s>f1MpkOQ| zUKV6fc=2opMe*;!=TYoS-l!xrk95HCh5v@P2PBDRGYn1~L80$M_IH_1&oAZ;DpFfW z!UZ2Lnquu=vxXmoD8==J&#-5y)i$9&;uWj=X@2S!%F&YqczmY?b5|YmjE~nf?1PG? 
zN^IZbIX)B`k6>_kx-lB&!r>gWXkxy#?s)W+g)jf*FDDlHd8HhIq)iK3ZB_ol8_uIO zM33hEj;!rp{VVYb*q)QAn-b%D+5!) z+g~4X^N;-Q`@afrYxlB3DxOu4Y&77U^pzH-`Xp2M@fcm?ot*3Sj$!ivwQxz1Agz(9n|mp#Gvn`jcrBS;1jB zs@Sugibpi&bp-){KeG}Y?#chnw0{xP;17Z|VT1PTksxp4$D2UKr-&zKGBSmq?fCzm z_E%-N@0WOhlg}WtOxgEE{)bfFuQBQ`ieULonKa?E)o-xP0=cczdRU%em0a%@E$@+B zfsw5xIC}Xx35Y@Idbhys&vJt1Tb7~P%2a>B^(SXAT?h2S`IH{|jO#b8sp}%KXnEe?rN%C_KN1ZYq>l|8evqll}Cs zX#YXFoXg@@rK5j|o1fnO6Ok9>0d~F6iD?0jZT$J&zkTDJ-#S{B8he7{+F}DQ| zY_MP_v*h#t48}PXIiS9jYMP1UQEB)K(tmtVy8;w~Y*`V^r#w~;`XAr@3vdN40I4cL zaa8{oIITM~OS8{$Rm#^6H7*n;fx zl2!Rg;@0{7_`JEfxxD=8bqSX}`9HhO{i1)w0_O>QCV?Qh-!Gp1s(X&;lb_p8j4@un z2NPJ%(fogz;qOOrj(c({GErht>}a2k!%b3t)t}_aZ@x=cU#0XUZuGzXPml}9i9T(C zq&d?bYAEvSsg8jo>T>>p!feNmh1!3DJn`1`d&U`x0-$39|BTze89GJaG<~Myv=
  • ;lrGM=y2xykGsXq$naP8D(Udsv z)sSvj+UCOGks`Vk+k1;1MaQY_7!T0arXp?X{y)+7*5zx|L!O$531jx4h;SH@BZaB@ z?1~a&l}SRPcso0bMW4j6A-cTqP)qS)WdBV1N&Ma2MVD>mT(zAp*B#URs`Z!2eI0z( z>kS-Nj!9dsC&{anyUEMJmjBoh(c$`S!FMR#;)s&Ak6j}#16SpxEJLW^;|9-;$x%wq zfoj1na`F9>YUyITz45INl&&tpQQ%T44o2D7;}ZC%Q4|<%M}HgTl@8spQT{b73M{u> zzKx=xbF4$*zlY^^UaijCR6}fpUD@I>E8tiR@JL>D)vbT*0og#>l#>}@QA`+uFv|9L zn{-$8&fcL$Tn8L7MhjR??WH$v$GP$FH@@cQE?lzOD%HR~YUR9SEl@wlXiZQ4KfUzh z*Fto_c&0YReuvvy9YrZ%Go5{hwGaAbEtYE?8>Q-C6qlvq*nb@5ci&-+gHfIrksn*z zzlX)pH#YwO;f9*IM-{&BI0&%q0vmAIW8)wBY3&C*j&B{nCJW*Bj!NGQ8DKJm9CG9$ zjSn!#e#am`7PJh0YhErdZ%EEg8Xo zOclCI02Z!u)Pm?x)4roa98fF~U$)xczh9X@yf5G=NxK99E9*Y37ti>sH-83jM#CWW z0KYwV(t>~f{y^+~lMXnKqZBUoPn;(@jW_v?e>;e=$2#Gy1p^9Wyll`!`Us;PmTv&k zbb5uNgC(VlWpW$^JjE1nTGrZfxfPFM{^;UwbNN>KOK0~B7fv$dWGuXUcid6wVgbZk zvyS`dB*y@}En#40&?;cmAHiDIJ;!KdrgBVGoQeYg<9s-DY~9C}*KmH}o2oD}?)$|k zr?=ynOa2Mbz*=+h+=wIh#bvxB2dD-5%H2l5R;VzB;W@6wsCIDj@H~ewt7^;sk!!JNtD>)F9YMhDu zF+TifT3(JR0Ny4eBL6S%Uhf*g^Qsc^dJjH45rOuXkCwLD$`G%!F8l!&(i>Ku+TyuU z>)tc)iy;eZL!3He=-zNOT;b$C|36vE0}QSO0x%|(_1`+I3dR@|q)}ciu(i6`wp?xCjHmhrG6ixqV zLIC`5Tn1KkQt;(rPV(KNzdiD;A*t|~?%?mg0^qQ4#vp)IO>OMiIUNSwKpO9T#W(Zv zNB%MMa(vj7mjI>KR@!--4ymviVc%h`1BY*HWBfMVp+~UZc2l7IwFYz9w*@nLVnclJC|RfQUe3`=ET zvp2IegpAA4!G9M+39+v{z3e*5!k}~2$dhg2JxE>dVdb;bWdZGNeHtfxCRPSaQCzaQ zb(K#}+9E4v&0Uw|J6XDD6>PF{4%74mp6>Nm>WZ=ZoPe9@B_Mh9>3713<`W1`M8u<+ zs9V}jEfK%05t{IahSQRWGgL7u%(a`xbA6lPZ-w;t13v%=A=m#%JnyU}1p7^#=rE`J zMX;EsqWad&9w9<*lP)ya5&`Pl&+8mnNt&1~C~53${-{qjr2Eq^_jLn81LN`0^Q1>u z(L~+A{TrJ)FF)-*BnW@;R?6ThX_$(is})RX>vI9vb`xG;Td^oyWukB!(cV2kuAQ2z z)>WE03pLxdsh>+tg#R_n)Js5dZrRc3oIbpQ7$@JXyo10&v*z@fUwa_FdRH@H(nfHY z(k#0&3|IL;Wo!42D3HgA(ih!u>HyHzM4R~@oqrOiz8_mjGTeRHxyKli46Wt3V}c|B_f=yBw9&2 zG(*W%QWExil^0TO-ubsoEs#EZ8hAn zV+D%0c*p-G*$xZ<96{cBC{YXi!JQ88y0wPh(W!KSer z8+%hj%G{pFP%y$?q1C$|4P!Q0FpeQ8+q zS|o|S_d+wpxJw(HVk0rrhj4jEylwC1X$*oHuBY3T=l7@gWk3q}Im4fMYG)6dl3E=C z*^&c2H?@7~+L4yaZQ8yy!ORsb6PlRy^Dg7h3x?bVI)D_UFIncmr$1`}P+!iJi%ID@ 
z_3ncTQ&sk8>4HmRQsrtQp(kcrD1LpI62!}@C#?Tu_0!W5iQZnr$8t@jnIS{0mbI!#Y%KLDP4zO)1b3>eJp5r@71~I-%&J!=ez5B@5T|uTds3GKUGNz z2EHoaJIDB$P3tiBL|x#J8xL*Z)G9iSToD*o6!w zYgZ)K#p!h|zMwuMpX`AbGAMYIQ)7M$VKYRF+O3D`<=_iunVpt95K8S_u?vGQNU@*i z6}US11UW0MXIHu==2mK>zJ86ANpo*3`T8E=(G?|icF-4qTlba%6AVJ6B4MH@-D;>B z$-|mt0|<20*JB<{D)@26oWC$K>vb%|Y>`Ycdl6j-v6h}OTOz@-RLY@SluunvdH zOV%gn6GcT9Y-~a+URm9R8iU$z>W@4Gh4?pNnsQJ`haVn9Egs2?cJ1I&&tZK3 z=6mThnu&=}yJCN2RnE#N>>wDukz(_ggn#>x;&F zb^*4xRkk2*KC9P?zWITs#?eZ6GO`7J7 znHuSg{DF!orU-tERVU0ALql@mq(+}D167-)?v=65h2FlJCAf!m*Qeh^;>IHOU9mSU zL$1XrL@s>^cZqKIwAW9%xCv+?c%pIv|UzB;uBdfNqIzcaO=ySe1WBli$$ zv$&lWqoIe7?25t{zBQMN4^o!8R4z}MEJBJB3xTx8&1~v}_{b(E@20z{Eq)>a@F8WQ;4Bvi}8#FeyuW zY0N#DRC_On`&QGDy46IX;YNUX_SS>xmiRO!cfM2AcZ;>FTOY4&neoG+?(}W8z9ZwI z!CCqboz|SZ_49J(4Si|)lYAB{USMis7Z?axyf$`;c9PbC)u}Iye3%O;``h+I{MgQ>@aGKCE$ zvzEFg-AYBewGn2Jaw&5I?1S|7deSTM8&NVLessyjldE64?F`$7vPHNB?|N312yK~Y zGoft1CUf3{^+oHcK_a84-y}XDAt!TKdjxxx6GC+|^1b0+*rZ^$ZSvP=A(iy7OMI9=P)l%gc)o(xuE9`lajag57E5jj#y`wM4BWf*48glJ|IhT886-6h#s_ zaC=1pPu?rA+ir5gXyHLdRV@?Sfyk&JLPZ8KLCDgK)0~b>2I!&V9}aP+Qfkao=0H zIlK0{$l2Pd&~5>II{;4^7dz^R-o!8P*i8~K?i`D*;lHxEU(D^;`?5pFV`d~^sV1X#N}0astb=r*P5C0WL41TncJIHJS6tOpqFxv3LmQnNsG*X zcn>76Y6FGOv8T9|tNd(97q0k@K%^`uX-*)tMBFCzue&ry@-q5HP-|{0@uzK<&L+T( z<|zYur`EEoqlGy4eYakFo?Ec@-P9x#;lLShl-763{V)Xo^Yh9@S-p7Ga6@3F8{65T zvPw*<%%MdJelBEONA9gp37p*tvD>0s_ zK2L!K>O$(#)!b5LE~cq(eS^roBSk zN48%^Frx?q=~n{jJL^wRd6d%eFBD5)dL~^@dy>n}{oJh^ft8kUZ9^HPHC70D( z(TF%%(8lX|^0gen@@#n_bA~G}E&U1foMBDjJzs9lLq_+Weq&vls5jMPM$cjEs-6?n zm((YH>bzDh&6~S=KF+Ol4a}{43H$n0+osHEgqVicbL7$Fi2ikjmMn7)_Emw0%dYZO z8GSVy^T9%Hb&NchboW-N^_;R8V%fp5vZY9x^E;)Ko|=6KqSY!W1+Bf|$UTs}Uy~oL zoA|3KdruW%62%C?HT7=2h=jr$a0FR?idqJ7QHr)Js|cG~riU|7%6#uKZ`JghWAD@9Q! 
zApYAAS*^>qC!xF3ABRErL4AqV9 zx%s&Q6)8qNx)T}f)Je+>_~~k_UMy-S_&C7JV(9z&^mBpqt727GrMR(glI9+u<;D>Y zR)+0&(t{G)1j5d;vBB07B~m+@HUUc)qUodHg-pm&lV2e-?;O0Vv!>uM!e}&_kZ>2J z)Y^Q3Dx$kS^^$w=Vwk!B*@%)(05UvgzE36tUvSH-=j#j|zFL_>2XuC=oT+#$bwjaR zgoZcFAe_g+y!;NGIhO#Oc6E9;lS@P+Da4F9AUohvu*-zD?I#~z;`R>-JOcdLFP z*Hmhbyn$`Z-1+cZnQw}wm-9VZTf%Q$ zE3clU_ZL-C}5a`3)qx|{4yw+7yBeSBZ)jq1~Jc;`+JrL65pYj!-JAHMCOoK z`C!CZJ1Hae)vWwYqMDHGP|7}ad4|@VofmYcZ}+&L;n{Otq*f;l772T+{=qUXXeU}4 zh8xPjy~4fkWSp1Ra@lv0m&p*RN<;Yc3L-oO7vxWq7OUUW&X8*?a;)T?<5Q#uyK_d1 zh@q-LZlA;S#W=ymVuoh!uJt$x&S+J|W-0pIn>bll_j(koKs5>0R>G4jl}nK>zprQ4 zfquD0Ud*9cihYZp&B7*dQ6#uQb9LK?&R(f$2^16-E|UaOl)rAp2|>sC$fFAECuG^= zgI#~GCv-3_8WqsPIF&{bO4f;pU*KMBs(D$sh!3))lkh&*@Zv{2%pwcKMFg1YGK>Fl z{V9^CPmmGMA4j_{kuPfrgQ-;a^vXZ1L)ZH3_cu;HuN5T9gc;FFp<*5=@i`>v#2nffx+6h%1?(O7*^`m^)6zq{7rCDs`JgCkO_jtp%W9rkoi+e`#eKpjv`ZPMj~&S2FMHC&z+0u1 zqo*U^*UY?(j0C!5q^XeSflsQ^F)~oE=jT_3hih73>628lRL=N}G;fB!!pys_;EYyB z?SvFF3jF0ENWVAFJgXC4MW|!_kSTC8LwTVr*o^O5iSmF&vGtwxi!D<@09XZyo# z(b*>o@Z{!wslc_!KQc?wv?VR`qb*UvSI!yEmpLxoPXwyJC6j>GjwL?g{wG3wVpWSQa0k9y^r@0?zNk`B^h6K zvC|axnu}b*9#P}i^r~(4nB^qgso%v%*qa-vD(wCmNi3(;s+W6_YG4EEEm^YGo)c1Z zD}&RejGHMxxT*PSJ1Qo#XnzaM(>;*T6t<&J6G~*jJuQ8&xdxF$WanKU2bE|>@JCG# zZ#EQUa5{5VRo^Yq4p)wI9^_#U&<_dO42hz$)S4qwWe-m547_GGw}vQ4Oe?G8X2K%4 z<${uf7NdFk<9Q|(O&;$akl zzz-b`hpEZOhVV3Uah)0`r!eJa()&3e0e(N>rszY_=a}qzu&Hy~5^weLHec5Mb$1L; zXJ^Zb2>(MY>W)G%AERl~J9OJM-o}K8Zq>WDg;(|AC~4G zSjARa4e>e-WTQ}N@#?G1%$vwaPifTamdB7OGPy5)>wt?0X~?>y+8OV;rXiie?-P77ZW*sjlJ5@4FNWgQ;Xm&$2Gh>Ry~)@7!x zVpg$Y>AM+&SEV(AM7&3nuQ{DhlPj>ya&)C{@thiJnIG2?*y3UbgsfjAe&QTKfd{}z z-lj;?>kfu>DlLhr4)Yhw<z;qv_p}w2A%D2rh&k9xE7mM*e~6;m z(w7sE5_I!f^UxK8Tvz&@3iCb91mt^s$9bcVBz)4DfqbH-a;v^Q?5_;Xj}N_8n5H-PWm>GeQ7Js!E}B}YaAAuoTACtz^%r7j9wlr2UJaZb57}n zej>Ls%qR(3`N~U@cOmr1(Z1$pTYcxrlB(W&>=th)h#Kpq1?l`m`yU|5q}ZCJe$NV1 zKarixli#77w_hH<>X59J?L`Ns`fNhKzwn?h3ykek-)f#ue?!%L&#gaMX|ctOnT4<0 zbm#Xw&xK6JuMK+IWvRLEl)v7X64~&0ef6dUvh&gwrd>s5UuaM(iS(j2o&D4L^nt`Q 
zY=VK(nr3#H$G$AqwEbd%m$eu~aQ2NSKmVSa801MA3#|{^j+Qr7KVBKk>OX^z&Y){|i z$&)y;RSqQC6tjALa$*L1AKU%qczyd|V0wOm2;7R;-Q9(zp1pzL5t{zsQIC*7zKlf> zuZ}}^Z%?|S_6sH_Sp^U@SRf07uJcl;U#Q&?%Xt z7fvyC{!3V~B(naG0TE_XtmAyJ{#Jx0)w?7HjvTtBYj2}Ea{DbZ>RayV2Q|QhsdQ_K z%MD)ey?(=B2TPfcNl$oUZzHz9(5<6)CDK&f(}lB|G@q~R5z|c{5}g72TEf)vS&_Eg zJKNNQ_MQ$-x0(26StK)Vf|*5VQ_V&bJo}|)*E|W?uCe3`_L?{2^IuWn@9zou=3}q<^6Q$$0;jh5qS7j5;3Q#|dkYIH5$a%j>e`W*~>KiiGaId#K1L?aVmG6J8RV zls2=uD{2aXG!qI!)X=N)0j?VLMsy9@@ZuT4ZlIM#2XEtBy>}lEh6R*&;M=*o6@ye# zj3RQy7WCz z74+S$j=|w^>~-D2na-U5*%BN{8-81R?b>O*%P*^Kw_j%LG!)c8T_4n|a$Ga5rgBH~ zl5%#Re7@6oN5r4^*2JeDIY`Hh+P8TrO4`xcvq+6QsKBNAIajr_q-zOMPg|x9D<#E| zs>-*RBpW&?^FuzcPEdd{!~{Vlg7?!%&x84Icb>8+R)TT%VreX91?u|eq(lVm z`iC)A?iF+P)H;tR5F|x%uvt_$U;ZofIuxSzyfxX#{_cZdc?smCw#W$aO+OuqJ(G%E zqYIl2&}SwdU+fxo>UbA(+dYZ_BUApg#4ir*&v8ZN)|J8^Ol)+(#G6weIDcYdAnl8* ztIyj6Z?c(k3H|ioJ7HPyS^Vs(FL%dTDFv03iYHtREu}!E4sBYAL8^I$Xu+kN(P?Su zbf}NH&8XB!biJEBvhYqS*S(wI5l`CX1PQW0BR%p^njWi&+=5jsbXmfaeu9QJP6mS9 zs({LK@hrVrEGrt4QhrQ+Fm=o_-m1K4uR9z2{&1#Rc7HXu2D$Lo+esGzm+DRju)aRT z#+p^c);h*e$R}#Ypgw~xiD-2Vn(#NVm#+MRUoeyFA($pgM1H#8l%I7A1N+lOSMW9Wx~!Mt;52-cLw*WX zzdcG(_Enc`-0ga3*-Ut2DpI+DneW8J2@oR0%t{p(lcn6^Ax(iL7<7Q;#Du-FI=h)%4C41>|>b?6*j zBX_@L4TD)cq<7Iy0ix1eAYY%%^?AQQNUu40%l@f7rg$$e1><|#y4SkJuV(UQ2C_bQ zcfaiM8g?*kM7`0HjZYETS9#|p>c!InLcbDA0(hfkoLv5D$Lk*lWMb$5u5dVIsd1F{ zhlA_O8eMoHY#+8OX`95F8H4WIO)Z*+Z+o8a^UP#ice=ceG`w9k^Rv@tNFCM?<8&)16Y^q(Bhj?EZ5s8i5{3A2_}Aw+?rpEv zDa+yK;hf(#lzmULD`W>rwXno>mH-~b2NeoI&#_hJBz8U+)XZaKZpaq+eg>`t!V*%7t z%jchJCe-_U)(~g5@bo-04Q!;;1T^K0>t0^Xg+i`yp-q#CXtiW8=Nb9|D)_DAjzSc~%eonlE6!4Wc0dNNBk7e)bm? 
zH}Oz?k|Gy)1d$#(`&Q62c=_rGA&}^+OqH@Ur+>u>*Uq9WnebdjoX=?`L@=YW9Pc%N5zB9;u>2+jA~w5~@;7DcspuWS!AFb>GG&){C0 z#k2VF<`<&do*H3_xuZ)Zj8n>Pl;~d#RiBN|q@)r+^z}S6>chDdE&4}!m2KBPEg4Av z-2E!t%U{?-j(d8bfIrJ8J*Af-b+PH`x`eWK6gCmWOn&i4MC$vT@%BjTdsf-g>PMoT zHwzMm> zX}(x3YQ5%y4{Z%h=Gz3S~ArQrILcLjTMLw z`uR9`bp5(K9a@?;+04S$N|=p%pMw@W!x=3vX*^<9=hiMDQL0awj|kp$|5!ch=vr?| zXOEUq)YR?ctt*lLyviP621y#`m@}0`?rcN?u8Ii7w56w&L)xFg|3l!Ai)PG5yXsHD z|1Vv*QT$Q0+m7>fPfzQfk0&R*%Dqf!lgHAvxGOrVBGX-oDK%#P#!M+Wr!4OCjA7e~ zU)n5YOSN0w%}*NsicNE{2Mj_rCxH>F({$ONH_P}fSYoe}&K~bG48-@^wG?`vE(sJG z&Z5;?k@k8!XJijL9^tXPS^H9MhvEH~47BLtMngBX@I)PGqQ*xkZeS zpF~%l+uB={V?&chaZS{8e2{_QW2#;5cg7Ue6}4}9TLVIyzNkb?Kt9Ff+Dl(>d<4XJ z=4@LWGM3ty{i(%#36xj+MY$1|`gH=j>T97y5^=3wX<t@iaBPgo}6m&7-h}%-b=qzuppJJCCc$Q>yWAd zPA3&TZHbE#PE^6vL3c_V+Yk(iHCyhfJwR4KY!6-CAMO;Bwzu`LAuA#wvv)i2(?zNa z?=G|O$Gl+sK2R;^DQ}9#O2&y5XujO!rr)Jz=EB|k^`+oB&W>u&1u*SCx1m{raM{*8 z$nA9ayo2%ioB4uhQ&0^%@2wCwd$E04zDRW4OhUM!p3>tkhx2fgv{{EcO_Ox?hO*$B z6-n3Z)WC_@9|_T^4AFW0{kF~=N7X~i5H*0Dk~Z(I!rjPqpG9Tipr%f)01kS;c9pXl z_$o1B*(r#!?nX|VlC1RU^8d^m<;q_go|@+#SOG?`9n#j%hK-L8w&nY2_qv{CGh%ei zd^oK)`28q*r!^BQ_V$r;AQQMS3NFGZ9^|Uy~F_JXt)|&10BS8%MR;1+C<75v_lOYV5uN9zycz z3P?@w4$qth5eD*_KCK4VSMNyf8ax$t8(ak8h*qaTjbb`vfD2T~B%{(>hhYzoXuk5c zQD8T4$Zim-OpC?_s^$$UowA^AkoaBS`mX^ch7hAO`wC4~3+bS2eUtCje$Cqf7 z1Ww`X(b|9Ncnx|6;xnHfg<@#QQH((s^M7#guU!UTvh!G-9fhT z+5FLSrENUE$&IHkigV)Pw?8WS*wvq<<7bpf>lGn&+?9|SDT#8=n{secLO*-f=+sk? 
zCEtBNgs=Ni_DvCuO&*Z`!k60q?&;+7+g`;m>(^REviBpuduSRdcodW6nCV(d2UWH} zUxzleW+?`~f%btX4!n?cAxqAw70wqwTpVXut>b=}xw>Ay!(r_Y90iYvS`>mIBRoF= z8ex)v>!|zpF6R_?qaTJxvNWnxtnLwlcZ_!mRZ!pqIM~~;^u@~98pZNrq;0?Oo8p3$ zBq9@jITLfrq#CY@V&tQ^*$AS^6iBtM05}6lA3252$V?a;y*xrNoNCOJeONw$*n6DF zY`-LWJidzTFpz*Wn%qk%!44E*O>x{NZ(fUd`pD`Y-nu49sP&lePad-G-;vmJ0Cfg^ zNHf-Vz%nYjSvu6stbbm0my+4BR;;-}8#2bQ)p`p-w%P_wC6>3ZskCny!d+|64UbOn z?9~c3vz~dE6w*4l(g2;~Z4~5#M!)uD>ys0=2o#ym)yLGu!dW}6~`4Uy&GUXU|XP)(o z=-yJ!DPsLMT2wJ#MKLogO1s7PVIr{Ct4TbuL6$-628#jH-Y>X24c=W#6WKi*P%Y|w zrjGyc{jc(S*TK4id_d+YSBlBa`)rWJcv7b4b?2!_)dptu#^6p#KuHaRsT;eAS{f&BRt}&t|Qe}8r*qVFK#ao9-JXs=Z+|!)A z61}4=GF8aV_2NOm(%!C$mRs%m_Lt8%cGcw%g?jl59*$a$NCzcET;m&b%Sj^}y6HTd zGS(?Oaq``K?IOa{j(oHKZ0}0Yw2SA*fTf+VZQh`hXV^ByvaKuG#wp|<$WU)bsSIyqs?2Ev zZ`lN{vP+#L-BY&TL zO&{<@z4b_y5l63))k!Xv@=gL}bMbxJkx-?o*)Yz{k$leVL;G1kV)gtk6PplN&+`Ut z{`tF+g~I$IA``7tlxt6C-rt>I=bssw3B%FS(v;?PcYU?gi$uMJCJZU5pq-G&cuRlh z;1U#WroEXyKPg2XlQ)tU))`M^0qkb98y0Q>yBSA>Y6c%jvIyD5%Z2^?fZ##OXfg@S zUUm`kD_G)FQ??zuAUHchjV6Ia{EdeoD=Xg;P-A#7Szfyum#s2lFEe5nK<8#KduP%| z(cFi=4rvzQi}2)&RHwO%GCCtkq|%Z@DA$^_>DTZWJNQ*(pT55&S6+q&*IhKTZNzNnFb-J?Y#n5sY3OXRJUJ5RV_zG3GLIh%bCw?AY>xzaCccf zMqf4*Mbr5*O5$!brhk1|$!EPeB-FUj8QGKlthdKC8A%@pc)PmNIhSNn(ESR*JN>E4 zB@V)`O7_|A_TYLv#rM6 zt9Bq94{ay2h`_68r}mr_&X%~L7>0W{eFW+)Y1yDqoGWApINMoyUQX}cR2`_4*onbR z+l=J2a@Voi1T#Vad8=GH+Yc#Ij9Q9)?S0y;$r?k|fC{-Sa+w09##FkbNTwa3+t#~% z_vxfysNoPL^A@9Yz$fLE`B=dB)p$4@k^>gyESFo03x5nShri1BJhgc*X>vr*%79<_ za)Q@s%$&dq$SyJeZT_R4X7M$e^$#E9388#KH!kYq0`M=H*O+>}q>PTD3{Q3=t5F%8 zHhmE%!^j%^DEVgS2PNPXR;AIW#Zi~$XNXtZgg(-6$L;W2n(gU(T42i2!}r}ZGV$E{ zUr~C9Qxb(nL&m^{cJ)pmb@5%cR1E5VnuAPpPA-y{Wg--`Be_5p8ZNMZ%}!5jelsMf zBrD2c@`>QS4Mf%bP&Y}rTkp~na8>Z%) z<;hN-yFEsTJLU-A00>1dxP~T57;S9?PglxXa2qU|a1+S^3#-~zSuwqwlQ|^cW56CO z%T^DiJUh4?88dJ1k?_UG)HUoY@BU8Oox0cO3H2Y4JSXZp>L6a8Ieox2kE7c|4=Dvv zgV{8Ma!kTq&v}&j2p>&gB)4~@1x3C*qndOxb7;gyxvAD}?O=L~&C8N@o9;dRsFEZq zzYEU{kgKV3%lDG^BF>cq|ce-cE;H@)pjk73)k$> zxvwcYnP((P7R_jFCzTNn(%p$${5{yXF8TKtJ^78cs 
z6|JJXSgOr;MVmKlWxdV{y2=jrq!ODCPvSVNH<5cv*{Ep^6FrNOfsBs`Z|fb=NUsoS znP9K!b$3idSFwo9vuH1_aqSw}e%8`AEy5epUOOVZtbn$Lt7$Q^kI}K>=JVfN%n*Lu zwv+89_#<`qCw5Y_PcLm0zEA$(iJ+VBt=J07Pe*vJmW}bNVh+W--)w7)@4>adFMNm1 zx`|{;eY2-6rvv&&5KdNx+WU7c^t*x_81m6m zaTgyg@((k8i;b$b?6Vl9HuB7MKU(OTmqB<rhPy2XcEMUE z(rfv^fV8MoO=f4>lJNlW_-@alg9A1)$ zD=6B{hlF@M9=y}2O3P@`T!)A&$@8d-aVGxR`S@Ahe&ToEh?dKTg|s%0WTs|bzgYKo zJ>5qb7rm>CM&Zg0j&Co$0fh0+#l6)Gqc{gx<&G{KJ|$B(WxK0OZ?|iz(-Dk}XejP4 z8B5~!TJ7QcyJ!R+diERSUNXZhrB@gYmsm` z*kTA>lP`6$pb$bnbUEX1t#F9v;;}=y}#eH(2mglonh5l<)p&t%tm{wQ_)?^nA|7i2VHZl&>i6 zp}bjzzpuX6^}DCfjKD`IEX-GUGD^9kqZS4RxM|1%ugrved#n^(88Nq56GAL5JUWn3 zgnm*|vek3jag4p^3rs&|&AucFI}o9M+nfzWyaQ|TCN#5&d+f2|Z=2yufS%(ZimtvN zY(0P56-D-ZIl3e`bh8k3Hs8{cja4spJH*pQyW-406A!pK23#P+Z4vvmK~!$yQA&sj380&oV|z&N<9idGF%Z^BYY`v$xtdm@#v-*#?#P((!$IAz zWkgi^ib^HHAt5n%crJn|t8)95%J`%W$R+4aL`6pRNt&n@51LqMSkKd#`%V}+#*5ao zGg_7>Dd5|YpA3YfzQ+3q!CJG&y^XbTJuN5UaDQ@(x|3Pu`*W$fM)2)R?)5S5c^44w z02Llx6vr1w!74I}T%DvxmSPqj|G0VZGp`MyNFB8Nb|0VyxVM27SlcBK)=I?bj%mU6$+L3Og2o6UFvA9 z^A?&2TuLm6J$yCuU@@|iKv6EoKFY)Q=p5*}N;z4}2QfVpXREnY*Lgi{ItmCRbFDwv zkS2L8L3DLRp4Z2FlAWAawjSA5(W|@Z#h(C9Bsjgz*=z!Xs{`i1e z)nrRaNi&57BTcmWf;k)PW`V!|k$a8qVg}vWdptbp<>)7AZ2pTp+;fK`jPH7c^Pma> zedVE>oV9Gk46I`Uxvop{>5(x?MV~&iCART`^PS$I8+l)L%ugwm`LqnG=cp=YVKbG= zieb5WBlecs+KTs`THCGLvGg$|1DDAC)!t`8s{Lio(#;R&-Xl4LUjk%&;Qe@Rv>z(e zfDtnQ9AFv1El63ku&8@xJ~PMB+eK5I{KB^K`b6qMfTvy4Dj4+WKX*dcL`3e9^=178Sj6N$pbj)H<;ioZtN4==xWH zuBR>^PXBo~?f>EJy`!4Unt<^YMMVWx5KyX0ldkj*Dxh=_q!*>P&^rXgLNC&#gOt#F zlaA7n0HFj3y%PvXhXlg+u>0=5E4tt7`F-d7{_&9H@ws#7PM^6mK8N>|CQ;Cw6Kh3K zZ8@@Sbs&pFg#Og#t^^n0XVRnaE@T3ym4K$ii^_^mEzK2qm|1D=OstxlfF0yTY#|?y zjWJl>E3y#Fs@x5Y$~_-RW$~RZ`5Bsk&;aDL!^a?sSM#6p@2i+k1CKXpP*SiiDnl(V z3m+mvH#Dq2)Q_vaCzM;Cav|C|Qov#b!FMFPseY{@Efm`4veaI=OVNOh1%bHC?~xa6 zq&~?QtYXbf>!Bi?@E(~%pw-~|6T?~G73TjajZeSnTg7QmsDw|QTl}Z1>!Zu6RG!=V z&8cyXUx1!b3h+!?_B5j~2n2&!95j$+K)~d+tVVGy;HD}Bt?!P97B-eUZj>KjIV@L& zTu0RrV%gx6p0t*)cDxYZ%4wu)zG+W*4K~cCFhXiyvhQ^uIu_LMu 
z(rj*NN*YJFv3Y%4Rmt(rcXHZhGhdFUk++|};X;_N_*fmAib{3=+&Mc0LP_EW(R_{CcKS^0%xlQ_R0q zrZhr8{#&q#fh~v^eRK?Jpt9&w+DXS!7CoVxih~N+;~=IR21INAUkoGXlBRF?OO?70 z%{!5Rz1eVsB|YC95ldme)dg$!y2s`XL>O|$)yGXc*WaN&mGBnlfTlir2#c_}?KDCf zYK56*J9Fyb;f83rr4J9z?}!_39_uBl39kmLYL%RfjH;1B=-V4u*iM2JAU_HQSoRfa>heXVFH z*BmfeIkSGa6ZJDG@j@c?fQef0d!Bf~h^K1P!}3fgiWxI*`Slitz*R}>Frxd^tdsk~ zr%x_TzpCQa>Xl;;!2n>Kuo9^-8=|Y!x@u`i<5>G4FN{Nx1&tt-UJ-}ewB@s|>&6Fd zR|k9**EF6R3$_nlLYC+Qs@_#=H&84K+0&@-g-u$%;QBjRBP!!My zU=-u;9?Z9^<#Z2pxZXX`M>0iO)SX`r=haK#O~THXADK^?$v*33?AVChN;>g`Ie>)r zzW`3o38%Rj)z3xfcl)VlR$3nlNZ|XaM@tR8%bbEIroUoxM|A(R75XZx-%gO#9cMkUbUWVaztavBrO#xFDk z5-&P!r4=m%O8(~|d|1MH(}=awI+>}>04;g54&{C!)&nW9cH%Luig{{`)vV()J-=j- z8FgYVlvO&_4+4ewB70&bB3kAE4KFt7)i5?4)3%Pb+zw>@LxvD)Aa)>&$hU|Jkc)(9 zOC=uWYiv>l^2OMeSdl+nI$0*NjiRkSADKll5JQu6a~}nLw{JOqKEYWP@?ykMBNvD< zjJb}zfn^nm)+c8_%NvS!sR9vBxl4|fuFIPrKOst5vA{mqaJy~%y@MN^21fC zhRq{Kj1bW8fg$gj{z-TF=0MofO!`_#SHqL`KxV$IQ+0zxTOz|RBGesMc0K@}TFTZg zx4sQvO00ysu$f!OJCc}S|LO&>k2p`}DU7et(8*nM>IKA-w@gWw{B9)+>{~%> zJ6MPjsV$WJWH;-osnLknM=kQBo|WBGyYDp2!dxVL8xXun&y1yk7&tGHW}(dCB>u_D z5x?gwUCe{I*sQ%t3WxmfjpUzt8^DD+Aal?x&yD*}CU#5^2YB85V61HQK{YW$Y~(}V z!XvL^Il}?HHnBs;&|BW+>|-hvxkaJR%0Kpa#v6mW<##2bM{BZxa1QG6bH6dfPx@f~ z06>ju>UichC)+=lK79yiA$yNaYsk5<4jJ|<#wp3Ewzh}#ME(`)`47i_;PL?`6_2$Q zS~q+ukU#$&%l-h&DenU@A^;n!NjyI!t08zhQwss0rCiXb%wK&Ky2i<1@l2bv8qL)5 z*;h$|^*~LX3-WLg(@8q9PtUZ=rcfbNs1SXgCLX=98C)q_IJuQj$&3Fz>3%1O{_~TM zH3`rrsK8~o?mOWHWQ4kcU`TbW9RzB#?V_K25sLYyAhL8MqRGT2ut=3t=eTdjQBt$% z%k8)uH@9B(W3SF1R_g=dELbPJKRk=NG7cp@aK{>9jrbGR4ZAA5NR_Sfi|m4IfG>xW zl-B(}D(3$1%5(v*Y_Ny&Y=6a_V(}>-!Z_PK1oZeR5S*!!8)^SO#{4rq3-tcL+i^m? 
z{raCW&A*;;tQU8>4HB3O$tkz$P2wE_f)pBJ>Y!DW`D!l~^wrI;P}oqe9>Fq%C_0t1 zoY5{~tZL6j(?utcXtAsrv83433>kU#wBdxp3$I@|8eV`df9R&!bVN3?w_PHAS7RWSx0^xY=@eD_{ZcJBfcA<6ZJk~IDwab8Xt zT^hxazAQv{C(dYiJ$EB9d0pDcX_t3kU|?dq(H8c+%6g(^H!nGPB~B3hvyl|!kD5UK z%A8OCzmFaH&8LP02=`la;Gci|lMw^rzxEyZW59cBt5ygm`e6jZ@4xtp-?O#P5vd~g zVE6Bz-AyisZQlMl0ncI~swrt~F8WgHlMA_braR?_mVHHk>HVqBIGbZsfwbTE(kFiM z0?kzFx~oaNkAFD9ZarDsxIJqWkFgOhwnQB+tgi+&uI&}( zkVPYsoqyKWKZFHnpgE@}B=y~3x?T3^?nlS9$L71Da&4yr%Lb!g(FQ@hRceqHa@Q_? zhdwjTa~wn4)f5{Cd0C0&>RuMmy--@lnA#K$d!0`p^E2bl{usT`H-ckrNSM-3``sXh zU6%XD+U{>E=yR0*Kvx%W-~0_w$Et~&R{8f+hoo3-;hG6`YX{h{C*u!Ch0wA#`y0nj zqdn$l=dGL$D(LIO`I-vriw$f3=X&DxS2H42C2G`CI<0=&^(tWKI#4?h+W5HSo{_ZC zUD}f#FR_aWuwPF!rMgEm?C-F`}}<#)^!zgCm~j((OWl#!zE!n zVw!rETHhbzpUCH~`3;Xb#0&S9*gpqIaK9C2P|m#5)^$5U`^EZa6;=WIq}0+D@3}@d zYu^3c#kI5KbC2#tS@u_y7RJynv53@ytH#|#@m3Oz$3HjtpBW7yjMKc6?{7W$=X$rl z!}~wTB3P}-M0r!v{Ru*G9@~)Eyv7vHse1Rvr{zznWMVkPGU@Aw$9RqFLq&|31^KKv4BAH|>lfi-5m>owmq z)*rhdS3DJywI;zgzvJ^C5cbEfKDk$dz5W0ViJQjt6Re#(1+6Kl?fox-R@s#O2WS@h z1n8egdU6E-G~ZgIYrg~<@@lZ>AE04}09kzYgv3)fF94v`S+@EA3TQWpeXjR>{Jh3w zXwSBM<44SxaRZ8h92NNBmj-~fJx$%xRQ(`g9L$b8@e>BaX9O7Ri!SHIzh)u--`JsP z1W%Pa=t>zRc;X$^_-dQ&FKtxTXX_8jEvX~eIWtE6R1$E!LHuB;tu1xPAXW(_6|3C8WpNQ#u zaVkAVa1E>f`R$M5>(5^YAEl14GB2;(;|YA<{7eN5){EDzKk4}GM`WBKDjz(CVzgCUtf(=^Hh(iTWuSn@$MhIn z#{pTVl#>EKFrsq3k!;CZRJ|A)8VJ{8$&uYbY^XEv5f za6v7FNq4r}d?k(F>Hcls`)Rn9_Q~(R9Mg2C)Ny1|@rE1QC)Q4bXFD(q+}9ZOX>ib5 zIYn_-A>Ok3d`v%J{{A}7dUe>VbN?#E|NBD*F07MDfk6P5_ySU5`pr5n>~8%^KP3sK zuq9D7DLDA$?Nl6ed`_9dg#VAj+kzVbx}5XIzxk8-nrBGhKApJzmEO04H$JS_F>g3~ zv-5g%y@c8zZQouq|9tg;Q!!F?QQq~^Z)XYcrm4>XKTXP)pQzch+s+n8FIv!Cxqwsw zH0tQ^r<73@TDGz9`Q60cA}3PbA9^E1$v5-ygYy0DSh?(+d#(gfGsJ>c4$_kqAhhTa2y*K|kTv zzgPM%0sQQ*|DWMdWt~>7yCn28NKU`wxemw!hOd+t{%y-=xalV!&GZ0G5x+lXZ}Q7$ z@dGsC@)u@#1g>2A!LsmqaGgIj5A&svHa2GB6UPHPUoA|7r`h`F^#=R>p*`>Fg1l|w zg-Qt3uLy4dzNVJzFD^z0I24?9J$x8$RO>=9>Ha6d$y%10GacXjp7V!}Y~Xf{^^%!O zcTEe(KkZ=pnE?D@GXQO~itf@+xTz!uLYMhQ_tSj5lcUt|pLg<^P`Q+?NlrJQP%C{udR8*@} 
z!rBX3;>rUKL|9rqj?FjK#Mp9%WF;=bdAe5aEBsVOAk~0Vt*Pg}cGgYs={gl)pP=fB zBzDe)R=vx^E2ApUat6|?o}HBx;>TC`sVHQ-*V~_VC%4O5Er!xxLKc7(&>P1u1&wUM zoqOCJWVWVj_mbKtV#{8B+Z21+%kyMu@VQ}YoUnG5=Xz&i;l-50gq@?!#{y3IKO??D zwWb2p3_tk~ED;>{s^{Zf?8n^Q-ZiG@=^3=&@6u=Nay6H8L?|KlVV38nfVtStcxi-+ zmCTLu{GV$`)1E4}K)7(~Y~WYQ$>bmK{)#1n<5`NmezzD}_iVoRz7!+w?@7QOvS zd=#LpD0#`p-!BKuE7u2<2~gNdfqEJzVqE@uE$sWlRkHMHfzm`n6{l#~guIjzH4G&D zT!c)_4Y~zjhjD(bCM&5w+dr`4;pvJtlCnRpn5tElecwaoSl?3WjB|aTZo2X*2H{N? z8G2B_&lZ{Yktm@uL=Jl()@nYL&3xRC_mjQ-hC}@Dw&-<*2#2!<)r-4$fH~%nRFup- zyCxj5ZL${?o?Sxt{X3dm@f_?7OBTEHgJMB_rHtpmjcnmMPI0tz~>N zC-q|QhwByISGt)75!-Z|Q%pH>thny?W-jG^#=9>dRQ!ziT%&iv&g2m)(J(W)md!MY z!=7E=FkY|2@Aeg3);xs;$?I`O06EK>dk`~P~u)3mT6?5!KM9lSWxD zFpu=O+U+u(riizZIe$8(FFsyf4l!*^tS!bgpRXNmu=%KMi~V!WjzS87!ns5)WBYba zS=G^s)#%RbQyw5b4Gjdi4hfv`mwfQYgf5cwgj}kO*U(kTNh2Lay(6K)lK?V(!Y?oL5sXgQC5q8h+oC2ed0}>| zLNbW={?zoTITt8RW9W0`VkK)c5h^S9B~qCBMy5nb3?Xyn1AqOYT^(&e%S@UZTt zM)s~8j&h;|@T-<3N`?~6+NV~F&{^oXa-wU(L8sTIhhFp`3t`vvfOm8=BlaE^O?K5O zQe~>N-EP(qN?l0dx=gfLtDXTj)TX2>@BphBBy2DOVv?&JNo<31h;Tpjx>dc=9^tvy zHUst}*ay$boUD_lfqGT*xFt)o1W4cU8(XH?W2eH z%$2{4n0N#Yl$0#@e4X8QSuIg}YQG|8G+n|%w|j&#c1amIz9|AP@I`p!B}X*3v(xDH zvcH|UNOQRR?XZP+LR-rjlcASa6^Qusjvu5lQ0rEo*Q#AFQPi(*^sT?!@i52Ag+#24 zcT|>0i1N-RU!BT$j>FEl>B^_tS;)oRPu~JX$*3cm>ERvIbB&5c>Yl$jpAu%9vbcU1 z&#HAc|LhuV$;cFs@&c)JSCzFk_d?7XFZG|S^Wi;CeD=qsV=8%70ds5k*tVMN6d?_2 z%yr^z`E)|18S6z!A1AbYZL6g2 z4!*EmG=t%Ith&B=>vZH;FVd9z<;%(py}Bn6y%WrxB&3-eWlr2LKOXmO$OJJMN{}#4 zGDD--xJwMKN?YwyZ&rX#MOXu(muN+wxI(rDnp6|wShqwbfQan2iHc(2cdR?9f|br) zvacKsq;F|4v5F0iNeYu=Y{_p#J`Hc)U8!9Es%sR%Fsnf%UtOJ)uzG02&bg#ia)B@* zI_o5PBgo|W+k_Y$7rsfAc5EZjlw(m{rGh1-MT;lkNqb*SueZ#}d9m2ZMsI1lp;$Pt z#*ADtTjZpNbg@jNFn1hXR4UOzjQcvTe6S`9Y6A`2INO;?6fI@3#yHv`_QMYrF;5Xb zSGk*)7-1fBop(DYts*1B)`O`s!B~O##4T(`r2=cX0)Od}rUAkwK?wSA8H|QN5yi_XX%JOcUZV zQEZH|OAlOvgus|=hR06m>$@|vbPSytv_+wE3V2l&PO`5*9Q8L^F3PZqBs^JyYy<#b zZ>*NfMeKcnMh+h<;@L2NqE0y`T{)U(3-4r77OHC4j8t 
znoMCho1>To{O)~l(#-r~9Xed?0G#JY8ip-(sTvgCSqg}w7{!a6=Y-gs(dA%8nA(qLMzfgf)RTs9Ci$=`OG9YWG|LzK4WI$9q5`@ zoMI`F-!j$7$w+ww5}I_dbHAAI2yu0Yagj2stdmmPY}*$6$aFt$CoPY8ugCfKDNp^l zqa1C?*=XIAHLUQdAJ0STA1$0bM>*3*ObA6G8?2@ZD}>M?+=*^0@c;F@OcN7ueo{ZaRwno9}`nx?@h$6v2a$4 z=GK06def`J_$Ex3Nw1eB{#8jdoinACxq>KckT8Unxtq+|m zEl-8E51N>NEAWV{7AJkst!YP8=TIYv{|QxJ#a=Au2f^b#E(ixlU!ujI$HMSd++B(! zyzD4+IFevZ%OiPb=VZFlfe)GdJ7cf(-B;o&SAi`5U~FvU*>LxZr+{>bTlL&kJrY!HZ$;goH; z=Ea&Bd>w+F6wx3L_2XQy1vZB@J7Nb{H{tdt&V7-_fVigd(RG=qSi2D2lx5Z!M`!m7pgCb$oAnv_wN?!y*EkPbBPUy@ zqGBdOhvOb*ErJvZytIb(u>`FY9S9RH&-{~PjPX{8}E;oDW-x)Mv}KiCxjw*h;tcNrMBgZRD#prk$XJ@e1CF2Nj;@WAH4%-C zC_EhF!}SSZ>0=mp;yD%PWm)nS%28fXNd-x{d6F&~?y!!E-x_Nfn*aKzmDLw^Yfyp& zyzk`8kn1@_^!ypNiEtem|A~|WVAIt83sOf?Y5fXOn`Z;2_xo`_mt|*|DoSUODmaos zX}%YvsU;T@{courC2xL$@HFfm3D-3n^WBhFxHb7QRolJjB;L^rxj4hDZL1|jpC(Oz z{sir%03PgWIu>JlpBP~-Qmt??`QT0Yx|`1AI+)K>gj0YXaeo(9KU?5_*Oc@+MZ$on z?9t>_ePr(hP{~H`!Mb@db3aQZb2~D~FVE^u!%#C(LVaUKaw9FotDNPHqOJkxFT% zF@#WdO3GuI$LpSUtD*R`Vh%@FS*dKQl(QqyT=tlmJhxkTq=uM6Puj*|$2u-8-l|^V zLindQ3^9|lGQy??bn1~WwCMUr@2Utgc4nbdSl6gUD;47B%?-Y~qu7`lOuGFecc?rD zgYujdY80vj6a*hg!8*CLQmoT=heao(5T;1qy?%6>XgLRn<1rXLYl2aLYg6#m_YBQt zK|o`DDb;avW_J}9Z}L`t7&e825|jA`_mzkgS#u&!GQwVV3MMmb`O`(%$)lu;Oe0() z&d0P2#3ar`BHn; zc@(4J&y$5w77sAU~ySgweKF$fC=4kq31aR+{j&(53C-)rZxC z)gV@avNm7GLHB$Zlj_nrLSinYx0i8TmY=nQg^e1K=TRthNLTesBw0NRW7sg--{uMs z5%iA=rJG=gY%&ZGUlC2(AxmPIx(43NBz_C)4$Uh=8bw%hw%?TL>vtFNa%LShtPW*3 zm+4Cs?3v`&9y+H@6h+JOH^XjI6eK+~p zad->hxa6jmN3J?jN@ABD>ew-LcO`WHx7WGEO2p z4nDI|1>Y5XckHzv%r`vx_eqkiWTuRMqbdx86-!2cO1b|y5I!c?zj^49#MNm+SNK5# z_QRGX20&iT^I&~#=T0Pxb+oF)v&SWyJ5x@2daq@yy59t$A|@9ZF)bznOwW&;IOchz z3-lt?$)3-wvU1EAR6%03pA`wyb()gJB6QFjCz@WbyLM3Bla1rPIniOULxcJ!_FlCb z&~ow>6c8W@q;MxdyZ5?p9SUvlb3BW5Uw00-eI1y5@**6}u$5I++Zd&8d^M?tu2A#* z15K7Py=u1IhQ?Jc)orH4YA??UQM9wZoDMOiFA|1Fpx_5}HV`1QlMnPa7g9`tQwipE zD0vi6jI~RPY0<=Yyw9OPK8a+s`be_=iD?cmdoQN_kS*I5YNkM=_?!p$G)c?nJQyS(9ucr4O-cMN?UthJi$l|Cl58#gI7d!P(P2HDOZ 
zIAvATxr0Z$^9Ixoop`;(PL6h${OT&vUnKQ(-H)z=c0C??A^e>y8yl#XDu(VXz$MGe zj-58@D*M5URk9m2{WgP#wbvCYy;Zf3^`iN*I-FMswh%R;*qda`;aMbPG)&6JsgWGaj*c>fLiT6S6>1C@c{?zNaa$TA>kOyPTM;^=b>(Q(; zm5OBMNzAautm*YhBxJV|4a>QTeW9pFqg<9d&jY(6HcIha*IC!X*qB;*&fauQX>YU3 zd|C6gO574jTxuj#vqR+B!J(NpRg%|7`Y5Z2T6L_J)iBg8dOaA)xI9ml@sLII_c9FR zd}}x(B!xMJU~tg7B(GbfcjRe}{%3RU>%3zc-i8iQJoqodZgHOKJP?6yx!Krih= zLXg77^?X?l!YU|=1+p(rLDiu|Hu_|O4MI~uBii=uQT9fE5%K}=uyzT@$mf*ZiqWgJ zy~`fhfS`@z(aFrll@M$nas8$4_s6wT))&`XRLyyVQrak8Vt4XfZ^5~asnUX-R~Yt^ z?x>b*RgKfuX~(2garGcq zoo-}@E#LlVVdm$LfBktsW3EUPg10m&SptV>pE0=>#m4*U%F{^ zZQEa5j0+EacN}gYKaU0m-QGXDirz5?{9|VircOxYKH|nxr8-IhH3aT;eJh z>B{Q!WSg5ipxycb^&lO!Y>gcVbPrGJ*`}(XIY1bWjP*i1>#pdV0~wT4W!fM7BFta` z4+I?)+$$*nxs{J_mR&26ppswj+!2T}OV1%)aR;SGM)LEDL`*#4N%WjEHJO&~{b1NG zzyIl_6ji;lvzhRUQCgZ&iWO0B--Ck2G$R@?k38dz%=XAh4v~wxnKwvmjjq(S>a-k}p}VT!Y4)*;7|5>+ zGyYIK*(Z#HzO-GNX=%Ql_`B)r@AF>w!!Oi1aV#cTTd8@KKGjQSR_70pb`w#HfB#_6 zhoe&8YEePpSO6p#BojWnU&K~g(UPSK5`8Pm9G5-PakNh6h~pmx6*bsTBqVV`4!f^b z7h-a(hO((qH;332JRD!k@PrZ@UW~T54{LY5vb=G3Bf9V?C$Y^zZr9#Qesa`$fsu%+ zAZc^26Hk6DpdEvDx=8)RblSW`(}i34$rpKKP%76=MdVkZ6({*w0Mp={!FE-_hwTxr z+~JQ+XK#&}baFGvca_x4#5p_0|JF|A1d|pXeM=$xv426XRB_!(NUI2ik{mHv?{9#$ zQ@IaHP}yZI+f9nbB*xQa;4wk>;j&wo2opt)cT0px!L)r}apLJ*g61&2NPJYyO=k2v z(%h0;{98yE=5y%hTQ#&%UDe2iQ>476^nVGzvF$(lurpwNN zv!Eay5007XYc3Ezm|dk_%V;Vc5X@{M&Al$3I)lsYW*XqQyXvOFtt0TFc@-fTpn>IE zut*Hx;8+ovTdHRVi@B9s&N&S&M~MFZp#!JNN&awLLev{dXA}G)M!!eD`CPvOg!UitJD&~xoNB`}@+rH%X?g&TtQSCkLZSQ1n~74k z)oyw2z+Mdb#}e040UbTO4PCv9FmCdynyeylFDbG71`gegmNDNdLXH+KUosX=TfwRh z%k|y52z9tC$tQUB;6c(}99jTdAkb6CBkLrx)7&D!*5=)sOfxfEpvm58YD1u|UMEV5 z$3sH+n(8zbd)X%gI>TAI;eI{IIClquDGQxT!X)OSTT6lVY7H}_)=<*FgOU2_8(=4f z2OF<1)jXB^GamWxag;NgJq`PkeuLNF&LKOY-|X4%6zFep-YC#|Ez@0?(y@X-C9-W6 z5cH3VT=ZK#ZW#K~J&K#)mN^T9Fn4tpk8Wvgkq^1O8z@nhq;-T?CBY}Pd3b4{ zDK4qXs;+rkrX$kzTg5v%4mO6H-5sRKoLDkPB zfGP;?HymqJnt)BUhKDbOjk!B;10m<9=ZI9uw@M$FUHJVbY4#Y!yBTWDANKL%L5jb) zhI=su?Eu)&&?kW&K%A$3YdSlW5@Dqu&@+CgC=7eayRa%Y%XhVYmxGYQlx#fb5RY^e 
z^o}|`g7c_>C;l3jl3A>8jthg0TE9m6Q5`&`FgtA1Y^}hZbECLKe!nX(9S2y}jNu#~ z&&^0z164={SC;Usz@(;&u=@mw{em(@LuY6%+-|~+Pz3#wyw+?j(Je76EM~|;C0!(S zlP~0CprhAus7tBDD0GfH>mqUkzCdw)wUrbUSegC^e|>mY`@B<9jc1*2-e#LP*ts7H z5vlzc7G|kKp;6TuW`3b@49zM&HAWo!l`)7J`wJl$xUN@20#i|VwUP|XL23r z9WxQ4fyE{g(AIS|Qu)OWt4;<20V1tXTCA_jJP>4K-%lQ@)3sEaDbURNBzu0gWi1(z zW>B`ypQP2j*y(ul6ByoTHDy1hRA^#YhXwc2#}%T4}Gi*0tc4#ZC72g3fM>9RsB2^u!Wuf)!aGFldiUCp=x9)Q`Xl^>(U}cw4PR4Oe;67WGU=Rs$piu>x#$k#}729ee(dz0hB!Xq3kN~=pIOnNNUzitAwd>IHfPz_u zwW3Ev9?0KHDC7Z21eyI-Wg4@Tf#B2PiPS}$&}xz?mn^>NNu(S0mP?}OZcRL)>l%R> z9x239u3-B2;k|1igqDmg=tIaHNNHVVriAsn3HD7ddR>mkU0{9NLUr+vs~-%Glfo{rbaj~ zus&$ruIEY=wSl5^RTSw3GOuyR1LXA>$YR5kVuPB0fxJGvnyP<yiol43!;N6VFpvC=e0=|H&{AzYG; z&RIxnZ11F~PCh4E@hLL;(iW1U;%-Tg_Nhz=B4}KY*Y6uu3GKEV@R);3)-~6m-Sd}g zQP(zW^gAfD+C8x9;~{}lmaC^}=1_DHzx8_kq?dQqj+m>RUW)2z4ax59X@3>RizGb~ zeHfeL6WPN_ud1Zs!#zYTFg+wSt_$kUNk+n-hA5vb)myS0;X9W=-95Hv_O{zZYttiS z_?s_z0fO5!rL$$~Zs=h`le6W1nLlr4u9byQ*>c~9%uO$Rt}C%CS}%`dp;q`Cnn}A_ z8mgulJAm7f|+FHoE~P^b>%X%vZsz#6j& zBbVMU#Q>>#80d+Gedm;sxXqSg{hrO{Fze7;zYL%jD01X$5?y7h4-B2v8x(2QJ0VaO zqhl8YsL$X+N$+VF`xPex(5yCfRGGH{G!5omupY ze#TM}3NUwVTUUo89+!A=DmN2R+RybWbaQgI3K(!?-6@qFtB(5=|?(H z23eL9In^$y)VR->7)gVwx4-VU4s7bJWM1s8GwcucTeVwzep4l3Z&L_gw)!%ukV|)$ zPR7gbQ3HZlbB(Qc@{uN;Y|W)rg&M=i&o7KaVrYiuG&D3Jo6sGYuQzM>JQ;-=i&iP? 
zK(G0QGB%9kL_@PS=T}8;(Sqx&n3XOFw--r%e*cz+*5L9D{%gSfx%5}`S^N#&;a-OP zC!vKoc}Tn9U_3y#WoL=EAl_OyE9yTdEnbbzFYXOrQ<=GA_T6>7vmxX^iui)xi0A4L zADbl0VZWu&Hw@izJ8w%V|J%r(?n(zj`vk{B>j?9kW-H`NjuDEej}ahY zqjuCWgI(<{=5hJGej@a|hM2_4(dhZ0eVGmUsjkAU(|W?elvj~3j|dztvBQW#3R{k$ z;J0NP>+xGg$N1H`wCKRSN)L%Jn!y^eCxfPWtmh19LcGn^QVPBbduh@%LP|tcr#kXz z`kVEiPhYrLKAELhW|R{4Zp?87?Mj#Z*0-4%a2oCl`lQ`bnZJ~LuoV7*Q&4tK3b@vB zdhMj42+l58*K7F|KB8k7?^}=OrZ*=LB4%;)lvbl_)byF{C0%SmTc8qE-ea6S1;L|) zUfBwEZ)=m5w#TWHg}nrUsot3#OrGEcTkf7K@EUV=dd8iF8j-(x0i1DC|Eov)zTq5O z8ci&0L=hx?`e^*5mcwy}zG6#S)4R2{b%W)2rV?ATWPBX$*A3KaKxLnmgjb`uTEJfr zvIMy&Kw@@^ZEmx9h2BrC;-W&`b0S1zm6>-;A`Da2D~nvf1Xbq2Lqy<|ERmvH>Pcy) z?R9qq0|)|oCpoCxD9)3t%x6!nTpA&CykwzyT1AMIZL;$#Q$e@xv+rH3cd!<7KyRVR z7e0h67W*fIq}~IO<5AtU$)X~Q@$iBUVQ62wqS**5OOKM(V)Ua&0v=OG9G3(3ug z)t;uE^A}3m4>g>NWL)=%^;elc#V#v=P8dVJ`K40lI`u@P+Q%o$3M@#2jKnU0g$GAC zwWO8s(uxBCeZw)0!H&K;wx66ZRn*ixNjH9b>JpCtmK!H;VLEC^RCI~a*m3J=YR}U> zyfvr+E{*~&$Xv_gT%jVWN8s-N&AnE-gp<58<&f4UNi?-S0Yfix$ap4H1lB-ng_77`9Tx zVUuEyki@@TSG1AYl6Twgv-U|ue&bpJ(k`M+y)~&^dhlgSgyTi*vGz)>xetR$9rORl^$Hn|wwWLwNS9F2KRV$ABTh`O1wd2U$CanWJ^Mm!qIbK-D@uP&Q9LnafC@dFRa#nk$#% zv6N+C$Zndl1(=Y7W;#pkVW~C^GgS@k>k6C&RH8^iGX_-)gZ$ME4H0S`sc~eAY!w z{hLO!sMMUvvBt;F_>D^3+I%yCidhrQliCDHqDZ=ig@>~W9&h+jWi93jSpoccJ8s_` z(VwOZyi8gG1W54x+x}2PL%;Li`xx0yiM z`(KZZT>R9Jt9?4)_|zuw=5UCXONlR7=c^b~zcfFuW}B(IpZA})KY7<`}|g-FC&{ati|)SiCN1+VMgoxly9qkKWTD@mQd zah>b5%I+r-rT>0sX6rL_eNItgFj{4ZrvDy^GTiL>451dg>5=jhDyEDU^xyvvv^WON zvxb^Ce%Sv`bbO^99Y%uEm-(+1QrB@4A44@fB;>|%1z~)+OYaA!?4^!jrR$3R=l+-F zFaG7S1bupWnC*_X#M-`vgl=(e#C%xjbSRHbA$LiO9N(1?n}WjqcgT-w6wS02LkkXUm@ek2#$EY`xn* zRLGyITD;=IM-!zuuGdi1`2aqA)Y1yNOG($Yf?h}22y77!GW=wocSWd5BFXkyx2*AA z|7B|9wNYxKTME>A%0AZ@$k zi#p~dSnT6Dd2;8T%E98Vm?duOzJts*`w%UX?nN;fL;J;7&R~viR)kxl`dm6WubPJn zZ+6L}Ii?U}u&-GE(O1~bzG8T!DK|FD&dEfVhM9R!r>dc< zyM+?G!?Q+4vA068Z+4{eC7EpRC^r^nrca@4J>&4LydUU%JH~^3+g8i7X6a#wU(29q zIRf7=;$GV=q?S1lUIWJed~I4=HEQs!R~X&@JW38C53?9KV6%q1su|KPC{37`Pg%Zc 
zuh<;ZC`&OgoYH287sCS;Eh*Dq7FETp9e!wYA}6yPv2++oo769I>Il)DJR0?;&$6oj z0*S)j5Q#)3PDe4hK^7UebF=@;3IFz2F5`=R+qd0`wvxA2ooT|3j|DW=yw24^ZfmbK zgxUtgH`9+Z)LQTBgghy6$jh+0)qjxbUn8~a&rkBVEc96w;-$dx+~? zMN_^%%>OK1e~yvd)T}P+I=fnlt)cLx*jx3Q-ptDFdonz|0v0!uv_6S{ zf%1KL`ny$ac;VOn;u6ZDa9Fj@oFzGjWfZeYd2<|jySd|Aa_uSS$6I&Ec|(c0*$1*E z-GZoFptRboFPlO^b0rk%s0TW0J#qA$BO!xTW|V0qGG78kPQ3D7BEGcldE~W`@nF+7 z1wu@@yG|+LX5Ok{z^%qBNz?-CL^H8j#F&ZuoxPyfMF zgI4XQ;&iWklTeQIsgQs%<`T_;G-=kO>m2LUU61629moH{mMsw%0I(U>;8HVR0m7 zs2*ZFs>(vptvcp*qB?gY#6G#U5}{GQII=y_mdMt-`gwzA%?aLMbE(_8#=crA))yQy zwjU_vT0i`f*EtSeQQi~KE2v;9Te9S>+1RuLHc>KP%Y!a|S48o*GWc6`W_$Q#Vxpl|?=; zH4FY_HuMeRY-WkpmI!>39zz|Hr}P)oWOOqXjCELZV&D5t%nMHP-CF&fnYn_0i4f`U zm0hAKNlP4HjsdK0eC(b{uQeyTDsMl&_fRoKvTXEQ-f9iQ*k|~8XK$YOsV!H;ylR^UyX^URqzn~%q;xSkZ?yB#lIv0B zt`Zg?EHr7>A*eOcsK+3`D>8#3cNZpU$b^F@{ETS4+$D=ko3>O7sxZjU3%R!M3gNJH zEz;|dha`U=;5ZKL;knXJk}@e1VdVk!X*UBVNNc7F$Y(PAL<{oDgN;&T!oopfq)Uk? z1T|Yp%jA126Bb%l2gk<9pfw?ts);OR1$&udAjizYqciz5%qdceo?)-9bUsJSGAc6h zV-Kt~)R?cEj0NC7v+`AyuelK*i20lYPnWy`-d=rK;dt6hXXRF}J1+xdqUj(PFePw- z1Y^jgOg$JB+|uqP?d0APx_32yjrHkF4{vvuKKCT)&t|Frj=VNGRS+hZF<1&jzt zR}c{Cy@Q3KbOfo9-h1zmVHBhVq}NcTcj+}Ky@VnqKnzh@=tv1IK;AeG%sAicT+jFY z{T_bh;^buSv-Z8#y4PA~?*;KN->WrOPN`BPRUar47U>A;zjnVG>PYkb?fm~!N&Z}o zfDopk>^z?r?W`b$6;X!e?7~%tg%mm4a;O49AKUx5{;iZ9rF z#IjO%g}vxlm8enYlXttk^QZl8qI=Qy%8Qb|}cNlq} z)5L9HGGHQa0+C1XN($y*GM>~SNkwflL4?$^rYYVeC2EWk14h6@m%jA&A2RrT5;RU> zx;7~?CFrxgs|L2=U(UQpN*p+- za3jGtwI?j8G=lar#Rsw-L3AThEF+8uR6a_p@#MwgUSG$yWR^yYk{U293CHq&-HF!O{H)OO|eEg2z=-v|B|~agcaI2YPVQ9sV~xll~fSo?XU=K6hH_KH3plr~FhN@2n3nP(gw zzlpwaDg9YmtNYX{lreca$Blkin{L89{Wk{(D*v1c)i(P_BrUAM(r~GqO&cOM1wNy` z3seRbfe-5C5^z-7gPc?Tn)fFyEF-s0X!~3SB0e`alv}tnZBLX$x(KZ1t{HkMYq8r8 z*Qw4DgAE_`-hEMPV?@aQ{@v@+38jNCTP7v@D&Y66K^ATYSBO&7-YPJ9B3<2pq0I&2 zGHoO@eifVs*_&KoVsO6kpq39Qg>Rc{eW^i~Q%aDRSs2s()!a?v8isA{F!Y0LgY@e< zlSLGkeetR`KFahZ6?at~3`lmHs#JRF9?r_)>pGLhgLXaxQnBCiFIP(tHqlJY+FM%X z6Dz^sjt6KaLYVy(q6HHF~Yr%Y_x*F9IRqXt-?PV~&XZFz3kU&QrY5R;o}ShFmo$f(TV%jUpvgNKcFieW*A 
zjq+DdwLZ|OJeJO+EJkyc!dP%T#WqLUDhO>FqFy|`;Ps-+cA0rf`6EisU)hJqUxjM( z;ZBOXb-P$4Ju;N4l>|j}Z**3AH&tSCNr2PVB zp0hO4(ldOV1Fl{C#d=~h?c=??31Rn<{W{sNlB6qL>rBEg+Tvf%Q@$WXnOy&*&lYK| zcX2QPzhU6J>B=a$B)j#}QvKykk@^u=Jqt^0bA$hzPQ!f3Mr#K;a?7SdBcaA$`}SPt zt-+sRP4Cw*<-jl(Nro9|=vy}uxlw|mv>~#}>J-dNGi67nO54iE?T;`CONZ#kZj;Xq zC{~G7pS4Zjim*7SraOXOx%Qc-1Q#beN&AB&(oWAsze( z8<-q5U8RJK7@Z05Ho)wg7 zF>Ne-@mF0MHSOY_+*Kw=iRed;XwI9A^}%K0RKnIzgQ1`KiHw1Zl9{vh1|}mzb<-Iu z?ozFx{j)ZQb&Z|+RDmaw)_bgGQqpP^F1;#lup4T|BGr#Uh?6(t68W2jo#s^Tsl%U0 zZpC?KrIRM6ZFZcbIdlxzp?02m;hNM_n%^2}l33a1Y?6ZusMI>v(aYeeHw8s;UE10= zMK}b{Vn-MwcQXBx6uHbMJ`v%pNug?c?kq_l2Bo_x1715%EjT_OX){sd&4K4*UYRkq zwg-KW-2yco!Y8;zG%OsVSHa5V`wh>OXL@9&2JA1^rW|)=YyaNQ%0%oNXO9EsgCXfm zR5#f`VH1`PZ`#c8E&#uvA2j2NJ!JT6sE@*!@=QJIH7QO?H+O9VB?sUWdb0*e_$E)o zwlB>FKg$Q0Dfm_x04KP9O&j*qHK~woj`QB1$`1-T-pUPFjpv+In{~Y(=*c zLK=CT&>~7(!lfG0q*V~*mQtC(<&S789dl`_OOR)3*5DmZ+k$)bM2u@#m$M{o{fdpY z$tzWoI9$wTt>4OjW&jfLtNfP2;eYd>F``<=MCk(9t2ijML%ZkWPfv*Y4XNi;*EvS@ z~C=8 zO^^&-13ipGg#wi$iZp2YlH z7(%DGX6uD7LMQQ;jiKfDYLB$3TO>w3EnW;V=TD?;I*Q1$9rCy2R!BDSR8D6{zLyR@ z59@69Zh{IHo^woa--7dsiM~J_^Nj8`H|UcP-uGA(UMB%_SdW$J>e#R*y3=6?39Nmnbt-jE;qR zKDv8+H^tVbv`TsGiWMA}+sEwiN!MU)716{TauD5M^nOn8MGe|=*m*GtNpaZ1r()2p zen_i93OssSh9%)x>9=W#k$0%nnV^x1o6hZHGcW)hJw_p_hHN&z@{ov_tPvQuROW3E zW9&?}{opsOJ|5TCq)o=JBP#9uoyVpyS_vb#rqVdQCm$e8FE`&(#Kb0SsdMnCbZxLm zC@nr^a`k$l$2+;4^Q*T&0gJ$!RGxaF>N*>0>}2uV!LK3Zcz-d=`t^>){kWCL$|TW0E0~SSwyuv%sLXm$39>z5GAj|3RGuXNFTNY(ks zP0iutVHs-jWAuw|@$ne}7T507=U9FIj~}lM_C%%m^c?eJ--8cM?kde( z21Rn#I}lM>N(P&k#U}Y?qQq;RXbAhC6ck%UFDB-xDkJ<>KJmks0vp|N=b^XRB`89 z2_pz1VaYTJbhs=uy}!03*rqwm3mR@9b?e7jJ>?T^aovhsf3wp;cz1KZU`Md1c8(4i zzcsfa1oGKxlM#c1x5?cGNA5sD_YN%XiLgN0W0a?&-mcg@dqWF%5xRSMse!ErI$dsf zxD|cu(fqDpwq|3ou_ZzwCQ@8!Cgh-?j(N+nH>MQ3Y?zSl&#nF~xo^vf&TO zE7QQ)${B`VTD5pjZ`ouL9yxLuX)=6SD}?8Np~5ELj(Rm2=J5M=jp<_A*c#TxS9R7D z?8hH;ayjoRVE$HB*I!X zqx26{I$qH@vw$$Rp(lj*yH?C5sy-+fNi~&zyaj7<@2=lf$!EV>lAt?Ajrf@K1@SA@ z`Yr9!+754F3m+F_Aqt(<3?uzr%awUa524|jFI&L6tnP;DYE$CImzAoNYuiPt&_Rl+ 
z>-n`pu3z5q1Jw+X)8ga`ss$Y`8RYQ1)}=_jY6(!=e2i{|S>3x27)R)CUr-;zVfn=X zMn0ny`r&To;t4L>>@@Z9(zISrkex}((@8p$DcDb2^@rxMs+o$dNfgt9t zO}VIg6Ytc-eSaYIrHy|$XCA3(Y8_uw>eIxhEKg&-p0$`9KL>iliKYeva>8c2LUG*y zWFXA{Ml+L!nCp6G@5iDR3a%m$YSW@Fq7w5}#I)GCd*_3xg8MLFqu=&Lg&gD#T4@ZU14lPRv_4RvWxD;rG z^>S>_=3{A6Pj)XkqmNI;cp%`eXpfJ_OM+a@sZSijY<`cl*Qu|wknRsKy^B6IUZY$o+K= z8U7s16oGc*_)c*m>@Oo=fz`b|F3Sm2QQElY1CzJ&Bn{btNRT;{6a_TN4Px?`kWk*U zd4;9#L0N2!_!om3Rg4_K&#Bz?+4$4O?j3ybn(FLjqJoYKYnV_FQJ488Rvi0H&*&2= z2vS-V1j<#j%X&xrb&@Q9>VtZr19)4A-$Q~5t-uTQj}Qow?(9i8aed%1>HWS(EAz!u zL_NE|-qEWW4I~Dq*~-vLM0ANDwl^86Q(a1M5#2YWdo~hxwIgHUf)i^?|Z^v&YqHo!1ovD-6V+KU<{`-wjK* zE#NoeYc!~)XA>bQYkzOEnXGPmSs~Ppy`lt9wV9Nic2XP;cSn7y5mW5mzqzb7%P$_8B zRVPM^Mu<|OK55=HI-GP{pkOz1UMNAJF^J`ap5|DlcvfdiQIt}a)UGMhY)vJGct<$? ztxZ?W-PC?TCwO3xuHhJHdJjUtZ*(UypA_DZc`?oPynwZ3d0wwB>6w(+}GRrhjk%Dt^V_(vq zm59E@eay+fab3lLcdba*szeO%v7n*G`3Uy36Qw1p=k%-JcHL{pwMy2l?2RoI8gj)i z8uN_LyGgW4$$4GJ>LI0gd3sB)tm|KmwAQ{UeopYLlmxq*A>^gjdwmgXX(Du0OZP*1 zx&5BwBoTkLXPfGhwvFFF{M;rHn1hYhGGsTtnVxgE;9^`X*_3CDynFsL#5!O%d+$U1 z^LMrP$*qCkxxgd)oaI`MF~e5~G`>paRpKMvCTUZ`XY5mF-Jm%EMv~*p9w%z)8cyNk zP`T_WrXwnyd>lUXyT@NDznq};Q5(@hW9lJ-5Nf#5#5BcJ@H>B~mYF6ebp$uxkeW#0~{|X)F)3^`X!~ zmqylX;Kqu*N>_7%IkbA=PwS1|$8K15k()dlFQpdFt3)0o6%mSGU==ctjIW#R{MBT@ z@lhnHg3ftm(M-7!s!y@Z`<{_;0=EYnr(KYwug$wF=4h10ovI>TA4RrH=+(TU(jwJ) z_f%-S)1$A!T=aR5Lg~Jc4sN`b`L*_=Hw$t5k3&S0Z<`)t;}iK~+4-wrn|<8Eo&vi+*zdkKV z6EWbAB>z}_s7TBH^gfYu$1Udbm%|%>W6oQ=MIM0_zV$7g!5whPeErB@2e;Msv35C$ zu(-A0%mZIe#BTzgmmqnU4xq6`X0oXX*%KgMCFH$gb8Su*+LLvankm#x+pcwuwb0;x zrodhQVwMVDo(ql3w=snen1|l0y%vus?_KM}nz!Ih4%L^#@Xwn4S%Vkz7RRF;!W;C{ z-1i=X6$B&;HgSkX3T~ak!v~*{E?g83IX5!0TIbC)q*d>9l96@fQ=>>8z!S+_#|9m; zJCSww&tI4+{hTZfTgcQIcbmQHn_&-32WEW*XtYy=T)BUq!CIB>cOYHu9*(}@5pt_v zaj)MMRhov{#E!TrLkdOFS`v>VN1=l}fXL8$81Mo|@1~lV@XRDY%b~(zMXP3auv7l> zHD{7#_~Z=ro=I03Lyfz$yq_;y#S30aG}MMb_nc8$h;o~cb6Nqo^nhM@+Gt(Rna*bl ztdg?S-^5C{FJ*e5MQxiCId9NKDSw_Ky~W1Aly=er0cKhA#UGAka+m4GD`&Eb@Tl07 z(vx+$@r`;MXRWPnWo~0z|5SA?&Ks8e1hup?lOEL?gX?RDh#AYItt;NexSB6$E*`jf 
zz#b5~*6!Nlc59I9PCWb75OF!?;eMt?4MfTYKAMm7T)1K1U?b?!fD;~i)reg2flRY3 zM1vN1jf|@y!d)+lR_6}ZH+!~XSG@tUej#3TY5F%dC3ehF1Y#`+FkSbsKvF{~!sA$_iisDdS&{wqV~ zd?Wo%@xZQ+czGpqeQyJZ>EJ8Q`k3Wok$>7~MeXvn!{2uAl}fCNfTV~g%!6NU8Ljy~ zVe4~_fXyn9#5x4;DmbIB=Okq~zo1E(O>O6m5JA_@Lrb_1@a5Dv06n;_pd4yFRXvs8 zEC>yN?p*hnetpC|^-%sKoPFmPP?|~oM%ly83W3A$Sqbh4EMGdZ;lQ1LDuqpp-<{;e zvHq7}-^l0CvMY^RRZA95HG3FFm7_PoEOIooPTJCy;-XZ<;C*kquKJ4OFkQbKL8;1T zEY|P6c0+lH@$Cfdc3|yU-mJ%~qkFYZJ~U95sl|7?ou3e%9}g=<<BO!wVGDQrnGEGBgeeau#{7woeA{&2X^Liq&u>i9@|i_ zk4zGPG}CSC7Ui>>o1eBBG3>oG6`S!c^|e!ii*3s#c@h}i%{6-JnPVq>w56AgQ=!H@ zEsQW3An1DKVWlX#F*eJ1Qa`L;C`ju6PDN7uIhRzS)**uAUKI=;OA=L<p_>c%ddZUAjME(LHj)0~f5g-YA@< z;T)J1&c_fD+KqpEn{k;bMthm?K1dg3<@5ABmc&WpWWf5wo~=ZXx1E*c-3lI-I)_d@y0()M zOt5Rf_wXPAkTj8!9w14(84_$u{dNvo8yPuKFn$m{F|A$1Rh0zYi?DLtcOD%qtDZ+! zQ^=YAev(;fWRo1}Py)iJ3safzXB@BX0$^FnB}G zmu&_leeF=^EfF~nZQxH<>AH4kYi5_*nKmH7SXi_~4FS1@aZ{4s6#Z-7Wa>q(+^J5BZe5QY0IGb=H!FceMaw zqXV0-CZ@$^dUX+xQbclr9d{L&3+67oGzj+8k&ags^ipmQ3ih?!%mWnk34C)2(_p(r9tnwpfJqGxNOF~6}N2P7{ks&9rb0F7A zpY_@>@Q^Oz_^nDGu8`?#@R6AxMD9tpRAFL(odv~T9pe4<&!HhJ{tU$>9Lx;{#GAS)vS|3-)ABZm9vF=}HmdfC2<*&Xuw zCPO|=6ZKPdAiO_s=)X{Y_$)`Zq->nji3Ao^UmW*&znj5)rGQIQG3x$^=6N8z~U zU`5$7Nv<(>3L%fs@eu~jU~P6Uk0BRwE`GyV=TbFMLDHu!H`L^nt%zA8r9>Gx=sta# z%ugsCTYR^=EL>W#`cU-TPnv%v3I@oz8Th$+z#6bZ_+3Qj{JIlP}5}aZ#5nUEB&`gu$755 zpDdlcj@cud+M4Fr;eLK%n@Xo3@wt7j%L(MKU8g=6Uh^zkW6aIYc5cvq z$xjobv%Het9*{y!fcDg%uw;9_J+V;8S^u%sU|cl)1)@~Y1+;|Do=Pu62BYEBhlqn; z^4@KocL-HquV}U6b6SD`Y^eMOLJm2=HC6@R6roDHq|gf}-iYQvL&|5Dgiub176`0a zok4BB^G2&;8RNs8i`ig&>!jAO?C6m9NsjMgoh@l%dpfb~^PdSNnPVKrDLUb7RnL`@ zZbCpKl5efQ{=Kd34{|NO5uh!RgGdo6;78ckit&vswNzB8P@jIMbqC)-obrec#NXgw zI_!teN7r@^)KYa_Ffs7Ix~9&MJWn({X8Z9SY zZEe{UPsi`$uwzxKU7=U?;@0MB=cW!)U`8F#WnmVVpavy~#>aZ-JSPYk>{G8aDn-5d zRn|w7Bx2vUefu5n?_2o#OygCwQkX3llPr>PN+npBF-u31h6p}N<>CfjXhV>IYZQT# zwc}+iNhEh$7-#lYz!iRW(kK0_Y+PRvPQ77_#9XPO^xiNKRJ@>KLrV>0mu;6^IJ*zw zQ_?T8=cItGyU*6Ml%^kJXj2%~m~K4vfti*}(-skTydu1QOZGNSvTsldJl9wl8CL$&5EvvRU?kLO{|n*6Dgwy8sh_s~aU 
ziQ8^~B=%=pbrLI59NJMN!na>@$ zNYkZ+xMO8$sMT;f;mg2-jm^O$NN%|7XMd-uCb&U(_nz@|q_2S_Geb;J@_f5xUBjfX z8Ac5^ki=U$CIY!?;c+O_f8-YFxm|W*i`CyC+639RurDKa8#GmQARCt}nYrIQ@J~Bb zmG)mKOE_ZpwwCs{?_DeOWfPpHkLsaFnoH^7z(liIEm)MRFM~}!4Ge}*>hK0&dR+88 zaE!Q?gLDrqg6Rec?-ucy{wctL&R0@Edc^)p@GQgfJV4FMsNH7;e;Zv~$F022&;H)U z?X-*Cw>E_`=f{>=k(aufa-pHUVdSf3EN3b~hOpai==I(V>W!!S+9Gg7A^Sw8A( zSuku?>TQ3-7sE95OjZP~xF90QK>93sK56((g=+mB_k|0E2abNvcO$^BWr5EGb_}3^ zGSIEfv|u;D^B$=<|62OzDR7QUvIlherTh_C7iHk;YXD|X!R?sQgfUx#H{?FOs`$Gq z7fL&?=~)KJTNN*6#rGl(U9iIw)iMHun?3+7`(babdD!OG=U@|RYz=FbK*Ny@#-swX z3o4ns75Cso+53?cc|)f``+x`gsFD0G7;nB}Rg*epSXRG|sCte?Rl=LA8{Ls15c>_y z`*`6b%6%Ri_IETwRc5cv2JRNJ#?TW-?gORyLJs1F<9nKfsTzdC_k0l;DM*t*h1W?W*Ht{GS3Bt zvPri2KG@d*B;DdsoTP!MfV*G!INVxjC0rEi6#+3Uc<4$WUQ<80BT7#dg-V=e2MyUS z!lka4;RKCUar!$FtTPILqGr4%s7^*4o_qpO^rl1UpHt9$#t;mnp8^tyhCEnbY#oD4 z+qv=kTw+KR7KF^uZAUrL8z{x!4m7OL40c|&{$Aanhll; zy|wEx|64+0o&7h%vabm)Jf6qOm>PEogr&|TRNx?h*V3sp6%%b8IqeBRCZo)E zu_nuCHd+HzIwCuDJH+YawT$l_uBr$1lDmF_Ga&#uSs);)hY=c)pTM{r*STA-1_iaJ zFmCy7$2oLG`0nGB05A9ZJ<{f@NSu*MP62xHo>2=}>xvaHUdT#DqkCwqQ|3zsW$?b2 z+wcdQlTR%Y3^5s;5th0#BdumUaIUVTEs2BP;0bT+CvxSHLEcgSPoiwGl&kYu;417TX zNJ96w3!OgbzfRib_H#+buZB9BW`591-@>MeWo28B7ExIl<~N{AOKkoWf6Aw!9l!Xy z;l%VXWy>lKjpMEfFXha(*{gPTcD+z70@fr`Z04h`Jn|`iz~9Wl4;)hcE_bcByvOF4 zdLiqcmYPULp%2q(=$q&tu)d?>#LnmY1UR3Gx5TSdPA1Z&!Ed5SYEe`?c4itb(`*@A zk##SRR_UThR2CBbha!uxcALDePe)QWUgCegIIJ6` z1(%&0geNfY7Rc!61SlpeAiR6VtO^OlHfRwl!QCj3m@JUeNk{sG4qq6QrjdMZHVSpg zz0zm;ExTcOHJ~j!{^@&6boyS95~r62jf?;B-3my0_Ex27b8P8e#NyXN@4QPj^wih^ zn^lm)r~%5dsIaTElb}c*hauZM)B@KOCZ~&RZObMySZFe=wCEhawkXO)ky9F+1p=JBvQJhQyt!LFVApg7H08ZCfYz7~}c zt+k5r#$oX}2!pErBC*s{g~(t!Z==UM@RZ(MvOlIGxi^0ym88dF?t)1+Cp7fx+~Zjo zL6@8#(nX(DM}*JlkYOkT5<>2scaqR-Qg*{w*z&^R3bg~7pF5i#Sk~s zd6%L**Ix}Ja+2LaWN{;$smP_RLKxfW4Xry@`V}`u`@iL^{^p)si8*fp)hzXjS+xZF z$mmkc;O397uGAl*xtX9E89M7@wyG?rF#IQUbER)8F!T8(#0ht%qbrvOTAIZ9f(rZ#z!>yjE)@KQ(KNNK7NCa8Aeqvf) 
zp|UId+YEm&@~t;7_m`x7r1i(dS}z|z*C5Vc)<5$Hxmx&cv^&#!d}nj`V?XK3;-$xj^|FtS{LYVG6?6ry%S%sJG9oQeXGgm(&HvAp(hJ_#WgOe0jl|V|+(_bcA zyuRLAZu8H5GJ>)WU+K(8P2r;i7GxH`x!q45*Kq90x2M%PTxKN${_l%Q_(V0S#m8Fq z+b@)hUX;8!0$gsZZo4AEMclo@`p55PfY0ZUX$Z0#F(5j5_UvgA2vgw#Q}SbOIh|fp z?#X$6u`nMJuPwzz=LY^ ziO)D8E+v!1R{2SGs(in@xT%$!B)ary3=;JRDA-j#TLiWV-JBw~>Af4V0P^pdi(R;H z()=MrzNqeI1598|6cqueAxX)2r)9*Rb1_a(_hxQx$d8ac14OZMK>e6{LmwXVA6Lq< zoF6>jJBx6*{m$Re$lXXw27CjfVoNIr8+Uu0aMzVPlu*kz|jjAa>tk5oSL#!OAKQEyXu_H126e2mkP(! zHQkr-4wTJD)8Y((iMIl(N%?ME_ztoW)~{oU;{o7;<8n0vdCVQBpoRulw*4QD8(mdzIb&Dx!u*PfLfk_CIW16^PSh zrDUGGI|xaCo<{AYsY%W%)^VamcIaC?G%g$Vf&XXTCSZ)^o3pW)*HkIE_>C#Ke_2&d z(6alRD#>M>{5G)D->djtSaX*@Q9Y*Yz!f;d<`jpYeE+BP)wx2G^bk*ZlUNq% znoK@2JBUgrUri~XB~~YM&1kqrr9JWrc3CNjpU;w4!42p?ZW*tbPg#?ClOUB=P2RI0 zoh?S4_i2tkZxZH%_hV3F33*_FCBvx^`G?6VdXk|l!35Q{ng0WOnkS6hIeYB_0A?Cy4!4kP>w$IVgT!V&KEiTkuT9DNoUHkIw?D#B zvS=xbWIKJjJ@)K(*u$4J6N@oja~O(49AO$c7WdW(sEMMjs2`fv4Evz*bKx@n;mPN< zz6+N|b*%Y}cqvAT+%Neb$tC@Qo}P=47bCDePFxz&%0sW{ZbQehY6c4DxfRPZ6lu4=5vt~!74z2FaC;B*?7uWL(u3sJHmPI%q6 zESp(9Q+yK&?L~xMm*g#qyFV@t5qmq!$|5J-Shgo961usz;!F8)tKacxYaGr$ERre{*1Es z9qm$&tpMra_m~PyXJlR6Vk{5ATLYF}+YZCqS+c+Ua=rYi^h1p*`^h(iw-4y;^~=q+ zfgD=qB2D<@2ZWj$N){Eu_A*v6A77ctxFjlN7~q%lLZnj;pC*t(*hhNmwBt9~PnNBb zixB)Q@mow4VDGSy22tMFdC2l<#XX${746pz#OZI0Y$h9w3aBPcenJR|%Bxa7QDFJg zK0Oe_6ZfiFNlK<4mYLn)bCFZ1V$;5=bmF?n|DA0GlP zUE&4IMgfuxoz&VFf&+W^jL|lX#KE`tMVJO_S!XB#gJA8|ln*VGeka^Bi@}PodY-EV zv7*2ODiPgxd{#xI*FsJ(ci<5r%sxW_5UB+UFvU3AM`#x}v4|d}2Y1Pd z-(%?V_Gvdet`pYhn=~!RQ6g7>WY7Kq>7V$*Kc`isiKjq?%^2p>Y|(d&-+5CM=j(ax zM$&3D&$6>0@bh+GT{pyO6@6F{KEm#4smD&9`U-NU>;LjW?@GU|&hs<>cBVrA(Fg8S zJSF_nw8Q+P1^JiZ{(i~wGQQ32Z~14o^oN!Bcd=Z&e2oRaU00Z^`HU?72g3Yoa4I~R zpt}82R{HOQx8XS&<$!|2Pq@Rs%OW5MFS1{Q@1NrE--r5Z#^24K<9VCcE2TpJxOvDp zfZAl$g1-}FpIv&!nEq`ZhL-0D?>~+a`O!LFuG0J*bn5_QwYErf79#!QD}P@kL*q;4 z%%&NBOUg{a45SbKLy@t78GxF=G6T|HAgQLm!mE zy#@ezpIld-MYvu2k!jx2xP5Z+$$Y5g@T}~i2*vZ+C1{uVS>EQK{Gs@K0L)|8K1F8) zl44bhND2YlX}zyf`kp{MrE4`=4~~w#JeJ(;p^e6W)vE-q%z;u>|Aj~Da+X_-(4sqp 
z;Qr0?Biz0}aB|B`{KH_C%RKsUHeZH_l1>p~c+I%N+ev1gB5=FAwtnbcyuNSKPT-cIMFp TdHBO$@PAKbl%-1_8UFr15!>LM diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index e807ee54fbf..e95409e08e9 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. import dataclasses import inspect @@ -299,7 +299,6 @@ def __init__( extra_kwargs["delay_wgrad_compute"] = self.config.delay_wgrad_compute else: raise RuntimeError("Only TE with version >=2.3.0 supports delay_wgrad_compute now.") - if ( self.config.tp_comm_overlap and tp_comm_buffer_name @@ -2117,12 +2116,3 @@ def set_save_original_input(module): "set_save_original_input is only needed on transformer-engine modules that save " "quantized tensors by default. It needs transformer-engine>=2.6.0dev0." ) - - -try: - # pylint: disable=unused-import - from transformer_engine.pytorch import cpu_offload - from transformer_engine.pytorch.float8_tensor import Float8Tensor -except ImportError: - Float8Tensor = None - cpu_offload = None diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 74b9a90764d..d501c11a0a9 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
from contextlib import nullcontext from typing import Optional @@ -8,9 +8,6 @@ from megatron.core.enums import Fp8Recipe from megatron.core.fp8_utils import get_fp8_context -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_set_last_layer, -) from megatron.core.pipeline_parallel.utils import ( AbstractSchedulePlan, NoopScheduleNode, @@ -453,8 +450,6 @@ def run( f_layer = f_schedule_plan.get_layer(i) b_layer = b_schedule_plan.get_layer(b_num_layers - 1 - i) torch.cuda.nvtx.range_push(f"layer_{i}f-layer_{b_num_layers - 1 - i}b") - if f_layer.layer.config.fine_grained_activation_offloading: - fine_grained_offloading_set_last_layer(i == f_num_layers - 1) f_input, b_grad = TransformerLayerSchedulePlan.run( f_layer, b_layer, @@ -477,8 +472,6 @@ def run( for i in range(overlapped_layers, f_num_layers): f_layer = f_schedule_plan.get_layer(i) torch.cuda.nvtx.range_push(f"layer_{i}f") - if f_layer.layer.config.fine_grained_activation_offloading: - fine_grained_offloading_set_last_layer(i == f_num_layers - 1) f_input, _ = TransformerLayerSchedulePlan.run(f_layer, None, f_input=f_input) torch.cuda.nvtx.range_pop() diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index 786a1b850dd..fd1cc3d33c6 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
import weakref from contextlib import nullcontext @@ -8,11 +8,6 @@ import torch from megatron.core import tensor_parallel -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, -) from megatron.core.pipeline_parallel.utils import ScheduleNode, make_viewless from megatron.core.transformer.module import float16_to_fp32 from megatron.core.transformer.moe.moe_layer import MoELayer @@ -355,17 +350,13 @@ def submodule_post_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor) Run forward pass for computations between attention and dispatch: pre mlp layernorm->router->dispatch preprocess """ - if layer.offload_mlp_norm: - hidden_states = fine_grained_offloading_group_start(hidden_states, name="mlp_norm") if layer.recompute_pre_mlp_layernorm: layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with get_fine_grained_offloading_context(layer.offload_mlp_norm): - pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( - layer.pre_mlp_layernorm, hidden_states - ) + pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( + layer.pre_mlp_layernorm, hidden_states + ) else: - with get_fine_grained_offloading_context(layer.offload_mlp_norm): - pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) + pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) local_tokens, probs, _ = layer.mlp.router_and_preprocess(pre_mlp_layernorm_output) @@ -446,10 +437,6 @@ def submodule_combine_forward( hidden_states = layer.mlp_bda(layer.training, layer.config.bias_dropout_fusion)( mlp_output_with_bias, residual, layer.hidden_dropout ) - if layer.offload_mlp_norm: - (hidden_states,) = fine_grained_offloading_group_commit( - hidden_states, name="mlp_norm", forced_released_tensors=[residual] - ) output = make_viewless_tensor( inp=hidden_states, 
requires_grad=hidden_states.requires_grad, keep_graph=True ) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index ae292649561..654827dc6fb 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. from collections import OrderedDict from typing import Dict, Literal, Optional @@ -18,9 +18,6 @@ ) from megatron.core.models.common.language_module.language_module import LanguageModule from megatron.core.packed_seq_params import PackedSeqParams -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_init_chunk_handler, -) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.quantization.utils import get_quant_config_or_none from megatron.core.tensor_parallel import gather_from_sequence_parallel_region @@ -120,7 +117,6 @@ def __init__( self.parallel_output = parallel_output self.share_embeddings_and_output_weights = share_embeddings_and_output_weights self.vp_stage = vp_stage - self.disable_param_offloading = True if hasattr(self.config, 'position_embedding_type'): self.position_embedding_type = self.config.position_embedding_type @@ -414,22 +410,6 @@ def _preprocess( return preproc_output - def preprocess_for_fine_grained_offloading(self): - """Preprocess for fine-grained activation offloading.""" - fine_grained_offloading_init_chunk_handler( - self.vp_stage, self.config.min_offloaded_tensor_size - ) - if self.disable_param_offloading: - for param in self.decoder.parameters(): - param.offloading_activation = False - if self.mtp_process: - for param in self.mtp.parameters(): - param.offloading_activation = False - if self.post_process: - for param in self.output_layer.parameters(): - param.offloading_activation = False - self.disable_param_offloading = 
False - def forward( self, input_ids: Tensor, @@ -455,8 +435,6 @@ def forward( runtime_gather_output (bool): Gather output at runtime. Default None means `parallel_output` arg in the constructor will be used. """ - if self.config.fine_grained_activation_offloading: - self.preprocess_for_fine_grained_offloading() inference_context = deprecate_inference_params(inference_context, inference_params) @@ -723,9 +701,6 @@ def build_schedule_plan( TransformerModelChunkSchedulePlan: The model chunk schedule plan. """ - if self.config.fine_grained_activation_offloading: - self.preprocess_for_fine_grained_offloading() - from ..common.model_chunk_schedule_plan import TransformerModelChunkSchedulePlan return TransformerModelChunkSchedulePlan( diff --git a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py deleted file mode 100644 index b28bbcbeddc..00000000000 --- a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py +++ /dev/null @@ -1,603 +0,0 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- -import warnings -from collections import deque -from contextlib import nullcontext -from typing import Any - -import torch - -# CPU offload implementation for pipeline parallelism -DEBUG = False -DEBUG_RANK = 0 - - -def debug_rank(message): - """Print debug message for a specific rank when DEBUG is enabled.""" - # pylint: disable=bad-builtin - if not DEBUG: - return - assert torch.distributed.is_initialized() - if torch.distributed.get_rank() == DEBUG_RANK: - print(message) - - -def set_ideal_affinity_for_current_gpu(): - """Set CPU affinity for the current GPU to optimize host-device transfers.""" - import uuid - - try: - import cuda.bindings.driver as cuda_driver - import cuda.bindings.runtime as cuda_runtime - except ImportError: - import cuda.cuda as cuda_driver - import cuda.cudart as cuda_runtime - try: - import pynvml - except ImportError: - warnings.warn("pynvml is not installed, skipping GPU affinity setting") - return - - # Get current CUDA device ID - err, device_id = cuda_runtime.cudaGetDevice() - assert err == cuda_runtime.cudaError_t.cudaSuccess - # Get device UUID - err, device_uuid = cuda_driver.cuDeviceGetUuid(device_id) - assert err == cuda_driver.CUresult.CUDA_SUCCESS - # Set CPU affinity based on GPU's NUMA node - pynvml.nvmlInit() - handle = pynvml.nvmlDeviceGetHandleByUUID("GPU-" + str(uuid.UUID(bytes=device_uuid.bytes))) - pynvml.nvmlDeviceSetCpuAffinity(handle) - - -class PipelineOffloadManager: - """ - Singleton manager for coordinating activation offloading across pipeline stages. - Manages chunk handlers, synchronizes GPU-CPU transfers, - and handles virtual pipeline parallelism. 
- """ - - OFFLOAD_MGR = None - - @classmethod - def get_instance(cls): - """Get the singleton instance of PipelineOffloadManager.""" - if cls.OFFLOAD_MGR is None: - cls.OFFLOAD_MGR = PipelineOffloadManager() - return cls.OFFLOAD_MGR - - def __init__(self): - """Initialize the manager with queues and dedicated CUDA streams.""" - from megatron.core import parallel_state - - # Queue to store chunk handlers for backward pass - self._queue = deque() - if parallel_state.get_virtual_pipeline_model_parallel_world_size() is None: - self._vpp = 1 - else: - self._vpp = parallel_state.get_virtual_pipeline_model_parallel_world_size() - - # Cache chunk handlers for each virtual pipeline stage - self._stages = [[] for _ in range(self._vpp)] - # allocate streams and events for synchronization - self._d2h_stream = torch.cuda.Stream() - self._h2d_stream = torch.cuda.Stream() - self.reset() - - @property - def d2h_stream(self): - """Get the device-to-host (GPU to CPU) transfer stream.""" - return self._d2h_stream - - @property - def h2d_stream(self): - """Get the host-to-device (CPU to GPU) transfer stream.""" - return self._h2d_stream - - def reset(self): - """Reset manager state for a new training iteration.""" - set_ideal_affinity_for_current_gpu() - self._inside_context = False - self._cur_forward_chunk = None - self._cur_backward_chunk = None - # Track the first microbatch of the last virtual pipeline stage - self._is_first_last_vpp_chunk = True - - def flush(self): - """Flush all staged chunks to the backward queue in reverse order.""" - # Ensure all virtual pipeline stages have the same number of chunks - if len(self._stages[0]) == len(self._stages[-1]): - lens = [len(e) for e in self._stages] - assert min(lens) == max(lens), "All stages must have same chunk count" - # Clear the last stage and push all chunks in reverse order for backward - self._stages[-1] = [] - for chunks in reversed(self._stages): - for chunk in chunks: - self.push(chunk) - # Clear all stages after 
flushing - for i in range(self._vpp): - self._stages[i] = [] - - def push(self, handler): - """Add a chunk handler to the backward queue.""" - debug_rank(f"pushing handler {handler}") - self._queue.append(handler) - - def pop(self): - """Remove and set the next non-empty chunk as the current backward chunk.""" - assert self.size(), "Cannot pop from empty queue" - while self._queue: - self._cur_backward_chunk = self._queue.popleft() - if not self._cur_backward_chunk.is_empty_chunk(): - break - debug_rank(f"popping handler {self._cur_backward_chunk}") - - def front(self): - """Get the first non-empty chunk handler without removing it from the queue.""" - if not self.size(): - return None - for chunk_handler in self._queue: - if not chunk_handler.is_empty_chunk(): - return chunk_handler - return None - - def size(self): - """Return the number of chunk handlers in the queue.""" - return len(self._queue) - - def init_model_chunk_offload_handler(self, vp_stage, min_offloaded_tensor_size=1024 * 1024): - """ - Initialize a chunk offload handler for a model chunk (microbatch). 
- - Args: - vp_stage: Virtual pipeline stage index (None means stage 0) - min_offloaded_tensor_size: Minimum tensor size (in elements) to offload - """ - if vp_stage is None: - cur_vpp_rank = 0 - else: - cur_vpp_rank = vp_stage - - is_first_last_vpp_chunk = self._is_first_last_vpp_chunk - # Flush staged chunks when reaching the last virtual pipeline stage - if cur_vpp_rank == self._vpp - 1: - self.flush() - # Determine if this is the first microbatch of the last virtual pipeline stage - is_first_last_vpp_chunk = is_first_last_vpp_chunk and (cur_vpp_rank == self._vpp - 1) - - cur_chunk = ChunkOffloadHandler(is_first_last_vpp_chunk, min_offloaded_tensor_size) - self._stages[cur_vpp_rank].append(cur_chunk) - # For the last stage, push immediately and flush - if cur_vpp_rank == self._vpp - 1: - self._is_first_last_vpp_chunk = False - self.push(cur_chunk) - self.flush() - self._cur_forward_chunk = cur_chunk - cur_chunk.vpp_rank = cur_vpp_rank - - def set_last_layer(self, is_last_layer): - """Mark whether the current forward chunk is processing the last layer.""" - self._cur_forward_chunk.is_last_layer = is_last_layer - - def cur_forward_chunk(self): - """Get the current forward pass chunk handler.""" - return self._cur_forward_chunk - - def cur_backward_chunk(self): - """Get the current backward pass chunk handler.""" - return self._cur_backward_chunk - - def __enter__(self): - """Enter context manager to enable activation offloading hooks.""" - debug_rank("----__enter__") - from megatron.core.extensions.transformer_engine import cpu_offload - - if cpu_offload is not None: - cpu_offload.CPUOffloadEnabled = True - self.inside_context = True - - torch._C._autograd._push_saved_tensors_default_hooks( - self.on_save_for_backward, self.on_get_saved_tensor - ) - - def __exit__(self, *args: Any): - """Exit context manager and restore original tensor saving behavior.""" - debug_rank("----__exit__") - from megatron.core.extensions.transformer_engine import cpu_offload - - if 
cpu_offload is not None: - cpu_offload.CPUOffloadEnabled = False - self.inside_context = False - torch._C._autograd._pop_saved_tensors_default_hooks() - - def on_save_for_backward(self, tensor: torch.Tensor) -> Any: - """ - Hook called when autograd saves a tensor for backward pass. - Returns a tag to identify the tensor later. - """ - debug_rank(f"------on_save_for_backward {tensor.shape}") - assert self.inside_context, "Must be inside offload context" - return self.cur_forward_chunk().tensor_push(tensor) - - def on_get_saved_tensor(self, saved_state: Any) -> torch.Tensor: - """ - Hook called when autograd retrieves a saved tensor during backward pass. - Returns the actual tensor (potentially reloading from CPU). - """ - debug_rank(f"----on_get_saved_tensor {saved_state}") - return self.cur_backward_chunk().tensor_pop(saved_state) - - -class ChunkOffloadHandler: - """ - Handles activation offloading and reloading for a single pipeline chunk (microbatch). - Manages tensor groups, coordinates asynchronous GPU-CPU transfers, and handles synchronization. 
- """ - - @staticmethod - def offload(src_tensor, pin_memory=True): - """Offload.""" - debug_rank("--------offload") - from megatron.core.extensions.transformer_engine import Float8Tensor - - fp8_offload = isinstance(src_tensor, Float8Tensor) if Float8Tensor is not None else False - - if not src_tensor.is_contiguous(): - src_tensor = src_tensor.contiguous() - - cpu_backup = torch.empty( - src_tensor.size(), - dtype=torch.uint8 if fp8_offload else src_tensor.dtype, - layout=src_tensor.layout, - device="cpu", - pin_memory=pin_memory, - ) - - if fp8_offload: - cpu_backup = Float8Tensor.make_like(src_tensor, data=cpu_backup) - - cpu_backup.copy_(src_tensor, non_blocking=pin_memory) - state = (src_tensor.device, cpu_backup) - return state - - @staticmethod - def reload(state, non_blocking=None): - """Reload.""" - debug_rank("------reload") - dev, cpu_backup = state - if non_blocking is None: - non_blocking = cpu_backup.is_pinned() - return cpu_backup.to(dev, non_blocking=non_blocking) - - def __init__(self, is_first_last_vpp_chunk, min_offloaded_tensor_size): - # Data Structure to maintain reference to activation tensors - self._tensor_tag_to_state = {} - # Mark the first microbatch of the last virtual pipeline stage - self._is_first_last_vpp_chunk = is_first_last_vpp_chunk - - # Group management for batching offload/reload operations - self._offloaded_group_index = 0 - self._groups_to_offload = [] - self._groups_to_reload = [] - self._tensor_count_current_group = 0 - - # Counter for special torch tensor types (FakeTensor, FunctionalTensor) - self.torch_tensor_count = 0 - self.d2h_stream = PipelineOffloadManager.get_instance().d2h_stream - self.h2d_stream = PipelineOffloadManager.get_instance().h2d_stream - self._offload_events = {} - self._reload_events = {} - self.min_offloaded_tensor_size = min_offloaded_tensor_size - self.is_last_layer = False - - def is_empty_chunk(self): - """Check if this chunk has no tensors to manage.""" - return len(self._tensor_tag_to_state) 
== 0 - - def is_first_last_layer(self): - """ - Check if this is the last layer of the first microbatch of the last vp stage. - These tensors should not be offloaded to avoid unnecessary overhead. - """ - debug_rank( - f"------is_first_last_layer {self._is_first_last_vpp_chunk} {self.is_last_layer}" - ) - return self._is_first_last_vpp_chunk and self.is_last_layer - - def tensor_push(self, tensor): - """Push tensor to the offload handler.""" - torch_stray_tensor = isinstance( - tensor, - ( - torch._subclasses.fake_tensor.FakeTensor, - torch._subclasses.functional_tensor.FunctionalTensor, - ), - ) - - if not torch_stray_tensor: - # Assign unique tag based on group index and position within group - tensor_tag = (self._offloaded_group_index, self._tensor_count_current_group) - self._tensor_count_current_group += 1 - assert tensor_tag not in self._tensor_tag_to_state, "Duplicate tensor tag" - self._tensor_tag_to_state[tensor_tag] = tensor - else: - # Use negative group ID for special tensor types - tensor_tag = (-1, self.torch_tensor_count) - self.torch_tensor_count += 1 - self._tensor_tag_to_state[tensor_tag] = tensor - debug_rank(f"--------tensor_push {tensor_tag}") - return tensor_tag - - def tensor_pop(self, tensor_tag): - """Pop tensor from the offload handler.""" - debug_rank(f"--------tensor_pop {tensor_tag}") - assert tensor_tag in self._tensor_tag_to_state, f"Tag {tensor_tag} not found" - tensor = self._tensor_tag_to_state.pop(tensor_tag) - # If tensor is offloaded (stored as tuple), reload it - if isinstance(tensor, tuple): - tensor = self.reload(tensor) - debug_rank(f"--------tensor_pop {tensor.shape}") - return tensor - - def tensor_need_offloading_checker(self, tensor): - """Check if the tensor needs to be offloaded.""" - if tensor.numel() < self.min_offloaded_tensor_size: - return False - # Respect tensor's offload preference if specified - if hasattr(tensor, "offloading_activation") and not tensor.offloading_activation: - return False - return True - - 
def bulk_offload_group(self, group_to_offload): - """offload a group of tensors recorded in tensor_push().""" - debug_rank("------bulk_offload_group") - assert not self.is_first_last_layer(), "Should not offload first-last layer" - group_id_to_offload, name = group_to_offload - torch.cuda.nvtx.range_push("activation offloading " + name) - with torch.cuda.stream(self.d2h_stream): - for tensor_tag, state in self._tensor_tag_to_state.items(): - group_id, _ = tensor_tag - if group_id == group_id_to_offload: - debug_rank(f"------tensor_tag {tensor_tag}") - debug_rank(f"------group_to_offload {group_to_offload}") - assert not isinstance(state, tuple), "Tensor already offloaded" - tensor_on_device = state - if self.tensor_need_offloading_checker(tensor_on_device): - state = self.offload(tensor_on_device) - event = torch.cuda.Event() - event.record(self.d2h_stream) - self._offload_events[name] = event - tensor_on_device.record_stream(self.d2h_stream) - self._tensor_tag_to_state[tensor_tag] = state - torch.cuda.nvtx.range_pop() - - def get_offload_event(self, name): - """Get the CUDA event for a named offload operation.""" - return self._offload_events.get(name, None) - - def get_reload_event(self, name): - """Get the CUDA event for a named reload operation.""" - return self._reload_events.get(name, None) - - def bulk_reload_group(self, group_to_reload): - """Bulk reload group.""" - debug_rank("----bulk_reload_group") - found_reload_group = False - group_id_to_reload, name = group_to_reload - torch.cuda.nvtx.range_push("activation reloading " + name) - with torch.cuda.stream(self.h2d_stream): - for tensor_label, state in self._tensor_tag_to_state.items(): - group_id, _ = tensor_label - if group_id == group_id_to_reload: - debug_rank(f"----tensor_label {tensor_label}") - found_reload_group = True - event = self.get_offload_event(name) - # Only reload if tensor was offloaded (stored as tuple) - if isinstance(state, tuple): - # Wait for offload to complete before reloading - 
torch.cuda.current_stream().wait_event(event) - recovered_tensor = self.reload(state) - event.record(self.h2d_stream) - self._reload_events[name] = event - debug_rank(f"----recovered_tensor {recovered_tensor.shape}") - self._tensor_tag_to_state[tensor_label] = recovered_tensor - torch.cuda.nvtx.range_pop() - return found_reload_group - - def pre_reload_last_layer(self): - """Pre-reload the last layer of this chunk to hide reload latency.""" - debug_rank("pre_reload_last_layer") - assert not self._is_first_last_vpp_chunk, "Should not pre-reload first chunk" - debug_rank(f"len(self._groups_to_reload) {len(self._groups_to_reload)}") - if len(self._groups_to_reload) > 0: - # Reload the last group (last layer) early - if self.bulk_reload_group(self._groups_to_reload[-1]): - self._groups_to_reload.pop() - - def should_bulk_offload(self): - """Determine if the current group should be offloaded.""" - # Don't offload the first backward chunk's last layer - if self.is_first_last_layer(): - return False - - # Check if next backward chunk is this chunk (for last pipeline stage) - next_backward_chunk = PipelineOffloadManager.get_instance().front() - if next_backward_chunk is not None and next_backward_chunk is self: - # Don't offload last layer if it's about to be used immediately - if self.is_last_layer: - return False - - return True - - def bulk_offload(self, forced_released_tensors): - """Offload a group of tensors and optionally release their GPU memory.""" - debug_rank("----bulk_offload") - if self.should_bulk_offload(): - group_to_offload = self._groups_to_offload.pop() - self._groups_to_reload.append(group_to_offload) - self.bulk_offload_group(group_to_offload) - # Manually release tensors not auto-freed by torch GC - if len(forced_released_tensors) > 0: - cur_stream = torch.cuda.current_stream() - for release_tensor in forced_released_tensors: - if self.tensor_need_offloading_checker(release_tensor): - # Ensure tensor is not in use before freeing - 
release_tensor.record_stream(cur_stream) - release_tensor.untyped_storage().resize_(0) - - def on_group_commit_forward(self, forced_released_tensors): - """Called at the end of a layer group's forward pass to trigger offloading.""" - debug_rank("--on_group_commit_forward") - # Wait for compute to finish before starting offload - self.d2h_stream.wait_stream(torch.cuda.current_stream()) - self.bulk_offload(forced_released_tensors) - - def bulk_reload(self): - """Reload the next group of tensors from CPU to GPU.""" - debug_rank("--bulk_reload") - if len(self._groups_to_reload) > 0: - # Reload the next layer group - if self.bulk_reload_group(self._groups_to_reload[-1]): - debug_rank(f"--bulk_reload_group {self._groups_to_reload}") - self._groups_to_reload.pop() - else: - # Pre-load the last layer of the next backward chunk to hide latency - next_backward_chunk = PipelineOffloadManager.get_instance().front() - if next_backward_chunk is not None: - next_backward_chunk.pre_reload_last_layer() - - def on_group_commit_backward(self, name): - """ - Called at the end of a layer group's backward pass. - Ensures correct chunk is active and synchronizes reloads. - """ - debug_rank("--on_group_commit_backward") - cur_backward_chunk = PipelineOffloadManager.get_instance().cur_backward_chunk() - # Switch to this chunk if it's not already current - if cur_backward_chunk is not self: - PipelineOffloadManager.get_instance().pop() - cur_backward_chunk = PipelineOffloadManager.get_instance().cur_backward_chunk() - assert cur_backward_chunk is self, "Chunk mismatch" - # Wait for reload to complete before using tensors - event = self.get_reload_event(name) - if event is not None: - torch.cuda.current_stream().wait_event(event) - self._offloaded_group_index = self._offloaded_group_index - 1 - - def on_group_start_forward(self, name): - """ - Called at the start of a layer group's forward pass. - Increments group index and prepares for offloading. 
- """ - debug_rank(f"--on_group_start_forward") - self._offloaded_group_index = self._offloaded_group_index + 1 - self._tensor_count_current_group = 0 - self._groups_to_offload.append((self._offloaded_group_index, name)) - - def on_group_start_backward(self): - """ - Called at the start of a layer group's backward pass. - Triggers reloading of tensors from CPU. - """ - debug_rank("--on_group_start_backward") - # Wait for compute to finish before starting reload - self.h2d_stream.wait_stream(torch.cuda.current_stream()) - self.bulk_reload() - - -class FineGrainedOffloadingGroupCommitFunction(torch.autograd.Function): - """ - Identity operation that marks the end of a layer group for offload synchronization. - Triggers offload during forward and synchronizes reload during backward. - """ - - @staticmethod - def forward(ctx, *args): - # pylint: disable=missing-function-docstring - debug_rank("FineGrainedOffloadingGroupCommitFunction forward") - - forced_released_tensors = args[-1] - name = args[-2] - cpu_offload_handler = args[-3] - tensor = args[:-3] - cpu_offload_handler.on_group_commit_forward(forced_released_tensors) - ctx.cpu_offload_handler = cpu_offload_handler - ctx.name = name - - # return the identical tensor - return tensor - - @staticmethod - def backward(ctx, *grad_output): - # pylint: disable=missing-function-docstring - debug_rank("FineGrainedOffloadingGroupCommitFunction backward") - - cpu_offload_handler = ctx.cpu_offload_handler - cpu_offload_handler.on_group_commit_backward(ctx.name) - return grad_output + (None, None, None) - - -def fine_grained_offloading_group_commit(*tensor, name, forced_released_tensors=[]): - """ - Specify the tensors to be released after offloading. - forced_released_tensors is a list of tensors to be released after offloading. - The tensors will be untyped_storage().resize_(0) after offloading. - Note: specify the tensors only when they are not automatically released by torch gc. 
- """ - cur_forward_chunk = PipelineOffloadManager.get_instance().cur_forward_chunk() - return FineGrainedOffloadingGroupCommitFunction.apply( - *tensor, cur_forward_chunk, name, forced_released_tensors - ) - - -class FineGrainedOffloadingGroupStartFunction(torch.autograd.Function): - """ - Identity operation that marks the start of a layer group for offload/reload. - Prepares for offload during forward and triggers reload during backward. - """ - - @staticmethod - def forward(ctx, tensor, cpu_offload_handler, name): - # pylint: disable=missing-function-docstring - ctx.cpu_offload_handler = cpu_offload_handler - debug_rank("FineGrainedOffloadingGroupStartFunction forward") - - cpu_offload_handler.on_group_start_forward(name) - # return the identical tensor - return tensor - - @staticmethod - def backward(ctx, grad_output): - # pylint: disable=missing-function-docstring - debug_rank("FineGrainedOffloadingGroupStartFunction backward") - cpu_offload_handler = ctx.cpu_offload_handler - cpu_offload_handler.on_group_start_backward() - return grad_output, None, None - - -def fine_grained_offloading_group_start(tensor, name=None): - """Mark the start of a layer group and prepare for offload/reload.""" - cur_forward_chunk = PipelineOffloadManager.get_instance().cur_forward_chunk() - return FineGrainedOffloadingGroupStartFunction.apply(tensor, cur_forward_chunk, name) - - -def get_fine_grained_offloading_context(flag): - """Get the fine-grained offload context""" - return PipelineOffloadManager.get_instance() if flag else nullcontext() - - -def fine_grained_offloading_set_last_layer(is_last_layer): - """Set the last layer flag.""" - PipelineOffloadManager.get_instance().set_last_layer(is_last_layer) - - -def fine_grained_offloading_init_chunk_handler(vp_stage, min_offloaded_tensor_size): - """Initialize the chunk handler, called at the start of a microbatch forward pass.""" - PipelineOffloadManager.get_instance().init_model_chunk_offload_handler( - vp_stage, 
min_offloaded_tensor_size - ) - - -def fine_grained_offloading_reset(): - """Reset the chunk handler, called at the start of a training iteration.""" - PipelineOffloadManager.get_instance().reset() diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index 09f95ac25d2..e83f8d90635 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. import contextlib from functools import partial @@ -9,9 +9,6 @@ from megatron.core import parallel_state from megatron.core.enums import ModelType -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_reset, -) from megatron.core.pipeline_parallel.p2p_communication import P2PCommunicator from megatron.core.pipeline_parallel.utils import ( is_pp_first_stage, @@ -565,9 +562,6 @@ def forward_backward_no_pipelining( if config.timers is not None: config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time) - if not forward_only and config.fine_grained_activation_offloading: - fine_grained_offloading_reset() - no_sync_func = config.no_sync_func if no_sync_func is None: no_sync_func = contextlib.nullcontext @@ -904,9 +898,6 @@ def forward_backward_pipelining_with_interleaving( adjust_tensor_shapes_fn is None ), "adjust_tensor_shapes_fn is not supported for interleaved pipeline parallelism" - if not forward_only and config.fine_grained_activation_offloading: - fine_grained_offloading_reset() - if config.overlap_p2p_comm and config.batch_p2p_comm: raise ValueError("Can not use both overlap_p2p_comm and batch_p2p_comm") @@ -2052,9 +2043,6 @@ def forward_backward_pipelining_without_interleaving( if config.timers is not None: config.timers('forward-backward', 
log_level=1).start(barrier=config.barrier_with_L1_time) - if not forward_only and config.fine_grained_activation_offloading: - fine_grained_offloading_reset() - # Disable async grad reductions no_sync_func = config.no_sync_func if no_sync_func is None: diff --git a/megatron/core/tensor_parallel/random.py b/megatron/core/tensor_parallel/random.py index 2ae15bef0d9..54cac0e41e3 100644 --- a/megatron/core/tensor_parallel/random.py +++ b/megatron/core/tensor_parallel/random.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # Parts of the code here are adapted from PyTorch # repo: https://github.com/pytorch/pytorch @@ -510,11 +510,10 @@ def forward(ctx, run_function, checkpoint_without_output_obj, *args): @staticmethod def backward(ctx, *args): """Backward pass.""" - inputs = ctx.inputs + inputs = ctx.saved_tensors outputs = ctx.outputs torch.autograd.backward(outputs, args) ctx.outputs = None - ctx.inputs = None grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else inp for inp in inputs) return (None, None) + grads @@ -574,9 +573,8 @@ def _recompute(self, _): recompute_ctx = contextlib.nullcontext() fp8_ctx = contextlib.nullcontext() - inputs = self.ctx.saved_tensors with torch.enable_grad(), fp8_ctx, recompute_ctx: - outputs = self.run_function(*inputs) + outputs = self.run_function(*self.ctx.saved_tensors) self.run_function = None self.rng_states = None @@ -592,7 +590,6 @@ def _recompute(self, _): output.untyped_storage().copy_(recomputation_output.untyped_storage()) self.ctx.outputs = outputs - self.ctx.inputs = inputs self.outputs = None self.ctx = None diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 3427b5ee3ab..d4e990041ca 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. from abc import ABC, abstractmethod from dataclasses import dataclass @@ -22,11 +22,6 @@ get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, ) -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, -) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import MegatronModule @@ -193,21 +188,6 @@ def __init__( and "core_attn" in self.config.recompute_modules ) - self.offload_qkv_linear = ( - self.config.fine_grained_activation_offloading - and "qkv_linear" in self.config.offload_modules - ) - - self.offload_core_attention = ( - self.config.fine_grained_activation_offloading - and "core_attn" in self.config.offload_modules - ) - - self.offload_attn_proj = ( - self.config.fine_grained_activation_offloading - and "attn_proj" in self.config.offload_modules - ) - # Output. self.linear_proj = build_module( submodules.linear_proj, @@ -750,17 +730,9 @@ def forward( if output_gate: assert split_qkv, "output_gate is not supported for unsplit mixed_qkv tensor." 
- if self.offload_qkv_linear: - hidden_states = fine_grained_offloading_group_start(hidden_states, name="qkv_linear") - with get_fine_grained_offloading_context(self.offload_qkv_linear): - qkv_output = self.get_query_key_value_tensors( - hidden_states, key_value_states, output_gate=output_gate, split_qkv=split_qkv - ) - if self.offload_qkv_linear: - qkv_output, _ = fine_grained_offloading_group_commit( - qkv_output, name="qkv_linear", forced_released_tensors=[hidden_states] - ) - + qkv_output = self.get_query_key_value_tensors( + hidden_states, key_value_states, output_gate=output_gate, split_qkv=split_qkv + ) attn_mask_type = self.attn_mask_type block_table = None gate = None @@ -909,20 +881,17 @@ def forward( packed_seq_params=packed_seq_params, ) else: - if self.offload_core_attention and self.training: - query = fine_grained_offloading_group_start(query, name="core_attn") if inference_context is None or inference_context.is_static_batching(): # Static batching attention kernel. - with get_fine_grained_offloading_context(self.offload_core_attention): - core_attn_out = self.core_attention( - query, - key, - value, - attention_mask, - attn_mask_type=attn_mask_type, - attention_bias=attention_bias, - packed_seq_params=packed_seq_params, - ) + core_attn_out = self.core_attention( + query, + key, + value, + attention_mask, + attn_mask_type=attn_mask_type, + attention_bias=attention_bias, + packed_seq_params=packed_seq_params, + ) else: # Dynamic batching attention kernel. 
@@ -942,10 +911,6 @@ def forward( block_table, ) core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)') - if self.offload_core_attention and self.training: - (core_attn_out,) = fine_grained_offloading_group_commit( - core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] - ) if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd': # reshape to same output shape as unpacked case @@ -966,14 +931,7 @@ def forward( # ================= nvtx_range_push(suffix="linear_proj") - if self.offload_attn_proj: - core_attn_out = fine_grained_offloading_group_start(core_attn_out, name="attn_proj") - with get_fine_grained_offloading_context(self.offload_attn_proj): - output, bias = self.linear_proj(core_attn_out) - if self.offload_attn_proj: - output, bias = fine_grained_offloading_group_commit( - output, bias, name="attn_proj", forced_released_tensors=[core_attn_out] - ) + output, bias = self.linear_proj(core_attn_out) nvtx_range_pop(suffix="linear_proj") return output, bias diff --git a/megatron/core/transformer/moe/README.md b/megatron/core/transformer/moe/README.md index a44daea38e2..0a933aed0df 100644 --- a/megatron/core/transformer/moe/README.md +++ b/megatron/core/transformer/moe/README.md @@ -210,20 +210,6 @@ Enable A2A overlap across different batches inspired by the DSv3 DualPipe implme --delay-wgrad-compute ``` -### Fine-grained Activation Offloading (collaborated with rednote) -Offload the input activation at the granularity of modules - -**Usage** -```bash -# Enable fine-grained activation offloading ---fine-grained-activation-offloading - -# Specify which modules are going to offload its input -# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". 
---offload-modules expert_fc1 -``` -For more details, please refer to the ```docs/source/api-guide/fine_grained_activation_offloading.md``` - ### MoE Related Arguments | Item | Description | | --- | --- | diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index ca308da0d21..d0ac20a7536 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. import copy import itertools @@ -27,11 +27,6 @@ from megatron.core.fusions.fused_bias_swiglu import weighted_bias_swiglu_impl from megatron.core.fusions.fused_weighted_squared_relu import weighted_squared_relu_impl from megatron.core.jit import jit_fuser -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, -) from megatron.core.tensor_parallel.layers import ( _initialize_affine_weight_cpu, _initialize_affine_weight_gpu, @@ -830,16 +825,6 @@ def __init__( tp_group=pg_collection.expt_tp, ) - self.offload_expert_fc1 = ( - self.config.fine_grained_activation_offloading - and "expert_fc1" in self.config.offload_modules - ) - - self.offload_moe_act = ( - self.config.fine_grained_activation_offloading - and "moe_act" in self.config.offload_modules - ) - self.activation_recompute = ( self.config.recompute_granularity == 'selective' and "moe_act" in self.config.recompute_modules @@ -849,12 +834,6 @@ def __init__( set_save_original_input(self.linear_fc2) - # This is to avoid the CPU overhead of multiple d2h copies - if self.offload_expert_fc1 and not (self.config.fp8 or self.config.fp4): - from megatron.core.extensions.transformer_engine import set_save_original_input - - set_save_original_input(self.linear_fc1) - if self.config.fp8 or self.config.fp4: 
assert HAVE_TE, "FP8 and FP4 requires TE." self.quantization_padding = Fp8Padding(self.num_local_experts) @@ -919,21 +898,9 @@ def forward( # Probs already applied, so reset to 1. permuted_probs = torch.ones_like(permuted_probs) - if self.offload_expert_fc1: - permuted_local_hidden_states = fine_grained_offloading_group_start( - permuted_local_hidden_states, name="expert_fc1" - ) - with get_fine_grained_offloading_context(self.offload_expert_fc1): - fc1_output, bias_parallel = self.linear_fc1( - permuted_local_hidden_states, tokens_per_expert - ) - if self.offload_expert_fc1: - fc1_output, bias_parallel = fine_grained_offloading_group_commit( - fc1_output, - bias_parallel, - name="expert_fc1", - forced_released_tensors=[permuted_local_hidden_states], - ) + intermediate_parallel, bias_parallel = self.linear_fc1( + permuted_local_hidden_states, tokens_per_expert + ) def bias_act_func(intermediate_parallel, bias_parallel, permuted_probs): if self.config.use_te_activation_func: @@ -993,26 +960,18 @@ def glu(x): intermediate_parallel = intermediate_parallel.to(original_dtype) return intermediate_parallel - if self.offload_moe_act: - fc1_output = fine_grained_offloading_group_start(fc1_output, name="moe_act") - if self.activation_recompute: self.activation_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with get_fine_grained_offloading_context(self.offload_moe_act): - bias_act_output = self.activation_checkpoint.checkpoint( - bias_act_func, fc1_output, bias_parallel, permuted_probs - ) - else: - with get_fine_grained_offloading_context(self.offload_moe_act): - bias_act_output = bias_act_func(fc1_output, bias_parallel, permuted_probs) - - output, output_bias = self.linear_fc2(bias_act_output, tokens_per_expert) - if self.activation_recompute: + intermediate_parallel = self.activation_checkpoint.checkpoint( + bias_act_func, intermediate_parallel, bias_parallel, permuted_probs + ) + output, output_bias = self.linear_fc2(intermediate_parallel, tokens_per_expert) 
self.activation_checkpoint.discard_output_and_register_recompute(output) - if self.offload_moe_act: - (output,) = fine_grained_offloading_group_commit( - output, name="moe_act", forced_released_tensors=[fc1_output] + else: + intermediate_parallel = bias_act_func( + intermediate_parallel, bias_parallel, permuted_probs ) + output, output_bias = self.linear_fc2(intermediate_parallel, tokens_per_expert) # upad and concat the output if self.config.fp8 or self.config.fp4: diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index 5d3f16c1041..a8893ebec36 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. import math @@ -22,11 +22,6 @@ _yarn_get_mscale, apply_rotary_pos_emb, ) -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, -) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel.layers import ColumnParallelLinear from megatron.core.tensor_parallel.mappings import ( @@ -271,19 +266,15 @@ def forward( query, key, value, attention_mask, packed_seq_params=packed_seq_params ) else: - if self.offload_core_attention and self.training: - query = fine_grained_offloading_group_start(query, name="core_attn") - if inference_context is None or inference_context.is_static_batching(): - with get_fine_grained_offloading_context(self.offload_core_attention): - core_attn_out = self.core_attention( - query, - key, - value, - attention_mask, - packed_seq_params=packed_seq_params, - attn_mask_type=attn_mask_type, - ) + core_attn_out = self.core_attention( + query, + key, + value, + attention_mask, + 
packed_seq_params=packed_seq_params, + attn_mask_type=attn_mask_type, + ) elif self.cache_mla_latents: # Dynamic batching attention kernel. q, k, v = (query, key, value) @@ -304,10 +295,6 @@ def forward( # Only rearrange if not in absorption mode (Flash MLA handles format correctly) if not inference_context.is_decode_only(): core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)') - if self.offload_core_attention and self.training: - (core_attn_out,) = fine_grained_offloading_group_commit( - core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] - ) # We are doing absorption with cache mla latents and decode mode. if self.cache_mla_latents and inference_context.is_decode_only(): @@ -333,14 +320,7 @@ def forward( # ================= # Output. [sq, b, h] # ================= - if self.offload_attn_proj: - core_attn_out = fine_grained_offloading_group_start(core_attn_out, name="attn_proj") - with get_fine_grained_offloading_context(self.offload_attn_proj): - output, bias = self.linear_proj(core_attn_out) - if self.offload_attn_proj: - output, bias = fine_grained_offloading_group_commit( - output, bias, name="attn_proj", forced_released_tensors=[core_attn_out] - ) + output, bias = self.linear_proj(core_attn_out) return output, bias diff --git a/megatron/core/transformer/multi_token_prediction.py b/megatron/core/transformer/multi_token_prediction.py index a619b9ffa55..bd3aa9c8c96 100755 --- a/megatron/core/transformer/multi_token_prediction.py +++ b/megatron/core/transformer/multi_token_prediction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
from contextlib import nullcontext from dataclasses import dataclass @@ -13,9 +13,6 @@ from megatron.core.fp8_utils import get_fp8_context from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider from megatron.core.packed_seq_params import PackedSeqParams -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_set_last_layer, -) from megatron.core.pipeline_parallel.utils import is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel import ( @@ -904,8 +901,6 @@ def forward( hidden_states_list = list(torch.chunk(hidden_states, 1 + offset, dim=0)) hidden_states = hidden_states_list[offset] for layer_number in range(len(self.layers)): - if self.config.fine_grained_activation_offloading: - fine_grained_offloading_set_last_layer(layer_number == len(self.layers) - 1) (hidden_states, input_ids, position_ids) = self.layers[layer_number]( input_ids=input_ids, position_ids=position_ids, diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index 06e8f1372f4..aead6133f22 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
import logging from contextlib import nullcontext from dataclasses import dataclass @@ -16,9 +16,6 @@ from megatron.core.fusions.fused_layer_norm import FusedLayerNorm from megatron.core.inference.contexts import BaseInferenceContext from megatron.core.packed_seq_params import PackedSeqParams -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_set_last_layer, -) from megatron.core.pipeline_parallel.utils import is_vp_first_stage, is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.enums import LayerType @@ -696,11 +693,6 @@ def forward( else: inner_quantization_context = nullcontext() - if self.config.fine_grained_activation_offloading: - fine_grained_offloading_set_last_layer( - l_no == self.num_layers_per_pipeline_rank - 1 - ) - with self.offload_context, inner_quantization_context: hidden_states, context = layer( hidden_states=hidden_states, diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index ecc700375cd..b39b7706feb 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. import warnings from dataclasses import dataclass @@ -772,25 +772,6 @@ class TransformerConfig(ModelParallelConfig): """Transformer implementation to use. Options are 'transformer_engine' for Transformer Engine and 'local' for MCore.""" - ##################################### - # Fine-grained Activation Offloading - ##################################### - fine_grained_activation_offloading: bool = False - """If True, offload the input of the specified modules to the CPU.""" - - offload_modules: Optional[list[str]] = None - """The submodules to offload its input. 
- choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". - "attn_norm": offload the input of the normalization in the attention part. - "core_attn": offload the input of the core attention part. - "mlp_norm": offload the input of the normalization in the mlp part. - "attn_proj": offload the input of the attn linear projection part. - "expert_fc1": offload the input of the expert fc1 part. - "moe_act": offload the input of the moe act part. - """ - min_offloaded_tensor_size: int = 1024 * 1024 - """The minimum size of the tensor to be offloaded.""" - def __post_init__(self): """Python dataclass method that is used to modify attributes after initialization. See https://docs.python.org/3/library/dataclasses.html#post-init-processing for more @@ -1136,28 +1117,6 @@ def __post_init__(self): if "moe" not in self.recompute_modules: self.recompute_modules.append("moe") - if self.fine_grained_activation_offloading: - assert self.offload_modules is not None and len(self.offload_modules) > 0 - allowed_modules = { - "core_attn", - "attn_proj", - "expert_fc1", - "moe_act", - "attn_norm", - "mlp_norm", - } - invalid_modules = set(self.offload_modules) - allowed_modules - assert not invalid_modules, ( - f'Invalid choices for offload_modules: {invalid_modules}. ' - f'Allowed modules are: {allowed_modules}' - ) - if "attn_proj" in self.offload_modules and "core_attn" not in self.offload_modules: - raise ValueError( - "attn_proj cannot be set to offload_modules alone without core_attn " - "because the input of attn_proj is the output of core_attn, " - "which is needed in core_attn.backward()." 
- ) - if ( self.num_layers_in_first_pipeline_stage is not None or self.num_layers_in_last_pipeline_stage is not None diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index c36ff7515e4..a5babece9d0 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. import logging import warnings @@ -397,16 +397,6 @@ def __init__( if "mlp" in self.config.recompute_modules: if not isinstance(self.mlp, MoELayer): self.recompute_mlp = True - self.offload_attn_norm = ( - self.config.fine_grained_activation_offloading - and "attn_norm" in self.config.offload_modules - and not isinstance(self.input_layernorm, IdentityOp) - ) - self.offload_mlp_norm = ( - self.config.fine_grained_activation_offloading - and "mlp_norm" in self.config.offload_modules - and not isinstance(self.pre_mlp_layernorm, IdentityOp) - ) # @jcasper how should we handle nvfuser? # Set bias+dropout+add fusion grad_enable execution handler. @@ -489,29 +479,20 @@ def _forward_attention( context (Tensor): Updated context tensor if cross-attention is used, otherwise None. """ - from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, - ) inference_context = deprecate_inference_params(inference_context, inference_params) # Residual connection. 
residual = hidden_states - if self.offload_attn_norm: - hidden_states = fine_grained_offloading_group_start(hidden_states, name="attn_norm") # Optional Input Layer norm if self.recompute_input_layernorm: self.input_layernorm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with get_fine_grained_offloading_context(self.offload_attn_norm): - input_layernorm_output = self.input_layernorm_checkpoint.checkpoint( - self.input_layernorm, hidden_states - ) + input_layernorm_output = self.input_layernorm_checkpoint.checkpoint( + self.input_layernorm, hidden_states + ) else: - with get_fine_grained_offloading_context(self.offload_attn_norm): - input_layernorm_output = self.input_layernorm(hidden_states) + input_layernorm_output = self.input_layernorm(hidden_states) # Self attention. nvtx_range_push(suffix="self_attention") @@ -545,11 +526,6 @@ def _forward_attention( ) nvtx_range_pop(suffix="self_attn_bda") - if self.offload_attn_norm: - (hidden_states,) = fine_grained_offloading_group_commit( - hidden_states, name="attn_norm", forced_released_tensors=[residual] - ) - # Residual connection. residual = hidden_states @@ -587,27 +563,17 @@ def _forward_mlp(self, hidden_states, inference_context=None): output (Tensor): Transformed hidden states of shape [s, b, h]. """ - from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, - ) - # Residual connection. residual = hidden_states - if self.offload_mlp_norm: - hidden_states = fine_grained_offloading_group_start(hidden_states, name="mlp_norm") # Optional Layer norm post the cross-attention. 
if self.recompute_pre_mlp_layernorm: self.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with get_fine_grained_offloading_context(self.offload_mlp_norm): - pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint( - self.pre_mlp_layernorm, hidden_states - ) + pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint( + self.pre_mlp_layernorm, hidden_states + ) else: - with get_fine_grained_offloading_context(self.offload_mlp_norm): - pre_mlp_layernorm_output = self.pre_mlp_layernorm(hidden_states) + pre_mlp_layernorm_output = self.pre_mlp_layernorm(hidden_states) nvtx_range_push(suffix="mlp") # Potentially chunk the MLP computation during prefill to minimize the peak activation size @@ -667,10 +633,6 @@ def _forward_mlp(self, hidden_states, inference_context=None): mlp_output_with_bias, residual, self.hidden_dropout ) nvtx_range_pop(suffix="mlp_bda") - if self.offload_mlp_norm: - (hidden_states,) = fine_grained_offloading_group_commit( - hidden_states, name="mlp_norm", forced_released_tensors=[residual] - ) # Jit compiled function creates 'view' tensor. This tensor # potentially gets saved in the MPU checkpoint function context, diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 8e5f343b73c..bdf915a8ae1 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1216,10 +1216,6 @@ def validate_args(args, defaults={}): "when enabling delay_wgrad_compute" ) - if args.fine_grained_activation_offloading: - assert args.transformer_impl == 'transformer_engine', \ - "Fine-grained activation offloading is only supported with transformer_engine implementation" - if args.mtp_num_layers: assert not args.use_legacy_models, "The legacy Megatron models does not support Multi-Token Prediction (MTP)." 
assert args.position_embedding_type == "rope" or args.position_embedding_type == "none", ( @@ -2331,12 +2327,7 @@ def _add_training_args(parser): help='The communicator group names to use high priority streams.') group.add_argument('--use-te-activation-func', action='store_true', help='Use activation function kernel from Transformer Engine in MLP module.') - group.add_argument('--fine-grained-activation-offloading', action='store_true', - help='Enable fine-grained activation offloading.') - group.add_argument('--offload-modules', nargs='*', type=str, default=[], - help='The submodules to offload its input. Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act".') - group.add_argument('--min-offloaded-tensor-size', type=int, default=1024*1024, - help='The minimum size of the tensor to be offloaded.') + return parser diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json deleted file mode 100644 index 30ea509a50b..00000000000 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 11.0637, - "5": 9.48263, - "10": 9.04035, - "15": 8.00837, - "20": 7.88364, - "25": 7.67597, - "30": 7.63447, - "35": 7.21393, - "40": 7.55564, - "45": 7.21045, - "50": 7.05439 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 38802064.0, - "5": 394456256.0, - "10": 571185472.0, - "15": 699100416.0, - "20": 891692160.0, - "25": 748799104.0, - "30": 794511296.0, - "35": 671593792.0, - "40": 421718816.0, - "45": 517934176.0, - "50": 472902496.0 - } - }, - "mem-allocated-bytes": { - 
"start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 6025468416.0, - "5": 6025470464.0, - "10": 6025470464.0, - "15": 6025470464.0, - "20": 6025470464.0, - "25": 6025470464.0, - "30": 6025470464.0, - "35": 6025470464.0, - "40": 6025470464.0, - "45": 6025470464.0, - "50": 6025470464.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 45099868160.0, - "5": 49175810048.0, - "10": 49175810048.0, - "15": 49175810048.0, - "20": 49175810048.0, - "25": 49175810048.0, - "30": 49211260928.0, - "35": 49211260928.0, - "40": 49211260928.0, - "45": 49211260928.0, - "50": 49211260928.0 - } - }, - "mtp_1 loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 11.04508, - "5": 9.76285, - "10": 9.04997, - "15": 7.93865, - "20": 7.79984, - "25": 7.60324, - "30": 7.56633, - "35": 7.13802, - "40": 7.45784, - "45": 7.11892, - "50": 6.9559 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 52.8667, - "5": 2.06295, - "10": 1.09336, - "15": 1.10509, - "20": 1.08631, - "25": 1.08991, - "30": 1.10548, - "35": 1.10049, - "40": 1.11219, - "45": 1.09542, - "50": 1.09805 - } - } -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json deleted file mode 100644 index 30ea509a50b..00000000000 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 11.0637, - "5": 9.48263, - "10": 9.04035, - "15": 8.00837, - "20": 7.88364, - "25": 7.67597, - "30": 7.63447, - "35": 7.21393, - "40": 
7.55564, - "45": 7.21045, - "50": 7.05439 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 38802064.0, - "5": 394456256.0, - "10": 571185472.0, - "15": 699100416.0, - "20": 891692160.0, - "25": 748799104.0, - "30": 794511296.0, - "35": 671593792.0, - "40": 421718816.0, - "45": 517934176.0, - "50": 472902496.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 6025468416.0, - "5": 6025470464.0, - "10": 6025470464.0, - "15": 6025470464.0, - "20": 6025470464.0, - "25": 6025470464.0, - "30": 6025470464.0, - "35": 6025470464.0, - "40": 6025470464.0, - "45": 6025470464.0, - "50": 6025470464.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 45099868160.0, - "5": 49175810048.0, - "10": 49175810048.0, - "15": 49175810048.0, - "20": 49175810048.0, - "25": 49175810048.0, - "30": 49211260928.0, - "35": 49211260928.0, - "40": 49211260928.0, - "45": 49211260928.0, - "50": 49211260928.0 - } - }, - "mtp_1 loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 11.04508, - "5": 9.76285, - "10": 9.04997, - "15": 7.93865, - "20": 7.79984, - "25": 7.60324, - "30": 7.56633, - "35": 7.13802, - "40": 7.45784, - "45": 7.11892, - "50": 6.9559 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 52.8667, - "5": 2.06295, - "10": 1.09336, - "15": 1.10509, - "20": 1.08631, - "25": 1.08991, - "30": 1.10548, - "35": 1.10049, - "40": 1.11219, - "45": 1.09542, - "50": 1.09805 - } - } -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml deleted file mode 100644 index d9ec0456190..00000000000 --- 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml +++ /dev/null @@ -1,139 +0,0 @@ -ENV_VARS: - CUDA_DEVICE_MAX_CONNECTIONS: 32 - NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 - PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True - NCCL_NVLS_ENABLE: 0 - PYTHONWARNINGS: ignore - NCCL_DEBUG: VERSION -MODEL_ARGS: - # Distributed args - --distributed-timeout-minutes: 60 - --tensor-model-parallel-size: 2 - --pipeline-model-parallel-size: 2 - --expert-model-parallel-size: 4 - --context-parallel-size: 1 - --expert-tensor-parallel-size: 1 - --use-distributed-optimizer: true - # NOTE: uncomment if TE >= 2.9.0 - # --overlap-grad-reduce: true - # --overlap-param-gather: true - # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN - --attention-backend: unfused # TODO: switch back to fused attention after fix - # Training args - --use-mcore-models: true - --sequence-parallel: true - --disable-bias-linear: true - --micro-batch-size: 4 - --global-batch-size: 32 - --train-iters: 50 - --exit-duration-in-mins: 230 - --no-check-for-nan-in-loss-and-grad: true - --no-rope-fusion: true - --cross-entropy-loss-fusion: true - --cross-entropy-fusion-impl: native - --manual-gc: true - --manual-gc-interval: 100 - --recompute-granularity: selective - --recompute-modules: "[layernorm mla_up_proj mlp moe_act]" - --fine-grained-activation-offloading: true - --offload-modules: "[expert_fc1 moe_act attn_norm mlp_norm]" - # Transformer Engine args - --transformer-impl: transformer_engine - # Data args - --seq-length: 4096 - --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt - --split: 949,50,1 - # Add network size args - --num-layers: 15 - --moe-layer-freq: ([0]*3+[1]*12) - --pipeline-model-parallel-layout: 
Et*3\\|\\(tt\\|\\)*6mL # Et*3|(tt|)*6mL - --hidden-size: 1024 - --ffn-hidden-size: 4096 - --num-attention-heads: 32 - --kv-channels: 128 - --max-position-embeddings: 4096 - --position-embedding-type: rope - --rotary-base: 10000 - --make-vocab-size-divisible-by: 3232 - --normalization: RMSNorm - --norm-epsilon: 1e-6 - --swiglu: true - --untie-embeddings-and-output-weights: true - --multi-latent-attention: true - # Comment out the following MTP args to disable MTP - --mtp-num-layers: 1 - --mtp-loss-scaling-factor: 0.1 - # Add regularization args - --attention-dropout: 0.0 - --hidden-dropout: 0.0 - --clip-grad: 1.0 - --weight-decay: 0.1 - --qk-layernorm: true - # Add learning rate args - --lr-warmup-fraction: .01 - --lr: 0.00015 - --min-lr: 1.0e-5 - --lr-decay-style: cosine - --adam-beta1: 0.9 - --adam-beta2: 0.95 - # Add MoE args - --num-experts: 32 - --moe-ffn-hidden-size: 1024 - --moe-shared-expert-intermediate-size: 1024 - --moe-router-load-balancing-type: seq_aux_loss - --moe-router-topk: 4 - --moe-token-dispatcher-type: alltoall - --moe-router-pre-softmax: true - --moe-grouped-gemm: true - --moe-aux-loss-coeff: 1e-4 - --moe-router-group-topk: 2 - --moe-router-num-groups: 4 - --moe-router-topk-scaling-factor: 2.0 - --moe-router-score-function: sigmoid - --moe-router-enable-expert-bias: true - --moe-router-bias-update-rate: 1e-3 - --moe-router-dtype: fp32 - --moe-permute-fusion: true - # Add MLA args - --q-lora-rank: 1536 - --kv-lora-rank: 512 - --qk-head-dim: 128 - --qk-pos-emb-head-dim: 64 - --v-head-dim: 128 - --rotary-scaling-factor: 40 - --mscale: 1.0 - --mscale-all-dim: 1.0 - # Add validation args - --eval-iters: 32 - --eval-interval: 200 - # Add checkpointing args - --save: ${CHECKPOINT_SAVE_PATH} - --load: ${CHECKPOINT_LOAD_PATH} - --save-interval: 25 - # Add initialization args - --init-method-std: 0.02 - # Add logging args - --log-timers-to-tensorboard: true - --log-memory-to-tensorboard: true - --log-num-zeros-in-grad: true - --log-params-norm: true - 
--log-validation-ppl-to-tensorboard: true - --log-throughput: true - --log-interval: 1 - --logging-level: 40 - --tensorboard-dir: ${TENSORBOARD_PATH} - # Add mixed precision args - --bf16: true - --exit-interval: 50 - --overlap-moe-expert-parallel-comm: true -TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular -METRICS: - - "iteration-time" - - "lm loss" - - "num-zeros" - - "mem-allocated-bytes" - - "mem-max-allocated-bytes" - - "mtp_1 loss" diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json deleted file mode 100644 index 3687e19e563..00000000000 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 11.04266, - "5": 9.38536, - "10": 8.82761, - "15": 7.86966, - "20": 7.72022, - "25": 7.53119, - "30": 7.5026, - "35": 7.10343, - "40": 7.42037, - "45": 7.07056, - "50": 6.90946 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 844114112.0, - "5": 856834688.0, - "10": 928751040.0, - "15": 952825152.0, - "20": 987111232.0, - "25": 926008384.0, - "30": 864767232.0, - "35": 855095360.0, - "40": 849505920.0, - "45": 847187584.0, - "50": 846195840.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 4419107328.0, - "5": 4419108864.0, - "10": 4419108864.0, - "15": 4419108864.0, - "20": 4419108864.0, - "25": 4419108864.0, - "30": 4419108864.0, - "35": 4419108864.0, - "40": 4419108864.0, - "45": 4419108864.0, - "50": 4419108864.0 - } - }, - 
"mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 37959917568.0, - "5": 39583289344.0, - "10": 39583289344.0, - "15": 39583289344.0, - "20": 39583289344.0, - "25": 39583289344.0, - "30": 39583289344.0, - "35": 39583289344.0, - "40": 39583289344.0, - "45": 39583289344.0, - "50": 39583289344.0 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 58.78709, - "5": 2.40565, - "10": 1.13046, - "15": 1.39764, - "20": 1.1273, - "25": 1.12154, - "30": 1.03587, - "35": 1.09545, - "40": 1.09901, - "45": 1.00656, - "50": 1.00794 - } - } -} diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json deleted file mode 100644 index 3687e19e563..00000000000 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 11.04266, - "5": 9.38536, - "10": 8.82761, - "15": 7.86966, - "20": 7.72022, - "25": 7.53119, - "30": 7.5026, - "35": 7.10343, - "40": 7.42037, - "45": 7.07056, - "50": 6.90946 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 844114112.0, - "5": 856834688.0, - "10": 928751040.0, - "15": 952825152.0, - "20": 987111232.0, - "25": 926008384.0, - "30": 864767232.0, - "35": 855095360.0, - "40": 849505920.0, - "45": 847187584.0, - "50": 846195840.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 4419107328.0, - "5": 4419108864.0, - "10": 4419108864.0, - "15": 4419108864.0, - "20": 
4419108864.0, - "25": 4419108864.0, - "30": 4419108864.0, - "35": 4419108864.0, - "40": 4419108864.0, - "45": 4419108864.0, - "50": 4419108864.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 37959917568.0, - "5": 39583289344.0, - "10": 39583289344.0, - "15": 39583289344.0, - "20": 39583289344.0, - "25": 39583289344.0, - "30": 39583289344.0, - "35": 39583289344.0, - "40": 39583289344.0, - "45": 39583289344.0, - "50": 39583289344.0 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 50, - "step_interval": 5, - "values": { - "1": 58.78709, - "5": 2.40565, - "10": 1.13046, - "15": 1.39764, - "20": 1.1273, - "25": 1.12154, - "30": 1.03587, - "35": 1.09545, - "40": 1.09901, - "45": 1.00656, - "50": 1.00794 - } - } -} diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml deleted file mode 100644 index f4b64722712..00000000000 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml +++ /dev/null @@ -1,134 +0,0 @@ -ENV_VARS: - CUDA_DEVICE_MAX_CONNECTIONS: 1 - NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 - PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True - NCCL_NVLS_ENABLE: 0 - PYTHONWARNINGS: ignore - NCCL_DEBUG: VERSION -MODEL_ARGS: - # Distributed args - --distributed-timeout-minutes: 60 - --tensor-model-parallel-size: 2 - --pipeline-model-parallel-size: 2 - --expert-model-parallel-size: 4 - --context-parallel-size: 1 - --expert-tensor-parallel-size: 1 - --use-distributed-optimizer: true - # NOTE: uncomment if TE >= 2.9.0 - # --overlap-grad-reduce: true - # --overlap-param-gather: true - # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN - 
--attention-backend: unfused # TODO: switch back to fused attention after fix - # Training args - --use-mcore-models: true - --sequence-parallel: true - --disable-bias-linear: true - --micro-batch-size: 4 - --global-batch-size: 32 - --train-iters: 50 - --exit-duration-in-mins: 230 - --no-check-for-nan-in-loss-and-grad: true - --no-rope-fusion: true - --cross-entropy-loss-fusion: true - --cross-entropy-fusion-impl: native - --manual-gc: true - --manual-gc-interval: 100 - --recompute-granularity: selective - --recompute-modules: "[layernorm mla_up_proj mlp moe_act]" - --fine-grained-activation-offloading: true - --offload-modules: "[expert_fc1 moe_act attn_norm mlp_norm]" - # Transformer Engine args - --transformer-impl: transformer_engine - # Data args - --seq-length: 4096 - --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt - --split: 949,50,1 - # Add network size args - --num-layers: 15 - --moe-layer-freq: ([0]*3+[1]*12) - --pipeline-model-parallel-layout: Et*3\\|\\(tt\\|\\)*6L # Et*3|(tt|)*6L - --hidden-size: 1024 - --ffn-hidden-size: 4096 - --num-attention-heads: 32 - --kv-channels: 128 - --max-position-embeddings: 4096 - --position-embedding-type: rope - --rotary-base: 10000 - --make-vocab-size-divisible-by: 3232 - --normalization: RMSNorm - --norm-epsilon: 1e-6 - --swiglu: true - --untie-embeddings-and-output-weights: true - --multi-latent-attention: true - # Add regularization args - --attention-dropout: 0.0 - --hidden-dropout: 0.0 - --clip-grad: 1.0 - --weight-decay: 0.1 - --qk-layernorm: true - # Add learning rate args - --lr-warmup-fraction: .01 - --lr: 0.00015 - --min-lr: 1.0e-5 - --lr-decay-style: cosine - --adam-beta1: 0.9 - --adam-beta2: 0.95 - # Add MoE args - --num-experts: 32 - --moe-ffn-hidden-size: 1024 - --moe-shared-expert-intermediate-size: 1024 - 
--moe-router-load-balancing-type: seq_aux_loss - --moe-router-topk: 4 - --moe-token-dispatcher-type: alltoall - --moe-router-pre-softmax: true - --moe-grouped-gemm: true - --moe-aux-loss-coeff: 1e-4 - --moe-router-group-topk: 2 - --moe-router-num-groups: 4 - --moe-router-topk-scaling-factor: 2.0 - --moe-router-score-function: sigmoid - --moe-router-enable-expert-bias: true - --moe-router-bias-update-rate: 1e-3 - --moe-router-dtype: fp32 - --moe-permute-fusion: true - # Add MLA args - --q-lora-rank: 1536 - --kv-lora-rank: 512 - --qk-head-dim: 128 - --qk-pos-emb-head-dim: 64 - --v-head-dim: 128 - --rotary-scaling-factor: 40 - --mscale: 1.0 - --mscale-all-dim: 1.0 - # Add validation args - --eval-iters: 32 - --eval-interval: 200 - # Add checkpointing args - --save: ${CHECKPOINT_SAVE_PATH} - --load: ${CHECKPOINT_LOAD_PATH} - --save-interval: 25 - # Add initialization args - --init-method-std: 0.02 - # Add logging args - --log-timers-to-tensorboard: true - --log-memory-to-tensorboard: true - --log-num-zeros-in-grad: true - --log-params-norm: true - --log-validation-ppl-to-tensorboard: true - --log-throughput: true - --log-interval: 1 - --logging-level: 40 - --tensorboard-dir: ${TENSORBOARD_PATH} - # Add mixed precision args - --bf16: true - --exit-interval: 50 -TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular -METRICS: - - "iteration-time" - - "lm loss" - - "num-zeros" - - "mem-allocated-bytes" - - "mem-max-allocated-bytes" diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 63320ae3c3d..8164ca37df8 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -124,16 +124,6 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: 
[gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] ####################################################################### # Super important MR tests that run for both DEV and LTS per MR # ####################################################################### diff --git a/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py deleted file mode 100644 index edec95288c2..00000000000 --- a/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - -import gc - -import pytest -import torch - -EPSILON = 0.1 - -# Skip all tests if CUDA is not available -cuda_available = torch.cuda.is_available() - - -def _reset_cuda_memory(): - gc.collect() - if cuda_available: - torch.cuda.empty_cache() - - -class ToyModel(torch.nn.Module): - def __init__(self, hidden_size: int = 2048, num_layers: int = 4, dtype=torch.bfloat16): - super().__init__() - layers = [] - for _ in range(num_layers): - layers.append( - torch.nn.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device="cuda") - ) - self.net = torch.nn.Sequential(*layers).to(device="cuda", dtype=dtype) - self.hidden_size = hidden_size - self.num_layers = num_layers - self.dtype = dtype - - # Prevent weights/bias from being considered activation tensors for offload; - # ensure we only count activation tensors (inputs x) in memory accounting. - for p in self.parameters(): - try: - setattr(p, "offloading_activation", False) - except Exception: - pass - - def forward(self, x, use_offload: bool = False): - from megatron.core.pipeline_parallel import fine_grained_activation_offload as off - - if use_offload: - # Initialize a new chunk (microbatch) and enable offload context. 
- with off.get_fine_grained_offloading_context(True): - off.fine_grained_offloading_init_chunk_handler( - vp_stage=None, min_offloaded_tensor_size=1 - ) - for i, layer in enumerate(self.net): - # Group by module; with this linear-only model, each group corresponds to a layer. - off.fine_grained_offloading_set_last_layer(i == len(self.net) - 1) - x = off.fine_grained_offloading_group_start(x, name=f"layer_{i}") - x = layer(x) - # Commit the group; returns a tuple of tensors - (x,) = off.fine_grained_offloading_group_commit( - x, name=f"layer_{i}", forced_released_tensors=[] - ) - return x - # Baseline path (no offload hooks) - with ( - torch.autocast(device_type="cuda", dtype=self.dtype) - if self.dtype in (torch.float16, torch.bfloat16) - else torch.cuda.amp.autocast(enabled=False) - ): - for layer in self.net: - x = layer(x) - return x - - -@pytest.fixture(autouse=True) -def _monkeypatch_offload_deps(monkeypatch): - # Avoid requiring torch.distributed initialization and NVML in tests - import megatron.core.pipeline_parallel.fine_grained_activation_offload as off - - monkeypatch.setattr(off, "debug_rank", lambda *args, **kwargs: None, raising=False) - monkeypatch.setattr(off, "set_ideal_affinity_for_current_gpu", lambda: None, raising=False) - # Ensure a clean state each test - off.fine_grained_offloading_reset() - yield - off.fine_grained_offloading_reset() - - -def test_fine_grained_activation_offload_memory_reduction(): - torch.manual_seed(1234) - # Use a linear-only stack so theoretical saved memory equals sum of per-layer input x bytes. 
- model = ToyModel(hidden_size=2048, num_layers=8, dtype=torch.bfloat16).eval() - - # Create input - inp = torch.randn( - (2048, model.hidden_size), device="cuda", dtype=torch.bfloat16, requires_grad=True - ) - - # Warmup to stabilize allocator behavior - _reset_cuda_memory() - out = model(inp, use_offload=False) - (out.sum()).backward() - torch.cuda.synchronize() - _reset_cuda_memory() - - # Baseline memory measurement (no offload) - _reset_cuda_memory() - inp_baseline = inp.detach().clone().requires_grad_(True) - baseline_mem_before = torch.cuda.memory_allocated() / (1024**2) - out_base = model(inp_baseline, use_offload=False) - baseline_mem_after = (torch.cuda.memory_allocated() - out_base.nbytes) / (1024**2) - (out_base.sum()).backward() - torch.cuda.synchronize() - baseline_delta = baseline_mem_after - baseline_mem_before - - # Offload memory measurement - from megatron.core.pipeline_parallel import fine_grained_activation_offload as off - - off.fine_grained_offloading_reset() - _reset_cuda_memory() - inp_off = inp.detach().clone().requires_grad_(True) - offload_mem_before = torch.cuda.memory_allocated() / (1024**2) - out_off = model(inp_off, use_offload=True) - offload_mem_after = (torch.cuda.memory_allocated() - out_off.nbytes) / (1024**2) - (out_off.sum()).backward() - torch.cuda.synchronize() - offload_delta = offload_mem_after - offload_mem_before - - # Offload should reduce peak cached memory usage after forward - assert ( - offload_delta < baseline_delta - ), f"offload did not reduce memory: off={offload_delta:.2f}MiB base={baseline_delta:.2f}MiB" - - # Theoretical savings: storing per-layer input x (same shape each layer). - bytes_per_elem = inp.element_size() # 2 for bfloat16 - input_bytes = inp.numel() * bytes_per_elem - # -2 because the first and last activations are not offloaded - expected_saved_mib = (model.num_layers - 2) * (input_bytes / (1024**2)) - - # Actual savings ≈ baseline_delta - offload_delta (both exclude output tensor memory). 
- actual_saved_mib = baseline_delta - offload_delta - - # Allow slack for allocator jitter and extra intermediates; magnitudes should match. - rel_err = abs(actual_saved_mib - expected_saved_mib) / max(expected_saved_mib, 1e-6) - assert ( - rel_err <= EPSILON - ), f"saved mismatch: actual={actual_saved_mib:.2f}MiB expected~={expected_saved_mib:.2f}MiB (rel_err={rel_err:.2f})" - - -def test_fine_grained_activation_offload_output_and_grad_consistency(): - torch.manual_seed(2025) - hidden = 1024 - layers = 3 - - # Create identical models by resetting seed - torch.manual_seed(2025) - model_base = ToyModel(hidden_size=hidden, num_layers=layers, dtype=torch.bfloat16).train() - torch.manual_seed(2025) - model_off = ToyModel(hidden_size=hidden, num_layers=layers, dtype=torch.bfloat16).train() - - # Same input and target - inp = torch.randn((32, hidden), device="cuda", dtype=torch.bfloat16, requires_grad=True) - target = torch.randn_like(inp) - - # Baseline forward/backward - out_base = model_base(inp, use_offload=False) - loss_base = torch.nn.functional.mse_loss(out_base, target) - loss_base.backward() - grads_base = [ - p.grad.detach().clone() if p.grad is not None else None for p in model_base.parameters() - ] - - # Offload forward/backward - from megatron.core.pipeline_parallel import fine_grained_activation_offload as off - - off.fine_grained_offloading_reset() - out_off = model_off(inp.detach().clone().requires_grad_(True), use_offload=True) - loss_off = torch.nn.functional.mse_loss(out_off, target) - loss_off.backward() - grads_off = [ - p.grad.detach().clone() if p.grad is not None else None for p in model_off.parameters() - ] - - # Compare outputs - assert torch.allclose(out_off.float(), out_base.float(), rtol=1e-3, atol=1e-3) - - # Compare gradients parameter-wise - for gb, go in zip(grads_base, grads_off): - if gb is None and go is None: - continue - assert gb is not None and go is not None - assert torch.allclose(go.float(), gb.float(), rtol=1e-3, atol=1e-3) 
From d95e86a25bce1c3357755699a2e9e08d39411eac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 27 Oct 2025 16:16:24 +0100 Subject: [PATCH 077/334] fix: Missing logger (#1966) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- megatron/core/transformer/transformer_config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index b39b7706feb..d14f991046e 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -1,5 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +import logging import warnings from dataclasses import dataclass from typing import Callable, List, Literal, Optional, Tuple, Union @@ -29,6 +30,8 @@ except ImportError: HAVE_PACKAGING = False +logger = logging.getLogger(__name__) + @dataclass class TransformerConfig(ModelParallelConfig): @@ -918,7 +921,7 @@ def __post_init__(self): if self.moe_enable_deepep: if self.moe_token_dispatcher_type != "flex": raise ValueError("DeepEP backend is only supported with flex token dispatcher.") - logging.warning( + logger.warning( "moe_enable_deepep is deprecated." "Please use --moe-flex-dispatcher-backend=deepep instead." 
) From 113cefb24a7d7d77b88672630b6670724b877fe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 27 Oct 2025 17:28:32 +0100 Subject: [PATCH 078/334] ci: Update copyright checker (#1974) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/copyright-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/copyright-check.yml b/.github/workflows/copyright-check.yml index 0463e1dd962..74469adf75d 100644 --- a/.github/workflows/copyright-check.yml +++ b/.github/workflows/copyright-check.yml @@ -31,7 +31,7 @@ jobs: if: | !(needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true') - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.65.9 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.65.11 copyright-check-summary: needs: [pre-flight, copyright-check] From d9e0806d180cdde70450cfaaff9cb7addac20b21 Mon Sep 17 00:00:00 2001 From: Youngeun Kwon Date: Mon, 27 Oct 2025 23:20:49 -0700 Subject: [PATCH 079/334] [Dev] Update symmetric registration interface to sync-up with upstream pytorch change (#1930) Signed-off-by: Youngeun Kwon Signed-off-by: Youngeun --- megatron/core/distributed/fsdp/src/README.md | 9 +- .../megatron_fsdp/param_and_grad_buffer.py | 59 +++++++--- .../core/distributed/param_and_grad_buffer.py | 5 +- megatron/core/nccl_allocator.py | 104 ++++++++++++------ 4 files changed, 126 insertions(+), 51 deletions(-) diff --git a/megatron/core/distributed/fsdp/src/README.md b/megatron/core/distributed/fsdp/src/README.md index 8af58d07826..d879c6c26f8 100644 --- a/megatron/core/distributed/fsdp/src/README.md +++ b/megatron/core/distributed/fsdp/src/README.md @@ -35,10 +35,14 @@ Megatron-FSDP can provide up to 25% speed up and 23% memory savings compared to - **Advanced Bucketing**: Data-type aware bucketing system to 
minimize the overhead of collective operations - **Buffer Management**: Zero copy communication is achieved by reorganizing the storage of parameters and main grad with `ParamAndGradBuffer` class - **Communication Overlapping**: Improved communication overlap of paramter all-gather and gradient reduce-scatter -- **User-Buffer-Registration NCCL communication**: Offload NCCL collective communication to NVL/IB Sharp to reduce GPU SM usage for communication - **FP8 Mixed Precision with Transformer Engine**: Compatibility with Transformer Engine enables efficient FP8 mixed precision training - **Gradient accumulate fusion support with Transformer Engine**: Remove the explicit gradient copy to the communication buffer in backwards pass +### Advanced Collective Communication +- **SM Usage Reduction with SHARP**: FSDP's `All-Gather` (AG) and `Reduce-Scatter` (RS) collectives are designed to overlap with compute kernels. However, standard NCCL communication kernels can consume a significant number of GPU SMs (e.g., 16-32 SMs), "stealing" resources from compute (GEMM) kernels and reducing overall TFLOPS. +- **In-Switch Processing**: We leverage **SHARP** (Scalable Hierarchical Aggregation and Reduction Protocol) to offload these collective operations. SHARP performs aggregation and reduction computations directly on the network switches (InfiniBand or NVLink Switch) instead of on the GPU SMs. This dramatically reduces the SM consumption for communication to **1-6 SM** freeing up GPU resources for compute. It also provides lower communication latency, especially in large, scaled-out workloads. +- **Symmetric Optimizations for MNNVL**: We support **symmetric-based optimizations**, introduced in NCCL v2.27, which enable switch offloading for **Multi-Node NVLink (MNNVL)** systems such as GB200/GB300. This allows the same SM-saving benefits over the high-bandwidth NVLink fabric itself. 
+- **Hierarchical Collectives**: When an FSDP sharding domain spans both NVLink and InfiniBand, the library utilizes **hierarchical SHARP collectives** (e.g., NVL-SHARP + IB-SHARP) to optimize the communication path across the entire system topology. ## 📦 Installation @@ -207,6 +211,9 @@ optimizer.load_state_dict(ckpt_state_dict["optimizer"]) - `nccl_ub` will allocate and register the NCCL userbuffer for param and grad buffers. This option enables an SM-efficient NCCL algorithm that could improve the performance of overlapped computations. This flag will be much more effective when used together with SHARP if the FSDP communication includes both NVL and IB domains. Enabling this option will cause additional memory overhead due to the requirement to enable the `fsdp_double_buffer` option. - **Only effective when using Megatron-LM.** - Defaults to `False`. + - By default we try to use NCCL window (symmetric) registration if it is available. If not it falls back to conventional local registraion. +- `disable_symmetric_registration` will disable NCCL window (i.e. symmetric) registraion when using `nccl_ub`. + - Dafaults to `False`. - `fsdp_double_buffer` will use persistently allocated double buffers for temporarily-defined memory needed in `MegatronFSDP` communications. Having persistent double buffers may increase peak VRAM utilization, but is required to register NCCL user buffers (`nccl_ub=True`) for `MegatronFSDP`. Currently, this is only supported for simple repetitive model structures such as GPT. - **Only effective when using Megatron-LM.** - Defaults to `False`. Automatically overridden to `True` when `nccl_ub` is enabled. 
diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index a987ec2cec4..c8116150d52 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -76,13 +76,19 @@ except Exception: HAVE_TE = False +NCCL_ALLOCATOR = None + try: # Try to import the MCore NCCL nccl_allocator first. # If it fails, try to import the APEX NCCL nccl_allocator. import megatron.core.nccl_allocator as nccl_allocator + + NCCL_ALLOCATOR = "MCORE" except ImportError: try: import apex.contrib.nccl_allocator as nccl_allocator + + NCCL_ALLOCATOR = "APEX" except ImportError: nccl_allocator = None @@ -94,8 +100,8 @@ def _p_assert(cond: Any, s: str, raise_assertion_error: bool = True) -> None: message ``s`` since otherwise, it is swallowed. """ if not cond: - print(s) - traceback.print_stack() + logger.error(s) + logger.error(''.join(traceback.format_stack())) if raise_assertion_error: raise AssertionError(s) @@ -205,7 +211,7 @@ def __exit__(self, *args): for group in self.groups[1:]: backend = group._get_backend(torch.device("cuda", torch.cuda.current_device())) if torch.distributed.get_rank() == 0: - print( + logger.info( f"[MultiGroupUBRAllocator] Registering mem pool to group {group}, " f"group.group_desc:{group.group_desc}" ) @@ -1612,7 +1618,9 @@ def __init__( # If using nccl_ub, it returns a function that registers buffers to the NCCL memory pool # Buffer is registered to data_parallel_group and expert_data_parallel_group if it exists # In the case of not using nccl_ub, it returns a nullcontext - self.mem_alloc_context = self.get_mem_alloc_context(groups=self.ubr_groups) + self.mem_alloc_context = self.get_mem_alloc_context( + groups=self.ubr_groups, symmetric=not self.ddp_config.disable_symmetric_registration + ) # Mark FP8 params. 
If TransformerEngine is not installed, we can skip this. meta_device_init_fp8_params = {} @@ -1640,7 +1648,7 @@ def __init__( self._log_parameter_groups() - def get_mem_alloc_context(self, groups=None): + def get_mem_alloc_context(self, groups=None, symmetric=True): """ Get the memory allocation context for the parameter and gradient buffers. """ @@ -1653,22 +1661,43 @@ def get_mem_alloc_context(self, groups=None): if groups is None: # data parallel group is a default group for user buffer registration groups = [self.dist_index.get_fsdp_group(is_expert_parallel=False)] - if len(groups) == 1: - # register buffers to the default group directly using apex memory allocator - mem_alloc_context = functools.partial( - nccl_allocator.nccl_mem, NCCL_MEMORY_POOL, group=groups[0] - ) - else: - if hasattr(nccl_allocator, "MultiGroupMemPoolAllocator"): - # Case of MCore NCCL allocator + + if NCCL_ALLOCATOR == "MCORE": + if len(groups) == 1: + # register buffers to the default group directly using nccl memory allocator mem_alloc_context = functools.partial( - nccl_allocator.MultiGroupMemPoolAllocator, NCCL_MEMORY_POOL, groups=groups + nccl_allocator.nccl_mem, + NCCL_MEMORY_POOL, + group=groups[0], + symmetric=symmetric, ) else: - # Case of APEX NCCL allocator. + mem_alloc_context = functools.partial( + nccl_allocator.MultiGroupMemPoolAllocator, + NCCL_MEMORY_POOL, + groups=groups, + symmetric=symmetric, + ) + elif NCCL_ALLOCATOR == "APEX": + if symmetric: + logging.warning( + "Symmetric registration is not supported for APEX NCCL allocator." + "falling back to non-symmetric registration. " + "Please use Megatron Core NCCL allocator for symmetric registration." + ) + + if len(groups) == 1: + # register buffers to the default group directly using nccl memory allocator + mem_alloc_context = functools.partial( + nccl_allocator.nccl_mem, NCCL_MEMORY_POOL, group=groups[0] + ) + else: + # Supports multiple groups registration for APEX NCCL allocator. 
mem_alloc_context = functools.partial( MultiGroupUBRAllocator, NCCL_MEMORY_POOL, groups=groups ) + else: + raise ValueError(f"Invalid NCCL allocator: {NCCL_ALLOCATOR}") return mem_alloc_context else: return nullcontext diff --git a/megatron/core/distributed/param_and_grad_buffer.py b/megatron/core/distributed/param_and_grad_buffer.py index d49d77f6393..30a3c5dd8e2 100644 --- a/megatron/core/distributed/param_and_grad_buffer.py +++ b/megatron/core/distributed/param_and_grad_buffer.py @@ -685,7 +685,10 @@ def _does_param_require_new_bucket(param): symmetric=not self.ddp_config.disable_symmetric_registration ) mem_alloc_context = functools.partial( - nccl_allocator.nccl_mem, pool, group=self.data_parallel_group + nccl_allocator.nccl_mem, + pool, + group=self.data_parallel_group, + symmetric=not self.ddp_config.disable_symmetric_registration, ) else: # If nccl_ub is False, mem_alloc_context is nullcontext. diff --git a/megatron/core/nccl_allocator.py b/megatron/core/nccl_allocator.py index a328360ba0c..b46157e9d00 100644 --- a/megatron/core/nccl_allocator.py +++ b/megatron/core/nccl_allocator.py @@ -2,6 +2,7 @@ import logging import os from contextlib import nullcontext +from functools import lru_cache import torch @@ -94,6 +95,7 @@ def _build_nccl_allocator(): _allocator = nccl_allocator.get_nccl_allocator() +@lru_cache(maxsize=None) def get_func_args(func): """ Get the argument names of a function. @@ -122,15 +124,17 @@ def create_nccl_mem_pool(symmetric=None): # symmetric: bool | None = None -> to _pool = torch.cuda.MemPool(_allocator) else: if 'symmetric' in get_func_args(torch.cuda.MemPool): + # For PyTorch versions >= 2.9.0a0 and before PyTorch PR #161238, + # the symmetric knob should be passed to the MemPool constructor. + # Since PyTorch PR #161238 symmetric knob is now in registration function.
_pool = torch.cuda.MemPool(_allocator, symmetric=symmetric) elif 'symm_mem' in get_func_args(torch.cuda.MemPool): # This path handles argument name divergence between # nvidia pytorch and the official pytorch. _pool = torch.cuda.MemPool(_allocator, symm_mem=symmetric) else: - raise ValueError( - "symmetric setting with torch.cuda.MemPool requires " "higher PyTorch version" - ) + # This path handles the case where the symmetric knob is in the registration function. + _pool = torch.cuda.MemPool(_allocator) return _pool @@ -149,7 +153,7 @@ def init() -> None: # Disables the use of the tensor register allocator hook os.environ["TORCH_NCCL_USE_TENSOR_REGISTER_ALLOCATOR_HOOK"] = "0" _build_nccl_allocator() - print(f"[MCORE][NCCL_ALLOCATOR] Initialized NCCL Allocator") + logging.info(f"[MCORE][NCCL_ALLOCATOR] Initialized NCCL Allocator") # Preserve the original APEX NCCL allocator interface for backward compatibility @@ -158,11 +162,12 @@ class nccl_mem: An NCCL memory allocator, which inherits APEX nccl_allocator implementation. """ - def __init__(self, pool, enabled=True, device=None, group=None): + def __init__(self, pool, enabled=True, device=None, group=None, symmetric=True): self.device = None self.group = None self.mem_context = None self.pool = pool + self.symmetric = symmetric if enabled: if device is None: @@ -185,26 +190,41 @@ def __init__(self, pool, enabled=True, device=None, group=None): def __enter__(self): self.mem_context.__enter__() if self.group is not None: - backend = self.group._get_backend(self.device) - try: - # Deregister first to avoid duplicate registration of previously - # registered memory. - backend.deregister_mem_pool(self.pool) - except RuntimeError: - desc = getattr(self.group, "group_desc", None) - print( - f"[MCORE][NCCL_ALLOCATOR] Failed to deregister mem pool from" - f"{repr(self.group)}({desc}) group!!" - ) + # If the pool is not empty, deregister the pool from the group. 
+ if self.pool.snapshot(): + backend = self.group._get_backend(self.device) + try: + # Deregister first to avoid duplicate registration of previously + # registered memory. + backend.deregister_mem_pool(self.pool) + except RuntimeError: + desc = getattr(self.group, "group_desc", None) + logging.warning( + f"[MCORE][NCCL_ALLOCATOR] Failed to deregister mem pool from" + f"{repr(self.group)}({desc}) group!!" + ) def __exit__(self, *args): if self.group is not None: backend = self.group._get_backend(self.device) try: - backend.register_mem_pool(self.pool) + # Prefer attempting symmetric registration first; fall back if unsupported. + if self.symmetric: + try: + # Since PyTorch PR #161238 symmetric knob is now in registration function. + backend.register_mem_pool(self.pool, symm=self.symmetric) + except TypeError: + # Older PyTorch/APIs without 'symm' keyword. + logging.warning( + f"[MCORE][NCCL_ALLOCATOR] Failed in symmetric registration." + f"Falling back to non-symmetric registration!!" + ) + backend.register_mem_pool(self.pool) + else: + backend.register_mem_pool(self.pool) except RuntimeError: desc = getattr(self.group, "group_desc", None) - print( + logging.warning( f"[MCORE][NCCL_ALLOCATOR] Failed to register mem pool to" f"{repr(self.group)}({desc}) group!!" 
) @@ -238,11 +258,12 @@ class MultiGroupMemPoolAllocator: """ def __init__( - self, pool, groups + self, pool, groups, symmetric=True ): # pool: torch.cuda.MemPool, groups: List[torch.distributed.ProcessGroup] self.pool = pool self.groups = groups self.mem_context = torch.cuda.use_mem_pool(self.pool) + self.symmetric = symmetric assert isinstance(self.pool, torch.cuda.MemPool), "pool must be a torch.cuda.MemPool" assert isinstance(self.groups, list), "groups must be a list" @@ -252,28 +273,43 @@ def __init__( def __enter__(self): self.mem_context.__enter__() - for group in self.groups: - backend = group._get_backend(torch.device("cuda", torch.cuda.current_device())) - try: - # Since the registration is done in mempool granularity, we need to deregister - # the tensors in the mempool and re-register the mempool including the newly created - # tensors after the context is exited. - backend.deregister_mem_pool(self.pool) - except RuntimeError: - desc = getattr(group, "group_desc", None) - print( - f"[MCORE][MultiGroupMemPoolAllocator] Failed to deregister mem pool from" - f"{repr(group)}({desc}) group!!" - ) + # If the pool is not empty, deregister the pool from all the groups. + if self.pool.snapshot(): + for group in self.groups: + backend = group._get_backend(torch.device("cuda", torch.cuda.current_device())) + try: + # Since the registration is done in mempool granularity, we need to deregister + # the tensors in the mempool and re-register the mempool including + # the newly created tensors after the context is exited. + backend.deregister_mem_pool(self.pool) + except RuntimeError: + desc = getattr(group, "group_desc", None) + logging.warning( + f"[MCORE][MultiGroupMemPoolAllocator] Failed to deregister mem pool from" + f"{repr(group)}({desc}) group!!" 
+ ) def __exit__(self, *args): for group in self.groups: backend = group._get_backend(torch.device("cuda", torch.cuda.current_device())) try: - backend.register_mem_pool(self.pool) + # Prefer attempting symmetric registration first; fall back if unsupported. + if self.symmetric: + try: + # Since PyTorch PR #161238 symmetric knob is now in registration function. + backend.register_mem_pool(self.pool, symm=self.symmetric) + except TypeError: + # Older PyTorch/APIs without 'symm' keyword. + logging.warning( + f"[MCORE][MultiGroupMemPoolAllocator] Failed in symmetric registration." + f"Falling back to non-symmetric registration!!" + ) + backend.register_mem_pool(self.pool) + else: + backend.register_mem_pool(self.pool) except RuntimeError: desc = getattr(group, "group_desc", None) - print( + logging.warning( f"[MCORE][MultiGroupMemPoolAllocator] Failed to register mem pool to" f"{repr(group)}({desc}) group!!" ) From cc33e0056b00ee67455fadfb6710e4dbde9e1c33 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Tue, 28 Oct 2025 03:03:31 -0500 Subject: [PATCH 080/334] cp: `Megatron-FSDP Expert Parallel (DeepSeek-v3) Support` into `dev` (#1987) Signed-off-by: Charlie Truong Co-authored-by: Jack Chang Co-authored-by: jianbinc Co-authored-by: xuwenc --- .../distributed/fsdp/mcore_fsdp_adapter.py | 133 +++- megatron/core/distributed/fsdp/src/README.md | 11 + .../fsdp/src/megatron_fsdp/fully_shard.py | 10 +- .../fsdp/src/megatron_fsdp/megatron_fsdp.py | 11 +- .../megatron_fsdp/param_and_grad_buffer.py | 83 ++- .../fsdp/src/megatron_fsdp/uneven_dtensor.py | 4 +- .../fsdp/src/megatron_fsdp/utils.py | 130 +++- .../embeddings/yarn_rotary_pos_embedding.py | 10 +- megatron/core/optimizer/__init__.py | 23 + megatron/core/optimizer/distrib_optimizer.py | 2 + .../transformer/fsdp_dtensor_checkpoint.py | 336 ++++++++-- megatron/training/arguments.py | 4 + megatron/training/checkpointing.py | 74 ++- megatron/training/training.py | 1 + .../golden_values_dev_dgxh100_coreweave.json | 598 
+++++++++--------- .../golden_values_dev_dgxh100_coreweave.json | 500 +++++++-------- .../golden_values_dev_dgx_h100.json | 143 ++++- .../golden_values_dev_dgxh100_coreweave.json | 537 ++++++++++++++++ .../model_config.yaml | 2 +- tests/test_utils/recipes/moe.yaml | 15 +- tools/checkpoint/checkpoint_inspector.py | 362 +++++++++-- 21 files changed, 2224 insertions(+), 765 deletions(-) create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json diff --git a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py index a7c0d5802ab..7432a7f9a36 100644 --- a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +++ b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +import random from typing import List, Optional try: @@ -22,6 +23,7 @@ except ImportError: HAVE_EINOPS = False +import numpy as np import torch import torch.distributed as dist @@ -32,10 +34,11 @@ except ImportError: HAVE_DTENSOR = False -from megatron.core import parallel_state +from megatron.core import parallel_state, tensor_parallel from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk from megatron.core.distributed.data_parallel_base import _BaseDataParallel from megatron.core.distributed.distributed_data_parallel_config import DistributedDataParallelConfig +from megatron.core.extensions.transformer_engine import TELinear from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.transformer_layer import TransformerLayer @@ -95,6 +98,8 @@ def __init__( else: self.fsdp_unit_modules = [] + self._fix_tensor_parallel_attributes(module) + super().__init__( config=config, module=MegatronFSDP( @@ -119,6 +124,8 @@ def __init__( 
self.module.state_dict_for_save_checkpoint = self.module.state_dict self.state_dict_for_save_checkpoint = self.state_dict + self.sync_rng_states_across_tp_group() + def load_state_dict(self, state_dict, strict=True): """ Load the state dictionary into the module. @@ -141,6 +148,44 @@ def load_state_dict(self, state_dict, strict=True): self.module.load_state_dict(custom_state_dict, strict=strict) + def _fix_tensor_parallel_attributes(self, module): + is_expert_param = lambda n, p: ".experts." in n + is_router_param = lambda n, p: ".router.weight" in n + + if parallel_state.get_tensor_model_parallel_group(): + tp_size = parallel_state.get_tensor_model_parallel_group().size() + else: + tp_size = 1 + + if parallel_state.get_expert_tensor_parallel_group(): + expt_tp_size = parallel_state.get_expert_tensor_parallel_group().size() + else: + expt_tp_size = 1 + + param_to_direct_module = {} + for name, m in module.named_modules(): + for p in m.parameters(recurse=False): + param_to_direct_module[p] = (name, m) + + for name, param in module.named_parameters(): + if is_expert_param(name, param) and expt_tp_size > 1: + setattr(param, "_mcore_tp", True) + if "linear_fc1.weight" in name: + setattr(param, "_tp_partition_dim", 0) + elif "linear_fc2.weight" in name: + setattr(param, "_tp_partition_dim", 1) + + if not is_expert_param(name, param) and tp_size > 1: + m_name, direct_module = param_to_direct_module[param] + if isinstance(direct_module, (TELinear,)): + parallel_mode = getattr(direct_module, "parallel_mode", None) + if parallel_mode is None: + setattr(param, "_mcore_tp", True) + setattr(param, "_tp_duplicated", True) + elif is_router_param(name, param): + setattr(param, "_mcore_tp", True) + setattr(param, "_tp_duplicated", True) + def _init_dist_index(self, pg_collection): """ Initialize the distributed index for the module. 
@@ -154,6 +199,7 @@ def _init_dist_index(self, pg_collection): enable_hsdp = self.ddp_config.num_distributed_optimizer_instances > 1 if pg_collection is None: tp_group = parallel_state.get_tensor_model_parallel_group() + expt_tp_group = parallel_state.get_expert_tensor_parallel_group() if enable_hsdp: dp_cp_group = parallel_state.get_data_parallel_group( with_context_parallel=True, partial_data_parallel=True @@ -168,8 +214,11 @@ def _init_dist_index(self, pg_collection): ) outer_fsdp_group = None hybrid_fsdp_group = None + expt_dp_group = parallel_state.get_expert_data_parallel_group() + ep_group = parallel_state.get_expert_model_parallel_group() else: tp_group = getattr(pg_collection, 'tp', None) + expt_tp_group = getattr(pg_collection, 'expt_tp', None) if enable_hsdp: dp_cp_group = pg_collection.intra_dp_cp outer_fsdp_group = pg_collection.inter_dist_opt @@ -178,11 +227,17 @@ def _init_dist_index(self, pg_collection): dp_cp_group = pg_collection.dp_cp outer_fsdp_group = None hybrid_fsdp_group = None + expt_dp_group = getattr(pg_collection, 'expt_dp', None) + ep_group = getattr(pg_collection, 'ep', None) if tp_group is None: single_rank_group = dist.new_group(ranks=[dist.get_rank()]) tp_group = single_rank_group + if expt_tp_group is None: + single_rank_group = dist.new_group(ranks=[dist.get_rank()]) + expt_tp_group = single_rank_group + if enable_hsdp: mesh = _get_hsdp_tp_mesh(outer_fsdp_group, dp_cp_group, tp_group) dist_index = FSDPDistributedIndex( @@ -199,6 +254,17 @@ def _init_dist_index(self, pg_collection): hybrid_fsdp_group=hybrid_fsdp_group, ) else: + if ep_group is not None: + expt_mesh = _get_dp_tp_mesh(expt_dp_group, expt_tp_group, ep_size=ep_group.size()) + expt_device_mesh = DeviceMesh.from_group( + [expt_dp_group, expt_tp_group], + device_type="cuda", + mesh=expt_mesh.tolist(), + mesh_dim_names=["dp_cp", "tp"], + ) + else: + expt_device_mesh = None + mesh = _get_dp_tp_mesh(dp_cp_group, tp_group) dist_index = FSDPDistributedIndex( 
device_mesh=DeviceMesh.from_group( @@ -209,8 +275,11 @@ def _init_dist_index(self, pg_collection): ), dp_shard_dim="dp_cp", tp_dim="tp", + expt_device_mesh=expt_device_mesh, ) + self.tp_group = tp_group + return dist_index def stop_communication(self): @@ -220,6 +289,20 @@ def stop_communication(self): self.module.synchronize_gradient_reduce() self.module.synchronize_param_gather() + def sync_rng_states_across_tp_group(self): + """ + Synchronize the tensor parallel random number generator states. + """ + if self.tp_group.size() <= 1: + return + + if self.tp_group.rank() == 0: + broadcast_list = [_get_rng_state_dict()] + else: + broadcast_list = [None] + torch.distributed.broadcast_object_list(broadcast_list, group=self.tp_group, group_src=0) + _load_rng_state_dict(broadcast_list[0]) + def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group): assert HAVE_EINOPS, "einops is not installed. Please install it with `pip install einops`." @@ -273,29 +356,46 @@ def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group): return mesh -def _get_dp_tp_mesh(dp_cp_group, tp_group): +def _get_dp_tp_mesh(dp_cp_group, tp_group, ep_size=1): assert HAVE_EINOPS, "einops is not installed. Please install it with `pip install einops`." world_size = dist.get_world_size() tp_size = dist.get_world_size(tp_group) if tp_group is not None else 1 - # TODO: Supports configurable (dp, cp, tp) order. - mesh = einops.rearrange(torch.arange(world_size), "(dp_cp tp) -> dp_cp tp", tp=tp_size) + # TODO: Supports configurable (dp, cp, ep, tp) order. 
+ mesh = einops.rearrange( + torch.arange(world_size), + "(dp_cp ep tp) -> ep dp_cp tp", + dp_cp=dp_cp_group.size(), + tp=tp_size, + ep=ep_size, + ) - mesh_dp_ranks = einops.rearrange(mesh, 'dp_cp tp -> tp dp_cp', tp=tp_size) + mesh_dp_ranks = einops.rearrange(mesh, 'ep dp_cp tp -> (ep tp) dp_cp', dp_cp=dp_cp_group.size()) dp_cp_group_ranks = dist.get_process_group_ranks(dp_cp_group) assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_dp_ranks, dp_cp_group_ranks), ( f"[Megatron-FSDP] Data Parallel ranks in the mesh {mesh_dp_ranks} " f"do not match the ranks in the DP group {dp_cp_group_ranks}." ) - mesh_tp_ranks = einops.rearrange(mesh, 'dp_cp tp -> (dp_cp) tp', tp=tp_size) + mesh_tp_ranks = einops.rearrange(mesh, 'ep dp_cp tp -> (dp_cp ep) tp', tp=tp_size) tp_group_ranks = dist.get_process_group_ranks(tp_group) assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_tp_ranks, tp_group_ranks), ( f"[Megatron-FSDP] Tensor Parallel ranks in the mesh {mesh_tp_ranks} " f"do not match the ranks in the TP group {tp_group_ranks}." ) - return mesh + # Exclude the expert parallel dimension + rank = dist.get_rank() + dp_tp_meshes = [per_ep_mesh for per_ep_mesh in mesh if rank in per_ep_mesh.reshape(-1).tolist()] + assert ( + len(dp_tp_meshes) == 1 + ), f"[Megatron-FSDP] Current rank {rank} is not unique in the mesh ranks {mesh.tolist()}." + assert len(dp_tp_meshes[0].reshape(-1).tolist()) == dp_cp_group.size() * tp_group.size(), ( + f"[Megatron-FSDP] DP-TP mesh size {len(dp_tp_meshes[0].reshape(-1).tolist())} " + f"does not match expected size {dp_cp_group.size() * tp_group.size()}." + ) + + return dp_tp_meshes[0] def _check_mesh_ranks_and_group_ranks_are_consistent(mesh_ranks, group_ranks): @@ -310,3 +410,22 @@ def _check_mesh_ranks_and_group_ranks_are_consistent(mesh_ranks, group_ranks): f"{mesh_ranks.tolist()} does not match the group ranks {group_ranks}." 
) return sorted(current_ranks[0]) == sorted(group_ranks) + + +def _get_rng_state_dict(): + rng_state_dict = { + 'random_rng_state': random.getstate(), + 'np_rng_state': np.random.get_state(), + 'torch_rng_state': torch.get_rng_state(), + 'cuda_rng_state': torch.cuda.get_rng_state(), + 'rng_tracker_states': tensor_parallel.get_cuda_rng_tracker().get_states(), + } + return rng_state_dict + + +def _load_rng_state_dict(rng_state_dict): + random.setstate(rng_state_dict['random_rng_state']) + np.random.set_state(rng_state_dict['np_rng_state']) + torch.set_rng_state(rng_state_dict['torch_rng_state']) + torch.cuda.set_rng_state(rng_state_dict['cuda_rng_state']) + tensor_parallel.get_cuda_rng_tracker().set_states(rng_state_dict['rng_tracker_states']) diff --git a/megatron/core/distributed/fsdp/src/README.md b/megatron/core/distributed/fsdp/src/README.md index d879c6c26f8..9e036f22f67 100644 --- a/megatron/core/distributed/fsdp/src/README.md +++ b/megatron/core/distributed/fsdp/src/README.md @@ -127,6 +127,12 @@ device_mesh[("dp_shard", "cp")]._flatten("dp_shard_cp") # Only required if using HSDP. Otherwise, don't pass hybrid_fsdp_group. device_mesh[("dp_outer", "dp_shard", "cp")]._flatten("hsdp") hsdp_group = device_mesh["hsdp"].get_group() +# Initialize DeviceMesh for expert parallel (EP) modules when using FSDP + EP. +expert_device_mesh = torch.distributed.device_mesh.init_device_mesh( + "cuda", + mesh_shape=(expt_dp_shard_size, expt_tp_size), + mesh_dim_names=("dp_shard", "tp"), +) # Fully-shards your model and distributes your optimizer. model, optimizer = fully_shard( @@ -145,6 +151,8 @@ model, optimizer = fully_shard( tp_dim="tp", # Only required when using HSDP. Otherwise, set this to None. hybrid_fsdp_group=hsdp_group, + # Only required for FSDP + EP. Otherwise, set this to None. 
+ expt_device_mesh=expt_device_mesh, # FSDP Sharding Strategy: no_shard (0) / optim (1) / optim_grads (2) / optim_grads_params (3) zero_dp_strategy=3, outer_dp_sharding_strategy=1, @@ -192,6 +200,9 @@ optimizer.load_state_dict(ckpt_state_dict["optimizer"]) - `tp_dim` is the name of the sub-mesh used for tensor parallelism (TP), which is required for `(FSDP, TP)`-strided sharding when using Megatron-LM or Torch-native `DTensor` TP. - For more information about tensor parallelism, refer to: [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053). - `hybrid_fsdp_group` is the `ProcessGroup` which contains all ranks in the flattened `dp_shard_dim` and `dp_outer_dim` sub-meshes utilized to specify the `(DP-Outer, DP-Shard)` sharded coordinate system for the weight and gradient buffers. Required for HSDP. +- `expt_device_mesh` is another [`torch.distributed.DeviceMesh`](https://docs.pytorch.org/docs/stable/distributed.html#devicemesh) tailored for the expert parallel (EP) modules in `MegatronFSDP`. + - `dp_shard_dim` is the name of the sub-mesh required for FSDP sharding of the EP modules, enabling expert data parallelism (EDP). + - `tp_dim` is the name of the sub-mesh used for expert tensor parallelism (ETP), which is required for `(FSDP, ETP)`-strided sharding when using Megatron-LM or Torch-native `DTensor` ETP. - `init_model_with_meta_device` has `MegatronFSDP` initialize your `meta`-device model in shards on every CUDA device to avoid OOM when initializing extremely large models that cannot fit on a single device. Users can initialize their model on a [`meta`-device](https://docs.pytorch.org/docs/stable/meta.html) (`with torch.device('meta'): ...`), and ``MegatronFSDP`` will further shard and initialize the model parameters layer-by-layer adhering to the customizable `module.reset_parameters` method, which prevents the entire model from being allocated in memory at any point during runtime. 
- Defaults to `False`. - Note that the `device` argument which installs your model on a specific device or rank will be deactivated when `init_model_with_meta_device=True`. diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py index 24e86cede72..e98362a1a03 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py @@ -64,6 +64,7 @@ def fully_shard_model( dp_outer_dim: Optional[str] = None, tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, + expt_device_mesh: Optional[DeviceMesh] = None, fsdp_unit_modules: Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]] = None, zero_dp_strategy: str | int = 3, outer_dp_sharding_strategy: str | int = 0, @@ -183,8 +184,10 @@ def fully_shard_model( tp_dim=tp_dim, # Only required for HSDP. hybrid_fsdp_group=hybrid_fsdp_group, - # Access to flattened DP rank assignments for HFSDP. + # Access to flattened DP rank assignments for HSDP. hsdp_outer_dp_shard=_outer_fsdp_sharding, + # Only required for Megatron-FSDP + EP. + expt_device_mesh=expt_device_mesh, ) # Wrap model in Megatron FSDP. @@ -330,6 +333,7 @@ def fully_shard( dp_outer_dim: Optional[str] = None, tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, + expt_device_mesh: Optional[DeviceMesh] = None, fsdp_unit_modules: Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]] = None, zero_dp_strategy: str | int = 3, outer_dp_sharding_strategy: str | int = 0, @@ -391,6 +395,9 @@ def fully_shard( by flattening the outer-FSDP (dp_outer_dim) and FSDP (dp_shard_dim) process groups or sub-meshes. Defaults to None. Required for HSDP, i.e. if dp_outer_dim is not None. + expt_device_mesh (Optional[DeviceMesh]): + Expert parallel device mesh object defining the topology for MoE distributed training. 
+ fsdp_unit_modules (Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]]): List of (sub-)module classes or (sub-)module class import paths that are "units", which are torch.nn.Module(s) that are sharded and scheduled by Megatron-FSDP. @@ -503,6 +510,7 @@ def fully_shard( dp_outer_dim=dp_outer_dim, tp_dim=tp_dim, hybrid_fsdp_group=hybrid_fsdp_group, + expt_device_mesh=expt_device_mesh, fsdp_unit_modules=fsdp_unit_modules, zero_dp_strategy=zero_dp_strategy, outer_dp_sharding_strategy=outer_dp_sharding_strategy, diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index 10a8ae14d65..d6ef5f6210e 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -235,7 +235,10 @@ def __init__( self.dist_index = dist_index # If Megatron Expert Parallelism is enabled, you need to provide an expt_dp_group. - if has_expert_parameters and self.dist_index.get_expert_dp_group() is None: + if ( + has_expert_parameters + and self.dist_index.get_fsdp_group(is_expert_parallel=True) is None + ): raise ValueError( "[Megatron-FSDP] Megatron Expert Parallelism is enabled, but no expt_dp_group is" "provided." @@ -353,9 +356,7 @@ def _init_fsdp_param_and_grad_buffer(self): ) # Set the suggested communication unit size for reduce-scatter and all-gather pipelines. 
- suggested_communication_unit_size = ( - self.ddp_config.suggested_communication_unit_size or 1_000_000_000 - ) + suggested_communication_unit_size = self.ddp_config.suggested_communication_unit_size if suggested_communication_unit_size is None: if self.data_parallel_sharding_strategy == "optim_grads_params": total_param_elements = 0 @@ -370,6 +371,8 @@ def _init_fsdp_param_and_grad_buffer(self): suggested_communication_unit_size = total_param_elements // total_fsdp_module * 2 elif self.bucket_size is not None: suggested_communication_unit_size = self.bucket_size + else: + suggested_communication_unit_size = 1_000_000_000 # Cap to 1B elements. suggested_communication_unit_size = max( diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index c8116150d52..bdf480d867b 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -34,7 +34,14 @@ from torch.distributed.tensor.device_mesh import _mesh_resources from .uneven_dtensor import update_uneven_dtensor_chunk_metadata, validate_uneven_dtensor -from .utils import _MODEL_PARALLEL_RNG_TRACKER_NAME, FSDPDistributedIndex, get_global_memory_buffer +from .utils import ( + _MODEL_PARALLEL_RNG_TRACKER_NAME, + FSDPDistributedIndex, + get_global_memory_buffer, + get_mcore_tensor_parallel_partition_dim, + is_mcore_tensor_model_parallel, + is_mcore_tensor_parallel_duplicated, +) logger = logging.getLogger(__name__) @@ -1299,7 +1306,7 @@ def _does_param_require_new_bucket(param): and policy.data_parallel_sharding_strategy != "no_shard" ) - is_expert_parameter = lambda p: not getattr(p, "allreduce", True) + is_expert_parameter = lambda n, p: ".experts." in n # Step 1: Group the parameters according to their execution order and attributes. # FSDP unit module parameters are split into multiple parameter sub-groups. 
@@ -1313,7 +1320,7 @@ def _does_param_require_new_bucket(param): if is_float8tensor(param) or meta_device_init_fp8_params.get(name, False) else param.dtype ), - is_expert_param=is_expert_parameter(param), + is_expert_param=is_expert_parameter(name, param), requires_grad=param.requires_grad, fsdp_unit_id=None, ) @@ -2257,6 +2264,10 @@ def _reset_parameters(self, old_params, new_params): self.param_to_direct_module[new_param] = self.param_to_direct_module[old_param] del self.param_to_direct_module[old_param] + for tp_attr in ["_mcore_tp", "_tp_partition_dim", "_tp_duplicated"]: + if getattr(old_param, tp_attr, None) is not None: + setattr(new_param, tp_attr, getattr(old_param, tp_attr)) + for item_id, p in enumerate(self.params): if p in param_map: new_p = param_map[p] @@ -2340,6 +2351,7 @@ def _init_distributed_params(self): is_expert_param=pg.is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, + force_sync_tp_duplicated_param=True, ) dist_main_weight[param_name] = dist_param elif wbuf: @@ -2351,6 +2363,7 @@ def _init_distributed_params(self): is_expert_param=pg.is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, + force_sync_tp_duplicated_param=True, ) dist_main_weight[param_name] = dist_param else: @@ -2365,6 +2378,7 @@ def _init_distributed_params(self): is_expert_param=pg.is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=False, + force_sync_tp_duplicated_param=True, ) dist_main_weight[param_name] = dist_param @@ -2399,6 +2413,9 @@ def set_param_attribute(): "partition_dim", "partition_stride", "is_embedding_or_output_parameter", + "_mcore_tp", + "_tp_duplicated", + "_tp_partition_dim", ]: if hasattr(orig_param, attr_name): setattr(param, attr_name, getattr(orig_param, attr_name)) @@ -3546,7 +3563,9 @@ def to_local_if_dtensor(tensor): return tensor -def _get_fsdp_tensor_spec(param, dist_index: FSDPDistributedIndex, is_sharded_param): +def _get_fsdp_tensor_spec( + param, dist_index: FSDPDistributedIndex, 
is_sharded_param, is_expert_param +): """ Get the DeviceMesh for the parameter and modify the placement for Megatron-FSDP. """ @@ -3557,7 +3576,7 @@ def _get_fsdp_tensor_spec(param, dist_index: FSDPDistributedIndex, is_sharded_pa dtensor_mesh = getattr(dtensor_spec, "mesh", None) # Validate that the DTensor root mesh is identical to the Megatron-FSDP device mesh. - megatron_fsdp_global_mesh = dist_index.get_root_mesh() + megatron_fsdp_global_mesh = dist_index.get_root_mesh(is_expert_parallel=is_expert_param) dtensor_global_mesh = _mesh_resources.get_root_mesh(dtensor_mesh) # FIXME(boxiangw): add or megatron_fsdp_global_mesh != dtensor_global_mesh: # _mesh_resources.get_root_mesh(dtensor_mesh) is not getting the correct root mesh @@ -3602,7 +3621,7 @@ def _get_fsdp_tensor_spec(param, dist_index: FSDPDistributedIndex, is_sharded_pa placements = [Shard(0), dtensor_placement] shard_order = [1, 0] - device_mesh = dist_index.get_submesh(mesh_dim_names) + device_mesh = dist_index.get_submesh(mesh_dim_names, is_expert_parallel=is_expert_param) if shard_order is not None: setattr(device_mesh, "_shard_order", shard_order) @@ -3627,7 +3646,7 @@ def _get_fsdp_tensor_spec(param, dist_index: FSDPDistributedIndex, is_sharded_pa else: placements = [Shard(0)] - device_mesh = dist_index.get_submesh(mesh_dim_names) + device_mesh = dist_index.get_submesh(mesh_dim_names, is_expert_parallel=is_expert_param) if shard_order is not None: setattr(device_mesh, "_shard_order", shard_order) @@ -3642,6 +3661,7 @@ def make_fsdp_dtensor( is_expert_param: bool = False, run_check: bool = False, update_uneven_dtensor_chunk_meta: bool = False, + force_sync_tp_duplicated_param: bool = False, ): """ Creates a distributed tensor (DTensor) from a local tensor with support for @@ -3720,38 +3740,39 @@ def make_fsdp_dtensor( orig_param = param # Handle tensor model parallel specific logic - if getattr(param, "tensor_model_parallel", False): + if is_mcore_tensor_model_parallel(param): # Ensure parameter is 
not already a DTensor assert not isinstance(param, DTensor), ( - "[Megatron-FSDP] Parameter is already a DTensor, yet tensor_model_parallel " - "is True. Check usage." + "[Megatron-FSDP] Parameter is already a DTensor, yet tensor_model_parallel " "is True." ) - # Validate M-Core TP attributes - assert hasattr( - param, "partition_dim" - ), "[Megatron-FSDP] tensor_model_parallel param missing 'partition_dim'." - assert hasattr( - param, "partition_stride" - ), "[Megatron-FSDP] tensor_model_parallel param missing 'partition_stride'." - assert ( - param.partition_stride == 1 - ), "[Megatron-FSDP] Only partition_stride=1 is currently supported for " - "tensor_model_parallel." - - tp_dim = param.partition_dim - tp_mesh = dist_index.get_submesh(dist_index.tp_dim) - - # Adjust shape for global dimension + tp_mesh = dist_index.get_submesh(dist_index.tp_dim, is_expert_parallel=is_expert_param) + global_shape = list(param.shape) if tp_mesh.mesh.numel() > 1: - global_shape = list(param.shape) - global_shape[tp_dim] *= tp_mesh.mesh.numel() + if is_mcore_tensor_parallel_duplicated(param): + placements = [Replicate()] + if force_sync_tp_duplicated_param: + if local_tensor.numel() > 0: + torch.distributed.broadcast( + local_tensor, group=tp_mesh.get_group(), group_src=0 + ) + elif run_check: + # TODO: Implement consistency check for duplicated TP parameters + pass + else: + tp_dim = get_mcore_tensor_parallel_partition_dim(param) + assert tp_dim is not None, ( + "[Megatron-FSDP] Parameter is not tensor model parallel, " + "yet tensor_model_parallel is True." 
+ ) + placements = [Shard(tp_dim)] + global_shape[tp_dim] *= tp_mesh.mesh.numel() # Construct TP-sharded DTensor using Megatron-style placement param = DTensor.from_local( - local_tensor=param, + local_tensor=local_tensor, device_mesh=tp_mesh, - placements=[Shard(tp_dim)], + placements=placements, run_check=run_check, shape=global_shape, stride=torch.empty(global_shape).stride(), @@ -3759,7 +3780,7 @@ def make_fsdp_dtensor( # Get FSDP-configured mesh and placements from provided param device_mesh, placements = _get_fsdp_tensor_spec( - param, dist_index, is_sharded_param=is_sharded_param + param, dist_index, is_sharded_param=is_sharded_param, is_expert_param=is_expert_param ) # Reshape local tensor for sharded layouts beyond 1D diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py index 523d8fae333..490d80c0f21 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py @@ -365,7 +365,9 @@ def _assemble_full_tensor_from_uneven_chunks( # Wrap into a replicated DTensor and return return DTensor.from_local( - full_tensor, placements=[Replicate()], device_mesh=dtensor.device_mesh + full_tensor, + placements=[Replicate()] * len(dtensor.placements), + device_mesh=dtensor.device_mesh, ) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py index 1dfe08b90f4..b94a332bb0d 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py @@ -675,6 +675,7 @@ def __init__( tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, hsdp_outer_dp_shard: bool = False, + expt_device_mesh: Optional[DeviceMesh] = None, ): """ Args: @@ -691,6 +692,8 @@ def __init__( in hybrid FSDP. 
Specifying outer sharding will lift the bucket sharding coordinate system to flattened ranks of (dp_shard, dp_outer) instead of just sharding across dp_shard ranks and replicating across dp_outer ranks. + expt_device_mesh (Optional[DeviceMesh]): The expert parallel device mesh + to use for the DistributedIndex. """ # Device mesh arguments. self.device_mesh = device_mesh @@ -701,6 +704,11 @@ def __init__( self.use_hybrid_fsdp = dp_outer_dim is not None # Helper flag to denote if we are outer-sharding in hybrid FSDP. self.hsdp_outer_dp_shard = hsdp_outer_dp_shard + self.expt_device_mesh = expt_device_mesh + + # Handling the situation where M-Core MoE EP=1 + if self.expt_device_mesh is None: + self.expt_device_mesh = device_mesh # Hybrid FSDP Process Groups # Retrieve the FSDP process group from the DeviceMesh. @@ -719,6 +727,14 @@ def __init__( # combination of the outer-FSDP and FSDP process groups. self.hybrid_fsdp_group = hybrid_fsdp_group + # Retrieve the expert parallel process groups from the DeviceMesh. + self.expt_fsdp_group = ( + self.expt_device_mesh[self.dp_shard_dim].get_group() + if self.expt_device_mesh is not None + and contains_submesh(self.expt_device_mesh, self.dp_shard_dim) + else None + ) + """ Store a persistent reference to the core device meshes that back Megatron-FSDP. This is necessary because _MeshEnv (_mesh_resources) may not persist: @@ -732,26 +748,33 @@ def __init__( FIXME(@cspades): Identify the root cause of this behavior. 
""" self.mesh_library = {} - # TP Mesh + + def register_submesh(device_mesh, submesh, is_expert_parallel): + """Register a submesh with identifier: (*submesh, is_expert_parallel) + in the mesh library.""" + if contains_submesh(device_mesh, submesh): + submesh_identifier = tuple(list(submesh) + [is_expert_parallel]) + self.mesh_library[submesh_identifier] = device_mesh[submesh] + + # Define common submesh patterns tp_submesh = (self.tp_dim,) - if contains_submesh(self.device_mesh, tp_submesh): - self.mesh_library[tp_submesh] = self.device_mesh[tp_submesh] - # HSDP-TP Mesh hsdp_tp_submesh = (self.dp_outer_dim, self.dp_shard_dim, self.tp_dim) - if contains_submesh(self.device_mesh, hsdp_tp_submesh): - self.mesh_library[hsdp_tp_submesh] = self.device_mesh[hsdp_tp_submesh] - # FSDP-TP Mesh fsdp_tp_submesh = (self.dp_shard_dim, self.tp_dim) - if contains_submesh(self.device_mesh, fsdp_tp_submesh): - self.mesh_library[fsdp_tp_submesh] = self.device_mesh[fsdp_tp_submesh] - # HSDP Mesh hsdp_submesh = (self.dp_outer_dim, self.dp_shard_dim) - if contains_submesh(self.device_mesh, hsdp_submesh): - self.mesh_library[hsdp_submesh] = self.device_mesh[hsdp_submesh] - # FSDP Mesh fsdp_submesh = (self.dp_shard_dim,) - if contains_submesh(self.device_mesh, fsdp_submesh): - self.mesh_library[fsdp_submesh] = self.device_mesh[fsdp_submesh] + + # Register non-EP submeshes + register_submesh(self.device_mesh, tp_submesh, False) + register_submesh(self.device_mesh, hsdp_tp_submesh, False) + register_submesh(self.device_mesh, fsdp_tp_submesh, False) + register_submesh(self.device_mesh, hsdp_submesh, False) + register_submesh(self.device_mesh, fsdp_submesh, False) + + # Register EP submeshes + if self.expt_device_mesh is not None: + register_submesh(self.expt_device_mesh, tp_submesh, True) + register_submesh(self.expt_device_mesh, fsdp_tp_submesh, True) + register_submesh(self.expt_device_mesh, fsdp_submesh, True) # Validate FSDP arguments. 
if self.fsdp_group is None: @@ -776,36 +799,54 @@ def __init__( "process groups or sub-meshes." ) - def get_submesh(self, mesh_dim_names: str | Sequence[str]) -> DeviceMesh: + def get_submesh( + self, mesh_dim_names: str | Sequence[str], is_expert_parallel: bool = False + ) -> DeviceMesh: """ - Retrieve an Megatron-FSDP-registered sub-mesh by name(s). + Retrieve an Megatron-FSDP-registered submesh by name(s). """ if isinstance(mesh_dim_names, str): mesh_dim_names = (mesh_dim_names,) - # Search for the sub-mesh in the mesh library. - device_submesh = self.mesh_library.get(tuple(mesh_dim_names), None) + + # Construct submesh identifier: (*mesh_dim_names, is_expert_parallel) + submesh_identifier = tuple(list(mesh_dim_names) + [is_expert_parallel]) + + # Retrieve the submesh from the mesh library + device_submesh = self.mesh_library.get(submesh_identifier, None) + if device_submesh is None: - if self.tp_dim is None: - # Warn about not specifying tp_dim for - # layers or frameworks that depend on this. + # Warn about not specifying tp_dim for layers or frameworks that depend on this. + if self.tp_dim is None and not is_expert_parallel: logger.warning( - "[FSDPDistributedIndex] Note: For TransformerEngine, or other machine learning " - "frameworks like Megatron that assume TP=1, you must specify tp_dim to use " - "Megatron-FSDP. Create a trivial TP dimension by setting the TP dimension size " + "[FSDPDistributedIndex] Note: For TransformerEngine, or " + "other machine learning frameworks like Megatron that assume " + "TP=1, you must specify tp_dim to use Megatron-FSDP. " + "Create a trivial TP dimension by setting the TP dimension size " "to 1 in the DeviceMesh.\n" f"DeviceMesh: {self.device_mesh}" ) + elif self.tp_dim is None and is_expert_parallel: + logger.warning( + "[FSDPDistributedIndex] Note: For TransformerEngine, or " + "other machine learning frameworks like Megatron that assume " + "ETP=1, you must specify tp_dim to use Megatron-FSDP. 
" + "Create a trivial ETP dimension by setting the ETP dimension size " + "to 1 in the DeviceMesh.\n" + f"DeviceMesh: {self.expt_device_mesh}" + ) + raise ValueError( - f"[FSDPDistributedIndex][get_submesh] No sub-mesh with " - f"mesh_dim_names={mesh_dim_names} has been registered with Megatron-FSDP." + f"[FSDPDistributedIndex][get_submesh] No submesh with " + f"mesh_dim_names={mesh_dim_names}, is_expert_parallel={is_expert_parallel} " + f"has been registered with Megatron-FSDP." ) + return device_submesh def get_dp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: """Get the data parallel process group.""" if is_expert_parallel: - # Expert parallel is not supported - return None + return self.expt_fsdp_group if self.use_hybrid_fsdp: return self.hybrid_fsdp_group return self.fsdp_group @@ -813,8 +854,7 @@ def get_dp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: def get_fsdp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: """Get the FSDP process group.""" if is_expert_parallel: - # Expert parallel is not supported - return None + return self.expt_fsdp_group return self.fsdp_group def get_outer_fsdp_group(self) -> ProcessGroup: @@ -826,7 +866,7 @@ def get_outer_fsdp_group(self) -> ProcessGroup: def get_root_mesh(self, is_expert_parallel: bool = False) -> DeviceMesh: """Get the device mesh.""" if is_expert_parallel: - raise NotImplementedError("Expert parallel is not supported in Megatron-FSDP.") + return self.expt_device_mesh return self.device_mesh def get_logical_hybrid_fsdp_rank(self): @@ -924,3 +964,29 @@ def create_updated_function_signature(original_function, **extended_kwargs: dict # Return the updated function signature. return inspect.Signature(params) + + +def is_mcore_tensor_model_parallel(param: torch.Tensor) -> bool: + """ + Check if the given parameter is Megatron-Core tensor model parallel. 
+ """ + return getattr(param, "_mcore_tp", False) or getattr(param, "tensor_model_parallel", False) + + +def is_mcore_tensor_parallel_duplicated(param: torch.Tensor) -> bool: + """ + Check if the given parameter is Megatron-Core tensor model parallel and duplicated. + """ + return getattr(param, "_tp_duplicated", False) + + +def get_mcore_tensor_parallel_partition_dim(param: torch.Tensor) -> Optional[int]: + """ + Get the partition dimension for a Megatron-Core tensor model parallel parameter. + """ + if is_mcore_tensor_model_parallel(param): + if hasattr(param, "_tp_partition_dim"): + return param._tp_partition_dim + else: + return param.partition_dim + return None diff --git a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py index 507472f789f..455a7757d28 100644 --- a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +++ b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py @@ -130,9 +130,9 @@ def forward(self, max_seq_len: int, offset: int = 0, packed_seq: bool = False) - self.original_max_position_embeddings, self.correction_range_round_to_int, ) - inv_freq_mask = 1.0 - _yarn_linear_ramp_mask(low, high, self.dim // 2).to( - device=self.inv_freq_extra.device, dtype=torch.float32 - ) + inv_freq_mask = 1.0 - _yarn_linear_ramp_mask( + low, high, self.dim // 2, device=self.inv_freq_extra.device + ).to(dtype=torch.float32) inv_freq = self.inv_freq_inter * (1 - inv_freq_mask) + self.inv_freq_extra * inv_freq_mask seq = ( @@ -211,11 +211,11 @@ def _yarn_find_correction_range( return max(low, 0), min(high, dim - 1) # Clamp values just in case -def _yarn_linear_ramp_mask(min: float, max: float, dim: int) -> Tensor: +def _yarn_linear_ramp_mask(min: float, max: float, dim: int, device: torch.device) -> Tensor: if min == max: max += 0.001 # Prevent singularity - linear_func = (torch.arange(dim, dtype=torch.float32) - min) / (max - min) + linear_func = 
(torch.arange(dim, dtype=torch.float32, device=device) - min) / (max - min) ramp_func = torch.clamp(linear_func, 0, 1) return ramp_func diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 307538fad22..c254b2f6882 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -34,6 +34,7 @@ from megatron.core import parallel_state from megatron.core.optimizer.cpu_offloading.hybrid_optimizer import HybridDeviceOptimizer from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.transformer.fsdp_dtensor_checkpoint import get_global_unique_param_name from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer from ..transformer.module import MegatronModule @@ -481,6 +482,7 @@ def get_megatron_optimizer( use_gloo_process_groups: bool = True, default_skip_embedding_weight_decay: bool = False, pg_collection: Optional[ProcessGroupCollection] = None, + dump_param_to_param_group_map: Optional[str] = None, ) -> MegatronOptimizer: """Retrieve the Megatron optimizer for model chunks. @@ -502,6 +504,7 @@ def get_megatron_optimizer( This is useful if you do not want embeddings to shrink to zero in training as recommended in https://arxiv.org/abs/2312.16903 pg_collection: Optional unified process group for distributed training. + dump_param_to_param_group_map (Optional[str]): path to dump parameter to param group map. Returns: Instance of MegatronOptimizer. 
@@ -579,6 +582,9 @@ def get_megatron_optimizer( return ChainedOptimizer(optimizers) + if dump_param_to_param_group_map is not None: + param_to_param_group = {} + param_group_id = 0 for dense_model_chunks, overlap_param_gather_with_optimizer_step in zip( all_dense_model_chunks, overlap_param_gather_with_optimizer_step_flags ): @@ -597,6 +603,12 @@ def get_megatron_optimizer( model_chunk.overlap_param_gather_with_optimizer_step = ( overlap_param_gather_with_optimizer_step ) + if dump_param_to_param_group_map is not None: + for param_group in param_groups: + for param in param_group["params"]: + param_name = get_global_unique_param_name(model_chunks, param) + param_to_param_group[param_name] = param_group_id + param_group_id += 1 # Pass Gloo process groups into optimizer only if needed. optimizers.append( @@ -626,6 +638,12 @@ def get_megatron_optimizer( buffer_name='expert_parallel_buffers', default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, ) + if dump_param_to_param_group_map is not None: + for param_group in moe_param_groups: + for param in param_group["params"]: + param_name = get_global_unique_param_name(model_chunks, param) + param_to_param_group[param_name] = param_group_id + param_group_id += 1 if len(moe_param_groups) > 0: expt_model_parallel_rank = get_pg_rank(expt_tp_pp_group) # Pass Gloo process groups into optimizer only if needed. 
@@ -648,4 +666,9 @@ def get_megatron_optimizer( ) ) + if dump_param_to_param_group_map is not None: + torch.distributed.checkpoint.save( + state_dict=param_to_param_group, checkpoint_id=dump_param_to_param_group_map + ) + return ChainedOptimizer(optimizers) diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index 2925edcce60..8b4740516e2 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -47,6 +47,7 @@ from ..dist_checkpointing.utils import extract_sharded_tensors_and_factories from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer, partition_buckets from ..fp8_utils import dequantize_fp8_tensor, is_float8tensor, quantize_param_shard +from ..transformer.fsdp_dtensor_checkpoint import handle_experts_in_state_dict from ..transformer.module import MegatronModule from .grad_scaler import MegatronGradScaler from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper, param_group_identifier_keys @@ -1152,6 +1153,7 @@ def _param_name(self, param: torch.nn.Parameter) -> str: "Ensure that each model chunk has unique parameter names." ) name_to_param.update(_name_to_param) + name_to_param = handle_experts_in_state_dict(name_to_param) self.param_to_name = {param: name for name, param in name_to_param.items()} assert ( param in self.param_to_name diff --git a/megatron/core/transformer/fsdp_dtensor_checkpoint.py b/megatron/core/transformer/fsdp_dtensor_checkpoint.py index dad1947a183..9ef3f1f1b82 100644 --- a/megatron/core/transformer/fsdp_dtensor_checkpoint.py +++ b/megatron/core/transformer/fsdp_dtensor_checkpoint.py @@ -12,18 +12,160 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging +import re + import torch +import torch.distributed as dist +from torch.distributed.checkpoint import default_planner + +logger = logging.getLogger(__name__) try: + from torch.distributed import DeviceMesh + from torch.distributed._tensor import DTensor + from torch.distributed.checkpoint.metadata import TensorStorageMetadata + from torch.distributed.tensor.placement_types import Replicate, Shard + from megatron.core.distributed.fsdp.src.megatron_fsdp.param_and_grad_buffer import ( make_fsdp_dtensor, ) + from megatron.core.distributed.fsdp.src.megatron_fsdp.uneven_dtensor import ( + gather_uneven_dtensor_to_full_tensor, + ) + from megatron.core.distributed.fsdp.src.megatron_fsdp.utils import ( + get_mcore_tensor_parallel_partition_dim, + is_mcore_tensor_model_parallel, + ) HAVE_MEGATRON_FSDP = True except ImportError: HAVE_MEGATRON_FSDP = False +from megatron.core import parallel_state from megatron.core.tensor_parallel.layers import copy_tensor_model_parallel_attributes +from megatron.core.transformer.transformer_layer import TransformerLayer + + +def get_ep_layer_offset(): + """ + Get the expert layer offset for the current model. + """ + from megatron.training.global_vars import get_args + + args = get_args() + ep_size = parallel_state.get_expert_model_parallel_world_size() + ep_rank = parallel_state.get_expert_model_parallel_rank() + num_local_experts = args.num_experts // ep_size if args.num_experts else 0 + local_expert_offset = ep_rank * num_local_experts + + return local_expert_offset + + +def get_total_num_experts(): + """ + Get the total number of experts for the current model. + """ + from megatron.training.global_vars import get_args + + args = get_args() + return args.num_experts if args.num_experts else 0 + + +def get_expert_index_from_key(key): + """Extract expert index from various expert key formats. 
+ + Supported formats: + - GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0' + - SequentialMLP: 'mlp.experts.local_experts.0.linear_fc1.weight', + 'mlp.experts.local_experts.0.linear_fc2.weight' + + Returns: + int: Expert index if found, None otherwise. + """ + # GroupedMLP: index is at the end after 'weight' + if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key: + m = re.search(r'^.*\.mlp\.experts\.linear_fc\d\.weight(\d+)', key) + assert m, f"Failed to parse expert index from key: {key}" + return int(m.group(1)) + # SequentialMLP: index is between 'local_experts.' and next '.' + elif 'mlp.experts.local_experts' in key: + m = re.search(r'^.*\.mlp\.experts\.local_experts\.(\d+)', key) + assert m, f"Failed to parse expert index from key: {key}" + return int(m.group(1)) + return None + + +def handle_experts_in_state_dict(state_dict): + """ + Rewrite expert keys in state dict. + """ + local_expert_start = get_ep_layer_offset() + local_expert_end = get_total_num_experts() + + def should_keep_expert_key(expert_index): + """Determine if this rank should keep this expert key based on expert index""" + if expert_index is None: + # If we can't determine expert index, keep the key (non-expert weights) + return True + + # Check if this expert belongs to this rank + return local_expert_start <= expert_index < local_expert_end + + def replace_expert_index_in_key(key, expert_index, state_dict): + """Replace expert index in key with new index corresponding to the current rank""" + new_expert_index = expert_index + local_expert_start + # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0' + if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key: + # Handle SwiGLU weight{idx}_w and weight{idx}_v format + if key.endswith('_w') or key.endswith('_v'): + suffix = key[-2:] # '_w' or '_v' + new_key = key.replace( + f'weight{expert_index}{suffix}', 
f'weight{new_expert_index}{suffix}' + ) + # Handle regular weight{idx} format + else: + new_key = key.replace(f'weight{expert_index}', f'weight{new_expert_index}') + # SequentialMLP: index is between 'local_experts.' and next '.' + elif 'mlp.experts.local_experts' in key: + new_key = key.replace( + f'local_experts.{expert_index}.', f'local_experts.{new_expert_index}.' + ) + else: + raise ValueError(f"Unexpected expert key format: {key}") + + state_dict[new_key] = state_dict[key] + del state_dict[key] + + # Process model state dict + state_dict = state_dict.copy() + for key in list(state_dict.keys()): + expert_index = get_expert_index_from_key(key) + if not should_keep_expert_key(expert_index): + replace_expert_index_in_key(key, expert_index, state_dict) + + return state_dict + + +def expert_param_local_key(key): + """Get the module parameter corresponding to the key.""" + local_expert_offset = get_ep_layer_offset() + expert_index = get_expert_index_from_key(key) + if expert_index is not None: + new_expert_index = expert_index - local_expert_offset + # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0' + if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key: + new_key = key.replace(f'weight{expert_index}', f'weight{new_expert_index}') + # SequentialMLP: index is between 'local_experts.' and next '.' + elif 'mlp.experts.local_experts' in key: + new_key = key.replace( + f'local_experts.{expert_index}.', f'local_experts.{new_expert_index}.' + ) + else: + raise ValueError(f"Unexpected expert key format: {key}") + key = new_key + + return key def handle_swiglu_in_state_dict(model, model_state_dict, optimizer_state_dict): @@ -43,7 +185,29 @@ def intersection(s1, s2): def offset_slice(s, offset): return slice(s.start + offset, s.stop + offset) - def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): + def is_swiglu_key(key): + """ + Check if this key should be handled as SwiGLU linear_fc1 weight or bias. 
+ """ + # Non-expert MLP: 'mlp.linear_fc1.weight', 'mlp.linear_fc1.bias' + # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc1.bias0' + # SequentialMLP: 'mlp.experts.local_experts.0.linear_fc1.weight', + # 'mlp.experts.local_experts.0.linear_fc1.bias' + return any( + re.search(pat, key) + for pat in [ + r"(.*)\.mlp\.linear_fc1\.weight$", + r"(.*)\.mlp\.linear_fc1\.bias$", + r"(.*)\.mlp\.experts\.linear_fc1\.weight(\d+)$", + r"(.*)\.mlp\.experts\.linear_fc1\.bias(\d+)$", + r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.weight$", + r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.bias$", + r"(.*)\.mlp\.shared_experts\.linear_fc1\.weight$", + r"(.*)\.mlp\.shared_experts\.linear_fc1\.bias$", + ] + ) + + def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis, is_expert_param): """ Split the SWiGLU linear_fc1 parameter into two parts: weight_w and weight_v. """ @@ -55,7 +219,9 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): fsdp_slice = dist_param.megatron_fsdp_slice megatron_fsdp_dist_index = dist_param.megatron_fsdp_dist_index - tp_mesh = megatron_fsdp_dist_index.get_submesh([megatron_fsdp_dist_index.tp_dim]) + tp_mesh = megatron_fsdp_dist_index.get_submesh( + [megatron_fsdp_dist_index.tp_dim], is_expert_parallel=is_expert_param + ) data_size = data.numel() // tp_mesh.mesh.numel() w_slice = slice(0, data_size // 2) v_slice = slice(data_size // 2, data_size) @@ -75,8 +241,9 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): # Fake parameters w and v are used to provide the correct parameter # shape and Tensor-Parallelism information. 
per_tp_rank_shape = list(data.shape) - if getattr(dist_param, "tensor_model_parallel", False): - tp_dim = dist_param.partition_dim + if is_mcore_tensor_model_parallel(dist_param): + tp_dim = get_mcore_tensor_parallel_partition_dim(dist_param) + assert tp_dim is not None, "Tensor model parallel dimension not found" per_tp_rank_shape[tp_dim] //= tp_mesh.mesh.numel() linear_fc1_meta = torch.empty(*per_tp_rank_shape, device="meta") w_meta, v_meta = torch.chunk(linear_fc1_meta, 2, dim=swiglu_shard_axis) @@ -87,6 +254,7 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): weight_w.data, w_meta, dist_index=megatron_fsdp_dist_index, + is_expert_param=is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, ) @@ -94,16 +262,21 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): weight_v.data, v_meta, dist_index=megatron_fsdp_dist_index, + is_expert_param=is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, ) return weight_w, weight_v + model_state_dict = model_state_dict.copy() for key in list(model_state_dict.keys()): - if key.endswith('mlp.linear_fc1.weight') or key.endswith('mlp.linear_fc1.bias'): + if is_swiglu_key(key): dist_param = model.get_parameter(f"module.{key}") weight_w, weight_v = split_swiglu_linear_fc1( - model_state_dict[key], dist_param, swiglu_shard_axis=0 + model_state_dict[key], + dist_param, + swiglu_shard_axis=0, + is_expert_param='mlp.experts' in key, ) # Update the model state dict with the new keys @@ -111,26 +284,32 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): model_state_dict[f"{key}_v"] = weight_v del model_state_dict[key] - try: - optimizer_state_dict = optimizer_state_dict["state"] - except KeyError: - optimizer_state_dict = {} + if optimizer_state_dict is not None: + optimizer_state_dict = optimizer_state_dict.copy() + if len(optimizer_state_dict["state"]) != 0: + opt_state_dict = optimizer_state_dict["state"] + new_opt_state_dict = {} + for key in 
list(opt_state_dict.keys()): + # Only process SWIGLU keys + if not is_swiglu_key(key): + new_opt_state_dict[key] = opt_state_dict[key] + continue + new_opt_state_dict[f"{key}_w"] = opt_state_dict[key].copy() + new_opt_state_dict[f"{key}_v"] = opt_state_dict[key].copy() + for subkey in ["exp_avg", "exp_avg_sq"]: + dist_param = model.get_parameter(expert_param_local_key(key[len("module.") :])) + weight_w, weight_v = split_swiglu_linear_fc1( + opt_state_dict[key][subkey], + dist_param, + swiglu_shard_axis=0, + is_expert_param="mlp.experts" in key, + ) + # Update the optimizer state dict with the new keys + new_opt_state_dict[f"{key}_w"][subkey] = weight_w + new_opt_state_dict[f"{key}_v"][subkey] = weight_v + optimizer_state_dict["state"] = new_opt_state_dict - if len(optimizer_state_dict) != 0: - for key in list(optimizer_state_dict.keys()): - if not (key.endswith('mlp.linear_fc1.weight') or key.endswith('mlp.linear_fc1.bias')): - continue - optimizer_state_dict[f"{key}_w"] = optimizer_state_dict[key].copy() - optimizer_state_dict[f"{key}_v"] = optimizer_state_dict[key].copy() - for subkey in ["exp_avg", "exp_avg_sq"]: - dist_param = model.get_parameter(key[len("module.") :]) - weight_w, weight_v = split_swiglu_linear_fc1( - optimizer_state_dict[key][subkey], dist_param, swiglu_shard_axis=0 - ) - # Update the optimizer state dict with the new keys - optimizer_state_dict[f"{key}_w"][subkey] = weight_w - optimizer_state_dict[f"{key}_v"][subkey] = weight_v - del optimizer_state_dict[key] + return model_state_dict, optimizer_state_dict def handle_fp8_extra_state_case(model_state_dict): @@ -162,7 +341,7 @@ def flatten_state_dict(obj, parent_key="", sep="."): return items -def print_diff_in_state_dicts(state_dict_metadata, load_state_dict): +def print_diff_in_state_dicts(state_dict_metadata, load_state_dict, limit=100): """ Print the differences between two state dicts: metadata state dict and load state dict. 
This function compares the keys and shapes of the tensors in both dicts. @@ -172,24 +351,105 @@ def print_diff_in_state_dicts(state_dict_metadata, load_state_dict): meta_keys = set(state_dict_metadata.keys()) load_keys = set(load_state_dict.keys()) - only_in_meta = meta_keys - load_keys - only_in_load = load_keys - meta_keys - in_both = meta_keys & load_keys + only_in_meta = list(meta_keys - load_keys) + only_in_load = list(load_keys - meta_keys) + in_both = list(meta_keys & load_keys) - print("Keys only in checkpoint metadata_state_dict:") - for k in sorted(only_in_meta): - print(f" {k}") + logger.info(f"Keys only in checkpoint metadata_state_dict(first {limit}):") + for k in sorted(only_in_meta[:limit]): + logger.info(f" {k}") - print("\nKeys only in load_state_dict:") - for k in sorted(only_in_load): - print(f" {k}") + logger.info(f"\nKeys only in load_state_dict(first {limit}):") + for k in sorted(only_in_load[:limit]): + logger.info(f" {k}") - print("\nKeys in both but with different shapes:") - for k in sorted(in_both): + logger.info(f"\nKeys in both but with different shapes(first {limit}):") + for k in sorted(in_both[:limit]): v_meta = state_dict_metadata[k] v_load = load_state_dict[k] # If tensors, compare shape; else, compare type/values meta_shape = v_meta.size if hasattr(v_meta, "size") else type(v_meta) load_shape = v_load.shape if hasattr(v_load, "shape") else type(v_load) if meta_shape != load_shape: - print(f" {k}: meta shape={meta_shape}, load shape={load_shape}") + logger.info(f" {k}: meta shape={meta_shape}, load shape={load_shape}") + + +def validate_loaded_state_dict(state_dict, checkpoint_path): + """ + Validate the loaded state dict against the expected structure and types. + """ + assert HAVE_MEGATRON_FSDP, "This function requires Megatron-FSDP to be installed." 
+ + # Initialize reader + reader = torch.distributed.checkpoint.FileSystemReader(checkpoint_path) + metadata = reader.read_metadata() + flat_state_dict = flatten_state_dict(state_dict) + + for key, value in flat_state_dict.items(): + tensor_metadata = metadata.state_dict_metadata[key] + + if not isinstance(tensor_metadata, TensorStorageMetadata): + continue + if not isinstance(value, DTensor): + load_item_dict = {key: torch.empty_like(value)} + else: + load_item_dict = { + key: torch.distributed.tensor.empty( + tensor_metadata.size, + dtype=tensor_metadata.properties.dtype, + device_mesh=DeviceMesh.from_group( + group=dist.group.WORLD, + device_type="cuda", + mesh=torch.arange(dist.get_world_size()), + mesh_dim_names=("world",), + ), + placements=[Shard(0)], + ) + } + torch.distributed.checkpoint.load( + load_item_dict, storage_reader=reader, planner=default_planner.DefaultLoadPlanner() + ) + if isinstance(value, DTensor): + full_value = gather_uneven_dtensor_to_full_tensor(value) + loaded_tensor = load_item_dict[key].redistribute( + placements=[Replicate()] * len(value.placements) + ) + assert torch.allclose( + loaded_tensor._local_tensor, full_value._local_tensor, atol=1e-8, rtol=1e-5 + ), f"key: {key}; {loaded_tensor} {full_value}" + else: + assert torch.allclose( + value, load_item_dict[key] + ), f"key: {key}; {value} {load_item_dict[key]}" + + +def get_global_unique_param_name(model_chunks, param): + """ + Get the global unique parameter name for a given model and parameter. 
+ """ + param_name = None + for model in model_chunks: + for name, p in model.named_parameters(): + if p is param: + param_name = name + break + if param_name is None: + raise ValueError("Parameter not found in model chunks") + + # Get PP unique parameter name + if re.search(r"layers\.(\d+)", param_name) and "mtp" not in param_name: + tf_layer_number = -1 + for module in model.modules(): + if not isinstance(module, TransformerLayer): + continue + for p in module.parameters(): + if p is param: + tf_layer_number = module.layer_number + break + if tf_layer_number != -1: + param_name = re.sub(r"layers\.(\d+)", f"layers.{tf_layer_number - 1}", param_name) + + # Get EP unique parameter name + param_name = list(handle_experts_in_state_dict({param_name: None}).keys())[0] + + return param_name diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index bdf915a8ae1..1d29aff0827 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -2267,6 +2267,10 @@ def _add_training_args(parser): help="Use torch.optim.Optimizer instead of Megatron's optimizer in optimizer cpu offload mode.") group.add_argument('--overlap-cpu-optimizer-d2h-h2d', action='store_true', default=False, help='Overlap CPU optimizer step, gradients D2H and updated parameters H2D.') + group.add_argument('--dump-param-to-param-group-map', type=str, default=None, + help="Path to a file containing parameter-to-parameter-group mapping. 
" + "Provide a JSON file that specifies which parameters belong to which " + "parameter group for global coordination.") group.add_argument('--no-pin-cpu-grads', action='store_false', dest='pin_cpu_grads', help='Disable pinning of CPU memory for gradients.') group.add_argument('--no-pin-cpu-params', action='store_false', dest='pin_cpu_params', diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index 71b9cd97021..93c23255f4c 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -42,9 +42,10 @@ try: from megatron.core.distributed.fsdp.src.megatron_fsdp.uneven_dtensor import preprocess_state_dict_for_uneven_dtensor from megatron.core.transformer.fsdp_dtensor_checkpoint import ( + print_diff_in_state_dicts, handle_fp8_extra_state_case, handle_swiglu_in_state_dict, - print_diff_in_state_dicts, + handle_experts_in_state_dict, ) HAVE_MEGATRON_FSDP = True except ImportError: @@ -561,6 +562,9 @@ def save_checkpoint(iteration, model, optimizer, opt_param_scheduler, num_floati # TODO Handle non-empty directories (e.g., after a crash during saving). 
ensure_directory_exists(checkpoint_name, check_parent=False) + if ckpt_format == "fsdp_dtensor": + state_dict = preprocess_fsdp_dtensor_state_dict(args, state_dict, model[0]) + fs_storage_writer = torch.distributed.checkpoint.FileSystemWriter(checkpoint_name) torch.distributed.checkpoint.save( state_dict=state_dict, @@ -784,9 +788,17 @@ def maybe_save_dataloader_state(train_iterator, iteration, dataloader_save_path) torch.save(dataloader_save_dict, data_state_save_path) -def generate_state_dict(args, model, optimizer, opt_param_scheduler, - rng_state, iteration=None, - optim_sd_kwargs=None, model_sd_kwargs=None, rerun_state=None): +def generate_state_dict( + args, + model, + optimizer, + opt_param_scheduler, + rng_state, + iteration=None, + optim_sd_kwargs=None, + model_sd_kwargs=None, + rerun_state=None, +): """Generate a state dict from given model, optimizer, scheduler, rng state and others. """ # Arguments, iteration, and model. @@ -839,16 +851,27 @@ def generate_state_dict(args, model, optimizer, opt_param_scheduler, if not args.no_save_rng and rng_state: state_dict["rng_state"] = rng_state - # fsdp_dtensor ckpt specific state dict preprocessing - if args.ckpt_format == "fsdp_dtensor": - assert HAVE_MEGATRON_FSDP, "Megatron FSDP is enabled but Megatron-FSDP is not available." - assert len(model) == 1, "FSDP DTensor checkpoints are not supported for multiple models." 
- if args.swiglu: - state_dict = state_dict.copy() - handle_swiglu_in_state_dict( - model[0], state_dict["model"], state_dict["optimizer"]) - handle_fp8_extra_state_case(state_dict["model"]) - preprocess_state_dict_for_uneven_dtensor(state_dict) + return state_dict + + +def preprocess_fsdp_dtensor_state_dict(args, raw_state_dict, model): + state_dict = raw_state_dict.copy() + handle_fp8_extra_state_case(state_dict["model"]) + if args.swiglu: + if "optimizer" in state_dict: + model_state_dict, optimizer_state_dict = handle_swiglu_in_state_dict( + model, state_dict["model"], state_dict["optimizer"] + ) + state_dict["model"] = model_state_dict + state_dict["optimizer"] = optimizer_state_dict + else: + model_state_dict, _ = handle_swiglu_in_state_dict( + model, state_dict["model"], None + ) + state_dict["model"] = model_state_dict + if args.num_experts: + state_dict["model"] = handle_experts_in_state_dict(state_dict["model"]) + preprocess_state_dict_for_uneven_dtensor(state_dict) return state_dict @@ -1169,6 +1192,12 @@ def _load_base_checkpoint( if rank0: return {}, checkpoint_name, release, CheckpointType.FSDP_DTENSOR + state_dict = sharded_state_dict + raw_optimizer_state_dict = state_dict["optimizer"].copy() if "optimizer" in state_dict else None + raw_model_state_dict = state_dict["model"].copy() if "model" in state_dict else None + model = state_dict.pop("_model") + state_dict = preprocess_fsdp_dtensor_state_dict(args, state_dict, model[0]) + ckpt_type = CheckpointType.FSDP_DTENSOR fs_storage_reader = torch.distributed.checkpoint.FileSystemReader(checkpoint_name) allow_partial_load = not getattr(args, 'strict_fsdp_dtensor_load', False) @@ -1177,15 +1206,20 @@ def _load_base_checkpoint( rank = torch.distributed.get_rank() import time as _time _time.sleep(rank * 0.001) # Make that logs of different ranks do not overlap - print_diff_in_state_dicts(state_dict_metadata, sharded_state_dict) + print_diff_in_state_dicts(state_dict_metadata, state_dict) planner = 
default_planner.DefaultLoadPlanner(allow_partial_load=allow_partial_load) torch.distributed.checkpoint.load_state_dict( - state_dict=sharded_state_dict, + state_dict=state_dict, storage_reader=fs_storage_reader, planner=planner, ) - state_dict = sharded_state_dict + + if raw_optimizer_state_dict is not None: + state_dict["optimizer"] = raw_optimizer_state_dict + + if raw_model_state_dict is not None: + state_dict["model"] = raw_model_state_dict else: raise NotImplementedError(f"checkpoint format {ckpt_format} not supported") @@ -1520,7 +1554,7 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', except FileNotFoundError: state_dict_metadata = {} - gen_sd_rerun_state = None + gen_sd_rerun_state = {} gen_sd_opt_param_scheduler = None gen_sd_rng_state = None gen_sd_optim = None @@ -1537,7 +1571,7 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', optim_sd_kwargs = dict(metadata=_build_sharded_state_dict_metadata(args), is_loading=True) - load_kwargs["sharded_state_dict"] = generate_state_dict( + state_dict = generate_state_dict( args, model=model, optimizer=gen_sd_optim, @@ -1547,6 +1581,8 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', rerun_state=gen_sd_rerun_state, iteration=1, ) + state_dict["_model"] = model + load_kwargs["sharded_state_dict"] = state_dict state_dict, checkpoint_name, release, ckpt_type = _load_base_checkpoint( load_dir, args, rank0=False, checkpointing_context=checkpointing_context, diff --git a/megatron/training/training.py b/megatron/training/training.py index f805dab0f15..bda9e42dc82 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1210,6 +1210,7 @@ def setup_model_and_optimizer( # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903 
default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, + dump_param_to_param_group_map=args.dump_param_to_param_group_map, ) else: optimizer = get_megatron_muon_optimizer( diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json index 0f2637a9511..717ae3f5fa6 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04748, - "2": 11.03561, - "3": 9.58774, - "4": 9.25819, - "5": 9.53583, - "6": 9.8804, - "7": 9.48247, - "8": 8.93575, - "9": 8.65813, - "10": 9.0567, - "11": 8.49445, - "12": 8.52444, - "13": 8.45239, - "14": 7.97323, - "15": 8.0476, - "16": 8.07971, - "17": 8.09081, - "18": 7.76437, - "19": 8.14892, - "20": 7.89868, - "21": 7.59371, - "22": 7.54743, - "23": 7.43222, - "24": 7.4302, - "25": 7.67579, - "26": 7.06929, - "27": 7.62041, - "28": 7.32495, - "29": 7.49042, - "30": 7.64391, - "31": 7.39435, - "32": 7.58789, - "33": 7.64037, - "34": 7.69778, - "35": 7.20998, - "36": 7.08538, - "37": 7.42584, - "38": 7.18804, - "39": 7.55054, - "40": 7.54446, - "41": 7.49287, - "42": 7.24937, - "43": 7.23587, - "44": 7.41595, - "45": 7.18755, - "46": 6.89949, - "47": 7.29966, - "48": 7.14134, - "49": 7.58963, - "50": 7.03602 + "1": 11.04722, + "2": 11.03572, + "3": 9.58802, + "4": 9.25807, + "5": 9.46595, + "6": 9.99646, + "7": 9.50952, + "8": 8.97596, + "9": 8.64768, + "10": 9.40103, + "11": 8.86556, + "12": 8.63563, + "13": 8.52125, + "14": 8.08824, + "15": 8.1958, + "16": 8.22112, + "17": 8.14098, + "18": 7.8386, + "19": 8.23438, + "20": 7.95361, + "21": 
7.62549, + "22": 7.60352, + "23": 7.47957, + "24": 7.46573, + "25": 7.70343, + "26": 7.10719, + "27": 7.64313, + "28": 7.34582, + "29": 7.5169, + "30": 7.67511, + "31": 7.41799, + "32": 7.61213, + "33": 7.66582, + "34": 7.73101, + "35": 7.23081, + "36": 7.10765, + "37": 7.4476, + "38": 7.21053, + "39": 7.57508, + "40": 7.5662, + "41": 7.51605, + "42": 7.27243, + "43": 7.25706, + "44": 7.44, + "45": 7.21244, + "46": 6.92421, + "47": 7.32604, + "48": 7.17147, + "49": 7.62154, + "50": 7.0624 } }, "num-zeros": { @@ -62,55 +62,55 @@ "step_interval": 1, "values": { "1": 38802612.0, - "2": 38543592.0, - "3": 38739528.0, - "4": 279937824.0, - "5": 259189728.0, - "6": 271446400.0, - "7": 604773504.0, - "8": 768892544.0, - "9": 645824128.0, - "10": 744257088.0, - "11": 718888576.0, - "12": 746732544.0, - "13": 871990976.0, - "14": 821645632.0, - "15": 724250816.0, - "16": 932241472.0, - "17": 648958912.0, - "18": 649120000.0, - "19": 925992960.0, - "20": 989207936.0, - "21": 819324096.0, - "22": 736955072.0, - "23": 910497792.0, - "24": 876716672.0, - "25": 843170688.0, - "26": 809573824.0, - "27": 854086912.0, - "28": 802857664.0, - "29": 805523328.0, - "30": 775645184.0, - "31": 771754624.0, - "32": 749733696.0, - "33": 718385216.0, - "34": 724771200.0, - "35": 737655104.0, - "36": 690419968.0, - "37": 673203456.0, - "38": 627239552.0, - "39": 614047168.0, - "40": 607288512.0, - "41": 582590592.0, - "42": 548211200.0, - "43": 532740640.0, - "44": 554239168.0, - "45": 514790528.0, - "46": 350258560.0, - "47": 472420128.0, - "48": 453788736.0, - "49": 440597216.0, - "50": 303063296.0 + "2": 38543656.0, + "3": 38739356.0, + "4": 273649600.0, + "5": 252887040.0, + "6": 255692384.0, + "7": 598483264.0, + "8": 787737984.0, + "9": 696133120.0, + "10": 505146368.0, + "11": 718888640.0, + "12": 872597184.0, + "13": 947495104.0, + "14": 1076398976.0, + "15": 856390592.0, + "16": 1048635648.0, + "17": 831370688.0, + "18": 963679552.0, + "19": 970018240.0, + "20": 935737344.0, + "21": 
904189312.0, + "22": 887937280.0, + "23": 894777856.0, + "24": 703744192.0, + "25": 909232512.0, + "26": 875633216.0, + "27": 894981376.0, + "28": 919242816.0, + "29": 931351552.0, + "30": 929784768.0, + "31": 941621376.0, + "32": 885000768.0, + "33": 828484096.0, + "34": 822284800.0, + "35": 832032128.0, + "36": 787939392.0, + "37": 770719808.0, + "38": 561204672.0, + "39": 617201536.0, + "40": 695374592.0, + "41": 698978816.0, + "42": 692913728.0, + "43": 668003776.0, + "44": 673780992.0, + "45": 631182912.0, + "46": 444613312.0, + "47": 591957824.0, + "48": 617363968.0, + "49": 585295808.0, + "50": 570423872.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6637267456.0, - "2": 6637269504.0, - "3": 6637269504.0, - "4": 6637269504.0, - "5": 6637269504.0, - "6": 6637269504.0, - "7": 6637269504.0, - "8": 6637269504.0, - "9": 6637269504.0, - "10": 6637269504.0, - "11": 6637269504.0, - "12": 6637269504.0, - "13": 6637269504.0, - "14": 6637269504.0, - "15": 6637269504.0, - "16": 6637269504.0, - "17": 6637269504.0, - "18": 6637269504.0, - "19": 6637269504.0, - "20": 6637269504.0, - "21": 6637269504.0, - "22": 6637269504.0, - "23": 6637269504.0, - "24": 6637269504.0, - "25": 6637269504.0, - "26": 6637269504.0, - "27": 6637269504.0, - "28": 6637269504.0, - "29": 6637269504.0, - "30": 6637269504.0, - "31": 6637269504.0, - "32": 6637269504.0, - "33": 6637269504.0, - "34": 6637269504.0, - "35": 6637269504.0, - "36": 6637269504.0, - "37": 6637269504.0, - "38": 6637269504.0, - "39": 6637269504.0, - "40": 6637269504.0, - "41": 6637269504.0, - "42": 6637269504.0, - "43": 6637269504.0, - "44": 6637269504.0, - "45": 6637269504.0, - "46": 6637269504.0, - "47": 6637269504.0, - "48": 6637269504.0, - "49": 6637269504.0, - "50": 6637269504.0 + "1": 6637272576.0, + "2": 6637274624.0, + "3": 6637274624.0, + "4": 6637274624.0, + "5": 6637274624.0, + "6": 6637274624.0, + "7": 6637274624.0, + "8": 6637274624.0, + "9": 6637274624.0, 
+ "10": 6637274624.0, + "11": 6637274624.0, + "12": 6637274624.0, + "13": 6637274624.0, + "14": 6637274624.0, + "15": 6637274624.0, + "16": 6637274624.0, + "17": 6637274624.0, + "18": 6637274624.0, + "19": 6637274624.0, + "20": 6637274624.0, + "21": 6637274624.0, + "22": 6637274624.0, + "23": 6637274624.0, + "24": 6637274624.0, + "25": 6637274624.0, + "26": 6637274624.0, + "27": 6637274624.0, + "28": 6637274624.0, + "29": 6637274624.0, + "30": 6637274624.0, + "31": 6637274624.0, + "32": 6637274624.0, + "33": 6637274624.0, + "34": 6637274624.0, + "35": 6637274624.0, + "36": 6637274624.0, + "37": 6637274624.0, + "38": 6637274624.0, + "39": 6637274624.0, + "40": 6637274624.0, + "41": 6637274624.0, + "42": 6637274624.0, + "43": 6637274624.0, + "44": 6637274624.0, + "45": 6637274624.0, + "46": 6637274624.0, + "47": 6637274624.0, + "48": 6637274624.0, + "49": 6637274624.0, + "50": 6637274624.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 55055331328.0, - "2": 57809321984.0, - "3": 57918455808.0, - "4": 57918455808.0, - "5": 57918455808.0, - "6": 57918455808.0, - "7": 57918455808.0, - "8": 57918455808.0, - "9": 57918455808.0, - "10": 57918455808.0, - "11": 57918455808.0, - "12": 57918455808.0, - "13": 57931390976.0, - "14": 57931390976.0, - "15": 57931390976.0, - "16": 57931390976.0, - "17": 57931390976.0, - "18": 57931390976.0, - "19": 57931390976.0, - "20": 57931390976.0, - "21": 57931390976.0, - "22": 57931390976.0, - "23": 57931390976.0, - "24": 57931390976.0, - "25": 57931390976.0, - "26": 57931390976.0, - "27": 57931390976.0, - "28": 57931390976.0, - "29": 57931390976.0, - "30": 57931390976.0, - "31": 57931390976.0, - "32": 58003226624.0, - "33": 58003226624.0, - "34": 58003226624.0, - "35": 58003226624.0, - "36": 58003226624.0, - "37": 58003226624.0, - "38": 58003226624.0, - "39": 58003226624.0, - "40": 58003226624.0, - "41": 58003226624.0, - "42": 58003226624.0, - "43": 58003226624.0, - "44": 
58183614464.0, - "45": 58234208256.0, - "46": 58555555840.0, - "47": 58555555840.0, - "48": 58555555840.0, - "49": 58555555840.0, - "50": 58780934144.0 + "1": 55056003072.0, + "2": 57810763776.0, + "3": 57920647168.0, + "4": 57920647168.0, + "5": 57920647168.0, + "6": 57920647168.0, + "7": 57920647168.0, + "8": 57920647168.0, + "9": 57920647168.0, + "10": 57920647168.0, + "11": 57920647168.0, + "12": 57920647168.0, + "13": 57920647168.0, + "14": 57920647168.0, + "15": 57920647168.0, + "16": 57920647168.0, + "17": 57920647168.0, + "18": 57920647168.0, + "19": 57920647168.0, + "20": 57920647168.0, + "21": 57920647168.0, + "22": 57920647168.0, + "23": 57920647168.0, + "24": 57920647168.0, + "25": 57920647168.0, + "26": 57920647168.0, + "27": 57920647168.0, + "28": 57920647168.0, + "29": 57920647168.0, + "30": 57920647168.0, + "31": 57920647168.0, + "32": 57920647168.0, + "33": 57920647168.0, + "34": 57961472000.0, + "35": 57961472000.0, + "36": 57961472000.0, + "37": 57961472000.0, + "38": 57961472000.0, + "39": 57961472000.0, + "40": 57961472000.0, + "41": 57961472000.0, + "42": 57961472000.0, + "43": 57961472000.0, + "44": 57961472000.0, + "45": 57961472000.0, + "46": 57961472000.0, + "47": 57961472000.0, + "48": 57961472000.0, + "49": 57961472000.0, + "50": 57961472000.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07654, - "2": 11.07406, - "3": 10.53881, - "4": 10.09803, - "5": 9.81154, - "6": 10.06236, - "7": 9.79762, - "8": 9.07117, - "9": 8.87049, - "10": 9.127, - "11": 8.49853, - "12": 8.53046, - "13": 8.42444, - "14": 7.847, - "15": 7.99077, - "16": 8.05015, - "17": 8.00064, - "18": 7.73104, - "19": 8.11087, - "20": 7.82933, - "21": 7.52501, - "22": 7.49916, - "23": 7.36982, - "24": 7.37235, - "25": 7.61578, - "26": 7.02029, - "27": 7.56014, - "28": 7.2681, - "29": 7.44399, - "30": 7.58618, - "31": 7.32468, - "32": 7.50596, - "33": 7.5715, - "34": 7.63581, - "35": 7.15224, - "36": 7.01784, - "37": 
7.35163, - "38": 7.12551, - "39": 7.48656, - "40": 7.47408, - "41": 7.42096, - "42": 7.17595, - "43": 7.16059, - "44": 7.34289, - "45": 7.11969, - "46": 6.82753, - "47": 7.23525, - "48": 7.08042, - "49": 7.51043, - "50": 6.9735 + "1": 11.07648, + "2": 11.07404, + "3": 10.53854, + "4": 10.09813, + "5": 9.81166, + "6": 10.09741, + "7": 9.79481, + "8": 9.0642, + "9": 8.86016, + "10": 9.34039, + "11": 8.51318, + "12": 8.59467, + "13": 8.5292, + "14": 7.95757, + "15": 8.06962, + "16": 8.11802, + "17": 8.06993, + "18": 7.80587, + "19": 8.19192, + "20": 7.8906, + "21": 7.57063, + "22": 7.55091, + "23": 7.41606, + "24": 7.42454, + "25": 7.65274, + "26": 7.05583, + "27": 7.59747, + "28": 7.29984, + "29": 7.472, + "30": 7.61908, + "31": 7.35179, + "32": 7.52979, + "33": 7.59161, + "34": 7.66287, + "35": 7.17383, + "36": 7.04133, + "37": 7.37081, + "38": 7.1443, + "39": 7.50879, + "40": 7.48921, + "41": 7.43802, + "42": 7.19405, + "43": 7.17581, + "44": 7.35785, + "45": 7.13985, + "46": 6.84014, + "47": 7.25094, + "48": 7.09407, + "49": 7.52321, + "50": 6.98987 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 69.29797, - "2": 1.7261, - "3": 1.40981, - "4": 2.16562, - "5": 1.7862, - "6": 1.7469, - "7": 1.96688, - "8": 1.97301, - "9": 1.74665, - "10": 1.69613, - "11": 1.02979, - "12": 1.02408, - "13": 1.03261, - "14": 1.02432, - "15": 1.0529, - "16": 1.04491, - "17": 1.03693, - "18": 1.03399, - "19": 1.03627, - "20": 1.02284, - "21": 1.01667, - "22": 1.02932, - "23": 1.03591, - "24": 1.03466, - "25": 1.03149, - "26": 1.03165, - "27": 1.02342, - "28": 1.03777, - "29": 1.04061, - "30": 1.05641, - "31": 1.02382, - "32": 1.01775, - "33": 1.03039, - "34": 1.03693, - "35": 1.03153, - "36": 1.02699, - "37": 1.02756, - "38": 1.02919, - "39": 1.01773, - "40": 1.03491, - "41": 1.03152, - "42": 1.03035, - "43": 1.0221, - "44": 1.05201, - "45": 1.02579, - "46": 1.02798, - "47": 1.03857, - "48": 1.02772, - "49": 1.0408, - "50": 1.03745 + 
"1": 93.39829, + "2": 1.82958, + "3": 1.3241, + "4": 2.19661, + "5": 2.13156, + "6": 1.75452, + "7": 2.08539, + "8": 1.58016, + "9": 1.60816, + "10": 1.03407, + "11": 1.01797, + "12": 1.0168, + "13": 1.01666, + "14": 1.0748, + "15": 1.04137, + "16": 1.05864, + "17": 1.05961, + "18": 1.03233, + "19": 1.02728, + "20": 1.02917, + "21": 1.04313, + "22": 1.03054, + "23": 1.0313, + "24": 1.03789, + "25": 1.04414, + "26": 1.05561, + "27": 1.03361, + "28": 1.03142, + "29": 1.02437, + "30": 1.02195, + "31": 1.0172, + "32": 1.03318, + "33": 1.03742, + "34": 1.03628, + "35": 1.03575, + "36": 1.05127, + "37": 1.03273, + "38": 1.03381, + "39": 1.02923, + "40": 1.02986, + "41": 1.03249, + "42": 1.033, + "43": 1.03169, + "44": 1.03818, + "45": 1.02736, + "46": 1.02698, + "47": 1.03158, + "48": 1.02471, + "49": 1.03674, + "50": 1.0291 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json index 58eb3fc16cd..8cea616921e 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.95004, - "2": 10.9521, - "3": 10.5115, - "4": 9.96454, - "5": 9.93941, - "6": 9.67273, - "7": 10.20975, - "8": 9.49716, - "9": 9.55902, - "10": 9.79742, - "11": 9.30109, - "12": 9.40483, - "13": 9.39546, - "14": 8.84681, - "15": 9.02444, - "16": 9.07121, - "17": 9.04574, - "18": 8.75678, - "19": 9.18159, - "20": 8.8595, - "21": 8.53503, - "22": 8.55182, - "23": 8.42441, - "24": 8.37608, - "25": 8.64304, - "26": 
7.97393, - "27": 8.56806, - "28": 8.19764, - "29": 8.3928, - "30": 8.67283, - "31": 8.289, - "32": 8.43572, - "33": 8.5568, - "34": 8.66018, - "35": 8.07934, - "36": 7.94976, - "37": 8.29565, - "38": 7.98044, - "39": 8.39201, - "40": 8.35513, - "41": 8.31876, - "42": 8.0583, - "43": 8.03283, - "44": 8.24243, - "45": 8.10277, - "46": 7.61696, - "47": 8.15273, - "48": 8.00569, - "49": 8.38688, - "50": 7.81491 + "1": 10.94971, + "2": 10.95163, + "3": 10.51641, + "4": 9.9652, + "5": 9.94116, + "6": 9.67394, + "7": 10.19887, + "8": 9.50035, + "9": 9.54982, + "10": 9.79667, + "11": 9.30128, + "12": 9.40566, + "13": 9.39438, + "14": 8.84572, + "15": 9.02231, + "16": 9.06973, + "17": 9.04712, + "18": 8.75662, + "19": 9.18074, + "20": 8.86175, + "21": 8.53558, + "22": 8.55288, + "23": 8.42513, + "24": 8.37683, + "25": 8.64426, + "26": 7.9756, + "27": 8.57026, + "28": 8.1987, + "29": 8.39406, + "30": 8.67631, + "31": 8.29096, + "32": 8.43692, + "33": 8.55897, + "34": 8.66123, + "35": 8.08, + "36": 7.95214, + "37": 8.2979, + "38": 7.98177, + "39": 8.39281, + "40": 8.35852, + "41": 8.32006, + "42": 8.05954, + "43": 8.03381, + "44": 8.24236, + "45": 8.1025, + "46": 7.61814, + "47": 8.15364, + "48": 8.00693, + "49": 8.38704, + "50": 7.81592 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19403624.0, - "2": 19274194.0, - "3": 19372760.0, - "4": 86525248.0, - "5": 148575568.0, - "6": 145226704.0, - "7": 171879984.0, - "8": 195785248.0, - "9": 164124752.0, - "10": 167684736.0, - "11": 221077344.0, - "12": 200384224.0, - "13": 248872528.0, - "14": 211169424.0, - "15": 214304608.0, - "16": 216075632.0, - "17": 267845984.0, - "18": 170470336.0, - "19": 176865072.0, - "20": 187955392.0, - "21": 225750704.0, - "22": 247396816.0, - "23": 211643856.0, - "24": 205638464.0, - "25": 277022272.0, - "26": 291562304.0, - "27": 225789840.0, - "28": 288202368.0, - "29": 198390384.0, - "30": 213302208.0, - "31": 227204752.0, - "32": 271112416.0, - 
"33": 231840432.0, - "34": 203575536.0, - "35": 191152368.0, - "36": 222566928.0, - "37": 177810112.0, - "38": 228708544.0, - "39": 211168784.0, - "40": 215603968.0, - "41": 200089440.0, - "42": 228529888.0, - "43": 198782848.0, - "44": 141902272.0, - "45": 181922816.0, - "46": 115369856.0, - "47": 170214176.0, - "48": 137292832.0, - "49": 97654936.0, - "50": 160979632.0 + "1": 19403704.0, + "2": 19274216.0, + "3": 22517470.0, + "4": 83429816.0, + "5": 139167728.0, + "6": 138921280.0, + "7": 173470304.0, + "8": 200511856.0, + "9": 165696320.0, + "10": 166120112.0, + "11": 213254416.0, + "12": 187847360.0, + "13": 231586656.0, + "14": 226879072.0, + "15": 219025920.0, + "16": 205179664.0, + "17": 280450432.0, + "18": 181477792.0, + "19": 191026096.0, + "20": 186395632.0, + "21": 233632576.0, + "22": 231696832.0, + "23": 216390688.0, + "24": 215133760.0, + "25": 233079504.0, + "26": 244437920.0, + "27": 222637584.0, + "28": 278773952.0, + "29": 253409264.0, + "30": 240036736.0, + "31": 236599008.0, + "32": 205066624.0, + "33": 263303312.0, + "34": 200444544.0, + "35": 199033824.0, + "36": 243001216.0, + "37": 151181872.0, + "38": 175301280.0, + "39": 219001024.0, + "40": 220307936.0, + "41": 217385856.0, + "42": 230074176.0, + "43": 208226784.0, + "44": 148172720.0, + "45": 141103744.0, + "46": 132664976.0, + "47": 179619392.0, + "48": 118381144.0, + "49": 86643984.0, + "50": 113798320.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4883602432.0, - "2": 4885017088.0, - "3": 4882657792.0, - "4": 4883046912.0, - "5": 4883725824.0, - "6": 4883713536.0, - "7": 4883040768.0, - "8": 4883273216.0, - "9": 4882952704.0, - "10": 4885949952.0, - "11": 4883990016.0, - "12": 4887679488.0, - "13": 4884011520.0, - "14": 4882899456.0, - "15": 4883515904.0, - "16": 4883990016.0, - "17": 4883410432.0, - "18": 4883673600.0, - "19": 4882903552.0, - "20": 4884541952.0, - "21": 4883138048.0, - "22": 4883247616.0, - "23": 
4883839488.0, - "24": 4885058048.0, - "25": 4882676224.0, - "26": 4884058624.0, - "27": 4884724224.0, - "28": 4884874752.0, - "29": 4883127808.0, - "30": 4883252736.0, - "31": 4882955776.0, - "32": 4885190144.0, - "33": 4883845632.0, - "34": 4884392448.0, - "35": 4883083776.0, - "36": 4883851776.0, - "37": 4885246464.0, - "38": 4882680320.0, - "39": 4884296192.0, - "40": 4884689408.0, - "41": 4882836992.0, - "42": 4883972608.0, - "43": 4884519424.0, - "44": 4883354112.0, - "45": 4883495424.0, - "46": 4882788864.0, - "47": 4883144192.0, - "48": 4883688960.0, - "49": 4884182528.0, - "50": 4885279232.0 + "1": 4883287040.0, + "2": 4883441152.0, + "3": 4881697280.0, + "4": 4883730944.0, + "5": 4882556416.0, + "6": 4882616832.0, + "7": 4883438080.0, + "8": 4881568256.0, + "9": 4883173888.0, + "10": 4882272768.0, + "11": 4883676672.0, + "12": 4881393152.0, + "13": 4883141120.0, + "14": 4883697152.0, + "15": 4882622976.0, + "16": 4881830400.0, + "17": 4881658368.0, + "18": 4881863168.0, + "19": 4883804672.0, + "20": 4881795584.0, + "21": 4883333632.0, + "22": 4882194944.0, + "23": 4882084352.0, + "24": 4884065792.0, + "25": 4881804800.0, + "26": 4883596800.0, + "27": 4883047936.0, + "28": 4882476544.0, + "29": 4883087872.0, + "30": 4882151936.0, + "31": 4882625024.0, + "32": 4883104256.0, + "33": 4882526720.0, + "34": 4882292224.0, + "35": 4882485760.0, + "36": 4882867712.0, + "37": 4882634240.0, + "38": 4882610688.0, + "39": 4881474048.0, + "40": 4881961472.0, + "41": 4882663936.0, + "42": 4881860096.0, + "43": 4881499648.0, + "44": 4883392000.0, + "45": 4882392576.0, + "46": 4882815488.0, + "47": 4883113472.0, + "48": 4882158080.0, + "49": 4881207808.0, + "50": 4881588736.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 41210470400.0, - "2": 41210470400.0, - "3": 41210470400.0, - "4": 41210470400.0, - "5": 41210470400.0, - "6": 41210470400.0, - "7": 41210470400.0, - "8": 41210470400.0, - "9": 41210470400.0, 
- "10": 41210470400.0, - "11": 41210470400.0, - "12": 41210470400.0, - "13": 41210470400.0, - "14": 41210470400.0, - "15": 41210470400.0, - "16": 41210470400.0, - "17": 41210470400.0, - "18": 41210470400.0, - "19": 41210470400.0, - "20": 41210470400.0, - "21": 41210470400.0, - "22": 41210470400.0, - "23": 41210470400.0, - "24": 41210470400.0, - "25": 41210470400.0, - "26": 41210470400.0, - "27": 41210470400.0, - "28": 41210470400.0, - "29": 41210470400.0, - "30": 41210470400.0, - "31": 41210470400.0, - "32": 41210470400.0, - "33": 41210470400.0, - "34": 41210470400.0, - "35": 41210470400.0, - "36": 41210470400.0, - "37": 41210470400.0, - "38": 41210470400.0, - "39": 41210470400.0, - "40": 41210470400.0, - "41": 41210470400.0, - "42": 41210470400.0, - "43": 41210470400.0, - "44": 41210470400.0, - "45": 41210470400.0, - "46": 41210470400.0, - "47": 41210470400.0, - "48": 41210470400.0, - "49": 41210470400.0, - "50": 41210470400.0 + "1": 41208348672.0, + "2": 41208348672.0, + "3": 41208348672.0, + "4": 41208348672.0, + "5": 41208348672.0, + "6": 41208348672.0, + "7": 41208348672.0, + "8": 41208348672.0, + "9": 41208348672.0, + "10": 41208348672.0, + "11": 41208348672.0, + "12": 41208348672.0, + "13": 41208348672.0, + "14": 41208348672.0, + "15": 41208348672.0, + "16": 41208348672.0, + "17": 41208348672.0, + "18": 41208348672.0, + "19": 41208348672.0, + "20": 41208348672.0, + "21": 41208348672.0, + "22": 41208348672.0, + "23": 41208348672.0, + "24": 41208348672.0, + "25": 41208348672.0, + "26": 41208348672.0, + "27": 41208348672.0, + "28": 41208348672.0, + "29": 41208348672.0, + "30": 41208348672.0, + "31": 41208348672.0, + "32": 41208348672.0, + "33": 41208348672.0, + "34": 41208348672.0, + "35": 41208348672.0, + "36": 41208348672.0, + "37": 41208348672.0, + "38": 41208348672.0, + "39": 41208348672.0, + "40": 41208348672.0, + "41": 41208348672.0, + "42": 41208348672.0, + "43": 41208348672.0, + "44": 41208348672.0, + "45": 41208348672.0, + "46": 41208348672.0, + "47": 
41208348672.0, + "48": 41208348672.0, + "49": 41208348672.0, + "50": 41208348672.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 86.8085, - "2": 1.10913, - "3": 0.99097, - "4": 0.89412, - "5": 1.25997, - "6": 0.98162, - "7": 0.98318, - "8": 1.13296, - "9": 0.88126, - "10": 0.8633, - "11": 2.2744, - "12": 4.5393, - "13": 3.22763, - "14": 1.64923, - "15": 0.86595, - "16": 0.86575, - "17": 0.85272, - "18": 0.85454, - "19": 0.85281, - "20": 0.87018, - "21": 0.84654, - "22": 0.8494, - "23": 0.84882, - "24": 0.84482, - "25": 0.85311, - "26": 0.84678, - "27": 0.84096, - "28": 0.8412, - "29": 0.84156, - "30": 0.84475, - "31": 0.84747, - "32": 0.85058, - "33": 0.84977, - "34": 0.8479, - "35": 0.85234, - "36": 0.85012, - "37": 0.85087, - "38": 0.84594, - "39": 0.84558, - "40": 0.84807, - "41": 0.84183, - "42": 0.8439, - "43": 0.84221, - "44": 0.84248, - "45": 0.84257, - "46": 0.83922, - "47": 0.84311, - "48": 0.84159, - "49": 0.84011, - "50": 0.8353 + "1": 89.10928, + "2": 1.08143, + "3": 0.94222, + "4": 0.89675, + "5": 1.34524, + "6": 1.06972, + "7": 1.00314, + "8": 1.04961, + "9": 0.86611, + "10": 0.86248, + "11": 0.98739, + "12": 0.86057, + "13": 0.86777, + "14": 0.85834, + "15": 0.8559, + "16": 0.85522, + "17": 0.84644, + "18": 0.85748, + "19": 0.85218, + "20": 0.85342, + "21": 0.84029, + "22": 0.84342, + "23": 0.84297, + "24": 0.83925, + "25": 0.8439, + "26": 0.85696, + "27": 0.83981, + "28": 0.84643, + "29": 0.8433, + "30": 0.86234, + "31": 0.85636, + "32": 0.84184, + "33": 0.84501, + "34": 0.84316, + "35": 0.83806, + "36": 0.84143, + "37": 0.84447, + "38": 0.84137, + "39": 0.84133, + "40": 0.84321, + "41": 0.84019, + "42": 0.84164, + "43": 0.83741, + "44": 0.84203, + "45": 0.83966, + "46": 0.84109, + "47": 0.83945, + "48": 0.84001, + "49": 0.84194, + "50": 0.83578 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json index 1ba051f4889..0835e95b926 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json @@ -1 +1,142 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.83281, "5": 10.85975, "10": 10.79613, "15": 10.80527, "20": 10.72502, "25": 10.53599, "30": 10.3571, "35": 10.24605, "40": 10.05992, "45": 9.7836, "50": 9.8722, "55": 9.83189, "60": 9.45075, "65": 8.89679, "70": 9.71414, "75": 9.39795, "80": 9.38169, "85": 9.58585, "90": 9.7999, "95": 9.50528, "100": 9.37224}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 27013.0, "5": 31736.0, "10": 25785.0, "15": 30383.0, "20": 28435.0, "25": 27493.0, "30": 30329.0, "35": 31750.0, "40": 34279.0, "45": 34634.0, "50": 38531.0, "55": 37465.0, "60": 40172.0, "65": 40624.0, "70": 44852.0, "75": 39231.0, "80": 130535.0, "85": 123250.0, "90": 47793.0, "95": 167340.0, "100": 163328.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 814390272.0, "5": 814420480.0, "10": 814376448.0, "15": 814376960.0, "20": 814373376.0, "25": 814321152.0, "30": 814306304.0, "35": 814292992.0, "40": 814288896.0, "45": 814272000.0, "50": 814262272.0, "55": 814258688.0, "60": 814268416.0, "65": 814220800.0, "70": 814266880.0, "75": 814318080.0, "80": 814285312.0, "85": 814289408.0, "90": 814315520.0, "95": 814320128.0, "100": 814311424.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, 
"step_interval": 5, "values": {"1": 2111314944.0, "5": 2370209280.0, "10": 2370209280.0, "15": 2370209280.0, "20": 2370209280.0, "25": 2370209280.0, "30": 2370209280.0, "35": 2370209280.0, "40": 2370209280.0, "45": 2370209280.0, "50": 2370209280.0, "55": 2370209280.0, "60": 2370209280.0, "65": 2370209280.0, "70": 2370209280.0, "75": 2370209280.0, "80": 2370209280.0, "85": 2370209280.0, "90": 2370209280.0, "95": 2370209280.0, "100": 2370209280.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 20.98318, "5": 0.79797, "10": 0.74028, "15": 0.67279, "20": 0.62948, "25": 0.61132, "30": 0.61547, "35": 0.6152, "40": 0.60421, "45": 0.59124, "50": 0.5891, "55": 0.57048, "60": 0.54799, "65": 0.52185, "70": 0.51195, "75": 0.50105, "80": 0.4628, "85": 0.45992, "90": 0.46498, "95": 0.4599, "100": 0.42568}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 5, + "values": { + "1": 10.82922, + "5": 10.85652, + "10": 10.79298, + "15": 10.8067, + "20": 10.72654, + "25": 10.53282, + "30": 10.35802, + "35": 10.24483, + "40": 10.05533, + "45": 9.77951, + "50": 9.86874, + "55": 9.82995, + "60": 9.449, + "65": 8.89366, + "70": 9.71127, + "75": 9.39451, + "80": 9.38198, + "85": 9.58333, + "90": 9.79944, + "95": 9.50213, + "100": 9.37131 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 5, + "values": { + "1": 27245.0, + "5": 31369.0, + "10": 25870.0, + "15": 29830.0, + "20": 28243.0, + "25": 27636.0, + "30": 30387.0, + "35": 31488.0, + "40": 34779.0, + "45": 35158.0, + "50": 38234.0, + "55": 37133.0, + "60": 40450.0, + "65": 40947.0, + "70": 43436.0, + "75": 39925.0, + "80": 51863.0, + "85": 2145177.0, + "90": 51330.0, + "95": 45247.0, + "100": 163741.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 5, + "values": { + "1": 787511296.0, + "5": 787542016.0, + "10": 787500032.0, + "15": 787499008.0, + "20": 
787500032.0, + "25": 787446272.0, + "30": 787429888.0, + "35": 787413504.0, + "40": 787409920.0, + "45": 787394560.0, + "50": 787384320.0, + "55": 787383808.0, + "60": 787389952.0, + "65": 787346432.0, + "70": 787387904.0, + "75": 787437568.0, + "80": 787405312.0, + "85": 787407360.0, + "90": 787441664.0, + "95": 787445248.0, + "100": 787433472.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 5, + "values": { + "1": 2465793024.0, + "5": 2492764160.0, + "10": 2492764160.0, + "15": 2492764160.0, + "20": 2492764160.0, + "25": 2492764160.0, + "30": 2492764160.0, + "35": 2492764160.0, + "40": 2492764160.0, + "45": 2492764160.0, + "50": 2492764160.0, + "55": 2492764160.0, + "60": 2492764160.0, + "65": 2492764160.0, + "70": 2492764160.0, + "75": 2492764160.0, + "80": 2492764160.0, + "85": 2492764160.0, + "90": 2492764160.0, + "95": 2492764160.0, + "100": 2492764160.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 5, + "values": { + "1": 9.68104, + "5": 0.32859, + "10": 0.30772, + "15": 0.31234, + "20": 0.29254, + "25": 0.29296, + "30": 0.31344, + "35": 0.31026, + "40": 0.30514, + "45": 0.30481, + "50": 0.30324, + "55": 0.29929, + "60": 0.30103, + "65": 0.32008, + "70": 0.31307, + "75": 0.2933, + "80": 0.29351, + "85": 0.29283, + "90": 0.29375, + "95": 0.29458, + "100": 0.29103 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..7e299df5257 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm 
loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82922, + "2": 10.84163, + "3": 10.84245, + "4": 10.82, + "5": 10.85652, + "6": 10.86906, + "7": 10.83778, + "8": 10.84312, + "9": 10.84423, + "10": 10.79298, + "11": 10.86697, + "12": 10.86875, + "13": 10.86207, + "14": 10.86919, + "15": 10.8067, + "16": 10.8057, + "17": 10.77686, + "18": 10.79541, + "19": 10.78384, + "20": 10.72654, + "21": 10.69491, + "22": 10.54462, + "23": 10.6993, + "24": 10.58151, + "25": 10.53282, + "26": 10.58817, + "27": 10.601, + "28": 10.57563, + "29": 10.58022, + "30": 10.35802, + "31": 10.08769, + "32": 10.44466, + "33": 10.4477, + "34": 10.18704, + "35": 10.24483, + "36": 10.19713, + "37": 10.32294, + "38": 10.17101, + "39": 10.37026, + "40": 10.05533, + "41": 10.09491, + "42": 10.17971, + "43": 9.78263, + "44": 9.91346, + "45": 9.77951, + "46": 9.75648, + "47": 10.09647, + "48": 9.80391, + "49": 9.46649, + "50": 9.86874, + "51": 9.79428, + "52": 9.68303, + "53": 10.03314, + "54": 9.9113, + "55": 9.82995, + "56": 9.57839, + "57": 9.42377, + "58": 9.80549, + "59": 9.53292, + "60": 9.449, + "61": 9.65293, + "62": 9.95672, + "63": 9.33775, + "64": 9.74194, + "65": 8.89366, + "66": 9.67317, + "67": 9.33002, + "68": 9.76517, + "69": 9.76336, + "70": 9.71127, + "71": 9.59511, + "72": 9.54797, + "73": 9.47124, + "74": 8.89297, + "75": 9.39451, + "76": 9.04721, + "77": 10.04318, + "78": 9.70313, + "79": 9.35169, + "80": 9.38198, + "81": 9.45146, + "82": 9.67546, + "83": 9.27658, + "84": 9.39241, + "85": 9.58333, + "86": 9.04518, + "87": 9.56487, + "88": 9.72459, + "89": 9.57019, + "90": 9.79944, + "91": 9.30737, + "92": 9.3313, + "93": 9.04109, + "94": 8.80259, + "95": 9.50213, + "96": 9.5021, + "97": 9.28183, + "98": 9.64883, + "99": 8.8594, + "100": 9.37131 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 27245.0, + "2": 28958.0, + "3": 29464.0, + "4": 28046.0, + "5": 31369.0, + "6": 33287.0, 
+ "7": 31200.0, + "8": 26921.0, + "9": 30008.0, + "10": 25870.0, + "11": 33681.0, + "12": 30344.0, + "13": 32737.0, + "14": 33315.0, + "15": 29830.0, + "16": 32475.0, + "17": 30747.0, + "18": 30381.0, + "19": 31032.0, + "20": 28243.0, + "21": 29224.0, + "22": 27340.0, + "23": 34119.0, + "24": 29049.0, + "25": 27636.0, + "26": 30662.0, + "27": 32009.0, + "28": 33355.0, + "29": 34714.0, + "30": 30387.0, + "31": 28212.0, + "32": 33411.0, + "33": 34696.0, + "34": 30053.0, + "35": 31488.0, + "36": 32943.0, + "37": 35829.0, + "38": 33740.0, + "39": 37632.0, + "40": 34779.0, + "41": 33958.0, + "42": 36396.0, + "43": 34088.0, + "44": 34090.0, + "45": 35158.0, + "46": 36174.0, + "47": 39772.0, + "48": 36516.0, + "49": 36733.0, + "50": 38234.0, + "51": 38608.0, + "52": 37030.0, + "53": 42442.0, + "54": 40944.0, + "55": 37133.0, + "56": 41001.0, + "57": 37524.0, + "58": 42317.0, + "59": 40804.0, + "60": 40450.0, + "61": 41478.0, + "62": 39766.0, + "63": 37941.0, + "64": 42197.0, + "65": 40947.0, + "66": 44094.0, + "67": 41958.0, + "68": 40060.0, + "69": 42189.0, + "70": 43436.0, + "71": 42748.0, + "72": 44280.0, + "73": 47478.0, + "74": 41456.0, + "75": 39925.0, + "76": 43490.0, + "77": 45636.0, + "78": 2141470.0, + "79": 46055.0, + "80": 51863.0, + "81": 151341.0, + "82": 49835.0, + "83": 143360.0, + "84": 2141546.0, + "85": 2145177.0, + "86": 132114.0, + "87": 2147022.0, + "88": 59899.0, + "89": 162883.0, + "90": 51330.0, + "91": 2141901.0, + "92": 44946.0, + "93": 138194.0, + "94": 2145772.0, + "95": 45247.0, + "96": 135045.0, + "97": 53170.0, + "98": 168576.0, + "99": 2141797.0, + "100": 163741.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 787516416.0, + "2": 787540992.0, + "3": 787524096.0, + "4": 787512320.0, + "5": 787547136.0, + "6": 787537920.0, + "7": 787512832.0, + "8": 787524608.0, + "9": 787528192.0, + "10": 787505152.0, + "11": 787522048.0, + "12": 787520000.0, + "13": 787529728.0, + "14": 
787529216.0, + "15": 787504128.0, + "16": 787513344.0, + "17": 787503104.0, + "18": 787489280.0, + "19": 787514880.0, + "20": 787505152.0, + "21": 787479552.0, + "22": 787486208.0, + "23": 787478528.0, + "24": 787486208.0, + "25": 787451392.0, + "26": 787482112.0, + "27": 787470848.0, + "28": 787450368.0, + "29": 787458048.0, + "30": 787435008.0, + "31": 787406848.0, + "32": 787424256.0, + "33": 787435520.0, + "34": 787426304.0, + "35": 787418624.0, + "36": 787436544.0, + "37": 787428352.0, + "38": 787436544.0, + "39": 787417600.0, + "40": 787415040.0, + "41": 787405824.0, + "42": 787415040.0, + "43": 787367936.0, + "44": 787392512.0, + "45": 787399680.0, + "46": 787355136.0, + "47": 787411456.0, + "48": 787354112.0, + "49": 787374080.0, + "50": 787389440.0, + "51": 787375616.0, + "52": 787383808.0, + "53": 787379712.0, + "54": 787384832.0, + "55": 787388928.0, + "56": 787388928.0, + "57": 787351040.0, + "58": 787382784.0, + "59": 787374080.0, + "60": 787395072.0, + "61": 787405312.0, + "62": 787405824.0, + "63": 787373056.0, + "64": 787388928.0, + "65": 787351552.0, + "66": 787386880.0, + "67": 787392000.0, + "68": 787399168.0, + "69": 787383296.0, + "70": 787393024.0, + "71": 787406848.0, + "72": 787400704.0, + "73": 787401216.0, + "74": 787403264.0, + "75": 787442688.0, + "76": 787444736.0, + "77": 787445760.0, + "78": 787395072.0, + "79": 787430400.0, + "80": 787410432.0, + "81": 787412992.0, + "82": 787427840.0, + "83": 787428864.0, + "84": 787412480.0, + "85": 787412480.0, + "86": 787394560.0, + "87": 787452928.0, + "88": 787414528.0, + "89": 787404800.0, + "90": 787446784.0, + "91": 787446272.0, + "92": 787446784.0, + "93": 787430400.0, + "94": 787440128.0, + "95": 787450368.0, + "96": 787454976.0, + "97": 787427328.0, + "98": 787475968.0, + "99": 787419136.0, + "100": 787438592.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2479493120.0, + "2": 2485449728.0, + "3": 2487249408.0, + 
"4": 2487249408.0, + "5": 2495991808.0, + "6": 2495991808.0, + "7": 2495991808.0, + "8": 2495991808.0, + "9": 2495991808.0, + "10": 2495991808.0, + "11": 2495991808.0, + "12": 2495991808.0, + "13": 2495991808.0, + "14": 2495991808.0, + "15": 2495991808.0, + "16": 2495991808.0, + "17": 2495991808.0, + "18": 2495991808.0, + "19": 2495991808.0, + "20": 2495991808.0, + "21": 2495991808.0, + "22": 2495991808.0, + "23": 2495991808.0, + "24": 2495991808.0, + "25": 2495991808.0, + "26": 2495991808.0, + "27": 2495991808.0, + "28": 2495991808.0, + "29": 2495991808.0, + "30": 2495991808.0, + "31": 2495991808.0, + "32": 2495991808.0, + "33": 2495991808.0, + "34": 2495991808.0, + "35": 2495991808.0, + "36": 2495991808.0, + "37": 2495991808.0, + "38": 2495991808.0, + "39": 2495991808.0, + "40": 2495991808.0, + "41": 2495991808.0, + "42": 2495991808.0, + "43": 2495991808.0, + "44": 2495991808.0, + "45": 2495991808.0, + "46": 2495991808.0, + "47": 2495991808.0, + "48": 2495991808.0, + "49": 2495991808.0, + "50": 2495991808.0, + "51": 2495991808.0, + "52": 2495991808.0, + "53": 2495991808.0, + "54": 2495991808.0, + "55": 2495991808.0, + "56": 2495991808.0, + "57": 2495991808.0, + "58": 2495991808.0, + "59": 2495991808.0, + "60": 2495991808.0, + "61": 2495991808.0, + "62": 2495991808.0, + "63": 2495991808.0, + "64": 2495991808.0, + "65": 2495991808.0, + "66": 2495991808.0, + "67": 2495991808.0, + "68": 2495991808.0, + "69": 2495991808.0, + "70": 2495991808.0, + "71": 2495991808.0, + "72": 2495991808.0, + "73": 2495991808.0, + "74": 2495991808.0, + "75": 2495991808.0, + "76": 2495991808.0, + "77": 2495991808.0, + "78": 2495991808.0, + "79": 2495991808.0, + "80": 2495991808.0, + "81": 2495991808.0, + "82": 2495991808.0, + "83": 2495991808.0, + "84": 2495991808.0, + "85": 2495991808.0, + "86": 2495991808.0, + "87": 2495991808.0, + "88": 2495991808.0, + "89": 2495991808.0, + "90": 2495991808.0, + "91": 2495991808.0, + "92": 2495991808.0, + "93": 2495991808.0, + "94": 2495991808.0, + 
"95": 2495991808.0, + "96": 2495991808.0, + "97": 2495991808.0, + "98": 2495991808.0, + "99": 2495991808.0, + "100": 2495991808.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.11313, + "2": 0.4805, + "3": 0.36965, + "4": 0.36695, + "5": 0.31705, + "6": 0.31275, + "7": 0.31299, + "8": 0.29866, + "9": 0.28961, + "10": 0.28859, + "11": 0.29067, + "12": 0.29044, + "13": 0.29806, + "14": 0.29287, + "15": 0.29391, + "16": 0.3175, + "17": 0.28363, + "18": 0.2818, + "19": 0.29347, + "20": 0.28931, + "21": 0.29103, + "22": 0.28444, + "23": 0.28907, + "24": 0.27608, + "25": 0.28277, + "26": 0.28656, + "27": 0.28921, + "28": 0.30243, + "29": 0.30435, + "30": 0.31231, + "31": 0.30439, + "32": 0.31412, + "33": 0.28887, + "34": 0.29613, + "35": 0.29738, + "36": 0.29754, + "37": 0.3019, + "38": 0.2933, + "39": 0.2944, + "40": 0.29283, + "41": 0.29592, + "42": 0.29673, + "43": 0.29319, + "44": 0.30127, + "45": 0.29921, + "46": 0.29904, + "47": 0.28795, + "48": 0.29918, + "49": 0.28711, + "50": 0.29645, + "51": 0.28777, + "52": 0.29536, + "53": 0.2847, + "54": 0.28286, + "55": 0.2874, + "56": 0.28699, + "57": 0.28614, + "58": 0.29825, + "59": 0.28363, + "60": 0.29423, + "61": 0.29226, + "62": 0.2896, + "63": 0.28065, + "64": 0.29533, + "65": 0.29842, + "66": 0.28487, + "67": 0.28419, + "68": 0.29474, + "69": 0.28383, + "70": 0.28417, + "71": 0.29253, + "72": 0.28737, + "73": 0.27923, + "74": 0.28728, + "75": 0.29383, + "76": 0.28157, + "77": 0.64771, + "78": 0.29148, + "79": 0.28742, + "80": 0.29245, + "81": 0.28827, + "82": 0.28368, + "83": 0.28963, + "84": 0.29234, + "85": 0.28183, + "86": 0.28337, + "87": 0.27879, + "88": 0.28388, + "89": 0.28309, + "90": 0.28852, + "91": 0.28254, + "92": 0.28375, + "93": 0.28633, + "94": 0.28567, + "95": 0.28235, + "96": 0.28513, + "97": 0.27951, + "98": 0.27851, + "99": 0.28336, + "100": 0.27744 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml index 3ecd68b9841..8874f9cf045 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml @@ -56,7 +56,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true - --ckpt-format: torch_dist + --ckpt-format: fsdp_dtensor --dist-ckpt-optim-fully-reshardable: true --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 8164ca37df8..607d48380d5 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -106,14 +106,13 @@ products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - # TODO: The migration of custom fsdp causes EP + FSDP to be temporarily unavailable, which will be fixed in a subsequent MR. 
- # - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] - # products: - # - environment: [dev] - # scope: [mr] - # platforms: [dgx_h100] - # - environment: [lts] - # scope: [nightly] + - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] + - environment: [lts] + scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective] products: - environment: [dev] diff --git a/tools/checkpoint/checkpoint_inspector.py b/tools/checkpoint/checkpoint_inspector.py index 34afa27755f..c62f0ca7417 100644 --- a/tools/checkpoint/checkpoint_inspector.py +++ b/tools/checkpoint/checkpoint_inspector.py @@ -8,6 +8,8 @@ import time import re import shutil +from typing import Optional +import tempfile import click import torch @@ -19,6 +21,7 @@ FileSystemReader, FileSystemWriter, ) +from torch.distributed.checkpoint.format_utils import dcp_to_torch_save from torch.distributed.checkpoint.metadata import ( BytesStorageMetadata, TensorStorageMetadata, @@ -64,7 +67,8 @@ def cli(): @cli.command() @click.argument("checkpoint_dir", type=click.Path(exists=True)) @click.option("--enable-msc", is_flag=True, help="Enable MultiStorageClient feature.") -def inspect(checkpoint_dir, enable_msc): +@click.option("--not-ignore-param-to-group-meta", is_flag=True, help="Ignore parameter-to-group metadata.") +def inspect(checkpoint_dir, enable_msc, not_ignore_param_to_group_meta): """Inspect a Megatron Core Distributed Checkpoint""" ckpt_path = Path(checkpoint_dir) @@ -138,6 +142,8 @@ def inspect(checkpoint_dir, enable_msc): ] click.echo(" | ".join(stats) + "\n") + ignore_param_to_group_meta = not not_ignore_param_to_group_meta + ignore_param_to_group_meta_count = 0 for key, value in metadata.state_dict_metadata.items(): bullet = click.style("►", fg="blue") key_styled = click.style(key, fg="green") @@ -147,11 +153,18 @@ def 
inspect(checkpoint_dir, enable_msc): shape = click.style(f"{tuple(value.size)}", fg="magenta") click.echo(f" {bullet} {key_styled} [{dtype}, shape={shape}]") elif isinstance(value, BytesStorageMetadata): + if ignore_param_to_group_meta and key.startswith("optimizer.param_to_group_meta."): + ignore_param_to_group_meta_count += 1 + continue click.echo(f" {bullet} {key_styled} {click.style('[BYTES]', fg='yellow')}") else: click.echo( f" {bullet} {key_styled} {click.style('[UNKNOWN TYPE]', fg='red')}" ) + if ignore_param_to_group_meta: + click.echo( + click.style(f"Ignored parameter-to-group metadata: {ignore_param_to_group_meta_count}", fg="yellow") + ) # MCore data section try: @@ -323,8 +336,10 @@ def convert_checkpoint( output_dir, swiglu, process_group, + optimizer_param_to_group_prefix="optimizer.param_to_group_meta.module.module.module", optimizer_state_prefix="optimizer.state.module.module.module", model_weight_prefix="model.module", + param_to_param_group_map={}, ): """Convert a Megatron Core Distributed Checkpoint from torch_dist to standard fsdp_dtensor format.""" device_mesh = DeviceMesh.from_group(process_group, device_type="cuda") @@ -371,6 +386,104 @@ def _free_up_some_gpu_memory(): gc.collect() torch.cuda.empty_cache() + def split_layers( + key: str, + value: torch.Tensor, + orig_shape: Optional[torch.Size] = None, + ) -> dict[str, torch.Tensor]: + """ + Split layers into separate tensors. + """ + _free_up_some_gpu_memory() + layers = {} + for i, v in enumerate(split_dtensor(value, 1, dim=0)): + v = gather_uneven_dtensor_to_full_tensor(v).reshape( + orig_shape[1:] if orig_shape else value.shape[1:] + ).redistribute(placements=[Shard(0)]) + + layer_key = key.replace(".layers.", f".layers.{i}.") + layers[layer_key] = v + + return layers + + def split_expert_weights( + key: str, + value: torch.Tensor, + orig_shape: Optional[torch.Size] = None, + ) -> dict[str, torch.Tensor]: + """ + Split expert weights into separate tensors for each expert. 
+ """ + experts = {} + layer_key = key.replace(".experts.experts.", ".experts.") + expert_weights = split_dtensor(value, 1, dim=0) + for expert_idx, expert_weight in enumerate(expert_weights): + layer_key_parts = layer_key.split(".weight", 1) + if len(layer_key_parts) == 1: + expert_key = f"{layer_key}{expert_idx}" + elif len(layer_key_parts) == 2: + expert_key = f"{layer_key_parts[0]}.weight{expert_idx}{layer_key_parts[1]}" + else: + raise ValueError(f"Unexpected expert layer key: {layer_key}") + + expert_weight = gather_uneven_dtensor_to_full_tensor(expert_weight) + expert_shape = orig_shape[1:] if orig_shape else value.shape[1:] + # Handle optimizer states for expert linear_fc2 when ETP is enabled + if ( + layer_key.startswith("optimizer.state.") + and "linear_fc2" in layer_key + and expert_weight.shape[-2] > 1 + ): + tp_size = expert_weight.shape[-2] + rows, cols = expert_shape + # Reshape to split column dimension by tp_size + expert_weight = expert_weight.reshape( + *expert_weight.shape[:-1], rows, cols // tp_size + ) + dims = list(range(expert_weight.ndim)) + dims[-3], dims[-2] = dims[-2], dims[-3] + expert_weight = ( + expert_weight.permute(*dims) + .reshape(expert_shape) + .redistribute(placements=[Shard(0)]) + ) + else: + expert_weight = expert_weight.reshape(expert_shape).redistribute( + placements=[Shard(0)] + ) + experts[expert_key] = expert_weight + return experts + + def is_swiglu_key(key): + return any(re.search(pat, key) for pat in [ + r"(.*)\.mlp\.linear_fc1\.weight", + r"(.*)\.mlp\.linear_fc1\.bias", + r"(.*)\.mlp\.experts\.linear_fc1\.weight(\d+)", + r"(.*)\.mlp\.experts\.linear_fc1\.bias(\d+)", + r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.weight", + r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.bias", + r"(.*)\.mlp\.shared_experts\.linear_fc1\.weight", + r"(.*)\.mlp\.shared_experts\.linear_fc1\.bias", + ]) + + def split_swiglu_weight(key: str, value: torch.Tensor) -> dict[str, torch.Tensor]: + """ + Split SwiGLU weights 
into separate tensors. + """ + value = gather_uneven_dtensor_to_full_tensor(value) + swiglu_w_and_v = {} + w, v = torch.chunk(value, 2, dim=0) + w = w.redistribute(placements=[Shard(0)]) + v = v.redistribute(placements=[Shard(0)]) + w_key = re.sub(r'(weight\d*)(.*)', r'\1_w\2', key) + v_key = re.sub(r'(weight\d*)(.*)', r'\1_v\2', key) + swiglu_w_and_v[w_key] = w + swiglu_w_and_v[v_key] = v + return swiglu_w_and_v + + def has_layer_index(key: str) -> bool: + return bool(re.search(r"layers\.(\d+)\.", key)) + while state_dict: key, value = state_dict.popitem() if torch.distributed.get_rank() == 0: @@ -387,9 +500,11 @@ def _free_up_some_gpu_memory(): # Special handling for optimizer state key_list = key.split(".") new_key = f"{optimizer_state_prefix}.{'.'.join(key_list[3:])}.{key_list[2]}" + is_param = False else: # Special handling for module parameters new_key = f"{model_weight_prefix}.{key}" + is_param = True # Handle dist-opt flatten tensors if ( @@ -406,68 +521,47 @@ def _free_up_some_gpu_memory(): else: orig_shape = None - # Handle multi-layer tensors - if ".layers." 
in new_key: - n_layer = value.shape[0] - - _free_up_some_gpu_memory() - per_layer_values = [ - gather_uneven_dtensor_to_full_tensor(v).redistribute( - placements=[Shard(len(v.shape) - 1)] - ) - for v in split_dtensor(value, 1, dim=0) - ] - for i in range(n_layer): - if orig_shape is not None: - layer_shape = orig_shape[1:] - else: - layer_shape = value.shape[1:] - - per_layer_values[i] = ( - per_layer_values[i] - .reshape(layer_shape) - .redistribute(placements=[Shard(0)]) - ) - for i in range(0, n_layer): - layer_key = new_key.replace(".layers.", f".layers.{i}.") - if swiglu and "mlp.linear_fc1.weight" in layer_key: - # Special case for SwiGLU - w, v = torch.chunk(per_layer_values[i], 2, dim=0) - w = w.redistribute(placements=[Shard(0)]) - v = v.redistribute(placements=[Shard(0)]) - w_key = layer_key.replace( - "mlp.linear_fc1.weight", "mlp.linear_fc1.weight_w" - ) - v_key = layer_key.replace( - "mlp.linear_fc1.weight", "mlp.linear_fc1.weight_v" - ) - # Store both w and v in the state_dict - fsdp_dtensor_state_dict[w_key] = w - fsdp_dtensor_state_dict[v_key] = v - elif ( - "experts.experts.linear_fc1.weight" in layer_key - or "experts.experts.linear_fc2.weight" in layer_key + # Handle multi-layer / experts tensors + split_tensors = {} + if ".layers." in new_key and not has_layer_index(new_key): + split_tensors = split_layers(new_key, value, orig_shape) + elif ".experts.experts." 
in new_key: + split_tensors = split_expert_weights(new_key, value, orig_shape) + else: + if orig_shape: + value = gather_uneven_dtensor_to_full_tensor(value) + # Handle optimizer states with partition_dim=1 when TP is enabled + if ( + new_key.startswith("optimizer.state.") + and value.ndim > 2 + and value.shape[-2] > 1 ): - # Special case for MoE - layer_key = layer_key.replace(".experts.experts.", ".experts.") - expert_weights = torch.split(per_layer_values[i], 1, dim=0) - for expert_idx, expert_weight in enumerate(expert_weights): - expert_key = f"{layer_key}{expert_idx}" - fsdp_dtensor_state_dict[expert_key] = expert_weight.squeeze( - 0 - ) + tp_size = value.shape[-2] + rows, cols = orig_shape + # Reshape to split column dimension by tp_size + value = value.reshape(*value.shape[:-1], rows, cols // tp_size) + dims = list(range(value.ndim)) + dims[-3], dims[-2] = dims[-2], dims[-3] + value = ( + value.permute(*dims) + .reshape(orig_shape) + .redistribute(placements=[Shard(0)]) + ) else: - # General case - fsdp_dtensor_state_dict[layer_key] = per_layer_values[i] - else: - if orig_shape is not None: - _free_up_some_gpu_memory() - value = ( - value.redistribute(placements=[Replicate()]) - .reshape(orig_shape) - .redistribute(placements=[Shard(0)]) - ) - fsdp_dtensor_state_dict[new_key] = value + value = value.reshape(orig_shape).redistribute(placements=[Shard(0)]) + split_tensors = {new_key: value} + + # Handle SWiGLU weights + for key, value in list(split_tensors.items()): + if swiglu and is_swiglu_key(key): + swiglu_w_and_v = split_swiglu_weight(key, value) + split_tensors.update(swiglu_w_and_v) + del split_tensors[key] + + fsdp_dtensor_state_dict.update(split_tensors) + if is_param and key in param_to_param_group_map: + for new_key in split_tensors.keys(): + param_to_param_group_map[new_key] = param_to_param_group_map[key] elif key.startswith("rng_state"): # Skip RNG states continue @@ -530,6 +624,15 @@ def _free_up_some_gpu_memory(): ) ) common_state = 
common_strategy.load_common(input_dir) + try: + if "param_groups" in common_state["optimizer"]: + ckpt_param_groups = common_state["optimizer"]["param_groups"] + else: + ckpt_param_groups = [] + for opt_state_dict in common_state["optimizer"].values(): + ckpt_param_groups.extend(opt_state_dict["optimizer"]["param_groups"]) + except: + ckpt_param_groups = None common_state = flatten(common_state) for key, value in common_state.items(): if key.startswith("optimizer.optimizer.param_groups."): @@ -541,12 +644,29 @@ def _free_up_some_gpu_memory(): ) fsdp_dtensor_state_dict[key] = value + # set up per-parameter param_groups + if param_to_param_group_map and ckpt_param_groups is not None: + for name in list(fsdp_dtensor_state_dict.keys()): + if not name.startswith(model_weight_prefix) or name.endswith(".expert_bias"): + continue + + assert name in param_to_param_group_map, f"Missing param group for {name}" + param_group_id = param_to_param_group_map[name] + assert param_group_id < len(ckpt_param_groups), f"Invalid param group id {param_group_id} for {name}" + name_without_prefix = name[len(model_weight_prefix):] + fsdp_dtensor_state_dict[ + f"{optimizer_param_to_group_prefix}.{name_without_prefix}" + ] = ckpt_param_groups[param_group_id] + if "checkpoint_version" not in fsdp_dtensor_state_dict: fsdp_dtensor_state_dict["checkpoint_version"] = 3.0 # Save modified checkpoint save_checkpoint_with_pickle_protocol(fsdp_dtensor_state_dict, output_dir) + dist.barrier() # Synchronize all ranks + dist.destroy_process_group() + @cli.command() @click.argument("input_dir", type=click.Path(exists=True)) @@ -560,12 +680,6 @@ def _free_up_some_gpu_memory(): "--oom-traceback", is_flag=True, help="Enable OOM traceback for debugging." 
) @click.option("--enable-msc", is_flag=True, help="Enable MultiStorageClient feature.") -@click.option( - "--distributed-timeout-minutes", - default=10, - type=int, - help="Timeout for distributed operations in minutes.", -) @click.option( "--output-optimizer-state-prefix", default="optimizer.state.module.module.module", @@ -576,15 +690,21 @@ def _free_up_some_gpu_memory(): default="model.module", help="Prefix for model weight keys in the checkpoint.", ) +@click.option( + "--param-to-param-group-map-json", + type=str, + default="{}", + help="JSON string representing the param to parameter group map." +) def convert_torch_dist_to_fsdp_dtensor( input_dir, output_dir, swiglu, oom_traceback, enable_msc, - distributed_timeout_minutes, output_optimizer_state_prefix, output_model_weight_prefix, + param_to_param_group_map_json, ): """Convert a Megatron Core Distributed Checkpoint from torch_dist to fsdp_dtensor format.""" if not enable_msc: @@ -624,10 +744,13 @@ def oom_observer(device, alloc, device_alloc, device_free): ckpt_path = Path(input_dir) output_dir = Path(output_dir) + with open(param_to_param_group_map_json, "r") as f: + param_to_param_group_map = json.load(f) convert_checkpoint( ckpt_path, output_dir, swiglu, process_group=dist.group.WORLD, optimizer_state_prefix=output_optimizer_state_prefix, model_weight_prefix=output_model_weight_prefix, + param_to_param_group_map=param_to_param_group_map, ) click.echo( @@ -742,6 +865,109 @@ def modify_state_dict(input_dir, output_dir, op, enable_msc): ) +def _compare_two_checkpoint(checkpoint_1, checkpoint_2): + reader_1 = FileSystemReader(checkpoint_1) + metadata_1 = reader_1.read_metadata() + + reader_2 = FileSystemReader(checkpoint_2) + metadata_2 = reader_2.read_metadata() + + keys_1 = set(metadata_1.state_dict_metadata.keys()) + keys_2 = set(metadata_2.state_dict_metadata.keys()) + + click.echo(click.style("Comparing checkpoints...", fg="blue")) + + # Compare keys + missing_in_1 = keys_2 - keys_1 + missing_in_2 = 
keys_1 - keys_2 + common_keys = keys_1 & keys_2 + + click.echo(click.style("Keys missing in checkpoint 1:", fg="red")) + for key in missing_in_1: + click.echo(click.style(f" - {key}", fg="red")) + + click.echo(click.style("Keys missing in checkpoint 2:", fg="red")) + for key in missing_in_2: + click.echo(click.style(f" - {key}", fg="red")) + + # Compare common keys + click.echo(click.style("Common keys in both checkpoints:", fg="green")) + for key in common_keys: + meta_1 = metadata_1.state_dict_metadata[key] + meta_2 = metadata_2.state_dict_metadata[key] + + if not isinstance(meta_1, TensorStorageMetadata): + continue + + if meta_1.size != meta_2.size or meta_1.properties.dtype != meta_2.properties.dtype: + click.echo(click.style(f" - {key} (metadata differ) meta_1: {meta_1}, meta_2: {meta_2}", fg="red")) + else: + value_1 = torch.empty(meta_1.size, dtype=meta_1.properties.dtype) + value_2 = value_1.clone() + + dcp.load({key: value_1}, storage_reader=reader_1, planner=DefaultLoadPlanner()) + dcp.load({key: value_2}, storage_reader=reader_2, planner=DefaultLoadPlanner()) + + if not torch.allclose( + value_1, value_2, atol=1e-8, rtol=1e-5 + ): + click.echo(click.style(f" - {key} (values differ) value_1: {value_1}, value_2: {value_2}", fg="red")) + + +@cli.command() +@click.argument("checkpoint_1", type=click.Path(exists=True)) +@click.argument("checkpoint_2", type=click.Path(exists=True)) +@click.option("--enable-msc", is_flag=True, help="Enable MultiStorageClient feature.") +def compare_two_checkpoint(checkpoint_1, checkpoint_2, enable_msc): + """ + Compare two checkpoints. 
+ """ + init_process_group(f"compare_two_checkpoint from {checkpoint_1} to {checkpoint_2}") + + if not enable_msc: + MultiStorageClientFeature.disable() + + _compare_two_checkpoint( + Path(checkpoint_1), + Path(checkpoint_2), + ) + + click.echo( + click.style( + f"Comparison between {checkpoint_1} and {checkpoint_2} completed.", fg="green", bold=True + ) + ) + + +@cli.command() +@click.argument("torch_dcp_dir", type=click.Path(exists=True)) +def print_torch_dcp_in_json(torch_dcp_dir, model_weight_prefix="model.module"): + # Use a temporary file context + with tempfile.NamedTemporaryFile(suffix=".pth") as tmp_file: + # Convert distributed checkpoint directory to a single-file checkpoint + dcp_to_torch_save(torch_dcp_dir, tmp_file.name) + + # Load the state dict from the temporary file + state_dict = torch.load(tmp_file.name, map_location="cpu") + + click.echo(f"torch dcp content: {json.dumps(state_dict)}") + + # Replace all "module.module." with model_weight_prefix in dict keys + new_state_dict = {} + for key, value in state_dict.items(): + new_key = key.replace("module.module", model_weight_prefix) + new_state_dict[new_key] = value + + # Convert state dict to JSON-serializable format + serializable_dict = {k: v.tolist() if hasattr(v, "tolist") else v for k, v in new_state_dict.items()} + + # Save to a JSON file + json_file_path = os.path.join(torch_dcp_dir, "param_to_param_group_map.json") + with open(json_file_path, "w") as json_file: + json.dump(serializable_dict, json_file, indent=2) + click.echo(f"Saved converted param_to_param_group_map to: {json_file_path}") + + def init_process_group(message): rank = int(os.getenv("RANK", "0")) world_size = int(os.getenv("WORLD_SIZE", "1")) From 13edb58560d083ef7ce5d42b90adda3bd9b53306 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 28 Oct 2025 12:02:51 +0000 Subject: [PATCH 081/334] Revert "cp: `Megatron-FSDP Expert Parallel (DeepSeek-v3) Support` into `dev` (#1987)" This reverts commit 
cc33e0056b00ee67455fadfb6710e4dbde9e1c33. --- .../distributed/fsdp/mcore_fsdp_adapter.py | 133 +--- megatron/core/distributed/fsdp/src/README.md | 11 - .../fsdp/src/megatron_fsdp/fully_shard.py | 10 +- .../fsdp/src/megatron_fsdp/megatron_fsdp.py | 11 +- .../megatron_fsdp/param_and_grad_buffer.py | 83 +-- .../fsdp/src/megatron_fsdp/uneven_dtensor.py | 4 +- .../fsdp/src/megatron_fsdp/utils.py | 130 +--- .../embeddings/yarn_rotary_pos_embedding.py | 10 +- megatron/core/optimizer/__init__.py | 23 - megatron/core/optimizer/distrib_optimizer.py | 2 - .../transformer/fsdp_dtensor_checkpoint.py | 336 ++-------- megatron/training/arguments.py | 4 - megatron/training/checkpointing.py | 74 +-- megatron/training/training.py | 1 - .../golden_values_dev_dgxh100_coreweave.json | 598 +++++++++--------- .../golden_values_dev_dgxh100_coreweave.json | 500 +++++++-------- .../golden_values_dev_dgx_h100.json | 143 +---- .../golden_values_dev_dgxh100_coreweave.json | 537 ---------------- .../model_config.yaml | 2 +- tests/test_utils/recipes/moe.yaml | 15 +- tools/checkpoint/checkpoint_inspector.py | 362 ++--------- 21 files changed, 765 insertions(+), 2224 deletions(-) delete mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json diff --git a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py index 7432a7f9a36..a7c0d5802ab 100644 --- a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +++ b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py @@ -13,7 +13,6 @@ # limitations under the License. 
import logging -import random from typing import List, Optional try: @@ -23,7 +22,6 @@ except ImportError: HAVE_EINOPS = False -import numpy as np import torch import torch.distributed as dist @@ -34,11 +32,10 @@ except ImportError: HAVE_DTENSOR = False -from megatron.core import parallel_state, tensor_parallel +from megatron.core import parallel_state from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk from megatron.core.distributed.data_parallel_base import _BaseDataParallel from megatron.core.distributed.distributed_data_parallel_config import DistributedDataParallelConfig -from megatron.core.extensions.transformer_engine import TELinear from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.transformer_layer import TransformerLayer @@ -98,8 +95,6 @@ def __init__( else: self.fsdp_unit_modules = [] - self._fix_tensor_parallel_attributes(module) - super().__init__( config=config, module=MegatronFSDP( @@ -124,8 +119,6 @@ def __init__( self.module.state_dict_for_save_checkpoint = self.module.state_dict self.state_dict_for_save_checkpoint = self.state_dict - self.sync_rng_states_across_tp_group() - def load_state_dict(self, state_dict, strict=True): """ Load the state dictionary into the module. @@ -148,44 +141,6 @@ def load_state_dict(self, state_dict, strict=True): self.module.load_state_dict(custom_state_dict, strict=strict) - def _fix_tensor_parallel_attributes(self, module): - is_expert_param = lambda n, p: ".experts." 
in n - is_router_param = lambda n, p: ".router.weight" in n - - if parallel_state.get_tensor_model_parallel_group(): - tp_size = parallel_state.get_tensor_model_parallel_group().size() - else: - tp_size = 1 - - if parallel_state.get_expert_tensor_parallel_group(): - expt_tp_size = parallel_state.get_expert_tensor_parallel_group().size() - else: - expt_tp_size = 1 - - param_to_direct_module = {} - for name, m in module.named_modules(): - for p in m.parameters(recurse=False): - param_to_direct_module[p] = (name, m) - - for name, param in module.named_parameters(): - if is_expert_param(name, param) and expt_tp_size > 1: - setattr(param, "_mcore_tp", True) - if "linear_fc1.weight" in name: - setattr(param, "_tp_partition_dim", 0) - elif "linear_fc2.weight" in name: - setattr(param, "_tp_partition_dim", 1) - - if not is_expert_param(name, param) and tp_size > 1: - m_name, direct_module = param_to_direct_module[param] - if isinstance(direct_module, (TELinear,)): - parallel_mode = getattr(direct_module, "parallel_mode", None) - if parallel_mode is None: - setattr(param, "_mcore_tp", True) - setattr(param, "_tp_duplicated", True) - elif is_router_param(name, param): - setattr(param, "_mcore_tp", True) - setattr(param, "_tp_duplicated", True) - def _init_dist_index(self, pg_collection): """ Initialize the distributed index for the module. 
@@ -199,7 +154,6 @@ def _init_dist_index(self, pg_collection): enable_hsdp = self.ddp_config.num_distributed_optimizer_instances > 1 if pg_collection is None: tp_group = parallel_state.get_tensor_model_parallel_group() - expt_tp_group = parallel_state.get_expert_tensor_parallel_group() if enable_hsdp: dp_cp_group = parallel_state.get_data_parallel_group( with_context_parallel=True, partial_data_parallel=True @@ -214,11 +168,8 @@ def _init_dist_index(self, pg_collection): ) outer_fsdp_group = None hybrid_fsdp_group = None - expt_dp_group = parallel_state.get_expert_data_parallel_group() - ep_group = parallel_state.get_expert_model_parallel_group() else: tp_group = getattr(pg_collection, 'tp', None) - expt_tp_group = getattr(pg_collection, 'expt_tp', None) if enable_hsdp: dp_cp_group = pg_collection.intra_dp_cp outer_fsdp_group = pg_collection.inter_dist_opt @@ -227,17 +178,11 @@ def _init_dist_index(self, pg_collection): dp_cp_group = pg_collection.dp_cp outer_fsdp_group = None hybrid_fsdp_group = None - expt_dp_group = getattr(pg_collection, 'expt_dp', None) - ep_group = getattr(pg_collection, 'ep', None) if tp_group is None: single_rank_group = dist.new_group(ranks=[dist.get_rank()]) tp_group = single_rank_group - if expt_tp_group is None: - single_rank_group = dist.new_group(ranks=[dist.get_rank()]) - expt_tp_group = single_rank_group - if enable_hsdp: mesh = _get_hsdp_tp_mesh(outer_fsdp_group, dp_cp_group, tp_group) dist_index = FSDPDistributedIndex( @@ -254,17 +199,6 @@ def _init_dist_index(self, pg_collection): hybrid_fsdp_group=hybrid_fsdp_group, ) else: - if ep_group is not None: - expt_mesh = _get_dp_tp_mesh(expt_dp_group, expt_tp_group, ep_size=ep_group.size()) - expt_device_mesh = DeviceMesh.from_group( - [expt_dp_group, expt_tp_group], - device_type="cuda", - mesh=expt_mesh.tolist(), - mesh_dim_names=["dp_cp", "tp"], - ) - else: - expt_device_mesh = None - mesh = _get_dp_tp_mesh(dp_cp_group, tp_group) dist_index = FSDPDistributedIndex( 
device_mesh=DeviceMesh.from_group( @@ -275,11 +209,8 @@ def _init_dist_index(self, pg_collection): ), dp_shard_dim="dp_cp", tp_dim="tp", - expt_device_mesh=expt_device_mesh, ) - self.tp_group = tp_group - return dist_index def stop_communication(self): @@ -289,20 +220,6 @@ def stop_communication(self): self.module.synchronize_gradient_reduce() self.module.synchronize_param_gather() - def sync_rng_states_across_tp_group(self): - """ - Synchronize the tensor parallel random number generator states. - """ - if self.tp_group.size() <= 1: - return - - if self.tp_group.rank() == 0: - broadcast_list = [_get_rng_state_dict()] - else: - broadcast_list = [None] - torch.distributed.broadcast_object_list(broadcast_list, group=self.tp_group, group_src=0) - _load_rng_state_dict(broadcast_list[0]) - def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group): assert HAVE_EINOPS, "einops is not installed. Please install it with `pip install einops`." @@ -356,46 +273,29 @@ def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group): return mesh -def _get_dp_tp_mesh(dp_cp_group, tp_group, ep_size=1): +def _get_dp_tp_mesh(dp_cp_group, tp_group): assert HAVE_EINOPS, "einops is not installed. Please install it with `pip install einops`." world_size = dist.get_world_size() tp_size = dist.get_world_size(tp_group) if tp_group is not None else 1 - # TODO: Supports configurable (dp, cp, ep, tp) order. - mesh = einops.rearrange( - torch.arange(world_size), - "(dp_cp ep tp) -> ep dp_cp tp", - dp_cp=dp_cp_group.size(), - tp=tp_size, - ep=ep_size, - ) + # TODO: Supports configurable (dp, cp, tp) order. 
+ mesh = einops.rearrange(torch.arange(world_size), "(dp_cp tp) -> dp_cp tp", tp=tp_size) - mesh_dp_ranks = einops.rearrange(mesh, 'ep dp_cp tp -> (ep tp) dp_cp', dp_cp=dp_cp_group.size()) + mesh_dp_ranks = einops.rearrange(mesh, 'dp_cp tp -> tp dp_cp', tp=tp_size) dp_cp_group_ranks = dist.get_process_group_ranks(dp_cp_group) assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_dp_ranks, dp_cp_group_ranks), ( f"[Megatron-FSDP] Data Parallel ranks in the mesh {mesh_dp_ranks} " f"do not match the ranks in the DP group {dp_cp_group_ranks}." ) - mesh_tp_ranks = einops.rearrange(mesh, 'ep dp_cp tp -> (dp_cp ep) tp', tp=tp_size) + mesh_tp_ranks = einops.rearrange(mesh, 'dp_cp tp -> (dp_cp) tp', tp=tp_size) tp_group_ranks = dist.get_process_group_ranks(tp_group) assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_tp_ranks, tp_group_ranks), ( f"[Megatron-FSDP] Tensor Parallel ranks in the mesh {mesh_tp_ranks} " f"do not match the ranks in the TP group {tp_group_ranks}." ) - # Exclude the expert parallel dimension - rank = dist.get_rank() - dp_tp_meshes = [per_ep_mesh for per_ep_mesh in mesh if rank in per_ep_mesh.reshape(-1).tolist()] - assert ( - len(dp_tp_meshes) == 1 - ), f"[Megatron-FSDP] Current rank {rank} is not unique in the mesh ranks {mesh.tolist()}." - assert len(dp_tp_meshes[0].reshape(-1).tolist()) == dp_cp_group.size() * tp_group.size(), ( - f"[Megatron-FSDP] DP-TP mesh size {len(dp_tp_meshes[0].reshape(-1).tolist())} " - f"does not match expected size {dp_cp_group.size() * tp_group.size()}." - ) - - return dp_tp_meshes[0] + return mesh def _check_mesh_ranks_and_group_ranks_are_consistent(mesh_ranks, group_ranks): @@ -410,22 +310,3 @@ def _check_mesh_ranks_and_group_ranks_are_consistent(mesh_ranks, group_ranks): f"{mesh_ranks.tolist()} does not match the group ranks {group_ranks}." 
) return sorted(current_ranks[0]) == sorted(group_ranks) - - -def _get_rng_state_dict(): - rng_state_dict = { - 'random_rng_state': random.getstate(), - 'np_rng_state': np.random.get_state(), - 'torch_rng_state': torch.get_rng_state(), - 'cuda_rng_state': torch.cuda.get_rng_state(), - 'rng_tracker_states': tensor_parallel.get_cuda_rng_tracker().get_states(), - } - return rng_state_dict - - -def _load_rng_state_dict(rng_state_dict): - random.setstate(rng_state_dict['random_rng_state']) - np.random.set_state(rng_state_dict['np_rng_state']) - torch.set_rng_state(rng_state_dict['torch_rng_state']) - torch.cuda.set_rng_state(rng_state_dict['cuda_rng_state']) - tensor_parallel.get_cuda_rng_tracker().set_states(rng_state_dict['rng_tracker_states']) diff --git a/megatron/core/distributed/fsdp/src/README.md b/megatron/core/distributed/fsdp/src/README.md index 9e036f22f67..d879c6c26f8 100644 --- a/megatron/core/distributed/fsdp/src/README.md +++ b/megatron/core/distributed/fsdp/src/README.md @@ -127,12 +127,6 @@ device_mesh[("dp_shard", "cp")]._flatten("dp_shard_cp") # Only required if using HSDP. Otherwise, don't pass hybrid_fsdp_group. device_mesh[("dp_outer", "dp_shard", "cp")]._flatten("hsdp") hsdp_group = device_mesh["hsdp"].get_group() -# Initialize DeviceMesh for expert parallel (EP) modules when using FSDP + EP. -expert_device_mesh = torch.distributed.device_mesh.init_device_mesh( - "cuda", - mesh_shape=(expt_dp_shard_size, expt_tp_size), - mesh_dim_names=("dp_shard", "tp"), -) # Fully-shards your model and distributes your optimizer. model, optimizer = fully_shard( @@ -151,8 +145,6 @@ model, optimizer = fully_shard( tp_dim="tp", # Only required when using HSDP. Otherwise, set this to None. hybrid_fsdp_group=hsdp_group, - # Only required for FSDP + EP. Otherwise, set this to None. 
- expt_device_mesh=expt_device_mesh, # FSDP Sharding Strategy: no_shard (0) / optim (1) / optim_grads (2) / optim_grads_params (3) zero_dp_strategy=3, outer_dp_sharding_strategy=1, @@ -200,9 +192,6 @@ optimizer.load_state_dict(ckpt_state_dict["optimizer"]) - `tp_dim` is the name of the sub-mesh used for tensor parallelism (TP), which is required for `(FSDP, TP)`-strided sharding when using Megatron-LM or Torch-native `DTensor` TP. - For more information about tensor parallelism, refer to: [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053). - `hybrid_fsdp_group` is the `ProcessGroup` which contains all ranks in the flattened `dp_shard_dim` and `dp_outer_dim` sub-meshes utilized to specify the `(DP-Outer, DP-Shard)` sharded coordinate system for the weight and gradient buffers. Required for HSDP. -- `expt_device_mesh` is another [`torch.distributed.DeviceMesh`](https://docs.pytorch.org/docs/stable/distributed.html#devicemesh) tailored for the expert parallel (EP) modules in `MegatronFSDP`. - - `dp_shard_dim` is the name of the sub-mesh required for FSDP sharding of the EP modules, enabling expert data parallelism (EDP). - - `tp_dim` is the name of the sub-mesh used for expert tensor parallelism (ETP), which is required for `(FSDP, ETP)`-strided sharding when using Megatron-LM or Torch-native `DTensor` ETP. - `init_model_with_meta_device` has `MegatronFSDP` initialize your `meta`-device model in shards on every CUDA device to avoid OOM when initializing extremely large models that cannot fit on a single device. Users can initialize their model on a [`meta`-device](https://docs.pytorch.org/docs/stable/meta.html) (`with torch.device('meta'): ...`), and ``MegatronFSDP`` will further shard and initialize the model parameters layer-by-layer adhering to the customizable `module.reset_parameters` method, which prevents the entire model from being allocated in memory at any point during runtime. 
- Defaults to `False`. - Note that the `device` argument which installs your model on a specific device or rank will be deactivated when `init_model_with_meta_device=True`. diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py index e98362a1a03..24e86cede72 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py @@ -64,7 +64,6 @@ def fully_shard_model( dp_outer_dim: Optional[str] = None, tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, - expt_device_mesh: Optional[DeviceMesh] = None, fsdp_unit_modules: Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]] = None, zero_dp_strategy: str | int = 3, outer_dp_sharding_strategy: str | int = 0, @@ -184,10 +183,8 @@ def fully_shard_model( tp_dim=tp_dim, # Only required for HSDP. hybrid_fsdp_group=hybrid_fsdp_group, - # Access to flattened DP rank assignments for HSDP. + # Access to flattened DP rank assignments for HFSDP. hsdp_outer_dp_shard=_outer_fsdp_sharding, - # Only required for Megatron-FSDP + EP. - expt_device_mesh=expt_device_mesh, ) # Wrap model in Megatron FSDP. @@ -333,7 +330,6 @@ def fully_shard( dp_outer_dim: Optional[str] = None, tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, - expt_device_mesh: Optional[DeviceMesh] = None, fsdp_unit_modules: Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]] = None, zero_dp_strategy: str | int = 3, outer_dp_sharding_strategy: str | int = 0, @@ -395,9 +391,6 @@ def fully_shard( by flattening the outer-FSDP (dp_outer_dim) and FSDP (dp_shard_dim) process groups or sub-meshes. Defaults to None. Required for HSDP, i.e. if dp_outer_dim is not None. - expt_device_mesh (Optional[DeviceMesh]): - Expert parallel device mesh object defining the topology for MoE distributed training. 
- fsdp_unit_modules (Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]]): List of (sub-)module classes or (sub-)module class import paths that are "units", which are torch.nn.Module(s) that are sharded and scheduled by Megatron-FSDP. @@ -510,7 +503,6 @@ def fully_shard( dp_outer_dim=dp_outer_dim, tp_dim=tp_dim, hybrid_fsdp_group=hybrid_fsdp_group, - expt_device_mesh=expt_device_mesh, fsdp_unit_modules=fsdp_unit_modules, zero_dp_strategy=zero_dp_strategy, outer_dp_sharding_strategy=outer_dp_sharding_strategy, diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index d6ef5f6210e..10a8ae14d65 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -235,10 +235,7 @@ def __init__( self.dist_index = dist_index # If Megatron Expert Parallelism is enabled, you need to provide an expt_dp_group. - if ( - has_expert_parameters - and self.dist_index.get_fsdp_group(is_expert_parallel=True) is None - ): + if has_expert_parameters and self.dist_index.get_expert_dp_group() is None: raise ValueError( "[Megatron-FSDP] Megatron Expert Parallelism is enabled, but no expt_dp_group is" "provided." @@ -356,7 +353,9 @@ def _init_fsdp_param_and_grad_buffer(self): ) # Set the suggested communication unit size for reduce-scatter and all-gather pipelines. 
- suggested_communication_unit_size = self.ddp_config.suggested_communication_unit_size + suggested_communication_unit_size = ( + self.ddp_config.suggested_communication_unit_size or 1_000_000_000 + ) if suggested_communication_unit_size is None: if self.data_parallel_sharding_strategy == "optim_grads_params": total_param_elements = 0 @@ -371,8 +370,6 @@ def _init_fsdp_param_and_grad_buffer(self): suggested_communication_unit_size = total_param_elements // total_fsdp_module * 2 elif self.bucket_size is not None: suggested_communication_unit_size = self.bucket_size - else: - suggested_communication_unit_size = 1_000_000_000 # Cap to 1B elements. suggested_communication_unit_size = max( diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index bdf480d867b..c8116150d52 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -34,14 +34,7 @@ from torch.distributed.tensor.device_mesh import _mesh_resources from .uneven_dtensor import update_uneven_dtensor_chunk_metadata, validate_uneven_dtensor -from .utils import ( - _MODEL_PARALLEL_RNG_TRACKER_NAME, - FSDPDistributedIndex, - get_global_memory_buffer, - get_mcore_tensor_parallel_partition_dim, - is_mcore_tensor_model_parallel, - is_mcore_tensor_parallel_duplicated, -) +from .utils import _MODEL_PARALLEL_RNG_TRACKER_NAME, FSDPDistributedIndex, get_global_memory_buffer logger = logging.getLogger(__name__) @@ -1306,7 +1299,7 @@ def _does_param_require_new_bucket(param): and policy.data_parallel_sharding_strategy != "no_shard" ) - is_expert_parameter = lambda n, p: ".experts." in n + is_expert_parameter = lambda p: not getattr(p, "allreduce", True) # Step 1: Group the parameters according to their execution order and attributes. # FSDP unit module parameters are split into multiple parameter sub-groups. 
@@ -1320,7 +1313,7 @@ def _does_param_require_new_bucket(param): if is_float8tensor(param) or meta_device_init_fp8_params.get(name, False) else param.dtype ), - is_expert_param=is_expert_parameter(name, param), + is_expert_param=is_expert_parameter(param), requires_grad=param.requires_grad, fsdp_unit_id=None, ) @@ -2264,10 +2257,6 @@ def _reset_parameters(self, old_params, new_params): self.param_to_direct_module[new_param] = self.param_to_direct_module[old_param] del self.param_to_direct_module[old_param] - for tp_attr in ["_mcore_tp", "_tp_partition_dim", "_tp_duplicated"]: - if getattr(old_param, tp_attr, None) is not None: - setattr(new_param, tp_attr, getattr(old_param, tp_attr)) - for item_id, p in enumerate(self.params): if p in param_map: new_p = param_map[p] @@ -2351,7 +2340,6 @@ def _init_distributed_params(self): is_expert_param=pg.is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, - force_sync_tp_duplicated_param=True, ) dist_main_weight[param_name] = dist_param elif wbuf: @@ -2363,7 +2351,6 @@ def _init_distributed_params(self): is_expert_param=pg.is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, - force_sync_tp_duplicated_param=True, ) dist_main_weight[param_name] = dist_param else: @@ -2378,7 +2365,6 @@ def _init_distributed_params(self): is_expert_param=pg.is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=False, - force_sync_tp_duplicated_param=True, ) dist_main_weight[param_name] = dist_param @@ -2413,9 +2399,6 @@ def set_param_attribute(): "partition_dim", "partition_stride", "is_embedding_or_output_parameter", - "_mcore_tp", - "_tp_duplicated", - "_tp_partition_dim", ]: if hasattr(orig_param, attr_name): setattr(param, attr_name, getattr(orig_param, attr_name)) @@ -3563,9 +3546,7 @@ def to_local_if_dtensor(tensor): return tensor -def _get_fsdp_tensor_spec( - param, dist_index: FSDPDistributedIndex, is_sharded_param, is_expert_param -): +def _get_fsdp_tensor_spec(param, dist_index: 
FSDPDistributedIndex, is_sharded_param): """ Get the DeviceMesh for the parameter and modify the placement for Megatron-FSDP. """ @@ -3576,7 +3557,7 @@ def _get_fsdp_tensor_spec( dtensor_mesh = getattr(dtensor_spec, "mesh", None) # Validate that the DTensor root mesh is identical to the Megatron-FSDP device mesh. - megatron_fsdp_global_mesh = dist_index.get_root_mesh(is_expert_parallel=is_expert_param) + megatron_fsdp_global_mesh = dist_index.get_root_mesh() dtensor_global_mesh = _mesh_resources.get_root_mesh(dtensor_mesh) # FIXME(boxiangw): add or megatron_fsdp_global_mesh != dtensor_global_mesh: # _mesh_resources.get_root_mesh(dtensor_mesh) is not getting the correct root mesh @@ -3621,7 +3602,7 @@ def _get_fsdp_tensor_spec( placements = [Shard(0), dtensor_placement] shard_order = [1, 0] - device_mesh = dist_index.get_submesh(mesh_dim_names, is_expert_parallel=is_expert_param) + device_mesh = dist_index.get_submesh(mesh_dim_names) if shard_order is not None: setattr(device_mesh, "_shard_order", shard_order) @@ -3646,7 +3627,7 @@ def _get_fsdp_tensor_spec( else: placements = [Shard(0)] - device_mesh = dist_index.get_submesh(mesh_dim_names, is_expert_parallel=is_expert_param) + device_mesh = dist_index.get_submesh(mesh_dim_names) if shard_order is not None: setattr(device_mesh, "_shard_order", shard_order) @@ -3661,7 +3642,6 @@ def make_fsdp_dtensor( is_expert_param: bool = False, run_check: bool = False, update_uneven_dtensor_chunk_meta: bool = False, - force_sync_tp_duplicated_param: bool = False, ): """ Creates a distributed tensor (DTensor) from a local tensor with support for @@ -3740,39 +3720,38 @@ def make_fsdp_dtensor( orig_param = param # Handle tensor model parallel specific logic - if is_mcore_tensor_model_parallel(param): + if getattr(param, "tensor_model_parallel", False): # Ensure parameter is not already a DTensor assert not isinstance(param, DTensor), ( - "[Megatron-FSDP] Parameter is already a DTensor, yet tensor_model_parallel " "is True." 
+ "[Megatron-FSDP] Parameter is already a DTensor, yet tensor_model_parallel " + "is True. Check usage." ) - tp_mesh = dist_index.get_submesh(dist_index.tp_dim, is_expert_parallel=is_expert_param) - global_shape = list(param.shape) + # Validate M-Core TP attributes + assert hasattr( + param, "partition_dim" + ), "[Megatron-FSDP] tensor_model_parallel param missing 'partition_dim'." + assert hasattr( + param, "partition_stride" + ), "[Megatron-FSDP] tensor_model_parallel param missing 'partition_stride'." + assert ( + param.partition_stride == 1 + ), "[Megatron-FSDP] Only partition_stride=1 is currently supported for " + "tensor_model_parallel." + + tp_dim = param.partition_dim + tp_mesh = dist_index.get_submesh(dist_index.tp_dim) + + # Adjust shape for global dimension if tp_mesh.mesh.numel() > 1: - if is_mcore_tensor_parallel_duplicated(param): - placements = [Replicate()] - if force_sync_tp_duplicated_param: - if local_tensor.numel() > 0: - torch.distributed.broadcast( - local_tensor, group=tp_mesh.get_group(), group_src=0 - ) - elif run_check: - # TODO: Implement consistency check for duplicated TP parameters - pass - else: - tp_dim = get_mcore_tensor_parallel_partition_dim(param) - assert tp_dim is not None, ( - "[Megatron-FSDP] Parameter is not tensor model parallel, " - "yet tensor_model_parallel is True." 
- ) - placements = [Shard(tp_dim)] - global_shape[tp_dim] *= tp_mesh.mesh.numel() + global_shape = list(param.shape) + global_shape[tp_dim] *= tp_mesh.mesh.numel() # Construct TP-sharded DTensor using Megatron-style placement param = DTensor.from_local( - local_tensor=local_tensor, + local_tensor=param, device_mesh=tp_mesh, - placements=placements, + placements=[Shard(tp_dim)], run_check=run_check, shape=global_shape, stride=torch.empty(global_shape).stride(), @@ -3780,7 +3759,7 @@ def make_fsdp_dtensor( # Get FSDP-configured mesh and placements from provided param device_mesh, placements = _get_fsdp_tensor_spec( - param, dist_index, is_sharded_param=is_sharded_param, is_expert_param=is_expert_param + param, dist_index, is_sharded_param=is_sharded_param ) # Reshape local tensor for sharded layouts beyond 1D diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py index 490d80c0f21..523d8fae333 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py @@ -365,9 +365,7 @@ def _assemble_full_tensor_from_uneven_chunks( # Wrap into a replicated DTensor and return return DTensor.from_local( - full_tensor, - placements=[Replicate()] * len(dtensor.placements), - device_mesh=dtensor.device_mesh, + full_tensor, placements=[Replicate()], device_mesh=dtensor.device_mesh ) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py index b94a332bb0d..1dfe08b90f4 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py @@ -675,7 +675,6 @@ def __init__( tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, hsdp_outer_dp_shard: bool = False, - expt_device_mesh: Optional[DeviceMesh] = None, ): """ Args: @@ 
-692,8 +691,6 @@ def __init__( in hybrid FSDP. Specifying outer sharding will lift the bucket sharding coordinate system to flattened ranks of (dp_shard, dp_outer) instead of just sharding across dp_shard ranks and replicating across dp_outer ranks. - expt_device_mesh (Optional[DeviceMesh]): The expert parallel device mesh - to use for the DistributedIndex. """ # Device mesh arguments. self.device_mesh = device_mesh @@ -704,11 +701,6 @@ def __init__( self.use_hybrid_fsdp = dp_outer_dim is not None # Helper flag to denote if we are outer-sharding in hybrid FSDP. self.hsdp_outer_dp_shard = hsdp_outer_dp_shard - self.expt_device_mesh = expt_device_mesh - - # Handling the situation where M-Core MoE EP=1 - if self.expt_device_mesh is None: - self.expt_device_mesh = device_mesh # Hybrid FSDP Process Groups # Retrieve the FSDP process group from the DeviceMesh. @@ -727,14 +719,6 @@ def __init__( # combination of the outer-FSDP and FSDP process groups. self.hybrid_fsdp_group = hybrid_fsdp_group - # Retrieve the expert parallel process groups from the DeviceMesh. - self.expt_fsdp_group = ( - self.expt_device_mesh[self.dp_shard_dim].get_group() - if self.expt_device_mesh is not None - and contains_submesh(self.expt_device_mesh, self.dp_shard_dim) - else None - ) - """ Store a persistent reference to the core device meshes that back Megatron-FSDP. This is necessary because _MeshEnv (_mesh_resources) may not persist: @@ -748,33 +732,26 @@ def __init__( FIXME(@cspades): Identify the root cause of this behavior. 
""" self.mesh_library = {} - - def register_submesh(device_mesh, submesh, is_expert_parallel): - """Register a submesh with identifier: (*submesh, is_expert_parallel) - in the mesh library.""" - if contains_submesh(device_mesh, submesh): - submesh_identifier = tuple(list(submesh) + [is_expert_parallel]) - self.mesh_library[submesh_identifier] = device_mesh[submesh] - - # Define common submesh patterns + # TP Mesh tp_submesh = (self.tp_dim,) + if contains_submesh(self.device_mesh, tp_submesh): + self.mesh_library[tp_submesh] = self.device_mesh[tp_submesh] + # HSDP-TP Mesh hsdp_tp_submesh = (self.dp_outer_dim, self.dp_shard_dim, self.tp_dim) + if contains_submesh(self.device_mesh, hsdp_tp_submesh): + self.mesh_library[hsdp_tp_submesh] = self.device_mesh[hsdp_tp_submesh] + # FSDP-TP Mesh fsdp_tp_submesh = (self.dp_shard_dim, self.tp_dim) + if contains_submesh(self.device_mesh, fsdp_tp_submesh): + self.mesh_library[fsdp_tp_submesh] = self.device_mesh[fsdp_tp_submesh] + # HSDP Mesh hsdp_submesh = (self.dp_outer_dim, self.dp_shard_dim) + if contains_submesh(self.device_mesh, hsdp_submesh): + self.mesh_library[hsdp_submesh] = self.device_mesh[hsdp_submesh] + # FSDP Mesh fsdp_submesh = (self.dp_shard_dim,) - - # Register non-EP submeshes - register_submesh(self.device_mesh, tp_submesh, False) - register_submesh(self.device_mesh, hsdp_tp_submesh, False) - register_submesh(self.device_mesh, fsdp_tp_submesh, False) - register_submesh(self.device_mesh, hsdp_submesh, False) - register_submesh(self.device_mesh, fsdp_submesh, False) - - # Register EP submeshes - if self.expt_device_mesh is not None: - register_submesh(self.expt_device_mesh, tp_submesh, True) - register_submesh(self.expt_device_mesh, fsdp_tp_submesh, True) - register_submesh(self.expt_device_mesh, fsdp_submesh, True) + if contains_submesh(self.device_mesh, fsdp_submesh): + self.mesh_library[fsdp_submesh] = self.device_mesh[fsdp_submesh] # Validate FSDP arguments. 
if self.fsdp_group is None: @@ -799,54 +776,36 @@ def register_submesh(device_mesh, submesh, is_expert_parallel): "process groups or sub-meshes." ) - def get_submesh( - self, mesh_dim_names: str | Sequence[str], is_expert_parallel: bool = False - ) -> DeviceMesh: + def get_submesh(self, mesh_dim_names: str | Sequence[str]) -> DeviceMesh: """ - Retrieve an Megatron-FSDP-registered submesh by name(s). + Retrieve an Megatron-FSDP-registered sub-mesh by name(s). """ if isinstance(mesh_dim_names, str): mesh_dim_names = (mesh_dim_names,) - - # Construct submesh identifier: (*mesh_dim_names, is_expert_parallel) - submesh_identifier = tuple(list(mesh_dim_names) + [is_expert_parallel]) - - # Retrieve the submesh from the mesh library - device_submesh = self.mesh_library.get(submesh_identifier, None) - + # Search for the sub-mesh in the mesh library. + device_submesh = self.mesh_library.get(tuple(mesh_dim_names), None) if device_submesh is None: - # Warn about not specifying tp_dim for layers or frameworks that depend on this. - if self.tp_dim is None and not is_expert_parallel: + if self.tp_dim is None: + # Warn about not specifying tp_dim for + # layers or frameworks that depend on this. logger.warning( - "[FSDPDistributedIndex] Note: For TransformerEngine, or " - "other machine learning frameworks like Megatron that assume " - "TP=1, you must specify tp_dim to use Megatron-FSDP. " - "Create a trivial TP dimension by setting the TP dimension size " + "[FSDPDistributedIndex] Note: For TransformerEngine, or other machine learning " + "frameworks like Megatron that assume TP=1, you must specify tp_dim to use " + "Megatron-FSDP. 
Create a trivial TP dimension by setting the TP dimension size " "to 1 in the DeviceMesh.\n" f"DeviceMesh: {self.device_mesh}" ) - elif self.tp_dim is None and is_expert_parallel: - logger.warning( - "[FSDPDistributedIndex] Note: For TransformerEngine, or " - "other machine learning frameworks like Megatron that assume " - "ETP=1, you must specify tp_dim to use Megatron-FSDP. " - "Create a trivial ETP dimension by setting the ETP dimension size " - "to 1 in the DeviceMesh.\n" - f"DeviceMesh: {self.expt_device_mesh}" - ) - raise ValueError( - f"[FSDPDistributedIndex][get_submesh] No submesh with " - f"mesh_dim_names={mesh_dim_names}, is_expert_parallel={is_expert_parallel} " - f"has been registered with Megatron-FSDP." + f"[FSDPDistributedIndex][get_submesh] No sub-mesh with " + f"mesh_dim_names={mesh_dim_names} has been registered with Megatron-FSDP." ) - return device_submesh def get_dp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: """Get the data parallel process group.""" if is_expert_parallel: - return self.expt_fsdp_group + # Expert parallel is not supported + return None if self.use_hybrid_fsdp: return self.hybrid_fsdp_group return self.fsdp_group @@ -854,7 +813,8 @@ def get_dp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: def get_fsdp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: """Get the FSDP process group.""" if is_expert_parallel: - return self.expt_fsdp_group + # Expert parallel is not supported + return None return self.fsdp_group def get_outer_fsdp_group(self) -> ProcessGroup: @@ -866,7 +826,7 @@ def get_outer_fsdp_group(self) -> ProcessGroup: def get_root_mesh(self, is_expert_parallel: bool = False) -> DeviceMesh: """Get the device mesh.""" if is_expert_parallel: - return self.expt_device_mesh + raise NotImplementedError("Expert parallel is not supported in Megatron-FSDP.") return self.device_mesh def get_logical_hybrid_fsdp_rank(self): @@ -964,29 +924,3 @@ def 
create_updated_function_signature(original_function, **extended_kwargs: dict # Return the updated function signature. return inspect.Signature(params) - - -def is_mcore_tensor_model_parallel(param: torch.Tensor) -> bool: - """ - Check if the given parameter is Megatron-Core tensor model parallel. - """ - return getattr(param, "_mcore_tp", False) or getattr(param, "tensor_model_parallel", False) - - -def is_mcore_tensor_parallel_duplicated(param: torch.Tensor) -> bool: - """ - Check if the given parameter is Megatron-Core tensor model parallel and duplicated. - """ - return getattr(param, "_tp_duplicated", False) - - -def get_mcore_tensor_parallel_partition_dim(param: torch.Tensor) -> Optional[int]: - """ - Get the partition dimension for a Megatron-Core tensor model parallel parameter. - """ - if is_mcore_tensor_model_parallel(param): - if hasattr(param, "_tp_partition_dim"): - return param._tp_partition_dim - else: - return param.partition_dim - return None diff --git a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py index 455a7757d28..507472f789f 100644 --- a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +++ b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py @@ -130,9 +130,9 @@ def forward(self, max_seq_len: int, offset: int = 0, packed_seq: bool = False) - self.original_max_position_embeddings, self.correction_range_round_to_int, ) - inv_freq_mask = 1.0 - _yarn_linear_ramp_mask( - low, high, self.dim // 2, device=self.inv_freq_extra.device - ).to(dtype=torch.float32) + inv_freq_mask = 1.0 - _yarn_linear_ramp_mask(low, high, self.dim // 2).to( + device=self.inv_freq_extra.device, dtype=torch.float32 + ) inv_freq = self.inv_freq_inter * (1 - inv_freq_mask) + self.inv_freq_extra * inv_freq_mask seq = ( @@ -211,11 +211,11 @@ def _yarn_find_correction_range( return max(low, 0), min(high, dim - 1) # Clamp values just in case -def 
_yarn_linear_ramp_mask(min: float, max: float, dim: int, device: torch.device) -> Tensor: +def _yarn_linear_ramp_mask(min: float, max: float, dim: int) -> Tensor: if min == max: max += 0.001 # Prevent singularity - linear_func = (torch.arange(dim, dtype=torch.float32, device=device) - min) / (max - min) + linear_func = (torch.arange(dim, dtype=torch.float32) - min) / (max - min) ramp_func = torch.clamp(linear_func, 0, 1) return ramp_func diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index c254b2f6882..307538fad22 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -34,7 +34,6 @@ from megatron.core import parallel_state from megatron.core.optimizer.cpu_offloading.hybrid_optimizer import HybridDeviceOptimizer from megatron.core.process_groups_config import ProcessGroupCollection -from megatron.core.transformer.fsdp_dtensor_checkpoint import get_global_unique_param_name from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer from ..transformer.module import MegatronModule @@ -482,7 +481,6 @@ def get_megatron_optimizer( use_gloo_process_groups: bool = True, default_skip_embedding_weight_decay: bool = False, pg_collection: Optional[ProcessGroupCollection] = None, - dump_param_to_param_group_map: Optional[str] = None, ) -> MegatronOptimizer: """Retrieve the Megatron optimizer for model chunks. @@ -504,7 +502,6 @@ def get_megatron_optimizer( This is useful if you do not want embeddings to shrink to zero in training as recommended in https://arxiv.org/abs/2312.16903 pg_collection: Optional unified process group for distributed training. - dump_param_to_param_group_map (Optional[str]): path to dump parameter to param group map. Returns: Instance of MegatronOptimizer. 
@@ -582,9 +579,6 @@ def get_megatron_optimizer( return ChainedOptimizer(optimizers) - if dump_param_to_param_group_map is not None: - param_to_param_group = {} - param_group_id = 0 for dense_model_chunks, overlap_param_gather_with_optimizer_step in zip( all_dense_model_chunks, overlap_param_gather_with_optimizer_step_flags ): @@ -603,12 +597,6 @@ def get_megatron_optimizer( model_chunk.overlap_param_gather_with_optimizer_step = ( overlap_param_gather_with_optimizer_step ) - if dump_param_to_param_group_map is not None: - for param_group in param_groups: - for param in param_group["params"]: - param_name = get_global_unique_param_name(model_chunks, param) - param_to_param_group[param_name] = param_group_id - param_group_id += 1 # Pass Gloo process groups into optimizer only if needed. optimizers.append( @@ -638,12 +626,6 @@ def get_megatron_optimizer( buffer_name='expert_parallel_buffers', default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, ) - if dump_param_to_param_group_map is not None: - for param_group in moe_param_groups: - for param in param_group["params"]: - param_name = get_global_unique_param_name(model_chunks, param) - param_to_param_group[param_name] = param_group_id - param_group_id += 1 if len(moe_param_groups) > 0: expt_model_parallel_rank = get_pg_rank(expt_tp_pp_group) # Pass Gloo process groups into optimizer only if needed. 
@@ -666,9 +648,4 @@ def get_megatron_optimizer( ) ) - if dump_param_to_param_group_map is not None: - torch.distributed.checkpoint.save( - state_dict=param_to_param_group, checkpoint_id=dump_param_to_param_group_map - ) - return ChainedOptimizer(optimizers) diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index 8b4740516e2..2925edcce60 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -47,7 +47,6 @@ from ..dist_checkpointing.utils import extract_sharded_tensors_and_factories from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer, partition_buckets from ..fp8_utils import dequantize_fp8_tensor, is_float8tensor, quantize_param_shard -from ..transformer.fsdp_dtensor_checkpoint import handle_experts_in_state_dict from ..transformer.module import MegatronModule from .grad_scaler import MegatronGradScaler from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper, param_group_identifier_keys @@ -1153,7 +1152,6 @@ def _param_name(self, param: torch.nn.Parameter) -> str: "Ensure that each model chunk has unique parameter names." ) name_to_param.update(_name_to_param) - name_to_param = handle_experts_in_state_dict(name_to_param) self.param_to_name = {param: name for name, param in name_to_param.items()} assert ( param in self.param_to_name diff --git a/megatron/core/transformer/fsdp_dtensor_checkpoint.py b/megatron/core/transformer/fsdp_dtensor_checkpoint.py index 9ef3f1f1b82..dad1947a183 100644 --- a/megatron/core/transformer/fsdp_dtensor_checkpoint.py +++ b/megatron/core/transformer/fsdp_dtensor_checkpoint.py @@ -12,160 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging -import re - import torch -import torch.distributed as dist -from torch.distributed.checkpoint import default_planner - -logger = logging.getLogger(__name__) try: - from torch.distributed import DeviceMesh - from torch.distributed._tensor import DTensor - from torch.distributed.checkpoint.metadata import TensorStorageMetadata - from torch.distributed.tensor.placement_types import Replicate, Shard - from megatron.core.distributed.fsdp.src.megatron_fsdp.param_and_grad_buffer import ( make_fsdp_dtensor, ) - from megatron.core.distributed.fsdp.src.megatron_fsdp.uneven_dtensor import ( - gather_uneven_dtensor_to_full_tensor, - ) - from megatron.core.distributed.fsdp.src.megatron_fsdp.utils import ( - get_mcore_tensor_parallel_partition_dim, - is_mcore_tensor_model_parallel, - ) HAVE_MEGATRON_FSDP = True except ImportError: HAVE_MEGATRON_FSDP = False -from megatron.core import parallel_state from megatron.core.tensor_parallel.layers import copy_tensor_model_parallel_attributes -from megatron.core.transformer.transformer_layer import TransformerLayer - - -def get_ep_layer_offset(): - """ - Get the expert layer offset for the current model. - """ - from megatron.training.global_vars import get_args - - args = get_args() - ep_size = parallel_state.get_expert_model_parallel_world_size() - ep_rank = parallel_state.get_expert_model_parallel_rank() - num_local_experts = args.num_experts // ep_size if args.num_experts else 0 - local_expert_offset = ep_rank * num_local_experts - - return local_expert_offset - - -def get_total_num_experts(): - """ - Get the total number of experts for the current model. - """ - from megatron.training.global_vars import get_args - - args = get_args() - return args.num_experts if args.num_experts else 0 - - -def get_expert_index_from_key(key): - """Extract expert index from various expert key formats. 
- - Supported formats: - - GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0' - - SequentialMLP: 'mlp.experts.local_experts.0.linear_fc1.weight', - 'mlp.experts.local_experts.0.linear_fc2.weight' - - Returns: - int: Expert index if found, None otherwise. - """ - # GroupedMLP: index is at the end after 'weight' - if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key: - m = re.search(r'^.*\.mlp\.experts\.linear_fc\d\.weight(\d+)', key) - assert m, f"Failed to parse expert index from key: {key}" - return int(m.group(1)) - # SequentialMLP: index is between 'local_experts.' and next '.' - elif 'mlp.experts.local_experts' in key: - m = re.search(r'^.*\.mlp\.experts\.local_experts\.(\d+)', key) - assert m, f"Failed to parse expert index from key: {key}" - return int(m.group(1)) - return None - - -def handle_experts_in_state_dict(state_dict): - """ - Rewrite expert keys in state dict. - """ - local_expert_start = get_ep_layer_offset() - local_expert_end = get_total_num_experts() - - def should_keep_expert_key(expert_index): - """Determine if this rank should keep this expert key based on expert index""" - if expert_index is None: - # If we can't determine expert index, keep the key (non-expert weights) - return True - - # Check if this expert belongs to this rank - return local_expert_start <= expert_index < local_expert_end - - def replace_expert_index_in_key(key, expert_index, state_dict): - """Replace expert index in key with new index corresponding to the current rank""" - new_expert_index = expert_index + local_expert_start - # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0' - if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key: - # Handle SwiGLU weight{idx}_w and weight{idx}_v format - if key.endswith('_w') or key.endswith('_v'): - suffix = key[-2:] # '_w' or '_v' - new_key = key.replace( - f'weight{expert_index}{suffix}', 
f'weight{new_expert_index}{suffix}' - ) - # Handle regular weight{idx} format - else: - new_key = key.replace(f'weight{expert_index}', f'weight{new_expert_index}') - # SequentialMLP: index is between 'local_experts.' and next '.' - elif 'mlp.experts.local_experts' in key: - new_key = key.replace( - f'local_experts.{expert_index}.', f'local_experts.{new_expert_index}.' - ) - else: - raise ValueError(f"Unexpected expert key format: {key}") - - state_dict[new_key] = state_dict[key] - del state_dict[key] - - # Process model state dict - state_dict = state_dict.copy() - for key in list(state_dict.keys()): - expert_index = get_expert_index_from_key(key) - if not should_keep_expert_key(expert_index): - replace_expert_index_in_key(key, expert_index, state_dict) - - return state_dict - - -def expert_param_local_key(key): - """Get the module parameter corresponding to the key.""" - local_expert_offset = get_ep_layer_offset() - expert_index = get_expert_index_from_key(key) - if expert_index is not None: - new_expert_index = expert_index - local_expert_offset - # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0' - if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key: - new_key = key.replace(f'weight{expert_index}', f'weight{new_expert_index}') - # SequentialMLP: index is between 'local_experts.' and next '.' - elif 'mlp.experts.local_experts' in key: - new_key = key.replace( - f'local_experts.{expert_index}.', f'local_experts.{new_expert_index}.' - ) - else: - raise ValueError(f"Unexpected expert key format: {key}") - key = new_key - - return key def handle_swiglu_in_state_dict(model, model_state_dict, optimizer_state_dict): @@ -185,29 +43,7 @@ def intersection(s1, s2): def offset_slice(s, offset): return slice(s.start + offset, s.stop + offset) - def is_swiglu_key(key): - """ - Check if this key should be handled as SwiGLU linear_fc1 weight or bias. 
- """ - # Non-expert MLP: 'mlp.linear_fc1.weight', 'mlp.linear_fc1.bias' - # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc1.bias0' - # SequentialMLP: 'mlp.experts.local_experts.0.linear_fc1.weight', - # 'mlp.experts.local_experts.0.linear_fc1.bias' - return any( - re.search(pat, key) - for pat in [ - r"(.*)\.mlp\.linear_fc1\.weight$", - r"(.*)\.mlp\.linear_fc1\.bias$", - r"(.*)\.mlp\.experts\.linear_fc1\.weight(\d+)$", - r"(.*)\.mlp\.experts\.linear_fc1\.bias(\d+)$", - r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.weight$", - r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.bias$", - r"(.*)\.mlp\.shared_experts\.linear_fc1\.weight$", - r"(.*)\.mlp\.shared_experts\.linear_fc1\.bias$", - ] - ) - - def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis, is_expert_param): + def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): """ Split the SWiGLU linear_fc1 parameter into two parts: weight_w and weight_v. """ @@ -219,9 +55,7 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis, is_expert_param fsdp_slice = dist_param.megatron_fsdp_slice megatron_fsdp_dist_index = dist_param.megatron_fsdp_dist_index - tp_mesh = megatron_fsdp_dist_index.get_submesh( - [megatron_fsdp_dist_index.tp_dim], is_expert_parallel=is_expert_param - ) + tp_mesh = megatron_fsdp_dist_index.get_submesh([megatron_fsdp_dist_index.tp_dim]) data_size = data.numel() // tp_mesh.mesh.numel() w_slice = slice(0, data_size // 2) v_slice = slice(data_size // 2, data_size) @@ -241,9 +75,8 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis, is_expert_param # Fake parameters w and v are used to provide the correct parameter # shape and Tensor-Parallelism information. 
per_tp_rank_shape = list(data.shape) - if is_mcore_tensor_model_parallel(dist_param): - tp_dim = get_mcore_tensor_parallel_partition_dim(dist_param) - assert tp_dim is not None, "Tensor model parallel dimension not found" + if getattr(dist_param, "tensor_model_parallel", False): + tp_dim = dist_param.partition_dim per_tp_rank_shape[tp_dim] //= tp_mesh.mesh.numel() linear_fc1_meta = torch.empty(*per_tp_rank_shape, device="meta") w_meta, v_meta = torch.chunk(linear_fc1_meta, 2, dim=swiglu_shard_axis) @@ -254,7 +87,6 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis, is_expert_param weight_w.data, w_meta, dist_index=megatron_fsdp_dist_index, - is_expert_param=is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, ) @@ -262,21 +94,16 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis, is_expert_param weight_v.data, v_meta, dist_index=megatron_fsdp_dist_index, - is_expert_param=is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, ) return weight_w, weight_v - model_state_dict = model_state_dict.copy() for key in list(model_state_dict.keys()): - if is_swiglu_key(key): + if key.endswith('mlp.linear_fc1.weight') or key.endswith('mlp.linear_fc1.bias'): dist_param = model.get_parameter(f"module.{key}") weight_w, weight_v = split_swiglu_linear_fc1( - model_state_dict[key], - dist_param, - swiglu_shard_axis=0, - is_expert_param='mlp.experts' in key, + model_state_dict[key], dist_param, swiglu_shard_axis=0 ) # Update the model state dict with the new keys @@ -284,32 +111,26 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis, is_expert_param model_state_dict[f"{key}_v"] = weight_v del model_state_dict[key] - if optimizer_state_dict is not None: - optimizer_state_dict = optimizer_state_dict.copy() - if len(optimizer_state_dict["state"]) != 0: - opt_state_dict = optimizer_state_dict["state"] - new_opt_state_dict = {} - for key in list(opt_state_dict.keys()): - # Only process SWIGLU keys - if not 
is_swiglu_key(key): - new_opt_state_dict[key] = opt_state_dict[key] - continue - new_opt_state_dict[f"{key}_w"] = opt_state_dict[key].copy() - new_opt_state_dict[f"{key}_v"] = opt_state_dict[key].copy() - for subkey in ["exp_avg", "exp_avg_sq"]: - dist_param = model.get_parameter(expert_param_local_key(key[len("module.") :])) - weight_w, weight_v = split_swiglu_linear_fc1( - opt_state_dict[key][subkey], - dist_param, - swiglu_shard_axis=0, - is_expert_param="mlp.experts" in key, - ) - # Update the optimizer state dict with the new keys - new_opt_state_dict[f"{key}_w"][subkey] = weight_w - new_opt_state_dict[f"{key}_v"][subkey] = weight_v - optimizer_state_dict["state"] = new_opt_state_dict + try: + optimizer_state_dict = optimizer_state_dict["state"] + except KeyError: + optimizer_state_dict = {} - return model_state_dict, optimizer_state_dict + if len(optimizer_state_dict) != 0: + for key in list(optimizer_state_dict.keys()): + if not (key.endswith('mlp.linear_fc1.weight') or key.endswith('mlp.linear_fc1.bias')): + continue + optimizer_state_dict[f"{key}_w"] = optimizer_state_dict[key].copy() + optimizer_state_dict[f"{key}_v"] = optimizer_state_dict[key].copy() + for subkey in ["exp_avg", "exp_avg_sq"]: + dist_param = model.get_parameter(key[len("module.") :]) + weight_w, weight_v = split_swiglu_linear_fc1( + optimizer_state_dict[key][subkey], dist_param, swiglu_shard_axis=0 + ) + # Update the optimizer state dict with the new keys + optimizer_state_dict[f"{key}_w"][subkey] = weight_w + optimizer_state_dict[f"{key}_v"][subkey] = weight_v + del optimizer_state_dict[key] def handle_fp8_extra_state_case(model_state_dict): @@ -341,7 +162,7 @@ def flatten_state_dict(obj, parent_key="", sep="."): return items -def print_diff_in_state_dicts(state_dict_metadata, load_state_dict, limit=100): +def print_diff_in_state_dicts(state_dict_metadata, load_state_dict): """ Print the differences between two state dicts: metadata state dict and load state dict. 
This function compares the keys and shapes of the tensors in both dicts. @@ -351,105 +172,24 @@ def print_diff_in_state_dicts(state_dict_metadata, load_state_dict, limit=100): meta_keys = set(state_dict_metadata.keys()) load_keys = set(load_state_dict.keys()) - only_in_meta = list(meta_keys - load_keys) - only_in_load = list(load_keys - meta_keys) - in_both = list(meta_keys & load_keys) + only_in_meta = meta_keys - load_keys + only_in_load = load_keys - meta_keys + in_both = meta_keys & load_keys - logger.info(f"Keys only in checkpoint metadata_state_dict(first {limit}):") - for k in sorted(only_in_meta[:limit]): - logger.info(f" {k}") + print("Keys only in checkpoint metadata_state_dict:") + for k in sorted(only_in_meta): + print(f" {k}") - logger.info(f"\nKeys only in load_state_dict(first {limit}):") - for k in sorted(only_in_load[:limit]): - logger.info(f" {k}") + print("\nKeys only in load_state_dict:") + for k in sorted(only_in_load): + print(f" {k}") - logger.info(f"\nKeys in both but with different shapes(first {limit}):") - for k in sorted(in_both[:limit]): + print("\nKeys in both but with different shapes:") + for k in sorted(in_both): v_meta = state_dict_metadata[k] v_load = load_state_dict[k] # If tensors, compare shape; else, compare type/values meta_shape = v_meta.size if hasattr(v_meta, "size") else type(v_meta) load_shape = v_load.shape if hasattr(v_load, "shape") else type(v_load) if meta_shape != load_shape: - logger.info(f" {k}: meta shape={meta_shape}, load shape={load_shape}") - - -def validate_loaded_state_dict(state_dict, checkpoint_path): - """ - Validate the loaded state dict against the expected structure and types. - """ - assert HAVE_MEGATRON_FSDP, "This function requires Megatron-FSDP to be installed." 
- - # Initialize reader - reader = torch.distributed.checkpoint.FileSystemReader(checkpoint_path) - metadata = reader.read_metadata() - flat_state_dict = flatten_state_dict(state_dict) - - for key, value in flat_state_dict.items(): - tensor_metadata = metadata.state_dict_metadata[key] - - if not isinstance(tensor_metadata, TensorStorageMetadata): - continue - if not isinstance(value, DTensor): - load_item_dict = {key: torch.empty_like(value)} - else: - load_item_dict = { - key: torch.distributed.tensor.empty( - tensor_metadata.size, - dtype=tensor_metadata.properties.dtype, - device_mesh=DeviceMesh.from_group( - group=dist.group.WORLD, - device_type="cuda", - mesh=torch.arange(dist.get_world_size()), - mesh_dim_names=("world",), - ), - placements=[Shard(0)], - ) - } - torch.distributed.checkpoint.load( - load_item_dict, storage_reader=reader, planner=default_planner.DefaultLoadPlanner() - ) - if isinstance(value, DTensor): - full_value = gather_uneven_dtensor_to_full_tensor(value) - loaded_tensor = load_item_dict[key].redistribute( - placements=[Replicate()] * len(value.placements) - ) - assert torch.allclose( - loaded_tensor._local_tensor, full_value._local_tensor, atol=1e-8, rtol=1e-5 - ), f"key: {key}; {loaded_tensor} {full_value}" - else: - assert torch.allclose( - value, load_item_dict[key] - ), f"key: {key}; {value} {load_item_dict[key]}" - - -def get_global_unique_param_name(model_chunks, param): - """ - Get the global unique parameter name for a given model and parameter. 
- """ - param_name = None - for model in model_chunks: - for name, p in model.named_parameters(): - if p is param: - param_name = name - break - if param_name is None: - raise ValueError("Parameter not found in model chunks") - - # Get PP unique parameter name - if re.search(r"layers\.(\d+)", param_name) and "mtp" not in param_name: - tf_layer_number = -1 - for module in model.modules(): - if not isinstance(module, TransformerLayer): - continue - for p in module.parameters(): - if p is param: - tf_layer_number = module.layer_number - break - if tf_layer_number != -1: - param_name = re.sub(r"layers\.(\d+)", f"layers.{tf_layer_number - 1}", param_name) - - # Get EP unique parameter name - param_name = list(handle_experts_in_state_dict({param_name: None}).keys())[0] - - return param_name + print(f" {k}: meta shape={meta_shape}, load shape={load_shape}") diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 1d29aff0827..bdf915a8ae1 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -2267,10 +2267,6 @@ def _add_training_args(parser): help="Use torch.optim.Optimizer instead of Megatron's optimizer in optimizer cpu offload mode.") group.add_argument('--overlap-cpu-optimizer-d2h-h2d', action='store_true', default=False, help='Overlap CPU optimizer step, gradients D2H and updated parameters H2D.') - group.add_argument('--dump-param-to-param-group-map', type=str, default=None, - help="Path to a file containing parameter-to-parameter-group mapping. 
" - "Provide a JSON file that specifies which parameters belong to which " - "parameter group for global coordination.") group.add_argument('--no-pin-cpu-grads', action='store_false', dest='pin_cpu_grads', help='Disable pinning of CPU memory for gradients.') group.add_argument('--no-pin-cpu-params', action='store_false', dest='pin_cpu_params', diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index 93c23255f4c..71b9cd97021 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -42,10 +42,9 @@ try: from megatron.core.distributed.fsdp.src.megatron_fsdp.uneven_dtensor import preprocess_state_dict_for_uneven_dtensor from megatron.core.transformer.fsdp_dtensor_checkpoint import ( - print_diff_in_state_dicts, handle_fp8_extra_state_case, handle_swiglu_in_state_dict, - handle_experts_in_state_dict, + print_diff_in_state_dicts, ) HAVE_MEGATRON_FSDP = True except ImportError: @@ -562,9 +561,6 @@ def save_checkpoint(iteration, model, optimizer, opt_param_scheduler, num_floati # TODO Handle non-empty directories (e.g., after a crash during saving). 
ensure_directory_exists(checkpoint_name, check_parent=False) - if ckpt_format == "fsdp_dtensor": - state_dict = preprocess_fsdp_dtensor_state_dict(args, state_dict, model[0]) - fs_storage_writer = torch.distributed.checkpoint.FileSystemWriter(checkpoint_name) torch.distributed.checkpoint.save( state_dict=state_dict, @@ -788,17 +784,9 @@ def maybe_save_dataloader_state(train_iterator, iteration, dataloader_save_path) torch.save(dataloader_save_dict, data_state_save_path) -def generate_state_dict( - args, - model, - optimizer, - opt_param_scheduler, - rng_state, - iteration=None, - optim_sd_kwargs=None, - model_sd_kwargs=None, - rerun_state=None, -): +def generate_state_dict(args, model, optimizer, opt_param_scheduler, + rng_state, iteration=None, + optim_sd_kwargs=None, model_sd_kwargs=None, rerun_state=None): """Generate a state dict from given model, optimizer, scheduler, rng state and others. """ # Arguments, iteration, and model. @@ -851,27 +839,16 @@ def generate_state_dict( if not args.no_save_rng and rng_state: state_dict["rng_state"] = rng_state - return state_dict - - -def preprocess_fsdp_dtensor_state_dict(args, raw_state_dict, model): - state_dict = raw_state_dict.copy() - handle_fp8_extra_state_case(state_dict["model"]) - if args.swiglu: - if "optimizer" in state_dict: - model_state_dict, optimizer_state_dict = handle_swiglu_in_state_dict( - model, state_dict["model"], state_dict["optimizer"] - ) - state_dict["model"] = model_state_dict - state_dict["optimizer"] = optimizer_state_dict - else: - model_state_dict, _ = handle_swiglu_in_state_dict( - model, state_dict["model"], None - ) - state_dict["model"] = model_state_dict - if args.num_experts: - state_dict["model"] = handle_experts_in_state_dict(state_dict["model"]) - preprocess_state_dict_for_uneven_dtensor(state_dict) + # fsdp_dtensor ckpt specific state dict preprocessing + if args.ckpt_format == "fsdp_dtensor": + assert HAVE_MEGATRON_FSDP, "Megatron FSDP is enabled but Megatron-FSDP is not 
available." + assert len(model) == 1, "FSDP DTensor checkpoints are not supported for multiple models." + if args.swiglu: + state_dict = state_dict.copy() + handle_swiglu_in_state_dict( + model[0], state_dict["model"], state_dict["optimizer"]) + handle_fp8_extra_state_case(state_dict["model"]) + preprocess_state_dict_for_uneven_dtensor(state_dict) return state_dict @@ -1192,12 +1169,6 @@ def _load_base_checkpoint( if rank0: return {}, checkpoint_name, release, CheckpointType.FSDP_DTENSOR - state_dict = sharded_state_dict - raw_optimizer_state_dict = state_dict["optimizer"].copy() if "optimizer" in state_dict else None - raw_model_state_dict = state_dict["model"].copy() if "model" in state_dict else None - model = state_dict.pop("_model") - state_dict = preprocess_fsdp_dtensor_state_dict(args, state_dict, model[0]) - ckpt_type = CheckpointType.FSDP_DTENSOR fs_storage_reader = torch.distributed.checkpoint.FileSystemReader(checkpoint_name) allow_partial_load = not getattr(args, 'strict_fsdp_dtensor_load', False) @@ -1206,20 +1177,15 @@ def _load_base_checkpoint( rank = torch.distributed.get_rank() import time as _time _time.sleep(rank * 0.001) # Make that logs of different ranks do not overlap - print_diff_in_state_dicts(state_dict_metadata, state_dict) + print_diff_in_state_dicts(state_dict_metadata, sharded_state_dict) planner = default_planner.DefaultLoadPlanner(allow_partial_load=allow_partial_load) torch.distributed.checkpoint.load_state_dict( - state_dict=state_dict, + state_dict=sharded_state_dict, storage_reader=fs_storage_reader, planner=planner, ) - - if raw_optimizer_state_dict is not None: - state_dict["optimizer"] = raw_optimizer_state_dict - - if raw_model_state_dict is not None: - state_dict["model"] = raw_model_state_dict + state_dict = sharded_state_dict else: raise NotImplementedError(f"checkpoint format {ckpt_format} not supported") @@ -1554,7 +1520,7 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', except 
FileNotFoundError: state_dict_metadata = {} - gen_sd_rerun_state = {} + gen_sd_rerun_state = None gen_sd_opt_param_scheduler = None gen_sd_rng_state = None gen_sd_optim = None @@ -1571,7 +1537,7 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', optim_sd_kwargs = dict(metadata=_build_sharded_state_dict_metadata(args), is_loading=True) - state_dict = generate_state_dict( + load_kwargs["sharded_state_dict"] = generate_state_dict( args, model=model, optimizer=gen_sd_optim, @@ -1581,8 +1547,6 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', rerun_state=gen_sd_rerun_state, iteration=1, ) - state_dict["_model"] = model - load_kwargs["sharded_state_dict"] = state_dict state_dict, checkpoint_name, release, ckpt_type = _load_base_checkpoint( load_dir, args, rank0=False, checkpointing_context=checkpointing_context, diff --git a/megatron/training/training.py b/megatron/training/training.py index bda9e42dc82..f805dab0f15 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1210,7 +1210,6 @@ def setup_model_and_optimizer( # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903 default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, - dump_param_to_param_group_map=args.dump_param_to_param_group_map, ) else: optimizer = get_megatron_muon_optimizer( diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json index 717ae3f5fa6..0f2637a9511 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04722, - "2": 11.03572, - "3": 9.58802, - "4": 9.25807, - "5": 9.46595, - "6": 9.99646, - "7": 9.50952, - "8": 8.97596, - "9": 8.64768, - "10": 9.40103, - "11": 8.86556, - "12": 8.63563, - "13": 8.52125, - "14": 8.08824, - "15": 8.1958, - "16": 8.22112, - "17": 8.14098, - "18": 7.8386, - "19": 8.23438, - "20": 7.95361, - "21": 7.62549, - "22": 7.60352, - "23": 7.47957, - "24": 7.46573, - "25": 7.70343, - "26": 7.10719, - "27": 7.64313, - "28": 7.34582, - "29": 7.5169, - "30": 7.67511, - "31": 7.41799, - "32": 7.61213, - "33": 7.66582, - "34": 7.73101, - "35": 7.23081, - "36": 7.10765, - "37": 7.4476, - "38": 7.21053, - "39": 7.57508, - "40": 7.5662, - "41": 7.51605, - "42": 7.27243, - "43": 7.25706, - "44": 7.44, - "45": 7.21244, - "46": 6.92421, - "47": 7.32604, - "48": 7.17147, - "49": 7.62154, - "50": 7.0624 + "1": 11.04748, + "2": 11.03561, + "3": 9.58774, + "4": 9.25819, + "5": 9.53583, + "6": 9.8804, + "7": 9.48247, + "8": 8.93575, + "9": 8.65813, + "10": 9.0567, + "11": 8.49445, + "12": 8.52444, + "13": 8.45239, + "14": 7.97323, + "15": 8.0476, + "16": 8.07971, + "17": 8.09081, + "18": 7.76437, + "19": 8.14892, + "20": 7.89868, + "21": 7.59371, + "22": 7.54743, + "23": 7.43222, + "24": 7.4302, + "25": 7.67579, + "26": 7.06929, + "27": 7.62041, + "28": 7.32495, + "29": 7.49042, + "30": 7.64391, + "31": 7.39435, + "32": 7.58789, + "33": 7.64037, + "34": 7.69778, + "35": 7.20998, + "36": 7.08538, + "37": 7.42584, + "38": 7.18804, + "39": 7.55054, + "40": 7.54446, + "41": 7.49287, + "42": 7.24937, + "43": 7.23587, + "44": 7.41595, + "45": 7.18755, + "46": 6.89949, + "47": 7.29966, + "48": 7.14134, + "49": 7.58963, + "50": 7.03602 } }, "num-zeros": { @@ -62,55 +62,55 @@ "step_interval": 1, "values": { "1": 38802612.0, - "2": 38543656.0, - "3": 38739356.0, - "4": 
273649600.0, - "5": 252887040.0, - "6": 255692384.0, - "7": 598483264.0, - "8": 787737984.0, - "9": 696133120.0, - "10": 505146368.0, - "11": 718888640.0, - "12": 872597184.0, - "13": 947495104.0, - "14": 1076398976.0, - "15": 856390592.0, - "16": 1048635648.0, - "17": 831370688.0, - "18": 963679552.0, - "19": 970018240.0, - "20": 935737344.0, - "21": 904189312.0, - "22": 887937280.0, - "23": 894777856.0, - "24": 703744192.0, - "25": 909232512.0, - "26": 875633216.0, - "27": 894981376.0, - "28": 919242816.0, - "29": 931351552.0, - "30": 929784768.0, - "31": 941621376.0, - "32": 885000768.0, - "33": 828484096.0, - "34": 822284800.0, - "35": 832032128.0, - "36": 787939392.0, - "37": 770719808.0, - "38": 561204672.0, - "39": 617201536.0, - "40": 695374592.0, - "41": 698978816.0, - "42": 692913728.0, - "43": 668003776.0, - "44": 673780992.0, - "45": 631182912.0, - "46": 444613312.0, - "47": 591957824.0, - "48": 617363968.0, - "49": 585295808.0, - "50": 570423872.0 + "2": 38543592.0, + "3": 38739528.0, + "4": 279937824.0, + "5": 259189728.0, + "6": 271446400.0, + "7": 604773504.0, + "8": 768892544.0, + "9": 645824128.0, + "10": 744257088.0, + "11": 718888576.0, + "12": 746732544.0, + "13": 871990976.0, + "14": 821645632.0, + "15": 724250816.0, + "16": 932241472.0, + "17": 648958912.0, + "18": 649120000.0, + "19": 925992960.0, + "20": 989207936.0, + "21": 819324096.0, + "22": 736955072.0, + "23": 910497792.0, + "24": 876716672.0, + "25": 843170688.0, + "26": 809573824.0, + "27": 854086912.0, + "28": 802857664.0, + "29": 805523328.0, + "30": 775645184.0, + "31": 771754624.0, + "32": 749733696.0, + "33": 718385216.0, + "34": 724771200.0, + "35": 737655104.0, + "36": 690419968.0, + "37": 673203456.0, + "38": 627239552.0, + "39": 614047168.0, + "40": 607288512.0, + "41": 582590592.0, + "42": 548211200.0, + "43": 532740640.0, + "44": 554239168.0, + "45": 514790528.0, + "46": 350258560.0, + "47": 472420128.0, + "48": 453788736.0, + "49": 440597216.0, + "50": 303063296.0 } }, 
"mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6637272576.0, - "2": 6637274624.0, - "3": 6637274624.0, - "4": 6637274624.0, - "5": 6637274624.0, - "6": 6637274624.0, - "7": 6637274624.0, - "8": 6637274624.0, - "9": 6637274624.0, - "10": 6637274624.0, - "11": 6637274624.0, - "12": 6637274624.0, - "13": 6637274624.0, - "14": 6637274624.0, - "15": 6637274624.0, - "16": 6637274624.0, - "17": 6637274624.0, - "18": 6637274624.0, - "19": 6637274624.0, - "20": 6637274624.0, - "21": 6637274624.0, - "22": 6637274624.0, - "23": 6637274624.0, - "24": 6637274624.0, - "25": 6637274624.0, - "26": 6637274624.0, - "27": 6637274624.0, - "28": 6637274624.0, - "29": 6637274624.0, - "30": 6637274624.0, - "31": 6637274624.0, - "32": 6637274624.0, - "33": 6637274624.0, - "34": 6637274624.0, - "35": 6637274624.0, - "36": 6637274624.0, - "37": 6637274624.0, - "38": 6637274624.0, - "39": 6637274624.0, - "40": 6637274624.0, - "41": 6637274624.0, - "42": 6637274624.0, - "43": 6637274624.0, - "44": 6637274624.0, - "45": 6637274624.0, - "46": 6637274624.0, - "47": 6637274624.0, - "48": 6637274624.0, - "49": 6637274624.0, - "50": 6637274624.0 + "1": 6637267456.0, + "2": 6637269504.0, + "3": 6637269504.0, + "4": 6637269504.0, + "5": 6637269504.0, + "6": 6637269504.0, + "7": 6637269504.0, + "8": 6637269504.0, + "9": 6637269504.0, + "10": 6637269504.0, + "11": 6637269504.0, + "12": 6637269504.0, + "13": 6637269504.0, + "14": 6637269504.0, + "15": 6637269504.0, + "16": 6637269504.0, + "17": 6637269504.0, + "18": 6637269504.0, + "19": 6637269504.0, + "20": 6637269504.0, + "21": 6637269504.0, + "22": 6637269504.0, + "23": 6637269504.0, + "24": 6637269504.0, + "25": 6637269504.0, + "26": 6637269504.0, + "27": 6637269504.0, + "28": 6637269504.0, + "29": 6637269504.0, + "30": 6637269504.0, + "31": 6637269504.0, + "32": 6637269504.0, + "33": 6637269504.0, + "34": 6637269504.0, + "35": 6637269504.0, + "36": 6637269504.0, + "37": 6637269504.0, + "38": 
6637269504.0, + "39": 6637269504.0, + "40": 6637269504.0, + "41": 6637269504.0, + "42": 6637269504.0, + "43": 6637269504.0, + "44": 6637269504.0, + "45": 6637269504.0, + "46": 6637269504.0, + "47": 6637269504.0, + "48": 6637269504.0, + "49": 6637269504.0, + "50": 6637269504.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 55056003072.0, - "2": 57810763776.0, - "3": 57920647168.0, - "4": 57920647168.0, - "5": 57920647168.0, - "6": 57920647168.0, - "7": 57920647168.0, - "8": 57920647168.0, - "9": 57920647168.0, - "10": 57920647168.0, - "11": 57920647168.0, - "12": 57920647168.0, - "13": 57920647168.0, - "14": 57920647168.0, - "15": 57920647168.0, - "16": 57920647168.0, - "17": 57920647168.0, - "18": 57920647168.0, - "19": 57920647168.0, - "20": 57920647168.0, - "21": 57920647168.0, - "22": 57920647168.0, - "23": 57920647168.0, - "24": 57920647168.0, - "25": 57920647168.0, - "26": 57920647168.0, - "27": 57920647168.0, - "28": 57920647168.0, - "29": 57920647168.0, - "30": 57920647168.0, - "31": 57920647168.0, - "32": 57920647168.0, - "33": 57920647168.0, - "34": 57961472000.0, - "35": 57961472000.0, - "36": 57961472000.0, - "37": 57961472000.0, - "38": 57961472000.0, - "39": 57961472000.0, - "40": 57961472000.0, - "41": 57961472000.0, - "42": 57961472000.0, - "43": 57961472000.0, - "44": 57961472000.0, - "45": 57961472000.0, - "46": 57961472000.0, - "47": 57961472000.0, - "48": 57961472000.0, - "49": 57961472000.0, - "50": 57961472000.0 + "1": 55055331328.0, + "2": 57809321984.0, + "3": 57918455808.0, + "4": 57918455808.0, + "5": 57918455808.0, + "6": 57918455808.0, + "7": 57918455808.0, + "8": 57918455808.0, + "9": 57918455808.0, + "10": 57918455808.0, + "11": 57918455808.0, + "12": 57918455808.0, + "13": 57931390976.0, + "14": 57931390976.0, + "15": 57931390976.0, + "16": 57931390976.0, + "17": 57931390976.0, + "18": 57931390976.0, + "19": 57931390976.0, + "20": 57931390976.0, + "21": 57931390976.0, + 
"22": 57931390976.0, + "23": 57931390976.0, + "24": 57931390976.0, + "25": 57931390976.0, + "26": 57931390976.0, + "27": 57931390976.0, + "28": 57931390976.0, + "29": 57931390976.0, + "30": 57931390976.0, + "31": 57931390976.0, + "32": 58003226624.0, + "33": 58003226624.0, + "34": 58003226624.0, + "35": 58003226624.0, + "36": 58003226624.0, + "37": 58003226624.0, + "38": 58003226624.0, + "39": 58003226624.0, + "40": 58003226624.0, + "41": 58003226624.0, + "42": 58003226624.0, + "43": 58003226624.0, + "44": 58183614464.0, + "45": 58234208256.0, + "46": 58555555840.0, + "47": 58555555840.0, + "48": 58555555840.0, + "49": 58555555840.0, + "50": 58780934144.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07648, - "2": 11.07404, - "3": 10.53854, - "4": 10.09813, - "5": 9.81166, - "6": 10.09741, - "7": 9.79481, - "8": 9.0642, - "9": 8.86016, - "10": 9.34039, - "11": 8.51318, - "12": 8.59467, - "13": 8.5292, - "14": 7.95757, - "15": 8.06962, - "16": 8.11802, - "17": 8.06993, - "18": 7.80587, - "19": 8.19192, - "20": 7.8906, - "21": 7.57063, - "22": 7.55091, - "23": 7.41606, - "24": 7.42454, - "25": 7.65274, - "26": 7.05583, - "27": 7.59747, - "28": 7.29984, - "29": 7.472, - "30": 7.61908, - "31": 7.35179, - "32": 7.52979, - "33": 7.59161, - "34": 7.66287, - "35": 7.17383, - "36": 7.04133, - "37": 7.37081, - "38": 7.1443, - "39": 7.50879, - "40": 7.48921, - "41": 7.43802, - "42": 7.19405, - "43": 7.17581, - "44": 7.35785, - "45": 7.13985, - "46": 6.84014, - "47": 7.25094, - "48": 7.09407, - "49": 7.52321, - "50": 6.98987 + "1": 11.07654, + "2": 11.07406, + "3": 10.53881, + "4": 10.09803, + "5": 9.81154, + "6": 10.06236, + "7": 9.79762, + "8": 9.07117, + "9": 8.87049, + "10": 9.127, + "11": 8.49853, + "12": 8.53046, + "13": 8.42444, + "14": 7.847, + "15": 7.99077, + "16": 8.05015, + "17": 8.00064, + "18": 7.73104, + "19": 8.11087, + "20": 7.82933, + "21": 7.52501, + "22": 7.49916, + "23": 7.36982, + "24": 7.37235, + 
"25": 7.61578, + "26": 7.02029, + "27": 7.56014, + "28": 7.2681, + "29": 7.44399, + "30": 7.58618, + "31": 7.32468, + "32": 7.50596, + "33": 7.5715, + "34": 7.63581, + "35": 7.15224, + "36": 7.01784, + "37": 7.35163, + "38": 7.12551, + "39": 7.48656, + "40": 7.47408, + "41": 7.42096, + "42": 7.17595, + "43": 7.16059, + "44": 7.34289, + "45": 7.11969, + "46": 6.82753, + "47": 7.23525, + "48": 7.08042, + "49": 7.51043, + "50": 6.9735 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 93.39829, - "2": 1.82958, - "3": 1.3241, - "4": 2.19661, - "5": 2.13156, - "6": 1.75452, - "7": 2.08539, - "8": 1.58016, - "9": 1.60816, - "10": 1.03407, - "11": 1.01797, - "12": 1.0168, - "13": 1.01666, - "14": 1.0748, - "15": 1.04137, - "16": 1.05864, - "17": 1.05961, - "18": 1.03233, - "19": 1.02728, - "20": 1.02917, - "21": 1.04313, - "22": 1.03054, - "23": 1.0313, - "24": 1.03789, - "25": 1.04414, - "26": 1.05561, - "27": 1.03361, - "28": 1.03142, - "29": 1.02437, - "30": 1.02195, - "31": 1.0172, - "32": 1.03318, - "33": 1.03742, - "34": 1.03628, - "35": 1.03575, - "36": 1.05127, - "37": 1.03273, - "38": 1.03381, - "39": 1.02923, - "40": 1.02986, - "41": 1.03249, - "42": 1.033, - "43": 1.03169, - "44": 1.03818, - "45": 1.02736, - "46": 1.02698, - "47": 1.03158, - "48": 1.02471, - "49": 1.03674, - "50": 1.0291 + "1": 69.29797, + "2": 1.7261, + "3": 1.40981, + "4": 2.16562, + "5": 1.7862, + "6": 1.7469, + "7": 1.96688, + "8": 1.97301, + "9": 1.74665, + "10": 1.69613, + "11": 1.02979, + "12": 1.02408, + "13": 1.03261, + "14": 1.02432, + "15": 1.0529, + "16": 1.04491, + "17": 1.03693, + "18": 1.03399, + "19": 1.03627, + "20": 1.02284, + "21": 1.01667, + "22": 1.02932, + "23": 1.03591, + "24": 1.03466, + "25": 1.03149, + "26": 1.03165, + "27": 1.02342, + "28": 1.03777, + "29": 1.04061, + "30": 1.05641, + "31": 1.02382, + "32": 1.01775, + "33": 1.03039, + "34": 1.03693, + "35": 1.03153, + "36": 1.02699, + "37": 1.02756, + "38": 1.02919, + 
"39": 1.01773, + "40": 1.03491, + "41": 1.03152, + "42": 1.03035, + "43": 1.0221, + "44": 1.05201, + "45": 1.02579, + "46": 1.02798, + "47": 1.03857, + "48": 1.02772, + "49": 1.0408, + "50": 1.03745 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json index 8cea616921e..58eb3fc16cd 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.94971, - "2": 10.95163, - "3": 10.51641, - "4": 9.9652, - "5": 9.94116, - "6": 9.67394, - "7": 10.19887, - "8": 9.50035, - "9": 9.54982, - "10": 9.79667, - "11": 9.30128, - "12": 9.40566, - "13": 9.39438, - "14": 8.84572, - "15": 9.02231, - "16": 9.06973, - "17": 9.04712, - "18": 8.75662, - "19": 9.18074, - "20": 8.86175, - "21": 8.53558, - "22": 8.55288, - "23": 8.42513, - "24": 8.37683, - "25": 8.64426, - "26": 7.9756, - "27": 8.57026, - "28": 8.1987, - "29": 8.39406, - "30": 8.67631, - "31": 8.29096, - "32": 8.43692, - "33": 8.55897, - "34": 8.66123, - "35": 8.08, - "36": 7.95214, - "37": 8.2979, - "38": 7.98177, - "39": 8.39281, - "40": 8.35852, - "41": 8.32006, - "42": 8.05954, - "43": 8.03381, - "44": 8.24236, - "45": 8.1025, - "46": 7.61814, - "47": 8.15364, - "48": 8.00693, - "49": 8.38704, - "50": 7.81592 + "1": 10.95004, + "2": 10.9521, + "3": 10.5115, + "4": 9.96454, + "5": 9.93941, + "6": 9.67273, + "7": 10.20975, + "8": 9.49716, + "9": 9.55902, + "10": 9.79742, + "11": 9.30109, + "12": 9.40483, + "13": 9.39546, + "14": 
8.84681, + "15": 9.02444, + "16": 9.07121, + "17": 9.04574, + "18": 8.75678, + "19": 9.18159, + "20": 8.8595, + "21": 8.53503, + "22": 8.55182, + "23": 8.42441, + "24": 8.37608, + "25": 8.64304, + "26": 7.97393, + "27": 8.56806, + "28": 8.19764, + "29": 8.3928, + "30": 8.67283, + "31": 8.289, + "32": 8.43572, + "33": 8.5568, + "34": 8.66018, + "35": 8.07934, + "36": 7.94976, + "37": 8.29565, + "38": 7.98044, + "39": 8.39201, + "40": 8.35513, + "41": 8.31876, + "42": 8.0583, + "43": 8.03283, + "44": 8.24243, + "45": 8.10277, + "46": 7.61696, + "47": 8.15273, + "48": 8.00569, + "49": 8.38688, + "50": 7.81491 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19403704.0, - "2": 19274216.0, - "3": 22517470.0, - "4": 83429816.0, - "5": 139167728.0, - "6": 138921280.0, - "7": 173470304.0, - "8": 200511856.0, - "9": 165696320.0, - "10": 166120112.0, - "11": 213254416.0, - "12": 187847360.0, - "13": 231586656.0, - "14": 226879072.0, - "15": 219025920.0, - "16": 205179664.0, - "17": 280450432.0, - "18": 181477792.0, - "19": 191026096.0, - "20": 186395632.0, - "21": 233632576.0, - "22": 231696832.0, - "23": 216390688.0, - "24": 215133760.0, - "25": 233079504.0, - "26": 244437920.0, - "27": 222637584.0, - "28": 278773952.0, - "29": 253409264.0, - "30": 240036736.0, - "31": 236599008.0, - "32": 205066624.0, - "33": 263303312.0, - "34": 200444544.0, - "35": 199033824.0, - "36": 243001216.0, - "37": 151181872.0, - "38": 175301280.0, - "39": 219001024.0, - "40": 220307936.0, - "41": 217385856.0, - "42": 230074176.0, - "43": 208226784.0, - "44": 148172720.0, - "45": 141103744.0, - "46": 132664976.0, - "47": 179619392.0, - "48": 118381144.0, - "49": 86643984.0, - "50": 113798320.0 + "1": 19403624.0, + "2": 19274194.0, + "3": 19372760.0, + "4": 86525248.0, + "5": 148575568.0, + "6": 145226704.0, + "7": 171879984.0, + "8": 195785248.0, + "9": 164124752.0, + "10": 167684736.0, + "11": 221077344.0, + "12": 200384224.0, + "13": 248872528.0, + 
"14": 211169424.0, + "15": 214304608.0, + "16": 216075632.0, + "17": 267845984.0, + "18": 170470336.0, + "19": 176865072.0, + "20": 187955392.0, + "21": 225750704.0, + "22": 247396816.0, + "23": 211643856.0, + "24": 205638464.0, + "25": 277022272.0, + "26": 291562304.0, + "27": 225789840.0, + "28": 288202368.0, + "29": 198390384.0, + "30": 213302208.0, + "31": 227204752.0, + "32": 271112416.0, + "33": 231840432.0, + "34": 203575536.0, + "35": 191152368.0, + "36": 222566928.0, + "37": 177810112.0, + "38": 228708544.0, + "39": 211168784.0, + "40": 215603968.0, + "41": 200089440.0, + "42": 228529888.0, + "43": 198782848.0, + "44": 141902272.0, + "45": 181922816.0, + "46": 115369856.0, + "47": 170214176.0, + "48": 137292832.0, + "49": 97654936.0, + "50": 160979632.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4883287040.0, - "2": 4883441152.0, - "3": 4881697280.0, - "4": 4883730944.0, - "5": 4882556416.0, - "6": 4882616832.0, - "7": 4883438080.0, - "8": 4881568256.0, - "9": 4883173888.0, - "10": 4882272768.0, - "11": 4883676672.0, - "12": 4881393152.0, - "13": 4883141120.0, - "14": 4883697152.0, - "15": 4882622976.0, - "16": 4881830400.0, - "17": 4881658368.0, - "18": 4881863168.0, - "19": 4883804672.0, - "20": 4881795584.0, - "21": 4883333632.0, - "22": 4882194944.0, - "23": 4882084352.0, - "24": 4884065792.0, - "25": 4881804800.0, - "26": 4883596800.0, - "27": 4883047936.0, - "28": 4882476544.0, - "29": 4883087872.0, - "30": 4882151936.0, - "31": 4882625024.0, - "32": 4883104256.0, - "33": 4882526720.0, - "34": 4882292224.0, - "35": 4882485760.0, - "36": 4882867712.0, - "37": 4882634240.0, - "38": 4882610688.0, - "39": 4881474048.0, - "40": 4881961472.0, - "41": 4882663936.0, - "42": 4881860096.0, - "43": 4881499648.0, - "44": 4883392000.0, - "45": 4882392576.0, - "46": 4882815488.0, - "47": 4883113472.0, - "48": 4882158080.0, - "49": 4881207808.0, - "50": 4881588736.0 + "1": 4883602432.0, + "2": 
4885017088.0, + "3": 4882657792.0, + "4": 4883046912.0, + "5": 4883725824.0, + "6": 4883713536.0, + "7": 4883040768.0, + "8": 4883273216.0, + "9": 4882952704.0, + "10": 4885949952.0, + "11": 4883990016.0, + "12": 4887679488.0, + "13": 4884011520.0, + "14": 4882899456.0, + "15": 4883515904.0, + "16": 4883990016.0, + "17": 4883410432.0, + "18": 4883673600.0, + "19": 4882903552.0, + "20": 4884541952.0, + "21": 4883138048.0, + "22": 4883247616.0, + "23": 4883839488.0, + "24": 4885058048.0, + "25": 4882676224.0, + "26": 4884058624.0, + "27": 4884724224.0, + "28": 4884874752.0, + "29": 4883127808.0, + "30": 4883252736.0, + "31": 4882955776.0, + "32": 4885190144.0, + "33": 4883845632.0, + "34": 4884392448.0, + "35": 4883083776.0, + "36": 4883851776.0, + "37": 4885246464.0, + "38": 4882680320.0, + "39": 4884296192.0, + "40": 4884689408.0, + "41": 4882836992.0, + "42": 4883972608.0, + "43": 4884519424.0, + "44": 4883354112.0, + "45": 4883495424.0, + "46": 4882788864.0, + "47": 4883144192.0, + "48": 4883688960.0, + "49": 4884182528.0, + "50": 4885279232.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 41208348672.0, - "2": 41208348672.0, - "3": 41208348672.0, - "4": 41208348672.0, - "5": 41208348672.0, - "6": 41208348672.0, - "7": 41208348672.0, - "8": 41208348672.0, - "9": 41208348672.0, - "10": 41208348672.0, - "11": 41208348672.0, - "12": 41208348672.0, - "13": 41208348672.0, - "14": 41208348672.0, - "15": 41208348672.0, - "16": 41208348672.0, - "17": 41208348672.0, - "18": 41208348672.0, - "19": 41208348672.0, - "20": 41208348672.0, - "21": 41208348672.0, - "22": 41208348672.0, - "23": 41208348672.0, - "24": 41208348672.0, - "25": 41208348672.0, - "26": 41208348672.0, - "27": 41208348672.0, - "28": 41208348672.0, - "29": 41208348672.0, - "30": 41208348672.0, - "31": 41208348672.0, - "32": 41208348672.0, - "33": 41208348672.0, - "34": 41208348672.0, - "35": 41208348672.0, - "36": 41208348672.0, - "37": 
41208348672.0, - "38": 41208348672.0, - "39": 41208348672.0, - "40": 41208348672.0, - "41": 41208348672.0, - "42": 41208348672.0, - "43": 41208348672.0, - "44": 41208348672.0, - "45": 41208348672.0, - "46": 41208348672.0, - "47": 41208348672.0, - "48": 41208348672.0, - "49": 41208348672.0, - "50": 41208348672.0 + "1": 41210470400.0, + "2": 41210470400.0, + "3": 41210470400.0, + "4": 41210470400.0, + "5": 41210470400.0, + "6": 41210470400.0, + "7": 41210470400.0, + "8": 41210470400.0, + "9": 41210470400.0, + "10": 41210470400.0, + "11": 41210470400.0, + "12": 41210470400.0, + "13": 41210470400.0, + "14": 41210470400.0, + "15": 41210470400.0, + "16": 41210470400.0, + "17": 41210470400.0, + "18": 41210470400.0, + "19": 41210470400.0, + "20": 41210470400.0, + "21": 41210470400.0, + "22": 41210470400.0, + "23": 41210470400.0, + "24": 41210470400.0, + "25": 41210470400.0, + "26": 41210470400.0, + "27": 41210470400.0, + "28": 41210470400.0, + "29": 41210470400.0, + "30": 41210470400.0, + "31": 41210470400.0, + "32": 41210470400.0, + "33": 41210470400.0, + "34": 41210470400.0, + "35": 41210470400.0, + "36": 41210470400.0, + "37": 41210470400.0, + "38": 41210470400.0, + "39": 41210470400.0, + "40": 41210470400.0, + "41": 41210470400.0, + "42": 41210470400.0, + "43": 41210470400.0, + "44": 41210470400.0, + "45": 41210470400.0, + "46": 41210470400.0, + "47": 41210470400.0, + "48": 41210470400.0, + "49": 41210470400.0, + "50": 41210470400.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 89.10928, - "2": 1.08143, - "3": 0.94222, - "4": 0.89675, - "5": 1.34524, - "6": 1.06972, - "7": 1.00314, - "8": 1.04961, - "9": 0.86611, - "10": 0.86248, - "11": 0.98739, - "12": 0.86057, - "13": 0.86777, - "14": 0.85834, - "15": 0.8559, - "16": 0.85522, - "17": 0.84644, - "18": 0.85748, - "19": 0.85218, - "20": 0.85342, - "21": 0.84029, - "22": 0.84342, - "23": 0.84297, - "24": 0.83925, - "25": 0.8439, - "26": 0.85696, - "27": 0.83981, - 
"28": 0.84643, - "29": 0.8433, - "30": 0.86234, - "31": 0.85636, - "32": 0.84184, - "33": 0.84501, - "34": 0.84316, - "35": 0.83806, - "36": 0.84143, - "37": 0.84447, - "38": 0.84137, - "39": 0.84133, - "40": 0.84321, - "41": 0.84019, - "42": 0.84164, - "43": 0.83741, - "44": 0.84203, - "45": 0.83966, - "46": 0.84109, - "47": 0.83945, - "48": 0.84001, - "49": 0.84194, - "50": 0.83578 + "1": 86.8085, + "2": 1.10913, + "3": 0.99097, + "4": 0.89412, + "5": 1.25997, + "6": 0.98162, + "7": 0.98318, + "8": 1.13296, + "9": 0.88126, + "10": 0.8633, + "11": 2.2744, + "12": 4.5393, + "13": 3.22763, + "14": 1.64923, + "15": 0.86595, + "16": 0.86575, + "17": 0.85272, + "18": 0.85454, + "19": 0.85281, + "20": 0.87018, + "21": 0.84654, + "22": 0.8494, + "23": 0.84882, + "24": 0.84482, + "25": 0.85311, + "26": 0.84678, + "27": 0.84096, + "28": 0.8412, + "29": 0.84156, + "30": 0.84475, + "31": 0.84747, + "32": 0.85058, + "33": 0.84977, + "34": 0.8479, + "35": 0.85234, + "36": 0.85012, + "37": 0.85087, + "38": 0.84594, + "39": 0.84558, + "40": 0.84807, + "41": 0.84183, + "42": 0.8439, + "43": 0.84221, + "44": 0.84248, + "45": 0.84257, + "46": 0.83922, + "47": 0.84311, + "48": 0.84159, + "49": 0.84011, + "50": 0.8353 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json index 0835e95b926..1ba051f4889 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json @@ -1,142 +1 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 100, - "step_interval": 5, - "values": 
{ - "1": 10.82922, - "5": 10.85652, - "10": 10.79298, - "15": 10.8067, - "20": 10.72654, - "25": 10.53282, - "30": 10.35802, - "35": 10.24483, - "40": 10.05533, - "45": 9.77951, - "50": 9.86874, - "55": 9.82995, - "60": 9.449, - "65": 8.89366, - "70": 9.71127, - "75": 9.39451, - "80": 9.38198, - "85": 9.58333, - "90": 9.79944, - "95": 9.50213, - "100": 9.37131 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 100, - "step_interval": 5, - "values": { - "1": 27245.0, - "5": 31369.0, - "10": 25870.0, - "15": 29830.0, - "20": 28243.0, - "25": 27636.0, - "30": 30387.0, - "35": 31488.0, - "40": 34779.0, - "45": 35158.0, - "50": 38234.0, - "55": 37133.0, - "60": 40450.0, - "65": 40947.0, - "70": 43436.0, - "75": 39925.0, - "80": 51863.0, - "85": 2145177.0, - "90": 51330.0, - "95": 45247.0, - "100": 163741.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 100, - "step_interval": 5, - "values": { - "1": 787511296.0, - "5": 787542016.0, - "10": 787500032.0, - "15": 787499008.0, - "20": 787500032.0, - "25": 787446272.0, - "30": 787429888.0, - "35": 787413504.0, - "40": 787409920.0, - "45": 787394560.0, - "50": 787384320.0, - "55": 787383808.0, - "60": 787389952.0, - "65": 787346432.0, - "70": 787387904.0, - "75": 787437568.0, - "80": 787405312.0, - "85": 787407360.0, - "90": 787441664.0, - "95": 787445248.0, - "100": 787433472.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 100, - "step_interval": 5, - "values": { - "1": 2465793024.0, - "5": 2492764160.0, - "10": 2492764160.0, - "15": 2492764160.0, - "20": 2492764160.0, - "25": 2492764160.0, - "30": 2492764160.0, - "35": 2492764160.0, - "40": 2492764160.0, - "45": 2492764160.0, - "50": 2492764160.0, - "55": 2492764160.0, - "60": 2492764160.0, - "65": 2492764160.0, - "70": 2492764160.0, - "75": 2492764160.0, - "80": 2492764160.0, - "85": 2492764160.0, - "90": 2492764160.0, - "95": 2492764160.0, - "100": 2492764160.0 - } - }, - "iteration-time": { - "start_step": 1, - 
"end_step": 100, - "step_interval": 5, - "values": { - "1": 9.68104, - "5": 0.32859, - "10": 0.30772, - "15": 0.31234, - "20": 0.29254, - "25": 0.29296, - "30": 0.31344, - "35": 0.31026, - "40": 0.30514, - "45": 0.30481, - "50": 0.30324, - "55": 0.29929, - "60": 0.30103, - "65": 0.32008, - "70": 0.31307, - "75": 0.2933, - "80": 0.29351, - "85": 0.29283, - "90": 0.29375, - "95": 0.29458, - "100": 0.29103 - } - } -} \ No newline at end of file +{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.83281, "5": 10.85975, "10": 10.79613, "15": 10.80527, "20": 10.72502, "25": 10.53599, "30": 10.3571, "35": 10.24605, "40": 10.05992, "45": 9.7836, "50": 9.8722, "55": 9.83189, "60": 9.45075, "65": 8.89679, "70": 9.71414, "75": 9.39795, "80": 9.38169, "85": 9.58585, "90": 9.7999, "95": 9.50528, "100": 9.37224}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 27013.0, "5": 31736.0, "10": 25785.0, "15": 30383.0, "20": 28435.0, "25": 27493.0, "30": 30329.0, "35": 31750.0, "40": 34279.0, "45": 34634.0, "50": 38531.0, "55": 37465.0, "60": 40172.0, "65": 40624.0, "70": 44852.0, "75": 39231.0, "80": 130535.0, "85": 123250.0, "90": 47793.0, "95": 167340.0, "100": 163328.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 814390272.0, "5": 814420480.0, "10": 814376448.0, "15": 814376960.0, "20": 814373376.0, "25": 814321152.0, "30": 814306304.0, "35": 814292992.0, "40": 814288896.0, "45": 814272000.0, "50": 814262272.0, "55": 814258688.0, "60": 814268416.0, "65": 814220800.0, "70": 814266880.0, "75": 814318080.0, "80": 814285312.0, "85": 814289408.0, "90": 814315520.0, "95": 814320128.0, "100": 814311424.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2111314944.0, "5": 2370209280.0, "10": 2370209280.0, "15": 2370209280.0, "20": 2370209280.0, "25": 2370209280.0, "30": 2370209280.0, "35": 2370209280.0, "40": 
2370209280.0, "45": 2370209280.0, "50": 2370209280.0, "55": 2370209280.0, "60": 2370209280.0, "65": 2370209280.0, "70": 2370209280.0, "75": 2370209280.0, "80": 2370209280.0, "85": 2370209280.0, "90": 2370209280.0, "95": 2370209280.0, "100": 2370209280.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 20.98318, "5": 0.79797, "10": 0.74028, "15": 0.67279, "20": 0.62948, "25": 0.61132, "30": 0.61547, "35": 0.6152, "40": 0.60421, "45": 0.59124, "50": 0.5891, "55": 0.57048, "60": 0.54799, "65": 0.52185, "70": 0.51195, "75": 0.50105, "80": 0.4628, "85": 0.45992, "90": 0.46498, "95": 0.4599, "100": 0.42568}}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json deleted file mode 100644 index 7e299df5257..00000000000 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json +++ /dev/null @@ -1,537 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 100, - "step_interval": 1, - "values": { - "1": 10.82922, - "2": 10.84163, - "3": 10.84245, - "4": 10.82, - "5": 10.85652, - "6": 10.86906, - "7": 10.83778, - "8": 10.84312, - "9": 10.84423, - "10": 10.79298, - "11": 10.86697, - "12": 10.86875, - "13": 10.86207, - "14": 10.86919, - "15": 10.8067, - "16": 10.8057, - "17": 10.77686, - "18": 10.79541, - "19": 10.78384, - "20": 10.72654, - "21": 10.69491, - "22": 10.54462, - "23": 10.6993, - "24": 10.58151, - "25": 10.53282, - "26": 10.58817, - "27": 10.601, - "28": 10.57563, - "29": 10.58022, - "30": 10.35802, - "31": 10.08769, - "32": 10.44466, - "33": 10.4477, - "34": 10.18704, - "35": 10.24483, - "36": 10.19713, - "37": 10.32294, - "38": 
10.17101, - "39": 10.37026, - "40": 10.05533, - "41": 10.09491, - "42": 10.17971, - "43": 9.78263, - "44": 9.91346, - "45": 9.77951, - "46": 9.75648, - "47": 10.09647, - "48": 9.80391, - "49": 9.46649, - "50": 9.86874, - "51": 9.79428, - "52": 9.68303, - "53": 10.03314, - "54": 9.9113, - "55": 9.82995, - "56": 9.57839, - "57": 9.42377, - "58": 9.80549, - "59": 9.53292, - "60": 9.449, - "61": 9.65293, - "62": 9.95672, - "63": 9.33775, - "64": 9.74194, - "65": 8.89366, - "66": 9.67317, - "67": 9.33002, - "68": 9.76517, - "69": 9.76336, - "70": 9.71127, - "71": 9.59511, - "72": 9.54797, - "73": 9.47124, - "74": 8.89297, - "75": 9.39451, - "76": 9.04721, - "77": 10.04318, - "78": 9.70313, - "79": 9.35169, - "80": 9.38198, - "81": 9.45146, - "82": 9.67546, - "83": 9.27658, - "84": 9.39241, - "85": 9.58333, - "86": 9.04518, - "87": 9.56487, - "88": 9.72459, - "89": 9.57019, - "90": 9.79944, - "91": 9.30737, - "92": 9.3313, - "93": 9.04109, - "94": 8.80259, - "95": 9.50213, - "96": 9.5021, - "97": 9.28183, - "98": 9.64883, - "99": 8.8594, - "100": 9.37131 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 100, - "step_interval": 1, - "values": { - "1": 27245.0, - "2": 28958.0, - "3": 29464.0, - "4": 28046.0, - "5": 31369.0, - "6": 33287.0, - "7": 31200.0, - "8": 26921.0, - "9": 30008.0, - "10": 25870.0, - "11": 33681.0, - "12": 30344.0, - "13": 32737.0, - "14": 33315.0, - "15": 29830.0, - "16": 32475.0, - "17": 30747.0, - "18": 30381.0, - "19": 31032.0, - "20": 28243.0, - "21": 29224.0, - "22": 27340.0, - "23": 34119.0, - "24": 29049.0, - "25": 27636.0, - "26": 30662.0, - "27": 32009.0, - "28": 33355.0, - "29": 34714.0, - "30": 30387.0, - "31": 28212.0, - "32": 33411.0, - "33": 34696.0, - "34": 30053.0, - "35": 31488.0, - "36": 32943.0, - "37": 35829.0, - "38": 33740.0, - "39": 37632.0, - "40": 34779.0, - "41": 33958.0, - "42": 36396.0, - "43": 34088.0, - "44": 34090.0, - "45": 35158.0, - "46": 36174.0, - "47": 39772.0, - "48": 36516.0, - "49": 36733.0, - "50": 
38234.0, - "51": 38608.0, - "52": 37030.0, - "53": 42442.0, - "54": 40944.0, - "55": 37133.0, - "56": 41001.0, - "57": 37524.0, - "58": 42317.0, - "59": 40804.0, - "60": 40450.0, - "61": 41478.0, - "62": 39766.0, - "63": 37941.0, - "64": 42197.0, - "65": 40947.0, - "66": 44094.0, - "67": 41958.0, - "68": 40060.0, - "69": 42189.0, - "70": 43436.0, - "71": 42748.0, - "72": 44280.0, - "73": 47478.0, - "74": 41456.0, - "75": 39925.0, - "76": 43490.0, - "77": 45636.0, - "78": 2141470.0, - "79": 46055.0, - "80": 51863.0, - "81": 151341.0, - "82": 49835.0, - "83": 143360.0, - "84": 2141546.0, - "85": 2145177.0, - "86": 132114.0, - "87": 2147022.0, - "88": 59899.0, - "89": 162883.0, - "90": 51330.0, - "91": 2141901.0, - "92": 44946.0, - "93": 138194.0, - "94": 2145772.0, - "95": 45247.0, - "96": 135045.0, - "97": 53170.0, - "98": 168576.0, - "99": 2141797.0, - "100": 163741.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 100, - "step_interval": 1, - "values": { - "1": 787516416.0, - "2": 787540992.0, - "3": 787524096.0, - "4": 787512320.0, - "5": 787547136.0, - "6": 787537920.0, - "7": 787512832.0, - "8": 787524608.0, - "9": 787528192.0, - "10": 787505152.0, - "11": 787522048.0, - "12": 787520000.0, - "13": 787529728.0, - "14": 787529216.0, - "15": 787504128.0, - "16": 787513344.0, - "17": 787503104.0, - "18": 787489280.0, - "19": 787514880.0, - "20": 787505152.0, - "21": 787479552.0, - "22": 787486208.0, - "23": 787478528.0, - "24": 787486208.0, - "25": 787451392.0, - "26": 787482112.0, - "27": 787470848.0, - "28": 787450368.0, - "29": 787458048.0, - "30": 787435008.0, - "31": 787406848.0, - "32": 787424256.0, - "33": 787435520.0, - "34": 787426304.0, - "35": 787418624.0, - "36": 787436544.0, - "37": 787428352.0, - "38": 787436544.0, - "39": 787417600.0, - "40": 787415040.0, - "41": 787405824.0, - "42": 787415040.0, - "43": 787367936.0, - "44": 787392512.0, - "45": 787399680.0, - "46": 787355136.0, - "47": 787411456.0, - "48": 787354112.0, - "49": 
787374080.0, - "50": 787389440.0, - "51": 787375616.0, - "52": 787383808.0, - "53": 787379712.0, - "54": 787384832.0, - "55": 787388928.0, - "56": 787388928.0, - "57": 787351040.0, - "58": 787382784.0, - "59": 787374080.0, - "60": 787395072.0, - "61": 787405312.0, - "62": 787405824.0, - "63": 787373056.0, - "64": 787388928.0, - "65": 787351552.0, - "66": 787386880.0, - "67": 787392000.0, - "68": 787399168.0, - "69": 787383296.0, - "70": 787393024.0, - "71": 787406848.0, - "72": 787400704.0, - "73": 787401216.0, - "74": 787403264.0, - "75": 787442688.0, - "76": 787444736.0, - "77": 787445760.0, - "78": 787395072.0, - "79": 787430400.0, - "80": 787410432.0, - "81": 787412992.0, - "82": 787427840.0, - "83": 787428864.0, - "84": 787412480.0, - "85": 787412480.0, - "86": 787394560.0, - "87": 787452928.0, - "88": 787414528.0, - "89": 787404800.0, - "90": 787446784.0, - "91": 787446272.0, - "92": 787446784.0, - "93": 787430400.0, - "94": 787440128.0, - "95": 787450368.0, - "96": 787454976.0, - "97": 787427328.0, - "98": 787475968.0, - "99": 787419136.0, - "100": 787438592.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 100, - "step_interval": 1, - "values": { - "1": 2479493120.0, - "2": 2485449728.0, - "3": 2487249408.0, - "4": 2487249408.0, - "5": 2495991808.0, - "6": 2495991808.0, - "7": 2495991808.0, - "8": 2495991808.0, - "9": 2495991808.0, - "10": 2495991808.0, - "11": 2495991808.0, - "12": 2495991808.0, - "13": 2495991808.0, - "14": 2495991808.0, - "15": 2495991808.0, - "16": 2495991808.0, - "17": 2495991808.0, - "18": 2495991808.0, - "19": 2495991808.0, - "20": 2495991808.0, - "21": 2495991808.0, - "22": 2495991808.0, - "23": 2495991808.0, - "24": 2495991808.0, - "25": 2495991808.0, - "26": 2495991808.0, - "27": 2495991808.0, - "28": 2495991808.0, - "29": 2495991808.0, - "30": 2495991808.0, - "31": 2495991808.0, - "32": 2495991808.0, - "33": 2495991808.0, - "34": 2495991808.0, - "35": 2495991808.0, - "36": 2495991808.0, - "37": 
2495991808.0, - "38": 2495991808.0, - "39": 2495991808.0, - "40": 2495991808.0, - "41": 2495991808.0, - "42": 2495991808.0, - "43": 2495991808.0, - "44": 2495991808.0, - "45": 2495991808.0, - "46": 2495991808.0, - "47": 2495991808.0, - "48": 2495991808.0, - "49": 2495991808.0, - "50": 2495991808.0, - "51": 2495991808.0, - "52": 2495991808.0, - "53": 2495991808.0, - "54": 2495991808.0, - "55": 2495991808.0, - "56": 2495991808.0, - "57": 2495991808.0, - "58": 2495991808.0, - "59": 2495991808.0, - "60": 2495991808.0, - "61": 2495991808.0, - "62": 2495991808.0, - "63": 2495991808.0, - "64": 2495991808.0, - "65": 2495991808.0, - "66": 2495991808.0, - "67": 2495991808.0, - "68": 2495991808.0, - "69": 2495991808.0, - "70": 2495991808.0, - "71": 2495991808.0, - "72": 2495991808.0, - "73": 2495991808.0, - "74": 2495991808.0, - "75": 2495991808.0, - "76": 2495991808.0, - "77": 2495991808.0, - "78": 2495991808.0, - "79": 2495991808.0, - "80": 2495991808.0, - "81": 2495991808.0, - "82": 2495991808.0, - "83": 2495991808.0, - "84": 2495991808.0, - "85": 2495991808.0, - "86": 2495991808.0, - "87": 2495991808.0, - "88": 2495991808.0, - "89": 2495991808.0, - "90": 2495991808.0, - "91": 2495991808.0, - "92": 2495991808.0, - "93": 2495991808.0, - "94": 2495991808.0, - "95": 2495991808.0, - "96": 2495991808.0, - "97": 2495991808.0, - "98": 2495991808.0, - "99": 2495991808.0, - "100": 2495991808.0 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 100, - "step_interval": 1, - "values": { - "1": 12.11313, - "2": 0.4805, - "3": 0.36965, - "4": 0.36695, - "5": 0.31705, - "6": 0.31275, - "7": 0.31299, - "8": 0.29866, - "9": 0.28961, - "10": 0.28859, - "11": 0.29067, - "12": 0.29044, - "13": 0.29806, - "14": 0.29287, - "15": 0.29391, - "16": 0.3175, - "17": 0.28363, - "18": 0.2818, - "19": 0.29347, - "20": 0.28931, - "21": 0.29103, - "22": 0.28444, - "23": 0.28907, - "24": 0.27608, - "25": 0.28277, - "26": 0.28656, - "27": 0.28921, - "28": 0.30243, - "29": 0.30435, - "30": 
0.31231, - "31": 0.30439, - "32": 0.31412, - "33": 0.28887, - "34": 0.29613, - "35": 0.29738, - "36": 0.29754, - "37": 0.3019, - "38": 0.2933, - "39": 0.2944, - "40": 0.29283, - "41": 0.29592, - "42": 0.29673, - "43": 0.29319, - "44": 0.30127, - "45": 0.29921, - "46": 0.29904, - "47": 0.28795, - "48": 0.29918, - "49": 0.28711, - "50": 0.29645, - "51": 0.28777, - "52": 0.29536, - "53": 0.2847, - "54": 0.28286, - "55": 0.2874, - "56": 0.28699, - "57": 0.28614, - "58": 0.29825, - "59": 0.28363, - "60": 0.29423, - "61": 0.29226, - "62": 0.2896, - "63": 0.28065, - "64": 0.29533, - "65": 0.29842, - "66": 0.28487, - "67": 0.28419, - "68": 0.29474, - "69": 0.28383, - "70": 0.28417, - "71": 0.29253, - "72": 0.28737, - "73": 0.27923, - "74": 0.28728, - "75": 0.29383, - "76": 0.28157, - "77": 0.64771, - "78": 0.29148, - "79": 0.28742, - "80": 0.29245, - "81": 0.28827, - "82": 0.28368, - "83": 0.28963, - "84": 0.29234, - "85": 0.28183, - "86": 0.28337, - "87": 0.27879, - "88": 0.28388, - "89": 0.28309, - "90": 0.28852, - "91": 0.28254, - "92": 0.28375, - "93": 0.28633, - "94": 0.28567, - "95": 0.28235, - "96": 0.28513, - "97": 0.27951, - "98": 0.27851, - "99": 0.28336, - "100": 0.27744 - } - } -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml index 8874f9cf045..3ecd68b9841 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml @@ -56,7 +56,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true - --ckpt-format: fsdp_dtensor + --ckpt-format: 
torch_dist --dist-ckpt-optim-fully-reshardable: true --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 607d48380d5..8164ca37df8 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -106,13 +106,14 @@ products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - environment: [lts] - scope: [nightly] + # TODO: The migration of custom fsdp causes EP + FSDP to be temporarily unavailable, which will be fixed in a subsequent MR. + # - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] + # products: + # - environment: [dev] + # scope: [mr] + # platforms: [dgx_h100] + # - environment: [lts] + # scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective] products: - environment: [dev] diff --git a/tools/checkpoint/checkpoint_inspector.py b/tools/checkpoint/checkpoint_inspector.py index c62f0ca7417..34afa27755f 100644 --- a/tools/checkpoint/checkpoint_inspector.py +++ b/tools/checkpoint/checkpoint_inspector.py @@ -8,8 +8,6 @@ import time import re import shutil -from typing import Optional -import tempfile import click import torch @@ -21,7 +19,6 @@ FileSystemReader, FileSystemWriter, ) -from torch.distributed.checkpoint.format_utils import dcp_to_torch_save from torch.distributed.checkpoint.metadata import ( BytesStorageMetadata, TensorStorageMetadata, @@ -67,8 +64,7 @@ def cli(): @cli.command() @click.argument("checkpoint_dir", type=click.Path(exists=True)) @click.option("--enable-msc", is_flag=True, help="Enable MultiStorageClient feature.") -@click.option("--not-ignore-param-to-group-meta", is_flag=True, help="Ignore 
parameter-to-group metadata.") -def inspect(checkpoint_dir, enable_msc, not_ignore_param_to_group_meta): +def inspect(checkpoint_dir, enable_msc): """Inspect a Megatron Core Distributed Checkpoint""" ckpt_path = Path(checkpoint_dir) @@ -142,8 +138,6 @@ def inspect(checkpoint_dir, enable_msc, not_ignore_param_to_group_meta): ] click.echo(" | ".join(stats) + "\n") - ignore_param_to_group_meta = not not_ignore_param_to_group_meta - ignore_param_to_group_meta_count = 0 for key, value in metadata.state_dict_metadata.items(): bullet = click.style("►", fg="blue") key_styled = click.style(key, fg="green") @@ -153,18 +147,11 @@ def inspect(checkpoint_dir, enable_msc, not_ignore_param_to_group_meta): shape = click.style(f"{tuple(value.size)}", fg="magenta") click.echo(f" {bullet} {key_styled} [{dtype}, shape={shape}]") elif isinstance(value, BytesStorageMetadata): - if ignore_param_to_group_meta and key.startswith("optimizer.param_to_group_meta."): - ignore_param_to_group_meta_count += 1 - continue click.echo(f" {bullet} {key_styled} {click.style('[BYTES]', fg='yellow')}") else: click.echo( f" {bullet} {key_styled} {click.style('[UNKNOWN TYPE]', fg='red')}" ) - if ignore_param_to_group_meta: - click.echo( - click.style(f"Ignored parameter-to-group metadata: {ignore_param_to_group_meta_count}", fg="yellow") - ) # MCore data section try: @@ -336,10 +323,8 @@ def convert_checkpoint( output_dir, swiglu, process_group, - optimizer_param_to_group_prefix="optimizer.param_to_group_meta.module.module.module", optimizer_state_prefix="optimizer.state.module.module.module", model_weight_prefix="model.module", - param_to_param_group_map={}, ): """Convert a Megatron Core Distributed Checkpoint from torch_dist to standard fsdp_dtensor format.""" device_mesh = DeviceMesh.from_group(process_group, device_type="cuda") @@ -386,104 +371,6 @@ def _free_up_some_gpu_memory(): gc.collect() torch.cuda.empty_cache() - def split_layers( - key: str, - value: torch.Tensor, - orig_shape: 
Optional[torch.Size] = None, - ) -> dict[str, torch.Tensor]: - """ - Split layers into separate tensors. - """ - _free_up_some_gpu_memory() - layers = {} - for i, v in enumerate(split_dtensor(value, 1, dim=0)): - v = gather_uneven_dtensor_to_full_tensor(v).reshape( - orig_shape[1:] if orig_shape else value.shape[1:] - ).redistribute(placements=[Shard(0)]) - - layer_key = key.replace(".layers.", f".layers.{i}.") - layers[layer_key] = v - - return layers - - def split_expert_weights( - key: str, - value: torch.Tensor, - orig_shape: Optional[torch.Size] = None, - ) -> dict[str, torch.Tensor]: - """ - Split expert weights into separate tensors for each expert. - """ - experts = {} - layer_key = key.replace(".experts.experts.", ".experts.") - expert_weights = split_dtensor(value, 1, dim=0) - for expert_idx, expert_weight in enumerate(expert_weights): - layer_key_parts = layer_key.split(".weight", 1) - if len(layer_key_parts) == 1: - expert_key = f"{layer_key}{expert_idx}" - elif len(layer_key_parts) == 2: - expert_key = f"{layer_key_parts[0]}.weight{expert_idx}{layer_key_parts[1]}" - else: - raise ValueError(f"Unexpected expert layer key: {layer_key}") - - expert_weight = gather_uneven_dtensor_to_full_tensor(expert_weight) - expert_shape = orig_shape[1:] if orig_shape else value.shape[1:] - # Handle optimizer states for expert linear_fc2 when ETP is enabled - if ( - layer_key.startswith("optimizer.state.") - and "linear_fc2" in layer_key - and expert_weight.shape[-2] > 1 - ): - tp_size = expert_weight.shape[-2] - rows, cols = expert_shape - # Reshape to split column dimension by tp_size - expert_weight = expert_weight.reshape( - *expert_weight.shape[:-1], rows, cols // tp_size - ) - dims = list(range(expert_weight.ndim)) - dims[-3], dims[-2] = dims[-2], dims[-3] - expert_weight = ( - expert_weight.permute(*dims) - .reshape(expert_shape) - .redistribute(placements=[Shard(0)]) - ) - else: - expert_weight = expert_weight.reshape(expert_shape).redistribute( - 
placements=[Shard(0)] - ) - experts[expert_key] = expert_weight - return experts - - def is_swiglu_key(key): - return any(re.search(pat, key) for pat in [ - r"(.*)\.mlp\.linear_fc1\.weight", - r"(.*)\.mlp\.linear_fc1\.bias", - r"(.*)\.mlp\.experts\.linear_fc1\.weight(\d+)", - r"(.*)\.mlp\.experts\.linear_fc1\.bias(\d+)", - r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.weight", - r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.bias", - r"(.*)\.mlp\.shared_experts\.linear_fc1\.weight", - r"(.*)\.mlp\.shared_experts\.linear_fc1\.bias", - ]) - - def split_swiglu_weight(key: str, value: torch.Tensor) -> dict[str, torch.Tensor]: - """ - Split SwiGLU weights into separate tensors. - """ - value = gather_uneven_dtensor_to_full_tensor(value) - swiglu_w_and_v = {} - w, v = torch.chunk(value, 2, dim=0) - w = w.redistribute(placements=[Shard(0)]) - v = v.redistribute(placements=[Shard(0)]) - w_key = re.sub(r'(weight\d*)(.*)', r'\1_w\2', key) - v_key = re.sub(r'(weight\d*)(.*)', r'\1_v\2', key) - swiglu_w_and_v[w_key] = w - swiglu_w_and_v[v_key] = v - return swiglu_w_and_v - - def has_layer_index(key: str) -> bool: - return bool(re.search(r"layers\.(\d+)\.", key)) - while state_dict: key, value = state_dict.popitem() if torch.distributed.get_rank() == 0: @@ -500,11 +387,9 @@ def has_layer_index(key: str) -> bool: # Special handling for optimizer state key_list = key.split(".") new_key = f"{optimizer_state_prefix}.{'.'.join(key_list[3:])}.{key_list[2]}" - is_param = False else: # Special handling for module parameters new_key = f"{model_weight_prefix}.{key}" - is_param = True # Handle dist-opt flatten tensors if ( @@ -521,47 +406,68 @@ def has_layer_index(key: str) -> bool: else: orig_shape = None - # Handle multi-layer / experts tensors - split_tensors = {} - if ".layers." in new_key and not has_layer_index(new_key): - split_tensors = split_layers(new_key, value, orig_shape) - elif ".experts.experts." 
in new_key: - split_tensors = split_expert_weights(new_key, value, orig_shape) - else: - if orig_shape: - value = gather_uneven_dtensor_to_full_tensor(value) - # Handle optimizer states with partition_dim=1 when TP is enabled - if ( - new_key.startswith("optimizer.state.") - and value.ndim > 2 - and value.shape[-2] > 1 - ): - tp_size = value.shape[-2] - rows, cols = orig_shape - # Reshape to split column dimension by tp_size - value = value.reshape(*value.shape[:-1], rows, cols // tp_size) - dims = list(range(value.ndim)) - dims[-3], dims[-2] = dims[-2], dims[-3] - value = ( - value.permute(*dims) - .reshape(orig_shape) - .redistribute(placements=[Shard(0)]) + # Handle multi-layer tensors + if ".layers." in new_key: + n_layer = value.shape[0] + + _free_up_some_gpu_memory() + per_layer_values = [ + gather_uneven_dtensor_to_full_tensor(v).redistribute( + placements=[Shard(len(v.shape) - 1)] + ) + for v in split_dtensor(value, 1, dim=0) + ] + for i in range(n_layer): + if orig_shape is not None: + layer_shape = orig_shape[1:] + else: + layer_shape = value.shape[1:] + + per_layer_values[i] = ( + per_layer_values[i] + .reshape(layer_shape) + .redistribute(placements=[Shard(0)]) + ) + for i in range(0, n_layer): + layer_key = new_key.replace(".layers.", f".layers.{i}.") + if swiglu and "mlp.linear_fc1.weight" in layer_key: + # Special case for SwiGLU + w, v = torch.chunk(per_layer_values[i], 2, dim=0) + w = w.redistribute(placements=[Shard(0)]) + v = v.redistribute(placements=[Shard(0)]) + w_key = layer_key.replace( + "mlp.linear_fc1.weight", "mlp.linear_fc1.weight_w" + ) + v_key = layer_key.replace( + "mlp.linear_fc1.weight", "mlp.linear_fc1.weight_v" ) + # Store both w and v in the state_dict + fsdp_dtensor_state_dict[w_key] = w + fsdp_dtensor_state_dict[v_key] = v + elif ( + "experts.experts.linear_fc1.weight" in layer_key + or "experts.experts.linear_fc2.weight" in layer_key + ): + # Special case for MoE + layer_key = layer_key.replace(".experts.experts.", 
".experts.") + expert_weights = torch.split(per_layer_values[i], 1, dim=0) + for expert_idx, expert_weight in enumerate(expert_weights): + expert_key = f"{layer_key}{expert_idx}" + fsdp_dtensor_state_dict[expert_key] = expert_weight.squeeze( + 0 + ) else: - value = value.reshape(orig_shape).redistribute(placements=[Shard(0)]) - split_tensors = {new_key: value} - - # Handle SWiGLU weights - for key, value in list(split_tensors.items()): - if swiglu and is_swiglu_key(key): - swiglu_w_and_v = split_swiglu_weight(key, value) - split_tensors.update(swiglu_w_and_v) - del split_tensors[key] - - fsdp_dtensor_state_dict.update(split_tensors) - if is_param and key in param_to_param_group_map: - for new_key in split_tensors.keys(): - param_to_param_group_map[new_key] = param_to_param_group_map[key] + # General case + fsdp_dtensor_state_dict[layer_key] = per_layer_values[i] + else: + if orig_shape is not None: + _free_up_some_gpu_memory() + value = ( + value.redistribute(placements=[Replicate()]) + .reshape(orig_shape) + .redistribute(placements=[Shard(0)]) + ) + fsdp_dtensor_state_dict[new_key] = value elif key.startswith("rng_state"): # Skip RNG states continue @@ -624,15 +530,6 @@ def has_layer_index(key: str) -> bool: ) ) common_state = common_strategy.load_common(input_dir) - try: - if "param_groups" in common_state["optimizer"]: - ckpt_param_groups = common_state["optimizer"]["param_groups"] - else: - ckpt_param_groups = [] - for opt_state_dict in common_state["optimizer"].values(): - ckpt_param_groups.extend(opt_state_dict["optimizer"]["param_groups"]) - except: - ckpt_param_groups = None common_state = flatten(common_state) for key, value in common_state.items(): if key.startswith("optimizer.optimizer.param_groups."): @@ -644,29 +541,12 @@ def has_layer_index(key: str) -> bool: ) fsdp_dtensor_state_dict[key] = value - # set up per-parameter param_groups - if param_to_param_group_map and ckpt_param_groups is not None: - for name in list(fsdp_dtensor_state_dict.keys()): 
- if not name.startswith(model_weight_prefix) or name.endswith(".expert_bias"): - continue - - assert name in param_to_param_group_map, f"Missing param group for {name}" - param_group_id = param_to_param_group_map[name] - assert param_group_id < len(ckpt_param_groups), f"Invalid param group id {param_group_id} for {name}" - name_without_prefix = name[len(model_weight_prefix):] - fsdp_dtensor_state_dict[ - f"{optimizer_param_to_group_prefix}.{name_without_prefix}" - ] = ckpt_param_groups[param_group_id] - if "checkpoint_version" not in fsdp_dtensor_state_dict: fsdp_dtensor_state_dict["checkpoint_version"] = 3.0 # Save modified checkpoint save_checkpoint_with_pickle_protocol(fsdp_dtensor_state_dict, output_dir) - dist.barrier() # Synchronize all ranks - dist.destroy_process_group() - @cli.command() @click.argument("input_dir", type=click.Path(exists=True)) @@ -680,6 +560,12 @@ def has_layer_index(key: str) -> bool: "--oom-traceback", is_flag=True, help="Enable OOM traceback for debugging." ) @click.option("--enable-msc", is_flag=True, help="Enable MultiStorageClient feature.") +@click.option( + "--distributed-timeout-minutes", + default=10, + type=int, + help="Timeout for distributed operations in minutes.", +) @click.option( "--output-optimizer-state-prefix", default="optimizer.state.module.module.module", @@ -690,21 +576,15 @@ def has_layer_index(key: str) -> bool: default="model.module", help="Prefix for model weight keys in the checkpoint.", ) -@click.option( - "--param-to-param-group-map-json", - type=str, - default="{}", - help="JSON string representing the param to parameter group map." 
-) def convert_torch_dist_to_fsdp_dtensor( input_dir, output_dir, swiglu, oom_traceback, enable_msc, + distributed_timeout_minutes, output_optimizer_state_prefix, output_model_weight_prefix, - param_to_param_group_map_json, ): """Convert a Megatron Core Distributed Checkpoint from torch_dist to fsdp_dtensor format.""" if not enable_msc: @@ -744,13 +624,10 @@ def oom_observer(device, alloc, device_alloc, device_free): ckpt_path = Path(input_dir) output_dir = Path(output_dir) - with open(param_to_param_group_map_json, "r") as f: - param_to_param_group_map = json.load(f) convert_checkpoint( ckpt_path, output_dir, swiglu, process_group=dist.group.WORLD, optimizer_state_prefix=output_optimizer_state_prefix, model_weight_prefix=output_model_weight_prefix, - param_to_param_group_map=param_to_param_group_map, ) click.echo( @@ -865,109 +742,6 @@ def modify_state_dict(input_dir, output_dir, op, enable_msc): ) -def _compare_two_checkpoint(checkpoint_1, checkpoint_2): - reader_1 = FileSystemReader(checkpoint_1) - metadata_1 = reader_1.read_metadata() - - reader_2 = FileSystemReader(checkpoint_2) - metadata_2 = reader_2.read_metadata() - - keys_1 = set(metadata_1.state_dict_metadata.keys()) - keys_2 = set(metadata_2.state_dict_metadata.keys()) - - click.echo(click.style("Comparing checkpoints...", fg="blue")) - - # Compare keys - missing_in_1 = keys_2 - keys_1 - missing_in_2 = keys_1 - keys_2 - common_keys = keys_1 & keys_2 - - click.echo(click.style("Keys missing in checkpoint 1:", fg="red")) - for key in missing_in_1: - click.echo(click.style(f" - {key}", fg="red")) - - click.echo(click.style("Keys missing in checkpoint 2:", fg="red")) - for key in missing_in_2: - click.echo(click.style(f" - {key}", fg="red")) - - # Compare common keys - click.echo(click.style("Common keys in both checkpoints:", fg="green")) - for key in common_keys: - meta_1 = metadata_1.state_dict_metadata[key] - meta_2 = metadata_2.state_dict_metadata[key] - - if not isinstance(meta_1, 
TensorStorageMetadata): - continue - - if meta_1.size != meta_2.size or meta_1.properties.dtype != meta_2.properties.dtype: - click.echo(click.style(f" - {key} (metadata differ) meta_1: {meta_1}, meta_2: {meta_2}", fg="red")) - else: - value_1 = torch.empty(meta_1.size, dtype=meta_1.properties.dtype) - value_2 = value_1.clone() - - dcp.load({key: value_1}, storage_reader=reader_1, planner=DefaultLoadPlanner()) - dcp.load({key: value_2}, storage_reader=reader_2, planner=DefaultLoadPlanner()) - - if not torch.allclose( - value_1, value_2, atol=1e-8, rtol=1e-5 - ): - click.echo(click.style(f" - {key} (values differ) value_1: {value_1}, value_2: {value_2}", fg="red")) - - -@cli.command() -@click.argument("checkpoint_1", type=click.Path(exists=True)) -@click.argument("checkpoint_2", type=click.Path(exists=True)) -@click.option("--enable-msc", is_flag=True, help="Enable MultiStorageClient feature.") -def compare_two_checkpoint(checkpoint_1, checkpoint_2, enable_msc): - """ - Compare two checkpoints. - """ - init_process_group(f"compare_two_checkpoint from {checkpoint_1} to {checkpoint_2}") - - if not enable_msc: - MultiStorageClientFeature.disable() - - _compare_two_checkpoint( - Path(checkpoint_1), - Path(checkpoint_2), - ) - - click.echo( - click.style( - f"Comparison between {checkpoint_1} and {checkpoint_2} completed.", fg="green", bold=True - ) - ) - - -@cli.command() -@click.argument("torch_dcp_dir", type=click.Path(exists=True)) -def print_torch_dcp_in_json(torch_dcp_dir, model_weight_prefix="model.module"): - # Use a temporary file context - with tempfile.NamedTemporaryFile(suffix=".pth") as tmp_file: - # Convert distributed checkpoint directory to a single-file checkpoint - dcp_to_torch_save(torch_dcp_dir, tmp_file.name) - - # Load the state dict from the temporary file - state_dict = torch.load(tmp_file.name, map_location="cpu") - - click.echo(f"torch dcp content: {json.dumps(state_dict)}") - - # Replace all "module.module." 
with model_weight_prefix in dict keys - new_state_dict = {} - for key, value in state_dict.items(): - new_key = key.replace("module.module", model_weight_prefix) - new_state_dict[new_key] = value - - # Convert state dict to JSON-serializable format - serializable_dict = {k: v.tolist() if hasattr(v, "tolist") else v for k, v in new_state_dict.items()} - - # Save to a JSON file - json_file_path = os.path.join(torch_dcp_dir, "param_to_param_group_map.json") - with open(json_file_path, "w") as json_file: - json.dump(serializable_dict, json_file, indent=2) - click.echo(f"Saved converted param_to_param_group_map to: {json_file_path}") - - def init_process_group(message): rank = int(os.getenv("RANK", "0")) world_size = int(os.getenv("WORLD_SIZE", "1")) From c22c2aa5d0a26ad544b2d4d48911eadc07346f05 Mon Sep 17 00:00:00 2001 From: Hongbin Liu Date: Tue, 28 Oct 2025 22:15:48 +0800 Subject: [PATCH 082/334] [Was PR1912][Dev] feat(moe): Fine-grained activation offloading (#1969) Signed-off-by: Hongbin Liu Signed-off-by: Hongbin Liu Co-authored-by: Hongbin Liu --- .../fine_grained_activation_offloading.md | 31 + docs/source/api-guide/index.rst | 1 + .../offloading_and_recomputing.png | Bin 0 -> 332427 bytes .../core/extensions/transformer_engine.py | 12 +- .../common/model_chunk_schedule_plan.py | 9 +- .../core/models/gpt/fine_grained_callables.py | 23 +- megatron/core/models/gpt/gpt_model.py | 29 +- .../fine_grained_activation_offload.py | 609 ++++++++++++++++++ megatron/core/pipeline_parallel/schedules.py | 14 +- megatron/core/tensor_parallel/random.py | 13 +- megatron/core/transformer/attention.py | 70 +- megatron/core/transformer/moe/README.md | 14 + megatron/core/transformer/moe/experts.py | 65 +- .../transformer/multi_latent_attention.py | 40 +- .../transformer/multi_token_prediction.py | 7 +- .../core/transformer/transformer_block.py | 10 +- .../core/transformer/transformer_config.py | 51 +- .../core/transformer/transformer_layer.py | 56 +- megatron/training/arguments.py | 
11 +- .../golden_values_dev_coreweave.json | 344 ++++++++++ .../golden_values_dev_eos.json | 344 ++++++++++ .../model_config.yaml | 139 ++++ .../golden_values_dev_coreweave.json | 287 +++++++++ .../golden_values_dev_eos.json | 287 +++++++++ .../model_config.yaml | 134 ++++ tests/test_utils/recipes/moe.yaml | 10 + ...test_fine_grained_activation_offloading.py | 187 ++++++ 27 files changed, 2736 insertions(+), 61 deletions(-) create mode 100644 docs/source/api-guide/fine_grained_activation_offloading.md create mode 100644 docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png create mode 100644 megatron/core/pipeline_parallel/fine_grained_activation_offload.py create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml create mode 100644 tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py diff --git a/docs/source/api-guide/fine_grained_activation_offloading.md b/docs/source/api-guide/fine_grained_activation_offloading.md new file mode 100644 index 00000000000..969098263fc --- /dev/null +++ b/docs/source/api-guide/fine_grained_activation_offloading.md @@ -0,0 +1,31 @@ +# 
Fine-grained Activation Offloading (collaborated with rednote) + +Memory capacity is more and more important with the rising of extreme sparse MoE models like DeepSeek-V3 and Qwen3-235B. Fine-grained recomputing reduces the memory footprint at the cost of extra recomputation, while offloading could utilize the host-device bandwidth to achieve nearly zero-overhead. Fine-grained Activation Offloading targets at offloading the activation at the granularity of specific modules, so that we can calibrate the amount of offloading activation to maximize the training throughput. + +Currently, the supported offloading modules are `"attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act"`, which could work with fine-grained recomputation to release almost all activations of a transformer layer. + +**Features** +* Support PP=1/PP/Interleaved PP +* Compatible with fine-grained recomputation +* Support FP8 +* Support MTP +* Support mixed dense & moe layer +* Support A2A Overlap +* Support CUDA Graph + * (Temporary) cuda graph scope cannot contains the offloading modules + +**Usage** +```bash +# Enable fine-grained activation offloading +--fine-grained-activation-offloading + +# Specify which modules are going to offload its input +# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". 
+--offload-modules expert_fc1 +``` +**Compatible with Fine-grained Recomputation** +- For modules with minor perf overhead like layernorm or moe_act, use recomputing to reduce memory footprint; +- For other modules, use offloading to reduce memory footprint; +- Make sure the offloading/reloading could be overlapped with computing; + +![Fine-grained Activation Offloading and Fine-grained Recomputation](../images/fine_grained_activation_offloading/offloading_and_recomputing.png) diff --git a/docs/source/api-guide/index.rst b/docs/source/api-guide/index.rst index 710a7caf4de..ac6d7cb0b2d 100644 --- a/docs/source/api-guide/index.rst +++ b/docs/source/api-guide/index.rst @@ -22,3 +22,4 @@ API Guide optimizer_cpu_offload multi_token_prediction tokenizers + fine_grained_activation_offloading diff --git a/docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png b/docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png new file mode 100644 index 0000000000000000000000000000000000000000..6c8afa78bb180a0815aff02693690b864e9b01f8 GIT binary patch literal 332427 zcmeFZXH-*bw+4z>P!Lfeq7L@*-C?8~5C;#x)#z@6hLxX~se14JQ{F&<%XMbHn z{-QX;L~;Jla|#O8GtB>->zv{K>l#W5iZBO?bAMf9Og{cPRsHqy&+%;bng8A~oAU3g zFBfK?{rmjFujimh(_@$-A1=L8e(p&@A>8)s`wX|W@iYa+BMOzLkM;b{Y|c{E^DAT! zcOzcEet(CTdP)Ck)tyGC_Sct|nI?BX{XH#RoPUn2RrZz8KNTrPak z{?_uwgRA#0UI#ore?V%T8BLe-Y$_=ZEJk?^1fl&QcD9YhB_$x%g2uqXRMC6qu0EnT z^Y6>QGWZt<|C+(SRPZko{>y~_GU2~W_%9Rw%Y^?j;lE7y|6?W$b~q=NX=~l3@V{{y zTuPZ%SnI?ssCy#nG*DHsshXO*>~>Z?pu3UEUQnOyH3j9xJO7_A`$AtvPKK6d26K`! z+`4Z$?a?a3(u!t?!%{z)E0r(axeuq@JiF%mV=c967$&$X@v($e9!WoaLk0}g4XbbP z{pruwLY%BzG|fBUU~vWYfi~s0y(p*c4_X*}wN34K%F7H9qJVLHZo~nRveQ32i?6jn zLesr;Nxd`q`=Db#J%hNusTQooQaS{BbJ;h1l%YVg8KRRl^z7Sr5pM(}`3=}#9ZKBy z%=GlwOeSGNB%iiXO5&O2BSNR74ytuGw2oYdT16G&gfI42P!L`pnbkI|@q3l3Ne-?! 
zDyhoCu#$PvW5Neq1**@k{8ICsf=8Q1N*2zVE*nq&he$76l`!&dwDK-pWd0EJP=_0= zmXd=bgmPY|{O!)06#8R6={x6Zsn$}5d6E0GZ9xY6q*#5|T4kFfg#dls9k zpl>snd{v9w!^R~Yg_;=5{>E}P+COxuF??JdKk^M!Q0NL-7X~#cF=c*swK#Bg8od>} z{;}mIrR0#Lqz`|WREH_s2T%BeYEE#sJdG}h^5SKIwxjXk_En_CclV>4?sZ@gtL0&q zNXtF|u79GstKUfgR2ax;x`WSO`V-E<>6GbBt7Ex>zfnB(jh#{Sy>p>gCk+z1ZfoDN zY#e+43npJ~QW9;N(}up5ZWsH%n@vamIH3xGMrDp^yr2PC6x2J_jd06yQSy@{pMUVY z&kV`lQ=%rJJ|^*P=e|2Nqyt>y1X7s|p#cTFayiP9nT19Z2QuTh=OMs*?rDwpK*JtE zFW%loUr|rwKWA{UzfTesp0ae7yd>T>$Y*9DaQAF8chs-v7}j_(64RIV-bpQlaE#l}DBJJHx)9K66_;g)-*ZwZEc|V4ia)G(H5Mna3>+g~ zTD^SolekVFsBA}LN+wxn({gk6RU*_)O0l2&OUuUbm!-7u=OijFi>ERMyVji_dPome zE%MXbVobV#mD`Cb!Z-gHodt(5`n^4O@&+WN{fh2ud&L!sJNJ>4rbf zCcHo=sN9lWe7t(Tk^zz>ee|*iX;WCeekS2AdT`kKzc>6#7B9-rmlrn#hP0C&B)lcd zvp*D8dmo#SAF#s4){Wk$az0V8~>w+nrVLw9l$D1Cc ziD5NE&&w~fl=%JpUSgiFS^khCBlihV{}5Y84XUr2)iG78m^V<*zl+XHzmfGfeEv~( zw%Ni!?{VHz-cr z*y+ho_a0Ebipj=X=x>%C!l!mx-Q2S7FRAIRjfaCc;>#Gx;y(8Bi}_1deUt}xg-mw- zBx8!;afZW8PU25>+DTKasTGfFp&_3Aq-7~Fe5|^Nnpm-PAv{7?=@ZNER{oDulgSo_ zqua7Xomzvv``53FZVL&}dMOpSO|g;9?}YjbUGlt5UwX*-lMg-1XBFg%nhD%N>>j#% z^;CR8rChXpZ4gJJc^GnV4@kJqq(s;*vORGM%QwxfbkjRrjPjI@M17jW09zRXH^%9T zWgaaqtMVUp#mfWgbkd8HIky;Y9VxHE`E~LI0=pZh^Lt*M^PqmU*xWX-v#e=P+<)h?U!T5ayr7iY6v(5gmfUB;pP<35dhLH- z@~7m#L)QU(|GX2te!f-@Ku4qJS-TfhwadkjbiQ|1#lP;#%EypD?V~`^v(8ntYkcx0j#STyd@)@5ZhrqK$oqBO9nLnW_WMh*e;d%n75bg?Z~@A3u!28S_e67d^^v5V z#y-tqY;lj`FYsKvxLMeYI=@#^x{vLmg9Vi^-CscG6%!~rY3TFaMH3{Sjue1id{jQ= zfj3VXbbj!z)A+dff5@=%HQ}>GD#fe5x55m+=;SZn{_oj>N3fiD?xTM=@XoN-!ll>| zIfFh>Gp==gtCi_zTBORK_pJi>MRPI+Cb!cu%PH=b{19^);1g}ZbEuD!##e7$T5Y^> zB#*o%6;JkGR|c}V|F^Y!a_nTKrok20?92AV!{hHV|GzxVFMRnu3IUw?`@Q6-wla~F z>GPae##L}-BI#NSydLMjw>2)>MpyLV&(+v+eW~H}e$>8lg8r;@FP`~Rca{FS|0Mui z*+xu_Tj~EICD|gFM%0PxsqGL65Y(i6tMdPl@GtSyo8G~dRNTLNA43UIb~5a|626tR zBo%HwFYRq{8U!!MAR8 z$g?A?Rg{@rj-TF(<^pbvK=8?PTmK*CS}=pfQ&=j*f z_(sX17HT3PS8x;342c4UJ?6gemmK&T3_kxiow`l}4rWAxCs@U*{B6iC5q{vhxMk>%Jb( zBvle&^Myj9a|P7#m5@NlPo*nD|?d8O&)KS1V6__cx4Fmpu$& zFMFGYaQU&tZn;4g4+k<-WnB}7f##0$Pr 
zHH;U1)VlY~=&Snqex&Nq^YG<^4fJ!XpAjulQ!s;Pv(pQ;nPoeoJG?=((N6cD+Ub++24%9o$uXvtY;=qR2 zm(g|yg#cAmRN#g=Lws^&uhNF}`>pomoZZNNnD}X1XcM+F*n#Jh%a#Ct+G#mBa;NVo zjv@X*I@pk3!S?jdf6nXY6PcmQ&|4P_*w^XI9Df~Z=qr4Z)GnSA_`^qIySXJ$z%>ug z%tH>_)0bQpE_N=4R{=&jacA7WmJ}&F5GX(*C~!%g8t5Xx1UZv~yViL({4)box8KU6 z;HMSaL));Pm9$WxsN@&i4??Rn@65FZrXIpfG9Jch5)bPV>n zE+SyB@rEiDuV+OfCBoqlZM=89Sy29|k!S|fSIfA45)>koO&hzNuS$1OFQ|CW{A#yb zA5$O9db1BcAS4zf4VbiW*Xqn*FptrR8`5gcK8#$Vrv|mf@%dZGRiGxpQDVq<2W+!ig}))>)`>$Xfg?LEQA&Tq7*6jC&>;|Tu{xy$mvXo&jf&b#O~A z1^!co^QMRr28$@b_6L$_0v3Oi?-hJOu7&3_>PwSTGuMT#)8sw5sW?y{`l5hFz2(l$ zL744&EP7^(x~)6yJI+6E9?tNb#hU%Mh_X%QB1>t?j)s=YZULq3o=Gq+u(ehzJq$>- z;lJ{fReZ$GSG%Sm@3L>f={^9KR;8Eg>w#*Pel@6*Tg{`_mGM|e+c_sWX~m)plDK58 zU_U4C-^w%BH^RtOvch~Y)pg{e&%HQqGrjSnN{k2zfIiROv}Q$>Z1MrjK{|2L+$0nC zSj)d`sRwW_gn){YkG6Hhw@#toP&V6x`U!=f^CHKDwo>@ta8?~*!_3CT-L7?_3e9+ z58V1(s>Etf15Kp4!eUO-tA-jnR%DL%o44K|h^(y+w=T;%_mh!V2DO@ZyWCRFT6*bi z^sF2mYhZ7U8#ZWg`gqHq7e?2!m8s7}cwm3|wA)IK(_glyhVBJ{4oGL~&FVnTwh{Ij zS6Gw4RanP7q*HrNtt2M%7L(zIyOSuPL-z+_M*P|o73hWKg_g`y(`zQc={+rEeN1Yh z=iYJ`(~fWmL3euNJuRuU3mY&Z9-^LUTr%czYN${M<(rtOa5|ylIawI0<{67NJ$2S+ zwnAP+>YryexASMsdAXNFH>Cnacw|5lZ#N9iPMMZ0=56n*wm^$KP=LR^_^%l5#-rB} z5q&h0W5K$lKTw0|5ezWR)UgwKeWXiLTI|;B8~srshN3NH0pIYA0}b!If)iDw3$b}xj%QZ`dC*fx0NjZ~Z&J^D5pmiu-!>q(Fe zARy|~P{SkpBdH<6GW3~x;8*v#=nA$JsTKv%Zzox@SiL=io$jL=z&hP4MQVWULSzNcajlz2vz2}l3YyAbEH{d)(x~P0iF!4= za#^ZjE zLSMw5tFNzbu9jz&ApU{6QWnE~42h+QLNQbL8m_1kYZ8YKJXTtCMw4-&ou>QKHMc=8 z=$m$HV@7~_7*;@4z@t^4RD=02r7Ot#94|f_qTUE1XGmgu1^lw`ad5LKtKR0>MghzW zfx;~bYH;uz`eeD;z`IK-i;lX~!m4-banvR6sp*$lkKVKlU3@M%B>tH$Lu~xqiIWzr zR{bM1X9KQet*H7_8Lx5wL70QkPq7CL52Z!SKon_lT|xXjFOLwBZl5UUb>AlHM>oef zG8smRa()15A&_m(KE2@?R!5JQoV<$N58LJBzD*Q-W=>t)51I4>?}wEJCejtgR|+5* zG0Aug)<>*GXf^#KMuIQNoM(Q~ur{DxE6@4OzQ+~qUWiq_s4IBj;BEy&W)gPVQvtNs zYa@MB6mO-yc_v}np)2&KlHh9c%4+Y?_{x;o8g9tl|I~v8eU_IPGt9_qIlo^~wYZTi zllyIX!sxg|zB}(gBxnu z&kRA?6}D>wNYvL~ig)WW8v2iSDcd$~$qECTJK%-% zam~LMuiGduy>fT=e;FqA>g2rJ&oqaj5fRrfGwJ4-nVuupP7&4@W?kTl{8fu2Zl0sX 
zSnUGCq$4+8H1s9V_(H2ua_SSt8<;ZHXkC$-yEe%PH?=z5P++Y}5tna@Qne`WeHlY4 z0O9p^pA1rN;g)eLM41PFtT&GDQ=ZL}$=>_a9dt<9bt7E+I98Z1DIZB64_fI6BkGq` zC`=8HmCJPqPIzsX*sF1P2Gv6uhXKDbrdw^{&Mp{c+4u6%cSVd(CxXa^ zpOH)ly$lYM_?#2en;loX>Ims-g;q@cI57S$$u;kunDjzrelf{JcBUz=_N%`H6}JQY z!HjWTtL^azL+YW9@p0ThWK367u{mSKX6VxmxwK!Pc7<>jcXu(!up~6KIsxR{$f%seBLdrx==r6KNT>2j^}-nUB(cvCHh5HkOjPP>-s_z zHembF=uz~>R=}*R^^)h|GL>Tq>nza6Ng+$*Dagb`S~c*IO+U&g&%$t5N?3@{tg~(5 zI4yW9`tQQoui%*GPFrW^Y6#0lTgv!AHO9hK6Xo0b9(z$CE4s#^>nJy0a%G@D<1Qt; z3=|N==>mq`7Yz~Q*BtHHNXuwx72zPYkEew9_zZ3AJEY=7Q<-ku6pM(6*gQLheD8oi z!^2n>YEDx1d(=tzG5JQ!jnJ38aNwosgpWmJmRy<1N zl6(exeABUN<<*#-@nf+uCx_`5>nMu6E%8ueOmV=veEGczcw#GLG^W|ZUkuEExiy3c@;1EOfkE<0H+NUNw(Lg^bNb)B&>XclmLz#9uM37c&B`XpwtF=61 zFe19TZAYUqpN|#OYPDdz>7R(aHWLZUvKsi#j%+Xi?+%X2RlE&k+`u&}{kJNRUDw9h zg0OyiuJ>xaXKo6yW+97j=F$Ou=MQQUZ?xOq303Df1JC+1-h-@gw*rPt?&T}TJgjyr2hozW=Dx6Hu`mc8!blM_(WPv~N)_ z_}T8`PC5Yb{7sT&6jbu0oLZ>B3{aASOSN1Ib224OY3Uvt(jJ7ojrWS9K1#X)E&DRE z9_|U4<_(fRoXSH6u4Bn8<|PXVC|=U%39&N@1wWF;i4b0!6i#_>=~r~`LM8~MbUct^ z30-&n5WFrV+Y;Dm+}WJpW$qP)APbsxvt z`qCRMQ#j`G#X%9#O3umcjU6wm0d5kGw~qW^OOW>uy>fbwq(g>wPgsxnJF@@>>@<(p zbwc1=P;Hpo^^TGx|FbVCwtDIbmgO$AK3%?Ulk$BVAwFN?@3eQ z44|C-bPu<&}nl)|gL&CXuf}%L$PlltX(P4U=6<^g> z51tRL*v)_Q4D3uEO&YK7@ev0Tn|m8{g%fA49eFC~l7r&b2O^>k&V}Xq&1xcpuy8z2 z(946S@CXKMrT921-^+V{}pw>E=RHCms^t}LA@Z@?aU zr{dqKS2@NENt@@?8$ao6Eh2J-JI)NFKhke_#!mW=kQrJ1Mw$-@3(&ur3is92{DQJ` zQ-b`wFH#0(^G*uj-*t}iJ~CX9%RCPKGQ3x`M=bOZGB~{Z+Nd#~x>?^nP<~)2^#%vzEchhCMdy-RZm)EWy>0 zlovkSki1VU^~?AJvf@*su%#rpo1Clf&q;cm=hCz!_5FCO5aWF>y3hQLVZPCPJHp@w z@Zcl-4KUB%goN&hF>Dw^tLlHo$y5;))Wp zdD7!$V4=MUd=>)%DU@PbQny+4#6+-x2FD+F%4L@~oXRqe9O3(#I@Pj|3QD_HPDjE3utqKI zlz1zr&E`T(NQeVom!Pz^8?;&=^Df)+Yjv21d?L}NVbV)gHu0uyY2UnyXFxSubx?8h zvfdygT@$Cpp^Wac@lo6ccz;$(&wOLIri0&y_n0riwF zCbROvSZj18w3tx&C~e5zQMvDnPTi^w9+Q{psTsJfT?Wb9euKZ{>^9!of%psqhgHmk z`3bl)^LwB2dBY!k3oxvB=u_UT4DR;f@jlH5HnbXu!-cTFe6yx^WMs?nO(_kK$9r;u zwWR=p^ZxSAn>D^2BB4}z<;AwApiT?`cB+|x!E~7E6rs0nr1rq@VPw`DS!L;`6O%3K 
zpvIp|z&@xnw$QbR`7R-@o*qpoJ<0lLv8y6AXA!K%bQ>d@F-y!XrxdEC^r%6>^knlw57_a^t6uDNv=i@I+B4AS zcN&NcUn?~t05boR(Xk1+kH9CYE$dXQ&$!mI9p!l zz1LEAZJrzCBBcsYpO8KJAwFF2tF|}<+d3;Y+J2f~>Cu1$5uUVx^2*QAuQq2QCL)fj zyNIuQKZ=5|IL%hPoO9TvH^T@%zK6JZSo6Fyp{7E6R>QwyxUEM_3qnlk^-2jl_G-@3 zk_-h^%jxtH-iQWOx?qP0wULa>EGEFZ7`+9WRwAG_z=U%iB%`CGiZfv!_4CY)i;LmN ztbqwa>jtKmFNOE4${ih*@Q(VF8XAbq8gXbEs82{A9np-WCeq|OXO)w5I+F)sNvJ!R zrhUEHT)WeK!#zzwCl|S-NV7sMStRSm-Rna;+TH{|6AJ?@zKEQ$H3}p(LWPdfJ*PaW z+-Nj8w%@3c>)htDqqOn6IUv*}qD zTA3ruigt04gIq0ixrXzP@NnU9p~++Ttpt8CQjv@}_dS~9;ViA6cR!;092}_zs{*TG za=rA}yK8V>~Mx23JoE$xKw0X#nvPgOF-&Ex{b>OalH7$By z*KNu(P)CTrwL7FhJqCG5Zr8E>W|p6>D0-}Mur?Gvd_ZjAP2cI2yW{MDA_GhZ_r^`t z2$#bKxpUu}nTc?g7v_DkfA?Ktb37GV=k`zgB4YU0))Yw}*dANLuEy?YZicAGWQO{nndT-~_-v_cqdv~B5Aju9> zD_gry6zsvCb6eNB^p+%jmP)+^*CJlRsAS}RG)(-6-jUW2tad%53{+(zLq#jFuu({N z)@2`}*sETmUNr4dI%{=WuDvH|7wt(^k@z35WYzQiG%Y1ZVBfG6E2oMmK*{n&nCEJ( z)rxk9VHMpEFOKs019pVraa_#fBJhh*9D7oL`rdbomHm=G^SdoVnN>VMy6YG&qD~JQ zJLKXUG<@4ihnQMC(FG98=Qt7?=Msm3x2KZ=QxfKe4rc`UOv*N8v2r#UJ}E-HjdEW4 z8XGmS>nDx8$vde5=2NRVN%=jl#lfv~cUC;#@71|5HM}&khsR9~O=!C;XP*@j8Qb=b zvZ|qr1dDHYvhviLMb}^kROS8Z=g6g=2PPHFd6@k!FH&3*>;Gu__;U{}p-@a_66SKg;viSNC~njQ|Y&W`4z z3;&p;4K%;q0MwRL@owx0oMH%UTkCexaO=v zQ};Yi>N4YTjjU9n{b-{LvrlL*J(t9c$TBsVJZT~NE{*2P>OU?$9rZ~!a0$}4Hb!xS ze7oFB6H^mFrQZJQq-k&*-_$8>0Ck%0enz*7_3_MKBUn=4T(p8W4AG-6vKx@LKV ziD?N?^<4F&&AQ?pLpFAqr$=+;9=YPyh4)aBKA~$O28&dBmPLK%e;S#|uMpD36U%S& zeaSu|N4$C4^I+4J^oK51%%gIC*IGLY1QJ$t@?78ad0vHx=VN0|75$#aPMf_@N7tEX zfjsYlMoTisBTb&YBPHIKEv7c3e6 z%DV~l+(^@zKn@k}X=?yqY&2B)FNE9yiJyPE9ns@v!^IZUnoqKWx^2?OAG& zTJ-S7%PjHQ{lOweCcr;?o8*=ko)7ypd)Bi-iZA#niQd;h*|F?kS)CqzwxdBDt?TwA zZC(=hK0b$%bw6l13w=4SDOA`l9i{nYnAijVv>y`b5>T?E$C02*pj%@o3{~CEJ8w)i!odBJsIv+YMW;#NS{Wl39p;%znTktc}2=?HYFHq zt+@m{M9KC|e{GFboZ47YPO06LpAt?CC4eADaBlUFmW(S`^jD!7wv;vQ&|a)%Tad`k?lZ;MI9wGSjc}@`*J!Omfl+1 z@2di0Kw-WX;FAa9Voak-LqNsx&&H8I+jWfW zgEejhY^NM4Tsy3_6+u9!z98RU6CpWT@z1K9pLq;WwYX%1>nBupc~Z?M7Txrfs2Ve 
zuK=MYFQjg;dfX(u#x{;_pS^wcxpJ%DhyAq~k}epk9e?PwO{de;qD7l0FP{?IjOUqW zq?Tu-!5I=G0ASpix-ma-ooWtDpojU*T`w6Bo&Mp?1JBM~HH^?0S|%57&q>rD!DSC* zB-9zW$9+-@h^G#lI7?zP-DI4QNc6gfF~Hr9U9x!Aip0+J5QZKyAZhi2#8(?;L>!zE zNQX5JG1uN!ug&=~-Bvm?o(r(Qtx^)DK0Vx3OP7hq>^P{|*c6eKncz8c;^2%15~9{t zzYRkutxVwSO3^|Hf6#^1pGui1E1e)(6``*<_*}T$%7M3SrfTC*8#47op4TG7vg^MM z3L6^-R)FR^HLuhfMvX&}RM0A74l2;1YNx}M{sisrT^e@?2>q_uvrxtW&`Y0V}MYg0;wyi_Tz%wH@n;VopYNJ=k}&6 zaZY_kOc3RJp!fmUyPg;Kvha`c~k(*R{`5IQzt(wxHoZY%BHiE0z z(+ZHUtaIDzV<7oZa1YyLPV3$AR>!mt0EZ zJCTQ)l4qTN#@iX9i*UCV;;vbv^N>)o&Z?!)$G5c?IrY&g45Yzmm$g>i1}MFt+Qm z{|JxmqJ_BemKRv`MZ_a^;wPDTmJI3D&5Q49T)J&o(tNUlNK_F7-nl|Qjod#T+%~jC zgRK({tc5n@+A@NrD?Uu_bkK>jh+scFcTLh2_bK`Vy)NqW7(IK7)5rb^D-~zF18=3( zXxr46Wk-yEn&3{k@<=&d;pV4fS7713^D3D40|;IEJ%+=q#WnX@ZB*nzhB8s)$9#BX zbK0JGMErZ3n5{!5CydAu51nh|DG0LrS`{w(;#%!0M7Ymtbq4M^*s#eID4q}AVG9b2 z5r`S@A8Me@A4HH9fnzRGi}EO6M) z#rSdO-Im`v%(Cfp^|=fwr;xGe9j_hk-5%OCkWgoq%SP^<3Dff^@YGvW39kvy8NEvS zanCRf{BTPEV6FuYUb>=&smQf{HfHko{{lej&=X#-av*(yU!saHW;{GZzJ@f)q&Q9;( zmzf$TbP-doyp=pxQ4}}z1;256711|!&v<@t8J6p9Ya(1eF5JwtX*23QUD~DLA7wU% zb)W6)AgWn{x?%}9t1!}+irJQp4v%nrySNy0Rk8Dl}3@@AVK`K0tYFa z%U<-!2+vYiN~a0_b~z!`0|}okHm`w3bY+grZ&_)^xhIT7Y5k{*BbN>@7*RP4q)sMP z3M#^)kBrQ=4`+pSe#-UwmMS_nxo<5x_E%kS9F`gXH2?M7S;*%8vW)AXT zl<4KO@$4v(99D)_dw2QHs6@+xJVW200TWB$ht{XlxHB-#b$ZeF5u;omP4~ijI1su! zmtyl-@=kQwY;-EYGE%0y>B@^|6EHvHEEz?h-}8if&F?Ox!B2;~K^70JJH!c&+P!Ca z=aQP7xmmCGrVG1WBU@ol`K?=DQ7rOuj}B>^ZK2f zQ|uhN84{{lmrHnc*I>L&$`!CxDY|J7uzxv@nBh5i{QV+)sA1r^*(0@@tjmJkb>VGC zw>L#DtfRkz6z`&%B(kFN29hT**9oz9Kf*Tuj8(=rer+IEd(dQV(@#SiEvE5m(S7To zE*zlpNUmC&+ROOerC@4{j_ImAb61PX*^%#@!=9m^x(6wVu!NMC| zuPdsO70cHOOxb@vESr2&0rM=88?{bPpp5W~uF<(ebRZk5l@umY=$h(pPHkBCHI-gK z`vc==l#=iT4e4c*B1&JGNKu#3?_ZQ04>L^m@>yk$9z8AHcj~_KZNDHR<=gHl&2g2g zIqG(C{bx(xP7Od#ntp<;?|bm0V>Xv*s&9hBmuoZ$ptpzl#DVX2FZI>t^mXwa7CBy8 zHbeovJDzh^dh?INL0*O8J`em0S7&qHcizCtv*%Fh;S7lT48)43In`mp$P3hmNyCt< z-I&x)SZ>K=swgai8;&-&DR|JFwF?cSVcoZKiPU(djzAij6b#^nQcqPDEx26NKBB5! 
zRt>BUZ=~8g&lAz(_C{w$BVD-i-1@Ah$<&$B`ixx4{bg^XN$PcRR?`=BO+bHU0~@?P&7)Aw5-Gmo^yPb^6$FD~5P4ATI? z7@F3x-r=%m^v)Os9Zd)T%KK1ZO3InFkoPWOgX10#Z@XIGI zsXzerfEAg>olP!|YnWG&BaZ>WoSe=5CsHfFjT>lxpr9*csQzN_=oSn*xc!jy&V?0! zR4`W2RYr(P^e!E5{&EwY+$Z|E>TtD*b$rr}=ZdzetYHUDSr}Oze>nY8hu{^5%|*|%EJa8kQF%ReY-Ir^>!23xENmft2geZ19aGQYX|u+xRIY}coyXesVd%6M<% ztj_e4QIZ3h*I$=TK@XCBOtHwj; z(&-l)GEokvR`A%yG43SZ`}x>3$jYIP$y=-8;nVIs;KwB=m4HjNB(IV(lW-4H*Wi@J z1&ot1{#mp6?pU`MnEsTHfsw})GNrdi?z}?IGT@yQsXLEp**Vp~4ZT3eHS0v92|_T7j-EF5rLbyuobePB1?b*-_)YP%#lN1kFRr-L4yo6 z%!=bn&UIBo5QFl?ViglkCcy9f(+@F-I~6hVVBhp4E1f}X6dCPRE`%7Dk1R=}Q3X8y zMT(HrrJDB%Ck4H-axmvYBl5Q6_2Uk!T6tE!jlRI2kgn(b@?c08k@!b2(@)aO?ZCqo z6^7~Iqyzs0xZ7W!7}c2utVPE3dpM8%JE)* z+>4K*a9RJ@D^VcuP&nO6&nDYaBI^cI@z0y2QH1t>>gPEihn@%Wt#HY+d#g9e zapi!Tt$FRL4i1y@sRNwe4MsQc1CHIUCLCh{`?E3ef%xeed$>7h!&31R*R_3tfmDh0`gbk^YTUSqa z-{|Ms1UvD=2Z(|`r^#~5kd<^Rtfv!`6Pe95_6P3&HdwHx*=7;{Yv@!un7q5Z6=7Zi zUUuI+^X{(nYG`R-cTGR*PoRMe-~Q8P!61uWP&gHNymUv%E#&)EMosxqK7ooca2tkg zm{Do3peNcpQA^i%&^_xzRpprt)l(r}=vcMYG>kMLT?B8FWE!W7t>z%e)eGo$;AvJQ z(I1o{CSr&~`bk2B_E1jJix~ z&Mkd>CRhK~23Ysfd(YBlHJ<9|098|y9<+wbf4j>sUgp&9)yR5tM*9Kf!yE6+L_9rl zDeh?-ADKJg^s;qx*`at?ngFw+%54$r*`BDM&vs~;P&u+)B{56T^`Pcp`(MTFP$iXve%KB1-3!X`aG#0`6-?UsHuhtTt;0pC629@bjn3 zAk(+Ye}v8w;KGzo^kg5u;x9+;k&6AHIw64 z{x%nR0WyZCo|2r{x-@KZo-2tZ$*v{;TLGR6@WW&g$QH3x7xV^8#Q-hX@iWscLc3f# zU3@St&?H!Td(~?69aiHYdF4sW^=Wg$(zgv+6M)wKyz>b##UtM+&{NYXwNV0*s_53? 
z{sZM=-+uKsNQeI#No8YXjuJFln0Z?ZRk7(hK;OHhF=%|xwpxCosH}9ekq>lZcm1y8 z>KRs(W3>cQ=8{r4P_QL47dZ*l(2iZuKo1Vh9{rR;@pMkO4WV!1&%h!gU=j4A{2k;e zt$Z`AJAyc`J2No=h%r0~13itChLY#7{t&Ujp+t!5;pm#Ns-#dt=w6RSW>9>n%am|5 z6)=AX`s8OSxdsA=NQ&9Q$WI$JGZ#Ztdp|aW@OOs1fOuwG5gY%i4G^ zLk-$RVwAb73Kdl66V-kNBG1#;@8gAZB&5wr&R2{PO9svzG?|aTAjg{Y`7xg8|MP# z$UE(~t7aWNUS8fwfpneGKC5re{y|*Bz*({jw+wTz;4#RyBJxjcMQcoDOQ4LHKd8&T z2G5PX)%(3AUE-YIBO8{!U<3Z>wxd-2`I#Gy?@1SWfzxMshn}TP@PsNCXzDSg;m!2t zAU^#mGY&rJbiiPC`G81N9j7J=$%_YtYjHzjB{X$mMkpKmS=VB>kyd=t*J zvI7Dd~-6fKojJZ4DPJGSSyuq9lY>d^pkbloc_!H(#6q} zP-!WF4zC+!;W3V(Kwt{F7o7xWfWD7UQ@{zPcAsA7JXn7#9ATWh^4AQFjjKaYn47sP zhwGaGPvPwxG99%9+0S}Wa^UU`RMP!uB-7EvgJ=~8O)FmgC7c^5y3fiu!k~8b&9R?b zC%xen*OLPVT3MY%xetOxW7vs(2^?`Jj=#b60}#Z!=NJ4X@oG?Br?8&2vHW#nKM);zyYL6HI@(ii&P((7SsM zxtSr+QIV)L0kG#m1>0()?Yt5Dp<#uZD%~@yij3g}!=ipQ0GKF1u*OcSUr9a;%6mT) zWXhwxA-CW>(3KZnyMMvAea>|EtioV*2`@82UW-nf*=l-mk#`@NexKHx_0vlsowv04!HpjQP=kI?DZPhC-0N@@At(I!EU!5Piiu1-5TGSJ zS2MRK$B!R0NzL;LirC$Q&~SS8)nnUvG2u`6^@69pA6jzSt}(5g>i%Sdksf~k3<%>j zke1b@y~xTq(~9AN{1nes434W?y^jCoW6MK71G{|>*A(=Y#3p69mT!}rb7%U(CN#LT zErH*QMh84lFv$yfvF_@so=mz$*Xy7JchlRW#?>!PGV>newd+B=iE}xW7KZ7!_>B65 zyj!sRk>pDLO6&V6-uCG$luh2;(|Pu#a}Gf5-J14vhm!OJT5Y7fp55kgHdmkbvFqXH zexv_0rr+Yi5$ad$yY`H}A?7Rx``m|1M#n$pe! 
zZb&q~JY2~>%S9y;M1*vFInKU&Tf3o>a|h6A#{e=kB~LdASm-yKz%*kC$U42%V$LDd zgWwx8X8u^3`-F47QhjthSQQqv}|5-<5xko80vJ-#Di+9gnC-Bp`QA%%K4l4 zy&UgRn)t~ukyFzGWZMJofp9CG)X_=Rgg@zN+PJz>mT}`Vp2}PkR$W8g{J@~_F8mvI zqUyna%z?>({sPCv!^f3(iGkg&u?_?&UH%ZF(}BJvyKg@ng4nAP9F&~6zqGj@>Yiut zn@6w}BnMv)x$OS9SbAjqZ6UK4xWPG-R=f?>kx^b`j;zSJX+=Hz1&x!leXWR*ev~0^ zuoqXPE=-9?xcH$`ZTA0R??1zu+P1%8cw4rJ4G|TkDhNmyq*s-u z^xlgS0qFvU4iO8zNbf{IdXpMzQ0cwb&>@5t0tr2kJPY0DZ1+CpKJWi}Kl;ThxYn9; zuF>Y0WBkTgB%VCH*1LfVyKQv%x1fgrw5WDF#M716dCA^=^5wUzpuTUcaq8i4NTeSmVt&wSx%<^%)B+y0J=~2{03x%$qe56jz*VF+P?OlyPf_Wy7 zoP@~R_mBG|e#Ic~Y=&;)qj@fQ;~e~N7|BP=&)V~eFhA9?D6wfn(MI?^ zv+*1=y1X~*?=EFDNX5tFBytaGO~{z+)4f%BqTuB1np6DOc0;0;6VONQcdL{j=da!6 z!$?`}(x0-0EUBkJ#?~@@93^~PV!Q^HXcQ7Ou@~{~pK|M>A=}46`x@&{2x(a{wg?sH znlEGRSYd6B{Tn;^x~V?c=}+!1PqGn6&#z%uuiter;Bq@YRsrWcYc8NrfGa-YIB%ie z%(6pm#=)XL1{7jSeDZce*mHzFd?pzB6gQ8NtThH8qEn|?cn7aHgFwh zs$^{=JfLwX*p%}mzWf44bkr+4)!MaQjZjP)?MJ7&xiq4j&6v?jtJzAH6U^@r%D)x? zqz?1B%YTY--=jEghe3Lu;^EWxuSG?;4S*;Y8TZ7?-a#o(+#V;D)tG*R=lrVFRZ+o( zqUMieFVZM~XRa;ArceNJm2JZi)p0^E*-&G@3+Q)|Mag@qhtU-69PZ)jyC>wVz8>BF zyfL9xt8%N}ItG$j(9u*>xQ)6?eD|0s_U<&kRF8>B83I%xul*Li`IgOg8P+aT%;{w5 zx*o*18>)!04nG`iU{Bo-(G-7C0m}(Ho%uNyTbQ=7b6MW+9q%bOoFm80Rcu%R%=f+%kt!V-&IOP*>kV6vB(_FYZ;g)#U3$&y~H9+%vuPgTise; zSxx+wG>!~DDJ4o{SI2NKL!$@M?IP#J@wGI8gFb&|;lHq*k8+mUVRPbfJ6&>i&wOyS zUbw!Xea|P{(2`MWYwoex`n$*b{7aAd^-7M|H;NPUp#{8<*k@KQTC^%1sy+F6kC&si z-ilR4Ku8z&Nuz<-KTXb{*K5eNWsUGL4<7(yfnQhB3WII(WQh+}8Zlfv%*XbGMztj$ z6}~$M(}^dMY4_rdz?dMa9X=^q?I>x!+GwE9E<;69ydayo1ozA*T zR1mj#Y8=W0G9t)XSM5m-z3>0pJ|Uv{5j0hxNgxe(P}U%jF_^VpI)7+9R_tUwGBZDB zaaEXp=2e~TGLf~R-Yy{`CF7Cz=kTkPLi#fDh#z+ArU^@n?%YZKK+HymLgs?)OE<1| zw-)d8Br_R>n)6ZPeuG}h_^_`$cWqjDSl8|^PJD;gfQ<8E5R2APsS<6-2*%0RqkBKYa*+#H`X-~@e8*<-=$KGBz0+uCpt=LayI+NFHFc zGio1In*YMm#V5-9I&cmw44Me*1hPnW!Q!xE+zWPeB!3(2kz(8}{~{L;nIxlGwa?83 zEE}x{MCd4Zh^j)jhR(~OhG+|U{jzJ%_@`K*H zFQ95e?~$Q&g~i=DLTB!-;ogWn|0$6KoJ<&b?o(!g(1h@I z!z{T5`;QLZ;i?dRFJcg8aG_+#HL|GKgz-so^TksG*_r#BgY68+l*D@lRtd%y8bEq! 
zXZzE-a3-LM#chK9I%sQ;NkK5vi@PWj5RMyNK4e5MvCsv-s8nOxJG8a_?J;$a4!lmA zQCx$fKon@!(6w9)DPK>FO>8b8OLV?jD>lHFLYIR=w__q5(OISg-o@tzjWnh`gx9a; ziG7eHYq`=h6&Fi%wlZm#KtoS;urhU!x$S?A9IeJtMmr>`tLLCbS#dFWo#k46Ie~E= z(ZqVT8tw4==)(5H%O?CGqpPfJtIoShbb@>#McW0s^B1*t&3pu);iFUi70CWw3=`C8 zQN-mgnBH*3Wv|ibp<{9XKVXz??AkqVfTFi|><&jeG`&Fij4)6>^BtnAh}Z{*dN1?? z0I1<0Z*SlP)Z!gvk~%MMXfN(@jJGJTPlLC2 z&e7PYD1C-^E5jx8r#`YywH(Z<7wNSq=I(fSjF(t=Oj;b;$!O^y@`_YGWya<_H4q5(;YH43qGr0#FHe-C$1ckSrm;NDZb~cElGiDlY?Y$)%w_#hjog~}gbnwbjFtPR za|h;Sx^>&0CLD@NQYN~uL=UaCAgY6i(Jg`l*Tnt(N9yfeh1<)7vWBwn^{y}E=uPQA z$1>}fEGl8V7?(hZ#GwQy9ptC{ks&@Zs{+f>V3yp2+>5qFm>U(OU&oI+)L8`^>zQ>s z68JsX>TL-t4R|g;j~wr58?_>9>`_zuv^Kc{KKIK)<#x{$_pWfv;k=NM|k9t#@CQV)A5waXamt%xl#-ueS1spOH%Zynk<)9bRL zC;4$s`=CDGcU%)$Rljdecl-7agru=L&`LrpzDZcuWENuhJ%KQqp@6^pHqGk-L~0>)rl@} zNl#vfK>f7LTPZ|`T30&H)jy3q@aqNT%-mc_7l%lBZVvMB4JtIIhy9VsjlA0Xn%&8# zsab(Cc$rI@dnQCS8G{t=>zSFU z+#K5Iz0Xr+N0I+c;4GUuP?q!QkYXyiJ$E%PaSLr z=Gl$bUi7uezulv5`026Hh+!e-Ejbl9cMBXsYmJ)BO!&6N2)UOUVv*Om82-fTZYrHY zt-)+!j79|ar$ZP6(kGbmlV&#Wu1;f20C9d;{Mxn&HLm#CeHnxHa;Q~ay@c3y(nkp> zeuRB#AHD@ZYCYCErt7zmPE`jb)*eKMXoI}RN-(Xjqwe}jt1qIr_bsg0w)YZ5Ai^E! 
zf_5%7&0$uc8VHg#cRMieu6hLhg9(+*`g73v48<(Ad9QWGs{_oK)jW|zW7PhMXuyiN zjn>DycHcy>*h4GHV`|B`Hbp(>OqVtD7yhU!k&%~srX9+ZlpfbA zADZ6RDAb7YJjRK*0*-*b8J9}wCn`;)PCB6H6eJlflKQqG|@KIpQ&dgJn&$T6*JPrmeSheMVq?ils`dt&85&uaHK_| zNZl4PDWjK)MrQA*wf(huPr~m&&GZ^?kaB^QRaO0rk-{}-=2=4-4zKoz<8# zD{~R@p*W9->1oo~>c9&L) z{o_|HtNJo`jr8IR2a;c}N98wTuq&1sg(_7d*`p+9y;!@f=w}i&H0WQ8J~4x4ykwbs zjxB1DRNgd_iQUd;tDz#xp5RL@V98Nze=l;4nggAYz@F8+>K%T4?^!4X#j1!3lAu+b zXv({v)}$PG>T0Uj^SXkmu&i|zyVR0x2PsS)1lMqqk|SiEBy?Q8@JQ3(XnWRURVsTj z(hdGH3HzI_7nKr5K{CcQMP8Qxf3Ond7}-(0jFVy8m3_~B$sG}|C}Ax5z@hS6%Veeq zv!5HZ?rj=St+Y38CgBwA^_lIU)U;m$iEEDcEyqN)0SxRUb)h zvpf(JYH482UBm))imOEsXHk4+DxEi}EsF%*t0zph9k|aB?#-n8BUMsu_!*@#H7^ad zX~D*+q8*n#F1*iR$?++<2*R{3s6LVdM_wBCPqpnuk%CmJb_!B@B1QmY(!8}*K+8u} z`hXrHP_q#BB!&3Bn+USp(x7e|oEdp76(lWQ)N@?b)Uh~J17cXr+k72}*y(&+v$Fb4 zL;TSrw(ZLDj77Axj8_U`3Yz7Z*GGSB&dPH;P3di7E6k9yep(^%*zLUVWkZ%?Lfn&q z+P(2+kE<>^eB0F1vJEDAm5=-Q2n(NGatxtc%prVwItYv1I&l>TlJ~pkmA&ig=2!#g zuiDNx`xXsU8mX_Zp3Q)QkUQBiGKEgCT!|gGXldLk7ZuI8cBmCyNa|0=^INYU)ci(C z8EXq{ZU&r`$p@2{O*Yokw709q)=r1Gl+95Dg07(e!zYo54ILIrWN%KXP&;+>;mLql zr`6~(?FVy*XMJ4`C{QfXaW0X0(qW~$BbF^C(=}i&`P}qXw+aS7RF{^DeEV@7(^K%y ziLjwZ>m_hk$z6)~^i-mcW|dJv*Nj%;I6OWyL&5Il%tB+KZ9PPRRat(;9=xB%m2r2) zhRVpdp0hI%@2XcWdGGauA@Z{%AJn=1HNl424xT(r{I;2Ij1am?t6@|w=`p0pag}n- zdehHgE%0o;a`v6^-MPNNfEB4_z&}!9?I1Q;S=&@8FXF#OVD8p(YT>)5ce7Eqsj{ru zj`qgE-cC{VVQf#1hWgsf#0M=Pa+MyT@dml`-}m$z;0o`aBV zK}LJ9?Z*1TH6S)-oAF*)?>R(xg%%qA09m18qxT~v0+j!2=CyNlf4vT1x~)XFR`t$d zG$c~@0_U#Eh)eDeUPaeurm0}1gi4oAZG)S7tNN=$L;h)>hUS2JB<&%T&W;;6BpTz= z*V4P=J=i|y`Db&-V+iWpNkcZT|99_zXIobdU19lnI>^YTx%6arX_d*VT9z zwxVYaJT-7Q1~4@HaPaC1hm(bV_aN+TebIc&&>k7CAOUtv&g=Re8;s@ zmwMl<<5eFR)T}f>at?@T*<^Y3&~2dE0aJJHj!`)}lTFZfiN@ z;qr@+UYLvck(F*V~W-6XtRo&&b6=L%J$(`+0`=YWP33Nl; z{(Y*5xpKa<-kCRO%SmYgevWw3g>_kxe^8JE!sPVU>oy_$oa8HVdH7eS;5H(86YVQ0 z7RfssitiylM+)Hqlk5>HJH97@RQXh@p4;bdL>~qhY!P?Wx|5@hn2gu%NF}`OcGMw# zK`7a^xM#*g4)lo&8m7NT5slx6Q4^R5w>lLY9q$riR5uPZWFdHl0RBelrIKk~T9-@_>f@BKd3vcg8_ipR 
z!IW6sw~Hv7V&ACaoG=$i{KzL^pFTk8h$PyH-cJ6 z6*EH?4env$OG0~>iZn7Iz3eWIHTcZT)yLoM?2lyVEUJ@xg92r zm1w)CLyW=s|p|e0s0MWbhN`&_ph*uV?0xiWuHE9L4h0ytD zhfv-7gM4`=@H`mGWNTJk<^|osqL<0`x^dztnqgqGK@2g}4y!Yx|D4*Xf050Be#6BI z#!c8YYGG~>H2`aoi6oO}@7_Xo@$SdYG9yIx$H*K*SocjEo>Q|(;-59;u?URQZg{sk zl!Veiy~OIZOhv~7_usfcE5u+K-wg3NF)P$0#JkVv4p*N`eYz6H1on%FAsSn`b{{P+ zxD(Ze>a}RITJUZnE$cjZJ=skOTti?z8*^;mWvCa|p2T|yzioCSL{*Q~;9JGo^;n>L z#-M@_suR3^PX#}71>T^uI;vWfVfz+WMu`4AeZ#6QZzY4QDngeiqO!WH-=!Yl^_V@j z_49G_oUUs6sxkmcb-BApU_yD=N;TlvbRHiVO3rVS08yMK@qyb0FRbeQ%Zjb)@#vZ@ z@dB;=D6p1_6IuLV+EHR6QlInWhkvrJ#5&233$&9NM&yq@Pgsb{aO{%$c{VTw#^q~WSD z3ya3u88COE?)M$_mTf#)9a&|{^*sn1m*y+I9_Ee%pK~KxOeRYS92#CJ$Wv_a?3YhI za@s}W>g=gBu<%0cxjMSh)IRv@O}?i10I*-?YHiT9A}yQ3I04(VbH+fGsS|HKS~7NF zTm>9oWFa>WiN_XAXM77aQxJ_Av?wY=LiqU!O@X*;k*m+AdyFmU^IF3f(R&FEpX_OC ztfeIvZit3%8Ldx+!Sx&%n|c8&D{z$#Utw5Yxp3scPv%XF|Gsg$O^}2EiEJ-g>J_gi z|Lmz%-mSIW?`QZVACe3^26RPtz|aG6VGr{Fr@x=4ts@GTx5!^Jh=zRcqvhZW#~1AY zDMZ9Nxh1IX2%b}=aID2n5Lcw+4m?0F=`ETy_~1ZzA6`bHnTw~U`?xQ{&ubrv6WQNS zq$1K^lVT$3WSHD#&O&UL7EMn-PYd@uy<#M&Ca?_xyr`j93ZgB!)pI}>1c2U}?+ z#L~^GR_f0V9KZRzzw8>Z$h(z6SU7q0k%=<5I=(1Hh%8mP;JjvwdBFCks|fA)gtQl> zl580fjl9%(6W)P6cMq*^+YVf~xLd>zc~}f%$aUIVUC+#mXI8|z8o_SUVt@rISNwP-tzK_<1p}&$TogC zvrBfaEZwRni^NU}x^mRp;znvqd=S z*xse8Ul)cvIE)5tNl%AvkFZm+C{Rx0YBN2%x;p#;qcC;`1wNu)^#-+qyTJo~wd$_M zBN)K^YjY)!c~T*GUX%kpT!fk33T6)3YHfinfP2MZjO=5Xm*n;a>>cH9rz2TFS!sfw zf~H*uUBMq>RDPow=#CNO^w~gq7}3D?iPdD1UdQ&7Qqjnhe97l??y%gUdGy+~b0#3W z1oMG=yW$5VPU~rLGSwx_*b7a?ftfpH7oF;pi;!Znvm(p`d3+yjC%}>KMZLShw5bwf z5#7Zr=pDS_aXU6<#K@Lg_^w-YONBc{b*8JP)(4%R_7ii?%hmg?!?w@dfv)XbpW{n* zR3{W>NjMH=1_UdGCs@JZ-vn7wvV;_76W`YZx%XC?!1EI-GNEU$dc8B4>HOn{uQee+;IVm$#QO1u=FA zf1?@mx~HQSGH&Ex&=hiYN=0kd={`NsLp`ZTNN$s&X*c#8UKQFC-ps1{?KqWF2Okth zL^l8#} z*G&vV%F>w+RGVn88k6PqB07rB6J(3B*g|z`O=f%Y;(e<_RabnV`;`hJD=BtGwQ%g3v%?SI_& z0La0sk|wLz3<)qa`~>~@UQ2fO;fy>JUxWYy5Y0TdH)3oae`rGYXgOGw+3j!hoaOlZ z+w?4@>+Jni;0lm~dU-(~`TLhY8k2t?>~H^MpSMvbe(?Zg^68zEh+JApH2_agPoEMT 
z$-l-c)BOti*o?qL{d51-A5-REoe@3!M@#fh<=3Fzwfy*hz3s1LXgX)8!bx^yFi&Qd zQYy|cfX~AgrMM;h*#Q2J+ksA)8?@eZF$}n4>PE4C;mPhq%I5u`i|(rv#CxVo`g__z zml_X$%7OlgTl!$|)Jf~cN>t5*{Y3MmB|pXiQHlAo>~nvK1p9x~b{d`hnITP#}iR;OOADJ7IQOq)<~t7nGsfQk2m&+!T&J3fWBrF{83XUpw)*5}kmH|hxkD-@m+^=! zTL^;-%)2vG@hsJde~AOYK${?5YW{rm_(9zVO0O{+{1>La(+Ge8fO-|H;&1#R?$0j+ zD7_2*nm?sM|I~yBIzGw#-^>C>Pu;24h^J()(EjvEG`Cmo8Y`Ii5IJ$_Utg;1_@yR& zX^!os&$Hi!&RkqoK>tCi4<q zv(){$rhhzlHaO_Z6CNwVqfXa5sby3+qN!I=B$gLr}`NZAI^{s@zwN&0;PpC_!JO>Bm$$U)J; z{bJoAJUfhxSO0d`(^n@6WRfD13IOR}e`y71A2QJ$@q@+>EKc9ae$_mt|8K}r#; z2qR))tMnJ1-aB#9h~l$y-W}k6-Lq@dZ=ajCEt9iIa|FG}vhi2ek@FN`z_dClynFOl z9-Iw^0z3|b7K9%Y`svFPn1^riaSA_fdSyIgR@`2vpYLp2R$tU4*7cVuA3T*m`&c#p zieH=Tm0P{OU$9HK*Zwl(L3lbG|9Ysu0ed+83hQ5=c>Sf3bWifIam?U8z^%-h*SjB= zz0Zt7{zl)wE%8sf{EI03y7Y^5zog}t7W~qJUs~`>3w~+A|4|FFve+2^(F@>juGlY^ z<8Ln9FZ%oviC-e|OACH!!7nZNr3JsV;FlKs(t=-F@JkDRX~8co_@xEEwBVN({L+G7 zTJTE?{+kvw%YU@aKf`Uq7ZPvxW>(?;9d&nlC;i(162OOWax%kkD|f!$ zRgI|4|F@9$(=;Iat{7FjedS*U`#C-EQFi7?QhYwCu+Y)Di{pDHz4uPF2pQ_|+r+kBs_17ZD75FoD)Q zexd(D1E02AGu^C_vv)_&0NL_{2P&*to-3RMCMDxoZ zKF-nnO?v-FvAYosuuE-IFYf;$;}0IZ33U3Ogz8^Lf2s+nhisF@v;PyDjv4&l>G_|y z@c#z}`!e2h5=zD&gg%ca&u?b<9|`?4d%j4MKBkMiyPs1$l@7XZLg!%B=uIBPC?Gv? z#lfi&#VBz@jT6m*3(=hkUU|fGl;a<*WSg7OdG_6iz(Y@l6_`MIj&jnM=OVIlXWpWQ z8B{ip{LJ0Po{6@z{jW@z@7oKrx;MYx}w-?MT;SQ1JnQig^@l! 
z0Qb)%ykJF=`fSV!+$zl8y4l7X-K2C$>ZA1yFL3gW6D==HCkmqfMQ;A9wE2);SxWi1 zt&d)~?kbm^R4N;U9ZU*iXx?(qUr&gg`QjujDdF3htefn41lNEpu@dYjHWVV2vG_zzQ| z$m)~X7_~lC|IR&gkZV-;e`dtJ%;PO8$|?4S&BHF^B!dBMTdFqRA*9kVpzoOSneQGj zXLcs(O$};QcL!c1D4DiLjXF|4`5#vQV}NEIz>x19C^z+?25o?ESK1C`hI42bP4WF9 ziBHwfCmqCqTckzWsXZt(s!iqpAHKjJ&wXFO0br_^TT=TUJ@pFO`I;bC()n~gUha>z zEmgj5j0%l3xc&1J|Bc&XzrcAn0W`|uSw z%nwr&kq|X?zBHxN9n^xLihf8}1DZOYxJYDJiRCX3sW@o8aVu;)fE+KqcC%QR!9=AH z_>1!@+k5bV;+%Nz~LnsvD7K!)M^}Il%NVP>gu`_ zd9*De;AV9_fK>xmIcD|0N_Cu&Pi?DOe8S0=7V~;Z28!Q&+2U2Pv;Hud38MjFZtpJX zVCRmNC0CEem$75%_P<*+gPcqs8S8qgO&@BLfvS4ItGbs%?EPJpr1=9r7TTwCQgC6@ z=0;>E`d-j1I6Uk0@MsKTaHjrbV*HWV@2?_jW3{sDqDDkyR`hAp)1F#rLWD$+o^nfd zH@=ljDT|taimc(m1^as(h?w!VXEE^hJ^PFeiQ6ynSM+|76}&6_0ddc`<*paJw=5E! zv|H7h_1HwxmV;SIov2hDJLd0H{lOrIKy5wO{CfsBu6LJgcLW=gHc;I#7u}($e1`GAqEp% zPhd9tJ|X!`2Set!Rq&*QVnD@IW7JKgQuz5-+9UQ?-i%MLr=$?8W-gvj=_8zU|Eq8Q zT@jbKH+-Ne4)Q8GRi7c&M14=YA^dsZE6_ukrC;9Yb+q@pkwj0La|P-en)i&>*^I%9 zZ6-=GCtzSDy?W%jBdYmOq|id-{@AYNt;@M;`ishB2xPnB*!3L z|71x_VVnmje=omsvy4uELQhER z`9pe&aLSu7HjkFzBH;FJspwkQ!*p^vvLT1yMU*#RZ0)Eh_eZ;yI}{nbPnaSvhX%Et zKZ=#UphZVd3#WIGmNvGT%6&CQ(+Unl@#NmOE3Z$sA$I7Fi85mG`OflFo42_)BpK&m zfaaI6#S49CyBL2sWG5^nVjl6>*dN-a0G^Jjy)_P{;Tc8_C!AOfMLc)68??c`&aC!w z1#P)^yiVD^sN%n}oFM~GGg)+4-xA^>$QoZX=_$9r zptY~Hy3`?2BmObTVKmX2q}q9Vw_Q-j`=hh5(R|t%J_-~g0jH^*Nv&&R1$*V%Qo{;4 z*@url8P+e7AglTJw+-<~hYl0HP~9Yx_v;+g75QvX_(6I2*?~i9Yf*gJ{q1k*lIn}n zkCe&}5!WE96}t-<<-en?0lCmPJ{D?lx9G(^_t}3oYEkt01&gu1Eu-t_ zA)*Yhn#qVZzId_d={seSN&PnjN1`b^$SR8Fy)G?OD3*hw5Mtww&KF*GpCu7yi>cvb zpoq{O3U+u}W|r3fGD0J_q4o}&j&vDONfErfnV4eJ6B52=3Ev%I%32TPsdtNw zE|M1GPjg3wWh?9IU9~1iON+L#4uy0+PmGnT%^TOfKN&ER+-$WQ7BC{f#z`khji$C* zM4v-F@3I3IJ~5R?awchR6x<`?j*T9uJV|_Sa99GMm$nXAMNC#V8!xRH3pU%cX$##aILamH1SQ5gA$F+Nzm7+e9!ky=VP;zVEOv7o)$IFRy2(9T$okMxDm6~GP69sz zJ&QuT&e(?<(}N`c`wYC{i$x6GVM_roxc1$LCnt1sTvotYCG96LQh6YA{@cj;p>Cyt znsMQ=GLLzUuEr>hY54m6&8`*hYWb1-)#LS%gL^g~)Q-ztoKNl(;sTQ74LvB%awL)d zY$oD-PoB5HXr*|zPDc&I)>|p=5d3BK%iCTk=uEtS{%U?rz{x=vx8V}y$KlcbIYJ`= 
z<_VpoS50wftZ9F{>nV@Re>N0maL`!dNqFm99^C!&*gqW5nu-S=k_{!+)QZt{qGAGN z!o_!BMgRmxm=>A19K#U_ln>#)N?$H9gF@WTgvD| zlpoLjQ1TG12BV&N%QsuaV3dTQHWXSdQK`1u1Y&6sk2I;AoVhYrQLaSW*bmoeR zTug{@mrF{k(}5fLh_-AiN-8B58d7;{?k)My>GxtVToTbux;#m}pz#%)842TxKh)yf z7l}YbppMMQhd#yBrKR*V*~@BnyjQD&jo*_UFri-*qS&9fv6xx0=55^B)>sJ???klR z+2w>zyBWqv^R?gR*&O$G;iF=1o3C$iuH|Bg6ro}=NuRnFUS!gR@OW8vq%6H-$(8Ir zX=!YqPS}rY-Hr21?u=*?s4AH5Zoh1%BgEo2+SJ!0flLCoIQMYauPm<|9Jxj&U%-^6 zPqQ)tVUnj=U*dVmgA5p zHgw0c$Vxoms3NYnL`_;~)$7wty>vrGWzp|cl(bA7`N5tBO&J+YA{UN~H3e*qbPDw+ zyXAVfPl?;@vv*YK)Kvo>Lk|3{_Rmv&58L=yGTn31N23R)#!5jrNkZG`JSAJ;KP-x^+@|`ckqMhcUPE-ePb__#MD06^&yY<)Z%23&ER^Np;06{ zWNWEt!7x(F$g5YT)?dGWgKtrb-sQbhNwp+M!bnLm;A>4Y<9(*7FJThfwyRT`lY5n- z9sHEF&j?w$wgs6gZushv7H9BC7iMKS_HK20$4-AKHcgP@a4*#NA+xd<;y@C^CSEfH z9;3UzG7j=suin@`pDztHz4&DYB>a1GZ$@>aAJzv4|76y4>!^&-OqAJi-*dq@~h0cQ8qA|@| zr>M5Q!#F)ldNON94sjV6BY3e1F`?bk-stFEKmYwmJ3p-o`$;U3OT;4VHc{o}IAU1b z*+XTdVf9QHiza$M(hIZ7h9b;t<1pC<%80mgH00?>r{w^U$jTs1aMPbvbjR}bC5l35 zxog$9x-JWah)pTo7}2nsXW2XhctI9bhlSkbL1&X>DS)vfR3ZqmdKBHf$6h&l#yPYzxmbEM7S* zIUpEx9B}5fdl`&GjZHc=nCsau&_3NsglxSAgPTZ1v*`D79d|RSJ93rS&ve>n-5%MD zJ2KYgs&%U-YRii1AN)%s)7k6z@Qb=3Sz)e06d2cM%F?WX++`~Y7OL%!b76K>mhMi> z&!E72$`5p-jBaAN=+RNT!gNL+tgOgFC40&XMqA#jhH!g|x?S<&)SV$iV z>+h$j9ar>(Mb;D6FQ68Q05$Cs1T@>uJ3~hXb9NYZmj(bIDN|{=U=h9`U{{zZER861 zRK8T4$e1`8&=}Pm(-d3P(zumKz3sWS%(`Daru|0#PH7S?cS>n)2PV$WTML~?{)8g7 zaiPU!i`phFk0nC6CT0kn(-pXk9(6JkJ2aT&JvCn44s9ypT6{l?j$L=Oe$|ay`jAoc z%EHm!_D$n!mt=UKY5jUoTg4U4>YDZ-M#-g3PQg!2Jg!RxDsGstj$TqW2Tv$i)|qAfz|g6}Wd zA!0C;S2R~%HWzYE*I+TF!XUwgDB6IgDDhVUN&j>Jzg)e#wCtwuvV73{1eK3}b?Dz} zz?2)@#Z&8c#NFPrUWm6%{n`Q@5-*~uU$d;^@sK4>zi7J?JRd7w?rAGyKu=L_*he0u zLgnyA-sLc6@>vkwkmFja`E%ZG9)&aBufW8Li%5hZ(WAU2=1M^JiP(i)cY+a(*aq(PxrT45J>2wnWRqOc13kuse` z$126=IWgjS(Q)g8!uO)X)F||T55?8)RA#2cpT%ZmR(#C`OZtE{&3qEs2143UEscJR zD-igaU>%lW&QE#)$HV=o+dj}&BsNW<<%e=S>EL{r9OaR<+r8idu5rwm1UbR z6sCu>R-Tlp1@_z=#KJY(b%n{2ns35-FcZ+o_4pGY!MJcBa&>R{H1BfBx|eOg?F$kC 
ze;JYw1#kYz)!%%QPloUI(gI)Z)7&?^lbeKJavD}%#7sHc?Qsc}o8g-s$7rFf(7GM_ zj$!mE2+7iCnJ})JuKfCa`x+WQZjs{2!czyFCL1%n_KV+6pwR-r{yfNQ-8|tv3EQ<{ zH>um4MCJ}n7b+<)j|j8HAL8mq>Z=wewG%pKgH$*GKX70|EdO9WratX?+K0ba*8+>L z1HRg8W<^7+#*0H7;}QvDOV9weQnn7IMY1;0@*Yb^TNBL}XOaysJ2~N%^OpJ3a|+h8 zKEV42Ek3WXm@X!gtlkU7aan@as!bZYPe-T98{bW7JNuT2{nZLhTzd=9LAZJ!F9*WG zPPv5>U&_xbvksZ@_T{MwGy>5#OMn%5eK>OO z%)YkLwA^5slJr1`Z6P1#Uc?i~{=wwkF4Fg7Q#rulNp(lQOZY=^dArZ7TyOp)7vC?{PI=Zvta z_HbW`IBAfdgQ`P#$P=Mq!6}o);hfj+7Eg&9bXnVlJ(Hk;1?!DZOc`JT6!@*=bw+l| z`Mb)M5U^cOu^Vtb!P00Io#mUZ{EsGo>$^S=Vdi3P+buegF|;?f*@E|SL%jaoimJjM zb5LKn?kLrjub7&*-3GU=uxYK2_vLU{Po_upB-|Di0544qN4s36DQV<>>CGZgLO7&( zj`ygBO-U@wi2POUd|0_Y1h5sK?^#}7c8?LwRb2aOSZ*m(wKp6Vt#QfqN&UsPns*$D zE|2w!3k$*?9dIJDITA`$f*o`xWRTUe8i+7Cu0s4?6}qV-qKf3=l4nzHQ46htx4=iQ z&%R2C;b@$lr;hNde|^|mZnBB>;+7ANUbC>$Vu2pX5m#p+>{d@1ecp~R^EB}(-M<8L zzi@E@NsOaYhjU9J6VQ;hnhO&Hu)sbJVq8L3ZE>dU=giudT-;B^^81%hL@&1q2;bZh zhIk2bSN90B)Stq-T6*tj$F4+{^{7CrL z;X{ji)AtpqI0s*PHFx9U1U|sJ4F{__e8|UPZ%1QZ=QaWHM@s75mt{ViG(;&Z%1vW` zC*M;vbG18XO)$Rw*g5-_Hi@zuSI_B<{G`nAclBSh zUZFzRAg*<{3Alh4CPgB$`!&4XW~r|yiT4-O&V-!rzPVh2=5U!^r6d@M9?6cQz6o|h zdLGBxT2uv1RX#PWAA>K2oR{qHb`CDYp)oRpZwA(s+U z|H|bPCpk~w$t`=m{1R7$sr!65On?|xCwSjCn_ZjrD)Z`E!9+7&hYs4WhWU$zTo8O{ zCVS}Eof>pNTpuu^wqH_;TNd2A72V{7weBDwW43j6>|c~RqnfQd(A0%&_%6xn=_?A0 zi%QLMI-z>*?p4lROvp?ydg9RY)hdH9hV-Npa%^7MFJtG5tnJ=e>GT-5pSp38cDI)w3| z8D9szYUvcU^|-H9`}v4GQZ~1T?u0;RgZg%{!5NSkgkeV*O4bwXYAj8+YDS*U35LG1 z=zmhf6e)y_?O1KL4CYC2%Hxrpoc|2*sj>f1vvP*|p+Q!+j|iezLY8s#$id)FWd2IK z{Sg19^ka8iqu3^NzyEf#kieqFp^P-aW-OLLg3jTgbY280*S;a`s-y9Ry*`06$xA3% z>O`UEv9oua7oNt?=mO~~yKjphsQJEKY8VSc+vcYOo;f(t(bfo*XbWrzV~d5Ma09t3 zTC(-=vVWv#eoAS?eLHzYi+N`TDMMJTn9#c3J(V}@+X>Zz;HJHk*`lp=O!O}qY-M}9 zPzihX1t233guH(1rM9<}5@nq!;U;bC93%N~S6x!J_}X5>xlmY_lI@6nplA^vOZFNM zY@HE1EM*M1pN?z&EEkqGDo+_3ch{|`01inq{+v5>l1?>-+f`MM*TSvsXcW{?lU7Zu`ht zGM9$J8^7lKd~2+fJJs9?a|k%mCf*LaBki2jg`>qbAO;HcOL}`7j<4>VLJRNCl;HEl 
z-Fy1B4c1wA?W(ILN-fRymyL)BAY#KYAO7dQ`orS%J^}pW+%O(yVjB$p&N*{8*nB^rDKr6gXtMnFC+>Ux#)3ck|)RHgK-x#b--3x44 z)!WmmBctYs!k5YwRe7M1g79qYC@f#nZZSATa678-{`@Dpl{(8cfiI6wpH(#XY1^O^ zjQ4a^%uIh~@Uu**AIsGG%PxOWdA?XYtiaxmsw^eo0xX|sW^W6w-YgRA9@}4(&Xg6HDyR~>WfNJ@L%tlg4vvl1Z;nd zlEKJDk3{YKBL~lqmw3$(TZ!Xo!&iRqLW*KcKPg) za*iIn04qOF!6j?(wb*n~3DM^#yZS5BJ99dSpLjGe^Gep-+vdyD8KS zdwJ|rb@R&iJUWFx9(I2%VpmwabSN)M@r;*O4Yl+*c%MKEWs|H~?GTMHOBl&e{!0b| zh-Tg3_VIyshKwWxi&FWlC%ng5IInO`6fRe%Th4~&BvXLA95CJCa)Lu`Ufy0dD+u4> zs`#glPk?Bn#C0Cs98p=fwQ-_lih9L`iZ>2PFk6}yJBuadDL5`o_5 zoRVhA(uCRPXRaEGYnD7>8!(=9DS~!_NHiG zMS_=uG~7k}Q~y+4r&q#AcH?{zV|U&lly(5Gy#ftere-vk5#yXAV+;)#l{9s3Z1-Pf z!1CkXl8?xiX=J{U5yhwe1^wnl%eHc=ZuX2(47uVq3f<`*g(nz=i!<-sMMPN`npwW-eW z1R?Q$D*_9LLH0*sjx%rkqJ9tmO`S9TmIHc4p*;{RnIjOk{A%j!oydpg&OMeM6KnY1 zI`1Ny>J4(Ff?#bOFf`&;2Ou5j*T1(!%V8o-+{(FD8RC}&&1SHrrbjB`3)^+(1#+Xp zEEu>`b1EvT#|VEQL`F~3#tPAL3{Gv&zvYJs%XWN1Fj*0x1gBmXxM=8N3)m)E25bwpW@{$ zYp2`LD`SnC_s_wauTF=#>P$~C%;A&uiZpS}vdkIokQa2J*R})&%^C&9nluKalCm)q z1r5FdLw!jB9GAW2B$_U4e{?Ote~Vjf_iwA3PlL5kooJ2FJ!3Yv2)X0dn>fFdCx!w< zQQmyHvSYPuv@z-1m?)JS6;Zf-ob7J-^+<^XWs{E)tIa^F`-v%~o(;{&H;CzHmrbS6 zso$YojCQBdIv3PL(K8mwrbwkHq~V|E;;Je*EL($v<%zB;Nl4xH%*Y+a0t4Sm(G7Kz z{2=P0p09~gBFy}ES7(MmsX?B`Eg_&6(d`a=BUOoiRtD@TPCq`nH@SCbGX;Nkk5*V( zOG+SU=-3;nk4x#9q2FSoem2pltG8JE>b{ju(|MzM7Z*x;MDv=<#}jchFePPaCSiy! 
z7Utd?c4dr57qz{Fe`5}2DrwF;(ZF@#5wi(*n7H{nBT9K3hCi*>Q^*eO3#t7wOC5ue zTwgp=AZkzS?5sujZoKi}xLopx@hmlZ?e?887sjAi$=PKH5FHoq1jeZ&op5o6^j$UU>d|`m?^=v-rILjU1$JcwJX*OXdqV99k<4S6Ip_`u@Q%s5)aDXD6IIH+W z4TY;x8Y(D5yizg}Dv6+B6t<;jjH9#FjXW=3?t?lubE0ianyHoW1oBEA$w{>5O4grpB@bWtkT8Yai{ak?ZZ#Pamjy8*54_oTz^$ z9ecUER=c{Vy?;2s+wltX(1sUGi#33k*q27d?T2PZVQ2Otrf1OOlEcOkMFOddL|paDtZE- zhM2bH&8Y~FWGik!UWjt{vygR9Csq}5aw2ydu2g*6V!F?rQ0_5mi+()QBeb0EjGfv^ zFte;R{}9clMEl7NRYnfn%Zz%Q8y1eiXm2Xadmj#5nAx2G=_@9Sn)xkWcwy)`<^+jnq!IDQ_&T5#mP{%K(@M;Z{?G} zK1ez=ms2{dHP}NA%j1k&Fj)I7JibLk^&u2I7~Oa3ics*!HNgU6W#O_K+Xs5*e)lDS zqB4(_JQhEeAHcz1=iS3?Yzz)#|8>pfF^(!5$VPHd@r0g*QEmwd(OB{XdAha)st5Cr z(spAL>-uRz3Qe}}wiCrcIkH%c%P$AK)iauD-0E3Tq|+;}Mb|6lo1XOG(JdiBRIQLJ zArcZm;E|M+bn+v=-WEQW5#u3|cQY58{+dV18VK}>^1Jh#;QkbL!9Av6Wp}x!gEyxK zF}K_3u{#1Aju^Xv>o(E~HSzLteDc~`7^UGb`mNJ>fy6nz8d&DrnLjqun!Z|JO;%W3 zST%pYD7k&sV?AP^!>xelws4W{hM+oP`M!TVNxbot00QW5ThsxItpyH5!`yC7vLX zINNd%eg010Lnil0VzS7Ih+@sQ+~u5De0r06N8XaFO*t5y;{$8iQKAf(k(&JzIqYAk z&Jv71Bdnm|y$=|?x(Jk46R6U3N4Yw0pI$~2vW z@tWqEMc8`TlLxe#2DLES`GB)Os@;(N(Rcku{tnp;Q$`jE;24;CGv01&_$7Cw5pGi& zgb5;0Lbe{uniM)2wXm__S5{H1`8OyTng0q%b5)F|F?UquA5}J&iGD5{tLuzfl}PVWC#Il|ZJ`p_471hQX8B41xQd zoeioX*=QcvG>lGy(6_>Mnhv{iUlrlJ9?`ae-Y_kZTnWTz#EnYUccqjX#*3SgpNX3Z z@OaNiVz{d1(_0lgH*A3-412gfBGdl^U9!j%jSp{u_^@NV%99$0?kqM`up)15z0cgM z>e!qsc#`6;xnPaFb4@^M&zt6Ms4?j!dbS~`E5sJA<)G)S?^&_!X64p0JOD%hf-?iC zWBAXBd&}f##v&S$wik4xyT`|gHm=Q6y9{c21aI?IAU&21CyoWe{l&wL`VplXowCur zMr(94o9Dcht<^?E9Dw)f!&Y{TYT<_Ukos6W%23(>-(V93h|#(v_`p!ed|KGJcU3fx z(PnFzjV21|EIHFZF0Ia7gN=$frPgiUy;~hkX(V@>dnYM(&omWo#_(XidCbHsG>H2; zTH{tiuqf@rV-laquphH|66fjGf)Qjm4(+Ociv9Shdj_7s zcvHQRW_zbOVb^{$7QTjqn`o5pEM`|V=x)h@;(z*l%lo73P5K$RofpwY3e3}U24YU< zGVOCh`{omVhB{Zk(y|-JDnvMX3x$H;ubcC=T1PwK(>r%=tYQi(&I-jpxE>OL+j$Iv z9x=;!GS&SyL`h;m?{K`qHZDl1Hkf6@N!KM z3uom@2`&yI+fR-~Lhet+iu0|F^vk8PQwJ-SRceTenug*-clzecK`e!?=!*jsy6_bW zOQ`2#*-9)13a4L99cn$N78Waa!sC7ZUi@2)3YRnhGexF;wEP7%M9Byn=s5F7LrHe3 zjP6`}FSxDZNqPs2F*b!RtVzt!slM7`t(kCfK4`#i+&_ 
zH8b-@jvB+)l`iTX+c?X_?4e%ku3)&<5)L@B#nv-*snLLox?#)cFjEwQ+`Q?%7ZjUF zY~f9siS3fSx=imZBl@_QU+JdU&edfh=b?E=YD@on+eJojXsziGF)RH1z}KX9fiM&$ z45)dkBB9>M_r7JJj)N~gQv<99pU>E+Rw{tBIq2GwO?o7Dm_oAAZKM3N(I=LjY>1;M z$TAY0?>s%mvZ<_QJyfi{4LB@lR?!(@WvshNHL0J;anHbJX3-1 z2BVg(ye(UH5}*Tl)(yRWuguv)`_6_$Ycu!y&z+Fpsq5EC?m~X`ArB|p4*7maCU@$6 zc@2W52r4-!o9JypLSE5^)h_0#uHk=erPdz9v-Z^dlnrSovdh^mBF7N@mxe4i9p?)x zHzPF#PunnPUFB)%c1&5e>*M74vMbXPoK1uTlVAXuYJ4Zth|!8MxS_3}97H`?tI(J{!y@OG(gczH67 zJ>~NEKxy;!3nHZpy{Q=Ex>uwm+Y9!ks()D7nyOc&`Ot2WEwKcY-*W<4z)As+`G&9y4m9)`F9P0XFzBKCxK|L`au5e5 zd)R2&aRxfS-nLw*x>hAus1o_^GG{?Z^jwCi7|*Y8=d40LVl|)o?r&e@x1V@jS%HzQ zyL+0hkEQ4gpshL}AL>M=Zil?eZj)2pcsKi-0Z^dK6GF-YLGKy4=wCP+-w;6uR|OuS z$H{!CrfK^csyF&u$R&bgPhi^`8}ry;%Fxd6R3fOiqk}#+wZ)TS!fzq=WE-G9=(VW@ zHncYXN_n5VZI&e5*xNoUC6bcbxcw}0frI*(uZ3;_+PuArt5)f_QOVhnFAQ{7UC+Es zk-2s4l;2O2N8X-RVgE+;O6K<$pZShnF!+gaqQv~g?JB1uhRYEJTm>FWJ=;BAx^?{G z_2T^XlG7$J5%UhI$$dHEgG+z-Oa?vEs7t z@E#yy_V~Q>Ljf#?2W}#0c7G6KFjSSJtuPO>1iOqcgtD{e8|@TX!}~_#LvP1+VmNay zQh<8KU{k+xn?*x%qInuN3N7bM)gis1^Gh*tg%C?SZw9+dSzh50&GNKUPVd*Z^Pk%h zHkmON)@IWBKFt<;OlhXV3R`Ux^p6TFKNR~zZjY>O;XBH7JQO3+R7f#>6*kI8wz{yM zW9e&0ufvEoZLb7WH_OJRFPKKtFb`(A5L#5|c7txwC@nNVBBA5A&R$hW&E(A7>*{2h zQ?!&s=COXg^!6i~VT!$wop1q<{>N^1PT#o4>xJ5+U{w}v{B%38gwIl0vkhJhYoElH z?1=@H#xp5rh22xvjPJYv8}G_s!d}dRMV-GL^Y(g&(%DgZxBRhAeAd=#Cfo3YvDJsr z*Q9m*dO^6kkY_ti<_8~lQO(b(-etN9Rnp988eq#7nA#8$=H>hQt}Pp$O%ty=X~>E{ z+eSQvAZFv|@yS;7oM5os3?D`4ijExiNjN%!#Uolex+_eRWFkwxC>$2&{Iq2a@DBTE z`ylPYZo08aC!=T}ctW7Gi|DDVn32?8!=}r2&a&v(0k&DWThym3@C;uCne$xM9ZTQy zdkK%N)$y9FRXRP8Wmad0u(io*Nxv0OcvXpg5{Xj)m_2A_NF6HQG6QrMU>BqjVGUPJ zeP!Kh#Tm%8`A)Wj(#%e3+S>yeWUPPEPv6@78|>67I)w#hbKL0Qv>9W@z$-$jn1QI_ zlU)8ovo5=x{yt2zYvIN11FYF{TCpAq9|GrFB}=3(tcZJG9Y;eI8`<)c>YjYa7#Htg zUrhcbG@&_{j-Jk2Z?{@ojiVDzJ!I&6iD?Z1>M1h#(v?e5&>q(-Y^v^ILWq)J;!`)Y zch@j)klK}bdo|t7w?MaI2jsE_-9~SXGS%PvZj=+w_-WiG8*3R%B)pg#D^O6=Zz5A) zdY7ePXu}httSM%ZGdI0W`DF>$DV^+)Z{`{p7>(3#7v9Cxk~il{=nFHev9M8({wY_= zta+oVIE1cO!VTw|@+rgJZpEe>$z(jkgxZeTBn!e(^pgvdN=UynnusSdjQOdWNw`6 
z6)d>S1D1diZ?i3y;Vkw;+hdX8A_~Op)^WB#qzc-dJ7-firHETJoCCAy;k6NbZE#Ht z0&+5G$;{taq}xDz4anEOG}I8vE;pgwM|wK*V%1qoR!!n|OlYoF^(7o#4uzc~l|v}b zAwb3#>}3i*Q{mU!(gmg-d`z7ntO1bF`S|;_cI|^Zh0t@q{p8oj{)Qq|>??u0(MR}Z z%($_#f{;bd!`XK9cZ0&_Q;53@zaJ-j;9@`Ap6zTliSX z3R|0f7ej48fDDk8MX8wh`)wie3j)v|y zxh~!1ktd0`FPHaE0^!*}nulCqsc#|HnD(Fno)t3u6@0|XrS1X+Ze7oT&>`#5gwA@lmFdBe)EC*3LWGS|){}!~+ch&%k*%s=_7w_# zeOmk#4&0ix=_6VNHA;P%uFxYUi7J~7R2(6yqV4XR6Iylz?acKNOMV@%#B4^{*`0!I zIepC;!M2RTckIWjteXYj1ucCHn|lU3o={up@@0d%Vv)4Oc_N|F#bopmM*NP-;m}FF zLYn=i-LTH!4{L}M{G)K-b;%g@#7JsYt$QEU@cr!M# zP6Gz81w*?= zkw{;eu0F0`5;{sGNlT`O@6H+9GaJv`1`*}S1~(vxV*jlNh4C*cX3P=vtLgrB*=G>V z5qjpnB&g&KgmlyGsSQv_lkRXixo^xe6YU|tusY5IWMdZ}P(_Dv!L~k;2V0H|8{A|k z=&)}KmCmD@K82iJe$OOGqTUf1W;kudy1&NT@AWZfQ>7N+dJWp5tF+PPNl;RnhVi*nsI%OKPs2SS zbD#~{cF!utW1*x5lvs=QI?n^p;fF1b$W z#15y&y0QGU-?dX^9rUx8!uU%mnDg(7{8_`!8;b)8_M1ZGwaJ}lZrsZ%kl6afDK7kE zU){|gF`g1eAR4hKzjMt_45Q_1C9C$X#`4X^yHFxd{tC@1+W)-+cTx0Kgu(;*T!C_x z4U$b^#z;t5fZu9EAx`c%*`m!o|+pAC-pI<1@CIKD9IzznGyRoK;n=)SNojz-G#Rk!H(M#X`ZSX(>`8z6u~h+|!BX;+w5$M2DFsUbNENzCDJ9g^>9 zP{?l)#aS$}SVtEVrqVm60x|DZ)}+F)z4CpzDX`AnwX;=WVOwglaEuk!Z0pw+5QxIS z$DrKUmblp%-qhG81_%tOcQ-j0XTBg>g>qA!-PHu#x03o}c+8!lnb7%HV)MjCFIb3g zxot&AWwty`Oe|8Q8D?8yeULJa91RS(tsJZ3Jr}K%JfYbHg>{xq#M7`bd1F=4Ux6>Z`Ii2Ns?AOQo zMq$#^11uqC5*8Fk7w#Bp0Q`ryW3}G@4rn6=qtGTVJ!Iz^zam%~(m*K`n{8OOWlNzi z1*|@QWwH|4atL~H~u?uIrVI1xFJyE%py8~ zw#kHf!C}1thiY$=z5y0cFLTMr0_egCVgL&jYHfA?T~AG;J#`)TGJ2P7j14dN)Vt*) zi1-SR#uMT`@jbLASaQM{d>z17=!>DL__MFJWwDtN4bs&cErBUjo_X~~43glWPhlTA z+Pt({-h-xrex@N9h=^M#^G_PQo6uoJ9I+CfQt9`4SZF&%AVcwZq2kM%H?|2 z`U@?EwL7-4*%*nZkHj8h=+@_Uw_uQ{<y?mnDV zR>>PDu}9w)BL~z|mob&z<12lAxcp^mWrFf0JJCejS;VehgdTT`%zjT;`X9v@Q zZ)%mBgWB{s`;yTlppmn)HhpXBeLc1DN@`wuAhrMj!`A5596A0qmyhnz@iYW5&3MjP zVvU2I+r8AI(R0&lYn`u(?N-JjxQ)p1u7tNM%FvfFrlMx=F*)82B(qZM=D9n`MBD6+ zyM-L9S`xObHVeq(Hrg2L!dlC(x{A3vSFp=LVAkABE!32_n`L#%mMdU zfa?v7CTr7Ww(KjluI&}E41!sYfA^*P^rFw`DUPzF#*nuxP-*5a278au5vI;V>!V{F 
zAe)JSZrHwiv#MU6gHa#(!Fh4jcHNiwv|!AL;k>8I{?X1q@V?Aq;$O=Jnw8?&1%)kK;OAjhDGOgQt*ZE>wz*HEPek+%=m zOa0`ReT<>;RMy;C{Wtp~zkP@Rv+G-I5c5C&!B2{cm{=UH=(-fVv~xWjZfeH&)07}iL(Ud|zxJ%+3bDtqDuNgGIkhA_7!N6zS_un-$^qshw3 zcsU2%O3`k%{J364t`mL2ewKTfDx;Y!w6jAo?8y}rNS5*nIm|fZe%w6Aieh!)uAA53BELtGdF#vFf$2@}z8`p`a2 zychY~7w~`?(9Pvm0qz<~o{1tBD<+rnAPY%zJyl*eot-=MPeWqyQgq5{%(ZSy#eu3H z#2QUMxRn-52fd8Q5voKl87B_xY5fyw;#H=^wjOVXI^Q9pwIMP)kYErEEX}FPAl%sd z&TyD(X(0O#(Cjzc(?o%)u*`zmiqB((R*w`TXhcFjl`?jEGr69YC zL7b4^6$GUQHGhEekbGWGfM=4iw)k3XOOe}-?J3Q_uFP*cJgOjHtGe( zm~;CGaaQ8Drg|9G0#!w=v(Zmt&@)x*Y8OosAF#r}ToeeX@`A}P1WV! zxP;bK!nA`JUZ)>uj)fh&p1m$Oin^m)yz^MhXV}`>;kxNXWwlum+ezt6z?T5u4tsWY zu*_$-(z-Kr!+NU5T2OZ=dA)>4NLWj--dWnksJ>Q#8zqRoU1_5^p(4rJ`1Kq70kyq; z_@1P9#k5Lig@_MlE+r=ptgo*1F9w`uJE!8pI;$ooaKReC#>b%y#2;LTb~hxd)17PO zZO7}YGEc|CZ%X1yHjxuy?}t5*B-?%zqk{@DF6ajPM8noMaQ5kC5;97`25{)B0_FyZ z#VQItP>s@B?>eJpB`B8ZUzinJz^tM(sab-1Piy_;SIYXZH>BnTld_2dzcAulmC4$7 z%_zfpR0_mdsP=G_XRPVcYMDQI5AdUWlt$3d49TkIZ5`iy0gyudw6JoA2+C&(@pO`f zpSlXgMuo`<7})HZNSF0R%TgLt(ByH}I#!!#OZjO-o35>2D~1d#-Wd#v$R2pUEBC^! 
zXx|ZSZUnL8#sEc2P{B?3m#FE~9J)IcJ1_E7Say|$_s0~hY)D(Tx5;%=pZfI1cqQF= zmFl8h?e5Ii4Na2_l35;@D%T>gbj=cMju<4BaMPn}(Z0wtS<>m=^>nUWtAd<}NC9Wl zwvO)Vp#j(@*fQD^R_8DT9>~~)8fY$vQF_3cbkb9ifSK2Uvs9UjLnUuT(7XoRQRU$5 zGN~p0)ieQ4mHgu0&}S*Lek-DBIpgN5FsdIqsqzf_36~rPVX^%=MhkG(5G?hSG2qkcoY7@B#N>@Q)=MO==izQmZH-Igym^f-6_u!b~^M|0je zyWZwOV^$!~M{9+1W6{!T0My-6jXO?pbPjW=7E~8~Z3Zz2&-zg*HZB{}3B(>OM^hFvIbh>9^|{Ci7JwC@Hu{z1UUOc*#3V(Fxw*Y`7)gh8SKYzQfOCs)~~$j7F#g%LD@F~bRc z12K*o9RatZ!)#!lN276a+m#?rHbU5^afS{=x9yFTDAVMXr2x-4sUjM@EYr;GKK8Jm zA`8jqogTH&uv*9taukQOc2`=gSA>`c9XZ#@b$}M=yQvU$6vMd^!1|q=#%V8FcpEB+ z?Magr!0!qqSa5I(8L?!zzFUgCUg|txZ{0mS?DLZRWp;c(*ue|7smL3ayg3mSmNV+S z`HHdsBd3%!SG!oq@&_OyKll4gY5)k_^B_xuOrKHeOvyH%8y800klgrEoe5*tmCmsJ z;%2`vkpyVl(Ph~zKktOYBlyl={T$r<(BZkYg?G%MpnCoNtL(-S{6Jsx&;t%AoR%P) z!knAL$iMc9;xDL8e{~vT8Ly)gc!uS+yol zz}%lJ-yv%<#}kq(L{JDQlAlFr>1#bI$>;9F+ify=)$eLqnHqaFIqOgjvt@x^g&ngu ztC006J$M434SSV^rCj%fc;4Kbxwf3$n=H%$%=V4OBp7O}mJiiL5Vjs2%bE*yLo8K8 zHi8XsS;NEv6NfYsiqTOKuD22C+9d@p{#pfXs~gnrd4W7^GZD zPM%dY5O`G9nNSF{Tk7me>^g18p#tQ7E^Z`D7$6@krBoW)iVn}~J0cP~b&L&1on3Gw z1LMl?LU+IAXu$e;Yg*Buj8W&%! 
zOs$6rPicdI0WLSwt9`EDJnmJoz-H>(w0(oUYF!ua#1r-@Kt$9!Q;#gpQM(lKvGqvy znz1W-rfOsP!aG@@uGCly)v3^m_0gtcjS;96Kc z=o8!9^_UlCtn9gi9OCJ#sz4PoHZgWHELVFus8!;57mig2IsX|vu2=RIcV6JnIaz-k z#;=dF3(e!B56Ph2b@t^l54Jx)t91s>Gn#b_D*F*^c zB9c?j{GlLdMVL41a&V2{RD%E8D2XYh&=0I?L2)d6?5ymb@BC}T(!Chr%=RHMm^Lct zEz^Z0V$3g@Yy6%i|;&t%oy+xzCrZVG)1x@MQjAF~SupLs0!Gq;!y z*KvuH{QDj3{bAqz#LsbTjHAHj!0LbX)gO-s_*`lIak1$C-Osgw0ciO$-_xWAq|x76 z?fb@?H8cAoym z-bBdufD*=o(0_FF=rHq|Fr6=3)j)FEg<_=K^9~|giE-kOOZ@oB$cJNNgm2R>o(I?b zyU4yn3s9Hoqk_M4{vS49oC7XISm$5d`jKq^#wY*X`|o0DA8!8(^S?n^W_oD?uQpM8 zPiF4}v0vi@U`Q6)wTg*zzjXd#Ju`zeiR?{u&t9lr^vDYAp8=46hDL%bfC{s4+G@!I zs_npLf4xI9^G#q9F17gEbFUAECI7knzx{)^e@}`SOb~uJ_dY85%N4xinrkE%J+J(y zk=#HvVlY+biqth@;yBWy*y;TaBquVI(#2bZ%0GD~3>YYFEGA)FMtcYVY5IWqBEV@W zdg$|gSl9#1nT+Ozux;{<4>e*12U22xLao{V7r=Hr5S&zb=ii$Ik7WFOUQ}L>J`&IcOx%IH()rMYrJ?WTO`XjSMkQ81fd_(hQ!7^JiPHtG~d0g~A{!`9{chJDasF!H(|o$|YIu$KSu-hX*ZzuCPQ zfTQJue}AaISkG^7uJhf$$v212O&{&_s-#WZO@#(@V_72U32^(3IBPa|Fr$9>wvgY{C^nS&G26Y{~eV77s3CD5)aTX zFq!?o9Q^mB?teM>@4@LG^mU5k{~HnAdhI_8N&>SV4W^OYJ2uwYTSW2ie4V2;KW8FE zz}{Gp(qAc?(Q29wyumMJ>-|(E?{|P zJT?Zt8GVzs;^YtCZl=G;`@Dl*!rDFlpt@TBC8^@cu634{RDHG7mM!sH)D$rnmF2V8 zt~?BL{$}pwm4j!}<%(aO^V(k6-x$5OMX-q<)3^}ab>d|^9651|V}nUoG>ba>hUH>z zaka^hwvL%GV0r0DzaXR@I0V7Qug5;o!G>%R(LbeeyD{%bv##)G-NT8JM;A|H)wQPH zUrGgMX|Sc96?iHu54QeE`SACiH8WrPT%~a3w-<-Vz&lT-DL&JPn_{>?jZwW(C|q^y zAU*SzQ9&<8)6YC{;jFS*kJ8CCU_T7WN3N#<>j8ypLmxDM{q7wTtF|Y57C79TL)9cI zdEIE(pC92IagDRxnaLGeMs(=2a!uR4O z;pj7-RD0I+e&_)%+NF$K&(9y}o31X3{czlqdq&nXOZ9_W@fsX85Dg4WRFd!1uVOif zC?G`o3V!htbsasq{7J~cf7?p{UIVNcyCin+L$m#lU$jEq;&9-&d>*ZGaI=yVC(qz+ zaWK1;IO{a0asZY`TT7as2JbCwwnbI4@Wb67 z17`N!GyXb%)Up3lF31Np6YLYEX@y_dF^BE~M}B(C;KV^p&@)oqXOa8(g1fafzAE;I zg@*x?)H=CP@Q|MkqN8t8*P76}JH_66{Lp0_Xz4q?12qwx(tYrEz}E7908rJRURb0$ zbPhGYNqV`Dsy9AB<=-9hV1m#3%_^v`rUjKcy;{0)@OOX*UkOl>!Gi6fYLQ1R$6iT& zr=*?C(W}SS#)AHw1#oZyAZFM79kLl%?C9Bp1OgDg0}wtdO9VeVbQ3?FlV{BL2)|+~ zSnm)o0w4_u`;M#Wt}*4qE6OYY{!|5eg!Z3U`u<0=|7ii~KaTrk^y;A_np1`Ral}RJ 
zDAVBun;k9!M`T>JPuS0WM^9F0q{Z*ysK0~X6m<}>kL*qk0-K#M4i4eB;W>yuU=cPp zfRmmi+5eH&{ioLyTq84`kz14Zqy-#t_K*xw=Xlt=hjczAa0==Nhkx-V5P+{gg|;}j zRkPwrfxF-DJQX~0sQeTF;){O(QRPP0cl^*L3@YT1{ z^~*#B*qZU&sO6>*^hSx{(W%=Uu?^&!6-b$(kVD1+Wb&rT_q1tHM~|P66WvXz*m`rizq?wh zvK|@LPMv8MDzT!UrQtRFQ&7CzBiW-%CE{C5TJ7~=aTz{jWK1VOT zT5a8aS)9S(q2(b20y?Tu{wlLt-ak2N?+>6r#o1YDVhfX;)E~HE$nIxxl)3h6TVA8R z;bwZ6*v{*`!aJ=5^^?ID{B}~H%Yv2=2_^S7hl6X}yBe16OpLgU;Sz}d5e!!!0;)0V z`TYa@{DJ+dsQDIGq4n0$n`%a}=fttUM=5|sJtxgSRGZqe@~TkrMRd%4N8N`Cv4TT+ z^#kbmL+{HgKAJdb-)6C&-b-{(VG|3cZ`%IWoO|(8Ms8OEw7ySMerB{(6GcTGCgpbO z2Z?)_k7OP#o}|BLzL>=6eJHW~N^bAgpPzHSHaFJ!abD*^YvlWZuL35L6Vn0E{jl9Z zz573xIkOj9ezV1ONHPD<7yj+CSMPlk{1*rMtIhVU#d{r)o%g58c@N8jzwuj{XU~pa zLLRuHd!&rDus!mSBtGz!{a50h*fY}%2TA)s{M#4tjt7)|7GUF zp#FdNB-o#(^`bd&azYu+@*lM|e|f{dxXgz;fNZRhD-8R;{ox*=0fjw^4@W!IYChcf zi{<|X|NIYx&r~aerP#+9XxaxkcN zqRlRq%0hL2B*2P*wa5VuNcOPQLzEb};p-A<7 zE`cVP(ipl@)E?a=WIt9aWIsZFm74vqzZ`D?$BEYxxRXMHJqSnVyO)m-}NtA&u$% zu`;v!UQaujtUtb5=-*{@R5Q*iUyhbsd*iX!si|E~sj~tl0?m&?6vX&OMR+?+2L2lan+Gz@_$Up>A_M8wl`T8F1ZR32*s) zf|Tv#LjP2Jj@CM=2Bs~xGApcbw<{))AkSGWG8JU~>-C|Fop2*cY+gY_#>kUHX5F6< zAej2`OP7$18HHk`J%(QA6psa6z5=*eA*Us-(GtB{9*6MQoce?99$nwF95Soy_Y>43 zY8orf%AC-uQ}@O4SC=9=V<1s#!K9#pI(zEO&SkP`oxL&h5$LymlliY6kXaQ8UZBSP z%{oxibpq0NRnqyn0=#WPCx0b%R5X10rQ))vS87fsK`getUeJ1Cef^Vujz=4e(NQTD zz55%z_RR1-f+upZp>Q6SD*D)03`&ONDC_w6>bBj?2D&FU^*v|`N)PJOKQDT6@jVrv z{MLB3%I(IbU>BjJdM8aJ`}O?A_w3_uLG&hm>}~VcbIug!{ZW(T7jv?r@=4+0Dx_Dp zcwsvY>+AQ?DJeBiJ>_vG64Tgg%jzZ#7unvNv6qZ&>sIMCTz~7jIAJLem88+Qwyg@4 z^?E-QmSskO%5S`TwV=>TT&s9GpTTI-^H<_5P2Eo;PoQ!z2F)D6;USye@mkGuyG}Hu z=stYkPvmm|vDgPSS-fin)O{Gk=R|{FBPt2%uhnj9z2-xpPf!6vb9=y&>YF00h@Ws$& z)el1=mocs`g`;kj-Ek#wyXnsr&TIB=8^uF&-}G~aZHnrbUcw-87OE0y7L<)~o$Li& z<2S>0jqLU#O&oiS9uoF4+^LO}7X@C*INP zKXH@uW{Z~NS5wlR*sheP9Ub+UtT5}(bQL9VJ249~mGQ`SL8g8K*ZI{C!rwaI%^(ho=TtUYiO7;S_vso2K#n!pyRk3Ubl_ zUKrh$wF~mR%1UicDfegD7@W?^bGGB!==cA?b}lfEB^+_RE{nY=6zM3BT@o_r2|k7= zt$p4I3;fiE5M*7UEP?1`u{otfYgx*-NsU)cZDhGe@2u!@kBW$!QyM~!fyUWl6u5%6 
zoLg>yDm(J}wvZqw3w7|<;F{Z0VBlRHA7RrHjYdb}18d4?`9mp7;0-qr6frA@4G(e% z`LsNJ=wBNV?LPLiwk3K+K^@wXXGaCbXQw??z=o?c#pxOi zY6zwcA0}!o`Kd7`tL=}_A)KXvvG~W`#boB5)I_u!Ed(b!45QG>XRf?`- zRw{C`=_cOWo-8@og*h^`qDk~vMtTv%i#LP)8!7vhvrG~+8oufujM!P?TsCnJ8hy_3Wi3#sf|4Mz@x}f z>fTWGsFAb=%v_>jCfxq!`+B+rrrrbc%VO#b7 zWeYd^8X#+HlyoeH znrOKb>c1)HBbttgz#t(bn67ITxR+HsO#*{=@94ZK4i}aWUa>SMdWiwAF%A?5ehvP# zf^iJ3kxti3xfDIWnX{rW?h=@MJU->pn#?oZW~o(P40|0Fb&k`ju)3a*3r7UQ@H(AL>X`*qG($vzJjM|hjZRC4}+0r02ABVQ_`rgghAs~QYw>|Ma* z@%j3)x;T+P(8EJu*}yv_xkLDcG(z6nQIp)fjRyKC77V90%KT{x#;4G?L6k*3d5pRw=x);d9$fKOoIm132E%M|9-aeuOdXQ7D@{9bsZKvAej zm6yd^lTKkwNn+tS&h-VyyDQ-3M8__;k1X0m5@I2Fo3zVeDlZGq8g6P77lR9?i-~3p zi=|u%fns{{1_!5IlEPrq#qh}1`P-E~3;GV%Nfr16iJRxHvfmrrmP!cQ{NkT7d)hUNIQkG(IF71l4gVK%-Ujs7it>;Ib=!%p3}O-XiBmMmFJHg&!2snGS5@L zw(VHh-(j0R-1{pTw!LE@-ajzB?IBZWUcRXfBJSFh+~pSAx!=;udVITapkp)1piRB_ zo>Z$rCvFa zyG%mUr!KLDa3wft)J>s_wcmM^bO3`J5?iZveQ(wQeWNJ0x-(UFD?0}5r)LI-#Oo)K z^+HC8vtY?|(^`ERI<3KlJR`*XbJxM7o!wu}MQz{P?ln@}9~;)*tu-`9OukzZCW5Rd zD-?1@X=t;3xXN!{Ie&^{T61si3$$&l!nu=auE!1VI)JvKu?jOc<3uSNLgYYZs4=E{ zqGq1X+KkKlb46R`^d`(VfsSpyqS`G}bsU-bx{4dT*Kkk=-7x2?+W3H*RWI*V(iFE? 
znHXkask;SOZu#|6K$&}g_~U8}_CrRm6bcxXYHPT;18|5kQgW*tXi8u3;2swb2Zq(h z`}&BnUhpHtdp_N>Vz(JkCA)7cygCEdKgZ0>%|}arC13t(-1=j(anFt+xF2E5KQn&m zjpOg#;2@S}-S6`tFJjPX8Nu1$BRzWTqGepKA(O2*ILsI)vgmf#q(tJpG5 znj?-{!J*P?$8CEcO-9U4zqW+Da0?~6uyKc2VU-Jae(Pmt>j@p%$dc7Mej92iI*Fy6 z=`Z$hZ-T7vHb(z$3(Q=q^EVjp5xz)qY~g3~W#$|1P&BIlV19S3D7m-QuM&^DNeSi( z$;Hhsr#m$wy`AFw1W+Beccr_(v4aK@B)%Y9oQa*?E{R6I9$fB9cI)DxrTZgW?@JdA z+0YO5+DLI^lNQLOF=u?|X;x=euw+(^-Eu0%l2YILys+V>ja`xWWgO6tb8#%6a_(z# zG{TeOCCe5d107BBB#!xq%_vD{4XucN>=s(9e&|H(BlsFmRIBqPQ@%Xh@A3jZaxRS( z&&DqM!f?;ovrCq0rZwIl&Y~VrH=gc^D}h{(xe}w}&{SD%{Tca@e+_3)-?5t*lO^Spcb&UJm6pAX7odS<@|rNCID_OVHQ zRGi7_AU+srQ39AX5CqIrek?jLB#bW#sGRLPg&*j2A$EM=K~W%AC~$?hXz^Onn^ISt z9=EuGh zrqSKkA6w(do&E;Gqs5_wFa<0-4#LPMJ+1|GVG}<%ejQblLcnz@yBmt32=C6_MjFXt zHm4;Wd*@F>ea=h4ExoF~V3BvPMrQhAig!8L++~yK!#8o4L1v8=q=YW%USi`rJ%>Og z-!KvBsV4uO|HIyUhc%gg?V@8J#z90BLDsqJp55(7S+w^b+Zv48uq-(mP1+ zAiWcmUPBK(N>3nQLJ295{W9~Lv&EUS=ljmSuJhOTZ^~PqwVw5?df#zvq^IVf;DP79 zlRKO{p>_Onb|E*>c~WloApkxvFbMja59bmC;M_`HuY}0?MGZhRjTzN_qQY#vv;it| zQgc>4$x@l54bEGCXc9&?p42I6x_3*pI0chl2rjod!2zyqOrNx=_CLNd-KYkWJ9c*l z>(kR@M)X3L#R2TuwQ81(>*}HR5};B2P`MS`4^6M((Z@zo*8^I}IjcuavrbwQC6IRc zF3`+>GAI9uyQZ5joD8?h$8QkdhoG6 zKPmzNoePSjn;SXy`(Em$W7`kgYu^+{fOMt5#f1#I%g2adDg~eNnfR?c1C`UVpxh21 zbGJhcc*{KN;xc_lCaehurRZb~p(a?#8--^hm#H1)?s4a!&8h3<_7e659CbV0S@}YH zMM`O9u0m8Mc2(9PfPy2{`$TT)Ci+y{f*$GVJX|KKlN~$Ba$1NF8TIPQYJDOMs7DoSfv^)bX|e>P(b;PrX9i<;yRwjeC+I3<6@= zI(lVuk8vfM(UU}Yo{_yp|)Gxbk zAV2iDJZtCt0i zrh*^7ts@c9X4H6hll`5rfE=_nb)#x!c&9hW(R*@SR-kutHhl*^+4H~#wJblBF-?6c z`}4~PubQIj^bXrYULE_kY763{2+!YR#)0Ab+ITmqAIwe$2XElkcIYnh2Pw<6v3OzGeK?ju&Ln}hqHSTGDpk5vc~V}7Y+xWq}-tEO&QvQDISkbl5qLtrqxWdy(Djx z{@wH%NEz%bH{H=ViiEd8YYY^+!Hmj?J+)3_$NWkPL!KH&eVok+%F)5Q^DfJ>&}R&0 zLcjb~`%;T)4^f(u&eO8-_AzX<<$>s%z_L)XKx+<{8egp|pYKkM^T9|5gGx}XsN33n zune<`L~ZJUc~qt7^0`eDL#_fJS^b5)d7VWMj6?yXCF8>I;pZmZgeJvHQ{(mZs*Sv^ z#-w3_Up~x(9GDoVxZ2^JScY>&V^gIMo5>z8FznqaVHu9Yg!OLbXLyU=!2YRyL+Q#r zGw0n1hjR&n!}`RUn?;AepOKhMYV_44Y?(7>)KztthUN>x^dS~PX*#2N3g!ydb94}V 
zx!Ay^O|P{}Q|WR2c^p(z6<)`Y6Z;V+ZQk8gr~?$Ci222CQe^porFK1)4niGxx5S-f z1K)sGnOz{Gs^j3ugKdTx;vhp|R`vWoG_lRH8+;KJyPjV4dMImoZq7tl4~oL4uVUW< zUwO4rJGDDuW8!x?YE*rOeoeO-+}h&eZQX0tgczIYJk=ygygJA|sDqSRmFA4;bCI{l z+*QbX0RB#7;RD@jUPSeP8fVt$FV zv-k08pKBk}maP!u#%HMEWyfMk2tvVOf@Hw{SA7{j*xpnOaN zg((T|9~6Uv(!Y&w+8238bZ#YrQpv?}3h9M>0Qv*92$E{gNMZjP6}%q6Cm4W}?=T7- zHKa3IC{K$FrMl<=MVm5-IYO$a&sx!qY5?maGu&mhd>@{BGK0)W|NMH_gIY6?`2MfDd6yG24egk) zd-Gpq#A1YP;)%kGaVHr~7jnL#hBU;Po#{Ho)K#XQ|Yu&~^1t>%Drg&-?s4d@?u~@9;o@#7$Rw9_) z{Uq6dKM(@S7(!+wKgS4ZVfia5+N1nYxlW(__!a3Yda1%S{&KU=T~0gPp6sbjob{Vs zD47s`W9ba!818#rz()MG9=^CR$9k|;BN}>{g?)=!L9ixZI)_IwuOYb>$jUs9@!3W9 zY#QXh@Y0sNz>ZLxcveKVY~pp;s*pHF@h@=ju%!yeYWl%;AnW z-_q8d)}OC2H*Q8YF`Us;uz!M=80F3kI-}GWH_R2UxO4M^Cg7*{Gve2MJtOpQs8k-u znjPWSJ_IUy>*6%)6DjM|6m;jzmI>BAN9o8lJ)CPmIpl@-0}_KV#IOkfR8cM1rj6#p55IW| z9K{4fxN9a;21Q1ptyAGo=E$8cuD>aY&0>d^&c$a=CV5&#)cL1iiTdl07)=+cSj$61 z>qhtyKR`4(-|yJIHFtxr#N1+F+P1ClW)GYfx>cU8L!Mtr#bs)+#aA5pI~?wykC90p zz)q*_G0@jA3kupznHQPLv=3WNGR@0Grakkp@ARdv2beiV_(N`9UfnBBf5EuFTRclB zF@9mzJ#ow4B&3d%p|?+*0EOlU58Nh)KLC_V}UMJ9kf^7L)a(oAJN^X`v*q+bIYr z@5J^kip$M3A}7TQA+t|@zE|2_@bypKeVNJ26k;%k8 zqX=zHZWeV;4pM2D{f&VY$Lg`mRAi6-=a0Lpc2>H&imm_S?mU8?MK5>X0H+}9GwJ`R z+ruxfFg&?iAn8N@VR{pC`v;(uukcfxCTC1}s7TCqbdXqQK$Bns)a#uqfJ0nDSYb%K zq+-Cu6Pww#xAH_vRhlaspq126mf;kk?OzZ$+IdTO6l)Td?0HoGbpAS~Ab7Drjq&EV zSmF0N| zDQy^u^JVgDc$!t2*@sggLUf0`pm%Zn5Zz95GyfGrair&Jn@m96|@OBVpOYql;vceKCp&(i=W=DJV-#nSmc+a3f$)=D^9%5TSLG`j&kkXT$c9< zJ5_-LV(OZ9>S`E@31j*`U=VS+i{{JN;X$9ZDRDwN$5e~o`^1sM$z{t|uhzLjLFq;i z-RP^8R^877*oAJ31L?Mb6}dn|_!@vJDdUYh!``RP^mBtn$7*h@%eDS5cVo`S{K#VX zB+2SQ(#v+?kD^Nm(SS4lA8(%Pmb;`ZG#Y%HP3`_qzfjz{e#RUbr~1WdT8OM$k~*%e z3fmscpPTXC^lVElN1?V+?q2I|mW0$4y%`}7UBbcsjv~}`BbAjROY(W1A54szwMX51 z5jUff@pLVaQbq|5Zumv=77Z(^K6L~8xp(SW+M$)#;uo;(Y*)yoV@Cq1;+Tgrd~!e*Wg+uD_utO8^vCP%tRd=QlNMyzn;;w}=IEdfobqpJ4; z*E%RvdK{ExD&w1q<0a}|#DkiV#Rjf}J<+H)1VSzNxjUHKJF`B;&xyD`7NC>y&gnPJ z%2I@%Az{rGtjg-hdt)FW%X{3t9OCZPN4*ol@7+T7ZIWyb{i|xXq4(aP)hSn_57B)x 
zuK2#>{8B|$bos3hAFm}nJ&RdjW-iky_I7`et5{q|wouI@gdCo+$K@}MzH-?Ul!}p- z88d8sAB%}PXx*Q3Yr7x=ZSrx=EifQlRD(V1P|=K#osJO}7OpAOR%lBtma;W0wO?rQ zCCzAJyW?%drL;@X>9z}_fm{K$sA66^GxB=(F=DYwHtYswpQu}}H#s6XE$F09erA4_ zSn*=&Lk?~H$1Inp5b*jkb5Z1rY12f?Abas8yT+$&RTh?K(5XgQ;Vv$x zm8wI=-z>K2Wat?ddXt_kI7x&R01Sh(b0+Y{^z-pk%x?409Eoj{`9_z8;k>EKcnbuH z?7kaxwXD9#WLIWC-Z5d2F}f*x1-?!q>r0ReCP7x#JS&?m zTja<^gMv@X1&RbH%WS`&s=-w>Kc5{$ej<#S2e_8{O?lZ`n(9Ex94Fbztf#*BW}P+k zg)8Ehc77Gaq-aXGVH_V^^fYNE>^FHxR~G|BRytCQ7iGUKSvTJK0Lz}#bYp>+L0<|b zzkDDu`24bnJ?W{q3qHqf+X7GW9V>oYVn!Q3wIgx6afw2Lxc>Gb1OM~6z&WC16F|+` zKU)TmVD}<*A6GD_m=+p7++OOxa4Gk8Pnnl-f5?^f9%WXjF%Giq0Dm6zFx4*_V4}g0;@oV@V>^*Mjwye9{IdLV z8lFl?MmT-lV=Xn&*U11!o?#egs0OfS~(FG>AqF|FTl)+EX$ox0nDMy9&NHVQ3OJG}ajh@9c9UsK|M) zMUz@oEaoc1E0}CoBVG4H?$&Ns1iw9J8&4rUwNoz|_GEE2g<`wDL4P?&5?9umHEo?n zcvA`!xxs7<-|=@HD5ISNvH*pzmp7GW`%^AvJ7!aGp_y2j-DNwQf^Rfd<-|!@*1A@& zRvzCfiLfrNSU?hH<|pC9%B1-?BUur6Mxij1Ga{`}+d5~*4*7rv{#qIi6c@8ra-DyB z1WGAn1HE7xsQo>j^vZc=Q(eg2wGWYywBUT3KH8*t*Q^}()J3C>HeKdcWvN3>VmH?0 z=?a+xHQ=06)zWq+K}NH19i?nU!xqG3Z?lg3RcCH-+UDtVZ-K{o1tXU1#?C(l&FiO! 
zKVIzPmB3ASWo=oOERcgZb|Jo-)I%dFAV{x;mEM(a^&IFTZ*Kqmwa(=uqX|gHgc!k@ z)!?7rALkxBl04jGJok@LH#yCLdO?K0czV`^qc|+;5~^WrQ(YQ);T2bb0i=fM5M>zYUiVw?qM&d2+VyB+^hZYPs&%q=(xVK49V;WG*Etr)F<#S17N3xB*V_Hk)3 z7_6{zH?%S)%h_o_i6y_uhu`h6k8|3(jafQs2dMm!=ewzza$a3ygDyhEmRwzly%ldc zo~G%*6X;Cz3)jI=l1&z`t|cnvp4gt1rUdj-&1_vPSwY?a{aS&j+_F}4>(lF>vC{7Q zo316hDL2s9v@E*DUtaysI{jHhcz23d-5|BcZTz`qS5vMPs#lIh_{ld{YYEswBZ&!V%%Se zDdCE-+x*6I{{?(?#6kbD5>t$^Pp3>lg%NQo&JvLn`arbmVfX&%UQ36n!|XyV)nU#a z-UNDQRo|Lj1G^>ODs@Dey4Zy5cj-wOZhOD7Bmtd?dtZfE3KHI`*ETnsgM%$U)Bxyr$_LE^5{*k94 zoYnZo{^xYd@r2xP48g7H_+8VmxyaS}-G~*}W~)ZM0}G;Dj7jKnA9Q^MyvDrQ@tcn< zR!*@Qs=6kiTr)rYCD8pkVTXEu=D2wgIO{GXaF%SNY%SUS91gffQF|TD4=ba56%Par zQ!TJ5q~%8ql1rH^*g?+ac#>dxthkfU0~Rn;DqGJ_hVIXU9o z=I4d!16q#C{aT03l4^n2tJK%#=kRZL$O88Vi{(7ZfC?=y4R9Vm@3u*D!jR$Qhw?UT z$3tdCsDqhXD8YW#Xc%A%X~3)_zb2LC>R|N_^gZO7%xs;VI;puc^frYFNtRnl{l6y@ z(xbG30b`FX>sQB|9 zrt&>cT=2CK_kwlfMzGF@xko0cejekOQoBb%9e!cVV-x7bB!S#<4Y!QTgoIpAsW6R< ztbA>HCVFLtOWQWJUh@b0=jsch^OI9jMFuETx~g8iKcoj$_W16nozMBJJK>MSM|&x) zhhF%0ZZJt^Y{ z^5wU}Oig$KSlLTOPPJ3O6dC^IH4SXX`p z9MubcJs74(6Ahkk;aSR|M*B!u*Xwm6@11P8`{QS?Y36JB6h^eJk`oJZRuBp{d0VOd zCLQ(F{+i&`WtMiHm%}XmMVHq(*`_RJ3+Q&dL zRbpfAIUKM#67;GZzV7?Anr^&2mqT=Oc&CUzt|1FdH<^AGX@owC{--f9q8y4(K2{X+H{0Vp2muni2-e2a5cLtREiq-P%II zH+9w@a?ggz{8(+OcJcJA-valWRjGoWEe5<)1-8oO%NJx)Yi4X@`1yB=c^Kcs0;WCk zm~>d=Gnuwk=zvJZ>&?l{VSAu{)oj?h@b%AmQNfkc>=t@r`F*@N{porhspB)y)Hdf1nN?I=oHKAL-NsGl=Vo6* zJ3eW$Snxy=DCyjM(&8^)v6;14ZckUSUw-&HXDM9NGxdV6(s=tLv{d_At6i{}Hfz`G zZ1a~5`)xgQ>)jt~mtEZ1$a==hWcfLz_;79H4>4E&)$VDs4}|v;3{EO{>so zb`us}Nh15~452$Eo7@r!Cd^R?UhAFQqGJ_8+Mof?rRxmrdeHpW&9d2Lv8kVT`K}C; zC*pl%QBtn^4u*rw6E7A&z7J#Gsu48uX~yLxu#sKv*CZHO0x4wAuL3&Cld!d$K}mbX z$%l=c&4F0h3@>B2Uv>peGirAJ27IOI>mc*&y50>;s%xbQPd!&u=nXs=%2A8DA2th- z>BbJzF4Kn}aI^3&U_4eMS5ub+64TCOkS!yR@mhOB5LC!M=cOLm=UkJ+(I z`SA{xFxkA6QYdq5r=Jl5Pn&TnZ=MvL^{reeQ%mDHRT5XuY)?0=P$^j0^LIfLGII+U z_qr$M#u0Bm1numXUS}50s|Fv*$r81+oSt;O9JCun#O|awRIHqjcLo!c%ftyDE1^9i zcm3^{7tLchxR-ooBtf`?zD6U66bKV&p&?fMTHSvz1UXw3Un*^S>IgQi;=_QUD6^+ 
z`ZFW1d%eTwx`fSHeSu`4+0Ev7Q1RO=_YeMjH+m^(cY^g6KOQD4U+j$AR!%tdpg4vPpT9Oct*GNZujOWDv&=wV z-g&^GRvO+Qo$Y54Ta^l-3X!H$aM1__PAP*yn0kG}IkV4YestU!n#YfL5!1#d{V|tO z@by*Cu0Y!Tr7a%bCf9jM8u)fOX=dwYZUCjl(t7^Zt$rEH^;)rEAH=R61+5v{Lf0r& z&`~AQbrIh&4ciHx-+4u?8r2KQk$M31;N7J3TpKc(C=+E(C}0)anP@U|sY)W3dqesz zGGz0TkB)1DBiJ@=Rq*5pQ~Din7deD-@y4iO{i}?pt6|jm+I?zQv?r#Tdh&^3SyM?x z$gHT_{xaY&ZK0g!d87Cro7elyrk`TMb!%;}-Gtl<;bK-+l{w^3jbVjIF;1;BuuK2V=nlGg8Up@|Bm{i2`z3X z+Ruy(gqI8p2=U{wsjM~J86xUhU^k-UNUA?07A5-pm$@L}1!w>T52f}c_7CwYeX6n65DMpu;qyyZM5M5(%;s!}X9vr{G%S1(fA;4* zifzBGs9cn8I5RX|>$)rjvY5jhNG%eoQZ1tx!8tMeLGp%v&Qg#7ErH7AK&t`f_6bCf zhu(w)r=`MrT5gO|nj16$9d| zp;wQ2Zqtu4JF1seeoZ3N4q*7HIi9Y-yYc{o`bxb>RSgd(#8Z`|x)dh587Il!T? zG9%r?eJRo9Ufbf0=J{Kc1y3b&9G)5v_xZqzYzM;dJ4-OySyYLjr^*DgdmS&V({|WS zVtv6>QmVO2U;O#08gx?pS$L_8h>?e-=QTx8`!ocjt|`4;0#6^WflX3Ua1--=X&<&R z`%Sh2o^VKfhN6jA_eV&&aSK5TsZB+Q+6z*EaJFI}GpA1vqw2)Tk^Fgv!mG)_3hXj(_Zf48YW4cNSkY+RJx;@3K-`=a0Sh~b^t{0JSXqd@N zF3E*?sF3LytGcp-TUjy~q9a1t5OQ%inZ+TQE_8meG!3ohLmeVuo7x@N00NF;R^&pe zh%S}bF0ay}%g5%#g;v8egKe=xB1^Q;eI)0oj?zVI%TBy|z$S}BeBUD1MOXv>HTGW`h8ak@kBsV3Mw5ik^5*#lF{>m5QvplRkhK`UXbIU z8-$8D5QFh;9;s_pgH0YO0?|5lUm$r5C26rVn?BfJxmyQ>U(N&UQH+E|K7B`j7|yOEzXS9IXO0@X?!aksYz zMyEhdii>;JY;(Bynj5Lo+E#kk_S050mDR+!XB~pAv@MoWtX#KdCZuU9#Hx`#?x~f) zD4l(V4ac|BpZ+tPZ$6?;K=lp|ewl?D$mgn3hx}u|EiNV2DsM|VOn==Kx}PRBPHCtKM&(dzt#1%NCWR02YCK6LY8Z;j8fbdik3Wqkqi8BHf+ z-T8C`)ShQx4Lv@6|3dA@XnW+|L+0n0FE7UzL6V=N#KjZsJGx+1OPe01w5cE^2<*3- z_Z9frY4MiIaU-PaHwiG{XfU8y0cQ+}G+YPA-3qCTWzDz_aB5Yv@Pz^NQ<(|?QI(Zn zHEW)Ny!VVsUw@yni@!nl>7I?^s9f z_+WO4QgSpdGRni;c>035IIr7Br08AXhD|1qO#4gyuJGRr=OY{MeS9*fH(5VXmPR)a zq)=K0>_<9URHF&#tV2OdcCMyve-CYQAueDP04eir@o;b!B;}7GUh=TH>eOO{3%Hl5~xPs|IWE z;Wmq6Sb*Y{iR~Z)UYrmRgWe^}0MVVhLtEhHKuNaK6JJ>#NWL0>2?+O4-EDAYO87)q z&}r^rm$_1d&&mBRpUzZdjyZ4S#6>tqx^;vS1V0OP_!S-x0)@EioR#>ia<&6~hhd*bTI7kBZ$umMB7P7n3? 
zU-Ku^*()U0K!2HCWEw?L2BiIfDGbg8YJ_dcHjb5*1DpII7)Vu@*w1pr64o}os^gUy z;XeomJ}^)`Q$~hz5G%~$x)7cjboS`GDQotsJZa{j2ZwrDXnS=SRB$2@{&aPk5E!#s zwSrI?-BED$9kBSJuaDnzu2S*X7mZ+DqZ41C-e;zpZK@9L=6 z#oHjE(q@~58Or?-t?d~%=Wz=lpX{e;SU~ALh=5why3T=ZZBmN^Nw@89HdI@?Q_U|w z7f5~Xy>kIkxsS?6;|xjf;w)2;xwotK8uGVKf@quZzcM8M8O3A2^G2hu4nN1xraxwE zH+!2B%p)uh%;@aI)wtVgW_acu@UEJrcl1>{HH>^2DNJPh;YQP+6s0d{3iDCjsApcd zwV_GbVV{8*5u>l$=H`fbX*j6a5VpQ9@HHy&XK6@b?A(C#ZM~~#aO7bkFb*QJwP;|~ zB90QH&u&}sO{2r45W1UI3wp3{7Q*H%c4?gjJJeAdLnM9BnZY!$FzxQ#hZtB@Xfxlw zj87fT(6%k~_R-{_qj5v&xC51Rvs}Q>@cqI4O^sz{!PALEpAJ{hFZU88vcC*IG4nG9 zg?Y$jN$KwHK*T~e@*2k;&uEO{&W*YO^qlu*xwECSEC7!~bR~@kI6-1fWJ9YJw&?9; zLV8VpRO7+#6z)82X=*SHCjg3K_H)*&Nf(n?Jb03lQlW)^l%Jz~1=`x*KW0Tz>lD1q z4UY2Ky@YxTYHsjy?8A_Mna$7~Gqx3Hp~ZS0>6B%CZD>&gYY$QInZ=VJMt-Uwk@nFr{6mj)C}7AOG@;hR-;rd@%ieFi>gkOsm6v<`BVc~ z4O6F7a=&y(&=p9|bps)V-@dOokf+Q2_@(=3{r8|bVRF@!`KPFQyS5f&%Ym_nmAFNP zWx+`@mjYdXty)mVr(z8zKV{GFS1y>fkmw*JTE&~*w#JI|3~H6q=E4uSi1P#JOs>V< z98vL#u;GYZ9v-SNXFPt^ZQC+LHPwqYx9WW6(x9jW)ywYyid^{K$tXeWURB#n><%TM z6Efv{NW9~ChTP48*c`0MKBZ9$sZO{j1L}(#pyub_gNa-;(y`J!nMn5+O^g|U_490-!G#9nsl_T+>RAuM?j;yzf7uC*kU)`d zMpcAPH#_()!XXMI>rOn7utB8deoYE4kh?y>Hu|;O)NFz*$x<=!oTl~xKdtm_Ob@ju z0-wKaPYq1sLC>w3(G8I0WlQvzLw%dG{2u-MK*n7r@=#w6G%t34={dkJy>IHNBsBlR zZeE?NCz+wAmxznUdFuc~!@eg$`f((qihVqTlGl-Z&7@!AVBbiipMw$f&r_#>wt%?K z&f&HC`Zu45726!0gw9U@E$I3KnZz)bgK*J1rrL4^=IrG|pl+db_-oLxH(1V3s*E3h zZnZ#u^o5Q;kB5GbW*e2(cv;Ye3U<~7ZL<{XbQMo zP~^K{I(bR9rDLk19F2RI=ulvk?&as6fT8nl&%`70m zqT3$vX=zSl{*V-G(xy_afeO5wZ0$Q9$B$@1R^8m8V#!*erun)u3vsb?(_794_U-;zJ667^A0SB z%w1vd&d{Cy1tBUzr`UF9eeVirMR2q}>AjOiO>1+2@5Ucqoe|%gZW$h5%f!r2+oB|& zb6zi-ZaD1Qj1aKVq~-9u$O*{VCIsO&6;P} zr<1eety#BeyF-kiW)8o%m_+()H6UOt#wxy&id;8HrsIeMPY(;|;GdO}Ms6~;QE!$< ziPq%8C`J=?_QfcdF@9okvGaKcMDNLH&3I^u1R+yq2Vhz9O}K6FMq0tslRrRfqD zX~yV^E`y@ZPiAg%W%k)of{ylX59$DC25}=mjBAPQ?c>$VR8zyOfngNfj1y`+d{A}g z;vp&+obdCW=V*diX_d6jB8@t^$yIG4DxyKuv;|wV@zp;Dgs=VD!-g(P$tMCH@chiw zA099|ZGF$fYi(~v>@?!?2A4l2kgMGV!!p4_i8(kQuUYrebE&$@A68K@uLBACQdoxq 
zuE*xYulX)`se*?QLa--JpUyzu8NWGf9$UOj1vW65`k*`5f}QVJr0sjiAzaqx*Kq;4 zh!zp4RdwwEiW0(Q2)~}+-MulhYd8wJJ>KUq$rUgw<~2Tv=sUv1k*gs8K&f)R8J*w) zuVV-uLY>m)M=!I_s`AWy)nx;+e)0CP;1!<5d9g)5i)8}0#O(=6wj&(B^|;)hPV%6% zKe+21D7eA#tn5H_iKG+@!be=EuctTUT7AT1(QBh)+9O1GL-}i?GENRXNf{PI`22JF zkl~%+ULs6H5cbx|DXml6KI54*85(?89o#Zs{Bu{R-9+DhbY?}t(-(}RAbq-Y;~N4~ zqE5dlRt|R78cMFcD1vpuhUm_1(o>_4eYA6S41tj#s{-K5nPE{BQF;Agvag@alK1{4 zJ7+O4F(f!q{&l^>y%}seFyO!!m)=8ux-39l!LN@Gjj5~3hbfqWQn(Xf^0IYXxxk-C z`$XMb5y>7yGn4BLbGJGw$S(~ht@Xh4`Rv7x#!4t_q7Nqz>)K6 zs9R~QE-Qjpgi*!g9=*ZgWZz&4of=e ziT|1mNVDnHSt1M(XmD_`v!7ziw$6U6k22VE%T^KeGV?!fD-hrAU0y+`i}TWTUR028 zxw8bfxAXDb^L#FM+{(ofU86Z10s@#m7)HSKmH~5SN46|XjEzPouNL>jK=)JTw3#3r!g)NWdTBWZf z3x^NM@Rd;sso zK}YlyGx)&(S~5}46Fms13;me{6T`(D(KOKp@uZZP*o9+(Y{r6FKOjslWh^-mT|CSO zS_E$ec<6LeP>V%dwotk3ZjOkYkebW}6Ecemhf~J-_jm<9Ewg}_Epzd+;=D^m)Eq#n zIoh{M*VprjuX*??N!;Dym2cg#a#q;_3OGH?%{EqQ>l=Ja*0ZPi;S$JHB9J=)NrU)e z1$yV|ogD=6UghCR13QzN+OxXGnzIM2;mk1-`%GA0=f#ICu9PeJvINe!Rg}} zPK5$C4y!b`ap+T1m}|O&_?KhA{gE_s!{Np|4!qWWKX61v?5Og4e})PVB~>**%)O4k z?A%mgYI5FuXl;9!8eRMObB#j7^@^ZbfBu%;IG4>e(gnNkRqBC%7n?Uq=uS>u*rwT+ zSGE0-J@FXPMU}+2Di%ijfmambz@$cQ(~2haLGY?4b@e`bVN;gDpHYAj_Zyq=mIqIB zU}Ven&WPKWo=WnWW(JeU-faRR#$j47fFew+z`X=sNuVpfx0lRK(dv{ z@7}3`nMNM(@=yZmcH0xtBR_G_dM9Cb$6L&4sk1KFtzT@^nx)N?Yz}p#mfp&l^ct!|j<>c5Y=P*A6TbN^6q>iqL2$Gllg$43C5rP|n_ap_ zqF(Q)176u>&{0ngWYZg>xXX&2-gPPgdL0}}%<64+5^OES{TxEs-Q_!E$qxot_~xkbFT&WMSNOSv-PHK~K9HBEg`vYL zHOlaZjy6AAvB0gQqe9w1lRsn0wdn~+kD=u7$4q27Mz>Y+etMItHMxy*AvOKG_?Gi( zgY#xBGNu;syrvSbV;t5jd0>TlN}66G`}|mgc)7Qph(Q~rx+#)hxJDmgJO*yaEt^YW zcM4FN@x-~zg)s2{86Di`dPj5CN!hYd$SuQ6yx(QpTfAF>kvJ$)2ywXOmIhfL5NTac zD%Vi@J*D>;sNbabHZ;9TMIJZKT>o+W#9Y*hMXx)GSM&kQf00SO(@oZ?X}`y?a(q&8 z_td)m`Oez_l6#hP^2rX5rH3nnB^D~g1bwBOhcI4lse-QAr_R$6?Xxni%V9Ybj9rg$ z2YUsvS4gp@yT7AEz)g}C8@(p&qk2;gjeKQuezs=I*C)4Uy?lY@Lb9Duk*UXB2olyg zz?)kB%!IUW0(f*{_Dd4Uoe8q6bK^jFK}_)^>h}Yuo`jO|#0tV@9J2)Pxemi|DhD4E z@y!HsXoUg;(#84cyUBTvv*vyGF89n8b`h59i`N`%4fo0(y2p?NlEFS@j9`|i^hNMH 
z6&&AfE7^%%k=~6E-0b|maqn^3jwaQe!8c^GH9EedxxhK#t zlpg~3paGj!yH0a4~ zk?4-p@%>GHW!(xHk%bRmENhMG_Vo_tqzmJ88nFBHbA&nIS}oUq0NvaKhGWdyuPOq-r(|h zBaWd~gZeo7gnfs46J*_6TW$#l#$iDd{&D7g!p$_*2o!Okp<>`}Pxi z@%gnAoajd4ytv@ES$E~;b~blkj@6`cr}6Hv@gROrKf`7jbUsz??eql6a|)55fwEM{ z%)zt)T7tv!h08iz-t6(tSdG(1e`}y`D5tUz5;b9nR0Vo0tp`S=tex*3+l3z2gWI^q zY`RrR_axTD>V3smlTg@Y!()lnB;`hnBTrzqchUT$Cn~S&hke*q)PRi!iW)b=O&pbh1+xzxw-Z& z`DvZ%B=*3xw3t{en>;(4u`2o(Hb-Gm++mc0T026ISKDJ8Z_sEpO=ho52%R!pj}kBQ zgkY$)f=L&ytA_V7ukZyI#*=VImuznU{d~E1A9rbgm*cVUZ?8+bvphAVJN#Sp*6bZS zhQ<3Sg;ll+Up7w3^U*IxYNlU*w;HJSTuIfOvBe^sO4w3z8^$z54cTokQ)tv_P)YDT_1yJ3V5O!Yq)+Pm9#%f<777&~Q1RA7blj zFPN6|^j%M9qW|pQI6AxDvovxvIhrSF*2{c-w!4@wJRZ!2{EB`ucko0?cP>5sE^HXP z5`*rs;Y!VW&4RmfoBERHsNSVw!+OaCa!a|fA<4`BRgYP{!F7;q{I|_8=huF1uM-Vg z3_fD*kI@vCCz>b#Pc}iJyrPodG{k!vI`-6S{n2CMFUIkKZFplMrrOK7hL_8l-9N}+!3n^(>A^Z;Jl;I_uH@txtVtczm=i|4)Z`IMwqRK;t5gTY&{wRi#HHnG zm~034tD+c88&i3ID#-q}esJb25OR*+T!xt(zawumm%whO8a{ow4N%e~H^a&V|IeH> zKlq1SZh^iY_sv0vQFpx#Glj1|?U)OVP=`t`g+3v+szX~x5@ZG-s-1d$%&L+5WR2F zAjs*-vm&)O+XDeWgU)q>Qn>QVdfJV-GT36@>ABs1SV$*-!5n;eaN3U=CK)vGnF=j; zmn`?WO+Ae}n^}&FM>0w}s3Ce|8L86rnEX#oE#$OzOg*(NY6{If0J#+%B@hiGiNMh% zb%)KBZ_GONsl{gQGVh>*-4vO6)T!}nX=yJtxIJYc%$UgFmKP=f!ja6R$ONx@_0t7w zy}$CYTTcF@W)$^X^%o|EB`2bPWb023*M`gArg3~Dmq8%}L3XaQI>i7j=Z`|#^Hn!S z6Tn`5A@^F}?(>HcF0=B5=rLe^4faXv^O-V9@QAUr@tlhc4jOg#axu25mXso_>c6a- z%%cX}MRrq9Iek2!b_R7UvrP-xUDnn>gWDf>A8Y`Wd+DK|pVlSd9oy%rdm-z74hn%ncbnbt89+1i;O^)Q5B zO+(k0son$&z&${9D2Mt|xl{$X{KYT99@RAQK=0-EXZ!0EN8KtIe) zCQBl`lK_qwFK8gdS$X)$fk+#(ma5tApSUg!+C+Jg?t2)H1v*0JR z`J_YjJ&8;9-Ty)xSu3L+uwB3Nsy?Htp!w`K(iKwO9`?LJgEc#C3X_ zT<6U2jX*578q;)8)uoh!(Wd5)?0tUUQzs78UyAi0J< zW?90qI@peKh#;6SYr!2XSUY}noI|zMdfMKp&XAK;8uavU6N3bTzN7F2*6iifk6Na8 zC-ntyo%{A*j{pkGsdu-%pQc@{1hQv9`m|nt{-&cn&=<#U;PgZ(vI8jmRho2WP~+t1 zZ#z25Ig(Yw>Occ9GXR3%26a?;0|!ecJS8~oUA0(tIu^OHCqu0o>G4dC37j#yiehwB zmt6ZjHUxOyq+YSvz*9!|q!rE*z`YzyE*J~>MywnsO7D6S^`bf&^rGt25* zWc|Uj{*q>Woa2&_X&W(SO#@A$?NQz0y_Jx?SWDhTT;V96yAfu8uY^1lTQ3y%rSRyD 
zf>3Evc7JJQQub~o!}kJgPrJU69Ba9ndnyY3ha>3whyQbXSttXV5w0%r&&Wvsa?O_x zCl)*vR#N@nLN@)6pZVip(j!DTW1s(bckwsk`>>XvP>@O5g30$dw)?FY^hlvtn|+io zK6-!ez6Jo5W~ce@^okbsztU@2;C9VizS#~gMeUi4#E9x9myJM`|Jow?M*?V`F3J|? zzFqkm4z;rU`g*t~|H-F?6j~bMo3N;(?!MY4g>QfNjb8k3ALhLTpf)Z&yRYNEeX^`a zZ66I{6zQZL1C@Vm=cDQE<9zOf;M;Y7`EM7zPw(H>XJ==4>UTE{U-t0)zybNXw4N^M zrfN$u7~W2ZAj;g0+*|Ug8A{bPqJFMbFgO^^_!c-J4ll-6V`pU~4j6KxJoUfbVn6uL zu#P~jR;OmI)+$6dbSp_^!GbpSOsB(>Ka}e=fSg=@YMBOqZI6_Trx%Tpy?WE|GX{ zT<}wZra1wmbd)gsH@E1!C;N~1eg_bq6VLqfcmHzl_FO>h%Av6CZ>0V&fBAOLUekJU z?*H3AW3K^k%qh+AZ~n_azZC%CP5Iw=%x@l1SrM=;xrcWDze9L)0*V;BR4wq&FZ3@j z^EU|Z@66Nx`$hjp!2jFEo!WBW=)^aR|9^qe{yzf#uiLxxKmN}S{zmKmk1G0qxr2Rw z{8wAyzo78B59 z`%wOLKI`ufasEdamN&;TXlI3D9db^rJ^Pf1g=&%Uo(#?0l+NFOz5uZE=FMdYEdMJw zf9&;}x26dhNeizx@45@Ft+8?*nTg{&vjimczayCf*7+Zu?|+o8|H1iu2-ZL@z03{qzj+XAfOilth7l4!4QKWbdvzqa zeQzjRqxCq=Wztp%=ok1-mM!z~^1q6j=6X_sKG2Zx;G<=S#rNy)zkNbTz5vS{ z{kP-apW?Fe5f7JzYVz0ESEm8Wfj0oMbkEd~SdNje|KRzZ+F$Lj&*!_GEVuGn(TDFH zJYb~##GT$BxBUA3lYW1&zH~aPDH0Ysqsfj_p8gvu|HuL$wk9H8(Cd`j`wwe<-`bxu z`Dxb{XRII7*5A&hk)f08;AaHg{`EQ+&c0g5#7l;13X_N|GLgv6v{q-tn;MvOPTLi)Bl7?YitOXQTeZO=e(XY@`v0%S+<0O^znl2>y@jjB&IN|Lrte1F;dyl3r)vP}=-tSjp^ zzyI;0dLRNuPFW5&{HwHtD8gm_kb20X!{2^vbYj*9=a+@Ip%nv7eF2T40x z5h5L6e}DJ?y2TazWyFg%9<{%&!%rCe`R9*!{wB8bP0!v}gmv2#{{1%JelpE_e%sew z{MDYEf}7+vttI-dWlp=wqyi=M)nP$EM^Ob-xNUvUXRiv3BOS?f-sbwr--vr8c6fGq zXI#J8?P?_GyPY`!1WOZlCI4n)Pd(sE+8^hMhJX8=8c;wprWm3pjS^`Cj8j)Vci6n;B$2mf-VpjCk*K`*jNRBK6~eCMY6!= zL^98*rv@4VjZUb*;4+uw|M9^3e{73C@%-I_hHDmC?M(5tNP4NvXPyUfv0Ry%Z&`(M z7mVMhfP(MF)3|wG`r-6jyrjFlhhAEi0I*X}fAQTS|K7d<06bqmZ&C0lN;Tj z(^aF?7dXDwd5B*Ajw9|DfC&-c_Y(J3_$xPi%I<%=Kt^}oS*nX={V&qqJD{m_+Z$Fy zR1_VhN*|TpRGM@U>C$@_PfWK;KlA3-WBX`!6-cHHAu5Z<*Mt6&(4T`5QUM^{=W4^}pLz1v z_hs6Is7km?ys`@Pmi*F@S|LK zTjJjw9rM>Q{7n4cZutCKj$MBMp8vX)&u{;qwjX-zthIhI>5U>S(mQqIktxV?>_4GO+aUCUyRB9yXXI!UH_1Ury(G*36N|0f9*s3 ztgS%ZU6FL(c8>G%Kldm4)jfZ5FW2bNv|BJ|ci^s*IBK;`tMKxbY$2F%jI(iSVXCX< zXWyf{6y`^tg}gp5bLmHlqb#>edBzffhVTK_*E_(}|^y7LzaiTsN(2dFYWFN?X=IT2qlEy8aoDl6+ 
zJN>b8h2DxYN=f^MGJ-j?M|WeK5rPyJdo3_vWlQ!-b>KMKEoVUJl`8fCS2J2EAF7&y zFGH(c)Ckf9CW}i9e(aSVn#So4HO~2KLhxM8RVPdHnG8TW<=*%8zm~spO)~8^3Wbie zLm-L7PYDG_p#mu@lPEOR`R!71c0T`kF^B$2>Er;KHL0_Ku!}#be`vvv0-t^??!?-; zpSa|CA9#_p!^TBK>;lV?q32A{LWjyW(>@F~hYv~Izw9?*CdT%&V|%Ad0`gpOPP_CU z6%FWxkB|(9syi%22zF2e5OkkR4a6_-sU+<2==5%e>E`HgVHrv;Y8)g9RqXW$`R&=6 z;>xWTNV)IWsOIGPE?izx{#kZRz`<_If2jR@wBvD7-gT3p1c<$`wU;2?+FMuL=h zw*+p-Vd{)!pO=Z4{}X_LZ$;c1#3&|_H zpv=|o$s_sk;B}exVv}E3Qkew#Hs%$_TG~(W@WeZ18$b35qcu_yemaG&rj}?)_(VNk zyWr?|0T;*7Um=Q*hky%NuPOV-{NI1;rt8Z;wn-W0bfbfe3k_23RE@uXsmaq@PCuQ# zOV<>l!kBmWu(fbIXQ<}s{_D72whUeD!S#D1q?hmLULJ$zg^-+qT{tcnR#~9gF*C`2 zlD|s^p{;2_*x#iJu$exsEH$H&**F*KVaIWTJBM44PCY7j`fbVL;q2x`)H?L$@nyEVE;+EC@jKq(=88mMZ5swVxIhar6(eq1PMbl%~c z9gE6mU!wJ5bvnNu&3eNA}eZy>fJ9kR|irdO)2y#%DZGWhv&AjPhSd(;)0Uros?Eg3g|9$k; zE2!Px?^y;3$fLHzs;tX5@fL9ru97z?%y47zg`aZ>_Ic&LXr(V*5&~+6)4+7djs~{3 z2YTwme7>)wp?eeO1dcLM8pM9!SHI~}inWV0QuJ}DrZ9RH&>y24*HDo7O$ZJQ` zX)=Y`^Lgh7JJ5&spxqud9X8wB#@h`7Ia3Khi?lNVq7WFPh$gn??Oh2`r*FWWb68z= zdwA3LsG`8FGveL#$HbfHH#CydC2wz66hEoy-!ci-N}+wrd@uw1){a#VGYsZ(Ih?%IOVjrL~Bso^0Dr?z$$F~=w03@u%fGY9H>-5oxuU^NfVWoWHVeMeT>W1k`= zat>MI78I`a9<&~qkNe5(YUH_WCAtt?dNPMAe4hb&VS|C82~bTl;OW zZ&4h1YqSSW<31y3rS<+t<@Gx8jSZR z1z{ zDc+sZd$9CU3ef)L{TQL6@;lMM6r2;dd&xQ!!OvX({$$DKM>geu(9VFMMc3|iEuX#< z*s)zZe*7GlF5A#)34RB9i<}x@Q2w&?#=ho*a36*R7JO@gFE)!FZm1M7_)QP>o? 
zc3mEb13+9_z^0hgeoT%EU1i;YMv8Bj zlaifJ!A|Gviy51t{hrqm^9}n8?n#lO77#N2Lp^!~!SIe;$3g!_m(2PVT}{R!BN|B@ zb^y19P96A;o8k68iK}D{fzHV?IIo@ccs%}yB=M(7m=Xfqnyuz_2DHhse|#55ax>LU zKnmK?+^CcC;JGSPV&gomCWTHu{A(vZ^=n+!004ZG!UBJ&thncw;=9e`TiSWC zvFMe?)+-Q*es`~xe%v$14r_$Zq!p>UVz!?Qo3C$TSsRY<&a!FTcq#OBBcx@ofsN1a zKt`w-Yg~I5ho2F8I+o{K{kpi3m*T;kPRy4W>*uN(71wLGDJ2M^{CWc)?1Ck)$u9_t zD7m1{vM8f^4p8xmWLV1@;y%o~`?V6K>L%t+0#QL?RR6{&+u(c74uWzy;`8i{~uge2_j+J>{uC~Xsf_Ic8!!G!*FPYA$ zFWwaoQ`L0}q)1@9qMYEbMlZfty`d&dVMg5RcO71180cH01wtK{!{gcRSvPoAYhMX% z_9x3={|O?hC)l*35skY_>ZXT`FdF zc}^z82Gdnshu?VR2_%2tXhprSmWQSeyVV!z$EzVk+5%r_ zWG+PNvZfTz4H_u;OY%Z?Ttv(Y_vh8pqM|h;%(9zn>LWFPDK6t4ggUIVU?_V|Xs2{4 zjpSZq0em`FCP_S<9Hlroa9cbIrl6`FF|%yb}_P%mWR}RBZWriI}7S!%|md2;{2Ww~AgNSd9Ky z@N`p)GY^Z3gCD_Yl71uxIGL6L?d~pbTGXN3wV=bBu%{~;_Rt-DVqqgtEnNm$8Fwd} zaVHv+wpx!?Y_$tqrz#pZr>--?<&T%NLdV#C6U@;ISQL&a01j8yHRYf?tjy#1lSZD$%l|6zt8DT;%Y zc%ceh#l(>Ib>k2lyF;ZoUV71G0FBznJCtzjFyV%V>d_koz8N-(cFN1f?#d1yG?y22 zkw(U_LI*cI+iM|VO$T|q$_ZFf8?Z?f?AStvA8Pthh1AXc;u7Pn=`bv3cHT|4SaN}wK z0EHw47%)!Zs_SOkB6_ar4SbByn?$^yrUN1n*N%XLJ<2$0%yxl^5b3bI8j_IMFh_8h zrql!iWRS(Je8xJpSl!Lqj%$vEY*C^Ty;g-Z9C^-FwtqHW~G z3E;5hr*Y|%-H?j7W|CtA!F(lYJSSPbj8Uqdic=ghA03)4}JiSlH6?5hTEn**yd; zl2|B@8ebCr#4q+fSF140l$=l3lFvJJ|1Sr%!x$0f2*g!}}BfFFhXLjf%{nV384bIov=E06`(w=Ne-G&H7uK zBb@J#4A^n&`>+UMZE2+G-VU!(e_6NvU6rol0j&>~%t{hbTsKXz>hhu)sntiA&kkFn zmXMK}>=UJbf`s799*1w+g1$d2>zhGD$=!QA zmg!P*A>e+8gWz}#bN03C+u4uLTpdR3ym-eE2)P!QPVmX_ndS!JDVobP>piX|FAMC9 z@d-D(jphA*;a{Zvg~$rhcbPgg3=+Q|A04aK5ebeefl0!4@>+xnN5S{)`27(5<=_9x z0oG@-e54W11bwVS0DG8U&9>#;`bPnMd(9M5cPt~-`I;PV+ecG|eKJjBd=aL!Ew1hO z#ZfMxYAtMCbbUR+m&dArQcn?=o*A2t3Vg2WxGgS6XqBFYaJO~&OS}vG94d!LUD+91 zV0XUz%G7yAJtV%X%P;_eV}!jL75^e5fD!f=LUkY?ng?}FW)i>MxcXNKo+bsK&o)Heu}5#` zOP9cNCutwsHXu1HRc|oD&EA-}X0eti^VL|rp|3KyjfJ+7$&EHNM3(0j@z%~d$t_kI zp8fnfZNfLUW2q`yL(9R`Kb7^oup}!g0WY~hf!n63@oGAJ3e`g)rMAoBrZgqmZ3RDJ zI%}>}R9dVKi&(NXg7@bWgHtlEc?oFA_bvCR< zJZH4J)R+0d+CYrHl+_2X%>3`u~;v|9l0fEz8S)TsdY6_STEj 
zUFbHYBW4;&RNp+I4}D!E(|y#>pInCXpWpd%sfIUbj@GqsZoll1|B51`r{-P+<5rhnk=kFx=ORNht_o^ zp|V}rbvbx-`b(UybMr$2oB)sP8&n-9ERa8*(NuO%_3rg6FzjF-x->;ZHF|_|1J53_ z^BnC|I1*IL04bj^%O95|H$IXk82IPZX&l%1&_rrrtW-_A7=_m%zF!C}sD6@@?69Re zFC03%`{+x%=GlU)MLTsLOC9j%(t9mY9F~qB2$QV@%w7B0n}oSdwr07T`_3I*0a=AxMrSfjfSYg4;rI zIhvxz1N>6{dTF z$s&p7SX9eyEvCFQx%%O`CZ_{>F8|T@)}{$Nyn^PTMWrS@aK0WL2sfPMb}7Qry|`(B zjq{~kV_;&wG`1|^3eRJ%7MSfey^9it_EvAJnr^o%TnJoI$9pYuai^RTbUxyF{&dMh zTM+B9G}YAqI7dIwr88*Tr0;OurqSaUdVZQTk$fA(zcJuaeA1silZ`u%)YTi1t9^as zfm!Cd=^p@QiR3-*C}hJfTpFLbh1Jyx;J-PXn-_NJO7o|iVNa1@Ima%z99_g&=+h~_ zjMW;I3(5G5%W#62-}dUYBwbs@jPbZ7oV{{btNDubIB)(8ejjyCv*+_xJ)$_SWv4=Oe^?d=F(rsXQ~DN%!qQG6r( zf`?76Taf)XYY2K$8tzG~6m=Kza3HVHt8q6eb89%{?3xpHs#0n{V<=xZ>}u;2%j1Ox zCcww%vy*i%CEtp-@aIVn)*5nd_vrT=JsZuuHx?_-^Rq3VA_1L!mpL%{;?hY0sB?V_ z0juIZ5F`wOp0|AzPyDKC3FcN=quY zUN030$p==IUtkUsk%Alc7%m48-m(fihqDShmawJj$|q5be_dzxY7AJ7HG{>b7q#CS zw7fLXS;~^L&#T)RT--~Vw0CrmO_|9(6=w={3si@wlr&jp1WEQE5-UG7m}-FYjFe6c z<;Ugs^Sp;DG`)&CKQXCK@&`I`770}IQjnBli+vPzT+5z)v=03L#>v zz1?b#45s3(J*p2-Rke#(ZRp}pq3o((>-^Mwt|`ecqlfm0hBs*Y!v4PE#ndP68EdK}>z1Y0-$6BU8h!tE<++$4CO_C5<9eCQ#X~Q{EdCuKFYI|& z!<8(@j2x*8?7{)*7JFSv&=oV(xpZADCP7ojcpEePDa{G@U0Q*?od}crdB`Rz&p7g7 z14Z3cCtNkGK^6;yX@P}hF1`@8Q^)RN zb;y_m9US#KY}*lqPZL0iHI3?;VWN1GuM6PDMJ0~H9<2-4pLJPXW4Sao{gSnD#Yc{A z;Yhu=w1$OZ0ZmNEByyxlLC!i{cHWK>{uskh8lKtWgs)*_9BV1Bat6ch_ba+Wr_^_U%gZ@CX*q?j?U=#Bf=0Yr7{9A1V zjsrhH0rEXV&xtEaZQFjI9`l}3hew_u=HFfR$^OV2GHIhrPgo>tpCyT|#E3>|ovsZ!ImmzV{jYo_zc7(1#))k5^wA80u7dBYk41d^ zc;WH$db0I7VD0;@&qOohi0^sN>uGEe*a*#qi^_Ym(7&!{cQUW05(V6GP=ii9~uJTwP;1IdfCTr*pAZ)D&m%t@&py z4}DHj@Z59^;C(mycNOzKLGa~pb&dD`FdNzEi!WDqpYCi>E;5eITr!jXHv|8t1db6W z8%`oAmvdj%M=A3YDNZkB)TK;%(EK0Tng9GF|6)k3qBx^hz&i@ffoV5=GxPAlr^+X_ z_k_rfU53(J1-wgiL!TM@UkGC_({0@rK37$h!Av?GRd|`anYE2#7wOF^VhTg{oMqEzAFf9j8fugtQ0Oc>f}1`x~L@;$D)Pht3^E2*z zPy?jD8XqMrj48yXMUlLo7)}o8I2c70qT7(Ath1dp^~nl6UH8@24peE~WBUXenr`Q< zkT*lo5)JO8(xpOFRK~XQ6lXYb!>I*0iCML3?qMIa+zAQVvud6aNGDqafwNeXhg({& 
zp|~vA{Jq~p?PBB($lwm=$ak8OCZ?iA)2hJ1^-q5OCJB)q7q)~9>tB_4*6q&F>CfPo zy$ewwv;3i%DLyaat_XgoAc?>})w&xH(&RSNTD>z*m7$Ata=|7DQ&mz8Qu%W88#d%B zn-u!Hv9HO;`V2R4bPq^}jC8gb@l}!sLYts|MjHYx=Hvu3$B2qp{^s>zcQ6HYCpTuQ zg767(gbDm2jbhfDcwg3g5Y&Q`Y8$gBNUhF+_3?&h2rpy6lcOOox&0Fl{VkDpC7^Q& zTfWZ-Vo%U@A#FQjuOuw2TC1@G!&LHA3pQVA@H`fV@w)8xdBlU#befC{VskEY7%J`afQ*}G(!J-*LCRXBW@;@V`>PFy*Y7Eqyv=sbQS{~caCniAHh)r+ zO5|;p(U=9rOlyEZO&CQ*%?05N7lij}9L0-QV0Fbh&1olw-mwQM-XGE>)oLvJPkE&x z@0ebdNpw3js4D;TDZC?ed}&>xwC{4cOPCaac)R-039`0cH?vb-4w@V`nW$eEiyodF z&9hi%s6h`0pdTz=FG$GXao&_Y*zp>5d311r91XI`5OytCXuK{n`l+xM#6;`%#;!31 zKgZ$Slbw+@f;XgY?ypbV=k_|B7Ek$LJ$iY z4#FPX*X}@*4DWX>1Clyypz*dzbZ25I5uH8|OL`>}qOtfw#j}E~Wv|lBxvMC5^nVR? z??MCu`mBxT{Jua<6FaiOWW^?m{`8Ghyl}{`cQPcVCKX&!VV{=1^RT zXya8pXSKd_ZW+g$FC4}BD|~^RPE89Y~yMlD-QagC-uRb$eK)uKoOI~?NJ+TmtI%R<}zI0M=s5&>A;KQ+vw^bIUgnV3m zh4&>MJ@E}&SpdaYzp+bAt!Nr0Dp_GS8mT;~_Ef86Joiynl;&-+A-#*0&hHNR7eTOK zaper^+<>X*J^)fl(j<7Q3ZlOZcCn{D?LC{2hNRIV3z@FI((w#hVNE@ zmpLNbFoM(43bE~6?z7NV9*SCv?!W;E;xXlfYI52&4%jF`ZR5e2V$_$|RqC9eDR`2w zL6jR@W!R3#Iaaj+sK8Cf=Y_@0~nX3%f*c zdE4b-Vwbz7Wt4m)2-p(O0Kc=b*u9T@p;AfgOH`GCwsW->enwn)5LJ4UA%J6;Z9GZw zf34&1{Pj*&jFRGwDfhR_Bxwceo=L8k`Cpenq28(YW)k;Q(@K!qcXY}VXS7;|$!m}w zr?(~^7YDx?dHr%YOP_(!d67AWcxS&$WNI)(E&89P7PMbwpDkZ~1z7-e72XPyd3 z$2EhyWV92c8;y?vu@|qNuDoF76O6LW;8Efv=GsT-tOX883mAB3T4+tOL*C^yMxv;3 z7Dp+~pk7Yuw4Aa$JE8k3MR849$h#KK*Zu%mxE2{{Tg7dSXkk9=CNv+u)aAaXa_6f- znn!_dP2psQN=JYrENrlkeGrMaYtSPg{>b1l@$Du*WcqE5k}pz`#{>ag(qhaqW0eo41sMZ^Z=r=I2 zUE_7rW?b!sN^G>8+*D&jd{}zvQr1gd&$6Z>#{}Lb=nO}24lFcpF<{WR)s zCCp);vBjh(0Bb_kTe^&<5GVw+3@e2vSQmQ$xJIg|-%7SWK06Pi zXfsY-%@%pOR|!@q?}Qt#(rAyF^57nW{rr`v8K|5o#WxesPGRbayMe`_hxe-~$RWRV zj;;I--UT+-A47*G7gUKxZ^Us&mNa)`4+d>adfdK}HRo35y|7*aQt zs}>g*?VxOS?iXEgnmtWkZ(vy`-%d%j5%_65J+~~hb`Mu6vZLeZjx|^{Ik%4}Xdx^( zHCOh&@T)UP5co=2zEeEH!HsE@;YxJN9(F{>iM)5#JvVo4c(R>qgWaug^N4D+HO_=3 zZ#OB9@KzTZxe^M|IDmR+25xQ)T`WW{HmGO6Oe)_{b)8&q?4)L@gY(^Q(FWhgf2a~u z1P;_F@UWD8zvGCe?O6`ck2_%MbBZ*Y4=y3FCo8BsKi!A`+hjd(tqV&G@${(L?Q+)` 
zD>8}tEE{5hhrbCZewlGEkWyW#Jc=vQz|C5<76R$moY&ISX@607;ggBx*~z1ZEr~2k z{)M}xvspXOaZke+#tRmacO!v?5Syg;j{BdPk&r*cBNb{;S(2A76xQqhLK!8 z)E}>u_vx@l-yUgt`N~d0u@y5$!(_QOFxKVSzAt0r#m(e8n(nYVd~~~NcXzVRsocrb zgTKTM4!6h{xMS=1smEy+X9==ddUNNdD0V=)fUPxF!L^a65qS&KW2>t+5{7xMhd*~E zt*QakOT{}Y&R!F~>@rfNm_8W|9AUC*RKBC2HsXo$=0CiC$4<4iZOM&zfAM^)cSB9A z@H@10ajWEHa(6B*zqCQXQxOS)YS*=aixp$W<`-$Cc0MO8nxyM^hwgYsWbk0n9wioK z_B#^KR)5vC`KbgKdiqB>?aB7_HP6hAcm#jwl66_zoKbvrc0aS2R?gQexYfmN+!^zF=vy z3eui^MZao^G$E7k@WL7%l_TK1Ini%F4?xVMugaMh0Wd_g%({yF^8{z;sZJf6AwdiH4=tP}(O=58jRN?FHvF z6v=;mX~FR@@=7&oq=m_Jg1=@e4)4rx4Q8J0)!z#acS$V~5_)*pdihR%udvZbyXpP4 ziZP77iNttec*lZeh@DX(2#Ff8P0qKfcTV(@yc8i|+$*dFs>3`b1WyQzhd&u6#YlV; zY57)|{@(raONH46gKeO-@;CCn6fIWipn?km(3Pa^l%DT?Z!3}c)!k_#B5hcxMyb|L z5+sJ9#Y4Udp;YWzc$I8rcxGy+ zZ(GeV^UnxZ{Z_A6Ai4Dmv3O_EdMBvJ6E>ZMXeuU$*bSlJnBk zA5^Ny<473-byUlHS%nLtD?$Ux5zYIqn&}l$Oc$wDI*K5QVUl8M>iS4u^#-JG@?_^; zM|lq!p$pO6K0JeXs!^8>oKgWkWGc%puiTVv2CZ#%l}cDloNfa=T~r3z9Qws7QwBQe zR8SPaKD;IQ#>?Qf#`XMHJKL=7?SfBmckrZdt@HY9%scLuage z;Al@=)t#&DU-)D`zlWrB`Cvn3!Y|PFh`9G3y3)OaTskl=r`sKU#{8d) z*#E56UAzdZ`d)=^&7nQ4vP*v~9Inn9MavLxqU!%;5J^U<)WRNkDVn=(p2>ur>5(&~ zF#A1%(r|iK0&;T&84x?nFx19g2y3P1NYC^Lan1-T!Ul*SNfWGF$mYf}H}8k_Y%$GV z*%R^G#F^ASkr366+yRj1FdP$#aC6*1>}8+p4bG8tsMDe7RUfa_7$2U2q!l@V9+p>m zc4rtKDu+w?x6c?Tl(&024*b?4ov}vXZTB_tlDi4B#@Au%ZxWrK<%3`iLzTV#9eeCs zm$Zp}_N^0}8hn!~52|Vf(p-5960NHr=S_6ziN4L7%4tyN7Tn6OjeF*}kxZG6sww_c z9-vWO(YpFxW+F`7v#p28Q^d`w7m}6d&zNmY^+Y|JGeCO`6TSa=iA5Z~z{@Kgss6G( z!d!rMa+E$BZaKjF8tq`XYB{7-v|YT4jP~@wOuR_FJyaa${BDVOp)$E9%z0M{WvI(d z!=kpTZJA|*bB=zMQ8j26yVJyzmh5uTweoVyYv?kc)Z)O~fW(biymO~*-uOH%Pk|Ty zfE9()(v;{e_ZThTWqg~Em~vztlE=Y0X&yk|adG7KO3{v6`y6|T(w_6{XkkoUE}oK6 z@|`qCoVlx4uWs|D$=rF~b*FWcVrC|pV2gQM4l!w$zhTM$xFu7Jl-}Wahsq7hx(7ai zn{8eSYy=wd`RH(wh3XcL*azLOM>^=WbGn`!)UNANVcxjK`a3Dj9>gf`xPd_Dt`yBq zK@)b;u~GDYkjq!!D{PEk#nYebX+yrgEu31X66SAUxGPc z!F9SdjwS0CdkeyDg`C^PRZckl(A9O)g!sW+nhdcXu0VYrcfZQfw+GbdK%Qm%(>nAP 
z5pqcj2YEHExjc04b;doLXx7P*_jRW-Mlh!iEsvYyj?3RWHdXMK5(a5dJtbYvVlG5u}|gPigIKMQEdfdmyiqwtGk+qPv`5zt3e!E ze_u?eLK{ftFY$K=mT|l7vM_}dUF&sk97h3_XdItcM-sV9mrG@61EQix!1zADG^?*d zZ}7T6S76F5)sl4w;o&%*#EQ`DjSJi&a;=bkYt7qSdeYpm@{iRIn#jx_9a*QUUN>@X ztr)#2EtfOYZqTx?8?#nb>sz!l#kfSsrNO4Ea^xd3*YH$`S-sidmDe^SGZ0v?MOInw zQm<)_?3f;k*V&`xQi~;Z5)X!pWe=?GDkh2kO1@cRk5=PG%8U;a`OAn*MP=9tYN|tH zjNp-Y%cAwUs@~6&R5dLg)d4ScYb>Xa;()yWPg$u2th}rb;X>cM?rWE45#7tU7~fc^ zLS$a4k}tm7moZviI&`!$SJ^6AV8IOQ{BqqiM_60o3iMtkIv{;58HRL=oR$4M$^O3_ zXbDH3GNsg0yX^+SIO8=G@m6s#JX(R1bUbLQ#TF`b?{+8P;L5gNzSg>xDR z2U95X=RKxee|*T97k{@5`Rb_DGxkm)sxA&UIM&Giqu4kMfAc?X(0D2gpU7dzxf3B$ zs+TX9?ou=_wWnZSf0#l%OCIQsxeRBmW-hrLW58|pwU?5q0}?dD;K$^G z=Ti!ZRUupS3B(me#`orEHO&CiEqy$x#_mhRm+_h3N9m-D0Cn{onH(6?65*?F$Tptv z70(RwM8-+RCiY}gbUKfs@wR+el|btj0V+do=gluxU?ytBsKK(u{DWZ;-5|v~C8LS^ zC20kv>6Z8L&Uc3n!A)HJy74;9TC=oNEYWLTZ${RLG3=Q^2CM66&Lplyo~j0ci7k#j ziI0EzgzH?v=;808FpTe zv#bt>RxdBN&YBn?`qVTPOzBF#-_4s50B48TjrqnMO5Ikyn$TKxSFDiAxU_T)SEJM* zncdM?{z5%dO2rDuroV3X7n&N1X}FT!A;{(j4H#ds=6%cZUKXy<>>I z(5N^cd6TBdQOa`6fT7d?6AP7Nn%C5_?CBlM(3sEAmrO8~PBINKSK|@6Fmu(kIq;D# z6(P{escJ9%hDC6$>c9o=)xx#cGZIdd2+>(|cx#a>sNL}L2w8N#Sy6(iN5U`+)}%W# z@2^kBQ>yEH75&DPr6?y{J!r3DMicDDoy$$05KzN8-zI`%#PLw=N{DsdcynjA=7hqx#Wm`*`L;K5%|ATVBu^T|@2gcA>6~BK4AT{eVjn1dc8X72}bH+86+!{I4WoKJ?y=HO=@xf~gjGXw=>Sy$h%EXPvR?2_%lU5J@q zZYhXYe}a6lrqsi1d8Zmp89=ojr<>^kwtZuzRhHjrBU$jxXlorM`GhwvKXTX$o!)z7 zUti7YqN!Kj%L()$x2empwNYSgkm!835o(J%k`rh6P}T`Y(&{@_|_ z+T9y+=bS?b1Usb`3-J9T{>^l?`faCz7P0-R&y1J;R$lX)y+NpAO6Cm1%k*vUg2(9W z#kSEN(WgE|K`|HSMn|d@z%|tCUtSe-cyrA_7sU&cbk?@ytwR^U;Tb?DqZg}G{ZCk9woW6vpL73J~}hS)P}dskq*Z} zfzpkz<~fXonu+r@*&q*D#No>FRB?;cb+JqR9b{mimMrZ7W9b}t3!b{33**_(G*nmT z0Yq{cgGNY)?MfG2o;(hbmRV-0BVaJm9yu~~*Z=@>J~lU93=_(JRpdSPnNg2{i**OJ z2ERqgJm{*7n^ei@j9?z5GzmdkfSyL3}-;KC@{BImL zR-rBPDAozFo+uwfsgz4I&YK3UJOr?HiZ!B{UG%`@Dm8kU{f2GoZmyUF6mmn7a55F_fd>QM)42->zpUJtRAeCh zfVa10BJV(y8dp=Lep^b_%4Lyjq5^zPl#{HU!O`d_spm;Zukpd4r$yk3DQq&aX#>U; 
zUlHP#5`O4X*Dy0kF6a@h?p0;Ou@~4rS<|#KT)hMh_F()w$viMMbNR;{rPL&3u$8EV z^BYaBrf~+AZAoQ9X_22yuxyD8W~d~cqD)oi6;22Yd9ztvt?pe7&E8K+H$$w@k3R}q znA}tn?SHhRK4cl#{II+NLC?4>RH34{&RSuAU#nrcXG0aT)8B#mR+pVKI9S&A5e}dt z2)}SpApk0uby8-OOY-6BtULzJZ>$FJPfOOjtwFD<4G9o4eGU_)t00;2#Yv72akR$4 zOJnO>TS5$FLlF=`j>EHF^{+KHFm7lWl@80Hw)I&*F@ps6jTa*VzAFgFD(pqH*eaJ{ zbZ=S!++ttn!$)(w*FoQU1cCFH9}7y@RvI(3smoakM@D= z9fb|pjAM%-6^aXDSUW$fy30OOzgDOpVsMTK;^*H=`@$ag0OcY!HuyEEB18bGYl}fY z)up;fg%f=Ra?{kRrF@vMvS+$XXL>zThcEgxoKYkCzs@Q7GY|-s$_kyTaffE!2_RB< z50Xi@bcPn*y8gw6FfK6qHJXAReh<>OF9uf7SobV+o1dE~bbB<+C=9-0u`PA_3wsk1 z5#Dikd{-@8;`EI9{5n6(Cd^qv^AOkuXb(&gOsKoDCbV0*vXwSbq%m3O^Lm%;wURmt zW{u!ch$8*6e?{z{djWLTK}1AYWh@I?8dI?oF_0e~iu+1zBd`{CE-N0iZkb<}r+C6fNZNn>WvneOtQmpMbjBOustY;p=LpnWcpZ7oTo0X&h?BQC_|>D_=C;Z zpuh8Xw+6=2j!dIw#A^)gN^}BHMQ-V_yn+Jaf$o)26&CK_YPX8t;x(n6zAz^{+uEt? z)d>dY-|n)V^&Yuue?Rmn&uP5N(>z0a-7K7dt;%1nG#NA4kd6O?&!ri9n3|Ui2GLr^ zEOPMR9(eP)Cl@h0%Cjl@V5rc1{el5 z>rO!!#O;((vGF7{_@Pvgp4FL=iHgK)3zbQgpU6y6q$D*BMPD9sI6ymHz%FSG`Yun? z)o+M5bF;o$GZ@R$JVc%ikeR}t7_+%* zBR@80=NRB^2Z(@JV0jGP9^y8Xhs1m%RJ+nJkQgUhXK=nXMP$u4YLreDz77=k0(+eE z+thXhl_0&>XxY-{IrA*?9plW%sR@Ww(|kEs(<*EzHuC4p&Giu-v9_ObC>N>mG*1qAU5WQy6-4&ky z$JuuWG?i_8kEkFh;sg+tW}#P+-cbZ8(wlT?(m{F+h=2@LX+c^50qF_7Lo5^l=_T|i zy+=wCNC?(va20$!BEREZ- z6q0quFoT;Y{+&i=`kdBA348d=Ow$`sY#pG56-G-Z-1i z%s0M@gKx+0S+p)DA*CcYi>HShnkj{<8Y$JIRe*D6#iX~k6iZ;cbvymxW85Dz^jcS8 z$@DbX)@r2t>kn+O7@g8Wjv4?`#FMW90EC>~XQ^nge%JU#CR{Ya)ku8JEVA0ML3Qt6 zV^!yu)*;MX7Fm%Pla1K}&EL_%g^iSB#45n`6s5t=2SOYI>78{uXl}z41DnlzE?>b-g5i4{ z%-#?zJC(;n*itfL(v>H;NDWL_JYPe&xyKqwvs-i?mv|#N<2)zP1^8k0xL@YsdGadsa)53V$bYj#I zQKJD=~>q}#^! 
zaBMlg_S}i3R40Uefa(utE;oaZ)lrw688@N7@x>vvp0?w0v!N zmnS^-K0UxIVle)0o|aa`cg?hYx53v`RnnHT9Xj+lG?jN_+?ex?RsT>UrAwu+ zZiQTY^>oRL#Vb?d5M_T*!HmCpOHwiAh>VE^BCWZ}lCoVXJav*$S8#&_l9dmrMczGlpZA8oSnm^Ge2x)cG7s8D=xmXWBpp0hFi)SiEy7i2EZ6-ZG^Yh z1%vSbH=XuJ?bpM>vlE1NQoGsQ*YLsu?c&wMwZJ{YCt0!UTpe6a*SCfn${oF;!K73k ziL7G=K9i`jM(|j&aj~2Eba7vj$ID6KB!W1tDFTdy7!_ku1utDZjgdAl7T*Lwfk9^j zkM(`aK5tFp5(y1X#~qc3nrjUYc`J6Yz*I&Gf~b>jhh6)TVkO$*S24kI=P9adz6c|y zSf|2%i*|q%cs`AGNOE9mx<)MP5O98mPu3HAS{G*@I?G$E_a;(vkpTe+wK&ls3=9;7 zQ*VVi%u3eOi}v_2miTF;ts*?M)KS;$nbX58EvUabXw{A?i+HQ=NfwwX*QDVTSc%1p ztpFITXtCxgA7B=<8BPV@vyCTb?tj4F(~u-g6%MM3C*B4lGaWq7;a`5|gv*tj+ZYfC zNV)w9^3CdrA#ufYFY8jJ@fOfu+o@budDnZ5$e0@hsg~aSSz7Iy0ni{2OPCz=_{$!r z-Th*{d~;(}S5!>A_^&Wn_vV-Mm^;;}i3>X(wm%3uR6X8$%dqGq#)Lzra(8n5(huUj1H@k!fNWV*|guI~J z$DQSFN$-8a4?7CCHgDl4tSpYoQrZ3hQ~>@HfNuH7DNFA~%KW21xl4Wl$*OQ#+xZ!K zE2;^=D{EEUZIzAX`Wvsl!XAQ?>b$xRnEDBpi?~&05%Km`#2hOZ3sr{H znCsFw=sflj`70XJ)O-j1;#E@6p4v^hy8z}SK#fML-b0TTAvmD8Xv{sfvoMpSmZmI` z$~uW;3u$a;8%Z+i8@niS_wA#KE>00;RC>)*R!5cy665Wj4X7YL=UVX7v1vax)&neu>Q3q>-ob^ zRh`}4rnyhqjqBB@bmtgEi|~R`w$cYzyIbZLHc_*5Toh`*rDkuUe6Xt87~+(5B8#Mi zvlFvqaL%!MIii*T^~D-53R2Vw6^92fj(jh0b`Bv9QYSMcgfP4x+kXz3x!>)-vW^*K9@B+=`e zq%^yjTtg=fGk_w=o2H@>Dt9deD@;2~Z= z$chm#3IOz^3bBERURdQM+IGeNXiY;pKz5F$TSLZYNbf%m>BA2_%ScT;Y31s@^%%k8 zs-gT75@#$m94ZQ0A3dsApMu$z?H>oKsBHvHl|r>k zi+~6*>4SmOht)vT3lms8FNMkF^zXN8#~0G4u!}&6_>LFhUsA|(w^TY^dPpJB;iKoz zfc+U(%TDSPfmZj2Bea+AaUvhqV0_46ql+y|E91?$nZW(8+_DDl39j-yxW+Mf?C5+6 z)*9kWOW<$<#Yl=TsxLzIS$IWqmL_h-it<#>%s7D$!{$7qT4(?$S7vusVZEMKp-`hj zj-wU*zw5$pBx<(?g8$T7H4mMqH~vjg{^|#~deXsnI@a{N7rv(b<#)^^o0-%jMYOGb zmOPEQz9;+mF-y&X>T~aHHqk=&S0C7dle9OAc(C^&fi__PG$bQ%QPFu{xiuDg_;67s zzkZ~uH^qVFy!mn0_kukD0%i=*N)z)261AfxfB{Hv?hF0X0RPWU(u@F8tVY~%cl&PR zanGa&KSEzwCQ2NYed3@JgA=vlmX_zDw@H$*Vsyn|l&w7>P|xl8oO3^x0~4p;rwe#=>@6(3vDWc>X?EW? 
zGRqB<>mJe2LZL&RqG##CzB4?)$0UvceanEf@3E)@7f7%pm!cN|kGzCaU-yE3bH14&7;ziafOOvOL3r%F4R6xfnx~ZG;r{HC^^3UfLjLk8dmNYAa9ZAzQh%k z+%xipDvCTRmUhe_Agw35!B!TSd?Rwum z`1-SF8jlauE>d_)_;_nL`Hs`JvMqf7esbC2CyxQ(GKbpdfG2PoRV#eA5#VFmiO<$c zm~^-KPKti|F^T1|h8B6%ggF~3vY>#a?jL8tzy9&SG@vy0Tga3W`5zzm<0e`Do`R+F zCSTxt{RF0GjhrmDmXZ?vON$AGEc~g@`CUWb&zm}7q{|5-aqB%lG}Znr-qNg;ODHG= z)Gxts;?z&1!r^2wOi$0x4yJ#l$DeC!Hp*@Px!922#Oa?b_TTLcBA4^$VwFl08$*R%C?=|kv%zf+cN+5D1bpy zlVp^BEGt(~i!Qlw|Guxk{^c2vwh1TvJI8)S+`5DuG?K`>l6NJ+1 zSEv6DH-ApF|9!18ULaN&3nDi7|3vYAv`asI<7`WSx4I8Mo%-$_|BH_yV<#f!h&VEl z{xv=N8M`$r0N!%3E&TSszw-a~TRF!Q5vwr|hW*5e`0304L?;R#0lcN9cK_|aBq@hh zc3JnPs+3gkU$SXu6rzprNWc92d`atXqQ;n5@zehuamx70D}HM&yaIouCtxICu>ICD zZB7wr{VHU!4{o}~d}Z^Mv03gSy?Wf5Wk~hq0`V&b#Yd3DT#4Hxm_5AHb&^@*(;m!E z_0QntuXp*_9ZJ^8_8D#oY*e9}qT+!jH#{ch`B3)B;XXhY=knj_^}uzKECX5RcSV%< zVpD8%D?+|qe%T?y;*3beHr%ewDwE0c1;$^P@;tUuG=*z*XOe50o*Nln{}@oh=Rz(q zc%5XuFpDutht`Akv}r<6#6?fp+}&V3Zqy%KMlv}kwmol9Unh3V0{@FO>VK*b<*o7p ze>``%qsUQikB}oLksrY_v{yx0(O|xP`9qO# zv8LpxqLpu^atj<0^w6%3d#wv@X4d&)>w)y=5eGXtiSJW_|Epop zgvzne#@MSyAk~zzuqI1~!))rS_(5i2Lsp&H!SRxdXBA5{j05n(h7GDTXzmUUeNCwX zjvC_RWW?Vc*JWonlv&ru@OcV-25UZc@gMcPcKs#rj8VzO{+BPxTn8lN>LqXWsQ+~v z-zjvt?)7OgT2iFCo#bZ5mqU1#k1di4ZuO>HJCoFmzN9rm{g_z`421c`^bYw+*ukK` zQ)+fzAr&NcVdw$kXnzl_$lc_jFcPWD73Ai+d_Qrs{-Z|Qoe9yE_=H`vz2cIWlWg$P zRdTc8WF*K{27hoP(wi%m?e>S^uO(6q9C{7}J6XtIHl?~zt|d5PGE|sPce?bky9za` z3{Z|ax)jx4@qGVbNv>&Kzwq&8`0AHpa^o%HbbX{p+>VF#ec7ulX1Rjt?nb`L>2iO1_A_x^qgaQJi5&V zzItxybFNW7G~wOvC&eU?@8T9A(#hqgO0+6G?j!KfYGU$WA1xnYJ#E*^iq*N?BSvTZ z-LwsD52w?{k_ixkT1Ao3`fz-1$gu1@%&>F}Z*g(tc*6OXb4%9*=BuU^ZHb5per@Ty zWxZR|EK%ozY{{BC7mo4xaTLb}lcCOFneW|g=62I4dYVOu1B#4D#J`@mr46;D3ydB^ z#f|Qky{-Ow#&&+VSax9npNl7FY%@bM(r|M_40yDGmr>Z(XgBxp;#J}DhEG)>|D2x@ zMMaC|X3{@YwZOfYvBH?)KX#iL2e?vt@r}utL%rNvi)RYRfL0Y-M9YJ+dNex5Jq7Dz zc!+1p$AD?Dvu0Wr@Xj8JfM9cAx<-?`YVZ|>W$&_$-F>6*mbwk1TgC^OTqpO=-6&() z6pkSZLZBd76WNS=3~1M?YrWcRll&&xIzxWYvf0)3T-VW~%jHOSV2%j|p|2KQlH$Hy 
z4*`HL=w-|HR)Tl$@QU{$zvjcfs!Y#NigEg|AAN1!o8`j|RaB$-W?4~uHI{e#LVtNr_me8yS){FFu5wc1Wh=lGE%g zla%4w;wr+gIfJrlnJn|AS=aq!Z4AB|rIMP(^0lz7%DyO9!Gdv~^$ zM|W$m<23ECN37GGZs90KcTC`U`jx}80FV}#mI7whUB?JyrDsGxbVp6`h0<OyaoKaGPhT0Ur}qVeXZ;TCy)%udlnWlQH+x|yYW_^`tHdF_~W@tjG!O;q6GC1(EI6%eM=(+Wyh@Y@090| z#dW258?rPaU2h&$oLa!HAJfx0j#Eu-N=#Qqf<&%JMRxT$*e2>aJ==RiLBD{$SZ`3Q zF(IEyM!U}K^vxqjJDQN427lRKKY$r?pAz+~JWp%!A#rk8M%`dMFTI5p&bdRIZf!x$ zpt#`prR$|$A6ujoy!3r$ES7r=K6hkh zSUNlJOr_TXokRisY(nGPiAmC4E1429AD23MZ zLlaH@>v6D6Vu6c3OKQfOC(`YGHld3rpgFSGJv>bnmkmMO;Sj7abZTqc$~CY1G)2`e zZ9qbtnm(6}yUL*$))1HPYxxXWVuN8fMcP0e3B~SHemjs!O$)SW-JuD20M%tKZ1x${ zLK5F%!SaM#{Z6pKM!ao65)7i{dARQ`tfwB$F15bZ#;lQeN*UAXv&B5JD@e20?BlGx zgrhKNP`iJiX+ixMC`W^07CF>-H2at#YMqR9nmR;`jGwKC~(G;m`0$7b+ z8g{NN{rYz3J-3Q<-5Ju7$>DQ6L2sT$Q%c0uPVJOGlC}@Py;fcuZGIw&AXc_Yi?>6z z@2Ju?!JKKi0XPBfT9hdbLVH}!r7k0{?TYBtk|{Zqmleitl#O!2YqGY6=44vNl@(By z=eHOMk7CP!cHeeUS`QkRgJuwxCZVu7FB#5_)e6+Nc#Gzm-mzh&P7mc~j z{#I;NQeDC03!X)_$t`4NZhUvj@T^gH)o0g`92=;6+dE-gs6W_2FxfKi08T~j4X`V` zvyRpjFeu1%B!w|vr6x9TSt!>xwWxdg(VTnP)S2uR!a8ic)=u~c{A4o_XE9QXfGh?s zcl3i_aXsjuZoo_!O$PX9d^k!E0S6mSEz`JE^lzV@wuT)gLAbd$JRbY=b-w93yFtbH zLoMUy)(QIVedEHhPN2bd&sfu;nL&&K|3xm~Pvzz*!m#VpMI)|?u!gUx^X$Dx#~U+b z8T9R#ciV=_Nc8m6)1l7qURw@Jz8#n2$O#KGBgr|Na0!$UOp(JF2D%=5=^Sf&*sDtp z_gpK~>NrWQ8Y!=wCUr7-+Q4Ohj9#r?9(5SqV#!p;_bwj~^lfZx^!a$%njR5^Quwz_ z#4xc-Ntr-m~Ga;}uWW3d05nnH&f{agkP~w5&HONDHlUdbt-aZng zD#ia*UCI7cyyyt~ZZxXVN--04VlWL$V<8F%AK@y%Y)Rsj3xn-uQFm}Ql5 zm|*{_O)?g-1wl;h(%))N!OIkB2d3dVTw+%wbh%p0`<`Knk(qgqrhMjU2)en z6z%K+8Q46FLjWb5cJy$Cu)`N=4?-%BD2XYcrtq!Eh{+k7%BGuB_yFZZGkyO_jL%E6 z=vK7(t4A6iI6vCR2Qy_oy*39QUi4}x;AI|tpmRanj$3#P)uW+YpUB6iL<}TXqj}oB zSXmLP6NzxpesOeUkE^yVmSJIxiyzpPR6s`@O|SO@E$TNw>JyqGK|DFgFTCL*JJ5bp z!~-sTfMIZ*o%sE^5|D<<3L1uEt=_m^Vc_Jr8O)T}%a7ELuFUVaX5>y3^?b&#+c_Q| zdF-cG{6P7$s*}iZeB)7TtHIpkei`CKOaJha?#Dh-c=C-)#WD7G{`Twt50m}Jd8eCj zq{_z2Ki)qVy=#8XQ<`CV+9`Eu$xejsc7vaPA+g5||E_HH%9R*KRD;B&0LGy!2#yW*Ka$N^pI{|ce1fa|LUrBizT~4M{~9M 
zb>VAw^shI&cf~AGP7TGD5xiZ@B(nR$_Hs0-DLXo&J!5rPy(B@d=e=vhF_5u74YO8v zrH+V0Hc@_N8p_v{oBP{2`@Y_C9dR9SgIdgCjvcaO%dzG;^JfXTz`Uq2`e>Inv}7$5 z|E{8|mu;X~Z8_be?`0e{szrV{e#MZ8X*k|1{qXHuy=Hlq5c^!WlV!Dl^n(Hr4y+ z*|LfJfyqFp?f_FjR{M*&tDiTLRs+OZrwt8@OeVFZQOeBD5Xh zc7OEs?hDh#wJ6rF;lZq0`NrRFPB^T=yqueP_yd&c6X~Z-_k1E{#+&coW1jX(F;`s- zilp7@08R0AWKiv*wJu*pUKPn$li&i58$Ml22R>(%FO`dX9SS!a>~9&Rc8j z+6OsdYzp-~)`7I?w#ZgY2@CJ{`c{`k{g~PIu9`yQnmjS6h1ChpzSpx6lU7J( zS#z9Bs>9UKBE9r+6QvQD%u6_`U(LM+{~II-?UtfExsG;^_O6XdACxj-E`Ml@ZS@?j zxwDblhml-<$~2Th?QQ6%xaxGY9afN@KD#Hxu2=RYE@gx=7Chfv@*&=|HxEwY|JZ*k>L$huFOaa zf4d@B4^(yG^D#sdp^bSeE~m3@o`2@0 z=X$IE)EL=qI9i6UUC}FaV(?7q11}$3>_+Q&4H7+LI~B6Ka0x6XWX)lHwA}P|Qs^xKC1zz#d7Vv&Gxg$GO>k5&-(;8eF z5}HPGYW>C5`B$gFot~!8wx}jUBo`Mg8jkVb0&n!JJhLI%``UfxS3Gndr9DpBwLYV5>1%YwUg? zV$-NmpqM2>+!lW~43m{l@tZs>4t}KwzGA|R<@X*WOY84_s!dcp5|jpex};gZf=SI) zdECkpapu}1WFuy?XiN-6MG1R{wbl8m>TVbPltlK>iNp+2zs#Ezp>3+$wZHEa*KSqT zG$mLXQ=2a-t=Kl;SZ5Po-xy5EpIBZvsCK6cj?@*hf6Q)>8&o+QTtyZCn(M3AD1y@^ z{!@(!3t4^Q!^1d)#x>z03woqF5nP?#HVmtMf~>7F0}Q{$%;XP!=W6d{65D;vEPN)0~RDl5qV}vl9Nf&6rHYyofUR z5jH?*@l)(8V*CUqX(M+-0$Z9wo4k%Tn$EX9Z7o(jKz=da?h|BGn?m2Z1|(_E8zu$i zy)=T;_^MKF3NvT+*|Z?zL9}MVRe20o9DKj={May|#9thGq(y5zFCbJkkS(_pPeuOeTx|J+CL&;fdph^%1 z(G`rFe|iGuASPa5-ygR?uUXB#Zt(#w=uk?3~~mgdDbX?fUn47x2(@0WPmFNUnB>}^r_In zm8?P?J6R^ivO2e#m-k!!cJP_|-hBL3dAlDCnX;7yalUva9xwL0`QwUPst1}}4tksl z=wkz^#8F&08`N6o=tkz=vVO1z;?{O;SSuaJx%#8qq(3P)|1^xtc0V)69ZBQ#A)Lva zui3-vR27r;p&bXO-dm)}&Y3cq``kU#o-I$rA3wV2peDu#)xB9_Qs_DNy05h`b2Yux zs-P}cKf1KAYW!+v?c!Qv6fcSC)b@I^o<-w2gnT75=W28eJF3#D9~n2Z>kZTDjCBpb zr9Re;>oIv#TVt~GU@zpucnzyb?z+EMUJb!gnLf*w$rL7j#f>wvRe<}zuT)<#;zfM<(zio}%gSntng$z!1?KntnsMG`!A^&k(q|f#-1A`j`@8)v zPZpEL8D~B&w|ck8272e&HL6YuYvp&>Ad;wcql9gR2T@)XC?8-FWV%|V4&2o>OFgK_ zxM_cHSnp;fFJ5lDeZsrX3gHx#wNF$ZBg+y5=4rl-Jhw&mjTpz-abiQ5W#LX!ipmeb z12d*jAA`zqZ;;lssi>Qt&?%eRHgA~7jFkMGSmcAb?ZW1@)7X5YD2v5Rf|UJ!e3!ZWsV;3LZ^m^~h{=t(-{b3h- z_iPlx)Sk-H*5+IomsQ$l(Eoj8o}_j;%tM~C1m5GgR2sdaazVRP4taV@znSTxXU)0_ zr6X=4b0X@I_d?rKAzkKI1+|7nS8fvz-$q-f 
zDBkUkIiChg3=yW#errDN8F8Uh>(!n4=N;_UkJ0iaev-(TU9#2Cs>EH9Y}@PtzAJ@P zFPY}-`;5KD{9^F~xNSfB0l|TorLXwg>#5Ks8F;tGZoy72j$87uT}?1e)Mvl_{oau;@4x6_QX;-2Lz{ke9c$FPB-KPTR=G$CqgCKdMp z2qw<$DPB*Z5@`{q>ipH*8;#PJqVr^Qo~+O9=`Q%g+V_X`A3$5$OI{&%S5n%Em&mNE|Qvc&x|VZ z(}V4RY_XIxv}y|j!5+j8HyB(L&2M`F?WoUf=|`1-2W8blMQG>2UzXF*gyG=>x-~Ji z>K*QY1L38CB{kjs6owKjXuL&CWTpB1F$+X>n%^zDO(;@Vf!btmSs1l=~6Hp zqJ#}@$-Xk58-CF803!ik-AW~NM2IuJA&&#*!1;W5Vt%c+-;IgbLU@`*zS5F&eZcV) z@zVXDT)*>UycPmIzoJv)-A~2ebZeWV$+$ZV_?wU+x`8&D1LS^jtxb=s?~xCEHROmE zH$jV^*jor0(U1}x2tM{oe)XJ9$iFYuxn|sB?99t7y5b0{CxolUsNmES2_s^TtSeRPP??1gm^qBlfoJ$;YClY?&6hHetaB&%{m|XS zHX$Lc`!8KnzCjZ6zFv2=Vp^L;CAL@Zn66C@Jqmz(LlWV6YSAHYpH*yPc&g9#$9dL`0;y|yZa?U zgpwdhco03N1oTpAnmxxkHA?0WkEk{|gH8kMfW{K2iw=Su&!G`PmvFeofm05I#FGNEPh` znh2zK4z75$`d2|}hxKt6h^vLZ=V-dNtYi|xcT640X{fxme9E;bg%lA{+d`bioZ3QV2n#^YU`zTbdbX0{othih&FN%-e-V?9ML+Ip{X7}2?3#A?TcAxY;yrL z?^DTa4i=RF3l(i$B*&p%^|d;gRc-scoHvtJm@-fm%Q}l6;@D{YL^1BNo0>}qru#Ob zMolt79#*-o_~R`#Kk*2-j2_p2KDt8=F7HC0%D6EEt4M7V0O6l4je-z~cJWGjwTXwZCDoz=CQn3_R`4B%uLygRPCCXwJjcC1! 
zGUI?733XW)yI6B%9c&;2wj(tHir->%Hdhpyc8}KDFP)XG2GT~Y&9W0RR2wNKWAul9 zGGtjdoT>2D%R_lx97Wv}6FHGw*FLhg&P8FYCG{q>yjM`d0dLndzhGO-hd5X1E>CmC zbBQY-w-TGef>PhBM4I&2iGRT`TpT*unmcM(<-Uc(=yulz<%HGI%%RB8;)Mp!-~0C=O^&EA56hJ1379QzW6IEO-O_mVeNF(10{x=+ZAEvgApMVrv3pJ5%a>S_Y-yY zED9bfyp+|pOlUg-6)~CZNWS#!cjW94-|>Hc;p8&Bq$WArw((K%L+{$ymiw)C#I@wF zqe#qrTN1n*T-N<{b}e*r>rB`z0~+@Y{pjj(G6U>ElRvdCL?zwKmJ?$6*$~x^*iLPh z*(l3mMaMr{tU}pen{%o+BL>}>rgE1w#h9&d@l4Qwp9syIK9o>&oi(2@kvGSjcso~c z?8)X{#f(sI^cpR6M%(~}5xSuWs+FY2azeQpqM3D{`=Zt3=4M#*@$rK%R>@va3-LsVy{*~b7 zLuN2?t~95-;~?;Lt>ThGy>22;kzYc#wsu`;^e-4;4uKFI;C`#scHz~J8U)57kAKVp z;8)t<+P2{D;H;4sKInjt57`}Tz7hdzZgNI;f#Ax2*RVesy{p~i7ZR{pje@M35ip;G z4VfRMG0srUV;3j_JepDRcS(5!E827;#rlYNn0U>e+%8R?o$T zhwbU#a*0G4odYm*nrHY6jEKMPSFA^q;nLz4EN+!F=lGX%;6RT(1>3dXb-cEW9FG=3 zggti-xIO#A{b58~0jMZp#=!>VORn}Uzz_&LP*<-RC~w@#K}@BwOe-WlnkMH$c%gZ| zcng$e*ssvBWi05uhl6un0`CJA*L}?@6 ztBDmf2p8};m%(^_MO|DLNyP!WgVnv1@35T+~KG z4t}@lOTIaE^DFPVN~p|2F6#0W-6Fa8s{OWa+cL=>eMBY^@1LlV^-xVzg{-D9 zS&5$K5lHk6V%0)5rFaH@Sq6-`F{;hm{(9Fst83lCV}hk%C8x1FqpqI$nm1OOjUF#o z(UieSOS}M48|?Q_HUg|S#~9Hb*ATgWP<+r946z4ucO#vSp!->SO>!TM$8aJvMP?Vcak{{;u+5c?CS4Ji zcufP>!6SbYK80X18xL8}&&-4ifryf_aYdNUvW=i|1kmrF61sw5JuhX&z=ClKmJu@i zO!Sbszs4;!uT`db3Z%=m*CFWqe^>BAs!8yh&3MfmoCTt=T3B_1)2*iMLSzHzIB7qm zO&t_Ob#v?bW)PK!ipTpNdiNKIHml>UMr9D=`~;@+1h6|@@pfHZca`}cuyXj4pOjGncZrFsh7VK zoIF>QwP~-aHzVi0f*{Pg7u4m1tpTifBC`wbE$D?SCk7xkt8ym`)DSsAFABi*Bf?!A zI)l5`0a=2vqs%zEd9Y?FVIFsBpUH4K>F&_-J`a9za`9~cDZJ>2FtpZl^t_2A1`OwR z=W(mC--(57e39BMMO5v%Mma?tInVJ11 z;&e2Oi`U3yB;5^TdzYW4bt}9(urcM^wP5;bb?7_iXF>i;#GOwNnbTnTNj}KHeOkmD z@=KB`Qs^oj=5tQp?9u&d4|>{(|$e9GXn9aC!~i6Mow4Z^h?Nv7@d>8T}=VV9j8eY!qNbxP-I+~)dBICM8K zi+pQgc*$8?6?`Qy%qL=GJ#eymRV2&1C72pKnaKv$-&2WHEXcWMex4|!Ic7PXMHRS( zvKU%Q;aK#f?0Ru@h;Kn=$Vh{Ukqj$Df)boPJU+DTff1bB z8)S*L%8Xjam?FI^M9(`fetv+EvM?_I1?wSQl~=m28cnjQL;kR9*K55wat1rb42>%rj37Bzf!_siy3ZKP{1Z=2|4LPjx% zT7S1;<-v=QC#%cg6?=L6UDe+bp8Qz|lw&17^W2-~Zo3H9-D--i8tz0dNjyB2A{#JT 
zu9v)^xzI3sYdK)rqwlUZGI&a-tgWJ9W7yS5R_TLr&<)Djh~XA6zR_$&95E;O-c@xf zM`lxfn5}X+jPiC_}3~e)zmIR6eN7EQR&g$eo$2V>8KsMjh>fY_@b#wyC zf3n&?JS;BJc|upBm8T-N|CCjp+A%ORuu6maTF{#Pu!BHMpnAGTqG*L8*?wkNqp;G2 z{Dcy-5e6cecVpDSVYU@)%D<3dp3udc{HRk{*|gkNn)s5!{D#VlK&wVNv_L0aK)5=fScGED!KpKo!czv;Q+WhTo)58Ve!_)7a~|QA{l$?< zFix*ypVJdVxb_Q?NY*`KMNJsamEOKPU=WcUmvm+B>VZ%ZGY=pZ^dP)oHbONn0jOUZ z%f!FP3b=;J!IiKEt=c2Q8i*`=76)}Kdymv=hx3wiS*K-RYR4v<_XJ~bM&^)vMn!06 z5PpphuCnOv6Y5B~HzA{Isbi2ndyDZw@Uc(*{<%s~>LJ)X&A zYhPRxPHj7OfBlJf2$`N;`LfzH{^J$TKl`0Hmht0~Et9(3(fMz*!ZRBNBTIYo7 z&6Ypa%4kaIy)pX032|CE-FA&8IOJX{Rfgc@`J=-J>e!Glg7kbwaHW&oKCN2b5ZMYl zVRJ(vJuaZKZ4=}LbyHL!{^9HE&8q7a16SgCXt!Bx!p2X{Dzx2ZTAVlM?1A=iMR6xP zPE&%bO_jL4Cc6dU>P5k-_soeZ=|XF0cmXR3?4m>*Tg_os3GeXRBblKyqzD#LTt#qF zH}fdfI;>^J+MsRCY|M3eyjDFhc%_HaIF?nu<5x$vwE z@xej$;6M8R+NlS~!Vr8pk-s-26?(SbpW_DMA3Fwfg=g%7N!*IU@C(||q(1y_->%A# zWqui65+bUKTul0eWf~vC-6IE|YZ+3#fg+1@&OvKDQqPkOWtL$7Jcj|VRw3G&dGOf0 z=%Wh{A)gl0GIm$ixX6_3NaD>0ad5VGQVVs0^65+jx=1UQkun;U8>LuCB=M82dy4WD zLzcJCu!r_RXw!|>vU#;_dmm9@)kRYu`7~!D(KoIQ$9jhd;|bGBQ|<9XQi#vBUDzEs zlYFE(Vw4yq)4OfymCrzw&O;b;obx>YDtk!nJ?XCquRsVdnBnq%wT;7&E7){#*!~X< z<6y^bOitCyKsK=t1+i>@lb!?iOeU!@K8#JDr~9b`5whpVS_`RHx;gx@eW+dZKtc9a zK37oJomD&`c3$*FllhJ1z>o|($;gM?y{n}i!r0<`5g|c7_NoSNyZW|4 z_}?1iQj44UB=^1^o2yvwC<`mPPF9KAHf3bzN3d_VKByEo2PoYqL;(h_8=zKuRe#?y z(nA{Vtvsc5zzMY4f)H>DD>F!X^H1ulWyy;i_U42aSRd2`u zt=P@k%QVk zI(K?vyt}B)JmQn%TQV7`h$IuQyKx42RIdYX2#k15Kc0X=p31(PpX~j|J%%*OUsT0k zvgzBcEJK`GU#PCwLXtsvaV2@?d(MfU@;^iR@XtJIS&>IRJ?|P~&?XOVEK5C5uY|Xe zg63I7AXm6^B6~dMx8s}rqbJ7#=&K6(Au`LVhbs5bnKF-?bM6;}1uf|g8?a=!zpm2C zHUNKWV8`8GM&rb#(tC85p-Cr}XPtA8Xn2c#C`>?2d+>e?IcQzw4C~&}I_lOitI*wd zoXg!W6dQ+zvv?VGWKoWH(mU+PTVTI~@O964bC7#MQZjLMU8-Y?%Bu|p_jAMgeBQJ@ zA}3DW3$|VTxQiFWR^Km@U4d$pm|C~CaHb99B4quwh3;;vz>{^ zSgrepoi$6b?|?ZO=yHa-$N3;;Z_>ubvUI4f5a5dUmSI!vJPk+mYb_!~@J^4f6Dxd5 zSuBf8CS|j3F2`dyYcY^UO}hu#@~IPZTaoF^J-;iVH-5;6`~&L#FTdbmKSg|D;aapK zVr}Rndv1vVvocfDip(FkYKzPuM>z9DelUaz#l?p}v1g>R*{hSlcT`NN;QrvOdB}csL%O^2(MngHQ&a!w 
z@TN(YsOGQ*G_fqHF-XU|5lD*-naUFW5c52!$>lBZ)H}T)#2?>APT`#I zac5T4YoWwnNnKr!eP@{kwgOMy5w-_K_?*3@ras6*TQJ`DVtv&0EFE$ytoH_a&77V) zkU)I5Jk(6iMI8NL>mc+Km}D~hrQ&8zL91Umo`xuzzf}47*bk3NNd#i^9VYv2JMU?v zqkKc9z(aaI59_L)5o%K^FscGGT_UUMbuat-=%k&2M+;K^J*MWLffy~{(?32VuAEoC zzaT}WY)z>13rAgkYkmO;t)p^2i~q=%A4+QY#CJRmi$LSKm!OFxiPgRH zS#h|4$w_IivL{Uvk+8nXy8i9ev_pN))h>C#^iRrhuj+5jbj=Y+yVt1#@iXi8>+s7^ zd(H1!xC@_#1os?bg6TLE#TC`xEazU=1d&J|FsG`_eP}rME5xE;8MrwwvnGi38`4J) zZz14;2!1gp}J& zWm39#w@4?p`Y%I^zTco+HU~-LB=^~ngx`UjzqQL*x*uWwKggNpJrcxr10;S3%TMlF z8D6(;G4wRPPHPRW+WghtoDb74<%~b>L1^DJ-RYdIJm%_mo6rR^amH-E^p&OSZvK?3 zP-(II*yA?a=Jb5tH6T#_ac5lj-l1JTN4@rap*)vx>W`U4?k{F>^?|lspMQa=mp-XI zZuHDz%Za?c{^)6Fn>D#kMzD1}UeyGyuM4pMuQVv|0 z6jiD1PZ;pnTR2`ubO=ML{1Ti3IRxbIC93TO?y^*J`ZH3q#w8A%p)ObvTUIf-nKQMu z)2hqoBU)ces0w$y=JfC7I2-ot;M1;isiS02iBZ0gvW5Isr7Mm1I8Ug((sleu?+#sm z_&;U&2QvRp+mxnU$h^&l0gLA%jn9M<7r%<#&4|{Z8x{F&wV)bMkOH+Z6f-*8l)Q~g&`aik=CKRYle{SP#WoOq`Mh9zJrRdqVIFP zzwaNenPKLfz3)}`T6^zx%&fbU8@3)|NvSm?88SDM;Nf>zkI*-lE0k<<+4+hg_#(3> zE_sN_+q~bh{P={0*Y+>YXo~LY{n1n=mj7j74h*Ys?-&n47U;tcan9+B9yyeEZ~F3s zw?>0w{p_~YJ63J5qz>v6XA$(pcO?)V6x^)4RO&@ywnu#Gn62dma{L107E!hp#x6Ol z;pfc$6rw;47EsZ6Q4oRkKU6d(@5dk=7HMNWNOLIS@a7szvNw_eBC;&q+n$%Ho?k%~ zi^X82w&Wb137YW06B9SY^Um;uOW2EXnPk^@;*IUVNY@}UdTBWy#=(BvV&l3VGV+`4 z)2$RO{(VmbA<%}TO%5;9Y2m*-i8*HRyr3Sx5Y@K_m;Xog{`yM58N<5!s!uGQKcy(> zQgF%#C5pA!YA9Z&_-3x5pjT}z9#lMyI?K1((!-BZbj^gGefJyxsDLB=c7IOGOMg;#S$1@alz`@R z-MfE+{l|WN5xh16iDlZ=BGz`1_%=Pah_4wFG(tp2Yn-I*bV&mZP<$Kh%zDMIe(B?tAh`y>1kN(!jJtpEj`VK3zkd3+N&olH1jf!kk)0`WOxWda2PU44Xh5bXSCn#XDz^)=^7qG10^7J4Giy*vPq!@aE+YiD(p zNPZTqpv3ED_v1A8+5i7x=mJzQ*`zCre>C4Z*+T+WaIM`lewGo?Ess6Xm~wh80SDOd z12vmMf?Ny9QIjQ&!M^UGEbYs1-Ge+3VdjDM(#R#%eEZe@X*6oR*&6fz0?oJl>~`Wm zbSVi5&oIvde<&6si%j|GAbxC*`#kZ-on@WTTgyxU=9xc?YUU4-&DxMrSamqSnZbilVIVBCJ5^{)4W z_4`W*hZixU_opfep-lg{|Eu1<#UL2LIK}Ez>H!`C<&dY(Pyx*LIu9@ZxWjBOdTaSn zJ-_K-lx96hxXSH`^@B+Q7WT7X{Bp5?I6(4=FCm4eRiK}q#^L_^(G$@px);EzPG<-^ zz`E{8K%gkoFxsq5aXPVY(Es*_R`q0ieED(HnU)kWO&IHWp4iCbfPRwKb3c?vpyI~; 
zNm|;psE*80Vl|%sK3Ymjz$=4>uRQpp=j8~_q`}Fs<_;sZes&1DErWhuRm81Ze%;!g z=i1wgIGYaNVBH{n_UQiu)*JPD!0j=K9E9 z^k<(qY>M|A-#(1t{`B$BiT_lAtAw{7yA`0H>dk4WB)I+F_yp9{SK+PKe@H6E^t|Qw zk#3n)R{z^bVX_TsSZ9*?fYi!$x~9p#uDG>c=1A&JS|?cYqC8&_*LuMloZ$wC$!;7W zG3Q>jpf<_#uyGuWCht!(VK-~|nGC*HlVZqmW@%~hAsVzSAz{up(xTua?q&ZnvG=%u z7OuzE6aDrb0c_Y*aHzgcxR-WG`t?Xd(U>RbP!+pSqjsI9FIz`hEIlDNtZZkwOKyHy zR7{Zg%6c*3#{1F0b?%MvPcscGQl}?5bs1uiDUxB&D|dtRd?akHB9jx~v487XcIt80 zX|C4KXnCJ8#gB@&gCewR{N;PtbKC@?v2PI;#_bNpsZ=#+bEONi(?Spe@!LQ7yB;bv@5S;KI;Jtq=0C}2a1})d*P?TAdkG=e> zXFj6|*mr;ca7W=L{;%Jm)p+HOq&s4e?f9qIv+*%39Ee#7D{?iOLm~u>|NPL#qKgS? zCzfFOEP?T?LjEGfP3hZRB~_e%Hu2jFf0Yr(+~ZtUq0Zj>y+4lt2-}*PN=*4MmBK>y zyt+aknRX;mXoYE@vcWr+rDzOk4eMjQr2fol#x{c)XKB5x#7&L zuin25Ge(xNK6r!tAtiDg`d-Xqv(CJE|876(&xyfq0`=J8^39A*EB82Rr*mGT-u1J* z+@8~8;-qS?I6t|aa8O?Qa@C$f0<}WY&>b9JnL(H~KzB8_BIvMmajo_d`O8H!Dz?l6 zv;4Y&T8&k;jY@vq!D#1SkoltyfL7>iz! z5NE1YfkA(xwhffWO@K!GAXpSoyWvoQW1goF*!;7y>pi z%uMEDhsK@C#ayR;4}tdcrYy>E1uDYB(uk{Q7>?&POv#QyvzAC4?WXhEkG?AJPpPhR zQM~?L$-e^=kaiN}p(ce7|0@)7*T14I$d%Hm`QHPqI^qUTZ|b_IKq{7+f-zw=>b7hH z`j~GIotcgbj?wK2P5rRsH#qiJYJa-gQ%C`QzfXD+`ernqpS0fn>0@P$&=nJiy&(u9W#>H=L`+^02(tob-^VK8;U|iB_)_=SCAK``v zgG+J{?56T9x=iqXrvGK6KfVl8!vOq)h*$ZqS~S%62SS929VUDLM_QdwL} zg8iK1&)@u?M(Bv~nKT_T4gD6sqqi|x&W2$?Z7qS~FeM?w-mmEWdE4(pl@&$Lc|g8q z@BEz;IzS&1{3-R@)PRsI$K!slGya^w$(58ZDk*Qq;+#4m*}Lew-#nA}WRm60KcNyJ z2WSManji6RoZu9EnB2Ac(QgRSZU-!!6&f(%Jwd>D%VlRG{=Ctj^A30~z^8bUW8f9X zs+~rsU*y(?d6NI2aV7sRYceqb7!DQRn19FNq{``l|KrDDCPm%czwKL6T zP(U)^8>Z|vLw|1^FI$ctq?d-+d$X(jDeMhbVBsr*C)v!Oj9tjo4%H3MR$5{{pWIDW z>M%5j$);aAvRghoFpm6BfMiF|Ve3(3Isa3|3uX$q2Lhcz4&A{~A|R6+@r*iyF7Q^s z9!NCgMz(|d|K|99QG#z$dhZWJ$Ggb5TzxBmuIR@9wGPXSAsEOcrT=e-FQ#aY!niKmH_{#qKz}`cETw70Wqmjg0 zFVf#xOHl^2Ql;^yWyWL!-Y7L~zuEMxPOK7q9E61RBtMYsBi!BTf5Y|Dt^u7ze_aO%;WE$H^E!z5?8InrWmgJTE7I6rCBPe1sW9# zU|rPxmXC;kWq*Nn_SS%cfUq#c=Zy>hEm;+~?e?nsVFzo}YHn)(+F}G;zO>3s6tJ`k zHi_EZSUWpAm<&YM`1om;`72BRw>@C7RN}pTj?|;KH+BM>I+4P4y4@EDw6rWg=3H$mH%O35c 
zF4$+l$=L)!74bfJjp%!}470or)U3SCM&I)Q+!Vwf`?sz{0#R4u&f0_QKQP1v`LgbY zBx8ReDXq3zs)x^Wv(WG86gU~hX+C&Jx-w$Ir4eqt7)V|#tx+~?=L0T~-gXn_3pM(T zLk9Hau&n}m1R@G}JBcLlF7YJlY@uoq>9#k$$(IbQ{Q4rv*h>e$ zSOnsz#WlJkCa6lIh{)$@z_{SOT%J3dZJ)G}K3KBQB8pN>CPEqo$g@QPHI|a(fvdNQ zUZ+OAbj9)+?!^e};ZR(vRH`ssu*6sA8?`Rqj!`<0&!4d&k6sXcUY(z0eDI7aw;gT0 zoS1IgJY}foRo1=8T61O7yIz_sksIU>BP~V=kTYE6otmR8u|Bx0vF-j+hGmgH-4E5K zSsrci_3+uUnl6I9Bwx0t_3bBEjT|bsA|1k6v?n-c=0i3Jbuu+5X7&-O!%I9bhDMZP ziSgC%ZS0O;-c{ZWFFcfxCbqpY_TwQbp7<%D*0ER_EF*R!?TmzBTM!H8cDfZ3{;KlKIeI zXU4BqbAMg?k(~$*eXgB%1ZimhO{Q2{%<;BL8>7Xp3Z$H25}S&*wT!7E4UBDh>M-UnP+v~M!ud3iE;alved_UnW=Q@V zKgSZ|(MZ?g^L(D?g+_MA`N|ays;-`3?rK)D{Wn;RoMnj{W$se@O-RY+7JjC@k|X+O zb~7o%O<~+uBn`hP0A>zNvP~wZvNnl6OojuGA}1CLefv6@O3iM@5k$2{k%Do-+;ARu zPeQPwzOI$H#3?bAdsS4vtp6I;euQT^A-WB>cwC)tl5NLslFruUB%3`MAFKi{b>{L5 zZ%vKolhkM$Xf!vLOwJ> z_r-Z4=PlYDLPM3kP^918tV*$%{G(52;zh%|R?Y0m)KU=tXjL+9x-E;37I$aHAv$%| zrH<)XKKjCcwpbIh6?1tdktQlf;UT&aZ5}6tMOOD`N}eor6yjF0f*<}Sf= zv@fTiZ3re%1s%y#K!@|7{|K-bc~s{D3$xE|zLRAD;-Be5K1a_PQi&U8kV zw?}TcZ&ATZ#@09N2jZrBfeECk#A)wSquw8A#b-D_U9U0FyE$sIHAhJFsFpR7i&_u3 z<}Ig-x%%!ol2sqJzrgA8Muy8#ZG}RToisV}RLMJ}x?qlwV%{*Ri8Yo zfy2cgb&-hkh=PNlw09lvABw0qn>Lq;*;1k#WHl=7R2k_vFYSM6v}ia^QZ%#GHSMl= zgD-o6*4~pJb%(uK;ztbV8nCK!6>Qx}3ldbm*cxW8iq(T|TM(z?9d`^ z{$WhF02X}$1qj=HhDW-&C%JK+X2Oc^K_{AG^X<;(O*&MpmV*fy zq!z0|2CUjRcf%aIm;dMp4UuR zC0BS@+iaQS(lQ|xr4+-rk;Hi`aiDe8E8 z5E-6N)}_``f~p8{R1h`Q(tHRvTWb0c$>->1_OV^uuy+)PN_j$(Peur1+vO!=oFfxb zdrgmdJJ%X7gO;R)wCeSfjw-`__*mx|J}{0jFXd!%d}y5^f;K0wj|>bMGK&LIDny{u z0c-fR5c`R%QycLjq{=O!v)MnWwd)S32jbb`6u)>}yF{(VVJdwf-os*JNv+u%N&3m; zpGH|qsUmenZt}XPT~I2#J8E?UPO$Qs zftTNnGJ2vb!OEt!u73K7a&sF7a#E$Fe3(C6X!f-L4R$qIM!v*aBe1IC-40f$RnAf7 zb>k4C!2ynlaKr~NzBf6`)MJKDR(iL&n+|@HA?)r&C7UYAC0%=>QTaGJes+^(z9wNl zXc1Iq(~CpWT{xNIAuE}E1=f|(Y^df4J0e^%o+|lCwrfFd>n_sBY1Z1v*TF8-x8TTZ ze(SI-t?LeMTa6c0AqL>u_g4Vi1arN61W;*A%tuGG1$X#Q&+G=K9&C^A`3A!CIQKczt+jO zFt0T3zYlAgUEhThSydRh%=smak=Qi~B|{5+nUnN`qeHehU6{5Saq$Q8@QCsgSLXRi zGl#WRfF7w16nuAp{X9`jl94iORmkF23|C}} 
zfO_$$^_#oiD_~*cb{rM12r!v`_C?b1``Y7u9W*3@$=-Hrpp~S7yR*RBdG)27t0z^- zRsx2YIU{nJ)C9rKC#x7p!Vs-a>|-`~!e;=4nIkurIRKJGdmQD`=0D~G=IDB(yVzz3 z+EceM&|$k2O!4&-oMjMoJl@_Kv4s$!Dm4u9pbxvrt!cemn$`j?YrdJa7STewr)-3} z%Q*^>S~8z$F!-?KLx5mvXY-O0=C6|ww3{J0hWm-aU-`#jh?$xoYRUBlNl0&l%5;3d z{0fSmI8V%98Z=R4@38Hvensot-BQnT@O%a=NkilG2Q9+e-tLD3)X(xk79oDgk_7&T zR^!NQ?KtP84&fiifPj501c41bk-Im&pZ=y$Kio#698d?yE=`yG{ZU-^GXzjQ+1hSA z?^hn|{WCD{vKYGlm`4gv$!39~)4GEm|JFc`JKFHf!blKiH?g1dBO}V|!=rrr7drQ5 z?eyrF`eXL{wiu;mxa~;P*=p1g)WDYasmn{MW&LAV9t;CFoL8RN&n{aH0};jYm_x4x zcekb6T=QAa(2MsJsZ9Za3U4VlQvnxj0SGGNjp$ND8-&hGM%{2Ax`HQl*hqGxyI{#| z9=C+|&RjLz;y43{YDrTg_aZZfGrdb_4tK4!vS=Z zoRmyPgL8%r^HhqR_m$_&73f3jqAUZA{1Ya&LLpN@@95~VyQGUN6;^i6;ir&JUK%|# z?!Ll$2ejVZhB3uvNA1$Gu-x=-X9y<20zLZ4XOo*YZnTxgJ6tMt3obID5gktXzTl*# zeob2lwCrj3?FJfLP+Vm~LhOKMsVH2&{N9og{IYJQGhJ?h0?1W7NsetT9Hf3wD?YD4 zbvbf_>`7?ExFl2YxKk(7zKnFi@BF`DjO zeQdH zpx!FLL&|j~3>F_|N*LR52tyuNLv#jXU$}=P$?GReN`trrh^+%lxj;ikJkD>^vYCf6 zB`j4xDk?UXsNy+4SeKGs1G+A48b>wH*so;$@p!tQDaTcT+jb}AMvjTIJ8pwtf%!jU z@FLavY!ws>>+gC9)B-$wlq!3ZwD%qF(53G1xA({&#$MifLL4KNk%;mjr^?%Bd&3`L zewXeB#Ue2GC(|K?TyINWvuWjG`9v+6Gq(p(CRrVw!!nyvGlK4YPD>)xV5Liq27{x! 
zs761LLIpZX;>~J{;5ygW*!O#uz~rE~u5{3T>1cTdsmigPuaW(%b?4SWrevL}UD)bp zYAJV_4p4{I+S_*&qZ9reGb9DOEd@uf_(R{uQ#`h>I@EPp zJJKZkG^TnI>zy)b)O8FuHIhUXZ-XFdXgU;gd)M&Q3_7icnU`^{+%z!O4g8=c4p@pd zvLb;yNoSb>s|1<~`pbSfmAI?uc~KT6pUdTgkT*>FCo~zwH~`CAc_c_ek_iFi&BHy>~3hahAv_0Wx~MxGytB;T@(D%wN1JIQE`D z^Yh}l)cbt$Fv+-SGU3iZR8e&!wKJeRc!8Xk%rOM3n!`kks&Cu}lTQ zpgNaEuscp&e(Py3fcrTrPE}3{$HTt#PTWYZf?mg{AsI5_&7jJUSfEi)GqEbFg9b^9 z-dt3Don>o2CvU9G^Hr+ak_Y0G{xd$JGM>&X_Rk|3;JD39k131k2cttL<=cPcTtzql z!=MQgKwy!L$mOlUvWW*^>R6wFxHyJO=7q}Zb(~&O`RJhAu#gqFv&r&-AdLxT!_L7r z7-@ulj{OOj!BPg0)#hZdQ*}h;%8<-&?fR^#vQgfRCK%8>iuZ8bgJ(fpXhR9n@PX6@ zH3iFmL>!zV<%hlK>D!z6r+v91lHK?_Fe9rE8$*|hG)IokR3hDA(7Gi1N zoNw?>NB*;Y%2}88OQ~7l*$EL#V~qcOSF0YQ7MrhRn2z<7JIHK~_^xJYsJUIO6IdrH zVXYJaiE=M6kEMyUjz$84Gt$C&cpTsR2)@pL3O!%ZD15W(3tla;>9%9la^#) zmnJ@Mpxl7VI$r>XdJ$XGy)Y2yuBg7uaR4XCZs85x9 zM1fDl&NNom;h(ki#N8u9t)pSq4vqh2wKs$%MS~3uP_6uMeYW(P|DCr70^gwgM!5i$UbKi{_un zj~HXs>0r-P!{(c>$QZA@T)NS@{E_#r!B3?KQa4n$tu~Y8hOcTAK9=$)08)aXWMMiQzqJn5}#sUjz`!02OLVMdtd$7Gn4+!`1c=YjJGf(iE z%d+0YzP)!u*T6Ud_l-w`IWuZCT*7l9-Q1^P`XfF1&VufrtlT$F@G;~3Rw0gQ#X!I8 zj73X60~{7dBX8<;pH>{rM)H*6x9o)h1-tj|LrI~CtbJ>8{1nZ)H|$)uw`A4r;!T0s zS(JjVj#OSptN;O+ESE%tGcX)}a;C5d-MiuHw^p@Au21u`&vB zuzdTd@&(1U+LU%hHz&I_nEe8a#H?#D*U5Zzm!IH%9~!Na06$Ac(ZtujM*9(hM+Zu- z3#PQkZ!L_C$FVYsi$!Njf%X_IAN-;}2svz97nq4P&kt^U`Au1!hld~==O2T+}Yg-cMoE{R<`#%Q<8Xt!{1*@F4)V7iG`AsEnB%pDS@?h>@o z=1do(Aj5OV=+<$!d=eFkZsr2CJFZlwb)Dx`NGm=@XL<}Cf~9*f$|$Vmw(-g_eYmLeoP(NyAb=sim zw4UaXXS=hT6FKufx7#;3QzkNE{1y#}c8T$zCDo=v1=SE8Yh%z|tGTp1VG3l;fL701 zBaAf605%^9+D{*NeBwvAX7~f%N5Byl&AtZIOW2GKpK@DEnNxXFAH(a|uxhUqP%a`y z7B4D`P}x(KAzoloZ+42;4OIXn;sw{B*%v9*Fx{KU{9z0WO(7w$w8QQ-v4 zVAWgUZDJJ-MH@bYcms`P+JstDxJX2~mZ_r8@qQaOaVS7Fq_|9E@hZJbbaBQ3zHL9%j}$yQ~jxm?sW* z7AjA-H%?>7Z9k&=s{A;~YQzcNCz$=1Lj{*34he^hc{uOJ@+9mk;EJh1Lo5WPz^{!S z&3{V6laGT-5~%Mbc{s8UzTvkYdBg88fPrZNtY}8(Z&8^3Siuwcbl#dRum@9yJsmgt znIOqBkN|VFy{TQ+aTVfd-5Ubv;6o{DJ_E&LD~sKN!>KesFL6tw$qh*MJUI&F`_cFS z!sbzLfa4Q&T`K=_MvlmOEGJcpaB#B(Syd+<5&I_$9=e>q_K%M2VRLh#<;1%?oH{4O 
z!{0f>GLsOWn73akD6mZo-|pb(3TSpxVRFNzZgD|!cm%@DPoyRvD;r zM|?r?{U4-3o}te^lk=G01wu~~8^ApL_mmf?$mwL{s8I2z`!TU^UtM}QwoT^g=4;ND zW+E8B4JY8E`m%5xG(Z%+l~7!)Qq}Gk(p|9p3@E-x5BB!oR4ekq>Uq2Ff zSi~@R1GL=JItaTHF76$?Yo;%_Aj1rMhVPHZnN8X;myATSZEiMNbGdwFl-9`IIVy9D zBAmizoppy2|NL81cpF_CD{X8>;A}9?Hy0VCZ>QPm^NH_w|N5J|t*39>NOocNlll>O zPCYRU`|7&~GDmNxDqpp8HuLnpxv}zfY@2*Rnz-F*i6xU6;5bHt_wqrrDkb~pMInzo z9@sO)@`5R|fr5zRT9PBq_JBp)6`#<%$g)d8Zb}1E-GxGUS9}0t{=C8;)+U;0fDSC& zP>~YdDl(H(`R4{XlIe@wprsb{^j8gat9Jcjglb9@vA2{iM)@5+e?Jif*qr_@FS^`j*Ap)~dcjZ8{udoyX{`KMSEx4SG7>u|8ke!7s7 z6^D*a`|?BFCeBc{lfjX}1-t(Ke(1{b(iBH@tNsj+RZA$#XuqGSR+JNVwU>l0%|lr? z>ouPv1QEB)4I!t3Ji3HgCs9xX0S=DLvPYOIB&9SwmpY34{^oGqGCtoPMsRM=-3=#ae!6pRhaglk_fbyE2Uj-#$a?&a`TVO+W6sn@p+L{4%F3 zZ+DRK^Wvd%wju%9_beLUE^5Br>i3{&cxMP(Hys)QIqqb}k^SGKbXhI^4?oZQaSfTo;@I)#=3AI!XQx0Olew#Ecj zo^d54sl3Wj-WBsm$-C6^Zn{HMX+smkmyNq_;VPjVoCVKYJK}eHk#3k!DkcfZM@-!Y zul{XI;ef`g=yg-5PRs8=TU(&X^X_Dl?X(UrR{MN?%-?*Ot<>TT#iOG7^2@E`oNx^e z&2fw4cKq#B+FPGqyNr2oTz^8p7c5@+f;wl%j$7)W#HE>qB5RdQ##?~pyh6v*qnXC2 z)Wy{WiLi&PC>1QybqUn%c^f(BjxxDjpCY*z)0cX0xzX+m};G@6tpmAs81@S3L916$9 zbV}-ErBo%aAUzSJ7jfopl?cj2nwp{S3!CJIo0?fk?0GugtXgzMYpd{)oZ6OVrr(t4 zH%8_F|A#=qp<{Sj<+r#8W5;_}v2Tavg0jfyf4%Q4Gj4Gdjfl%`Cup_6*eX)nY>eEf zO7HM3<5EB5+M1jHRL+#KyZFX!GCR3?=wl@TW7o&K3zT^W%51F-{Z+6R$q3A$G}FNm zP`bTP)4ZP=6EOq+6X*2+B6(XoDQ$?52VO}b7;8NGx$lguc02E~k~Av1 z3DEgkZ4T}+&DU9w-wkBNtt9C$&4%GZs+?j@Y&g zXB zxluG=Tg9<>wVl>I)m^sdrSH^*gNatf=rI#Zp?)VRUaiK{1A+|KWxH9{kzsuebt=j( zDAJsrHywh=cNH$GFpR)(~cE^A_OD@&^P#^G5Dy3kZ?6Gej!uH3M0#0nVTH@IK&4a9VstBVX_ z2zHf+yC2zF<=551FuMIhF}9CmXUC2%`({9AiBJY>ix>O~wqb2!6(8CHqPdgvkR{zC z?2iv$Hr6ebU1~Y|{CU7D7h+gLy#6SpHedTFhvzis!h3H?Ga;jNR-K6lw{Z?nmOzoV z{3vc#?~rFgg++_MK6vM=ZEHaJC#b~iS0wM6AZ#+3r6z4ucJ@7f4*V&I-ah5i#4W=l zfo$nG#^WxQzFBS+r;L#gUv+JoHWvEB-!XQ&5_i*ReeUsOD>G-Q{CJyBgjUpzjU_{5W~6d>>3vjsoDL_ zBcWjwM+hM1IYG{Q5j%FgEqS(O+ZR(ySIwAAdY%|BamVde%|l{w<#1b3wkVS?qML!y z9xhE-2)sJkUL#k`V?lT{PpUI)?;K>PQx>dCw9EEpGhH#?8+TuNZ;?%qiO0M4U 
z0Nvx_ltZu4+8B?}jFu9MS>4pq10B!jqQQBo{V^T+-7$GG)Abd~0Ud^jzdR72p3v~n zC9t0V+?igHFtU4BLtn%u^pt6?I-oSFC_WAvjgvb`mQW=>@_JfPN*apdZ7pW4VjRO? zc`8%8%Uid8&cn@a#yxwf)>n@^tJ4&1->SPUGk^4jywp}~kPbI~7%NHCl*{(eujg}020D+O0yGd@^Qg?Sp_zk4<;5G?qTP!pp8eR6|d9o0yhA9@DTMW0dU9NGiFY3AB=4-R&Ez*Ee#EEvp51;GI zzBgH~ipnpV69S`6t6Qj1QYI*tFM8k`@}gM{VyBi^cv1xOcBX;+!8wV`pWtQ@uX*oy6%Lb;>PIhesWf zPmF687uTA6nX30=B&L;?p;{|HdR=RLU|&&1{%Oed%VE`dl|ePcRyiO!i>7 z)v8pslBm=#Aeu#DQP}f9cIM#bhsXP&WXp_G z$vi1=1&1l=rGWxuzgv>poM-#@D&uXst7+GpHG|9sxVJ`?yZBKyC_9yv{>w9Z-A;ED zhi5XJmxfn;_!Mawoi)85;}7n4Md1D}qZ6aK52}R7S#X^#7s)0Ftou_}JI@`~dRX)+ zptOc%+XbD#1YXUwccxfCVBVEQLQeJkaBzue{*lI)e2M-ny3($LL>C$%H7ER)QQ9pe z(sZu%STz@%l`-9eh?}9G)TItHEum4Em3^AEId4xprW^Inp{;}CK(>9~`$oFyt36ZH z)97SlrUGY#23XrXRc^^gq&al3!80v*)+!ie+5&DdfUM*o_iRv1Etyt>xOFNhk}e_I z6$yt(2U(G(p;-9H>f@*E)3t6QC@U%H@4gyF_TcLA3Tm9(Dh>!{4it+k7=eUkbaTz* zsh%b>5)REIwC%Q$h+;V;t*1AhP9>T4r6D0osYM!FSI@{kctt;Me67a5jdP*;;9!Mj zp-ifqgT$_pA80bwDq^K=IewfX^Nqq(06r=k%QA6!b3IQD!M zwwRTl(aXNTULTeW=Y{Ts$lGIj~4~?)5Sq zhu|lye=bn4Jh*A!AX1D$eJm^}cF^LFF%kCa8%Y!JyU~WNFfMMP66ZXrEj9Ck$yItv zvqVlMcI9EDL2TM-w)IO=-SQ(S8^%uX5Xt}HjYHO*HUxTu19d|xc%~_| zNjgE?<>>8(_;hvuj}84W-)kR_O*33(iGYNg1V}+16kd!!Ue>MixOiQn=8;Ky_!T}AQa(BDGFw<3kW4BqOIkHm}$Ir5JUl)u`M!ubyb z2>8GYJifyhsF>*o>36jg{qZ6Xoj}f;hRCMgIFLf(hdt`n7l==WN6ftnwx7kcS}h8- z-2hg#;z>p}D?Z-#WzWN+3qp~ntr&ZU!B<3I?f#Sje=?qk?M74LZk0B5UteZz{^qn_ zcNwo@plqsHVYs+#hCE2Tr3ib-rNq+fkTzF2%M3BDu_EaH4%qi@K==`V(Q&@Q17R#* z(8r2u-FU01HzhK-WjW9+1=$t@gQP?Twj2gt*dp1Mr6g8z_L?9O9Ab{#u{R5J=o51mdxl491wqN7xnTn2pKl&VMMPkVepTc?-jK?!8-tPE`s*bhmPM&d; zaMW>r2%=cbpueeEf?|EKfWmQzKhpDFO~w~-YKo&4O-lCdV1SCaD4^gZYC`9r5xx3A z8aFOG>?BWdl1Gt4;R~fTtZ;Bnq)C2Uc-W#jBs8^TMT>^t>Ma7Et?7TbbVc9VOBh(7 zX>1Mc*3Aozqe{R>znGi)is>c!njbgVG2Rc{U>h%tgDZ|FwD~NHuF;c`9=2xSp-Fj@_}tdjbjSBry!yKS#K)-!Z&WPBNSU&FFJ^yMbH5R| zAmTj6R{4;!6;*)J^JFC(&36{P6W!4@5U;FG=a%Ri{jy??@Ea1^W5aN@tz3-l!>D)W zj{`X)S<2V+V&A1F;f53j)N8ek19HL_4i$((VleT9X`Gf52LDcfTM zr90FRd@-9ntOaRx6etKbOfE&MqLkaZP~jhZTAcFv=~4q3k*&i{B4E}|1y^yQ2AZvC 
zllGYhV3+BryD*QLnFp_({;a6P$e*0%qA@?`_3LuiuOlS?O?WsaPzM!uTBvsei5roEM|G^3G*_D3Cp0_vkqBV@^Aw@$wx;Eo+K$S03Wb^OWRNN@hJCdzAO z>-86hsfX>fZ&NTlVw6%-_7b=Av=Nw`&1T4aYN3z@c#oNRIy}9iSDbiaKEks;pI@<9 zP>L)-{$Wz=)(hc)MRQbV~v?!BxFrqBWqo0`5G_gi{ zGK-Fg=rxrYhaB!J6TL%H5_!sn1jW)A1l{5j3Pe$nQ-wcAw!;;`4@bSSXadP);Bm6L zJX(2DCB-27K+jzo;&G);;xL|?AT3E-ghN7ino{mX5ci#@ovbw6M^{qIgeVJBk3|Vb ztZDe?G@~Z3a~DyrH|=H|m#16;wO%iO<}UvHc!f}X?n}i&mi8r1fq8OHjSiF1ttQC* zHbryp7Vy_%Q8Jp?d(H*%5-?tQ%Jb|&F>+c@f-miEmuf+4STs{+Q6z_ToYAPlJr-uJ zeb2Z$N=W(LwO;3RxbF0I-nYJ|68evC7Z3wFrFA@Ak0AWr&|?fP&Z5m79Asef)K;*zWMG{@uSygU5C%kVY8+91jNE>-q{L5iIhylC_qb;}YEQH*-R zEc+@5@lTj7$WZ);K!(nxRLMWKfT!qjTjgGN9I{wUs*w@Z-4@Z<_akqm# ztu*1qn11Hj#R{q=Y`HDjMimybvg}cNiy~`&d{l$W*4&g-h#M*{&(~WM7VAl{Noi?s z46Q)n8Arl+D@W<=M-7O6Jm?_@Tw_Be{_xRh0qPgftf_znYp%{=Erwq{{1!BGU0;nr zb?KTuU;pDjk*`K9?7*<`E!PH`IVlOSb^`-Kr3)Q=(MFN z{Fo*IPh30msF9$RHIDI6*zsPnN4v|mWW7CZ3O2PzA}%-RRS~B59Zd1rj|-MUEza{B z1r$JVje5xh+rhMZV!3t>E(dZCL%f|Lv3-Y;tVUBFi&^y61ct3=8QwLL=l2eA18iu* zSRgoKPOD#ILZQqi0d5!=w??hJ$k-7hog)rXT7NITZqG!LLl>7fqj)^YIiJJY&zU#^ z4R~{Alaf7GFeRY#jyB z=Ej%o1vyQ%B2-#R<~O!AAw%X$Yi4pAt;5v{Ao{cnVnj)A@lwnVJF%mCYehG)d$blDp7 zM@|WEWSpL$mqdTyP1+Tdv+X(tO?t58WDP}b7Vpx|O{vguf95Sv2UA+tv*Cpolrf9< zx4RI;kA@7zwm44~eNkAi!ImjaeHBUMuK-rTHm9|}$2@_4Eim^zl)-ki?M}r(wx|0P z@xX|AhW`v;CPz>3aF**uJ2XpE;jc!T@=F$cWAWk_=Q(!6m#(?YFz$|-4zAg6smeJw zp6d@On;^Ffd$%%3+dw_<+4&c)Q=AQyKGb)Yvuu9@GN$Arg>n9Iqpn?1RO!OIiYe*x z25w1wSQKSU>K7Y+o$j2BXyk9Fq=6`X&d>F5f?$oqVzrtVO#)2$6btQ$iTYh+2)rM? 
z#6?Q`+_oG6&kcRSjO`r~j(0A-6W9uxyP*^}_-{K@WZ^|vXPI~pzjJwBLc#A)jc-A# zr782efrj(-QuWPa^JK52{$m59{UpexYdZC4aE*Ois>xEjH{m8|h!1K^>F$1m;7 z_v@k-i%K_pS}FLB$$`4jZGh*3WOj=X?{gqq?{B0jL{IpKp>`XkKVSL1!ggYCZ4DTk z()naTzn^2@yoE;Aw#>O5Mkn$Qj1>K#z{36fuc_AN6Et{^ZY-MT#arAI+zk_OjXc{XplXih$^~H*fq#pI)Qryi@ifOIUN~S(0)G zbg_F*1jWI@pooR-XTP^s7ZF0ZaGztFd3@!ln{@Uu!*QQH4}`?!u;8wK2R}7YOgw{_JZzaAdnp10EwutX%ho( zpAw8xV^WXg^T1u%*hKk6OncfSs4fdO&Fd8bt|=3|NdNy>d+WHUw(oyf5CajBR*;Zz zfB{B8L>iP6vtKi}8$`|mL4aL(Rq zuYRw!w{SI6&U|4PI-n`~@J+LjjQf5ug*m-cKct7($p`AP=avz)E~zW<;0H)`LMpA< zQMAIL$e909)C>LzQb!yWyWK?qth1)Pz90tk*^5wc3!)^X%D2@|?*kNa2iO6{Saajo z1@^+3aSUyt&7k}%=l+k^j+_)J)mJLbg3e8o51K;l3uMmfFP#PCa*bD%nJ(QM(UM?O zB5P*WFiVYJ+?hUT`)VMRS!udJi_Q6%#v(r`u;TKgQ{(^^lkE51{>-Sf8*LIA3(f9v zb|Z_RM&$T{*_~Z)cb8AsJySTrq2$`VE=&A(i?c&|GVI}Y=_t)mLe|>i`Bj|Z<#Nm| z%%y!2AXdMfE){F`PHOy8Rz21<*=D#m>D;)NYI;O}r~8ya>SQrOVuIUZejv4z4{AqJ2kX|_ii{m-*WDx7VYuMP-?6+_CU`z>>m@J!%Wdz~|==mrN;c2KEfw5Px9C*P5R3Jdb$aEnRP zsm{OM@eMUIdlk2bqOE}Ny(UbBK8&mPo3{?iEReNOU|)a`J-&@v~*MQFHC z;1B9mbX!@q_0Z%gX`Ef;`8IA5zSkgS_TIwfQg#nosOBgdekKnuf3oh#T5euyVQaR6 zj!KGA6ui>9vc|cYl)fPGrYx+5fuc;RB^%t62?Yuss4LA~;zw(LVX*HJgBv3)FqiNO zGx}dPrYei73HPMaFdN7!CcYw3xkSgwUMD84V!l}E-fqBF(Ws8|i%RbBywGA6>lrHP zVSx5Nvj_afD;ijCc#L4j=UZ-K;a;x8jqr5J{2b}| zRq+9(qXmvBpe(0f7YD=Kwl3Oa2R10B(4%mSXOSE=H+O5>m#4lPQn^K5;ueAgv8iA<>GB!`i>R0JIWy&ZHue@3jm?G0d9FQILvs) zxxo%cdO^1_)QV~qo?&u3YR8sW-04$_-s=a;ur8>{7qE`G%90mbZ-)ZT%NVX;WjiI^ zk+-_41AOR!@JLky08@!B$;6bVV6$2&zNjjX+J0MrVl2x3sKNb5-Bb@Ir*WK*h9adC zm#Z|Y=GftT8!A%Y!S55O+IO2DdI8oGyV#`kVel*g`;7<6+yO`-m218Lm&51V8~d?W zs3!|x;1RgeUnD+)Mj)zj!#2av9`TIV2hGUi-cAZVqs_@EJ zpd1L7YX$^>;W>=}pd*$IpY|H?h1srW#ntk`lUW9*t9o$^7dn>Gr3@X~$ol6gfSiKE z*e(h~kJJmt^32qJXD(0C-SfsLh5zqG(U1T#8H}(`%8bA8-fy%2{%1$%1)bbu#g;(` zSNL_FZ|0qsmL@%?FD~dkU&}Y}N+B+NoHtg8qa|IUBW5(mI!4Sf@C9KmIDIgjJ{82S zt>{XXXWwTKH2?%f+#>q6T@X-!)6IeOAB+l=4!M+Dz;ST-ft@b z{OOj1)GSKXFSGu8*WMW@Y9ohd)qz~Q*J6@illS+s4GAkyT-tL0F=xb|ZpBV)r)gJC zb$}++PjJ*SBRHt 
z(U@M~M8SeXVzV;Jb{^)?E+)O|H?4-%z4wJ#H^cKUaKc_i9WdNIqDLp$1g-kA%T!9@ zIM={;H8?DWB+}X!j~#la$!wDLhO0pJ9kg&=2SlE%%N9NQg3Ie^<%LDwv-N5YA!57D^O8EvFC~2xNoKbj${gz7y?=4)-ERyRO?o#kA*Zv)Ou2)^u9jdg)8# zl%K8ZEB8gJfCCdS-7^a2b)7HmgPm;osdV9j2AWi%gw?5*dZ>|UEN{4#7GoT}(*y%` z*bpn}UGrY7I_9brv%cPf%d8IpbXLxj_QMM~K?AM$sqH&3J&s~;wJALYC$>yp+6MwN zHDsRskiuj(difNM)C{P}KzES(YQm1~uGjqF#wcad+KXc^m14!NxpkwuB9Bpvw`G0% zB|IXL{GQZU;$tJ#`cV~UNWh5j^DA^CQ;L?iPnwRi1mHPeAGSwJi8OVblls#SQM~j3 z%;= zeB|CETp&k@6Yo+`o3Ge`-_Tk zs*;HU90%GPrOiDy43Ls*fN#0e0H*=+)||13VXD-Y=Nt8PbDCN~#%sU0V1G@?8Vsap z%Dya4=$w|0lS=%%u-w)*SG#Sq{U~tayPta9>i#f2`{sheHf2fW>}O*$4)KPbndj5 z5QmG*%z2t$6#pYkNP)az;X9i+7Jh5M1CxdY_$6DN3^h)Org) z16rS~)0^GQ4V=<3&n9X;nwvD$+Zm+FG-Q*NF059{3e(JYan^l2pVs^bYhrW9-T0gzrx@mE6a9aC}%J8@wC|XQ0GrxwTry@AMp({o}R_!7BqjG+v z9!IA4%%mC?fOs|?4Jgx0cMrCwII>*Dm-N%v3C&zEyvxPt$hW z<>*jZcoaYI^>Hl`hy6DZ+pJl_OR#))VUf#u3FbdL5I>X{9e85OEXoad6h{6 z(JieOw_aHD+wS2KX(WbzPi+IPx$oCA;+j?#?p)Vz?5ZH6{}PB$>atW8E2vcZ5_Y3G z`z6-~U~*}tpJJY+PqvH3I2h}nuVY)g+zEc;smLQL$Wnxuek5KuTv4DM8XNZZ0gw*K z(>g1AW{Aw5<&R+Z{*gi(cTsdZMgbp>x0Y?;D=BPJ9WaS@XhEAn84jbkOYcz>JnM4& zWKCX$1mzZElnkKXPH)$I%9y?CKuN9T{08e?a7qvv_u#jA?ccYWLHax*$P?f?ZJ)27 zp@IIGm*m1sVf8SJU@wbqhMu-oR z%;NkfljtT1in~d1POrxmwpq7) z@5JpeLj%lEd28Y6-L3%+FvN>{0X3`uXMI0-_pHbs_>Bnf?!{%wqi3mJ78`j9+oaU0 zcF&EX7!^5a74=>M7+T20+RdPY8K%^cuYy}Bp-I^N%U3#IRH~0;MSTfUw)O}2N`GS0 ziMsJA)!`Z_oQnp?`3~4==Xwie-ik5}@x* zoQ_ato%}X)p@|;=Eq$#JY#IKx@CP~J10$^fG(h4{@@xh^31`|-w61WXlb1nJSAT^w zg=9NF>OORqr>Ii^$~<5s#q{DWW?%)KPv9Io7lC=!haU|OM66k*mJC3807EelzYo3x z6f;Gd4ROw5VhGBG@_bC?p%Sjn%h81idyRUq2;{L+EVKuYbSYiV= zI$ar56rIfV4d5%hot<*%vyKhs(v9t7)8h?nJkAW#3lp_&gHQq0eM(wHNL6EXA*)}} zbHKH2;t8Ok+Klst;+{ta%rq-y2zDP~!i*Qsrz=>gIYOs?;#McTg3rhuRxiCgo%Z=h zf@a>L0(~B0rO=6vgKr6hY3Xz%{{r;&UhS&@`W+&nE@_pz2~cy-62C>G8K8xtnyZl( zW+^@qx&CFhcX=jbsu|(*>00R5BZO^+^8^{VXKUFZ>xRI!fIShv@QuM@44c*3!6vhP z&nxQPpDC%WYnQp^KVt%kHu*`S<^ExwM~kMRpOVJrAs#V4@=XSS6~fnYl;ysD$l{qf zL46-^B;t}$6|+=za@7aAj2#wscGQ+8;G`HIUa>J~yV|^qS`z-PnF>%pGs-BHx>D@} 
zf7BB77Y=Z%fCdCE8Cgr{l226(R*2WS7Vav(q`80)oEamI5ZmF7@29@E}OTL%b6JLUZUUrVS|3=qF6KN4;wO zA$EaSnTmc_!>Jfd3 zTuZis$&yKD)1J2m6ZM1L2ESy}=O+E3nsbURO<3K(rV>vG1`tOYAkT7<&t_J%6aP3?Mq!vcwJhamcbRVkJH3(5HGk>M*b>UCLy zF@n?T)RKiE^6Eg;6&eTKVk)NHmzQtx^`?r~+%iaajEIfX0nJhojLzO_KPE$XeA2kx z{5}NWwu2dLTdp!0ApIpj*5pcMJ!69tH76;#md-ls3{ya7h%Kw{{6|-ux^kOBpJ^qo zr#bE0|6>v3%q{>~lgzo>!Q3Fv*8R|0`85G^OQ<>MZy*gQr{Q9wkAhI?iyX#+!-hnW zMR1xqt(fflFon>L+kJZD`Xa{)J}$cN7h5Ee*cocQmqh-IEhp0*HugGJX=)2W1NE<^ z1m#jQ*UM$Sr{>Jr28UYn&*6Ji?N9=NjS7`aCJqcI=xFRw=s9b62IIQ-!x3+PV;(71 zyz;U~E|$}+yWJah<6(=9V#-CLm0Zy32V2C=PMCkC+O#vRDa3H8H zq$jm~$~Ms#sLt7_gicw{pg#Dw1I9a0ff!o#d4u#HIM;|6#rZ2~!kfL4JH`JuOiVFZ zIw8l7$#)|q83;FWB61{SYu}7&Wo-kX7!e}SJn|Kgd}l7tJ~qEon{`ynV)1lYJh?B~ zo7#G3Q|BtTvLFkWbFG9I{?M2&E`MA;y0WE)p&HISGD_yCe16AjPm`gW^N>S@{P!&1 zh?y%*9tAj`{)zGoUVF%9*m^EFa4?RjnDe z7dR{j;&AK`wl({hh4vUiGSK2wg0dVmvj$n=M}Ps&LftywtCpjb**!7XRwmqwJTb$X z4hc~73Rr5TmeJDxc1loODpEzuEpzq#=)gH`-S|KR9S;w$lH#QEftpLBbnbEs0sKKG=vJ!$8%^Jicw5M5kSSgp~DRjaT ze~-I5J^}s1v){{NJw5fuU)sUNK}wIUH0uk#Pg1B91`8%{B1c zJ)jdxr~09sD|I@ng(%><)eY|%%jnL!^NCz>icISY6#qJW`2nh2Zh4cWBlda8q`BnUacm9Ve9oN}IZZ8BKvtSVdXY*$~F; z^QV&yI^2Pt*{E}4cYl|V6C6}f{aj_gbUFzrR0&dLZO4VMYOBm9yxxIvYrfcbF8)yU z!MU0e=)9rYoXyAFGCp|zW#`*L;*n*ifU~*`7$3L%J=9?Ss(EB#!C^TyppJm<=^8k) zQ*YIuUB>73zGql)ft(SEgNha{uW1Tkqo^9Omcc!*&VcHF2I3(!Kyuqt`^*6PhX5B5 z-pvfRmqaFUsqxgmdKL@ufr)8=${rI-lX9uWMO>i2=Lr%CtL*bQkWMtKv@Dq}Q7LZ2 zpEqLb(;3WrY`!Nwwl%-04*s~!Qu2wUhqJOSvJ0;FnF&` z_TmJ=e&=B6iWPUr1KF)m-g3X)h8T*ih}sujI0{Zc5S@_j znvo})vrA;skI%nWyfJ929`9#u#gODn1oYT;?)wu8RBjxVdjp9>LAZ>9jcnk>b|WPv z&&my;7xAcA{_Vb((FdrKR%JV5kS2FKe&QL&aQvK5GV+XovYV8@xV67#XyURjWEDiv z1=dQ7T`pA{_|_+`SI=f8;JfrDMHB4C-EOrhma9cxXuG)RrBCF5V1lFJ?JHYO-V&>@ z^&af5t=?A|Bc94Bdko4>Qmmu^(hrg8QBHl1{!fDpaWIxm-sdXbmj;pq4c(C4Roo@z zOQofRooit!vll?Gh%q=0;)8nt&kh1Ov($HwB>En6_w;=^>y(k7 z(`&`0IVj$5jrNxLv0gv(U?ItDy&jY&2iB>UV7UnG260BU7Zq~c zIeiIm+Py7$WOg3|{V#K8)*<|0Jhv|7P8cIT*ne244$>b`T;#(4ODN7;F)!aS0z0Im zAUf$lPvJ`)p(P0mP=WEkKyi|UR^t?67DqJIN;_9P>0qPb 
zOG91EHe&a_AgROQr4u*z+_XIrrkrg{Q$n^01d@x%on4gG7TB8V1v zhB{89eW1MN0UYXZSudgO)|Psq?Gi%*j*!=g)2CEuF1XjD)Yvmg0q6CosXLJ4OU03U zaUs>CrRrhp@}q^%*5-u8Esd%>=8CSePhWSq@#5rZA$tJRB#M<3JgYXUWnHFN5{x-r z-n3R-il)d&kEA2LUibV$=23 zXO(vSlz9?HWaDv2?BUxRKwEW|(E6J?AOW|&cB=(FiOrRM6N?NSl-BBLj$q*2gXnCG ze8Kf^D}_>X$lH%aJ}))SldDTwSV0&(Y%Avu=Qi9JuqYjS#QGOL*OnPy6&4vZvyKSu zS-Xh|bW;uuGB_p>EGciAQfpLZeW;~ybf}Ci+lCe13cV2mR1SDn9ngB0Dp}b7{`0MTJjoNHIK?;_|B>ZNp7TkBT8OuT!aH(I&<*r({V=5cS zCgkS^e9jLC@L$I508>O3g1pndIw(p_L>%a#>B~zYGMnt}uy5Vl#3V7fXRef(?mF>6 z4Cn-=XIb1R43g=)QMoOu#Cd<4P+(QZw)|QrLc{8hX^k+9 z9aWwSEI;}3EV?XG2553iQNL}$T@ClP*ol`^zyw-Bjn2T?2o?SXXG0Iw`$lY&bmROT z4?vB1Kx=^JtZ?g1W-%ZIGMIO~ruj2xDV7oDYry@X%i2!f!hNmMq={RmxUC9`~k%L#`$eL=Nh|C)Jl0Rar)3yoksr14<~qJ;~tP*a2O<+cKB1a`7FBs^P8I@@*wq4{>W?p z{PusnnlVNm5mvz2w)yMkZ6}8M-&4w_2SO6?OJw5xoRXGLLy@PUS5|Kl4 zjG^;qbj^Jyn?nLp;#PC4TK|=&S_zN^yvAGF7yp9^J02kuA}MTP&HqYtBLNig9Mew& zk;qES85h*3mRZL_iDehUdi;M)MK@?p%qmbrruH_3vZ4>CFO2)Jx3%QW9+XE zDW@k8Cj~nb2$Oesp_T5*j-3b=q1K|PM0CyBG?8l~Ur|8{bgH|OW zKg)q(kUXNOJ!hjd&`2x!qRG;9ncC@o`C|6eM_{yW1Q@rtDuJCQv*f&EKNEoxoCih* z_XB2GUFl$1)V+qsF{Aqybz7-tY?lGlOW^}!m>=a-{C8UZWZUO_ji|GN6S`V=56nU> zWfj-7JL^rN4$|D8A?V^E8fWRnvUkr%U-h+hoptXNq2mPIie1?3D)m*0Lmp(^9LdJd zRa;*+tLJ-t){sH2_Y$SjpF;H?U*+pWHPaG$HAgFixXN*cIE(;ip@*7VQkGu*7p?S*(t%rk!v>H5WN;FX2?m)ApzX6RF@{vxSQPtAabJwDH_}(-IzThRD{= zo>4Pn&5LAd1ox!ZDcLxKI5c8$lM3=_a9NxOF{kCr%^aC_WUEO3t!DiU*+7yo`AR|3 z_r?8^!h6On50Pz0F54mC|7n(bZc|D~wrlLadSdcFrcj2o?jtKHmSX=4qudjTV@J-i z>y~l-iM9SQjS(q8x|VFEN-6%EgHh0;sHK6!>oZdCVMOtt{x}~SQXvzM$}|3{0O-fk zxJ_LNkwwaVtAfF;|KB7VK&|^uvi!iCXLaxjAQ|J^qsgEjU;h*58hP?DeJ2^t_k&jDlw{B^(SeWqake`fnf}AKUc~L9!%M=3@5n1Y=A0 zZ^7(B{)Gj^3R*bwUKbNj37&>pO-{lX#_Q@t&yYzi^e=L2LPjTI_m$%TPAoeyFH7h4 zxB23l2@?gi;L8+rbV0E2IKrN0eE!;VFA@@z4!v(GUK6`&hv_k^iH(nqP1&SyJ&YOy z6)KHQWk|L7zIKG=c$=HEvZuGVWuql>iN&gI{cvj}XOGVMOZOLUZo`bS+viX&^PNMz z{+w^CLtgaz)j2fsA3p+@9dFPW$y8^XRI_1{tB-P9qqikCYsa=fk)C|$@8@UmQC+;D zg^K%AL5trOX~mg8od5gnzrM2u6~F%1Uyv_|Ye&DTWFkU1`bu?j!c;0^P5RsZ<4f0{ 
zpL!Cgx2+ZuvJkhYdxB>ng>HQP^{GdY6Zw4k9UnF22LfPnzuxaiMe}m?zG5LY+a1rJ z|C0hgA44A;Pk)~Tbv@R3Zhb&a-p&zf8zGDF3&sB?<_&)|&$Ezke8AB(CK9`_b7V zX6~N)XL5>_gYU0)e@%1h=J&PcdwziqT^z54ro2|d)2@3fEnJnmT~fE>*)vwH||P!qKP zIEHlZ^69(Zd5)Y4czM9Z$EOyBFBW%ZN)1@JCbk^z$4vsxe(pd8+V5XDgKz&HLI{#9 z)O-enzm19foZI`$*2@fZh4xZBa)&x9iw3->9yvv;f97@g2zc+I^MMif@6_PaMn@g7 zL$aa@D)`!|3H?HuU&ifr2Vjmav}k6m&Oji-fdr^bI$mb;GChFe`sAw)bq|gicwck zE>D2o`Mf8k9-*n!&|!)q*QP|vczS}A5z1%>1fdn&Q;W3rVu4uyJ8$rj-vMMU;_|c0 z8@z5x=@@LH0zrfs+P^G!4H{By_Lvk}sow4w3jxJCrR>!y?ux$ocP>87Nqk-aX=UYC z_~WY47viTZx)2(1o4mxW;47T5UY_J4c~_YLs=J8gL>sVDxcQt0b5 z*6A)$wrxdRV@x#iyPB5^9FRhJ6Cx7j^6!K{LD}z@Zh4e$P)Bl1)Bp&#kZid@|(G^XRiX2hOIY;`SHjz0D%U6ao$uq4q3K7;#h)_8mcyK~oO7}ut$|Bni~&`Uk=v7n6b%NW_a_>a zCL->MYq{RfHq9tvZJgc{=u+q+OBx8u6|dAegqcrF7Ns!naYfy!lvY((E7?}wG|cB6 zcN(dD$f311-Kng0b6Uo!!h0u+&Rh4U^QvL$GHsc?R^Q@$IeUq3vsmP=s+9=AW2C~5 z8$xOhlGjsj<+2qI4^zg9Q)dvKBcoWG{Qu6Y-*xnT^a_#P${oGCbj1~8(S0^HAz{TW zy>#2VX<5@6_L@XR4^Y}&Jyf!(5ZK;^l=okwIn3U8X72lqPrL6rO<+y^U|+xnPviRb zU0@KHsO7Y|AS%N$wa2azu{U?#7d)BwY>tNJEs{6r%ZB9+Y>7?oXWbJrY0%7Vsgf~Q zv<-truaVHyv8`BAo2l7Dg5IyS7D<5-N>q>uBmE(;fO6lQYO4o?gPG=37 zzHJR0vt6lOfPUvtU*Yt-%&aOc*li3)CD+ku(OVnJ|7Co2h57n+#$V1KrZ!~VUbumJ zohI~o+rD$Ym6n?8rTX8^_c_!^-!$I2aS+3XGwzzj9Y^yrmQMz7| zEGFyLmVSA8-u(`LoHtsSjE0DqOIihQbfq@-%X4UhR4YW2SQh5oft|Se0$R7=nH!VI z9Xu%>u_mO`X86aMO;2tp_K8PMCv-DH$r zjlj5Bcty&r<8wlT`OA?a>xPC@rTRy`D+%qy%MHQHhxHzc4fiFEbtqg-+@OKFa*vet zVir2yB%ludL~F7bIf zrctrj{HemSdc)nZagAC-g`(VqV*QO|;%yrtEkr7YHCLEQob1L-q6^>-xj(iyosf%K#hCuLNuAjM+tkO=Es!baSbsu=iL}=Z=QQzAzAQ zs^keIOV-yr(AjzxB!~1VD%B5k4ccSS#LgdE%qn$d$ZD=vRm@QN@1oB)P%*cKF9*6K za`2N7&~nbMH;VQBiv(es#B88UT_HGI)t#M3{6j_PD~V=P%Y_RW0v3Iw&l22Pwf`^CG+VwvED|ND-{|J z*&772FN4U=nawA)d<;6k`rC8iBSm$j>Aqvn9CQhl$xV{rcCC%i_KtSXhX%Y@!ksEI zcQ(0x3=y66OkW<#Me=MKGnz$k3}f9?V`)Qu8~IYI3EB19{IBX4Dwd6dl+t!-OWLA= z(BjEm{4_ACYMeO}`O;=FtzF7*Jbmupp0 zgPLpM(Be&W+M?~x-`JpfFmF`2Pj%`9gi0%rG(cya5TC`oFGtC(PxQ!ge9N5PS$6G3 zXh#Ld?od#%eiXtdUr8e$mva;%YFicY2vA?0Su&-7@(Ou2x)T+!7q|W5?lQu-DrW(5 
zC`^Q}+e6H|6Vd#YbP? z7s1wQE)MPusCfR^+1PMaP1fl!G}F?f!0SA2kHcYpM$9K!d9AT$y$e-2KJ$*85}&`@ zo6Ntm4vutK`;K%MPdkeL_<1wVsOP_!T?8pWB3y=t2k7Nz4?le|z(}QYWu+T0PHYK~ z40eIU+>W8*W*A6;E`~j0|HtD2RAMsD3aY#6d3%%^$u2&PB+*m^Y}HTn7!9?ddMxN7#$W2_Ghh! z7Iu6lOK8U{RtszT+;<`zZbuMG*G^e|)+M*mEc0uxzkmlZeNvPko6s;}GiUpG!=cXE z*G$i4{o&kRxmrTax3$A}=j>55pHw=S7g%K-+7pptyg9Tluo)hpIkNELrKr_OiT&6y+;?s(hx9t5rTA89v6eaH0OjNx>3+r2QtIy0tHmBe6!TJ$oYf84b>Di8hW#{wN^747#d}6yJLE4Hh{Xk$el}Zrv2Z=& z1{%>Ly$a0{t&9sVT$LQ05}jh5GK*-Y^`tobnl%GxY@bpfD%X+;4BhK0AABoHTd?w~ zCjUIq*z=6I0*9Lwg@@)o&Vh777UKTbV&B?cqla@?V?Htm(OwOURtIA=TDP2S6Vy%C zEE8cG>1!{aCCeOOdOF*6Led?)9Nk=e;)L^&%vY1ZQk_2LJ~jQZsC~b z62BCeXEU$~$VcVg{+4oJZIW`ES5e`|;V);a&{H&%*ur`Ie9aJ|vw<{P5^Q{-lw zFSGr}v1ZCx!iTj}o+VQCZ8S$EV=hDDM!|Tia`P-h>=%J}Kya*+>q-x}A`z28X6 z^=u;OMyL)o_7e4m;&K8-534Z>eFRR&Q4 z9mF?Ja>et$^*MTRS0MP^?txbD#@*Q)LN25ncrG#4&nD&z(@OEUVcv7=%Kp*JZPDTA za6&ovEpeeEA`T+=!{q(Uu1?G@$1fW75fv<*yXPkzRXNn7!bOXly-~Ef$g{(Tb)C%% znsu!3`=tkyaxN7*3peQ&Q!_J!uJ{`E+3jV~B|>bC?iQ5gg2(rG;0;vHfD?GytNbOL zYmsd7Xk0W zI??QvStnXnt%pk~Q+uNfmGWy$YFbg0_AVnAy0)p0Q#8Zu{W@G^x5#D55Z&{fw$dKR%ljV{zl0z1$rhx-GLtT%x$o1E$4Z%)fJ)Zd77l%{K$tbOyyz$zS z*+o|OlN9gAU0wEJ*2vun4YVleRLVdlGv&SN#L#HP85`=vU8=AbV-4fZd89U#w^YXV zJ*=C3JbQbVZWb%3K?r`S6s$(uYcc&gKIEAT9=jrx{oSD0&Az=X$kAJu%!1N__=$FT z*RWyP@M)>tsQGn<0(`vH!h!LQY3U~x;h*~ETU-*JPiN>{SN3_%`yAjuC+4E}HldFP zXu2n5Dik)PJg-Jy;6d0i64sJ%X|1)|l9sDQ9^4LKMugN&VMuYp!kG6394w_jo3-hN zPk!Fuqwu%jQ;oEs;YxSfRlJEi%PX-%RO&>p<0!N+^TF<>ZgHBq|0dE%*(;#J5o3XU zl5+Kl>jq1KPWnD#uFtl^w{|Yhaj%h(?|S=gI?j@&Bc5cn`@-yZ^C`<$Th(A*%zL*a zoO3Xzvi06h5KF}>=x;8j<@WQKFH5X~FJ$}O%OyrHDtwoRA6xT_eR z0yS^G-Gw@Pb@nMOYpUc@-4mfYzwm2eW4mv19C3{a8y8oGBfFECThbgd|D1%zZmm7k*b*)NAlCVUl#g;?YTDawlu$M-NwsRo*LL`ZO z#)iBeum>8e`xIJqbM|q^#MDk=tAIHf;J0y>8RQuhA<&RQX zkE_ORj&ELR0YXtk-vOY421%(g#g4Kz480en8>by8IccE}w4M>92}*k~sy`kQzjaMU zx7Q?}A0h98(DO-olCLp3OrHlYQlV%cnppR3HKNb5Z50%3uD={_vRY}*EF~5igYxD9E=qk* z%;fqgN*$Z(_l9Tg_!KHcHn>kl>$>LH&F@g5n=Yr*R351))K8nkzuQY6jZ+z})7mJG 
z4;Anw>D1fH_bg^Ly7=0n!WFBq#P&tu^!tYCu~|Mm)iQiljP~Y&ocGgOEJC@AhpH{u zqCsP&BF7+m9D8td*40!|wW6l}Wb19pBErUJ@Q^o+l1;XG6Jq^p;_#X3jGiW#(rYkP zJGXs>*c5a9b6c$mCMc76*JM`^PYSoQW)o*>E0+IpNI~(&au588lt)8RNj9rNH@2C;MGp0Cx>i9c6b{mTWQvM;^({|J}r+-AJaR7OS zdv`|0p=RC-n*JU^tS3V0{#!4Qciw0loxZrm9T(?u+f)=s8bQZ@IGC-Um)l!IW_H(bMY0||fZ1wbd06I=&*=9rPu zml=5+nZ|3~ib-kaZpyx1MiQf3j;l}R0ekN?HflQx6Rs2Y3}XVkfX{z<0fbavN1|6) ztwxe-S)=u5@(P$6CLYg~SIavoHm%pyP8SeLDyNDv9{b=8xmhmYbCzHC&&~7AxHWH; zC{wPwTN%;HR;oGdF$ua02NF~>;(tlsuK!*3lNs{AVT`}C>3C8#QdFAIxsBV(b;a*0 zxN8!)dKb`v95r7$nsQnf(^7GI(lmeV)Eg%knYCz|BZ_QFGY|>YBK@nP3JOF($^}WO z*PD7)LrYz6L>{&u+lIVwb>LC8m;9!d1Zzf9n7BspWjYJx1qOU$b($iC_Q$3BnH(VxB zrl_jAHdO2vUQq)n@CRF_Z?MePJG^i%@A))fzzXNKE0=LtD!($ zCwxV8DZ@8dIF$K%h@|t^U4Pw?cTag$oSSJ^BnTQyHJAdISKT2n1`iT$9I0oCT;% zdyQ;uRrYI`%5(iql@TOa{k=iQ+Lqfow7O(B0mi{a6-n!OW`?)%kOA@FSeQd$&X%~< zKE$x%wUgID)|KsUu@axEiPBv0!o;DzZOI+!i$C!TAjbUFfy74Jl1bJI<1(I%7Fvk& zpK})?BcVs)WPEXQfb<)C52rL8`U-mTHfve%QYIAF z3SB$>4B{xn^Hp{89}>>?WVUE^CDoE4Eq#hi(> z#7kfvx0o%`+2Y43*FM6zVg`8g`QB+jF8klP;ZkC_DWMQIg4KhW|8X-LB(~oyQayOl zq{KpYEo%ME6n=3w{WALuRv<-|Pu`GNdAW@_?EP|uJ`hIB>VBRc;I=1DcZ$&j)tfx= ztBY`wcg_LVZYCzU+baS z(aTTi*EZ5ZpOJ>HswfbQYR-xS{^a|J_hkR>75H9$lHwkm)OWAxuG_)82);|vImloe zC^3!O@8U2c->qvz4A3 z;uDUpA1(T{l8!;n4S_ZHWp*mamjIxlW`A~CcVes9^f(9AQRo#HTU+qA=vJn~oUY|y z7qI?R-r{_-PfPq_MwcT0@CUgh_CScwXyWHL)DU@evnUGJ%OqD^X}$b~th3rNw@U%T zGXof&Jis|zuBs68ntrhNc7WgH9#&u9Ua^>KSPC?9clkUd;3$;6{~;hj_8>`^s%!b= zmb_b|86J_MrmJYbupI0$X3X)izo1O)ipmg9qglDRScozN=?&IOxn4)yc?Iq_m&A*r zLlmOBvh=-{8T;h5e`7NE`q!ldHR2W$UUB&~^+=u^npw%?hUc_Ol-Nyi8I%to^) z6%TZ}fr_cwhIaN64)qaosZn|^+-Z4ncez2 zki@FsQp)}D{!*6LmxS-UVrwM?UF(B$PbN?*m4mk$R&O@6+HXEBEOT72z$YUmF(@%{rRl6ohx~st^lHG2Y&Txar zaLZ7!Y|dVaqhP#4msl!_YiG;aMAyQMfAPgYtjg+GJv%|l!rhGyj$jaEeMxb5Y-8|X zPCwsY^;<^N4-|r;c8&4gtyL%jM^1g`1|jrXIheNudJZ)&)QRI>0%MALifhKRYD|75*Ccp7_)) z7Ou1x`YBJj#{0x1Q~x}S_qL0C{ucD14At&koI8+%I3mOSm|D+Fj#!5;&>H6yF%y$*!VTos^=v}G?^JkFm*Im3`uPCWZc~=FR=!azSPY!OZ 
zrREGVT?5~5abLnsm>@ryz>Y#VXWR!gCmgm&;Sz|?^=#bOd78<}itL9nrEm;%;`Lt}fzK9ttYuCvaUkia&p?}M+ zX=>g74dQ3rCUPz|pw8!2k}fmSEB1Htt!c1L&F&;+MZ7E4DD_vf%{pE+^UFjnsP}cr zF}ae{6*TE@99+CTS>-jd?d~@N@}MtR&(7St<&eP@-!RoZII_&HL+zt~WuLA<@0RJ* zxK!o=X4PkUv<@ldr?aRmg;4w%q$YYc`%@MMUUC7w?^C>JtF;P9j;9K&Jl#(~d!Q}b zc5mE6Uk3BJCEoNF0Hx-)UaORs%Yx~pJg_8%Nu(q6{{aR7~2RKQJTF z^E*v8H9=FsjRf@U@ffp0S*c@#*E?`jjh{FZz|Yqmr0mWbYC$ zz1#p%eyw`Yp$w*g^}d8wD)dZNNX0dr8S4wzc6g8S1aZwDBaFHt{@H&1FQh==KprYY zofVn%wWl=za50+sb$+m~I>Cf$yb)@d)72);HleGO*}p)&a)6e&@8M({{BAV^{VXE- zRXkQ-drBJ>)8m}K*l(nO=zTtPL_pt`&1;1Pv>f+TLMl5IfB9xP#GHaaGzL(90!isg89g^W9v~nkf>ne}CEN)x?rQnr+N8uH~| zIl|>GRY*cye0qfx{gW1yQ5id@S+5^H86swnX}HYYebub+1!$-*5FH@44Y7-*o=kpt ztaB~EYxhnF>T_|9=EcmNAhY8q-<>G~O*C`QV^a3{cL$$nHdYqIw|M90g@MM2y# z8~GL4s5t?hfU~erEN^Vp0qB4z^l-Nx!zqL z8*MZM7!o5?K8I6~a0N%Wd?F_fgx*cX7(u&aI&W#(?Ap&889Cjhd=?XTGO~dx`h*vr z|Gn-%x3&1&IPv%M6nuDf$O#TDu1SG_`rI#yftGshhbMe^+DkD*AB}7;!Y@U2q$-E; z$QS838;bQNXfz(o47o1=?HI~VHEwCO`1QP|&fwtjFkLWuXGV{A%@xOq$C{1P*!t(~ zP3orAnG~m46vi~gSZfc+gd7boZz7?fJ&U5_xUW!&zU+%>Q7ax7+YLdCW!{e=d&f^9 zF>R>+@~H=YAJ6u0jfQ!|)L*JOI=_3D(WUXMy=KH6^el7fmV5x}iC6Oe8dDi~;*I(b zh}*-(S#|i$L&7Hv-xIwiU%xx$$8!VZ*m&j-TZoBSy?$m`zLR=Mg!^0^Xy3N`4Kw|8 z!^(U*nu>7t37+AB2^C9|nW>=AqI01WfYk}B#9a$;36LI$}!#pTCnW_O-= z$^YpjX%N`dOopa^!c17eodG31I=J)1s!`foG(y__G%xnIUZpN3`fw6zI?$ zc&Jlmzi1{0e|!KJyNrCGqS;{VpU~y*;tsVw8ymNiDLYVt%gbtq)A(+fCPCfb%G3>} z@a1;GreuF&M6@b|@iUxyAro>#UjC0GR_f>3U-;+^^aY<7TRDj>g5=w}jY^ zfNKhGZQVl&!=4}^j`-;zAvmVT2r^qaz;;m1Q`|-))p>?z=}@`Rw0>p_AZ}s`q zc5K~ewbUW7tX?>rZ>)m(cofx-PR>T`0T!cJEaX+<^s|E~a8F)!3UHjOqo2U=z>du5 zz(RViDi0X6TA~BFD_vD2B*@N5M{epQ;GxOLOM7gKe6GJo%?-7GF6D zRD57nny^KY{GbN2;=?aj$O~ z$`RGwuWWvV&?U!zTSVQ3?B|xnY*Wy*Jy#Cv!pW>I+3Uwg23JU8{(SGdiNvccHSPxutnj%+1K1^IYG*=Nqz)+W0(Iu^3}&bCz_gbXP5Y z&qB8PD^z*J;tm~M83W{y_soF_RgQL3b(XCcWLq0Lp&Ap9TA0cx43wK8#F~ZsAF&ol zcAkh(e>+Tpmc<~g^G02GkJsxh-4kc-9T}DL$BgKBaBaF}>j=zknbqsb8jEG$F*Rl= 
z+?eOGzll+l#4YsbjL@o8Z0rjm2Qig6mzgK3jiOu2UR_alf=gbO?a1`d={RY&qTPl_T)T6y(lu7SqMTj=j!kLE73M1xvIXAAZkVZVW?^zbed_P_ChDP!Ak`zDmCQ16k!BiGGjC&bG_tlx`n+@_C zn8$KEewwhQ4tb`yz*zr~?vifo{PVKi%5LnB5rQc2jKQ&8$KpGp1cxXz0sDuDa@rE~ zH~sr!Gqr?hLxh>#2{F~&?#-1@305_P2hRI{WR)x}sYtGpWQD+ZasI2rSdR~#hd?|i zH--g5y$3ihR}Zj@J>Cy?6^IB_+M!V6E0fXdz?btV<<4*Ib1&mROk4XgdhJvd zIcSxxV&Mqbvmp15m!AJMXlJ!tb@<9lefx##FRz$?1#x#-E(mB8ZuoyD)Aio0sLR{E zR+3MCqh4gzd9i^5`8|MGUJ^l5x-QpZaoY_p6X$-bkUmORufKar&|NWV`Cgb~;g&g< zJqec=e{jH zr`_UNNkXG?8t+5gwBY;k-q6cADz%o`ugOn;4+mmBO`icm{y9FLp(Kj((NZoS5CM|~XH#+fyi1M}70 zp{KHrSGeGYn;q*evbhx)Wr> zbX$%59c*`Q9rl7_9HA?v7zx7FEClG9Pl$?+Ry2Crzq*Tf2ubqHqbwiUWg^#wXutyE zk5mSkvI>WzGv1z&trY8=TF%Lrn3S#)**>V&&y&+QV0!FRTx`4((Ts4BYvY>*@H`sB`Rww?x@XLf(LoRrnD8cE+gW zqp)>-FI$8b0ix{po*;6jhV_7 zC(D+7QO(&(X$YB{1mI`OTuyxx77xB z(BtFe-DfM_h4FhAqf}hGeiH)9YCdV>)o}Di#V=pAUDPE7<`=zG-O(doh+q<4Q6Isl zx6qW9$hGLE%a>jF1$ES*-B5-&bCwOvuN4PfmLCpb_lB_acg<$JZi#npRw2pWvbT1o z)Tabjmk0?ab~)&1MHY70N=EU=Z!EFNz>xt9C$4Rn*xo2@x^Zj3mYJ6G4aWn8!f+&y z-;$U6N5R=rrf z8~XgDeMaJ|DzIPfP}eqKSRyBkf!Q@9J7jZb7Hw#0d!ch~85F^98Pd%39;bx-M=U+z zz3Lm%A*7Rmaje{zEI+@DXJ6SvPwOBjj!t}w0vRz52l3qJIvZnAx~Bm^9*?7-T!x%c zH3>B?;06cWT@)d_FO;&c{oDFsgIbnBvAJHUek|h25{$IL$kkZiPp*AI+*`b)>45l1 zRaZ_--M47f?V#@P#;g4YhaVTb_dmb~>@neiTmwgvxTD~~otI1OXpzw(0uS4HARa?4 zZ$=Gsq)yBE;+10ic!JPBM-_F6)<+9{znSxk?yVPfII z+!e>a?q>#3oiF z!!#9Knr^5qFj}4G`615Iw;g5=DO!6x6bC{ri*F7{a5b&I5+|I+NVt&Hx< zJy0(Oa*3FZ%Ie~1;=DWO?jT#Lce%LeAt>&&=_gwH4{U9tr-y>2w-+U2<>YK*2=XR+ z%#Y&<+{2#H#HVIF4c&qK{-1~H(Y@4urdJ5%?ir5>Ll;Zeg052^7|owh{#hE3az6D< zCh;p&j>CLFaCD5L+G>?L(a+6BDW#@&w7%-T1<U54g%=+^L}VPv?cBe-YNgl{y6 z_xgX3qmPM06!lgNRkgQ)dLt0b47kCXa&OmUV4hC2yG32%xWA)C*5bYQNJb5g%Z;-8f(v`D15f^^}|J7pD9> zI>q58jbmYWfyzS!k5|i4TLHn>`WT?m4<+r5wsQ$3{Kij&7?VqS7TuJ2M0nZLU`<%H zk>_BCRd&=Vt9Ibz>vFg=tL@g^?vor9(&7 zNfnM7WPhZKTRy1r_yjV2g9ba%t4?uclnlrC!tK2abb@{koghYq%?Oz+AB$ zeKt(rN#1aSE>3PzsQmTLYs+4*lQ8DufY>&KdgbXod^K#8U%OL_Hh`&jd0HLjKFB$v 
zH3WT+;XNW9bW3n=HH7sIF?|{Z+yZW-#7%3Xg%Y~LT3So_j+SP4UdhVYsb?r+3l|vg=agZ0~rfXL-&5t=B`^5ZJtP^gL-HwIV+UTnUzy zkdhpMkd3WNH5Rs|s_Ev|qxZay9(-9sg)dqy({@G4#OavxtX6T?lz#}CUhl;Yk7<=j z7Rph31z``Ne2+A&uVGagDEQQnzZ>&$JJC-) zL^so8Qv~mk#J0?Sj}kko`G-DFHfoSO{X8NtV?L=14DA->!lf4<26FAT7;dA0!4I5y zdN=QwF4D)X5U5fQo;BKBgii@{r&SY`EC8wvfUl7BcvJ~bYKGjnkFxEvc0*m-i$*!E zNh&P^v4tm1V>V2VMWW1p`1J{$Vk~`GyN68+OK2^a_MXh6ECN3kU7^!Co0d4HURS<1 z6I#Ak2TTPdZ{)t{!cHe=_l)dB1~J+#)h}l>4)?uV8;ZW^7nCFno;$~z zOb*sNNnIrN(m_s}tI(ukMsV@Sj76zIyF7r;>b?7?QvB0uPJzEGu>WMwJtJC$i@Km{ za~Kg!V2&W5e?Daj4X(-j_~8#;F0=bl2@S1ozhVxAwbC}&yY7Pd zgTk=M+(I2YXYQ?sE3f+oY#I^hK*;S>Y`p)^>_keDe&GJiZ~YR|(}87PCmu`IAwYot zPJ;z&XlVV&AtF0+fzcdAaG}e6wI$ZD6bA%!AEFa=v8fV`nP34m_81nmMrXZku@2Hd zMzxOeJ`hpceTPw!V%_qa?eBazHOd27{=~8+cJ0{mVnyy)habC16G1~oU_(2f{U(*; zz9inyGI4ym^@;^&){CJgAi=@ekL~bfOq1veLPMoPd|5{;@Na{R!Yr`?VEx`+c$`x0 z_N@QnHmF2${!o%b3ykn@%&^2os6^>bnU-oG(W5*{kX4WZA}WPyJ*4`> zNg(h;T)EnzRg!Tq2$yp*mKehjd9vYKaWv*iCH?BtazZc|Aop0XTVm#wa$HEX#htDn zwJ!Fbz1>4~q03dgP|B^NR61wl46Rxw-S>wl(8)RFwbDM@eAQ%`XL$M$ftthqmBJlX zPJ=>jT;ezFTj#8^?fQj3MXj=gxhK}vd~G@@`#uDFb7MNvg5|Vgwf$qKR`)($p zt|sQROU<;`Unu~Icpp1Pyt)3@$IkisMJ65PWQ$doZ#4EN5h)UzdSWX}t_-vq(29y6ssr`0|sF8hji~eCSe5;Eev~Z}lt{13ZI0<6aTF(9v0YHChvh+XzLMAXawnx$8@yJC=*)n+h`ER0qo~n9lf0 zzJ=YvsRO|MU=sxRyubX1oJH#Q=LLc8YKJ1DcGt{Un)yAHCWl^;L9%3mNgp~CbIn~E z0=qDzVzp86H)AQ-pEUgtdV7y=o>n$1y<@bi2ba#%%wQosZXzaqE=+XcaLS^mABoQ$`-{h;&RKcyid>T773Z}Asj(VNvi=G zmw;&yBd|M0FJBl+TmAJQUMSQPU#@N(Dt2Pxue-Cj={oVPeeH&Z$tpz+V9(qHRvFEt zj1C)~=mr}62OJcg_yPjN$Q6BoWaEeno4iAX@j5tYh+^vdZl6c0OyP0J@qGv<29$I0 z!)bUPiYS!jX9{blIn)h=!F@<(6_bgDCBakowI~OjgoYxpq96wcuqf^!$Gun z0ya}dNW4~L@d2CnzC%;D+_`$tzI$8orJw^;9{87ylg>c{Q0%poI~t`Z5b(x$Yqsg)P5+=^__I=!$;Pwun~r;Z z?@Lkd&*5^+M=A0k+)JpTdZT*9Q5BvwekYc5^9MhnOK4$wH2X;&BzToqrIm^I&&3^D zcVck)O|g@Unsvu(^cx=c&u6nTOI^TceNLjT2e)c;bER|Bb2f6mx#@qgc-)n~@nE<6 zGL!>^x2Z9ZRU*|*Eb{)=Zc<~gmPM=lA9(UNt`JxKZy@3u8;_JvMMu5+ps910xHDRX z7}uq|=1#?>a6yYPGzIJ27!uc^hrdO|y8_}cXNa+&S@P#L82ZW!4$pwLRm0pvi9^1H 
zDwFX`$nPEGXI9GlezDAVdHo&&ToCrrSo(RHxZdqI)!J`AhO{Akn#H#y4{25y(1mCn z9Xove2nCFfS2L?y==NqswDm4eg0(U81`i=@Tle;a3!C4px&xFdBFNI879YxDaf9J?67%m@=!34gH_Wfp zXOJKm{ni`xu(_3|I*HRttV$JrsIpSXIjA2g&vA4YFrJlW3P{Q$-_~>?WU}P3uFRR= z{YpA{LNW`ix0A*{95=LZ@8gfhkxO16`F1+x$y?gvd#SKC zDPvB{bFGm%SgfXUxHP}U!~eKc)u7f}$q^ga3RYDa{Z454sl>W3PBnJbpS#y$(xrbR z;&hEY@YC-n2)zC7C*UG5`$d~Nq}*9G<2qi}aai1Y6>|^^jZHk>s6()vtXH`3XC_|G zX%aWRb1OlduulIdAeCjCh${cw$X@|B$!d^blFUTs3#)E~H88p92g~HrNM3%VrOcFl zG?5&aPm}0QyZK4|&>;6{gAE&>r#>wjckYm3@Fn-?e?M4x2+#g^!8oWfyFd>|XUcUc6oSH4o*H0^mNx4_| zQEG*0*lBOt#7x+4D5zNf^vXTL5}|`;E7TNs+WM^_50APKg_jT8{j-Pql?`q+A|I2g z*qNk281Tyf-?bEf%w0BY!fHyg0CHp=9NRz#OPZR2v#Gg19`c)b^ z+RgvCZ3(@du;y$d!8u!luF&BTk6EQHl}80<4rpbpKULlS;Z=7gpMArO1sqcr zJaUI+F2?0rsFx?su*vf5eW{09JQxJdw zJ5q`&0)HPaMeME$9WVjtlF#4G$-gD6DJr%c`rJ*aI$HP~!diVDUupsq9_#lWE#f05Xc%%c@+7%G{12l3T`O@SXt#O#(j8-}I7^T)zk_3`J7VW+%5;Gc_%8ZZTn)_?fbO~7J zB{hU$C1I7?k*nN1JNnqKg3r0G4<$7gPOqnzX?|dlGo7=StC_2rQR~r>mdhokT(gG!W#JF-n*%(l1?`@@MgC?3(U>cCLG{s^x6EG*w0= zdAY5YGAMe<(H<)EKUDQjGlbA3MPvN&aI^64=pQ)6#3@=Io)^2&7h>$d@UthUG z%+7wHyksxN#{tpv`PY1sB~NmAxp7*)+jBw})0AYX9BcsZ?}aR+IAk^&7NM@F<16is zfHPhR=ihunhek;~z2&?&sb38v$^1=`_|@L5A+O?*qP6}9n-O&cr?7VYGed@gA!6-& z(oQuY%Ci91EE9T0^+LV#!qH~TP?6B&c}ruWxf2`?)?p|ZHaqyZifkjLWS||wQyM)^ zmXQ%Gk(oFMT%<3N+3OBfbgsr%%8H7999%dkKXS1hpZ&b-RP))MTkMRU(adBv-?>J; zcgP8(9~RCbD`}S6WL7F(Ck*i|?4lXUNIz&jCCK)kX9~~~n~-gZ4MSRkG>fAtm>H-k zzB3Oh|05T(k!=mWvQE7#{k*eX9c&zb$v7*nZusUz#%dkDDf|yS@|!*JscF7gd`W>E zhV9Kr3HcMb_d#Z6h9r&jK8>7C*I#Ev%Bdijur)$Y^CxVmESHYD8X^lar7}5wmZ1eB zKLkg|Yh=@iz3h*Dp&sfO+r6q#1;w8&^P8~OBr2^u#qP;Zg7O-FHf^0EApu>M%LIir z9VwV%!{dM~MqOU>iP`wB^%6qO9hhufs&_D4$Cs@L4f7tl&JGv1$xA18(CgF|}8+h45e(y1_RSE1VgD&qe9eZs}11jgv5gU+5By88WO z;+=O*$JQnly1F&~;~L7n4wR;pH_sj#m0!>F`U0z~EyAo2y8ShIGmYozGjRtMf+e|% zH%Ys?P2E-=+~$A18HPWBbl=p2l-T!bUOuL})yC|34$D!vH=J{Mw0YUU9|At!Oew!BEQM_q=m^*SX zWA*#X{M{K?uj-8C2Q7zZ9j$F!-NBdjGDl50-`9S0+b~T|!y;t%ZfP+3t#t=+%YQ#e z`mw-|- zOFc(km@co=sdrdH))mee(rDMMH66YgZhmW<{$aIc^GnT=>#>D|K-G*BvNvV4;Bg+8@L=XTTn 
za>v=|8F3Ny-j}EQX%ipYQCuaLWM2)h-j}Q)w!iP1fL;jxP#~xKug+A2brE<(&^l)( zW@+1nr8M3-PN^+DUO?Ysvkp^Kw(;Q@8Hv!4#?foHb%^)xH#@Uz7j=)8^L}u<9r||( z6~VxbWy`;YEjZhrtbdR)5-F)OgY8kS4w1Sixb)Nfa4G!7<-Zo|CxN#=9ln8=YlFaq zFR_kumk<>E5z1Y@VS@6dSJj2prB#oM)QR>BHGDA^G3?tDPZ`(wOD<~;)l_itE=jJv+7Kx*_@jLjgU%pRCQ1I z>QbYaTYf)?GagG5Uv6QnG|l|`#QDb?6I`p5zH>N5wPYt*o;Y*NFkKEI0cJtvg*AjFp;dnqs^3w(7e-VS z^FRC&ej%cWYK5+DVkyk!9w8RV@vt>P$x-$DX;epy(Q~l~y$QkDICHQd)JF(tu)jhO zA4{hf?A#fSU&dYmr_CvIzn9JGUVN`O8*E4B75YE8uZ99q@D4ZO(`zyZPV~OM{BZei zP)W!p`Ma}Zs^38QG0q)?tNl9a^DIaZ+I=Nw!qzRWM8`n zdhZYBo^j`O^sC&bZ5=g{D5S2D-w1R+3I{GE(6H{2ZQ*LcKfVBoAk?0jZR%_g!>Mwz zuj_o*O4609&+0uJMUDMm?Ru{$qZ<;=eZ7-*K`Z_%He6WJfa8$RhF5=7&Oh%n9rk5w`ODM?vif%|#h1{w{@aa~0`711Z_CAE;_JkvP-jX=bx@z zRSW%JS+?K>NM?%y?$O(Cr!T^tb0mO}YZR3G?Tn{B;c*0U5RDpE^Ke5-`PdV-blm)+ zJxc!cmx#X8GyKJbgMl7*{7z-$%Z-q0n0w}&M_9HQOur7JhjeYi&9_ah_eDl`xK?O; zW(gp|Le+cLs|O-cS@gtzi>#l^S*ll?j5dtQa%yEBql3q85c?-8a6n2{$G?92o!R<5 zk7W6f^d0xX#+|152jhaRMv*xa1H%BX@Tt;L@j!shCPA$lB6UIF<5tca_yqG@Q4DLNfpVz z9^ub?M9V}xQ{-mG6Y4JU3?-gemeBOJeG;N>3Y-M=s&PrHdP}006+a zrAlp&gz2A)tkDdFKFw`({SFbKyiiE~PkEu$C8~-wnLRaD)flf3mN^%zG1-Vri`j;< z2wja%Y`s_Xy;vtZf~<5E5+Q8H-SFsr;Q`zf73bjxGR|kQhq=h(3RK;%{i)0$e{(eb}%Q#1WsL+za9oi(1T~Q9Hkv ztnE_VJ=<{hdbpzUgK@Ne%h>;v!d>bR`9evs?=$5q{VCbP!92y7)rKr(KY6qHQ9j(4 z9=vo11OPr9h&?Lh4Q2(h?zHnZ9p)vUnMGJQCf)-CXl#!HFj1$44lNaGdl&xaq<_ZQ zyQjoPV}kqPfk3fY!5%2&lz0N3>xY&_PbZug_*7DwWC-_$U!ufUcp5ISHoR@k5)W4u z)Y45e^jCh|$vNTNW$*E9Oxo)65!Hoze?X(*t?R4?$MTEkE#|SQJI9}qtph2moB0Zz zoj(mex4p0OX7|a*H+^RjZAo6HT`}Sc1jc1P@NZb?ppl0+ePXLl;=!q}{P<>UH3oXn zpYiYV!it zi5maFJpO2xLgem!uk$~=!Q~D+&5GK{vAgy9G#q_yD5dXIdt0d|YW+^=Y1UN%#$+y6 zR?wLmW{Z6NMog%m9z!ooa0RSxTLxk zR(Y~0qkf#X>%gtpHtNnxZr*z7-&tQ^RnQYs-8) z6$Ml9NQskqOIXk&5U3iHV(I+{6 zI*e;M8X5jQ1h~mFgz(%AWlV?DWH-hn0}P8VDh@jxUJT?jlld0ftldG(x@EaXaJ8uL zd(=~IWq-Rxdv+sd=RRoYYClMJ@BZ7S%!iia;2SN*H%^pab^5(TentzUO+UyeMxHa{ z$o|<_UAP);NEFv;_{@1a z_F7aB-WzmOPtSOO!OUH(2`7sfI2MM(PtWTup1l`U#I;jTgXOQzFEx~njF4(cRRmiP 
zs55c>1xNd{=62q=06OjL3~ObaD9x0cM7muS_S9kL)cSaXXjmMVP!mXWHxrj!rFwOj z`~oQY?DejC&+K`Ei7|(*$*{T@5C5uc7-EtqeKjwOK zKwRiOsw97=^42a&w~{5}%{JKzjXx%L zg5C>>`%O;zw{EhDVv8R5mHQJE{KJaMHO}!0gmzJjy5Q8$E2gr@W;@s_e*G7%f9C$5 zot_}s)$1Cswe}sDhV%7WOY4hB>|%j zC8MRILwpp5rChfd<1h4`A{#T1HIZ8#4vn;+I5JTN>4i0CYcwit2giW(!!j`{^DX5~ zSCo#<5>qHxn0}|q{xJ>w+npr%_>m|PIw@<9kNHk@!w#bmou=_YpGmGNkeL&^DB~Y~>mdG7{d*AD)e*!j;@^y+ohAWq2Rg%l-gmWMOD` z$~ot`BL%3A-u&Be`|pqV!c`4s0xYzW{_0HWisnj=#+AnRSO4C5gdo_q96l^UDG)Mc z_TvIVZU)wQ=OXnK-%oD+d33l1wnQ#p#9{ zLS@*O3F;f~7|&2o1k7KjVoI}rui4bUaKXA3A*$M7+MsZZ7+Qfs^{ZWXa>~zgs8-p6 z0zu{B)7Vk<6QJ9gKiUIrj_xOrL!mlI?}Q!Zo_VK>Jro^o33&eg#IFfb6(71D*2Tr$ zK`39#oCu7aJ6Qa2`U2M^g7j&gGgLviDOnT(0!u9TO4*tf{0(q>3x7BeyC!!vtj$!H z6;6!ggy)9m6z9mRG14$vG`k@-5&9Em6OVCX=+Q9K)dmC8OvRdafjRa4(?WH4vIY|ZeopM`vJLYE)G z;fZzO2=V6`J@a4xIw~n(Z>#aC z^vKmsy5?dBt#^V9cy^Y7U{ikgPX47GdoyV}0@8Kq)O>BFJ1KHsDoQVP`+rBQ|JMx~ zBCgfHL8G4WeHxJQW{0W=+ONAWA!#~HmKQ+o&0*Vw4I)2DS$a)9(vKIP7^Y+k1mlR( zKDP9??Z6L2dnnR(#v+lP2@C3T{i}O9^F9C|2QJ6};WoKnB$F!ukjWNJ+*pOnVw-GT_6WCyA`LI zy#+-s3;oz(i)#iXH@(Z#4yrg8UzAmIvUAs&1ChQ()f?>l+jdVf9V{w1e$>z1+hyrI za4V9w39Op~r#-h@xV7WUvX-ZVBv>jHo>9%N?bqO0oou?x^?i$QK02MebIyC#-pJZ` zx3?x6Cz6Xt4X}e#eS_cJC$F(@u?;g>w8UYbWFqYE#;)S#wtSy;d22IbPfT^dlNNX8 z_cOlbY^72hVMgztA4iVttXUqxnDwdA5lCj+?6vuxdB#>snVuHgpS9wz z^ybU+Ks z@q+rKG~hrjrZ36}55nEwH6SclAM;)Q)dXpGi1?VMaC0X|M#EbN5yb`~EGtW?UZW}D zOsSq;+)H&3hcbeejDGN1-=*{m)Co-WtQ*RSb9BeE2F>Iv3lukHCN%w6!sJTls4S`P zfL{-NwK*(Ap36E0tTZQZ&~$pgy&KT+mH!LYRVTc>t_!dk05;HtALy;zNT3Ng&=}!AO0*?6x7<=rQHIK@2u0dQAiFkr~X(w|Jee@4x_? 
zbSk;Pw3PAox4Dznl;{?_x4al(`2gs}dU`0VIF-clU zFeA5Do4qW&Q7Mu}_V5PJh6#u5#B}%$%>d>T1K4OTzh=?+4OvqEoG#U&FSWXhArz~_ zg_?tXJXCF0iJS#I7d5 z6~n5HN+-cokzkM{7ObP*G33{FTQt1tcFuf8l}yV3hsW{CAL##Jr2jou|M64YZ3lZI zK*n*WVn$sd#T9lbTtQ&Q16f6!&9j!zbH6$07M|0`4?^<~iq?3vQ?fJOrR4@RW)7*} zp>pnxTy-D2DjzfBt`(69kXMzf+9ECyyD#rG1DcyQK28+1h}^g>X%Io!v#FSHOHkW+ zdGBD*k)E(<&=T+exh?dHM8QKW^wc0~g`iI6$Qheq4aX=YjPqR!)SJ-tkFv?TrB)I2 zI94Uf5=ndI%s0>~Y@<^`_kx9lRRACsr54QdEi$2N5rJOOagnpLOA=-}@cQsNl~P{s zc{-4wf$I+;PMA_dbZCQMWlS3#BWw*wm{fX$(D+h`mw`SicfUw z^j39);cz0k;gj`~Ho?}9*BjTFudTi)D2Vv}lD+ds6uS`W%VA}qp9+K3xBDpzcgU-{ z*rEcyC`G7TqI^hp!PtiWiBb6YJInptB>!bmUB$gp)9GQLzM&6fIZ1K)q-8lRVABUX zwEOM^tN7xV3q-_MZ!-%9hFDuA?XtM~f6i~h3vM!zIfK;&=J47jK^2Jy>XR+B&ft2^ z%T=xx8KWP0#na@<`|m*X$BlCPRm`z$k~`{Oxq9qBJ~xy3#5PhgT)yZj#ny=i+K<;} zzNy%K8C;t~)1vcIVI~M@9r$3<^7=k1&e~?#a00|5pCf78OCt0Z>?Bar8;tjo5tp6h z+RUU|`)X9RVUaLcONM&Lx(O;p1>JlIVNo4fwf4JV?ebvx$iyX+;h>WixHOEMoC~S; z*YRuFb>V1tsb{^6oSeRMU~f+k-^ho{X1u9s0!?UP1jcdFLR$h}1zhpmrs%YtB`?$H z@;-67j~HtCbDoj)n)^FA^qb`q+wIrmez1MrgfFC4ngS7dDTrI={3Gj`2mbOr+b}PI zn8PvR1!Mf6IK6+sb(@pCvp5_2#Elm2d=xg30p*rw zaqX>7Y@?OTi3p(9eFAk7;1v|T%+?{=s64Y5LB_1`1bsEF&i{mJ*#eRM&S~l^usTh2 zboJ5I+r9?O_~aIXrJ;Oq@a#BiE#Y_-UbOxoK4lP*l5ZyY`M=*!{kMPQw06lldafP(qwI_8h z*=c9H=V_>Y;7N?e<0Fl)O-MzEu5IjSk_2)b4^IpB>yeG|y(tG5Nvc4|m-kC{710*w zdUmReFF&a!;a)*`diU*;FPze`!xA1;29@{h0_S&aC(aD=G-upQ3fZLhnHI%do~)Nj zdYI5yD>e(JB#@8T9ea13g;pd2STnFm9Z*}%L410JH;i4l+rBvy{!mLcFI?`9YOhpN zZl}o5v3X`#Zva;<840(<&{Mw5Sl7zSdxs}EVTBHlem;jgA*6G``_fVy5V4W>ct-VN zyIkTEm-Ahppqhl>&EDE`v*;U%zdW~;LGa~U&@Ho881VR=BTj86Dwqk_Z z2hfRsJBgnc(d*LXfNdM@q9(!R54uE({atG(cx?6|X@*&@kmsr<$AP9r|lS}mgs7LD2PJry|jFJGvR5oYvd;IJ6_IHOug9SzO5V6ng=@)P7mhclCC#xIJnC3FiOC7OJcqBOK3V9ghxyO$K!%$O%DVHADp>g z!A9*B8C9J#25@#>26m?vP;XaK{+b5=aWj6sIe(r|$n4b|8b7!_@y$4hAHv5Fz!X6`i`NjxY`X^G+~IT}$rzUp@TD<~!L?fZiZ-#1%2OBY~xjVYS7=873gXYLqZM5d(fB7`a8 
z{UmogRlJG}4;4qLZq^K8Z#KnSvmMV|TeKLQhuvL2aI@F6|MuqRGoib;MY?@|)r%FgZ9;T6Uz&Xa~vE`Td69m78DCC}C_wjo9GQ@;u(zx>9?2mi=h+2&nu{OqTgyne%>JY5QnE1vc3%*D5&K_)q2`KHcVVOb)E zI*wV}ETy|;8dX`-NPVyz_j@Bl@Eot5yBW!x?Pu#~!&at(toW<}qRV{etVe#A$epEg z5-wKaNtd`g6udt93TMZ>P@A$eA61r@mV?4kVP!gFKHf#v^EvkPdDCI(>@#n56_61r zX^9i=vzP|OqOtY4E$J!5y(iNz7Hh}qo}e4R&E;yg5oQdf2zRs&%w2d+!oX}UZR8e< zWDc}RKKpIOO!P7LTR~S@Ku0!3?A9LEJvO8-AsWFP+O6lbFZ3<-c^o-IFn5(}cio#t zMYhzIlHNmZ48^=xQ;;JRkP6?+x^smLF`aE40h3TIBuU1st4Mk0qze&nQcRk@13QQP z3HZNc?J@f^6f;IIX-rWq#v$IQj>d2b4se{&H)T+o=3hEomP*`qu<*~DoEo`W@8j&E z+ceivm26$ETY`A?BFeBvWp6|$S{tc|s+3GuFdes%<1QZ7QyX8iuY>I~z;|L9N-MDU ztj%7*b{n0min`AgtZhVYeF8q-y+zMIi_!RS=ODaE4*_@aL>;6Kmu5|?&s}zqNCUow zP1hhsHN8iY?lMZq#6HgWgfBUx|F#A}+p(L>$i!nOJoo_dlN@pm_FRBN(E4U*r*L#X z^jbg6ytDw~`(!^`M;!_t=;4vqi=9X6sGBMn%I!RqdZ~pHS(TcF?AniFDZpIw%Of{Z z`B(#p78 zLIO^Ui6D!GX@#W{<9H`DiCE zo*5*4JR9z%bvw5hWhc?Fx+=MRN0}$(Y;41wR>$uqBuVBfx@fCT$B1IJ)=WZdC{Cv^ z7Bv^(AGW6sAzfVD^o5{VAUXFU5N%#hiUvCvwm!C*qT|ny2x^0MSp1B_jObUfF9ksQ z$Xa2vSnl;9HC=L{0+zhpW-}<|D&Vn%hh$8Is>rS%D9lnk5&b&W&h;! 
z-=h5$`9tpF8?f_h?Ey6H3jHdNTiKh@yH2oYi3c7V5p8=p%#n!Zq$s!6vYTDBHXy*l zsCqpDw;PkJxY)FvZ{r#o;g;}!7W+V)fZ`By!9r*RvdvuiX^aFh5}HD~@d8o{gqN3c zro$vO5~=C3&O$WQ-#lDNH>Bx`t8V_tS^kN41m-8Zz>;r19=2NnSm?rTTiePri5}O} zW4rQKtMp4ZkTpv0g92oe6m^<kHSP*nlfriQl%xKd%Zg9+cuU(%hMF>cX-ia z>uivSoGeOhH6{NID9?~N|SK;`d-66L}0_Rjiw}Dk@;oPt%ukDiTkv~XU-D` z_ie8&d@5dc1i$B=?vflqjfPwJKq~u&Q4OEBWMNSkKExWHujL&oZ9(Si5O8`TZcNtB z96gxc?H}}rnc5Pz9u=Y6qE`$=4d_|irb?D!ITyZ5?};enCf(PDV8BK8p?iCfG-|Nn zI1QsJ5G18hfRXT-<@wKBH@uydMY-4nqg2!GylogBu)kKC&A+XiZ!Y=PRvos1?CVkT z|4xRg($!+E?d&H6Z9HjFAhlw5Ptmlm<8OuKkU|~Rq+cFg;IkSIG38ylFZ}Qy;D|@n z_%?Ii8l#6?(-`LlN?I94$t#$`9L(QLzAR8NVy5 z3YN!i10^SBE4-f%i4|gMjksnvc3Pl zkDE%Ibj{M)(#72N+WSQ+-zXhN_DwFN8V=oJxfYgVFKRxe(8y_eRORA8!;+k_t+6IH zqA=e?I=&eFH{&Rl+QLqcEk(?8+F8EUNm90Ev%$ z5p|oFZjoh65*PQ2fB{pq+-LxJc0aBR4H-MKu*j%mj+$Z3qi1YWoTdbuaT!4h$YJga zPC@lzQ?QPAYzEmf*Be#~Q}h!4)^Om2BH#Tb?b#$CS=hM0ub28@Kq&S4#{Gq8(<9DG z<*QZtS5}`YqDT_a7NfRb_9TJ5u)41Mx!$fOMTp00feQSsy&Kkb*O&QxsB( z-ej~YcH5Npselb%Y>FJ80W3`Tdnv+qY_g7e$~U=P6b&6NL+Y}g4ao9Ly@tRrI_Y7R zdT;c##od5#;loCp&W^lZ=8Zuh5@>(9EV8DTTBt43KAj$bQY3X7r-4;ly`7%B0Ul9DadO@QSuqJ7nGb$z>J2%+g^Emch-e34oEc%QeI+x!Um&KqG}p$=K3GLTwC76HYX`5 zA5_vLr;!e+`h5rzjINXeeAa`>bYBr6Yu#h2?;KO@3fbsU#J-&MV-NY&`G&ht_j0BV zQ>3d(5m__R9Y`4E1wDC&K#(cM)nPNPW>JkpWt&Y*%7ad4)9~}zQnh53%DWx!jaGfs zoRmL+_2lN4&&&zT9!feCko)t`>e6m~3+i*!R33+5N?;-_pH#VEm`|+)f)D!H#DaYP zfd%}J+YdN{cS$47_i{fGKABYQIJA-(gAn3_JgK#&jEK4Xn#-u`T+(CcqhW8Kp1qt{ z`X}tfW^awUtw&ymWK&X$c>3n^i1>UXG}^gknPs`$X?8x7)^X7uNTaUSUTba8o_g0_Mdw&g z_RU~NL)pu)@~}0jM^v8`{1n=DFg`J^ zt48MYi=Af(sy1LD0vx-aJl>!aqtWscQW6xM>p2?>YaN|M-@A1t)Wl2WTZ)alzZez*ap#nUY`&WpBn)Pf2 zA}D|WavWA!6Nv4t%qI@~ilo$u9!oAh-XB=E zH0>_A`|fJ1-^Eq1X?NTquY+rmeK%vmQxxDQt~mM&Hha6ET z=S^qpT+;a=;RUI0VaYeu#JPD3I9sQwSJPXQxQs#rT+Kh(H^brPC~H0zm#pU4>jTQl ziIiskw5y!A+lJcEyOwr!9Z4RuoP2C5R6?!DD#f}AZ^wy;FuR(z`FW$co`y1Mz;u)i zgjr;&2Lj@Rvqg1^(Izq=jRG|KHgV0d9nXh5d^8FQ?!@C!boCQY$QF{$jOl@F4 zj-i;FY8=nsZw=~{hVFGjB&oo9yyo|dO2hZw=;)gkYejq(d37m3ZB{|l=+Ie$Z{44L 
z$Yt8@g{xPQ^{qlv5Pjixm~?&mu^Ccn?rbj+chU*VUYxDb*$5SoV9ISx!mJI|W<8GX zG*u#eG(R6d*8^)tqILYQfW0sSh|jBL-TH*HAvCa(%TZB0l2=xPO}V()b=R{YIn80- zrb@o*Z$R>To-#%A{#0{z{)ak8_DE$qSX9YeH{=#a^1a)KZ94wfhjuMJw2RpLzE0?H zhYINTyCdu^pE*K-B1{iEptFmB&bEhT$RPQKUTh%wD`lM)5N~K|ccklt{BqE{sYV>v zm(^ERzvq(sUC7a;XH%iPdvGd zzZlt;q!cB5tyPVoz<`1->v2ek>WXhYd*Wg-B!@bqY`&l7q3gRcF2;#VV6P7i8l;hQ zxouwh><(Xd_74&b9k3cj41mSHW|u`hrmne-zk;UEDi@r^WM$=6LgJ*jmnZUzL{$ge zfYS2nKqtJgjNrkC^j37XjuiE3MVF4k9A=gDOIPMEwbo(r&B&$SmiWZ9newCGSPmkioBh!01lu>1pe+W zpzA>biRAot-`vsNj((GnoJH}{tU-0AR(~myH;}@?kW}%P+&RJVWu#NrqKvH?pUO zQR(Efk)}$LP3+U*;R6cG_ORvMoD>}*U%{dHHoZcB-YT?o(QupSpaKBTZ~7ak0See_ z%#lQtvX+_WjVf>5BJdL%sR@haLnF#nhXoB$Sv}DFtd9EA`-XBbd->l!h;Dqn2>F6Y zH${KYiPqMm#0? zAj;1hW6Z*>Ymp~8KR3U}EoBH>Ra*4)FS2Y#rdIGhl;bAHsuXdD1GPW0wxlT8vcg}1 z6QKq>1hzgxAco}NE2VlLI2j*uWBqRoW%?9^^Nx9NElyIalz9iTbx2;9NiCg%-Kh!l znocK}6HyM`BQu-446cmAO!$cp4Vcyg8yuP!1`UFo(2wZZC8D9UtG)tZNon0e&BzoW z}owi~lZC>VwB9YS&_753Z46z5Tnw}Et2x=7^$FJy1)VBA!K zZODZz__zqC;7qf;qXDzb=CAhh6I`j4ESBE|ilA8WSgB4vJSm9 z1Cg&r=xm6qGQCVU)3s>*)bFdnUB^TV$eL%{-YMKx8uoT7;xq;7 z<-SJQk0W*$SCKm9ghsqyDp(|*SvKn-*C#JlZ-}Teg{QaMWo1vn5DynMr%Kac@`s)) zkVyb)Z)d-$V=$~Z`{&*O_@&2Y^J2k06H#HRdNw+ke zUVDM#dn9h~wQg{{^u}c(tWZm6%Ph)HM*`I}-v;;`9ufSbA|c=-x`n?2g=oI>2Vyz2 z>rX{ag*8b=8O4XWBm8`I*L#=;^VL>7>z7#$HA-#&wb0`cv3u9Apkot%<&ZvW^a~2J z+v?0ehgRY4&DD#HQnVLUUFG$A+g7y5rvrnub>yePI&``rW5Ubcl7n9U!||7D3%aj! 
zOGZh0&b>IUhyPV*x^8>-?gSWZBQDOfBp8j}P`c=m4wf|+Me@;yk zD?Og)wW|30viS=?!dgQFekHFIN?TmfK_yW(*8S4}>iY;Q+*j{pV_JKD=US`G*PX4b zfh#Vv=`R=M7S;c!p>pzofQ;R14kY-UOd6Sh2wwV5)xTqpbSi!t0>i-l#G1=c4P_nL z7NjcNTjw$6Sy+wrEcSi*e0GdZN_eqDZybEO^2Fl^ltmOLye${L4{yv9Fu7&?R!f!E zhB4xAA)v(CJ6wyJimO`2GN>y**u*N_D7tyM(SFcNuI1vC9#|iKnqJl_fT$2(Y**TM zRB2EMvoIzY!oSkA359KW1YQ3-w5{B57zmEHc~O{gWP9WC6J>tdQJ2{j_7;7N`D5Qb zKv!@;nqURsiFJ&D&*(dzRFgl}j3rs$$9?P%%qZRuNJD#LvWlINrQtS|u~%9`XCfO{ zh`pZJ6x05Jvj0r+@hr7f;@2IapQl^K33%4Pk#09JaGqL{x1Nr0L=#s!?E?a((>tK7 z?*Yyb`khfq|Ia*s^H12ltHp2BE||a`B1uQvR@h}OS)Zog=9vC*23Lu{!^G`9H16A9y->C18dqtet~HONI7U?LO|D0dg!? z@0hDiDpUWb5#p3I|2=FFHxE)%qbTjRkE_Hf)rwY-e%kS^CcPvGhoz_16!y?qqcBfL z!u;NX=xH8DAM@Bn2tmPJN{lj zTb09|BjMh$sQS8H*m2p0yE?#uG#TA*Sz&*>H58A_0iMLOfO*TQUDi9rHk@%k-oOv4 zJeK%_{9>_FbNsoHeu}9$5`RSerQ!Q9;kN(6<-kYgORq9+huGyPoYJ?Gqx^zrc1^XD zgQMkQ$@xO=CEAxew9jx!lQy0u24HxA=%vK{F%D_}3gD%r@uG|c{6J{6kC&p#6CT`^ z0Uh6502uF1OGdN(9SHy6;2t@R4JBn^?w7WSyi+rU+ysXCby~vtr^C2j%KOf!m3wwN zxXbw%NM)7WTQ1~@d*dV%6z>Lb+Sj^h))X~dL`n@*-8B|JTLt&9->7at&8(XmfxVZH zI3tm*sqv{t8~zt>eWz`+6!jpuAxO3WE~lqtAPid%s*=eusRnIlXA)EXhTKWI_NOrOaQaP5w8((F&7HO zt1P@PC8MBVFfMzomlHRLTk1j ziI0-j9^QA@eLYQRy%$3n3^F1@G57N0IkLoRYEQW>7vvYJ4eTFjEOdjwdoek6VyuJQn_VmF!+p3K*Q)Z7h-1!jqYD>&r?mF& zd0@Z@9n3RAUhQb+Q@EyK0wg^*w_fzKfpCq}@rnZEt^-K#pOB~iJ?=lr-;+2!$(ZlU z{Qtq+G*!|jBN8sS?Eg-vv>9Mm`CCVI|3|WaA1qo6@K{;(mAC%MZvQFf_tF4j3wLy* z!cm@651tY@F#E?WN&xuQ_Qk}fJl9`H=@jk!4}c*H8^Ond!CG?tb>^&Ni&XGOhZx4b`8bK3Ods?A^U@&7Y%Te$^9#4TqQx@wiH3>=+e zlMK;ARobYBXMQgEk0Zn>JR6Oh3lx&%5y^-~2-Cz1T)qUHBd!oQ@KHXU6`ev(LO&jH ztyZ9B=QJZ}qT^AFo5L2)Xdk~F`}Pm(xrINMPm<2pP2Jq*=@Q#ycNH=#LoBVI;EeXK zfEnV73(%L0xYz$mSl7uDhD2qZIv#%A#hp9J5Wwre^sf}Bt>0e&6qjuNY9J(BXppk_ z2OK&*oRt>im(1J%F;Ul1b|5>OajASk^BQ&!ZM}36^36t;0%oG)mx%@G(9Ft zc~@V-(pE`!lmpcHkl}&nN{ze9@SvJiA8bw_D1d_1La&cV<-oY$Ef0&PqtDQYQ3vS!D}{i z&j%td$in>WgnjmxP25nkJjJ$dH!kblwTI+l4U!S!^o&!Oxy_GNOuHMFD4{t&?bRZO z<~6>?ZCbWq7QP6Qf$@dFo}olE;zoCM=?I-$i+80T0*!WsS0y)9p;aOd)E3-s5wrC` 
z^ROaGlMrD^vKA#}&3LXj-@rjI8S)mUv=_H05cTJc8Q!nk@|&;58Lsm8TRSUm_}~Yak9GF5K}X4 ziOnpDERqVxxv52UO!UYgS35o|wtNfIZ}gYi?n`)IRtkmJ^~}#ua!xTUxHD+=39&S0 zB}FMHhlTJ;O>B-jpmIv*Sy?n(A_8KVoiSzAfw9gom>9jD{#a92#B=oC=TXbO>U!Uu z!I?I?hRgd1cn@5iD#h&I>@9R`;dDf&sNFFrD+sCi23YCuWJD8%t((LcAe-VUbP3es3 zyjZ)`pql5`Wu}L!>?Q}eXl4xqbehn)P%-8wTFHS=M47oC*RxqJ+rUjPxmpow<{1z} zn!)Gw%&hOFwVOI<$xI3^k2j#|(K{MTxXQOX zlKn|-oR1nRN-uy)*)#%|=6O=SqU8NBKYQJ;I%oApDkli$9FH-;b}$?1qh@mSl~{^J zo%>8S!Z>^A^^%;r)2&+)<`(LCZYDO|_Hpd?HjC=mzlH+74NDw{-M|FqT%%*MdbJQY z5#KG$>w^gKzp<)ym=ecOaXPEKjK>u{%VBW#C+NB;eAS`-5mS+fNOqvx=dih^ow+C1 zxtT1~WLMI)x)b^YOZhd{7xRhzSyO6v!<*!l*_m#zNY?qyr#EXF#l;3YS4XCzo^Ezf zZZK#$$sbA|+XO@Ru3#Dc%(vp=CFA8|)XxoSiaNI62BMc+rMH5FCx6s+IolzEIINK(=a{igCnEW@NIQE9+jc z*@C3h&2&#E{0`ce+_V>0*C*-LN4;!E>MU&&h6KgBmUXUZQ;YC*SurJqMQJgLumL-Z z@Hv0J41a6OzKXX^p1g-FrA5=OMjj61WKBVnWX+lqZh`n9*}!bTda!jv^L|atF0|G1 zpKV_pJ~CWd9*-9?NEr6Cdhc*3cI@tE58~=0pNk3kU$>d66!A=`ts-wr(BDNNc0%T}p|j>ud@F|oT%L(R0kJ8a0Kj-_i|=kl>( zqouBOkcKIQLT2ruZkId&td^o^i6)s*P!Y*mJm` zF@AI0UVp>AL;RF@e(+B~SP4{eJ`B4fd-XR^F^|8i+US zlq9G+n15y^QjFxxuA3Q&Y;6w2KhtV(+jssqbicBMIibdBLEehbFw+u<)sQQfaD zS#&5EL4+o1wh9t#wmpwmAr+3TvkFa<`V9BYcbQ0aDp^^IQtG~% zUt`G=uSE1=l0^6S_cWOEvIMO!oC>sgUgMv`AR|J2SQeB|d2yTohV*<^R#>U{0VecY zj3p^`6WI_J41AoBa~M3{z}BSAR8#k>2#bJnp^J|#T=uR!4?DNDYiylU>l^Lh52?W# z^vKrhOvFOP*tOrj#cV%de>A@X_7f`k2+0wjfTn8wYuP6C5xJ4zToz_fyNsaUvwts+ zEjOlmltMdBPK!(&{cPL8;8XT>*cKDlA7=riK-c_9UWB*xJaXfyR!*B?(DQKC;HB&H3X8T+K2I0IZ{8s)Tod?RA#rYFhQUlCH+B+E7Ovq zJ@u8nkI1P{MP7;zj${3;k{Z^Z*apXd+BD?CCrZxk8#So?AOb!@+HALJBgAq+JAHn4 zjs#gyF)Hnbzy@~BDwV26&ywUtcIrHzO34d}s<{XtgrMV`9hFWplMnqI=Tb7a2zo^l ztWK>}a=u)WRe7Rpi<8o}JfI@9b8i*HzPk%&a#%|E03#!=iJ-X%-t7t1wbV-4j_5Wj zO}ZInTeq-GSC%<(HvP-e=g~!t3Z=rVi4TWRV1kcm6(c6W4Nnhg;mN}5mgy2#ErM}f z7mx07W;G&_Y~vm2H$f$uYE^Oq|6um=Kxm|EXDfeHpZsHs;L?(QZ-kTr=dmD)^A~J{|wI zQ|qVD1&RXq8oYn1)|7bT`=zDUEicm=d?|gwAo#B=pDt*|bqhn@v=61kYI9sM$J0kh zybcNM9Tem{4na{UHw2S@Z{?deO7e}u?9n!AeK#l)hAYJ>2r1?0C`pp8s-EXE(Q13c 
zQ@YbDJZ+9n5AG7ac5_Ko{4&w=Bn#ns;Ixj~iULJmNu|oTFI754^}fY(PfjI83FnV% z){UUsvj1K0F6QYP`)}~c(FwTU0Kb3htv+hF*KMk(PgBqp*-k!MW`=FGvHFajl+c%v ze_;(?rs=H${OJf3n+R4S9^AmiEncJ}e_&Ojc0L#;%5^i<&qhZW{+S2r?3pkL zd2d>oG}GhhHJ%cBORPpnyZM9?{B9(-0gd@q`{L3m>v5tC^6@MsjnY$jBZzxfNlkrr zxoHvn`a@N@XIXwbe1*v_RXJHPB=_kQV$ z5sUfAnG1c_gzlL8+Y&0sT!8bOa-{Odj`ULX=>7k`9OISqyO$9{EiLQIqmOtat5ULJ zLm!9)G?=Q~uh5B8??`bR4}?{ggFh7MlFy z1E@73E|m@Mve^T5Z$`ptueCYKtq>}Yc9RVvg`Co&1UGvF&4rLu_vdYH6O4jzPhr3j zzkXM13BF&;G7ypf;<;+xi+S1wmUEgemrpNni5l33zjOLJ&K1{-BLnC)BDU-qOZS@` zwnImW@n6bH-L;oq5?;wJbDgf~U-$&OKM*PUUgD9Eh5JoHS(r$*DYCYvIDy==P_Apm#^ttH z$#+@L1PI6jt_Z$rKiq+y#dQ*4gpV z)O9W&4+Yo#czpc2H%MQMp3bxXIz&jgjki_pbjyxCa3kW|)45uP!r4^@0R&eSl_VAH z`Wm#1+FGLV)NqNZ5b35 z3_JvN5>)t3^&6d^_FLm70A3;VT<;eO1X4RI>1ZYIp&-~KcKnQK_!zqi6?>|4(>lQu zWg0ooq@o00t^CzbumfT#QoDG2@%oK+Ai`mPJ>SJrN-`{gsh;mHumg$kO75)>{T<>}ls1UVXqopG#;o!r7oincag zi8wim;*(<^Is-{{*D#n%d}%}>s+XhPyn~m^<)sNEP_?tYXH=$D$@Oc|vXl^t1*41o z1zeReo~fNpKrup_zuvM_fD-ZmpXm8es5%@z`G~xbN*LU}dX70IW~{R68(ZEtmgp4W zjYgigwto5GK^RbrnaS!}Vb9-^8MnLA%Vxiq0%YaC*sh(B#PJ$g-vFt7h{teO;>i`q z5bi#ooj0#_9lPl_+d8NqG-U12bkfoW$ZpwwP)}yrG_ycrbV01PTBDg;bS>57&M-OU*0!|#M~}fr8)RIGfv?c@-i<)w zpgskh9<2iR%$oH9+^J-jm60kr?3`AQ1b0gLx4lsZtY2l|D=|SMhJFr?`RvMUiMnr_ z?$1kxbmwP6;l%OK>s>ZX37^Y+v90f{VKQgN_IKfQ@6z4A$66;8_B*wT{Ku6ectj#j zBhlycU%ZcZD<6M^el2U^{ zq2jbCt*5l5Wsb0Cl)39RspN)^&9zNM`}Zl^)UE>gv*@mNQ;Y18-Tg+Td>K7BbLlRn zcOzohw`^9m#z@M2E&X}+KxCn?VD8+j<4VN&wk}`876=eS!9sQ1m7mJ%eONd$hhEm| zk}QPwY>3vqIo&*xF^a!ETx7eio6LmcaCxMGK9c?`M5Qzr>B|`kbHiw}15U(a#_~iy zJFlgPrTiz-wTlvNUoWZJT6v4*0h}1Q;JWA4wUBh;IeE67nmBExqW7M2@r^Ygsp1t6 z?7(m^V0>=Ui|%CoV%yJgb&<M_BM^cFUh($fow1%_Pd>A!j{cVW_E8#5m{$SHBea zWxq@qfK*87=F%YLGoPh{tEcH1Cmp?o#Us=WEnIIB#TOsVz2huVi?-R{TVJWx=ZAlO z)w#Rr0(ra7kYLhDj3{ck>Ky%>T-i)N__1NnEoKw4b2|ReQ&@1MRij((n z()@rZ>H6=TFLEPnCpk>>;bTb}*9>&XkeoubYM!)zP>_>VCWY~MC*s{zR?~{hdd}mM zKz`E{Ipi;i(DB?Hk3^d*j(NNhcX6ndDES-}C8nI>Z(^`Dcc>Yyu7)j@bn(O%+$0c- zYGrmDaVi9+d z^mcNE&$5`qSJMtquc~Kd-Rfe(wps|LD2D86MZEKJ(6a6mR3%4yBW-;$`QCmN 
z(WpsQ(xX(s9=^3#=}%7)R~7HZDXHGyxfa1f>3C9JK%gSc2W$rIeCrB7X*)0GfdE}` znp%NtQxiSc{xVhQrBIlwQSErrw=wc|L=$(|SZSnA(r<-MiZsv92^j z44z_$NZdJcI4-HoTDJurY#h|hpv;DV9{wc?$IDbNg<%wPC8gb)UA ztu}5avIW=0N_L>6%FtB~13|DB<|I^SlM=-DSk>CxrpS9h$kzAB&X=58a_s4k<&SiB zc1eKhG?d=WRcg?GsgFU5aPfI{j1ck5TqX%)soLmEE^2RjLPss1m%p6>Qr$Up0mfTe z=r~r`2=Tw5vfdyhrqO}{l=V5*XGvxTztfMsq|wO$yO*VDIyOrrlzqF_?9oV-ls{#> z8eRIy00`I?x44vtfw~9`-0a3J-Q>xA1zHkQ2lT8ym|RH z*4uOy8`m9Vl`} zdbA|EaI2N|r={6CPJA%2L@Z#&Cac{qROIwr+(K>DxTm=d;;S!)g4j>u(Ms|c&OH8U z30|BxTX{?aAbQ8WPS+doMRD9&6298q&u7N%iawyNuH+1g0OoMqD#$9S!P-1}uXba# zG(1J4eqCD+z5q$B7TQ)zd5MGctE$uiD`42ryvsQjoI=_PS1)yFd?bdxMP=f>*K%uL zq}f@V0P3VY_}!ZX+yciBQGRX1D#i#^k`Fl?HYV$k**4=u(pXcPVGHCIE=&Tafxx!wlZ+&V% z1t-Bf{;le*UabbRWE^Y(otHp6?2eW_v_|zto#{Kya<2|!(Ni79zAb4EO*JaPB~_y& zO&+F;Ds*nQ*iNVAH1z-!`sl6%oScQfQd;kRw!nq7j7v`k)sv8{N%S~Uzzfh90u7!m z=XsugL-hqUC?%A7J?SJ8KAxqpN;KXP0O2t2_l90mh=*&X9l%}I>pxbX0~RPYpStZ# zQti0*X@4P~r-XB?9v?3htMxh-*e2OjfCm3X$@zG2rg~D%h&V8RCp1YP^9J>a+GV+lz zfc4@MXV8Y=Cw@g77${l;nSeTSzt-ai5w38(6R1Bhy1=w1rII;i$tM|*i9a>C9kD4^ zKfEX8quI$e7Ee8NG}T*Qakckf0S6#jC?9t^LXuFOq}Pq;kAVVMDLodKwLyq@sSg!$ zmX>;d$!HotKR25qPT1#<5Al)F>O=%l>JLO;_?>Ol`8pD(-`U^2(VH>X2B{ z|3T_6coa%k>y5eO%->83?6621i{q;~$s4rjIZb;^on6FvwTj^E*uxWm1p04wp#~s& zlw&Te)*4V+IQ+HsPdC%MhDV_Fiw=5g{1#|(%~=4Nf9;)Z zTP9m8@SRN%yae>p|4MXT1HVAYgr;D*^ig=xaQ0#S&TirNu1_48hLr-=VQbIb?N3sU z#l&+9kKwuRoRcBjpVI$0)~ z^NtNrsEXxDw%~aBza5T{BkjbrVe!%?b;@gXf2jdDGy~7j=mSb=wrc$kJoE{Izpv+G z3pk^ zUBOK&PIKrluJFtscdviYUw`_4LW>zE5PjK5gg^Liu=s;c+-DM;04tI+!R7wxpE3C) zF9IOPZI)GO5B^VGmWm?B7JNOf*7zh=dF1d#7XuqBo;TILf4xgPMO*O?@SsR|JlL#Y z?L(Zw67h*L=VFYA%KdGXtfZF11q_!SBEJY%Sp1ye0QMWbg7~GQEV!76zL(*;Wof^7rf3=8uvTAS=1{bL1I^0=ud}g_LsP2jU$bV$O6-3O9)XL zM~dfLrpT2l_IekCt0=iii-}b(6ca0-=OT_EeqL3gx;9E3x+PvuXHPashH+nN*pRyG zI2*}rvdU(Q62D9DUIqL!ND^)&%{1!uF}+di~H1gFSrm=5%o8W0L-l&2ZL zdJ&JU3BRYOm6(qvRzlh0d5Py1qtuR+++~#4UB|YX-tzUF-rDuxOz&?07Vm}}B#TQA zVXEbkm*T&yNs3>p6k825P7*2V z@g{U_u>y>Ufep$oX-54#PPcPzPr@0~Afb;+pI#?`GH#%uWPYIR0N)>D99#?bZ-dHu 
z{5B<_*PP;soP+|D;o44{x!U4SH5RE8>&?zJK>b{!DRXfjdNl31?%g!CDs{1D0*>~Hs80;70KFSLxfO>=1n)eHFv2l|dXp|XrS z-z)|$b7RMUY4c`8T3CTRWUo8Em;0GBnm=#`E@nY2lz2!Hjk&Nat4nvCK5 zUy4@0#s2)950&0Hih^0s6i>xl@!*ExX0FMGl4FC^(ZEHo{R>v(m&Lf?nL9<(7eI^t zS6RclN<_0`^3)QXJ(gYwq&Vm#*k%)5vbsi5-pm(4K@Ye%nDx$}P34qgi?! zs13A};;xA4h;L2jB@Mj=dfFW5cDlUGzW=_YpUjQ*amJkbqm8?5TcvakMHK_LZ4Ipw z-P{zrM8sTD5dp+tV-TjAVIGKf!EprFsjQ?9yG<67?emgS zqJ$h)pPDs=DM(T0)bDoGH6`j?a{=CI;XvvFs0%VnxUW2zoEj%HeqBnkUP=p+ZIW{$$7MG`S-(hBnvy*&0{Kgw@05y~h!+d=;6Mm1 zex+jV*Xx8O17U;s1T&d_kFNG+rRE-7HFRgM~l4f;$-(hLI-f_g2lvH)@0Su`x13P0xR|5F$OOr0 zdiw!bwxtMxS>kS-0#8%iHA0lPRTYzXGolPf{M*=?x`g~JKN4MMH_6`Ftqz_XJsTUL zYl37+FBFyq8VgZ&b-%JY6p?0*cyH4CCa<5L#HQDyev8iLpa=#{@(DH0Ut-Qx;Q!50 z>HM$8hD`;W7LMJu>jmjGHHA+j&Rc+n|5dwgMEym;l$5ii@xI2rvl?gby}EkmX;yNR zA_9??xSW|&lap(??lQfcxOn8p4O*1QEl`#KNeoD*io|17s#1rL6@_1d)~5 z;SCAmkb@WQts_&nZ%r4dO;@n$L$`}ZH=nRSqbqNiWQ^kSS^}4kO%AtQNZgxq10lZ* zlyx-Pw$86qX4=dhGSSnn_O?Q*#!|Ig&c4`qOy|EBVv}s?E3i6^>NB&oHtV}Sonm{` zoonW#l48jjyV8SzQB5t{C-*cgSLa%;HX!;x54Tkgt|QIjK<-+KLkAkd>94pXj}9E+ z)=|BUD0;fM-0*pDH7Lq9^kFumWD6n3GodtUL|1MbQb;hTT*Yi!tF&^@{V`oX3cZS@ zo_t~k5(4eEC*+m;7BxE?PG}=&J9EPuQpviJ`+kaoE@+XtZH}LZIst}MG*~t(??d^p z)DxJosFXHR%v+jX-OS-F!S>OWkx2Q?9kjE5Fq9EO|EiBbYk#|aJ(-x)&mwM*n>OC) z=!KA_uNb57t$2>rhQkS)CKY8GArY2ob|np6-!Yl;Psz7F=LS~;-#*wdyC24ov`xX% zzh@H~d$&xfa4t#EylD=*Ph3v2SZ6zN%lheEY+58uyh1FAlxN#!VDQc3k@6eX<7~RI z^wG9R^zim_Q9o2ze|v;1ywkPz_EFkpaCT?Di=Yz%B*Q z5##Qh+C#pJExaQ0>b#-ethxL_8q*%yRr{POA(RGo;H*|d>LQP|kt&jt@%>*VwjT{H z^4u}MRrj*wM9oOz>btKQxkBMiOC{j)aB9CYb|XcbhP9WYy~R+5_>!A?aa_Wi)^kp1 z6r)5*8b@-e1n2k>s#I`3`4VsDV|3*7ml#G#SZ15ReCMU)QlwwivXC`PczfrY;QEsM zfv-x?Ef$X54~ z_y(2Omhs39!Iw{xOONaX585`W=^w#M3d@zKQ^&sKmf65^QhwUR4xAwykFTh@Jns%0 z`O#4H%HR=`B9n9a?2bm|Y?TdP;LF`Qw9zUZKGq`h}Y=wch%_~(U+d{zSs z>n@88Ya6B8A}1F^my7I&2?XbBVs)@{Ld{QvfTQnI5Dhj1^G4n0urA6)yZx=-GWvcY zZFwA#qXz1YN#Y72S!{S)oOxl&#ZMjD@ibdWgz)F$mF6biMQnc!KdBF1G8tuXM&0QM zf(SYrUn_M*Q4wE*5>4|te5UY0Ey>sBGIR>H+3oDHnEP{M=Ss2Gf*Q9)j16+{ngiA& 
zB33u;_O0=1#Ot{g3e6|(1|p9J-ut*N%9K4ZF@NP{%R^VUMLma9(xR=IS=8On(2iIg zs&2Y7y5N|&t{G$}&aJ?ks-|1!JXAe^PI6Ceq0k7j!}{)cDBRwwtmd~wQ3)EYB8?CNyymUKriLQWr~$pbtuK%NOn;L zmC3|$=`ku~O-RhFTQ@4%4yLo(z=U?;+rRm^deo}HVN_JC$^5Py%*b`u>AS3*1!9a} zVv;pOQ{)6~Suv=`Xm5rzO?OHDLX9hAHC+Yd@uWFb>s2@D42;GABPvvU zD`!>@mE?5kq@l~u$CF7O0`)4^b5l(^@Xd~2w2vu)N8h3HYKzF~hML8$zimYJ@2iM?yP zvTkw9+1N4AaC2aC2wU$rGA-RYvj)+s`rR!ihViuZ2Lf8Ei4xgKdmGrmBKt_$$G)|a zRvyn|R`xl|I_PyIvfL1^aK5&0EI4Uj7i+-IbonY*kfVG)m5204Qo2k6Rs?GWL7@8X!3RlhDOVrqzEe_HfUkp-oub)R zbjP@h*IskSwm?GVU#iva6i^kdwqEor&Ttr&qr5ZScHusO~`X0yz zjIyhyrZ(N?IB_RI9gjFp5Txw8Q}=LVK&4w2rD;f%z)tN6Y#X zb>7pB3kGzF9cA@NUV}rEV=>8gZ>o^3IUumRQ9KD|nN8_1pS5bGIYg($B6-oLv^azv zaOxJ7Cs&?SKECu&J(rz%g>Kzu1q5n%!!BfPI#X6u^SCX?ZqK!w zvx49C3E!jIxsIGPnQFtRV$`~$(xIXJP2ZJeT`VIR_==OSm9t`lZX$`{j{mV|3wh66 zSc-NA%5xiFWXx$acjy$}r`cV~Iht@uYk zjmYCw)|T|F?CEt0nyzLS(tT3{+L7ENaktd99e7+@HECEoN2cV9a`SdjFNwc22>*ZVy?0boX%{y- zqK;jRsGu|j=}i>r0UQEImo6}5c`%p@Kk$uBns+1`0@w*Ts`ZJvTJ!o zHP`aE(H1sSQ5m&nmd_)AlQLvl&lb0|`qifQnbGaSjxSx*=GLU0q8Comz756(`nh|y zb*mK#n7qNyf@&u352@N&K&o(Pls;hWp%zxOFQ`Js$Y^N4M)68g-m`> z2WvGVIx$ip)o*@Cbk5==zw)`>4D8lafVq6n3%CeqPqNAJEeE5Q4D4&$c^_6_#3%HJi5{F13?b|>-iEp=ZH@pW;I zETba%SUH2-Ixm4A?E;uf#0*!1gX*AiUdelFUvE{Iv$gw&w$y3m?UE+Ixx1FfJC>7U zxjt=V+iq)Vvd-Ah?+h=yqJSQk69inr{-os?o+|Wue7vBcqn<_~SAdmKpXL05y?wiK z`3f()pnO9vl- z0nxXcJN~`4?FEQ_;}>t-$#6r9F@K-*l~O%nv)j*B@rILCEg z2gT&woctP_P;bt>F_@vCbBD~NBQ1jbwaPPBGiLFX?RtKZ1t9YR<KrqoB&$CMNG;DG59hm_JQCtDwNKW+}n4@N6M|nBWRC*5FNv z?UvZyKakb0n)3$Lw<$)2)B6mIILogNeSDY7hP77V>iDB`yh)m+3%u@ z_C{cdcv@F+|Deu*h)9i4Oso2q)z^mnC_ArVe($*&lWZGi~kX=J0V0R~{Zv=|8)Z9cd8 z`o5@}fERHbE$+L`Co*#wg^{Cdn&*iDWX%YmxE<{z%y^)3^3Ehdkn`C@1u3yoDA&-p zU@S~KzS?X^z;}%VW(?!;+PH|C>1Yf;L!4fV)hyvuT#-Q+QnnHBUb}y82Acx*)z~am z1(qFl#j9X!)xUUYw*}+hc&$Sna=B)s5;d=iF3oLs-fUwW`D^^%KY5qfjy((uNk)~T6BwN%}EdGXAyhYPfWRCmr~N+5~R{4 zn7C6|l6kocIO5UGoc!d3jV0%m%t`G_++D~cp%)6;_w>`caW_4-y0k{Cvb&=BNI1Y8 zXg+BzFEi>9^3MdIgXj@*owB9rjX-gBjW@cu0ZW`WTCL(P2+`o)DCd8J4dG%-EQ=uM zxSbv>JKr^TBnG>vAzmcNht*)OB1Ls%G70x 
zIs^C7Ri5st&4gPwq` zy%3kL2Omy2UT~cVrsi$kKwd%4yx-i*Jj&L!tpo@)fiTBrjxWXnaK8}`O()3$Z}4=_ zTibP70Zx1Wa*Zf$&GcaD>>1!>8EC|(`RkCfxvO=6@D4^Z3Yy%V{CeNo4NAW;@{q`L zPq#!vdu;VfRmkBIz=!YLGxvbOGYRi4n1DG5IIds$p zns5e6e0EuEbU^eTjJSmA0lc`@8Q#-O1&~|rOXM>tIb)W50H;sIU@rWymXvYc-iWRc zn)@zR+BtQ_8&dhn$YHqyvpS4@PH4^!PS@z|KYaeH{eK%0zUxu6A1eEgdH(0aBgH)&WX_;!QdMcc25Uv{zVq?;x!4p6 z*6dCMeoX6tsrp`h_qY&EQSc4ouPC}NR0h8D;OLR$D;k#haS=Ze?t3U#L?P4=))j0B zPB{SJ9Q(&9&sX=D+`0uA%{u>s*c7SrH|{jUzPFeF&1S--Tpm>HmJ2y=1P&0t+(u;v zsV>cnB<%;H&V}qjA$@z1zS(A|@RrL!eEjO`fq9r$Ur}eza2;$j*0`9>i+0Pco!gH9 z#KHG0VcsBlGI>G|Jk4-Q|3~@QGw&XNC0x65aMAt0^mCwimN3vLsjaM=5)MqrluJ9n zr>n21<`A|cKZN}N5j+iFS8(Y24m6rYWr(>fDeR>X2fL^D!vm!7d)QTf%6vj-emHp2 zV&jzR3h8)a*a0DYO??E2yq~+Z(R?3y1GB;|91%<=`*Gj~qNHMuaym3@In|#0;RM7* z1N1O||Dn0D{r3=t#ey)}Z|_(9A-s~R+*>`mY<=&qKwo&*e`W#vXCJTGzZ_ahg#%~n z`ki-(hV8GswX)yI|IF(>n)5E4a{7ui0Fdze==kpR2~CSR;j_P-A-Yep|61sVDv)%D z%d)TQIFLK};UWIB*YD2ny#+cW0cZOb3s!jFrvAsze_YbfgA-6`aoB(69REjwJ5~Ks z29o2zw(Ii~?&vQTvuscEl8F@Mi-{=7yUhEA;oxJf0m?mf+va<%G=6c!)!n+n@x|Hc zeE{cu5A@$1#%}D{2twtj-@X%yIbA=`&BA2*T0dZQW9t)|B~fB^VA>(QEH~(W#vGu5 zu_v&i5~2(GJ+WdnjkgY2M`*ITAoofVrWE`lEEUSjw1ha56T_7W)#uXx_Jw#c@O|b%wS(s5&2#w8e{G7i@vjW2 z+x$EPykzu%WbOM4%_YDd1%JN>Ki=anRWz|5;u4=qS^F`MZwv^Cn7co1*V;H{I5Tdu zlo7RWiT`l;zf=a&USK~fuQA*YuHmn?^51pt@d818brAQzzv8z)$An_cHNowznSr zQWw6Xu%F=3%}}4iAo96l!9U`RD6k$_JT9^xc^K7R9Cb7b%>Y}Q=VOFn z=B4#E1_4;7U*bC|laZ-NI=J#4$>*xx)se?cpDMJj@U#6}?)q7tXgVELX0)4jJ^_)V zX`0$DpV#O-RjY$`(`zlI`i=nfR6uUYwCwEPBlCYPKessVwdF=BOji@5+E+|c+o?l< zN=V9OaKp3jP=q&Z0jbGUw)oZ1jDMbIE?uf#q68RgC?d<6E8aM4RF9jB&>Jq#Joce>wyoESkBZG;+V5WRR5$=KinZ3^#w zQNBvZfaB^W6q%vt9i*D#;T_D~FSnGM2ypFbQ18}fxSY)5C5IxoGN@A4fPVEEeC6f9 zq`BVoo#Me$%t%NEjCm7YbqkZ*TO|x_JQvn2*6*ma_$tJ=5xygP{aG-`a(582djnGo z5|UHWM#6}Ra9cSs)>?r=&2T42g_S#^tx!+`b7AORJkAot#oXSW#=PwYvV6G%IW1!V zTHS#=F?GWqX$4N^%FL1(#;(;HwQUwLBV%0ys=$~~kflLNa20!MQh4F&Ht3X7QX}Jt zZ+Ac2uoB*)hH`y6JbTwsAWSL3wkF#D<;)r%Ux)$;N_70(S1+!HZGnmPRtIfkc5dk} zCt!rvH{y0jr$R@D#yJF`m&3*va2_Zz6RoB*APbp91_wh(ZAJwCVXbLwzERdnn^~OC 
z@ASe~xG?1mVG=8qG6v^X(}2Aq5t%4;&!zQVEJD`PNEBA&yikw6trA%m5?H0$6avDQ zrbrZF#S{qO6d&>~6Q&h}bz5_8kM{PD#iGG|mm6UR4CNTLFvc)1G&no7MW;rR!s1)F zW_O$psL)QeAa;g)^o6>ybVrWczz(4|Do9?&N4~T2optZ{*$N~!VhiGf0yZ1g6Kt0B`rnyo}aeAs8)1if24Ak&D4a0kUI zc<4E+P+eGSjeoQgKZg&8H^5!npQZJwGO>z6B^s|0U;<3aGmo^+2DR+^RED^!P!46a z32axCED0pzp4g?Ox3NnGu`eVbGUNqzgj_j2ctY}4zk0-fE*53-%`aKHm8Rr#lk^MS=P%Ef`Uc(3|vrTTj^@4CJ71#PKxLD4q?js@Kel_qe~<@<;g}? z%nFlUl!Y8n-Z+N^^F%|py`2fAVFm!hE-Z;@siKdLGZyowZpFoAOVGe18N*o1%k&>& z#F~_d%a(4Pr8PzbWcIHu(%+2m(`CC?WFDnZ+jqPrw4q47Dvk@r19Yzhy-@_k0Dg{$V+GB;3%v3|sv>{%ua-qO{EZ8HM{fV5bP4~V@h!`(=; z-WjbGVldUri?PV-l{rc(ZB&`~os_iz-4gJK1Y;7(=K7`znrqC{y00m`2%?G+*Ijg5D6~vsch*Hc>DT1w4t@ zUKAViWvmPWXWpu}5tyMhuN%pMvDs@_W;ATjM>*-b-iZt3n_Gsx9=3M=eNH2m>#yMg z4(ay?b+LU@F~|tc9*r}euMKK5AjTCNZh{}WO!ZZV2HvG|a0|E#;|l`5 z{yN_m)Rx`Ij$vWh7GsVHfhj>e?@1Iu*Gk|ikPC_g>+P%y5j^u)_u#eKp)G&-N2ZXF zyORb~YTQL35IyBowZgz)+*Oj1oVX+=4O9ua(6PW2=AEU-8$h2-Qw)P#Ss{VQSHf8H zrxf+%XC#@C)|DNDm_IF>UtF+6suqj0oe$o`#p3~n=+hyh2Q2zveZdH2Lfm7i6MFII zqPtUuk%*{S8E>J~K}DhEHry@KP>RnN)l{ie!_5hFeDJ#|cp=#-$y;jlysfR0eF}P| z1)b3dIUxz!u_~RKRnlg&yaXr=x^AweiCii3msva?#(Hm=-odQN7jjzmLPrO4u{6Jk zUJ7>Qoc>y^#nWq>cB1tnKV}8?b8XM)-0(eue8$BXdY6*A8-LS+J4C{A!y407H~5uS zuWcS%uFmLD5Zu4;aohc~*Hk%F*K2dNLqf0)ah)tY-_(!D)8wojY~7>>%b8EbHap2! 
zi)$qJcZ)til&5jO7Jj$Gne%TPT z^BHDY>2gw7@oeGJX1}%1>ImghwC}sxskzlZH{K<2m5>_h)lD-V@4hJ}%z5$aF}?)m z57^b&Ouyr`R-lYml&~`1;0?-sR!cc>=_Rdcxu9ofFF&PVcfXK{HocxNOsrLMm#2~^ zkK6FAlZ#Db*rfK3Tu7y&xE} zI8ym&>OPEC7P>k+0USpUjzy4j;z4n29ZAD;d4MBv~F9%C16ww7PD(<<}CkVNPtN zcS6qhSv`BkRzx>6G0Y=8qz$-Sii@C(fQd7ImCX}@mR|yN42Nv`tFdsU6V^Nq;4FvS_?%;Ca zUz~?EgkFvB=;npTcynn+2By&V`W;WXmlS0G&%ac?D~s7Z0h* zeWjX$7f7BWbN~d1Hca(_G=(y)O)qAT#;JDpY{qGbcMt4(J5QE|3D-CjDTTrG#`;#~ zj$PVRYIH*qUF5I%N`KZ!qz!9GNME_UeS!mvhZ5?O8sw%F*&3v6+E$Wxhh~~w$%$1b zsehR)IbO#6*vwgha(bF^*T$l;ddNaWx9RX8nFcX7W#8*GZo5dzE@r+U6yJS&SJT$>?hWt_VUHYqeW6~!vBcKj#cDbp!MMS$<6)ax zsCopv;C-745IY#@WAo~+qWQXo8eR_UTtV>3!X|qvykxTyTHLsEv`PnY=CGp6c9&@v zA2s%R8p-=?SB%h|ujl0w!BlBxG?JtE@66eLZ2Rzx0DJB^d546Xau{e71{-XxPZ0HX zO?#gb6gg2(DY+ks4!EbAc-7aigQYg>FZnar>6O{^u9sMCXqXaDUOMG}Ol&}OO{gZV z#@%0r?_~_%l%9eR9_3P*)tdhd_tvap=}4>9VwLz{LsM+ZPdMX`nc+MGm5dnH_;{@Y|i zi)Q@9pltn6a{oMIu)J}rs`IEU3GP?BX*7g(yE7>qL0`!rN%sPgVxD5;v0H(CfRCPj z>sRY$Q1^P}!-R?{Rii8`MY*U>8SyG>TYbayd$A$mn`#+rf$dL^@&(BIWNDSWk3fj} z_%DdMw^u)&!}YAKGYd}h&gd5>F&QP(MP*G<6~mk$gt#zPXC+Mu`ZJavAf#CZLR%z* zD3}%a34IiJEnrAxCo{!s`^1%PUM#*jh{=b(m?>2)nT8f0xYh9ple8)-jQ7q?#er z<8S)a@*B60zC4;rw&B_Bg-o;sn`}CQ)YjEiD$y70byJyBtnBJ)TsAdZ)>Q2%O%$2W zWUE%Ea3Xt%q*jJ$7{E@yW9dq7Q8>+ms84VH9Rz@r{hFX|IpW9V9 zFgExrA^9avEKI|$mw^w~Wh+-^M{R{m2y60`4Dy z5;*5|)H2bPHQAe+P`BP6FmV$v@Eg1{ID=*N?(`yOO1AejkH6gW)#qbxlijGOUXVn~Qwqr_w z>X{e9@W9K=4kVv$v>vf4UyM}9NM?AVtQd1z(o8!v>%B&~Ube-CySKPY-b3p?YcZAq zjV<+hb+N{qHTHBKQ#5I|E&-5S*UZBKnuCi@>X&rozUjp zGlP@F%hfMNx93qax0!2I*ANP>hV@XgrD^yzh#EWp&PYZm0RiT7!6ODmpFaS)$P0*Cu@n{l74fuUeYwN z&M}RtE^GzI;(ceI@0}hSRBPl=e~!Y$V4z8HgK& zB!qk~KB>KQuJQ6l$I0pvD?-EC3TTDXIx(Z#ZY;u=vzinGv6F5}U}7cUpX2D4?tn_` zZI=FWpKEk#HlU1jY{NpBC%sj-p(BKD24Z0Vd-U_?5Vl6?3o5sL^FER>U|&9T)W67n zdKwu|pXiW4wdDyf{Atd(GPR13@X;l74A66>O|A^xZrV}}c%L53dOt&5_9}2JXhVy*Z8Ffx{>#s zcJuD3j3z4gN8i(&}W5nq@$P;;j4P-`_%orPVX!*=ky46?Wt@ij%jN>?}<~ zw@0UQ1^Ijsb1@90&Y1PJD8UL^Dz!C3%62WJS!hPJ=kTrr5Y@iTY`fB$o}|#8rs!zI 
z-d3)V1PaEkS6?0E2=*vhTW=GEc%fkNK=nx*ITeYM+7^s(;kG0j$lIlAdNKx1Pc%1i z=etcdEPXadyx7U4O-Yc8{w-5We3&OCG-So9a-Dl_umCuGq^j95*PdJZxO_G`7p85b z;P9}?jA|~mBcDruXT3KVAzQf|WmjK~#aj@%dzbMc0)^(FwwgsZuT_0BKm^i@*{>Nt z4IDXOa=xp{e4m%qSAFd>k;M?t&_b(|xu!W@MIRL9=Qr$udPm=kd)Ys^#Z~HOZL#P+ zCekyk2NrYbGbTQhWE|opnlGcYp$vzIZ3CYvIw94!9mXL*3jm z9^5WqwBE{7nEL9h2T>qYe{>0M!Zbe^J*AiNB9*^D;jvUrHdLOA+N5(94&U&TJJNSN~`h&cg$oeZ?833Ac6N49~es zB~BKD@Ih58bJm`}@qu4QFt`(3AcVXUbjaQ6Hih`k)e`u0bqICqCaT&quM79C!|5cw z&*hDj8AmK1I?932T=+FaP9 z>36QasBqJ+Nm$>XL9XqGmYm~dQ!ZA++8OTE^A9$#BrGVpCU;N8VBC?mi4BSwD{@p( z{ZzM=!i!XqbG&taO)@q za!84F-r3UZf)V_bHDyu?I3E)+V}Me2SHGzS$T&);PLMBNk-W4~da61g7yBTRlKKjo zdSmxURAZ3ml~%~KecCmdG;a>UH9*SJBlrZa$Qkj44$6RxLfpSR;w6_badI%wuGYYd zWa?Nj!|(tFzWYi7uHW7|8T*{V(nJBA=gmZ$@4L0=XU>{wgYps>npy5kI>L{dSVuPs zMml$^R`6KbiA71Onr3Abt8IR9NvmIUsSP(H_zdNqF1H#Dv^8ffcoAU)X4%lE;*W^X zAm}A|$J+t}Dzik&l+hMVuV#d$0~pphv~Gd6kaf&3GUZpw(?S zwTw6$<}17eFNmCM??`ne!!2Y+Zxos@048EKtW z(A|Mj!n`{jY0`RdKys}E&s$sX{z*j7{`R$sB*k$gk#@GKq&UnA=3?W^r<2nC(9^v} zjWjg$seU#bTfRuu0*Jq2;sh(TW^DVQPixLVcrLH>2yVPQ1DOCECd|K<^L>lrbd;Y~q!=_V&?T-_Kt@1&DU`c>{?-{RJrYvpxYS{Fq?RVQ%hpoiBK6 zX>P$hZNxWlP!l(p(I5OH9wvy(b&03ys4gn29 zrgQ{3yn5VZaMi*loEgG*Sd=3bRy6pc>rH!>RPM=hEoS%+uV>hp+-ayUu76w1-)8Ob zJX*e{vVSyqNAQf9wU6M))7Xbue;sZ(5yd>#G7aQ95^K@x+&)kxRO_Nzu~v2a3fV$H zz3~`CoP&wD>gNw6PScpfNJRW#^@~!ITB^oSL}!wzpuOv4DL?IURGcRzk}-1Yaoc-s znPnG{QRsa`}Tb|&MKi& z@~485QUDuSQn2db9J3@0y{Tb{H4)?-uY3M%7)h+0!kJUHOSoS-Hp;h&y0AJ6$25gl zGYip#`H~#S*{T_dv`P5OW7ojqNe!nmR^-hosTj|1+e6CqU4ufzE{dYnv+?R1PdH9* zO7vC=>u%?U6Z>{_xD^8fPRt)ud4&#l1}bp^4F%h%(eo*7n@cUTGd1CN(sv5R!ddl{ z*E-C$?v+lDFAjiFU-ju|kI8YvtKNoj*O3J0IV8@l4#9Cw4!V#A@vEG`QO?*V)ctYE z&RehssFkkSDv=FYh(gc2@sXU+tO0GHM@RIU-Xft&yTAuU9H)s6YW7OMa|lFfhlVWn zgM=XYp?9h1kY+=&@|4lgk?fZZ5-dXF_!8Q`4O*39G7Coj;W#os@!?9VJw^xz!;tkZ2fe`p~9 zFzQ%IF8&2mq_U2X8UfR`kkq`Ef47b+RojN9AnU|A${^vQbfQaGLi@*H7XMh6oY@w* z#I5()f-)p?kP){fBYT_tHx!2-tDk4a_8nAgq@#{@USnQ`$Kz&HE1y?Z0Ee=TzSwHV zhu@Hgr^wJx<+(GAo=_TkN_bVl+mA<|d(Ovb+goSVWv6SHK97`?8!3OjX_%JW&|0ps 
z8KQ!Hvu(0%#osHvA#IXx=@Qr0ey4Zh=yMgzr@fRXt2J$vj<*YeMUCQ&y>>n|K7LwJ zf}07SN)0zqaG7jK`tli^vJ5Ce$UQyXSHmpxmJ}$nX=v}8F~sA_<*v4t#Nk>8)y#kg zJ8}ZaY^stg44_`1_80s5e*119Y(}G=HYIop1?10YS4Ln8D&g8omRaeD?K}n1%#~x! z$efjP!RtUFPt`OC?`9Tnd-DN1v=27m$8*8mrj3N1sIiZl3d+F^TA=SM$%2V4 z%Q@orbR6NjQ3_l|0t_=Yx8M3bqT_TP;+r?8dUA41^BepSkY>OeX%f>Z7_Y@EIlXJ&y=t8rk2M+&H#x|07>*-GzoAM723#>r|oS+Mept{5HJ>G#`^woPef z9fJuoHM>9hkE5W%OYXQYn`0g*z9WJdau zZ5Wrz5C>K;PXiy)Ye=*DHe79)w%TmH=@0$*tq4}i$S8d#57#%>>iHyJZsq+Uq1D$k zBM>m>k#qBTNn!d*qs(#-T;R}$%Td2&g%bV4iY;xHv z4f^f+J}jE!TU8VaHE;|s<81F!Tnqkur#Ni6HcRmQ^TFxop1H1TVh~Z1rVl=yBX`zACel2T?C7xQ|3ntxk?(w;2XLYm}wNyg3X0JB6| zWr_9A6FI+en}+RP*9+~!F#~-E(mR#rV~<)lG~!$+>_b8Hw8f?m%!=Tio^S!MlBZ=; zDqNs=N(AHul$V3U*bCi}bL-1P!n`K4vP{svVvtJCA$hksHXmOO*j<0v&E9O z{l;B+BPDpC)&U)oY^BoROmK=|^~~_^?*oS~2g{-a1-pSdbq5a08+?~aCnRu)#UwX_ zb`R+^j|0cC&cbdm5yiV=vLMqZ0AHy}oQcS-lCmveIb98EtGq99Apmzv&GOP5&Vo6q zxDT(euGQtu3ahXW{e(7(%J${LB!G;9sw9|TZudc}m7vg|Y*{7E&7{|Ffb%1h@OMp{ ze7MG~=)DpH6h6BwQ*jB5YV(U;!q_Sy5H4`y zIygU`k1|jpP8s)eR!juJMd#4d;!|u z*iu(b3q&FMoB!(6 z*C{2Z^^~!#(ewb*R++?0Vau(uIZZ9ha%o`+%hp26U9PPI!Cq@qG@Wli2%giNFuTC& zKVpjKg-p1Jb(-x*YgSQnE%oX?F1KQosU(dqtrmn!OE5cf=tb40t%ksNSE694q%ZIw z<~5d$rO$5g9MW=IsI`g>b)#@sK#(KoXCd(Ss$_iX$g)w%N;yk)n>zMTSU*dWD|g$5 z-h2(mSz%&Sp9!iw_TJwuzcZ2VDkrT%0%Pa#FaCU`t>d8%@yxE&lApaB!9p6%)sWXG za4m&~R@I~zgegRDCtnU@a7|Lkk(s4pMy~c9ep?Uy9YaCtnUP7i_LXk3W=6W^-J}U_ zd&G&C^K49}aWIF7O&88N8$j}k(@$`21H=>bZK$po3r2KNA6{w$E%`Le_u}b6v$ACx zH4gR_w?br7RL~J^vl$??ed6PjMV4q)IUtg+=Ab&YSEs^ynPc)2s-vCHu*sAWstsK$ zoS3#>!*yZ=8nUm)sACP1w$Ab=p}|zAi-r1FvvD&!4?q8cTs-P=eHAI4QUyiC389#Ev8Box5~P?Krq^sJwV{A>P@ z^b_Dp#+CbO`Ujq~fapA1B^d2^c`8-C zD4f%rJ}5NMT?RtBb+KPpgVjm-@Y2j|8sPQpA(h;nRN)cg*D=4Df*Zw#PJR^j5TYyxitw|2wMsf8=&WnO-qG;fg-m3`y9)y;Uqe zeG%YCk&z%b!qGS4$pBgoC{;H8DlYNh3hl%G6qXFFPKq6 z#X8Nd@t+czloN-}0gfJJlS->FeWT4y7qMCKw@E5g04m~;q>xD^a1<=k7aKy>l=9*X z==h`LnR{XR>GY%hN^31q1*9iLss z&J-+Z8xsXobj@dNqZTrDGO1D$Qo9Pmge5RA5Z1}#UXu@iu0_^6sX=OlXs0g+2MZdB 
zx!C*gbW0{7d;#=?lZlO-tPO3LM3BF1KKwpSn9q0yqGJQlrVDI`AXa(bkdc2?cl+!& z?J-A!(Lw-Z$)K>sQ(>GbikqID`!ryGlI{Dh4?qmOz zO_v%x($1R39XEr*bD~x%!VM&}+H_b^$@9;sBj^g3W~@fQa-yt#NvC$wk0r8t!Aq%u z*X?Bd-CS07?k*LDx^u~{;MwFVYD+Q|^kz9-h~e^hn9YhEU1Z13-7L$Y-rYZ5FwDg8QyV zUa(9x5mI0qkHC_au`L^}Q*`|ybp88Z>2SbB1g0ZJ7Q8Moxe^mW;&dW#^=+#MwVU}G ziV|IhT|3rX#kwCi!ZO|K1k|#&UVec17Kr-JLEnA~9o*Ll4q*AmPhKETBI=$$p7@0& zk}}r(GH^cN;-1~MJTK>P^p$`Ji?X_9q^Bt}Ua#V*VxhD#yz&OKM~#h*e5{9joY~8WQ{RkL7_sz*>yw z0TiT54l*_U#H}Tjhn92?r2!trv-T-v)iJvgtRSfLnB7&NfF>Z%Vq_Q4?y(KA!I#KP zsQ(qB{X-)u)l=RgvXWzD=#BnUzR<;2-C4lo4xlGF{;(474|C^!B0Dp2@B9@D-6}u9 z_)~>ZsY4r=!`v#sFK;BAs>YGm(2QZ)cjL0@sLn9PW*5se*f@4w;~`X9 zDePqMmw9epB{ug9#n4SPGU@CtWY^oMIR>cjyO$I##i^0GG{w;4WrR=&*zv@R16|tt zx%|-J%c3wwmqiZdImH|3{zzWm^EKuf2o1ll{fIG;fTnf~p0~I{TRk}2vPp(4CUsOU{tC}4P=SwxB~)Egdrj5prJQ!r9Bz06$r0)!EI03>c|nn z&kfRsJ1zxV|I@D;t4Lu^l{kyssJ^t<<-$(=b-i&^#`U}(Nq+eDFVY*o`E7H4{ZREa zh#A?=v0dV;$qTRY!ENxZdXnKl9xQ~Ni%oXT!u(9w4g&5QFvnFUDLL;;yP5zWVR8+< zJBtb@5Bi!f0{M=N=rW9=X@88fEhV3;m71@5S%#Kzc)vaKW`Mg{(;B>FE&gsHpEcZ| zNMGr(F7j`3`z4>-(t23}r0n98X{zt+$@M#>Q{q?8Ryk)hDhp=_;VWE zmRiyN7bB_(y2bTQkoYP9+8klY8!VhIGrLpvC?T68cpa3+ua?r)`*j|Ei*5egcC~RY zW=-HfeK`-y-@Tw2(<=4+^Y?GBxg9%Sm7~w*m|V?rFG)XU(rRCF4 z$T9Xd^u=m6nO!$|eg1yyOUQ|QA`T%5U(z*>Q z2byRod$*$DhnX7&a!m_uO>M}^kocap1cL^f^~x}DUd#{>cCZ!%-i&gTaaM_t=?JIh4rX=@xPQSxqsav*4sN+LIV;BZgkzon>C#R35C^6k zPRqm_!p=Yr2urZ4W_P{Yik85<0l7ia7iuJ!lt1ax_Ns{Rh{AX=vG5(4^wrFZhJ|#& zrzUSJ3&-v46Kjxmu&2dI7U+}`S=X*PxBn6Tv1@;Qme=BR>Ar8%-?QY0uSEHP{?Xasq+glkpRRr* zYp;2_&%P*V-*D@nJkw7WaE1aT@1_+yyq{G6!-xK131gL006DYJQ0fD(q5hu_``rR_ zb%6FH3Az4$fS~_kGw%{F&876|%woP<@aH%6<2ir-t#j)}@L}#@pA>ZKW+ijC)zrc9 zamUZy1v*-*D1W`+iKCb7DqCx0CriNO`S2l9jO^Hf@e@E_kW~2Q7)xGx1jIk1MHUCN4e?_kYckcrMGkQg>HV=$d`d5!a z12n0czx?p~{yqJK(FdRH|0~x2>`DHAcK<6D?PJ>iTXqw&N+b3wx*zJ=*H3<8I+~>< z*f6~^Psj4$mi5IA&x-Z5-?bT0%MGY|wnPNFe)nm`&ccsxHvGB7->~1aC_?t-r+x^r z{Hoc3{^(jwE|f@Lb%g?pSZ=uT19U_CF^o6>)jKvK@$=Qb*Iqvn-kz6>=|ydIRKUQnJp@8a9wI?_ZRhLu=u&9{_)~D8NhgWt{UEY_~ZHTiYnWD!)-b@ 
z36*b=Qnyv#VYkrvp8nt1+1G=6(@i(rdbWP>gp9{eIs5F5MD5&3IK7XxKijDBd0=C# zhmiZw>YjIAr|pdd%%HYAWcup~A;)dBfMqH;8zX*<9{l1!bEBM`+NyDJZlBZ+7^Z{l zaMA}1T{V$ZBl-iKV&y34b@yJ=xjuK+G3VfA`~TvH-8;--VqW{=UR{)$3FYS+L2 z{a*}tiERc>(N2s|YtN5+f2Vo$q2t9UPq5fD27un!=9=~#d|mWG?K38w0JcRQciPt+ z;B?=32AI5CWLR0e_FhD38u{bhe$K{+_W)8|ZrUZIYAJ<9+6kePh()Nf>Ks@TTd#0R z=1xuO{d*Vgy<5{EU%GJU1O-8EkwQE0?lCn_)ju@!q>jI#I`64`x3;4^iHvuvu1`ox z8cP}*85^nJ5$E$+6i0}!civiwOBh}1E~Qky_T)E;<5&Ot&%IL=)3GwoQ5segO2}hJ z&Pn}t=$HSz_BnE8J}VD*;lNKZ$A`E%YgG)LgJwOBHRYrj`lB>Tl-cj>Thw^#_@d)H z223SS?%9%?vY$#E)EPtTt%&V%E>|?nHtw$UwU8z3VDtNTySMMpMZX=^oX3y$I_=-6 z<4NG|@;bx6U3{q8*4BP<|MxGROXX%Qp&%c#^>0t5mv0?j6U2$*8yzawz1?g^Yd%Zw z+veVXv+eyLFt?O(=KqrHuTN5uPr6buJ0u>pg|!%dF9jdQ0j7Pwxksj;Bsxb`CasW* z*W@sb-i-+!^3(W_3_qi?)tx7YILjYjnmhahe}E+~o_njS8E6}g(g?hmCb9q94UV2M zluyNqftQTeO6{$&&-FE2HI!H6e)cSv5B;9~xijZA^Y;?4x9Lco*F(CR0>T*u1s}z- z>gr$L-cP%8*)o3Zh|~{@9OdrgSgQTIIDYk>Jg~T?dQ1gqp9s;I_+cKZts=a3Elkn$ z7~WE^gKLLN$ylg8*o@>%l5u_Zqk!&h?{f&ia;sT`9;*G9w)38Iy9BVCLkxW80QuJ` zq7uG|HfgHw&i*UUNxi0^IDIeYH%{dP7b;S$KHS@mo~Q-x=sqF$vpS(^ws*^%0j#Oy zenMV-BBgxdhEztT%CV&VQvZ6F;`z5_*An`e_7iY}=^jZ}GZvfy?FJJe2btH-^X|7O z#~>ub{tIujzw-UIo%b5DG5ZB8J6=ln8)0*SfacGi5H@~~uuCs{826Kh^Xe<>OBWNs zGGt;ZMQytL{_oFaQ~9lR?a70SV>_00|E>Ue0PV@cpHZ^=az)yG{cCAIjb{ ztjTBF162`F6cHt$BA_Tr5m1oO1*IsU^xk_5y+a^~NHHKFy;p(ILhl`oNbdv)y+ud> zDM{$u=-&Ie|Baq=?)}D_gqe5NthIit&bW@aTNhXFTsTGHv*+{K6k~rABQZDB=Px>d z?;(*GiNY){r~lQj^TPYz>+KX6Vn6*(IM&Czzei>46ZVn*3yIl&k=S~W_VhpUaYYD* zKz*N;=ML{)%mSa@b5$in#d6qVuq>dDrvAmB<65iyl59`?AAr*X1E^p1We)O#I|Tvb z7eAh6!?)sk6w%K@D2UT8UCbRiJuQ*d5=d{PE1N4Oo>pAxqolEYne%DwL}`WO{9rmg z?+6{M)o-3T{$L#bmvJd6h%8K|K%sYfHR+M(fFYfj^&=(?C#H&ze=rn|P4Vy%dv~gG znnj}iA1d?Axg|Qox3YV0tf)_IxnCk(ys+z(8Q+YIQ1Bn4N-!&SKVolkwy4bmyHd%# z9A`VdAmSSSXSw@xN3Mz+T9VEaDb#%UR&3i|0;Piw`(suj3v>KZ5HY;Z#ZJrBDHZqo z5k~DN@S6=jCO@k4`*XPA;9s(~DA`Z>hb;YNIBTi&pT8-|frdi0e0TlKSJ)pIbpB z8)fAn_5NRIaC#vnx`wTNlhM=JsxusqFI@iA!f|{>bO!5LQqa!#G^gI-)qH~%{{wj; zL`n?T9Ki4HwO2JauPZq523l@lY5!R13*vTN^ox-C+lK$wXMxIwty*^Z+f!DJK0gD? 
z9%AXwqDS$m&g6T(F38-bfv!?$5++P0*(rE?^6u`P$BXBPXUC>`?thJPow@&qK5B`$ zbU(0x+~vQf_=9S{zfIRjcXxmy`2|R?pSPBr{rR<6Hwv_|)g>^wj-DKNAm6 zYH?S2`pT>`*!|4uclNtWYk6b-=r0)kCudyTd8WH2;U{kU%j}}}2qzjrwyBI>mAbP} z8@tc^$(MohL_H{B(U*D}5&j&SXm?#Yn2z}@X;5v;Q zpqN8nMK@PKf%>&$)lG&;@h%Nkeae_mdR!61UG5S$;@D55VWHUm3&Rt8o4Q_Ye`QF4_A z&2pNPX;=!b181mg2_;)ZdCAx@TeDci_UCVQrrl=0C;aqWr5mK`s>H6>92#T^|JaIk z6}1&n+f)mnqzH|Y9e*HSKb)uMha0K<*;h{~Z}e*tUa!Fa{cZmE7C`e#wZt}&OK&!7 zVkaE>b2i!`M1hooIDCmk%)A!y3j@ zU<()ORE^4wzrM+~xn6C%diU|8^KL2o>#J`%VpDR{gz$Anj^ek^{~@0LB>x`^zOYC` z+zFAq}gX$ z(uQLcV=d9k-(s)(%m`0AAAb~5B!gW>mG}7lp^E;T_8%`*qWjAZac6n09@lo=SuDS- zaoeRb-u6o0(3OIJbM>9nmKSQ^JF2(UrSJrb=G291kB7mB5WT zC)59B!2clNKYw!dE)fkcMBnAI4jVhcP@A=8Dpy^!>*M*)9w}CwAyI)oF!{d~=YJ{0 zz@lIJVPTkyEh8!0My1Ve?EJskN}6amt4L4WM0@=I(?6__h%eeE9bk15C;6ynxeI^!h?APUF%5CP)lOhn%n^0IqJw@~95{$L2RQS9gq8tjQL9)xr z#59q0`arsWmP2z|TBkFY_(}G3LoOBx2vs=6Zcznwc7B?Y`8Za4Tw<(-fI{U1rWBT0 z?Nlbm;*fGm9dK6=*uN>hP{UxeiOA~rp!GCvdSf`Xt~^~r@Drg|2esvCV%ryUxnV63 z+M$*FrU2vW(zE_m;j?`;Dad9*r3G?KpW@(VweQPO#R^t)4y?Y#ragiKs{LxtL?%y# z4(HkpAHN46XT|DQmbJl4<7nKP>m-fZRag@~cvj%mD!?Z(6bD+*v^S*!rl@-GHxX;l zOMq5l50CnU4vYU0zwi`HP?PCp_N5(bF^TPSdq&mYTcVBJWd-m2&l=`$0MPlskV+#` z_WGD~(vi9m$r%b3dd01mhy*BsnH}||5_t8V-M%dq(?>8R}qdxI(3 zp6}Yo#J>L8Pj**G_i|bWNho-VA5{O7@26ah5BG4YncUIdgc>R}s|1s-SVsQSm-vT+ zV@9WTbNReGl)-qolHD1Q%fY3-D4RA6RX}SC25;$9*m|W)sPZl~z4U%5T!lhsDHlU!WlC$=}p zpuICrYa##O)zH-|5 zYzs{vhCv)~BJO>XXq6i(%6v>V{7>K6&*=hI$aO#~6^&~OS~@uHpnAtRqbC~o zsB`rxO=Hu-wROdD$VuN@zEMd0%`Eo|rk5V=CM0DR$0epVK2V&u8mC&+`aP&7*K94( z`dmFS7Lfdd2@HeJW0StiATEZg;|$oE3QO!g!lSTf3HEH*Ih;RWkFj%PHBoLvz#I%C z=pwA87=4s+;i_XK$MtO3yUV&(fT9{TBPw<}S$s*Q5B*6_0DG)eBg0F9aS{yvt=sME z^sHw7sy0{KXeq$`zLA-V`@!rYurWm=s&2IGNDUdgCnVF`nE_PU)m+SwT2vAqHGe-z zY5qQf*kH)E%;v;Ge`x4`_SBlePou%**jCFzYiI*tW#&} z(q-G21OX~j7={4Q2L+0E#+#7_8e>Nwv4Ec2JgRJ~n(}HEkZa`Q-W0= zCfc>~lRyqozM)KyA=-0!kpy^6Gs*cn72l9nKejr#N=pZdi+{&sA|3ta*dlBkDWeFE zA2u;HUvKzGVp#O7JLr224(sGNAw9STVN~sURfZT|GHP462 
zFqLIC#D;%>M1}50uw*5bZG)Rf9&_((wBd}6VLOsWjEeTkX4w%JHJ?DX&Qb`C65(Ew z*^)V;6*~Kxty4OOPj!FlJ0D2^Pkib++buDpO5W0rQJJ*LKc^JvS%0bp^jMe1%}XM#NeM6%pG*31KSh7IjX*=r z`mEsbP&Hs~=MCTo=q&f~T^@S(c=)8@>IzmE-@6>E{ zuVX5~wkMuvnDBQ0<ks&`L-vXTNJBJ5N>vNi&@1*ivGnuO*n_|7h_9pAa^5Lw3$^DY6R@7LJ|trR%DpKlVH-o@dpjq+4B*QT@mPDC>iI^<o*r$UMg%*?xN$c z0p8q}eMk>0sh#g}c|HT;gZp`jiQO$58#gF0_rPDS&k0v-GEn!riCc3S#wLS_lN ze4}^*KY*W{Vr`=8pljNXhyF~7=NRj ziy|!$%kH)Sq!9fA?n!gLbv-|>;SJ-BmgsMzO#8j%n34t(l^2hMQz}hkMx&W|j)AjANxP~Jr**O( zzPVwGbWmcOBZ!Et4PSJpnXv2}UZK?gg0F?}uS`uMFJw9hr^Y1;A4NOPrTIBjP9N%9 za2@CLx-vHosyt{+#*7e-_$>}S3Mm~pD5Y%(_ge@FDnSUPD=S@CWol> zYVVl2dKmXotN?DdFL5S7I>)w$3bq>8T}RupDpsspg+7E_~^&^;Tbc~vzL?s@KfSB1kjv+Z!@vRIa6|J`an}1sY^_JF>Z@>Z~Gqce<52`I& z8C{Mo>%@z}Y*iW}6B<=e{T^U^6@|IeBUDS=LmGRwWmbLVZpF)HiPt7PYd5GfmJ1T! zSP@$+Br;5?k{AK`(VJ&nEvFRC6o$esQS#J2cRel`6Y7_#Z-;0)8l*q69%$&!;I%e` z*@P=jmx(D~M*EiEyCUeWBxUisqZAdJI_&kqbUs{A9sLNeEJF`db9gIj?Bb*r_tVR_ zm}2IPGY}4F+O`e!dq<-BP2qKzoIoFhF+{dWPCtVZ+myoZ`e-t})KzMudXL?VNzT``HD0AhbyKX_PVcS)_Ib5&y3AJ874@h^^3DYq9Rxvco1jm;q8 zsNRB*u)M{mRn2LnRGk|Zb}FQD`Q=#!5|)PdK1l7NocD5YYo=1OoOO*2_ceD&7p`I3 zbc4cvr_+mbM_$vMK6{x6(ev8)U75vYy}p@Qp|qw<_ma>Ve5%}QU4x^U-b(M?`*AlK zWxoOSB|_+Zg30o93#&y`0%r8P$Ko&lX)Zp#;CF1V$S6sF&uFO9HG;PFsYJp%Dtf%F zA=TFj8>YDm$C~uOtq~z}@b~_vIXB;co?RoVMpxGP&vEzI`2sqtRGw{5380U&%W@@f znPqaS@r36Cu#cUZ=2VW&`ss~G=FvW-@$TR)!%w!k==;2b(3`bONbl_81FvUdcj04N zV1ieOqAmReCllBs13VQq2mi(B#%r4L+z_Qpc*ETvF3-Ofh$dT;kYmvUO^A~?=nG_I zzUou8eMY5$UOLN3#(hYGID?KY;oZ+ZP?6}{{fK>!0#~3kbYFET{RXN>B}G7gWS_tx z9S5qBEps)|Qx#qf9M=BQ_!;yY*EY}F%vu*6*iIFjKI&kK z!EObY<|=A0mZM;?Ruf^~RUKRry_pw$N{KIM^rny*1d1l68i>a&i zYg6~?d_5(#nleF^gJk(n$=FA>&cs+VlqWQd(Vb0f2r;P~-LH0+Ec&VO6MI{!-TXsv z5|PdSQa#8`}jpuD+SSyT-`gQ;pwe}P!@gn&IRvP525p! 
z)Q&ABGbndtbF;Pk>zD0Nzl@emucPGos-_yfVl@wbJusW-FXDUkku@((#=jY}kp&v| zuKTReFowo)G36th=}H&s@2#$4>M0%^R5Q!xe{1BB7qhPG4OMCHRU~(x+TLMdlDU7B zmk5~q60`20z>afcUO8ezLPXw}U90WgW`aJV0zB}Vpxga0xdifxj#?`6$eR(K!JO!I z5S})_%2)(DUs97P)-dti(Pf|6pk2v7Dqi&+-rzaH_+KhJERgf$Anjs~Bas*SCiTgc znL?$>SEh}SL#~m1&X%-msEfJ6H`kb@PeE@*n0pj=)y4sj9T#o&hMT4IU`%(s;keeV z**;Vxi`*I^@EzOIVbV3o#v{;8SAScx!XI5OZp4&v^d+-Xrqu683GP`wQZX;B>9kZr zs@t*g0lazjVG+ z9I|GG{zbMGW&FHur8^bgQRlq&g;6=V_(peCRo4{OH{&UPGK zlGCEFTI$B#L2F%-5dks5E56^MYx|VBeFcU^4o08$9O|3<@o_IJj5aEK^SNzsD$t#` zG2n{Zu5Yv&wO=pQyu7^R*u!Cro;%C_fw`(kdZdQHfC&+ObnIvR&TLnHvLOA`$$uz@YH(qE&4oh(;V^S(~t=-T&!t8_)b+vFOpu_cgpx;}H) zn{H(zq5nKNNFVOBAn(!Y*^zTR|MkAl`!NA&gKR4%zE6KKd^<0F_4(esCCb(iR9w&r z3@9)guF)2xPv?oZEo$Xk`@Lo1QI+H#{v@hcza1rtq>jwdLB-qUdf{p6x3f!}^>*pY z)bhlkUO#H%&$0u97Z{{eg;(^b@cXjW*CCbUMshM6v1LkMfkeU2)a~+tFki8K6=D;i zraHl!UlLsQ^Gpm!MNy?ydCsC-5-2-Bw4y=Wh$?mB_UsT%Z6u{bY1)WWuFmjmfak5E zucFL->+DVW#EJ5t1ak*r)@4l_1)w3XfH+I2quem$xxw=m*a z0JOngLH&$yPGCT@9YTc!NYWyQ0ICM;k32J{U)&@?7+QR~FjRPhOY!()aGAGMvGo%B z9=odL)ciFR&cx0FY0#wl5tN?Vr|_)a8}rV8cx!~iZE7)?(&m9*E{&L)X`fPRW2+=! 
z{LGU}^sL4xch0Zo-raxe+8p$875Y%|3sg2skfI{U;YuJ-t|$+E~S(MtIBjD$Ap;Sc(L##a7l~RycI6TU!Y1e72sU+uz^=2E~|!0$uCl?OP2` zI{9p!-h(C&x4CNP&3ow^oi;WodZDiC!ohYPA=Oc@PY;z=yz?Szf4NeAb?PJ1m`TAjXAzoIx2&ho!;1C9L6AqC zJE3KU51v@vLspE$>3vxh=4`fe<@6w)&4H?KM32K`);)(@rt~ruUUG z>NO4aN|z7Kp2l!J7DoU}O^n~lUKYv!M?US}AxEwAH`1?#D;XR`#@H-J!N{x2*~ut) zW{z~5GS!(B;mc3b9zl6Y6-IKzZ)tn?T_!IBjUh!jB>D4SNj{BL* zlO^j28J0fdS9fhpxpC@cX`XRZlg5F2=dr%WOCyD#$()+!)CBtr2R?m}xLY2BZME$& zeMdC@>STk7+Ep{CcPmC_>r^yjvkN8w{KoXC-_}khLsXM~m6&Kjw$8qDuaA0H0qGXf z$`k&=U~ct_lK1n{1Y=66sChMn98?RsO+%tE%Nq4?#9u0Py`18|Bg5(1=RU|VITe9Z zidxx%q}K!JzHNCD1itRfR`y#CTkp4Twbv*G>8(UY0lPIV z65f+dYd$S3jRH-tY9euiw(z6%jhf+uKGTsN4aJMqJIB{JZ0DqXu;D8<8z4e5O3&$w z#1!nPua8y1a`vOtf)Lm$y_5IWRU`Xy$%n$ev@64VSh0K|@odi^d#ThY7vD$@+huKy zeHkItK^@kwoVhq|ksVbR>jTMKNR3nyOOXlHmO*>6G=3>72MylIUQY4-5j#aNLE^ec z@~`b{N|Kbuih6{>H%Ii0yH^{L^||BzhX{Pt>%+y57n_#Ky@i{>T8Ct?4}WHVFDs`T zMWB%T%eMYM3>{FgAo~XeYc*7?+4ZIJUKtyeFN?P&%qk;`MxQVhkv!vVe~xO27>7PQ z&`(>}fk+j{O0#iQ1&FK^1-rU2>W8!TshLs*#|V=a7dut!!SVTyl$V$GK7dc?HMshl z-1LJjH#}mdZY$}({zNI=5%a8{f*xjES~h>rPgAd}cJPX}W36$xE+Ah~e+4WFxMC!U zHMV6~QjxlyH$s&I13!20dD1mf;|>@~tAh(K3_o#SIkRq@E~}R&Efjv{+`u`!@PHNt z{VQXCk+zN|=^;e!!q%k?*5hQN6;5co50tAm~b1@*9M`?@Z;CcOkv= z*EcsmdZnVZsdfu=4mErfG~D50+t=-OT7CO&B^&$eOxPX1!xQl0N!HxSw?-$Y`0P)_ z8p}i8Bc+z;kux(a8mLyz#{DzM?d2~@YGs}5OTdz>;00|9dnC_NE{Pg*ta6Bchp;${ zaa7$S>&Zh_%GtfBUPn3X@k5O<7(si3cz-hz($3FQE$3PrM$SY2%~U=3Q@NI=dS$f< zPgIYW#OdNbVq_wp4CG3G=QnS_3k=q1ZRgJWRvS={w5Z}Ut)*~OC$qB0 z+s=cD(S5Cuyj$%yRPO?q8hApaD8N5fgb>D3@TY9XQoD0y)y%77p1bano~oaYLKner zXR2-zt;Nb2ZPtQse2O2pqq@r2kWfX>Wu$R{dThjq_qtXGymaS?9b}epX<7*!hYR3q z%r^E#I}U|P9;N5TRQi*P|?FWCrdFU z!aK#7v0s2`>~3D8YMS2ZAQ*L<{M?0|i(@ml@Kz_g%#wxc2~Y+0E9wn7-(%W*qZae| zP}30iNwv~6=9!H%N(X}v%-)QQz0?wZZF0(BAvyOk2{QU| zQ2Z+Qr`xQLjPs0e3{&yklN+=K>ES%gWJxZ~F69^xEY+`Ir+GzUa!o2|L_7pv-;|=1 z+WT~4O9EMfXI!y40R@@cu0Zy}j=kXlu_?`QAhq7DuQ0%5eBabstnMZ?a{HUVAZ8pY zTfa2niP0D9s?2Y00I%!89Yw)i>C#bT?=A!RHe516QwP|{B&+Fl4BX;)8@@d-6Ei72 z9QY>jV>^g3uC$Y&0!ObsEZeqmFPSC1)h|5m?o8+tfuB3 
zpm@}xfq6fA*{AAOdqcn#XX77=Yr1Pi!Mwh%9xdT~fMfPFPxTwJ^g&yDW%z#A4EL|( zV4o$_i6Z=8+iem!MIBK@ZKfnePZa*lN7ipUfh-l(@TwKnhP>~5{Y6q3pz_#kQvwvW z^O=GZ0xz-uT6Lhk+bT)_l~|>-#j#ZIDc><1_RTT$y-%UR*XI+W0^Y@8iMY?f4|wy{ z7p7xMzkG`BxB;8r%Uww#bElVS?<-|~+fs5>K>g(=>iO*tscw!`9BxXIM_j_eIYBNe z^xoHFya9nHQ}W32=c5Gn99Aqr=Gy6ih}6aV4{Qr>vL0U(6v#$5T0AXTJe(%S9A(?r zeWgeepbs6aSbDbM%`Z`)lRxP)gaB>O~xixoRVnDn+P7WTiM{N{kF0kUg z%#PgN_dQpMYn`GIKPLI~^2t|) zL&%lULmJi*eR@Zvc>&G`aRsF%I|gY3l@%%*@M{YX>tpp4ddF-DNxXWwWPGG5eZnM6 zQR9_kW$2r&BS&3Tw#Xr&J}(?!R4Bh4C$eJv6Wj{JE!-!!gDeji_30NmIqCNXGs>5B~aRET^P|_$VH5e%0!&Y`_I$K6+e9K~L=g2r;P1DM0uV z>_w1@9+6ez3-gry+Nz&jMoh2T48e-0FB%NF!}P0}H6ib|@G-#JW$bAF@y^n5_NQt^ zqN${f&b>FkV+Qe}R>~;0ES@Zo)!q)Q($2hbeay>%v$)8EMKRWXVE<`R@rzu21GUK_ zfu`bavZUMa_`(wRdfUg7))e5D#|C9Ud9}j_s#ZOUP1@sZ)WYlpQD$ShBPsn8<--8w zst+?>YoiqhtDeL#Q#?bfJJIoSF<}*pw;}qU{wp&jV?a?wv*MvR>_X(RDp~&JkfF}! zcOG9m4;*;DyszC!Ua{^)EPWvWfQqB3swK_j(roWKAjNg+rAf@Igl@wrSx%J7?UiOV z8}+y&3E{Gv+d1T&%jaVk9v#cXz*R+HH;Ts89F1oR^!JA(IghkTLzsIv&K}XnI%W*g znqk>bcl4Ymm1b}$B?FD6rI2(e-~^JgO~-vyQZP4n%Y`dvrI1JSAK?oF&Qm`WbQf-o zPWTw!D2;W9&ov$B$QAF2(gVMNVsE zkp_wQGIVbPcx zEa z&8SW$IFpY3yFsRNUGSq3@W`}OWBG!|M-qG9aU8O6ds02;EPD|+b?I7`M(=7|&mGCX zp&(nt9v{QVq_fW&=8dbn9jvQ>^`u8p3y#&+Hlo@H9O9dMcdJulA)Y@>V7g|ep+2)p zl*{$^>Wf#7gpq@Yw(*_(f+~LBlbF^BuN2p6mQ>bv{uZ< z7ol2BH>Byd-%RI{0jod|A4EMdcjR53%Ip(feJ~>LhufoF^XqniZe2p=NjBXg1iLh6 zJyWulFYeyd?gR=Z<{uE#lIQUbHW?Ke?A~0ed3t*&VqH)NQ62KdA5#|C=B5Fc*H>~w zr=~XC4)&XTPfCOYu8AuVn#R~;~Z)Unw^bK51YYP z={Su~!?(wM&xfQS`{%U@ds5O`kB7ELUsHMsa%v9O+mz>5V7;t;JuxpK7^3<9qc4|k z!cJ7)pIcwv!lZekC7w7^C?%gq)>=+hSr}rK_-p8K+EI$;h+EeW`#VDrYJkk1y~gGkmy)$a@kmJpxut6 zPmeK3-LsSwSgyE@A7}D)2L;A*^=SUpkYbB~c+@5y6e4|m_%jzC=65r`?u?TLEiuu| zk*Cb4k2mfC3;kSUo-Nbe3mafO!x7y*$QY{4l4(3F>=7q5T<%=$Dd}1F9RloC50#+{ z?MKQQkbKkrxP2Y>q19-@efUi9PM%%=yCbZ$p2dn#b`!*ae&S0+tLf2)ZNoI%dJP8& zPL%(9G+mtKvH6Au~o_lpa6ywzn~De_&zOR}_XIlcn+KQtf~CwQ-o z9Mti;oZ&zVUI5?OB|uGKz0J|~mtrV^Iv>GE!vlfBDvR{a5}}mam(P7sn?(sdP6Dtb 
zKg)PG1elxY8N```jK~qbgQR{M^xby88_Rt((%f>&TFOYN-iU`;W;%uwdjp))6n zryCJR431OwB3L2Dif}Z=eIJuB$_qd1*T4WW<)Lpp1=1t`ISU}oZPp8%u7_wF zv~AwhGWJHcrTeOVm<0oYx51?&gfJg4{BY=*BZffu4Brvs zt@a?XA&J}qr9EL9R6r*uh>CkUdpFdKJ%Ujs$@haL@=>9PwPCamhR^{`<7iva1~gE! zHoWNyIp?V}DQ!feFN1%{ zN;0Euhr3JOFb#g{Q8o27UY-?o4TlWBzL^!=FhHq<4$i7?sC_ z6Y;L1__ym^6kwQ0GF<&R+&-HU$*}6p627OzFnK^h|0q%(=kCelVQn}&cJ9c6rmBdtqMTU#fGYDB3+cq?82mVKQ=o^MqH}H$ zE(lk7Qj^!@cADd2P!Ek`tHDx2#1e2X)Euz@`ARo@3Jh>N#_pG;m5b7t0OnlsDfBPV z#j+W!fHe~81Vxfc{n&-d>IuXeg6ZDvDu&dT9ZGlKiZfh%3M-zQ+3=1T-!O3;^3S*Q z&ALrH(UO|8=L{>8Cj1# zmSakDskSKMQ0?EI=7ocMivsd-#T(wM*N_z-d~BXZ+!`Hd*CtRd1`XP#)9oADA75(4 z25aHJ6|NKE>pSrp^4uZU-hCZut)|!Q_nHT6IMAyFV%V&P*6bY4@PTeN-_hA%MlN0UJ`?oPuT`LhT3|4K5;GZ zsia3D#k~}WdgWxWS6}^naoo;vu4OAT`}moQ;C?&V;m+sLTJ__9i)+^OVwX58!+l-sRSBm9~o+))qPtB-q ziW{1-q0R%DszThtr>eQP7(PGqTS=*WXpqucSn#b8^!SLL8YM66T6i>V^8A-r1uC~3 zKfA%O+H!d*HLA$1*Yv7GXriEFBH48o z?D868PNLbejN6p&9>+1UU6V?m%&Id`9;@$mXMMM_-T9szHnz_}W6xoO58i~n=+Fol zqG3|P>ch4k8yIwe?!3`h**4j@f~u_`-x!tIsA$35E~-wD3)6V56fHcC6O%G3XGg3> z<>Mb5NG{4zfG>!#kK;fjhBl*6l1%nRwsKo;Rqr=SU+Y&rwIBJYRjyMr!9s|fqOw>= zH-09@kZ3u~;=fpV=3n{7s655`?fH1KAHL(ez40)EO$;#zLxy@;^}8D#)*wE-vDFt_ zf`ZxF7?x53?YJ`E^O%U2!?DfvGm+V?94_LnMWWA|sx=(8gS zia$keN*5)&!j#aQ1R{~W3I2^gUc9iqV-+BKQ-zIz|l6B)rxRBR=Uur+^-ZEa+`cmBi- zu^E7Tiv@gRk}S(x^@-FI4aYR*!^dNG>|HTARso{oKCx{RT`{SH&?A5a>-_GbwZ-El zM?7d-tU9J9>ZhxkrEg3F*LLdh z4qZY-cJCu|hF@`#mVRQ5>6^Ky{JK}~*`LsIhQavRG7G1nP_Xs4T8(>G9Abibpr;kq z_iI13IS9EfJl`&}kKXkiKZneD&R(m0kTTO*$*%5ct@O#)+MUY@(w-Sw)%Z;*k>K;% zSBtYIo3>$WChb$zRmR7r?Hd}7z?<~xPnXNx7jGvILN>zr1k4pjfdW6)b}qn9x7%S`nY8nG>AEs3OH}x76f?u-Oj3Hlg62 ztK4q0CYCX#Q5tJZg4l<45X+Bk)D0v>&zn_wxmUq*MfBw%^3 zhB@Vm9JlIE9HdeY>sh@%wi#X(a4k}hE{WZf^7Dc6%67S`a9OJ{@D+m0cK9!@_|W&lyhJ z-08*6BxoxDe{2jv)c^)#^4 zLLqt7BvSmf3QH*a+1!QaPd{SR>a()@lu|u&b4F!sqhRGJu`=ht%;BxQ7enzWZF^zo z($1BJAAK?It)Qyv?1k)axM#8D#$9%~b$C>Btg}xBKtHooF{TjLl-3C`nIs073P|~z zeIUNR(v$BfZ&0u_7YuJuU*=Q2lN4W)O7HWPldHBZhEh5*W|Fcob!Ngq|%3>Iq->VMV 
z<7EHTy_3-dlROx)99pNMjcFp@Q+6KU{F>MR_WA&)j3mS9YJW-LL^g7)8ll&3J+rI( z)%+N0>PJdVZg8l=+(m!!im^ODs99qa%x*SoHta|x!l$yqyz{%e@Bc0TOE*53hZvvP zj?fn9b8U9oNGGfuDEY$8`BA^Jp`P`VB#MdENBJ19%?HS%LC$gdcJ@t^k=5d1fe9Av z2RjzFtsA3+VJq_(mweJ{7X#^eko6`=D`@Rw=>|66MiyR`MSqk>uV-B0qhW5oB}`kN zjbvD_Lr*8vzPtU0gxq=jp4dEn^M$wDf13JTIUYaDL_1mpe;AAY?X>w(P)JfrTX8!2 zf~0K-c9L2(u=ulg#tqtU+7%t{8I#AxugGTDja}yjelf{V#n1-q_j&!dL*)0I;`1`< zGb$kuouUONdgwftIHI?;HJq}pG?W-()Gam*svIETWU(XOB)L!>;qr8Gt|x9qg}3rp z@IHzpRBz%EKn zJ@1_o`Jr05QlWB*X94f(+vb|=6M;g*DF@Qxv9s(8Wiw9rIP>E?(P6teliOB%!{pA% z8@b77$_AFmU;%qiy@t=AsSWSOg27p3riQ}!1?BBDXHol}?ukO96raf-Uy7aeCFDly z)~gxa<%$Kr@5xPn*M52SFMB-vYmZlXTaN$JeK~y|JCqy#(nYA3q?0;H={NQ&8u__1 zPo&*eQipvfb9`@cQUY$KRKEB?EPh2NED^zysck>-S(k%dQy+UK21}$ym$$XKrv+vp z^IiAT|8UR0NpEa8Cq#rY;nJF{~LTxq(^#=8^Q@xHl7cJTZLo|t83UNjoNM8 zc_(7~)D{Oo)(qU$I)kUbq5hKE zn%+#wG(-#MHSK*r%Z7D|M~Fw6SF|g+`3E2RBa;U`g1)j$E;MZte1tHPfj4D=rk86% z^QY`Zmt%E5V^|_0LF>!XzFYg>TiNuuCl_P&Pi~^?V#%ruPoGcm%Q|EDb$Cv1ZTiE1 ztu30_p#*qqw+^nX9bEk;Jb&t1@oGt-Yhsvn**pNT4rQQeefshy8SK22=C5L6>zA{0 z!EkI2511l{*H{mp87^H|;!D@Ymfv%6PS$g4&evp1l@rAdrf7qtU7)J!sPy+?RyTdd zSKjbKR9o`)EvZx7YzFuV_9BlYIwJyEc0)Y5A!j;mH~`3!)*e}EV&hoeWpm)5yT z+j8KJD&Q_5Uf13_f$}>kLpBext~3pMW&|A{^=8A5Xu$`_;|9DQs;9lCwEbD?6XyV1 z6BO58ebWZ>awuyRYMCkxD`cUk?2xpa(u{8Src@>4D_hyPQFrfl{`%-l)-}4iI5YdP z2+zWCfOYlfgX|=|g~je_yI;LKob@M{(cB#;n;(!o9enn-_AkwHy8T{V)C0xUe1*mE zaIhgaf7HT{dhM;*R*TXLS7}bEsb7y04-~Ny91^2SZlV9KACTbcf4V4oBk@XM#?&%> z@>b5M!Q-cTCjY|qC+83zde(eTRSp)8 zXKA$k8pX&$see>@tKuwAKqRGn>2vnhohqJ_i?lnO$O5Ua8K-T1MR`9ns^k^rkeTn+ zPQ!k5wO?g*%wxzeH9P^;47{AgHc0Xi8%e?tw`QG;q_u>G%20W5qmLrhB?oxuV(-nfvc{{6PKLxYc)4#r{@mW^l#Z*_t9e3Z)=V-YPfXKl<6ir4Ij!CP|RFxi` zh<{R+%=TB?JqiZ+DrpS^an{t1!8E75ooKam&<9W&-iLp`zq=#PRI`bzU3-VmL@KU&?8d90Q|0UCpfE~66zZfQ8sRz;N%tUr(juu z@$>*Q!pVKz1`z|P>K|8VG^}}t->VEgcxmY{h=GHrpB8OB75`xt=hZiDp64IEI_!F& z$G;qfv~U?USGx(`G5J-C?I>Do0SS8f3t@0s=sKdknOOSKEOwkPRX%a49bWolu~sP~ zE#`;UKsEz0VT5bgbJ&rU{jch|-`=?&&n4oqINj+_gMLefcnoRP@UgU-RT_TnhaQwt 
z;s7id&19!_FuW`W#qGB-K7uG48Gy{wB&9`f6dM0dB#TpCe63-C%_FBjf9-V;s|jIS zfrF6}-tQDO|40fS#oUX79iEqhyJV;zvQ`Y@jr;{{&U`QMa|K!M(Nmxl*nG1r#8&1e z=|)CH5SOK17#)D0E3s-Z0X4kkjr`>XJ0=8VHm?V8k`LWBgWP&_;g?+fc>Mj;c`4C4 z>6n!+Jp!=3KCqJT-EzYGv*pwQ8g53&ga5L`&_@=@$fw^}G^RUQ9tO;$>Ni>a$_gKd zQd1H5!yL1&)IJ4Q0VtC#iIv8-Hui+TNOLZ$`aMg2e0+N{jwQwB8!ZU5NveZM*?}QSCgkBRuhuniY^9ne3=9}-=eeNHgkdw3b+N-a%_g-s* zfagPA^m0qwEd3pCYo7H?pNhUP9+L` zD$#w|zM6l#)P0uadpWiy#>a041oO^ySQ$nxhT|l!azpF%fZ%6AbA&Wy3Y#bQCZ)Rfa^PrE5$@! zY|_y#cyCxq%j6~Zd{L2kyJc>ew)0`Z(PMFxcz}U#4%%PK{TBViVaO%RfI|UuL2n~{ zpC)Z2JILhsB)?A#I0|uXJ%B2KRf1ulRtc zVOU31MyUel?MywTo!rKM1Zk6bx8i;7PSnp>tv&j zl)>7BE)QvZm*KyT$lZS6+xIDYEKJ7P4siW6lw@=rmLALGZ9*|4i=C6eUO;!FE=Dc( z*>7(pHkda)0DFceYQat4ww-cg!JnGpR--HL9@|M&&p(gXX>7yXZ9yCF28-Lw_Ma{9 z0Plz8YvIbJ%dyLEWC>(&t9H>A9wMfe+~Kl@pWOKd2Pgc!ab@e=5W6ggbyU@$m1|T2 zaAuWw@N)4lv=BM*gFdsGnPX+?P$aG@7vZ%5XJyP>Lrb>aOvL;1>d%_X*N&zz+|tcy zp-~_D5@Oi%HQS1#5Gid`{OQaWr1YTqXIuN+Kn9$s@eVVazXqwQiad$pUdEAnIVn%^ z_6twj%oe}=@Hu*EwtQ+d&ySOzV5xnMb$^5{j?Y+Rn%$eL&>NGN@hEaf*E(;}WQU`_ zQ!85ai%ZT@_niEC@@#uWVw|dw#+Qt8ogFGf7YmubUDi$8)9S`78Bx`f@a4S%?X{O! zosCwPM{k)UvLdvr_tK-P6K$Q4{P961v;8y&kHX5aL9_7V);&)_1Rxhr`v!cYFx0{45w^AexZr3mYr*|n%rS_9G zC)x0-JbWg(kQJE=6m1FDS|#|5*kP)S&M^!|PpjCRl(2f`M-{)9Yb%c#l4Y3X)FkMZ zUzlk%Dqiv-mS^zJ){p~5F8IYEnDlBTpR3x}h0B0?R9Y4x;%k+E+$h725Qs4*`$g(o`@r*G%u1U) zLWgHS?bG`#YAUjdZ(n!yp3Ku+ePNh1ar3he@iU-yYy-(W^SZ)})YVd;x`}ILx~mRo zywRJ9nOi9R;@drZ|3zX9v9lF71B&>wlK5-K0;ZwsWFEAJ%FnU=)wkB4S9d9MbexHg zh?{Fp?K65La3ovt@w_9e)0l$|g%xx0`zjN|f?-Af#2c`glbI)ZC@%L1ZFG#^Zxum2 z=#~^gQVgYJ0m)M`w0GC>)rlHp0yU8C_L)A}J9^O%_M~0S-kO--;F&H9Q=)L!r6uxu zhs2t;*Rsz-76~@;mhrD10BXyqhqgaiO5G zlUfE<^Xw!q4d;ZAH%PX=KH>%b?6&@;Ex+BBkh!jyOI2{JQs^(i_;tlKpwvh01*SSo zrM=1#I{)(DzM>~QCPCTjo%(2Z)s4oE<1_h&-U8|=g*rFz=^ZBCk>&sR_l4H=Ze#G9cXA6$75_~sc(1^{^PCq3PfmXL_2d5~6%RgTzIjr!aI$paW3uT&s>f1MpkOQ| zUKV6fc=2opMe*;!=TYoS-l!xrk95HCh5v@P2PBDRGYn1~L80$M_IH_1&oAZ;DpFfW z!UZ2Lnquu=vxXmoD8==J&#-5y)i$9&;uWj=X@2S!%F&YqczmY?b5|YmjE~nf?1PG? 
zN^IZbIX)B`k6>_kx-lB&!r>gWXkxy#?s)W+g)jf*FDDlHd8HhIq)iK3ZB_ol8_uIO zM33hEj;!rp{VVYb*q)QAn-b%D+5!) z+g~4X^N;-Q`@afrYxlB3DxOu4Y&77U^pzH-`Xp2M@fcm?ot*3Sj$!ivwQxz1Agz(9n|mp#Gvn`jcrBS;1jB zs@Sugibpi&bp-){KeG}Y?#chnw0{xP;17Z|VT1PTksxp4$D2UKr-&zKGBSmq?fCzm z_E%-N@0WOhlg}WtOxgEE{)bfFuQBQ`ieULonKa?E)o-xP0=cczdRU%em0a%@E$@+B zfsw5xIC}Xx35Y@Idbhys&vJt1Tb7~P%2a>B^(SXAT?h2S`IH{|jO#b8sp}%KXnEe?rN%C_KN1ZYq>l|8evqll}Cs zX#YXFoXg@@rK5j|o1fnO6Ok9>0d~F6iD?0jZT$J&zkTDJ-#S{B8he7{+F}DQ| zY_MP_v*h#t48}PXIiS9jYMP1UQEB)K(tmtVy8;w~Y*`V^r#w~;`XAr@3vdN40I4cL zaa8{oIITM~OS8{$Rm#^6H7*n;fx zl2!Rg;@0{7_`JEfxxD=8bqSX}`9HhO{i1)w0_O>QCV?Qh-!Gp1s(X&;lb_p8j4@un z2NPJ%(fogz;qOOrj(c({GErht>}a2k!%b3t)t}_aZ@x=cU#0XUZuGzXPml}9i9T(C zq&d?bYAEvSsg8jo>T>>p!feNmh1!3DJn`1`d&U`x0-$39|BTze89GJaG<~Myv=
  • ;lrGM=y2xykGsXq$naP8D(Udsv z)sSvj+UCOGks`Vk+k1;1MaQY_7!T0arXp?X{y)+7*5zx|L!O$531jx4h;SH@BZaB@ z?1~a&l}SRPcso0bMW4j6A-cTqP)qS)WdBV1N&Ma2MVD>mT(zAp*B#URs`Z!2eI0z( z>kS-Nj!9dsC&{anyUEMJmjBoh(c$`S!FMR#;)s&Ak6j}#16SpxEJLW^;|9-;$x%wq zfoj1na`F9>YUyITz45INl&&tpQQ%T44o2D7;}ZC%Q4|<%M}HgTl@8spQT{b73M{u> zzKx=xbF4$*zlY^^UaijCR6}fpUD@I>E8tiR@JL>D)vbT*0og#>l#>}@QA`+uFv|9L zn{-$8&fcL$Tn8L7MhjR??WH$v$GP$FH@@cQE?lzOD%HR~YUR9SEl@wlXiZQ4KfUzh z*Fto_c&0YReuvvy9YrZ%Go5{hwGaAbEtYE?8>Q-C6qlvq*nb@5ci&-+gHfIrksn*z zzlX)pH#YwO;f9*IM-{&BI0&%q0vmAIW8)wBY3&C*j&B{nCJW*Bj!NGQ8DKJm9CG9$ zjSn!#e#am`7PJh0YhErdZ%EEg8Xo zOclCI02Z!u)Pm?x)4roa98fF~U$)xczh9X@yf5G=NxK99E9*Y37ti>sH-83jM#CWW z0KYwV(t>~f{y^+~lMXnKqZBUoPn;(@jW_v?e>;e=$2#Gy1p^9Wyll`!`Us;PmTv&k zbb5uNgC(VlWpW$^JjE1nTGrZfxfPFM{^;UwbNN>KOK0~B7fv$dWGuXUcid6wVgbZk zvyS`dB*y@}En#40&?;cmAHiDIJ;!KdrgBVGoQeYg<9s-DY~9C}*KmH}o2oD}?)$|k zr?=ynOa2Mbz*=+h+=wIh#bvxB2dD-5%H2l5R;VzB;W@6wsCIDj@H~ewt7^;sk!!JNtD>)F9YMhDu zF+TifT3(JR0Ny4eBL6S%Uhf*g^Qsc^dJjH45rOuXkCwLD$`G%!F8l!&(i>Ku+TyuU z>)tc)iy;eZL!3He=-zNOT;b$C|36vE0}QSO0x%|(_1`+I3dR@|q)}ciu(i6`wp?xCjHmhrG6ixqV zLIC`5Tn1KkQt;(rPV(KNzdiD;A*t|~?%?mg0^qQ4#vp)IO>OMiIUNSwKpO9T#W(Zv zNB%MMa(vj7mjI>KR@!--4ymviVc%h`1BY*HWBfMVp+~UZc2l7IwFYz9w*@nLVnclJC|RfQUe3`=ET zvp2IegpAA4!G9M+39+v{z3e*5!k}~2$dhg2JxE>dVdb;bWdZGNeHtfxCRPSaQCzaQ zb(K#}+9E4v&0Uw|J6XDD6>PF{4%74mp6>Nm>WZ=ZoPe9@B_Mh9>3713<`W1`M8u<+ zs9V}jEfK%05t{IahSQRWGgL7u%(a`xbA6lPZ-w;t13v%=A=m#%JnyU}1p7^#=rE`J zMX;EsqWad&9w9<*lP)ya5&`Pl&+8mnNt&1~C~53${-{qjr2Eq^_jLn81LN`0^Q1>u z(L~+A{TrJ)FF)-*BnW@;R?6ThX_$(is})RX>vI9vb`xG;Td^oyWukB!(cV2kuAQ2z z)>WE03pLxdsh>+tg#R_n)Js5dZrRc3oIbpQ7$@JXyo10&v*z@fUwa_FdRH@H(nfHY z(k#0&3|IL;Wo!42D3HgA(ih!u>HyHzM4R~@oqrOiz8_mjGTeRHxyKli46Wt3V}c|B_f=yBw9&2 zG(*W%QWExil^0TO-ubsoEs#EZ8hAn zV+D%0c*p-G*$xZ<96{cBC{YXi!JQ88y0wPh(W!KSer z8+%hj%G{pFP%y$?q1C$|4P!Q0FpeQ8+q zS|o|S_d+wpxJw(HVk0rrhj4jEylwC1X$*oHuBY3T=l7@gWk3q}Im4fMYG)6dl3E=C z*^&c2H?@7~+L4yaZQ8yy!ORsb6PlRy^Dg7h3x?bVI)D_UFIncmr$1`}P+!iJi%ID@ 
z_3ncTQ&sk8>4HmRQsrtQp(kcrD1LpI62!}@C#?Tu_0!W5iQZnr$8t@jnIS{0mbI!#Y%KLDP4zO)1b3>eJp5r@71~I-%&J!=ez5B@5T|uTds3GKUGNz z2EHoaJIDB$P3tiBL|x#J8xL*Z)G9iSToD*o6!w zYgZ)K#p!h|zMwuMpX`AbGAMYIQ)7M$VKYRF+O3D`<=_iunVpt95K8S_u?vGQNU@*i z6}US11UW0MXIHu==2mK>zJ86ANpo*3`T8E=(G?|icF-4qTlba%6AVJ6B4MH@-D;>B z$-|mt0|<20*JB<{D)@26oWC$K>vb%|Y>`Ycdl6j-v6h}OTOz@-RLY@SluunvdH zOV%gn6GcT9Y-~a+URm9R8iU$z>W@4Gh4?pNnsQJ`haVn9Egs2?cJ1I&&tZK3 z=6mThnu&=}yJCN2RnE#N>>wDukz(_ggn#>x;&F zb^*4xRkk2*KC9P?zWITs#?eZ6GO`7J7 znHuSg{DF!orU-tERVU0ALql@mq(+}D167-)?v=65h2FlJCAf!m*Qeh^;>IHOU9mSU zL$1XrL@s>^cZqKIwAW9%xCv+?c%pIv|UzB;uBdfNqIzcaO=ySe1WBli$$ zv$&lWqoIe7?25t{zBQMN4^o!8R4z}MEJBJB3xTx8&1~v}_{b(E@20z{Eq)>a@F8WQ;4Bvi}8#FeyuW zY0N#DRC_On`&QGDy46IX;YNUX_SS>xmiRO!cfM2AcZ;>FTOY4&neoG+?(}W8z9ZwI z!CCqboz|SZ_49J(4Si|)lYAB{USMis7Z?axyf$`;c9PbC)u}Iye3%O;``h+I{MgQ>@aGKCE$ zvzEFg-AYBewGn2Jaw&5I?1S|7deSTM8&NVLessyjldE64?F`$7vPHNB?|N312yK~Y zGoft1CUf3{^+oHcK_a84-y}XDAt!TKdjxxx6GC+|^1b0+*rZ^$ZSvP=A(iy7OMI9=P)l%gc)o(xuE9`lajag57E5jj#y`wM4BWf*48glJ|IhT886-6h#s_ zaC=1pPu?rA+ir5gXyHLdRV@?Sfyk&JLPZ8KLCDgK)0~b>2I!&V9}aP+Qfkao=0H zIlK0{$l2Pd&~5>II{;4^7dz^R-o!8P*i8~K?i`D*;lHxEU(D^;`?5pFV`d~^sV1X#N}0astb=r*P5C0WL41TncJIHJS6tOpqFxv3LmQnNsG*X zcn>76Y6FGOv8T9|tNd(97q0k@K%^`uX-*)tMBFCzue&ry@-q5HP-|{0@uzK<&L+T( z<|zYur`EEoqlGy4eYakFo?Ec@-P9x#;lLShl-763{V)Xo^Yh9@S-p7Ga6@3F8{65T zvPw*<%%MdJelBEONA9gp37p*tvD>0s_ zK2L!K>O$(#)!b5LE~cq(eS^roBSk zN48%^Frx?q=~n{jJL^wRd6d%eFBD5)dL~^@dy>n}{oJh^ft8kUZ9^HPHC70D( z(TF%%(8lX|^0gen@@#n_bA~G}E&U1foMBDjJzs9lLq_+Weq&vls5jMPM$cjEs-6?n zm((YH>bzDh&6~S=KF+Ol4a}{43H$n0+osHEgqVicbL7$Fi2ikjmMn7)_Emw0%dYZO z8GSVy^T9%Hb&NchboW-N^_;R8V%fp5vZY9x^E;)Ko|=6KqSY!W1+Bf|$UTs}Uy~oL zoA|3KdruW%62%C?HT7=2h=jr$a0FR?idqJ7QHr)Js|cG~riU|7%6#uKZ`JghWAD@9Q! 
zApYAAS*^>qC!xF3ABRErL4AqV9 zx%s&Q6)8qNx)T}f)Je+>_~~k_UMy-S_&C7JV(9z&^mBpqt727GrMR(glI9+u<;D>Y zR)+0&(t{G)1j5d;vBB07B~m+@HUUc)qUodHg-pm&lV2e-?;O0Vv!>uM!e}&_kZ>2J z)Y^Q3Dx$kS^^$w=Vwk!B*@%)(05UvgzE36tUvSH-=j#j|zFL_>2XuC=oT+#$bwjaR zgoZcFAe_g+y!;NGIhO#Oc6E9;lS@P+Da4F9AUohvu*-zD?I#~z;`R>-JOcdLFP z*Hmhbyn$`Z-1+cZnQw}wm-9VZTf%Q$ zE3clU_ZL-C}5a`3)qx|{4yw+7yBeSBZ)jq1~Jc;`+JrL65pYj!-JAHMCOoK z`C!CZJ1Hae)vWwYqMDHGP|7}ad4|@VofmYcZ}+&L;n{Otq*f;l772T+{=qUXXeU}4 zh8xPjy~4fkWSp1Ra@lv0m&p*RN<;Yc3L-oO7vxWq7OUUW&X8*?a;)T?<5Q#uyK_d1 zh@q-LZlA;S#W=ymVuoh!uJt$x&S+J|W-0pIn>bll_j(koKs5>0R>G4jl}nK>zprQ4 zfquD0Ud*9cihYZp&B7*dQ6#uQb9LK?&R(f$2^16-E|UaOl)rAp2|>sC$fFAECuG^= zgI#~GCv-3_8WqsPIF&{bO4f;pU*KMBs(D$sh!3))lkh&*@Zv{2%pwcKMFg1YGK>Fl z{V9^CPmmGMA4j_{kuPfrgQ-;a^vXZ1L)ZH3_cu;HuN5T9gc;FFp<*5=@i`>v#2nffx+6h%1?(O7*^`m^)6zq{7rCDs`JgCkO_jtp%W9rkoi+e`#eKpjv`ZPMj~&S2FMHC&z+0u1 zqo*U^*UY?(j0C!5q^XeSflsQ^F)~oE=jT_3hih73>628lRL=N}G;fB!!pys_;EYyB z?SvFF3jF0ENWVAFJgXC4MW|!_kSTC8LwTVr*o^O5iSmF&vGtwxi!D<@09XZyo# z(b*>o@Z{!wslc_!KQc?wv?VR`qb*UvSI!yEmpLxoPXwyJC6j>GjwL?g{wG3wVpWSQa0k9y^r@0?zNk`B^h6K zvC|axnu}b*9#P}i^r~(4nB^qgso%v%*qa-vD(wCmNi3(;s+W6_YG4EEEm^YGo)c1Z zD}&RejGHMxxT*PSJ1Qo#XnzaM(>;*T6t<&J6G~*jJuQ8&xdxF$WanKU2bE|>@JCG# zZ#EQUa5{5VRo^Yq4p)wI9^_#U&<_dO42hz$)S4qwWe-m547_GGw}vQ4Oe?G8X2K%4 z<${uf7NdFk<9Q|(O&;$akl zzz-b`hpEZOhVV3Uah)0`r!eJa()&3e0e(N>rszY_=a}qzu&Hy~5^weLHec5Mb$1L; zXJ^Zb2>(MY>W)G%AERl~J9OJM-o}K8Zq>WDg;(|AC~4G zSjARa4e>e-WTQ}N@#?G1%$vwaPifTamdB7OGPy5)>wt?0X~?>y+8OV;rXiie?-P77ZW*sjlJ5@4FNWgQ;Xm&$2Gh>Ry~)@7!x zVpg$Y>AM+&SEV(AM7&3nuQ{DhlPj>ya&)C{@thiJnIG2?*y3UbgsfjAe&QTKfd{}z z-lj;?>kfu>DlLhr4)Yhw<z;qv_p}w2A%D2rh&k9xE7mM*e~6;m z(w7sE5_I!f^UxK8Tvz&@3iCb91mt^s$9bcVBz)4DfqbH-a;v^Q?5_;Xj}N_8n5H-PWm>GeQ7Js!E}B}YaAAuoTACtz^%r7j9wlr2UJaZb57}n zej>Ls%qR(3`N~U@cOmr1(Z1$pTYcxrlB(W&>=th)h#Kpq1?l`m`yU|5q}ZCJe$NV1 zKarixli#77w_hH<>X59J?L`Ns`fNhKzwn?h3ykek-)f#ue?!%L&#gaMX|ctOnT4<0 zbm#Xw&xK6JuMK+IWvRLEl)v7X64~&0ef6dUvh&gwrd>s5UuaM(iS(j2o&D4L^nt`Q 
zY=VK(nr3#H$G$AqwEbd%m$eu~aQ2NSKmVSa801MA3#|{^j+Qr7KVBKk>OX^z&Y){|i z$&)y;RSqQC6tjALa$*L1AKU%qczyd|V0wOm2;7R;-Q9(zp1pzL5t{zsQIC*7zKlf> zuZ}}^Z%?|S_6sH_Sp^U@SRf07uJcl;U#Q&?%Xt z7fvyC{!3V~B(naG0TE_XtmAyJ{#Jx0)w?7HjvTtBYj2}Ea{DbZ>RayV2Q|QhsdQ_K z%MD)ey?(=B2TPfcNl$oUZzHz9(5<6)CDK&f(}lB|G@q~R5z|c{5}g72TEf)vS&_Eg zJKNNQ_MQ$-x0(26StK)Vf|*5VQ_V&bJo}|)*E|W?uCe3`_L?{2^IuWn@9zou=3}q<^6Q$$0;jh5qS7j5;3Q#|dkYIH5$a%j>e`W*~>KiiGaId#K1L?aVmG6J8RV zls2=uD{2aXG!qI!)X=N)0j?VLMsy9@@ZuT4ZlIM#2XEtBy>}lEh6R*&;M=*o6@ye# zj3RQy7WCz z74+S$j=|w^>~-D2na-U5*%BN{8-81R?b>O*%P*^Kw_j%LG!)c8T_4n|a$Ga5rgBH~ zl5%#Re7@6oN5r4^*2JeDIY`Hh+P8TrO4`xcvq+6QsKBNAIajr_q-zOMPg|x9D<#E| zs>-*RBpW&?^FuzcPEdd{!~{Vlg7?!%&x84Icb>8+R)TT%VreX91?u|eq(lVm z`iC)A?iF+P)H;tR5F|x%uvt_$U;ZofIuxSzyfxX#{_cZdc?smCw#W$aO+OuqJ(G%E zqYIl2&}SwdU+fxo>UbA(+dYZ_BUApg#4ir*&v8ZN)|J8^Ol)+(#G6weIDcYdAnl8* ztIyj6Z?c(k3H|ioJ7HPyS^Vs(FL%dTDFv03iYHtREu}!E4sBYAL8^I$Xu+kN(P?Su zbf}NH&8XB!biJEBvhYqS*S(wI5l`CX1PQW0BR%p^njWi&+=5jsbXmfaeu9QJP6mS9 zs({LK@hrVrEGrt4QhrQ+Fm=o_-m1K4uR9z2{&1#Rc7HXu2D$Lo+esGzm+DRju)aRT z#+p^c);h*e$R}#Ypgw~xiD-2Vn(#NVm#+MRUoeyFA($pgM1H#8l%I7A1N+lOSMW9Wx~!Mt;52-cLw*WX zzdcG(_Enc`-0ga3*-Ut2DpI+DneW8J2@oR0%t{p(lcn6^Ax(iL7<7Q;#Du-FI=h)%4C41>|>b?6*j zBX_@L4TD)cq<7Iy0ix1eAYY%%^?AQQNUu40%l@f7rg$$e1><|#y4SkJuV(UQ2C_bQ zcfaiM8g?*kM7`0HjZYETS9#|p>c!InLcbDA0(hfkoLv5D$Lk*lWMb$5u5dVIsd1F{ zhlA_O8eMoHY#+8OX`95F8H4WIO)Z*+Z+o8a^UP#ice=ceG`w9k^Rv@tNFCM?<8&)16Y^q(Bhj?EZ5s8i5{3A2_}Aw+?rpEv zDa+yK;hf(#lzmULD`W>rwXno>mH-~b2NeoI&#_hJBz8U+)XZaKZpaq+eg>`t!V*%7t z%jchJCe-_U)(~g5@bo-04Q!;;1T^K0>t0^Xg+i`yp-q#CXtiW8=Nb9|D)_DAjzSc~%eonlE6!4Wc0dNNBk7e)bm? 
zH}Oz?k|Gy)1d$#(`&Q62c=_rGA&}^+OqH@Ur+>u>*Uq9WnebdjoX=?`L@=YW9Pc%N5zB9;u>2+jA~w5~@;7DcspuWS!AFb>GG&){C0 z#k2VF<`<&do*H3_xuZ)Zj8n>Pl;~d#RiBN|q@)r+^z}S6>chDdE&4}!m2KBPEg4Av z-2E!t%U{?-j(d8bfIrJ8J*Af-b+PH`x`eWK6gCmWOn&i4MC$vT@%BjTdsf-g>PMoT zHwzMm> zX}(x3YQ5%y4{Z%h=Gz3S~ArQrILcLjTMLw z`uR9`bp5(K9a@?;+04S$N|=p%pMw@W!x=3vX*^<9=hiMDQL0awj|kp$|5!ch=vr?| zXOEUq)YR?ctt*lLyviP621y#`m@}0`?rcN?u8Ii7w56w&L)xFg|3l!Ai)PG5yXsHD z|1Vv*QT$Q0+m7>fPfzQfk0&R*%Dqf!lgHAvxGOrVBGX-oDK%#P#!M+Wr!4OCjA7e~ zU)n5YOSN0w%}*NsicNE{2Mj_rCxH>F({$ONH_P}fSYoe}&K~bG48-@^wG?`vE(sJG z&Z5;?k@k8!XJijL9^tXPS^H9MhvEH~47BLtMngBX@I)PGqQ*xkZeS zpF~%l+uB={V?&chaZS{8e2{_QW2#;5cg7Ue6}4}9TLVIyzNkb?Kt9Ff+Dl(>d<4XJ z=4@LWGM3ty{i(%#36xj+MY$1|`gH=j>T97y5^=3wX<t@iaBPgo}6m&7-h}%-b=qzuppJJCCc$Q>yWAd zPA3&TZHbE#PE^6vL3c_V+Yk(iHCyhfJwR4KY!6-CAMO;Bwzu`LAuA#wvv)i2(?zNa z?=G|O$Gl+sK2R;^DQ}9#O2&y5XujO!rr)Jz=EB|k^`+oB&W>u&1u*SCx1m{raM{*8 z$nA9ayo2%ioB4uhQ&0^%@2wCwd$E04zDRW4OhUM!p3>tkhx2fgv{{EcO_Ox?hO*$B z6-n3Z)WC_@9|_T^4AFW0{kF~=N7X~i5H*0Dk~Z(I!rjPqpG9Tipr%f)01kS;c9pXl z_$o1B*(r#!?nX|VlC1RU^8d^m<;q_go|@+#SOG?`9n#j%hK-L8w&nY2_qv{CGh%ei zd^oK)`28q*r!^BQ_V$r;AQQMS3NFGZ9^|Uy~F_JXt)|&10BS8%MR;1+C<75v_lOYV5uN9zycz z3P?@w4$qth5eD*_KCK4VSMNyf8ax$t8(ak8h*qaTjbb`vfD2T~B%{(>hhYzoXuk5c zQD8T4$Zim-OpC?_s^$$UowA^AkoaBS`mX^ch7hAO`wC4~3+bS2eUtCje$Cqf7 z1Ww`X(b|9Ncnx|6;xnHfg<@#QQH((s^M7#guU!UTvh!G-9fhT z+5FLSrENUE$&IHkigV)Pw?8WS*wvq<<7bpf>lGn&+?9|SDT#8=n{secLO*-f=+sk? 
zCEtBNgs=Ni_DvCuO&*Z`!k60q?&;+7+g`;m>(^REviBpuduSRdcodW6nCV(d2UWH} zUxzleW+?`~f%btX4!n?cAxqAw70wqwTpVXut>b=}xw>Ay!(r_Y90iYvS`>mIBRoF= z8ex)v>!|zpF6R_?qaTJxvNWnxtnLwlcZ_!mRZ!pqIM~~;^u@~98pZNrq;0?Oo8p3$ zBq9@jITLfrq#CY@V&tQ^*$AS^6iBtM05}6lA3252$V?a;y*xrNoNCOJeONw$*n6DF zY`-LWJidzTFpz*Wn%qk%!44E*O>x{NZ(fUd`pD`Y-nu49sP&lePad-G-;vmJ0Cfg^ zNHf-Vz%nYjSvu6stbbm0my+4BR;;-}8#2bQ)p`p-w%P_wC6>3ZskCny!d+|64UbOn z?9~c3vz~dE6w*4l(g2;~Z4~5#M!)uD>ys0=2o#ym)yLGu!dW}6~`4Uy&GUXU|XP)(o z=-yJ!DPsLMT2wJ#MKLogO1s7PVIr{Ct4TbuL6$-628#jH-Y>X24c=W#6WKi*P%Y|w zrjGyc{jc(S*TK4id_d+YSBlBa`)rWJcv7b4b?2!_)dptu#^6p#KuHaRsT;eAS{f&BRt}&t|Qe}8r*qVFK#ao9-JXs=Z+|!)A z61}4=GF8aV_2NOm(%!C$mRs%m_Lt8%cGcw%g?jl59*$a$NCzcET;m&b%Sj^}y6HTd zGS(?Oaq``K?IOa{j(oHKZ0}0Yw2SA*fTf+VZQh`hXV^ByvaKuG#wp|<$WU)bsSIyqs?2Ev zZ`lN{vP+#L-BY&TL zO&{<@z4b_y5l63))k!Xv@=gL}bMbxJkx-?o*)Yz{k$leVL;G1kV)gtk6PplN&+`Ut z{`tF+g~I$IA``7tlxt6C-rt>I=bssw3B%FS(v;?PcYU?gi$uMJCJZU5pq-G&cuRlh z;1U#WroEXyKPg2XlQ)tU))`M^0qkb98y0Q>yBSA>Y6c%jvIyD5%Z2^?fZ##OXfg@S zUUm`kD_G)FQ??zuAUHchjV6Ia{EdeoD=Xg;P-A#7Szfyum#s2lFEe5nK<8#KduP%| z(cFi=4rvzQi}2)&RHwO%GCCtkq|%Z@DA$^_>DTZWJNQ*(pT55&S6+q&*IhKTZNzNnFb-J?Y#n5sY3OXRJUJ5RV_zG3GLIh%bCw?AY>xzaCccf zMqf4*Mbr5*O5$!brhk1|$!EPeB-FUj8QGKlthdKC8A%@pc)PmNIhSNn(ESR*JN>E4 zB@V)`O7_|A_TYLv#rM6 zt9Bq94{ay2h`_68r}mr_&X%~L7>0W{eFW+)Y1yDqoGWApINMoyUQX}cR2`_4*onbR z+l=J2a@Voi1T#Vad8=GH+Yc#Ij9Q9)?S0y;$r?k|fC{-Sa+w09##FkbNTwa3+t#~% z_vxfysNoPL^A@9Yz$fLE`B=dB)p$4@k^>gyESFo03x5nShri1BJhgc*X>vr*%79<_ za)Q@s%$&dq$SyJeZT_R4X7M$e^$#E9388#KH!kYq0`M=H*O+>}q>PTD3{Q3=t5F%8 zHhmE%!^j%^DEVgS2PNPXR;AIW#Zi~$XNXtZgg(-6$L;W2n(gU(T42i2!}r}ZGV$E{ zUr~C9Qxb(nL&m^{cJ)pmb@5%cR1E5VnuAPpPA-y{Wg--`Be_5p8ZNMZ%}!5jelsMf zBrD2c@`>QS4Mf%bP&Y}rTkp~na8>Z%) z<;hN-yFEsTJLU-A00>1dxP~T57;S9?PglxXa2qU|a1+S^3#-~zSuwqwlQ|^cW56CO z%T^DiJUh4?88dJ1k?_UG)HUoY@BU8Oox0cO3H2Y4JSXZp>L6a8Ieox2kE7c|4=Dvv zgV{8Ma!kTq&v}&j2p>&gB)4~@1x3C*qndOxb7;gyxvAD}?O=L~&C8N@o9;dRsFEZq zzYEU{kgKV3%lDG^BF>cq|ce-cE;H@)pjk73)k$> zxvwcYnP((P7R_jFCzTNn(%p$${5{yXF8TKtJ^78cs 
z6|JJXSgOr;MVmKlWxdV{y2=jrq!ODCPvSVNH<5cv*{Ep^6FrNOfsBs`Z|fb=NUsoS znP9K!b$3idSFwo9vuH1_aqSw}e%8`AEy5epUOOVZtbn$Lt7$Q^kI}K>=JVfN%n*Lu zwv+89_#<`qCw5Y_PcLm0zEA$(iJ+VBt=J07Pe*vJmW}bNVh+W--)w7)@4>adFMNm1 zx`|{;eY2-6rvv&&5KdNx+WU7c^t*x_81m6m zaTgyg@((k8i;b$b?6Vl9HuB7MKU(OTmqB<rhPy2XcEMUE z(rfv^fV8MoO=f4>lJNlW_-@alg9A1)$ zD=6B{hlF@M9=y}2O3P@`T!)A&$@8d-aVGxR`S@Ahe&ToEh?dKTg|s%0WTs|bzgYKo zJ>5qb7rm>CM&Zg0j&Co$0fh0+#l6)Gqc{gx<&G{KJ|$B(WxK0OZ?|iz(-Dk}XejP4 z8B5~!TJ7QcyJ!R+diERSUNXZhrB@gYmsm` z*kTA>lP`6$pb$bnbUEX1t#F9v;;}=y}#eH(2mglonh5l<)p&t%tm{wQ_)?^nA|7i2VHZl&>i6 zp}bjzzpuX6^}DCfjKD`IEX-GUGD^9kqZS4RxM|1%ugrved#n^(88Nq56GAL5JUWn3 zgnm*|vek3jag4p^3rs&|&AucFI}o9M+nfzWyaQ|TCN#5&d+f2|Z=2yufS%(ZimtvN zY(0P56-D-ZIl3e`bh8k3Hs8{cja4spJH*pQyW-406A!pK23#P+Z4vvmK~!$yQA&sj380&oV|z&N<9idGF%Z^BYY`v$xtdm@#v-*#?#P((!$IAz zWkgi^ib^HHAt5n%crJn|t8)95%J`%W$R+4aL`6pRNt&n@51LqMSkKd#`%V}+#*5ao zGg_7>Dd5|YpA3YfzQ+3q!CJG&y^XbTJuN5UaDQ@(x|3Pu`*W$fM)2)R?)5S5c^44w z02Llx6vr1w!74I}T%DvxmSPqj|G0VZGp`MyNFB8Nb|0VyxVM27SlcBK)=I?bj%mU6$+L3Og2o6UFvA9 z^A?&2TuLm6J$yCuU@@|iKv6EoKFY)Q=p5*}N;z4}2QfVpXREnY*Lgi{ItmCRbFDwv zkS2L8L3DLRp4Z2FlAWAawjSA5(W|@Z#h(C9Bsjgz*=z!Xs{`i1e z)nrRaNi&57BTcmWf;k)PW`V!|k$a8qVg}vWdptbp<>)7AZ2pTp+;fK`jPH7c^Pma> zedVE>oV9Gk46I`Uxvop{>5(x?MV~&iCART`^PS$I8+l)L%ugwm`LqnG=cp=YVKbG= zieb5WBlecs+KTs`THCGLvGg$|1DDAC)!t`8s{Lio(#;R&-Xl4LUjk%&;Qe@Rv>z(e zfDtnQ9AFv1El63ku&8@xJ~PMB+eK5I{KB^K`b6qMfTvy4Dj4+WKX*dcL`3e9^=178Sj6N$pbj)H<;ioZtN4==xWH zuBR>^PXBo~?f>EJy`!4Unt<^YMMVWx5KyX0ldkj*Dxh=_q!*>P&^rXgLNC&#gOt#F zlaA7n0HFj3y%PvXhXlg+u>0=5E4tt7`F-d7{_&9H@ws#7PM^6mK8N>|CQ;Cw6Kh3K zZ8@@Sbs&pFg#Og#t^^n0XVRnaE@T3ym4K$ii^_^mEzK2qm|1D=OstxlfF0yTY#|?y zjWJl>E3y#Fs@x5Y$~_-RW$~RZ`5Bsk&;aDL!^a?sSM#6p@2i+k1CKXpP*SiiDnl(V z3m+mvH#Dq2)Q_vaCzM;Cav|C|Qov#b!FMFPseY{@Efm`4veaI=OVNOh1%bHC?~xa6 zq&~?QtYXbf>!Bi?@E(~%pw-~|6T?~G73TjajZeSnTg7QmsDw|QTl}Z1>!Zu6RG!=V z&8cyXUx1!b3h+!?_B5j~2n2&!95j$+K)~d+tVVGy;HD}Bt?!P97B-eUZj>KjIV@L& zTu0RrV%gx6p0t*)cDxYZ%4wu)zG+W*4K~cCFhXiyvhQ^uIu_LMu 
z(rj*NN*YJFv3Y%4Rmt(rcXHZhGhdFUk++|};X;_N_*fmAib{3=+&Mc0LP_EW(R_{CcKS^0%xlQ_R0q zrZhr8{#&q#fh~v^eRK?Jpt9&w+DXS!7CoVxih~N+;~=IR21INAUkoGXlBRF?OO?70 z%{!5Rz1eVsB|YC95ldme)dg$!y2s`XL>O|$)yGXc*WaN&mGBnlfTlir2#c_}?KDCf zYK56*J9Fyb;f83rr4J9z?}!_39_uBl39kmLYL%RfjH;1B=-V4u*iM2JAU_HQSoRfa>heXVFH z*BmfeIkSGa6ZJDG@j@c?fQef0d!Bf~h^K1P!}3fgiWxI*`Slitz*R}>Frxd^tdsk~ zr%x_TzpCQa>Xl;;!2n>Kuo9^-8=|Y!x@u`i<5>G4FN{Nx1&tt-UJ-}ewB@s|>&6Fd zR|k9**EF6R3$_nlLYC+Qs@_#=H&84K+0&@-g-u$%;QBjRBP!!My zU=-u;9?Z9^<#Z2pxZXX`M>0iO)SX`r=haK#O~THXADK^?$v*33?AVChN;>g`Ie>)r zzW`3o38%Rj)z3xfcl)VlR$3nlNZ|XaM@tR8%bbEIroUoxM|A(R75XZx-%gO#9cMkUbUWVaztavBrO#xFDk z5-&P!r4=m%O8(~|d|1MH(}=awI+>}>04;g54&{C!)&nW9cH%Luig{{`)vV()J-=j- z8FgYVlvO&_4+4ewB70&bB3kAE4KFt7)i5?4)3%Pb+zw>@LxvD)Aa)>&$hU|Jkc)(9 zOC=uWYiv>l^2OMeSdl+nI$0*NjiRkSADKll5JQu6a~}nLw{JOqKEYWP@?ykMBNvD< zjJb}zfn^nm)+c8_%NvS!sR9vBxl4|fuFIPrKOst5vA{mqaJy~%y@MN^21fC zhRq{Kj1bW8fg$gj{z-TF=0MofO!`_#SHqL`KxV$IQ+0zxTOz|RBGesMc0K@}TFTZg zx4sQvO00ysu$f!OJCc}S|LO&>k2p`}DU7et(8*nM>IKA-w@gWw{B9)+>{~%> zJ6MPjsV$WJWH;-osnLknM=kQBo|WBGyYDp2!dxVL8xXun&y1yk7&tGHW}(dCB>u_D z5x?gwUCe{I*sQ%t3WxmfjpUzt8^DD+Aal?x&yD*}CU#5^2YB85V61HQK{YW$Y~(}V z!XvL^Il}?HHnBs;&|BW+>|-hvxkaJR%0Kpa#v6mW<##2bM{BZxa1QG6bH6dfPx@f~ z06>ju>UichC)+=lK79yiA$yNaYsk5<4jJ|<#wp3Ewzh}#ME(`)`47i_;PL?`6_2$Q zS~q+ukU#$&%l-h&DenU@A^;n!NjyI!t08zhQwss0rCiXb%wK&Ky2i<1@l2bv8qL)5 z*;h$|^*~LX3-WLg(@8q9PtUZ=rcfbNs1SXgCLX=98C)q_IJuQj$&3Fz>3%1O{_~TM zH3`rrsK8~o?mOWHWQ4kcU`TbW9RzB#?V_K25sLYyAhL8MqRGT2ut=3t=eTdjQBt$% z%k8)uH@9B(W3SF1R_g=dELbPJKRk=NG7cp@aK{>9jrbGR4ZAA5NR_Sfi|m4IfG>xW zl-B(}D(3$1%5(v*Y_Ny&Y=6a_V(}>-!Z_PK1oZeR5S*!!8)^SO#{4rq3-tcL+i^m? 
z{raCW&A*;;tQU8>4HB3O$tkz$P2wE_f)pBJ>Y!DW`D!l~^wrI;P}oqe9>Fq%C_0t1 zoY5{~tZL6j(?utcXtAsrv83433>kU#wBdxp3$I@|8eV`df9R&!bVN3?w_PHAS7RWSx0^xY=@eD_{ZcJBfcA<6ZJk~IDwab8Xt zT^hxazAQv{C(dYiJ$EB9d0pDcX_t3kU|?dq(H8c+%6g(^H!nGPB~B3hvyl|!kD5UK z%A8OCzmFaH&8LP02=`la;Gci|lMw^rzxEyZW59cBt5ygm`e6jZ@4xtp-?O#P5vd~g zVE6Bz-AyisZQlMl0ncI~swrt~F8WgHlMA_braR?_mVHHk>HVqBIGbZsfwbTE(kFiM z0?kzFx~oaNkAFD9ZarDsxIJqWkFgOhwnQB+tgi+&uI&}( zkVPYsoqyKWKZFHnpgE@}B=y~3x?T3^?nlS9$L71Da&4yr%Lb!g(FQ@hRceqHa@Q_? zhdwjTa~wn4)f5{Cd0C0&>RuMmy--@lnA#K$d!0`p^E2bl{usT`H-ckrNSM-3``sXh zU6%XD+U{>E=yR0*Kvx%W-~0_w$Et~&R{8f+hoo3-;hG6`YX{h{C*u!Ch0wA#`y0nj zqdn$l=dGL$D(LIO`I-vriw$f3=X&DxS2H42C2G`CI<0=&^(tWKI#4?h+W5HSo{_ZC zUD}f#FR_aWuwPF!rMgEm?C-F`}}<#)^!zgCm~j((OWl#!zE!n zVw!rETHhbzpUCH~`3;Xb#0&S9*gpqIaK9C2P|m#5)^$5U`^EZa6;=WIq}0+D@3}@d zYu^3c#kI5KbC2#tS@u_y7RJynv53@ytH#|#@m3Oz$3HjtpBW7yjMKc6?{7W$=X$rl z!}~wTB3P}-M0r!v{Ru*G9@~)Eyv7vHse1Rvr{zznWMVkPGU@Aw$9RqFLq&|31^KKv4BAH|>lfi-5m>owmq z)*rhdS3DJywI;zgzvJ^C5cbEfKDk$dz5W0ViJQjt6Re#(1+6Kl?fox-R@s#O2WS@h z1n8egdU6E-G~ZgIYrg~<@@lZ>AE04}09kzYgv3)fF94v`S+@EA3TQWpeXjR>{Jh3w zXwSBM<44SxaRZ8h92NNBmj-~fJx$%xRQ(`g9L$b8@e>BaX9O7Ri!SHIzh)u--`JsP z1W%Pa=t>zRc;X$^_-dQ&FKtxTXX_8jEvX~eIWtE6R1$E!LHuB;tu1xPAXW(_6|3C8WpNQ#u zaVkAVa1E>f`R$M5>(5^YAEl14GB2;(;|YA<{7eN5){EDzKk4}GM`WBKDjz(CVzgCUtf(=^Hh(iTWuSn@$MhIn z#{pTVl#>EKFrsq3k!;CZRJ|A)8VJ{8$&uYbY^XEv5f za6v7FNq4r}d?k(F>Hcls`)Rn9_Q~(R9Mg2C)Ny1|@rE1QC)Q4bXFD(q+}9ZOX>ib5 zIYn_-A>Ok3d`v%J{{A}7dUe>VbN?#E|NBD*F07MDfk6P5_ySU5`pr5n>~8%^KP3sK zuq9D7DLDA$?Nl6ed`_9dg#VAj+kzVbx}5XIzxk8-nrBGhKApJzmEO04H$JS_F>g3~ zv-5g%y@c8zZQouq|9tg;Q!!F?QQq~^Z)XYcrm4>XKTXP)pQzch+s+n8FIv!Cxqwsw zH0tQ^r<73@TDGz9`Q60cA}3PbA9^E1$v5-ygYy0DSh?(+d#(gfGsJ>c4$_kqAhhTa2y*K|kTv zzgPM%0sQQ*|DWMdWt~>7yCn28NKU`wxemw!hOd+t{%y-=xalV!&GZ0G5x+lXZ}Q7$ z@dGsC@)u@#1g>2A!LsmqaGgIj5A&svHa2GB6UPHPUoA|7r`h`F^#=R>p*`>Fg1l|w zg-Qt3uLy4dzNVJzFD^z0I24?9J$x8$RO>=9>Ha6d$y%10GacXjp7V!}Y~Xf{^^%!O zcTEe(KkZ=pnE?D@GXQO~itf@+xTz!uLYMhQ_tSj5lcUt|pLg<^P`Q+?NlrJQP%C{udR8*@} 
z!rBX3;>rUKL|9rqj?FjK#Mp9%WF;=bdAe5aEBsVOAk~0Vt*Pg}cGgYs={gl)pP=fB zBzDe)R=vx^E2ApUat6|?o}HBx;>TC`sVHQ-*V~_VC%4O5Er!xxLKc7(&>P1u1&wUM zoqOCJWVWVj_mbKtV#{8B+Z21+%kyMu@VQ}YoUnG5=Xz&i;l-50gq@?!#{y3IKO??D zwWb2p3_tk~ED;>{s^{Zf?8n^Q-ZiG@=^3=&@6u=Nay6H8L?|KlVV38nfVtStcxi-+ zmCTLu{GV$`)1E4}K)7(~Y~WYQ$>bmK{)#1n<5`NmezzD}_iVoRz7!+w?@7QOvS zd=#LpD0#`p-!BKuE7u2<2~gNdfqEJzVqE@uE$sWlRkHMHfzm`n6{l#~guIjzH4G&D zT!c)_4Y~zjhjD(bCM&5w+dr`4;pvJtlCnRpn5tElecwaoSl?3WjB|aTZo2X*2H{N? z8G2B_&lZ{Yktm@uL=Jl()@nYL&3xRC_mjQ-hC}@Dw&-<*2#2!<)r-4$fH~%nRFup- zyCxj5ZL${?o?Sxt{X3dm@f_?7OBTEHgJMB_rHtpmjcnmMPI0tz~>N zC-q|QhwByISGt)75!-Z|Q%pH>thny?W-jG^#=9>dRQ!ziT%&iv&g2m)(J(W)md!MY z!=7E=FkY|2@Aeg3);xs;$?I`O06EK>dk`~P~u)3mT6?5!KM9lSWxD zFpu=O+U+u(riizZIe$8(FFsyf4l!*^tS!bgpRXNmu=%KMi~V!WjzS87!ns5)WBYba zS=G^s)#%RbQyw5b4Gjdi4hfv`mwfQYgf5cwgj}kO*U(kTNh2Lay(6K)lK?V(!Y?oL5sXgQC5q8h+oC2ed0}>| zLNbW={?zoTITt8RW9W0`VkK)c5h^S9B~qCBMy5nb3?Xyn1AqOYT^(&e%S@UZTt zM)s~8j&h;|@T-<3N`?~6+NV~F&{^oXa-wU(L8sTIhhFp`3t`vvfOm8=BlaE^O?K5O zQe~>N-EP(qN?l0dx=gfLtDXTj)TX2>@BphBBy2DOVv?&JNo<31h;Tpjx>dc=9^tvy zHUst}*ay$boUD_lfqGT*xFt)o1W4cU8(XH?W2eH z%$2{4n0N#Yl$0#@e4X8QSuIg}YQG|8G+n|%w|j&#c1amIz9|AP@I`p!B}X*3v(xDH zvcH|UNOQRR?XZP+LR-rjlcASa6^Qusjvu5lQ0rEo*Q#AFQPi(*^sT?!@i52Ag+#24 zcT|>0i1N-RU!BT$j>FEl>B^_tS;)oRPu~JX$*3cm>ERvIbB&5c>Yl$jpAu%9vbcU1 z&#HAc|LhuV$;cFs@&c)JSCzFk_d?7XFZG|S^Wi;CeD=qsV=8%70ds5k*tVMN6d?_2 z%yr^z`E)|18S6z!A1AbYZL6g2 z4!*EmG=t%Ith&B=>vZH;FVd9z<;%(py}Bn6y%WrxB&3-eWlr2LKOXmO$OJJMN{}#4 zGDD--xJwMKN?YwyZ&rX#MOXu(muN+wxI(rDnp6|wShqwbfQan2iHc(2cdR?9f|br) zvacKsq;F|4v5F0iNeYu=Y{_p#J`Hc)U8!9Es%sR%Fsnf%UtOJ)uzG02&bg#ia)B@* zI_o5PBgo|W+k_Y$7rsfAc5EZjlw(m{rGh1-MT;lkNqb*SueZ#}d9m2ZMsI1lp;$Pt z#*ADtTjZpNbg@jNFn1hXR4UOzjQcvTe6S`9Y6A`2INO;?6fI@3#yHv`_QMYrF;5Xb zSGk*)7-1fBop(DYts*1B)`O`s!B~O##4T(`r2=cX0)Od}rUAkwK?wSA8H|QN5yi_XX%JOcUZV zQEZH|OAlOvgus|=hR06m>$@|vbPSytv_+wE3V2l&PO`5*9Q8L^F3PZqBs^JyYy<#b zZ>*NfMeKcnMh+h<;@L2NqE0y`T{)U(3-4r77OHC4j8t 
znoMCho1>To{O)~l(#-r~9Xed?0G#JY8ip-(sTvgCSqg}w7{!a6=Y-gs(dA%8nA(qLMzfgf)RTs9Ci$=`OG9YWG|LzK4WI$9q5`@ zoMI`F-!j$7$w+ww5}I_dbHAAI2yu0Yagj2stdmmPY}*$6$aFt$CoPY8ugCfKDNp^l zqa1C?*=XIAHLUQdAJ0STA1$0bM>*3*ObA6G8?2@ZD}>M?+=*^0@c;F@OcN7ueo{ZaRwno9}`nx?@h$6v2a$4 z=GK06def`J_$Ex3Nw1eB{#8jdoinACxq>KckT8Unxtq+|m zEl-8E51N>NEAWV{7AJkst!YP8=TIYv{|QxJ#a=Au2f^b#E(ixlU!ujI$HMSd++B(! zyzD4+IFevZ%OiPb=VZFlfe)GdJ7cf(-B;o&SAi`5U~FvU*>LxZr+{>bTlL&kJrY!HZ$;goH; z=Ea&Bd>w+F6wx3L_2XQy1vZB@J7Nb{H{tdt&V7-_fVigd(RG=qSi2D2lx5Z!M`!m7pgCb$oAnv_wN?!y*EkPbBPUy@ zqGBdOhvOb*ErJvZytIb(u>`FY9S9RH&-{~PjPX{8}E;oDW-x)Mv}KiCxjw*h;tcNrMBgZRD#prk$XJ@e1CF2Nj;@WAH4%-C zC_EhF!}SSZ>0=mp;yD%PWm)nS%28fXNd-x{d6F&~?y!!E-x_Nfn*aKzmDLw^Yfyp& zyzk`8kn1@_^!ypNiEtem|A~|WVAIt83sOf?Y5fXOn`Z;2_xo`_mt|*|DoSUODmaos zX}%YvsU;T@{courC2xL$@HFfm3D-3n^WBhFxHb7QRolJjB;L^rxj4hDZL1|jpC(Oz z{sir%03PgWIu>JlpBP~-Qmt??`QT0Yx|`1AI+)K>gj0YXaeo(9KU?5_*Oc@+MZ$on z?9t>_ePr(hP{~H`!Mb@db3aQZb2~D~FVE^u!%#C(LVaUKaw9FotDNPHqOJkxFT% zF@#WdO3GuI$LpSUtD*R`Vh%@FS*dKQl(QqyT=tlmJhxkTq=uM6Puj*|$2u-8-l|^V zLindQ3^9|lGQy??bn1~WwCMUr@2Utgc4nbdSl6gUD;47B%?-Y~qu7`lOuGFecc?rD zgYujdY80vj6a*hg!8*CLQmoT=heao(5T;1qy?%6>XgLRn<1rXLYl2aLYg6#m_YBQt zK|o`DDb;avW_J}9Z}L`t7&e825|jA`_mzkgS#u&!GQwVV3MMmb`O`(%$)lu;Oe0() z&d0P2#3ar`BHn; zc@(4J&y$5w77sAU~ySgweKF$fC=4kq31aR+{j&(53C-)rZxC z)gV@avNm7GLHB$Zlj_nrLSinYx0i8TmY=nQg^e1K=TRthNLTesBw0NRW7sg--{uMs z5%iA=rJG=gY%&ZGUlC2(AxmPIx(43NBz_C)4$Uh=8bw%hw%?TL>vtFNa%LShtPW*3 zm+4Cs?3v`&9y+H@6h+JOH^XjI6eK+~p zad->hxa6jmN3J?jN@ABD>ew-LcO`WHx7WGEO2p z4nDI|1>Y5XckHzv%r`vx_eqkiWTuRMqbdx86-!2cO1b|y5I!c?zj^49#MNm+SNK5# z_QRGX20&iT^I&~#=T0Pxb+oF)v&SWyJ5x@2daq@yy59t$A|@9ZF)bznOwW&;IOchz z3-lt?$)3-wvU1EAR6%03pA`wyb()gJB6QFjCz@WbyLM3Bla1rPIniOULxcJ!_FlCb z&~ow>6c8W@q;MxdyZ5?p9SUvlb3BW5Uw00-eI1y5@**6}u$5I++Zd&8d^M?tu2A#* z15K7Py=u1IhQ?Jc)orH4YA??UQM9wZoDMOiFA|1Fpx_5}HV`1QlMnPa7g9`tQwipE zD0vi6jI~RPY0<=Yyw9OPK8a+s`be_=iD?cmdoQN_kS*I5YNkM=_?!p$G)c?nJQyS(9ucr4O-cMN?UthJi$l|Cl58#gI7d!P(P2HDOZ 
zIAvATxr0Z$^9Ixoop`;(PL6h${OT&vUnKQ(-H)z=c0C??A^e>y8yl#XDu(VXz$MGe zj-58@D*M5URk9m2{WgP#wbvCYy;Zf3^`iN*I-FMswh%R;*qda`;aMbPG)&6JsgWGaj*c>fLiT6S6>1C@c{?zNaa$TA>kOyPTM;^=b>(Q(; zm5OBMNzAautm*YhBxJV|4a>QTeW9pFqg<9d&jY(6HcIha*IC!X*qB;*&fauQX>YU3 zd|C6gO574jTxuj#vqR+B!J(NpRg%|7`Y5Z2T6L_J)iBg8dOaA)xI9ml@sLII_c9FR zd}}x(B!xMJU~tg7B(GbfcjRe}{%3RU>%3zc-i8iQJoqodZgHOKJP?6yx!Krih= zLXg77^?X?l!YU|=1+p(rLDiu|Hu_|O4MI~uBii=uQT9fE5%K}=uyzT@$mf*ZiqWgJ zy~`fhfS`@z(aFrll@M$nas8$4_s6wT))&`XRLyyVQrak8Vt4XfZ^5~asnUX-R~Yt^ z?x>b*RgKfuX~(2garGcq zoo-}@E#LlVVdm$LfBktsW3EUPg10m&SptV>pE0=>#m4*U%F{^ zZQEa5j0+EacN}gYKaU0m-QGXDirz5?{9|VircOxYKH|nxr8-IhH3aT;eJh z>B{Q!WSg5ipxycb^&lO!Y>gcVbPrGJ*`}(XIY1bWjP*i1>#pdV0~wT4W!fM7BFta` z4+I?)+$$*nxs{J_mR&26ppswj+!2T}OV1%)aR;SGM)LEDL`*#4N%WjEHJO&~{b1NG zzyIl_6ji;lvzhRUQCgZ&iWO0B--Ck2G$R@?k38dz%=XAh4v~wxnKwvmjjq(S>a-k}p}VT!Y4)*;7|5>+ zGyYIK*(Z#HzO-GNX=%Ql_`B)r@AF>w!!Oi1aV#cTTd8@KKGjQSR_70pb`w#HfB#_6 zhoe&8YEePpSO6p#BojWnU&K~g(UPSK5`8Pm9G5-PakNh6h~pmx6*bsTBqVV`4!f^b z7h-a(hO((qH;332JRD!k@PrZ@UW~T54{LY5vb=G3Bf9V?C$Y^zZr9#Qesa`$fsu%+ zAZc^26Hk6DpdEvDx=8)RblSW`(}i34$rpKKP%76=MdVkZ6({*w0Mp={!FE-_hwTxr z+~JQ+XK#&}baFGvca_x4#5p_0|JF|A1d|pXeM=$xv426XRB_!(NUI2ik{mHv?{9#$ zQ@IaHP}yZI+f9nbB*xQa;4wk>;j&wo2opt)cT0px!L)r}apLJ*g61&2NPJYyO=k2v z(%h0;{98yE=5y%hTQ#&%UDe2iQ>476^nVGzvF$(lurpwNN zv!Eay5007XYc3Ezm|dk_%V;Vc5X@{M&Al$3I)lsYW*XqQyXvOFtt0TFc@-fTpn>IE zut*Hx;8+ovTdHRVi@B9s&N&S&M~MFZp#!JNN&awLLev{dXA}G)M!!eD`CPvOg!UitJD&~xoNB`}@+rH%X?g&TtQSCkLZSQ1n~74k z)oyw2z+Mdb#}e040UbTO4PCv9FmCdynyeylFDbG71`gegmNDNdLXH+KUosX=TfwRh z%k|y52z9tC$tQUB;6c(}99jTdAkb6CBkLrx)7&D!*5=)sOfxfEpvm58YD1u|UMEV5 z$3sH+n(8zbd)X%gI>TAI;eI{IIClquDGQxT!X)OSTT6lVY7H}_)=<*FgOU2_8(=4f z2OF<1)jXB^GamWxag;NgJq`PkeuLNF&LKOY-|X4%6zFep-YC#|Ez@0?(y@X-C9-W6 z5cH3VT=ZK#ZW#K~J&K#)mN^T9Fn4tpk8Wvgkq^1O8z@nhq;-T?CBY}Pd3b4{ zDK4qXs;+rkrX$kzTg5v%4mO6H-5sRKoLDkPB zfGP;?HymqJnt)BUhKDbOjk!B;10m<9=ZI9uw@M$FUHJVbY4#Y!yBTWDANKL%L5jb) zhI=su?Eu)&&?kW&K%A$3YdSlW5@Dqu&@+CgC=7eayRa%Y%XhVYmxGYQlx#fb5RY^e 
z^o}|`g7c_>C;l3jl3A>8jthg0TE9m6Q5`&`FgtA1Y^}hZbECLKe!nX(9S2y}jNu#~ z&&^0z164={SC;Usz@(;&u=@mw{em(@LuY6%+-|~+Pz3#wyw+?j(Je76EM~|;C0!(S zlP~0CprhAus7tBDD0GfH>mqUkzCdw)wUrbUSegC^e|>mY`@B<9jc1*2-e#LP*ts7H z5vlzc7G|kKp;6TuW`3b@49zM&HAWo!l`)7J`wJl$xUN@20#i|VwUP|XL23r z9WxQ4fyE{g(AIS|Qu)OWt4;<20V1tXTCA_jJP>4K-%lQ@)3sEaDbURNBzu0gWi1(z zW>B`ypQP2j*y(ul6ByoTHDy1hRA^#YhXwc2#}%T4}Gi*0tc4#ZC72g3fM>9RsB2^u!Wuf)!aGFldiUCp=x9)Q`Xl^>(U}cw4PR4Oe;67WGU=Rs$piu>x#$k#}729ee(dz0hB!Xq3kN~=pIOnNNUzitAwd>IHfPz_u zwW3Ev9?0KHDC7Z21eyI-Wg4@Tf#B2PiPS}$&}xz?mn^>NNu(S0mP?}OZcRL)>l%R> z9x239u3-B2;k|1igqDmg=tIaHNNHVVriAsn3HD7ddR>mkU0{9NLUr+vs~-%Glfo{rbaj~ zus&$ruIEY=wSl5^RTSw3GOuyR1LXA>$YR5kVuPB0fxJGvnyP<yiol43!;N6VFpvC=e0=|H&{AzYG; z&RIxnZ11F~PCh4E@hLL;(iW1U;%-Tg_Nhz=B4}KY*Y6uu3GKEV@R);3)-~6m-Sd}g zQP(zW^gAfD+C8x9;~{}lmaC^}=1_DHzx8_kq?dQqj+m>RUW)2z4ax59X@3>RizGb~ zeHfeL6WPN_ud1Zs!#zYTFg+wSt_$kUNk+n-hA5vb)myS0;X9W=-95Hv_O{zZYttiS z_?s_z0fO5!rL$$~Zs=h`le6W1nLlr4u9byQ*>c~9%uO$Rt}C%CS}%`dp;q`Cnn}A_ z8mgulJAm7f|+FHoE~P^b>%X%vZsz#6j& zBbVMU#Q>>#80d+Gedm;sxXqSg{hrO{Fze7;zYL%jD01X$5?y7h4-B2v8x(2QJ0VaO zqhl8YsL$X+N$+VF`xPex(5yCfRGGH{G!5omupY ze#TM}3NUwVTUUo89+!A=DmN2R+RybWbaQgI3K(!?-6@qFtB(5=|?(H z23eL9In^$y)VR->7)gVwx4-VU4s7bJWM1s8GwcucTeVwzep4l3Z&L_gw)!%ukV|)$ zPR7gbQ3HZlbB(Qc@{uN;Y|W)rg&M=i&o7KaVrYiuG&D3Jo6sGYuQzM>JQ;-=i&iP? 
zK(G0QGB%9kL_@PS=T}8;(Sqx&n3XOFw--r%e*cz+*5L9D{%gSfx%5}`S^N#&;a-OP zC!vKoc}Tn9U_3y#WoL=EAl_OyE9yTdEnbbzFYXOrQ<=GA_T6>7vmxX^iui)xi0A4L zADbl0VZWu&Hw@izJ8w%V|J%r(?n(zj`vk{B>j?9kW-H`NjuDEej}ahY zqjuCWgI(<{=5hJGej@a|hM2_4(dhZ0eVGmUsjkAU(|W?elvj~3j|dztvBQW#3R{k$ z;J0NP>+xGg$N1H`wCKRSN)L%Jn!y^eCxfPWtmh19LcGn^QVPBbduh@%LP|tcr#kXz z`kVEiPhYrLKAELhW|R{4Zp?87?Mj#Z*0-4%a2oCl`lQ`bnZJ~LuoV7*Q&4tK3b@vB zdhMj42+l58*K7F|KB8k7?^}=OrZ*=LB4%;)lvbl_)byF{C0%SmTc8qE-ea6S1;L|) zUfBwEZ)=m5w#TWHg}nrUsot3#OrGEcTkf7K@EUV=dd8iF8j-(x0i1DC|Eov)zTq5O z8ci&0L=hx?`e^*5mcwy}zG6#S)4R2{b%W)2rV?ATWPBX$*A3KaKxLnmgjb`uTEJfr zvIMy&Kw@@^ZEmx9h2BrC;-W&`b0S1zm6>-;A`Da2D~nvf1Xbq2Lqy<|ERmvH>Pcy) z?R9qq0|)|oCpoCxD9)3t%x6!nTpA&CykwzyT1AMIZL;$#Q$e@xv+rH3cd!<7KyRVR z7e0h67W*fIq}~IO<5AtU$)X~Q@$iBUVQ62wqS**5OOKM(V)Ua&0v=OG9G3(3ug z)t;uE^A}3m4>g>NWL)=%^;elc#V#v=P8dVJ`K40lI`u@P+Q%o$3M@#2jKnU0g$GAC zwWO8s(uxBCeZw)0!H&K;wx66ZRn*ixNjH9b>JpCtmK!H;VLEC^RCI~a*m3J=YR}U> zyfvr+E{*~&$Xv_gT%jVWN8s-N&AnE-gp<58<&f4UNi?-S0Yfix$ap4H1lB-ng_77`9Tx zVUuEyki@@TSG1AYl6Twgv-U|ue&bpJ(k`M+y)~&^dhlgSgyTi*vGz)>xetR$9rORl^$Hn|wwWLwNS9F2KRV$ABTh`O1wd2U$CanWJ^Mm!qIbK-D@uP&Q9LnafC@dFRa#nk$#% zv6N+C$Zndl1(=Y7W;#pkVW~C^GgS@k>k6C&RH8^iGX_-)gZ$ME4H0S`sc~eAY!w z{hLO!sMMUvvBt;F_>D^3+I%yCidhrQliCDHqDZ=ig@>~W9&h+jWi93jSpoccJ8s_` z(VwOZyi8gG1W54x+x}2PL%;Li`xx0yiM z`(KZZT>R9Jt9?4)_|zuw=5UCXONlR7=c^b~zcfFuW}B(IpZA})KY7<`}|g-FC&{ati|)SiCN1+VMgoxly9qkKWTD@mQd zah>b5%I+r-rT>0sX6rL_eNItgFj{4ZrvDy^GTiL>451dg>5=jhDyEDU^xyvvv^WON zvxb^Ce%Sv`bbO^99Y%uEm-(+1QrB@4A44@fB;>|%1z~)+OYaA!?4^!jrR$3R=l+-F zFaG7S1bupWnC*_X#M-`vgl=(e#C%xjbSRHbA$LiO9N(1?n}WjqcgT-w6wS02LkkXUm@ek2#$EY`xn* zRLGyITD;=IM-!zuuGdi1`2aqA)Y1yNOG($Yf?h}22y77!GW=wocSWd5BFXkyx2*AA z|7B|9wNYxKTME>A%0AZ@$k zi#p~dSnT6Dd2;8T%E98Vm?duOzJts*`w%UX?nN;fL;J;7&R~viR)kxl`dm6WubPJn zZ+6L}Ii?U}u&-GE(O1~bzG8T!DK|FD&dEfVhM9R!r>dc< zyM+?G!?Q+4vA068Z+4{eC7EpRC^r^nrca@4J>&4LydUU%JH~^3+g8i7X6a#wU(29q zIRf7=;$GV=q?S1lUIWJed~I4=HEQs!R~X&@JW38C53?9KV6%q1su|KPC{37`Pg%Zc 
zuh<;ZC`&OgoYH287sCS;Eh*Dq7FETp9e!wYA}6yPv2++oo769I>Il)DJR0?;&$6oj z0*S)j5Q#)3PDe4hK^7UebF=@;3IFz2F5`=R+qd0`wvxA2ooT|3j|DW=yw24^ZfmbK zgxUtgH`9+Z)LQTBgghy6$jh+0)qjxbUn8~a&rkBVEc96w;-$dx+~? zMN_^%%>OK1e~yvd)T}P+I=fnlt)cLx*jx3Q-ptDFdonz|0v0!uv_6S{ zf%1KL`ny$ac;VOn;u6ZDa9Fj@oFzGjWfZeYd2<|jySd|Aa_uSS$6I&Ec|(c0*$1*E z-GZoFptRboFPlO^b0rk%s0TW0J#qA$BO!xTW|V0qGG78kPQ3D7BEGcldE~W`@nF+7 z1wu@@yG|+LX5Ok{z^%qBNz?-CL^H8j#F&ZuoxPyfMF zgI4XQ;&iWklTeQIsgQs%<`T_;G-=kO>m2LUU61629moH{mMsw%0I(U>;8HVR0m7 zs2*ZFs>(vptvcp*qB?gY#6G#U5}{GQII=y_mdMt-`gwzA%?aLMbE(_8#=crA))yQy zwjU_vT0i`f*EtSeQQi~KE2v;9Te9S>+1RuLHc>KP%Y!a|S48o*GWc6`W_$Q#Vxpl|?=; zH4FY_HuMeRY-WkpmI!>39zz|Hr}P)oWOOqXjCELZV&D5t%nMHP-CF&fnYn_0i4f`U zm0hAKNlP4HjsdK0eC(b{uQeyTDsMl&_fRoKvTXEQ-f9iQ*k|~8XK$YOsV!H;ylR^UyX^URqzn~%q;xSkZ?yB#lIv0B zt`Zg?EHr7>A*eOcsK+3`D>8#3cNZpU$b^F@{ETS4+$D=ko3>O7sxZjU3%R!M3gNJH zEz;|dha`U=;5ZKL;knXJk}@e1VdVk!X*UBVNNc7F$Y(PAL<{oDgN;&T!oopfq)Uk? z1T|Yp%jA126Bb%l2gk<9pfw?ts);OR1$&udAjizYqciz5%qdceo?)-9bUsJSGAc6h zV-Kt~)R?cEj0NC7v+`AyuelK*i20lYPnWy`-d=rK;dt6hXXRF}J1+xdqUj(PFePw- z1Y^jgOg$JB+|uqP?d0APx_32yjrHkF4{vvuKKCT)&t|Frj=VNGRS+hZF<1&jzt zR}c{Cy@Q3KbOfo9-h1zmVHBhVq}NcTcj+}Ky@VnqKnzh@=tv1IK;AeG%sAicT+jFY z{T_bh;^buSv-Z8#y4PA~?*;KN->WrOPN`BPRUar47U>A;zjnVG>PYkb?fm~!N&Z}o zfDopk>^z?r?W`b$6;X!e?7~%tg%mm4a;O49AKUx5{;iZ9rF z#IjO%g}vxlm8enYlXttk^QZl8qI=Qy%8Qb|}cNlq} z)5L9HGGHQa0+C1XN($y*GM>~SNkwflL4?$^rYYVeC2EWk14h6@m%jA&A2RrT5;RU> zx;7~?CFrxgs|L2=U(UQpN*p+- za3jGtwI?j8G=lar#Rsw-L3AThEF+8uR6a_p@#MwgUSG$yWR^yYk{U293CHq&-HF!O{H)OO|eEg2z=-v|B|~agcaI2YPVQ9sV~xll~fSo?XU=K6hH_KH3plr~FhN@2n3nP(gw zzlpwaDg9YmtNYX{lreca$Blkin{L89{Wk{(D*v1c)i(P_BrUAM(r~GqO&cOM1wNy` z3seRbfe-5C5^z-7gPc?Tn)fFyEF-s0X!~3SB0e`alv}tnZBLX$x(KZ1t{HkMYq8r8 z*Qw4DgAE_`-hEMPV?@aQ{@v@+38jNCTP7v@D&Y66K^ATYSBO&7-YPJ9B3<2pq0I&2 zGHoO@eifVs*_&KoVsO6kpq39Qg>Rc{eW^i~Q%aDRSs2s()!a?v8isA{F!Y0LgY@e< zlSLGkeetR`KFahZ6?at~3`lmHs#JRF9?r_)>pGLhgLXaxQnBCiFIP(tHqlJY+FM%X z6Dz^sjt6KaLYVy(q6HHF~Yr%Y_x*F9IRqXt-?PV~&XZFz3kU&QrY5R;o}ShFmo$f(TV%jUpvgNKcFieW*A 
zjq+DdwLZ|OJeJO+EJkyc!dP%T#WqLUDhO>FqFy|`;Ps-+cA0rf`6EisU)hJqUxjM( z;ZBOXb-P$4Ju;N4l>|j}Z**3AH&tSCNr2PVB zp0hO4(ldOV1Fl{C#d=~h?c=??31Rn<{W{sNlB6qL>rBEg+Tvf%Q@$WXnOy&*&lYK| zcX2QPzhU6J>B=a$B)j#}QvKykk@^u=Jqt^0bA$hzPQ!f3Mr#K;a?7SdBcaA$`}SPt zt-+sRP4Cw*<-jl(Nro9|=vy}uxlw|mv>~#}>J-dNGi67nO54iE?T;`CONZ#kZj;Xq zC{~G7pS4Zjim*7SraOXOx%Qc-1Q#beN&AB&(oWAsze( z8<-q5U8RJK7@Z05Ho)wg7 zF>Ne-@mF0MHSOY_+*Kw=iRed;XwI9A^}%K0RKnIzgQ1`KiHw1Zl9{vh1|}mzb<-Iu z?ozFx{j)ZQb&Z|+RDmaw)_bgGQqpP^F1;#lup4T|BGr#Uh?6(t68W2jo#s^Tsl%U0 zZpC?KrIRM6ZFZcbIdlxzp?02m;hNM_n%^2}l33a1Y?6ZusMI>v(aYeeHw8s;UE10= zMK}b{Vn-MwcQXBx6uHbMJ`v%pNug?c?kq_l2Bo_x1715%EjT_OX){sd&4K4*UYRkq zwg-KW-2yco!Y8;zG%OsVSHa5V`wh>OXL@9&2JA1^rW|)=YyaNQ%0%oNXO9EsgCXfm zR5#f`VH1`PZ`#c8E&#uvA2j2NJ!JT6sE@*!@=QJIH7QO?H+O9VB?sUWdb0*e_$E)o zwlB>FKg$Q0Dfm_x04KP9O&j*qHK~woj`QB1$`1-T-pUPFjpv+In{~Y(=*c zLK=CT&>~7(!lfG0q*V~*mQtC(<&S789dl`_OOR)3*5DmZ+k$)bM2u@#m$M{o{fdpY z$tzWoI9$wTt>4OjW&jfLtNfP2;eYd>F``<=MCk(9t2ijML%ZkWPfv*Y4XNi;*EvS@ z~C=8 zO^^&-13ipGg#wi$iZp2YlH z7(%DGX6uD7LMQQ;jiKfDYLB$3TO>w3EnW;V=TD?;I*Q1$9rCy2R!BDSR8D6{zLyR@ z59@69Zh{IHo^woa--7dsiM~J_^Nj8`H|UcP-uGA(UMB%_SdW$J>e#R*y3=6?39Nmnbt-jE;qR zKDv8+H^tVbv`TsGiWMA}+sEwiN!MU)716{TauD5M^nOn8MGe|=*m*GtNpaZ1r()2p zen_i93OssSh9%)x>9=W#k$0%nnV^x1o6hZHGcW)hJw_p_hHN&z@{ov_tPvQuROW3E zW9&?}{opsOJ|5TCq)o=JBP#9uoyVpyS_vb#rqVdQCm$e8FE`&(#Kb0SsdMnCbZxLm zC@nr^a`k$l$2+;4^Q*T&0gJ$!RGxaF>N*>0>}2uV!LK3Zcz-d=`t^>){kWCL$|TW0E0~SSwyuv%sLXm$39>z5GAj|3RGuXNFTNY(ks zP0iutVHs-jWAuw|@$ne}7T507=U9FIj~}lM_C%%m^c?eJ--8cM?kde( z21Rn#I}lM>N(P&k#U}Y?qQq;RXbAhC6ck%UFDB-xDkJ<>KJmks0vp|N=b^XRB`89 z2_pz1VaYTJbhs=uy}!03*rqwm3mR@9b?e7jJ>?T^aovhsf3wp;cz1KZU`Md1c8(4i zzcsfa1oGKxlM#c1x5?cGNA5sD_YN%XiLgN0W0a?&-mcg@dqWF%5xRSMse!ErI$dsf zxD|cu(fqDpwq|3ou_ZzwCQ@8!Cgh-?j(N+nH>MQ3Y?zSl&#nF~xo^vf&TO zE7QQ)${B`VTD5pjZ`ouL9yxLuX)=6SD}?8Np~5ELj(Rm2=J5M=jp<_A*c#TxS9R7D z?8hH;ayjoRVE$HB*I!X zqx26{I$qH@vw$$Rp(lj*yH?C5sy-+fNi~&zyaj7<@2=lf$!EV>lAt?Ajrf@K1@SA@ z`Yr9!+754F3m+F_Aqt(<3?uzr%awUa524|jFI&L6tnP;DYE$CImzAoNYuiPt&_Rl+ 
z>-n`pu3z5q1Jw+X)8ga`ss$Y`8RYQ1)}=_jY6(!=e2i{|S>3x27)R)CUr-;zVfn=X zMn0ny`r&To;t4L>>@@Z9(zISrkex}((@8p$DcDb2^@rxMs+o$dNfgt9t zO}VIg6Ytc-eSaYIrHy|$XCA3(Y8_uw>eIxhEKg&-p0$`9KL>iliKYeva>8c2LUG*y zWFXA{Ml+L!nCp6G@5iDR3a%m$YSW@Fq7w5}#I)GCd*_3xg8MLFqu=&Lg&gD#T4@ZU14lPRv_4RvWxD;rG z^>S>_=3{A6Pj)XkqmNI;cp%`eXpfJ_OM+a@sZSijY<`cl*Qu|wknRsKy^B6IUZY$o+K= z8U7s16oGc*_)c*m>@Oo=fz`b|F3Sm2QQElY1CzJ&Bn{btNRT;{6a_TN4Px?`kWk*U zd4;9#L0N2!_!om3Rg4_K&#Bz?+4$4O?j3ybn(FLjqJoYKYnV_FQJ488Rvi0H&*&2= z2vS-V1j<#j%X&xrb&@Q9>VtZr19)4A-$Q~5t-uTQj}Qow?(9i8aed%1>HWS(EAz!u zL_NE|-qEWW4I~Dq*~-vLM0ANDwl^86Q(a1M5#2YWdo~hxwIgHUf)i^?|Z^v&YqHo!1ovD-6V+KU<{`-wjK* zE#NoeYc!~)XA>bQYkzOEnXGPmSs~Ppy`lt9wV9Nic2XP;cSn7y5mW5mzqzb7%P$_8B zRVPM^Mu<|OK55=HI-GP{pkOz1UMNAJF^J`ap5|DlcvfdiQIt}a)UGMhY)vJGct<$? ztxZ?W-PC?TCwO3xuHhJHdJjUtZ*(UypA_DZc`?oPynwZ3d0wwB>6w(+}GRrhjk%Dt^V_(vq zm59E@eay+fab3lLcdba*szeO%v7n*G`3Uy36Qw1p=k%-JcHL{pwMy2l?2RoI8gj)i z8uN_LyGgW4$$4GJ>LI0gd3sB)tm|KmwAQ{UeopYLlmxq*A>^gjdwmgXX(Du0OZP*1 zx&5BwBoTkLXPfGhwvFFF{M;rHn1hYhGGsTtnVxgE;9^`X*_3CDynFsL#5!O%d+$U1 z^LMrP$*qCkxxgd)oaI`MF~e5~G`>paRpKMvCTUZ`XY5mF-Jm%EMv~*p9w%z)8cyNk zP`T_WrXwnyd>lUXyT@NDznq};Q5(@hW9lJ-5Nf#5#5BcJ@H>B~mYF6ebp$uxkeW#0~{|X)F)3^`X!~ zmqylX;Kqu*N>_7%IkbA=PwS1|$8K15k()dlFQpdFt3)0o6%mSGU==ctjIW#R{MBT@ z@lhnHg3ftm(M-7!s!y@Z`<{_;0=EYnr(KYwug$wF=4h10ovI>TA4RrH=+(TU(jwJ) z_f%-S)1$A!T=aR5Lg~Jc4sN`b`L*_=Hw$t5k3&S0Z<`)t;}iK~+4-wrn|<8Eo&vi+*zdkKV z6EWbAB>z}_s7TBH^gfYu$1Udbm%|%>W6oQ=MIM0_zV$7g!5whPeErB@2e;Msv35C$ zu(-A0%mZIe#BTzgmmqnU4xq6`X0oXX*%KgMCFH$gb8Su*+LLvankm#x+pcwuwb0;x zrodhQVwMVDo(ql3w=snen1|l0y%vus?_KM}nz!Ih4%L^#@Xwn4S%Vkz7RRF;!W;C{ z-1i=X6$B&;HgSkX3T~ak!v~*{E?g83IX5!0TIbC)q*d>9l96@fQ=>>8z!S+_#|9m; zJCSww&tI4+{hTZfTgcQIcbmQHn_&-32WEW*XtYy=T)BUq!CIB>cOYHu9*(}@5pt_v zaj)MMRhov{#E!TrLkdOFS`v>VN1=l}fXL8$81Mo|@1~lV@XRDY%b~(zMXP3auv7l> zHD{7#_~Z=ro=I03Lyfz$yq_;y#S30aG}MMb_nc8$h;o~cb6Nqo^nhM@+Gt(Rna*bl ztdg?S-^5C{FJ*e5MQxiCId9NKDSw_Ky~W1Aly=er0cKhA#UGAka+m4GD`&Eb@Tl07 z(vx+$@r`;MXRWPnWo~0z|5SA?&Ks8e1hup?lOEL?gX?RDh#AYItt;NexSB6$E*`jf 
zz#b5~*6!Nlc59I9PCWb75OF!?;eMt?4MfTYKAMm7T)1K1U?b?!fD;~i)reg2flRY3 zM1vN1jf|@y!d)+lR_6}ZH+!~XSG@tUej#3TY5F%dC3ehF1Y#`+FkSbsKvF{~!sA$_iisDdS&{wqV~ zd?Wo%@xZQ+czGpqeQyJZ>EJ8Q`k3Wok$>7~MeXvn!{2uAl}fCNfTV~g%!6NU8Ljy~ zVe4~_fXyn9#5x4;DmbIB=Okq~zo1E(O>O6m5JA_@Lrb_1@a5Dv06n;_pd4yFRXvs8 zEC>yN?p*hnetpC|^-%sKoPFmPP?|~oM%ly83W3A$Sqbh4EMGdZ;lQ1LDuqpp-<{;e zvHq7}-^l0CvMY^RRZA95HG3FFm7_PoEOIooPTJCy;-XZ<;C*kquKJ4OFkQbKL8;1T zEY|P6c0+lH@$Cfdc3|yU-mJ%~qkFYZJ~U95sl|7?ou3e%9}g=<<BO!wVGDQrnGEGBgeeau#{7woeA{&2X^Liq&u>i9@|i_ zk4zGPG}CSC7Ui>>o1eBBG3>oG6`S!c^|e!ii*3s#c@h}i%{6-JnPVq>w56AgQ=!H@ zEsQW3An1DKVWlX#F*eJ1Qa`L;C`ju6PDN7uIhRzS)**uAUKI=;OA=L<p_>c%ddZUAjME(LHj)0~f5g-YA@< z;T)J1&c_fD+KqpEn{k;bMthm?K1dg3<@5ABmc&WpWWf5wo~=ZXx1E*c-3lI-I)_d@y0()M zOt5Rf_wXPAkTj8!9w14(84_$u{dNvo8yPuKFn$m{F|A$1Rh0zYi?DLtcOD%qtDZ+! zQ^=YAev(;fWRo1}Py)iJ3safzXB@BX0$^FnB}G zmu&_leeF=^EfF~nZQxH<>AH4kYi5_*nKmH7SXi_~4FS1@aZ{4s6#Z-7Wa>q(+^J5BZe5QY0IGb=H!FceMaw zqXV0-CZ@$^dUX+xQbclr9d{L&3+67oGzj+8k&ags^ipmQ3ih?!%mWnk34C)2(_p(r9tnwpfJqGxNOF~6}N2P7{ks&9rb0F7A zpY_@>@Q^Oz_^nDGu8`?#@R6AxMD9tpRAFL(odv~T9pe4<&!HhJ{tU$>9Lx;{#GAS)vS|3-)ABZm9vF=}HmdfC2<*&Xuw zCPO|=6ZKPdAiO_s=)X{Y_$)`Zq->nji3Ao^UmW*&znj5)rGQIQG3x$^=6N8z~U zU`5$7Nv<(>3L%fs@eu~jU~P6Uk0BRwE`GyV=TbFMLDHu!H`L^nt%zA8r9>Gx=sta# z%ugsCTYR^=EL>W#`cU-TPnv%v3I@oz8Th$+z#6bZ_+3Qj{JIlP}5}aZ#5nUEB&`gu$755 zpDdlcj@cud+M4Fr;eLK%n@Xo3@wt7j%L(MKU8g=6Uh^zkW6aIYc5cvq z$xjobv%Het9*{y!fcDg%uw;9_J+V;8S^u%sU|cl)1)@~Y1+;|Do=Pu62BYEBhlqn; z^4@KocL-HquV}U6b6SD`Y^eMOLJm2=HC6@R6roDHq|gf}-iYQvL&|5Dgiub176`0a zok4BB^G2&;8RNs8i`ig&>!jAO?C6m9NsjMgoh@l%dpfb~^PdSNnPVKrDLUb7RnL`@ zZbCpKl5efQ{=Kd34{|NO5uh!RgGdo6;78ckit&vswNzB8P@jIMbqC)-obrec#NXgw zI_!teN7r@^)KYa_Ffs7Ix~9&MJWn({X8Z9SY zZEe{UPsi`$uwzxKU7=U?;@0MB=cW!)U`8F#WnmVVpavy~#>aZ-JSPYk>{G8aDn-5d zRn|w7Bx2vUefu5n?_2o#OygCwQkX3llPr>PN+npBF-u31h6p}N<>CfjXhV>IYZQT# zwc}+iNhEh$7-#lYz!iRW(kK0_Y+PRvPQ77_#9XPO^xiNKRJ@>KLrV>0mu;6^IJ*zw zQ_?T8=cItGyU*6Ml%^kJXj2%~m~K4vfti*}(-skTydu1QOZGNSvTsldJl9wl8CL$&5EvvRU?kLO{|n*6Dgwy8sh_s~aU 
ziQ8^~B=%=pbrLI59NJMN!na>@$ zNYkZ+xMO8$sMT;f;mg2-jm^O$NN%|7XMd-uCb&U(_nz@|q_2S_Geb;J@_f5xUBjfX z8Ac5^ki=U$CIY!?;c+O_f8-YFxm|W*i`CyC+639RurDKa8#GmQARCt}nYrIQ@J~Bb zmG)mKOE_ZpwwCs{?_DeOWfPpHkLsaFnoH^7z(liIEm)MRFM~}!4Ge}*>hK0&dR+88 zaE!Q?gLDrqg6Rec?-ucy{wctL&R0@Edc^)p@GQgfJV4FMsNH7;e;Zv~$F022&;H)U z?X-*Cw>E_`=f{>=k(aufa-pHUVdSf3EN3b~hOpai==I(V>W!!S+9Gg7A^Sw8A( zSuku?>TQ3-7sE95OjZP~xF90QK>93sK56((g=+mB_k|0E2abNvcO$^BWr5EGb_}3^ zGSIEfv|u;D^B$=<|62OzDR7QUvIlherTh_C7iHk;YXD|X!R?sQgfUx#H{?FOs`$Gq z7fL&?=~)KJTNN*6#rGl(U9iIw)iMHun?3+7`(babdD!OG=U@|RYz=FbK*Ny@#-swX z3o4ns75Cso+53?cc|)f``+x`gsFD0G7;nB}Rg*epSXRG|sCte?Rl=LA8{Ls15c>_y z`*`6b%6%Ri_IETwRc5cv2JRNJ#?TW-?gORyLJs1F<9nKfsTzdC_k0l;DM*t*h1W?W*Ht{GS3Bt zvPri2KG@d*B;DdsoTP!MfV*G!INVxjC0rEi6#+3Uc<4$WUQ<80BT7#dg-V=e2MyUS z!lka4;RKCUar!$FtTPILqGr4%s7^*4o_qpO^rl1UpHt9$#t;mnp8^tyhCEnbY#oD4 z+qv=kTw+KR7KF^uZAUrL8z{x!4m7OL40c|&{$Aanhll; zy|wEx|64+0o&7h%vabm)Jf6qOm>PEogr&|TRNx?h*V3sp6%%b8IqeBRCZo)E zu_nuCHd+HzIwCuDJH+YawT$l_uBr$1lDmF_Ga&#uSs);)hY=c)pTM{r*STA-1_iaJ zFmCy7$2oLG`0nGB05A9ZJ<{f@NSu*MP62xHo>2=}>xvaHUdT#DqkCwqQ|3zsW$?b2 z+wcdQlTR%Y3^5s;5th0#BdumUaIUVTEs2BP;0bT+CvxSHLEcgSPoiwGl&kYu;417TX zNJ96w3!OgbzfRib_H#+buZB9BW`591-@>MeWo28B7ExIl<~N{AOKkoWf6Aw!9l!Xy z;l%VXWy>lKjpMEfFXha(*{gPTcD+z70@fr`Z04h`Jn|`iz~9Wl4;)hcE_bcByvOF4 zdLiqcmYPULp%2q(=$q&tu)d?>#LnmY1UR3Gx5TSdPA1Z&!Ed5SYEe`?c4itb(`*@A zk##SRR_UThR2CBbha!uxcALDePe)QWUgCegIIJ6` z1(%&0geNfY7Rc!61SlpeAiR6VtO^OlHfRwl!QCj3m@JUeNk{sG4qq6QrjdMZHVSpg zz0zm;ExTcOHJ~j!{^@&6boyS95~r62jf?;B-3my0_Ex27b8P8e#NyXN@4QPj^wih^ zn^lm)r~%5dsIaTElb}c*hauZM)B@KOCZ~&RZObMySZFe=wCEhawkXO)ky9F+1p=JBvQJhQyt!LFVApg7H08ZCfYz7~}c zt+k5r#$oX}2!pErBC*s{g~(t!Z==UM@RZ(MvOlIGxi^0ym88dF?t)1+Cp7fx+~Zjo zL6@8#(nX(DM}*JlkYOkT5<>2scaqR-Qg*{w*z&^R3bg~7pF5i#Sk~s zd6%L**Ix}Ja+2LaWN{;$smP_RLKxfW4Xry@`V}`u`@iL^{^p)si8*fp)hzXjS+xZF z$mmkc;O397uGAl*xtX9E89M7@wyG?rF#IQUbER)8F!T8(#0ht%qbrvOTAIZ9f(rZ#z!>yjE)@KQ(KNNK7NCa8Aeqvf) 
zp|UId+YEm&@~t;7_m`x7r1i(dS}z|z*C5Vc)<5$Hxmx&cv^&#!d}nj`V?XK3;-$xj^|FtS{LYVG6?6ry%S%sJG9oQeXGgm(&HvAp(hJ_#WgOe0jl|V|+(_bcA zyuRLAZu8H5GJ>)WU+K(8P2r;i7GxH`x!q45*Kq90x2M%PTxKN${_l%Q_(V0S#m8Fq z+b@)hUX;8!0$gsZZo4AEMclo@`p55PfY0ZUX$Z0#F(5j5_UvgA2vgw#Q}SbOIh|fp z?#X$6u`nMJuPwzz=LY^ ziO)D8E+v!1R{2SGs(in@xT%$!B)ary3=;JRDA-j#TLiWV-JBw~>Af4V0P^pdi(R;H z()=MrzNqeI1598|6cqueAxX)2r)9*Rb1_a(_hxQx$d8ac14OZMK>e6{LmwXVA6Lq< zoF6>jJBx6*{m$Re$lXXw27CjfVoNIr8+Uu0aMzVPlu*kz|jjAa>tk5oSL#!OAKQEyXu_H126e2mkP(! zHQkr-4wTJD)8Y((iMIl(N%?ME_ztoW)~{oU;{o7;<8n0vdCVQBpoRulw*4QD8(mdzIb&Dx!u*PfLfk_CIW16^PSh zrDUGGI|xaCo<{AYsY%W%)^VamcIaC?G%g$Vf&XXTCSZ)^o3pW)*HkIE_>C#Ke_2&d z(6alRD#>M>{5G)D->djtSaX*@Q9Y*Yz!f;d<`jpYeE+BP)wx2G^bk*ZlUNq% znoK@2JBUgrUri~XB~~YM&1kqrr9JWrc3CNjpU;w4!42p?ZW*tbPg#?ClOUB=P2RI0 zoh?S4_i2tkZxZH%_hV3F33*_FCBvx^`G?6VdXk|l!35Q{ng0WOnkS6hIeYB_0A?Cy4!4kP>w$IVgT!V&KEiTkuT9DNoUHkIw?D#B zvS=xbWIKJjJ@)K(*u$4J6N@oja~O(49AO$c7WdW(sEMMjs2`fv4Evz*bKx@n;mPN< zz6+N|b*%Y}cqvAT+%Neb$tC@Qo}P=47bCDePFxz&%0sW{ZbQehY6c4DxfRPZ6lu4=5vt~!74z2FaC;B*?7uWL(u3sJHmPI%q6 zESp(9Q+yK&?L~xMm*g#qyFV@t5qmq!$|5J-Shgo961usz;!F8)tKacxYaGr$ERre{*1Es z9qm$&tpMra_m~PyXJlR6Vk{5ATLYF}+YZCqS+c+Ua=rYi^h1p*`^h(iw-4y;^~=q+ zfgD=qB2D<@2ZWj$N){Eu_A*v6A77ctxFjlN7~q%lLZnj;pC*t(*hhNmwBt9~PnNBb zixB)Q@mow4VDGSy22tMFdC2l<#XX${746pz#OZI0Y$h9w3aBPcenJR|%Bxa7QDFJg zK0Oe_6ZfiFNlK<4mYLn)bCFZ1V$;5=bmF?n|DA0GlP zUE&4IMgfuxoz&VFf&+W^jL|lX#KE`tMVJO_S!XB#gJA8|ln*VGeka^Bi@}PodY-EV zv7*2ODiPgxd{#xI*FsJ(ci<5r%sxW_5UB+UFvU3AM`#x}v4|d}2Y1Pd z-(%?V_Gvdet`pYhn=~!RQ6g7>WY7Kq>7V$*Kc`isiKjq?%^2p>Y|(d&-+5CM=j(ax zM$&3D&$6>0@bh+GT{pyO6@6F{KEm#4smD&9`U-NU>;LjW?@GU|&hs<>cBVrA(Fg8S zJSF_nw8Q+P1^JiZ{(i~wGQQ32Z~14o^oN!Bcd=Z&e2oRaU00Z^`HU?72g3Yoa4I~R zpt}82R{HOQx8XS&<$!|2Pq@Rs%OW5MFS1{Q@1NrE--r5Z#^24K<9VCcE2TpJxOvDp zfZAl$g1-}FpIv&!nEq`ZhL-0D?>~+a`O!LFuG0J*bn5_QwYErf79#!QD}P@kL*q;4 z%%&NBOUg{a45SbKLy@t78GxF=G6T|HAgQLm!mE zy#@ezpIld-MYvu2k!jx2xP5Z+$$Y5g@T}~i2*vZ+C1{uVS>EQK{Gs@K0L)|8K1F8) zl44bhND2YlX}zyf`kp{MrE4`=4~~w#JeJ(;p^e6W)vE-q%z;u>|Aj~Da+X_-(4sqp 
z;Qr0?Biz0}aB|B`{KH_C%RKsUHeZH_l1>p~c+I%N+ev1gB5=FAwtnbcyuNSKPT-cIMFp TdHBO$@PAKbl%-1_8UFr15!>LM literal 0 HcmV?d00001 diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index e95409e08e9..e807ee54fbf 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import dataclasses import inspect @@ -299,6 +299,7 @@ def __init__( extra_kwargs["delay_wgrad_compute"] = self.config.delay_wgrad_compute else: raise RuntimeError("Only TE with version >=2.3.0 supports delay_wgrad_compute now.") + if ( self.config.tp_comm_overlap and tp_comm_buffer_name @@ -2116,3 +2117,12 @@ def set_save_original_input(module): "set_save_original_input is only needed on transformer-engine modules that save " "quantized tensors by default. It needs transformer-engine>=2.6.0dev0." ) + + +try: + # pylint: disable=unused-import + from transformer_engine.pytorch import cpu_offload + from transformer_engine.pytorch.float8_tensor import Float8Tensor +except ImportError: + Float8Tensor = None + cpu_offload = None diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index d501c11a0a9..74b9a90764d 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from contextlib import nullcontext from typing import Optional @@ -8,6 +8,9 @@ from megatron.core.enums import Fp8Recipe from megatron.core.fp8_utils import get_fp8_context +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_set_last_layer, +) from megatron.core.pipeline_parallel.utils import ( AbstractSchedulePlan, NoopScheduleNode, @@ -450,6 +453,8 @@ def run( f_layer = f_schedule_plan.get_layer(i) b_layer = b_schedule_plan.get_layer(b_num_layers - 1 - i) torch.cuda.nvtx.range_push(f"layer_{i}f-layer_{b_num_layers - 1 - i}b") + if f_layer.layer.config.fine_grained_activation_offloading: + fine_grained_offloading_set_last_layer(i == f_num_layers - 1) f_input, b_grad = TransformerLayerSchedulePlan.run( f_layer, b_layer, @@ -472,6 +477,8 @@ def run( for i in range(overlapped_layers, f_num_layers): f_layer = f_schedule_plan.get_layer(i) torch.cuda.nvtx.range_push(f"layer_{i}f") + if f_layer.layer.config.fine_grained_activation_offloading: + fine_grained_offloading_set_last_layer(i == f_num_layers - 1) f_input, _ = TransformerLayerSchedulePlan.run(f_layer, None, f_input=f_input) torch.cuda.nvtx.range_pop() diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index fd1cc3d33c6..786a1b850dd 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import weakref from contextlib import nullcontext @@ -8,6 +8,11 @@ import torch from megatron.core import tensor_parallel +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, +) from megatron.core.pipeline_parallel.utils import ScheduleNode, make_viewless from megatron.core.transformer.module import float16_to_fp32 from megatron.core.transformer.moe.moe_layer import MoELayer @@ -350,13 +355,17 @@ def submodule_post_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor) Run forward pass for computations between attention and dispatch: pre mlp layernorm->router->dispatch preprocess """ + if layer.offload_mlp_norm: + hidden_states = fine_grained_offloading_group_start(hidden_states, name="mlp_norm") if layer.recompute_pre_mlp_layernorm: layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( - layer.pre_mlp_layernorm, hidden_states - ) + with get_fine_grained_offloading_context(layer.offload_mlp_norm): + pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( + layer.pre_mlp_layernorm, hidden_states + ) else: - pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) + with get_fine_grained_offloading_context(layer.offload_mlp_norm): + pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) local_tokens, probs, _ = layer.mlp.router_and_preprocess(pre_mlp_layernorm_output) @@ -437,6 +446,10 @@ def submodule_combine_forward( hidden_states = layer.mlp_bda(layer.training, layer.config.bias_dropout_fusion)( mlp_output_with_bias, residual, layer.hidden_dropout ) + if layer.offload_mlp_norm: + (hidden_states,) = fine_grained_offloading_group_commit( + hidden_states, name="mlp_norm", forced_released_tensors=[residual] + ) output = make_viewless_tensor( inp=hidden_states, 
requires_grad=hidden_states.requires_grad, keep_graph=True ) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 654827dc6fb..209fdc9530d 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. from collections import OrderedDict from typing import Dict, Literal, Optional @@ -18,6 +18,9 @@ ) from megatron.core.models.common.language_module.language_module import LanguageModule from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_init_chunk_handler, +) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.quantization.utils import get_quant_config_or_none from megatron.core.tensor_parallel import gather_from_sequence_parallel_region @@ -117,6 +120,7 @@ def __init__( self.parallel_output = parallel_output self.share_embeddings_and_output_weights = share_embeddings_and_output_weights self.vp_stage = vp_stage + self.disable_param_offloading = True if hasattr(self.config, 'position_embedding_type'): self.position_embedding_type = self.config.position_embedding_type @@ -410,6 +414,24 @@ def _preprocess( return preproc_output + def preprocess_for_fine_grained_offloading(self): + """Preprocess for fine-grained activation offloading.""" + fine_grained_offloading_init_chunk_handler( + vp_size=self.config.virtual_pipeline_model_parallel_size, + vp_stage=self.vp_stage, + min_offloaded_tensor_size=self.config.min_offloaded_tensor_size, + ) + if self.disable_param_offloading: + for param in self.decoder.parameters(): + param.offloading_activation = False + if self.mtp_process: + for param in self.mtp.parameters(): + param.offloading_activation = False + if self.post_process: + for param in 
self.output_layer.parameters(): + param.offloading_activation = False + self.disable_param_offloading = False + def forward( self, input_ids: Tensor, @@ -435,6 +457,8 @@ def forward( runtime_gather_output (bool): Gather output at runtime. Default None means `parallel_output` arg in the constructor will be used. """ + if self.config.fine_grained_activation_offloading: + self.preprocess_for_fine_grained_offloading() inference_context = deprecate_inference_params(inference_context, inference_params) @@ -701,6 +725,9 @@ def build_schedule_plan( TransformerModelChunkSchedulePlan: The model chunk schedule plan. """ + if self.config.fine_grained_activation_offloading: + self.preprocess_for_fine_grained_offloading() + from ..common.model_chunk_schedule_plan import TransformerModelChunkSchedulePlan return TransformerModelChunkSchedulePlan( diff --git a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py new file mode 100644 index 00000000000..1e280a09d35 --- /dev/null +++ b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py @@ -0,0 +1,609 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ +import warnings +from collections import deque +from contextlib import nullcontext +from typing import Any + +import torch + +# CPU offload implementation for pipeline parallelism +DEBUG = False +DEBUG_RANK = 0 + + +def debug_rank(message): + """Print debug message for a specific rank when DEBUG is enabled.""" + # pylint: disable=bad-builtin + if not DEBUG: + return + assert torch.distributed.is_initialized() + if torch.distributed.get_rank() == DEBUG_RANK: + print(message) + + +def set_ideal_affinity_for_current_gpu(): + """Set CPU affinity for the current GPU to optimize host-device transfers.""" + import uuid + + try: + import cuda.bindings.driver as cuda_driver + import cuda.bindings.runtime as cuda_runtime + except ImportError: + try: + import cuda.cuda as cuda_driver + import cuda.cudart as cuda_runtime + except ImportError: + # print("cuda-python may not be installed, skipping GPU affinity setting") + warnings.warn("cuda-python may not be installed, skipping GPU affinity setting") + return + try: + import pynvml + except ImportError: + warnings.warn("pynvml is not installed, skipping GPU affinity setting") + return + + # Get current CUDA device ID + err, device_id = cuda_runtime.cudaGetDevice() + assert err == cuda_runtime.cudaError_t.cudaSuccess + # Get device UUID + err, device_uuid = cuda_driver.cuDeviceGetUuid(device_id) + assert err == cuda_driver.CUresult.CUDA_SUCCESS + # Set CPU affinity based on GPU's NUMA node + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByUUID("GPU-" + str(uuid.UUID(bytes=device_uuid.bytes))) + pynvml.nvmlDeviceSetCpuAffinity(handle) + + +class PipelineOffloadManager: + """ + Singleton manager for coordinating activation offloading across pipeline stages. + Manages chunk handlers, synchronizes GPU-CPU transfers, + and handles virtual pipeline parallelism. 
+ """ + + OFFLOAD_MGR = None + + @classmethod + def get_instance(cls): + """Get the singleton instance of PipelineOffloadManager.""" + if cls.OFFLOAD_MGR is None: + cls.OFFLOAD_MGR = PipelineOffloadManager() + return cls.OFFLOAD_MGR + + def __init__(self): + """Initialize the manager with queues and dedicated CUDA streams.""" + # Queue to store chunk handlers for backward pass + self._queue = deque() + # Cache chunk handlers for each virtual pipeline stage + self._stages = None + # allocate streams and events for synchronization + self._d2h_stream = torch.cuda.Stream() + self._h2d_stream = torch.cuda.Stream() + self.reset() + + @property + def d2h_stream(self): + """Get the device-to-host (GPU to CPU) transfer stream.""" + return self._d2h_stream + + @property + def h2d_stream(self): + """Get the host-to-device (CPU to GPU) transfer stream.""" + return self._h2d_stream + + def reset(self): + """Reset manager state for a new training iteration.""" + set_ideal_affinity_for_current_gpu() + self._inside_context = False + self._cur_forward_chunk = None + self._cur_backward_chunk = None + # Track the first microbatch of the last virtual pipeline stage + self._is_first_last_vpp_chunk = True + + def flush(self): + """Flush all staged chunks to the backward queue in reverse order.""" + # Ensure all virtual pipeline stages have the same number of chunks + if len(self._stages[0]) == len(self._stages[-1]): + lens = [len(e) for e in self._stages] + assert min(lens) == max(lens), "All stages must have same chunk count" + # Clear the last stage and push all chunks in reverse order for backward + self._stages[-1] = [] + for chunks in reversed(self._stages): + for chunk in chunks: + self.push(chunk) + # Clear all stages after flushing + for i in range(self._vpp): + self._stages[i] = [] + + def push(self, handler): + """Add a chunk handler to the backward queue.""" + debug_rank(f"pushing handler {handler}") + self._queue.append(handler) + + def pop(self): + """Remove and set the 
next non-empty chunk as the current backward chunk.""" + assert self.size(), "Cannot pop from empty queue" + while self._queue: + self._cur_backward_chunk = self._queue.popleft() + if not self._cur_backward_chunk.is_empty_chunk(): + break + debug_rank(f"popping handler {self._cur_backward_chunk}") + + def front(self): + """Get the first non-empty chunk handler without removing it from the queue.""" + if not self.size(): + return None + for chunk_handler in self._queue: + if not chunk_handler.is_empty_chunk(): + return chunk_handler + return None + + def size(self): + """Return the number of chunk handlers in the queue.""" + return len(self._queue) + + def init_model_chunk_offload_handler( + self, vp_size, vp_stage, min_offloaded_tensor_size=1024 * 1024 + ): + """ + Initialize a chunk offload handler for a model chunk (microbatch). + + Args: + vp_size: Virtual pipeline size + vp_stage: Virtual pipeline stage index (None means stage 0) + min_offloaded_tensor_size: Minimum tensor size (in elements) to offload + """ + if self._stages is None: + vp_size = 1 if vp_size is None else vp_size + self._vpp = vp_size + self._stages = [[] for _ in range(vp_size)] + + if vp_stage is None: + cur_vpp_rank = 0 + else: + cur_vpp_rank = vp_stage + + is_first_last_vpp_chunk = self._is_first_last_vpp_chunk + # Flush staged chunks when reaching the last virtual pipeline stage + if cur_vpp_rank == self._vpp - 1: + self.flush() + # Determine if this is the first microbatch of the last virtual pipeline stage + is_first_last_vpp_chunk = is_first_last_vpp_chunk and (cur_vpp_rank == self._vpp - 1) + + cur_chunk = ChunkOffloadHandler(is_first_last_vpp_chunk, min_offloaded_tensor_size) + self._stages[cur_vpp_rank].append(cur_chunk) + # For the last stage, push immediately and flush + if cur_vpp_rank == self._vpp - 1: + self._is_first_last_vpp_chunk = False + self.push(cur_chunk) + self.flush() + self._cur_forward_chunk = cur_chunk + cur_chunk.vpp_rank = cur_vpp_rank + + def set_last_layer(self, 
is_last_layer): + """Mark whether the current forward chunk is processing the last layer.""" + self._cur_forward_chunk.is_last_layer = is_last_layer + + def cur_forward_chunk(self): + """Get the current forward pass chunk handler.""" + return self._cur_forward_chunk + + def cur_backward_chunk(self): + """Get the current backward pass chunk handler.""" + return self._cur_backward_chunk + + def __enter__(self): + """Enter context manager to enable activation offloading hooks.""" + debug_rank("----__enter__") + from megatron.core.extensions.transformer_engine import cpu_offload + + if cpu_offload is not None: + cpu_offload.CPUOffloadEnabled = True + self.inside_context = True + + torch._C._autograd._push_saved_tensors_default_hooks( + self.on_save_for_backward, self.on_get_saved_tensor + ) + + def __exit__(self, *args: Any): + """Exit context manager and restore original tensor saving behavior.""" + debug_rank("----__exit__") + from megatron.core.extensions.transformer_engine import cpu_offload + + if cpu_offload is not None: + cpu_offload.CPUOffloadEnabled = False + self.inside_context = False + torch._C._autograd._pop_saved_tensors_default_hooks() + + def on_save_for_backward(self, tensor: torch.Tensor) -> Any: + """ + Hook called when autograd saves a tensor for backward pass. + Returns a tag to identify the tensor later. + """ + debug_rank(f"------on_save_for_backward {tensor.shape}") + assert self.inside_context, "Must be inside offload context" + return self.cur_forward_chunk().tensor_push(tensor) + + def on_get_saved_tensor(self, saved_state: Any) -> torch.Tensor: + """ + Hook called when autograd retrieves a saved tensor during backward pass. + Returns the actual tensor (potentially reloading from CPU). + """ + debug_rank(f"----on_get_saved_tensor {saved_state}") + return self.cur_backward_chunk().tensor_pop(saved_state) + + +class ChunkOffloadHandler: + """ + Handles activation offloading and reloading for a single pipeline chunk (microbatch). 
+ Manages tensor groups, coordinates asynchronous GPU-CPU transfers, and handles synchronization. + """ + + @staticmethod + def offload(src_tensor, pin_memory=True): + """Offload.""" + debug_rank("--------offload") + from megatron.core.extensions.transformer_engine import Float8Tensor + + fp8_offload = isinstance(src_tensor, Float8Tensor) if Float8Tensor is not None else False + + if not src_tensor.is_contiguous(): + src_tensor = src_tensor.contiguous() + + cpu_backup = torch.empty( + src_tensor.size(), + dtype=torch.uint8 if fp8_offload else src_tensor.dtype, + layout=src_tensor.layout, + device="cpu", + pin_memory=pin_memory, + ) + + if fp8_offload: + cpu_backup = Float8Tensor.make_like(src_tensor, data=cpu_backup) + + cpu_backup.copy_(src_tensor, non_blocking=pin_memory) + state = (src_tensor.device, cpu_backup) + return state + + @staticmethod + def reload(state, non_blocking=None): + """Reload.""" + debug_rank("------reload") + dev, cpu_backup = state + if non_blocking is None: + non_blocking = cpu_backup.is_pinned() + return cpu_backup.to(dev, non_blocking=non_blocking) + + def __init__(self, is_first_last_vpp_chunk, min_offloaded_tensor_size): + # Data Structure to maintain reference to activation tensors + self._tensor_tag_to_state = {} + # Mark the first microbatch of the last virtual pipeline stage + self._is_first_last_vpp_chunk = is_first_last_vpp_chunk + + # Group management for batching offload/reload operations + self._offloaded_group_index = 0 + self._groups_to_offload = [] + self._groups_to_reload = [] + self._tensor_count_current_group = 0 + + # Counter for special torch tensor types (FakeTensor, FunctionalTensor) + self.torch_tensor_count = 0 + self.d2h_stream = PipelineOffloadManager.get_instance().d2h_stream + self.h2d_stream = PipelineOffloadManager.get_instance().h2d_stream + self._offload_events = {} + self._reload_events = {} + self.min_offloaded_tensor_size = min_offloaded_tensor_size + self.is_last_layer = False + + def 
is_empty_chunk(self): + """Check if this chunk has no tensors to manage.""" + return len(self._tensor_tag_to_state) == 0 + + def is_first_last_layer(self): + """ + Check if this is the last layer of the first microbatch of the last vp stage. + These tensors should not be offloaded to avoid unnecessary overhead. + """ + debug_rank( + f"------is_first_last_layer {self._is_first_last_vpp_chunk} {self.is_last_layer}" + ) + return self._is_first_last_vpp_chunk and self.is_last_layer + + def tensor_push(self, tensor): + """Push tensor to the offload handler.""" + torch_stray_tensor = isinstance( + tensor, + ( + torch._subclasses.fake_tensor.FakeTensor, + torch._subclasses.functional_tensor.FunctionalTensor, + ), + ) + + if not torch_stray_tensor: + # Assign unique tag based on group index and position within group + tensor_tag = (self._offloaded_group_index, self._tensor_count_current_group) + self._tensor_count_current_group += 1 + assert tensor_tag not in self._tensor_tag_to_state, "Duplicate tensor tag" + self._tensor_tag_to_state[tensor_tag] = tensor + else: + # Use negative group ID for special tensor types + tensor_tag = (-1, self.torch_tensor_count) + self.torch_tensor_count += 1 + self._tensor_tag_to_state[tensor_tag] = tensor + debug_rank(f"--------tensor_push {tensor_tag}") + return tensor_tag + + def tensor_pop(self, tensor_tag): + """Pop tensor from the offload handler.""" + debug_rank(f"--------tensor_pop {tensor_tag}") + assert tensor_tag in self._tensor_tag_to_state, f"Tag {tensor_tag} not found" + tensor = self._tensor_tag_to_state.pop(tensor_tag) + # If tensor is offloaded (stored as tuple), reload it + if isinstance(tensor, tuple): + tensor = self.reload(tensor) + debug_rank(f"--------tensor_pop {tensor.shape}") + return tensor + + def tensor_need_offloading_checker(self, tensor): + """Check if the tensor needs to be offloaded.""" + if tensor.numel() < self.min_offloaded_tensor_size: + return False + # Respect tensor's offload preference if specified + 
if hasattr(tensor, "offloading_activation") and not tensor.offloading_activation: + return False + return True + + def bulk_offload_group(self, group_to_offload): + """offload a group of tensors recorded in tensor_push().""" + debug_rank("------bulk_offload_group") + assert not self.is_first_last_layer(), "Should not offload first-last layer" + group_id_to_offload, name = group_to_offload + torch.cuda.nvtx.range_push("activation offloading " + name) + with torch.cuda.stream(self.d2h_stream): + for tensor_tag, state in self._tensor_tag_to_state.items(): + group_id, _ = tensor_tag + if group_id == group_id_to_offload: + debug_rank(f"------tensor_tag {tensor_tag}") + debug_rank(f"------group_to_offload {group_to_offload}") + assert not isinstance(state, tuple), "Tensor already offloaded" + tensor_on_device = state + if self.tensor_need_offloading_checker(tensor_on_device): + state = self.offload(tensor_on_device) + event = torch.cuda.Event() + event.record(self.d2h_stream) + self._offload_events[name] = event + tensor_on_device.record_stream(self.d2h_stream) + self._tensor_tag_to_state[tensor_tag] = state + torch.cuda.nvtx.range_pop() + + def get_offload_event(self, name): + """Get the CUDA event for a named offload operation.""" + return self._offload_events.get(name, None) + + def get_reload_event(self, name): + """Get the CUDA event for a named reload operation.""" + return self._reload_events.get(name, None) + + def bulk_reload_group(self, group_to_reload): + """Bulk reload group.""" + debug_rank("----bulk_reload_group") + found_reload_group = False + group_id_to_reload, name = group_to_reload + torch.cuda.nvtx.range_push("activation reloading " + name) + with torch.cuda.stream(self.h2d_stream): + for tensor_label, state in self._tensor_tag_to_state.items(): + group_id, _ = tensor_label + if group_id == group_id_to_reload: + debug_rank(f"----tensor_label {tensor_label}") + found_reload_group = True + event = self.get_offload_event(name) + # Only reload if tensor 
was offloaded (stored as tuple) + if isinstance(state, tuple): + # Wait for offload to complete before reloading + torch.cuda.current_stream().wait_event(event) + recovered_tensor = self.reload(state) + event.record(self.h2d_stream) + self._reload_events[name] = event + debug_rank(f"----recovered_tensor {recovered_tensor.shape}") + self._tensor_tag_to_state[tensor_label] = recovered_tensor + torch.cuda.nvtx.range_pop() + return found_reload_group + + def pre_reload_last_layer(self): + """Pre-reload the last layer of this chunk to hide reload latency.""" + debug_rank("pre_reload_last_layer") + assert not self._is_first_last_vpp_chunk, "Should not pre-reload first chunk" + debug_rank(f"len(self._groups_to_reload) {len(self._groups_to_reload)}") + if len(self._groups_to_reload) > 0: + # Reload the last group (last layer) early + if self.bulk_reload_group(self._groups_to_reload[-1]): + self._groups_to_reload.pop() + + def should_bulk_offload(self): + """Determine if the current group should be offloaded.""" + # Don't offload the first backward chunk's last layer + if self.is_first_last_layer(): + return False + + # Check if next backward chunk is this chunk (for last pipeline stage) + next_backward_chunk = PipelineOffloadManager.get_instance().front() + if next_backward_chunk is not None and next_backward_chunk is self: + # Don't offload last layer if it's about to be used immediately + if self.is_last_layer: + return False + + return True + + def bulk_offload(self, forced_released_tensors): + """Offload a group of tensors and optionally release their GPU memory.""" + debug_rank("----bulk_offload") + if self.should_bulk_offload(): + group_to_offload = self._groups_to_offload.pop() + self._groups_to_reload.append(group_to_offload) + self.bulk_offload_group(group_to_offload) + # Manually release tensors not auto-freed by torch GC + if len(forced_released_tensors) > 0: + cur_stream = torch.cuda.current_stream() + for release_tensor in forced_released_tensors: + if 
self.tensor_need_offloading_checker(release_tensor): + # Ensure tensor is not in use before freeing + release_tensor.record_stream(cur_stream) + release_tensor.untyped_storage().resize_(0) + + def on_group_commit_forward(self, forced_released_tensors): + """Called at the end of a layer group's forward pass to trigger offloading.""" + debug_rank("--on_group_commit_forward") + # Wait for compute to finish before starting offload + self.d2h_stream.wait_stream(torch.cuda.current_stream()) + self.bulk_offload(forced_released_tensors) + + def bulk_reload(self): + """Reload the next group of tensors from CPU to GPU.""" + debug_rank("--bulk_reload") + if len(self._groups_to_reload) > 0: + # Reload the next layer group + if self.bulk_reload_group(self._groups_to_reload[-1]): + debug_rank(f"--bulk_reload_group {self._groups_to_reload}") + self._groups_to_reload.pop() + else: + # Pre-load the last layer of the next backward chunk to hide latency + next_backward_chunk = PipelineOffloadManager.get_instance().front() + if next_backward_chunk is not None: + next_backward_chunk.pre_reload_last_layer() + + def on_group_commit_backward(self, name): + """ + Called at the end of a layer group's backward pass. + Ensures correct chunk is active and synchronizes reloads. 
+ """ + debug_rank("--on_group_commit_backward") + cur_backward_chunk = PipelineOffloadManager.get_instance().cur_backward_chunk() + # Switch to this chunk if it's not already current + if cur_backward_chunk is not self: + PipelineOffloadManager.get_instance().pop() + cur_backward_chunk = PipelineOffloadManager.get_instance().cur_backward_chunk() + assert cur_backward_chunk is self, "Chunk mismatch" + # Wait for reload to complete before using tensors + event = self.get_reload_event(name) + if event is not None: + torch.cuda.current_stream().wait_event(event) + self._offloaded_group_index = self._offloaded_group_index - 1 + + def on_group_start_forward(self, name): + """ + Called at the start of a layer group's forward pass. + Increments group index and prepares for offloading. + """ + debug_rank(f"--on_group_start_forward") + self._offloaded_group_index = self._offloaded_group_index + 1 + self._tensor_count_current_group = 0 + self._groups_to_offload.append((self._offloaded_group_index, name)) + + def on_group_start_backward(self): + """ + Called at the start of a layer group's backward pass. + Triggers reloading of tensors from CPU. + """ + debug_rank("--on_group_start_backward") + # Wait for compute to finish before starting reload + self.h2d_stream.wait_stream(torch.cuda.current_stream()) + self.bulk_reload() + + +class FineGrainedOffloadingGroupCommitFunction(torch.autograd.Function): + """ + Identity operation that marks the end of a layer group for offload synchronization. + Triggers offload during forward and synchronizes reload during backward. 
+ """ + + @staticmethod + def forward(ctx, *args): + # pylint: disable=missing-function-docstring + debug_rank("FineGrainedOffloadingGroupCommitFunction forward") + + forced_released_tensors = args[-1] + name = args[-2] + cpu_offload_handler = args[-3] + tensor = args[:-3] + cpu_offload_handler.on_group_commit_forward(forced_released_tensors) + ctx.cpu_offload_handler = cpu_offload_handler + ctx.name = name + + # return the identical tensor + return tensor + + @staticmethod + def backward(ctx, *grad_output): + # pylint: disable=missing-function-docstring + debug_rank("FineGrainedOffloadingGroupCommitFunction backward") + + cpu_offload_handler = ctx.cpu_offload_handler + cpu_offload_handler.on_group_commit_backward(ctx.name) + return grad_output + (None, None, None) + + +def fine_grained_offloading_group_commit(*tensor, name, forced_released_tensors=[]): + """ + Specify the tensors to be released after offloading. + forced_released_tensors is a list of tensors to be released after offloading. + The tensors will be untyped_storage().resize_(0) after offloading. + Note: specify the tensors only when they are not automatically released by torch gc. + """ + cur_forward_chunk = PipelineOffloadManager.get_instance().cur_forward_chunk() + return FineGrainedOffloadingGroupCommitFunction.apply( + *tensor, cur_forward_chunk, name, forced_released_tensors + ) + + +class FineGrainedOffloadingGroupStartFunction(torch.autograd.Function): + """ + Identity operation that marks the start of a layer group for offload/reload. + Prepares for offload during forward and triggers reload during backward. 
+ """ + + @staticmethod + def forward(ctx, tensor, cpu_offload_handler, name): + # pylint: disable=missing-function-docstring + ctx.cpu_offload_handler = cpu_offload_handler + debug_rank("FineGrainedOffloadingGroupStartFunction forward") + + cpu_offload_handler.on_group_start_forward(name) + # return the identical tensor + return tensor + + @staticmethod + def backward(ctx, grad_output): + # pylint: disable=missing-function-docstring + debug_rank("FineGrainedOffloadingGroupStartFunction backward") + cpu_offload_handler = ctx.cpu_offload_handler + cpu_offload_handler.on_group_start_backward() + return grad_output, None, None + + +def fine_grained_offloading_group_start(tensor, name=None): + """Mark the start of a layer group and prepare for offload/reload.""" + cur_forward_chunk = PipelineOffloadManager.get_instance().cur_forward_chunk() + return FineGrainedOffloadingGroupStartFunction.apply(tensor, cur_forward_chunk, name) + + +def get_fine_grained_offloading_context(flag): + """Get the fine-grained offload context""" + return PipelineOffloadManager.get_instance() if flag else nullcontext() + + +def fine_grained_offloading_set_last_layer(is_last_layer): + """Set the last layer flag.""" + PipelineOffloadManager.get_instance().set_last_layer(is_last_layer) + + +def fine_grained_offloading_init_chunk_handler(vp_size, vp_stage, min_offloaded_tensor_size): + """Initialize the chunk handler, called at the start of a microbatch forward pass.""" + PipelineOffloadManager.get_instance().init_model_chunk_offload_handler( + vp_size, vp_stage, min_offloaded_tensor_size + ) + + +def fine_grained_offloading_reset(): + """Reset the chunk handler, called at the start of a training iteration.""" + PipelineOffloadManager.get_instance().reset() diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index e83f8d90635..09f95ac25d2 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py 
@@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import contextlib from functools import partial @@ -9,6 +9,9 @@ from megatron.core import parallel_state from megatron.core.enums import ModelType +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_reset, +) from megatron.core.pipeline_parallel.p2p_communication import P2PCommunicator from megatron.core.pipeline_parallel.utils import ( is_pp_first_stage, @@ -562,6 +565,9 @@ def forward_backward_no_pipelining( if config.timers is not None: config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time) + if not forward_only and config.fine_grained_activation_offloading: + fine_grained_offloading_reset() + no_sync_func = config.no_sync_func if no_sync_func is None: no_sync_func = contextlib.nullcontext @@ -898,6 +904,9 @@ def forward_backward_pipelining_with_interleaving( adjust_tensor_shapes_fn is None ), "adjust_tensor_shapes_fn is not supported for interleaved pipeline parallelism" + if not forward_only and config.fine_grained_activation_offloading: + fine_grained_offloading_reset() + if config.overlap_p2p_comm and config.batch_p2p_comm: raise ValueError("Can not use both overlap_p2p_comm and batch_p2p_comm") @@ -2043,6 +2052,9 @@ def forward_backward_pipelining_without_interleaving( if config.timers is not None: config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time) + if not forward_only and config.fine_grained_activation_offloading: + fine_grained_offloading_reset() + # Disable async grad reductions no_sync_func = config.no_sync_func if no_sync_func is None: diff --git a/megatron/core/tensor_parallel/random.py b/megatron/core/tensor_parallel/random.py index 54cac0e41e3..5a44c38713d 100644 --- a/megatron/core/tensor_parallel/random.py +++ b/megatron/core/tensor_parallel/random.py @@ -1,4 +1,4 @@ 
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # Parts of the code here are adapted from PyTorch # repo: https://github.com/pytorch/pytorch @@ -510,10 +510,14 @@ def forward(ctx, run_function, checkpoint_without_output_obj, *args): @staticmethod def backward(ctx, *args): """Backward pass.""" - inputs = ctx.saved_tensors + # Get the inputs from the context instead of the saved tensors + # because the saved tensors are already cached by the recomputation. + # This is to avoid double-reloading the inputs in CPU offloading scenario. + inputs = ctx.inputs outputs = ctx.outputs torch.autograd.backward(outputs, args) ctx.outputs = None + ctx.inputs = None grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else inp for inp in inputs) return (None, None) + grads @@ -573,8 +577,10 @@ def _recompute(self, _): recompute_ctx = contextlib.nullcontext() fp8_ctx = contextlib.nullcontext() + # Store the inputs for backward pass + inputs = self.ctx.saved_tensors with torch.enable_grad(), fp8_ctx, recompute_ctx: - outputs = self.run_function(*self.ctx.saved_tensors) + outputs = self.run_function(*inputs) self.run_function = None self.rng_states = None @@ -590,6 +596,7 @@ def _recompute(self, _): output.untyped_storage().copy_(recomputation_output.untyped_storage()) self.ctx.outputs = outputs + self.ctx.inputs = inputs self.outputs = None self.ctx = None diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index d4e990041ca..af6dada6746 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from abc import ABC, abstractmethod from dataclasses import dataclass @@ -22,6 +22,11 @@ get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, ) +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, +) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import MegatronModule @@ -188,6 +193,21 @@ def __init__( and "core_attn" in self.config.recompute_modules ) + self.offload_qkv_linear = ( + self.config.fine_grained_activation_offloading + and "qkv_linear" in self.config.offload_modules + ) + + self.offload_core_attention = ( + self.config.fine_grained_activation_offloading + and "core_attn" in self.config.offload_modules + ) + + self.offload_attn_proj = ( + self.config.fine_grained_activation_offloading + and "attn_proj" in self.config.offload_modules + ) + # Output. self.linear_proj = build_module( submodules.linear_proj, @@ -730,9 +750,17 @@ def forward( if output_gate: assert split_qkv, "output_gate is not supported for unsplit mixed_qkv tensor." 
- qkv_output = self.get_query_key_value_tensors( - hidden_states, key_value_states, output_gate=output_gate, split_qkv=split_qkv - ) + if self.offload_qkv_linear: + hidden_states = fine_grained_offloading_group_start(hidden_states, name="qkv_linear") + with get_fine_grained_offloading_context(self.offload_qkv_linear): + qkv_output = self.get_query_key_value_tensors( + hidden_states, key_value_states, output_gate=output_gate, split_qkv=split_qkv + ) + if self.offload_qkv_linear: + (qkv_output,) = fine_grained_offloading_group_commit( + qkv_output, name="qkv_linear", forced_released_tensors=[] + ) + attn_mask_type = self.attn_mask_type block_table = None gate = None @@ -881,17 +909,20 @@ def forward( packed_seq_params=packed_seq_params, ) else: + if self.offload_core_attention and self.training: + query = fine_grained_offloading_group_start(query, name="core_attn") if inference_context is None or inference_context.is_static_batching(): # Static batching attention kernel. - core_attn_out = self.core_attention( - query, - key, - value, - attention_mask, - attn_mask_type=attn_mask_type, - attention_bias=attention_bias, - packed_seq_params=packed_seq_params, - ) + with get_fine_grained_offloading_context(self.offload_core_attention): + core_attn_out = self.core_attention( + query, + key, + value, + attention_mask, + attn_mask_type=attn_mask_type, + attention_bias=attention_bias, + packed_seq_params=packed_seq_params, + ) else: # Dynamic batching attention kernel. 
@@ -911,6 +942,10 @@ def forward( block_table, ) core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)') + if self.offload_core_attention and self.training: + (core_attn_out,) = fine_grained_offloading_group_commit( + core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] + ) if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd': # reshape to same output shape as unpacked case @@ -931,7 +966,14 @@ def forward( # ================= nvtx_range_push(suffix="linear_proj") - output, bias = self.linear_proj(core_attn_out) + if self.offload_attn_proj: + core_attn_out = fine_grained_offloading_group_start(core_attn_out, name="attn_proj") + with get_fine_grained_offloading_context(self.offload_attn_proj): + output, bias = self.linear_proj(core_attn_out) + if self.offload_attn_proj: + output, bias = fine_grained_offloading_group_commit( + output, bias, name="attn_proj", forced_released_tensors=[core_attn_out] + ) nvtx_range_pop(suffix="linear_proj") return output, bias diff --git a/megatron/core/transformer/moe/README.md b/megatron/core/transformer/moe/README.md index 0a933aed0df..a44daea38e2 100644 --- a/megatron/core/transformer/moe/README.md +++ b/megatron/core/transformer/moe/README.md @@ -210,6 +210,20 @@ Enable A2A overlap across different batches inspired by the DSv3 DualPipe implme --delay-wgrad-compute ``` +### Fine-grained Activation Offloading (collaborated with rednote) +Offload the input activation at the granularity of modules + +**Usage** +```bash +# Enable fine-grained activation offloading +--fine-grained-activation-offloading + +# Specify which modules are going to offload its input +# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". 
+--offload-modules expert_fc1 +``` +For more details, please refer to the ```docs/source/api-guide/fine_grained_activation_offloading.md``` + ### MoE Related Arguments | Item | Description | | --- | --- | diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index d0ac20a7536..ca308da0d21 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import copy import itertools @@ -27,6 +27,11 @@ from megatron.core.fusions.fused_bias_swiglu import weighted_bias_swiglu_impl from megatron.core.fusions.fused_weighted_squared_relu import weighted_squared_relu_impl from megatron.core.jit import jit_fuser +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, +) from megatron.core.tensor_parallel.layers import ( _initialize_affine_weight_cpu, _initialize_affine_weight_gpu, @@ -825,6 +830,16 @@ def __init__( tp_group=pg_collection.expt_tp, ) + self.offload_expert_fc1 = ( + self.config.fine_grained_activation_offloading + and "expert_fc1" in self.config.offload_modules + ) + + self.offload_moe_act = ( + self.config.fine_grained_activation_offloading + and "moe_act" in self.config.offload_modules + ) + self.activation_recompute = ( self.config.recompute_granularity == 'selective' and "moe_act" in self.config.recompute_modules @@ -834,6 +849,12 @@ def __init__( set_save_original_input(self.linear_fc2) + # This is to avoid the CPU overhead of multiple d2h copies + if self.offload_expert_fc1 and not (self.config.fp8 or self.config.fp4): + from megatron.core.extensions.transformer_engine import set_save_original_input + + set_save_original_input(self.linear_fc1) + if self.config.fp8 or self.config.fp4: 
assert HAVE_TE, "FP8 and FP4 requires TE." self.quantization_padding = Fp8Padding(self.num_local_experts) @@ -898,9 +919,21 @@ def forward( # Probs already applied, so reset to 1. permuted_probs = torch.ones_like(permuted_probs) - intermediate_parallel, bias_parallel = self.linear_fc1( - permuted_local_hidden_states, tokens_per_expert - ) + if self.offload_expert_fc1: + permuted_local_hidden_states = fine_grained_offloading_group_start( + permuted_local_hidden_states, name="expert_fc1" + ) + with get_fine_grained_offloading_context(self.offload_expert_fc1): + fc1_output, bias_parallel = self.linear_fc1( + permuted_local_hidden_states, tokens_per_expert + ) + if self.offload_expert_fc1: + fc1_output, bias_parallel = fine_grained_offloading_group_commit( + fc1_output, + bias_parallel, + name="expert_fc1", + forced_released_tensors=[permuted_local_hidden_states], + ) def bias_act_func(intermediate_parallel, bias_parallel, permuted_probs): if self.config.use_te_activation_func: @@ -960,18 +993,26 @@ def glu(x): intermediate_parallel = intermediate_parallel.to(original_dtype) return intermediate_parallel + if self.offload_moe_act: + fc1_output = fine_grained_offloading_group_start(fc1_output, name="moe_act") + if self.activation_recompute: self.activation_checkpoint = tensor_parallel.CheckpointWithoutOutput() - intermediate_parallel = self.activation_checkpoint.checkpoint( - bias_act_func, intermediate_parallel, bias_parallel, permuted_probs - ) - output, output_bias = self.linear_fc2(intermediate_parallel, tokens_per_expert) - self.activation_checkpoint.discard_output_and_register_recompute(output) + with get_fine_grained_offloading_context(self.offload_moe_act): + bias_act_output = self.activation_checkpoint.checkpoint( + bias_act_func, fc1_output, bias_parallel, permuted_probs + ) else: - intermediate_parallel = bias_act_func( - intermediate_parallel, bias_parallel, permuted_probs + with get_fine_grained_offloading_context(self.offload_moe_act): + bias_act_output = 
bias_act_func(fc1_output, bias_parallel, permuted_probs) + + output, output_bias = self.linear_fc2(bias_act_output, tokens_per_expert) + if self.activation_recompute: + self.activation_checkpoint.discard_output_and_register_recompute(output) + if self.offload_moe_act: + (output,) = fine_grained_offloading_group_commit( + output, name="moe_act", forced_released_tensors=[fc1_output] ) - output, output_bias = self.linear_fc2(intermediate_parallel, tokens_per_expert) # upad and concat the output if self.config.fp8 or self.config.fp4: diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index a8893ebec36..5d3f16c1041 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import math @@ -22,6 +22,11 @@ _yarn_get_mscale, apply_rotary_pos_emb, ) +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, +) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel.layers import ColumnParallelLinear from megatron.core.tensor_parallel.mappings import ( @@ -266,15 +271,19 @@ def forward( query, key, value, attention_mask, packed_seq_params=packed_seq_params ) else: + if self.offload_core_attention and self.training: + query = fine_grained_offloading_group_start(query, name="core_attn") + if inference_context is None or inference_context.is_static_batching(): - core_attn_out = self.core_attention( - query, - key, - value, - attention_mask, - packed_seq_params=packed_seq_params, - attn_mask_type=attn_mask_type, - ) + with get_fine_grained_offloading_context(self.offload_core_attention): + core_attn_out = 
self.core_attention( + query, + key, + value, + attention_mask, + packed_seq_params=packed_seq_params, + attn_mask_type=attn_mask_type, + ) elif self.cache_mla_latents: # Dynamic batching attention kernel. q, k, v = (query, key, value) @@ -295,6 +304,10 @@ def forward( # Only rearrange if not in absorption mode (Flash MLA handles format correctly) if not inference_context.is_decode_only(): core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)') + if self.offload_core_attention and self.training: + (core_attn_out,) = fine_grained_offloading_group_commit( + core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] + ) # We are doing absorption with cache mla latents and decode mode. if self.cache_mla_latents and inference_context.is_decode_only(): @@ -320,7 +333,14 @@ def forward( # ================= # Output. [sq, b, h] # ================= - output, bias = self.linear_proj(core_attn_out) + if self.offload_attn_proj: + core_attn_out = fine_grained_offloading_group_start(core_attn_out, name="attn_proj") + with get_fine_grained_offloading_context(self.offload_attn_proj): + output, bias = self.linear_proj(core_attn_out) + if self.offload_attn_proj: + output, bias = fine_grained_offloading_group_commit( + output, bias, name="attn_proj", forced_released_tensors=[core_attn_out] + ) return output, bias diff --git a/megatron/core/transformer/multi_token_prediction.py b/megatron/core/transformer/multi_token_prediction.py index bd3aa9c8c96..a619b9ffa55 100755 --- a/megatron/core/transformer/multi_token_prediction.py +++ b/megatron/core/transformer/multi_token_prediction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from contextlib import nullcontext from dataclasses import dataclass @@ -13,6 +13,9 @@ from megatron.core.fp8_utils import get_fp8_context from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_set_last_layer, +) from megatron.core.pipeline_parallel.utils import is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel import ( @@ -901,6 +904,8 @@ def forward( hidden_states_list = list(torch.chunk(hidden_states, 1 + offset, dim=0)) hidden_states = hidden_states_list[offset] for layer_number in range(len(self.layers)): + if self.config.fine_grained_activation_offloading: + fine_grained_offloading_set_last_layer(layer_number == len(self.layers) - 1) (hidden_states, input_ids, position_ids) = self.layers[layer_number]( input_ids=input_ids, position_ids=position_ids, diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index aead6133f22..06e8f1372f4 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import logging from contextlib import nullcontext from dataclasses import dataclass @@ -16,6 +16,9 @@ from megatron.core.fusions.fused_layer_norm import FusedLayerNorm from megatron.core.inference.contexts import BaseInferenceContext from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_set_last_layer, +) from megatron.core.pipeline_parallel.utils import is_vp_first_stage, is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.enums import LayerType @@ -693,6 +696,11 @@ def forward( else: inner_quantization_context = nullcontext() + if self.config.fine_grained_activation_offloading: + fine_grained_offloading_set_last_layer( + l_no == self.num_layers_per_pipeline_rank - 1 + ) + with self.offload_context, inner_quantization_context: hidden_states, context = layer( hidden_states=hidden_states, diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index d14f991046e..9f1b112ba83 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import logging import warnings @@ -775,6 +775,29 @@ class TransformerConfig(ModelParallelConfig): """Transformer implementation to use. Options are 'transformer_engine' for Transformer Engine and 'local' for MCore.""" + ##################################### + # Fine-grained Activation Offloading + ##################################### + fine_grained_activation_offloading: bool = False + """If True, offload the input of the specified modules to the CPU. 
+ Fine-grained activation offloading is a module-level offloading method + instead of a layer-level offloading method like cpu_offloading.""" + + offload_modules: Optional[list[str]] = None + """The submodules to offload its input. + choices: "attn_norm", "qkv_linear", "core_attn", "attn_proj", + "mlp_norm", "expert_fc1", "moe_act". + "attn_norm": offload the input of the normalization in the attention part. + "qkv_linear": offload the input of the qkv linear part. + "core_attn": offload the input of the core attention part. + "attn_proj": offload the input of the attn linear projection part. + "mlp_norm": offload the input of the normalization in the mlp part. + "expert_fc1": offload the input of the expert fc1 part. + "moe_act": offload the input of the moe act part. + """ + min_offloaded_tensor_size: int = 1024 * 1024 + """The minimum size of the tensor to be offloaded.""" + def __post_init__(self): """Python dataclass method that is used to modify attributes after initialization. See https://docs.python.org/3/library/dataclasses.html#post-init-processing for more @@ -1120,6 +1143,32 @@ def __post_init__(self): if "moe" not in self.recompute_modules: self.recompute_modules.append("moe") + if self.fine_grained_activation_offloading: + assert ( + not self.cpu_offloading + ), "fine_grained_activation_offloading cannot be enabled with cpu_offloading." + assert self.offload_modules is not None and len(self.offload_modules) > 0 + allowed_modules = { + "core_attn", + "attn_proj", + "expert_fc1", + "moe_act", + "attn_norm", + "mlp_norm", + "qkv_linear", + } + invalid_modules = set(self.offload_modules) - allowed_modules + assert not invalid_modules, ( + f'Invalid choices for offload_modules: {invalid_modules}. 
' + f'Allowed modules are: {allowed_modules}' + ) + if "attn_proj" in self.offload_modules and "core_attn" not in self.offload_modules: + raise ValueError( + "attn_proj cannot be set to offload_modules alone without core_attn " + "because the input of attn_proj is the output of core_attn, " + "which is needed in core_attn.backward()." + ) + if ( self.num_layers_in_first_pipeline_stage is not None or self.num_layers_in_last_pipeline_stage is not None diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index a5babece9d0..c36ff7515e4 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import logging import warnings @@ -397,6 +397,16 @@ def __init__( if "mlp" in self.config.recompute_modules: if not isinstance(self.mlp, MoELayer): self.recompute_mlp = True + self.offload_attn_norm = ( + self.config.fine_grained_activation_offloading + and "attn_norm" in self.config.offload_modules + and not isinstance(self.input_layernorm, IdentityOp) + ) + self.offload_mlp_norm = ( + self.config.fine_grained_activation_offloading + and "mlp_norm" in self.config.offload_modules + and not isinstance(self.pre_mlp_layernorm, IdentityOp) + ) # @jcasper how should we handle nvfuser? # Set bias+dropout+add fusion grad_enable execution handler. @@ -479,20 +489,29 @@ def _forward_attention( context (Tensor): Updated context tensor if cross-attention is used, otherwise None. """ + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, + ) inference_context = deprecate_inference_params(inference_context, inference_params) # Residual connection. 
residual = hidden_states + if self.offload_attn_norm: + hidden_states = fine_grained_offloading_group_start(hidden_states, name="attn_norm") # Optional Input Layer norm if self.recompute_input_layernorm: self.input_layernorm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - input_layernorm_output = self.input_layernorm_checkpoint.checkpoint( - self.input_layernorm, hidden_states - ) + with get_fine_grained_offloading_context(self.offload_attn_norm): + input_layernorm_output = self.input_layernorm_checkpoint.checkpoint( + self.input_layernorm, hidden_states + ) else: - input_layernorm_output = self.input_layernorm(hidden_states) + with get_fine_grained_offloading_context(self.offload_attn_norm): + input_layernorm_output = self.input_layernorm(hidden_states) # Self attention. nvtx_range_push(suffix="self_attention") @@ -526,6 +545,11 @@ def _forward_attention( ) nvtx_range_pop(suffix="self_attn_bda") + if self.offload_attn_norm: + (hidden_states,) = fine_grained_offloading_group_commit( + hidden_states, name="attn_norm", forced_released_tensors=[residual] + ) + # Residual connection. residual = hidden_states @@ -563,17 +587,27 @@ def _forward_mlp(self, hidden_states, inference_context=None): output (Tensor): Transformed hidden states of shape [s, b, h]. """ + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, + ) + # Residual connection. residual = hidden_states + if self.offload_mlp_norm: + hidden_states = fine_grained_offloading_group_start(hidden_states, name="mlp_norm") # Optional Layer norm post the cross-attention. 
if self.recompute_pre_mlp_layernorm: self.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint( - self.pre_mlp_layernorm, hidden_states - ) + with get_fine_grained_offloading_context(self.offload_mlp_norm): + pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint( + self.pre_mlp_layernorm, hidden_states + ) else: - pre_mlp_layernorm_output = self.pre_mlp_layernorm(hidden_states) + with get_fine_grained_offloading_context(self.offload_mlp_norm): + pre_mlp_layernorm_output = self.pre_mlp_layernorm(hidden_states) nvtx_range_push(suffix="mlp") # Potentially chunk the MLP computation during prefill to minimize the peak activation size @@ -633,6 +667,10 @@ def _forward_mlp(self, hidden_states, inference_context=None): mlp_output_with_bias, residual, self.hidden_dropout ) nvtx_range_pop(suffix="mlp_bda") + if self.offload_mlp_norm: + (hidden_states,) = fine_grained_offloading_group_commit( + hidden_states, name="mlp_norm", forced_released_tensors=[residual] + ) # Jit compiled function creates 'view' tensor. This tensor # potentially gets saved in the MPU checkpoint function context, diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index bdf915a8ae1..8e5f343b73c 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1216,6 +1216,10 @@ def validate_args(args, defaults={}): "when enabling delay_wgrad_compute" ) + if args.fine_grained_activation_offloading: + assert args.transformer_impl == 'transformer_engine', \ + "Fine-grained activation offloading is only supported with transformer_engine implementation" + if args.mtp_num_layers: assert not args.use_legacy_models, "The legacy Megatron models does not support Multi-Token Prediction (MTP)." 
assert args.position_embedding_type == "rope" or args.position_embedding_type == "none", ( @@ -2327,7 +2331,12 @@ def _add_training_args(parser): help='The communicator group names to use high priority streams.') group.add_argument('--use-te-activation-func', action='store_true', help='Use activation function kernel from Transformer Engine in MLP module.') - + group.add_argument('--fine-grained-activation-offloading', action='store_true', + help='Enable fine-grained activation offloading.') + group.add_argument('--offload-modules', nargs='*', type=str, default=[], + help='The submodules to offload its input. Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act".') + group.add_argument('--min-offloaded-tensor-size', type=int, default=1024*1024, + help='The minimum size of the tensor to be offloaded.') return parser diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json new file mode 100644 index 00000000000..b3f192ba287 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json @@ -0,0 +1,344 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.07546, + "2": 11.03837, + "3": 9.66011, + "4": 9.91381, + "5": 9.32909, + "6": 9.13922, + "7": 9.13574, + "8": 8.65508, + "9": 8.51394, + "10": 8.8409, + "11": 8.29149, + "12": 8.34581, + "13": 8.25518, + "14": 7.73711, + "15": 7.86249, + "16": 7.9371, + "17": 7.89319, + "18": 7.63123, + "19": 7.99731, + "20": 7.74538, + "21": 7.44348, + "22": 7.42249, + "23": 7.29714, + "24": 7.27462, + "25": 7.54574, + "26": 6.96838, + "27": 7.50556, + "28": 7.22743, + "29": 7.36588, + "30": 7.52622, + "31": 7.27026, + "32": 7.45521, + "33": 7.50954, + 
"34": 7.55686, + "35": 7.10177, + "36": 6.96431, + "37": 7.28463, + "38": 7.0808, + "39": 7.40923, + "40": 7.43338, + "41": 7.38496, + "42": 7.15749, + "43": 7.15858, + "44": 7.28852, + "45": 7.16793, + "46": 6.78468, + "47": 7.4114, + "48": 7.0027, + "49": 7.46249, + "50": 6.92151 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 911219392.0, + "2": 910960384.0, + "3": 911156352.0, + "4": 912204800.0, + "5": 920796544.0, + "6": 940387968.0, + "7": 990599872.0, + "8": 976457728.0, + "9": 998097664.0, + "10": 995852672.0, + "11": 994583680.0, + "12": 977344896.0, + "13": 1028141824.0, + "14": 1007166208.0, + "15": 987423616.0, + "16": 993054784.0, + "17": 982319168.0, + "18": 998261760.0, + "19": 984696320.0, + "20": 982914752.0, + "21": 979667456.0, + "22": 953988864.0, + "23": 972353984.0, + "24": 964792064.0, + "25": 958512192.0, + "26": 946928512.0, + "27": 948458304.0, + "28": 949643968.0, + "29": 942877440.0, + "30": 935020160.0, + "31": 935327616.0, + "32": 934281088.0, + "33": 921805568.0, + "34": 928189312.0, + "35": 922202496.0, + "36": 924246656.0, + "37": 920661248.0, + "38": 922930752.0, + "39": 922322816.0, + "40": 921856512.0, + "41": 920227968.0, + "42": 918353664.0, + "43": 918607040.0, + "44": 914948032.0, + "45": 914295232.0, + "46": 914344448.0, + "47": 911769536.0, + "48": 912013312.0, + "49": 910349440.0, + "50": 914351552.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5498353152.0, + "2": 5499147776.0, + "3": 5499940352.0, + "4": 5500732928.0, + "5": 5501525504.0, + "6": 5502318080.0, + "7": 5503110656.0, + "8": 5503903232.0, + "9": 5497958912.0, + "10": 5498751488.0, + "11": 5499544064.0, + "12": 5500336640.0, + "13": 5501129216.0, + "14": 5501921792.0, + "15": 5502714368.0, + "16": 5503506944.0, + "17": 5504299520.0, + "18": 5505092096.0, + "19": 5505884672.0, + "20": 5506677248.0, + "21": 5507469824.0, + "22": 
5508262400.0, + "23": 5509054976.0, + "24": 5509847552.0, + "25": 5510640128.0, + "26": 5511432704.0, + "27": 5512225280.0, + "28": 5513017856.0, + "29": 5513810432.0, + "30": 5514603008.0, + "31": 5515395584.0, + "32": 5516188160.0, + "33": 5516980736.0, + "34": 5517773312.0, + "35": 5518565888.0, + "36": 5519358464.0, + "37": 5520151040.0, + "38": 5520943616.0, + "39": 5521736192.0, + "40": 5522528768.0, + "41": 5523321344.0, + "42": 5524113920.0, + "43": 5524906496.0, + "44": 5525699072.0, + "45": 5526491648.0, + "46": 5527284224.0, + "47": 5528076800.0, + "48": 5528869376.0, + "49": 5529661952.0, + "50": 5530454528.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 41739952128.0, + "2": 43687571456.0, + "3": 43687571456.0, + "4": 43983216640.0, + "5": 43983216640.0, + "6": 43983216640.0, + "7": 43983216640.0, + "8": 44024635392.0, + "9": 44041216000.0, + "10": 44041216000.0, + "11": 44041216000.0, + "12": 44041216000.0, + "13": 44041216000.0, + "14": 44041216000.0, + "15": 44041216000.0, + "16": 44041216000.0, + "17": 44041216000.0, + "18": 44041216000.0, + "19": 44041216000.0, + "20": 44041216000.0, + "21": 44041216000.0, + "22": 44041216000.0, + "23": 44041216000.0, + "24": 44041216000.0, + "25": 44041216000.0, + "26": 44041216000.0, + "27": 44041216000.0, + "28": 44041216000.0, + "29": 44041326592.0, + "30": 44162326528.0, + "31": 44220485632.0, + "32": 44270411776.0, + "33": 44293799936.0, + "34": 44293799936.0, + "35": 44293799936.0, + "36": 44293799936.0, + "37": 44293799936.0, + "38": 44293799936.0, + "39": 44293799936.0, + "40": 44293799936.0, + "41": 44293799936.0, + "42": 44293799936.0, + "43": 44293799936.0, + "44": 44293799936.0, + "45": 44293799936.0, + "46": 44293799936.0, + "47": 44293799936.0, + "48": 44293799936.0, + "49": 44293799936.0, + "50": 44293799936.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.08617, 
+ "2": 11.10475, + "3": 10.48001, + "4": 10.13466, + "5": 9.79047, + "6": 9.50601, + "7": 9.5113, + "8": 8.85336, + "9": 8.66683, + "10": 8.95866, + "11": 8.29315, + "12": 8.36982, + "13": 8.25544, + "14": 7.73322, + "15": 7.86639, + "16": 7.92442, + "17": 7.86278, + "18": 7.61012, + "19": 8.00269, + "20": 7.73019, + "21": 7.4165, + "22": 7.41478, + "23": 7.28671, + "24": 7.27903, + "25": 7.54456, + "26": 6.96542, + "27": 7.50538, + "28": 7.20607, + "29": 7.377, + "30": 7.52777, + "31": 7.27094, + "32": 7.4604, + "33": 7.51419, + "34": 7.56867, + "35": 7.09252, + "36": 6.96015, + "37": 7.29846, + "38": 7.0742, + "39": 7.43347, + "40": 7.43116, + "41": 7.40919, + "42": 7.15527, + "43": 7.15652, + "44": 7.30441, + "45": 7.1893, + "46": 6.77296, + "47": 7.45045, + "48": 7.02403, + "49": 7.45719, + "50": 6.92656 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 64.40054, + "2": 2.16564, + "3": 3.72378, + "4": 1.63174, + "5": 2.30947, + "6": 1.7246, + "7": 1.5089, + "8": 1.60943, + "9": 1.48606, + "10": 1.47162, + "11": 1.05608, + "12": 1.3309, + "13": 1.06824, + "14": 1.41914, + "15": 1.10033, + "16": 1.15759, + "17": 1.23897, + "18": 1.10439, + "19": 1.11869, + "20": 1.09363, + "21": 1.23622, + "22": 1.14797, + "23": 1.23037, + "24": 1.03991, + "25": 1.07795, + "26": 1.04416, + "27": 1.03654, + "28": 1.04098, + "29": 1.03502, + "30": 1.02909, + "31": 1.17935, + "32": 1.14717, + "33": 1.05403, + "34": 1.13894, + "35": 1.04538, + "36": 1.04367, + "37": 1.0843, + "38": 1.04631, + "39": 1.06131, + "40": 1.06988, + "41": 1.09756, + "42": 1.04759, + "43": 1.09649, + "44": 1.05666, + "45": 1.05249, + "46": 1.04539, + "47": 1.04041, + "48": 1.04904, + "49": 1.04777, + "50": 1.06237 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json new file mode 100644 index 00000000000..d7372742ca7 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json @@ -0,0 +1,344 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.07546, + "2": 11.03837, + "3": 9.66011, + "4": 9.91381, + "5": 9.32909, + "6": 9.13922, + "7": 9.13574, + "8": 8.65508, + "9": 8.51394, + "10": 8.8409, + "11": 8.29149, + "12": 8.34581, + "13": 8.25518, + "14": 7.73711, + "15": 7.86249, + "16": 7.9371, + "17": 7.89319, + "18": 7.63123, + "19": 7.99731, + "20": 7.74538, + "21": 7.44348, + "22": 7.42249, + "23": 7.29714, + "24": 7.27462, + "25": 7.54574, + "26": 6.96838, + "27": 7.50556, + "28": 7.22743, + "29": 7.36588, + "30": 7.52622, + "31": 7.27026, + "32": 7.45521, + "33": 7.50954, + "34": 7.55686, + "35": 7.10177, + "36": 6.96431, + "37": 7.28463, + "38": 7.0808, + "39": 7.40923, + "40": 7.43338, + "41": 7.38496, + "42": 7.15749, + "43": 7.15858, + "44": 7.28852, + "45": 7.16793, + "46": 6.78468, + "47": 7.4114, + "48": 7.0027, + "49": 7.46249, + "50": 6.92151 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 911219392.0, + "2": 910960384.0, + "3": 911156352.0, + "4": 912204800.0, + "5": 920796544.0, + "6": 940387968.0, + "7": 990599872.0, + "8": 976457728.0, + "9": 998097664.0, + "10": 995852672.0, + "11": 994583680.0, + "12": 977344896.0, + "13": 1028141824.0, + "14": 1007166208.0, + "15": 987423616.0, + "16": 993054784.0, + "17": 982319168.0, + "18": 998261760.0, + "19": 984696320.0, + "20": 982914752.0, + "21": 979667456.0, + "22": 953988864.0, + "23": 972353984.0, + "24": 964792064.0, + "25": 958512192.0, + "26": 946928512.0, + "27": 948458304.0, + "28": 949643968.0, + "29": 942877440.0, + "30": 935020160.0, + "31": 
935327616.0, + "32": 934281088.0, + "33": 921805568.0, + "34": 928189312.0, + "35": 922202496.0, + "36": 924246656.0, + "37": 920661248.0, + "38": 922930752.0, + "39": 922322816.0, + "40": 921856512.0, + "41": 920227968.0, + "42": 918353664.0, + "43": 918607040.0, + "44": 914948032.0, + "45": 914295232.0, + "46": 914344448.0, + "47": 911769536.0, + "48": 912013312.0, + "49": 910349440.0, + "50": 914351552.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5498353152.0, + "2": 5499147776.0, + "3": 5499940352.0, + "4": 5500732928.0, + "5": 5501525504.0, + "6": 5502318080.0, + "7": 5503110656.0, + "8": 5503903232.0, + "9": 5497958912.0, + "10": 5498751488.0, + "11": 5499544064.0, + "12": 5500336640.0, + "13": 5501129216.0, + "14": 5501921792.0, + "15": 5502714368.0, + "16": 5503506944.0, + "17": 5504299520.0, + "18": 5505092096.0, + "19": 5505884672.0, + "20": 5506677248.0, + "21": 5507469824.0, + "22": 5508262400.0, + "23": 5509054976.0, + "24": 5509847552.0, + "25": 5510640128.0, + "26": 5511432704.0, + "27": 5512225280.0, + "28": 5513017856.0, + "29": 5513810432.0, + "30": 5514603008.0, + "31": 5515395584.0, + "32": 5516188160.0, + "33": 5516980736.0, + "34": 5517773312.0, + "35": 5518565888.0, + "36": 5519358464.0, + "37": 5520151040.0, + "38": 5520943616.0, + "39": 5521736192.0, + "40": 5522528768.0, + "41": 5523321344.0, + "42": 5524113920.0, + "43": 5524906496.0, + "44": 5525699072.0, + "45": 5526491648.0, + "46": 5527284224.0, + "47": 5528076800.0, + "48": 5528869376.0, + "49": 5529661952.0, + "50": 5530454528.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 41739952128.0, + "2": 43687571456.0, + "3": 43687571456.0, + "4": 43983216640.0, + "5": 43983216640.0, + "6": 43983216640.0, + "7": 43983216640.0, + "8": 44024635392.0, + "9": 44041216000.0, + "10": 44041216000.0, + "11": 44041216000.0, + "12": 44041216000.0, + "13": 
44041216000.0, + "14": 44041216000.0, + "15": 44041216000.0, + "16": 44041216000.0, + "17": 44041216000.0, + "18": 44041216000.0, + "19": 44041216000.0, + "20": 44041216000.0, + "21": 44041216000.0, + "22": 44041216000.0, + "23": 44041216000.0, + "24": 44041216000.0, + "25": 44041216000.0, + "26": 44041216000.0, + "27": 44041216000.0, + "28": 44041216000.0, + "29": 44041326592.0, + "30": 44162326528.0, + "31": 44220485632.0, + "32": 44270411776.0, + "33": 44293799936.0, + "34": 44293799936.0, + "35": 44293799936.0, + "36": 44293799936.0, + "37": 44293799936.0, + "38": 44293799936.0, + "39": 44293799936.0, + "40": 44293799936.0, + "41": 44293799936.0, + "42": 44293799936.0, + "43": 44293799936.0, + "44": 44293799936.0, + "45": 44293799936.0, + "46": 44293799936.0, + "47": 44293799936.0, + "48": 44293799936.0, + "49": 44293799936.0, + "50": 44293799936.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.08617, + "2": 11.10475, + "3": 10.48001, + "4": 10.13466, + "5": 9.79047, + "6": 9.50601, + "7": 9.5113, + "8": 8.85336, + "9": 8.66683, + "10": 8.95866, + "11": 8.29315, + "12": 8.36982, + "13": 8.25544, + "14": 7.73322, + "15": 7.86639, + "16": 7.92442, + "17": 7.86278, + "18": 7.61012, + "19": 8.00269, + "20": 7.73019, + "21": 7.4165, + "22": 7.41478, + "23": 7.28671, + "24": 7.27903, + "25": 7.54456, + "26": 6.96542, + "27": 7.50538, + "28": 7.20607, + "29": 7.377, + "30": 7.52777, + "31": 7.27094, + "32": 7.4604, + "33": 7.51419, + "34": 7.56867, + "35": 7.09252, + "36": 6.96015, + "37": 7.29846, + "38": 7.0742, + "39": 7.43347, + "40": 7.43116, + "41": 7.40919, + "42": 7.15527, + "43": 7.15652, + "44": 7.30441, + "45": 7.1893, + "46": 6.77296, + "47": 7.45045, + "48": 7.02403, + "49": 7.45719, + "50": 6.92656 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 87.63934, + "2": 1.98402, + "3": 3.95877, + "4": 1.64812, + "5": 2.312, + "6": 
2.02902, + "7": 1.56333, + "8": 1.66703, + "9": 1.6393, + "10": 1.40472, + "11": 1.086, + "12": 1.34921, + "13": 1.0854, + "14": 1.4242, + "15": 1.09539, + "16": 1.79766, + "17": 1.2562, + "18": 1.08887, + "19": 1.08371, + "20": 1.10071, + "21": 1.25979, + "22": 1.3212, + "23": 1.25044, + "24": 1.05384, + "25": 1.11356, + "26": 1.0605, + "27": 1.03418, + "28": 1.0405, + "29": 1.05174, + "30": 1.04166, + "31": 1.20036, + "32": 1.12936, + "33": 1.02917, + "34": 1.13473, + "35": 1.02829, + "36": 1.04352, + "37": 1.0843, + "38": 1.03714, + "39": 1.04534, + "40": 1.07031, + "41": 1.07618, + "42": 1.03008, + "43": 1.06043, + "44": 1.04049, + "45": 1.02875, + "46": 1.03669, + "47": 1.03128, + "48": 1.02808, + "49": 1.03038, + "50": 1.04621 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml new file mode 100644 index 00000000000..d9ec0456190 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml @@ -0,0 +1,139 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 32 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION +MODEL_ARGS: + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --expert-model-parallel-size: 4 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + # NOTE: uncomment if TE >= 2.9.0 + # --overlap-grad-reduce: true + # --overlap-param-gather: true + # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN + --attention-backend: unfused # TODO: switch back to fused attention after fix + # Training 
args + --use-mcore-models: true + --sequence-parallel: true + --disable-bias-linear: true + --micro-batch-size: 4 + --global-batch-size: 32 + --train-iters: 50 + --exit-duration-in-mins: 230 + --no-check-for-nan-in-loss-and-grad: true + --no-rope-fusion: true + --cross-entropy-loss-fusion: true + --cross-entropy-fusion-impl: native + --manual-gc: true + --manual-gc-interval: 100 + --recompute-granularity: selective + --recompute-modules: "[layernorm mla_up_proj mlp moe_act]" + --fine-grained-activation-offloading: true + --offload-modules: "[expert_fc1 moe_act attn_norm mlp_norm]" + # Transformer Engine args + --transformer-impl: transformer_engine + # Data args + --seq-length: 4096 + --data-cache-path: ${DATA_CACHE_PATH} + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --split: 949,50,1 + # Add network size args + --num-layers: 15 + --moe-layer-freq: ([0]*3+[1]*12) + --pipeline-model-parallel-layout: Et*3\\|\\(tt\\|\\)*6mL # Et*3|(tt|)*6mL + --hidden-size: 1024 + --ffn-hidden-size: 4096 + --num-attention-heads: 32 + --kv-channels: 128 + --max-position-embeddings: 4096 + --position-embedding-type: rope + --rotary-base: 10000 + --make-vocab-size-divisible-by: 3232 + --normalization: RMSNorm + --norm-epsilon: 1e-6 + --swiglu: true + --untie-embeddings-and-output-weights: true + --multi-latent-attention: true + # Comment out the following MTP args to disable MTP + --mtp-num-layers: 1 + --mtp-loss-scaling-factor: 0.1 + # Add regularization args + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --clip-grad: 1.0 + --weight-decay: 0.1 + --qk-layernorm: true + # Add learning rate args + --lr-warmup-fraction: .01 + --lr: 0.00015 + --min-lr: 1.0e-5 + --lr-decay-style: cosine + --adam-beta1: 0.9 + --adam-beta2: 0.95 + # Add MoE args + --num-experts: 32 + --moe-ffn-hidden-size: 1024 + 
--moe-shared-expert-intermediate-size: 1024 + --moe-router-load-balancing-type: seq_aux_loss + --moe-router-topk: 4 + --moe-token-dispatcher-type: alltoall + --moe-router-pre-softmax: true + --moe-grouped-gemm: true + --moe-aux-loss-coeff: 1e-4 + --moe-router-group-topk: 2 + --moe-router-num-groups: 4 + --moe-router-topk-scaling-factor: 2.0 + --moe-router-score-function: sigmoid + --moe-router-enable-expert-bias: true + --moe-router-bias-update-rate: 1e-3 + --moe-router-dtype: fp32 + --moe-permute-fusion: true + # Add MLA args + --q-lora-rank: 1536 + --kv-lora-rank: 512 + --qk-head-dim: 128 + --qk-pos-emb-head-dim: 64 + --v-head-dim: 128 + --rotary-scaling-factor: 40 + --mscale: 1.0 + --mscale-all-dim: 1.0 + # Add validation args + --eval-iters: 32 + --eval-interval: 200 + # Add checkpointing args + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} + --save-interval: 25 + # Add initialization args + --init-method-std: 0.02 + # Add logging args + --log-timers-to-tensorboard: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-params-norm: true + --log-validation-ppl-to-tensorboard: true + --log-throughput: true + --log-interval: 1 + --logging-level: 40 + --tensorboard-dir: ${TENSORBOARD_PATH} + # Add mixed precision args + --bf16: true + --exit-interval: 50 + --overlap-moe-expert-parallel-comm: true +TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular +METRICS: + - "iteration-time" + - "lm loss" + - "num-zeros" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" + - "mtp_1 loss" diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json new file mode 100644 index 00000000000..4e979e64295 --- /dev/null +++ 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.04266, + "2": 11.02309, + "3": 9.43552, + "4": 10.04614, + "5": 9.38535, + "6": 9.14543, + "7": 9.21141, + "8": 8.63458, + "9": 8.48937, + "10": 8.82763, + "11": 8.29457, + "12": 8.3282, + "13": 8.23008, + "14": 7.71714, + "15": 7.86981, + "16": 7.92286, + "17": 7.8604, + "18": 7.62039, + "19": 7.98493, + "20": 7.72023, + "21": 7.39758, + "22": 7.39771, + "23": 7.28314, + "24": 7.25048, + "25": 7.53113, + "26": 6.95329, + "27": 7.49432, + "28": 7.20394, + "29": 7.37282, + "30": 7.50232, + "31": 7.25348, + "32": 7.4305, + "33": 7.48364, + "34": 7.53486, + "35": 7.10336, + "36": 6.94516, + "37": 7.26117, + "38": 7.07009, + "39": 7.40543, + "40": 7.42044, + "41": 7.34202, + "42": 7.11816, + "43": 7.11373, + "44": 7.27067, + "45": 7.07036, + "46": 6.77823, + "47": 7.1875, + "48": 6.99998, + "49": 7.45868, + "50": 6.90956 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 844114112.0, + "2": 843855104.0, + "3": 844048640.0, + "4": 842998144.0, + "5": 855786112.0, + "6": 874329728.0, + "7": 925591552.0, + "8": 915644608.0, + "9": 935187584.0, + "10": 927702400.0, + "11": 957888256.0, + "12": 923872512.0, + "13": 969427072.0, + "14": 965228416.0, + "15": 952825344.0, + "16": 943777088.0, + "17": 928845824.0, + "18": 925913856.0, + "19": 955339136.0, + "20": 989208256.0, + "21": 924095424.0, + "22": 908902272.0, + "23": 892664576.0, + "24": 900830400.0, + "25": 928105472.0, + "26": 877724352.0, + "27": 912808320.0, + "28": 904557696.0, + "29": 872625088.0, + "30": 864767104.0, + "31": 868220416.0, + "32": 861931136.0, + "33": 859941312.0, + "34": 855839104.0, + "35": 854046848.0, + "36": 852944896.0, + "37": 851456704.0, + "38": 849532096.0, + "39": 
849972608.0, + "40": 849505792.0, + "41": 845780288.0, + "42": 846003328.0, + "43": 846257472.0, + "44": 852034880.0, + "45": 847187456.0, + "46": 855625856.0, + "47": 844661952.0, + "48": 851197248.0, + "49": 851630464.0, + "50": 846195904.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4419107328.0, + "2": 4419108864.0, + "3": 4419108864.0, + "4": 4419108864.0, + "5": 4419108864.0, + "6": 4419108864.0, + "7": 4419108864.0, + "8": 4419108864.0, + "9": 4419108864.0, + "10": 4419108864.0, + "11": 4419108864.0, + "12": 4419108864.0, + "13": 4419108864.0, + "14": 4419108864.0, + "15": 4419108864.0, + "16": 4419108864.0, + "17": 4419108864.0, + "18": 4419108864.0, + "19": 4419108864.0, + "20": 4419108864.0, + "21": 4419108864.0, + "22": 4419108864.0, + "23": 4419108864.0, + "24": 4419108864.0, + "25": 4419108864.0, + "26": 4419108864.0, + "27": 4419108864.0, + "28": 4419108864.0, + "29": 4419108864.0, + "30": 4419108864.0, + "31": 4419108864.0, + "32": 4419108864.0, + "33": 4419108864.0, + "34": 4419108864.0, + "35": 4419108864.0, + "36": 4419108864.0, + "37": 4419108864.0, + "38": 4419108864.0, + "39": 4419108864.0, + "40": 4419108864.0, + "41": 4419108864.0, + "42": 4419108864.0, + "43": 4419108864.0, + "44": 4419108864.0, + "45": 4419108864.0, + "46": 4419108864.0, + "47": 4419108864.0, + "48": 4419108864.0, + "49": 4419108864.0, + "50": 4419108864.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 37959917568.0, + "2": 39578677248.0, + "3": 39580196864.0, + "4": 39580196864.0, + "5": 39583309824.0, + "6": 39583309824.0, + "7": 39583309824.0, + "8": 39583309824.0, + "9": 39583309824.0, + "10": 39583309824.0, + "11": 39583309824.0, + "12": 39583309824.0, + "13": 39583309824.0, + "14": 39583309824.0, + "15": 39583309824.0, + "16": 39583309824.0, + "17": 39583309824.0, + "18": 39583309824.0, + "19": 39583309824.0, + "20": 
39583309824.0, + "21": 39583309824.0, + "22": 39583309824.0, + "23": 39583309824.0, + "24": 39583309824.0, + "25": 39583309824.0, + "26": 39583309824.0, + "27": 39583309824.0, + "28": 39583309824.0, + "29": 39583309824.0, + "30": 39583309824.0, + "31": 39583309824.0, + "32": 39583309824.0, + "33": 39583309824.0, + "34": 39583309824.0, + "35": 39583309824.0, + "36": 39583309824.0, + "37": 39583309824.0, + "38": 39583309824.0, + "39": 39583309824.0, + "40": 39583309824.0, + "41": 39583309824.0, + "42": 39583309824.0, + "43": 39583309824.0, + "44": 39583309824.0, + "45": 39583309824.0, + "46": 39583309824.0, + "47": 39583309824.0, + "48": 39583309824.0, + "49": 39583309824.0, + "50": 39583309824.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 60.48727, + "2": 2.0537, + "3": 3.26481, + "4": 2.56819, + "5": 2.40218, + "6": 1.26492, + "7": 1.5836, + "8": 1.37182, + "9": 1.10133, + "10": 1.10352, + "11": 1.18687, + "12": 1.53724, + "13": 1.25166, + "14": 1.69801, + "15": 1.42166, + "16": 1.104, + "17": 1.22214, + "18": 1.34911, + "19": 1.09323, + "20": 1.08552, + "21": 1.22223, + "22": 1.19712, + "23": 1.05456, + "24": 1.03745, + "25": 1.14154, + "26": 1.07349, + "27": 1.05181, + "28": 1.0364, + "29": 1.17111, + "30": 1.02943, + "31": 1.0758, + "32": 1.03304, + "33": 1.04107, + "34": 1.03092, + "35": 1.07869, + "36": 1.02457, + "37": 1.08557, + "38": 1.00729, + "39": 1.07249, + "40": 1.08655, + "41": 1.02362, + "42": 1.02046, + "43": 1.07618, + "44": 1.08709, + "45": 1.00443, + "46": 1.00379, + "47": 1.06019, + "48": 0.98958, + "49": 1.08317, + "50": 0.9932 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json new file mode 
100644 index 00000000000..537e20b09d8 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.04266, + "2": 11.02309, + "3": 9.43552, + "4": 10.04614, + "5": 9.38535, + "6": 9.14543, + "7": 9.21141, + "8": 8.63458, + "9": 8.48937, + "10": 8.82763, + "11": 8.29457, + "12": 8.3282, + "13": 8.23008, + "14": 7.71714, + "15": 7.86981, + "16": 7.92286, + "17": 7.8604, + "18": 7.62039, + "19": 7.98493, + "20": 7.72023, + "21": 7.39758, + "22": 7.39771, + "23": 7.28314, + "24": 7.25048, + "25": 7.53113, + "26": 6.95329, + "27": 7.49432, + "28": 7.20394, + "29": 7.37282, + "30": 7.50232, + "31": 7.25348, + "32": 7.4305, + "33": 7.48364, + "34": 7.53486, + "35": 7.10336, + "36": 6.94516, + "37": 7.26117, + "38": 7.07009, + "39": 7.40543, + "40": 7.42044, + "41": 7.34202, + "42": 7.11816, + "43": 7.11373, + "44": 7.27067, + "45": 7.07036, + "46": 6.77823, + "47": 7.1875, + "48": 6.99998, + "49": 7.45868, + "50": 6.90956 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 844114112.0, + "2": 843855104.0, + "3": 844048640.0, + "4": 842998144.0, + "5": 855786112.0, + "6": 874329728.0, + "7": 925591552.0, + "8": 915644608.0, + "9": 935187584.0, + "10": 927702400.0, + "11": 957888256.0, + "12": 923872512.0, + "13": 969427072.0, + "14": 965228416.0, + "15": 952825344.0, + "16": 943777088.0, + "17": 928845824.0, + "18": 925913856.0, + "19": 955339136.0, + "20": 989208256.0, + "21": 924095424.0, + "22": 908902272.0, + "23": 892664576.0, + "24": 900830400.0, + "25": 928105472.0, + "26": 877724352.0, + "27": 912808320.0, + "28": 904557696.0, + "29": 872625088.0, + "30": 864767104.0, + "31": 868220416.0, + "32": 861931136.0, + "33": 859941312.0, + "34": 855839104.0, + "35": 854046848.0, + "36": 852944896.0, + 
"37": 851456704.0, + "38": 849532096.0, + "39": 849972608.0, + "40": 849505792.0, + "41": 845780288.0, + "42": 846003328.0, + "43": 846257472.0, + "44": 852034880.0, + "45": 847187456.0, + "46": 855625856.0, + "47": 844661952.0, + "48": 851197248.0, + "49": 851630464.0, + "50": 846195904.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4419107328.0, + "2": 4419108864.0, + "3": 4419108864.0, + "4": 4419108864.0, + "5": 4419108864.0, + "6": 4419108864.0, + "7": 4419108864.0, + "8": 4419108864.0, + "9": 4419108864.0, + "10": 4419108864.0, + "11": 4419108864.0, + "12": 4419108864.0, + "13": 4419108864.0, + "14": 4419108864.0, + "15": 4419108864.0, + "16": 4419108864.0, + "17": 4419108864.0, + "18": 4419108864.0, + "19": 4419108864.0, + "20": 4419108864.0, + "21": 4419108864.0, + "22": 4419108864.0, + "23": 4419108864.0, + "24": 4419108864.0, + "25": 4419108864.0, + "26": 4419108864.0, + "27": 4419108864.0, + "28": 4419108864.0, + "29": 4419108864.0, + "30": 4419108864.0, + "31": 4419108864.0, + "32": 4419108864.0, + "33": 4419108864.0, + "34": 4419108864.0, + "35": 4419108864.0, + "36": 4419108864.0, + "37": 4419108864.0, + "38": 4419108864.0, + "39": 4419108864.0, + "40": 4419108864.0, + "41": 4419108864.0, + "42": 4419108864.0, + "43": 4419108864.0, + "44": 4419108864.0, + "45": 4419108864.0, + "46": 4419108864.0, + "47": 4419108864.0, + "48": 4419108864.0, + "49": 4419108864.0, + "50": 4419108864.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 37959917568.0, + "2": 39578677248.0, + "3": 39580196864.0, + "4": 39580196864.0, + "5": 39583309824.0, + "6": 39583309824.0, + "7": 39583309824.0, + "8": 39583309824.0, + "9": 39583309824.0, + "10": 39583309824.0, + "11": 39583309824.0, + "12": 39583309824.0, + "13": 39583309824.0, + "14": 39583309824.0, + "15": 39583309824.0, + "16": 39583309824.0, + "17": 39583309824.0, + "18": 
39583309824.0, + "19": 39583309824.0, + "20": 39583309824.0, + "21": 39583309824.0, + "22": 39583309824.0, + "23": 39583309824.0, + "24": 39583309824.0, + "25": 39583309824.0, + "26": 39583309824.0, + "27": 39583309824.0, + "28": 39583309824.0, + "29": 39583309824.0, + "30": 39583309824.0, + "31": 39583309824.0, + "32": 39583309824.0, + "33": 39583309824.0, + "34": 39583309824.0, + "35": 39583309824.0, + "36": 39583309824.0, + "37": 39583309824.0, + "38": 39583309824.0, + "39": 39583309824.0, + "40": 39583309824.0, + "41": 39583309824.0, + "42": 39583309824.0, + "43": 39583309824.0, + "44": 39583309824.0, + "45": 39583309824.0, + "46": 39583309824.0, + "47": 39583309824.0, + "48": 39583309824.0, + "49": 39583309824.0, + "50": 39583309824.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 67.13422, + "2": 1.95457, + "3": 3.25371, + "4": 2.66673, + "5": 3.05794, + "6": 1.35128, + "7": 1.66174, + "8": 2.19011, + "9": 1.16207, + "10": 1.16456, + "11": 1.26279, + "12": 1.60263, + "13": 1.29219, + "14": 2.93489, + "15": 1.48729, + "16": 1.15146, + "17": 1.27648, + "18": 1.39906, + "19": 1.13846, + "20": 1.14415, + "21": 1.27567, + "22": 1.26287, + "23": 1.11223, + "24": 1.10986, + "25": 1.20096, + "26": 1.13382, + "27": 1.11305, + "28": 1.11424, + "29": 1.22341, + "30": 1.08856, + "31": 1.15539, + "32": 1.10684, + "33": 1.11399, + "34": 1.09048, + "35": 1.1509, + "36": 1.09151, + "37": 1.13904, + "38": 1.06658, + "39": 1.1325, + "40": 1.14715, + "41": 1.07533, + "42": 1.08243, + "43": 1.13881, + "44": 1.14004, + "45": 1.06323, + "46": 1.06103, + "47": 1.11785, + "48": 1.04242, + "49": 1.13933, + "50": 1.0407 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml new file mode 100644 index 00000000000..f4b64722712 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml @@ -0,0 +1,134 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION +MODEL_ARGS: + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --expert-model-parallel-size: 4 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + # NOTE: uncomment if TE >= 2.9.0 + # --overlap-grad-reduce: true + # --overlap-param-gather: true + # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN + --attention-backend: unfused # TODO: switch back to fused attention after fix + # Training args + --use-mcore-models: true + --sequence-parallel: true + --disable-bias-linear: true + --micro-batch-size: 4 + --global-batch-size: 32 + --train-iters: 50 + --exit-duration-in-mins: 230 + --no-check-for-nan-in-loss-and-grad: true + --no-rope-fusion: true + --cross-entropy-loss-fusion: true + --cross-entropy-fusion-impl: native + --manual-gc: true + --manual-gc-interval: 100 + --recompute-granularity: selective + --recompute-modules: "[layernorm mla_up_proj mlp moe_act]" + --fine-grained-activation-offloading: true + --offload-modules: "[expert_fc1 moe_act attn_norm mlp_norm]" + # Transformer Engine args + --transformer-impl: transformer_engine + # Data args + --seq-length: 4096 + --data-cache-path: ${DATA_CACHE_PATH} + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + 
--merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --split: 949,50,1 + # Add network size args + --num-layers: 15 + --moe-layer-freq: ([0]*3+[1]*12) + --pipeline-model-parallel-layout: Et*3\\|\\(tt\\|\\)*6L # Et*3|(tt|)*6L + --hidden-size: 1024 + --ffn-hidden-size: 4096 + --num-attention-heads: 32 + --kv-channels: 128 + --max-position-embeddings: 4096 + --position-embedding-type: rope + --rotary-base: 10000 + --make-vocab-size-divisible-by: 3232 + --normalization: RMSNorm + --norm-epsilon: 1e-6 + --swiglu: true + --untie-embeddings-and-output-weights: true + --multi-latent-attention: true + # Add regularization args + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --clip-grad: 1.0 + --weight-decay: 0.1 + --qk-layernorm: true + # Add learning rate args + --lr-warmup-fraction: .01 + --lr: 0.00015 + --min-lr: 1.0e-5 + --lr-decay-style: cosine + --adam-beta1: 0.9 + --adam-beta2: 0.95 + # Add MoE args + --num-experts: 32 + --moe-ffn-hidden-size: 1024 + --moe-shared-expert-intermediate-size: 1024 + --moe-router-load-balancing-type: seq_aux_loss + --moe-router-topk: 4 + --moe-token-dispatcher-type: alltoall + --moe-router-pre-softmax: true + --moe-grouped-gemm: true + --moe-aux-loss-coeff: 1e-4 + --moe-router-group-topk: 2 + --moe-router-num-groups: 4 + --moe-router-topk-scaling-factor: 2.0 + --moe-router-score-function: sigmoid + --moe-router-enable-expert-bias: true + --moe-router-bias-update-rate: 1e-3 + --moe-router-dtype: fp32 + --moe-permute-fusion: true + # Add MLA args + --q-lora-rank: 1536 + --kv-lora-rank: 512 + --qk-head-dim: 128 + --qk-pos-emb-head-dim: 64 + --v-head-dim: 128 + --rotary-scaling-factor: 40 + --mscale: 1.0 + --mscale-all-dim: 1.0 + # Add validation args + --eval-iters: 32 + --eval-interval: 200 + # Add checkpointing args + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} + --save-interval: 25 + # Add initialization args + --init-method-std: 0.02 + # Add logging args + --log-timers-to-tensorboard: true + 
--log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-params-norm: true + --log-validation-ppl-to-tensorboard: true + --log-throughput: true + --log-interval: 1 + --logging-level: 40 + --tensorboard-dir: ${TENSORBOARD_PATH} + # Add mixed precision args + --bf16: true + --exit-interval: 50 +TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular +METRICS: + - "iteration-time" + - "lm loss" + - "num-zeros" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 8164ca37df8..7a0f7d8a3f6 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -124,6 +124,16 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] + - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] + - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] ####################################################################### # Super important MR tests that run for both DEV and LTS per MR # ####################################################################### diff --git a/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py new file mode 100644 index 00000000000..7c1b7f1fe4b --- /dev/null +++ b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py @@ -0,0 +1,187 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ +import gc + +import pytest +import torch + +EPSILON = 0.1 + +# Skip all tests if CUDA is not available +cuda_available = torch.cuda.is_available() + + +def _reset_cuda_memory(): + gc.collect() + if cuda_available: + torch.cuda.empty_cache() + + +class ToyModel(torch.nn.Module): + def __init__(self, hidden_size: int = 2048, num_layers: int = 4, dtype=torch.bfloat16): + super().__init__() + layers = [] + for _ in range(num_layers): + layers.append( + torch.nn.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device="cuda") + ) + self.net = torch.nn.Sequential(*layers).to(device="cuda", dtype=dtype) + self.hidden_size = hidden_size + self.num_layers = num_layers + self.dtype = dtype + + # Prevent weights/bias from being considered activation tensors for offload; + # ensure we only count activation tensors (inputs x) in memory accounting. + for p in self.parameters(): + try: + setattr(p, "offloading_activation", False) + except Exception: + pass + + def forward(self, x, use_offload: bool = False): + from megatron.core.pipeline_parallel import fine_grained_activation_offload as off + + if use_offload: + # Initialize a new chunk (microbatch) and enable offload context. + with off.get_fine_grained_offloading_context(True): + off.fine_grained_offloading_init_chunk_handler( + vp_size=1, vp_stage=None, min_offloaded_tensor_size=1 + ) + for i, layer in enumerate(self.net): + # Group by module; with this linear-only model, each group corresponds to a layer. 
+ off.fine_grained_offloading_set_last_layer(i == len(self.net) - 1) + x = off.fine_grained_offloading_group_start(x, name=f"layer_{i}") + x = layer(x) + # Commit the group; returns a tuple of tensors + (x,) = off.fine_grained_offloading_group_commit( + x, name=f"layer_{i}", forced_released_tensors=[] + ) + return x + # Baseline path (no offload hooks) + with ( + torch.autocast(device_type="cuda", dtype=self.dtype) + if self.dtype in (torch.float16, torch.bfloat16) + else torch.cuda.amp.autocast(enabled=False) + ): + for layer in self.net: + x = layer(x) + return x + + +@pytest.fixture(autouse=True) +def _monkeypatch_offload_deps(monkeypatch): + # Avoid requiring torch.distributed initialization and NVML in tests + import megatron.core.pipeline_parallel.fine_grained_activation_offload as off + + monkeypatch.setattr(off, "debug_rank", lambda *args, **kwargs: None, raising=False) + monkeypatch.setattr(off, "set_ideal_affinity_for_current_gpu", lambda: None, raising=False) + # Ensure a clean state each test + off.fine_grained_offloading_reset() + yield + off.fine_grained_offloading_reset() + + +def test_fine_grained_activation_offload_memory_reduction(): + torch.manual_seed(1234) + # Use a linear-only stack so theoretical saved memory equals sum of per-layer input x bytes. 
+ model = ToyModel(hidden_size=2048, num_layers=8, dtype=torch.bfloat16).eval() + + # Create input + inp = torch.randn( + (2048, model.hidden_size), device="cuda", dtype=torch.bfloat16, requires_grad=True + ) + + # Warmup to stabilize allocator behavior + _reset_cuda_memory() + out = model(inp, use_offload=False) + (out.sum()).backward() + torch.cuda.synchronize() + _reset_cuda_memory() + + # Baseline memory measurement (no offload) + _reset_cuda_memory() + inp_baseline = inp.detach().clone().requires_grad_(True) + baseline_mem_before = torch.cuda.memory_allocated() / (1024**2) + out_base = model(inp_baseline, use_offload=False) + baseline_mem_after = (torch.cuda.memory_allocated() - out_base.nbytes) / (1024**2) + (out_base.sum()).backward() + torch.cuda.synchronize() + baseline_delta = baseline_mem_after - baseline_mem_before + + # Offload memory measurement + from megatron.core.pipeline_parallel import fine_grained_activation_offload as off + + off.fine_grained_offloading_reset() + _reset_cuda_memory() + inp_off = inp.detach().clone().requires_grad_(True) + offload_mem_before = torch.cuda.memory_allocated() / (1024**2) + out_off = model(inp_off, use_offload=True) + offload_mem_after = (torch.cuda.memory_allocated() - out_off.nbytes) / (1024**2) + (out_off.sum()).backward() + torch.cuda.synchronize() + offload_delta = offload_mem_after - offload_mem_before + + # Offload should reduce peak cached memory usage after forward + assert ( + offload_delta < baseline_delta + ), f"offload did not reduce memory: off={offload_delta:.2f}MiB base={baseline_delta:.2f}MiB" + + # Theoretical savings: storing per-layer input x (same shape each layer). + bytes_per_elem = inp.element_size() # 2 for bfloat16 + input_bytes = inp.numel() * bytes_per_elem + # -2 because the first and last activations are not offloaded + expected_saved_mib = (model.num_layers - 2) * (input_bytes / (1024**2)) + + # Actual savings ≈ baseline_delta - offload_delta (both exclude output tensor memory). 
+ actual_saved_mib = baseline_delta - offload_delta + + # Allow slack for allocator jitter and extra intermediates; magnitudes should match. + rel_err = abs(actual_saved_mib - expected_saved_mib) / max(expected_saved_mib, 1e-6) + assert ( + rel_err <= EPSILON + ), f"saved mismatch: actual={actual_saved_mib:.2f}MiB expected~={expected_saved_mib:.2f}MiB (rel_err={rel_err:.2f})" + + +def test_fine_grained_activation_offload_output_and_grad_consistency(): + torch.manual_seed(2025) + hidden = 1024 + layers = 3 + + # Create identical models by resetting seed + torch.manual_seed(2025) + model_base = ToyModel(hidden_size=hidden, num_layers=layers, dtype=torch.bfloat16).train() + torch.manual_seed(2025) + model_off = ToyModel(hidden_size=hidden, num_layers=layers, dtype=torch.bfloat16).train() + + # Same input and target + inp = torch.randn((32, hidden), device="cuda", dtype=torch.bfloat16, requires_grad=True) + target = torch.randn_like(inp) + + # Baseline forward/backward + out_base = model_base(inp, use_offload=False) + loss_base = torch.nn.functional.mse_loss(out_base, target) + loss_base.backward() + grads_base = [ + p.grad.detach().clone() if p.grad is not None else None for p in model_base.parameters() + ] + + # Offload forward/backward + from megatron.core.pipeline_parallel import fine_grained_activation_offload as off + + off.fine_grained_offloading_reset() + out_off = model_off(inp.detach().clone().requires_grad_(True), use_offload=True) + loss_off = torch.nn.functional.mse_loss(out_off, target) + loss_off.backward() + grads_off = [ + p.grad.detach().clone() if p.grad is not None else None for p in model_off.parameters() + ] + + # Compare outputs + assert torch.allclose(out_off.float(), out_base.float(), rtol=1e-3, atol=1e-3) + + # Compare gradients parameter-wise + for gb, go in zip(grads_base, grads_off): + if gb is None and go is None: + continue + assert gb is not None and go is not None + assert torch.allclose(go.float(), gb.float(), rtol=1e-3, atol=1e-3) 
From bada8f96681f7610500e6acd5aa51a7cca0bd5e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 29 Oct 2025 10:09:18 +0100 Subject: [PATCH 083/334] ci(fix): `Run tests` label (#1970) (#2006) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/auto-assign-milestone.yml | 1 + .github/workflows/auto-reminder-bot.yml | 34 ++++ .github/workflows/auto-swap-labels.yml | 33 ++++ .../workflows/build-test-publish-wheel.yml | 3 + .../workflows/cherry-pick-release-commit.yml | 1 + .github/workflows/cicd-approve-test-queue.yml | 1 + .github/workflows/cicd-main.yml | 28 ++-- .github/workflows/close-inactive-issue-pr.yml | 1 + .github/workflows/community-bot.yml | 1 + .github/workflows/copyright-check.yml | 11 +- .github/workflows/dependabot.yml | 4 +- .github/workflows/install-test.yml | 4 + .gitlab/stages/05.publish.yml | 2 +- hello_world | 0 .../launch_nemo_run_workload.py | 2 + .../python_scripts/swap_pr_labels.py | 147 ++++++++++++++++++ tests/test_utils/recipes/ckpt_converter.yaml | 2 +- .../gpt-dynamic-inference-cuda-graphs.yaml | 2 +- .../recipes/gpt-dynamic-inference.yaml | 2 +- tests/test_utils/recipes/gpt-grads.yaml | 2 +- tests/test_utils/recipes/gpt.yaml | 88 +++++------ .../recipes/mamba-static-inference.yaml | 2 +- tests/test_utils/recipes/mamba.yaml | 2 +- .../recipes/moe-dynamic-inference.yaml | 2 +- .../recipes/moe-static-inference.yaml | 6 +- tests/test_utils/recipes/moe.yaml | 28 ++-- 26 files changed, 321 insertions(+), 88 deletions(-) create mode 100644 .github/workflows/auto-reminder-bot.yml create mode 100644 .github/workflows/auto-swap-labels.yml create mode 100644 hello_world create mode 100644 tests/test_utils/python_scripts/swap_pr_labels.py diff --git a/.github/workflows/auto-assign-milestone.yml b/.github/workflows/auto-assign-milestone.yml index 7eae6838332..8153728f9fd 100644 --- a/.github/workflows/auto-assign-milestone.yml +++ 
b/.github/workflows/auto-assign-milestone.yml @@ -14,6 +14,7 @@ jobs: assign-milestone: runs-on: ubuntu-latest environment: nemo-ci + if: github.repository == 'NVIDIA/Megatron-LM' steps: - name: Get PR info id: get-pr-info diff --git a/.github/workflows/auto-reminder-bot.yml b/.github/workflows/auto-reminder-bot.yml new file mode 100644 index 00000000000..c3aa8169b50 --- /dev/null +++ b/.github/workflows/auto-reminder-bot.yml @@ -0,0 +1,34 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +name: Auto Reminder Bot + +on: + workflow_dispatch: + schedule: + - cron: "0 12 * * *" + +jobs: + run-script: + environment: main + name: Run Auto Reminder Bot + runs-on: ubuntu-latest + if: github.repository == 'NVIDIA/Megatron-LM' + steps: + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: | + pip install --no-cache-dir PyGithub slack-sdk + + - name: Run Auto Reminder Bot + run: | + export SLACK_TOKEN=${{ secrets.SLACK_TOKEN }} + export SLACK_WEBHOOK_URL=${{ secrets.SLACK_WEBHOOK_URL }} + export GH_TOKEN=${{ secrets.PAT }} + python tests/test_utils/python_scripts/auto_reminder_github.py diff --git a/.github/workflows/auto-swap-labels.yml b/.github/workflows/auto-swap-labels.yml new file mode 100644 index 00000000000..5335026e2af --- /dev/null +++ b/.github/workflows/auto-swap-labels.yml @@ -0,0 +1,33 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ +name: Auto Swap Labels +on: + pull_request_review: + types: [submitted] + +permissions: + pull-requests: write + contents: read + +jobs: + check-approval: + runs-on: ubuntu-latest + if: github.event.review.state == 'approved' && github.repository == 'NVIDIA/Megatron-LM' + steps: + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: | + pip install --no-cache-dir PyGithub slack-sdk + + - name: Run Auto Reminder Bot + run: | + export GH_TOKEN=${{ github.token }} + export PR_NUMBER=${{ github.event.pull_request.number }} + python tests/test_utils/python_scripts/swap_pr_labels.py diff --git a/.github/workflows/build-test-publish-wheel.yml b/.github/workflows/build-test-publish-wheel.yml index 1ff9f53202b..0f3a037979a 100644 --- a/.github/workflows/build-test-publish-wheel.yml +++ b/.github/workflows/build-test-publish-wheel.yml @@ -35,6 +35,7 @@ permissions: jobs: pre-flight: uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.5 + if: github.repository == 'NVIDIA/Megatron-LM' build-test-publish-wheel: needs: [pre-flight] @@ -42,6 +43,7 @@ jobs: !(needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true') + && github.repository == 'NVIDIA/Megatron-LM' uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.63.1 with: dry-run: true @@ -68,6 +70,7 @@ jobs: || needs.pre-flight.outputs.is_deployment_workflow == 'true' || always() ) + && github.repository == 'NVIDIA/Megatron-LM' && !cancelled() runs-on: ubuntu-latest steps: diff --git a/.github/workflows/cherry-pick-release-commit.yml b/.github/workflows/cherry-pick-release-commit.yml index 9cf8ed98660..58b447939a7 100644 --- a/.github/workflows/cherry-pick-release-commit.yml +++ b/.github/workflows/cherry-pick-release-commit.yml 
@@ -22,6 +22,7 @@ on: jobs: cherry-pick: uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.65.9 + if: github.repository == 'NVIDIA/Megatron-LM' with: target-branches-pattern: 'core_(*dev_)?r[0-9]+\.[0-9]+\.[0-9]+' secrets: diff --git a/.github/workflows/cicd-approve-test-queue.yml b/.github/workflows/cicd-approve-test-queue.yml index 1f23905d5d8..ccc8327368d 100644 --- a/.github/workflows/cicd-approve-test-queue.yml +++ b/.github/workflows/cicd-approve-test-queue.yml @@ -23,6 +23,7 @@ jobs: approve-queue: runs-on: ubuntu-latest environment: main + if: github.repository == 'NVIDIA/Megatron-LM' strategy: matrix: branch: [main, dev] diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index d1e411be98f..27e1f6cdacb 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ name: CICD Megatron-LM on: schedule: @@ -150,6 +151,7 @@ jobs: pre-flight: needs: [is-not-external-contributor] + if: github.repository == 'NVIDIA/Megatron-LM' uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.10 linting: @@ -251,11 +253,6 @@ jobs: apt-get update apt-get install -y gh - - name: Pull cache - run: | - docker pull ${{ env.container-registry }}/megatron-lm:main || true - docker pull ${{ env.container-registry }}/megatron-lm:${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} || true - - name: Get last merged PR id: cache_from env: @@ -271,13 +268,16 @@ jobs: } } }' | jq -r '.data.repository.pullRequests.nodes[].number' | while read -r number; do - echo "${{ env.container-registry }}/megatron-lm:$number" + echo "type=registry,ref=${{ env.container-registry }}/megatron-lm:$number-buildcache,mode=max" done) echo "LAST_PRS< latest_reviews[review.user.login].submitted_at + ): + latest_reviews[review.user.login] = review + except Exception as e: + logger.warning(f"Could not get reviews for PR #{pr.number}: {e}") + + # 2. Separate reviewers into approvers (List B) and non-approvers + approvers = {user for user, review in latest_reviews.items() if review.state == "APPROVED"} + non_approving_reviewers = { + user for user, review in latest_reviews.items() if review.state == "CHANGES_REQUESTED" + } + + # 3. Get all *currently pending* review requests + try: + pending_users_req, pending_teams_req = pr.get_review_requests() + pending_individuals = {r.login for r in pending_users_req} + pending_teams_slugs = {t.slug for t in pending_teams_req} + except Exception as e: + logger.warning(f"Could not get review requests for PR #{pr.number}: {e}") + pending_individuals = set() + pending_teams_slugs = set() + + # 4. Filter pending teams based on the current stage + teams_to_query = ( + pending_teams_slugs - self.EXCLUDED_TEAMS + if self.stage == self.EXPERT_REVIEW + else pending_teams_slugs & self.EXCLUDED_TEAMS + ) + + # 5. 
Get members from the required pending teams + pending_team_members = set() + for slug in teams_to_query: + try: + pending_team_members.update( + m.login for m in self.org.get_team_by_slug(slug).get_members() + ) + except Exception as e: + logger.warning(f"Could not get members for team {slug} on PR #{pr.number}: {e}") + + # 6. "List A": Combine all users who *still need to review* + all_required_reviewers = ( + pending_individuals | pending_team_members | non_approving_reviewers + ) + + # 7. Final list (List A - List B): + pending_reviewers = all_required_reviewers - approvers + logger.info(f"Pending reviewers: {pending_reviewers}") + if len(pending_reviewers) == 0: + try: + pr.remove_from_labels(self.EXPERT_REVIEW) + logger.info(f'Removed "{self.EXPERT_REVIEW}" label from PR #{pr.number}') + except Exception as e: + logger.warning( + f'Failed to remove "{self.EXPERT_REVIEW}" label from PR #{pr.number}: {e}' + ) + + try: + pr.add_to_labels(self.FINAL_REVIEW) + logger.info(f'Added "{self.FINAL_REVIEW}" label to PR #{pr.number}') + except Exception as e: + logger.warning(f'Failed to add "{self.FINAL_REVIEW}" label to PR #{pr.number}: {e}') + + +def main(): + token = os.environ.get("GH_TOKEN") + repo = os.environ.get("REPO", "NVIDIA/Megatron-LM") + pr_number = int(os.environ.get("PR_NUMBER")) + + if not token: + logger.error("GH_TOKEN environment variable is required") + sys.exit(1) + + logger.info(f"Starting PR review reminder for {repo}") + tracker = PRReviewTracker(token, repo, pr_number) + tracker.swap_labels() + + +if __name__ == "__main__": + main() diff --git a/tests/test_utils/recipes/ckpt_converter.yaml b/tests/test_utils/recipes/ckpt_converter.yaml index f78f184a326..bf328ae44c9 100644 --- a/tests/test_utils/recipes/ckpt_converter.yaml +++ b/tests/test_utils/recipes/ckpt_converter.yaml @@ -48,7 +48,7 @@ products: - test_case: [ckpt_converter] products: - environment: [dev] - scope: [mr-broken] + scope: [mr-github-broken, mr-github] platforms: [dgx_h100] - 
environment: [lts] scope: [nightly-broken] diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml index 47b8d346150..f4a7d6c786b 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml @@ -47,5 +47,5 @@ products: - test_case: [gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation] products: - environment: [dev] - scope: [mr-broken] + scope: [mr-broken, mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/gpt-dynamic-inference.yaml b/tests/test_utils/recipes/gpt-dynamic-inference.yaml index 748e4734a6d..77a98d4bd7f 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference.yaml @@ -72,5 +72,5 @@ products: - test_case: [gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/gpt-grads.yaml b/tests/test_utils/recipes/gpt-grads.yaml index cdd3a050ff2..bf048542410 100644 --- a/tests/test_utils/recipes/gpt-grads.yaml +++ b/tests/test_utils/recipes/gpt-grads.yaml @@ -62,5 +62,5 @@ products: - test_case: [gpt3_mcore_reruns_resume_check_grads] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index 0dafb8685c2..baf07cb9759 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -104,75 +104,75 @@ products: scope: [nightly] platforms: [dgx_h100] ####################################################################### - # MR tests: Mostly DEV on MR, and LTS on nightly cadence, except for # + # mr, mr-github tests: Mostly DEV on mr, mr-github, and LTS on nightly cadence, except for # # some very important tests. 
# ####################################################################### - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] # - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # - environment: [lts] # scope: [nightly] # Non-deterministic: #487 - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # outdated TE: #501 - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # non-determinism: #436 - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: 
[nightly] # non-determinism: #437 - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -193,42 +193,42 @@ products: - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] # - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # platforms: [dgx_h100] # Hangs: #513 # - environment: [lts] # scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # platforms: [dgx_h100] # Hangs: #513 - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied] products: # - environment: [dev] - # scope: [mr] # Hangs: #513 + # scope: [mr, mr-github] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap] products: # - environment: [dev] - # scope: [mr] # Hangs: #513 + # scope: [mr, mr-github] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -326,14 +326,14 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: 
[gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -345,49 +345,49 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader] products: # - environment: [dev] - # scope: [mr] # Hangs: #513 + # scope: [mr, mr-github] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -415,25 +415,25 @@ products: - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - test_case: 
[gpt3_mcore_te_tp2_pp1_modelopt_distill_resume] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # Outdated: #502 # - test_case: [gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist] # products: # - environment: [dev] - # scope: [mr] # Broken: #484 + # scope: [mr, mr-github] # Broken: #484 # - environment: [lts] # scope: [nightly] # Requires PyT 2.4: #481 ####################################################################### - # Super important MR tests that run for both DEV and LTS per MR # + # Super important mr, mr-github tests that run for both DEV and LTS per mr, mr-github # ####################################################################### - test_case: [gpt3_mcore_reruns_persistent_1] products: @@ -445,19 +445,16 @@ products: # - test_case: [gpt3_mcore_reruns_persistent_2] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] products: - environment: [lts] - scope: [mr] - - environment: [dev] scope: [mr, mr-github] - platforms: [dgx_h100] - environment: [dev] - scope: [mr-slim] + scope: [mr, mr-github, mr-slim] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather] products: @@ -465,43 +462,40 @@ products: scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] - scope: [mr] + scope: [mr, mr-github] - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: - environment: [lts] - scope: [mr] - - environment: [dev] scope: [mr, mr-github] - platforms: [dgx_h100] - environment: [dev] - scope: [mr-slim] + scope: [mr, mr-slim] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone] products: - environment: [dev] - scope: [mr] + scope: 
[mr, mr-github] platforms: [dgx_h100] - environment: [lts] - scope: [mr] + scope: [mr, mr-github] # - test_case: [gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # platforms: [dgx_h100] # - test_case: [gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # platforms: [dgx_h100] # - test_case: [gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # platforms: [dgx_h100] # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # platforms: [dgx_a100, dgx_h100] # - test_case: [gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap] # products: @@ -551,4 +545,4 @@ products: # - test_case: [gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te] # products: # - environment: [dev, lts] - # scope: [mr] # Non-deterministic: #483 + # scope: [mr, mr-github] # Non-deterministic: #483 diff --git a/tests/test_utils/recipes/mamba-static-inference.yaml b/tests/test_utils/recipes/mamba-static-inference.yaml index e727c4db5ee..9fcc86830f0 100644 --- a/tests/test_utils/recipes/mamba-static-inference.yaml +++ b/tests/test_utils/recipes/mamba-static-inference.yaml @@ -62,5 +62,5 @@ products: - test_case: [hybrid_static_inference_tp1_pp1_2B_cudagraphs] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dg x_h100] diff --git a/tests/test_utils/recipes/mamba.yaml b/tests/test_utils/recipes/mamba.yaml index 0f8a4085ea5..40d1d095aa4 100644 --- a/tests/test_utils/recipes/mamba.yaml +++ b/tests/test_utils/recipes/mamba.yaml @@ -67,7 +67,7 @@ products: # - test_case: [hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, 
mr-github] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] diff --git a/tests/test_utils/recipes/moe-dynamic-inference.yaml b/tests/test_utils/recipes/moe-dynamic-inference.yaml index c9d1be57add..d477bdeda4a 100644 --- a/tests/test_utils/recipes/moe-dynamic-inference.yaml +++ b/tests/test_utils/recipes/moe-dynamic-inference.yaml @@ -62,5 +62,5 @@ products: - test_case: [gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/moe-static-inference.yaml b/tests/test_utils/recipes/moe-static-inference.yaml index c11cd294592..bd7c4ca0f50 100644 --- a/tests/test_utils/recipes/moe-static-inference.yaml +++ b/tests/test_utils/recipes/moe-static-inference.yaml @@ -57,15 +57,15 @@ products: - test_case: [gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 7a0f7d8a3f6..649da3ba518 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -78,28 +78,28 @@ products: # Weekly tests: Run both DEV and LTS unless something is flaky # ####################################################################### ####################################################################### - # MR tests: Mostly DEV on MR, and LTS on nightly cadence, except for # + # mr, mr-github tests: Mostly DEV on mr, mr-github, and LTS on nightly cadence, except for # # some very important tests. 
# ####################################################################### - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # hang: #513 # - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # platforms: [dgx_h100] # hang: #513 - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4] products: @@ -122,7 +122,7 @@ products: - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading] products: @@ -135,17 +135,17 @@ products: scope: [mr] platforms: [dgx_h100] ####################################################################### - # Super important MR tests that run for both DEV and LTS per MR # + # Super important mr, mr-github tests that run for both DEV and LTS per mr, mr-github # ####################################################################### # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # platforms: [dgx_h100] # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM] # products: # - environment: [dev] - # scope: [mr] + # scope: [mr, mr-github] # platforms: [dgx_h100] 
########################### # Merge train tests # @@ -153,18 +153,12 @@ products: - test_case: [gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer] products: - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - environment: [dev] - scope: [mr-slim] + scope: [mr, mr-github, mr-slim] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed] products: - environment: [dev] - scope: [mr, mr-github] - platforms: [dgx_h100] - - environment: [dev] - scope: [mr-slim] + scope: [mr, mr-github, mr-slim] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] products: From ccf794e8e51af72bed287219e9da3ab32c0938e1 Mon Sep 17 00:00:00 2001 From: Hongbin Liu Date: Wed, 29 Oct 2025 17:56:26 +0800 Subject: [PATCH 084/334] Renaming golden values (#2020) Signed-off-by: Hongbin Liu --- ...ev_coreweave.json => golden_values_dev_dgxh100_coreweave.json} | 0 ...den_values_dev_eos.json => golden_values_dev_dgxh100_eos.json} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/{golden_values_dev_coreweave.json => golden_values_dev_dgxh100_coreweave.json} (100%) rename tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/{golden_values_dev_eos.json => golden_values_dev_dgxh100_eos.json} (100%) diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json rename to 
tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_eos.json From 7342f67d2f2dc8cb3b5a9d18bf6674f56f505678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 29 Oct 2025 10:56:40 +0100 Subject: [PATCH 085/334] Ko3n1g/chore/sync main to dev (#2018) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig Co-authored-by: James Shen Co-authored-by: Chen-Han Yu Co-authored-by: Shanmugam Ramasamy Co-authored-by: Shanmugam Ramasamy Co-authored-by: Mcore Bot Co-authored-by: Shanmugam Ramasamy Co-authored-by: Siddharth Singh Co-authored-by: Shanmugam Ramasamy Co-authored-by: Youngeun Kwon Co-authored-by: Shunjia Ding Co-authored-by: Maanu Grover Co-authored-by: Jack Chang Co-authored-by: jianbinc Co-authored-by: xuwenc Co-authored-by: Teodor-Dumitru Ene <34819528+tdene@users.noreply.github.com> --- .github/workflows/cicd-approve-test-queue.yml | 8 +- .github/workflows/cicd-main.yml | 2 +- .github/workflows/copyright-check.yml | 1 + .gitlab/stages/00.pre.yml | 24 +- .gitlab/stages/05.publish.yml | 56 ++ pyproject.toml | 7 +- .../python_scripts/auto_reminder_github.py | 326 ++++++++++ .../python_scripts/check_status_of_main.py | 2 + .../launch_nemo_run_workload.py | 6 - tests/test_utils/recipes/gpt.yaml | 2 +- .../recipes/mamba-static-inference.yaml | 2 +- 
uv.lock | 586 ++++++++++-------- 12 files changed, 716 insertions(+), 306 deletions(-) create mode 100644 tests/test_utils/python_scripts/auto_reminder_github.py diff --git a/.github/workflows/cicd-approve-test-queue.yml b/.github/workflows/cicd-approve-test-queue.yml index ccc8327368d..1c35031cb35 100644 --- a/.github/workflows/cicd-approve-test-queue.yml +++ b/.github/workflows/cicd-approve-test-queue.yml @@ -26,7 +26,7 @@ jobs: if: github.repository == 'NVIDIA/Megatron-LM' strategy: matrix: - branch: [main, dev] + branch: [main, dev, others] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -45,6 +45,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.PAT }} MAX_CONCURRENCY: ${{ vars.MAX_CONCURRENCY || 1 }} + PYTHONUNBUFFERED: 1 shell: python run: | import os @@ -100,7 +101,10 @@ jobs: return False base_branch = pr_info.get("base", {}).get("ref") - if base_branch == target_branch: + if ( + (base_branch == target_branch) or + (base_branch != "main" and base_branch != "dev" and target_branch == "others") + ): print(f"PR #{pr_number} targets {target_branch}") return True diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 27e1f6cdacb..855b444ad64 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -192,7 +192,7 @@ jobs: export PATH=".venv/bin:$PATH" export GITLAB_ENDPOINT=github.com export CI_PROJECT_NAMESPACE=NVIDIA - export BASE_REF="${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }}" + export BASE_REF="${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }}" export CHECK_ONLY=true export SKIP_DOCS=false bash tools/autoformat.sh diff --git a/.github/workflows/copyright-check.yml b/.github/workflows/copyright-check.yml index bb9640a1147..05ca4b4cec9 100644 --- a/.github/workflows/copyright-check.yml +++ b/.github/workflows/copyright-check.yml @@ -33,6 +33,7 @@ jobs: needs: [pre-flight] if: | !(needs.pre-flight.outputs.docs_only == 'true' + || needs.pre-flight.outputs.is_merge_group 
== 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true') && github.repository == 'NVIDIA/Megatron-LM' uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.65.12 diff --git a/.gitlab/stages/00.pre.yml b/.gitlab/stages/00.pre.yml index dca3a7b47ae..a22c2cf3ea7 100644 --- a/.gitlab/stages/00.pre.yml +++ b/.gitlab/stages/00.pre.yml @@ -21,29 +21,6 @@ include: - echo "$NGC_API_KEY" | docker login nvcr.io -u '$oauthtoken' --password-stdin - echo "$CI_REGISTRY_PASSWORD" | docker login $CI_REGISTRY -u $CI_REGISTRY_USER --password-stdin -pre:mirror_to_github: - rules: - - if: '($CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "dev") && $CI_PIPELINE_SOURCE == "push"' - allow_failure: true - - when: never - tags: - - arch/amd64 - - env/prod - - origin/jet-fleet - - owner/jet-core - - purpose/utility - - team/megatron - stage: .pre - image: python:3.10 - variables: - GIT_STRATEGY: "clone" - script: - - git checkout $CI_COMMIT_BRANCH - - git remote add github https://ko3n1g:$GH_TOKEN@github.com/NVIDIA/Megatron-LM.git || true - - git push -u github $CI_COMMIT_BRANCH - retry: - max: 2 - pre:create_ci_branches: rules: - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push"' @@ -61,6 +38,7 @@ pre:create_ci_branches: - branch: ci-upgrade-dependencies - branch: ci-approve-main - branch: ci-approve-dev + - branch: ci-sync-branches tags: - arch/amd64 - env/prod diff --git a/.gitlab/stages/05.publish.yml b/.gitlab/stages/05.publish.yml index 3b50562629a..39f072c88ae 100644 --- a/.gitlab/stages/05.publish.yml +++ b/.gitlab/stages/05.publish.yml @@ -800,3 +800,59 @@ publish:approve_merge_gate: - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == 'ci-approve-dev' || $CI_COMMIT_BRANCH == 'ci-approve-main') when: always - when: never + +publish:sync_branches: + stage: publish + image: python:3.10 + script: + - set -x + - git remote add github https://github.com/NVIDIA/Megatron-LM.git || true + - git remote add gitlab 
https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/${CI_PROJECT_NAMESPACE}/Megatron-LM.git || true + - BRANCHES=("main" "dev") + - | + while IFS= read -r line; do + BRANCHES+=("$line") # Add each line to the array + done < <( \ + git ls-remote --heads "https://token:${PAT}@github.com/NVIDIA/Megatron-LM.git" 'refs/heads/core_*' | \ + cut -d'/' -f3- \ + ) + - | + for BRANCH in "${BRANCHES[@]}"; do + # Define the full refspec for the branch + BRANCH_REF="refs/heads/$BRANCH" + + echo "--- Processing branch: $BRANCH ---" + + # 1. Explicitly fetch the branch ref from 'github' + # This avoids fetching a tag with the same name. + # It updates/creates the remote-tracking branch (e.g., 'refs/remotes/github/core_r0.10.0') + if ! git fetch github "$BRANCH_REF:refs/remotes/github/$BRANCH"; then + echo "Failed to fetch branch $BRANCH. Skipping." + continue + fi + + # 2. Create or update the local branch from the remote-tracking branch we just fetched. + # The -B flag creates the branch if it doesn't exist or resets it if it does. + if ! git checkout -B "$BRANCH" "github/$BRANCH"; then + echo "Failed to checkout local branch $BRANCH. Skipping." + continue + fi + + # 3. Now you are on the correct local branch, ready to push. + echo "Successfully on branch $BRANCH. 
Echoing push command:" + git push -u gitlab HEAD:refs/heads/$BRANCH --force + echo "-----------------------------------" + done + tags: + - arch/amd64 + - env/prod + - origin/jet-fleet + - owner/jet-core + - purpose/utility + - team/megatron + retry: + max: 2 + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == 'ci-sync-branches') + when: always + - when: never diff --git a/pyproject.toml b/pyproject.toml index db91ce393e7..246189d6bd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ dev = [ "mamba-ssm~=2.2", "causal-conv1d~=1.5", "nv-grouped-gemm~=1.1", - "transformer-engine[pytorch]>=2.7.0a0,<2.9.0", + "transformer-engine[pytorch]>=2.7.0a0,<2.10.0", "nvidia-resiliency-ext>=0.4.0a0,<0.5.0", "nvidia-modelopt[torch]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'", "megatron-energon[av_decode]~=6.0", @@ -168,9 +168,10 @@ override-dependencies = [ flash_mla = [ { git = "https://github.com/deepseek-ai/FlashMLA", rev = "9edee0c022cd0938148a18e334203b0aab43aa19" }, ] -transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "release_v2.8" } # on `release_v2.8` +transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "release_v2.9" } # on `release_v2.9` +nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "01a9a8ba360f7b2908728ad0516e0ad9d936966d" } emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "cf9909b777ffac18e05b67a6708282cadc000942" } -nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "8ca8f7952a597f944985f1f1368a7acb9aa3a6c2" } + [tool.isort] profile = "black" # black-compatible line_length = 100 # should match black parameters diff --git a/tests/test_utils/python_scripts/auto_reminder_github.py b/tests/test_utils/python_scripts/auto_reminder_github.py new file mode 100644 index 00000000000..df75ec0542c --- /dev/null +++ b/tests/test_utils/python_scripts/auto_reminder_github.py @@ -0,0 +1,326 
@@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +#!/usr/bin/env python3 +""" +GitHub PR Review Reminder Automation +Requirements: pip install PyGithub slack-sdk requests +Usage: GH_TOKEN=ghp_... SLACK_TOKEN=xoxb-... SLACK_WEBHOOK_URL=https://... REPO=NVIDIA/Megatron-LM python github_pr_reminder.py +""" + +import logging +import os +import sys +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import List + +import requests +from github import Github +from slack_sdk import WebClient +from slack_sdk.errors import SlackApiError + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + + +@dataclass +class Reminder: + id: int + pr: str + milestone: str + author: str + priority: str + review_stage: str + total_review_time: int + current_stage_time: int + reviewers: List[str] + action_message: str + + +class PRReviewTracker: + EXPERT_REVIEW = "Expert Review" + FINAL_REVIEW = "Final Review" + EXCLUDED_TEAMS = {"core-adlr", "core-nemo"} + + def __init__( + self, token: str, repo_name: str, slack_token: str = None, webhook_url: str = None + ): + self.github = Github(token) + self.repo = self.github.get_repo(repo_name) + self.email_cache = {} + self.slack_id_cache = {} + self.slack_client = WebClient(token=slack_token) if slack_token else None + self.webhook_url = webhook_url + + def get_user_email(self, username: str): + """Get user's email, prioritizing public profile, then recent commits.""" + if username in self.email_cache: + return self.email_cache[username] + + try: + user = self.github.get_user(username) + + # 1. Try public profile email first + if user.email and not user.email.endswith("@users.noreply.github.com"): + self.email_cache[username] = user.email + return user.email + + # 2. If no public email, check recent commits on the main repo + try: + # Use get_commits(author=...) 
which is more direct than search_commits + for commit in self.repo.get_commits(author=user)[:10]: + email = commit.commit.author.email + if email and not email.endswith("@users.noreply.github.com"): + self.email_cache[username] = email + return email + except Exception as e: + logger.debug(f"Could not check commits for {username}: {e}") + + # 3. Fallback to public email (even if noreply) or a constructed noreply + email = user.email or f"{username}@users.noreply.github.com" + self.email_cache[username] = email + return email + + except Exception as e: + logger.warning(f"Could not get user object for {username}: {e}") + email = f"{username}@users.noreply.github.com" + self.email_cache[username] = email + return email + + def get_slack_user_id(self, email: str): + """Get Slack user ID from email.""" + if not self.slack_client: + return email + if email in self.slack_id_cache: + return self.slack_id_cache[email] + try: + response = self.slack_client.users_lookupByEmail(email=email) + user_id = response["user"]["id"] + self.slack_id_cache[email] = f"<@{user_id}>" + return self.slack_id_cache[email] + except SlackApiError as e: + logger.warning(f"Could not find Slack user for {email}: {e.response['error']}") + self.slack_id_cache[email] = email + return email + + def get_label_date(self, pr, label: str): + """Get most recent date when label was attached.""" + dates = [ + e.created_at + for e in pr.as_issue().get_events() + if e.event == "labeled" and e.label and e.label.name == label + ] + return max(dates) if dates else None + + def days_since(self, date): + """Calculate days since given date.""" + if not date: + return 0 + if date.tzinfo is None: + date = date.replace(tzinfo=timezone.utc) + return (datetime.now(timezone.utc) - date).days + + def get_stage(self, pr): + """Get current review stage.""" + labels = {l.name for l in pr.labels} + return self.FINAL_REVIEW if self.FINAL_REVIEW in labels else self.EXPERT_REVIEW + + def get_reviewers(self, pr): + """Get filtered 
reviewer emails who haven't approved yet.""" + stage = self.get_stage(pr) + org = self.github.get_organization(self.repo.organization.login) + + # 1. Get the latest review state for everyone who has submitted a review + latest_reviews = {} + try: + for review in pr.get_reviews(): + if not review.user: # Handle rare cases of deleted users + continue + # Only track 'APPROVED' or 'CHANGES_REQUESTED' as definitive states + if review.state in ("APPROVED", "CHANGES_REQUESTED"): + if ( + review.user.login not in latest_reviews + or review.submitted_at > latest_reviews[review.user.login].submitted_at + ): + latest_reviews[review.user.login] = review + except Exception as e: + logger.warning(f"Could not get reviews for PR #{pr.number}: {e}") + + # 2. Separate reviewers into approvers (List B) and non-approvers + approvers = {user for user, review in latest_reviews.items() if review.state == "APPROVED"} + non_approving_reviewers = { + user for user, review in latest_reviews.items() if review.state == "CHANGES_REQUESTED" + } + + # 3. Get all *currently pending* review requests + try: + pending_users_req, pending_teams_req = pr.get_review_requests() + pending_individuals = {r.login for r in pending_users_req} + pending_teams_slugs = {t.slug for t in pending_teams_req} + except Exception as e: + logger.warning(f"Could not get review requests for PR #{pr.number}: {e}") + pending_individuals = set() + pending_teams_slugs = set() + + # 4. Filter pending teams based on the current stage + teams_to_query = ( + pending_teams_slugs - self.EXCLUDED_TEAMS + if stage == self.EXPERT_REVIEW + else pending_teams_slugs & self.EXCLUDED_TEAMS + ) + + # 5. Get members from the required pending teams + pending_team_members = set() + for slug in teams_to_query: + try: + pending_team_members.update( + m.login for m in org.get_team_by_slug(slug).get_members() + ) + except Exception as e: + logger.warning(f"Could not get members for team {slug} on PR #{pr.number}: {e}") + + # 6. 
"List A": Combine all users who *still need to review* + all_required_reviewers = ( + pending_individuals | pending_team_members | non_approving_reviewers + ) + + # 7. Final list (List A - List B): + pending_reviewers = all_required_reviewers - approvers + reviewer_emails = sorted([self.get_user_email(u) for u in pending_reviewers]) + action_message = "Please review the PR." + + # 8. Handle the original edge cases + if len(reviewer_emails) == 0: + if stage == self.EXPERT_REVIEW: + # Assign to PR author + reviewer_emails = [self.get_user_email(pr.user.login)] + action_message = "All Expert Reviewers approved the PR. Please attach the Final Review label to proceed with the review." + elif stage == self.FINAL_REVIEW: + # Assign to mcore-reviewers who approved + try: + mcore_team = org.get_team_by_slug("mcore-reviewers") + mcore_members = {m.login for m in mcore_team.get_members()} + valid_approvers = approvers & mcore_members + reviewer_emails = sorted([self.get_user_email(u) for u in valid_approvers]) + action_message = "All Final Reviewers approved the PR. Please ping an Expert or Final Reviewer to merge the PR." 
+ + except Exception as e: + logger.warning( + f"Could not get mcore-reviewers approvers for PR #{pr.number}: {e}" + ) + + return reviewer_emails, action_message + + def create_reminder(self, pr): + """Create reminder for PR.""" + stage = self.get_stage(pr) + stage_days = self.days_since(self.get_label_date(pr, stage)) + author_email = self.get_user_email(pr.user.login) + reviewer_emails, action_message = self.get_reviewers(pr) + + return Reminder( + id=pr.number, + pr=f"<{pr.html_url}|#{pr.number} - {pr.title}>", + milestone=pr.milestone.title if pr.milestone else "No Milestone", + author=self.get_slack_user_id(author_email), + priority="P0" if stage_days > 3 else "P1" if stage_days >= 1 else "P2", + review_stage=stage, + total_review_time=self.days_since(self.get_label_date(pr, self.EXPERT_REVIEW)), + current_stage_time=stage_days, + reviewers=[self.get_slack_user_id(email) for email in reviewer_emails], + action_message=action_message, + ) + + def generate_reminders(self): + """Generate all reminders.""" + milestones = list(self.repo.get_milestones(state="open", sort="due_on", direction="desc"))[ + :2 + ] + logger.info(f"Found milestones: {', '.join(m.title for m in milestones)}") + + reminders = [] + for milestone in milestones: + # Find issues with the 'Expert Review' or 'Final Review' label + query = ( + f'repo:"{self.repo.full_name}" ' + f'milestone:"{milestone.title}" ' + f'is:open is:pr ' + f'label:"{self.EXPERT_REVIEW}","{self.FINAL_REVIEW}"' + ) + try: + # Use search_issues for a more direct query instead of get_issues + filtering + issues = self.github.search_issues(query) + for issue in issues: + try: + reminders.append(self.create_reminder(issue.as_pull_request())) + logger.info(f"Processed PR #{issue.number}") + except Exception as e: + logger.error(f"Failed to process PR #{issue.number}: {e}") + except Exception as e: + logger.error(f"Failed to search issues for milestone {milestone.title}: {e}") + + return sorted(reminders, key=lambda r: 
(r.priority, -r.current_stage_time)) + + def send_slack_notification(self, reminder: Reminder): + """Send Slack notification via webhook.""" + if not self.webhook_url: + return + + reviewers_str = ', '.join(reminder.reviewers) if reminder.reviewers else 'None' + message = [ + f"*PR*: {reminder.pr}", + f"*Milestone*: {reminder.milestone}", + f"*Author*: {reminder.author}", + f"*Priority*: {reminder.priority}", + f"*Review stage*: {reminder.review_stage}", + f"*Days in review*: {reminder.total_review_time}", + f"*Days in {reminder.review_stage}*: {reminder.current_stage_time}", + f"*Reviewers*: {reviewers_str}", + ] + + payload = { + "text": f"PR Review Reminder: {reminder.priority} - PR #{reminder.id}", + "blocks": [{"type": "section", "text": {"type": "mrkdwn", "text": "\n".join(message)}}], + } + + try: + response = requests.post(self.webhook_url, json=payload, timeout=10) + response.raise_for_status() + logger.info(f"Sent Slack notification for PR #{reminder.id}") + except requests.exceptions.RequestException as e: + logger.error(f"Failed to send Slack notification for PR #{reminder.id}: {e}") + + +def main(): + token = os.environ.get("GH_TOKEN") + slack_token = os.environ.get("SLACK_TOKEN") + webhook_url = os.environ.get("SLACK_WEBHOOK_URL") + repo = os.environ.get("REPO", "NVIDIA/Megatron-LM") + + if not token: + logger.error("GH_TOKEN environment variable is required") + sys.exit(1) + + logger.info(f"Starting PR review reminder for {repo}") + tracker = PRReviewTracker(token, repo, slack_token, webhook_url) + reminders = tracker.generate_reminders() + logger.info(f"Generated {len(reminders)} reminders\n{'=' * 80}") + + if not reminders: + logger.info("No reminders to send.") + return + + for r in reminders: + logger.info(f"{r.priority} | PR #{r.id} | {r.milestone}") + logger.info(f" Author: {r.author} | Stage: {r.review_stage}") + logger.info(f" Stage time: {r.current_stage_time}d | Total: {r.total_review_time}") + logger.info(f" Reviewers: {', 
'.join(r.reviewers) if r.reviewers else 'None'}") + logger.info(f" Action message: {r.action_message}") + logger.info("-" * 80) + if webhook_url: + tracker.send_slack_notification(r) + + logger.info("All reminders processed.") + + +if __name__ == "__main__": + main() diff --git a/tests/test_utils/python_scripts/check_status_of_main.py b/tests/test_utils/python_scripts/check_status_of_main.py index a1cae393bfb..ce777814b91 100644 --- a/tests/test_utils/python_scripts/check_status_of_main.py +++ b/tests/test_utils/python_scripts/check_status_of_main.py @@ -1,3 +1,5 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + from __future__ import annotations import logging diff --git a/tests/test_utils/python_scripts/launch_nemo_run_workload.py b/tests/test_utils/python_scripts/launch_nemo_run_workload.py index 33d2a4a6a74..6e2b73e430f 100644 --- a/tests/test_utils/python_scripts/launch_nemo_run_workload.py +++ b/tests/test_utils/python_scripts/launch_nemo_run_workload.py @@ -153,12 +153,6 @@ def main( sys.exit(1) - result_dict = exp.status(return_dict=True) - _, job_dict = list(result_dict.items())[0] - - logger.info(f"Job status: {job_dict["status"]}") - sys.exit(0 if str(job_dict["status"]) == "SUCCEEDED" else 1) - if __name__ == "__main__": main() diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index baf07cb9759..488f3747a0f 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -468,7 +468,7 @@ products: - environment: [lts] scope: [mr, mr-github] - environment: [dev] - scope: [mr, mr-slim] + scope: [mr, mr-github, mr-slim] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone] products: diff --git a/tests/test_utils/recipes/mamba-static-inference.yaml b/tests/test_utils/recipes/mamba-static-inference.yaml index 9fcc86830f0..79a5ab4eee2 100644 --- a/tests/test_utils/recipes/mamba-static-inference.yaml +++ 
b/tests/test_utils/recipes/mamba-static-inference.yaml @@ -63,4 +63,4 @@ products: products: - environment: [dev] scope: [mr, mr-github] - platforms: [dg x_h100] + platforms: [dgx_h100] diff --git a/uv.lock b/uv.lock index c20d3f55dfe..92ad88abd33 100644 --- a/uv.lock +++ b/uv.lock @@ -76,7 +76,7 @@ wheels = [ [[package]] name = "aiobotocore" -version = "2.25.0" +version = "2.25.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -87,9 +87,9 @@ dependencies = [ { name = "python-dateutil" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/29/89/b1ae494cfd12520c5d3b19704a14ffa19153634be47d48052e45223eee86/aiobotocore-2.25.0.tar.gz", hash = "sha256:169d07de312fd51292292f2c8faf8f67d0f466f525cea03855fe065ddc85f79d", size = 120514, upload-time = "2025-10-10T17:39:12.291Z" } +sdist = { url = "https://files.pythonhosted.org/packages/62/94/2e4ec48cf1abb89971cb2612d86f979a6240520f0a659b53a43116d344dc/aiobotocore-2.25.1.tar.gz", hash = "sha256:ea9be739bfd7ece8864f072ec99bb9ed5c7e78ebb2b0b15f29781fbe02daedbc", size = 120560, upload-time = "2025-10-28T22:33:21.787Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/4e/3592d88436bbd60984a08440793c0ba245f538f9f6287b59c1e2c0aead8c/aiobotocore-2.25.0-py3-none-any.whl", hash = "sha256:0524fd36f6d522ddc9d013df2c19fb56369ffdfbffd129895918fbfe95216dad", size = 86028, upload-time = "2025-10-10T17:39:10.423Z" }, + { url = "https://files.pythonhosted.org/packages/95/2a/d275ec4ce5cd0096665043995a7d76f5d0524853c76a3d04656de49f8808/aiobotocore-2.25.1-py3-none-any.whl", hash = "sha256:eb6daebe3cbef5b39a0bb2a97cffbe9c7cb46b2fcc399ad141f369f3c2134b1f", size = 86039, upload-time = "2025-10-28T22:33:19.949Z" }, ] [[package]] @@ -103,7 +103,7 @@ wheels = [ [[package]] name = "aiohttp" -version = "3.13.1" +version = "3.13.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, @@ -115,110 +115,110 @@ dependencies 
= [ { name = "propcache" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ba/fa/3ae643cd525cf6844d3dc810481e5748107368eb49563c15a5fb9f680750/aiohttp-3.13.1.tar.gz", hash = "sha256:4b7ee9c355015813a6aa085170b96ec22315dabc3d866fd77d147927000e9464", size = 7835344, upload-time = "2025-10-17T14:03:29.337Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e6/34/5097441cc3047eccc2e0bfed3760ed068489b8392545d3aec0d8fbfab2b5/aiohttp-3.13.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2349a6b642020bf20116a8a5c83bae8ba071acf1461c7cbe45fc7fafd552e7e2", size = 735069, upload-time = "2025-10-17T13:58:56.602Z" }, - { url = "https://files.pythonhosted.org/packages/8c/2b/726466b4b4b16271a3db2a8a914d754d6cb9cee7bebde1f3ac6043e4e030/aiohttp-3.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2a8434ca31c093a90edb94d7d70e98706ce4d912d7f7a39f56e1af26287f4bb7", size = 492575, upload-time = "2025-10-17T13:58:58.696Z" }, - { url = "https://files.pythonhosted.org/packages/82/1f/364e64292c95bb6c9e2823b0afa1ad3f06524c573d45df82294be572489d/aiohttp-3.13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0bd610a7e87431741021a9a6ab775e769ea8c01bf01766d481282bfb17df597f", size = 487862, upload-time = "2025-10-17T13:59:00.315Z" }, - { url = "https://files.pythonhosted.org/packages/23/b0/c5a774b3125ac854987b8ca45a6d995829987d01ece4525d3fc369a9ca88/aiohttp-3.13.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:777ec887264b629395b528af59b8523bf3164d4c6738cd8989485ff3eda002e2", size = 1666761, upload-time = "2025-10-17T13:59:02.224Z" }, - { url = "https://files.pythonhosted.org/packages/29/be/32c6c1d3a6c69e594b855bbf4014bea4c42008b0daac8c6e5c9f03207b89/aiohttp-3.13.1-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:ac1892f56e2c445aca5ba28f3bf8e16b26dfc05f3c969867b7ef553b74cb4ebe", size = 1634627, upload-time = 
"2025-10-17T13:59:03.829Z" }, - { url = "https://files.pythonhosted.org/packages/73/8d/fde3a8f4801b14e0b9490f5bc86c5106cb7d96bd60ff2aaee53749c72fe1/aiohttp-3.13.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:499a047d1c5e490c31d16c033e2e47d1358f0e15175c7a1329afc6dfeb04bc09", size = 1726564, upload-time = "2025-10-17T13:59:05.997Z" }, - { url = "https://files.pythonhosted.org/packages/52/b2/8290556f1f6b17b1af976a9abb17f9b54dc7218e11bbf6abbebaa7cc70fb/aiohttp-3.13.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:610be925f89501938c770f1e28ca9dd62e9b308592c81bd5d223ce92434c0089", size = 1814413, upload-time = "2025-10-17T13:59:08.975Z" }, - { url = "https://files.pythonhosted.org/packages/ef/6b/4b657e9fa72479df38117609d4ec8e4b07e8110b872df3872f9c6a96e26b/aiohttp-3.13.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90eb902c06c6ac85d6b80fa9f2bd681f25b1ebf73433d428b3d182a507242711", size = 1667964, upload-time = "2025-10-17T13:59:10.606Z" }, - { url = "https://files.pythonhosted.org/packages/ee/ed/563de175d01fa26459a60a7c82dbf69d20e356d459476a7526329091b4c3/aiohttp-3.13.1-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ab8ac3224b2beb46266c094b3869d68d5f96f35dba98e03dea0acbd055eefa03", size = 1553917, upload-time = "2025-10-17T13:59:12.312Z" }, - { url = "https://files.pythonhosted.org/packages/39/26/48a4b5681eada16eb5b39cae277765aed1644b03610c43eadb8b331ccfea/aiohttp-3.13.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:79ac65b6e2731558aad1e4c1a655d2aa2a77845b62acecf5898b0d4fe8c76618", size = 1637730, upload-time = "2025-10-17T13:59:14.395Z" }, - { url = "https://files.pythonhosted.org/packages/c1/43/57b137af37344e03c7f6b28ddf38a4af820b53c1fa9ce13f668fe468d2e2/aiohttp-3.13.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = 
"sha256:4dadbd858ed8c04d1aa7a2a91ad65f8e1fbd253ae762ef5be8111e763d576c3c", size = 1644088, upload-time = "2025-10-17T13:59:16.749Z" }, - { url = "https://files.pythonhosted.org/packages/0d/c4/e49bafa4babef09929b10968a6b6efe3707fbaa5c5bb7c8db7f810232269/aiohttp-3.13.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e0b2ccd331bc77149e88e919aa95c228a011e03e1168fd938e6aeb1a317d7a8a", size = 1696215, upload-time = "2025-10-17T13:59:18.711Z" }, - { url = "https://files.pythonhosted.org/packages/15/e4/8414be434b3e50f9089ffa7c4d5130ba6ff0d1c6fa9f55cd760b088abbe0/aiohttp-3.13.1-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:fba3c85fb24fe204e73f3c92f09f4f5cfa55fa7e54b34d59d91b7c5a258d0f6a", size = 1540617, upload-time = "2025-10-17T13:59:20.46Z" }, - { url = "https://files.pythonhosted.org/packages/bd/8b/31cb6725f819b74a9c0b0055c500187294e73aea40708b6a5aa7b328ea4c/aiohttp-3.13.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8d5011e4e741d2635cda18f2997a56e8e1d1b94591dc8732f2ef1d3e1bfc5f45", size = 1713509, upload-time = "2025-10-17T13:59:22.61Z" }, - { url = "https://files.pythonhosted.org/packages/24/ac/49a79c2711423cfa091e265c46e58617de31258c64502b890f25421cb742/aiohttp-3.13.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c5fe2728a89c82574bd3132d59237c3b5fb83e2e00a320e928d05d74d1ae895f", size = 1654702, upload-time = "2025-10-17T13:59:24.396Z" }, - { url = "https://files.pythonhosted.org/packages/30/52/1cf23cffeda1f079f20cd9c72174a76e8b0c6595def6803892e37ee35c8a/aiohttp-3.13.1-cp310-cp310-win32.whl", hash = "sha256:add14a5e68cbcfc526c89c1ed8ea963f5ff8b9b4b854985b07820c6fbfdb3c3c", size = 430898, upload-time = "2025-10-17T13:59:26.227Z" }, - { url = "https://files.pythonhosted.org/packages/0e/13/214a01f2936f4645b1fbd5cba9001331ca5af5c04bbdbe747eed330a8516/aiohttp-3.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:a4cc9d9cfdf75a69ae921c407e02d0c1799ab333b0bc6f7928c175f47c080d6a", size = 453684, upload-time = "2025-10-17T13:59:28.129Z" }, - { url = 
"https://files.pythonhosted.org/packages/be/2c/739d03730ffce57d2093e2e611e1541ac9a4b3bb88288c33275058b9ffc2/aiohttp-3.13.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9eefa0a891e85dca56e2d00760945a6325bd76341ec386d3ad4ff72eb97b7e64", size = 742004, upload-time = "2025-10-17T13:59:29.73Z" }, - { url = "https://files.pythonhosted.org/packages/fc/f8/7f5b7f7184d7c80e421dbaecbd13e0b2a0bb8663fd0406864f9a167a438c/aiohttp-3.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c20eb646371a5a57a97de67e52aac6c47badb1564e719b3601bbb557a2e8fd0", size = 495601, upload-time = "2025-10-17T13:59:31.312Z" }, - { url = "https://files.pythonhosted.org/packages/3e/af/fb78d028b9642dd33ff127d9a6a151586f33daff631b05250fecd0ab23f8/aiohttp-3.13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bfc28038cd86fb1deed5cc75c8fda45c6b0f5c51dfd76f8c63d3d22dc1ab3d1b", size = 491790, upload-time = "2025-10-17T13:59:33.304Z" }, - { url = "https://files.pythonhosted.org/packages/1e/ae/e40e422ee995e4f91f7f087b86304e3dd622d3a5b9ca902a1e94ebf9a117/aiohttp-3.13.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b22eeffca2e522451990c31a36fe0e71079e6112159f39a4391f1c1e259a795", size = 1746350, upload-time = "2025-10-17T13:59:35.158Z" }, - { url = "https://files.pythonhosted.org/packages/28/a5/fe6022bb869bf2d2633b155ed8348d76358c22d5ff9692a15016b2d1019f/aiohttp-3.13.1-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:65782b2977c05ebd78787e3c834abe499313bf69d6b8be4ff9c340901ee7541f", size = 1703046, upload-time = "2025-10-17T13:59:37.077Z" }, - { url = "https://files.pythonhosted.org/packages/5a/a5/c4ef3617d7cdc49f2d5af077f19794946f0f2d94b93c631ace79047361a2/aiohttp-3.13.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dacba54f9be3702eb866b0b9966754b475e1e39996e29e442c3cd7f1117b43a9", size = 1806161, upload-time = 
"2025-10-17T13:59:38.837Z" }, - { url = "https://files.pythonhosted.org/packages/ad/45/b87d2430aee7e7d00b24e3dff2c5bd69f21017f6edb19cfd91e514664fc8/aiohttp-3.13.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:aa878da718e8235302c365e376b768035add36b55177706d784a122cb822a6a4", size = 1894546, upload-time = "2025-10-17T13:59:40.741Z" }, - { url = "https://files.pythonhosted.org/packages/e8/a2/79eb466786a7f11a0292c353a8a9b95e88268c48c389239d7531d66dbb48/aiohttp-3.13.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e4b4e607fbd4964d65945a7b9d1e7f98b0d5545736ea613f77d5a2a37ff1e46", size = 1745683, upload-time = "2025-10-17T13:59:42.59Z" }, - { url = "https://files.pythonhosted.org/packages/93/1a/153b0ad694f377e94eacc85338efe03ed4776a396c8bb47bd9227135792a/aiohttp-3.13.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0c3db2d0e5477ad561bf7ba978c3ae5f8f78afda70daa05020179f759578754f", size = 1605418, upload-time = "2025-10-17T13:59:45.229Z" }, - { url = "https://files.pythonhosted.org/packages/3f/4e/18605b1bfeb4b00d3396d833647cdb213118e2a96862e5aebee62ad065b4/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9739d34506fdf59bf2c092560d502aa728b8cdb33f34ba15fb5e2852c35dd829", size = 1722379, upload-time = "2025-10-17T13:59:46.969Z" }, - { url = "https://files.pythonhosted.org/packages/72/13/0a38ad385d547fb283e0e1fe1ff1dff8899bd4ed0aaceeb13ec14abbf136/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:b902e30a268a85d50197b4997edc6e78842c14c0703450f632c2d82f17577845", size = 1716693, upload-time = "2025-10-17T13:59:49.217Z" }, - { url = "https://files.pythonhosted.org/packages/55/65/7029d7573ab9009adde380052c6130d02c8db52195fda112db35e914fe7b/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1bbfc04c8de7def6504cce0a97f9885a5c805fd2395a0634bc10f9d6ecb42524", size = 1784174, upload-time = 
"2025-10-17T13:59:51.439Z" }, - { url = "https://files.pythonhosted.org/packages/2d/36/fd46e39cb85418e45b0e4a8bfc39651ee0b8f08ea006adf217a221cdb269/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:6941853405a38a5eeb7d9776db77698df373ff7fa8c765cb81ea14a344fccbeb", size = 1593716, upload-time = "2025-10-17T13:59:53.367Z" }, - { url = "https://files.pythonhosted.org/packages/85/b8/188e0cb1be37b4408373171070fda17c3bf9c67c0d3d4fd5ee5b1fa108e1/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:7764adcd2dc8bd21c8228a53dda2005428498dc4d165f41b6086f0ac1c65b1c9", size = 1799254, upload-time = "2025-10-17T13:59:55.352Z" }, - { url = "https://files.pythonhosted.org/packages/67/ff/fdf768764eb427b0cc9ebb2cebddf990f94d98b430679f8383c35aa114be/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c09e08d38586fa59e5a2f9626505a0326fadb8e9c45550f029feeb92097a0afc", size = 1738122, upload-time = "2025-10-17T13:59:57.263Z" }, - { url = "https://files.pythonhosted.org/packages/94/84/fce7a4d575943394d7c0e632273838eb6f39de8edf25386017bf5f0de23b/aiohttp-3.13.1-cp311-cp311-win32.whl", hash = "sha256:ce1371675e74f6cf271d0b5530defb44cce713fd0ab733713562b3a2b870815c", size = 430491, upload-time = "2025-10-17T13:59:59.466Z" }, - { url = "https://files.pythonhosted.org/packages/ac/d2/d21b8ab6315a5d588c550ab285b4f02ae363edf012920e597904c5a56608/aiohttp-3.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:77a2f5cc28cf4704cc157be135c6a6cfb38c9dea478004f1c0fd7449cf445c28", size = 454808, upload-time = "2025-10-17T14:00:01.247Z" }, - { url = "https://files.pythonhosted.org/packages/1a/72/d463a10bf29871f6e3f63bcf3c91362dc4d72ed5917a8271f96672c415ad/aiohttp-3.13.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0760bd9a28efe188d77b7c3fe666e6ef74320d0f5b105f2e931c7a7e884c8230", size = 736218, upload-time = "2025-10-17T14:00:03.51Z" }, - { url = 
"https://files.pythonhosted.org/packages/26/13/f7bccedbe52ea5a6eef1e4ebb686a8d7765319dfd0a5939f4238cb6e79e6/aiohttp-3.13.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7129a424b441c3fe018a414401bf1b9e1d49492445f5676a3aecf4f74f67fcdb", size = 491251, upload-time = "2025-10-17T14:00:05.756Z" }, - { url = "https://files.pythonhosted.org/packages/0c/7c/7ea51b5aed6cc69c873f62548da8345032aa3416336f2d26869d4d37b4a2/aiohttp-3.13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e1cb04ae64a594f6ddf5cbb024aba6b4773895ab6ecbc579d60414f8115e9e26", size = 490394, upload-time = "2025-10-17T14:00:07.504Z" }, - { url = "https://files.pythonhosted.org/packages/31/05/1172cc4af4557f6522efdee6eb2b9f900e1e320a97e25dffd3c5a6af651b/aiohttp-3.13.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:782d656a641e755decd6bd98d61d2a8ea062fd45fd3ff8d4173605dd0d2b56a1", size = 1737455, upload-time = "2025-10-17T14:00:09.403Z" }, - { url = "https://files.pythonhosted.org/packages/24/3d/ce6e4eca42f797d6b1cd3053cf3b0a22032eef3e4d1e71b9e93c92a3f201/aiohttp-3.13.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f92ad8169767429a6d2237331726c03ccc5f245222f9373aa045510976af2b35", size = 1699176, upload-time = "2025-10-17T14:00:11.314Z" }, - { url = "https://files.pythonhosted.org/packages/25/04/7127ba55653e04da51477372566b16ae786ef854e06222a1c96b4ba6c8ef/aiohttp-3.13.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0e778f634ca50ec005eefa2253856921c429581422d887be050f2c1c92e5ce12", size = 1767216, upload-time = "2025-10-17T14:00:13.668Z" }, - { url = "https://files.pythonhosted.org/packages/b8/3b/43bca1e75847e600f40df829a6b2f0f4e1d4c70fb6c4818fdc09a462afd5/aiohttp-3.13.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:9bc36b41cf4aab5d3b34d22934a696ab83516603d1bc1f3e4ff9930fe7d245e5", size = 
1865870, upload-time = "2025-10-17T14:00:15.852Z" }, - { url = "https://files.pythonhosted.org/packages/9e/69/b204e5d43384197a614c88c1717c324319f5b4e7d0a1b5118da583028d40/aiohttp-3.13.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3fd4570ea696aee27204dd524f287127ed0966d14d309dc8cc440f474e3e7dbd", size = 1751021, upload-time = "2025-10-17T14:00:18.297Z" }, - { url = "https://files.pythonhosted.org/packages/1c/af/845dc6b6fdf378791d720364bf5150f80d22c990f7e3a42331d93b337cc7/aiohttp-3.13.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7bda795f08b8a620836ebfb0926f7973972a4bf8c74fdf9145e489f88c416811", size = 1561448, upload-time = "2025-10-17T14:00:20.152Z" }, - { url = "https://files.pythonhosted.org/packages/7a/91/d2ab08cd77ed76a49e4106b1cfb60bce2768242dd0c4f9ec0cb01e2cbf94/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:055a51d90e351aae53dcf324d0eafb2abe5b576d3ea1ec03827d920cf81a1c15", size = 1698196, upload-time = "2025-10-17T14:00:22.131Z" }, - { url = "https://files.pythonhosted.org/packages/5e/d1/082f0620dc428ecb8f21c08a191a4694915cd50f14791c74a24d9161cc50/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:d4131df864cbcc09bb16d3612a682af0db52f10736e71312574d90f16406a867", size = 1719252, upload-time = "2025-10-17T14:00:24.453Z" }, - { url = "https://files.pythonhosted.org/packages/fc/78/2af2f44491be7b08e43945b72d2b4fd76f0a14ba850ba9e41d28a7ce716a/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:163d3226e043f79bf47c87f8dfc89c496cc7bc9128cb7055ce026e435d551720", size = 1736529, upload-time = "2025-10-17T14:00:26.567Z" }, - { url = "https://files.pythonhosted.org/packages/b0/34/3e919ecdc93edaea8d140138049a0d9126141072e519535e2efa38eb7a02/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:a2370986a3b75c1a5f3d6f6d763fc6be4b430226577b0ed16a7c13a75bf43d8f", size = 1553723, upload-time = 
"2025-10-17T14:00:28.592Z" }, - { url = "https://files.pythonhosted.org/packages/21/4b/d8003aeda2f67f359b37e70a5a4b53fee336d8e89511ac307ff62aeefcdb/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d7c14de0c7c9f1e6e785ce6cbe0ed817282c2af0012e674f45b4e58c6d4ea030", size = 1763394, upload-time = "2025-10-17T14:00:31.051Z" }, - { url = "https://files.pythonhosted.org/packages/4c/7b/1dbe6a39e33af9baaafc3fc016a280663684af47ba9f0e5d44249c1f72ec/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb611489cf0db10b99beeb7280bd39e0ef72bc3eb6d8c0f0a16d8a56075d1eb7", size = 1718104, upload-time = "2025-10-17T14:00:33.407Z" }, - { url = "https://files.pythonhosted.org/packages/5c/88/bd1b38687257cce67681b9b0fa0b16437be03383fa1be4d1a45b168bef25/aiohttp-3.13.1-cp312-cp312-win32.whl", hash = "sha256:f90fe0ee75590f7428f7c8b5479389d985d83c949ea10f662ab928a5ed5cf5e6", size = 425303, upload-time = "2025-10-17T14:00:35.829Z" }, - { url = "https://files.pythonhosted.org/packages/0e/e3/4481f50dd6f27e9e58c19a60cff44029641640237e35d32b04aaee8cf95f/aiohttp-3.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:3461919a9dca272c183055f2aab8e6af0adc810a1b386cce28da11eb00c859d9", size = 452071, upload-time = "2025-10-17T14:00:37.764Z" }, - { url = "https://files.pythonhosted.org/packages/16/6d/d267b132342e1080f4c1bb7e1b4e96b168b3cbce931ec45780bff693ff95/aiohttp-3.13.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:55785a7f8f13df0c9ca30b5243d9909bd59f48b274262a8fe78cee0828306e5d", size = 730727, upload-time = "2025-10-17T14:00:39.681Z" }, - { url = "https://files.pythonhosted.org/packages/92/c8/1cf495bac85cf71b80fad5f6d7693e84894f11b9fe876b64b0a1e7cbf32f/aiohttp-3.13.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4bef5b83296cebb8167707b4f8d06c1805db0af632f7a72d7c5288a84667e7c3", size = 488678, upload-time = "2025-10-17T14:00:41.541Z" }, - { url = 
"https://files.pythonhosted.org/packages/a8/19/23c6b81cca587ec96943d977a58d11d05a82837022e65cd5502d665a7d11/aiohttp-3.13.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:27af0619c33f9ca52f06069ec05de1a357033449ab101836f431768ecfa63ff5", size = 487637, upload-time = "2025-10-17T14:00:43.527Z" }, - { url = "https://files.pythonhosted.org/packages/48/58/8f9464afb88b3eed145ad7c665293739b3a6f91589694a2bb7e5778cbc72/aiohttp-3.13.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a47fe43229a8efd3764ef7728a5c1158f31cdf2a12151fe99fde81c9ac87019c", size = 1718975, upload-time = "2025-10-17T14:00:45.496Z" }, - { url = "https://files.pythonhosted.org/packages/e1/8b/c3da064ca392b2702f53949fd7c403afa38d9ee10bf52c6ad59a42537103/aiohttp-3.13.1-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6e68e126de5b46e8b2bee73cab086b5d791e7dc192056916077aa1e2e2b04437", size = 1686905, upload-time = "2025-10-17T14:00:47.707Z" }, - { url = "https://files.pythonhosted.org/packages/0a/a4/9c8a3843ecf526daee6010af1a66eb62579be1531d2d5af48ea6f405ad3c/aiohttp-3.13.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e65ef49dd22514329c55970d39079618a8abf856bae7147913bb774a3ab3c02f", size = 1754907, upload-time = "2025-10-17T14:00:49.702Z" }, - { url = "https://files.pythonhosted.org/packages/a4/80/1f470ed93e06436e3fc2659a9fc329c192fa893fb7ed4e884d399dbfb2a8/aiohttp-3.13.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e425a7e0511648b3376839dcc9190098671a47f21a36e815b97762eb7d556b0", size = 1857129, upload-time = "2025-10-17T14:00:51.822Z" }, - { url = "https://files.pythonhosted.org/packages/cc/e6/33d305e6cce0a8daeb79c7d8d6547d6e5f27f4e35fa4883fc9c9eb638596/aiohttp-3.13.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:010dc9b7110f055006acd3648d5d5955bb6473b37c3663ec42a1b4cba7413e6b", size = 1738189, upload-time = "2025-10-17T14:00:53.976Z" }, - { url = "https://files.pythonhosted.org/packages/ac/42/8df03367e5a64327fe0c39291080697795430c438fc1139c7cc1831aa1df/aiohttp-3.13.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b5c722d0ca5f57d61066b5dfa96cdb87111e2519156b35c1f8dd17c703bee7a", size = 1553608, upload-time = "2025-10-17T14:00:56.144Z" }, - { url = "https://files.pythonhosted.org/packages/96/17/6d5c73cd862f1cf29fddcbb54aac147037ff70a043a2829d03a379e95742/aiohttp-3.13.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:93029f0e9b77b714904a281b5aa578cdc8aa8ba018d78c04e51e1c3d8471b8ec", size = 1681809, upload-time = "2025-10-17T14:00:58.603Z" }, - { url = "https://files.pythonhosted.org/packages/be/31/8926c8ab18533f6076ce28d2c329a203b58c6861681906e2d73b9c397588/aiohttp-3.13.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:d1824c7d08d8ddfc8cb10c847f696942e5aadbd16fd974dfde8bd2c3c08a9fa1", size = 1711161, upload-time = "2025-10-17T14:01:01.744Z" }, - { url = "https://files.pythonhosted.org/packages/f2/36/2f83e1ca730b1e0a8cf1c8ab9559834c5eec9f5da86e77ac71f0d16b521d/aiohttp-3.13.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8f47d0ff5b3eb9c1278a2f56ea48fda667da8ebf28bd2cb378b7c453936ce003", size = 1731999, upload-time = "2025-10-17T14:01:04.626Z" }, - { url = "https://files.pythonhosted.org/packages/b9/ec/1f818cc368dfd4d5ab4e9efc8f2f6f283bfc31e1c06d3e848bcc862d4591/aiohttp-3.13.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:8a396b1da9b51ded79806ac3b57a598f84e0769eaa1ba300655d8b5e17b70c7b", size = 1548684, upload-time = "2025-10-17T14:01:06.828Z" }, - { url = "https://files.pythonhosted.org/packages/d3/ad/33d36efd16e4fefee91b09a22a3a0e1b830f65471c3567ac5a8041fac812/aiohttp-3.13.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d9c52a65f54796e066b5d674e33b53178014752d28bca555c479c2c25ffcec5b", size = 
1756676, upload-time = "2025-10-17T14:01:09.517Z" }, - { url = "https://files.pythonhosted.org/packages/3c/c4/4a526d84e77d464437713ca909364988ed2e0cd0cdad2c06cb065ece9e08/aiohttp-3.13.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a89da72d18d6c95a653470b78d8ee5aa3c4b37212004c103403d0776cbea6ff0", size = 1715577, upload-time = "2025-10-17T14:01:11.958Z" }, - { url = "https://files.pythonhosted.org/packages/a2/21/e39638b7d9c7f1362c4113a91870f89287e60a7ea2d037e258b81e8b37d5/aiohttp-3.13.1-cp313-cp313-win32.whl", hash = "sha256:02e0258b7585ddf5d01c79c716ddd674386bfbf3041fbbfe7bdf9c7c32eb4a9b", size = 424468, upload-time = "2025-10-17T14:01:14.344Z" }, - { url = "https://files.pythonhosted.org/packages/cc/00/f3a92c592a845ebb2f47d102a67f35f0925cb854c5e7386f1a3a1fdff2ab/aiohttp-3.13.1-cp313-cp313-win_amd64.whl", hash = "sha256:ef56ffe60e8d97baac123272bde1ab889ee07d3419606fae823c80c2b86c403e", size = 450806, upload-time = "2025-10-17T14:01:16.437Z" }, - { url = "https://files.pythonhosted.org/packages/97/be/0f6c41d2fd0aab0af133c509cabaf5b1d78eab882cb0ceb872e87ceeabf7/aiohttp-3.13.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:77f83b3dc5870a2ea79a0fcfdcc3fc398187ec1675ff61ec2ceccad27ecbd303", size = 733828, upload-time = "2025-10-17T14:01:18.58Z" }, - { url = "https://files.pythonhosted.org/packages/75/14/24e2ac5efa76ae30e05813e0f50737005fd52da8ddffee474d4a5e7f38a6/aiohttp-3.13.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:9cafd2609ebb755e47323306c7666283fbba6cf82b5f19982ea627db907df23a", size = 489320, upload-time = "2025-10-17T14:01:20.644Z" }, - { url = "https://files.pythonhosted.org/packages/da/5a/4cbe599358d05ea7db4869aff44707b57d13f01724d48123dc68b3288d5a/aiohttp-3.13.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9c489309a2ca548d5f11131cfb4092f61d67954f930bba7e413bcdbbb82d7fae", size = 489899, upload-time = "2025-10-17T14:01:22.638Z" }, - { url = 
"https://files.pythonhosted.org/packages/67/96/3aec9d9cfc723273d4386328a1e2562cf23629d2f57d137047c49adb2afb/aiohttp-3.13.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79ac15fe5fdbf3c186aa74b656cd436d9a1e492ba036db8901c75717055a5b1c", size = 1716556, upload-time = "2025-10-17T14:01:25.406Z" }, - { url = "https://files.pythonhosted.org/packages/b9/99/39a3d250595b5c8172843831221fa5662884f63f8005b00b4034f2a7a836/aiohttp-3.13.1-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:095414be94fce3bc080684b4cd50fb70d439bc4662b2a1984f45f3bf9ede08aa", size = 1665814, upload-time = "2025-10-17T14:01:27.683Z" }, - { url = "https://files.pythonhosted.org/packages/3b/96/8319e7060a85db14a9c178bc7b3cf17fad458db32ba6d2910de3ca71452d/aiohttp-3.13.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c68172e1a2dca65fa1272c85ca72e802d78b67812b22827df01017a15c5089fa", size = 1755767, upload-time = "2025-10-17T14:01:29.914Z" }, - { url = "https://files.pythonhosted.org/packages/1c/c6/0a2b3d886b40aa740fa2294cd34ed46d2e8108696748492be722e23082a7/aiohttp-3.13.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3751f9212bcd119944d4ea9de6a3f0fee288c177b8ca55442a2cdff0c8201eb3", size = 1836591, upload-time = "2025-10-17T14:01:32.28Z" }, - { url = "https://files.pythonhosted.org/packages/fb/34/8ab5904b3331c91a58507234a1e2f662f837e193741609ee5832eb436251/aiohttp-3.13.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8619dca57d98a8353abdc7a1eeb415548952b39d6676def70d9ce76d41a046a9", size = 1714915, upload-time = "2025-10-17T14:01:35.138Z" }, - { url = "https://files.pythonhosted.org/packages/b5/d3/d36077ca5f447649112189074ac6c192a666bf68165b693e48c23b0d008c/aiohttp-3.13.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:97795a0cb0a5f8a843759620e9cbd8889f8079551f5dcf1ccd99ed2f056d9632", size = 1546579, upload-time = "2025-10-17T14:01:38.237Z" }, - { url = "https://files.pythonhosted.org/packages/a8/14/dbc426a1bb1305c4fc78ce69323498c9e7c699983366ef676aa5d3f949fa/aiohttp-3.13.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1060e058da8f9f28a7026cdfca9fc886e45e551a658f6a5c631188f72a3736d2", size = 1680633, upload-time = "2025-10-17T14:01:40.902Z" }, - { url = "https://files.pythonhosted.org/packages/29/83/1e68e519aff9f3ef6d4acb6cdda7b5f592ef5c67c8f095dc0d8e06ce1c3e/aiohttp-3.13.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:f48a2c26333659101ef214907d29a76fe22ad7e912aa1e40aeffdff5e8180977", size = 1678675, upload-time = "2025-10-17T14:01:43.779Z" }, - { url = "https://files.pythonhosted.org/packages/38/b9/7f3e32a81c08b6d29ea15060c377e1f038ad96cd9923a85f30e817afff22/aiohttp-3.13.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f1dfad638b9c91ff225162b2824db0e99ae2d1abe0dc7272b5919701f0a1e685", size = 1726829, upload-time = "2025-10-17T14:01:46.546Z" }, - { url = "https://files.pythonhosted.org/packages/23/ce/610b1f77525a0a46639aea91377b12348e9f9412cc5ddcb17502aa4681c7/aiohttp-3.13.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:8fa09ab6dd567cb105db4e8ac4d60f377a7a94f67cf669cac79982f626360f32", size = 1542985, upload-time = "2025-10-17T14:01:49.082Z" }, - { url = "https://files.pythonhosted.org/packages/53/39/3ac8dfdad5de38c401846fa071fcd24cb3b88ccfb024854df6cbd9b4a07e/aiohttp-3.13.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:4159fae827f9b5f655538a4f99b7cbc3a2187e5ca2eee82f876ef1da802ccfa9", size = 1741556, upload-time = "2025-10-17T14:01:51.846Z" }, - { url = "https://files.pythonhosted.org/packages/2a/48/b1948b74fea7930b0f29595d1956842324336de200593d49a51a40607fdc/aiohttp-3.13.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ad671118c19e9cfafe81a7a05c294449fe0ebb0d0c6d5bb445cd2190023f5cef", size = 1696175, upload-time = 
"2025-10-17T14:01:54.232Z" }, - { url = "https://files.pythonhosted.org/packages/96/26/063bba38e4b27b640f56cc89fe83cc3546a7ae162c2e30ca345f0ccdc3d1/aiohttp-3.13.1-cp314-cp314-win32.whl", hash = "sha256:c5c970c148c48cf6acb65224ca3c87a47f74436362dde75c27bc44155ccf7dfc", size = 430254, upload-time = "2025-10-17T14:01:56.451Z" }, - { url = "https://files.pythonhosted.org/packages/88/aa/25fd764384dc4eab714023112d3548a8dd69a058840d61d816ea736097a2/aiohttp-3.13.1-cp314-cp314-win_amd64.whl", hash = "sha256:748a00167b7a88385756fa615417d24081cba7e58c8727d2e28817068b97c18c", size = 456256, upload-time = "2025-10-17T14:01:58.752Z" }, - { url = "https://files.pythonhosted.org/packages/d4/9f/9ba6059de4bad25c71cd88e3da53f93e9618ea369cf875c9f924b1c167e2/aiohttp-3.13.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:390b73e99d7a1f0f658b3f626ba345b76382f3edc65f49d6385e326e777ed00e", size = 765956, upload-time = "2025-10-17T14:02:01.515Z" }, - { url = "https://files.pythonhosted.org/packages/1f/30/b86da68b494447d3060f45c7ebb461347535dab4af9162a9267d9d86ca31/aiohttp-3.13.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:27e83abb330e687e019173d8fc1fd6a1cf471769624cf89b1bb49131198a810a", size = 503206, upload-time = "2025-10-17T14:02:03.818Z" }, - { url = "https://files.pythonhosted.org/packages/c1/21/d27a506552843ff9eeb9fcc2d45f943b09eefdfdf205aab044f4f1f39f6a/aiohttp-3.13.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2b20eed07131adbf3e873e009c2869b16a579b236e9d4b2f211bf174d8bef44a", size = 507719, upload-time = "2025-10-17T14:02:05.947Z" }, - { url = "https://files.pythonhosted.org/packages/58/23/4042230ec7e4edc7ba43d0342b5a3d2fe0222ca046933c4251a35aaf17f5/aiohttp-3.13.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:58fee9ef8477fd69e823b92cfd1f590ee388521b5ff8f97f3497e62ee0656212", size = 1862758, upload-time = "2025-10-17T14:02:08.469Z" }, - { url = 
"https://files.pythonhosted.org/packages/df/88/525c45bea7cbb9f65df42cadb4ff69f6a0dbf95931b0ff7d1fdc40a1cb5f/aiohttp-3.13.1-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:1f62608fcb7b3d034d5e9496bea52d94064b7b62b06edba82cd38191336bbeda", size = 1717790, upload-time = "2025-10-17T14:02:11.37Z" }, - { url = "https://files.pythonhosted.org/packages/1d/80/21e9b5eb77df352a5788713f37359b570a793f0473f3a72db2e46df379b9/aiohttp-3.13.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fdc4d81c3dfc999437f23e36d197e8b557a3f779625cd13efe563a9cfc2ce712", size = 1842088, upload-time = "2025-10-17T14:02:13.872Z" }, - { url = "https://files.pythonhosted.org/packages/d2/bf/d1738f6d63fe8b2a0ad49533911b3347f4953cd001bf3223cb7b61f18dff/aiohttp-3.13.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:601d7ec812f746fd80ff8af38eeb3f196e1bab4a4d39816ccbc94c222d23f1d0", size = 1934292, upload-time = "2025-10-17T14:02:16.624Z" }, - { url = "https://files.pythonhosted.org/packages/04/e6/26cab509b42610ca49573f2fc2867810f72bd6a2070182256c31b14f2e98/aiohttp-3.13.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47c3f21c469b840d9609089435c0d9918ae89f41289bf7cc4afe5ff7af5458db", size = 1791328, upload-time = "2025-10-17T14:02:19.051Z" }, - { url = "https://files.pythonhosted.org/packages/8a/6d/baf7b462852475c9d045bee8418d9cdf280efb687752b553e82d0c58bcc2/aiohttp-3.13.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d6c6cdc0750db88520332d4aaa352221732b0cafe89fd0e42feec7cb1b5dc236", size = 1622663, upload-time = "2025-10-17T14:02:21.397Z" }, - { url = "https://files.pythonhosted.org/packages/c8/48/396a97318af9b5f4ca8b3dc14a67976f71c6400a9609c622f96da341453f/aiohttp-3.13.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:58a12299eeb1fca2414ee2bc345ac69b0f765c20b82c3ab2a75d91310d95a9f6", size = 1787791, upload-time = "2025-10-17T14:02:24.212Z" }, - { url = "https://files.pythonhosted.org/packages/a8/e2/6925f6784134ce3ff3ce1a8502ab366432a3b5605387618c1a939ce778d9/aiohttp-3.13.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:0989cbfc195a4de1bb48f08454ef1cb47424b937e53ed069d08404b9d3c7aea1", size = 1775459, upload-time = "2025-10-17T14:02:26.971Z" }, - { url = "https://files.pythonhosted.org/packages/c3/e3/b372047ba739fc39f199b99290c4cc5578ce5fd125f69168c967dac44021/aiohttp-3.13.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:feb5ee664300e2435e0d1bc3443a98925013dfaf2cae9699c1f3606b88544898", size = 1789250, upload-time = "2025-10-17T14:02:29.686Z" }, - { url = "https://files.pythonhosted.org/packages/02/8c/9f48b93d7d57fc9ef2ad4adace62e4663ea1ce1753806c4872fb36b54c39/aiohttp-3.13.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:58a6f8702da0c3606fb5cf2e669cce0ca681d072fe830968673bb4c69eb89e88", size = 1616139, upload-time = "2025-10-17T14:02:32.151Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c6/c64e39d61aaa33d7de1be5206c0af3ead4b369bf975dac9fdf907a4291c1/aiohttp-3.13.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:a417ceb433b9d280e2368ffea22d4bc6e3e0d894c4bc7768915124d57d0964b6", size = 1815829, upload-time = "2025-10-17T14:02:34.635Z" }, - { url = "https://files.pythonhosted.org/packages/22/75/e19e93965ea675f1151753b409af97a14f1d888588a555e53af1e62b83eb/aiohttp-3.13.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8ac8854f7b0466c5d6a9ea49249b3f6176013859ac8f4bb2522ad8ed6b94ded2", size = 1760923, upload-time = "2025-10-17T14:02:37.364Z" }, - { url = "https://files.pythonhosted.org/packages/6c/a4/06ed38f1dabd98ea136fd116cba1d02c9b51af5a37d513b6850a9a567d86/aiohttp-3.13.1-cp314-cp314t-win32.whl", hash = "sha256:be697a5aeff42179ed13b332a411e674994bcd406c81642d014ace90bf4bb968", size = 463318, upload-time = 
"2025-10-17T14:02:39.924Z" }, - { url = "https://files.pythonhosted.org/packages/04/0f/27e4fdde899e1e90e35eeff56b54ed63826435ad6cdb06b09ed312d1b3fa/aiohttp-3.13.1-cp314-cp314t-win_amd64.whl", hash = "sha256:f1d6aa90546a4e8f20c3500cb68ab14679cd91f927fa52970035fd3207dfb3da", size = 496721, upload-time = "2025-10-17T14:02:42.199Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/1c/ce/3b83ebba6b3207a7135e5fcaba49706f8a4b6008153b4e30540c982fae26/aiohttp-3.13.2.tar.gz", hash = "sha256:40176a52c186aefef6eb3cad2cdd30cd06e3afbe88fe8ab2af9c0b90f228daca", size = 7837994, upload-time = "2025-10-28T20:59:39.937Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/34/939730e66b716b76046dedfe0842995842fa906ccc4964bba414ff69e429/aiohttp-3.13.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2372b15a5f62ed37789a6b383ff7344fc5b9f243999b0cd9b629d8bc5f5b4155", size = 736471, upload-time = "2025-10-28T20:55:27.924Z" }, + { url = "https://files.pythonhosted.org/packages/fd/cf/dcbdf2df7f6ca72b0bb4c0b4509701f2d8942cf54e29ca197389c214c07f/aiohttp-3.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e7f8659a48995edee7229522984bd1009c1213929c769c2daa80b40fe49a180c", size = 493985, upload-time = "2025-10-28T20:55:29.456Z" }, + { url = "https://files.pythonhosted.org/packages/9d/87/71c8867e0a1d0882dcbc94af767784c3cb381c1c4db0943ab4aae4fed65e/aiohttp-3.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:939ced4a7add92296b0ad38892ce62b98c619288a081170695c6babe4f50e636", size = 489274, upload-time = "2025-10-28T20:55:31.134Z" }, + { url = "https://files.pythonhosted.org/packages/38/0f/46c24e8dae237295eaadd113edd56dee96ef6462adf19b88592d44891dc5/aiohttp-3.13.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6315fb6977f1d0dd41a107c527fee2ed5ab0550b7d885bc15fee20ccb17891da", size = 1668171, upload-time = "2025-10-28T20:55:36.065Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/c6/4cdfb4440d0e28483681a48f69841fa5e39366347d66ef808cbdadddb20e/aiohttp-3.13.2-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6e7352512f763f760baaed2637055c49134fd1d35b37c2dedfac35bfe5cf8725", size = 1636036, upload-time = "2025-10-28T20:55:37.576Z" }, + { url = "https://files.pythonhosted.org/packages/84/37/8708cf678628216fb678ab327a4e1711c576d6673998f4f43e86e9ae90dd/aiohttp-3.13.2-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e09a0a06348a2dd73e7213353c90d709502d9786219f69b731f6caa0efeb46f5", size = 1727975, upload-time = "2025-10-28T20:55:39.457Z" }, + { url = "https://files.pythonhosted.org/packages/e6/2e/3ebfe12fdcb9b5f66e8a0a42dffcd7636844c8a018f261efb2419f68220b/aiohttp-3.13.2-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a09a6d073fb5789456545bdee2474d14395792faa0527887f2f4ec1a486a59d3", size = 1815823, upload-time = "2025-10-28T20:55:40.958Z" }, + { url = "https://files.pythonhosted.org/packages/a1/4f/ca2ef819488cbb41844c6cf92ca6dd15b9441e6207c58e5ae0e0fc8d70ad/aiohttp-3.13.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b59d13c443f8e049d9e94099c7e412e34610f1f49be0f230ec656a10692a5802", size = 1669374, upload-time = "2025-10-28T20:55:42.745Z" }, + { url = "https://files.pythonhosted.org/packages/f8/fe/1fe2e1179a0d91ce09c99069684aab619bf2ccde9b20bd6ca44f8837203e/aiohttp-3.13.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:20db2d67985d71ca033443a1ba2001c4b5693fe09b0e29f6d9358a99d4d62a8a", size = 1555315, upload-time = "2025-10-28T20:55:44.264Z" }, + { url = "https://files.pythonhosted.org/packages/5a/2b/f3781899b81c45d7cbc7140cddb8a3481c195e7cbff8e36374759d2ab5a5/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:960c2fc686ba27b535f9fd2b52d87ecd7e4fd1cf877f6a5cba8afb5b4a8bd204", size = 1639140, upload-time = "2025-10-28T20:55:46.626Z" }, + { url = "https://files.pythonhosted.org/packages/72/27/c37e85cd3ece6f6c772e549bd5a253d0c122557b25855fb274224811e4f2/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:6c00dbcf5f0d88796151e264a8eab23de2997c9303dd7c0bf622e23b24d3ce22", size = 1645496, upload-time = "2025-10-28T20:55:48.933Z" }, + { url = "https://files.pythonhosted.org/packages/66/20/3af1ab663151bd3780b123e907761cdb86ec2c4e44b2d9b195ebc91fbe37/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fed38a5edb7945f4d1bcabe2fcd05db4f6ec7e0e82560088b754f7e08d93772d", size = 1697625, upload-time = "2025-10-28T20:55:50.377Z" }, + { url = "https://files.pythonhosted.org/packages/95/eb/ae5cab15efa365e13d56b31b0d085a62600298bf398a7986f8388f73b598/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:b395bbca716c38bef3c764f187860e88c724b342c26275bc03e906142fc5964f", size = 1542025, upload-time = "2025-10-28T20:55:51.861Z" }, + { url = "https://files.pythonhosted.org/packages/e9/2d/1683e8d67ec72d911397fe4e575688d2a9b8f6a6e03c8fdc9f3fd3d4c03f/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:204ffff2426c25dfda401ba08da85f9c59525cdc42bda26660463dd1cbcfec6f", size = 1714918, upload-time = "2025-10-28T20:55:53.515Z" }, + { url = "https://files.pythonhosted.org/packages/99/a2/ffe8e0e1c57c5e542d47ffa1fcf95ef2b3ea573bf7c4d2ee877252431efc/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:05c4dd3c48fb5f15db31f57eb35374cb0c09afdde532e7fb70a75aede0ed30f6", size = 1656113, upload-time = "2025-10-28T20:55:55.438Z" }, + { url = "https://files.pythonhosted.org/packages/0d/42/d511aff5c3a2b06c09d7d214f508a4ad8ac7799817f7c3d23e7336b5e896/aiohttp-3.13.2-cp310-cp310-win32.whl", hash = "sha256:e574a7d61cf10351d734bcddabbe15ede0eaa8a02070d85446875dc11189a251", size = 432290, upload-time = "2025-10-28T20:55:56.96Z" 
}, + { url = "https://files.pythonhosted.org/packages/8b/ea/1c2eb7098b5bad4532994f2b7a8228d27674035c9b3234fe02c37469ef14/aiohttp-3.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:364f55663085d658b8462a1c3f17b2b84a5c2e1ba858e1b79bff7b2e24ad1514", size = 455075, upload-time = "2025-10-28T20:55:58.373Z" }, + { url = "https://files.pythonhosted.org/packages/35/74/b321e7d7ca762638cdf8cdeceb39755d9c745aff7a64c8789be96ddf6e96/aiohttp-3.13.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4647d02df098f6434bafd7f32ad14942f05a9caa06c7016fdcc816f343997dd0", size = 743409, upload-time = "2025-10-28T20:56:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/99/3d/91524b905ec473beaf35158d17f82ef5a38033e5809fe8742e3657cdbb97/aiohttp-3.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e3403f24bcb9c3b29113611c3c16a2a447c3953ecf86b79775e7be06f7ae7ccb", size = 497006, upload-time = "2025-10-28T20:56:01.85Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d3/7f68bc02a67716fe80f063e19adbd80a642e30682ce74071269e17d2dba1/aiohttp-3.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:43dff14e35aba17e3d6d5ba628858fb8cb51e30f44724a2d2f0c75be492c55e9", size = 493195, upload-time = "2025-10-28T20:56:03.314Z" }, + { url = "https://files.pythonhosted.org/packages/98/31/913f774a4708775433b7375c4f867d58ba58ead833af96c8af3621a0d243/aiohttp-3.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2a9ea08e8c58bb17655630198833109227dea914cd20be660f52215f6de5613", size = 1747759, upload-time = "2025-10-28T20:56:04.904Z" }, + { url = "https://files.pythonhosted.org/packages/e8/63/04efe156f4326f31c7c4a97144f82132c3bb21859b7bb84748d452ccc17c/aiohttp-3.13.2-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53b07472f235eb80e826ad038c9d106c2f653584753f3ddab907c83f49eedead", size = 1704456, upload-time = "2025-10-28T20:56:06.986Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/02/4e16154d8e0a9cf4ae76f692941fd52543bbb148f02f098ca73cab9b1c1b/aiohttp-3.13.2-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e736c93e9c274fce6419af4aac199984d866e55f8a4cec9114671d0ea9688780", size = 1807572, upload-time = "2025-10-28T20:56:08.558Z" }, + { url = "https://files.pythonhosted.org/packages/34/58/b0583defb38689e7f06798f0285b1ffb3a6fb371f38363ce5fd772112724/aiohttp-3.13.2-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ff5e771f5dcbc81c64898c597a434f7682f2259e0cd666932a913d53d1341d1a", size = 1895954, upload-time = "2025-10-28T20:56:10.545Z" }, + { url = "https://files.pythonhosted.org/packages/6b/f3/083907ee3437425b4e376aa58b2c915eb1a33703ec0dc30040f7ae3368c6/aiohttp-3.13.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3b6fb0c207cc661fa0bf8c66d8d9b657331ccc814f4719468af61034b478592", size = 1747092, upload-time = "2025-10-28T20:56:12.118Z" }, + { url = "https://files.pythonhosted.org/packages/ac/61/98a47319b4e425cc134e05e5f3fc512bf9a04bf65aafd9fdcda5d57ec693/aiohttp-3.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:97a0895a8e840ab3520e2288db7cace3a1981300d48babeb50e7425609e2e0ab", size = 1606815, upload-time = "2025-10-28T20:56:14.191Z" }, + { url = "https://files.pythonhosted.org/packages/97/4b/e78b854d82f66bb974189135d31fce265dee0f5344f64dd0d345158a5973/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9e8f8afb552297aca127c90cb840e9a1d4bfd6a10d7d8f2d9176e1acc69bad30", size = 1723789, upload-time = "2025-10-28T20:56:16.101Z" }, + { url = "https://files.pythonhosted.org/packages/ed/fc/9d2ccc794fc9b9acd1379d625c3a8c64a45508b5091c546dea273a41929e/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:ed2f9c7216e53c3df02264f25d824b079cc5914f9e2deba94155190ef648ee40", size = 1718104, upload-time 
= "2025-10-28T20:56:17.655Z" }, + { url = "https://files.pythonhosted.org/packages/66/65/34564b8765ea5c7d79d23c9113135d1dd3609173da13084830f1507d56cf/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:99c5280a329d5fa18ef30fd10c793a190d996567667908bef8a7f81f8202b948", size = 1785584, upload-time = "2025-10-28T20:56:19.238Z" }, + { url = "https://files.pythonhosted.org/packages/30/be/f6a7a426e02fc82781afd62016417b3948e2207426d90a0e478790d1c8a4/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ca6ffef405fc9c09a746cb5d019c1672cd7f402542e379afc66b370833170cf", size = 1595126, upload-time = "2025-10-28T20:56:20.836Z" }, + { url = "https://files.pythonhosted.org/packages/e5/c7/8e22d5d28f94f67d2af496f14a83b3c155d915d1fe53d94b66d425ec5b42/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:47f438b1a28e926c37632bff3c44df7d27c9b57aaf4e34b1def3c07111fdb782", size = 1800665, upload-time = "2025-10-28T20:56:22.922Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/91133c8b68b1da9fc16555706aa7276fdf781ae2bb0876c838dd86b8116e/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9acda8604a57bb60544e4646a4615c1866ee6c04a8edef9b8ee6fd1d8fa2ddc8", size = 1739532, upload-time = "2025-10-28T20:56:25.924Z" }, + { url = "https://files.pythonhosted.org/packages/17/6b/3747644d26a998774b21a616016620293ddefa4d63af6286f389aedac844/aiohttp-3.13.2-cp311-cp311-win32.whl", hash = "sha256:868e195e39b24aaa930b063c08bb0c17924899c16c672a28a65afded9c46c6ec", size = 431876, upload-time = "2025-10-28T20:56:27.524Z" }, + { url = "https://files.pythonhosted.org/packages/c3/63/688462108c1a00eb9f05765331c107f95ae86f6b197b865d29e930b7e462/aiohttp-3.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:7fd19df530c292542636c2a9a85854fab93474396a52f1695e799186bbd7f24c", size = 456205, upload-time = "2025-10-28T20:56:29.062Z" }, + { url = 
"https://files.pythonhosted.org/packages/29/9b/01f00e9856d0a73260e86dd8ed0c2234a466c5c1712ce1c281548df39777/aiohttp-3.13.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b1e56bab2e12b2b9ed300218c351ee2a3d8c8fdab5b1ec6193e11a817767e47b", size = 737623, upload-time = "2025-10-28T20:56:30.797Z" }, + { url = "https://files.pythonhosted.org/packages/5a/1b/4be39c445e2b2bd0aab4ba736deb649fabf14f6757f405f0c9685019b9e9/aiohttp-3.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:364e25edaabd3d37b1db1f0cbcee8c73c9a3727bfa262b83e5e4cf3489a2a9dc", size = 492664, upload-time = "2025-10-28T20:56:32.708Z" }, + { url = "https://files.pythonhosted.org/packages/28/66/d35dcfea8050e131cdd731dff36434390479b4045a8d0b9d7111b0a968f1/aiohttp-3.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c5c94825f744694c4b8db20b71dba9a257cd2ba8e010a803042123f3a25d50d7", size = 491808, upload-time = "2025-10-28T20:56:34.57Z" }, + { url = "https://files.pythonhosted.org/packages/00/29/8e4609b93e10a853b65f8291e64985de66d4f5848c5637cddc70e98f01f8/aiohttp-3.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba2715d842ffa787be87cbfce150d5e88c87a98e0b62e0f5aa489169a393dbbb", size = 1738863, upload-time = "2025-10-28T20:56:36.377Z" }, + { url = "https://files.pythonhosted.org/packages/9d/fa/4ebdf4adcc0def75ced1a0d2d227577cd7b1b85beb7edad85fcc87693c75/aiohttp-3.13.2-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:585542825c4bc662221fb257889e011a5aa00f1ae4d75d1d246a5225289183e3", size = 1700586, upload-time = "2025-10-28T20:56:38.034Z" }, + { url = "https://files.pythonhosted.org/packages/da/04/73f5f02ff348a3558763ff6abe99c223381b0bace05cd4530a0258e52597/aiohttp-3.13.2-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:39d02cb6025fe1aabca329c5632f48c9532a3dabccd859e7e2f110668972331f", size = 1768625, upload-time = 
"2025-10-28T20:56:39.75Z" }, + { url = "https://files.pythonhosted.org/packages/f8/49/a825b79ffec124317265ca7d2344a86bcffeb960743487cb11988ffb3494/aiohttp-3.13.2-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e67446b19e014d37342f7195f592a2a948141d15a312fe0e700c2fd2f03124f6", size = 1867281, upload-time = "2025-10-28T20:56:41.471Z" }, + { url = "https://files.pythonhosted.org/packages/b9/48/adf56e05f81eac31edcfae45c90928f4ad50ef2e3ea72cb8376162a368f8/aiohttp-3.13.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4356474ad6333e41ccefd39eae869ba15a6c5299c9c01dfdcfdd5c107be4363e", size = 1752431, upload-time = "2025-10-28T20:56:43.162Z" }, + { url = "https://files.pythonhosted.org/packages/30/ab/593855356eead019a74e862f21523db09c27f12fd24af72dbc3555b9bfd9/aiohttp-3.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:eeacf451c99b4525f700f078becff32c32ec327b10dcf31306a8a52d78166de7", size = 1562846, upload-time = "2025-10-28T20:56:44.85Z" }, + { url = "https://files.pythonhosted.org/packages/39/0f/9f3d32271aa8dc35036e9668e31870a9d3b9542dd6b3e2c8a30931cb27ae/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8a9b889aeabd7a4e9af0b7f4ab5ad94d42e7ff679aaec6d0db21e3b639ad58d", size = 1699606, upload-time = "2025-10-28T20:56:46.519Z" }, + { url = "https://files.pythonhosted.org/packages/2c/3c/52d2658c5699b6ef7692a3f7128b2d2d4d9775f2a68093f74bca06cf01e1/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fa89cb11bc71a63b69568d5b8a25c3ca25b6d54c15f907ca1c130d72f320b76b", size = 1720663, upload-time = "2025-10-28T20:56:48.528Z" }, + { url = "https://files.pythonhosted.org/packages/9b/d4/8f8f3ff1fb7fb9e3f04fcad4e89d8a1cd8fc7d05de67e3de5b15b33008ff/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8aa7c807df234f693fed0ecd507192fc97692e61fee5702cdc11155d2e5cadc8", size = 1737939, upload-time = 
"2025-10-28T20:56:50.77Z" }, + { url = "https://files.pythonhosted.org/packages/03/d3/ddd348f8a27a634daae39a1b8e291ff19c77867af438af844bf8b7e3231b/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:9eb3e33fdbe43f88c3c75fa608c25e7c47bbd80f48d012763cb67c47f39a7e16", size = 1555132, upload-time = "2025-10-28T20:56:52.568Z" }, + { url = "https://files.pythonhosted.org/packages/39/b8/46790692dc46218406f94374903ba47552f2f9f90dad554eed61bfb7b64c/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9434bc0d80076138ea986833156c5a48c9c7a8abb0c96039ddbb4afc93184169", size = 1764802, upload-time = "2025-10-28T20:56:54.292Z" }, + { url = "https://files.pythonhosted.org/packages/ba/e4/19ce547b58ab2a385e5f0b8aa3db38674785085abcf79b6e0edd1632b12f/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ff15c147b2ad66da1f2cbb0622313f2242d8e6e8f9b79b5206c84523a4473248", size = 1719512, upload-time = "2025-10-28T20:56:56.428Z" }, + { url = "https://files.pythonhosted.org/packages/70/30/6355a737fed29dcb6dfdd48682d5790cb5eab050f7b4e01f49b121d3acad/aiohttp-3.13.2-cp312-cp312-win32.whl", hash = "sha256:27e569eb9d9e95dbd55c0fc3ec3a9335defbf1d8bc1d20171a49f3c4c607b93e", size = 426690, upload-time = "2025-10-28T20:56:58.736Z" }, + { url = "https://files.pythonhosted.org/packages/0a/0d/b10ac09069973d112de6ef980c1f6bb31cb7dcd0bc363acbdad58f927873/aiohttp-3.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:8709a0f05d59a71f33fd05c17fc11fcb8c30140506e13c2f5e8ee1b8964e1b45", size = 453465, upload-time = "2025-10-28T20:57:00.795Z" }, + { url = "https://files.pythonhosted.org/packages/bf/78/7e90ca79e5aa39f9694dcfd74f4720782d3c6828113bb1f3197f7e7c4a56/aiohttp-3.13.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7519bdc7dfc1940d201651b52bf5e03f5503bda45ad6eacf64dda98be5b2b6be", size = 732139, upload-time = "2025-10-28T20:57:02.455Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/ed/1f59215ab6853fbaa5c8495fa6cbc39edfc93553426152b75d82a5f32b76/aiohttp-3.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:088912a78b4d4f547a1f19c099d5a506df17eacec3c6f4375e2831ec1d995742", size = 490082, upload-time = "2025-10-28T20:57:04.784Z" }, + { url = "https://files.pythonhosted.org/packages/68/7b/fe0fe0f5e05e13629d893c760465173a15ad0039c0a5b0d0040995c8075e/aiohttp-3.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5276807b9de9092af38ed23ce120539ab0ac955547b38563a9ba4f5b07b95293", size = 489035, upload-time = "2025-10-28T20:57:06.894Z" }, + { url = "https://files.pythonhosted.org/packages/d2/04/db5279e38471b7ac801d7d36a57d1230feeee130bbe2a74f72731b23c2b1/aiohttp-3.13.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1237c1375eaef0db4dcd7c2559f42e8af7b87ea7d295b118c60c36a6e61cb811", size = 1720387, upload-time = "2025-10-28T20:57:08.685Z" }, + { url = "https://files.pythonhosted.org/packages/31/07/8ea4326bd7dae2bd59828f69d7fdc6e04523caa55e4a70f4a8725a7e4ed2/aiohttp-3.13.2-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:96581619c57419c3d7d78703d5b78c1e5e5fc0172d60f555bdebaced82ded19a", size = 1688314, upload-time = "2025-10-28T20:57:10.693Z" }, + { url = "https://files.pythonhosted.org/packages/48/ab/3d98007b5b87ffd519d065225438cc3b668b2f245572a8cb53da5dd2b1bc/aiohttp-3.13.2-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2713a95b47374169409d18103366de1050fe0ea73db358fc7a7acb2880422d4", size = 1756317, upload-time = "2025-10-28T20:57:12.563Z" }, + { url = "https://files.pythonhosted.org/packages/97/3d/801ca172b3d857fafb7b50c7c03f91b72b867a13abca982ed6b3081774ef/aiohttp-3.13.2-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:228a1cd556b3caca590e9511a89444925da87d35219a49ab5da0c36d2d943a6a", size = 
1858539, upload-time = "2025-10-28T20:57:14.623Z" }, + { url = "https://files.pythonhosted.org/packages/f7/0d/4764669bdf47bd472899b3d3db91fffbe925c8e3038ec591a2fd2ad6a14d/aiohttp-3.13.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ac6cde5fba8d7d8c6ac963dbb0256a9854e9fafff52fbcc58fdf819357892c3e", size = 1739597, upload-time = "2025-10-28T20:57:16.399Z" }, + { url = "https://files.pythonhosted.org/packages/c4/52/7bd3c6693da58ba16e657eb904a5b6decfc48ecd06e9ac098591653b1566/aiohttp-3.13.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f2bef8237544f4e42878c61cef4e2839fee6346dc60f5739f876a9c50be7fcdb", size = 1555006, upload-time = "2025-10-28T20:57:18.288Z" }, + { url = "https://files.pythonhosted.org/packages/48/30/9586667acec5993b6f41d2ebcf96e97a1255a85f62f3c653110a5de4d346/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:16f15a4eac3bc2d76c45f7ebdd48a65d41b242eb6c31c2245463b40b34584ded", size = 1683220, upload-time = "2025-10-28T20:57:20.241Z" }, + { url = "https://files.pythonhosted.org/packages/71/01/3afe4c96854cfd7b30d78333852e8e851dceaec1c40fd00fec90c6402dd2/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:bb7fb776645af5cc58ab804c58d7eba545a97e047254a52ce89c157b5af6cd0b", size = 1712570, upload-time = "2025-10-28T20:57:22.253Z" }, + { url = "https://files.pythonhosted.org/packages/11/2c/22799d8e720f4697a9e66fd9c02479e40a49de3de2f0bbe7f9f78a987808/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:e1b4951125ec10c70802f2cb09736c895861cd39fd9dcb35107b4dc8ae6220b8", size = 1733407, upload-time = "2025-10-28T20:57:24.37Z" }, + { url = "https://files.pythonhosted.org/packages/34/cb/90f15dd029f07cebbd91f8238a8b363978b530cd128488085b5703683594/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:550bf765101ae721ee1d37d8095f47b1f220650f85fe1af37a90ce75bab89d04", size = 1550093, upload-time = 
"2025-10-28T20:57:26.257Z" }, + { url = "https://files.pythonhosted.org/packages/69/46/12dce9be9d3303ecbf4d30ad45a7683dc63d90733c2d9fe512be6716cd40/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:fe91b87fc295973096251e2d25a811388e7d8adf3bd2b97ef6ae78bc4ac6c476", size = 1758084, upload-time = "2025-10-28T20:57:28.349Z" }, + { url = "https://files.pythonhosted.org/packages/f9/c8/0932b558da0c302ffd639fc6362a313b98fdf235dc417bc2493da8394df7/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e0c8e31cfcc4592cb200160344b2fb6ae0f9e4effe06c644b5a125d4ae5ebe23", size = 1716987, upload-time = "2025-10-28T20:57:30.233Z" }, + { url = "https://files.pythonhosted.org/packages/5d/8b/f5bd1a75003daed099baec373aed678f2e9b34f2ad40d85baa1368556396/aiohttp-3.13.2-cp313-cp313-win32.whl", hash = "sha256:0740f31a60848d6edb296a0df827473eede90c689b8f9f2a4cdde74889eb2254", size = 425859, upload-time = "2025-10-28T20:57:32.105Z" }, + { url = "https://files.pythonhosted.org/packages/5d/28/a8a9fc6957b2cee8902414e41816b5ab5536ecf43c3b1843c10e82c559b2/aiohttp-3.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:a88d13e7ca367394908f8a276b89d04a3652044612b9a408a0bb22a5ed976a1a", size = 452192, upload-time = "2025-10-28T20:57:34.166Z" }, + { url = "https://files.pythonhosted.org/packages/9b/36/e2abae1bd815f01c957cbf7be817b3043304e1c87bad526292a0410fdcf9/aiohttp-3.13.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:2475391c29230e063ef53a66669b7b691c9bfc3f1426a0f7bcdf1216bdbac38b", size = 735234, upload-time = "2025-10-28T20:57:36.415Z" }, + { url = "https://files.pythonhosted.org/packages/ca/e3/1ee62dde9b335e4ed41db6bba02613295a0d5b41f74a783c142745a12763/aiohttp-3.13.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:f33c8748abef4d8717bb20e8fb1b3e07c6adacb7fd6beaae971a764cf5f30d61", size = 490733, upload-time = "2025-10-28T20:57:38.205Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/aa/7a451b1d6a04e8d15a362af3e9b897de71d86feac3babf8894545d08d537/aiohttp-3.13.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ae32f24bbfb7dbb485a24b30b1149e2f200be94777232aeadba3eecece4d0aa4", size = 491303, upload-time = "2025-10-28T20:57:40.122Z" }, + { url = "https://files.pythonhosted.org/packages/57/1e/209958dbb9b01174870f6a7538cd1f3f28274fdbc88a750c238e2c456295/aiohttp-3.13.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d7f02042c1f009ffb70067326ef183a047425bb2ff3bc434ead4dd4a4a66a2b", size = 1717965, upload-time = "2025-10-28T20:57:42.28Z" }, + { url = "https://files.pythonhosted.org/packages/08/aa/6a01848d6432f241416bc4866cae8dc03f05a5a884d2311280f6a09c73d6/aiohttp-3.13.2-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:93655083005d71cd6c072cdab54c886e6570ad2c4592139c3fb967bfc19e4694", size = 1667221, upload-time = "2025-10-28T20:57:44.869Z" }, + { url = "https://files.pythonhosted.org/packages/87/4f/36c1992432d31bbc789fa0b93c768d2e9047ec8c7177e5cd84ea85155f36/aiohttp-3.13.2-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0db1e24b852f5f664cd728db140cf11ea0e82450471232a394b3d1a540b0f906", size = 1757178, upload-time = "2025-10-28T20:57:47.216Z" }, + { url = "https://files.pythonhosted.org/packages/ac/b4/8e940dfb03b7e0f68a82b88fd182b9be0a65cb3f35612fe38c038c3112cf/aiohttp-3.13.2-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b009194665bcd128e23eaddef362e745601afa4641930848af4c8559e88f18f9", size = 1838001, upload-time = "2025-10-28T20:57:49.337Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ef/39f3448795499c440ab66084a9db7d20ca7662e94305f175a80f5b7e0072/aiohttp-3.13.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:c038a8fdc8103cd51dbd986ecdce141473ffd9775a7a8057a6ed9c3653478011", size = 1716325, upload-time = "2025-10-28T20:57:51.327Z" }, + { url = "https://files.pythonhosted.org/packages/d7/51/b311500ffc860b181c05d91c59a1313bdd05c82960fdd4035a15740d431e/aiohttp-3.13.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:66bac29b95a00db411cd758fea0e4b9bdba6d549dfe333f9a945430f5f2cc5a6", size = 1547978, upload-time = "2025-10-28T20:57:53.554Z" }, + { url = "https://files.pythonhosted.org/packages/31/64/b9d733296ef79815226dab8c586ff9e3df41c6aff2e16c06697b2d2e6775/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4ebf9cfc9ba24a74cf0718f04aac2a3bbe745902cc7c5ebc55c0f3b5777ef213", size = 1682042, upload-time = "2025-10-28T20:57:55.617Z" }, + { url = "https://files.pythonhosted.org/packages/3f/30/43d3e0f9d6473a6db7d472104c4eff4417b1e9df01774cb930338806d36b/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a4b88ebe35ce54205c7074f7302bd08a4cb83256a3e0870c72d6f68a3aaf8e49", size = 1680085, upload-time = "2025-10-28T20:57:57.59Z" }, + { url = "https://files.pythonhosted.org/packages/16/51/c709f352c911b1864cfd1087577760ced64b3e5bee2aa88b8c0c8e2e4972/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:98c4fb90bb82b70a4ed79ca35f656f4281885be076f3f970ce315402b53099ae", size = 1728238, upload-time = "2025-10-28T20:57:59.525Z" }, + { url = "https://files.pythonhosted.org/packages/19/e2/19bd4c547092b773caeb48ff5ae4b1ae86756a0ee76c16727fcfd281404b/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:ec7534e63ae0f3759df3a1ed4fa6bc8f75082a924b590619c0dd2f76d7043caa", size = 1544395, upload-time = "2025-10-28T20:58:01.914Z" }, + { url = "https://files.pythonhosted.org/packages/cf/87/860f2803b27dfc5ed7be532832a3498e4919da61299b4a1f8eb89b8ff44d/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5b927cf9b935a13e33644cbed6c8c4b2d0f25b713d838743f8fe7191b33829c4", size = 
1742965, upload-time = "2025-10-28T20:58:03.972Z" }, + { url = "https://files.pythonhosted.org/packages/67/7f/db2fc7618925e8c7a601094d5cbe539f732df4fb570740be88ed9e40e99a/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:88d6c017966a78c5265d996c19cdb79235be5e6412268d7e2ce7dee339471b7a", size = 1697585, upload-time = "2025-10-28T20:58:06.189Z" }, + { url = "https://files.pythonhosted.org/packages/0c/07/9127916cb09bb38284db5036036042b7b2c514c8ebaeee79da550c43a6d6/aiohttp-3.13.2-cp314-cp314-win32.whl", hash = "sha256:f7c183e786e299b5d6c49fb43a769f8eb8e04a2726a2bd5887b98b5cc2d67940", size = 431621, upload-time = "2025-10-28T20:58:08.636Z" }, + { url = "https://files.pythonhosted.org/packages/fb/41/554a8a380df6d3a2bba8a7726429a23f4ac62aaf38de43bb6d6cde7b4d4d/aiohttp-3.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:fe242cd381e0fb65758faf5ad96c2e460df6ee5b2de1072fe97e4127927e00b4", size = 457627, upload-time = "2025-10-28T20:58:11Z" }, + { url = "https://files.pythonhosted.org/packages/c7/8e/3824ef98c039d3951cb65b9205a96dd2b20f22241ee17d89c5701557c826/aiohttp-3.13.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:f10d9c0b0188fe85398c61147bbd2a657d616c876863bfeff43376e0e3134673", size = 767360, upload-time = "2025-10-28T20:58:13.358Z" }, + { url = "https://files.pythonhosted.org/packages/a4/0f/6a03e3fc7595421274fa34122c973bde2d89344f8a881b728fa8c774e4f1/aiohttp-3.13.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:e7c952aefdf2460f4ae55c5e9c3e80aa72f706a6317e06020f80e96253b1accd", size = 504616, upload-time = "2025-10-28T20:58:15.339Z" }, + { url = "https://files.pythonhosted.org/packages/c6/aa/ed341b670f1bc8a6f2c6a718353d13b9546e2cef3544f573c6a1ff0da711/aiohttp-3.13.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c20423ce14771d98353d2e25e83591fa75dfa90a3c1848f3d7c68243b4fbded3", size = 509131, upload-time = "2025-10-28T20:58:17.693Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/f0/c68dac234189dae5c4bbccc0f96ce0cc16b76632cfc3a08fff180045cfa4/aiohttp-3.13.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e96eb1a34396e9430c19d8338d2ec33015e4a87ef2b4449db94c22412e25ccdf", size = 1864168, upload-time = "2025-10-28T20:58:20.113Z" }, + { url = "https://files.pythonhosted.org/packages/8f/65/75a9a76db8364b5d0e52a0c20eabc5d52297385d9af9c35335b924fafdee/aiohttp-3.13.2-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:23fb0783bc1a33640036465019d3bba069942616a6a2353c6907d7fe1ccdaf4e", size = 1719200, upload-time = "2025-10-28T20:58:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/f5/55/8df2ed78d7f41d232f6bd3ff866b6f617026551aa1d07e2f03458f964575/aiohttp-3.13.2-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e1a9bea6244a1d05a4e57c295d69e159a5c50d8ef16aa390948ee873478d9a5", size = 1843497, upload-time = "2025-10-28T20:58:24.672Z" }, + { url = "https://files.pythonhosted.org/packages/e9/e0/94d7215e405c5a02ccb6a35c7a3a6cfff242f457a00196496935f700cde5/aiohttp-3.13.2-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0a3d54e822688b56e9f6b5816fb3de3a3a64660efac64e4c2dc435230ad23bad", size = 1935703, upload-time = "2025-10-28T20:58:26.758Z" }, + { url = "https://files.pythonhosted.org/packages/0b/78/1eeb63c3f9b2d1015a4c02788fb543141aad0a03ae3f7a7b669b2483f8d4/aiohttp-3.13.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7a653d872afe9f33497215745da7a943d1dc15b728a9c8da1c3ac423af35178e", size = 1792738, upload-time = "2025-10-28T20:58:29.787Z" }, + { url = "https://files.pythonhosted.org/packages/41/75/aaf1eea4c188e51538c04cc568040e3082db263a57086ea74a7d38c39e42/aiohttp-3.13.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:56d36e80d2003fa3fc0207fac644216d8532e9504a785ef9a8fd013f84a42c61", size = 1624061, upload-time = "2025-10-28T20:58:32.529Z" }, + { url = "https://files.pythonhosted.org/packages/9b/c2/3b6034de81fbcc43de8aeb209073a2286dfb50b86e927b4efd81cf848197/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:78cd586d8331fb8e241c2dd6b2f4061778cc69e150514b39a9e28dd050475661", size = 1789201, upload-time = "2025-10-28T20:58:34.618Z" }, + { url = "https://files.pythonhosted.org/packages/c9/38/c15dcf6d4d890217dae79d7213988f4e5fe6183d43893a9cf2fe9e84ca8d/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:20b10bbfbff766294fe99987f7bb3b74fdd2f1a2905f2562132641ad434dcf98", size = 1776868, upload-time = "2025-10-28T20:58:38.835Z" }, + { url = "https://files.pythonhosted.org/packages/04/75/f74fd178ac81adf4f283a74847807ade5150e48feda6aef024403716c30c/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9ec49dff7e2b3c85cdeaa412e9d438f0ecd71676fde61ec57027dd392f00c693", size = 1790660, upload-time = "2025-10-28T20:58:41.507Z" }, + { url = "https://files.pythonhosted.org/packages/e7/80/7368bd0d06b16b3aba358c16b919e9c46cf11587dc572091031b0e9e3ef0/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:94f05348c4406450f9d73d38efb41d669ad6cd90c7ee194810d0eefbfa875a7a", size = 1617548, upload-time = "2025-10-28T20:58:43.674Z" }, + { url = "https://files.pythonhosted.org/packages/7d/4b/a6212790c50483cb3212e507378fbe26b5086d73941e1ec4b56a30439688/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:fa4dcb605c6f82a80c7f95713c2b11c3b8e9893b3ebd2bc9bde93165ed6107be", size = 1817240, upload-time = "2025-10-28T20:58:45.787Z" }, + { url = "https://files.pythonhosted.org/packages/ff/f7/ba5f0ba4ea8d8f3c32850912944532b933acbf0f3a75546b89269b9b7dde/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cf00e5db968c3f67eccd2778574cf64d8b27d95b237770aa32400bd7a1ca4f6c", size = 1762334, upload-time = 
"2025-10-28T20:58:47.936Z" }, + { url = "https://files.pythonhosted.org/packages/7e/83/1a5a1856574588b1cad63609ea9ad75b32a8353ac995d830bf5da9357364/aiohttp-3.13.2-cp314-cp314t-win32.whl", hash = "sha256:d23b5fe492b0805a50d3371e8a728a9134d8de5447dce4c885f5587294750734", size = 464685, upload-time = "2025-10-28T20:58:50.642Z" }, + { url = "https://files.pythonhosted.org/packages/9f/4d/d22668674122c08f4d56972297c51a624e64b3ed1efaa40187607a7cb66e/aiohttp-3.13.2-cp314-cp314t-win_amd64.whl", hash = "sha256:ff0a7b0a82a7ab905cbda74006318d1b12e37c797eb1b0d4eb3e316cf47f658f", size = 498093, upload-time = "2025-10-28T20:58:52.782Z" }, ] [[package]] @@ -261,6 +261,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/59/75/e0e10dc7ed1408c28e03a6cb2d7a407f99320eb953f229d008a7a6d05546/aniso8601-10.0.1-py2.py3-none-any.whl", hash = "sha256:eb19717fd4e0db6de1aab06f12450ab92144246b257423fe020af5748c0cb89e", size = 52848, upload-time = "2025-04-18T17:29:41.492Z" }, ] +[[package]] +name = "annotated-doc" +version = "0.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/a6/dc46877b911e40c00d395771ea710d5e77b6de7bacd5fdcd78d70cc5a48f/annotated_doc-0.0.3.tar.gz", hash = "sha256:e18370014c70187422c33e945053ff4c286f453a984eba84d0dbfa0c935adeda", size = 5535, upload-time = "2025-10-24T14:57:10.718Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/b7/cf592cb5de5cb3bade3357f8d2cf42bf103bbe39f459824b4939fd212911/annotated_doc-0.0.3-py3-none-any.whl", hash = "sha256:348ec6664a76f1fd3be81f43dffbee4c7e8ce931ba71ec67cc7f4ade7fbbb580", size = 5488, upload-time = "2025-10-24T14:57:09.462Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -604,16 +613,16 @@ wheels = [ [[package]] name = "botocore" -version = "1.40.49" +version = "1.40.61" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist 
= { url = "https://files.pythonhosted.org/packages/01/6a/eb7503536552bbd3388b2607bc7a64e59d4f988336406b51a69d29f17ed2/botocore-1.40.49.tar.gz", hash = "sha256:fe8d4cbcc22de84c20190ae728c46b931bafeb40fce247010fb071c31b6532b5", size = 14415240, upload-time = "2025-10-09T19:21:37.133Z" } +sdist = { url = "https://files.pythonhosted.org/packages/28/a3/81d3a47c2dbfd76f185d3b894f2ad01a75096c006a2dd91f237dca182188/botocore-1.40.61.tar.gz", hash = "sha256:a2487ad69b090f9cccd64cf07c7021cd80ee9c0655ad974f87045b02f3ef52cd", size = 14393956, upload-time = "2025-10-28T19:26:46.108Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/7b/dce396a3f7078e0432d40a9778602cbf0785ca91e7bcb64e05f19dfb5662/botocore-1.40.49-py3-none-any.whl", hash = "sha256:bf1089d0e77e4fc2e195d81c519b194ab62a4d4dd3e7113ee4e2bf903b0b75ab", size = 14085172, upload-time = "2025-10-09T19:21:32.721Z" }, + { url = "https://files.pythonhosted.org/packages/38/c5/f6ce561004db45f0b847c2cd9b19c67c6bf348a82018a48cb718be6b58b0/botocore-1.40.61-py3-none-any.whl", hash = "sha256:17ebae412692fd4824f99cde0f08d50126dc97954008e5ba2b522eb049238aa7", size = 14055973, upload-time = "2025-10-28T19:26:42.15Z" }, ] [[package]] @@ -1152,7 +1161,7 @@ wheels = [ [[package]] name = "datasets" -version = "4.2.0" +version = "4.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dill" }, @@ -1170,9 +1179,9 @@ dependencies = [ { name = "tqdm" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/70/48/0186fbc4b86a4f9ecaf04eb01e877e78b53bfa0b03be9c84b2298431ba33/datasets-4.2.0.tar.gz", hash = "sha256:8333a7db9f3bb8044c1b819a35d4e3e2809596c837793b0921382efffdc36e78", size = 582256, upload-time = "2025-10-09T16:10:15.534Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/47/325206ac160f7699ed9f1798afa8f8f8d5189b03bf3815654859ac1d5cba/datasets-4.3.0.tar.gz", hash = "sha256:bc9118ed9afd92346c5be7ed3aaa00177eb907c25467f9d072a0d22777efbd2b", size = 582801, 
upload-time = "2025-10-23T16:31:51.547Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/9e/0bbbd09b116fd8ee2d3617e28e6598551d2f0f24d3a2ce99cc87ec85aeb0/datasets-4.2.0-py3-none-any.whl", hash = "sha256:fdc43aaf4a73b31f64f80f72f195ab413a1141ed15555d675b2fd17926f8b026", size = 506316, upload-time = "2025-10-09T16:10:13.375Z" }, + { url = "https://files.pythonhosted.org/packages/ca/51/409a8184ed35453d9cbb3d6b20d524b1115c2c2d117b85d5e9b06cd70b45/datasets-4.3.0-py3-none-any.whl", hash = "sha256:0ea157e72138b3ca6c7d2415f19a164ecf7d4c4fa72da2a570da286882e96903", size = 506846, upload-time = "2025-10-23T16:31:49.965Z" }, ] [[package]] @@ -1315,16 +1324,17 @@ wheels = [ [[package]] name = "fastapi" -version = "0.119.1" +version = "0.120.1" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a6/f4/152127681182e6413e7a89684c434e19e7414ed7ac0c632999c3c6980640/fastapi-0.119.1.tar.gz", hash = "sha256:a5e3426edce3fe221af4e1992c6d79011b247e3b03cc57999d697fe76cbf8ae0", size = 338616, upload-time = "2025-10-20T11:30:27.734Z" } +sdist = { url = "https://files.pythonhosted.org/packages/40/cc/28aff6e246ee85bd571b26e4a793b84d42700e3bdc3008c3d747eda7b06d/fastapi-0.120.1.tar.gz", hash = "sha256:b5c6217e9ddca6dfcf54c97986180d4a1955e10c693d74943fc5327700178bff", size = 337616, upload-time = "2025-10-27T17:53:42.954Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b1/26/e6d959b4ac959fdb3e9c4154656fc160794db6af8e64673d52759456bf07/fastapi-0.119.1-py3-none-any.whl", hash = "sha256:0b8c2a2cce853216e150e9bd4faaed88227f8eb37de21cb200771f491586a27f", size = 108123, upload-time = "2025-10-20T11:30:26.185Z" }, + { url = "https://files.pythonhosted.org/packages/7e/bb/1a74dbe87e9a595bf63052c886dfef965dc5b91d149456a8301eb3d41ce2/fastapi-0.120.1-py3-none-any.whl", hash = 
"sha256:0e8a2c328e96c117272d8c794d3a97d205f753cc2e69dd7ee387b7488a75601f", size = 108254, upload-time = "2025-10-27T17:53:40.076Z" }, ] [[package]] @@ -1736,17 +1746,31 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.1.10" +version = "1.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/74/31/feeddfce1748c4a233ec1aa5b7396161c07ae1aa9b7bdbc9a72c3c7dd768/hf_xet-1.1.10.tar.gz", hash = "sha256:408aef343800a2102374a883f283ff29068055c111f003ff840733d3b715bb97", size = 487910, upload-time = "2025-09-12T20:10:27.12Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/a2/343e6d05de96908366bdc0081f2d8607d61200be2ac802769c4284cc65bd/hf_xet-1.1.10-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:686083aca1a6669bc85c21c0563551cbcdaa5cf7876a91f3d074a030b577231d", size = 2761466, upload-time = "2025-09-12T20:10:22.836Z" }, - { url = "https://files.pythonhosted.org/packages/31/f9/6215f948ac8f17566ee27af6430ea72045e0418ce757260248b483f4183b/hf_xet-1.1.10-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71081925383b66b24eedff3013f8e6bbd41215c3338be4b94ba75fd75b21513b", size = 2623807, upload-time = "2025-09-12T20:10:21.118Z" }, - { url = "https://files.pythonhosted.org/packages/15/07/86397573efefff941e100367bbda0b21496ffcdb34db7ab51912994c32a2/hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b6bceb6361c80c1cc42b5a7b4e3efd90e64630bcf11224dcac50ef30a47e435", size = 3186960, upload-time = "2025-09-12T20:10:19.336Z" }, - { url = "https://files.pythonhosted.org/packages/01/a7/0b2e242b918cc30e1f91980f3c4b026ff2eedaf1e2ad96933bca164b2869/hf_xet-1.1.10-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eae7c1fc8a664e54753ffc235e11427ca61f4b0477d757cc4eb9ae374b69f09c", size = 3087167, upload-time = "2025-09-12T20:10:17.255Z" }, - { url = 
"https://files.pythonhosted.org/packages/4a/25/3e32ab61cc7145b11eee9d745988e2f0f4fafda81b25980eebf97d8cff15/hf_xet-1.1.10-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0a0005fd08f002180f7a12d4e13b22be277725bc23ed0529f8add5c7a6309c06", size = 3248612, upload-time = "2025-09-12T20:10:24.093Z" }, - { url = "https://files.pythonhosted.org/packages/2c/3d/ab7109e607ed321afaa690f557a9ada6d6d164ec852fd6bf9979665dc3d6/hf_xet-1.1.10-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f900481cf6e362a6c549c61ff77468bd59d6dd082f3170a36acfef2eb6a6793f", size = 3353360, upload-time = "2025-09-12T20:10:25.563Z" }, - { url = "https://files.pythonhosted.org/packages/ee/0e/471f0a21db36e71a2f1752767ad77e92d8cde24e974e03d662931b1305ec/hf_xet-1.1.10-cp37-abi3-win_amd64.whl", hash = "sha256:5f54b19cc347c13235ae7ee98b330c26dd65ef1df47e5316ffb1e87713ca7045", size = 2804691, upload-time = "2025-09-12T20:10:28.433Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload-time = "2025-10-24T19:04:32.129Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870, upload-time = "2025-10-24T19:04:11.422Z" }, + { url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584, upload-time = "2025-10-24T19:04:09.586Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004, upload-time = "2025-10-24T19:04:00.314Z" }, + { url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636, upload-time = "2025-10-24T19:03:58.111Z" }, + { url = "https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448, upload-time = "2025-10-24T19:04:20.951Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401, upload-time = "2025-10-24T19:04:22.549Z" }, + { url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866, upload-time = "2025-10-24T19:04:33.461Z" }, + { url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861, upload-time = "2025-10-24T19:04:19.01Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699, upload-time = "2025-10-24T19:04:17.306Z" }, + { url = "https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885, upload-time = "2025-10-24T19:04:07.642Z" }, + { url = "https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550, upload-time = "2025-10-24T19:04:05.55Z" }, + { url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010, upload-time = "2025-10-24T19:04:28.598Z" }, + { url = "https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264, upload-time = "2025-10-24T19:04:30.397Z" }, + { url = "https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071, upload-time = "2025-10-24T19:04:37.463Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099, upload-time = "2025-10-24T19:04:15.366Z" }, + { url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload-time = "2025-10-24T19:04:13.695Z" }, + { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload-time = "2025-10-24T19:04:03.596Z" }, + { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload-time = "2025-10-24T19:04:01.949Z" }, + { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload-time = "2025-10-24T19:04:24.585Z" }, + { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload-time = "2025-10-24T19:04:26.927Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, ] [[package]] @@ -2012,11 +2036,11 @@ wheels = [ [[package]] name = "lark" -version = "1.3.0" +version = "1.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1d/37/a13baf0135f348af608c667633cbe5d13aa2c5c15a56ae9ad3e6cba45ae3/lark-1.3.0.tar.gz", hash = "sha256:9a3839d0ca5e1faf7cfa3460e420e859b66bcbde05b634e73c369c8244c5fa48", size = 259551, upload-time = "2025-09-22T13:45:05.072Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/34/28fff3ab31ccff1fd4f6c7c7b0ceb2b6968d8ea4950663eadcb5720591a0/lark-1.3.1.tar.gz", hash = "sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905", size = 382732, upload-time = "2025-10-27T18:25:56.653Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/3e/1c6b43277de64fc3c0333b0e72ab7b52ddaaea205210d60d9b9f83c3d0c7/lark-1.3.0-py3-none-any.whl", hash = "sha256:80661f261fb2584a9828a097a2432efd575af27d20be0fd35d17f0fe37253831", size = 113002, upload-time = "2025-09-22T13:45:03.747Z" }, + { url = "https://files.pythonhosted.org/packages/82/3d/14ce75ef66813643812f3093ab17e46d3a206942ce7376d31ec2d36229e7/lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12", size = 113151, upload-time = "2025-10-27T18:25:54.882Z" }, ] [[package]] @@ -2426,7 +2450,7 @@ requires-dist = [ { name = "torch" }, { name = "tqdm", marker = "extra == 'dev'" }, { name = "tqdm", marker = "extra == 'lts'" }, - { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'dev'", git = "https://github.com/NVIDIA/TransformerEngine.git?rev=release_v2.8" }, + { name = "transformer-engine", extras = ["pytorch"], marker = 
"extra == 'dev'", git = "https://github.com/NVIDIA/TransformerEngine.git?rev=release_v2.9" }, { name = "transformers", marker = "extra == 'lts'" }, { name = "transformers", marker = "extra == 'mlm'" }, { name = "wandb", marker = "extra == 'mlm'" }, @@ -2469,7 +2493,7 @@ linting = [ ] test = [ { name = "coverage" }, - { name = "nemo-run", git = "https://github.com/NVIDIA-NeMo/Run.git?rev=8ca8f7952a597f944985f1f1368a7acb9aa3a6c2" }, + { name = "nemo-run", git = "https://github.com/NVIDIA-NeMo/Run.git?rev=01a9a8ba360f7b2908728ad0516e0ad9d936966d" }, { name = "nltk" }, { name = "pydantic" }, { name = "pygithub" }, @@ -2887,7 +2911,7 @@ wheels = [ [[package]] name = "nemo-run" version = "0.7.0rc0.dev0" -source = { git = "https://github.com/NVIDIA-NeMo/Run.git?rev=8ca8f7952a597f944985f1f1368a7acb9aa3a6c2#8ca8f7952a597f944985f1f1368a7acb9aa3a6c2" } +source = { git = "https://github.com/NVIDIA-NeMo/Run.git?rev=01a9a8ba360f7b2908728ad0516e0ad9d936966d#01a9a8ba360f7b2908728ad0516e0ad9d936966d" } dependencies = [ { name = "catalogue" }, { name = "cryptography" }, @@ -3296,7 +3320,7 @@ dependencies = [ { name = "rich" }, { name = "safetensors" }, { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "scipy", version = "1.16.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "torch", marker = "sys_platform == 'never'" }, { 
name = "torchprofile" }, { name = "torchvision", marker = "sys_platform == 'never'" }, @@ -3551,7 +3575,7 @@ wheels = [ [[package]] name = "onnx-ir" -version = "0.1.11" +version = "0.1.12" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version == '3.12.*' and sys_platform == 'linux'", @@ -3567,9 +3591,9 @@ dependencies = [ { name = "onnx", version = "1.19.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4b/c4/d7c52d89120ae2d90025bf30999f44ec029bb297be706ada81a2b7ce3e73/onnx_ir-0.1.11.tar.gz", hash = "sha256:05fd55f7548f4301a17476c53e19c16f92f4fc4c0f468fcd8d3afb6869f8ae75", size = 112093, upload-time = "2025-10-15T22:20:46.785Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/1a/2a94112a39d01a9d1490f5ef3c205d8a17fe1ca27f307b026c40d62d8e9f/onnx_ir-0.1.12.tar.gz", hash = "sha256:742e0bff875d0547724187560b3f441833191c8aa939c05f14176f4892784deb", size = 112699, upload-time = "2025-10-28T23:43:54.129Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/de/a9bb49f36e2d27ff2b1941972ce01838c9032155256e3380960c6f545455/onnx_ir-0.1.11-py3-none-any.whl", hash = "sha256:f23edd0d3f49b92abfab275625cb325da3978f5b41ba8cdaa28e85e87b44d2c1", size = 128694, upload-time = "2025-10-15T22:20:45.208Z" }, + { url = "https://files.pythonhosted.org/packages/c8/36/c4df116f5dcaa82ec7944e5d25624a3811f6603fd190660b0b079ea759fb/onnx_ir-0.1.12-py3-none-any.whl", hash = "sha256:17f86faf8a53b979430bde1bc6022c7a162b0d1534550ddb17a1d37eb993e765", size = 129277, upload-time = "2025-10-28T23:43:52.493Z" }, ] [[package]] @@ -3613,7 +3637,7 @@ dependencies = [ { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, { name = "numpy", marker = "python_full_version < '3.13'" }, { 
name = "onnx", version = "1.19.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, - { name = "onnx-ir", version = "0.1.11", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "onnx-ir", version = "0.1.12", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, { name = "packaging", marker = "python_full_version < '3.13'" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] @@ -4031,18 +4055,28 @@ wheels = [ [[package]] name = "psutil" -version = "7.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/89/fc/889242351a932d6183eec5df1fc6539b6f36b6a88444f1e63f18668253aa/psutil-7.1.1.tar.gz", hash = "sha256:092b6350145007389c1cfe5716050f02030a05219d90057ea867d18fe8d372fc", size = 487067, upload-time = "2025-10-19T15:43:59.373Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/51/30/f97f8fb1f9ecfbeae4b5ca738dcae66ab28323b5cfbc96cb5565f3754056/psutil-7.1.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:8fa59d7b1f01f0337f12cd10dbd76e4312a4d3c730a4fedcbdd4e5447a8b8460", size = 244221, upload-time = "2025-10-19T15:44:03.145Z" }, - { url = "https://files.pythonhosted.org/packages/7b/98/b8d1f61ebf35f4dbdbaabadf9208282d8adc820562f0257e5e6e79e67bf2/psutil-7.1.1-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:2a95104eae85d088891716db676f780c1404fc15d47fde48a46a5d61e8f5ad2c", size = 245660, upload-time = "2025-10-19T15:44:05.657Z" }, - { url = "https://files.pythonhosted.org/packages/f0/4a/b8015d7357fefdfe34bc4a3db48a107bae4bad0b94fb6eb0613f09a08ada/psutil-7.1.1-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:98629cd8567acefcc45afe2f4ba1e9290f579eacf490a917967decce4b74ee9b", size = 286963, upload-time = "2025-10-19T15:44:08.877Z" }, - { url = 
"https://files.pythonhosted.org/packages/3d/3c/b56076bb35303d0733fc47b110a1c9cce081a05ae2e886575a3587c1ee76/psutil-7.1.1-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92ebc58030fb054fa0f26c3206ef01c31c29d67aee1367e3483c16665c25c8d2", size = 290118, upload-time = "2025-10-19T15:44:11.897Z" }, - { url = "https://files.pythonhosted.org/packages/dc/af/c13d360c0adc6f6218bf9e2873480393d0f729c8dd0507d171f53061c0d3/psutil-7.1.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:146a704f224fb2ded2be3da5ac67fc32b9ea90c45b51676f9114a6ac45616967", size = 292587, upload-time = "2025-10-19T15:44:14.67Z" }, - { url = "https://files.pythonhosted.org/packages/90/2d/c933e7071ba60c7862813f2c7108ec4cf8304f1c79660efeefd0de982258/psutil-7.1.1-cp37-abi3-win32.whl", hash = "sha256:295c4025b5cd880f7445e4379e6826f7307e3d488947bf9834e865e7847dc5f7", size = 243772, upload-time = "2025-10-19T15:44:16.938Z" }, - { url = "https://files.pythonhosted.org/packages/be/f3/11fd213fff15427bc2853552138760c720fd65032d99edfb161910d04127/psutil-7.1.1-cp37-abi3-win_amd64.whl", hash = "sha256:9b4f17c5f65e44f69bd3a3406071a47b79df45cf2236d1f717970afcb526bcd3", size = 246936, upload-time = "2025-10-19T15:44:18.663Z" }, - { url = "https://files.pythonhosted.org/packages/0a/8d/8a9a45c8b655851f216c1d44f68e3533dc8d2c752ccd0f61f1aa73be4893/psutil-7.1.1-cp37-abi3-win_arm64.whl", hash = "sha256:5457cf741ca13da54624126cd5d333871b454ab133999a9a103fb097a7d7d21a", size = 243944, upload-time = "2025-10-19T15:44:20.666Z" }, +version = "7.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/ec/7b8e6b9b1d22708138630ef34c53ab2b61032c04f16adfdbb96791c8c70c/psutil-7.1.2.tar.gz", hash = "sha256:aa225cdde1335ff9684708ee8c72650f6598d5ed2114b9a7c5802030b1785018", size = 487424, upload-time = "2025-10-25T10:46:34.931Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b8/d9/b56cc9f883140ac10021a8c9b0f4e16eed1ba675c22513cdcbce3ba64014/psutil-7.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0cc5c6889b9871f231ed5455a9a02149e388fffcb30b607fb7a8896a6d95f22e", size = 238575, upload-time = "2025-10-25T10:46:38.728Z" }, + { url = "https://files.pythonhosted.org/packages/36/eb/28d22de383888deb252c818622196e709da98816e296ef95afda33f1c0a2/psutil-7.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8e9e77a977208d84aa363a4a12e0f72189d58bbf4e46b49aae29a2c6e93ef206", size = 239297, upload-time = "2025-10-25T10:46:41.347Z" }, + { url = "https://files.pythonhosted.org/packages/89/5d/220039e2f28cc129626e54d63892ab05c0d56a29818bfe7268dcb5008932/psutil-7.1.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d9623a5e4164d2220ecceb071f4b333b3c78866141e8887c072129185f41278", size = 280420, upload-time = "2025-10-25T10:46:44.122Z" }, + { url = "https://files.pythonhosted.org/packages/ba/7a/286f0e1c167445b2ef4a6cbdfc8c59fdb45a5a493788950cf8467201dc73/psutil-7.1.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:364b1c10fe4ed59c89ec49e5f1a70da353b27986fa8233b4b999df4742a5ee2f", size = 283049, upload-time = "2025-10-25T10:46:47.095Z" }, + { url = "https://files.pythonhosted.org/packages/aa/cc/7eb93260794a42e39b976f3a4dde89725800b9f573b014fac142002a5c98/psutil-7.1.2-cp313-cp313t-win_amd64.whl", hash = "sha256:f101ef84de7e05d41310e3ccbdd65a6dd1d9eed85e8aaf0758405d022308e204", size = 248713, upload-time = "2025-10-25T10:46:49.573Z" }, + { url = "https://files.pythonhosted.org/packages/ab/1a/0681a92b53366e01f0a099f5237d0c8a2f79d322ac589cccde5e30c8a4e2/psutil-7.1.2-cp313-cp313t-win_arm64.whl", hash = "sha256:20c00824048a95de67f00afedc7b08b282aa08638585b0206a9fb51f28f1a165", size = 244644, upload-time = "2025-10-25T10:46:51.924Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/9e/f1c5c746b4ed5320952acd3002d3962fe36f30524c00ea79fdf954cc6779/psutil-7.1.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:e09cfe92aa8e22b1ec5e2d394820cf86c5dff6367ac3242366485dfa874d43bc", size = 238640, upload-time = "2025-10-25T10:46:54.089Z" }, + { url = "https://files.pythonhosted.org/packages/32/ee/fd26216a735395cc25c3899634e34aeb41fb1f3dbb44acc67d9e594be562/psutil-7.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:fa6342cf859c48b19df3e4aa170e4cfb64aadc50b11e06bb569c6c777b089c9e", size = 239303, upload-time = "2025-10-25T10:46:56.932Z" }, + { url = "https://files.pythonhosted.org/packages/3c/cd/7d96eaec4ef7742b845a9ce2759a2769ecce4ab7a99133da24abacbc9e41/psutil-7.1.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:625977443498ee7d6c1e63e93bacca893fd759a66c5f635d05e05811d23fb5ee", size = 281717, upload-time = "2025-10-25T10:46:59.116Z" }, + { url = "https://files.pythonhosted.org/packages/bc/1a/7f0b84bdb067d35fe7fade5fff888408688caf989806ce2d6dae08c72dd5/psutil-7.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a24bcd7b7f2918d934af0fb91859f621b873d6aa81267575e3655cd387572a7", size = 284575, upload-time = "2025-10-25T10:47:00.944Z" }, + { url = "https://files.pythonhosted.org/packages/de/05/7820ef8f7b275268917e0c750eada5834581206d9024ca88edce93c4b762/psutil-7.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:329f05610da6380982e6078b9d0881d9ab1e9a7eb7c02d833bfb7340aa634e31", size = 249491, upload-time = "2025-10-25T10:47:03.174Z" }, + { url = "https://files.pythonhosted.org/packages/db/9a/58de399c7cb58489f08498459ff096cd76b3f1ddc4f224ec2c5ef729c7d0/psutil-7.1.2-cp314-cp314t-win_arm64.whl", hash = "sha256:7b04c29e3c0c888e83ed4762b70f31e65c42673ea956cefa8ced0e31e185f582", size = 244880, upload-time = "2025-10-25T10:47:05.228Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/89/b9f8d47ddbc52d7301fc868e8224e5f44ed3c7f55e6d0f54ecaf5dd9ff5e/psutil-7.1.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c9ba5c19f2d46203ee8c152c7b01df6eec87d883cfd8ee1af2ef2727f6b0f814", size = 237244, upload-time = "2025-10-25T10:47:07.086Z" }, + { url = "https://files.pythonhosted.org/packages/c8/7a/8628c2f6b240680a67d73d8742bb9ff39b1820a693740e43096d5dcb01e5/psutil-7.1.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:2a486030d2fe81bec023f703d3d155f4823a10a47c36784c84f1cc7f8d39bedb", size = 238101, upload-time = "2025-10-25T10:47:09.523Z" }, + { url = "https://files.pythonhosted.org/packages/30/28/5e27f4d5a0e347f8e3cc16cd7d35533dbce086c95807f1f0e9cd77e26c10/psutil-7.1.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3efd8fc791492e7808a51cb2b94889db7578bfaea22df931424f874468e389e3", size = 258675, upload-time = "2025-10-25T10:47:11.082Z" }, + { url = "https://files.pythonhosted.org/packages/e5/5c/79cf60c9acf36d087f0db0f82066fca4a780e97e5b3a2e4c38209c03d170/psutil-7.1.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2aeb9b64f481b8eabfc633bd39e0016d4d8bbcd590d984af764d80bf0851b8a", size = 260203, upload-time = "2025-10-25T10:47:13.226Z" }, + { url = "https://files.pythonhosted.org/packages/f7/03/0a464404c51685dcb9329fdd660b1721e076ccd7b3d97dee066bcc9ffb15/psutil-7.1.2-cp37-abi3-win_amd64.whl", hash = "sha256:8e17852114c4e7996fe9da4745c2bdef001ebbf2f260dec406290e66628bdb91", size = 246714, upload-time = "2025-10-25T10:47:15.093Z" }, + { url = "https://files.pythonhosted.org/packages/6a/32/97ca2090f2f1b45b01b6aa7ae161cfe50671de097311975ca6eea3e7aabc/psutil-7.1.2-cp37-abi3-win_arm64.whl", hash = "sha256:3e988455e61c240cc879cb62a008c2699231bf3e3d061d7fce4234463fd2abb4", size = 243742, upload-time = "2025-10-25T10:47:17.302Z" }, ] [[package]] @@ -4056,45 +4090,59 @@ wheels = [ [[package]] name = "pyarrow" -version = 
"21.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ef/c2/ea068b8f00905c06329a3dfcd40d0fcc2b7d0f2e355bdb25b65e0a0e4cd4/pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc", size = 1133487, upload-time = "2025-07-18T00:57:31.761Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/17/d9/110de31880016e2afc52d8580b397dbe47615defbf09ca8cf55f56c62165/pyarrow-21.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e563271e2c5ff4d4a4cbeb2c83d5cf0d4938b891518e676025f7268c6fe5fe26", size = 31196837, upload-time = "2025-07-18T00:54:34.755Z" }, - { url = "https://files.pythonhosted.org/packages/df/5f/c1c1997613abf24fceb087e79432d24c19bc6f7259cab57c2c8e5e545fab/pyarrow-21.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fee33b0ca46f4c85443d6c450357101e47d53e6c3f008d658c27a2d020d44c79", size = 32659470, upload-time = "2025-07-18T00:54:38.329Z" }, - { url = "https://files.pythonhosted.org/packages/3e/ed/b1589a777816ee33ba123ba1e4f8f02243a844fed0deec97bde9fb21a5cf/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7be45519b830f7c24b21d630a31d48bcebfd5d4d7f9d3bdb49da9cdf6d764edb", size = 41055619, upload-time = "2025-07-18T00:54:42.172Z" }, - { url = "https://files.pythonhosted.org/packages/44/28/b6672962639e85dc0ac36f71ab3a8f5f38e01b51343d7aa372a6b56fa3f3/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:26bfd95f6bff443ceae63c65dc7e048670b7e98bc892210acba7e4995d3d4b51", size = 42733488, upload-time = "2025-07-18T00:54:47.132Z" }, - { url = "https://files.pythonhosted.org/packages/f8/cc/de02c3614874b9089c94eac093f90ca5dfa6d5afe45de3ba847fd950fdf1/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd04ec08f7f8bd113c55868bd3fc442a9db67c27af098c5f814a3091e71cc61a", size = 43329159, upload-time = "2025-07-18T00:54:51.686Z" }, - { url = 
"https://files.pythonhosted.org/packages/a6/3e/99473332ac40278f196e105ce30b79ab8affab12f6194802f2593d6b0be2/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9b0b14b49ac10654332a805aedfc0147fb3469cbf8ea951b3d040dab12372594", size = 45050567, upload-time = "2025-07-18T00:54:56.679Z" }, - { url = "https://files.pythonhosted.org/packages/7b/f5/c372ef60593d713e8bfbb7e0c743501605f0ad00719146dc075faf11172b/pyarrow-21.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9d9f8bcb4c3be7738add259738abdeddc363de1b80e3310e04067aa1ca596634", size = 26217959, upload-time = "2025-07-18T00:55:00.482Z" }, - { url = "https://files.pythonhosted.org/packages/94/dc/80564a3071a57c20b7c32575e4a0120e8a330ef487c319b122942d665960/pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b", size = 31243234, upload-time = "2025-07-18T00:55:03.812Z" }, - { url = "https://files.pythonhosted.org/packages/ea/cc/3b51cb2db26fe535d14f74cab4c79b191ed9a8cd4cbba45e2379b5ca2746/pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10", size = 32714370, upload-time = "2025-07-18T00:55:07.495Z" }, - { url = "https://files.pythonhosted.org/packages/24/11/a4431f36d5ad7d83b87146f515c063e4d07ef0b7240876ddb885e6b44f2e/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e", size = 41135424, upload-time = "2025-07-18T00:55:11.461Z" }, - { url = "https://files.pythonhosted.org/packages/74/dc/035d54638fc5d2971cbf1e987ccd45f1091c83bcf747281cf6cc25e72c88/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569", size = 42823810, upload-time = "2025-07-18T00:55:16.301Z" }, - { url = 
"https://files.pythonhosted.org/packages/2e/3b/89fced102448a9e3e0d4dded1f37fa3ce4700f02cdb8665457fcc8015f5b/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e", size = 43391538, upload-time = "2025-07-18T00:55:23.82Z" }, - { url = "https://files.pythonhosted.org/packages/fb/bb/ea7f1bd08978d39debd3b23611c293f64a642557e8141c80635d501e6d53/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c", size = 45120056, upload-time = "2025-07-18T00:55:28.231Z" }, - { url = "https://files.pythonhosted.org/packages/6e/0b/77ea0600009842b30ceebc3337639a7380cd946061b620ac1a2f3cb541e2/pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6", size = 26220568, upload-time = "2025-07-18T00:55:32.122Z" }, - { url = "https://files.pythonhosted.org/packages/ca/d4/d4f817b21aacc30195cf6a46ba041dd1be827efa4a623cc8bf39a1c2a0c0/pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd", size = 31160305, upload-time = "2025-07-18T00:55:35.373Z" }, - { url = "https://files.pythonhosted.org/packages/a2/9c/dcd38ce6e4b4d9a19e1d36914cb8e2b1da4e6003dd075474c4cfcdfe0601/pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876", size = 32684264, upload-time = "2025-07-18T00:55:39.303Z" }, - { url = "https://files.pythonhosted.org/packages/4f/74/2a2d9f8d7a59b639523454bec12dba35ae3d0a07d8ab529dc0809f74b23c/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d", size = 41108099, upload-time = "2025-07-18T00:55:42.889Z" }, - { url = 
"https://files.pythonhosted.org/packages/ad/90/2660332eeb31303c13b653ea566a9918484b6e4d6b9d2d46879a33ab0622/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e", size = 42829529, upload-time = "2025-07-18T00:55:47.069Z" }, - { url = "https://files.pythonhosted.org/packages/33/27/1a93a25c92717f6aa0fca06eb4700860577d016cd3ae51aad0e0488ac899/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82", size = 43367883, upload-time = "2025-07-18T00:55:53.069Z" }, - { url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" }, - { url = "https://files.pythonhosted.org/packages/71/30/f3795b6e192c3ab881325ffe172e526499eb3780e306a15103a2764916a2/pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18", size = 26203175, upload-time = "2025-07-18T00:56:01.364Z" }, - { url = "https://files.pythonhosted.org/packages/16/ca/c7eaa8e62db8fb37ce942b1ea0c6d7abfe3786ca193957afa25e71b81b66/pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a", size = 31154306, upload-time = "2025-07-18T00:56:04.42Z" }, - { url = "https://files.pythonhosted.org/packages/ce/e8/e87d9e3b2489302b3a1aea709aaca4b781c5252fcb812a17ab6275a9a484/pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe", size = 32680622, upload-time = "2025-07-18T00:56:07.505Z" }, - { url = 
"https://files.pythonhosted.org/packages/84/52/79095d73a742aa0aba370c7942b1b655f598069489ab387fe47261a849e1/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd", size = 41104094, upload-time = "2025-07-18T00:56:10.994Z" }, - { url = "https://files.pythonhosted.org/packages/89/4b/7782438b551dbb0468892a276b8c789b8bbdb25ea5c5eb27faadd753e037/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61", size = 42825576, upload-time = "2025-07-18T00:56:15.569Z" }, - { url = "https://files.pythonhosted.org/packages/b3/62/0f29de6e0a1e33518dec92c65be0351d32d7ca351e51ec5f4f837a9aab91/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d", size = 43368342, upload-time = "2025-07-18T00:56:19.531Z" }, - { url = "https://files.pythonhosted.org/packages/90/c7/0fa1f3f29cf75f339768cc698c8ad4ddd2481c1742e9741459911c9ac477/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99", size = 45131218, upload-time = "2025-07-18T00:56:23.347Z" }, - { url = "https://files.pythonhosted.org/packages/01/63/581f2076465e67b23bc5a37d4a2abff8362d389d29d8105832e82c9c811c/pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636", size = 26087551, upload-time = "2025-07-18T00:56:26.758Z" }, - { url = "https://files.pythonhosted.org/packages/c9/ab/357d0d9648bb8241ee7348e564f2479d206ebe6e1c47ac5027c2e31ecd39/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da", size = 31290064, upload-time = "2025-07-18T00:56:30.214Z" }, - { url = 
"https://files.pythonhosted.org/packages/3f/8a/5685d62a990e4cac2043fc76b4661bf38d06efed55cf45a334b455bd2759/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7", size = 32727837, upload-time = "2025-07-18T00:56:33.935Z" }, - { url = "https://files.pythonhosted.org/packages/fc/de/c0828ee09525c2bafefd3e736a248ebe764d07d0fd762d4f0929dbc516c9/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6", size = 41014158, upload-time = "2025-07-18T00:56:37.528Z" }, - { url = "https://files.pythonhosted.org/packages/6e/26/a2865c420c50b7a3748320b614f3484bfcde8347b2639b2b903b21ce6a72/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8", size = 42667885, upload-time = "2025-07-18T00:56:41.483Z" }, - { url = "https://files.pythonhosted.org/packages/0a/f9/4ee798dc902533159250fb4321267730bc0a107d8c6889e07c3add4fe3a5/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503", size = 43276625, upload-time = "2025-07-18T00:56:48.002Z" }, - { url = "https://files.pythonhosted.org/packages/5a/da/e02544d6997037a4b0d22d8e5f66bc9315c3671371a8b18c79ade1cefe14/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79", size = 44951890, upload-time = "2025-07-18T00:56:52.568Z" }, - { url = "https://files.pythonhosted.org/packages/e5/4e/519c1bc1876625fe6b71e9a28287c43ec2f20f73c658b9ae1d485c0c206e/pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10", size = 26371006, upload-time = "2025-07-18T00:56:56.379Z" }, +version = "22.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151, upload-time = "2025-10-24T12:30:00.762Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/9b/cb3f7e0a345353def531ca879053e9ef6b9f38ed91aebcf68b09ba54dec0/pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88", size = 34223968, upload-time = "2025-10-24T10:03:31.21Z" }, + { url = "https://files.pythonhosted.org/packages/6c/41/3184b8192a120306270c5307f105b70320fdaa592c99843c5ef78aaefdcf/pyarrow-22.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:44d2d26cda26d18f7af7db71453b7b783788322d756e81730acb98f24eb90ace", size = 35942085, upload-time = "2025-10-24T10:03:38.146Z" }, + { url = "https://files.pythonhosted.org/packages/d9/3d/a1eab2f6f08001f9fb714b8ed5cfb045e2fe3e3e3c0c221f2c9ed1e6d67d/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b9d71701ce97c95480fecb0039ec5bb889e75f110da72005743451339262f4ce", size = 44964613, upload-time = "2025-10-24T10:03:46.516Z" }, + { url = "https://files.pythonhosted.org/packages/46/46/a1d9c24baf21cfd9ce994ac820a24608decf2710521b29223d4334985127/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:710624ab925dc2b05a6229d47f6f0dac1c1155e6ed559be7109f684eba048a48", size = 47627059, upload-time = "2025-10-24T10:03:55.353Z" }, + { url = "https://files.pythonhosted.org/packages/3a/4c/f711acb13075c1391fd54bc17e078587672c575f8de2a6e62509af026dcf/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f963ba8c3b0199f9d6b794c90ec77545e05eadc83973897a4523c9e8d84e9340", size = 47947043, upload-time = "2025-10-24T10:04:05.408Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/70/1f3180dd7c2eab35c2aca2b29ace6c519f827dcd4cfeb8e0dca41612cf7a/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd0d42297ace400d8febe55f13fdf46e86754842b860c978dfec16f081e5c653", size = 50206505, upload-time = "2025-10-24T10:04:15.786Z" }, + { url = "https://files.pythonhosted.org/packages/80/07/fea6578112c8c60ffde55883a571e4c4c6bc7049f119d6b09333b5cc6f73/pyarrow-22.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:00626d9dc0f5ef3a75fe63fd68b9c7c8302d2b5bbc7f74ecaedba83447a24f84", size = 28101641, upload-time = "2025-10-24T10:04:22.57Z" }, + { url = "https://files.pythonhosted.org/packages/2e/b7/18f611a8cdc43417f9394a3ccd3eace2f32183c08b9eddc3d17681819f37/pyarrow-22.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:3e294c5eadfb93d78b0763e859a0c16d4051fc1c5231ae8956d61cb0b5666f5a", size = 34272022, upload-time = "2025-10-24T10:04:28.973Z" }, + { url = "https://files.pythonhosted.org/packages/26/5c/f259e2526c67eb4b9e511741b19870a02363a47a35edbebc55c3178db22d/pyarrow-22.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:69763ab2445f632d90b504a815a2a033f74332997052b721002298ed6de40f2e", size = 35995834, upload-time = "2025-10-24T10:04:35.467Z" }, + { url = "https://files.pythonhosted.org/packages/50/8d/281f0f9b9376d4b7f146913b26fac0aa2829cd1ee7e997f53a27411bbb92/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b41f37cabfe2463232684de44bad753d6be08a7a072f6a83447eeaf0e4d2a215", size = 45030348, upload-time = "2025-10-24T10:04:43.366Z" }, + { url = "https://files.pythonhosted.org/packages/f5/e5/53c0a1c428f0976bf22f513d79c73000926cb00b9c138d8e02daf2102e18/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35ad0f0378c9359b3f297299c3309778bb03b8612f987399a0333a560b43862d", size = 47699480, upload-time = "2025-10-24T10:04:51.486Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/e1/9dbe4c465c3365959d183e6345d0a8d1dc5b02ca3f8db4760b3bc834cf25/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8382ad21458075c2e66a82a29d650f963ce51c7708c7c0ff313a8c206c4fd5e8", size = 48011148, upload-time = "2025-10-24T10:04:59.585Z" }, + { url = "https://files.pythonhosted.org/packages/c5/b4/7caf5d21930061444c3cf4fa7535c82faf5263e22ce43af7c2759ceb5b8b/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a812a5b727bc09c3d7ea072c4eebf657c2f7066155506ba31ebf4792f88f016", size = 50276964, upload-time = "2025-10-24T10:05:08.175Z" }, + { url = "https://files.pythonhosted.org/packages/ae/f3/cec89bd99fa3abf826f14d4e53d3d11340ce6f6af4d14bdcd54cd83b6576/pyarrow-22.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ec5d40dd494882704fb876c16fa7261a69791e784ae34e6b5992e977bd2e238c", size = 28106517, upload-time = "2025-10-24T10:05:14.314Z" }, + { url = "https://files.pythonhosted.org/packages/af/63/ba23862d69652f85b615ca14ad14f3bcfc5bf1b99ef3f0cd04ff93fdad5a/pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bea79263d55c24a32b0d79c00a1c58bb2ee5f0757ed95656b01c0fb310c5af3d", size = 34211578, upload-time = "2025-10-24T10:05:21.583Z" }, + { url = "https://files.pythonhosted.org/packages/b1/d0/f9ad86fe809efd2bcc8be32032fa72e8b0d112b01ae56a053006376c5930/pyarrow-22.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:12fe549c9b10ac98c91cf791d2945e878875d95508e1a5d14091a7aaa66d9cf8", size = 35989906, upload-time = "2025-10-24T10:05:29.485Z" }, + { url = "https://files.pythonhosted.org/packages/b4/a8/f910afcb14630e64d673f15904ec27dd31f1e009b77033c365c84e8c1e1d/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:334f900ff08ce0423407af97e6c26ad5d4e3b0763645559ece6fbf3747d6a8f5", size = 45021677, upload-time = "2025-10-24T10:05:38.274Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/95/aec81f781c75cd10554dc17a25849c720d54feafb6f7847690478dcf5ef8/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c6c791b09c57ed76a18b03f2631753a4960eefbbca80f846da8baefc6491fcfe", size = 47726315, upload-time = "2025-10-24T10:05:47.314Z" }, + { url = "https://files.pythonhosted.org/packages/bb/d4/74ac9f7a54cfde12ee42734ea25d5a3c9a45db78f9def949307a92720d37/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c3200cb41cdbc65156e5f8c908d739b0dfed57e890329413da2748d1a2cd1a4e", size = 47990906, upload-time = "2025-10-24T10:05:58.254Z" }, + { url = "https://files.pythonhosted.org/packages/2e/71/fedf2499bf7a95062eafc989ace56572f3343432570e1c54e6599d5b88da/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ac93252226cf288753d8b46280f4edf3433bf9508b6977f8dd8526b521a1bbb9", size = 50306783, upload-time = "2025-10-24T10:06:08.08Z" }, + { url = "https://files.pythonhosted.org/packages/68/ed/b202abd5a5b78f519722f3d29063dda03c114711093c1995a33b8e2e0f4b/pyarrow-22.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:44729980b6c50a5f2bfcc2668d36c569ce17f8b17bccaf470c4313dcbbf13c9d", size = 27972883, upload-time = "2025-10-24T10:06:14.204Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d6/d0fac16a2963002fc22c8fa75180a838737203d558f0ed3b564c4a54eef5/pyarrow-22.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e6e95176209257803a8b3d0394f21604e796dadb643d2f7ca21b66c9c0b30c9a", size = 34204629, upload-time = "2025-10-24T10:06:20.274Z" }, + { url = "https://files.pythonhosted.org/packages/c6/9c/1d6357347fbae062ad3f17082f9ebc29cc733321e892c0d2085f42a2212b/pyarrow-22.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:001ea83a58024818826a9e3f89bf9310a114f7e26dfe404a4c32686f97bd7901", size = 35985783, upload-time = "2025-10-24T10:06:27.301Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/c0/782344c2ce58afbea010150df07e3a2f5fdad299cd631697ae7bd3bac6e3/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ce20fe000754f477c8a9125543f1936ea5b8867c5406757c224d745ed033e691", size = 45020999, upload-time = "2025-10-24T10:06:35.387Z" }, + { url = "https://files.pythonhosted.org/packages/1b/8b/5362443737a5307a7b67c1017c42cd104213189b4970bf607e05faf9c525/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e0a15757fccb38c410947df156f9749ae4a3c89b2393741a50521f39a8cf202a", size = 47724601, upload-time = "2025-10-24T10:06:43.551Z" }, + { url = "https://files.pythonhosted.org/packages/69/4d/76e567a4fc2e190ee6072967cb4672b7d9249ac59ae65af2d7e3047afa3b/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cedb9dd9358e4ea1d9bce3665ce0797f6adf97ff142c8e25b46ba9cdd508e9b6", size = 48001050, upload-time = "2025-10-24T10:06:52.284Z" }, + { url = "https://files.pythonhosted.org/packages/01/5e/5653f0535d2a1aef8223cee9d92944cb6bccfee5cf1cd3f462d7cb022790/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:252be4a05f9d9185bb8c18e83764ebcfea7185076c07a7a662253af3a8c07941", size = 50307877, upload-time = "2025-10-24T10:07:02.405Z" }, + { url = "https://files.pythonhosted.org/packages/2d/f8/1d0bd75bf9328a3b826e24a16e5517cd7f9fbf8d34a3184a4566ef5a7f29/pyarrow-22.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:a4893d31e5ef780b6edcaf63122df0f8d321088bb0dee4c8c06eccb1ca28d145", size = 27977099, upload-time = "2025-10-24T10:08:07.259Z" }, + { url = "https://files.pythonhosted.org/packages/90/81/db56870c997805bf2b0f6eeeb2d68458bf4654652dccdcf1bf7a42d80903/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f7fe3dbe871294ba70d789be16b6e7e52b418311e166e0e3cba9522f0f437fb1", size = 34336685, upload-time = "2025-10-24T10:07:11.47Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/98/0727947f199aba8a120f47dfc229eeb05df15bcd7a6f1b669e9f882afc58/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ba95112d15fd4f1105fb2402c4eab9068f0554435e9b7085924bcfaac2cc306f", size = 36032158, upload-time = "2025-10-24T10:07:18.626Z" }, + { url = "https://files.pythonhosted.org/packages/96/b4/9babdef9c01720a0785945c7cf550e4acd0ebcd7bdd2e6f0aa7981fa85e2/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c064e28361c05d72eed8e744c9605cbd6d2bb7481a511c74071fd9b24bc65d7d", size = 44892060, upload-time = "2025-10-24T10:07:26.002Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ca/2f8804edd6279f78a37062d813de3f16f29183874447ef6d1aadbb4efa0f/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6f9762274496c244d951c819348afbcf212714902742225f649cf02823a6a10f", size = 47504395, upload-time = "2025-10-24T10:07:34.09Z" }, + { url = "https://files.pythonhosted.org/packages/b9/f0/77aa5198fd3943682b2e4faaf179a674f0edea0d55d326d83cb2277d9363/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a9d9ffdc2ab696f6b15b4d1f7cec6658e1d788124418cb30030afbae31c64746", size = 48066216, upload-time = "2025-10-24T10:07:43.528Z" }, + { url = "https://files.pythonhosted.org/packages/79/87/a1937b6e78b2aff18b706d738c9e46ade5bfcf11b294e39c87706a0089ac/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ec1a15968a9d80da01e1d30349b2b0d7cc91e96588ee324ce1b5228175043e95", size = 50288552, upload-time = "2025-10-24T10:07:53.519Z" }, + { url = "https://files.pythonhosted.org/packages/60/ae/b5a5811e11f25788ccfdaa8f26b6791c9807119dffcf80514505527c384c/pyarrow-22.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:bba208d9c7decf9961998edf5c65e3ea4355d5818dd6cd0f6809bec1afb951cc", size = 28262504, upload-time = "2025-10-24T10:08:00.932Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/b0/0fa4d28a8edb42b0a7144edd20befd04173ac79819547216f8a9f36f9e50/pyarrow-22.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:9bddc2cade6561f6820d4cd73f99a0243532ad506bc510a75a5a65a522b2d74d", size = 34224062, upload-time = "2025-10-24T10:08:14.101Z" }, + { url = "https://files.pythonhosted.org/packages/0f/a8/7a719076b3c1be0acef56a07220c586f25cd24de0e3f3102b438d18ae5df/pyarrow-22.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e70ff90c64419709d38c8932ea9fe1cc98415c4f87ea8da81719e43f02534bc9", size = 35990057, upload-time = "2025-10-24T10:08:21.842Z" }, + { url = "https://files.pythonhosted.org/packages/89/3c/359ed54c93b47fb6fe30ed16cdf50e3f0e8b9ccfb11b86218c3619ae50a8/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:92843c305330aa94a36e706c16209cd4df274693e777ca47112617db7d0ef3d7", size = 45068002, upload-time = "2025-10-24T10:08:29.034Z" }, + { url = "https://files.pythonhosted.org/packages/55/fc/4945896cc8638536ee787a3bd6ce7cec8ec9acf452d78ec39ab328efa0a1/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:6dda1ddac033d27421c20d7a7943eec60be44e0db4e079f33cc5af3b8280ccde", size = 47737765, upload-time = "2025-10-24T10:08:38.559Z" }, + { url = "https://files.pythonhosted.org/packages/cd/5e/7cb7edeb2abfaa1f79b5d5eb89432356155c8426f75d3753cbcb9592c0fd/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:84378110dd9a6c06323b41b56e129c504d157d1a983ce8f5443761eb5256bafc", size = 48048139, upload-time = "2025-10-24T10:08:46.784Z" }, + { url = "https://files.pythonhosted.org/packages/88/c6/546baa7c48185f5e9d6e59277c4b19f30f48c94d9dd938c2a80d4d6b067c/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:854794239111d2b88b40b6ef92aa478024d1e5074f364033e73e21e3f76b25e0", size = 50314244, upload-time = "2025-10-24T10:08:55.771Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/79/755ff2d145aafec8d347bf18f95e4e81c00127f06d080135dfc86aea417c/pyarrow-22.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:b883fe6fd85adad7932b3271c38ac289c65b7337c2c132e9569f9d3940620730", size = 28757501, upload-time = "2025-10-24T10:09:59.891Z" }, + { url = "https://files.pythonhosted.org/packages/0e/d2/237d75ac28ced3147912954e3c1a174df43a95f4f88e467809118a8165e0/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7a820d8ae11facf32585507c11f04e3f38343c1e784c9b5a8b1da5c930547fe2", size = 34355506, upload-time = "2025-10-24T10:09:02.953Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/733dfffe6d3069740f98e57ff81007809067d68626c5faef293434d11bd6/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:c6ec3675d98915bf1ec8b3c7986422682f7232ea76cad276f4c8abd5b7319b70", size = 36047312, upload-time = "2025-10-24T10:09:10.334Z" }, + { url = "https://files.pythonhosted.org/packages/7c/2b/29d6e3782dc1f299727462c1543af357a0f2c1d3c160ce199950d9ca51eb/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3e739edd001b04f654b166204fc7a9de896cf6007eaff33409ee9e50ceaff754", size = 45081609, upload-time = "2025-10-24T10:09:18.61Z" }, + { url = "https://files.pythonhosted.org/packages/8d/42/aa9355ecc05997915af1b7b947a7f66c02dcaa927f3203b87871c114ba10/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7388ac685cab5b279a41dfe0a6ccd99e4dbf322edfb63e02fc0443bf24134e91", size = 47703663, upload-time = "2025-10-24T10:09:27.369Z" }, + { url = "https://files.pythonhosted.org/packages/ee/62/45abedde480168e83a1de005b7b7043fd553321c1e8c5a9a114425f64842/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f633074f36dbc33d5c05b5dc75371e5660f1dbf9c8b1d95669def05e5425989c", size = 48066543, upload-time = "2025-10-24T10:09:34.908Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/e9/7878940a5b072e4f3bf998770acafeae13b267f9893af5f6d4ab3904b67e/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4c19236ae2402a8663a2c8f21f1870a03cc57f0bef7e4b6eb3238cc82944de80", size = 50288838, upload-time = "2025-10-24T10:09:44.394Z" }, + { url = "https://files.pythonhosted.org/packages/7b/03/f335d6c52b4a4761bcc83499789a1e2e16d9d201a58c327a9b5cc9a41bd9/pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae", size = 29185594, upload-time = "2025-10-24T10:09:53.111Z" }, ] [[package]] @@ -5074,7 +5122,7 @@ wheels = [ [[package]] name = "scipy" -version = "1.16.2" +version = "1.16.3" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", @@ -5091,68 +5139,68 @@ resolution-markers = [ dependencies = [ { name = "numpy", marker = "python_full_version >= '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4c/3b/546a6f0bfe791bbb7f8d591613454d15097e53f906308ec6f7c1ce588e8e/scipy-1.16.2.tar.gz", hash = "sha256:af029b153d243a80afb6eabe40b0a07f8e35c9adc269c019f364ad747f826a6b", size = 30580599, upload-time = "2025-09-11T17:48:08.271Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/ef/37ed4b213d64b48422df92560af7300e10fe30b5d665dd79932baebee0c6/scipy-1.16.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6ab88ea43a57da1af33292ebd04b417e8e2eaf9d5aa05700be8d6e1b6501cd92", size = 36619956, upload-time = "2025-09-11T17:39:20.5Z" }, - { url = "https://files.pythonhosted.org/packages/85/ab/5c2eba89b9416961a982346a4d6a647d78c91ec96ab94ed522b3b6baf444/scipy-1.16.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c95e96c7305c96ede73a7389f46ccd6c659c4da5ef1b2789466baeaed3622b6e", size = 28931117, upload-time = "2025-09-11T17:39:29.06Z" }, - { url = 
"https://files.pythonhosted.org/packages/80/d1/eed51ab64d227fe60229a2d57fb60ca5898cfa50ba27d4f573e9e5f0b430/scipy-1.16.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:87eb178db04ece7c698220d523c170125dbffebb7af0345e66c3554f6f60c173", size = 20921997, upload-time = "2025-09-11T17:39:34.892Z" }, - { url = "https://files.pythonhosted.org/packages/be/7c/33ea3e23bbadde96726edba6bf9111fb1969d14d9d477ffa202c67bec9da/scipy-1.16.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:4e409eac067dcee96a57fbcf424c13f428037827ec7ee3cb671ff525ca4fc34d", size = 23523374, upload-time = "2025-09-11T17:39:40.846Z" }, - { url = "https://files.pythonhosted.org/packages/96/0b/7399dc96e1e3f9a05e258c98d716196a34f528eef2ec55aad651ed136d03/scipy-1.16.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e574be127bb760f0dad24ff6e217c80213d153058372362ccb9555a10fc5e8d2", size = 33583702, upload-time = "2025-09-11T17:39:49.011Z" }, - { url = "https://files.pythonhosted.org/packages/1a/bc/a5c75095089b96ea72c1bd37a4497c24b581ec73db4ef58ebee142ad2d14/scipy-1.16.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f5db5ba6188d698ba7abab982ad6973265b74bb40a1efe1821b58c87f73892b9", size = 35883427, upload-time = "2025-09-11T17:39:57.406Z" }, - { url = "https://files.pythonhosted.org/packages/ab/66/e25705ca3d2b87b97fe0a278a24b7f477b4023a926847935a1a71488a6a6/scipy-1.16.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ec6e74c4e884104ae006d34110677bfe0098203a3fec2f3faf349f4cb05165e3", size = 36212940, upload-time = "2025-09-11T17:40:06.013Z" }, - { url = "https://files.pythonhosted.org/packages/d6/fd/0bb911585e12f3abdd603d721d83fc1c7492835e1401a0e6d498d7822b4b/scipy-1.16.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:912f46667d2d3834bc3d57361f854226475f695eb08c08a904aadb1c936b6a88", size = 38865092, upload-time = "2025-09-11T17:40:15.143Z" }, - { url = 
"https://files.pythonhosted.org/packages/d6/73/c449a7d56ba6e6f874183759f8483cde21f900a8be117d67ffbb670c2958/scipy-1.16.2-cp311-cp311-win_amd64.whl", hash = "sha256:91e9e8a37befa5a69e9cacbe0bcb79ae5afb4a0b130fd6db6ee6cc0d491695fa", size = 38687626, upload-time = "2025-09-11T17:40:24.041Z" }, - { url = "https://files.pythonhosted.org/packages/68/72/02f37316adf95307f5d9e579023c6899f89ff3a051fa079dbd6faafc48e5/scipy-1.16.2-cp311-cp311-win_arm64.whl", hash = "sha256:f3bf75a6dcecab62afde4d1f973f1692be013110cad5338007927db8da73249c", size = 25503506, upload-time = "2025-09-11T17:40:30.703Z" }, - { url = "https://files.pythonhosted.org/packages/b7/8d/6396e00db1282279a4ddd507c5f5e11f606812b608ee58517ce8abbf883f/scipy-1.16.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:89d6c100fa5c48472047632e06f0876b3c4931aac1f4291afc81a3644316bb0d", size = 36646259, upload-time = "2025-09-11T17:40:39.329Z" }, - { url = "https://files.pythonhosted.org/packages/3b/93/ea9edd7e193fceb8eef149804491890bde73fb169c896b61aa3e2d1e4e77/scipy-1.16.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ca748936cd579d3f01928b30a17dc474550b01272d8046e3e1ee593f23620371", size = 28888976, upload-time = "2025-09-11T17:40:46.82Z" }, - { url = "https://files.pythonhosted.org/packages/91/4d/281fddc3d80fd738ba86fd3aed9202331180b01e2c78eaae0642f22f7e83/scipy-1.16.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:fac4f8ce2ddb40e2e3d0f7ec36d2a1e7f92559a2471e59aec37bd8d9de01fec0", size = 20879905, upload-time = "2025-09-11T17:40:52.545Z" }, - { url = "https://files.pythonhosted.org/packages/69/40/b33b74c84606fd301b2915f0062e45733c6ff5708d121dd0deaa8871e2d0/scipy-1.16.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:033570f1dcefd79547a88e18bccacff025c8c647a330381064f561d43b821232", size = 23553066, upload-time = "2025-09-11T17:40:59.014Z" }, - { url = 
"https://files.pythonhosted.org/packages/55/a7/22c739e2f21a42cc8f16bc76b47cff4ed54fbe0962832c589591c2abec34/scipy-1.16.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ea3421209bf00c8a5ef2227de496601087d8f638a2363ee09af059bd70976dc1", size = 33336407, upload-time = "2025-09-11T17:41:06.796Z" }, - { url = "https://files.pythonhosted.org/packages/53/11/a0160990b82999b45874dc60c0c183d3a3a969a563fffc476d5a9995c407/scipy-1.16.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f66bd07ba6f84cd4a380b41d1bf3c59ea488b590a2ff96744845163309ee8e2f", size = 35673281, upload-time = "2025-09-11T17:41:15.055Z" }, - { url = "https://files.pythonhosted.org/packages/96/53/7ef48a4cfcf243c3d0f1643f5887c81f29fdf76911c4e49331828e19fc0a/scipy-1.16.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5e9feab931bd2aea4a23388c962df6468af3d808ddf2d40f94a81c5dc38f32ef", size = 36004222, upload-time = "2025-09-11T17:41:23.868Z" }, - { url = "https://files.pythonhosted.org/packages/49/7f/71a69e0afd460049d41c65c630c919c537815277dfea214031005f474d78/scipy-1.16.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:03dfc75e52f72cf23ec2ced468645321407faad8f0fe7b1f5b49264adbc29cb1", size = 38664586, upload-time = "2025-09-11T17:41:31.021Z" }, - { url = "https://files.pythonhosted.org/packages/34/95/20e02ca66fb495a95fba0642fd48e0c390d0ece9b9b14c6e931a60a12dea/scipy-1.16.2-cp312-cp312-win_amd64.whl", hash = "sha256:0ce54e07bbb394b417457409a64fd015be623f36e330ac49306433ffe04bc97e", size = 38550641, upload-time = "2025-09-11T17:41:36.61Z" }, - { url = "https://files.pythonhosted.org/packages/92/ad/13646b9beb0a95528ca46d52b7babafbe115017814a611f2065ee4e61d20/scipy-1.16.2-cp312-cp312-win_arm64.whl", hash = "sha256:2a8ffaa4ac0df81a0b94577b18ee079f13fecdb924df3328fc44a7dc5ac46851", size = 25456070, upload-time = "2025-09-11T17:41:41.3Z" }, - { url = 
"https://files.pythonhosted.org/packages/c1/27/c5b52f1ee81727a9fc457f5ac1e9bf3d6eab311805ea615c83c27ba06400/scipy-1.16.2-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:84f7bf944b43e20b8a894f5fe593976926744f6c185bacfcbdfbb62736b5cc70", size = 36604856, upload-time = "2025-09-11T17:41:47.695Z" }, - { url = "https://files.pythonhosted.org/packages/32/a9/15c20d08e950b540184caa8ced675ba1128accb0e09c653780ba023a4110/scipy-1.16.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:5c39026d12edc826a1ef2ad35ad1e6d7f087f934bb868fc43fa3049c8b8508f9", size = 28864626, upload-time = "2025-09-11T17:41:52.642Z" }, - { url = "https://files.pythonhosted.org/packages/4c/fc/ea36098df653cca26062a627c1a94b0de659e97127c8491e18713ca0e3b9/scipy-1.16.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e52729ffd45b68777c5319560014d6fd251294200625d9d70fd8626516fc49f5", size = 20855689, upload-time = "2025-09-11T17:41:57.886Z" }, - { url = "https://files.pythonhosted.org/packages/dc/6f/d0b53be55727f3e6d7c72687ec18ea6d0047cf95f1f77488b99a2bafaee1/scipy-1.16.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:024dd4a118cccec09ca3209b7e8e614931a6ffb804b2a601839499cb88bdf925", size = 23512151, upload-time = "2025-09-11T17:42:02.303Z" }, - { url = "https://files.pythonhosted.org/packages/11/85/bf7dab56e5c4b1d3d8eef92ca8ede788418ad38a7dc3ff50262f00808760/scipy-1.16.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7a5dc7ee9c33019973a470556081b0fd3c9f4c44019191039f9769183141a4d9", size = 33329824, upload-time = "2025-09-11T17:42:07.549Z" }, - { url = "https://files.pythonhosted.org/packages/da/6a/1a927b14ddc7714111ea51f4e568203b2bb6ed59bdd036d62127c1a360c8/scipy-1.16.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c2275ff105e508942f99d4e3bc56b6ef5e4b3c0af970386ca56b777608ce95b7", size = 35681881, upload-time = "2025-09-11T17:42:13.255Z" }, - { url = 
"https://files.pythonhosted.org/packages/c1/5f/331148ea5780b4fcc7007a4a6a6ee0a0c1507a796365cc642d4d226e1c3a/scipy-1.16.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:af80196eaa84f033e48444d2e0786ec47d328ba00c71e4299b602235ffef9acb", size = 36006219, upload-time = "2025-09-11T17:42:18.765Z" }, - { url = "https://files.pythonhosted.org/packages/46/3a/e991aa9d2aec723b4a8dcfbfc8365edec5d5e5f9f133888067f1cbb7dfc1/scipy-1.16.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9fb1eb735fe3d6ed1f89918224e3385fbf6f9e23757cacc35f9c78d3b712dd6e", size = 38682147, upload-time = "2025-09-11T17:42:25.177Z" }, - { url = "https://files.pythonhosted.org/packages/a1/57/0f38e396ad19e41b4c5db66130167eef8ee620a49bc7d0512e3bb67e0cab/scipy-1.16.2-cp313-cp313-win_amd64.whl", hash = "sha256:fda714cf45ba43c9d3bae8f2585c777f64e3f89a2e073b668b32ede412d8f52c", size = 38520766, upload-time = "2025-09-11T17:43:25.342Z" }, - { url = "https://files.pythonhosted.org/packages/1b/a5/85d3e867b6822d331e26c862a91375bb7746a0b458db5effa093d34cdb89/scipy-1.16.2-cp313-cp313-win_arm64.whl", hash = "sha256:2f5350da923ccfd0b00e07c3e5cfb316c1c0d6c1d864c07a72d092e9f20db104", size = 25451169, upload-time = "2025-09-11T17:43:30.198Z" }, - { url = "https://files.pythonhosted.org/packages/09/d9/60679189bcebda55992d1a45498de6d080dcaf21ce0c8f24f888117e0c2d/scipy-1.16.2-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:53d8d2ee29b925344c13bda64ab51785f016b1b9617849dac10897f0701b20c1", size = 37012682, upload-time = "2025-09-11T17:42:30.677Z" }, - { url = "https://files.pythonhosted.org/packages/83/be/a99d13ee4d3b7887a96f8c71361b9659ba4ef34da0338f14891e102a127f/scipy-1.16.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:9e05e33657efb4c6a9d23bd8300101536abd99c85cca82da0bffff8d8764d08a", size = 29389926, upload-time = "2025-09-11T17:42:35.845Z" }, - { url = 
"https://files.pythonhosted.org/packages/bf/0a/130164a4881cec6ca8c00faf3b57926f28ed429cd6001a673f83c7c2a579/scipy-1.16.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:7fe65b36036357003b3ef9d37547abeefaa353b237e989c21027b8ed62b12d4f", size = 21381152, upload-time = "2025-09-11T17:42:40.07Z" }, - { url = "https://files.pythonhosted.org/packages/47/a6/503ffb0310ae77fba874e10cddfc4a1280bdcca1d13c3751b8c3c2996cf8/scipy-1.16.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:6406d2ac6d40b861cccf57f49592f9779071655e9f75cd4f977fa0bdd09cb2e4", size = 23914410, upload-time = "2025-09-11T17:42:44.313Z" }, - { url = "https://files.pythonhosted.org/packages/fa/c7/1147774bcea50d00c02600aadaa919facbd8537997a62496270133536ed6/scipy-1.16.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ff4dc42bd321991fbf611c23fc35912d690f731c9914bf3af8f417e64aca0f21", size = 33481880, upload-time = "2025-09-11T17:42:49.325Z" }, - { url = "https://files.pythonhosted.org/packages/6a/74/99d5415e4c3e46b2586f30cdbecb95e101c7192628a484a40dd0d163811a/scipy-1.16.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:654324826654d4d9133e10675325708fb954bc84dae6e9ad0a52e75c6b1a01d7", size = 35791425, upload-time = "2025-09-11T17:42:54.711Z" }, - { url = "https://files.pythonhosted.org/packages/1b/ee/a6559de7c1cc710e938c0355d9d4fbcd732dac4d0d131959d1f3b63eb29c/scipy-1.16.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:63870a84cd15c44e65220eaed2dac0e8f8b26bbb991456a033c1d9abfe8a94f8", size = 36178622, upload-time = "2025-09-11T17:43:00.375Z" }, - { url = "https://files.pythonhosted.org/packages/4e/7b/f127a5795d5ba8ece4e0dce7d4a9fb7cb9e4f4757137757d7a69ab7d4f1a/scipy-1.16.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:fa01f0f6a3050fa6a9771a95d5faccc8e2f5a92b4a2e5440a0fa7264a2398472", size = 38783985, upload-time = "2025-09-11T17:43:06.661Z" }, - { url = 
"https://files.pythonhosted.org/packages/3e/9f/bc81c1d1e033951eb5912cd3750cc005943afa3e65a725d2443a3b3c4347/scipy-1.16.2-cp313-cp313t-win_amd64.whl", hash = "sha256:116296e89fba96f76353a8579820c2512f6e55835d3fad7780fece04367de351", size = 38631367, upload-time = "2025-09-11T17:43:14.44Z" }, - { url = "https://files.pythonhosted.org/packages/d6/5e/2cc7555fd81d01814271412a1d59a289d25f8b63208a0a16c21069d55d3e/scipy-1.16.2-cp313-cp313t-win_arm64.whl", hash = "sha256:98e22834650be81d42982360382b43b17f7ba95e0e6993e2a4f5b9ad9283a94d", size = 25787992, upload-time = "2025-09-11T17:43:19.745Z" }, - { url = "https://files.pythonhosted.org/packages/8b/ac/ad8951250516db71619f0bd3b2eb2448db04b720a003dd98619b78b692c0/scipy-1.16.2-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:567e77755019bb7461513c87f02bb73fb65b11f049aaaa8ca17cfaa5a5c45d77", size = 36595109, upload-time = "2025-09-11T17:43:35.713Z" }, - { url = "https://files.pythonhosted.org/packages/ff/f6/5779049ed119c5b503b0f3dc6d6f3f68eefc3a9190d4ad4c276f854f051b/scipy-1.16.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:17d9bb346194e8967296621208fcdfd39b55498ef7d2f376884d5ac47cec1a70", size = 28859110, upload-time = "2025-09-11T17:43:40.814Z" }, - { url = "https://files.pythonhosted.org/packages/82/09/9986e410ae38bf0a0c737ff8189ac81a93b8e42349aac009891c054403d7/scipy-1.16.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0a17541827a9b78b777d33b623a6dcfe2ef4a25806204d08ead0768f4e529a88", size = 20850110, upload-time = "2025-09-11T17:43:44.981Z" }, - { url = "https://files.pythonhosted.org/packages/0d/ad/485cdef2d9215e2a7df6d61b81d2ac073dfacf6ae24b9ae87274c4e936ae/scipy-1.16.2-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:d7d4c6ba016ffc0f9568d012f5f1eb77ddd99412aea121e6fa8b4c3b7cbad91f", size = 23497014, upload-time = "2025-09-11T17:43:49.074Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/74/f6a852e5d581122b8f0f831f1d1e32fb8987776ed3658e95c377d308ed86/scipy-1.16.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9702c4c023227785c779cba2e1d6f7635dbb5b2e0936cdd3a4ecb98d78fd41eb", size = 33401155, upload-time = "2025-09-11T17:43:54.661Z" }, - { url = "https://files.pythonhosted.org/packages/d9/f5/61d243bbc7c6e5e4e13dde9887e84a5cbe9e0f75fd09843044af1590844e/scipy-1.16.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d1cdf0ac28948d225decdefcc45ad7dd91716c29ab56ef32f8e0d50657dffcc7", size = 35691174, upload-time = "2025-09-11T17:44:00.101Z" }, - { url = "https://files.pythonhosted.org/packages/03/99/59933956331f8cc57e406cdb7a483906c74706b156998f322913e789c7e1/scipy-1.16.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:70327d6aa572a17c2941cdfb20673f82e536e91850a2e4cb0c5b858b690e1548", size = 36070752, upload-time = "2025-09-11T17:44:05.619Z" }, - { url = "https://files.pythonhosted.org/packages/c6/7d/00f825cfb47ee19ef74ecf01244b43e95eae74e7e0ff796026ea7cd98456/scipy-1.16.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5221c0b2a4b58aa7c4ed0387d360fd90ee9086d383bb34d9f2789fafddc8a936", size = 38701010, upload-time = "2025-09-11T17:44:11.322Z" }, - { url = "https://files.pythonhosted.org/packages/e4/9f/b62587029980378304ba5a8563d376c96f40b1e133daacee76efdcae32de/scipy-1.16.2-cp314-cp314-win_amd64.whl", hash = "sha256:f5a85d7b2b708025af08f060a496dd261055b617d776fc05a1a1cc69e09fe9ff", size = 39360061, upload-time = "2025-09-11T17:45:09.814Z" }, - { url = "https://files.pythonhosted.org/packages/82/04/7a2f1609921352c7fbee0815811b5050582f67f19983096c4769867ca45f/scipy-1.16.2-cp314-cp314-win_arm64.whl", hash = "sha256:2cc73a33305b4b24556957d5857d6253ce1e2dcd67fa0ff46d87d1670b3e1e1d", size = 26126914, upload-time = "2025-09-11T17:45:14.73Z" }, - { url = 
"https://files.pythonhosted.org/packages/51/b9/60929ce350c16b221928725d2d1d7f86cf96b8bc07415547057d1196dc92/scipy-1.16.2-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:9ea2a3fed83065d77367775d689401a703d0f697420719ee10c0780bcab594d8", size = 37013193, upload-time = "2025-09-11T17:44:16.757Z" }, - { url = "https://files.pythonhosted.org/packages/2a/41/ed80e67782d4bc5fc85a966bc356c601afddd175856ba7c7bb6d9490607e/scipy-1.16.2-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7280d926f11ca945c3ef92ba960fa924e1465f8d07ce3a9923080363390624c4", size = 29390172, upload-time = "2025-09-11T17:44:21.783Z" }, - { url = "https://files.pythonhosted.org/packages/c4/a3/2f673ace4090452696ccded5f5f8efffb353b8f3628f823a110e0170b605/scipy-1.16.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:8afae1756f6a1fe04636407ef7dbece33d826a5d462b74f3d0eb82deabefd831", size = 21381326, upload-time = "2025-09-11T17:44:25.982Z" }, - { url = "https://files.pythonhosted.org/packages/42/bf/59df61c5d51395066c35836b78136accf506197617c8662e60ea209881e1/scipy-1.16.2-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:5c66511f29aa8d233388e7416a3f20d5cae7a2744d5cee2ecd38c081f4e861b3", size = 23915036, upload-time = "2025-09-11T17:44:30.527Z" }, - { url = "https://files.pythonhosted.org/packages/91/c3/edc7b300dc16847ad3672f1a6f3f7c5d13522b21b84b81c265f4f2760d4a/scipy-1.16.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:efe6305aeaa0e96b0ccca5ff647a43737d9a092064a3894e46c414db84bc54ac", size = 33484341, upload-time = "2025-09-11T17:44:35.981Z" }, - { url = "https://files.pythonhosted.org/packages/26/c7/24d1524e72f06ff141e8d04b833c20db3021020563272ccb1b83860082a9/scipy-1.16.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f3a337d9ae06a1e8d655ee9d8ecb835ea5ddcdcbd8d23012afa055ab014f374", size = 35790840, upload-time = "2025-09-11T17:44:41.76Z" }, - { url = 
"https://files.pythonhosted.org/packages/aa/b7/5aaad984eeedd56858dc33d75efa59e8ce798d918e1033ef62d2708f2c3d/scipy-1.16.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bab3605795d269067d8ce78a910220262711b753de8913d3deeaedb5dded3bb6", size = 36174716, upload-time = "2025-09-11T17:44:47.316Z" }, - { url = "https://files.pythonhosted.org/packages/fd/c2/e276a237acb09824822b0ada11b028ed4067fdc367a946730979feacb870/scipy-1.16.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b0348d8ddb55be2a844c518cd8cc8deeeb8aeba707cf834db5758fc89b476a2c", size = 38790088, upload-time = "2025-09-11T17:44:53.011Z" }, - { url = "https://files.pythonhosted.org/packages/c6/b4/5c18a766e8353015439f3780f5fc473f36f9762edc1a2e45da3ff5a31b21/scipy-1.16.2-cp314-cp314t-win_amd64.whl", hash = "sha256:26284797e38b8a75e14ea6631d29bda11e76ceaa6ddb6fdebbfe4c4d90faf2f9", size = 39457455, upload-time = "2025-09-11T17:44:58.899Z" }, - { url = "https://files.pythonhosted.org/packages/97/30/2f9a5243008f76dfc5dee9a53dfb939d9b31e16ce4bd4f2e628bfc5d89d2/scipy-1.16.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d2a4472c231328d4de38d5f1f68fdd6d28a615138f842580a8a321b5845cf779", size = 26448374, upload-time = "2025-09-11T17:45:03.45Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/0a/ca/d8ace4f98322d01abcd52d381134344bf7b431eba7ed8b42bdea5a3c2ac9/scipy-1.16.3.tar.gz", hash = "sha256:01e87659402762f43bd2fee13370553a17ada367d42e7487800bf2916535aecb", size = 30597883, upload-time = "2025-10-28T17:38:54.068Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/5f/6f37d7439de1455ce9c5a556b8d1db0979f03a796c030bafdf08d35b7bf9/scipy-1.16.3-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:40be6cf99e68b6c4321e9f8782e7d5ff8265af28ef2cd56e9c9b2638fa08ad97", size = 36630881, upload-time = "2025-10-28T17:31:47.104Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/89/d70e9f628749b7e4db2aa4cd89735502ff3f08f7b9b27d2e799485987cd9/scipy-1.16.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:8be1ca9170fcb6223cc7c27f4305d680ded114a1567c0bd2bfcbf947d1b17511", size = 28941012, upload-time = "2025-10-28T17:31:53.411Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a8/0e7a9a6872a923505dbdf6bb93451edcac120363131c19013044a1e7cb0c/scipy-1.16.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bea0a62734d20d67608660f69dcda23e7f90fb4ca20974ab80b6ed40df87a005", size = 20931935, upload-time = "2025-10-28T17:31:57.361Z" }, + { url = "https://files.pythonhosted.org/packages/bd/c7/020fb72bd79ad798e4dbe53938543ecb96b3a9ac3fe274b7189e23e27353/scipy-1.16.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:2a207a6ce9c24f1951241f4693ede2d393f59c07abc159b2cb2be980820e01fb", size = 23534466, upload-time = "2025-10-28T17:32:01.875Z" }, + { url = "https://files.pythonhosted.org/packages/be/a0/668c4609ce6dbf2f948e167836ccaf897f95fb63fa231c87da7558a374cd/scipy-1.16.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:532fb5ad6a87e9e9cd9c959b106b73145a03f04c7d57ea3e6f6bb60b86ab0876", size = 33593618, upload-time = "2025-10-28T17:32:06.902Z" }, + { url = "https://files.pythonhosted.org/packages/ca/6e/8942461cf2636cdae083e3eb72622a7fbbfa5cf559c7d13ab250a5dbdc01/scipy-1.16.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0151a0749efeaaab78711c78422d413c583b8cdd2011a3c1d6c794938ee9fdb2", size = 35899798, upload-time = "2025-10-28T17:32:12.665Z" }, + { url = "https://files.pythonhosted.org/packages/79/e8/d0f33590364cdbd67f28ce79368b373889faa4ee959588beddf6daef9abe/scipy-1.16.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7180967113560cca57418a7bc719e30366b47959dd845a93206fbed693c867e", size = 36226154, upload-time = "2025-10-28T17:32:17.961Z" }, + { url = 
"https://files.pythonhosted.org/packages/39/c1/1903de608c0c924a1749c590064e65810f8046e437aba6be365abc4f7557/scipy-1.16.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:deb3841c925eeddb6afc1e4e4a45e418d19ec7b87c5df177695224078e8ec733", size = 38878540, upload-time = "2025-10-28T17:32:23.907Z" }, + { url = "https://files.pythonhosted.org/packages/f1/d0/22ec7036ba0b0a35bccb7f25ab407382ed34af0b111475eb301c16f8a2e5/scipy-1.16.3-cp311-cp311-win_amd64.whl", hash = "sha256:53c3844d527213631e886621df5695d35e4f6a75f620dca412bcd292f6b87d78", size = 38722107, upload-time = "2025-10-28T17:32:29.921Z" }, + { url = "https://files.pythonhosted.org/packages/7b/60/8a00e5a524bb3bf8898db1650d350f50e6cffb9d7a491c561dc9826c7515/scipy-1.16.3-cp311-cp311-win_arm64.whl", hash = "sha256:9452781bd879b14b6f055b26643703551320aa8d79ae064a71df55c00286a184", size = 25506272, upload-time = "2025-10-28T17:32:34.577Z" }, + { url = "https://files.pythonhosted.org/packages/40/41/5bf55c3f386b1643812f3a5674edf74b26184378ef0f3e7c7a09a7e2ca7f/scipy-1.16.3-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:81fc5827606858cf71446a5e98715ba0e11f0dbc83d71c7409d05486592a45d6", size = 36659043, upload-time = "2025-10-28T17:32:40.285Z" }, + { url = "https://files.pythonhosted.org/packages/1e/0f/65582071948cfc45d43e9870bf7ca5f0e0684e165d7c9ef4e50d783073eb/scipy-1.16.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:c97176013d404c7346bf57874eaac5187d969293bf40497140b0a2b2b7482e07", size = 28898986, upload-time = "2025-10-28T17:32:45.325Z" }, + { url = "https://files.pythonhosted.org/packages/96/5e/36bf3f0ac298187d1ceadde9051177d6a4fe4d507e8f59067dc9dd39e650/scipy-1.16.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2b71d93c8a9936046866acebc915e2af2e292b883ed6e2cbe5c34beb094b82d9", size = 20889814, upload-time = "2025-10-28T17:32:49.277Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/35/178d9d0c35394d5d5211bbff7ac4f2986c5488b59506fef9e1de13ea28d3/scipy-1.16.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3d4a07a8e785d80289dfe66b7c27d8634a773020742ec7187b85ccc4b0e7b686", size = 23565795, upload-time = "2025-10-28T17:32:53.337Z" }, + { url = "https://files.pythonhosted.org/packages/fa/46/d1146ff536d034d02f83c8afc3c4bab2eddb634624d6529a8512f3afc9da/scipy-1.16.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0553371015692a898e1aa858fed67a3576c34edefa6b7ebdb4e9dde49ce5c203", size = 33349476, upload-time = "2025-10-28T17:32:58.353Z" }, + { url = "https://files.pythonhosted.org/packages/79/2e/415119c9ab3e62249e18c2b082c07aff907a273741b3f8160414b0e9193c/scipy-1.16.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:72d1717fd3b5e6ec747327ce9bda32d5463f472c9dce9f54499e81fbd50245a1", size = 35676692, upload-time = "2025-10-28T17:33:03.88Z" }, + { url = "https://files.pythonhosted.org/packages/27/82/df26e44da78bf8d2aeaf7566082260cfa15955a5a6e96e6a29935b64132f/scipy-1.16.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1fb2472e72e24d1530debe6ae078db70fb1605350c88a3d14bc401d6306dbffe", size = 36019345, upload-time = "2025-10-28T17:33:09.773Z" }, + { url = "https://files.pythonhosted.org/packages/82/31/006cbb4b648ba379a95c87262c2855cd0d09453e500937f78b30f02fa1cd/scipy-1.16.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c5192722cffe15f9329a3948c4b1db789fbb1f05c97899187dcf009b283aea70", size = 38678975, upload-time = "2025-10-28T17:33:15.809Z" }, + { url = "https://files.pythonhosted.org/packages/c2/7f/acbd28c97e990b421af7d6d6cd416358c9c293fc958b8529e0bd5d2a2a19/scipy-1.16.3-cp312-cp312-win_amd64.whl", hash = "sha256:56edc65510d1331dae01ef9b658d428e33ed48b4f77b1d51caf479a0253f96dc", size = 38555926, upload-time = "2025-10-28T17:33:21.388Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/69/c5c7807fd007dad4f48e0a5f2153038dc96e8725d3345b9ee31b2b7bed46/scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2", size = 25463014, upload-time = "2025-10-28T17:33:25.975Z" }, + { url = "https://files.pythonhosted.org/packages/72/f1/57e8327ab1508272029e27eeef34f2302ffc156b69e7e233e906c2a5c379/scipy-1.16.3-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:d2ec56337675e61b312179a1ad124f5f570c00f920cc75e1000025451b88241c", size = 36617856, upload-time = "2025-10-28T17:33:31.375Z" }, + { url = "https://files.pythonhosted.org/packages/44/13/7e63cfba8a7452eb756306aa2fd9b37a29a323b672b964b4fdeded9a3f21/scipy-1.16.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:16b8bc35a4cc24db80a0ec836a9286d0e31b2503cb2fd7ff7fb0e0374a97081d", size = 28874306, upload-time = "2025-10-28T17:33:36.516Z" }, + { url = "https://files.pythonhosted.org/packages/15/65/3a9400efd0228a176e6ec3454b1fa998fbbb5a8defa1672c3f65706987db/scipy-1.16.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:5803c5fadd29de0cf27fa08ccbfe7a9e5d741bf63e4ab1085437266f12460ff9", size = 20865371, upload-time = "2025-10-28T17:33:42.094Z" }, + { url = "https://files.pythonhosted.org/packages/33/d7/eda09adf009a9fb81827194d4dd02d2e4bc752cef16737cc4ef065234031/scipy-1.16.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:b81c27fc41954319a943d43b20e07c40bdcd3ff7cf013f4fb86286faefe546c4", size = 23524877, upload-time = "2025-10-28T17:33:48.483Z" }, + { url = "https://files.pythonhosted.org/packages/7d/6b/3f911e1ebc364cb81320223a3422aab7d26c9c7973109a9cd0f27c64c6c0/scipy-1.16.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0c3b4dd3d9b08dbce0f3440032c52e9e2ab9f96ade2d3943313dfe51a7056959", size = 33342103, upload-time = "2025-10-28T17:33:56.495Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/f6/4bfb5695d8941e5c570a04d9fcd0d36bce7511b7d78e6e75c8f9791f82d0/scipy-1.16.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7dc1360c06535ea6116a2220f760ae572db9f661aba2d88074fe30ec2aa1ff88", size = 35697297, upload-time = "2025-10-28T17:34:04.722Z" }, + { url = "https://files.pythonhosted.org/packages/04/e1/6496dadbc80d8d896ff72511ecfe2316b50313bfc3ebf07a3f580f08bd8c/scipy-1.16.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:663b8d66a8748051c3ee9c96465fb417509315b99c71550fda2591d7dd634234", size = 36021756, upload-time = "2025-10-28T17:34:13.482Z" }, + { url = "https://files.pythonhosted.org/packages/fe/bd/a8c7799e0136b987bda3e1b23d155bcb31aec68a4a472554df5f0937eef7/scipy-1.16.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eab43fae33a0c39006a88096cd7b4f4ef545ea0447d250d5ac18202d40b6611d", size = 38696566, upload-time = "2025-10-28T17:34:22.384Z" }, + { url = "https://files.pythonhosted.org/packages/cd/01/1204382461fcbfeb05b6161b594f4007e78b6eba9b375382f79153172b4d/scipy-1.16.3-cp313-cp313-win_amd64.whl", hash = "sha256:062246acacbe9f8210de8e751b16fc37458213f124bef161a5a02c7a39284304", size = 38529877, upload-time = "2025-10-28T17:35:51.076Z" }, + { url = "https://files.pythonhosted.org/packages/7f/14/9d9fbcaa1260a94f4bb5b64ba9213ceb5d03cd88841fe9fd1ffd47a45b73/scipy-1.16.3-cp313-cp313-win_arm64.whl", hash = "sha256:50a3dbf286dbc7d84f176f9a1574c705f277cb6565069f88f60db9eafdbe3ee2", size = 25455366, upload-time = "2025-10-28T17:35:59.014Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a3/9ec205bd49f42d45d77f1730dbad9ccf146244c1647605cf834b3a8c4f36/scipy-1.16.3-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:fb4b29f4cf8cc5a8d628bc8d8e26d12d7278cd1f219f22698a378c3d67db5e4b", size = 37027931, upload-time = "2025-10-28T17:34:31.451Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/06/ca9fd1f3a4589cbd825b1447e5db3a8ebb969c1eaf22c8579bd286f51b6d/scipy-1.16.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:8d09d72dc92742988b0e7750bddb8060b0c7079606c0d24a8cc8e9c9c11f9079", size = 29400081, upload-time = "2025-10-28T17:34:39.087Z" }, + { url = "https://files.pythonhosted.org/packages/6a/56/933e68210d92657d93fb0e381683bc0e53a965048d7358ff5fbf9e6a1b17/scipy-1.16.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:03192a35e661470197556de24e7cb1330d84b35b94ead65c46ad6f16f6b28f2a", size = 21391244, upload-time = "2025-10-28T17:34:45.234Z" }, + { url = "https://files.pythonhosted.org/packages/a8/7e/779845db03dc1418e215726329674b40576879b91814568757ff0014ad65/scipy-1.16.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:57d01cb6f85e34f0946b33caa66e892aae072b64b034183f3d87c4025802a119", size = 23929753, upload-time = "2025-10-28T17:34:51.793Z" }, + { url = "https://files.pythonhosted.org/packages/4c/4b/f756cf8161d5365dcdef9e5f460ab226c068211030a175d2fc7f3f41ca64/scipy-1.16.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:96491a6a54e995f00a28a3c3badfff58fd093bf26cd5fb34a2188c8c756a3a2c", size = 33496912, upload-time = "2025-10-28T17:34:59.8Z" }, + { url = "https://files.pythonhosted.org/packages/09/b5/222b1e49a58668f23839ca1542a6322bb095ab8d6590d4f71723869a6c2c/scipy-1.16.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cd13e354df9938598af2be05822c323e97132d5e6306b83a3b4ee6724c6e522e", size = 35802371, upload-time = "2025-10-28T17:35:08.173Z" }, + { url = "https://files.pythonhosted.org/packages/c1/8d/5964ef68bb31829bde27611f8c9deeac13764589fe74a75390242b64ca44/scipy-1.16.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:63d3cdacb8a824a295191a723ee5e4ea7768ca5ca5f2838532d9f2e2b3ce2135", size = 36190477, upload-time = "2025-10-28T17:35:16.7Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/f2/b31d75cb9b5fa4dd39a0a931ee9b33e7f6f36f23be5ef560bf72e0f92f32/scipy-1.16.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e7efa2681ea410b10dde31a52b18b0154d66f2485328830e45fdf183af5aefc6", size = 38796678, upload-time = "2025-10-28T17:35:26.354Z" }, + { url = "https://files.pythonhosted.org/packages/b4/1e/b3723d8ff64ab548c38d87055483714fefe6ee20e0189b62352b5e015bb1/scipy-1.16.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2d1ae2cf0c350e7705168ff2429962a89ad90c2d49d1dd300686d8b2a5af22fc", size = 38640178, upload-time = "2025-10-28T17:35:35.304Z" }, + { url = "https://files.pythonhosted.org/packages/8e/f3/d854ff38789aca9b0cc23008d607ced9de4f7ab14fa1ca4329f86b3758ca/scipy-1.16.3-cp313-cp313t-win_arm64.whl", hash = "sha256:0c623a54f7b79dd88ef56da19bc2873afec9673a48f3b85b18e4d402bdd29a5a", size = 25803246, upload-time = "2025-10-28T17:35:42.155Z" }, + { url = "https://files.pythonhosted.org/packages/99/f6/99b10fd70f2d864c1e29a28bbcaa0c6340f9d8518396542d9ea3b4aaae15/scipy-1.16.3-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:875555ce62743e1d54f06cdf22c1e0bc47b91130ac40fe5d783b6dfa114beeb6", size = 36606469, upload-time = "2025-10-28T17:36:08.741Z" }, + { url = "https://files.pythonhosted.org/packages/4d/74/043b54f2319f48ea940dd025779fa28ee360e6b95acb7cd188fad4391c6b/scipy-1.16.3-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:bb61878c18a470021fb515a843dc7a76961a8daceaaaa8bad1332f1bf4b54657", size = 28872043, upload-time = "2025-10-28T17:36:16.599Z" }, + { url = "https://files.pythonhosted.org/packages/4d/e1/24b7e50cc1c4ee6ffbcb1f27fe9f4c8b40e7911675f6d2d20955f41c6348/scipy-1.16.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f2622206f5559784fa5c4b53a950c3c7c1cf3e84ca1b9c4b6c03f062f289ca26", size = 20862952, upload-time = "2025-10-28T17:36:22.966Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/3a/3e8c01a4d742b730df368e063787c6808597ccb38636ed821d10b39ca51b/scipy-1.16.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7f68154688c515cdb541a31ef8eb66d8cd1050605be9dcd74199cbd22ac739bc", size = 23508512, upload-time = "2025-10-28T17:36:29.731Z" }, + { url = "https://files.pythonhosted.org/packages/1f/60/c45a12b98ad591536bfe5330cb3cfe1850d7570259303563b1721564d458/scipy-1.16.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3c820ddb80029fe9f43d61b81d8b488d3ef8ca010d15122b152db77dc94c22", size = 33413639, upload-time = "2025-10-28T17:36:37.982Z" }, + { url = "https://files.pythonhosted.org/packages/71/bc/35957d88645476307e4839712642896689df442f3e53b0fa016ecf8a3357/scipy-1.16.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d3837938ae715fc0fe3c39c0202de3a8853aff22ca66781ddc2ade7554b7e2cc", size = 35704729, upload-time = "2025-10-28T17:36:46.547Z" }, + { url = "https://files.pythonhosted.org/packages/3b/15/89105e659041b1ca11c386e9995aefacd513a78493656e57789f9d9eab61/scipy-1.16.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aadd23f98f9cb069b3bd64ddc900c4d277778242e961751f77a8cb5c4b946fb0", size = 36086251, upload-time = "2025-10-28T17:36:55.161Z" }, + { url = "https://files.pythonhosted.org/packages/1a/87/c0ea673ac9c6cc50b3da2196d860273bc7389aa69b64efa8493bdd25b093/scipy-1.16.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b7c5f1bda1354d6a19bc6af73a649f8285ca63ac6b52e64e658a5a11d4d69800", size = 38716681, upload-time = "2025-10-28T17:37:04.1Z" }, + { url = "https://files.pythonhosted.org/packages/91/06/837893227b043fb9b0d13e4bd7586982d8136cb249ffb3492930dab905b8/scipy-1.16.3-cp314-cp314-win_amd64.whl", hash = "sha256:e5d42a9472e7579e473879a1990327830493a7047506d58d73fc429b84c1d49d", size = 39358423, upload-time = "2025-10-28T17:38:20.005Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/03/28bce0355e4d34a7c034727505a02d19548549e190bedd13a721e35380b7/scipy-1.16.3-cp314-cp314-win_arm64.whl", hash = "sha256:6020470b9d00245926f2d5bb93b119ca0340f0d564eb6fbaad843eaebf9d690f", size = 26135027, upload-time = "2025-10-28T17:38:24.966Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6f/69f1e2b682efe9de8fe9f91040f0cd32f13cfccba690512ba4c582b0bc29/scipy-1.16.3-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:e1d27cbcb4602680a49d787d90664fa4974063ac9d4134813332a8c53dbe667c", size = 37028379, upload-time = "2025-10-28T17:37:14.061Z" }, + { url = "https://files.pythonhosted.org/packages/7c/2d/e826f31624a5ebbab1cd93d30fd74349914753076ed0593e1d56a98c4fb4/scipy-1.16.3-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:9b9c9c07b6d56a35777a1b4cc8966118fb16cfd8daf6743867d17d36cfad2d40", size = 29400052, upload-time = "2025-10-28T17:37:21.709Z" }, + { url = "https://files.pythonhosted.org/packages/69/27/d24feb80155f41fd1f156bf144e7e049b4e2b9dd06261a242905e3bc7a03/scipy-1.16.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:3a4c460301fb2cffb7f88528f30b3127742cff583603aa7dc964a52c463b385d", size = 21391183, upload-time = "2025-10-28T17:37:29.559Z" }, + { url = "https://files.pythonhosted.org/packages/f8/d3/1b229e433074c5738a24277eca520a2319aac7465eea7310ea6ae0e98ae2/scipy-1.16.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:f667a4542cc8917af1db06366d3f78a5c8e83badd56409f94d1eac8d8d9133fa", size = 23930174, upload-time = "2025-10-28T17:37:36.306Z" }, + { url = "https://files.pythonhosted.org/packages/16/9d/d9e148b0ec680c0f042581a2be79a28a7ab66c0c4946697f9e7553ead337/scipy-1.16.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f379b54b77a597aa7ee5e697df0d66903e41b9c85a6dd7946159e356319158e8", size = 33497852, upload-time = "2025-10-28T17:37:42.228Z" }, + { url = 
"https://files.pythonhosted.org/packages/2f/22/4e5f7561e4f98b7bea63cf3fd7934bff1e3182e9f1626b089a679914d5c8/scipy-1.16.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4aff59800a3b7f786b70bfd6ab551001cb553244988d7d6b8299cb1ea653b353", size = 35798595, upload-time = "2025-10-28T17:37:48.102Z" }, + { url = "https://files.pythonhosted.org/packages/83/42/6644d714c179429fc7196857866f219fef25238319b650bb32dde7bf7a48/scipy-1.16.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:da7763f55885045036fabcebd80144b757d3db06ab0861415d1c3b7c69042146", size = 36186269, upload-time = "2025-10-28T17:37:53.72Z" }, + { url = "https://files.pythonhosted.org/packages/ac/70/64b4d7ca92f9cf2e6fc6aaa2eecf80bb9b6b985043a9583f32f8177ea122/scipy-1.16.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ffa6eea95283b2b8079b821dc11f50a17d0571c92b43e2b5b12764dc5f9b285d", size = 38802779, upload-time = "2025-10-28T17:37:59.393Z" }, + { url = "https://files.pythonhosted.org/packages/61/82/8d0e39f62764cce5ffd5284131e109f07cf8955aef9ab8ed4e3aa5e30539/scipy-1.16.3-cp314-cp314t-win_amd64.whl", hash = "sha256:d9f48cafc7ce94cf9b15c6bffdc443a81a27bf7075cf2dcd5c8b40f85d10c4e7", size = 39471128, upload-time = "2025-10-28T17:38:05.259Z" }, + { url = "https://files.pythonhosted.org/packages/64/47/a494741db7280eae6dc033510c319e34d42dd41b7ac0c7ead39354d1a2b5/scipy-1.16.3-cp314-cp314t-win_arm64.whl", hash = "sha256:21d9d6b197227a12dcbf9633320a4e34c6b0e51c57268df255a0942983bac562", size = 26464127, upload-time = "2025-10-28T17:38:11.34Z" }, ] [[package]] @@ -5581,15 +5629,15 @@ wheels = [ [[package]] name = "starlette" -version = "0.48.0" +version = "0.49.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/a7/a5/d6f429d43394057b67a6b5bbe6eae2f77a6bf7459d961fdb224bf206eee6/starlette-0.48.0.tar.gz", hash = "sha256:7e8cee469a8ab2352911528110ce9088fdc6a37d9876926e73da7ce4aa4c7a46", size = 2652949, upload-time = "2025-09-13T08:41:05.699Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/3f/507c21db33b66fb027a332f2cb3abbbe924cc3a79ced12f01ed8645955c9/starlette-0.49.1.tar.gz", hash = "sha256:481a43b71e24ed8c43b11ea02f5353d77840e01480881b8cb5a26b8cae64a8cb", size = 2654703, upload-time = "2025-10-28T17:34:10.928Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/72/2db2f49247d0a18b4f1bb9a5a39a0162869acf235f3a96418363947b3d46/starlette-0.48.0-py3-none-any.whl", hash = "sha256:0764ca97b097582558ecb498132ed0c7d942f233f365b86ba37770e026510659", size = 73736, upload-time = "2025-09-13T08:41:03.869Z" }, + { url = "https://files.pythonhosted.org/packages/51/da/545b75d420bb23b5d494b0517757b351963e974e79933f01e05c929f20a6/starlette-0.49.1-py3-none-any.whl", hash = "sha256:d92ce9f07e4a3caa3ac13a79523bd18e3bc0042bb8ff2d759a8e7dd0e1859875", size = 74175, upload-time = "2025-10-28T17:34:09.13Z" }, ] [[package]] @@ -6026,8 +6074,8 @@ wheels = [ [[package]] name = "transformer-engine" -version = "2.8.0+40c69e75" -source = { git = "https://github.com/NVIDIA/TransformerEngine.git?rev=release_v2.8#40c69e751a47ec87786283e125c5eb264101270f" } +version = "2.9.0+c4c185db" +source = { git = "https://github.com/NVIDIA/TransformerEngine.git?rev=release_v2.9#c4c185dbec1aab3627ab2ecffbc4c429d31f23c0" } dependencies = [ { name = "einops" }, { name = "importlib-metadata", version = "8.6.1", source = { registry = "https://pypi.org/simple" } }, @@ -6174,7 +6222,7 @@ wheels = [ [[package]] name = "wandb" -version = "0.22.2" +version = "0.22.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -6188,17 +6236,17 @@ dependencies = [ { name = "sentry-sdk" }, { name = "typing-extensions" }, ] -sdist 
= { url = "https://files.pythonhosted.org/packages/c1/a8/680bd77e11a278e6c14a2cb4646e8ab9525b2baaa81c3d12dc0f616aa4aa/wandb-0.22.2.tar.gz", hash = "sha256:510f5a1ac30d16921c36c3b932da852f046641d4aee98a86a7f5ec03a6e95bda", size = 41401439, upload-time = "2025-10-07T19:54:21.88Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/b3/8c637fb594cfd574ce9c9f7d0ac2f2d12742eb38ec59dcbb713beae95343/wandb-0.22.2-py3-none-macosx_12_0_arm64.whl", hash = "sha256:2e29c9fa4462b5411b2cd2175ae33eff4309c91de7c426bca6bc8e7abc7e5dec", size = 18677549, upload-time = "2025-10-07T19:54:00.839Z" }, - { url = "https://files.pythonhosted.org/packages/d3/f3/e309a726eaebddad6b8d9a73a50891e5796962ec8a091bb6a61d31692d1e/wandb-0.22.2-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:c42d594cd7a9da4fd39ecdb0abbc081b61f304123277b2b6c4ba84283956fd21", size = 19715188, upload-time = "2025-10-07T19:54:03.805Z" }, - { url = "https://files.pythonhosted.org/packages/f9/73/fad59910215876008f4781b57d828d1b19b3677c9b46af615e7229746435/wandb-0.22.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5188d84e66d3fd584f3b3ae4d2a70e78f29403c0528e6aecaa4188a1fcf54d8", size = 18463148, upload-time = "2025-10-07T19:54:05.676Z" }, - { url = "https://files.pythonhosted.org/packages/87/11/572c1913b5b92e4c519f735adfae572b46f2d79d99ede63eec0d6a272d6e/wandb-0.22.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88ccd484af9f21cfc127976793c3cf66cfe1acd75bd8cd650086a64e88bac4bf", size = 19908645, upload-time = "2025-10-07T19:54:07.693Z" }, - { url = "https://files.pythonhosted.org/packages/6d/0d/133aa82f5a505ba638b4fda5014cefddfe7f1f6238ef4afc0871ec61c41f/wandb-0.22.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:abf0ed175e791af64110e0a0b99ce02bbbbd1017722bc32d3bc328efb86450cd", size = 18501348, upload-time = "2025-10-07T19:54:10.234Z" }, - { url = 
"https://files.pythonhosted.org/packages/d0/d5/776203be2601872f01dacc6a5b4274106ec0db7cd3bf2cdb3b741f8fc932/wandb-0.22.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:44e77c56403b90bf3473a7ca3bfc4d42c636b7c0e31a5fb9cd0382f08302f74b", size = 20001756, upload-time = "2025-10-07T19:54:12.452Z" }, - { url = "https://files.pythonhosted.org/packages/30/43/ae3fa46e20b1d9a6508dd9abe716d57205c038ed4661c5c98ace48a60eac/wandb-0.22.2-py3-none-win32.whl", hash = "sha256:44d12bd379dbe15be5ceed6bdf23803d42f648ba0dd111297b4c47a3c7be6dbd", size = 19075950, upload-time = "2025-10-07T19:54:14.892Z" }, - { url = "https://files.pythonhosted.org/packages/09/59/c174321e868205f7a659d1e5ec51f546e62267296d6f4179bb9119294964/wandb-0.22.2-py3-none-win_amd64.whl", hash = "sha256:c95eb221bf316c0872f7ac55071856b9f25f95a2de983ada48acf653ce259386", size = 19075953, upload-time = "2025-10-07T19:54:16.837Z" }, - { url = "https://files.pythonhosted.org/packages/7a/a2/c7c24fda78513cab5686949d8cb36459dbbccbbb4b2b6fc67237ece31a00/wandb-0.22.2-py3-none-win_arm64.whl", hash = "sha256:20d2ab9aa10445aab3d60914a980f002a4f66566e28b0cd156b1e462f0080a0d", size = 17383217, upload-time = "2025-10-07T19:54:19.384Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/c1/d1/6b70f365ed86bd69debba8ad55dec8606fc21006e7ca703a5a091bd3b719/wandb-0.22.3.tar.gz", hash = "sha256:04468a8ab2769a46f5e384c9c4ada5da0dced005ca689a8424e4b8b5cb2a0291", size = 44337368, upload-time = "2025-10-28T23:59:10.275Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/23/02/87fb60f587ec249f784a40bd91c30de1b2b24d691ee72675d5b66c3d0728/wandb-0.22.3-py3-none-macosx_12_0_arm64.whl", hash = "sha256:81b3b6e405f38342b0a080898b7d00c5b9375432f5ba358942a09e65cdcfe781", size = 18758047, upload-time = "2025-10-28T23:58:46.56Z" }, + { url = "https://files.pythonhosted.org/packages/26/88/64081740ef2b2efc7fbcb2139a07a849e42bcb09ae0c56ae50c41bd0ad63/wandb-0.22.3-py3-none-macosx_12_0_x86_64.whl", hash = 
"sha256:d29c16817cca6401b4919069ec7570c781eacb67dc0b1ff2e0096a9a59581720", size = 19798011, upload-time = "2025-10-28T23:58:49.718Z" }, + { url = "https://files.pythonhosted.org/packages/19/72/c4f922b33dbb84d1c81ee045ff8791dd14e26d79e1e9bbafff964b7043e2/wandb-0.22.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb955d73a4ba55df9adc61fafbabef5556784d33fc39c7b5c8165d2694ddeb3b", size = 18542713, upload-time = "2025-10-28T23:58:51.927Z" }, + { url = "https://files.pythonhosted.org/packages/ad/98/3ce5f6e2086d91b0c51b38ae7ff591109e7da2bb25fe1a12eec0cdbaa494/wandb-0.22.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23f3ebe41a26506117a098fdfd2706ed0e50b37899bfbefe3a0628fcbd70c69d", size = 19984910, upload-time = "2025-10-28T23:58:54.641Z" }, + { url = "https://files.pythonhosted.org/packages/5e/57/e68cb38427b60490d6ddf1b992e6c7f36be83be1079d291ce87a8d347f48/wandb-0.22.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2973462bed5d4a653b1a97cf9fc350673bb200fb356a2f4eba34beae9b87e0aa", size = 18581776, upload-time = "2025-10-28T23:58:56.975Z" }, + { url = "https://files.pythonhosted.org/packages/66/6d/543f907ce0c6b6da13628b23d19ca7282c559fd73eb47b04977b9a61d0c6/wandb-0.22.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c5c2bd18f95c1639863c527da0a5818ac6b0e5194f9c691426b265908ddd8b2c", size = 20078800, upload-time = "2025-10-28T23:58:59.217Z" }, + { url = "https://files.pythonhosted.org/packages/da/91/1decaf1a6ac2017481c782e0fad7f90bc9ae4057f3d76d478cb6527f3dd3/wandb-0.22.3-py3-none-win32.whl", hash = "sha256:09ca1edfe0fd6dc30447d368acddb825668e60ee705c98594a6bbfd30d34d47e", size = 19160297, upload-time = "2025-10-28T23:59:01.536Z" }, + { url = "https://files.pythonhosted.org/packages/4c/ba/3b092634279994b0c79fe05220532822be09f3a353ae95c54e7142769db8/wandb-0.22.3-py3-none-win_amd64.whl", hash = "sha256:55403bf93872c9978433d101324f51e43e78c70c809bf6d06ca7b2760e39f497", size = 19160300, upload-time = 
"2025-10-28T23:59:04.06Z" }, + { url = "https://files.pythonhosted.org/packages/7f/80/4662fce9eebcc8c71f5083e9152ccaf7d43d4ca9c446e1422f9aa784a51c/wandb-0.22.3-py3-none-win_arm64.whl", hash = "sha256:49f66b05882abfa53816cc8d01b3c2435a89c5a090176802fa6928b5979d34d9", size = 17461959, upload-time = "2025-10-28T23:59:07.059Z" }, ] [[package]] From 0d0f29cd8a5f2f6c39786c979cea2b61fdda8626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 29 Oct 2025 23:54:28 +0100 Subject: [PATCH 086/334] Ko3n1g/fix/golden values (#2037) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/02.test.yml | 7 +++++++ ...weave.json => golden_values_dev_dgxh100_coreweave.json} | 0 ...ues_dev_eos.json => golden_values_dev_dgxh100_eos.json} | 0 3 files changed, 7 insertions(+) rename tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/{golden_values_dev_coreweave.json => golden_values_dev_dgxh100_coreweave.json} (100%) rename tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/{golden_values_dev_eos.json => golden_values_dev_dgxh100_eos.json} (100%) diff --git a/.gitlab/stages/02.test.yml b/.gitlab/stages/02.test.yml index 2f018f94e66..33dd8d7a5fb 100644 --- a/.gitlab/stages/02.test.yml +++ b/.gitlab/stages/02.test.yml @@ -228,6 +228,13 @@ test:linting_docs_build: - mv megatron-lm/ documentation/ - cd documentation/ - ./repo docs + rules: + - if: $PUBLISH == "yes" + when: never + - if: $BUILD == "no" + when: never + - when: on_success + allow_failure: true # Override from template secret_detection: diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgxh100_eos.json From 1d1ac739c69180d3c7410064748f1005f789154d Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Thu, 30 Oct 2025 18:57:52 -0500 Subject: [PATCH 087/334] cp: `Megatron-FSDP Expert Parallel (DeepSeek-v3) Support` into `dev` (#2007) Signed-off-by: Charlie Truong Co-authored-by: Jack Chang Co-authored-by: jianbinc Co-authored-by: xuwenc --- .../distributed/fsdp/mcore_fsdp_adapter.py | 133 +++- megatron/core/distributed/fsdp/src/README.md | 11 + .../fsdp/src/megatron_fsdp/fully_shard.py | 10 +- .../fsdp/src/megatron_fsdp/megatron_fsdp.py | 11 +- .../megatron_fsdp/param_and_grad_buffer.py | 83 ++- .../fsdp/src/megatron_fsdp/uneven_dtensor.py | 4 +- .../fsdp/src/megatron_fsdp/utils.py | 130 +++- .../embeddings/yarn_rotary_pos_embedding.py | 10 +- megatron/core/optimizer/__init__.py | 23 + megatron/core/optimizer/distrib_optimizer.py | 2 
+ .../transformer/fsdp_dtensor_checkpoint.py | 336 ++++++++-- megatron/training/arguments.py | 4 + megatron/training/checkpointing.py | 74 ++- megatron/training/training.py | 1 + .../golden_values_dev_dgxh100_coreweave.json | 598 ++++++++--------- .../golden_values_dev_dgxh100_coreweave.json | 600 +++++++++--------- .../golden_values_dev_dgxh100_eos.json | 600 +++++++++--------- .../golden_values_dev_dgxh100_coreweave.json | 500 +++++++-------- .../golden_values_dev_dgx_h100.json | 143 ++++- .../golden_values_dev_dgxh100_coreweave.json | 537 ++++++++++++++++ .../model_config.yaml | 2 +- .../golden_values_dev_dgxh100_coreweave.json | 478 +++++++------- .../golden_values_dev_dgxh100_eos.json | 478 +++++++------- tests/test_utils/recipes/moe.yaml | 15 +- tools/checkpoint/checkpoint_inspector.py | 362 +++++++++-- 25 files changed, 3302 insertions(+), 1843 deletions(-) create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json diff --git a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py index a7c0d5802ab..7432a7f9a36 100644 --- a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +++ b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py @@ -13,6 +13,7 @@ # limitations under the License. 
import logging +import random from typing import List, Optional try: @@ -22,6 +23,7 @@ except ImportError: HAVE_EINOPS = False +import numpy as np import torch import torch.distributed as dist @@ -32,10 +34,11 @@ except ImportError: HAVE_DTENSOR = False -from megatron.core import parallel_state +from megatron.core import parallel_state, tensor_parallel from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk from megatron.core.distributed.data_parallel_base import _BaseDataParallel from megatron.core.distributed.distributed_data_parallel_config import DistributedDataParallelConfig +from megatron.core.extensions.transformer_engine import TELinear from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.transformer_layer import TransformerLayer @@ -95,6 +98,8 @@ def __init__( else: self.fsdp_unit_modules = [] + self._fix_tensor_parallel_attributes(module) + super().__init__( config=config, module=MegatronFSDP( @@ -119,6 +124,8 @@ def __init__( self.module.state_dict_for_save_checkpoint = self.module.state_dict self.state_dict_for_save_checkpoint = self.state_dict + self.sync_rng_states_across_tp_group() + def load_state_dict(self, state_dict, strict=True): """ Load the state dictionary into the module. @@ -141,6 +148,44 @@ def load_state_dict(self, state_dict, strict=True): self.module.load_state_dict(custom_state_dict, strict=strict) + def _fix_tensor_parallel_attributes(self, module): + is_expert_param = lambda n, p: ".experts." 
in n + is_router_param = lambda n, p: ".router.weight" in n + + if parallel_state.get_tensor_model_parallel_group(): + tp_size = parallel_state.get_tensor_model_parallel_group().size() + else: + tp_size = 1 + + if parallel_state.get_expert_tensor_parallel_group(): + expt_tp_size = parallel_state.get_expert_tensor_parallel_group().size() + else: + expt_tp_size = 1 + + param_to_direct_module = {} + for name, m in module.named_modules(): + for p in m.parameters(recurse=False): + param_to_direct_module[p] = (name, m) + + for name, param in module.named_parameters(): + if is_expert_param(name, param) and expt_tp_size > 1: + setattr(param, "_mcore_tp", True) + if "linear_fc1.weight" in name: + setattr(param, "_tp_partition_dim", 0) + elif "linear_fc2.weight" in name: + setattr(param, "_tp_partition_dim", 1) + + if not is_expert_param(name, param) and tp_size > 1: + m_name, direct_module = param_to_direct_module[param] + if isinstance(direct_module, (TELinear,)): + parallel_mode = getattr(direct_module, "parallel_mode", None) + if parallel_mode is None: + setattr(param, "_mcore_tp", True) + setattr(param, "_tp_duplicated", True) + elif is_router_param(name, param): + setattr(param, "_mcore_tp", True) + setattr(param, "_tp_duplicated", True) + def _init_dist_index(self, pg_collection): """ Initialize the distributed index for the module. 
@@ -154,6 +199,7 @@ def _init_dist_index(self, pg_collection): enable_hsdp = self.ddp_config.num_distributed_optimizer_instances > 1 if pg_collection is None: tp_group = parallel_state.get_tensor_model_parallel_group() + expt_tp_group = parallel_state.get_expert_tensor_parallel_group() if enable_hsdp: dp_cp_group = parallel_state.get_data_parallel_group( with_context_parallel=True, partial_data_parallel=True @@ -168,8 +214,11 @@ def _init_dist_index(self, pg_collection): ) outer_fsdp_group = None hybrid_fsdp_group = None + expt_dp_group = parallel_state.get_expert_data_parallel_group() + ep_group = parallel_state.get_expert_model_parallel_group() else: tp_group = getattr(pg_collection, 'tp', None) + expt_tp_group = getattr(pg_collection, 'expt_tp', None) if enable_hsdp: dp_cp_group = pg_collection.intra_dp_cp outer_fsdp_group = pg_collection.inter_dist_opt @@ -178,11 +227,17 @@ def _init_dist_index(self, pg_collection): dp_cp_group = pg_collection.dp_cp outer_fsdp_group = None hybrid_fsdp_group = None + expt_dp_group = getattr(pg_collection, 'expt_dp', None) + ep_group = getattr(pg_collection, 'ep', None) if tp_group is None: single_rank_group = dist.new_group(ranks=[dist.get_rank()]) tp_group = single_rank_group + if expt_tp_group is None: + single_rank_group = dist.new_group(ranks=[dist.get_rank()]) + expt_tp_group = single_rank_group + if enable_hsdp: mesh = _get_hsdp_tp_mesh(outer_fsdp_group, dp_cp_group, tp_group) dist_index = FSDPDistributedIndex( @@ -199,6 +254,17 @@ def _init_dist_index(self, pg_collection): hybrid_fsdp_group=hybrid_fsdp_group, ) else: + if ep_group is not None: + expt_mesh = _get_dp_tp_mesh(expt_dp_group, expt_tp_group, ep_size=ep_group.size()) + expt_device_mesh = DeviceMesh.from_group( + [expt_dp_group, expt_tp_group], + device_type="cuda", + mesh=expt_mesh.tolist(), + mesh_dim_names=["dp_cp", "tp"], + ) + else: + expt_device_mesh = None + mesh = _get_dp_tp_mesh(dp_cp_group, tp_group) dist_index = FSDPDistributedIndex( 
device_mesh=DeviceMesh.from_group( @@ -209,8 +275,11 @@ def _init_dist_index(self, pg_collection): ), dp_shard_dim="dp_cp", tp_dim="tp", + expt_device_mesh=expt_device_mesh, ) + self.tp_group = tp_group + return dist_index def stop_communication(self): @@ -220,6 +289,20 @@ def stop_communication(self): self.module.synchronize_gradient_reduce() self.module.synchronize_param_gather() + def sync_rng_states_across_tp_group(self): + """ + Synchronize the tensor parallel random number generator states. + """ + if self.tp_group.size() <= 1: + return + + if self.tp_group.rank() == 0: + broadcast_list = [_get_rng_state_dict()] + else: + broadcast_list = [None] + torch.distributed.broadcast_object_list(broadcast_list, group=self.tp_group, group_src=0) + _load_rng_state_dict(broadcast_list[0]) + def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group): assert HAVE_EINOPS, "einops is not installed. Please install it with `pip install einops`." @@ -273,29 +356,46 @@ def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group): return mesh -def _get_dp_tp_mesh(dp_cp_group, tp_group): +def _get_dp_tp_mesh(dp_cp_group, tp_group, ep_size=1): assert HAVE_EINOPS, "einops is not installed. Please install it with `pip install einops`." world_size = dist.get_world_size() tp_size = dist.get_world_size(tp_group) if tp_group is not None else 1 - # TODO: Supports configurable (dp, cp, tp) order. - mesh = einops.rearrange(torch.arange(world_size), "(dp_cp tp) -> dp_cp tp", tp=tp_size) + # TODO: Supports configurable (dp, cp, ep, tp) order. 
+ mesh = einops.rearrange( + torch.arange(world_size), + "(dp_cp ep tp) -> ep dp_cp tp", + dp_cp=dp_cp_group.size(), + tp=tp_size, + ep=ep_size, + ) - mesh_dp_ranks = einops.rearrange(mesh, 'dp_cp tp -> tp dp_cp', tp=tp_size) + mesh_dp_ranks = einops.rearrange(mesh, 'ep dp_cp tp -> (ep tp) dp_cp', dp_cp=dp_cp_group.size()) dp_cp_group_ranks = dist.get_process_group_ranks(dp_cp_group) assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_dp_ranks, dp_cp_group_ranks), ( f"[Megatron-FSDP] Data Parallel ranks in the mesh {mesh_dp_ranks} " f"do not match the ranks in the DP group {dp_cp_group_ranks}." ) - mesh_tp_ranks = einops.rearrange(mesh, 'dp_cp tp -> (dp_cp) tp', tp=tp_size) + mesh_tp_ranks = einops.rearrange(mesh, 'ep dp_cp tp -> (dp_cp ep) tp', tp=tp_size) tp_group_ranks = dist.get_process_group_ranks(tp_group) assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_tp_ranks, tp_group_ranks), ( f"[Megatron-FSDP] Tensor Parallel ranks in the mesh {mesh_tp_ranks} " f"do not match the ranks in the TP group {tp_group_ranks}." ) - return mesh + # Exclude the expert parallel dimension + rank = dist.get_rank() + dp_tp_meshes = [per_ep_mesh for per_ep_mesh in mesh if rank in per_ep_mesh.reshape(-1).tolist()] + assert ( + len(dp_tp_meshes) == 1 + ), f"[Megatron-FSDP] Current rank {rank} is not unique in the mesh ranks {mesh.tolist()}." + assert len(dp_tp_meshes[0].reshape(-1).tolist()) == dp_cp_group.size() * tp_group.size(), ( + f"[Megatron-FSDP] DP-TP mesh size {len(dp_tp_meshes[0].reshape(-1).tolist())} " + f"does not match expected size {dp_cp_group.size() * tp_group.size()}." + ) + + return dp_tp_meshes[0] def _check_mesh_ranks_and_group_ranks_are_consistent(mesh_ranks, group_ranks): @@ -310,3 +410,22 @@ def _check_mesh_ranks_and_group_ranks_are_consistent(mesh_ranks, group_ranks): f"{mesh_ranks.tolist()} does not match the group ranks {group_ranks}." 
) return sorted(current_ranks[0]) == sorted(group_ranks) + + +def _get_rng_state_dict(): + rng_state_dict = { + 'random_rng_state': random.getstate(), + 'np_rng_state': np.random.get_state(), + 'torch_rng_state': torch.get_rng_state(), + 'cuda_rng_state': torch.cuda.get_rng_state(), + 'rng_tracker_states': tensor_parallel.get_cuda_rng_tracker().get_states(), + } + return rng_state_dict + + +def _load_rng_state_dict(rng_state_dict): + random.setstate(rng_state_dict['random_rng_state']) + np.random.set_state(rng_state_dict['np_rng_state']) + torch.set_rng_state(rng_state_dict['torch_rng_state']) + torch.cuda.set_rng_state(rng_state_dict['cuda_rng_state']) + tensor_parallel.get_cuda_rng_tracker().set_states(rng_state_dict['rng_tracker_states']) diff --git a/megatron/core/distributed/fsdp/src/README.md b/megatron/core/distributed/fsdp/src/README.md index d879c6c26f8..9e036f22f67 100644 --- a/megatron/core/distributed/fsdp/src/README.md +++ b/megatron/core/distributed/fsdp/src/README.md @@ -127,6 +127,12 @@ device_mesh[("dp_shard", "cp")]._flatten("dp_shard_cp") # Only required if using HSDP. Otherwise, don't pass hybrid_fsdp_group. device_mesh[("dp_outer", "dp_shard", "cp")]._flatten("hsdp") hsdp_group = device_mesh["hsdp"].get_group() +# Initialize DeviceMesh for expert parallel (EP) modules when using FSDP + EP. +expert_device_mesh = torch.distributed.device_mesh.init_device_mesh( + "cuda", + mesh_shape=(expt_dp_shard_size, expt_tp_size), + mesh_dim_names=("dp_shard", "tp"), +) # Fully-shards your model and distributes your optimizer. model, optimizer = fully_shard( @@ -145,6 +151,8 @@ model, optimizer = fully_shard( tp_dim="tp", # Only required when using HSDP. Otherwise, set this to None. hybrid_fsdp_group=hsdp_group, + # Only required for FSDP + EP. Otherwise, set this to None. 
+ expt_device_mesh=expt_device_mesh, # FSDP Sharding Strategy: no_shard (0) / optim (1) / optim_grads (2) / optim_grads_params (3) zero_dp_strategy=3, outer_dp_sharding_strategy=1, @@ -192,6 +200,9 @@ optimizer.load_state_dict(ckpt_state_dict["optimizer"]) - `tp_dim` is the name of the sub-mesh used for tensor parallelism (TP), which is required for `(FSDP, TP)`-strided sharding when using Megatron-LM or Torch-native `DTensor` TP. - For more information about tensor parallelism, refer to: [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053). - `hybrid_fsdp_group` is the `ProcessGroup` which contains all ranks in the flattened `dp_shard_dim` and `dp_outer_dim` sub-meshes utilized to specify the `(DP-Outer, DP-Shard)` sharded coordinate system for the weight and gradient buffers. Required for HSDP. +- `expt_device_mesh` is another [`torch.distributed.DeviceMesh`](https://docs.pytorch.org/docs/stable/distributed.html#devicemesh) tailored for the expert parallel (EP) modules in `MegatronFSDP`. + - `dp_shard_dim` is the name of the sub-mesh required for FSDP sharding of the EP modules, enabling expert data parallelism (EDP). + - `tp_dim` is the name of the sub-mesh used for expert tensor parallelism (ETP), which is required for `(FSDP, ETP)`-strided sharding when using Megatron-LM or Torch-native `DTensor` ETP. - `init_model_with_meta_device` has `MegatronFSDP` initialize your `meta`-device model in shards on every CUDA device to avoid OOM when initializing extremely large models that cannot fit on a single device. Users can initialize their model on a [`meta`-device](https://docs.pytorch.org/docs/stable/meta.html) (`with torch.device('meta'): ...`), and ``MegatronFSDP`` will further shard and initialize the model parameters layer-by-layer adhering to the customizable `module.reset_parameters` method, which prevents the entire model from being allocated in memory at any point during runtime. 
- Defaults to `False`. - Note that the `device` argument which installs your model on a specific device or rank will be deactivated when `init_model_with_meta_device=True`. diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py index 24e86cede72..e98362a1a03 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py @@ -64,6 +64,7 @@ def fully_shard_model( dp_outer_dim: Optional[str] = None, tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, + expt_device_mesh: Optional[DeviceMesh] = None, fsdp_unit_modules: Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]] = None, zero_dp_strategy: str | int = 3, outer_dp_sharding_strategy: str | int = 0, @@ -183,8 +184,10 @@ def fully_shard_model( tp_dim=tp_dim, # Only required for HSDP. hybrid_fsdp_group=hybrid_fsdp_group, - # Access to flattened DP rank assignments for HFSDP. + # Access to flattened DP rank assignments for HSDP. hsdp_outer_dp_shard=_outer_fsdp_sharding, + # Only required for Megatron-FSDP + EP. + expt_device_mesh=expt_device_mesh, ) # Wrap model in Megatron FSDP. @@ -330,6 +333,7 @@ def fully_shard( dp_outer_dim: Optional[str] = None, tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, + expt_device_mesh: Optional[DeviceMesh] = None, fsdp_unit_modules: Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]] = None, zero_dp_strategy: str | int = 3, outer_dp_sharding_strategy: str | int = 0, @@ -391,6 +395,9 @@ def fully_shard( by flattening the outer-FSDP (dp_outer_dim) and FSDP (dp_shard_dim) process groups or sub-meshes. Defaults to None. Required for HSDP, i.e. if dp_outer_dim is not None. + expt_device_mesh (Optional[DeviceMesh]): + Expert parallel device mesh object defining the topology for MoE distributed training. 
+ fsdp_unit_modules (Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]]): List of (sub-)module classes or (sub-)module class import paths that are "units", which are torch.nn.Module(s) that are sharded and scheduled by Megatron-FSDP. @@ -503,6 +510,7 @@ def fully_shard( dp_outer_dim=dp_outer_dim, tp_dim=tp_dim, hybrid_fsdp_group=hybrid_fsdp_group, + expt_device_mesh=expt_device_mesh, fsdp_unit_modules=fsdp_unit_modules, zero_dp_strategy=zero_dp_strategy, outer_dp_sharding_strategy=outer_dp_sharding_strategy, diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index 10a8ae14d65..d6ef5f6210e 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -235,7 +235,10 @@ def __init__( self.dist_index = dist_index # If Megatron Expert Parallelism is enabled, you need to provide an expt_dp_group. - if has_expert_parameters and self.dist_index.get_expert_dp_group() is None: + if ( + has_expert_parameters + and self.dist_index.get_fsdp_group(is_expert_parallel=True) is None + ): raise ValueError( "[Megatron-FSDP] Megatron Expert Parallelism is enabled, but no expt_dp_group is" "provided." @@ -353,9 +356,7 @@ def _init_fsdp_param_and_grad_buffer(self): ) # Set the suggested communication unit size for reduce-scatter and all-gather pipelines. 
- suggested_communication_unit_size = ( - self.ddp_config.suggested_communication_unit_size or 1_000_000_000 - ) + suggested_communication_unit_size = self.ddp_config.suggested_communication_unit_size if suggested_communication_unit_size is None: if self.data_parallel_sharding_strategy == "optim_grads_params": total_param_elements = 0 @@ -370,6 +371,8 @@ def _init_fsdp_param_and_grad_buffer(self): suggested_communication_unit_size = total_param_elements // total_fsdp_module * 2 elif self.bucket_size is not None: suggested_communication_unit_size = self.bucket_size + else: + suggested_communication_unit_size = 1_000_000_000 # Cap to 1B elements. suggested_communication_unit_size = max( diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index c8116150d52..bdf480d867b 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -34,7 +34,14 @@ from torch.distributed.tensor.device_mesh import _mesh_resources from .uneven_dtensor import update_uneven_dtensor_chunk_metadata, validate_uneven_dtensor -from .utils import _MODEL_PARALLEL_RNG_TRACKER_NAME, FSDPDistributedIndex, get_global_memory_buffer +from .utils import ( + _MODEL_PARALLEL_RNG_TRACKER_NAME, + FSDPDistributedIndex, + get_global_memory_buffer, + get_mcore_tensor_parallel_partition_dim, + is_mcore_tensor_model_parallel, + is_mcore_tensor_parallel_duplicated, +) logger = logging.getLogger(__name__) @@ -1299,7 +1306,7 @@ def _does_param_require_new_bucket(param): and policy.data_parallel_sharding_strategy != "no_shard" ) - is_expert_parameter = lambda p: not getattr(p, "allreduce", True) + is_expert_parameter = lambda n, p: ".experts." in n # Step 1: Group the parameters according to their execution order and attributes. # FSDP unit module parameters are split into multiple parameter sub-groups. 
@@ -1313,7 +1320,7 @@ def _does_param_require_new_bucket(param): if is_float8tensor(param) or meta_device_init_fp8_params.get(name, False) else param.dtype ), - is_expert_param=is_expert_parameter(param), + is_expert_param=is_expert_parameter(name, param), requires_grad=param.requires_grad, fsdp_unit_id=None, ) @@ -2257,6 +2264,10 @@ def _reset_parameters(self, old_params, new_params): self.param_to_direct_module[new_param] = self.param_to_direct_module[old_param] del self.param_to_direct_module[old_param] + for tp_attr in ["_mcore_tp", "_tp_partition_dim", "_tp_duplicated"]: + if getattr(old_param, tp_attr, None) is not None: + setattr(new_param, tp_attr, getattr(old_param, tp_attr)) + for item_id, p in enumerate(self.params): if p in param_map: new_p = param_map[p] @@ -2340,6 +2351,7 @@ def _init_distributed_params(self): is_expert_param=pg.is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, + force_sync_tp_duplicated_param=True, ) dist_main_weight[param_name] = dist_param elif wbuf: @@ -2351,6 +2363,7 @@ def _init_distributed_params(self): is_expert_param=pg.is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, + force_sync_tp_duplicated_param=True, ) dist_main_weight[param_name] = dist_param else: @@ -2365,6 +2378,7 @@ def _init_distributed_params(self): is_expert_param=pg.is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=False, + force_sync_tp_duplicated_param=True, ) dist_main_weight[param_name] = dist_param @@ -2399,6 +2413,9 @@ def set_param_attribute(): "partition_dim", "partition_stride", "is_embedding_or_output_parameter", + "_mcore_tp", + "_tp_duplicated", + "_tp_partition_dim", ]: if hasattr(orig_param, attr_name): setattr(param, attr_name, getattr(orig_param, attr_name)) @@ -3546,7 +3563,9 @@ def to_local_if_dtensor(tensor): return tensor -def _get_fsdp_tensor_spec(param, dist_index: FSDPDistributedIndex, is_sharded_param): +def _get_fsdp_tensor_spec( + param, dist_index: FSDPDistributedIndex, 
is_sharded_param, is_expert_param +): """ Get the DeviceMesh for the parameter and modify the placement for Megatron-FSDP. """ @@ -3557,7 +3576,7 @@ def _get_fsdp_tensor_spec(param, dist_index: FSDPDistributedIndex, is_sharded_pa dtensor_mesh = getattr(dtensor_spec, "mesh", None) # Validate that the DTensor root mesh is identical to the Megatron-FSDP device mesh. - megatron_fsdp_global_mesh = dist_index.get_root_mesh() + megatron_fsdp_global_mesh = dist_index.get_root_mesh(is_expert_parallel=is_expert_param) dtensor_global_mesh = _mesh_resources.get_root_mesh(dtensor_mesh) # FIXME(boxiangw): add or megatron_fsdp_global_mesh != dtensor_global_mesh: # _mesh_resources.get_root_mesh(dtensor_mesh) is not getting the correct root mesh @@ -3602,7 +3621,7 @@ def _get_fsdp_tensor_spec(param, dist_index: FSDPDistributedIndex, is_sharded_pa placements = [Shard(0), dtensor_placement] shard_order = [1, 0] - device_mesh = dist_index.get_submesh(mesh_dim_names) + device_mesh = dist_index.get_submesh(mesh_dim_names, is_expert_parallel=is_expert_param) if shard_order is not None: setattr(device_mesh, "_shard_order", shard_order) @@ -3627,7 +3646,7 @@ def _get_fsdp_tensor_spec(param, dist_index: FSDPDistributedIndex, is_sharded_pa else: placements = [Shard(0)] - device_mesh = dist_index.get_submesh(mesh_dim_names) + device_mesh = dist_index.get_submesh(mesh_dim_names, is_expert_parallel=is_expert_param) if shard_order is not None: setattr(device_mesh, "_shard_order", shard_order) @@ -3642,6 +3661,7 @@ def make_fsdp_dtensor( is_expert_param: bool = False, run_check: bool = False, update_uneven_dtensor_chunk_meta: bool = False, + force_sync_tp_duplicated_param: bool = False, ): """ Creates a distributed tensor (DTensor) from a local tensor with support for @@ -3720,38 +3740,39 @@ def make_fsdp_dtensor( orig_param = param # Handle tensor model parallel specific logic - if getattr(param, "tensor_model_parallel", False): + if is_mcore_tensor_model_parallel(param): # Ensure parameter is 
not already a DTensor assert not isinstance(param, DTensor), ( - "[Megatron-FSDP] Parameter is already a DTensor, yet tensor_model_parallel " - "is True. Check usage." + "[Megatron-FSDP] Parameter is already a DTensor, yet tensor_model_parallel " "is True." ) - # Validate M-Core TP attributes - assert hasattr( - param, "partition_dim" - ), "[Megatron-FSDP] tensor_model_parallel param missing 'partition_dim'." - assert hasattr( - param, "partition_stride" - ), "[Megatron-FSDP] tensor_model_parallel param missing 'partition_stride'." - assert ( - param.partition_stride == 1 - ), "[Megatron-FSDP] Only partition_stride=1 is currently supported for " - "tensor_model_parallel." - - tp_dim = param.partition_dim - tp_mesh = dist_index.get_submesh(dist_index.tp_dim) - - # Adjust shape for global dimension + tp_mesh = dist_index.get_submesh(dist_index.tp_dim, is_expert_parallel=is_expert_param) + global_shape = list(param.shape) if tp_mesh.mesh.numel() > 1: - global_shape = list(param.shape) - global_shape[tp_dim] *= tp_mesh.mesh.numel() + if is_mcore_tensor_parallel_duplicated(param): + placements = [Replicate()] + if force_sync_tp_duplicated_param: + if local_tensor.numel() > 0: + torch.distributed.broadcast( + local_tensor, group=tp_mesh.get_group(), group_src=0 + ) + elif run_check: + # TODO: Implement consistency check for duplicated TP parameters + pass + else: + tp_dim = get_mcore_tensor_parallel_partition_dim(param) + assert tp_dim is not None, ( + "[Megatron-FSDP] Parameter is not tensor model parallel, " + "yet tensor_model_parallel is True." 
+ ) + placements = [Shard(tp_dim)] + global_shape[tp_dim] *= tp_mesh.mesh.numel() # Construct TP-sharded DTensor using Megatron-style placement param = DTensor.from_local( - local_tensor=param, + local_tensor=local_tensor, device_mesh=tp_mesh, - placements=[Shard(tp_dim)], + placements=placements, run_check=run_check, shape=global_shape, stride=torch.empty(global_shape).stride(), @@ -3759,7 +3780,7 @@ def make_fsdp_dtensor( # Get FSDP-configured mesh and placements from provided param device_mesh, placements = _get_fsdp_tensor_spec( - param, dist_index, is_sharded_param=is_sharded_param + param, dist_index, is_sharded_param=is_sharded_param, is_expert_param=is_expert_param ) # Reshape local tensor for sharded layouts beyond 1D diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py index 523d8fae333..490d80c0f21 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py @@ -365,7 +365,9 @@ def _assemble_full_tensor_from_uneven_chunks( # Wrap into a replicated DTensor and return return DTensor.from_local( - full_tensor, placements=[Replicate()], device_mesh=dtensor.device_mesh + full_tensor, + placements=[Replicate()] * len(dtensor.placements), + device_mesh=dtensor.device_mesh, ) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py index 1dfe08b90f4..b94a332bb0d 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py @@ -675,6 +675,7 @@ def __init__( tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, hsdp_outer_dp_shard: bool = False, + expt_device_mesh: Optional[DeviceMesh] = None, ): """ Args: @@ -691,6 +692,8 @@ def __init__( in hybrid FSDP. 
Specifying outer sharding will lift the bucket sharding coordinate system to flattened ranks of (dp_shard, dp_outer) instead of just sharding across dp_shard ranks and replicating across dp_outer ranks. + expt_device_mesh (Optional[DeviceMesh]): The expert parallel device mesh + to use for the DistributedIndex. """ # Device mesh arguments. self.device_mesh = device_mesh @@ -701,6 +704,11 @@ def __init__( self.use_hybrid_fsdp = dp_outer_dim is not None # Helper flag to denote if we are outer-sharding in hybrid FSDP. self.hsdp_outer_dp_shard = hsdp_outer_dp_shard + self.expt_device_mesh = expt_device_mesh + + # Handling the situation where M-Core MoE EP=1 + if self.expt_device_mesh is None: + self.expt_device_mesh = device_mesh # Hybrid FSDP Process Groups # Retrieve the FSDP process group from the DeviceMesh. @@ -719,6 +727,14 @@ def __init__( # combination of the outer-FSDP and FSDP process groups. self.hybrid_fsdp_group = hybrid_fsdp_group + # Retrieve the expert parallel process groups from the DeviceMesh. + self.expt_fsdp_group = ( + self.expt_device_mesh[self.dp_shard_dim].get_group() + if self.expt_device_mesh is not None + and contains_submesh(self.expt_device_mesh, self.dp_shard_dim) + else None + ) + """ Store a persistent reference to the core device meshes that back Megatron-FSDP. This is necessary because _MeshEnv (_mesh_resources) may not persist: @@ -732,26 +748,33 @@ def __init__( FIXME(@cspades): Identify the root cause of this behavior. 
""" self.mesh_library = {} - # TP Mesh + + def register_submesh(device_mesh, submesh, is_expert_parallel): + """Register a submesh with identifier: (*submesh, is_expert_parallel) + in the mesh library.""" + if contains_submesh(device_mesh, submesh): + submesh_identifier = tuple(list(submesh) + [is_expert_parallel]) + self.mesh_library[submesh_identifier] = device_mesh[submesh] + + # Define common submesh patterns tp_submesh = (self.tp_dim,) - if contains_submesh(self.device_mesh, tp_submesh): - self.mesh_library[tp_submesh] = self.device_mesh[tp_submesh] - # HSDP-TP Mesh hsdp_tp_submesh = (self.dp_outer_dim, self.dp_shard_dim, self.tp_dim) - if contains_submesh(self.device_mesh, hsdp_tp_submesh): - self.mesh_library[hsdp_tp_submesh] = self.device_mesh[hsdp_tp_submesh] - # FSDP-TP Mesh fsdp_tp_submesh = (self.dp_shard_dim, self.tp_dim) - if contains_submesh(self.device_mesh, fsdp_tp_submesh): - self.mesh_library[fsdp_tp_submesh] = self.device_mesh[fsdp_tp_submesh] - # HSDP Mesh hsdp_submesh = (self.dp_outer_dim, self.dp_shard_dim) - if contains_submesh(self.device_mesh, hsdp_submesh): - self.mesh_library[hsdp_submesh] = self.device_mesh[hsdp_submesh] - # FSDP Mesh fsdp_submesh = (self.dp_shard_dim,) - if contains_submesh(self.device_mesh, fsdp_submesh): - self.mesh_library[fsdp_submesh] = self.device_mesh[fsdp_submesh] + + # Register non-EP submeshes + register_submesh(self.device_mesh, tp_submesh, False) + register_submesh(self.device_mesh, hsdp_tp_submesh, False) + register_submesh(self.device_mesh, fsdp_tp_submesh, False) + register_submesh(self.device_mesh, hsdp_submesh, False) + register_submesh(self.device_mesh, fsdp_submesh, False) + + # Register EP submeshes + if self.expt_device_mesh is not None: + register_submesh(self.expt_device_mesh, tp_submesh, True) + register_submesh(self.expt_device_mesh, fsdp_tp_submesh, True) + register_submesh(self.expt_device_mesh, fsdp_submesh, True) # Validate FSDP arguments. 
if self.fsdp_group is None: @@ -776,36 +799,54 @@ def __init__( "process groups or sub-meshes." ) - def get_submesh(self, mesh_dim_names: str | Sequence[str]) -> DeviceMesh: + def get_submesh( + self, mesh_dim_names: str | Sequence[str], is_expert_parallel: bool = False + ) -> DeviceMesh: """ - Retrieve an Megatron-FSDP-registered sub-mesh by name(s). + Retrieve an Megatron-FSDP-registered submesh by name(s). """ if isinstance(mesh_dim_names, str): mesh_dim_names = (mesh_dim_names,) - # Search for the sub-mesh in the mesh library. - device_submesh = self.mesh_library.get(tuple(mesh_dim_names), None) + + # Construct submesh identifier: (*mesh_dim_names, is_expert_parallel) + submesh_identifier = tuple(list(mesh_dim_names) + [is_expert_parallel]) + + # Retrieve the submesh from the mesh library + device_submesh = self.mesh_library.get(submesh_identifier, None) + if device_submesh is None: - if self.tp_dim is None: - # Warn about not specifying tp_dim for - # layers or frameworks that depend on this. + # Warn about not specifying tp_dim for layers or frameworks that depend on this. + if self.tp_dim is None and not is_expert_parallel: logger.warning( - "[FSDPDistributedIndex] Note: For TransformerEngine, or other machine learning " - "frameworks like Megatron that assume TP=1, you must specify tp_dim to use " - "Megatron-FSDP. Create a trivial TP dimension by setting the TP dimension size " + "[FSDPDistributedIndex] Note: For TransformerEngine, or " + "other machine learning frameworks like Megatron that assume " + "TP=1, you must specify tp_dim to use Megatron-FSDP. " + "Create a trivial TP dimension by setting the TP dimension size " "to 1 in the DeviceMesh.\n" f"DeviceMesh: {self.device_mesh}" ) + elif self.tp_dim is None and is_expert_parallel: + logger.warning( + "[FSDPDistributedIndex] Note: For TransformerEngine, or " + "other machine learning frameworks like Megatron that assume " + "ETP=1, you must specify tp_dim to use Megatron-FSDP. 
" + "Create a trivial ETP dimension by setting the ETP dimension size " + "to 1 in the DeviceMesh.\n" + f"DeviceMesh: {self.expt_device_mesh}" + ) + raise ValueError( - f"[FSDPDistributedIndex][get_submesh] No sub-mesh with " - f"mesh_dim_names={mesh_dim_names} has been registered with Megatron-FSDP." + f"[FSDPDistributedIndex][get_submesh] No submesh with " + f"mesh_dim_names={mesh_dim_names}, is_expert_parallel={is_expert_parallel} " + f"has been registered with Megatron-FSDP." ) + return device_submesh def get_dp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: """Get the data parallel process group.""" if is_expert_parallel: - # Expert parallel is not supported - return None + return self.expt_fsdp_group if self.use_hybrid_fsdp: return self.hybrid_fsdp_group return self.fsdp_group @@ -813,8 +854,7 @@ def get_dp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: def get_fsdp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: """Get the FSDP process group.""" if is_expert_parallel: - # Expert parallel is not supported - return None + return self.expt_fsdp_group return self.fsdp_group def get_outer_fsdp_group(self) -> ProcessGroup: @@ -826,7 +866,7 @@ def get_outer_fsdp_group(self) -> ProcessGroup: def get_root_mesh(self, is_expert_parallel: bool = False) -> DeviceMesh: """Get the device mesh.""" if is_expert_parallel: - raise NotImplementedError("Expert parallel is not supported in Megatron-FSDP.") + return self.expt_device_mesh return self.device_mesh def get_logical_hybrid_fsdp_rank(self): @@ -924,3 +964,29 @@ def create_updated_function_signature(original_function, **extended_kwargs: dict # Return the updated function signature. return inspect.Signature(params) + + +def is_mcore_tensor_model_parallel(param: torch.Tensor) -> bool: + """ + Check if the given parameter is Megatron-Core tensor model parallel. 
+ """ + return getattr(param, "_mcore_tp", False) or getattr(param, "tensor_model_parallel", False) + + +def is_mcore_tensor_parallel_duplicated(param: torch.Tensor) -> bool: + """ + Check if the given parameter is Megatron-Core tensor model parallel and duplicated. + """ + return getattr(param, "_tp_duplicated", False) + + +def get_mcore_tensor_parallel_partition_dim(param: torch.Tensor) -> Optional[int]: + """ + Get the partition dimension for a Megatron-Core tensor model parallel parameter. + """ + if is_mcore_tensor_model_parallel(param): + if hasattr(param, "_tp_partition_dim"): + return param._tp_partition_dim + else: + return param.partition_dim + return None diff --git a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py index 507472f789f..455a7757d28 100644 --- a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +++ b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py @@ -130,9 +130,9 @@ def forward(self, max_seq_len: int, offset: int = 0, packed_seq: bool = False) - self.original_max_position_embeddings, self.correction_range_round_to_int, ) - inv_freq_mask = 1.0 - _yarn_linear_ramp_mask(low, high, self.dim // 2).to( - device=self.inv_freq_extra.device, dtype=torch.float32 - ) + inv_freq_mask = 1.0 - _yarn_linear_ramp_mask( + low, high, self.dim // 2, device=self.inv_freq_extra.device + ).to(dtype=torch.float32) inv_freq = self.inv_freq_inter * (1 - inv_freq_mask) + self.inv_freq_extra * inv_freq_mask seq = ( @@ -211,11 +211,11 @@ def _yarn_find_correction_range( return max(low, 0), min(high, dim - 1) # Clamp values just in case -def _yarn_linear_ramp_mask(min: float, max: float, dim: int) -> Tensor: +def _yarn_linear_ramp_mask(min: float, max: float, dim: int, device: torch.device) -> Tensor: if min == max: max += 0.001 # Prevent singularity - linear_func = (torch.arange(dim, dtype=torch.float32) - min) / (max - min) + linear_func = 
(torch.arange(dim, dtype=torch.float32, device=device) - min) / (max - min) ramp_func = torch.clamp(linear_func, 0, 1) return ramp_func diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 307538fad22..c254b2f6882 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -34,6 +34,7 @@ from megatron.core import parallel_state from megatron.core.optimizer.cpu_offloading.hybrid_optimizer import HybridDeviceOptimizer from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.transformer.fsdp_dtensor_checkpoint import get_global_unique_param_name from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer from ..transformer.module import MegatronModule @@ -481,6 +482,7 @@ def get_megatron_optimizer( use_gloo_process_groups: bool = True, default_skip_embedding_weight_decay: bool = False, pg_collection: Optional[ProcessGroupCollection] = None, + dump_param_to_param_group_map: Optional[str] = None, ) -> MegatronOptimizer: """Retrieve the Megatron optimizer for model chunks. @@ -502,6 +504,7 @@ def get_megatron_optimizer( This is useful if you do not want embeddings to shrink to zero in training as recommended in https://arxiv.org/abs/2312.16903 pg_collection: Optional unified process group for distributed training. + dump_param_to_param_group_map (Optional[str]): path to dump parameter to param group map. Returns: Instance of MegatronOptimizer. 
@@ -579,6 +582,9 @@ def get_megatron_optimizer( return ChainedOptimizer(optimizers) + if dump_param_to_param_group_map is not None: + param_to_param_group = {} + param_group_id = 0 for dense_model_chunks, overlap_param_gather_with_optimizer_step in zip( all_dense_model_chunks, overlap_param_gather_with_optimizer_step_flags ): @@ -597,6 +603,12 @@ def get_megatron_optimizer( model_chunk.overlap_param_gather_with_optimizer_step = ( overlap_param_gather_with_optimizer_step ) + if dump_param_to_param_group_map is not None: + for param_group in param_groups: + for param in param_group["params"]: + param_name = get_global_unique_param_name(model_chunks, param) + param_to_param_group[param_name] = param_group_id + param_group_id += 1 # Pass Gloo process groups into optimizer only if needed. optimizers.append( @@ -626,6 +638,12 @@ def get_megatron_optimizer( buffer_name='expert_parallel_buffers', default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, ) + if dump_param_to_param_group_map is not None: + for param_group in moe_param_groups: + for param in param_group["params"]: + param_name = get_global_unique_param_name(model_chunks, param) + param_to_param_group[param_name] = param_group_id + param_group_id += 1 if len(moe_param_groups) > 0: expt_model_parallel_rank = get_pg_rank(expt_tp_pp_group) # Pass Gloo process groups into optimizer only if needed. 
@@ -648,4 +666,9 @@ def get_megatron_optimizer( ) ) + if dump_param_to_param_group_map is not None: + torch.distributed.checkpoint.save( + state_dict=param_to_param_group, checkpoint_id=dump_param_to_param_group_map + ) + return ChainedOptimizer(optimizers) diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index 2925edcce60..8b4740516e2 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -47,6 +47,7 @@ from ..dist_checkpointing.utils import extract_sharded_tensors_and_factories from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer, partition_buckets from ..fp8_utils import dequantize_fp8_tensor, is_float8tensor, quantize_param_shard +from ..transformer.fsdp_dtensor_checkpoint import handle_experts_in_state_dict from ..transformer.module import MegatronModule from .grad_scaler import MegatronGradScaler from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper, param_group_identifier_keys @@ -1152,6 +1153,7 @@ def _param_name(self, param: torch.nn.Parameter) -> str: "Ensure that each model chunk has unique parameter names." ) name_to_param.update(_name_to_param) + name_to_param = handle_experts_in_state_dict(name_to_param) self.param_to_name = {param: name for name, param in name_to_param.items()} assert ( param in self.param_to_name diff --git a/megatron/core/transformer/fsdp_dtensor_checkpoint.py b/megatron/core/transformer/fsdp_dtensor_checkpoint.py index dad1947a183..9ef3f1f1b82 100644 --- a/megatron/core/transformer/fsdp_dtensor_checkpoint.py +++ b/megatron/core/transformer/fsdp_dtensor_checkpoint.py @@ -12,18 +12,160 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging +import re + import torch +import torch.distributed as dist +from torch.distributed.checkpoint import default_planner + +logger = logging.getLogger(__name__) try: + from torch.distributed import DeviceMesh + from torch.distributed._tensor import DTensor + from torch.distributed.checkpoint.metadata import TensorStorageMetadata + from torch.distributed.tensor.placement_types import Replicate, Shard + from megatron.core.distributed.fsdp.src.megatron_fsdp.param_and_grad_buffer import ( make_fsdp_dtensor, ) + from megatron.core.distributed.fsdp.src.megatron_fsdp.uneven_dtensor import ( + gather_uneven_dtensor_to_full_tensor, + ) + from megatron.core.distributed.fsdp.src.megatron_fsdp.utils import ( + get_mcore_tensor_parallel_partition_dim, + is_mcore_tensor_model_parallel, + ) HAVE_MEGATRON_FSDP = True except ImportError: HAVE_MEGATRON_FSDP = False +from megatron.core import parallel_state from megatron.core.tensor_parallel.layers import copy_tensor_model_parallel_attributes +from megatron.core.transformer.transformer_layer import TransformerLayer + + +def get_ep_layer_offset(): + """ + Get the expert layer offset for the current model. + """ + from megatron.training.global_vars import get_args + + args = get_args() + ep_size = parallel_state.get_expert_model_parallel_world_size() + ep_rank = parallel_state.get_expert_model_parallel_rank() + num_local_experts = args.num_experts // ep_size if args.num_experts else 0 + local_expert_offset = ep_rank * num_local_experts + + return local_expert_offset + + +def get_total_num_experts(): + """ + Get the total number of experts for the current model. + """ + from megatron.training.global_vars import get_args + + args = get_args() + return args.num_experts if args.num_experts else 0 + + +def get_expert_index_from_key(key): + """Extract expert index from various expert key formats. 
+ + Supported formats: + - GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0' + - SequentialMLP: 'mlp.experts.local_experts.0.linear_fc1.weight', + 'mlp.experts.local_experts.0.linear_fc2.weight' + + Returns: + int: Expert index if found, None otherwise. + """ + # GroupedMLP: index is at the end after 'weight' + if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key: + m = re.search(r'^.*\.mlp\.experts\.linear_fc\d\.weight(\d+)', key) + assert m, f"Failed to parse expert index from key: {key}" + return int(m.group(1)) + # SequentialMLP: index is between 'local_experts.' and next '.' + elif 'mlp.experts.local_experts' in key: + m = re.search(r'^.*\.mlp\.experts\.local_experts\.(\d+)', key) + assert m, f"Failed to parse expert index from key: {key}" + return int(m.group(1)) + return None + + +def handle_experts_in_state_dict(state_dict): + """ + Rewrite expert keys in state dict. + """ + local_expert_start = get_ep_layer_offset() + local_expert_end = get_total_num_experts() + + def should_keep_expert_key(expert_index): + """Determine if this rank should keep this expert key based on expert index""" + if expert_index is None: + # If we can't determine expert index, keep the key (non-expert weights) + return True + + # Check if this expert belongs to this rank + return local_expert_start <= expert_index < local_expert_end + + def replace_expert_index_in_key(key, expert_index, state_dict): + """Replace expert index in key with new index corresponding to the current rank""" + new_expert_index = expert_index + local_expert_start + # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0' + if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key: + # Handle SwiGLU weight{idx}_w and weight{idx}_v format + if key.endswith('_w') or key.endswith('_v'): + suffix = key[-2:] # '_w' or '_v' + new_key = key.replace( + f'weight{expert_index}{suffix}', 
f'weight{new_expert_index}{suffix}' + ) + # Handle regular weight{idx} format + else: + new_key = key.replace(f'weight{expert_index}', f'weight{new_expert_index}') + # SequentialMLP: index is between 'local_experts.' and next '.' + elif 'mlp.experts.local_experts' in key: + new_key = key.replace( + f'local_experts.{expert_index}.', f'local_experts.{new_expert_index}.' + ) + else: + raise ValueError(f"Unexpected expert key format: {key}") + + state_dict[new_key] = state_dict[key] + del state_dict[key] + + # Process model state dict + state_dict = state_dict.copy() + for key in list(state_dict.keys()): + expert_index = get_expert_index_from_key(key) + if not should_keep_expert_key(expert_index): + replace_expert_index_in_key(key, expert_index, state_dict) + + return state_dict + + +def expert_param_local_key(key): + """Get the module parameter corresponding to the key.""" + local_expert_offset = get_ep_layer_offset() + expert_index = get_expert_index_from_key(key) + if expert_index is not None: + new_expert_index = expert_index - local_expert_offset + # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0' + if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key: + new_key = key.replace(f'weight{expert_index}', f'weight{new_expert_index}') + # SequentialMLP: index is between 'local_experts.' and next '.' + elif 'mlp.experts.local_experts' in key: + new_key = key.replace( + f'local_experts.{expert_index}.', f'local_experts.{new_expert_index}.' + ) + else: + raise ValueError(f"Unexpected expert key format: {key}") + key = new_key + + return key def handle_swiglu_in_state_dict(model, model_state_dict, optimizer_state_dict): @@ -43,7 +185,29 @@ def intersection(s1, s2): def offset_slice(s, offset): return slice(s.start + offset, s.stop + offset) - def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): + def is_swiglu_key(key): + """ + Check if this key should be handled as SwiGLU linear_fc1 weight or bias. 
+ """ + # Non-expert MLP: 'mlp.linear_fc1.weight', 'mlp.linear_fc1.bias' + # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc1.bias0' + # SequentialMLP: 'mlp.experts.local_experts.0.linear_fc1.weight', + # 'mlp.experts.local_experts.0.linear_fc1.bias' + return any( + re.search(pat, key) + for pat in [ + r"(.*)\.mlp\.linear_fc1\.weight$", + r"(.*)\.mlp\.linear_fc1\.bias$", + r"(.*)\.mlp\.experts\.linear_fc1\.weight(\d+)$", + r"(.*)\.mlp\.experts\.linear_fc1\.bias(\d+)$", + r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.weight$", + r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.bias$", + r"(.*)\.mlp\.shared_experts\.linear_fc1\.weight$", + r"(.*)\.mlp\.shared_experts\.linear_fc1\.bias$", + ] + ) + + def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis, is_expert_param): """ Split the SWiGLU linear_fc1 parameter into two parts: weight_w and weight_v. """ @@ -55,7 +219,9 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): fsdp_slice = dist_param.megatron_fsdp_slice megatron_fsdp_dist_index = dist_param.megatron_fsdp_dist_index - tp_mesh = megatron_fsdp_dist_index.get_submesh([megatron_fsdp_dist_index.tp_dim]) + tp_mesh = megatron_fsdp_dist_index.get_submesh( + [megatron_fsdp_dist_index.tp_dim], is_expert_parallel=is_expert_param + ) data_size = data.numel() // tp_mesh.mesh.numel() w_slice = slice(0, data_size // 2) v_slice = slice(data_size // 2, data_size) @@ -75,8 +241,9 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): # Fake parameters w and v are used to provide the correct parameter # shape and Tensor-Parallelism information. 
per_tp_rank_shape = list(data.shape) - if getattr(dist_param, "tensor_model_parallel", False): - tp_dim = dist_param.partition_dim + if is_mcore_tensor_model_parallel(dist_param): + tp_dim = get_mcore_tensor_parallel_partition_dim(dist_param) + assert tp_dim is not None, "Tensor model parallel dimension not found" per_tp_rank_shape[tp_dim] //= tp_mesh.mesh.numel() linear_fc1_meta = torch.empty(*per_tp_rank_shape, device="meta") w_meta, v_meta = torch.chunk(linear_fc1_meta, 2, dim=swiglu_shard_axis) @@ -87,6 +254,7 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): weight_w.data, w_meta, dist_index=megatron_fsdp_dist_index, + is_expert_param=is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, ) @@ -94,16 +262,21 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): weight_v.data, v_meta, dist_index=megatron_fsdp_dist_index, + is_expert_param=is_expert_param, run_check=True, update_uneven_dtensor_chunk_meta=True, ) return weight_w, weight_v + model_state_dict = model_state_dict.copy() for key in list(model_state_dict.keys()): - if key.endswith('mlp.linear_fc1.weight') or key.endswith('mlp.linear_fc1.bias'): + if is_swiglu_key(key): dist_param = model.get_parameter(f"module.{key}") weight_w, weight_v = split_swiglu_linear_fc1( - model_state_dict[key], dist_param, swiglu_shard_axis=0 + model_state_dict[key], + dist_param, + swiglu_shard_axis=0, + is_expert_param='mlp.experts' in key, ) # Update the model state dict with the new keys @@ -111,26 +284,32 @@ def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis): model_state_dict[f"{key}_v"] = weight_v del model_state_dict[key] - try: - optimizer_state_dict = optimizer_state_dict["state"] - except KeyError: - optimizer_state_dict = {} + if optimizer_state_dict is not None: + optimizer_state_dict = optimizer_state_dict.copy() + if len(optimizer_state_dict["state"]) != 0: + opt_state_dict = optimizer_state_dict["state"] + new_opt_state_dict = {} + for key in 
list(opt_state_dict.keys()): + # Only process SWIGLU keys + if not is_swiglu_key(key): + new_opt_state_dict[key] = opt_state_dict[key] + continue + new_opt_state_dict[f"{key}_w"] = opt_state_dict[key].copy() + new_opt_state_dict[f"{key}_v"] = opt_state_dict[key].copy() + for subkey in ["exp_avg", "exp_avg_sq"]: + dist_param = model.get_parameter(expert_param_local_key(key[len("module.") :])) + weight_w, weight_v = split_swiglu_linear_fc1( + opt_state_dict[key][subkey], + dist_param, + swiglu_shard_axis=0, + is_expert_param="mlp.experts" in key, + ) + # Update the optimizer state dict with the new keys + new_opt_state_dict[f"{key}_w"][subkey] = weight_w + new_opt_state_dict[f"{key}_v"][subkey] = weight_v + optimizer_state_dict["state"] = new_opt_state_dict - if len(optimizer_state_dict) != 0: - for key in list(optimizer_state_dict.keys()): - if not (key.endswith('mlp.linear_fc1.weight') or key.endswith('mlp.linear_fc1.bias')): - continue - optimizer_state_dict[f"{key}_w"] = optimizer_state_dict[key].copy() - optimizer_state_dict[f"{key}_v"] = optimizer_state_dict[key].copy() - for subkey in ["exp_avg", "exp_avg_sq"]: - dist_param = model.get_parameter(key[len("module.") :]) - weight_w, weight_v = split_swiglu_linear_fc1( - optimizer_state_dict[key][subkey], dist_param, swiglu_shard_axis=0 - ) - # Update the optimizer state dict with the new keys - optimizer_state_dict[f"{key}_w"][subkey] = weight_w - optimizer_state_dict[f"{key}_v"][subkey] = weight_v - del optimizer_state_dict[key] + return model_state_dict, optimizer_state_dict def handle_fp8_extra_state_case(model_state_dict): @@ -162,7 +341,7 @@ def flatten_state_dict(obj, parent_key="", sep="."): return items -def print_diff_in_state_dicts(state_dict_metadata, load_state_dict): +def print_diff_in_state_dicts(state_dict_metadata, load_state_dict, limit=100): """ Print the differences between two state dicts: metadata state dict and load state dict. 
This function compares the keys and shapes of the tensors in both dicts. @@ -172,24 +351,105 @@ def print_diff_in_state_dicts(state_dict_metadata, load_state_dict): meta_keys = set(state_dict_metadata.keys()) load_keys = set(load_state_dict.keys()) - only_in_meta = meta_keys - load_keys - only_in_load = load_keys - meta_keys - in_both = meta_keys & load_keys + only_in_meta = list(meta_keys - load_keys) + only_in_load = list(load_keys - meta_keys) + in_both = list(meta_keys & load_keys) - print("Keys only in checkpoint metadata_state_dict:") - for k in sorted(only_in_meta): - print(f" {k}") + logger.info(f"Keys only in checkpoint metadata_state_dict(first {limit}):") + for k in sorted(only_in_meta[:limit]): + logger.info(f" {k}") - print("\nKeys only in load_state_dict:") - for k in sorted(only_in_load): - print(f" {k}") + logger.info(f"\nKeys only in load_state_dict(first {limit}):") + for k in sorted(only_in_load[:limit]): + logger.info(f" {k}") - print("\nKeys in both but with different shapes:") - for k in sorted(in_both): + logger.info(f"\nKeys in both but with different shapes(first {limit}):") + for k in sorted(in_both[:limit]): v_meta = state_dict_metadata[k] v_load = load_state_dict[k] # If tensors, compare shape; else, compare type/values meta_shape = v_meta.size if hasattr(v_meta, "size") else type(v_meta) load_shape = v_load.shape if hasattr(v_load, "shape") else type(v_load) if meta_shape != load_shape: - print(f" {k}: meta shape={meta_shape}, load shape={load_shape}") + logger.info(f" {k}: meta shape={meta_shape}, load shape={load_shape}") + + +def validate_loaded_state_dict(state_dict, checkpoint_path): + """ + Validate the loaded state dict against the expected structure and types. + """ + assert HAVE_MEGATRON_FSDP, "This function requires Megatron-FSDP to be installed." 
+ + # Initialize reader + reader = torch.distributed.checkpoint.FileSystemReader(checkpoint_path) + metadata = reader.read_metadata() + flat_state_dict = flatten_state_dict(state_dict) + + for key, value in flat_state_dict.items(): + tensor_metadata = metadata.state_dict_metadata[key] + + if not isinstance(tensor_metadata, TensorStorageMetadata): + continue + if not isinstance(value, DTensor): + load_item_dict = {key: torch.empty_like(value)} + else: + load_item_dict = { + key: torch.distributed.tensor.empty( + tensor_metadata.size, + dtype=tensor_metadata.properties.dtype, + device_mesh=DeviceMesh.from_group( + group=dist.group.WORLD, + device_type="cuda", + mesh=torch.arange(dist.get_world_size()), + mesh_dim_names=("world",), + ), + placements=[Shard(0)], + ) + } + torch.distributed.checkpoint.load( + load_item_dict, storage_reader=reader, planner=default_planner.DefaultLoadPlanner() + ) + if isinstance(value, DTensor): + full_value = gather_uneven_dtensor_to_full_tensor(value) + loaded_tensor = load_item_dict[key].redistribute( + placements=[Replicate()] * len(value.placements) + ) + assert torch.allclose( + loaded_tensor._local_tensor, full_value._local_tensor, atol=1e-8, rtol=1e-5 + ), f"key: {key}; {loaded_tensor} {full_value}" + else: + assert torch.allclose( + value, load_item_dict[key] + ), f"key: {key}; {value} {load_item_dict[key]}" + + +def get_global_unique_param_name(model_chunks, param): + """ + Get the global unique parameter name for a given model and parameter. 
+ """ + param_name = None + for model in model_chunks: + for name, p in model.named_parameters(): + if p is param: + param_name = name + break + if param_name is None: + raise ValueError("Parameter not found in model chunks") + + # Get PP unique parameter name + if re.search(r"layers\.(\d+)", param_name) and "mtp" not in param_name: + tf_layer_number = -1 + for module in model.modules(): + if not isinstance(module, TransformerLayer): + continue + for p in module.parameters(): + if p is param: + tf_layer_number = module.layer_number + break + if tf_layer_number != -1: + param_name = re.sub(r"layers\.(\d+)", f"layers.{tf_layer_number - 1}", param_name) + + # Get EP unique parameter name + param_name = list(handle_experts_in_state_dict({param_name: None}).keys())[0] + + return param_name diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 8e5f343b73c..cd1de6a5118 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -2271,6 +2271,10 @@ def _add_training_args(parser): help="Use torch.optim.Optimizer instead of Megatron's optimizer in optimizer cpu offload mode.") group.add_argument('--overlap-cpu-optimizer-d2h-h2d', action='store_true', default=False, help='Overlap CPU optimizer step, gradients D2H and updated parameters H2D.') + group.add_argument('--dump-param-to-param-group-map', type=str, default=None, + help="Path to a file containing parameter-to-parameter-group mapping. 
" + "Provide a JSON file that specifies which parameters belong to which " + "parameter group for global coordination.") group.add_argument('--no-pin-cpu-grads', action='store_false', dest='pin_cpu_grads', help='Disable pinning of CPU memory for gradients.') group.add_argument('--no-pin-cpu-params', action='store_false', dest='pin_cpu_params', diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index 71b9cd97021..93c23255f4c 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -42,9 +42,10 @@ try: from megatron.core.distributed.fsdp.src.megatron_fsdp.uneven_dtensor import preprocess_state_dict_for_uneven_dtensor from megatron.core.transformer.fsdp_dtensor_checkpoint import ( + print_diff_in_state_dicts, handle_fp8_extra_state_case, handle_swiglu_in_state_dict, - print_diff_in_state_dicts, + handle_experts_in_state_dict, ) HAVE_MEGATRON_FSDP = True except ImportError: @@ -561,6 +562,9 @@ def save_checkpoint(iteration, model, optimizer, opt_param_scheduler, num_floati # TODO Handle non-empty directories (e.g., after a crash during saving). 
ensure_directory_exists(checkpoint_name, check_parent=False) + if ckpt_format == "fsdp_dtensor": + state_dict = preprocess_fsdp_dtensor_state_dict(args, state_dict, model[0]) + fs_storage_writer = torch.distributed.checkpoint.FileSystemWriter(checkpoint_name) torch.distributed.checkpoint.save( state_dict=state_dict, @@ -784,9 +788,17 @@ def maybe_save_dataloader_state(train_iterator, iteration, dataloader_save_path) torch.save(dataloader_save_dict, data_state_save_path) -def generate_state_dict(args, model, optimizer, opt_param_scheduler, - rng_state, iteration=None, - optim_sd_kwargs=None, model_sd_kwargs=None, rerun_state=None): +def generate_state_dict( + args, + model, + optimizer, + opt_param_scheduler, + rng_state, + iteration=None, + optim_sd_kwargs=None, + model_sd_kwargs=None, + rerun_state=None, +): """Generate a state dict from given model, optimizer, scheduler, rng state and others. """ # Arguments, iteration, and model. @@ -839,16 +851,27 @@ def generate_state_dict(args, model, optimizer, opt_param_scheduler, if not args.no_save_rng and rng_state: state_dict["rng_state"] = rng_state - # fsdp_dtensor ckpt specific state dict preprocessing - if args.ckpt_format == "fsdp_dtensor": - assert HAVE_MEGATRON_FSDP, "Megatron FSDP is enabled but Megatron-FSDP is not available." - assert len(model) == 1, "FSDP DTensor checkpoints are not supported for multiple models." 
- if args.swiglu: - state_dict = state_dict.copy() - handle_swiglu_in_state_dict( - model[0], state_dict["model"], state_dict["optimizer"]) - handle_fp8_extra_state_case(state_dict["model"]) - preprocess_state_dict_for_uneven_dtensor(state_dict) + return state_dict + + +def preprocess_fsdp_dtensor_state_dict(args, raw_state_dict, model): + state_dict = raw_state_dict.copy() + handle_fp8_extra_state_case(state_dict["model"]) + if args.swiglu: + if "optimizer" in state_dict: + model_state_dict, optimizer_state_dict = handle_swiglu_in_state_dict( + model, state_dict["model"], state_dict["optimizer"] + ) + state_dict["model"] = model_state_dict + state_dict["optimizer"] = optimizer_state_dict + else: + model_state_dict, _ = handle_swiglu_in_state_dict( + model, state_dict["model"], None + ) + state_dict["model"] = model_state_dict + if args.num_experts: + state_dict["model"] = handle_experts_in_state_dict(state_dict["model"]) + preprocess_state_dict_for_uneven_dtensor(state_dict) return state_dict @@ -1169,6 +1192,12 @@ def _load_base_checkpoint( if rank0: return {}, checkpoint_name, release, CheckpointType.FSDP_DTENSOR + state_dict = sharded_state_dict + raw_optimizer_state_dict = state_dict["optimizer"].copy() if "optimizer" in state_dict else None + raw_model_state_dict = state_dict["model"].copy() if "model" in state_dict else None + model = state_dict.pop("_model") + state_dict = preprocess_fsdp_dtensor_state_dict(args, state_dict, model[0]) + ckpt_type = CheckpointType.FSDP_DTENSOR fs_storage_reader = torch.distributed.checkpoint.FileSystemReader(checkpoint_name) allow_partial_load = not getattr(args, 'strict_fsdp_dtensor_load', False) @@ -1177,15 +1206,20 @@ def _load_base_checkpoint( rank = torch.distributed.get_rank() import time as _time _time.sleep(rank * 0.001) # Make that logs of different ranks do not overlap - print_diff_in_state_dicts(state_dict_metadata, sharded_state_dict) + print_diff_in_state_dicts(state_dict_metadata, state_dict) planner = 
default_planner.DefaultLoadPlanner(allow_partial_load=allow_partial_load) torch.distributed.checkpoint.load_state_dict( - state_dict=sharded_state_dict, + state_dict=state_dict, storage_reader=fs_storage_reader, planner=planner, ) - state_dict = sharded_state_dict + + if raw_optimizer_state_dict is not None: + state_dict["optimizer"] = raw_optimizer_state_dict + + if raw_model_state_dict is not None: + state_dict["model"] = raw_model_state_dict else: raise NotImplementedError(f"checkpoint format {ckpt_format} not supported") @@ -1520,7 +1554,7 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', except FileNotFoundError: state_dict_metadata = {} - gen_sd_rerun_state = None + gen_sd_rerun_state = {} gen_sd_opt_param_scheduler = None gen_sd_rng_state = None gen_sd_optim = None @@ -1537,7 +1571,7 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', optim_sd_kwargs = dict(metadata=_build_sharded_state_dict_metadata(args), is_loading=True) - load_kwargs["sharded_state_dict"] = generate_state_dict( + state_dict = generate_state_dict( args, model=model, optimizer=gen_sd_optim, @@ -1547,6 +1581,8 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', rerun_state=gen_sd_rerun_state, iteration=1, ) + state_dict["_model"] = model + load_kwargs["sharded_state_dict"] = state_dict state_dict, checkpoint_name, release, ckpt_type = _load_base_checkpoint( load_dir, args, rank0=False, checkpointing_context=checkpointing_context, diff --git a/megatron/training/training.py b/megatron/training/training.py index f805dab0f15..bda9e42dc82 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1210,6 +1210,7 @@ def setup_model_and_optimizer( # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903 
default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, + dump_param_to_param_group_map=args.dump_param_to_param_group_map, ) else: optimizer = get_megatron_muon_optimizer( diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json index 0f2637a9511..717ae3f5fa6 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04748, - "2": 11.03561, - "3": 9.58774, - "4": 9.25819, - "5": 9.53583, - "6": 9.8804, - "7": 9.48247, - "8": 8.93575, - "9": 8.65813, - "10": 9.0567, - "11": 8.49445, - "12": 8.52444, - "13": 8.45239, - "14": 7.97323, - "15": 8.0476, - "16": 8.07971, - "17": 8.09081, - "18": 7.76437, - "19": 8.14892, - "20": 7.89868, - "21": 7.59371, - "22": 7.54743, - "23": 7.43222, - "24": 7.4302, - "25": 7.67579, - "26": 7.06929, - "27": 7.62041, - "28": 7.32495, - "29": 7.49042, - "30": 7.64391, - "31": 7.39435, - "32": 7.58789, - "33": 7.64037, - "34": 7.69778, - "35": 7.20998, - "36": 7.08538, - "37": 7.42584, - "38": 7.18804, - "39": 7.55054, - "40": 7.54446, - "41": 7.49287, - "42": 7.24937, - "43": 7.23587, - "44": 7.41595, - "45": 7.18755, - "46": 6.89949, - "47": 7.29966, - "48": 7.14134, - "49": 7.58963, - "50": 7.03602 + "1": 11.04722, + "2": 11.03572, + "3": 9.58802, + "4": 9.25807, + "5": 9.46595, + "6": 9.99646, + "7": 9.50952, + "8": 8.97596, + "9": 8.64768, + "10": 9.40103, + "11": 8.86556, + "12": 8.63563, + "13": 8.52125, + "14": 8.08824, + "15": 8.1958, + "16": 8.22112, + "17": 8.14098, + "18": 7.8386, + "19": 8.23438, + "20": 7.95361, + "21": 
7.62549, + "22": 7.60352, + "23": 7.47957, + "24": 7.46573, + "25": 7.70343, + "26": 7.10719, + "27": 7.64313, + "28": 7.34582, + "29": 7.5169, + "30": 7.67511, + "31": 7.41799, + "32": 7.61213, + "33": 7.66582, + "34": 7.73101, + "35": 7.23081, + "36": 7.10765, + "37": 7.4476, + "38": 7.21053, + "39": 7.57508, + "40": 7.5662, + "41": 7.51605, + "42": 7.27243, + "43": 7.25706, + "44": 7.44, + "45": 7.21244, + "46": 6.92421, + "47": 7.32604, + "48": 7.17147, + "49": 7.62154, + "50": 7.0624 } }, "num-zeros": { @@ -62,55 +62,55 @@ "step_interval": 1, "values": { "1": 38802612.0, - "2": 38543592.0, - "3": 38739528.0, - "4": 279937824.0, - "5": 259189728.0, - "6": 271446400.0, - "7": 604773504.0, - "8": 768892544.0, - "9": 645824128.0, - "10": 744257088.0, - "11": 718888576.0, - "12": 746732544.0, - "13": 871990976.0, - "14": 821645632.0, - "15": 724250816.0, - "16": 932241472.0, - "17": 648958912.0, - "18": 649120000.0, - "19": 925992960.0, - "20": 989207936.0, - "21": 819324096.0, - "22": 736955072.0, - "23": 910497792.0, - "24": 876716672.0, - "25": 843170688.0, - "26": 809573824.0, - "27": 854086912.0, - "28": 802857664.0, - "29": 805523328.0, - "30": 775645184.0, - "31": 771754624.0, - "32": 749733696.0, - "33": 718385216.0, - "34": 724771200.0, - "35": 737655104.0, - "36": 690419968.0, - "37": 673203456.0, - "38": 627239552.0, - "39": 614047168.0, - "40": 607288512.0, - "41": 582590592.0, - "42": 548211200.0, - "43": 532740640.0, - "44": 554239168.0, - "45": 514790528.0, - "46": 350258560.0, - "47": 472420128.0, - "48": 453788736.0, - "49": 440597216.0, - "50": 303063296.0 + "2": 38543656.0, + "3": 38739356.0, + "4": 273649600.0, + "5": 252887040.0, + "6": 255692384.0, + "7": 598483264.0, + "8": 787737984.0, + "9": 696133120.0, + "10": 505146368.0, + "11": 718888640.0, + "12": 872597184.0, + "13": 947495104.0, + "14": 1076398976.0, + "15": 856390592.0, + "16": 1048635648.0, + "17": 831370688.0, + "18": 963679552.0, + "19": 970018240.0, + "20": 935737344.0, + "21": 
904189312.0, + "22": 887937280.0, + "23": 894777856.0, + "24": 703744192.0, + "25": 909232512.0, + "26": 875633216.0, + "27": 894981376.0, + "28": 919242816.0, + "29": 931351552.0, + "30": 929784768.0, + "31": 941621376.0, + "32": 885000768.0, + "33": 828484096.0, + "34": 822284800.0, + "35": 832032128.0, + "36": 787939392.0, + "37": 770719808.0, + "38": 561204672.0, + "39": 617201536.0, + "40": 695374592.0, + "41": 698978816.0, + "42": 692913728.0, + "43": 668003776.0, + "44": 673780992.0, + "45": 631182912.0, + "46": 444613312.0, + "47": 591957824.0, + "48": 617363968.0, + "49": 585295808.0, + "50": 570423872.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6637267456.0, - "2": 6637269504.0, - "3": 6637269504.0, - "4": 6637269504.0, - "5": 6637269504.0, - "6": 6637269504.0, - "7": 6637269504.0, - "8": 6637269504.0, - "9": 6637269504.0, - "10": 6637269504.0, - "11": 6637269504.0, - "12": 6637269504.0, - "13": 6637269504.0, - "14": 6637269504.0, - "15": 6637269504.0, - "16": 6637269504.0, - "17": 6637269504.0, - "18": 6637269504.0, - "19": 6637269504.0, - "20": 6637269504.0, - "21": 6637269504.0, - "22": 6637269504.0, - "23": 6637269504.0, - "24": 6637269504.0, - "25": 6637269504.0, - "26": 6637269504.0, - "27": 6637269504.0, - "28": 6637269504.0, - "29": 6637269504.0, - "30": 6637269504.0, - "31": 6637269504.0, - "32": 6637269504.0, - "33": 6637269504.0, - "34": 6637269504.0, - "35": 6637269504.0, - "36": 6637269504.0, - "37": 6637269504.0, - "38": 6637269504.0, - "39": 6637269504.0, - "40": 6637269504.0, - "41": 6637269504.0, - "42": 6637269504.0, - "43": 6637269504.0, - "44": 6637269504.0, - "45": 6637269504.0, - "46": 6637269504.0, - "47": 6637269504.0, - "48": 6637269504.0, - "49": 6637269504.0, - "50": 6637269504.0 + "1": 6637272576.0, + "2": 6637274624.0, + "3": 6637274624.0, + "4": 6637274624.0, + "5": 6637274624.0, + "6": 6637274624.0, + "7": 6637274624.0, + "8": 6637274624.0, + "9": 6637274624.0, 
+ "10": 6637274624.0, + "11": 6637274624.0, + "12": 6637274624.0, + "13": 6637274624.0, + "14": 6637274624.0, + "15": 6637274624.0, + "16": 6637274624.0, + "17": 6637274624.0, + "18": 6637274624.0, + "19": 6637274624.0, + "20": 6637274624.0, + "21": 6637274624.0, + "22": 6637274624.0, + "23": 6637274624.0, + "24": 6637274624.0, + "25": 6637274624.0, + "26": 6637274624.0, + "27": 6637274624.0, + "28": 6637274624.0, + "29": 6637274624.0, + "30": 6637274624.0, + "31": 6637274624.0, + "32": 6637274624.0, + "33": 6637274624.0, + "34": 6637274624.0, + "35": 6637274624.0, + "36": 6637274624.0, + "37": 6637274624.0, + "38": 6637274624.0, + "39": 6637274624.0, + "40": 6637274624.0, + "41": 6637274624.0, + "42": 6637274624.0, + "43": 6637274624.0, + "44": 6637274624.0, + "45": 6637274624.0, + "46": 6637274624.0, + "47": 6637274624.0, + "48": 6637274624.0, + "49": 6637274624.0, + "50": 6637274624.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 55055331328.0, - "2": 57809321984.0, - "3": 57918455808.0, - "4": 57918455808.0, - "5": 57918455808.0, - "6": 57918455808.0, - "7": 57918455808.0, - "8": 57918455808.0, - "9": 57918455808.0, - "10": 57918455808.0, - "11": 57918455808.0, - "12": 57918455808.0, - "13": 57931390976.0, - "14": 57931390976.0, - "15": 57931390976.0, - "16": 57931390976.0, - "17": 57931390976.0, - "18": 57931390976.0, - "19": 57931390976.0, - "20": 57931390976.0, - "21": 57931390976.0, - "22": 57931390976.0, - "23": 57931390976.0, - "24": 57931390976.0, - "25": 57931390976.0, - "26": 57931390976.0, - "27": 57931390976.0, - "28": 57931390976.0, - "29": 57931390976.0, - "30": 57931390976.0, - "31": 57931390976.0, - "32": 58003226624.0, - "33": 58003226624.0, - "34": 58003226624.0, - "35": 58003226624.0, - "36": 58003226624.0, - "37": 58003226624.0, - "38": 58003226624.0, - "39": 58003226624.0, - "40": 58003226624.0, - "41": 58003226624.0, - "42": 58003226624.0, - "43": 58003226624.0, - "44": 
58183614464.0, - "45": 58234208256.0, - "46": 58555555840.0, - "47": 58555555840.0, - "48": 58555555840.0, - "49": 58555555840.0, - "50": 58780934144.0 + "1": 55056003072.0, + "2": 57810763776.0, + "3": 57920647168.0, + "4": 57920647168.0, + "5": 57920647168.0, + "6": 57920647168.0, + "7": 57920647168.0, + "8": 57920647168.0, + "9": 57920647168.0, + "10": 57920647168.0, + "11": 57920647168.0, + "12": 57920647168.0, + "13": 57920647168.0, + "14": 57920647168.0, + "15": 57920647168.0, + "16": 57920647168.0, + "17": 57920647168.0, + "18": 57920647168.0, + "19": 57920647168.0, + "20": 57920647168.0, + "21": 57920647168.0, + "22": 57920647168.0, + "23": 57920647168.0, + "24": 57920647168.0, + "25": 57920647168.0, + "26": 57920647168.0, + "27": 57920647168.0, + "28": 57920647168.0, + "29": 57920647168.0, + "30": 57920647168.0, + "31": 57920647168.0, + "32": 57920647168.0, + "33": 57920647168.0, + "34": 57961472000.0, + "35": 57961472000.0, + "36": 57961472000.0, + "37": 57961472000.0, + "38": 57961472000.0, + "39": 57961472000.0, + "40": 57961472000.0, + "41": 57961472000.0, + "42": 57961472000.0, + "43": 57961472000.0, + "44": 57961472000.0, + "45": 57961472000.0, + "46": 57961472000.0, + "47": 57961472000.0, + "48": 57961472000.0, + "49": 57961472000.0, + "50": 57961472000.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07654, - "2": 11.07406, - "3": 10.53881, - "4": 10.09803, - "5": 9.81154, - "6": 10.06236, - "7": 9.79762, - "8": 9.07117, - "9": 8.87049, - "10": 9.127, - "11": 8.49853, - "12": 8.53046, - "13": 8.42444, - "14": 7.847, - "15": 7.99077, - "16": 8.05015, - "17": 8.00064, - "18": 7.73104, - "19": 8.11087, - "20": 7.82933, - "21": 7.52501, - "22": 7.49916, - "23": 7.36982, - "24": 7.37235, - "25": 7.61578, - "26": 7.02029, - "27": 7.56014, - "28": 7.2681, - "29": 7.44399, - "30": 7.58618, - "31": 7.32468, - "32": 7.50596, - "33": 7.5715, - "34": 7.63581, - "35": 7.15224, - "36": 7.01784, - "37": 
7.35163, - "38": 7.12551, - "39": 7.48656, - "40": 7.47408, - "41": 7.42096, - "42": 7.17595, - "43": 7.16059, - "44": 7.34289, - "45": 7.11969, - "46": 6.82753, - "47": 7.23525, - "48": 7.08042, - "49": 7.51043, - "50": 6.9735 + "1": 11.07648, + "2": 11.07404, + "3": 10.53854, + "4": 10.09813, + "5": 9.81166, + "6": 10.09741, + "7": 9.79481, + "8": 9.0642, + "9": 8.86016, + "10": 9.34039, + "11": 8.51318, + "12": 8.59467, + "13": 8.5292, + "14": 7.95757, + "15": 8.06962, + "16": 8.11802, + "17": 8.06993, + "18": 7.80587, + "19": 8.19192, + "20": 7.8906, + "21": 7.57063, + "22": 7.55091, + "23": 7.41606, + "24": 7.42454, + "25": 7.65274, + "26": 7.05583, + "27": 7.59747, + "28": 7.29984, + "29": 7.472, + "30": 7.61908, + "31": 7.35179, + "32": 7.52979, + "33": 7.59161, + "34": 7.66287, + "35": 7.17383, + "36": 7.04133, + "37": 7.37081, + "38": 7.1443, + "39": 7.50879, + "40": 7.48921, + "41": 7.43802, + "42": 7.19405, + "43": 7.17581, + "44": 7.35785, + "45": 7.13985, + "46": 6.84014, + "47": 7.25094, + "48": 7.09407, + "49": 7.52321, + "50": 6.98987 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 69.29797, - "2": 1.7261, - "3": 1.40981, - "4": 2.16562, - "5": 1.7862, - "6": 1.7469, - "7": 1.96688, - "8": 1.97301, - "9": 1.74665, - "10": 1.69613, - "11": 1.02979, - "12": 1.02408, - "13": 1.03261, - "14": 1.02432, - "15": 1.0529, - "16": 1.04491, - "17": 1.03693, - "18": 1.03399, - "19": 1.03627, - "20": 1.02284, - "21": 1.01667, - "22": 1.02932, - "23": 1.03591, - "24": 1.03466, - "25": 1.03149, - "26": 1.03165, - "27": 1.02342, - "28": 1.03777, - "29": 1.04061, - "30": 1.05641, - "31": 1.02382, - "32": 1.01775, - "33": 1.03039, - "34": 1.03693, - "35": 1.03153, - "36": 1.02699, - "37": 1.02756, - "38": 1.02919, - "39": 1.01773, - "40": 1.03491, - "41": 1.03152, - "42": 1.03035, - "43": 1.0221, - "44": 1.05201, - "45": 1.02579, - "46": 1.02798, - "47": 1.03857, - "48": 1.02772, - "49": 1.0408, - "50": 1.03745 + 
"1": 93.39829, + "2": 1.82958, + "3": 1.3241, + "4": 2.19661, + "5": 2.13156, + "6": 1.75452, + "7": 2.08539, + "8": 1.58016, + "9": 1.60816, + "10": 1.03407, + "11": 1.01797, + "12": 1.0168, + "13": 1.01666, + "14": 1.0748, + "15": 1.04137, + "16": 1.05864, + "17": 1.05961, + "18": 1.03233, + "19": 1.02728, + "20": 1.02917, + "21": 1.04313, + "22": 1.03054, + "23": 1.0313, + "24": 1.03789, + "25": 1.04414, + "26": 1.05561, + "27": 1.03361, + "28": 1.03142, + "29": 1.02437, + "30": 1.02195, + "31": 1.0172, + "32": 1.03318, + "33": 1.03742, + "34": 1.03628, + "35": 1.03575, + "36": 1.05127, + "37": 1.03273, + "38": 1.03381, + "39": 1.02923, + "40": 1.02986, + "41": 1.03249, + "42": 1.033, + "43": 1.03169, + "44": 1.03818, + "45": 1.02736, + "46": 1.02698, + "47": 1.03158, + "48": 1.02471, + "49": 1.03674, + "50": 1.0291 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json index 0af1bff480e..adec1b3bd58 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04624, - "2": 11.03476, - "3": 9.59903, - "4": 9.26301, - "5": 9.36373, - "6": 9.59608, - "7": 9.45214, - "8": 8.95198, - "9": 8.65952, - "10": 9.17778, - "11": 9.21306, - "12": 8.68184, - "13": 8.6038, - "14": 8.01576, - "15": 8.13595, - "16": 8.20124, - "17": 8.13602, - "18": 7.83369, - "19": 8.22974, - "20": 7.9452, - "21": 7.62338, - "22": 7.60791, - "23": 7.48374, - "24": 7.46559, - "25": 7.71274, - "26": 7.12081, - "27": 7.64626, - "28": 
7.35234, - "29": 7.52084, - "30": 7.67784, - "31": 7.42246, - "32": 7.6137, - "33": 7.66159, - "34": 7.72817, - "35": 7.23134, - "36": 7.10612, - "37": 7.44953, - "38": 7.20946, - "39": 7.57073, - "40": 7.56124, - "41": 7.51119, - "42": 7.27048, - "43": 7.25633, - "44": 7.43634, - "45": 7.21132, - "46": 6.91913, - "47": 7.32211, - "48": 7.16551, - "49": 7.6155, - "50": 7.05648 + "1": 11.04577, + "2": 11.03578, + "3": 9.5968, + "4": 9.26068, + "5": 9.09365, + "6": 8.97825, + "7": 9.18096, + "8": 8.70673, + "9": 8.55632, + "10": 8.85377, + "11": 8.31245, + "12": 8.35862, + "13": 8.28114, + "14": 7.73951, + "15": 7.91242, + "16": 7.94944, + "17": 7.89918, + "18": 7.64375, + "19": 8.02647, + "20": 7.73813, + "21": 7.44557, + "22": 7.43367, + "23": 7.31291, + "24": 7.30268, + "25": 7.57549, + "26": 6.98093, + "27": 7.50005, + "28": 7.241, + "29": 7.40369, + "30": 7.51839, + "31": 7.29514, + "32": 7.47818, + "33": 7.52568, + "34": 7.57647, + "35": 7.12091, + "36": 6.97439, + "37": 7.30929, + "38": 7.09349, + "39": 7.43659, + "40": 7.45122, + "41": 7.37904, + "42": 7.14627, + "43": 7.13408, + "44": 7.30886, + "45": 7.08523, + "46": 6.8067, + "47": 7.21159, + "48": 7.0245, + "49": 7.50096, + "50": 6.92687 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802568, - "2": 38543544, - "3": 41886704, - "4": 264367872, - "5": 224737792, - "6": 302994528, - "7": 645808768, - "8": 775291136, - "9": 765475328, - "10": 675259904, - "11": 615098624, - "12": 702764352, - "13": 934951360, - "14": 1060699008, - "15": 802967296, - "16": 1026771392, - "17": 756706880, - "18": 715253696, - "19": 929126208, - "20": 875969472, - "21": 665188032, - "22": 903854976, - "23": 747044352, - "24": 920777856, - "25": 733230528, - "26": 863183104, - "27": 879318336, - "28": 916219136, - "29": 909384256, - "30": 879622720, - "31": 866425152, - "32": 819074560, - "33": 589493056, - "34": 772011648, - "35": 778655488, - "36": 759651584, - "37": 761302144, - 
"38": 463804224, - "39": 543038400, - "40": 497278720, - "41": 658241792, - "42": 661600512, - "43": 495713632, - "44": 673788672, - "45": 470873536, - "46": 614455040, - "47": 554219584, - "48": 570200064, - "49": 557109312, - "50": 347212736 + "1": 38802664.0, + "2": 38543552.0, + "3": 38740472.0, + "4": 273766176.0, + "5": 196515488.0, + "6": 432153600.0, + "7": 715038528.0, + "8": 797328960.0, + "9": 696279488.0, + "10": 668928192.0, + "11": 583742720.0, + "12": 595799040.0, + "13": 695916288.0, + "14": 617245056.0, + "15": 629936832.0, + "16": 639940800.0, + "17": 642766016.0, + "18": 664898112.0, + "19": 671247104.0, + "20": 602545216.0, + "21": 542607872.0, + "22": 551419008.0, + "23": 533094816.0, + "24": 527647904.0, + "25": 570717824.0, + "26": 510874176.0, + "27": 498748096.0, + "28": 510353632.0, + "29": 506802112.0, + "30": 486336928.0, + "31": 410143360.0, + "32": 372280800.0, + "33": 369351776.0, + "34": 353666688.0, + "35": 344549376.0, + "36": 278456576.0, + "37": 289517152.0, + "38": 274950816.0, + "39": 242921776.0, + "40": 223597264.0, + "41": 186386944.0, + "42": 180387488.0, + "43": 224573440.0, + "44": 217714800.0, + "45": 143723568.0, + "46": 161525888.0, + "47": 120124336.0, + "48": 183368272.0, + "49": 154411968.0, + "50": 167778288.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 7321308672, - "2": 7321310720, - "3": 7321310720, - "4": 7321310720, - "5": 7321310720, - "6": 7321310720, - "7": 7321310720, - "8": 7321310720, - "9": 7321310720, - "10": 7321310720, - "11": 7321310720, - "12": 7321310720, - "13": 7321310720, - "14": 7321310720, - "15": 7321310720, - "16": 7321310720, - "17": 7321310720, - "18": 7321310720, - "19": 7321310720, - "20": 7321310720, - "21": 7321310720, - "22": 7321310720, - "23": 7321310720, - "24": 7321310720, - "25": 7321310720, - "26": 7321310720, - "27": 7321310720, - "28": 7321310720, - "29": 7321310720, - "30": 7321310720, - "31": 7321310720, - "32": 
7321310720, - "33": 7321310720, - "34": 7321310720, - "35": 7321310720, - "36": 7321310720, - "37": 7321310720, - "38": 7321310720, - "39": 7321310720, - "40": 7321310720, - "41": 7321310720, - "42": 7321310720, - "43": 7321310720, - "44": 7321310720, - "45": 7321310720, - "46": 7321310720, - "47": 7321310720, - "48": 7321310720, - "49": 7321310720, - "50": 7321310720 + "1": 7321336320.0, + "2": 7321338368.0, + "3": 7321338368.0, + "4": 7321338368.0, + "5": 7321338368.0, + "6": 7321338368.0, + "7": 7321338368.0, + "8": 7321338368.0, + "9": 7321338368.0, + "10": 7321338368.0, + "11": 7321338368.0, + "12": 7321338368.0, + "13": 7321338368.0, + "14": 7321338368.0, + "15": 7321338368.0, + "16": 7321338368.0, + "17": 7321338368.0, + "18": 7321338368.0, + "19": 7321338368.0, + "20": 7321338368.0, + "21": 7321338368.0, + "22": 7321338368.0, + "23": 7321338368.0, + "24": 7321338368.0, + "25": 7321338368.0, + "26": 7321338368.0, + "27": 7321338368.0, + "28": 7321338368.0, + "29": 7321338368.0, + "30": 7321338368.0, + "31": 7321338368.0, + "32": 7321338368.0, + "33": 7321338368.0, + "34": 7321338368.0, + "35": 7321338368.0, + "36": 7321338368.0, + "37": 7321338368.0, + "38": 7321338368.0, + "39": 7321338368.0, + "40": 7321338368.0, + "41": 7321338368.0, + "42": 7321338368.0, + "43": 7321338368.0, + "44": 7321338368.0, + "45": 7321338368.0, + "46": 7321338368.0, + "47": 7321338368.0, + "48": 7321338368.0, + "49": 7321338368.0, + "50": 7321338368.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 54396813312, - "2": 57149165568, - "3": 57165475840, - "4": 57165475840, - "5": 57165475840, - "6": 57165475840, - "7": 57165475840, - "8": 57165475840, - "9": 57165475840, - "10": 57165475840, - "11": 57165475840, - "12": 57165475840, - "13": 57165475840, - "14": 57165475840, - "15": 57165475840, - "16": 57165475840, - "17": 57165475840, - "18": 57165475840, - "19": 57165475840, - "20": 57165475840, - "21": 57165475840, - 
"22": 57165475840, - "23": 57165475840, - "24": 57165475840, - "25": 57165475840, - "26": 57165475840, - "27": 57165475840, - "28": 57165475840, - "29": 57165475840, - "30": 57165475840, - "31": 57165475840, - "32": 57165475840, - "33": 57165475840, - "34": 57165475840, - "35": 57165475840, - "36": 57165475840, - "37": 57165475840, - "38": 57165475840, - "39": 57165475840, - "40": 57295986688, - "41": 57295986688, - "42": 57331482624, - "43": 57360437248, - "44": 57561960448, - "45": 57561960448, - "46": 57561960448, - "47": 57585307648, - "48": 57602347008, - "49": 57823961088, - "50": 57823961088 + "1": 54402162688.0, + "2": 57150373888.0, + "3": 57150373888.0, + "4": 57150373888.0, + "5": 57150373888.0, + "6": 57150373888.0, + "7": 57150373888.0, + "8": 57150373888.0, + "9": 57150373888.0, + "10": 57150373888.0, + "11": 57150373888.0, + "12": 57150373888.0, + "13": 57150373888.0, + "14": 57150373888.0, + "15": 57150373888.0, + "16": 57150373888.0, + "17": 57150373888.0, + "18": 57150373888.0, + "19": 57150373888.0, + "20": 57150373888.0, + "21": 57150373888.0, + "22": 57150373888.0, + "23": 57150373888.0, + "24": 57150373888.0, + "25": 57150373888.0, + "26": 57150373888.0, + "27": 57150373888.0, + "28": 57150373888.0, + "29": 57150373888.0, + "30": 57150373888.0, + "31": 57150373888.0, + "32": 57150373888.0, + "33": 57150373888.0, + "34": 57150373888.0, + "35": 57152438272.0, + "36": 57344114688.0, + "37": 57344114688.0, + "38": 57449279488.0, + "39": 57449279488.0, + "40": 57449279488.0, + "41": 57449279488.0, + "42": 57449279488.0, + "43": 57449279488.0, + "44": 57449279488.0, + "45": 57470353408.0, + "46": 57470353408.0, + "47": 57470353408.0, + "48": 57470353408.0, + "49": 57470353408.0, + "50": 57470353408.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07779, - "2": 11.07564, - "3": 10.52904, - "4": 10.08924, - "5": 9.81101, - "6": 9.88786, - "7": 9.72987, - "8": 9.02044, - "9": 8.8145, - "10": 
9.09362, - "11": 8.77612, - "12": 8.56714, - "13": 8.54777, - "14": 8.04338, - "15": 8.10946, - "16": 8.13231, - "17": 8.0853, - "18": 7.83475, - "19": 8.21923, - "20": 7.91097, - "21": 7.58489, - "22": 7.56231, - "23": 7.44204, - "24": 7.44303, - "25": 7.67594, - "26": 7.07138, - "27": 7.60696, - "28": 7.30925, - "29": 7.48219, - "30": 7.62699, - "31": 7.3655, - "32": 7.54203, - "33": 7.60199, - "34": 7.66716, - "35": 7.18385, - "36": 7.05252, - "37": 7.38377, - "38": 7.15521, - "39": 7.51639, - "40": 7.4929, - "41": 7.44762, - "42": 7.20298, - "43": 7.18681, - "44": 7.36683, - "45": 7.15506, - "46": 6.85064, - "47": 7.26072, - "48": 7.10489, - "49": 7.53477, - "50": 6.99715 + "1": 11.07769, + "2": 11.07625, + "3": 10.52909, + "4": 10.08687, + "5": 9.82013, + "6": 9.48246, + "7": 9.54169, + "8": 8.83661, + "9": 8.64933, + "10": 8.95821, + "11": 8.32934, + "12": 8.36033, + "13": 8.26936, + "14": 7.73441, + "15": 7.87122, + "16": 7.9153, + "17": 7.86923, + "18": 7.61191, + "19": 7.99919, + "20": 7.72174, + "21": 7.4147, + "22": 7.40336, + "23": 7.27676, + "24": 7.28557, + "25": 7.53782, + "26": 6.94933, + "27": 7.48504, + "28": 7.20219, + "29": 7.38696, + "30": 7.51152, + "31": 7.26613, + "32": 7.45631, + "33": 7.51482, + "34": 7.57527, + "35": 7.10374, + "36": 6.97224, + "37": 7.31053, + "38": 7.08607, + "39": 7.44371, + "40": 7.43612, + "41": 7.37848, + "42": 7.13561, + "43": 7.11558, + "44": 7.30254, + "45": 7.08147, + "46": 6.78911, + "47": 7.21791, + "48": 7.03066, + "49": 7.46668, + "50": 6.93251 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 98.46571, - "2": 1.63304, - "3": 1.32772, - "4": 1.63453, - "5": 1.11673, - "6": 1.14377, - "7": 1.33213, - "8": 1.32699, - "9": 1.07499, - "10": 1.12938, - "11": 1.07438, - "12": 1.11078, - "13": 1.06958, - "14": 1.08718, - "15": 1.10547, - "16": 1.07557, - "17": 1.08606, - "18": 1.0832, - "19": 1.08226, - "20": 1.126, - "21": 1.08645, - "22": 1.07978, - "23": 
1.07859, - "24": 1.08221, - "25": 1.08192, - "26": 1.09185, - "27": 1.0923, - "28": 1.09562, - "29": 1.10486, - "30": 1.10038, - "31": 1.09094, - "32": 1.08693, - "33": 1.0883, - "34": 1.08169, - "35": 1.08611, - "36": 1.07758, - "37": 1.07933, - "38": 1.08289, - "39": 1.07885, - "40": 1.08075, - "41": 1.0781, - "42": 1.08028, - "43": 1.08035, - "44": 1.08973, - "45": 1.08944, - "46": 1.07483, - "47": 1.08306, - "48": 1.07701, - "49": 1.0768, - "50": 1.07022 + "1": 92.7075, + "2": 1.62502, + "3": 1.31213, + "4": 1.71707, + "5": 1.11852, + "6": 1.39151, + "7": 1.37049, + "8": 1.22293, + "9": 1.10694, + "10": 1.11053, + "11": 1.10169, + "12": 1.14642, + "13": 1.11639, + "14": 1.12927, + "15": 1.12868, + "16": 1.11899, + "17": 1.10545, + "18": 1.11542, + "19": 1.11417, + "20": 1.11349, + "21": 1.11071, + "22": 1.11032, + "23": 1.11836, + "24": 1.11402, + "25": 1.11546, + "26": 1.10471, + "27": 1.10368, + "28": 1.09929, + "29": 1.10324, + "30": 1.10507, + "31": 1.10255, + "32": 1.10727, + "33": 1.1043, + "34": 1.10476, + "35": 1.10252, + "36": 1.10053, + "37": 1.1068, + "38": 1.09229, + "39": 1.08165, + "40": 1.07889, + "41": 1.07583, + "42": 1.07174, + "43": 1.07738, + "44": 1.08604, + "45": 1.09529, + "46": 1.08309, + "47": 1.08896, + "48": 1.08318, + "49": 1.08597, + "50": 1.08649 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json index 585139e83c9..b7df693e1f7 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgxh100_eos.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 
11.04624, - "2": 11.03476, - "3": 9.59903, - "4": 9.26301, - "5": 9.36373, - "6": 9.59608, - "7": 9.45214, - "8": 8.95198, - "9": 8.65952, - "10": 9.17778, - "11": 9.21306, - "12": 8.68184, - "13": 8.6038, - "14": 8.01576, - "15": 8.13595, - "16": 8.20124, - "17": 8.13602, - "18": 7.83369, - "19": 8.22974, - "20": 7.9452, - "21": 7.62338, - "22": 7.60791, - "23": 7.48374, - "24": 7.46559, - "25": 7.71274, - "26": 7.12081, - "27": 7.64626, - "28": 7.35234, - "29": 7.52084, - "30": 7.67784, - "31": 7.42246, - "32": 7.6137, - "33": 7.66159, - "34": 7.72817, - "35": 7.23134, - "36": 7.10612, - "37": 7.44953, - "38": 7.20946, - "39": 7.57073, - "40": 7.56124, - "41": 7.51119, - "42": 7.27048, - "43": 7.25633, - "44": 7.43634, - "45": 7.21132, - "46": 6.91913, - "47": 7.32211, - "48": 7.16551, - "49": 7.6155, - "50": 7.05648 + "1": 11.04577, + "2": 11.03578, + "3": 9.5968, + "4": 9.26068, + "5": 9.09365, + "6": 8.97825, + "7": 9.18096, + "8": 8.70673, + "9": 8.55632, + "10": 8.85377, + "11": 8.31245, + "12": 8.35862, + "13": 8.28114, + "14": 7.73951, + "15": 7.91242, + "16": 7.94944, + "17": 7.89918, + "18": 7.64375, + "19": 8.02647, + "20": 7.73813, + "21": 7.44557, + "22": 7.43367, + "23": 7.31291, + "24": 7.30268, + "25": 7.57549, + "26": 6.98093, + "27": 7.50005, + "28": 7.241, + "29": 7.40369, + "30": 7.51839, + "31": 7.29514, + "32": 7.47818, + "33": 7.52568, + "34": 7.57647, + "35": 7.12091, + "36": 6.97439, + "37": 7.30929, + "38": 7.09349, + "39": 7.43659, + "40": 7.45122, + "41": 7.37904, + "42": 7.14627, + "43": 7.13408, + "44": 7.30886, + "45": 7.08523, + "46": 6.8067, + "47": 7.21159, + "48": 7.0245, + "49": 7.50096, + "50": 6.92687 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802568, - "2": 38543544, - "3": 41886704, - "4": 264367872, - "5": 224737792, - "6": 302994528, - "7": 645808768, - "8": 775291136, - "9": 765475328, - "10": 675259904, - "11": 615098624, - "12": 702764352, - "13": 934951360, - "14": 
1060699008, - "15": 802967296, - "16": 1026771392, - "17": 756706880, - "18": 715253696, - "19": 929126208, - "20": 875969472, - "21": 665188032, - "22": 903854976, - "23": 747044352, - "24": 920777856, - "25": 733230528, - "26": 863183104, - "27": 879318336, - "28": 916219136, - "29": 909384256, - "30": 879622720, - "31": 866425152, - "32": 819074560, - "33": 589493056, - "34": 772011648, - "35": 778655488, - "36": 759651584, - "37": 761302144, - "38": 463804224, - "39": 543038400, - "40": 497278720, - "41": 658241792, - "42": 661600512, - "43": 495713632, - "44": 673788672, - "45": 470873536, - "46": 614455040, - "47": 554219584, - "48": 570200064, - "49": 557109312, - "50": 347212736 + "1": 38802664.0, + "2": 38543552.0, + "3": 38740472.0, + "4": 273766176.0, + "5": 196515488.0, + "6": 432153600.0, + "7": 715038528.0, + "8": 797328960.0, + "9": 696279488.0, + "10": 668928192.0, + "11": 583742720.0, + "12": 595799040.0, + "13": 695916288.0, + "14": 617245056.0, + "15": 629936832.0, + "16": 639940800.0, + "17": 642766016.0, + "18": 664898112.0, + "19": 671247104.0, + "20": 602545216.0, + "21": 542607872.0, + "22": 551419008.0, + "23": 533094816.0, + "24": 527647904.0, + "25": 570717824.0, + "26": 510874176.0, + "27": 498748096.0, + "28": 510353632.0, + "29": 506802112.0, + "30": 486336928.0, + "31": 410143360.0, + "32": 372280800.0, + "33": 369351776.0, + "34": 353666688.0, + "35": 344549376.0, + "36": 278456576.0, + "37": 289517152.0, + "38": 274950816.0, + "39": 242921776.0, + "40": 223597264.0, + "41": 186386944.0, + "42": 180387488.0, + "43": 224573440.0, + "44": 217714800.0, + "45": 143723568.0, + "46": 161525888.0, + "47": 120124336.0, + "48": 183368272.0, + "49": 154411968.0, + "50": 167778288.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 7321308672, - "2": 7321310720, - "3": 7321310720, - "4": 7321310720, - "5": 7321310720, - "6": 7321310720, - "7": 7321310720, - "8": 7321310720, - "9": 
7321310720, - "10": 7321310720, - "11": 7321310720, - "12": 7321310720, - "13": 7321310720, - "14": 7321310720, - "15": 7321310720, - "16": 7321310720, - "17": 7321310720, - "18": 7321310720, - "19": 7321310720, - "20": 7321310720, - "21": 7321310720, - "22": 7321310720, - "23": 7321310720, - "24": 7321310720, - "25": 7321310720, - "26": 7321310720, - "27": 7321310720, - "28": 7321310720, - "29": 7321310720, - "30": 7321310720, - "31": 7321310720, - "32": 7321310720, - "33": 7321310720, - "34": 7321310720, - "35": 7321310720, - "36": 7321310720, - "37": 7321310720, - "38": 7321310720, - "39": 7321310720, - "40": 7321310720, - "41": 7321310720, - "42": 7321310720, - "43": 7321310720, - "44": 7321310720, - "45": 7321310720, - "46": 7321310720, - "47": 7321310720, - "48": 7321310720, - "49": 7321310720, - "50": 7321310720 + "1": 7321336320.0, + "2": 7321338368.0, + "3": 7321338368.0, + "4": 7321338368.0, + "5": 7321338368.0, + "6": 7321338368.0, + "7": 7321338368.0, + "8": 7321338368.0, + "9": 7321338368.0, + "10": 7321338368.0, + "11": 7321338368.0, + "12": 7321338368.0, + "13": 7321338368.0, + "14": 7321338368.0, + "15": 7321338368.0, + "16": 7321338368.0, + "17": 7321338368.0, + "18": 7321338368.0, + "19": 7321338368.0, + "20": 7321338368.0, + "21": 7321338368.0, + "22": 7321338368.0, + "23": 7321338368.0, + "24": 7321338368.0, + "25": 7321338368.0, + "26": 7321338368.0, + "27": 7321338368.0, + "28": 7321338368.0, + "29": 7321338368.0, + "30": 7321338368.0, + "31": 7321338368.0, + "32": 7321338368.0, + "33": 7321338368.0, + "34": 7321338368.0, + "35": 7321338368.0, + "36": 7321338368.0, + "37": 7321338368.0, + "38": 7321338368.0, + "39": 7321338368.0, + "40": 7321338368.0, + "41": 7321338368.0, + "42": 7321338368.0, + "43": 7321338368.0, + "44": 7321338368.0, + "45": 7321338368.0, + "46": 7321338368.0, + "47": 7321338368.0, + "48": 7321338368.0, + "49": 7321338368.0, + "50": 7321338368.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, 
"step_interval": 1, "values": { - "1": 54396813312, - "2": 57149165568, - "3": 57165475840, - "4": 57165475840, - "5": 57165475840, - "6": 57165475840, - "7": 57165475840, - "8": 57165475840, - "9": 57165475840, - "10": 57165475840, - "11": 57165475840, - "12": 57165475840, - "13": 57165475840, - "14": 57165475840, - "15": 57165475840, - "16": 57165475840, - "17": 57165475840, - "18": 57165475840, - "19": 57165475840, - "20": 57165475840, - "21": 57165475840, - "22": 57165475840, - "23": 57165475840, - "24": 57165475840, - "25": 57165475840, - "26": 57165475840, - "27": 57165475840, - "28": 57165475840, - "29": 57165475840, - "30": 57165475840, - "31": 57165475840, - "32": 57165475840, - "33": 57165475840, - "34": 57165475840, - "35": 57165475840, - "36": 57165475840, - "37": 57165475840, - "38": 57165475840, - "39": 57165475840, - "40": 57295986688, - "41": 57295986688, - "42": 57331482624, - "43": 57360437248, - "44": 57561960448, - "45": 57561960448, - "46": 57561960448, - "47": 57585307648, - "48": 57602347008, - "49": 57823961088, - "50": 57823961088 + "1": 54402162688.0, + "2": 57150373888.0, + "3": 57150373888.0, + "4": 57150373888.0, + "5": 57150373888.0, + "6": 57150373888.0, + "7": 57150373888.0, + "8": 57150373888.0, + "9": 57150373888.0, + "10": 57150373888.0, + "11": 57150373888.0, + "12": 57150373888.0, + "13": 57150373888.0, + "14": 57150373888.0, + "15": 57150373888.0, + "16": 57150373888.0, + "17": 57150373888.0, + "18": 57150373888.0, + "19": 57150373888.0, + "20": 57150373888.0, + "21": 57150373888.0, + "22": 57150373888.0, + "23": 57150373888.0, + "24": 57150373888.0, + "25": 57150373888.0, + "26": 57150373888.0, + "27": 57150373888.0, + "28": 57150373888.0, + "29": 57150373888.0, + "30": 57150373888.0, + "31": 57150373888.0, + "32": 57150373888.0, + "33": 57150373888.0, + "34": 57150373888.0, + "35": 57152438272.0, + "36": 57344114688.0, + "37": 57344114688.0, + "38": 57449279488.0, + "39": 57449279488.0, + "40": 57449279488.0, + "41": 
57449279488.0, + "42": 57449279488.0, + "43": 57449279488.0, + "44": 57449279488.0, + "45": 57470353408.0, + "46": 57470353408.0, + "47": 57470353408.0, + "48": 57470353408.0, + "49": 57470353408.0, + "50": 57470353408.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07779, - "2": 11.07564, - "3": 10.52904, - "4": 10.08924, - "5": 9.81101, - "6": 9.88786, - "7": 9.72987, - "8": 9.02044, - "9": 8.8145, - "10": 9.09362, - "11": 8.77612, - "12": 8.56714, - "13": 8.54777, - "14": 8.04338, - "15": 8.10946, - "16": 8.13231, - "17": 8.0853, - "18": 7.83475, - "19": 8.21923, - "20": 7.91097, - "21": 7.58489, - "22": 7.56231, - "23": 7.44204, - "24": 7.44303, - "25": 7.67594, - "26": 7.07138, - "27": 7.60696, - "28": 7.30925, - "29": 7.48219, - "30": 7.62699, - "31": 7.3655, - "32": 7.54203, - "33": 7.60199, - "34": 7.66716, - "35": 7.18385, - "36": 7.05252, - "37": 7.38377, - "38": 7.15521, - "39": 7.51639, - "40": 7.4929, - "41": 7.44762, - "42": 7.20298, - "43": 7.18681, - "44": 7.36683, - "45": 7.15506, - "46": 6.85064, - "47": 7.26072, - "48": 7.10489, - "49": 7.53477, - "50": 6.99715 + "1": 11.07769, + "2": 11.07625, + "3": 10.52909, + "4": 10.08687, + "5": 9.82013, + "6": 9.48246, + "7": 9.54169, + "8": 8.83661, + "9": 8.64933, + "10": 8.95821, + "11": 8.32934, + "12": 8.36033, + "13": 8.26936, + "14": 7.73441, + "15": 7.87122, + "16": 7.9153, + "17": 7.86923, + "18": 7.61191, + "19": 7.99919, + "20": 7.72174, + "21": 7.4147, + "22": 7.40336, + "23": 7.27676, + "24": 7.28557, + "25": 7.53782, + "26": 6.94933, + "27": 7.48504, + "28": 7.20219, + "29": 7.38696, + "30": 7.51152, + "31": 7.26613, + "32": 7.45631, + "33": 7.51482, + "34": 7.57527, + "35": 7.10374, + "36": 6.97224, + "37": 7.31053, + "38": 7.08607, + "39": 7.44371, + "40": 7.43612, + "41": 7.37848, + "42": 7.13561, + "43": 7.11558, + "44": 7.30254, + "45": 7.08147, + "46": 6.78911, + "47": 7.21791, + "48": 7.03066, + "49": 7.46668, + "50": 6.93251 } }, 
"iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 89.12995, - "2": 1.33749, - "3": 1.24205, - "4": 1.63759, - "5": 1.13139, - "6": 1.12938, - "7": 1.37914, - "8": 1.3886, - "9": 1.10046, - "10": 1.11649, - "11": 1.11259, - "12": 1.10822, - "13": 1.10532, - "14": 1.11189, - "15": 1.1132, - "16": 1.10539, - "17": 1.11434, - "18": 1.11836, - "19": 1.11073, - "20": 1.11278, - "21": 1.11212, - "22": 1.10671, - "23": 1.11034, - "24": 1.11107, - "25": 1.11085, - "26": 1.10756, - "27": 1.10109, - "28": 1.1069, - "29": 1.11354, - "30": 1.11254, - "31": 1.10893, - "32": 1.11311, - "33": 1.10722, - "34": 1.10243, - "35": 1.10358, - "36": 1.09746, - "37": 1.09875, - "38": 1.10151, - "39": 1.10188, - "40": 1.10069, - "41": 1.10545, - "42": 1.10709, - "43": 1.1028, - "44": 1.10723, - "45": 1.10614, - "46": 1.09997, - "47": 1.1053, - "48": 1.10274, - "49": 1.09986, - "50": 1.10191 + "1": 95.02242, + "2": 1.29728, + "3": 1.24413, + "4": 1.67309, + "5": 1.12527, + "6": 1.39226, + "7": 1.33351, + "8": 1.19614, + "9": 1.10737, + "10": 1.09796, + "11": 1.10736, + "12": 1.10105, + "13": 1.10552, + "14": 1.11007, + "15": 1.09853, + "16": 1.10142, + "17": 1.09718, + "18": 1.10103, + "19": 1.10339, + "20": 1.1069, + "21": 1.10541, + "22": 1.10374, + "23": 1.1028, + "24": 1.1, + "25": 1.09935, + "26": 1.09318, + "27": 1.09779, + "28": 1.09457, + "29": 1.09, + "30": 1.09267, + "31": 1.08899, + "32": 1.09268, + "33": 1.08757, + "34": 1.08991, + "35": 1.09705, + "36": 1.09429, + "37": 1.09459, + "38": 1.08857, + "39": 1.09547, + "40": 1.09224, + "41": 1.089, + "42": 1.08879, + "43": 1.0834, + "44": 1.08212, + "45": 1.08363, + "46": 1.08596, + "47": 1.07798, + "48": 1.07329, + "49": 1.07678, + "50": 1.07483 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json index 58eb3fc16cd..8cea616921e 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.95004, - "2": 10.9521, - "3": 10.5115, - "4": 9.96454, - "5": 9.93941, - "6": 9.67273, - "7": 10.20975, - "8": 9.49716, - "9": 9.55902, - "10": 9.79742, - "11": 9.30109, - "12": 9.40483, - "13": 9.39546, - "14": 8.84681, - "15": 9.02444, - "16": 9.07121, - "17": 9.04574, - "18": 8.75678, - "19": 9.18159, - "20": 8.8595, - "21": 8.53503, - "22": 8.55182, - "23": 8.42441, - "24": 8.37608, - "25": 8.64304, - "26": 7.97393, - "27": 8.56806, - "28": 8.19764, - "29": 8.3928, - "30": 8.67283, - "31": 8.289, - "32": 8.43572, - "33": 8.5568, - "34": 8.66018, - "35": 8.07934, - "36": 7.94976, - "37": 8.29565, - "38": 7.98044, - "39": 8.39201, - "40": 8.35513, - "41": 8.31876, - "42": 8.0583, - "43": 8.03283, - "44": 8.24243, - "45": 8.10277, - "46": 7.61696, - "47": 8.15273, - "48": 8.00569, - "49": 8.38688, - "50": 7.81491 + "1": 10.94971, + "2": 10.95163, + "3": 10.51641, + "4": 9.9652, + "5": 9.94116, + "6": 9.67394, + "7": 10.19887, + "8": 9.50035, + "9": 9.54982, + "10": 9.79667, + "11": 9.30128, + "12": 9.40566, + "13": 9.39438, + "14": 8.84572, + "15": 9.02231, + "16": 9.06973, + "17": 9.04712, + "18": 8.75662, + "19": 9.18074, + "20": 8.86175, + "21": 8.53558, + "22": 8.55288, + "23": 8.42513, + "24": 8.37683, + "25": 8.64426, + "26": 7.9756, + "27": 8.57026, + "28": 8.1987, + "29": 8.39406, + "30": 8.67631, + "31": 8.29096, + "32": 8.43692, + "33": 8.55897, + "34": 8.66123, + "35": 8.08, + "36": 7.95214, + "37": 8.2979, 
+ "38": 7.98177, + "39": 8.39281, + "40": 8.35852, + "41": 8.32006, + "42": 8.05954, + "43": 8.03381, + "44": 8.24236, + "45": 8.1025, + "46": 7.61814, + "47": 8.15364, + "48": 8.00693, + "49": 8.38704, + "50": 7.81592 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19403624.0, - "2": 19274194.0, - "3": 19372760.0, - "4": 86525248.0, - "5": 148575568.0, - "6": 145226704.0, - "7": 171879984.0, - "8": 195785248.0, - "9": 164124752.0, - "10": 167684736.0, - "11": 221077344.0, - "12": 200384224.0, - "13": 248872528.0, - "14": 211169424.0, - "15": 214304608.0, - "16": 216075632.0, - "17": 267845984.0, - "18": 170470336.0, - "19": 176865072.0, - "20": 187955392.0, - "21": 225750704.0, - "22": 247396816.0, - "23": 211643856.0, - "24": 205638464.0, - "25": 277022272.0, - "26": 291562304.0, - "27": 225789840.0, - "28": 288202368.0, - "29": 198390384.0, - "30": 213302208.0, - "31": 227204752.0, - "32": 271112416.0, - "33": 231840432.0, - "34": 203575536.0, - "35": 191152368.0, - "36": 222566928.0, - "37": 177810112.0, - "38": 228708544.0, - "39": 211168784.0, - "40": 215603968.0, - "41": 200089440.0, - "42": 228529888.0, - "43": 198782848.0, - "44": 141902272.0, - "45": 181922816.0, - "46": 115369856.0, - "47": 170214176.0, - "48": 137292832.0, - "49": 97654936.0, - "50": 160979632.0 + "1": 19403704.0, + "2": 19274216.0, + "3": 22517470.0, + "4": 83429816.0, + "5": 139167728.0, + "6": 138921280.0, + "7": 173470304.0, + "8": 200511856.0, + "9": 165696320.0, + "10": 166120112.0, + "11": 213254416.0, + "12": 187847360.0, + "13": 231586656.0, + "14": 226879072.0, + "15": 219025920.0, + "16": 205179664.0, + "17": 280450432.0, + "18": 181477792.0, + "19": 191026096.0, + "20": 186395632.0, + "21": 233632576.0, + "22": 231696832.0, + "23": 216390688.0, + "24": 215133760.0, + "25": 233079504.0, + "26": 244437920.0, + "27": 222637584.0, + "28": 278773952.0, + "29": 253409264.0, + "30": 240036736.0, + "31": 236599008.0, + "32": 
205066624.0, + "33": 263303312.0, + "34": 200444544.0, + "35": 199033824.0, + "36": 243001216.0, + "37": 151181872.0, + "38": 175301280.0, + "39": 219001024.0, + "40": 220307936.0, + "41": 217385856.0, + "42": 230074176.0, + "43": 208226784.0, + "44": 148172720.0, + "45": 141103744.0, + "46": 132664976.0, + "47": 179619392.0, + "48": 118381144.0, + "49": 86643984.0, + "50": 113798320.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4883602432.0, - "2": 4885017088.0, - "3": 4882657792.0, - "4": 4883046912.0, - "5": 4883725824.0, - "6": 4883713536.0, - "7": 4883040768.0, - "8": 4883273216.0, - "9": 4882952704.0, - "10": 4885949952.0, - "11": 4883990016.0, - "12": 4887679488.0, - "13": 4884011520.0, - "14": 4882899456.0, - "15": 4883515904.0, - "16": 4883990016.0, - "17": 4883410432.0, - "18": 4883673600.0, - "19": 4882903552.0, - "20": 4884541952.0, - "21": 4883138048.0, - "22": 4883247616.0, - "23": 4883839488.0, - "24": 4885058048.0, - "25": 4882676224.0, - "26": 4884058624.0, - "27": 4884724224.0, - "28": 4884874752.0, - "29": 4883127808.0, - "30": 4883252736.0, - "31": 4882955776.0, - "32": 4885190144.0, - "33": 4883845632.0, - "34": 4884392448.0, - "35": 4883083776.0, - "36": 4883851776.0, - "37": 4885246464.0, - "38": 4882680320.0, - "39": 4884296192.0, - "40": 4884689408.0, - "41": 4882836992.0, - "42": 4883972608.0, - "43": 4884519424.0, - "44": 4883354112.0, - "45": 4883495424.0, - "46": 4882788864.0, - "47": 4883144192.0, - "48": 4883688960.0, - "49": 4884182528.0, - "50": 4885279232.0 + "1": 4883287040.0, + "2": 4883441152.0, + "3": 4881697280.0, + "4": 4883730944.0, + "5": 4882556416.0, + "6": 4882616832.0, + "7": 4883438080.0, + "8": 4881568256.0, + "9": 4883173888.0, + "10": 4882272768.0, + "11": 4883676672.0, + "12": 4881393152.0, + "13": 4883141120.0, + "14": 4883697152.0, + "15": 4882622976.0, + "16": 4881830400.0, + "17": 4881658368.0, + "18": 4881863168.0, + "19": 4883804672.0, + "20": 
4881795584.0, + "21": 4883333632.0, + "22": 4882194944.0, + "23": 4882084352.0, + "24": 4884065792.0, + "25": 4881804800.0, + "26": 4883596800.0, + "27": 4883047936.0, + "28": 4882476544.0, + "29": 4883087872.0, + "30": 4882151936.0, + "31": 4882625024.0, + "32": 4883104256.0, + "33": 4882526720.0, + "34": 4882292224.0, + "35": 4882485760.0, + "36": 4882867712.0, + "37": 4882634240.0, + "38": 4882610688.0, + "39": 4881474048.0, + "40": 4881961472.0, + "41": 4882663936.0, + "42": 4881860096.0, + "43": 4881499648.0, + "44": 4883392000.0, + "45": 4882392576.0, + "46": 4882815488.0, + "47": 4883113472.0, + "48": 4882158080.0, + "49": 4881207808.0, + "50": 4881588736.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 41210470400.0, - "2": 41210470400.0, - "3": 41210470400.0, - "4": 41210470400.0, - "5": 41210470400.0, - "6": 41210470400.0, - "7": 41210470400.0, - "8": 41210470400.0, - "9": 41210470400.0, - "10": 41210470400.0, - "11": 41210470400.0, - "12": 41210470400.0, - "13": 41210470400.0, - "14": 41210470400.0, - "15": 41210470400.0, - "16": 41210470400.0, - "17": 41210470400.0, - "18": 41210470400.0, - "19": 41210470400.0, - "20": 41210470400.0, - "21": 41210470400.0, - "22": 41210470400.0, - "23": 41210470400.0, - "24": 41210470400.0, - "25": 41210470400.0, - "26": 41210470400.0, - "27": 41210470400.0, - "28": 41210470400.0, - "29": 41210470400.0, - "30": 41210470400.0, - "31": 41210470400.0, - "32": 41210470400.0, - "33": 41210470400.0, - "34": 41210470400.0, - "35": 41210470400.0, - "36": 41210470400.0, - "37": 41210470400.0, - "38": 41210470400.0, - "39": 41210470400.0, - "40": 41210470400.0, - "41": 41210470400.0, - "42": 41210470400.0, - "43": 41210470400.0, - "44": 41210470400.0, - "45": 41210470400.0, - "46": 41210470400.0, - "47": 41210470400.0, - "48": 41210470400.0, - "49": 41210470400.0, - "50": 41210470400.0 + "1": 41208348672.0, + "2": 41208348672.0, + "3": 41208348672.0, + "4": 
41208348672.0, + "5": 41208348672.0, + "6": 41208348672.0, + "7": 41208348672.0, + "8": 41208348672.0, + "9": 41208348672.0, + "10": 41208348672.0, + "11": 41208348672.0, + "12": 41208348672.0, + "13": 41208348672.0, + "14": 41208348672.0, + "15": 41208348672.0, + "16": 41208348672.0, + "17": 41208348672.0, + "18": 41208348672.0, + "19": 41208348672.0, + "20": 41208348672.0, + "21": 41208348672.0, + "22": 41208348672.0, + "23": 41208348672.0, + "24": 41208348672.0, + "25": 41208348672.0, + "26": 41208348672.0, + "27": 41208348672.0, + "28": 41208348672.0, + "29": 41208348672.0, + "30": 41208348672.0, + "31": 41208348672.0, + "32": 41208348672.0, + "33": 41208348672.0, + "34": 41208348672.0, + "35": 41208348672.0, + "36": 41208348672.0, + "37": 41208348672.0, + "38": 41208348672.0, + "39": 41208348672.0, + "40": 41208348672.0, + "41": 41208348672.0, + "42": 41208348672.0, + "43": 41208348672.0, + "44": 41208348672.0, + "45": 41208348672.0, + "46": 41208348672.0, + "47": 41208348672.0, + "48": 41208348672.0, + "49": 41208348672.0, + "50": 41208348672.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 86.8085, - "2": 1.10913, - "3": 0.99097, - "4": 0.89412, - "5": 1.25997, - "6": 0.98162, - "7": 0.98318, - "8": 1.13296, - "9": 0.88126, - "10": 0.8633, - "11": 2.2744, - "12": 4.5393, - "13": 3.22763, - "14": 1.64923, - "15": 0.86595, - "16": 0.86575, - "17": 0.85272, - "18": 0.85454, - "19": 0.85281, - "20": 0.87018, - "21": 0.84654, - "22": 0.8494, - "23": 0.84882, - "24": 0.84482, - "25": 0.85311, - "26": 0.84678, - "27": 0.84096, - "28": 0.8412, - "29": 0.84156, - "30": 0.84475, - "31": 0.84747, - "32": 0.85058, - "33": 0.84977, - "34": 0.8479, - "35": 0.85234, - "36": 0.85012, - "37": 0.85087, - "38": 0.84594, - "39": 0.84558, - "40": 0.84807, - "41": 0.84183, - "42": 0.8439, - "43": 0.84221, - "44": 0.84248, - "45": 0.84257, - "46": 0.83922, - "47": 0.84311, - "48": 0.84159, - "49": 0.84011, - "50": 0.8353 + 
"1": 89.10928, + "2": 1.08143, + "3": 0.94222, + "4": 0.89675, + "5": 1.34524, + "6": 1.06972, + "7": 1.00314, + "8": 1.04961, + "9": 0.86611, + "10": 0.86248, + "11": 0.98739, + "12": 0.86057, + "13": 0.86777, + "14": 0.85834, + "15": 0.8559, + "16": 0.85522, + "17": 0.84644, + "18": 0.85748, + "19": 0.85218, + "20": 0.85342, + "21": 0.84029, + "22": 0.84342, + "23": 0.84297, + "24": 0.83925, + "25": 0.8439, + "26": 0.85696, + "27": 0.83981, + "28": 0.84643, + "29": 0.8433, + "30": 0.86234, + "31": 0.85636, + "32": 0.84184, + "33": 0.84501, + "34": 0.84316, + "35": 0.83806, + "36": 0.84143, + "37": 0.84447, + "38": 0.84137, + "39": 0.84133, + "40": 0.84321, + "41": 0.84019, + "42": 0.84164, + "43": 0.83741, + "44": 0.84203, + "45": 0.83966, + "46": 0.84109, + "47": 0.83945, + "48": 0.84001, + "49": 0.84194, + "50": 0.83578 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json index 1ba051f4889..0835e95b926 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json @@ -1 +1,142 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.83281, "5": 10.85975, "10": 10.79613, "15": 10.80527, "20": 10.72502, "25": 10.53599, "30": 10.3571, "35": 10.24605, "40": 10.05992, "45": 9.7836, "50": 9.8722, "55": 9.83189, "60": 9.45075, "65": 8.89679, "70": 9.71414, "75": 9.39795, "80": 9.38169, "85": 9.58585, "90": 9.7999, "95": 9.50528, "100": 9.37224}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, 
"values": {"1": 27013.0, "5": 31736.0, "10": 25785.0, "15": 30383.0, "20": 28435.0, "25": 27493.0, "30": 30329.0, "35": 31750.0, "40": 34279.0, "45": 34634.0, "50": 38531.0, "55": 37465.0, "60": 40172.0, "65": 40624.0, "70": 44852.0, "75": 39231.0, "80": 130535.0, "85": 123250.0, "90": 47793.0, "95": 167340.0, "100": 163328.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 814390272.0, "5": 814420480.0, "10": 814376448.0, "15": 814376960.0, "20": 814373376.0, "25": 814321152.0, "30": 814306304.0, "35": 814292992.0, "40": 814288896.0, "45": 814272000.0, "50": 814262272.0, "55": 814258688.0, "60": 814268416.0, "65": 814220800.0, "70": 814266880.0, "75": 814318080.0, "80": 814285312.0, "85": 814289408.0, "90": 814315520.0, "95": 814320128.0, "100": 814311424.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2111314944.0, "5": 2370209280.0, "10": 2370209280.0, "15": 2370209280.0, "20": 2370209280.0, "25": 2370209280.0, "30": 2370209280.0, "35": 2370209280.0, "40": 2370209280.0, "45": 2370209280.0, "50": 2370209280.0, "55": 2370209280.0, "60": 2370209280.0, "65": 2370209280.0, "70": 2370209280.0, "75": 2370209280.0, "80": 2370209280.0, "85": 2370209280.0, "90": 2370209280.0, "95": 2370209280.0, "100": 2370209280.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 20.98318, "5": 0.79797, "10": 0.74028, "15": 0.67279, "20": 0.62948, "25": 0.61132, "30": 0.61547, "35": 0.6152, "40": 0.60421, "45": 0.59124, "50": 0.5891, "55": 0.57048, "60": 0.54799, "65": 0.52185, "70": 0.51195, "75": 0.50105, "80": 0.4628, "85": 0.45992, "90": 0.46498, "95": 0.4599, "100": 0.42568}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 5, + "values": { + "1": 10.82922, + "5": 10.85652, + "10": 10.79298, + "15": 10.8067, + "20": 10.72654, + "25": 10.53282, + "30": 10.35802, + "35": 10.24483, + "40": 
10.05533, + "45": 9.77951, + "50": 9.86874, + "55": 9.82995, + "60": 9.449, + "65": 8.89366, + "70": 9.71127, + "75": 9.39451, + "80": 9.38198, + "85": 9.58333, + "90": 9.79944, + "95": 9.50213, + "100": 9.37131 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 5, + "values": { + "1": 27245.0, + "5": 31369.0, + "10": 25870.0, + "15": 29830.0, + "20": 28243.0, + "25": 27636.0, + "30": 30387.0, + "35": 31488.0, + "40": 34779.0, + "45": 35158.0, + "50": 38234.0, + "55": 37133.0, + "60": 40450.0, + "65": 40947.0, + "70": 43436.0, + "75": 39925.0, + "80": 51863.0, + "85": 2145177.0, + "90": 51330.0, + "95": 45247.0, + "100": 163741.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 5, + "values": { + "1": 787511296.0, + "5": 787542016.0, + "10": 787500032.0, + "15": 787499008.0, + "20": 787500032.0, + "25": 787446272.0, + "30": 787429888.0, + "35": 787413504.0, + "40": 787409920.0, + "45": 787394560.0, + "50": 787384320.0, + "55": 787383808.0, + "60": 787389952.0, + "65": 787346432.0, + "70": 787387904.0, + "75": 787437568.0, + "80": 787405312.0, + "85": 787407360.0, + "90": 787441664.0, + "95": 787445248.0, + "100": 787433472.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 5, + "values": { + "1": 2465793024.0, + "5": 2492764160.0, + "10": 2492764160.0, + "15": 2492764160.0, + "20": 2492764160.0, + "25": 2492764160.0, + "30": 2492764160.0, + "35": 2492764160.0, + "40": 2492764160.0, + "45": 2492764160.0, + "50": 2492764160.0, + "55": 2492764160.0, + "60": 2492764160.0, + "65": 2492764160.0, + "70": 2492764160.0, + "75": 2492764160.0, + "80": 2492764160.0, + "85": 2492764160.0, + "90": 2492764160.0, + "95": 2492764160.0, + "100": 2492764160.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 5, + "values": { + "1": 9.68104, + "5": 0.32859, + "10": 0.30772, + "15": 0.31234, + "20": 0.29254, + "25": 0.29296, + 
"30": 0.31344, + "35": 0.31026, + "40": 0.30514, + "45": 0.30481, + "50": 0.30324, + "55": 0.29929, + "60": 0.30103, + "65": 0.32008, + "70": 0.31307, + "75": 0.2933, + "80": 0.29351, + "85": 0.29283, + "90": 0.29375, + "95": 0.29458, + "100": 0.29103 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 00000000000..7e299df5257 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82922, + "2": 10.84163, + "3": 10.84245, + "4": 10.82, + "5": 10.85652, + "6": 10.86906, + "7": 10.83778, + "8": 10.84312, + "9": 10.84423, + "10": 10.79298, + "11": 10.86697, + "12": 10.86875, + "13": 10.86207, + "14": 10.86919, + "15": 10.8067, + "16": 10.8057, + "17": 10.77686, + "18": 10.79541, + "19": 10.78384, + "20": 10.72654, + "21": 10.69491, + "22": 10.54462, + "23": 10.6993, + "24": 10.58151, + "25": 10.53282, + "26": 10.58817, + "27": 10.601, + "28": 10.57563, + "29": 10.58022, + "30": 10.35802, + "31": 10.08769, + "32": 10.44466, + "33": 10.4477, + "34": 10.18704, + "35": 10.24483, + "36": 10.19713, + "37": 10.32294, + "38": 10.17101, + "39": 10.37026, + "40": 10.05533, + "41": 10.09491, + "42": 10.17971, + "43": 9.78263, + "44": 9.91346, + "45": 9.77951, + "46": 9.75648, + "47": 10.09647, + "48": 9.80391, + "49": 9.46649, + "50": 9.86874, + "51": 9.79428, + "52": 9.68303, + "53": 10.03314, + "54": 9.9113, + "55": 9.82995, + "56": 9.57839, + "57": 9.42377, + "58": 9.80549, + "59": 9.53292, + "60": 9.449, + "61": 
9.65293, + "62": 9.95672, + "63": 9.33775, + "64": 9.74194, + "65": 8.89366, + "66": 9.67317, + "67": 9.33002, + "68": 9.76517, + "69": 9.76336, + "70": 9.71127, + "71": 9.59511, + "72": 9.54797, + "73": 9.47124, + "74": 8.89297, + "75": 9.39451, + "76": 9.04721, + "77": 10.04318, + "78": 9.70313, + "79": 9.35169, + "80": 9.38198, + "81": 9.45146, + "82": 9.67546, + "83": 9.27658, + "84": 9.39241, + "85": 9.58333, + "86": 9.04518, + "87": 9.56487, + "88": 9.72459, + "89": 9.57019, + "90": 9.79944, + "91": 9.30737, + "92": 9.3313, + "93": 9.04109, + "94": 8.80259, + "95": 9.50213, + "96": 9.5021, + "97": 9.28183, + "98": 9.64883, + "99": 8.8594, + "100": 9.37131 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 27245.0, + "2": 28958.0, + "3": 29464.0, + "4": 28046.0, + "5": 31369.0, + "6": 33287.0, + "7": 31200.0, + "8": 26921.0, + "9": 30008.0, + "10": 25870.0, + "11": 33681.0, + "12": 30344.0, + "13": 32737.0, + "14": 33315.0, + "15": 29830.0, + "16": 32475.0, + "17": 30747.0, + "18": 30381.0, + "19": 31032.0, + "20": 28243.0, + "21": 29224.0, + "22": 27340.0, + "23": 34119.0, + "24": 29049.0, + "25": 27636.0, + "26": 30662.0, + "27": 32009.0, + "28": 33355.0, + "29": 34714.0, + "30": 30387.0, + "31": 28212.0, + "32": 33411.0, + "33": 34696.0, + "34": 30053.0, + "35": 31488.0, + "36": 32943.0, + "37": 35829.0, + "38": 33740.0, + "39": 37632.0, + "40": 34779.0, + "41": 33958.0, + "42": 36396.0, + "43": 34088.0, + "44": 34090.0, + "45": 35158.0, + "46": 36174.0, + "47": 39772.0, + "48": 36516.0, + "49": 36733.0, + "50": 38234.0, + "51": 38608.0, + "52": 37030.0, + "53": 42442.0, + "54": 40944.0, + "55": 37133.0, + "56": 41001.0, + "57": 37524.0, + "58": 42317.0, + "59": 40804.0, + "60": 40450.0, + "61": 41478.0, + "62": 39766.0, + "63": 37941.0, + "64": 42197.0, + "65": 40947.0, + "66": 44094.0, + "67": 41958.0, + "68": 40060.0, + "69": 42189.0, + "70": 43436.0, + "71": 42748.0, + "72": 44280.0, + "73": 
47478.0, + "74": 41456.0, + "75": 39925.0, + "76": 43490.0, + "77": 45636.0, + "78": 2141470.0, + "79": 46055.0, + "80": 51863.0, + "81": 151341.0, + "82": 49835.0, + "83": 143360.0, + "84": 2141546.0, + "85": 2145177.0, + "86": 132114.0, + "87": 2147022.0, + "88": 59899.0, + "89": 162883.0, + "90": 51330.0, + "91": 2141901.0, + "92": 44946.0, + "93": 138194.0, + "94": 2145772.0, + "95": 45247.0, + "96": 135045.0, + "97": 53170.0, + "98": 168576.0, + "99": 2141797.0, + "100": 163741.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 787516416.0, + "2": 787540992.0, + "3": 787524096.0, + "4": 787512320.0, + "5": 787547136.0, + "6": 787537920.0, + "7": 787512832.0, + "8": 787524608.0, + "9": 787528192.0, + "10": 787505152.0, + "11": 787522048.0, + "12": 787520000.0, + "13": 787529728.0, + "14": 787529216.0, + "15": 787504128.0, + "16": 787513344.0, + "17": 787503104.0, + "18": 787489280.0, + "19": 787514880.0, + "20": 787505152.0, + "21": 787479552.0, + "22": 787486208.0, + "23": 787478528.0, + "24": 787486208.0, + "25": 787451392.0, + "26": 787482112.0, + "27": 787470848.0, + "28": 787450368.0, + "29": 787458048.0, + "30": 787435008.0, + "31": 787406848.0, + "32": 787424256.0, + "33": 787435520.0, + "34": 787426304.0, + "35": 787418624.0, + "36": 787436544.0, + "37": 787428352.0, + "38": 787436544.0, + "39": 787417600.0, + "40": 787415040.0, + "41": 787405824.0, + "42": 787415040.0, + "43": 787367936.0, + "44": 787392512.0, + "45": 787399680.0, + "46": 787355136.0, + "47": 787411456.0, + "48": 787354112.0, + "49": 787374080.0, + "50": 787389440.0, + "51": 787375616.0, + "52": 787383808.0, + "53": 787379712.0, + "54": 787384832.0, + "55": 787388928.0, + "56": 787388928.0, + "57": 787351040.0, + "58": 787382784.0, + "59": 787374080.0, + "60": 787395072.0, + "61": 787405312.0, + "62": 787405824.0, + "63": 787373056.0, + "64": 787388928.0, + "65": 787351552.0, + "66": 787386880.0, + "67": 787392000.0, + 
"68": 787399168.0, + "69": 787383296.0, + "70": 787393024.0, + "71": 787406848.0, + "72": 787400704.0, + "73": 787401216.0, + "74": 787403264.0, + "75": 787442688.0, + "76": 787444736.0, + "77": 787445760.0, + "78": 787395072.0, + "79": 787430400.0, + "80": 787410432.0, + "81": 787412992.0, + "82": 787427840.0, + "83": 787428864.0, + "84": 787412480.0, + "85": 787412480.0, + "86": 787394560.0, + "87": 787452928.0, + "88": 787414528.0, + "89": 787404800.0, + "90": 787446784.0, + "91": 787446272.0, + "92": 787446784.0, + "93": 787430400.0, + "94": 787440128.0, + "95": 787450368.0, + "96": 787454976.0, + "97": 787427328.0, + "98": 787475968.0, + "99": 787419136.0, + "100": 787438592.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2479493120.0, + "2": 2485449728.0, + "3": 2487249408.0, + "4": 2487249408.0, + "5": 2495991808.0, + "6": 2495991808.0, + "7": 2495991808.0, + "8": 2495991808.0, + "9": 2495991808.0, + "10": 2495991808.0, + "11": 2495991808.0, + "12": 2495991808.0, + "13": 2495991808.0, + "14": 2495991808.0, + "15": 2495991808.0, + "16": 2495991808.0, + "17": 2495991808.0, + "18": 2495991808.0, + "19": 2495991808.0, + "20": 2495991808.0, + "21": 2495991808.0, + "22": 2495991808.0, + "23": 2495991808.0, + "24": 2495991808.0, + "25": 2495991808.0, + "26": 2495991808.0, + "27": 2495991808.0, + "28": 2495991808.0, + "29": 2495991808.0, + "30": 2495991808.0, + "31": 2495991808.0, + "32": 2495991808.0, + "33": 2495991808.0, + "34": 2495991808.0, + "35": 2495991808.0, + "36": 2495991808.0, + "37": 2495991808.0, + "38": 2495991808.0, + "39": 2495991808.0, + "40": 2495991808.0, + "41": 2495991808.0, + "42": 2495991808.0, + "43": 2495991808.0, + "44": 2495991808.0, + "45": 2495991808.0, + "46": 2495991808.0, + "47": 2495991808.0, + "48": 2495991808.0, + "49": 2495991808.0, + "50": 2495991808.0, + "51": 2495991808.0, + "52": 2495991808.0, + "53": 2495991808.0, + "54": 2495991808.0, + "55": 
2495991808.0, + "56": 2495991808.0, + "57": 2495991808.0, + "58": 2495991808.0, + "59": 2495991808.0, + "60": 2495991808.0, + "61": 2495991808.0, + "62": 2495991808.0, + "63": 2495991808.0, + "64": 2495991808.0, + "65": 2495991808.0, + "66": 2495991808.0, + "67": 2495991808.0, + "68": 2495991808.0, + "69": 2495991808.0, + "70": 2495991808.0, + "71": 2495991808.0, + "72": 2495991808.0, + "73": 2495991808.0, + "74": 2495991808.0, + "75": 2495991808.0, + "76": 2495991808.0, + "77": 2495991808.0, + "78": 2495991808.0, + "79": 2495991808.0, + "80": 2495991808.0, + "81": 2495991808.0, + "82": 2495991808.0, + "83": 2495991808.0, + "84": 2495991808.0, + "85": 2495991808.0, + "86": 2495991808.0, + "87": 2495991808.0, + "88": 2495991808.0, + "89": 2495991808.0, + "90": 2495991808.0, + "91": 2495991808.0, + "92": 2495991808.0, + "93": 2495991808.0, + "94": 2495991808.0, + "95": 2495991808.0, + "96": 2495991808.0, + "97": 2495991808.0, + "98": 2495991808.0, + "99": 2495991808.0, + "100": 2495991808.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 12.11313, + "2": 0.4805, + "3": 0.36965, + "4": 0.36695, + "5": 0.31705, + "6": 0.31275, + "7": 0.31299, + "8": 0.29866, + "9": 0.28961, + "10": 0.28859, + "11": 0.29067, + "12": 0.29044, + "13": 0.29806, + "14": 0.29287, + "15": 0.29391, + "16": 0.3175, + "17": 0.28363, + "18": 0.2818, + "19": 0.29347, + "20": 0.28931, + "21": 0.29103, + "22": 0.28444, + "23": 0.28907, + "24": 0.27608, + "25": 0.28277, + "26": 0.28656, + "27": 0.28921, + "28": 0.30243, + "29": 0.30435, + "30": 0.31231, + "31": 0.30439, + "32": 0.31412, + "33": 0.28887, + "34": 0.29613, + "35": 0.29738, + "36": 0.29754, + "37": 0.3019, + "38": 0.2933, + "39": 0.2944, + "40": 0.29283, + "41": 0.29592, + "42": 0.29673, + "43": 0.29319, + "44": 0.30127, + "45": 0.29921, + "46": 0.29904, + "47": 0.28795, + "48": 0.29918, + "49": 0.28711, + "50": 0.29645, + "51": 0.28777, + "52": 0.29536, + "53": 0.2847, + 
"54": 0.28286, + "55": 0.2874, + "56": 0.28699, + "57": 0.28614, + "58": 0.29825, + "59": 0.28363, + "60": 0.29423, + "61": 0.29226, + "62": 0.2896, + "63": 0.28065, + "64": 0.29533, + "65": 0.29842, + "66": 0.28487, + "67": 0.28419, + "68": 0.29474, + "69": 0.28383, + "70": 0.28417, + "71": 0.29253, + "72": 0.28737, + "73": 0.27923, + "74": 0.28728, + "75": 0.29383, + "76": 0.28157, + "77": 0.64771, + "78": 0.29148, + "79": 0.28742, + "80": 0.29245, + "81": 0.28827, + "82": 0.28368, + "83": 0.28963, + "84": 0.29234, + "85": 0.28183, + "86": 0.28337, + "87": 0.27879, + "88": 0.28388, + "89": 0.28309, + "90": 0.28852, + "91": 0.28254, + "92": 0.28375, + "93": 0.28633, + "94": 0.28567, + "95": 0.28235, + "96": 0.28513, + "97": 0.27951, + "98": 0.27851, + "99": 0.28336, + "100": 0.27744 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml index 3ecd68b9841..8874f9cf045 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml @@ -56,7 +56,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true - --ckpt-format: torch_dist + --ckpt-format: fsdp_dtensor --dist-ckpt-optim-fully-reshardable: true --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_coreweave.json index b3f192ba287..73fb00c9231 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_coreweave.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_coreweave.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07546, - "2": 11.03837, - "3": 9.66011, - "4": 9.91381, - "5": 9.32909, - "6": 9.13922, - "7": 9.13574, - "8": 8.65508, - "9": 8.51394, - "10": 8.8409, - "11": 8.29149, - "12": 8.34581, - "13": 8.25518, - "14": 7.73711, - "15": 7.86249, - "16": 7.9371, - "17": 7.89319, - "18": 7.63123, - "19": 7.99731, - "20": 7.74538, - "21": 7.44348, - "22": 7.42249, - "23": 7.29714, - "24": 7.27462, - "25": 7.54574, - "26": 6.96838, - "27": 7.50556, - "28": 7.22743, - "29": 7.36588, - "30": 7.52622, - "31": 7.27026, - "32": 7.45521, - "33": 7.50954, - "34": 7.55686, - "35": 7.10177, - "36": 6.96431, - "37": 7.28463, - "38": 7.0808, - "39": 7.40923, - "40": 7.43338, - "41": 7.38496, - "42": 7.15749, - "43": 7.15858, - "44": 7.28852, - "45": 7.16793, - "46": 6.78468, - "47": 7.4114, - "48": 7.0027, - "49": 7.46249, - "50": 6.92151 + "1": 11.07559, + "2": 11.03834, + "3": 9.66022, + "4": 9.91367, + "5": 9.3291, + "6": 9.13927, + "7": 9.13591, + "8": 8.65527, + "9": 8.51396, + "10": 8.84095, + "11": 8.29144, + "12": 8.34584, + "13": 8.25509, + "14": 7.73685, + "15": 7.86273, + "16": 7.93699, + "17": 7.89257, + "18": 7.63116, + "19": 7.99719, + "20": 7.7453, + "21": 7.44298, + "22": 7.42242, + "23": 7.29721, + "24": 7.27467, + "25": 7.54562, + "26": 6.96839, + "27": 7.50569, + "28": 7.22761, + "29": 7.36579, + "30": 7.52635, + "31": 7.27036, + "32": 7.45548, + "33": 7.50952, + "34": 7.55694, + "35": 7.10212, + "36": 6.96414, + "37": 7.28438, + "38": 
7.08049, + "39": 7.40908, + "40": 7.4335, + "41": 7.38491, + "42": 7.15766, + "43": 7.15867, + "44": 7.28831, + "45": 7.16729, + "46": 6.78429, + "47": 7.40937, + "48": 7.00259, + "49": 7.46241, + "50": 6.92143 } }, "num-zeros": { @@ -63,54 +63,54 @@ "values": { "1": 911219392.0, "2": 910960384.0, - "3": 911156352.0, - "4": 912204800.0, - "5": 920796544.0, - "6": 940387968.0, - "7": 990599872.0, - "8": 976457728.0, - "9": 998097664.0, - "10": 995852672.0, - "11": 994583680.0, - "12": 977344896.0, - "13": 1028141824.0, - "14": 1007166208.0, - "15": 987423616.0, - "16": 993054784.0, - "17": 982319168.0, - "18": 998261760.0, - "19": 984696320.0, - "20": 982914752.0, - "21": 979667456.0, - "22": 953988864.0, - "23": 972353984.0, - "24": 964792064.0, - "25": 958512192.0, - "26": 946928512.0, + "3": 911156288.0, + "4": 913253376.0, + "5": 921845056.0, + "6": 941436672.0, + "7": 993745472.0, + "8": 974360512.0, + "9": 999146112.0, + "10": 992706944.0, + "11": 991438144.0, + "12": 979442048.0, + "13": 1029190272.0, + "14": 1008214656.0, + "15": 988472000.0, + "16": 988861120.0, + "17": 979173312.0, + "18": 996164608.0, + "19": 979453440.0, + "20": 982914688.0, + "21": 975473344.0, + "22": 955037568.0, + "23": 969208128.0, + "24": 965840832.0, + "25": 953269440.0, + "26": 949025536.0, "27": 948458304.0, - "28": 949643968.0, - "29": 942877440.0, + "28": 951741184.0, + "29": 943926272.0, "30": 935020160.0, - "31": 935327616.0, - "32": 934281088.0, - "33": 921805568.0, - "34": 928189312.0, - "35": 922202496.0, - "36": 924246656.0, - "37": 920661248.0, + "31": 933230336.0, + "32": 930086848.0, + "33": 922853952.0, + "34": 927140800.0, + "35": 925348224.0, + "36": 925295168.0, + "37": 922758272.0, "38": 922930752.0, - "39": 922322816.0, - "40": 921856512.0, - "41": 920227968.0, + "39": 922322880.0, + "40": 921856640.0, + "41": 920227776.0, "42": 918353664.0, - "43": 918607040.0, - "44": 914948032.0, - "45": 914295232.0, + "43": 919655616.0, + "44": 914948224.0, + "45": 
916392512.0, "46": 914344448.0, "47": 911769536.0, - "48": 912013312.0, - "49": 910349440.0, - "50": 914351552.0 + "48": 912013248.0, + "49": 910349376.0, + "50": 914351616.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 41739952128.0, - "2": 43687571456.0, - "3": 43687571456.0, - "4": 43983216640.0, - "5": 43983216640.0, - "6": 43983216640.0, - "7": 43983216640.0, - "8": 44024635392.0, - "9": 44041216000.0, - "10": 44041216000.0, - "11": 44041216000.0, - "12": 44041216000.0, - "13": 44041216000.0, - "14": 44041216000.0, - "15": 44041216000.0, - "16": 44041216000.0, - "17": 44041216000.0, - "18": 44041216000.0, - "19": 44041216000.0, - "20": 44041216000.0, - "21": 44041216000.0, - "22": 44041216000.0, - "23": 44041216000.0, - "24": 44041216000.0, - "25": 44041216000.0, - "26": 44041216000.0, - "27": 44041216000.0, - "28": 44041216000.0, - "29": 44041326592.0, - "30": 44162326528.0, - "31": 44220485632.0, - "32": 44270411776.0, - "33": 44293799936.0, - "34": 44293799936.0, - "35": 44293799936.0, - "36": 44293799936.0, - "37": 44293799936.0, - "38": 44293799936.0, - "39": 44293799936.0, - "40": 44293799936.0, - "41": 44293799936.0, - "42": 44293799936.0, - "43": 44293799936.0, - "44": 44293799936.0, - "45": 44293799936.0, - "46": 44293799936.0, - "47": 44293799936.0, - "48": 44293799936.0, - "49": 44293799936.0, - "50": 44293799936.0 + "1": 41740259328.0, + "2": 43687292928.0, + "3": 43687292928.0, + "4": 43984064512.0, + "5": 43984064512.0, + "6": 43984064512.0, + "7": 43984064512.0, + "8": 44026380288.0, + "9": 44041506816.0, + "10": 44041506816.0, + "11": 44041506816.0, + "12": 44041506816.0, + "13": 44041506816.0, + "14": 44041506816.0, + "15": 44041506816.0, + "16": 44041506816.0, + "17": 44041506816.0, + "18": 44041506816.0, + "19": 44041506816.0, + "20": 44041506816.0, + "21": 44041506816.0, + "22": 44041506816.0, + "23": 44041506816.0, + "24": 44041506816.0, + "25": 44041506816.0, + "26": 
44041506816.0, + "27": 44041506816.0, + "28": 44041506816.0, + "29": 44044173312.0, + "30": 44164231168.0, + "31": 44221079552.0, + "32": 44271415296.0, + "33": 44290232320.0, + "34": 44290232320.0, + "35": 44290232320.0, + "36": 44290232320.0, + "37": 44290232320.0, + "38": 44290232320.0, + "39": 44290232320.0, + "40": 44290232320.0, + "41": 44290232320.0, + "42": 44290232320.0, + "43": 44290232320.0, + "44": 44290232320.0, + "45": 44290232320.0, + "46": 44290232320.0, + "47": 44290232320.0, + "48": 44290232320.0, + "49": 44290232320.0, + "50": 44290232320.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.08617, - "2": 11.10475, - "3": 10.48001, - "4": 10.13466, - "5": 9.79047, - "6": 9.50601, - "7": 9.5113, - "8": 8.85336, - "9": 8.66683, - "10": 8.95866, - "11": 8.29315, - "12": 8.36982, - "13": 8.25544, - "14": 7.73322, + "1": 11.08623, + "2": 11.1047, + "3": 10.47999, + "4": 10.13471, + "5": 9.79045, + "6": 9.50607, + "7": 9.51139, + "8": 8.85331, + "9": 8.66688, + "10": 8.95867, + "11": 8.29318, + "12": 8.36986, + "13": 8.25545, + "14": 7.73323, "15": 7.86639, - "16": 7.92442, - "17": 7.86278, - "18": 7.61012, - "19": 8.00269, - "20": 7.73019, - "21": 7.4165, - "22": 7.41478, - "23": 7.28671, - "24": 7.27903, - "25": 7.54456, - "26": 6.96542, - "27": 7.50538, - "28": 7.20607, - "29": 7.377, - "30": 7.52777, - "31": 7.27094, - "32": 7.4604, + "16": 7.92438, + "17": 7.86276, + "18": 7.61004, + "19": 8.00261, + "20": 7.73004, + "21": 7.41636, + "22": 7.41466, + "23": 7.28656, + "24": 7.27882, + "25": 7.54458, + "26": 6.96533, + "27": 7.5053, + "28": 7.20603, + "29": 7.37687, + "30": 7.52783, + "31": 7.27097, + "32": 7.46043, "33": 7.51419, - "34": 7.56867, - "35": 7.09252, - "36": 6.96015, - "37": 7.29846, - "38": 7.0742, - "39": 7.43347, - "40": 7.43116, - "41": 7.40919, + "34": 7.56879, + "35": 7.09276, + "36": 6.96019, + "37": 7.29843, + "38": 7.07417, + "39": 7.43338, + "40": 7.43134, + "41": 7.40946, 
"42": 7.15527, - "43": 7.15652, - "44": 7.30441, - "45": 7.1893, - "46": 6.77296, - "47": 7.45045, - "48": 7.02403, - "49": 7.45719, - "50": 6.92656 + "43": 7.15684, + "44": 7.30429, + "45": 7.18917, + "46": 6.77286, + "47": 7.44985, + "48": 7.02383, + "49": 7.4572, + "50": 6.92645 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 64.40054, - "2": 2.16564, - "3": 3.72378, - "4": 1.63174, - "5": 2.30947, - "6": 1.7246, - "7": 1.5089, - "8": 1.60943, - "9": 1.48606, - "10": 1.47162, - "11": 1.05608, - "12": 1.3309, - "13": 1.06824, - "14": 1.41914, - "15": 1.10033, - "16": 1.15759, - "17": 1.23897, - "18": 1.10439, - "19": 1.11869, - "20": 1.09363, - "21": 1.23622, - "22": 1.14797, - "23": 1.23037, - "24": 1.03991, - "25": 1.07795, - "26": 1.04416, - "27": 1.03654, - "28": 1.04098, - "29": 1.03502, - "30": 1.02909, - "31": 1.17935, - "32": 1.14717, - "33": 1.05403, - "34": 1.13894, - "35": 1.04538, - "36": 1.04367, - "37": 1.0843, - "38": 1.04631, - "39": 1.06131, - "40": 1.06988, - "41": 1.09756, - "42": 1.04759, - "43": 1.09649, - "44": 1.05666, - "45": 1.05249, - "46": 1.04539, - "47": 1.04041, - "48": 1.04904, - "49": 1.04777, - "50": 1.06237 + "1": 89.89187, + "2": 2.19484, + "3": 3.80506, + "4": 1.63188, + "5": 2.52939, + "6": 2.46374, + "7": 1.5097, + "8": 1.75664, + "9": 1.62191, + "10": 1.35808, + "11": 1.04295, + "12": 1.35317, + "13": 1.07545, + "14": 1.42301, + "15": 1.10347, + "16": 1.28287, + "17": 1.22104, + "18": 1.07676, + "19": 1.08763, + "20": 1.12221, + "21": 1.25145, + "22": 1.04596, + "23": 1.22539, + "24": 1.06194, + "25": 1.11205, + "26": 1.05389, + "27": 1.03357, + "28": 1.0291, + "29": 1.04027, + "30": 1.06631, + "31": 1.18617, + "32": 1.142, + "33": 1.03842, + "34": 1.12457, + "35": 1.04164, + "36": 1.04698, + "37": 1.07674, + "38": 1.03833, + "39": 1.03043, + "40": 1.02697, + "41": 1.11388, + "42": 1.04538, + "43": 1.03328, + "44": 1.04873, + "45": 1.03241, + "46": 1.03847, + "47": 
1.04164, + "48": 1.04077, + "49": 1.03715, + "50": 1.02734 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_eos.json index d7372742ca7..0a6724a3e95 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_eos.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgxh100_eos.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07546, - "2": 11.03837, - "3": 9.66011, - "4": 9.91381, - "5": 9.32909, - "6": 9.13922, - "7": 9.13574, - "8": 8.65508, - "9": 8.51394, - "10": 8.8409, - "11": 8.29149, - "12": 8.34581, - "13": 8.25518, - "14": 7.73711, - "15": 7.86249, - "16": 7.9371, - "17": 7.89319, - "18": 7.63123, - "19": 7.99731, - "20": 7.74538, - "21": 7.44348, - "22": 7.42249, - "23": 7.29714, - "24": 7.27462, - "25": 7.54574, - "26": 6.96838, - "27": 7.50556, - "28": 7.22743, - "29": 7.36588, - "30": 7.52622, - "31": 7.27026, - "32": 7.45521, - "33": 7.50954, - "34": 7.55686, - "35": 7.10177, - "36": 6.96431, - "37": 7.28463, - "38": 7.0808, - "39": 7.40923, - "40": 7.43338, - "41": 7.38496, - "42": 7.15749, - "43": 7.15858, - "44": 7.28852, - "45": 7.16793, - "46": 6.78468, - "47": 7.4114, - "48": 7.0027, - "49": 7.46249, - "50": 6.92151 + "1": 11.07559, + "2": 11.03834, + "3": 9.66022, + "4": 9.91367, + "5": 9.3291, + "6": 9.13927, + "7": 9.13591, + "8": 8.65527, + "9": 8.51396, + "10": 8.84095, + "11": 8.29144, + "12": 8.34584, + "13": 8.25509, + "14": 7.73685, + "15": 7.86273, + "16": 7.93699, + "17": 7.89257, + "18": 7.63116, + "19": 7.99719, + "20": 7.7453, + "21": 7.44298, + "22": 7.42242, + "23": 7.29721, + "24": 7.27467, + "25": 
7.54562, + "26": 6.96839, + "27": 7.50569, + "28": 7.22761, + "29": 7.36579, + "30": 7.52635, + "31": 7.27036, + "32": 7.45548, + "33": 7.50952, + "34": 7.55694, + "35": 7.10212, + "36": 6.96414, + "37": 7.28438, + "38": 7.08049, + "39": 7.40908, + "40": 7.4335, + "41": 7.38491, + "42": 7.15766, + "43": 7.15867, + "44": 7.28831, + "45": 7.16729, + "46": 6.78429, + "47": 7.40937, + "48": 7.00259, + "49": 7.46241, + "50": 6.92143 } }, "num-zeros": { @@ -63,54 +63,54 @@ "values": { "1": 911219392.0, "2": 910960384.0, - "3": 911156352.0, - "4": 912204800.0, - "5": 920796544.0, - "6": 940387968.0, - "7": 990599872.0, - "8": 976457728.0, - "9": 998097664.0, - "10": 995852672.0, - "11": 994583680.0, - "12": 977344896.0, - "13": 1028141824.0, - "14": 1007166208.0, - "15": 987423616.0, - "16": 993054784.0, - "17": 982319168.0, - "18": 998261760.0, - "19": 984696320.0, - "20": 982914752.0, - "21": 979667456.0, - "22": 953988864.0, - "23": 972353984.0, - "24": 964792064.0, - "25": 958512192.0, - "26": 946928512.0, + "3": 911156288.0, + "4": 913253376.0, + "5": 921845056.0, + "6": 941436672.0, + "7": 993745472.0, + "8": 974360512.0, + "9": 999146112.0, + "10": 992706944.0, + "11": 991438144.0, + "12": 979442048.0, + "13": 1029190272.0, + "14": 1008214656.0, + "15": 988472000.0, + "16": 988861120.0, + "17": 979173312.0, + "18": 996164608.0, + "19": 979453440.0, + "20": 982914688.0, + "21": 975473344.0, + "22": 955037568.0, + "23": 969208128.0, + "24": 965840832.0, + "25": 953269440.0, + "26": 949025536.0, "27": 948458304.0, - "28": 949643968.0, - "29": 942877440.0, + "28": 951741184.0, + "29": 943926272.0, "30": 935020160.0, - "31": 935327616.0, - "32": 934281088.0, - "33": 921805568.0, - "34": 928189312.0, - "35": 922202496.0, - "36": 924246656.0, - "37": 920661248.0, + "31": 933230336.0, + "32": 930086848.0, + "33": 922853952.0, + "34": 927140800.0, + "35": 925348224.0, + "36": 925295168.0, + "37": 922758272.0, "38": 922930752.0, - "39": 922322816.0, - "40": 921856512.0, - 
"41": 920227968.0, + "39": 922322880.0, + "40": 921856640.0, + "41": 920227776.0, "42": 918353664.0, - "43": 918607040.0, - "44": 914948032.0, - "45": 914295232.0, + "43": 919655616.0, + "44": 914948224.0, + "45": 916392512.0, "46": 914344448.0, "47": 911769536.0, - "48": 912013312.0, - "49": 910349440.0, - "50": 914351552.0 + "48": 912013248.0, + "49": 910349376.0, + "50": 914351616.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 41739952128.0, - "2": 43687571456.0, - "3": 43687571456.0, - "4": 43983216640.0, - "5": 43983216640.0, - "6": 43983216640.0, - "7": 43983216640.0, - "8": 44024635392.0, - "9": 44041216000.0, - "10": 44041216000.0, - "11": 44041216000.0, - "12": 44041216000.0, - "13": 44041216000.0, - "14": 44041216000.0, - "15": 44041216000.0, - "16": 44041216000.0, - "17": 44041216000.0, - "18": 44041216000.0, - "19": 44041216000.0, - "20": 44041216000.0, - "21": 44041216000.0, - "22": 44041216000.0, - "23": 44041216000.0, - "24": 44041216000.0, - "25": 44041216000.0, - "26": 44041216000.0, - "27": 44041216000.0, - "28": 44041216000.0, - "29": 44041326592.0, - "30": 44162326528.0, - "31": 44220485632.0, - "32": 44270411776.0, - "33": 44293799936.0, - "34": 44293799936.0, - "35": 44293799936.0, - "36": 44293799936.0, - "37": 44293799936.0, - "38": 44293799936.0, - "39": 44293799936.0, - "40": 44293799936.0, - "41": 44293799936.0, - "42": 44293799936.0, - "43": 44293799936.0, - "44": 44293799936.0, - "45": 44293799936.0, - "46": 44293799936.0, - "47": 44293799936.0, - "48": 44293799936.0, - "49": 44293799936.0, - "50": 44293799936.0 + "1": 41740259328.0, + "2": 43687292928.0, + "3": 43687292928.0, + "4": 43984064512.0, + "5": 43984064512.0, + "6": 43984064512.0, + "7": 43984064512.0, + "8": 44026380288.0, + "9": 44041506816.0, + "10": 44041506816.0, + "11": 44041506816.0, + "12": 44041506816.0, + "13": 44041506816.0, + "14": 44041506816.0, + "15": 44041506816.0, + "16": 44041506816.0, + "17": 
44041506816.0, + "18": 44041506816.0, + "19": 44041506816.0, + "20": 44041506816.0, + "21": 44041506816.0, + "22": 44041506816.0, + "23": 44041506816.0, + "24": 44041506816.0, + "25": 44041506816.0, + "26": 44041506816.0, + "27": 44041506816.0, + "28": 44041506816.0, + "29": 44044173312.0, + "30": 44164231168.0, + "31": 44221079552.0, + "32": 44271415296.0, + "33": 44290232320.0, + "34": 44290232320.0, + "35": 44290232320.0, + "36": 44290232320.0, + "37": 44290232320.0, + "38": 44290232320.0, + "39": 44290232320.0, + "40": 44290232320.0, + "41": 44290232320.0, + "42": 44290232320.0, + "43": 44290232320.0, + "44": 44290232320.0, + "45": 44290232320.0, + "46": 44290232320.0, + "47": 44290232320.0, + "48": 44290232320.0, + "49": 44290232320.0, + "50": 44290232320.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.08617, - "2": 11.10475, - "3": 10.48001, - "4": 10.13466, - "5": 9.79047, - "6": 9.50601, - "7": 9.5113, - "8": 8.85336, - "9": 8.66683, - "10": 8.95866, - "11": 8.29315, - "12": 8.36982, - "13": 8.25544, - "14": 7.73322, + "1": 11.08623, + "2": 11.1047, + "3": 10.47999, + "4": 10.13471, + "5": 9.79045, + "6": 9.50607, + "7": 9.51139, + "8": 8.85331, + "9": 8.66688, + "10": 8.95867, + "11": 8.29318, + "12": 8.36986, + "13": 8.25545, + "14": 7.73323, "15": 7.86639, - "16": 7.92442, - "17": 7.86278, - "18": 7.61012, - "19": 8.00269, - "20": 7.73019, - "21": 7.4165, - "22": 7.41478, - "23": 7.28671, - "24": 7.27903, - "25": 7.54456, - "26": 6.96542, - "27": 7.50538, - "28": 7.20607, - "29": 7.377, - "30": 7.52777, - "31": 7.27094, - "32": 7.4604, + "16": 7.92438, + "17": 7.86276, + "18": 7.61004, + "19": 8.00261, + "20": 7.73004, + "21": 7.41636, + "22": 7.41466, + "23": 7.28656, + "24": 7.27882, + "25": 7.54458, + "26": 6.96533, + "27": 7.5053, + "28": 7.20603, + "29": 7.37687, + "30": 7.52783, + "31": 7.27097, + "32": 7.46043, "33": 7.51419, - "34": 7.56867, - "35": 7.09252, - "36": 6.96015, - "37": 7.29846, 
- "38": 7.0742, - "39": 7.43347, - "40": 7.43116, - "41": 7.40919, + "34": 7.56879, + "35": 7.09276, + "36": 6.96019, + "37": 7.29843, + "38": 7.07417, + "39": 7.43338, + "40": 7.43134, + "41": 7.40946, "42": 7.15527, - "43": 7.15652, - "44": 7.30441, - "45": 7.1893, - "46": 6.77296, - "47": 7.45045, - "48": 7.02403, - "49": 7.45719, - "50": 6.92656 + "43": 7.15684, + "44": 7.30429, + "45": 7.18917, + "46": 6.77286, + "47": 7.44985, + "48": 7.02383, + "49": 7.4572, + "50": 6.92645 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 87.63934, - "2": 1.98402, - "3": 3.95877, - "4": 1.64812, - "5": 2.312, - "6": 2.02902, - "7": 1.56333, - "8": 1.66703, - "9": 1.6393, - "10": 1.40472, - "11": 1.086, - "12": 1.34921, - "13": 1.0854, - "14": 1.4242, - "15": 1.09539, - "16": 1.79766, - "17": 1.2562, - "18": 1.08887, - "19": 1.08371, - "20": 1.10071, - "21": 1.25979, - "22": 1.3212, - "23": 1.25044, - "24": 1.05384, - "25": 1.11356, - "26": 1.0605, - "27": 1.03418, - "28": 1.0405, - "29": 1.05174, - "30": 1.04166, - "31": 1.20036, - "32": 1.12936, - "33": 1.02917, - "34": 1.13473, - "35": 1.02829, - "36": 1.04352, - "37": 1.0843, - "38": 1.03714, - "39": 1.04534, - "40": 1.07031, - "41": 1.07618, - "42": 1.03008, - "43": 1.06043, - "44": 1.04049, - "45": 1.02875, - "46": 1.03669, - "47": 1.03128, - "48": 1.02808, - "49": 1.03038, - "50": 1.04621 + "1": 85.92313, + "2": 1.99152, + "3": 3.91366, + "4": 1.68454, + "5": 2.53883, + "6": 2.55539, + "7": 1.60104, + "8": 1.70562, + "9": 1.72325, + "10": 1.4332, + "11": 1.07958, + "12": 1.399, + "13": 1.10259, + "14": 1.43922, + "15": 1.12046, + "16": 1.33695, + "17": 1.24765, + "18": 1.11257, + "19": 1.10335, + "20": 1.12919, + "21": 1.27711, + "22": 1.09482, + "23": 1.27635, + "24": 1.112, + "25": 1.17791, + "26": 1.10426, + "27": 1.09103, + "28": 1.08338, + "29": 1.07904, + "30": 1.08709, + "31": 1.2237, + "32": 1.18059, + "33": 1.07913, + "34": 1.17232, + "35": 1.09059, + "36": 
1.09648, + "37": 1.12683, + "38": 1.10153, + "39": 1.09557, + "40": 1.07747, + "41": 1.12905, + "42": 1.09275, + "43": 1.08609, + "44": 1.08042, + "45": 1.08321, + "46": 1.0732, + "47": 1.08666, + "48": 1.08865, + "49": 1.08808, + "50": 1.08086 } } } \ No newline at end of file diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 649da3ba518..53047ff4a3b 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -106,14 +106,13 @@ products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] - # TODO: The migration of custom fsdp causes EP + FSDP to be temporarily unavailable, which will be fixed in a subsequent MR. - # - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] - # products: - # - environment: [dev] - # scope: [mr] - # platforms: [dgx_h100] - # - environment: [lts] - # scope: [nightly] + - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] + - environment: [lts] + scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective] products: - environment: [dev] diff --git a/tools/checkpoint/checkpoint_inspector.py b/tools/checkpoint/checkpoint_inspector.py index 34afa27755f..c62f0ca7417 100644 --- a/tools/checkpoint/checkpoint_inspector.py +++ b/tools/checkpoint/checkpoint_inspector.py @@ -8,6 +8,8 @@ import time import re import shutil +from typing import Optional +import tempfile import click import torch @@ -19,6 +21,7 @@ FileSystemReader, FileSystemWriter, ) +from torch.distributed.checkpoint.format_utils import dcp_to_torch_save from torch.distributed.checkpoint.metadata import ( BytesStorageMetadata, TensorStorageMetadata, @@ -64,7 +67,8 @@ def cli(): @cli.command() @click.argument("checkpoint_dir", type=click.Path(exists=True)) @click.option("--enable-msc", is_flag=True, help="Enable 
MultiStorageClient feature.") -def inspect(checkpoint_dir, enable_msc): +@click.option("--not-ignore-param-to-group-meta", is_flag=True, help="Ignore parameter-to-group metadata.") +def inspect(checkpoint_dir, enable_msc, not_ignore_param_to_group_meta): """Inspect a Megatron Core Distributed Checkpoint""" ckpt_path = Path(checkpoint_dir) @@ -138,6 +142,8 @@ def inspect(checkpoint_dir, enable_msc): ] click.echo(" | ".join(stats) + "\n") + ignore_param_to_group_meta = not not_ignore_param_to_group_meta + ignore_param_to_group_meta_count = 0 for key, value in metadata.state_dict_metadata.items(): bullet = click.style("►", fg="blue") key_styled = click.style(key, fg="green") @@ -147,11 +153,18 @@ def inspect(checkpoint_dir, enable_msc): shape = click.style(f"{tuple(value.size)}", fg="magenta") click.echo(f" {bullet} {key_styled} [{dtype}, shape={shape}]") elif isinstance(value, BytesStorageMetadata): + if ignore_param_to_group_meta and key.startswith("optimizer.param_to_group_meta."): + ignore_param_to_group_meta_count += 1 + continue click.echo(f" {bullet} {key_styled} {click.style('[BYTES]', fg='yellow')}") else: click.echo( f" {bullet} {key_styled} {click.style('[UNKNOWN TYPE]', fg='red')}" ) + if ignore_param_to_group_meta: + click.echo( + click.style(f"Ignored parameter-to-group metadata: {ignore_param_to_group_meta_count}", fg="yellow") + ) # MCore data section try: @@ -323,8 +336,10 @@ def convert_checkpoint( output_dir, swiglu, process_group, + optimizer_param_to_group_prefix="optimizer.param_to_group_meta.module.module.module", optimizer_state_prefix="optimizer.state.module.module.module", model_weight_prefix="model.module", + param_to_param_group_map={}, ): """Convert a Megatron Core Distributed Checkpoint from torch_dist to standard fsdp_dtensor format.""" device_mesh = DeviceMesh.from_group(process_group, device_type="cuda") @@ -371,6 +386,104 @@ def _free_up_some_gpu_memory(): gc.collect() torch.cuda.empty_cache() + def split_layers( + key: str, + value: 
torch.Tensor, + orig_shape: Optional[torch.Size] = None, + ) -> dict[str, torch.Tensor]: + """ + Split layers into separate tensors. + """ + _free_up_some_gpu_memory() + layers = {} + for i, v in enumerate(split_dtensor(value, 1, dim=0)): + v = gather_uneven_dtensor_to_full_tensor(v).reshape( + orig_shape[1:] if orig_shape else value.shape[1:] + ).redistribute(placements=[Shard(0)]) + + layer_key = key.replace(".layers.", f".layers.{i}.") + layers[layer_key] = v + + return layers + + def split_expert_weights( + key: str, + value: torch.Tensor, + orig_shape: Optional[torch.Size] = None, + ) -> dict[str, torch.Tensor]: + """ + Split expert weights into separate tensors for each expert. + """ + experts = {} + layer_key = key.replace(".experts.experts.", ".experts.") + expert_weights = split_dtensor(value, 1, dim=0) + for expert_idx, expert_weight in enumerate(expert_weights): + layer_key_parts = layer_key.split(".weight", 1) + if len(layer_key_parts) == 1: + expert_key = f"{layer_key}{expert_idx}" + elif len(layer_key_parts) == 2: + expert_key = f"{layer_key_parts[0]}.weight{expert_idx}{layer_key_parts[1]}" + else: + raise ValueError(f"Unexpected expert layer key: {layer_key}") + + expert_weight = gather_uneven_dtensor_to_full_tensor(expert_weight) + expert_shape = orig_shape[1:] if orig_shape else value.shape[1:] + # Handle optimizer states for expert linear_fc2 when ETP is enabled + if ( + layer_key.startswith("optimizer.state.") + and "linear_fc2" in layer_key + and expert_weight.shape[-2] > 1 + ): + tp_size = expert_weight.shape[-2] + rows, cols = expert_shape + # Reshape to split column dimension by tp_size + expert_weight = expert_weight.reshape( + *expert_weight.shape[:-1], rows, cols // tp_size + ) + dims = list(range(expert_weight.ndim)) + dims[-3], dims[-2] = dims[-2], dims[-3] + expert_weight = ( + expert_weight.permute(*dims) + .reshape(expert_shape) + .redistribute(placements=[Shard(0)]) + ) + else: + expert_weight = 
expert_weight.reshape(expert_shape).redistribute( + placements=[Shard(0)] + ) + experts[expert_key] = expert_weight + return experts + + def is_swiglu_key(key): + return any(re.search(pat, key) for pat in [ + r"(.*)\.mlp\.linear_fc1\.weight", + r"(.*)\.mlp\.linear_fc1\.bias", + r"(.*)\.mlp\.experts\.linear_fc1\.weight(\d+)", + r"(.*)\.mlp\.experts\.linear_fc1\.bias(\d+)", + r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.weight", + r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.bias", + r"(.*)\.mlp\.shared_experts\.linear_fc1\.weight", + r"(.*)\.mlp\.shared_experts\.linear_fc1\.bias", + ]) + + def split_swiglu_weight(key: str, value: torch.Tensor) -> dict[str, torch.Tensor]: + """ + Split SwiGLU weights into separate tensors. + """ + value = gather_uneven_dtensor_to_full_tensor(value) + swiglu_w_and_v = {} + w, v = torch.chunk(value, 2, dim=0) + w = w.redistribute(placements=[Shard(0)]) + v = v.redistribute(placements=[Shard(0)]) + w_key = re.sub(r'(weight\d*)(.*)', r'\1_w\2', key) + v_key = re.sub(r'(weight\d*)(.*)', r'\1_v\2', key) + swiglu_w_and_v[w_key] = w + swiglu_w_and_v[v_key] = v + return swiglu_w_and_v + + def has_layer_index(key: str) -> bool: + return bool(re.search(r"layers\.(\d+)\.", key)) + while state_dict: key, value = state_dict.popitem() if torch.distributed.get_rank() == 0: @@ -387,9 +500,11 @@ def _free_up_some_gpu_memory(): # Special handling for optimizer state key_list = key.split(".") new_key = f"{optimizer_state_prefix}.{'.'.join(key_list[3:])}.{key_list[2]}" + is_param = False else: # Special handling for module parameters new_key = f"{model_weight_prefix}.{key}" + is_param = True # Handle dist-opt flatten tensors if ( @@ -406,68 +521,47 @@ def _free_up_some_gpu_memory(): else: orig_shape = None - # Handle multi-layer tensors - if ".layers." 
in new_key: - n_layer = value.shape[0] - - _free_up_some_gpu_memory() - per_layer_values = [ - gather_uneven_dtensor_to_full_tensor(v).redistribute( - placements=[Shard(len(v.shape) - 1)] - ) - for v in split_dtensor(value, 1, dim=0) - ] - for i in range(n_layer): - if orig_shape is not None: - layer_shape = orig_shape[1:] - else: - layer_shape = value.shape[1:] - - per_layer_values[i] = ( - per_layer_values[i] - .reshape(layer_shape) - .redistribute(placements=[Shard(0)]) - ) - for i in range(0, n_layer): - layer_key = new_key.replace(".layers.", f".layers.{i}.") - if swiglu and "mlp.linear_fc1.weight" in layer_key: - # Special case for SwiGLU - w, v = torch.chunk(per_layer_values[i], 2, dim=0) - w = w.redistribute(placements=[Shard(0)]) - v = v.redistribute(placements=[Shard(0)]) - w_key = layer_key.replace( - "mlp.linear_fc1.weight", "mlp.linear_fc1.weight_w" - ) - v_key = layer_key.replace( - "mlp.linear_fc1.weight", "mlp.linear_fc1.weight_v" - ) - # Store both w and v in the state_dict - fsdp_dtensor_state_dict[w_key] = w - fsdp_dtensor_state_dict[v_key] = v - elif ( - "experts.experts.linear_fc1.weight" in layer_key - or "experts.experts.linear_fc2.weight" in layer_key + # Handle multi-layer / experts tensors + split_tensors = {} + if ".layers." in new_key and not has_layer_index(new_key): + split_tensors = split_layers(new_key, value, orig_shape) + elif ".experts.experts." 
in new_key: + split_tensors = split_expert_weights(new_key, value, orig_shape) + else: + if orig_shape: + value = gather_uneven_dtensor_to_full_tensor(value) + # Handle optimizer states with partition_dim=1 when TP is enabled + if ( + new_key.startswith("optimizer.state.") + and value.ndim > 2 + and value.shape[-2] > 1 ): - # Special case for MoE - layer_key = layer_key.replace(".experts.experts.", ".experts.") - expert_weights = torch.split(per_layer_values[i], 1, dim=0) - for expert_idx, expert_weight in enumerate(expert_weights): - expert_key = f"{layer_key}{expert_idx}" - fsdp_dtensor_state_dict[expert_key] = expert_weight.squeeze( - 0 - ) + tp_size = value.shape[-2] + rows, cols = orig_shape + # Reshape to split column dimension by tp_size + value = value.reshape(*value.shape[:-1], rows, cols // tp_size) + dims = list(range(value.ndim)) + dims[-3], dims[-2] = dims[-2], dims[-3] + value = ( + value.permute(*dims) + .reshape(orig_shape) + .redistribute(placements=[Shard(0)]) + ) else: - # General case - fsdp_dtensor_state_dict[layer_key] = per_layer_values[i] - else: - if orig_shape is not None: - _free_up_some_gpu_memory() - value = ( - value.redistribute(placements=[Replicate()]) - .reshape(orig_shape) - .redistribute(placements=[Shard(0)]) - ) - fsdp_dtensor_state_dict[new_key] = value + value = value.reshape(orig_shape).redistribute(placements=[Shard(0)]) + split_tensors = {new_key: value} + + # Handle SWiGLU weights + for key, value in list(split_tensors.items()): + if swiglu and is_swiglu_key(key): + swiglu_w_and_v = split_swiglu_weight(key, value) + split_tensors.update(swiglu_w_and_v) + del split_tensors[key] + + fsdp_dtensor_state_dict.update(split_tensors) + if is_param and key in param_to_param_group_map: + for new_key in split_tensors.keys(): + param_to_param_group_map[new_key] = param_to_param_group_map[key] elif key.startswith("rng_state"): # Skip RNG states continue @@ -530,6 +624,15 @@ def _free_up_some_gpu_memory(): ) ) common_state = 
common_strategy.load_common(input_dir) + try: + if "param_groups" in common_state["optimizer"]: + ckpt_param_groups = common_state["optimizer"]["param_groups"] + else: + ckpt_param_groups = [] + for opt_state_dict in common_state["optimizer"].values(): + ckpt_param_groups.extend(opt_state_dict["optimizer"]["param_groups"]) + except: + ckpt_param_groups = None common_state = flatten(common_state) for key, value in common_state.items(): if key.startswith("optimizer.optimizer.param_groups."): @@ -541,12 +644,29 @@ def _free_up_some_gpu_memory(): ) fsdp_dtensor_state_dict[key] = value + # set up per-parameter param_groups + if param_to_param_group_map and ckpt_param_groups is not None: + for name in list(fsdp_dtensor_state_dict.keys()): + if not name.startswith(model_weight_prefix) or name.endswith(".expert_bias"): + continue + + assert name in param_to_param_group_map, f"Missing param group for {name}" + param_group_id = param_to_param_group_map[name] + assert param_group_id < len(ckpt_param_groups), f"Invalid param group id {param_group_id} for {name}" + name_without_prefix = name[len(model_weight_prefix):] + fsdp_dtensor_state_dict[ + f"{optimizer_param_to_group_prefix}.{name_without_prefix}" + ] = ckpt_param_groups[param_group_id] + if "checkpoint_version" not in fsdp_dtensor_state_dict: fsdp_dtensor_state_dict["checkpoint_version"] = 3.0 # Save modified checkpoint save_checkpoint_with_pickle_protocol(fsdp_dtensor_state_dict, output_dir) + dist.barrier() # Synchronize all ranks + dist.destroy_process_group() + @cli.command() @click.argument("input_dir", type=click.Path(exists=True)) @@ -560,12 +680,6 @@ def _free_up_some_gpu_memory(): "--oom-traceback", is_flag=True, help="Enable OOM traceback for debugging." 
) @click.option("--enable-msc", is_flag=True, help="Enable MultiStorageClient feature.") -@click.option( - "--distributed-timeout-minutes", - default=10, - type=int, - help="Timeout for distributed operations in minutes.", -) @click.option( "--output-optimizer-state-prefix", default="optimizer.state.module.module.module", @@ -576,15 +690,21 @@ def _free_up_some_gpu_memory(): default="model.module", help="Prefix for model weight keys in the checkpoint.", ) +@click.option( + "--param-to-param-group-map-json", + type=str, + default="{}", + help="JSON string representing the param to parameter group map." +) def convert_torch_dist_to_fsdp_dtensor( input_dir, output_dir, swiglu, oom_traceback, enable_msc, - distributed_timeout_minutes, output_optimizer_state_prefix, output_model_weight_prefix, + param_to_param_group_map_json, ): """Convert a Megatron Core Distributed Checkpoint from torch_dist to fsdp_dtensor format.""" if not enable_msc: @@ -624,10 +744,13 @@ def oom_observer(device, alloc, device_alloc, device_free): ckpt_path = Path(input_dir) output_dir = Path(output_dir) + with open(param_to_param_group_map_json, "r") as f: + param_to_param_group_map = json.load(f) convert_checkpoint( ckpt_path, output_dir, swiglu, process_group=dist.group.WORLD, optimizer_state_prefix=output_optimizer_state_prefix, model_weight_prefix=output_model_weight_prefix, + param_to_param_group_map=param_to_param_group_map, ) click.echo( @@ -742,6 +865,109 @@ def modify_state_dict(input_dir, output_dir, op, enable_msc): ) +def _compare_two_checkpoint(checkpoint_1, checkpoint_2): + reader_1 = FileSystemReader(checkpoint_1) + metadata_1 = reader_1.read_metadata() + + reader_2 = FileSystemReader(checkpoint_2) + metadata_2 = reader_2.read_metadata() + + keys_1 = set(metadata_1.state_dict_metadata.keys()) + keys_2 = set(metadata_2.state_dict_metadata.keys()) + + click.echo(click.style("Comparing checkpoints...", fg="blue")) + + # Compare keys + missing_in_1 = keys_2 - keys_1 + missing_in_2 = 
keys_1 - keys_2 + common_keys = keys_1 & keys_2 + + click.echo(click.style("Keys missing in checkpoint 1:", fg="red")) + for key in missing_in_1: + click.echo(click.style(f" - {key}", fg="red")) + + click.echo(click.style("Keys missing in checkpoint 2:", fg="red")) + for key in missing_in_2: + click.echo(click.style(f" - {key}", fg="red")) + + # Compare common keys + click.echo(click.style("Common keys in both checkpoints:", fg="green")) + for key in common_keys: + meta_1 = metadata_1.state_dict_metadata[key] + meta_2 = metadata_2.state_dict_metadata[key] + + if not isinstance(meta_1, TensorStorageMetadata): + continue + + if meta_1.size != meta_2.size or meta_1.properties.dtype != meta_2.properties.dtype: + click.echo(click.style(f" - {key} (metadata differ) meta_1: {meta_1}, meta_2: {meta_2}", fg="red")) + else: + value_1 = torch.empty(meta_1.size, dtype=meta_1.properties.dtype) + value_2 = value_1.clone() + + dcp.load({key: value_1}, storage_reader=reader_1, planner=DefaultLoadPlanner()) + dcp.load({key: value_2}, storage_reader=reader_2, planner=DefaultLoadPlanner()) + + if not torch.allclose( + value_1, value_2, atol=1e-8, rtol=1e-5 + ): + click.echo(click.style(f" - {key} (values differ) value_1: {value_1}, value_2: {value_2}", fg="red")) + + +@cli.command() +@click.argument("checkpoint_1", type=click.Path(exists=True)) +@click.argument("checkpoint_2", type=click.Path(exists=True)) +@click.option("--enable-msc", is_flag=True, help="Enable MultiStorageClient feature.") +def compare_two_checkpoint(checkpoint_1, checkpoint_2, enable_msc): + """ + Compare two checkpoints. 
+ """ + init_process_group(f"compare_two_checkpoint from {checkpoint_1} to {checkpoint_2}") + + if not enable_msc: + MultiStorageClientFeature.disable() + + _compare_two_checkpoint( + Path(checkpoint_1), + Path(checkpoint_2), + ) + + click.echo( + click.style( + f"Comparison between {checkpoint_1} and {checkpoint_2} completed.", fg="green", bold=True + ) + ) + + +@cli.command() +@click.argument("torch_dcp_dir", type=click.Path(exists=True)) +def print_torch_dcp_in_json(torch_dcp_dir, model_weight_prefix="model.module"): + # Use a temporary file context + with tempfile.NamedTemporaryFile(suffix=".pth") as tmp_file: + # Convert distributed checkpoint directory to a single-file checkpoint + dcp_to_torch_save(torch_dcp_dir, tmp_file.name) + + # Load the state dict from the temporary file + state_dict = torch.load(tmp_file.name, map_location="cpu") + + click.echo(f"torch dcp content: {json.dumps(state_dict)}") + + # Replace all "module.module." with model_weight_prefix in dict keys + new_state_dict = {} + for key, value in state_dict.items(): + new_key = key.replace("module.module", model_weight_prefix) + new_state_dict[new_key] = value + + # Convert state dict to JSON-serializable format + serializable_dict = {k: v.tolist() if hasattr(v, "tolist") else v for k, v in new_state_dict.items()} + + # Save to a JSON file + json_file_path = os.path.join(torch_dcp_dir, "param_to_param_group_map.json") + with open(json_file_path, "w") as json_file: + json.dump(serializable_dict, json_file, indent=2) + click.echo(f"Saved converted param_to_param_group_map to: {json_file_path}") + + def init_process_group(message): rank = int(os.getenv("RANK", "0")) world_size = int(os.getenv("WORLD_SIZE", "1")) From 2c854484431191e661242eb27185492f3760dfb6 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Thu, 30 Oct 2025 23:30:40 -0500 Subject: [PATCH 088/334] Update golden values due to PR #2007 (#2057) Signed-off-by: Charlie Truong --- .../golden_values_dev_dgxh100_eos.json | 598 
+++++++++--------- .../golden_values_dev_dgxh100_eos.json | 500 +++++++-------- .../golden_values_dev_dgxh100_eos.json | 537 ++++++++++++++++ .../golden_values_dev_dgxh100_eos.json | 380 +++++------ 4 files changed, 1276 insertions(+), 739 deletions(-) create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json index b3668b31178..01651f27b62 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_eos.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04748, - "2": 11.03561, - "3": 9.58773, - "4": 9.25819, - "5": 9.52742, - "6": 9.87911, - "7": 9.48366, - "8": 8.93879, - "9": 8.6551, - "10": 9.10915, - "11": 8.51806, - "12": 8.54732, - "13": 8.48144, - "14": 8.05312, - "15": 8.10118, - "16": 8.10344, - "17": 8.08878, - "18": 7.78589, - "19": 8.15794, - "20": 7.88069, - "21": 7.58542, - "22": 7.54895, - "23": 7.4296, - "24": 7.41901, - "25": 7.67277, - "26": 7.07835, - "27": 7.61157, - "28": 7.31513, - "29": 7.49487, - "30": 7.64287, - "31": 7.39102, - "32": 7.59148, - "33": 7.6393, - "34": 7.70086, - "35": 7.2119, - "36": 7.08623, - "37": 7.43064, - "38": 7.18999, - "39": 7.5525, - "40": 7.54961, - "41": 7.49385, - "42": 7.25481, - "43": 7.24066, - "44": 7.42131, - "45": 7.19201, - "46": 6.90547, - "47": 7.30704, - "48": 7.15325, - "49": 7.60504, - "50": 7.04512 + "1": 11.04722, + "2": 11.03572, + "3": 9.58802, + "4": 9.25807, + "5": 9.46595, + "6": 9.99646, + "7": 9.50952, + "8": 8.97596, + "9": 
8.64768, + "10": 9.40103, + "11": 8.86557, + "12": 8.63562, + "13": 8.52126, + "14": 8.08764, + "15": 8.19553, + "16": 8.22117, + "17": 8.14088, + "18": 7.83923, + "19": 8.23508, + "20": 7.95432, + "21": 7.62712, + "22": 7.60353, + "23": 7.48451, + "24": 7.46602, + "25": 7.70409, + "26": 7.10906, + "27": 7.6443, + "28": 7.34234, + "29": 7.5189, + "30": 7.67585, + "31": 7.41996, + "32": 7.61477, + "33": 7.66691, + "34": 7.73349, + "35": 7.23566, + "36": 7.11008, + "37": 7.44958, + "38": 7.21125, + "39": 7.57837, + "40": 7.56809, + "41": 7.51465, + "42": 7.27318, + "43": 7.25818, + "44": 7.44014, + "45": 7.21234, + "46": 6.92392, + "47": 7.32631, + "48": 7.17263, + "49": 7.62149, + "50": 7.06495 } }, "num-zeros": { @@ -62,55 +62,55 @@ "step_interval": 1, "values": { "1": 38802612.0, - "2": 38543592.0, - "3": 38739480.0, - "4": 279954336.0, - "5": 249745312.0, - "6": 268288496.0, - "7": 604756224.0, - "8": 781485184.0, - "9": 636362112.0, - "10": 653025216.0, - "11": 668551168.0, - "12": 765583616.0, - "13": 815362944.0, - "14": 834270656.0, - "15": 755756096.0, - "16": 995153536.0, - "17": 938291584.0, - "18": 721524928.0, - "19": 756173504.0, - "20": 901129600.0, - "21": 721816384.0, - "22": 831311872.0, - "23": 803536768.0, - "24": 628253248.0, - "25": 663895680.0, - "26": 847321664.0, - "27": 828927424.0, - "28": 777678976.0, - "29": 764628608.0, - "30": 781930112.0, - "31": 771767616.0, - "32": 771755392.0, - "33": 586323648.0, - "34": 734207552.0, - "35": 690468480.0, - "36": 485982688.0, - "37": 506506336.0, - "38": 642964160.0, - "39": 661240000.0, - "40": 645048768.0, - "41": 636072704.0, - "42": 491645856.0, - "43": 601942528.0, - "44": 623448960.0, - "45": 539959424.0, - "46": 532669088.0, - "47": 529039680.0, - "48": 504121984.0, - "49": 478344480.0, - "50": 331385728.0 + "2": 38543656.0, + "3": 38739356.0, + "4": 273649600.0, + "5": 252887040.0, + "6": 255692384.0, + "7": 598483264.0, + "8": 787737984.0, + "9": 696133120.0, + "10": 505146400.0, + "11": 
715718272.0, + "12": 872566848.0, + "13": 947497344.0, + "14": 1076390912.0, + "15": 853234624.0, + "16": 1045488064.0, + "17": 831385088.0, + "18": 969961792.0, + "19": 973165952.0, + "20": 951461376.0, + "21": 901033280.0, + "22": 897373440.0, + "23": 901066560.0, + "24": 710038592.0, + "25": 912381952.0, + "26": 866199936.0, + "27": 876109696.0, + "28": 912952192.0, + "29": 972247104.0, + "30": 951806720.0, + "31": 960493312.0, + "32": 910169408.0, + "33": 853655744.0, + "34": 834879424.0, + "35": 835171520.0, + "36": 797371392.0, + "37": 777009408.0, + "38": 598948480.0, + "39": 664393152.0, + "40": 767727104.0, + "41": 771335168.0, + "42": 752681344.0, + "43": 715187840.0, + "44": 714677440.0, + "45": 687806016.0, + "46": 501256736.0, + "47": 629706368.0, + "48": 651967104.0, + "49": 629336832.0, + "50": 589310016.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6637267456.0, - "2": 6637269504.0, - "3": 6637269504.0, - "4": 6637269504.0, - "5": 6637269504.0, - "6": 6637269504.0, - "7": 6637269504.0, - "8": 6637269504.0, - "9": 6637269504.0, - "10": 6637269504.0, - "11": 6637269504.0, - "12": 6637269504.0, - "13": 6637269504.0, - "14": 6637269504.0, - "15": 6637269504.0, - "16": 6637269504.0, - "17": 6637269504.0, - "18": 6637269504.0, - "19": 6637269504.0, - "20": 6637269504.0, - "21": 6637269504.0, - "22": 6637269504.0, - "23": 6637269504.0, - "24": 6637269504.0, - "25": 6637269504.0, - "26": 6637269504.0, - "27": 6637269504.0, - "28": 6637269504.0, - "29": 6637269504.0, - "30": 6637269504.0, - "31": 6637269504.0, - "32": 6637269504.0, - "33": 6637269504.0, - "34": 6637269504.0, - "35": 6637269504.0, - "36": 6637269504.0, - "37": 6637269504.0, - "38": 6637269504.0, - "39": 6637269504.0, - "40": 6637269504.0, - "41": 6637269504.0, - "42": 6637269504.0, - "43": 6637269504.0, - "44": 6637269504.0, - "45": 6637269504.0, - "46": 6637269504.0, - "47": 6637269504.0, - "48": 6637269504.0, - "49": 
6637269504.0, - "50": 6637269504.0 + "1": 6637272576.0, + "2": 6637274624.0, + "3": 6637274624.0, + "4": 6637274624.0, + "5": 6637274624.0, + "6": 6637274624.0, + "7": 6637274624.0, + "8": 6637274624.0, + "9": 6637274624.0, + "10": 6637274624.0, + "11": 6637274624.0, + "12": 6637274624.0, + "13": 6637274624.0, + "14": 6637274624.0, + "15": 6637274624.0, + "16": 6637274624.0, + "17": 6637274624.0, + "18": 6637274624.0, + "19": 6637274624.0, + "20": 6637274624.0, + "21": 6637274624.0, + "22": 6637274624.0, + "23": 6637274624.0, + "24": 6637274624.0, + "25": 6637274624.0, + "26": 6637274624.0, + "27": 6637274624.0, + "28": 6637274624.0, + "29": 6637274624.0, + "30": 6637274624.0, + "31": 6637274624.0, + "32": 6637274624.0, + "33": 6637274624.0, + "34": 6637274624.0, + "35": 6637274624.0, + "36": 6637274624.0, + "37": 6637274624.0, + "38": 6637274624.0, + "39": 6637274624.0, + "40": 6637274624.0, + "41": 6637274624.0, + "42": 6637274624.0, + "43": 6637274624.0, + "44": 6637274624.0, + "45": 6637274624.0, + "46": 6637274624.0, + "47": 6637274624.0, + "48": 6637274624.0, + "49": 6637274624.0, + "50": 6637274624.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 55055331328.0, - "2": 57809321984.0, - "3": 57919823872.0, - "4": 57919823872.0, - "5": 57919823872.0, - "6": 57919823872.0, - "7": 57919823872.0, - "8": 57919823872.0, - "9": 57919823872.0, - "10": 57919823872.0, - "11": 57919823872.0, - "12": 57919823872.0, - "13": 57932275712.0, - "14": 57932275712.0, - "15": 57932275712.0, - "16": 57932275712.0, - "17": 57932275712.0, - "18": 57932275712.0, - "19": 57932275712.0, - "20": 57932275712.0, - "21": 57932275712.0, - "22": 57932275712.0, - "23": 57932275712.0, - "24": 57932275712.0, - "25": 57932275712.0, - "26": 57932275712.0, - "27": 57932275712.0, - "28": 57932275712.0, - "29": 57932275712.0, - "30": 57932275712.0, - "31": 57932275712.0, - "32": 57932275712.0, - "33": 57932275712.0, - "34": 
57932275712.0, - "35": 57932275712.0, - "36": 57932275712.0, - "37": 57932275712.0, - "38": 57932275712.0, - "39": 57932275712.0, - "40": 57932275712.0, - "41": 57932275712.0, - "42": 57932275712.0, - "43": 57932275712.0, - "44": 57932275712.0, - "45": 57932275712.0, - "46": 57932275712.0, - "47": 57932275712.0, - "48": 57932275712.0, - "49": 57932275712.0, - "50": 57932275712.0 + "1": 55056003072.0, + "2": 57810763776.0, + "3": 57920647168.0, + "4": 57920647168.0, + "5": 57920647168.0, + "6": 57920647168.0, + "7": 57920647168.0, + "8": 57920647168.0, + "9": 57920647168.0, + "10": 57920647168.0, + "11": 57920647168.0, + "12": 57920647168.0, + "13": 57920647168.0, + "14": 57920647168.0, + "15": 57920647168.0, + "16": 57920647168.0, + "17": 57920647168.0, + "18": 57920647168.0, + "19": 57920647168.0, + "20": 57920647168.0, + "21": 57920647168.0, + "22": 57920647168.0, + "23": 57920647168.0, + "24": 57920647168.0, + "25": 57920647168.0, + "26": 57920647168.0, + "27": 57920647168.0, + "28": 57920647168.0, + "29": 57920647168.0, + "30": 57920647168.0, + "31": 57920647168.0, + "32": 57920647168.0, + "33": 57920647168.0, + "34": 57920647168.0, + "35": 57920647168.0, + "36": 57920647168.0, + "37": 57920647168.0, + "38": 57920647168.0, + "39": 57920647168.0, + "40": 57920647168.0, + "41": 57920647168.0, + "42": 57920647168.0, + "43": 57920647168.0, + "44": 57920647168.0, + "45": 57920647168.0, + "46": 57921617920.0, + "47": 57921617920.0, + "48": 57921617920.0, + "49": 57921617920.0, + "50": 57921617920.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07654, - "2": 11.07406, - "3": 10.53883, - "4": 10.09801, - "5": 9.81156, - "6": 10.06025, - "7": 9.7962, - "8": 9.06987, - "9": 8.86879, - "10": 9.13393, - "11": 8.5017, - "12": 8.54094, - "13": 8.43678, - "14": 7.85637, - "15": 7.99846, - "16": 8.05889, - "17": 8.01134, - "18": 7.73929, - "19": 8.1188, - "20": 7.83458, - "21": 7.53103, - "22": 7.50125, - "23": 7.37135, - 
"24": 7.37419, - "25": 7.61596, - "26": 7.01586, - "27": 7.55739, - "28": 7.26274, - "29": 7.43991, - "30": 7.58436, - "31": 7.32289, - "32": 7.50362, - "33": 7.56884, - "34": 7.6339, - "35": 7.151, - "36": 7.01725, - "37": 7.35013, - "38": 7.12483, - "39": 7.48708, - "40": 7.47451, - "41": 7.4181, - "42": 7.17557, - "43": 7.15957, - "44": 7.34227, - "45": 7.12176, - "46": 6.82526, - "47": 7.23374, - "48": 7.07893, - "49": 7.5077, - "50": 6.97094 + "1": 11.07648, + "2": 11.07404, + "3": 10.53854, + "4": 10.09813, + "5": 9.81166, + "6": 10.09741, + "7": 9.79481, + "8": 9.0642, + "9": 8.86016, + "10": 9.34039, + "11": 8.51318, + "12": 8.59468, + "13": 8.52921, + "14": 7.95758, + "15": 8.06962, + "16": 8.11803, + "17": 8.06994, + "18": 7.80584, + "19": 8.19191, + "20": 7.89063, + "21": 7.5707, + "22": 7.55089, + "23": 7.41603, + "24": 7.42509, + "25": 7.65319, + "26": 7.05604, + "27": 7.59797, + "28": 7.29977, + "29": 7.47274, + "30": 7.61938, + "31": 7.35308, + "32": 7.53089, + "33": 7.59296, + "34": 7.66429, + "35": 7.17544, + "36": 7.04045, + "37": 7.37008, + "38": 7.14419, + "39": 7.51022, + "40": 7.48928, + "41": 7.43717, + "42": 7.19432, + "43": 7.17612, + "44": 7.35764, + "45": 7.13893, + "46": 6.84092, + "47": 7.25121, + "48": 7.09497, + "49": 7.52321, + "50": 6.98958 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 57.80279, - "2": 1.26321, - "3": 1.18918, - "4": 2.24643, - "5": 2.25191, - "6": 1.80757, - "7": 2.09086, - "8": 1.69153, - "9": 1.81279, - "10": 1.64882, - "11": 1.03476, - "12": 1.03593, - "13": 1.04348, - "14": 1.03841, - "15": 1.04432, - "16": 1.05281, - "17": 1.04826, - "18": 1.04981, - "19": 1.05351, - "20": 1.04668, - "21": 1.05254, - "22": 1.05391, - "23": 1.04635, - "24": 1.05503, - "25": 1.04226, - "26": 1.0684, - "27": 1.04985, - "28": 1.04233, - "29": 1.05036, - "30": 1.06219, - "31": 1.044, - "32": 1.05614, - "33": 1.05729, - "34": 1.05618, - "35": 1.06289, - "36": 1.05761, - "37": 
1.05956, - "38": 1.06343, - "39": 1.06848, - "40": 1.06027, - "41": 1.05493, - "42": 1.05258, - "43": 1.04879, - "44": 1.04949, - "45": 1.05964, - "46": 1.04465, - "47": 1.0491, - "48": 1.05387, - "49": 1.05218, - "50": 1.05453 + "1": 85.33545, + "2": 1.29783, + "3": 1.20289, + "4": 2.24602, + "5": 2.32616, + "6": 1.7486, + "7": 2.17383, + "8": 1.65491, + "9": 1.70888, + "10": 1.05169, + "11": 1.03097, + "12": 1.02332, + "13": 1.0314, + "14": 1.03723, + "15": 1.02333, + "16": 1.04585, + "17": 1.05489, + "18": 1.05149, + "19": 1.04366, + "20": 1.04123, + "21": 1.04123, + "22": 1.05131, + "23": 1.04784, + "24": 1.05156, + "25": 1.05897, + "26": 1.05841, + "27": 1.03255, + "28": 1.03763, + "29": 1.0362, + "30": 1.04244, + "31": 1.03393, + "32": 1.04177, + "33": 1.06033, + "34": 1.06132, + "35": 1.06434, + "36": 1.05438, + "37": 1.64369, + "38": 1.06374, + "39": 1.07491, + "40": 1.07295, + "41": 1.06978, + "42": 1.06102, + "43": 1.05808, + "44": 1.06997, + "45": 1.06476, + "46": 1.06795, + "47": 1.06701, + "48": 1.06649, + "49": 1.06638, + "50": 1.06224 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json index daa04af43dd..dc2c39d712d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.95004, - "2": 10.9521, - "3": 10.5115, - "4": 9.96454, - "5": 9.93941, - "6": 9.67273, - "7": 10.20975, - "8": 9.49716, - "9": 9.55902, - "10": 9.79742, - "11": 9.30109, - "12": 9.40483, - "13": 9.39546, - 
"14": 8.84681, - "15": 9.02444, - "16": 9.07121, - "17": 9.04574, - "18": 8.75678, - "19": 9.18159, - "20": 8.8595, - "21": 8.53503, - "22": 8.55182, - "23": 8.42441, - "24": 8.37608, - "25": 8.64304, - "26": 7.97393, - "27": 8.56806, - "28": 8.19764, - "29": 8.3928, - "30": 8.67283, - "31": 8.289, - "32": 8.43572, - "33": 8.5568, - "34": 8.66018, - "35": 8.07934, - "36": 7.94976, - "37": 8.29565, - "38": 7.98044, - "39": 8.39201, - "40": 8.35513, - "41": 8.31876, - "42": 8.0583, - "43": 8.03283, - "44": 8.24243, - "45": 8.10277, - "46": 7.61696, - "47": 8.15273, - "48": 8.00569, - "49": 8.38688, - "50": 7.81491 + "1": 10.94971, + "2": 10.95174, + "3": 10.51547, + "4": 9.96574, + "5": 9.941, + "6": 9.67424, + "7": 10.20193, + "8": 9.50006, + "9": 9.54983, + "10": 9.79714, + "11": 9.30093, + "12": 9.40563, + "13": 9.39461, + "14": 8.84641, + "15": 9.02323, + "16": 9.07046, + "17": 9.04704, + "18": 8.75684, + "19": 9.18168, + "20": 8.86245, + "21": 8.53735, + "22": 8.55361, + "23": 8.42666, + "24": 8.37856, + "25": 8.64287, + "26": 7.9729, + "27": 8.56717, + "28": 8.19494, + "29": 8.39321, + "30": 8.67278, + "31": 8.2887, + "32": 8.43529, + "33": 8.5564, + "34": 8.65783, + "35": 8.07826, + "36": 7.94839, + "37": 8.29395, + "38": 7.9776, + "39": 8.39027, + "40": 8.35602, + "41": 8.31509, + "42": 8.06463, + "43": 8.03334, + "44": 8.24022, + "45": 8.10462, + "46": 7.61777, + "47": 8.15389, + "48": 8.0077, + "49": 8.38728, + "50": 7.81501 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19403624.0, - "2": 19274194.0, - "3": 19372760.0, - "4": 86525248.0, - "5": 148575568.0, - "6": 145226704.0, - "7": 171879984.0, - "8": 195785248.0, - "9": 164124752.0, - "10": 167684736.0, - "11": 221077344.0, - "12": 200384224.0, - "13": 248872528.0, - "14": 211169424.0, - "15": 214304608.0, - "16": 216075632.0, - "17": 267845984.0, - "18": 170470336.0, - "19": 176865072.0, - "20": 187955392.0, - "21": 225750704.0, - "22": 247396816.0, - 
"23": 211643856.0, - "24": 205638464.0, - "25": 277022272.0, - "26": 291562304.0, - "27": 225789840.0, - "28": 288202368.0, - "29": 198390384.0, - "30": 213302208.0, - "31": 227204752.0, - "32": 271112416.0, - "33": 231840432.0, - "34": 203575536.0, - "35": 191152368.0, - "36": 222566928.0, - "37": 177810112.0, - "38": 228708544.0, - "39": 211168784.0, - "40": 215603968.0, - "41": 200089440.0, - "42": 228529888.0, - "43": 198782848.0, - "44": 141902272.0, - "45": 181922816.0, - "46": 115369856.0, - "47": 170214176.0, - "48": 137292832.0, - "49": 97654936.0, - "50": 160979632.0 + "1": 19403704.0, + "2": 19274202.0, + "3": 19372672.0, + "4": 84955472.0, + "5": 148573088.0, + "6": 140513744.0, + "7": 176606368.0, + "8": 198919440.0, + "9": 175143840.0, + "10": 164545552.0, + "11": 216370368.0, + "12": 201999712.0, + "13": 239390272.0, + "14": 230012880.0, + "15": 215921904.0, + "16": 211344080.0, + "17": 274153920.0, + "18": 173627616.0, + "19": 176950304.0, + "20": 194330304.0, + "21": 243134016.0, + "22": 234854608.0, + "23": 219609264.0, + "24": 205630080.0, + "25": 198436912.0, + "26": 293244384.0, + "27": 274552608.0, + "28": 277179296.0, + "29": 210959616.0, + "30": 233757584.0, + "31": 236548544.0, + "32": 264864608.0, + "33": 250754976.0, + "34": 258614240.0, + "35": 208476240.0, + "36": 241437056.0, + "37": 177817504.0, + "38": 227178000.0, + "39": 222169216.0, + "40": 214031296.0, + "41": 209523040.0, + "42": 212816672.0, + "43": 195600416.0, + "44": 154459088.0, + "45": 166289280.0, + "46": 116993536.0, + "47": 168587312.0, + "48": 162414240.0, + "49": 119666904.0, + "50": 171972272.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4882187264.0, - "2": 4881607168.0, - "3": 4882283008.0, - "4": 4881322496.0, - "5": 4882174464.0, - "6": 4883177984.0, - "7": 4883252736.0, - "8": 4881774080.0, - "9": 4881443328.0, - "10": 4884319744.0, - "11": 4882319872.0, - "12": 4881232384.0, - "13": 4880836096.0, - 
"14": 4882124288.0, - "15": 4882108928.0, - "16": 4883384832.0, - "17": 4880466432.0, - "18": 4881518080.0, - "19": 4881734144.0, - "20": 4883215872.0, - "21": 4883534336.0, - "22": 4882774528.0, - "23": 4881818112.0, - "24": 4882441728.0, - "25": 4880546304.0, - "26": 4882178560.0, - "27": 4881892864.0, - "28": 4881869312.0, - "29": 4882979328.0, - "30": 4882715136.0, - "31": 4883084800.0, - "32": 4881436160.0, - "33": 4881766912.0, - "34": 4881406464.0, - "35": 4881531392.0, - "36": 4881479168.0, - "37": 4882455040.0, - "38": 4882054656.0, - "39": 4882005504.0, - "40": 4882743808.0, - "41": 4881211904.0, - "42": 4881378816.0, - "43": 4882133504.0, - "44": 4881860096.0, - "45": 4883165696.0, - "46": 4882168320.0, - "47": 4881526272.0, - "48": 4882125312.0, - "49": 4881533440.0, - "50": 4881598976.0 + "1": 4880827392.0, + "2": 4880161280.0, + "3": 4879780352.0, + "4": 4881006080.0, + "5": 4881443328.0, + "6": 4880235008.0, + "7": 4878593536.0, + "8": 4880183808.0, + "9": 4878518784.0, + "10": 4880639488.0, + "11": 4878592512.0, + "12": 4879459840.0, + "13": 4879073792.0, + "14": 4881052160.0, + "15": 4878580224.0, + "16": 4878705152.0, + "17": 4880005632.0, + "18": 4880081408.0, + "19": 4879190528.0, + "20": 4879407616.0, + "21": 4878837248.0, + "22": 4878897664.0, + "23": 4878346752.0, + "24": 4880498176.0, + "25": 4880417280.0, + "26": 4878027264.0, + "27": 4878756352.0, + "28": 4880044544.0, + "29": 4879154688.0, + "30": 4879779328.0, + "31": 4881071616.0, + "32": 4879392256.0, + "33": 4879744512.0, + "34": 4878250496.0, + "35": 4878979584.0, + "36": 4880133632.0, + "37": 4880431616.0, + "38": 4878993920.0, + "39": 4878280192.0, + "40": 4879473152.0, + "41": 4880439808.0, + "42": 4879638016.0, + "43": 4879913472.0, + "44": 4879031808.0, + "45": 4879471104.0, + "46": 4878890496.0, + "47": 4879007232.0, + "48": 4879195648.0, + "49": 4879473152.0, + "50": 4878174720.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, 
"values": { - "1": 41210470400.0, - "2": 41210470400.0, - "3": 41210470400.0, - "4": 41210470400.0, - "5": 41210470400.0, - "6": 41210470400.0, - "7": 41210470400.0, - "8": 41210470400.0, - "9": 41210470400.0, - "10": 41210470400.0, - "11": 41210470400.0, - "12": 41210470400.0, - "13": 41210470400.0, - "14": 41210470400.0, - "15": 41210470400.0, - "16": 41210470400.0, - "17": 41210470400.0, - "18": 41210470400.0, - "19": 41210470400.0, - "20": 41210470400.0, - "21": 41210470400.0, - "22": 41210470400.0, - "23": 41210470400.0, - "24": 41210470400.0, - "25": 41210470400.0, - "26": 41210470400.0, - "27": 41210470400.0, - "28": 41210470400.0, - "29": 41210470400.0, - "30": 41210470400.0, - "31": 41210470400.0, - "32": 41210470400.0, - "33": 41210470400.0, - "34": 41210470400.0, - "35": 41210470400.0, - "36": 41210470400.0, - "37": 41210470400.0, - "38": 41210470400.0, - "39": 41210470400.0, - "40": 41210470400.0, - "41": 41210470400.0, - "42": 41210470400.0, - "43": 41210470400.0, - "44": 41210470400.0, - "45": 41210470400.0, - "46": 41210470400.0, - "47": 41210470400.0, - "48": 41210470400.0, - "49": 41210470400.0, - "50": 41210470400.0 + "1": 41208373248.0, + "2": 41208373248.0, + "3": 41208373248.0, + "4": 41208373248.0, + "5": 41208373248.0, + "6": 41208373248.0, + "7": 41208373248.0, + "8": 41208373248.0, + "9": 41208373248.0, + "10": 41208373248.0, + "11": 41208373248.0, + "12": 41208373248.0, + "13": 41208373248.0, + "14": 41208373248.0, + "15": 41208373248.0, + "16": 41208373248.0, + "17": 41208373248.0, + "18": 41208373248.0, + "19": 41208373248.0, + "20": 41208373248.0, + "21": 41208373248.0, + "22": 41208373248.0, + "23": 41208373248.0, + "24": 41208373248.0, + "25": 41208373248.0, + "26": 41208373248.0, + "27": 41208373248.0, + "28": 41208373248.0, + "29": 41208373248.0, + "30": 41208373248.0, + "31": 41208373248.0, + "32": 41208373248.0, + "33": 41208373248.0, + "34": 41208373248.0, + "35": 41208373248.0, + "36": 41208373248.0, + "37": 41208373248.0, + 
"38": 41208373248.0, + "39": 41208373248.0, + "40": 41208373248.0, + "41": 41208373248.0, + "42": 41208373248.0, + "43": 41208373248.0, + "44": 41208373248.0, + "45": 41208373248.0, + "46": 41208373248.0, + "47": 41208373248.0, + "48": 41208373248.0, + "49": 41208373248.0, + "50": 41208373248.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 96.21947, - "2": 1.10023, - "3": 0.96399, - "4": 0.91113, - "5": 1.27509, - "6": 1.00484, - "7": 1.01236, - "8": 1.1739, - "9": 0.89406, - "10": 0.88836, - "11": 0.92033, - "12": 0.88331, - "13": 0.88179, - "14": 0.88307, - "15": 0.88648, - "16": 0.88425, - "17": 0.87155, - "18": 0.87556, - "19": 0.87374, - "20": 0.8744, - "21": 0.86757, - "22": 0.87217, - "23": 0.8736, - "24": 0.86646, - "25": 0.87328, - "26": 0.87121, - "27": 0.85886, - "28": 0.86392, - "29": 0.86385, - "30": 0.86425, - "31": 0.8631, - "32": 0.8617, - "33": 0.86069, - "34": 0.86829, - "35": 0.86837, - "36": 0.86776, - "37": 0.86686, - "38": 0.86359, - "39": 0.8677, - "40": 0.86441, - "41": 0.86179, - "42": 0.86079, - "43": 0.86149, - "44": 0.86222, - "45": 0.86336, - "46": 0.85875, - "47": 0.86219, - "48": 0.86026, - "49": 0.85894, - "50": 0.8544 + "1": 94.76465, + "2": 1.07136, + "3": 0.97804, + "4": 0.91812, + "5": 1.39406, + "6": 1.11113, + "7": 1.05399, + "8": 1.07764, + "9": 0.8817, + "10": 0.88267, + "11": 0.97121, + "12": 0.87696, + "13": 0.87547, + "14": 0.87457, + "15": 0.87326, + "16": 0.87868, + "17": 0.86846, + "18": 0.86669, + "19": 0.86508, + "20": 0.86847, + "21": 0.86661, + "22": 0.85614, + "23": 0.8576, + "24": 0.86445, + "25": 0.86658, + "26": 0.86708, + "27": 0.86226, + "28": 0.85806, + "29": 0.86248, + "30": 0.85836, + "31": 0.85969, + "32": 0.85739, + "33": 0.86134, + "34": 0.8621, + "35": 0.86104, + "36": 0.85793, + "37": 0.85834, + "38": 0.85618, + "39": 0.85754, + "40": 0.8554, + "41": 0.85094, + "42": 0.85738, + "43": 0.85524, + "44": 0.85844, + "45": 0.85739, + "46": 0.85581, + 
"47": 0.85717, + "48": 0.85118, + "49": 0.85577, + "50": 0.85127 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_eos.json new file mode 100644 index 00000000000..fe8428055c3 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgxh100_eos.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82922, + "2": 10.84163, + "3": 10.84245, + "4": 10.82, + "5": 10.85652, + "6": 10.86906, + "7": 10.83778, + "8": 10.84312, + "9": 10.84423, + "10": 10.79298, + "11": 10.86697, + "12": 10.86875, + "13": 10.86207, + "14": 10.86919, + "15": 10.8067, + "16": 10.8057, + "17": 10.77686, + "18": 10.79541, + "19": 10.78384, + "20": 10.72654, + "21": 10.69491, + "22": 10.54462, + "23": 10.6993, + "24": 10.58151, + "25": 10.53282, + "26": 10.58817, + "27": 10.601, + "28": 10.57563, + "29": 10.58022, + "30": 10.35802, + "31": 10.08769, + "32": 10.44466, + "33": 10.4477, + "34": 10.18704, + "35": 10.24483, + "36": 10.19713, + "37": 10.32294, + "38": 10.17101, + "39": 10.37026, + "40": 10.05533, + "41": 10.09491, + "42": 10.17971, + "43": 9.78263, + "44": 9.91346, + "45": 9.77951, + "46": 9.75648, + "47": 10.09647, + "48": 9.80391, + "49": 9.46649, + "50": 9.86874, + "51": 9.79428, + "52": 9.68303, + "53": 10.03314, + "54": 9.9113, + "55": 9.82995, + "56": 9.57839, + "57": 9.42377, + "58": 9.80549, + "59": 9.53292, + "60": 9.449, + "61": 9.65293, + "62": 9.95672, + "63": 9.33775, + "64": 9.74194, + "65": 8.89366, + "66": 9.67317, + "67": 9.33002, + "68": 9.76517, + "69": 9.76336, + "70": 9.71127, + "71": 9.59511, + "72": 9.54797, + "73": 
9.47124, + "74": 8.89297, + "75": 9.39451, + "76": 9.04721, + "77": 10.04318, + "78": 9.70313, + "79": 9.35169, + "80": 9.38198, + "81": 9.45146, + "82": 9.67546, + "83": 9.27658, + "84": 9.39241, + "85": 9.58333, + "86": 9.04518, + "87": 9.56487, + "88": 9.72459, + "89": 9.57019, + "90": 9.79944, + "91": 9.30737, + "92": 9.3313, + "93": 9.04109, + "94": 8.80259, + "95": 9.50213, + "96": 9.5021, + "97": 9.28183, + "98": 9.64883, + "99": 8.8594, + "100": 9.37131 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 27245.0, + "2": 28958.0, + "3": 29464.0, + "4": 28046.0, + "5": 31369.0, + "6": 33287.0, + "7": 31200.0, + "8": 26921.0, + "9": 30008.0, + "10": 25870.0, + "11": 33681.0, + "12": 30344.0, + "13": 32737.0, + "14": 33315.0, + "15": 29830.0, + "16": 32475.0, + "17": 30747.0, + "18": 30381.0, + "19": 31032.0, + "20": 28243.0, + "21": 29224.0, + "22": 27340.0, + "23": 34119.0, + "24": 29049.0, + "25": 27636.0, + "26": 30662.0, + "27": 32009.0, + "28": 33355.0, + "29": 34714.0, + "30": 30387.0, + "31": 28212.0, + "32": 33411.0, + "33": 34696.0, + "34": 30053.0, + "35": 31488.0, + "36": 32943.0, + "37": 35829.0, + "38": 33740.0, + "39": 37632.0, + "40": 34779.0, + "41": 33958.0, + "42": 36396.0, + "43": 34088.0, + "44": 34090.0, + "45": 35158.0, + "46": 36174.0, + "47": 39772.0, + "48": 36516.0, + "49": 36733.0, + "50": 38234.0, + "51": 38608.0, + "52": 37030.0, + "53": 42442.0, + "54": 40944.0, + "55": 37133.0, + "56": 41001.0, + "57": 37524.0, + "58": 42317.0, + "59": 40804.0, + "60": 40450.0, + "61": 41478.0, + "62": 39766.0, + "63": 37941.0, + "64": 42197.0, + "65": 40947.0, + "66": 44094.0, + "67": 41958.0, + "68": 40060.0, + "69": 42189.0, + "70": 43436.0, + "71": 42748.0, + "72": 44280.0, + "73": 47478.0, + "74": 41456.0, + "75": 39925.0, + "76": 43490.0, + "77": 45636.0, + "78": 2141470.0, + "79": 46055.0, + "80": 51863.0, + "81": 151341.0, + "82": 49835.0, + "83": 143360.0, + "84": 2141546.0, + "85": 
2145177.0, + "86": 132114.0, + "87": 2147022.0, + "88": 59899.0, + "89": 162883.0, + "90": 51330.0, + "91": 2141901.0, + "92": 44946.0, + "93": 138194.0, + "94": 2145772.0, + "95": 45247.0, + "96": 135045.0, + "97": 53170.0, + "98": 168576.0, + "99": 2141797.0, + "100": 163741.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 787516416.0, + "2": 787540992.0, + "3": 787524096.0, + "4": 787512320.0, + "5": 787547136.0, + "6": 787537920.0, + "7": 787512832.0, + "8": 787524608.0, + "9": 787528192.0, + "10": 787505152.0, + "11": 787522048.0, + "12": 787520000.0, + "13": 787529728.0, + "14": 787529216.0, + "15": 787504128.0, + "16": 787513344.0, + "17": 787503104.0, + "18": 787489280.0, + "19": 787514880.0, + "20": 787505152.0, + "21": 787479552.0, + "22": 787486208.0, + "23": 787478528.0, + "24": 787486208.0, + "25": 787451392.0, + "26": 787482112.0, + "27": 787470848.0, + "28": 787450368.0, + "29": 787458048.0, + "30": 787435008.0, + "31": 787406848.0, + "32": 787424256.0, + "33": 787435520.0, + "34": 787426304.0, + "35": 787418624.0, + "36": 787436544.0, + "37": 787428352.0, + "38": 787436544.0, + "39": 787417600.0, + "40": 787415040.0, + "41": 787405824.0, + "42": 787415040.0, + "43": 787367936.0, + "44": 787392512.0, + "45": 787399680.0, + "46": 787355136.0, + "47": 787411456.0, + "48": 787354112.0, + "49": 787374080.0, + "50": 787389440.0, + "51": 787375616.0, + "52": 787383808.0, + "53": 787379712.0, + "54": 787384832.0, + "55": 787388928.0, + "56": 787388928.0, + "57": 787351040.0, + "58": 787382784.0, + "59": 787374080.0, + "60": 787395072.0, + "61": 787405312.0, + "62": 787405824.0, + "63": 787373056.0, + "64": 787388928.0, + "65": 787351552.0, + "66": 787386880.0, + "67": 787392000.0, + "68": 787399168.0, + "69": 787383296.0, + "70": 787393024.0, + "71": 787406848.0, + "72": 787400704.0, + "73": 787401216.0, + "74": 787403264.0, + "75": 787442688.0, + "76": 787444736.0, + "77": 787445760.0, + 
"78": 787395072.0, + "79": 787430400.0, + "80": 787410432.0, + "81": 787412992.0, + "82": 787427840.0, + "83": 787428864.0, + "84": 787412480.0, + "85": 787412480.0, + "86": 787394560.0, + "87": 787452928.0, + "88": 787414528.0, + "89": 787404800.0, + "90": 787446784.0, + "91": 787446272.0, + "92": 787446784.0, + "93": 787430400.0, + "94": 787440128.0, + "95": 787450368.0, + "96": 787454976.0, + "97": 787427328.0, + "98": 787475968.0, + "99": 787419136.0, + "100": 787438592.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2579673088.0, + "2": 2590714880.0, + "3": 2590714880.0, + "4": 2590714880.0, + "5": 2596039680.0, + "6": 2596039680.0, + "7": 2596039680.0, + "8": 2596039680.0, + "9": 2596039680.0, + "10": 2596039680.0, + "11": 2596039680.0, + "12": 2596039680.0, + "13": 2596039680.0, + "14": 2596039680.0, + "15": 2596039680.0, + "16": 2596039680.0, + "17": 2596039680.0, + "18": 2596039680.0, + "19": 2596039680.0, + "20": 2596039680.0, + "21": 2596039680.0, + "22": 2596039680.0, + "23": 2596039680.0, + "24": 2596039680.0, + "25": 2596039680.0, + "26": 2596039680.0, + "27": 2596039680.0, + "28": 2596039680.0, + "29": 2596039680.0, + "30": 2596039680.0, + "31": 2596039680.0, + "32": 2596039680.0, + "33": 2596039680.0, + "34": 2596039680.0, + "35": 2596039680.0, + "36": 2596039680.0, + "37": 2596039680.0, + "38": 2596039680.0, + "39": 2596039680.0, + "40": 2596039680.0, + "41": 2596039680.0, + "42": 2596039680.0, + "43": 2596039680.0, + "44": 2596039680.0, + "45": 2596039680.0, + "46": 2596039680.0, + "47": 2596039680.0, + "48": 2596039680.0, + "49": 2596039680.0, + "50": 2596039680.0, + "51": 2596039680.0, + "52": 2596039680.0, + "53": 2596039680.0, + "54": 2596039680.0, + "55": 2596039680.0, + "56": 2596039680.0, + "57": 2596039680.0, + "58": 2596039680.0, + "59": 2596039680.0, + "60": 2596039680.0, + "61": 2596039680.0, + "62": 2596039680.0, + "63": 2596039680.0, + "64": 2596039680.0, + 
"65": 2596039680.0, + "66": 2596039680.0, + "67": 2596039680.0, + "68": 2596039680.0, + "69": 2596039680.0, + "70": 2596039680.0, + "71": 2596039680.0, + "72": 2596039680.0, + "73": 2596039680.0, + "74": 2596039680.0, + "75": 2596039680.0, + "76": 2596039680.0, + "77": 2596039680.0, + "78": 2596039680.0, + "79": 2596039680.0, + "80": 2596039680.0, + "81": 2596039680.0, + "82": 2596039680.0, + "83": 2596039680.0, + "84": 2596039680.0, + "85": 2596039680.0, + "86": 2596039680.0, + "87": 2596039680.0, + "88": 2596039680.0, + "89": 2596039680.0, + "90": 2596039680.0, + "91": 2596039680.0, + "92": 2596039680.0, + "93": 2596039680.0, + "94": 2596039680.0, + "95": 2596039680.0, + "96": 2596039680.0, + "97": 2596039680.0, + "98": 2596039680.0, + "99": 2596039680.0, + "100": 2596039680.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.07685, + "2": 0.45645, + "3": 0.41285, + "4": 0.40148, + "5": 0.35405, + "6": 0.35535, + "7": 0.35437, + "8": 0.32989, + "9": 0.32686, + "10": 0.32734, + "11": 0.32243, + "12": 0.32634, + "13": 0.33475, + "14": 0.33636, + "15": 0.33838, + "16": 0.32741, + "17": 0.33364, + "18": 0.33147, + "19": 0.33328, + "20": 0.33281, + "21": 0.33587, + "22": 0.3271, + "23": 0.33537, + "24": 0.32125, + "25": 0.33225, + "26": 0.33085, + "27": 0.3387, + "28": 0.34305, + "29": 0.34938, + "30": 0.34814, + "31": 0.35223, + "32": 0.36489, + "33": 0.33408, + "34": 0.34688, + "35": 0.33945, + "36": 0.34851, + "37": 0.3471, + "38": 0.3338, + "39": 0.3395, + "40": 0.3414, + "41": 0.34662, + "42": 0.34093, + "43": 0.34012, + "44": 0.34423, + "45": 0.34205, + "46": 0.34681, + "47": 0.33694, + "48": 0.34136, + "49": 0.34255, + "50": 0.34412, + "51": 0.32987, + "52": 0.34834, + "53": 0.34028, + "54": 0.33718, + "55": 0.33563, + "56": 0.3372, + "57": 0.33927, + "58": 0.34337, + "59": 0.34056, + "60": 0.34048, + "61": 0.33816, + "62": 0.3357, + "63": 0.3365, + "64": 0.33906, + "65": 0.34134, + "66": 0.34125, 
+ "67": 0.33859, + "68": 0.34726, + "69": 0.3385, + "70": 0.34428, + "71": 0.34339, + "72": 0.33789, + "73": 0.33975, + "74": 0.34759, + "75": 0.33612, + "76": 0.33913, + "77": 0.34664, + "78": 0.33673, + "79": 0.33903, + "80": 0.33519, + "81": 0.33434, + "82": 0.34003, + "83": 0.33784, + "84": 0.33367, + "85": 0.33382, + "86": 0.34029, + "87": 0.33537, + "88": 0.33703, + "89": 0.33416, + "90": 0.33113, + "91": 0.33369, + "92": 0.33443, + "93": 0.33841, + "94": 0.339, + "95": 0.33271, + "96": 0.33211, + "97": 0.33492, + "98": 0.33877, + "99": 0.33548, + "100": 0.33195 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgxh100_eos.json index 537e20b09d8..eca2cabacaf 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgxh100_eos.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgxh100_eos.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.04266, - "2": 11.02309, - "3": 9.43552, - "4": 10.04614, - "5": 9.38535, - "6": 9.14543, - "7": 9.21141, - "8": 8.63458, - "9": 8.48937, - "10": 8.82763, - "11": 8.29457, - "12": 8.3282, - "13": 8.23008, - "14": 7.71714, - "15": 7.86981, - "16": 7.92286, - "17": 7.8604, - "18": 7.62039, - "19": 7.98493, - "20": 7.72023, - "21": 7.39758, - "22": 7.39771, - "23": 7.28314, - "24": 7.25048, - "25": 7.53113, - "26": 6.95329, - "27": 7.49432, - "28": 7.20394, - "29": 7.37282, - "30": 7.50232, - "31": 7.25348, - "32": 7.4305, - "33": 7.48364, - "34": 7.53486, - "35": 7.10336, - "36": 6.94516, - "37": 7.26117, - "38": 7.07009, - "39": 
7.40543, - "40": 7.42044, - "41": 7.34202, - "42": 7.11816, - "43": 7.11373, - "44": 7.27067, - "45": 7.07036, - "46": 6.77823, - "47": 7.1875, - "48": 6.99998, - "49": 7.45868, - "50": 6.90956 + "1": 11.04276, + "2": 11.02298, + "3": 9.43542, + "4": 10.04672, + "5": 9.38572, + "6": 9.14547, + "7": 9.21155, + "8": 8.63445, + "9": 8.48944, + "10": 8.82764, + "11": 8.29479, + "12": 8.32819, + "13": 8.23003, + "14": 7.71724, + "15": 7.86963, + "16": 7.9228, + "17": 7.86049, + "18": 7.62035, + "19": 7.9851, + "20": 7.72027, + "21": 7.39754, + "22": 7.39767, + "23": 7.28334, + "24": 7.25057, + "25": 7.53131, + "26": 6.95335, + "27": 7.49421, + "28": 7.20415, + "29": 7.373, + "30": 7.50279, + "31": 7.25342, + "32": 7.43069, + "33": 7.48385, + "34": 7.53476, + "35": 7.10325, + "36": 6.94471, + "37": 7.26141, + "38": 7.07026, + "39": 7.40536, + "40": 7.42025, + "41": 7.34194, + "42": 7.11724, + "43": 7.11421, + "44": 7.27077, + "45": 7.0701, + "46": 6.77811, + "47": 7.18895, + "48": 7.00013, + "49": 7.45875, + "50": 6.90988 } }, "num-zeros": { @@ -62,55 +62,55 @@ "step_interval": 1, "values": { "1": 844114112.0, - "2": 843855104.0, + "2": 843855296.0, "3": 844048640.0, - "4": 842998144.0, + "4": 842998208.0, "5": 855786112.0, - "6": 874329728.0, - "7": 925591552.0, - "8": 915644608.0, - "9": 935187584.0, - "10": 927702400.0, - "11": 957888256.0, - "12": 923872512.0, - "13": 969427072.0, + "6": 878524160.0, + "7": 924542976.0, + "8": 917741504.0, + "9": 932042112.0, + "10": 930847360.0, + "11": 954742400.0, + "12": 922824128.0, + "13": 968378816.0, "14": 965228416.0, - "15": 952825344.0, - "16": 943777088.0, - "17": 928845824.0, - "18": 925913856.0, - "19": 955339136.0, - "20": 989208256.0, - "21": 924095424.0, - "22": 908902272.0, - "23": 892664576.0, - "24": 900830400.0, - "25": 928105472.0, - "26": 877724352.0, - "27": 912808320.0, - "28": 904557696.0, - "29": 872625088.0, - "30": 864767104.0, - "31": 868220416.0, - "32": 861931136.0, - "33": 859941312.0, + "15": 
951776640.0, + "16": 941679424.0, + "17": 929894336.0, + "18": 928011136.0, + "19": 955339264.0, + "20": 987111232.0, + "21": 924095488.0, + "22": 906805504.0, + "23": 895810432.0, + "24": 902927680.0, + "25": 927056960.0, + "26": 879821440.0, + "27": 911759744.0, + "28": 902460416.0, + "29": 872625216.0, + "30": 865815744.0, + "31": 868220352.0, + "32": 865076800.0, + "33": 864135552.0, "34": 855839104.0, - "35": 854046848.0, - "36": 852944896.0, - "37": 851456704.0, - "38": 849532096.0, + "35": 854046784.0, + "36": 855042176.0, + "37": 850408192.0, + "38": 850580480.0, "39": 849972608.0, "40": 849505792.0, - "41": 845780288.0, - "42": 846003328.0, - "43": 846257472.0, - "44": 852034880.0, - "45": 847187456.0, + "41": 845780352.0, + "42": 846003392.0, + "43": 848354688.0, + "44": 850986496.0, + "45": 848236160.0, "46": 855625856.0, - "47": 844661952.0, - "48": 851197248.0, + "47": 843613312.0, + "48": 851197312.0, "49": 851630464.0, - "50": 846195904.0 + "50": 846195968.0 } }, "mem-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 37959917568.0, - "2": 39578677248.0, - "3": 39580196864.0, - "4": 39580196864.0, - "5": 39583309824.0, - "6": 39583309824.0, - "7": 39583309824.0, - "8": 39583309824.0, - "9": 39583309824.0, - "10": 39583309824.0, - "11": 39583309824.0, - "12": 39583309824.0, - "13": 39583309824.0, - "14": 39583309824.0, - "15": 39583309824.0, - "16": 39583309824.0, - "17": 39583309824.0, - "18": 39583309824.0, - "19": 39583309824.0, - "20": 39583309824.0, - "21": 39583309824.0, - "22": 39583309824.0, - "23": 39583309824.0, - "24": 39583309824.0, - "25": 39583309824.0, - "26": 39583309824.0, - "27": 39583309824.0, - "28": 39583309824.0, - "29": 39583309824.0, - "30": 39583309824.0, - "31": 39583309824.0, - "32": 39583309824.0, - "33": 39583309824.0, - "34": 39583309824.0, - "35": 39583309824.0, - "36": 39583309824.0, - "37": 39583309824.0, - "38": 39583309824.0, - "39": 39583309824.0, - "40": 39583309824.0, - "41": 
39583309824.0, - "42": 39583309824.0, - "43": 39583309824.0, - "44": 39583309824.0, - "45": 39583309824.0, - "46": 39583309824.0, - "47": 39583309824.0, - "48": 39583309824.0, - "49": 39583309824.0, - "50": 39583309824.0 + "2": 39578673152.0, + "3": 39580192768.0, + "4": 39580192768.0, + "5": 39583301632.0, + "6": 39583301632.0, + "7": 39583301632.0, + "8": 39583301632.0, + "9": 39583301632.0, + "10": 39583301632.0, + "11": 39583301632.0, + "12": 39583301632.0, + "13": 39583301632.0, + "14": 39583301632.0, + "15": 39583301632.0, + "16": 39583301632.0, + "17": 39583301632.0, + "18": 39583301632.0, + "19": 39583301632.0, + "20": 39583301632.0, + "21": 39583301632.0, + "22": 39583301632.0, + "23": 39583301632.0, + "24": 39583301632.0, + "25": 39583301632.0, + "26": 39583301632.0, + "27": 39583301632.0, + "28": 39583301632.0, + "29": 39583301632.0, + "30": 39583301632.0, + "31": 39583301632.0, + "32": 39583301632.0, + "33": 39583301632.0, + "34": 39583301632.0, + "35": 39583301632.0, + "36": 39583301632.0, + "37": 39583301632.0, + "38": 39583301632.0, + "39": 39583301632.0, + "40": 39583301632.0, + "41": 39583301632.0, + "42": 39583301632.0, + "43": 39583301632.0, + "44": 39583301632.0, + "45": 39583301632.0, + "46": 39583301632.0, + "47": 39583301632.0, + "48": 39583301632.0, + "49": 39583301632.0, + "50": 39583301632.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 67.13422, - "2": 1.95457, - "3": 3.25371, - "4": 2.66673, - "5": 3.05794, - "6": 1.35128, - "7": 1.66174, - "8": 2.19011, - "9": 1.16207, - "10": 1.16456, - "11": 1.26279, - "12": 1.60263, - "13": 1.29219, - "14": 2.93489, - "15": 1.48729, - "16": 1.15146, - "17": 1.27648, - "18": 1.39906, - "19": 1.13846, - "20": 1.14415, - "21": 1.27567, - "22": 1.26287, - "23": 1.11223, - "24": 1.10986, - "25": 1.20096, - "26": 1.13382, - "27": 1.11305, - "28": 1.11424, - "29": 1.22341, - "30": 1.08856, - "31": 1.15539, - "32": 1.10684, - "33": 1.11399, - "34": 
1.09048, - "35": 1.1509, - "36": 1.09151, - "37": 1.13904, - "38": 1.06658, - "39": 1.1325, - "40": 1.14715, - "41": 1.07533, - "42": 1.08243, - "43": 1.13881, - "44": 1.14004, - "45": 1.06323, - "46": 1.06103, - "47": 1.11785, - "48": 1.04242, - "49": 1.13933, - "50": 1.0407 + "1": 89.14162, + "2": 2.00665, + "3": 3.2832, + "4": 2.63833, + "5": 2.43073, + "6": 1.4868, + "7": 1.81732, + "8": 2.74562, + "9": 1.18286, + "10": 1.18542, + "11": 1.27273, + "12": 1.63885, + "13": 1.31323, + "14": 2.29007, + "15": 1.52021, + "16": 1.87975, + "17": 1.3507, + "18": 1.48627, + "19": 1.17842, + "20": 1.17004, + "21": 1.30369, + "22": 1.24781, + "23": 1.13565, + "24": 1.13418, + "25": 1.21915, + "26": 1.24288, + "27": 1.15052, + "28": 1.12573, + "29": 1.15398, + "30": 1.13143, + "31": 1.17104, + "32": 1.12919, + "33": 1.1286, + "34": 1.14327, + "35": 1.1721, + "36": 1.12494, + "37": 1.2626, + "38": 1.11425, + "39": 1.14594, + "40": 1.18189, + "41": 1.09297, + "42": 1.09247, + "43": 1.18621, + "44": 1.19564, + "45": 1.08252, + "46": 1.08511, + "47": 1.23319, + "48": 1.08249, + "49": 1.0979, + "50": 1.07182 } } } \ No newline at end of file From 402bc50b1c2693dbde1fdc6c45416e37e1692f85 Mon Sep 17 00:00:00 2001 From: Santosh Bhavani Date: Thu, 30 Oct 2025 23:37:28 -0700 Subject: [PATCH 089/334] Add DeepSeek-V3 GB200 NVL72 optimization guide (#2059) Co-authored-by: Xin Yao --- docs/discussions/README.md | 22 ++ .../deepseek-v3-gb200-optimization.md | 252 ++++++++++++++++++ .../images/image1.png | Bin 0 -> 325505 bytes .../images/image2.png | Bin 0 -> 205208 bytes .../images/image3.png | Bin 0 -> 98729 bytes .../images/image4.png | Bin 0 -> 191466 bytes .../images/image5.png | Bin 0 -> 330297 bytes .../images/image6.png | Bin 0 -> 203011 bytes .../images/image7.png | Bin 0 -> 209740 bytes 9 files changed, 274 insertions(+) create mode 100644 docs/discussions/README.md create mode 100644 docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-optimization.md create mode 
100644 docs/discussions/deepseek-v3-gb200-optimization/images/image1.png create mode 100644 docs/discussions/deepseek-v3-gb200-optimization/images/image2.png create mode 100644 docs/discussions/deepseek-v3-gb200-optimization/images/image3.png create mode 100644 docs/discussions/deepseek-v3-gb200-optimization/images/image4.png create mode 100644 docs/discussions/deepseek-v3-gb200-optimization/images/image5.png create mode 100644 docs/discussions/deepseek-v3-gb200-optimization/images/image6.png create mode 100644 docs/discussions/deepseek-v3-gb200-optimization/images/image7.png diff --git a/docs/discussions/README.md b/docs/discussions/README.md new file mode 100644 index 00000000000..5dc19181842 --- /dev/null +++ b/docs/discussions/README.md @@ -0,0 +1,22 @@ +# Megatron Discussions + +This directory contains in-depth guides, tutorials, and discussions about optimizing and using Megatron for various use cases. + +## Available Guides + +### Performance Optimization + +- **[Optimizing DeepSeek-V3 Training Performance on NVIDIA GB200 NVL72](deepseek-v3-gb200-optimization/deepseek-v3-gb200-optimization.md)** + + A comprehensive guide on optimizing DeepSeek-V3 model training on NVIDIA GB200 NVL72 systems, covering profiling techniques, performance bottlenecks, and optimization strategies. + +## Contributing + +If you'd like to contribute a guide or tutorial, please follow this structure: + +1. Create a new directory: `docs/discussions/your-guide-name/` +2. Add your main guide: `docs/discussions/your-guide-name/your-guide-name.md` +3. Create an images directory: `docs/discussions/your-guide-name/images/` +4. Update this README.md with a link to your guide + +Each guide should be self-contained with its own images and supporting files. 
diff --git a/docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-optimization.md b/docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-optimization.md new file mode 100644 index 00000000000..e3573fa76ba --- /dev/null +++ b/docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-optimization.md @@ -0,0 +1,252 @@ +# **Optimizing DeepSeek-V3 Training Performance on NVIDIA GB200 NVL72** + +**Authors:** Xin Yao (@yaox12), Hongxiao Bai (@hxbai), Yaobin Zhang (@buptzyb), Tong Liu (@Autumn1998), Fan Yu (@HWZealot), Kunlun Li (@kunlunl), Zhongbo Zhu (@zhongbozhu), Zijie Yan (@yanring) + +--- + +This guide describes how we used Megatron Core (MCore) and Transformer Engine (TE) to pre-train the DeepSeek-V3 model with MXFP8 precision on 256 GB200 GPUs. We will detail the step-by-step process of optimizing performance to **970 TFLOPS/GPU**, which is a **2.55x** speedup compared to the estimated 380 TFLOPS on H100/H800 (refer to the estimation in this article \[[1](https://zhuanlan.zhihu.com/p/16480858047)\] in Chinese). The related features have been or will be open-sourced to the [Megatron Core](https://github.com/NVIDIA/Megatron-LM) and [Transformer Engine](https://github.com/NVIDIA/TransformerEngine) repositories. + +## **0. Methodology** + +To optimize the pre-training performance of a model, our methodology is generally as follows: + +1. Find a performance baseline. This baseline is usually the best performance that the current software stack can achieve on a given hardware platform and training precision by adjusting model parallelism, recomputation, and other configurations. +2. Use performance analysis tools such as [Nsight Systems](https://developer.nvidia.com/nsight-systems) (Nsys) or [PyTorch Profiler](https://docs.pytorch.org/tutorials/recipes/recipes/profiler_recipe.html) to capture a profile file (also called a timeline or trace) and analyze it to find performance bottlenecks. 
For example, are there significant exposed communications, kernels with a significantly high proportion, or whether the GPU kernel layout is dense? We usually prefer to use Nsys because, with the help of NVTX, it provides a clearer display of CUDA API and GPU kernel execution. +3. Optimize for performance bottlenecks. Then repeat steps 1-3 until the performance expectations are met. + +## **1. Baseline** + +DeepSeek-V3 innovatively uses FP8 mixed precision for pre-training, which saves memory and improves training speed without sacrificing model accuracy. We refer to the FP8 recipe used by DeepSeek-V3, where activations are quantized at a 1x128 granularity and weights are quantized at a 128x128 granularity, as the blockwise scaling recipe. MCore (v0.13+) and TE (v2.3+) also support it. + +On the Blackwell platform, thanks to the native support of the fifth-generation Tensor Core for the MXFP8 format, we adopted the MXFP8 recipe, a more fine-grained quantization scheme for training. Both activations and weights are quantized at a 1x32 granularity, and E8M0 is used as the format for the scaling factor. + +Here, we will briefly introduce the difference in implementation between MXFP8 GEMM on the Blackwell platform and Blockwise FP8 GEMM on the Hopper platform. On the Hopper platform, since the Tensor Core itself does not support multiplication with a scale, after the matrix multiplication of each tile, it is necessary to multiply by the scale and accumulate the result with the CUDA Core. This also determines that on the Hopper platform, 1x128 is almost the finest quantization granularity available. If a finer granularity was used for quantization, the GEMM performance would suffer a great loss. 
On the other hand, since the Blackwell platform natively supports MXFP8, the dequantization process in GEMM (i.e., multiplying by the scale) is completed inside the Tensor Core, so the CUDA Core is not involved throughout the process, which can achieve better performance and support finer-grained quantization (1x32). + +When we started optimizing DeepSeek-V3 on the GB200 NVL72 platform with MCore, our baseline already included the following features: + +1. **MXFP8 recipe**, where the fprop/wgrad/dgrad inputs of all linear layers in the model are quantized at a 1x32 granularity, while Scaled Dot Product Attention (SDPA)/Embedding/LM Head/Router/Loss/Optimizer, etc., remain at their original high precision. For details on the FP8 recipe, please refer to our presentation at the NVIDIA AI Open Day in June 2025 (Video \[[2](https://www.bilibili.com/video/BV1mpMwz9Ey5/)\] in Chinese) and GTC 2025 (Video \[[3](https://www.nvidia.com/en-us/on-demand/session/gtc25-s72778/)\] in English). The option to enable this in MCore is `--fp8-recipe mxfp8 --fp8-format e4m3`. +2. **Multi-head Latent Attention (MLA) kernels** on the Blackwell platform, provided by cuDNN 9.11. +3. **MXFP8 Grouped GEMM**, implemented using multi-stream \+ cuBLAS. The advantage of this implementation is that we can support various quantization schemes at the fastest speed: as long as the single GEMM is ready, we can have a Grouped GEMM implementation with good performance. Our multi-stream \+ cuBLAS solution can achieve 2,672 TFLOP/s (flush L2) on the shape K=7,168, N=2,048, which is basically equivalent to a highly optimized Grouped GEMM \[[4](https://cursor.com/cn/blog/kernels)\]. We will continue to optimize the performance of Grouped GEMM. The option to enable this in MCore is `--moe-grouped-gemm`. +4. **Kernel fusions**, such as: + 1. Yarn RoPE fusion, enabled by default. + 2. Permute fusion, the option to enable this in MCore is `--moe-permute-fusion`. + 3. 
Cross-entropy loss fusion, the option to enable this in MCore is `--cross-entropy-loss-fusion`. +5. **Flexible Pipeline Parallelism (PP) layout**, making PP more balanced. The corresponding option in MCore is `--pipeline-model-parallel-layout [layout]`. +6. **Primary weights in FP8**. FP8 mixed-precision training supports two weight schemes: + 1. Dual precision weights (default): Maintains both BF16 and FP8 weight copies. Simple implementation but uses more memory than BF16 training alone. + 2. FP8 only weights: Stores only FP8 weights, saving memory and enables FP8 AllGather of the updated parameters for per-tensor and blockwise FP8 recipes when using Distributed Optimizer (ZeRO-1). Complex implementation requiring recipe-specific handling. The option to enable this in MCore is `--fp8-param-gather`. +7. **BF16 optimizer states**. According to the technical report, DeepSeek-v3 uses BF16 for optimizer states. This feature is orthogonal to the training precision, and it can be used for both BF16 and FP8 training. The options to enable this in MCore are `--use-precision-aware-optimizer --main-grads-dtype fp32 --main-params-dtype fp32 --exp-avg-dtype bf16 --exp-avg-sq-dtype bf16`. +8. **Fine-grained recompute**. By recomputing some modules with smaller computational workload but larger memory occupation, a large amount of memory is saved at a small recomputation cost, thereby minimizing model parallel sizes. In our baseline version, fine-grained recompute only supports BF16, and FP8 training is currently not supported. The options to enable this in MCore are `--recompute-granularity selective --recompute-modules [modules]`. +9. **Token dispatcher** supports both NCCL AlltoAll and DeepEP backends. However, at the time we tested the baseline performance, DeepEP did not support the Multi-Node NVLink (MNNVL) of GB200, so we could only use the NCCL AlltoAll backend. The option to use the AlltoAll dispatcher in MCore is `--moe-token-dispatcher-type alltoall`. 
+ +On the above software stack, using the parallel configuration of TP1/PP8/VPP4/EP32/MBS1/GBS2048 on 256 GB200s, enabling recomputation of the MLP part of the dense layers (i.e., the first three layers of DeepSeek-v3) and the MLA up projection (`--recompute-modules mlp up_proj`), with the PP layout as `--pipeline-model-parallel-layout Et|(tt|)*30L` (a total of 32 stages, where the first stage is Embedding \+ 1 transformer layer, the last stage is Loss, and the middle 30 stages are 2 transformer layers), using the AlltoAll token dispatcher (NCCL backend), and enabling BF16 optimizer states, we achieved a performance of 494 TFLOPS/GPU. This performance is obviously not satisfactory, and we will optimize it from several aspects. + +## **2. Performance Optimization** + +By capturing and analyzing the Nsys timeline corresponding to the baseline, taking a forward iteration as an example, we can see that the biggest performance issue is that there are large gaps between kernels, and the CPU kernel launch speed cannot keep up with the kernel execution speed on GPU. We call this phenomenon *CPU overhead* or *host boundedness*. This overhead mainly comes from Python code (such as loops, `getattr`, etc.), PyTorch's Python and C++ logic code (for example, a simple `torch.empty` will not call any CUDA kernel, but it will generate a few microseconds of overhead on the host side), CUDA kernel launch, etc. The reason for this phenomenon is that, on the one hand, the speed of GPU executing kernels is getting faster and faster, resulting in not enough time to overlap the CPU execution time. On the other hand, FP8 training and fine-grained MoE models introduce more quantization, router, and other kernels. The main idea to solve CPU overhead is to reduce the number of kernels through kernel fusion and use CUDA Graphs for graph launch to bypass repeated work on the CPU side. 
+ +![images/image1.png](images/image1.png) + +In addition to CPU overhead, we can also see several other obvious problems: + +* The duration of the Permute kernel is clearly abnormal, suggesting that this kernel needs to be optimized. +* Before the GEMM in the Expert part, there are a large number of small, fragmented kernels. This is obviously abnormal, and we need to locate what these kernels are doing and whether they can be eliminated or fused. +* The NCCL-based token dispatcher, which requires explicit global token permutation, is not optimal. +* The overhead of recomputing MLA up projection is not as small as expected due to the CPU overhead. + +Therefore, our optimization plan is roughly as follows: + +1. Kernel fusion and optimization +2. Memory saving to allow more optimizations +3. CUDA Graphs to resolve CPU-side overhead +4. CPU-side optimizations +5. HybridEP: An Expert Parallel (EP) communication library developed based on a new set of APIs, with functions similar to DeepEP, but able to achieve higher bandwidth with fewer SMs, and fully supporting MNNVL. + +### **2.1 Kernel Fusion and Optimization** + +#### **2.1.1 Optimizing the Permute Kernel** + +The permute operation in the MoE model rearranges tokens in memory for communication and computation. The AlltoAll dispatcher using the NCCL backend requires one global and one local permute before and after EP communication, respectively. The Flex Dispatcher of DeepEP or HybridEP fuses the global permute into the communication kernel, eliminating the need to explicitly copy the tokens top-k times, but still requires a permute kernel to copy and rearrange the tokens distributed to different local experts after EP communication. TE [PR 1927](https://github.com/NVIDIA/TransformerEngine/pull/1927) significantly improves performance when top-k is much smaller than the number of experts (e.g., DeepSeek-v3's 256 experts with top-8), with up to a 10x speedup for this kernel. 
The option to enable this in MCore is `--moe-permute-fusion`, and we recommend setting `--enable-experimental` for more aggressive fusions. + +#### **2.1.2 Fused Memory Allocation for the MXFP8 Quantization** + +By comparing the code and the Nsys GPU trace timeline, we found that there are mainly two types of fragmented kernels in the Expert part: `torch.zeros` kernels that allocate the scaling factor for MXFP8, and the kernels that swizzle the MXFP8 scaling factors. The reason for using `torch.zeros` instead of `torch.empty` to allocate memory for the scaling factor is that the Tensor Core requires the scaling factor to be padded to a specific shape, with the padded part filled with 0. In optimization 2.1.3, we fuse the zero-padding to the swizzle scaling factor kernel to avoid `torch.zeros` kernels. + +When performing MXFP8 quantization for each tensor, four tensors need to be allocated, namely {row-wise, col-wise} * {data, scaling factor}. As mentioned earlier, even when using `torch.empty` to allocate memory, each PyTorch API call introduces several microseconds of overhead, resulting in significant CPU overhead. Our solution here is to pre-allocate a large memory buffer for data and scaling factors, and then construct tensors from this buffer using the `aten::from_blob` API by calculating pointer offsets, thus avoiding a large number of tiny `torch.empty/zeros`. For the specific implementation, please refer to TE PR [1793](https://github.com/NVIDIA/TransformerEngine/pull/1793), [1934](https://github.com/NVIDIA/TransformerEngine/pull/1934), and [2134](https://github.com/NVIDIA/TransformerEngine/pull/2134). This optimization replaces the previous implementation and is enabled by default. + +#### **2.1.3 Fused Multiple Swizzle Scaling Factor Kernels** + +As mentioned earlier, the second type of fragmented kernels in the Expert part is swizzling the scaling factor. 
This is because the Tensor Core requires the scaling factors to be swizzled according to certain rules (refer to the [cuBLAS documentation](https://docs.nvidia.com/cuda/cublas/#d-block-scaling-factors-layout)). We fused the swizzle operations of the scaling factors of multiple input tensors into a single kernel, and handled padding with 0 in it. This completely eliminates the `torch.zeros` kernel when allocating the buffer mentioned above, reduces the number of kernels, and alleviates CPU overhead. For the specific implementation, please refer to TE [PR 2019](https://github.com/NVIDIA/TransformerEngine/pull/2019). This optimization replaces the previous implementation and is enabled by default. + +In addition, theoretically, we can fuse the swizzle scaling factor into the quantization kernel. The main reason we haven't done so yet is to consider that when MXFP8 data needs to be communicated, such as in TP and EP Dispatch (which are not yet supported), un-swizzled scaling factors are more convenient for communication. Of course, the ideal situation is to make the quantization kernel configurable, so that it does not perform swizzling where communication is needed, and performs swizzling otherwise, thus avoiding redundant operations. + +#### **2.1.4 Kernel Fusion in the Router Part** + +The Router part contains a large number of element-wise operators, mainly for calculating the routing map, i.e., which experts the tokens should be assigned to, and for calculating and counting the aux loss. We fused some of these kernels, reducing the total number of kernels in the router part from 72 to 31. For the specific implementation, please refer to TE [PR 1883](https://github.com/NVIDIA/TransformerEngine/pull/1883). The option to enable this in MCore is `--moe-router-fusion`. + +The reason why it cannot be completely fused is that the remaining kernels are separated by communication kernels of global auxiliary losses calculation, which are not easy to fuse. 
There are also many kernels scattered in different Python logic codes. If they are forcibly fused, it will mess up the code structure of Python. Moreover, we will apply CUDA Graphs for the router part later, which can already solve the CPU overhead problem well, so there is little performance gain from fusing the remaining kernels. + +#### **2.1.5 Quantization Fused to Normalization** + +cuDNN supports fusing MXFP8 quantization into normalization, including layer norm and RMS norm. To enable this feature, we suggest using cuDNN 9.14 or later and set the following environment variables. + +```shell +NVTE_NORM_FWD_USE_CUDNN=1 +NVTE_NORM_BWD_USE_CUDNN=1 +``` + +Under the same parallel configuration, we measured that optimizations 2.1.1 and 2.1.2 improved the end-to-end (E2E) performance by 35 TFLOPS, optimization 2.1.3 improved it by 35.5 TFLOPS, optimization 2.1.4 improved it by 10.5 TFLOPS, and optimization 2.1.5 improved it by 13.8 TFLOPS. The Nsys timeline with optimizations 2.1.1, 2.1.2, and 2.1.4 enabled is as follows (the reason for not including 2.1.3 nor 2.1.5 is that they were done later, and at that time the timeline had already been superimposed with other optimizations, so it could not be directly compared): + +![images/image2.png](images/image2.png) + +Although it still doesn't look very satisfactory, it has improved. + +### **2.2 Memory Saving to Allow More Optimizations** + +#### **2.2.1 DeepEP** + +Theoretically, on the GB200 NVL72 system, all EP communication is within the NVLink domain. Thanks to the bidirectional 1.8 TB/s bandwidth of MNNVL on the GB200, EP communication will be greatly accelerated. However, DeepEP still does not officially support scenarios where the NVLink domain is larger than 8. We have supported the EP32 scenario based on [this community PR](https://github.com/deepseek-ai/DeepEP/pull/218). But this support is not well-optimized. 
In the EP32 scenario, the dispatch can only reach about 400 GB/s and the combine can only reach about 190 GB/s algorithm bandwidth with 24 SMs, which is a large gap from the unidirectional bandwidth of 900 GB/s for MNNVL on the GB200 NVL72. Therefore, after switching to DeepEP, we did not get the communication benefits, but got some memory-saving benefits (DeepEP does not need explicit global permute, so it reduces the peak memory consumption), and reduced CPU overhead (DeepEP uses a fused kernel for the EP communication preprocess, further reducing the number of kernels in the router and preprocess parts to 17), so we put DeepEP in the memory optimization part. + +The options to enable DeepEP in MCore are: + +```shell +--moe-token-dispatcher-type flex +--moe-flex-dispatcher-backend deepep +``` + +#### **2.2.2 Fine-grained Recompute for FP8** + +The conventional recomputation method recomputes multiple modules to save all intermediate activations of a Transformer layer, but recomputing a single module alone has no effect. We want to do more fine-grained recomputation, that is, recomputing some modules within a Transformer layer with low computational intensity but high memory consumption, to save more memory at a lower performance cost. Therefore, we implemented the [output discarding recompute](https://github.com/NVIDIA/Megatron-LM/blob/e000263e21ac89571123303c4043ec9ea7261513/megatron/core/tensor_parallel/random.py#L521) in MCore to support recomputing a single module. + +In addition, for FP8, we need to consider that the FP8 quantized version of the discarded output may be saved by subsequent layers, which would not achieve the goal of saving memory. Therefore, we need to tell the FP8 module to save the original input (so that it can be correctly discarded) instead of the quantized version. The cost is that we need to re-quantize during the backward pass. 
For implementation details, please refer to \[[MCore commit](https://github.com/NVIDIA/Megatron-LM/commit/781e765818b86b8f2e03ac6bb6b09aaaa9d17074)\] and \[[TE PR 1865](https://github.com/NVIDIA/TransformerEngine/pull/1865)\]. + +This technique is also applicable to SDPA and the subsequent Linear module (called Projection Linear). Because SDPA is a special module, it saves its own output for backward computation, while Projection Linear saves the input for backward computation. In BF16 training, these two tensors are actually the same tensor, occupying only one copy of memory. In FP8 training, SDPA saves a BF16 output tensor, while Projection Linear saves an FP8 tensor quantized from the input tensor. These two tensors do not share memory, so it actually saves 1.5 times the size. We can use a similar method to tell Projection Linear to save the original input instead of the quantized version to save memory. Similarly, the cost is that it needs to be re-quantized during the backward pass. + +![images/image3.png](images/image3.png) + +E2E testing shows that enabling DeepEP reduces the CPU overhead of the router and preprocess, improving performance by 54.3 TFLOPS. By using fine-grained recompute, the redundant activation saved between SDPA and Projection is eliminated, allowing us to turn off the recomputation of MLA up projection, which improves performance by 44.7 TFLOPS. The reason is that although the MLA up projection has a low computational density and the cost of recomputation is theoretically small, it also has serious CPU overhead, so turning off recomputation can achieve a certain performance improvement. Correspondingly, the recomputation parameters were changed to `--recompute-modules mlp moe_act`. 
The following figure shows the Nsys timeline with DeepEP enabled and using new recompute parameters: + +![images/image4.png](images/image4.png) + +### **2.3 CUDA Graphs to Resolve CPU-side Overhead** + +CUDA Graphs significantly reduce CPU overhead by capturing GPU kernels into a static graph that replays entire kernel sequences in subsequent iterations, bypassing most CPU logic. However, captured parts must be static with no dynamic shapes allowed. In Dropless MoE models, routed experts are dynamic while attention, router, EP preprocess, and shared experts remain static, so we capture these static components to minimize CPU overhead. + +We have developed the Partial CUDA Graphs feature in MCore and TE, which allows us to capture only a part of the model. The parameter in MCore is `--cuda-graph-scope`, and the supported options are: + +* `attn`: capture the attention part. +* `mlp`: capture the MLP part of the dense layer, for example, the first three layers of DeepSeek-V3 are dense layers. +* `moe`: capture the moe part, only supports token-drop MoE. +* `moe_router`: capture the moe router part. Also capture shared experts unless the shared experts overlap is enabled. +* `moe_preprocess`: capture the EP preprocess part, must be used with `moe_router`. +* `mamba`: captures the mamba layer. + +In DeepSeek-v3, we finally used `--cuda-graph-impl transformer_engine --cuda-graph-scope attn moe_router moe_preprocess` to capture attention, router, EP preprocess, and shared experts of each layer. The partial CUDA Graphs feature is temporarily only available in `--cuda-graph-impl transformer_engine` implementation. Another implementation is called `local`, which introduces full-layer and full-iteration CUDA Graphs support, but not feasible for MoE models due to the dynamic shape issue. + +One limitation of CUDA Graphs is that it occupies additional memory. 
The number of CUDA Graphs we need to capture is `L*M*2`, where `L` is the number of layers per GPU and `M` is the number of micro-batches in one iteration. `*2` because we need to capture both forward and backward graphs. This additional memory of these graphs comes from three aspects. + +1. The structure of CUDA Graphs itself occupies some memory. This memory usage increases with the number of nodes in the graph, but the amount is typically negligible. +2. CUDA Graphs need to use an independent memory pool. PyTorch’s caching allocator cannot reuse the memory in this pool for operators outside of CUDA Graphs. +3. CUDA Graphs need static memory buffers for input and output data of the graphs. + +We have made a series of optimizations to optimize the memory consumption of CUDA Graphs, especially targeting 2 and 3. For 2, though graphed and non-graphed parts must use separate pools, we managed to make all graphs share one pool by capturing them in the same order they will be replayed. For 3, we reuse the static memory buffers between graphs as much as possible following its PP pattern. For details, please refer to the `_order` and `_reuse_graph_input_output_buffers` arguments in TE [make_graphed_callables()](https://github.com/NVIDIA/TransformerEngine/blob/release_v2.8/transformer_engine/pytorch/graph.py#L847-L863) API. In addition, we have also made a series of adaptations and optimizations for CUDA Graphs for MoE models, different FP8 recipes, MTP support, flexible PP layouts, and precision alignment to ensure it works correctly and efficiently. + +The following figure shows our timeline after enabling CUDA Graphs (this figure also includes 2.1.3 fuse swizzle scaling factor). It can be seen that the CPU overhead problem has been greatly alleviated, and currently only the routed experts part still has some CPU overhead. Enabling CUDA Graphs has improved the E2E performance by a total of 84.8 TFLOPS. 
+ +![images/image5.png](images/image5.png) + +At this point, we can see that the performance problem of DeepEP is beginning to become a bottleneck, and we will have work to optimize it later. + +### **2.4 CPU-side Optimizations** + +Adding [bindpcie](https://github.com/NVIDIA/mlperf-common/blob/main/client/bindpcie) to the startup phase of each training process, so as to automatically detect the GPU/NUMA topology of the local machine based on the rank of the local process, and use `numactl` to bind the CPU and memory of the process to the local NUMA node corresponding to its GPU. This reduces per-GPU kernel launch latency and the latency variation among GPUs, and improves E2E performance by 70.6 TFLOPS. + +It is worth mentioning that since CPU overhead is a major performance issue in FP8 training, and in language model training tasks where the data loading pressure is small, usually only a few CPU cores are responsible for launching kernels and are in a high-load state. For example, on a DGX/HGX NVL8 system, if core binding is performed, then 8 GPUs correspond to 8 processes, which correspond to 8 CPU cores. Therefore, we recommend configuring the CPU to a mode that allows some cores to boost to the highest frequency, which can significantly improve the performance of FP8 training. + +With the help of CPU-side profiling, we're working on simplifying the host-side code of TE, such as removing unnecessary checks, PyTorch APIs, and CUDA calls. In addition, we are working with CPU experts to explore other CPU-side optimizations. + +### **2.5 HybridEP** + +HybridEP is a new EP communication library developed by NVIDIA, with functions similar to DeepEP, but it can fully release the performance potential of the NVL72 architecture and also supports intra-node and inter-node communication on the Hopper platform. HybridEP mainly has the following features: + +* Fully adapted to the NVL72 architecture. 
Within the NVLink domain, Tensor Memory Accelerator (TMA) is used for data copy to minimize the number of instructions and reduce resource occupation. +* Deeply optimized RDMA communication across NVLink domains using IBGDA technology. +* Ensured that there is no redundant communication during data distribution. +* Completely asynchronous at the kernel level and adapted to CUDA Graphs. +* Can flexibly adjust the number of occupied SMs and achieve excellent performance with as few SMs as possible. + +HybridEP is fully adapted to the NVL72 architecture and can achieve high transmission bandwidth with fewer SM resources. +![images/image6.png](images/image6.png) + +It is worth mentioning that although we only report the performance of EP36 here, HybridEP actually supports the full NVL72. Therefore, if future models are designed with the number of experts being a multiple of 72, HybridEP can fully utilize the bandwidth of NVL72. This also reflects the philosophy of model and hardware architecture co-design. + +When integrating HybridEP into MCore, we need to solve a problem: in the implementation, we need to register some special buffers so that they can be accessed by other ranks in the same NVLink domain. Moreover, the output of dispatch and the input of combine both exist in the buffer managed by HybridEP itself; this buffer is globally unique on the current rank and is reused between layers. We need an extra D2D (Device to Device) copy to copy the output of the dispatch kernel from the buffer to the downstream required PyTorch tensor, or to copy the input of the combine kernel from the upstream PyTorch tensor to the combine input buffer. The duration of this D2D copy is about 10%-20% of the communication time. + +Considering that the MoE permute operation follows dispatch, the sequence is + +1. EP communication over NVLink: dispatch -> HybridEP managed buffer +2. D2D copy: HybridEP managed buffer -> output buffer in PyTorch tensors +3. 
Permute: output buffer -> permuted tensors to be fed into experts + +Therefore, we choose to fuse this D2D copy with the subsequent permute, that is, while permuting, we also complete the data transfer between the HybridEP managed buffer and the ordinary PyTorch tensor. Furthermore, since cuBLAS FP8 GEMM requires the input M dimension to be aligned to 16 (per-tensor recipe or blockwise recipe) or 32 (MXFP8 recipe), and the output generated by permute is very likely not to meet this requirement, it needs to be padded in the M dimension. This padding task is also essentially a D2D copy, and we also fuse it into the permute process. + +The options to enable HybridEP in MCore are: + +```shell +--moe-token-dispatcher-type flex +--moe-flex-dispatcher-backend hybridep +``` + +The figure below shows the timeline after we used HybridEP to optimize EP communication and permute/pad, which improved the E2E performance by 113.6 TFLOPS. + +![images/image7.png](images/image7.png) + +HybridEP has been open-sourced as an [independent branch](https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep) in the DeepEP repository, have a try now! + +## **3. Summary and Outlook** + +We started from a baseline of 494 TFLOPS, and through multiple rounds of performance analysis and optimization, we finally reached 970 TFLOPS, achieving a 1.96x performance improvement. 
The following is our optimization history sorted by time: + +| Model | System | Precision | Dispatcher | Feature Roadmap | TFLOPS/GPU | +| ----- | ----- | ----- | ----- | ----- | ----- | +| DeepSeek-V3 | GB200 | MXFP8 | AlltoAll | Baseline | 494.46 | +| DeepSeek-V3 | GB200 | MXFP8 | AlltoAll | Fuse torch.zeros for scaling factor allocation & Permute kernel Optimization | 529.55 | +| DeepSeek-V3 | GB200 | MXFP8 | AlltoAll | Router fusion | 540.00 | +| DeepSeek-V3 | GB200 | MXFP8 | DeepEP | Enable DeepEP (Will switch to HybridEP) | 566.07 | +| DeepSeek-V3 | GB200 | MXFP8 | DeepEP | Remove up\_proj recompute | 610.71 | +| DeepSeek-V3 | GB200 | MXFP8 | DeepEP | CUDA Graphs | 663.27 | +| DeepSeek-V3 | GB200 | MXFP8 | DeepEP | Tune DeepEP (Will switch to HybridEP) | 691.49 | +| DeepSeek-V3 | GB200 | MXFP8 | DeepEP | CPU-side optimization | 762.12 | +| DeepSeek-V3 | GB200 | MXFP8 | DeepEP | PDL for quantization kernels & Fuse MXFP8 swizzle scaling factor | 797.67 | +| DeepSeek-V3 | GB200 | MXFP8 | DeepEP | CUDA Graphs capture shared expert | 829.93 | +| DeepSeek-V3 | GB200 | MXFP8 | HybridEP | HybridEP | 943.56 | +| DeepSeek-V3 | GB200 | MXFP8 | HybridEP | CPU-side optimization | 956.21 | +| DeepSeek-V3 | GB200 | MXFP8 | HybridEP | Fuse quantization to normalization (cuDNN 9.14) | 970.01 | + +### **3.1 Future Work** + +1. Completely eliminate CPU overhead. We hope to eliminate the device-host sync in the MoE model (its purpose is to get the tokens per expert information), so that we can use CUDA Graphs for the entire model and completely eliminate CPU overhead. We used a small proxy model to estimate that this optimization can achieve at least a 10% additional performance gain. Please refer to the MCore MoE [roadmap](https://github.com/NVIDIA/Megatron-LM/issues/1729). +2. Scale to a larger number of GPUs. Our current parallel configuration is already limited by the number of GPUs (EP32 * PP8 = 256 GPUs). If we expand to 512 GPUs, we can explore the performance of EP64. 
Theoretically, since EP64 is still within the NVLink domain, its communication overhead is still small. And a large EP can reduce the number of local experts, thereby reducing quantization and other overheads, and improving the performance of Grouped GEMM. +3. Explore the use of NVLink-C2C's CPU offloading technology. Since the GB200 NVL72 system has NVLink-C2C, the connection between CPU and GPU is faster than PCIe 5.0, so offloading is a very promising feature. For example, with the help of CPU offloading, can we increase MBS to 2? If so, it will greatly improve the computational intensity, and many of the CPU overhead problems mentioned earlier may no longer exist. + +### **3.2 Some Discussions** + +1. Why didn't we use FP8 dispatch on the GB200? + * FP8 dispatch is not a free lunch. Since we can only transmit row-wise FP8 data, we need some extra "de-quantize and re-quantize" kernels to calculate col-wise FP8 data for backward computation. The overhead of these kernels offsets the communication time saved by FP8 dispatch. +2. Why didn't we use 1F1B AlltoAll overlap on the GB200 (a kind of inter-batch overlap scheme similar to DualPipe, for details see MCore commits [8333bd5](https://github.com/NVIDIA/Megatron-LM/commit/8333bd5bb6de2bdbdb3ebebf224b4a339a04ec90), [ae1c882](https://github.com/NVIDIA/Megatron-LM/commit/ae1c88296f465ab4ac9c503d75a57ba4044c47d1), [d7bf5aa](https://github.com/NVIDIA/Megatron-LM/commit/d7bf5aaaa8e331f901366621db009b0c2880c8fd))? + * First, thanks to NVL72, EP communication is very fast, and the necessity of overlap is not great. Second, 1F1B AlltoAll overlap is not a free lunch either. It divides the forward and backward into multiple stages for scheduling, and there is some synchronization between different stages, which aggravates the CPU overhead, so the overall benefit is negative on the GB200. If we can further solve the CPU overhead problem, we can re-evaluate the benefits of 1F1B AlltoAll overlap. +3. 
How much performance improvement is there compared to the H100? + * DeepSeek's technical report did not announce the TFLOPS during its pre-training phase, but some article \[[1](https://zhuanlan.zhihu.com/p/16480858047)\] (in Chinese, we recommend reading it by translation) has estimated it to be around 380 TFLOPS, so the 970 TFLOPS on the GB200 is a 2.55x performance improvement. This surpasses the 2.5x improvement of the GB200 over the H100 in FP8 computing power. This significant performance gain is attributed to leveraging MNNVL on the GB200 for optimized EP communication and utilizing the substantially larger device memory on the GB200 to explore enhanced parallel configurations. + +## **4. Resources** + +**Complete Training Examples** + +* [DeepSeek-V3 Training Scripts](https://github.com/yanring/Megatron-MoE-ModelZoo) \- End-to-end training configurations and launch scripts + +**Papers and Technical Reports** + +1. [DeepSeek-V3 MFU Estimation](https://zhuanlan.zhihu.com/p/16480858047). An article in Chinese estimates the MFU of DeepSeek-V3 training. +2. [FP8 Training Recipes, Performance and Convergence](https://www.bilibili.com/video/BV1mpMwz9Ey5/). A video in Chinese introduces FP8 training recipes, performance and convergence. +3. [Stable and Scalable FP8 Deep Learning Training on Blackwell](https://www.nvidia.com/en-us/on-demand/session/gtc25-s72778/). GTC talk on FP8 training on Blackwell. +4. 
[Cursor's Blog on Faster Grouped GEMM Kernels and MoE Training.](https://cursor.com/cn/blog/kernels) diff --git a/docs/discussions/deepseek-v3-gb200-optimization/images/image1.png b/docs/discussions/deepseek-v3-gb200-optimization/images/image1.png new file mode 100644 index 0000000000000000000000000000000000000000..6e4dad685c4251ecee64a1c8d221ae869ea5ff43 GIT binary patch literal 325505 zcmZU41yGeyw>BtJ5>kRlgS51igfu8AQqm35NT&kQjY_wK7)VH`G)PN#cX!`)zWd*q zZ{~N#I>0&ayZ2t}sr7~^E6QMFl42quAz{nOO1?xwLVbpWbc-7u6<*Oi8k&H=P~M8m zy+VgyUg##lNJvyja*|@N+)_4cU7gfiT~4k9R_^OMuN9o&pi>Lp8uYX)b9h)@B)s5$ zAe1P-oGKL8;lbd(Pp8GS(eWj`#LTpX+xBlWNwU#BD#2U*Vs|j!lcGQQSM9VRuyVDq ze%VJ<#_jS8d)24b$T#yX1G;LrN^QG?@ zAIZ$vUG;|am|OEfjKS)@cumt`2`kbIZVM};dS}S7Hm4IJ%#;);bQs+h;sZQ93Ue`q zdhw=o3b)JI8`hinko=Lmw_1Hyo|1gCrExBOH-BChwLa%hm;FIgMYZ^s=x^*tA{ZTO zmw|N?#tTSFK}-HT4zU4W-lPTeNQ~F6!R4|1|Cbj;UK~C-{JcT`rk(^RB~Y@@Lk# zKl}}LOVQR79qwhCuLbOkYfU4l-3ASP9H^F;9T&eCUOZ#~32BK)b^WmVF=-ySj zA$6VpQ&_Go9ot9uAO(k&vOXyDoOF~Nx3$@lN)G?E8%ock4g1~{wAjTGK9SR1Y;jWI z+h4wX3CPHx;z`FppY3Hq&Y~1aDF3p zy~)iV)uJFT_JZ>ZVLx5)*&%K0_4q?0jYC|Fq<@*RHwm8V)j1QrNU}?GL?>eY#JLk?=8SKQ2An)Z~4mMzk9x5B514 z$K_1DxL{p+19QUotEghbur>-|i>r@xUpgHZg(;s2QQcO+J!V1SqS~G{onKCqO1!D3 zqT4KD9fw;F!lpr{k;5-2$YKm-N1pe`HJ)`6{NN+B-f>HJgd7JgG)w6I%A!B+0%t85UE#U ze9>~&MKr~ePwti#-FVHCEG32C#1NydZhq48i+4hy93&iMqDa+>V$6NFJKs;`6F6?G z@ru5Qe`bK)Ctndf`klB)I`jnS&y@5umh^x<5iSgerCj&x$4WdYvDo8LsHM}aXt zEix+a>ySyXEN(7#IIdQFqNdKOMa8_&E-bFEagUSa@MD`I=i+c@XYT~xu^D&zsDJgK znNFx;M}_c9m^sJg=}Ppi%b^*uY4MMy-@H!VRC>2bw+W$!md1zTsiTuCr*2&`>R<(-~?Ub_Q&+=C%OAC+JhpI z4EReQE)Cu6ozf|JpB{4#%-Zxpj$e=17sB)Gw;3U=fy z%OrNH^(F~4xmb~dI3+U@BvwCs?(9;jbF$WFkzWwn<;>2V z*`+OMXl2!k#?tsq0Pyexg(ih$aY5FftICoHbw zF`DCV1x@dlzwzIAP|K09o;ckppP-GyBl%0uF&!5_vHgs+NU69#z=Stsgqe3jx{a{wW;QG47G#0?JCio1C=`b*3RxFM|bC&@U$&H_1yD|2x;14R$l>qmCu=Z1min? 
z7^OPO*);ub@>TA17)LobA(z@pw*o$+Mme9`P3OUaa!r-k7#E&dxJP`-u~RzkmWUtG zoS~?3R#H=&v@IWb3tJ!;8%?*y?TouSYNd7{IjfT~R}xfDDU#>*c5$Y^3bv_7rLN7Q z(!2jH(sr6{F9D7+gM!41?f_2DEi`A5H`4U-?ExFV`|Jt2w@CTOsj`=zxcFK_0g_9yv)IXUSQxr`^VnrQ7@Jc)_le(RN4c#8ce9%*kbGw`RfOZ5~BztH#@$%Ti1 zI2i$Y`mqjF&g&v9ICnUcsq7hR@s@E z$fBa6@ICcOX>wcxvq*vSMz}IRP&(Of@J`_hi90iQC`RSt?a?GrlsT zYUNhe`WF-=TOcs?ce_yKKyr5EHp6=3`ELDeCKYpWr3|<$meK z^?5NekJe|0N*zLCVs3tZ^oHgTf{?JVrY#p^4wC#IKXSf)#ZXg|Q&gmX{J6V|__JwZ zW24yf7cUU6Ffpx_SXS`VV44f3HyYD~+1*F=M^5f;{cxAqUNDse3)Rul5giv72qqTl zZP<1vS5H4TOG`_j7wDq>1{@sx{OzevU01iZ9$+7STt65PV7G~FD~AD@F1k1 z01lag0=9xe8Zt^qNQjAv$?36)J}q3p*4Ea)fB)cz(J?TnpFY)Y?=maFNPd8iFRrK< zDf^i*RNUG5$&VjD;JRPFd?_n0e^?e0Yo7D5bzFjifc(6QO73LD(eh+iIIg34YofgJ z?FtTX1VX~*@CV#Ty@Ep{BQmC@kJ#eF-n~OI>rIQuVr(xCZ5>g#Jkpc$>gymnQpJNY?+ff(}iFooZ@oDn-OQ zDLfpDO~3AMrbJM1XsEcF+W7dmW~Jk!7x8RYwcjG8^(m)T*E*Ofg`kJfoFY*Ol$Mtt z^vh6k^6*G{c?p%<&Ex{#9MSWPNK2#msZ;5fo*pDFKFD)^alx4=&#L+BnOAFH-#tp7 zQ|jBdZ_6j~VEFppTnK&Gkjuk|=+4{+&$g^Ve_RjCZDMkphB(2K)LNpuM;s z4QarE%l!Ub{?(;rlC*{f@z~hd;_@=|!92y(d1xJ@*1XF-sa^lRs=mJ{tiQhQjSHiD zA?sZ$yMjrDQYsautp4+ePPElhD!kD`@|IyqI6Z->Mq#n3O#7hT`669?GrD){Q%{l z^bakON_UAuRD%6muM`#*JK_k5h++~F+F?=iBFW4q%53D5c_TVEM#g?y@zKY{G&DSe z)9_-ZK#f(m>ZA7W-(LG23>7+TSy|b@fWNG%lPigweR1WRvUo77AkF!jK9)6G;jdU~ zJyO%La9Uf^Ln_zv5iiraN0-DW*{oeqP=yboLbHCoT~UO7{m5x?edwF@Sn&sF?hb1M zkB+yeST=Q3RPZ)N3eC@*dY}CB-hdO{lP=mDi2YFZ`E&dkhQ-W{NEfBow4^ROcIh*F ziG4@%@yUP{uC67p6R&c3k;9zu@ooNL*IgGu%J-C#&SLe=Uwo}iIjA%?v zO*hAXhqbrM>aY<@NlC%&F1;Z7>o(TXzmclcwmlKnQT!Kr-)Nop)9dSNP!Y@~Z$;G^}9 zl~04_sXJ7kaWL!6m4p~)Se`I_Nn?aA`7u7e#A<|1TtWgMGOw~Sae7)${k!IS=u58k z7v!)6D0lAkPfz#2SzuBBZs6vYK>pOFZ*)`^ZX&{F>+r;B_f>Q4&fflt=*e!K-(*1T zUP?00`Cc3PImPRlxWsNV%)565JPsK-IXPux0X!H+YuvfH&3jDj?W0|1yor$jVinVb zI^Z4>M7*mi(x87q6aQs5BeL9^uJrnK5~B}kSdr&yFI;%~!@}M1u!vcBAH*3=N%_0G z%Yi7=55#&@EG%Kwt~7`K zGa!auPfgR@z4r@+qydyu$omzmfj(@u<;h=F^X7MUdZFtl2)T1179I|?c`wED=g$+K zSlvgGkdQb%JLBZwctrQ%PLe<4Z zPEL++X;`(({<1Lo&NpM6pP$zVidyqNgpW_?HbX^4g?H8bp$>-*w$>s^T2qtcnpUVl 
zF)t2!D+qaHWMt@$H`8(-+wi2bWBmMX2?`317MtFE|NcEtkI=BNqKU1`%N0>#uBlp2 zzV7bs?>d#SL@esTCo`3AgJ47BwMxw3?ig8E7EbpTgw7V@{-y{J$i>=6qL^M>Tu6PI zZD>F;D@lan2n-5Z+uD-)Z2G6EY470R;8o?s7FKyV-dO)TE!028$`ayzliO`|eyb%J z1XBJ7VGW_=iHV8HaQ)v`>+w>#ni&h8mryN{ z`kuJBcx$Z!O0UPwVEXmPh=+V!cGE&U3fYB)QCV3u%gake zrd{__6dCT^xua8I{~nIPz`uWz-rmADySO*+I!xvd*9I3?R!ZCt%t%Q|`8|(W?ds0x z0c(JJ?)N<1f$COx{hAcc={s`?v+@ZHbo7QdzXt{fX@>>A4Kd$}hOcBj7vbkm=aL9M zp-g-6-#YJ$3YRU?!CZxf`Iazf+1H!B*I3#8?Qp^TZo5=amvpqW{yja4c$!XvsR022 z_ee-&mF(F4{rzEqfRxh+(t1`^Kd`StSACUrczb6@V(=9OUC>GSttp-G0y(_8HSO-|~l7wRc!Y9?f; z{2sFgvS!ScNJ2(NC{ky812vO5r|#GI7MD~^<&l!Bo<09uW20WFSQOpib1x)!6P>2^ zg&QK^mqSBCH5c2}mrMHBIkNZktDG?6yIOF4PbL2R`2!0ye|0jGmy>g+WK13aqwDiy zI%_5^a z1qJ);H~Z|bUcI_UP98C3+csxU^$-i|_ymvJnwAlL^+RM2&gSE8NfflZGd_oU^7=D1 z_+28wMj)}M0eTf)zQnhy*+YfaKUM9j`ucTespTNz&_H?dO}qI6q5(w685&Y2CMF)9 zoTx%|13c-{E2YWb>~f&_9Evs~$<8_ouB}rxa0oRw%u+$Wg3~)|&%e61)@Z?lfB!!H zygW!KXmfK5XlTBTLb!$o(pP79tj@C0)zlF532p-Jf5EYmgoMOkED>gqr?{B38x3c-^`RX!M*utE z^9S&s=(M!R!8JlD@^g_f+j8UbiI%#G?CjgsQ;|?Vcz9{nU+CoER0V6w!g&tP^d>sR zM}S}7?E`mrcMy@kJWp)*QgSWx3pxSvOU$~B=Kg%}Yf@Qpwzjj&gVH%Zb^(?nr=akN zo__fSF4s-X=3;huzbp?St(yU9hR)sw^unwR@4#GzxpsDN^+wq{qTiV&REjelG%Ze$ziHQJ?@INHHY^VVX;EMAqG1A8 zglx&l32|pd^%p}?aITX~_1s(TP6o()sj4caW&oNFQ3~wrQTzLj4JFY}o;=y>U`W4* zhbN}5PYL)2qSNtYTX%c15+v}K+}!=ix4oH+n8a*4v#`EK2W3Vrn9IQTx7~uA-PayQ z_V35ig}}yed7mG8>Y_?#YkUf0W6v9u1Ik`dz-9Oc1@uQHG}8wU9suiB)z*FiD~evG zSLc;DKW_v@y}G^*Vn^x2;ScT7va)$6!Nez4pGOVInHdds2i((~D>#yo6r@yt7Z#dc zP=8=!zj7^~c)W4nm*4LPjjZfQ9DfN^wmbN~SFSiAhp)$>UD z1+s7{MdX3m`}p_(!6YCdX$PPIokI{Z2V#xOyoU^+-t%}fv$9f)jsjLl>E+9Tilb_$ z$~9fTD&avM?2@s+p+qdeqW{d^lm#|#2|gF_gis1^Xgf7E6`=@d#!P>|9xJnXh=+%_ zz>JYqC|ac#Q5G^A({g^a0phgTw*0o+-tnHu(QMNmoxuXW>`d*K07Q7wo(8v z8JwTYs;cwpIXV3L%O&=ZD9~As{S!020pUEnzj=(YQCqM^032M#t?{$6zyAE0o00L1 zj@JBdLSyWs7ZBKy#HThHAIanXUFQmqidsPkWqy7Vur`-GSBv}2!xH7`=UzOIl>{RI zn3jB+P$HBQurkRzwh|Jz(Qql~0O7Z{Z2?u{7?kMg=mG`?UV=pDrG6iww1{WNvb_lp`lfC%Nnh9sA#JtytEAc_ugj+#YI8)|3j8Mh& 
z+|BdHxe6%_pzZ7{b-+~!gmR+Z0V5|iF7CF_IcIR;tu^kde^{|tv>s&!jkkcCHK)nz zcO7CL_Vp>xFDwAmEiNq;0sDp?92_3Lm}s6xO-(I!8i5*z@rnvv@ZpC0h5wc&5pk-n zuHSY=QBe$FJ*@e`N-wd~KQtUNgo4X2crGi8zA^lR;FZ`(mjVHI-?s1{W3l%+8;N=u zmL+50R#Z&cLeFNeJ&bLWA&Jp=_Us*l7-JW7Vs0QXG&D57Cbqzycn1LPi@)!{PnII; zOOf3#D=8VEA!G$W>R0@=ygYt)*B%%U5ay;O$2rGJ&EIc`fQNkn?1Kj1H#r$zPyh|4 z#AWL>G~V}nIp*10S#)wxY*?uNqYpQ9znuGw&CKYZestZQGz8*c zHkr?^*lo{yE4k>IuoTYvue0sPuFIwnR8gi(8LZ;C)1yzo+8y>)Fc5BkR9kcNIvZEY&Z({T-KN=R$3kn9t$3t1WfHPz>V!G~3x6I6>&(0da zdNS$0dG&UDVgkW5VB78&f+TENa@+$p zKj$Q<95s*rf3pA}L+8HcrQuhBoKE-sjg78QKIoW_bxK{_5(&s;?k zFev_VTzU@9`B=3pJ3y?#{$dA$Z$EY2{slq|YV>TV&_~L_p=gvNif=U)MORlBAOH?0 z$Wp%@Awj_;=qDjh0Lj9vMt*Vv+5%?)N0LfJB(2u-1Rqqhjm_h?D?Okd5&iQ~e40E6 zLmG}9mufJpg{}^PKXx|yqak+2YWO=fCua=&oFmvtVBS_Zt>MEVtvQ_!167PrOJGs} zQcYAme}{B5#EsdZwO+Xot+UZ96<NBC(pZ>( zD=4U`^Mq?SJ#|BzOsgs$7kO{tDf&PoQSJ>s26^JWnyOLGZnv=8BkDgcziq$BU7H*o;O-S&YRM-WR;Ez)&o3 zscpcj1RNL9frd1imGnwRd-$ zf=c~2GIFq%m(B&8vF;#shXaoLj>viNQ_h%X+V{7v-y?tbS(kgsLz}*R9?;w@$;q%B zS(|?C@R7Or3WGnq!{E> zU>yKUl27E4+H8R;Kxhg$v9mGT;h^aNnE<}#51@&|nS}U2MTlMwA5=zKB|g=0AM^@p zdiv1lvWNC(TuzO#rd=3!Bj$E}l=3>DxZxH|!6j&JX+e0YB3Fm1K%6A^dffcBS&k2n zqcPj@^Bem-PS|X#{ew9@i{bMSr7tH(O+zETxpv4Vp-T@$4oM)2-+|I zeE{62K^fUkUsUGyq*FLi?B) z7;|SU>3IO}&+sb=aGi5`6m9{h*rUlACuAQyB(_it>-RY*oj z|4~rzH%&MhfDum~iVd@y~Mfyn*H!<{ft=ZJiv5XTSx1wJ#BB0U2G8rY3v)(BPtEw1ij zY6laIOvot&K@iZ;;*ye>Qz{^^>+I|V{gP2%Kf}V2#~=q@MmU`u=sOq}YVYZ3ee=86 zyUPG1h2CRi24uV+$+rV>DBa}MDIZxs(%McsnfkPy#q(rPH1KZw0d1!{b6p-oO!b{4cY z;XW8`^9}EB(>;2`2_jadg#r(cI$s}L3%&`8w0`(oxQQK5ezw1RRWx# z+hHv?3Xa<7timpAVXux#?6f%lpkVr zAuUbOz<>&bHx(Njj<2t8R(AFSGBP?|-uq!K;@~;M&Vts80i7u;+fMtqko|7$!&$!i zGi>Q>6{n?}^O1Tp2zn4E-oJmZ4BK!hgk*uj%$)VOqGMq}I1>aUSOw4#2I*kkh(jJv z>HpiBT@`^H0+clU#GhmN!IV9;qUHaArV{#w90j47m%W zWvHh4rlj}k)4i8MH7E4i@5d1gH`8-F)`DD|5mOuZ`g31jQMh>tH8m);zrf?pAU2Q# zPa{8HRylO-I!n*mbG03BSTPxO$0mrUFGb=D!=o5sOMLngEhlTGB%To18Hj>kKoV(N zuI-cob#!aHii0c+K^UMe zrDQrli3f*;(Skt%(G5^bs9d9Tb#?VXIynF)P9Y(Az)YZ|HRtQ!ptuW#o8#YW+>*3= 
zNUG#_GB?*cN)_ni`#iSUs<1Y&Yl#MWYTh7|89i8WTp(BBIx8I(Pyqo9K}Z-KH?H<> zZ*48*#Ov*!_W+k=Z^AqwOK5A+`bUg;g?3LYl~yLQZ)BuRRY~|!cv_v?Mk?RApBYyo zz+@J4-;yI98QEWm6#0Fxg&;Wswpt9)OW~W1J=Cz4KP9~)VXR#UPI7+k0pAq}+DAx8 z2c?IFg+%~}+e2P!(5$T+KHyBH@H_Z1spbZoyoA#ZEd)~8cl6{tZ=iFk_1`faH5ZLe@pqXELZ0(HR&Iu%Sa;CnPk z4%e6Nh#(toW64zi@L4N#V1xm1KTL{F9@@lRVm6p5f~5x3^mwG{ETgB?u&PE>MZ4uB z*wNEtc9ryGQrHz$+m*nK)+jiuAU`7#S9g#A*F)+WPPmk5afb<%xOk~WO8?LmFQ8id?)c20~+tt-HXRi!; zKL%zU6ckLa&X4qvBlnQxVgdd!!BjGmh7kDxVdYu#hGpo2!3)+|b8l}HxD4pq2gYrY znUG8lgoY;YMVrjT<^IU;Y7wtX1zL3~LGw^&B^2=zNa3V0W+xx7>~Q+5QsL z9}KI6U+hb%uM=t(glelrQ=DO;rzL*PR+La9;*mn$n!aL`!#`N=HinypX*7q+Q-{y& z`idqdUsfw#s(mUe{z)SC0OF$^=e7s_}TehXagDTfiP3* zsA6z!5kMTWO+f2P&`luj|K#;uNyT*tF&oliJb=_302~n+#Q1;fq|otV!#O`CciC+EekVoaPkrAZ!{nD{PEUA8@MEzbzTApe+K3U zKo9Q^ykN(`tnY|p2%AraBo0V+ZD<#GT1mMf+ivX7<%E&Vk;(}hmf{#H9M`duN51bp z4-_s?J`xUHlw)NMotJy(h2y0E?4&IU43RMQI!6A^BgItUzzF&eo@jeV2cB*VaB=hQ zq$lFj^`|ZD6>t>7h}oCR>#rUI?ta%QL6js!TZjM>flkPxQ|0|n$&u%*);Dwk!g_@c zurz+^%mKoKNdttDo9)Ep+#1>2-UYmP_PW!oRiyrtW|)2e&qf{V%gEVUJ%~kn;Mc&h ze!c!-G&Sh$Rof>QMZ8K*U8Pr{%Qk6CEG^Z@F;MVhy@X6=kqG2C!uW#V?WxP=J*Nzh z#s)Wt37Xql_Ws0ab6zc_OJF8k7)~vBS4cF4ga+UHmjek1)6(?zFUIg0}EoxBB?tN>bsrT9l>xwK}2qW8VOqRrKG-u)ydIEn&o9B|_|u zAKxVfmSaZbQ_vJ3^lJ_WUKsfpnVHdp@XUp%26i0`xJvf}ddLQ0$uT`O)YLvS&1HZp zDJWouuhn|tPNA+>2_!w&Q6U7o2HqI5@(|=1nDroW70fMQ26%o187@S`V5UfA>CE`A z?F}Ft7;^IN{iUuJ=b@33q$E;c`Uohks)9i?VAfFM+|blbv=GS{2y;XhCN^kM{nfQ6 zB9eQ(t{uQ6nl1$F-vQ`5U*zJzXjt=?d$;QLF_oB-f-*cehx9Joo{UV8y(XO;jfjYz zvJUI((qQPsedFVu|2s;CKk1Vlp)D_uB@;zq3jG!dG5rFZ!1K{-X^jY<={=zW5^uJG z0!JXLKx4)cuOC)4BArKIWn+_*mlp)P2C3E8{QQiJjCYXrHzo>B!!QNR zg~8Yx%&J+~+R_6K_k~#|wF1QQ+s`5)Mqv=0ZuCbv5kO!TBPJ-7Ruis}&|{G2K!YpX z4{|kkl~VZM*VotQy_n@*!jn@Vaq9lAo2;&$u83~_1^KPG3TwPuQ|89es1#Rsx zz}`oXq}uOhfH@9>1;amey80@7CZnVFcVnVCa@6tc6uit7MJ7!6w6A*N5%mf|4g z#rGA6&R|rob#tXH)OiC#XqPvioUus)Yhv^Cz-{a+yI-y)h+}**vhV_rz~(9)Hb``8 zR?*pXDxOJ6-GPcFRP(8_1=Aa{2|#gx<+!Q~)us1dJiW|nLJg_ec}T)SOnenw{Itgl}~R?<8p 
zffP5CT)Vou#(n-wUtj{?vU_quT%rj+o_+Nc__2LsV{-cXGu}kT=k?h(6BfE}tEVQb zoWp=Y;$=Y7BnH3C2>P#H`ay-nAP~<2m@Fu%Gv^CIKZYr zpdd7y!XOZs1q2A6oDh(cD?s-E=4}E^->+D)JlS|_MpJk&9TYPFB{-Z3O+R>(s~LqkSSj{=(c@US{O zDsvlX_MProw|;hVkC9B)E+ceDu#e#VXbfkDhNM7n03$c(&yq4)mQ+&0g*i(<5rl{W zhWh&T>*faQpeE{NidC_|{!`Yes*{Dx))AhDJJ;99tXk#o-TV{%F0T+$G%@jPwbL5w zsE&h3-yQ}^Vd#%Bb{UZ!QDm*=LsJme<9^5bF=5O}cLCjzTEI5ELurnn{& z1Rf3WPl+I$ke~dg)=A4n*FInq0Jz4AVPdEb;XCVxd~e&ef&t5|G;jtLJX2HWEevV2naT*|EYR(bOg4t=Y+(z zx!R!g^}*kaehVur*miImqF}%g&I1hA94sf>g&w^X0zG?q`_tT-jr{u* z@3~?#hN@-wY$s(sJgTCT2iHtm!ym|}@Z+heF|o0M&-Bvi?W)v6a6JHwNWa$gTwI*Z z*FJDN?&GtYZU!L^6b8Y`OU$Eo_h86URMmDGcdD9z!jw{KkOzF906R@Bt&m0oDsFC7 z!ruVp(1gCipe$II$D!TOHb77(3OX|<d z8jLm_jp&~+yTH^bf~CT1Fww*hLO$W_8v{Sw0R}cU(M45%>e)HxYTt>k2VW#>7mTw2 z^Cw4)zyLjfUbhS$o=&CX2MFAJa%gmC?m58#@G5dszwgG~@&7j=lgw*l1ZxPY7DzFG zI1ewcOXZlK+ix>yfAaUH3LI*j|2tJMT`^T<7FVfhNXiZj^4mh@x(=>eZj9N z5)dR_04O?Gt!>u4gOih3&%i8C(o5u^ui!hk^WKBWO+X!Rni$A`dk5 z70BNcX+M0h?n?cORd3L1tE;B+>W+eE={Jsg*24nf+NaWmNCP~*vzq|jj^~G#Fo_^x zZ~u!#k^)AqVm()S(-8!U20dRRYxQ9+!s&ytYdnJi@{0b*ea)*|l1H0BAz=5+w#I+^ zJ(n*0M`m}PBN3krPt@3!-z6X9QRurMA|eXG4q+>YHG=*Eeg#$>y#fvR%+UP}$|qo0 z3em_R8bFX!P%)Lj)PTHr?Rd@*ypJquJSy_Yy8-8X5D*wBsifzrdB0^O+N$8PVu9m{qNvh{`c=De*!RIpr4&>7waFL}h+g%UwP4+f>!4dMhRcH#B6!St85%=#Yb!nhLDNC6D7%+&^;8c+i9;DBa+^o4 zs@Ke{ko+Daj{zzK4TKvahR>{8G+vVFh54ZkjhgyJ;OE07f|U$odNj(myvZ+y;}UkO=_}BL+l)>?8Q6sBhZgp%Y?ONm3F8m>kcOCsP{XoV?F6FYY6?XYhN( zD9}%`Decbn$0xhGy+#qKpY>`z53SPu7Z#J0lr#z>l>lW>_k;cY?+XeLfc@|xkb*_n z4OXq<01%%2!^2Q{+`%CsFzyxwI|mjGjvHes?C1vtG859~q7X382O6hEV z(NT3Zbwbkm#8c%XBb-|QnSwvro?2X7jE;_`I-~(X4+Bkbph2HOE*TF*kqZ=RzZ|#C zI1ig%jV#RILlTJ?YebOUxDKau>rxd|ok5|E*n)f-WJf3uSUGVQm$Hh&){?^fugnEp zulZz#`Qq=EbVvfHW>8EqINludt5O-{p)Q9h10YBiFm(^UIrz4q)LHR~AYuR&GB`35 zRsACZ<;s(XNbTqt&skvgX1#?JdLfJv{Q^H!<=OcMkvhS3P7EG}eLBM-_A-H}Thcon zt?P-f@4apP=1o>XK`3N}Ft8j&C+GKM0URB~*aVCs0UX1CT#NTP&ujJBt%;|piijr| z0D_=SNg~n?fsUVc^6qgHP=Vk7>zM|U@GMeVw;jyoLT%3P&Naaia)9SFcob-#K23&( 
zhL}_b=;-L|%z_jHC{d#T|M9^S1!FiBMopa48G}b~x6vOzN`hR6yPZFSvjY*x3{5IISO2{FohQQg1UIej);rx%( zASNT!Jv;;<$;~M(eP#c}7N&G}lOkon{({H@Mp7m>&KsH#1G%1_5al4v7QFyaVb!lo z0VW1J2)RmD26dk)xCk)GFxQtU0m$R%$c32KcpGtOxSF*5H4CP7PDo=@V`ARZ$sy8= z$TW%qTRn#C+V}Pku-=QIoLyu`4m#_6Hbv|;o!lR>;)yMI(j+cA`Wb*2+zhf%nJ*ku z$m`>PdIKO*RrxrXb8vIV!F;pw>(|fS-777YhT^tikk9FjT&$#pMLs`{!U5gx;NT#L z9+*82f#(1aV{CZ&>5zNf4>O&-&;@Ha{c6)3f@X)ukaxm_ANY6(?yRNtV79*lF{TAl z3F;5HDBJuuW4+f9{fjo4KnESwOoQ)w$rn~2d(L=3qsuW4tki8k=*>;S??;SvuL9e0u-9=W23YWkHt+7cpkl+s67$# zofIs$Q`2zPa4y^aBEn9qt7fiWe<7EX56|X+gp;JfgMo7>UAjbhMEy z2Zcu7Pn{gn_ow>is{ZDr!u>AFt%TeT_>dJ0ba?2na=qYN1^jlK`#^j;hwB?`nFQs% z_a4`)fnXOK3+oqnlzN_9PoO`73HXW66-3U#Hd_-10DI5AVhW!t8xfHT-@hbexh{il zb{|EV197(MO|JW_nj9rNT)u;M9&J5obAaADS9h`96-qlZJS+`#`Z*H-)=Gag=`& z6Y2pr)p0ob=xI_b_~#0|6{6V63S%>ZVJC#?!NbF24-aysf1IYI_;}(wHe)Iz1j#Dm z!7j+l^U^#p?JZki6XNf(*F+0@9`AOt6e??LcR7#fwn0d%#QZ5MFAuGW1164*ngX%E zyyLDKw~V}ngVLn-Z)n)PR9a2#(}^_xtUIsf=OwMI`rSWwryrWb+4B+GiMpH;Dq7Zf z-T||IA(lyc0iR8{)}$Io!SI7%6h-I5aP{;N+h;c2=MZNiEJ?t!Nso;mDIV)z>Eh(r zzjIWi9gSNlrhA@W(F#w)kT?a*L0-U7GEAZ$(RquFaFh{`;(UD-3V_2_cWT65v)+L| zQC7KCHUW#LxBAUZND&4J9zTAJuvZb}7AcL(==$=^dZH`>wg#RKX@O@RI>9pi#To`3 zmA&*_0#{fP252BV_yWwdzh7m?o4H#CXsa2MSXO_&%ufA|_l_|vhR|L!LE7f}pS$cS zT$J>%s%kLlm6zg(36vmhxHz0E_bzNC!rz1umm?frrbi)?e(<62XhpDeHWU?*`puSt ziORPOMVjFjJe|N~EZ@9I8W5)_ah(1gsCZ_ zU{dZAQGwD7Xk3Yf9UT?i_n%(&=ol={70m>?{Xy{njn zg5vmNt>FDblNC$mPk%VDqGGyJ*=;J#lIQ~sqvh#C9;yBl+Cwj~tw(#;?CTqUicN{k zS)z}u@=iH){p24l+Iw$~3ah7z5(GwmC3G`hXEUe=d4mfHig_U|MvO~=vVGGIT3a0- z7t+(=@P5jC=j8Qqy^Vt2YekrYGO+I8?fj0OTy#geOb zYX(+bX*IgoBeMnh04^-QhiSAPLr3@g7KQaq4%2)kn`Odsa~xN?ZhDMt1#4vvh2aT` zuiw8Pu8P(2!S*3(B;=hKu-W9VD2Z;TKJ`5p32!k1=Mt~u??^{ouj9z3iE9jnD^t!% zWNbZaS24^fF%V+nMM&3H9?q!T%0G?w#PB)rjD9!wq@LP&&GVPEOk0majw~I1vk@n> zyBGLb>A~%z0jk;>QJPq9aGQ@e|9rK2W-{zix3|}uJWV;S+Mtx}86zSe*QcHQ)%(QL zkoSwIy|DMqv8s3ozsd55(t=f*_TcIB%FIUuApz!^s=t(88f{wUte2Eim)W*e?K?_e z_e${pJ|dW2;>JGfNqoipiSLI<;iaC_^;xGB+vEJD2R@ab)bHKa;=;3O{^HIhtj};D z;y9lq2RdxxwoY+qkY-G71+_lMVdUpvI^ 
zl)MbJ=ikJ$S$b=V^&xmeyc7E|8S8Q?(p5txg`KZRxAIa?_VxD(q7ccNyXz(=qy6pV zu8;Y9;}x})?pGxo)j%uBcvbeo$5w1%CfH*Z4ceJrG@4-Slg`B)o z*;=)vZ_fu)EvN=z{VJVQrMskdVRJumj zd)1?nMd$pwRoo-j&2zEx_=G$r4(|+}c<@^K>D#?2?~=AWtyOujBFbPhL2r=6^GY2L z9=v_5a$~nFj$VFMx0RJ=ly#J_Q|^W7k*Xe)`>n9>q_rwKXDXmH^B&4=JY{96!S;jIbapB$a*G!8M24=`xgL!=*ZbH_yEUa(#wk%{CoA2LHR^3#=$2VF zKCMQzuRcbz$s-67PA zdbGuzqa3*q_wKCqF~dwh8Sah1Q}UBUMO8-cOfLG35ff8|k++=H`o(lI)HlhRGT*#O zbWQW~(dL&cnDRu6IR8uf?#Z|XCT5{iE zoN(!t)=!&sj68OG*WmE|@0iNCmx5;cOGaOZ*9I?}PtA$B_sBU_1lYE8>AL5#AAds1 zN%_vdGB_vyZ}()wV)!lp8BV%7Yua&pfsDX|h0vnjvaQ)s5z|8%t?e2HxNIZO6 z<4#B$H>bH>(%}}LJucFC%K4VdJZ8r>y;G#b2CY#4Wz|Ybd`B&T368Lvq??8JXAg<5 zT;_?SBO?PRIoldO?mf-!FY+v-9@*?@FyH4szOK4FWB4Yspcx*dT^EYr5rgWC1K#y$CS@ZzwR?) zqWmA8&N3{ju3zJHNlQwXba%Iu)PMp40@BjmAt~M6Ez%*~U4noJ(jC&>aMrx3T%wZfqXH~AX&yJf-ciJs1QCy{6XJU!uFVV#4}IrXu#NtMjruisBhL+v`Xc_?k)( zj@Jppj6c|u6v^tmqc=cSheYK0;rK<1?LAe{ed@X9JparoK*4-O`SEE@AYAL?-=R_V z&Np0w9-R-rHnh<%+!N=;3>9w+@jz}E3G8<_`@`4>JV^&>RC#Pvk8%}XVRf}mEA6FN zzAD!_X8{r^TPa(S=GxuGPC09t2;Em-o>shF@BRb)tf$hFKhWk@oT4&B>&V^|L9wV$ z=sHG@k%8BMD{XVH#BWg7&ZfUsu>iSPie&Ffnb&}EgDNN&R01yd4Gb3=$Pg#Qp*l# zxw+s|-maF(HCAcPGw|n(Fo@LNZG0ZAJyZT+P}No&_rog3*wC={GBwJCO9$ekL5x+T zsUQ+T0s0h7;9@ZbR0PFGrgw31^0eXPSbm*~UrcpCrJF*h-P33I-h=58P9)o$X+zz* zLx#HBs_GHgui30&et&o@OE*xAMgE!`j^_Ipe`?lwLrgF9FyHsm)bECs@S-CHVn~Dp z8Ofi3qgotPy}!Fd3GE%r;II0oB!$vFG=u`Gw%`~Fl0LeS4j>KZPcnd1N*YH_BK1qG zsV)x1eJvJ(0+6Z)3M_yF=tJ(owg@OFsr5e1Kw!QE^nk-)c81qE!gjcf{e_R`ja9|P z12612i7z%Xz`k#p#=ieTNpToR>7Zsn_lKX)uK!|mrBj;)+SvrDFp$YGgIHw(oeW6! 
zrBziqJnfwZ;D?Hgj7pkO%>1$gU2zr{zC>jn}dOku&^#(A`c& zsvx&Av}Q1&+&UM!SCj~H6jVipH8l=X{OJ$3AU_d=9RLg$LT7f06?U#`ZOe<64FM3I ziBl;_6Zd;6jQnau1k{_CXu+wXUapgNIjB@nZV4XRWU9wz%bfQSId>a(ZR)8TX(%wL zjEJDgqT!>I*k$6>QiMVY%9=wS@Rl+ZrHcFcsvlYb+7EDC2L=Z6g9eQs|tUD7S zAefFDCIi$6Qt=7JEoWuQ!mgSmesUm1u->7oyiNd>JDQ+eApCx7)gQF^v$KwXpWB!E zOOH|hwFbjrH;d!re1kGDY9}ptp55^zgtCgkdDLD2yyMeBmY2Sc#~k3{;%!eF3f{CB zS7yE(n*{)AQP$&{SrCnMT?Vr>OGpwg*@#n3N&|7a|_0Sh8O zofe=~xO{91|CGDWY%1csBwNv+=?#t{;M4Ww>Byq5GRM!h%sJ8m6^qHsz72=}>MMnw zdfFX5w+|v|iPW7p1fb0hirlP>E1*RWF8+k==^d}hM_l93^^;%NS%-&XrB(E=qEzPj z^bJ@DjDcJT4zcWoOD4bCMu81Mz|QI6b0>H4j8LO9^{&Z)wHD%##e6ToiqV^OWG4>c`UvXz#Lj=GtEP74V79S)Sw=S)~ z*>|}$6LMpffHL6DAJx$zwtgxA#1FYBq^l3Hc{4T(X5h4$Fm`d$9CT>&P2wW= z?07rPP->va2<=$2L;y9Eu$>%MNLb&vAC&3|l*=-T6}-HJyAACj%ccjm$#lajhLH`b zCDSY)+nNf!!TSvZPRXhQPa9H(?cnTAr?Zf(J`jdUwJ0|O>Kbz`KX{J6?xFa&V_{%; z_`nm9MFE`#U=xgiHYEFE*;(%c6#}@DWgDe;NJvQm0?A7edi(GgeV{V581iYm`juzP zQs(|b#ZMGuL}VLP#a*@D-k;47M)ZRlhJh&tn?pn-91>nxDP0IjuxfvV-VDqwK2P5L z4HG%+1QGc9IjwBQ0@2_?7N();m7U0_R27H&pm?uXqn7%)^X?7_s0x9|SX=@E97Vo+ zu0jyVU#G98r3h;%g~-TrEj}dg2dQKRqI7pifMxmJ?{2aQG9Zm2X=g_PG!1CQZFW{y zQ_SxkDDEX9PeoT{?)f?#JUo%6f1!DfqoeifI@=Y{Kpf5%OV?qNU8_9bbMm2Ne_D2Sb_%z8N(($7G~3G&PRO+Dy8Xu-8IHxmY6A~d z;FnKzl&D>MFzr1Vq4G1{G&`$5v#dqQ1n3PlB=Hy|n*xn%m2sqP1tt&g7@;T4w9a&a zZ;CgR@*LZ-<21EgOq3ivn`Mg#dz4#EN8h6toZP*GY3QH2a(z|i&vD`AVsAf6Fhhw)R;@IE1P&XJ&Nk2D!T|T?lvGVRcsy>)+e;5><#GE!+%`E(f z1+%&1+HnW6yxPuU;!!$dH^R{xxHhGnTwG`jSH~*`xD(n;tQi?q+S!x_9pDpwo4(wh zLFRfmugsuHX(r|mVL3c>ndlX`xVq-%;24q*tDlS`ih)>o+e88Z8Wwz9;oH1QKYq{d zoB%>j!zX-gb>w0gb3W^pmLwUMsB6Qfajg+|3*xV)Iw1TW8l7$X&+X7ePy+jnF}$_s z28I+(?=o@j>?2S;mHwrWV=b}M(Z-EGX6BIgqey$3e6`VSjQ#$7*b`w%bs=S{DlQJk zdg0S-WtJ?-&dok^h}KknAS4`Q2@^%gHHP_xgu)@1;F4heV2`D-0sF`!!?*!^aVBUEtU0 z9B?28Ml2nM46$kEF1C~!h-4WN1@*2GSHYap5TRg4H4zX@hNIMxkoce0$kR(jBk!nB z>`Vyjh#>~`;(#5;mM`0+tVXQqw?H`3+2cP-DRh~seNqf7$1iZcB7m`{Fm+m$A*>GABKYse%weCo>wE>7Yj@o@IzGJ-)HcK6_ozRjGrY zc_aPG17i{3c&T(Z>{k`4c`W2}Z3!vnB}$l*JHM1_iHUy|FV7H(A4ZudTB)qjEwgbj 
z2prY6`g!3k`+ws<4XQ<`Buq(^@e``%&InmQpN_EbboPHpPnzoRzE!NS%fDy-QkB*ytg^3XmTh2`|kOChs@=eok8Pm7^?g93w#kE$aAQ7iNS5C73%vYVe0^ z1&SQU>$tE)Pjj;$MgdoEr~#VeAAM12xDHnTs`i$H=z^K)W!dgoQhpyjY)M-sPiKPg z`iA@WG0B%ea|!jO*9%1}H1c%QpvBE=`Hp5()P9B)YQ}0Rs*=cleXsY*FW=G7asFH3 z6AUI=9?iM{Hd-i7npf4_9Z#)g|Lw9{v=R2yM|!j%BQ+dM9Q%Vd9!o|dw{1lQ0aDjn zeniMCP0b)wO&M%o_OUThK6M*nCjK(al=mzm21VxuaiIWC*Wt@1}DrkHwP zx#QFR~=O zIT`GYdP~_&tix)ybmxoQ^;{UYW~Hs zKoS$JK8&D3JV*owGJ$rB2nyJIzsC#$PNSXT{;V-#l`!*EXuovUvFpSgc=HQCMMhcH z<(DLNEFICiW}YnJ(-0$fw{Lzv+R5+ml=tMQ=3PjU?v1WUcY{}_P>uzh7{@J-DYF{( zlt9Xzux0L$mau$&{rWBQ_}9D{txUnf<#GGJbL!gd$yl|g~3&Or#lD=yvp*@Du^E+y}rk;G4xRpRpeO?H_y3X=#Wz; zbgQdTIHD-NtG~9csn663-JrDJjhyg?H(dDAjHAWIlS(l7_PxDGp1s1rU97YOLksR3 zMxkWl7l=enT(30DI-9w9OXb1ZVDbxnAPjCceqgPudOb{3VNB%1V_I|~uHK*6?ZptD z5bPj~;G$Yl%0d|Frj@^#I<+{c2c&#AGf&t!(ngHkX}knw+jOWw1wp+=?C4+2V5IWY zmWe9CDFPfcd0x@shPkiMv8oKW4CJu@Ve#ecT+3n z;D=4(ft!&(dEiQmi+ZrhF!3YUL)eTo!G+8Xfqz1^rh1+luac4!lN&U>uOfMPDgMfeuS|MFpr`b$*(mh$p8Z~_dioEaK-E#O94j)#oe(91c38Y# zFfZ7y%a=^c6imkJeRFvRO?vuBoOF3wB4T1dJm3R2`Q>}y3!o!~4Q_xR1stC^loQ-V zq7ziA^SJw}>9jvsP3l>;dJ(mGx(GHuLf7*K8*t?BHssi^>&$7P0FIo_VZ`9Za>0NU zN)_qz@u_C2uzHJ5?4m^D=Td?ITRR^8lP{Hq+R|m3*dV*Iyu(b zX>fbDW|tD}RCQk^`i8z2)H;`s!KR)b|II%&v}99{(=(SxU_+9(=j-pYj^^j(8R-dn zdOK2hCP9)lqdP^O-HAg0Sky)RVtREoH--G9$7SZvA5SV_>xE*dcyP*P|Hy218kA^!apYXJ!dxw5uCl|OIDj~XT$G!YRim5pOzu9ZUs8R+a3`26Q z+jf$y%%Hzmu+NQQNvHkZ=Gc(&aH-{I@v?p&6uzzt8PB=LRI?Sr_(^gzN(M|i zQcUxo8b{aZ@275^1wTx#F~|!CT~u1~0_(hKTk!^#Jn zx)xx;G9{U~jctYnN-{-v3n>ZN_XohFOi&a&#t-A|^>MJ*htBa>oGlOV5RNJ0lz*ev z+-!vtP)W8LKG{Wq9ew@j)$+sE8DIK#`M05M&WjrhaIpe`MKoP2#ty(D32e%yXMX?2 zMnM1yC5UVS0#=snXmoUR{M*;<4=p0f89UT&VlPfHo40naEIw!(gSTvp?)!I?t^*=aN5FbA@RF%mGcqsvoNgN2`=NP6bQ&>~wu4d1zR!2Yj!>*=_?`eDsig zKKD!b^xm+YiF8zNF+VL+IH4aI6G z{Psey@PHF5t`@E^k}Db>93QYGltxn{)&V>pbP(5oVq= zMmJH~qyM>#t&lIWAvN%Nc%Syvuy_0WaN>@v|EL?r>K*|f7vXkb?&k_|Xm;Z27=Z=( z{d{2OLvLD{)PBR9cYk?Z*p=*&`>ps3)AJ8ihHg?E%->SAj5Y+Qj|B*s3>sCR^%?X# z`1VhK4q|U@Pv;mMpiPOvQRpE3z)9lIvJK5{^&WB4&v`x=u2>Pjx*bXTw$$<@WuD&Y 
z?f2Mx$Y}C+?|I3=ItY$&FhzqQ$1l@yA&Xr#&tYtq-LGZ%Zmmk%3pXi1%1L+3_gvRl zg~#^kiSV-*=LWZqVDp}{xFTv%{^&S=_LEPELtzD12}wPjAE%n4k@D`WPV@6*VkuwFfaO_ab`v-)f8PEGi<>y2UpA**Kxx1$@aiFBv9t z`DQ@-e;5QL&^zy*9EPtWnUxM1F(svdyF8MkgH?9}u^h+B!bh6zMU)Nuu@K z%!6Z-?j=Up%e}ioU+j@pUN^l$?nhRv**1m0x;IYAiWBOA6PjI^>oT+|{nHX$20e_D z^?U5XD?*vTld8AxaArUz&2awU$Wi%?Gx|wLX)ZiY^T_Al>KvamV`yLjW&vZcjV@iN z+c48~>tJNSxd^ec-scvnogvN9QC4&6JN1+e{+5$zjOM*FAMvZ!o?099wGXGlm;#@@ zJ;i@!+dX=;`5V)`_|+yig3TAfr$ua<3N!j;IVgY7K34Wj4{o;+Z?8VkhLF6(+lC>y z(TNnch zm6)D({3Vqo9N#(N+U)7Q_fJ$G;Cv&LwN3G~MgeroO~5t*EB(pMMvN2S44i|%Dc&K< z1CIg9&;c6uUCXPWLnb)IXkt<6T>FaC7L9;$JnLOv`BKiZ^jsKe2Bi2S80~1Khgk&D zS3Tj{irDeQsyOwhyKxld-ZC>c$9ixRc*Kax3R|sCdXVff@piGou832SrS?2$znK0O zK3U&_brvITA={I_*dF}^Gn_-r;hu93+TrO9!P(&bacqvXQ=vab{Kb5)0}eR@weIOC zro#U)COWZfZ`kiM(D@Dd->*bZS)z8hS9ZtG_kO>tUf@}p;1hSm{}5`N<<4{G!!kP= z_OC1!lYbw)*FMWcV}M)cxGAUad;M6TiIDX+Gh)? zsGD~=#@qI4<#;Lt5~K}@Wq(-&$9fT{CU46p3-8%U7(i;>H$*)J=Q4JH)Fi;XQve3`-pm=!3*->v6@BZwyEC)vNiC21(rcioa48BzIn} z5mDl4cf3KAlwi2iKvG!1u>4KJ-_-U&Es180r9LHkhj>0amPJuvLR;1BXaK@;9yLbI z@`JGGVqwdO*9La_b2r3_~QpoL_aim3u;<}4p6zA2p!tL zA~w(L28}9c3be5i=$hKqhF^A){|R7%2B#rNa{$6Hd!{T9W#_-!7x!*7a}zdL+3;sq z&%I2s7RQzm=Of_D?NOUU>!BZ1OL9o0^fyw9b(ILA;NqH0Q_#&U~`^8)8#Zwt9%6io|3>Y}+C+?V&^ks!c(>jkM%=RkB|Fu6tvB=q5b=NX&%Hxdh5#?lu(EDaXN%p~FUoApqPEb{_{Wly2#Ul%h$%Zz zU*u_`gmYJlOj3dhmr}RiA*KgftYzEQ2*XC-8&Hvg0xFLISR7Q>J9?4DZzQ{15cL~v z$tXaO5ZbEDxg9f~>+}-`QuLzhf{pjxbdDLQGK0nXyj0aEe8T6*Nzc9;QP1acM&$Ch zMz-$$OI49>-dVj~9rLVy(d)%t5bKTbjl3`kWX$dHM?+QbHATm5pcKW>7saP6x_>=1 zoZp>O8Vg5Vyk1lZBff}V?2>EkWZxH8W5QWd^Qn8tJw2?}pJGS^=>R2LjA(PmOUk*} zykX0p*ciLR&oogz#Ovu;Ro-_R-_WH|L59gBWE-7S&dAmyC;MiO&+u%tor=#H*#hkw zDxx8z>#1oo)CRU?V>%+YmMm^*A0zlt93qJyd1fOT29=W& zJUcI@Fx!6PuObUm(~jO(MG{9AvdRk{b^hW$CW**2 zxyj(yLS{Po`Dq7P{16!0@6NNI^~~7JY4*ID(rzaViR zb}BiunMUsoajZ@5yN^G??MzA;wCeY?<99P=F4nsdBR&o)D(x$-2XnAp%k5sk2jvCz zj77ia$87UKzXidk7VN)U7jLQRTKAPh+4;WG*h3pz#!L%fV$dye^N=1bU!)i#Y&K; zjds%*5$F+^iZS`y@YCRF%iwH_-7jswvNgKOdy5&ki?C4eSY`9}E3m_y0Bz%MRaHBK 
z;!ia@RO75i7qlh&Lot$! zZtMv3b6BvYDCD8BJd%4I7*6?J2yk8B5DQElH&VOaA8BLh!qu^iuo>f`P}1gdGv2Np zagcbybKi-9G>0*UMpv<9AjJV@on7HUl)=F17dSY_PxB3tTq8pR>M!J%fs_xJO(Wp3 z^Gt6cc%d^_)%!I6=aIni&k1(n4!>T;KJ$JwI@Ln#rKDxH2$zjFKz^Q zz7MxK&w|h=c)MH~*`N_9U_6|qbb>H`@Pch?6iD1|9bN+%g!^t@C`=OoQ43x#I<>DP zLgpHNPOFOl1o7FX)qodg$woQ^6XMSz&@Q_+V3sv5xsm{HarqcZ^oG7Z2&6z?AG|xl zc7i}}0enCx8P|T^vg9-_#ybe4p|_NEQLw~rD1n)-=eFdM%>~{A-`i0GXcK^d{o~km z=d(}8NiaJH@ZQTie8Hz<1X`-lLIlO*uAH9V<7KAnyd@M+49d%f>90W+TZyeX2n4y^ z(PBVw0Nn5ZoyS~Sk11;x(5V*nPKP?@AmN2-VpS`(i?EDE0CU;yEv7b^rpHhGT zL=5mr6CpsY)BGE~VGp9&XV6a`aQsvRx9~pI5ET>@fCM;hSRanoZYGPsM}lOCKjUvB z%W@h>&6S!zZ`SK$eeBiw?^9>U2-XLvCsR}jFMq{JH z>LMv$715gWzDSRD3w$j(rzTP>{B};Q+@k&)mau@>7lV-y^Z5YhL41BTdGeS0lwX*e zKF;D8&f%74X1u12v3dCHr;P(EC*#Z(TxRcC%{$ifZSy5>K=|3YoJI&HqkwvVv_oL1 zYY4Fl<3D&)g{xt79a@aSiJYTClRh>te-p{34FLcl;)=%9Q@Y`7VYPjMG!>w!Gifm3 zAXTJUzK5=c?|;pfT#pd#7BRwLe7EuK#2q|sb3wiuJX9P9h|N#_l~sYvAN1*iEoHyr za}8K{SxM#qinQplljUCqb2F9FAp@cvko&O{wxZv;q-K-;+QJ_q=fMnSyXUQ>*JC+$sQ(!-IO)YehzAPXS!#+O4h?v)5qj z2dzeemQFLky4+s-J-bcq@g$p`LWgKTr;mU}4k-6S?*m{*Zp@#9<9aQaNb+`?_EnVQ zvd7LTz%0X7PVF)q+@gU;#(q_2Hb~&(k+$0To@aoba6EVgOj3a^z=|IQbow^29G|Pd zK(ds|!`{hs!US}O^(HRxpg7N1*-X~8H3FSysO3ju*A@pnXM%Em3h1)~y2FK{O^#dL zMaDsD3)*1-sh)>k@dxQY$E!>mww;fzAWZ}CaXXc*M`zp6ff1`i zfWUox4U{|(?b5D4%hQgGj>e4{1Wyx?LpcZZwjjUzM!${wm1i-4l@fAWbb#_=cfKH# zuf{8}t;vbs;S8JL8tUm@-2Hx5E`qdy1bqQ`xTt!szl5&Q|Jz{04MPCNt6ZBnJv=&i zWV<)?7BU}^XD#a@Yp~R%J`@Md3I%!}{7@ucprHK_kQayjcdGEf#97N_xZ7afPm>tn%8)g5;LVC!=LNawm){;IPV#(5$!P3%a1u@ zmNF81@Fk8S)2#nt*)y>1g()56L}h=vPM~0vt8QM|#L|a-vS;WTU4osG zTdkmmjbxI9B+-4<2$or6*i}VCig)aJ`7TeMNwV#sX+t3@5o4(!KF|5|{^;u@IX&+8 z)gRH6F~@95FJpMZuX)ma%g$8WVa3iA={k?O+fTBPwtjVSTBO6Cel6({fcRx3nz^!GD3tQ$z7Dem(2! 
z05(}5V*PY8C=Lhsm~R010d!bE;PCum#^CYYM>~J$B`ino5d?+XmEz^x{VusVjqw{Z zc<46(iHtGh7|{U$=Mw-&h34m{`)2UU&0u^$*qZ{7iXr#8PzPgLx}hW)w$2B~r=#Pi zXb?Sd0#5?RX=Q!~fWO0ydULzDBQ^-YCc~f?t_pe^&@Ku1&z<^d6{x`m6l&0%Z3aot zCa|q;Iv*EC4VEGma{dZkD)c<}ilB`)a6?V_JvnuN)y^y`0vPUq90OiSLq}(L<>BYb z$$8FmW`=D`7(m?t#s-wT2huZC0ItUSe4+Z>2bibPiSeL32mnGq5efBTw<r3t*I zJHYuht%Z;XIPYSEZEV}=00{`f5FSQ>YR-!;{*F_Oalsu@==dG%07DLFRSJ~MpC7NC zMp+w8+8s^-y~}A*maxYA_RPVmheuiLswTz(d`3Xx90fO{y4Yo@>M1w|r&Rep?#nZx zBEZT0HkMKxrWZR!1*efO{z`f|zIS`j&dh&D%KdD=ULM8ZUz7ku1uW4EZ16L(NudDV zOfLZ0z<|dI!1v6bzxfP^i{NnzrGH_DdaQmyo&yAX&~1S({h2O*bPxh1$vt)eWrSE|O-*j4jTMyx0^wIr*tX>;%(*~gjCm6w? zqt+>3!(KqJ8xgY{Qi%UJKq7E{53+XWsSkgp8z{;e-<)~E8CJzYvP2;j9u?J)@Kc;3 zPT@g7*s#kDqq1KatJVlGx<{$kWpKqveQ3~Ew~@LK2)bY-=q0Ehq?>%jZ=yog2MNel zmz4dnznwMlQVCYF6oq8u1;#ZbW>`ryAVL=b*jP$`n+=XR!%EPTClbb9MEOv}qMfxl zaHb|-L@B=N4e!F^wMnLNZ(#0&4!EP9h_PPlcF*t$Zq4?TL;P{Mfv*h#5{eUgqTB4|WA88IzND-+@^+rGFnf}w9 z%)-`=ru^s6lS@zawS<=UbKA)*5>hhxe{F}^6tjtj zy7OBnC`RHLbJiHbobn=xED^0YwPR;tO@eW;b%F23MK@MQa+pl{WoWy|;whW)_J$hd zEb0$O*>?KkqS{9+jSx8Qms~YNo5p^G0`aefCW@I0hki+VBPSVOoD}w=1)Lt-Lcb%vB-KKy--3fF9;tl?9iV_$o!ylksu6neKAdD~stRFx&6QJP$O`jZkcR-hJ zcHh$%zK{C`z?Z=20`ml#K$7kD$gOh}EW}Og;mjgN&w@Ld@DkKkl+C9G1#HYtikPw7PK}A1)~Ae2i$q)j4!}y zt^?@AP*I_FlnowAjf4Rsk)SL2$=!Zc9oV-fLcorBQA0@Ip&#G-eqm)BEd3)8e~TDm zeU*3b@S)!t^yfE08<3U=ogkB2oOEz4>)0C#E?b4}iEsZjb|hV9B*t@(3&8tBlWKro zVzcn@Ppfl1-Kx?E6l=lL&`GZuqG(r<1)e3|8zd|L*$jX~H%p66k%Xe4Z5A3;8@B!` z6Zi6Y_I2c_7pS0m`XL8OE*ZAzFRj&#o+f+M;zt$uL{wHs_k(+xWDjt!_u0yps2Me2h5AGj16IPBQ?Z;U% zM$;h(K~{MOjd+bSn=+0IvE99`J+Y|xCd}30LydkztCMfV$X*+je{@-7q}OPnvKh-f zCFseb2o_j>e~Gw2ZRpb&w#(P#S<_w4bnO!qyVf$Y?@>oY(%`R&@=^jPwz`tLOBeoi zf$JLqJ(&gibU8*mNx@{>?`obule9y64v$UdI1SH+#!;?+D-hhJ#^mP3hMbY3`~4U+ zIqc>D|G16BeMs$&B)kZSo8=F;eSn=j=Fz?d?`GzCQ(Ggx7jF`;5Z&O!Vbi zQYEF`wY;s~>$HqXwRy#p&#L|_WOxsUQwYH7M~jy*^Z9q2H74-yv1E;*+NSLkL*Cw7 zDhT|r(xwhJmAKPCu@f;l_*7baGCbiQTszluw$VPLH|nJ^(wi{6KgOzKHae*psc}%9 zJMor_Awcvoz4?X+lrZWt&M8En9@hgm&+j9cqslZ2bcHmpJ0{oP%(f 
z@au8qX^7?NZY2t%oc%OJ!N$b;E%uT2t^>-eTf)ZdDg#m!&Un`wsjR{Q|CM^gza-qZ zo-a4>kWqC{?xkT}FJ4eXj0!|#(>{uOW-s2VAL$%sbmr#0#?wPJ5YjxAstncbC4!sB zO>_DKJK9M8T59SGhUn`IWLoCswFVy>-B@*{Q>`fMp9jxt^{b_iRP&*4kD5$G0?S#o z;kkMxl(f`~@S~o@LePVqU)yC?NSJIleBCtO%0Q!W>il=N=#ncU8=XL$*_~+k^9fs@ z=5?b1M30OcH5vZ-hw$4XMxwWL*xwVp+Y?d?t%U3thMDk>cPwM3ryMOcN<-fNbCqd+MRDEfY7x z-PRoI{|!mQ+077i4jn5TeTThSd_IyCUM+al%D8w>$KOu1ct4_)B2D6TAG3Z;r_@a4 zMiY{6ta)|;%kOMrt2E8mX4Nq)`198*ZtYHHskzLDhZ?dbdt3h(%&V7T3E~a11CwV$ zOMji-vgvaCi7_57`=A)LK**+CUhR_|bvu<5;IB#k6E7BRkm$bL$SiUl-7Z^yp2JOa z*ZD5{`j7N@3vVzr3mJPB6&}@IdxJ&TV|OY0u5fRhP$+^-5?=Ba%JN(C5<3DcyQ4xV^{faFr= zed`F-CH^zr35abV4&D#cYQ(0I(8Z33AB88)OM}{1|mY6$xsD-%|5mDxFZAf z4wDYGPJkK=^6x4_Syy-hV0#6|k!>xhsjIIAH%w7|uMYqTVh~IRWWno1k*6A<=9xpQ z+tYjHr;}T|r?C(K5g!5;@*rd1#T~X(WX2!}PC&Pa&=V_I2ZY2xZ$Dbs9D3#j@7yNf z7jHV{+>?Ux32x`(p-D6$VhvWPHc22ril*4G`Sr+ z;Lg8$;9~+IRhZvNm) zIfK?C6;J9olHrpQ+;^0yP)LyQk$6lHQZ145xBE0>5(BDvzNS+;I{Tw4VT+8PF7msC z-t@B#^OhjF9|zPN(}Wm0$atPRmFFhjbF_&m<=8}=IBNMva5uJ5Y7D93gtICZpljuO7{w9OnJ=THKl63d?0??JQ;1Q z4!vS?@p!#Rtp;jn8@XHmeLb!yjJ;OQ1Dc zk2>J9C>wnnVYvzSKN-2LB1=Lq#Dmx&Ax8VFvT<9M2FE7OLEQJLC~5JL@NiFwq_AU= zO5QhF?@V0DioO=aSoHWJDXaDiFFxoyDf!%c1yc$8b7Axk-`5s&%c`?Zy)+nQhp@fx z;3VI{kT2*Af30XbWKl52sWi;8O!$g7hPj6w7>;DM?qONKn^@hAlr%_~U6Ay9 z1%i)d;JD@!wj$iO7rgU+(SIg`)r?GLouSgZT{OxQ@Wni!MXo(bvMVcz&kDAsqj$IHDeUc^3 z8L24DQg2;kUddp5mC2>zaX80dOnn*hN^sfg{6@-fAwWVx0?j^`8YY4|K9{%RmFJj| zuo@wg4Z16Oy2$be1*PECmpeUIsl)6IS~c~Gj-!8%^!+{~Y9iK-E^%V7mQ*CAemcNR zed#<=Ul0GT=#3{tqRYrCrn^cmKekq&49#ru^?7%EfTHrT5SwT&{<9qx(#@pq%)>2p z!YEg!cmZ0l=N+tI~mtO^6PMBS$fj;Kkn3*4UV~LAniYU6R5k) z*}+QEDlO8k<*1V|jI0u>-rCxAu7#F>2dKddwuUejEBrNCi(IXF^ahhrL?u9-VVkwi+od$(Diq zJWa5@)c`{wO4HTcVGeozg=41Q3T|_Ro4VWTZHb`S7Qn_ZbiAeJkAmYPM?0O)ihthG z3*;s-G}ih`fAwndEEHJ#OrQe7-CM*!G#U3&A{`pIoQRBar-UVlI%4MDEiPnK?~i$# zHbfHoFHb&!7jh%<-hu?`bnN*0)$sQ$v`dP0@C;4q2u7$${tgPpiQ6iDXB^Fc;cjh* zc#mw2&M5ykJSNN3@lBSJaEMRF#;GWNkegg{f66TE#{-mey$l5lYQiyU93{r17I`lE 
ze`o7G*sD$EZW`E3)VO(y5x?>hmEiIAFtj^_s*GWo=R)*_s1f{aSfgM$ZS0ycueF^I1HQFvcPb_DIDyU~gJm)y6 zykno9uEXN)g6V;&ML}z#hE><&$nRt2bl`vWGKF^K;M$XSnD^GwHgYnfkEkw9mhRPt zBk!b)G*5W`z*iEGa7II~bG^Q}fV|temUFpwp182);DGFnSKghpYMl{S%Li#o)NR>v%IeGI)eW4qsRCw zFn&Bn!d0>xjcIYxD}V(kq-XAM8|TpaD} zU@~3EihHETB$T*Vt*cmEI7k^!K-!z_RUC4z>UIdopT`20Vf|?Br=G-A<~!BkE#w7E z(_=J>FDu=o$KDC>L|!^biYA(V7q#>gr^nw5)m3$Sq*p$7fP8FcHcKr}oxJvGv-e$d zvHsdj5dVVyUcmRWTYJg)nZxItA6>ef@-*6FOM_rQ*=Ux)&%v;1v?>F%8*j6=53CXO zu1P{LDHITa=oNwdXY&pG^9bY*cRhLiF?M?aS~MrO(r}19`&&N0;VWwoHtH4~?kKa~ zP<`XWf3s|>rZJyC`bUpz!hol6_T39)LhOI*h|&>xCx>|uC@-)a zkS;$q7t36E>G#Xqy5iq;d~mE;a7LSAT`i=FZFK&{N4w%C9gruPx=J#wKdvCzLhKCA z2A+*Y)3KjjpARX73vJ<6xe-=~gRIy1iis%#mNOBbAhC{G4 zqGq#9X0xtU{N}R^X0y(I*+j{DJ|@SP*9SuVjr$=EZHTeXslxBM+|m`EC0VYi55~BQ zh_{tFQd4eiD74F(j4Y2&bvc#|yKjzmu5d7rVC-5=4Q#VsC8cNder@6QCse?xicqu ziG)0Zb^K~0K=8FmB3X&*wZq-T@(xSJ=UNukVgteH!I8A!9hB7}1td9@kqoQ^WddbB zudGIE@j%mimmIfW+JPB^R~?nA<2<*UV@GH7Dx882iYwP^tEWiFF%<6JD%L!LoX5Uw z8P>Jdj>n!8f|ROH2SyUC2&A1k-KX~$i_5Z3;d18!9?3Zg=VCd+x&cgyPPF#c1;aJ1 zMo+Bd<`Pk`$K6eiffHobQd>tc{g?fL$SWZRs48hk8;R8CQ@zG^((uQ9_o!cf*sn>k zjIM*23hW1~j=PyE%x@1cvl;~e!3)Nr?DXJ#DXX^l{CR{C+tZc>t9~+biDhE@04mGE z-1JgBbphY@Z<7PfgxEa`Xg(bm!3-z215*f0=oX6JSUta0Q-tcTJL9_5hOl7hGe5{X zgQ?SCQXLau4vqs!c@)Z81p2m)X7dzxo-^Np=99xh0~n6Z@crwMrv?fY*s;?mDPBS_ z8Hzw>25N00le_x^8XCZ(_FQ*di$Xn}%J!>y-$C{aiPlu=eD+$Zu`^L25J=zJ+7djD zf7l1Smbd9VrCsN=x-lC9h#PU2GQ;f0=eJ-Ag5!~Aga`581}Nk=McA zDCUxH@B+Z7ZUzJ_58(B>14Ra^E`=1vj(X;SB9>Uv`+GELm(FWYlAC+?)al0Cxf>*| zikGRzKh;hwHtxBwW>7j7%43~MpT}k{cJ`E!!P8l1Xj8Bn zTie}uZK0G{k=#qnYsz!ucDya4*lV{k`yf|mHVQXccq3qLVy(W|?yv43aL#HOv(%^>}B`#g1Qb#*U!>}4jKyA|__Hu8S4 z!%Ci;fwU#~=YyZu!Q+8(BShFE8h2Uba_*)3xwU3qh3M{JvBOH3$
  • ;lrGM=y2xykGsXq$naP8D(Udsv z)sSvj+UCOGks`Vk+k1;1MaQY_7!T0arXp?X{y)+7*5zx|L!O$531jx4h;SH@BZaB@ z?1~a&l}SRPcso0bMW4j6A-cTqP)qS)WdBV1N&Ma2MVD>mT(zAp*B#URs`Z!2eI0z( z>kS-Nj!9dsC&{anyUEMJmjBoh(c$`S!FMR#;)s&Ak6j}#16SpxEJLW^;|9-;$x%wq zfoj1na`F9>YUyITz45INl&&tpQQ%T44o2D7;}ZC%Q4|<%M}HgTl@8spQT{b73M{u> zzKx=xbF4$*zlY^^UaijCR6}fpUD@I>E8tiR@JL>D)vbT*0og#>l#>}@QA`+uFv|9L zn{-$8&fcL$Tn8L7MhjR??WH$v$GP$FH@@cQE?lzOD%HR~YUR9SEl@wlXiZQ4KfUzh z*Fto_c&0YReuvvy9YrZ%Go5{hwGaAbEtYE?8>Q-C6qlvq*nb@5ci&-+gHfIrksn*z zzlX)pH#YwO;f9*IM-{&BI0&%q0vmAIW8)wBY3&C*j&B{nCJW*Bj!NGQ8DKJm9CG9$ zjSn!#e#am`7PJh0YhErdZ%EEg8Xo zOclCI02Z!u)Pm?x)4roa98fF~U$)xczh9X@yf5G=NxK99E9*Y37ti>sH-83jM#CWW z0KYwV(t>~f{y^+~lMXnKqZBUoPn;(@jW_v?e>;e=$2#Gy1p^9Wyll`!`Us;PmTv&k zbb5uNgC(VlWpW$^JjE1nTGrZfxfPFM{^;UwbNN>KOK0~B7fv$dWGuXUcid6wVgbZk zvyS`dB*y@}En#40&?;cmAHiDIJ;!KdrgBVGoQeYg<9s-DY~9C}*KmH}o2oD}?)$|k zr?=ynOa2Mbz*=+h+=wIh#bvxB2dD-5%H2l5R;VzB;W@6wsCIDj@H~ewt7^;sk!!JNtD>)F9YMhDu zF+TifT3(JR0Ny4eBL6S%Uhf*g^Qsc^dJjH45rOuXkCwLD$`G%!F8l!&(i>Ku+TyuU z>)tc)iy;eZL!3He=-zNOT;b$C|36vE0}QSO0x%|(_1`+I3dR@|q)}ciu(i6`wp?xCjHmhrG6ixqV zLIC`5Tn1KkQt;(rPV(KNzdiD;A*t|~?%?mg0^qQ4#vp)IO>OMiIUNSwKpO9T#W(Zv zNB%MMa(vj7mjI>KR@!--4ymviVc%h`1BY*HWBfMVp+~UZc2l7IwFYz9w*@nLVnclJC|RfQUe3`=ET zvp2IegpAA4!G9M+39+v{z3e*5!k}~2$dhg2JxE>dVdb;bWdZGNeHtfxCRPSaQCzaQ zb(K#}+9E4v&0Uw|J6XDD6>PF{4%74mp6>Nm>WZ=ZoPe9@B_Mh9>3713<`W1`M8u<+ zs9V}jEfK%05t{IahSQRWGgL7u%(a`xbA6lPZ-w;t13v%=A=m#%JnyU}1p7^#=rE`J zMX;EsqWad&9w9<*lP)ya5&`Pl&+8mnNt&1~C~53${-{qjr2Eq^_jLn81LN`0^Q1>u z(L~+A{TrJ)FF)-*BnW@;R?6ThX_$(is})RX>vI9vb`xG;Td^oyWukB!(cV2kuAQ2z z)>WE03pLxdsh>+tg#R_n)Js5dZrRc3oIbpQ7$@JXyo10&v*z@fUwa_FdRH@H(nfHY z(k#0&3|IL;Wo!42D3HgA(ih!u>HyHzM4R~@oqrOiz8_mjGTeRHxyKli46Wt3V}c|B_f=yBw9&2 zG(*W%QWExil^0TO-ubsoEs#EZ8hAn zV+D%0c*p-G*$xZ<96{cBC{YXi!JQ88y0wPh(W!KSer z8+%hj%G{pFP%y$?q1C$|4P!Q0FpeQ8+q zS|o|S_d+wpxJw(HVk0rrhj4jEylwC1X$*oHuBY3T=l7@gWk3q}Im4fMYG)6dl3E=C z*^&c2H?@7~+L4yaZQ8yy!ORsb6PlRy^Dg7h3x?bVI)D_UFIncmr$1`}P+!iJi%ID@ 
z_3ncTQ&sk8>4HmRQsrtQp(kcrD1LpI62!}@C#?Tu_0!W5iQZnr$8t@jnIS{0mbI!#Y%KLDP4zO)1b3>eJp5r@71~I-%&J!=ez5B@5T|uTds3GKUGNz z2EHoaJIDB$P3tiBL|x#J8xL*Z)G9iSToD*o6!w zYgZ)K#p!h|zMwuMpX`AbGAMYIQ)7M$VKYRF+O3D`<=_iunVpt95K8S_u?vGQNU@*i z6}US11UW0MXIHu==2mK>zJ86ANpo*3`T8E=(G?|icF-4qTlba%6AVJ6B4MH@-D;>B z$-|mt0|<20*JB<{D)@26oWC$K>vb%|Y>`Ycdl6j-v6h}OTOz@-RLY@SluunvdH zOV%gn6GcT9Y-~a+URm9R8iU$z>W@4Gh4?pNnsQJ`haVn9Egs2?cJ1I&&tZK3 z=6mThnu&=}yJCN2RnE#N>>wDukz(_ggn#>x;&F zb^*4xRkk2*KC9P?zWITs#?eZ6GO`7J7 znHuSg{DF!orU-tERVU0ALql@mq(+}D167-)?v=65h2FlJCAf!m*Qeh^;>IHOU9mSU zL$1XrL@s>^cZqKIwAW9%xCv+?c%pIv|UzB;uBdfNqIzcaO=ySe1WBli$$ zv$&lWqoIe7?25t{zBQMN4^o!8R4z}MEJBJB3xTx8&1~v}_{b(E@20z{Eq)>a@F8WQ;4Bvi}8#FeyuW zY0N#DRC_On`&QGDy46IX;YNUX_SS>xmiRO!cfM2AcZ;>FTOY4&neoG+?(}W8z9ZwI z!CCqboz|SZ_49J(4Si|)lYAB{USMis7Z?axyf$`;c9PbC)u}Iye3%O;``h+I{MgQ>@aGKCE$ zvzEFg-AYBewGn2Jaw&5I?1S|7deSTM8&NVLessyjldE64?F`$7vPHNB?|N312yK~Y zGoft1CUf3{^+oHcK_a84-y}XDAt!TKdjxxx6GC+|^1b0+*rZ^$ZSvP=A(iy7OMI9=P)l%gc)o(xuE9`lajag57E5jj#y`wM4BWf*48glJ|IhT886-6h#s_ zaC=1pPu?rA+ir5gXyHLdRV@?Sfyk&JLPZ8KLCDgK)0~b>2I!&V9}aP+Qfkao=0H zIlK0{$l2Pd&~5>II{;4^7dz^R-o!8P*i8~K?i`D*;lHxEU(D^;`?5pFV`d~^sV1X#N}0astb=r*P5C0WL41TncJIHJS6tOpqFxv3LmQnNsG*X zcn>76Y6FGOv8T9|tNd(97q0k@K%^`uX-*)tMBFCzue&ry@-q5HP-|{0@uzK<&L+T( z<|zYur`EEoqlGy4eYakFo?Ec@-P9x#;lLShl-763{V)Xo^Yh9@S-p7Ga6@3F8{65T zvPw*<%%MdJelBEONA9gp37p*tvD>0s_ zK2L!K>O$(#)!b5LE~cq(eS^roBSk zN48%^Frx?q=~n{jJL^wRd6d%eFBD5)dL~^@dy>n}{oJh^ft8kUZ9^HPHC70D( z(TF%%(8lX|^0gen@@#n_bA~G}E&U1foMBDjJzs9lLq_+Weq&vls5jMPM$cjEs-6?n zm((YH>bzDh&6~S=KF+Ol4a}{43H$n0+osHEgqVicbL7$Fi2ikjmMn7)_Emw0%dYZO z8GSVy^T9%Hb&NchboW-N^_;R8V%fp5vZY9x^E;)Ko|=6KqSY!W1+Bf|$UTs}Uy~oL zoA|3KdruW%62%C?HT7=2h=jr$a0FR?idqJ7QHr)Js|cG~riU|7%6#uKZ`JghWAD@9Q! 
zApYAAS*^>qC!xF3ABRErL4AqV9 zx%s&Q6)8qNx)T}f)Je+>_~~k_UMy-S_&C7JV(9z&^mBpqt727GrMR(glI9+u<;D>Y zR)+0&(t{G)1j5d;vBB07B~m+@HUUc)qUodHg-pm&lV2e-?;O0Vv!>uM!e}&_kZ>2J z)Y^Q3Dx$kS^^$w=Vwk!B*@%)(05UvgzE36tUvSH-=j#j|zFL_>2XuC=oT+#$bwjaR zgoZcFAe_g+y!;NGIhO#Oc6E9;lS@P+Da4F9AUohvu*-zD?I#~z;`R>-JOcdLFP z*Hmhbyn$`Z-1+cZnQw}wm-9VZTf%Q$ zE3clU_ZL-C}5a`3)qx|{4yw+7yBeSBZ)jq1~Jc;`+JrL65pYj!-JAHMCOoK z`C!CZJ1Hae)vWwYqMDHGP|7}ad4|@VofmYcZ}+&L;n{Otq*f;l772T+{=qUXXeU}4 zh8xPjy~4fkWSp1Ra@lv0m&p*RN<;Yc3L-oO7vxWq7OUUW&X8*?a;)T?<5Q#uyK_d1 zh@q-LZlA;S#W=ymVuoh!uJt$x&S+J|W-0pIn>bll_j(koKs5>0R>G4jl}nK>zprQ4 zfquD0Ud*9cihYZp&B7*dQ6#uQb9LK?&R(f$2^16-E|UaOl)rAp2|>sC$fFAECuG^= zgI#~GCv-3_8WqsPIF&{bO4f;pU*KMBs(D$sh!3))lkh&*@Zv{2%pwcKMFg1YGK>Fl z{V9^CPmmGMA4j_{kuPfrgQ-;a^vXZ1L)ZH3_cu;HuN5T9gc;FFp<*5=@i`>v#2nffx+6h%1?(O7*^`m^)6zq{7rCDs`JgCkO_jtp%W9rkoi+e`#eKpjv`ZPMj~&S2FMHC&z+0u1 zqo*U^*UY?(j0C!5q^XeSflsQ^F)~oE=jT_3hih73>628lRL=N}G;fB!!pys_;EYyB z?SvFF3jF0ENWVAFJgXC4MW|!_kSTC8LwTVr*o^O5iSmF&vGtwxi!D<@09XZyo# z(b*>o@Z{!wslc_!KQc?wv?VR`qb*UvSI!yEmpLxoPXwyJC6j>GjwL?g{wG3wVpWSQa0k9y^r@0?zNk`B^h6K zvC|axnu}b*9#P}i^r~(4nB^qgso%v%*qa-vD(wCmNi3(;s+W6_YG4EEEm^YGo)c1Z zD}&RejGHMxxT*PSJ1Qo#XnzaM(>;*T6t<&J6G~*jJuQ8&xdxF$WanKU2bE|>@JCG# zZ#EQUa5{5VRo^Yq4p)wI9^_#U&<_dO42hz$)S4qwWe-m547_GGw}vQ4Oe?G8X2K%4 z<${uf7NdFk<9Q|(O&;$akl zzz-b`hpEZOhVV3Uah)0`r!eJa()&3e0e(N>rszY_=a}qzu&Hy~5^weLHec5Mb$1L; zXJ^Zb2>(MY>W)G%AERl~J9OJM-o}K8Zq>WDg;(|AC~4G zSjARa4e>e-WTQ}N@#?G1%$vwaPifTamdB7OGPy5)>wt?0X~?>y+8OV;rXiie?-P77ZW*sjlJ5@4FNWgQ;Xm&$2Gh>Ry~)@7!x zVpg$Y>AM+&SEV(AM7&3nuQ{DhlPj>ya&)C{@thiJnIG2?*y3UbgsfjAe&QTKfd{}z z-lj;?>kfu>DlLhr4)Yhw<z;qv_p}w2A%D2rh&k9xE7mM*e~6;m z(w7sE5_I!f^UxK8Tvz&@3iCb91mt^s$9bcVBz)4DfqbH-a;v^Q?5_;Xj}N_8n5H-PWm>GeQ7Js!E}B}YaAAuoTACtz^%r7j9wlr2UJaZb57}n zej>Ls%qR(3`N~U@cOmr1(Z1$pTYcxrlB(W&>=th)h#Kpq1?l`m`yU|5q}ZCJe$NV1 zKarixli#77w_hH<>X59J?L`Ns`fNhKzwn?h3ykek-)f#ue?!%L&#gaMX|ctOnT4<0 zbm#Xw&xK6JuMK+IWvRLEl)v7X64~&0ef6dUvh&gwrd>s5UuaM(iS(j2o&D4L^nt`Q 
zY=VK(nr3#H$G$AqwEbd%m$eu~aQ2NSKmVSa801MA3#|{^j+Qr7KVBKk>OX^z&Y){|i z$&)y;RSqQC6tjALa$*L1AKU%qczyd|V0wOm2;7R;-Q9(zp1pzL5t{zsQIC*7zKlf> zuZ}}^Z%?|S_6sH_Sp^U@SRf07uJcl;U#Q&?%Xt z7fvyC{!3V~B(naG0TE_XtmAyJ{#Jx0)w?7HjvTtBYj2}Ea{DbZ>RayV2Q|QhsdQ_K z%MD)ey?(=B2TPfcNl$oUZzHz9(5<6)CDK&f(}lB|G@q~R5z|c{5}g72TEf)vS&_Eg zJKNNQ_MQ$-x0(26StK)Vf|*5VQ_V&bJo}|)*E|W?uCe3`_L?{2^IuWn@9zou=3}q<^6Q$$0;jh5qS7j5;3Q#|dkYIH5$a%j>e`W*~>KiiGaId#K1L?aVmG6J8RV zls2=uD{2aXG!qI!)X=N)0j?VLMsy9@@ZuT4ZlIM#2XEtBy>}lEh6R*&;M=*o6@ye# zj3RQy7WCz z74+S$j=|w^>~-D2na-U5*%BN{8-81R?b>O*%P*^Kw_j%LG!)c8T_4n|a$Ga5rgBH~ zl5%#Re7@6oN5r4^*2JeDIY`Hh+P8TrO4`xcvq+6QsKBNAIajr_q-zOMPg|x9D<#E| zs>-*RBpW&?^FuzcPEdd{!~{Vlg7?!%&x84Icb>8+R)TT%VreX91?u|eq(lVm z`iC)A?iF+P)H;tR5F|x%uvt_$U;ZofIuxSzyfxX#{_cZdc?smCw#W$aO+OuqJ(G%E zqYIl2&}SwdU+fxo>UbA(+dYZ_BUApg#4ir*&v8ZN)|J8^Ol)+(#G6weIDcYdAnl8* ztIyj6Z?c(k3H|ioJ7HPyS^Vs(FL%dTDFv03iYHtREu}!E4sBYAL8^I$Xu+kN(P?Su zbf}NH&8XB!biJEBvhYqS*S(wI5l`CX1PQW0BR%p^njWi&+=5jsbXmfaeu9QJP6mS9 zs({LK@hrVrEGrt4QhrQ+Fm=o_-m1K4uR9z2{&1#Rc7HXu2D$Lo+esGzm+DRju)aRT z#+p^c);h*e$R}#Ypgw~xiD-2Vn(#NVm#+MRUoeyFA($pgM1H#8l%I7A1N+lOSMW9Wx~!Mt;52-cLw*WX zzdcG(_Enc`-0ga3*-Ut2DpI+DneW8J2@oR0%t{p(lcn6^Ax(iL7<7Q;#Du-FI=h)%4C41>|>b?6*j zBX_@L4TD)cq<7Iy0ix1eAYY%%^?AQQNUu40%l@f7rg$$e1><|#y4SkJuV(UQ2C_bQ zcfaiM8g?*kM7`0HjZYETS9#|p>c!InLcbDA0(hfkoLv5D$Lk*lWMb$5u5dVIsd1F{ zhlA_O8eMoHY#+8OX`95F8H4WIO)Z*+Z+o8a^UP#ice=ceG`w9k^Rv@tNFCM?<8&)16Y^q(Bhj?EZ5s8i5{3A2_}Aw+?rpEv zDa+yK;hf(#lzmULD`W>rwXno>mH-~b2NeoI&#_hJBz8U+)XZaKZpaq+eg>`t!V*%7t z%jchJCe-_U)(~g5@bo-04Q!;;1T^K0>t0^Xg+i`yp-q#CXtiW8=Nb9|D)_DAjzSc~%eonlE6!4Wc0dNNBk7e)bm? 
zH}Oz?k|Gy)1d$#(`&Q62c=_rGA&}^+OqH@Ur+>u>*Uq9WnebdjoX=?`L@=YW9Pc%N5zB9;u>2+jA~w5~@;7DcspuWS!AFb>GG&){C0 z#k2VF<`<&do*H3_xuZ)Zj8n>Pl;~d#RiBN|q@)r+^z}S6>chDdE&4}!m2KBPEg4Av z-2E!t%U{?-j(d8bfIrJ8J*Af-b+PH`x`eWK6gCmWOn&i4MC$vT@%BjTdsf-g>PMoT zHwzMm> zX}(x3YQ5%y4{Z%h=Gz3S~ArQrILcLjTMLw z`uR9`bp5(K9a@?;+04S$N|=p%pMw@W!x=3vX*^<9=hiMDQL0awj|kp$|5!ch=vr?| zXOEUq)YR?ctt*lLyviP621y#`m@}0`?rcN?u8Ii7w56w&L)xFg|3l!Ai)PG5yXsHD z|1Vv*QT$Q0+m7>fPfzQfk0&R*%Dqf!lgHAvxGOrVBGX-oDK%#P#!M+Wr!4OCjA7e~ zU)n5YOSN0w%}*NsicNE{2Mj_rCxH>F({$ONH_P}fSYoe}&K~bG48-@^wG?`vE(sJG z&Z5;?k@k8!XJijL9^tXPS^H9MhvEH~47BLtMngBX@I)PGqQ*xkZeS zpF~%l+uB={V?&chaZS{8e2{_QW2#;5cg7Ue6}4}9TLVIyzNkb?Kt9Ff+Dl(>d<4XJ z=4@LWGM3ty{i(%#36xj+MY$1|`gH=j>T97y5^=3wX<t@iaBPgo}6m&7-h}%-b=qzuppJJCCc$Q>yWAd zPA3&TZHbE#PE^6vL3c_V+Yk(iHCyhfJwR4KY!6-CAMO;Bwzu`LAuA#wvv)i2(?zNa z?=G|O$Gl+sK2R;^DQ}9#O2&y5XujO!rr)Jz=EB|k^`+oB&W>u&1u*SCx1m{raM{*8 z$nA9ayo2%ioB4uhQ&0^%@2wCwd$E04zDRW4OhUM!p3>tkhx2fgv{{EcO_Ox?hO*$B z6-n3Z)WC_@9|_T^4AFW0{kF~=N7X~i5H*0Dk~Z(I!rjPqpG9Tipr%f)01kS;c9pXl z_$o1B*(r#!?nX|VlC1RU^8d^m<;q_go|@+#SOG?`9n#j%hK-L8w&nY2_qv{CGh%ei zd^oK)`28q*r!^BQ_V$r;AQQMS3NFGZ9^|Uy~F_JXt)|&10BS8%MR;1+C<75v_lOYV5uN9zycz z3P?@w4$qth5eD*_KCK4VSMNyf8ax$t8(ak8h*qaTjbb`vfD2T~B%{(>hhYzoXuk5c zQD8T4$Zim-OpC?_s^$$UowA^AkoaBS`mX^ch7hAO`wC4~3+bS2eUtCje$Cqf7 z1Ww`X(b|9Ncnx|6;xnHfg<@#QQH((s^M7#guU!UTvh!G-9fhT z+5FLSrENUE$&IHkigV)Pw?8WS*wvq<<7bpf>lGn&+?9|SDT#8=n{secLO*-f=+sk? 
zCEtBNgs=Ni_DvCuO&*Z`!k60q?&;+7+g`;m>(^REviBpuduSRdcodW6nCV(d2UWH} zUxzleW+?`~f%btX4!n?cAxqAw70wqwTpVXut>b=}xw>Ay!(r_Y90iYvS`>mIBRoF= z8ex)v>!|zpF6R_?qaTJxvNWnxtnLwlcZ_!mRZ!pqIM~~;^u@~98pZNrq;0?Oo8p3$ zBq9@jITLfrq#CY@V&tQ^*$AS^6iBtM05}6lA3252$V?a;y*xrNoNCOJeONw$*n6DF zY`-LWJidzTFpz*Wn%qk%!44E*O>x{NZ(fUd`pD`Y-nu49sP&lePad-G-;vmJ0Cfg^ zNHf-Vz%nYjSvu6stbbm0my+4BR;;-}8#2bQ)p`p-w%P_wC6>3ZskCny!d+|64UbOn z?9~c3vz~dE6w*4l(g2;~Z4~5#M!)uD>ys0=2o#ym)yLGu!dW}6~`4Uy&GUXU|XP)(o z=-yJ!DPsLMT2wJ#MKLogO1s7PVIr{Ct4TbuL6$-628#jH-Y>X24c=W#6WKi*P%Y|w zrjGyc{jc(S*TK4id_d+YSBlBa`)rWJcv7b4b?2!_)dptu#^6p#KuHaRsT;eAS{f&BRt}&t|Qe}8r*qVFK#ao9-JXs=Z+|!)A z61}4=GF8aV_2NOm(%!C$mRs%m_Lt8%cGcw%g?jl59*$a$NCzcET;m&b%Sj^}y6HTd zGS(?Oaq``K?IOa{j(oHKZ0}0Yw2SA*fTf+VZQh`hXV^ByvaKuG#wp|<$WU)bsSIyqs?2Ev zZ`lN{vP+#L-BY&TL zO&{<@z4b_y5l63))k!Xv@=gL}bMbxJkx-?o*)Yz{k$leVL;G1kV)gtk6PplN&+`Ut z{`tF+g~I$IA``7tlxt6C-rt>I=bssw3B%FS(v;?PcYU?gi$uMJCJZU5pq-G&cuRlh z;1U#WroEXyKPg2XlQ)tU))`M^0qkb98y0Q>yBSA>Y6c%jvIyD5%Z2^?fZ##OXfg@S zUUm`kD_G)FQ??zuAUHchjV6Ia{EdeoD=Xg;P-A#7Szfyum#s2lFEe5nK<8#KduP%| z(cFi=4rvzQi}2)&RHwO%GCCtkq|%Z@DA$^_>DTZWJNQ*(pT55&S6+q&*IhKTZNzNnFb-J?Y#n5sY3OXRJUJ5RV_zG3GLIh%bCw?AY>xzaCccf zMqf4*Mbr5*O5$!brhk1|$!EPeB-FUj8QGKlthdKC8A%@pc)PmNIhSNn(ESR*JN>E4 zB@V)`O7_|A_TYLv#rM6 zt9Bq94{ay2h`_68r}mr_&X%~L7>0W{eFW+)Y1yDqoGWApINMoyUQX}cR2`_4*onbR z+l=J2a@Voi1T#Vad8=GH+Yc#Ij9Q9)?S0y;$r?k|fC{-Sa+w09##FkbNTwa3+t#~% z_vxfysNoPL^A@9Yz$fLE`B=dB)p$4@k^>gyESFo03x5nShri1BJhgc*X>vr*%79<_ za)Q@s%$&dq$SyJeZT_R4X7M$e^$#E9388#KH!kYq0`M=H*O+>}q>PTD3{Q3=t5F%8 zHhmE%!^j%^DEVgS2PNPXR;AIW#Zi~$XNXtZgg(-6$L;W2n(gU(T42i2!}r}ZGV$E{ zUr~C9Qxb(nL&m^{cJ)pmb@5%cR1E5VnuAPpPA-y{Wg--`Be_5p8ZNMZ%}!5jelsMf zBrD2c@`>QS4Mf%bP&Y}rTkp~na8>Z%) z<;hN-yFEsTJLU-A00>1dxP~T57;S9?PglxXa2qU|a1+S^3#-~zSuwqwlQ|^cW56CO z%T^DiJUh4?88dJ1k?_UG)HUoY@BU8Oox0cO3H2Y4JSXZp>L6a8Ieox2kE7c|4=Dvv zgV{8Ma!kTq&v}&j2p>&gB)4~@1x3C*qndOxb7;gyxvAD}?O=L~&C8N@o9;dRsFEZq zzYEU{kgKV3%lDG^BF>cq|ce-cE;H@)pjk73)k$> zxvwcYnP((P7R_jFCzTNn(%p$${5{yXF8TKtJ^78cs 
z6|JJXSgOr;MVmKlWxdV{y2=jrq!ODCPvSVNH<5cv*{Ep^6FrNOfsBs`Z|fb=NUsoS znP9K!b$3idSFwo9vuH1_aqSw}e%8`AEy5epUOOVZtbn$Lt7$Q^kI}K>=JVfN%n*Lu zwv+89_#<`qCw5Y_PcLm0zEA$(iJ+VBt=J07Pe*vJmW}bNVh+W--)w7)@4>adFMNm1 zx`|{;eY2-6rvv&&5KdNx+WU7c^t*x_81m6m zaTgyg@((k8i;b$b?6Vl9HuB7MKU(OTmqB<rhPy2XcEMUE z(rfv^fV8MoO=f4>lJNlW_-@alg9A1)$ zD=6B{hlF@M9=y}2O3P@`T!)A&$@8d-aVGxR`S@Ahe&ToEh?dKTg|s%0WTs|bzgYKo zJ>5qb7rm>CM&Zg0j&Co$0fh0+#l6)Gqc{gx<&G{KJ|$B(WxK0OZ?|iz(-Dk}XejP4 z8B5~!TJ7QcyJ!R+diERSUNXZhrB@gYmsm` z*kTA>lP`6$pb$bnbUEX1t#F9v;;}=y}#eH(2mglonh5l<)p&t%tm{wQ_)?^nA|7i2VHZl&>i6 zp}bjzzpuX6^}DCfjKD`IEX-GUGD^9kqZS4RxM|1%ugrved#n^(88Nq56GAL5JUWn3 zgnm*|vek3jag4p^3rs&|&AucFI}o9M+nfzWyaQ|TCN#5&d+f2|Z=2yufS%(ZimtvN zY(0P56-D-ZIl3e`bh8k3Hs8{cja4spJH*pQyW-406A!pK23#P+Z4vvmK~!$yQA&sj380&oV|z&N<9idGF%Z^BYY`v$xtdm@#v-*#?#P((!$IAz zWkgi^ib^HHAt5n%crJn|t8)95%J`%W$R+4aL`6pRNt&n@51LqMSkKd#`%V}+#*5ao zGg_7>Dd5|YpA3YfzQ+3q!CJG&y^XbTJuN5UaDQ@(x|3Pu`*W$fM)2)R?)5S5c^44w z02Llx6vr1w!74I}T%DvxmSPqj|G0VZGp`MyNFB8Nb|0VyxVM27SlcBK)=I?bj%mU6$+L3Og2o6UFvA9 z^A?&2TuLm6J$yCuU@@|iKv6EoKFY)Q=p5*}N;z4}2QfVpXREnY*Lgi{ItmCRbFDwv zkS2L8L3DLRp4Z2FlAWAawjSA5(W|@Z#h(C9Bsjgz*=z!Xs{`i1e z)nrRaNi&57BTcmWf;k)PW`V!|k$a8qVg}vWdptbp<>)7AZ2pTp+;fK`jPH7c^Pma> zedVE>oV9Gk46I`Uxvop{>5(x?MV~&iCART`^PS$I8+l)L%ugwm`LqnG=cp=YVKbG= zieb5WBlecs+KTs`THCGLvGg$|1DDAC)!t`8s{Lio(#;R&-Xl4LUjk%&;Qe@Rv>z(e zfDtnQ9AFv1El63ku&8@xJ~PMB+eK5I{KB^K`b6qMfTvy4Dj4+WKX*dcL`3e9^=178Sj6N$pbj)H<;ioZtN4==xWH zuBR>^PXBo~?f>EJy`!4Unt<^YMMVWx5KyX0ldkj*Dxh=_q!*>P&^rXgLNC&#gOt#F zlaA7n0HFj3y%PvXhXlg+u>0=5E4tt7`F-d7{_&9H@ws#7PM^6mK8N>|CQ;Cw6Kh3K zZ8@@Sbs&pFg#Og#t^^n0XVRnaE@T3ym4K$ii^_^mEzK2qm|1D=OstxlfF0yTY#|?y zjWJl>E3y#Fs@x5Y$~_-RW$~RZ`5Bsk&;aDL!^a?sSM#6p@2i+k1CKXpP*SiiDnl(V z3m+mvH#Dq2)Q_vaCzM;Cav|C|Qov#b!FMFPseY{@Efm`4veaI=OVNOh1%bHC?~xa6 zq&~?QtYXbf>!Bi?@E(~%pw-~|6T?~G73TjajZeSnTg7QmsDw|QTl}Z1>!Zu6RG!=V z&8cyXUx1!b3h+!?_B5j~2n2&!95j$+K)~d+tVVGy;HD}Bt?!P97B-eUZj>KjIV@L& zTu0RrV%gx6p0t*)cDxYZ%4wu)zG+W*4K~cCFhXiyvhQ^uIu_LMu 
z(rj*NN*YJFv3Y%4Rmt(rcXHZhGhdFUk++|};X;_N_*fmAib{3=+&Mc0LP_EW(R_{CcKS^0%xlQ_R0q zrZhr8{#&q#fh~v^eRK?Jpt9&w+DXS!7CoVxih~N+;~=IR21INAUkoGXlBRF?OO?70 z%{!5Rz1eVsB|YC95ldme)dg$!y2s`XL>O|$)yGXc*WaN&mGBnlfTlir2#c_}?KDCf zYK56*J9Fyb;f83rr4J9z?}!_39_uBl39kmLYL%RfjH;1B=-V4u*iM2JAU_HQSoRfa>heXVFH z*BmfeIkSGa6ZJDG@j@c?fQef0d!Bf~h^K1P!}3fgiWxI*`Slitz*R}>Frxd^tdsk~ zr%x_TzpCQa>Xl;;!2n>Kuo9^-8=|Y!x@u`i<5>G4FN{Nx1&tt-UJ-}ewB@s|>&6Fd zR|k9**EF6R3$_nlLYC+Qs@_#=H&84K+0&@-g-u$%;QBjRBP!!My zU=-u;9?Z9^<#Z2pxZXX`M>0iO)SX`r=haK#O~THXADK^?$v*33?AVChN;>g`Ie>)r zzW`3o38%Rj)z3xfcl)VlR$3nlNZ|XaM@tR8%bbEIroUoxM|A(R75XZx-%gO#9cMkUbUWVaztavBrO#xFDk z5-&P!r4=m%O8(~|d|1MH(}=awI+>}>04;g54&{C!)&nW9cH%Luig{{`)vV()J-=j- z8FgYVlvO&_4+4ewB70&bB3kAE4KFt7)i5?4)3%Pb+zw>@LxvD)Aa)>&$hU|Jkc)(9 zOC=uWYiv>l^2OMeSdl+nI$0*NjiRkSADKll5JQu6a~}nLw{JOqKEYWP@?ykMBNvD< zjJb}zfn^nm)+c8_%NvS!sR9vBxl4|fuFIPrKOst5vA{mqaJy~%y@MN^21fC zhRq{Kj1bW8fg$gj{z-TF=0MofO!`_#SHqL`KxV$IQ+0zxTOz|RBGesMc0K@}TFTZg zx4sQvO00ysu$f!OJCc}S|LO&>k2p`}DU7et(8*nM>IKA-w@gWw{B9)+>{~%> zJ6MPjsV$WJWH;-osnLknM=kQBo|WBGyYDp2!dxVL8xXun&y1yk7&tGHW}(dCB>u_D z5x?gwUCe{I*sQ%t3WxmfjpUzt8^DD+Aal?x&yD*}CU#5^2YB85V61HQK{YW$Y~(}V z!XvL^Il}?HHnBs;&|BW+>|-hvxkaJR%0Kpa#v6mW<##2bM{BZxa1QG6bH6dfPx@f~ z06>ju>UichC)+=lK79yiA$yNaYsk5<4jJ|<#wp3Ewzh}#ME(`)`47i_;PL?`6_2$Q zS~q+ukU#$&%l-h&DenU@A^;n!NjyI!t08zhQwss0rCiXb%wK&Ky2i<1@l2bv8qL)5 z*;h$|^*~LX3-WLg(@8q9PtUZ=rcfbNs1SXgCLX=98C)q_IJuQj$&3Fz>3%1O{_~TM zH3`rrsK8~o?mOWHWQ4kcU`TbW9RzB#?V_K25sLYyAhL8MqRGT2ut=3t=eTdjQBt$% z%k8)uH@9B(W3SF1R_g=dELbPJKRk=NG7cp@aK{>9jrbGR4ZAA5NR_Sfi|m4IfG>xW zl-B(}D(3$1%5(v*Y_Ny&Y=6a_V(}>-!Z_PK1oZeR5S*!!8)^SO#{4rq3-tcL+i^m? 
z{raCW&A*;;tQU8>4HB3O$tkz$P2wE_f)pBJ>Y!DW`D!l~^wrI;P}oqe9>Fq%C_0t1 zoY5{~tZL6j(?utcXtAsrv83433>kU#wBdxp3$I@|8eV`df9R&!bVN3?w_PHAS7RWSx0^xY=@eD_{ZcJBfcA<6ZJk~IDwab8Xt zT^hxazAQv{C(dYiJ$EB9d0pDcX_t3kU|?dq(H8c+%6g(^H!nGPB~B3hvyl|!kD5UK z%A8OCzmFaH&8LP02=`la;Gci|lMw^rzxEyZW59cBt5ygm`e6jZ@4xtp-?O#P5vd~g zVE6Bz-AyisZQlMl0ncI~swrt~F8WgHlMA_braR?_mVHHk>HVqBIGbZsfwbTE(kFiM z0?kzFx~oaNkAFD9ZarDsxIJqWkFgOhwnQB+tgi+&uI&}( zkVPYsoqyKWKZFHnpgE@}B=y~3x?T3^?nlS9$L71Da&4yr%Lb!g(FQ@hRceqHa@Q_? zhdwjTa~wn4)f5{Cd0C0&>RuMmy--@lnA#K$d!0`p^E2bl{usT`H-ckrNSM-3``sXh zU6%XD+U{>E=yR0*Kvx%W-~0_w$Et~&R{8f+hoo3-;hG6`YX{h{C*u!Ch0wA#`y0nj zqdn$l=dGL$D(LIO`I-vriw$f3=X&DxS2H42C2G`CI<0=&^(tWKI#4?h+W5HSo{_ZC zUD}f#FR_aWuwPF!rMgEm?C-F`}}<#)^!zgCm~j((OWl#!zE!n zVw!rETHhbzpUCH~`3;Xb#0&S9*gpqIaK9C2P|m#5)^$5U`^EZa6;=WIq}0+D@3}@d zYu^3c#kI5KbC2#tS@u_y7RJynv53@ytH#|#@m3Oz$3HjtpBW7yjMKc6?{7W$=X$rl z!}~wTB3P}-M0r!v{Ru*G9@~)Eyv7vHse1Rvr{zznWMVkPGU@Aw$9RqFLq&|31^KKv4BAH|>lfi-5m>owmq z)*rhdS3DJywI;zgzvJ^C5cbEfKDk$dz5W0ViJQjt6Re#(1+6Kl?fox-R@s#O2WS@h z1n8egdU6E-G~ZgIYrg~<@@lZ>AE04}09kzYgv3)fF94v`S+@EA3TQWpeXjR>{Jh3w zXwSBM<44SxaRZ8h92NNBmj-~fJx$%xRQ(`g9L$b8@e>BaX9O7Ri!SHIzh)u--`JsP z1W%Pa=t>zRc;X$^_-dQ&FKtxTXX_8jEvX~eIWtE6R1$E!LHuB;tu1xPAXW(_6|3C8WpNQ#u zaVkAVa1E>f`R$M5>(5^YAEl14GB2;(;|YA<{7eN5){EDzKk4}GM`WBKDjz(CVzgCUtf(=^Hh(iTWuSn@$MhIn z#{pTVl#>EKFrsq3k!;CZRJ|A)8VJ{8$&uYbY^XEv5f za6v7FNq4r}d?k(F>Hcls`)Rn9_Q~(R9Mg2C)Ny1|@rE1QC)Q4bXFD(q+}9ZOX>ib5 zIYn_-A>Ok3d`v%J{{A}7dUe>VbN?#E|NBD*F07MDfk6P5_ySU5`pr5n>~8%^KP3sK zuq9D7DLDA$?Nl6ed`_9dg#VAj+kzVbx}5XIzxk8-nrBGhKApJzmEO04H$JS_F>g3~ zv-5g%y@c8zZQouq|9tg;Q!!F?QQq~^Z)XYcrm4>XKTXP)pQzch+s+n8FIv!Cxqwsw zH0tQ^r<73@TDGz9`Q60cA}3PbA9^E1$v5-ygYy0DSh?(+d#(gfGsJ>c4$_kqAhhTa2y*K|kTv zzgPM%0sQQ*|DWMdWt~>7yCn28NKU`wxemw!hOd+t{%y-=xalV!&GZ0G5x+lXZ}Q7$ z@dGsC@)u@#1g>2A!LsmqaGgIj5A&svHa2GB6UPHPUoA|7r`h`F^#=R>p*`>Fg1l|w zg-Qt3uLy4dzNVJzFD^z0I24?9J$x8$RO>=9>Ha6d$y%10GacXjp7V!}Y~Xf{^^%!O zcTEe(KkZ=pnE?D@GXQO~itf@+xTz!uLYMhQ_tSj5lcUt|pLg<^P`Q+?NlrJQP%C{udR8*@} 
z!rBX3;>rUKL|9rqj?FjK#Mp9%WF;=bdAe5aEBsVOAk~0Vt*Pg}cGgYs={gl)pP=fB zBzDe)R=vx^E2ApUat6|?o}HBx;>TC`sVHQ-*V~_VC%4O5Er!xxLKc7(&>P1u1&wUM zoqOCJWVWVj_mbKtV#{8B+Z21+%kyMu@VQ}YoUnG5=Xz&i;l-50gq@?!#{y3IKO??D zwWb2p3_tk~ED;>{s^{Zf?8n^Q-ZiG@=^3=&@6u=Nay6H8L?|KlVV38nfVtStcxi-+ zmCTLu{GV$`)1E4}K)7(~Y~WYQ$>bmK{)#1n<5`NmezzD}_iVoRz7!+w?@7QOvS zd=#LpD0#`p-!BKuE7u2<2~gNdfqEJzVqE@uE$sWlRkHMHfzm`n6{l#~guIjzH4G&D zT!c)_4Y~zjhjD(bCM&5w+dr`4;pvJtlCnRpn5tElecwaoSl?3WjB|aTZo2X*2H{N? z8G2B_&lZ{Yktm@uL=Jl()@nYL&3xRC_mjQ-hC}@Dw&-<*2#2!<)r-4$fH~%nRFup- zyCxj5ZL${?o?Sxt{X3dm@f_?7OBTEHgJMB_rHtpmjcnmMPI0tz~>N zC-q|QhwByISGt)75!-Z|Q%pH>thny?W-jG^#=9>dRQ!ziT%&iv&g2m)(J(W)md!MY z!=7E=FkY|2@Aeg3);xs;$?I`O06EK>dk`~P~u)3mT6?5!KM9lSWxD zFpu=O+U+u(riizZIe$8(FFsyf4l!*^tS!bgpRXNmu=%KMi~V!WjzS87!ns5)WBYba zS=G^s)#%RbQyw5b4Gjdi4hfv`mwfQYgf5cwgj}kO*U(kTNh2Lay(6K)lK?V(!Y?oL5sXgQC5q8h+oC2ed0}>| zLNbW={?zoTITt8RW9W0`VkK)c5h^S9B~qCBMy5nb3?Xyn1AqOYT^(&e%S@UZTt zM)s~8j&h;|@T-<3N`?~6+NV~F&{^oXa-wU(L8sTIhhFp`3t`vvfOm8=BlaE^O?K5O zQe~>N-EP(qN?l0dx=gfLtDXTj)TX2>@BphBBy2DOVv?&JNo<31h;Tpjx>dc=9^tvy zHUst}*ay$boUD_lfqGT*xFt)o1W4cU8(XH?W2eH z%$2{4n0N#Yl$0#@e4X8QSuIg}YQG|8G+n|%w|j&#c1amIz9|AP@I`p!B}X*3v(xDH zvcH|UNOQRR?XZP+LR-rjlcASa6^Qusjvu5lQ0rEo*Q#AFQPi(*^sT?!@i52Ag+#24 zcT|>0i1N-RU!BT$j>FEl>B^_tS;)oRPu~JX$*3cm>ERvIbB&5c>Yl$jpAu%9vbcU1 z&#HAc|LhuV$;cFs@&c)JSCzFk_d?7XFZG|S^Wi;CeD=qsV=8%70ds5k*tVMN6d?_2 z%yr^z`E)|18S6z!A1AbYZL6g2 z4!*EmG=t%Ith&B=>vZH;FVd9z<;%(py}Bn6y%WrxB&3-eWlr2LKOXmO$OJJMN{}#4 zGDD--xJwMKN?YwyZ&rX#MOXu(muN+wxI(rDnp6|wShqwbfQan2iHc(2cdR?9f|br) zvacKsq;F|4v5F0iNeYu=Y{_p#J`Hc)U8!9Es%sR%Fsnf%UtOJ)uzG02&bg#ia)B@* zI_o5PBgo|W+k_Y$7rsfAc5EZjlw(m{rGh1-MT;lkNqb*SueZ#}d9m2ZMsI1lp;$Pt z#*ADtTjZpNbg@jNFn1hXR4UOzjQcvTe6S`9Y6A`2INO;?6fI@3#yHv`_QMYrF;5Xb zSGk*)7-1fBop(DYts*1B)`O`s!B~O##4T(`r2=cX0)Od}rUAkwK?wSA8H|QN5yi_XX%JOcUZV zQEZH|OAlOvgus|=hR06m>$@|vbPSytv_+wE3V2l&PO`5*9Q8L^F3PZqBs^JyYy<#b zZ>*NfMeKcnMh+h<;@L2NqE0y`T{)U(3-4r77OHC4j8t 
znoMCho1>To{O)~l(#-r~9Xed?0G#JY8ip-(sTvgCSqg}w7{!a6=Y-gs(dA%8nA(qLMzfgf)RTs9Ci$=`OG9YWG|LzK4WI$9q5`@ zoMI`F-!j$7$w+ww5}I_dbHAAI2yu0Yagj2stdmmPY}*$6$aFt$CoPY8ugCfKDNp^l zqa1C?*=XIAHLUQdAJ0STA1$0bM>*3*ObA6G8?2@ZD}>M?+=*^0@c;F@OcN7ueo{ZaRwno9}`nx?@h$6v2a$4 z=GK06def`J_$Ex3Nw1eB{#8jdoinACxq>KckT8Unxtq+|m zEl-8E51N>NEAWV{7AJkst!YP8=TIYv{|QxJ#a=Au2f^b#E(ixlU!ujI$HMSd++B(! zyzD4+IFevZ%OiPb=VZFlfe)GdJ7cf(-B;o&SAi`5U~FvU*>LxZr+{>bTlL&kJrY!HZ$;goH; z=Ea&Bd>w+F6wx3L_2XQy1vZB@J7Nb{H{tdt&V7-_fVigd(RG=qSi2D2lx5Z!M`!m7pgCb$oAnv_wN?!y*EkPbBPUy@ zqGBdOhvOb*ErJvZytIb(u>`FY9S9RH&-{~PjPX{8}E;oDW-x)Mv}KiCxjw*h;tcNrMBgZRD#prk$XJ@e1CF2Nj;@WAH4%-C zC_EhF!}SSZ>0=mp;yD%PWm)nS%28fXNd-x{d6F&~?y!!E-x_Nfn*aKzmDLw^Yfyp& zyzk`8kn1@_^!ypNiEtem|A~|WVAIt83sOf?Y5fXOn`Z;2_xo`_mt|*|DoSUODmaos zX}%YvsU;T@{courC2xL$@HFfm3D-3n^WBhFxHb7QRolJjB;L^rxj4hDZL1|jpC(Oz z{sir%03PgWIu>JlpBP~-Qmt??`QT0Yx|`1AI+)K>gj0YXaeo(9KU?5_*Oc@+MZ$on z?9t>_ePr(hP{~H`!Mb@db3aQZb2~D~FVE^u!%#C(LVaUKaw9FotDNPHqOJkxFT% zF@#WdO3GuI$LpSUtD*R`Vh%@FS*dKQl(QqyT=tlmJhxkTq=uM6Puj*|$2u-8-l|^V zLindQ3^9|lGQy??bn1~WwCMUr@2Utgc4nbdSl6gUD;47B%?-Y~qu7`lOuGFecc?rD zgYujdY80vj6a*hg!8*CLQmoT=heao(5T;1qy?%6>XgLRn<1rXLYl2aLYg6#m_YBQt zK|o`DDb;avW_J}9Z}L`t7&e825|jA`_mzkgS#u&!GQwVV3MMmb`O`(%$)lu;Oe0() z&d0P2#3ar`BHn; zc@(4J&y$5w77sAU~ySgweKF$fC=4kq31aR+{j&(53C-)rZxC z)gV@avNm7GLHB$Zlj_nrLSinYx0i8TmY=nQg^e1K=TRthNLTesBw0NRW7sg--{uMs z5%iA=rJG=gY%&ZGUlC2(AxmPIx(43NBz_C)4$Uh=8bw%hw%?TL>vtFNa%LShtPW*3 zm+4Cs?3v`&9y+H@6h+JOH^XjI6eK+~p zad->hxa6jmN3J?jN@ABD>ew-LcO`WHx7WGEO2p z4nDI|1>Y5XckHzv%r`vx_eqkiWTuRMqbdx86-!2cO1b|y5I!c?zj^49#MNm+SNK5# z_QRGX20&iT^I&~#=T0Pxb+oF)v&SWyJ5x@2daq@yy59t$A|@9ZF)bznOwW&;IOchz z3-lt?$)3-wvU1EAR6%03pA`wyb()gJB6QFjCz@WbyLM3Bla1rPIniOULxcJ!_FlCb z&~ow>6c8W@q;MxdyZ5?p9SUvlb3BW5Uw00-eI1y5@**6}u$5I++Zd&8d^M?tu2A#* z15K7Py=u1IhQ?Jc)orH4YA??UQM9wZoDMOiFA|1Fpx_5}HV`1QlMnPa7g9`tQwipE zD0vi6jI~RPY0<=Yyw9OPK8a+s`be_=iD?cmdoQN_kS*I5YNkM=_?!p$G)c?nJQyS(9ucr4O-cMN?UthJi$l|Cl58#gI7d!P(P2HDOZ 
zIAvATxr0Z$^9Ixoop`;(PL6h${OT&vUnKQ(-H)z=c0C??A^e>y8yl#XDu(VXz$MGe zj-58@D*M5URk9m2{WgP#wbvCYy;Zf3^`iN*I-FMswh%R;*qda`;aMbPG)&6JsgWGaj*c>fLiT6S6>1C@c{?zNaa$TA>kOyPTM;^=b>(Q(; zm5OBMNzAautm*YhBxJV|4a>QTeW9pFqg<9d&jY(6HcIha*IC!X*qB;*&fauQX>YU3 zd|C6gO574jTxuj#vqR+B!J(NpRg%|7`Y5Z2T6L_J)iBg8dOaA)xI9ml@sLII_c9FR zd}}x(B!xMJU~tg7B(GbfcjRe}{%3RU>%3zc-i8iQJoqodZgHOKJP?6yx!Krih= zLXg77^?X?l!YU|=1+p(rLDiu|Hu_|O4MI~uBii=uQT9fE5%K}=uyzT@$mf*ZiqWgJ zy~`fhfS`@z(aFrll@M$nas8$4_s6wT))&`XRLyyVQrak8Vt4XfZ^5~asnUX-R~Yt^ z?x>b*RgKfuX~(2garGcq zoo-}@E#LlVVdm$LfBktsW3EUPg10m&SptV>pE0=>#m4*U%F{^ zZQEa5j0+EacN}gYKaU0m-QGXDirz5?{9|VircOxYKH|nxr8-IhH3aT;eJh z>B{Q!WSg5ipxycb^&lO!Y>gcVbPrGJ*`}(XIY1bWjP*i1>#pdV0~wT4W!fM7BFta` z4+I?)+$$*nxs{J_mR&26ppswj+!2T}OV1%)aR;SGM)LEDL`*#4N%WjEHJO&~{b1NG zzyIl_6ji;lvzhRUQCgZ&iWO0B--Ck2G$R@?k38dz%=XAh4v~wxnKwvmjjq(S>a-k}p}VT!Y4)*;7|5>+ zGyYIK*(Z#HzO-GNX=%Ql_`B)r@AF>w!!Oi1aV#cTTd8@KKGjQSR_70pb`w#HfB#_6 zhoe&8YEePpSO6p#BojWnU&K~g(UPSK5`8Pm9G5-PakNh6h~pmx6*bsTBqVV`4!f^b z7h-a(hO((qH;332JRD!k@PrZ@UW~T54{LY5vb=G3Bf9V?C$Y^zZr9#Qesa`$fsu%+ zAZc^26Hk6DpdEvDx=8)RblSW`(}i34$rpKKP%76=MdVkZ6({*w0Mp={!FE-_hwTxr z+~JQ+XK#&}baFGvca_x4#5p_0|JF|A1d|pXeM=$xv426XRB_!(NUI2ik{mHv?{9#$ zQ@IaHP}yZI+f9nbB*xQa;4wk>;j&wo2opt)cT0px!L)r}apLJ*g61&2NPJYyO=k2v z(%h0;{98yE=5y%hTQ#&%UDe2iQ>476^nVGzvF$(lurpwNN zv!Eay5007XYc3Ezm|dk_%V;Vc5X@{M&Al$3I)lsYW*XqQyXvOFtt0TFc@-fTpn>IE zut*Hx;8+ovTdHRVi@B9s&N&S&M~MFZp#!JNN&awLLev{dXA}G)M!!eD`CPvOg!UitJD&~xoNB`}@+rH%X?g&TtQSCkLZSQ1n~74k z)oyw2z+Mdb#}e040UbTO4PCv9FmCdynyeylFDbG71`gegmNDNdLXH+KUosX=TfwRh z%k|y52z9tC$tQUB;6c(}99jTdAkb6CBkLrx)7&D!*5=)sOfxfEpvm58YD1u|UMEV5 z$3sH+n(8zbd)X%gI>TAI;eI{IIClquDGQxT!X)OSTT6lVY7H}_)=<*FgOU2_8(=4f z2OF<1)jXB^GamWxag;NgJq`PkeuLNF&LKOY-|X4%6zFep-YC#|Ez@0?(y@X-C9-W6 z5cH3VT=ZK#ZW#K~J&K#)mN^T9Fn4tpk8Wvgkq^1O8z@nhq;-T?CBY}Pd3b4{ zDK4qXs;+rkrX$kzTg5v%4mO6H-5sRKoLDkPB zfGP;?HymqJnt)BUhKDbOjk!B;10m<9=ZI9uw@M$FUHJVbY4#Y!yBTWDANKL%L5jb) zhI=su?Eu)&&?kW&K%A$3YdSlW5@Dqu&@+CgC=7eayRa%Y%XhVYmxGYQlx#fb5RY^e 
z^o}|`g7c_>C;l3jl3A>8jthg0TE9m6Q5`&`FgtA1Y^}hZbECLKe!nX(9S2y}jNu#~ z&&^0z164={SC;Usz@(;&u=@mw{em(@LuY6%+-|~+Pz3#wyw+?j(Je76EM~|;C0!(S zlP~0CprhAus7tBDD0GfH>mqUkzCdw)wUrbUSegC^e|>mY`@B<9jc1*2-e#LP*ts7H z5vlzc7G|kKp;6TuW`3b@49zM&HAWo!l`)7J`wJl$xUN@20#i|VwUP|XL23r z9WxQ4fyE{g(AIS|Qu)OWt4;<20V1tXTCA_jJP>4K-%lQ@)3sEaDbURNBzu0gWi1(z zW>B`ypQP2j*y(ul6ByoTHDy1hRA^#YhXwc2#}%T4}Gi*0tc4#ZC72g3fM>9RsB2^u!Wuf)!aGFldiUCp=x9)Q`Xl^>(U}cw4PR4Oe;67WGU=Rs$piu>x#$k#}729ee(dz0hB!Xq3kN~=pIOnNNUzitAwd>IHfPz_u zwW3Ev9?0KHDC7Z21eyI-Wg4@Tf#B2PiPS}$&}xz?mn^>NNu(S0mP?}OZcRL)>l%R> z9x239u3-B2;k|1igqDmg=tIaHNNHVVriAsn3HD7ddR>mkU0{9NLUr+vs~-%Glfo{rbaj~ zus&$ruIEY=wSl5^RTSw3GOuyR1LXA>$YR5kVuPB0fxJGvnyP<yiol43!;N6VFpvC=e0=|H&{AzYG; z&RIxnZ11F~PCh4E@hLL;(iW1U;%-Tg_Nhz=B4}KY*Y6uu3GKEV@R);3)-~6m-Sd}g zQP(zW^gAfD+C8x9;~{}lmaC^}=1_DHzx8_kq?dQqj+m>RUW)2z4ax59X@3>RizGb~ zeHfeL6WPN_ud1Zs!#zYTFg+wSt_$kUNk+n-hA5vb)myS0;X9W=-95Hv_O{zZYttiS z_?s_z0fO5!rL$$~Zs=h`le6W1nLlr4u9byQ*>c~9%uO$Rt}C%CS}%`dp;q`Cnn}A_ z8mgulJAm7f|+FHoE~P^b>%X%vZsz#6j& zBbVMU#Q>>#80d+Gedm;sxXqSg{hrO{Fze7;zYL%jD01X$5?y7h4-B2v8x(2QJ0VaO zqhl8YsL$X+N$+VF`xPex(5yCfRGGH{G!5omupY ze#TM}3NUwVTUUo89+!A=DmN2R+RybWbaQgI3K(!?-6@qFtB(5=|?(H z23eL9In^$y)VR->7)gVwx4-VU4s7bJWM1s8GwcucTeVwzep4l3Z&L_gw)!%ukV|)$ zPR7gbQ3HZlbB(Qc@{uN;Y|W)rg&M=i&o7KaVrYiuG&D3Jo6sGYuQzM>JQ;-=i&iP? 
zK(G0QGB%9kL_@PS=T}8;(Sqx&n3XOFw--r%e*cz+*5L9D{%gSfx%5}`S^N#&;a-OP zC!vKoc}Tn9U_3y#WoL=EAl_OyE9yTdEnbbzFYXOrQ<=GA_T6>7vmxX^iui)xi0A4L zADbl0VZWu&Hw@izJ8w%V|J%r(?n(zj`vk{B>j?9kW-H`NjuDEej}ahY zqjuCWgI(<{=5hJGej@a|hM2_4(dhZ0eVGmUsjkAU(|W?elvj~3j|dztvBQW#3R{k$ z;J0NP>+xGg$N1H`wCKRSN)L%Jn!y^eCxfPWtmh19LcGn^QVPBbduh@%LP|tcr#kXz z`kVEiPhYrLKAELhW|R{4Zp?87?Mj#Z*0-4%a2oCl`lQ`bnZJ~LuoV7*Q&4tK3b@vB zdhMj42+l58*K7F|KB8k7?^}=OrZ*=LB4%;)lvbl_)byF{C0%SmTc8qE-ea6S1;L|) zUfBwEZ)=m5w#TWHg}nrUsot3#OrGEcTkf7K@EUV=dd8iF8j-(x0i1DC|Eov)zTq5O z8ci&0L=hx?`e^*5mcwy}zG6#S)4R2{b%W)2rV?ATWPBX$*A3KaKxLnmgjb`uTEJfr zvIMy&Kw@@^ZEmx9h2BrC;-W&`b0S1zm6>-;A`Da2D~nvf1Xbq2Lqy<|ERmvH>Pcy) z?R9qq0|)|oCpoCxD9)3t%x6!nTpA&CykwzyT1AMIZL;$#Q$e@xv+rH3cd!<7KyRVR z7e0h67W*fIq}~IO<5AtU$)X~Q@$iBUVQ62wqS**5OOKM(V)Ua&0v=OG9G3(3ug z)t;uE^A}3m4>g>NWL)=%^;elc#V#v=P8dVJ`K40lI`u@P+Q%o$3M@#2jKnU0g$GAC zwWO8s(uxBCeZw)0!H&K;wx66ZRn*ixNjH9b>JpCtmK!H;VLEC^RCI~a*m3J=YR}U> zyfvr+E{*~&$Xv_gT%jVWN8s-N&AnE-gp<58<&f4UNi?-S0Yfix$ap4H1lB-ng_77`9Tx zVUuEyki@@TSG1AYl6Twgv-U|ue&bpJ(k`M+y)~&^dhlgSgyTi*vGz)>xetR$9rORl^$Hn|wwWLwNS9F2KRV$ABTh`O1wd2U$CanWJ^Mm!qIbK-D@uP&Q9LnafC@dFRa#nk$#% zv6N+C$Zndl1(=Y7W;#pkVW~C^GgS@k>k6C&RH8^iGX_-)gZ$ME4H0S`sc~eAY!w z{hLO!sMMUvvBt;F_>D^3+I%yCidhrQliCDHqDZ=ig@>~W9&h+jWi93jSpoccJ8s_` z(VwOZyi8gG1W54x+x}2PL%;Li`xx0yiM z`(KZZT>R9Jt9?4)_|zuw=5UCXONlR7=c^b~zcfFuW}B(IpZA})KY7<`}|g-FC&{ati|)SiCN1+VMgoxly9qkKWTD@mQd zah>b5%I+r-rT>0sX6rL_eNItgFj{4ZrvDy^GTiL>451dg>5=jhDyEDU^xyvvv^WON zvxb^Ce%Sv`bbO^99Y%uEm-(+1QrB@4A44@fB;>|%1z~)+OYaA!?4^!jrR$3R=l+-F zFaG7S1bupWnC*_X#M-`vgl=(e#C%xjbSRHbA$LiO9N(1?n}WjqcgT-w6wS02LkkXUm@ek2#$EY`xn* zRLGyITD;=IM-!zuuGdi1`2aqA)Y1yNOG($Yf?h}22y77!GW=wocSWd5BFXkyx2*AA z|7B|9wNYxKTME>A%0AZ@$k zi#p~dSnT6Dd2;8T%E98Vm?duOzJts*`w%UX?nN;fL;J;7&R~viR)kxl`dm6WubPJn zZ+6L}Ii?U}u&-GE(O1~bzG8T!DK|FD&dEfVhM9R!r>dc< zyM+?G!?Q+4vA068Z+4{eC7EpRC^r^nrca@4J>&4LydUU%JH~^3+g8i7X6a#wU(29q zIRf7=;$GV=q?S1lUIWJed~I4=HEQs!R~X&@JW38C53?9KV6%q1su|KPC{37`Pg%Zc 
zuh<;ZC`&OgoYH287sCS;Eh*Dq7FETp9e!wYA}6yPv2++oo769I>Il)DJR0?;&$6oj z0*S)j5Q#)3PDe4hK^7UebF=@;3IFz2F5`=R+qd0`wvxA2ooT|3j|DW=yw24^ZfmbK zgxUtgH`9+Z)LQTBgghy6$jh+0)qjxbUn8~a&rkBVEc96w;-$dx+~? zMN_^%%>OK1e~yvd)T}P+I=fnlt)cLx*jx3Q-ptDFdonz|0v0!uv_6S{ zf%1KL`ny$ac;VOn;u6ZDa9Fj@oFzGjWfZeYd2<|jySd|Aa_uSS$6I&Ec|(c0*$1*E z-GZoFptRboFPlO^b0rk%s0TW0J#qA$BO!xTW|V0qGG78kPQ3D7BEGcldE~W`@nF+7 z1wu@@yG|+LX5Ok{z^%qBNz?-CL^H8j#F&ZuoxPyfMF zgI4XQ;&iWklTeQIsgQs%<`T_;G-=kO>m2LUU61629moH{mMsw%0I(U>;8HVR0m7 zs2*ZFs>(vptvcp*qB?gY#6G#U5}{GQII=y_mdMt-`gwzA%?aLMbE(_8#=crA))yQy zwjU_vT0i`f*EtSeQQi~KE2v;9Te9S>+1RuLHc>KP%Y!a|S48o*GWc6`W_$Q#Vxpl|?=; zH4FY_HuMeRY-WkpmI!>39zz|Hr}P)oWOOqXjCELZV&D5t%nMHP-CF&fnYn_0i4f`U zm0hAKNlP4HjsdK0eC(b{uQeyTDsMl&_fRoKvTXEQ-f9iQ*k|~8XK$YOsV!H;ylR^UyX^URqzn~%q;xSkZ?yB#lIv0B zt`Zg?EHr7>A*eOcsK+3`D>8#3cNZpU$b^F@{ETS4+$D=ko3>O7sxZjU3%R!M3gNJH zEz;|dha`U=;5ZKL;knXJk}@e1VdVk!X*UBVNNc7F$Y(PAL<{oDgN;&T!oopfq)Uk? z1T|Yp%jA126Bb%l2gk<9pfw?ts);OR1$&udAjizYqciz5%qdceo?)-9bUsJSGAc6h zV-Kt~)R?cEj0NC7v+`AyuelK*i20lYPnWy`-d=rK;dt6hXXRF}J1+xdqUj(PFePw- z1Y^jgOg$JB+|uqP?d0APx_32yjrHkF4{vvuKKCT)&t|Frj=VNGRS+hZF<1&jzt zR}c{Cy@Q3KbOfo9-h1zmVHBhVq}NcTcj+}Ky@VnqKnzh@=tv1IK;AeG%sAicT+jFY z{T_bh;^buSv-Z8#y4PA~?*;KN->WrOPN`BPRUar47U>A;zjnVG>PYkb?fm~!N&Z}o zfDopk>^z?r?W`b$6;X!e?7~%tg%mm4a;O49AKUx5{;iZ9rF z#IjO%g}vxlm8enYlXttk^QZl8qI=Qy%8Qb|}cNlq} z)5L9HGGHQa0+C1XN($y*GM>~SNkwflL4?$^rYYVeC2EWk14h6@m%jA&A2RrT5;RU> zx;7~?CFrxgs|L2=U(UQpN*p+- za3jGtwI?j8G=lar#Rsw-L3AThEF+8uR6a_p@#MwgUSG$yWR^yYk{U293CHq&-HF!O{H)OO|eEg2z=-v|B|~agcaI2YPVQ9sV~xll~fSo?XU=K6hH_KH3plr~FhN@2n3nP(gw zzlpwaDg9YmtNYX{lreca$Blkin{L89{Wk{(D*v1c)i(P_BrUAM(r~GqO&cOM1wNy` z3seRbfe-5C5^z-7gPc?Tn)fFyEF-s0X!~3SB0e`alv}tnZBLX$x(KZ1t{HkMYq8r8 z*Qw4DgAE_`-hEMPV?@aQ{@v@+38jNCTP7v@D&Y66K^ATYSBO&7-YPJ9B3<2pq0I&2 zGHoO@eifVs*_&KoVsO6kpq39Qg>Rc{eW^i~Q%aDRSs2s()!a?v8isA{F!Y0LgY@e< zlSLGkeetR`KFahZ6?at~3`lmHs#JRF9?r_)>pGLhgLXaxQnBCiFIP(tHqlJY+FM%X z6Dz^sjt6KaLYVy(q6HHF~Yr%Y_x*F9IRqXt-?PV~&XZFz3kU&QrY5R;o}ShFmo$f(TV%jUpvgNKcFieW*A 
zjq+DdwLZ|OJeJO+EJkyc!dP%T#WqLUDhO>FqFy|`;Ps-+cA0rf`6EisU)hJqUxjM( z;ZBOXb-P$4Ju;N4l>|j}Z**3AH&tSCNr2PVB zp0hO4(ldOV1Fl{C#d=~h?c=??31Rn<{W{sNlB6qL>rBEg+Tvf%Q@$WXnOy&*&lYK| zcX2QPzhU6J>B=a$B)j#}QvKykk@^u=Jqt^0bA$hzPQ!f3Mr#K;a?7SdBcaA$`}SPt zt-+sRP4Cw*<-jl(Nro9|=vy}uxlw|mv>~#}>J-dNGi67nO54iE?T;`CONZ#kZj;Xq zC{~G7pS4Zjim*7SraOXOx%Qc-1Q#beN&AB&(oWAsze( z8<-q5U8RJK7@Z05Ho)wg7 zF>Ne-@mF0MHSOY_+*Kw=iRed;XwI9A^}%K0RKnIzgQ1`KiHw1Zl9{vh1|}mzb<-Iu z?ozFx{j)ZQb&Z|+RDmaw)_bgGQqpP^F1;#lup4T|BGr#Uh?6(t68W2jo#s^Tsl%U0 zZpC?KrIRM6ZFZcbIdlxzp?02m;hNM_n%^2}l33a1Y?6ZusMI>v(aYeeHw8s;UE10= zMK}b{Vn-MwcQXBx6uHbMJ`v%pNug?c?kq_l2Bo_x1715%EjT_OX){sd&4K4*UYRkq zwg-KW-2yco!Y8;zG%OsVSHa5V`wh>OXL@9&2JA1^rW|)=YyaNQ%0%oNXO9EsgCXfm zR5#f`VH1`PZ`#c8E&#uvA2j2NJ!JT6sE@*!@=QJIH7QO?H+O9VB?sUWdb0*e_$E)o zwlB>FKg$Q0Dfm_x04KP9O&j*qHK~woj`QB1$`1-T-pUPFjpv+In{~Y(=*c zLK=CT&>~7(!lfG0q*V~*mQtC(<&S789dl`_OOR)3*5DmZ+k$)bM2u@#m$M{o{fdpY z$tzWoI9$wTt>4OjW&jfLtNfP2;eYd>F``<=MCk(9t2ijML%ZkWPfv*Y4XNi;*EvS@ z~C=8 zO^^&-13ipGg#wi$iZp2YlH z7(%DGX6uD7LMQQ;jiKfDYLB$3TO>w3EnW;V=TD?;I*Q1$9rCy2R!BDSR8D6{zLyR@ z59@69Zh{IHo^woa--7dsiM~J_^Nj8`H|UcP-uGA(UMB%_SdW$J>e#R*y3=6?39Nmnbt-jE;qR zKDv8+H^tVbv`TsGiWMA}+sEwiN!MU)716{TauD5M^nOn8MGe|=*m*GtNpaZ1r()2p zen_i93OssSh9%)x>9=W#k$0%nnV^x1o6hZHGcW)hJw_p_hHN&z@{ov_tPvQuROW3E zW9&?}{opsOJ|5TCq)o=JBP#9uoyVpyS_vb#rqVdQCm$e8FE`&(#Kb0SsdMnCbZxLm zC@nr^a`k$l$2+;4^Q*T&0gJ$!RGxaF>N*>0>}2uV!LK3Zcz-d=`t^>){kWCL$|TW0E0~SSwyuv%sLXm$39>z5GAj|3RGuXNFTNY(ks zP0iutVHs-jWAuw|@$ne}7T507=U9FIj~}lM_C%%m^c?eJ--8cM?kde( z21Rn#I}lM>N(P&k#U}Y?qQq;RXbAhC6ck%UFDB-xDkJ<>KJmks0vp|N=b^XRB`89 z2_pz1VaYTJbhs=uy}!03*rqwm3mR@9b?e7jJ>?T^aovhsf3wp;cz1KZU`Md1c8(4i zzcsfa1oGKxlM#c1x5?cGNA5sD_YN%XiLgN0W0a?&-mcg@dqWF%5xRSMse!ErI$dsf zxD|cu(fqDpwq|3ou_ZzwCQ@8!Cgh-?j(N+nH>MQ3Y?zSl&#nF~xo^vf&TO zE7QQ)${B`VTD5pjZ`ouL9yxLuX)=6SD}?8Np~5ELj(Rm2=J5M=jp<_A*c#TxS9R7D z?8hH;ayjoRVE$HB*I!X zqx26{I$qH@vw$$Rp(lj*yH?C5sy-+fNi~&zyaj7<@2=lf$!EV>lAt?Ajrf@K1@SA@ z`Yr9!+754F3m+F_Aqt(<3?uzr%awUa524|jFI&L6tnP;DYE$CImzAoNYuiPt&_Rl+ 
z>-n`pu3z5q1Jw+X)8ga`ss$Y`8RYQ1)}=_jY6(!=e2i{|S>3x27)R)CUr-;zVfn=X zMn0ny`r&To;t4L>>@@Z9(zISrkex}((@8p$DcDb2^@rxMs+o$dNfgt9t zO}VIg6Ytc-eSaYIrHy|$XCA3(Y8_uw>eIxhEKg&-p0$`9KL>iliKYeva>8c2LUG*y zWFXA{Ml+L!nCp6G@5iDR3a%m$YSW@Fq7w5}#I)GCd*_3xg8MLFqu=&Lg&gD#T4@ZU14lPRv_4RvWxD;rG z^>S>_=3{A6Pj)XkqmNI;cp%`eXpfJ_OM+a@sZSijY<`cl*Qu|wknRsKy^B6IUZY$o+K= z8U7s16oGc*_)c*m>@Oo=fz`b|F3Sm2QQElY1CzJ&Bn{btNRT;{6a_TN4Px?`kWk*U zd4;9#L0N2!_!om3Rg4_K&#Bz?+4$4O?j3ybn(FLjqJoYKYnV_FQJ488Rvi0H&*&2= z2vS-V1j<#j%X&xrb&@Q9>VtZr19)4A-$Q~5t-uTQj}Qow?(9i8aed%1>HWS(EAz!u zL_NE|-qEWW4I~Dq*~-vLM0ANDwl^86Q(a1M5#2YWdo~hxwIgHUf)i^?|Z^v&YqHo!1ovD-6V+KU<{`-wjK* zE#NoeYc!~)XA>bQYkzOEnXGPmSs~Ppy`lt9wV9Nic2XP;cSn7y5mW5mzqzb7%P$_8B zRVPM^Mu<|OK55=HI-GP{pkOz1UMNAJF^J`ap5|DlcvfdiQIt}a)UGMhY)vJGct<$? ztxZ?W-PC?TCwO3xuHhJHdJjUtZ*(UypA_DZc`?oPynwZ3d0wwB>6w(+}GRrhjk%Dt^V_(vq zm59E@eay+fab3lLcdba*szeO%v7n*G`3Uy36Qw1p=k%-JcHL{pwMy2l?2RoI8gj)i z8uN_LyGgW4$$4GJ>LI0gd3sB)tm|KmwAQ{UeopYLlmxq*A>^gjdwmgXX(Du0OZP*1 zx&5BwBoTkLXPfGhwvFFF{M;rHn1hYhGGsTtnVxgE;9^`X*_3CDynFsL#5!O%d+$U1 z^LMrP$*qCkxxgd)oaI`MF~e5~G`>paRpKMvCTUZ`XY5mF-Jm%EMv~*p9w%z)8cyNk zP`T_WrXwnyd>lUXyT@NDznq};Q5(@hW9lJ-5Nf#5#5BcJ@H>B~mYF6ebp$uxkeW#0~{|X)F)3^`X!~ zmqylX;Kqu*N>_7%IkbA=PwS1|$8K15k()dlFQpdFt3)0o6%mSGU==ctjIW#R{MBT@ z@lhnHg3ftm(M-7!s!y@Z`<{_;0=EYnr(KYwug$wF=4h10ovI>TA4RrH=+(TU(jwJ) z_f%-S)1$A!T=aR5Lg~Jc4sN`b`L*_=Hw$t5k3&S0Z<`)t;}iK~+4-wrn|<8Eo&vi+*zdkKV z6EWbAB>z}_s7TBH^gfYu$1Udbm%|%>W6oQ=MIM0_zV$7g!5whPeErB@2e;Msv35C$ zu(-A0%mZIe#BTzgmmqnU4xq6`X0oXX*%KgMCFH$gb8Su*+LLvankm#x+pcwuwb0;x zrodhQVwMVDo(ql3w=snen1|l0y%vus?_KM}nz!Ih4%L^#@Xwn4S%Vkz7RRF;!W;C{ z-1i=X6$B&;HgSkX3T~ak!v~*{E?g83IX5!0TIbC)q*d>9l96@fQ=>>8z!S+_#|9m; zJCSww&tI4+{hTZfTgcQIcbmQHn_&-32WEW*XtYy=T)BUq!CIB>cOYHu9*(}@5pt_v zaj)MMRhov{#E!TrLkdOFS`v>VN1=l}fXL8$81Mo|@1~lV@XRDY%b~(zMXP3auv7l> zHD{7#_~Z=ro=I03Lyfz$yq_;y#S30aG}MMb_nc8$h;o~cb6Nqo^nhM@+Gt(Rna*bl ztdg?S-^5C{FJ*e5MQxiCId9NKDSw_Ky~W1Aly=er0cKhA#UGAka+m4GD`&Eb@Tl07 z(vx+$@r`;MXRWPnWo~0z|5SA?&Ks8e1hup?lOEL?gX?RDh#AYItt;NexSB6$E*`jf 
zz#b5~*6!Nlc59I9PCWb75OF!?;eMt?4MfTYKAMm7T)1K1U?b?!fD;~i)reg2flRY3 zM1vN1jf|@y!d)+lR_6}ZH+!~XSG@tUej#3TY5F%dC3ehF1Y#`+FkSbsKvF{~!sA$_iisDdS&{wqV~ zd?Wo%@xZQ+czGpqeQyJZ>EJ8Q`k3Wok$>7~MeXvn!{2uAl}fCNfTV~g%!6NU8Ljy~ zVe4~_fXyn9#5x4;DmbIB=Okq~zo1E(O>O6m5JA_@Lrb_1@a5Dv06n;_pd4yFRXvs8 zEC>yN?p*hnetpC|^-%sKoPFmPP?|~oM%ly83W3A$Sqbh4EMGdZ;lQ1LDuqpp-<{;e zvHq7}-^l0CvMY^RRZA95HG3FFm7_PoEOIooPTJCy;-XZ<;C*kquKJ4OFkQbKL8;1T zEY|P6c0+lH@$Cfdc3|yU-mJ%~qkFYZJ~U95sl|7?ou3e%9}g=<<BO!wVGDQrnGEGBgeeau#{7woeA{&2X^Liq&u>i9@|i_ zk4zGPG}CSC7Ui>>o1eBBG3>oG6`S!c^|e!ii*3s#c@h}i%{6-JnPVq>w56AgQ=!H@ zEsQW3An1DKVWlX#F*eJ1Qa`L;C`ju6PDN7uIhRzS)**uAUKI=;OA=L<p_>c%ddZUAjME(LHj)0~f5g-YA@< z;T)J1&c_fD+KqpEn{k;bMthm?K1dg3<@5ABmc&WpWWf5wo~=ZXx1E*c-3lI-I)_d@y0()M zOt5Rf_wXPAkTj8!9w14(84_$u{dNvo8yPuKFn$m{F|A$1Rh0zYi?DLtcOD%qtDZ+! zQ^=YAev(;fWRo1}Py)iJ3safzXB@BX0$^FnB}G zmu&_leeF=^EfF~nZQxH<>AH4kYi5_*nKmH7SXi_~4FS1@aZ{4s6#Z-7Wa>q(+^J5BZe5QY0IGb=H!FceMaw zqXV0-CZ@$^dUX+xQbclr9d{L&3+67oGzj+8k&ags^ipmQ3ih?!%mWnk34C)2(_p(r9tnwpfJqGxNOF~6}N2P7{ks&9rb0F7A zpY_@>@Q^Oz_^nDGu8`?#@R6AxMD9tpRAFL(odv~T9pe4<&!HhJ{tU$>9Lx;{#GAS)vS|3-)ABZm9vF=}HmdfC2<*&Xuw zCPO|=6ZKPdAiO_s=)X{Y_$)`Zq->nji3Ao^UmW*&znj5)rGQIQG3x$^=6N8z~U zU`5$7Nv<(>3L%fs@eu~jU~P6Uk0BRwE`GyV=TbFMLDHu!H`L^nt%zA8r9>Gx=sta# z%ugsCTYR^=EL>W#`cU-TPnv%v3I@oz8Th$+z#6bZ_+3Qj{JIlP}5}aZ#5nUEB&`gu$755 zpDdlcj@cud+M4Fr;eLK%n@Xo3@wt7j%L(MKU8g=6Uh^zkW6aIYc5cvq z$xjobv%Het9*{y!fcDg%uw;9_J+V;8S^u%sU|cl)1)@~Y1+;|Do=Pu62BYEBhlqn; z^4@KocL-HquV}U6b6SD`Y^eMOLJm2=HC6@R6roDHq|gf}-iYQvL&|5Dgiub176`0a zok4BB^G2&;8RNs8i`ig&>!jAO?C6m9NsjMgoh@l%dpfb~^PdSNnPVKrDLUb7RnL`@ zZbCpKl5efQ{=Kd34{|NO5uh!RgGdo6;78ckit&vswNzB8P@jIMbqC)-obrec#NXgw zI_!teN7r@^)KYa_Ffs7Ix~9&MJWn({X8Z9SY zZEe{UPsi`$uwzxKU7=U?;@0MB=cW!)U`8F#WnmVVpavy~#>aZ-JSPYk>{G8aDn-5d zRn|w7Bx2vUefu5n?_2o#OygCwQkX3llPr>PN+npBF-u31h6p}N<>CfjXhV>IYZQT# zwc}+iNhEh$7-#lYz!iRW(kK0_Y+PRvPQ77_#9XPO^xiNKRJ@>KLrV>0mu;6^IJ*zw zQ_?T8=cItGyU*6Ml%^kJXj2%~m~K4vfti*}(-skTydu1QOZGNSvTsldJl9wl8CL$&5EvvRU?kLO{|n*6Dgwy8sh_s~aU 
ziQ8^~B=%=pbrLI59NJMN!na>@$ zNYkZ+xMO8$sMT;f;mg2-jm^O$NN%|7XMd-uCb&U(_nz@|q_2S_Geb;J@_f5xUBjfX z8Ac5^ki=U$CIY!?;c+O_f8-YFxm|W*i`CyC+639RurDKa8#GmQARCt}nYrIQ@J~Bb zmG)mKOE_ZpwwCs{?_DeOWfPpHkLsaFnoH^7z(liIEm)MRFM~}!4Ge}*>hK0&dR+88 zaE!Q?gLDrqg6Rec?-ucy{wctL&R0@Edc^)p@GQgfJV4FMsNH7;e;Zv~$F022&;H)U z?X-*Cw>E_`=f{>=k(aufa-pHUVdSf3EN3b~hOpai==I(V>W!!S+9Gg7A^Sw8A( zSuku?>TQ3-7sE95OjZP~xF90QK>93sK56((g=+mB_k|0E2abNvcO$^BWr5EGb_}3^ zGSIEfv|u;D^B$=<|62OzDR7QUvIlherTh_C7iHk;YXD|X!R?sQgfUx#H{?FOs`$Gq z7fL&?=~)KJTNN*6#rGl(U9iIw)iMHun?3+7`(babdD!OG=U@|RYz=FbK*Ny@#-swX z3o4ns75Cso+53?cc|)f``+x`gsFD0G7;nB}Rg*epSXRG|sCte?Rl=LA8{Ls15c>_y z`*`6b%6%Ri_IETwRc5cv2JRNJ#?TW-?gORyLJs1F<9nKfsTzdC_k0l;DM*t*h1W?W*Ht{GS3Bt zvPri2KG@d*B;DdsoTP!MfV*G!INVxjC0rEi6#+3Uc<4$WUQ<80BT7#dg-V=e2MyUS z!lka4;RKCUar!$FtTPILqGr4%s7^*4o_qpO^rl1UpHt9$#t;mnp8^tyhCEnbY#oD4 z+qv=kTw+KR7KF^uZAUrL8z{x!4m7OL40c|&{$Aanhll; zy|wEx|64+0o&7h%vabm)Jf6qOm>PEogr&|TRNx?h*V3sp6%%b8IqeBRCZo)E zu_nuCHd+HzIwCuDJH+YawT$l_uBr$1lDmF_Ga&#uSs);)hY=c)pTM{r*STA-1_iaJ zFmCy7$2oLG`0nGB05A9ZJ<{f@NSu*MP62xHo>2=}>xvaHUdT#DqkCwqQ|3zsW$?b2 z+wcdQlTR%Y3^5s;5th0#BdumUaIUVTEs2BP;0bT+CvxSHLEcgSPoiwGl&kYu;417TX zNJ96w3!OgbzfRib_H#+buZB9BW`591-@>MeWo28B7ExIl<~N{AOKkoWf6Aw!9l!Xy z;l%VXWy>lKjpMEfFXha(*{gPTcD+z70@fr`Z04h`Jn|`iz~9Wl4;)hcE_bcByvOF4 zdLiqcmYPULp%2q(=$q&tu)d?>#LnmY1UR3Gx5TSdPA1Z&!Ed5SYEe`?c4itb(`*@A zk##SRR_UThR2CBbha!uxcALDePe)QWUgCegIIJ6` z1(%&0geNfY7Rc!61SlpeAiR6VtO^OlHfRwl!QCj3m@JUeNk{sG4qq6QrjdMZHVSpg zz0zm;ExTcOHJ~j!{^@&6boyS95~r62jf?;B-3my0_Ex27b8P8e#NyXN@4QPj^wih^ zn^lm)r~%5dsIaTElb}c*hauZM)B@KOCZ~&RZObMySZFe=wCEhawkXO)ky9F+1p=JBvQJhQyt!LFVApg7H08ZCfYz7~}c zt+k5r#$oX}2!pErBC*s{g~(t!Z==UM@RZ(MvOlIGxi^0ym88dF?t)1+Cp7fx+~Zjo zL6@8#(nX(DM}*JlkYOkT5<>2scaqR-Qg*{w*z&^R3bg~7pF5i#Sk~s zd6%L**Ix}Ja+2LaWN{;$smP_RLKxfW4Xry@`V}`u`@iL^{^p)si8*fp)hzXjS+xZF z$mmkc;O397uGAl*xtX9E89M7@wyG?rF#IQUbER)8F!T8(#0ht%qbrvOTAIZ9f(rZ#z!>yjE)@KQ(KNNK7NCa8Aeqvf) 
zp|UId+YEm&@~t;7_m`x7r1i(dS}z|z*C5Vc)<5$Hxmx&cv^&#!d}nj`V?XK3;-$xj^|FtS{LYVG6?6ry%S%sJG9oQeXGgm(&HvAp(hJ_#WgOe0jl|V|+(_bcA zyuRLAZu8H5GJ>)WU+K(8P2r;i7GxH`x!q45*Kq90x2M%PTxKN${_l%Q_(V0S#m8Fq z+b@)hUX;8!0$gsZZo4AEMclo@`p55PfY0ZUX$Z0#F(5j5_UvgA2vgw#Q}SbOIh|fp z?#X$6u`nMJuPwzz=LY^ ziO)D8E+v!1R{2SGs(in@xT%$!B)ary3=;JRDA-j#TLiWV-JBw~>Af4V0P^pdi(R;H z()=MrzNqeI1598|6cqueAxX)2r)9*Rb1_a(_hxQx$d8ac14OZMK>e6{LmwXVA6Lq< zoF6>jJBx6*{m$Re$lXXw27CjfVoNIr8+Uu0aMzVPlu*kz|jjAa>tk5oSL#!OAKQEyXu_H126e2mkP(! zHQkr-4wTJD)8Y((iMIl(N%?ME_ztoW)~{oU;{o7;<8n0vdCVQBpoRulw*4QD8(mdzIb&Dx!u*PfLfk_CIW16^PSh zrDUGGI|xaCo<{AYsY%W%)^VamcIaC?G%g$Vf&XXTCSZ)^o3pW)*HkIE_>C#Ke_2&d z(6alRD#>M>{5G)D->djtSaX*@Q9Y*Yz!f;d<`jpYeE+BP)wx2G^bk*ZlUNq% znoK@2JBUgrUri~XB~~YM&1kqrr9JWrc3CNjpU;w4!42p?ZW*tbPg#?ClOUB=P2RI0 zoh?S4_i2tkZxZH%_hV3F33*_FCBvx^`G?6VdXk|l!35Q{ng0WOnkS6hIeYB_0A?Cy4!4kP>w$IVgT!V&KEiTkuT9DNoUHkIw?D#B zvS=xbWIKJjJ@)K(*u$4J6N@oja~O(49AO$c7WdW(sEMMjs2`fv4Evz*bKx@n;mPN< zz6+N|b*%Y}cqvAT+%Neb$tC@Qo}P=47bCDePFxz&%0sW{ZbQehY6c4DxfRPZ6lu4=5vt~!74z2FaC;B*?7uWL(u3sJHmPI%q6 zESp(9Q+yK&?L~xMm*g#qyFV@t5qmq!$|5J-Shgo961usz;!F8)tKacxYaGr$ERre{*1Es z9qm$&tpMra_m~PyXJlR6Vk{5ATLYF}+YZCqS+c+Ua=rYi^h1p*`^h(iw-4y;^~=q+ zfgD=qB2D<@2ZWj$N){Eu_A*v6A77ctxFjlN7~q%lLZnj;pC*t(*hhNmwBt9~PnNBb zixB)Q@mow4VDGSy22tMFdC2l<#XX${746pz#OZI0Y$h9w3aBPcenJR|%Bxa7QDFJg zK0Oe_6ZfiFNlK<4mYLn)bCFZ1V$;5=bmF?n|DA0GlP zUE&4IMgfuxoz&VFf&+W^jL|lX#KE`tMVJO_S!XB#gJA8|ln*VGeka^Bi@}PodY-EV zv7*2ODiPgxd{#xI*FsJ(ci<5r%sxW_5UB+UFvU3AM`#x}v4|d}2Y1Pd z-(%?V_Gvdet`pYhn=~!RQ6g7>WY7Kq>7V$*Kc`isiKjq?%^2p>Y|(d&-+5CM=j(ax zM$&3D&$6>0@bh+GT{pyO6@6F{KEm#4smD&9`U-NU>;LjW?@GU|&hs<>cBVrA(Fg8S zJSF_nw8Q+P1^JiZ{(i~wGQQ32Z~14o^oN!Bcd=Z&e2oRaU00Z^`HU?72g3Yoa4I~R zpt}82R{HOQx8XS&<$!|2Pq@Rs%OW5MFS1{Q@1NrE--r5Z#^24K<9VCcE2TpJxOvDp zfZAl$g1-}FpIv&!nEq`ZhL-0D?>~+a`O!LFuG0J*bn5_QwYErf79#!QD}P@kL*q;4 z%%&NBOUg{a45SbKLy@t78GxF=G6T|HAgQLm!mE zy#@ezpIld-MYvu2k!jx2xP5Z+$$Y5g@T}~i2*vZ+C1{uVS>EQK{Gs@K0L)|8K1F8) zl44bhND2YlX}zyf`kp{MrE4`=4~~w#JeJ(;p^e6W)vE-q%z;u>|Aj~Da+X_-(4sqp 
z;Qr0?Biz0}aB|B`{KH_C%RKsUHeZH_l1>p~c+I%N+ev1gB5=FAwtnbcyuNSKPT-cIMFp TdHBO$@PAKbl%-1_8UFr15!>LM literal 0 HcmV?d00001 diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index e95409e08e9..e807ee54fbf 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import dataclasses import inspect @@ -299,6 +299,7 @@ def __init__( extra_kwargs["delay_wgrad_compute"] = self.config.delay_wgrad_compute else: raise RuntimeError("Only TE with version >=2.3.0 supports delay_wgrad_compute now.") + if ( self.config.tp_comm_overlap and tp_comm_buffer_name @@ -2116,3 +2117,12 @@ def set_save_original_input(module): "set_save_original_input is only needed on transformer-engine modules that save " "quantized tensors by default. It needs transformer-engine>=2.6.0dev0." ) + + +try: + # pylint: disable=unused-import + from transformer_engine.pytorch import cpu_offload + from transformer_engine.pytorch.float8_tensor import Float8Tensor +except ImportError: + Float8Tensor = None + cpu_offload = None diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index d501c11a0a9..74b9a90764d 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from contextlib import nullcontext from typing import Optional @@ -8,6 +8,9 @@ from megatron.core.enums import Fp8Recipe from megatron.core.fp8_utils import get_fp8_context +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_set_last_layer, +) from megatron.core.pipeline_parallel.utils import ( AbstractSchedulePlan, NoopScheduleNode, @@ -450,6 +453,8 @@ def run( f_layer = f_schedule_plan.get_layer(i) b_layer = b_schedule_plan.get_layer(b_num_layers - 1 - i) torch.cuda.nvtx.range_push(f"layer_{i}f-layer_{b_num_layers - 1 - i}b") + if f_layer.layer.config.fine_grained_activation_offloading: + fine_grained_offloading_set_last_layer(i == f_num_layers - 1) f_input, b_grad = TransformerLayerSchedulePlan.run( f_layer, b_layer, @@ -472,6 +477,8 @@ def run( for i in range(overlapped_layers, f_num_layers): f_layer = f_schedule_plan.get_layer(i) torch.cuda.nvtx.range_push(f"layer_{i}f") + if f_layer.layer.config.fine_grained_activation_offloading: + fine_grained_offloading_set_last_layer(i == f_num_layers - 1) f_input, _ = TransformerLayerSchedulePlan.run(f_layer, None, f_input=f_input) torch.cuda.nvtx.range_pop() diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index fd1cc3d33c6..786a1b850dd 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import weakref from contextlib import nullcontext @@ -8,6 +8,11 @@ import torch from megatron.core import tensor_parallel +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, +) from megatron.core.pipeline_parallel.utils import ScheduleNode, make_viewless from megatron.core.transformer.module import float16_to_fp32 from megatron.core.transformer.moe.moe_layer import MoELayer @@ -350,13 +355,17 @@ def submodule_post_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor) Run forward pass for computations between attention and dispatch: pre mlp layernorm->router->dispatch preprocess """ + if layer.offload_mlp_norm: + hidden_states = fine_grained_offloading_group_start(hidden_states, name="mlp_norm") if layer.recompute_pre_mlp_layernorm: layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( - layer.pre_mlp_layernorm, hidden_states - ) + with get_fine_grained_offloading_context(layer.offload_mlp_norm): + pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( + layer.pre_mlp_layernorm, hidden_states + ) else: - pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) + with get_fine_grained_offloading_context(layer.offload_mlp_norm): + pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) local_tokens, probs, _ = layer.mlp.router_and_preprocess(pre_mlp_layernorm_output) @@ -437,6 +446,10 @@ def submodule_combine_forward( hidden_states = layer.mlp_bda(layer.training, layer.config.bias_dropout_fusion)( mlp_output_with_bias, residual, layer.hidden_dropout ) + if layer.offload_mlp_norm: + (hidden_states,) = fine_grained_offloading_group_commit( + hidden_states, name="mlp_norm", forced_released_tensors=[residual] + ) output = make_viewless_tensor( inp=hidden_states, 
requires_grad=hidden_states.requires_grad, keep_graph=True ) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 654827dc6fb..ae292649561 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. from collections import OrderedDict from typing import Dict, Literal, Optional @@ -18,6 +18,9 @@ ) from megatron.core.models.common.language_module.language_module import LanguageModule from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_init_chunk_handler, +) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.quantization.utils import get_quant_config_or_none from megatron.core.tensor_parallel import gather_from_sequence_parallel_region @@ -117,6 +120,7 @@ def __init__( self.parallel_output = parallel_output self.share_embeddings_and_output_weights = share_embeddings_and_output_weights self.vp_stage = vp_stage + self.disable_param_offloading = True if hasattr(self.config, 'position_embedding_type'): self.position_embedding_type = self.config.position_embedding_type @@ -410,6 +414,22 @@ def _preprocess( return preproc_output + def preprocess_for_fine_grained_offloading(self): + """Preprocess for fine-grained activation offloading.""" + fine_grained_offloading_init_chunk_handler( + self.vp_stage, self.config.min_offloaded_tensor_size + ) + if self.disable_param_offloading: + for param in self.decoder.parameters(): + param.offloading_activation = False + if self.mtp_process: + for param in self.mtp.parameters(): + param.offloading_activation = False + if self.post_process: + for param in self.output_layer.parameters(): + param.offloading_activation = False + self.disable_param_offloading = 
False + def forward( self, input_ids: Tensor, @@ -435,6 +455,8 @@ def forward( runtime_gather_output (bool): Gather output at runtime. Default None means `parallel_output` arg in the constructor will be used. """ + if self.config.fine_grained_activation_offloading: + self.preprocess_for_fine_grained_offloading() inference_context = deprecate_inference_params(inference_context, inference_params) @@ -701,6 +723,9 @@ def build_schedule_plan( TransformerModelChunkSchedulePlan: The model chunk schedule plan. """ + if self.config.fine_grained_activation_offloading: + self.preprocess_for_fine_grained_offloading() + from ..common.model_chunk_schedule_plan import TransformerModelChunkSchedulePlan return TransformerModelChunkSchedulePlan( diff --git a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py new file mode 100644 index 00000000000..b28bbcbeddc --- /dev/null +++ b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py @@ -0,0 +1,603 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ +import warnings +from collections import deque +from contextlib import nullcontext +from typing import Any + +import torch + +# CPU offload implementation for pipeline parallelism +DEBUG = False +DEBUG_RANK = 0 + + +def debug_rank(message): + """Print debug message for a specific rank when DEBUG is enabled.""" + # pylint: disable=bad-builtin + if not DEBUG: + return + assert torch.distributed.is_initialized() + if torch.distributed.get_rank() == DEBUG_RANK: + print(message) + + +def set_ideal_affinity_for_current_gpu(): + """Set CPU affinity for the current GPU to optimize host-device transfers.""" + import uuid + + try: + import cuda.bindings.driver as cuda_driver + import cuda.bindings.runtime as cuda_runtime + except ImportError: + import cuda.cuda as cuda_driver + import cuda.cudart as cuda_runtime + try: + import pynvml + except ImportError: + warnings.warn("pynvml is not installed, skipping GPU affinity setting") + return + + # Get current CUDA device ID + err, device_id = cuda_runtime.cudaGetDevice() + assert err == cuda_runtime.cudaError_t.cudaSuccess + # Get device UUID + err, device_uuid = cuda_driver.cuDeviceGetUuid(device_id) + assert err == cuda_driver.CUresult.CUDA_SUCCESS + # Set CPU affinity based on GPU's NUMA node + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByUUID("GPU-" + str(uuid.UUID(bytes=device_uuid.bytes))) + pynvml.nvmlDeviceSetCpuAffinity(handle) + + +class PipelineOffloadManager: + """ + Singleton manager for coordinating activation offloading across pipeline stages. + Manages chunk handlers, synchronizes GPU-CPU transfers, + and handles virtual pipeline parallelism. 
+ """ + + OFFLOAD_MGR = None + + @classmethod + def get_instance(cls): + """Get the singleton instance of PipelineOffloadManager.""" + if cls.OFFLOAD_MGR is None: + cls.OFFLOAD_MGR = PipelineOffloadManager() + return cls.OFFLOAD_MGR + + def __init__(self): + """Initialize the manager with queues and dedicated CUDA streams.""" + from megatron.core import parallel_state + + # Queue to store chunk handlers for backward pass + self._queue = deque() + if parallel_state.get_virtual_pipeline_model_parallel_world_size() is None: + self._vpp = 1 + else: + self._vpp = parallel_state.get_virtual_pipeline_model_parallel_world_size() + + # Cache chunk handlers for each virtual pipeline stage + self._stages = [[] for _ in range(self._vpp)] + # allocate streams and events for synchronization + self._d2h_stream = torch.cuda.Stream() + self._h2d_stream = torch.cuda.Stream() + self.reset() + + @property + def d2h_stream(self): + """Get the device-to-host (GPU to CPU) transfer stream.""" + return self._d2h_stream + + @property + def h2d_stream(self): + """Get the host-to-device (CPU to GPU) transfer stream.""" + return self._h2d_stream + + def reset(self): + """Reset manager state for a new training iteration.""" + set_ideal_affinity_for_current_gpu() + self._inside_context = False + self._cur_forward_chunk = None + self._cur_backward_chunk = None + # Track the first microbatch of the last virtual pipeline stage + self._is_first_last_vpp_chunk = True + + def flush(self): + """Flush all staged chunks to the backward queue in reverse order.""" + # Ensure all virtual pipeline stages have the same number of chunks + if len(self._stages[0]) == len(self._stages[-1]): + lens = [len(e) for e in self._stages] + assert min(lens) == max(lens), "All stages must have same chunk count" + # Clear the last stage and push all chunks in reverse order for backward + self._stages[-1] = [] + for chunks in reversed(self._stages): + for chunk in chunks: + self.push(chunk) + # Clear all stages after 
flushing + for i in range(self._vpp): + self._stages[i] = [] + + def push(self, handler): + """Add a chunk handler to the backward queue.""" + debug_rank(f"pushing handler {handler}") + self._queue.append(handler) + + def pop(self): + """Remove and set the next non-empty chunk as the current backward chunk.""" + assert self.size(), "Cannot pop from empty queue" + while self._queue: + self._cur_backward_chunk = self._queue.popleft() + if not self._cur_backward_chunk.is_empty_chunk(): + break + debug_rank(f"popping handler {self._cur_backward_chunk}") + + def front(self): + """Get the first non-empty chunk handler without removing it from the queue.""" + if not self.size(): + return None + for chunk_handler in self._queue: + if not chunk_handler.is_empty_chunk(): + return chunk_handler + return None + + def size(self): + """Return the number of chunk handlers in the queue.""" + return len(self._queue) + + def init_model_chunk_offload_handler(self, vp_stage, min_offloaded_tensor_size=1024 * 1024): + """ + Initialize a chunk offload handler for a model chunk (microbatch). 
+ + Args: + vp_stage: Virtual pipeline stage index (None means stage 0) + min_offloaded_tensor_size: Minimum tensor size (in elements) to offload + """ + if vp_stage is None: + cur_vpp_rank = 0 + else: + cur_vpp_rank = vp_stage + + is_first_last_vpp_chunk = self._is_first_last_vpp_chunk + # Flush staged chunks when reaching the last virtual pipeline stage + if cur_vpp_rank == self._vpp - 1: + self.flush() + # Determine if this is the first microbatch of the last virtual pipeline stage + is_first_last_vpp_chunk = is_first_last_vpp_chunk and (cur_vpp_rank == self._vpp - 1) + + cur_chunk = ChunkOffloadHandler(is_first_last_vpp_chunk, min_offloaded_tensor_size) + self._stages[cur_vpp_rank].append(cur_chunk) + # For the last stage, push immediately and flush + if cur_vpp_rank == self._vpp - 1: + self._is_first_last_vpp_chunk = False + self.push(cur_chunk) + self.flush() + self._cur_forward_chunk = cur_chunk + cur_chunk.vpp_rank = cur_vpp_rank + + def set_last_layer(self, is_last_layer): + """Mark whether the current forward chunk is processing the last layer.""" + self._cur_forward_chunk.is_last_layer = is_last_layer + + def cur_forward_chunk(self): + """Get the current forward pass chunk handler.""" + return self._cur_forward_chunk + + def cur_backward_chunk(self): + """Get the current backward pass chunk handler.""" + return self._cur_backward_chunk + + def __enter__(self): + """Enter context manager to enable activation offloading hooks.""" + debug_rank("----__enter__") + from megatron.core.extensions.transformer_engine import cpu_offload + + if cpu_offload is not None: + cpu_offload.CPUOffloadEnabled = True + self.inside_context = True + + torch._C._autograd._push_saved_tensors_default_hooks( + self.on_save_for_backward, self.on_get_saved_tensor + ) + + def __exit__(self, *args: Any): + """Exit context manager and restore original tensor saving behavior.""" + debug_rank("----__exit__") + from megatron.core.extensions.transformer_engine import cpu_offload + + if 
cpu_offload is not None: + cpu_offload.CPUOffloadEnabled = False + self.inside_context = False + torch._C._autograd._pop_saved_tensors_default_hooks() + + def on_save_for_backward(self, tensor: torch.Tensor) -> Any: + """ + Hook called when autograd saves a tensor for backward pass. + Returns a tag to identify the tensor later. + """ + debug_rank(f"------on_save_for_backward {tensor.shape}") + assert self.inside_context, "Must be inside offload context" + return self.cur_forward_chunk().tensor_push(tensor) + + def on_get_saved_tensor(self, saved_state: Any) -> torch.Tensor: + """ + Hook called when autograd retrieves a saved tensor during backward pass. + Returns the actual tensor (potentially reloading from CPU). + """ + debug_rank(f"----on_get_saved_tensor {saved_state}") + return self.cur_backward_chunk().tensor_pop(saved_state) + + +class ChunkOffloadHandler: + """ + Handles activation offloading and reloading for a single pipeline chunk (microbatch). + Manages tensor groups, coordinates asynchronous GPU-CPU transfers, and handles synchronization. 
+ """ + + @staticmethod + def offload(src_tensor, pin_memory=True): + """Offload.""" + debug_rank("--------offload") + from megatron.core.extensions.transformer_engine import Float8Tensor + + fp8_offload = isinstance(src_tensor, Float8Tensor) if Float8Tensor is not None else False + + if not src_tensor.is_contiguous(): + src_tensor = src_tensor.contiguous() + + cpu_backup = torch.empty( + src_tensor.size(), + dtype=torch.uint8 if fp8_offload else src_tensor.dtype, + layout=src_tensor.layout, + device="cpu", + pin_memory=pin_memory, + ) + + if fp8_offload: + cpu_backup = Float8Tensor.make_like(src_tensor, data=cpu_backup) + + cpu_backup.copy_(src_tensor, non_blocking=pin_memory) + state = (src_tensor.device, cpu_backup) + return state + + @staticmethod + def reload(state, non_blocking=None): + """Reload.""" + debug_rank("------reload") + dev, cpu_backup = state + if non_blocking is None: + non_blocking = cpu_backup.is_pinned() + return cpu_backup.to(dev, non_blocking=non_blocking) + + def __init__(self, is_first_last_vpp_chunk, min_offloaded_tensor_size): + # Data Structure to maintain reference to activation tensors + self._tensor_tag_to_state = {} + # Mark the first microbatch of the last virtual pipeline stage + self._is_first_last_vpp_chunk = is_first_last_vpp_chunk + + # Group management for batching offload/reload operations + self._offloaded_group_index = 0 + self._groups_to_offload = [] + self._groups_to_reload = [] + self._tensor_count_current_group = 0 + + # Counter for special torch tensor types (FakeTensor, FunctionalTensor) + self.torch_tensor_count = 0 + self.d2h_stream = PipelineOffloadManager.get_instance().d2h_stream + self.h2d_stream = PipelineOffloadManager.get_instance().h2d_stream + self._offload_events = {} + self._reload_events = {} + self.min_offloaded_tensor_size = min_offloaded_tensor_size + self.is_last_layer = False + + def is_empty_chunk(self): + """Check if this chunk has no tensors to manage.""" + return len(self._tensor_tag_to_state) 
== 0 + + def is_first_last_layer(self): + """ + Check if this is the last layer of the first microbatch of the last vp stage. + These tensors should not be offloaded to avoid unnecessary overhead. + """ + debug_rank( + f"------is_first_last_layer {self._is_first_last_vpp_chunk} {self.is_last_layer}" + ) + return self._is_first_last_vpp_chunk and self.is_last_layer + + def tensor_push(self, tensor): + """Push tensor to the offload handler.""" + torch_stray_tensor = isinstance( + tensor, + ( + torch._subclasses.fake_tensor.FakeTensor, + torch._subclasses.functional_tensor.FunctionalTensor, + ), + ) + + if not torch_stray_tensor: + # Assign unique tag based on group index and position within group + tensor_tag = (self._offloaded_group_index, self._tensor_count_current_group) + self._tensor_count_current_group += 1 + assert tensor_tag not in self._tensor_tag_to_state, "Duplicate tensor tag" + self._tensor_tag_to_state[tensor_tag] = tensor + else: + # Use negative group ID for special tensor types + tensor_tag = (-1, self.torch_tensor_count) + self.torch_tensor_count += 1 + self._tensor_tag_to_state[tensor_tag] = tensor + debug_rank(f"--------tensor_push {tensor_tag}") + return tensor_tag + + def tensor_pop(self, tensor_tag): + """Pop tensor from the offload handler.""" + debug_rank(f"--------tensor_pop {tensor_tag}") + assert tensor_tag in self._tensor_tag_to_state, f"Tag {tensor_tag} not found" + tensor = self._tensor_tag_to_state.pop(tensor_tag) + # If tensor is offloaded (stored as tuple), reload it + if isinstance(tensor, tuple): + tensor = self.reload(tensor) + debug_rank(f"--------tensor_pop {tensor.shape}") + return tensor + + def tensor_need_offloading_checker(self, tensor): + """Check if the tensor needs to be offloaded.""" + if tensor.numel() < self.min_offloaded_tensor_size: + return False + # Respect tensor's offload preference if specified + if hasattr(tensor, "offloading_activation") and not tensor.offloading_activation: + return False + return True + + 
def bulk_offload_group(self, group_to_offload): + """offload a group of tensors recorded in tensor_push().""" + debug_rank("------bulk_offload_group") + assert not self.is_first_last_layer(), "Should not offload first-last layer" + group_id_to_offload, name = group_to_offload + torch.cuda.nvtx.range_push("activation offloading " + name) + with torch.cuda.stream(self.d2h_stream): + for tensor_tag, state in self._tensor_tag_to_state.items(): + group_id, _ = tensor_tag + if group_id == group_id_to_offload: + debug_rank(f"------tensor_tag {tensor_tag}") + debug_rank(f"------group_to_offload {group_to_offload}") + assert not isinstance(state, tuple), "Tensor already offloaded" + tensor_on_device = state + if self.tensor_need_offloading_checker(tensor_on_device): + state = self.offload(tensor_on_device) + event = torch.cuda.Event() + event.record(self.d2h_stream) + self._offload_events[name] = event + tensor_on_device.record_stream(self.d2h_stream) + self._tensor_tag_to_state[tensor_tag] = state + torch.cuda.nvtx.range_pop() + + def get_offload_event(self, name): + """Get the CUDA event for a named offload operation.""" + return self._offload_events.get(name, None) + + def get_reload_event(self, name): + """Get the CUDA event for a named reload operation.""" + return self._reload_events.get(name, None) + + def bulk_reload_group(self, group_to_reload): + """Bulk reload group.""" + debug_rank("----bulk_reload_group") + found_reload_group = False + group_id_to_reload, name = group_to_reload + torch.cuda.nvtx.range_push("activation reloading " + name) + with torch.cuda.stream(self.h2d_stream): + for tensor_label, state in self._tensor_tag_to_state.items(): + group_id, _ = tensor_label + if group_id == group_id_to_reload: + debug_rank(f"----tensor_label {tensor_label}") + found_reload_group = True + event = self.get_offload_event(name) + # Only reload if tensor was offloaded (stored as tuple) + if isinstance(state, tuple): + # Wait for offload to complete before reloading + 
torch.cuda.current_stream().wait_event(event) + recovered_tensor = self.reload(state) + event.record(self.h2d_stream) + self._reload_events[name] = event + debug_rank(f"----recovered_tensor {recovered_tensor.shape}") + self._tensor_tag_to_state[tensor_label] = recovered_tensor + torch.cuda.nvtx.range_pop() + return found_reload_group + + def pre_reload_last_layer(self): + """Pre-reload the last layer of this chunk to hide reload latency.""" + debug_rank("pre_reload_last_layer") + assert not self._is_first_last_vpp_chunk, "Should not pre-reload first chunk" + debug_rank(f"len(self._groups_to_reload) {len(self._groups_to_reload)}") + if len(self._groups_to_reload) > 0: + # Reload the last group (last layer) early + if self.bulk_reload_group(self._groups_to_reload[-1]): + self._groups_to_reload.pop() + + def should_bulk_offload(self): + """Determine if the current group should be offloaded.""" + # Don't offload the first backward chunk's last layer + if self.is_first_last_layer(): + return False + + # Check if next backward chunk is this chunk (for last pipeline stage) + next_backward_chunk = PipelineOffloadManager.get_instance().front() + if next_backward_chunk is not None and next_backward_chunk is self: + # Don't offload last layer if it's about to be used immediately + if self.is_last_layer: + return False + + return True + + def bulk_offload(self, forced_released_tensors): + """Offload a group of tensors and optionally release their GPU memory.""" + debug_rank("----bulk_offload") + if self.should_bulk_offload(): + group_to_offload = self._groups_to_offload.pop() + self._groups_to_reload.append(group_to_offload) + self.bulk_offload_group(group_to_offload) + # Manually release tensors not auto-freed by torch GC + if len(forced_released_tensors) > 0: + cur_stream = torch.cuda.current_stream() + for release_tensor in forced_released_tensors: + if self.tensor_need_offloading_checker(release_tensor): + # Ensure tensor is not in use before freeing + 
release_tensor.record_stream(cur_stream) + release_tensor.untyped_storage().resize_(0) + + def on_group_commit_forward(self, forced_released_tensors): + """Called at the end of a layer group's forward pass to trigger offloading.""" + debug_rank("--on_group_commit_forward") + # Wait for compute to finish before starting offload + self.d2h_stream.wait_stream(torch.cuda.current_stream()) + self.bulk_offload(forced_released_tensors) + + def bulk_reload(self): + """Reload the next group of tensors from CPU to GPU.""" + debug_rank("--bulk_reload") + if len(self._groups_to_reload) > 0: + # Reload the next layer group + if self.bulk_reload_group(self._groups_to_reload[-1]): + debug_rank(f"--bulk_reload_group {self._groups_to_reload}") + self._groups_to_reload.pop() + else: + # Pre-load the last layer of the next backward chunk to hide latency + next_backward_chunk = PipelineOffloadManager.get_instance().front() + if next_backward_chunk is not None: + next_backward_chunk.pre_reload_last_layer() + + def on_group_commit_backward(self, name): + """ + Called at the end of a layer group's backward pass. + Ensures correct chunk is active and synchronizes reloads. + """ + debug_rank("--on_group_commit_backward") + cur_backward_chunk = PipelineOffloadManager.get_instance().cur_backward_chunk() + # Switch to this chunk if it's not already current + if cur_backward_chunk is not self: + PipelineOffloadManager.get_instance().pop() + cur_backward_chunk = PipelineOffloadManager.get_instance().cur_backward_chunk() + assert cur_backward_chunk is self, "Chunk mismatch" + # Wait for reload to complete before using tensors + event = self.get_reload_event(name) + if event is not None: + torch.cuda.current_stream().wait_event(event) + self._offloaded_group_index = self._offloaded_group_index - 1 + + def on_group_start_forward(self, name): + """ + Called at the start of a layer group's forward pass. + Increments group index and prepares for offloading. 
+ """ + debug_rank(f"--on_group_start_forward") + self._offloaded_group_index = self._offloaded_group_index + 1 + self._tensor_count_current_group = 0 + self._groups_to_offload.append((self._offloaded_group_index, name)) + + def on_group_start_backward(self): + """ + Called at the start of a layer group's backward pass. + Triggers reloading of tensors from CPU. + """ + debug_rank("--on_group_start_backward") + # Wait for compute to finish before starting reload + self.h2d_stream.wait_stream(torch.cuda.current_stream()) + self.bulk_reload() + + +class FineGrainedOffloadingGroupCommitFunction(torch.autograd.Function): + """ + Identity operation that marks the end of a layer group for offload synchronization. + Triggers offload during forward and synchronizes reload during backward. + """ + + @staticmethod + def forward(ctx, *args): + # pylint: disable=missing-function-docstring + debug_rank("FineGrainedOffloadingGroupCommitFunction forward") + + forced_released_tensors = args[-1] + name = args[-2] + cpu_offload_handler = args[-3] + tensor = args[:-3] + cpu_offload_handler.on_group_commit_forward(forced_released_tensors) + ctx.cpu_offload_handler = cpu_offload_handler + ctx.name = name + + # return the identical tensor + return tensor + + @staticmethod + def backward(ctx, *grad_output): + # pylint: disable=missing-function-docstring + debug_rank("FineGrainedOffloadingGroupCommitFunction backward") + + cpu_offload_handler = ctx.cpu_offload_handler + cpu_offload_handler.on_group_commit_backward(ctx.name) + return grad_output + (None, None, None) + + +def fine_grained_offloading_group_commit(*tensor, name, forced_released_tensors=[]): + """ + Specify the tensors to be released after offloading. + forced_released_tensors is a list of tensors to be released after offloading. + The tensors will be untyped_storage().resize_(0) after offloading. + Note: specify the tensors only when they are not automatically released by torch gc. 
+ """ + cur_forward_chunk = PipelineOffloadManager.get_instance().cur_forward_chunk() + return FineGrainedOffloadingGroupCommitFunction.apply( + *tensor, cur_forward_chunk, name, forced_released_tensors + ) + + +class FineGrainedOffloadingGroupStartFunction(torch.autograd.Function): + """ + Identity operation that marks the start of a layer group for offload/reload. + Prepares for offload during forward and triggers reload during backward. + """ + + @staticmethod + def forward(ctx, tensor, cpu_offload_handler, name): + # pylint: disable=missing-function-docstring + ctx.cpu_offload_handler = cpu_offload_handler + debug_rank("FineGrainedOffloadingGroupStartFunction forward") + + cpu_offload_handler.on_group_start_forward(name) + # return the identical tensor + return tensor + + @staticmethod + def backward(ctx, grad_output): + # pylint: disable=missing-function-docstring + debug_rank("FineGrainedOffloadingGroupStartFunction backward") + cpu_offload_handler = ctx.cpu_offload_handler + cpu_offload_handler.on_group_start_backward() + return grad_output, None, None + + +def fine_grained_offloading_group_start(tensor, name=None): + """Mark the start of a layer group and prepare for offload/reload.""" + cur_forward_chunk = PipelineOffloadManager.get_instance().cur_forward_chunk() + return FineGrainedOffloadingGroupStartFunction.apply(tensor, cur_forward_chunk, name) + + +def get_fine_grained_offloading_context(flag): + """Get the fine-grained offload context""" + return PipelineOffloadManager.get_instance() if flag else nullcontext() + + +def fine_grained_offloading_set_last_layer(is_last_layer): + """Set the last layer flag.""" + PipelineOffloadManager.get_instance().set_last_layer(is_last_layer) + + +def fine_grained_offloading_init_chunk_handler(vp_stage, min_offloaded_tensor_size): + """Initialize the chunk handler, called at the start of a microbatch forward pass.""" + PipelineOffloadManager.get_instance().init_model_chunk_offload_handler( + vp_stage, 
min_offloaded_tensor_size + ) + + +def fine_grained_offloading_reset(): + """Reset the chunk handler, called at the start of a training iteration.""" + PipelineOffloadManager.get_instance().reset() diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index e83f8d90635..09f95ac25d2 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import contextlib from functools import partial @@ -9,6 +9,9 @@ from megatron.core import parallel_state from megatron.core.enums import ModelType +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_reset, +) from megatron.core.pipeline_parallel.p2p_communication import P2PCommunicator from megatron.core.pipeline_parallel.utils import ( is_pp_first_stage, @@ -562,6 +565,9 @@ def forward_backward_no_pipelining( if config.timers is not None: config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time) + if not forward_only and config.fine_grained_activation_offloading: + fine_grained_offloading_reset() + no_sync_func = config.no_sync_func if no_sync_func is None: no_sync_func = contextlib.nullcontext @@ -898,6 +904,9 @@ def forward_backward_pipelining_with_interleaving( adjust_tensor_shapes_fn is None ), "adjust_tensor_shapes_fn is not supported for interleaved pipeline parallelism" + if not forward_only and config.fine_grained_activation_offloading: + fine_grained_offloading_reset() + if config.overlap_p2p_comm and config.batch_p2p_comm: raise ValueError("Can not use both overlap_p2p_comm and batch_p2p_comm") @@ -2043,6 +2052,9 @@ def forward_backward_pipelining_without_interleaving( if config.timers is not None: config.timers('forward-backward', 
log_level=1).start(barrier=config.barrier_with_L1_time) + if not forward_only and config.fine_grained_activation_offloading: + fine_grained_offloading_reset() + # Disable async grad reductions no_sync_func = config.no_sync_func if no_sync_func is None: diff --git a/megatron/core/tensor_parallel/random.py b/megatron/core/tensor_parallel/random.py index 54cac0e41e3..2ae15bef0d9 100644 --- a/megatron/core/tensor_parallel/random.py +++ b/megatron/core/tensor_parallel/random.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # Parts of the code here are adapted from PyTorch # repo: https://github.com/pytorch/pytorch @@ -510,10 +510,11 @@ def forward(ctx, run_function, checkpoint_without_output_obj, *args): @staticmethod def backward(ctx, *args): """Backward pass.""" - inputs = ctx.saved_tensors + inputs = ctx.inputs outputs = ctx.outputs torch.autograd.backward(outputs, args) ctx.outputs = None + ctx.inputs = None grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else inp for inp in inputs) return (None, None) + grads @@ -573,8 +574,9 @@ def _recompute(self, _): recompute_ctx = contextlib.nullcontext() fp8_ctx = contextlib.nullcontext() + inputs = self.ctx.saved_tensors with torch.enable_grad(), fp8_ctx, recompute_ctx: - outputs = self.run_function(*self.ctx.saved_tensors) + outputs = self.run_function(*inputs) self.run_function = None self.rng_states = None @@ -590,6 +592,7 @@ def _recompute(self, _): output.untyped_storage().copy_(recomputation_output.untyped_storage()) self.ctx.outputs = outputs + self.ctx.inputs = inputs self.outputs = None self.ctx = None diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index d4e990041ca..3427b5ee3ab 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. from abc import ABC, abstractmethod from dataclasses import dataclass @@ -22,6 +22,11 @@ get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, ) +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, +) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import MegatronModule @@ -188,6 +193,21 @@ def __init__( and "core_attn" in self.config.recompute_modules ) + self.offload_qkv_linear = ( + self.config.fine_grained_activation_offloading + and "qkv_linear" in self.config.offload_modules + ) + + self.offload_core_attention = ( + self.config.fine_grained_activation_offloading + and "core_attn" in self.config.offload_modules + ) + + self.offload_attn_proj = ( + self.config.fine_grained_activation_offloading + and "attn_proj" in self.config.offload_modules + ) + # Output. self.linear_proj = build_module( submodules.linear_proj, @@ -730,9 +750,17 @@ def forward( if output_gate: assert split_qkv, "output_gate is not supported for unsplit mixed_qkv tensor." 
- qkv_output = self.get_query_key_value_tensors( - hidden_states, key_value_states, output_gate=output_gate, split_qkv=split_qkv - ) + if self.offload_qkv_linear: + hidden_states = fine_grained_offloading_group_start(hidden_states, name="qkv_linear") + with get_fine_grained_offloading_context(self.offload_qkv_linear): + qkv_output = self.get_query_key_value_tensors( + hidden_states, key_value_states, output_gate=output_gate, split_qkv=split_qkv + ) + if self.offload_qkv_linear: + qkv_output, _ = fine_grained_offloading_group_commit( + qkv_output, name="qkv_linear", forced_released_tensors=[hidden_states] + ) + attn_mask_type = self.attn_mask_type block_table = None gate = None @@ -881,17 +909,20 @@ def forward( packed_seq_params=packed_seq_params, ) else: + if self.offload_core_attention and self.training: + query = fine_grained_offloading_group_start(query, name="core_attn") if inference_context is None or inference_context.is_static_batching(): # Static batching attention kernel. - core_attn_out = self.core_attention( - query, - key, - value, - attention_mask, - attn_mask_type=attn_mask_type, - attention_bias=attention_bias, - packed_seq_params=packed_seq_params, - ) + with get_fine_grained_offloading_context(self.offload_core_attention): + core_attn_out = self.core_attention( + query, + key, + value, + attention_mask, + attn_mask_type=attn_mask_type, + attention_bias=attention_bias, + packed_seq_params=packed_seq_params, + ) else: # Dynamic batching attention kernel. 
@@ -911,6 +942,10 @@ def forward( block_table, ) core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)') + if self.offload_core_attention and self.training: + (core_attn_out,) = fine_grained_offloading_group_commit( + core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] + ) if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd': # reshape to same output shape as unpacked case @@ -931,7 +966,14 @@ def forward( # ================= nvtx_range_push(suffix="linear_proj") - output, bias = self.linear_proj(core_attn_out) + if self.offload_attn_proj: + core_attn_out = fine_grained_offloading_group_start(core_attn_out, name="attn_proj") + with get_fine_grained_offloading_context(self.offload_attn_proj): + output, bias = self.linear_proj(core_attn_out) + if self.offload_attn_proj: + output, bias = fine_grained_offloading_group_commit( + output, bias, name="attn_proj", forced_released_tensors=[core_attn_out] + ) nvtx_range_pop(suffix="linear_proj") return output, bias diff --git a/megatron/core/transformer/moe/README.md b/megatron/core/transformer/moe/README.md index 0a933aed0df..a44daea38e2 100644 --- a/megatron/core/transformer/moe/README.md +++ b/megatron/core/transformer/moe/README.md @@ -210,6 +210,20 @@ Enable A2A overlap across different batches inspired by the DSv3 DualPipe implme --delay-wgrad-compute ``` +### Fine-grained Activation Offloading (collaborated with rednote) +Offload the input activation at the granularity of modules + +**Usage** +```bash +# Enable fine-grained activation offloading +--fine-grained-activation-offloading + +# Specify which modules are going to offload its input +# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". 
+--offload-modules expert_fc1 +``` +For more details, please refer to the ```docs/source/api-guide/fine_grained_activation_offloading.md``` + ### MoE Related Arguments | Item | Description | | --- | --- | diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index d0ac20a7536..ca308da0d21 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import copy import itertools @@ -27,6 +27,11 @@ from megatron.core.fusions.fused_bias_swiglu import weighted_bias_swiglu_impl from megatron.core.fusions.fused_weighted_squared_relu import weighted_squared_relu_impl from megatron.core.jit import jit_fuser +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, +) from megatron.core.tensor_parallel.layers import ( _initialize_affine_weight_cpu, _initialize_affine_weight_gpu, @@ -825,6 +830,16 @@ def __init__( tp_group=pg_collection.expt_tp, ) + self.offload_expert_fc1 = ( + self.config.fine_grained_activation_offloading + and "expert_fc1" in self.config.offload_modules + ) + + self.offload_moe_act = ( + self.config.fine_grained_activation_offloading + and "moe_act" in self.config.offload_modules + ) + self.activation_recompute = ( self.config.recompute_granularity == 'selective' and "moe_act" in self.config.recompute_modules @@ -834,6 +849,12 @@ def __init__( set_save_original_input(self.linear_fc2) + # This is to avoid the CPU overhead of multiple d2h copies + if self.offload_expert_fc1 and not (self.config.fp8 or self.config.fp4): + from megatron.core.extensions.transformer_engine import set_save_original_input + + set_save_original_input(self.linear_fc1) + if self.config.fp8 or self.config.fp4: 
assert HAVE_TE, "FP8 and FP4 requires TE." self.quantization_padding = Fp8Padding(self.num_local_experts) @@ -898,9 +919,21 @@ def forward( # Probs already applied, so reset to 1. permuted_probs = torch.ones_like(permuted_probs) - intermediate_parallel, bias_parallel = self.linear_fc1( - permuted_local_hidden_states, tokens_per_expert - ) + if self.offload_expert_fc1: + permuted_local_hidden_states = fine_grained_offloading_group_start( + permuted_local_hidden_states, name="expert_fc1" + ) + with get_fine_grained_offloading_context(self.offload_expert_fc1): + fc1_output, bias_parallel = self.linear_fc1( + permuted_local_hidden_states, tokens_per_expert + ) + if self.offload_expert_fc1: + fc1_output, bias_parallel = fine_grained_offloading_group_commit( + fc1_output, + bias_parallel, + name="expert_fc1", + forced_released_tensors=[permuted_local_hidden_states], + ) def bias_act_func(intermediate_parallel, bias_parallel, permuted_probs): if self.config.use_te_activation_func: @@ -960,18 +993,26 @@ def glu(x): intermediate_parallel = intermediate_parallel.to(original_dtype) return intermediate_parallel + if self.offload_moe_act: + fc1_output = fine_grained_offloading_group_start(fc1_output, name="moe_act") + if self.activation_recompute: self.activation_checkpoint = tensor_parallel.CheckpointWithoutOutput() - intermediate_parallel = self.activation_checkpoint.checkpoint( - bias_act_func, intermediate_parallel, bias_parallel, permuted_probs - ) - output, output_bias = self.linear_fc2(intermediate_parallel, tokens_per_expert) - self.activation_checkpoint.discard_output_and_register_recompute(output) + with get_fine_grained_offloading_context(self.offload_moe_act): + bias_act_output = self.activation_checkpoint.checkpoint( + bias_act_func, fc1_output, bias_parallel, permuted_probs + ) else: - intermediate_parallel = bias_act_func( - intermediate_parallel, bias_parallel, permuted_probs + with get_fine_grained_offloading_context(self.offload_moe_act): + bias_act_output = 
bias_act_func(fc1_output, bias_parallel, permuted_probs) + + output, output_bias = self.linear_fc2(bias_act_output, tokens_per_expert) + if self.activation_recompute: + self.activation_checkpoint.discard_output_and_register_recompute(output) + if self.offload_moe_act: + (output,) = fine_grained_offloading_group_commit( + output, name="moe_act", forced_released_tensors=[fc1_output] ) - output, output_bias = self.linear_fc2(intermediate_parallel, tokens_per_expert) # upad and concat the output if self.config.fp8 or self.config.fp4: diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index a8893ebec36..5d3f16c1041 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import math @@ -22,6 +22,11 @@ _yarn_get_mscale, apply_rotary_pos_emb, ) +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, +) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel.layers import ColumnParallelLinear from megatron.core.tensor_parallel.mappings import ( @@ -266,15 +271,19 @@ def forward( query, key, value, attention_mask, packed_seq_params=packed_seq_params ) else: + if self.offload_core_attention and self.training: + query = fine_grained_offloading_group_start(query, name="core_attn") + if inference_context is None or inference_context.is_static_batching(): - core_attn_out = self.core_attention( - query, - key, - value, - attention_mask, - packed_seq_params=packed_seq_params, - attn_mask_type=attn_mask_type, - ) + with get_fine_grained_offloading_context(self.offload_core_attention): + core_attn_out = 
self.core_attention( + query, + key, + value, + attention_mask, + packed_seq_params=packed_seq_params, + attn_mask_type=attn_mask_type, + ) elif self.cache_mla_latents: # Dynamic batching attention kernel. q, k, v = (query, key, value) @@ -295,6 +304,10 @@ def forward( # Only rearrange if not in absorption mode (Flash MLA handles format correctly) if not inference_context.is_decode_only(): core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)') + if self.offload_core_attention and self.training: + (core_attn_out,) = fine_grained_offloading_group_commit( + core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] + ) # We are doing absorption with cache mla latents and decode mode. if self.cache_mla_latents and inference_context.is_decode_only(): @@ -320,7 +333,14 @@ def forward( # ================= # Output. [sq, b, h] # ================= - output, bias = self.linear_proj(core_attn_out) + if self.offload_attn_proj: + core_attn_out = fine_grained_offloading_group_start(core_attn_out, name="attn_proj") + with get_fine_grained_offloading_context(self.offload_attn_proj): + output, bias = self.linear_proj(core_attn_out) + if self.offload_attn_proj: + output, bias = fine_grained_offloading_group_commit( + output, bias, name="attn_proj", forced_released_tensors=[core_attn_out] + ) return output, bias diff --git a/megatron/core/transformer/multi_token_prediction.py b/megatron/core/transformer/multi_token_prediction.py index bd3aa9c8c96..a619b9ffa55 100755 --- a/megatron/core/transformer/multi_token_prediction.py +++ b/megatron/core/transformer/multi_token_prediction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from contextlib import nullcontext from dataclasses import dataclass @@ -13,6 +13,9 @@ from megatron.core.fp8_utils import get_fp8_context from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_set_last_layer, +) from megatron.core.pipeline_parallel.utils import is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel import ( @@ -901,6 +904,8 @@ def forward( hidden_states_list = list(torch.chunk(hidden_states, 1 + offset, dim=0)) hidden_states = hidden_states_list[offset] for layer_number in range(len(self.layers)): + if self.config.fine_grained_activation_offloading: + fine_grained_offloading_set_last_layer(layer_number == len(self.layers) - 1) (hidden_states, input_ids, position_ids) = self.layers[layer_number]( input_ids=input_ids, position_ids=position_ids, diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index aead6133f22..06e8f1372f4 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import logging from contextlib import nullcontext from dataclasses import dataclass @@ -16,6 +16,9 @@ from megatron.core.fusions.fused_layer_norm import FusedLayerNorm from megatron.core.inference.contexts import BaseInferenceContext from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_set_last_layer, +) from megatron.core.pipeline_parallel.utils import is_vp_first_stage, is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.enums import LayerType @@ -693,6 +696,11 @@ def forward( else: inner_quantization_context = nullcontext() + if self.config.fine_grained_activation_offloading: + fine_grained_offloading_set_last_layer( + l_no == self.num_layers_per_pipeline_rank - 1 + ) + with self.offload_context, inner_quantization_context: hidden_states, context = layer( hidden_states=hidden_states, diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index b39b7706feb..ecc700375cd 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import warnings from dataclasses import dataclass @@ -772,6 +772,25 @@ class TransformerConfig(ModelParallelConfig): """Transformer implementation to use. Options are 'transformer_engine' for Transformer Engine and 'local' for MCore.""" + ##################################### + # Fine-grained Activation Offloading + ##################################### + fine_grained_activation_offloading: bool = False + """If True, offload the input of the specified modules to the CPU.""" + + offload_modules: Optional[list[str]] = None + """The submodules to offload its input. 
+ choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". + "attn_norm": offload the input of the normalization in the attention part. + "core_attn": offload the input of the core attention part. + "mlp_norm": offload the input of the normalization in the mlp part. + "attn_proj": offload the input of the attn linear projection part. + "expert_fc1": offload the input of the expert fc1 part. + "moe_act": offload the input of the moe act part. + """ + min_offloaded_tensor_size: int = 1024 * 1024 + """The minimum size of the tensor to be offloaded.""" + def __post_init__(self): """Python dataclass method that is used to modify attributes after initialization. See https://docs.python.org/3/library/dataclasses.html#post-init-processing for more @@ -1117,6 +1136,28 @@ def __post_init__(self): if "moe" not in self.recompute_modules: self.recompute_modules.append("moe") + if self.fine_grained_activation_offloading: + assert self.offload_modules is not None and len(self.offload_modules) > 0 + allowed_modules = { + "core_attn", + "attn_proj", + "expert_fc1", + "moe_act", + "attn_norm", + "mlp_norm", + } + invalid_modules = set(self.offload_modules) - allowed_modules + assert not invalid_modules, ( + f'Invalid choices for offload_modules: {invalid_modules}. ' + f'Allowed modules are: {allowed_modules}' + ) + if "attn_proj" in self.offload_modules and "core_attn" not in self.offload_modules: + raise ValueError( + "attn_proj cannot be set to offload_modules alone without core_attn " + "because the input of attn_proj is the output of core_attn, " + "which is needed in core_attn.backward()." 
+ ) + if ( self.num_layers_in_first_pipeline_stage is not None or self.num_layers_in_last_pipeline_stage is not None diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index a5babece9d0..c36ff7515e4 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import logging import warnings @@ -397,6 +397,16 @@ def __init__( if "mlp" in self.config.recompute_modules: if not isinstance(self.mlp, MoELayer): self.recompute_mlp = True + self.offload_attn_norm = ( + self.config.fine_grained_activation_offloading + and "attn_norm" in self.config.offload_modules + and not isinstance(self.input_layernorm, IdentityOp) + ) + self.offload_mlp_norm = ( + self.config.fine_grained_activation_offloading + and "mlp_norm" in self.config.offload_modules + and not isinstance(self.pre_mlp_layernorm, IdentityOp) + ) # @jcasper how should we handle nvfuser? # Set bias+dropout+add fusion grad_enable execution handler. @@ -479,20 +489,29 @@ def _forward_attention( context (Tensor): Updated context tensor if cross-attention is used, otherwise None. """ + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, + ) inference_context = deprecate_inference_params(inference_context, inference_params) # Residual connection. 
residual = hidden_states + if self.offload_attn_norm: + hidden_states = fine_grained_offloading_group_start(hidden_states, name="attn_norm") # Optional Input Layer norm if self.recompute_input_layernorm: self.input_layernorm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - input_layernorm_output = self.input_layernorm_checkpoint.checkpoint( - self.input_layernorm, hidden_states - ) + with get_fine_grained_offloading_context(self.offload_attn_norm): + input_layernorm_output = self.input_layernorm_checkpoint.checkpoint( + self.input_layernorm, hidden_states + ) else: - input_layernorm_output = self.input_layernorm(hidden_states) + with get_fine_grained_offloading_context(self.offload_attn_norm): + input_layernorm_output = self.input_layernorm(hidden_states) # Self attention. nvtx_range_push(suffix="self_attention") @@ -526,6 +545,11 @@ def _forward_attention( ) nvtx_range_pop(suffix="self_attn_bda") + if self.offload_attn_norm: + (hidden_states,) = fine_grained_offloading_group_commit( + hidden_states, name="attn_norm", forced_released_tensors=[residual] + ) + # Residual connection. residual = hidden_states @@ -563,17 +587,27 @@ def _forward_mlp(self, hidden_states, inference_context=None): output (Tensor): Transformed hidden states of shape [s, b, h]. """ + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, + ) + # Residual connection. residual = hidden_states + if self.offload_mlp_norm: + hidden_states = fine_grained_offloading_group_start(hidden_states, name="mlp_norm") # Optional Layer norm post the cross-attention. 
if self.recompute_pre_mlp_layernorm: self.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint( - self.pre_mlp_layernorm, hidden_states - ) + with get_fine_grained_offloading_context(self.offload_mlp_norm): + pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint( + self.pre_mlp_layernorm, hidden_states + ) else: - pre_mlp_layernorm_output = self.pre_mlp_layernorm(hidden_states) + with get_fine_grained_offloading_context(self.offload_mlp_norm): + pre_mlp_layernorm_output = self.pre_mlp_layernorm(hidden_states) nvtx_range_push(suffix="mlp") # Potentially chunk the MLP computation during prefill to minimize the peak activation size @@ -633,6 +667,10 @@ def _forward_mlp(self, hidden_states, inference_context=None): mlp_output_with_bias, residual, self.hidden_dropout ) nvtx_range_pop(suffix="mlp_bda") + if self.offload_mlp_norm: + (hidden_states,) = fine_grained_offloading_group_commit( + hidden_states, name="mlp_norm", forced_released_tensors=[residual] + ) # Jit compiled function creates 'view' tensor. This tensor # potentially gets saved in the MPU checkpoint function context, diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index bdf915a8ae1..8e5f343b73c 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1216,6 +1216,10 @@ def validate_args(args, defaults={}): "when enabling delay_wgrad_compute" ) + if args.fine_grained_activation_offloading: + assert args.transformer_impl == 'transformer_engine', \ + "Fine-grained activation offloading is only supported with transformer_engine implementation" + if args.mtp_num_layers: assert not args.use_legacy_models, "The legacy Megatron models does not support Multi-Token Prediction (MTP)." 
assert args.position_embedding_type == "rope" or args.position_embedding_type == "none", ( @@ -2327,7 +2331,12 @@ def _add_training_args(parser): help='The communicator group names to use high priority streams.') group.add_argument('--use-te-activation-func', action='store_true', help='Use activation function kernel from Transformer Engine in MLP module.') - + group.add_argument('--fine-grained-activation-offloading', action='store_true', + help='Enable fine-grained activation offloading.') + group.add_argument('--offload-modules', nargs='*', type=str, default=[], + help='The submodules to offload its input. Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act".') + group.add_argument('--min-offloaded-tensor-size', type=int, default=1024*1024, + help='The minimum size of the tensor to be offloaded.') return parser diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json new file mode 100644 index 00000000000..30ea509a50b --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json @@ -0,0 +1,110 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 11.0637, + "5": 9.48263, + "10": 9.04035, + "15": 8.00837, + "20": 7.88364, + "25": 7.67597, + "30": 7.63447, + "35": 7.21393, + "40": 7.55564, + "45": 7.21045, + "50": 7.05439 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 38802064.0, + "5": 394456256.0, + "10": 571185472.0, + "15": 699100416.0, + "20": 891692160.0, + "25": 748799104.0, + "30": 794511296.0, + "35": 671593792.0, + "40": 421718816.0, + "45": 517934176.0, + "50": 472902496.0 + } + }, + "mem-allocated-bytes": { + 
"start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 6025468416.0, + "5": 6025470464.0, + "10": 6025470464.0, + "15": 6025470464.0, + "20": 6025470464.0, + "25": 6025470464.0, + "30": 6025470464.0, + "35": 6025470464.0, + "40": 6025470464.0, + "45": 6025470464.0, + "50": 6025470464.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 45099868160.0, + "5": 49175810048.0, + "10": 49175810048.0, + "15": 49175810048.0, + "20": 49175810048.0, + "25": 49175810048.0, + "30": 49211260928.0, + "35": 49211260928.0, + "40": 49211260928.0, + "45": 49211260928.0, + "50": 49211260928.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 11.04508, + "5": 9.76285, + "10": 9.04997, + "15": 7.93865, + "20": 7.79984, + "25": 7.60324, + "30": 7.56633, + "35": 7.13802, + "40": 7.45784, + "45": 7.11892, + "50": 6.9559 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 52.8667, + "5": 2.06295, + "10": 1.09336, + "15": 1.10509, + "20": 1.08631, + "25": 1.08991, + "30": 1.10548, + "35": 1.10049, + "40": 1.11219, + "45": 1.09542, + "50": 1.09805 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json new file mode 100644 index 00000000000..30ea509a50b --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json @@ -0,0 +1,110 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 11.0637, + "5": 9.48263, + "10": 9.04035, + "15": 8.00837, + "20": 7.88364, + "25": 7.67597, + "30": 7.63447, + "35": 7.21393, + "40": 7.55564, 
+ "45": 7.21045, + "50": 7.05439 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 38802064.0, + "5": 394456256.0, + "10": 571185472.0, + "15": 699100416.0, + "20": 891692160.0, + "25": 748799104.0, + "30": 794511296.0, + "35": 671593792.0, + "40": 421718816.0, + "45": 517934176.0, + "50": 472902496.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 6025468416.0, + "5": 6025470464.0, + "10": 6025470464.0, + "15": 6025470464.0, + "20": 6025470464.0, + "25": 6025470464.0, + "30": 6025470464.0, + "35": 6025470464.0, + "40": 6025470464.0, + "45": 6025470464.0, + "50": 6025470464.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 45099868160.0, + "5": 49175810048.0, + "10": 49175810048.0, + "15": 49175810048.0, + "20": 49175810048.0, + "25": 49175810048.0, + "30": 49211260928.0, + "35": 49211260928.0, + "40": 49211260928.0, + "45": 49211260928.0, + "50": 49211260928.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 11.04508, + "5": 9.76285, + "10": 9.04997, + "15": 7.93865, + "20": 7.79984, + "25": 7.60324, + "30": 7.56633, + "35": 7.13802, + "40": 7.45784, + "45": 7.11892, + "50": 6.9559 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 52.8667, + "5": 2.06295, + "10": 1.09336, + "15": 1.10509, + "20": 1.08631, + "25": 1.08991, + "30": 1.10548, + "35": 1.10049, + "40": 1.11219, + "45": 1.09542, + "50": 1.09805 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml new file mode 100644 index 00000000000..9a125a1cf74 --- /dev/null +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml @@ -0,0 +1,139 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 32 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION +MODEL_ARGS: + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --expert-model-parallel-size: 4 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + # NOTE: uncomment if TE >= 2.9.0 + # --overlap-grad-reduce: true + # --overlap-param-gather: true + # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN + --attention-backend: unfused # TODO: switch back to fused attention after fix + # Training args + --use-mcore-models: true + --sequence-parallel: true + --disable-bias-linear: true + --micro-batch-size: 4 + --global-batch-size: 32 + --train-iters: 50 + --exit-duration-in-mins: 230 + --no-check-for-nan-in-loss-and-grad: true + --no-rope-fusion: true + --cross-entropy-loss-fusion: true + --cross-entropy-fusion-impl: native + --manual-gc: true + --manual-gc-interval: 100 + --recompute-granularity: selective + --recompute-modules: "[layernorm mla_up_proj mlp moe_act]" + --fine-grained-activation-offloading: true + --offload-modules: "[expert_fc1 moe_act attn_norm mlp_norm]" + # Transformer Engine args + --transformer-impl: transformer_engine + # Data args + --seq-length: 4096 + --data-cache-path: ${DATA_CACHE_PATH} + --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/bpe/vocab.json + --merge-file: ${DATA_PATH}/bpe/merges.txt + --split: 949,50,1 + # Add network size args + --num-layers: 15 + --moe-layer-freq: ([0]*3+[1]*12) + --pipeline-model-parallel-layout: Et*3\\|\\(tt\\|\\)*6mL # Et*3|(tt|)*6mL + --hidden-size: 1024 + --ffn-hidden-size: 4096 + 
--num-attention-heads: 32 + --kv-channels: 128 + --max-position-embeddings: 4096 + --position-embedding-type: rope + --rotary-base: 10000 + --make-vocab-size-divisible-by: 3232 + --normalization: RMSNorm + --norm-epsilon: 1e-6 + --swiglu: true + --untie-embeddings-and-output-weights: true + --multi-latent-attention: true + # Comment out the following MTP args to disable MTP + --mtp-num-layers: 1 + --mtp-loss-scaling-factor: 0.1 + # Add regularization args + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --clip-grad: 1.0 + --weight-decay: 0.1 + --qk-layernorm: true + # Add learning rate args + --lr-warmup-fraction: .01 + --lr: 0.00015 + --min-lr: 1.0e-5 + --lr-decay-style: cosine + --adam-beta1: 0.9 + --adam-beta2: 0.95 + # Add MoE args + --num-experts: 32 + --moe-ffn-hidden-size: 1024 + --moe-shared-expert-intermediate-size: 1024 + --moe-router-load-balancing-type: seq_aux_loss + --moe-router-topk: 4 + --moe-token-dispatcher-type: alltoall + --moe-router-pre-softmax: true + --moe-grouped-gemm: true + --moe-aux-loss-coeff: 1e-4 + --moe-router-group-topk: 2 + --moe-router-num-groups: 4 + --moe-router-topk-scaling-factor: 2.0 + --moe-router-score-function: sigmoid + --moe-router-enable-expert-bias: true + --moe-router-bias-update-rate: 1e-3 + --moe-router-dtype: fp32 + --moe-permute-fusion: true + # Add MLA args + --q-lora-rank: 1536 + --kv-lora-rank: 512 + --qk-head-dim: 128 + --qk-pos-emb-head-dim: 64 + --v-head-dim: 128 + --rotary-scaling-factor: 40 + --mscale: 1.0 + --mscale-all-dim: 1.0 + # Add validation args + --eval-iters: 32 + --eval-interval: 200 + # Add checkpointing args + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} + --save-interval: 25 + # Add initialization args + --init-method-std: 0.02 + # Add logging args + --log-timers-to-tensorboard: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-params-norm: true + --log-validation-ppl-to-tensorboard: true + --log-throughput: true + --log-interval: 1 + 
--logging-level: 40 + --tensorboard-dir: ${TENSORBOARD_PATH} + # Add mixed precision args + --bf16: true + --exit-interval: 50 + --overlap-moe-expert-parallel-comm: true +TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular +METRICS: + - "iteration-time" + - "lm loss" + - "num-zeros" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" + - "mtp_1 loss" diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json new file mode 100644 index 00000000000..3687e19e563 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json @@ -0,0 +1,92 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 11.04266, + "5": 9.38536, + "10": 8.82761, + "15": 7.86966, + "20": 7.72022, + "25": 7.53119, + "30": 7.5026, + "35": 7.10343, + "40": 7.42037, + "45": 7.07056, + "50": 6.90946 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 844114112.0, + "5": 856834688.0, + "10": 928751040.0, + "15": 952825152.0, + "20": 987111232.0, + "25": 926008384.0, + "30": 864767232.0, + "35": 855095360.0, + "40": 849505920.0, + "45": 847187584.0, + "50": 846195840.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 4419107328.0, + "5": 4419108864.0, + "10": 4419108864.0, + "15": 4419108864.0, + "20": 4419108864.0, + "25": 4419108864.0, + "30": 4419108864.0, + "35": 4419108864.0, + "40": 4419108864.0, + "45": 4419108864.0, + "50": 4419108864.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + 
"1": 37959917568.0, + "5": 39583289344.0, + "10": 39583289344.0, + "15": 39583289344.0, + "20": 39583289344.0, + "25": 39583289344.0, + "30": 39583289344.0, + "35": 39583289344.0, + "40": 39583289344.0, + "45": 39583289344.0, + "50": 39583289344.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 58.78709, + "5": 2.40565, + "10": 1.13046, + "15": 1.39764, + "20": 1.1273, + "25": 1.12154, + "30": 1.03587, + "35": 1.09545, + "40": 1.09901, + "45": 1.00656, + "50": 1.00794 + } + } +} diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json new file mode 100644 index 00000000000..3687e19e563 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json @@ -0,0 +1,92 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 11.04266, + "5": 9.38536, + "10": 8.82761, + "15": 7.86966, + "20": 7.72022, + "25": 7.53119, + "30": 7.5026, + "35": 7.10343, + "40": 7.42037, + "45": 7.07056, + "50": 6.90946 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 844114112.0, + "5": 856834688.0, + "10": 928751040.0, + "15": 952825152.0, + "20": 987111232.0, + "25": 926008384.0, + "30": 864767232.0, + "35": 855095360.0, + "40": 849505920.0, + "45": 847187584.0, + "50": 846195840.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 4419107328.0, + "5": 4419108864.0, + "10": 4419108864.0, + "15": 4419108864.0, + "20": 4419108864.0, + "25": 4419108864.0, + "30": 4419108864.0, + "35": 4419108864.0, + "40": 4419108864.0, + "45": 
4419108864.0, + "50": 4419108864.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 37959917568.0, + "5": 39583289344.0, + "10": 39583289344.0, + "15": 39583289344.0, + "20": 39583289344.0, + "25": 39583289344.0, + "30": 39583289344.0, + "35": 39583289344.0, + "40": 39583289344.0, + "45": 39583289344.0, + "50": 39583289344.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 5, + "values": { + "1": 58.78709, + "5": 2.40565, + "10": 1.13046, + "15": 1.39764, + "20": 1.1273, + "25": 1.12154, + "30": 1.03587, + "35": 1.09545, + "40": 1.09901, + "45": 1.00656, + "50": 1.00794 + } + } +} diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml new file mode 100644 index 00000000000..8832d687004 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml @@ -0,0 +1,134 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION +MODEL_ARGS: + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --expert-model-parallel-size: 4 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + # NOTE: uncomment if TE >= 2.9.0 + # --overlap-grad-reduce: true + # --overlap-param-gather: true + # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN + --attention-backend: unfused # TODO: switch back to fused attention after fix + # Training args + --use-mcore-models: true + 
--sequence-parallel: true + --disable-bias-linear: true + --micro-batch-size: 4 + --global-batch-size: 32 + --train-iters: 50 + --exit-duration-in-mins: 230 + --no-check-for-nan-in-loss-and-grad: true + --no-rope-fusion: true + --cross-entropy-loss-fusion: true + --cross-entropy-fusion-impl: native + --manual-gc: true + --manual-gc-interval: 100 + --recompute-granularity: selective + --recompute-modules: "[layernorm mla_up_proj mlp moe_act]" + --fine-grained-activation-offloading: true + --offload-modules: "[expert_fc1 moe_act attn_norm mlp_norm]" + # Transformer Engine args + --transformer-impl: transformer_engine + # Data args + --seq-length: 4096 + --data-cache-path: ${DATA_CACHE_PATH} + --data-path: ${DATA_PATH}/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/bpe/vocab.json + --merge-file: ${DATA_PATH}/bpe/merges.txt + --split: 949,50,1 + # Add network size args + --num-layers: 15 + --moe-layer-freq: ([0]*3+[1]*12) + --pipeline-model-parallel-layout: Et*3\\|\\(tt\\|\\)*6L # Et*3|(tt|)*6L + --hidden-size: 1024 + --ffn-hidden-size: 4096 + --num-attention-heads: 32 + --kv-channels: 128 + --max-position-embeddings: 4096 + --position-embedding-type: rope + --rotary-base: 10000 + --make-vocab-size-divisible-by: 3232 + --normalization: RMSNorm + --norm-epsilon: 1e-6 + --swiglu: true + --untie-embeddings-and-output-weights: true + --multi-latent-attention: true + # Add regularization args + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --clip-grad: 1.0 + --weight-decay: 0.1 + --qk-layernorm: true + # Add learning rate args + --lr-warmup-fraction: .01 + --lr: 0.00015 + --min-lr: 1.0e-5 + --lr-decay-style: cosine + --adam-beta1: 0.9 + --adam-beta2: 0.95 + # Add MoE args + --num-experts: 32 + --moe-ffn-hidden-size: 1024 + --moe-shared-expert-intermediate-size: 1024 + --moe-router-load-balancing-type: seq_aux_loss + --moe-router-topk: 4 + --moe-token-dispatcher-type: alltoall + --moe-router-pre-softmax: true + --moe-grouped-gemm: true + --moe-aux-loss-coeff: 
1e-4 + --moe-router-group-topk: 2 + --moe-router-num-groups: 4 + --moe-router-topk-scaling-factor: 2.0 + --moe-router-score-function: sigmoid + --moe-router-enable-expert-bias: true + --moe-router-bias-update-rate: 1e-3 + --moe-router-dtype: fp32 + --moe-permute-fusion: true + # Add MLA args + --q-lora-rank: 1536 + --kv-lora-rank: 512 + --qk-head-dim: 128 + --qk-pos-emb-head-dim: 64 + --v-head-dim: 128 + --rotary-scaling-factor: 40 + --mscale: 1.0 + --mscale-all-dim: 1.0 + # Add validation args + --eval-iters: 32 + --eval-interval: 200 + # Add checkpointing args + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} + --save-interval: 25 + # Add initialization args + --init-method-std: 0.02 + # Add logging args + --log-timers-to-tensorboard: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-params-norm: true + --log-validation-ppl-to-tensorboard: true + --log-throughput: true + --log-interval: 1 + --logging-level: 40 + --tensorboard-dir: ${TENSORBOARD_PATH} + # Add mixed precision args + --bf16: true + --exit-interval: 50 +TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular +METRICS: + - "iteration-time" + - "lm loss" + - "num-zeros" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 8164ca37df8..63320ae3c3d 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -124,6 +124,16 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] + - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] + - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] ####################################################################### # Super important MR tests that run for both 
DEV and LTS per MR # ####################################################################### diff --git a/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py new file mode 100644 index 00000000000..edec95288c2 --- /dev/null +++ b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py @@ -0,0 +1,187 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +import gc + +import pytest +import torch + +EPSILON = 0.1 + +# Skip all tests if CUDA is not available +cuda_available = torch.cuda.is_available() + + +def _reset_cuda_memory(): + gc.collect() + if cuda_available: + torch.cuda.empty_cache() + + +class ToyModel(torch.nn.Module): + def __init__(self, hidden_size: int = 2048, num_layers: int = 4, dtype=torch.bfloat16): + super().__init__() + layers = [] + for _ in range(num_layers): + layers.append( + torch.nn.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device="cuda") + ) + self.net = torch.nn.Sequential(*layers).to(device="cuda", dtype=dtype) + self.hidden_size = hidden_size + self.num_layers = num_layers + self.dtype = dtype + + # Prevent weights/bias from being considered activation tensors for offload; + # ensure we only count activation tensors (inputs x) in memory accounting. + for p in self.parameters(): + try: + setattr(p, "offloading_activation", False) + except Exception: + pass + + def forward(self, x, use_offload: bool = False): + from megatron.core.pipeline_parallel import fine_grained_activation_offload as off + + if use_offload: + # Initialize a new chunk (microbatch) and enable offload context. + with off.get_fine_grained_offloading_context(True): + off.fine_grained_offloading_init_chunk_handler( + vp_stage=None, min_offloaded_tensor_size=1 + ) + for i, layer in enumerate(self.net): + # Group by module; with this linear-only model, each group corresponds to a layer. 
+ off.fine_grained_offloading_set_last_layer(i == len(self.net) - 1) + x = off.fine_grained_offloading_group_start(x, name=f"layer_{i}") + x = layer(x) + # Commit the group; returns a tuple of tensors + (x,) = off.fine_grained_offloading_group_commit( + x, name=f"layer_{i}", forced_released_tensors=[] + ) + return x + # Baseline path (no offload hooks) + with ( + torch.autocast(device_type="cuda", dtype=self.dtype) + if self.dtype in (torch.float16, torch.bfloat16) + else torch.cuda.amp.autocast(enabled=False) + ): + for layer in self.net: + x = layer(x) + return x + + +@pytest.fixture(autouse=True) +def _monkeypatch_offload_deps(monkeypatch): + # Avoid requiring torch.distributed initialization and NVML in tests + import megatron.core.pipeline_parallel.fine_grained_activation_offload as off + + monkeypatch.setattr(off, "debug_rank", lambda *args, **kwargs: None, raising=False) + monkeypatch.setattr(off, "set_ideal_affinity_for_current_gpu", lambda: None, raising=False) + # Ensure a clean state each test + off.fine_grained_offloading_reset() + yield + off.fine_grained_offloading_reset() + + +def test_fine_grained_activation_offload_memory_reduction(): + torch.manual_seed(1234) + # Use a linear-only stack so theoretical saved memory equals sum of per-layer input x bytes. 
+ model = ToyModel(hidden_size=2048, num_layers=8, dtype=torch.bfloat16).eval() + + # Create input + inp = torch.randn( + (2048, model.hidden_size), device="cuda", dtype=torch.bfloat16, requires_grad=True + ) + + # Warmup to stabilize allocator behavior + _reset_cuda_memory() + out = model(inp, use_offload=False) + (out.sum()).backward() + torch.cuda.synchronize() + _reset_cuda_memory() + + # Baseline memory measurement (no offload) + _reset_cuda_memory() + inp_baseline = inp.detach().clone().requires_grad_(True) + baseline_mem_before = torch.cuda.memory_allocated() / (1024**2) + out_base = model(inp_baseline, use_offload=False) + baseline_mem_after = (torch.cuda.memory_allocated() - out_base.nbytes) / (1024**2) + (out_base.sum()).backward() + torch.cuda.synchronize() + baseline_delta = baseline_mem_after - baseline_mem_before + + # Offload memory measurement + from megatron.core.pipeline_parallel import fine_grained_activation_offload as off + + off.fine_grained_offloading_reset() + _reset_cuda_memory() + inp_off = inp.detach().clone().requires_grad_(True) + offload_mem_before = torch.cuda.memory_allocated() / (1024**2) + out_off = model(inp_off, use_offload=True) + offload_mem_after = (torch.cuda.memory_allocated() - out_off.nbytes) / (1024**2) + (out_off.sum()).backward() + torch.cuda.synchronize() + offload_delta = offload_mem_after - offload_mem_before + + # Offload should reduce peak cached memory usage after forward + assert ( + offload_delta < baseline_delta + ), f"offload did not reduce memory: off={offload_delta:.2f}MiB base={baseline_delta:.2f}MiB" + + # Theoretical savings: storing per-layer input x (same shape each layer). + bytes_per_elem = inp.element_size() # 2 for bfloat16 + input_bytes = inp.numel() * bytes_per_elem + # -2 because the first and last activations are not offloaded + expected_saved_mib = (model.num_layers - 2) * (input_bytes / (1024**2)) + + # Actual savings ≈ baseline_delta - offload_delta (both exclude output tensor memory). 
+ actual_saved_mib = baseline_delta - offload_delta + + # Allow slack for allocator jitter and extra intermediates; magnitudes should match. + rel_err = abs(actual_saved_mib - expected_saved_mib) / max(expected_saved_mib, 1e-6) + assert ( + rel_err <= EPSILON + ), f"saved mismatch: actual={actual_saved_mib:.2f}MiB expected~={expected_saved_mib:.2f}MiB (rel_err={rel_err:.2f})" + + +def test_fine_grained_activation_offload_output_and_grad_consistency(): + torch.manual_seed(2025) + hidden = 1024 + layers = 3 + + # Create identical models by resetting seed + torch.manual_seed(2025) + model_base = ToyModel(hidden_size=hidden, num_layers=layers, dtype=torch.bfloat16).train() + torch.manual_seed(2025) + model_off = ToyModel(hidden_size=hidden, num_layers=layers, dtype=torch.bfloat16).train() + + # Same input and target + inp = torch.randn((32, hidden), device="cuda", dtype=torch.bfloat16, requires_grad=True) + target = torch.randn_like(inp) + + # Baseline forward/backward + out_base = model_base(inp, use_offload=False) + loss_base = torch.nn.functional.mse_loss(out_base, target) + loss_base.backward() + grads_base = [ + p.grad.detach().clone() if p.grad is not None else None for p in model_base.parameters() + ] + + # Offload forward/backward + from megatron.core.pipeline_parallel import fine_grained_activation_offload as off + + off.fine_grained_offloading_reset() + out_off = model_off(inp.detach().clone().requires_grad_(True), use_offload=True) + loss_off = torch.nn.functional.mse_loss(out_off, target) + loss_off.backward() + grads_off = [ + p.grad.detach().clone() if p.grad is not None else None for p in model_off.parameters() + ] + + # Compare outputs + assert torch.allclose(out_off.float(), out_base.float(), rtol=1e-3, atol=1e-3) + + # Compare gradients parameter-wise + for gb, go in zip(grads_base, grads_off): + if gb is None and go is None: + continue + assert gb is not None and go is not None + assert torch.allclose(go.float(), gb.float(), rtol=1e-3, atol=1e-3) 
From 65c8f40b4e8df619b5c829c699b353fe7ee6894d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 27 Oct 2025 12:36:12 +0000 Subject: [PATCH 075/334] tests: Fix paths for test_cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../model_config.yaml | 6 +++--- .../model_config.yaml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml index 9a125a1cf74..d9ec0456190 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml @@ -42,9 +42,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 15 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml index 8832d687004..f4b64722712 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml @@ -42,9 +42,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 15 From 2155c47d19fa2af5e10160194d6b7a79695f091f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 27 Oct 2025 13:38:29 +0000 Subject: [PATCH 076/334] Revert "[Dev] feat(moe): Fine-grained activation offloading (#1912)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 9069e1268f495407598d9f6771e363737505dab7. 
Signed-off-by: oliver könig --- .../fine_grained_activation_offloading.md | 29 - docs/source/api-guide/index.rst | 1 - .../offloading_and_recomputing.png | Bin 332427 -> 0 bytes .../core/extensions/transformer_engine.py | 12 +- .../common/model_chunk_schedule_plan.py | 9 +- .../core/models/gpt/fine_grained_callables.py | 23 +- megatron/core/models/gpt/gpt_model.py | 27 +- .../fine_grained_activation_offload.py | 603 ------------------ megatron/core/pipeline_parallel/schedules.py | 14 +- megatron/core/tensor_parallel/random.py | 9 +- megatron/core/transformer/attention.py | 70 +- megatron/core/transformer/moe/README.md | 14 - megatron/core/transformer/moe/experts.py | 65 +- .../transformer/multi_latent_attention.py | 40 +- .../transformer/multi_token_prediction.py | 7 +- .../core/transformer/transformer_block.py | 10 +- .../core/transformer/transformer_config.py | 43 +- .../core/transformer/transformer_layer.py | 56 +- megatron/training/arguments.py | 11 +- .../golden_values_dev_coreweave.json | 110 ---- .../golden_values_dev_eos.json | 110 ---- .../model_config.yaml | 139 ---- .../golden_values_dev_coreweave.json | 92 --- .../golden_values_dev_eos.json | 92 --- .../model_config.yaml | 134 ---- tests/test_utils/recipes/moe.yaml | 10 - ...test_fine_grained_activation_offloading.py | 187 ------ 27 files changed, 61 insertions(+), 1856 deletions(-) delete mode 100644 docs/source/api-guide/fine_grained_activation_offloading.md delete mode 100644 docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png delete mode 100644 megatron/core/pipeline_parallel/fine_grained_activation_offload.py delete mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_coreweave.json delete mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_eos.json delete mode 100644 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml delete mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_coreweave.json delete mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_eos.json delete mode 100644 tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml delete mode 100644 tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py diff --git a/docs/source/api-guide/fine_grained_activation_offloading.md b/docs/source/api-guide/fine_grained_activation_offloading.md deleted file mode 100644 index b4c2ea753fa..00000000000 --- a/docs/source/api-guide/fine_grained_activation_offloading.md +++ /dev/null @@ -1,29 +0,0 @@ -# Fine-grained Activation Offloading (collaborated with rednote) - -Memory capacity is more and more important with the rising of extreme sparse MoE models like DeepSeek-V3 and Qwen3-235B. Fine-grained recomputing reduces the memory footprint at the cost of extra recomputation, while offloading could utilize the host-device bandwidth to achieve nearly zero-overhead. Fine-grained Activation Offloading targets at offloading the activation at the granularity of specific modules, so that we can calibrate the amount of offloading activation to maximize the training throughput. 
- -**Features** -* Support PP=1/PP/Interleaved PP -* Compatible with fine-grained recomputation -* Support FP8 -* Support MTP -* Support mixed dense & moe layer -* Support A2A Overlap -* Support CUDA Graph - * (Temporary) cuda graph scope cannot contains the offloading modules - -**Usage** -```bash -# Enable fine-grained activation offloading ---fine-grained-activation-offloading - -# Specify which modules are going to offload its input -# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". ---offload-modules expert_fc1 -``` -**Compatible with Fine-grained Recomputation** -- For modules with minor perf overhead like layernorm or moe_act, use recomputing to reduce memory footprint; -- For other modules, use offloading to reduce memory footprint; -- Make sure the offloading/reloading could be overlapped with computing; - -![Fine-grained Activation Offloading and Fine-grained Recomputation](../images/fine_grained_activation_offloading/offloading_and_recomputing.png) diff --git a/docs/source/api-guide/index.rst b/docs/source/api-guide/index.rst index ac6d7cb0b2d..710a7caf4de 100644 --- a/docs/source/api-guide/index.rst +++ b/docs/source/api-guide/index.rst @@ -22,4 +22,3 @@ API Guide optimizer_cpu_offload multi_token_prediction tokenizers - fine_grained_activation_offloading diff --git a/docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png b/docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png deleted file mode 100644 index 6c8afa78bb180a0815aff02693690b864e9b01f8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 332427 zcmeFZXH-*bw+4z>P!Lfeq7L@*-C?8~5C;#x)#z@6hLxX~se14JQ{F&<%XMbHn z{-QX;L~;Jla|#O8GtB>->zv{K>l#W5iZBO?bAMf9Og{cPRsHqy&+%;bng8A~oAU3g zFBfK?{rmjFujimh(_@$-A1=L8e(p&@A>8)s`wX|W@iYa+BMOzLkM;b{Y|c{E^DAT! 
zcOzcEet(CTdP)Ck)tyGC_Sct|nI?BX{XH#RoPUn2RrZz8KNTrPak z{?_uwgRA#0UI#ore?V%T8BLe-Y$_=ZEJk?^1fl&QcD9YhB_$x%g2uqXRMC6qu0EnT z^Y6>QGWZt<|C+(SRPZko{>y~_GU2~W_%9Rw%Y^?j;lE7y|6?W$b~q=NX=~l3@V{{y zTuPZ%SnI?ssCy#nG*DHsshXO*>~>Z?pu3UEUQnOyH3j9xJO7_A`$AtvPKK6d26K`! z+`4Z$?a?a3(u!t?!%{z)E0r(axeuq@JiF%mV=c967$&$X@v($e9!WoaLk0}g4XbbP z{pruwLY%BzG|fBUU~vWYfi~s0y(p*c4_X*}wN34K%F7H9qJVLHZo~nRveQ32i?6jn zLesr;Nxd`q`=Db#J%hNusTQooQaS{BbJ;h1l%YVg8KRRl^z7Sr5pM(}`3=}#9ZKBy z%=GlwOeSGNB%iiXO5&O2BSNR74ytuGw2oYdT16G&gfI42P!L`pnbkI|@q3l3Ne-?! zDyhoCu#$PvW5Neq1**@k{8ICsf=8Q1N*2zVE*nq&he$76l`!&dwDK-pWd0EJP=_0= zmXd=bgmPY|{O!)06#8R6={x6Zsn$}5d6E0GZ9xY6q*#5|T4kFfg#dls9k zpl>snd{v9w!^R~Yg_;=5{>E}P+COxuF??JdKk^M!Q0NL-7X~#cF=c*swK#Bg8od>} z{;}mIrR0#Lqz`|WREH_s2T%BeYEE#sJdG}h^5SKIwxjXk_En_CclV>4?sZ@gtL0&q zNXtF|u79GstKUfgR2ax;x`WSO`V-E<>6GbBt7Ex>zfnB(jh#{Sy>p>gCk+z1ZfoDN zY#e+43npJ~QW9;N(}up5ZWsH%n@vamIH3xGMrDp^yr2PC6x2J_jd06yQSy@{pMUVY z&kV`lQ=%rJJ|^*P=e|2Nqyt>y1X7s|p#cTFayiP9nT19Z2QuTh=OMs*?rDwpK*JtE zFW%loUr|rwKWA{UzfTesp0ae7yd>T>$Y*9DaQAF8chs-v7}j_(64RIV-bpQlaE#l}DBJJHx)9K66_;g)-*ZwZEc|V4ia)G(H5Mna3>+g~ zTD^SolekVFsBA}LN+wxn({gk6RU*_)O0l2&OUuUbm!-7u=OijFi>ERMyVji_dPome zE%MXbVobV#mD`Cb!Z-gHodt(5`n^4O@&+WN{fh2ud&L!sJNJ>4rbf zCcHo=sN9lWe7t(Tk^zz>ee|*iX;WCeekS2AdT`kKzc>6#7B9-rmlrn#hP0C&B)lcd zvp*D8dmo#SAF#s4){Wk$az0V8~>w+nrVLw9l$D1Cc ziD5NE&&w~fl=%JpUSgiFS^khCBlihV{}5Y84XUr2)iG78m^V<*zl+XHzmfGfeEv~( zw%Ni!?{VHz-cr z*y+ho_a0Ebipj=X=x>%C!l!mx-Q2S7FRAIRjfaCc;>#Gx;y(8Bi}_1deUt}xg-mw- zBx8!;afZW8PU25>+DTKasTGfFp&_3Aq-7~Fe5|^Nnpm-PAv{7?=@ZNER{oDulgSo_ zqua7Xomzvv``53FZVL&}dMOpSO|g;9?}YjbUGlt5UwX*-lMg-1XBFg%nhD%N>>j#% z^;CR8rChXpZ4gJJc^GnV4@kJqq(s;*vORGM%QwxfbkjRrjPjI@M17jW09zRXH^%9T zWgaaqtMVUp#mfWgbkd8HIky;Y9VxHE`E~LI0=pZh^Lt*M^PqmU*xWX-v#e=P+<)h?U!T5ayr7iY6v(5gmfUB;pP<35dhLH- z@~7m#L)QU(|GX2te!f-@Ku4qJS-TfhwadkjbiQ|1#lP;#%EypD?V~`^v(8ntYkcx0j#STyd@)@5ZhrqK$oqBO9nLnW_WMh*e;d%n75bg?Z~@A3u!28S_e67d^^v5V z#y-tqY;lj`FYsKvxLMeYI=@#^x{vLmg9Vi^-CscG6%!~rY3TFaMH3{Sjue1id{jQ= 
zfj3VXbbj!z)A+dff5@=%HQ}>GD#fe5x55m+=;SZn{_oj>N3fiD?xTM=@XoN-!ll>| zIfFh>Gp==gtCi_zTBORK_pJi>MRPI+Cb!cu%PH=b{19^);1g}ZbEuD!##e7$T5Y^> zB#*o%6;JkGR|c}V|F^Y!a_nTKrok20?92AV!{hHV|GzxVFMRnu3IUw?`@Q6-wla~F z>GPae##L}-BI#NSydLMjw>2)>MpyLV&(+v+eW~H}e$>8lg8r;@FP`~Rca{FS|0Mui z*+xu_Tj~EICD|gFM%0PxsqGL65Y(i6tMdPl@GtSyo8G~dRNTLNA43UIb~5a|626tR zBo%HwFYRq{8U!!MAR8 z$g?A?Rg{@rj-TF(<^pbvK=8?PTmK*CS}=pfQ&=j*f z_(sX17HT3PS8x;342c4UJ?6gemmK&T3_kxiow`l}4rWAxCs@U*{B6iC5q{vhxMk>%Jb( zBvle&^Myj9a|P7#m5@NlPo*nD|?d8O&)KS1V6__cx4Fmpu$& zFMFGYaQU&tZn;4g4+k<-WnB}7f##0$Pr zHH;U1)VlY~=&Snqex&Nq^YG<^4fJ!XpAjulQ!s;Pv(pQ;nPoeoJG?=((N6cD+Ub++24%9o$uXvtY;=qR2 zm(g|yg#cAmRN#g=Lws^&uhNF}`>pomoZZNNnD}X1XcM+F*n#Jh%a#Ct+G#mBa;NVo zjv@X*I@pk3!S?jdf6nXY6PcmQ&|4P_*w^XI9Df~Z=qr4Z)GnSA_`^qIySXJ$z%>ug z%tH>_)0bQpE_N=4R{=&jacA7WmJ}&F5GX(*C~!%g8t5Xx1UZv~yViL({4)box8KU6 z;HMSaL));Pm9$WxsN@&i4??Rn@65FZrXIpfG9Jch5)bPV>n zE+SyB@rEiDuV+OfCBoqlZM=89Sy29|k!S|fSIfA45)>koO&hzNuS$1OFQ|CW{A#yb zA5$O9db1BcAS4zf4VbiW*Xqn*FptrR8`5gcK8#$Vrv|mf@%dZGRiGxpQDVq<2W+!ig}))>)`>$Xfg?LEQA&Tq7*6jC&>;|Tu{xy$mvXo&jf&b#O~A z1^!co^QMRr28$@b_6L$_0v3Oi?-hJOu7&3_>PwSTGuMT#)8sw5sW?y{`l5hFz2(l$ zL744&EP7^(x~)6yJI+6E9?tNb#hU%Mh_X%QB1>t?j)s=YZULq3o=Gq+u(ehzJq$>- z;lJ{fReZ$GSG%Sm@3L>f={^9KR;8Eg>w#*Pel@6*Tg{`_mGM|e+c_sWX~m)plDK58 zU_U4C-^w%BH^RtOvch~Y)pg{e&%HQqGrjSnN{k2zfIiROv}Q$>Z1MrjK{|2L+$0nC zSj)d`sRwW_gn){YkG6Hhw@#toP&V6x`U!=f^CHKDwo>@ta8?~*!_3CT-L7?_3e9+ z58V1(s>Etf15Kp4!eUO-tA-jnR%DL%o44K|h^(y+w=T;%_mh!V2DO@ZyWCRFT6*bi z^sF2mYhZ7U8#ZWg`gqHq7e?2!m8s7}cwm3|wA)IK(_glyhVBJ{4oGL~&FVnTwh{Ij zS6Gw4RanP7q*HrNtt2M%7L(zIyOSuPL-z+_M*P|o73hWKg_g`y(`zQc={+rEeN1Yh z=iYJ`(~fWmL3euNJuRuU3mY&Z9-^LUTr%czYN${M<(rtOa5|ylIawI0<{67NJ$2S+ zwnAP+>YryexASMsdAXNFH>Cnacw|5lZ#N9iPMMZ0=56n*wm^$KP=LR^_^%l5#-rB} z5q&h0W5K$lKTw0|5ezWR)UgwKeWXiLTI|;B8~srshN3NH0pIYA0}b!If)iDw3$b}xj%QZ`dC*fx0NjZ~Z&J^D5pmiu-!>q(Fe zARy|~P{SkpBdH<6GW3~x;8*v#=nA$JsTKv%Zzox@SiL=io$jL=z&hP4MQVWULSzNcajlz2vz2}l3YyAbEH{d)(x~P0iF!4= za#^ZjE 
zLSMw5tFNzbu9jz&ApU{6QWnE~42h+QLNQbL8m_1kYZ8YKJXTtCMw4-&ou>QKHMc=8 z=$m$HV@7~_7*;@4z@t^4RD=02r7Ot#94|f_qTUE1XGmgu1^lw`ad5LKtKR0>MghzW zfx;~bYH;uz`eeD;z`IK-i;lX~!m4-banvR6sp*$lkKVKlU3@M%B>tH$Lu~xqiIWzr zR{bM1X9KQet*H7_8Lx5wL70QkPq7CL52Z!SKon_lT|xXjFOLwBZl5UUb>AlHM>oef zG8smRa()15A&_m(KE2@?R!5JQoV<$N58LJBzD*Q-W=>t)51I4>?}wEJCejtgR|+5* zG0Aug)<>*GXf^#KMuIQNoM(Q~ur{DxE6@4OzQ+~qUWiq_s4IBj;BEy&W)gPVQvtNs zYa@MB6mO-yc_v}np)2&KlHh9c%4+Y?_{x;o8g9tl|I~v8eU_IPGt9_qIlo^~wYZTi zllyIX!sxg|zB}(gBxnu z&kRA?6}D>wNYvL~ig)WW8v2iSDcd$~$qECTJK%-% zam~LMuiGduy>fT=e;FqA>g2rJ&oqaj5fRrfGwJ4-nVuupP7&4@W?kTl{8fu2Zl0sX zSnUGCq$4+8H1s9V_(H2ua_SSt8<;ZHXkC$-yEe%PH?=z5P++Y}5tna@Qne`WeHlY4 z0O9p^pA1rN;g)eLM41PFtT&GDQ=ZL}$=>_a9dt<9bt7E+I98Z1DIZB64_fI6BkGq` zC`=8HmCJPqPIzsX*sF1P2Gv6uhXKDbrdw^{&Mp{c+4u6%cSVd(CxXa^ zpOH)ly$lYM_?#2en;loX>Ims-g;q@cI57S$$u;kunDjzrelf{JcBUz=_N%`H6}JQY z!HjWTtL^azL+YW9@p0ThWK367u{mSKX6VxmxwK!Pc7<>jcXu(!up~6KIsxR{$f%seBLdrx==r6KNT>2j^}-nUB(cvCHh5HkOjPP>-s_z zHembF=uz~>R=}*R^^)h|GL>Tq>nza6Ng+$*Dagb`S~c*IO+U&g&%$t5N?3@{tg~(5 zI4yW9`tQQoui%*GPFrW^Y6#0lTgv!AHO9hK6Xo0b9(z$CE4s#^>nJy0a%G@D<1Qt; z3=|N==>mq`7Yz~Q*BtHHNXuwx72zPYkEew9_zZ3AJEY=7Q<-ku6pM(6*gQLheD8oi z!^2n>YEDx1d(=tzG5JQ!jnJ38aNwosgpWmJmRy<1N zl6(exeABUN<<*#-@nf+uCx_`5>nMu6E%8ueOmV=veEGczcw#GLG^W|ZUkuEExiy3c@;1EOfkE<0H+NUNw(Lg^bNb)B&>XclmLz#9uM37c&B`XpwtF=61 zFe19TZAYUqpN|#OYPDdz>7R(aHWLZUvKsi#j%+Xi?+%X2RlE&k+`u&}{kJNRUDw9h zg0OyiuJ>xaXKo6yW+97j=F$Ou=MQQUZ?xOq303Df1JC+1-h-@gw*rPt?&T}TJgjyr2hozW=Dx6Hu`mc8!blM_(WPv~N)_ z_}T8`PC5Yb{7sT&6jbu0oLZ>B3{aASOSN1Ib224OY3Uvt(jJ7ojrWS9K1#X)E&DRE z9_|U4<_(fRoXSH6u4Bn8<|PXVC|=U%39&N@1wWF;i4b0!6i#_>=~r~`LM8~MbUct^ z30-&n5WFrV+Y;Dm+}WJpW$qP)APbsxvt z`qCRMQ#j`G#X%9#O3umcjU6wm0d5kGw~qW^OOW>uy>fbwq(g>wPgsxnJF@@>>@<(p zbwc1=P;Hpo^^TGx|FbVCwtDIbmgO$AK3%?Ulk$BVAwFN?@3eQ z44|C-bPu<&}nl)|gL&CXuf}%L$PlltX(P4U=6<^g> z51tRL*v)_Q4D3uEO&YK7@ev0Tn|m8{g%fA49eFC~l7r&b2O^>k&V}Xq&1xcpuy8z2 z(946S@CXKMrT921-^+V{}pw>E=RHCms^t}LA@Z@?aU zr{dqKS2@NENt@@?8$ao6Eh2J-JI)NFKhke_#!mW=kQrJ1Mw$-@3(&ur3is92{DQJ` 
zQ-b`wFH#0(^G*uj-*t}iJ~CX9%RCPKGQ3x`M=bOZGB~{Z+Nd#~x>?^nP<~)2^#%vzEchhCMdy-RZm)EWy>0 zlovkSki1VU^~?AJvf@*su%#rpo1Clf&q;cm=hCz!_5FCO5aWF>y3hQLVZPCPJHp@w z@Zcl-4KUB%goN&hF>Dw^tLlHo$y5;))Wp zdD7!$V4=MUd=>)%DU@PbQny+4#6+-x2FD+F%4L@~oXRqe9O3(#I@Pj|3QD_HPDjE3utqKI zlz1zr&E`T(NQeVom!Pz^8?;&=^Df)+Yjv21d?L}NVbV)gHu0uyY2UnyXFxSubx?8h zvfdygT@$Cpp^Wac@lo6ccz;$(&wOLIri0&y_n0riwF zCbROvSZj18w3tx&C~e5zQMvDnPTi^w9+Q{psTsJfT?Wb9euKZ{>^9!of%psqhgHmk z`3bl)^LwB2dBY!k3oxvB=u_UT4DR;f@jlH5HnbXu!-cTFe6yx^WMs?nO(_kK$9r;u zwWR=p^ZxSAn>D^2BB4}z<;AwApiT?`cB+|x!E~7E6rs0nr1rq@VPw`DS!L;`6O%3K zpvIp|z&@xnw$QbR`7R-@o*qpoJ<0lLv8y6AXA!K%bQ>d@F-y!XrxdEC^r%6>^knlw57_a^t6uDNv=i@I+B4AS zcN&NcUn?~t05boR(Xk1+kH9CYE$dXQ&$!mI9p!l zz1LEAZJrzCBBcsYpO8KJAwFF2tF|}<+d3;Y+J2f~>Cu1$5uUVx^2*QAuQq2QCL)fj zyNIuQKZ=5|IL%hPoO9TvH^T@%zK6JZSo6Fyp{7E6R>QwyxUEM_3qnlk^-2jl_G-@3 zk_-h^%jxtH-iQWOx?qP0wULa>EGEFZ7`+9WRwAG_z=U%iB%`CGiZfv!_4CY)i;LmN ztbqwa>jtKmFNOE4${ih*@Q(VF8XAbq8gXbEs82{A9np-WCeq|OXO)w5I+F)sNvJ!R zrhUEHT)WeK!#zzwCl|S-NV7sMStRSm-Rna;+TH{|6AJ?@zKEQ$H3}p(LWPdfJ*PaW z+-Nj8w%@3c>)htDqqOn6IUv*}qD zTA3ruigt04gIq0ixrXzP@NnU9p~++Ttpt8CQjv@}_dS~9;ViA6cR!;092}_zs{*TG za=rA}yK8V>~Mx23JoE$xKw0X#nvPgOF-&Ex{b>OalH7$By z*KNu(P)CTrwL7FhJqCG5Zr8E>W|p6>D0-}Mur?Gvd_ZjAP2cI2yW{MDA_GhZ_r^`t z2$#bKxpUu}nTc?g7v_DkfA?Ktb37GV=k`zgB4YU0))Yw}*dANLuEy?YZicAGWQO{nndT-~_-v_cqdv~B5Aju9> zD_gry6zsvCb6eNB^p+%jmP)+^*CJlRsAS}RG)(-6-jUW2tad%53{+(zLq#jFuu({N z)@2`}*sETmUNr4dI%{=WuDvH|7wt(^k@z35WYzQiG%Y1ZVBfG6E2oMmK*{n&nCEJ( z)rxk9VHMpEFOKs019pVraa_#fBJhh*9D7oL`rdbomHm=G^SdoVnN>VMy6YG&qD~JQ zJLKXUG<@4ihnQMC(FG98=Qt7?=Msm3x2KZ=QxfKe4rc`UOv*N8v2r#UJ}E-HjdEW4 z8XGmS>nDx8$vde5=2NRVN%=jl#lfv~cUC;#@71|5HM}&khsR9~O=!C;XP*@j8Qb=b zvZ|qr1dDHYvhviLMb}^kROS8Z=g6g=2PPHFd6@k!FH&3*>;Gu__;U{}p-@a_66SKg;viSNC~njQ|Y&W`4z z3;&p;4K%;q0MwRL@owx0oMH%UTkCexaO=v zQ};Yi>N4YTjjU9n{b-{LvrlL*J(t9c$TBsVJZT~NE{*2P>OU?$9rZ~!a0$}4Hb!xS ze7oFB6H^mFrQZJQq-k&*-_$8>0Ck%0enz*7_3_MKBUn=4T(p8W4AG-6vKx@LKV ziD?N?^<4F&&AQ?pLpFAqr$=+;9=YPyh4)aBKA~$O28&dBmPLK%e;S#|uMpD36U%S& 
zeaSu|N4$C4^I+4J^oK51%%gIC*IGLY1QJ$t@?78ad0vHx=VN0|75$#aPMf_@N7tEX zfjsYlMoTisBTb&YBPHIKEv7c3e6 z%DV~l+(^@zKn@k}X=?yqY&2B)FNE9yiJyPE9ns@v!^IZUnoqKWx^2?OAG& zTJ-S7%PjHQ{lOweCcr;?o8*=ko)7ypd)Bi-iZA#niQd;h*|F?kS)CqzwxdBDt?TwA zZC(=hK0b$%bw6l13w=4SDOA`l9i{nYnAijVv>y`b5>T?E$C02*pj%@o3{~CEJ8w)i!odBJsIv+YMW;#NS{Wl39p;%znTktc}2=?HYFHq zt+@m{M9KC|e{GFboZ47YPO06LpAt?CC4eADaBlUFmW(S`^jD!7wv;vQ&|a)%Tad`k?lZ;MI9wGSjc}@`*J!Omfl+1 z@2di0Kw-WX;FAa9Voak-LqNsx&&H8I+jWfW zgEejhY^NM4Tsy3_6+u9!z98RU6CpWT@z1K9pLq;WwYX%1>nBupc~Z?M7Txrfs2Ve zuK=MYFQjg;dfX(u#x{;_pS^wcxpJ%DhyAq~k}epk9e?PwO{de;qD7l0FP{?IjOUqW zq?Tu-!5I=G0ASpix-ma-ooWtDpojU*T`w6Bo&Mp?1JBM~HH^?0S|%57&q>rD!DSC* zB-9zW$9+-@h^G#lI7?zP-DI4QNc6gfF~Hr9U9x!Aip0+J5QZKyAZhi2#8(?;L>!zE zNQX5JG1uN!ug&=~-Bvm?o(r(Qtx^)DK0Vx3OP7hq>^P{|*c6eKncz8c;^2%15~9{t zzYRkutxVwSO3^|Hf6#^1pGui1E1e)(6``*<_*}T$%7M3SrfTC*8#47op4TG7vg^MM z3L6^-R)FR^HLuhfMvX&}RM0A74l2;1YNx}M{sisrT^e@?2>q_uvrxtW&`Y0V}MYg0;wyi_Tz%wH@n;VopYNJ=k}&6 zaZY_kOc3RJp!fmUyPg;Kvha`c~k(*R{`5IQzt(wxHoZY%BHiE0z z(+ZHUtaIDzV<7oZa1YyLPV3$AR>!mt0EZ zJCTQ)l4qTN#@iX9i*UCV;;vbv^N>)o&Z?!)$G5c?IrY&g45Yzmm$g>i1}MFt+Qm z{|JxmqJ_BemKRv`MZ_a^;wPDTmJI3D&5Q49T)J&o(tNUlNK_F7-nl|Qjod#T+%~jC zgRK({tc5n@+A@NrD?Uu_bkK>jh+scFcTLh2_bK`Vy)NqW7(IK7)5rb^D-~zF18=3( zXxr46Wk-yEn&3{k@<=&d;pV4fS7713^D3D40|;IEJ%+=q#WnX@ZB*nzhB8s)$9#BX zbK0JGMErZ3n5{!5CydAu51nh|DG0LrS`{w(;#%!0M7Ymtbq4M^*s#eID4q}AVG9b2 z5r`S@A8Me@A4HH9fnzRGi}EO6M) z#rSdO-Im`v%(Cfp^|=fwr;xGe9j_hk-5%OCkWgoq%SP^<3Dff^@YGvW39kvy8NEvS zanCRf{BTPEV6FuYUb>=&smQf{HfHko{{lej&=X#-av*(yU!saHW;{GZzJ@f)q&Q9;( zmzf$TbP-doyp=pxQ4}}z1;256711|!&v<@t8J6p9Ya(1eF5JwtX*23QUD~DLA7wU% zb)W6)AgWn{x?%}9t1!}+irJQp4v%nrySNy0Rk8Dl}3@@AVK`K0tYFa z%U<-!2+vYiN~a0_b~z!`0|}okHm`w3bY+grZ&_)^xhIT7Y5k{*BbN>@7*RP4q)sMP z3M#^)kBrQ=4`+pSe#-UwmMS_nxo<5x_E%kS9F`gXH2?M7S;*%8vW)AXT zl<4KO@$4v(99D)_dw2QHs6@+xJVW200TWB$ht{XlxHB-#b$ZeF5u;omP4~ijI1su! 
zmtyl-@=kQwY;-EYGE%0y>B@^|6EHvHEEz?h-}8if&F?Ox!B2;~K^70JJH!c&+P!Ca z=aQP7xmmCGrVG1WBU@ol`K?=DQ7rOuj}B>^ZK2f zQ|uhN84{{lmrHnc*I>L&$`!CxDY|J7uzxv@nBh5i{QV+)sA1r^*(0@@tjmJkb>VGC zw>L#DtfRkz6z`&%B(kFN29hT**9oz9Kf*Tuj8(=rer+IEd(dQV(@#SiEvE5m(S7To zE*zlpNUmC&+ROOerC@4{j_ImAb61PX*^%#@!=9m^x(6wVu!NMC| zuPdsO70cHOOxb@vESr2&0rM=88?{bPpp5W~uF<(ebRZk5l@umY=$h(pPHkBCHI-gK z`vc==l#=iT4e4c*B1&JGNKu#3?_ZQ04>L^m@>yk$9z8AHcj~_KZNDHR<=gHl&2g2g zIqG(C{bx(xP7Od#ntp<;?|bm0V>Xv*s&9hBmuoZ$ptpzl#DVX2FZI>t^mXwa7CBy8 zHbeovJDzh^dh?INL0*O8J`em0S7&qHcizCtv*%Fh;S7lT48)43In`mp$P3hmNyCt< z-I&x)SZ>K=swgai8;&-&DR|JFwF?cSVcoZKiPU(djzAij6b#^nQcqPDEx26NKBB5! zRt>BUZ=~8g&lAz(_C{w$BVD-i-1@Ah$<&$B`ixx4{bg^XN$PcRR?`=BO+bHU0~@?P&7)Aw5-Gmo^yPb^6$FD~5P4ATI? z7@F3x-r=%m^v)Os9Zd)T%KK1ZO3InFkoPWOgX10#Z@XIGI zsXzerfEAg>olP!|YnWG&BaZ>WoSe=5CsHfFjT>lxpr9*csQzN_=oSn*xc!jy&V?0! zR4`W2RYr(P^e!E5{&EwY+$Z|E>TtD*b$rr}=ZdzetYHUDSr}Oze>nY8hu{^5%|*|%EJa8kQF%ReY-Ir^>!23xENmft2geZ19aGQYX|u+xRIY}coyXesVd%6M<% ztj_e4QIZ3h*I$=TK@XCBOtHwj; z(&-l)GEokvR`A%yG43SZ`}x>3$jYIP$y=-8;nVIs;KwB=m4HjNB(IV(lW-4H*Wi@J z1&ot1{#mp6?pU`MnEsTHfsw})GNrdi?z}?IGT@yQsXLEp**Vp~4ZT3eHS0v92|_T7j-EF5rLbyuobePB1?b*-_)YP%#lN1kFRr-L4yo6 z%!=bn&UIBo5QFl?ViglkCcy9f(+@F-I~6hVVBhp4E1f}X6dCPRE`%7Dk1R=}Q3X8y zMT(HrrJDB%Ck4H-axmvYBl5Q6_2Uk!T6tE!jlRI2kgn(b@?c08k@!b2(@)aO?ZCqo z6^7~Iqyzs0xZ7W!7}c2utVPE3dpM8%JE)* z+>4K*a9RJ@D^VcuP&nO6&nDYaBI^cI@z0y2QH1t>>gPEihn@%Wt#HY+d#g9e zapi!Tt$FRL4i1y@sRNwe4MsQc1CHIUCLCh{`?E3ef%xeed$>7h!&31R*R_3tfmDh0`gbk^YTUSqa z-{|Ms1UvD=2Z(|`r^#~5kd<^Rtfv!`6Pe95_6P3&HdwHx*=7;{Yv@!un7q5Z6=7Zi zUUuI+^X{(nYG`R-cTGR*PoRMe-~Q8P!61uWP&gHNymUv%E#&)EMosxqK7ooca2tkg zm{Do3peNcpQA^i%&^_xzRpprt)l(r}=vcMYG>kMLT?B8FWE!W7t>z%e)eGo$;AvJQ z(I1o{CSr&~`bk2B_E1jJix~ z&Mkd>CRhK~23Ysfd(YBlHJ<9|098|y9<+wbf4j>sUgp&9)yR5tM*9Kf!yE6+L_9rl zDeh?-ADKJg^s;qx*`at?ngFw+%54$r*`BDM&vs~;P&u+)B{56T^`Pcp`(MTFP$iXve%KB1-3!X`aG#0`6-?UsHuhtTt;0pC629@bjn3 zAk(+Ye}v8w;KGzo^kg5u;x9+;k&6AHIw64 z{x%nR0WyZCo|2r{x-@KZo-2tZ$*v{;TLGR6@WW&g$QH3x7xV^8#Q-hX@iWscLc3f# 
zU3@St&?H!Td(~?69aiHYdF4sW^=Wg$(zgv+6M)wKyz>b##UtM+&{NYXwNV0*s_53? z{sZM=-+uKsNQeI#No8YXjuJFln0Z?ZRk7(hK;OHhF=%|xwpxCosH}9ekq>lZcm1y8 z>KRs(W3>cQ=8{r4P_QL47dZ*l(2iZuKo1Vh9{rR;@pMkO4WV!1&%h!gU=j4A{2k;e zt$Z`AJAyc`J2No=h%r0~13itChLY#7{t&Ujp+t!5;pm#Ns-#dt=w6RSW>9>n%am|5 z6)=AX`s8OSxdsA=NQ&9Q$WI$JGZ#Ztdp|aW@OOs1fOuwG5gY%i4G^ zLk-$RVwAb73Kdl66V-kNBG1#;@8gAZB&5wr&R2{PO9svzG?|aTAjg{Y`7xg8|MP# z$UE(~t7aWNUS8fwfpneGKC5re{y|*Bz*({jw+wTz;4#RyBJxjcMQcoDOQ4LHKd8&T z2G5PX)%(3AUE-YIBO8{!U<3Z>wxd-2`I#Gy?@1SWfzxMshn}TP@PsNCXzDSg;m!2t zAU^#mGY&rJbiiPC`G81N9j7J=$%_YtYjHzjB{X$mMkpKmS=VB>kyd=t*J zvI7Dd~-6fKojJZ4DPJGSSyuq9lY>d^pkbloc_!H(#6q} zP-!WF4zC+!;W3V(Kwt{F7o7xWfWD7UQ@{zPcAsA7JXn7#9ATWh^4AQFjjKaYn47sP zhwGaGPvPwxG99%9+0S}Wa^UU`RMP!uB-7EvgJ=~8O)FmgC7c^5y3fiu!k~8b&9R?b zC%xen*OLPVT3MY%xetOxW7vs(2^?`Jj=#b60}#Z!=NJ4X@oG?Br?8&2vHW#nKM);zyYL6HI@(ii&P((7SsM zxtSr+QIV)L0kG#m1>0()?Yt5Dp<#uZD%~@yij3g}!=ipQ0GKF1u*OcSUr9a;%6mT) zWXhwxA-CW>(3KZnyMMvAea>|EtioV*2`@82UW-nf*=l-mk#`@NexKHx_0vlsowv04!HpjQP=kI?DZPhC-0N@@At(I!EU!5Piiu1-5TGSJ zS2MRK$B!R0NzL;LirC$Q&~SS8)nnUvG2u`6^@69pA6jzSt}(5g>i%Sdksf~k3<%>j zke1b@y~xTq(~9AN{1nes434W?y^jCoW6MK71G{|>*A(=Y#3p69mT!}rb7%U(CN#LT zErH*QMh84lFv$yfvF_@so=mz$*Xy7JchlRW#?>!PGV>newd+B=iE}xW7KZ7!_>B65 zyj!sRk>pDLO6&V6-uCG$luh2;(|Pu#a}Gf5-J14vhm!OJT5Y7fp55kgHdmkbvFqXH zexv_0rr+Yi5$ad$yY`H}A?7Rx``m|1M#n$pe! 
zZb&q~JY2~>%S9y;M1*vFInKU&Tf3o>a|h6A#{e=kB~LdASm-yKz%*kC$U42%V$LDd zgWwx8X8u^3`-F47QhjthSQQqv}|5-<5xko80vJ-#Di+9gnC-Bp`QA%%K4l4 zy&UgRn)t~ukyFzGWZMJofp9CG)X_=Rgg@zN+PJz>mT}`Vp2}PkR$W8g{J@~_F8mvI zqUyna%z?>({sPCv!^f3(iGkg&u?_?&UH%ZF(}BJvyKg@ng4nAP9F&~6zqGj@>Yiut zn@6w}BnMv)x$OS9SbAjqZ6UK4xWPG-R=f?>kx^b`j;zSJX+=Hz1&x!leXWR*ev~0^ zuoqXPE=-9?xcH$`ZTA0R??1zu+P1%8cw4rJ4G|TkDhNmyq*s-u z^xlgS0qFvU4iO8zNbf{IdXpMzQ0cwb&>@5t0tr2kJPY0DZ1+CpKJWi}Kl;ThxYn9; zuF>Y0WBkTgB%VCH*1LfVyKQv%x1fgrw5WDF#M716dCA^=^5wUzpuTUcaq8i4NTeSmVt&wSx%<^%)B+y0J=~2{03x%$qe56jz*VF+P?OlyPf_Wy7 zoP@~R_mBG|e#Ic~Y=&;)qj@fQ;~e~N7|BP=&)V~eFhA9?D6wfn(MI?^ zv+*1=y1X~*?=EFDNX5tFBytaGO~{z+)4f%BqTuB1np6DOc0;0;6VONQcdL{j=da!6 z!$?`}(x0-0EUBkJ#?~@@93^~PV!Q^HXcQ7Ou@~{~pK|M>A=}46`x@&{2x(a{wg?sH znlEGRSYd6B{Tn;^x~V?c=}+!1PqGn6&#z%uuiter;Bq@YRsrWcYc8NrfGa-YIB%ie z%(6pm#=)XL1{7jSeDZce*mHzFd?pzB6gQ8NtThH8qEn|?cn7aHgFwh zs$^{=JfLwX*p%}mzWf44bkr+4)!MaQjZjP)?MJ7&xiq4j&6v?jtJzAH6U^@r%D)x? zqz?1B%YTY--=jEghe3Lu;^EWxuSG?;4S*;Y8TZ7?-a#o(+#V;D)tG*R=lrVFRZ+o( zqUMieFVZM~XRa;ArceNJm2JZi)p0^E*-&G@3+Q)|Mag@qhtU-69PZ)jyC>wVz8>BF zyfL9xt8%N}ItG$j(9u*>xQ)6?eD|0s_U<&kRF8>B83I%xul*Li`IgOg8P+aT%;{w5 zx*o*18>)!04nG`iU{Bo-(G-7C0m}(Ho%uNyTbQ=7b6MW+9q%bOoFm80Rcu%R%=f+%kt!V-&IOP*>kV6vB(_FYZ;g)#U3$&y~H9+%vuPgTise; zSxx+wG>!~DDJ4o{SI2NKL!$@M?IP#J@wGI8gFb&|;lHq*k8+mUVRPbfJ6&>i&wOyS zUbw!Xea|P{(2`MWYwoex`n$*b{7aAd^-7M|H;NPUp#{8<*k@KQTC^%1sy+F6kC&si z-ilR4Ku8z&Nuz<-KTXb{*K5eNWsUGL4<7(yfnQhB3WII(WQh+}8Zlfv%*XbGMztj$ z6}~$M(}^dMY4_rdz?dMa9X=^q?I>x!+GwE9E<;69ydayo1ozA*T zR1mj#Y8=W0G9t)XSM5m-z3>0pJ|Uv{5j0hxNgxe(P}U%jF_^VpI)7+9R_tUwGBZDB zaaEXp=2e~TGLf~R-Yy{`CF7Cz=kTkPLi#fDh#z+ArU^@n?%YZKK+HymLgs?)OE<1| zw-)d8Br_R>n)6ZPeuG}h_^_`$cWqjDSl8|^PJD;gfQ<8E5R2APsS<6-2*%0RqkBKYa*+#H`X-~@e8*<-=$KGBz0+uCpt=LayI+NFHFc zGio1In*YMm#V5-9I&cmw44Me*1hPnW!Q!xE+zWPeB!3(2kz(8}{~{L;nIxlGwa?83 zEE}x{MCd4Zh^j)jhR(~OhG+|U{jzJ%_@`K*H zFQ95e?~$Q&g~i=DLTB!-;ogWn|0$6KoJ<&b?o(!g(1h@I z!z{T5`;QLZ;i?dRFJcg8aG_+#HL|GKgz-so^TksG*_r#BgY68+l*D@lRtd%y8bEq! 
zXZzE-a3-LM#chK9I%sQ;NkK5vi@PWj5RMyNK4e5MvCsv-s8nOxJG8a_?J;$a4!lmA zQCx$fKon@!(6w9)DPK>FO>8b8OLV?jD>lHFLYIR=w__q5(OISg-o@tzjWnh`gx9a; ziG7eHYq`=h6&Fi%wlZm#KtoS;urhU!x$S?A9IeJtMmr>`tLLCbS#dFWo#k46Ie~E= z(ZqVT8tw4==)(5H%O?CGqpPfJtIoShbb@>#McW0s^B1*t&3pu);iFUi70CWw3=`C8 zQN-mgnBH*3Wv|ibp<{9XKVXz??AkqVfTFi|><&jeG`&Fij4)6>^BtnAh}Z{*dN1?? z0I1<0Z*SlP)Z!gvk~%MMXfN(@jJGJTPlLC2 z&e7PYD1C-^E5jx8r#`YywH(Z<7wNSq=I(fSjF(t=Oj;b;$!O^y@`_YGWya<_H4q5(;YH43qGr0#FHe-C$1ckSrm;NDZb~cElGiDlY?Y$)%w_#hjog~}gbnwbjFtPR za|h;Sx^>&0CLD@NQYN~uL=UaCAgY6i(Jg`l*Tnt(N9yfeh1<)7vWBwn^{y}E=uPQA z$1>}fEGl8V7?(hZ#GwQy9ptC{ks&@Zs{+f>V3yp2+>5qFm>U(OU&oI+)L8`^>zQ>s z68JsX>TL-t4R|g;j~wr58?_>9>`_zuv^Kc{KKIK)<#x{$_pWfv;k=NM|k9t#@CQV)A5waXamt%xl#-ueS1spOH%Zynk<)9bRL zC;4$s`=CDGcU%)$Rljdecl-7agru=L&`LrpzDZcuWENuhJ%KQqp@6^pHqGk-L~0>)rl@} zNl#vfK>f7LTPZ|`T30&H)jy3q@aqNT%-mc_7l%lBZVvMB4JtIIhy9VsjlA0Xn%&8# zsab(Cc$rI@dnQCS8G{t=>zSFU z+#K5Iz0Xr+N0I+c;4GUuP?q!QkYXyiJ$E%PaSLr z=Gl$bUi7uezulv5`026Hh+!e-Ejbl9cMBXsYmJ)BO!&6N2)UOUVv*Om82-fTZYrHY zt-)+!j79|ar$ZP6(kGbmlV&#Wu1;f20C9d;{Mxn&HLm#CeHnxHa;Q~ay@c3y(nkp> zeuRB#AHD@ZYCYCErt7zmPE`jb)*eKMXoI}RN-(Xjqwe}jt1qIr_bsg0w)YZ5Ai^E! 
zf_5%7&0$uc8VHg#cRMieu6hLhg9(+*`g73v48<(Ad9QWGs{_oK)jW|zW7PhMXuyiN zjn>DycHcy>*h4GHV`|B`Hbp(>OqVtD7yhU!k&%~srX9+ZlpfbA zADZ6RDAb7YJjRK*0*-*b8J9}wCn`;)PCB6H6eJlflKQqG|@KIpQ&dgJn&$T6*JPrmeSheMVq?ils`dt&85&uaHK_| zNZl4PDWjK)MrQA*wf(huPr~m&&GZ^?kaB^QRaO0rk-{}-=2=4-4zKoz<8# zD{~R@p*W9->1oo~>c9&L) z{o_|HtNJo`jr8IR2a;c}N98wTuq&1sg(_7d*`p+9y;!@f=w}i&H0WQ8J~4x4ykwbs zjxB1DRNgd_iQUd;tDz#xp5RL@V98Nze=l;4nggAYz@F8+>K%T4?^!4X#j1!3lAu+b zXv({v)}$PG>T0Uj^SXkmu&i|zyVR0x2PsS)1lMqqk|SiEBy?Q8@JQ3(XnWRURVsTj z(hdGH3HzI_7nKr5K{CcQMP8Qxf3Ond7}-(0jFVy8m3_~B$sG}|C}Ax5z@hS6%Veeq zv!5HZ?rj=St+Y38CgBwA^_lIU)U;m$iEEDcEyqN)0SxRUb)h zvpf(JYH482UBm))imOEsXHk4+DxEi}EsF%*t0zph9k|aB?#-n8BUMsu_!*@#H7^ad zX~D*+q8*n#F1*iR$?++<2*R{3s6LVdM_wBCPqpnuk%CmJb_!B@B1QmY(!8}*K+8u} z`hXrHP_q#BB!&3Bn+USp(x7e|oEdp76(lWQ)N@?b)Uh~J17cXr+k72}*y(&+v$Fb4 zL;TSrw(ZLDj77Axj8_U`3Yz7Z*GGSB&dPH;P3di7E6k9yep(^%*zLUVWkZ%?Lfn&q z+P(2+kE<>^eB0F1vJEDAm5=-Q2n(NGatxtc%prVwItYv1I&l>TlJ~pkmA&ig=2!#g zuiDNx`xXsU8mX_Zp3Q)QkUQBiGKEgCT!|gGXldLk7ZuI8cBmCyNa|0=^INYU)ci(C z8EXq{ZU&r`$p@2{O*Yokw709q)=r1Gl+95Dg07(e!zYo54ILIrWN%KXP&;+>;mLql zr`6~(?FVy*XMJ4`C{QfXaW0X0(qW~$BbF^C(=}i&`P}qXw+aS7RF{^DeEV@7(^K%y ziLjwZ>m_hk$z6)~^i-mcW|dJv*Nj%;I6OWyL&5Il%tB+KZ9PPRRat(;9=xB%m2r2) zhRVpdp0hI%@2XcWdGGauA@Z{%AJn=1HNl424xT(r{I;2Ij1am?t6@|w=`p0pag}n- zdehHgE%0o;a`v6^-MPNNfEB4_z&}!9?I1Q;S=&@8FXF#OVD8p(YT>)5ce7Eqsj{ru zj`qgE-cC{VVQf#1hWgsf#0M=Pa+MyT@dml`-}m$z;0o`aBV zK}LJ9?Z*1TH6S)-oAF*)?>R(xg%%qA09m18qxT~v0+j!2=CyNlf4vT1x~)XFR`t$d zG$c~@0_U#Eh)eDeUPaeurm0}1gi4oAZG)S7tNN=$L;h)>hUS2JB<&%T&W;;6BpTz= z*V4P=J=i|y`Db&-V+iWpNkcZT|99_zXIobdU19lnI>^YTx%6arX_d*VT9z zwxVYaJT-7Q1~4@HaPaC1hm(bV_aN+TebIc&&>k7CAOUtv&g=Re8;s@ zmwMl<<5eFR)T}f>at?@T*<^Y3&~2dE0aJJHj!`)}lTFZfiN@ z;qr@+UYLvck(F*V~W-6XtRo&&b6=L%J$(`+0`=YWP33Nl; z{(Y*5xpKa<-kCRO%SmYgevWw3g>_kxe^8JE!sPVU>oy_$oa8HVdH7eS;5H(86YVQ0 z7RfssitiylM+)Hqlk5>HJH97@RQXh@p4;bdL>~qhY!P?Wx|5@hn2gu%NF}`OcGMw# zK`7a^xM#*g4)lo&8m7NT5slx6Q4^R5w>lLY9q$riR5uPZWFdHl0RBelrIKk~T9-@_>f@BKd3vcg8_ipR 
z!IW6sw~Hv7V&ACaoG=$i{KzL^pFTk8h$PyH-cJ6 z6*EH?4env$OG0~>iZn7Iz3eWIHTcZT)yLoM?2lyVEUJ@xg92r zm1w)CLyW=s|p|e0s0MWbhN`&_ph*uV?0xiWuHE9L4h0ytD zhfv-7gM4`=@H`mGWNTJk<^|osqL<0`x^dztnqgqGK@2g}4y!Yx|D4*Xf050Be#6BI z#!c8YYGG~>H2`aoi6oO}@7_Xo@$SdYG9yIx$H*K*SocjEo>Q|(;-59;u?URQZg{sk zl!Veiy~OIZOhv~7_usfcE5u+K-wg3NF)P$0#JkVv4p*N`eYz6H1on%FAsSn`b{{P+ zxD(Ze>a}RITJUZnE$cjZJ=skOTti?z8*^;mWvCa|p2T|yzioCSL{*Q~;9JGo^;n>L z#-M@_suR3^PX#}71>T^uI;vWfVfz+WMu`4AeZ#6QZzY4QDngeiqO!WH-=!Yl^_V@j z_49G_oUUs6sxkmcb-BApU_yD=N;TlvbRHiVO3rVS08yMK@qyb0FRbeQ%Zjb)@#vZ@ z@dB;=D6p1_6IuLV+EHR6QlInWhkvrJ#5&233$&9NM&yq@Pgsb{aO{%$c{VTw#^q~WSD z3ya3u88COE?)M$_mTf#)9a&|{^*sn1m*y+I9_Ee%pK~KxOeRYS92#CJ$Wv_a?3YhI za@s}W>g=gBu<%0cxjMSh)IRv@O}?i10I*-?YHiT9A}yQ3I04(VbH+fGsS|HKS~7NF zTm>9oWFa>WiN_XAXM77aQxJ_Av?wY=LiqU!O@X*;k*m+AdyFmU^IF3f(R&FEpX_OC ztfeIvZit3%8Ldx+!Sx&%n|c8&D{z$#Utw5Yxp3scPv%XF|Gsg$O^}2EiEJ-g>J_gi z|Lmz%-mSIW?`QZVACe3^26RPtz|aG6VGr{Fr@x=4ts@GTx5!^Jh=zRcqvhZW#~1AY zDMZ9Nxh1IX2%b}=aID2n5Lcw+4m?0F=`ETy_~1ZzA6`bHnTw~U`?xQ{&ubrv6WQNS zq$1K^lVT$3WSHD#&O&UL7EMn-PYd@uy<#M&Ca?_xyr`j93ZgB!)pI}>1c2U}?+ z#L~^GR_f0V9KZRzzw8>Z$h(z6SU7q0k%=<5I=(1Hh%8mP;JjvwdBFCks|fA)gtQl> zl580fjl9%(6W)P6cMq*^+YVf~xLd>zc~}f%$aUIVUC+#mXI8|z8o_SUVt@rISNwP-tzK_<1p}&$TogC zvrBfaEZwRni^NU}x^mRp;znvqd=S z*xse8Ul)cvIE)5tNl%AvkFZm+C{Rx0YBN2%x;p#;qcC;`1wNu)^#-+qyTJo~wd$_M zBN)K^YjY)!c~T*GUX%kpT!fk33T6)3YHfinfP2MZjO=5Xm*n;a>>cH9rz2TFS!sfw zf~H*uUBMq>RDPow=#CNO^w~gq7}3D?iPdD1UdQ&7Qqjnhe97l??y%gUdGy+~b0#3W z1oMG=yW$5VPU~rLGSwx_*b7a?ftfpH7oF;pi;!Znvm(p`d3+yjC%}>KMZLShw5bwf z5#7Zr=pDS_aXU6<#K@Lg_^w-YONBc{b*8JP)(4%R_7ii?%hmg?!?w@dfv)XbpW{n* zR3{W>NjMH=1_UdGCs@JZ-vn7wvV;_76W`YZx%XC?!1EI-GNEU$dc8B4>HOn{uQee+;IVm$#QO1u=FA zf1?@mx~HQSGH&Ex&=hiYN=0kd={`NsLp`ZTNN$s&X*c#8UKQFC-ps1{?KqWF2Okth zL^l8#} z*G&vV%F>w+RGVn88k6PqB07rB6J(3B*g|z`O=f%Y;(e<_RabnV`;`hJD=BtGwQ%g3v%?SI_& z0La0sk|wLz3<)qa`~>~@UQ2fO;fy>JUxWYy5Y0TdH)3oae`rGYXgOGw+3j!hoaOlZ z+w?4@>+Jni;0lm~dU-(~`TLhY8k2t?>~H^MpSMvbe(?Zg^68zEh+JApH2_agPoEMT 
z$-l-c)BOti*o?qL{d51-A5-REoe@3!M@#fh<=3Fzwfy*hz3s1LXgX)8!bx^yFi&Qd zQYy|cfX~AgrMM;h*#Q2J+ksA)8?@eZF$}n4>PE4C;mPhq%I5u`i|(rv#CxVo`g__z zml_X$%7OlgTl!$|)Jf~cN>t5*{Y3MmB|pXiQHlAo>~nvK1p9x~b{d`hnITP#}iR;OOADJ7IQOq)<~t7nGsfQk2m&+!T&J3fWBrF{83XUpw)*5}kmH|hxkD-@m+^=! zTL^;-%)2vG@hsJde~AOYK${?5YW{rm_(9zVO0O{+{1>La(+Ge8fO-|H;&1#R?$0j+ zD7_2*nm?sM|I~yBIzGw#-^>C>Pu;24h^J()(EjvEG`Cmo8Y`Ii5IJ$_Utg;1_@yR& zX^!os&$Hi!&RkqoK>tCi4<q zv(){$rhhzlHaO_Z6CNwVqfXa5sby3+qN!I=B$gLr}`NZAI^{s@zwN&0;PpC_!JO>Bm$$U)J; z{bJoAJUfhxSO0d`(^n@6WRfD13IOR}e`y71A2QJ$@q@+>EKc9ae$_mt|8K}r#; z2qR))tMnJ1-aB#9h~l$y-W}k6-Lq@dZ=ajCEt9iIa|FG}vhi2ek@FN`z_dClynFOl z9-Iw^0z3|b7K9%Y`svFPn1^riaSA_fdSyIgR@`2vpYLp2R$tU4*7cVuA3T*m`&c#p zieH=Tm0P{OU$9HK*Zwl(L3lbG|9Ysu0ed+83hQ5=c>Sf3bWifIam?U8z^%-h*SjB= zz0Zt7{zl)wE%8sf{EI03y7Y^5zog}t7W~qJUs~`>3w~+A|4|FFve+2^(F@>juGlY^ z<8Ln9FZ%oviC-e|OACH!!7nZNr3JsV;FlKs(t=-F@JkDRX~8co_@xEEwBVN({L+G7 zTJTE?{+kvw%YU@aKf`Uq7ZPvxW>(?;9d&nlC;i(162OOWax%kkD|f!$ zRgI|4|F@9$(=;Iat{7FjedS*U`#C-EQFi7?QhYwCu+Y)Di{pDHz4uPF2pQ_|+r+kBs_17ZD75FoD)Q zexd(D1E02AGu^C_vv)_&0NL_{2P&*to-3RMCMDxoZ zKF-nnO?v-FvAYosuuE-IFYf;$;}0IZ33U3Ogz8^Lf2s+nhisF@v;PyDjv4&l>G_|y z@c#z}`!e2h5=zD&gg%ca&u?b<9|`?4d%j4MKBkMiyPs1$l@7XZLg!%B=uIBPC?Gv? z#lfi&#VBz@jT6m*3(=hkUU|fGl;a<*WSg7OdG_6iz(Y@l6_`MIj&jnM=OVIlXWpWQ z8B{ip{LJ0Po{6@z{jW@z@7oKrx;MYx}w-?MT;SQ1JnQig^@l! 
z0Qb)%ykJF=`fSV!+$zl8y4l7X-K2C$>ZA1yFL3gW6D==HCkmqfMQ;A9wE2);SxWi1 zt&d)~?kbm^R4N;U9ZU*iXx?(qUr&gg`QjujDdF3htefn41lNEpu@dYjHWVV2vG_zzQ| z$m)~X7_~lC|IR&gkZV-;e`dtJ%;PO8$|?4S&BHF^B!dBMTdFqRA*9kVpzoOSneQGj zXLcs(O$};QcL!c1D4DiLjXF|4`5#vQV}NEIz>x19C^z+?25o?ESK1C`hI42bP4WF9 ziBHwfCmqCqTckzWsXZt(s!iqpAHKjJ&wXFO0br_^TT=TUJ@pFO`I;bC()n~gUha>z zEmgj5j0%l3xc&1J|Bc&XzrcAn0W`|uSw z%nwr&kq|X?zBHxN9n^xLihf8}1DZOYxJYDJiRCX3sW@o8aVu;)fE+KqcC%QR!9=AH z_>1!@+k5bV;+%Nz~LnsvD7K!)M^}Il%NVP>gu`_ zd9*De;AV9_fK>xmIcD|0N_Cu&Pi?DOe8S0=7V~;Z28!Q&+2U2Pv;Hud38MjFZtpJX zVCRmNC0CEem$75%_P<*+gPcqs8S8qgO&@BLfvS4ItGbs%?EPJpr1=9r7TTwCQgC6@ z=0;>E`d-j1I6Uk0@MsKTaHjrbV*HWV@2?_jW3{sDqDDkyR`hAp)1F#rLWD$+o^nfd zH@=ljDT|taimc(m1^as(h?w!VXEE^hJ^PFeiQ6ynSM+|76}&6_0ddc`<*paJw=5E! zv|H7h_1HwxmV;SIov2hDJLd0H{lOrIKy5wO{CfsBu6LJgcLW=gHc;I#7u}($e1`GAqEp% zPhd9tJ|X!`2Set!Rq&*QVnD@IW7JKgQuz5-+9UQ?-i%MLr=$?8W-gvj=_8zU|Eq8Q zT@jbKH+-Ne4)Q8GRi7c&M14=YA^dsZE6_ukrC;9Yb+q@pkwj0La|P-en)i&>*^I%9 zZ6-=GCtzSDy?W%jBdYmOq|id-{@AYNt;@M;`ishB2xPnB*!3L z|71x_VVnmje=omsvy4uELQhER z`9pe&aLSu7HjkFzBH;FJspwkQ!*p^vvLT1yMU*#RZ0)Eh_eZ;yI}{nbPnaSvhX%Et zKZ=#UphZVd3#WIGmNvGT%6&CQ(+Unl@#NmOE3Z$sA$I7Fi85mG`OflFo42_)BpK&m zfaaI6#S49CyBL2sWG5^nVjl6>*dN-a0G^Jjy)_P{;Tc8_C!AOfMLc)68??c`&aC!w z1#P)^yiVD^sN%n}oFM~GGg)+4-xA^>$QoZX=_$9r zptY~Hy3`?2BmObTVKmX2q}q9Vw_Q-j`=hh5(R|t%J_-~g0jH^*Nv&&R1$*V%Qo{;4 z*@url8P+e7AglTJw+-<~hYl0HP~9Yx_v;+g75QvX_(6I2*?~i9Yf*gJ{q1k*lIn}n zkCe&}5!WE96}t-<<-en?0lCmPJ{D?lx9G(^_t}3oYEkt01&gu1Eu-t_ zA)*Yhn#qVZzId_d={seSN&PnjN1`b^$SR8Fy)G?OD3*hw5Mtww&KF*GpCu7yi>cvb zpoq{O3U+u}W|r3fGD0J_q4o}&j&vDONfErfnV4eJ6B52=3Ev%I%32TPsdtNw zE|M1GPjg3wWh?9IU9~1iON+L#4uy0+PmGnT%^TOfKN&ER+-$WQ7BC{f#z`khji$C* zM4v-F@3I3IJ~5R?awchR6x<`?j*T9uJV|_Sa99GMm$nXAMNC#V8!xRH3pU%cX$##aILamH1SQ5gA$F+Nzm7+e9!ky=VP;zVEOv7o)$IFRy2(9T$okMxDm6~GP69sz zJ&QuT&e(?<(}N`c`wYC{i$x6GVM_roxc1$LCnt1sTvotYCG96LQh6YA{@cj;p>Cyt znsMQ=GLLzUuEr>hY54m6&8`*hYWb1-)#LS%gL^g~)Q-ztoKNl(;sTQ74LvB%awL)d zY$oD-PoB5HXr*|zPDc&I)>|p=5d3BK%iCTk=uEtS{%U?rz{x=vx8V}y$KlcbIYJ`= 
z<_VpoS50wftZ9F{>nV@Re>N0maL`!dNqFm99^C!&*gqW5nu-S=k_{!+)QZt{qGAGN z!o_!BMgRmxm=>A19K#U_ln>#)N?$H9gF@WTgvD| zlpoLjQ1TG12BV&N%QsuaV3dTQHWXSdQK`1u1Y&6sk2I;AoVhYrQLaSW*bmoeR zTug{@mrF{k(}5fLh_-AiN-8B58d7;{?k)My>GxtVToTbux;#m}pz#%)842TxKh)yf z7l}YbppMMQhd#yBrKR*V*~@BnyjQD&jo*_UFri-*qS&9fv6xx0=55^B)>sJ???klR z+2w>zyBWqv^R?gR*&O$G;iF=1o3C$iuH|Bg6ro}=NuRnFUS!gR@OW8vq%6H-$(8Ir zX=!YqPS}rY-Hr21?u=*?s4AH5Zoh1%BgEo2+SJ!0flLCoIQMYauPm<|9Jxj&U%-^6 zPqQ)tVUnj=U*dVmgA5p zHgw0c$Vxoms3NYnL`_;~)$7wty>vrGWzp|cl(bA7`N5tBO&J+YA{UN~H3e*qbPDw+ zyXAVfPl?;@vv*YK)Kvo>Lk|3{_Rmv&58L=yGTn31N23R)#!5jrNkZG`JSAJ;KP-x^+@|`ckqMhcUPE-ePb__#MD06^&yY<)Z%23&ER^Np;06{ zWNWEt!7x(F$g5YT)?dGWgKtrb-sQbhNwp+M!bnLm;A>4Y<9(*7FJThfwyRT`lY5n- z9sHEF&j?w$wgs6gZushv7H9BC7iMKS_HK20$4-AKHcgP@a4*#NA+xd<;y@C^CSEfH z9;3UzG7j=suin@`pDztHz4&DYB>a1GZ$@>aAJzv4|76y4>!^&-OqAJi-*dq@~h0cQ8qA|@| zr>M5Q!#F)ldNON94sjV6BY3e1F`?bk-stFEKmYwmJ3p-o`$;U3OT;4VHc{o}IAU1b z*+XTdVf9QHiza$M(hIZ7h9b;t<1pC<%80mgH00?>r{w^U$jTs1aMPbvbjR}bC5l35 zxog$9x-JWah)pTo7}2nsXW2XhctI9bhlSkbL1&X>DS)vfR3ZqmdKBHf$6h&l#yPYzxmbEM7S* zIUpEx9B}5fdl`&GjZHc=nCsau&_3NsglxSAgPTZ1v*`D79d|RSJ93rS&ve>n-5%MD zJ2KYgs&%U-YRii1AN)%s)7k6z@Qb=3Sz)e06d2cM%F?WX++`~Y7OL%!b76K>mhMi> z&!E72$`5p-jBaAN=+RNT!gNL+tgOgFC40&XMqA#jhH!g|x?S<&)SV$iV z>+h$j9ar>(Mb;D6FQ68Q05$Cs1T@>uJ3~hXb9NYZmj(bIDN|{=U=h9`U{{zZER861 zRK8T4$e1`8&=}Pm(-d3P(zumKz3sWS%(`Daru|0#PH7S?cS>n)2PV$WTML~?{)8g7 zaiPU!i`phFk0nC6CT0kn(-pXk9(6JkJ2aT&JvCn44s9ypT6{l?j$L=Oe$|ay`jAoc z%EHm!_D$n!mt=UKY5jUoTg4U4>YDZ-M#-g3PQg!2Jg!RxDsGstj$TqW2Tv$i)|qAfz|g6}Wd zA!0C;S2R~%HWzYE*I+TF!XUwgDB6IgDDhVUN&j>Jzg)e#wCtwuvV73{1eK3}b?Dz} zz?2)@#Z&8c#NFPrUWm6%{n`Q@5-*~uU$d;^@sK4>zi7J?JRd7w?rAGyKu=L_*he0u zLgnyA-sLc6@>vkwkmFja`E%ZG9)&aBufW8Li%5hZ(WAU2=1M^JiP(i)cY+a(*aq(PxrT45J>2wnWRqOc13kuse` z$126=IWgjS(Q)g8!uO)X)F||T55?8)RA#2cpT%ZmR(#C`OZtE{&3qEs2143UEscJR zD-igaU>%lW&QE#)$HV=o+dj}&BsNW<<%e=S>EL{r9OaR<+r8idu5rwm1UbR z6sCu>R-Tlp1@_z=#KJY(b%n{2ns35-FcZ+o_4pGY!MJcBa&>R{H1BfBx|eOg?F$kC 
ze;JYw1#kYz)!%%QPloUI(gI)Z)7&?^lbeKJavD}%#7sHc?Qsc}o8g-s$7rFf(7GM_ zj$!mE2+7iCnJ})JuKfCa`x+WQZjs{2!czyFCL1%n_KV+6pwR-r{yfNQ-8|tv3EQ<{ zH>um4MCJ}n7b+<)j|j8HAL8mq>Z=wewG%pKgH$*GKX70|EdO9WratX?+K0ba*8+>L z1HRg8W<^7+#*0H7;}QvDOV9weQnn7IMY1;0@*Yb^TNBL}XOaysJ2~N%^OpJ3a|+h8 zKEV42Ek3WXm@X!gtlkU7aan@as!bZYPe-T98{bW7JNuT2{nZLhTzd=9LAZJ!F9*WG zPPv5>U&_xbvksZ@_T{MwGy>5#OMn%5eK>OO z%)YkLwA^5slJr1`Z6P1#Uc?i~{=wwkF4Fg7Q#rulNp(lQOZY=^dArZ7TyOp)7vC?{PI=Zvta z_HbW`IBAfdgQ`P#$P=Mq!6}o);hfj+7Eg&9bXnVlJ(Hk;1?!DZOc`JT6!@*=bw+l| z`Mb)M5U^cOu^Vtb!P00Io#mUZ{EsGo>$^S=Vdi3P+buegF|;?f*@E|SL%jaoimJjM zb5LKn?kLrjub7&*-3GU=uxYK2_vLU{Po_upB-|Di0544qN4s36DQV<>>CGZgLO7&( zj`ygBO-U@wi2POUd|0_Y1h5sK?^#}7c8?LwRb2aOSZ*m(wKp6Vt#QfqN&UsPns*$D zE|2w!3k$*?9dIJDITA`$f*o`xWRTUe8i+7Cu0s4?6}qV-qKf3=l4nzHQ46htx4=iQ z&%R2C;b@$lr;hNde|^|mZnBB>;+7ANUbC>$Vu2pX5m#p+>{d@1ecp~R^EB}(-M<8L zzi@E@NsOaYhjU9J6VQ;hnhO&Hu)sbJVq8L3ZE>dU=giudT-;B^^81%hL@&1q2;bZh zhIk2bSN90B)Stq-T6*tj$F4+{^{7CrL z;X{ji)AtpqI0s*PHFx9U1U|sJ4F{__e8|UPZ%1QZ=QaWHM@s75mt{ViG(;&Z%1vW` zC*M;vbG18XO)$Rw*g5-_Hi@zuSI_B<{G`nAclBSh zUZFzRAg*<{3Alh4CPgB$`!&4XW~r|yiT4-O&V-!rzPVh2=5U!^r6d@M9?6cQz6o|h zdLGBxT2uv1RX#PWAA>K2oR{qHb`CDYp)oRpZwA(s+U z|H|bPCpk~w$t`=m{1R7$sr!65On?|xCwSjCn_ZjrD)Z`E!9+7&hYs4WhWU$zTo8O{ zCVS}Eof>pNTpuu^wqH_;TNd2A72V{7weBDwW43j6>|c~RqnfQd(A0%&_%6xn=_?A0 zi%QLMI-z>*?p4lROvp?ydg9RY)hdH9hV-Npa%^7MFJtG5tnJ=e>GT-5pSp38cDI)w3| z8D9szYUvcU^|-H9`}v4GQZ~1T?u0;RgZg%{!5NSkgkeV*O4bwXYAj8+YDS*U35LG1 z=zmhf6e)y_?O1KL4CYC2%Hxrpoc|2*sj>f1vvP*|p+Q!+j|iezLY8s#$id)FWd2IK z{Sg19^ka8iqu3^NzyEf#kieqFp^P-aW-OLLg3jTgbY280*S;a`s-y9Ry*`06$xA3% z>O`UEv9oua7oNt?=mO~~yKjphsQJEKY8VSc+vcYOo;f(t(bfo*XbWrzV~d5Ma09t3 zTC(-=vVWv#eoAS?eLHzYi+N`TDMMJTn9#c3J(V}@+X>Zz;HJHk*`lp=O!O}qY-M}9 zPzihX1t233guH(1rM9<}5@nq!;U;bC93%N~S6x!J_}X5>xlmY_lI@6nplA^vOZFNM zY@HE1EM*M1pN?z&EEkqGDo+_3ch{|`01inq{+v5>l1?>-+f`MM*TSvsXcW{?lU7Zu`ht zGM9$J8^7lKd~2+fJJs9?a|k%mCf*LaBki2jg`>qbAO;HcOL}`7j<4>VLJRNCl;HEl 
z-Fy1B4c1wA?W(ILN-fRymyL)BAY#KYAO7dQ`orS%J^}pW+%O(yVjB$p&N*{8*nB^rDKr6gXtMnFC+>Ux#)3ck|)RHgK-x#b--3x44 z)!WmmBctYs!k5YwRe7M1g79qYC@f#nZZSATa678-{`@Dpl{(8cfiI6wpH(#XY1^O^ zjQ4a^%uIh~@Uu**AIsGG%PxOWdA?XYtiaxmsw^eo0xX|sW^W6w-YgRA9@}4(&Xg6HDyR~>WfNJ@L%tlg4vvl1Z;nd zlEKJDk3{YKBL~lqmw3$(TZ!Xo!&iRqLW*KcKPg) za*iIn04qOF!6j?(wb*n~3DM^#yZS5BJ99dSpLjGe^Gep-+vdyD8KS zdwJ|rb@R&iJUWFx9(I2%VpmwabSN)M@r;*O4Yl+*c%MKEWs|H~?GTMHOBl&e{!0b| zh-Tg3_VIyshKwWxi&FWlC%ng5IInO`6fRe%Th4~&BvXLA95CJCa)Lu`Ufy0dD+u4> zs`#glPk?Bn#C0Cs98p=fwQ-_lih9L`iZ>2PFk6}yJBuadDL5`o_5 zoRVhA(uCRPXRaEGYnD7>8!(=9DS~!_NHiG zMS_=uG~7k}Q~y+4r&q#AcH?{zV|U&lly(5Gy#ftere-vk5#yXAV+;)#l{9s3Z1-Pf z!1CkXl8?xiX=J{U5yhwe1^wnl%eHc=ZuX2(47uVq3f<`*g(nz=i!<-sMMPN`npwW-eW z1R?Q$D*_9LLH0*sjx%rkqJ9tmO`S9TmIHc4p*;{RnIjOk{A%j!oydpg&OMeM6KnY1 zI`1Ny>J4(Ff?#bOFf`&;2Ou5j*T1(!%V8o-+{(FD8RC}&&1SHrrbjB`3)^+(1#+Xp zEEu>`b1EvT#|VEQL`F~3#tPAL3{Gv&zvYJs%XWN1Fj*0x1gBmXxM=8N3)m)E25bwpW@{$ zYp2`LD`SnC_s_wauTF=#>P$~C%;A&uiZpS}vdkIokQa2J*R})&%^C&9nluKalCm)q z1r5FdLw!jB9GAW2B$_U4e{?Ote~Vjf_iwA3PlL5kooJ2FJ!3Yv2)X0dn>fFdCx!w< zQQmyHvSYPuv@z-1m?)JS6;Zf-ob7J-^+<^XWs{E)tIa^F`-v%~o(;{&H;CzHmrbS6 zso$YojCQBdIv3PL(K8mwrbwkHq~V|E;;Je*EL($v<%zB;Nl4xH%*Y+a0t4Sm(G7Kz z{2=P0p09~gBFy}ES7(MmsX?B`Eg_&6(d`a=BUOoiRtD@TPCq`nH@SCbGX;Nkk5*V( zOG+SU=-3;nk4x#9q2FSoem2pltG8JE>b{ju(|MzM7Z*x;MDv=<#}jchFePPaCSiy! 
z7Utd?c4dr57qz{Fe`5}2DrwF;(ZF@#5wi(*n7H{nBT9K3hCi*>Q^*eO3#t7wOC5ue zTwgp=AZkzS?5sujZoKi}xLopx@hmlZ?e?887sjAi$=PKH5FHoq1jeZ&op5o6^j$UU>d|`m?^=v-rILjU1$JcwJX*OXdqV99k<4S6Ip_`u@Q%s5)aDXD6IIH+W z4TY;x8Y(D5yizg}Dv6+B6t<;jjH9#FjXW=3?t?lubE0ianyHoW1oBEA$w{>5O4grpB@bWtkT8Yai{ak?ZZ#Pamjy8*54_oTz^$ z9ecUER=c{Vy?;2s+wltX(1sUGi#33k*q27d?T2PZVQ2Otrf1OOlEcOkMFOddL|paDtZE- zhM2bH&8Y~FWGik!UWjt{vygR9Csq}5aw2ydu2g*6V!F?rQ0_5mi+()QBeb0EjGfv^ zFte;R{}9clMEl7NRYnfn%Zz%Q8y1eiXm2Xadmj#5nAx2G=_@9Sn)xkWcwy)`<^+jnq!IDQ_&T5#mP{%K(@M;Z{?G} zK1ez=ms2{dHP}NA%j1k&Fj)I7JibLk^&u2I7~Oa3ics*!HNgU6W#O_K+Xs5*e)lDS zqB4(_JQhEeAHcz1=iS3?Yzz)#|8>pfF^(!5$VPHd@r0g*QEmwd(OB{XdAha)st5Cr z(spAL>-uRz3Qe}}wiCrcIkH%c%P$AK)iauD-0E3Tq|+;}Mb|6lo1XOG(JdiBRIQLJ zArcZm;E|M+bn+v=-WEQW5#u3|cQY58{+dV18VK}>^1Jh#;QkbL!9Av6Wp}x!gEyxK zF}K_3u{#1Aju^Xv>o(E~HSzLteDc~`7^UGb`mNJ>fy6nz8d&DrnLjqun!Z|JO;%W3 zST%pYD7k&sV?AP^!>xelws4W{hM+oP`M!TVNxbot00QW5ThsxItpyH5!`yC7vLX zINNd%eg010Lnil0VzS7Ih+@sQ+~u5De0r06N8XaFO*t5y;{$8iQKAf(k(&JzIqYAk z&Jv71Bdnm|y$=|?x(Jk46R6U3N4Yw0pI$~2vW z@tWqEMc8`TlLxe#2DLES`GB)Os@;(N(Rcku{tnp;Q$`jE;24;CGv01&_$7Cw5pGi& zgb5;0Lbe{uniM)2wXm__S5{H1`8OyTng0q%b5)F|F?UquA5}J&iGD5{tLuzfl}PVWC#Il|ZJ`p_471hQX8B41xQd zoeioX*=QcvG>lGy(6_>Mnhv{iUlrlJ9?`ae-Y_kZTnWTz#EnYUccqjX#*3SgpNX3Z z@OaNiVz{d1(_0lgH*A3-412gfBGdl^U9!j%jSp{u_^@NV%99$0?kqM`up)15z0cgM z>e!qsc#`6;xnPaFb4@^M&zt6Ms4?j!dbS~`E5sJA<)G)S?^&_!X64p0JOD%hf-?iC zWBAXBd&}f##v&S$wik4xyT`|gHm=Q6y9{c21aI?IAU&21CyoWe{l&wL`VplXowCur zMr(94o9Dcht<^?E9Dw)f!&Y{TYT<_Ukos6W%23(>-(V93h|#(v_`p!ed|KGJcU3fx z(PnFzjV21|EIHFZF0Ia7gN=$frPgiUy;~hkX(V@>dnYM(&omWo#_(XidCbHsG>H2; zTH{tiuqf@rV-laquphH|66fjGf)Qjm4(+Ociv9Shdj_7s zcvHQRW_zbOVb^{$7QTjqn`o5pEM`|V=x)h@;(z*l%lo73P5K$RofpwY3e3}U24YU< zGVOCh`{omVhB{Zk(y|-JDnvMX3x$H;ubcC=T1PwK(>r%=tYQi(&I-jpxE>OL+j$Iv z9x=;!GS&SyL`h;m?{K`qHZDl1Hkf6@N!KM z3uom@2`&yI+fR-~Lhet+iu0|F^vk8PQwJ-SRceTenug*-clzecK`e!?=!*jsy6_bW zOQ`2#*-9)13a4L99cn$N78Waa!sC7ZUi@2)3YRnhGexF;wEP7%M9Byn=s5F7LrHe3 zjP6`}FSxDZNqPs2F*b!RtVzt!slM7`t(kCfK4`#i+&_ 
zH8b-@jvB+)l`iTX+c?X_?4e%ku3)&<5)L@B#nv-*snLLox?#)cFjEwQ+`Q?%7ZjUF zY~f9siS3fSx=imZBl@_QU+JdU&edfh=b?E=YD@on+eJojXsziGF)RH1z}KX9fiM&$ z45)dkBB9>M_r7JJj)N~gQv<99pU>E+Rw{tBIq2GwO?o7Dm_oAAZKM3N(I=LjY>1;M z$TAY0?>s%mvZ<_QJyfi{4LB@lR?!(@WvshNHL0J;anHbJX3-1 z2BVg(ye(UH5}*Tl)(yRWuguv)`_6_$Ycu!y&z+Fpsq5EC?m~X`ArB|p4*7maCU@$6 zc@2W52r4-!o9JypLSE5^)h_0#uHk=erPdz9v-Z^dlnrSovdh^mBF7N@mxe4i9p?)x zHzPF#PunnPUFB)%c1&5e>*M74vMbXPoK1uTlVAXuYJ4Zth|!8MxS_3}97H`?tI(J{!y@OG(gczH67 zJ>~NEKxy;!3nHZpy{Q=Ex>uwm+Y9!ks()D7nyOc&`Ot2WEwKcY-*W<4z)As+`G&9y4m9)`F9P0XFzBKCxK|L`au5e5 zd)R2&aRxfS-nLw*x>hAus1o_^GG{?Z^jwCi7|*Y8=d40LVl|)o?r&e@x1V@jS%HzQ zyL+0hkEQ4gpshL}AL>M=Zil?eZj)2pcsKi-0Z^dK6GF-YLGKy4=wCP+-w;6uR|OuS z$H{!CrfK^csyF&u$R&bgPhi^`8}ry;%Fxd6R3fOiqk}#+wZ)TS!fzq=WE-G9=(VW@ zHncYXN_n5VZI&e5*xNoUC6bcbxcw}0frI*(uZ3;_+PuArt5)f_QOVhnFAQ{7UC+Es zk-2s4l;2O2N8X-RVgE+;O6K<$pZShnF!+gaqQv~g?JB1uhRYEJTm>FWJ=;BAx^?{G z_2T^XlG7$J5%UhI$$dHEgG+z-Oa?vEs7t z@E#yy_V~Q>Ljf#?2W}#0c7G6KFjSSJtuPO>1iOqcgtD{e8|@TX!}~_#LvP1+VmNay zQh<8KU{k+xn?*x%qInuN3N7bM)gis1^Gh*tg%C?SZw9+dSzh50&GNKUPVd*Z^Pk%h zHkmON)@IWBKFt<;OlhXV3R`Ux^p6TFKNR~zZjY>O;XBH7JQO3+R7f#>6*kI8wz{yM zW9e&0ufvEoZLb7WH_OJRFPKKtFb`(A5L#5|c7txwC@nNVBBA5A&R$hW&E(A7>*{2h zQ?!&s=COXg^!6i~VT!$wop1q<{>N^1PT#o4>xJ5+U{w}v{B%38gwIl0vkhJhYoElH z?1=@H#xp5rh22xvjPJYv8}G_s!d}dRMV-GL^Y(g&(%DgZxBRhAeAd=#Cfo3YvDJsr z*Q9m*dO^6kkY_ti<_8~lQO(b(-etN9Rnp988eq#7nA#8$=H>hQt}Pp$O%ty=X~>E{ z+eSQvAZFv|@yS;7oM5os3?D`4ijExiNjN%!#Uolex+_eRWFkwxC>$2&{Iq2a@DBTE z`ylPYZo08aC!=T}ctW7Gi|DDVn32?8!=}r2&a&v(0k&DWThym3@C;uCne$xM9ZTQy zdkK%N)$y9FRXRP8Wmad0u(io*Nxv0OcvXpg5{Xj)m_2A_NF6HQG6QrMU>BqjVGUPJ zeP!Kh#Tm%8`A)Wj(#%e3+S>yeWUPPEPv6@78|>67I)w#hbKL0Qv>9W@z$-$jn1QI_ zlU)8ovo5=x{yt2zYvIN11FYF{TCpAq9|GrFB}=3(tcZJG9Y;eI8`<)c>YjYa7#Htg zUrhcbG@&_{j-Jk2Z?{@ojiVDzJ!I&6iD?Z1>M1h#(v?e5&>q(-Y^v^ILWq)J;!`)Y zch@j)klK}bdo|t7w?MaI2jsE_-9~SXGS%PvZj=+w_-WiG8*3R%B)pg#D^O6=Zz5A) zdY7ePXu}httSM%ZGdI0W`DF>$DV^+)Z{`{p7>(3#7v9Cxk~il{=nFHev9M8({wY_= zta+oVIE1cO!VTw|@+rgJZpEe>$z(jkgxZeTBn!e(^pgvdN=UynnusSdjQOdWNw`6 
z6)d>S1D1diZ?i3y;Vkw;+hdX8A_~Op)^WB#qzc-dJ7-firHETJoCCAy;k6NbZE#Ht z0&+5G$;{taq}xDz4anEOG}I8vE;pgwM|wK*V%1qoR!!n|OlYoF^(7o#4uzc~l|v}b zAwb3#>}3i*Q{mU!(gmg-d`z7ntO1bF`S|;_cI|^Zh0t@q{p8oj{)Qq|>??u0(MR}Z z%($_#f{;bd!`XK9cZ0&_Q;53@zaJ-j;9@`Ap6zTliSX z3R|0f7ej48fDDk8MX8wh`)wie3j)v|y zxh~!1ktd0`FPHaE0^!*}nulCqsc#|HnD(Fno)t3u6@0|XrS1X+Ze7oT&>`#5gwA@lmFdBe)EC*3LWGS|){}!~+ch&%k*%s=_7w_# zeOmk#4&0ix=_6VNHA;P%uFxYUi7J~7R2(6yqV4XR6Iylz?acKNOMV@%#B4^{*`0!I zIepC;!M2RTckIWjteXYj1ucCHn|lU3o={up@@0d%Vv)4Oc_N|F#bopmM*NP-;m}FF zLYn=i-LTH!4{L}M{G)K-b;%g@#7JsYt$QEU@cr!M# zP6Gz81w*?= zkw{;eu0F0`5;{sGNlT`O@6H+9GaJv`1`*}S1~(vxV*jlNh4C*cX3P=vtLgrB*=G>V z5qjpnB&g&KgmlyGsSQv_lkRXixo^xe6YU|tusY5IWMdZ}P(_Dv!L~k;2V0H|8{A|k z=&)}KmCmD@K82iJe$OOGqTUf1W;kudy1&NT@AWZfQ>7N+dJWp5tF+PPNl;RnhVi*nsI%OKPs2SS zbD#~{cF!utW1*x5lvs=QI?n^p;fF1b$W z#15y&y0QGU-?dX^9rUx8!uU%mnDg(7{8_`!8;b)8_M1ZGwaJ}lZrsZ%kl6afDK7kE zU){|gF`g1eAR4hKzjMt_45Q_1C9C$X#`4X^yHFxd{tC@1+W)-+cTx0Kgu(;*T!C_x z4U$b^#z;t5fZu9EAx`c%*`m!o|+pAC-pI<1@CIKD9IzznGyRoK;n=)SNojz-G#Rk!H(M#X`ZSX(>`8z6u~h+|!BX;+w5$M2DFsUbNENzCDJ9g^>9 zP{?l)#aS$}SVtEVrqVm60x|DZ)}+F)z4CpzDX`AnwX;=WVOwglaEuk!Z0pw+5QxIS z$DrKUmblp%-qhG81_%tOcQ-j0XTBg>g>qA!-PHu#x03o}c+8!lnb7%HV)MjCFIb3g zxot&AWwty`Oe|8Q8D?8yeULJa91RS(tsJZ3Jr}K%JfYbHg>{xq#M7`bd1F=4Ux6>Z`Ii2Ns?AOQo zMq$#^11uqC5*8Fk7w#Bp0Q`ryW3}G@4rn6=qtGTVJ!Iz^zam%~(m*K`n{8OOWlNzi z1*|@QWwH|4atL~H~u?uIrVI1xFJyE%py8~ zw#kHf!C}1thiY$=z5y0cFLTMr0_egCVgL&jYHfA?T~AG;J#`)TGJ2P7j14dN)Vt*) zi1-SR#uMT`@jbLASaQM{d>z17=!>DL__MFJWwDtN4bs&cErBUjo_X~~43glWPhlTA z+Pt({-h-xrex@N9h=^M#^G_PQo6uoJ9I+CfQt9`4SZF&%AVcwZq2kM%H?|2 z`U@?EwL7-4*%*nZkHj8h=+@_Uw_uQ{<y?mnDV zR>>PDu}9w)BL~z|mob&z<12lAxcp^mWrFf0JJCejS;VehgdTT`%zjT;`X9v@Q zZ)%mBgWB{s`;yTlppmn)HhpXBeLc1DN@`wuAhrMj!`A5596A0qmyhnz@iYW5&3MjP zVvU2I+r8AI(R0&lYn`u(?N-JjxQ)p1u7tNM%FvfFrlMx=F*)82B(qZM=D9n`MBD6+ zyM-L9S`xObHVeq(Hrg2L!dlC(x{A3vSFp=LVAkABE!32_n`L#%mMdU zfa?v7CTr7Ww(KjluI&}E41!sYfA^*P^rFw`DUPzF#*nuxP-*5a278au5vI;V>!V{F 
zAe)JSZrHwiv#MU6gHa#(!Fh4jcHNiwv|!AL;k>8I{?X1q@V?Aq;$O=Jnw8?&1%)kK;OAjhDGOgQt*ZE>wz*HEPek+%=m zOa0`ReT<>;RMy;C{Wtp~zkP@Rv+G-I5c5C&!B2{cm{=UH=(-fVv~xWjZfeH&)07}iL(Ud|zxJ%+3bDtqDuNgGIkhA_7!N6zS_un-$^qshw3 zcsU2%O3`k%{J364t`mL2ewKTfDx;Y!w6jAo?8y}rNS5*nIm|fZe%w6Aieh!)uAA53BELtGdF#vFf$2@}z8`p`a2 zychY~7w~`?(9Pvm0qz<~o{1tBD<+rnAPY%zJyl*eot-=MPeWqyQgq5{%(ZSy#eu3H z#2QUMxRn-52fd8Q5voKl87B_xY5fyw;#H=^wjOVXI^Q9pwIMP)kYErEEX}FPAl%sd z&TyD(X(0O#(Cjzc(?o%)u*`zmiqB((R*w`TXhcFjl`?jEGr69YC zL7b4^6$GUQHGhEekbGWGfM=4iw)k3XOOe}-?J3Q_uFP*cJgOjHtGe( zm~;CGaaQ8Drg|9G0#!w=v(Zmt&@)x*Y8OosAF#r}ToeeX@`A}P1WV! zxP;bK!nA`JUZ)>uj)fh&p1m$Oin^m)yz^MhXV}`>;kxNXWwlum+ezt6z?T5u4tsWY zu*_$-(z-Kr!+NU5T2OZ=dA)>4NLWj--dWnksJ>Q#8zqRoU1_5^p(4rJ`1Kq70kyq; z_@1P9#k5Lig@_MlE+r=ptgo*1F9w`uJE!8pI;$ooaKReC#>b%y#2;LTb~hxd)17PO zZO7}YGEc|CZ%X1yHjxuy?}t5*B-?%zqk{@DF6ajPM8noMaQ5kC5;97`25{)B0_FyZ z#VQItP>s@B?>eJpB`B8ZUzinJz^tM(sab-1Piy_;SIYXZH>BnTld_2dzcAulmC4$7 z%_zfpR0_mdsP=G_XRPVcYMDQI5AdUWlt$3d49TkIZ5`iy0gyudw6JoA2+C&(@pO`f zpSlXgMuo`<7})HZNSF0R%TgLt(ByH}I#!!#OZjO-o35>2D~1d#-Wd#v$R2pUEBC^! 
zXx|ZSZUnL8#sEc2P{B?3m#FE~9J)IcJ1_E7Say|$_s0~hY)D(Tx5;%=pZfI1cqQF= zmFl8h?e5Ii4Na2_l35;@D%T>gbj=cMju<4BaMPn}(Z0wtS<>m=^>nUWtAd<}NC9Wl zwvO)Vp#j(@*fQD^R_8DT9>~~)8fY$vQF_3cbkb9ifSK2Uvs9UjLnUuT(7XoRQRU$5 zGN~p0)ieQ4mHgu0&}S*Lek-DBIpgN5FsdIqsqzf_36~rPVX^%=MhkG(5G?hSG2qkcoY7@B#N>@Q)=MO==izQmZH-Igym^f-6_u!b~^M|0je zyWZwOV^$!~M{9+1W6{!T0My-6jXO?pbPjW=7E~8~Z3Zz2&-zg*HZB{}3B(>OM^hFvIbh>9^|{Ci7JwC@Hu{z1UUOc*#3V(Fxw*Y`7)gh8SKYzQfOCs)~~$j7F#g%LD@F~bRc z12K*o9RatZ!)#!lN276a+m#?rHbU5^afS{=x9yFTDAVMXr2x-4sUjM@EYr;GKK8Jm zA`8jqogTH&uv*9taukQOc2`=gSA>`c9XZ#@b$}M=yQvU$6vMd^!1|q=#%V8FcpEB+ z?Magr!0!qqSa5I(8L?!zzFUgCUg|txZ{0mS?DLZRWp;c(*ue|7smL3ayg3mSmNV+S z`HHdsBd3%!SG!oq@&_OyKll4gY5)k_^B_xuOrKHeOvyH%8y800klgrEoe5*tmCmsJ z;%2`vkpyVl(Ph~zKktOYBlyl={T$r<(BZkYg?G%MpnCoNtL(-S{6Jsx&;t%AoR%P) z!knAL$iMc9;xDL8e{~vT8Ly)gc!uS+yol zz}%lJ-yv%<#}kq(L{JDQlAlFr>1#bI$>;9F+ify=)$eLqnHqaFIqOgjvt@x^g&ngu ztC006J$M434SSV^rCj%fc;4Kbxwf3$n=H%$%=V4OBp7O}mJiiL5Vjs2%bE*yLo8K8 zHi8XsS;NEv6NfYsiqTOKuD22C+9d@p{#pfXs~gnrd4W7^GZD zPM%dY5O`G9nNSF{Tk7me>^g18p#tQ7E^Z`D7$6@krBoW)iVn}~J0cP~b&L&1on3Gw z1LMl?LU+IAXu$e;Yg*Buj8W&%! 
zOs$6rPicdI0WLSwt9`EDJnmJoz-H>(w0(oUYF!ua#1r-@Kt$9!Q;#gpQM(lKvGqvy znz1W-rfOsP!aG@@uGCly)v3^m_0gtcjS;96Kc z=o8!9^_UlCtn9gi9OCJ#sz4PoHZgWHELVFus8!;57mig2IsX|vu2=RIcV6JnIaz-k z#;=dF3(e!B56Ph2b@t^l54Jx)t91s>Gn#b_D*F*^c zB9c?j{GlLdMVL41a&V2{RD%E8D2XYh&=0I?L2)d6?5ymb@BC}T(!Chr%=RHMm^Lct zEz^Z0V$3g@Yy6%i|;&t%oy+xzCrZVG)1x@MQjAF~SupLs0!Gq;!y z*KvuH{QDj3{bAqz#LsbTjHAHj!0LbX)gO-s_*`lIak1$C-Osgw0ciO$-_xWAq|x76 z?fb@?H8cAoym z-bBdufD*=o(0_FF=rHq|Fr6=3)j)FEg<_=K^9~|giE-kOOZ@oB$cJNNgm2R>o(I?b zyU4yn3s9Hoqk_M4{vS49oC7XISm$5d`jKq^#wY*X`|o0DA8!8(^S?n^W_oD?uQpM8 zPiF4}v0vi@U`Q6)wTg*zzjXd#Ju`zeiR?{u&t9lr^vDYAp8=46hDL%bfC{s4+G@!I zs_npLf4xI9^G#q9F17gEbFUAECI7knzx{)^e@}`SOb~uJ_dY85%N4xinrkE%J+J(y zk=#HvVlY+biqth@;yBWy*y;TaBquVI(#2bZ%0GD~3>YYFEGA)FMtcYVY5IWqBEV@W zdg$|gSl9#1nT+Ozux;{<4>e*12U22xLao{V7r=Hr5S&zb=ii$Ik7WFOUQ}L>J`&IcOx%IH()rMYrJ?WTO`XjSMkQ81fd_(hQ!7^JiPHtG~d0g~A{!`9{chJDasF!H(|o$|YIu$KSu-hX*ZzuCPQ zfTQJue}AaISkG^7uJhf$$v212O&{&_s-#WZO@#(@V_72U32^(3IBPa|Fr$9>wvgY{C^nS&G26Y{~eV77s3CD5)aTX zFq!?o9Q^mB?teM>@4@LG^mU5k{~HnAdhI_8N&>SV4W^OYJ2uwYTSW2ie4V2;KW8FE zz}{Gp(qAc?(Q29wyumMJ>-|(E?{|P zJT?Zt8GVzs;^YtCZl=G;`@Dl*!rDFlpt@TBC8^@cu634{RDHG7mM!sH)D$rnmF2V8 zt~?BL{$}pwm4j!}<%(aO^V(k6-x$5OMX-q<)3^}ab>d|^9651|V}nUoG>ba>hUH>z zaka^hwvL%GV0r0DzaXR@I0V7Qug5;o!G>%R(LbeeyD{%bv##)G-NT8JM;A|H)wQPH zUrGgMX|Sc96?iHu54QeE`SACiH8WrPT%~a3w-<-Vz&lT-DL&JPn_{>?jZwW(C|q^y zAU*SzQ9&<8)6YC{;jFS*kJ8CCU_T7WN3N#<>j8ypLmxDM{q7wTtF|Y57C79TL)9cI zdEIE(pC92IagDRxnaLGeMs(=2a!uR4O z;pj7-RD0I+e&_)%+NF$K&(9y}o31X3{czlqdq&nXOZ9_W@fsX85Dg4WRFd!1uVOif zC?G`o3V!htbsasq{7J~cf7?p{UIVNcyCin+L$m#lU$jEq;&9-&d>*ZGaI=yVC(qz+ zaWK1;IO{a0asZY`TT7as2JbCwwnbI4@Wb67 z17`N!GyXb%)Up3lF31Np6YLYEX@y_dF^BE~M}B(C;KV^p&@)oqXOa8(g1fafzAE;I zg@*x?)H=CP@Q|MkqN8t8*P76}JH_66{Lp0_Xz4q?12qwx(tYrEz}E7908rJRURb0$ zbPhGYNqV`Dsy9AB<=-9hV1m#3%_^v`rUjKcy;{0)@OOX*UkOl>!Gi6fYLQ1R$6iT& zr=*?C(W}SS#)AHw1#oZyAZFM79kLl%?C9Bp1OgDg0}wtdO9VeVbQ3?FlV{BL2)|+~ zSnm)o0w4_u`;M#Wt}*4qE6OYY{!|5eg!Z3U`u<0=|7ii~KaTrk^y;A_np1`Ral}RJ 
zDAVBun;k9!M`T>JPuS0WM^9F0q{Z*ysK0~X6m<}>kL*qk0-K#M4i4eB;W>yuU=cPp zfRmmi+5eH&{ioLyTq84`kz14Zqy-#t_K*xw=Xlt=hjczAa0==Nhkx-V5P+{gg|;}j zRkPwrfxF-DJQX~0sQeTF;){O(QRPP0cl^*L3@YT1{ z^~*#B*qZU&sO6>*^hSx{(W%=Uu?^&!6-b$(kVD1+Wb&rT_q1tHM~|P66WvXz*m`rizq?wh zvK|@LPMv8MDzT!UrQtRFQ&7CzBiW-%CE{C5TJ7~=aTz{jWK1VOT zT5a8aS)9S(q2(b20y?Tu{wlLt-ak2N?+>6r#o1YDVhfX;)E~HE$nIxxl)3h6TVA8R z;bwZ6*v{*`!aJ=5^^?ID{B}~H%Yv2=2_^S7hl6X}yBe16OpLgU;Sz}d5e!!!0;)0V z`TYa@{DJ+dsQDIGq4n0$n`%a}=fttUM=5|sJtxgSRGZqe@~TkrMRd%4N8N`Cv4TT+ z^#kbmL+{HgKAJdb-)6C&-b-{(VG|3cZ`%IWoO|(8Ms8OEw7ySMerB{(6GcTGCgpbO z2Z?)_k7OP#o}|BLzL>=6eJHW~N^bAgpPzHSHaFJ!abD*^YvlWZuL35L6Vn0E{jl9Z zz573xIkOj9ezV1ONHPD<7yj+CSMPlk{1*rMtIhVU#d{r)o%g58c@N8jzwuj{XU~pa zLLRuHd!&rDus!mSBtGz!{a50h*fY}%2TA)s{M#4tjt7)|7GUF zp#FdNB-o#(^`bd&azYu+@*lM|e|f{dxXgz;fNZRhD-8R;{ox*=0fjw^4@W!IYChcf zi{<|X|NIYx&r~aerP#+9XxaxkcN zqRlRq%0hL2B*2P*wa5VuNcOPQLzEb};p-A<7 zE`cVP(ipl@)E?a=WIt9aWIsZFm74vqzZ`D?$BEYxxRXMHJqSnVyO)m-}NtA&u$% zu`;v!UQaujtUtb5=-*{@R5Q*iUyhbsd*iX!si|E~sj~tl0?m&?6vX&OMR+?+2L2lan+Gz@_$Up>A_M8wl`T8F1ZR32*s) zf|Tv#LjP2Jj@CM=2Bs~xGApcbw<{))AkSGWG8JU~>-C|Fop2*cY+gY_#>kUHX5F6< zAej2`OP7$18HHk`J%(QA6psa6z5=*eA*Us-(GtB{9*6MQoce?99$nwF95Soy_Y>43 zY8orf%AC-uQ}@O4SC=9=V<1s#!K9#pI(zEO&SkP`oxL&h5$LymlliY6kXaQ8UZBSP z%{oxibpq0NRnqyn0=#WPCx0b%R5X10rQ))vS87fsK`getUeJ1Cef^Vujz=4e(NQTD zz55%z_RR1-f+upZp>Q6SD*D)03`&ONDC_w6>bBj?2D&FU^*v|`N)PJOKQDT6@jVrv z{MLB3%I(IbU>BjJdM8aJ`}O?A_w3_uLG&hm>}~VcbIug!{ZW(T7jv?r@=4+0Dx_Dp zcwsvY>+AQ?DJeBiJ>_vG64Tgg%jzZ#7unvNv6qZ&>sIMCTz~7jIAJLem88+Qwyg@4 z^?E-QmSskO%5S`TwV=>TT&s9GpTTI-^H<_5P2Eo;PoQ!z2F)D6;USye@mkGuyG}Hu z=stYkPvmm|vDgPSS-fin)O{Gk=R|{FBPt2%uhnj9z2-xpPf!6vb9=y&>YF00h@Ws$& z)el1=mocs`g`;kj-Ek#wyXnsr&TIB=8^uF&-}G~aZHnrbUcw-87OE0y7L<)~o$Li& z<2S>0jqLU#O&oiS9uoF4+^LO}7X@C*INP zKXH@uW{Z~NS5wlR*sheP9Ub+UtT5}(bQL9VJ249~mGQ`SL8g8K*ZI{C!rwaI%^(ho=TtUYiO7;S_vso2K#n!pyRk3Ubl_ zUKrh$wF~mR%1UicDfegD7@W?^bGGB!==cA?b}lfEB^+_RE{nY=6zM3BT@o_r2|k7= zt$p4I3;fiE5M*7UEP?1`u{otfYgx*-NsU)cZDhGe@2u!@kBW$!QyM~!fyUWl6u5%6 
zoLg>yDm(J}wvZqw3w7|<;F{Z0VBlRHA7RrHjYdb}18d4?`9mp7;0-qr6frA@4G(e% z`LsNJ=wBNV?LPLiwk3K+K^@wXXGaCbXQw??z=o?c#pxOi zY6zwcA0}!o`Kd7`tL=}_A)KXvvG~W`#boB5)I_u!Ed(b!45QG>XRf?`- zRw{C`=_cOWo-8@og*h^`qDk~vMtTv%i#LP)8!7vhvrG~+8oufujM!P?TsCnJ8hy_3Wi3#sf|4Mz@x}f z>fTWGsFAb=%v_>jCfxq!`+B+rrrrbc%VO#b7 zWeYd^8X#+HlyoeH znrOKb>c1)HBbttgz#t(bn67ITxR+HsO#*{=@94ZK4i}aWUa>SMdWiwAF%A?5ehvP# zf^iJ3kxti3xfDIWnX{rW?h=@MJU->pn#?oZW~o(P40|0Fb&k`ju)3a*3r7UQ@H(AL>X`*qG($vzJjM|hjZRC4}+0r02ABVQ_`rgghAs~QYw>|Ma* z@%j3)x;T+P(8EJu*}yv_xkLDcG(z6nQIp)fjRyKC77V90%KT{x#;4G?L6k*3d5pRw=x);d9$fKOoIm132E%M|9-aeuOdXQ7D@{9bsZKvAej zm6yd^lTKkwNn+tS&h-VyyDQ-3M8__;k1X0m5@I2Fo3zVeDlZGq8g6P77lR9?i-~3p zi=|u%fns{{1_!5IlEPrq#qh}1`P-E~3;GV%Nfr16iJRxHvfmrrmP!cQ{NkT7d)hUNIQkG(IF71l4gVK%-Ujs7it>;Ib=!%p3}O-XiBmMmFJHg&!2snGS5@L zw(VHh-(j0R-1{pTw!LE@-ajzB?IBZWUcRXfBJSFh+~pSAx!=;udVITapkp)1piRB_ zo>Z$rCvFa zyG%mUr!KLDa3wft)J>s_wcmM^bO3`J5?iZveQ(wQeWNJ0x-(UFD?0}5r)LI-#Oo)K z^+HC8vtY?|(^`ERI<3KlJR`*XbJxM7o!wu}MQz{P?ln@}9~;)*tu-`9OukzZCW5Rd zD-?1@X=t;3xXN!{Ie&^{T61si3$$&l!nu=auE!1VI)JvKu?jOc<3uSNLgYYZs4=E{ zqGq1X+KkKlb46R`^d`(VfsSpyqS`G}bsU-bx{4dT*Kkk=-7x2?+W3H*RWI*V(iFE? 
znHXkask;SOZu#|6K$&}g_~U8}_CrRm6bcxXYHPT;18|5kQgW*tXi8u3;2swb2Zq(h z`}&BnUhpHtdp_N>Vz(JkCA)7cygCEdKgZ0>%|}arC13t(-1=j(anFt+xF2E5KQn&m zjpOg#;2@S}-S6`tFJjPX8Nu1$BRzWTqGepKA(O2*ILsI)vgmf#q(tJpG5 znj?-{!J*P?$8CEcO-9U4zqW+Da0?~6uyKc2VU-Jae(Pmt>j@p%$dc7Mej92iI*Fy6 z=`Z$hZ-T7vHb(z$3(Q=q^EVjp5xz)qY~g3~W#$|1P&BIlV19S3D7m-QuM&^DNeSi( z$;Hhsr#m$wy`AFw1W+Beccr_(v4aK@B)%Y9oQa*?E{R6I9$fB9cI)DxrTZgW?@JdA z+0YO5+DLI^lNQLOF=u?|X;x=euw+(^-Eu0%l2YILys+V>ja`xWWgO6tb8#%6a_(z# zG{TeOCCe5d107BBB#!xq%_vD{4XucN>=s(9e&|H(BlsFmRIBqPQ@%Xh@A3jZaxRS( z&&DqM!f?;ovrCq0rZwIl&Y~VrH=gc^D}h{(xe}w}&{SD%{Tca@e+_3)-?5t*lO^Spcb&UJm6pAX7odS<@|rNCID_OVHQ zRGi7_AU+srQ39AX5CqIrek?jLB#bW#sGRLPg&*j2A$EM=K~W%AC~$?hXz^Onn^ISt z9=EuGh zrqSKkA6w(do&E;Gqs5_wFa<0-4#LPMJ+1|GVG}<%ejQblLcnz@yBmt32=C6_MjFXt zHm4;Wd*@F>ea=h4ExoF~V3BvPMrQhAig!8L++~yK!#8o4L1v8=q=YW%USi`rJ%>Og z-!KvBsV4uO|HIyUhc%gg?V@8J#z90BLDsqJp55(7S+w^b+Zv48uq-(mP1+ zAiWcmUPBK(N>3nQLJ295{W9~Lv&EUS=ljmSuJhOTZ^~PqwVw5?df#zvq^IVf;DP79 zlRKO{p>_Onb|E*>c~WloApkxvFbMja59bmC;M_`HuY}0?MGZhRjTzN_qQY#vv;it| zQgc>4$x@l54bEGCXc9&?p42I6x_3*pI0chl2rjod!2zyqOrNx=_CLNd-KYkWJ9c*l z>(kR@M)X3L#R2TuwQ81(>*}HR5};B2P`MS`4^6M((Z@zo*8^I}IjcuavrbwQC6IRc zF3`+>GAI9uyQZ5joD8?h$8QkdhoG6 zKPmzNoePSjn;SXy`(Em$W7`kgYu^+{fOMt5#f1#I%g2adDg~eNnfR?c1C`UVpxh21 zbGJhcc*{KN;xc_lCaehurRZb~p(a?#8--^hm#H1)?s4a!&8h3<_7e659CbV0S@}YH zMM`O9u0m8Mc2(9PfPy2{`$TT)Ci+y{f*$GVJX|KKlN~$Ba$1NF8TIPQYJDOMs7DoSfv^)bX|e>P(b;PrX9i<;yRwjeC+I3<6@= zI(lVuk8vfM(UU}Yo{_yp|)Gxbk zAV2iDJZtCt0i zrh*^7ts@c9X4H6hll`5rfE=_nb)#x!c&9hW(R*@SR-kutHhl*^+4H~#wJblBF-?6c z`}4~PubQIj^bXrYULE_kY763{2+!YR#)0Ab+ITmqAIwe$2XElkcIYnh2Pw<6v3OzGeK?ju&Ln}hqHSTGDpk5vc~V}7Y+xWq}-tEO&QvQDISkbl5qLtrqxWdy(Djx z{@wH%NEz%bH{H=ViiEd8YYY^+!Hmj?J+)3_$NWkPL!KH&eVok+%F)5Q^DfJ>&}R&0 zLcjb~`%;T)4^f(u&eO8-_AzX<<$>s%z_L)XKx+<{8egp|pYKkM^T9|5gGx}XsN33n zune<`L~ZJUc~qt7^0`eDL#_fJS^b5)d7VWMj6?yXCF8>I;pZmZgeJvHQ{(mZs*Sv^ z#-w3_Up~x(9GDoVxZ2^JScY>&V^gIMo5>z8FznqaVHu9Yg!OLbXLyU=!2YRyL+Q#r zGw0n1hjR&n!}`RUn?;AepOKhMYV_44Y?(7>)KztthUN>x^dS~PX*#2N3g!ydb94}V 
zx!Ay^O|P{}Q|WR2c^p(z6<)`Y6Z;V+ZQk8gr~?$Ci222CQe^porFK1)4niGxx5S-f z1K)sGnOz{Gs^j3ugKdTx;vhp|R`vWoG_lRH8+;KJyPjV4dMImoZq7tl4~oL4uVUW< zUwO4rJGDDuW8!x?YE*rOeoeO-+}h&eZQX0tgczIYJk=ygygJA|sDqSRmFA4;bCI{l z+*QbX0RB#7;RD@jUPSeP8fVt$FV zv-k08pKBk}maP!u#%HMEWyfMk2tvVOf@Hw{SA7{j*xpnOaN zg((T|9~6Uv(!Y&w+8238bZ#YrQpv?}3h9M>0Qv*92$E{gNMZjP6}%q6Cm4W}?=T7- zHKa3IC{K$FrMl<=MVm5-IYO$a&sx!qY5?maGu&mhd>@{BGK0)W|NMH_gIY6?`2MfDd6yG24egk) zd-Gpq#A1YP;)%kGaVHr~7jnL#hBU;Po#{Ho)K#XQ|Yu&~^1t>%Drg&-?s4d@?u~@9;o@#7$Rw9_) z{Uq6dKM(@S7(!+wKgS4ZVfia5+N1nYxlW(__!a3Yda1%S{&KU=T~0gPp6sbjob{Vs zD47s`W9ba!818#rz()MG9=^CR$9k|;BN}>{g?)=!L9ixZI)_IwuOYb>$jUs9@!3W9 zY#QXh@Y0sNz>ZLxcveKVY~pp;s*pHF@h@=ju%!yeYWl%;AnW z-_q8d)}OC2H*Q8YF`Us;uz!M=80F3kI-}GWH_R2UxO4M^Cg7*{Gve2MJtOpQs8k-u znjPWSJ_IUy>*6%)6DjM|6m;jzmI>BAN9o8lJ)CPmIpl@-0}_KV#IOkfR8cM1rj6#p55IW| z9K{4fxN9a;21Q1ptyAGo=E$8cuD>aY&0>d^&c$a=CV5&#)cL1iiTdl07)=+cSj$61 z>qhtyKR`4(-|yJIHFtxr#N1+F+P1ClW)GYfx>cU8L!Mtr#bs)+#aA5pI~?wykC90p zz)q*_G0@jA3kupznHQPLv=3WNGR@0Grakkp@ARdv2beiV_(N`9UfnBBf5EuFTRclB zF@9mzJ#ow4B&3d%p|?+*0EOlU58Nh)KLC_V}UMJ9kf^7L)a(oAJN^X`v*q+bIYr z@5J^kip$M3A}7TQA+t|@zE|2_@bypKeVNJ26k;%k8 zqX=zHZWeV;4pM2D{f&VY$Lg`mRAi6-=a0Lpc2>H&imm_S?mU8?MK5>X0H+}9GwJ`R z+ruxfFg&?iAn8N@VR{pC`v;(uukcfxCTC1}s7TCqbdXqQK$Bns)a#uqfJ0nDSYb%K zq+-Cu6Pww#xAH_vRhlaspq126mf;kk?OzZ$+IdTO6l)Td?0HoGbpAS~Ab7Drjq&EV zSmF0N| zDQy^u^JVgDc$!t2*@sggLUf0`pm%Zn5Zz95GyfGrair&Jn@m96|@OBVpOYql;vceKCp&(i=W=DJV-#nSmc+a3f$)=D^9%5TSLG`j&kkXT$c9< zJ5_-LV(OZ9>S`E@31j*`U=VS+i{{JN;X$9ZDRDwN$5e~o`^1sM$z{t|uhzLjLFq;i z-RP^8R^877*oAJ31L?Mb6}dn|_!@vJDdUYh!``RP^mBtn$7*h@%eDS5cVo`S{K#VX zB+2SQ(#v+?kD^Nm(SS4lA8(%Pmb;`ZG#Y%HP3`_qzfjz{e#RUbr~1WdT8OM$k~*%e z3fmscpPTXC^lVElN1?V+?q2I|mW0$4y%`}7UBbcsjv~}`BbAjROY(W1A54szwMX51 z5jUff@pLVaQbq|5Zumv=77Z(^K6L~8xp(SW+M$)#;uo;(Y*)yoV@Cq1;+Tgrd~!e*Wg+uD_utO8^vCP%tRd=QlNMyzn;;w}=IEdfobqpJ4; z*E%RvdK{ExD&w1q<0a}|#DkiV#Rjf}J<+H)1VSzNxjUHKJF`B;&xyD`7NC>y&gnPJ z%2I@%Az{rGtjg-hdt)FW%X{3t9OCZPN4*ol@7+T7ZIWyb{i|xXq4(aP)hSn_57B)x 
zuK2#>{8B|$bos3hAFm}nJ&RdjW-iky_I7`et5{q|wouI@gdCo+$K@}MzH-?Ul!}p- z88d8sAB%}PXx*Q3Yr7x=ZSrx=EifQlRD(V1P|=K#osJO}7OpAOR%lBtma;W0wO?rQ zCCzAJyW?%drL;@X>9z}_fm{K$sA66^GxB=(F=DYwHtYswpQu}}H#s6XE$F09erA4_ zSn*=&Lk?~H$1Inp5b*jkb5Z1rY12f?Abas8yT+$&RTh?K(5XgQ;Vv$x zm8wI=-z>K2Wat?ddXt_kI7x&R01Sh(b0+Y{^z-pk%x?409Eoj{`9_z8;k>EKcnbuH z?7kaxwXD9#WLIWC-Z5d2F}f*x1-?!q>r0ReCP7x#JS&?m zTja<^gMv@X1&RbH%WS`&s=-w>Kc5{$ej<#S2e_8{O?lZ`n(9Ex94Fbztf#*BW}P+k zg)8Ehc77Gaq-aXGVH_V^^fYNE>^FHxR~G|BRytCQ7iGUKSvTJK0Lz}#bYp>+L0<|b zzkDDu`24bnJ?W{q3qHqf+X7GW9V>oYVn!Q3wIgx6afw2Lxc>Gb1OM~6z&WC16F|+` zKU)TmVD}<*A6GD_m=+p7++OOxa4Gk8Pnnl-f5?^f9%WXjF%Giq0Dm6zFx4*_V4}g0;@oV@V>^*Mjwye9{IdLV z8lFl?MmT-lV=Xn&*U11!o?#egs0OfS~(FG>AqF|FTl)+EX$ox0nDMy9&NHVQ3OJG}ajh@9c9UsK|M) zMUz@oEaoc1E0}CoBVG4H?$&Ns1iw9J8&4rUwNoz|_GEE2g<`wDL4P?&5?9umHEo?n zcvA`!xxs7<-|=@HD5ISNvH*pzmp7GW`%^AvJ7!aGp_y2j-DNwQf^Rfd<-|!@*1A@& zRvzCfiLfrNSU?hH<|pC9%B1-?BUur6Mxij1Ga{`}+d5~*4*7rv{#qIi6c@8ra-DyB z1WGAn1HE7xsQo>j^vZc=Q(eg2wGWYywBUT3KH8*t*Q^}()J3C>HeKdcWvN3>VmH?0 z=?a+xHQ=06)zWq+K}NH19i?nU!xqG3Z?lg3RcCH-+UDtVZ-K{o1tXU1#?C(l&FiO! 
zKVIzPmB3ASWo=oOERcgZb|Jo-)I%dFAV{x;mEM(a^&IFTZ*Kqmwa(=uqX|gHgc!k@ z)!?7rALkxBl04jGJok@LH#yCLdO?K0czV`^qc|+;5~^WrQ(YQ);T2bb0i=fM5M>zYUiVw?qM&d2+VyB+^hZYPs&%q=(xVK49V;WG*Etr)F<#S17N3xB*V_Hk)3 z7_6{zH?%S)%h_o_i6y_uhu`h6k8|3(jafQs2dMm!=ewzza$a3ygDyhEmRwzly%ldc zo~G%*6X;Cz3)jI=l1&z`t|cnvp4gt1rUdj-&1_vPSwY?a{aS&j+_F}4>(lF>vC{7Q zo316hDL2s9v@E*DUtaysI{jHhcz23d-5|BcZTz`qS5vMPs#lIh_{ld{YYEswBZ&!V%%Se zDdCE-+x*6I{{?(?#6kbD5>t$^Pp3>lg%NQo&JvLn`arbmVfX&%UQ36n!|XyV)nU#a z-UNDQRo|Lj1G^>ODs@Dey4Zy5cj-wOZhOD7Bmtd?dtZfE3KHI`*ETnsgM%$U)Bxyr$_LE^5{*k94 zoYnZo{^xYd@r2xP48g7H_+8VmxyaS}-G~*}W~)ZM0}G;Dj7jKnA9Q^MyvDrQ@tcn< zR!*@Qs=6kiTr)rYCD8pkVTXEu=D2wgIO{GXaF%SNY%SUS91gffQF|TD4=ba56%Par zQ!TJ5q~%8ql1rH^*g?+ac#>dxthkfU0~Rn;DqGJ_hVIXU9o z=I4d!16q#C{aT03l4^n2tJK%#=kRZL$O88Vi{(7ZfC?=y4R9Vm@3u*D!jR$Qhw?UT z$3tdCsDqhXD8YW#Xc%A%X~3)_zb2LC>R|N_^gZO7%xs;VI;puc^frYFNtRnl{l6y@ z(xbG30b`FX>sQB|9 zrt&>cT=2CK_kwlfMzGF@xko0cejekOQoBb%9e!cVV-x7bB!S#<4Y!QTgoIpAsW6R< ztbA>HCVFLtOWQWJUh@b0=jsch^OI9jMFuETx~g8iKcoj$_W16nozMBJJK>MSM|&x) zhhF%0ZZJt^Y{ z^5wU}Oig$KSlLTOPPJ3O6dC^IH4SXX`p z9MubcJs74(6Ahkk;aSR|M*B!u*Xwm6@11P8`{QS?Y36JB6h^eJk`oJZRuBp{d0VOd zCLQ(F{+i&`WtMiHm%}XmMVHq(*`_RJ3+Q&dL zRbpfAIUKM#67;GZzV7?Anr^&2mqT=Oc&CUzt|1FdH<^AGX@owC{--f9q8y4(K2{X+H{0Vp2muni2-e2a5cLtREiq-P%II zH+9w@a?ggz{8(+OcJcJA-valWRjGoWEe5<)1-8oO%NJx)Yi4X@`1yB=c^Kcs0;WCk zm~>d=Gnuwk=zvJZ>&?l{VSAu{)oj?h@b%AmQNfkc>=t@r`F*@N{porhspB)y)Hdf1nN?I=oHKAL-NsGl=Vo6* zJ3eW$Snxy=DCyjM(&8^)v6;14ZckUSUw-&HXDM9NGxdV6(s=tLv{d_At6i{}Hfz`G zZ1a~5`)xgQ>)jt~mtEZ1$a==hWcfLz_;79H4>4E&)$VDs4}|v;3{EO{>so zb`us}Nh15~452$Eo7@r!Cd^R?UhAFQqGJ_8+Mof?rRxmrdeHpW&9d2Lv8kVT`K}C; zC*pl%QBtn^4u*rw6E7A&z7J#Gsu48uX~yLxu#sKv*CZHO0x4wAuL3&Cld!d$K}mbX z$%l=c&4F0h3@>B2Uv>peGirAJ27IOI>mc*&y50>;s%xbQPd!&u=nXs=%2A8DA2th- z>BbJzF4Kn}aI^3&U_4eMS5ub+64TCOkS!yR@mhOB5LC!M=cOLm=UkJ+(I z`SA{xFxkA6QYdq5r=Jl5Pn&TnZ=MvL^{reeQ%mDHRT5XuY)?0=P$^j0^LIfLGII+U z_qr$M#u0Bm1numXUS}50s|Fv*$r81+oSt;O9JCun#O|awRIHqjcLo!c%ftyDE1^9i zcm3^{7tLchxR-ooBtf`?zD6U66bKV&p&?fMTHSvz1UXw3Un*^S>IgQi;=_QUD6^+ 
z`ZFW1d%eTwx`fSHeSu`4+0Ev7Q1RO=_YeMjH+m^(cY^g6KOQD4U+j$AR!%tdpg4vPpT9Oct*GNZujOWDv&=wV z-g&^GRvO+Qo$Y54Ta^l-3X!H$aM1__PAP*yn0kG}IkV4YestU!n#YfL5!1#d{V|tO z@by*Cu0Y!Tr7a%bCf9jM8u)fOX=dwYZUCjl(t7^Zt$rEH^;)rEAH=R61+5v{Lf0r& z&`~AQbrIh&4ciHx-+4u?8r2KQk$M31;N7J3TpKc(C=+E(C}0)anP@U|sY)W3dqesz zGGz0TkB)1DBiJ@=Rq*5pQ~Din7deD-@y4iO{i}?pt6|jm+I?zQv?r#Tdh&^3SyM?x z$gHT_{xaY&ZK0g!d87Cro7elyrk`TMb!%;}-Gtl<;bK-+l{w^3jbVjIF;1;BuuK2V=nlGg8Up@|Bm{i2`z3X z+Ruy(gqI8p2=U{wsjM~J86xUhU^k-UNUA?07A5-pm$@L}1!w>T52f}c_7CwYeX6n65DMpu;qyyZM5M5(%;s!}X9vr{G%S1(fA;4* zifzBGs9cn8I5RX|>$)rjvY5jhNG%eoQZ1tx!8tMeLGp%v&Qg#7ErH7AK&t`f_6bCf zhu(w)r=`MrT5gO|nj16$9d| zp;wQ2Zqtu4JF1seeoZ3N4q*7HIi9Y-yYc{o`bxb>RSgd(#8Z`|x)dh587Il!T? zG9%r?eJRo9Ufbf0=J{Kc1y3b&9G)5v_xZqzYzM;dJ4-OySyYLjr^*DgdmS&V({|WS zVtv6>QmVO2U;O#08gx?pS$L_8h>?e-=QTx8`!ocjt|`4;0#6^WflX3Ua1--=X&<&R z`%Sh2o^VKfhN6jA_eV&&aSK5TsZB+Q+6z*EaJFI}GpA1vqw2)Tk^Fgv!mG)_3hXj(_Zf48YW4cNSkY+RJx;@3K-`=a0Sh~b^t{0JSXqd@N zF3E*?sF3LytGcp-TUjy~q9a1t5OQ%inZ+TQE_8meG!3ohLmeVuo7x@N00NF;R^&pe zh%S}bF0ay}%g5%#g;v8egKe=xB1^Q;eI)0oj?zVI%TBy|z$S}BeBUD1MOXv>HTGW`h8ak@kBsV3Mw5ik^5*#lF{>m5QvplRkhK`UXbIU z8-$8D5QFh;9;s_pgH0YO0?|5lUm$r5C26rVn?BfJxmyQ>U(N&UQH+E|K7B`j7|yOEzXS9IXO0@X?!aksYz zMyEhdii>;JY;(Bynj5Lo+E#kk_S050mDR+!XB~pAv@MoWtX#KdCZuU9#Hx`#?x~f) zD4l(V4ac|BpZ+tPZ$6?;K=lp|ewl?D$mgn3hx}u|EiNV2DsM|VOn==Kx}PRBPHCtKM&(dzt#1%NCWR02YCK6LY8Z;j8fbdik3Wqkqi8BHf+ z-T8C`)ShQx4Lv@6|3dA@XnW+|L+0n0FE7UzL6V=N#KjZsJGx+1OPe01w5cE^2<*3- z_Z9frY4MiIaU-PaHwiG{XfU8y0cQ+}G+YPA-3qCTWzDz_aB5Yv@Pz^NQ<(|?QI(Zn zHEW)Ny!VVsUw@yni@!nl>7I?^s9f z_+WO4QgSpdGRni;c>035IIr7Br08AXhD|1qO#4gyuJGRr=OY{MeS9*fH(5VXmPR)a zq)=K0>_<9URHF&#tV2OdcCMyve-CYQAueDP04eir@o;b!B;}7GUh=TH>eOO{3%Hl5~xPs|IWE z;Wmq6Sb*Y{iR~Z)UYrmRgWe^}0MVVhLtEhHKuNaK6JJ>#NWL0>2?+O4-EDAYO87)q z&}r^rm$_1d&&mBRpUzZdjyZ4S#6>tqx^;vS1V0OP_!S-x0)@EioR#>ia<&6~hhd*bTI7kBZ$umMB7P7n3? 
zU-Ku^*()U0K!2HCWEw?L2BiIfDGbg8YJ_dcHjb5*1DpII7)Vu@*w1pr64o}os^gUy z;XeomJ}^)`Q$~hz5G%~$x)7cjboS`GDQotsJZa{j2ZwrDXnS=SRB$2@{&aPk5E!#s zwSrI?-BED$9kBSJuaDnzu2S*X7mZ+DqZ41C-e;zpZK@9L=6 z#oHjE(q@~58Or?-t?d~%=Wz=lpX{e;SU~ALh=5why3T=ZZBmN^Nw@89HdI@?Q_U|w z7f5~Xy>kIkxsS?6;|xjf;w)2;xwotK8uGVKf@quZzcM8M8O3A2^G2hu4nN1xraxwE zH+!2B%p)uh%;@aI)wtVgW_acu@UEJrcl1>{HH>^2DNJPh;YQP+6s0d{3iDCjsApcd zwV_GbVV{8*5u>l$=H`fbX*j6a5VpQ9@HHy&XK6@b?A(C#ZM~~#aO7bkFb*QJwP;|~ zB90QH&u&}sO{2r45W1UI3wp3{7Q*H%c4?gjJJeAdLnM9BnZY!$FzxQ#hZtB@Xfxlw zj87fT(6%k~_R-{_qj5v&xC51Rvs}Q>@cqI4O^sz{!PALEpAJ{hFZU88vcC*IG4nG9 zg?Y$jN$KwHK*T~e@*2k;&uEO{&W*YO^qlu*xwECSEC7!~bR~@kI6-1fWJ9YJw&?9; zLV8VpRO7+#6z)82X=*SHCjg3K_H)*&Nf(n?Jb03lQlW)^l%Jz~1=`x*KW0Tz>lD1q z4UY2Ky@YxTYHsjy?8A_Mna$7~Gqx3Hp~ZS0>6B%CZD>&gYY$QInZ=VJMt-Uwk@nFr{6mj)C}7AOG@;hR-;rd@%ieFi>gkOsm6v<`BVc~ z4O6F7a=&y(&=p9|bps)V-@dOokf+Q2_@(=3{r8|bVRF@!`KPFQyS5f&%Ym_nmAFNP zWx+`@mjYdXty)mVr(z8zKV{GFS1y>fkmw*JTE&~*w#JI|3~H6q=E4uSi1P#JOs>V< z98vL#u;GYZ9v-SNXFPt^ZQC+LHPwqYx9WW6(x9jW)ywYyid^{K$tXeWURB#n><%TM z6Efv{NW9~ChTP48*c`0MKBZ9$sZO{j1L}(#pyub_gNa-;(y`J!nMn5+O^g|U_490-!G#9nsl_T+>RAuM?j;yzf7uC*kU)`d zMpcAPH#_()!XXMI>rOn7utB8deoYE4kh?y>Hu|;O)NFz*$x<=!oTl~xKdtm_Ob@ju z0-wKaPYq1sLC>w3(G8I0WlQvzLw%dG{2u-MK*n7r@=#w6G%t34={dkJy>IHNBsBlR zZeE?NCz+wAmxznUdFuc~!@eg$`f((qihVqTlGl-Z&7@!AVBbiipMw$f&r_#>wt%?K z&f&HC`Zu45726!0gw9U@E$I3KnZz)bgK*J1rrL4^=IrG|pl+db_-oLxH(1V3s*E3h zZnZ#u^o5Q;kB5GbW*e2(cv;Ye3U<~7ZL<{XbQMo zP~^K{I(bR9rDLk19F2RI=ulvk?&as6fT8nl&%`70m zqT3$vX=zSl{*V-G(xy_afeO5wZ0$Q9$B$@1R^8m8V#!*erun)u3vsb?(_794_U-;zJ667^A0SB z%w1vd&d{Cy1tBUzr`UF9eeVirMR2q}>AjOiO>1+2@5Ucqoe|%gZW$h5%f!r2+oB|& zb6zi-ZaD1Qj1aKVq~-9u$O*{VCIsO&6;P} zr<1eety#BeyF-kiW)8o%m_+()H6UOt#wxy&id;8HrsIeMPY(;|;GdO}Ms6~;QE!$< ziPq%8C`J=?_QfcdF@9okvGaKcMDNLH&3I^u1R+yq2Vhz9O}K6FMq0tslRrRfqD zX~yV^E`y@ZPiAg%W%k)of{ylX59$DC25}=mjBAPQ?c>$VR8zyOfngNfj1y`+d{A}g z;vp&+obdCW=V*diX_d6jB8@t^$yIG4DxyKuv;|wV@zp;Dgs=VD!-g(P$tMCH@chiw zA099|ZGF$fYi(~v>@?!?2A4l2kgMGV!!p4_i8(kQuUYrebE&$@A68K@uLBACQdoxq 
zuE*xYulX)`se*?QLa--JpUyzu8NWGf9$UOj1vW65`k*`5f}QVJr0sjiAzaqx*Kq;4 zh!zp4RdwwEiW0(Q2)~}+-MulhYd8wJJ>KUq$rUgw<~2Tv=sUv1k*gs8K&f)R8J*w) zuVV-uLY>m)M=!I_s`AWy)nx;+e)0CP;1!<5d9g)5i)8}0#O(=6wj&(B^|;)hPV%6% zKe+21D7eA#tn5H_iKG+@!be=EuctTUT7AT1(QBh)+9O1GL-}i?GENRXNf{PI`22JF zkl~%+ULs6H5cbx|DXml6KI54*85(?89o#Zs{Bu{R-9+DhbY?}t(-(}RAbq-Y;~N4~ zqE5dlRt|R78cMFcD1vpuhUm_1(o>_4eYA6S41tj#s{-K5nPE{BQF;Agvag@alK1{4 zJ7+O4F(f!q{&l^>y%}seFyO!!m)=8ux-39l!LN@Gjj5~3hbfqWQn(Xf^0IYXxxk-C z`$XMb5y>7yGn4BLbGJGw$S(~ht@Xh4`Rv7x#!4t_q7Nqz>)K6 zs9R~QE-Qjpgi*!g9=*ZgWZz&4of=e ziT|1mNVDnHSt1M(XmD_`v!7ziw$6U6k22VE%T^KeGV?!fD-hrAU0y+`i}TWTUR028 zxw8bfxAXDb^L#FM+{(ofU86Z10s@#m7)HSKmH~5SN46|XjEzPouNL>jK=)JTw3#3r!g)NWdTBWZf z3x^NM@Rd;sso zK}YlyGx)&(S~5}46Fms13;me{6T`(D(KOKp@uZZP*o9+(Y{r6FKOjslWh^-mT|CSO zS_E$ec<6LeP>V%dwotk3ZjOkYkebW}6Ecemhf~J-_jm<9Ewg}_Epzd+;=D^m)Eq#n zIoh{M*VprjuX*??N!;Dym2cg#a#q;_3OGH?%{EqQ>l=Ja*0ZPi;S$JHB9J=)NrU)e z1$yV|ogD=6UghCR13QzN+OxXGnzIM2;mk1-`%GA0=f#ICu9PeJvINe!Rg}} zPK5$C4y!b`ap+T1m}|O&_?KhA{gE_s!{Np|4!qWWKX61v?5Og4e})PVB~>**%)O4k z?A%mgYI5FuXl;9!8eRMObB#j7^@^ZbfBu%;IG4>e(gnNkRqBC%7n?Uq=uS>u*rwT+ zSGE0-J@FXPMU}+2Di%ijfmambz@$cQ(~2haLGY?4b@e`bVN;gDpHYAj_Zyq=mIqIB zU}Ven&WPKWo=WnWW(JeU-faRR#$j47fFew+z`X=sNuVpfx0lRK(dv{ z@7}3`nMNM(@=yZmcH0xtBR_G_dM9Cb$6L&4sk1KFtzT@^nx)N?Yz}p#mfp&l^ct!|j<>c5Y=P*A6TbN^6q>iqL2$Gllg$43C5rP|n_ap_ zqF(Q)176u>&{0ngWYZg>xXX&2-gPPgdL0}}%<64+5^OES{TxEs-Q_!E$qxot_~xkbFT&WMSNOSv-PHK~K9HBEg`vYL zHOlaZjy6AAvB0gQqe9w1lRsn0wdn~+kD=u7$4q27Mz>Y+etMItHMxy*AvOKG_?Gi( zgY#xBGNu;syrvSbV;t5jd0>TlN}66G`}|mgc)7Qph(Q~rx+#)hxJDmgJO*yaEt^YW zcM4FN@x-~zg)s2{86Di`dPj5CN!hYd$SuQ6yx(QpTfAF>kvJ$)2ywXOmIhfL5NTac zD%Vi@J*D>;sNbabHZ;9TMIJZKT>o+W#9Y*hMXx)GSM&kQf00SO(@oZ?X}`y?a(q&8 z_td)m`Oez_l6#hP^2rX5rH3nnB^D~g1bwBOhcI4lse-QAr_R$6?Xxni%V9Ybj9rg$ z2YUsvS4gp@yT7AEz)g}C8@(p&qk2;gjeKQuezs=I*C)4Uy?lY@Lb9Duk*UXB2olyg zz?)kB%!IUW0(f*{_Dd4Uoe8q6bK^jFK}_)^>h}Yuo`jO|#0tV@9J2)Pxemi|DhD4E z@y!HsXoUg;(#84cyUBTvv*vyGF89n8b`h59i`N`%4fo0(y2p?NlEFS@j9`|i^hNMH 
z6&&AfE7^%%k=~6E-0b|maqn^3jwaQe!8c^GH9EedxxhK#t zlpg~3paGj!yH0a4~ zk?4-p@%>GHW!(xHk%bRmENhMG_Vo_tqzmJ88nFBHbA&nIS}oUq0NvaKhGWdyuPOq-r(|h zBaWd~gZeo7gnfs46J*_6TW$#l#$iDd{&D7g!p$_*2o!Okp<>`}Pxi z@%gnAoajd4ytv@ES$E~;b~blkj@6`cr}6Hv@gROrKf`7jbUsz??eql6a|)55fwEM{ z%)zt)T7tv!h08iz-t6(tSdG(1e`}y`D5tUz5;b9nR0Vo0tp`S=tex*3+l3z2gWI^q zY`RrR_axTD>V3smlTg@Y!()lnB;`hnBTrzqchUT$Cn~S&hke*q)PRi!iW)b=O&pbh1+xzxw-Z& z`DvZ%B=*3xw3t{en>;(4u`2o(Hb-Gm++mc0T026ISKDJ8Z_sEpO=ho52%R!pj}kBQ zgkY$)f=L&ytA_V7ukZyI#*=VImuznU{d~E1A9rbgm*cVUZ?8+bvphAVJN#Sp*6bZS zhQ<3Sg;ll+Up7w3^U*IxYNlU*w;HJSTuIfOvBe^sO4w3z8^$z54cTokQ)tv_P)YDT_1yJ3V5O!Yq)+Pm9#%f<777&~Q1RA7blj zFPN6|^j%M9qW|pQI6AxDvovxvIhrSF*2{c-w!4@wJRZ!2{EB`ucko0?cP>5sE^HXP z5`*rs;Y!VW&4RmfoBERHsNSVw!+OaCa!a|fA<4`BRgYP{!F7;q{I|_8=huF1uM-Vg z3_fD*kI@vCCz>b#Pc}iJyrPodG{k!vI`-6S{n2CMFUIkKZFplMrrOK7hL_8l-9N}+!3n^(>A^Z;Jl;I_uH@txtVtczm=i|4)Z`IMwqRK;t5gTY&{wRi#HHnG zm~034tD+c88&i3ID#-q}esJb25OR*+T!xt(zawumm%whO8a{ow4N%e~H^a&V|IeH> zKlq1SZh^iY_sv0vQFpx#Glj1|?U)OVP=`t`g+3v+szX~x5@ZG-s-1d$%&L+5WR2F zAjs*-vm&)O+XDeWgU)q>Qn>QVdfJV-GT36@>ABs1SV$*-!5n;eaN3U=CK)vGnF=j; zmn`?WO+Ae}n^}&FM>0w}s3Ce|8L86rnEX#oE#$OzOg*(NY6{If0J#+%B@hiGiNMh% zb%)KBZ_GONsl{gQGVh>*-4vO6)T!}nX=yJtxIJYc%$UgFmKP=f!ja6R$ONx@_0t7w zy}$CYTTcF@W)$^X^%o|EB`2bPWb023*M`gArg3~Dmq8%}L3XaQI>i7j=Z`|#^Hn!S z6Tn`5A@^F}?(>HcF0=B5=rLe^4faXv^O-V9@QAUr@tlhc4jOg#axu25mXso_>c6a- z%%cX}MRrq9Iek2!b_R7UvrP-xUDnn>gWDf>A8Y`Wd+DK|pVlSd9oy%rdm-z74hn%ncbnbt89+1i;O^)Q5B zO+(k0son$&z&${9D2Mt|xl{$X{KYT99@RAQK=0-EXZ!0EN8KtIe) zCQBl`lK_qwFK8gdS$X)$fk+#(ma5tApSUg!+C+Jg?t2)H1v*0JR z`J_YjJ&8;9-Ty)xSu3L+uwB3Nsy?Htp!w`K(iKwO9`?LJgEc#C3X_ zT<6U2jX*578q;)8)uoh!(Wd5)?0tUUQzs78UyAi0J< zW?90qI@peKh#;6SYr!2XSUY}noI|zMdfMKp&XAK;8uavU6N3bTzN7F2*6iifk6Na8 zC-ntyo%{A*j{pkGsdu-%pQc@{1hQv9`m|nt{-&cn&=<#U;PgZ(vI8jmRho2WP~+t1 zZ#z25Ig(Yw>Occ9GXR3%26a?;0|!ecJS8~oUA0(tIu^OHCqu0o>G4dC37j#yiehwB zmt6ZjHUxOyq+YSvz*9!|q!rE*z`YzyE*J~>MywnsO7D6S^`bf&^rGt25* zWc|Uj{*q>Woa2&_X&W(SO#@A$?NQz0y_Jx?SWDhTT;V96yAfu8uY^1lTQ3y%rSRyD 
zf>3Evc7JJQQub~o!}kJgPrJU69Ba9ndnyY3ha>3whyQbXSttXV5w0%r&&Wvsa?O_x zCl)*vR#N@nLN@)6pZVip(j!DTW1s(bckwsk`>>XvP>@O5g30$dw)?FY^hlvtn|+io zK6-!ez6Jo5W~ce@^okbsztU@2;C9VizS#~gMeUi4#E9x9myJM`|Jow?M*?V`F3J|? zzFqkm4z;rU`g*t~|H-F?6j~bMo3N;(?!MY4g>QfNjb8k3ALhLTpf)Z&yRYNEeX^`a zZ66I{6zQZL1C@Vm=cDQE<9zOf;M;Y7`EM7zPw(H>XJ==4>UTE{U-t0)zybNXw4N^M zrfN$u7~W2ZAj;g0+*|Ug8A{bPqJFMbFgO^^_!c-J4ll-6V`pU~4j6KxJoUfbVn6uL zu#P~jR;OmI)+$6dbSp_^!GbpSOsB(>Ka}e=fSg=@YMBOqZI6_Trx%Tpy?WE|GX{ zT<}wZra1wmbd)gsH@E1!C;N~1eg_bq6VLqfcmHzl_FO>h%Av6CZ>0V&fBAOLUekJU z?*H3AW3K^k%qh+AZ~n_azZC%CP5Iw=%x@l1SrM=;xrcWDze9L)0*V;BR4wq&FZ3@j z^EU|Z@66Nx`$hjp!2jFEo!WBW=)^aR|9^qe{yzf#uiLxxKmN}S{zmKmk1G0qxr2Rw z{8wAyzo78B59 z`%wOLKI`ufasEdamN&;TXlI3D9db^rJ^Pf1g=&%Uo(#?0l+NFOz5uZE=FMdYEdMJw zf9&;}x26dhNeizx@45@Ft+8?*nTg{&vjimczayCf*7+Zu?|+o8|H1iu2-ZL@z03{qzj+XAfOilth7l4!4QKWbdvzqa zeQzjRqxCq=Wztp%=ok1-mM!z~^1q6j=6X_sKG2Zx;G<=S#rNy)zkNbTz5vS{ z{kP-apW?Fe5f7JzYVz0ESEm8Wfj0oMbkEd~SdNje|KRzZ+F$Lj&*!_GEVuGn(TDFH zJYb~##GT$BxBUA3lYW1&zH~aPDH0Ysqsfj_p8gvu|HuL$wk9H8(Cd`j`wwe<-`bxu z`Dxb{XRII7*5A&hk)f08;AaHg{`EQ+&c0g5#7l;13X_N|GLgv6v{q-tn;MvOPTLi)Bl7?YitOXQTeZO=e(XY@`v0%S+<0O^znl2>y@jjB&IN|Lrte1F;dyl3r)vP}=-tSjp^ zzyI;0dLRNuPFW5&{HwHtD8gm_kb20X!{2^vbYj*9=a+@Ip%nv7eF2T40x z5h5L6e}DJ?y2TazWyFg%9<{%&!%rCe`R9*!{wB8bP0!v}gmv2#{{1%JelpE_e%sew z{MDYEf}7+vttI-dWlp=wqyi=M)nP$EM^Ob-xNUvUXRiv3BOS?f-sbwr--vr8c6fGq zXI#J8?P?_GyPY`!1WOZlCI4n)Pd(sE+8^hMhJX8=8c;wprWm3pjS^`Cj8j)Vci6n;B$2mf-VpjCk*K`*jNRBK6~eCMY6!= zL^98*rv@4VjZUb*;4+uw|M9^3e{73C@%-I_hHDmC?M(5tNP4NvXPyUfv0Ry%Z&`(M z7mVMhfP(MF)3|wG`r-6jyrjFlhhAEi0I*X}fAQTS|K7d<06bqmZ&C0lN;Tj z(^aF?7dXDwd5B*Ajw9|DfC&-c_Y(J3_$xPi%I<%=Kt^}oS*nX={V&qqJD{m_+Z$Fy zR1_VhN*|TpRGM@U>C$@_PfWK;KlA3-WBX`!6-cHHAu5Z<*Mt6&(4T`5QUM^{=W4^}pLz1v z_hs6Is7km?ys`@Pmi*F@S|LK zTjJjw9rM>Q{7n4cZutCKj$MBMp8vX)&u{;qwjX-zthIhI>5U>S(mQqIktxV?>_4GO+aUCUyRB9yXXI!UH_1Ury(G*36N|0f9*s3 ztgS%ZU6FL(c8>G%Kldm4)jfZ5FW2bNv|BJ|ci^s*IBK;`tMKxbY$2F%jI(iSVXCX< zXWyf{6y`^tg}gp5bLmHlqb#>edBzffhVTK_*E_(}|^y7LzaiTsN(2dFYWFN?X=IT2qlEy8aoDl6+ 
zJN>b8h2DxYN=f^MGJ-j?M|WeK5rPyJdo3_vWlQ!-b>KMKEoVUJl`8fCS2J2EAF7&y zFGH(c)Ckf9CW}i9e(aSVn#So4HO~2KLhxM8RVPdHnG8TW<=*%8zm~spO)~8^3Wbie zLm-L7PYDG_p#mu@lPEOR`R!71c0T`kF^B$2>Er;KHL0_Ku!}#be`vvv0-t^??!?-; zpSa|CA9#_p!^TBK>;lV?q32A{LWjyW(>@F~hYv~Izw9?*CdT%&V|%Ad0`gpOPP_CU z6%FWxkB|(9syi%22zF2e5OkkR4a6_-sU+<2==5%e>E`HgVHrv;Y8)g9RqXW$`R&=6 z;>xWTNV)IWsOIGPE?izx{#kZRz`<_If2jR@wBvD7-gT3p1c<$`wU;2?+FMuL=h zw*+p-Vd{)!pO=Z4{}X_LZ$;c1#3&|_H zpv=|o$s_sk;B}exVv}E3Qkew#Hs%$_TG~(W@WeZ18$b35qcu_yemaG&rj}?)_(VNk zyWr?|0T;*7Um=Q*hky%NuPOV-{NI1;rt8Z;wn-W0bfbfe3k_23RE@uXsmaq@PCuQ# zOV<>l!kBmWu(fbIXQ<}s{_D72whUeD!S#D1q?hmLULJ$zg^-+qT{tcnR#~9gF*C`2 zlD|s^p{;2_*x#iJu$exsEH$H&**F*KVaIWTJBM44PCY7j`fbVL;q2x`)H?L$@nyEVE;+EC@jKq(=88mMZ5swVxIhar6(eq1PMbl%~c z9gE6mU!wJ5bvnNu&3eNA}eZy>fJ9kR|irdO)2y#%DZGWhv&AjPhSd(;)0Uros?Eg3g|9$k; zE2!Px?^y;3$fLHzs;tX5@fL9ru97z?%y47zg`aZ>_Ic&LXr(V*5&~+6)4+7djs~{3 z2YTwme7>)wp?eeO1dcLM8pM9!SHI~}inWV0QuJ}DrZ9RH&>y24*HDo7O$ZJQ` zX)=Y`^Lgh7JJ5&spxqud9X8wB#@h`7Ia3Khi?lNVq7WFPh$gn??Oh2`r*FWWb68z= zdwA3LsG`8FGveL#$HbfHH#CydC2wz66hEoy-!ci-N}+wrd@uw1){a#VGYsZ(Ih?%IOVjrL~Bso^0Dr?z$$F~=w03@u%fGY9H>-5oxuU^NfVWoWHVeMeT>W1k`= zat>MI78I`a9<&~qkNe5(YUH_WCAtt?dNPMAe4hb&VS|C82~bTl;OW zZ&4h1YqSSW<31y3rS<+t<@Gx8jSZR z1z{ zDc+sZd$9CU3ef)L{TQL6@;lMM6r2;dd&xQ!!OvX({$$DKM>geu(9VFMMc3|iEuX#< z*s)zZe*7GlF5A#)34RB9i<}x@Q2w&?#=ho*a36*R7JO@gFE)!FZm1M7_)QP>o? 
zc3mEb13+9_z^0hgeoT%EU1i;YMv8Bj zlaifJ!A|Gviy51t{hrqm^9}n8?n#lO77#N2Lp^!~!SIe;$3g!_m(2PVT}{R!BN|B@ zb^y19P96A;o8k68iK}D{fzHV?IIo@ccs%}yB=M(7m=Xfqnyuz_2DHhse|#55ax>LU zKnmK?+^CcC;JGSPV&gomCWTHu{A(vZ^=n+!004ZG!UBJ&thncw;=9e`TiSWC zvFMe?)+-Q*es`~xe%v$14r_$Zq!p>UVz!?Qo3C$TSsRY<&a!FTcq#OBBcx@ofsN1a zKt`w-Yg~I5ho2F8I+o{K{kpi3m*T;kPRy4W>*uN(71wLGDJ2M^{CWc)?1Ck)$u9_t zD7m1{vM8f^4p8xmWLV1@;y%o~`?V6K>L%t+0#QL?RR6{&+u(c74uWzy;`8i{~uge2_j+J>{uC~Xsf_Ic8!!G!*FPYA$ zFWwaoQ`L0}q)1@9qMYEbMlZfty`d&dVMg5RcO71180cH01wtK{!{gcRSvPoAYhMX% z_9x3={|O?hC)l*35skY_>ZXT`FdF zc}^z82Gdnshu?VR2_%2tXhprSmWQSeyVV!z$EzVk+5%r_ zWG+PNvZfTz4H_u;OY%Z?Ttv(Y_vh8pqM|h;%(9zn>LWFPDK6t4ggUIVU?_V|Xs2{4 zjpSZq0em`FCP_S<9Hlroa9cbIrl6`FF|%yb}_P%mWR}RBZWriI}7S!%|md2;{2Ww~AgNSd9Ky z@N`p)GY^Z3gCD_Yl71uxIGL6L?d~pbTGXN3wV=bBu%{~;_Rt-DVqqgtEnNm$8Fwd} zaVHv+wpx!?Y_$tqrz#pZr>--?<&T%NLdV#C6U@;ISQL&a01j8yHRYf?tjy#1lSZD$%l|6zt8DT;%Y zc%ceh#l(>Ib>k2lyF;ZoUV71G0FBznJCtzjFyV%V>d_koz8N-(cFN1f?#d1yG?y22 zkw(U_LI*cI+iM|VO$T|q$_ZFf8?Z?f?AStvA8Pthh1AXc;u7Pn=`bv3cHT|4SaN}wK z0EHw47%)!Zs_SOkB6_ar4SbByn?$^yrUN1n*N%XLJ<2$0%yxl^5b3bI8j_IMFh_8h zrql!iWRS(Je8xJpSl!Lqj%$vEY*C^Ty;g-Z9C^-FwtqHW~G z3E;5hr*Y|%-H?j7W|CtA!F(lYJSSPbj8Uqdic=ghA03)4}JiSlH6?5hTEn**yd; zl2|B@8ebCr#4q+fSF140l$=l3lFvJJ|1Sr%!x$0f2*g!}}BfFFhXLjf%{nV384bIov=E06`(w=Ne-G&H7uK zBb@J#4A^n&`>+UMZE2+G-VU!(e_6NvU6rol0j&>~%t{hbTsKXz>hhu)sntiA&kkFn zmXMK}>=UJbf`s799*1w+g1$d2>zhGD$=!QA zmg!P*A>e+8gWz}#bN03C+u4uLTpdR3ym-eE2)P!QPVmX_ndS!JDVobP>piX|FAMC9 z@d-D(jphA*;a{Zvg~$rhcbPgg3=+Q|A04aK5ebeefl0!4@>+xnN5S{)`27(5<=_9x z0oG@-e54W11bwVS0DG8U&9>#;`bPnMd(9M5cPt~-`I;PV+ecG|eKJjBd=aL!Ew1hO z#ZfMxYAtMCbbUR+m&dArQcn?=o*A2t3Vg2WxGgS6XqBFYaJO~&OS}vG94d!LUD+91 zV0XUz%G7yAJtV%X%P;_eV}!jL75^e5fD!f=LUkY?ng?}FW)i>MxcXNKo+bsK&o)Heu}5#` zOP9cNCutwsHXu1HRc|oD&EA-}X0eti^VL|rp|3KyjfJ+7$&EHNM3(0j@z%~d$t_kI zp8fnfZNfLUW2q`yL(9R`Kb7^oup}!g0WY~hf!n63@oGAJ3e`g)rMAoBrZgqmZ3RDJ zI%}>}R9dVKi&(NXg7@bWgHtlEc?oFA_bvCR< zJZH4J)R+0d+CYrHl+_2X%>3`u~;v|9l0fEz8S)TsdY6_STEj 
zUFbHYBW4;&RNp+I4}D!E(|y#>pInCXpWpd%sfIUbj@GqsZoll1|B51`r{-P+<5rhnk=kFx=ORNht_o^ zp|V}rbvbx-`b(UybMr$2oB)sP8&n-9ERa8*(NuO%_3rg6FzjF-x->;ZHF|_|1J53_ z^BnC|I1*IL04bj^%O95|H$IXk82IPZX&l%1&_rrrtW-_A7=_m%zF!C}sD6@@?69Re zFC03%`{+x%=GlU)MLTsLOC9j%(t9mY9F~qB2$QV@%w7B0n}oSdwr07T`_3I*0a=AxMrSfjfSYg4;rI zIhvxz1N>6{dTF z$s&p7SX9eyEvCFQx%%O`CZ_{>F8|T@)}{$Nyn^PTMWrS@aK0WL2sfPMb}7Qry|`(B zjq{~kV_;&wG`1|^3eRJ%7MSfey^9it_EvAJnr^o%TnJoI$9pYuai^RTbUxyF{&dMh zTM+B9G}YAqI7dIwr88*Tr0;OurqSaUdVZQTk$fA(zcJuaeA1silZ`u%)YTi1t9^as zfm!Cd=^p@QiR3-*C}hJfTpFLbh1Jyx;J-PXn-_NJO7o|iVNa1@Ima%z99_g&=+h~_ zjMW;I3(5G5%W#62-}dUYBwbs@jPbZ7oV{{btNDubIB)(8ejjyCv*+_xJ)$_SWv4=Oe^?d=F(rsXQ~DN%!qQG6r( zf`?76Taf)XYY2K$8tzG~6m=Kza3HVHt8q6eb89%{?3xpHs#0n{V<=xZ>}u;2%j1Ox zCcww%vy*i%CEtp-@aIVn)*5nd_vrT=JsZuuHx?_-^Rq3VA_1L!mpL%{;?hY0sB?V_ z0juIZ5F`wOp0|AzPyDKC3FcN=quY zUN030$p==IUtkUsk%Alc7%m48-m(fihqDShmawJj$|q5be_dzxY7AJ7HG{>b7q#CS zw7fLXS;~^L&#T)RT--~Vw0CrmO_|9(6=w={3si@wlr&jp1WEQE5-UG7m}-FYjFe6c z<;Ugs^Sp;DG`)&CKQXCK@&`I`770}IQjnBli+vPzT+5z)v=03L#>v zz1?b#45s3(J*p2-Rke#(ZRp}pq3o((>-^Mwt|`ecqlfm0hBs*Y!v4PE#ndP68EdK}>z1Y0-$6BU8h!tE<++$4CO_C5<9eCQ#X~Q{EdCuKFYI|& z!<8(@j2x*8?7{)*7JFSv&=oV(xpZADCP7ojcpEePDa{G@U0Q*?od}crdB`Rz&p7g7 z14Z3cCtNkGK^6;yX@P}hF1`@8Q^)RN zb;y_m9US#KY}*lqPZL0iHI3?;VWN1GuM6PDMJ0~H9<2-4pLJPXW4Sao{gSnD#Yc{A z;Yhu=w1$OZ0ZmNEByyxlLC!i{cHWK>{uskh8lKtWgs)*_9BV1Bat6ch_ba+Wr_^_U%gZ@CX*q?j?U=#Bf=0Yr7{9A1V zjsrhH0rEXV&xtEaZQFjI9`l}3hew_u=HFfR$^OV2GHIhrPgo>tpCyT|#E3>|ovsZ!ImmzV{jYo_zc7(1#))k5^wA80u7dBYk41d^ zc;WH$db0I7VD0;@&qOohi0^sN>uGEe*a*#qi^_Ym(7&!{cQUW05(V6GP=ii9~uJTwP;1IdfCTr*pAZ)D&m%t@&py z4}DHj@Z59^;C(mycNOzKLGa~pb&dD`FdNzEi!WDqpYCi>E;5eITr!jXHv|8t1db6W z8%`oAmvdj%M=A3YDNZkB)TK;%(EK0Tng9GF|6)k3qBx^hz&i@ffoV5=GxPAlr^+X_ z_k_rfU53(J1-wgiL!TM@UkGC_({0@rK37$h!Av?GRd|`anYE2#7wOF^VhTg{oMqEzAFf9j8fugtQ0Oc>f}1`x~L@;$D)Pht3^E2*z zPy?jD8XqMrj48yXMUlLo7)}o8I2c70qT7(Ath1dp^~nl6UH8@24peE~WBUXenr`Q< zkT*lo5)JO8(xpOFRK~XQ6lXYb!>I*0iCML3?qMIa+zAQVvud6aNGDqafwNeXhg({& 
zp|~vA{Jq~p?PBB($lwm=$ak8OCZ?iA)2hJ1^-q5OCJB)q7q)~9>tB_4*6q&F>CfPo zy$ewwv;3i%DLyaat_XgoAc?>})w&xH(&RSNTD>z*m7$Ata=|7DQ&mz8Qu%W88#d%B zn-u!Hv9HO;`V2R4bPq^}jC8gb@l}!sLYts|MjHYx=Hvu3$B2qp{^s>zcQ6HYCpTuQ zg767(gbDm2jbhfDcwg3g5Y&Q`Y8$gBNUhF+_3?&h2rpy6lcOOox&0Fl{VkDpC7^Q& zTfWZ-Vo%U@A#FQjuOuw2TC1@G!&LHA3pQVA@H`fV@w)8xdBlU#befC{VskEY7%J`afQ*}G(!J-*LCRXBW@;@V`>PFy*Y7Eqyv=sbQS{~caCniAHh)r+ zO5|;p(U=9rOlyEZO&CQ*%?05N7lij}9L0-QV0Fbh&1olw-mwQM-XGE>)oLvJPkE&x z@0ebdNpw3js4D;TDZC?ed}&>xwC{4cOPCaac)R-039`0cH?vb-4w@V`nW$eEiyodF z&9hi%s6h`0pdTz=FG$GXao&_Y*zp>5d311r91XI`5OytCXuK{n`l+xM#6;`%#;!31 zKgZ$Slbw+@f;XgY?ypbV=k_|B7Ek$LJ$iY z4#FPX*X}@*4DWX>1Clyypz*dzbZ25I5uH8|OL`>}qOtfw#j}E~Wv|lBxvMC5^nVR? z??MCu`mBxT{Jua<6FaiOWW^?m{`8Ghyl}{`cQPcVCKX&!VV{=1^RT zXya8pXSKd_ZW+g$FC4}BD|~^RPE89Y~yMlD-QagC-uRb$eK)uKoOI~?NJ+TmtI%R<}zI0M=s5&>A;KQ+vw^bIUgnV3m zh4&>MJ@E}&SpdaYzp+bAt!Nr0Dp_GS8mT;~_Ef86Joiynl;&-+A-#*0&hHNR7eTOK zaper^+<>X*J^)fl(j<7Q3ZlOZcCn{D?LC{2hNRIV3z@FI((w#hVNE@ zmpLNbFoM(43bE~6?z7NV9*SCv?!W;E;xXlfYI52&4%jF`ZR5e2V$_$|RqC9eDR`2w zL6jR@W!R3#Iaaj+sK8Cf=Y_@0~nX3%f*c zdE4b-Vwbz7Wt4m)2-p(O0Kc=b*u9T@p;AfgOH`GCwsW->enwn)5LJ4UA%J6;Z9GZw zf34&1{Pj*&jFRGwDfhR_Bxwceo=L8k`Cpenq28(YW)k;Q(@K!qcXY}VXS7;|$!m}w zr?(~^7YDx?dHr%YOP_(!d67AWcxS&$WNI)(E&89P7PMbwpDkZ~1z7-e72XPyd3 z$2EhyWV92c8;y?vu@|qNuDoF76O6LW;8Efv=GsT-tOX883mAB3T4+tOL*C^yMxv;3 z7Dp+~pk7Yuw4Aa$JE8k3MR849$h#KK*Zu%mxE2{{Tg7dSXkk9=CNv+u)aAaXa_6f- znn!_dP2psQN=JYrENrlkeGrMaYtSPg{>b1l@$Du*WcqE5k}pz`#{>ag(qhaqW0eo41sMZ^Z=r=I2 zUE_7rW?b!sN^G>8+*D&jd{}zvQr1gd&$6Z>#{}Lb=nO}24lFcpF<{WR)s zCCp);vBjh(0Bb_kTe^&<5GVw+3@e2vSQmQ$xJIg|-%7SWK06Pi zXfsY-%@%pOR|!@q?}Qt#(rAyF^57nW{rr`v8K|5o#WxesPGRbayMe`_hxe-~$RWRV zj;;I--UT+-A47*G7gUKxZ^Us&mNa)`4+d>adfdK}HRo35y|7*aQt zs}>g*?VxOS?iXEgnmtWkZ(vy`-%d%j5%_65J+~~hb`Mu6vZLeZjx|^{Ik%4}Xdx^( zHCOh&@T)UP5co=2zEeEH!HsE@;YxJN9(F{>iM)5#JvVo4c(R>qgWaug^N4D+HO_=3 zZ#OB9@KzTZxe^M|IDmR+25xQ)T`WW{HmGO6Oe)_{b)8&q?4)L@gY(^Q(FWhgf2a~u z1P;_F@UWD8zvGCe?O6`ck2_%MbBZ*Y4=y3FCo8BsKi!A`+hjd(tqV&G@${(L?Q+)` 
zD>8}tEE{5hhrbCZewlGEkWyW#Jc=vQz|C5<76R$moY&ISX@607;ggBx*~z1ZEr~2k z{)M}xvspXOaZke+#tRmacO!v?5Syg;j{BdPk&r*cBNb{;S(2A76xQqhLK!8 z)E}>u_vx@l-yUgt`N~d0u@y5$!(_QOFxKVSzAt0r#m(e8n(nYVd~~~NcXzVRsocrb zgTKTM4!6h{xMS=1smEy+X9==ddUNNdD0V=)fUPxF!L^a65qS&KW2>t+5{7xMhd*~E zt*QakOT{}Y&R!F~>@rfNm_8W|9AUC*RKBC2HsXo$=0CiC$4<4iZOM&zfAM^)cSB9A z@H@10ajWEHa(6B*zqCQXQxOS)YS*=aixp$W<`-$Cc0MO8nxyM^hwgYsWbk0n9wioK z_B#^KR)5vC`KbgKdiqB>?aB7_HP6hAcm#jwl66_zoKbvrc0aS2R?gQexYfmN+!^zF=vy z3eui^MZao^G$E7k@WL7%l_TK1Ini%F4?xVMugaMh0Wd_g%({yF^8{z;sZJf6AwdiH4=tP}(O=58jRN?FHvF z6v=;mX~FR@@=7&oq=m_Jg1=@e4)4rx4Q8J0)!z#acS$V~5_)*pdihR%udvZbyXpP4 ziZP77iNttec*lZeh@DX(2#Ff8P0qKfcTV(@yc8i|+$*dFs>3`b1WyQzhd&u6#YlV; zY57)|{@(raONH46gKeO-@;CCn6fIWipn?km(3Pa^l%DT?Z!3}c)!k_#B5hcxMyb|L z5+sJ9#Y4Udp;YWzc$I8rcxGy+ zZ(GeV^UnxZ{Z_A6Ai4Dmv3O_EdMBvJ6E>ZMXeuU$*bSlJnBk zA5^Ny<473-byUlHS%nLtD?$Ux5zYIqn&}l$Oc$wDI*K5QVUl8M>iS4u^#-JG@?_^; zM|lq!p$pO6K0JeXs!^8>oKgWkWGc%puiTVv2CZ#%l}cDloNfa=T~r3z9Qws7QwBQe zR8SPaKD;IQ#>?Qf#`XMHJKL=7?SfBmckrZdt@HY9%scLuage z;Al@=)t#&DU-)D`zlWrB`Cvn3!Y|PFh`9G3y3)OaTskl=r`sKU#{8d) z*#E56UAzdZ`d)=^&7nQ4vP*v~9Inn9MavLxqU!%;5J^U<)WRNkDVn=(p2>ur>5(&~ zF#A1%(r|iK0&;T&84x?nFx19g2y3P1NYC^Lan1-T!Ul*SNfWGF$mYf}H}8k_Y%$GV z*%R^G#F^ASkr366+yRj1FdP$#aC6*1>}8+p4bG8tsMDe7RUfa_7$2U2q!l@V9+p>m zc4rtKDu+w?x6c?Tl(&024*b?4ov}vXZTB_tlDi4B#@Au%ZxWrK<%3`iLzTV#9eeCs zm$Zp}_N^0}8hn!~52|Vf(p-5960NHr=S_6ziN4L7%4tyN7Tn6OjeF*}kxZG6sww_c z9-vWO(YpFxW+F`7v#p28Q^d`w7m}6d&zNmY^+Y|JGeCO`6TSa=iA5Z~z{@Kgss6G( z!d!rMa+E$BZaKjF8tq`XYB{7-v|YT4jP~@wOuR_FJyaa${BDVOp)$E9%z0M{WvI(d z!=kpTZJA|*bB=zMQ8j26yVJyzmh5uTweoVyYv?kc)Z)O~fW(biymO~*-uOH%Pk|Ty zfE9()(v;{e_ZThTWqg~Em~vztlE=Y0X&yk|adG7KO3{v6`y6|T(w_6{XkkoUE}oK6 z@|`qCoVlx4uWs|D$=rF~b*FWcVrC|pV2gQM4l!w$zhTM$xFu7Jl-}Wahsq7hx(7ai zn{8eSYy=wd`RH(wh3XcL*azLOM>^=WbGn`!)UNANVcxjK`a3Dj9>gf`xPd_Dt`yBq zK@)b;u~GDYkjq!!D{PEk#nYebX+yrgEu31X66SAUxGPc z!F9SdjwS0CdkeyDg`C^PRZckl(A9O)g!sW+nhdcXu0VYrcfZQfw+GbdK%Qm%(>nAP 
z5pqcj2YEHExjc04b;doLXx7P*_jRW-Mlh!iEsvYyj?3RWHdXMK5(a5dJtbYvVlG5u}|gPigIKMQEdfdmyiqwtGk+qPv`5zt3e!E ze_u?eLK{ftFY$K=mT|l7vM_}dUF&sk97h3_XdItcM-sV9mrG@61EQix!1zADG^?*d zZ}7T6S76F5)sl4w;o&%*#EQ`DjSJi&a;=bkYt7qSdeYpm@{iRIn#jx_9a*QUUN>@X ztr)#2EtfOYZqTx?8?#nb>sz!l#kfSsrNO4Ea^xd3*YH$`S-sidmDe^SGZ0v?MOInw zQm<)_?3f;k*V&`xQi~;Z5)X!pWe=?GDkh2kO1@cRk5=PG%8U;a`OAn*MP=9tYN|tH zjNp-Y%cAwUs@~6&R5dLg)d4ScYb>Xa;()yWPg$u2th}rb;X>cM?rWE45#7tU7~fc^ zLS$a4k}tm7moZviI&`!$SJ^6AV8IOQ{BqqiM_60o3iMtkIv{;58HRL=oR$4M$^O3_ zXbDH3GNsg0yX^+SIO8=G@m6s#JX(R1bUbLQ#TF`b?{+8P;L5gNzSg>xDR z2U95X=RKxee|*T97k{@5`Rb_DGxkm)sxA&UIM&Giqu4kMfAc?X(0D2gpU7dzxf3B$ zs+TX9?ou=_wWnZSf0#l%OCIQsxeRBmW-hrLW58|pwU?5q0}?dD;K$^G z=Ti!ZRUupS3B(me#`orEHO&CiEqy$x#_mhRm+_h3N9m-D0Cn{onH(6?65*?F$Tptv z70(RwM8-+RCiY}gbUKfs@wR+el|btj0V+do=gluxU?ytBsKK(u{DWZ;-5|v~C8LS^ zC20kv>6Z8L&Uc3n!A)HJy74;9TC=oNEYWLTZ${RLG3=Q^2CM66&Lplyo~j0ci7k#j ziI0EzgzH?v=;808FpTe zv#bt>RxdBN&YBn?`qVTPOzBF#-_4s50B48TjrqnMO5Ikyn$TKxSFDiAxU_T)SEJM* zncdM?{z5%dO2rDuroV3X7n&N1X}FT!A;{(j4H#ds=6%cZUKXy<>>I z(5N^cd6TBdQOa`6fT7d?6AP7Nn%C5_?CBlM(3sEAmrO8~PBINKSK|@6Fmu(kIq;D# z6(P{escJ9%hDC6$>c9o=)xx#cGZIdd2+>(|cx#a>sNL}L2w8N#Sy6(iN5U`+)}%W# z@2^kBQ>yEH75&DPr6?y{J!r3DMicDDoy$$05KzN8-zI`%#PLw=N{DsdcynjA=7hqx#Wm`*`L;K5%|ATVBu^T|@2gcA>6~BK4AT{eVjn1dc8X72}bH+86+!{I4WoKJ?y=HO=@xf~gjGXw=>Sy$h%EXPvR?2_%lU5J@q zZYhXYe}a6lrqsi1d8Zmp89=ojr<>^kwtZuzRhHjrBU$jxXlorM`GhwvKXTX$o!)z7 zUti7YqN!Kj%L()$x2empwNYSgkm!835o(J%k`rh6P}T`Y(&{@_|_ z+T9y+=bS?b1Usb`3-J9T{>^l?`faCz7P0-R&y1J;R$lX)y+NpAO6Cm1%k*vUg2(9W z#kSEN(WgE|K`|HSMn|d@z%|tCUtSe-cyrA_7sU&cbk?@ytwR^U;Tb?DqZg}G{ZCk9woW6vpL73J~}hS)P}dskq*Z} zfzpkz<~fXonu+r@*&q*D#No>FRB?;cb+JqR9b{mimMrZ7W9b}t3!b{33**_(G*nmT z0Yq{cgGNY)?MfG2o;(hbmRV-0BVaJm9yu~~*Z=@>J~lU93=_(JRpdSPnNg2{i**OJ z2ERqgJm{*7n^ei@j9?z5GzmdkfSyL3}-;KC@{BImL zR-rBPDAozFo+uwfsgz4I&YK3UJOr?HiZ!B{UG%`@Dm8kU{f2GoZmyUF6mmn7a55F_fd>QM)42->zpUJtRAeCh zfVa10BJV(y8dp=Lep^b_%4Lyjq5^zPl#{HU!O`d_spm;Zukpd4r$yk3DQq&aX#>U; 
zUlHP#5`O4X*Dy0kF6a@h?p0;Ou@~4rS<|#KT)hMh_F()w$viMMbNR;{rPL&3u$8EV z^BYaBrf~+AZAoQ9X_22yuxyD8W~d~cqD)oi6;22Yd9ztvt?pe7&E8K+H$$w@k3R}q znA}tn?SHhRK4cl#{II+NLC?4>RH34{&RSuAU#nrcXG0aT)8B#mR+pVKI9S&A5e}dt z2)}SpApk0uby8-OOY-6BtULzJZ>$FJPfOOjtwFD<4G9o4eGU_)t00;2#Yv72akR$4 zOJnO>TS5$FLlF=`j>EHF^{+KHFm7lWl@80Hw)I&*F@ps6jTa*VzAFgFD(pqH*eaJ{ zbZ=S!++ttn!$)(w*FoQU1cCFH9}7y@RvI(3smoakM@D= z9fb|pjAM%-6^aXDSUW$fy30OOzgDOpVsMTK;^*H=`@$ag0OcY!HuyEEB18bGYl}fY z)up;fg%f=Ra?{kRrF@vMvS+$XXL>zThcEgxoKYkCzs@Q7GY|-s$_kyTaffE!2_RB< z50Xi@bcPn*y8gw6FfK6qHJXAReh<>OF9uf7SobV+o1dE~bbB<+C=9-0u`PA_3wsk1 z5#Dikd{-@8;`EI9{5n6(Cd^qv^AOkuXb(&gOsKoDCbV0*vXwSbq%m3O^Lm%;wURmt zW{u!ch$8*6e?{z{djWLTK}1AYWh@I?8dI?oF_0e~iu+1zBd`{CE-N0iZkb<}r+C6fNZNn>WvneOtQmpMbjBOustY;p=LpnWcpZ7oTo0X&h?BQC_|>D_=C;Z zpuh8Xw+6=2j!dIw#A^)gN^}BHMQ-V_yn+Jaf$o)26&CK_YPX8t;x(n6zAz^{+uEt? z)d>dY-|n)V^&Yuue?Rmn&uP5N(>z0a-7K7dt;%1nG#NA4kd6O?&!ri9n3|Ui2GLr^ zEOPMR9(eP)Cl@h0%Cjl@V5rc1{el5 z>rO!!#O;((vGF7{_@Pvgp4FL=iHgK)3zbQgpU6y6q$D*BMPD9sI6ymHz%FSG`Yun? z)o+M5bF;o$GZ@R$JVc%ikeR}t7_+%* zBR@80=NRB^2Z(@JV0jGP9^y8Xhs1m%RJ+nJkQgUhXK=nXMP$u4YLreDz77=k0(+eE z+thXhl_0&>XxY-{IrA*?9plW%sR@Ww(|kEs(<*EzHuC4p&Giu-v9_ObC>N>mG*1qAU5WQy6-4&ky z$JuuWG?i_8kEkFh;sg+tW}#P+-cbZ8(wlT?(m{F+h=2@LX+c^50qF_7Lo5^l=_T|i zy+=wCNC?(va20$!BEREZ- z6q0quFoT;Y{+&i=`kdBA348d=Ow$`sY#pG56-G-Z-1i z%s0M@gKx+0S+p)DA*CcYi>HShnkj{<8Y$JIRe*D6#iX~k6iZ;cbvymxW85Dz^jcS8 z$@DbX)@r2t>kn+O7@g8Wjv4?`#FMW90EC>~XQ^nge%JU#CR{Ya)ku8JEVA0ML3Qt6 zV^!yu)*;MX7Fm%Pla1K}&EL_%g^iSB#45n`6s5t=2SOYI>78{uXl}z41DnlzE?>b-g5i4{ z%-#?zJC(;n*itfL(v>H;NDWL_JYPe&xyKqwvs-i?mv|#N<2)zP1^8k0xL@YsdGadsa)53V$bYj#I zQKJD=~>q}#^! 
zaBMlg_S}i3R40Uefa(utE;oaZ)lrw688@N7@x>vvp0?w0v!N zmnS^-K0UxIVle)0o|aa`cg?hYx53v`RnnHT9Xj+lG?jN_+?ex?RsT>UrAwu+ zZiQTY^>oRL#Vb?d5M_T*!HmCpOHwiAh>VE^BCWZ}lCoVXJav*$S8#&_l9dmrMczGlpZA8oSnm^Ge2x)cG7s8D=xmXWBpp0hFi)SiEy7i2EZ6-ZG^Yh z1%vSbH=XuJ?bpM>vlE1NQoGsQ*YLsu?c&wMwZJ{YCt0!UTpe6a*SCfn${oF;!K73k ziL7G=K9i`jM(|j&aj~2Eba7vj$ID6KB!W1tDFTdy7!_ku1utDZjgdAl7T*Lwfk9^j zkM(`aK5tFp5(y1X#~qc3nrjUYc`J6Yz*I&Gf~b>jhh6)TVkO$*S24kI=P9adz6c|y zSf|2%i*|q%cs`AGNOE9mx<)MP5O98mPu3HAS{G*@I?G$E_a;(vkpTe+wK&ls3=9;7 zQ*VVi%u3eOi}v_2miTF;ts*?M)KS;$nbX58EvUabXw{A?i+HQ=NfwwX*QDVTSc%1p ztpFITXtCxgA7B=<8BPV@vyCTb?tj4F(~u-g6%MM3C*B4lGaWq7;a`5|gv*tj+ZYfC zNV)w9^3CdrA#ufYFY8jJ@fOfu+o@budDnZ5$e0@hsg~aSSz7Iy0ni{2OPCz=_{$!r z-Th*{d~;(}S5!>A_^&Wn_vV-Mm^;;}i3>X(wm%3uR6X8$%dqGq#)Lzra(8n5(huUj1H@k!fNWV*|guI~J z$DQSFN$-8a4?7CCHgDl4tSpYoQrZ3hQ~>@HfNuH7DNFA~%KW21xl4Wl$*OQ#+xZ!K zE2;^=D{EEUZIzAX`Wvsl!XAQ?>b$xRnEDBpi?~&05%Km`#2hOZ3sr{H znCsFw=sflj`70XJ)O-j1;#E@6p4v^hy8z}SK#fML-b0TTAvmD8Xv{sfvoMpSmZmI` z$~uW;3u$a;8%Z+i8@niS_wA#KE>00;RC>)*R!5cy665Wj4X7YL=UVX7v1vax)&neu>Q3q>-ob^ zRh`}4rnyhqjqBB@bmtgEi|~R`w$cYzyIbZLHc_*5Toh`*rDkuUe6Xt87~+(5B8#Mi zvlFvqaL%!MIii*T^~D-53R2Vw6^92fj(jh0b`Bv9QYSMcgfP4x+kXz3x!>)-vW^*K9@B+=`e zq%^yjTtg=fGk_w=o2H@>Dt9deD@;2~Z= z$chm#3IOz^3bBERURdQM+IGeNXiY;pKz5F$TSLZYNbf%m>BA2_%ScT;Y31s@^%%k8 zs-gT75@#$m94ZQ0A3dsApMu$z?H>oKsBHvHl|r>k zi+~6*>4SmOht)vT3lms8FNMkF^zXN8#~0G4u!}&6_>LFhUsA|(w^TY^dPpJB;iKoz zfc+U(%TDSPfmZj2Bea+AaUvhqV0_46ql+y|E91?$nZW(8+_DDl39j-yxW+Mf?C5+6 z)*9kWOW<$<#Yl=TsxLzIS$IWqmL_h-it<#>%s7D$!{$7qT4(?$S7vusVZEMKp-`hj zj-wU*zw5$pBx<(?g8$T7H4mMqH~vjg{^|#~deXsnI@a{N7rv(b<#)^^o0-%jMYOGb zmOPEQz9;+mF-y&X>T~aHHqk=&S0C7dle9OAc(C^&fi__PG$bQ%QPFu{xiuDg_;67s zzkZ~uH^qVFy!mn0_kukD0%i=*N)z)261AfxfB{Hv?hF0X0RPWU(u@F8tVY~%cl&PR zanGa&KSEzwCQ2NYed3@JgA=vlmX_zDw@H$*Vsyn|l&w7>P|xl8oO3^x0~4p;rwe#=>@6(3vDWc>X?EW? 
zGRqB<>mJe2LZL&RqG##CzB4?)$0UvceanEf@3E)@7f7%pm!cN|kGzCaU-yE3bH14&7;ziafOOvOL3r%F4R6xfnx~ZG;r{HC^^3UfLjLk8dmNYAa9ZAzQh%k z+%xipDvCTRmUhe_Agw35!B!TSd?Rwum z`1-SF8jlauE>d_)_;_nL`Hs`JvMqf7esbC2CyxQ(GKbpdfG2PoRV#eA5#VFmiO<$c zm~^-KPKti|F^T1|h8B6%ggF~3vY>#a?jL8tzy9&SG@vy0Tga3W`5zzm<0e`Do`R+F zCSTxt{RF0GjhrmDmXZ?vON$AGEc~g@`CUWb&zm}7q{|5-aqB%lG}Znr-qNg;ODHG= z)Gxts;?z&1!r^2wOi$0x4yJ#l$DeC!Hp*@Px!922#Oa?b_TTLcBA4^$VwFl08$*R%C?=|kv%zf+cN+5D1bpy zlVp^BEGt(~i!Qlw|Guxk{^c2vwh1TvJI8)S+`5DuG?K`>l6NJ+1 zSEv6DH-ApF|9!18ULaN&3nDi7|3vYAv`asI<7`WSx4I8Mo%-$_|BH_yV<#f!h&VEl z{xv=N8M`$r0N!%3E&TSszw-a~TRF!Q5vwr|hW*5e`0304L?;R#0lcN9cK_|aBq@hh zc3JnPs+3gkU$SXu6rzprNWc92d`atXqQ;n5@zehuamx70D}HM&yaIouCtxICu>ICD zZB7wr{VHU!4{o}~d}Z^Mv03gSy?Wf5Wk~hq0`V&b#Yd3DT#4Hxm_5AHb&^@*(;m!E z_0QntuXp*_9ZJ^8_8D#oY*e9}qT+!jH#{ch`B3)B;XXhY=knj_^}uzKECX5RcSV%< zVpD8%D?+|qe%T?y;*3beHr%ewDwE0c1;$^P@;tUuG=*z*XOe50o*Nln{}@oh=Rz(q zc%5XuFpDutht`Akv}r<6#6?fp+}&V3Zqy%KMlv}kwmol9Unh3V0{@FO>VK*b<*o7p ze>``%qsUQikB}oLksrY_v{yx0(O|xP`9qO# zv8LpxqLpu^atj<0^w6%3d#wv@X4d&)>w)y=5eGXtiSJW_|Epop zgvzne#@MSyAk~zzuqI1~!))rS_(5i2Lsp&H!SRxdXBA5{j05n(h7GDTXzmUUeNCwX zjvC_RWW?Vc*JWonlv&ru@OcV-25UZc@gMcPcKs#rj8VzO{+BPxTn8lN>LqXWsQ+~v z-zjvt?)7OgT2iFCo#bZ5mqU1#k1di4ZuO>HJCoFmzN9rm{g_z`421c`^bYw+*ukK` zQ)+fzAr&NcVdw$kXnzl_$lc_jFcPWD73Ai+d_Qrs{-Z|Qoe9yE_=H`vz2cIWlWg$P zRdTc8WF*K{27hoP(wi%m?e>S^uO(6q9C{7}J6XtIHl?~zt|d5PGE|sPce?bky9za` z3{Z|ax)jx4@qGVbNv>&Kzwq&8`0AHpa^o%HbbX{p+>VF#ec7ulX1Rjt?nb`L>2iO1_A_x^qgaQJi5&V zzItxybFNW7G~wOvC&eU?@8T9A(#hqgO0+6G?j!KfYGU$WA1xnYJ#E*^iq*N?BSvTZ z-LwsD52w?{k_ixkT1Ao3`fz-1$gu1@%&>F}Z*g(tc*6OXb4%9*=BuU^ZHb5per@Ty zWxZR|EK%ozY{{BC7mo4xaTLb}lcCOFneW|g=62I4dYVOu1B#4D#J`@mr46;D3ydB^ z#f|Qky{-Ow#&&+VSax9npNl7FY%@bM(r|M_40yDGmr>Z(XgBxp;#J}DhEG)>|D2x@ zMMaC|X3{@YwZOfYvBH?)KX#iL2e?vt@r}utL%rNvi)RYRfL0Y-M9YJ+dNex5Jq7Dz zc!+1p$AD?Dvu0Wr@Xj8JfM9cAx<-?`YVZ|>W$&_$-F>6*mbwk1TgC^OTqpO=-6&() z6pkSZLZBd76WNS=3~1M?YrWcRll&&xIzxWYvf0)3T-VW~%jHOSV2%j|p|2KQlH$Hy 
z4*`HL=w-|HR)Tl$@QU{$zvjcfs!Y#NigEg|AAN1!o8`j|RaB$-W?4~uHI{e#LVtNr_me8yS){FFu5wc1Wh=lGE%g zla%4w;wr+gIfJrlnJn|AS=aq!Z4AB|rIMP(^0lz7%DyO9!Gdv~^$ zM|W$m<23ECN37GGZs90KcTC`U`jx}80FV}#mI7whUB?JyrDsGxbVp6`h0<OyaoKaGPhT0Ur}qVeXZ;TCy)%udlnWlQH+x|yYW_^`tHdF_~W@tjG!O;q6GC1(EI6%eM=(+Wyh@Y@090| z#dW258?rPaU2h&$oLa!HAJfx0j#Eu-N=#Qqf<&%JMRxT$*e2>aJ==RiLBD{$SZ`3Q zF(IEyM!U}K^vxqjJDQN427lRKKY$r?pAz+~JWp%!A#rk8M%`dMFTI5p&bdRIZf!x$ zpt#`prR$|$A6ujoy!3r$ES7r=K6hkh zSUNlJOr_TXokRisY(nGPiAmC4E1429AD23MZ zLlaH@>v6D6Vu6c3OKQfOC(`YGHld3rpgFSGJv>bnmkmMO;Sj7abZTqc$~CY1G)2`e zZ9qbtnm(6}yUL*$))1HPYxxXWVuN8fMcP0e3B~SHemjs!O$)SW-JuD20M%tKZ1x${ zLK5F%!SaM#{Z6pKM!ao65)7i{dARQ`tfwB$F15bZ#;lQeN*UAXv&B5JD@e20?BlGx zgrhKNP`iJiX+ixMC`W^07CF>-H2at#YMqR9nmR;`jGwKC~(G;m`0$7b+ z8g{NN{rYz3J-3Q<-5Ju7$>DQ6L2sT$Q%c0uPVJOGlC}@Py;fcuZGIw&AXc_Yi?>6z z@2Ju?!JKKi0XPBfT9hdbLVH}!r7k0{?TYBtk|{Zqmleitl#O!2YqGY6=44vNl@(By z=eHOMk7CP!cHeeUS`QkRgJuwxCZVu7FB#5_)e6+Nc#Gzm-mzh&P7mc~j z{#I;NQeDC03!X)_$t`4NZhUvj@T^gH)o0g`92=;6+dE-gs6W_2FxfKi08T~j4X`V` zvyRpjFeu1%B!w|vr6x9TSt!>xwWxdg(VTnP)S2uR!a8ic)=u~c{A4o_XE9QXfGh?s zcl3i_aXsjuZoo_!O$PX9d^k!E0S6mSEz`JE^lzV@wuT)gLAbd$JRbY=b-w93yFtbH zLoMUy)(QIVedEHhPN2bd&sfu;nL&&K|3xm~Pvzz*!m#VpMI)|?u!gUx^X$Dx#~U+b z8T9R#ciV=_Nc8m6)1l7qURw@Jz8#n2$O#KGBgr|Na0!$UOp(JF2D%=5=^Sf&*sDtp z_gpK~>NrWQ8Y!=wCUr7-+Q4Ohj9#r?9(5SqV#!p;_bwj~^lfZx^!a$%njR5^Quwz_ z#4xc-Ntr-m~Ga;}uWW3d05nnH&f{agkP~w5&HONDHlUdbt-aZng zD#ia*UCI7cyyyt~ZZxXVN--04VlWL$V<8F%AK@y%Y)Rsj3xn-uQFm}Ql5 zm|*{_O)?g-1wl;h(%))N!OIkB2d3dVTw+%wbh%p0`<`Knk(qgqrhMjU2)en z6z%K+8Q46FLjWb5cJy$Cu)`N=4?-%BD2XYcrtq!Eh{+k7%BGuB_yFZZGkyO_jL%E6 z=vK7(t4A6iI6vCR2Qy_oy*39QUi4}x;AI|tpmRanj$3#P)uW+YpUB6iL<}TXqj}oB zSXmLP6NzxpesOeUkE^yVmSJIxiyzpPR6s`@O|SO@E$TNw>JyqGK|DFgFTCL*JJ5bp z!~-sTfMIZ*o%sE^5|D<<3L1uEt=_m^Vc_Jr8O)T}%a7ELuFUVaX5>y3^?b&#+c_Q| zdF-cG{6P7$s*}iZeB)7TtHIpkei`CKOaJha?#Dh-c=C-)#WD7G{`Twt50m}Jd8eCj zq{_z2Ki)qVy=#8XQ<`CV+9`Eu$xejsc7vaPA+g5||E_HH%9R*KRD;B&0LGy!2#yW*Ka$N^pI{|ce1fa|LUrBizT~4M{~9M 
zb>VAw^shI&cf~AGP7TGD5xiZ@B(nR$_Hs0-DLXo&J!5rPy(B@d=e=vhF_5u74YO8v zrH+V0Hc@_N8p_v{oBP{2`@Y_C9dR9SgIdgCjvcaO%dzG;^JfXTz`Uq2`e>Inv}7$5 z|E{8|mu;X~Z8_be?`0e{szrV{e#MZ8X*k|1{qXHuy=Hlq5c^!WlV!Dl^n(Hr4y+ z*|LfJfyqFp?f_FjR{M*&tDiTLRs+OZrwt8@OeVFZQOeBD5Xh zc7OEs?hDh#wJ6rF;lZq0`NrRFPB^T=yqueP_yd&c6X~Z-_k1E{#+&coW1jX(F;`s- zilp7@08R0AWKiv*wJu*pUKPn$li&i58$Ml22R>(%FO`dX9SS!a>~9&Rc8j z+6OsdYzp-~)`7I?w#ZgY2@CJ{`c{`k{g~PIu9`yQnmjS6h1ChpzSpx6lU7J( zS#z9Bs>9UKBE9r+6QvQD%u6_`U(LM+{~II-?UtfExsG;^_O6XdACxj-E`Ml@ZS@?j zxwDblhml-<$~2Th?QQ6%xaxGY9afN@KD#Hxu2=RYE@gx=7Chfv@*&=|HxEwY|JZ*k>L$huFOaa zf4d@B4^(yG^D#sdp^bSeE~m3@o`2@0 z=X$IE)EL=qI9i6UUC}FaV(?7q11}$3>_+Q&4H7+LI~B6Ka0x6XWX)lHwA}P|Qs^xKC1zz#d7Vv&Gxg$GO>k5&-(;8eF z5}HPGYW>C5`B$gFot~!8wx}jUBo`Mg8jkVb0&n!JJhLI%``UfxS3Gndr9DpBwLYV5>1%YwUg? zV$-NmpqM2>+!lW~43m{l@tZs>4t}KwzGA|R<@X*WOY84_s!dcp5|jpex};gZf=SI) zdECkpapu}1WFuy?XiN-6MG1R{wbl8m>TVbPltlK>iNp+2zs#Ezp>3+$wZHEa*KSqT zG$mLXQ=2a-t=Kl;SZ5Po-xy5EpIBZvsCK6cj?@*hf6Q)>8&o+QTtyZCn(M3AD1y@^ z{!@(!3t4^Q!^1d)#x>z03woqF5nP?#HVmtMf~>7F0}Q{$%;XP!=W6d{65D;vEPN)0~RDl5qV}vl9Nf&6rHYyofUR z5jH?*@l)(8V*CUqX(M+-0$Z9wo4k%Tn$EX9Z7o(jKz=da?h|BGn?m2Z1|(_E8zu$i zy)=T;_^MKF3NvT+*|Z?zL9}MVRe20o9DKj={May|#9thGq(y5zFCbJkkS(_pPeuOeTx|J+CL&;fdph^%1 z(G`rFe|iGuASPa5-ygR?uUXB#Zt(#w=uk?3~~mgdDbX?fUn47x2(@0WPmFNUnB>}^r_In zm8?P?J6R^ivO2e#m-k!!cJP_|-hBL3dAlDCnX;7yalUva9xwL0`QwUPst1}}4tksl z=wkz^#8F&08`N6o=tkz=vVO1z;?{O;SSuaJx%#8qq(3P)|1^xtc0V)69ZBQ#A)Lva zui3-vR27r;p&bXO-dm)}&Y3cq``kU#o-I$rA3wV2peDu#)xB9_Qs_DNy05h`b2Yux zs-P}cKf1KAYW!+v?c!Qv6fcSC)b@I^o<-w2gnT75=W28eJF3#D9~n2Z>kZTDjCBpb zr9Re;>oIv#TVt~GU@zpucnzyb?z+EMUJb!gnLf*w$rL7j#f>wvRe<}zuT)<#;zfM<(zio}%gSntng$z!1?KntnsMG`!A^&k(q|f#-1A`j`@8)v zPZpEL8D~B&w|ck8272e&HL6YuYvp&>Ad;wcql9gR2T@)XC?8-FWV%|V4&2o>OFgK_ zxM_cHSnp;fFJ5lDeZsrX3gHx#wNF$ZBg+y5=4rl-Jhw&mjTpz-abiQ5W#LX!ipmeb z12d*jAA`zqZ;;lssi>Qt&?%eRHgA~7jFkMGSmcAb?ZW1@)7X5YD2v5Rf|UJ!e3!ZWsV;3LZ^m^~h{=t(-{b3h- z_iPlx)Sk-H*5+IomsQ$l(Eoj8o}_j;%tM~C1m5GgR2sdaazVRP4taV@znSTxXU)0_ zr6X=4b0X@I_d?rKAzkKI1+|7nS8fvz-$q-f 
zDBkUkIiChg3=yW#errDN8F8Uh>(!n4=N;_UkJ0iaev-(TU9#2Cs>EH9Y}@PtzAJ@P zFPY}-`;5KD{9^F~xNSfB0l|TorLXwg>#5Ks8F;tGZoy72j$87uT}?1e)Mvl_{oau;@4x6_QX;-2Lz{ke9c$FPB-KPTR=G$CqgCKdMp z2qw<$DPB*Z5@`{q>ipH*8;#PJqVr^Qo~+O9=`Q%g+V_X`A3$5$OI{&%S5n%Em&mNE|Qvc&x|VZ z(}V4RY_XIxv}y|j!5+j8HyB(L&2M`F?WoUf=|`1-2W8blMQG>2UzXF*gyG=>x-~Ji z>K*QY1L38CB{kjs6owKjXuL&CWTpB1F$+X>n%^zDO(;@Vf!btmSs1l=~6Hp zqJ#}@$-Xk58-CF803!ik-AW~NM2IuJA&&#*!1;W5Vt%c+-;IgbLU@`*zS5F&eZcV) z@zVXDT)*>UycPmIzoJv)-A~2ebZeWV$+$ZV_?wU+x`8&D1LS^jtxb=s?~xCEHROmE zH$jV^*jor0(U1}x2tM{oe)XJ9$iFYuxn|sB?99t7y5b0{CxolUsNmES2_s^TtSeRPP??1gm^qBlfoJ$;YClY?&6hHetaB&%{m|XS zHX$Lc`!8KnzCjZ6zFv2=Vp^L;CAL@Zn66C@Jqmz(LlWV6YSAHYpH*yPc&g9#$9dL`0;y|yZa?U zgpwdhco03N1oTpAnmxxkHA?0WkEk{|gH8kMfW{K2iw=Su&!G`PmvFeofm05I#FGNEPh` znh2zK4z75$`d2|}hxKt6h^vLZ=V-dNtYi|xcT640X{fxme9E;bg%lA{+d`bioZ3QV2n#^YU`zTbdbX0{othih&FN%-e-V?9ML+Ip{X7}2?3#A?TcAxY;yrL z?^DTa4i=RF3l(i$B*&p%^|d;gRc-scoHvtJm@-fm%Q}l6;@D{YL^1BNo0>}qru#Ob zMolt79#*-o_~R`#Kk*2-j2_p2KDt8=F7HC0%D6EEt4M7V0O6l4je-z~cJWGjwTXwZCDoz=CQn3_R`4B%uLygRPCCXwJjcC1! 
zGUI?733XW)yI6B%9c&;2wj(tHir->%Hdhpyc8}KDFP)XG2GT~Y&9W0RR2wNKWAul9 zGGtjdoT>2D%R_lx97Wv}6FHGw*FLhg&P8FYCG{q>yjM`d0dLndzhGO-hd5X1E>CmC zbBQY-w-TGef>PhBM4I&2iGRT`TpT*unmcM(<-Uc(=yulz<%HGI%%RB8;)Mp!-~0C=O^&EA56hJ1379QzW6IEO-O_mVeNF(10{x=+ZAEvgApMVrv3pJ5%a>S_Y-yY zED9bfyp+|pOlUg-6)~CZNWS#!cjW94-|>Hc;p8&Bq$WArw((K%L+{$ymiw)C#I@wF zqe#qrTN1n*T-N<{b}e*r>rB`z0~+@Y{pjj(G6U>ElRvdCL?zwKmJ?$6*$~x^*iLPh z*(l3mMaMr{tU}pen{%o+BL>}>rgE1w#h9&d@l4Qwp9syIK9o>&oi(2@kvGSjcso~c z?8)X{#f(sI^cpR6M%(~}5xSuWs+FY2azeQpqM3D{`=Zt3=4M#*@$rK%R>@va3-LsVy{*~b7 zLuN2?t~95-;~?;Lt>ThGy>22;kzYc#wsu`;^e-4;4uKFI;C`#scHz~J8U)57kAKVp z;8)t<+P2{D;H;4sKInjt57`}Tz7hdzZgNI;f#Ax2*RVesy{p~i7ZR{pje@M35ip;G z4VfRMG0srUV;3j_JepDRcS(5!E827;#rlYNn0U>e+%8R?o$T zhwbU#a*0G4odYm*nrHY6jEKMPSFA^q;nLz4EN+!F=lGX%;6RT(1>3dXb-cEW9FG=3 zggti-xIO#A{b58~0jMZp#=!>VORn}Uzz_&LP*<-RC~w@#K}@BwOe-WlnkMH$c%gZ| zcng$e*ssvBWi05uhl6un0`CJA*L}?@6 ztBDmf2p8};m%(^_MO|DLNyP!WgVnv1@35T+~KG z4t}@lOTIaE^DFPVN~p|2F6#0W-6Fa8s{OWa+cL=>eMBY^@1LlV^-xVzg{-D9 zS&5$K5lHk6V%0)5rFaH@Sq6-`F{;hm{(9Fst83lCV}hk%C8x1FqpqI$nm1OOjUF#o z(UieSOS}M48|?Q_HUg|S#~9Hb*ATgWP<+r946z4ucO#vSp!->SO>!TM$8aJvMP?Vcak{{;u+5c?CS4Ji zcufP>!6SbYK80X18xL8}&&-4ifryf_aYdNUvW=i|1kmrF61sw5JuhX&z=ClKmJu@i zO!Sbszs4;!uT`db3Z%=m*CFWqe^>BAs!8yh&3MfmoCTt=T3B_1)2*iMLSzHzIB7qm zO&t_Ob#v?bW)PK!ipTpNdiNKIHml>UMr9D=`~;@+1h6|@@pfHZca`}cuyXj4pOjGncZrFsh7VK zoIF>QwP~-aHzVi0f*{Pg7u4m1tpTifBC`wbE$D?SCk7xkt8ym`)DSsAFABi*Bf?!A zI)l5`0a=2vqs%zEd9Y?FVIFsBpUH4K>F&_-J`a9za`9~cDZJ>2FtpZl^t_2A1`OwR z=W(mC--(57e39BMMO5v%Mma?tInVJ11 z;&e2Oi`U3yB;5^TdzYW4bt}9(urcM^wP5;bb?7_iXF>i;#GOwNnbTnTNj}KHeOkmD z@=KB`Qs^oj=5tQp?9u&d4|>{(|$e9GXn9aC!~i6Mow4Z^h?Nv7@d>8T}=VV9j8eY!qNbxP-I+~)dBICM8K zi+pQgc*$8?6?`Qy%qL=GJ#eymRV2&1C72pKnaKv$-&2WHEXcWMex4|!Ic7PXMHRS( zvKU%Q;aK#f?0Ru@h;Kn=$Vh{Ukqj$Df)boPJU+DTff1bB z8)S*L%8Xjam?FI^M9(`fetv+EvM?_I1?wSQl~=m28cnjQL;kR9*K55wat1rb42>%rj37Bzf!_siy3ZKP{1Z=2|4LPjx% zT7S1;<-v=QC#%cg6?=L6UDe+bp8Qz|lw&17^W2-~Zo3H9-D--i8tz0dNjyB2A{#JT 
zu9v)^xzI3sYdK)rqwlUZGI&a-tgWJ9W7yS5R_TLr&<)Djh~XA6zR_$&95E;O-c@xf zM`lxfn5}X+jPiC_}3~e)zmIR6eN7EQR&g$eo$2V>8KsMjh>fY_@b#wyC zf3n&?JS;BJc|upBm8T-N|CCjp+A%ORuu6maTF{#Pu!BHMpnAGTqG*L8*?wkNqp;G2 z{Dcy-5e6cecVpDSVYU@)%D<3dp3udc{HRk{*|gkNn)s5!{D#VlK&wVNv_L0aK)5=fScGED!KpKo!czv;Q+WhTo)58Ve!_)7a~|QA{l$?< zFix*ypVJdVxb_Q?NY*`KMNJsamEOKPU=WcUmvm+B>VZ%ZGY=pZ^dP)oHbONn0jOUZ z%f!FP3b=;J!IiKEt=c2Q8i*`=76)}Kdymv=hx3wiS*K-RYR4v<_XJ~bM&^)vMn!06 z5PpphuCnOv6Y5B~HzA{Isbi2ndyDZw@Uc(*{<%s~>LJ)X&A zYhPRxPHj7OfBlJf2$`N;`LfzH{^J$TKl`0Hmht0~Et9(3(fMz*!ZRBNBTIYo7 z&6Ypa%4kaIy)pX032|CE-FA&8IOJX{Rfgc@`J=-J>e!Glg7kbwaHW&oKCN2b5ZMYl zVRJ(vJuaZKZ4=}LbyHL!{^9HE&8q7a16SgCXt!Bx!p2X{Dzx2ZTAVlM?1A=iMR6xP zPE&%bO_jL4Cc6dU>P5k-_soeZ=|XF0cmXR3?4m>*Tg_os3GeXRBblKyqzD#LTt#qF zH}fdfI;>^J+MsRCY|M3eyjDFhc%_HaIF?nu<5x$vwE z@xej$;6M8R+NlS~!Vr8pk-s-26?(SbpW_DMA3Fwfg=g%7N!*IU@C(||q(1y_->%A# zWqui65+bUKTul0eWf~vC-6IE|YZ+3#fg+1@&OvKDQqPkOWtL$7Jcj|VRw3G&dGOf0 z=%Wh{A)gl0GIm$ixX6_3NaD>0ad5VGQVVs0^65+jx=1UQkun;U8>LuCB=M82dy4WD zLzcJCu!r_RXw!|>vU#;_dmm9@)kRYu`7~!D(KoIQ$9jhd;|bGBQ|<9XQi#vBUDzEs zlYFE(Vw4yq)4OfymCrzw&O;b;obx>YDtk!nJ?XCquRsVdnBnq%wT;7&E7){#*!~X< z<6y^bOitCyKsK=t1+i>@lb!?iOeU!@K8#JDr~9b`5whpVS_`RHx;gx@eW+dZKtc9a zK37oJomD&`c3$*FllhJ1z>o|($;gM?y{n}i!r0<`5g|c7_NoSNyZW|4 z_}?1iQj44UB=^1^o2yvwC<`mPPF9KAHf3bzN3d_VKByEo2PoYqL;(h_8=zKuRe#?y z(nA{Vtvsc5zzMY4f)H>DD>F!X^H1ulWyy;i_U42aSRd2`u zt=P@k%QVk zI(K?vyt}B)JmQn%TQV7`h$IuQyKx42RIdYX2#k15Kc0X=p31(PpX~j|J%%*OUsT0k zvgzBcEJK`GU#PCwLXtsvaV2@?d(MfU@;^iR@XtJIS&>IRJ?|P~&?XOVEK5C5uY|Xe zg63I7AXm6^B6~dMx8s}rqbJ7#=&K6(Au`LVhbs5bnKF-?bM6;}1uf|g8?a=!zpm2C zHUNKWV8`8GM&rb#(tC85p-Cr}XPtA8Xn2c#C`>?2d+>e?IcQzw4C~&}I_lOitI*wd zoXg!W6dQ+zvv?VGWKoWH(mU+PTVTI~@O964bC7#MQZjLMU8-Y?%Bu|p_jAMgeBQJ@ zA}3DW3$|VTxQiFWR^Km@U4d$pm|C~CaHb99B4quwh3;;vz>{^ zSgrepoi$6b?|?ZO=yHa-$N3;;Z_>ubvUI4f5a5dUmSI!vJPk+mYb_!~@J^4f6Dxd5 zSuBf8CS|j3F2`dyYcY^UO}hu#@~IPZTaoF^J-;iVH-5;6`~&L#FTdbmKSg|D;aapK zVr}Rndv1vVvocfDip(FkYKzPuM>z9DelUaz#l?p}v1g>R*{hSlcT`NN;QrvOdB}csL%O^2(MngHQ&a!w 
z@TN(YsOGQ*G_fqHF-XU|5lD*-naUFW5c52!$>lBZ)H}T)#2?>APT`#I zac5T4YoWwnNnKr!eP@{kwgOMy5w-_K_?*3@ras6*TQJ`DVtv&0EFE$ytoH_a&77V) zkU)I5Jk(6iMI8NL>mc+Km}D~hrQ&8zL91Umo`xuzzf}47*bk3NNd#i^9VYv2JMU?v zqkKc9z(aaI59_L)5o%K^FscGGT_UUMbuat-=%k&2M+;K^J*MWLffy~{(?32VuAEoC zzaT}WY)z>13rAgkYkmO;t)p^2i~q=%A4+QY#CJRmi$LSKm!OFxiPgRH zS#h|4$w_IivL{Uvk+8nXy8i9ev_pN))h>C#^iRrhuj+5jbj=Y+yVt1#@iXi8>+s7^ zd(H1!xC@_#1os?bg6TLE#TC`xEazU=1d&J|FsG`_eP}rME5xE;8MrwwvnGi38`4J) zZz14;2!1gp}J& zWm39#w@4?p`Y%I^zTco+HU~-LB=^~ngx`UjzqQL*x*uWwKggNpJrcxr10;S3%TMlF z8D6(;G4wRPPHPRW+WghtoDb74<%~b>L1^DJ-RYdIJm%_mo6rR^amH-E^p&OSZvK?3 zP-(II*yA?a=Jb5tH6T#_ac5lj-l1JTN4@rap*)vx>W`U4?k{F>^?|lspMQa=mp-XI zZuHDz%Za?c{^)6Fn>D#kMzD1}UeyGyuM4pMuQVv|0 z6jiD1PZ;pnTR2`ubO=ML{1Ti3IRxbIC93TO?y^*J`ZH3q#w8A%p)ObvTUIf-nKQMu z)2hqoBU)ces0w$y=JfC7I2-ot;M1;isiS02iBZ0gvW5Isr7Mm1I8Ug((sleu?+#sm z_&;U&2QvRp+mxnU$h^&l0gLA%jn9M<7r%<#&4|{Z8x{F&wV)bMkOH+Z6f-*8l)Q~g&`aik=CKRYle{SP#WoOq`Mh9zJrRdqVIFP zzwaNenPKLfz3)}`T6^zx%&fbU8@3)|NvSm?88SDM;Nf>zkI*-lE0k<<+4+hg_#(3> zE_sN_+q~bh{P={0*Y+>YXo~LY{n1n=mj7j74h*Ys?-&n47U;tcan9+B9yyeEZ~F3s zw?>0w{p_~YJ63J5qz>v6XA$(pcO?)V6x^)4RO&@ywnu#Gn62dma{L107E!hp#x6Ol z;pfc$6rw;47EsZ6Q4oRkKU6d(@5dk=7HMNWNOLIS@a7szvNw_eBC;&q+n$%Ho?k%~ zi^X82w&Wb137YW06B9SY^Um;uOW2EXnPk^@;*IUVNY@}UdTBWy#=(BvV&l3VGV+`4 z)2$RO{(VmbA<%}TO%5;9Y2m*-i8*HRyr3Sx5Y@K_m;Xog{`yM58N<5!s!uGQKcy(> zQgF%#C5pA!YA9Z&_-3x5pjT}z9#lMyI?K1((!-BZbj^gGefJyxsDLB=c7IOGOMg;#S$1@alz`@R z-MfE+{l|WN5xh16iDlZ=BGz`1_%=Pah_4wFG(tp2Yn-I*bV&mZP<$Kh%zDMIe(B?tAh`y>1kN(!jJtpEj`VK3zkd3+N&olH1jf!kk)0`WOxWda2PU44Xh5bXSCn#XDz^)=^7qG10^7J4Giy*vPq!@aE+YiD(p zNPZTqpv3ED_v1A8+5i7x=mJzQ*`zCre>C4Z*+T+WaIM`lewGo?Ess6Xm~wh80SDOd z12vmMf?Ny9QIjQ&!M^UGEbYs1-Ge+3VdjDM(#R#%eEZe@X*6oR*&6fz0?oJl>~`Wm zbSVi5&oIvde<&6si%j|GAbxC*`#kZ-on@WTTgyxU=9xc?YUU4-&DxMrSamqSnZbilVIVBCJ5^{)4W z_4`W*hZixU_opfep-lg{|Eu1<#UL2LIK}Ez>H!`C<&dY(Pyx*LIu9@ZxWjBOdTaSn zJ-_K-lx96hxXSH`^@B+Q7WT7X{Bp5?I6(4=FCm4eRiK}q#^L_^(G$@px);EzPG<-^ zz`E{8K%gkoFxsq5aXPVY(Es*_R`q0ieED(HnU)kWO&IHWp4iCbfPRwKb3c?vpyI~; 
zNm|;psE*80Vl|%sK3Ymjz$=4>uRQpp=j8~_q`}Fs<_;sZes&1DErWhuRm81Ze%;!g z=i1wgIGYaNVBH{n_UQiu)*JPD!0j=K9E9 z^k<(qY>M|A-#(1t{`B$BiT_lAtAw{7yA`0H>dk4WB)I+F_yp9{SK+PKe@H6E^t|Qw zk#3n)R{z^bVX_TsSZ9*?fYi!$x~9p#uDG>c=1A&JS|?cYqC8&_*LuMloZ$wC$!;7W zG3Q>jpf<_#uyGuWCht!(VK-~|nGC*HlVZqmW@%~hAsVzSAz{up(xTua?q&ZnvG=%u z7OuzE6aDrb0c_Y*aHzgcxR-WG`t?Xd(U>RbP!+pSqjsI9FIz`hEIlDNtZZkwOKyHy zR7{Zg%6c*3#{1F0b?%MvPcscGQl}?5bs1uiDUxB&D|dtRd?akHB9jx~v487XcIt80 zX|C4KXnCJ8#gB@&gCewR{N;PtbKC@?v2PI;#_bNpsZ=#+bEONi(?Spe@!LQ7yB;bv@5S;KI;Jtq=0C}2a1})d*P?TAdkG=e> zXFj6|*mr;ca7W=L{;%Jm)p+HOq&s4e?f9qIv+*%39Ee#7D{?iOLm~u>|NPL#qKgS? zCzfFOEP?T?LjEGfP3hZRB~_e%Hu2jFf0Yr(+~ZtUq0Zj>y+4lt2-}*PN=*4MmBK>y zyt+aknRX;mXoYE@vcWr+rDzOk4eMjQr2fol#x{c)XKB5x#7&L zuin25Ge(xNK6r!tAtiDg`d-Xqv(CJE|876(&xyfq0`=J8^39A*EB82Rr*mGT-u1J* z+@8~8;-qS?I6t|aa8O?Qa@C$f0<}WY&>b9JnL(H~KzB8_BIvMmajo_d`O8H!Dz?l6 zv;4Y&T8&k;jY@vq!D#1SkoltyfL7>iz! z5NE1YfkA(xwhffWO@K!GAXpSoyWvoQW1goF*!;7y>pi z%uMEDhsK@C#ayR;4}tdcrYy>E1uDYB(uk{Q7>?&POv#QyvzAC4?WXhEkG?AJPpPhR zQM~?L$-e^=kaiN}p(ce7|0@)7*T14I$d%Hm`QHPqI^qUTZ|b_IKq{7+f-zw=>b7hH z`j~GIotcgbj?wK2P5rRsH#qiJYJa-gQ%C`QzfXD+`ernqpS0fn>0@P$&=nJiy&(u9W#>H=L`+^02(tob-^VK8;U|iB_)_=SCAK``v zgG+J{?56T9x=iqXrvGK6KfVl8!vOq)h*$ZqS~S%62SS929VUDLM_QdwL} zg8iK1&)@u?M(Bv~nKT_T4gD6sqqi|x&W2$?Z7qS~FeM?w-mmEWdE4(pl@&$Lc|g8q z@BEz;IzS&1{3-R@)PRsI$K!slGya^w$(58ZDk*Qq;+#4m*}Lew-#nA}WRm60KcNyJ z2WSManji6RoZu9EnB2Ac(QgRSZU-!!6&f(%Jwd>D%VlRG{=Ctj^A30~z^8bUW8f9X zs+~rsU*y(?d6NI2aV7sRYceqb7!DQRn19FNq{``l|KrDDCPm%czwKL6T zP(U)^8>Z|vLw|1^FI$ctq?d-+d$X(jDeMhbVBsr*C)v!Oj9tjo4%H3MR$5{{pWIDW z>M%5j$);aAvRghoFpm6BfMiF|Ve3(3Isa3|3uX$q2Lhcz4&A{~A|R6+@r*iyF7Q^s z9!NCgMz(|d|K|99QG#z$dhZWJ$Ggb5TzxBmuIR@9wGPXSAsEOcrT=e-FQ#aY!niKmH_{#qKz}`cETw70Wqmjg0 zFVf#xOHl^2Ql;^yWyWL!-Y7L~zuEMxPOK7q9E61RBtMYsBi!BTf5Y|Dt^u7ze_aO%;WE$H^E!z5?8InrWmgJTE7I6rCBPe1sW9# zU|rPxmXC;kWq*Nn_SS%cfUq#c=Zy>hEm;+~?e?nsVFzo}YHn)(+F}G;zO>3s6tJ`k zHi_EZSUWpAm<&YM`1om;`72BRw>@C7RN}pTj?|;KH+BM>I+4P4y4@EDw6rWg=3H$mH%O35c 
zF4$+l$=L)!74bfJjp%!}470or)U3SCM&I)Q+!Vwf`?sz{0#R4u&f0_QKQP1v`LgbY zBx8ReDXq3zs)x^Wv(WG86gU~hX+C&Jx-w$Ir4eqt7)V|#tx+~?=L0T~-gXn_3pM(T zLk9Hau&n}m1R@G}JBcLlF7YJlY@uoq>9#k$$(IbQ{Q4rv*h>e$ zSOnsz#WlJkCa6lIh{)$@z_{SOT%J3dZJ)G}K3KBQB8pN>CPEqo$g@QPHI|a(fvdNQ zUZ+OAbj9)+?!^e};ZR(vRH`ssu*6sA8?`Rqj!`<0&!4d&k6sXcUY(z0eDI7aw;gT0 zoS1IgJY}foRo1=8T61O7yIz_sksIU>BP~V=kTYE6otmR8u|Bx0vF-j+hGmgH-4E5K zSsrci_3+uUnl6I9Bwx0t_3bBEjT|bsA|1k6v?n-c=0i3Jbuu+5X7&-O!%I9bhDMZP ziSgC%ZS0O;-c{ZWFFcfxCbqpY_TwQbp7<%D*0ER_EF*R!?TmzBTM!H8cDfZ3{;KlKIeI zXU4BqbAMg?k(~$*eXgB%1ZimhO{Q2{%<;BL8>7Xp3Z$H25}S&*wT!7E4UBDh>M-UnP+v~M!ud3iE;alved_UnW=Q@V zKgSZ|(MZ?g^L(D?g+_MA`N|ays;-`3?rK)D{Wn;RoMnj{W$se@O-RY+7JjC@k|X+O zb~7o%O<~+uBn`hP0A>zNvP~wZvNnl6OojuGA}1CLefv6@O3iM@5k$2{k%Do-+;ARu zPeQPwzOI$H#3?bAdsS4vtp6I;euQT^A-WB>cwC)tl5NLslFruUB%3`MAFKi{b>{L5 zZ%vKolhkM$Xf!vLOwJ> z_r-Z4=PlYDLPM3kP^918tV*$%{G(52;zh%|R?Y0m)KU=tXjL+9x-E;37I$aHAv$%| zrH<)XKKjCcwpbIh6?1tdktQlf;UT&aZ5}6tMOOD`N}eor6yjF0f*<}Sf= zv@fTiZ3re%1s%y#K!@|7{|K-bc~s{D3$xE|zLRAD;-Be5K1a_PQi&U8kV zw?}TcZ&ATZ#@09N2jZrBfeECk#A)wSquw8A#b-D_U9U0FyE$sIHAhJFsFpR7i&_u3 z<}Ig-x%%!ol2sqJzrgA8Muy8#ZG}RToisV}RLMJ}x?qlwV%{*Ri8Yo zfy2cgb&-hkh=PNlw09lvABw0qn>Lq;*;1k#WHl=7R2k_vFYSM6v}ia^QZ%#GHSMl= zgD-o6*4~pJb%(uK;ztbV8nCK!6>Qx}3ldbm*cxW8iq(T|TM(z?9d`^ z{$WhF02X}$1qj=HhDW-&C%JK+X2Oc^K_{AG^X<;(O*&MpmV*fy zq!z0|2CUjRcf%aIm;dMp4UuR zC0BS@+iaQS(lQ|xr4+-rk;Hi`aiDe8E8 z5E-6N)}_``f~p8{R1h`Q(tHRvTWb0c$>->1_OV^uuy+)PN_j$(Peur1+vO!=oFfxb zdrgmdJJ%X7gO;R)wCeSfjw-`__*mx|J}{0jFXd!%d}y5^f;K0wj|>bMGK&LIDny{u z0c-fR5c`R%QycLjq{=O!v)MnWwd)S32jbb`6u)>}yF{(VVJdwf-os*JNv+u%N&3m; zpGH|qsUmenZt}XPT~I2#J8E?UPO$Qs zftTNnGJ2vb!OEt!u73K7a&sF7a#E$Fe3(C6X!f-L4R$qIM!v*aBe1IC-40f$RnAf7 zb>k4C!2ynlaKr~NzBf6`)MJKDR(iL&n+|@HA?)r&C7UYAC0%=>QTaGJes+^(z9wNl zXc1Iq(~CpWT{xNIAuE}E1=f|(Y^df4J0e^%o+|lCwrfFd>n_sBY1Z1v*TF8-x8TTZ ze(SI-t?LeMTa6c0AqL>u_g4Vi1arN61W;*A%tuGG1$X#Q&+G=K9&C^A`3A!CIQKczt+jO zFt0T3zYlAgUEhThSydRh%=smak=Qi~B|{5+nUnN`qeHehU6{5Saq$Q8@QCsgSLXRi zGl#WRfF7w16nuAp{X9`jl94iORmkF23|C}} 
zfO_$$^_#oiD_~*cb{rM12r!v`_C?b1``Y7u9W*3@$=-Hrpp~S7yR*RBdG)27t0z^- zRsx2YIU{nJ)C9rKC#x7p!Vs-a>|-`~!e;=4nIkurIRKJGdmQD`=0D~G=IDB(yVzz3 z+EceM&|$k2O!4&-oMjMoJl@_Kv4s$!Dm4u9pbxvrt!cemn$`j?YrdJa7STewr)-3} z%Q*^>S~8z$F!-?KLx5mvXY-O0=C6|ww3{J0hWm-aU-`#jh?$xoYRUBlNl0&l%5;3d z{0fSmI8V%98Z=R4@38Hvensot-BQnT@O%a=NkilG2Q9+e-tLD3)X(xk79oDgk_7&T zR^!NQ?KtP84&fiifPj501c41bk-Im&pZ=y$Kio#698d?yE=`yG{ZU-^GXzjQ+1hSA z?^hn|{WCD{vKYGlm`4gv$!39~)4GEm|JFc`JKFHf!blKiH?g1dBO}V|!=rrr7drQ5 z?eyrF`eXL{wiu;mxa~;P*=p1g)WDYasmn{MW&LAV9t;CFoL8RN&n{aH0};jYm_x4x zcekb6T=QAa(2MsJsZ9Za3U4VlQvnxj0SGGNjp$ND8-&hGM%{2Ax`HQl*hqGxyI{#| z9=C+|&RjLz;y43{YDrTg_aZZfGrdb_4tK4!vS=Z zoRmyPgL8%r^HhqR_m$_&73f3jqAUZA{1Ya&LLpN@@95~VyQGUN6;^i6;ir&JUK%|# z?!Ll$2ejVZhB3uvNA1$Gu-x=-X9y<20zLZ4XOo*YZnTxgJ6tMt3obID5gktXzTl*# zeob2lwCrj3?FJfLP+Vm~LhOKMsVH2&{N9og{IYJQGhJ?h0?1W7NsetT9Hf3wD?YD4 zbvbf_>`7?ExFl2YxKk(7zKnFi@BF`DjO zeQdH zpx!FLL&|j~3>F_|N*LR52tyuNLv#jXU$}=P$?GReN`trrh^+%lxj;ikJkD>^vYCf6 zB`j4xDk?UXsNy+4SeKGs1G+A48b>wH*so;$@p!tQDaTcT+jb}AMvjTIJ8pwtf%!jU z@FLavY!ws>>+gC9)B-$wlq!3ZwD%qF(53G1xA({&#$MifLL4KNk%;mjr^?%Bd&3`L zewXeB#Ue2GC(|K?TyINWvuWjG`9v+6Gq(p(CRrVw!!nyvGlK4YPD>)xV5Liq27{x! 
zs761LLIpZX;>~J{;5ygW*!O#uz~rE~u5{3T>1cTdsmigPuaW(%b?4SWrevL}UD)bp zYAJV_4p4{I+S_*&qZ9reGb9DOEd@uf_(R{uQ#`h>I@EPp zJJKZkG^TnI>zy)b)O8FuHIhUXZ-XFdXgU;gd)M&Q3_7icnU`^{+%z!O4g8=c4p@pd zvLb;yNoSb>s|1<~`pbSfmAI?uc~KT6pUdTgkT*>FCo~zwH~`CAc_c_ek_iFi&BHy>~3hahAv_0Wx~MxGytB;T@(D%wN1JIQE`D z^Yh}l)cbt$Fv+-SGU3iZR8e&!wKJeRc!8Xk%rOM3n!`kks&Cu}lTQ zpgNaEuscp&e(Py3fcrTrPE}3{$HTt#PTWYZf?mg{AsI5_&7jJUSfEi)GqEbFg9b^9 z-dt3Don>o2CvU9G^Hr+ak_Y0G{xd$JGM>&X_Rk|3;JD39k131k2cttL<=cPcTtzql z!=MQgKwy!L$mOlUvWW*^>R6wFxHyJO=7q}Zb(~&O`RJhAu#gqFv&r&-AdLxT!_L7r z7-@ulj{OOj!BPg0)#hZdQ*}h;%8<-&?fR^#vQgfRCK%8>iuZ8bgJ(fpXhR9n@PX6@ zH3iFmL>!zV<%hlK>D!z6r+v91lHK?_Fe9rE8$*|hG)IokR3hDA(7Gi1N zoNw?>NB*;Y%2}88OQ~7l*$EL#V~qcOSF0YQ7MrhRn2z<7JIHK~_^xJYsJUIO6IdrH zVXYJaiE=M6kEMyUjz$84Gt$C&cpTsR2)@pL3O!%ZD15W(3tla;>9%9la^#) zmnJ@Mpxl7VI$r>XdJ$XGy)Y2yuBg7uaR4XCZs85x9 zM1fDl&NNom;h(ki#N8u9t)pSq4vqh2wKs$%MS~3uP_6uMeYW(P|DCr70^gwgM!5i$UbKi{_un zj~HXs>0r-P!{(c>$QZA@T)NS@{E_#r!B3?KQa4n$tu~Y8hOcTAK9=$)08)aXWMMiQzqJn5}#sUjz`!02OLVMdtd$7Gn4+!`1c=YjJGf(iE z%d+0YzP)!u*T6Ud_l-w`IWuZCT*7l9-Q1^P`XfF1&VufrtlT$F@G;~3Rw0gQ#X!I8 zj73X60~{7dBX8<;pH>{rM)H*6x9o)h1-tj|LrI~CtbJ>8{1nZ)H|$)uw`A4r;!T0s zS(JjVj#OSptN;O+ESE%tGcX)}a;C5d-MiuHw^p@Au21u`&vB zuzdTd@&(1U+LU%hHz&I_nEe8a#H?#D*U5Zzm!IH%9~!Na06$Ac(ZtujM*9(hM+Zu- z3#PQkZ!L_C$FVYsi$!Njf%X_IAN-;}2svz97nq4P&kt^U`Au1!hld~==O2T+}Yg-cMoE{R<`#%Q<8Xt!{1*@F4)V7iG`AsEnB%pDS@?h>@o z=1do(Aj5OV=+<$!d=eFkZsr2CJFZlwb)Dx`NGm=@XL<}Cf~9*f$|$Vmw(-g_eYmLeoP(NyAb=sim zw4UaXXS=hT6FKufx7#;3QzkNE{1y#}c8T$zCDo=v1=SE8Yh%z|tGTp1VG3l;fL701 zBaAf605%^9+D{*NeBwvAX7~f%N5Byl&AtZIOW2GKpK@DEnNxXFAH(a|uxhUqP%a`y z7B4D`P}x(KAzoloZ+42;4OIXn;sw{B*%v9*Fx{KU{9z0WO(7w$w8QQ-v4 zVAWgUZDJJ-MH@bYcms`P+JstDxJX2~mZ_r8@qQaOaVS7Fq_|9E@hZJbbaBQ3zHL9%j}$yQ~jxm?sW* z7AjA-H%?>7Z9k&=s{A;~YQzcNCz$=1Lj{*34he^hc{uOJ@+9mk;EJh1Lo5WPz^{!S z&3{V6laGT-5~%Mbc{s8UzTvkYdBg88fPrZNtY}8(Z&8^3Siuwcbl#dRum@9yJsmgt znIOqBkN|VFy{TQ+aTVfd-5Ubv;6o{DJ_E&LD~sKN!>KesFL6tw$qh*MJUI&F`_cFS z!sbzLfa4Q&T`K=_MvlmOEGJcpaB#B(Syd+<5&I_$9=e>q_K%M2VRLh#<;1%?oH{4O 
z!{0f>GLsOWn73akD6mZo-|pb(3TSpxVRFNzZgD|!cm%@DPoyRvD;r zM|?r?{U4-3o}te^lk=G01wu~~8^ApL_mmf?$mwL{s8I2z`!TU^UtM}QwoT^g=4;ND zW+E8B4JY8E`m%5xG(Z%+l~7!)Qq}Gk(p|9p3@E-x5BB!oR4ekq>Uq2Ff zSi~@R1GL=JItaTHF76$?Yo;%_Aj1rMhVPHZnN8X;myATSZEiMNbGdwFl-9`IIVy9D zBAmizoppy2|NL81cpF_CD{X8>;A}9?Hy0VCZ>QPm^NH_w|N5J|t*39>NOocNlll>O zPCYRU`|7&~GDmNxDqpp8HuLnpxv}zfY@2*Rnz-F*i6xU6;5bHt_wqrrDkb~pMInzo z9@sO)@`5R|fr5zRT9PBq_JBp)6`#<%$g)d8Zb}1E-GxGUS9}0t{=C8;)+U;0fDSC& zP>~YdDl(H(`R4{XlIe@wprsb{^j8gat9Jcjglb9@vA2{iM)@5+e?Jif*qr_@FS^`j*Ap)~dcjZ8{udoyX{`KMSEx4SG7>u|8ke!7s7 z6^D*a`|?BFCeBc{lfjX}1-t(Ke(1{b(iBH@tNsj+RZA$#XuqGSR+JNVwU>l0%|lr? z>ouPv1QEB)4I!t3Ji3HgCs9xX0S=DLvPYOIB&9SwmpY34{^oGqGCtoPMsRM=-3=#ae!6pRhaglk_fbyE2Uj-#$a?&a`TVO+W6sn@p+L{4%F3 zZ+DRK^Wvd%wju%9_beLUE^5Br>i3{&cxMP(Hys)QIqqb}k^SGKbXhI^4?oZQaSfTo;@I)#=3AI!XQx0Olew#Ecj zo^d54sl3Wj-WBsm$-C6^Zn{HMX+smkmyNq_;VPjVoCVKYJK}eHk#3k!DkcfZM@-!Y zul{XI;ef`g=yg-5PRs8=TU(&X^X_Dl?X(UrR{MN?%-?*Ot<>TT#iOG7^2@E`oNx^e z&2fw4cKq#B+FPGqyNr2oTz^8p7c5@+f;wl%j$7)W#HE>qB5RdQ##?~pyh6v*qnXC2 z)Wy{WiLi&PC>1QybqUn%c^f(BjxxDjpCY*z)0cX0xzX+m};G@6tpmAs81@S3L916$9 zbV}-ErBo%aAUzSJ7jfopl?cj2nwp{S3!CJIo0?fk?0GugtXgzMYpd{)oZ6OVrr(t4 zH%8_F|A#=qp<{Sj<+r#8W5;_}v2Tavg0jfyf4%Q4Gj4Gdjfl%`Cup_6*eX)nY>eEf zO7HM3<5EB5+M1jHRL+#KyZFX!GCR3?=wl@TW7o&K3zT^W%51F-{Z+6R$q3A$G}FNm zP`bTP)4ZP=6EOq+6X*2+B6(XoDQ$?52VO}b7;8NGx$lguc02E~k~Av1 z3DEgkZ4T}+&DU9w-wkBNtt9C$&4%GZs+?j@Y&g zXB zxluG=Tg9<>wVl>I)m^sdrSH^*gNatf=rI#Zp?)VRUaiK{1A+|KWxH9{kzsuebt=j( zDAJsrHywh=cNH$GFpR)(~cE^A_OD@&^P#^G5Dy3kZ?6Gej!uH3M0#0nVTH@IK&4a9VstBVX_ z2zHf+yC2zF<=551FuMIhF}9CmXUC2%`({9AiBJY>ix>O~wqb2!6(8CHqPdgvkR{zC z?2iv$Hr6ebU1~Y|{CU7D7h+gLy#6SpHedTFhvzis!h3H?Ga;jNR-K6lw{Z?nmOzoV z{3vc#?~rFgg++_MK6vM=ZEHaJC#b~iS0wM6AZ#+3r6z4ucJ@7f4*V&I-ah5i#4W=l zfo$nG#^WxQzFBS+r;L#gUv+JoHWvEB-!XQ&5_i*ReeUsOD>G-Q{CJyBgjUpzjU_{5W~6d>>3vjsoDL_ zBcWjwM+hM1IYG{Q5j%FgEqS(O+ZR(ySIwAAdY%|BamVde%|l{w<#1b3wkVS?qML!y z9xhE-2)sJkUL#k`V?lT{PpUI)?;K>PQx>dCw9EEpGhH#?8+TuNZ;?%qiO0M4U 
z0Nvx_ltZu4+8B?}jFu9MS>4pq10B!jqQQBo{V^T+-7$GG)Abd~0Ud^jzdR72p3v~n zC9t0V+?igHFtU4BLtn%u^pt6?I-oSFC_WAvjgvb`mQW=>@_JfPN*apdZ7pW4VjRO? zc`8%8%Uid8&cn@a#yxwf)>n@^tJ4&1->SPUGk^4jywp}~kPbI~7%NHCl*{(eujg}020D+O0yGd@^Qg?Sp_zk4<;5G?qTP!pp8eR6|d9o0yhA9@DTMW0dU9NGiFY3AB=4-R&Ez*Ee#EEvp51;GI zzBgH~ipnpV69S`6t6Qj1QYI*tFM8k`@}gM{VyBi^cv1xOcBX;+!8wV`pWtQ@uX*oy6%Lb;>PIhesWf zPmF687uTA6nX30=B&L;?p;{|HdR=RLU|&&1{%Oed%VE`dl|ePcRyiO!i>7 z)v8pslBm=#Aeu#DQP}f9cIM#bhsXP&WXp_G z$vi1=1&1l=rGWxuzgv>poM-#@D&uXst7+GpHG|9sxVJ`?yZBKyC_9yv{>w9Z-A;ED zhi5XJmxfn;_!Mawoi)85;}7n4Md1D}qZ6aK52}R7S#X^#7s)0Ftou_}JI@`~dRX)+ zptOc%+XbD#1YXUwccxfCVBVEQLQeJkaBzue{*lI)e2M-ny3($LL>C$%H7ER)QQ9pe z(sZu%STz@%l`-9eh?}9G)TItHEum4Em3^AEId4xprW^Inp{;}CK(>9~`$oFyt36ZH z)97SlrUGY#23XrXRc^^gq&al3!80v*)+!ie+5&DdfUM*o_iRv1Etyt>xOFNhk}e_I z6$yt(2U(G(p;-9H>f@*E)3t6QC@U%H@4gyF_TcLA3Tm9(Dh>!{4it+k7=eUkbaTz* zsh%b>5)REIwC%Q$h+;V;t*1AhP9>T4r6D0osYM!FSI@{kctt;Me67a5jdP*;;9!Mj zp-ifqgT$_pA80bwDq^K=IewfX^Nqq(06r=k%QA6!b3IQD!M zwwRTl(aXNTULTeW=Y{Ts$lGIj~4~?)5Sq zhu|lye=bn4Jh*A!AX1D$eJm^}cF^LFF%kCa8%Y!JyU~WNFfMMP66ZXrEj9Ck$yItv zvqVlMcI9EDL2TM-w)IO=-SQ(S8^%uX5Xt}HjYHO*HUxTu19d|xc%~_| zNjgE?<>>8(_;hvuj}84W-)kR_O*33(iGYNg1V}+16kd!!Ue>MixOiQn=8;Ky_!T}AQa(BDGFw<3kW4BqOIkHm}$Ir5JUl)u`M!ubyb z2>8GYJifyhsF>*o>36jg{qZ6Xoj}f;hRCMgIFLf(hdt`n7l==WN6ftnwx7kcS}h8- z-2hg#;z>p}D?Z-#WzWN+3qp~ntr&ZU!B<3I?f#Sje=?qk?M74LZk0B5UteZz{^qn_ zcNwo@plqsHVYs+#hCE2Tr3ib-rNq+fkTzF2%M3BDu_EaH4%qi@K==`V(Q&@Q17R#* z(8r2u-FU01HzhK-WjW9+1=$t@gQP?Twj2gt*dp1Mr6g8z_L?9O9Ab{#u{R5J=o51mdxl491wqN7xnTn2pKl&VMMPkVepTc?-jK?!8-tPE`s*bhmPM&d; zaMW>r2%=cbpueeEf?|EKfWmQzKhpDFO~w~-YKo&4O-lCdV1SCaD4^gZYC`9r5xx3A z8aFOG>?BWdl1Gt4;R~fTtZ;Bnq)C2Uc-W#jBs8^TMT>^t>Ma7Et?7TbbVc9VOBh(7 zX>1Mc*3Aozqe{R>znGi)is>c!njbgVG2Rc{U>h%tgDZ|FwD~NHuF;c`9=2xSp-Fj@_}tdjbjSBry!yKS#K)-!Z&WPBNSU&FFJ^yMbH5R| zAmTj6R{4;!6;*)J^JFC(&36{P6W!4@5U;FG=a%Ri{jy??@Ea1^W5aN@tz3-l!>D)W zj{`X)S<2V+V&A1F;f53j)N8ek19HL_4i$((VleT9X`Gf52LDcfTM zr90FRd@-9ntOaRx6etKbOfE&MqLkaZP~jhZTAcFv=~4q3k*&i{B4E}|1y^yQ2AZvC 
zllGYhV3+BryD*QLnFp_({;a6P$e*0%qA@?`_3LuiuOlS?O?WsaPzM!uTBvsei5roEM|G^3G*_D3Cp0_vkqBV@^Aw@$wx;Eo+K$S03Wb^OWRNN@hJCdzAO z>-86hsfX>fZ&NTlVw6%-_7b=Av=Nw`&1T4aYN3z@c#oNRIy}9iSDbiaKEks;pI@<9 zP>L)-{$Wz=)(hc)MRQbV~v?!BxFrqBWqo0`5G_gi{ zGK-Fg=rxrYhaB!J6TL%H5_!sn1jW)A1l{5j3Pe$nQ-wcAw!;;`4@bSSXadP);Bm6L zJX(2DCB-27K+jzo;&G);;xL|?AT3E-ghN7ino{mX5ci#@ovbw6M^{qIgeVJBk3|Vb ztZDe?G@~Z3a~DyrH|=H|m#16;wO%iO<}UvHc!f}X?n}i&mi8r1fq8OHjSiF1ttQC* zHbryp7Vy_%Q8Jp?d(H*%5-?tQ%Jb|&F>+c@f-miEmuf+4STs{+Q6z_ToYAPlJr-uJ zeb2Z$N=W(LwO;3RxbF0I-nYJ|68evC7Z3wFrFA@Ak0AWr&|?fP&Z5m79Asef)K;*zWMG{@uSygU5C%kVY8+91jNE>-q{L5iIhylC_qb;}YEQH*-R zEc+@5@lTj7$WZ);K!(nxRLMWKfT!qjTjgGN9I{wUs*w@Z-4@Z<_akqm# ztu*1qn11Hj#R{q=Y`HDjMimybvg}cNiy~`&d{l$W*4&g-h#M*{&(~WM7VAl{Noi?s z46Q)n8Arl+D@W<=M-7O6Jm?_@Tw_Be{_xRh0qPgftf_znYp%{=Erwq{{1!BGU0;nr zb?KTuU;pDjk*`K9?7*<`E!PH`IVlOSb^`-Kr3)Q=(MFN z{Fo*IPh30msF9$RHIDI6*zsPnN4v|mWW7CZ3O2PzA}%-RRS~B59Zd1rj|-MUEza{B z1r$JVje5xh+rhMZV!3t>E(dZCL%f|Lv3-Y;tVUBFi&^y61ct3=8QwLL=l2eA18iu* zSRgoKPOD#ILZQqi0d5!=w??hJ$k-7hog)rXT7NITZqG!LLl>7fqj)^YIiJJY&zU#^ z4R~{Alaf7GFeRY#jyB z=Ej%o1vyQ%B2-#R<~O!AAw%X$Yi4pAt;5v{Ao{cnVnj)A@lwnVJF%mCYehG)d$blDp7 zM@|WEWSpL$mqdTyP1+Tdv+X(tO?t58WDP}b7Vpx|O{vguf95Sv2UA+tv*Cpolrf9< zx4RI;kA@7zwm44~eNkAi!ImjaeHBUMuK-rTHm9|}$2@_4Eim^zl)-ki?M}r(wx|0P z@xX|AhW`v;CPz>3aF**uJ2XpE;jc!T@=F$cWAWk_=Q(!6m#(?YFz$|-4zAg6smeJw zp6d@On;^Ffd$%%3+dw_<+4&c)Q=AQyKGb)Yvuu9@GN$Arg>n9Iqpn?1RO!OIiYe*x z25w1wSQKSU>K7Y+o$j2BXyk9Fq=6`X&d>F5f?$oqVzrtVO#)2$6btQ$iTYh+2)rM? 
z#6?Q`+_oG6&kcRSjO`r~j(0A-6W9uxyP*^}_-{K@WZ^|vXPI~pzjJwBLc#A)jc-A# zr782efrj(-QuWPa^JK52{$m59{UpexYdZC4aE*Ois>xEjH{m8|h!1K^>F$1m;7 z_v@k-i%K_pS}FLB$$`4jZGh*3WOj=X?{gqq?{B0jL{IpKp>`XkKVSL1!ggYCZ4DTk z()naTzn^2@yoE;Aw#>O5Mkn$Qj1>K#z{36fuc_AN6Et{^ZY-MT#arAI+zk_OjXc{XplXih$^~H*fq#pI)Qryi@ifOIUN~S(0)G zbg_F*1jWI@pooR-XTP^s7ZF0ZaGztFd3@!ln{@Uu!*QQH4}`?!u;8wK2R}7YOgw{_JZzaAdnp10EwutX%ho( zpAw8xV^WXg^T1u%*hKk6OncfSs4fdO&Fd8bt|=3|NdNy>d+WHUw(oyf5CajBR*;Zz zfB{B8L>iP6vtKi}8$`|mL4aL(Rq zuYRw!w{SI6&U|4PI-n`~@J+LjjQf5ug*m-cKct7($p`AP=avz)E~zW<;0H)`LMpA< zQMAIL$e909)C>LzQb!yWyWK?qth1)Pz90tk*^5wc3!)^X%D2@|?*kNa2iO6{Saajo z1@^+3aSUyt&7k}%=l+k^j+_)J)mJLbg3e8o51K;l3uMmfFP#PCa*bD%nJ(QM(UM?O zB5P*WFiVYJ+?hUT`)VMRS!udJi_Q6%#v(r`u;TKgQ{(^^lkE51{>-Sf8*LIA3(f9v zb|Z_RM&$T{*_~Z)cb8AsJySTrq2$`VE=&A(i?c&|GVI}Y=_t)mLe|>i`Bj|Z<#Nm| z%%y!2AXdMfE){F`PHOy8Rz21<*=D#m>D;)NYI;O}r~8ya>SQrOVuIUZejv4z4{AqJ2kX|_ii{m-*WDx7VYuMP-?6+_CU`z>>m@J!%Wdz~|==mrN;c2KEfw5Px9C*P5R3Jdb$aEnRP zsm{OM@eMUIdlk2bqOE}Ny(UbBK8&mPo3{?iEReNOU|)a`J-&@v~*MQFHC z;1B9mbX!@q_0Z%gX`Ef;`8IA5zSkgS_TIwfQg#nosOBgdekKnuf3oh#T5euyVQaR6 zj!KGA6ui>9vc|cYl)fPGrYx+5fuc;RB^%t62?Yuss4LA~;zw(LVX*HJgBv3)FqiNO zGx}dPrYei73HPMaFdN7!CcYw3xkSgwUMD84V!l}E-fqBF(Ws8|i%RbBywGA6>lrHP zVSx5Nvj_afD;ijCc#L4j=UZ-K;a;x8jqr5J{2b}| zRq+9(qXmvBpe(0f7YD=Kwl3Oa2R10B(4%mSXOSE=H+O5>m#4lPQn^K5;ueAgv8iA<>GB!`i>R0JIWy&ZHue@3jm?G0d9FQILvs) zxxo%cdO^1_)QV~qo?&u3YR8sW-04$_-s=a;ur8>{7qE`G%90mbZ-)ZT%NVX;WjiI^ zk+-_41AOR!@JLky08@!B$;6bVV6$2&zNjjX+J0MrVl2x3sKNb5-Bb@Ir*WK*h9adC zm#Z|Y=GftT8!A%Y!S55O+IO2DdI8oGyV#`kVel*g`;7<6+yO`-m218Lm&51V8~d?W zs3!|x;1RgeUnD+)Mj)zj!#2av9`TIV2hGUi-cAZVqs_@EJ zpd1L7YX$^>;W>=}pd*$IpY|H?h1srW#ntk`lUW9*t9o$^7dn>Gr3@X~$ol6gfSiKE z*e(h~kJJmt^32qJXD(0C-SfsLh5zqG(U1T#8H}(`%8bA8-fy%2{%1$%1)bbu#g;(` zSNL_FZ|0qsmL@%?FD~dkU&}Y}N+B+NoHtg8qa|IUBW5(mI!4Sf@C9KmIDIgjJ{82S zt>{XXXWwTKH2?%f+#>q6T@X-!)6IeOAB+l=4!M+Dz;ST-ft@b z{OOj1)GSKXFSGu8*WMW@Y9ohd)qz~Q*J6@illS+s4GAkyT-tL0F=xb|ZpBV)r)gJC zb$}++PjJ*SBRHt 
z(U@M~M8SeXVzV;Jb{^)?E+)O|H?4-%z4wJ#H^cKUaKc_i9WdNIqDLp$1g-kA%T!9@ zIM={;H8?DWB+}X!j~#la$!wDLhO0pJ9kg&=2SlE%%N9NQg3Ie^<%LDwv-N5YA!57D^O8EvFC~2xNoKbj${gz7y?=4)-ERyRO?o#kA*Zv)Ou2)^u9jdg)8# zl%K8ZEB8gJfCCdS-7^a2b)7HmgPm;osdV9j2AWi%gw?5*dZ>|UEN{4#7GoT}(*y%` z*bpn}UGrY7I_9brv%cPf%d8IpbXLxj_QMM~K?AM$sqH&3J&s~;wJALYC$>yp+6MwN zHDsRskiuj(difNM)C{P}KzES(YQm1~uGjqF#wcad+KXc^m14!NxpkwuB9Bpvw`G0% zB|IXL{GQZU;$tJ#`cV~UNWh5j^DA^CQ;L?iPnwRi1mHPeAGSwJi8OVblls#SQM~j3 z%;= zeB|CETp&k@6Yo+`o3Ge`-_Tk zs*;HU90%GPrOiDy43Ls*fN#0e0H*=+)||13VXD-Y=Nt8PbDCN~#%sU0V1G@?8Vsap z%Dya4=$w|0lS=%%u-w)*SG#Sq{U~tayPta9>i#f2`{sheHf2fW>}O*$4)KPbndj5 z5QmG*%z2t$6#pYkNP)az;X9i+7Jh5M1CxdY_$6DN3^h)Org) z16rS~)0^GQ4V=<3&n9X;nwvD$+Zm+FG-Q*NF059{3e(JYan^l2pVs^bYhrW9-T0gzrx@mE6a9aC}%J8@wC|XQ0GrxwTry@AMp({o}R_!7BqjG+v z9!IA4%%mC?fOs|?4Jgx0cMrCwII>*Dm-N%v3C&zEyvxPt$hW z<>*jZcoaYI^>Hl`hy6DZ+pJl_OR#))VUf#u3FbdL5I>X{9e85OEXoad6h{6 z(JieOw_aHD+wS2KX(WbzPi+IPx$oCA;+j?#?p)Vz?5ZH6{}PB$>atW8E2vcZ5_Y3G z`z6-~U~*}tpJJY+PqvH3I2h}nuVY)g+zEc;smLQL$Wnxuek5KuTv4DM8XNZZ0gw*K z(>g1AW{Aw5<&R+Z{*gi(cTsdZMgbp>x0Y?;D=BPJ9WaS@XhEAn84jbkOYcz>JnM4& zWKCX$1mzZElnkKXPH)$I%9y?CKuN9T{08e?a7qvv_u#jA?ccYWLHax*$P?f?ZJ)27 zp@IIGm*m1sVf8SJU@wbqhMu-oR z%;NkfljtT1in~d1POrxmwpq7) z@5JpeLj%lEd28Y6-L3%+FvN>{0X3`uXMI0-_pHbs_>Bnf?!{%wqi3mJ78`j9+oaU0 zcF&EX7!^5a74=>M7+T20+RdPY8K%^cuYy}Bp-I^N%U3#IRH~0;MSTfUw)O}2N`GS0 ziMsJA)!`Z_oQnp?`3~4==Xwie-ik5}@x* zoQ_ato%}X)p@|;=Eq$#JY#IKx@CP~J10$^fG(h4{@@xh^31`|-w61WXlb1nJSAT^w zg=9NF>OORqr>Ii^$~<5s#q{DWW?%)KPv9Io7lC=!haU|OM66k*mJC3807EelzYo3x z6f;Gd4ROw5VhGBG@_bC?p%Sjn%h81idyRUq2;{L+EVKuYbSYiV= zI$ar56rIfV4d5%hot<*%vyKhs(v9t7)8h?nJkAW#3lp_&gHQq0eM(wHNL6EXA*)}} zbHKH2;t8Ok+Klst;+{ta%rq-y2zDP~!i*Qsrz=>gIYOs?;#McTg3rhuRxiCgo%Z=h zf@a>L0(~B0rO=6vgKr6hY3Xz%{{r;&UhS&@`W+&nE@_pz2~cy-62C>G8K8xtnyZl( zW+^@qx&CFhcX=jbsu|(*>00R5BZO^+^8^{VXKUFZ>xRI!fIShv@QuM@44c*3!6vhP z&nxQPpDC%WYnQp^KVt%kHu*`S<^ExwM~kMRpOVJrAs#V4@=XSS6~fnYl;ysD$l{qf zL46-^B;t}$6|+=za@7aAj2#wscGQ+8;G`HIUa>J~yV|^qS`z-PnF>%pGs-BHx>D@} 
zf7BB77Y=Z%fCdCE8Cgr{l226(R*2WS7Vav(q`80)oEamI5ZmF7@29@E}OTL%b6JLUZUUrVS|3=qF6KN4;wO zA$EaSnTmc_!>Jfd3 zTuZis$&yKD)1J2m6ZM1L2ESy}=O+E3nsbURO<3K(rV>vG1`tOYAkT7<&t_J%6aP3?Mq!vcwJhamcbRVkJH3(5HGk>M*b>UCLy zF@n?T)RKiE^6Eg;6&eTKVk)NHmzQtx^`?r~+%iaajEIfX0nJhojLzO_KPE$XeA2kx z{5}NWwu2dLTdp!0ApIpj*5pcMJ!69tH76;#md-ls3{ya7h%Kw{{6|-ux^kOBpJ^qo zr#bE0|6>v3%q{>~lgzo>!Q3Fv*8R|0`85G^OQ<>MZy*gQr{Q9wkAhI?iyX#+!-hnW zMR1xqt(fflFon>L+kJZD`Xa{)J}$cN7h5Ee*cocQmqh-IEhp0*HugGJX=)2W1NE<^ z1m#jQ*UM$Sr{>Jr28UYn&*6Ji?N9=NjS7`aCJqcI=xFRw=s9b62IIQ-!x3+PV;(71 zyz;U~E|$}+yWJah<6(=9V#-CLm0Zy32V2C=PMCkC+O#vRDa3H8H zq$jm~$~Ms#sLt7_gicw{pg#Dw1I9a0ff!o#d4u#HIM;|6#rZ2~!kfL4JH`JuOiVFZ zIw8l7$#)|q83;FWB61{SYu}7&Wo-kX7!e}SJn|Kgd}l7tJ~qEon{`ynV)1lYJh?B~ zo7#G3Q|BtTvLFkWbFG9I{?M2&E`MA;y0WE)p&HISGD_yCe16AjPm`gW^N>S@{P!&1 zh?y%*9tAj`{)zGoUVF%9*m^EFa4?RjnDe z7dR{j;&AK`wl({hh4vUiGSK2wg0dVmvj$n=M}Ps&LftywtCpjb**!7XRwmqwJTb$X z4hc~73Rr5TmeJDxc1loODpEzuEpzq#=)gH`-S|KR9S;w$lH#QEftpLBbnbEs0sKKG=vJ!$8%^Jicw5M5kSSgp~DRjaT ze~-I5J^}s1v){{NJw5fuU)sUNK}wIUH0uk#Pg1B91`8%{B1c zJ)jdxr~09sD|I@ng(%><)eY|%%jnL!^NCz>icISY6#qJW`2nh2Zh4cWBlda8q`BnUacm9Ve9oN}IZZ8BKvtSVdXY*$~F; z^QV&yI^2Pt*{E}4cYl|V6C6}f{aj_gbUFzrR0&dLZO4VMYOBm9yxxIvYrfcbF8)yU z!MU0e=)9rYoXyAFGCp|zW#`*L;*n*ifU~*`7$3L%J=9?Ss(EB#!C^TyppJm<=^8k) zQ*YIuUB>73zGql)ft(SEgNha{uW1Tkqo^9Omcc!*&VcHF2I3(!Kyuqt`^*6PhX5B5 z-pvfRmqaFUsqxgmdKL@ufr)8=${rI-lX9uWMO>i2=Lr%CtL*bQkWMtKv@Dq}Q7LZ2 zpEqLb(;3WrY`!Nwwl%-04*s~!Qu2wUhqJOSvJ0;FnF&` z_TmJ=e&=B6iWPUr1KF)m-g3X)h8T*ih}sujI0{Zc5S@_j znvo})vrA;skI%nWyfJ929`9#u#gODn1oYT;?)wu8RBjxVdjp9>LAZ>9jcnk>b|WPv z&&my;7xAcA{_Vb((FdrKR%JV5kS2FKe&QL&aQvK5GV+XovYV8@xV67#XyURjWEDiv z1=dQ7T`pA{_|_+`SI=f8;JfrDMHB4C-EOrhma9cxXuG)RrBCF5V1lFJ?JHYO-V&>@ z^&af5t=?A|Bc94Bdko4>Qmmu^(hrg8QBHl1{!fDpaWIxm-sdXbmj;pq4c(C4Roo@z zOQofRooit!vll?Gh%q=0;)8nt&kh1Ov($HwB>En6_w;=^>y(k7 z(`&`0IVj$5jrNxLv0gv(U?ItDy&jY&2iB>UV7UnG260BU7Zq~c zIeiIm+Py7$WOg3|{V#K8)*<|0Jhv|7P8cIT*ne244$>b`T;#(4ODN7;F)!aS0z0Im zAUf$lPvJ`)p(P0mP=WEkKyi|UR^t?67DqJIN;_9P>0qPb 
zOG91EHe&a_AgROQr4u*z+_XIrrkrg{Q$n^01d@x%on4gG7TB8V1v zhB{89eW1MN0UYXZSudgO)|Psq?Gi%*j*!=g)2CEuF1XjD)Yvmg0q6CosXLJ4OU03U zaUs>CrRrhp@}q^%*5-u8Esd%>=8CSePhWSq@#5rZA$tJRB#M<3JgYXUWnHFN5{x-r z-n3R-il)d&kEA2LUibV$=23 zXO(vSlz9?HWaDv2?BUxRKwEW|(E6J?AOW|&cB=(FiOrRM6N?NSl-BBLj$q*2gXnCG ze8Kf^D}_>X$lH%aJ}))SldDTwSV0&(Y%Avu=Qi9JuqYjS#QGOL*OnPy6&4vZvyKSu zS-Xh|bW;uuGB_p>EGciAQfpLZeW;~ybf}Ci+lCe13cV2mR1SDn9ngB0Dp}b7{`0MTJjoNHIK?;_|B>ZNp7TkBT8OuT!aH(I&<*r({V=5cS zCgkS^e9jLC@L$I508>O3g1pndIw(p_L>%a#>B~zYGMnt}uy5Vl#3V7fXRef(?mF>6 z4Cn-=XIb1R43g=)QMoOu#Cd<4P+(QZw)|QrLc{8hX^k+9 z9aWwSEI;}3EV?XG2553iQNL}$T@ClP*ol`^zyw-Bjn2T?2o?SXXG0Iw`$lY&bmROT z4?vB1Kx=^JtZ?g1W-%ZIGMIO~ruj2xDV7oDYry@X%i2!f!hNmMq={RmxUC9`~k%L#`$eL=Nh|C)Jl0Rar)3yoksr14<~qJ;~tP*a2O<+cKB1a`7FBs^P8I@@*wq4{>W?p z{PusnnlVNm5mvz2w)yMkZ6}8M-&4w_2SO6?OJw5xoRXGLLy@PUS5|Kl4 zjG^;qbj^Jyn?nLp;#PC4TK|=&S_zN^yvAGF7yp9^J02kuA}MTP&HqYtBLNig9Mew& zk;qES85h*3mRZL_iDehUdi;M)MK@?p%qmbrruH_3vZ4>CFO2)Jx3%QW9+XE zDW@k8Cj~nb2$Oesp_T5*j-3b=q1K|PM0CyBG?8l~Ur|8{bgH|OW zKg)q(kUXNOJ!hjd&`2x!qRG;9ncC@o`C|6eM_{yW1Q@rtDuJCQv*f&EKNEoxoCih* z_XB2GUFl$1)V+qsF{Aqybz7-tY?lGlOW^}!m>=a-{C8UZWZUO_ji|GN6S`V=56nU> zWfj-7JL^rN4$|D8A?V^E8fWRnvUkr%U-h+hoptXNq2mPIie1?3D)m*0Lmp(^9LdJd zRa;*+tLJ-t){sH2_Y$SjpF;H?U*+pWHPaG$HAgFixXN*cIE(;ip@*7VQkGu*7p?S*(t%rk!v>H5WN;FX2?m)ApzX6RF@{vxSQPtAabJwDH_}(-IzThRD{= zo>4Pn&5LAd1ox!ZDcLxKI5c8$lM3=_a9NxOF{kCr%^aC_WUEO3t!DiU*+7yo`AR|3 z_r?8^!h6On50Pz0F54mC|7n(bZc|D~wrlLadSdcFrcj2o?jtKHmSX=4qudjTV@J-i z>y~l-iM9SQjS(q8x|VFEN-6%EgHh0;sHK6!>oZdCVMOtt{x}~SQXvzM$}|3{0O-fk zxJ_LNkwwaVtAfF;|KB7VK&|^uvi!iCXLaxjAQ|J^qsgEjU;h*58hP?DeJ2^t_k&jDlw{B^(SeWqake`fnf}AKUc~L9!%M=3@5n1Y=A0 zZ^7(B{)Gj^3R*bwUKbNj37&>pO-{lX#_Q@t&yYzi^e=L2LPjTI_m$%TPAoeyFH7h4 zxB23l2@?gi;L8+rbV0E2IKrN0eE!;VFA@@z4!v(GUK6`&hv_k^iH(nqP1&SyJ&YOy z6)KHQWk|L7zIKG=c$=HEvZuGVWuql>iN&gI{cvj}XOGVMOZOLUZo`bS+viX&^PNMz z{+w^CLtgaz)j2fsA3p+@9dFPW$y8^XRI_1{tB-P9qqikCYsa=fk)C|$@8@UmQC+;D zg^K%AL5trOX~mg8od5gnzrM2u6~F%1Uyv_|Ye&DTWFkU1`bu?j!c;0^P5RsZ<4f0{ 
zpL!Cgx2+ZuvJkhYdxB>ng>HQP^{GdY6Zw4k9UnF22LfPnzuxaiMe}m?zG5LY+a1rJ z|C0hgA44A;Pk)~Tbv@R3Zhb&a-p&zf8zGDF3&sB?<_&)|&$Ezke8AB(CK9`_b7V zX6~N)XL5>_gYU0)e@%1h=J&PcdwziqT^z54ro2|d)2@3fEnJnmT~fE>*)vwH||P!qKP zIEHlZ^69(Zd5)Y4czM9Z$EOyBFBW%ZN)1@JCbk^z$4vsxe(pd8+V5XDgKz&HLI{#9 z)O-enzm19foZI`$*2@fZh4xZBa)&x9iw3->9yvv;f97@g2zc+I^MMif@6_PaMn@g7 zL$aa@D)`!|3H?HuU&ifr2Vjmav}k6m&Oji-fdr^bI$mb;GChFe`sAw)bq|gicwck zE>D2o`Mf8k9-*n!&|!)q*QP|vczS}A5z1%>1fdn&Q;W3rVu4uyJ8$rj-vMMU;_|c0 z8@z5x=@@LH0zrfs+P^G!4H{By_Lvk}sow4w3jxJCrR>!y?ux$ocP>87Nqk-aX=UYC z_~WY47viTZx)2(1o4mxW;47T5UY_J4c~_YLs=J8gL>sVDxcQt0b5 z*6A)$wrxdRV@x#iyPB5^9FRhJ6Cx7j^6!K{LD}z@Zh4e$P)Bl1)Bp&#kZid@|(G^XRiX2hOIY;`SHjz0D%U6ao$uq4q3K7;#h)_8mcyK~oO7}ut$|Bni~&`Uk=v7n6b%NW_a_>a zCL->MYq{RfHq9tvZJgc{=u+q+OBx8u6|dAegqcrF7Ns!naYfy!lvY((E7?}wG|cB6 zcN(dD$f311-Kng0b6Uo!!h0u+&Rh4U^QvL$GHsc?R^Q@$IeUq3vsmP=s+9=AW2C~5 z8$xOhlGjsj<+2qI4^zg9Q)dvKBcoWG{Qu6Y-*xnT^a_#P${oGCbj1~8(S0^HAz{TW zy>#2VX<5@6_L@XR4^Y}&Jyf!(5ZK;^l=okwIn3U8X72lqPrL6rO<+y^U|+xnPviRb zU0@KHsO7Y|AS%N$wa2azu{U?#7d)BwY>tNJEs{6r%ZB9+Y>7?oXWbJrY0%7Vsgf~Q zv<-truaVHyv8`BAo2l7Dg5IyS7D<5-N>q>uBmE(;fO6lQYO4o?gPG=37 zzHJR0vt6lOfPUvtU*Yt-%&aOc*li3)CD+ku(OVnJ|7Co2h57n+#$V1KrZ!~VUbumJ zohI~o+rD$Ym6n?8rTX8^_c_!^-!$I2aS+3XGwzzj9Y^yrmQMz7| zEGFyLmVSA8-u(`LoHtsSjE0DqOIihQbfq@-%X4UhR4YW2SQh5oft|Se0$R7=nH!VI z9Xu%>u_mO`X86aMO;2tp_K8PMCv-DH$r zjlj5Bcty&r<8wlT`OA?a>xPC@rTRy`D+%qy%MHQHhxHzc4fiFEbtqg-+@OKFa*vet zVir2yB%ludL~F7bIf zrctrj{HemSdc)nZagAC-g`(VqV*QO|;%yrtEkr7YHCLEQob1L-q6^>-xj(iyosf%K#hCuLNuAjM+tkO=Es!baSbsu=iL}=Z=QQzAzAQ zs^keIOV-yr(AjzxB!~1VD%B5k4ccSS#LgdE%qn$d$ZD=vRm@QN@1oB)P%*cKF9*6K za`2N7&~nbMH;VQBiv(es#B88UT_HGI)t#M3{6j_PD~V=P%Y_RW0v3Iw&l22Pwf`^CG+VwvED|ND-{|J z*&772FN4U=nawA)d<;6k`rC8iBSm$j>Aqvn9CQhl$xV{rcCC%i_KtSXhX%Y@!ksEI zcQ(0x3=y66OkW<#Me=MKGnz$k3}f9?V`)Qu8~IYI3EB19{IBX4Dwd6dl+t!-OWLA= z(BjEm{4_ACYMeO}`O;=FtzF7*Jbmupp0 zgPLpM(Be&W+M?~x-`JpfFmF`2Pj%`9gi0%rG(cya5TC`oFGtC(PxQ!ge9N5PS$6G3 zXh#Ld?od#%eiXtdUr8e$mva;%YFicY2vA?0Su&-7@(Ou2x)T+!7q|W5?lQu-DrW(5 
zC`^Q}+e6H|6Vd#YbP? z7s1wQE)MPusCfR^+1PMaP1fl!G}F?f!0SA2kHcYpM$9K!d9AT$y$e-2KJ$*85}&`@ zo6Ntm4vutK`;K%MPdkeL_<1wVsOP_!T?8pWB3y=t2k7Nz4?le|z(}QYWu+T0PHYK~ z40eIU+>W8*W*A6;E`~j0|HtD2RAMsD3aY#6d3%%^$u2&PB+*m^Y}HTn7!9?ddMxN7#$W2_Ghh! z7Iu6lOK8U{RtszT+;<`zZbuMG*G^e|)+M*mEc0uxzkmlZeNvPko6s;}GiUpG!=cXE z*G$i4{o&kRxmrTax3$A}=j>55pHw=S7g%K-+7pptyg9Tluo)hpIkNELrKr_OiT&6y+;?s(hx9t5rTA89v6eaH0OjNx>3+r2QtIy0tHmBe6!TJ$oYf84b>Di8hW#{wN^747#d}6yJLE4Hh{Xk$el}Zrv2Z=& z1{%>Ly$a0{t&9sVT$LQ05}jh5GK*-Y^`tobnl%GxY@bpfD%X+;4BhK0AABoHTd?w~ zCjUIq*z=6I0*9Lwg@@)o&Vh777UKTbV&B?cqla@?V?Htm(OwOURtIA=TDP2S6Vy%C zEE8cG>1!{aCCeOOdOF*6Led?)9Nk=e;)L^&%vY1ZQk_2LJ~jQZsC~b z62BCeXEU$~$VcVg{+4oJZIW`ES5e`|;V);a&{H&%*ur`Ie9aJ|vw<{P5^Q{-lw zFSGr}v1ZCx!iTj}o+VQCZ8S$EV=hDDM!|Tia`P-h>=%J}Kya*+>q-x}A`z28X6 z^=u;OMyL)o_7e4m;&K8-534Z>eFRR&Q4 z9mF?Ja>et$^*MTRS0MP^?txbD#@*Q)LN25ncrG#4&nD&z(@OEUVcv7=%Kp*JZPDTA za6&ovEpeeEA`T+=!{q(Uu1?G@$1fW75fv<*yXPkzRXNn7!bOXly-~Ef$g{(Tb)C%% znsu!3`=tkyaxN7*3peQ&Q!_J!uJ{`E+3jV~B|>bC?iQ5gg2(rG;0;vHfD?GytNbOL zYmsd7Xk0W zI??QvStnXnt%pk~Q+uNfmGWy$YFbg0_AVnAy0)p0Q#8Zu{W@G^x5#D55Z&{fw$dKR%ljV{zl0z1$rhx-GLtT%x$o1E$4Z%)fJ)Zd77l%{K$tbOyyz$zS z*+o|OlN9gAU0wEJ*2vun4YVleRLVdlGv&SN#L#HP85`=vU8=AbV-4fZd89U#w^YXV zJ*=C3JbQbVZWb%3K?r`S6s$(uYcc&gKIEAT9=jrx{oSD0&Az=X$kAJu%!1N__=$FT z*RWyP@M)>tsQGn<0(`vH!h!LQY3U~x;h*~ETU-*JPiN>{SN3_%`yAjuC+4E}HldFP zXu2n5Dik)PJg-Jy;6d0i64sJ%X|1)|l9sDQ9^4LKMugN&VMuYp!kG6394w_jo3-hN zPk!Fuqwu%jQ;oEs;YxSfRlJEi%PX-%RO&>p<0!N+^TF<>ZgHBq|0dE%*(;#J5o3XU zl5+Kl>jq1KPWnD#uFtl^w{|Yhaj%h(?|S=gI?j@&Bc5cn`@-yZ^C`<$Th(A*%zL*a zoO3Xzvi06h5KF}>=x;8j<@WQKFH5X~FJ$}O%OyrHDtwoRA6xT_eR z0yS^G-Gw@Pb@nMOYpUc@-4mfYzwm2eW4mv19C3{a8y8oGBfFECThbgd|D1%zZmm7k*b*)NAlCVUl#g;?YTDawlu$M-NwsRo*LL`ZO z#)iBeum>8e`xIJqbM|q^#MDk=tAIHf;J0y>8RQuhA<&RQX zkE_ORj&ELR0YXtk-vOY421%(g#g4Kz480en8>by8IccE}w4M>92}*k~sy`kQzjaMU zx7Q?}A0h98(DO-olCLp3OrHlYQlV%cnppR3HKNb5Z50%3uD={_vRY}*EF~5igYxD9E=qk* z%;fqgN*$Z(_l9Tg_!KHcHn>kl>$>LH&F@g5n=Yr*R351))K8nkzuQY6jZ+z})7mJG 
z4;Anw>D1fH_bg^Ly7=0n!WFBq#P&tu^!tYCu~|Mm)iQiljP~Y&ocGgOEJC@AhpH{u zqCsP&BF7+m9D8td*40!|wW6l}Wb19pBErUJ@Q^o+l1;XG6Jq^p;_#X3jGiW#(rYkP zJGXs>*c5a9b6c$mCMc76*JM`^PYSoQW)o*>E0+IpNI~(&au588lt)8RNj9rNH@2C;MGp0Cx>i9c6b{mTWQvM;^({|J}r+-AJaR7OS zdv`|0p=RC-n*JU^tS3V0{#!4Qciw0loxZrm9T(?u+f)=s8bQZ@IGC-Um)l!IW_H(bMY0||fZ1wbd06I=&*=9rPu zml=5+nZ|3~ib-kaZpyx1MiQf3j;l}R0ekN?HflQx6Rs2Y3}XVkfX{z<0fbavN1|6) ztwxe-S)=u5@(P$6CLYg~SIavoHm%pyP8SeLDyNDv9{b=8xmhmYbCzHC&&~7AxHWH; zC{wPwTN%;HR;oGdF$ua02NF~>;(tlsuK!*3lNs{AVT`}C>3C8#QdFAIxsBV(b;a*0 zxN8!)dKb`v95r7$nsQnf(^7GI(lmeV)Eg%knYCz|BZ_QFGY|>YBK@nP3JOF($^}WO z*PD7)LrYz6L>{&u+lIVwb>LC8m;9!d1Zzf9n7BspWjYJx1qOU$b($iC_Q$3BnH(VxB zrl_jAHdO2vUQq)n@CRF_Z?MePJG^i%@A))fzzXNKE0=LtD!($ zCwxV8DZ@8dIF$K%h@|t^U4Pw?cTag$oSSJ^BnTQyHJAdISKT2n1`iT$9I0oCT;% zdyQ;uRrYI`%5(iql@TOa{k=iQ+Lqfow7O(B0mi{a6-n!OW`?)%kOA@FSeQd$&X%~< zKE$x%wUgID)|KsUu@axEiPBv0!o;DzZOI+!i$C!TAjbUFfy74Jl1bJI<1(I%7Fvk& zpK})?BcVs)WPEXQfb<)C52rL8`U-mTHfve%QYIAF z3SB$>4B{xn^Hp{89}>>?WVUE^CDoE4Eq#hi(> z#7kfvx0o%`+2Y43*FM6zVg`8g`QB+jF8klP;ZkC_DWMQIg4KhW|8X-LB(~oyQayOl zq{KpYEo%ME6n=3w{WALuRv<-|Pu`GNdAW@_?EP|uJ`hIB>VBRc;I=1DcZ$&j)tfx= ztBY`wcg_LVZYCzU+baS z(aTTi*EZ5ZpOJ>HswfbQYR-xS{^a|J_hkR>75H9$lHwkm)OWAxuG_)82);|vImloe zC^3!O@8U2c->qvz4A3 z;uDUpA1(T{l8!;n4S_ZHWp*mamjIxlW`A~CcVes9^f(9AQRo#HTU+qA=vJn~oUY|y z7qI?R-r{_-PfPq_MwcT0@CUgh_CScwXyWHL)DU@evnUGJ%OqD^X}$b~th3rNw@U%T zGXof&Jis|zuBs68ntrhNc7WgH9#&u9Ua^>KSPC?9clkUd;3$;6{~;hj_8>`^s%!b= zmb_b|86J_MrmJYbupI0$X3X)izo1O)ipmg9qglDRScozN=?&IOxn4)yc?Iq_m&A*r zLlmOBvh=-{8T;h5e`7NE`q!ldHR2W$UUB&~^+=u^npw%?hUc_Ol-Nyi8I%to^) z6%TZ}fr_cwhIaN64)qaosZn|^+-Z4ncez2 zki@FsQp)}D{!*6LmxS-UVrwM?UF(B$PbN?*m4mk$R&O@6+HXEBEOT72z$YUmF(@%{rRl6ohx~st^lHG2Y&Txar zaLZ7!Y|dVaqhP#4msl!_YiG;aMAyQMfAPgYtjg+GJv%|l!rhGyj$jaEeMxb5Y-8|X zPCwsY^;<^N4-|r;c8&4gtyL%jM^1g`1|jrXIheNudJZ)&)QRI>0%MALifhKRYD|75*Ccp7_)) z7Ou1x`YBJj#{0x1Q~x}S_qL0C{ucD14At&koI8+%I3mOSm|D+Fj#!5;&>H6yF%y$*!VTos^=v}G?^JkFm*Im3`uPCWZc~=FR=!azSPY!OZ 
zrREGVT?5~5abLnsm>@ryz>Y#VXWR!gCmgm&;Sz|?^=#bOd78<}itL9nrEm;%;`Lt}fzK9ttYuCvaUkia&p?}M+ zX=>g74dQ3rCUPz|pw8!2k}fmSEB1Htt!c1L&F&;+MZ7E4DD_vf%{pE+^UFjnsP}cr zF}ae{6*TE@99+CTS>-jd?d~@N@}MtR&(7St<&eP@-!RoZII_&HL+zt~WuLA<@0RJ* zxK!o=X4PkUv<@ldr?aRmg;4w%q$YYc`%@MMUUC7w?^C>JtF;P9j;9K&Jl#(~d!Q}b zc5mE6Uk3BJCEoNF0Hx-)UaORs%Yx~pJg_8%Nu(q6{{aR7~2RKQJTF z^E*v8H9=FsjRf@U@ffp0S*c@#*E?`jjh{FZz|Yqmr0mWbYC$ zz1#p%eyw`Yp$w*g^}d8wD)dZNNX0dr8S4wzc6g8S1aZwDBaFHt{@H&1FQh==KprYY zofVn%wWl=za50+sb$+m~I>Cf$yb)@d)72);HleGO*}p)&a)6e&@8M({{BAV^{VXE- zRXkQ-drBJ>)8m}K*l(nO=zTtPL_pt`&1;1Pv>f+TLMl5IfB9xP#GHaaGzL(90!isg89g^W9v~nkf>ne}CEN)x?rQnr+N8uH~| zIl|>GRY*cye0qfx{gW1yQ5id@S+5^H86swnX}HYYebub+1!$-*5FH@44Y7-*o=kpt ztaB~EYxhnF>T_|9=EcmNAhY8q-<>G~O*C`QV^a3{cL$$nHdYqIw|M90g@MM2y# z8~GL4s5t?hfU~erEN^Vp0qB4z^l-Nx!zqL z8*MZM7!o5?K8I6~a0N%Wd?F_fgx*cX7(u&aI&W#(?Ap&889Cjhd=?XTGO~dx`h*vr z|Gn-%x3&1&IPv%M6nuDf$O#TDu1SG_`rI#yftGshhbMe^+DkD*AB}7;!Y@U2q$-E; z$QS838;bQNXfz(o47o1=?HI~VHEwCO`1QP|&fwtjFkLWuXGV{A%@xOq$C{1P*!t(~ zP3orAnG~m46vi~gSZfc+gd7boZz7?fJ&U5_xUW!&zU+%>Q7ax7+YLdCW!{e=d&f^9 zF>R>+@~H=YAJ6u0jfQ!|)L*JOI=_3D(WUXMy=KH6^el7fmV5x}iC6Oe8dDi~;*I(b zh}*-(S#|i$L&7Hv-xIwiU%xx$$8!VZ*m&j-TZoBSy?$m`zLR=Mg!^0^Xy3N`4Kw|8 z!^(U*nu>7t37+AB2^C9|nW>=AqI01WfYk}B#9a$;36LI$}!#pTCnW_O-= z$^YpjX%N`dOopa^!c17eodG31I=J)1s!`foG(y__G%xnIUZpN3`fw6zI?$ zc&Jlmzi1{0e|!KJyNrCGqS;{VpU~y*;tsVw8ymNiDLYVt%gbtq)A(+fCPCfb%G3>} z@a1;GreuF&M6@b|@iUxyAro>#UjC0GR_f>3U-;+^^aY<7TRDj>g5=w}jY^ zfNKhGZQVl&!=4}^j`-;zAvmVT2r^qaz;;m1Q`|-))p>?z=}@`Rw0>p_AZ}s`q zc5K~ewbUW7tX?>rZ>)m(cofx-PR>T`0T!cJEaX+<^s|E~a8F)!3UHjOqo2U=z>du5 zz(RViDi0X6TA~BFD_vD2B*@N5M{epQ;GxOLOM7gKe6GJo%?-7GF6D zRD57nny^KY{GbN2;=?aj$O~ z$`RGwuWWvV&?U!zTSVQ3?B|xnY*Wy*Jy#Cv!pW>I+3Uwg23JU8{(SGdiNvccHSPxutnj%+1K1^IYG*=Nqz)+W0(Iu^3}&bCz_gbXP5Y z&qB8PD^z*J;tm~M83W{y_soF_RgQL3b(XCcWLq0Lp&Ap9TA0cx43wK8#F~ZsAF&ol zcAkh(e>+Tpmc<~g^G02GkJsxh-4kc-9T}DL$BgKBaBaF}>j=zknbqsb8jEG$F*Rl= 
z+?eOGzll+l#4YsbjL@o8Z0rjm2Qig6mzgK3jiOu2UR_alf=gbO?a1`d={RY&qTPl_T)T6y(lu7SqMTj=j!kLE73M1xvIXAAZkVZVW?^zbed_P_ChDP!Ak`zDmCQ16k!BiGGjC&bG_tlx`n+@_C zn8$KEewwhQ4tb`yz*zr~?vifo{PVKi%5LnB5rQc2jKQ&8$KpGp1cxXz0sDuDa@rE~ zH~sr!Gqr?hLxh>#2{F~&?#-1@305_P2hRI{WR)x}sYtGpWQD+ZasI2rSdR~#hd?|i zH--g5y$3ihR}Zj@J>Cy?6^IB_+M!V6E0fXdz?btV<<4*Ib1&mROk4XgdhJvd zIcSxxV&Mqbvmp15m!AJMXlJ!tb@<9lefx##FRz$?1#x#-E(mB8ZuoyD)Aio0sLR{E zR+3MCqh4gzd9i^5`8|MGUJ^l5x-QpZaoY_p6X$-bkUmORufKar&|NWV`Cgb~;g&g< zJqec=e{jH zr`_UNNkXG?8t+5gwBY;k-q6cADz%o`ugOn;4+mmBO`icm{y9FLp(Kj((NZoS5CM|~XH#+fyi1M}70 zp{KHrSGeGYn;q*evbhx)Wr> zbX$%59c*`Q9rl7_9HA?v7zx7FEClG9Pl$?+Ry2Crzq*Tf2ubqHqbwiUWg^#wXutyE zk5mSkvI>WzGv1z&trY8=TF%Lrn3S#)**>V&&y&+QV0!FRTx`4((Ts4BYvY>*@H`sB`Rww?x@XLf(LoRrnD8cE+gW zqp)>-FI$8b0ix{po*;6jhV_7 zC(D+7QO(&(X$YB{1mI`OTuyxx77xB z(BtFe-DfM_h4FhAqf}hGeiH)9YCdV>)o}Di#V=pAUDPE7<`=zG-O(doh+q<4Q6Isl zx6qW9$hGLE%a>jF1$ES*-B5-&bCwOvuN4PfmLCpb_lB_acg<$JZi#npRw2pWvbT1o z)Tabjmk0?ab~)&1MHY70N=EU=Z!EFNz>xt9C$4Rn*xo2@x^Zj3mYJ6G4aWn8!f+&y z-;$U6N5R=rrf z8~XgDeMaJ|DzIPfP}eqKSRyBkf!Q@9J7jZb7Hw#0d!ch~85F^98Pd%39;bx-M=U+z zz3Lm%A*7Rmaje{zEI+@DXJ6SvPwOBjj!t}w0vRz52l3qJIvZnAx~Bm^9*?7-T!x%c zH3>B?;06cWT@)d_FO;&c{oDFsgIbnBvAJHUek|h25{$IL$kkZiPp*AI+*`b)>45l1 zRaZ_--M47f?V#@P#;g4YhaVTb_dmb~>@neiTmwgvxTD~~otI1OXpzw(0uS4HARa?4 zZ$=Gsq)yBE;+10ic!JPBM-_F6)<+9{znSxk?yVPfII z+!e>a?q>#3oiF z!!#9Knr^5qFj}4G`615Iw;g5=DO!6x6bC{ri*F7{a5b&I5+|I+NVt&Hx< zJy0(Oa*3FZ%Ie~1;=DWO?jT#Lce%LeAt>&&=_gwH4{U9tr-y>2w-+U2<>YK*2=XR+ z%#Y&<+{2#H#HVIF4c&qK{-1~H(Y@4urdJ5%?ir5>Ll;Zeg052^7|owh{#hE3az6D< zCh;p&j>CLFaCD5L+G>?L(a+6BDW#@&w7%-T1<U54g%=+^L}VPv?cBe-YNgl{y6 z_xgX3qmPM06!lgNRkgQ)dLt0b47kCXa&OmUV4hC2yG32%xWA)C*5bYQNJb5g%Z;-8f(v`D15f^^}|J7pD9> zI>q58jbmYWfyzS!k5|i4TLHn>`WT?m4<+r5wsQ$3{Kij&7?VqS7TuJ2M0nZLU`<%H zk>_BCRd&=Vt9Ibz>vFg=tL@g^?vor9(&7 zNfnM7WPhZKTRy1r_yjV2g9ba%t4?uclnlrC!tK2abb@{koghYq%?Oz+AB$ zeKt(rN#1aSE>3PzsQmTLYs+4*lQ8DufY>&KdgbXod^K#8U%OL_Hh`&jd0HLjKFB$v 
zH3WT+;XNW9bW3n=HH7sIF?|{Z+yZW-#7%3Xg%Y~LT3So_j+SP4UdhVYsb?r+3l|vg=agZ0~rfXL-&5t=B`^5ZJtP^gL-HwIV+UTnUzy zkdhpMkd3WNH5Rs|s_Ev|qxZay9(-9sg)dqy({@G4#OavxtX6T?lz#}CUhl;Yk7<=j z7Rph31z``Ne2+A&uVGagDEQQnzZ>&$JJC-) zL^so8Qv~mk#J0?Sj}kko`G-DFHfoSO{X8NtV?L=14DA->!lf4<26FAT7;dA0!4I5y zdN=QwF4D)X5U5fQo;BKBgii@{r&SY`EC8wvfUl7BcvJ~bYKGjnkFxEvc0*m-i$*!E zNh&P^v4tm1V>V2VMWW1p`1J{$Vk~`GyN68+OK2^a_MXh6ECN3kU7^!Co0d4HURS<1 z6I#Ak2TTPdZ{)t{!cHe=_l)dB1~J+#)h}l>4)?uV8;ZW^7nCFno;$~z zOb*sNNnIrN(m_s}tI(ukMsV@Sj76zIyF7r;>b?7?QvB0uPJzEGu>WMwJtJC$i@Km{ za~Kg!V2&W5e?Daj4X(-j_~8#;F0=bl2@S1ozhVxAwbC}&yY7Pd zgTk=M+(I2YXYQ?sE3f+oY#I^hK*;S>Y`p)^>_keDe&GJiZ~YR|(}87PCmu`IAwYot zPJ;z&XlVV&AtF0+fzcdAaG}e6wI$ZD6bA%!AEFa=v8fV`nP34m_81nmMrXZku@2Hd zMzxOeJ`hpceTPw!V%_qa?eBazHOd27{=~8+cJ0{mVnyy)habC16G1~oU_(2f{U(*; zz9inyGI4ym^@;^&){CJgAi=@ekL~bfOq1veLPMoPd|5{;@Na{R!Yr`?VEx`+c$`x0 z_N@QnHmF2${!o%b3ykn@%&^2os6^>bnU-oG(W5*{kX4WZA}WPyJ*4`> zNg(h;T)EnzRg!Tq2$yp*mKehjd9vYKaWv*iCH?BtazZc|Aop0XTVm#wa$HEX#htDn zwJ!Fbz1>4~q03dgP|B^NR61wl46Rxw-S>wl(8)RFwbDM@eAQ%`XL$M$ftthqmBJlX zPJ=>jT;ezFTj#8^?fQj3MXj=gxhK}vd~G@@`#uDFb7MNvg5|Vgwf$qKR`)($p zt|sQROU<;`Unu~Icpp1Pyt)3@$IkisMJ65PWQ$doZ#4EN5h)UzdSWX}t_-vq(29y6ssr`0|sF8hji~eCSe5;Eev~Z}lt{13ZI0<6aTF(9v0YHChvh+XzLMAXawnx$8@yJC=*)n+h`ER0qo~n9lf0 zzJ=YvsRO|MU=sxRyubX1oJH#Q=LLc8YKJ1DcGt{Un)yAHCWl^;L9%3mNgp~CbIn~E z0=qDzVzp86H)AQ-pEUgtdV7y=o>n$1y<@bi2ba#%%wQosZXzaqE=+XcaLS^mABoQ$`-{h;&RKcyid>T773Z}Asj(VNvi=G zmw;&yBd|M0FJBl+TmAJQUMSQPU#@N(Dt2Pxue-Cj={oVPeeH&Z$tpz+V9(qHRvFEt zj1C)~=mr}62OJcg_yPjN$Q6BoWaEeno4iAX@j5tYh+^vdZl6c0OyP0J@qGv<29$I0 z!)bUPiYS!jX9{blIn)h=!F@<(6_bgDCBakowI~OjgoYxpq96wcuqf^!$Gun z0ya}dNW4~L@d2CnzC%;D+_`$tzI$8orJw^;9{87ylg>c{Q0%poI~t`Z5b(x$Yqsg)P5+=^__I=!$;Pwun~r;Z z?@Lkd&*5^+M=A0k+)JpTdZT*9Q5BvwekYc5^9MhnOK4$wH2X;&BzToqrIm^I&&3^D zcVck)O|g@Unsvu(^cx=c&u6nTOI^TceNLjT2e)c;bER|Bb2f6mx#@qgc-)n~@nE<6 zGL!>^x2Z9ZRU*|*Eb{)=Zc<~gmPM=lA9(UNt`JxKZy@3u8;_JvMMu5+ps910xHDRX z7}uq|=1#?>a6yYPGzIJ27!uc^hrdO|y8_}cXNa+&S@P#L82ZW!4$pwLRm0pvi9^1H 
zDwFX`$nPEGXI9GlezDAVdHo&&ToCrrSo(RHxZdqI)!J`AhO{Akn#H#y4{25y(1mCn z9Xove2nCFfS2L?y==NqswDm4eg0(U81`i=@Tle;a3!C4px&xFdBFNI879YxDaf9J?67%m@=!34gH_Wfp zXOJKm{ni`xu(_3|I*HRttV$JrsIpSXIjA2g&vA4YFrJlW3P{Q$-_~>?WU}P3uFRR= z{YpA{LNW`ix0A*{95=LZ@8gfhkxO16`F1+x$y?gvd#SKC zDPvB{bFGm%SgfXUxHP}U!~eKc)u7f}$q^ga3RYDa{Z454sl>W3PBnJbpS#y$(xrbR z;&hEY@YC-n2)zC7C*UG5`$d~Nq}*9G<2qi}aai1Y6>|^^jZHk>s6()vtXH`3XC_|G zX%aWRb1OlduulIdAeCjCh${cw$X@|B$!d^blFUTs3#)E~H88p92g~HrNM3%VrOcFl zG?5&aPm}0QyZK4|&>;6{gAE&>r#>wjckYm3@Fn-?e?M4x2+#g^!8oWfyFd>|XUcUc6oSH4o*H0^mNx4_| zQEG*0*lBOt#7x+4D5zNf^vXTL5}|`;E7TNs+WM^_50APKg_jT8{j-Pql?`q+A|I2g z*qNk281Tyf-?bEf%w0BY!fHyg0CHp=9NRz#OPZR2v#Gg19`c)b^ z+RgvCZ3(@du;y$d!8u!luF&BTk6EQHl}80<4rpbpKULlS;Z=7gpMArO1sqcr zJaUI+F2?0rsFx?su*vf5eW{09JQxJdw zJ5q`&0)HPaMeME$9WVjtlF#4G$-gD6DJr%c`rJ*aI$HP~!diVDUupsq9_#lWE#f05Xc%%c@+7%G{12l3T`O@SXt#O#(j8-}I7^T)zk_3`J7VW+%5;Gc_%8ZZTn)_?fbO~7J zB{hU$C1I7?k*nN1JNnqKg3r0G4<$7gPOqnzX?|dlGo7=StC_2rQR~r>mdhokT(gG!W#JF-n*%(l1?`@@MgC?3(U>cCLG{s^x6EG*w0= zdAY5YGAMe<(H<)EKUDQjGlbA3MPvN&aI^64=pQ)6#3@=Io)^2&7h>$d@UthUG z%+7wHyksxN#{tpv`PY1sB~NmAxp7*)+jBw})0AYX9BcsZ?}aR+IAk^&7NM@F<16is zfHPhR=ihunhek;~z2&?&sb38v$^1=`_|@L5A+O?*qP6}9n-O&cr?7VYGed@gA!6-& z(oQuY%Ci91EE9T0^+LV#!qH~TP?6B&c}ruWxf2`?)?p|ZHaqyZifkjLWS||wQyM)^ zmXQ%Gk(oFMT%<3N+3OBfbgsr%%8H7999%dkKXS1hpZ&b-RP))MTkMRU(adBv-?>J; zcgP8(9~RCbD`}S6WL7F(Ck*i|?4lXUNIz&jCCK)kX9~~~n~-gZ4MSRkG>fAtm>H-k zzB3Oh|05T(k!=mWvQE7#{k*eX9c&zb$v7*nZusUz#%dkDDf|yS@|!*JscF7gd`W>E zhV9Kr3HcMb_d#Z6h9r&jK8>7C*I#Ev%Bdijur)$Y^CxVmESHYD8X^lar7}5wmZ1eB zKLkg|Yh=@iz3h*Dp&sfO+r6q#1;w8&^P8~OBr2^u#qP;Zg7O-FHf^0EApu>M%LIir z9VwV%!{dM~MqOU>iP`wB^%6qO9hhufs&_D4$Cs@L4f7tl&JGv1$xA18(CgF|}8+h45e(y1_RSE1VgD&qe9eZs}11jgv5gU+5By88WO z;+=O*$JQnly1F&~;~L7n4wR;pH_sj#m0!>F`U0z~EyAo2y8ShIGmYozGjRtMf+e|% zH%Ys?P2E-=+~$A18HPWBbl=p2l-T!bUOuL})yC|34$D!vH=J{Mw0YUU9|At!Oew!BEQM_q=m^*SX zWA*#X{M{K?uj-8C2Q7zZ9j$F!-NBdjGDl50-`9S0+b~T|!y;t%ZfP+3t#t=+%YQ#e z`mw-|- zOFc(km@co=sdrdH))mee(rDMMH66YgZhmW<{$aIc^GnT=>#>D|K-G*BvNvV4;Bg+8@L=XTTn 
za>v=|8F3Ny-j}EQX%ipYQCuaLWM2)h-j}Q)w!iP1fL;jxP#~xKug+A2brE<(&^l)( zW@+1nr8M3-PN^+DUO?Ysvkp^Kw(;Q@8Hv!4#?foHb%^)xH#@Uz7j=)8^L}u<9r||( z6~VxbWy`;YEjZhrtbdR)5-F)OgY8kS4w1Sixb)Nfa4G!7<-Zo|CxN#=9ln8=YlFaq zFR_kumk<>E5z1Y@VS@6dSJj2prB#oM)QR>BHGDA^G3?tDPZ`(wOD<~;)l_itE=jJv+7Kx*_@jLjgU%pRCQ1I z>QbYaTYf)?GagG5Uv6QnG|l|`#QDb?6I`p5zH>N5wPYt*o;Y*NFkKEI0cJtvg*AjFp;dnqs^3w(7e-VS z^FRC&ej%cWYK5+DVkyk!9w8RV@vt>P$x-$DX;epy(Q~l~y$QkDICHQd)JF(tu)jhO zA4{hf?A#fSU&dYmr_CvIzn9JGUVN`O8*E4B75YE8uZ99q@D4ZO(`zyZPV~OM{BZei zP)W!p`Ma}Zs^38QG0q)?tNl9a^DIaZ+I=Nw!qzRWM8`n zdhZYBo^j`O^sC&bZ5=g{D5S2D-w1R+3I{GE(6H{2ZQ*LcKfVBoAk?0jZR%_g!>Mwz zuj_o*O4609&+0uJMUDMm?Ru{$qZ<;=eZ7-*K`Z_%He6WJfa8$RhF5=7&Oh%n9rk5w`ODM?vif%|#h1{w{@aa~0`711Z_CAE;_JkvP-jX=bx@z zRSW%JS+?K>NM?%y?$O(Cr!T^tb0mO}YZR3G?Tn{B;c*0U5RDpE^Ke5-`PdV-blm)+ zJxc!cmx#X8GyKJbgMl7*{7z-$%Z-q0n0w}&M_9HQOur7JhjeYi&9_ah_eDl`xK?O; zW(gp|Le+cLs|O-cS@gtzi>#l^S*ll?j5dtQa%yEBql3q85c?-8a6n2{$G?92o!R<5 zk7W6f^d0xX#+|152jhaRMv*xa1H%BX@Tt;L@j!shCPA$lB6UIF<5tca_yqG@Q4DLNfpVz z9^ub?M9V}xQ{-mG6Y4JU3?-gemeBOJeG;N>3Y-M=s&PrHdP}006+a zrAlp&gz2A)tkDdFKFw`({SFbKyiiE~PkEu$C8~-wnLRaD)flf3mN^%zG1-Vri`j;< z2wja%Y`s_Xy;vtZf~<5E5+Q8H-SFsr;Q`zf73bjxGR|kQhq=h(3RK;%{i)0$e{(eb}%Q#1WsL+za9oi(1T~Q9Hkv ztnE_VJ=<{hdbpzUgK@Ne%h>;v!d>bR`9evs?=$5q{VCbP!92y7)rKr(KY6qHQ9j(4 z9=vo11OPr9h&?Lh4Q2(h?zHnZ9p)vUnMGJQCf)-CXl#!HFj1$44lNaGdl&xaq<_ZQ zyQjoPV}kqPfk3fY!5%2&lz0N3>xY&_PbZug_*7DwWC-_$U!ufUcp5ISHoR@k5)W4u z)Y45e^jCh|$vNTNW$*E9Oxo)65!Hoze?X(*t?R4?$MTEkE#|SQJI9}qtph2moB0Zz zoj(mex4p0OX7|a*H+^RjZAo6HT`}Sc1jc1P@NZb?ppl0+ePXLl;=!q}{P<>UH3oXn zpYiYV!it zi5maFJpO2xLgem!uk$~=!Q~D+&5GK{vAgy9G#q_yD5dXIdt0d|YW+^=Y1UN%#$+y6 zR?wLmW{Z6NMog%m9z!ooa0RSxTLxk zR(Y~0qkf#X>%gtpHtNnxZr*z7-&tQ^RnQYs-8) z6$Ml9NQskqOIXk&5U3iHV(I+{6 zI*e;M8X5jQ1h~mFgz(%AWlV?DWH-hn0}P8VDh@jxUJT?jlld0ftldG(x@EaXaJ8uL zd(=~IWq-Rxdv+sd=RRoYYClMJ@BZ7S%!iia;2SN*H%^pab^5(TentzUO+UyeMxHa{ z$o|<_UAP);NEFv;_{@1a z_F7aB-WzmOPtSOO!OUH(2`7sfI2MM(PtWTup1l`U#I;jTgXOQzFEx~njF4(cRRmiP 
zs55c>1xNd{=62q=06OjL3~ObaD9x0cM7muS_S9kL)cSaXXjmMVP!mXWHxrj!rFwOj z`~oQY?DejC&+K`Ei7|(*$*{T@5C5uc7-EtqeKjwOK zKwRiOsw97=^42a&w~{5}%{JKzjXx%L zg5C>>`%O;zw{EhDVv8R5mHQJE{KJaMHO}!0gmzJjy5Q8$E2gr@W;@s_e*G7%f9C$5 zot_}s)$1Cswe}sDhV%7WOY4hB>|%j zC8MRILwpp5rChfd<1h4`A{#T1HIZ8#4vn;+I5JTN>4i0CYcwit2giW(!!j`{^DX5~ zSCo#<5>qHxn0}|q{xJ>w+npr%_>m|PIw@<9kNHk@!w#bmou=_YpGmGNkeL&^DB~Y~>mdG7{d*AD)e*!j;@^y+ohAWq2Rg%l-gmWMOD` z$~ot`BL%3A-u&Be`|pqV!c`4s0xYzW{_0HWisnj=#+AnRSO4C5gdo_q96l^UDG)Mc z_TvIVZU)wQ=OXnK-%oD+d33l1wnQ#p#9{ zLS@*O3F;f~7|&2o1k7KjVoI}rui4bUaKXA3A*$M7+MsZZ7+Qfs^{ZWXa>~zgs8-p6 z0zu{B)7Vk<6QJ9gKiUIrj_xOrL!mlI?}Q!Zo_VK>Jro^o33&eg#IFfb6(71D*2Tr$ zK`39#oCu7aJ6Qa2`U2M^g7j&gGgLviDOnT(0!u9TO4*tf{0(q>3x7BeyC!!vtj$!H z6;6!ggy)9m6z9mRG14$vG`k@-5&9Em6OVCX=+Q9K)dmC8OvRdafjRa4(?WH4vIY|ZeopM`vJLYE)G z;fZzO2=V6`J@a4xIw~n(Z>#aC z^vKmsy5?dBt#^V9cy^Y7U{ikgPX47GdoyV}0@8Kq)O>BFJ1KHsDoQVP`+rBQ|JMx~ zBCgfHL8G4WeHxJQW{0W=+ONAWA!#~HmKQ+o&0*Vw4I)2DS$a)9(vKIP7^Y+k1mlR( zKDP9??Z6L2dnnR(#v+lP2@C3T{i}O9^F9C|2QJ6};WoKnB$F!ukjWNJ+*pOnVw-GT_6WCyA`LI zy#+-s3;oz(i)#iXH@(Z#4yrg8UzAmIvUAs&1ChQ()f?>l+jdVf9V{w1e$>z1+hyrI za4V9w39Op~r#-h@xV7WUvX-ZVBv>jHo>9%N?bqO0oou?x^?i$QK02MebIyC#-pJZ` zx3?x6Cz6Xt4X}e#eS_cJC$F(@u?;g>w8UYbWFqYE#;)S#wtSy;d22IbPfT^dlNNX8 z_cOlbY^72hVMgztA4iVttXUqxnDwdA5lCj+?6vuxdB#>snVuHgpS9wz z^ybU+Ks z@q+rKG~hrjrZ36}55nEwH6SclAM;)Q)dXpGi1?VMaC0X|M#EbN5yb`~EGtW?UZW}D zOsSq;+)H&3hcbeejDGN1-=*{m)Co-WtQ*RSb9BeE2F>Iv3lukHCN%w6!sJTls4S`P zfL{-NwK*(Ap36E0tTZQZ&~$pgy&KT+mH!LYRVTc>t_!dk05;HtALy;zNT3Ng&=}!AO0*?6x7<=rQHIK@2u0dQAiFkr~X(w|Jee@4x_? 
zbSk;Pw3PAox4Dznl;{?_x4al(`2gs}dU`0VIF-clU zFeA5Do4qW&Q7Mu}_V5PJh6#u5#B}%$%>d>T1K4OTzh=?+4OvqEoG#U&FSWXhArz~_ zg_?tXJXCF0iJS#I7d5 z6~n5HN+-cokzkM{7ObP*G33{FTQt1tcFuf8l}yV3hsW{CAL##Jr2jou|M64YZ3lZI zK*n*WVn$sd#T9lbTtQ&Q16f6!&9j!zbH6$07M|0`4?^<~iq?3vQ?fJOrR4@RW)7*} zp>pnxTy-D2DjzfBt`(69kXMzf+9ECyyD#rG1DcyQK28+1h}^g>X%Io!v#FSHOHkW+ zdGBD*k)E(<&=T+exh?dHM8QKW^wc0~g`iI6$Qheq4aX=YjPqR!)SJ-tkFv?TrB)I2 zI94Uf5=ndI%s0>~Y@<^`_kx9lRRACsr54QdEi$2N5rJOOagnpLOA=-}@cQsNl~P{s zc{-4wf$I+;PMA_dbZCQMWlS3#BWw*wm{fX$(D+h`mw`SicfUw z^j39);cz0k;gj`~Ho?}9*BjTFudTi)D2Vv}lD+ds6uS`W%VA}qp9+K3xBDpzcgU-{ z*rEcyC`G7TqI^hp!PtiWiBb6YJInptB>!bmUB$gp)9GQLzM&6fIZ1K)q-8lRVABUX zwEOM^tN7xV3q-_MZ!-%9hFDuA?XtM~f6i~h3vM!zIfK;&=J47jK^2Jy>XR+B&ft2^ z%T=xx8KWP0#na@<`|m*X$BlCPRm`z$k~`{Oxq9qBJ~xy3#5PhgT)yZj#ny=i+K<;} zzNy%K8C;t~)1vcIVI~M@9r$3<^7=k1&e~?#a00|5pCf78OCt0Z>?Bar8;tjo5tp6h z+RUU|`)X9RVUaLcONM&Lx(O;p1>JlIVNo4fwf4JV?ebvx$iyX+;h>WixHOEMoC~S; z*YRuFb>V1tsb{^6oSeRMU~f+k-^ho{X1u9s0!?UP1jcdFLR$h}1zhpmrs%YtB`?$H z@;-67j~HtCbDoj)n)^FA^qb`q+wIrmez1MrgfFC4ngS7dDTrI={3Gj`2mbOr+b}PI zn8PvR1!Mf6IK6+sb(@pCvp5_2#Elm2d=xg30p*rw zaqX>7Y@?OTi3p(9eFAk7;1v|T%+?{=s64Y5LB_1`1bsEF&i{mJ*#eRM&S~l^usTh2 zboJ5I+r9?O_~aIXrJ;Oq@a#BiE#Y_-UbOxoK4lP*l5ZyY`M=*!{kMPQw06lldafP(qwI_8h z*=c9H=V_>Y;7N?e<0Fl)O-MzEu5IjSk_2)b4^IpB>yeG|y(tG5Nvc4|m-kC{710*w zdUmReFF&a!;a)*`diU*;FPze`!xA1;29@{h0_S&aC(aD=G-upQ3fZLhnHI%do~)Nj zdYI5yD>e(JB#@8T9ea13g;pd2STnFm9Z*}%L410JH;i4l+rBvy{!mLcFI?`9YOhpN zZl}o5v3X`#Zva;<840(<&{Mw5Sl7zSdxs}EVTBHlem;jgA*6G``_fVy5V4W>ct-VN zyIkTEm-Ahppqhl>&EDE`v*;U%zdW~;LGa~U&@Ho881VR=BTj86Dwqk_Z z2hfRsJBgnc(d*LXfNdM@q9(!R54uE({atG(cx?6|X@*&@kmsr<$AP9r|lS}mgs7LD2PJry|jFJGvR5oYvd;IJ6_IHOug9SzO5V6ng=@)P7mhclCC#xIJnC3FiOC7OJcqBOK3V9ghxyO$K!%$O%DVHADp>g z!A9*B8C9J#25@#>26m?vP;XaK{+b5=aWj6sIe(r|$n4b|8b7!_@y$4hAHv5Fz!X6`i`NjxY`X^G+~IT}$rzUp@TD<~!L?fZiZ-#1%2OBY~xjVYS7=873gXYLqZM5d(fB7`a8 
z{UmogRlJG}4;4qLZq^K8Z#KnSvmMV|TeKLQhuvL2aI@F6|MuqRGoib;MY?@|)r%FgZ9;T6Uz&Xa~vE`Td69m78DCC}C_wjo9GQ@;u(zx>9?2mi=h+2&nu{OqTgyne%>JY5QnE1vc3%*D5&K_)q2`KHcVVOb)E zI*wV}ETy|;8dX`-NPVyz_j@Bl@Eot5yBW!x?Pu#~!&at(toW<}qRV{etVe#A$epEg z5-wKaNtd`g6udt93TMZ>P@A$eA61r@mV?4kVP!gFKHf#v^EvkPdDCI(>@#n56_61r zX^9i=vzP|OqOtY4E$J!5y(iNz7Hh}qo}e4R&E;yg5oQdf2zRs&%w2d+!oX}UZR8e< zWDc}RKKpIOO!P7LTR~S@Ku0!3?A9LEJvO8-AsWFP+O6lbFZ3<-c^o-IFn5(}cio#t zMYhzIlHNmZ48^=xQ;;JRkP6?+x^smLF`aE40h3TIBuU1st4Mk0qze&nQcRk@13QQP z3HZNc?J@f^6f;IIX-rWq#v$IQj>d2b4se{&H)T+o=3hEomP*`qu<*~DoEo`W@8j&E z+ceivm26$ETY`A?BFeBvWp6|$S{tc|s+3GuFdes%<1QZ7QyX8iuY>I~z;|L9N-MDU ztj%7*b{n0min`AgtZhVYeF8q-y+zMIi_!RS=ODaE4*_@aL>;6Kmu5|?&s}zqNCUow zP1hhsHN8iY?lMZq#6HgWgfBUx|F#A}+p(L>$i!nOJoo_dlN@pm_FRBN(E4U*r*L#X z^jbg6ytDw~`(!^`M;!_t=;4vqi=9X6sGBMn%I!RqdZ~pHS(TcF?AniFDZpIw%Of{Z z`B(#p78 zLIO^Ui6D!GX@#W{<9H`DiCE zo*5*4JR9z%bvw5hWhc?Fx+=MRN0}$(Y;41wR>$uqBuVBfx@fCT$B1IJ)=WZdC{Cv^ z7Bv^(AGW6sAzfVD^o5{VAUXFU5N%#hiUvCvwm!C*qT|ny2x^0MSp1B_jObUfF9ksQ z$Xa2vSnl;9HC=L{0+zhpW-}<|D&Vn%hh$8Is>rS%D9lnk5&b&W&h;! 
z-=h5$`9tpF8?f_h?Ey6H3jHdNTiKh@yH2oYi3c7V5p8=p%#n!Zq$s!6vYTDBHXy*l zsCqpDw;PkJxY)FvZ{r#o;g;}!7W+V)fZ`By!9r*RvdvuiX^aFh5}HD~@d8o{gqN3c zro$vO5~=C3&O$WQ-#lDNH>Bx`t8V_tS^kN41m-8Zz>;r19=2NnSm?rTTiePri5}O} zW4rQKtMp4ZkTpv0g92oe6m^<kHSP*nlfriQl%xKd%Zg9+cuU(%hMF>cX-ia z>uivSoGeOhH6{NID9?~N|SK;`d-66L}0_Rjiw}Dk@;oPt%ukDiTkv~XU-D` z_ie8&d@5dc1i$B=?vflqjfPwJKq~u&Q4OEBWMNSkKExWHujL&oZ9(Si5O8`TZcNtB z96gxc?H}}rnc5Pz9u=Y6qE`$=4d_|irb?D!ITyZ5?};enCf(PDV8BK8p?iCfG-|Nn zI1QsJ5G18hfRXT-<@wKBH@uydMY-4nqg2!GylogBu)kKC&A+XiZ!Y=PRvos1?CVkT z|4xRg($!+E?d&H6Z9HjFAhlw5Ptmlm<8OuKkU|~Rq+cFg;IkSIG38ylFZ}Qy;D|@n z_%?Ii8l#6?(-`LlN?I94$t#$`9L(QLzAR8NVy5 z3YN!i10^SBE4-f%i4|gMjksnvc3Pl zkDE%Ibj{M)(#72N+WSQ+-zXhN_DwFN8V=oJxfYgVFKRxe(8y_eRORA8!;+k_t+6IH zqA=e?I=&eFH{&Rl+QLqcEk(?8+F8EUNm90Ev%$ z5p|oFZjoh65*PQ2fB{pq+-LxJc0aBR4H-MKu*j%mj+$Z3qi1YWoTdbuaT!4h$YJga zPC@lzQ?QPAYzEmf*Be#~Q}h!4)^Om2BH#Tb?b#$CS=hM0ub28@Kq&S4#{Gq8(<9DG z<*QZtS5}`YqDT_a7NfRb_9TJ5u)41Mx!$fOMTp00feQSsy&Kkb*O&QxsB( z-ej~YcH5Npselb%Y>FJ80W3`Tdnv+qY_g7e$~U=P6b&6NL+Y}g4ao9Ly@tRrI_Y7R zdT;c##od5#;loCp&W^lZ=8Zuh5@>(9EV8DTTBt43KAj$bQY3X7r-4;ly`7%B0Ul9DadO@QSuqJ7nGb$z>J2%+g^Emch-e34oEc%QeI+x!Um&KqG}p$=K3GLTwC76HYX`5 zA5_vLr;!e+`h5rzjINXeeAa`>bYBr6Yu#h2?;KO@3fbsU#J-&MV-NY&`G&ht_j0BV zQ>3d(5m__R9Y`4E1wDC&K#(cM)nPNPW>JkpWt&Y*%7ad4)9~}zQnh53%DWx!jaGfs zoRmL+_2lN4&&&zT9!feCko)t`>e6m~3+i*!R33+5N?;-_pH#VEm`|+)f)D!H#DaYP zfd%}J+YdN{cS$47_i{fGKABYQIJA-(gAn3_JgK#&jEK4Xn#-u`T+(CcqhW8Kp1qt{ z`X}tfW^awUtw&ymWK&X$c>3n^i1>UXG}^gknPs`$X?8x7)^X7uNTaUSUTba8o_g0_Mdw&g z_RU~NL)pu)@~}0jM^v8`{1n=DFg`J^ zt48MYi=Af(sy1LD0vx-aJl>!aqtWscQW6xM>p2?>YaN|M-@A1t)Wl2WTZ)alzZez*ap#nUY`&WpBn)Pf2 zA}D|WavWA!6Nv4t%qI@~ilo$u9!oAh-XB=E zH0>_A`|fJ1-^Eq1X?NTquY+rmeK%vmQxxDQt~mM&Hha6ET z=S^qpT+;a=;RUI0VaYeu#JPD3I9sQwSJPXQxQs#rT+Kh(H^brPC~H0zm#pU4>jTQl ziIiskw5y!A+lJcEyOwr!9Z4RuoP2C5R6?!DD#f}AZ^wy;FuR(z`FW$co`y1Mz;u)i zgjr;&2Lj@Rvqg1^(Izq=jRG|KHgV0d9nXh5d^8FQ?!@C!boCQY$QF{$jOl@F4 zj-i;FY8=nsZw=~{hVFGjB&oo9yyo|dO2hZw=;)gkYejq(d37m3ZB{|l=+Ie$Z{44L 
z$Yt8@g{xPQ^{qlv5Pjixm~?&mu^Ccn?rbj+chU*VUYxDb*$5SoV9ISx!mJI|W<8GX zG*u#eG(R6d*8^)tqILYQfW0sSh|jBL-TH*HAvCa(%TZB0l2=xPO}V()b=R{YIn80- zrb@o*Z$R>To-#%A{#0{z{)ak8_DE$qSX9YeH{=#a^1a)KZ94wfhjuMJw2RpLzE0?H zhYINTyCdu^pE*K-B1{iEptFmB&bEhT$RPQKUTh%wD`lM)5N~K|ccklt{BqE{sYV>v zm(^ERzvq(sUC7a;XH%iPdvGd zzZlt;q!cB5tyPVoz<`1->v2ek>WXhYd*Wg-B!@bqY`&l7q3gRcF2;#VV6P7i8l;hQ zxouwh><(Xd_74&b9k3cj41mSHW|u`hrmne-zk;UEDi@r^WM$=6LgJ*jmnZUzL{$ge zfYS2nKqtJgjNrkC^j37XjuiE3MVF4k9A=gDOIPMEwbo(r&B&$SmiWZ9newCGSPmkioBh!01lu>1pe+W zpzA>biRAot-`vsNj((GnoJH}{tU-0AR(~myH;}@?kW}%P+&RJVWu#NrqKvH?pUO zQR(Efk)}$LP3+U*;R6cG_ORvMoD>}*U%{dHHoZcB-YT?o(QupSpaKBTZ~7ak0See_ z%#lQtvX+_WjVf>5BJdL%sR@haLnF#nhXoB$Sv}DFtd9EA`-XBbd->l!h;Dqn2>F6Y zH${KYiPqMm#0? zAj;1hW6Z*>Ymp~8KR3U}EoBH>Ra*4)FS2Y#rdIGhl;bAHsuXdD1GPW0wxlT8vcg}1 z6QKq>1hzgxAco}NE2VlLI2j*uWBqRoW%?9^^Nx9NElyIalz9iTbx2;9NiCg%-Kh!l znocK}6HyM`BQu-446cmAO!$cp4Vcyg8yuP!1`UFo(2wZZC8D9UtG)tZNon0e&BzoW z}owi~lZC>VwB9YS&_753Z46z5Tnw}Et2x=7^$FJy1)VBA!K zZODZz__zqC;7qf;qXDzb=CAhh6I`j4ESBE|ilA8WSgB4vJSm9 z1Cg&r=xm6qGQCVU)3s>*)bFdnUB^TV$eL%{-YMKx8uoT7;xq;7 z<-SJQk0W*$SCKm9ghsqyDp(|*SvKn-*C#JlZ-}Teg{QaMWo1vn5DynMr%Kac@`s)) zkVyb)Z)d-$V=$~Z`{&*O_@&2Y^J2k06H#HRdNw+ke zUVDM#dn9h~wQg{{^u}c(tWZm6%Ph)HM*`I}-v;;`9ufSbA|c=-x`n?2g=oI>2Vyz2 z>rX{ag*8b=8O4XWBm8`I*L#=;^VL>7>z7#$HA-#&wb0`cv3u9Apkot%<&ZvW^a~2J z+v?0ehgRY4&DD#HQnVLUUFG$A+g7y5rvrnub>yePI&``rW5Ubcl7n9U!||7D3%aj! 
zOGZh0&b>IUhyPV*x^8>-?gSWZBQDOfBp8j}P`c=m4wf|+Me@;yk zD?Og)wW|30viS=?!dgQFekHFIN?TmfK_yW(*8S4}>iY;Q+*j{pV_JKD=US`G*PX4b zfh#Vv=`R=M7S;c!p>pzofQ;R14kY-UOd6Sh2wwV5)xTqpbSi!t0>i-l#G1=c4P_nL z7NjcNTjw$6Sy+wrEcSi*e0GdZN_eqDZybEO^2Fl^ltmOLye${L4{yv9Fu7&?R!f!E zhB4xAA)v(CJ6wyJimO`2GN>y**u*N_D7tyM(SFcNuI1vC9#|iKnqJl_fT$2(Y**TM zRB2EMvoIzY!oSkA359KW1YQ3-w5{B57zmEHc~O{gWP9WC6J>tdQJ2{j_7;7N`D5Qb zKv!@;nqURsiFJ&D&*(dzRFgl}j3rs$$9?P%%qZRuNJD#LvWlINrQtS|u~%9`XCfO{ zh`pZJ6x05Jvj0r+@hr7f;@2IapQl^K33%4Pk#09JaGqL{x1Nr0L=#s!?E?a((>tK7 z?*Yyb`khfq|Ia*s^H12ltHp2BE||a`B1uQvR@h}OS)Zog=9vC*23Lu{!^G`9H16A9y->C18dqtet~HONI7U?LO|D0dg!? z@0hDiDpUWb5#p3I|2=FFHxE)%qbTjRkE_Hf)rwY-e%kS^CcPvGhoz_16!y?qqcBfL z!u;NX=xH8DAM@Bn2tmPJN{lj zTb09|BjMh$sQS8H*m2p0yE?#uG#TA*Sz&*>H58A_0iMLOfO*TQUDi9rHk@%k-oOv4 zJeK%_{9>_FbNsoHeu}9$5`RSerQ!Q9;kN(6<-kYgORq9+huGyPoYJ?Gqx^zrc1^XD zgQMkQ$@xO=CEAxew9jx!lQy0u24HxA=%vK{F%D_}3gD%r@uG|c{6J{6kC&p#6CT`^ z0Uh6502uF1OGdN(9SHy6;2t@R4JBn^?w7WSyi+rU+ysXCby~vtr^C2j%KOf!m3wwN zxXbw%NM)7WTQ1~@d*dV%6z>Lb+Sj^h))X~dL`n@*-8B|JTLt&9->7at&8(XmfxVZH zI3tm*sqv{t8~zt>eWz`+6!jpuAxO3WE~lqtAPid%s*=eusRnIlXA)EXhTKWI_NOrOaQaP5w8((F&7HO zt1P@PC8MBVFfMzomlHRLTk1j ziI0-j9^QA@eLYQRy%$3n3^F1@G57N0IkLoRYEQW>7vvYJ4eTFjEOdjwdoek6VyuJQn_VmF!+p3K*Q)Z7h-1!jqYD>&r?mF& zd0@Z@9n3RAUhQb+Q@EyK0wg^*w_fzKfpCq}@rnZEt^-K#pOB~iJ?=lr-;+2!$(ZlU z{Qtq+G*!|jBN8sS?Eg-vv>9Mm`CCVI|3|WaA1qo6@K{;(mAC%MZvQFf_tF4j3wLy* z!cm@651tY@F#E?WN&xuQ_Qk}fJl9`H=@jk!4}c*H8^Ond!CG?tb>^&Ni&XGOhZx4b`8bK3Ods?A^U@&7Y%Te$^9#4TqQx@wiH3>=+e zlMK;ARobYBXMQgEk0Zn>JR6Oh3lx&%5y^-~2-Cz1T)qUHBd!oQ@KHXU6`ev(LO&jH ztyZ9B=QJZ}qT^AFo5L2)Xdk~F`}Pm(xrINMPm<2pP2Jq*=@Q#ycNH=#LoBVI;EeXK zfEnV73(%L0xYz$mSl7uDhD2qZIv#%A#hp9J5Wwre^sf}Bt>0e&6qjuNY9J(BXppk_ z2OK&*oRt>im(1J%F;Ul1b|5>OajASk^BQ&!ZM}36^36t;0%oG)mx%@G(9Ft zc~@V-(pE`!lmpcHkl}&nN{ze9@SvJiA8bw_D1d_1La&cV<-oY$Ef0&PqtDQYQ3vS!D}{i z&j%td$in>WgnjmxP25nkJjJ$dH!kblwTI+l4U!S!^o&!Oxy_GNOuHMFD4{t&?bRZO z<~6>?ZCbWq7QP6Qf$@dFo}olE;zoCM=?I-$i+80T0*!WsS0y)9p;aOd)E3-s5wrC` 
z^ROaGlMrD^vKA#}&3LXj-@rjI8S)mUv=_H05cTJc8Q!nk@|&;58Lsm8TRSUm_}~Yak9GF5K}X4 ziOnpDERqVxxv52UO!UYgS35o|wtNfIZ}gYi?n`)IRtkmJ^~}#ua!xTUxHD+=39&S0 zB}FMHhlTJ;O>B-jpmIv*Sy?n(A_8KVoiSzAfw9gom>9jD{#a92#B=oC=TXbO>U!Uu z!I?I?hRgd1cn@5iD#h&I>@9R`;dDf&sNFFrD+sCi23YCuWJD8%t((LcAe-VUbP3es3 zyjZ)`pql5`Wu}L!>?Q}eXl4xqbehn)P%-8wTFHS=M47oC*RxqJ+rUjPxmpow<{1z} zn!)Gw%&hOFwVOI<$xI3^k2j#|(K{MTxXQOX zlKn|-oR1nRN-uy)*)#%|=6O=SqU8NBKYQJ;I%oApDkli$9FH-;b}$?1qh@mSl~{^J zo%>8S!Z>^A^^%;r)2&+)<`(LCZYDO|_Hpd?HjC=mzlH+74NDw{-M|FqT%%*MdbJQY z5#KG$>w^gKzp<)ym=ecOaXPEKjK>u{%VBW#C+NB;eAS`-5mS+fNOqvx=dih^ow+C1 zxtT1~WLMI)x)b^YOZhd{7xRhzSyO6v!<*!l*_m#zNY?qyr#EXF#l;3YS4XCzo^Ezf zZZK#$$sbA|+XO@Ru3#Dc%(vp=CFA8|)XxoSiaNI62BMc+rMH5FCx6s+IolzEIINK(=a{igCnEW@NIQE9+jc z*@C3h&2&#E{0`ce+_V>0*C*-LN4;!E>MU&&h6KgBmUXUZQ;YC*SurJqMQJgLumL-Z z@Hv0J41a6OzKXX^p1g-FrA5=OMjj61WKBVnWX+lqZh`n9*}!bTda!jv^L|atF0|G1 zpKV_pJ~CWd9*-9?NEr6Cdhc*3cI@tE58~=0pNk3kU$>d66!A=`ts-wr(BDNNc0%T}p|j>ud@F|oT%L(R0kJ8a0Kj-_i|=kl>( zqouBOkcKIQLT2ruZkId&td^o^i6)s*P!Y*mJm` zF@AI0UVp>AL;RF@e(+B~SP4{eJ`B4fd-XR^F^|8i+US zlq9G+n15y^QjFxxuA3Q&Y;6w2KhtV(+jssqbicBMIibdBLEehbFw+u<)sQQfaD zS#&5EL4+o1wh9t#wmpwmAr+3TvkFa<`V9BYcbQ0aDp^^IQtG~% zUt`G=uSE1=l0^6S_cWOEvIMO!oC>sgUgMv`AR|J2SQeB|d2yTohV*<^R#>U{0VecY zj3p^`6WI_J41AoBa~M3{z}BSAR8#k>2#bJnp^J|#T=uR!4?DNDYiylU>l^Lh52?W# z^vKrhOvFOP*tOrj#cV%de>A@X_7f`k2+0wjfTn8wYuP6C5xJ4zToz_fyNsaUvwts+ zEjOlmltMdBPK!(&{cPL8;8XT>*cKDlA7=riK-c_9UWB*xJaXfyR!*B?(DQKC;HB&H3X8T+K2I0IZ{8s)Tod?RA#rYFhQUlCH+B+E7Ovq zJ@u8nkI1P{MP7;zj${3;k{Z^Z*apXd+BD?CCrZxk8#So?AOb!@+HALJBgAq+JAHn4 zjs#gyF)Hnbzy@~BDwV26&ywUtcIrHzO34d}s<{XtgrMV`9hFWplMnqI=Tb7a2zo^l ztWK>}a=u)WRe7Rpi<8o}JfI@9b8i*HzPk%&a#%|E03#!=iJ-X%-t7t1wbV-4j_5Wj zO}ZInTeq-GSC%<(HvP-e=g~!t3Z=rVi4TWRV1kcm6(c6W4Nnhg;mN}5mgy2#ErM}f z7mx07W;G&_Y~vm2H$f$uYE^Oq|6um=Kxm|EXDfeHpZsHs;L?(QZ-kTr=dmD)^A~J{|wI zQ|qVD1&RXq8oYn1)|7bT`=zDUEicm=d?|gwAo#B=pDt*|bqhn@v=61kYI9sM$J0kh zybcNM9Tem{4na{UHw2S@Z{?deO7e}u?9n!AeK#l)hAYJ>2r1?0C`pp8s-EXE(Q13c 
zQ@YbDJZ+9n5AG7ac5_Ko{4&w=Bn#ns;Ixj~iULJmNu|oTFI754^}fY(PfjI83FnV% z){UUsvj1K0F6QYP`)}~c(FwTU0Kb3htv+hF*KMk(PgBqp*-k!MW`=FGvHFajl+c%v ze_;(?rs=H${OJf3n+R4S9^AmiEncJ}e_&Ojc0L#;%5^i<&qhZW{+S2r?3pkL zd2d>oG}GhhHJ%cBORPpnyZM9?{B9(-0gd@q`{L3m>v5tC^6@MsjnY$jBZzxfNlkrr zxoHvn`a@N@XIXwbe1*v_RXJHPB=_kQV$ z5sUfAnG1c_gzlL8+Y&0sT!8bOa-{Odj`ULX=>7k`9OISqyO$9{EiLQIqmOtat5ULJ zLm!9)G?=Q~uh5B8??`bR4}?{ggFh7MlFy z1E@73E|m@Mve^T5Z$`ptueCYKtq>}Yc9RVvg`Co&1UGvF&4rLu_vdYH6O4jzPhr3j zzkXM13BF&;G7ypf;<;+xi+S1wmUEgemrpNni5l33zjOLJ&K1{-BLnC)BDU-qOZS@` zwnImW@n6bH-L;oq5?;wJbDgf~U-$&OKM*PUUgD9Eh5JoHS(r$*DYCYvIDy==P_Apm#^ttH z$#+@L1PI6jt_Z$rKiq+y#dQ*4gpV z)O9W&4+Yo#czpc2H%MQMp3bxXIz&jgjki_pbjyxCa3kW|)45uP!r4^@0R&eSl_VAH z`Wm#1+FGLV)NqNZ5b35 z3_JvN5>)t3^&6d^_FLm70A3;VT<;eO1X4RI>1ZYIp&-~KcKnQK_!zqi6?>|4(>lQu zWg0ooq@o00t^CzbumfT#QoDG2@%oK+Ai`mPJ>SJrN-`{gsh;mHumg$kO75)>{T<>}ls1UVXqopG#;o!r7oincag zi8wim;*(<^Is-{{*D#n%d}%}>s+XhPyn~m^<)sNEP_?tYXH=$D$@Oc|vXl^t1*41o z1zeReo~fNpKrup_zuvM_fD-ZmpXm8es5%@z`G~xbN*LU}dX70IW~{R68(ZEtmgp4W zjYgigwto5GK^RbrnaS!}Vb9-^8MnLA%Vxiq0%YaC*sh(B#PJ$g-vFt7h{teO;>i`q z5bi#ooj0#_9lPl_+d8NqG-U12bkfoW$ZpwwP)}yrG_ycrbV01PTBDg;bS>57&M-OU*0!|#M~}fr8)RIGfv?c@-i<)w zpgskh9<2iR%$oH9+^J-jm60kr?3`AQ1b0gLx4lsZtY2l|D=|SMhJFr?`RvMUiMnr_ z?$1kxbmwP6;l%OK>s>ZX37^Y+v90f{VKQgN_IKfQ@6z4A$66;8_B*wT{Ku6ectj#j zBhlycU%ZcZD<6M^el2U^{ zq2jbCt*5l5Wsb0Cl)39RspN)^&9zNM`}Zl^)UE>gv*@mNQ;Y18-Tg+Td>K7BbLlRn zcOzohw`^9m#z@M2E&X}+KxCn?VD8+j<4VN&wk}`876=eS!9sQ1m7mJ%eONd$hhEm| zk}QPwY>3vqIo&*xF^a!ETx7eio6LmcaCxMGK9c?`M5Qzr>B|`kbHiw}15U(a#_~iy zJFlgPrTiz-wTlvNUoWZJT6v4*0h}1Q;JWA4wUBh;IeE67nmBExqW7M2@r^Ygsp1t6 z?7(m^V0>=Ui|%CoV%yJgb&<M_BM^cFUh($fow1%_Pd>A!j{cVW_E8#5m{$SHBea zWxq@qfK*87=F%YLGoPh{tEcH1Cmp?o#Us=WEnIIB#TOsVz2huVi?-R{TVJWx=ZAlO z)w#Rr0(ra7kYLhDj3{ck>Ky%>T-i)N__1NnEoKw4b2|ReQ&@1MRij((n z()@rZ>H6=TFLEPnCpk>>;bTb}*9>&XkeoubYM!)zP>_>VCWY~MC*s{zR?~{hdd}mM zKz`E{Ipi;i(DB?Hk3^d*j(NNhcX6ndDES-}C8nI>Z(^`Dcc>Yyu7)j@bn(O%+$0c- zYGrmDaVi9+d z^mcNE&$5`qSJMtquc~Kd-Rfe(wps|LD2D86MZEKJ(6a6mR3%4yBW-;$`QCmN 
z(WpsQ(xX(s9=^3#=}%7)R~7HZDXHGyxfa1f>3C9JK%gSc2W$rIeCrB7X*)0GfdE}` znp%NtQxiSc{xVhQrBIlwQSErrw=wc|L=$(|SZSnA(r<-MiZsv92^j z44z_$NZdJcI4-HoTDJurY#h|hpv;DV9{wc?$IDbNg<%wPC8gb)UA ztu}5avIW=0N_L>6%FtB~13|DB<|I^SlM=-DSk>CxrpS9h$kzAB&X=58a_s4k<&SiB zc1eKhG?d=WRcg?GsgFU5aPfI{j1ck5TqX%)soLmEE^2RjLPss1m%p6>Qr$Up0mfTe z=r~r`2=Tw5vfdyhrqO}{l=V5*XGvxTztfMsq|wO$yO*VDIyOrrlzqF_?9oV-ls{#> z8eRIy00`I?x44vtfw~9`-0a3J-Q>xA1zHkQ2lT8ym|RH z*4uOy8`m9Vl`} zdbA|EaI2N|r={6CPJA%2L@Z#&Cac{qROIwr+(K>DxTm=d;;S!)g4j>u(Ms|c&OH8U z30|BxTX{?aAbQ8WPS+doMRD9&6298q&u7N%iawyNuH+1g0OoMqD#$9S!P-1}uXba# zG(1J4eqCD+z5q$B7TQ)zd5MGctE$uiD`42ryvsQjoI=_PS1)yFd?bdxMP=f>*K%uL zq}f@V0P3VY_}!ZX+yciBQGRX1D#i#^k`Fl?HYV$k**4=u(pXcPVGHCIE=&Tafxx!wlZ+&V% z1t-Bf{;le*UabbRWE^Y(otHp6?2eW_v_|zto#{Kya<2|!(Ni79zAb4EO*JaPB~_y& zO&+F;Ds*nQ*iNVAH1z-!`sl6%oScQfQd;kRw!nq7j7v`k)sv8{N%S~Uzzfh90u7!m z=XsugL-hqUC?%A7J?SJ8KAxqpN;KXP0O2t2_l90mh=*&X9l%}I>pxbX0~RPYpStZ# zQti0*X@4P~r-XB?9v?3htMxh-*e2OjfCm3X$@zG2rg~D%h&V8RCp1YP^9J>a+GV+lz zfc4@MXV8Y=Cw@g77${l;nSeTSzt-ai5w38(6R1Bhy1=w1rII;i$tM|*i9a>C9kD4^ zKfEX8quI$e7Ee8NG}T*Qakckf0S6#jC?9t^LXuFOq}Pq;kAVVMDLodKwLyq@sSg!$ zmX>;d$!HotKR25qPT1#<5Al)F>O=%l>JLO;_?>Ol`8pD(-`U^2(VH>X2B{ z|3T_6coa%k>y5eO%->83?6621i{q;~$s4rjIZb;^on6FvwTj^E*uxWm1p04wp#~s& zlw&Te)*4V+IQ+HsPdC%MhDV_Fiw=5g{1#|(%~=4Nf9;)Z zTP9m8@SRN%yae>p|4MXT1HVAYgr;D*^ig=xaQ0#S&TirNu1_48hLr-=VQbIb?N3sU z#l&+9kKwuRoRcBjpVI$0)~ z^NtNrsEXxDw%~aBza5T{BkjbrVe!%?b;@gXf2jdDGy~7j=mSb=wrc$kJoE{Izpv+G z3pk^ zUBOK&PIKrluJFtscdviYUw`_4LW>zE5PjK5gg^Liu=s;c+-DM;04tI+!R7wxpE3C) zF9IOPZI)GO5B^VGmWm?B7JNOf*7zh=dF1d#7XuqBo;TILf4xgPMO*O?@SsR|JlL#Y z?L(Zw67h*L=VFYA%KdGXtfZF11q_!SBEJY%Sp1ye0QMWbg7~GQEV!76zL(*;Wof^7rf3=8uvTAS=1{bL1I^0=ud}g_LsP2jU$bV$O6-3O9)XL zM~dfLrpT2l_IekCt0=iii-}b(6ca0-=OT_EeqL3gx;9E3x+PvuXHPashH+nN*pRyG zI2*}rvdU(Q62D9DUIqL!ND^)&%{1!uF}+di~H1gFSrm=5%o8W0L-l&2ZL zdJ&JU3BRYOm6(qvRzlh0d5Py1qtuR+++~#4UB|YX-tzUF-rDuxOz&?07Vm}}B#TQA zVXEbkm*T&yNs3>p6k825P7*2V z@g{U_u>y>Ufep$oX-54#PPcPzPr@0~Afb;+pI#?`GH#%uWPYIR0N)>D99#?bZ-dHu 
z{5B<_*PP;soP+|D;o44{x!U4SH5RE8>&?zJK>b{!DRXfjdNl31?%g!CDs{1D0*>~Hs80;70KFSLxfO>=1n)eHFv2l|dXp|XrS z-z)|$b7RMUY4c`8T3CTRWUo8Em;0GBnm=#`E@nY2lz2!Hjk&Nat4nvCK5 zUy4@0#s2)950&0Hih^0s6i>xl@!*ExX0FMGl4FC^(ZEHo{R>v(m&Lf?nL9<(7eI^t zS6RclN<_0`^3)QXJ(gYwq&Vm#*k%)5vbsi5-pm(4K@Ye%nDx$}P34qgi?! zs13A};;xA4h;L2jB@Mj=dfFW5cDlUGzW=_YpUjQ*amJkbqm8?5TcvakMHK_LZ4Ipw z-P{zrM8sTD5dp+tV-TjAVIGKf!EprFsjQ?9yG<67?emgS zqJ$h)pPDs=DM(T0)bDoGH6`j?a{=CI;XvvFs0%VnxUW2zoEj%HeqBnkUP=p+ZIW{$$7MG`S-(hBnvy*&0{Kgw@05y~h!+d=;6Mm1 zex+jV*Xx8O17U;s1T&d_kFNG+rRE-7HFRgM~l4f;$-(hLI-f_g2lvH)@0Su`x13P0xR|5F$OOr0 zdiw!bwxtMxS>kS-0#8%iHA0lPRTYzXGolPf{M*=?x`g~JKN4MMH_6`Ftqz_XJsTUL zYl37+FBFyq8VgZ&b-%JY6p?0*cyH4CCa<5L#HQDyev8iLpa=#{@(DH0Ut-Qx;Q!50 z>HM$8hD`;W7LMJu>jmjGHHA+j&Rc+n|5dwgMEym;l$5ii@xI2rvl?gby}EkmX;yNR zA_9??xSW|&lap(??lQfcxOn8p4O*1QEl`#KNeoD*io|17s#1rL6@_1d)~5 z;SCAmkb@WQts_&nZ%r4dO;@n$L$`}ZH=nRSqbqNiWQ^kSS^}4kO%AtQNZgxq10lZ* zlyx-Pw$86qX4=dhGSSnn_O?Q*#!|Ig&c4`qOy|EBVv}s?E3i6^>NB&oHtV}Sonm{` zoonW#l48jjyV8SzQB5t{C-*cgSLa%;HX!;x54Tkgt|QIjK<-+KLkAkd>94pXj}9E+ z)=|BUD0;fM-0*pDH7Lq9^kFumWD6n3GodtUL|1MbQb;hTT*Yi!tF&^@{V`oX3cZS@ zo_t~k5(4eEC*+m;7BxE?PG}=&J9EPuQpviJ`+kaoE@+XtZH}LZIst}MG*~t(??d^p z)DxJosFXHR%v+jX-OS-F!S>OWkx2Q?9kjE5Fq9EO|EiBbYk#|aJ(-x)&mwM*n>OC) z=!KA_uNb57t$2>rhQkS)CKY8GArY2ob|np6-!Yl;Psz7F=LS~;-#*wdyC24ov`xX% zzh@H~d$&xfa4t#EylD=*Ph3v2SZ6zN%lheEY+58uyh1FAlxN#!VDQc3k@6eX<7~RI z^wG9R^zim_Q9o2ze|v;1ywkPz_EFkpaCT?Di=Yz%B*Q z5##Qh+C#pJExaQ0>b#-ethxL_8q*%yRr{POA(RGo;H*|d>LQP|kt&jt@%>*VwjT{H z^4u}MRrj*wM9oOz>btKQxkBMiOC{j)aB9CYb|XcbhP9WYy~R+5_>!A?aa_Wi)^kp1 z6r)5*8b@-e1n2k>s#I`3`4VsDV|3*7ml#G#SZ15ReCMU)QlwwivXC`PczfrY;QEsM zfv-x?Ef$X54~ z_y(2Omhs39!Iw{xOONaX585`W=^w#M3d@zKQ^&sKmf65^QhwUR4xAwykFTh@Jns%0 z`O#4H%HR=`B9n9a?2bm|Y?TdP;LF`Qw9zUZKGq`h}Y=wch%_~(U+d{zSs z>n@88Ya6B8A}1F^my7I&2?XbBVs)@{Ld{QvfTQnI5Dhj1^G4n0urA6)yZx=-GWvcY zZFwA#qXz1YN#Y72S!{S)oOxl&#ZMjD@ibdWgz)F$mF6biMQnc!KdBF1G8tuXM&0QM zf(SYrUn_M*Q4wE*5>4|te5UY0Ey>sBGIR>H+3oDHnEP{M=Ss2Gf*Q9)j16+{ngiA& 
zB33u;_O0=1#Ot{g3e6|(1|p9J-ut*N%9K4ZF@NP{%R^VUMLma9(xR=IS=8On(2iIg zs&2Y7y5N|&t{G$}&aJ?ks-|1!JXAe^PI6Ceq0k7j!}{)cDBRwwtmd~wQ3)EYB8?CNyymUKriLQWr~$pbtuK%NOn;L zmC3|$=`ku~O-RhFTQ@4%4yLo(z=U?;+rRm^deo}HVN_JC$^5Py%*b`u>AS3*1!9a} zVv;pOQ{)6~Suv=`Xm5rzO?OHDLX9hAHC+Yd@uWFb>s2@D42;GABPvvU zD`!>@mE?5kq@l~u$CF7O0`)4^b5l(^@Xd~2w2vu)N8h3HYKzF~hML8$zimYJ@2iM?yP zvTkw9+1N4AaC2aC2wU$rGA-RYvj)+s`rR!ihViuZ2Lf8Ei4xgKdmGrmBKt_$$G)|a zRvyn|R`xl|I_PyIvfL1^aK5&0EI4Uj7i+-IbonY*kfVG)m5204Qo2k6Rs?GWL7@8X!3RlhDOVrqzEe_HfUkp-oub)R zbjP@h*IskSwm?GVU#iva6i^kdwqEor&Ttr&qr5ZScHusO~`X0yz zjIyhyrZ(N?IB_RI9gjFp5Txw8Q}=LVK&4w2rD;f%z)tN6Y#X zb>7pB3kGzF9cA@NUV}rEV=>8gZ>o^3IUumRQ9KD|nN8_1pS5bGIYg($B6-oLv^azv zaOxJ7Cs&?SKECu&J(rz%g>Kzu1q5n%!!BfPI#X6u^SCX?ZqK!w zvx49C3E!jIxsIGPnQFtRV$`~$(xIXJP2ZJeT`VIR_==OSm9t`lZX$`{j{mV|3wh66 zSc-NA%5xiFWXx$acjy$}r`cV~Iht@uYk zjmYCw)|T|F?CEt0nyzLS(tT3{+L7ENaktd99e7+@HECEoN2cV9a`SdjFNwc22>*ZVy?0boX%{y- zqK;jRsGu|j=}i>r0UQEImo6}5c`%p@Kk$uBns+1`0@w*Ts`ZJvTJ!o zHP`aE(H1sSQ5m&nmd_)AlQLvl&lb0|`qifQnbGaSjxSx*=GLU0q8Comz756(`nh|y zb*mK#n7qNyf@&u352@N&K&o(Pls;hWp%zxOFQ`Js$Y^N4M)68g-m`> z2WvGVIx$ip)o*@Cbk5==zw)`>4D8lafVq6n3%CeqPqNAJEeE5Q4D4&$c^_6_#3%HJi5{F13?b|>-iEp=ZH@pW;I zETba%SUH2-Ixm4A?E;uf#0*!1gX*AiUdelFUvE{Iv$gw&w$y3m?UE+Ixx1FfJC>7U zxjt=V+iq)Vvd-Ah?+h=yqJSQk69inr{-os?o+|Wue7vBcqn<_~SAdmKpXL05y?wiK z`3f()pnO9vl- z0nxXcJN~`4?FEQ_;}>t-$#6r9F@K-*l~O%nv)j*B@rILCEg z2gT&woctP_P;bt>F_@vCbBD~NBQ1jbwaPPBGiLFX?RtKZ1t9YR<KrqoB&$CMNG;DG59hm_JQCtDwNKW+}n4@N6M|nBWRC*5FNv z?UvZyKakb0n)3$Lw<$)2)B6mIILogNeSDY7hP77V>iDB`yh)m+3%u@ z_C{cdcv@F+|Deu*h)9i4Oso2q)z^mnC_ArVe($*&lWZGi~kX=J0V0R~{Zv=|8)Z9cd8 z`o5@}fERHbE$+L`Co*#wg^{Cdn&*iDWX%YmxE<{z%y^)3^3Ehdkn`C@1u3yoDA&-p zU@S~KzS?X^z;}%VW(?!;+PH|C>1Yf;L!4fV)hyvuT#-Q+QnnHBUb}y82Acx*)z~am z1(qFl#j9X!)xUUYw*}+hc&$Sna=B)s5;d=iF3oLs-fUwW`D^^%KY5qfjy((uNk)~T6BwN%}EdGXAyhYPfWRCmr~N+5~R{4 zn7C6|l6kocIO5UGoc!d3jV0%m%t`G_++D~cp%)6;_w>`caW_4-y0k{Cvb&=BNI1Y8 zXg+BzFEi>9^3MdIgXj@*owB9rjX-gBjW@cu0ZW`WTCL(P2+`o)DCd8J4dG%-EQ=uM zxSbv>JKr^TBnG>vAzmcNht*)OB1Ls%G70x 
zIs^C7Ri5st&4gPwq` zy%3kL2Omy2UT~cVrsi$kKwd%4yx-i*Jj&L!tpo@)fiTBrjxWXnaK8}`O()3$Z}4=_ zTibP70Zx1Wa*Zf$&GcaD>>1!>8EC|(`RkCfxvO=6@D4^Z3Yy%V{CeNo4NAW;@{q`L zPq#!vdu;VfRmkBIz=!YLGxvbOGYRi4n1DG5IIds$p zns5e6e0EuEbU^eTjJSmA0lc`@8Q#-O1&~|rOXM>tIb)W50H;sIU@rWymXvYc-iWRc zn)@zR+BtQ_8&dhn$YHqyvpS4@PH4^!PS@z|KYaeH{eK%0zUxu6A1eEgdH(0aBgH)&WX_;!QdMcc25Uv{zVq?;x!4p6 z*6dCMeoX6tsrp`h_qY&EQSc4ouPC}NR0h8D;OLR$D;k#haS=Ze?t3U#L?P4=))j0B zPB{SJ9Q(&9&sX=D+`0uA%{u>s*c7SrH|{jUzPFeF&1S--Tpm>HmJ2y=1P&0t+(u;v zsV>cnB<%;H&V}qjA$@z1zS(A|@RrL!eEjO`fq9r$Ur}eza2;$j*0`9>i+0Pco!gH9 z#KHG0VcsBlGI>G|Jk4-Q|3~@QGw&XNC0x65aMAt0^mCwimN3vLsjaM=5)MqrluJ9n zr>n21<`A|cKZN}N5j+iFS8(Y24m6rYWr(>fDeR>X2fL^D!vm!7d)QTf%6vj-emHp2 zV&jzR3h8)a*a0DYO??E2yq~+Z(R?3y1GB;|91%<=`*Gj~qNHMuaym3@In|#0;RM7* z1N1O||Dn0D{r3=t#ey)}Z|_(9A-s~R+*>`mY<=&qKwo&*e`W#vXCJTGzZ_ahg#%~n z`ki-(hV8GswX)yI|IF(>n)5E4a{7ui0Fdze==kpR2~CSR;j_P-A-Yep|61sVDv)%D z%d)TQIFLK};UWIB*YD2ny#+cW0cZOb3s!jFrvAsze_YbfgA-6`aoB(69REjwJ5~Ks z29o2zw(Ii~?&vQTvuscEl8F@Mi-{=7yUhEA;oxJf0m?mf+va<%G=6c!)!n+n@x|Hc zeE{cu5A@$1#%}D{2twtj-@X%yIbA=`&BA2*T0dZQW9t)|B~fB^VA>(QEH~(W#vGu5 zu_v&i5~2(GJ+WdnjkgY2M`*ITAoofVrWE`lEEUSjw1ha56T_7W)#uXx_Jw#c@O|b%wS(s5&2#w8e{G7i@vjW2 z+x$EPykzu%WbOM4%_YDd1%JN>Ki=anRWz|5;u4=qS^F`MZwv^Cn7co1*V;H{I5Tdu zlo7RWiT`l;zf=a&USK~fuQA*YuHmn?^51pt@d818brAQzzv8z)$An_cHNowznSr zQWw6Xu%F=3%}}4iAo96l!9U`RD6k$_JT9^xc^K7R9Cb7b%>Y}Q=VOFn z=B4#E1_4;7U*bC|laZ-NI=J#4$>*xx)se?cpDMJj@U#6}?)q7tXgVELX0)4jJ^_)V zX`0$DpV#O-RjY$`(`zlI`i=nfR6uUYwCwEPBlCYPKessVwdF=BOji@5+E+|c+o?l< zN=V9OaKp3jP=q&Z0jbGUw)oZ1jDMbIE?uf#q68RgC?d<6E8aM4RF9jB&>Jq#Joce>wyoESkBZG;+V5WRR5$=KinZ3^#w zQNBvZfaB^W6q%vt9i*D#;T_D~FSnGM2ypFbQ18}fxSY)5C5IxoGN@A4fPVEEeC6f9 zq`BVoo#Me$%t%NEjCm7YbqkZ*TO|x_JQvn2*6*ma_$tJ=5xygP{aG-`a(582djnGo z5|UHWM#6}Ra9cSs)>?r=&2T42g_S#^tx!+`b7AORJkAot#oXSW#=PwYvV6G%IW1!V zTHS#=F?GWqX$4N^%FL1(#;(;HwQUwLBV%0ys=$~~kflLNa20!MQh4F&Ht3X7QX}Jt zZ+Ac2uoB*)hH`y6JbTwsAWSL3wkF#D<;)r%Ux)$;N_70(S1+!HZGnmPRtIfkc5dk} zCt!rvH{y0jr$R@D#yJF`m&3*va2_Zz6RoB*APbp91_wh(ZAJwCVXbLwzERdnn^~OC 
z@ASe~xG?1mVG=8qG6v^X(}2Aq5t%4;&!zQVEJD`PNEBA&yikw6trA%m5?H0$6avDQ zrbrZF#S{qO6d&>~6Q&h}bz5_8kM{PD#iGG|mm6UR4CNTLFvc)1G&no7MW;rR!s1)F zW_O$psL)QeAa;g)^o6>ybVrWczz(4|Do9?&N4~T2optZ{*$N~!VhiGf0yZ1g6Kt0B`rnyo}aeAs8)1if24Ak&D4a0kUI zc<4E+P+eGSjeoQgKZg&8H^5!npQZJwGO>z6B^s|0U;<3aGmo^+2DR+^RED^!P!46a z32axCED0pzp4g?Ox3NnGu`eVbGUNqzgj_j2ctY}4zk0-fE*53-%`aKHm8Rr#lk^MS=P%Ef`Uc(3|vrTTj^@4CJ71#PKxLD4q?js@Kel_qe~<@<;g}? z%nFlUl!Y8n-Z+N^^F%|py`2fAVFm!hE-Z;@siKdLGZyowZpFoAOVGe18N*o1%k&>& z#F~_d%a(4Pr8PzbWcIHu(%+2m(`CC?WFDnZ+jqPrw4q47Dvk@r19Yzhy-@_k0Dg{$V+GB;3%v3|sv>{%ua-qO{EZ8HM{fV5bP4~V@h!`(=; z-WjbGVldUri?PV-l{rc(ZB&`~os_iz-4gJK1Y;7(=K7`znrqC{y00m`2%?G+*Ijg5D6~vsch*Hc>DT1w4t@ zUKAViWvmPWXWpu}5tyMhuN%pMvDs@_W;ATjM>*-b-iZt3n_Gsx9=3M=eNH2m>#yMg z4(ay?b+LU@F~|tc9*r}euMKK5AjTCNZh{}WO!ZZV2HvG|a0|E#;|l`5 z{yN_m)Rx`Ij$vWh7GsVHfhj>e?@1Iu*Gk|ikPC_g>+P%y5j^u)_u#eKp)G&-N2ZXF zyORb~YTQL35IyBowZgz)+*Oj1oVX+=4O9ua(6PW2=AEU-8$h2-Qw)P#Ss{VQSHf8H zrxf+%XC#@C)|DNDm_IF>UtF+6suqj0oe$o`#p3~n=+hyh2Q2zveZdH2Lfm7i6MFII zqPtUuk%*{S8E>J~K}DhEHry@KP>RnN)l{ie!_5hFeDJ#|cp=#-$y;jlysfR0eF}P| z1)b3dIUxz!u_~RKRnlg&yaXr=x^AweiCii3msva?#(Hm=-odQN7jjzmLPrO4u{6Jk zUJ7>Qoc>y^#nWq>cB1tnKV}8?b8XM)-0(eue8$BXdY6*A8-LS+J4C{A!y407H~5uS zuWcS%uFmLD5Zu4;aohc~*Hk%F*K2dNLqf0)ah)tY-_(!D)8wojY~7>>%b8EbHap2! 
zi)$qJcZ)til&5jO7Jj$Gne%TPT z^BHDY>2gw7@oeGJX1}%1>ImghwC}sxskzlZH{K<2m5>_h)lD-V@4hJ}%z5$aF}?)m z57^b&Ouyr`R-lYml&~`1;0?-sR!cc>=_Rdcxu9ofFF&PVcfXK{HocxNOsrLMm#2~^ zkK6FAlZ#Db*rfK3Tu7y&xE} zI8ym&>OPEC7P>k+0USpUjzy4j;z4n29ZAD;d4MBv~F9%C16ww7PD(<<}CkVNPtN zcS6qhSv`BkRzx>6G0Y=8qz$-Sii@C(fQd7ImCX}@mR|yN42Nv`tFdsU6V^Nq;4FvS_?%;Ca zUz~?EgkFvB=;npTcynn+2By&V`W;WXmlS0G&%ac?D~s7Z0h* zeWjX$7f7BWbN~d1Hca(_G=(y)O)qAT#;JDpY{qGbcMt4(J5QE|3D-CjDTTrG#`;#~ zj$PVRYIH*qUF5I%N`KZ!qz!9GNME_UeS!mvhZ5?O8sw%F*&3v6+E$Wxhh~~w$%$1b zsehR)IbO#6*vwgha(bF^*T$l;ddNaWx9RX8nFcX7W#8*GZo5dzE@r+U6yJS&SJT$>?hWt_VUHYqeW6~!vBcKj#cDbp!MMS$<6)ax zsCopv;C-745IY#@WAo~+qWQXo8eR_UTtV>3!X|qvykxTyTHLsEv`PnY=CGp6c9&@v zA2s%R8p-=?SB%h|ujl0w!BlBxG?JtE@66eLZ2Rzx0DJB^d546Xau{e71{-XxPZ0HX zO?#gb6gg2(DY+ks4!EbAc-7aigQYg>FZnar>6O{^u9sMCXqXaDUOMG}Ol&}OO{gZV z#@%0r?_~_%l%9eR9_3P*)tdhd_tvap=}4>9VwLz{LsM+ZPdMX`nc+MGm5dnH_;{@Y|i zi)Q@9pltn6a{oMIu)J}rs`IEU3GP?BX*7g(yE7>qL0`!rN%sPgVxD5;v0H(CfRCPj z>sRY$Q1^P}!-R?{Rii8`MY*U>8SyG>TYbayd$A$mn`#+rf$dL^@&(BIWNDSWk3fj} z_%DdMw^u)&!}YAKGYd}h&gd5>F&QP(MP*G<6~mk$gt#zPXC+Mu`ZJavAf#CZLR%z* zD3}%a34IiJEnrAxCo{!s`^1%PUM#*jh{=b(m?>2)nT8f0xYh9ple8)-jQ7q?#er z<8S)a@*B60zC4;rw&B_Bg-o;sn`}CQ)YjEiD$y70byJyBtnBJ)TsAdZ)>Q2%O%$2W zWUE%Ea3Xt%q*jJ$7{E@yW9dq7Q8>+ms84VH9Rz@r{hFX|IpW9V9 zFgExrA^9avEKI|$mw^w~Wh+-^M{R{m2y60`4Dy z5;*5|)H2bPHQAe+P`BP6FmV$v@Eg1{ID=*N?(`yOO1AejkH6gW)#qbxlijGOUXVn~Qwqr_w z>X{e9@W9K=4kVv$v>vf4UyM}9NM?AVtQd1z(o8!v>%B&~Ube-CySKPY-b3p?YcZAq zjV<+hb+N{qHTHBKQ#5I|E&-5S*UZBKnuCi@>X&rozUjp zGlP@F%hfMNx93qax0!2I*ANP>hV@XgrD^yzh#EWp&PYZm0RiT7!6ODmpFaS)$P0*Cu@n{l74fuUeYwN z&M}RtE^GzI;(ceI@0}hSRBPl=e~!Y$V4z8HgK& zB!qk~KB>KQuJQ6l$I0pvD?-EC3TTDXIx(Z#ZY;u=vzinGv6F5}U}7cUpX2D4?tn_` zZI=FWpKEk#HlU1jY{NpBC%sj-p(BKD24Z0Vd-U_?5Vl6?3o5sL^FER>U|&9T)W67n zdKwu|pXiW4wdDyf{Atd(GPR13@X;l74A66>O|A^xZrV}}c%L53dOt&5_9}2JXhVy*Z8Ffx{>#s zcJuD3j3z4gN8i(&}W5nq@$P;;j4P-`_%orPVX!*=ky46?Wt@ij%jN>?}<~ zw@0UQ1^Ijsb1@90&Y1PJD8UL^Dz!C3%62WJS!hPJ=kTrr5Y@iTY`fB$o}|#8rs!zI 
z-d3)V1PaEkS6?0E2=*vhTW=GEc%fkNK=nx*ITeYM+7^s(;kG0j$lIlAdNKx1Pc%1i z=etcdEPXadyx7U4O-Yc8{w-5We3&OCG-So9a-Dl_umCuGq^j95*PdJZxO_G`7p85b z;P9}?jA|~mBcDruXT3KVAzQf|WmjK~#aj@%dzbMc0)^(FwwgsZuT_0BKm^i@*{>Nt z4IDXOa=xp{e4m%qSAFd>k;M?t&_b(|xu!W@MIRL9=Qr$udPm=kd)Ys^#Z~HOZL#P+ zCekyk2NrYbGbTQhWE|opnlGcYp$vzIZ3CYvIw94!9mXL*3jm z9^5WqwBE{7nEL9h2T>qYe{>0M!Zbe^J*AiNB9*^D;jvUrHdLOA+N5(94&U&TJJNSN~`h&cg$oeZ?833Ac6N49~es zB~BKD@Ih58bJm`}@qu4QFt`(3AcVXUbjaQ6Hih`k)e`u0bqICqCaT&quM79C!|5cw z&*hDj8AmK1I?932T=+FaP9 z>36QasBqJ+Nm$>XL9XqGmYm~dQ!ZA++8OTE^A9$#BrGVpCU;N8VBC?mi4BSwD{@p( z{ZzM=!i!XqbG&taO)@q za!84F-r3UZf)V_bHDyu?I3E)+V}Me2SHGzS$T&);PLMBNk-W4~da61g7yBTRlKKjo zdSmxURAZ3ml~%~KecCmdG;a>UH9*SJBlrZa$Qkj44$6RxLfpSR;w6_badI%wuGYYd zWa?Nj!|(tFzWYi7uHW7|8T*{V(nJBA=gmZ$@4L0=XU>{wgYps>npy5kI>L{dSVuPs zMml$^R`6KbiA71Onr3Abt8IR9NvmIUsSP(H_zdNqF1H#Dv^8ffcoAU)X4%lE;*W^X zAm}A|$J+t}Dzik&l+hMVuV#d$0~pphv~Gd6kaf&3GUZpw(?S zwTw6$<}17eFNmCM??`ne!!2Y+Zxos@048EKtW z(A|Mj!n`{jY0`RdKys}E&s$sX{z*j7{`R$sB*k$gk#@GKq&UnA=3?W^r<2nC(9^v} zjWjg$seU#bTfRuu0*Jq2;sh(TW^DVQPixLVcrLH>2yVPQ1DOCECd|K<^L>lrbd;Y~q!=_V&?T-_Kt@1&DU`c>{?-{RJrYvpxYS{Fq?RVQ%hpoiBK6 zX>P$hZNxWlP!l(p(I5OH9wvy(b&03ys4gn29 zrgQ{3yn5VZaMi*loEgG*Sd=3bRy6pc>rH!>RPM=hEoS%+uV>hp+-ayUu76w1-)8Ob zJX*e{vVSyqNAQf9wU6M))7Xbue;sZ(5yd>#G7aQ95^K@x+&)kxRO_Nzu~v2a3fV$H zz3~`CoP&wD>gNw6PScpfNJRW#^@~!ITB^oSL}!wzpuOv4DL?IURGcRzk}-1Yaoc-s znPnG{QRsa`}Tb|&MKi& z@~485QUDuSQn2db9J3@0y{Tb{H4)?-uY3M%7)h+0!kJUHOSoS-Hp;h&y0AJ6$25gl zGYip#`H~#S*{T_dv`P5OW7ojqNe!nmR^-hosTj|1+e6CqU4ufzE{dYnv+?R1PdH9* zO7vC=>u%?U6Z>{_xD^8fPRt)ud4&#l1}bp^4F%h%(eo*7n@cUTGd1CN(sv5R!ddl{ z*E-C$?v+lDFAjiFU-ju|kI8YvtKNoj*O3J0IV8@l4#9Cw4!V#A@vEG`QO?*V)ctYE z&RehssFkkSDv=FYh(gc2@sXU+tO0GHM@RIU-Xft&yTAuU9H)s6YW7OMa|lFfhlVWn zgM=XYp?9h1kY+=&@|4lgk?fZZ5-dXF_!8Q`4O*39G7Coj;W#os@!?9VJw^xz!;tkZ2fe`p~9 zFzQ%IF8&2mq_U2X8UfR`kkq`Ef47b+RojN9AnU|A${^vQbfQaGLi@*H7XMh6oY@w* z#I5()f-)p?kP){fBYT_tHx!2-tDk4a_8nAgq@#{@USnQ`$Kz&HE1y?Z0Ee=TzSwHV zhu@Hgr^wJx<+(GAo=_TkN_bVl+mA<|d(Ovb+goSVWv6SHK97`?8!3OjX_%JW&|0ps 
z8KQ!Hvu(0%#osHvA#IXx=@Qr0ey4Zh=yMgzr@fRXt2J$vj<*YeMUCQ&y>>n|K7LwJ zf}07SN)0zqaG7jK`tli^vJ5Ce$UQyXSHmpxmJ}$nX=v}8F~sA_<*v4t#Nk>8)y#kg zJ8}ZaY^stg44_`1_80s5e*119Y(}G=HYIop1?10YS4Ln8D&g8omRaeD?K}n1%#~x! z$efjP!RtUFPt`OC?`9Tnd-DN1v=27m$8*8mrj3N1sIiZl3d+F^TA=SM$%2V4 z%Q@orbR6NjQ3_l|0t_=Yx8M3bqT_TP;+r?8dUA41^BepSkY>OeX%f>Z7_Y@EIlXJ&y=t8rk2M+&H#x|07>*-GzoAM723#>r|oS+Mept{5HJ>G#`^woPef z9fJuoHM>9hkE5W%OYXQYn`0g*z9WJdau zZ5Wrz5C>K;PXiy)Ye=*DHe79)w%TmH=@0$*tq4}i$S8d#57#%>>iHyJZsq+Uq1D$k zBM>m>k#qBTNn!d*qs(#-T;R}$%Td2&g%bV4iY;xHv z4f^f+J}jE!TU8VaHE;|s<81F!Tnqkur#Ni6HcRmQ^TFxop1H1TVh~Z1rVl=yBX`zACel2T?C7xQ|3ntxk?(w;2XLYm}wNyg3X0JB6| zWr_9A6FI+en}+RP*9+~!F#~-E(mR#rV~<)lG~!$+>_b8Hw8f?m%!=Tio^S!MlBZ=; zDqNs=N(AHul$V3U*bCi}bL-1P!n`K4vP{svVvtJCA$hksHXmOO*j<0v&E9O z{l;B+BPDpC)&U)oY^BoROmK=|^~~_^?*oS~2g{-a1-pSdbq5a08+?~aCnRu)#UwX_ zb`R+^j|0cC&cbdm5yiV=vLMqZ0AHy}oQcS-lCmveIb98EtGq99Apmzv&GOP5&Vo6q zxDT(euGQtu3ahXW{e(7(%J${LB!G;9sw9|TZudc}m7vg|Y*{7E&7{|Ffb%1h@OMp{ ze7MG~=)DpH6h6BwQ*jB5YV(U;!q_Sy5H4`y zIygU`k1|jpP8s)eR!juJMd#4d;!|u z*iu(b3q&FMoB!(6 z*C{2Z^^~!#(ewb*R++?0Vau(uIZZ9ha%o`+%hp26U9PPI!Cq@qG@Wli2%giNFuTC& zKVpjKg-p1Jb(-x*YgSQnE%oX?F1KQosU(dqtrmn!OE5cf=tb40t%ksNSE694q%ZIw z<~5d$rO$5g9MW=IsI`g>b)#@sK#(KoXCd(Ss$_iX$g)w%N;yk)n>zMTSU*dWD|g$5 z-h2(mSz%&Sp9!iw_TJwuzcZ2VDkrT%0%Pa#FaCU`t>d8%@yxE&lApaB!9p6%)sWXG za4m&~R@I~zgegRDCtnU@a7|Lkk(s4pMy~c9ep?Uy9YaCtnUP7i_LXk3W=6W^-J}U_ zd&G&C^K49}aWIF7O&88N8$j}k(@$`21H=>bZK$po3r2KNA6{w$E%`Le_u}b6v$ACx zH4gR_w?br7RL~J^vl$??ed6PjMV4q)IUtg+=Ab&YSEs^ynPc)2s-vCHu*sAWstsK$ zoS3#>!*yZ=8nUm)sACP1w$Ab=p}|zAi-r1FvvD&!4?q8cTs-P=eHAI4QUyiC389#Ev8Box5~P?Krq^sJwV{A>P@ z^b_Dp#+CbO`Ujq~fapA1B^d2^c`8-C zD4f%rJ}5NMT?RtBb+KPpgVjm-@Y2j|8sPQpA(h;nRN)cg*D=4Df*Zw#PJR^j5TYyxitw|2wMsf8=&WnO-qG;fg-m3`y9)y;Uqe zeG%YCk&z%b!qGS4$pBgoC{;H8DlYNh3hl%G6qXFFPKq6 z#X8Nd@t+czloN-}0gfJJlS->FeWT4y7qMCKw@E5g04m~;q>xD^a1<=k7aKy>l=9*X z==h`LnR{XR>GY%hN^31q1*9iLss z&J-+Z8xsXobj@dNqZTrDGO1D$Qo9Pmge5RA5Z1}#UXu@iu0_^6sX=OlXs0g+2MZdB 
zx!C*gbW0{7d;#=?lZlO-tPO3LM3BF1KKwpSn9q0yqGJQlrVDI`AXa(bkdc2?cl+!& z?J-A!(Lw-Z$)K>sQ(>GbikqID`!ryGlI{Dh4?qmOz zO_v%x($1R39XEr*bD~x%!VM&}+H_b^$@9;sBj^g3W~@fQa-yt#NvC$wk0r8t!Aq%u z*X?Bd-CS07?k*LDx^u~{;MwFVYD+Q|^kz9-h~e^hn9YhEU1Z13-7L$Y-rYZ5FwDg8QyV zUa(9x5mI0qkHC_au`L^}Q*`|ybp88Z>2SbB1g0ZJ7Q8Moxe^mW;&dW#^=+#MwVU}G ziV|IhT|3rX#kwCi!ZO|K1k|#&UVec17Kr-JLEnA~9o*Ll4q*AmPhKETBI=$$p7@0& zk}}r(GH^cN;-1~MJTK>P^p$`Ji?X_9q^Bt}Ua#V*VxhD#yz&OKM~#h*e5{9joY~8WQ{RkL7_sz*>yw z0TiT54l*_U#H}Tjhn92?r2!trv-T-v)iJvgtRSfLnB7&NfF>Z%Vq_Q4?y(KA!I#KP zsQ(qB{X-)u)l=RgvXWzD=#BnUzR<;2-C4lo4xlGF{;(474|C^!B0Dp2@B9@D-6}u9 z_)~>ZsY4r=!`v#sFK;BAs>YGm(2QZ)cjL0@sLn9PW*5se*f@4w;~`X9 zDePqMmw9epB{ug9#n4SPGU@CtWY^oMIR>cjyO$I##i^0GG{w;4WrR=&*zv@R16|tt zx%|-J%c3wwmqiZdImH|3{zzWm^EKuf2o1ll{fIG;fTnf~p0~I{TRk}2vPp(4CUsOU{tC}4P=SwxB~)Egdrj5prJQ!r9Bz06$r0)!EI03>c|nn z&kfRsJ1zxV|I@D;t4Lu^l{kyssJ^t<<-$(=b-i&^#`U}(Nq+eDFVY*o`E7H4{ZREa zh#A?=v0dV;$qTRY!ENxZdXnKl9xQ~Ni%oXT!u(9w4g&5QFvnFUDLL;;yP5zWVR8+< zJBtb@5Bi!f0{M=N=rW9=X@88fEhV3;m71@5S%#Kzc)vaKW`Mg{(;B>FE&gsHpEcZ| zNMGr(F7j`3`z4>-(t23}r0n98X{zt+$@M#>Q{q?8Ryk)hDhp=_;VWE zmRiyN7bB_(y2bTQkoYP9+8klY8!VhIGrLpvC?T68cpa3+ua?r)`*j|Ei*5egcC~RY zW=-HfeK`-y-@Tw2(<=4+^Y?GBxg9%Sm7~w*m|V?rFG)XU(rRCF4 z$T9Xd^u=m6nO!$|eg1yyOUQ|QA`T%5U(z*>Q z2byRod$*$DhnX7&a!m_uO>M}^kocap1cL^f^~x}DUd#{>cCZ!%-i&gTaaM_t=?JIh4rX=@xPQSxqsav*4sN+LIV;BZgkzon>C#R35C^6k zPRqm_!p=Yr2urZ4W_P{Yik85<0l7ia7iuJ!lt1ax_Ns{Rh{AX=vG5(4^wrFZhJ|#& zrzUSJ3&-v46Kjxmu&2dI7U+}`S=X*PxBn6Tv1@;Qme=BR>Ar8%-?QY0uSEHP{?Xasq+glkpRRr* zYp;2_&%P*V-*D@nJkw7WaE1aT@1_+yyq{G6!-xK131gL006DYJQ0fD(q5hu_``rR_ zb%6FH3Az4$fS~_kGw%{F&876|%woP<@aH%6<2ir-t#j)}@L}#@pA>ZKW+ijC)zrc9 zamUZy1v*-*D1W`+iKCb7DqCx0CriNO`S2l9jO^Hf@e@E_kW~2Q7)xGx1jIk1MHUCN4e?_kYckcrMGkQg>HV=$d`d5!a z12n0czx?p~{yqJK(FdRH|0~x2>`DHAcK<6D?PJ>iTXqw&N+b3wx*zJ=*H3<8I+~>< z*f6~^Psj4$mi5IA&x-Z5-?bT0%MGY|wnPNFe)nm`&ccsxHvGB7->~1aC_?t-r+x^r z{Hoc3{^(jwE|f@Lb%g?pSZ=uT19U_CF^o6>)jKvK@$=Qb*Iqvn-kz6>=|ydIRKUQnJp@8a9wI?_ZRhLu=u&9{_)~D8NhgWt{UEY_~ZHTiYnWD!)-b@ 
z36*b=Qnyv#VYkrvp8nt1+1G=6(@i(rdbWP>gp9{eIs5F5MD5&3IK7XxKijDBd0=C# zhmiZw>YjIAr|pdd%%HYAWcup~A;)dBfMqH;8zX*<9{l1!bEBM`+NyDJZlBZ+7^Z{l zaMA}1T{V$ZBl-iKV&y34b@yJ=xjuK+G3VfA`~TvH-8;--VqW{=UR{)$3FYS+L2 z{a*}tiERc>(N2s|YtN5+f2Vo$q2t9UPq5fD27un!=9=~#d|mWG?K38w0JcRQciPt+ z;B?=32AI5CWLR0e_FhD38u{bhe$K{+_W)8|ZrUZIYAJ<9+6kePh()Nf>Ks@TTd#0R z=1xuO{d*Vgy<5{EU%GJU1O-8EkwQE0?lCn_)ju@!q>jI#I`64`x3;4^iHvuvu1`ox z8cP}*85^nJ5$E$+6i0}!civiwOBh}1E~Qky_T)E;<5&Ot&%IL=)3GwoQ5segO2}hJ z&Pn}t=$HSz_BnE8J}VD*;lNKZ$A`E%YgG)LgJwOBHRYrj`lB>Tl-cj>Thw^#_@d)H z223SS?%9%?vY$#E)EPtTt%&V%E>|?nHtw$UwU8z3VDtNTySMMpMZX=^oX3y$I_=-6 z<4NG|@;bx6U3{q8*4BP<|MxGROXX%Qp&%c#^>0t5mv0?j6U2$*8yzawz1?g^Yd%Zw z+veVXv+eyLFt?O(=KqrHuTN5uPr6buJ0u>pg|!%dF9jdQ0j7Pwxksj;Bsxb`CasW* z*W@sb-i-+!^3(W_3_qi?)tx7YILjYjnmhahe}E+~o_njS8E6}g(g?hmCb9q94UV2M zluyNqftQTeO6{$&&-FE2HI!H6e)cSv5B;9~xijZA^Y;?4x9Lco*F(CR0>T*u1s}z- z>gr$L-cP%8*)o3Zh|~{@9OdrgSgQTIIDYk>Jg~T?dQ1gqp9s;I_+cKZts=a3Elkn$ z7~WE^gKLLN$ylg8*o@>%l5u_Zqk!&h?{f&ia;sT`9;*G9w)38Iy9BVCLkxW80QuJ` zq7uG|HfgHw&i*UUNxi0^IDIeYH%{dP7b;S$KHS@mo~Q-x=sqF$vpS(^ws*^%0j#Oy zenMV-BBgxdhEztT%CV&VQvZ6F;`z5_*An`e_7iY}=^jZ}GZvfy?FJJe2btH-^X|7O z#~>ub{tIujzw-UIo%b5DG5ZB8J6=ln8)0*SfacGi5H@~~uuCs{826Kh^Xe<>OBWNs zGGt;ZMQytL{_oFaQ~9lR?a70SV>_00|E>Ue0PV@cpHZ^=az)yG{cCAIjb{ ztjTBF162`F6cHt$BA_Tr5m1oO1*IsU^xk_5y+a^~NHHKFy;p(ILhl`oNbdv)y+ud> zDM{$u=-&Ie|Baq=?)}D_gqe5NthIit&bW@aTNhXFTsTGHv*+{K6k~rABQZDB=Px>d z?;(*GiNY){r~lQj^TPYz>+KX6Vn6*(IM&Czzei>46ZVn*3yIl&k=S~W_VhpUaYYD* zKz*N;=ML{)%mSa@b5$in#d6qVuq>dDrvAmB<65iyl59`?AAr*X1E^p1We)O#I|Tvb z7eAh6!?)sk6w%K@D2UT8UCbRiJuQ*d5=d{PE1N4Oo>pAxqolEYne%DwL}`WO{9rmg z?+6{M)o-3T{$L#bmvJd6h%8K|K%sYfHR+M(fFYfj^&=(?C#H&ze=rn|P4Vy%dv~gG znnj}iA1d?Axg|Qox3YV0tf)_IxnCk(ys+z(8Q+YIQ1Bn4N-!&SKVolkwy4bmyHd%# z9A`VdAmSSSXSw@xN3Mz+T9VEaDb#%UR&3i|0;Piw`(suj3v>KZ5HY;Z#ZJrBDHZqo z5k~DN@S6=jCO@k4`*XPA;9s(~DA`Z>hb;YNIBTi&pT8-|frdi0e0TlKSJ)pIbpB z8)fAn_5NRIaC#vnx`wTNlhM=JsxusqFI@iA!f|{>bO!5LQqa!#G^gI-)qH~%{{wj; zL`n?T9Ki4HwO2JauPZq523l@lY5!R13*vTN^ox-C+lK$wXMxIwty*^Z+f!DJK0gD? 
z9%AXwqDS$m&g6T(F38-bfv!?$5++P0*(rE?^6u`P$BXBPXUC>`?thJPow@&qK5B`$ zbU(0x+~vQf_=9S{zfIRjcXxmy`2|R?pSPBr{rR<6Hwv_|)g>^wj-DKNAm6 zYH?S2`pT>`*!|4uclNtWYk6b-=r0)kCudyTd8WH2;U{kU%j}}}2qzjrwyBI>mAbP} z8@tc^$(MohL_H{B(U*D}5&j&SXm?#Yn2z}@X;5v;Q zpqN8nMK@PKf%>&$)lG&;@h%Nkeae_mdR!61UG5S$;@D55VWHUm3&Rt8o4Q_Ye`QF4_A z&2pNPX;=!b181mg2_;)ZdCAx@TeDci_UCVQrrl=0C;aqWr5mK`s>H6>92#T^|JaIk z6}1&n+f)mnqzH|Y9e*HSKb)uMha0K<*;h{~Z}e*tUa!Fa{cZmE7C`e#wZt}&OK&!7 zVkaE>b2i!`M1hooIDCmk%)A!y3j@ zU<()ORE^4wzrM+~xn6C%diU|8^KL2o>#J`%VpDR{gz$Anj^ek^{~@0LB>x`^zOYC` z+zFAq}gX$ z(uQLcV=d9k-(s)(%m`0AAAb~5B!gW>mG}7lp^E;T_8%`*qWjAZac6n09@lo=SuDS- zaoeRb-u6o0(3OIJbM>9nmKSQ^JF2(UrSJrb=G291kB7mB5WT zC)59B!2clNKYw!dE)fkcMBnAI4jVhcP@A=8Dpy^!>*M*)9w}CwAyI)oF!{d~=YJ{0 zz@lIJVPTkyEh8!0My1Ve?EJskN}6amt4L4WM0@=I(?6__h%eeE9bk15C;6ynxeI^!h?APUF%5CP)lOhn%n^0IqJw@~95{$L2RQS9gq8tjQL9)xr z#59q0`arsWmP2z|TBkFY_(}G3LoOBx2vs=6Zcznwc7B?Y`8Za4Tw<(-fI{U1rWBT0 z?Nlbm;*fGm9dK6=*uN>hP{UxeiOA~rp!GCvdSf`Xt~^~r@Drg|2esvCV%ryUxnV63 z+M$*FrU2vW(zE_m;j?`;Dad9*r3G?KpW@(VweQPO#R^t)4y?Y#ragiKs{LxtL?%y# z4(HkpAHN46XT|DQmbJl4<7nKP>m-fZRag@~cvj%mD!?Z(6bD+*v^S*!rl@-GHxX;l zOMq5l50CnU4vYU0zwi`HP?PCp_N5(bF^TPSdq&mYTcVBJWd-m2&l=`$0MPlskV+#` z_WGD~(vi9m$r%b3dd01mhy*BsnH}||5_t8V-M%dq(?>8R}qdxI(3 zp6}Yo#J>L8Pj**G_i|bWNho-VA5{O7@26ah5BG4YncUIdgc>R}s|1s-SVsQSm-vT+ zV@9WTbNReGl)-qolHD1Q%fY3-D4RA6RX}SC25;$9*m|W)sPZl~z4U%5T!lhsDHlU!WlC$=}p zpuICrYa##O)zH-|5 zYzs{vhCv)~BJO>XXq6i(%6v>V{7>K6&*=hI$aO#~6^&~OS~@uHpnAtRqbC~o zsB`rxO=Hu-wROdD$VuN@zEMd0%`Eo|rk5V=CM0DR$0epVK2V&u8mC&+`aP&7*K94( z`dmFS7Lfdd2@HeJW0StiATEZg;|$oE3QO!g!lSTf3HEH*Ih;RWkFj%PHBoLvz#I%C z=pwA87=4s+;i_XK$MtO3yUV&(fT9{TBPw<}S$s*Q5B*6_0DG)eBg0F9aS{yvt=sME z^sHw7sy0{KXeq$`zLA-V`@!rYurWm=s&2IGNDUdgCnVF`nE_PU)m+SwT2vAqHGe-z zY5qQf*kH)E%;v;Ge`x4`_SBlePou%**jCFzYiI*tW#&} z(q-G21OX~j7={4Q2L+0E#+#7_8e>Nwv4Ec2JgRJ~n(}HEkZa`Q-W0= zCfc>~lRyqozM)KyA=-0!kpy^6Gs*cn72l9nKejr#N=pZdi+{&sA|3ta*dlBkDWeFE zA2u;HUvKzGVp#O7JLr224(sGNAw9STVN~sURfZT|GHP462 
zFqLIC#D;%>M1}50uw*5bZG)Rf9&_((wBd}6VLOsWjEeTkX4w%JHJ?DX&Qb`C65(Ew z*^)V;6*~Kxty4OOPj!FlJ0D2^Pkib++buDpO5W0rQJJ*LKc^JvS%0bp^jMe1%}XM#NeM6%pG*31KSh7IjX*=r z`mEsbP&Hs~=MCTo=q&f~T^@S(c=)8@>IzmE-@6>E{ zuVX5~wkMuvnDBQ0<ks&`L-vXTNJBJ5N>vNi&@1*ivGnuO*n_|7h_9pAa^5Lw3$^DY6R@7LJ|trR%DpKlVH-o@dpjq+4B*QT@mPDC>iI^<o*r$UMg%*?xN$c z0p8q}eMk>0sh#g}c|HT;gZp`jiQO$58#gF0_rPDS&k0v-GEn!riCc3S#wLS_lN ze4}^*KY*W{Vr`=8pljNXhyF~7=NRj ziy|!$%kH)Sq!9fA?n!gLbv-|>;SJ-BmgsMzO#8j%n34t(l^2hMQz}hkMx&W|j)AjANxP~Jr**O( zzPVwGbWmcOBZ!Et4PSJpnXv2}UZK?gg0F?}uS`uMFJw9hr^Y1;A4NOPrTIBjP9N%9 za2@CLx-vHosyt{+#*7e-_$>}S3Mm~pD5Y%(_ge@FDnSUPD=S@CWol> zYVVl2dKmXotN?DdFL5S7I>)w$3bq>8T}RupDpsspg+7E_~^&^;Tbc~vzL?s@KfSB1kjv+Z!@vRIa6|J`an}1sY^_JF>Z@>Z~Gqce<52`I& z8C{Mo>%@z}Y*iW}6B<=e{T^U^6@|IeBUDS=LmGRwWmbLVZpF)HiPt7PYd5GfmJ1T! zSP@$+Br;5?k{AK`(VJ&nEvFRC6o$esQS#J2cRel`6Y7_#Z-;0)8l*q69%$&!;I%e` z*@P=jmx(D~M*EiEyCUeWBxUisqZAdJI_&kqbUs{A9sLNeEJF`db9gIj?Bb*r_tVR_ zm}2IPGY}4F+O`e!dq<-BP2qKzoIoFhF+{dWPCtVZ+myoZ`e-t})KzMudXL?VNzT``HD0AhbyKX_PVcS)_Ib5&y3AJ874@h^^3DYq9Rxvco1jm;q8 zsNRB*u)M{mRn2LnRGk|Zb}FQD`Q=#!5|)PdK1l7NocD5YYo=1OoOO*2_ceD&7p`I3 zbc4cvr_+mbM_$vMK6{x6(ev8)U75vYy}p@Qp|qw<_ma>Ve5%}QU4x^U-b(M?`*AlK zWxoOSB|_+Zg30o93#&y`0%r8P$Ko&lX)Zp#;CF1V$S6sF&uFO9HG;PFsYJp%Dtf%F zA=TFj8>YDm$C~uOtq~z}@b~_vIXB;co?RoVMpxGP&vEzI`2sqtRGw{5380U&%W@@f znPqaS@r36Cu#cUZ=2VW&`ss~G=FvW-@$TR)!%w!k==;2b(3`bONbl_81FvUdcj04N zV1ieOqAmReCllBs13VQq2mi(B#%r4L+z_Qpc*ETvF3-Ofh$dT;kYmvUO^A~?=nG_I zzUou8eMY5$UOLN3#(hYGID?KY;oZ+ZP?6}{{fK>!0#~3kbYFET{RXN>B}G7gWS_tx z9S5qBEps)|Qx#qf9M=BQ_!;yY*EY}F%vu*6*iIFjKI&kK z!EObY<|=A0mZM;?Ruf^~RUKRry_pw$N{KIM^rny*1d1l68i>a&i zYg6~?d_5(#nleF^gJk(n$=FA>&cs+VlqWQd(Vb0f2r;P~-LH0+Ec&VO6MI{!-TXsv z5|PdSQa#8`}jpuD+SSyT-`gQ;pwe}P!@gn&IRvP525p! 
z)Q&ABGbndtbF;Pk>zD0Nzl@emucPGos-_yfVl@wbJusW-FXDUkku@((#=jY}kp&v| zuKTReFowo)G36th=}H&s@2#$4>M0%^R5Q!xe{1BB7qhPG4OMCHRU~(x+TLMdlDU7B zmk5~q60`20z>afcUO8ezLPXw}U90WgW`aJV0zB}Vpxga0xdifxj#?`6$eR(K!JO!I z5S})_%2)(DUs97P)-dti(Pf|6pk2v7Dqi&+-rzaH_+KhJERgf$Anjs~Bas*SCiTgc znL?$>SEh}SL#~m1&X%-msEfJ6H`kb@PeE@*n0pj=)y4sj9T#o&hMT4IU`%(s;keeV z**;Vxi`*I^@EzOIVbV3o#v{;8SAScx!XI5OZp4&v^d+-Xrqu683GP`wQZX;B>9kZr zs@t*g0lazjVG+ z9I|GG{zbMGW&FHur8^bgQRlq&g;6=V_(peCRo4{OH{&UPGK zlGCEFTI$B#L2F%-5dks5E56^MYx|VBeFcU^4o08$9O|3<@o_IJj5aEK^SNzsD$t#` zG2n{Zu5Yv&wO=pQyu7^R*u!Cro;%C_fw`(kdZdQHfC&+ObnIvR&TLnHvLOA`$$uz@YH(qE&4oh(;V^S(~t=-T&!t8_)b+vFOpu_cgpx;}H) zn{H(zq5nKNNFVOBAn(!Y*^zTR|MkAl`!NA&gKR4%zE6KKd^<0F_4(esCCb(iR9w&r z3@9)guF)2xPv?oZEo$Xk`@Lo1QI+H#{v@hcza1rtq>jwdLB-qUdf{p6x3f!}^>*pY z)bhlkUO#H%&$0u97Z{{eg;(^b@cXjW*CCbUMshM6v1LkMfkeU2)a~+tFki8K6=D;i zraHl!UlLsQ^Gpm!MNy?ydCsC-5-2-Bw4y=Wh$?mB_UsT%Z6u{bY1)WWuFmjmfak5E zucFL->+DVW#EJ5t1ak*r)@4l_1)w3XfH+I2quem$xxw=m*a z0JOngLH&$yPGCT@9YTc!NYWyQ0ICM;k32J{U)&@?7+QR~FjRPhOY!()aGAGMvGo%B z9=odL)ciFR&cx0FY0#wl5tN?Vr|_)a8}rV8cx!~iZE7)?(&m9*E{&L)X`fPRW2+=! 
z{LGU}^sL4xch0Zo-raxe+8p$875Y%|3sg2skfI{U;YuJ-t|$+E~S(MtIBjD$Ap;Sc(L##a7l~RycI6TU!Y1e72sU+uz^=2E~|!0$uCl?OP2` zI{9p!-h(C&x4CNP&3ow^oi;WodZDiC!ohYPA=Oc@PY;z=yz?Szf4NeAb?PJ1m`TAjXAzoIx2&ho!;1C9L6AqC zJE3KU51v@vLspE$>3vxh=4`fe<@6w)&4H?KM32K`);)(@rt~ruUUG z>NO4aN|z7Kp2l!J7DoU}O^n~lUKYv!M?US}AxEwAH`1?#D;XR`#@H-J!N{x2*~ut) zW{z~5GS!(B;mc3b9zl6Y6-IKzZ)tn?T_!IBjUh!jB>D4SNj{BL* zlO^j28J0fdS9fhpxpC@cX`XRZlg5F2=dr%WOCyD#$()+!)CBtr2R?m}xLY2BZME$& zeMdC@>STk7+Ep{CcPmC_>r^yjvkN8w{KoXC-_}khLsXM~m6&Kjw$8qDuaA0H0qGXf z$`k&=U~ct_lK1n{1Y=66sChMn98?RsO+%tE%Nq4?#9u0Py`18|Bg5(1=RU|VITe9Z zidxx%q}K!JzHNCD1itRfR`y#CTkp4Twbv*G>8(UY0lPIV z65f+dYd$S3jRH-tY9euiw(z6%jhf+uKGTsN4aJMqJIB{JZ0DqXu;D8<8z4e5O3&$w z#1!nPua8y1a`vOtf)Lm$y_5IWRU`Xy$%n$ev@64VSh0K|@odi^d#ThY7vD$@+huKy zeHkItK^@kwoVhq|ksVbR>jTMKNR3nyOOXlHmO*>6G=3>72MylIUQY4-5j#aNLE^ec z@~`b{N|Kbuih6{>H%Ii0yH^{L^||BzhX{Pt>%+y57n_#Ky@i{>T8Ct?4}WHVFDs`T zMWB%T%eMYM3>{FgAo~XeYc*7?+4ZIJUKtyeFN?P&%qk;`MxQVhkv!vVe~xO27>7PQ z&`(>}fk+j{O0#iQ1&FK^1-rU2>W8!TshLs*#|V=a7dut!!SVTyl$V$GK7dc?HMshl z-1LJjH#}mdZY$}({zNI=5%a8{f*xjES~h>rPgAd}cJPX}W36$xE+Ah~e+4WFxMC!U zHMV6~QjxlyH$s&I13!20dD1mf;|>@~tAh(K3_o#SIkRq@E~}R&Efjv{+`u`!@PHNt z{VQXCk+zN|=^;e!!q%k?*5hQN6;5co50tAm~b1@*9M`?@Z;CcOkv= z*EcsmdZnVZsdfu=4mErfG~D50+t=-OT7CO&B^&$eOxPX1!xQl0N!HxSw?-$Y`0P)_ z8p}i8Bc+z;kux(a8mLyz#{DzM?d2~@YGs}5OTdz>;00|9dnC_NE{Pg*ta6Bchp;${ zaa7$S>&Zh_%GtfBUPn3X@k5O<7(si3cz-hz($3FQE$3PrM$SY2%~U=3Q@NI=dS$f< zPgIYW#OdNbVq_wp4CG3G=QnS_3k=q1ZRgJWRvS={w5Z}Ut)*~OC$qB0 z+s=cD(S5Cuyj$%yRPO?q8hApaD8N5fgb>D3@TY9XQoD0y)y%77p1bano~oaYLKner zXR2-zt;Nb2ZPtQse2O2pqq@r2kWfX>Wu$R{dThjq_qtXGymaS?9b}epX<7*!hYR3q z%r^E#I}U|P9;N5TRQi*P|?FWCrdFU z!aK#7v0s2`>~3D8YMS2ZAQ*L<{M?0|i(@ml@Kz_g%#wxc2~Y+0E9wn7-(%W*qZae| zP}30iNwv~6=9!H%N(X}v%-)QQz0?wZZF0(BAvyOk2{QU| zQ2Z+Qr`xQLjPs0e3{&yklN+=K>ES%gWJxZ~F69^xEY+`Ir+GzUa!o2|L_7pv-;|=1 z+WT~4O9EMfXI!y40R@@cu0Zy}j=kXlu_?`QAhq7DuQ0%5eBabstnMZ?a{HUVAZ8pY zTfa2niP0D9s?2Y00I%!89Yw)i>C#bT?=A!RHe516QwP|{B&+Fl4BX;)8@@d-6Ei72 z9QY>jV>^g3uC$Y&0!ObsEZeqmFPSC1)h|5m?o8+tfuB3 
zpm@}xfq6fA*{AAOdqcn#XX77=Yr1Pi!Mwh%9xdT~fMfPFPxTwJ^g&yDW%z#A4EL|( zV4o$_i6Z=8+iem!MIBK@ZKfnePZa*lN7ipUfh-l(@TwKnhP>~5{Y6q3pz_#kQvwvW z^O=GZ0xz-uT6Lhk+bT)_l~|>-#j#ZIDc><1_RTT$y-%UR*XI+W0^Y@8iMY?f4|wy{ z7p7xMzkG`BxB;8r%Uww#bElVS?<-|~+fs5>K>g(=>iO*tscw!`9BxXIM_j_eIYBNe z^xoHFya9nHQ}W32=c5Gn99Aqr=Gy6ih}6aV4{Qr>vL0U(6v#$5T0AXTJe(%S9A(?r zeWgeepbs6aSbDbM%`Z`)lRxP)gaB>O~xixoRVnDn+P7WTiM{N{kF0kUg z%#PgN_dQpMYn`GIKPLI~^2t|) zL&%lULmJi*eR@Zvc>&G`aRsF%I|gY3l@%%*@M{YX>tpp4ddF-DNxXWwWPGG5eZnM6 zQR9_kW$2r&BS&3Tw#Xr&J}(?!R4Bh4C$eJv6Wj{JE!-!!gDeji_30NmIqCNXGs>5B~aRET^P|_$VH5e%0!&Y`_I$K6+e9K~L=g2r;P1DM0uV z>_w1@9+6ez3-gry+Nz&jMoh2T48e-0FB%NF!}P0}H6ib|@G-#JW$bAF@y^n5_NQt^ zqN${f&b>FkV+Qe}R>~;0ES@Zo)!q)Q($2hbeay>%v$)8EMKRWXVE<`R@rzu21GUK_ zfu`bavZUMa_`(wRdfUg7))e5D#|C9Ud9}j_s#ZOUP1@sZ)WYlpQD$ShBPsn8<--8w zst+?>YoiqhtDeL#Q#?bfJJIoSF<}*pw;}qU{wp&jV?a?wv*MvR>_X(RDp~&JkfF}! zcOG9m4;*;DyszC!Ua{^)EPWvWfQqB3swK_j(roWKAjNg+rAf@Igl@wrSx%J7?UiOV z8}+y&3E{Gv+d1T&%jaVk9v#cXz*R+HH;Ts89F1oR^!JA(IghkTLzsIv&K}XnI%W*g znqk>bcl4Ymm1b}$B?FD6rI2(e-~^JgO~-vyQZP4n%Y`dvrI1JSAK?oF&Qm`WbQf-o zPWTw!D2;W9&ov$B$QAF2(gVMNVsE zkp_wQGIVbPcx zEa z&8SW$IFpY3yFsRNUGSq3@W`}OWBG!|M-qG9aU8O6ds02;EPD|+b?I7`M(=7|&mGCX zp&(nt9v{QVq_fW&=8dbn9jvQ>^`u8p3y#&+Hlo@H9O9dMcdJulA)Y@>V7g|ep+2)p zl*{$^>Wf#7gpq@Yw(*_(f+~LBlbF^BuN2p6mQ>bv{uZ< z7ol2BH>Byd-%RI{0jod|A4EMdcjR53%Ip(feJ~>LhufoF^XqniZe2p=NjBXg1iLh6 zJyWulFYeyd?gR=Z<{uE#lIQUbHW?Ke?A~0ed3t*&VqH)NQ62KdA5#|C=B5Fc*H>~w zr=~XC4)&XTPfCOYu8AuVn#R~;~Z)Unw^bK51YYP z={Su~!?(wM&xfQS`{%U@ds5O`kB7ELUsHMsa%v9O+mz>5V7;t;JuxpK7^3<9qc4|k z!cJ7)pIcwv!lZekC7w7^C?%gq)>=+hSr}rK_-p8K+EI$;h+EeW`#VDrYJkk1y~gGkmy)$a@kmJpxut6 zPmeK3-LsSwSgyE@A7}D)2L;A*^=SUpkYbB~c+@5y6e4|m_%jzC=65r`?u?TLEiuu| zk*Cb4k2mfC3;kSUo-Nbe3mafO!x7y*$QY{4l4(3F>=7q5T<%=$Dd}1F9RloC50#+{ z?MKQQkbKkrxP2Y>q19-@efUi9PM%%=yCbZ$p2dn#b`!*ae&S0+tLf2)ZNoI%dJP8& zPL%(9G+mtKvH6Au~o_lpa6ywzn~De_&zOR}_XIlcn+KQtf~CwQ-o z9Mti;oZ&zVUI5?OB|uGKz0J|~mtrV^Iv>GE!vlfBDvR{a5}}mam(P7sn?(sdP6Dtb 
zKg)PG1elxY8N```jK~qbgQR{M^xby88_Rt((%f>&TFOYN-iU`;W;%uwdjp))6n zryCJR431OwB3L2Dif}Z=eIJuB$_qd1*T4WW<)Lpp1=1t`ISU}oZPp8%u7_wF zv~AwhGWJHcrTeOVm<0oYx51?&gfJg4{BY=*BZffu4Brvs zt@a?XA&J}qr9EL9R6r*uh>CkUdpFdKJ%Ujs$@haL@=>9PwPCamhR^{`<7iva1~gE! zHoWNyIp?V}DQ!feFN1%{ zN;0Euhr3JOFb#g{Q8o27UY-?o4TlWBzL^!=FhHq<4$i7?sC_ z6Y;L1__ym^6kwQ0GF<&R+&-HU$*}6p627OzFnK^h|0q%(=kCelVQn}&cJ9c6rmBdtqMTU#fGYDB3+cq?82mVKQ=o^MqH}H$ zE(lk7Qj^!@cADd2P!Ek`tHDx2#1e2X)Euz@`ARo@3Jh>N#_pG;m5b7t0OnlsDfBPV z#j+W!fHe~81Vxfc{n&-d>IuXeg6ZDvDu&dT9ZGlKiZfh%3M-zQ+3=1T-!O3;^3S*Q z&ALrH(UO|8=L{>8Cj1# zmSakDskSKMQ0?EI=7ocMivsd-#T(wM*N_z-d~BXZ+!`Hd*CtRd1`XP#)9oADA75(4 z25aHJ6|NKE>pSrp^4uZU-hCZut)|!Q_nHT6IMAyFV%V&P*6bY4@PTeN-_hA%MlN0UJ`?oPuT`LhT3|4K5;GZ zsia3D#k~}WdgWxWS6}^naoo;vu4OAT`}moQ;C?&V;m+sLTJ__9i)+^OVwX58!+l-sRSBm9~o+))qPtB-q ziW{1-q0R%DszThtr>eQP7(PGqTS=*WXpqucSn#b8^!SLL8YM66T6i>V^8A-r1uC~3 zKfA%O+H!d*HLA$1*Yv7GXriEFBH48o z?D868PNLbejN6p&9>+1UU6V?m%&Id`9;@$mXMMM_-T9szHnz_}W6xoO58i~n=+Fol zqG3|P>ch4k8yIwe?!3`h**4j@f~u_`-x!tIsA$35E~-wD3)6V56fHcC6O%G3XGg3> z<>Mb5NG{4zfG>!#kK;fjhBl*6l1%nRwsKo;Rqr=SU+Y&rwIBJYRjyMr!9s|fqOw>= zH-09@kZ3u~;=fpV=3n{7s655`?fH1KAHL(ez40)EO$;#zLxy@;^}8D#)*wE-vDFt_ zf`ZxF7?x53?YJ`E^O%U2!?DfvGm+V?94_LnMWWA|sx=(8gS zia$keN*5)&!j#aQ1R{~W3I2^gUc9iqV-+BKQ-zIz|l6B)rxRBR=Uur+^-ZEa+`cmBi- zu^E7Tiv@gRk}S(x^@-FI4aYR*!^dNG>|HTARso{oKCx{RT`{SH&?A5a>-_GbwZ-El zM?7d-tU9J9>ZhxkrEg3F*LLdh z4qZY-cJCu|hF@`#mVRQ5>6^Ky{JK}~*`LsIhQavRG7G1nP_Xs4T8(>G9Abibpr;kq z_iI13IS9EfJl`&}kKXkiKZneD&R(m0kTTO*$*%5ct@O#)+MUY@(w-Sw)%Z;*k>K;% zSBtYIo3>$WChb$zRmR7r?Hd}7z?<~xPnXNx7jGvILN>zr1k4pjfdW6)b}qn9x7%S`nY8nG>AEs3OH}x76f?u-Oj3Hlg62 ztK4q0CYCX#Q5tJZg4l<45X+Bk)D0v>&zn_wxmUq*MfBw%^3 zhB@Vm9JlIE9HdeY>sh@%wi#X(a4k}hE{WZf^7Dc6%67S`a9OJ{@D+m0cK9!@_|W&lyhJ z-08*6BxoxDe{2jv)c^)#^4 zLLqt7BvSmf3QH*a+1!QaPd{SR>a()@lu|u&b4F!sqhRGJu`=ht%;BxQ7enzWZF^zo z($1BJAAK?It)Qyv?1k)axM#8D#$9%~b$C>Btg}xBKtHooF{TjLl-3C`nIs073P|~z zeIUNR(v$BfZ&0u_7YuJuU*=Q2lN4W)O7HWPldHBZhEh5*W|Fcob!Ngq|%3>Iq->VMV 
z<7EHTy_3-dlROx)99pNMjcFp@Q+6KU{F>MR_WA&)j3mS9YJW-LL^g7)8ll&3J+rI( z)%+N0>PJdVZg8l=+(m!!im^ODs99qa%x*SoHta|x!l$yqyz{%e@Bc0TOE*53hZvvP zj?fn9b8U9oNGGfuDEY$8`BA^Jp`P`VB#MdENBJ19%?HS%LC$gdcJ@t^k=5d1fe9Av z2RjzFtsA3+VJq_(mweJ{7X#^eko6`=D`@Rw=>|66MiyR`MSqk>uV-B0qhW5oB}`kN zjbvD_Lr*8vzPtU0gxq=jp4dEn^M$wDf13JTIUYaDL_1mpe;AAY?X>w(P)JfrTX8!2 zf~0K-c9L2(u=ulg#tqtU+7%t{8I#AxugGTDja}yjelf{V#n1-q_j&!dL*)0I;`1`< zGb$kuouUONdgwftIHI?;HJq}pG?W-()Gam*svIETWU(XOB)L!>;qr8Gt|x9qg}3rp z@IHzpRBz%EKn zJ@1_o`Jr05QlWB*X94f(+vb|=6M;g*DF@Qxv9s(8Wiw9rIP>E?(P6teliOB%!{pA% z8@b77$_AFmU;%qiy@t=AsSWSOg27p3riQ}!1?BBDXHol}?ukO96raf-Uy7aeCFDly z)~gxa<%$Kr@5xPn*M52SFMB-vYmZlXTaN$JeK~y|JCqy#(nYA3q?0;H={NQ&8u__1 zPo&*eQipvfb9`@cQUY$KRKEB?EPh2NED^zysck>-S(k%dQy+UK21}$ym$$XKrv+vp z^IiAT|8UR0NpEa8Cq#rY;nJF{~LTxq(^#=8^Q@xHl7cJTZLo|t83UNjoNM8 zc_(7~)D{Oo)(qU$I)kUbq5hKE zn%+#wG(-#MHSK*r%Z7D|M~Fw6SF|g+`3E2RBa;U`g1)j$E;MZte1tHPfj4D=rk86% z^QY`Zmt%E5V^|_0LF>!XzFYg>TiNuuCl_P&Pi~^?V#%ruPoGcm%Q|EDb$Cv1ZTiE1 ztu30_p#*qqw+^nX9bEk;Jb&t1@oGt-Yhsvn**pNT4rQQeefshy8SK22=C5L6>zA{0 z!EkI2511l{*H{mp87^H|;!D@Ymfv%6PS$g4&evp1l@rAdrf7qtU7)J!sPy+?RyTdd zSKjbKR9o`)EvZx7YzFuV_9BlYIwJyEc0)Y5A!j;mH~`3!)*e}EV&hoeWpm)5yT z+j8KJD&Q_5Uf13_f$}>kLpBext~3pMW&|A{^=8A5Xu$`_;|9DQs;9lCwEbD?6XyV1 z6BO58ebWZ>awuyRYMCkxD`cUk?2xpa(u{8Src@>4D_hyPQFrfl{`%-l)-}4iI5YdP z2+zWCfOYlfgX|=|g~je_yI;LKob@M{(cB#;n;(!o9enn-_AkwHy8T{V)C0xUe1*mE zaIhgaf7HT{dhM;*R*TXLS7}bEsb7y04-~Ny91^2SZlV9KACTbcf4V4oBk@XM#?&%> z@>b5M!Q-cTCjY|qC+83zde(eTRSp)8 zXKA$k8pX&$see>@tKuwAKqRGn>2vnhohqJ_i?lnO$O5Ua8K-T1MR`9ns^k^rkeTn+ zPQ!k5wO?g*%wxzeH9P^;47{AgHc0Xi8%e?tw`QG;q_u>G%20W5qmLrhB?oxuV(-nfvc{{6PKLxYc)4#r{@mW^l#Z*_t9e3Z)=V-YPfXKl<6ir4Ij!CP|RFxi` zh<{R+%=TB?JqiZ+DrpS^an{t1!8E75ooKam&<9W&-iLp`zq=#PRI`bzU3-VmL@KU&?8d90Q|0UCpfE~66zZfQ8sRz;N%tUr(juu z@$>*Q!pVKz1`z|P>K|8VG^}}t->VEgcxmY{h=GHrpB8OB75`xt=hZiDp64IEI_!F& z$G;qfv~U?USGx(`G5J-C?I>Do0SS8f3t@0s=sKdknOOSKEOwkPRX%a49bWolu~sP~ zE#`;UKsEz0VT5bgbJ&rU{jch|-`=?&&n4oqINj+_gMLefcnoRP@UgU-RT_TnhaQwt 
z;s7id&19!_FuW`W#qGB-K7uG48Gy{wB&9`f6dM0dB#TpCe63-C%_FBjf9-V;s|jIS zfrF6}-tQDO|40fS#oUX79iEqhyJV;zvQ`Y@jr;{{&U`QMa|K!M(Nmxl*nG1r#8&1e z=|)CH5SOK17#)D0E3s-Z0X4kkjr`>XJ0=8VHm?V8k`LWBgWP&_;g?+fc>Mj;c`4C4 z>6n!+Jp!=3KCqJT-EzYGv*pwQ8g53&ga5L`&_@=@$fw^}G^RUQ9tO;$>Ni>a$_gKd zQd1H5!yL1&)IJ4Q0VtC#iIv8-Hui+TNOLZ$`aMg2e0+N{jwQwB8!ZU5NveZM*?}QSCgkBRuhuniY^9ne3=9}-=eeNHgkdw3b+N-a%_g-s* zfagPA^m0qwEd3pCYo7H?pNhUP9+L` zD$#w|zM6l#)P0uadpWiy#>a041oO^ySQ$nxhT|l!azpF%fZ%6AbA&Wy3Y#bQCZ)Rfa^PrE5$@! zY|_y#cyCxq%j6~Zd{L2kyJc>ew)0`Z(PMFxcz}U#4%%PK{TBViVaO%RfI|UuL2n~{ zpC)Z2JILhsB)?A#I0|uXJ%B2KRf1ulRtc zVOU31MyUel?MywTo!rKM1Zk6bx8i;7PSnp>tv&j zl)>7BE)QvZm*KyT$lZS6+xIDYEKJ7P4siW6lw@=rmLALGZ9*|4i=C6eUO;!FE=Dc( z*>7(pHkda)0DFceYQat4ww-cg!JnGpR--HL9@|M&&p(gXX>7yXZ9yCF28-Lw_Ma{9 z0Plz8YvIbJ%dyLEWC>(&t9H>A9wMfe+~Kl@pWOKd2Pgc!ab@e=5W6ggbyU@$m1|T2 zaAuWw@N)4lv=BM*gFdsGnPX+?P$aG@7vZ%5XJyP>Lrb>aOvL;1>d%_X*N&zz+|tcy zp-~_D5@Oi%HQS1#5Gid`{OQaWr1YTqXIuN+Kn9$s@eVVazXqwQiad$pUdEAnIVn%^ z_6twj%oe}=@Hu*EwtQ+d&ySOzV5xnMb$^5{j?Y+Rn%$eL&>NGN@hEaf*E(;}WQU`_ zQ!85ai%ZT@_niEC@@#uWVw|dw#+Qt8ogFGf7YmubUDi$8)9S`78Bx`f@a4S%?X{O! zosCwPM{k)UvLdvr_tK-P6K$Q4{P961v;8y&kHX5aL9_7V);&)_1Rxhr`v!cYFx0{45w^AexZr3mYr*|n%rS_9G zC)x0-JbWg(kQJE=6m1FDS|#|5*kP)S&M^!|PpjCRl(2f`M-{)9Yb%c#l4Y3X)FkMZ zUzlk%Dqiv-mS^zJ){p~5F8IYEnDlBTpR3x}h0B0?R9Y4x;%k+E+$h725Qs4*`$g(o`@r*G%u1U) zLWgHS?bG`#YAUjdZ(n!yp3Ku+ePNh1ar3he@iU-yYy-(W^SZ)})YVd;x`}ILx~mRo zywRJ9nOi9R;@drZ|3zX9v9lF71B&>wlK5-K0;ZwsWFEAJ%FnU=)wkB4S9d9MbexHg zh?{Fp?K65La3ovt@w_9e)0l$|g%xx0`zjN|f?-Af#2c`glbI)ZC@%L1ZFG#^Zxum2 z=#~^gQVgYJ0m)M`w0GC>)rlHp0yU8C_L)A}J9^O%_M~0S-kO--;F&H9Q=)L!r6uxu zhs2t;*Rsz-76~@;mhrD10BXyqhqgaiO5G zlUfE<^Xw!q4d;ZAH%PX=KH>%b?6&@;Ex+BBkh!jyOI2{JQs^(i_;tlKpwvh01*SSo zrM=1#I{)(DzM>~QCPCTjo%(2Z)s4oE<1_h&-U8|=g*rFz=^ZBCk>&sR_l4H=Ze#G9cXA6$75_~sc(1^{^PCq3PfmXL_2d5~6%RgTzIjr!aI$paW3uT&s>f1MpkOQ| zUKV6fc=2opMe*;!=TYoS-l!xrk95HCh5v@P2PBDRGYn1~L80$M_IH_1&oAZ;DpFfW z!UZ2Lnquu=vxXmoD8==J&#-5y)i$9&;uWj=X@2S!%F&YqczmY?b5|YmjE~nf?1PG? 
zN^IZbIX)B`k6>_kx-lB&!r>gWXkxy#?s)W+g)jf*FDDlHd8HhIq)iK3ZB_ol8_uIO zM33hEj;!rp{VVYb*q)QAn-b%D+5!) z+g~4X^N;-Q`@afrYxlB3DxOu4Y&77U^pzH-`Xp2M@fcm?ot*3Sj$!ivwQxz1Agz(9n|mp#Gvn`jcrBS;1jB zs@Sugibpi&bp-){KeG}Y?#chnw0{xP;17Z|VT1PTksxp4$D2UKr-&zKGBSmq?fCzm z_E%-N@0WOhlg}WtOxgEE{)bfFuQBQ`ieULonKa?E)o-xP0=cczdRU%em0a%@E$@+B zfsw5xIC}Xx35Y@Idbhys&vJt1Tb7~P%2a>B^(SXAT?h2S`IH{|jO#b8sp}%KXnEe?rN%C_KN1ZYq>l|8evqll}Cs zX#YXFoXg@@rK5j|o1fnO6Ok9>0d~F6iD?0jZT$J&zkTDJ-#S{B8he7{+F}DQ| zY_MP_v*h#t48}PXIiS9jYMP1UQEB)K(tmtVy8;w~Y*`V^r#w~;`XAr@3vdN40I4cL zaa8{oIITM~OS8{$Rm#^6H7*n;fx zl2!Rg;@0{7_`JEfxxD=8bqSX}`9HhO{i1)w0_O>QCV?Qh-!Gp1s(X&;lb_p8j4@un z2NPJ%(fogz;qOOrj(c({GErht>}a2k!%b3t)t}_aZ@x=cU#0XUZuGzXPml}9i9T(C zq&d?bYAEvSsg8jo>T>>p!feNmh1!3DJn`1`d&U`x0-$39|BTze89GJaG<~Myv=
  • [TensorRT Model Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer) (**ModelOpt**, `nvidia-modelopt`) -provides end-to-end model optimization for -NVIDIA hardware including quantization (real or simulated), sparsity, knowledge distillation, pruning, -neural architecture search, and speulative decoding. +provides end-to-end model optimization for NVIDIA hardware including quantization (real or simulated), +knowledge distillation, pruning, speculative decoding, and more. ## Major Features -- Start from Hugging Face pretrained model checkpoint with on-the-fly conversion. +- Start from Hugging Face pretrained model checkpoint with on-the-fly conversion to Megatron-LM checkpoint format. - Support all kinds of model parallelism (TP, EP, ETP, PP). - Export to TensorRT-LLM, vLLM, and SGLang ready unified checkpoint. @@ -28,11 +27,14 @@ neural architecture search, and speulative decoding. | Model (`conf/`) | Quantization | EAGLE3 | Pruning (PP only) | Distillation | | :---: | :---: | :---: | :---: | :---: | -| `moonshotai/Kimi-K2-Instruct` | ✅ | ✅ | - | - | -| `Qwen/Qwen3-{30B-A3B, 235B-A22B}` | **WAR** | ✅ | - | - | -| `Qwen/Qwen3-{0.6B, 8B}` | ✅ | ✅ | ✅ | ✅ | | `deepseek-ai/DeepSeek-R1` | ✅ | ✅ | - | - | | `meta-llama/Llama-{3.1-8B, 3.1-405B, 3.2-1B}-Instruct` | ✅ | ✅ | ✅ | ✅ | +| `meta-llama/Llama-4-{Scout,Maverick}-17B-{16,128}E-Instruct` | ✅ | ✅ | - | - | +| `moonshotai/Kimi-K2-Instruct` | ✅ | ✅ | - | - | +| `nvidia/NVIDIA-Nemotron-Nano-9B-v2` | ✅ | - | ✅ | ✅ | +| `openai/gpt-oss-{20b, 120b}` | ✅ | **Online** | ✅ | ✅ | +| `Qwen/Qwen3-{0.6B, 8B}` | ✅ | ✅ | ✅ | ✅ | +| `Qwen/Qwen3-{30B-A3B, 235B-A22B}` | **WAR** | ✅ | ✅ | ✅ | ## Getting Started in a Local Environment @@ -43,6 +45,10 @@ pip install -U nvidia-modelopt Alternatively, you can install from [source](https://github.com/NVIDIA/TensorRT-Model-Optimizer) to try our latest features. +> **❗ IMPORTANT:** The first positional argument (e.g. 
`meta-llama/Llama-3.2-1B-Instruct`) of each script +> is the config name used to match the supported model config in `conf/`. The pretrained HF checkpoint should +> be downloaded and provided through `${HF_MODEL_CKPT}`. + ### ⭐ NVFP4 Quantization, Qauntization-Aware Training, and Model Export @@ -55,7 +61,7 @@ provide `${EXPORT_DIR}` to `export.sh`. > low-precision numerical behavior (fake-quant) which can be run on GPUs with compute > 80. > Real low-precision paramters (e.g. `E4M3` or `E2M1`) > and low-precision compute (e.g. `FP8Linear`) are also supported depending on GPU compute capability. -> **See [Adanvanced Topics](advanced.md) for details**. +> **See [Adanvanced Topics](./ADVANCED.md) for details**. ```sh \ @@ -72,31 +78,6 @@ provide `${EXPORT_DIR}` to `export.sh`. ./export.sh meta-llama/Llama-3.2-1B-Instruct ``` -> **❗ IMPORTANT:** The first positional arugment (e.g. `meta-llama/Llama-3.2-1B-Instruct`) of each script -> is the config name used to match the supported model config in `conf/`. The pretrained checkpoint should -> be downloaded and provided through `${HF_MODEL_CKPT}`. - -Loading the saved distributed checkpoint, the quantized Megatron model can be resumed for inference -(generate or evaluate) or training (SFT or PEFT). To read more about these features, see -[Adanvanced Topics](advanced.md). To learn more about the design, see our [Design]() document [WIP]. - -```sh -\ - TP=1 \ - MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \ - ./generate.sh meta-llama/Llama-3.2-1B-Instruct - -\ - TP=1 \ - MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \ - ./mmlu.sh meta-llama/Llama-3.2-1B-Instruct - -\ - TP=1 \ - MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \ - ./finetune.sh meta-llama/Llama-3.2-1B-Instruct -``` - ### ⭐ Online BF16 EAGLE3 Training Online EAGLE3 training has both the target (frozen) and draft models in the memory where the `hidden_states` @@ -119,19 +100,23 @@ deployment. 
./export.sh meta-llama/Llama-3.2-1B-Instruct ``` -See [Adanvanced Topics](ADVANCED.md) for a `moonshotai/Kimi-K2-Instruct` EAGLE3 training example using `slurm`. +See [Adanvanced Topics](./ADVANCED.md) for a `moonshotai/Kimi-K2-Instruct` EAGLE3 training example using `slurm`. ### ⭐ Pruning Checkout pruning getting started section and guidelines for configuring pruning parameters in the [ModelOpt pruning README](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples/pruning). -Pruning is supported for GPT and Mamba models. Available pruning options are: +Pruning is supported for GPT and Mamba models in Pipeline Parallel mode. Available pruning dimensions are: + - `TARGET_FFN_HIDDEN_SIZE` - `TARGET_HIDDEN_SIZE` - `TARGET_NUM_ATTENTION_HEADS` - `TARGET_NUM_QUERY_GROUPS` - `TARGET_MAMBA_NUM_HEADS` - `TARGET_MAMBA_HEAD_DIM` +- `TARGET_NUM_MOE_EXPERTS` +- `TARGET_MOE_FFN_HIDDEN_SIZE` +- `TARGET_MOE_SHARED_EXPERT_INTERMEDIATE_SIZE` - `TARGET_NUM_LAYERS` - `LAYERS_TO_DROP` (comma separated, 1-indexed list of layer numbers to directly drop) @@ -142,12 +127,44 @@ PP=1 \ TARGET_NUM_LAYERS=24 \ HF_MODEL_CKPT= \ MLM_MODEL_SAVE=Qwen3-8B-Pruned \ -./prune.sh qwen/Qwen3-8B +./prune.sh Qwen/Qwen3-8B ``` > [!TIP] > If number of layers in the model is not divisible by pipeline parallel size (PP), you can configure uneven > PP by setting `MLM_EXTRA_ARGS="--decoder-first-pipeline-num-layers --decoder-last-pipeline-num-layers "` +> [!TIP] +> You can reuse pruning scores for pruning same model again to different architectures by setting +> `PRUNE_ARGS="--pruning-scores-path "` + +> [!NOTE] +> When loading pruned M-LM checkpoint for subsequent steps, make sure overwrite the pruned parameters in the +> default `conf/` by setting `MLM_EXTRA_ARGS`. 
E.g.: for loading above pruned Qwen3-8B checkpoint for mmlu, set: +> `MLM_EXTRA_ARGS="--num-layers 24"` + +### ⭐ Inference and Training + +The saved Megatron-LM distributed checkpoint (output of above scripts) can be resumed for inference +(generate or evaluate) or training (SFT or PEFT). To read more about these features, see +[Advanced Topics](./ADVANCED.md). + +```sh +\ + TP=1 \ + MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \ + ./generate.sh meta-llama/Llama-3.2-1B-Instruct + +\ + TP=1 \ + MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \ + ./mmlu.sh meta-llama/Llama-3.2-1B-Instruct + +\ + TP=1 \ + MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \ + ./finetune.sh meta-llama/Llama-3.2-1B-Instruct +``` + ## Advanced Usage TBD diff --git a/examples/post_training/modelopt/conf/qwen/Qwen2.5-0.5B-Instruct.sh b/examples/post_training/modelopt/conf/Qwen/Qwen2.5-0.5B-Instruct.sh similarity index 100% rename from examples/post_training/modelopt/conf/qwen/Qwen2.5-0.5B-Instruct.sh rename to examples/post_training/modelopt/conf/Qwen/Qwen2.5-0.5B-Instruct.sh diff --git a/examples/post_training/modelopt/conf/qwen/Qwen2.5-7B-Instruct.sh b/examples/post_training/modelopt/conf/Qwen/Qwen2.5-7B-Instruct.sh similarity index 100% rename from examples/post_training/modelopt/conf/qwen/Qwen2.5-7B-Instruct.sh rename to examples/post_training/modelopt/conf/Qwen/Qwen2.5-7B-Instruct.sh diff --git a/examples/post_training/modelopt/conf/qwen/Qwen3-0.6B.sh b/examples/post_training/modelopt/conf/Qwen/Qwen3-0.6B.sh similarity index 100% rename from examples/post_training/modelopt/conf/qwen/Qwen3-0.6B.sh rename to examples/post_training/modelopt/conf/Qwen/Qwen3-0.6B.sh diff --git a/examples/post_training/modelopt/conf/qwen/Qwen3-235B-A22B.sh b/examples/post_training/modelopt/conf/Qwen/Qwen3-235B-A22B.sh similarity index 100% rename from examples/post_training/modelopt/conf/qwen/Qwen3-235B-A22B.sh rename to examples/post_training/modelopt/conf/Qwen/Qwen3-235B-A22B.sh diff --git 
a/examples/post_training/modelopt/conf/qwen/Qwen3-30B-A3B.sh b/examples/post_training/modelopt/conf/Qwen/Qwen3-30B-A3B.sh similarity index 100% rename from examples/post_training/modelopt/conf/qwen/Qwen3-30B-A3B.sh rename to examples/post_training/modelopt/conf/Qwen/Qwen3-30B-A3B.sh diff --git a/examples/post_training/modelopt/conf/qwen/Qwen3-8B.sh b/examples/post_training/modelopt/conf/Qwen/Qwen3-8B.sh similarity index 100% rename from examples/post_training/modelopt/conf/qwen/Qwen3-8B.sh rename to examples/post_training/modelopt/conf/Qwen/Qwen3-8B.sh diff --git a/examples/post_training/modelopt/conf/arguments.sh b/examples/post_training/modelopt/conf/arguments.sh index f29e0a9d989..0193bf8b643 100644 --- a/examples/post_training/modelopt/conf/arguments.sh +++ b/examples/post_training/modelopt/conf/arguments.sh @@ -1,3 +1,6 @@ +#!/bin/bash +set -e + MLM_MODEL_CFG=$1 # Bash coloring diff --git a/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct.sh b/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct.sh new file mode 100644 index 00000000000..4f301f31c1d --- /dev/null +++ b/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +HF_MODEL_CKPT=/workspace/scratch/moonshotai/Kimi-K2-Instruct +TP=8 +ETP=1 +EP=64 + diff --git a/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct_export.sh b/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct_export.sh new file mode 100644 index 00000000000..73ee80a6d93 --- /dev/null +++ b/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct_export.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +HF_MODEL_CKPT=/workspace/scratch/moonshotai/Kimi-K2-Instruct + +MLM_EXTRA_ARGS=" \ + --decoder-first-pipeline-num-layers 3 \ + --decoder-last-pipeline-num-layers 2 \ + --init-model-with-meta-device \ + --use-cpu-initialization \ + +" + +# Layer distribution over PP: 3, [4] * 14, 2. 
+PP=16 + diff --git a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh deleted file mode 100644 index d6ba1e1dcc4..00000000000 --- a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -if [ -z ${HF_MODEL_CKPT} ]; then - HF_MODEL_CKPT=nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base - TOKENIZER_MODEL=nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base -else - TOKENIZER_MODEL=${HF_MODEL_CKPT} -fi - -MODEL_ARGS=" \ - --save-interval 100000 \ - --micro-batch-size 1 \ - --bf16 \ - --no-masked-softmax-fusion \ - --disable-bias-linear \ - --untie-embeddings-and-output-weights \ - --position-embedding-type none \ - --no-rope-fusion \ - --normalization RMSNorm \ - --squared-relu \ - --num-layers 56 \ - --hidden-size 4480 \ - --ffn-hidden-size 15680 \ - --num-attention-heads 40 \ - --kv-channels 128 \ - --group-query-attention \ - --num-query-groups 8 \ - --hybrid-override-pattern M-M-M-MM-M-M-M*-M-M-M*-M-M-M-M*-M-M-M-M*-M-MM-M-M-M-M-M- \ - --is-hybrid-model \ - --mamba-head-dim 80 \ - --mamba-num-heads 128 \ - --mamba-num-groups 8 \ - --mamba-state-dim 128 \ - --seq-length 4096 \ - --max-position-embeddings 131072 \ - --tokenizer-type HuggingFaceTokenizer \ - --make-vocab-size-divisible-by 1 \ - --use-mcore-models \ - --export-model-type MambaModel \ - --padded-vocab-size 131072 \ -" diff --git a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh new file mode 120000 index 00000000000..3771c930263 --- /dev/null +++ b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh @@ -0,0 +1 @@ +NVIDIA-Nemotron-Nano-9B-v2.sh \ No newline at end of file diff --git a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh 
b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh new file mode 100644 index 00000000000..d6ba1e1dcc4 --- /dev/null +++ b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +if [ -z ${HF_MODEL_CKPT} ]; then + HF_MODEL_CKPT=nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base + TOKENIZER_MODEL=nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base +else + TOKENIZER_MODEL=${HF_MODEL_CKPT} +fi + +MODEL_ARGS=" \ + --save-interval 100000 \ + --micro-batch-size 1 \ + --bf16 \ + --no-masked-softmax-fusion \ + --disable-bias-linear \ + --untie-embeddings-and-output-weights \ + --position-embedding-type none \ + --no-rope-fusion \ + --normalization RMSNorm \ + --squared-relu \ + --num-layers 56 \ + --hidden-size 4480 \ + --ffn-hidden-size 15680 \ + --num-attention-heads 40 \ + --kv-channels 128 \ + --group-query-attention \ + --num-query-groups 8 \ + --hybrid-override-pattern M-M-M-MM-M-M-M*-M-M-M*-M-M-M-M*-M-M-M-M*-M-MM-M-M-M-M-M- \ + --is-hybrid-model \ + --mamba-head-dim 80 \ + --mamba-num-heads 128 \ + --mamba-num-groups 8 \ + --mamba-state-dim 128 \ + --seq-length 4096 \ + --max-position-embeddings 131072 \ + --tokenizer-type HuggingFaceTokenizer \ + --make-vocab-size-divisible-by 1 \ + --use-mcore-models \ + --export-model-type MambaModel \ + --padded-vocab-size 131072 \ +" diff --git a/examples/post_training/modelopt/convert_model.py b/examples/post_training/modelopt/convert_model.py index 9790d73fc4c..20ee59a2fe0 100644 --- a/examples/post_training/modelopt/convert_model.py +++ b/examples/post_training/modelopt/convert_model.py @@ -162,17 +162,7 @@ def check_arguments(): if eagle_module is not None: mcore_eagle_state_dict = torch.load(args.extra_model_path) eagle_module.load_state_dict(mcore_eagle_state_dict, strict=False) - - # Add mask tokens for parallel draft - if unwrapped_model.eagle_config.parallel_draft_step > 1: - assert unwrapped_model.eagle_config.parallel_draft_step <= 4, "Parallel draft only supports 
steps less than or equal to 4." - tokenizer = get_tokenizer() - for i in range(unwrapped_model.eagle_config.parallel_draft_step - 1): - mask_token = "[MASK_{}]".format(i) - tokenizer._tokenizer.add_tokens([mask_token], special_tokens=True) - token_id = tokenizer._tokenizer.convert_tokens_to_ids(mask_token) - setattr(unwrapped_model, "mask_token_{}".format(i), torch.tensor(token_id)) - + elif args.algorithm == "medusa": config = {"medusa_num_heads": args.export_num_medusa_heads, "medusa_num_layers": 1} unwrapped_model = mtsp.convert(unwrapped_model, [("medusa", config)]) diff --git a/examples/post_training/modelopt/finetune.py b/examples/post_training/modelopt/finetune.py index bd0569bb513..6489d394392 100755 --- a/examples/post_training/modelopt/finetune.py +++ b/examples/post_training/modelopt/finetune.py @@ -167,7 +167,7 @@ def __init__( hf_dataset_kwargs = SFTDataset.hf_dataset_to_kwargs.get( self.hf_dataset, {"split": "train"} ) - self._raw_samples = datasets.load_dataset(self.hf_dataset, **hf_dataset_kwargs) + self._raw_samples = datasets.load_dataset(self.hf_dataset, token=os.environ.get("HF_TOKEN", None), **hf_dataset_kwargs) self._raw_samples = self._raw_samples.shard( num_shards=self.num_shards, index=shard_index ) @@ -455,7 +455,10 @@ def non_loss_data_func(model: GPTModel): """Callback to compute the acceptance length.""" args = get_args() if not args.export_offline_model: - report_draft_acceptance_length(model) + try: + report_draft_acceptance_length(model) + except Exception as e: + print(e) diff --git a/examples/post_training/modelopt/finetune.sh b/examples/post_training/modelopt/finetune.sh index 0579dd69157..21493697374 100755 --- a/examples/post_training/modelopt/finetune.sh +++ b/examples/post_training/modelopt/finetune.sh @@ -14,6 +14,7 @@ MLM_DEFAULT_ARGS=" \ --distributed-timeout-minutes 30 \ --auto-detect-ckpt-format \ --export-te-mcore-model \ + --finetune \ " @@ -67,6 +68,8 @@ if [ -z ${MLM_EVAL_ARGS} ]; then " fi +export 
HF_TOKEN=${HF_TOKEN} + ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/finetune.py \ ${MODEL_ARGS} \ --tensor-model-parallel-size ${TP} \ diff --git a/examples/post_training/modelopt/prune.py b/examples/post_training/modelopt/prune.py index 7819b2ed2af..6a0178a1420 100644 --- a/examples/post_training/modelopt/prune.py +++ b/examples/post_training/modelopt/prune.py @@ -20,6 +20,7 @@ from modelopt.torch.export import import_mcore_gpt_from_hf from modelopt.torch.prune.plugins.mcore_minitron import SUPPORTED_HPARAMS +from megatron.core.parallel_state import get_pipeline_model_parallel_group, get_tensor_model_parallel_group from megatron.post_training.arguments import add_modelopt_args from megatron.post_training.checkpointing import load_modelopt_checkpoint from megatron.post_training.generate import simple_generate @@ -91,6 +92,21 @@ def add_prune_args(parser): type=int, help="Prune dimension of Mamba attention heads to this value", ) + group.add_argument( + "--target-num-moe-experts", + type=int, + help="Prune number of MoE experts to this value", + ) + group.add_argument( + "--target-moe-ffn-hidden-size", + type=int, + help="Prune MoE FFN hidden size to this value", + ) + group.add_argument( + "--target-moe-shared-expert-intermediate-size", + type=int, + help="Prune MoE shared expert intermediate size to this value", + ) group.add_argument( "--target-num-layers", type=int, @@ -104,6 +120,12 @@ def add_prune_args(parser): nargs="*", help="Drop specific model layers (1-indexed). 
Cannot be used with rest of the pruning options", ) + group.add_argument( + "--pruning-scores-path", + type=str, + default=None, + help="Path to cache and reuse pruning scores for pruning again to different params", + ) add_modelopt_args(parser) return parser @@ -125,6 +147,14 @@ def get_calib_dataloader(calib_size=1024, max_sequence_length=512): yield dataset[i][text_column][:max_sequence_length] +def get_params(model): + params = sum(p.numel() for p in model.parameters()) + reduced_params = torch.Tensor([params]).to(device=next(model.parameters()).device) + torch.distributed.all_reduce(reduced_params, group=get_pipeline_model_parallel_group()) + torch.distributed.all_reduce(reduced_params, group=get_tensor_model_parallel_group()) + return reduced_params.item() + + if __name__ == "__main__": initialize_megatron( extra_args_provider=add_prune_args, @@ -181,7 +211,7 @@ def _hf_dataset_forword_loop_func(model): simple_generate(model, tokens.input_ids.cuda(), osl=1) if args.layers_to_drop: - mtp.plugins.drop_mcore_language_model_layers(model, layers_to_drop=args.layers_to_drop) + mtp.mcore_minitron.drop_mcore_language_model_layers(model, layers_to_drop=args.layers_to_drop) else: print_rank_0("Pruning model...") export_config = { @@ -189,18 +219,22 @@ for k in SUPPORTED_HPARAMS if getattr(args, f"target_{k}", None) is not None } + config = {"forward_loop": _hf_dataset_forword_loop_func} + if args.pruning_scores_path is not None: + config["scores_path"] = args.pruning_scores_path mtp.prune( unwrapped_model, mode="mcore_minitron", constraints={"export_config": export_config}, dummy_input=None, # Not used - config={"forward_loop": _hf_dataset_forword_loop_func}, + config=config, ) # [WAR till modelopt 0.39]: Remove prune state to avoid converting again on restore which forces TP=1. 
if mto.ModeloptStateManager.has_state_for_mode_type("prune", model=unwrapped_model): mto.ModeloptStateManager.remove_state(unwrapped_model) print_rank_0(f"Pruned Model:\n {unwrapped_model}") + print_rank_0(f"Pruned Model Params: {get_params(unwrapped_model)/1e9:.2f}B") _custom_prompt_forward_loop_func(unwrapped_model) diff --git a/examples/post_training/modelopt/prune.sh b/examples/post_training/modelopt/prune.sh index ef86260b062..33f3e615e96 100755 --- a/examples/post_training/modelopt/prune.sh +++ b/examples/post_training/modelopt/prune.sh @@ -23,23 +23,27 @@ MLM_DEFAULT_ARGS=" # Example: export LAYERS_TO_DROP="1 5 10" # Define pruning argument mappings: "env_var:cli_arg" -PRUNE_ARG_MAPPINGS=( - "TARGET_FFN_HIDDEN_SIZE:--target-ffn-hidden-size" - "TARGET_HIDDEN_SIZE:--target-hidden-size" - "TARGET_NUM_ATTENTION_HEADS:--target-num-attention-heads" - "TARGET_NUM_QUERY_GROUPS:--target-num-query-groups" - "TARGET_MAMBA_NUM_HEADS:--target-mamba-num-heads" - "TARGET_MAMBA_HEAD_DIM:--target-mamba-head-dim" - "TARGET_NUM_LAYERS:--target-num-layers" - "LAYERS_TO_DROP:--layers-to-drop" +# List of environment variables we want to check for pruning CLI args +PRUNE_ENV_VARS=( + TARGET_FFN_HIDDEN_SIZE + TARGET_HIDDEN_SIZE + TARGET_NUM_ATTENTION_HEADS + TARGET_NUM_QUERY_GROUPS + TARGET_MAMBA_NUM_HEADS + TARGET_MAMBA_HEAD_DIM + TARGET_NUM_MOE_EXPERTS + TARGET_MOE_FFN_HIDDEN_SIZE + TARGET_MOE_SHARED_EXPERT_INTERMEDIATE_SIZE + TARGET_NUM_LAYERS + LAYERS_TO_DROP ) -# Build arguments from environment variables -PRUNE_ARGS="" -for mapping in "${PRUNE_ARG_MAPPINGS[@]}"; do - env_var="${mapping%%:*}" - cli_arg="${mapping##*:}" +# Build arguments from environment variables (TARGET_NUM_LAYERS -> --target-num-layers, etc.) +PRUNE_ARGS=${PRUNE_ARGS:-""} +for env_var in "${PRUNE_ENV_VARS[@]}"; do if [ ! 
-z "${!env_var}" ]; then + # prepend --, convert to lowercase, replace _ with - + cli_arg="--$(echo "${env_var}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')" PRUNE_ARGS="${PRUNE_ARGS} ${cli_arg} ${!env_var}" fi done @@ -59,6 +63,9 @@ else LOAD_ARGS="--load ${MLM_MODEL_CKPT}" fi + +set -ex + ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/prune.py \ ${MODEL_ARGS} \ ${LOAD_ARGS} \ @@ -67,6 +74,5 @@ ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/prune.py \ --tokenizer-model ${TOKENIZER_MODEL} \ --save ${MLM_MODEL_SAVE} \ --references "${MLM_REF_LABEL}" \ - --calib-size 1024 \ ${PRUNE_ARGS} \ ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS} diff --git a/examples/post_training/modelopt/slurm/env_setup_template.sh b/examples/post_training/modelopt/slurm/env_setup_template.sh new file mode 100644 index 00000000000..12b59f06eed --- /dev/null +++ b/examples/post_training/modelopt/slurm/env_setup_template.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +HF_MODEL_CKPT=/workspace/scratch/meta-llama/Llama-3.2-1B-Instruct +TP=1 +ETP=1 +EP=1 +PP=1 diff --git a/examples/post_training/modelopt/slurm/sbatch.sh b/examples/post_training/modelopt/slurm/sbatch.sh new file mode 100644 index 00000000000..3916c5de2b5 --- /dev/null +++ b/examples/post_training/modelopt/slurm/sbatch.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +#SBATCH -A +#SBATCH -p +#SBATCH --job-name= +#SBATCH --nodes=1 --ntasks-per-node=8 --gpus-per-node=8 +#SBATCH -t 04:00:00 +#SBATCH --exclusive --mem=0 --overcommit + +# Bash coloring +RED='\033[0;31m' +YELLOW='\033[0;33m' +GREEN='\033[0;32m' +BLUE='\033[0;34m' +PURPLE='\033[0;35m' +WHITE='\033[0;37m' + +# Predefined logging +MLM_ERROR="${RED}ERROR: ${WHITE}" +MLM_WARNING="${YELLOW}WARNING:${WHITE}" + +# CHANGE THE FOLLOWING TO YOUR DATA, MEGATRON, and CHECKPOINT DIR +if [[ -z ${USER_FSW} ]]; then + printf "${MLM_ERROR} Variable USER_FSW (read/write scratch space) must be set!\n" + exit 1 +fi + +if [ -z ${SANDBOX_DIR} ]; then + SANDBOX_DIR="$(pwd)" + printf "${MLM_WARNING} Variable SANDBOX_DIR not set! 
(default: ${SANDBOX_DIR})\n" +fi + +if [ -z ${SANDBOX_ENV_SETUP} ]; then + SANDBOX_ENV_SETUP=./env_setup_template.sh + printf "${MLM_WARNING} Variable SANDBOX_ENV_SETUP not set! (default: ${SANDBOX_ENV_SETUP})\n" +fi + +if [ -z ${CONTAINER_IMAGE} ]; then + CONTAINER_IMAGE="nvidia-modelopt-megatron:latest" + printf "${MLM_WARNING} Variable CONTAINER_IMAGE not set! (default: ${CONTAINER_IMAGE})\n" +fi + +if [ -z ${LAUNCH_SCRIPT} ]; then + LAUNCH_SCRIPT="python" + printf "${MLM_WARNING} Variable LAUNCH_SCRIPT not set! (default: ${LAUNCH_SCRIPT})\n" +fi + +# DO NOT MODIFY THE VALUES BELOW UNLESS YOU KNOW WHAT YOU ARE DOING!!! +DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` + +CONTAINER_MOUNT="${SANDBOX_DIR}:/workspace/nmm-sandbox,${USER_FSW}:/workspace/scratch" + +srun -l \ + --mpi=pmix \ + --output=%x_%j_$DATETIME.log \ + --container-image ${CONTAINER_IMAGE} \ + --container-workdir "/workspace/nmm-sandbox" \ + --container-mounts ${CONTAINER_MOUNT} \ + --export "HF_MODEL_CKPT=${HF_MODEL_CKPT},SANDBOX_ENV_SETUP=${SANDBOX_ENV_SETUP},LAUNCH_SCRIPT=${LAUNCH_SCRIPT}" \ + bash ${1} + +set +x + diff --git a/examples/post_training/modelopt/validate.sh b/examples/post_training/modelopt/validate.sh index 90ff4810117..796231e508e 100644 --- a/examples/post_training/modelopt/validate.sh +++ b/examples/post_training/modelopt/validate.sh @@ -16,8 +16,9 @@ if [ -z ${MLM_MODEL_CKPT} ]; then fi if [ -z ${PROMPTS_PATH} ]; then - printf "${MLM_ERROR} Variable ${PURPLE}PROMPTS_PATH${WHITE} must be set!\n" - exit 1 + PROMPT_ARGS="" +else + PROMPT_ARGS="--prompts-path ${PROMPTS_PATH}" fi if [ -z ${STEPS} ]; then @@ -40,6 +41,7 @@ if [ -z ${OSL} ]; then STEPS=64 fi +export HF_TOKEN=${HF_TOKEN} ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/validate.py \ ${MODEL_ARGS} \ @@ -49,9 +51,9 @@ ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/validate.py \ --pipeline-model-parallel-size ${PP} \ --tokenizer-model ${TOKENIZER_MODEL} \ --load ${MLM_MODEL_CKPT} \ - --prompts-path ${PROMPTS_PATH} \ --steps ${STEPS} \ --osl ${OSL} \ + 
${PROMPT_ARGS} \ ${GT_ARGS} \ ${SAVE_ARGS} \ ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS} diff --git a/gpt_builders.py b/gpt_builders.py index 9fa1aff72c7..2ef41846f2c 100644 --- a/gpt_builders.py +++ b/gpt_builders.py @@ -5,6 +5,7 @@ get_gpt_decoder_block_spec, get_gpt_layer_local_spec, get_gpt_layer_with_transformer_engine_spec, + get_gpt_layer_with_inference_spec, get_gpt_mtp_block_spec, get_gpt_decoder_layer_specs, ) @@ -43,6 +44,7 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None): use_te = args.transformer_impl == "transformer_engine" if args.num_experts or (args.linear_attention_type is not None): + assert not (config.transformer_impl == "inference_optimized") # Define the decoder block spec transformer_layer_spec = get_gpt_decoder_block_spec( config, @@ -52,12 +54,14 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None): vp_stage=vp_stage, ) elif args.heterogeneous_layers_config_path is not None: + assert not (config.transformer_impl == "inference_optimized") transformer_layer_spec = get_gpt_heterogeneous_layer_spec(config, use_te) else: # Define the decoder layer spec transformer_layer_spec = _get_transformer_layer_spec(use_te, config) mtp_block_spec = None if args.mtp_num_layers is not None: + assert not (config.transformer_impl == "inference_optimized") # Get GPT decoder layer specs for the model. 
if args.spec is not None: mtp_transformer_layer_spec = import_module(args.spec) @@ -120,6 +124,12 @@ def _get_transformer_layer_spec(use_te, config): use_kitchen=config.use_kitchen, fallback_to_eager_attn=config.fallback_to_eager_attn, ) + elif config.transformer_impl == "inference_optimized": + return get_gpt_layer_with_inference_spec( + args.qk_layernorm, + args.multi_latent_attention, + qk_l2_norm=args.qk_l2_norm, + ) else: return get_gpt_layer_local_spec( args.num_experts, diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index d6ef5f6210e..8a63e0f5cf7 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -898,9 +898,10 @@ def forward_hook(_module, inputs, output): # Register pre state_dict hook to ensure that the module parameters are # distributed before saving the state_dict. - self._state_dict_pre_hook = self.module.register_state_dict_pre_hook( - lambda *args, **kwargs: self._replace_param_with_distributed_if_needed() - ) + for name, module in self.named_modules(): + module.register_state_dict_pre_hook( + lambda *args, **kwargs: self._replace_param_with_distributed_if_needed() + ) @contextmanager def no_sync(self): diff --git a/megatron/core/fusions/fused_pad_routing_map.py b/megatron/core/fusions/fused_pad_routing_map.py index c382178b6c9..8e4d1763270 100644 --- a/megatron/core/fusions/fused_pad_routing_map.py +++ b/megatron/core/fusions/fused_pad_routing_map.py @@ -6,7 +6,7 @@ from packaging import version from megatron.core.jit import jit_fuser -from megatron.core.utils import null_decorator +from megatron.core.utils import experimental_fn, null_decorator try: import triton @@ -70,6 +70,7 @@ def _pad_routing_map_kernel( tl.store(output_row_ptr + token_indices, output_row, mask=token_mask) +@experimental_fn(introduced_with_version="0.13.0") @jit_fuser def 
fused_pad_routing_map(routing_map: torch.Tensor, pad_multiple: int) -> torch.Tensor: """Fused version of pad_routing_map. diff --git a/megatron/core/inference/communication_utils.py b/megatron/core/inference/communication_utils.py index 18fbb18f2f0..a5bfe75fbb6 100644 --- a/megatron/core/inference/communication_utils.py +++ b/megatron/core/inference/communication_utils.py @@ -71,8 +71,7 @@ def broadcast_from_last_pipeline_stage( tensor.shape ), f"Expected tensor of shape {size} but got {list(tensor.shape)}" assert dtype == tensor.dtype, f"Expected tensor of type {dtype} but got {tensor.dtype}" - _is_cuda(tensor) - assert tensor.is_contiguous() + _is_cuda_contiguous(tensor) else: tensor = torch.empty(size, dtype=dtype, device=torch.cuda.current_device()) diff --git a/megatron/core/inference/contexts/attention_context/mamba_metadata.py b/megatron/core/inference/contexts/attention_context/mamba_metadata.py index e9cd99a6c48..ecb0296559f 100644 --- a/megatron/core/inference/contexts/attention_context/mamba_metadata.py +++ b/megatron/core/inference/contexts/attention_context/mamba_metadata.py @@ -1,8 +1,28 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +from dataclasses import dataclass +from typing import List, Optional, Tuple + import torch +@dataclass +class MambaInferenceStateConfig: + """Config for initializing Mamba model inference state tensors.""" + + layer_type_list: List[str] + """ + A list of strings that indicates the layer type (Mamba / Attention / MLP) for each layer. + See `megatron/core/ssm/mamba_hybrid_layer_allocation.py` for the list of symbols. 
+ """ + + mamba_conv_states_shape: Tuple[int] + """Mamba conv states shape per request.""" + + mamba_ssm_states_shape: Tuple[int] + """Mamba ssm states shape per request.""" + + class MambaMetadata: """Manages the metadata tensors required for Mamba layers during inference.""" @@ -64,7 +84,7 @@ def update_cudagraph_mapping( """ self.request_to_mamba_state_idx_cudagraph_only[0:num_active_requests] = active_mamba_indices - def allocate_slot(self) -> int: + def allocate_slot(self) -> Optional[int]: """ Allocates a new slot for a request in the Mamba state buffers. diff --git a/megatron/core/inference/contexts/dynamic_block_allocator.py b/megatron/core/inference/contexts/dynamic_block_allocator.py index 4baa3f5212c..026ee47d094 100644 --- a/megatron/core/inference/contexts/dynamic_block_allocator.py +++ b/megatron/core/inference/contexts/dynamic_block_allocator.py @@ -13,60 +13,86 @@ class BlockAllocator: - Initializing a pool of block IDs - Allocating blocks from the pool - Releasing blocks back to the pool - - Managing the guaranteed block count for active requests Args: - block_count_total (int): Total number of blocks available in the buffer. - gtd_block_count (int): Number of blocks reserved for guaranteed requests. + context (DynamicInferenceContext): Dynamic inference context. + active_count (int): Total number of active blocks available in the buffer. + The full buffer size is 2*active_count, to accommodate an equal-size + space for paused requests that live on the CPU. 
""" - def __init__(self, block_count_total: int, gtd_block_count: int): - self.block_count_total = block_count_total - self.gtd_block_count = gtd_block_count + def __init__(self, context: "DynamicInferenceContext", total_count: int): - # Reserve last block ID as dummy block for decode-only inference steps - self.block_count_avail = self.block_count_total - 1 - self.dummy_block_idx = self.block_count_total - 1 + self.context = context + + active_count = (total_count - 1) // 2 # -1 for dummy_block_idx (see below) + active_count = max(1, active_count) # need at least one block + self.total_count = 2 * active_count + 1 # +1 for dummy_block_idx + self.total_avail = self.total_count - 1 # -1 for dummy_block_idx + self.active_count = active_count + self.paused_count = self.total_count - self.active_count - 1 # -1 for dummy_block_idx + self.dummy_block_idx = self.total_count - 1 # Initialize block pool as a "stack" data structure self.block_bag = torch.arange( - self.block_count_total, dtype=torch.int32, device=torch.cuda.current_device() + self.total_count, dtype=torch.int32, device=torch.cuda.current_device() ) - def is_memory_available(self, num_blocks: int, safe: bool = False) -> bool: - """Check if memory blocks are available. + def __str__(self): + return ( + f"total avail {self.total_avail} / {self.total_count - 1}" + f"; active {self.active_count}" + ) - Use 'safe' to avoid all requests being deadlocked. A fraction of the KV cache - memory buffer is reserved to guarantee that a minimum number of active - requests can run on any given step. 
+ def get_active_used(self): + """Compute number of active blocks used.""" + return ( + self.context.request_kv_block_counts[ + self.context.paused_request_count : self.context.total_request_count + ] + .sum() + .item() + ) + + def get_paused_used(self): + """Compute number of paused blocks used.""" + return ( + self.context.request_kv_block_counts[: self.context.paused_request_count].sum().item() + ) + + def get_active_avail(self): + """Compute number of active blocks available.""" + return self.active_count - self.get_active_used() + + def get_paused_avail(self): + """Compute number of paused blocks available.""" + return self.paused_count - self.get_paused_used() + + def is_memory_available(self, num_blocks: int) -> bool: + """Check if memory blocks are available. Args: num_blocks (int): Number of blocks to check. - safe (bool): Include extra space for guaranteeing ability to run - requests to completion. Return: (bool) Is memory available? """ - if safe: - return self.block_count_avail >= num_blocks + self.gtd_block_count - else: - return self.block_count_avail >= num_blocks + return self.get_active_avail() >= num_blocks - def allocate_memory_blocks(self, num_blocks: int = 1, safe: bool = False) -> Optional[Tensor]: + def allocate_memory_blocks(self, num_blocks: int) -> Optional[Tensor]: """Allocate memory blocks if available, else return None. Args: num_blocks (int): Number of blocks to allocate. - safe (bool): Include extra space for guaranteeing ability to run - requests to completion. Return: (Optional[Tensor]) Allocated block IDs. 
""" - if self.is_memory_available(num_blocks, safe): - self.block_count_avail -= num_blocks - return self.block_bag[self.block_count_avail : (self.block_count_avail + num_blocks)] + if self.is_memory_available(num_blocks): + self.total_avail -= num_blocks + block_ids = self.block_bag[self.total_avail : (self.total_avail + num_blocks)] + assert num_blocks == block_ids.numel() + return block_ids else: return None @@ -80,8 +106,8 @@ def release_memory_blocks(self, blocks: Tensor) -> None: None """ num_blocks = blocks.size(dim=0) - self.block_bag[self.block_count_avail : (self.block_count_avail + num_blocks)] = blocks - self.block_count_avail += num_blocks + self.block_bag[self.total_avail : (self.total_avail + num_blocks)] = blocks + self.total_avail += num_blocks def reset(self) -> None: """Reset the allocator to initial state. @@ -89,4 +115,4 @@ def reset(self) -> None: This resets the available block count to the entire memory pool (except for the dummy block). """ - self.block_count_avail = self.block_count_total - 1 + self.total_avail = self.total_count - 1 diff --git a/megatron/core/inference/contexts/dynamic_context.py b/megatron/core/inference/contexts/dynamic_context.py index 000b58200f8..d15daa90d10 100644 --- a/megatron/core/inference/contexts/dynamic_context.py +++ b/megatron/core/inference/contexts/dynamic_context.py @@ -1,5 +1,6 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+import logging import math import warnings from contextlib import nullcontext @@ -23,14 +24,11 @@ from megatron.core.inference.utils import tensor_swap from megatron.core.models.common.embeddings.rope_utils import apply_rotary_pos_emb from megatron.core.package_info import __version__ as mcore_version -from megatron.core.ssm.mamba_hybrid_layer_allocation import ( - Symbols, - get_layer_maps_from_layer_type_list, -) +from megatron.core.ssm.mamba_hybrid_layer_allocation import get_layer_maps_from_layer_type_list from megatron.core.transformer import TransformerConfig from megatron.core.utils import divide as core_divide -from .attention_context.mamba_metadata import MambaMetadata +from .attention_context.mamba_metadata import MambaInferenceStateConfig, MambaMetadata from .attention_context.mha_metadata import GraphedMHAMetadata, NonGraphedMHAMetadata from .base_context import BaseInferenceContext from .dynamic_block_allocator import BlockAllocator @@ -113,7 +111,7 @@ class BlockOverflowError(ContextOverflowError): class ActiveRequestCountOverflowError(ContextOverflowError): '''Used when `initialize_attention_state()` is called with - `num_warmup_requests > max_requests.''' + `num_warmup_requests > max_active_requests.''' def __init__(self, max_request_count, active_request_count): assert active_request_count > max_request_count @@ -124,6 +122,13 @@ def __init__(self, max_request_count, active_request_count): ) +class TensorStateDeallocatedError(ContextOverflowError): + """Context's tensor state is currently deallocated, such as when the engine + has been suspended.""" + + pass + + class ContextErrorFactory: """Factory class for serializing/deserializing context errors.""" @@ -175,6 +180,15 @@ class WarmupEngineMode(Enum): NON_DECODE = "non_decode" +def get_mem_size_str(n_bytes: int) -> str: + """Convert number of bytes to human-readable string.""" + for exp, suffix in ((4, "TB"), (3, "GB"), (2, "MB"), (3, "KB"), (0, "bytes")): + nquery = int(1024**exp) + if 
round(n_bytes / nquery) >= 1: + return "%.3g %s" % (n_bytes / nquery, suffix) + raise Exception(f"something went wrong, n_bytes={n_bytes}.") + + # pylint: disable=line-too-long class DynamicInferenceContext(BaseInferenceContext): """Inference context that is passed to the main model in order @@ -185,64 +199,37 @@ class DynamicInferenceContext(BaseInferenceContext): arbitrary sequence length may be added, paused, or removed from the context at any step. The only constraint is the maximum number of requests or tokens that the context is defined to support. For the block-level KV cache, a memory - buffer is allocated up front (size `buffer_size_gb`), that is divided into - blocks and dynamically assigned to requests. At any given step, any unassigned - blocks equate to unused space. - - Additionally, a fraction of the memory buffer (`gtd_request_fraction`, i.e., - the 'guaranteed' request fraction) is reserved for guaranteeing that a - minimum number of active requests may continue to generate tokens on any step. - The reason for this is that the context manages two pools of requests: 1) - active requests, and 2) paused requests. Paused requests are requests where - insufficient memory blocks remain for future assignment, and these requests - are set aside until enough memory blocks are available. Active requests are - requests that have sufficient memory blocks to proceed with their generations. - - The situation can arise where all requests eventually become paused due to all - memory blocks being assigned. In this case, there are no active requests and - thus no progress can be made. To handle this case, a fraction of the memory - buffer is reserved that only allows active requests, and no paused requests. - This fraction must be carefully tuned, as it can have an order of magnitude - impact on overall latency. 
+ buffer is allocated up front (size `buffer_size_gb` if `unified_memory_level` + == 0, or `2 * buffer_size_gb` if `unified_memory_level` == 1), that is + divided into blocks and dynamically assigned to requests. At any given step, + any unassigned blocks equate to unused space. Args: params_dtype (torch.dtype): Dtype used for KV cache. - num_layers (int): Number of layers. + num_layers (int): Number of layers on this pipeline parallel rank. kv_channels (int): Hidden dimension per attention head. num_attention_heads (int): Number of attention heads. max_sequence_length (int): Max possible sequence length (prompt + output) that will occur. - buffer_size_gb (float): Total buffer size (GB), shared by main and - fallback contexts. + buffer_size_gb (float): Buffer size reserved on the GPU for the KV cache. + if `unified_memory_level` >= 1, then CPU memory is additionally + utilized, resulting in a total buffer size of `2 * buffer_size_gb`. + Regardless of total buffer size, the KV cache is conceptually divided + into 50% active requests and 50% paused requests. + max_tokens (int): Max number of tokens to use for forward passes. This is + primarily limited by prefill activation memory usage. (Defaults to + 16384). block_size_tokens (int): Size of KV cache block size. - buffer_guaranteed_fraction (float): Fraction of the memory buffer that is - reserved to guarantee that one or more active requests are able to - run to completion. Without reserving this memory, paused requests are - able to fill the memory buffer and block execution of any requests. - buffer_overflow_factor (Optional[float]): Scaling factor over the buffer - size for auto computing `max_requests` and `max_tokens`. This scaling - factor is used for fitting more requests and tokens in the memory - buffer than it can safely hold, which in turn increases throughput. - max_requests_override (Optional[int]): If set, overrides value computed - from `buffer_overflow_factor`. 
- max_tokens_override (Optional[int]): If set, overrides value computed - from `buffer_overflow_factor`. tensor_model_parallel_size (Optional[int]): Tensor model parallel size. num_cuda_graphs (Optional[int]): Maximum number of cuda graphs to capture, - where the cuda graph batch sizes range from 1 to `max_requests` (as - computed below). Due to rounding, the actual number of cuda graphs may - not equal this argument. + where the cuda graph batch sizes range from 1 to `max_active_requests` + (as computed below). Due to rounding, the actual number of cuda graphs + may not equal this argument. materialize_only_last_token_logits (Optional[bool]): Whether to only materialize logits for the last token. This should be set to False if returning log probs. - layer_type_list (Optional[List[str]]): A list of strings that indicates - the layer type (Mamba / Attention / MLP) for each layer. - See `megatron/core/ssm/mamba_hybrid_layer_allocation.py` for the list - of symbols. This must be provided for hybrid models. - mamba_conv_states_shape: (Optional[Tuple[int]]): Mamba conv states shape per request. - This must be provided for hybrid models. - mamba_ssm_states_shape: (Optional[Tuple[int]]): Mamba ssm states shape per request. - This must be provided for hybrid models. + mamba_inference_state_config (Optional[MambaInferenceStateConfig]): The Mamba + inference state config if the model is a hybrid model. use_cuda_graphs_for_non_decode_steps (bool): If True, use cuda graphs for non-decode engine steps. unified_memory_level (Optional[int]): Set unified memory usage within the @@ -250,10 +237,17 @@ class DynamicInferenceContext(BaseInferenceContext): allocate `memory_buffer` in unified memory. Eventually, additional levels will be included to control other tensors within the context. use_flashinfer_fused_rope (bool): If True, use flashinfer's fused rope implementation. - If None, defaults to using flash-infer if available. + If None, defaults to using flash-infer if available. 
metrics_writer (Optional['WandbModule']): Wandb module for writing metrics. + num_request_metadata (Optional[int]): Number of metadata fields to track per request. + These represent metadata that is needed by the text generation controller, + and that must be kept in sync with active requests through update_requests. """ + DEFAULT_MAX_TOKENS = 16384 + TOKEN_ROUNDER = 64 + REQUEST_ROUNDER = 4 + def __init__( self, *, @@ -263,24 +257,20 @@ def __init__( num_attention_heads: int, max_sequence_length: int, buffer_size_gb: float, - buffer_guaranteed_fraction: float, + max_tokens: int = DEFAULT_MAX_TOKENS, block_size_tokens: int = 256, - buffer_overflow_factor: Optional[float] = None, - max_requests_override: Optional[int] = None, - max_tokens_override: Optional[int] = None, tensor_model_parallel_size: Optional[int] = None, cache_mla_latent: bool = False, kv_lora_rank: Optional[int] = None, qk_pos_emb_head_dim: Optional[int] = None, num_cuda_graphs: Optional[int] = None, materialize_only_last_token_logits: Optional[bool] = True, - layer_type_list: Optional[List[str]] = None, - mamba_conv_states_shape: Optional[Tuple[int]] = None, - mamba_ssm_states_shape: Optional[Tuple[int]] = None, + mamba_inference_state_config: Optional[MambaInferenceStateConfig] = None, use_cuda_graphs_for_non_decode_steps: bool = True, use_flashinfer_fused_rope: bool = False, - unified_memory_level: Optional[int] = 0, + unified_memory_level: Optional[int] = 1, metrics_writer: Optional['WandbModule'] = None, + num_request_metadata: Optional[int] = None, ): super().__init__(materialize_only_last_token_logits=materialize_only_last_token_logits) @@ -298,36 +288,40 @@ def __init__( tp_size = parallel_state.get_tensor_model_parallel_world_size() else: tp_size = tensor_model_parallel_size - hidden_size_per_attention_head = core_divide(projection_size, num_attention_heads) - num_attention_heads_per_partition = core_divide(num_attention_heads, tp_size) + self.hidden_size_per_attention_head = 
core_divide(projection_size, num_attention_heads) + self.num_attention_heads_per_partition = core_divide(num_attention_heads, tp_size) # Mamba states. - self.is_hybrid_model = layer_type_list is not None and Symbols.MAMBA in layer_type_list + self.is_hybrid_model = mamba_inference_state_config is not None if self.is_hybrid_model: + mamba_conv_states_shape = mamba_inference_state_config.mamba_conv_states_shape + mamba_ssm_states_shape = mamba_inference_state_config.mamba_ssm_states_shape assert ( mamba_conv_states_shape is not None ), "`mamba_conv_states_shape` must be specified for hybrid models" assert ( mamba_ssm_states_shape is not None ), "`mamba_ssm_states_shape` must be specified for hybrid models" - assert ( - not use_cuda_graphs_for_non_decode_steps + assert not ( + num_cuda_graphs is not None and use_cuda_graphs_for_non_decode_steps ), "Non-decode CUDA graphs not yet supported for hybrid models" # For hybrid models, the layer map converts the global layer index to the # corresponding attention layer index or Mamba layer index depending on the # layer type. - attention_layer_map, mamba_layer_map, _ = get_layer_maps_from_layer_type_list( - layer_type_list + attention_layer_map, mamba_layer_map, _, _ = get_layer_maps_from_layer_type_list( + mamba_inference_state_config.layer_type_list ) self.num_attention_layers = len(attention_layer_map) self.num_mamba_layers = len(mamba_layer_map) + self.mamba_conv_states_shape = mamba_conv_states_shape + self.mamba_ssm_states_shape = mamba_ssm_states_shape self.layer_map = attention_layer_map | mamba_layer_map else: # The layer map is the identity function for pure Transformer models. 
self.num_attention_layers = num_layers self.num_mamba_layers = 0 - (mamba_conv_states_shape, mamba_ssm_states_shape) = (None, None) + (self.mamba_conv_states_shape, self.mamba_ssm_states_shape) = (None, None) self.layer_map = {i: i for i in range(self.num_attention_layers)} if self.num_attention_layers == 0: @@ -340,10 +334,12 @@ def __init__( self.block_size_tokens = block_size_tokens if self.cache_mla_latent: # one vector c_t (rank) + optional RoPE phase slice - kv_reduced_dim = kv_lora_rank + qk_pos_emb_head_dim - self.kv_reduced_dim = kv_reduced_dim + self.kv_reduced_dim = kv_lora_rank + qk_pos_emb_head_dim self.block_size_bytes = ( - dtype_size_bytes * num_layers * self.block_size_tokens * kv_reduced_dim + dtype_size_bytes + * self.num_attention_layers + * self.block_size_tokens + * self.kv_reduced_dim ) else: self.block_size_bytes = ( @@ -351,62 +347,18 @@ def __init__( * 2 # key, value * self.num_attention_layers * self.block_size_tokens - * num_attention_heads_per_partition - * hidden_size_per_attention_head + * self.num_attention_heads_per_partition + * self.hidden_size_per_attention_head ) assert self.block_size_bytes > 0 - # Adjust buffer to be a multiple of block size. - buffer_size_bytes = int(buffer_size_gb * 1024**3) - buffer_size_bytes_rem = buffer_size_bytes % self.block_size_bytes - buffer_size_bytes = buffer_size_bytes - buffer_size_bytes_rem - mamba_states_memory_per_request = 0 if self.is_hybrid_model: - mamba_states_memory_per_request += math.prod(mamba_conv_states_shape) - mamba_states_memory_per_request += math.prod(mamba_ssm_states_shape) + mamba_states_memory_per_request += math.prod(self.mamba_conv_states_shape) + mamba_states_memory_per_request += math.prod(self.mamba_ssm_states_shape) mamba_states_memory_per_request *= self.num_mamba_layers mamba_states_memory_per_request *= dtype_size_bytes - # Compute max_requets, max_tokens from buffer size, overflow factor, and Mamba state size. 
- def bytes_to_max_requests_and_tokens(n_bytes): - bytes_per_token = self.block_size_bytes / self.block_size_tokens - cost_per_request_bytes = ( - mamba_states_memory_per_request + max_sequence_length * bytes_per_token - ) - # TODO(ksanthanam): Leave room for an extra request in the event of padding - # for non-decode CUDA graphs - n_requests = n_bytes / cost_per_request_bytes - n_tokens = n_requests * max_sequence_length - n_requests = self.round_up_requests(int(n_requests), tp_size=tp_size) - n_tokens = self.round_up_tokens(int(n_tokens), tp_size=tp_size) - return n_requests, n_tokens - - self.max_requests, self.max_tokens = bytes_to_max_requests_and_tokens(buffer_size_bytes) - if buffer_overflow_factor is not None: - self.max_requests = self.round_up_requests( - int(self.max_requests * buffer_overflow_factor), tp_size=tp_size - ) - self.max_tokens = self.round_up_tokens( - int(self.max_tokens * buffer_overflow_factor / 50.0), tp_size=tp_size - ) - - if max_requests_override is not None: - self.max_requests = ( - max_requests_override - if max_requests_override < self.REQUEST_ROUNDER - else self.round_up_requests(max_requests_override, tp_size=tp_size) - ) - - if max_tokens_override is not None: - self.max_tokens = self.round_up_tokens(max_tokens_override, tp_size=tp_size) - - self.max_requests = min(self.max_requests, self.max_tokens) # e.g., decode only. - - # Initialize context state. - self.params_dtype = params_dtype - self.max_sequence_length = max_sequence_length - # Unified memory. self.unified_memory_level = unified_memory_level if unified_memory_level > 0: @@ -419,6 +371,38 @@ def bytes_to_max_requests_and_tokens(n_bytes): ) self.unified_memory_level = 0 + # Initialize block allocator. 
+ buffer_size_bytes = int(buffer_size_gb * 1024**3) + block_count_total = buffer_size_bytes // ( + self.block_size_bytes + mamba_states_memory_per_request + ) + self.block_allocator = BlockAllocator( + context=self, + total_count=( + block_count_total if self.unified_memory_level == 0 else 2 * block_count_total + ), + ) + + # Set max_total_requests, max_active_requests, max_tokens. + self.max_total_requests = self.block_allocator.total_count - 1 # -1 for dummy block + self.max_active_requests = self.block_allocator.active_count + self.max_tokens = max_tokens or self.DEFAULT_MAX_TOKENS + + assert self.max_tokens >= self.max_active_requests, ( + f"max_tokens ({self.max_tokens}) must be >= " + f"max_active_requests ({self.max_active_requests}), " + "to have consistency between cuda graph sizes and the block table size." + ) + + # Track request metadata. + if num_request_metadata is None: + num_request_metadata = len(DynamicInferenceRequest.get_metadata_labels()) + self.num_request_metadata = num_request_metadata + + # Initialize context state. + self.params_dtype = params_dtype + self.max_sequence_length = max_sequence_length + # Request and token counts. self.total_request_count = 0 self.active_token_count = 0 @@ -427,93 +411,19 @@ def bytes_to_max_requests_and_tokens(n_bytes): self.padded_active_request_count = None self.paused_tokens = None - # Per-request state. 
- self.request_ids = torch.full( - (self.max_requests,), -1, dtype=torch.int32, device=torch.cuda.current_device() - ) - # request_query_lengths is the input prompt tokens length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation) - self.request_query_lengths = torch.empty_like(self.request_ids) - # request_output_lengths is len(input_prompt_tokens) + num_tokens_to_generate - self.request_output_lengths = torch.empty_like(self.request_ids) - # request_kv_length_offsets is the same as query length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation) - self.request_kv_length_offsets = torch.empty_like(self.request_ids) - self.request_kv_block_counts = torch.empty_like(self.request_ids) - self.request_last_kv_block_id = torch.empty_like(self.request_ids) - # request_last_kv_block_offset represents number of tokens in the last kv block - self.request_last_kv_block_offset = torch.empty_like(self.request_ids) - - # Per-token state. - self.token_to_input_ids = torch.full( - (self.max_tokens,), 0, dtype=torch.long, device=torch.cuda.current_device() - ) - self.token_to_pos_ids = torch.full_like(self.token_to_input_ids, 0) - self.token_to_request_idx = torch.empty_like(self.token_to_input_ids) - self.token_to_block_idx = torch.empty_like(self.token_to_input_ids) - # i.e For a set of tokens A B C D E F .. and block_size 4: - # token_to_position_in_request is [0, 1, 2, 3, 4, 5] - # token_to_local_position_within_kv_block is [0 , 1, 2, 3, 0, 1, 2] - self.token_to_position_in_request = torch.empty_like(self.token_to_input_ids) - self.token_to_local_position_within_kv_block = torch.empty_like(self.token_to_input_ids) - - # Calculate the total number of chunks available in the buffer - total_mamba_states_memory = mamba_states_memory_per_request * self.max_requests - block_count_total = ( - max(0, buffer_size_bytes - total_mamba_states_memory) // self.block_size_bytes - ) - - # Memory buffer. 
- ctx_manager = ( - torch.cuda.use_mem_pool(self.unified_memory_mempool) - if self.unified_memory_level > 0 - else nullcontext() - ) - with ctx_manager: - if cache_mla_latent: - self.memory_buffer = torch.full( - ( - self.num_attention_layers, - block_count_total, - self.block_size_tokens, - kv_reduced_dim, - ), - -1, - dtype=self.params_dtype, - device=torch.cuda.current_device(), - ) - else: - self.memory_buffer = torch.full( - ( - 2, # key and value - self.num_attention_layers, - block_count_total, - self.block_size_tokens, - num_attention_heads_per_partition, - hidden_size_per_attention_head, - ), - -1, - dtype=self.params_dtype, - device=torch.cuda.current_device(), - ) - # Block ids. self.max_kv_block_count = math.ceil(self.max_sequence_length / self.block_size_tokens) - self.request_to_kv_block_ids = torch.full( - (self.max_requests, self.max_kv_block_count), - -1, - dtype=torch.int, - device=torch.cuda.current_device(), - ) # Cuda graph token-counts (i.e., token counts used by cuda-graph steps, both decode and non-decode). self.cuda_graph_token_counts = None if num_cuda_graphs is not None: # Ensure valid num_cuda_graphs. - num_cuda_graphs = min(max(num_cuda_graphs, 1), self.max_requests) + num_cuda_graphs = min(max(num_cuda_graphs, 1), self.max_active_requests) # Cuda graph step size. cuda_graph_rounder = 8 - self.cuda_graph_step_size = self.max_requests / num_cuda_graphs + self.cuda_graph_step_size = self.max_active_requests / num_cuda_graphs self.cuda_graph_step_size = ( math.ceil(self.cuda_graph_step_size / cuda_graph_rounder) * cuda_graph_rounder ) @@ -522,13 +432,17 @@ def bytes_to_max_requests_and_tokens(n_bytes): # Cuda graph token counts. 
if num_cuda_graphs == 1: - self.cuda_graph_token_counts = [self.max_requests] + self.cuda_graph_token_counts = [self.max_active_requests] else: self.cuda_graph_token_counts = list( - range(self.cuda_graph_step_size, self.max_requests, self.cuda_graph_step_size) + range( + self.cuda_graph_step_size, + self.max_active_requests, + self.cuda_graph_step_size, + ) ) - if self.cuda_graph_token_counts[-1] != self.max_requests: - self.cuda_graph_token_counts.append(self.max_requests) + if self.cuda_graph_token_counts[-1] != self.max_active_requests: + self.cuda_graph_token_counts.append(self.max_active_requests) self.cuda_graph_token_counts.reverse() # Set used for validating active cuda graph token count. @@ -550,82 +464,205 @@ def bytes_to_max_requests_and_tokens(n_bytes): self.active_attn_metadata = None self.graph_attn_metadata["mha_metadata"] = GraphedMHAMetadata( - block_count_total=block_count_total, + block_count_total=self.block_allocator.total_count, max_kv_block_count=self.max_kv_block_count, - max_requests=self.max_requests, + max_requests=self.max_total_requests, block_size_tokens=self.block_size_tokens, max_seqlen=self.max_sequence_length, ) self.non_graph_attn_metadata["mha_metadata"] = NonGraphedMHAMetadata( - block_count_total=block_count_total, + block_count_total=self.block_allocator.total_count, max_kv_block_count=self.max_kv_block_count, - max_requests=self.max_requests, + max_requests=self.max_total_requests, block_size_tokens=self.block_size_tokens, max_seqlen=self.max_sequence_length, ) - # Guaranteed active requests. - # * See details in the class docstring above. `gtd_request_fraction` is - # the fraction of blocks in the memory buffer that are reserved for - # guaranteeing that some number of active requests can always proceed - # with their generations. The number of blocks defined by - # `buffer_guaranteed_fraction * block_count_total` is converted to a - # number of requests that this reserved space can safely handle - # (`gtd_request_count`). 
- # * Note: computing the size of this guaranteed space from blocks rather - # than bytes is safer due to the non-linear impacts of a large - # `block_size_tokens` or `max_kv_block_count`. When computing from - # blocks, this space will always be less than `block_count_total`. When - # computing from bytes, this space can unexpectedly be much larger than - # `block_count_total`, resulting in stalled generations. - gtd_block_count = int(buffer_guaranteed_fraction * block_count_total) - gtd_block_count = min(gtd_block_count, block_count_total) - self.gtd_request_count = max(1, gtd_block_count // self.max_kv_block_count) - self.gtd_block_count = self.gtd_request_count * self.max_kv_block_count - - # Initialize allocator for KV memory blocks - self.block_allocator = BlockAllocator( - block_count_total=block_count_total, gtd_block_count=self.gtd_block_count + # Deal with chunked prefill + self.chunked_prefill_request_id = -1 + + # FlashInfer. + if use_flashinfer_fused_rope is True: + assert HAVE_FLASHINFER, "flashinfer is not installed" + elif use_flashinfer_fused_rope is None: + use_flashinfer_fused_rope = HAVE_FLASHINFER + self.use_flashinfer_fused_rope = use_flashinfer_fused_rope + + # Allocate GPU state. + self.is_tensor_state_allocated = False + self.allocate_all_tensors(is_init=True) + + # Print info. + logging.info( + "DynamicInferenceContext: allocated context with active buffer size %s (%d blocks)." + % ( + get_mem_size_str(self.block_allocator.active_count * self.block_size_bytes), + self.block_allocator.active_count, + ) ) - # Optional state tensors for hybrid models - if self.is_hybrid_model: - self.mamba_metadata = MambaMetadata(max_requests=self.max_requests) + def allocate_all_tensors(self, *, is_init: bool) -> None: + """Allocate GPU state. + + This method is used for both 1) initial allocation, and 2) resuming the + GPU state after a suspend. + + Args: + is_init (bool): True if this is being called from `__init__()`. 
+ """ + + # Only allocate tensors when not using unified memory at all (level 0), + # or for initial allocation during `__init__()`. For levels 1 and 2, we do + # not perform any explicit allocations or deallocations after the initial + # call to `__init__()`. + if self.unified_memory_level != 0 and not is_init: + return + + # Mark allocated. + if self.is_tensor_state_allocated: + return + self.is_tensor_state_allocated = True + + # Validate no tensors allocated prior to this method. + for key in vars(self).keys(): + value = getattr(self, key) + assert not isinstance(value, torch.Tensor), ( + "All tensors should be allocated within `allocate_all_tensors()." + f"Please move tensor '{key}'." + ) + + # Per-request state. + self.request_ids = torch.full( + (self.max_total_requests,), -1, dtype=torch.int32, device=torch.cuda.current_device() + ) + # request_query_lengths is the input prompt tokens length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation) + self.request_query_lengths = torch.empty_like(self.request_ids) + # request_output_lengths is len(input_prompt_tokens) + num_tokens_to_generate + self.request_output_lengths = torch.empty_like(self.request_ids) + # request_kv_length_offsets is the same as query length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation) + self.request_kv_length_offsets = torch.empty_like(self.request_ids) + self.request_kv_block_counts = torch.empty_like(self.request_ids) + self.request_last_kv_block_id = torch.empty_like(self.request_ids) + # request_last_kv_block_offset represents number of tokens in the last kv block + self.request_last_kv_block_offset = torch.empty_like(self.request_ids) + self.request_to_kv_block_ids = torch.full( + (self.max_total_requests, self.max_kv_block_count), + -1, + dtype=torch.int, + device=torch.cuda.current_device(), + ) + + # Track request metadata. 
+ self.request_metadata = torch.empty( + (self.max_total_requests, self.num_request_metadata), + dtype=torch.float32, + device=torch.cuda.current_device(), + ) - with ctx_manager: + # Per-token state. + self.token_to_input_ids = torch.full( + (self.max_tokens,), 0, dtype=torch.long, device=torch.cuda.current_device() + ) + self.token_to_pos_ids = torch.full_like(self.token_to_input_ids, 0) + self.token_to_request_idx = torch.empty_like(self.token_to_input_ids) + self.token_to_block_idx = torch.empty_like(self.token_to_input_ids) + # i.e For a set of tokens A B C D E F .. and block_size 4: + # token_to_position_in_request is [0, 1, 2, 3, 4, 5] + # token_to_local_position_within_kv_block is [0 , 1, 2, 3, 0, 1, 2] + self.token_to_position_in_request = torch.empty_like(self.token_to_input_ids) + self.token_to_local_position_within_kv_block = torch.empty_like(self.token_to_input_ids) + + # Memory buffer. + def allocate_memory_buffer(): + """Allocate the memory buffer. This function is called below within + `with ctx_manager:`.""" + if self.cache_mla_latent: + self.memory_buffer = torch.full( + ( + self.num_attention_layers, + self.block_allocator.total_count, + self.block_size_tokens, + self.kv_reduced_dim, + ), + -1, + dtype=self.params_dtype, + device=torch.cuda.current_device(), + ) + else: + self.memory_buffer = torch.full( + ( + 2, # key and value + self.num_attention_layers, + self.block_allocator.total_count, + self.block_size_tokens, + self.num_attention_heads_per_partition, + self.hidden_size_per_attention_head, + ), + -1, + dtype=self.params_dtype, + device=torch.cuda.current_device(), + ) + + # Optional state tensors for hybrid models + def allocate_mamba_states(): + """Allocate Mamba states. 
This function is called below within + `with ctx_manager:`.""" + if self.is_hybrid_model: + self.mamba_metadata = MambaMetadata(max_requests=self.max_total_requests) self.mamba_conv_states = torch.zeros( - (self.num_mamba_layers, self.max_requests) + mamba_conv_states_shape, + (self.num_mamba_layers, self.max_total_requests) + self.mamba_conv_states_shape, dtype=self.params_dtype, device=torch.cuda.current_device(), ) self.mamba_ssm_states = torch.zeros( - (self.num_mamba_layers, self.max_requests) + mamba_ssm_states_shape, + (self.num_mamba_layers, self.max_total_requests) + self.mamba_ssm_states_shape, dtype=self.params_dtype, device=torch.cuda.current_device(), ) - else: - self.mamba_metadata = None - - # Store the dummy block idx reference for convenience - self.dummy_block_idx = self.block_allocator.dummy_block_idx + else: + self.mamba_metadata = None - # Deal with chunked prefill - self.chunked_prefill_request_id = -1 + # Allocate `ctx_manager`-managed buffers. (For currently unknown reasons, + # `ctx_manager` can only be used once.) + ctx_manager = ( + torch.cuda.use_mem_pool(self.unified_memory_mempool) + if self.unified_memory_level > 0 + else nullcontext() + ) + with ctx_manager: + allocate_memory_buffer() + allocate_mamba_states() # Reset attention and Mamba state. self.reset_attention_state() self.reset_mamba_state() - if use_flashinfer_fused_rope is True: - assert HAVE_FLASHINFER, "flashinfer is not installed" - elif use_flashinfer_fused_rope is None: - use_flashinfer_fused_rope = HAVE_FLASHINFER - self.use_flashinfer_fused_rope = use_flashinfer_fused_rope + def deallocate_all_tensors(self): + """Deallocate GPU state. - TOKEN_ROUNDER = 64 - REQUEST_ROUNDER = 4 + This method is used for suspending the dynamic engine. + """ + + # Only deallocate tensors when not using unified memory at all (level 0). + # For levels 1 and 2, we do not perform any explicit allocations or + # deallocations after the initial call to `__init__()`. 
+ if self.unified_memory_level != 0: + return + + # Mark deallocated. + if not self.is_tensor_state_allocated: + return + self.is_tensor_state_allocated = False + + # Delete all tensor attributes. + # TODO(@lmcafee): check that device == 'cuda'? + keys = list(vars(self).keys()) + for key in keys: + value = getattr(self, key) + if isinstance(value, torch.Tensor): + delattr(self, key) @classmethod def round_up_tokens(cls, value, tp_size=None): @@ -656,13 +693,13 @@ def from_config( max_batch_size: int, buffer_size_gb: float = 40, num_cuda_graphs: int = None, + mamba_inference_state_config: Optional[MambaInferenceStateConfig] = None, ): """ Instantiate a `DynamicInferenceContext` from a `TransformerConfig` and an `InferenceWrapperConfig`. """ # TODO: Add other necessary configs from inference_config - buffer_guaranteed_fraction = 0.1 model_config = model.config max_sequence_length = ( inference_config.inference_max_seq_length or model_config.max_sequence_length @@ -670,16 +707,15 @@ def from_config( max_sequence_length = max(max_sequence_length, max_batch_size) return cls( params_dtype=inference_config.params_dtype, - num_layers=model_config.num_layers, + num_layers=model_config.num_layers // model_config.pipeline_model_parallel_size, kv_channels=model_config.kv_channels, num_attention_heads=model_config.num_query_groups, max_sequence_length=inference_config.inference_max_seq_length, buffer_size_gb=buffer_size_gb, - buffer_guaranteed_fraction=buffer_guaranteed_fraction, materialize_only_last_token_logits=False, - max_requests_override=max_batch_size, num_cuda_graphs=num_cuda_graphs, use_flashinfer_fused_rope=None, + mamba_inference_state_config=mamba_inference_state_config, ) @classmethod @@ -820,6 +856,7 @@ def key_value_cache(self, layer_number: int) -> Tuple[Tensor, Tensor]: to blocks within the block-level memory buffer. 
""" attention_layer_number = self.layer_map[layer_number - 1] + if self.cache_mla_latent: return ( self.memory_buffer[attention_layer_number], @@ -988,7 +1025,7 @@ def initialize_attention_state( Args: num_warmup_tokens (Optional[int]): Number of tokens to use for warming up cuda graphs. Must be less than or equal to - `max_requests`. + `max_active_requests`. warmup_engine_mode (WarmupEngineMode): Denote whether to setup for a decode or a non-decode cuda-graph warmup. num_warmup_requests (Optional[int]): [DEPRECATED] Use num_warmup_tokens instead. @@ -1008,8 +1045,8 @@ def initialize_attention_state( # warmup both decode and non-decode engine steps if num_warmup_tokens is not None: - if num_warmup_tokens > self.max_requests: - raise ActiveRequestCountOverflowError(self.max_requests, num_warmup_tokens) + if num_warmup_tokens > self.max_active_requests: + raise ActiveRequestCountOverflowError(self.max_active_requests, num_warmup_tokens) if warmup_engine_mode == WarmupEngineMode.NON_DECODE: assert self.non_decode_cuda_graphs, "Set non-decode cuda graphs to True" @@ -1028,7 +1065,9 @@ def initialize_attention_state( math.ceil(active_token_count / self.cuda_graph_step_size) * self.cuda_graph_step_size ) - self.padded_active_token_count = min(self.padded_active_token_count, self.max_requests) + self.padded_active_token_count = min( + self.padded_active_token_count, self.max_active_requests + ) assert ( self.padded_active_token_count in self.cuda_graph_token_counts_set ), f"padded_active_token_count: {self.padded_active_token_count} not in cuda_graph_token_counts_set: {self.cuda_graph_token_counts_set}" @@ -1038,7 +1077,7 @@ def initialize_attention_state( if self.is_decode_only(): # For decode-only, the padded active token count cannot exceed max-requests. self.padded_active_token_count = min( - self.padded_active_token_count, self.max_requests + self.padded_active_token_count, self.max_active_requests ) # How are we calculating the padded active request count? 
@@ -1056,7 +1095,7 @@ def initialize_attention_state( # Update token position indexes. self.token_to_block_idx[self.active_token_count : self.padded_active_token_count] = ( - self.dummy_block_idx + self.block_allocator.dummy_block_idx ) self.token_to_local_position_within_kv_block[ self.active_token_count : self.padded_active_token_count @@ -1131,6 +1170,7 @@ def reset(self) -> None: self.request_last_kv_block_id.fill_(-1) self.request_last_kv_block_offset.fill_(0) self.request_to_kv_block_ids.fill_(-1) + self.request_metadata.fill_(0) # Reset token indexes. self.token_to_input_ids.fill_(0) @@ -1198,20 +1238,20 @@ def last_token_logits(self, logits: Tensor) -> Tensor: return last_token_logits - def check_availability( - self, req: DynamicInferenceRequest, safe: bool = False - ) -> (bool, bool, bool): + def check_availability(self, req: DynamicInferenceRequest) -> (bool, bool, bool): """ Check if the request can be added to the context. """ - request_can_be_added = self.total_request_count < self.max_requests + request_can_be_added = ( + self.total_request_count - self.paused_request_count < self.max_active_requests + ) request_tokens_can_be_added = ( self.active_token_count + req.remaining_prompt_length <= self.max_tokens ) blocks = math.ceil( (req.remaining_prompt_length + req.finished_chunk_token_count) / self.block_size_tokens ) - math.ceil(req.finished_chunk_token_count / self.block_size_tokens) - kv_cache_available = self.block_allocator.is_memory_available(blocks, safe=safe) + kv_cache_available = self.block_allocator.is_memory_available(blocks) return request_can_be_added, request_tokens_can_be_added, kv_cache_available def add_request(self, req: DynamicInferenceRequest, chunk_length: Optional[int] = None) -> None: @@ -1224,6 +1264,12 @@ def add_request(self, req: DynamicInferenceRequest, chunk_length: Optional[int] Return: None """ + + # If tensor state is deallocated, do not add request. 
+ if not self.is_tensor_state_allocated: + raise TensorStateDeallocatedError(req.request_id) + + # Chunk length. if chunk_length is None: chunk_length = req.remaining_prompt_length @@ -1251,9 +1297,7 @@ def add_request(self, req: DynamicInferenceRequest, chunk_length: Optional[int] num_blocks_needed = overall_required_blocks - already_allocated_blocks if num_blocks_needed > 0: - new_block_ids = self.block_allocator.allocate_memory_blocks( - num_blocks_needed, safe=not is_chunked_prefill - ) + new_block_ids = self.block_allocator.allocate_memory_blocks(num_blocks_needed) if new_block_ids is None or len(new_block_ids) != num_blocks_needed: raise BlockOverflowError(req.request_id) @@ -1271,13 +1315,22 @@ def add_request(self, req: DynamicInferenceRequest, chunk_length: Optional[int] else: current_id = self.total_request_count - if current_id >= self.max_requests: + if current_id >= self.max_active_requests: raise RequestOverflowError(req.request_id) if self.active_token_count + chunk_length > self.max_tokens: raise TokenOverflowError(req.request_id) self.request_ids[current_id] = req.request_id + # Handle request metadata. + metadata = req.tracked_metadata + assert ( + len(metadata) == self.num_request_metadata + ), "Request added to context with invalid metadata length" + self.request_metadata[current_id] = torch.tensor( + metadata, dtype=torch.float32, device=self.request_metadata.device + ) + # Handle length and block assignments. 
self.request_query_lengths[current_id] = chunk_length self.request_output_lengths[current_id] = ( req.finished_chunk_token_count @@ -1342,6 +1395,7 @@ def _move_book_keeping_tensors(self, src_idxs, dst_idxs, next_tokens): self.request_kv_length_offsets[dst_idxs] = self.request_kv_length_offsets[src_idxs] self.request_query_lengths[dst_idxs] = self.request_query_lengths[src_idxs] self.request_output_lengths[dst_idxs] = self.request_output_lengths[src_idxs] + self.request_metadata[dst_idxs] = self.request_metadata[src_idxs] self.request_ids[dst_idxs] = self.request_ids[src_idxs] next_tokens[dst_idxs] = next_tokens[src_idxs] @@ -1362,6 +1416,7 @@ def _swap_book_keeping_tensors(self, src_idxs, dst_idxs, next_tokens): tensor_swap(self.request_kv_length_offsets, src_idxs, dst_idxs) tensor_swap(self.request_query_lengths, src_idxs, dst_idxs) tensor_swap(self.request_output_lengths, src_idxs, dst_idxs) + tensor_swap(self.request_metadata, src_idxs, dst_idxs) tensor_swap(self.request_ids, src_idxs, dst_idxs) tensor_swap(next_tokens, src_idxs, dst_idxs) tensor_swap(self.request_to_kv_block_ids, src_idxs, dst_idxs) @@ -1372,6 +1427,14 @@ def _swap_book_keeping_tensors(self, src_idxs, dst_idxs, next_tokens): if self.is_hybrid_model: tensor_swap(self.mamba_metadata.request_to_mamba_state_idx, src_idxs, dst_idxs) + def get_index_of_chunked_prefill_request(self) -> int: + """Get the index of the chunked prefill request in the context. + + Return: + (int) Index of the chunked prefill request, or -1 if none exists. + """ + return torch.where(self.request_ids == self.chunked_prefill_request_id)[0][0] + # TODO: see if we can compile this function def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> Tensor: """Update context state after calling engine.step(). @@ -1389,7 +1452,7 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T between these request groups. 
- 0:paused_request_count -> paused requests - paused_request_count:total_request_count -> active requests - - total_request_count:max_requests -> completed requests are moved here. + - total_request_count:max_active_requests -> completed requests are moved here. The reason for maintaining contiguous tensors rather than multiple smaller (e.g., per-group or per-request) tensors is for both 1) speed (avoid unnecessary tensor allocations), and 2) compatibility with the @@ -1413,6 +1476,7 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T Return: (Tensor) Newly paused request IDs. """ + # 1. The active token mask tells us which requests are still active and which are completed # active_request_count -> This corresponds to requests that have not reached EOD or max length # finished_request_count are requests that have reached the termination criterion @@ -1432,6 +1496,9 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T # Reset attention state. self.reset_attention_state() + # Update total_request_count. + self.total_request_count = active_request_count + self.paused_request_count + # 2. If no paused requests are present and no active requests we release memory and reset. if active_request_count + self.paused_request_count == 0: if finished_request_count > 0: @@ -1524,13 +1591,19 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T if self.chunked_prefill_request_id != -1: # find the id in request_ids that is the chunked_prefill_request_id. Only one request should be chunked. 
- pos = torch.where(self.request_ids == self.chunked_prefill_request_id)[0][0] - active_requests_requiring_new_block[pos] = 0 # chunked prefill should not be paused + active_requests_requiring_new_block[self.get_index_of_chunked_prefill_request()] = ( + 0 # chunked prefill should not be paused + ) active_requests_requiring_new_block_count = ( (active_requests_requiring_new_block == 1).sum().item() ) + if active_requests_requiring_new_block_count > 0: + newly_paused_request_ids = self.request_ids[ + torch.nonzero(active_requests_requiring_new_block) + self.paused_request_count + ] + # Swap unfinished active requests on the left side with paused requests on the right side # NOTE : We add paused request count because we concatenate # paused tokens to the left at the beginning of update requests @@ -1563,7 +1636,6 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T self._move_book_keeping_tensors( src_idxs=src_idxs, dst_idxs=dst_idxs, next_tokens=next_tokens ) - newly_paused_request_ids = self.request_ids[dst_idxs] self.paused_request_count += active_requests_requiring_new_block_count active_request_count -= active_requests_requiring_new_block_count @@ -1572,26 +1644,26 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T # We determine how many requests we can resume and resume them # Assign released blocks to paused requests. # todo: @shanmugamr, un-pause requests using FIFO, rather than LIFO. - num_non_gtd_blocks = max(0, self.block_allocator.block_count_avail - self.gtd_block_count) - if num_non_gtd_blocks: - # if we have non-gtd blocks, use them. Do not dip into the gtd-block pool - resume_request_count = min(num_non_gtd_blocks, self.paused_request_count) - else: - # only dip into the gtd-block pool if we have run out of non-gtd-blocks and the active - # request count has fallen below a certain threshold. 
+ resume_request_count = 0 + if self.paused_request_count > 0: + active_block_count_avail = self.block_allocator.get_active_avail() + paused_block_counts = self.request_kv_block_counts[: self.paused_request_count] + paused_block_counts = paused_block_counts.flip(dims=[0]) + paused_block_counts += 1 # +1 for newly added block + paused_block_counts_cumsum = paused_block_counts.cumsum(dim=0) resume_request_count = min( - max(self.gtd_request_count - active_request_count, 0), self.paused_request_count + torch.nonzero(paused_block_counts_cumsum <= active_block_count_avail).numel(), + self.block_allocator.total_avail, ) self.paused_request_count -= resume_request_count active_request_count += resume_request_count assert active_request_count > 0, "active_request_count == %d." % active_request_count - # finally, swap the chunked prefill to the end of the active requests to obey the invariant + # finally, swap the chunked prefill to the end of the active requests to obey the invariance if self.chunked_prefill_request_id != -1: - pos = torch.where(self.request_ids == self.chunked_prefill_request_id)[0][0] self._swap_book_keeping_tensors( - src_idxs=torch.tensor([pos]), + src_idxs=torch.tensor([self.get_index_of_chunked_prefill_request()]), dst_idxs=torch.tensor([active_request_count + self.paused_request_count - 1]), next_tokens=next_tokens, ) @@ -1640,6 +1712,7 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T == 0 ), "The request_last_kv_block_offset should be 0 for the requests that just got resumed this step. " + assert resume_request_count <= self.block_allocator.total_avail block_ids = self.block_allocator.allocate_memory_blocks(resume_request_count) row_idx = torch.arange( self.paused_request_count, @@ -1761,11 +1834,11 @@ def get_kvcache_utilization_stats(self) -> dict: } """ # Total usable blocks exclude the reserved dummy block. 
- total_blocks = max(self.block_allocator.block_count_total - 1, 1) - block_count_avail = int(self.block_allocator.block_count_avail) + total_blocks = max(self.block_allocator.total_count - 1, 1) + block_count_avail = int(self.block_allocator.total_avail) # Overall allocated blocks in the buffer right now. - allocated_blocks = (self.block_allocator.block_count_total - 1) - block_count_avail + allocated_blocks = (self.block_allocator.total_count - 1) - block_count_avail allocated_blocks = int(max(0, allocated_blocks)) # Active unique blocks referenced by current active requests only. @@ -1787,7 +1860,6 @@ def get_kvcache_utilization_stats(self) -> dict: active_utilization = float(active_unique_blocks) / float(total_blocks) # Diagnostic helpers - num_non_gtd_blocks = max(0, block_count_avail - int(self.gtd_block_count)) total_request_count = int(self.total_request_count) return { 'total_blocks': int(total_blocks), @@ -1797,10 +1869,9 @@ def get_kvcache_utilization_stats(self) -> dict: 'active_utilization': active_utilization, 'active_request_count': int(self.get_active_request_count()), 'paused_request_count': int(self.paused_request_count), - 'gtd_block_count': int(self.gtd_block_count), 'block_count_avail': int(block_count_avail), - 'num_non_gtd_blocks': int(num_non_gtd_blocks), 'active_token_count': int(self.active_token_count), 'total_request_count': int(total_request_count), - 'max_requests': int(self.max_requests), + 'max_total_requests': int(self.max_total_requests), + 'max_active_requests': int(self.max_active_requests), } diff --git a/megatron/core/inference/data_parallel_inference_coordinator.py b/megatron/core/inference/data_parallel_inference_coordinator.py index 0045d5947a1..e1fe7b21566 100644 --- a/megatron/core/inference/data_parallel_inference_coordinator.py +++ b/megatron/core/inference/data_parallel_inference_coordinator.py @@ -9,7 +9,7 @@ import torch -from megatron.core.inference.headers import Headers +from megatron.core.inference.headers import 
Headers, UnknownHeaderError try: import zmq @@ -109,6 +109,8 @@ def __init__(self, inference_coordinator_port: int, data_parallel_size: int): self.identities_of_data_parallel_ranks.append(identity) logging.info("Inference Coordinator: Connected with data parallel ranks...") self.data_parallel_rank_iterator = cycle(self.identities_of_data_parallel_ranks) + self.data_parallel_pause_acks = set() + self.data_parallel_stop_acks = set() self.request_id_to_client_id = {} self.request_id_to_client_request_id = {} @@ -151,7 +153,7 @@ def start(self): # print(f"New client connected: {sender_identity}") known_clients.add(sender_identity) self.router_socket.send_multipart( - [sender_identity, msgpack.packb([Headers.ACK.value], use_bin_type=True)] + [sender_identity, msgpack.packb([Headers.CONNECT_ACK.value], use_bin_type=True)] ) elif header == Headers.SUBMIT_REQUEST: @@ -193,7 +195,13 @@ def start(self): ), ] ) - elif header in [Headers.PAUSE, Headers.UNPAUSE, Headers.STOP]: + elif header in [ + Headers.PAUSE, + Headers.UNPAUSE, + Headers.SUSPEND, + Headers.RESUME, + Headers.STOP, + ]: # control signals for the engine # broadcast to all data parallel ranks if sender_identity not in known_clients: @@ -202,13 +210,57 @@ def start(self): self.router_socket.send_multipart( [data_parallel_rank_id, msgpack.packb([header.value], use_bin_type=True)] ) + if header == Headers.UNPAUSE: + self.data_parallel_pause_acks = set() + elif header == Headers.PAUSE_ACK: + # control signal ack from the engine + assert sender_identity in self.identities_of_data_parallel_ranks + assert sender_identity not in self.data_parallel_pause_acks + self.data_parallel_pause_acks.add(sender_identity) + # route to all clients only once we have gotten an ack from all data parallel ranks + if len(self.data_parallel_pause_acks) == self.data_parallel_size: + for client_id in known_clients: + self.router_socket.send_multipart( + [ + client_id, + msgpack.packb([header.value, sender_identity], use_bin_type=True), + ] 
+ ) + for data_parallel_rank_id in self.identities_of_data_parallel_ranks: + self.router_socket.send_multipart( + [ + data_parallel_rank_id, + msgpack.packb([Headers.PAUSE_ACK.value], use_bin_type=True), + ] + ) + elif header == Headers.STOP_ACK: + # control signal ack from the engine + assert sender_identity in self.identities_of_data_parallel_ranks + assert sender_identity not in self.data_parallel_stop_acks + self.data_parallel_stop_acks.add(sender_identity) + # route to all clients only once we have gotten an ack from all data parallel ranks + if len(self.data_parallel_stop_acks) == self.data_parallel_size: + for client_id in known_clients: + self.router_socket.send_multipart( + [ + client_id, + msgpack.packb([header.value, sender_identity], use_bin_type=True), + ] + ) + for data_parallel_rank_id in self.identities_of_data_parallel_ranks: + self.router_socket.send_multipart( + [ + data_parallel_rank_id, + msgpack.packb([Headers.STOP_ACK.value], use_bin_type=True), + ] + ) elif header == Headers.ENGINE_REPLY: # This is the output of a single engine step on some data parallel rank. 
assert sender_identity in self.identities_of_data_parallel_ranks - finished_requests = deserialized_payload[1] + finished_request_records = deserialized_payload[1] - for finished_request in finished_requests: - fid = finished_request["request_id"] + for finished_request_record in finished_request_records: + fid = finished_request_record["requests"][0]["request_id"] client_identity = self.request_id_to_client_id[fid] client_request_identity = self.request_id_to_client_request_id[fid] del self.request_id_to_client_id[fid] @@ -218,11 +270,15 @@ def start(self): [ client_identity, msgpack.packb( - [client_request_identity, finished_request], use_bin_type=True + [header.value, client_request_identity, finished_request_record], + use_bin_type=True, ), ] ) + else: + raise UnknownHeaderError(header) + @classmethod def entrypoint( cls, ready_event: Event, inference_coordinator_port: int, data_parallel_size: int diff --git a/megatron/core/inference/engines/__init__.py b/megatron/core/inference/engines/__init__.py index 9cd902d9d63..d6a4f6eb694 100644 --- a/megatron/core/inference/engines/__init__.py +++ b/megatron/core/inference/engines/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
from .abstract_engine import AbstractEngine -from .dynamic_engine import DynamicInferenceEngine +from .dynamic_engine import DynamicInferenceEngine, EngineSuspendedError from .static_engine import StaticInferenceEngine diff --git a/megatron/core/inference/engines/dynamic_engine.py b/megatron/core/inference/engines/dynamic_engine.py index 4bff4f85fa8..5fad1369308 100644 --- a/megatron/core/inference/engines/dynamic_engine.py +++ b/megatron/core/inference/engines/dynamic_engine.py @@ -4,10 +4,13 @@ import logging import multiprocessing import os +import socket import struct import time import warnings from collections import deque +from contextlib import contextmanager +from dataclasses import dataclass from datetime import datetime from itertools import repeat from typing import Dict, List, Optional, Tuple, Union @@ -27,14 +30,19 @@ DataParallelInferenceCoordinator, ) from megatron.core.inference.engines.abstract_engine import AbstractEngine -from megatron.core.inference.headers import Headers -from megatron.core.inference.inference_request import DynamicInferenceRequest, Status +from megatron.core.inference.headers import Headers, UnknownHeaderError +from megatron.core.inference.inference_request import ( + DynamicInferenceRequest, + DynamicInferenceRequestRecord, + Status, +) from megatron.core.inference.sampling_params import SamplingParams from megatron.core.inference.text_generation_controllers.text_generation_controller import ( TextGenerationController, ) from megatron.core.inference.utils import Counter, await_process_event -from megatron.core.utils import get_asyncio_loop, trace_async_exceptions +from megatron.core.transformer.cuda_graphs import delete_cuda_graphs +from megatron.core.utils import get_asyncio_loop, internal_api, trace_async_exceptions try: from tqdm import tqdm @@ -65,6 +73,19 @@ HAVE_WANDB = False wandb = None +try: + import psutil + + HAVE_PSUTIL = True +except ImportError: + HAVE_PSUTIL = False + + +class EngineSuspendedError(Exception): 
+ """Engine is currently suspended and not performing steps.""" + + pass + def format_mem_bytes(mem_bytes): """Convert a byte count to a human-readable string in tb, gb, mb, kb, or bytes.""" @@ -75,6 +96,14 @@ def format_mem_bytes(mem_bytes): return "%d bytes" % mem_bytes +@dataclass(kw_only=True) +class RequestEntry: + """Entry in the engine's `self.requests` dict.""" + + record: DynamicInferenceRequestRecord + future: asyncio.Future + + # pylint: disable=line-too-long class DynamicInferenceEngine(AbstractEngine): """The dynamic inference engine. @@ -94,9 +123,6 @@ class DynamicInferenceEngine(AbstractEngine): batching and a dynamic block-level KV cache (similar to paged attention). random_seed (Optional[int]): Use a random seed if you want deterministic results. Defaults to None. - static_sampling (bool): If True, all requests are assumed to have the same - sampling parameters. This avoids needing to loop through all requests and - their sampling parameters every generation step, improving latency. inference_logging_step_interval (int): The step interval at which to log inference metrics to wandb. Defaults to 0, which means no logging. """ @@ -110,17 +136,9 @@ def __init__( *, track_paused_request_events: bool = False, enable_chunked_prefill: bool = True, - static_sampling: bool = False, inference_logging_step_interval: int = 0, ): - if enable_cuda_graph is not None: - warnings.warn( - "The `enable_cuda_graph` argument is deprecated and will be " - "removed in `megatron-core 0.15`. `enable_cuda_graph` is now " - "read directly from the transformer config object." - ) - assert isinstance( controller, TextGenerationController ), f"controller must be a TextGenerationController, got {type(controller)}" @@ -129,31 +147,41 @@ def __init__( ), f"context must be a DynamicInferenceContext, got {type(context)}" assert isinstance(random_seed, int), f"random_seed must be an int, got {type(random_seed)}" - self.request_counter = Counter() + # Deprecate `enable_cuda_graph`. 
+ if enable_cuda_graph is not None: + warnings.warn( + "The `enable_cuda_graph` argument is deprecated and will be " + "removed in `megatron-core 0.15`. `enable_cuda_graph` is now " + "read directly from the transformer config object." + ) + self.enable_cuda_graph = enable_cuda_graph + else: + self.enable_cuda_graph = ( + controller.inference_wrapped_model.model.config.enable_cuda_graph + ) + + # Initialization options. self.controller = controller self.context = context self.random_seed = random_seed self.track_paused_request_events = track_paused_request_events - self.step_count = 0 - self.finished_request_count = 0 - self.waiting_request_ids = deque() - self.failed_request_ids = [] # deque() - self.request_counter = Counter() - self.requests: Dict[int, DynamicInferenceRequest] = {} - self.request_completion_futures: Dict[int, asyncio.Future] = {} - self.step_start_event = torch.cuda.Event(enable_timing=True) - self.step_end_event = torch.cuda.Event(enable_timing=True) - self.paused = False - self.stopped = False self.enable_chunked_prefill = enable_chunked_prefill - self.static_sampling = static_sampling - self.inference_logging_step_interval = inference_logging_step_interval + self.unified_memory_level = context.unified_memory_level + + if enable_cuda_graph is not None: + self.cuda_graph_impl = "local" if enable_cuda_graph else "none" + else: + self.cuda_graph_impl = controller.inference_wrapped_model.model.config.cuda_graph_impl + + # Initialize engine. 
+ self.reset() + # Configure wandb to use separate step counter for inference metrics (only once) if self.inference_logging_step_interval > 0 and self.context.metrics_writer is not None: logging.info( f"\033[1;93m[INFERENCE]\033[0m " - f"\033[1;95mLogging inference metrics to wandb (rank {torch.distributed.get_rank()})\033[0m" + f"\033[1;95mLogging inference metrics to wandb (rank {self.rank})\033[0m" ) if HAVE_WANDB and self.context.metrics_writer.__name__ == "wandb": # Make all inference/* metrics use inference_step as their x-axis @@ -174,21 +202,43 @@ def __init__( max_step = int(val) self.inference_step_offset = int(max_step) - # Initialize the asyncio loop if it has not already been initialized. - # TODO: Start the engine loop here. - self._loop = get_asyncio_loop() - self._cond = asyncio.Condition() + # Create cuda graphs. + self.create_cuda_graphs() - # Capture cuda graph. - self.capture_stats = None + def reset(self) -> None: + """Reset by removing all requests and reset all state.""" - if enable_cuda_graph is not None: - self.cuda_graph_impl = "local" if enable_cuda_graph else "none" - else: - self.cuda_graph_impl = controller.inference_wrapped_model.model.config.cuda_graph_impl + self.context.reset() - if self.cuda_graph_impl == "local": - self.create_cuda_graphs() + # Request state. + self.request_counter = Counter() + self.finished_request_count = 0 + + self.requests: Dict[int, RequestEntry] = {} + self.waiting_request_ids = deque() + self.failed_request_ids = [] + + # Timing and logging variables. + self.rank = torch.distributed.get_rank() + self.step_count = 0 + self.step_start_event = torch.cuda.Event(enable_timing=True) + self.step_end_event = torch.cuda.Event(enable_timing=True) + self.capture_stats = None + + # Runtime state. 
+ self._loop = get_asyncio_loop(getattr(self, "_loop", None)) + self._cond = asyncio.Condition() + self.running = asyncio.Event() + self.paused = asyncio.Event() + self.stopped = asyncio.Event() + self.received_pause: bool = False + self.received_stop: bool = False + self.suspend_signal = False + self.is_suspended = False + self.resume_request_ids = None + + # Coordinator state. + self.use_coordinator = False def create_cuda_graphs(self, reset_context: bool = True): """Create cuda graphs. @@ -199,6 +249,10 @@ def create_cuda_graphs(self, reset_context: bool = True): Args: reset_context (bool): Whether to reset the context after building cuda graphs. """ + + if self.cuda_graph_impl != "local": + return + context = self.context controller = self.controller @@ -207,7 +261,7 @@ def create_cuda_graphs(self, reset_context: bool = True): if moe_pad_experts and context.non_decode_cuda_graphs: context.non_decode_cuda_graphs = False - if torch.distributed.get_rank() == 0: + if self.rank == 0: warnings.warn( "MoE models do not support non-decode cuda graphs. " "Forcing non_decode_cuda_graphs to False." @@ -292,10 +346,12 @@ def create_cuda_graphs(self, reset_context: bool = True): self.capture_stats = capture_stats + @internal_api async def start_listening_to_data_parallel_coordinator( self, inference_coordinator_port: int, launch_inference_coordinator: bool = True, + verbose: bool = False, *, loop: Optional[asyncio.AbstractEventLoop] = None, ): @@ -306,16 +362,18 @@ async def start_listening_to_data_parallel_coordinator( `InferenceCoordinator`. It configures different ZMQ socket patterns based on the rank's role within the distributed topology. + Note that this method must be called on all ranks, as it uses blocking torch broadcasts. + The setup involves two primary roles within each data-parallel group: - 1. **TP Coordinator (TP_rank=0, PP_rank=0)**: This rank connects directly + 1. 
**MP Coordinator (TP_rank=0, PP_rank=0)**: This rank connects directly to the central coordinator via a ZMQ `DEALER` socket. It receives requests and uses a ZMQ `PUB` (publisher) socket to broadcast them - to all other ranks within its tensor-parallel (TP) group. - 2. **TP Workers (all other ranks)**: These ranks use ZMQ `SUB` (subscriber) - sockets to listen for requests broadcast by their local TP Coordinator. + to all other ranks within its model-parallel (MP) group. + 2. **MP Workers (all other ranks)**: These ranks use ZMQ `SUB` (subscriber) + sockets to listen for requests broadcast by their local MP Coordinator. - This architecture uses fast Inter-Process Communication (`ipc`) sockets for - intra-node broadcasts within a TP group. + This architecture uses TCP sockets for both inter-node and intra-node broadcasts + within an MP group. Finally, after setting up the communication channels and ensuring all ranks are synchronized, this method starts the main engine processing loop @@ -327,12 +385,7 @@ async def start_listening_to_data_parallel_coordinator( launch_inference_coordinator (bool, optional): If True, the global rank 0 process will spawn and manage the `InferenceCoordinator` process. Defaults to True. - - Note: - The current implementation uses `ipc` sockets for broadcasting requests - within a Tensor Parallel group, which limits each TP group to a single - physical node. For example, if you have 8 GPUs per node, then this will only - work with TP=[1,2,4,8] + verbose (bool): Whether to run in verbose mode. """ assert HAVE_ZMQ, ( @@ -343,7 +396,25 @@ async def start_listening_to_data_parallel_coordinator( "pip install msgpack" ) - if launch_inference_coordinator and torch.distributed.get_rank() == 0: + self.zmq_context = zmq.Context().instance() + self.zmq_sockets = [] # keep track of all sockets created by this engine + + # Get world info. 
+ dp_group = parallel_state.get_data_parallel_group() + dp_src = parallel_state.get_data_parallel_src_rank() + dp_size = parallel_state.get_data_parallel_world_size() + dp_rank = parallel_state.get_data_parallel_rank() + + mp_group = parallel_state.get_model_parallel_group() + mp_src = parallel_state.get_model_parallel_src_rank() + tp_rank = parallel_state.get_tensor_model_parallel_rank() + pp_rank = parallel_state.get_pipeline_model_parallel_rank() + + self.is_mp_coordinator = tp_rank == 0 and pp_rank == 0 + self.is_dp_coordinator = (dp_rank == 0) and self.is_mp_coordinator + + # Spawn a DP coordinator process and get the connection info. + if launch_inference_coordinator and self.is_dp_coordinator: spawn_context = multiprocessing.get_context('spawn') coordinator_ready_event = spawn_context.Event() self.inference_coordinator_process = spawn_context.Process( @@ -356,67 +427,223 @@ async def start_listening_to_data_parallel_coordinator( ) self.inference_coordinator_process.start() - # Todo [Siddharth]: can we move this code to another file? - self.zmq_context = zmq.Context() - self.zmq_sockets = [] # keep track of all sockets created by this engine + # Find available ports for MP and bind to them. + if self.is_mp_coordinator: + local_ip = socket.gethostname() + mp_req_sock = self.zmq_context.socket(zmq.PUB) + mp_req_sock.bind_to_random_port(f"tcp://{local_ip}") + mp_req_addr = mp_req_sock.getsockopt_string(zmq.LAST_ENDPOINT) + + mp_len_sock = self.zmq_context.socket(zmq.PUB) + mp_len_sock.bind_to_random_port(f"tcp://{local_ip}") + mp_len_addr = mp_len_sock.getsockopt_string(zmq.LAST_ENDPOINT) + else: + mp_req_addr = None + mp_len_addr = None + + # Broadcast addresses to respective ranks. 
+ bcast = [mp_req_addr, mp_len_addr] + torch.distributed.broadcast_object_list(bcast, src=mp_src, group=mp_group) + [mp_req_addr, mp_len_addr] = bcast + ip_address_of_dp_coordinator = os.getenv('MASTER_ADDR', '127.0.0.1') - identity = f'tp-coord-{parallel_state.get_data_parallel_rank()}' - if ( - parallel_state.get_tensor_model_parallel_rank() == 0 - and parallel_state.get_pipeline_model_parallel_rank() == 0 - ): + dp_addr = f"tcp://{ip_address_of_dp_coordinator}:{inference_coordinator_port}" + identity = f'mp-coord-{dp_rank}' + if self.is_mp_coordinator: # 1. Create dealer sockets where tp_rank = 0 and pp_rank = 0 # These will receive requests from an InferenceCoordinator. self.socket_for_receiving_requests = self.zmq_context.socket(zmq.DEALER) self.socket_for_receiving_requests.setsockopt(zmq.IDENTITY, identity.encode('utf-8')) - self.socket_for_receiving_requests.connect( - f"tcp://{ip_address_of_dp_coordinator}:{inference_coordinator_port}" - ) + self.socket_for_receiving_requests.connect(dp_addr) # send empty string. this is used to register with the coordinator. self.socket_for_receiving_requests.send(b"") # 2. Create a publisher socket. This is used to publish or broadcast - # requests within the tensor parallel group - self.tensor_parallel_publisher_socket = self.zmq_context.socket(zmq.PUB) - self.tensor_parallel_publisher_socket.bind(f"ipc:///tmp/{identity}-tp-bcast-socket-req") + # requests within the model parallel group + self.model_parallel_publisher_socket = mp_req_sock # 3. Create another publisher socket to broadcast the number of messages to receive. 
- self.tensor_parallel_num_msgs_publisher_socket = self.zmq_context.socket(zmq.PUB) - self.tensor_parallel_num_msgs_publisher_socket.bind( - f"ipc:///tmp/{identity}-tp-bcast-socket-len" - ) + self.model_parallel_num_msgs_publisher_socket = mp_len_sock self.zmq_sockets += [ self.socket_for_receiving_requests, - self.tensor_parallel_num_msgs_publisher_socket, - self.tensor_parallel_publisher_socket, + self.model_parallel_num_msgs_publisher_socket, + self.model_parallel_publisher_socket, ] - # All TP ranks subscribe to the two publisher sockets - self.tensor_parallel_subscriber_socket = self.zmq_context.socket(zmq.SUB) - self.tensor_parallel_subscriber_socket.connect(f"ipc:///tmp/{identity}-tp-bcast-socket-req") - self.tensor_parallel_subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") - - self.tensor_parallel_num_msgs_subscriber_socket = self.zmq_context.socket(zmq.SUB) - self.tensor_parallel_num_msgs_subscriber_socket.connect( - f"ipc:///tmp/{identity}-tp-bcast-socket-len" - ) - self.tensor_parallel_num_msgs_subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") + # All MP ranks subscribe to the two publisher sockets + self.model_parallel_subscriber_socket = self.zmq_context.socket(zmq.SUB) + self.model_parallel_subscriber_socket.connect(mp_req_addr) + self.model_parallel_subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") + + self.model_parallel_num_msgs_subscriber_socket = self.zmq_context.socket(zmq.SUB) + self.model_parallel_num_msgs_subscriber_socket.connect(mp_len_addr) + self.model_parallel_num_msgs_subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") self.zmq_sockets += [ - self.tensor_parallel_subscriber_socket, - self.tensor_parallel_num_msgs_subscriber_socket, + self.model_parallel_subscriber_socket, + self.model_parallel_num_msgs_subscriber_socket, ] - torch.distributed.barrier(parallel_state.get_tensor_model_parallel_group()) + torch.distributed.barrier(mp_group) - if launch_inference_coordinator and torch.distributed.get_rank() == 0: + if 
launch_inference_coordinator and self.is_dp_coordinator: await await_process_event(coordinator_ready_event, self.inference_coordinator_process) logging.info("Inference co-ordinator is ready to receive requests!") # Finally run the engine infinite loop loop = get_asyncio_loop(loop) - self.engine_loop_task = loop.create_task(self.run_engine_with_coordinator(loop=loop)) + self.engine_loop_task = loop.create_task( + self.run_engine_with_coordinator(loop=loop, verbose=verbose) + ) + + @contextmanager + @staticmethod + def suspend_resume_ctx(key: str, *, unified_memory_level: int) -> None: + """Context manager for suspending and resuming the engine. + + This context manager records the time and memory usage when suspending + and resuming the context. TODO(@lmcafee): add argument to optionally + return nullcontext, to avoid overhead. + + Args: + key (str): Key that identifies caller (e.g., 'suspend' or 'resume'). + + Returns: + None. + """ + + try: + + start_mem = torch.cuda.memory_stats() + start_time = time.time() + torch.cuda.synchronize() + + yield + + finally: + + end_time = time.time() + + end_mem = torch.cuda.memory_stats() + start_mem_alloc = start_mem["allocated_bytes.all.current"] + end_mem_alloc = end_mem["allocated_bytes.all.current"] + start_mem_res = start_mem["reserved_bytes.all.current"] + end_mem_res = end_mem["reserved_bytes.all.current"] + + rank_str = torch.distributed.get_rank() + dir_str = "deallocating" if end_mem_alloc <= start_mem_alloc else "allocating" + relative_time_str = f"{end_time - start_time:.3f} sec" + relative_mem_str = f"{abs(start_mem_alloc - end_mem_alloc) / 1024**3:.1f} gb" + + if HAVE_PSUTIL: + process = psutil.Process() + mem_info = process.memory_info() + cpu_mem_str = f"{mem_info.rss / 1024**3:.1f} gb" + else: + cpu_mem_str = "--" + + total_mem_str = ", ".join( + ( + f"cpu: {cpu_mem_str}", + f"gpu: alloc {end_mem_alloc / 1024**3:.1f} gb", + f"res {end_mem_res / 1024**3:.1f} gb", + ) + ) + logging.info( + f"[rank {rank_str}] 
dynamic engine {key}, " + f"unified {unified_memory_level}, " + f"{dir_str} " + f"{relative_mem_str} in {relative_time_str} ... " + f"abs mem usage: {total_mem_str}" + ) + + def suspend(self): + """Suspend engine by deallocating context's GPU state.""" + + # Skip if already suspended, which can happen when using the inference + # coordinator. + if self.is_suspended: + return + self.is_suspended = True + + # Deallocate context tensors. + with self.__class__.suspend_resume_ctx( + "suspended", unified_memory_level=self.unified_memory_level + ): + self.context.deallocate_all_tensors() + + # Delete cuda graphs when not using unified memory at all (level 0). For + # levels 1 and 2, the context's tensors maintain static memory addresses, + # so the cuda graphs are re-used. + if self.unified_memory_level == 0: + delete_cuda_graphs() + + # Maintain references to requests before reset. + waiting_request_ids = list(self.waiting_request_ids) + active_request_ids = set(self.requests.keys()) - set(waiting_request_ids) + self.resume_request_ids = [*active_request_ids, *waiting_request_ids] + self.waiting_request_ids.clear() + + # Suspend requests objects. + for request_id in active_request_ids: + self.requests[request_id].record.suspend(self.controller.tokenizer) + + def resume(self): + """Resume engine by reallocating context's GPU state.""" + + # Skip if not suspended, which can happen when using the inference + # coordinator. + if not self.is_suspended: + return + self.is_suspended = False + + # Resume. + with self.__class__.suspend_resume_ctx( + "resumed", unified_memory_level=self.unified_memory_level + ): + + # Allocate context tensors. + alloc_time = time.time() + torch.cuda.synchronize() + self.context.allocate_all_tensors(is_init=False) + torch.cuda.synchronize() + alloc_time = time.time() - alloc_time + + # Reset context and request data. + self.context.reset() + + # Create cuda graphs (before adding requests, to be in decode mode). 
+ # Only create cuda graphs when not using unified memory at all (level + # 0). For levels 1 and 2, the context's tensors maintain static + # memory addresses, so the cuda graphs are re-used. + capture_time = time.time() + if self.unified_memory_level == 0: + self.create_cuda_graphs() + capture_time = time.time() - capture_time + + # Add requests. + add_time = time.time() + torch.cuda.synchronize() + for request_id in self.resume_request_ids: + self._add_request(self.get_request(request_id)) + torch.cuda.synchronize() + add_time = time.time() - add_time + + # Print inner timing (must be outside context manager above for correct formatting). + logging.info( + " > " + + ", ".join( + ( + f"inner timing: alloc {alloc_time:.3f}", + f"add {add_time:.3f}", + f"capture {capture_time:.3f}.", + ) + ) + ) + + # Notify event loop. + self._loop.call_soon_threadsafe(asyncio.create_task, self._notify_cond_for_new_request()) @trace_async_exceptions async def _notify_cond_for_new_request(self): @@ -428,19 +655,31 @@ def has_unfinished_requests(self) -> bool: """Test if context contains unfinished requests.""" return self.context.has_unfinished_requests() or len(self.waiting_request_ids) > 0 - def reset(self) -> None: - """Reset by removing all requests and reset all state.""" - self.context.reset() - self.waiting_request_ids.clear() - self.step_count = 0 - self.finished_request_count = 0 + def get_request(self, request_id: int) -> DynamicInferenceRequest: + """Get most recent request from a request record. + + Args: + request_id (int): Request id. + + Returns: + (DynamicInferenceRequest) The most recent request in the record. + """ + return self.requests[request_id].record[-1] def _add_request( self, request: DynamicInferenceRequest ) -> asyncio.Future[DynamicInferenceRequest]: request_id = request.request_id - self.requests[request_id] = request + + # Add request to self.requests. If the engine has previously been + # suspended, then the request may already exist. 
+ if request_id not in self.requests: + self.requests[request_id] = RequestEntry( + record=DynamicInferenceRequestRecord.from_request(request), + future=self._loop.create_future(), + ) + if request.status is None: request.status = Status.ACTIVE_AND_GENERATING_TOKENS @@ -456,6 +695,17 @@ def _add_request( request.sampling_params.num_tokens_to_generate = self.context.max_sequence_length - len( request.prompt_tokens ) + if request.sampling_params.termination_id is None: + try: + eod = self.controller.tokenizer.eod + except AttributeError: + if self.rank == 0: + warnings.warn( + "Termination ID not specified, and tokenizer does not define eod." + "Defaulting to not using termination id." + ) + eod = -1 + request.sampling_params.termination_id = eod if ( len(request.prompt_tokens) + request.sampling_params.num_tokens_to_generate @@ -470,10 +720,10 @@ def _add_request( if request.status != Status.FAILED: self.waiting_request_ids.append(request_id) + else: + self.failed_request_ids.append(request_id) - # Create a new asyncio Future to notify the user when the request has completed. - self.request_completion_futures[request_id] = self._loop.create_future() - return self.request_completion_futures[request_id] + return self.requests[request_id].future def add_request( self, @@ -491,7 +741,6 @@ def add_request( Return: Returns an asyncio `Future[DynamicInferenceRequest]` for the user to wait on. """ - prompt_str = None # Tokenize prompt if text. if isinstance(prompt, str): @@ -520,8 +769,8 @@ def add_request( # Initialize request. 
request = DynamicInferenceRequest( - prompt=prompt_str, request_id=request_id, + prompt=prompt_str, prompt_tokens=tokens, sampling_params=sampling_params, ) @@ -550,9 +799,9 @@ def post_process_requests( Returns: A list of active requests and completed requests as `DynamicInferenceRequest` objects """ - active_requests: List[DynamicInferenceRequest] = [] - finished_requests: List[DynamicInferenceRequest] = [] + active_request_ids: list[int] = [] finished_request_ids = set(finished_request_ids.tolist()) + finished_request_records: list[DynamicInferenceRequestRecord] = [] self.finished_request_count += len(finished_request_ids) log_probs_iter = log_probs if log_probs else repeat(None) @@ -560,7 +809,7 @@ def post_process_requests( for request_id, token, request_log_probs in zip( request_ids.tolist(), sample.tolist(), log_probs_iter ): - request: DynamicInferenceRequest = self.requests[request_id] + request: DynamicInferenceRequest = self.get_request(request_id) if request_id != self.context.chunked_prefill_request_id: request.generated_tokens.append(token) if request.tpot is None: @@ -594,19 +843,20 @@ def post_process_requests( if request_id in finished_request_ids: request.generated_length = len(request.generated_tokens) request.status = Status.COMPLETED - finished_request = self.requests.pop(request_id) + finished_entry = self.requests.pop(request_id) + finished_request = finished_entry.record[-1] if finished_request.prompt is None: finished_request.prompt = self.controller.tokenizer.detokenize( finished_request.prompt_tokens.tolist() ) finished_request.generated_length = len(finished_request.generated_tokens) - finished_requests.append(finished_request) finished_request.generated_text = self.controller.tokenizer.detokenize( finished_request.generated_tokens ) - self.request_completion_futures[request_id].set_result(finished_request) + finished_request_records.append(finished_entry.record) + finished_entry.future.set_result(finished_entry.record) else: - 
active_requests.append(request) + active_request_ids.append(request_id) else: # The chunked prefill produces useless tokens # so we are not appending them to the generated tokens. @@ -624,9 +874,9 @@ def post_process_requests( request.prompt_log_probs = [] request.prompt_log_probs.extend(request_log_probs) request.generated_log_probs = [] - active_requests.append(request) + active_request_ids.append(request_id) - return active_requests, finished_requests + return active_request_ids, finished_request_records def schedule_waiting_requests(self): """Tries to schedule any requests in the waiting pool.""" @@ -640,9 +890,9 @@ def schedule_non_chunked_prefill(self): Perform the same original scheduling logic for non-chunked runs """ while self.waiting_request_ids: - req = self.requests[self.waiting_request_ids[0]] + req = self.get_request(self.waiting_request_ids[0]) request_can_be_added, request_tokens_can_be_added, kv_cache_available = ( - self.context.check_availability(req, safe=True) + self.context.check_availability(req) ) if request_can_be_added and request_tokens_can_be_added and kv_cache_available: self.context.add_request(req) @@ -655,37 +905,6 @@ def schedule_non_chunked_prefill(self): else: break - def get_active_sampling_map(self) -> List[Tuple[SamplingParams, List[int]]]: - """Gets a map of sampling methods to active requests indices in the context.""" - # Get all active request IDs. - active_request_ids = self.context.request_ids[ - self.context.paused_request_count : self.context.total_request_count - ].tolist() - if self.static_sampling: - return [(next(iter(self.requests.values())).sampling_params, active_request_ids)] - - # Get a map from request_id to context array index. - context_id_map = {r: i for i, r in enumerate(active_request_ids)} - - # Create map of sampling methods to context array indices. 
- sampling_map: List[Tuple[SamplingParams, List[int]]] = [] - for request_id, request in self.requests.items(): - if request_id not in context_id_map: - continue - context_id = context_id_map[request_id] - sp = request.sampling_params - - # Look for a pre-existing group with these sampling parameters. - for sampling, indices in sampling_map: - if sampling == sp: - indices.append(context_id) - break - # If no group exists, create a new one. - else: - sampling_map.append((sp, [context_id])) - - return sampling_map - def schedule_chunked_prefill(self): """ This function schedules chunked prefill requests. @@ -704,7 +923,7 @@ def schedule_chunked_prefill(self): can_schedule = True while self.waiting_request_ids and can_schedule: can_schedule = False - req = self.requests[self.waiting_request_ids[0]] + req = self.get_request(self.waiting_request_ids[0]) # is_continuing_chunked_prefill is True if we are scheduling next # chunk of a existing chunked prefill request @@ -716,9 +935,7 @@ def schedule_chunked_prefill(self): self.context.active_token_count + remaining_len <= self.context.max_tokens ) token_partially_can_be_added = self.context.active_token_count < self.context.max_tokens - request_can_be_added, _, kv_cache_available = self.context.check_availability( - req, safe=not is_continuing_chunked_prefill - ) + request_can_be_added, _, kv_cache_available = self.context.check_availability(req) request_can_be_added = is_continuing_chunked_prefill or request_can_be_added if request_can_be_added and kv_cache_available: @@ -747,104 +964,157 @@ def schedule_chunked_prefill(self): # chunked prefill request at the head of the waiting queue # Note that we do not need to continue check the queue, as the tokens are full - async def async_step( - self, *, verbose: Optional[bool] = False - ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]: - """ - Wrapper for controller.generate_output_tokens_dynamic_batch(), to - match vLLM API. 
Uses `asyncio` for continuous generation which allows this - method to sleep and wake up when new requests are available. - - Args: - sampling_params (SamplingParams): The sampling parameters. - verbose (bool): Whether to run in verbose mode. + async def async_forward(self) -> Tuple[Dict, Dict, float, int]: + """Uses `asyncio` for continuous generation. + Sleeps when no requests are available, until new requests have been added. Returns: A tuple comprised of: - 1. Requests that ran in the last step and are still active. - 2. Requests that ran in the last step and have now finished. - 3. The step time in seconds. + step_result (Optional[Dict]): The result of the step. + context_state (Dict): A dict describing the state of the context: + is_decode_only, total/paused request count, active token count. + step_time (float): How long this step took. """ + + # If suspended, no stepping. + if self.is_suspended: + raise EngineSuspendedError(self.step_count) + # schedule requests self.schedule_waiting_requests() - # Previous context state, for printing output below. - prev_is_decode_only = self.context.is_decode_only() - prev_total_request_count = self.context.total_request_count - prev_paused_request_count = self.context.paused_request_count - prev_active_token_count = self.context.active_token_count - - range_push("Prefill" if not prev_is_decode_only else "Decode") + # Saving pre-step state, for printing output below. + is_decode_only = self.context.is_decode_only() + pre_step_context_state = { + "is_decode_only": is_decode_only, + "total_request_count": self.context.total_request_count, + "paused_request_count": self.context.paused_request_count, + "active_token_count": self.context.active_token_count, + } # Generate tokens. - is_decode_only = self.context.is_decode_only() - # save the is_decode_only AFTER scheduling, BEFORE update + range_push("Prefill" if not is_decode_only else "Decode") + # TODO @TDE: Account for this line when overlapping forward and bookkeep. 
self.is_decode_only = is_decode_only + self.step_start_event.record() - sampling_map = self.get_active_sampling_map() - result = await self.controller.async_generate_output_tokens_dynamic_batch(sampling_map) + result = await self.controller.async_generate_output_tokens_dynamic_batch() self.step_end_event.record() self.step_end_event.synchronize() step_time = self.step_start_event.elapsed_time(self.step_end_event) / 1e3 + self.step_count += 1 + + range_pop() + + if ( + self.inference_logging_step_interval > 0 + and self.step_count > 0 + and self.step_count % self.inference_logging_step_interval == 0 + and self.context.metrics_writer is not None + ): + kvcache_util_stats = self.context.get_kvcache_utilization_stats() + else: + kvcache_util_stats = None + + post_step_context_state = { + "waiting_request_count": len(self.waiting_request_ids), + "finished_request_count": self.finished_request_count, + "kv_stats": kvcache_util_stats, + "padded_active_token_count": self.context.padded_active_token_count, + "using_cuda_graph_this_step": self.context.using_cuda_graph_this_step(), + "total_active_block_count": self.context.block_allocator.active_count, + "total_paused_block_count": self.context.block_allocator.paused_count, + "total_active_used_blocks": self.context.block_allocator.get_active_used(), + "total_paused_used_blocks": self.context.block_allocator.get_paused_used(), + } + + context_state = {**pre_step_context_state, **post_step_context_state} + + return result, context_state, step_time, self.step_count + + async def async_bookkeep( + self, + step_result: Optional[Dict], + context_state: Dict, + step_time: float, + step_count: int, + *, + verbose: bool = False, + ): + """Uses `asyncio` for continuous bookkeeping. + + Args: + step_result (Optional[Dict]): The result of the step. + context_state (Dict): is_decode_only, total/paused request count, active token count. + step_time (float): How long this step took. + step_count (int): The count of the step. 
+            verbose (bool): Whether to run in verbose mode. + Returns: + A dictionary containing: + active_request_ids (List): Ids of requests that ran in the last step and are still active. + finished_request_records (List): Records of requests that ran in the last step and have now finished. + step_time (float): The step time in seconds. + cuda_graph_request_count (int): The CUDA graph batch size matching this step. + """ # Increment finished_request_count. cuda_graph_request_count = None - if result is not None: - active_request_ids = result["active_request_ids"] - newly_paused_request_ids = result["newly_paused_request_ids"] - finished_request_ids = result["finished_request_ids"] - sample = result["sample"] - log_probs = result["log_probs"] - cuda_graph_request_count = result["cuda_graph_request_count"] + if step_result is not None: + active_request_ids = step_result["active_request_ids"] + newly_paused_request_ids = step_result["newly_paused_request_ids"] + finished_request_ids = step_result["finished_request_ids"] + sample = step_result["sample"] + log_probs = step_result["log_probs"] + cuda_graph_request_count = step_result["cuda_graph_request_count"] # Add paused events. if newly_paused_request_ids is not None and self.track_paused_request_events: newly_paused_request_ids = newly_paused_request_ids.tolist() - [self.requests[i].add_event_pause() for i in newly_paused_request_ids] + [self.get_request(i).add_event_pause() for i in newly_paused_request_ids] # Mark requests finished. - [self.requests[i].add_event_finish() for i in finished_request_ids.tolist()] + [self.get_request(i).add_event_finish() for i in finished_request_ids.tolist()] # Add finished events. 
- (active_requests, finished_requests) = self.post_process_requests( + active_request_ids, finished_request_records = self.post_process_requests( active_request_ids, finished_request_ids, step_time, sample, log_probs ) else: - active_requests: List[DynamicInferenceRequest] = [] - finished_requests: List[DynamicInferenceRequest] = [] + active_request_ids: list[int] = [] + finished_request_records: list[DynamicInferenceRequestRecord] = [] # Failed requests. for failed_request_id in self.failed_request_ids: - failed_request = self.requests.pop(failed_request_id) + failed_entry = self.requests.pop(failed_request_id) + failed_request = failed_entry.record[-1] failed_request.status = Status.FAILED failed_request.add_event_fail() - finished_requests.append(failed_request) - self.request_completion_futures[failed_request_id].set_result(failed_request) + finished_request_records.append(failed_entry.record) + failed_entry.future.set_result(failed_entry.record) self.failed_request_ids.clear() - # Log KV cache utilization stats to W&B - if ( - self.inference_logging_step_interval > 0 - and self.step_count > 0 - and self.step_count % self.inference_logging_step_interval == 0 - and self.context.metrics_writer is not None - ): - - # Get KV cache utilization stats from dynamic context - kv_stats = self.context.get_kvcache_utilization_stats() + # Handle necessary ZMQ DP coordinator communication. 
+ if self.use_coordinator and self.is_mp_coordinator and finished_request_records: + payload = msgpack.packb( + [Headers.ENGINE_REPLY.value, [r.serialize() for r in finished_request_records]], + use_bin_type=True, + ) + self.socket_for_receiving_requests.send(payload) + # Log KV cache utilization stats to W&B + if context_state["kv_stats"] is not None: # Prepare metrics dictionary with all stats # Use 'inference/' prefix for all metrics to separate from training metrics metrics = { - 'inference/inference_step': int(self.inference_step_offset + int(self.step_count)), + 'inference/inference_step': int(self.inference_step_offset + int(step_count)), 'inference/step_time_s': float(step_time), 'inference/waiting_queue_len': int(len(self.waiting_request_ids)), 'inference/total_requests_dict_size': int(len(self.requests)), } # Add KV stats with inference/ prefix # Convert utilization metrics from 0-1 range to 0-100 percentage range for better visualization - for key, value in kv_stats.items(): + for key, value in context_state["kv_stats"].items(): if 'utilization' in key: # Convert to percentage (0-100) and group under kvcache_utilization metrics[f'inference/{key}'] = float(value * 100.0) @@ -860,15 +1130,16 @@ async def async_step( # Print context state. if verbose: - context = self.context mem = torch.cuda.memory_stats() - step_type = "decode" if is_decode_only else "non-decode" + step_type = "decode" if context_state["is_decode_only"] else "non-decode" output_str = ( - "* step %d | %s ... time: %.3f%s ... " - "reqs: %d [ gtd %d, active %d, paused %d, finished %d ] ... " + "* rank %d | step %d | %s ... time: %.3f%s ... " + "reqs: a %d/%d, p %d/%d, w %d, f %d ... " + "blocks: a %d/%d, p %d/%d ... " "mem: tensors %d, alloc %.1f gb, res %.1f gb." 
% ( - self.step_count, + self.rank, + step_count, datetime.now().strftime("%H:%M:%S"), step_time, ( @@ -877,44 +1148,71 @@ async def async_step( step_type, ( "DIM %d:%d" - % (context.padded_active_token_count, prev_active_token_count) - if self.context.using_cuda_graph_this_step() + % ( + context_state["padded_active_token_count"], + context_state["active_token_count"], + ) + if context_state["using_cuda_graph_this_step"] else "OFF" ), ) ), - prev_total_request_count, - context.gtd_request_count, - prev_total_request_count - prev_paused_request_count, - prev_paused_request_count, - self.finished_request_count, + context_state["total_request_count"] - context_state["paused_request_count"], + context_state["total_active_block_count"], + context_state["paused_request_count"], + context_state["total_paused_block_count"], + context_state["waiting_request_count"], + context_state["finished_request_count"], + context_state["total_active_used_blocks"], + context_state["total_active_block_count"], + context_state["total_paused_used_blocks"], + context_state["total_paused_block_count"], mem["allocation.all.current"], mem["allocated_bytes.all.current"] / (1024**3), mem["reserved_bytes.all.current"] / (1024**3), ) ) - if prev_is_decode_only: + if context_state["is_decode_only"]: output_str = f"\033[94m{output_str}\033[0m" logging.info(output_str) - self.step_count += 1 - - range_pop() return { - "active_requests": active_requests, - "finished_requests": finished_requests, + "active_request_ids": active_request_ids, + "finished_request_records": finished_request_records, "step_time": step_time, "cuda_graph_request_count": cuda_graph_request_count, } + async def async_step( + self, *, verbose: bool = False + ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]: + """ + Wrapper for controller.generate_output_tokens_dynamic_batch(), to + match vLLM API. 
Uses `asyncio` for continuous generation which allows this + method to sleep and wake up when new requests are available. + + Args: + verbose (bool): Whether to run in verbose mode. + + Returns: + A tuple comprised of: + 1. Requests that ran in the last step and are still active. + 2. Requests that ran in the last step and have now finished. + 3. The step time in seconds. + """ + last_step_data = await self.async_forward() + ret = await self.async_bookkeep(*last_step_data, verbose=verbose) + # Keep for compatibility with current test suite. + return ret + def step_modern( - self, *, verbose: Optional[bool] = False + self, *, verbose: bool = False ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]: """Synchronous wrapper for `self.async_step`.""" return self._loop.run_until_complete(self.async_step(verbose=verbose)) def step_legacy( - self, sampling_params: SamplingParams, *, verbose: Optional[bool] = False + self, sampling_params: SamplingParams, *, verbose: bool = False ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]: """Synchronous wrapper for `self.async_step`.""" warnings.warn( @@ -922,10 +1220,10 @@ def step_legacy( "0.16. Please use `step_modern()` going forward, which will eventually " "be renamed to `step()`." ) - result = self._loop.run_until_complete( - self.async_step(sampling_params=sampling_params, verbose=verbose) - ) - return (result["active_requests"], result["finished_requests"], result["step_time"]) + result = self._loop.run_until_complete(self.async_step(verbose=verbose)) + active_requests = [self.get_request(i) for i in result["active_request_ids"]] + finished_requests = [r.merge() for r in result["finished_request_records"]] + return active_requests, finished_requests, result["step_time"] # For backwards compatibility, point `step()` to `step_legacy()`. Starting in # `megatron-core` 0.16, `step_modern()` will be renamed to `step()`. 
@@ -940,39 +1238,40 @@ def generate( request_id = int(next(self.request_counter)) _ = self.add_request(request_id, prompt, sampling_params) - finished_requests_list = [] + finished_request_records_list = [] while self.has_unfinished_requests(): result = self.step_modern() - finished_requests_list.extend(result["finished_requests"]) + finished_request_records_list.extend(result["finished_request_records"]) - # Ensure requests are returned in the same order they were passed in - finished_requests_list.sort(key=lambda x: x.request_id) + # Ensure requests are returned in the same order they were passed in. + finished_request_records_list.sort(key=lambda r: r.request_id) - return finished_requests_list + return finished_request_records_list def schedule_requests(self) -> int: """Drains the ZMQ socket for a batch of requests and adds them to the engine. This method is a collective and synchronous operation that must be called - by all ranks in a Tensor Parallel (TP) group at the same time. It ensures + by all ranks in a Model Parallel (MP) group at the same time. It ensures that all ranks process the exact same batch of incoming requests and control signals. The synchronization works as follows: - 1. The TP rank 0 drains all pending messages from its subscriber socket + 1. The MP rank 0 drains all pending messages from its subscriber socket in a non-blocking manner. - 2. TP rank 0 then broadcasts the number of messages it received to all other - ranks in its TP group using a dedicated publisher socket. - 3. The other TP ranks wait to receive this count, and then receive exactly + 2. MP rank 0 then broadcasts the number of messages it received to all other + ranks in its MP group using a dedicated publisher socket. + 3. The other MP ranks wait to receive this count, and then receive exactly that many messages from their subscriber sockets. Once all ranks have the same batch of messages, they are unpacked and processed. 
New requests are added to the engine's queue, and control - signals (PAUSE, STOP, UNPAUSE) update the engine's internal state. + signals (PAUSE, UNPAUSE, SUSPEND, RESUME, STOP) update the engine's + internal state. Note: This function is synchronous and must be called collectively by all - ranks in a TP group. It should not be launched in a separate coroutine + ranks in a MP group. It should not be launched in a separate coroutine to ensure all ranks execute it in lockstep before proceeding to the next engine step. @@ -980,10 +1279,9 @@ def schedule_requests(self) -> int: int: The number of messages that were received and processed in this batch. """ - rank = parallel_state.get_tensor_model_parallel_rank() torch.cuda.nvtx.range_push("drain_zmq_socket") all_messages = [] - if rank == 0: + if self.is_mp_coordinator: while True: try: # Receive messages in a non-blocking way. @@ -995,37 +1293,72 @@ def schedule_requests(self) -> int: # First publish the number of messages to dequeue. # This is important because we want all tensor parallel ranks # to dequeue the same number of messages. - self.tensor_parallel_num_msgs_publisher_socket.send( + self.model_parallel_num_msgs_publisher_socket.send( struct.pack('!i', messages_to_dequeue) ) - # Now publish the actual messages to all tensor parallel ranks - for message in all_messages: - self.tensor_parallel_publisher_socket.send(message) + # Now publish the actual messages to all model parallel ranks + if messages_to_dequeue > 0: + self.model_parallel_publisher_socket.send_multipart(all_messages) else: - # First, receive the number of messages to dequeue from tp-rank 0 + # First, receive the number of messages to dequeue from mp-rank 0 messages_to_dequeue = struct.unpack( - '!i', self.tensor_parallel_num_msgs_subscriber_socket.recv() + '!i', self.model_parallel_num_msgs_subscriber_socket.recv() )[0] # Now, dequeue the same number of messages from the subscriber socket. 
# Note that these receives are blocking, because the messages # are guaranteed to be available after the tp-rank 0 has sent them. - for _ in range(messages_to_dequeue): - all_messages.append(self.tensor_parallel_subscriber_socket.recv()) + if messages_to_dequeue > 0: + all_messages = self.model_parallel_subscriber_socket.recv_multipart() + else: + all_messages = [] torch.cuda.nvtx.range_pop() for message in all_messages: data = msgpack.unpackb(message, raw=False) header = Headers(data[0]) + + if self.received_stop: + assert ( + header == Headers.STOP_ACK + ), "Engine is shutting down. No other messages allowed except STOP_ACK." + if header == Headers.SUBMIT_REQUEST: request_id, prompt, sampling_params = data[1:] sampling_params = SamplingParams.deserialize(sampling_params) self.add_request(request_id, prompt, sampling_params) elif header == Headers.PAUSE: - self.paused = True + # Pause thyself. + self.received_pause = True + self.running.clear() + # Send PAUSE_ACK back to coordinator. + if self.is_mp_coordinator: + payload = msgpack.packb([Headers.PAUSE_ACK.value], use_bin_type=True) + self.socket_for_receiving_requests.send(payload) elif header == Headers.STOP: - self.stopped = True + # Stop thyself. + self.received_stop = True + self.running.clear() + # Send STOP_ACK back to coordinator. 
+ if self.is_mp_coordinator: + payload = msgpack.packb([Headers.STOP_ACK.value], use_bin_type=True) + self.socket_for_receiving_requests.send(payload) + elif header == Headers.PAUSE_ACK: + self.paused.set() + self.received_pause = False + elif header == Headers.STOP_ACK: + self.stopped.set() + self.stop() elif header == Headers.UNPAUSE: - self.paused = False + self.paused.clear() + self.running.set() + elif header == Headers.SUSPEND: + self.suspend_signal = True + elif header == Headers.RESUME: + self.suspend_signal = False + elif header == Headers.STOP: + self.stopped = True + else: + raise UnknownHeaderError(header) return len(all_messages) @@ -1043,7 +1376,6 @@ def stop(self): for socket in self.zmq_sockets: socket.close() self.zmq_context.term() - parallel_state.destroy_model_parallel() @trace_async_exceptions async def run_engine( @@ -1051,15 +1383,20 @@ async def run_engine( ): """Continually steps the engine asynchronously.""" self._loop = get_asyncio_loop(loop) + self.use_coordinator = False try: while True: # Wait until there are active requests before proceeding. async with self._cond: await self._cond.wait_for( - lambda: self.context.get_active_request_count() > 0 - or self.waiting_request_ids + lambda: ( + not self.is_suspended + and ( + self.context.get_active_request_count() > 0 + or self.waiting_request_ids + ) + ) ) - await self.async_step(verbose=verbose) except asyncio.CancelledError: pass @@ -1070,14 +1407,14 @@ async def run_engine_with_coordinator( ): """Continually steps the engine asynchronously.""" self._loop = get_asyncio_loop(loop) + self.use_coordinator = True try: while True: self.schedule_requests() - if self.stopped: - self.stop() - return + if self.stopped.is_set(): + break - # for the cases below (engine is paused or no active requests), + # for the cases below (no active requests, or undergoing a state-change) # do not use asyncio.sleep(0) # as tp-rank=0 will flood the num_messages publisher # with "0" repeatedly. 
This causes some packets to drop. @@ -1089,10 +1426,20 @@ async def run_engine_with_coordinator( # todo [Siddharth]: Can this hardcoded sleep be avoided # with asyncio zmq sockets? - if self.paused: + if self.paused.is_set() or self.received_pause or self.received_stop: + await asyncio.sleep(0.02) + continue + + # Suspend, resume. + if self.suspend_signal: + self.suspend() await asyncio.sleep(0.02) continue + else: + self.resume() + + # No requests. if ( self.context.get_active_request_count() == 0 and len(self.waiting_request_ids) == 0 @@ -1100,25 +1447,7 @@ async def run_engine_with_coordinator( await asyncio.sleep(0.02) continue - engine_output = await self.async_step(verbose=verbose) - - is_tp0_and_pp0 = ( - parallel_state.get_tensor_model_parallel_rank() == 0 - and parallel_state.get_pipeline_model_parallel_rank() == 0 - ) - if ( - is_tp0_and_pp0 - and engine_output is not None - and engine_output["finished_requests"] - ): - payload = msgpack.packb( - [ - Headers.ENGINE_REPLY.value, - [r.serializable() for r in engine_output["finished_requests"]], - ], - use_bin_type=True, - ) - self.socket_for_receiving_requests.send(payload) + await self.async_step(verbose=verbose) except asyncio.CancelledError: pass diff --git a/megatron/core/inference/engines/static_engine.py b/megatron/core/inference/engines/static_engine.py index dc86eb775f9..d4c61965d2b 100644 --- a/megatron/core/inference/engines/static_engine.py +++ b/megatron/core/inference/engines/static_engine.py @@ -17,7 +17,7 @@ from megatron.core.inference.text_generation_controllers.text_generation_controller import ( TextGenerationController, ) -from megatron.core.utils import get_asyncio_loop +from megatron.core.utils import get_asyncio_loop, get_mamba_inference_state_config_from_model try: from tqdm import tqdm @@ -93,6 +93,10 @@ def __init__( # Store original context in case we need to fall back to legacy static engine original_context = text_generation_controller.inference_wrapped_model.inference_context + 
mamba_inference_state_config = get_mamba_inference_state_config_from_model( + text_generation_controller.inference_wrapped_model.model + ) + try: if not legacy: dynamic_context = DynamicInferenceContext.from_config( @@ -101,16 +105,17 @@ def __init__( max_batch_size=max_batch_size, buffer_size_gb=buffer_size_gb, num_cuda_graphs=1, + mamba_inference_state_config=mamba_inference_state_config, ) self.controller.inference_wrapped_model.inference_context = dynamic_context self.controller.inference_wrapped_model.prep_model_for_inference() + self.controller._init_dynamic_sampling_tensors() self.dynamic_engine = DynamicInferenceEngine( controller=self.controller, random_seed=self.random_seed, context=dynamic_context, enable_cuda_graph=True, - static_sampling=True, ) except Exception as e: # Get exception details for better debugging diff --git a/megatron/core/inference/headers.py b/megatron/core/inference/headers.py index ff894cc1918..a22d1328679 100644 --- a/megatron/core/inference/headers.py +++ b/megatron/core/inference/headers.py @@ -1,6 +1,6 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -from enum import Enum +from enum import Enum, auto class Headers(Enum): @@ -8,10 +8,21 @@ class Headers(Enum): Enum representing headers used for communication with the inference-coordinator. 
""" - CONNECT = 0 - ACK = 1 - SUBMIT_REQUEST = 2 - ENGINE_REPLY = 3 - PAUSE = 4 - UNPAUSE = 5 - STOP = 6 + CONNECT = auto() + CONNECT_ACK = auto() + SUBMIT_REQUEST = auto() + ENGINE_REPLY = auto() + PAUSE = auto() + PAUSE_ACK = auto() + UNPAUSE = auto() + SUSPEND = auto() + RESUME = auto() + STOP = auto() + STOP_ACK = auto() + + +class UnknownHeaderError(Exception): + """A signal with an unrecognized header was received by the coordinator.""" + + def __init_(self, header): + super().__init__(f"specialize for {header}.") diff --git a/megatron/core/inference/inference_client.py b/megatron/core/inference/inference_client.py index 53daac091b0..8a19e226c46 100644 --- a/megatron/core/inference/inference_client.py +++ b/megatron/core/inference/inference_client.py @@ -4,9 +4,9 @@ import logging import os import time -from typing import List, Union +from typing import Awaitable, List, Optional, Union -from megatron.core.inference.inference_request import DynamicInferenceRequest +from megatron.core.inference.inference_request import DynamicInferenceRequestRecord from megatron.core.inference.sampling_params import SamplingParams from megatron.core.utils import get_asyncio_loop, trace_async_exceptions @@ -73,6 +73,11 @@ def __init__(self, inference_coordinator_port: int): inference_coordinator_address = os.getenv('MASTER_ADDR', '127.0.0.1') socket.connect(f"tcp://{inference_coordinator_address}:{inference_coordinator_port}") + self._loop = None + self.running = asyncio.Event() + self.paused = asyncio.Event() + self.stopped = asyncio.Event() + self.socket = socket self.completion_futures = {} self.request_submission_times = {} @@ -92,41 +97,55 @@ def add_request( prompt (str): The input prompt to send to the language model. sampling_params: An object containing the sampling parameters for text generation (e.g., temperature, top_p). It must have a - `serializable()` method. + `serialize()` method. 
Returns: asyncio.Future: A future that will be resolved with a - `DynamicInferenceRequest` object containing the completed result. + `DynamicInferenceRequestRecord` object containing the completed result. """ + if not self.running.is_set(): + raise RuntimeError("InferenceClient is not currently running.") request_id = self.next_request_id self.next_request_id += 1 - payload = [Headers.SUBMIT_REQUEST.value, request_id, prompt, sampling_params.serializable()] + payload = [Headers.SUBMIT_REQUEST.value, request_id, prompt, sampling_params.serialize()] payload_serialized = msgpack.packb(payload, use_bin_type=True) self.socket.send(payload_serialized) assert request_id not in self.completion_futures - self.completion_futures[request_id] = get_asyncio_loop().create_future() + self.completion_futures[request_id] = self._loop.create_future() self.request_submission_times[request_id] = time.perf_counter() return self.completion_futures[request_id] @trace_async_exceptions - async def _listen_for_completed_requests(self): + async def _recv_task(self): """ Listens for completed inference requests from the coordinator. This coroutine runs in an infinite loop, continuously polling the socket - for replies. When a reply is received, it unpacks the message, finds the + for data. + When a request reply is received, it unpacks the message, finds the corresponding Future using the request ID, and sets the result. + Other control packets are handled appropriately. This method is started as a background task by the `start()` method. 
""" while True: try: - request_id, reply = msgpack.unpackb(self.socket.recv(flags=zmq.NOBLOCK), raw=False) - reply['latency'] = time.perf_counter() - self.request_submission_times.pop( - request_id - ) - completion_future = self.completion_futures.pop(request_id) - completion_future.set_result(DynamicInferenceRequest.deserialize(reply)) + data = msgpack.unpackb(self.socket.recv(flags=zmq.NOBLOCK), raw=False) + header = Headers(data[0]) + if header == Headers.ENGINE_REPLY: + request_id, reply = data[1:] + reply['latency'] = time.perf_counter() - self.request_submission_times.pop( + request_id + ) + completion_future = self.completion_futures.pop(request_id) + if completion_future.done(): + logging.warning(f"Client: The future for {request_id} has been cancelled!") + continue + completion_future.set_result(DynamicInferenceRequestRecord.deserialize(reply)) + elif header == Headers.PAUSE_ACK: + self.paused.set() + elif header == Headers.STOP_ACK: + self.stopped.set() except zmq.Again: await asyncio.sleep(0.005) continue @@ -137,15 +156,15 @@ def _connect_with_inference_coordinator(self): """ Performs the initial handshake with the inference coordinator. - Sends a CONNECT signal and waits for an ACK reply to ensure the + Sends a CONNECT signal and waits for a CONNECT_ACK reply to ensure the connection is established and acknowledged by the coordinator. """ payload = [Headers.CONNECT.value] self.socket.send(msgpack.packb(payload, use_bin_type=True)) reply = msgpack.unpackb(self.socket.recv(), raw=False)[0] - assert Headers(reply) == Headers.ACK + assert Headers(reply) == Headers.CONNECT_ACK - async def start(self): + async def start(self, loop: Optional[asyncio.AbstractEventLoop] = None): """ Connects to the coordinator and starts the background listener task. @@ -154,8 +173,12 @@ async def start(self): coroutine. 
""" logging.info("Client: Connecting to InferenceCoordinator...") + self._loop = get_asyncio_loop(loop) + self.running.set() + self.paused.clear() + self.stopped.clear() self._connect_with_inference_coordinator() - self.listener_task = asyncio.create_task(self._listen_for_completed_requests()) + self.listener_task = self._loop.create_task(self._recv_task()) def _send_signal_to_engines(self, signal): """ @@ -168,17 +191,52 @@ def _send_signal_to_engines(self, signal): payload_serialized = msgpack.packb(payload, use_bin_type=True) self.socket.send(payload_serialized) - def pause_engines(self): + def pause_engines(self) -> Awaitable: + """Sends a signal to pause all inference engines. + + The signal first propagates thru the coordinator to all engines. + All engines acknowledge this signal and clear their `running` flags. + The coordinator awaits all acknowledgements before forwarding the ACK + back to the client, as well as to the engines. + The engines set their `paused` flags upon seeing the ACK. + + Returns: + Awaitable: An awaitable that resolves when all engines have paused. + """ + self._send_signal_to_engines(Headers.PAUSE) + return self.paused.wait() + + def unpause_engines(self) -> None: + """Sends a signal to unpause all inference engines.""" + self.paused.clear() + self.running.set() + self._send_signal_to_engines(Headers.UNPAUSE) + + def suspend_engines(self): """Sends a signal to pause all inference engines.""" self._send_signal_to_engines(Headers.PAUSE) + self._send_signal_to_engines(Headers.SUSPEND) - def unpause_engines(self): + def resume_engines(self): """Sends a signal to unpause all inference engines.""" + self._send_signal_to_engines(Headers.RESUME) self._send_signal_to_engines(Headers.UNPAUSE) - def stop_engines(self): - """Sends a signal to gracefully stop all inference engines.""" + def stop_engines(self) -> Awaitable: + """Sends a signal to gracefully stop all inference engines. 
+ + The signal first propagates thru the coordinator to all engines. + All engines acknowledge this signal and clear their `running` flags. + The coordinator awaits all acknowledgements before forwarding the ACK + back to the client, as well as to the engines. + The engines set their `stopped` flags upon seeing the ACK. + + Returns: + Awaitable: An awaitable that resolves when all engines have stopped. + """ self._send_signal_to_engines(Headers.STOP) + self.running.clear() + return self.stopped.wait() def stop(self): """ diff --git a/megatron/core/inference/inference_request.py b/megatron/core/inference/inference_request.py index 21ff7786d6a..b58fac1b281 100644 --- a/megatron/core/inference/inference_request.py +++ b/megatron/core/inference/inference_request.py @@ -11,10 +11,18 @@ import torch from megatron.core.inference.sampling_params import SamplingParams +from megatron.core.tokenizers import MegatronTokenizer -def serialize_tensor(tensor): - """Serialize tensor to bytes.""" +def serialize_tensor(tensor: torch.Tensor) -> bytes: + """Serialize tensor to bytes. + + Args: + tensor (Tensor): Tensor. + + Returns: + (bytes) Byte representation of tensor. + """ buffer = io.BytesIO() torch.save(tensor, buffer) buffer.seek(0) @@ -22,8 +30,15 @@ def serialize_tensor(tensor): return tensor_bytes -def deserialize_tensor(tensor_bytes): - """Deserialize tensor from bytes.""" +def deserialize_tensor(tensor_bytes: bytes) -> torch.Tensor: + """Deserialize tensor from bytes. + + Args: + tensor_bytes (bytes): Byte representation of tensor. + + Returns: + (Tensor) Tensor. + """ buffer = io.BytesIO(tensor_bytes) tensor = torch.load(buffer) return tensor @@ -76,11 +91,12 @@ def __post_init__(self): ) self.sampling_params = self.inference_parameters - def serializable(self): - """ - Converts the instance into a serializable dictionary. + def serialize(self) -> dict: + """Converts the instance into a serializable dictionary. 
+ Returns: - dict: A dictionary representation of the instance suitable for serialization. + (dict) A dictionary representation of the instance suitable for + serialization. """ # Dataclass to dict. @@ -169,11 +185,12 @@ def __str__(self): payload_str = "" if self.payload is None else f", {type(self.payload).__name__}" return f"[{self.timestamp:.3f}] {self.type.name}{payload_str}" - def serialize(self): - """ - Converts the instance into a serializable dictionary. + def serialize(self) -> dict: + """Converts the instance into a serializable dictionary. + Returns: - dict: A dictionary representation of the instance suitable for serialization. + (dict) A dictionary representation of the instance suitable for + serialization. """ # Dataclass to dict. @@ -253,13 +270,14 @@ def __str__(self): ) ) - def serializable(self): - """ - Converts the instance into a serializable dictionary. + def serialize(self): + """Converts the instance into a serializable dictionary. + Returns: - dict: A dictionary representation of the instance suitable for serialization. + (dict) A dictionary representation of the instance suitable for + serialization. """ - obj = super().serializable() + obj = super().serialize() obj["events"] = [e.serialize() for e in self.events] return obj @@ -277,6 +295,39 @@ def deserialize(cls, obj: dict) -> "DynamicInferenceRequest": request.events = [DynamicInferenceEvent.deserialize(e) for e in obj["events"]] return request + @property + def tracked_metadata(self) -> List[Any]: + """Obtain an ordered list of all request metadata to be tracked by the context. + + This consists of metadata that is used to inform text generation. + The values of such fields are tensorized and kept aligned with the current active batch. + + Note that while the general request object is mutable, this metadata is + inherently assumed to remain immutable once the request becomes active. 
+ """ + sp = self.sampling_params + if sp.termination_id is None: + if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0: + warnings.warn( + f"DynamicInferenceRequest {self.request_id} has no termination_id set " + "in its sampling_params. Defaulting to -1." + ) + sp.termination_id = -1 + return [getattr(sp, field) for field in self.get_metadata_labels().keys()] + + @staticmethod + def get_metadata_labels() -> Dict[str, int]: + """Provides human-readable labels for the tracked metadata fields.""" + ret = [ + "temperature", + "top_k", + "top_p", + "termination_id", + "return_log_probs", + "skip_prompt_log_probs", + ] + return {k: v for v, k in enumerate(ret)} + def add_event(self, type: DynamicInferenceEventType, payload: Optional[Any] = None) -> None: """Add event.""" self.events.append(DynamicInferenceEvent(type=type, payload=payload)) @@ -314,6 +365,158 @@ def failed(self) -> bool: return self.status == Status.FAILED +@dataclass(kw_only=True) +class DynamicInferenceRequestRecord: + """History of DynamicInferenceRequest objects over multiple suspend and + resumes.""" + + requests: list[DynamicInferenceRequest] = field(default_factory=list) + latency: Optional[float] = None + + @classmethod + def from_request(cls, request: DynamicInferenceRequest) -> "DynamicInferenceRequestRecord": + """Initialize record from a single request. + + Args: + request (DynamicInferenceRequest): Initial request. + + Returns: + (DynamicInferenceRequestRecord) A record. + """ + record = cls() + record.requests.append(request) + return record + + def __getitem__(self, idx: int) -> DynamicInferenceRequest: + """Get request by index. + + Args: + idx (int): Request index. + + Returns: + (DynamicInferenceRequest) Request object. + """ + return self.requests[idx] + + @property + def request_id(self) -> int: + """Get request id. + + Returns: + (int) Request id. 
+ """ + return self.requests[0].request_id + + def suspend(self, tokenizer: MegatronTokenizer): + """Suspend request by storing references to previous prompt, generations, + and sampling params. + + Args: + tokenizer (MegatronTokenizer): The tokenizer. + """ + + old_request = self[-1] + + # New prompt (concatenate prompt + generated tokens). + new_prompt_tokens = torch.cat( + ( + old_request.prompt_tokens, + torch.tensor( + old_request.generated_tokens, + dtype=old_request.prompt_tokens.dtype, + device=old_request.prompt_tokens.device, + ), + ), + dim=0, + ) + new_prompt_str = tokenizer.detokenize(new_prompt_tokens.tolist()) + + # New sampling params. + new_sampling_params = SamplingParams( + **{ + **asdict(old_request.sampling_params), + "num_tokens_to_generate": ( + old_request.sampling_params.num_tokens_to_generate + - len(old_request.generated_tokens) + ), + } + ) + + # New request. + new_request = DynamicInferenceRequest( + request_id=old_request.request_id, + prompt=new_prompt_str, + prompt_tokens=new_prompt_tokens, + sampling_params=new_sampling_params, + ) + self.requests.append(new_request) + + def merge(self, tokenizer: MegatronTokenizer) -> DynamicInferenceRequest: + """Merge requests into a single suspend-agnostic request object. + + Args: + tokenizer (MegatronTokenizer): The tokenizer. + + Returns: + (DynamicInferenceRequest) Merged request. + """ + + def merge_lists(key): + if getattr(self.requests[0], key) is None: + return None + else: + return [v for r in self.requests for v in getattr(r, key)] + + prompt_tokens = self.requests[0].prompt_tokens + generated_tokens = merge_lists("generated_tokens") + + # Merged request. 
+ request = DynamicInferenceRequest( + request_id=self.requests[0].request_id, + prompt=tokenizer.detokenize(prompt_tokens.tolist()), + prompt_tokens=prompt_tokens, + prompt_log_probs=self.requests[0].prompt_log_probs, + prompt_top_n_logprobs=self.requests[0].prompt_top_n_logprobs, + generated_text=tokenizer.detokenize(generated_tokens), + generated_tokens=generated_tokens, + generated_length=len(generated_tokens), + generated_log_probs=merge_lists("generated_log_probs"), + generated_top_n_logprobs=merge_lists("generated_top_n_logprobs"), + sampling_params=self.requests[0].sampling_params, + tpot=merge_lists("tpot"), + status=self.requests[-1].status, + latency=self.latency, + events=merge_lists("events"), + ) + + return request + + def serialize(self) -> dict: + """Converts the instance into a serializable dictionary. + + Returns: + (dict) A dictionary representation of the instance suitable for + serialization. + """ + obj = asdict(self) + obj["requests"] = [r.serialize() for r in self.requests] + return obj + + @classmethod + def deserialize(cls, obj: dict) -> "DynamicInferenceRequestRecord": + """Deserialize record. + + Args: + obj (dict): Serialized record data. + + Returns: + (DynamicInferenceRequestRecord) Deserialized record. 
+ """ + request = cls(**obj) + request.requests = [DynamicInferenceRequest.deserialize(r) for r in obj["requests"]] + return request + + @dataclass(kw_only=True) class VLMInferenceRequest(InferenceRequest): """Class for a VLM inference request""" diff --git a/megatron/core/inference/sampling_params.py b/megatron/core/inference/sampling_params.py index e215b3f134b..d85b2816c80 100644 --- a/megatron/core/inference/sampling_params.py +++ b/megatron/core/inference/sampling_params.py @@ -44,7 +44,7 @@ def add_attributes(self, attribute_value_pair: dict): for key, value in attribute_value_pair.items(): setattr(self, key, value) - def serializable(self) -> dict: + def serialize(self) -> dict: """Return a dictionary that is msgpack-serializable.""" return self.__dict__.copy() diff --git a/megatron/core/inference/text_generation_controllers/text_generation_controller.py b/megatron/core/inference/text_generation_controllers/text_generation_controller.py index 2bda1425710..0aed3df079e 100644 --- a/megatron/core/inference/text_generation_controllers/text_generation_controller.py +++ b/megatron/core/inference/text_generation_controllers/text_generation_controller.py @@ -23,7 +23,11 @@ MaxSequenceLengthOverflowError, WarmupEngineMode, ) -from megatron.core.inference.inference_request import InferenceRequest, Status +from megatron.core.inference.inference_request import ( + DynamicInferenceRequest, + InferenceRequest, + Status, +) from megatron.core.inference.model_inference_wrappers.abstract_model_inference_wrapper import ( AbstractModelInferenceWrapper, ) @@ -74,6 +78,35 @@ def __init__( self.sampling_rng = torch.Generator(device=torch.cuda.current_device()) self.sampling_rng.manual_seed(model_config.inference_sampling_seed) + if self.inference_wrapped_model.inference_context.is_dynamic_batching(): + self._init_dynamic_sampling_tensors() + + def _init_dynamic_sampling_tensors(self): + """Initialize tensors needed for dynamic sampling.""" + context = 
self.inference_wrapped_model.inference_context + max_requests = context.max_total_requests + + device = torch.cuda.current_device() + logits_dtype = self.inference_wrapped_model.inference_wrapper_config.params_dtype + # Use padded vocab size because tokenizer vocab size might pad to nearest power of 2. + vocab_size = self.inference_wrapped_model.inference_wrapper_config.padded_vocab_size + + # Initialize bookkeeping tensors. + self.sampling_logits_cuda = torch.empty( + max_requests, vocab_size, dtype=logits_dtype, device=device + ) + self.sampled_tokens_cuda = torch.empty(max_requests, dtype=torch.int64, device=device) + + self.temperature_cuda = torch.empty_like(self.sampled_tokens_cuda, dtype=torch.float) + self.top_k_cuda = torch.empty_like(self.sampled_tokens_cuda, dtype=torch.int32) + self.top_p_cuda = torch.empty_like(self.sampled_tokens_cuda, dtype=torch.float) + self.termination_id_cuda = torch.empty(max_requests, dtype=torch.int64, device=device) + self.return_log_probs_cuda = torch.empty(max_requests, dtype=torch.bool, device=device) + self.skip_prompt_log_probs_cuda = torch.empty(max_requests, dtype=torch.bool, device=device) + + # Used for inefficient torch sampling. + self.torch_sampling_buckets: List[Tensor] = [] + def tokenize_prompt(self, prompt: str, add_BOS: bool = False) -> List[int]: """Utility to tokenize the input prompts. 
@@ -177,16 +210,14 @@ def detokenize_generations( return text, prompts_plus_generations_segments - def sample_from_logits( + def _torch_sampling_func( self, last_token_logits: torch.Tensor, - sampling_params: Optional[SamplingParams] = None, + temperature: float, + top_k: int, + top_p: float, vocab_size: Optional[int] = None, - generation_started: Optional[torch.Tensor] = None, - top_n_logprobs_dict: Dict[int, List[Dict[str, float]]] = None, - logits: Optional[torch.Tensor] = None, - **kwargs, - ) -> torch.Tensor: + ): """Samples the logits to generate outputs Given the logits of the last token, this function samples it @@ -196,26 +227,15 @@ def sample_from_logits( Args: last_token_logits (torch.Tensor): The last token logits. A tensor of - size [batch_size, vocab_size] - sampling_params (SamplingParams): The parameters to use for inference. - vocab_size (int): Obtained from the tokenizer. Defaults to None - generation_started (torch.Tensor): A boolean tensor of shape [batch_size]. True - indicates the prompt at that index has started generating tokens. - top_n_logprobs_dict (top_n_logprobs_dict): The dict to be updated + size [batch_size, vocab_size]. + temperature (float): The temperature to use for sampling. + top_k (int): The top-k value to use for sampling. + top_p (float): The top-p value to use for sampling. + vocab_size (int): Obtained from the tokenizer. Defaults to None. Returns: sampled_logits (torch.Tensor): 1D tensor with [batch_size] elements - top_n_logprobs_this_step (torch.return_types.topk): a topk tensor with values as logits - and indices as the top k elements. None if sampling params top_n_logprobs is 0. 
""" - - if kwargs.get("common_inference_params"): - sampling_params = kwargs["common_inference_params"] - - top_p = sampling_params.top_p - top_k = sampling_params.top_k - temperature = sampling_params.temperature - assert isinstance(top_p, float) assert isinstance(top_k, int) assert not (top_k > 0 and top_p > 0.0), "Cannot have top-p and top-k both greater than zero" @@ -246,53 +266,6 @@ def modify_logits_for_top_p_filtering(logits, top_p): filter_ = filter_.scatter(1, sorted_indices, filter_) logits.masked_fill_(filter_, float("-Inf")) - if sampling_params.top_n_logprobs > 0: - # NOTE : This thing can also be clubbed with where we compute log probs - # when --return-log-probs is enabled. This is just more efficient - assert generation_started is not None - if logits is None: - batch_size = last_token_logits.shape[0] - last_token_log_probs = F.log_softmax(last_token_logits, dim=1).to(torch.float32) - top_n_logits_this_step = torch.topk( - last_token_log_probs, k=sampling_params.top_n_logprobs - ) - top_n_logprobs_this_step = top_n_logits_this_step.values.cpu() - top_n_logprobs_indices = top_n_logits_this_step.indices.cpu() - - # If we return prompt top_n_log_probs then we always append to the - # logprobs dict. Otherwise we only append for generated tokens. 
- if sampling_params.return_prompt_top_n_logprobs: - mask = torch.ones(batch_size, dtype=torch.bool) - else: - mask = generation_started.cpu() - - self._update_top_n_logprobs_dict( - top_n_logprobs_this_step, top_n_logprobs_indices, mask, top_n_logprobs_dict - ) - else: - assert sampling_params.return_prompt_top_n_logprobs - - # Compute the prompt logprobs - batch_size, seq_length, _ = logits.shape - log_probs = F.log_softmax(logits, dim=2).to(torch.float32) - top_n_logits_this_step = torch.topk(log_probs, k=sampling_params.top_n_logprobs) - - # Move the token dimension to the front and then add each token logprobs - # individually for every request in the batch - top_n_logprobs_this_step = top_n_logits_this_step.values.permute(1, 0, 2).cpu() - top_n_logprobs_indices = top_n_logits_this_step.indices.permute(1, 0, 2).cpu() - - # We append to the logprobs dict for every prompt token - mask = torch.ones(batch_size, dtype=torch.bool) - - for i in range(seq_length): - self._update_top_n_logprobs_dict( - top_n_logprobs_this_step[i], - top_n_logprobs_indices[i], - mask, - top_n_logprobs_dict, - ) - # Greedy sampling if top_k == 1: sampled_logits = torch.argmax(last_token_logits, dim=-1) @@ -322,10 +295,10 @@ def modify_logits_for_top_p_filtering(logits, top_p): return sampled_logits - def sample_from_dynamic_logits( + def sample_from_logits( self, last_token_logits: torch.Tensor, - active_sampling_map: List[Tuple[SamplingParams, List[int]]], + sampling_params: Optional[SamplingParams] = None, vocab_size: Optional[int] = None, generation_started: Optional[torch.Tensor] = None, top_n_logprobs_dict: Dict[int, List[Dict[str, float]]] = None, @@ -335,16 +308,14 @@ def sample_from_dynamic_logits( """Samples the logits to generate outputs Given the logits of the last token, this function samples it - according to the parameters defined in active_sampling_map + according to the parameters defined in sampling_params and returns the samples. 
If sampling parameters top_n_logprobs > 0 at each step it also updates the top_n_logprobs dict. Args: last_token_logits (torch.Tensor): The last token logits. A tensor of size [batch_size, vocab_size] - active_sampling_map (List[Tuple[SamplingParams, List[int]]]): A list of tuples - matching each unique set of sampling params to the context array indices - of the corresponding active requests. + sampling_params (SamplingParams): The parameters to use for inference. vocab_size (int): Obtained from the tokenizer. Defaults to None generation_started (torch.Tensor): A boolean tensor of shape [batch_size]. True indicates the prompt at that index has started generating tokens. @@ -352,29 +323,65 @@ def sample_from_dynamic_logits( Returns: sampled_logits (torch.Tensor): 1D tensor with [batch_size] elements - termination_id (torch.Tensor): Tensor of shape [batch_size] with termination ids top_n_logprobs_this_step (torch.return_types.topk): a topk tensor with values as logits and indices as the top k elements. None if sampling params top_n_logprobs is 0. """ - batch_size = last_token_logits.size(0) - new_sample = torch.zeros(batch_size, dtype=torch.int64, device=last_token_logits.device) - termination_id = torch.zeros_like(new_sample, dtype=torch.int64) - - for sampling_params, mask in active_sampling_map: - # Filter out indices that are out of bounds for the current batch - valid_mask = [i for i in mask if i < batch_size] - if valid_mask: - new_sample[valid_mask] = self.sample_from_logits( - last_token_logits[valid_mask], - sampling_params=sampling_params, - vocab_size=vocab_size, + + if kwargs.get("common_inference_params"): + sampling_params = kwargs["common_inference_params"] + + if sampling_params.top_n_logprobs > 0: + # NOTE : This thing can also be clubbed with where we compute log probs + # when --return-log-probs is enabled. 
This is just more efficient + assert generation_started is not None + if logits is None: + batch_size = last_token_logits.shape[0] + last_token_log_probs = F.log_softmax(last_token_logits, dim=1).to(torch.float32) + top_n_logits_this_step = torch.topk( + last_token_log_probs, k=sampling_params.top_n_logprobs ) - if sampling_params.termination_id is not None: - termination_id[valid_mask] = sampling_params.termination_id + top_n_logprobs_this_step = top_n_logits_this_step.values.cpu() + top_n_logprobs_indices = top_n_logits_this_step.indices.cpu() + + # If we return prompt top_n_log_probs then we always append to the + # logprobs dict. Otherwise we only append for generated tokens. + if sampling_params.return_prompt_top_n_logprobs: + mask = torch.ones(batch_size, dtype=torch.bool) else: - termination_id[valid_mask] = self.tokenizer.eod + mask = generation_started.cpu() + + self._update_top_n_logprobs_dict( + top_n_logprobs_this_step, top_n_logprobs_indices, mask, top_n_logprobs_dict + ) + else: + assert sampling_params.return_prompt_top_n_logprobs + + # Compute the prompt logprobs + batch_size, seq_length, _ = logits.shape + log_probs = F.log_softmax(logits, dim=2).to(torch.float32) + top_n_logits_this_step = torch.topk(log_probs, k=sampling_params.top_n_logprobs) + + # Move the token dimension to the front and then add each token logprobs + # individually for every request in the batch + top_n_logprobs_this_step = top_n_logits_this_step.values.permute(1, 0, 2).cpu() + top_n_logprobs_indices = top_n_logits_this_step.indices.permute(1, 0, 2).cpu() - return new_sample, termination_id + # We append to the logprobs dict for every prompt token + mask = torch.ones(batch_size, dtype=torch.bool) + + for i in range(seq_length): + self._update_top_n_logprobs_dict( + top_n_logprobs_this_step[i], + top_n_logprobs_indices[i], + mask, + top_n_logprobs_dict, + ) + + top_p = sampling_params.top_p + top_k = sampling_params.top_k + temperature = sampling_params.temperature + + return 
self._torch_sampling_func(last_token_logits, temperature, top_k, top_p, vocab_size) def update_generation_status( self, @@ -535,10 +542,12 @@ def _dynamic_step_forward_logits(self, input_ids: Tensor, position_ids: Tensor) input_ids (Tensor): The input token IDs. position_ids (Tensor): The position IDs. """ + inference_wrapper_config = self.inference_wrapped_model.inference_wrapper_config + context = self.inference_wrapped_model.inference_context materialize_only_last_token_logits = context.materialize_only_last_token_logits - inference_wrapper_config = self.inference_wrapped_model.inference_wrapper_config + active_request_count = context.total_request_count - context.paused_request_count with torch.inference_mode(): logits = self.inference_wrapped_model.run_one_forward_step( @@ -546,9 +555,8 @@ def _dynamic_step_forward_logits(self, input_ids: Tensor, position_ids: Tensor) ) if self.model_is_pipeline_parallel: - batch_size = context.total_request_count - context.paused_request_count logits_seq_len = ( - batch_size if materialize_only_last_token_logits else input_ids.shape[1] + active_request_count if materialize_only_last_token_logits else input_ids.shape[1] ) vocab_size = inference_wrapper_config.padded_vocab_size logits_shape = [1, logits_seq_len, vocab_size] @@ -556,8 +564,6 @@ def _dynamic_step_forward_logits(self, input_ids: Tensor, position_ids: Tensor) if is_pipeline_last_stage(self.pp_group): assert logits is not None and torch.Size(logits_shape) == logits.shape - # TODO(ksanthanam): Evaluate whether it makes more sense to sample on 1 rank - # and then broadcast the sampled tokens rather than broadcasting the raw logits. 
logits = broadcast_from_last_pipeline_stage( logits_shape, dtype=inference_wrapper_config.params_dtype, @@ -567,31 +573,95 @@ def _dynamic_step_forward_logits(self, input_ids: Tensor, position_ids: Tensor) return logits def _dynamic_step_sample_bookkeeping( - self, active_sampling_map: List[Tuple[SamplingParams, List[int]]] + self, + *, + backend: str = "torch", + request_metadata: Optional[Tensor] = None, + request_metadata_labels: Dict[str, int] = None, ): - """Perform bookkeeping necessary to sample logits for dynamic batching.""" - pass + """Perform bookkeeping necessary to sample logits for dynamic batching. - def _dynamic_step_sample_logits( - self, logits: Tensor, active_sampling_map: List[Tuple[SamplingParams, List[int]]] - ) -> Tensor: - """Sample logits for dynamic batching. + The ability to override the context's data is solely intended for + standalone use or testing, and should never be used in a running system. Args: - logits (Tensor): The logits from the forward step. - active_sampling_map (List[Tuple[SamplingParams, List[int]]]): A list of tuples - matching each unique set of sampling params to the context array indices - of the corresponding active requests. + backend (str): The sampling backend to use. + request_metadata (Optional[Tensor]): An override for the tensor that manages all + request metadata, such as sampling parameters. By default, this metadata is + retrieved from the context. + request_metadata_labels (Optional[Dict]): An override for the map of metadata labels + to their index in the request_metadata tensor. By default, this metadata is + retrieved from the request object. 
+ """ + assert backend in ["torch"] + context = self.inference_wrapped_model.inference_context + + if request_metadata is None: + request_metadata = context.request_metadata[ + context.paused_request_count : context.total_request_count, : + ] + if request_metadata_labels is None: + request_metadata_labels = DynamicInferenceRequest.get_metadata_labels() + active_request_count = request_metadata.size(0) + + # Shorthand these, because the torch backend needs them. + temp = request_metadata[:, request_metadata_labels["temperature"]] + top_k = request_metadata[:, request_metadata_labels["top_k"]] + top_p = request_metadata[:, request_metadata_labels["top_p"]] + + # Copy data into relevant tensors. + self.temperature_cuda[:active_request_count].copy_(temp, non_blocking=True) + self.top_k_cuda[:active_request_count] = top_k.to( + dtype=torch.int32, copy=True, non_blocking=True + ) + self.top_p_cuda[:active_request_count].copy_(top_p, non_blocking=True) + self.termination_id_cuda[:active_request_count] = request_metadata[ + :, request_metadata_labels["termination_id"] + ].to(dtype=torch.int64, copy=True, non_blocking=True) + self.return_log_probs_cuda[:active_request_count] = request_metadata[ + :, request_metadata_labels["return_log_probs"] + ].to(dtype=torch.bool, copy=True, non_blocking=True) + self.skip_prompt_log_probs_cuda[:active_request_count] = request_metadata[ + :, request_metadata_labels["skip_prompt_log_probs"] + ].to(dtype=torch.bool, copy=True, non_blocking=True) + + if backend == "torch": + # Bucketize the core sampling parameters. + core_params = torch.stack((temp, top_k, top_p), dim=1) + _, inv_indices, cnts = torch.unique( + core_params, dim=0, return_inverse=True, return_counts=True + ) + order = torch.argsort(inv_indices, stable=True) + sampling_buckets = torch.split(order, cnts.tolist()) + # Perform the D2H sync needed by `_torch_sampling_func` here. 
+ group_reps = torch.stack([indices[0] for indices in sampling_buckets], dim=0) + core_params_reps = core_params[group_reps].detach().cpu() + temp_reps = core_params_reps[:, 0].tolist() + top_k_reps = core_params_reps[:, 1].to(torch.int32).tolist() + top_p_reps = core_params_reps[:, 2].tolist() + # Store the buckets and their equivalence class representatives. + self.torch_sampling_buckets = ( + (sampling_buckets[idx], temp_reps[idx], top_k_reps[idx], top_p_reps[idx]) + for idx in range(len(sampling_buckets)) + ) + + def _dynamic_step_sample_logits(self, logits: Tensor, backend: str = "torch") -> Tensor: + """Sample tokens from logits for dynamic batching. + + Args: + logits (Tensor): The logits to sample from. + backend (str): The sampling backend to use. Returns: - new_sample (Tensor): The sampled tokens for each active request. - termination_id (int): The termination token IDs of each active request. + new_sample (Tensor): The sampled tokens. """ + # TODO(ksanthanam): Evaluate whether it makes more sense to sample on 1 rank + # and then broadcast the sampled tokens rather than broadcasting the raw logits. + assert backend in ["torch"] + context = self.inference_wrapped_model.inference_context materialize_only_last_token_logits = context.materialize_only_last_token_logits - inference_wrapper_config = self.inference_wrapped_model.inference_wrapper_config - # Last token logits. if materialize_only_last_token_logits: # When materialize_only_last_token_logits is true, last_token_logits is @@ -599,60 +669,72 @@ def _dynamic_step_sample_logits( last_token_logits = logits.squeeze(0) else: last_token_logits = context.last_token_logits(logits) + active_request_count = last_token_logits.size(0) + # Copy last_token_logits to contiguous buffer. + self.sampling_logits_cuda[:active_request_count].copy_(last_token_logits, non_blocking=True) + + if backend == "torch": + # Concatenate the outputs once to prevent repeated small writes. 
+ token_list = [] + indices_list = [] + + for indices, temp, top_k, top_p in self.torch_sampling_buckets: + token_list.append( + self._torch_sampling_func( + self.sampling_logits_cuda[indices, :], temp, top_k, top_p + ) + ) + indices_list.append(indices) - # Sample. - # Use padded vocab size because tokenizer vocab size might not include padding - # to nearest power of 2. - vocab_size = inference_wrapper_config.padded_vocab_size - new_sample, termination_id = self.sample_from_dynamic_logits( - last_token_logits, active_sampling_map, vocab_size=vocab_size - ) - return new_sample, termination_id + # Single write to the output tensor. + sampled_tokens = torch.cat(token_list, dim=0) + sampled_indices = torch.cat(indices_list, dim=0) + self.sampled_tokens_cuda.index_copy_(0, sampled_indices, sampled_tokens) + return self.sampled_tokens_cuda[:active_request_count].clone() - def _dynamic_step_log_probs_bookkeeping(self): + def _dynamic_step_log_probs_bookkeeping(self) -> bool: """Perform bookkeeping necessary to compute log probs for dynamic batching.""" - pass - - def _dynamic_step_calculate_log_probs( - self, - logits: Tensor, - new_sample: Tensor, - active_sampling_map: List[Tuple[SamplingParams, List[int]]], - ) -> Optional[Tensor]: context = self.inference_wrapped_model.inference_context materialize_only_last_token_logits = context.materialize_only_last_token_logits - log_probs = None - return_log_probs = False - for sampling_params, mask in active_sampling_map: - if sampling_params.return_log_probs: - assert ( - sampling_params.skip_prompt_log_probs - or materialize_only_last_token_logits is False - ), "Materialize only last token logits must be false for returning log probs" - return_log_probs = True + active_request_count = context.total_request_count - context.paused_request_count - if return_log_probs: - log_probs = context.calculate_log_probs( - logits, new_sample, only_last_token_logits=materialize_only_last_token_logits - ) + to_check = 
self.return_log_probs_cuda[:active_request_count] + to_check &= ~self.skip_prompt_log_probs_cuda[:active_request_count] - return log_probs + assert not ( + to_check.any() and materialize_only_last_token_logits + ), "Prompt log probs cannot be calculated if only last token logits are materialized." - def _dynamic_step_context_bookkeeping( - self, new_sample: Tensor, termination_id: int - ) -> Tuple[Tensor, Tensor, Tensor]: - """Update the dynamic inference context after sampling. + return self.return_log_probs_cuda[:active_request_count].any() - Args: - new_sample (Tensor): The newly sampled tokens for each active request. - termination_id (int): The token ID that indicates termination. + def _dynamic_step_calculate_log_probs(self, logits: Tensor) -> Optional[Tensor]: + """Calculate log probs from logits.""" + context = self.inference_wrapped_model.inference_context + materialize_only_last_token_logits = context.materialize_only_last_token_logits + + active_request_count = context.total_request_count - context.paused_request_count + + ret = context.calculate_log_probs( + logits, + self.sampled_tokens_cuda[:active_request_count], + only_last_token_logits=materialize_only_last_token_logits, + ) + return ret + + def _dynamic_step_context_bookkeeping(self, new_sample) -> Dict[str, Tensor]: + """Update the dynamic inference context after sampling. Return: - Tuple[Tensor, Tensor, Tensor]: active / paused / finished request IDs. + Dict [str, Tensor]: A dictionary containing: + active_request_ids (Tensor): Current active request IDs. + newly_paused_request_ids (Tensor): Newly paused request IDs. + finished_request_ids (Tensor): Finished request IDs. """ context = self.inference_wrapped_model.inference_context + active_request_count = context.total_request_count - context.paused_request_count + # Active sequence lengths. 
active_request_ids = context.request_ids[ context.paused_request_count : context.total_request_count @@ -663,9 +745,10 @@ def _dynamic_step_context_bookkeeping( # Request finished if termination_id or length >= max_sequence_length. # Note: termination_id tensor has per-request termination IDs from mixed sampling - active_request_mask = (new_sample != termination_id).byte() & torch.less( - active_sequence_lengths, max_sequence_lengths - ).byte() + active_request_mask = ( + self.sampled_tokens_cuda[:active_request_count] + != self.termination_id_cuda[:active_request_count] + ).byte() & torch.less(active_sequence_lengths, max_sequence_lengths).byte() finished_idxs = ( torch.nonzero(active_request_mask == 0, as_tuple=True)[0] + context.paused_request_count ) @@ -685,16 +768,11 @@ def _dynamic_step_context_bookkeeping( @torch.inference_mode() async def async_generate_output_tokens_dynamic_batch( - self, - active_sampling_map: List[Tuple[SamplingParams, List[int]]], - skip_bookkeeping: Optional[bool] = False, + self, skip_bookkeeping: Optional[bool] = False ) -> Optional[Dict]: """Forward step the model and update the inference context. Args: - active_sampling_map (List[Tuple[SamplingParams, List[int]]]): A list of tuples - matching each unique set of sampling params to the context array indices - of the corresponding active requests. skip_bookkeeping (Optional[bool]): If true, skip the context bookkeeping step. Return: @@ -715,13 +793,12 @@ async def async_generate_output_tokens_dynamic_batch( if context.active_token_count == 0: return None - # This method only performs computations using CPU tensors. input_ids, position_ids = self._dynamic_step_context_init() + cuda_graph_request_count = ( context.padded_active_request_count if context.is_decode_only() else None ) - # This method only performs computations using GPU tensors. logits = self._dynamic_step_forward_logits(input_ids, position_ids) # This is the best place to yield control back to event loop. 
@@ -733,41 +810,35 @@ async def async_generate_output_tokens_dynamic_batch( # NOTE [TDE]: This will be moved once CPU and GPU methods are separated. await asyncio.sleep(0) - # This method will only perform computations using CPU tensors in the future. - self._dynamic_step_sample_bookkeeping(active_sampling_map) - # This method will only perform computations using GPU tensors in the future. - new_sample, termination_id = self._dynamic_step_sample_logits(logits, active_sampling_map) + self._dynamic_step_sample_bookkeeping() + new_sample = self._dynamic_step_sample_logits(logits) - # This method will only perform computations using CPU tensors in the future. - self._dynamic_step_log_probs_bookkeeping() - # This method will only perform computations using GPU tensors in the future. - log_probs = self._dynamic_step_calculate_log_probs(logits, new_sample, active_sampling_map) + return_log_probs = self._dynamic_step_log_probs_bookkeeping() + if return_log_probs: + log_probs = self._dynamic_step_calculate_log_probs(logits) + else: + log_probs = None - # This method only performs computations using CPU tensors. 
if skip_bookkeeping: - request_bookeeping = {} + request_bookkeeping = {} else: - request_bookeeping = self._dynamic_step_context_bookkeeping(new_sample, termination_id) + request_bookkeeping = self._dynamic_step_context_bookkeeping(new_sample) ret = { "sample": new_sample, "log_probs": log_probs, "cuda_graph_request_count": cuda_graph_request_count, } - ret.update(request_bookeeping) + ret.update(request_bookkeeping) return ret @torch.inference_mode() def generate_output_tokens_dynamic_batch( - self, - active_sampling_map: List[Tuple[SamplingParams, List[int]]], - loop: Optional[asyncio.AbstractEventLoop] = None, + self, loop: Optional[asyncio.AbstractEventLoop] = None ) -> Optional[Dict]: """Synchronous wrapper for `self.async_generate_output_tokens_dynamic_batch.""" loop = get_asyncio_loop(loop) - return loop.run_until_complete( - self.async_generate_output_tokens_dynamic_batch(active_sampling_map) - ) + return loop.run_until_complete(self.async_generate_output_tokens_dynamic_batch()) def _update_top_n_logprobs_dict( self, diff --git a/megatron/core/inference/unified_memory.py b/megatron/core/inference/unified_memory.py index 6e5e85ed668..e06e3022561 100644 --- a/megatron/core/inference/unified_memory.py +++ b/megatron/core/inference/unified_memory.py @@ -56,9 +56,9 @@ def compile_allocator(): EXPORT void* managed_malloc(size_t size, int device, void* stream) { (void)stream; - int cur = -1; - cudaGetDevice(&cur); - if (device != cur && device >= 0) cudaSetDevice(device); + int prev_device = -1; + cudaGetDevice(&prev_device); + if (device != prev_device && device >= 0) cudaSetDevice(device); // cudaMallocManaged allows for more memory to be allocated than the device memory size. // The cudaMemAttachGlobal flag makes the memory accessible from both host and device. @@ -69,13 +69,32 @@ def compile_allocator(): if (device >= 0) { // cudaMemAdviseSetPreferredLocation sets the preferred location for the memory. 
// This is a hint that tries to prevent data from being migrated away from the device. - cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, device); - // cudaMemAdviseSetAccessedBy ensures the memory always lives in the device's page table. - // Even if the memory has to be migrated away from the device, it still does not page fault. - // The CUDA docs claim that cudaMemAdviseSetPreferredLocation completely overrides this flag, - // but there is no harm in adding this flag as well for future-proofing. - cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, device); + + #if CUDART_VERSION >= 13000 + // For CUDA >= 13, the cudaMemAdvise device arg is type cudaMemLocation + // instead of an int, so we setup the location and conditionally use it + // in calls to cudaMemAdvise. + cudaMemLocation location; + location.type = cudaMemLocationTypeDevice; + location.id = device; + + cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, location); + + // cudaMemAdviseSetAccessedBy ensures the memory always lives in the device's page table. + // Even if the memory has to be migrated away from the device, it still does not page fault. + // The CUDA docs claim that cudaMemAdviseSetPreferredLocation completely overrides this flag, + // but there is no harm in adding this flag as well for future-proofing. + cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, location); + #else + cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, device); + // cudaMemAdviseSetAccessedBy ensures the memory always lives in the device's page table. + // Even if the memory has to be migrated away from the device, it still does not page fault. + // The CUDA docs claim that cudaMemAdviseSetPreferredLocation completely overrides this flag, + // but there is no harm in adding this flag as well for future-proofing. 
+ cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, device); + #endif } + if (device != prev_device && prev_device >= 0) cudaSetDevice(prev_device); return ptr; } @@ -100,13 +119,29 @@ def compile_allocator(): functions=[], with_cuda=True, extra_ldflags=_extra_ldflags, - verbose=False, + verbose=True, ) _so_path = Path(_mod.__file__).as_posix() _alloc = CUDAPluggableAllocator(_so_path, "managed_malloc", "managed_free").allocator() _compilation_state = CompilationState.SUCCESS - except (RuntimeError, ImportError, OSError): - warnings.warn("Failed to create unified memory mempool.") + except (RuntimeError, ImportError, OSError) as e: + warnings.warn(f"Failed to create unified memory mempool: '{e}'.") + _compilation_state = CompilationState.FAILURE + + # Synchronize failure state across ranks. (For currently unknown reasons, + # one rank can show as FAILURE while the remaining ranks show as SUCCESS.) + import torch + + local_state = torch.tensor( + [_compilation_state.value], dtype=torch.uint8, device=torch.cuda.current_device() + ) + world_states = [ + torch.empty(1, dtype=torch.uint8, device=torch.cuda.current_device()) + for _ in range(torch.distributed.get_world_size()) + ] + torch.distributed.all_gather(world_states, local_state) + world_states = set(s.item() for s in world_states) + if CompilationState.FAILURE.value in world_states: _compilation_state = CompilationState.FAILURE diff --git a/megatron/core/inference/utils.py b/megatron/core/inference/utils.py index d58f3c3a652..55536a52088 100644 --- a/megatron/core/inference/utils.py +++ b/megatron/core/inference/utils.py @@ -2,6 +2,7 @@ import asyncio import multiprocessing +import sys import torch @@ -161,3 +162,57 @@ async def await_process_event( raise RuntimeError( f"Process {process.name} (pid {process.pid}) has exited unexpectedly." ) + + +# Compatibility for Python < 3.13 asyncio Queue functionality. +# This is necessary because asyncio Queues are broken in Python < 3.13. 
+if sys.version_info < (3, 13): + + _SHUTDOWN_SENTINEL = object() + + class asyncio_QueueShutDown(Exception): + """Compatibility exception for Python < 3.13.""" + + pass + + class asyncio_Queue(asyncio.Queue): + """An asyncio.Queue with Python 3.13 compatibility features for Python < 3.13.""" + + def __init__(self, maxsize: int = 0): + super().__init__(maxsize) + self._is_shutdown = False + + async def get(self): + """Get an item from the queue with Python < 3.13 compatibility.""" + if self._is_shutdown and self.empty(): + raise asyncio_QueueShutDown + ret = await super().get() + if ret is _SHUTDOWN_SENTINEL: + super().put_nowait(_SHUTDOWN_SENTINEL) + super().task_done() + raise asyncio_QueueShutDown + return ret + + def put_nowait(self, item): + """Put an item into the queue without blocking""" + if self._is_shutdown: + raise asyncio_QueueShutDown + if item is _SHUTDOWN_SENTINEL: + raise ValueError(f"{item} is reserved for shutdown purposes for Python < 3.13") + super().put_nowait(item) + + def shutdown(self): + """Shutdown the queue for Python < 3.13. + + Note that the listening side of the queue can continue to get old data + off the queue even after it has already been shutdown. The listener only + shutdowns when the queue is BOTH shutdown AND empty. 
+ """ + if not self._is_shutdown: + super().put_nowait(_SHUTDOWN_SENTINEL) + super().task_done() + self._is_shutdown = True + +else: + asyncio_QueueShutDown = asyncio.QueueShutDown + asyncio_Queue = asyncio.Queue diff --git a/megatron/core/models/backends.py b/megatron/core/models/backends.py index abda7c47787..29169285b3e 100644 --- a/megatron/core/models/backends.py +++ b/megatron/core/models/backends.py @@ -22,6 +22,19 @@ LNImpl = WrappedTorchNorm HAVE_APEX = False +from megatron.core.extensions.transformer_engine import ( + TEActivationOp, + TEColumnParallelLinear, + TEDotProductAttention, + TELinear, + TENorm, +) +from megatron.core.tensor_parallel.inference_layers import ( + InferenceLayerNormColumnParallelLinear, + InferenceRowParallelLinear, +) +from megatron.core.utils import is_te_min_version + class BackendSpecProvider(Protocol): """A protocol for providing the submodules used in Spec building.""" @@ -119,3 +132,51 @@ def grouped_mlp_modules( def activation_func(self) -> type: """Which module to use for activation function""" return None + + +class InferenceSpecProvider(BackendSpecProvider): + """A protocol for providing the submodules used in Spec building.""" + + def linear(self) -> type: + """Which linear module TE backend uses""" + return TELinear + + def column_parallel_linear(self) -> type: + """Which column parallel linear module TE backend uses""" + return TEColumnParallelLinear + + def row_parallel_linear(self) -> type: + """Which row parallel linear module TE backend uses""" + return InferenceRowParallelLinear + + def fuse_layernorm_and_linear(self) -> bool: + """TE backend chooses a single module for layernorm and linear""" + return True + + def column_parallel_layer_norm_linear(self) -> Optional[type]: + """Which module for sequential layernorm and linear""" + return InferenceLayerNormColumnParallelLinear + + def layer_norm(self, rms_norm: bool = False, for_qk: bool = False) -> type: + """Which module to use for layer norm""" + if for_qk and 
not is_te_min_version("1.9.0"): + # TENorm significantly harms convergence when used + # for QKLayerNorm if TE Version < 1.9; + # we instead use the Apex implementation. + return FusedLayerNorm + return TENorm + + def core_attention(self) -> type: + """Which module to use for attention""" + return TEDotProductAttention + + def activation_func(self) -> type: + """Which module to use for activation function""" + return TEActivationOp + + def grouped_mlp_modules( + self, moe_use_grouped_gemm: bool, moe_use_legacy_grouped_gemm: bool + ) -> Tuple[type, Optional[MLPSubmodules]]: + raise NotImplementedError( + "MOE is not supported with inference optimized transformer implementation." + ) diff --git a/megatron/core/models/gpt/gpt_layer_specs.py b/megatron/core/models/gpt/gpt_layer_specs.py index c5c9caa3d67..7405150c4b3 100755 --- a/megatron/core/models/gpt/gpt_layer_specs.py +++ b/megatron/core/models/gpt/gpt_layer_specs.py @@ -4,7 +4,11 @@ from typing import Optional, Union from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add -from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider +from megatron.core.models.backends import ( + BackendSpecProvider, + InferenceSpecProvider, + LocalSpecProvider, +) from megatron.core.models.gpt.linear_attention_module_specs import ( get_linear_attention_module_spec_for_backend, ) @@ -73,6 +77,102 @@ HAVE_APEX = False +def get_gpt_layer_with_inference_spec( + qk_layernorm: Optional[bool] = False, + multi_latent_attention: Optional[bool] = False, + qk_l2_norm: Optional[bool] = False, +) -> ModuleSpec: + """Use this spec to use inference optimized linear layers. + Args: + qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False. + multi_latent_attention (bool, optional): To use MLA. Defaults to False. + qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False. 
+ """ + assert HAVE_TE, "--transformer-impl inference_optimized requires transformer engine" + backend = InferenceSpecProvider() + + mlp = get_mlp_module_spec_for_backend( + backend=backend, + num_experts=None, + moe_grouped_gemm=False, + moe_use_legacy_grouped_gemm=False, + use_te_op_fuser=False, + use_te_activation_func=False, + ) + + if multi_latent_attention: + assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA." + linear_q_up_proj = ( + backend.column_parallel_layer_norm_linear() + if qk_layernorm + else backend.column_parallel_linear() + ) + linear_kv_up_proj = ( + backend.column_parallel_layer_norm_linear() + if qk_layernorm + else backend.column_parallel_linear() + ) + return ModuleSpec( + module=TransformerLayer, + submodules=TransformerLayerSubmodules( + input_layernorm=backend.layer_norm(), + self_attention=ModuleSpec( + module=MLASelfAttention, + params={"attn_mask_type": AttnMaskType.causal}, + submodules=MLASelfAttentionSubmodules( + linear_q_proj=backend.column_parallel_linear(), + linear_q_down_proj=backend.linear(), + linear_q_up_proj=linear_q_up_proj, + linear_kv_down_proj=backend.linear(), + linear_kv_up_proj=linear_kv_up_proj, + core_attention=backend.core_attention(), + linear_proj=backend.row_parallel_linear(), + q_layernorm=IdentityOp, + kv_layernorm=IdentityOp, + ), + ), + self_attn_bda=get_bias_dropout_add, + pre_mlp_layernorm=IdentityOp, + mlp=mlp, + mlp_bda=get_bias_dropout_add, + ), + ) + else: + qk_norm = backend.layer_norm(for_qk=True) + return ModuleSpec( + module=TransformerLayer, + submodules=TransformerLayerSubmodules( + self_attention=ModuleSpec( + module=SelfAttention, + params={"attn_mask_type": AttnMaskType.causal}, + submodules=SelfAttentionSubmodules( + linear_qkv=backend.column_parallel_layer_norm_linear(), + core_attention=backend.core_attention(), + linear_proj=backend.row_parallel_linear(), + q_layernorm=( + L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) + ), + k_layernorm=( + L2Norm if 
qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) + ), + ), + ), + self_attn_bda=get_bias_dropout_add, + pre_mlp_layernorm=IdentityOp, + mlp=mlp, + mlp_bda=get_bias_dropout_add, + sharded_state_dict_keys_map={ + "mlp.0.weight": "mlp.linear_fc1.layer_norm_weight", + "mlp.0.bias": "mlp.linear_fc1.layer_norm_bias", + "mlp.1.basic_ops.0.weight": "mlp.linear_fc1.weight", + "mlp.1.basic_ops.1.bias": "mlp.linear_fc1.bias", + "mlp.3.basic_ops.0.weight": "mlp.linear_fc2.weight", + "mlp.3.basic_ops.1.bias": "mlp.linear_fc2.bias", + }, + ), + ) + + def get_gpt_layer_with_transformer_engine_spec( num_experts: Optional[int] = None, moe_grouped_gemm: Optional[bool] = False, diff --git a/megatron/core/models/gpt/moe_module_specs.py b/megatron/core/models/gpt/moe_module_specs.py index 1de0f14efcd..62ee4537cfc 100755 --- a/megatron/core/models/gpt/moe_module_specs.py +++ b/megatron/core/models/gpt/moe_module_specs.py @@ -2,21 +2,13 @@ from typing import Optional +from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider from megatron.core.transformer.mlp import MLPSubmodules from megatron.core.transformer.moe.moe_layer import MoELayer, MoESubmodules from megatron.core.transformer.moe.shared_experts import SharedExpertMLP from megatron.core.transformer.spec_utils import ModuleSpec -try: - import transformer_engine as te # pylint: disable=unused-import - - from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider - - HAVE_TE = True -except ImportError: - HAVE_TE = False - def get_moe_module_spec( use_te: Optional[bool] = True, diff --git a/megatron/core/models/mamba/mamba_layer_specs.py b/megatron/core/models/mamba/mamba_layer_specs.py index 8ef4a2ab3e4..bfe38c2bbc8 100755 --- a/megatron/core/models/mamba/mamba_layer_specs.py +++ b/megatron/core/models/mamba/mamba_layer_specs.py @@ -3,9 +3,11 @@ from megatron.core.extensions.transformer_engine 
import ( TEDotProductAttention, TELayerNormColumnParallelLinear, + TENorm, TERowParallelLinear, ) from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add +from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec from megatron.core.ssm.mamba_block import MambaStack, MambaStackSubmodules from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules @@ -16,6 +18,13 @@ from megatron.core.transformer.spec_utils import ModuleSpec from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules +moe = get_moe_module_spec( + use_te=True, + num_experts=8, # Can be any positive integer (must not be None). + moe_grouped_gemm=True, + moe_use_legacy_grouped_gemm=False, +) + mamba_stack_spec = ModuleSpec( module=MambaStack, submodules=MambaStackSubmodules( @@ -64,5 +73,12 @@ mlp_bda=get_bias_dropout_add, ), ), + moe_layer=ModuleSpec( + # TODO (rwaleffe): change this to be an "MoELayer" to work with CudaGraphs? + module=TransformerLayer, + submodules=TransformerLayerSubmodules( + pre_mlp_layernorm=TENorm, mlp=moe, mlp_bda=get_bias_dropout_add + ), + ), ), ) diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index c254b2f6882..061cb25f5b8 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -1,7 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+import copy import logging import warnings -from typing import Callable, Dict, List, Optional, Tuple +from dataclasses import astuple +from typing import Callable, Dict, List, Optional, Tuple, Union import torch from torch.optim import SGD as CPUSGD @@ -48,100 +50,114 @@ MegatronOptimizer, param_group_identifier_keys, ) -from .optimizer_config import OptimizerConfig +from .optimizer_config import AdamOptimizerConfig, OptimizerConfig, ParamKey, SGDOptimizerConfig logger = logging.getLogger(__name__) +def _matches(param: torch.nn.Parameter, param_name: str, param_key: ParamKey) -> bool: + """Returns true if passed-in parameter (with name) matches `param_key`. + + Args: + param (torch.nn.Parameter): Handle to parameter object. + param_name (str): Name of parameter in underlying PyTorch module. + param_key (ParamKey): ParamKey object. + + Returns: + bool: True if parameter matches passed-in param_key. + """ + + # Check if name matches. + if isinstance(param_key.name, str): + target_names = [param_key.name] + else: + target_names = list(param_key.name) + for target_name in target_names: + if param_name in target_name: + return True + + # Check if attribute matches. + if isinstance(param_key.attr, str): + target_attrs = [param_key.attr] + else: + target_attrs = list(param_key.attr) + for target_attr in target_attrs: + if getattr(param, target_attr, False): + return True + + return False + + def _get_param_groups( model_chunks: List[MegatronModule], - no_weight_decay_cond: Optional[Callable], - scale_lr_cond: Optional[Callable], - lr_mult: float, - lr: float, - min_lr: float, - decoupled_lr: Optional[float], - decoupled_min_lr: Optional[float], - default_skip_embedding_weight_decay: bool = False, + config: OptimizerConfig, + config_overrides: Optional[Dict[ParamKey, OptimizerConfig]], ) -> List[Dict]: """Create parameter groups for optimizer. 
- Creates parameter groups based on weight decay condition (regularized vs - non regularized), learning rate scale condition (lr vs lr_mult * lr), - and whether it is expert parameters. scale_lr_cond is used during finetuning - where head of the network requires a scaled version of the base learning rate. + Creates parameter groups from provided optimizer config object. Args: model_chunks (List[MegatronModule]): model chunks to create parameter groups for. - no_weight_decay_cond (func, optional): function to determine whether a - parameter should not perform weight decay. - scale_lr_cond (func, optional): function to determine whether a parameter - should have a scaled learning rate. - lr_mult (float): learning rate multiplier for parameters that - satisfy scale_lr_cond. - lr (float): learning rate. - min_lr (float): minimum learning rate. - decoupled_lr (Optional[float]): optional decoupled learning rate. - decoupled_min_lr (Optional[float]): optional decoupled minimum learning rate. - default_skip_embedding_weight_decay (bool): whether to skip weight decay for embedding - parameters by default, if no_weight_decay_cond is not provided. - + config (OptimizerConfig): optimizer configuration object. + config_overrides (Optional[Dict[LayerKey, OptimizerConfig]): optimizer overrides, + specified on a per-layer basis. Returns: List of parameter groups. """ - use_decoupled_learning_rate = decoupled_lr is not None - - # Map (wd_mult, lr_mult, is_expert_parallel, is_decoupled_lr) to params. + # Map (wd_mult, is_expert_parallel, param_group_hyperparameters_config) to params. params_map = {} + configs_map = {} + for model_chunk in model_chunks: for name, param in model_chunk.named_parameters(): if not param.requires_grad: continue - is_expert_parallel = not getattr(param, 'allreduce', True) - - if no_weight_decay_cond is not None: - no_wd: bool = no_weight_decay_cond(name, param) + uses_default_config = False + # Get optimizer config for this parameter. 
+ if config_overrides is None: + config_for_param = config + uses_default_config = True else: - # Do not regularize biases and norm parameters. - # optionally, also skip weight decay for embedding parameters if requested - # (useful if you do not want embeddings to shrink to zero in training - # https://arxiv.org/abs/2312.16903) - no_wd = ( - name.endswith(".bias") - or len(param.shape) == 1 - or (default_skip_embedding_weight_decay and "embedding" in name) - ) + config_for_param = None + for param_key in config_overrides: + if _matches(param, name, param_key): + config_for_param = config_overrides[param_key] + break + # Fall back to default config. + if config_for_param is None: + config_for_param = config + uses_default_config = True - if scale_lr_cond is not None: - scale_lr = scale_lr_cond(name, param) - else: - scale_lr = False - - if not no_wd and not scale_lr: - wd_mult, _lr_mult = 1.0, 1.0 - elif not no_wd and scale_lr: - wd_mult, _lr_mult = 1.0, lr_mult - elif no_wd and not scale_lr: - wd_mult, _lr_mult = 0.0, 1.0 - else: - wd_mult, _lr_mult = 0.0, lr_mult - - is_decoupled_lr = False - # For input/embedding and output layer: embedding.word_embeddings.weight / - # output_layer.weight. - if use_decoupled_learning_rate and getattr( - param, 'is_embedding_or_output_parameter', False - ): - is_decoupled_lr = True + is_expert_parallel = not getattr(param, 'allreduce', True) - key = (wd_mult, _lr_mult, is_expert_parallel, is_decoupled_lr) + # TODO: Make sure there is a way to support old no_weight_decay_func functionality + # and default_skip_embedding_weight_decay: + # or (default_skip_embedding_weight_decay and "embedding" in name) + no_wd = name.endswith(".bias") or len(param.shape) == 1 + if not no_wd: + wd_mult = 1.0 + else: + wd_mult = 0.0 + + # Create config_tuple that is hash-able. Remove timers object before + # creating config_tuple. 
+ config_for_param_copy = copy.deepcopy(config_for_param) + config_for_param_copy.timers = None + config_tuple = astuple(config_for_param_copy) + key = (wd_mult, is_expert_parallel, config_tuple) if key not in params_map: params_map[key] = [] params_map[key].append(param) + if key in configs_map: + assert (config_for_param, uses_default_config) == configs_map[key] + else: + configs_map[key] = (config_for_param, uses_default_config) + # Distributed checkpoint requires all ranks to have the same param groups, # so we need to align the param groups across ranks, otherwise we may have # runtime error when loading the checkpoint or numerical error when resuming training. @@ -155,67 +171,33 @@ def _get_param_groups( param_groups = [] for key in params_key: - wd_mult, _lr_mult, is_expert_parallel, is_decoupled_lr = key + wd_mult, is_expert_parallel, _ = key params = params_map[key] if key in params_map else [] + config, uses_default_config = None, True + if key not in configs_map: + assert params == [] + else: + config, uses_default_config = configs_map[key] + assert config is not None + + # TODO: Remove "backwards compatible" fields below eventually. param_group = { 'params': params, - 'wd_mult': wd_mult, - 'lr_mult': _lr_mult, + 'wd_mult': wd_mult, # For backwards compatibility. + 'lr_mult': 1.0, # For backwards compatibility. 'is_expert_parallel': is_expert_parallel, - 'is_decoupled_lr': is_decoupled_lr, + 'is_decoupled_lr': False, # For backwards compatibility. + 'default_config': uses_default_config, } - # Ensure param_group has required keys for matching when loading optimizer state - # See MegatronOptimizer._filter_and_reorder_param_groups. 
- assert set(param_group.keys()) - set(param_group_identifier_keys) == {'params'} - param_groups.append(param_group) - - param_groups = _update_min_and_max_lr_in_param_groups( - param_groups, - lr=lr, - min_lr=min_lr, - decoupled_lr=decoupled_lr, - decoupled_min_lr=decoupled_min_lr, - ) - - return param_groups - -def _update_min_and_max_lr_in_param_groups( - param_groups: List[Dict], - lr: float, - min_lr: float, - decoupled_lr: Optional[float], - decoupled_min_lr: Optional[float], -) -> List[Dict]: - """ - Updates `max_lr` and `min_lr` values in each parameter group, and returns new list. - By default, each group will use `lr` / `min_lr` as `max_lr` / `min_lr`. - If `decoupled_lr` is provided, then `decoupled_lr` / `decoupled_min_lr` will be used - as `max_lr` / `min_lr` for the input and output layer. - - Args: - param_groups (List): parameter groups whose 'max_lr' and `min_lr` fields need to - be adjusted. - lr (float): learning rate. - min_lr (float): minimum learning rate. - decoupled_lr (Optional[float]): optional decoupled learning rate. - decoupled_min_lr (Optional[float]): optional decoupled minimum learning rate. - - Returns: - List of adjusted parameter groups. - """ - - if decoupled_min_lr is None: - decoupled_min_lr = min_lr + # Stick relevant fields into param_group from config object. + if config is not None: + param_group['max_lr'] = config.lr + param_group['min_lr'] = config.min_lr + # TODO: Add other relevant arguments (e.g., weight decay, optimizer) + # here as well. 
+ param_groups.append(param_group) - for param_group in param_groups: - if param_group['is_decoupled_lr']: - assert decoupled_lr is not None - param_group['max_lr'] = decoupled_lr - param_group['min_lr'] = decoupled_min_lr - else: - param_group['max_lr'] = lr - param_group['min_lr'] = min_lr return param_groups @@ -223,12 +205,9 @@ def _get_param_groups_and_buffers( model_chunks: List[MegatronModule], model_chunk_offset: int, config: OptimizerConfig, - no_weight_decay_cond: Optional[Callable], - scale_lr_cond: Optional[Callable], - lr_mult: float, + config_overrides: Optional[Dict[ParamKey, OptimizerConfig]], filter_fn: Callable, buffer_name: str, - default_skip_embedding_weight_decay: bool = False, ) -> Tuple[List[Dict], Dict[int, List[_ParamAndGradBuffer]]]: """Returns parameter groups and buffer for optimizer. @@ -237,33 +216,17 @@ def _get_param_groups_and_buffers( groups for. model_chunk_offset (int): offset of model_chunks in global model_chunks list. config (OptimizerConfig): optimizer configuration object. - no_weight_decay_cond (func, optional): function to determine whether a - parameter should not perform weight decay. - scale_lr_cond (func, optional): function to determine whether a parameter - should have a scaled learning rate. - lr_mult (float): learning rate multiplier for parameters that - satisfy scale_lr_cond. + config_overrides (Optional[Dict[LayerKey, OptimizerConfig]): optimizer overrides, + specified on a per-layer basis. lr (float): learning rate. min_lr (float): minimum learning rate. filter_fn (callable): filtering function for param_groups. buffer_name (str): name of buffer. - default_skip_embedding_weight_decay (bool): whether to skip weight decay for - embedding parameters by default, if no_weight_decay_cond is not provided. Returns: List of parameter groups and dictionary of model chunk IDs to buffers. 
""" - param_groups = _get_param_groups( - model_chunks, - no_weight_decay_cond, - scale_lr_cond, - lr_mult, - lr=config.lr, - min_lr=config.min_lr, - decoupled_lr=config.decoupled_lr, - decoupled_min_lr=config.decoupled_min_lr, - default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, - ) + param_groups = _get_param_groups(model_chunks, config, config_overrides) param_groups = list(filter(filter_fn, param_groups)) buffers = {} for model_chunk_idx, model_chunk in enumerate(model_chunks): @@ -304,9 +267,12 @@ def _get_megatron_optimizer_based_on_param_groups( Returns: Instance of MegatronOptimizer. """ - # when freezing sub-models we may have no trainable parameters on a rank and + # TODO: Logic needs to be updated to handle different optimizer types (i.e., param_groups + # passed into this function need to correspond to the same optimizer). + + # When freezing sub-models we may have no trainable parameters on a rank and # hence an empty param_groups. However, we still need to create an optimizer - # for the purposes of grad stats reductions + # for the purposes of grad stats reductions. if param_groups: if config.optimizer_cpu_offload: if torch.__version__ < '2.3.0': @@ -476,11 +442,8 @@ def init_state_fn(opt, config=None): def get_megatron_optimizer( config: OptimizerConfig, model_chunks: List[MegatronModule], - no_weight_decay_cond: Optional[Callable] = None, - scale_lr_cond: Optional[Callable] = None, - lr_mult: float = 1.0, + config_overrides: Optional[Dict[ParamKey, OptimizerConfig]] = None, use_gloo_process_groups: bool = True, - default_skip_embedding_weight_decay: bool = False, pg_collection: Optional[ProcessGroupCollection] = None, dump_param_to_param_group_map: Optional[str] = None, ) -> MegatronOptimizer: @@ -491,18 +454,11 @@ def get_megatron_optimizer( Args: config (OptimizerConfig): optimizer configuration object. model_chunks (List[MegatronModule]): model chunks to get optimizer for. 
- no_weight_decay_cond (func, optional): function to determine whether a parameter - should not perform weight decay. Defaults to None. - scale_lr_cond (func, optional): function to determine whether a parameter - should have a scaled learning rate. Defaults to None. - lr_mult (float, optional): learning rate multiplier for parameters that - satisfy scale_lr_cond. Defaults to 1.0. + config_overrides (Optional[Dict[ParamKey, OptimizerConfig]]): optional dictionary of + optimizer configuration objects to override default optimizer behavior for different + subsets of parameters (identified by ParamKey). use_gloo_process_groups (bool): if false, disable use of Gloo process groups in underlying Megatron optimizers. - default_skip_embedding_weight_decay (bool): whether to skip weight decay for - embedding parameters by default, if no_weight_decay_cond is not provided. - This is useful if you do not want embeddings to shrink to zero in training - as recommended in https://arxiv.org/abs/2312.16903 pg_collection: Optional unified process group for distributed training. dump_param_to_param_group_map (Optional[str]): path to dump parameter to param group map. @@ -512,6 +468,20 @@ def get_megatron_optimizer( log_single_rank(logger, logging.INFO, f'Setting up optimizer with config {config}') + # TODO: Remove `optimizer` from this eventually (e.g., if we use Muon for some layers and + # Adam for other layers). This would need some more refactoring to work though (param_groups + # filtered by optimizer passed into _get_megatron_optimizer_based_on_param_groups). 
+ fields_to_check_for_consistency = [ + 'overlap_param_gather_with_optimizer_step', + 'optimizer', + 'optimizer_cpu_offload', + ] + for field_name in fields_to_check_for_consistency: + field = getattr(config, field_name, None) + if config_overrides is not None: + all_configs = list(config_overrides.values()) + assert all([getattr(x, field_name, None) == field for x in all_configs]) + # Separate out first model chunk if overlapping param AG with optimizer step. if config.overlap_param_gather_with_optimizer_step: all_dense_model_chunks = [[model_chunks[0]], model_chunks[1:]] @@ -553,17 +523,14 @@ def get_megatron_optimizer( model_chunk, model_chunk_offset=model_chunk_offset, config=config, - no_weight_decay_cond=no_weight_decay_cond, - scale_lr_cond=scale_lr_cond, - lr_mult=lr_mult, + config_overrides=config_overrides, filter_fn=lambda g: True, buffer_name='buffers', - default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, ) optimizers.append( _get_megatron_optimizer_based_on_param_groups( - config, + config=config, model_chunks=model_chunk, param_groups=param_groups, per_model_buffers=buffers, @@ -592,12 +559,9 @@ def get_megatron_optimizer( dense_model_chunks, model_chunk_offset=model_chunk_offset, config=config, - no_weight_decay_cond=no_weight_decay_cond, - scale_lr_cond=scale_lr_cond, - lr_mult=lr_mult, + config_overrides=config_overrides, filter_fn=lambda g: not g['is_expert_parallel'], buffer_name='buffers', - default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, ) for model_chunk in dense_model_chunks: model_chunk.overlap_param_gather_with_optimizer_step = ( @@ -613,7 +577,7 @@ def get_megatron_optimizer( # Pass Gloo process groups into optimizer only if needed. 
optimizers.append( _get_megatron_optimizer_based_on_param_groups( - config, + config=config, model_chunks=dense_model_chunks, param_groups=param_groups, per_model_buffers=buffers, @@ -631,12 +595,9 @@ def get_megatron_optimizer( model_chunks, model_chunk_offset=0, config=config, - no_weight_decay_cond=no_weight_decay_cond, - scale_lr_cond=scale_lr_cond, - lr_mult=lr_mult, + config_overrides=config_overrides, filter_fn=lambda g: g['is_expert_parallel'], buffer_name='expert_parallel_buffers', - default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, ) if dump_param_to_param_group_map is not None: for param_group in moe_param_groups: @@ -653,7 +614,7 @@ def get_megatron_optimizer( expt_data_parallel_group_gloo = None optimizers.append( _get_megatron_optimizer_based_on_param_groups( - config, + config=config, model_chunks=model_chunks, param_groups=moe_param_groups, per_model_buffers=moe_buffers, diff --git a/megatron/core/optimizer/muon.py b/megatron/core/optimizer/muon.py index ddf20b0abb8..2b1f0502e46 100644 --- a/megatron/core/optimizer/muon.py +++ b/megatron/core/optimizer/muon.py @@ -3,7 +3,7 @@ """Megatron muon optimizer wrapper to handle tensor-parallel.""" import logging -from typing import Any, Callable, List, Literal, Optional +from typing import Any, Callable, Dict, List, Literal, Optional import torch from torch.optim.optimizer import ParamsT @@ -21,7 +21,7 @@ FP32Optimizer, MegatronOptimizer, ) -from .optimizer_config import OptimizerConfig +from .optimizer_config import OptimizerConfig, ParamKey try: from emerging_optimizers.orthogonalized_optimizers import ( @@ -166,9 +166,7 @@ def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> t def get_megatron_muon_optimizer( config: OptimizerConfig, model_chunks: List[MegatronModule], - no_weight_decay_cond: Optional[Callable] = None, - scale_lr_cond: Optional[Callable] = None, - lr_mult: float = 1.0, + config_overrides: Optional[Dict[ParamKey, OptimizerConfig]] = None, 
use_gloo_process_groups: bool = True, layer_wise_distributed_optimizer: bool = False, pg_collection: Optional[ProcessGroupCollection] = None, @@ -179,17 +177,15 @@ def get_megatron_muon_optimizer( Args: config (OptimizerConfig): optimizer configuration object. model_chunks (List[MegatronModule]): model chunks to get optimizer for. - no_weight_decay_cond (func, optional): function to determine whether a parameter - should not perform weight decay. Defaults to None. - scale_lr_cond (func, optional): function to determine whether a parameter - should have a scaled learning rate. Defaults to None. - lr_mult (float, optional): learning rate multiplier for parameters that - satisfy scale_lr_cond. Defaults to 1.0. use_gloo_process_groups (bool): if false, disable use of Gloo process groups in underlying Megatron optimizers. layer_wise_distributed_optimizer (bool): if true, use layer-wise distributed optimizer. Defaults to False. """ + # Muon currently use adam config. setting str here to call regular get for adam creation + # side effect is muon optimizer will have wrong name, i.e. config.optimizer == 'adam' + config.optimizer = 'adam' + assert HAVE_EMERGING_OPTIMIZERS, "Emerging Optimizers is not installed." # dist-optim is not supported due to strong coupling with how DDP init grad buffer @@ -246,16 +242,7 @@ def get_megatron_muon_optimizer( for param in nonlinear_params: param.requires_grad = False - linear_param_groups = _get_param_groups( - model_chunks, - no_weight_decay_cond, - scale_lr_cond, - lr_mult, - lr=config.lr, - min_lr=config.min_lr, - decoupled_lr=config.decoupled_lr, - decoupled_min_lr=config.decoupled_min_lr, - ) + linear_param_groups = _get_param_groups(model_chunks, config, config_overrides) optimizer = TensorParallelMuon( linear_param_groups, @@ -274,13 +261,6 @@ def get_megatron_muon_optimizer( mode=config.muon_tp_mode, ) - # set config here to: - # 1. get adam for rest of layer - # 2. 
avoid ChainedOptimizer check fail that assert all optimizers are same kind - # side effect is muon optimizer will have wrong name str, i.e. config.optimizer == 'adam' - # TODO(deyuf): allow user to select optimizer mix and relax ChainedOptimizer design - config.optimizer = 'adam' - # Needed for torch_dist ckpt_format, unlike torch ckpt_format # For other emerging optimizers, need to implement init_state_fn as well # TODO(boxiangw): Improve usability after optimizer refactor @@ -331,7 +311,10 @@ def adam_init_state_fn(opt, config=None): # call original get. linear params will be skipped since they're freezed chained_adam = get_megatron_optimizer( - config, model_chunks, no_weight_decay_cond, scale_lr_cond, lr_mult, use_gloo_process_groups + config, + model_chunks, + config_overrides=config_overrides, + use_gloo_process_groups=use_gloo_process_groups, ) # unfreeze everything diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 1829cb424f1..54e7f67c629 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -3,6 +3,7 @@ """Megatron optimizer.""" import copy +import logging import math import warnings from abc import ABC, abstractmethod diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index 8692d1e9b52..6a4199a1f7a 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -1,23 +1,34 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -from dataclasses import dataclass -from typing import Callable, Optional +from dataclasses import dataclass, field +from typing import Callable, Optional, Tuple, Union import torch from ..utils import is_te_min_version +@dataclass(frozen=True, slots=True) +class ParamKey: + """Key to group parameters by. All such grouped parameters can share an + optimizer config specification.""" + + # TODO: Can add layer_id here later. 
+ + name: Union[str, Tuple[str]] = field(default_factory=tuple) + """Parameter name(s).""" + + attr: Union[str, Tuple[str]] = field(default_factory=tuple) + """Parameter attribute(s).""" + + @dataclass class OptimizerConfig: - """Configuration for optimizer.""" + """Base optimizer configuration object.""" ############## # General ############## - optimizer: str = 'adam' - """Optimizer to use (one of Adam, SGD, or Muon).""" - lr: Optional[float] = None """Initial learning rate. Depending on decay style and initial warmup, the learning rate at each iteration would be different. @@ -26,14 +37,6 @@ class OptimizerConfig: min_lr: Optional[float] = None """Minumum value for learning rate. The scheduler clip values below this threshold.""" - decoupled_lr: Optional[float] = None - """Separate learning rate for the input and output layer.""" - - decoupled_min_lr: Optional[float] = None - """Minimum value for learning rate for the input and output layer. The scheduler clip values - below this threshold. - """ - weight_decay: float = 0.01 """Weight decay coefficient for L2 regularization.""" @@ -78,6 +81,9 @@ class OptimizerConfig: exp_avg_sq_dtype: torch.dtype = torch.float32 """dtype of exp_avg_sq when enabling precision-aware-optimizer""" + optimizer: str = 'adam' + """Optimizer name. NOTE: Deprecated, use individual optimizer classes instead.""" + ############### # Loss scaling ############### @@ -98,10 +104,10 @@ class OptimizerConfig: hysteresis: int = 2 """Hysteresis for dynamic loss scaling.""" - ############## - # Optimizer - ############## - # Adam + ################################################################################### + # Optimizer (NOTE: Deprecated, use individual optimizer classes instead.). + ################################################################################### + # Adam. adam_beta1: float = 0.9 """First coefficient for computing running averages of gradient and its square in Adam optimizer. 
@@ -259,6 +265,7 @@ def __post_init__(self): try: import inspect + # TODO: Move this below? from transformer_engine.pytorch.optimizers import FusedAdam as Adam adam_args = inspect.signature(Adam).parameters @@ -291,3 +298,35 @@ def __post_init__(self): assert ( self.exp_avg_sq_dtype == torch.float32 ), "exp_avg_sq_dtype can only be fp32 when not using precision-aware optimizer" + + +@dataclass +class AdamOptimizerConfig(OptimizerConfig): + """Adam optimizer configuration object.""" + + optimizer: str = 'adam' + """Optimizer name.""" + + adam_beta1: float = 0.9 + """First coefficient for computing running averages of gradient and its square in Adam + optimizer. + """ + + adam_beta2: float = 0.999 + """Second coefficient for computing running averages of gradient and its square in Adam + optimizer. + """ + + adam_eps: float = 1e-08 + """Term added to the denominator to improve numerical stability in Adam optimizer.""" + + +@dataclass +class SGDOptimizerConfig(OptimizerConfig): + """SGD optimizer configuration object.""" + + optimizer: str = 'sgd' + """Optimizer name.""" + + sgd_momentum: float = 0.9 + """Momentum factor for SGD optimizer.""" diff --git a/megatron/core/optimizer_param_scheduler.py b/megatron/core/optimizer_param_scheduler.py index da7e0787676..9f771c612e8 100644 --- a/megatron/core/optimizer_param_scheduler.py +++ b/megatron/core/optimizer_param_scheduler.py @@ -95,19 +95,30 @@ def __init__( self.step(0) log_single_rank(logger, logging.INFO, f"> learning rate decay style: {self.lr_decay_style}") - def get_wd(self) -> float: - """Weight decay incr functions""" + def get_wd(self, param_group: Optional[dict] = None) -> float: + """Weight decay incr functions + + Args: + param_group (dict): parameter group from the optimizer.""" + + if param_group is not None: + start_wd = param_group.get('start_wd', self.start_wd) + end_wd = param_group.get('end_wd', self.end_wd) + else: + start_wd = self.start_wd + end_wd = self.end_wd + if self.num_steps > 
self.wd_incr_steps: - return self.end_wd + return end_wd if self.wd_incr_style == 'constant': - assert self.start_wd == self.end_wd - return self.end_wd + assert start_wd == end_wd + return end_wd incr_ratio = float(self.num_steps) / float(self.wd_incr_steps) assert incr_ratio >= 0.0 assert incr_ratio <= 1.0 - delta_wd = self.end_wd - self.start_wd + delta_wd = end_wd - start_wd if self.wd_incr_style == 'linear': coeff = incr_ratio @@ -116,7 +127,7 @@ def get_wd(self) -> float: else: raise Exception(f'{self.wd_incr_style} weight decay increment style is not supported.') - return self.start_wd + coeff * delta_wd + return start_wd + coeff * delta_wd def get_lr(self, param_group: dict) -> float: """Learning rate decay functions from: @@ -191,11 +202,9 @@ def step(self, increment: int) -> None: increment (int): number of steps to increment """ self.num_steps += increment - new_wd = self.get_wd() for param_group in self.optimizer.param_groups: - new_lr = self.get_lr(param_group) - param_group['lr'] = new_lr * param_group.get('lr_mult', 1.0) - param_group['weight_decay'] = new_wd * param_group.get('wd_mult', 1.0) + param_group['lr'] = self.get_lr(param_group) + param_group['weight_decay'] = self.get_wd(param_group) * param_group.get('wd_mult', 1.0) def state_dict(self) -> dict: """Return the state dict.""" diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 1e41bf9d8c2..1916bfff079 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -1122,6 +1122,7 @@ def initialize_model_parallel( for ranks in expert_decoder_rank_generator.get_ranks('ep'): group = create_group( ranks, + timeout=timeout, pg_options=get_nccl_options("ep", nccl_comm_cfgs), group_desc="EXPERT_MODEL_PARALLEL_GROUP", ) diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 07c922ea685..ef8f31ea150 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ 
-140,6 +140,23 @@ def __init__(self, **kwargs): else: raise ValueError(f"Unknown attribute: {key}") + def __repr__(self): + """Return a concise representation showing which process groups exist and their sizes.""" + active_pgs = [] + for field_info in fields(self): + if hasattr(self, field_info.name): + pg = getattr(self, field_info.name) + if pg is not None: + active_pgs.append(f"{field_info.name}({pg.size()})") + else: + # Field exists but is None + active_pgs.append(f"{field_info.name}(None)") + return ( + f"ProcessGroupCollection({', '.join(active_pgs)})" + if active_pgs + else "ProcessGroupCollection(empty)" + ) + @classmethod def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): """ diff --git a/megatron/core/safe_globals.py b/megatron/core/safe_globals.py index d2baed2a4a0..cc5eb8809e8 100755 --- a/megatron/core/safe_globals.py +++ b/megatron/core/safe_globals.py @@ -11,6 +11,7 @@ from numpy.dtypes import UInt32DType from megatron.core.enums import ModelType +from megatron.core.optimizer import OptimizerConfig from megatron.core.rerun_state_machine import RerunDiagnostic, RerunMode, RerunState from megatron.core.transformer.enums import AttnBackend @@ -24,6 +25,7 @@ Namespace, AttnBackend, ModelType, + OptimizerConfig, RerunDiagnostic, RerunMode, RerunState, diff --git a/megatron/core/ssm/mamba_block.py b/megatron/core/ssm/mamba_block.py index 1bcadd0af10..de27bb89d2e 100644 --- a/megatron/core/ssm/mamba_block.py +++ b/megatron/core/ssm/mamba_block.py @@ -5,10 +5,8 @@ # This source code is licensed under the Apache license found in the # LICENSE file in the root directory of this source tree. 
-import math from contextlib import nullcontext from dataclasses import dataclass -from functools import partial from typing import Optional, Tuple, Union import torch @@ -23,7 +21,6 @@ from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols as LayerSymbols from megatron.core.ssm.mamba_hybrid_layer_allocation import allocate_layers -from megatron.core.tensor_parallel import get_cuda_rng_tracker from megatron.core.transformer import TransformerConfig from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import MegatronModule @@ -33,50 +30,6 @@ from megatron.core.utils import WrappedTensor, deprecate_inference_params, make_viewless_tensor -# https://github.com/huggingface/transformers/blob/c28d04e9e252a1a099944e325685f14d242ecdcd/src/transformers/models/gpt2/modeling_gpt2.py#L454 -def _init_weights( - module, - n_layer, - initializer_range=0.02, # Now only used for embedding layer. - rescale_prenorm_residual=True, - n_residuals_per_layer=1, # Change to 2 if we have MLP -): - with get_cuda_rng_tracker().fork(): - if isinstance(module, nn.Linear): - if not getattr(module.weight, "_no_reinit", False): - nn.init.normal_(module.weight, std=initializer_range) - if module.bias is not None: - if not getattr(module.bias, "_no_reinit", False): - nn.init.zeros_(module.bias) - elif isinstance(module, nn.Embedding): - nn.init.normal_(module.weight, std=initializer_range) - - for name, p in module.named_parameters(): - if name in ["conv1d.weight", "out_proj.weight"]: - nn.init.kaiming_uniform_(p, a=math.sqrt(5)) - if name in ["in_proj.weight"]: - nn.init.normal_(p, mean=0.0, std=initializer_range) - - if rescale_prenorm_residual: - # Reinitialize selected weights subject to the OpenAI GPT-2 Paper Scheme: - # > A modified initialization which accounts for the accumulation on the - # > residual path with model depth. 
Scale - # > the weights of residual layers at initialization by a factor of - # > 1/√N where N is the # of residual layers. - # > -- GPT-2 :: https://openai.com/blog/better-language-models/ - # - # Reference (Megatron-LM): - # https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/model/gpt_model.py - for name, p in module.named_parameters(): - if name in ["out_proj.weight", "fc2.weight"]: - # Special Scaled Initialization - nn.init.normal_( - p, - mean=0.0, - std=initializer_range / math.sqrt(n_residuals_per_layer * n_layer), - ) - - @dataclass class MambaStackSubmodules: """ @@ -86,6 +39,7 @@ class MambaStackSubmodules: mamba_layer: Union[ModuleSpec, type] = IdentityOp attention_layer: Union[ModuleSpec, type] = IdentityOp mlp_layer: Union[ModuleSpec, type] = IdentityOp + moe_layer: Union[ModuleSpec, type] = IdentityOp class MambaStack(MegatronModule): @@ -171,6 +125,7 @@ def __init__( config=self.config, residual_in_fp32=residual_in_fp32, layer_number=i + 1 + pp_layer_offset, + pp_layer_offset=pp_layer_offset, pg_collection=pg_collection, ) elif layer_type == LayerSymbols.ATTENTION: @@ -189,6 +144,11 @@ def __init__( layer_number=i + 1, pg_collection=pg_collection, ) + elif layer_type == LayerSymbols.MOE: + # Transformer layers apply their own pp_layer_offset + layer = build_module( + submodules.moe_layer, config=self.config, layer_number=i + 1 + ) else: assert False, "unexpected layer_type" self.layers.append(layer) @@ -204,15 +164,6 @@ def __init__( eps=self.config.layernorm_epsilon, ) - if self.config.perform_initialization: - self.apply( - partial( - _init_weights, - n_layer=self.config.num_layers, - initializer_range=self.config.init_method_std, - ) - ) - def _select_layers_for_pipeline_parallel(self, layer_type_list): num_layers_per_pipeline_rank = self.config.num_layers // self.pp_group.size() diff --git a/megatron/core/ssm/mamba_hybrid_layer_allocation.py b/megatron/core/ssm/mamba_hybrid_layer_allocation.py index 7407bfe899f..fe997e2249a 100644 --- 
a/megatron/core/ssm/mamba_hybrid_layer_allocation.py +++ b/megatron/core/ssm/mamba_hybrid_layer_allocation.py @@ -28,7 +28,8 @@ class Symbols: MAMBA = "M" ATTENTION = "*" MLP = "-" - VALID = {MAMBA, ATTENTION, MLP} + MOE = 'E' + VALID = {MAMBA, ATTENTION, MLP, MOE} def _allocate_auto( @@ -172,9 +173,9 @@ def get_layer_maps_from_layer_type_list( ) -> Tuple[Dict[int, int], Dict[int, int], Dict[int, int]]: """ Returns maps from global layer index to the corresponding layer index - for each layer type in [Attention, Mamba, MLP] given a layer type list. + for each layer type in [Attention, Mamba, MLP, MoE] given a layer type list. """ - layer_types = [Symbols.ATTENTION, Symbols.MAMBA, Symbols.MLP] + layer_types = [Symbols.ATTENTION, Symbols.MAMBA, Symbols.MLP, Symbols.MOE] layer_maps = {layer_type: {} for layer_type in layer_types} for global_layer_idx, layer_type in enumerate(layer_type_list): layer_map = layer_maps[layer_type] diff --git a/megatron/core/ssm/mamba_layer.py b/megatron/core/ssm/mamba_layer.py index 69d5ef21c81..6514050ac63 100644 --- a/megatron/core/ssm/mamba_layer.py +++ b/megatron/core/ssm/mamba_layer.py @@ -61,6 +61,7 @@ def __init__( layer_number: int = 1, residual_in_fp32=False, pg_collection: ProcessGroupCollection = None, + pp_layer_offset: int = 0, ): """Initialize Mamba Layer.""" super().__init__(config) @@ -77,6 +78,7 @@ def __init__( d_model=self.config.hidden_size, layer_number=layer_number, pg_collection=pg_collection, + pp_layer_offset=pp_layer_offset, ) self.norm = build_module(submodules.norm, self.config, self.config.hidden_size) self.mamba_bda = build_module(submodules.mamba_bda) diff --git a/megatron/core/ssm/mamba_mixer.py b/megatron/core/ssm/mamba_mixer.py index b792f8a2f1f..91dc266e590 100644 --- a/megatron/core/ssm/mamba_mixer.py +++ b/megatron/core/ssm/mamba_mixer.py @@ -162,6 +162,7 @@ def __init__( headdim=None, ngroups=None, pg_collection: ProcessGroupCollection = None, + pp_layer_offset: int = 0, ): if not HAVE_MAMBA_SSM: 
raise ImportError( @@ -183,6 +184,7 @@ def __init__( self.norm_before_gate = norm_before_gate self.chunk_size = chunk_size self.layer_number = layer_number + self.pp_layer_offset = pp_layer_offset self.cached_batch_size = None assert pg_collection is not None, "pg_collection must be provided for MambaMixer" self.pg_collection = pg_collection @@ -297,9 +299,12 @@ def __init__( setattr(self.conv1d.weight, "tensor_model_parallel", True) setattr(self.conv1d.bias, "tensor_model_parallel", True) - if self.config.perform_initialization and self.conv_init is not None: + if self.config.perform_initialization: with get_cuda_rng_tracker().fork(): - nn.init.uniform_(self.conv1d.weight, -self.conv_init, self.conv_init) + if self.conv_init is not None: + nn.init.uniform_(self.conv1d.weight, -self.conv_init, self.conv_init) + else: + nn.init.kaiming_uniform_(self.conv1d.weight, a=math.sqrt(5)) self.activation = "silu" self.act = nn.SiLU() @@ -324,13 +329,6 @@ def __init__( ) self.dt_bias = nn.Parameter(inv_dt) - # Our initialization would set all Linear.bias to zero, - # need to mark this one as _no_reinit - self.dt_bias._no_reinit = True - # Just to be explicit. 
Without this we already don't - # put wd on dt_bias because of the check - # name.endswith("bias") in param_grouping.py - self.dt_bias._no_weight_decay = True setattr(self.dt_bias, "tensor_model_parallel", True) # A parameter @@ -342,7 +340,6 @@ def __init__( A = A.uniform_(*A_init_range) A_log = torch.log(A) # Keep A_log in fp32 self.A_log = nn.Parameter(A_log) - self.A_log._no_weight_decay = True setattr(self.A_log, "tensor_model_parallel", True) # D "skip" parameter @@ -352,7 +349,6 @@ def __init__( device=torch.cuda.current_device(), ) ) # Keep in fp32 - self.D._no_weight_decay = True setattr(self.D, "tensor_model_parallel", True) if self.rmsnorm: @@ -365,6 +361,7 @@ def __init__( device=torch.cuda.current_device(), dtype=config.params_dtype, ) + setattr(self.norm.weight, "tensor_model_parallel", True) # Assume sequence parallelism: input is partitioned along d_inner and # output is partitioned along the sequence dimension @@ -458,7 +455,7 @@ def dynamic_inference(self, hidden_states: torch.Tensor, context: DynamicInferen ) assert sequence_packing_available, reason_for_no_sequence_packing - conv_state, ssm_state = context.mamba_states_cache(self.layer_number) + conv_state, ssm_state = context.mamba_states_cache(self.layer_number - self.pp_layer_offset) # Fast path: decode-only if context.is_decode_only(): @@ -504,7 +501,10 @@ def dynamic_inference(self, hidden_states: torch.Tensor, context: DynamicInferen zxBCdt_chunked_prefill = zxBCdt[ active_token_count - chunked_prefill_request_token_count : active_token_count ] - batch_index_chunked_prefill = batch_indices[context.chunked_prefill_request_id] + + batch_index_chunked_prefill = batch_indices[ + context.get_index_of_chunked_prefill_request() + ] y_prefill_chunked = self.ssm_prefill( zxBCdt_chunked_prefill, @@ -941,6 +941,12 @@ def ssm_decode( x_reshaped = rearrange(x, "b (h p) -> b h p", p=self.headdim) if not self.rmsnorm: z = rearrange(z, "b (h p) -> b h p", p=self.headdim) + + # Upcast the batch_indices to 
prevent integer overflow errors in the case of + # large max request counts. + if batch_indices is not None: + batch_indices = batch_indices.to(torch.int64) + y = selective_state_update( ssm_state, x_reshaped, diff --git a/megatron/core/tensor_parallel/inference_layers.py b/megatron/core/tensor_parallel/inference_layers.py new file mode 100644 index 00000000000..05f7b88d095 --- /dev/null +++ b/megatron/core/tensor_parallel/inference_layers.py @@ -0,0 +1,151 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + + +from typing import Callable, Optional + +import torch +import torch.distributed as dist + +from megatron.core.extensions.transformer_engine import ( + TELayerNormColumnParallelLinear, + TERowParallelLinear, +) +from megatron.core.model_parallel_config import ModelParallelConfig +from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.utils import get_tensor_model_parallel_group_if_none + +try: + import transformer_engine.pytorch.cpp_extensions as tex + from transformer_engine.pytorch.constants import TE_DType + from transformer_engine.pytorch.distributed import ( + gather_along_first_dim, + reduce_scatter_along_first_dim, + ) + + HAVE_TE = True +except ImportError: + HAVE_TE = False + + +def _te_rms_norm_kernel(x: torch.Tensor, weight: torch.Tensor, eps: float): + x_shape = x.shape + x = x.view(-1, x.size(-1)) + out, _, _ = tex.rmsnorm_fwd( + x, weight, eps, None, None, TE_DType[x.dtype], 16, False # sm-margin # zero centered gamma + ) + out = out.view(*x_shape[:-1], -1) + return out.to(x.dtype) + + +class InferenceLayerNormColumnParallelLinear(TELayerNormColumnParallelLinear): + """ + Inference optimized version of TELayerNormColumnParallelLinear. 
+ """ + + def __init__( + self, + input_size: int, + output_size: int, + *, + config: TransformerConfig, + init_method: Callable, + gather_output: bool, + bias: bool, + skip_bias_add: bool, + is_expert: bool, + skip_weight_param_allocation: bool = False, + tp_comm_buffer_name: Optional[str] = None, + tp_group: Optional[torch.distributed.ProcessGroup] = None, + ): + assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine" + super().__init__( + input_size, + output_size, + config=config, + init_method=init_method, + gather_output=gather_output, + bias=bias, + skip_bias_add=skip_bias_add, + is_expert=is_expert, + skip_weight_param_allocation=skip_weight_param_allocation, + tp_comm_buffer_name=tp_comm_buffer_name, + tp_group=tp_group, + ) + self.tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert) + self.tp_size = dist.get_world_size(self.tp_group) + + assert ( + output_size % self.tp_size == 0 + ), f"output_size ({output_size}) must be divisible by tp_size ({self.tp_size})" + + self.eps = config.layernorm_epsilon + + if self.tp_size > 1: + assert ( + config.sequence_parallel + ), "--transformer-impl=inference_optimized requires --sequence-parallel" + + @torch.no_grad() + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass. + """ + x = _te_rms_norm_kernel(x=x, weight=self.layer_norm_weight, eps=self.eps) + if self.tp_size > 1: + x, _ = gather_along_first_dim(x, process_group=self.tp_group) + x = torch.matmul(x, self.weight.t()) + return x, None + + +class InferenceRowParallelLinear(TERowParallelLinear): + """ + Inference optimized version of TERowParallelLinear. 
+ """ + + def __init__( + self, + input_size: int, + output_size: int, + *, + config: ModelParallelConfig, + init_method: Callable, + bias: bool, + input_is_parallel: bool, + skip_bias_add: bool, + is_expert: bool, + tp_comm_buffer_name: Optional[str] = None, + tp_group: Optional[torch.distributed.ProcessGroup] = None, + ): + assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine" + super().__init__( + input_size, + output_size, + config=config, + init_method=init_method, + bias=bias, + input_is_parallel=input_is_parallel, + skip_bias_add=skip_bias_add, + is_expert=is_expert, + tp_comm_buffer_name=tp_comm_buffer_name, + tp_group=tp_group, + ) + self.tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert) + self.tp_size = dist.get_world_size(self.tp_group) + assert ( + input_size % self.tp_size == 0 + ), f"input_size ({input_size}) must be divisible by tp_size ({self.tp_size})" + + if self.tp_size > 1: + assert ( + config.sequence_parallel + ), "--transformer-impl=inference_optimized requires --sequence-parallel" + + @torch.no_grad() + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass. 
+ """ + x = torch.matmul(x, self.weight.t()) + if self.tp_size > 1: + x, _ = reduce_scatter_along_first_dim(x, tp_group=self.tp_group) + return x, None diff --git a/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py b/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py index c68b0ef89b1..458689fa1f4 100644 --- a/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py +++ b/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py @@ -69,7 +69,6 @@ def __init__( pretrained_model_name_or_path=tokenizer_path, use_fast=use_fast, trust_remote_code=trust_remote_code, - chat_template=chat_template, ) elif merges_file is None: self.tokenizer = AutoTokenizer.from_pretrained( @@ -77,7 +76,6 @@ def __init__( vocab_file=vocab_file, use_fast=use_fast, trust_remote_code=trust_remote_code, - chat_template=chat_template, ) else: self.tokenizer = AutoTokenizer.from_pretrained( @@ -86,7 +84,6 @@ def __init__( merge_files=merges_file, use_fast=use_fast, trust_remote_code=trust_remote_code, - chat_template=chat_template, ) except Exception as e: raise ValueError( @@ -94,6 +91,14 @@ def __init__( f'for {tokenizer_path}. Exception: {e}' ) + # Store the tokenizer's existing chat template if the user does not provide + # a custom chat template. Otherwise, override the default chat template with + # the user-provided template. 
+ if chat_template is None: + chat_template = self.tokenizer.chat_template + else: + self.tokenizer.chat_template = chat_template + self.include_special_tokens = include_special_tokens self.original_vocab_size = len(self.tokenizer) self.chat_template = chat_template diff --git a/megatron/core/tokenizers/text/libraries/null_tokenizer.py b/megatron/core/tokenizers/text/libraries/null_tokenizer.py index 13d56436192..4ddf77fc774 100644 --- a/megatron/core/tokenizers/text/libraries/null_tokenizer.py +++ b/megatron/core/tokenizers/text/libraries/null_tokenizer.py @@ -25,6 +25,14 @@ def ids_to_text(self, ids): text = [str(x) for x in ids] return ' '.join(text) + def tokens_to_ids(self, tokens): + """Converts tokens to ids.""" + return [int(x) for x in tokens] + + def ids_to_tokens(self, ids): + """Converts ids to tokens.""" + return [str(x) for x in ids] + def offsets(self, ids: list[int], text: str) -> list[int]: """Returns offsets.""" offsets, start_idx = [], 0 diff --git a/megatron/core/tokenizers/text/text_tokenizer.py b/megatron/core/tokenizers/text/text_tokenizer.py index 2107cf9dce4..4e0c624e006 100644 --- a/megatron/core/tokenizers/text/text_tokenizer.py +++ b/megatron/core/tokenizers/text/text_tokenizer.py @@ -37,13 +37,17 @@ def __init__(self, path: str, config: dict, **kwargs) -> None: self._tokenizer = self._restore_model(**kwargs) self.additional_args = kwargs self.path = path - if ( - config.get("chat_template", None) is None - and kwargs.get("chat_template", None) is not None - ): - self.chat_template = kwargs.get("chat_template", None) + + config_template = config.get("chat_template", None) + tokenizer_template = getattr(self._tokenizer, "chat_template", None) + kwargs_template = kwargs.get("chat_template", None) + + if config_template is not None: + self.chat_template = config_template + elif tokenizer_template is not None: + self.chat_template = tokenizer_template else: - self.chat_template = config.get("chat_template", None) + self.chat_template = 
kwargs_template def _restore_model(self, **kwargs) -> MegatronTokenizerTextAbstract: """Returns tokenizer library object.""" diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 74031f38219..7bb9a12c697 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -35,6 +35,7 @@ from megatron.core.utils import ( deprecate_inference_params, divide, + get_pg_rank, get_pg_size, is_fa_min_version, is_te_min_version, @@ -158,6 +159,7 @@ def __init__( self.config = config self.layer_number = layer_number + self.attn_mask_type = attn_mask_type self.attention_type = attention_type @@ -306,6 +308,19 @@ def _allocate_memory(self, inference_max_sequence_length, batch_size, dim, dtype device=torch.cuda.current_device(), ) + def _get_pp_layer_offset_for_inference(self): + """Return the pipeline parallel layer offset for inference.""" + assert ( + self.config.virtual_pipeline_model_parallel_size is None + ), "Virtual pipeline parallelism is not supported for inference" + + # Import here to avoid circular imports + from megatron.core.transformer.transformer_layer import get_transformer_layer_offset + + return get_transformer_layer_offset( + self.config, vp_stage=None, pp_rank=get_pg_rank(self.pg_collection.pp) + ) + def _adjust_key_value_for_inference( self, inference_context: BaseInferenceContext, @@ -371,9 +386,15 @@ def _adjust_key_value_for_inference( inference_context.key_value_memory_dict[self.layer_number] ) - if not inference_context.is_static_batching() or inference_context.sequence_len_offset > 0: + if ( + not inference_context.is_static_batching() or inference_context.sequence_len_offset > 0 + ) and (not self.training or not is_te_min_version("2.2.0")): # This should mean that we are past the prompt forward_step # and so we need to turn off masking + # Note: in ModelOpt, we may use inference_context for speculative decoding + # in training. 
In that case, we do not want to turn off masking as we need + # customized attention mask for speculative decoding. + attn_mask_type = AttnMaskType.no_mask if inference_context.is_static_batching(): @@ -444,6 +465,8 @@ def _adjust_key_value_for_inference( key = inference_key_memory[:sequence_end, batch_start:batch_end, ...] value = inference_value_memory[:sequence_end, batch_start:batch_end, ...] else: + pp_layer_offset = self._get_pp_layer_offset_for_inference() + # Apply rotary embeddings before appending KV cache. if inference_context.use_flashinfer_fused_rope and (rotary_pos_cos_sin is not None): query, key = inference_context.apply_fused_qk_rotary_emb( @@ -458,17 +481,23 @@ def _adjust_key_value_for_inference( rotary_pos_emb = (q_pos_emb, None) # key rotary emb has been applied # Append key/value data tensors to cache. - inference_context.append_key_value_cache(self.layer_number, key, value) + inference_context.append_key_value_cache( + self.layer_number - pp_layer_offset, key, value + ) _, max_seqlen_q = inference_context.cu_query_lengths() if getattr(self.config, "cache_mla_latents", None) and max_seqlen_q > 1: # Doing unabsorbed MLA Attention with cached mla latents (prefill/mixed mode) - kv_cache, _, block_table = inference_context.key_value_cache(self.layer_number) + kv_cache, _, block_table = inference_context.key_value_cache( + self.layer_number - pp_layer_offset + ) # Uncompress the KV cache for prefill/mixed mode key, value = self.uncompress_kv_from_cache(kv_cache) else: # Read key/value *pointer* tensors from cache. 
- key, value, block_table = inference_context.key_value_cache(self.layer_number) + key, value, block_table = inference_context.key_value_cache( + self.layer_number - pp_layer_offset + ) return query, key, value, rotary_pos_emb, attn_mask_type, block_table @abstractmethod diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 12f15ee980a..10a739e11c0 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -368,9 +368,26 @@ def create_cudagraphs(): def delete_cuda_graphs(): """Delete all CUDA graphs.""" + # Reset runners. + for record in [ + *_CudagraphGlobalRecord.cudagraph_record, + *_CudagraphGlobalRecord.cudagraph_inference_record, + ]: + runner = record[0] + assert isinstance(runner, _CudaGraphRunner) + + runner.cudagraph_created = False + runner.fwd_graph_recorded = False + runner.bwd_graph_recorded = False + runner.fwd_graph = None + runner.bwd_graph = None + runner.fwd_mempool = None + runner.bwd_mempool = None + # Reset global tracking state _CudagraphGlobalRecord.cudagraph_created = False _CudagraphGlobalRecord.cudagraph_record = [] + _CudagraphGlobalRecord.cudagraph_inference_record = [] # TODO: Optional?: Force garbage collection to clean up memory gc.collect() diff --git a/megatron/core/transformer/fsdp_dtensor_checkpoint.py b/megatron/core/transformer/fsdp_dtensor_checkpoint.py index 65e2f5f9dff..04ec982e6ff 100644 --- a/megatron/core/transformer/fsdp_dtensor_checkpoint.py +++ b/megatron/core/transformer/fsdp_dtensor_checkpoint.py @@ -484,6 +484,6 @@ def get_global_unique_param_name(model_chunks, param): # Get EP unique parameter name num_experts = model_chunks[0].config.num_moe_experts if model_chunks else None - param_name = list(handle_experts_in_state_dict({param_name: None}, num_experts).keys())[0] + param_name = next(iter(handle_experts_in_state_dict({param_name: None}, num_experts).keys())) return param_name diff --git 
a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index b2135fdb00d..8754e938348 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -48,6 +48,8 @@ num_global_tokens: num_local_tokens*TP*EP """ +logger = logging.getLogger(__name__) + class MoETokenDispatcher: """ @@ -1270,7 +1272,6 @@ def _pad_routing_map( # Check if there are enough tokens to pad enough_tokens_to_pad = torch.all(target_tokens_per_expert <= num_input_tokens) if not enough_tokens_to_pad: - logger = logging.getLogger(__name__) logger.warning( "Not enough tokens to pad. The total number of tokens received in this rank " "is smaller than the target number of tokens for each expert. " diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index fae2e2f5d4d..3f8c97099da 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -749,6 +749,9 @@ class TransformerConfig(ModelParallelConfig): symmetric_ar_type: Optional[str] = None """Type of symmetric all reduce to use""" + use_inference_optimized_layers: bool = False + """If True, use inference optimized transformer layers during inference.""" + mrope_section: Optional[List[int]] = None """ Multimodal rope section is for channel dimension of temporal, height and width in rope calculation. """ @@ -1874,6 +1877,13 @@ def __post_init__(self): f"for context parallelism, but got {self.cp_comm_type=} instead." 
) + if self.transformer_impl == "inference_optimized": + assert self.normalization == "RMSNorm" + assert not self.layernorm_zero_centered_gamma + assert not self.add_bias_linear + assert not self.add_qkv_bias + assert not self.use_kitchen + @dataclass class MLATransformerConfig(TransformerConfig): diff --git a/megatron/core/utils.py b/megatron/core/utils.py index 9b62b18d400..77a004a6845 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -24,7 +24,7 @@ from functools import lru_cache, reduce, wraps from importlib.metadata import version from types import TracebackType -from typing import Any, Callable, Coroutine, Dict, List, Optional, Tuple, Type, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union import numpy import torch @@ -2140,23 +2140,28 @@ def maybe_cat(a, b, dim=0, *, required=False): return xs[0] if len(xs) == 1 else torch.cat(xs, dim=dim) +_ASYNC_IO_LOOP: asyncio.AbstractEventLoop | None = None + + def get_asyncio_loop(loop: asyncio.AbstractEventLoop | None = None) -> asyncio.AbstractEventLoop: """Creates an asyncio loop if necessary and then returns the current asyncio loop.""" + global _ASYNC_IO_LOOP if loop is None: try: loop = asyncio.get_running_loop() except RuntimeError as e: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) + if _ASYNC_IO_LOOP is not None: + return _ASYNC_IO_LOOP + else: + _ASYNC_IO_LOOP = loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) return loop _ASYNC_TASK_STATS = defaultdict(lambda: [0, 0.0]) # cnt, total_time -def trace_async_exceptions( - func: Optional[Callable[..., Coroutine]], *, verbose: bool = False -) -> Callable[..., Coroutine]: +def trace_async_exceptions(func: Optional[Callable] = None, *, verbose: bool = False): """Decorator to be applied to every coroutine that runs in a separate task. This is needed because asyncio tasks do not propagate exceptions. 
@@ -2171,41 +2176,81 @@ async def my_coroutine(...): ``` """ - def _decorate(fn): - if not asyncio.iscoroutinefunction(fn): - raise TypeError("trace_async_exceptions can only be used with async functions") - - @functools.wraps(fn) - async def wrapper(*args, **kwargs): - if verbose: - start = time.perf_counter() - try: - return await fn(*args, **kwargs) - except Exception as e: - logger.error(f"Exception in async function {fn.__name__}: {e}") - traceback.print_exc() - sys.exit(1) - finally: + def _log_verbose(name: str, start: float) -> None: + elapsed = (time.perf_counter() - start) * 1000.0 + cnt, tot = _ASYNC_TASK_STATS[name] + _ASYNC_TASK_STATS[name] = [cnt + 1, tot + elapsed] + avg = _ASYNC_TASK_STATS[name][1] / _ASYNC_TASK_STATS[name][0] + + log10 = numpy.log10(max(cnt, 1)) + if numpy.isclose(log10, round(log10)): + logger.info( + f"{name} completed in {elapsed:.3f} ms, " + f"lifetime avg: {avg:.3f} ms, " + f"lifetime cnt: {cnt + 1}" + ) + + def _decorate(fn: Callable): + if asyncio.iscoroutinefunction(fn): + + @functools.wraps(fn) + async def wrapper(*args, **kwargs): if verbose: - elapsed = (time.perf_counter() - start) * 1000.0 - name = fn.__qualname__ - cnt, tot = _ASYNC_TASK_STATS[name] - _ASYNC_TASK_STATS[name] = [cnt + 1, tot + elapsed] - avg = _ASYNC_TASK_STATS[name][1] / _ASYNC_TASK_STATS[name][0] - - log10 = numpy.log10(max(cnt, 1)) - if numpy.isclose(log10, round(log10)): - logger.info( - f"{name} completed in {elapsed:.3f} ms, " - f"lifetime avg: {avg:.3f} ms, " - f"lifetime cnt: {cnt + 1}" - ) + start = time.perf_counter() + try: + return await fn(*args, **kwargs) + except Exception as e: + logger.error(f"Exception in async function {fn.__name__}: {e}") + traceback.print_exc() + sys.exit(1) + finally: + if verbose: + _log_verbose(fn.__qualname__, start) + + elif inspect.isasyncgenfunction(fn): + + @functools.wraps(fn) + async def wrapper(*args, **kwargs): + if verbose: + start = time.perf_counter() + agen = fn(*args, **kwargs) + try: + async for 
item in agen: + yield item + except Exception as e: + logger.error(f"Exception in async generator {fn.__name__}: {e}") + traceback.print_exc() + sys.exit(1) + finally: + if verbose: + _log_verbose(fn.__qualname__, start) + else: + raise TypeError("trace_async_exceptions must be used on async functions or generators") return wrapper return _decorate if func is None else _decorate(func) +def get_mamba_inference_state_config_from_model(model) -> Optional["MambaInferenceStateConfig"]: + """Returns Mamba inference state config from the model if it is a hybrid model.""" + from megatron.core.inference.contexts.attention_context.mamba_metadata import ( + MambaInferenceStateConfig, + ) + from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols + + decoder = get_attr_wrapped_model(model, "decoder") + layer_type_list = getattr(decoder, "layer_type_list", None) + if layer_type_list is not None and Symbols.MAMBA in layer_type_list: + (mamba_conv_states_shape, mamba_ssm_states_shape) = decoder.mamba_state_shapes_per_request() + return MambaInferenceStateConfig( + layer_type_list=layer_type_list, + mamba_conv_states_shape=mamba_conv_states_shape, + mamba_ssm_states_shape=mamba_ssm_states_shape, + ) + return None + + # ============================================================================ # Backward Compatibility Decorators # ============================================================================ diff --git a/megatron/legacy/data/biencoder_dataset_utils.py b/megatron/legacy/data/biencoder_dataset_utils.py index 6fa391c8a22..6d69fabbe48 100644 --- a/megatron/legacy/data/biencoder_dataset_utils.py +++ b/megatron/legacy/data/biencoder_dataset_utils.py @@ -5,11 +5,14 @@ import numpy as np import torch -from megatron.training import get_args, get_tokenizer, print_rank_0 from megatron.core import mpu, tensor_parallel -from megatron.legacy.data.dataset_utils import create_masked_lm_predictions, \ - pad_and_convert_to_numpy -from megatron.legacy.data.data_samplers 
import MegatronPretrainingSampler +from megatron.legacy.data.dataset_utils import ( + create_masked_lm_predictions, + pad_and_convert_to_numpy, +) +from megatron.training import get_args, get_tokenizer, print_rank_0 +from megatron.training.datasets.data_samplers import MegatronPretrainingSampler + def make_attention_mask(source_block, target_block): """ diff --git a/megatron/legacy/data/vit_dataset.py b/megatron/legacy/data/vit_dataset.py index e65c536c897..504075a5506 100644 --- a/megatron/legacy/data/vit_dataset.py +++ b/megatron/legacy/data/vit_dataset.py @@ -1,15 +1,17 @@ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. import os import random + import numpy as np import torch import torchvision.transforms as T +from PIL import Image, ImageFilter, ImageOps from torchvision import datasets -from megatron.training import get_args -from megatron.legacy.data.image_folder import ImageFolder + from megatron.legacy.data.autoaugment import ImageNetPolicy -from megatron.legacy.data.data_samplers import RandomSeedDataset -from PIL import Image, ImageFilter, ImageOps +from megatron.legacy.data.image_folder import ImageFolder +from megatron.training import get_args +from megatron.training.datasets.data_samplers import RandomSeedDataset class GaussianBlur(object): @@ -236,7 +238,7 @@ def build_train_valid_datasets(data_path, image_size=224): classes_fraction=args.classes_fraction, data_per_class_fraction=args.data_per_class_fraction ) - train_data = RandomSeedDataset(train_data) + train_data = RandomSeedDataset(train_data, args.seed) # validation dataset val_data_path = data_path[1] @@ -244,6 +246,6 @@ def build_train_valid_datasets(data_path, image_size=224): root=val_data_path, transform=val_transform ) - val_data = RandomSeedDataset(val_data) + val_data = RandomSeedDataset(val_data, args.seed) return train_data, val_data diff --git a/megatron/post_training/algos/__init__.py b/megatron/post_training/algos/__init__.py deleted file mode 100644 index 
f8011007a50..00000000000 --- a/megatron/post_training/algos/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. diff --git a/megatron/post_training/algos/distillation.py b/megatron/post_training/algos/distillation.py deleted file mode 100644 index c54add0a8d7..00000000000 --- a/megatron/post_training/algos/distillation.py +++ /dev/null @@ -1,601 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. - -"""Distillation loss function(s).""" - -import logging -import re -import types -from abc import ABCMeta -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -import modelopt.torch.distill as mtd -import modelopt.torch.opt as mto -import torch -import torch.nn as nn -import torch.nn.functional as F -import yaml -from torch import Tensor -from torch.nn.modules.loss import _Loss - -from megatron.core.dist_checkpointing.mapping import ShardedStateDict -from megatron.core.parallel_state import ( - get_context_parallel_group, - get_pipeline_model_parallel_world_size, - get_tensor_and_context_parallel_rank, - get_tensor_model_parallel_group, - get_virtual_pipeline_model_parallel_world_size, - is_pipeline_last_stage, -) -from megatron.core.pipeline_parallel.schedules import get_tensor_shapes -from megatron.core.transformer import MegatronModule, TransformerConfig, TransformerLayer -from megatron.core.utils import get_model_config - -logger = logging.getLogger(__name__) - - -def load_distillation_config( - config_path: Optional[str], student_cfg: TransformerConfig, teacher_cfg: TransformerConfig -) -> Dict[str, Any]: - """Read the distillation yaml config file specified by ``args.export_kd_cfg``. - - Args: - config_path: Path to user-defined distillation settings yaml file. - If `None`, uses default logits-only distillation mode for GPT models. - student_cfg: Model config for student model. - teacher_cfg: Model config for teacher model. 
- - WARNING: Assumes intermediate hidden sizes are always that found in the model config's ``hidden_size`` attribute. - """ - if not config_path: - logger.warning("Distillation config not provided. Using default.") - cfg = { - "logit_layers": ["output_layer", "output_layer"], - "intermediate_layer_pairs": [], - "skip_lm_loss": True, - "kd_loss_scale": 1.0, - } - else: - with open(config_path) as f: - cfg = yaml.safe_load(f) - - intermediate_pairs = cfg.get("intermediate_layer_pairs", []) - logit_pair = cfg["logit_layers"] - skip_lm_loss = cfg["skip_lm_loss"] - loss_scale = cfg["kd_loss_scale"] - - criterion = {} - if student_cfg.pipeline_model_parallel_size == 1 or is_pipeline_last_stage(): - criterion[tuple(logit_pair)] = LogitsKLLoss(student_cfg) - # NOTE: Projection layer shared among intermediate layer pairs. - projection_layer = ProjectionLayer(student_cfg, teacher_cfg) - - for entry in intermediate_pairs: - if len(entry) == 2: - student_layer, teacher_layer = entry - loss = "hidden_cosine" - elif len(entry) == 3: - student_layer, teacher_layer, loss = entry - - loss_fn = None - - if loss == "mse": - loss_fn = MSELoss - elif loss == "hidden_cosine": - loss_fn = HiddenStateCosineLoss - else: - assert False, f"loss passed was {loss=}" - - if get_tensor_and_context_parallel_rank() == 0: - print( - "Distillation: Adding intermediate loss between" - f" `{student_layer}` of student (hidden size {student_cfg.hidden_size}) and" - f" `{teacher_layer}` of teacher (hidden size {teacher_cfg.hidden_size})." 
- ) - student_layer = _adjust_layer_index_for_pp(student_layer, student_cfg) - teacher_layer = _adjust_layer_index_for_pp(teacher_layer, teacher_cfg) - criterion[(student_layer, teacher_layer)] = loss_fn( - student_cfg, projection_layer=projection_layer - ) - - loss_balancer = LogitsAndIntermediatesLossBalancer( - kd_loss_scale=loss_scale, skip_original_loss=skip_lm_loss - ) - - cfg["criterion"] = criterion - cfg["loss_balancer"] = loss_balancer - - return cfg - - -def _adjust_layer_index_for_pp(submodule_name, model_cfg): - """Adjust any sequence-based layer indices found in a submodule name for Pipeline Parallelism.""" - - match = re.search(r'(?<=\.)\d+(?=\.)', submodule_name) - if not match: - return submodule_name - - offset = TransformerLayer._get_layer_offset(model_cfg) - new_layer_idx = int(match.group(0)) - offset - if new_layer_idx < 0: - raise ValueError(f"Layer {submodule_name} does not fall on final PP rank.") - - new_submodule_name = submodule_name.replace(match.group(0), str(new_layer_idx)) - if get_tensor_and_context_parallel_rank() == 0: - print( - f'Distillation: Renamed layer "{submodule_name}" on final PP rank to "{new_submodule_name}"' - ) - return new_submodule_name - - -######################################################## - - -class BaseLoss(_Loss, metaclass=ABCMeta): - """Abstract base class for Megatron distillation losses.""" - - def __init__( - self, model_config: TransformerConfig, projection_layer: Optional[nn.Module] = None - ): - """ - Constructor. - - Args: - model_config: MCore transformer config. - projection_layer: Module which projects student activations to teacher's hidden dim. 
- """ - super().__init__() - self._config = model_config - self._projection = projection_layer - - def pre_forward(self, predictions: Tensor, targets: Tensor) -> Tuple[Tensor, Tensor]: - """Performs projection of student tensor to match teacher's size if necessary.""" - if isinstance(predictions, tuple): - # `ColumnParallelLinear` returns bias too - predictions, targets = predictions[0], targets[0] - - if self._projection is not None: - predictions = self._projection(predictions) - targets = targets.detach() - - return predictions, targets - - def post_forward(self, loss: Tensor, tp_reduce: bool = False, is_sequence_parallel: bool = False) -> Tensor: - """Reshapes tensor from [s, b] to [b, s] for upcoming loss masking.""" - loss = loss.transpose(0, 1).contiguous() - return (loss, tp_reduce, is_sequence_parallel) - - -class HiddenStateCosineLoss(BaseLoss): - """ - Calculates Cosine loss between two tensors without reducing the sequence dim. - - The tensors are assumed to be intermediate activations, so extra restrictions are in place. - """ - - def __init__( - self, model_config: TransformerConfig, projection_layer: Optional[nn.Module] = None - ): - """ - Constructor. - - Args: - model_config: MCore transformer config. - projection_layer: Module which projects student activations to teacher's hidden dim. - """ - super().__init__(model_config, projection_layer=projection_layer) - - if self._config.tensor_model_parallel_size > 1 and not self._config.sequence_parallel: - logger.warning( - "``HiddenStateCosineLoss`` only works with tensors with full hidden dim. Ensure the " - "tensor inputs meet this requirement or use `--sequence_parallel` if tensor parallel is enabled." - ) - - def forward(self, predictions: Tensor, targets: Tensor) -> Tensor: - """ - Forward function. 
- - Args: - predictions: Student model tensors (size [s, b, h]) - targets: Teacher model tensors (size [s, b, h]) - - Returns: - Cosine loss of tensors (size [b, s]) - """ - predictions, targets = self.pre_forward(predictions, targets) - - loss = F.cosine_embedding_loss( - predictions.view(-1, predictions.size(-1)), - targets.view(-1, targets.size(-1)), - targets.new_ones(1), - reduction="none", - ) - loss = loss.view(*predictions.shape[:2]) - - # NOTE: Tensor sequence length is still split among TP ranks. - return self.post_forward(loss, is_sequence_parallel=self._config.sequence_parallel) - - -class MSELoss(BaseLoss): - """Calculates MSE loss between two tensors without reducing the sequence dim.""" - - def forward(self, predictions: Tensor, targets: Tensor) -> Tensor: - """Forward function. - - Args: - predictions: Student model tensors (size [s, b, h]) - targets: Teacher model tensors (size [s, b, h]) - - Returns: - MSE loss of tensors (size [b, s]) - """ - predictions, targets = self.pre_forward(predictions, targets) - - loss = F.mse_loss(predictions, targets, reduction="none") - loss = loss.mean(dim=-1) - - return self.post_forward(loss, is_sequence_parallel=self._config.sequence_parallel) - - -class LogitsKLLoss(BaseLoss): - """Calculates KL-Divergence loss between two logits tensors without reducing the sequence dim.""" - - def __init__( - self, model_config: TransformerConfig, temperature: float = 1.0, reverse: bool = False - ): - """ - Constructor. - - Args: - model_config: MCore transformer config. - temperature: Divide tensors by this value prior to calculating loss. - reverse: Whether to reverse the loss as KLD(teacher, student) instead of KLD(student, teacher) - """ - super().__init__(model_config) - self._temperature = temperature - self._reverse = reverse - - def forward(self, predictions: Tensor, targets: Tensor) -> Tensor: - """ - Forward function. 
- - Args: - predictions: Student model tensors (size [s, b, h]) - targets: Teacher model tensors (size [s, b, h]) - - Returns: - KLD loss of tensors (size [b, s]) - """ - predictions, targets = self.pre_forward(predictions, targets) - - # Division by temp should happen prior to finding max for both student and teacher. - # Currently we don't use temperature in any of ours runs (temp=1.0) - output_teacher = targets.float() / self._temperature - output_student = predictions.float() / self._temperature - - # Compute local softmax, and the reweight to compute global softmax. - if self._config.tensor_model_parallel_size > 1: - - # Maximum value along vocab dimension across all GPUs. - teacher_logits_max, _ = torch.max(output_teacher, dim=-1) - torch.distributed.all_reduce( - teacher_logits_max, - op=torch.distributed.ReduceOp.MAX, - group=get_tensor_model_parallel_group(), - ) - output_teacher = output_teacher - teacher_logits_max.unsqueeze(dim=-1) - - denom_teacher = torch.sum(torch.exp(output_teacher), dim=-1) - # We can't use standard reduction function here since the computation - # that follows it isn't identical across TP ranks. - denom_teacher = all_reduce_autograd( - denom_teacher, group=get_tensor_model_parallel_group() - ) - - # Maximum value along vocab dimension across all GPUs. 
- student_logits_max, _ = torch.max(output_student, dim=-1) - torch.distributed.all_reduce( - student_logits_max, - op=torch.distributed.ReduceOp.MAX, - group=get_tensor_model_parallel_group(), - ) - output_student = output_student - student_logits_max.unsqueeze(dim=-1).detach() - - denom_student = torch.sum(torch.exp(output_student), dim=-1) - denom_student = all_reduce_autograd( - denom_student, group=get_tensor_model_parallel_group() - ) - - slen, bsz, sharded_vocab_size = output_student.shape - student_log_prob = output_student - torch.log(denom_student).view(slen, bsz, 1).expand( - slen, bsz, sharded_vocab_size - ) - teacher_log_prob = output_teacher - torch.log(denom_teacher).view(slen, bsz, 1).expand( - slen, bsz, sharded_vocab_size - ) - - if self._reverse: - loss = torch.sum( - F.kl_div(teacher_log_prob, student_log_prob, reduction="none", log_target=True), - dim=-1, - ) - else: - loss = torch.sum( - F.kl_div(student_log_prob, teacher_log_prob, reduction="none", log_target=True), - dim=-1, - ) - - else: - if self._reverse: - loss = torch.sum( - F.kl_div( - F.log_softmax(output_teacher, dim=-1), - F.softmax(output_student, dim=-1), - reduction="none", - ), - dim=-1, - ) - else: - loss = torch.sum( - F.kl_div( - F.log_softmax(output_student, dim=-1), - F.softmax(output_teacher, dim=-1), - reduction="none", - ), - dim=-1, - ) - - return self.post_forward(loss, tp_reduce=True) - - -######################################################## - - -class LogitsAndIntermediatesLossBalancer(mtd.DistillationLossBalancer): - """ - LossBalancer implementation for Logit and Intermediate losses. - - Dynamically weighs distillation and original losses to balance during training. - """ - - def __init__(self, kd_loss_scale: float = 1.0, skip_original_loss: bool = False): - """Constructor. - - Args: - kd_loss_scale: Multiply distillation losses by this before weighing. - (Not used when `skip_original_loss` is True.) 
- skip_original_loss: Used to signal whether the original loss should be used, regardless - of whether it was passed into ``mtd.DistillationModel.compute_kd_loss()`` or not. - """ - super().__init__() - self._kd_loss_scale = kd_loss_scale - self._skip_original_loss = skip_original_loss - - def forward(self, loss_dict: Dict[str, Tensor]) -> Tensor: - """Forward function. - - Args: - loss_dict: All individual scalar losses, passed in during ``mtd.DistillationModel.compute_kd_loss()`` - - Returns: - Aggregate total scalar loss. - """ - original_loss = loss_dict.pop(mtd.loss_balancers.STUDENT_LOSS_KEY) - for _key in loss_dict: - if _key.startswith(LogitsKLLoss.__name__): - logits_key = _key # should only be one - logits_loss = loss_dict.pop(logits_key) - intermediate_loss = sum(loss_dict.values()) / max(len(loss_dict), 1) - - if intermediate_loss > 0: - dynamic_scale = logits_loss.item() / intermediate_loss.item() - intermediate_loss_scaled = intermediate_loss * dynamic_scale - kd_loss_scale = self._kd_loss_scale / 2.0 - else: - kd_loss_scale = self._kd_loss_scale - intermediate_loss = logits_loss.new_tensor(intermediate_loss) - intermediate_loss_scaled = intermediate_loss - - if self._skip_original_loss: - total_loss = logits_loss + intermediate_loss_scaled - else: - kd_loss = (logits_loss + intermediate_loss_scaled) * kd_loss_scale - dynamic_scale = original_loss.item() / kd_loss.item() - total_loss = original_loss + kd_loss * dynamic_scale - - out_dict = { - "kd_loss": total_loss, - "logits_loss": logits_loss, - "intermediate_loss": intermediate_loss, - } - return out_dict - - -######################################################## - - -class ProjectionLayer(MegatronModule): - """Module to project student layer activations to teacher's size.""" - - def __init__(self, student_config: TransformerConfig, teacher_config: TransformerConfig): - """ - Constructor. - - Args: - student_config: Student's MCore transformer config. 
- teacher_config: Teacher's MCore transformer config. - """ - super().__init__(config=student_config) - if student_config.hidden_size == teacher_config.hidden_size: - self._fit = nn.Identity() - else: - self._fit = nn.Linear(student_config.hidden_size, teacher_config.hidden_size) - self.apply(self._init_weights) - # Attribute below needed to reduce gradients during backward properly. - setattr(self._fit.weight, "sequence_parallel", self.config.sequence_parallel) - setattr(self._fit.bias, "sequence_parallel", self.config.sequence_parallel) - - def forward(self, student_tensor: Tensor): - """ - Forward function. - - Args: - student_tensor: Tensor to be fit to teacher size. - """ - return self._fit(student_tensor) - - def _init_weights(self, module): - """Initialize the weights.""" - if isinstance(module, nn.Linear): - module.weight.data.normal_(mean=0.0, std=0.01) - if module.bias is not None: - module.bias.data.zero_() - - -class _AllReduce(torch.autograd.Function): - """Implementation from old PyTorch `torch.distributed.nn.parallel`.""" - - @staticmethod - def forward(ctx, op, group, tensor): - ctx.group, ctx.op = group, op - tensor = tensor.clone() - torch.distributed.all_reduce(tensor, op=op, group=group) - return tensor - - @staticmethod - def backward(ctx, grad_output): - return (None, None, _AllReduce.apply(ctx.op, ctx.group, grad_output)) - - -def all_reduce_autograd( - tensor, op=torch.distributed.ReduceOp.SUM, group=torch.distributed.group.WORLD -): - """Custom all-reduce function. - - Needed instead of other all-reduce functions available when the computation following - the all-reduce call differs per rank. In KL loss, this corresponds to the different numerators. 
- """ - return _AllReduce.apply(op, group, tensor) - - -######################################################## - - -def adjust_distillation_model_for_mcore(model: mtd.DistillationModel, distill_cfg: Dict[str, Any]): - """Extra modifcations to ``mtd.DistillationModel`` requried for Megatron-Core.""" - - # HACK: Get rid of ModelOpt Distillation state - # NOTE: If re-placed, above losses need modifcation as `TransformerConfig` has non-pickleable elements. - mto.ModeloptStateManager(model)._state.pop() - - # HACK: Hide teacher during `sharded_state_dict` method. - def _sharded_state_dict(self, *args, **kwargs) -> ShardedStateDict: - with self.hide_teacher_model(): - return type(self).sharded_state_dict(self, *args, **kwargs) - - model.sharded_state_dict = types.MethodType(_sharded_state_dict, model) - - # HACK: Skip `lm_loss` bypassing it when training if not needed for backprop. - def _compute_language_model_loss(self, labels, logits) -> Tensor: - if distill_cfg["skip_lm_loss"] and self.training: - return torch.zeros_like(labels) - return type(self).compute_language_model_loss(self, labels, logits) - - model.compute_language_model_loss = types.MethodType(_compute_language_model_loss, model) - - # HACK: Skip `lm_loss` always for teacher. - def _compute_language_model_loss(self, labels, logits) -> Tensor: - return torch.zeros_like(labels) - - model.teacher_model.compute_language_model_loss = types.MethodType( - _compute_language_model_loss, model.teacher_model - ) - - # HACK: Pipeline-parallel Distillation requires splitting input tensor into student and teacher parts. 
- def _set_student_input_tensor_shape(self, shapes: List[Tuple[int]]): - self._tensor_split_idx = shapes[0][-1] - - def _set_input_tensor(self, input_tensors: List[Tensor]): - teacher_inputs = [t[..., self._tensor_split_idx:] if t is not None else t for t in input_tensors] - student_inputs = [t[..., :self._tensor_split_idx] if t is not None else t for t in input_tensors] - type(self).set_input_tensor(self.teacher_model, teacher_inputs) - type(self).set_input_tensor(self, student_inputs) - - model.set_student_input_tensor_shape = types.MethodType(_set_student_input_tensor_shape, model) - model.set_input_tensor = types.MethodType(_set_input_tensor, model) - - # HACK: Concatenate output tensors when PP>1 so they can be passed between ranks. - def _forward(self, *args, **kwargs): - if not self.training: - with self.only_student_forward(): - return type(self).forward(self, *args, **kwargs) - - with torch.no_grad(): - self._teacher_model.eval() - teacher_output = self._teacher_model(*args, **kwargs) - with self.only_student_forward(): - student_output = type(self).forward(self, *args, **kwargs) - - if not is_pipeline_last_stage(): - return torch.cat([student_output, teacher_output], dim=-1) - else: - return student_output - - model.forward = types.MethodType(_forward, model) - - -def get_tensor_shapes_adjust_fn_for_distillation( - model: Union[torch.nn.Module, List[torch.nn.Module]], - seq_length: int, - micro_batch_size: int, - decoder_seq_length: Optional[int] = None, - forward_only: bool = False, -) -> Union[Callable, None]: - if ( - forward_only - or get_pipeline_model_parallel_world_size() == 1 - or get_virtual_pipeline_model_parallel_world_size() is not None - ): - return None - # Unwrap - if isinstance(model, list): - model = model[0] - while hasattr(model, "module"): - model = model.module - if not isinstance(model, mtd.DistillationModel): - return None - - def adjust_tensor_shapes(recv_tensor_shapes: List[Tuple[int, ...]], send_tensor_shapes: List[Tuple[int, 
...]]): - teacher_config = get_model_config(model.teacher_model) - tp_group = get_tensor_model_parallel_group() - cp_group = get_context_parallel_group() - - teacher_recv_tensor_shapes = get_tensor_shapes( - seq_length=seq_length, - micro_batch_size=micro_batch_size, - decoder_seq_length=decoder_seq_length, - config=teacher_config, - tp_group=tp_group, - cp_group=cp_group, - ) - teacher_send_tensor_shapes = get_tensor_shapes( - seq_length=seq_length, - micro_batch_size=micro_batch_size, - decoder_seq_length=decoder_seq_length, - config=teacher_config, - tp_group=tp_group, - cp_group=cp_group, - ) - model.set_student_input_tensor_shape(recv_tensor_shapes) - - for i, shape in enumerate(recv_tensor_shapes): - shape = list(shape) - shape[-1] += teacher_recv_tensor_shapes[0][-1] - recv_tensor_shapes[i] = tuple(shape) - for i, shape in enumerate(send_tensor_shapes): - shape = list(shape) - shape[-1] += teacher_send_tensor_shapes[0][-1] - send_tensor_shapes[i] = tuple(shape) - - return recv_tensor_shapes, send_tensor_shapes - - return adjust_tensor_shapes diff --git a/megatron/post_training/checkpointing.py b/megatron/post_training/checkpointing.py index aac59341e37..143cbb9c6ab 100644 --- a/megatron/post_training/checkpointing.py +++ b/megatron/post_training/checkpointing.py @@ -183,14 +183,7 @@ def _remove_prefix_state_dict_pre_hook( logger.warning(f"PyTorch version {get_torch_version()} below 2.6 detected." f" Forcing dist_ckpt_save_pre_mcore_014 behavior.") - # NOTE: singleton_local_shards only take care of the weight and bias. There are be issue when linear_fc1._amax - # is a matrix such as NVFP4 real quant, awq, and blockwise 128. 
- if args.dist_ckpt_save_pre_mcore_014 or force_pre_mcore_014: - metadata = {"singleton_local_shards": False} - else: - metadata = {"singleton_local_shards": True} - - sharded_state_dict = unwrapped_model[0].sharded_state_dict(prefix=additional_sharded_prefix, metadata=metadata) + sharded_state_dict = unwrapped_model[0].sharded_state_dict(prefix=additional_sharded_prefix) if additional_sharded_prefix: unwrapped_model[0]._register_load_state_dict_pre_hook( diff --git a/megatron/post_training/docs/distillation.md b/megatron/post_training/docs/distillation.md index 6ca1ec18417..9f0d5524176 100644 --- a/megatron/post_training/docs/distillation.md +++ b/megatron/post_training/docs/distillation.md @@ -75,7 +75,7 @@ Model Optimizer modifies the model using the loss criterion present in the disti defines a loss function between two module attribute names of the teacher and student model, respectively. Default loss function used between logits is a KL-Divergence Loss and loss used among intermediate tensors is Cosine-Similarity, -both defined in `megatron/inference/algos/distillation.py`. +both defined in `modelopt.torch.distill.plugins.megatron`. ## Restrictions diff --git a/megatron/post_training/generate.py b/megatron/post_training/generate.py index 0c5be3eceab..2a124734a30 100644 --- a/megatron/post_training/generate.py +++ b/megatron/post_training/generate.py @@ -104,7 +104,7 @@ def simple_speculative_generate( input_ids: torch.Tensor, images: Optional[torch.Tensor] = None, osl: int = 32, - draft_length: int = 0, + steps: int = 0, eos_token_id: List[int] = [], disable_tqdm: bool = False, ): @@ -127,7 +127,7 @@ def simple_speculative_generate( # Speculative decoding forward # NOTE: PP is not yet supported. - new_token, draft_tokens = model.pseudo_speculative_generate(input_ids, steps=draft_length) + new_token, draft_tokens = model.pseudo_speculative_generate(input_ids, steps=steps) # Always accept the first token. 
input_ids = output_ids[:, : offset] @@ -138,6 +138,8 @@ def simple_speculative_generate( for i in range(draft_tokens.shape[-1]): if torch.equal(draft_tokens[:, i : i + 1], output_ids[:, offset: offset + 1]): offset += 1 + else: + break # Broadcast the accepted offset from the last rank. offset = [offset] diff --git a/megatron/post_training/loss_func.py b/megatron/post_training/loss_func.py index eb8dbca1c6a..9c99529172d 100644 --- a/megatron/post_training/loss_func.py +++ b/megatron/post_training/loss_func.py @@ -55,16 +55,18 @@ def loss_func(loss_mask: torch.Tensor, output_tensor: torch.Tensor, model: GPTMo num_tokens = loss_mask.sum().clone().detach().to(torch.int) report = {'lm loss': torch.cat([loss_lm.clone().detach().view(1), num_tokens.view(1)])} - if model.training and args.export_kd_teacher_load: + if args.export_kd_teacher_load: # [ModelOpt]: Handle knowledge distillation losses = model.compute_kd_loss( student_loss=loss_lm, loss_reduction_fn=lambda x: _mask_loss(x, loss_mask), ) - loss = losses["kd_loss"] report["total loss"] = torch.cat([losses["kd_loss"].clone().detach().view(1), num_tokens.view(1)]) report["logits distillation loss"] = torch.cat([losses["logits_loss"].clone().detach().view(1), num_tokens.view(1)]) report["intermediate distillation loss"] = torch.cat([losses["intermediate_loss"].clone().detach().view(1), num_tokens.view(1)]) + if model.training: + loss = losses["kd_loss"] + return loss, num_tokens, report diff --git a/megatron/post_training/model_builder.py b/megatron/post_training/model_builder.py index 34daa279651..cb2654e7107 100644 --- a/megatron/post_training/model_builder.py +++ b/megatron/post_training/model_builder.py @@ -7,6 +7,8 @@ from typing import Any, Dict import modelopt.torch.distill as mtd +import modelopt.torch.distill.plugins.megatron as mtd_mcore +import modelopt.torch.opt as mto import yaml from megatron.core.models.gpt import GPTModel as MCoreGPTModel @@ -18,7 +20,6 @@ from 
megatron.core.post_training.modelopt.gpt.state_dict_hooks import ( mcore_gpt_load_te_state_dict_pre_hook, ) -from megatron.post_training.algos import distillation from megatron.post_training.checkpointing import load_modelopt_checkpoint, load_modelopt_state from megatron.training import get_args, print_rank_0 from megatron.training.arguments import core_transformer_config_from_args @@ -285,7 +286,7 @@ def modelopt_gpt_mamba_builder(args, pre_process, post_process, vp_stage=None, c ), "ModelOpt Distillation currently incompatible with interleaved pipeline schedule." teacher_config = _load_teacher_model_config(args.export_kd_teacher_load) - distill_cfg = distillation.load_distillation_config( + distill_cfg = mtd_mcore.setup_distillation_config( args.export_kd_cfg, student_cfg=config, teacher_cfg=core_transformer_config_from_args(teacher_config) ) if "hybrid_override_pattern" in teacher_config and args.is_hybrid_model: @@ -297,14 +298,15 @@ def modelopt_gpt_mamba_builder(args, pre_process, post_process, vp_stage=None, c kd_config = { "teacher_model": (_teacher_provider, [teacher_config, model_kwargs], {}), - "criterion": distill_cfg["criterion"], - "loss_balancer": distill_cfg["loss_balancer"], + "criterion": distill_cfg.criterion, + "loss_balancer": distill_cfg.loss_balancer, } model = mtd.convert(model, mode=[("kd_loss", kd_config)]) - # Additional tweaks needed for MCore/Nemo. - # NOTE: Distillation state manually removed in this function. - # ModelOpt state restoration above will not return a `mtd.DistillationModel` for simplicity reasons. - distillation.adjust_distillation_model_for_mcore(model, distill_cfg) + # Additional tweaks needed for MCore. + # (accounts for sharded state, pipeline parallel, and potentially skipping LM loss) + mtd_mcore.adjust_distillation_model_for_mcore(model, distill_cfg) + # Also remove KD mode state to prevent issues with re-conversion after restore. 
+ mto.ModeloptStateManager(model).state_dict().pop() # TODO(aanoosheh): remove once fixed in ModelOpt return model diff --git a/megatron/post_training/non_loss_data_func.py b/megatron/post_training/non_loss_data_func.py index 49fb9220258..49c29b4912c 100644 --- a/megatron/post_training/non_loss_data_func.py +++ b/megatron/post_training/non_loss_data_func.py @@ -8,10 +8,11 @@ from megatron.training.utils import unwrap_model -def report_draft_acceptance_length(model, osl: int = 64, draft_length: int = 7): +def report_draft_acceptance_length(model, osl: int = 64, draft_steps: int = 7): """Report MTBench acceptance length.""" tokenizer = get_tokenizer()._tokenizer unwrapped_model = unwrap_model(model)[0] + parallel_draft_step = unwrapped_model.eagle_config.parallel_draft_step if hasattr(unwrapped_model, "eagle_config") else 1 if unwrapped_model.training: return @@ -33,15 +34,15 @@ def report_draft_acceptance_length(model, osl: int = 64, draft_length: int = 7): conversations, return_tensors="pt", add_generation_prompt=True ).to(torch.cuda.current_device()) output_ids, actual_osl, steps = simple_speculative_generate( - unwrapped_model, input_ids, osl=osl, draft_length=draft_length, disable_tqdm=True + unwrapped_model, input_ids, osl=osl, steps=draft_steps, disable_tqdm=True ) total_osl += actual_osl total_steps += steps if torch.distributed.get_rank() == 0: al = actual_osl / steps - ar = al / draft_length + ar = al / (draft_steps + parallel_draft_step - 1) print( - "Rank {:3}/{:3} {:12} AL {:.1f} AR {:.2f} STEPS {:5}/{:5} DRAFT {:2}".format( + "Rank {:3}/{:3} {:12} AL {:.1f} AR {:.2f} STEPS {:5}/{:5} DRAFT {:2} PARALLEL {:2}".format( torch.distributed.get_rank(), torch.distributed.get_world_size(), category, @@ -49,15 +50,16 @@ def report_draft_acceptance_length(model, osl: int = 64, draft_length: int = 7): ar, steps, actual_osl, - draft_length, + draft_steps, + parallel_draft_step, ), flush=True, ) if torch.distributed.get_rank() == 0: al = total_osl / total_steps - ar 
= al / draft_length + ar = al / (draft_steps + parallel_draft_step - 1) print( - "Rank {:3}/{:3} {:12} AL {:.1f} AR {:.2f} STEPS {:5}/{:5} DRAFT {:2}".format( + "Rank {:3}/{:3} {:12} AL {:.1f} AR {:.2f} STEPS {:5}/{:5} DRAFT {:2} PARALLEL {:2}".format( torch.distributed.get_rank(), torch.distributed.get_world_size(), "average", @@ -65,7 +67,8 @@ def report_draft_acceptance_length(model, osl: int = 64, draft_length: int = 7): ar, total_steps, total_osl, - draft_length, + draft_steps, + parallel_draft_step, ), flush=True, ) diff --git a/megatron/post_training/utils.py b/megatron/post_training/utils.py index 5d9f301cd41..4bec8c96cf1 100644 --- a/megatron/post_training/utils.py +++ b/megatron/post_training/utils.py @@ -1,5 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +import os import torch from datasets import load_dataset @@ -34,7 +35,7 @@ def mtbench_to_oai_chat(example): example["conversations"] = conversations return example - dataset = load_dataset("HuggingFaceH4/mt_bench_prompts", split="train") + dataset = load_dataset("HuggingFaceH4/mt_bench_prompts", split="train", token=os.environ.get("HF_TOKEN", None)) return dataset.map(mtbench_to_oai_chat) def to_empty_if_meta(module: torch.nn.Module, *, device: torch.device, recurse=True): diff --git a/megatron/rl/inference/megatron.py b/megatron/rl/inference/megatron.py index 58613b364a6..ad22bd14ac9 100644 --- a/megatron/rl/inference/megatron.py +++ b/megatron/rl/inference/megatron.py @@ -5,10 +5,11 @@ from argparse import Namespace from pydantic import PrivateAttr +import torch.distributed as dist from megatron.core import parallel_state +from megatron.core.inference.inference_client import InferenceClient from megatron.core.inference.contexts.dynamic_context import DynamicInferenceContext -from megatron.core.inference.coordinator import DynamicEngineCoordinator from megatron.core.inference.engines.abstract_engine import AbstractEngine from megatron.core.inference.engines.dynamic_engine 
import DynamicInferenceEngine from megatron.core.inference.engines.mcore_engine import MCoreEngine @@ -23,9 +24,11 @@ SimpleTextGenerationController, ) from megatron.core.models.gpt.gpt_model import GPTModel +from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols from megatron.core.transformer.module import MegatronModule -from megatron.core.utils import log_single_rank +from megatron.core.utils import get_mamba_inference_state_config_from_model, log_single_rank from megatron.training.global_vars import get_args, get_tokenizer +from megatron.training import get_wandb_writer from ..inference.inference_interface import ( ChatInferenceInterface, @@ -102,38 +105,36 @@ def get_dynamic_inference_engine(args: Namespace, model: MegatronModule, inferen """ tokenizer = get_tokenizer() - num_cuda_graphs = None - if args.enable_cuda_graph: - num_cuda_graphs = args.inference_dynamic_batching_num_cuda_graphs + enable_cuda_graph = args.cuda_graph_impl == "local" - module = model.module.module if hasattr(model.module, "module") else model.module + mamba_inference_state_config = get_mamba_inference_state_config_from_model(model) # Inference context. 
inference_context = DynamicInferenceContext( params_dtype=args.params_dtype, - num_layers=args.num_layers, + num_layers=args.num_layers // args.pipeline_model_parallel_size, kv_channels=args.kv_channels, num_attention_heads=( args.num_query_groups if args.group_query_attention else args.num_attention_heads ), max_sequence_length=args.inference_max_seq_length, - num_cuda_graphs=num_cuda_graphs, + num_cuda_graphs=( + args.inference_dynamic_batching_num_cuda_graphs + if enable_cuda_graph + else None + ), + block_size_tokens=args.inference_dynamic_batching_block_size, buffer_size_gb=args.inference_dynamic_batching_buffer_size_gb, - buffer_guaranteed_fraction=args.inference_dynamic_batching_buffer_guaranteed_fraction, - chunk_size_tokens=args.inference_dynamic_batching_chunk_size, - buffer_overflow_factor=args.inference_dynamic_batching_buffer_overflow_factor, - max_requests_override=args.inference_dynamic_batching_max_requests_override, - max_tokens_override=args.inference_dynamic_batching_max_tokens_override, + max_tokens=args.inference_dynamic_batching_max_tokens, tensor_model_parallel_size=args.tensor_model_parallel_size, materialize_only_last_token_logits=True, - unified_memory_kvcache=args.inference_dynamic_batching_unified_memory_kvcache, - is_hybrid_model=args.is_hybrid_model, - layer_type_list=module.decoder.layer_type_list if args.is_hybrid_model else None, - mamba_head_dim=args.mamba_head_dim, - mamba_num_groups=args.mamba_num_groups, - mamba_d_model=args.hidden_size, - mamba_d_conv=4 if args.is_hybrid_model else None, - mamba_d_state=args.mamba_state_dim, + mamba_inference_state_config=mamba_inference_state_config, + cache_mla_latent=args.multi_latent_attention and args.cache_mla_latents, + kv_lora_rank=args.kv_lora_rank if args.multi_latent_attention else None, + qk_pos_emb_head_dim=args.qk_pos_emb_head_dim, + use_cuda_graphs_for_non_decode_steps=not args.decode_only_cuda_graphs, + use_flashinfer_fused_rope=None, + 
unified_memory_level=args.inference_dynamic_batching_unified_memory_level, metrics_writer=metrics_writer, ) @@ -150,7 +151,7 @@ def get_dynamic_inference_engine(args: Namespace, model: MegatronModule, inferen return DynamicInferenceEngine( controller=text_generation_controller, context=inference_context, - enable_cuda_graph=args.enable_cuda_graph, + enable_cuda_graph=enable_cuda_graph, random_seed=args.seed, inference_logging_step_interval=inference_logging_step_interval, ) @@ -159,9 +160,8 @@ def get_dynamic_inference_engine(args: Namespace, model: MegatronModule, inferen class MegatronLocal(InferenceServer, ReturnsTokens, ReturnsRaw): """Interface to use MCoreEngine directly as an inference engine.""" - _coordinator: DynamicEngineCoordinator = PrivateAttr(None) - _engine_task: asyncio.Task = PrivateAttr(None) - _kill_engine: bool = PrivateAttr(False) + _client: InferenceClient = PrivateAttr(None) + _inference_engine: DynamicInferenceEngine = PrivateAttr(None) async def base_generate(self, request: InferenceRequest): @@ -174,25 +174,29 @@ async def base_generate(self, request: InferenceRequest): isinstance(p, str) for p in request.prompt ), "MegatronLocal only supports string prompts." 
+ assert self._client is not None, "Client is not initialized" + tokenizer = get_tokenizer() sampling_params = SamplingParams( - num_tokens_to_generate=request.generation_args.max_tokens or 1024, + num_tokens_to_generate=None, + num_tokens_total=request.generation_args.max_tokens, temperature=request.generation_args.temperature or 1.0, top_k=request.generation_args.top_k or 0, top_p=request.generation_args.top_p or 0.0, - termination_id=self._coordinator.engine.controller.tokenizer.eod, + termination_id=self._inference_engine.controller.tokenizer.eod, return_log_probs=True, skip_prompt_log_probs=True, add_BOS=tokenizer.bos is not None, ) - request_ids = [ - self._coordinator.schedule_request(prompt=prompt, sampling_params=sampling_params) + requests = [ + self._client.add_request(prompt=prompt, sampling_params=sampling_params) for prompt in request.prompt ] - responses = await asyncio.gather( - *[self._coordinator.get_response(id) for id in request_ids] + records = await asyncio.gather( + *requests ) + responses = [record[-1] for record in records] return [ InferenceResponse( response=r.generated_text, @@ -229,28 +233,32 @@ async def launch(cls, model: GPTModel, **kwargs): "wandb module is available. Inference logging will be disabled.") inference_engine: DynamicInferenceEngine = get_dynamic_inference_engine(args, model, inference_logging_step_interval, metrics_writer) - coordinator = DynamicEngineCoordinator( - inference_engine, - inference_max_requests=inference_engine.context.max_requests, - log_level=0, - ) + await inference_engine.start_listening_to_data_parallel_coordinator(inference_coordinator_port=41521, launch_inference_coordinator=True) + if dist.get_rank() == 0: + # TODO: We have to do this only on the rank 0 process, should be fixed in the future when we have support for multiple inference clients. 
!2278 + client = InferenceClient(inference_coordinator_port=41521) + await client.start() + else: + client = None launched_server = cls(**kwargs) - launched_server._coordinator = coordinator - - loop = asyncio.get_running_loop() - - coordinator.startup(loop) + launched_server._client = client + launched_server._inference_engine = inference_engine return launched_server async def kill(self): - await self._coordinator.shutdown() + if dist.get_rank() == 0: + await self._client.stop_engines() + await self._inference_engine.stopped.wait() async def suspend(self): - await self._coordinator.suspend_engine() - - def resume(self): - self._coordinator.resume_engine() - + if dist.get_rank() == 0: + await self._client.pause_engines() + await self._inference_engine.paused.wait() + + async def resume(self): + if dist.get_rank() == 0: + self._client.unpause_engines() + await self._inference_engine.running.wait() class MegatronChatLocal(ChatInferenceInterface, MegatronLocal): ... diff --git a/megatron/rl/rl_utils.py b/megatron/rl/rl_utils.py index c0992778d57..11e005f74af 100644 --- a/megatron/rl/rl_utils.py +++ b/megatron/rl/rl_utils.py @@ -24,7 +24,7 @@ from megatron.core import mpu from megatron.core.datasets.megatron_tokenizer import MegatronLegacyTokenizer -from megatron.core.inference.utils import get_event_loop +from megatron.core.utils import get_asyncio_loop from megatron.core.models.common.language_module.language_module import LanguageModule from megatron.core.num_microbatches_calculator import get_num_microbatches from megatron.core.optimizer import MegatronOptimizer @@ -607,11 +607,11 @@ def get_environment_rollouts( ), "n_prompts must be divisible by data_parallel_world_size" with nvtx_range("rollout-collection"): - loop = get_event_loop() + loop = get_asyncio_loop() with megatron_rl_inference_mode( model, optimizer, - args.enable_cuda_graph, + args.cuda_graph_impl, args.rl_reset_cuda_graphs, args.rl_offload_optimizer_during_inference, 
args.rl_offload_kv_cache_during_training, @@ -1006,7 +1006,7 @@ def prepare_trajectories( args = get_args() # Only process if we have inference_logprobs if inference_logprobs and any(lp is not None for lp in inference_logprobs): - if args.use_sequence_packing: + if args.rl_use_sequence_packing: # For sequence packing, we need to pad all logprobs to the same size padded_logprobs = [] for logprobs in inference_logprobs: @@ -1207,14 +1207,14 @@ def prepare_data_for_update( # [g, group_size] # Making an assumption that all groups are of the same size! # For packing mode, use all rollouts to compute rewards - rollouts_for_rewards = all_rollouts if args.use_sequence_packing else rollouts + rollouts_for_rewards = all_rollouts if args.rl_use_sequence_packing else rollouts rewards = torch.tensor( [[rollout.reward for rollout in group] for group in rollouts_for_rewards], device='cpu' ) # We flatten them for logging. with nvtx_range("prepare_trajectories"): - if args.use_sequence_packing: + if args.rl_use_sequence_packing: trajs, generation_masks, inference_logprobs = prepare_packed_trajectories( all_rollouts, tokenizer, args ) @@ -1228,14 +1228,14 @@ def prepare_data_for_update( # Sequence packing or standard processing packing_context = {} # Store all packing-related data - if args.use_sequence_packing: + if args.rl_use_sequence_packing: with nvtx_range("sequence_packing"): timers('sequence-packing-overhead', log_level=1).start() - bin_size = args.sequence_packing_bin_size + bin_size = args.rl_sequence_packing_bin_size # Create packer with max sequences per bin limit to prevent extreme imbalance - max_sequences_per_bin = getattr(args, 'sequence_packing_max_sequences_per_bin', 100) + max_sequences_per_bin = getattr(args, 'rl_sequence_packing_max_sequences_per_bin', 100) packer = SequencePacker( bin_size=bin_size, pad_token=tokenizer.pad, @@ -1276,7 +1276,7 @@ def prepare_data_for_update( world_size = mpu.get_expert_data_parallel_world_size() # Choose distribution algorithm 
based on args.sequence_packing_algo - packing_algo = getattr(args, 'sequence_packing_algo', 'fifo') + packing_algo = getattr(args, 'rl_sequence_packing_algo', 'fifo') if packing_algo == 'round-robin': # Round-robin assignment: rank i gets bins [i, i+world_size, i+2*world_size, ...] @@ -1596,7 +1596,7 @@ def prepare_data_for_update( ) original_loss_mask[~generation_masks] = 0.0 - if not args.use_sequence_packing: + if not args.rl_use_sequence_packing: # Use original masks if not packing attention_mask = original_attention_mask loss_mask = original_loss_mask @@ -1606,7 +1606,7 @@ def prepare_data_for_update( timers('compute-logprobs', log_level=0).start() # Before we can update the model, we need to get the logprobs for the \pi_{old} model. # Use packed sequences if packing is enabled for performance benefits - if args.use_sequence_packing and 'packed_trajs' in packing_context: + if args.rl_use_sequence_packing and 'packed_trajs' in packing_context: compute_trajs = packing_context['packed_trajs'] compute_position_ids = packing_context['packed_position_ids'] compute_attention_mask = packing_context['packed_attention_mask'] @@ -1661,7 +1661,7 @@ def prepare_data_for_update( if ( inference_logprobs is not None and args.rl_inference_logprobs_is_correction - and not args.use_sequence_packing + and not args.rl_use_sequence_packing ): inference_logprobs = align_unpacked_inference_logprobs( inference_logprobs=inference_logprobs, @@ -1670,14 +1670,14 @@ def prepare_data_for_update( group_stats=group_stats, ) else: - if not args.use_sequence_packing: + if not args.rl_use_sequence_packing: # Keep inference_logprobs as None instead of zeros inference_logprobs = None # For sequence packing, inference_logprobs will be handled separately # Handle packing of inference_logprobs for sequence packing mode if ( - args.use_sequence_packing + args.rl_use_sequence_packing and inference_logprobs is not None and args.rl_inference_logprobs_is_correction ): @@ -1687,7 +1687,7 @@ def 
prepare_data_for_update( inference_logprobs=inference_logprobs, packing_info=packing_context['packing_info'], generation_masks=generation_masks, - bin_size=args.sequence_packing_bin_size, + bin_size=args.rl_sequence_packing_bin_size, ) # Store packed inference logprobs in packing context @@ -1754,7 +1754,7 @@ def prepare_data_for_update( timers('prepare-advantages').stop() with nvtx_range("create_dataloader"): - if args.use_sequence_packing: + if args.rl_use_sequence_packing: # Store packing context in runtime state for forward_step runtime_state = get_rl_runtime_state() runtime_state.packing_context = packing_context @@ -2049,14 +2049,14 @@ def evaluate_and_print_results_rl( with megatron_rl_inference_mode( model, optimizer, - args.enable_cuda_graph, + args.cuda_graph_impl, args.rl_reset_cuda_graphs, args.rl_offload_optimizer_during_inference, args.rl_offload_kv_cache_during_training, args.rl_remove_kv_cache_during_training, ) as inference_interface: - loop = get_event_loop() + loop = get_asyncio_loop() rank = torch.distributed.get_rank() if rank == 0: @@ -2230,7 +2230,7 @@ def calculate_grpo_loss( def megatron_rl_inference_mode( model: list[LanguageModule], optimizer: MegatronOptimizer, - enable_cuda_graph: bool, + cuda_graph_impl: str, reset_cuda_graphs: bool, offload_optimizer_during_inference: bool, offload_kv_cache_during_training: bool, @@ -2241,7 +2241,7 @@ def megatron_rl_inference_mode( Args: model: model to prepare. optimizer: optimizer used to train the model. - enable_cuda_graph: use cuda graphs or not. + cuda_graph_impl: which cuda graph implementation to use. reset_cuda_graphs: rebuild cuda graphs for each inference stage or not. offload_optimizer_during_inference: move optimizer to cpu during inference or not. offload_kv_cache_during_training: manually offload kv cache to host before training or not. 
@@ -2252,7 +2252,7 @@ def megatron_rl_inference_mode( """ args = get_args() - loop = get_event_loop() + loop = get_asyncio_loop() nvtx_range = get_nvtx_range() print(f"[{dist.get_rank()}:DP] Entering inference mode") @@ -2275,8 +2275,9 @@ def megatron_rl_inference_mode( with nvtx_range("offload-optimizer-before-inference"): optimizer.offload_to_cpu() - if enable_cuda_graph: - toggle_cuda_graphs(lang_module, True, reset_cuda_graphs=reset_cuda_graphs) + # TODO: Remove this if statement once a change to `toggle_cuda_graphs` makes it safe to. + if cuda_graph_impl != "none": + toggle_cuda_graphs(lang_module, cuda_graph_impl, reset_cuda_graphs=reset_cuda_graphs) inference_interface = get_inference_interface(args, loop, model) @@ -2286,25 +2287,28 @@ def megatron_rl_inference_mode( reset_cuda_graphs ), "reset_cuda_graphs must be True when offloading kv cache during training" print( - f"[{dist.get_rank()}:DP] Restoring kv cache ({inference_interface._coordinator.engine.context.memory_buffer.numel() / 1024**3:.2f} GB) to GPU" + f"[{dist.get_rank()}:DP] Restoring kv cache ({inference_interface._inference_engine.context.memory_buffer.numel() / 1024**3:.2f} GB) to GPU" ) - kv_cache = inference_interface._coordinator.engine.context.memory_buffer - inference_interface._coordinator.engine.context.memory_buffer = kv_cache.cuda() + kv_cache = inference_interface._inference_engine.context.memory_buffer + inference_interface._inference_engine.context.memory_buffer = kv_cache.cuda() elif remove_kv_cache_during_training: - if inference_interface._coordinator.engine.context.memory_buffer is None: - inference_interface._coordinator.engine.context.build_memory_buffer() + if inference_interface._inference_engine.context.memory_buffer is None: + inference_interface._inference_engine.context.build_memory_buffer() - if enable_cuda_graph and not _CudagraphGlobalRecord.cudagraph_created: + # TODO: Improve this if statement once a change is made to CUDA graph handling. 
+ cuda_graph_exists = len(_CudagraphGlobalRecord.cudagraph_inference_record) != 0 + if cuda_graph_impl != "none" and not cuda_graph_exists: with nvtx_range("wait-for-decode-only"): - while not inference_interface._coordinator.engine.context.is_decode_only(): + while not inference_interface._inference_engine.context.is_decode_only(): active_requests, finished_requests, step_time = loop.run_until_complete( - inference_interface._coordinator.engine.async_step() + inference_interface._inference_engine.async_step() ) with nvtx_range("build-cuda-graphs"): - inference_interface._coordinator.engine.build_cuda_graphs(reset_context=False) + inference_interface._inference_engine.create_cuda_graphs(reset_context=True) - inference_interface.resume() + loop.run_until_complete(inference_interface.resume()) + print(f"[{dist.get_rank()}:DP] Entered inference mode") yield inference_interface with nvtx_range("suspend-engine"): @@ -2312,16 +2316,17 @@ def megatron_rl_inference_mode( with nvtx_range("offload-kv-cache-after-inference"): if offload_kv_cache_during_training: - kv_cache = inference_interface._coordinator.engine.context.memory_buffer + kv_cache = inference_interface._inference_engine.context.memory_buffer print( f"[{dist.get_rank()}:DP] Offloading kv cache ({kv_cache.numel() * kv_cache.element_size() / 1024**3:.2f} GB) to CPU" ) - inference_interface._coordinator.engine.context.memory_buffer = kv_cache.cpu() + inference_interface._inference_engine.context.memory_buffer = kv_cache.cpu() elif remove_kv_cache_during_training: - inference_interface._coordinator.engine.context.memory_buffer = None + inference_interface._inference_engine.context.memory_buffer = None - if enable_cuda_graph: - toggle_cuda_graphs(lang_module, False, reset_cuda_graphs=reset_cuda_graphs) + # TODO: Remove this if statement once a change to `toggle_cuda_graphs` makes it safe to. 
+ if cuda_graph_impl != "none": + toggle_cuda_graphs(lang_module, 'none', reset_cuda_graphs=reset_cuda_graphs) if offload_optimizer_during_inference: with nvtx_range("onload-optimizer-after-inference"): @@ -2348,7 +2353,7 @@ def get_iteration_sequence_count(args): def update_sequence_packing_metrics(args): """Update bin tracking for sequence packing mode.""" - if args.use_sequence_packing: + if args.rl_use_sequence_packing: bin_count = ( mpu.get_data_parallel_world_size() * args.micro_batch_size * get_num_microbatches() ) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index bb1b17e9ba2..be667e32419 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -9,7 +9,6 @@ from pathlib import Path import re import types -import warnings import torch import torch.nn.functional as F @@ -35,6 +34,7 @@ ) from megatron.core.activations import squared_relu from megatron.core.fusions.fused_bias_geglu import quick_gelu +from megatron.training.dist_signal_handler import SIGNAL_MAP from megatron.training.utils import ( get_device_arch_version, update_use_dist_ckpt, @@ -1062,8 +1062,6 @@ def validate_args(args, defaults={}): # MoE Spec check if args.num_experts == 0: args.num_experts = None - if args.num_experts is not None: - assert args.spec is None, "Model Spec must be None when using MoEs" if args.num_experts is not None and args.moe_ffn_hidden_size is None: args.moe_ffn_hidden_size = args.ffn_hidden_size print("Warning: moe_ffn_hidden_size is not set, using ffn_hidden_size for MoE instead.") @@ -1108,6 +1106,20 @@ def validate_args(args, defaults={}): any([args.train_data_path, args.valid_data_path, args.test_data_path]) \ <= 1, "A single data source must be provided in training mode, else None" + if args.fim_data: + extra_tokens = [ + args.fim_prefix_token, + args.fim_middle_token, + args.fim_suffix_token, + args.fim_pad_token, + args.fim_eod_token, + ] + assert not args.mock_data, "Mock dataset is not supported with 
FIM dataset." + assert not args.legacy_tokenizer, "FIM dataset is not supported with legacy tokenizers." + assert args.fim_rate, "--fim-rate should be specified." + assert args.fim_spm_rate, "--fim-spm-rate should be specified." + assert all(token is not None for token in extra_tokens), "FIM extra tokens should be specified." + # Deterministic mode if args.deterministic_mode: assert not args.use_flash_attn, "Flash attention can not be used in deterministic mode." @@ -1182,7 +1194,6 @@ def validate_args(args, defaults={}): if args.inference_dynamic_batching: assert args.inference_dynamic_batching_buffer_size_gb is not None assert args.inference_dynamic_batching_block_size % 256 == 0, "block size should be a multiple of 256" - assert args.inference_dynamic_batching_buffer_guaranteed_fraction is not None # MoE upcycling check if args.moe_use_upcycling: @@ -1407,7 +1418,7 @@ def _add_transformer_engine_args(parser): help='Execute wgrad in higher precision even for FP8 runs', dest='fp8_wgrad') group.add_argument('--transformer-impl', default='transformer_engine', - choices=['local', 'transformer_engine'], + choices=['local', 'transformer_engine', 'inference_optimized'], help='Which Transformer implementation to use.') group.add_argument('--fallback-to-eager-attn', action='store_true', help='Fallback to eager attention in TE implementation. ' @@ -1516,34 +1527,22 @@ def _add_inference_args(parser): help='Enable dynamic batching mode.') group.add_argument('--inference-dynamic-batching-buffer-size-gb', type=float, default=40., - help='Total buffer size (GB) allocated for the block-level KV ' - 'memory.') + help='Amount of on-GPU memory allocated for the KV cache. ' + 'The total amount of memory allocated for the KV cache ' + '(CPU + GPU memory) depends on the value set for the ' + 'unified virtual memory (UVM) level (via ' + '`--inference-dynamic-batching-unified-memory-level`).' 
+ 'If the UVM level is 0, then only GPU memory is used and ' + 'the total memory equals `buffer_size_gb`. If the UVM ' + 'level is 1, then additional memory is utilized on the ' + 'CPU and the total memory equals `2 * buffer_size_gb`.') group.add_argument('--inference-dynamic-batching-block-size', type=int, default=256, help='KV cache block size. ' 'It should be a multiple of 256') - group.add_argument('--inference-dynamic-batching-buffer-guaranteed-fraction', - type=float, default=0.2, - help='Space is reserved within the inference context ' - 'memory buffer to guarantee that a minimum number of ' - 'active requests will always be able to run to ' - 'completion. This is to avoid the context being deadlocked ' - 'by paused requests.') - group.add_argument('--inference-dynamic-batching-buffer-overflow-factor', - type=float, default=None, - help='Scaling factor over the memory buffer size for auto ' - 'computing `max_requests` and `max_tokens`. This scaling ' - 'factor is used for fitting more requests and tokens in ' - 'the memory buffer than it can safely hold, which in turn ' - 'increases throughput.') - group.add_argument('--inference-dynamic-batching-max-requests-override', - type=int, default=None, - help='If set, this overrides the max requests as computed ' - 'from `--inference-dynamic-batching-buffer-overflow-factor`.') - group.add_argument('--inference-dynamic-batching-max-tokens-override', + group.add_argument('--inference-dynamic-batching-max-tokens', type=int, default=None, - help='If set, this overrides the max tokens as computed ' - 'from `--inference-dynamic-batching-buffer-overflow-factor`.') + help='Override the inference context\'s default `max_tokens`.') group.add_argument('--inference-dynamic-batching-num-cuda-graphs', type=int, default=16, help='Maximum number of cuda graphs to capture, where the ' @@ -1560,7 +1559,7 @@ def _add_inference_args(parser): action='store_true', default=False, help='Only use cuda graphs for decode-only steps, not 
prefill and mixed steps.') group.add_argument('--inference-dynamic-batching-unified-memory-level', - type=int, default=0, choices=[0, 1], + type=int, default=1, choices=[0, 1], help='Set unified memory usage within the dynamic ' 'inference context. The levels are: 0) no unified memory, ' '1) allocate `memory_buffer` in unified memory. ' @@ -1580,7 +1579,8 @@ def _add_inference_args(parser): group.add_argument('--inference-wandb-logging-step-interval', type=int, default=0, help='Step interval for logging inference metrics to wandb. ' 'Default to 0 to disable inference wandb logging.') - + group.add_argument("--inference-coordinator-port", type=int, default=12346, + help="This port will be used to setup the inference coordinator on node-0") return parser @@ -2273,7 +2273,10 @@ def _add_training_args(parser): help='Exit the program after this many minutes.') group.add_argument('--exit-signal-handler', action='store_true', help='Dynamically save the checkpoint and shutdown the ' - 'training if SIGTERM is received') + 'training if signal is received') + group.add_argument('--exit-signal', type=str, default='SIGTERM', + choices=list(SIGNAL_MAP.keys()), + help='Signal to use for exit signal handler. 
If not specified, defaults to SIGTERM.') group.add_argument('--tensorboard-dir', type=str, default=None, help='Write TensorBoard logs to this directory.') group.add_argument('--no-masked-softmax-fusion', @@ -3043,6 +3046,27 @@ def _add_data_args(parser): 'If instead this argument is set, the training flow will treat all tokens ' 'that share the same id as the pad token as true pad tokens, potentially ' 'causing severe training instability.') + group.add_argument('--fim-data', action='store_true', help='Whether to use the FIM dataset.') + group.add_argument('--fim-rate', type=float, default=0.5, + help='Probability to convert a training sample into a FIM format.') + group.add_argument('--fim-spm-rate', type=float, default=0.5, + help='Probability that the a FIM sample uses the SPM format over the PSM format.') + group.add_argument('--fim-split-sample', type=str, default=None, + help='String around which to split the sample for FIM.') + group.add_argument('--fim-fragment-rate', type=float, default=None, + help='Rate of FIM on each fragment when --fim-split-sample is not None.') + group.add_argument('--fim-no-prefix', type=str, default=None, + help='Do not apply FIM to fragments that start with this prefix') + group.add_argument('--fim-prefix-token', type=str, default='', + help='FIM prefix token') + group.add_argument('--fim-middle-token', type=str, default='', + help='FIM middle token') + group.add_argument('--fim-suffix-token', type=str, default='', + help='FIM suffix token') + group.add_argument('--fim-pad-token', type=str, default='', + help='FIM PAD token') + group.add_argument('--fim-eod-token', type=str, default='<|endoftext|>', + help='FIM EOD token') return parser diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index feacccba162..48a2025fa63 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -270,7 +270,7 @@ def checkpoint_exists(checkpoints_path): def 
read_metadata(tracker_filename): # Read the tracker file and either set the iteration or # mark it as a release checkpoint. - iteration = 0 + iteration = -1 release = False with open_file(tracker_filename, 'r') as f: @@ -283,7 +283,10 @@ def read_metadata(tracker_filename): print_rank_0('ERROR: Invalid metadata file {}. Exiting'.format( tracker_filename)) sys.exit() - assert iteration > 0 or release, 'error parsing metadata file {}'.format( + else: + # Set iteration to 0 for release checkpoints + iteration = 0 + assert iteration > -1 or release, 'error parsing metadata file {}'.format( tracker_filename) # Get the max iteration retrieved across the ranks. @@ -1828,6 +1831,16 @@ def load_model_state_dict(module, state_dict, strict: bool): is_local_chkpt = (ckpt_type == CheckpointType.LOCAL) ft_integration.on_checkpoint_loaded(is_local_chkpt=is_local_chkpt) + # Patch checkpoint as needed if required field is not found. + if optimizer is not None: + log_printed = False + for param_group in optimizer.param_groups: + if 'default_config' not in param_group: + param_group['default_config'] = True + if not log_printed: + print_rank_0(">>> Inserting 'default_config' field into optimizer.param_groups...") + log_printed = True + return iteration, num_floating_point_operations_so_far diff --git a/megatron/training/datasets/README.md b/megatron/training/datasets/README.md new file mode 100644 index 00000000000..d5543c3d1b5 --- /dev/null +++ b/megatron/training/datasets/README.md @@ -0,0 +1,34 @@ +# Data Pipeline + +## FIM dataset + +`GPTFIMDataset` extends Megatron-Core’s `GPTDataset` to support **Fill-in-the-Middle (FIM)** data augmentation. +It probabilistically converts samples into FIM format using configurable rates, with support for both PSM and SPM patterns, fragment-level splitting, and length-preserving output. + +`GPTFIMDatasetConfig` provides the configuration needed to enable this behavior. 
+`GPTFIMDatasetConfig` configuration object extending `GPTDatasetConfig` to enable FIM preprocessing. + +**Attributes** + +- `rate`: Probability of converting a sample into a FIM example. A value of `1.0` means FIM is always applied. a value of `0.0` means FIM is never applied. +- `spm_rate`: Probability of using the SPM FIM pattern (vs PSM). The remaining probability (`1 - spm_rate`) selects the PSM (prefix-suffix-middle) pattern instead. For example, if `spm_rate = 0.3`: 30% SPM, 70% PSM. +- `extra_tokens`: Dictionary containing the FIM special tokens: {"prefix", "middle", "suffix", "pad", "eod"}. +- `split_sample`: Optional token around which samples are split before applying FIM. If provided, the input sequence is divided at every occurrence of this token, and FIM is applied independently to each fragment. `A B C D E F G H` -> `FIM(Fragment 1) FIM(Fragment 2) FIM(Fragment 3)`. +- `fragment_rate`: Probability of applying FIM to each fragment when split_sample is used. +- `no_prefix`: If the decoded sequence starts with this prefix, FIM is skipped. +`GPTFIMDataset` dataset class that loads token sequences from an `IndexedDataset` and applies FIM transformations before returning each sample. + +**PSM Format** +``` +[prefix_tok] prefix [suffix_tok] suffix [middle_tok] middle +``` + +**SPM Format** +``` +[prefix_tok, suffix_tok] suffix [middle_tok] prefix middle +``` + +**Special cases:** + +- If the sequence starts with no_prefix, FIM is skipped. +- If FIM is not applied, the sample is returned unchanged. 
\ No newline at end of file diff --git a/megatron/legacy/data/data_samplers.py b/megatron/training/datasets/data_samplers.py similarity index 56% rename from megatron/legacy/data/data_samplers.py rename to megatron/training/datasets/data_samplers.py index 1bf1bf5ee91..1e7f47510d1 100644 --- a/megatron/legacy/data/data_samplers.py +++ b/megatron/training/datasets/data_samplers.py @@ -4,13 +4,17 @@ import random -import torch + import numpy as np +import torch from torch.utils.data import Dataset -from megatron.training import get_args + from megatron.core import mpu from megatron.core.datasets.utils import Split +from megatron.training import get_args +from megatron.training.dist_signal_handler import DistributedSignalHandler + def build_pretraining_data_loader(dataset, consumed_samples): """Build dataloader given an input dataset.""" @@ -18,10 +22,10 @@ def build_pretraining_data_loader(dataset, consumed_samples): if dataset is None: return None args = get_args() - - if hasattr(dataset,'split'): + + if hasattr(dataset, 'split'): split = dataset.split - elif hasattr(dataset,'index_split'): + elif hasattr(dataset, 'index_split'): split = dataset.index_split else: split = None @@ -32,7 +36,8 @@ def build_pretraining_data_loader(dataset, consumed_samples): consumed_samples=0, micro_batch_size=args.micro_batch_size, data_parallel_rank=mpu.get_data_parallel_rank(), - data_parallel_size=mpu.get_data_parallel_world_size()) + data_parallel_size=mpu.get_data_parallel_world_size(), + ) elif args.dataloader_type == 'single': # Megatron sampler batch_sampler = MegatronPretrainingSampler( @@ -40,7 +45,8 @@ def build_pretraining_data_loader(dataset, consumed_samples): consumed_samples=consumed_samples, micro_batch_size=args.micro_batch_size, data_parallel_rank=mpu.get_data_parallel_rank(), - data_parallel_size=mpu.get_data_parallel_world_size()) + data_parallel_size=mpu.get_data_parallel_world_size(), + ) elif args.dataloader_type == 'cyclic': batch_sampler = 
MegatronPretrainingRandomSampler( dataset, @@ -49,52 +55,82 @@ def build_pretraining_data_loader(dataset, consumed_samples): micro_batch_size=args.micro_batch_size, data_parallel_rank=mpu.get_data_parallel_rank(), data_parallel_size=mpu.get_data_parallel_world_size(), - data_sharding=args.data_sharding) + data_sharding=args.data_sharding, + ) elif args.dataloader_type == "external": # External dataloaders are passed through. User is expected to provide a # torch-compatible dataloader and define samplers, if needed. return dataset else: - raise Exception('{} dataloader type is not supported.'.format( - args.dataloader_type)) + raise Exception('{} dataloader type is not supported.'.format(args.dataloader_type)) + + def worker_init_fn(_): + DistributedSignalHandler(args.exit_signal).__enter__() + maybe_worker_init_fn = ( + worker_init_fn if args.exit_signal_handler and args.num_workers > 0 else None + ) # Torch dataloader. - return torch.utils.data.DataLoader(dataset, - batch_sampler=batch_sampler, - num_workers=args.num_workers, - pin_memory=True, - persistent_workers=True if args.num_workers > 0 else False, - ) + return torch.utils.data.DataLoader( + dataset, + batch_sampler=batch_sampler, + num_workers=args.num_workers, + pin_memory=True, + persistent_workers=True if args.num_workers > 0 else False, + worker_init_fn=maybe_worker_init_fn, + ) + class MegatronPretrainingSampler: + """ + Sampler for Megatron pretraining dataloaders that divides data samples across + data parallel workers. Each worker receives a contiguous chunk of data determined by + its rank and the micro batch size. Supports dropping the last incomplete batch if + specified, and keeps track of total and consumed samples. Designed to work with + distributed training using Megatron's data parallelism. 
+ """ - def __init__(self, total_samples, consumed_samples, micro_batch_size, - data_parallel_rank, data_parallel_size, drop_last=True): + def __init__( + self, + total_samples, + consumed_samples, + micro_batch_size, + data_parallel_rank, + data_parallel_size, + drop_last=True, + ): # Keep a copy of input params for later use. self.total_samples = total_samples self.consumed_samples = consumed_samples self.micro_batch_size = micro_batch_size self.data_parallel_rank = data_parallel_rank - self.micro_batch_times_data_parallel_size = \ - self.micro_batch_size * data_parallel_size + self.micro_batch_times_data_parallel_size = self.micro_batch_size * data_parallel_size self.drop_last = drop_last # Sanity checks. - assert self.total_samples > 0, \ - 'no sample to consume: {}'.format(self.total_samples) - assert self.consumed_samples < self.total_samples, \ - 'no samples left to consume: {}, {}'.format(self.consumed_samples, - self.total_samples) + assert self.total_samples > 0, 'no sample to consume: {}'.format(self.total_samples) + assert ( + self.consumed_samples < self.total_samples + ), 'no samples left to consume: {}, {}'.format(self.consumed_samples, self.total_samples) assert self.micro_batch_size > 0 assert data_parallel_size > 0 - assert self.data_parallel_rank < data_parallel_size, \ - 'data_parallel_rank should be smaller than data size: {}, ' \ - '{}'.format(self.data_parallel_rank, data_parallel_size) + assert ( + self.data_parallel_rank < data_parallel_size + ), 'data_parallel_rank should be smaller than data size: {}, ' '{}'.format( + self.data_parallel_rank, data_parallel_size + ) def __len__(self): return self.total_samples def get_start_end_idx(self): + """ + Calculate the start and end indices for the current data parallel worker's + chunk within a batch. + + Returns: + tuple: (start_idx, end_idx) indicating the slice of the batch for this worker. 
+ """ start_idx = self.data_parallel_rank * self.micro_batch_size end_idx = start_idx + self.micro_batch_size return start_idx, end_idx @@ -116,17 +152,37 @@ def __iter__(self): class RandomSeedDataset(Dataset): + """ + A dataset wrapper that resets the random seed before each sample. - def __init__(self, dataset): - args = get_args() - self.base_seed = args.seed - self.curr_seed = args.seed + This ensures deterministic behavior per sample by setting the RNG state + for torch, numpy, and random before accessing each underlying data sample. + The base seed is retrieved from training arguments, and can be varied per epoch + using the set_epoch method to ensure different shuffling or augmentation each epoch. + + Args: + dataset: The underlying dataset to wrap. + + Methods: + set_epoch(epoch): Change the seed offset so each epoch produces different randomization. + __getitem__(idx): Sets the seed based on the sample index and current epoch. + """ + + def __init__(self, dataset, seed): + self.base_seed = seed + self.curr_seed = seed self.dataset = dataset def __len__(self): return len(self.dataset) def set_epoch(self, epoch): + """ + Change the seed offset so each epoch produces different randomization. + + Args: + epoch: The epoch number to use as the seed offset. + """ self.curr_seed = self.base_seed + epoch def __getitem__(self, idx): @@ -138,9 +194,23 @@ def __getitem__(self, idx): class MegatronPretrainingRandomSampler: + """ + Sampler for Megatron pretraining dataloaders that performs random sampling + across data parallel workers. Supports data sharding to divide the dataset + into buckets and shuffle within each bucket. Designed to work with distributed + training using Megatron's data parallelism. 
+ """ - def __init__(self, dataset, total_samples, consumed_samples, micro_batch_size, - data_parallel_rank, data_parallel_size, data_sharding): + def __init__( + self, + dataset, + total_samples, + consumed_samples, + micro_batch_size, + data_parallel_rank, + data_parallel_size, + data_sharding, + ): # Keep a copy of input params for later use. self.dataset = dataset self.total_samples = total_samples @@ -149,19 +219,18 @@ def __init__(self, dataset, total_samples, consumed_samples, micro_batch_size, self.data_parallel_rank = data_parallel_rank self.data_parallel_size = data_parallel_size self.data_sharding = data_sharding - self.micro_batch_times_data_parallel_size = \ - self.micro_batch_size * data_parallel_size - self.last_batch_size = \ - self.total_samples % self.micro_batch_times_data_parallel_size + self.micro_batch_times_data_parallel_size = self.micro_batch_size * data_parallel_size + self.last_batch_size = self.total_samples % self.micro_batch_times_data_parallel_size # Sanity checks. 
- assert self.total_samples > 0, \ - 'no sample to consume: {}'.format(self.total_samples) + assert self.total_samples > 0, 'no sample to consume: {}'.format(self.total_samples) assert self.micro_batch_size > 0 assert data_parallel_size > 0 - assert self.data_parallel_rank < data_parallel_size, \ - 'data_parallel_rank should be smaller than data size: {}, ' \ - '{}'.format(self.data_parallel_rank, data_parallel_size) + assert ( + self.data_parallel_rank < data_parallel_size + ), 'data_parallel_rank should be smaller than data size: {}, ' '{}'.format( + self.data_parallel_rank, data_parallel_size + ) def __len__(self): return self.total_samples @@ -177,8 +246,9 @@ def __iter__(self): # data sharding and random sampling if self.data_sharding: - bucket_size = (self.total_samples // self.micro_batch_times_data_parallel_size) \ - * self.micro_batch_size + bucket_size = ( + self.total_samples // self.micro_batch_times_data_parallel_size + ) * self.micro_batch_size bucket_offset = current_epoch_samples // self.data_parallel_size start_idx = self.data_parallel_rank * bucket_size @@ -187,15 +257,13 @@ def __iter__(self): random_idx = torch.randperm(bucket_size, generator=g).tolist() idx_range = [start_idx + x for x in random_idx[bucket_offset:]] else: - full_bucket_size = (self.total_samples // self.micro_batch_size) \ - * self.micro_batch_size + full_bucket_size = (self.total_samples // self.micro_batch_size) * self.micro_batch_size full_bucket_offset = current_epoch_samples g = torch.Generator() g.manual_seed(self.epoch) - idx_range_total = \ - torch.randperm(full_bucket_size, generator=g).tolist() + idx_range_total = torch.randperm(full_bucket_size, generator=g).tolist() idx_range_active = idx_range_total[full_bucket_offset:] - idx_range = idx_range_active[self.data_parallel_rank::self.data_parallel_size] + idx_range = idx_range_active[self.data_parallel_rank :: self.data_parallel_size] batch = [] # Last batch if not complete will be dropped. 
diff --git a/megatron/training/datasets/fim_dataset.py b/megatron/training/datasets/fim_dataset.py new file mode 100644 index 00000000000..730b7e033a1 --- /dev/null +++ b/megatron/training/datasets/fim_dataset.py @@ -0,0 +1,308 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +from typing import Dict, Tuple, Optional +from dataclasses import dataclass, field + +import numpy as np +import logging +from megatron.core.datasets.gpt_dataset import GPTDataset, GPTDatasetConfig +from megatron.core.datasets.indexed_dataset import IndexedDataset +from megatron.core.datasets.utils import Split + +logger = logging.getLogger(__name__) + + +@dataclass +class GPTFIMDatasetConfig(GPTDatasetConfig): + """Configuration object for Megatron Core GPT FIM datasets""" + + fim_rate: float = None + """Probability to convert a training sample into a FIM format""" + + fim_spm_rate: float = None + """Probability that the a FIM sample uses the SPM format over the PSM format""" + + fim_extra_tokens: Dict = None + """FIM extra tokens. 
Should consist of prefix, middle, suffix, PAD, and EOD tokens.""" + + fim_split_sample: Optional[str] = None + """String around which to split the sample for FIM""" + + fim_fragment_rate: Optional[float] = None + """Rate of FIM on each fragment when split_sample is not None""" + + fim_no_prefix: Optional[str] = None + """Do not apply FIM to fragments that start with this prefix""" + + +class GPTFIMDataset(GPTDataset): + """The base GPT dataset + + Args: + indexed_dataset (IndexedDataset): The IndexedDataset around which to build the + MegatronDataset + + indexed_indices (np.ndarray): The set of the documents indices to expose + + num_samples (int): The number of samples to draw from the indexed dataset + + index_split (Split): The indexed_indices Split + + config (GPTFIMDatasetConfig): The GPT-specific container for all config sourced parameters + """ + + def __init__( + self, + indexed_dataset: IndexedDataset, + dataset_path: str, + indexed_indices: np.ndarray, + num_samples: int, + index_split: Split, + config: GPTFIMDatasetConfig, + ) -> None: + super().__init__( + indexed_dataset, dataset_path, indexed_indices, num_samples, index_split, config + ) + + self.np_rng = np.random.RandomState(seed=self.config.random_seed) + logger.info(f"Initialized FIM RNG with seed = {self.config.random_seed}") + # get FIM params + self.fim_rate = self.config.fim_rate + self.fim_spm_rate = self.config.fim_spm_rate + self.fragment_fim_rate = self.config.fim_fragment_rate + fim_split_sample = self.config.fim_split_sample + self.no_fim_prefix = self.config.fim_no_prefix + if fim_split_sample: + fim_split_sample_ids = self.config.tokenizer._tokenizer.tokens_to_ids(fim_split_sample) + assert isinstance(fim_split_sample_ids, int) or len(fim_split_sample_ids) == 1 + self.fim_split_sample = ( + fim_split_sample_ids + if isinstance(fim_split_sample_ids, int) + else fim_split_sample_ids[0] + ) + else: + self.fim_split_sample = None + + # get extra tokens ids + fim_tokens = 
self.config.fim_extra_tokens + fim_tokens = [ + fim_tokens["prefix"], + fim_tokens["middle"], + fim_tokens["suffix"], + fim_tokens["pad"], + fim_tokens["eod"], + ] + fim_tokens_ids = self.config.tokenizer._tokenizer.tokens_to_ids(fim_tokens) + ( + self.prefix_tok_id, + self.middle_tok_id, + self.suffix_tok_id, + self.pad_tok_id, + self.eod_tok_id, + ) = fim_tokens_ids + + def _query_document_sample_shuffle_indices(self, idx: int) -> Tuple[np.ndarray, np.ndarray]: + """Get the text (token ids) and document ids for a given index + + Args: + idx (int): The index into the dataset + + Returns: + Tuple[np.ndarray, np.ndarray]: The text ids and document ids + """ + # Do the shuffle mapping + idx = self.shuffle_index[idx] + + # Get the beginning and end documents and offsets + doc_index_beg, doc_index_beg_offset = self.sample_index[idx] + doc_index_end, doc_index_end_offset = self.sample_index[idx + 1] + + document_ids = [] + sample_parts = [] + + # Sample spans a single document + if doc_index_beg == doc_index_end: + # Add the document id + document_ids.append(self.document_index[doc_index_beg]) + + # Add the entire sample + sample_parts.append( + self.dataset.get( + self.document_index[doc_index_beg], + offset=doc_index_beg_offset, + length=doc_index_end_offset - doc_index_beg_offset + 1, + ) + ) + + # Sample spans multiple documents + else: + for i in range(doc_index_beg, doc_index_end + 1): + # Add the document id + document_ids.append(self.document_index[i]) + + # Add the sample part + offset = 0 if i > doc_index_beg else doc_index_beg_offset + length = None if i < doc_index_end else doc_index_end_offset + 1 + sample_parts.append( + self.dataset.get(self.document_index[i], offset=offset, length=length) + ) + + sample = np.concatenate(sample_parts) + + sample_len = sample.shape[0] + segment_breaks = np.argwhere(sample == self.eod_tok_id) + + if segment_breaks.shape != (0, 1): # then there is an EOD token in this example + curr_start_position = 0 + new_samples = [] + 
for loc in np.nditer(segment_breaks): + # Only permute non-empty segments. + if loc - curr_start_position > 0: + # permute {prefix, suffix, middle} or {suffix, prefix, middle} + permuted = self._fim_split_and_permute_sequence(sample[curr_start_position:loc]) + new_samples += [permuted, [self.eod_tok_id]] + + curr_start_position = loc + 1 # jump over the EOD token + # Permute the segment after the last EOD + permuted = self._fim_split_and_permute_sequence(sample[curr_start_position:]) + new_samples.append(permuted) + + sample = np.concatenate(new_samples) + else: + sample = self._fim_split_and_permute_sequence(sample) + + diff = sample.shape[0] - sample_len + if diff > 0: # too long + sample = sample[:sample_len] + elif diff < 0: # too short + sample = np.concatenate([sample, np.full((-1 * diff), self.pad_tok_id)]) + + assert sample.shape[0] == sample_len + + return (np.array(sample, dtype=np.int64), np.array(document_ids, dtype=np.int64)) + + def _fim_permute_sequence(self, sequence, rate): + return self._permute( + sequence, + rate, + self.fim_spm_rate, + self.config.tokenizer, + truncate_or_pad=False, + suffix_tok_id=self.suffix_tok_id, + prefix_tok_id=self.prefix_tok_id, + middle_tok_id=self.middle_tok_id, + pad_tok_id=self.pad_tok_id, + no_fim_prefix=self.no_fim_prefix, + ) + + def _fim_split_and_permute_sequence(self, sequence): + """ + If self.fim_split_sample is not None, split the sequence. + Then apply FIM on the fragments, or the whole sequence if self.fim_split_sample is None. + """ + if self.fim_split_sample is None: + return self._fim_permute_sequence(sequence, self.fim_rate) + # fim_split_sample is set: split the sample on this token and permute each fragment separately. + # Typically, if each sample is a repository, then we split again on the file level. + # Each fragment is a file, and we permute the files. 
+ fragment_breaks = np.argwhere(sequence == self.fim_split_sample) + if fragment_breaks.shape == (0, 1): + # no split token in this sample + return self._fim_permute_sequence(sequence, self.fim_rate) + if not self.np_rng.binomial(1, self.fim_rate): + # don't do FIM preproc + return sequence + # Do FIM on each fragment + curr_start_position = 0 + new_samples = [] + for loc in np.nditer(fragment_breaks): + if loc - curr_start_position > 0: + permuted = self._fim_permute_sequence( + sequence[curr_start_position:loc], self.fragment_fim_rate + ) + new_samples += [permuted, [self.fim_split_sample]] + curr_start_position = loc + 1 # Jump over the split token + # Permute the segment after the last split token + permuted = self._fim_permute_sequence( + sequence[curr_start_position:], self.fragment_fim_rate + ) + new_samples.append(permuted) + + return np.concatenate(new_samples) + + def _permute( + self, + sample, + fim_rate, + fim_spm_rate, + tokenizer, + truncate_or_pad=True, + suffix_tok_id=None, + prefix_tok_id=None, + middle_tok_id=None, + pad_tok_id=None, + no_fim_prefix=None, + ): + """ + Take in a sample (np array w/ size (0,chunklength)) and perform a FIM transformation on it. + Maintain the same sample length (if transform creates a few extra tokens, drop them). 
+ """ + if self.np_rng.binomial(1, fim_rate): # sample bernoulli dist + + contents = tokenizer._tokenizer.ids_to_text(sample) + + # Do not apply FIM if the sample starts with no_fim_prefix + if no_fim_prefix is not None and contents.startswith(no_fim_prefix): + return sample + + try: + # A boundary can be =0 (prefix will be empty) + # a boundary can be =len(contents) (suffix will be empty) + # The two boundaries can be equal (middle will be empty) + boundaries = list(self.np_rng.randint(low=0, high=len(contents) + 1, size=2)) + boundaries.sort() + except ValueError as e: + print(len(contents), contents) + print(e) + raise e + + prefix = contents[: boundaries[0]] + middle = contents[boundaries[0] : boundaries[1]] + suffix = contents[boundaries[1] :] + + prefix = np.array([*tokenizer._tokenizer.text_to_ids(prefix)], dtype=np.int64) + middle = np.array([*tokenizer._tokenizer.text_to_ids(middle)], dtype=np.int64) + suffix = np.array([*tokenizer._tokenizer.text_to_ids(suffix)], dtype=np.int64) + + # here we truncate each given segment to fit the same length as it was before + # A consequence is that we never reach the end of a file? + # we should rather truncate at the context-level + if truncate_or_pad: + # need to make same length as the input. Take the 3 sentinel tokens into account + new_length = suffix.shape[0] + prefix.shape[0] + middle.shape[0] + 3 + diff = new_length - sample.shape[0] + if diff > 0: # too long + if ( + suffix.shape[0] <= diff + ): # if there's no space to truncate the suffix: stop and report it. 
atm i should have stopped this from happening + return sample + suffix = suffix[: suffix.shape[0] - diff] + elif diff < 0: # too short + suffix = np.concatenate([suffix, np.full((-1 * diff), pad_tok_id)]) + + if self.np_rng.binomial(1, fim_spm_rate): + # SPM (variant 2 from FIM paper) + new_sample = np.concatenate( + [[prefix_tok_id, suffix_tok_id], suffix, [middle_tok_id], prefix, middle] + ) + else: + # PSM + new_sample = np.concatenate( + [[prefix_tok_id], prefix, [suffix_tok_id], suffix, [middle_tok_id], middle] + ) + + else: + # don't do FIM preproc + new_sample = sample + + return new_sample diff --git a/megatron/training/dist_signal_handler.py b/megatron/training/dist_signal_handler.py index f4b4fbf5c0d..f1f3725c8a9 100644 --- a/megatron/training/dist_signal_handler.py +++ b/megatron/training/dist_signal_handler.py @@ -3,6 +3,12 @@ import torch +SIGNAL_MAP = { + 'SIGTERM': signal.SIGTERM, + 'SIGINT': signal.SIGINT, + 'SIGUSR1': signal.SIGUSR1, + 'SIGUSR2': signal.SIGUSR2 +} def get_world_size(): if torch.distributed.is_available() and torch.distributed.is_initialized(): @@ -49,8 +55,8 @@ def all_gather_item(item, dtype, group=None, async_op=False, local_rank=None): class DistributedSignalHandler: - def __init__(self, sig=signal.SIGTERM): - self.sig = sig + def __init__(self, sig: str = 'SIGTERM'): + self.sig = SIGNAL_MAP.get(sig, signal.SIGTERM) def signals_received(self): all_received = all_gather_item( diff --git a/megatron/training/global_vars.py b/megatron/training/global_vars.py index ec402263d29..a718877b40c 100644 --- a/megatron/training/global_vars.py +++ b/megatron/training/global_vars.py @@ -11,7 +11,7 @@ from megatron.core.energy_monitor import EnergyMonitor from megatron.core.jit import disable_jit_fuser from megatron.core.num_microbatches_calculator import init_num_microbatches_calculator, unset_num_microbatches_calculator -from megatron.training import dist_signal_handler +from megatron.training.dist_signal_handler import 
DistributedSignalHandler from megatron.training.tokenizer import build_tokenizer _GLOBAL_ARGS = None @@ -74,10 +74,11 @@ def get_signal_handler(): return _GLOBAL_SIGNAL_HANDLER -def _set_signal_handler(): +def _set_signal_handler(exit_signal): + global _GLOBAL_SIGNAL_HANDLER _ensure_var_is_not_initialized(_GLOBAL_SIGNAL_HANDLER, 'signal handler') - _GLOBAL_SIGNAL_HANDLER = dist_signal_handler.DistributedSignalHandler().__enter__() + _GLOBAL_SIGNAL_HANDLER = DistributedSignalHandler(exit_signal).__enter__() @@ -110,7 +111,7 @@ def set_global_variables(args, build_tokenizer=True): set_experimental_flag(True) if args.exit_signal_handler: - _set_signal_handler() + _set_signal_handler(args.exit_signal) if args.disable_jit_fuser: disable_jit_fuser() diff --git a/megatron/training/training.py b/megatron/training/training.py index 9986f931641..58dcfbde734 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -2,6 +2,7 @@ """Pretrain utilities.""" +import copy import dataclasses from datetime import datetime, timedelta import functools @@ -11,7 +12,7 @@ import math import os import sys -from typing import List, Optional +from typing import Any, Optional import torch.distributed @@ -33,7 +34,7 @@ except ImportError: has_rl_utils = False try: - from megatron.post_training.algos.distillation import ( + from modelopt.torch.distill.plugins.megatron import ( get_tensor_shapes_adjust_fn_for_distillation, ) @@ -75,7 +76,7 @@ from megatron.core.distributed import finalize_model_grads from megatron.core.enums import ModelType -from megatron.core.optimizer import get_megatron_optimizer, OptimizerConfig +from megatron.core.optimizer import get_megatron_optimizer, AdamOptimizerConfig, SGDOptimizerConfig, OptimizerConfig, ParamKey from megatron.core.optimizer.muon import get_megatron_muon_optimizer from megatron.core.rerun_state_machine import ( get_rerun_state_machine, @@ -87,7 +88,7 @@ from megatron.training.initialize import write_args_to_tensorboard from 
megatron.training.initialize import set_jit_fusion_options from megatron.training.utils import get_batch_on_this_cp_rank, get_batch_on_this_tp_rank -from megatron.legacy.data.data_samplers import build_pretraining_data_loader +from megatron.training.datasets.data_samplers import build_pretraining_data_loader from megatron.core.optimizer_param_scheduler import OptimizerParamScheduler from megatron.core.transformer.moe import upcycling_utils from megatron.core.transformer.moe.moe_utils import track_moe_metrics @@ -161,22 +162,32 @@ def num_floating_point_operations(args, batch_size): def calculate_layer_counts(): """Calculate the number of attention, Mamba, and MLP layers.""" if args.hybrid_override_pattern: - counts = {'M': 0, '*': 0, '-': 0} + counts = {'M': 0, '*': 0, '-': 0, 'E':0} for layer_type in args.hybrid_override_pattern: if layer_type in counts: counts[layer_type] += 1 - return counts['*'], counts['M'], counts['-'] + return counts['*'], counts['M'], counts['-'], counts['E'] else: num_attn_layers = round(args.num_layers * args.hybrid_attention_ratio) num_mlp_layers = round(args.num_layers * args.hybrid_mlp_ratio) num_mamba_layers = args.num_layers - num_attn_layers - num_mlp_layers - return num_attn_layers, num_mamba_layers, num_mlp_layers + num_moe_layers = 0 + return num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers def mlp_layer_flops(batch_size, seq_len, hidden_size, expansion=4.0, swiglu=False): """Calculate FLOPs for an MLP layer.""" scale_factor = 3.0 / 2.0 if swiglu else 1.0 return 4 * expansion * scale_factor * batch_size * seq_len * hidden_size**2 + def moe_layer_flops(batch_size, seq_len, hidden_size, moe_ffn_hidden_size, + shared_expert_ffn_hidden_size, num_experts_routed_to, swiglu=False): + """Calculate FLOPs for an MoE layer.""" + scale_factor = 3.0 / 2.0 if swiglu else 1.0 + routed_flops = (4 * batch_size * seq_len * hidden_size * + moe_ffn_hidden_size * num_experts_routed_to * scale_factor) + shared_flops = 4 * batch_size * 
seq_len * hidden_size * shared_expert_ffn_hidden_size * scale_factor + return routed_flops + shared_flops + def attn_layer_flops( batch_size, seq_len, hidden_size, num_heads, gqa=True, gqa_groups=8, kv_channels=None ): @@ -215,12 +226,13 @@ def mamba_layer_flops(batch_size, seq_len, hidden_size, state_dim=16, ) def hybrid_flops(batch_size, seq_len, hidden_size, - num_attn_layers, num_mamba_layers, num_mlp_layers, + num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers, mamba_state_dim=128, mamba_head_dim=64, mamba_num_groups=8, mamba_num_heads=128, - num_attn_heads=32,gqa=True, + num_attn_heads=32, gqa=True, gqa_groups=8, kv_channels=None, mlp_expansion=4.0, swiglu=False, + moe_ffn_hidden_size=2048, shared_expert_ffn_hidden_size=2048, num_experts_routed_to=1, vocab_size=256000): """Calculate total FLOPs for the hybrid model.""" flops_fwd = ( @@ -231,6 +243,8 @@ def hybrid_flops(batch_size, seq_len, hidden_size, num_mamba_layers * mamba_layer_flops(batch_size, seq_len, hidden_size, mamba_state_dim, mamba_head_dim, mamba_num_groups, mamba_num_heads) + + num_moe_layers * moe_layer_flops(batch_size, seq_len, hidden_size, moe_ffn_hidden_size, + shared_expert_ffn_hidden_size, num_experts_routed_to, swiglu) + (2 * batch_size * seq_len * hidden_size * vocab_size) # logits computation ) return flops_fwd * 3 @@ -479,7 +493,7 @@ def transformer_flops(): # Main entrypoint for FLOPs calculation. if args.is_hybrid_model: # Calculate the number of each type of layer. - num_attn_layers, num_mamba_layers, num_mlp_layers = calculate_layer_counts() + num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers = calculate_layer_counts() # Compute hybrid model FLOPs. 
return hybrid_flops( @@ -489,6 +503,7 @@ def transformer_flops(): num_attn_layers=num_attn_layers, num_mamba_layers=num_mamba_layers, num_mlp_layers=num_mlp_layers, + num_moe_layers=num_moe_layers, mamba_state_dim=args.mamba_state_dim, mamba_head_dim=args.mamba_head_dim, mamba_num_groups=args.mamba_num_groups, @@ -499,6 +514,11 @@ def transformer_flops(): kv_channels=args.kv_channels, mlp_expansion=args.ffn_hidden_size / args.hidden_size, swiglu=args.swiglu, + moe_ffn_hidden_size=(args.moe_ffn_hidden_size if args.moe_ffn_hidden_size is not None + else args.ffn_hidden_size), + shared_expert_ffn_hidden_size=(0 if args.moe_shared_expert_intermediate_size is None + else args.moe_shared_expert_intermediate_size), + num_experts_routed_to=args.moe_router_topk, vocab_size=args.padded_vocab_size, ) else: @@ -594,30 +614,6 @@ def reorder_inner_param_groups(optimizer_state_dict): return preprocessed_common_state_dict -def get_no_weight_decay_cond(no_weight_decay_cond_type, default_skip_embedding_weight_decay): - """Get the no weight decay condition function.""" - - # Default case: no_weight_decay_cond_type is None - no_weight_decay_cond_fn = None - - if no_weight_decay_cond_type == 'apply_wd_to_qk_layernorm': - # Qwen3-Next applies weight decay to qk layernorm as a special case - def apply_wd_to_qk_layernorm_fn(name, param): - if "q_layernorm" in name or "k_layernorm" in name: - no_wd = False - else: - no_wd = ( - name.endswith(".bias") - or len(param.shape) == 1 - or (default_skip_embedding_weight_decay and "embedding" in name) - ) - return no_wd - no_weight_decay_cond_fn = apply_wd_to_qk_layernorm_fn - elif no_weight_decay_cond_type is not None: - raise ValueError(f"Invalid no_weight_decay_cond_type: {no_weight_decay_cond_type}") - - return no_weight_decay_cond_fn - def pretrain( train_valid_test_dataset_provider, model_provider, @@ -754,15 +750,8 @@ def pretrain( # Model, optimizer, and learning rate. 
timers('model-and-optimizer-setup', log_level=0).start(barrier=True) - no_weight_decay_cond = get_no_weight_decay_cond( - args.no_weight_decay_cond_type, - default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, - ) model, optimizer, opt_param_scheduler = setup_model_and_optimizer( - model_provider, - model_type, - checkpointing_context=checkpointing_context, - no_weight_decay_cond=no_weight_decay_cond, + model_provider, model_type, checkpointing_context=checkpointing_context ) timers('model-and-optimizer-setup').stop() @@ -1178,12 +1167,45 @@ def get_optimizer_param_scheduler(optimizer): return opt_param_scheduler +def get_megatron_optimizer_config(args: Any) -> OptimizerConfig: + """Return a Megatron optimizer config object from Megatron's arguments.""" + + config = None + if args.optimizer == 'adam' or 'muon' in args.optimizer: + # TODO(deyuf): Muon needs both adam + muon but get() only receive one config + # So for now we keep using adam config that's back compat with old way + kwargs = {} + for f in dataclasses.fields(AdamOptimizerConfig): + if hasattr(args, f.name): + kwargs[f.name] = getattr(args, f.name) + config = AdamOptimizerConfig(**kwargs) + elif args.optimizer == 'sgd': + kwargs = {} + for f in dataclasses.fields(SGDOptimizerConfig): + if hasattr(args, f.name): + kwargs[f.name] = getattr(args, f.name) + config = SGDOptimizerConfig(**kwargs) + else: + raise ValueError("Invalid optimizer type!") + + # Construct the appropriate config_overrides object. + # TODO: add more logic here as needed down the road. 
+ if args.decoupled_lr is not None: + decoupled_param_key = ParamKey(attr="is_embedding_or_output_parameter") + decoupled_optimizer_config = copy.deepcopy(config) + decoupled_optimizer_config.lr = args.decoupled_lr + if args.decoupled_min_lr is not None: + decoupled_optimizer_config.min_lr = args.decoupled_min_lr + config_overrides = {decoupled_param_key: decoupled_optimizer_config} + else: + config_overrides = None + + return config, config_overrides + + def setup_model_and_optimizer( model_provider_func, model_type, - no_weight_decay_cond=None, - scale_lr_cond=None, - lr_mult=1.0, checkpointing_context=None, ): """Setup model and optimizer.""" @@ -1195,33 +1217,25 @@ def setup_model_and_optimizer( unwrapped_model = unwrap_model(model) one_logger and one_logger.log_metrics({"app_build_optimzer_start_time": one_logger_utils.get_timestamp_in_ms()}) - kwargs = {} - for f in dataclasses.fields(OptimizerConfig): - if hasattr(args, f.name): - kwargs[f.name] = getattr(args, f.name) - config = OptimizerConfig(**kwargs) + config, config_overrides = get_megatron_optimizer_config(args) config.timers = timers if 'muon' not in config.optimizer: + # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings + # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903 + # default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, optimizer = get_megatron_optimizer( config, model, - no_weight_decay_cond, - scale_lr_cond, - lr_mult, + config_overrides=config_overrides, use_gloo_process_groups=args.enable_gloo_process_groups, - # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings - # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903 - default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, dump_param_to_param_group_map=args.dump_param_to_param_group_map, ) else: optimizer = 
get_megatron_muon_optimizer( config, model, - no_weight_decay_cond, - scale_lr_cond, - lr_mult, + config_overrides=config_overrides, use_gloo_process_groups=args.enable_gloo_process_groups, layer_wise_distributed_optimizer='dist' in config.optimizer, ) @@ -1365,7 +1379,10 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch if has_nvidia_modelopt: # [ModelOpt]: Pipeline-parallel Distillation stacks student and teacher tensors adjust_tensor_shapes_fn = get_tensor_shapes_adjust_fn_for_distillation( - model, args.seq_length, args.micro_batch_size, args.decoder_seq_length + model, + seq_length=args.seq_length, + micro_batch_size=args.micro_batch_size, + decoder_seq_length=args.decoder_seq_length, ) else: adjust_tensor_shapes_fn = None @@ -1494,7 +1511,6 @@ def training_log( loss_dict, total_loss_dict, learning_rate, - decoupled_learning_rate, iteration, loss_scale, report_memory_flag, @@ -1599,8 +1615,6 @@ def training_log( writer.add_scalar('learning-rate vs samples', learning_rate, args.consumed_train_samples) if wandb_writer: wandb_writer.log({'learning-rate': learning_rate}, iteration) - if args.decoupled_lr is not None: - writer.add_scalar('decoupled-learning-rate', decoupled_learning_rate, iteration) if args.skipped_train_samples > 0: writer.add_scalar('skipped-train-samples', args.skipped_train_samples, iteration) if wandb_writer: @@ -1680,6 +1694,12 @@ def training_log( track_names.append("global_load_balancing_loss") if args.moe_z_loss_coeff is not None: track_names.append("z_loss") + + if args.is_hybrid_model: + layers = args.hybrid_override_pattern.count('E') + else: + layers = args.num_layers + track_moe_metrics( loss_scale=moe_loss_scale, iteration=iteration, @@ -1689,7 +1709,7 @@ def training_log( per_layer_logging=args.moe_per_layer_logging, force_initialize=True, track_names=track_names, - num_layers=args.num_layers, + num_layers=layers, moe_layer_freq=args.moe_layer_freq, mtp_num_layers=args.mtp_num_layers, ) @@ -1750,14 
+1770,6 @@ def training_log( wandb_writer.log({'power/gpu': power}, iteration) # Decoupled_learning_rate should be not None only on first and last pipeline stage. log_string += f' learning rate: {learning_rate:.6E} |' - if args.decoupled_lr is not None and ( - mpu.is_pipeline_first_stage(ignore_virtual=True) - or mpu.is_pipeline_last_stage(ignore_virtual=True) - ): - assert decoupled_learning_rate is not None - log_string += f' decoupled learning rate: {decoupled_learning_rate:.6E} |' - else: - assert decoupled_learning_rate is None log_string += f' global batch size: {batch_size:5d} |' for key in total_loss_dict: if key not in [advanced_iters_key, skipped_iters_key, nan_iters_key]: @@ -2523,19 +2535,15 @@ def get_e2e_base_metrics(): if args.log_params_norm: params_norm = calc_params_l2_norm(model) learning_rate = None - decoupled_learning_rate = None for param_group in optimizer.param_groups: if len(param_group['params']) == 0: continue - if param_group['is_decoupled_lr']: - decoupled_learning_rate = param_group['lr'] - else: + if param_group['default_config']: learning_rate = param_group['lr'] report_memory_flag = training_log( loss_dict, total_loss_dict, learning_rate, - decoupled_learning_rate, iteration, loss_scale, report_memory_flag, diff --git a/pretrain_gpt.py b/pretrain_gpt.py index ecb7163ff70..9b13d66c7a7 100644 --- a/pretrain_gpt.py +++ b/pretrain_gpt.py @@ -20,6 +20,7 @@ from megatron.training.arguments import core_transformer_config_from_args from megatron.training import get_args, get_timers, get_tokenizer, inprocess_restart, pretrain, print_rank_0 from megatron.training.datasets.sft_dataset import SFTDataset +from megatron.training.datasets.fim_dataset import GPTFIMDataset, GPTFIMDatasetConfig from megatron.training.utils import ( get_batch_on_this_cp_rank, get_batch_on_this_tp_rank, @@ -185,26 +186,49 @@ def core_gpt_dataset_config_from_args(args): blend_per_split: Optional[List[Optional[Tuple[List[str], Optional[List[float]]]]]] blend, 
blend_per_split = get_blend_and_blend_per_split(args) - return GPTDatasetConfig( - random_seed=args.seed, - sequence_length=args.seq_length, - blend=blend, - blend_per_split=blend_per_split, - split=args.split, - multiple_validation_sets=args.multiple_validation_sets, - full_validation=args.full_validation, - num_dataset_builder_threads=args.num_dataset_builder_threads, - path_to_cache=args.data_cache_path, - mmap_bin_files=args.mmap_bin_files, - tokenizer=tokenizer, - reset_position_ids=args.reset_position_ids, - reset_attention_mask=args.reset_attention_mask, - eod_mask_loss=args.eod_mask_loss, - create_attention_mask=args.create_attention_mask_in_dataloader, - object_storage_cache_path=args.object_storage_cache_path, - mid_level_dataset_surplus=args.mid_level_dataset_surplus, - allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens, - ) + data_args = { + "random_seed": args.seed, + "sequence_length": args.seq_length, + "blend": blend, + "blend_per_split": blend_per_split, + "split": args.split, + "multiple_validation_sets": args.multiple_validation_sets, + "full_validation": args.full_validation, + "num_dataset_builder_threads": args.num_dataset_builder_threads, + "path_to_cache": args.data_cache_path, + "mmap_bin_files": args.mmap_bin_files, + "tokenizer": tokenizer, + "reset_position_ids": args.reset_position_ids, + "reset_attention_mask": args.reset_attention_mask, + "eod_mask_loss": args.eod_mask_loss, + "create_attention_mask": args.create_attention_mask_in_dataloader, + "object_storage_cache_path": args.object_storage_cache_path, + "mid_level_dataset_surplus": args.mid_level_dataset_surplus, + "allow_ambiguous_pad_tokens": args.allow_ambiguous_pad_tokens, + } + + # add FIM args to the config + if args.fim_data: + extra_tokens = { + "prefix": args.fim_prefix_token, + "middle": args.fim_middle_token, + "suffix": args.fim_suffix_token, + "pad": args.fim_pad_token, + "eod": args.fim_eod_token, + } + data_args.update( + { + "fim_rate": args.fim_rate, + 
"fim_spm_rate": args.fim_spm_rate, + "fim_extra_tokens": extra_tokens, + "fim_split_sample": args.fim_split_sample, + "fim_fragment_rate": args.fim_fragment_rate, + "fim_no_prefix": args.fim_no_prefix, + } + ) + return GPTFIMDatasetConfig(**data_args) + + return GPTDatasetConfig(**data_args) def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None): @@ -222,6 +246,8 @@ def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None else: if args.mock_data: dataset_type = MockGPTDataset + elif args.fim_data: + dataset_type = GPTFIMDataset else: dataset_type = GPTDataset diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..cd90888e65d --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.89074, + "2": 10.89234, + "3": 10.89032, + "4": 10.89221, + "5": 10.89416, + "6": 10.90226, + "7": 10.8884, + "8": 10.90211, + "9": 10.90202, + "10": 10.88512, + "11": 10.87636, + "12": 10.89499, + "13": 10.89837, + "14": 10.89182, + "15": 10.85125, + "16": 10.8534, + "17": 10.82862, + "18": 10.83653, + "19": 10.82847, + "20": 10.74583, + "21": 10.73117, + "22": 10.61256, + "23": 10.72616, + "24": 10.62932, + "25": 10.59394, + "26": 10.63357, + "27": 10.63137, + "28": 10.58201, + "29": 10.58671, + "30": 10.40936, + "31": 10.15873, + "32": 10.48319, + "33": 10.46977, + "34": 10.23978, + "35": 10.28144, + "36": 10.23894, + "37": 10.35198, + "38": 10.20565, + "39": 10.40496, + "40": 10.09271, + "41": 10.16148, + "42": 10.2231, + "43": 9.84152, + "44": 9.97329, + "45": 9.84544, + "46": 9.82102, + "47": 10.14261, 
+ "48": 9.86553, + "49": 9.54033, + "50": 9.9169 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1544.0, + "2": 1729.0, + "3": 1672.0, + "4": 1807.0, + "5": 1942.0, + "6": 1736.0, + "7": 1956.0, + "8": 1716.0, + "9": 2011.0, + "10": 1385.0, + "11": 1864.0, + "12": 1767.0, + "13": 2019.0, + "14": 1787.0, + "15": 1828.0, + "16": 1908.0, + "17": 1718.0, + "18": 1602.0, + "19": 1785.0, + "20": 1679.0, + "21": 1917.0, + "22": 1712.0, + "23": 2034.0, + "24": 1752.0, + "25": 1645.0, + "26": 1820.0, + "27": 1915.0, + "28": 1996.0, + "29": 2051.0, + "30": 1890.0, + "31": 1577.0, + "32": 1886.0, + "33": 2116.0, + "34": 1912.0, + "35": 2037.0, + "36": 1924.0, + "37": 2462.0, + "38": 2241.0, + "39": 2321.0, + "40": 2221.0, + "41": 2345.0, + "42": 2386.0, + "43": 2027.0, + "44": 2211.0, + "45": 2096.0, + "46": 2285.0, + "47": 2536.0, + "48": 2289.0, + "49": 2270.0, + "50": 2421.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 581489664.0, + "2": 581489664.0, + "3": 581489664.0, + "4": 581489664.0, + "5": 581489664.0, + "6": 581489664.0, + "7": 581489664.0, + "8": 581489664.0, + "9": 581489664.0, + "10": 581489664.0, + "11": 581489664.0, + "12": 581489664.0, + "13": 581489664.0, + "14": 581489664.0, + "15": 581489664.0, + "16": 581489664.0, + "17": 581489664.0, + "18": 581489664.0, + "19": 581489664.0, + "20": 581489664.0, + "21": 581489664.0, + "22": 581489664.0, + "23": 581489664.0, + "24": 581489664.0, + "25": 581489664.0, + "26": 581489664.0, + "27": 581489664.0, + "28": 581489664.0, + "29": 581489664.0, + "30": 581489664.0, + "31": 581489664.0, + "32": 581489664.0, + "33": 581489664.0, + "34": 581489664.0, + "35": 581489664.0, + "36": 581489664.0, + "37": 581489664.0, + "38": 581489664.0, + "39": 581489664.0, + "40": 581489664.0, + "41": 581489664.0, + "42": 581489664.0, + "43": 581489664.0, + "44": 581489664.0, + "45": 581489664.0, + "46": 
581489664.0, + "47": 581489664.0, + "48": 581489664.0, + "49": 581489664.0, + "50": 581489664.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4605814272.0, + "2": 4702430720.0, + "3": 4702430720.0, + "4": 4702430720.0, + "5": 4702430720.0, + "6": 4702430720.0, + "7": 4702430720.0, + "8": 4702430720.0, + "9": 4702430720.0, + "10": 4702430720.0, + "11": 4702430720.0, + "12": 4702430720.0, + "13": 4702430720.0, + "14": 4702430720.0, + "15": 4702430720.0, + "16": 4702430720.0, + "17": 4702430720.0, + "18": 4702430720.0, + "19": 4702430720.0, + "20": 4702430720.0, + "21": 4702430720.0, + "22": 4702430720.0, + "23": 4702430720.0, + "24": 4702430720.0, + "25": 4702430720.0, + "26": 4702430720.0, + "27": 4702430720.0, + "28": 4702430720.0, + "29": 4702430720.0, + "30": 4702430720.0, + "31": 4702430720.0, + "32": 4702430720.0, + "33": 4702430720.0, + "34": 4702430720.0, + "35": 4702430720.0, + "36": 4702430720.0, + "37": 4702430720.0, + "38": 4702430720.0, + "39": 4702430720.0, + "40": 4702430720.0, + "41": 4702430720.0, + "42": 4702430720.0, + "43": 4702430720.0, + "44": 4702430720.0, + "45": 4702430720.0, + "46": 4702430720.0, + "47": 4702430720.0, + "48": 4702430720.0, + "49": 4702430720.0, + "50": 4702430720.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6.95394, + "2": 0.0878, + "3": 0.06953, + "4": 0.07916, + "5": 0.06775, + "6": 0.07681, + "7": 0.06695, + "8": 0.0786, + "9": 0.0664, + "10": 0.08059, + "11": 0.06554, + "12": 0.07501, + "13": 0.06663, + "14": 0.06608, + "15": 0.06585, + "16": 0.06738, + "17": 0.067, + "18": 0.06553, + "19": 0.06755, + "20": 0.06723, + "21": 0.06559, + "22": 0.0664, + "23": 0.06722, + "24": 0.06553, + "25": 0.06829, + "26": 0.06873, + "27": 0.06733, + "28": 0.06731, + "29": 0.06824, + "30": 0.06696, + "31": 0.06661, + "32": 0.06587, + "33": 0.06588, + "34": 0.06564, + "35": 0.06761, + "36": 0.06655, 
+ "37": 0.06712, + "38": 0.06601, + "39": 0.06661, + "40": 0.06632, + "41": 0.0691, + "42": 0.06551, + "43": 0.06839, + "44": 0.06528, + "45": 0.06744, + "46": 0.0675, + "47": 0.06698, + "48": 0.0649, + "49": 0.06596, + "50": 0.06581 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml new file mode 100644 index 00000000000..ddc8286573b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml @@ -0,0 +1,56 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 +MODEL_ARGS: + --num-layers: 12 + --hidden-size: 512 + --num-attention-heads: 8 + --log-params-norm: true + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --tensorboard-dir: ${TENSORBOARD_PATH} + --micro-batch-size: 4 + --global-batch-size: 32 + --seq-length: 1024 + --max-position-embeddings: 1024 + --train-iters: 50 + --timing-log-level: 0 + --lr-decay-iters: 320000 + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --split: 949,50,1 + --distributed-backend: nccl + --lr: 0.00015 + --lr-decay-style: cosine + --min-lr: 1.0e-5 + --weight-decay: 1e-2 + --clip-grad: 1.0 + --lr-warmup-fraction: .01 + --log-interval: 1 + --save-interval: 10000 + --eval-interval: 1000 + --eval-iters: 10 + --transformer-impl: transformer_engine + --tensor-model-parallel-size: 1 + --pipeline-model-parallel-size: 1 + --use-distributed-optimizer: true + --deterministic-mode: true + 
--no-gradient-accumulation-fusion: true + --attention-softmax-in-fp32: true + --use-mcore-models: true + --ckpt-format: torch_dist + --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --data-cache-path: ${DATA_CACHE_PATH} + --bf16: true + --attention-backend: unfused + --log-memory-to-tensorboard: true + --fim-data: true + --fim-rate: 0.5 + --fim-spm-rate: 0.5 +TEST_TYPE: regular diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json index 12a9b70df83..cbc5f4fa3ae 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json @@ -1,178 +1,187 @@ { - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", - "generated_tokens": [ - 3060, - 2430, - 1636, - 2012, - 1317, - 1278, - 2362, - 1307, - 1278, - 16070, - 1044, - 1321, - 1636, - 23067, - 1455, - 1593, - 1395, - 1605, - 3140, - 5152, - 1513, - 1747, - 1046, - 2409, - 1395, - 3140, - 5152, - 1513, - 1278, - 2362 - ], - "latency": 0.29413437843322754, - "cuda_graph_request_count_map": { - "372": 0, - "360": 0, - "336": 0, - "312": 0, - "288": 0, - "264": 0, - "240": 0, - "216": 0, - "192": 0, - "168": 0, - "144": 0, - "120": 0, - "96": 0, - "72": 0, - "48": 0, - "24": 29 - }, - "step_count": 240, - "logprobs": [ - -9.362494468688965, - -2.827894449234009, - -4.557381629943848, - -1.4968647956848145, - -0.717312216758728, - -1.7262351512908936, - -2.522736072540283, - -2.1782360076904297, - -2.3603432178497314, - -6.136383533477783, - -1.4676916599273682, - -3.468963384628296, - -4.424870491027832, - -3.7345848083496094, - -2.012619972229004, - -1.8833301067352295, - -3.5708768367767334, - -6.8197832107543945, - -0.3122292757034302, - -0.9820290207862854, - -6.532033443450928, - -7.498172760009766, - -12.615165710449219, - -2.409003496170044, - -3.8550546169281006, - -0.5105050802230835, - -4.2802581787109375, - -0.06971167027950287, - -0.054025799036026, - -3.319596767425537, - -9.703240394592285, - -1.0997297763824463, - -6.224854469299316, - -5.234503269195557, - -3.934987783432007, - -2.5263679027557373, - -3.1843955516815186, - -5.880871295928955, - -1.8436813354492188, - -5.906496047973633, - -12.15787410736084, - -12.5841064453125, - -0.0819428563117981, - -2.6212656497955322, - -1.4329369068145752, - -2.885145425796509, - -1.2901865243911743, - -0.006647023372352123, - -3.5115818977355957, - -12.945953369140625, - -3.793078899383545, - -3.0094375610351562, - -5.966838836669922, - -0.8998424410820007, - -0.040962252765893936, - -1.5467679500579834, - -1.0785343647003174, - -5.73494815826416, - -0.38491737842559814, - -5.017007827758789, - -0.5568072199821472, - 
-0.5968841910362244, - -2.3609962463378906, - -13.582086563110352, - -0.09050048142671585, - -3.7264108657836914, - -1.1208789348602295, - -6.052675247192383, - -0.5848909616470337, - -3.5906238555908203, - -0.9494907855987549, - -1.5676641464233398, - -5.127577781677246, - -17.19189453125, - -6.698403835296631, - -1.0449178218841553, - -4.365664958953857, - -1.1243419647216797, - -2.2092156410217285, - -1.8081634044647217, - -0.23330983519554138, - -9.439546585083008, - -0.2947109341621399, - -7.253565788269043, - -2.3855936527252197, - -4.629369258880615, - -3.4186267852783203, - -1.9727531671524048, - -2.331681251525879, - -1.5606917142868042, - -2.454296588897705, - -1.5334703922271729, - -1.2631131410598755, - -2.657367706298828, - -0.6480202078819275, - -0.4550393521785736, - -1.3625166416168213, - -0.8142069578170776, - -0.4496593475341797, - -0.9312890768051147, - -1.732723355293274, - -0.44613128900527954, - -1.6895122528076172, - -0.6082233190536499, - -1.0978344678878784, - -1.1122435331344604, - -0.002520838286727667, - -1.4072327613830566, - -0.007462364621460438, - -0.7548662424087524, - -0.9937503337860107, - -0.0675487294793129, - -0.9595617055892944, - -0.029961343854665756, - -2.205785036087036, - -1.2615025043487549, - -0.7878209352493286 - ] - }, - "throughput": [104.98559493782837, 104.98559493782837] + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.2963709831237793, + "cuda_graph_request_count_map": { + "852": 0, + "840": 0, + "784": 0, + "728": 0, + "672": 0, + "616": 0, + "560": 0, + "504": 0, + "448": 0, + "392": 0, + "336": 0, + "280": 0, + "224": 0, + "168": 0, + "112": 0, + "56": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + -5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + -2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + 
-0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.354729652404785, + -1.474542498588562, + -2.48478364944458, + -1.7641210556030273, + -1.1853944063186646, + -2.8624324798583984, + -0.5740103125572205, + -0.4542185962200165, + -1.4300930500030518, + -0.8807456493377686, + -0.4597663879394531, + -0.9252307415008545, + -1.648141860961914, + -0.44453874230384827, + -1.818476915359497, + -0.5714479088783264, + -1.2115143537521362, + -1.0910619497299194, + -0.0023161747958511114, + -1.3206473588943481, + -0.008621376007795334, + -0.7551823854446411, + -0.9404395818710327, + -0.07279698550701141, + -0.9365248680114746, + -0.03344438225030899, + -1.9720849990844727, + -1.3928067684173584, + -0.7453650832176208 + ] + }, + "throughput": [ + 5.425516447410972, + 95.53889537647129, + 98.64633360458717, + 100.31860128598137, + 100.41338716203114, + 100.2318180695741, + 100.30260782227111, + 100.30996418216475 + ] } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml index 0675b047464..15a4a655049 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml @@ -46,8 +46,6 @@ MODEL_ARGS: --return-log-probs: true --num-tokens-to-generate: 30 --enable-cuda-graph: true - --inference-dynamic-batching-buffer-guaranteed-fraction: 0 - --inference-dynamic-batching-buffer-overflow-factor: 0.2 --inference-dynamic-batching-buffer-size-gb: 20 --dist-ckpt-strictness: log_unexpected --inference-ckpt-non-strict: true # To handle the extra_state errors diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json index 8e07dfee229..c22bb604f94 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json @@ -1,178 +1,187 @@ { - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", - "generated_tokens": [ - 3060, - 2430, - 1636, - 2012, - 1317, - 1278, - 2362, - 1307, - 1278, - 16070, - 1044, - 1321, - 1636, - 23067, - 1455, - 1593, - 1395, - 1605, - 3140, - 5152, - 1513, - 1747, - 1046, - 2409, - 1395, - 3140, - 5152, - 1513, - 1278, - 2362 - ], - "latency": 0.3712351322174072, - "cuda_graph_request_count_map": { - "372": 0, - "360": 0, - "336": 0, - "312": 0, - "288": 0, - "264": 0, - "240": 0, - "216": 0, - "192": 0, - "168": 0, - "144": 0, - "120": 0, - "96": 0, - "72": 0, - "48": 0, - "24": 29 - }, - "step_count": 240, - "logprobs": [ - -9.362494468688965, - -2.827894449234009, - -4.557381629943848, - -1.4968647956848145, - -0.717312216758728, - -1.7262351512908936, - -2.522736072540283, - -2.1782360076904297, - -2.3603432178497314, - -6.136383533477783, - -1.4676916599273682, - -3.468963384628296, - -4.424870491027832, - -3.7345848083496094, - -2.012619972229004, - -1.8833301067352295, - -3.5708768367767334, - -6.8197832107543945, - -0.3122292757034302, - -0.9820290207862854, - -6.532033443450928, - -7.498172760009766, - -12.615165710449219, - -2.409003496170044, - -3.8550546169281006, - -0.5105050802230835, - -4.2802581787109375, - -0.06971167027950287, - -0.054025799036026, - -3.319596767425537, - -9.703240394592285, - -1.0997297763824463, - -6.224854469299316, - -5.234503269195557, - -3.934987783432007, - -2.5263679027557373, - -3.1843955516815186, - -5.880871295928955, - -1.8436813354492188, - -5.906496047973633, - -12.15787410736084, - -12.5841064453125, - -0.0819428563117981, - -2.6212656497955322, - -1.4329369068145752, - -2.885145425796509, - -1.2901865243911743, - -0.006647023372352123, - -3.5115818977355957, - -12.945953369140625, - -3.793078899383545, - -3.0094375610351562, - -5.966838836669922, - -0.8998424410820007, - -0.040962252765893936, - -1.5467679500579834, - -1.0785343647003174, - -5.73494815826416, - -0.38491737842559814, - -5.017007827758789, - -0.5568072199821472, - 
-0.5968841910362244, - -2.3609962463378906, - -13.582086563110352, - -0.09050048142671585, - -3.7264108657836914, - -1.1208789348602295, - -6.052675247192383, - -0.5848909616470337, - -3.5906238555908203, - -0.9494907855987549, - -1.5676641464233398, - -5.127577781677246, - -17.19189453125, - -6.698403835296631, - -1.0449178218841553, - -4.365664958953857, - -1.1243419647216797, - -2.2092156410217285, - -1.8081634044647217, - -0.23330983519554138, - -9.439546585083008, - -0.2947109341621399, - -7.253565788269043, - -2.3855936527252197, - -4.629369258880615, - -3.4186267852783203, - -1.9727531671524048, - -2.331681251525879, - -1.5606917142868042, - -2.454296588897705, - -1.5334703922271729, - -1.2631131410598755, - -2.657367706298828, - -0.6480202078819275, - -0.4550393521785736, - -1.3625166416168213, - -0.8142069578170776, - -0.4496593475341797, - -0.9312890768051147, - -1.732723355293274, - -0.44613128900527954, - -1.6895122528076172, - -0.6082233190536499, - -1.0978344678878784, - -1.1122435331344604, - -0.002520838286727667, - -1.4072327613830566, - -0.007462364621460438, - -0.7548662424087524, - -0.9937503337860107, - -0.0675487294793129, - -0.9595617055892944, - -0.029961343854665756, - -2.205785036087036, - -1.2615025043487549, - -0.7878209352493286 - ] - }, - "throughput": [79.88988160240554, 79.88988160240554] + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.38181447982788086, + "cuda_graph_request_count_map": { + "852": 0, + "840": 0, + "784": 0, + "728": 0, + "672": 0, + "616": 0, + "560": 0, + "504": 0, + "448": 0, + "392": 0, + "336": 0, + "280": 0, + "224": 0, + "168": 0, + "112": 0, + "56": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + -5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + -2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + 
-0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.354729652404785, + -1.474542498588562, + -2.48478364944458, + -1.7641210556030273, + -1.1853944063186646, + -2.8624324798583984, + -0.5740103125572205, + -0.4542185962200165, + -1.4300930500030518, + -0.8807456493377686, + -0.4597663879394531, + -0.9252307415008545, + -1.648141860961914, + -0.44453874230384827, + -1.818476915359497, + -0.5714479088783264, + -1.2115143537521362, + -1.0910619497299194, + -0.0023161747958511114, + -1.3206473588943481, + -0.008621376007795334, + -0.7551823854446411, + -0.9404395818710327, + -0.07279698550701141, + -0.9365248680114746, + -0.03344438225030899, + -1.9720849990844727, + -1.3928067684173584, + -0.7453650832176208 + ] + }, + "throughput": [ + 3.896181563640281, + 77.1287764739343, + 77.17674536709352, + 76.8666671960972, + 77.944911028325, + 77.95118832563914, + 78.13236085816422, + 78.0046829173943 + ] } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml index 2ba9050ceaf..b368242b9af 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml @@ -47,8 +47,6 @@ MODEL_ARGS: --num-tokens-to-generate: 30 --enable-cuda-graph: true --decode-only-cuda-graphs: true - --inference-dynamic-batching-buffer-guaranteed-fraction: 0 - --inference-dynamic-batching-buffer-overflow-factor: 0.2 --inference-dynamic-batching-buffer-size-gb: 20 --dist-ckpt-strictness: log_unexpected --inference-ckpt-non-strict: true # To handle the extra_state errors diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml index a4f47d3705f..7fcf9e9cf81 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml @@ -22,7 +22,8 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 - --transformer-impl: transformer_engine + --transformer-impl: inference_optimized + --sequence-parallel: true --tensor-model-parallel-size: 1 --pipeline-model-parallel-size: 1 --deterministic-mode: true @@ -41,9 +42,6 @@ MODEL_ARGS: --top_k: 1 --return-log-probs: true --num-tokens-to-generate: 30 - --inference-dynamic-batching-max-requests-override: 8 # hardcode decode padding tokens to 7 for reproducibility - --inference-dynamic-batching-buffer-guaranteed-fraction: 0 - --inference-dynamic-batching-buffer-overflow-factor: 0.2 --inference-dynamic-batching-buffer-size-gb: 20 --dist-ckpt-strictness: log_unexpected --inference-ckpt-non-strict: true # To handle the extra_state errors diff --git 
a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..9be8a9dc0ca --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json @@ -0,0 +1,1028 @@ +{ + "throughput": [ + 94.6087716527102, + 115.85992244026639, + 138.9562527069375, + 133.18726531918395, + 81.97861561771212, + 134.30726469422635, + 86.456140428456, + 114.99456351298251, + 147.3101800153954, + 3.0364623744653003, + 124.7590786954667, + 134.2276982994434, + 3.0580463134110167, + 117.03969654341354, + 130.92134521286803, + 48.493091604204935, + 1.4498729599486508, + 128.01470907994928, + 1.8330770354872434, + 66.31842482241125, + 82.24189975425459, + 1.07058112939944, + 1.8815468970982412, + 0.9373246942729808, + 134.9963160815443, + 2.285771114682068, + 43.068220270070434, + 134.9677086822377, + 82.44946740133796, + 47.71839155542011, + 114.4199568886962, + 29.67621576315833, + 144.1589742491705, + 95.8164720809401, + 122.80562228460093, + 39.21436814433054, + 3.041180292262413, + 3.2867844729646842, + 72.43808226229888, + 0.8371525937296347, + 1.2212635079980698, + 145.6869075644325, + 42.317711349146016, + 109.1196064871946, + 73.6281770453198, + 140.4495689387567, + 1.219834296561022, + 138.66856497329005, + 23.33818821323391, + 67.82342558671365, + 130.09683254313987, + 147.60199288178146, + 0.9427431720755464, + 3.2856495013162523, + 79.12426666101076, + 86.41557345094756, + 120.17346279825053, + 137.16615251640926, + 108.93291864542198, + 110.10504114490513, + 46.19253755421628, + 0.950218846923012, + 136.50642826951463, + 142.73168666846448, + 1.2206786818073785, + 1.898581377105612, + 131.72636154091063, + 2.2842414327001976, + 89.76521170090028, + 114.66053545744656, 
+ 58.64474290044525, + 0.8367865961030284, + 128.01767795820945, + 60.87292097103301, + 124.20016865241587, + 119.59336898055426, + 0.9425820346281929, + 93.70053305431952, + 1.0728113870213674, + 135.7596767309971, + 112.89357243644062, + 89.2743296587299, + 137.86411291342458, + 135.6974706051771, + 102.59633828443238, + 129.82058179399326, + 139.57672703148444, + 140.5642311163746, + 78.49182953675201, + 123.40912657074227, + 82.74099904578694, + 75.5490641626476, + 93.38596238341951, + 141.19058076067225, + 1.072254167577298, + 100.8669047802279, + 132.77382347347034, + 92.29086179175866, + 137.20301032384705, + 89.57723938765776, + 67.5465256589703, + 0.9498935124108836, + 1.0716887464650027, + 0.8365472180547067, + 137.902625307774, + 132.67132600219722, + 1.45201860416265, + 1.8366476879619427, + 88.65095604379363, + 132.1806036761347, + 126.0481874394642, + 127.43750324083169, + 93.27238135265156, + 109.83884164204308, + 102.30516355984702, + 141.10387096377744, + 0.9425154448032942, + 95.04281981148903, + 103.11525529548061, + 0.8361762901534399, + 135.3171561172067, + 123.30032998064965, + 118.75691144485415, + 82.21375599642211, + 66.37216333263251, + 120.02349229491865, + 27.339414655466246, + 133.1312422227687, + 123.02377779863252, + 111.0798894329, + 58.88405247768833, + 131.31767475108893, + 40.19076958615912, + 123.58362152151858, + 130.6541142941889, + 61.39555613504246, + 43.92154495664044, + 1.037012527495492, + 127.16052127606021, + 137.06554800183082, + 85.67161160523041, + 1.0253417447981334, + 139.20903624514017, + 140.19068787455728, + 117.67416498245059, + 23.410837515725987, + 130.73052473972666, + 22.561824695346466, + 1.028901717647808, + 119.30712483977753, + 117.77548263464804, + 135.2959098119142, + 142.10193821260228, + 1.0366044325624144, + 1.0350271698893887, + 132.8943567509843, + 51.50353963446039, + 113.39559408843714, + 124.25424103796537, + 129.60407993083075, + 136.8566687186031, + 1.036163010240988, + 1.0345739017743927, + 
118.72350056844492, + 32.453707095990595, + 43.851925176925825, + 139.39206855448938, + 141.0979597861742, + 132.81461728578432, + 80.95956255477945, + 133.42483643501154, + 57.27721135575491, + 81.47649794801364, + 79.39765285063396, + 56.40255861789973, + 0.8890603607397893, + 137.59325887086797, + 118.03982850100024, + 53.04390121587005, + 88.31177924841927, + 1.0287550608831881, + 54.67393025836421, + 54.73556135447348, + 129.6143036059356, + 123.57095756116274, + 146.05184555314386, + 55.506024155977386, + 84.40666358740559, + 62.68531518105107, + 147.42894642823578, + 1.0274253590993496, + 145.9063526676371, + 76.36231256557768, + 1.035808949157935, + 136.1858098182613, + 93.13144140533397, + 54.57886608953819, + 1.0251956490815057, + 1.0270063804838983, + 67.96952180390161, + 136.90103479290272, + 78.62986077133174, + 129.97235998681177, + 70.57784076609056, + 1.028567312218149, + 69.64434330087829, + 1.0266016363366386, + 25.142311727265525, + 139.54750333578679, + 118.80547132463877, + 1.0342055876192149, + 132.79991800938092, + 88.25494664060619, + 132.4600307114398, + 1.026200775415348, + 111.33264788932784, + 1.031301270403004, + 104.45912302410692, + 1.0337771723701492, + 124.53550504281608, + 1.0283501183885058, + 126.53361938982871, + 139.83512785200963, + 102.28350299734186, + 122.68389734539087, + 139.27095111763788, + 1.0333552237490158, + 97.04945381465573, + 60.63422077140298, + 1.0248694052483192, + 96.77644543721476, + 118.38370846079931, + 1.0309087229819596, + 136.0487423665781, + 1.032932214377732, + 104.96525711514936, + 50.75370028394122, + 125.67617176346853, + 125.47392048276225, + 101.59371483024698, + 119.1183231384482, + 134.24568445137294, + 1.0323996653747745, + 119.28563313083153, + 50.183581144589674, + 107.50817556608582, + 127.4693561344537, + 116.0234844098742, + 149.0429439759437, + 127.77855747904051, + 1.0319900690130652, + 129.7400124946839, + 60.27584011696136, + 1.0245534026749026, + 113.8687773549026, + 
129.9927880985222, + 41.55332067297356, + 12.991853549713621, + 144.9384518471586, + 127.77570879015505, + 79.09214991388126, + 1.0326234729165304, + 144.50618896622706, + 44.461452482592826, + 145.75357879817352, + 150.5618330832813, + 123.17802281879979, + 147.0133924731902, + 57.07203337285457, + 140.17944630269687, + 44.5066568841284, + 150.2834791394652, + 146.37106237628518, + 135.59553639884948, + 21.91845075979551, + 1.0391172002596458, + 92.42182316100705, + 14.98578222593142, + 19.944740287073653, + 32.75622847272977, + 58.94666795839769, + 1.0428676908165904, + 97.94938911630567, + 140.5399781540016, + 36.397689902912774, + 1.0322919875583962, + 33.76444948259586, + 147.54902815924785, + 51.316830076622495, + 153.55703202636914, + 46.423895018386204, + 140.271682540213, + 1.0340651759548871, + 85.22971449383292, + 141.80480996358014, + 1.0234621691055457, + 1.0355322329825165, + 136.96321865236195, + 138.2293990177049, + 136.89440582973347, + 96.94919171687799, + 54.992986423891566, + 142.91167590864902, + 138.73615931624403, + 86.32837448704223, + 1.0424247604140402, + 127.58052889290863, + 138.2472241943501, + 1.0338260095695477, + 1.0317372756221133, + 150.59249576769173, + 1.0229533138894364, + 149.1711141084735, + 1.0419379125129562, + 1.040305113121658, + 150.13261057757276, + 62.47975017460808, + 70.20443057037575, + 76.88821624674898, + 1.0225242667788867, + 136.83301633777177, + 1.0414381555227956, + 131.6044067829552, + 1.038902005769604, + 1.0335832618537684, + 83.38230404797935, + 3.047737981863063, + 140.9843162162637, + 1.0352264324041114, + 1.0409374510445146, + 103.17228299164871, + 1.0383219913492376, + 67.5151836065632, + 126.94018489907108, + 95.29974174831813, + 1.022161551972834, + 1.0348032799350415, + 93.24855217625235, + 140.00831851627856, + 142.46553219867087, + 80.52507876480331, + 149.47939431741142, + 125.60095189608528, + 92.57991472689042, + 153.09192667088175, + 98.78787611117323, + 136.9802701171813, + 1.0378200246498124, 
+ 79.05370338483348, + 145.63143231877774, + 107.86253722014555, + 113.1390555766259, + 150.4596904971142, + 6.010262757833046, + 138.11675690694213, + 1.0371929842524894, + 55.1702723554103, + 148.4142582794926, + 108.62464742566522, + 142.2515578682958, + 149.5588988951372, + 1.0310870179234204, + 32.798276334675066, + 145.8363475163408, + 82.52497836005318, + 144.77105210255448, + 140.95035733017403, + 145.4844811663436, + 145.0646083055648, + 139.1641494303434, + 1.0401220454548914, + 146.10598185112948, + 1.0335329080843159, + 1.0316085392161136, + 133.98012837767038, + 129.62059667226987, + 151.2681266565858, + 1.030719335336581, + 135.9600336007384, + 1.0366589924031362, + 107.70864165999221, + 118.06361914834272, + 148.4615541738592, + 135.1206190516379, + 1.0788915925864082, + 1.0662361391973343, + 1.0784094142292293, + 145.5492563111853, + 100.1745158858024, + 89.97448812790176, + 140.13008352060388, + 8.378443606045758, + 19.841723966559687, + 31.11972559764219, + 127.75589035167928, + 144.649118240912, + 83.40454687650907, + 13.609558087727212, + 144.14916775068022, + 143.0831699051951, + 144.53789580070173, + 129.35689525213576, + 126.54760361436873, + 136.72725454688293, + 83.66753329456253, + 35.238850690537326, + 138.73588075606074, + 148.39285997484404, + 141.43706957675556, + 35.20788617289704, + 140.22918428708584, + 141.42288954532623, + 80.8071906111917, + 53.480908541665116, + 96.60869116876205, + 138.83030943256392, + 146.89537016655746, + 1.0659353965573166, + 138.66041009897964, + 138.0783824554628, + 54.95061283513892, + 1.0688789370964418, + 145.4981195236156, + 107.91672388693667, + 147.39387423946786, + 143.49840246862203, + 1.0781871694837721, + 125.37215873599833, + 46.390553110182545, + 1.0683430650310588, + 60.55314896188811, + 128.32962060837178, + 142.6648214311374, + 1.065532502621677, + 145.06202945295232, + 149.5985088362253, + 43.61426254132819, + 139.2120402464869, + 138.80120892663803, + 142.59390751862693, + 
147.27000174003754, + 139.5980537408405, + 142.37081759892675, + 76.47257166426981, + 0.8663971721944621, + 1.067847671923619, + 1.0752972325757186, + 139.11225337731244, + 154.1012640338781, + 91.85315813315137, + 7.34066705730821, + 1.0763437477764217, + 56.03391448680589, + 1.067309924884827, + 1.0747789028833068, + 1.057667310022394, + 146.4284745539176, + 142.32867288307636, + 132.81801172672715, + 142.5746724111237, + 43.178263922620026, + 140.19958418325498, + 1.0742201855279276, + 139.95237701874325, + 124.69044225989671, + 89.93275546978569, + 1.0778110524743836, + 108.03753008375865, + 0.8649825661375887, + 101.22782607000799, + 138.6615942910557, + 1.0572642952018412, + 143.509260845593, + 1.0651693329533294, + 97.454990956795, + 1.075960473594851, + 104.89429761368234, + 153.46849816095335, + 143.28204379991922, + 112.57923589922926, + 145.35468060283986, + 119.53338040876814, + 132.53105489182144, + 146.60735281445733, + 0.8648000721123511, + 132.61504628627392, + 140.81953388748138, + 1.05684091289561, + 147.29646966899597, + 1.0646855258714663, + 1.0772400203863821, + 137.87592499226204, + 101.79954304062817, + 134.45893707567646, + 1.0737967838723397, + 147.3289039421509, + 142.95955673278567, + 123.11846557585149, + 139.7223884224781, + 5.274894457437767, + 0.8646226703470901, + 135.27010135142623, + 134.53222451904563, + 140.4520894166607, + 148.6784682726068, + 148.83999547746723, + 144.76059628877204, + 146.09818079047014, + 0.8644123666240657, + 133.05795012757028, + 141.21253159110282, + 147.08086640702987, + 153.13511211461227, + 147.72437078211334, + 53.87242850230838, + 61.34701685378028, + 74.50771860339175, + 16.40780504974564, + 16.448796993269678, + 144.08505364828036, + 143.78069847853888, + 145.08382905436133, + 139.4144567792124, + 1.113422304912727, + 23.732299099149245, + 146.716938504402, + 1.1150428401994323, + 1.1070863332993708, + 147.462815334713, + 15.300506166735937, + 142.89311901203018, + 35.881455163220174, + 
0.8959120615185874, + 134.50389621984408, + 79.91603718165896, + 145.31776951960734, + 153.19384567886857, + 142.494036234602, + 130.58249312188119, + 1.1128817603274543, + 56.157995916719756, + 35.81413980204931, + 116.5213087641768, + 63.30354399512571, + 55.0117106848875, + 47.52954249314361, + 153.04709230401787, + 1.112276523473745, + 80.1523559974256, + 136.20373724941714, + 1.114673225365626, + 1.1067132158651183, + 149.29883052073288, + 145.10950784560325, + 130.53765167080937, + 1.111788125890117, + 0.8957719496064405, + 1.1050775451489783, + 17.522300994030367, + 154.45472111064055, + 152.07616582090188, + 1.1020107149905272, + 138.6808068419634, + 76.87873177159636, + 51.43702839643221, + 138.95045176064437, + 138.64177504011988, + 140.72197385602811, + 132.80947742972836, + 149.78872816785005, + 139.94034036065392, + 154.2632802491591, + 55.57148538150843, + 1.1044580058296936, + 147.1712801496827, + 77.84198065949245, + 142.38330204183904, + 151.76812011990265, + 145.19131540821485, + 147.26566215388425, + 87.12413393605841, + 1.1038403429439656, + 141.4935550752979, + 145.7397470598185, + 3.3080164659931235, + 123.0327553358976, + 146.24080278853327, + 148.10448175245884, + 29.234562433775857, + 151.30177873039895, + 135.4653748135468, + 144.3293913931314, + 148.16163203136404, + 1.1015876034201657, + 1.1114790318458536, + 136.68047783885697, + 77.72584511329579, + 125.73692105352463, + 106.98755729483561, + 96.25926845246491, + 1.109721323323522, + 141.71073652156545, + 130.22006710827588, + 145.24478945746003, + 80.67459353439743, + 1.1033551544760267, + 150.03177939272493, + 154.12875534463626, + 150.04771421074818, + 1.1010813815407388, + 1.1110434127990452, + 145.385699877379, + 86.86487551811825, + 130.16687493633253, + 143.8726181331947, + 111.91340621077623, + 146.0394914387852, + 1.1006353022455784, + 134.47903589563677, + 148.6907436994389, + 102.87151097507036, + 137.41724911494663, + 1.1146766644704549, + 143.85952373403495, + 
146.92280951248307, + 1.100156488603178, + 144.04783334738536, + 148.53630346113712, + 58.74848466983248, + 147.0485685726298, + 141.32891699761203, + 142.8441702922343, + 131.04366253726744, + 128.6305301075303, + 1.1106412111686195, + 147.90025888582002, + 0.8959265584913588, + 149.5194069726666, + 137.43649451567626, + 1.1068068376551545, + 68.05269425995475, + 138.94056631255367, + 138.43818227469507, + 69.60391199895408, + 114.83395091462887, + 151.34107787433956, + 141.57237630997332, + 146.07433910500515, + 9.941778754980154, + 131.297822968639, + 10.386636719874664, + 10.545636067043365, + 114.58677137445733, + 75.28902943071078, + 90.63452059810655, + 143.58694736923238, + 9.901118804514459, + 144.5206530902411, + 144.78737732574044, + 79.81136215142409, + 84.9314508821071, + 120.18939827456474, + 10.225253542151219, + 9.702822548173124, + 103.1188517219872, + 138.5008491242522, + 92.02238700298246, + 151.99592340131602, + 9.807595290716304, + 150.0447954775559, + 134.2614008494909, + 149.38544573345007, + 149.62298116309924, + 124.32358754465251, + 132.817456221544, + 10.50607995390264, + 9.78317681034783, + 151.07916494121415, + 146.93545537009487, + 118.45851163082196, + 145.03008316360754, + 154.4449202186591, + 146.86002069809945, + 150.6932855951215, + 110.74803327496042, + 127.40788523389726, + 150.81323854197058, + 150.0047673310006, + 149.6063654551971, + 133.87244996538675, + 10.329695475492791, + 9.414695716712222, + 106.77032789813472, + 118.34636653947105, + 123.44441062862572, + 144.9015592115516, + 153.74652990582067, + 10.065713405335144, + 129.38998560194165, + 117.69087049838025, + 99.15650839997046, + 127.90462338199198, + 147.3574863739125, + 9.696544883885949, + 9.8853852911422, + 128.35872796896587, + 145.2939860705264, + 128.72081963712404, + 94.09935653689803, + 142.8780531031409, + 130.5213122981276, + 126.89288883528536, + 153.36107852781166, + 149.17239657923582, + 9.177632630803961, + 9.387171298727486, + 109.68196882316985, + 
148.55536204011432, + 152.61730207818772, + 9.648922236946333, + 132.805446535875, + 138.74295200738652, + 141.66118217831166, + 124.0399127789103, + 113.05005278683446, + 149.71230902297984, + 25.727698431920004, + 129.56419655827216, + 130.40687823665095, + 128.46470366050013, + 150.46298369674685, + 9.22073843893938, + 110.36443029340542, + 148.23878821929193, + 10.219508495480236, + 9.615051521185155, + 9.8723813087942, + 149.91378148843256, + 9.149056684599877, + 130.37704092008303, + 114.86611671621016, + 134.53633480709703, + 131.11593468604048, + 149.74665952988033, + 136.60701891253495, + 146.50864617645632, + 9.094221140419737, + 149.69902295915708, + 126.93245475406366, + 141.2463933703881, + 10.18172163650932, + 136.76582155059438, + 155.5823388453975, + 144.68082947663285, + 142.0128061769988, + 116.20800508912414, + 101.13756407758095, + 10.050927550768915, + 10.14139856150474, + 9.573219645146107, + 146.33874064646594, + 137.22302119976462, + 132.14965518046, + 148.08190796641483, + 117.6843964457568, + 153.04352772565807, + 146.79238076404926, + 9.522740968586977, + 145.93484469600287, + 13.925952420322696, + 12.697420287309185, + 146.39122941822845, + 113.94298610788566, + 13.844109957456581, + 154.57922917096633, + 13.525210269101805, + 103.83976095796662, + 97.75660804271413, + 135.83818209343426, + 158.60060111529293, + 111.57793188874757, + 13.768524263105455, + 154.2203592546867, + 108.85242762118563, + 111.15752259030245, + 149.5942138872604, + 119.77102605185765, + 120.68065341205389, + 105.29698904913548, + 151.41465167808087, + 138.90606724001483, + 13.437371194424983, + 119.97194649055415, + 144.6223725248399, + 146.9934910169238, + 149.45319992777343, + 121.48260402443249, + 13.662736071688842, + 14.448955892498802, + 144.5545360346381, + 154.00382983055897, + 151.8635735223181, + 137.2321484611102, + 119.71487519948164, + 88.24978714231261, + 147.74815341218743, + 142.1113258863455, + 132.08775922189477, + 124.63351274554526, + 
145.72256212355262, + 100.50708502243579, + 139.16363846809003, + 114.82662827063822, + 154.78307253831395, + 149.22879563842886, + 152.6744734255461, + 145.81022434241217, + 152.68018782123758, + 116.75549006136289, + 12.968595875688791, + 6.824624970615158, + 125.05116103474757, + 147.66072487793718, + 147.5735120742967, + 139.1302141298083, + 146.48542990069834, + 12.674865288395944, + 147.88858853602966, + 6.8124480142416175, + 137.54766974463703, + 130.89979405333307, + 13.364169845161861, + 14.116086127002273, + 130.3002929300388, + 116.98398239487472, + 152.70827610346095, + 98.51470626500011, + 135.1252373635164, + 14.405992358855888, + 154.13709739001223, + 146.28661687368685, + 137.87827066214206, + 12.621081453489012, + 154.04574874294514, + 6.802625211185703, + 152.18661864386252, + 149.30257880598677, + 13.244501725269068, + 138.34068638798834, + 150.95140747506372, + 141.8441899037163, + 152.99022366652198, + 103.95004802425926, + 140.28144756248412, + 154.51222806007945, + 85.40777548962518, + 154.7067128296305, + 120.47843952303268, + 12.568053995018431, + 12.916583075889136, + 105.92477484543576, + 137.92878859711615, + 135.13853669037294, + 137.88549737290148, + 157.83019925734393, + 145.48927689323145, + 12.509532718065461, + 150.6233829715981, + 119.23669844460764, + 138.49099023171033, + 154.0870149904812, + 140.1862744667834, + 148.860174031694, + 147.54629689336036, + 12.448861769003683, + 152.4711466483636, + 102.47079224461186, + 152.40864885890767, + 156.21773232766026, + 13.139291580904986, + 150.30653960489693, + 145.43571147072188, + 132.8965387342577, + 144.85972103961666, + 125.5438694385711, + 158.07457773478276, + 14.359506122440205, + 137.7658155977229, + 153.68125116011197, + 156.57780724945528, + 12.394708947912125, + 12.874702780202174, + 110.61518572692995, + 149.4338565730422, + 149.67552030435513, + 146.20909415912828, + 9.308833539527914, + 26.176147260970783, + 8.701217384742513, + 66.92241449340185, + 105.12940849136734, + 
145.25326276553395, + 139.68219350261262, + 131.60335890332783, + 150.53420884400245, + 17.552483447968918, + 99.60476667168517, + 9.003208512207522, + 8.539560747895454, + 9.946172723540226, + 150.55644446784382, + 9.608936841972842, + 104.80864366760326, + 25.95068644438624, + 99.42592550150236, + 108.35979254469888, + 113.9171427720856, + 9.905905876631499, + 131.1684982861573, + 154.7989292174601, + 151.34753888952145, + 150.11816141981262, + 143.00557828542912, + 126.2310299151925, + 113.53830001728545, + 148.13405630794878, + 150.7564429392251, + 155.252325076404, + 18.20048176554747, + 25.725436761645142, + 8.678711562613207, + 143.3683328827327, + 127.0294451168928, + 137.50119476282134, + 10.068367539846923, + 155.64822784014916, + 153.2789382926615, + 25.46950813818654, + 142.9138107220956, + 155.10510899417167, + 107.40557834412083, + 9.871948602847068, + 144.4712732194919, + 140.17802930301565, + 9.286026243902361, + 129.1488895575147, + 124.35586045151207, + 140.1410811550992, + 96.63692877337894, + 153.62093095799207, + 156.05800033315097, + 9.587609950939838, + 140.09721428165886, + 134.898750425008, + 8.652809034763463, + 8.989448046931262, + 107.64260577858933, + 9.825071080298192, + 150.6237132142087, + 143.76058852986372, + 154.01627264735168, + 140.85322298632985, + 143.63714834446708, + 149.7259575806535, + 8.53942846683121, + 157.02635815805976, + 150.83913162907433, + 154.0283691261865, + 9.246842209481716, + 154.5851361854829, + 133.4662155767381, + 137.55396410787307, + 105.77910782321499, + 148.97953057255376, + 111.3041581371634, + 9.543858351726714, + 142.71996301994741, + 144.2417836324451, + 148.5293262803374, + 8.95331376662564, + 105.2724164655814, + 149.16646109060707, + 151.1947852118465, + 9.503293907683512, + 133.40055362812345, + 8.776394391795916, + 148.3675722527084, + 154.66946641450528, + 122.71674068416665, + 149.62192317697068, + 153.40159484208397, + 9.46860898864519, + 146.10526710538994, + 143.96020057925128, + 
8.62472208077336, + 8.906885562515198, + 105.7754218686014, + 150.17957794387223, + 144.0451331512576, + 149.95461039551162, + 151.46311089131117, + 142.22104279807664, + 147.3679944003333, + 140.5394711174869, + 123.62157744638432, + 152.32796921399395, + 156.6603241829257, + 9.43621164630811, + 158.2241383954169, + 149.33346139426692, + 144.12074054746773, + 143.1977521817863, + 8.536662624511228, + 9.785635570067782, + 147.61880087321424, + 9.402323265876474, + 159.1161790596516, + 146.56796834276156, + 147.64890403285438, + 157.70847517328534, + 114.64282143770687, + 148.5000942425868, + 10.052761003641129, + 147.38801074409378 + ] +} diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml new file mode 100644 index 00000000000..2d65c154a0e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml @@ -0,0 +1,59 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 +TEST_TYPE: frozen-start +MODE: inference +MODEL_ARGS: + --tiktoken-pattern: v2 + --use-mcore-models: true + --tokenizer-type: TikTokenizer + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --auto-detect-ckpt-format: true + --max-tokens-to-oom: 3600000 + --inference-max-seq-length: 4096 + --attention-backend: flash + --use-checkpoint-args: true + --micro-batch-size: 1 + --no-load-optim: true + --no-use-tokenizer-model-from-checkpoint-args: true + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ + --distributed-backend: nccl + --log-interval: 1 + --transformer-impl: transformer_engine + --tensor-model-parallel-size: 1 + 
--pipeline-model-parallel-size: 1 + --ckpt-format: torch_dist + --bf16: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --num-layers: 24 + --hidden-size: 1152 + --num-attention-heads: 16 + --max-position-embeddings: 1024 + --seq-length: 1024 + --temperature: 1.0 + --top_k: 1 + --seed: 42 + --return-log-probs: true + --num-tokens-from-file: true + --inference-dynamic-batching-buffer-size-gb: 20 + --cuda-graph-impl: local + --cuda-graph-scope: full_iteration + --disable-chunked-prefill: true + --dist-ckpt-strictness: log_unexpected + --inference-ckpt-non-strict: true # To handle the extra_state errors + --output-path: ${TENSORBOARD_PATH} + --output-every-n-results: 32 + --prompt-file: ${DATA_PATH}/text/sharegpt-vicuna/filtered/processed.jsonl + --prompt-file-num-truncate: 1024 + --incoming-requests-per-step: 128 + --use-flashinfer-fused-rope: true + --throughput-check-only: true +METRICS: + - "generated_tokens" + - "logprobs" diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..07adf271434 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json @@ -0,0 +1,158 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And that this is the place where you can be yourself, and be yourself in the most beautiful way. And that this is the place where you can", + "generated_tokens": [ + 3060, + 1455, + 1593, + 1395, + 1278, + 3535, + 2478, + 1636, + 1710, + 1402, + 14019, + 1044, + 1321, + 1402, + 14019, + 1294, + 1278, + 2725, + 15568, + 3039, + 1046, + 3060, + 1455, + 1593, + 1395, + 1278, + 3535, + 2478, + 1636, + 1710 + ], + "latency": 2.020272731781006, + "logprobs": [ + -9.358587265014648, + -2.7594826221466064, + -4.608366012573242, + -1.4093360900878906, + -0.6152952313423157, + -1.7217562198638916, + -2.496668815612793, + -2.0547454357147217, + -2.441960573196411, + -6.280838966369629, + -1.5643692016601562, + -3.462346076965332, + -4.428728103637695, + -3.8633861541748047, + -1.9936373233795166, + -1.8929449319839478, + -3.796365737915039, + -6.8360137939453125, + -0.2901247441768646, + -0.9246833324432373, + -6.633338928222656, + -7.166708469390869, + -12.771251678466797, + -2.198296308517456, + -3.7778120040893555, + -0.4983733296394348, + -4.381269454956055, + -0.0666784718632698, + -0.09580295532941818, + -3.2437636852264404, + -10.079947471618652, + -1.172220230102539, + -5.977442741394043, + -5.046236038208008, + -3.855658531188965, + -2.5585858821868896, + -3.356245994567871, + -5.557229518890381, + -1.6787731647491455, + -5.483290672302246, + -12.218501091003418, + -12.61402702331543, + -0.09662941098213196, + -2.5431432723999023, + -1.4071024656295776, + -2.9154715538024902, + -1.1964417695999146, + -0.006458481773734093, + -3.3625335693359375, + -13.262511253356934, + -4.314079761505127, + -2.617699146270752, + -5.987792015075684, + -0.778266429901123, + -0.048888545483350754, + -1.548882007598877, + -1.1381981372833252, + -5.627166748046875, + 
-0.4078553318977356, + -4.958505630493164, + -0.6187160611152649, + -0.7174848914146423, + -2.469533920288086, + -13.620073318481445, + -0.09088654816150665, + -3.526974678039551, + -1.4195809364318848, + -6.402483940124512, + -0.5898402333259583, + -3.565917491912842, + -0.8561318516731262, + -1.6140165328979492, + -5.370549201965332, + -17.159223556518555, + -6.583524703979492, + -0.8855001926422119, + -4.19431209564209, + -1.2012220621109009, + -2.2563133239746094, + -1.7674944400787354, + -0.22064533829689026, + -9.292220115661621, + -0.12445646524429321, + -7.29617977142334, + -2.526529312133789, + -4.071560859680176, + -3.5568013191223145, + -1.926215410232544, + -2.349026918411255, + -2.2132363319396973, + -0.3125414550304413, + -1.4718132019042969, + -2.149106740951538, + -1.0855519771575928, + -1.631832242012024, + -1.3751734495162964, + -1.9396103620529175, + -1.5293723344802856, + -0.8444125056266785, + -1.2414811849594116, + -1.9522171020507812, + -2.4338042736053467, + -1.5651824474334717, + -0.9498789310455322, + -1.8044980764389038, + -2.356677770614624, + -1.247452974319458, + -1.550165057182312, + -0.5635553598403931, + -0.6177330017089844, + -0.4778785705566406, + -0.020452087745070457, + -0.48500269651412964, + -0.23854275047779083, + -0.06543659418821335, + -0.11837350577116013, + -0.0585334412753582 + ] + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/model_config.yaml new file mode 100644 index 00000000000..96d3fd0fc0c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/model_config.yaml @@ -0,0 +1,58 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 +TEST_TYPE: frozen-start +MODE: inference 
+MODEL_ARGS: + --tiktoken-pattern: v2 + --use-mcore-models: true + --tokenizer-type: TikTokenizer + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --auto-detect-ckpt-format: true + --max-tokens-to-oom: 3600000 + --inference-max-seq-length: 4096 + --attention-backend: flash + --use-checkpoint-args: true + --micro-batch-size: 1 + --no-load-optim: true + --no-use-tokenizer-model-from-checkpoint-args: true + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ + --distributed-backend: nccl + --log-interval: 1 + --transformer-impl: inference_optimized + --sequence-parallel: true + --tensor-model-parallel-size: 1 + --pipeline-model-parallel-size: 8 + --deterministic-mode: true + --ckpt-format: torch_dist + --bf16: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --num-layers: 24 + --hidden-size: 1152 + --num-attention-heads: 16 + --max-position-embeddings: 1024 + --seq-length: 1024 + --temperature: 1.0 + --top_k: 1 + --return-log-probs: true + --num-tokens-to-generate: 30 + --inference-dynamic-batching-max-requests-override: 8 # hardcode decode padding tokens to 7 for reproducibility + --inference-dynamic-batching-buffer-guaranteed-fraction: 0 + --inference-dynamic-batching-buffer-overflow-factor: 0.2 + --inference-dynamic-batching-buffer-size-gb: 20 + --dist-ckpt-strictness: log_unexpected + --inference-ckpt-non-strict: true # To handle the extra_state errors + --output-path: ${TENSORBOARD_PATH} + --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." + --incoming-requests-per-step: 32 + --use-flashinfer-fused-rope: true + +METRICS: + - "generated_tokens" + - "logprobs" diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..55d6955055a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json @@ -0,0 +1,158 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 44.73653959017247, + "logprobs": [ + -9.358970642089844, + -2.7523813247680664, + -4.628502368927002, + -1.4058877229690552, + -0.6050865054130554, + -1.7354254722595215, + -2.4828507900238037, + -2.0520384311676025, + -2.4089853763580322, + -6.2649126052856445, + -1.5644135475158691, + -3.4096615314483643, + -4.358163833618164, + -3.866471767425537, + -2.0575876235961914, + -1.904883623123169, + -3.7622976303100586, + -6.835415363311768, + -0.2829523980617523, + -0.9827429056167603, + -6.655940055847168, + -7.188957214355469, + -12.757233619689941, + -2.1933951377868652, + -3.808887481689453, + -0.515199601650238, + -4.323916912078857, + -0.067625492811203, + -0.09976530075073242, + -3.228640556335449, + -10.129311561584473, + -1.1787357330322266, + -5.97692346572876, + -5.036575794219971, + -3.8267176151275635, + -2.6010468006134033, + -3.366438865661621, + -5.553505897521973, + -1.6046268939971924, + -5.442874908447266, + -12.218503952026367, + -12.597894668579102, + -0.0976092740893364, + -2.530579090118408, + -1.4139617681503296, + -2.8606526851654053, + -1.1690009832382202, + -0.0066696410067379475, + -3.361189365386963, + -13.191482543945312, + -4.413737773895264, + -2.639688491821289, + -6.0114641189575195, + -0.7672993540763855, + -0.047326065599918365, + -1.550362467765808, + -1.137772798538208, + -5.627618789672852, + -0.40103790163993835, + -4.908735275268555, + -0.5704602599143982, + -0.6625558733940125, + -2.364135503768921, + -13.609526634216309, + -0.08865148574113846, + -3.5251970291137695, + -1.3791766166687012, + -6.395696640014648, + -0.588782787322998, + -3.566770076751709, + -0.8742034435272217, + -1.5827170610427856, + 
-5.3912353515625, + -17.150842666625977, + -6.6234588623046875, + -0.885993242263794, + -4.162992477416992, + -1.1942744255065918, + -2.281689405441284, + -1.7708709239959717, + -0.22030864655971527, + -9.292593955993652, + -0.1258234828710556, + -7.346449851989746, + -2.5470826625823975, + -4.115433692932129, + -3.5646262168884277, + -1.9410749673843384, + -2.3247878551483154, + -1.523364543914795, + -2.360647678375244, + -1.708706021308899, + -1.131014108657837, + -2.944424867630005, + -0.5273782014846802, + -0.44912564754486084, + -1.753378987312317, + -0.8341047167778015, + -0.4124295711517334, + -0.9006240367889404, + -1.4890273809432983, + -0.4379286766052246, + -1.6497018337249756, + -0.5444425344467163, + -1.2305881977081299, + -1.164027214050293, + -0.002498721005395055, + -1.165798544883728, + -0.007112303748726845, + -0.718407154083252, + -0.7442683577537537, + -0.04299728572368622, + -0.8688321113586426, + -0.021008115261793137, + -2.033963680267334, + -1.2936673164367676, + -0.78721684217453 + ] + } +} diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/model_config.yaml new file mode 100644 index 00000000000..306c12bd653 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/model_config.yaml @@ -0,0 +1,58 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 +TEST_TYPE: frozen-start +MODE: inference +MODEL_ARGS: + --tiktoken-pattern: v2 + --use-mcore-models: true + --tokenizer-type: TikTokenizer + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --auto-detect-ckpt-format: true + --max-tokens-to-oom: 3600000 + --inference-max-seq-length: 4096 + --attention-backend: 
flash + --use-checkpoint-args: true + --micro-batch-size: 1 + --no-load-optim: true + --no-use-tokenizer-model-from-checkpoint-args: true + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ + --distributed-backend: nccl + --log-interval: 1 + --transformer-impl: inference_optimized + --sequence-parallel: true + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --deterministic-mode: true + --ckpt-format: torch_dist + --bf16: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --num-layers: 24 + --hidden-size: 1152 + --num-attention-heads: 16 + --max-position-embeddings: 1024 + --seq-length: 1024 + --temperature: 1.0 + --top_k: 1 + --return-log-probs: true + --num-tokens-to-generate: 30 + --inference-dynamic-batching-max-requests-override: 8 # hardcode decode padding tokens to 7 for reproducibility + --inference-dynamic-batching-buffer-guaranteed-fraction: 0 + --inference-dynamic-batching-buffer-overflow-factor: 0.2 + --inference-dynamic-batching-buffer-size-gb: 20 + --dist-ckpt-strictness: log_unexpected + --inference-ckpt-non-strict: true # To handle the extra_state errors + --output-path: ${TENSORBOARD_PATH} + --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." 
+ --incoming-requests-per-step: 32 + --use-flashinfer-fused-rope: true + +METRICS: + - "generated_tokens" + - "logprobs" diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json index 6ef98105cbd..f32580e937f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json @@ -157,5 +157,5 @@ -0.0585334412753582 ] }, - "throughput": [13.93210545115292, 13.93210545115292] -} \ No newline at end of file + "throughput": [12.319796866345767, 12.319796866345767] +} diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml index 59186f8d532..e6b659cf46f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml @@ -41,10 +41,7 @@ MODEL_ARGS: --top_k: 1 --return-log-probs: true --num-tokens-to-generate: 30 - --inference-dynamic-batching-max-requests-override: 8 # hardcode decode padding tokens to 7 for reproducibility - --inference-dynamic-batching-buffer-guaranteed-fraction: 0 - --inference-dynamic-batching-buffer-overflow-factor: 0.2 - --inference-dynamic-batching-buffer-size-gb: 20 + --inference-dynamic-batching-buffer-size-gb: 10 --dist-ckpt-strictness: log_unexpected --inference-ckpt-non-strict: true # To handle the extra_state errors --output-path: ${TENSORBOARD_PATH} diff --git 
a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json index 07adf271434..4ebaf72f5e7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json @@ -1,158 +1,158 @@ { - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And that this is the place where you can be yourself, and be yourself in the most beautiful way. 
And that this is the place where you can", - "generated_tokens": [ - 3060, - 1455, - 1593, - 1395, - 1278, - 3535, - 2478, - 1636, - 1710, - 1402, - 14019, - 1044, - 1321, - 1402, - 14019, - 1294, - 1278, - 2725, - 15568, - 3039, - 1046, - 3060, - 1455, - 1593, - 1395, - 1278, - 3535, - 2478, - 1636, - 1710 - ], - "latency": 2.020272731781006, - "logprobs": [ - -9.358587265014648, - -2.7594826221466064, - -4.608366012573242, - -1.4093360900878906, - -0.6152952313423157, - -1.7217562198638916, - -2.496668815612793, - -2.0547454357147217, - -2.441960573196411, - -6.280838966369629, - -1.5643692016601562, - -3.462346076965332, - -4.428728103637695, - -3.8633861541748047, - -1.9936373233795166, - -1.8929449319839478, - -3.796365737915039, - -6.8360137939453125, - -0.2901247441768646, - -0.9246833324432373, - -6.633338928222656, - -7.166708469390869, - -12.771251678466797, - -2.198296308517456, - -3.7778120040893555, - -0.4983733296394348, - -4.381269454956055, - -0.0666784718632698, - -0.09580295532941818, - -3.2437636852264404, - -10.079947471618652, - -1.172220230102539, - -5.977442741394043, - -5.046236038208008, - -3.855658531188965, - -2.5585858821868896, - -3.356245994567871, - -5.557229518890381, - -1.6787731647491455, - -5.483290672302246, - -12.218501091003418, - -12.61402702331543, - -0.09662941098213196, - -2.5431432723999023, - -1.4071024656295776, - -2.9154715538024902, - -1.1964417695999146, - -0.006458481773734093, - -3.3625335693359375, - -13.262511253356934, - -4.314079761505127, - -2.617699146270752, - -5.987792015075684, - -0.778266429901123, - -0.048888545483350754, - -1.548882007598877, - -1.1381981372833252, - -5.627166748046875, - -0.4078553318977356, - -4.958505630493164, - -0.6187160611152649, - -0.7174848914146423, - -2.469533920288086, - -13.620073318481445, - -0.09088654816150665, - -3.526974678039551, - -1.4195809364318848, - -6.402483940124512, - -0.5898402333259583, - -3.565917491912842, - -0.8561318516731262, - -1.6140165328979492, - 
-5.370549201965332, - -17.159223556518555, - -6.583524703979492, - -0.8855001926422119, - -4.19431209564209, - -1.2012220621109009, - -2.2563133239746094, - -1.7674944400787354, - -0.22064533829689026, - -9.292220115661621, - -0.12445646524429321, - -7.29617977142334, - -2.526529312133789, - -4.071560859680176, - -3.5568013191223145, - -1.926215410232544, - -2.349026918411255, - -2.2132363319396973, - -0.3125414550304413, - -1.4718132019042969, - -2.149106740951538, - -1.0855519771575928, - -1.631832242012024, - -1.3751734495162964, - -1.9396103620529175, - -1.5293723344802856, - -0.8444125056266785, - -1.2414811849594116, - -1.9522171020507812, - -2.4338042736053467, - -1.5651824474334717, - -0.9498789310455322, - -1.8044980764389038, - -2.356677770614624, - -1.247452974319458, - -1.550165057182312, - -0.5635553598403931, - -0.6177330017089844, - -0.4778785705566406, - -0.020452087745070457, - -0.48500269651412964, - -0.23854275047779083, - -0.06543659418821335, - -0.11837350577116013, - -0.0585334412753582 - ] - } -} \ No newline at end of file + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 42.63835311005823, + "logprobs": [ + -9.358713150024414, + -2.724055767059326, + -4.5792131423950195, + -1.4844143390655518, + -0.6546129584312439, + -1.7303215265274048, + -2.4795279502868652, + -2.0776171684265137, + -2.4553134441375732, + -6.219150066375732, + -1.566371202468872, + -3.486889362335205, + -4.418787479400635, + -3.8580172061920166, + -2.0664010047912598, + -1.843908667564392, + -3.744598627090454, + -6.82543420791626, + -0.2880207300186157, + -0.9257857799530029, + -6.612694263458252, + -7.218401908874512, + -12.827808380126953, + -2.1861495971679688, + -3.8218231201171875, + -0.5008565187454224, + -4.383245468139648, + -0.06934759020805359, + -0.09667497128248215, + -3.2640299797058105, + -10.102912902832031, + -1.1498218774795532, + -5.979549407958984, + -5.0192108154296875, + -3.8367133140563965, + -2.581653356552124, + -3.4087462425231934, + -5.545716285705566, + -1.6541939973831177, + -5.547749996185303, + -12.21850872039795, + -12.582784652709961, + -0.09534379839897156, + -2.522055149078369, + -1.4054086208343506, + -2.8758127689361572, + -1.1866405010223389, + -0.005799253936856985, + -3.3871712684631348, + -13.193516731262207, + -4.389392852783203, + -2.520228862762451, + -6.023908615112305, + -0.7408540844917297, + -0.04526234790682793, + -1.5508661270141602, + -1.1332746744155884, + -5.653256416320801, + -0.4028852581977844, + -4.9457244873046875, + -0.618165135383606, + -0.6616490483283997, + -2.36385178565979, + -13.6455078125, + -0.08668932318687439, + -3.5266754627227783, + -1.3801541328430176, + -6.351947784423828, + -0.5434023141860962, + -3.5673093795776367, + -0.871107816696167, + -1.618450403213501, + -5.378700256347656, 
+ -17.17119026184082, + -6.662005424499512, + -0.9221409559249878, + -4.141905784606934, + -1.2047083377838135, + -2.227570056915283, + -1.7645721435546875, + -0.21892313659191132, + -9.296550750732422, + -0.11995092779397964, + -7.402207851409912, + -2.512965679168701, + -4.100971221923828, + -3.580245018005371, + -1.9462040662765503, + -2.347074031829834, + -1.5288957357406616, + -2.4033043384552, + -1.7311294078826904, + -1.1686863899230957, + -2.938558340072632, + -0.5278136730194092, + -0.4748117923736572, + -1.749883770942688, + -0.8397680521011353, + -0.4109693169593811, + -0.9552587270736694, + -1.5238327980041504, + -0.4656376838684082, + -1.6448218822479248, + -0.5414345264434814, + -1.2422380447387695, + -1.1426063776016235, + -0.002245525596663356, + -1.252556562423706, + -0.007873333990573883, + -0.7185167670249939, + -0.7521701455116272, + -0.042445242404937744, + -0.8852499723434448, + -0.02266514115035534, + -2.0951969623565674, + -1.348037838935852, + -0.8296748399734497 + ] + } +} diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml index 612e621534d..551ba8115cb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml @@ -22,8 +22,9 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 - --transformer-impl: transformer_engine - --tensor-model-parallel-size: 1 + --transformer-impl: inference_optimized + --sequence-parallel: true + --tensor-model-parallel-size: 8 --pipeline-model-parallel-size: 1 --deterministic-mode: true --ckpt-format: torch_dist @@ -51,6 +52,7 @@ MODEL_ARGS: --prompts: "Time travel to 
2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." --incoming-requests-per-step: 32 --use-flashinfer-fused-rope: true + METRICS: - "generated_tokens" - "logprobs" diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..dccdd34a5e7 --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/golden_values_dev_dgx_h100.json @@ -0,0 +1,135 @@ +{ + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " Then, when you're ready, go home and watch the movie again.", + "generated_tokens": [ + 6830, + 1044, + 2200, + 1636, + 6185, + 11831, + 1044, + 1974, + 4590, + 1321, + 9951, + 1278, + 16070, + 2790, + 1046, + 2 + ], + "latency": 22.701347589492798, + "cuda_graph_request_count_map": null, + "step_count": 16, + "logprobs": [ + -9.498085021972656, + -3.787536859512329, + -3.0404648780822754, + -1.7445809841156006, + -0.29672086238861084, + -1.3661342859268188, + -2.3458175659179688, + -1.83931303024292, + -1.4894113540649414, + -6.440437316894531, + -0.8176816701889038, + -1.790361762046814, + -3.6521127223968506, + -3.7014482021331787, + -1.5858951807022095, + -1.5492421388626099, + -2.844204902648926, + -6.694585800170898, + -0.06552714854478836, + -1.333437204360962, + -6.077418327331543, + -9.448220252990723, + -10.46927261352539, + -1.4987666606903076, + -4.727880001068115, + -0.7596290111541748, + -2.152517795562744, + -0.013758113607764244, + -0.040566492825746536, + -3.1010313034057617, + -8.735280990600586, + -1.5446771383285522, + -5.841436862945557, + -3.0970406532287598, + -4.0269670486450195, + -3.769413948059082, + -2.466399669647217, + -2.3482255935668945, + -0.47234833240509033, + -1.114174723625183, + -5.310229778289795, + -8.236719131469727, + -0.015452657826244831, + -2.854970932006836, + -1.2198810577392578, + -3.923705577850342, + -0.9644856452941895, + -0.0026721982285380363, + -3.096668243408203, + -11.110801696777344, + -3.688267230987549, + -2.3297765254974365, + -4.670788764953613, + -0.09854680299758911, + -0.06234245002269745, + -1.3255000114440918, + -2.169330596923828, + -4.490111827850342, + -0.4412422776222229, + -3.9356117248535156, + -0.5775455832481384, + -0.2409835010766983, + -2.9197134971618652, + 
-13.475022315979004, + -0.10248012840747833, + -3.5023770332336426, + -0.8544933795928955, + -5.194520473480225, + -0.32954925298690796, + -2.3026833534240723, + -0.5346049070358276, + -1.2862977981567383, + -4.881562232971191, + -15.555293083190918, + -4.919404029846191, + -0.22008435428142548, + -6.644532680511475, + -0.8938115239143372, + -2.1304054260253906, + -1.8866363763809204, + -0.20106904208660126, + -5.917205810546875, + -0.0056310598738491535, + -7.453446388244629, + -3.1677205562591553, + -3.706507682800293, + -2.136584520339966, + -2.9287283420562744, + -1.4792609214782715, + -2.4399306774139404, + -1.2330785989761353, + -1.9715899229049683, + -1.9578948020935059, + -0.23143476247787476, + -2.052696466445923, + -1.0413113832473755, + -1.1709030866622925, + -2.825991630554199, + -1.6848523616790771, + -2.2008259296417236, + -1.5216114521026611, + -1.2439141273498535, + -1.412055253982544 + ] + }, + "throughput": [ + 13.750125804204401, 13.955213632130931 + ] +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/model_config.yaml new file mode 100644 index 00000000000..4ae5c719291 --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/model_config.yaml @@ -0,0 +1,72 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 +TEST_TYPE: frozen-start +MODE: inference +MODEL_ARGS: + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --log-memory-to-tensorboard: true + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/checkpoint + --tokenizer-model: 
${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-type: TikTokenizer + --tiktoken-pattern: v2 + --distributed-backend: nccl + --log-interval: 1 + --transformer-impl: transformer_engine + --tensor-model-parallel-size: 1 + --pipeline-model-parallel-size: 1 + --expert-model-parallel-size: 1 + --use-mcore-models: true + --is-hybrid-model: true + --model-provider: mamba + --init-method-std: 0.0198 + --untie-embeddings-and-output-weights: true + --disable-bias-linear: true + --init-method-std: 0.014 + --position-embedding-type: none + --num-layers: 50 + --hidden-size: 2048 + --ffn-hidden-size: 11264 + --num-attention-heads: 16 + --kv-channels: 128 + --hybrid-override-pattern: M-M-M-M*-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M- + --spec: megatron.core.models.mamba.mamba_layer_specs mamba_stack_spec + --normalization: RMSNorm + --swiglu: true + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --seq-length: 4096 + --max-position-embeddings: 4096 + --micro-batch-size: 1 + --ckpt-format: torch_dist + --ckpt-fully-parallel-save: true + --ckpt-fully-parallel-load: true + --ckpt-assume-constant-structure: true + --dist-ckpt-strictness: log_unexpected + --bf16: true + --attention-backend: flash + --no-create-attention-mask-in-dataloader: true + --num-workers: 8 + --use-checkpoint-args: true + --no-use-tokenizer-model-from-checkpoint-args: true + --no-load-optim: true + --deterministic-mode: true + --save-interval: 2000 + --temperature: 1.0 + --top_k: 1 + --return-log-probs: true + --num-tokens-to-generate: 30 + --max-tokens-to-oom: 3600000 + --inference-max-seq-length: 4096 + --output-path: ${TENSORBOARD_PATH} + --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." + --incoming-requests-per-step: 32 + --inference-repeat-n: 3 +METRICS: + - "generated_tokens" + - "logprobs" diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json index 1a9705f8181..d9a60d1ae11 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json @@ -174,5 +174,5 @@ -0.5394397377967834 ] }, - "throughput": [25.35687538450034, 25.35687538450034] + "throughput": [34.95064017365726, 34.95064017365726] } diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index 0e1f9110793..e97dc0b56a4 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -80,6 +80,7 @@ MODEL_ARGS: --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." --incoming-requests-per-sec: -1 --inference-repeat-n: 8 + --inference-dynamic-batching-buffer-size-gb: 20 METRICS: - "generated_tokens" - "logprobs" diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index 1b9eaaf1f65..6c119cc548b 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -76,6 +76,7 @@ MODEL_ARGS: --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." --incoming-requests-per-sec: -1 # all requests arrive up front. --inference-repeat-n: 8 + --inference-dynamic-batching-buffer-size-gb: 20 METRICS: - "generated_tokens" - "logprobs" diff --git a/tests/test_utils/python_scripts/auto_reminder_github.py b/tests/test_utils/python_scripts/auto_reminder_github.py index df75ec0542c..7484244b717 100644 --- a/tests/test_utils/python_scripts/auto_reminder_github.py +++ b/tests/test_utils/python_scripts/auto_reminder_github.py @@ -58,27 +58,42 @@ def get_user_email(self, username: str): try: user = self.github.get_user(username) + public_email = None # 1. 
Try public profile email first if user.email and not user.email.endswith("@users.noreply.github.com"): - self.email_cache[username] = user.email - return user.email + if user.email.endswith("@nvidia.com"): + self.email_cache[username] = user.email + return user.email + else: + public_email = user.email # 2. If no public email, check recent commits on the main repo try: # Use get_commits(author=...) which is more direct than search_commits for commit in self.repo.get_commits(author=user)[:10]: email = commit.commit.author.email - if email and not email.endswith("@users.noreply.github.com"): + if ( + email + and not email.endswith("@users.noreply.github.com") + and email.endswith("@nvidia.com") + ): self.email_cache[username] = email return email + elif ( + email + and not email.endswith("@users.noreply.github.com") + and public_email is None + ): + public_email = email except Exception as e: logger.debug(f"Could not check commits for {username}: {e}") - # 3. Fallback to public email (even if noreply) or a constructed noreply - email = user.email or f"{username}@users.noreply.github.com" - self.email_cache[username] = email - return email + if public_email is None: + public_email = f"{username}@users.noreply.github.com" + + self.email_cache[username] = public_email + return public_email except Exception as e: logger.warning(f"Could not get user object for {username}: {e}") diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml index c61128aaca2..6a3d582d3ae 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml @@ -39,7 +39,7 @@ spec: ARGUMENTS=( "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=null" + "DATA_PATH=/mnt/artifacts/" "DATA_CACHE_PATH=/workspace/data/cache" 
"TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" @@ -59,8 +59,23 @@ products: - environment: [dev] scope: [flaky] platforms: [dgx_h100] + - test_case: [gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq] + products: + - environment: [dev] + scope: [flaky] + platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq] products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] + - test_case: [gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq] + products: + - environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] + - test_case: [gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq] + products: + - environment: [dev] + scope: [flaky] + diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index 0b3606fd702..34030e4923a 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -114,6 +114,11 @@ products: platforms: [dgx_h100] - environment: [lts] scope: [nightly] + - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset] + products: + - environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer] products: - environment: [dev] diff --git a/tests/test_utils/recipes/mamba-dynamic-inference.yaml b/tests/test_utils/recipes/mamba-dynamic-inference.yaml new file mode 100644 index 00000000000..9ca1bab4402 --- /dev/null +++ b/tests/test_utils/recipes/mamba-dynamic-inference.yaml @@ -0,0 +1,61 @@ +type: basic +format_version: 1 +maintainers: [mcore] +loggers: [stdout] +spec: + name: "{test_case}_{environment}_{platforms}" + model: hybrid + build: mcore-pyt-{environment} + nodes: 1 + gpus: 1 + n_repeat: 1 + platforms: dgx_a100 + script_setup: | + unset https_proxy + echo "machine gitlab-master.nvidia.com login okoenig password 
$RO_API_TOKEN" | tee -a /root/.netrc + + # Checkout latest + cd /opt + rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm + git init + git remote add origin $MCORE_REPO + git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*' + git fetch origin $MCORE_MR_COMMIT + git checkout $MCORE_MR_COMMIT + git rev-parse HEAD + # Checkout backwards-ref + cd /opt + rm -rf /opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy + git init + git remote add origin $MCORE_REPO + git fetch origin $MCORE_BACKWARDS_COMMIT + git checkout $MCORE_BACKWARDS_COMMIT + git rev-parse HEAD + rm -rf megatron; cp -a /opt/megatron-lm/megatron ./ + script: |- + ls + cd /opt/megatron-lm + + ARGUMENTS=( + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" + "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" + "DATA_PATH=null" + "DATA_CACHE_PATH=/workspace/data/cache" + "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference.py" + "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "OUTPUT_PATH={assets_dir}" + "TENSORBOARD_PATH={assets_dir}/generations_{environment}_{platforms}.json" + "N_REPEAT={n_repeat}" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" + "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" + ) + + bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} + +products: + - test_case: [hybrid_dynamic_inference_tp1_pp1_dp8_583m] + products: + - environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] diff --git a/tests/unit_tests/data/test_fim_dataset.py b/tests/unit_tests/data/test_fim_dataset.py new file mode 100644 index 00000000000..7022a4b5fa9 --- /dev/null +++ b/tests/unit_tests/data/test_fim_dataset.py @@ -0,0 +1,87 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +import pytest +import torch + +from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder +from megatron.core.datasets.utils import compile_helpers, get_blend_from_list +from megatron.core.tokenizers import MegatronTokenizer +from megatron.training.datasets.fim_dataset import GPTFIMDataset, GPTFIMDatasetConfig +from tests.unit_tests.test_utilities import Utils + + +@pytest.mark.parametrize("spm_rate", [0.0, 1.0]) +@pytest.mark.parametrize("split_sample", [None, "python"]) +def test_fim_gpt_dataset(spm_rate, split_sample): + if torch.distributed.is_available(): + Utils.initialize_distributed() + if torch.distributed.get_rank() == 0: + compile_helpers() + torch.distributed.barrier() + else: + compile_helpers() + + tokenizer = MegatronTokenizer.from_pretrained( + tokenizer_path="/opt/data/tokenizers/huggingface", + metadata_path={"library": "huggingface"}, + additional_special_tokens=["", "", "", "", ""], + include_special_tokens=True, + ) + blend = get_blend_from_list(["/opt/data/datasets/fim/fim_text_document"]) + extra_tokens = { + "prefix": "", + "middle": "", + "suffix": "", + "pad": "", + "eod": "", + } + seq_length = 32 + rate = 1.0 + fragment_rate = 1.0 + config = GPTFIMDatasetConfig( + blend=blend, + random_seed=1234, + sequence_length=seq_length, + split="990,9,1", + tokenizer=tokenizer, + reset_position_ids=True, + reset_attention_mask=True, + eod_mask_loss=True, + fim_extra_tokens=extra_tokens, + fim_rate=rate, + fim_spm_rate=spm_rate, + fim_fragment_rate=fragment_rate, + fim_split_sample=split_sample, + ) + + datasets = BlendedMegatronDatasetBuilder( + GPTFIMDataset, [10, 10, 10], lambda: True, config + ).build() + + prefix_id = tokenizer.tokenize("")[1] + suffix_id = tokenizer.tokenize("")[1] + middle_id = tokenizer.tokenize("")[1] + + dataset = datasets[0] + assert dataset.fim_rate == rate + assert dataset.fim_spm_rate == spm_rate + assert dataset.fragment_fim_rate == fragment_rate + + tokens = 
dataset[0]["tokens"].tolist() + if split_sample: + split_sample_id = tokenizer.tokenize(split_sample)[1] + split_sample_index = tokens.index(split_sample_id) + assert prefix_id == tokens[split_sample_index + 1] + if spm_rate == 0.0: + assert prefix_id == tokens[0] + assert suffix_id in tokens + assert middle_id in tokens + assert tokens.index(suffix_id) < tokens.index(middle_id) + else: + assert prefix_id == tokens[0] + assert suffix_id == tokens[1] + assert middle_id in tokens + + +if __name__ == "__main__": + test_fim_gpt_dataset() diff --git a/tests/unit_tests/inference/contexts/test_dynamic_context.py b/tests/unit_tests/inference/contexts/test_dynamic_context.py index 0674cdfcabd..1baf9034c9d 100644 --- a/tests/unit_tests/inference/contexts/test_dynamic_context.py +++ b/tests/unit_tests/inference/contexts/test_dynamic_context.py @@ -5,6 +5,9 @@ import pytest import torch +from megatron.core.inference.contexts.attention_context.mamba_metadata import ( + MambaInferenceStateConfig, +) from megatron.core.inference.contexts.dynamic_context import ( DynamicInferenceContext, RequestOverflowError, @@ -28,6 +31,8 @@ class TestDynamicContext: def _setup_model_parallel_group(self, tensor_parallel_size, pipeline_parallel_size): + self.pp_size = pipeline_parallel_size + Utils.initialize_model_parallel( tensor_model_parallel_size=tensor_parallel_size, pipeline_model_parallel_size=pipeline_parallel_size, @@ -43,38 +48,39 @@ def _get_dynamic_context( max_sequence_length, buffer_size_gb, block_size_tokens, - buffer_guaranteed_fraction, - buffer_overflow_factor, - max_requests_override, - max_tokens_override, + max_tokens, is_hybrid_model=False, layer_type_list=None, rounder=64, ): set_rounder(rounder) - if is_hybrid_model and layer_type_list is None: - layer_type_list = [Symbols.MAMBA, Symbols.MLP, Symbols.ATTENTION, Symbols.MLP] + if is_hybrid_model: + if layer_type_list is None: + layer_type_list = [Symbols.MAMBA, Symbols.MLP, Symbols.ATTENTION, Symbols.MLP] + 
mamba_conv_states_shape = (544, 4) + mamba_ssm_states_shape = (8, 64, 16) + mamba_inference_state_config = MambaInferenceStateConfig( + layer_type_list, mamba_conv_states_shape, mamba_ssm_states_shape + ) + else: + mamba_inference_state_config = None dynamic_context = DynamicInferenceContext( params_dtype=params_dtype, - num_layers=num_layers, + num_layers=num_layers // self.pp_size, kv_channels=kv_channels, num_attention_heads=num_attention_heads, max_sequence_length=max_sequence_length, num_cuda_graphs=None, use_cuda_graphs_for_non_decode_steps=not is_hybrid_model, buffer_size_gb=buffer_size_gb, - buffer_guaranteed_fraction=buffer_guaranteed_fraction, block_size_tokens=block_size_tokens, - buffer_overflow_factor=buffer_overflow_factor, - max_requests_override=max_requests_override, - max_tokens_override=max_tokens_override, - layer_type_list=layer_type_list, - mamba_conv_states_shape=(544, 4), - mamba_ssm_states_shape=(8, 64, 16), + max_tokens=max_tokens, + mamba_inference_state_config=mamba_inference_state_config, use_flashinfer_fused_rope=None, # default to using flash-infer if available # this is for compatibility with the LTS environment + unified_memory_level=0, # unit tests currently broken with UVM ) return dynamic_context @@ -93,28 +99,25 @@ def test_initialize_dynamic_context(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, is_hybrid_model=is_hybrid_model, ) if not is_hybrid_model: - assert dynamic_context.gtd_block_count == 48 - assert dynamic_context.gtd_request_count == 12 - assert dynamic_context.block_allocator.block_count_total == 491 - assert dynamic_context.max_requests == 128 - assert dynamic_context.max_tokens == 62848 + assert dynamic_context.block_allocator.total_count == 491 + assert dynamic_context.block_allocator.active_count == 
245 + assert dynamic_context.max_total_requests == 490 + assert dynamic_context.max_active_requests == 245 + assert dynamic_context.max_tokens == 16384 assert dynamic_context.num_mamba_layers == 0 assert dynamic_context.mamba_metadata is None else: - assert dynamic_context.gtd_block_count == 112 - assert dynamic_context.gtd_request_count == 28 - assert dynamic_context.block_allocator.block_count_total == 1156 - assert dynamic_context.max_requests == 320 - assert dynamic_context.max_tokens == 154176 + assert dynamic_context.block_allocator.total_count == 555 + assert dynamic_context.block_allocator.active_count == 277 + assert dynamic_context.max_total_requests == 554 + assert dynamic_context.max_active_requests == 277 + assert dynamic_context.max_tokens == 16384 assert dynamic_context.num_mamba_layers == 1 assert dynamic_context.mamba_metadata is not None @@ -131,11 +134,8 @@ def test_is_static_batching(self): num_attention_heads=8, max_sequence_length=512, buffer_size_gb=1.0, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, ) assert not dynamic_context.is_static_batching() @@ -150,26 +150,18 @@ def test_is_memory_available(self, is_hybrid_model): num_attention_heads=8, max_sequence_length=512, buffer_size_gb=1.0, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, is_hybrid_model=is_hybrid_model, ) - dynamic_context.block_allocator.block_count_avail = 10 + dynamic_context.block_allocator.active_count = 10 assert dynamic_context.block_allocator.is_memory_available(10) assert not dynamic_context.block_allocator.is_memory_available(11) assert dynamic_context.block_allocator.is_memory_available(1) - dynamic_context.block_allocator.block_count_avail = 0 + dynamic_context.block_allocator.active_count = 0 assert not 
dynamic_context.block_allocator.is_memory_available(1) - dynamic_context.block_allocator.block_count_avail = 10 - dynamic_context.gtd_block_count = 5 - assert dynamic_context.block_allocator.is_memory_available(6) - assert not dynamic_context.block_allocator.is_memory_available(6, safe=True) - @pytest.mark.internal @pytest.mark.parametrize("is_hybrid_model", [False, True]) def test_request_overflow(self, is_hybrid_model: bool): @@ -182,16 +174,14 @@ def test_request_overflow(self, is_hybrid_model: bool): num_attention_heads=8, max_sequence_length=128, buffer_size_gb=0.01, - buffer_guaranteed_fraction=0.1, block_size_tokens=32, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, rounder=1, is_hybrid_model=is_hybrid_model, ) + dynamic_context.max_active_requests //= 2 with pytest.raises(RequestOverflowError): - for i in range(dynamic_context.max_requests + 1): + for i in range(dynamic_context.max_active_requests + 1): dynamic_context.add_request( DynamicInferenceRequest( request_id=i, @@ -214,11 +204,8 @@ def test_token_overflow_error(self, is_hybrid_model: bool): num_attention_heads=8, max_sequence_length=512, buffer_size_gb=0.1, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - buffer_overflow_factor=1.0, - max_requests_override=2, - max_tokens_override=20, # Setting a very low token limit + max_tokens=200, # setting low, but >= context.max_active_requests. 
rounder=1, is_hybrid_model=is_hybrid_model, ) @@ -227,7 +214,7 @@ def test_token_overflow_error(self, is_hybrid_model: bool): dynamic_context.add_request( DynamicInferenceRequest( request_id=1, - prompt_tokens=torch.arange(0, 25, device='cuda'), + prompt_tokens=torch.arange(0, 225, device='cuda'), sampling_params=SamplingParams( num_tokens_to_generate=dynamic_context.max_tokens - 25 ), @@ -246,11 +233,8 @@ def test_reset(self, is_hybrid_model: bool): num_attention_heads=8, max_sequence_length=128, buffer_size_gb=1.0, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, is_hybrid_model=is_hybrid_model, ) @@ -273,7 +257,6 @@ def test_reset(self, is_hybrid_model: bool): dynamic_context.token_to_position_in_request.fill_(1) dynamic_context.token_to_block_idx.fill_(1) dynamic_context.token_to_local_position_within_kv_block.fill_(1) - dynamic_context.block_allocator.block_count_avail = 5 dynamic_context.memory_buffer.fill_(1) dynamic_context.request_to_kv_block_ids.fill_(1) if is_hybrid_model: @@ -303,8 +286,8 @@ def test_reset(self, is_hybrid_model: bool): assert torch.all(dynamic_context.token_to_block_idx == -1) assert torch.all(dynamic_context.token_to_local_position_within_kv_block == 0) assert ( - dynamic_context.block_allocator.block_count_avail - == dynamic_context.block_allocator.block_count_total - 1 + dynamic_context.block_allocator.active_count + == dynamic_context.block_allocator.total_count // 2 ) assert torch.all(dynamic_context.request_to_kv_block_ids == -1) if is_hybrid_model: @@ -323,16 +306,13 @@ def test_allocate_and_release_memory_blocks(self, is_hybrid_model): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, is_hybrid_model=is_hybrid_model, ) if 
is_hybrid_model: - expected_memory_blocks = [1151, 1152, 1153, 1154] + expected_memory_blocks = [550, 551, 552, 553] else: expected_memory_blocks = [486, 487, 488, 489] expected_block_count_avail = expected_memory_blocks[0] @@ -345,20 +325,20 @@ def test_allocate_and_release_memory_blocks(self, is_hybrid_model): .tolist() == expected_memory_blocks ) - assert dynamic_context.block_allocator.block_count_avail == expected_block_count_avail + assert dynamic_context.block_allocator.total_avail == expected_block_count_avail dynamic_context.block_allocator.release_memory_blocks( torch.tensor(expected_memory_blocks[-2:], device='cuda') ) - assert dynamic_context.block_allocator.block_count_avail == expected_block_count_avail + 2 + assert dynamic_context.block_allocator.total_avail == expected_block_count_avail + 2 assert ( dynamic_context.block_allocator.allocate_memory_blocks(1).item() == expected_memory_blocks[-1] ) - assert dynamic_context.block_allocator.block_count_avail == expected_block_count_avail + 1 + assert dynamic_context.block_allocator.total_avail == expected_block_count_avail + 1 # Should return None since we allocate more blocks than what we have. 
assert ( dynamic_context.block_allocator.allocate_memory_blocks( - dynamic_context.block_allocator.block_count_avail + 100 + dynamic_context.block_allocator.total_avail + 100 ) == None ) @@ -375,11 +355,8 @@ def test_add_request(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, is_hybrid_model=is_hybrid_model, ) assert dynamic_context.block_size_tokens == 128 @@ -401,7 +378,7 @@ def test_add_request(self, is_hybrid_model: bool): assert dynamic_context.request_kv_length_offsets[0] == 0 assert dynamic_context.request_kv_block_counts[0] == 2 assert dynamic_context.request_last_kv_block_id[0].item() == ( - 1154 if is_hybrid_model else 489 + 553 if is_hybrid_model else 489 ) assert dynamic_context.request_last_kv_block_offset[0].item() == 15 assert torch.all( @@ -451,11 +428,8 @@ def test_update_request(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, is_hybrid_model=is_hybrid_model, ) @@ -464,7 +438,7 @@ def test_update_request(self, is_hybrid_model: bool): dynamic_context.paused_request_count = 0 dynamic_context.total_request_count = 3 dynamic_context.request_kv_block_counts[0:3] = 1 - new_block_ids = dynamic_context.block_allocator.allocate_memory_blocks(3, safe=True) + new_block_ids = dynamic_context.block_allocator.allocate_memory_blocks(3) dynamic_context.request_to_kv_block_ids[0:3, 0] = new_block_ids if is_hybrid_model: @@ -498,11 +472,8 @@ def test_update_request(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - 
max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, is_hybrid_model=is_hybrid_model, ) @@ -520,18 +491,16 @@ def test_update_request(self, is_hybrid_model: bool): ) total_request_count = 10 - dynamic_context.block_allocator.block_count_avail -= 11 # We align 11 blocks to the 10 requests we have. 3rd request alone we setup like it requires 2 blocks + dynamic_context.block_allocator.total_avail -= 11 # We align 11 blocks to the 10 requests we have. 3rd request alone we setup like it requires 2 blocks dynamic_context.total_request_count = total_request_count dynamic_context.request_to_kv_block_ids[0:total_request_count, 0] = torch.arange( - dynamic_context.block_allocator.block_count_avail, - dynamic_context.block_allocator.block_count_avail + 10, + dynamic_context.block_allocator.total_avail, + dynamic_context.block_allocator.total_avail + 10, ) dynamic_context.request_to_kv_block_ids[3][ 1 - ] = ( - dynamic_context.block_allocator.block_count_avail - ) # Assign one extra block to request 3. + ] = dynamic_context.block_allocator.total_avail # Assign one extra block to request 3. 
dynamic_context.request_kv_length_offsets[0:total_request_count] = 10 # For 0, 1, 5, 6, the total number of tokens in last block is block size -1, so that they will all need extra blocks dynamic_context.request_kv_length_offsets[0:2] = dynamic_context.block_size_tokens - 1 @@ -617,13 +586,13 @@ def test_update_request(self, is_hybrid_model: bool): dynamic_context.request_to_kv_block_ids[0:10].cpu() == torch.tensor( [ - [1144, 1147, -1, -1], - [1145, 1144, -1, -1], - [1149, 1151, -1, -1], - [1150, 1152, -1, -1], - [1148, -1, -1, -1], - [1146, -1, -1, -1], - [1153, -1, -1, -1], + [543, 546, -1, -1], + [544, 543, -1, -1], + [548, 550, -1, -1], + [549, 551, -1, -1], + [547, -1, -1, -1], + [545, -1, -1, -1], + [552, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, -1, -1], @@ -662,22 +631,19 @@ def test_release_memory_blocks_for_finished_requests(self, is_hybrid_model): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, is_hybrid_model=is_hybrid_model, ) # Set up the initial state with 5 requests # Allocate 5 blocks for 5 requests - initial_blocks = dynamic_context.block_allocator.allocate_memory_blocks(5, safe=True) + initial_blocks = dynamic_context.block_allocator.allocate_memory_blocks(5) dynamic_context.total_request_count = 5 dynamic_context.paused_request_count = 0 # Record the available blocks before releasing memory - initial_available_blocks = dynamic_context.block_allocator.block_count_avail + initial_available_blocks = dynamic_context.block_allocator.total_avail # Assign blocks to the requests (one block per request) for i in range(5): @@ -708,7 +674,7 @@ def test_release_memory_blocks_for_finished_requests(self, is_hybrid_model): assert dynamic_context.active_token_count == 2 # Verify that 3 blocks were released by checking the available blocks - assert 
dynamic_context.block_allocator.block_count_avail == initial_available_blocks + 3 + assert dynamic_context.block_allocator.total_avail == initial_available_blocks + 3 if is_hybrid_model: # Request at position 3 now moves into finished request position 0 @@ -737,22 +703,19 @@ def test_finished_requests_with_multiple_blocks(self, is_hybrid_model): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, is_hybrid_model=is_hybrid_model, ) # Set up the initial state with 3 requests, where some use multiple blocks # Allocate 6 blocks in total for the requests - initial_blocks = dynamic_context.block_allocator.allocate_memory_blocks(6, safe=True) + initial_blocks = dynamic_context.block_allocator.allocate_memory_blocks(6) dynamic_context.total_request_count = 3 dynamic_context.paused_request_count = 0 # Record the available blocks before releasing memory - initial_available_blocks = dynamic_context.block_allocator.block_count_avail + initial_available_blocks = dynamic_context.block_allocator.total_avail # Assign blocks to the requests: # - Request 0: 1 block @@ -792,7 +755,7 @@ def test_finished_requests_with_multiple_blocks(self, is_hybrid_model): assert dynamic_context.active_token_count == 0 # Verify that all 6 blocks were released by checking the available blocks - assert dynamic_context.block_allocator.block_count_avail == initial_available_blocks + 6 + assert dynamic_context.block_allocator.total_avail == initial_available_blocks + 6 if is_hybrid_model: # All mamba states should be zeroed out @@ -813,11 +776,8 @@ def test_mamba_states_cache(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + 
max_tokens=None, is_hybrid_model=False, ) with pytest.raises(AssertionError) as error: @@ -831,11 +791,8 @@ def test_mamba_states_cache(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, is_hybrid_model=is_hybrid_model, layer_type_list=[Symbols.MAMBA, Symbols.ATTENTION, Symbols.MAMBA, Symbols.ATTENTION], ) @@ -890,11 +847,8 @@ def test_calculate_and_store_log_probs(self): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, - buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_requests_override=None, - max_tokens_override=None, - buffer_overflow_factor=None, + max_tokens=None, ) # Add a few requests to the context @@ -1097,56 +1051,3 @@ def test_calculate_and_store_log_probs(self): ) current_global_token_offset += expected_len - - @pytest.mark.internal - def test_unified_memory(self): - - from megatron.core.inference.unified_memory import ( - UnifiedMemoryUnsupportedError, - create_unified_mempool, - ) - - # Check UVM support. - try: - create_unified_mempool() - except UnifiedMemoryUnsupportedError: - pytest.skip("Unified memory not available due to bad environment.") - - # Setup. - self._setup_model_parallel_group(1, 1) - - # Compute number of contexts needed to fill GPU memory. - gpu_size_gb = ( - torch.cuda.get_device_properties(torch.cuda.current_device()).total_memory / 1024**3 - ) - buffer_size_gb = 20 - num_contexts = math.ceil(gpu_size_gb / buffer_size_gb) + 1 - - # Allocate enough contexts to fill GPU memory. 
- def init_contexts(*, unified_memory_level): - contexts = [] - for i in range(num_contexts): - contexts.append( - DynamicInferenceContext( - params_dtype=torch.float32, - num_layers=4, - kv_channels=8, - num_attention_heads=2, - max_sequence_length=512, - buffer_size_gb=buffer_size_gb, - buffer_overflow_factor=1, - buffer_guaranteed_fraction=0, - unified_memory_level=unified_memory_level, - ) - ) - - # Pure GPU memory test should OOM. - try: - init_contexts(unified_memory_level=0) - except torch.OutOfMemoryError: - pass - else: - raise Exception("expected OOM.") - - # Unified memory test should succeed. - init_contexts(unified_memory_level=1) diff --git a/tests/unit_tests/inference/engines/test_dynamic_engine.py b/tests/unit_tests/inference/engines/test_dynamic_engine.py index 0ac4b296746..174bf89350b 100644 --- a/tests/unit_tests/inference/engines/test_dynamic_engine.py +++ b/tests/unit_tests/inference/engines/test_dynamic_engine.py @@ -1,9 +1,10 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import asyncio +import math import random import types -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple import pytest @@ -12,6 +13,9 @@ from transformer_engine.pytorch.fp8 import check_fp8_support from megatron.core import parallel_state +from megatron.core.inference.contexts.attention_context.mamba_metadata import ( + MambaInferenceStateConfig, +) from megatron.core.inference.contexts.dynamic_context import ( ActiveRequestCountOverflowError, BlockOverflowError, @@ -34,6 +38,7 @@ ) from megatron.core.models.gpt.gpt_layer_specs import ( get_gpt_layer_local_spec, + get_gpt_layer_with_inference_spec, get_gpt_layer_with_transformer_engine_spec, ) from megatron.core.models.gpt.gpt_model import GPTModel @@ -44,7 +49,7 @@ from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import ( check_mamba_sequence_packing_support, - get_attr_wrapped_model, + get_mamba_inference_state_config_from_model, is_fa_min_version, is_te_min_version, ) @@ -86,10 +91,7 @@ class DynamicEngineTestConfig: context_buffer_size_gb: float = 0.1 # enough room for all tokens. context_block_size_tokens: int = 256 - context_buffer_guaranteed_fraction: float = 0.01 - context_buffer_overflow_factor: Optional[float] = None - context_max_requests_override: Optional[int] = None - context_max_tokens_override: Optional[int] = None + context_max_tokens: Optional[int] = None tensor_model_parallel_size: int = 1 pipeline_model_parallel_size: int = 1 expert_model_parallel_size: int = 1 @@ -105,12 +107,14 @@ class DynamicEngineTestConfig: skip_prompt_log_probs: bool = False cuda_graph_scope: List[str] = None force_build_cuda_graphs: bool = False + transformer_impl: str = "local" # If False, do not build cuda graphs in the tests, even if # num_cuda_graphs is set. 
# For tests concerning cuda-graph warmups, we set this to False # to avoid the overhead of building the graphs, which is not # relevant to the test. The tests only check if the required # context attributes are set correctly. + suspend_resume_interval: Optional[int] = None fp8: bool = False @@ -125,17 +129,6 @@ def __post_init__(self): assert self.num_tokens_total is not None self.max_sequence_length = self.num_tokens_total - # Update overrides if not using overflow factor. - if self.context_buffer_overflow_factor is None: - - # Enough room for all requests. - if self.context_max_requests_override is None: - self.context_max_requests_override = self.num_requests - - # Enough room for all tokens. - if self.context_max_tokens_override is None: - self.context_max_tokens_override = self.num_requests * self.max_sequence_length - if self.cuda_graph_scope is None: self.cuda_graph_scope = ["full_iteration"] @@ -147,6 +140,9 @@ class DynamicEngineTestEnv: config: DynamicEngineTestConfig requests: List[DynamicInferenceRequest] engine: DynamicInferenceEngine + mem_usage: dict = field( + default_factory=lambda: {"start": None, "end": None, "suspend_resume": {}} + ) class TestDynamicInferenceEngine: @@ -215,34 +211,29 @@ def _build_inference_context( test_config: DynamicEngineTestConfig, transformer_config: TransformerConfig, requests: List[DynamicInferenceRequest], - layer_type_list: Optional[List[str]], - mamba_conv_states_shape: Optional[Tuple[int]] = None, - mamba_ssm_states_shape: Optional[Tuple[int]] = None, + mamba_inference_state_config: Optional[MambaInferenceStateConfig] = None, ): """The inference context manages the KV cache and other inference state.""" # Inference context. 
context = DynamicInferenceContext( params_dtype=transformer_config.params_dtype, - num_layers=transformer_config.num_layers, + num_layers=transformer_config.num_layers + // transformer_config.pipeline_model_parallel_size, kv_channels=transformer_config.kv_channels, num_attention_heads=transformer_config.num_query_groups, max_sequence_length=test_config.max_sequence_length, num_cuda_graphs=test_config.num_cuda_graphs, use_cuda_graphs_for_non_decode_steps=not test_config.model_provider == "mamba", buffer_size_gb=test_config.context_buffer_size_gb, - buffer_guaranteed_fraction=test_config.context_buffer_guaranteed_fraction, block_size_tokens=test_config.context_block_size_tokens, - buffer_overflow_factor=test_config.context_buffer_overflow_factor, - max_requests_override=test_config.context_max_requests_override, - max_tokens_override=test_config.context_max_tokens_override, + max_tokens=test_config.context_max_tokens, tensor_model_parallel_size=transformer_config.tensor_model_parallel_size, - layer_type_list=layer_type_list, - mamba_conv_states_shape=mamba_conv_states_shape, - mamba_ssm_states_shape=mamba_ssm_states_shape, + mamba_inference_state_config=mamba_inference_state_config, materialize_only_last_token_logits=test_config.materialize_only_last_token_logits, use_flashinfer_fused_rope=None, # default to using flash-infer if available # this is for compatibility with the LTS environment + unified_memory_level=0, # unit tests currently broken with UVM ) return context @@ -295,16 +286,26 @@ def _build_test_env(cls, test_config): ), sequence_parallel=test_config.sequence_parallel, pipeline_dtype=torch.bfloat16, - add_bias_linear=test_config.expert_model_parallel_size == 1, + add_bias_linear=test_config.expert_model_parallel_size == 1 + and not (test_config.transformer_impl == "inference_optimized"), fp8="hybrid" if test_config.fp8 else None, fp8_recipe="tensorwise" if test_config.fp8 else None, inference_sampling_seed=test_config.random_seed, 
cuda_graph_scope=test_config.cuda_graph_scope, + transformer_impl=test_config.transformer_impl, + normalization=( + "RMSNorm" + if test_config.transformer_impl == "inference_optimized" + else "LayerNorm" + ), + # inference optimized currently only supports RMS Norm ) - if test_config.fp8: + if test_config.fp8 or test_config.transformer_impl == "transformer_engine": layer_spec = get_gpt_layer_with_transformer_engine_spec() - else: + elif test_config.transformer_impl == "local": layer_spec = get_gpt_layer_local_spec() + elif test_config.transformer_impl == "inference_optimized": + layer_spec = get_gpt_layer_with_inference_spec() # GPT model. model = GPTModel( @@ -317,10 +318,13 @@ def _build_test_env(cls, test_config): post_process=parallel_state.is_pipeline_last_stage(), ).cuda() elif test_config.model_provider == "mamba": + pp_size = test_config.pipeline_model_parallel_size # Transformer config. transformer_config = TransformerConfig( params_dtype=torch.bfloat16, - num_layers=3, # 1 Mamba layer, 1 attention layer, 1 MLP layer + num_layers=( + 3 if pp_size == 1 else 6 + ), # 1 Mamba layer, 1 attention layer, 1 MLP layer hidden_size=256, # The Mamba layer places several constraints on this mamba_num_heads=16, num_attention_heads=16, @@ -333,7 +337,7 @@ def _build_test_env(cls, test_config): ), inference_rng_tracker=True, tensor_model_parallel_size=test_config.tensor_model_parallel_size, - pipeline_model_parallel_size=test_config.pipeline_model_parallel_size, + pipeline_model_parallel_size=pp_size, expert_model_parallel_size=test_config.expert_model_parallel_size, num_moe_experts=( None @@ -346,6 +350,7 @@ def _build_test_env(cls, test_config): fp8="hybrid" if test_config.fp8 else None, fp8_recipe="tensorwise" if test_config.fp8 else None, cuda_graph_scope=test_config.cuda_graph_scope, + is_hybrid_model=True, # Needs to be set for correct out_proj init ) # Mamba model. 
@@ -368,22 +373,7 @@ def _build_test_env(cls, test_config): model.eval() - # Layer type list for hybrid models - decoder = get_attr_wrapped_model(model, "decoder") - layer_type_list = getattr(decoder, "layer_type_list", None) - if test_config.model_provider == "mamba": - mamba_states_shapes = decoder.mamba_state_shapes_per_request() - if mamba_states_shapes is not None: - (mamba_conv_states_shape, mamba_ssm_states_shape) = mamba_states_shapes - else: - # A `MambaBlock` can only not have a `MambaLayer` if using pipeline parallelism - # and a particular pipeline stage was not assigned a `MambaLayer`. - assert test_config.pipeline_model_parallel_size > 1 - mamba_conv_states_shape = None - mamba_ssm_states_shape = None - else: - mamba_conv_states_shape = None - mamba_ssm_states_shape = None + mamba_inference_state_config = get_mamba_inference_state_config_from_model(model) # Inference config. inference_config = InferenceWrapperConfig( @@ -400,9 +390,7 @@ def _build_test_env(cls, test_config): test_config=test_config, transformer_config=transformer_config, requests=requests, - layer_type_list=layer_type_list, - mamba_conv_states_shape=mamba_conv_states_shape, - mamba_ssm_states_shape=mamba_ssm_states_shape, + mamba_inference_state_config=mamba_inference_state_config, ) # Inference model wrapper. @@ -416,7 +404,9 @@ def _build_test_env(cls, test_config): # Text generation controller. text_generation_controller = TextGenerationController( inference_wrapped_model=inference_wrapped_model, - tokenizer=types.SimpleNamespace(vocab_size=test_config.vocab_size), + tokenizer=types.SimpleNamespace( + vocab_size=test_config.vocab_size, detokenize=lambda tokens: "tokenized_prompt" + ), ) # Reset global cuda graph state. @@ -435,12 +425,6 @@ def _build_test_env(cls, test_config): # Test env. 
env = DynamicEngineTestEnv(config=test_config, requests=requests, engine=engine) - # Mock the detokenize method to return predictable result - def mock_detokenize_prompt(tokens): - return "tokenized_prompt" - - env.engine.controller.tokenizer.detokenize = mock_detokenize_prompt - return env @classmethod @@ -453,7 +437,31 @@ def _run_step(cls, env): # and engine.async_step() doesn't use this sampling param's # num_tokens_to_generate. result = env.engine.step_modern(verbose=False) - finished_requests = result["finished_requests"] + + # Suspend + resume. + if ( + env.config.suspend_resume_interval is not None + and env.engine.step_count % env.config.suspend_resume_interval == 0 + ): + suspend_resume_mems = {} + suspend_resume_mems["start"] = torch.cuda.memory_stats() + env.engine.suspend() # suspend. + suspend_resume_mems["mid"] = torch.cuda.memory_stats() + env.engine.resume() # resume. + suspend_resume_mems["end"] = torch.cuda.memory_stats() + env.mem_usage["suspend_resume"][env.engine.step_count] = suspend_resume_mems + + # Nothing done? + finished_request_records = result["finished_request_records"] + if len(finished_request_records) == 0: + return + + # Append output tokens. + for finished_request_record in finished_request_records: + finished_request = finished_request_record.merge(env.engine.controller.tokenizer) + request = env.requests[finished_request.request_id] + request.output = finished_request.generated_tokens + request.status = finished_request.status @classmethod @torch.inference_mode() @@ -463,10 +471,12 @@ def _run_test(cls, **test_config_kwargs): env = cls._build_test_env(test_config) # Add requests to engine. + env.mem_usage["start"] = torch.cuda.memory_stats() for request in tqdm(env.requests, "add requests"): # Add request. env.engine._add_request(request) + request.state = "pending" # Insert gap steps between adding requests. 
for _ in range(test_config.num_gap_steps): @@ -493,14 +503,20 @@ def _run_test(cls, **test_config_kwargs): if num_tokens_total is None else num_tokens_total - len(request.prompt_tokens) ) - assert ( - (num_tokens_to_generate is None and num_tokens_total is None) - or len(request.generated_tokens) == num_tokens_expected - or request.status == Status.FAILED - ), ( - f"Request {request.request_id} expected to generate {num_tokens_to_generate} " - f"tokens but generated {len(request.generated_tokens)}" - ) + + # Validate the output length only if suspend_resume_interval is None. + # If it is not None, then the output length could be anything in the + # range [1, num_tokens_to_generate]. + if test_config.suspend_resume_interval is None: + assert ( + (num_tokens_to_generate is None and num_tokens_total is None) + or len(request.generated_tokens) <= num_tokens_expected + or request.status == Status.FAILED + ), ( + f"Request {request.request_id} expected to generate {num_tokens_to_generate} " + f"tokens but generated {len(request.generated_tokens)}" + ) + env.mem_usage["end"] = torch.cuda.memory_stats() return env @@ -518,40 +534,40 @@ def teardown_method(self, method): def test_simple(self, model_provider, num_cuda_graphs, cuda_graph_scope) -> None: """Simple test that runs without errors, and validates output.""" skip_if_mamba_sequence_packing_not_available(model_provider) + num_tokens_to_generate = 16 # Run test. env = self._run_test( + num_tokens_to_generate=num_tokens_to_generate, model_provider=model_provider, num_cuda_graphs=num_cuda_graphs, - context_max_requests_override=32, cuda_graph_scope=cuda_graph_scope, force_build_cuda_graphs=True, ) # Validate max_requests, max_tokens. - assert env.engine.context.max_requests == 32 - assert env.engine.context.max_tokens == 160 + assert env.engine.context.max_tokens == DynamicInferenceContext.DEFAULT_MAX_TOKENS - # Validate output tokens. + # Validate generated tokens. 
gpt_expected_generated_tokens = [ - [69, 85, 55, 74], - [29, 54, 85, 89], - [33, 30, 64, 59], - [45, 76, 33, 67], - [41, 56, 15, 58], - [28, 17, 6, 37], - [17, 2, 54, 47], - [], # this request is failed due to max sequence length overflow + [69, 85, 55, 74, 56, 89, 64, 59, 55, 67, 15, 58, 6, 37, 54, 47], + [29, 54, 33, 72, 45, 76, 41, 56, 28, 25, 17, 2, 61, 6, 98, 76], + [35, 78, 54, 16, 79, 98, 22, 5, 60, 0, 1, 76, 77, 11, 25, 7], + [25, 75, 57, 85, 81, 37, 88, 17, 71, 15, 70, 64, 50, 0, 64, 45], + [32, 5, 85, 75, 30, 68, 23, 33, 20, 26, 89, 20, 92, 97, 38, 81], + [33, 69, 32, 49, 93, 24, 33, 6, 97, 36, 37, 99], + [82, 78, 78, 65, 22, 1, 87, 42, 36, 26, 27, 56, 82, 32, 8, 80], + [], ] mamba_expected_generated_tokens = [ - [74, 72, 83, 59], - [25, 54, 1, 70], - [28, 14, 15, 89], - [87, 27, 30, 52], - [44, 13, 82, 70], - [28, 74, 64, 16], - [8, 4, 83, 5], + [74, 72, 9, 59, 1, 70, 15, 89, 30, 52, 82, 70, 64, 16, 83, 5], + [25, 54, 28, 14, 87, 27, 60, 92, 28, 74, 8, 63, 60, 68, 87, 82], + [31, 21, 87, 25, 96, 13, 32, 49, 40, 54, 55, 68, 73, 2, 64, 96], + [72, 80, 35, 72, 77, 85, 98, 36, 4, 97, 37, 46, 79, 95, 83, 25], + [8, 80, 56, 4, 87, 1, 43, 98, 85, 7, 50, 38, 24, 28, 18, 80], + [9, 94, 36, 16, 87, 57, 25, 76, 64, 92, 47, 86, 73, 72, 71, 97], + [17, 5, 62, 66, 15, 52, 32, 75, 66, 18, 90, 14, 67, 37, 94, 33], [], ] @@ -562,6 +578,10 @@ def test_simple(self, model_provider, num_cuda_graphs, cuda_graph_scope) -> None else: raise ValueError(f"Invalid model_provider {model_provider}") + print(f"Validating {len(env.requests)} requests.") + print(f"Expected generated tokens: {expected_generated_tokens_list}") + print(f"Actual generated tokens: {[request.generated_tokens for request in env.requests]}") + assert len(env.requests) == len(expected_generated_tokens_list) for request, expected_generated_tokens in zip(env.requests, expected_generated_tokens_list): @@ -571,41 +591,6 @@ def test_simple(self, model_provider, num_cuda_graphs, cuda_graph_scope) -> None f"expected 
({expected_generated_tokens})." ) - @pytest.mark.internal - @pytest.mark.skipif( - not is_fa_min_version("2.7.3"), reason="need latest flash attn for dynamic batching" - ) - def test_overflow_factor(self, model_provider: str = "gpt") -> None: - """Test overflow factor arg.""" - skip_if_mamba_sequence_packing_not_available(model_provider) - - # Run test. - env = self._run_test( - context_buffer_overflow_factor=0.1, - context_max_requests_override=None, - context_max_tokens_override=None, - model_provider=model_provider, - ) - - # Validate max_requests, max_tokens. - if model_provider == "gpt": - assert env.engine.context.max_requests == 420 - assert env.engine.context.max_tokens == 420 - elif model_provider == "mamba": - assert env.engine.context.max_requests == 16 - assert env.engine.context.max_tokens == 16 - - @pytest.mark.internal - @pytest.mark.skipif( - not is_fa_min_version("2.7.3"), reason="need latest flash attn for dynamic batching" - ) - @pytest.mark.parametrize("model_provider", ["gpt", "mamba"]) - def test_request_overflow(self, model_provider: str) -> None: - """Test request overflow.""" - skip_if_mamba_sequence_packing_not_available(model_provider) - - self._run_test(context_max_requests_override=4, model_provider=model_provider) - @pytest.mark.skipif( not is_fa_min_version("2.7.3"), reason="need latest flash attn for dynamic batching" ) @@ -613,7 +598,11 @@ def test_request_overflow(self, model_provider: str) -> None: def test_token_overflow_transient(self) -> None: """Test token overflow.""" test_config = DynamicEngineTestConfig( - num_requests=2, min_prompt_length=8, max_prompt_length=8, context_max_tokens_override=12 + num_requests=2, + min_prompt_length=512, + max_prompt_length=512, + num_tokens_to_generate=2, + context_max_tokens=900, ) env = self._build_test_env(test_config) env.engine._add_request(env.requests[0]) @@ -632,7 +621,7 @@ def test_token_overflow_transient(self) -> None: ) def test_token_overflow_nontransient(self) -> None: """Test 
token overflow (non-transient).""" - test_config = DynamicEngineTestConfig(context_max_tokens_override=8) + test_config = DynamicEngineTestConfig(context_max_tokens=8) env = self._build_test_env(test_config) try: env.engine._add_request(env.requests[0]) @@ -689,19 +678,21 @@ def test_cuda_graph_token_counts(self) -> None: # Test num_cuda_graphs. for num_cuda_graphs, expected_cuda_graph_token_counts in [ - (0, [64]), - (1, [64]), - (2, [64, 32]), - (4, [64, 48, 32, 16]), - (8, [64, 56, 48, 40, 32, 24, 16, 8]), - (16, [64, 56, 48, 40, 32, 24, 16, 8]), - (64, [64, 56, 48, 40, 32, 24, 16, 8]), - (1024, [64, 56, 48, 40, 32, 24, 16, 8]), + (0, [40]), + (1, [40]), + (2, [40, 24]), + (4, [40, 32, 16]), + (8, [40, 32, 24, 16, 8]), + (16, [40, 32, 24, 16, 8]), + (64, [40, 32, 24, 16, 8]), + (1024, [40, 32, 24, 16, 8]), ]: # Build cuda graphs (inside dynamic engine). env = self._build_test_env( - DynamicEngineTestConfig(num_requests=64, num_cuda_graphs=num_cuda_graphs) + DynamicEngineTestConfig( + context_buffer_size_gb=0.01, num_cuda_graphs=num_cuda_graphs + ) ) actual_cuda_graph_token_counts = env.engine.context.cuda_graph_token_counts assert ( @@ -721,19 +712,7 @@ def test_cuda_graph_token_counts(self) -> None: ) @pytest.mark.parametrize( "num_warmup_tokens, expected_cuda_graph_token_count", - [ - (1, 8), - (2, 8), - (4, 8), - (8, 8), - (10, 16), - (12, 16), - (16, 16), - (20, 24), - (24, 24), - (28, 32), - (32, 32), - ], + [(1, 8), (2, 8), (4, 8), (8, 8), (10, 16), (12, 16), (16, 16)], ) @torch.inference_mode() def test_cuda_graph_warmup( @@ -748,17 +727,16 @@ def test_cuda_graph_warmup( # Initialize context. 
env = self._build_test_env( - DynamicEngineTestConfig(num_requests=32, num_cuda_graphs=8, num_tokens_to_generate=1) + DynamicEngineTestConfig( + context_buffer_size_gb=0.0041, num_cuda_graphs=8, num_tokens_to_generate=1 + ) ) context = env.engine.context assert context.is_decode_only() - assert context.cuda_graph_token_counts == [ - 32, - 24, - 16, - 8, - ], "cuda_graph_token_counts: %s." % str(context.cuda_graph_token_counts) + assert context.cuda_graph_token_counts == [16, 8], "cuda_graph_token_counts: %s." % str( + context.cuda_graph_token_counts + ) context.initialize_attention_state( num_warmup_tokens=num_warmup_tokens, warmup_engine_mode=warmup_engine_mode @@ -851,7 +829,10 @@ def mock_tokenize_prompt(prompt, add_BOS=False): # Call the generate function. # It's safe to use request 0's sampling params here because all sampling # params are identical as long as use_fixed_output_lengths == False. - finished_requests = env.engine.generate(prompts, env.requests[0].sampling_params) + finished_request_records = env.engine.generate(prompts, env.requests[0].sampling_params) + finished_requests = [ + r.merge(env.engine.controller.tokenizer) for r in finished_request_records + ] # Verify results assert len(finished_requests) == len( @@ -901,10 +882,11 @@ async def test_run_engine(self): num_tokens_to_generate = env.requests[ request_id ].sampling_params.num_tokens_to_generate - result = fut.result() - assert result.generated_length == num_tokens_to_generate, ( + request_record = fut.result() + request = request_record.merge(env.engine.controller.tokenizer) + assert request.generated_length == num_tokens_to_generate, ( f"Request {request_id} expected to generate {num_tokens_to_generate} " - f"tokens but generated {result.generated_length}" + f"tokens but generated {request.generated_length}" ) engine_task.cancel() @@ -951,6 +933,7 @@ def test_return_log_probs(self): @pytest.mark.parametrize("pp_size", [1, 2]) @pytest.mark.parametrize("tp_size", [1, 2]) 
@pytest.mark.parametrize("model_provider", ["gpt", "mamba"]) + @pytest.mark.parametrize("transformer_impl", ["local", "inference_optimized"]) @torch.inference_mode() def test_parallel_inference( self, @@ -960,6 +943,7 @@ def test_parallel_inference( ep_size, sequence_parallel, materialize_only_last_token_logits, + transformer_impl, ): skip_if_mamba_sequence_packing_not_available(model_provider) @@ -975,13 +959,22 @@ def test_parallel_inference( pytest.skip(reason="Sequence parallelism requires tp_size > 1") elif tp_size > 1 and ep_size > 1 and not sequence_parallel: pytest.skip(reason="Sequence parallelism must be used with tp_size > 1 and ep_size > 1") - elif pp_size > 1 and model_provider == "mamba": - pytest.skip( - reason=( - "Running hybrid models with pp_size > 1 and no attention on some " - "pipeline stages is not supported yet." + elif transformer_impl == "inference_optimized": + if ep_size > 1: + pytest.skip( + reason="MoE models are not supported with the inference optimized transformer." + ) + if tp_size > 1 and not sequence_parallel: + pytest.skip( + reason=( + "The inference optimized transformer requires sequence parallelism " + "when tp_size > 1." + ) + ) + if model_provider == "mamba": + pytest.skip( + reason="Mamba model is not supported with the inference optimized transformer." 
) - ) env = self._run_test( model_provider=model_provider, @@ -990,6 +983,7 @@ def test_parallel_inference( expert_model_parallel_size=ep_size, sequence_parallel=sequence_parallel, materialize_only_last_token_logits=materialize_only_last_token_logits, + transformer_impl=transformer_impl, ) @pytest.mark.internal @@ -1038,8 +1032,7 @@ def test_events(self): max_prompt_length=10, num_tokens_to_generate=32, context_buffer_size_gb=0.001, # 0.001, # 8 blocks - context_max_requests_override=8, - context_max_tokens_override=8, + context_max_tokens=8, num_gap_steps=1, ) @@ -1088,27 +1081,58 @@ def test_chunked_prefill(self, model_provider: str): materialize_only_last_token_logits=False, model_provider=model_provider, context_block_size_tokens=256, - context_max_tokens_override=300, + context_max_tokens=1000, ) - -if __name__ == "__main__": - test = TestDynamicInferenceEngine() - test.test_simple(4) - test.test_overflow_factor() - test.test_request_overflow() - test.test_token_overflow_transient() - # test.test_token_overflow_nontransient() # uncomment in megatron-core 0.16 - test.test_block_overflow() - test.test_multi_add() - test.test_fixed_output_lengths() - test.test_cuda_graph_request_counts() - test.test_cuda_graph_warmup(WarmupEngineMode.DECODE, 1, 8) - test.test_generate_function() - asyncio.run(test.test_run_engine()) - test.test_return_log_probs() - test.test_parallel_inference() - # test.test_events() # uncomment in megatron-core 0.16 - test.teardown_method(None) - print("~~~") - print("success.") + @pytest.mark.internal + @pytest.mark.skipif( + not is_fa_min_version("2.7.3"), reason="need latest flash attn for dynamic batching" + ) + @pytest.mark.skip( + reason="test works in isolation, but memory dynamics change when run " + "within unt test suite." + ) + def test_suspend_resume_memory(self): + + # Run tests. + mem_usages = {} + for suspend_resume_interval in None, 8, 4, 2: # interval 1 acts funny. + + # Run test. 
+ env = self._run_test(suspend_resume_interval=suspend_resume_interval, num_gap_steps=1) + + # Record memory usage. + mem_usages[suspend_resume_interval] = env.mem_usage + + # Clear memory to make recorded memories consistent between tests. + # TODO(@lmcafee): why is memory not automatically cleared? + # env.engine.suspend() # TODO(@lmcafee): useful? + del env + + # Utility methods. + get_alloc = lambda mem_stats: mem_stats["allocated_bytes.all.current"] + + # Validate overall 'end' memory usage. + golden_end_bytes = get_alloc(mem_usages[None]["end"]) + for interval, mem_usage in mem_usages.items(): + current_end_bytes = get_alloc(mem_usage["end"]) + assert math.isclose( + golden_end_bytes, current_end_bytes, rel_tol=0.01 + ), f"{current_end_bytes} != {golden_end_bytes}." + + # Validate 'suspend/resume' memory usage. + get_suspend_resume_bytes = lambda key: list( + get_alloc(list(d["suspend_resume"].values())[-1][key]) + for i, d in mem_usages.items() + if i is not None + ) + suspend_resume_mid_bytes = get_suspend_resume_bytes("mid") + suspend_resume_end_bytes = get_suspend_resume_bytes("end") + for mid_bytes in suspend_resume_mid_bytes: + assert math.isclose( + suspend_resume_mid_bytes[0], mid_bytes, rel_tol=0.01 + ), f"{mid_bytes} != {suspend_resume_mid_bytes[0]}." + for end_bytes in suspend_resume_end_bytes: + assert math.isclose( + suspend_resume_end_bytes[0], end_bytes, rel_tol=0.01 + ), f"{end_bytes} != {suspend_resume_end_bytes[0]}." 
diff --git a/tests/unit_tests/inference/engines/test_static_engine.py b/tests/unit_tests/inference/engines/test_static_engine.py index 699a4d1f473..40187a5eff9 100644 --- a/tests/unit_tests/inference/engines/test_static_engine.py +++ b/tests/unit_tests/inference/engines/test_static_engine.py @@ -12,7 +12,11 @@ from megatron.core import parallel_state from megatron.core.inference.contexts import StaticInferenceContext from megatron.core.inference.engines import StaticInferenceEngine -from megatron.core.inference.inference_request import InferenceRequest, Status +from megatron.core.inference.inference_request import ( + DynamicInferenceRequestRecord, + InferenceRequest, + Status, +) from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import ( GPTInferenceWrapper, ) @@ -188,12 +192,19 @@ def test_generate_dynamic(self, batch_size: int, num_trials: int, empty_prompt: prompts = ["" for i in range(batch_size)] else: prompts = ["sample" * (i + 1) for i in range(batch_size)] - results: List[InferenceRequest] = self.static_engine.generate( - prompts, sampling_params=SamplingParams(num_tokens_to_generate=10) + results: List[Union[InferenceRequest, DynamicInferenceRequestRecord]] = ( + self.static_engine.generate( + prompts, sampling_params=SamplingParams(num_tokens_to_generate=10) + ) ) assert len(results) == batch_size for result in results: + if isinstance(result, DynamicInferenceRequestRecord): + result = result.merge(self.static_engine.controller.tokenizer) + assert isinstance(result, InferenceRequest), ( + "expected ; found <%s>." 
% type(result).__name__ + ) assert ( result.status == Status.COMPLETED ), f"Status should be completed but its {result.status}" diff --git a/tests/unit_tests/inference/test_data_parallel_inference_coordinator.py b/tests/unit_tests/inference/test_data_parallel_inference_coordinator.py new file mode 100644 index 00000000000..7b4fb4b4250 --- /dev/null +++ b/tests/unit_tests/inference/test_data_parallel_inference_coordinator.py @@ -0,0 +1,471 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +import asyncio +import random +import time +from collections import deque +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +import pytest +import torch.distributed as dist +from tqdm import tqdm + +from megatron.core.inference.data_parallel_inference_coordinator import ( + DataParallelInferenceCoordinator, +) +from megatron.core.inference.engines.dynamic_engine import DynamicInferenceEngine, RequestEntry +from megatron.core.inference.inference_client import InferenceClient +from megatron.core.inference.inference_request import ( + DynamicInferenceRequest, + DynamicInferenceRequestRecord, + Status, +) +from megatron.core.inference.sampling_params import SamplingParams +from megatron.core.utils import get_asyncio_loop +from tests.unit_tests.test_utilities import Utils + +try: + import zmq + + HAVE_ZMQ = True +except Exception: + HAVE_ZMQ = False + +IS_ZMQ_FLAKY = True + + +class DummyContext: + """Dummy inference context.""" + + def __init__(self): + self.active_cnt = 0 + + def get_active_request_count(self) -> int: + return self.active_cnt + + +class DummyEngine(DynamicInferenceEngine): + """Dummy inference engine that only implements coordinator-related methods.""" + + def __init__(self): + """We cannot call super().__init__() because it requires complex setup.""" + self.waiting_request_ids = deque() + self.requests: Dict[int, RequestEntry] = {} + self.suspend_signal = False + self.is_suspended = False + 
self._loop = get_asyncio_loop() + self.context = DummyContext() + self.running = asyncio.Event() + self.paused = asyncio.Event() + self.stopped = asyncio.Event() + self.pending_microbatch = deque() + self.received_pause: bool = False + self.received_stop: bool = False + + def add_request( + self, request_id: int, prompt: str, sampling_params: Optional[SamplingParams] = None + ) -> asyncio.Future[DynamicInferenceRequestRecord]: + """Dummy add_request.""" + + self.requests[request_id] = RequestEntry( + record=DynamicInferenceRequestRecord.from_request( + DynamicInferenceRequest( + prompt=prompt, + request_id=request_id, + sampling_params=sampling_params, + status=Status.WAITING_IN_QUEUE, + ) + ), + future=self._loop.create_future(), + ) + self.waiting_request_ids.append(request_id) + + return self.requests[request_id].future + + async def async_step(self, *, verbose: Optional[bool] = False) -> Dict: + """Dummy async_step.""" + # Finish "active" requests. + finished_request_records = [] + to_remove = [] + for request_id, entry in self.requests.items(): + request = entry.record[-1] + if request.status == Status.ACTIVE_AND_GENERATING_TOKENS: + request.sampling_params.num_tokens_to_generate -= 1 + if request.sampling_params.num_tokens_to_generate > 0: + continue + request.status = Status.COMPLETED + self.context.active_cnt -= 1 + finished_request_records.append(entry.record) + entry.future.set_result(entry.record) + to_remove.append(request_id) + for request_id in to_remove: + del self.requests[request_id] + + # Activate queued requests. They will "process" for 1 step. 
+ active_request_ids = [] + while self.waiting_request_ids: + request_id = self.waiting_request_ids.popleft() + record = self.requests[request_id].record + record[-1].status = Status.ACTIVE_AND_GENERATING_TOKENS + self.context.active_cnt += 1 + active_request_ids.append(request_id) + + return { + "active_request_ids": active_request_ids, + "finished_request_records": finished_request_records, + "step_time": 0.01, + "cuda_graph_request_count": 1, + } + + +@dataclass +class CoordinatorTestConfig: + """Test configuration args.""" + + port: int = 46581 + mp_port: int = 49581 + launch_inference_coordinator: bool = True + stop_engines: bool = True + verify_results: bool = True + + num_requests: int = 10**1 + min_time_offset: float = 10 ** (-4) + max_time_offset: float = 10 ** (-3) + num_steps_to_finish: int = 1 + num_iterations: int = 1 + + tensor_model_parallel_size: int = 1 + pipeline_model_parallel_size: int = 1 + + +@dataclass +class CoordinatorTestEnv: + """Test environment, including requests.""" + + config: CoordinatorTestConfig + requests: List[Tuple] + engine: DummyEngine + responses: List[List[DynamicInferenceRequest]] = field(default_factory=list) + timing_data: Dict[str, Optional[float]] = field( + default_factory=lambda: { + "start_time": None, + "init_time": None, + "done_time": None, + "stop_time": None, + } + ) + + +class TestCoordinator: + + @classmethod + def _build_requests(cls, test_config: CoordinatorTestConfig) -> List[Tuple]: + ret = [] + + for _ in range(test_config.num_requests): + arrival_delta = random.uniform(test_config.min_time_offset, test_config.max_time_offset) + num_tokens = test_config.num_steps_to_finish + ret.append( + ("Hello world!", SamplingParams(num_tokens_to_generate=num_tokens), arrival_delta) + ) + return ret + + @classmethod + def _build_test_env(cls, test_config): + Utils.initialize_model_parallel( + tensor_model_parallel_size=test_config.tensor_model_parallel_size, + 
pipeline_model_parallel_size=test_config.pipeline_model_parallel_size, + ) + requests = cls._build_requests(test_config) + engine = DummyEngine() + engine.num_steps_to_finish = test_config.num_steps_to_finish + return CoordinatorTestEnv(config=test_config, requests=requests, engine=engine) + + @classmethod + async def _run_test(cls, **test_config_kwargs): + # Test environment. + test_config = CoordinatorTestConfig(**test_config_kwargs) + env = cls._build_test_env(test_config) + + # Connect each engine to their respective processes. + env.timing_data["start_time"] = time.time() + await env.engine.start_listening_to_data_parallel_coordinator( + inference_coordinator_port=test_config.port, + launch_inference_coordinator=test_config.launch_inference_coordinator, + ) + + results_success = False + shutdown_success = False + try: + if dist.get_rank() == 0: + client = InferenceClient(test_config.port) + await client.start() + env.timing_data["init_time"] = time.time() + + all_results = [] + for _ in range(test_config.num_iterations): + futures = [] + for request in tqdm(env.requests, "add_requests"): + prompt, sampling_params, arrival_delta = request + await asyncio.sleep(arrival_delta) + fut = client.add_request(prompt=prompt, sampling_params=sampling_params) + futures.append(fut) + results = await asyncio.wait_for(asyncio.gather(*futures), timeout=10.0) + all_results.append(results) + env.timing_data["done_time"] = time.time() + results_success = True + finally: + try: + if dist.get_rank() == 0: + if test_config.stop_engines: + await asyncio.wait_for(client.stop_engines(), timeout=10.0) + client.stop() + if test_config.stop_engines: + await asyncio.wait_for(env.engine.engine_loop_task, timeout=10.0) + shutdown_success = True + except: + env.engine.engine_loop_task.cancel() + + env.timing_data["stop_time"] = time.time() + + assert results_success, "Did not receive all results successfully." + assert shutdown_success, "Did not shutdown successfully." 
+ if dist.get_rank() == 0: + env.responses = all_results + if test_config.verify_results: + for batch in all_results: + for record in batch: + request = record[-1] + assert request.status == Status.COMPLETED + + return env + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + @pytest.mark.internal + @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") + @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") + @pytest.mark.asyncio + async def test_simple(self): + """Simple test with no TP or PP.""" + env = await self._run_test(tensor_model_parallel_size=1, pipeline_model_parallel_size=1) + + @pytest.mark.internal + @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") + @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") + @pytest.mark.asyncio + async def test_tp(self): + """Simple test with TP, but no PP.""" + env = await self._run_test(tensor_model_parallel_size=2, pipeline_model_parallel_size=1) + + @pytest.mark.internal + @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") + @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") + @pytest.mark.asyncio + async def test_pp(self): + """Simple test with no TP, but PP.""" + env = await self._run_test(tensor_model_parallel_size=1, pipeline_model_parallel_size=2) + + @pytest.mark.internal + @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") + @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") + @pytest.mark.asyncio + async def test_tp_pp(self): + """Simple test with both TP and PP.""" + env = await self._run_test(tensor_model_parallel_size=2, pipeline_model_parallel_size=2) + + @pytest.mark.internal + @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") + @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") + @pytest.mark.asyncio + async def test_pp(self): + """Simple test with no TP, but PP.""" + env = await 
self._run_test(tensor_model_parallel_size=1, pipeline_model_parallel_size=2) + + @pytest.mark.internal + @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") + @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") + @pytest.mark.asyncio + async def test_tp_pp(self): + """Simple test with both TP and PP.""" + env = await self._run_test(tensor_model_parallel_size=2, pipeline_model_parallel_size=2) + + @pytest.mark.internal + @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") + @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") + @pytest.mark.asyncio + async def test_pause(self): + """Pause/resume test.""" + test_config = CoordinatorTestConfig( + tensor_model_parallel_size=2, pipeline_model_parallel_size=1, num_requests=32 + ) + env = self._build_test_env(test_config) + + await env.engine.start_listening_to_data_parallel_coordinator( + inference_coordinator_port=test_config.port, launch_inference_coordinator=True + ) + + success = False + try: + if dist.get_rank() == 0: + # Start client as usual. + client = InferenceClient(test_config.port) + await client.start() + + ### TEST 1: Pause after all requests have finished. + futures = [] + for i, request in enumerate(env.requests[:2]): + prompt, sampling_params, _ = request + fut = client.add_request(prompt=prompt, sampling_params=sampling_params) + futures.append(fut) + # Wait a sufficient time for the requests to complete. + await asyncio.sleep(0.1) + # Get a pause awaitable. + to_pause = client.pause_engines() + awaitables = futures + [to_pause] + # Gather all awaitables; assert that the requests actually complete. + try: + await asyncio.wait_for(asyncio.gather(*awaitables), timeout=0.1) + except asyncio.TimeoutError: + pytest.fail("Simple pause did not succeed.") + + ### TEST 2: Ensure that requests can be added while paused. 
+ prompt, sampling_params, _ = env.requests[2] + paused_fut = client.add_request(prompt=prompt, sampling_params=sampling_params) + with pytest.raises(asyncio.TimeoutError): + await asyncio.wait_for(paused_fut, timeout=0.1) + + ### TEST 3: Resume after pause and drain the queued requests. + client.unpause_engines() + # TODO: The system should not be incorrectly raising a cancelled error here. + with pytest.raises(asyncio.CancelledError): + await paused_fut + + ### TEST 4: Add new requests after resume. + futures = [] + for i, request in enumerate(env.requests[3:4]): + prompt, sampling_params, _ = request + fut = client.add_request(prompt=prompt, sampling_params=sampling_params) + futures.append(fut) + # Wait a sufficient time for the requests to complete. + await asyncio.sleep(0.1) + # Gather all awaitables; assert that the requests actually complete. + try: + await asyncio.wait_for(asyncio.gather(*futures), timeout=0.1) + except asyncio.TimeoutError: + pytest.fail("Simple resume did not succeed.") + + ### TEST 5: Pause while requests are being processed. + ### Note: this situation cannot occur in a synchronous system. + if False: + for request in env.engine.requests[4:6]: + request.sampling_params.num_tokens_to_generate = 100 + futures = [] + for i, request in enumerate(env.requests[4:6]): + prompt, sampling_params, _ = request + fut = client.add_request(prompt=prompt, sampling_params=sampling_params) + futures.append(fut) + # Do not wait for the requests to complete. + await client.pause_engines() + # Gather all awaitables; assert that the requests do not complete. 
+ with pytest.raises(asyncio.TimeoutError): + await asyncio.wait_for(asyncio.gather(*futures), timeout=0.1) + success = True + finally: + try: + if dist.get_rank() == 0: + await asyncio.wait_for(client.stop_engines(), timeout=5.0) + client.stop() + await asyncio.wait_for(env.engine.engine_loop_task, timeout=5.0) + except asyncio.TimeoutError: + env.engine.engine_loop_task.cancel() + assert success, "Pause/resume test did not complete successfully." + + @pytest.mark.internal + @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") + @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") + @pytest.mark.asyncio + async def test_throughput(self): + """Throughput test with no TP or PP.""" + import torch + import torch.distributed as dist + + env = await self._run_test( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + num_requests=10**4, + num_iterations=10, + min_time_offset=0.0, + max_time_offset=0.0, + ) + + flags = torch.tensor([1, 1, 1], dtype=torch.int, device=torch.cuda.current_device()) + + init_duration = golden_init_duration = None + run_duration = golden_run_duration = None + stop_duration = golden_stop_duration = None + + if dist.get_rank() == 0: + init_duration = (env.timing_data["init_time"] - env.timing_data["start_time"]) * 10**3 + golden_init_duration = 4445.64 # ms + run_duration = (env.timing_data["done_time"] - env.timing_data["init_time"]) * 10**3 + golden_run_duration = 2906.29 # ms + stop_duration = (env.timing_data["stop_time"] - env.timing_data["done_time"]) * 10**3 + golden_stop_duration = 33.17 # ms + + def clamp_to_golden_value(value, golden_value, delta=0.1): + return value > golden_value * (1 - delta) and value < golden_value * (1 + delta) + + if not clamp_to_golden_value(init_duration, golden_init_duration, delta=0.5): + flags[0] = 0 + if not clamp_to_golden_value(run_duration, golden_run_duration, delta=0.2): + flags[1] = 0 + if not clamp_to_golden_value(stop_duration, golden_stop_duration, 
delta=1.0): + flags[2] = 0 + + # Synchronize results + dist.broadcast(flags, src=0) + + if dist.get_rank() == 0: + # Print current results. + print(f"Initialization time: {init_duration:.2f} ms") + print(f"Run time: {run_duration:.2f} ms") + print(f"Stop time: {stop_duration:.2f} ms") + + assert flags[0].item() == 1, ( + f"WARNING: Init duration {init_duration:.2f}s deviates from " + f"golden value {golden_init_duration:.2f}s" + ) + assert flags[1].item() == 1, ( + f"WARNING: Run duration {run_duration:.2f}s deviates from " + f"golden value {golden_run_duration:.2f}s" + ) + assert flags[2].item() == 1, ( + f"WARNING: Stop duration {stop_duration:.2f}s deviates from " + f"golden value {golden_stop_duration:.2f}s" + ) + + print( + f"ZMQ throughput is approximately " + f"{env.config.num_requests * env.config.num_iterations / (run_duration):.2f} " + f"requests/ms" + ) + else: + assert flags[0].item() == 1 + assert flags[1].item() == 1 + assert flags[2].item() == 1 + + +if __name__ == "__main__": + test = TestCoordinator() + asyncio.run(test.test_simple()) + asyncio.run(test.test_tp()) + asyncio.run(test.test_pp()) + asyncio.run(test.test_tp_pp()) + asyncio.run(test.test_pause()) + asyncio.run(test.test_throughput()) + test.teardown_method(None) + print("~~~") + print("success.") diff --git a/tests/unit_tests/inference/test_wandb_logging.py b/tests/unit_tests/inference/test_wandb_logging.py index 1512e805f9c..1d5d054b80e 100644 --- a/tests/unit_tests/inference/test_wandb_logging.py +++ b/tests/unit_tests/inference/test_wandb_logging.py @@ -50,7 +50,6 @@ def _get_dynamic_context( max_sequence_length=512, buffer_size_gb=0.03, block_size_tokens=128, - buffer_guaranteed_fraction=0.1, metrics_writer=None, ): """Helper to create a DynamicInferenceContext.""" @@ -62,9 +61,9 @@ def _get_dynamic_context( max_sequence_length=max_sequence_length, num_cuda_graphs=None, buffer_size_gb=buffer_size_gb, - buffer_guaranteed_fraction=buffer_guaranteed_fraction, 
block_size_tokens=block_size_tokens, metrics_writer=metrics_writer, + unified_memory_level=0, # unit tests currently broken with UVM ) @pytest.mark.internal @@ -83,12 +82,11 @@ def test_get_kvcache_utilization_stats_with_requests(self): assert 'active_utilization' in stats assert 'active_request_count' in stats assert 'paused_request_count' in stats - assert 'gtd_block_count' in stats assert 'block_count_avail' in stats - assert 'num_non_gtd_blocks' in stats assert 'active_token_count' in stats assert 'total_request_count' in stats - assert 'max_requests' in stats + assert 'max_total_requests' in stats + assert 'max_active_requests' in stats # Verify values for empty context assert stats['allocated_blocks'] == 0 @@ -134,12 +132,11 @@ def test_get_kvcache_utilization_stats_with_requests(self): assert stats_after['total_blocks'] == stats['total_blocks'] assert stats_after['total_blocks'] > 0 - # Verify that gtd_block_count remains constant - assert stats_after['gtd_block_count'] == stats['gtd_block_count'] - # Verify that max_requests remains constant - assert stats_after['max_requests'] == stats['max_requests'] - assert stats_after['max_requests'] > 0 + assert stats_after['max_total_requests'] == stats['max_total_requests'] + assert stats_after['max_total_requests'] > 0 + assert stats_after['max_active_requests'] == stats['max_active_requests'] + assert stats_after['max_active_requests'] > 0 # Verify block availability decreased after allocation assert stats_after['block_count_avail'] < stats['block_count_avail'] @@ -147,7 +144,7 @@ def test_get_kvcache_utilization_stats_with_requests(self): # Verify relationship: allocated_blocks + block_count_avail + 1 (dummy) = total assert ( stats_after['allocated_blocks'] + stats_after['block_count_avail'] + 1 - == dynamic_context.block_allocator.block_count_total + == dynamic_context.block_allocator.total_count ) # Verify utilization bounds [0, 1] @@ -180,12 +177,11 @@ def test_kvcache_utilization_stats_types(self): 
'active_unique_blocks', 'active_request_count', 'paused_request_count', - 'gtd_block_count', 'block_count_avail', - 'num_non_gtd_blocks', 'active_token_count', 'total_request_count', - 'max_requests', + 'max_total_requests', + 'max_active_requests', ] for field in int_fields: @@ -240,8 +236,8 @@ def test_paused_requests_in_stats(self): max_sequence_length=128, num_cuda_graphs=None, buffer_size_gb=0.01, # Small buffer to force pausing - buffer_guaranteed_fraction=0.1, block_size_tokens=32, + unified_memory_level=0, # unit tests currently broken with UVM ) # Add multiple requests to potentially trigger pausing diff --git a/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py b/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py index 10ffe2fdd40..ee6bc5b2468 100644 --- a/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py +++ b/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py @@ -80,6 +80,9 @@ def setup_model( fp8="hybrid" if fp8 else None, fp8_recipe="tensorwise" if fp8 else None, fp8_param=fp8, + tensor_model_parallel_size=tensor_model_parallel_size, + pipeline_model_parallel_size=pipeline_model_parallel_size, + pipeline_dtype=dtype, ) if dtype == torch.bfloat16: transformer_config.bf16 = True @@ -112,15 +115,15 @@ def setup_model( else: inference_context = DynamicInferenceContext( params_dtype=dtype, - num_layers=transformer_config.num_layers, + num_layers=transformer_config.num_layers // pipeline_model_parallel_size, kv_channels=transformer_config.kv_channels, num_attention_heads=transformer_config.num_attention_heads, max_sequence_length=2048, - buffer_size_gb=1, - buffer_guaranteed_fraction=0.1, + buffer_size_gb=0.2, materialize_only_last_token_logits=False, use_flashinfer_fused_rope=None, # default to using flash-infer if available # this is for compatibility with the LTS 
environment + unified_memory_level=0, # unit tests currently broken with UVM ) inference_wrapped_model = GPTInferenceWrapper( @@ -228,41 +231,75 @@ def detokenize(self, inp, skip_special_tokens=False): sampled_logits >= expected_min_value ), f"The sampled logits should all be greater than {expected_min_value} but its {sampled_logits}" - def test_sample_from_dynamic_logits(self): + @pytest.mark.parametrize("backend", ["torch"]) + def test_sample_from_dynamic_logits(self, backend): batch_size = 12 self.setup_model(torch.float32, batch_size=batch_size, static=False) self.mock_tokenizer.eod = self.vocab_size - active_sampling_map: List[Tuple[SamplingParams, List[int]]] = [ - (SamplingParams(top_k=3), [0, 3, 2]), + context = self.text_generation_controller.inference_wrapped_model.inference_context + context.materialize_only_last_token_logits = True + + # Prepare sampling params in human-readable format, to aid with test maintenance. + sampling_test_cases: List[Tuple[SamplingParams, List[int]]] = [ + (SamplingParams(temperature=0.1, top_p=0.01), [9, 6, 10]), + (SamplingParams(temperature=5.0, top_k=15), [0, 3, 2]), (SamplingParams(top_p=0.8), [4, 1, 7]), - (SamplingParams(top_k=5), [11, 5, 8]), - # (SamplingParams(top_k=5, top_p=0.7), [11, 5, 8]), # uncomment for FlashInfer sampling - (SamplingParams(temperature=2.0), [9, 6, 10]), + (SamplingParams(temperature=10.0, top_k=5), [11, 5, 8]), ] - rev_sampling_map: List[SamplingParams] = [None] * batch_size - for sampling_params, indices in active_sampling_map: + # For non-torch backends, test simultaneous top_k and top_p sampling. + if backend != "torch": + sampling_test_cases[3][0].top_p = 0.8 + + # Convert sampling params to non-readable format. 
+ rev_sampling_dict: List[SamplingParams] = [None] * batch_size + for sampling_params, indices in sampling_test_cases: for idx in indices: - rev_sampling_map[idx] = sampling_params + rev_sampling_dict[idx] = sampling_params - last_token_logits = torch.arange(0, self.vocab_size).repeat(batch_size, 1).float().cuda() - sampled_logits, _ = self.text_generation_controller.sample_from_dynamic_logits( - last_token_logits, active_sampling_map, vocab_size=self.vocab_size + # Prepare metadata for sample bookkeeping. + request_metadata_labels = DynamicInferenceRequest.get_metadata_labels() + request_metadata = torch.empty( + (batch_size, len(request_metadata_labels)), dtype=torch.float32 + ).cuda() + top_k_values = torch.Tensor([s.top_k for s in rev_sampling_dict]).cuda() + request_metadata[:, request_metadata_labels["top_k"]] = top_k_values + top_p_values = torch.Tensor([s.top_p for s in rev_sampling_dict]).cuda() + request_metadata[:, request_metadata_labels["top_p"]] = top_p_values + temp_values = torch.Tensor([s.temperature for s in rev_sampling_dict]).cuda() + request_metadata[:, request_metadata_labels["temperature"]] = temp_values + + # Bookkeeping. + self.text_generation_controller._dynamic_step_sample_bookkeeping( + request_metadata=request_metadata + ) + + # Sampling. 
+ logits = torch.arange(0, self.vocab_size).repeat(batch_size, 1).unsqueeze(0).float().cuda() + sampled_logits = self.text_generation_controller._dynamic_step_sample_logits( + logits, backend=backend ) - top_k_values = torch.Tensor([s.top_k for s in rev_sampling_map]).cuda().unsqueeze(1) - top_k_values[top_k_values == 0] = self.vocab_size - top_p_values = torch.Tensor([s.top_p for s in rev_sampling_map]).cuda().unsqueeze(1) - temp_values = torch.Tensor([s.temperature for s in rev_sampling_map]).cuda().unsqueeze(1) vocab_indices = torch.arange(self.vocab_size).cuda() + top_k_values[top_k_values == 0] = self.vocab_size assert torch.all( sampled_logits >= self.vocab_size - top_k_values ), f"The sampled logits should all be greater than {self.vocab_size - top_k_values} but its {sampled_logits}" - l = last_token_logits[0] - sampled_l = l.div(temp_values).softmax(dim=-1) - top_k_mask = vocab_indices.unsqueeze(0) < (self.vocab_size - top_k_values) + l = logits.squeeze(0) + sampled_l = l.div(temp_values.unsqueeze(1)).softmax(dim=-1) + top_k_mask = vocab_indices.unsqueeze(0) < (self.vocab_size - top_k_values.unsqueeze(1)) sampled_l.masked_fill_(top_k_mask, 0.0) - expected_min_values = sampled_l[sampled_l.cumsum(dim=-1) > top_p_values].amax(dim=-1) + top_p_mask = sampled_l.cumsum(dim=-1) > top_p_values.unsqueeze(1) + + first_excluded = torch.where( + top_p_mask.any(dim=-1), + top_p_mask.float().argmax(dim=-1), + torch.full((batch_size,), self.vocab_size, device=top_p_mask.device), + ) + last_included = torch.clamp(first_excluded - 1, min=0) + start_idx = torch.clamp(self.vocab_size - top_k_values, min=0).long() + last_included = torch.max(last_included, start_idx) + expected_min_values = l.gather(1, last_included.unsqueeze(1)).squeeze(1) assert torch.all( sampled_logits >= expected_min_values ), f"The sampled logits should all be greater than {expected_min_values} but its {sampled_logits}" @@ -773,14 +810,15 @@ def test_sampled_tokens_match_with_parallelism(self, static, 
tp_size, pp_size): ), ) ) - sampling_params = SamplingParams(top_k=10, return_log_probs=True, termination_id=-1) - sampling_map = [(sampling_params, list(range(len(active_requests))))] + expected_active_requests = set(int(x) for x in active_requests.keys()) while context.has_unfinished_requests(): - result = self.text_generation_controller.generate_output_tokens_dynamic_batch( - active_sampling_map=sampling_map - ) + result = self.text_generation_controller.generate_output_tokens_dynamic_batch() new_tokens = result["sample"] - assert len(new_tokens) == len(active_requests) + active_ids = result["active_request_ids"].tolist() + finished_ids = result["finished_request_ids"].tolist() + assert len(new_tokens) == len(expected_active_requests) + assert set(active_ids) == expected_active_requests + expected_active_requests -= set(finished_ids) for i, token in enumerate(new_tokens.tolist()): all_generated_tokens[i].append(token) diff --git a/tests/unit_tests/test_checkpointing.py b/tests/unit_tests/test_checkpointing.py index 194f9721300..4bbf54301f5 100644 --- a/tests/unit_tests/test_checkpointing.py +++ b/tests/unit_tests/test_checkpointing.py @@ -9,6 +9,8 @@ import torch import torch.distributed.checkpoint +from megatron.core.distributed import DistributedDataParallelConfig +from megatron.core.distributed.fsdp.mcore_fsdp_adapter import FullyShardedDataParallel from megatron.core.num_microbatches_calculator import ( init_num_microbatches_calculator, unset_num_microbatches_calculator, @@ -23,6 +25,7 @@ _load_base_checkpoint, get_checkpoint_tracker_filename, load_checkpoint, + read_metadata, save_checkpoint, ) from megatron.training.global_vars import set_args @@ -51,6 +54,9 @@ def __init__(self, state_dict): self.is_stub_optimizer = False self._called_metadata = [] + # Optimizers are expected to have this attribute for checkpointing. 
+ self.param_groups = [] + def state_dict(self, is_loading=False): return self._state_dict @@ -111,6 +117,8 @@ def create_args(): args.retro_add_retriever = False args.ckpt_convert_update_legacy_dist_opt_format = False args.ckpt_step = None + args.swiglu = True + args.num_experts = 1 yield args @@ -191,7 +199,7 @@ def test_load_base_checkpoint( assert ckpt_type == expected_ckpt_type -@pytest.mark.parametrize("ckpt_format", ["torch", "torch_dcp"]) +@pytest.mark.parametrize("ckpt_format", ["torch", "torch_dcp", "fsdp_dtensor"]) def test_save_checkpoint(init_model_parallel, create_args, tmp_path_dist_ckpt, ckpt_format): """Test save_checkpoint.""" args = create_args @@ -207,6 +215,15 @@ def test_save_checkpoint(init_model_parallel, create_args, tmp_path_dist_ckpt, c config = TransformerConfig(num_layers=1, kv_channels=1) model = MockModel(config) optimizer = MockState({"optimizer": "optimizer_state"}) + if ckpt_format == "fsdp_dtensor": + model = FullyShardedDataParallel( + config=config, + ddp_config=DistributedDataParallelConfig( + use_distributed_optimizer=True, use_megatron_fsdp=True + ), + module=model, + ) + optimizer = MockState({"state": {}}) opt_param_scheduler = MockState({"opt_param_scheduler": "scheduler_state"}) num_floating_point_operations_so_far = 456 @@ -226,7 +243,7 @@ def test_save_checkpoint(init_model_parallel, create_args, tmp_path_dist_ckpt, c expected_ckpt_path = None if ckpt_format == "torch": expected_ckpt_path = ckpt_dir / "mp_rank_00" / "model_optim_rng.pt" - elif ckpt_format == "torch_dcp": + elif ckpt_format in ["torch_dcp", "fsdp_dtensor"]: expected_ckpt_path = ckpt_dir / ".metadata" assert os.path.exists(expected_ckpt_path) @@ -337,3 +354,27 @@ def test_dist_checkpoint_versioning(init_model_parallel, tmp_path_dist_ckpt, cre first_job_mock_metadata, second_job_mock_metadata, ] + + +@pytest.mark.parametrize( + "metadata_content,expected_iter,expected_release", + [ + ("456", 456, False), # Normal iteration + ("release", 0, True), # Release 
checkpoint should return iteration=1 + ("123", 123, False), # Another normal iteration + ], +) +def test_read_metadata_non_distributed(tmp_path, metadata_content, expected_iter, expected_release): + """Test read_metadata without torch.distributed initialized.""" + test_dir = tmp_path / "test_read_metadata_non_distributed" + test_dir.mkdir(parents=True, exist_ok=True) + tracker_file = test_dir / "latest_checkpointed_iteration.txt" + + with open(tracker_file, "w") as f: + f.write(metadata_content) + + with mock.patch('torch.distributed.is_initialized', return_value=False): + max_iter, release = read_metadata(str(tracker_file)) + + assert max_iter == expected_iter, f"Expected iteration {expected_iter}, got {max_iter}" + assert release == expected_release, f"Expected release={expected_release}, got {release}" diff --git a/tests/unit_tests/test_process_groups_config.py b/tests/unit_tests/test_process_groups_config.py index 032de47e951..013bc6746d4 100644 --- a/tests/unit_tests/test_process_groups_config.py +++ b/tests/unit_tests/test_process_groups_config.py @@ -67,6 +67,29 @@ def test_hierarchical_context_parallel_groups(self, mocker): assert model_pgs.hcp[0] == mock_pg1 assert model_pgs.hcp[1] == mock_pg2 + def test_repr(self, mocker): + """Test __repr__ shows active process groups and their sizes.""" + tp_size = 4 + pp_size = 2 + mock_tp = mocker.Mock(spec=dist.ProcessGroup) + mock_tp.size.return_value = tp_size + mock_pp = mocker.Mock(spec=dist.ProcessGroup) + mock_pp.size.return_value = pp_size + + # Test empty collection + empty_pgs = ProcessGroupCollection() + assert repr(empty_pgs) == "ProcessGroupCollection(empty)" + + # Test collection with process groups + model_pgs = ProcessGroupCollection() + model_pgs.tp = mock_tp + model_pgs.pp = mock_pp + + repr_str = repr(model_pgs) + assert "ProcessGroupCollection(" in repr_str + assert f"tp({tp_size})" in repr_str + assert f"pp({pp_size})" in repr_str + class TestPGConfigDefaultInitialization: diff --git 
a/tests/unit_tests/test_rl_utils.py b/tests/unit_tests/test_rl_utils.py new file mode 100644 index 00000000000..5ea89ff2a02 --- /dev/null +++ b/tests/unit_tests/test_rl_utils.py @@ -0,0 +1,656 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +from unittest.mock import patch + +import torch + +from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig +from megatron.core.models.common.language_module.language_module import LanguageModule +from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec +from megatron.core.models.gpt.gpt_model import GPTModel +from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer +from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed +from megatron.core.transformer import TransformerConfig +from megatron.core.transformer.module import Float16Module +from megatron.rl import rl_utils +from megatron.rl.agent.api import TokenRollout +from megatron.training import arguments, global_vars +from tests.unit_tests.test_utilities import Utils + +BATCH = 2 +SEQ = 4 +VOCAB = 754 + + +class MockModel(LanguageModule): + def __init__(self, batch=BATCH, seq=SEQ, vocab=VOCAB): + self.batch = batch + self.seq = seq + self.vocab = vocab + self.config = TransformerConfig(num_attention_heads=1, num_layers=1) + + def __call__(self, x, position_ids, attention_mask, **kwargs): + del position_ids + del attention_mask + batch, seq = x.shape + mock_model_outputs = torch.ones((batch, seq, self.vocab), device=x.device) + return mock_model_outputs + + def load_state_dict(self, params): + del params + + def train(self, mode=True): + del mode + + def state_dict(self): + return {} + + +class MockTokenizer: + def __init__(self): + self.pad = 42 + self.eod = 43 + self.vocab_size = VOCAB + self.bos = None + + def detokenize(self, tokens): + return [str(tok) for tok in tokens] + + +def test_get_logprobs(): + """Test that 
getting logprobs at least does not crash.""" + # We use args inside of get_logprobs, we need to initialize them. + args = arguments.parse_args(ignore_unknown_args=True) + global_vars.set_args(args) + + tokens = torch.ones((BATCH, SEQ), dtype=torch.long) + logprobs = rl_utils.get_logprobs(MockModel(), tokens, position_ids=None, attention_mask=None) + # We chop off 1 element from the sequence dimension. + assert logprobs.shape == (BATCH, SEQ - 1) + # As we return ones as logits, all logprobs should be the same. + assert torch.all(logprobs == logprobs[0, 0]).item() + + +def test_get_logprobs_with_sequence_packing(): + """Test that getting logprobs at least does not crash.""" + # We use args inside of get_logprobs, we need to initialize them. + args = arguments.parse_args(ignore_unknown_args=True) + setattr(args, 'rl_use_sequence_packing', True) + global_vars.set_args(args) + + tokens = torch.ones((BATCH, SEQ), dtype=torch.long) + logprobs = rl_utils.get_logprobs(MockModel(), tokens, position_ids=None, attention_mask=None) + # We chop off 1 element from the sequence dimension. + assert logprobs.shape == (BATCH, SEQ - 1) + # As we return ones as logits, all logprobs should be the same. 
+ assert torch.all(logprobs == logprobs[0, 0]).item() + + +def test_prepare_trajectories(): + # Make sure sequence packing is disabled for this test + import megatron.training.global_vars as global_vars + + old_args = global_vars.get_args() if global_vars.get_args() is not None else None + + # Create minimal args without sequence packing + args = type('Args', (), {})() + args.rl_use_sequence_packing = False + args.rl_inference_logprobs_is_correction = True + global_vars.set_args(args) + + tokenizer = MockTokenizer() + r1 = TokenRollout( + trajectory=[1, 2, tokenizer.eod], + reward=3.14, + generation_mask=[False, True, True], + logprobs=[0.1, 0.2, 0.3], + env_id='MEGAENV', + problem_id="2", + ) + r2 = TokenRollout( + trajectory=[1, 2, tokenizer.eod], + reward=0.14, + generation_mask=[False, True, True], + logprobs=[0.1, 0.2, 0.3], + env_id='MEGAENV', + problem_id="2", + ) + rollouts = [[r1, r2]] + seq_len = 7 + + trajs, genmask, inference_logprobs = rl_utils.prepare_trajectories(rollouts, tokenizer, seq_len) + + # Check that inference logprobs are being returned. 
+ torch.testing.assert_close(inference_logprobs[0], torch.tensor([0.1, 0.2, 0.3])) + torch.testing.assert_close(inference_logprobs[1], torch.tensor([0.1, 0.2, 0.3])) + + expected_mask = torch.tensor( + [ + [False, True, True, False, False, False, False], + [False, True, True, False, False, False, False], + ] + ) + torch.testing.assert_close(genmask, expected_mask) + + expected_trajs = torch.tensor([[1, 2, 43, 42, 42, 42, 42], [1, 2, 43, 42, 42, 42, 42]]) + torch.testing.assert_close(trajs, expected_trajs) + + +def test_prepare_trajectories_with_packing(): + """Test that rollouts data is properly prepared with sequence packing enabled.""" + # Initialize args for sequence packing + args = arguments.parse_args(ignore_unknown_args=True) + setattr(args, 'micro_batch_size', 1) + setattr(args, 'global_batch_size', 1) + setattr(args, 'rl_use_sequence_packing', True) + global_vars.set_args(args) + + tokenizer = MockTokenizer() + r1 = TokenRollout( + trajectory=[1, 2, tokenizer.eod], + reward=3.14, + generation_mask=[False, True, True], + logprobs=[0.1, 0.2, 0.3], + env_id='MEGAENV', + problem_id="2", + ) + r2 = TokenRollout( + trajectory=[1, 2, 3, tokenizer.eod], + reward=0.14, + generation_mask=[False, True, True, True], + logprobs=[0.1, 0.2, 0.3, -1.2], + env_id='MEGAENV', + problem_id="2", + ) + rollouts = [[r1, r2]] + seq_len = 7 + + trajs, genmask, inference_logprobs = rl_utils.prepare_trajectories(rollouts, tokenizer, seq_len) + + # With sequence packing, inference logprobs should be padded to same length + assert isinstance(inference_logprobs, torch.Tensor) + assert inference_logprobs.shape == (2, 7) # 2 sequences, each padded to seq_len + + # Check values (padded with zeros) + torch.testing.assert_close( + inference_logprobs[0], torch.tensor([0.1, 0.2, 0.3, 0.0, 0.0, 0.0, 0.0]) + ) + torch.testing.assert_close( + inference_logprobs[1], torch.tensor([0.1, 0.2, 0.3, -1.2, 0.0, 0.0, 0.0]) + ) + + expected_mask = torch.tensor( + [ + [False, True, True, False, False, 
False, False], + [False, True, True, True, False, False, False], + ] + ) + torch.testing.assert_close(genmask, expected_mask) + + expected_trajs = torch.tensor([[1, 2, 43, 42, 42, 42, 42], [1, 2, 3, 43, 42, 42, 42]]) + torch.testing.assert_close(trajs, expected_trajs) + + +def test_grpo_loss_calculation_all_pi_eq(): + # All policies are equal: clamping is inactive, ratios are ones. + current_logprobs = torch.ones(BATCH, SEQ) + old_logprobs = torch.ones(BATCH, SEQ) + ref_logprobs = torch.ones(BATCH, SEQ) + advantages = torch.zeros(BATCH) + loss, kl_term, ratios, entropy_term, _, _ = rl_utils.calculate_grpo_loss( + current_logprobs=current_logprobs, + old_logprobs=old_logprobs, + ref_logprobs=ref_logprobs, + advantages=advantages, + clamp_eps_lower=0.1, + clamp_eps_upper=0.1, + kl_beta=0.1, + entropy_weight=0.0, + ) + torch.testing.assert_close(loss, torch.zeros_like(loss)) + torch.testing.assert_close(kl_term, torch.zeros_like(kl_term)) + torch.testing.assert_close(ratios, torch.ones_like(ratios)) + torch.testing.assert_close(entropy_term, torch.ones_like(ratios) * torch.e) + + +def test_grpo_loss_calculation_2x_ratios(): + # All policies are equal: clamping is inactive, ratios are ones. + current_logprobs = torch.ones(BATCH, SEQ) + old_logprobs = torch.ones(BATCH, SEQ) - torch.log(torch.Tensor([2])) + ref_logprobs = torch.ones(BATCH, SEQ) + advantages = torch.ones(BATCH) + loss, kl_term, ratios, _, _, _ = rl_utils.calculate_grpo_loss( + current_logprobs=current_logprobs, + old_logprobs=old_logprobs, + ref_logprobs=ref_logprobs, + advantages=advantages, + clamp_eps_lower=2.1, + clamp_eps_upper=2.1, + kl_beta=0.0, + entropy_weight=0.0, + ) + # Clamping does not affect us, as 2.1 [eps] > 2 [ratio]. + # kl_beta = 0 -> we only have the non-kl term of the loss active. + torch.testing.assert_close(loss, -torch.ones_like(loss) * 2) + # pi and pi_{ref} are the same here. 
+ torch.testing.assert_close(kl_term, torch.zeros_like(kl_term)) + # Current probs are 2x more probable than old pi. + torch.testing.assert_close(ratios, torch.ones_like(ratios) * 2) + + +def test_entropy_calculation(): + # All policies are equal: clamping is inactive, ratios are ones. + current_logprobs = torch.ones(BATCH, SEQ) + old_logprobs = torch.ones(BATCH, SEQ) + ref_logprobs = torch.ones(BATCH, SEQ) + advantages = torch.zeros(BATCH) + loss, _, ratios, entropy_term, _, _ = rl_utils.calculate_grpo_loss( + current_logprobs=current_logprobs, + old_logprobs=old_logprobs, + ref_logprobs=ref_logprobs, + advantages=advantages, + clamp_eps_lower=0.1, + clamp_eps_upper=0.1, + kl_beta=0.0, + entropy_weight=1.0, + ) + torch.testing.assert_close(loss, torch.ones_like(ratios) * torch.e) + torch.testing.assert_close(entropy_term, torch.ones_like(ratios) * torch.e) + + +def test_grpo_loss_truncation(): + + # All ratios are 2 + _, _, _, _, truncated_from_above, truncated_from_below = rl_utils.calculate_grpo_loss( + current_logprobs=torch.ones(BATCH, SEQ), + old_logprobs=0.5 * torch.ones(BATCH, SEQ), + ref_logprobs=torch.ones(BATCH, SEQ), + advantages=torch.zeros(BATCH), + clamp_eps_lower=0.1, + clamp_eps_upper=0.1, + kl_beta=0.1, + entropy_weight=0.0, + ) + assert truncated_from_above.float().mean() == 1 + assert truncated_from_below.float().sum() == 0 + + # All ratios are 0.01 + _, _, _, _, truncated_from_above, truncated_from_below = rl_utils.calculate_grpo_loss( + current_logprobs=0.01 * torch.ones(BATCH, SEQ), + old_logprobs=torch.ones(BATCH, SEQ), + ref_logprobs=torch.ones(BATCH, SEQ), + advantages=torch.zeros(BATCH), + clamp_eps_lower=0.1, + clamp_eps_upper=0.1, + kl_beta=0.1, + entropy_weight=0.0, + ) + assert truncated_from_above.float().sum() == 0 + assert truncated_from_below.float().mean() == 1 + + current_logprobs = torch.tensor([[1.0, 1.0], [1.0, 1.0]]) + old_logprobs = torch.tensor([[0.5, 2.0], [0.05, 1.0]]) + _, _, _, _, truncated_from_above, 
truncated_from_below = rl_utils.calculate_grpo_loss( + current_logprobs=current_logprobs, + old_logprobs=old_logprobs, + ref_logprobs=old_logprobs, + advantages=torch.zeros(BATCH), + clamp_eps_lower=0.1, + clamp_eps_upper=0.1, + kl_beta=0.1, + entropy_weight=0.0, + ) + # ratios: [[2., 0.5],[20., 1.]] + torch.testing.assert_close(truncated_from_above, torch.tensor([[True, False], [True, False]])) + torch.testing.assert_close(truncated_from_below, torch.tensor([[False, True], [False, False]])) + + +@patch('megatron.rl.rl_utils.mpu') +def test_prepare_data_for_update(mock_mpu): + """Test that getting logprobs at least does not crash.""" + mock_mpu.get_expert_data_parallel_world_size.return_value = 0 + # We use args inside of get_logprobs, we need to initialize them. + + args = arguments.parse_args(ignore_unknown_args=True) + setattr(args, 'data_parallel_size', 1) + setattr(args, 'micro_batch_size', 2) + setattr(args, 'global_batch_size', 2) + setattr(args, 'seq_length', 4) + setattr(args, 'curr_iteration', 1) + global_vars.unset_global_variables() + global_vars.set_global_variables(args, build_tokenizer=False) + + model = MockModel() + tokenizer = MockTokenizer() + + r1 = TokenRollout( + trajectory=[1, 2, 3], + reward=3.14, + generation_mask=[False, True, True], + logprobs=[0.1, 0.2, 0.3], + env_id='MEGAENV', + problem_id="2", + ) + r2 = TokenRollout( + trajectory=[1, 2, 3, 4], + reward=0.14, + generation_mask=[False, True, True, True], + logprobs=[0.1, 0.2, 0.3, -1.2], + env_id='MEGAENV', + problem_id="2", + ) + rollouts = [[r1, r2]] + try: + data_iter = rl_utils.prepare_data_for_update([model], {}, rollouts, tokenizer) + except AssertionError as e: + # We expect trajectories to come padded there. 
+ assert str(e).startswith('Rollout is not the correct length') + + r1 = TokenRollout( + trajectory=torch.Tensor([1, 2, 3, tokenizer.eod]).cuda(), + reward=3.14, + generation_mask=torch.Tensor([False, True, True, True]).cuda(), + logprobs=torch.Tensor([-0.2, -0.3, -3.2]).cuda(), + env_id='MEGAENV', + problem_id="2", + ) + r2 = TokenRollout( + trajectory=torch.Tensor([1, 2, 234, tokenizer.eod]).cuda(), + reward=0.14, + generation_mask=torch.Tensor([False, True, True, True]).cuda(), + logprobs=torch.Tensor([-0.2, -0.3, -1.2]), + env_id='MEGAENV', + problem_id="2", + ) + rollouts = [[r1, r2]] + data_iter = rl_utils.prepare_data_for_update([model], {}, rollouts, tokenizer) + + _, _, old_logprobs, _, _, _, _ = next(data_iter) + # All logits are ones in the MockModel. + # All probabilities should be uniform. + torch.testing.assert_close(old_logprobs.exp(), torch.ones_like(old_logprobs) / VOCAB) + + +def test_sequence_packing_basic(): + """Test basic sequence packing functionality.""" + # Initialize args as required by SequencePacker + args = arguments.parse_args(ignore_unknown_args=True) + setattr(args, 'seq_length', 16) + global_vars.set_args(args) + + tokenizer = MockTokenizer() + bin_size = 16 + packer = rl_utils.SequencePacker(bin_size=bin_size, pad_token=tokenizer.pad) + + # Create test sequences of varying lengths, all padded to same length + max_len = 5 + sequences = [ + torch.cat( + [ + torch.tensor([1, 2, 3, tokenizer.eod]), + torch.full((1,), tokenizer.pad, dtype=torch.long), + ] + ), # length 4 -> 5 + torch.cat( + [torch.tensor([4, 5, tokenizer.eod]), torch.full((2,), tokenizer.pad, dtype=torch.long)] + ), # length 3 -> 5 + torch.tensor([6, 7, 8, 9, tokenizer.eod]), # length 5 + torch.cat( + [torch.tensor([10, tokenizer.eod]), torch.full((3,), tokenizer.pad, dtype=torch.long)] + ), # length 2 -> 5 + ] + + generation_masks = torch.tensor( + [ + [False, True, True, True, False], # Matches padded length + [False, True, True, False, False], + [False, True, True, 
True, True], + [False, True, False, False, False], + ] + ) + + rewards = torch.tensor([1.0, 2.0, 3.0, 4.0]) + + # Pack sequences + packed_trajs, packed_position_ids, packed_attention_mask, packed_loss_mask, packing_info = ( + packer.pack_sequences(sequences, generation_masks) + ) + + # Verify packed data structure + assert packed_trajs is not None + assert packed_position_ids is not None + assert packed_attention_mask is not None + assert packed_loss_mask is not None + assert packing_info is not None + + # Check that sequences fit in bins properly + # The packer trims sequences to their actual length (removing padding) + # Actual lengths: 4, 3, 5, 2 = 14 total tokens + # With bin_size=16, this should fit in 1 bin + assert packed_trajs.shape[0] >= 1 # At least one bin + assert packed_trajs.shape[1] == bin_size + + # Verify position_ids are correct + for bin_idx in range(packed_trajs.shape[0]): + # Check that position_ids reset for each sequence in the bin + for i in range(packed_trajs.shape[1]): + if i == 0 or packed_trajs[bin_idx, i - 1] == tokenizer.eod: + # Start of a new sequence + if packed_trajs[bin_idx, i] != tokenizer.pad: + assert packed_position_ids[bin_idx, i] == 0 + + +def test_sequence_packing_with_generation_masks(): + """Test sequence packing with generation masks.""" + # Initialize args as required by SequencePacker + args = arguments.parse_args(ignore_unknown_args=True) + setattr(args, 'seq_length', 20) + global_vars.set_args(args) + + tokenizer = MockTokenizer() + bin_size = 20 + packer = rl_utils.SequencePacker(bin_size=bin_size, pad_token=tokenizer.pad) + + # Create test data with generation masks + sequences = [torch.tensor([1, 2, 3, tokenizer.eod]), torch.tensor([4, 5, 6, 7, tokenizer.eod])] + + # Pad sequences to same length for stacking + max_len = max(len(s) for s in sequences) + padded_sequences = [] + for seq in sequences: + padded = torch.cat([seq, torch.full((max_len - len(seq),), tokenizer.pad, dtype=seq.dtype)]) + 
padded_sequences.append(padded) + + generation_masks = torch.tensor( + [ + [False, True, True, True, False], # Padded to match max_len + [False, True, True, True, True], + ] + ) + + # Pack sequences + packed_trajs, packed_position_ids, packed_attention_mask, packed_loss_mask, packing_info = ( + packer.pack_sequences(padded_sequences, generation_masks) + ) + + # Verify packed tensors + assert packed_trajs.shape[0] == 1 # One bin + assert packed_trajs.shape[1] == bin_size + + # Check that loss mask is set correctly for generation tokens + # The loss mask should be 1 for generation tokens and 0 for padding/prompt + + +def test_sequence_packing_empty_bins(): + """Test that empty bins are created correctly.""" + # Initialize args if needed + args = arguments.parse_args(ignore_unknown_args=True) + setattr(args, 'seq_length', 8) + global_vars.set_args(args) + + tokenizer = MockTokenizer() + bin_size = 8 + num_empty_bins = 3 + + # Create a simple packed data structure + packed_trajs = torch.tensor( + [[1, 2, 3, tokenizer.eod, tokenizer.pad, tokenizer.pad, tokenizer.pad, tokenizer.pad]] + ) + packed_position_ids = torch.tensor([[0, 1, 2, 3, 0, 0, 0, 0]]) + packed_loss_mask = torch.tensor([[1, 1, 1, 1, 0, 0, 0, 0]], dtype=torch.float) + packed_attention_mask = torch.ones(1, bin_size, bin_size) # Simple full attention mask + + # Create empty bins + empty_trajs, empty_position_ids, empty_loss_mask, empty_attention_mask, empty_packing_info = ( + rl_utils.create_empty_bins( + num_empty_bins=num_empty_bins, + bin_size=bin_size, + packed_trajs=packed_trajs, + packed_position_ids=packed_position_ids, + packed_loss_mask=packed_loss_mask, + packed_attention_mask=packed_attention_mask, + tokenizer=tokenizer, + ) + ) + + # Verify shapes + assert empty_trajs.shape[0] == num_empty_bins + assert empty_trajs.shape[1] == bin_size + + # Check that empty bins are filled with padding + for i in range(num_empty_bins): + assert torch.all(empty_trajs[i] == tokenizer.pad) + assert 
torch.all(empty_position_ids[i] == 0) + assert torch.all(empty_loss_mask[i] == 0) + + # Verify packing info for empty bins + assert len(empty_packing_info) == num_empty_bins + for info in empty_packing_info: + assert len(info['bin_seq_indices']) == 0 # No sequences in empty bins + assert len(info['seq_starts']) == 0 # No sequence starts + + +def test_prepare_trajectories_with_sequence_packing(): + """Test prepare_trajectories with sequence packing enabled.""" + # Set up args with sequence packing + args = arguments.parse_args(ignore_unknown_args=True) + setattr(args, 'rl_use_sequence_packing', True) + setattr(args, 'rl_sequence_packing_bin_size', 16) + setattr(args, 'data_parallel_size', 1) + setattr(args, 'micro_batch_size', 2) + setattr(args, 'global_batch_size', 2) + setattr(args, 'seq_length', 16) + setattr(args, 'curr_iteration', 1) + global_vars.unset_global_variables() + global_vars.set_global_variables(args, build_tokenizer=False) + + tokenizer = MockTokenizer() + + # Create rollouts of varying lengths + r1 = TokenRollout( + trajectory=[1, 2, tokenizer.eod], + reward=3.14, + generation_mask=[False, True, True], + logprobs=[0.1, 0.2, 0.3], + env_id='MEGAENV', + problem_id="1", + ) + r2 = TokenRollout( + trajectory=[4, 5, 6, 7, tokenizer.eod], + reward=0.14, + generation_mask=[False, True, True, True, True], + logprobs=[0.4, 0.5, 0.6, 0.7, 0.8], + env_id='MEGAENV', + problem_id="2", + ) + r3 = TokenRollout( + trajectory=[8, 9, tokenizer.eod], + reward=2.71, + generation_mask=[False, True, True], + logprobs=[0.9, 1.0, 1.1], + env_id='MEGAENV', + problem_id="3", + ) + + rollouts = [[r1, r2, r3]] + seq_len = 16 + + # Call prepare_trajectories with sequence packing + trajs, genmask, inference_logprobs = rl_utils.prepare_trajectories(rollouts, tokenizer, seq_len) + + # With sequence packing enabled but called from prepare_trajectories, + # it might still return individual sequences (not packed into bins yet) + # because the actual packing happens later in 
prepare_data_for_update + assert trajs.shape[0] == 3 # Three sequences + assert trajs.shape[1] == seq_len + + # Verify that each sequence is properly padded + # Sequence 1: [1, 2, eod, pad] + padding + assert trajs[0, 0] == 1 + assert trajs[0, 1] == 2 + assert trajs[0, 2] == tokenizer.eod + assert trajs[0, 3] == tokenizer.pad + + # Sequence 2: [4, 5, 6, 7, eod, pad] + padding + assert trajs[1, 0] == 4 + assert trajs[1, 1] == 5 + assert trajs[1, 4] == tokenizer.eod + assert trajs[1, 5] == tokenizer.pad + + +def test_sequence_packing_integration(): + """Simple integration test for sequence packing - just verifies the packing works.""" + # Initialize minimal args needed for SequencePacker + args = arguments.parse_args(ignore_unknown_args=True) + setattr(args, 'seq_length', 16) + global_vars.set_args(args) + + tokenizer = MockTokenizer() + bin_size = 16 + + # Test that we can pack sequences and get expected outputs + packer = rl_utils.SequencePacker(bin_size=bin_size, pad_token=tokenizer.pad) + + # Create test data - need to pad to same length for stacking + max_len = 5 + sequences = [ + torch.cat( + [ + torch.tensor([1, 2, 3, tokenizer.eod]), + torch.full((1,), tokenizer.pad, dtype=torch.long), + ] + ), # length 4 -> 5 + torch.cat( + [torch.tensor([4, 5, tokenizer.eod]), torch.full((2,), tokenizer.pad, dtype=torch.long)] + ), # length 3 -> 5 + torch.tensor([6, 7, 8, 9, tokenizer.eod]), # length 5 + ] + generation_masks = [ + torch.tensor([False, True, True, True, False]), + torch.tensor([False, True, True, False, False]), + torch.tensor([False, True, True, True, True]), + ] + + # Pack sequences + packed_trajs, packed_position_ids, packed_attention_mask, packed_loss_mask, packing_info = ( + packer.pack_sequences(sequences, generation_masks) + ) + + # Basic assertions + assert packed_trajs is not None + assert packed_trajs.shape[1] == bin_size # Each bin should be bin_size + assert packed_position_ids.shape == packed_trajs.shape + assert packed_loss_mask.shape == 
packed_trajs.shape + + # Verify the sequences are packed correctly + # Total length: 4 + 3 + 5 = 12, should fit in 1 bin + assert packed_trajs.shape[0] == 1 + + # The packer sorts sequences by length (descending), so order is: seq3 (len 5), seq1 (len 4), seq2 (len 3) + expected_start = torch.tensor( + [6, 7, 8, 9, tokenizer.eod, 1, 2, 3, tokenizer.eod, 4, 5, tokenizer.eod] + ) + assert torch.all(packed_trajs[0, :12] == expected_start) + + # Rest should be padding + assert torch.all(packed_trajs[0, 12:] == tokenizer.pad) diff --git a/tests/unit_tests/transformer/moe/test_token_dispatcher.py b/tests/unit_tests/transformer/moe/test_token_dispatcher.py index 4b4cfa567c5..6a155920e2f 100644 --- a/tests/unit_tests/transformer/moe/test_token_dispatcher.py +++ b/tests/unit_tests/transformer/moe/test_token_dispatcher.py @@ -417,7 +417,10 @@ def is_hybrid_ep_available(): return HAVE_HYBRIDEP -@pytest.mark.skipif(True, reason="Deep EP and Hybrid EP are not available") +@pytest.mark.skipif( + not is_deep_ep_available() and not is_hybrid_ep_available(), + reason="Deep EP and Hybrid EP are not available", +) class TestFlexDispatcher: def setup_method(self, method): pass diff --git a/tools/run_inference_performance_test.py b/tools/run_inference_performance_test.py index 01e5ab58898..dda2b8284b3 100644 --- a/tools/run_inference_performance_test.py +++ b/tools/run_inference_performance_test.py @@ -24,9 +24,8 @@ from megatron.core.inference.text_generation_controllers.text_generation_controller import ( TextGenerationController, ) -from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols from megatron.core.transformer.module import MegatronModule -from megatron.core.utils import get_attr_wrapped_model +from megatron.core.utils import get_mamba_inference_state_config_from_model from model_provider import model_provider sys.path.append( @@ -89,14 +88,7 @@ def get_inference_engine(args: argparse.Namespace, model: MegatronModule) -> Abs 
moe_pad_experts_for_cuda_graph_inference=args.moe_pad_experts_for_cuda_graph_inference, ) - # Layer type list for hybrid models - decoder = get_attr_wrapped_model(model, "decoder") - layer_type_list = getattr(decoder, "layer_type_list", None) - if layer_type_list is not None and Symbols.MAMBA in layer_type_list: - (mamba_conv_states_shape, mamba_ssm_states_shape) = decoder.mamba_state_shapes_per_request() - else: - mamba_conv_states_shape = None - mamba_ssm_states_shape = None + mamba_inference_state_config = get_mamba_inference_state_config_from_model(model) if args.engine_type == "static": inference_wrapped_model = GPTInferenceWrapper(model, inference_wrapper_config) @@ -129,9 +121,7 @@ def get_inference_engine(args: argparse.Namespace, model: MegatronModule) -> Abs block_size_tokens=args.inference_dynamic_batching_block_size, tensor_model_parallel_size=args.tensor_model_parallel_size, materialize_only_last_token_logits=not args.return_log_probs, - layer_type_list=layer_type_list, - mamba_conv_states_shape=mamba_conv_states_shape, - mamba_ssm_states_shape=mamba_ssm_states_shape, + mamba_inference_state_config=mamba_inference_state_config, cache_mla_latent=args.multi_latent_attention and args.cache_mla_latents, kv_lora_rank=args.kv_lora_rank if args.multi_latent_attention else None, qk_pos_emb_head_dim=args.qk_pos_emb_head_dim, diff --git a/train_rl.py b/train_rl.py index 479498d392a..bf632d81e2c 100644 --- a/train_rl.py +++ b/train_rl.py @@ -191,7 +191,7 @@ def forward_step(data_iterator, model: GPTModel, loss_only: bool = False): seq_lengths = None attention_mask = None - if args.use_sequence_packing: + if args.rl_use_sequence_packing: # Get bin index from data iterator bin_tensor = batch_data[0] bin_idx = bin_tensor.item() From b9c48ecb99af17c659d6409c50ff2c81c81216e3 Mon Sep 17 00:00:00 2001 From: Michael Wojcikiewicz Date: Tue, 25 Nov 2025 17:12:23 -0500 Subject: [PATCH 155/334] adding action for checking whether PR author is nvidia employee or not for 
selecting ephemeral ci hosts (#2402) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../check-nvidia-sso-membership/action.yml | 139 ++++++++++++++++++ .github/workflows/cicd-main.yml | 66 ++++----- 2 files changed, 166 insertions(+), 39 deletions(-) create mode 100644 .github/actions/check-nvidia-sso-membership/action.yml diff --git a/.github/actions/check-nvidia-sso-membership/action.yml b/.github/actions/check-nvidia-sso-membership/action.yml new file mode 100644 index 00000000000..71926c4547d --- /dev/null +++ b/.github/actions/check-nvidia-sso-membership/action.yml @@ -0,0 +1,139 @@ +name: 'Check NVIDIA SSO Membership' +description: 'Check if a GitHub username exists in the NVIDIA SSO users list from github-audits' +author: 'NVIDIA' + +inputs: + username: + description: 'GitHub username to check' + required: true + github_audits_repo: + description: 'Repository containing SSO users file' + required: false + default: 'NVIDIA-GitHub-Management/github-audits' + github_audits_version: + description: 'Release version tag' + required: false + default: 'v0.1.0' + sso_users_filename: + description: 'Filename of SSO users JSON' + required: false + default: 'users_sso.json' + github_token: + description: 'GitHub token with access to github-audits repo' + required: true + +outputs: + is_member: + description: 'Boolean - true if user is in NVIDIA SSO list, false otherwise' + value: ${{ steps.check-membership.outputs.is_member }} + is_org_member: + description: 'Boolean - true if user has NVIDIA or NVIDIA-NeMo in org_roles' + value: ${{ steps.check-membership.outputs.is_org_member }} + user_orgs: + description: 'Comma-separated list of orgs user is member of' + value: ${{ steps.check-membership.outputs.user_orgs }} + sso_file_available: + description: 'Boolean - true if SSO file was successfully downloaded' + value: ${{ steps.download-sso.outputs.sso_file_available }} + user_count: + description: 
'Number of users in the SSO file (0 if download failed)' + value: ${{ steps.download-sso.outputs.user_count }} + +runs: + using: 'composite' + steps: + - name: Download NVIDIA SSO users from github-audits + id: download-sso + shell: bash + env: + GH_TOKEN: ${{ inputs.github_token }} + run: | + echo "Downloading ${{ inputs.sso_users_filename }} from ${{ inputs.github_audits_repo }} ${{ inputs.github_audits_version }} release..." + + # Download the release asset using gh CLI + gh release download ${{ inputs.github_audits_version }} \ + --repo ${{ inputs.github_audits_repo }} \ + --pattern ${{ inputs.sso_users_filename }} \ + --clobber 2>&1 || { + echo "ERROR: Failed to download ${{ inputs.sso_users_filename }} from github-audits release" + echo "sso_file_available=false" >> $GITHUB_OUTPUT + echo "user_count=0" >> $GITHUB_OUTPUT + exit 0 + } + + # Verify file was downloaded and is valid JSON + if [ ! -f ${{ inputs.sso_users_filename }} ]; then + echo "ERROR: ${{ inputs.sso_users_filename }} file not found after download" + echo "sso_file_available=false" >> $GITHUB_OUTPUT + echo "user_count=0" >> $GITHUB_OUTPUT + exit 0 + fi + + # Validate JSON structure + if ! jq -e 'type == "object"' ${{ inputs.sso_users_filename }} > /dev/null 2>&1; then + echo "ERROR: ${{ inputs.sso_users_filename }} is not a valid JSON object" + echo "sso_file_available=false" >> $GITHUB_OUTPUT + echo "user_count=0" >> $GITHUB_OUTPUT + exit 0 + fi + + USER_COUNT=$(jq 'length' ${{ inputs.sso_users_filename }}) + echo "Successfully downloaded ${{ inputs.sso_users_filename }} with $USER_COUNT NVIDIA SSO users" + echo "sso_file_available=true" >> $GITHUB_OUTPUT + echo "user_count=$USER_COUNT" >> $GITHUB_OUTPUT + + - name: Check if user is in SSO list + id: check-membership + shell: bash + run: | + USERNAME="${{ inputs.username }}" + SSO_FILE="${{ inputs.sso_users_filename }}" + + echo "Checking if $USERNAME is in NVIDIA SSO users list..." 
+ + # Check if SSO file is available + if [ "${{ steps.download-sso.outputs.sso_file_available }}" != "true" ] || [ ! -f "$SSO_FILE" ]; then + echo "ERROR: $SSO_FILE not available - cannot check membership" + echo "is_member=false" >> $GITHUB_OUTPUT + echo "is_org_member=false" >> $GITHUB_OUTPUT + echo "user_orgs=" >> $GITHUB_OUTPUT + exit 0 + fi + + # Check if username exists as a key in the JSON object + if jq -e --arg user "$USERNAME" 'has($user)' "$SSO_FILE" > /dev/null 2>&1; then + echo "$USERNAME found in NVIDIA SSO users" + echo "is_member=true" >> $GITHUB_OUTPUT + + # Extract and check org membership + IS_ORG_MEMBER=$(jq -r --arg user "$USERNAME" ' + .[$user].org_roles // [] | + map(select(test("^(NVIDIA|NVIDIA-NeMo):Member$"))) | + length > 0 + ' "$SSO_FILE") + + USER_ORGS=$(jq -r --arg user "$USERNAME" ' + .[$user].org_roles // [] | + map(split(":")[0]) | + unique | + join(",") + ' "$SSO_FILE") + + echo "is_org_member=$IS_ORG_MEMBER" >> $GITHUB_OUTPUT + echo "user_orgs=$USER_ORGS" >> $GITHUB_OUTPUT + + if [ "$IS_ORG_MEMBER" == "true" ]; then + echo "$USERNAME is a member of NVIDIA or NVIDIA-NeMo org" + else + echo "$USERNAME has @nvidia.com email but is not in NVIDIA or NVIDIA-NeMo org (orgs: $USER_ORGS)" + fi + else + echo "$USERNAME NOT found in NVIDIA SSO users" + echo "is_member=false" >> $GITHUB_OUTPUT + echo "is_org_member=false" >> $GITHUB_OUTPUT + echo "user_orgs=" >> $GITHUB_OUTPUT + fi + +branding: + icon: 'shield' + color: 'green' diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index b05b6c55b84..d76d68e463e 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -20,8 +20,8 @@ on: branches: - dev - main - - "pull-request/[0-9]+" - - "deploy-release/*" + - 'pull-request/[0-9]+' + - 'deploy-release/*' merge_group: types: [checks_requested] workflow_dispatch: @@ -43,6 +43,8 @@ jobs: if: github.repository == 'NVIDIA/Megatron-LM' outputs: is_external_contributor: ${{ 
github.event.pull_request.user.type == 'User' }} + is_maintainer: ${{ steps.check-membership.outputs.is_maintainer }} + selected_runner: ${{ steps.check-membership.outputs.is_maintainer == 'true' && 'nvidia-ci-aws-gpu-x8' || 'nvidia-ci-aws-gpu-x8-ephemeral' }} permissions: issues: write pull-requests: write @@ -60,7 +62,14 @@ jobs: if: startsWith(github.ref, 'refs/heads/pull-request/') uses: nv-gha-runners/get-pr-info@main - - name: Check membership + - name: Check NVIDIA SSO membership + id: check-sso + uses: ./.github/actions/check-nvidia-sso-membership + with: + username: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }} + github_token: ${{ secrets.NVIDIA_MANAGEMENT_ORG_PAT }} + + - name: Set maintainer status id: check-membership env: IS_MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }} @@ -68,38 +77,15 @@ jobs: IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }} SCHEDULED_JOB: ${{ github.event_name == 'schedule' }} run: | - PR_AUTHOR=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }} - + # Skip SSO check for scheduled jobs, main branch, dev branch, or merge groups if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] || [ "${IS_DEV_BRANCH}" == "true" ] || [ "${IS_MERGE_GROUP}" == "true" ]; then echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT exit 0 fi - echo "Checking if $PR_AUTHOR is a repo collaborator..." - API_URL="https://api.github.com/repos/$REPO/collaborators/$PR_AUTHOR" - REPO_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $GITHUB_TOKEN" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - $API_URL) - - echo "Checking if $PR_AUTHOR is an org collaborator to NVIDIA-NeMo..." 
- API_URL="https://api.github.com/orgs/NVIDIA-NeMo/members/$PR_AUTHOR" - ORG_NVIDIA_NEMO_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $GITHUB_TOKEN" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - $API_URL) - - echo "Checking if $PR_AUTHOR is an org collaborator to NVIDIA..." - API_URL="https://api.github.com/orgs/NVIDIA/members/$PR_AUTHOR" - ORG_NVIDIA_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $GITHUB_TOKEN" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - $API_URL) - - if [ "$REPO_MEMBERSHIP_RESPONSE" -eq 204 ] || [ "$ORG_NVIDIA_NEMO_MEMBERSHIP_RESPONSE" -eq 204 ] || [ "$ORG_NVIDIA_MEMBERSHIP_RESPONSE" -eq 204 ]; then + # Use SSO membership check result + IS_MEMBER="${{ steps.check-sso.outputs.is_member }}" + if [ "$IS_MEMBER" == "true" ]; then echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT else echo "is_maintainer=false" | tee -a $GITHUB_OUTPUT @@ -112,7 +98,7 @@ jobs: with: issue-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} repository: ${{ github.repository }} - body-includes: "" + body-includes: '' - name: Delete comment uses: actions/github-script@v7 @@ -212,8 +198,8 @@ jobs: echo "is_merge_group: ${{ needs.pre-flight.outputs.is_merge_group }}" cicd-container-build: - needs: [pre-flight, cicd-wait-in-queue] - runs-on: nvidia-ci-aws-gpu-x8 + needs: [is-not-external-contributor, pre-flight, cicd-wait-in-queue] + runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }} if: | ( success() @@ -362,12 +348,13 @@ jobs: matrix: include: ${{ fromJson(needs.cicd-parse-unit-tests.outputs.unit-tests) }} needs: + - is-not-external-contributor - pre-flight - cicd-wait-in-queue - cicd-container-build - cicd-parse-unit-tests - runs-on: nvidia-ci-aws-gpu-x8 - name: "${{ matrix.bucket }} - latest" + runs-on: ${{ 
needs.is-not-external-contributor.outputs.selected_runner }} + name: '${{ matrix.bucket }} - latest' if: | ( success() @@ -389,7 +376,7 @@ jobs: test_case: ${{ matrix.bucket }} tag: latest timeout: ${{ matrix.timeout || 30 }} - is_unit_test: "true" + is_unit_test: 'true' PAT: ${{ secrets.PAT }} container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }} @@ -474,12 +461,13 @@ jobs: matrix: include: ${{ fromJson(needs.cicd-parse-integration-tests.outputs.integration-tests) }} needs: + - is-not-external-contributor - pre-flight - cicd-wait-in-queue - cicd-parse-integration-tests - cicd-unit-tests-latest - runs-on: nvidia-ci-aws-gpu-x8 - name: "${{ matrix.model }}/${{ matrix.test_case }} - latest" + runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }} + name: '${{ matrix.model }}/${{ matrix.test_case }} - latest' env: PIP_DISABLE_PIP_VERSION_CHECK: 1 PIP_NO_PYTHON_VERSION_WARNING: 1 @@ -502,7 +490,7 @@ jobs: model: ${{ matrix.model }} tag: latest timeout: ${{ matrix.timeout || 30 }} - is_unit_test: "false" + is_unit_test: 'false' PAT: ${{ secrets.PAT }} container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }} From 3aa0c4e9e99c7f48517f41072cabcf1229259df9 Mon Sep 17 00:00:00 2001 From: Michael Wojcikiewicz Date: Wed, 26 Nov 2025 10:16:10 -0500 Subject: [PATCH 156/334] fix: exit failure when PR author is external contributor removed (#2410) --- .github/workflows/cicd-main.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index d76d68e463e..fe4da54df4f 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -127,14 +127,6 @@ jobs: Thank you for your understanding. 
- - name: exit - run: | - if [ "${{ steps.check-membership.outputs.is_maintainer }}" == "true" ]; then - exit 0 - else - exit 1 - fi - pre-flight: needs: [is-not-external-contributor] if: github.repository == 'NVIDIA/Megatron-LM' From b750bdba73b87741c1d49c86f5cfb5c1015b86ce Mon Sep 17 00:00:00 2001 From: Michael Wojcikiewicz Date: Thu, 27 Nov 2025 15:57:44 -0500 Subject: [PATCH 157/334] fix: adding k8s taints for ephermeral jobs (#2420) --- .github/workflows/cicd-main.yml | 84 +++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index fe4da54df4f..ef37210cea3 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -201,6 +201,34 @@ jobs: && needs.pre-flight.outputs.is_merge_group == 'false' && !cancelled() steps: + - name: Taint node for job isolation + if: contains(needs.is-not-external-contributor.outputs.selected_runner, 'ephemeral') + shell: bash + run: | + # Verify prerequisites + if [ -z "$NODE_NAME" ]; then + echo "ERROR: NODE_NAME not set" + exit 1 + fi + + if ! 
command -v kubectl &> /dev/null; then + echo "ERROR: kubectl not found" + exit 1 + fi + + # Apply taint + JOB_ID="${GITHUB_RUN_ID}-${GITHUB_JOB}" + echo "=== Adding node taint for job isolation ===" + echo "Node: $NODE_NAME" + echo "Job ID: $JOB_ID" + + kubectl taint node "$NODE_NAME" "github.com/job-id=${JOB_ID}:NoSchedule" --overwrite=true + kubectl label node "$NODE_NAME" \ + "github.com/workflow=${GITHUB_WORKFLOW}" \ + "github.com/run-id=${GITHUB_RUN_ID}" \ + "github.com/job=${GITHUB_JOB}" \ + --overwrite=true + - name: Checkout uses: actions/checkout@v4 @@ -360,6 +388,34 @@ jobs: PIP_NO_PYTHON_VERSION_WARNING: 1 PIP_ROOT_USER_ACTION: ignore steps: + - name: Taint node for job isolation + if: contains(needs.is-not-external-contributor.outputs.selected_runner, 'ephemeral') + shell: bash + run: | + # Verify prerequisites + if [ -z "$NODE_NAME" ]; then + echo "ERROR: NODE_NAME not set" + exit 1 + fi + + if ! command -v kubectl &> /dev/null; then + echo "ERROR: kubectl not found" + exit 1 + fi + + # Apply taint + JOB_ID="${GITHUB_RUN_ID}-${GITHUB_JOB}" + echo "=== Adding node taint for job isolation ===" + echo "Node: $NODE_NAME" + echo "Job ID: $JOB_ID" + + kubectl taint node "$NODE_NAME" "github.com/job-id=${JOB_ID}:NoSchedule" --overwrite=true + kubectl label node "$NODE_NAME" \ + "github.com/workflow=${GITHUB_WORKFLOW}" \ + "github.com/run-id=${GITHUB_RUN_ID}" \ + "github.com/job=${GITHUB_JOB}" \ + --overwrite=true + - name: Checkout uses: actions/checkout@v4 - name: main @@ -473,6 +529,34 @@ jobs: && needs.pre-flight.outputs.is_merge_group == 'false' && !cancelled() steps: + - name: Taint node for job isolation + if: contains(needs.is-not-external-contributor.outputs.selected_runner, 'ephemeral') + shell: bash + run: | + # Verify prerequisites + if [ -z "$NODE_NAME" ]; then + echo "ERROR: NODE_NAME not set" + exit 1 + fi + + if ! 
command -v kubectl &> /dev/null; then + echo "ERROR: kubectl not found" + exit 1 + fi + + # Apply taint + JOB_ID="${GITHUB_RUN_ID}-${GITHUB_JOB}" + echo "=== Adding node taint for job isolation ===" + echo "Node: $NODE_NAME" + echo "Job ID: $JOB_ID" + + kubectl taint node "$NODE_NAME" "github.com/job-id=${JOB_ID}:NoSchedule" --overwrite=true + kubectl label node "$NODE_NAME" \ + "github.com/workflow=${GITHUB_WORKFLOW}" \ + "github.com/run-id=${GITHUB_RUN_ID}" \ + "github.com/job=${GITHUB_JOB}" \ + --overwrite=true + - name: Checkout uses: actions/checkout@v4 - name: main From c12909b7b589d125bbcea88e07218404747d185f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 27 Nov 2025 23:10:12 +0100 Subject: [PATCH 158/334] ci: Enable functional tests (#2419) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/actions/action.yml | 57 ++++---- .github/workflows/cicd-main.yml | 19 ++- ...pt-dynamic-inference-with-coordinator.yaml | 7 +- .../recipes/gpt-dynamic-inference.yaml | 8 +- .../recipes/gpt-static-inference.yaml | 10 +- tests/test_utils/recipes/gpt.yaml | 124 +++++++++--------- .../recipes/mamba-dynamic-inference.yaml | 4 +- .../recipes/mamba-static-inference.yaml | 6 +- tests/test_utils/recipes/mamba.yaml | 10 +- .../recipes/moe-dynamic-inference.yaml | 6 +- .../recipes/moe-static-inference.yaml | 8 +- tests/test_utils/recipes/moe.yaml | 24 ++-- .../test_utils/recipes/multimodal-llava.yaml | 6 +- 13 files changed, 156 insertions(+), 133 deletions(-) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index 8c6ca3a6865..5c35385b036 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -11,28 +11,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-name: "Test Template" -description: "Template for running NeMo tests in a containerized environment" +name: 'Test Template' +description: 'Template for running NeMo tests in a containerized environment' inputs: container-image: - description: "Container image to use for test" + description: 'Container image to use for test' required: true timeout: - description: "Max runtime of test in minutes" + description: 'Max runtime of test in minutes' required: false - default: "30" + default: '30' script: - description: "Test script to execute" + description: 'Test script to execute' required: true is-optional: - description: "Pass this job on failure." + description: 'Pass this job on failure.' required: false - default: "false" + default: 'false' is_unit_test: - description: "Upload coverage as unit test" + description: 'Upload coverage as unit test' required: false - default: "false" + default: 'false' tag: description: Latest or legacy test suite required: true @@ -43,11 +43,11 @@ inputs: description: Model to launch required: false PAT: - description: "GitHub Personal Access Token" + description: 'GitHub Personal Access Token' required: true runs: - using: "composite" + using: 'composite' steps: - name: Checkout repository uses: actions/checkout@v2 @@ -114,6 +114,16 @@ runs: HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run tests")') || echo "false" echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT + - name: Has Run functional tests label + shell: bash -x -e -u -o pipefail {0} + id: has-run-functional-tests-label + env: + GH_TOKEN: ${{ github.token }} + run: | + PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} + HAS_RUN_FUNCTIONAL_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. 
== "Run functional tests")') || echo "false" + echo "main=$HAS_RUN_FUNCTIONAL_TESTS_LABEL" | tee -a $GITHUB_OUTPUT + - name: Create run-script (e2e test) shell: bash -x -e -u -o pipefail {0} if: inputs.is_unit_test == 'false' @@ -126,16 +136,19 @@ runs: set -euxo pipefail if [ "${{ steps.has-run-tests-label.outputs.main }}" == "true" ]; then - ARGS=( - --scope mr-github - --enable-lightweight-mode - ) - else - ARGS=( - --scope mr-slim - --enable-lightweight-mode - ) - fi + ARGS=( + --scope mr-github + --enable-lightweight-mode + ) + elif [ "${{ steps.has-run-functional-tests-label.outputs.main }}" == "true" ]; then + ARGS=( + --scope mr-github + ) + else + ARGS=( + --scope mr-github-slim + ) + fi export PYTHONPATH=$(pwd) export NEMORUN_HOME=$(pwd) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index ef37210cea3..2fb08030686 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -259,8 +259,6 @@ jobs: - name: Download test data shell: bash - env: - GH_TOKEN: ${{ secrets.PAT }} run: | echo "::group::Download test data" pip install --no-cache-dir pygithub click @@ -463,10 +461,20 @@ jobs: HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run tests")') || echo "false" echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT + - name: Has Run functional tests label + id: has-run-functional-tests-label + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} + HAS_RUN_FUNCTIONAL_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. 
== "Run functional tests")') || echo "false" + echo "main=$HAS_RUN_FUNCTIONAL_TESTS_LABEL" | tee -a $GITHUB_OUTPUT + - name: Parse functional tests id: main env: HAS_RUN_TESTS_LABEL: ${{ steps.has-run-tests-label.outputs.main }} + HAS_RUN_FUNCTIONAL_TESTS_LABEL: ${{ steps.has-run-functional-tests-label.outputs.main }} run: | export PYTHONPATH=$(pwd) @@ -475,10 +483,13 @@ jobs: --scope mr-github --enable-lightweight-mode ) + elif [ "$HAS_RUN_FUNCTIONAL_TESTS_LABEL" == "true" ]; then + ARGS=( + --scope mr-github + ) else ARGS=( - --scope mr-slim - --enable-lightweight-mode + --scope mr-github-slim ) fi diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml index 6a3d582d3ae..e882d721860 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: "{test_case}_{environment}_{platforms}" + name: '{test_case}_{environment}_{platforms}' model: gpt build: mcore-pyt-{environment} nodes: 1 @@ -67,15 +67,14 @@ products: - test_case: [gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq] products: - environment: [dev] scope: [flaky] - diff --git a/tests/test_utils/recipes/gpt-dynamic-inference.yaml b/tests/test_utils/recipes/gpt-dynamic-inference.yaml index 66fa6887de8..a3853c3d9e1 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] 
spec: - name: "{test_case}_{environment}_{platforms}" + name: '{test_case}_{environment}_{platforms}' model: gpt build: mcore-pyt-{environment} nodes: 1 @@ -62,15 +62,15 @@ products: - test_case: [gpt_dynamic_inference_tp8_pp1_583m_logitsmatch] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/gpt-static-inference.yaml b/tests/test_utils/recipes/gpt-static-inference.yaml index 033c6c35116..39c2c3c934e 100644 --- a/tests/test_utils/recipes/gpt-static-inference.yaml +++ b/tests/test_utils/recipes/gpt-static-inference.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: "{test_case}_{environment}_{platforms}" + name: '{test_case}_{environment}_{platforms}' model: gpt build: mcore-pyt-{environment} nodes: 1 @@ -57,20 +57,20 @@ products: - test_case: [gpt_static_inference_tp1_pp1_583m_logitsmatch] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt_static_inference_tp1_pp1_583m_cudagraphs] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index 34030e4923a..eae09a6e16a 100644 --- 
a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -110,14 +110,14 @@ products: - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer] products: @@ -129,201 +129,201 @@ products: - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] # - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # - environment: [lts] # scope: [nightly] # Non-deterministic: #487 - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # outdated TE: #501 - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # non-determinism: #436 - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu] products: - 
environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # non-determinism: #437 - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] # - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # platforms: [dgx_h100] # Hangs: #513 # - environment: [lts] # scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # platforms: [dgx_h100] # Hangs: #513 - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied] products: # - environment: [dev] - # scope: [mr, mr-github] # Hangs: #513 + # scope: [mr] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap] products: # - 
environment: [dev] - # scope: [mr, mr-github] # Hangs: #513 + # scope: [mr] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_cp2] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_nondeterministic] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: 
[gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -331,14 +331,14 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -350,96 +350,96 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_mla] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: 
[dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader] products: # - environment: [dev] - # scope: [mr, mr-github] # Hangs: #513 + # scope: [mr] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_tp2_pp2_uninstall_te] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_7b_tp1_pp4_memory_speed] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # OOM: #434 - test_case: [gpt3_7b_tp4_pp1_memory_speed] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # OOM: #434 - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp1_modelopt_distill_resume] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # Outdated: #502 # - test_case: [gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist] # products: # - environment: [dev] 
- # scope: [mr, mr-github] # Broken: #484 + # scope: [mr] # Broken: #484 # - environment: [lts] # scope: [nightly] # Requires PyT 2.4: #481 ####################################################################### @@ -455,57 +455,57 @@ products: # - test_case: [gpt3_mcore_reruns_persistent_2] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] products: - environment: [lts] - scope: [mr, mr-github] + scope: [mr] - environment: [dev] - scope: [mr, mr-github, mr-slim] + scope: [mr, mr-github, mr-github-slim] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] - scope: [mr, mr-github] + scope: [mr] - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: - environment: [lts] - scope: [mr, mr-github] + scope: [mr] - environment: [dev] - scope: [mr, mr-github, mr-slim] + scope: [mr, mr-github, mr-github-slim] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - environment: [lts] - scope: [mr, mr-github] + scope: [mr] # - test_case: [gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # platforms: [dgx_h100] # - test_case: [gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # platforms: [dgx_h100] # - test_case: [gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # 
platforms: [dgx_h100] # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # platforms: [dgx_a100, dgx_h100] # - test_case: [gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap] # products: @@ -555,4 +555,4 @@ products: # - test_case: [gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te] # products: # - environment: [dev, lts] - # scope: [mr, mr-github] # Non-deterministic: #483 + # scope: [mr] # Non-deterministic: #483 diff --git a/tests/test_utils/recipes/mamba-dynamic-inference.yaml b/tests/test_utils/recipes/mamba-dynamic-inference.yaml index 9ca1bab4402..0d02ce29a54 100644 --- a/tests/test_utils/recipes/mamba-dynamic-inference.yaml +++ b/tests/test_utils/recipes/mamba-dynamic-inference.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: "{test_case}_{environment}_{platforms}" + name: '{test_case}_{environment}_{platforms}' model: hybrid build: mcore-pyt-{environment} nodes: 1 @@ -57,5 +57,5 @@ products: - test_case: [hybrid_dynamic_inference_tp1_pp1_dp8_583m] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/mamba-static-inference.yaml b/tests/test_utils/recipes/mamba-static-inference.yaml index 06107618916..9645b1b0b8a 100644 --- a/tests/test_utils/recipes/mamba-static-inference.yaml +++ b/tests/test_utils/recipes/mamba-static-inference.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: "{test_case}_{environment}_{platforms}" + name: '{test_case}_{environment}_{platforms}' model: hybrid build: mcore-pyt-{environment} nodes: 1 @@ -57,10 +57,10 @@ products: - test_case: [hybrid_static_inference_tp1_pp1_2B_logitsmatch] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [hybrid_static_inference_tp1_pp1_2B_cudagraphs] 
products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/mamba.yaml b/tests/test_utils/recipes/mamba.yaml index bb742200d26..92b799d3d1c 100644 --- a/tests/test_utils/recipes/mamba.yaml +++ b/tests/test_utils/recipes/mamba.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: "{test_case}_{environment}_{platforms}" + name: '{test_case}_{environment}_{platforms}' model: hybrid build: mcore-pyt-{environment} nodes: 1 @@ -58,7 +58,7 @@ products: - test_case: [hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] @@ -67,14 +67,14 @@ products: # - test_case: [hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] - test_case: [hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] @@ -82,7 +82,7 @@ products: - test_case: [hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] diff --git a/tests/test_utils/recipes/moe-dynamic-inference.yaml b/tests/test_utils/recipes/moe-dynamic-inference.yaml index 9bb23f8a322..6d8fdc533e1 100644 --- a/tests/test_utils/recipes/moe-dynamic-inference.yaml +++ b/tests/test_utils/recipes/moe-dynamic-inference.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: "{test_case}_{environment}_{platforms}" + name: '{test_case}_{environment}_{platforms}' model: moe build: 
mcore-pyt-{environment} nodes: 1 @@ -57,10 +57,10 @@ products: - test_case: [gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch] products: - environment: [dev] - scope: [mr-broken, mr-github] + scope: [mr-broken] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/moe-static-inference.yaml b/tests/test_utils/recipes/moe-static-inference.yaml index 136606d0955..9cebb66f2e2 100644 --- a/tests/test_utils/recipes/moe-static-inference.yaml +++ b/tests/test_utils/recipes/moe-static-inference.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: "{test_case}_{environment}_{platforms}" + name: '{test_case}_{environment}_{platforms}' model: moe build: mcore-pyt-{environment} nodes: 1 @@ -57,15 +57,15 @@ products: - test_case: [gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 2d4e8c4c94c..285d16c99f3 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: "{test_case}_{environment}_{platforms}" + name: '{test_case}_{environment}_{platforms}' model: moe build: mcore-pyt-{environment} nodes: 1 @@ -84,27 +84,27 @@ products: - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer] products: - 
environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # hang: #513 # - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # platforms: [dgx_h100] # hang: #513 - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] products: @@ -114,12 +114,12 @@ products: - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] # - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon] # products: @@ -152,12 +152,12 @@ products: # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # platforms: [dgx_h100] # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM] # products: # - environment: [dev] - # scope: [mr, mr-github] + # scope: [mr] # platforms: [dgx_h100] ########################### # Merge train tests # @@ -165,12 +165,12 @@ products: - test_case: [gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer] products: - environment: [dev] - 
scope: [mr, mr-github, mr-slim] + scope: [mr, mr-github, mr-github-slim] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed] products: - environment: [dev] - scope: [mr, mr-github, mr-slim] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] products: diff --git a/tests/test_utils/recipes/multimodal-llava.yaml b/tests/test_utils/recipes/multimodal-llava.yaml index 0e199764c09..72702de33c5 100644 --- a/tests/test_utils/recipes/multimodal-llava.yaml +++ b/tests/test_utils/recipes/multimodal-llava.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: "{test_case}_{environment}_{platforms}" + name: '{test_case}_{environment}_{platforms}' model: multimodal-llava build: mcore-pyt-{environment} nodes: 1 @@ -61,10 +61,10 @@ products: - test_case: [multimodal_llava_mcore_te_tp1_pp1] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] - test_case: [multimodal_llava_mcore_te_tp4_sp_cp2] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] platforms: [dgx_h100] From 44933d7cc202e0eb197936231ceaf9c6f3d8518c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 28 Nov 2025 00:24:49 +0100 Subject: [PATCH 159/334] Reapply "build: Upgrade deps (NVIDIA#2289)" (#2408) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cicd-main.yml | 2 +- .gitlab/scripts/build.sh | 5 +- docker/Dockerfile.ci.dev | 1 + .../core/dist_checkpointing/exchange_utils.py | 2 +- megatron/core/dist_checkpointing/mapping.py | 2 +- .../core/dist_checkpointing/validation.py | 2 +- pyproject.toml | 35 +- .../download_unit_tests_dataset.py | 205 +- tests/unit_tests/conftest.py | 9 +- uv.lock | 2832 ++++++++--------- 10 files changed, 1376 insertions(+), 1719 deletions(-) diff --git a/.github/workflows/cicd-main.yml 
b/.github/workflows/cicd-main.yml index 2fb08030686..7043e022c95 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -261,7 +261,7 @@ jobs: shell: bash run: | echo "::group::Download test data" - pip install --no-cache-dir pygithub click + pip install --no-cache-dir click requests python tests/test_utils/python_scripts/download_unit_tests_dataset.py --assets-dir ./assets echo "::endgroup::" diff --git a/.gitlab/scripts/build.sh b/.gitlab/scripts/build.sh index 960af104628..e64434e834d 100644 --- a/.gitlab/scripts/build.sh +++ b/.gitlab/scripts/build.sh @@ -7,9 +7,9 @@ eval "IMAGE=\$$IMAGE" # Start a named container in detached mode docker run -d --name download_test_data -w /workdir/ python:3.12-slim bash -c 'sleep infinity' docker cp tests/. download_test_data:/workdir/tests -docker exec -e GH_TOKEN=$GH_TOKEN download_test_data bash -c ' +docker exec download_test_data bash -c ' ls -al /workdir/ - pip install --no-cache-dir pygithub click + pip install --no-cache-dir click requests python tests/test_utils/python_scripts/download_unit_tests_dataset.py --assets-dir ./assets ' docker cp download_test_data:/workdir/assets ./ @@ -50,6 +50,7 @@ DOCKER_BUILDKIT=1 docker build \ --builder=container \ --build-arg JET_API_VERSION=$JET_API_VERSION \ --cache-from type=registry,ref=${IMAGE}-buildcache:${CI_MERGE_REQUEST_IID} \ + --cache-from type=registry,ref=${IMAGE}-buildcache:dev \ --cache-from type=registry,ref=${IMAGE}-buildcache:main \ --build-arg FROM_IMAGE_NAME=$BASE_IMAGE \ --push \ diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index 6596fc01aaf..482c6af460c 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -36,6 +36,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ --no-install-package torch \ --no-install-package torchvision \ --no-install-package triton \ + --no-install-package transformer-engine-cu12 \ --no-install-package nvidia-cublas-cu12 \ --no-install-package nvidia-cuda-cupti-cu12 
\ --no-install-package nvidia-cuda-nvrtc-cu12 \ diff --git a/megatron/core/dist_checkpointing/exchange_utils.py b/megatron/core/dist_checkpointing/exchange_utils.py index def79fb778e..2f791449057 100644 --- a/megatron/core/dist_checkpointing/exchange_utils.py +++ b/megatron/core/dist_checkpointing/exchange_utils.py @@ -63,7 +63,7 @@ class ShardDistribution(NamedTuple): def _shard_size(sh_ten: ShardedTensor): """Returns size in bytes of a given sharded tensor.""" if sh_ten.flattened_range is None: - numel = np.product(sh_ten.local_shape) + numel = np.prod(sh_ten.local_shape) else: numel = sh_ten.flattened_range.stop - sh_ten.flattened_range.start return numel * torch._utils._element_size(sh_ten.dtype) diff --git a/megatron/core/dist_checkpointing/mapping.py b/megatron/core/dist_checkpointing/mapping.py index d38ea57eee0..45a105666ab 100644 --- a/megatron/core/dist_checkpointing/mapping.py +++ b/megatron/core/dist_checkpointing/mapping.py @@ -216,7 +216,7 @@ def local_coordinates(self) -> Tuple[np.ndarray, ...]: ) # TODO: np.unravel_index? 
- mask = np.zeros(np.product(self.local_shape), dtype=bool) + mask = np.zeros(np.prod(self.local_shape), dtype=bool) mask[self.flattened_range] = True return np.nonzero(mask.reshape(self.local_shape)) diff --git a/megatron/core/dist_checkpointing/validation.py b/megatron/core/dist_checkpointing/validation.py index 96945055319..9bcb59bdbf4 100644 --- a/megatron/core/dist_checkpointing/validation.py +++ b/megatron/core/dist_checkpointing/validation.py @@ -519,7 +519,7 @@ def _validate_sharding_for_key_flattened(tensors_by_shard): all_slices.append((sharding.flattened_range.start, sharding.flattened_range.stop)) starts, stops = map(np.asarray, zip(*sorted(all_slices))) - expected_size = np.product(local_shape) + expected_size = np.prod(local_shape) if starts[0] != 0 or stops[-1] != expected_size or not np.all(starts[1:] == stops[:-1]): raise CheckpointingException( f"Flattened ranges dont cover the whole shard {tensors_by_shard[0]} of size {expected_size}. Ranges: {(starts, stops)}" diff --git a/pyproject.toml b/pyproject.toml index 7f734927c1a..553f898ae6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dynamic = ["version", "readme"] description = "Megatron Core - a library for efficient and scalable training of transformer based models" requires-python = ">=3.10" license = { text = "Apache 2.0" } -dependencies = ["torch", "numpy<2.0.0", "packaging>=24.2"] +dependencies = ["torch", "numpy", "packaging>=24.2"] authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] keywords = [ @@ -67,37 +67,44 @@ Homepage = "https://github.com/NVIDIA/Megatron-LM/megatron/core" mlm = ["flask-restful", "sentencepiece", "tiktoken", "wandb", "transformers"] dev = [ - "nvidia-modelopt[torch]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'", - "transformer-engine[pytorch]>=2.9.0a0,<2.10.0", - "nvidia-resiliency-ext>=0.4.0a0,<0.5.0", + "nvidia-modelopt[torch]; sys_platform != 'darwin'", + 
"transformer-engine[pytorch,core_cu13]>=2.9.0a0,<2.10.0", + "nvidia-resiliency-ext", "tqdm", "einops~=0.8", "tensorstore~=0.1,!=0.1.46,!=0.1.72", "nvtx~=0.2", "multi-storage-client~=0.27", "opentelemetry-api~=1.33.1", - "setuptools<80.0.0", "mamba-ssm~=2.2", "causal-conv1d~=1.5", "nv-grouped-gemm~=1.1", "megatron-energon[av_decode]~=6.0", - "av<16.0.0", # At the time, av 16.0.0 is not compatible with Python 3.12 + "av", "flashinfer-python", "wget", "onnxscript", "flash-linear-attention~=0.3.2", "emerging_optimizers", + "fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0 ] lts = [ "tqdm", - "einops", - "tensorstore!=0.1.46,!=0.1.72", - "nvtx", - "transformers", - "zarr", - "setuptools<80.0.0", + "einops~=0.8", + "tensorstore~=0.1,!=0.1.46,!=0.1.72", + "nvtx~=0.2", + "multi-storage-client~=0.27", + "opentelemetry-api~=1.33.1", + "mamba-ssm~=2.2", + "causal-conv1d~=1.5", + "nv-grouped-gemm~=1.1", + "megatron-energon[av_decode]~=6.0", + "av", + "flashinfer-python", "wget", + "onnxscript", + "fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0 ] [dependency-groups] @@ -141,7 +148,7 @@ linting = [ "pylint==3.2.6", ] ci = ["python-gitlab", "slack-sdk", "pandas"] -flash_mla = ["flash_mla"] +no_pypi_wheels = ["flash_mla", "emerging_optimizers"] [tool.uv] default-groups = ["linting", "build", "test"] @@ -168,7 +175,7 @@ override-dependencies = [ flash_mla = [ { git = "https://github.com/deepseek-ai/FlashMLA", rev = "9edee0c022cd0938148a18e334203b0aab43aa19" }, ] -transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "release_v2.9" } # on `release_v2.9` +# transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "release_v2.9" } # on `release_v2.9` nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "01a9a8ba360f7b2908728ad0516e0ad9d936966d" } emerging_optimizers = { git = 
"https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "v0.1.0" } diff --git a/tests/test_utils/python_scripts/download_unit_tests_dataset.py b/tests/test_utils/python_scripts/download_unit_tests_dataset.py index 04470c2f820..a29394c29de 100644 --- a/tests/test_utils/python_scripts/download_unit_tests_dataset.py +++ b/tests/test_utils/python_scripts/download_unit_tests_dataset.py @@ -1,21 +1,35 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + #!/usr/bin/env python3 """ Script to fetch the oldest release of NVIDIA/Megatron-LM on GitHub and list its assets. Uses the PyGithub SDK to interact with the GitHub API. """ -import os -import sys +import logging import tarfile import zipfile from pathlib import Path import click import requests -from github import Github +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +ASSETS = [ + { + "name": "datasets.zip", + "url": "https://github.com/NVIDIA/Megatron-LM/releases/download/v2.5/datasets.zip", + }, + { + "name": "tokenizers.zip", + "url": "https://github.com/NVIDIA/Megatron-LM/releases/download/v2.5/tokenizers.zip", + }, +] -def download_and_extract_asset(asset_url: str, asset_name: str, assets_dir: Path) -> bool: + +def download_and_extract_asset(assets_dir: Path) -> bool: """ Download and extract an asset to the assets directory. 
@@ -27,144 +41,43 @@ def download_and_extract_asset(asset_url: str, asset_name: str, assets_dir: Path Returns: bool: True if successful, False otherwise """ - try: - # Download the asset - print(f" Downloading {asset_name}...") - response = requests.get(asset_url, stream=True) - response.raise_for_status() - - # Save to temporary file - temp_file = assets_dir / asset_name - with open(temp_file, 'wb') as f: - for chunk in response.iter_content(chunk_size=8192): - f.write(chunk) - - print(f" Extracting {asset_name} to {assets_dir}...") - - # Extract based on file type - if asset_name.endswith('.zip'): - with zipfile.ZipFile(temp_file, 'r') as zip_ref: - zip_ref.extractall(assets_dir) - elif asset_name.endswith(('.tar.gz', '.tgz')): - with tarfile.open(temp_file, 'r:gz') as tar_ref: - tar_ref.extractall(assets_dir) - elif asset_name.endswith('.tar'): - with tarfile.open(temp_file, 'r') as tar_ref: - tar_ref.extractall(assets_dir) - else: - print(f" Warning: Unknown file type for {asset_name}, skipping extraction") - return False - - # Clean up temporary file - temp_file.unlink() - print(f" Successfully extracted to {assets_dir}") - return True - - except Exception as e: - print(f" Error downloading/extracting {asset_name}: {e}") - return False - - -def get_oldest_release_and_assets( - repo_name: str = "NVIDIA/Megatron-LM", assets_dir: str = "assets" -) -> None: - """ - Fetch the oldest release of a GitHub repository and list its assets. 
- - Args: - repo_name: The repository name in format "owner/repo" - assets_dir: Directory to extract assets to - """ - try: - # Initialize GitHub client - g = Github(login_or_token=os.getenv('GH_TOKEN', None)) - - # Get the repository - repo = g.get_repo(repo_name) - print(f"Repository: {repo.full_name}") - print(f"Description: {repo.description}") - print(f"URL: {repo.html_url}") - print("-" * 80) - - # Get all releases - releases = list(repo.get_releases()) - - if not releases: - print("No releases found for this repository.") - return - - # Sort releases by creation date to find the oldest - releases.sort(key=lambda x: x.created_at) - oldest_release = releases[0] - - print(f"Oldest Release:") - print(f" Tag: {oldest_release.tag_name}") - print(f" Title: {oldest_release.title}") - print(f" Created: {oldest_release.created_at}") - print(f" Published: {oldest_release.published_at}") - print(f" Draft: {oldest_release.draft}") - print(f" Prerelease: {oldest_release.prerelease}") - print(f" URL: {oldest_release.html_url}") - - if oldest_release.body: - print(f" Description: {oldest_release.body[:200]}...") - - print("-" * 80) - - # List assets - assets = list(oldest_release.get_assets()) - - if not assets: - print("No assets found for this release.") - return - - print(f"Assets ({len(assets)} total):") - print("-" * 80) - - for i, asset in enumerate(assets, 1): - print(f"{i}. 
{asset.name}") - print(f" Size: {asset.size} bytes ({asset.size / 1024 / 1024:.2f} MB)") - print(f" Downloads: {asset.download_count}") - print(f" Content Type: {asset.content_type}") - print(f" URL: {asset.browser_download_url}") - print(f" Created: {asset.created_at}") - print(f" Updated: {asset.updated_at}") - print() - - # Summary - total_size = sum(asset.size for asset in assets) - total_downloads = sum(asset.download_count for asset in assets) - - print(f"Summary:") - print(f" Total assets: {len(assets)}") - print(f" Total size: {total_size} bytes ({total_size / 1024 / 1024:.2f} MB)") - print(f" Total downloads: {total_downloads}") - - # Download and extract assets if requested - if assets: - print("-" * 80) - print("Downloading and extracting assets...") - - # Create assets directory - assets_path = Path(assets_dir) - assets_path.mkdir(parents=True, exist_ok=True) - print(f"Created assets directory: {assets_path.absolute()}") - - successful_downloads = 0 - for asset in assets: - print(f"\nProcessing asset: {asset.name}") - if download_and_extract_asset(asset.browser_download_url, asset.name, assets_path): - successful_downloads += 1 - - print(f"\nDownload Summary:") - print( - f" Successfully downloaded and extracted: {successful_downloads}/{len(assets)} assets" - ) - print(f" Assets directory: {assets_path.absolute()}") - - except Exception as e: - print(f"Error: {e}") - sys.exit(1) + for asset in ASSETS: + asset_name, asset_url = asset.values() + try: + # Download the asset + logger.info(f" Downloading {asset_name}...") + response = requests.get(asset_url, stream=True) + response.raise_for_status() + + # Save to temporary file + temp_file = assets_dir / asset_name + with open(temp_file, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + + logger.info(f" Extracting {asset_name} to {assets_dir}...") + + # Extract based on file type + if asset_name.endswith('.zip'): + with zipfile.ZipFile(temp_file, 'r') as zip_ref: + 
zip_ref.extractall(assets_dir) + elif asset_name.endswith(('.tar.gz', '.tgz')): + with tarfile.open(temp_file, 'r:gz') as tar_ref: + tar_ref.extractall(assets_dir) + elif asset_name.endswith('.tar'): + with tarfile.open(temp_file, 'r') as tar_ref: + tar_ref.extractall(assets_dir) + else: + logger.warning( + f" Warning: Unknown file type for {asset_name}, skipping extraction" + ) + + # Clean up temporary file + temp_file.unlink() + logger.info(f" Successfully extracted to {assets_dir}") + + except Exception as e: + logger.error(f" Error downloading/extracting {asset_name}: {e}") @click.command() @@ -174,10 +87,12 @@ def get_oldest_release_and_assets( @click.option('--assets-dir', default='assets', help='Directory to extract assets to') def main(repo, assets_dir): """Fetch the oldest release of a GitHub repository and download its assets.""" - print(f"Fetching oldest release of {repo}...") - print("=" * 80) + logger.info(f"Fetching oldest release of {repo}...") + logger.info("=" * 80) + + Path(assets_dir).mkdir(parents=True, exist_ok=True) - get_oldest_release_and_assets(repo_name=repo, assets_dir=assets_dir) + download_and_extract_asset(Path(assets_dir)) if __name__ == "__main__": diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py index 611f9ae6098..e251a3c1e7e 100644 --- a/tests/unit_tests/conftest.py +++ b/tests/unit_tests/conftest.py @@ -1,5 +1,6 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ import os -import sys from pathlib import Path import pytest @@ -8,9 +9,7 @@ from megatron.core import config from megatron.core.utils import is_te_min_version -from tests.test_utils.python_scripts.download_unit_tests_dataset import ( - get_oldest_release_and_assets, -) +from tests.test_utils.python_scripts.download_unit_tests_dataset import download_and_extract_asset from tests.unit_tests.dist_checkpointing import TempNamedDir from tests.unit_tests.test_utilities import Utils @@ -83,7 +82,7 @@ def ensure_test_data(): try: # Download assets to /opt/data - get_oldest_release_and_assets(assets_dir=str(data_path)) + download_and_extract_asset(assets_dir=str(data_path)) print("Test data downloaded successfully.") diff --git a/uv.lock b/uv.lock index f636a791f12..af8e548b625 100644 --- a/uv.lock +++ b/uv.lock @@ -2,50 +2,16 @@ version = 1 revision = 2 requires-python = ">=3.10" resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 
'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation 
== 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version 
== '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and 
sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version == '3.13.*' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and sys_platform == 'linux'", + "python_full_version >= '3.14' and sys_platform != 'linux'", + "python_full_version == '3.13.*' and sys_platform != 'linux'", + "python_full_version == '3.12.*' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'linux'", + "python_full_version < '3.11' and sys_platform == 'linux'", + "python_full_version < '3.11' and sys_platform != 'linux'", ] conflicts = [[ { package = "megatron-core", extra = "dev" }, @@ -82,7 +48,7 @@ wheels = [ [[package]] name = "aiobotocore" -version = "2.25.1" +version = "2.26.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -91,11 +57,11 @@ dependencies = [ { name = "jmespath" }, { name = "multidict" }, { name = "python-dateutil" }, - { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" } }, + { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/62/94/2e4ec48cf1abb89971cb2612d86f979a6240520f0a659b53a43116d344dc/aiobotocore-2.25.1.tar.gz", hash = 
"sha256:ea9be739bfd7ece8864f072ec99bb9ed5c7e78ebb2b0b15f29781fbe02daedbc", size = 120560, upload-time = "2025-10-28T22:33:21.787Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4d/f8/99fa90d9c25b78292899fd4946fce97b6353838b5ecc139ad8ba1436e70c/aiobotocore-2.26.0.tar.gz", hash = "sha256:50567feaf8dfe2b653570b4491f5bc8c6e7fb9622479d66442462c021db4fadc", size = 122026, upload-time = "2025-11-28T07:54:59.956Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/95/2a/d275ec4ce5cd0096665043995a7d76f5d0524853c76a3d04656de49f8808/aiobotocore-2.25.1-py3-none-any.whl", hash = "sha256:eb6daebe3cbef5b39a0bb2a97cffbe9c7cb46b2fcc399ad141f369f3c2134b1f", size = 86039, upload-time = "2025-10-28T22:33:19.949Z" }, + { url = "https://files.pythonhosted.org/packages/b7/58/3bf0b7d474607dc7fd67dd1365c4e0f392c8177eaf4054e5ddee3ebd53b5/aiobotocore-2.26.0-py3-none-any.whl", hash = "sha256:a793db51c07930513b74ea7a95bd79aaa42f545bdb0f011779646eafa216abec", size = 87333, upload-time = "2025-11-28T07:54:58.457Z" }, ] [[package]] @@ -229,11 +195,11 @@ wheels = [ [[package]] name = "aioitertools" -version = "0.12.0" +version = "0.13.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/06/de/38491a84ab323b47c7f86e94d2830e748780525f7a10c8600b67ead7e9ea/aioitertools-0.12.0.tar.gz", hash = "sha256:c2a9055b4fbb7705f561b9d86053e8af5d10cc845d22c32008c43490b2d8dd6b", size = 19369, upload-time = "2024-09-02T03:33:40.349Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/3c/53c4a17a05fb9ea2313ee1777ff53f5e001aefd5cc85aa2f4c2d982e1e38/aioitertools-0.13.0.tar.gz", hash = "sha256:620bd241acc0bbb9ec819f1ab215866871b4bbd1f73836a55f799200ee86950c", size = 19322, upload-time = "2025-11-06T22:17:07.609Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/85/13/58b70a580de00893223d61de8fea167877a3aed97d4a5e1405c9159ef925/aioitertools-0.12.0-py3-none-any.whl", hash = 
"sha256:fc1f5fac3d737354de8831cbba3eb04f79dd649d8f3afb4c5b114925e662a796", size = 24345, upload-time = "2024-09-02T03:34:59.454Z" }, + { url = "https://files.pythonhosted.org/packages/10/a1/510b0a7fadc6f43a6ce50152e69dbd86415240835868bb0bd9b5b88b1e06/aioitertools-0.13.0-py3-none-any.whl", hash = "sha256:0be0292b856f08dfac90e31f4739432f4cb6d7520ab9eb73e143f4f2fa5259be", size = 24182, upload-time = "2025-11-06T22:17:06.502Z" }, ] [[package]] @@ -269,11 +235,11 @@ wheels = [ [[package]] name = "annotated-doc" -version = "0.0.3" +version = "0.0.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/a6/dc46877b911e40c00d395771ea710d5e77b6de7bacd5fdcd78d70cc5a48f/annotated_doc-0.0.3.tar.gz", hash = "sha256:e18370014c70187422c33e945053ff4c286f453a984eba84d0dbfa0c935adeda", size = 5535, upload-time = "2025-10-24T14:57:10.718Z" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/b7/cf592cb5de5cb3bade3357f8d2cf42bf103bbe39f459824b4939fd212911/annotated_doc-0.0.3-py3-none-any.whl", hash = "sha256:348ec6664a76f1fd3be81f43dffbee4c7e8ce931ba71ec67cc7f4ade7fbbb580", size = 5488, upload-time = "2025-10-24T14:57:09.462Z" }, + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, ] [[package]] @@ -308,44 +274,38 @@ wheels = [ [[package]] name = "apache-tvm-ffi" -version = "0.1.1" +version = "0.1.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = 
"typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d8/e8/7db1ca6db40877d190a8538cc378f740aae247c6fe063815898607c2d2ca/apache_tvm_ffi-0.1.1.tar.gz", hash = "sha256:728ce3f4ae02b89a7147b718f7f670afac3c6d1f96df38d488757274643709fc", size = 1259223, upload-time = "2025-11-04T02:43:38.154Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/ad/550aff4c9652ee8297f90a04c3ab4143ece1d373101010d85b5c9a9a2e7d/apache_tvm_ffi-0.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:af0de7bb9581ac9e090276cba37c4e7ffaeed601a2b2b546bf0e2daed3810cec", size = 1723658, upload-time = "2025-11-04T02:42:37.628Z" }, - { url = "https://files.pythonhosted.org/packages/48/5a/01e65f4a6c2b146f7c40f6d8d663d76b60c3be324159f8fb8223ea505738/apache_tvm_ffi-0.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb7d6828652803cb8c0e13d1f06d01fc6bfb8e79e77e3de7e6fd4b5fae5ee9d2", size = 1882437, upload-time = "2025-11-04T02:42:39.647Z" }, - { url = "https://files.pythonhosted.org/packages/6b/bd/b52b71d03637d7a82388c2e90d48dddec2c46121be1333c9851d6a135824/apache_tvm_ffi-0.1.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1fe072b55a7949720a792a9d455c0659aa097825e709a16a4667d720137b8b5c", size = 1954949, upload-time = "2025-11-04T02:42:41.119Z" }, - { url = "https://files.pythonhosted.org/packages/ac/ef/ff85926928694785f2399a4c5b793bcfecf8c3cf806dedf9202b7db73b8b/apache_tvm_ffi-0.1.1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b25178b265903dabd9a35bd767db26928be3b7869f681fe1d6e1aed93d7c0799", size = 1837395, upload-time = "2025-11-04T02:42:42.954Z" }, - { url = "https://files.pythonhosted.org/packages/de/69/f048bda5e5445a89200737062a202cb39097d3b1902e886654de9cd6b624/apache_tvm_ffi-0.1.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5552af3c625750361d1b7d646d499a28caf94858967e74c9cce6ed7d4629b28", size = 1947740, upload-time = 
"2025-11-04T02:42:44.49Z" }, - { url = "https://files.pythonhosted.org/packages/dc/df/295f71613502edeb39a39b30c8bbb9ec8fcc06bd95b3043dd99b55fa98a8/apache_tvm_ffi-0.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:c102ba5899ce106c8068a3f21155c106790b5b0141fba52a52ed6e9aeb286aff", size = 1710966, upload-time = "2025-11-04T02:42:46.037Z" }, - { url = "https://files.pythonhosted.org/packages/8f/a9/544767d7058f825c0ceb5bc25760ad3a821b2efcc6a3dbe2e3988a3aee86/apache_tvm_ffi-0.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7cbf31c472920cdc5b3f75f2d2720b8a6b37ddbdb11d573fa94524815ea5a144", size = 1725662, upload-time = "2025-11-04T02:42:47.528Z" }, - { url = "https://files.pythonhosted.org/packages/54/c3/fe1a9f8968d5ce2d3b674e397c2bf01961e32a72b723817478c67c9780e3/apache_tvm_ffi-0.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7602bc37019387a4705677b6e742059c7e1973a899b6918af235febcb3d3b47", size = 1884278, upload-time = "2025-11-04T02:42:48.998Z" }, - { url = "https://files.pythonhosted.org/packages/24/b9/80cbba18b2d7d9013031d8c13671986912275b9ca6aaea70a1dd9b361c39/apache_tvm_ffi-0.1.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7941f82a2ae4549f55c07d82d37c5765628d70f29dace98628393fcea525e870", size = 1957018, upload-time = "2025-11-04T02:42:50.538Z" }, - { url = "https://files.pythonhosted.org/packages/b4/0c/d27beb98d6841a3929468648433ed2c53e4da953fadb73c754b9372b2356/apache_tvm_ffi-0.1.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2e0d6d8e0888ee3a3defd2cbe1eff7a65c05900b4e8fa0e18c890048fc6a44a6", size = 1839279, upload-time = "2025-11-04T02:42:52.438Z" }, - { url = "https://files.pythonhosted.org/packages/0f/10/d7cf7779c65047ad2ca652234a174c2908d936cb69bc4f5156e17382fa91/apache_tvm_ffi-0.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:549c2150e1c2d7ca7912cad173f62a192aec90cd981c024bd246161283ea5d78", size = 1950476, upload-time = 
"2025-11-04T02:42:54.159Z" }, - { url = "https://files.pythonhosted.org/packages/53/71/bb5ee4bca52a37a8f9580ab1f1de1be5366808a194981c324a756dabbe15/apache_tvm_ffi-0.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:3fbcfe526b458bc8edeafdc769388782d3bb4321c46a987e50bcece93ae78af8", size = 1711278, upload-time = "2025-11-04T02:42:55.56Z" }, - { url = "https://files.pythonhosted.org/packages/d1/1e/f8d16dbe2303d1e7348037b4207d6c1093c554573484c97c8f3cde61a060/apache_tvm_ffi-0.1.1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:f2c0164a5c6286f9c333ddedeb448b855cbc1225688d0a4c9aeab006ddfa1180", size = 1701072, upload-time = "2025-11-04T02:42:57.28Z" }, - { url = "https://files.pythonhosted.org/packages/3d/47/f7a55e9b5b741f901ed9101a3ef46fd250f2c1519a6479e055432ff4f308/apache_tvm_ffi-0.1.1-cp312-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:33cc35738e0c44f2a67e550457b6b7dc7de9109ca64422a9e7063b1ba43c336e", size = 1854467, upload-time = "2025-11-04T02:43:00.158Z" }, - { url = "https://files.pythonhosted.org/packages/f2/db/f3adbe1e2d092fbb18908971a25ceb5496669ec65d01a28b7dd57f471ae0/apache_tvm_ffi-0.1.1-cp312-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9db6484259120b1bdc600f736084ee3d574775b1f4a3e8fef110323e3a9d2b6", size = 1930968, upload-time = "2025-11-04T02:43:01.96Z" }, - { url = "https://files.pythonhosted.org/packages/3b/da/7f678675ccc8af1c7d313322f3875e2c829f1faaa58c0d982431beeb3b3e/apache_tvm_ffi-0.1.1-cp312-abi3-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7bd812058ce9046cb69fd7b3e18538d1d0eefa1719822a1441b00bb841f7af4", size = 1811173, upload-time = "2025-11-04T02:43:03.404Z" }, - { url = "https://files.pythonhosted.org/packages/e1/11/c8b3b7d69ceebd219dcb06f5e4a3997edea3bc2e0bbdd8f57ae65bba4f2f/apache_tvm_ffi-0.1.1-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:807def3039fb336a228c120ca8c32eb794bdfd2d7aff218c8611f287ad913736", size = 1922690, upload-time = 
"2025-11-04T02:43:04.846Z" }, - { url = "https://files.pythonhosted.org/packages/fd/0b/f816735d761049e53eb388264238655f58fcb42a31e0d1848a4fb6a6556b/apache_tvm_ffi-0.1.1-cp312-abi3-win_amd64.whl", hash = "sha256:624b4430ca3949f85fffd9ef498ebaf1155ff0ac659fc764eec6c6fd66ec7986", size = 1690969, upload-time = "2025-11-04T02:43:06.581Z" }, - { url = "https://files.pythonhosted.org/packages/12/aa/df81df8f8b39d3c41fbac41b1e6661d192d9987a3ef317fabcefecf727a6/apache_tvm_ffi-0.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c93d9de81c1ba9560fcc696cf84d777f88016eb53f05ee2d6288ddcb95a5e72f", size = 1732582, upload-time = "2025-11-04T02:43:08.042Z" }, - { url = "https://files.pythonhosted.org/packages/a8/55/861090532e4accd855e119f0e67e0e482b42abb866c9505edd8956148ebc/apache_tvm_ffi-0.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f9e0227179a0ce83384132b34757fd05f492270f1c031eae615870a5641b5039", size = 1870196, upload-time = "2025-11-04T02:43:09.911Z" }, - { url = "https://files.pythonhosted.org/packages/2a/c6/470493934559e371ad699e1764649176efc5e022267c6dd0a565217177ad/apache_tvm_ffi-0.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:78e75e193d675b9639e6fd0c33c60c3a4259d4c9f848f60baa6a3194df7e1fea", size = 1941999, upload-time = "2025-11-04T02:43:11.467Z" }, - { url = "https://files.pythonhosted.org/packages/85/b8/84eba0d266c9b10beae59a6863ef5c68044e20a6f12d46a42116e80db774/apache_tvm_ffi-0.1.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49792622720421525a18e378d848411731d32fcb05a00b6e54b84d05ff46cc22", size = 1823965, upload-time = "2025-11-04T02:43:12.941Z" }, - { url = "https://files.pythonhosted.org/packages/64/73/ca73a43260a1374b1f34d0e6fcf6f8af16f66867a89dfd562b26184af1bd/apache_tvm_ffi-0.1.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:039293086d44e7f601bf8231e369198afe7ad38986330969ddb1a5fc7622976b", size = 1933779, 
upload-time = "2025-11-04T02:43:14.543Z" }, - { url = "https://files.pythonhosted.org/packages/5b/91/687c3b9ff3313addeebc1188ac50b299a82944ef1784b91890fc6f250ebd/apache_tvm_ffi-0.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:3f6cbd214bee2e52719d5264f05a2685c955ae7b096980f0361d917a5a9f47a6", size = 1751905, upload-time = "2025-11-04T02:43:16.286Z" }, -] - -[[package]] -name = "asciitree" -version = "0.3.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2d/6a/885bc91484e1aa8f618f6f0228d76d0e67000b0fdd6090673b777e311913/asciitree-0.3.3.tar.gz", hash = "sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e", size = 3951, upload-time = "2016-09-05T19:10:42.681Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/f0/af641a18833f35b37f01ecbdbf9baa0095805475adf8cd52ebeb7698fa8c/apache_tvm_ffi-0.1.3.tar.gz", hash = "sha256:d33f0bc0d028cddf321d69724c916504272a7f03dfc1d8e507d9d0f88b6f7cbf", size = 1276869, upload-time = "2025-11-21T05:11:00.562Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/13/ad0af6fb5203df6c92e404c5465d44a60bae7de0741a93fb1a3b4829692e/apache_tvm_ffi-0.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d8999f431b3acd04a2d79f38e2ebfbb089d0f43ed87528674d7bda6d3f796ddc", size = 1743043, upload-time = "2025-11-21T05:10:05.255Z" }, + { url = "https://files.pythonhosted.org/packages/3d/64/f362d0010daacea93a928de0c31df6b7d40ef8cd57e9117535ee0adc2704/apache_tvm_ffi-0.1.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:81f187d08d9040ec98b22fb6906c68b1df60b41567f2b507293f53f630b0136f", size = 1895551, upload-time = "2025-11-21T05:10:07.223Z" }, + { url = "https://files.pythonhosted.org/packages/f1/98/daa0f491312ebe4dccc7d84799c0b5b1bc5eee6b1093208a4fbb98175579/apache_tvm_ffi-0.1.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:dacfd2974a60a6b531a5fe8a3985f60368fc88a8ab3872c381fc1a80315d3d24", size = 1969790, upload-time = "2025-11-21T05:10:09.032Z" }, + { url = "https://files.pythonhosted.org/packages/87/9c/68e30812874e60b141b99202dd3c4e4de964a7cb62cf6455de170b3a5111/apache_tvm_ffi-0.1.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ff65bf8a96dbbd2725937ff1502e52571e7a90d81d355a21a303328dd06449cc", size = 1844888, upload-time = "2025-11-21T05:10:10.871Z" }, + { url = "https://files.pythonhosted.org/packages/49/97/ffe70c4679aebef0c1e32eec3970dc7e35113995d318aeb8c2ef0e4a3eb9/apache_tvm_ffi-0.1.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:48ad3df2224f1b0943344895c6cba2f3f0a53bc67ddafdd3e9d7a34f56100aa9", size = 1953886, upload-time = "2025-11-21T05:10:12.55Z" }, + { url = "https://files.pythonhosted.org/packages/a6/f3/e03e5716a4e025d060585a9ca3123ce76e13dff8f464cda4d5e48ef9a26a/apache_tvm_ffi-0.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:6d56b2026aa614bd56d20375e5062ddb8d4baebd7a6b93476bbe3f0339cfa095", size = 1725820, upload-time = "2025-11-21T05:10:14.043Z" }, + { url = "https://files.pythonhosted.org/packages/8f/f0/d19a0b8e97e102f8376e18cd8234cc0a5f37d5c935ce74bf587e15f8450e/apache_tvm_ffi-0.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fae211bb8693c118109e106b73393164e3ca878823185cfd6e03765e04056f37", size = 1742398, upload-time = "2025-11-21T05:10:15.384Z" }, + { url = "https://files.pythonhosted.org/packages/5b/0c/699e26a3b7db2c1627ac87335deccf8a8b6cb2e218766fe9acd5aadb5f78/apache_tvm_ffi-0.1.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:79ff39b5d6a2ed8665f4b91282391a052e8c7c76ac0f12f776ad0747f212f201", size = 1895272, upload-time = "2025-11-21T05:10:17.164Z" }, + { url = "https://files.pythonhosted.org/packages/22/39/f64a1f1a23dc3298d3f50ceb275eb9b98b6898ea3df52e6d95fed756610c/apache_tvm_ffi-0.1.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:e2cc20f00d98e263ca35fef9a139fe65992988deddd570498ff77c11780ce22e", size = 1969033, upload-time = "2025-11-21T05:10:18.855Z" }, + { url = "https://files.pythonhosted.org/packages/51/dc/fb9e25b83a57ae7b4df7308d839febf13d2e77b481ea79800e89f1eee470/apache_tvm_ffi-0.1.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b2d1c8c421aaa0685fcc77347566da68e45d8d2dc150c2ee957906b1186d62", size = 1844972, upload-time = "2025-11-21T05:10:20.201Z" }, + { url = "https://files.pythonhosted.org/packages/63/f2/ef1521e617254c2fe38b2f60440694de426b2402b225e1cc4ae04e9a22c2/apache_tvm_ffi-0.1.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:adbc2f3b496d67199adaa999baecb9a3c9137cf1fc32163a4834950062bd0dd7", size = 1954220, upload-time = "2025-11-21T05:10:21.571Z" }, + { url = "https://files.pythonhosted.org/packages/96/7c/1cadf17119f75b4d22761f8c003a767e63d456aac3f738ae42403ef7d990/apache_tvm_ffi-0.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:d797b29f70ea8c1843f4141a6b12b9770579a2b770f76898a96b721d2f987a23", size = 1725528, upload-time = "2025-11-21T05:10:23.043Z" }, + { url = "https://files.pythonhosted.org/packages/21/b4/9983c1df90d239cc15055469c795a894bab85ffd75f9325d2f5e392dbf09/apache_tvm_ffi-0.1.3-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:71d1de0c139cae3824c1e8b511acf6b2bfd37deccfc640cb83b80ba17b33d6e3", size = 1719369, upload-time = "2025-11-21T05:10:24.768Z" }, + { url = "https://files.pythonhosted.org/packages/01/e3/1b47af4391863351d9db42ab1ed116e3eba2c4ef49c1e161e4cd0ba379d9/apache_tvm_ffi-0.1.3-cp312-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b0bc38da581c54c862840960c5bf0da5bb78aa007630d6f026675d1d4b1df898", size = 1867353, upload-time = "2025-11-21T05:10:26.481Z" }, + { url = "https://files.pythonhosted.org/packages/0a/6e/0d12246b90534be733accdfbfe6e2d5bde8d7c722293c21821fe10b09412/apache_tvm_ffi-0.1.3-cp312-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:48160e8fa0235e8f3fad45102c4e856edb798c8b2954603f80f6721e3c0fd7ef", size = 1945829, upload-time = "2025-11-21T05:10:27.831Z" }, + { url = "https://files.pythonhosted.org/packages/2d/89/c4ad96b76a6e2d38795871bfb048c74aa60d1a7c01fab48cbe4e8c10f1a2/apache_tvm_ffi-0.1.3-cp312-abi3-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b1c215d4608e17d7f2382f3c6b2903a4696255727ac905041f3a005c50a98afc", size = 1817481, upload-time = "2025-11-21T05:10:29.543Z" }, + { url = "https://files.pythonhosted.org/packages/7b/c7/2f6bc83fcc987c2eb00037c3f27f1d182c2f0d8976a16807ef1395a8ece1/apache_tvm_ffi-0.1.3-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b75cc773bc29db64bb69f11d260ec66e88ad0a4a951d25650f69d3b2c9f9a186", size = 1927595, upload-time = "2025-11-21T05:10:30.882Z" }, + { url = "https://files.pythonhosted.org/packages/12/a0/597c522588abef7fcf3fe38492cf832eed8ba9123f01d3c33dfaec174dcc/apache_tvm_ffi-0.1.3-cp312-abi3-win_amd64.whl", hash = "sha256:86fd1e1012ec2ec25213f714f5f28e6f6b897360776872d5f71c4be8cae8aeb8", size = 1706236, upload-time = "2025-11-21T05:10:32.25Z" }, + { url = "https://files.pythonhosted.org/packages/3e/76/8404875ee3fb61a3c97026e2eaab8d97e7f974601e444d5abb37a765c686/apache_tvm_ffi-0.1.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0ef290a792d6e3734e2fe1ff19b2b82e6bd3af6714216c7fe32d0a39c0d0e8df", size = 1750006, upload-time = "2025-11-21T05:10:33.594Z" }, + { url = "https://files.pythonhosted.org/packages/98/98/7989ccb343044f97491cb1e46e675da75defc82a56495c320dcb1e31583b/apache_tvm_ffi-0.1.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7b137ab0c7ec6507f61e88885ddbd3541d7d14d8ca25938f5fa106ca06996d3", size = 1880792, upload-time = "2025-11-21T05:10:35.239Z" }, + { url = "https://files.pythonhosted.org/packages/64/2e/f772e75f947ebfa2faa305980ba2c172ae26a53f66c8f0c1f8915c4fa690/apache_tvm_ffi-0.1.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:d5187a90cf1c0663b8071f34f621f49ba83866412298deed9c4a94d1d991711b", size = 1953343, upload-time = "2025-11-21T05:10:36.879Z" }, + { url = "https://files.pythonhosted.org/packages/c2/a8/7d1d75f70d5a2cd283ded60784d9657c59fa7516f4b3c32437f70901d117/apache_tvm_ffi-0.1.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54001ceab111e708a1638fd9e40713d9d55f6a073037a2d4a9f1982f8dda3c69", size = 1829560, upload-time = "2025-11-21T05:10:38.421Z" }, + { url = "https://files.pythonhosted.org/packages/21/3a/6bee12cf517ace0bb8fd83bb72f6ca227743a49bab0c30918f523b5428df/apache_tvm_ffi-0.1.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:996d87d6f180250e734ce7b7cce39f234e3ad3369fffb3882c8f29c79d280db4", size = 1937457, upload-time = "2025-11-21T05:10:40.505Z" }, + { url = "https://files.pythonhosted.org/packages/5c/99/107f082536447dba2a628e1571dd423b577df6bd8e441896e3f8b0929001/apache_tvm_ffi-0.1.3-cp314-cp314t-win_amd64.whl", hash = "sha256:6010c918c62fb19995e70c4f149dfc5c248783da0d22d5c40e84649bd89a9357", size = 1766053, upload-time = "2025-11-21T05:10:41.859Z" }, +] [[package]] name = "astroid" @@ -379,52 +339,59 @@ wheels = [ [[package]] name = "av" -version = "15.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e9/c3/83e6e73d1592bc54436eae0bc61704ae0cff0c3cfbde7b58af9ed67ebb49/av-15.1.0.tar.gz", hash = "sha256:39cda2dc810e11c1938f8cb5759c41d6b630550236b3365790e67a313660ec85", size = 3774192, upload-time = "2025-08-30T04:41:56.076Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/6a/91e3e68ae0d1b53b480ec69a96f2ae820fb007bc60e6b821741f31c7ba4e/av-15.1.0-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:cf067b66cee2248220b29df33b60eb4840d9e7b9b75545d6b922f9c41d88c4ee", size = 21781685, upload-time = "2025-08-30T04:39:13.118Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/6d/afa951b9cb615c3bc6d95c4eed280c6cefb52c006f4e15e79043626fab39/av-15.1.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:26426163d96fc3bde9a015ba4d60da09ef848d9284fe79b4ca5e60965a008fc5", size = 26962481, upload-time = "2025-08-30T04:39:16.875Z" }, - { url = "https://files.pythonhosted.org/packages/3c/42/0c384884235c42c439cef28cbd129e4624ad60229119bf3c6c6020805119/av-15.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:92f524541ce74b8a12491d8934164a5c57e983da24826547c212f60123de400b", size = 37571839, upload-time = "2025-08-30T04:39:20.325Z" }, - { url = "https://files.pythonhosted.org/packages/25/c0/5c967b0872fce1add80a8f50fa7ce11e3e3e5257c2b079263570bc854699/av-15.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:659f9d6145fb2c58e8b31907283b6ba876570f5dd6e7e890d74c09614c436c8e", size = 39070227, upload-time = "2025-08-30T04:39:24.079Z" }, - { url = "https://files.pythonhosted.org/packages/e2/81/e333056d49363c35a74b828ed5f87c96dfbcc1a506b49d79a31ac773b94d/av-15.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:07a8ae30c0cfc3132eff320a6b27d18a5e0dda36effd0ae28892888f4ee14729", size = 39619362, upload-time = "2025-08-30T04:39:27.7Z" }, - { url = "https://files.pythonhosted.org/packages/d5/ae/50cc2af1bf68452cbfec8d1b2554c18f6d167c8ba6d7ad7707797dfd1541/av-15.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e33a76e38f03bb5de026b9f66ccf23dc01ddd2223221096992cb52ac22e62538", size = 40371627, upload-time = "2025-08-30T04:39:31.207Z" }, - { url = "https://files.pythonhosted.org/packages/50/e6/381edf1779106dd31c9ef1ac9842f643af4465b8a87cbc278d3eaa76229a/av-15.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:aa4bf12bdce20edc2a3b13a2776c474c5ab63e1817d53793714504476eeba82e", size = 31340369, upload-time = "2025-08-30T04:39:34.774Z" }, - { url = 
"https://files.pythonhosted.org/packages/47/58/4e44cf6939be7aba96a4abce024e1be11ba7539ecac74d09369b8c03aa05/av-15.1.0-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:b785948762a8d45fc58fc24a20251496829ace1817e9a7a508a348d6de2182c3", size = 21767323, upload-time = "2025-08-30T04:39:37.989Z" }, - { url = "https://files.pythonhosted.org/packages/9b/f6/a946544cdb49f6d892d2761b1d61a8bc6ce912fe57ba06769bdc640c0a7f/av-15.1.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:9c7131494a3a318612b4ee4db98fe5bc50eb705f6b6536127c7ab776c524fd8b", size = 26946268, upload-time = "2025-08-30T04:39:40.601Z" }, - { url = "https://files.pythonhosted.org/packages/70/7c/b33513c0af73d0033af59a98f035b521c5b93445a6af7e9efbf41a6e8383/av-15.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2b9623ae848625c59213b610c8665817924f913580c7c5c91e0dc18936deb00d", size = 38062118, upload-time = "2025-08-30T04:39:43.928Z" }, - { url = "https://files.pythonhosted.org/packages/5e/95/31b7fb34f9fea7c7389240364194f4f56ad2d460095038cc720f50a90bb3/av-15.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c8ef597087db560514617143532b1fafc4825ebb2dda9a22418f548b113a0cc7", size = 39571086, upload-time = "2025-08-30T04:39:47.109Z" }, - { url = "https://files.pythonhosted.org/packages/e7/b0/7b0b45474a4e90c35c11d0032947d8b3c7386872957ce29c6f12add69a74/av-15.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:08eac47a90ebae1e2bd5935f400dd515166019bab4ff5b03c4625fa6ac3a0a5e", size = 40112634, upload-time = "2025-08-30T04:39:50.981Z" }, - { url = "https://files.pythonhosted.org/packages/aa/04/038b94bc9a1ee10a451c867d4a2fc91e845f83bfc2dae9df25893abcb57f/av-15.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d3f66ff200ea166e606cb3c5cb1bd2fc714effbec2e262a5d67ce60450c8234a", size = 40878695, upload-time = "2025-08-30T04:39:54.493Z" }, - { url = 
"https://files.pythonhosted.org/packages/1d/3d/9f8f96c0deeaaf648485a3dbd1699b2f0580f2ce8a36cb616c0138ba7615/av-15.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:57b99544d91121b8bea570e4ddf61700f679a6b677c1f37966bc1a22e1d4cd5c", size = 31335683, upload-time = "2025-08-30T04:39:57.861Z" }, - { url = "https://files.pythonhosted.org/packages/d1/58/de78b276d20db6ffcd4371283df771721a833ba525a3d57e753d00a9fe79/av-15.1.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:40c5df37f4c354ab8190c6fd68dab7881d112f527906f64ca73da4c252a58cee", size = 21760991, upload-time = "2025-08-30T04:40:00.801Z" }, - { url = "https://files.pythonhosted.org/packages/56/cc/45f85775304ae60b66976360d82ba5b152ad3fd91f9267d5020a51e9a828/av-15.1.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:af455ce65ada3d361f80c90c810d9bced4db5655ab9aa513024d6c71c5c476d5", size = 26953097, upload-time = "2025-08-30T04:40:03.998Z" }, - { url = "https://files.pythonhosted.org/packages/f3/f8/2d781e5e71d02fc829487e775ccb1185e72f95340d05f2e84eb57a11e093/av-15.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:86226d2474c80c3393fa07a9c366106029ae500716098b72b3ec3f67205524c3", size = 38319710, upload-time = "2025-08-30T04:40:07.701Z" }, - { url = "https://files.pythonhosted.org/packages/ac/13/37737ef2193e83862ccacff23580c39de251da456a1bf0459e762cca273c/av-15.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:11326f197e7001c4ca53a83b2dbc67fd39ddff8cdf62ce6be3b22d9f3f9338bd", size = 39915519, upload-time = "2025-08-30T04:40:11.066Z" }, - { url = "https://files.pythonhosted.org/packages/26/e9/e8032c7b8f2a4129a03f63f896544f8b7cf068e2db2950326fa2400d5c47/av-15.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a631ea879cc553080ee62874f4284765c42ba08ee0279851a98a85e2ceb3cc8d", size = 40286166, upload-time = "2025-08-30T04:40:14.561Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/23/612c0fd809444d04b8387a2dfd942ccc77829507bd78a387ff65a9d98c24/av-15.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8f383949b010c3e731c245f80351d19dc0c08f345e194fc46becb1cb279be3ff", size = 41150592, upload-time = "2025-08-30T04:40:17.951Z" }, - { url = "https://files.pythonhosted.org/packages/15/74/6f8e38a3b0aea5f28e72813672ff45b64615f2c69e6a4a558718c95edb9f/av-15.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:d5921aa45f4c1f8c1a8d8185eb347e02aa4c3071278a2e2dd56368d54433d643", size = 31336093, upload-time = "2025-08-30T04:40:21.393Z" }, - { url = "https://files.pythonhosted.org/packages/2e/bc/78b2ffa8235eeffc29aa4a8cc47b02e660cfec32f601f39a00975fb06d0e/av-15.1.0-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:2f77853c3119c59d1bff4214ccbe46e3133eccff85ed96adee51c68684443f4e", size = 21726244, upload-time = "2025-08-30T04:40:24.14Z" }, - { url = "https://files.pythonhosted.org/packages/1a/99/66d69453a2dce028e6e8ebea085d90e880aac03d3a3ab7d8ec16755ffd75/av-15.1.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:c0bc4471c156a0a1c70a607502434f477bc8dfe085eef905e55b4b0d66bcd3a5", size = 26918663, upload-time = "2025-08-30T04:40:27.557Z" }, - { url = "https://files.pythonhosted.org/packages/fa/51/1a7dfbeda71f2772bc46d758af0e7fab1cc8388ce4bc7f24aecbc4bfd764/av-15.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:37839d4fa1407f047af82560dfc0f94d8d6266071eff49e1cbe16c4483054621", size = 38041408, upload-time = "2025-08-30T04:40:30.811Z" }, - { url = "https://files.pythonhosted.org/packages/d7/97/2c4e0288ad4359b6064cb06ae79c2ff3a84ac73d27e91f2161b75fcd86fa/av-15.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:729179cd8622815e8b6f6854d13a806fe710576e08895c77e5e4ad254609de9a", size = 39642563, upload-time = "2025-08-30T04:40:34.617Z" }, - { url = 
"https://files.pythonhosted.org/packages/ea/94/2362502149e276d00957edabcc201a5f4d5109a8a7b4fd30793714a532f3/av-15.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4abdf085bfa4eec318efccff567831b361ea56c045cc38366811552e3127c665", size = 40022119, upload-time = "2025-08-30T04:40:37.703Z" }, - { url = "https://files.pythonhosted.org/packages/df/58/1a0ce1b3835d9728da0a7a54aeffaa0a2b1a88405eaed9322efd55212a54/av-15.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f985661644879e4520d28a995fcb2afeb951bc15a1d51412eb8e5f36da85b6fe", size = 40885158, upload-time = "2025-08-30T04:40:40.952Z" }, - { url = "https://files.pythonhosted.org/packages/30/e6/054bb64e424d90b77ed5fc6a7358e4013fb436154c998fc90a89a186313f/av-15.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:7d7804a44c8048bb4b014a99353dd124663a12cd1d4613ba2bd3b457c3b1d539", size = 31312256, upload-time = "2025-08-30T04:40:44.224Z" }, - { url = "https://files.pythonhosted.org/packages/6f/8b/89eae6dca10d7d2b83c131025a31ccc750be78699ac0304439faa1d1df99/av-15.1.0-cp314-cp314-macosx_13_0_arm64.whl", hash = "sha256:5dd73c6447947edcb82e5fecf96e1f146aeda0f169c7ad4c54df4d9f66f63fde", size = 21730645, upload-time = "2025-08-30T04:40:47.259Z" }, - { url = "https://files.pythonhosted.org/packages/a3/f0/abffaf69405ed68041524be12a1e294faf396971d6a0e70eb00e93687df7/av-15.1.0-cp314-cp314-macosx_13_0_x86_64.whl", hash = "sha256:a81cd515934a5d51290aa66b059b7ed29c4a212e704f3c5e99e32877ff1c312c", size = 26913753, upload-time = "2025-08-30T04:40:50.445Z" }, - { url = "https://files.pythonhosted.org/packages/37/9e/7af078bcfc3cd340c981ac5d613c090ab007023d2ac13b05acd52f22f069/av-15.1.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:57cc7a733a7e7d7a153682f35c9cf5d01e8269367b049c954779de36fc3d0b10", size = 38027048, upload-time = "2025-08-30T04:40:54.076Z" }, - { url = 
"https://files.pythonhosted.org/packages/02/76/1f9dac11ad713e3619288993ea04e9c9cf4ec0f04e5ee81e83b8129dd8f3/av-15.1.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a77b75bdb6899a64302ff923a5246e0747b3f0a3ecee7d61118db407a22c3f53", size = 39565396, upload-time = "2025-08-30T04:40:57.84Z" }, - { url = "https://files.pythonhosted.org/packages/8b/32/2188c46e2747247458ffc26b230c57dd28e61f65ff7b9e6223a411af5e98/av-15.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d0a1154ce081f1720082a133cfe12356c59f62dad2b93a7a1844bf1dcd010d85", size = 40015050, upload-time = "2025-08-30T04:41:01.091Z" }, - { url = "https://files.pythonhosted.org/packages/1e/41/b57fbce9994580619d7574817ece0fe0e7b822cde2af57904549d0150b8d/av-15.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8a7bf5a34dee15c86790414fa86a144e6d0dcc788bc83b565fdcbc080b4fbc90", size = 40821225, upload-time = "2025-08-30T04:41:04.349Z" }, - { url = "https://files.pythonhosted.org/packages/b1/36/e85cd1f0d3369c6764ad422882895d082f7ececb66d3df8aeae3234ef7a6/av-15.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:e30c9a6fd9734784941384a2e25fad3c22881a7682f378914676aa7e795acdb7", size = 31311750, upload-time = "2025-08-30T04:41:07.744Z" }, - { url = "https://files.pythonhosted.org/packages/80/d8/08a681758a4e49adfda409a6a35eff533f42654c6a6cfa102bc5cae1a728/av-15.1.0-cp314-cp314t-macosx_13_0_arm64.whl", hash = "sha256:60666833d7e65ebcfc48034a072de74349edbb62c9aaa3e6722fef31ca028eb6", size = 21828343, upload-time = "2025-08-30T04:41:10.81Z" }, - { url = "https://files.pythonhosted.org/packages/4a/52/29bec3fe68669b21f7d1ab5d94e21f597b8dfd37f50a3e3c9af6a8da925c/av-15.1.0-cp314-cp314t-macosx_13_0_x86_64.whl", hash = "sha256:53fbdae45aa2a49a22e864ff4f4017416ef62c060a172085d3247ba0a101104e", size = 27001666, upload-time = "2025-08-30T04:41:13.822Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/54/2c1d1faced66d708f5df328e800997cb47f90b500a214130c3a0f2ad601e/av-15.1.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:e6c51061667983dc801502aff9140bbc4f0e0d97f879586f17fb2f9a7e49c381", size = 39496753, upload-time = "2025-08-30T04:41:16.759Z" }, - { url = "https://files.pythonhosted.org/packages/c3/76/06ded5e52c4dcc2d9b5184c6da8de5ea77bd7ecb79a59a2b9700f1984949/av-15.1.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:2f80ec387f04aa34868662b11018b5f09654ae1530a61e24e92a142a24b10b62", size = 40784729, upload-time = "2025-08-30T04:41:20.491Z" }, - { url = "https://files.pythonhosted.org/packages/52/ef/797b76f3b39c99a96e387f501bbc07dca340b27d3dda12862fe694066b63/av-15.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4975e03177d37d8165c99c8d494175675ba8acb72458fb5d7e43f746a53e0374", size = 41284953, upload-time = "2025-08-30T04:41:23.949Z" }, - { url = "https://files.pythonhosted.org/packages/31/47/e4656f00e62fd059ea5a40b492dea784f5aecfe1dfac10c0d7a0664ce200/av-15.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8f78f3dad11780b4cdd024cdb92ce43cb170929297c00f2f4555c2b103f51e55", size = 41985340, upload-time = "2025-08-30T04:41:27.561Z" }, - { url = "https://files.pythonhosted.org/packages/b1/c9/15bb4fd7a1f39d70db35af2b9c20a0ae19e4220eb58a8b8446e903b98d72/av-15.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:9a20c5eba3ec49c2f4b281797021923fc68a86aeb66c5cda4fd0252fa8004951", size = 31487337, upload-time = "2025-08-30T04:41:30.591Z" }, +version = "16.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/c3/fd72a0315bc6c943ced1105aaac6e0ec1be57c70d8a616bd05acaa21ffee/av-16.0.1.tar.gz", hash = "sha256:dd2ce779fa0b5f5889a6d9e00fbbbc39f58e247e52d31044272648fe16ff1dbf", size = 3904030, upload-time = "2025-10-13T12:28:51.082Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/e8/3c/eefa29b7d0f5afdf7af9197bbecad8ec2ad06bcb5ac7e909c05a624b00a6/av-16.0.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:8b141aaa29a3afc96a1d467d106790782c1914628b57309eaadb8c10c299c9c0", size = 27206679, upload-time = "2025-10-13T12:24:41.145Z" }, + { url = "https://files.pythonhosted.org/packages/ac/89/a474feb07d5b94aa5af3771b0fe328056e2e0a840039b329f4fa2a1fd13a/av-16.0.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:4b8a08a59a5be0082af063d3f4b216e3950340121c6ea95b505a3f5f5cc8f21d", size = 21774556, upload-time = "2025-10-13T12:24:44.332Z" }, + { url = "https://files.pythonhosted.org/packages/be/e5/4361010dcac398bc224823e4b2a47803845e159af9f95164662c523770dc/av-16.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:792e7fc3c08eae005ff36486983966476e553cbb55aaeb0ec99adc4909377320", size = 38176763, upload-time = "2025-10-13T12:24:46.98Z" }, + { url = "https://files.pythonhosted.org/packages/d4/db/b27bdd20c9dc80de5b8792dae16dd6f4edf16408c0c7b28070c6228a8057/av-16.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:4e8ef5df76d8d0ee56139789f80bb90ad1a82a7e6df6e080e2e95c06fa22aea7", size = 39696277, upload-time = "2025-10-13T12:24:50.951Z" }, + { url = "https://files.pythonhosted.org/packages/4e/c8/dd48e6a3ac1e922c141475a0dc30e2b6dfdef9751b3274829889a9281cce/av-16.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4f7a6985784a7464f078e419c71f5528c3e550ee5d605e7149b4a37a111eb136", size = 39576660, upload-time = "2025-10-13T12:24:55.773Z" }, + { url = "https://files.pythonhosted.org/packages/b9/f0/223d047e2e60672a2fb5e51e28913de8d52195199f3e949cbfda1e6cd64b/av-16.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3f45c8d7b803b6faa2a25a26de5964a0a897de68298d9c9672c7af9d65d8b48a", size = 40752775, upload-time = "2025-10-13T12:25:00.827Z" }, + { url = 
"https://files.pythonhosted.org/packages/18/73/73acad21c9203bc63d806e8baf42fe705eb5d36dafd1996b71ab5861a933/av-16.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:58e6faf1d9328d8cc6be14c5aadacb7d2965ed6d6ae1af32696993096543ff00", size = 32302328, upload-time = "2025-10-13T12:25:06.042Z" }, + { url = "https://files.pythonhosted.org/packages/49/d3/f2a483c5273fccd556dfa1fce14fab3b5d6d213b46e28e54e254465a2255/av-16.0.1-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e310d1fb42879df9bad2152a8db6d2ff8bf332c8c36349a09d62cc122f5070fb", size = 27191982, upload-time = "2025-10-13T12:25:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/e0/39/dff28bd252131b3befd09d8587992fe18c09d5125eaefc83a6434d5f56ff/av-16.0.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:2f4b357e5615457a84e6b6290916b22864b76b43d5079e1a73bc27581a5b9bac", size = 21760305, upload-time = "2025-10-13T12:25:14.882Z" }, + { url = "https://files.pythonhosted.org/packages/4a/4d/2312d50a09c84a9b4269f7fea5de84f05dd2b7c7113dd961d31fad6c64c4/av-16.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:286665c77034c3a98080169b8b5586d5568a15da81fbcdaf8099252f2d232d7c", size = 38691616, upload-time = "2025-10-13T12:25:20.063Z" }, + { url = "https://files.pythonhosted.org/packages/15/9a/3d2d30b56252f998e53fced13720e2ce809c4db477110f944034e0fa4c9f/av-16.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f88de8e5b8ea29e41af4d8d61df108323d050ccfbc90f15b13ec1f99ce0e841e", size = 40216464, upload-time = "2025-10-13T12:25:24.848Z" }, + { url = "https://files.pythonhosted.org/packages/98/cb/3860054794a47715b4be0006105158c7119a57be58d9e8882b72e4d4e1dd/av-16.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0cdb71ebe4d1b241cf700f8f0c44a7d2a6602b921e16547dd68c0842113736e1", size = 40094077, upload-time = "2025-10-13T12:25:30.238Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/58/79830fb8af0a89c015250f7864bbd427dff09c70575c97847055f8a302f7/av-16.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:28c27a65d40e8cf82b6db2543f8feeb8b56d36c1938f50773494cd3b073c7223", size = 41279948, upload-time = "2025-10-13T12:25:35.24Z" }, + { url = "https://files.pythonhosted.org/packages/83/79/6e1463b04382f379f857113b851cf5f9d580a2f7bd794211cd75352f4e04/av-16.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:ffea39ac7574f234f5168f9b9602e8d4ecdd81853238ec4d661001f03a6d3f64", size = 32297586, upload-time = "2025-10-13T12:25:39.826Z" }, + { url = "https://files.pythonhosted.org/packages/44/78/12a11d7a44fdd8b26a65e2efa1d8a5826733c8887a989a78306ec4785956/av-16.0.1-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:e41a8fef85dfb2c717349f9ff74f92f9560122a9f1a94b1c6c9a8a9c9462ba71", size = 27206375, upload-time = "2025-10-13T12:25:44.423Z" }, + { url = "https://files.pythonhosted.org/packages/27/19/3a4d3882852a0ee136121979ce46f6d2867b974eb217a2c9a070939f55ad/av-16.0.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:6352a64b25c9f985d4f279c2902db9a92424e6f2c972161e67119616f0796cb9", size = 21752603, upload-time = "2025-10-13T12:25:49.122Z" }, + { url = "https://files.pythonhosted.org/packages/cb/6e/f7abefba6e008e2f69bebb9a17ba38ce1df240c79b36a5b5fcacf8c8fcfd/av-16.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5201f7b4b5ed2128118cb90c2a6d64feedb0586ca7c783176896c78ffb4bbd5c", size = 38931978, upload-time = "2025-10-13T12:25:55.021Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7a/1305243ab47f724fdd99ddef7309a594e669af7f0e655e11bdd2c325dfae/av-16.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:daecc2072b82b6a942acbdaa9a2e00c05234c61fef976b22713983c020b07992", size = 40549383, upload-time = "2025-10-13T12:26:00.897Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/b2/357cc063185043eb757b4a48782bff780826103bcad1eb40c3ddfc050b7e/av-16.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6573da96e8bebc3536860a7def108d7dbe1875c86517072431ced702447e6aea", size = 40241993, upload-time = "2025-10-13T12:26:06.993Z" }, + { url = "https://files.pythonhosted.org/packages/20/bb/ced42a4588ba168bf0ef1e9d016982e3ba09fde6992f1dda586fd20dcf71/av-16.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4bc064e48a8de6c087b97dd27cf4ef8c13073f0793108fbce3ecd721201b2502", size = 41532235, upload-time = "2025-10-13T12:26:12.488Z" }, + { url = "https://files.pythonhosted.org/packages/15/37/c7811eca0f318d5fd3212f7e8c3d8335f75a54907c97a89213dc580b8056/av-16.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0c669b6b6668c8ae74451c15ec6d6d8a36e4c3803dc5d9910f607a174dd18f17", size = 32296912, upload-time = "2025-10-13T12:26:19.187Z" }, + { url = "https://files.pythonhosted.org/packages/86/59/972f199ccc4f8c9e51f59e0f8962a09407396b3f6d11355e2c697ba555f9/av-16.0.1-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:4c61c6c120f5c5d95c711caf54e2c4a9fb2f1e613ac0a9c273d895f6b2602e44", size = 27170433, upload-time = "2025-10-13T12:26:24.673Z" }, + { url = "https://files.pythonhosted.org/packages/53/9d/0514cbc185fb20353ab25da54197fbd169a233e39efcbb26533c36a9dbb9/av-16.0.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ecc2e41320c69095f44aff93470a0d32c30892b2dbad0a08040441c81efa379", size = 21717654, upload-time = "2025-10-13T12:26:29.12Z" }, + { url = "https://files.pythonhosted.org/packages/32/8c/881409dd124b4e07d909d2b70568acb21126fc747656390840a2238651c9/av-16.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:036f0554d6faef3f4a94acaeb0cedd388e3ab96eb0eb5a14ec27c17369c466c9", size = 38651601, upload-time = "2025-10-13T12:26:33.919Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/fd/867ba4cc3ab504442dc89b0c117e6a994fc62782eb634c8f31304586f93e/av-16.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:876415470a62e4a3550cc38db2fc0094c25e64eea34d7293b7454125d5958190", size = 40278604, upload-time = "2025-10-13T12:26:39.2Z" }, + { url = "https://files.pythonhosted.org/packages/b3/87/63cde866c0af09a1fa9727b4f40b34d71b0535785f5665c27894306f1fbc/av-16.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:56902a06bd0828d13f13352874c370670882048267191ff5829534b611ba3956", size = 39984854, upload-time = "2025-10-13T12:26:44.581Z" }, + { url = "https://files.pythonhosted.org/packages/71/3b/8f40a708bff0e6b0f957836e2ef1f4d4429041cf8d99a415a77ead8ac8a3/av-16.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe988c2bf0fc2d952858f791f18377ea4ae4e19ba3504793799cd6c2a2562edf", size = 41270352, upload-time = "2025-10-13T12:26:50.817Z" }, + { url = "https://files.pythonhosted.org/packages/1e/b5/c114292cb58a7269405ae13b7ba48c7d7bfeebbb2e4e66c8073c065a4430/av-16.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:708a66c248848029bf518f0482b81c5803846f1b597ef8013b19c014470b620f", size = 32273242, upload-time = "2025-10-13T12:26:55.788Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e9/a5b714bc078fdcca8b46c8a0b38484ae5c24cd81d9c1703d3e8ae2b57259/av-16.0.1-cp313-cp313t-macosx_11_0_x86_64.whl", hash = "sha256:79a77ee452537030c21a0b41139bedaf16629636bf764b634e93b99c9d5f4558", size = 27248984, upload-time = "2025-10-13T12:27:00.564Z" }, + { url = "https://files.pythonhosted.org/packages/06/ef/ff777aaf1f88e3f6ce94aca4c5806a0c360e68d48f9d9f0214e42650f740/av-16.0.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:080823a6ff712f81e7089ae9756fb1512ca1742a138556a852ce50f58e457213", size = 21828098, upload-time = "2025-10-13T12:27:05.433Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/d7/a484358d24a42bedde97f61f5d6ee568a7dd866d9df6e33731378db92d9e/av-16.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:04e00124afa8b46a850ed48951ddda61de874407fb8307d6a875bba659d5727e", size = 40051697, upload-time = "2025-10-13T12:27:10.525Z" }, + { url = "https://files.pythonhosted.org/packages/73/87/6772d6080837da5d5c810a98a95bde6977e1f5a6e2e759e8c9292af9ec69/av-16.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:bc098c1c6dc4e7080629a7e9560e67bd4b5654951e17e5ddfd2b1515cfcd37db", size = 41352596, upload-time = "2025-10-13T12:27:16.217Z" }, + { url = "https://files.pythonhosted.org/packages/bd/58/fe448c60cf7f85640a0ed8936f16bac874846aa35e1baa521028949c1ea3/av-16.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e6ffd3559a72c46a76aa622630751a821499ba5a780b0047ecc75105d43a6b61", size = 41183156, upload-time = "2025-10-13T12:27:21.574Z" }, + { url = "https://files.pythonhosted.org/packages/85/c6/a039a0979d0c278e1bed6758d5a6186416c3ccb8081970df893fdf9a0d99/av-16.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7a3f1a36b550adadd7513f4f5ee956f9e06b01a88e59f3150ef5fec6879d6f79", size = 42302331, upload-time = "2025-10-13T12:27:26.953Z" }, + { url = "https://files.pythonhosted.org/packages/18/7b/2ca4a9e3609ff155436dac384e360f530919cb1e328491f7df294be0f0dc/av-16.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:c6de794abe52b8c0be55d8bb09ade05905efa74b1a5ab4860b4b9c2bfb6578bf", size = 32462194, upload-time = "2025-10-13T12:27:32.942Z" }, + { url = "https://files.pythonhosted.org/packages/14/9a/6d17e379906cf53a7a44dfac9cf7e4b2e7df2082ba2dbf07126055effcc1/av-16.0.1-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:4b55ba69a943ae592ad7900da67129422954789de9dc384685d6b529925f542e", size = 27167101, upload-time = "2025-10-13T12:27:38.886Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/34/891816cd82d5646cb5a51d201d20be0a578232536d083b7d939734258067/av-16.0.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:d4a0c47b6c9bbadad8909b82847f5fe64a608ad392f0b01704e427349bcd9a47", size = 21722708, upload-time = "2025-10-13T12:27:43.29Z" }, + { url = "https://files.pythonhosted.org/packages/1d/20/c24ad34038423ab8c9728cef3301e0861727c188442dcfd70a4a10834c63/av-16.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:8bba52f3035708456f6b1994d10b0371b45cfd8f917b5e84ff81aef4ec2f08bf", size = 38638842, upload-time = "2025-10-13T12:27:49.776Z" }, + { url = "https://files.pythonhosted.org/packages/d7/32/034412309572ba3ad713079d07a3ffc13739263321aece54a3055d7a4f1f/av-16.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:08e34c7e7b5e55e29931180bbe21095e1874ac120992bf6b8615d39574487617", size = 40197789, upload-time = "2025-10-13T12:27:55.688Z" }, + { url = "https://files.pythonhosted.org/packages/fb/9c/40496298c32f9094e7df28641c5c58aa6fb07554dc232a9ac98a9894376f/av-16.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0d6250ab9db80c641b299987027c987f14935ea837ea4c02c5f5182f6b69d9e5", size = 39980829, upload-time = "2025-10-13T12:28:01.507Z" }, + { url = "https://files.pythonhosted.org/packages/4a/7e/5c38268ac1d424f309b13b2de4597ad28daea6039ee5af061e62918b12a8/av-16.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7b621f28d8bcbb07cdcd7b18943ddc040739ad304545715ae733873b6e1b739d", size = 41205928, upload-time = "2025-10-13T12:28:08.431Z" }, + { url = "https://files.pythonhosted.org/packages/e3/07/3176e02692d8753a6c4606021c60e4031341afb56292178eee633b6760a4/av-16.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:92101f49082392580c9dba4ba2fe5b931b3bb0fb75a1a848bfb9a11ded68be91", size = 32272836, upload-time = "2025-10-13T12:28:13.405Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/47/10e03b88de097385d1550cbb6d8de96159131705c13adb92bd9b7e677425/av-16.0.1-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:07c464bf2bc362a154eccc82e235ef64fd3aaf8d76fc8ed63d0ae520943c6d3f", size = 27248864, upload-time = "2025-10-13T12:28:17.467Z" }, + { url = "https://files.pythonhosted.org/packages/b1/60/7447f206bec3e55e81371f1989098baa2fe9adb7b46c149e6937b7e7c1ca/av-16.0.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:750da0673864b669c95882c7b25768cd93ece0e47010d74ebcc29dbb14d611f8", size = 21828185, upload-time = "2025-10-13T12:28:21.461Z" }, + { url = "https://files.pythonhosted.org/packages/68/48/ee2680e7a01bc4911bbe902b814346911fa2528697a44f3043ee68e0f07e/av-16.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:0b7c0d060863b2e341d07cd26851cb9057b7979814148b028fb7ee5d5eb8772d", size = 40040572, upload-time = "2025-10-13T12:28:26.585Z" }, + { url = "https://files.pythonhosted.org/packages/da/68/2c43d28871721ae07cde432d6e36ae2f7035197cbadb43764cc5bf3d4b33/av-16.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:e67c2eca6023ca7d76b0709c5f392b23a5defba499f4c262411f8155b1482cbd", size = 41344288, upload-time = "2025-10-13T12:28:32.512Z" }, + { url = "https://files.pythonhosted.org/packages/ec/7f/1d801bff43ae1af4758c45eee2eaae64f303bbb460e79f352f08587fd179/av-16.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e3243d54d84986e8fbdc1946db634b0c41fe69b6de35a99fa8b763e18503d040", size = 41175142, upload-time = "2025-10-13T12:28:38.356Z" }, + { url = "https://files.pythonhosted.org/packages/e4/06/bb363138687066bbf8997c1433dbd9c81762bae120955ea431fb72d69d26/av-16.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bcf73efab5379601e6510abd7afe5f397d0f6defe69b1610c2f37a4a17996b", size = 42293932, upload-time = "2025-10-13T12:28:43.442Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/15/5e713098a085f970ccf88550194d277d244464d7b3a7365ad92acb4b6dc1/av-16.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6368d4ff153d75469d2a3217bc403630dc870a72fe0a014d9135de550d731a86", size = 32460624, upload-time = "2025-10-13T12:28:48.767Z" }, ] [[package]] @@ -667,16 +634,16 @@ wheels = [ [[package]] name = "botocore" -version = "1.40.61" +version = "1.41.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/28/a3/81d3a47c2dbfd76f185d3b894f2ad01a75096c006a2dd91f237dca182188/botocore-1.40.61.tar.gz", hash = "sha256:a2487ad69b090f9cccd64cf07c7021cd80ee9c0655ad974f87045b02f3ef52cd", size = 14393956, upload-time = "2025-10-28T19:26:46.108Z" } +sdist = { url = "https://files.pythonhosted.org/packages/90/22/7fe08c726a2e3b11a0aef8bf177e83891c9cb2dc1809d35c9ed91a9e60e6/botocore-1.41.5.tar.gz", hash = "sha256:0367622b811597d183bfcaab4a350f0d3ede712031ce792ef183cabdee80d3bf", size = 14668152, upload-time = "2025-11-26T20:27:38.026Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/38/c5/f6ce561004db45f0b847c2cd9b19c67c6bf348a82018a48cb718be6b58b0/botocore-1.40.61-py3-none-any.whl", hash = "sha256:17ebae412692fd4824f99cde0f08d50126dc97954008e5ba2b522eb049238aa7", size = 14055973, upload-time = "2025-10-28T19:26:42.15Z" }, + { url = "https://files.pythonhosted.org/packages/4e/4e/21cd0b8f365449f1576f93de1ec8718ed18a7a3bc086dfbdeb79437bba7a/botocore-1.41.5-py3-none-any.whl", hash = "sha256:3fef7fcda30c82c27202d232cfdbd6782cb27f20f8e7e21b20606483e66ee73a", size = 14337008, upload-time = "2025-11-26T20:27:35.208Z" }, ] [[package]] @@ -719,11 +686,11 @@ sdist = { url = "https://files.pythonhosted.org/packages/64/cb/104778c728dc3d5ea [[package]] name = "certifi" -version = "2025.10.5" +version = "2025.11.12" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", size = 164519, upload-time = "2025-10-05T04:12:15.808Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286, upload-time = "2025-10-05T04:12:14.03Z" }, + { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, ] [[package]] @@ -899,14 +866,14 @@ wheels = [ [[package]] name = "click" -version = "8.3.0" +version = "8.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/46/61/de6cd827efad202d7057d93e0fed9294b96952e188f7384832791c7b2254/click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4", size = 276943, upload-time = "2025-09-18T17:32:23.696Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } wheels = [ - { 
url = "https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", size = 107295, upload-time = "2025-09-18T17:32:22.42Z" }, + { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, ] [[package]] @@ -938,101 +905,101 @@ wheels = [ [[package]] name = "coverage" -version = "7.11.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1c/38/ee22495420457259d2f3390309505ea98f98a5eed40901cf62196abad006/coverage-7.11.0.tar.gz", hash = "sha256:167bd504ac1ca2af7ff3b81d245dfea0292c5032ebef9d66cc08a7d28c1b8050", size = 811905, upload-time = "2025-10-15T15:15:08.542Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/12/95/c49df0aceb5507a80b9fe5172d3d39bf23f05be40c23c8d77d556df96cec/coverage-7.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eb53f1e8adeeb2e78962bade0c08bfdc461853c7969706ed901821e009b35e31", size = 215800, upload-time = "2025-10-15T15:12:19.824Z" }, - { url = "https://files.pythonhosted.org/packages/dc/c6/7bb46ce01ed634fff1d7bb53a54049f539971862cc388b304ff3c51b4f66/coverage-7.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d9a03ec6cb9f40a5c360f138b88266fd8f58408d71e89f536b4f91d85721d075", size = 216198, upload-time = "2025-10-15T15:12:22.549Z" }, - { url = "https://files.pythonhosted.org/packages/94/b2/75d9d8fbf2900268aca5de29cd0a0fe671b0f69ef88be16767cc3c828b85/coverage-7.11.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0d7f0616c557cbc3d1c2090334eddcbb70e1ae3a40b07222d62b3aa47f608fab", size = 242953, upload-time = "2025-10-15T15:12:24.139Z" }, - 
{ url = "https://files.pythonhosted.org/packages/65/ac/acaa984c18f440170525a8743eb4b6c960ace2dbad80dc22056a437fc3c6/coverage-7.11.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e44a86a47bbdf83b0a3ea4d7df5410d6b1a0de984fbd805fa5101f3624b9abe0", size = 244766, upload-time = "2025-10-15T15:12:25.974Z" }, - { url = "https://files.pythonhosted.org/packages/d8/0d/938d0bff76dfa4a6b228c3fc4b3e1c0e2ad4aa6200c141fcda2bd1170227/coverage-7.11.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:596763d2f9a0ee7eec6e643e29660def2eef297e1de0d334c78c08706f1cb785", size = 246625, upload-time = "2025-10-15T15:12:27.387Z" }, - { url = "https://files.pythonhosted.org/packages/38/54/8f5f5e84bfa268df98f46b2cb396b1009734cfb1e5d6adb663d284893b32/coverage-7.11.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ef55537ff511b5e0a43edb4c50a7bf7ba1c3eea20b4f49b1490f1e8e0e42c591", size = 243568, upload-time = "2025-10-15T15:12:28.799Z" }, - { url = "https://files.pythonhosted.org/packages/68/30/8ba337c2877fe3f2e1af0ed7ff4be0c0c4aca44d6f4007040f3ca2255e99/coverage-7.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9cbabd8f4d0d3dc571d77ae5bdbfa6afe5061e679a9d74b6797c48d143307088", size = 244665, upload-time = "2025-10-15T15:12:30.297Z" }, - { url = "https://files.pythonhosted.org/packages/cc/fb/c6f1d6d9a665536b7dde2333346f0cc41dc6a60bd1ffc10cd5c33e7eb000/coverage-7.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e24045453384e0ae2a587d562df2a04d852672eb63051d16096d3f08aa4c7c2f", size = 242681, upload-time = "2025-10-15T15:12:32.326Z" }, - { url = "https://files.pythonhosted.org/packages/be/38/1b532319af5f991fa153c20373291dc65c2bf532af7dbcffdeef745c8f79/coverage-7.11.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:7161edd3426c8d19bdccde7d49e6f27f748f3c31cc350c5de7c633fea445d866", size = 242912, upload-time = "2025-10-15T15:12:34.079Z" }, - { 
url = "https://files.pythonhosted.org/packages/67/3d/f39331c60ef6050d2a861dc1b514fa78f85f792820b68e8c04196ad733d6/coverage-7.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d4ed4de17e692ba6415b0587bc7f12bc80915031fc9db46a23ce70fc88c9841", size = 243559, upload-time = "2025-10-15T15:12:35.809Z" }, - { url = "https://files.pythonhosted.org/packages/4b/55/cb7c9df9d0495036ce582a8a2958d50c23cd73f84a23284bc23bd4711a6f/coverage-7.11.0-cp310-cp310-win32.whl", hash = "sha256:765c0bc8fe46f48e341ef737c91c715bd2a53a12792592296a095f0c237e09cf", size = 218266, upload-time = "2025-10-15T15:12:37.429Z" }, - { url = "https://files.pythonhosted.org/packages/68/a8/b79cb275fa7bd0208767f89d57a1b5f6ba830813875738599741b97c2e04/coverage-7.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:24d6f3128f1b2d20d84b24f4074475457faedc3d4613a7e66b5e769939c7d969", size = 219169, upload-time = "2025-10-15T15:12:39.25Z" }, - { url = "https://files.pythonhosted.org/packages/49/3a/ee1074c15c408ddddddb1db7dd904f6b81bc524e01f5a1c5920e13dbde23/coverage-7.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d58ecaa865c5b9fa56e35efc51d1014d4c0d22838815b9fce57a27dd9576847", size = 215912, upload-time = "2025-10-15T15:12:40.665Z" }, - { url = "https://files.pythonhosted.org/packages/70/c4/9f44bebe5cb15f31608597b037d78799cc5f450044465bcd1ae8cb222fe1/coverage-7.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b679e171f1c104a5668550ada700e3c4937110dbdd153b7ef9055c4f1a1ee3cc", size = 216310, upload-time = "2025-10-15T15:12:42.461Z" }, - { url = "https://files.pythonhosted.org/packages/42/01/5e06077cfef92d8af926bdd86b84fb28bf9bc6ad27343d68be9b501d89f2/coverage-7.11.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ca61691ba8c5b6797deb221a0d09d7470364733ea9c69425a640f1f01b7c5bf0", size = 246706, upload-time = "2025-10-15T15:12:44.001Z" }, - { url = 
"https://files.pythonhosted.org/packages/40/b8/7a3f1f33b35cc4a6c37e759137533119560d06c0cc14753d1a803be0cd4a/coverage-7.11.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:aef1747ede4bd8ca9cfc04cc3011516500c6891f1b33a94add3253f6f876b7b7", size = 248634, upload-time = "2025-10-15T15:12:45.768Z" }, - { url = "https://files.pythonhosted.org/packages/7a/41/7f987eb33de386bc4c665ab0bf98d15fcf203369d6aacae74f5dd8ec489a/coverage-7.11.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1839d08406e4cba2953dcc0ffb312252f14d7c4c96919f70167611f4dee2623", size = 250741, upload-time = "2025-10-15T15:12:47.222Z" }, - { url = "https://files.pythonhosted.org/packages/23/c1/a4e0ca6a4e83069fb8216b49b30a7352061ca0cb38654bd2dc96b7b3b7da/coverage-7.11.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e0eb0a2dcc62478eb5b4cbb80b97bdee852d7e280b90e81f11b407d0b81c4287", size = 246837, upload-time = "2025-10-15T15:12:48.904Z" }, - { url = "https://files.pythonhosted.org/packages/5d/03/ced062a17f7c38b4728ff76c3acb40d8465634b20b4833cdb3cc3a74e115/coverage-7.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bc1fbea96343b53f65d5351d8fd3b34fd415a2670d7c300b06d3e14a5af4f552", size = 248429, upload-time = "2025-10-15T15:12:50.73Z" }, - { url = "https://files.pythonhosted.org/packages/97/af/a7c6f194bb8c5a2705ae019036b8fe7f49ea818d638eedb15fdb7bed227c/coverage-7.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:214b622259dd0cf435f10241f1333d32caa64dbc27f8790ab693428a141723de", size = 246490, upload-time = "2025-10-15T15:12:52.646Z" }, - { url = "https://files.pythonhosted.org/packages/ab/c3/aab4df02b04a8fde79068c3c41ad7a622b0ef2b12e1ed154da986a727c3f/coverage-7.11.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:258d9967520cca899695d4eb7ea38be03f06951d6ca2f21fb48b1235f791e601", size = 246208, upload-time = "2025-10-15T15:12:54.586Z" }, - { url = 
"https://files.pythonhosted.org/packages/30/d8/e282ec19cd658238d60ed404f99ef2e45eed52e81b866ab1518c0d4163cf/coverage-7.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cf9e6ff4ca908ca15c157c409d608da77a56a09877b97c889b98fb2c32b6465e", size = 247126, upload-time = "2025-10-15T15:12:56.485Z" }, - { url = "https://files.pythonhosted.org/packages/d1/17/a635fa07fac23adb1a5451ec756216768c2767efaed2e4331710342a3399/coverage-7.11.0-cp311-cp311-win32.whl", hash = "sha256:fcc15fc462707b0680cff6242c48625da7f9a16a28a41bb8fd7a4280920e676c", size = 218314, upload-time = "2025-10-15T15:12:58.365Z" }, - { url = "https://files.pythonhosted.org/packages/2a/29/2ac1dfcdd4ab9a70026edc8d715ece9b4be9a1653075c658ee6f271f394d/coverage-7.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:865965bf955d92790f1facd64fe7ff73551bd2c1e7e6b26443934e9701ba30b9", size = 219203, upload-time = "2025-10-15T15:12:59.902Z" }, - { url = "https://files.pythonhosted.org/packages/03/21/5ce8b3a0133179115af4c041abf2ee652395837cb896614beb8ce8ddcfd9/coverage-7.11.0-cp311-cp311-win_arm64.whl", hash = "sha256:5693e57a065760dcbeb292d60cc4d0231a6d4b6b6f6a3191561e1d5e8820b745", size = 217879, upload-time = "2025-10-15T15:13:01.35Z" }, - { url = "https://files.pythonhosted.org/packages/c4/db/86f6906a7c7edc1a52b2c6682d6dd9be775d73c0dfe2b84f8923dfea5784/coverage-7.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9c49e77811cf9d024b95faf86c3f059b11c0c9be0b0d61bc598f453703bd6fd1", size = 216098, upload-time = "2025-10-15T15:13:02.916Z" }, - { url = "https://files.pythonhosted.org/packages/21/54/e7b26157048c7ba555596aad8569ff903d6cd67867d41b75287323678ede/coverage-7.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a61e37a403a778e2cda2a6a39abcc895f1d984071942a41074b5c7ee31642007", size = 216331, upload-time = "2025-10-15T15:13:04.403Z" }, - { url = 
"https://files.pythonhosted.org/packages/b9/19/1ce6bf444f858b83a733171306134a0544eaddf1ca8851ede6540a55b2ad/coverage-7.11.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c79cae102bb3b1801e2ef1511fb50e91ec83a1ce466b2c7c25010d884336de46", size = 247825, upload-time = "2025-10-15T15:13:05.92Z" }, - { url = "https://files.pythonhosted.org/packages/71/0b/d3bcbbc259fcced5fb67c5d78f6e7ee965f49760c14afd931e9e663a83b2/coverage-7.11.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:16ce17ceb5d211f320b62df002fa7016b7442ea0fd260c11cec8ce7730954893", size = 250573, upload-time = "2025-10-15T15:13:07.471Z" }, - { url = "https://files.pythonhosted.org/packages/58/8d/b0ff3641a320abb047258d36ed1c21d16be33beed4152628331a1baf3365/coverage-7.11.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:80027673e9d0bd6aef86134b0771845e2da85755cf686e7c7c59566cf5a89115", size = 251706, upload-time = "2025-10-15T15:13:09.4Z" }, - { url = "https://files.pythonhosted.org/packages/59/c8/5a586fe8c7b0458053d9c687f5cff515a74b66c85931f7fe17a1c958b4ac/coverage-7.11.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4d3ffa07a08657306cd2215b0da53761c4d73cb54d9143b9303a6481ec0cd415", size = 248221, upload-time = "2025-10-15T15:13:10.964Z" }, - { url = "https://files.pythonhosted.org/packages/d0/ff/3a25e3132804ba44cfa9a778cdf2b73dbbe63ef4b0945e39602fc896ba52/coverage-7.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a3b6a5f8b2524fd6c1066bc85bfd97e78709bb5e37b5b94911a6506b65f47186", size = 249624, upload-time = "2025-10-15T15:13:12.5Z" }, - { url = "https://files.pythonhosted.org/packages/c5/12/ff10c8ce3895e1b17a73485ea79ebc1896a9e466a9d0f4aef63e0d17b718/coverage-7.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fcc0a4aa589de34bc56e1a80a740ee0f8c47611bdfb28cd1849de60660f3799d", size = 247744, upload-time = 
"2025-10-15T15:13:14.554Z" }, - { url = "https://files.pythonhosted.org/packages/16/02/d500b91f5471b2975947e0629b8980e5e90786fe316b6d7299852c1d793d/coverage-7.11.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:dba82204769d78c3fd31b35c3d5f46e06511936c5019c39f98320e05b08f794d", size = 247325, upload-time = "2025-10-15T15:13:16.438Z" }, - { url = "https://files.pythonhosted.org/packages/77/11/dee0284fbbd9cd64cfce806b827452c6df3f100d9e66188e82dfe771d4af/coverage-7.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:81b335f03ba67309a95210caf3eb43bd6fe75a4e22ba653ef97b4696c56c7ec2", size = 249180, upload-time = "2025-10-15T15:13:17.959Z" }, - { url = "https://files.pythonhosted.org/packages/59/1b/cdf1def928f0a150a057cab03286774e73e29c2395f0d30ce3d9e9f8e697/coverage-7.11.0-cp312-cp312-win32.whl", hash = "sha256:037b2d064c2f8cc8716fe4d39cb705779af3fbf1ba318dc96a1af858888c7bb5", size = 218479, upload-time = "2025-10-15T15:13:19.608Z" }, - { url = "https://files.pythonhosted.org/packages/ff/55/e5884d55e031da9c15b94b90a23beccc9d6beee65e9835cd6da0a79e4f3a/coverage-7.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:d66c0104aec3b75e5fd897e7940188ea1892ca1d0235316bf89286d6a22568c0", size = 219290, upload-time = "2025-10-15T15:13:21.593Z" }, - { url = "https://files.pythonhosted.org/packages/23/a8/faa930cfc71c1d16bc78f9a19bb73700464f9c331d9e547bfbc1dbd3a108/coverage-7.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:d91ebeac603812a09cf6a886ba6e464f3bbb367411904ae3790dfe28311b15ad", size = 217924, upload-time = "2025-10-15T15:13:23.39Z" }, - { url = "https://files.pythonhosted.org/packages/60/7f/85e4dfe65e400645464b25c036a26ac226cf3a69d4a50c3934c532491cdd/coverage-7.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cc3f49e65ea6e0d5d9bd60368684fe52a704d46f9e7fc413918f18d046ec40e1", size = 216129, upload-time = "2025-10-15T15:13:25.371Z" }, - { url = 
"https://files.pythonhosted.org/packages/96/5d/dc5fa98fea3c175caf9d360649cb1aa3715e391ab00dc78c4c66fabd7356/coverage-7.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f39ae2f63f37472c17b4990f794035c9890418b1b8cca75c01193f3c8d3e01be", size = 216380, upload-time = "2025-10-15T15:13:26.976Z" }, - { url = "https://files.pythonhosted.org/packages/b2/f5/3da9cc9596708273385189289c0e4d8197d37a386bdf17619013554b3447/coverage-7.11.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7db53b5cdd2917b6eaadd0b1251cf4e7d96f4a8d24e174bdbdf2f65b5ea7994d", size = 247375, upload-time = "2025-10-15T15:13:28.923Z" }, - { url = "https://files.pythonhosted.org/packages/65/6c/f7f59c342359a235559d2bc76b0c73cfc4bac7d61bb0df210965cb1ecffd/coverage-7.11.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10ad04ac3a122048688387828b4537bc9cf60c0bf4869c1e9989c46e45690b82", size = 249978, upload-time = "2025-10-15T15:13:30.525Z" }, - { url = "https://files.pythonhosted.org/packages/e7/8c/042dede2e23525e863bf1ccd2b92689692a148d8b5fd37c37899ba882645/coverage-7.11.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4036cc9c7983a2b1f2556d574d2eb2154ac6ed55114761685657e38782b23f52", size = 251253, upload-time = "2025-10-15T15:13:32.174Z" }, - { url = "https://files.pythonhosted.org/packages/7b/a9/3c58df67bfa809a7bddd786356d9c5283e45d693edb5f3f55d0986dd905a/coverage-7.11.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7ab934dd13b1c5e94b692b1e01bd87e4488cb746e3a50f798cb9464fd128374b", size = 247591, upload-time = "2025-10-15T15:13:34.147Z" }, - { url = "https://files.pythonhosted.org/packages/26/5b/c7f32efd862ee0477a18c41e4761305de6ddd2d49cdeda0c1116227570fd/coverage-7.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59a6e5a265f7cfc05f76e3bb53eca2e0dfe90f05e07e849930fecd6abb8f40b4", size = 249411, upload-time = 
"2025-10-15T15:13:38.425Z" }, - { url = "https://files.pythonhosted.org/packages/76/b5/78cb4f1e86c1611431c990423ec0768122905b03837e1b4c6a6f388a858b/coverage-7.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:df01d6c4c81e15a7c88337b795bb7595a8596e92310266b5072c7e301168efbd", size = 247303, upload-time = "2025-10-15T15:13:40.464Z" }, - { url = "https://files.pythonhosted.org/packages/87/c9/23c753a8641a330f45f221286e707c427e46d0ffd1719b080cedc984ec40/coverage-7.11.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:8c934bd088eed6174210942761e38ee81d28c46de0132ebb1801dbe36a390dcc", size = 247157, upload-time = "2025-10-15T15:13:42.087Z" }, - { url = "https://files.pythonhosted.org/packages/c5/42/6e0cc71dc8a464486e944a4fa0d85bdec031cc2969e98ed41532a98336b9/coverage-7.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5a03eaf7ec24078ad64a07f02e30060aaf22b91dedf31a6b24d0d98d2bba7f48", size = 248921, upload-time = "2025-10-15T15:13:43.715Z" }, - { url = "https://files.pythonhosted.org/packages/e8/1c/743c2ef665e6858cccb0f84377dfe3a4c25add51e8c7ef19249be92465b6/coverage-7.11.0-cp313-cp313-win32.whl", hash = "sha256:695340f698a5f56f795b2836abe6fb576e7c53d48cd155ad2f80fd24bc63a040", size = 218526, upload-time = "2025-10-15T15:13:45.336Z" }, - { url = "https://files.pythonhosted.org/packages/ff/d5/226daadfd1bf8ddbccefbd3aa3547d7b960fb48e1bdac124e2dd13a2b71a/coverage-7.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:2727d47fce3ee2bac648528e41455d1b0c46395a087a229deac75e9f88ba5a05", size = 219317, upload-time = "2025-10-15T15:13:47.401Z" }, - { url = "https://files.pythonhosted.org/packages/97/54/47db81dcbe571a48a298f206183ba8a7ba79200a37cd0d9f4788fcd2af4a/coverage-7.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:0efa742f431529699712b92ecdf22de8ff198df41e43aeaaadf69973eb93f17a", size = 217948, upload-time = "2025-10-15T15:13:49.096Z" }, - { url = 
"https://files.pythonhosted.org/packages/e5/8b/cb68425420154e7e2a82fd779a8cc01549b6fa83c2ad3679cd6c088ebd07/coverage-7.11.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:587c38849b853b157706407e9ebdca8fd12f45869edb56defbef2daa5fb0812b", size = 216837, upload-time = "2025-10-15T15:13:51.09Z" }, - { url = "https://files.pythonhosted.org/packages/33/55/9d61b5765a025685e14659c8d07037247de6383c0385757544ffe4606475/coverage-7.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b971bdefdd75096163dd4261c74be813c4508477e39ff7b92191dea19f24cd37", size = 217061, upload-time = "2025-10-15T15:13:52.747Z" }, - { url = "https://files.pythonhosted.org/packages/52/85/292459c9186d70dcec6538f06ea251bc968046922497377bf4a1dc9a71de/coverage-7.11.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:269bfe913b7d5be12ab13a95f3a76da23cf147be7fa043933320ba5625f0a8de", size = 258398, upload-time = "2025-10-15T15:13:54.45Z" }, - { url = "https://files.pythonhosted.org/packages/1f/e2/46edd73fb8bf51446c41148d81944c54ed224854812b6ca549be25113ee0/coverage-7.11.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:dadbcce51a10c07b7c72b0ce4a25e4b6dcb0c0372846afb8e5b6307a121eb99f", size = 260574, upload-time = "2025-10-15T15:13:56.145Z" }, - { url = "https://files.pythonhosted.org/packages/07/5e/1df469a19007ff82e2ca8fe509822820a31e251f80ee7344c34f6cd2ec43/coverage-7.11.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ed43fa22c6436f7957df036331f8fe4efa7af132054e1844918866cd228af6c", size = 262797, upload-time = "2025-10-15T15:13:58.635Z" }, - { url = "https://files.pythonhosted.org/packages/f9/50/de216b31a1434b94d9b34a964c09943c6be45069ec704bfc379d8d89a649/coverage-7.11.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9516add7256b6713ec08359b7b05aeff8850c98d357784c7205b2e60aa2513fa", size = 257361, upload-time = 
"2025-10-15T15:14:00.409Z" }, - { url = "https://files.pythonhosted.org/packages/82/1e/3f9f8344a48111e152e0fd495b6fff13cc743e771a6050abf1627a7ba918/coverage-7.11.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb92e47c92fcbcdc692f428da67db33337fa213756f7adb6a011f7b5a7a20740", size = 260349, upload-time = "2025-10-15T15:14:02.188Z" }, - { url = "https://files.pythonhosted.org/packages/65/9b/3f52741f9e7d82124272f3070bbe316006a7de1bad1093f88d59bfc6c548/coverage-7.11.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d06f4fc7acf3cabd6d74941d53329e06bab00a8fe10e4df2714f0b134bfc64ef", size = 258114, upload-time = "2025-10-15T15:14:03.907Z" }, - { url = "https://files.pythonhosted.org/packages/0b/8b/918f0e15f0365d50d3986bbd3338ca01178717ac5678301f3f547b6619e6/coverage-7.11.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:6fbcee1a8f056af07ecd344482f711f563a9eb1c2cad192e87df00338ec3cdb0", size = 256723, upload-time = "2025-10-15T15:14:06.324Z" }, - { url = "https://files.pythonhosted.org/packages/44/9e/7776829f82d3cf630878a7965a7d70cc6ca94f22c7d20ec4944f7148cb46/coverage-7.11.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dbbf012be5f32533a490709ad597ad8a8ff80c582a95adc8d62af664e532f9ca", size = 259238, upload-time = "2025-10-15T15:14:08.002Z" }, - { url = "https://files.pythonhosted.org/packages/9a/b8/49cf253e1e7a3bedb85199b201862dd7ca4859f75b6cf25ffa7298aa0760/coverage-7.11.0-cp313-cp313t-win32.whl", hash = "sha256:cee6291bb4fed184f1c2b663606a115c743df98a537c969c3c64b49989da96c2", size = 219180, upload-time = "2025-10-15T15:14:09.786Z" }, - { url = "https://files.pythonhosted.org/packages/ac/e1/1a541703826be7ae2125a0fb7f821af5729d56bb71e946e7b933cc7a89a4/coverage-7.11.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a386c1061bf98e7ea4758e4313c0ab5ecf57af341ef0f43a0bf26c2477b5c268", size = 220241, upload-time = "2025-10-15T15:14:11.471Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/d1/5ee0e0a08621140fd418ec4020f595b4d52d7eb429ae6a0c6542b4ba6f14/coverage-7.11.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f9ea02ef40bb83823b2b04964459d281688fe173e20643870bb5d2edf68bc836", size = 218510, upload-time = "2025-10-15T15:14:13.46Z" }, - { url = "https://files.pythonhosted.org/packages/f4/06/e923830c1985ce808e40a3fa3eb46c13350b3224b7da59757d37b6ce12b8/coverage-7.11.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c770885b28fb399aaf2a65bbd1c12bf6f307ffd112d6a76c5231a94276f0c497", size = 216110, upload-time = "2025-10-15T15:14:15.157Z" }, - { url = "https://files.pythonhosted.org/packages/42/82/cdeed03bfead45203fb651ed756dfb5266028f5f939e7f06efac4041dad5/coverage-7.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a3d0e2087dba64c86a6b254f43e12d264b636a39e88c5cc0a01a7c71bcfdab7e", size = 216395, upload-time = "2025-10-15T15:14:16.863Z" }, - { url = "https://files.pythonhosted.org/packages/fc/ba/e1c80caffc3199aa699813f73ff097bc2df7b31642bdbc7493600a8f1de5/coverage-7.11.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:73feb83bb41c32811973b8565f3705caf01d928d972b72042b44e97c71fd70d1", size = 247433, upload-time = "2025-10-15T15:14:18.589Z" }, - { url = "https://files.pythonhosted.org/packages/80/c0/5b259b029694ce0a5bbc1548834c7ba3db41d3efd3474489d7efce4ceb18/coverage-7.11.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c6f31f281012235ad08f9a560976cc2fc9c95c17604ff3ab20120fe480169bca", size = 249970, upload-time = "2025-10-15T15:14:20.307Z" }, - { url = "https://files.pythonhosted.org/packages/8c/86/171b2b5e1aac7e2fd9b43f7158b987dbeb95f06d1fbecad54ad8163ae3e8/coverage-7.11.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9570ad567f880ef675673992222746a124b9595506826b210fbe0ce3f0499cd", size = 251324, upload-time = "2025-10-15T15:14:22.419Z" }, - { url = 
"https://files.pythonhosted.org/packages/1a/7e/7e10414d343385b92024af3932a27a1caf75c6e27ee88ba211221ff1a145/coverage-7.11.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8badf70446042553a773547a61fecaa734b55dc738cacf20c56ab04b77425e43", size = 247445, upload-time = "2025-10-15T15:14:24.205Z" }, - { url = "https://files.pythonhosted.org/packages/c4/3b/e4f966b21f5be8c4bf86ad75ae94efa0de4c99c7bbb8114476323102e345/coverage-7.11.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a09c1211959903a479e389685b7feb8a17f59ec5a4ef9afde7650bd5eabc2777", size = 249324, upload-time = "2025-10-15T15:14:26.234Z" }, - { url = "https://files.pythonhosted.org/packages/00/a2/8479325576dfcd909244d0df215f077f47437ab852ab778cfa2f8bf4d954/coverage-7.11.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:5ef83b107f50db3f9ae40f69e34b3bd9337456c5a7fe3461c7abf8b75dd666a2", size = 247261, upload-time = "2025-10-15T15:14:28.42Z" }, - { url = "https://files.pythonhosted.org/packages/7b/d8/3a9e2db19d94d65771d0f2e21a9ea587d11b831332a73622f901157cc24b/coverage-7.11.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:f91f927a3215b8907e214af77200250bb6aae36eca3f760f89780d13e495388d", size = 247092, upload-time = "2025-10-15T15:14:30.784Z" }, - { url = "https://files.pythonhosted.org/packages/b3/b1/bbca3c472544f9e2ad2d5116b2379732957048be4b93a9c543fcd0207e5f/coverage-7.11.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cdbcd376716d6b7fbfeedd687a6c4be019c5a5671b35f804ba76a4c0a778cba4", size = 248755, upload-time = "2025-10-15T15:14:32.585Z" }, - { url = "https://files.pythonhosted.org/packages/89/49/638d5a45a6a0f00af53d6b637c87007eb2297042186334e9923a61aa8854/coverage-7.11.0-cp314-cp314-win32.whl", hash = "sha256:bab7ec4bb501743edc63609320aaec8cd9188b396354f482f4de4d40a9d10721", size = 218793, upload-time = "2025-10-15T15:14:34.972Z" }, - { url = 
"https://files.pythonhosted.org/packages/30/cc/b675a51f2d068adb3cdf3799212c662239b0ca27f4691d1fff81b92ea850/coverage-7.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:3d4ba9a449e9364a936a27322b20d32d8b166553bfe63059bd21527e681e2fad", size = 219587, upload-time = "2025-10-15T15:14:37.047Z" }, - { url = "https://files.pythonhosted.org/packages/93/98/5ac886876026de04f00820e5094fe22166b98dcb8b426bf6827aaf67048c/coverage-7.11.0-cp314-cp314-win_arm64.whl", hash = "sha256:ce37f215223af94ef0f75ac68ea096f9f8e8c8ec7d6e8c346ee45c0d363f0479", size = 218168, upload-time = "2025-10-15T15:14:38.861Z" }, - { url = "https://files.pythonhosted.org/packages/14/d1/b4145d35b3e3ecf4d917e97fc8895bcf027d854879ba401d9ff0f533f997/coverage-7.11.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f413ce6e07e0d0dc9c433228727b619871532674b45165abafe201f200cc215f", size = 216850, upload-time = "2025-10-15T15:14:40.651Z" }, - { url = "https://files.pythonhosted.org/packages/ca/d1/7f645fc2eccd318369a8a9948acc447bb7c1ade2911e31d3c5620544c22b/coverage-7.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:05791e528a18f7072bf5998ba772fe29db4da1234c45c2087866b5ba4dea710e", size = 217071, upload-time = "2025-10-15T15:14:42.755Z" }, - { url = "https://files.pythonhosted.org/packages/54/7d/64d124649db2737ceced1dfcbdcb79898d5868d311730f622f8ecae84250/coverage-7.11.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cacb29f420cfeb9283b803263c3b9a068924474ff19ca126ba9103e1278dfa44", size = 258570, upload-time = "2025-10-15T15:14:44.542Z" }, - { url = "https://files.pythonhosted.org/packages/6c/3f/6f5922f80dc6f2d8b2c6f974835c43f53eb4257a7797727e6ca5b7b2ec1f/coverage-7.11.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314c24e700d7027ae3ab0d95fbf8d53544fca1f20345fd30cd219b737c6e58d3", size = 260738, upload-time = "2025-10-15T15:14:46.436Z" }, - { url = 
"https://files.pythonhosted.org/packages/0e/5f/9e883523c4647c860b3812b417a2017e361eca5b635ee658387dc11b13c1/coverage-7.11.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:630d0bd7a293ad2fc8b4b94e5758c8b2536fdf36c05f1681270203e463cbfa9b", size = 262994, upload-time = "2025-10-15T15:14:48.3Z" }, - { url = "https://files.pythonhosted.org/packages/07/bb/43b5a8e94c09c8bf51743ffc65c4c841a4ca5d3ed191d0a6919c379a1b83/coverage-7.11.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e89641f5175d65e2dbb44db15fe4ea48fade5d5bbb9868fdc2b4fce22f4a469d", size = 257282, upload-time = "2025-10-15T15:14:50.236Z" }, - { url = "https://files.pythonhosted.org/packages/aa/e5/0ead8af411411330b928733e1d201384b39251a5f043c1612970310e8283/coverage-7.11.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c9f08ea03114a637dab06cedb2e914da9dc67fa52c6015c018ff43fdde25b9c2", size = 260430, upload-time = "2025-10-15T15:14:52.413Z" }, - { url = "https://files.pythonhosted.org/packages/ae/66/03dd8bb0ba5b971620dcaac145461950f6d8204953e535d2b20c6b65d729/coverage-7.11.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ce9f3bde4e9b031eaf1eb61df95c1401427029ea1bfddb8621c1161dcb0fa02e", size = 258190, upload-time = "2025-10-15T15:14:54.268Z" }, - { url = "https://files.pythonhosted.org/packages/45/ae/28a9cce40bf3174426cb2f7e71ee172d98e7f6446dff936a7ccecee34b14/coverage-7.11.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:e4dc07e95495923d6fd4d6c27bf70769425b71c89053083843fd78f378558996", size = 256658, upload-time = "2025-10-15T15:14:56.436Z" }, - { url = "https://files.pythonhosted.org/packages/5c/7c/3a44234a8599513684bfc8684878fd7b126c2760f79712bb78c56f19efc4/coverage-7.11.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:424538266794db2861db4922b05d729ade0940ee69dcf0591ce8f69784db0e11", size = 259342, upload-time = "2025-10-15T15:14:58.538Z" }, - { url = 
"https://files.pythonhosted.org/packages/e1/e6/0108519cba871af0351725ebdb8660fd7a0fe2ba3850d56d32490c7d9b4b/coverage-7.11.0-cp314-cp314t-win32.whl", hash = "sha256:4c1eeb3fb8eb9e0190bebafd0462936f75717687117339f708f395fe455acc73", size = 219568, upload-time = "2025-10-15T15:15:00.382Z" }, - { url = "https://files.pythonhosted.org/packages/c9/76/44ba876e0942b4e62fdde23ccb029ddb16d19ba1bef081edd00857ba0b16/coverage-7.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b56efee146c98dbf2cf5cffc61b9829d1e94442df4d7398b26892a53992d3547", size = 220687, upload-time = "2025-10-15T15:15:02.322Z" }, - { url = "https://files.pythonhosted.org/packages/b9/0c/0df55ecb20d0d0ed5c322e10a441775e1a3a5d78c60f0c4e1abfe6fcf949/coverage-7.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:b5c2705afa83f49bd91962a4094b6b082f94aef7626365ab3f8f4bd159c5acf3", size = 218711, upload-time = "2025-10-15T15:15:04.575Z" }, - { url = "https://files.pythonhosted.org/packages/5f/04/642c1d8a448ae5ea1369eac8495740a79eb4e581a9fb0cbdce56bbf56da1/coverage-7.11.0-py3-none-any.whl", hash = "sha256:4b7589765348d78fb4e5fb6ea35d07564e387da2fc5efff62e0222971f155f68", size = 207761, upload-time = "2025-10-15T15:15:06.439Z" }, +version = "7.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/89/26/4a96807b193b011588099c3b5c89fbb05294e5b90e71018e065465f34eb6/coverage-7.12.0.tar.gz", hash = "sha256:fc11e0a4e372cb5f282f16ef90d4a585034050ccda536451901abfb19a57f40c", size = 819341, upload-time = "2025-11-18T13:34:20.766Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/4a/0dc3de1c172d35abe512332cfdcc43211b6ebce629e4cc42e6cd25ed8f4d/coverage-7.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:32b75c2ba3f324ee37af3ccee5b30458038c50b349ad9b88cee85096132a575b", size = 217409, upload-time = "2025-11-18T13:31:53.122Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/c3/086198b98db0109ad4f84241e8e9ea7e5fb2db8c8ffb787162d40c26cc76/coverage-7.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cb2a1b6ab9fe833714a483a915de350abc624a37149649297624c8d57add089c", size = 217927, upload-time = "2025-11-18T13:31:54.458Z" }, + { url = "https://files.pythonhosted.org/packages/5d/5f/34614dbf5ce0420828fc6c6f915126a0fcb01e25d16cf141bf5361e6aea6/coverage-7.12.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5734b5d913c3755e72f70bf6cc37a0518d4f4745cde760c5d8e12005e62f9832", size = 244678, upload-time = "2025-11-18T13:31:55.805Z" }, + { url = "https://files.pythonhosted.org/packages/55/7b/6b26fb32e8e4a6989ac1d40c4e132b14556131493b1d06bc0f2be169c357/coverage-7.12.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b527a08cdf15753279b7afb2339a12073620b761d79b81cbe2cdebdb43d90daa", size = 246507, upload-time = "2025-11-18T13:31:57.05Z" }, + { url = "https://files.pythonhosted.org/packages/06/42/7d70e6603d3260199b90fb48b537ca29ac183d524a65cc31366b2e905fad/coverage-7.12.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9bb44c889fb68004e94cab71f6a021ec83eac9aeabdbb5a5a88821ec46e1da73", size = 248366, upload-time = "2025-11-18T13:31:58.362Z" }, + { url = "https://files.pythonhosted.org/packages/2d/4a/d86b837923878424c72458c5b25e899a3c5ca73e663082a915f5b3c4d749/coverage-7.12.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4b59b501455535e2e5dde5881739897967b272ba25988c89145c12d772810ccb", size = 245366, upload-time = "2025-11-18T13:31:59.572Z" }, + { url = "https://files.pythonhosted.org/packages/e6/c2/2adec557e0aa9721875f06ced19730fdb7fc58e31b02b5aa56f2ebe4944d/coverage-7.12.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d8842f17095b9868a05837b7b1b73495293091bed870e099521ada176aa3e00e", size = 246408, upload-time = 
"2025-11-18T13:32:00.784Z" }, + { url = "https://files.pythonhosted.org/packages/5a/4b/8bd1f1148260df11c618e535fdccd1e5aaf646e55b50759006a4f41d8a26/coverage-7.12.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c5a6f20bf48b8866095c6820641e7ffbe23f2ac84a2efc218d91235e404c7777", size = 244416, upload-time = "2025-11-18T13:32:01.963Z" }, + { url = "https://files.pythonhosted.org/packages/0e/13/3a248dd6a83df90414c54a4e121fd081fb20602ca43955fbe1d60e2312a9/coverage-7.12.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:5f3738279524e988d9da2893f307c2093815c623f8d05a8f79e3eff3a7a9e553", size = 244681, upload-time = "2025-11-18T13:32:03.408Z" }, + { url = "https://files.pythonhosted.org/packages/76/30/aa833827465a5e8c938935f5d91ba055f70516941078a703740aaf1aa41f/coverage-7.12.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e0d68c1f7eabbc8abe582d11fa393ea483caf4f44b0af86881174769f185c94d", size = 245300, upload-time = "2025-11-18T13:32:04.686Z" }, + { url = "https://files.pythonhosted.org/packages/38/24/f85b3843af1370fb3739fa7571819b71243daa311289b31214fe3e8c9d68/coverage-7.12.0-cp310-cp310-win32.whl", hash = "sha256:7670d860e18b1e3ee5930b17a7d55ae6287ec6e55d9799982aa103a2cc1fa2ef", size = 220008, upload-time = "2025-11-18T13:32:05.806Z" }, + { url = "https://files.pythonhosted.org/packages/3a/a2/c7da5b9566f7164db9eefa133d17761ecb2c2fde9385d754e5b5c80f710d/coverage-7.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:f999813dddeb2a56aab5841e687b68169da0d3f6fc78ccf50952fa2463746022", size = 220943, upload-time = "2025-11-18T13:32:07.166Z" }, + { url = "https://files.pythonhosted.org/packages/5a/0c/0dfe7f0487477d96432e4815537263363fb6dd7289743a796e8e51eabdf2/coverage-7.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa124a3683d2af98bd9d9c2bfa7a5076ca7e5ab09fdb96b81fa7d89376ae928f", size = 217535, upload-time = "2025-11-18T13:32:08.812Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/f5/f9a4a053a5bbff023d3bec259faac8f11a1e5a6479c2ccf586f910d8dac7/coverage-7.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d93fbf446c31c0140208dcd07c5d882029832e8ed7891a39d6d44bd65f2316c3", size = 218044, upload-time = "2025-11-18T13:32:10.329Z" }, + { url = "https://files.pythonhosted.org/packages/95/c5/84fc3697c1fa10cd8571919bf9693f693b7373278daaf3b73e328d502bc8/coverage-7.12.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:52ca620260bd8cd6027317bdd8b8ba929be1d741764ee765b42c4d79a408601e", size = 248440, upload-time = "2025-11-18T13:32:12.536Z" }, + { url = "https://files.pythonhosted.org/packages/f4/36/2d93fbf6a04670f3874aed397d5a5371948a076e3249244a9e84fb0e02d6/coverage-7.12.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f3433ffd541380f3a0e423cff0f4926d55b0cc8c1d160fdc3be24a4c03aa65f7", size = 250361, upload-time = "2025-11-18T13:32:13.852Z" }, + { url = "https://files.pythonhosted.org/packages/5d/49/66dc65cc456a6bfc41ea3d0758c4afeaa4068a2b2931bf83be6894cf1058/coverage-7.12.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7bbb321d4adc9f65e402c677cd1c8e4c2d0105d3ce285b51b4d87f1d5db5245", size = 252472, upload-time = "2025-11-18T13:32:15.068Z" }, + { url = "https://files.pythonhosted.org/packages/35/1f/ebb8a18dffd406db9fcd4b3ae42254aedcaf612470e8712f12041325930f/coverage-7.12.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:22a7aade354a72dff3b59c577bfd18d6945c61f97393bc5fb7bd293a4237024b", size = 248592, upload-time = "2025-11-18T13:32:16.328Z" }, + { url = "https://files.pythonhosted.org/packages/da/a8/67f213c06e5ea3b3d4980df7dc344d7fea88240b5fe878a5dcbdfe0e2315/coverage-7.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3ff651dcd36d2fea66877cd4a82de478004c59b849945446acb5baf9379a1b64", size = 250167, upload-time = 
"2025-11-18T13:32:17.687Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/e52aef68154164ea40cc8389c120c314c747fe63a04b013a5782e989b77f/coverage-7.12.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:31b8b2e38391a56e3cea39d22a23faaa7c3fc911751756ef6d2621d2a9daf742", size = 248238, upload-time = "2025-11-18T13:32:19.2Z" }, + { url = "https://files.pythonhosted.org/packages/1f/a4/4d88750bcf9d6d66f77865e5a05a20e14db44074c25fd22519777cb69025/coverage-7.12.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:297bc2da28440f5ae51c845a47c8175a4db0553a53827886e4fb25c66633000c", size = 247964, upload-time = "2025-11-18T13:32:21.027Z" }, + { url = "https://files.pythonhosted.org/packages/a7/6b/b74693158899d5b47b0bf6238d2c6722e20ba749f86b74454fac0696bb00/coverage-7.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ff7651cc01a246908eac162a6a86fc0dbab6de1ad165dfb9a1e2ec660b44984", size = 248862, upload-time = "2025-11-18T13:32:22.304Z" }, + { url = "https://files.pythonhosted.org/packages/18/de/6af6730227ce0e8ade307b1cc4a08e7f51b419a78d02083a86c04ccceb29/coverage-7.12.0-cp311-cp311-win32.whl", hash = "sha256:313672140638b6ddb2c6455ddeda41c6a0b208298034544cfca138978c6baed6", size = 220033, upload-time = "2025-11-18T13:32:23.714Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a1/e7f63021a7c4fe20994359fcdeae43cbef4a4d0ca36a5a1639feeea5d9e1/coverage-7.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a1783ed5bd0d5938d4435014626568dc7f93e3cb99bc59188cc18857c47aa3c4", size = 220966, upload-time = "2025-11-18T13:32:25.599Z" }, + { url = "https://files.pythonhosted.org/packages/77/e8/deae26453f37c20c3aa0c4433a1e32cdc169bf415cce223a693117aa3ddd/coverage-7.12.0-cp311-cp311-win_arm64.whl", hash = "sha256:4648158fd8dd9381b5847622df1c90ff314efbfc1df4550092ab6013c238a5fc", size = 219637, upload-time = "2025-11-18T13:32:27.265Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/bf/638c0427c0f0d47638242e2438127f3c8ee3cfc06c7fdeb16778ed47f836/coverage-7.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:29644c928772c78512b48e14156b81255000dcfd4817574ff69def189bcb3647", size = 217704, upload-time = "2025-11-18T13:32:28.906Z" }, + { url = "https://files.pythonhosted.org/packages/08/e1/706fae6692a66c2d6b871a608bbde0da6281903fa0e9f53a39ed441da36a/coverage-7.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8638cbb002eaa5d7c8d04da667813ce1067080b9a91099801a0053086e52b736", size = 218064, upload-time = "2025-11-18T13:32:30.161Z" }, + { url = "https://files.pythonhosted.org/packages/a9/8b/eb0231d0540f8af3ffda39720ff43cb91926489d01524e68f60e961366e4/coverage-7.12.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:083631eeff5eb9992c923e14b810a179798bb598e6a0dd60586819fc23be6e60", size = 249560, upload-time = "2025-11-18T13:32:31.835Z" }, + { url = "https://files.pythonhosted.org/packages/e9/a1/67fb52af642e974d159b5b379e4d4c59d0ebe1288677fbd04bbffe665a82/coverage-7.12.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:99d5415c73ca12d558e07776bd957c4222c687b9f1d26fa0e1b57e3598bdcde8", size = 252318, upload-time = "2025-11-18T13:32:33.178Z" }, + { url = "https://files.pythonhosted.org/packages/41/e5/38228f31b2c7665ebf9bdfdddd7a184d56450755c7e43ac721c11a4b8dab/coverage-7.12.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e949ebf60c717c3df63adb4a1a366c096c8d7fd8472608cd09359e1bd48ef59f", size = 253403, upload-time = "2025-11-18T13:32:34.45Z" }, + { url = "https://files.pythonhosted.org/packages/ec/4b/df78e4c8188f9960684267c5a4897836f3f0f20a20c51606ee778a1d9749/coverage-7.12.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6d907ddccbca819afa2cd014bc69983b146cca2735a0b1e6259b2a6c10be1e70", size = 249984, upload-time = 
"2025-11-18T13:32:35.747Z" }, + { url = "https://files.pythonhosted.org/packages/ba/51/bb163933d195a345c6f63eab9e55743413d064c291b6220df754075c2769/coverage-7.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b1518ecbad4e6173f4c6e6c4a46e49555ea5679bf3feda5edb1b935c7c44e8a0", size = 251339, upload-time = "2025-11-18T13:32:37.352Z" }, + { url = "https://files.pythonhosted.org/packages/15/40/c9b29cdb8412c837cdcbc2cfa054547dd83affe6cbbd4ce4fdb92b6ba7d1/coverage-7.12.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:51777647a749abdf6f6fd8c7cffab12de68ab93aab15efc72fbbb83036c2a068", size = 249489, upload-time = "2025-11-18T13:32:39.212Z" }, + { url = "https://files.pythonhosted.org/packages/c8/da/b3131e20ba07a0de4437a50ef3b47840dfabf9293675b0cd5c2c7f66dd61/coverage-7.12.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:42435d46d6461a3b305cdfcad7cdd3248787771f53fe18305548cba474e6523b", size = 249070, upload-time = "2025-11-18T13:32:40.598Z" }, + { url = "https://files.pythonhosted.org/packages/70/81/b653329b5f6302c08d683ceff6785bc60a34be9ae92a5c7b63ee7ee7acec/coverage-7.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5bcead88c8423e1855e64b8057d0544e33e4080b95b240c2a355334bb7ced937", size = 250929, upload-time = "2025-11-18T13:32:42.915Z" }, + { url = "https://files.pythonhosted.org/packages/a3/00/250ac3bca9f252a5fb1338b5ad01331ebb7b40223f72bef5b1b2cb03aa64/coverage-7.12.0-cp312-cp312-win32.whl", hash = "sha256:dcbb630ab034e86d2a0f79aefd2be07e583202f41e037602d438c80044957baa", size = 220241, upload-time = "2025-11-18T13:32:44.665Z" }, + { url = "https://files.pythonhosted.org/packages/64/1c/77e79e76d37ce83302f6c21980b45e09f8aa4551965213a10e62d71ce0ab/coverage-7.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:2fd8354ed5d69775ac42986a691fbf68b4084278710cee9d7c3eaa0c28fa982a", size = 221051, upload-time = "2025-11-18T13:32:46.008Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/f5/641b8a25baae564f9e52cac0e2667b123de961985709a004e287ee7663cc/coverage-7.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:737c3814903be30695b2de20d22bcc5428fdae305c61ba44cdc8b3252984c49c", size = 219692, upload-time = "2025-11-18T13:32:47.372Z" }, + { url = "https://files.pythonhosted.org/packages/b8/14/771700b4048774e48d2c54ed0c674273702713c9ee7acdfede40c2666747/coverage-7.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:47324fffca8d8eae7e185b5bb20c14645f23350f870c1649003618ea91a78941", size = 217725, upload-time = "2025-11-18T13:32:49.22Z" }, + { url = "https://files.pythonhosted.org/packages/17/a7/3aa4144d3bcb719bf67b22d2d51c2d577bf801498c13cb08f64173e80497/coverage-7.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ccf3b2ede91decd2fb53ec73c1f949c3e034129d1e0b07798ff1d02ea0c8fa4a", size = 218098, upload-time = "2025-11-18T13:32:50.78Z" }, + { url = "https://files.pythonhosted.org/packages/fc/9c/b846bbc774ff81091a12a10203e70562c91ae71badda00c5ae5b613527b1/coverage-7.12.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b365adc70a6936c6b0582dc38746b33b2454148c02349345412c6e743efb646d", size = 249093, upload-time = "2025-11-18T13:32:52.554Z" }, + { url = "https://files.pythonhosted.org/packages/76/b6/67d7c0e1f400b32c883e9342de4a8c2ae7c1a0b57c5de87622b7262e2309/coverage-7.12.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bc13baf85cd8a4cfcf4a35c7bc9d795837ad809775f782f697bf630b7e200211", size = 251686, upload-time = "2025-11-18T13:32:54.862Z" }, + { url = "https://files.pythonhosted.org/packages/cc/75/b095bd4b39d49c3be4bffbb3135fea18a99a431c52dd7513637c0762fecb/coverage-7.12.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:099d11698385d572ceafb3288a5b80fe1fc58bf665b3f9d362389de488361d3d", size = 252930, upload-time = "2025-11-18T13:32:56.417Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/f3/466f63015c7c80550bead3093aacabf5380c1220a2a93c35d374cae8f762/coverage-7.12.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:473dc45d69694069adb7680c405fb1e81f60b2aff42c81e2f2c3feaf544d878c", size = 249296, upload-time = "2025-11-18T13:32:58.074Z" }, + { url = "https://files.pythonhosted.org/packages/27/86/eba2209bf2b7e28c68698fc13437519a295b2d228ba9e0ec91673e09fa92/coverage-7.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:583f9adbefd278e9de33c33d6846aa8f5d164fa49b47144180a0e037f0688bb9", size = 251068, upload-time = "2025-11-18T13:32:59.646Z" }, + { url = "https://files.pythonhosted.org/packages/ec/55/ca8ae7dbba962a3351f18940b359b94c6bafdd7757945fdc79ec9e452dc7/coverage-7.12.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b2089cc445f2dc0af6f801f0d1355c025b76c24481935303cf1af28f636688f0", size = 249034, upload-time = "2025-11-18T13:33:01.481Z" }, + { url = "https://files.pythonhosted.org/packages/7a/d7/39136149325cad92d420b023b5fd900dabdd1c3a0d1d5f148ef4a8cedef5/coverage-7.12.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:950411f1eb5d579999c5f66c62a40961f126fc71e5e14419f004471957b51508", size = 248853, upload-time = "2025-11-18T13:33:02.935Z" }, + { url = "https://files.pythonhosted.org/packages/fe/b6/76e1add8b87ef60e00643b0b7f8f7bb73d4bf5249a3be19ebefc5793dd25/coverage-7.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b1aab7302a87bafebfe76b12af681b56ff446dc6f32ed178ff9c092ca776e6bc", size = 250619, upload-time = "2025-11-18T13:33:04.336Z" }, + { url = "https://files.pythonhosted.org/packages/95/87/924c6dc64f9203f7a3c1832a6a0eee5a8335dbe5f1bdadcc278d6f1b4d74/coverage-7.12.0-cp313-cp313-win32.whl", hash = "sha256:d7e0d0303c13b54db495eb636bc2465b2fb8475d4c8bcec8fe4b5ca454dfbae8", size = 220261, upload-time = "2025-11-18T13:33:06.493Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/77/dd4aff9af16ff776bf355a24d87eeb48fc6acde54c907cc1ea89b14a8804/coverage-7.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:ce61969812d6a98a981d147d9ac583a36ac7db7766f2e64a9d4d059c2fe29d07", size = 221072, upload-time = "2025-11-18T13:33:07.926Z" }, + { url = "https://files.pythonhosted.org/packages/70/49/5c9dc46205fef31b1b226a6e16513193715290584317fd4df91cdaf28b22/coverage-7.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:bcec6f47e4cb8a4c2dc91ce507f6eefc6a1b10f58df32cdc61dff65455031dfc", size = 219702, upload-time = "2025-11-18T13:33:09.631Z" }, + { url = "https://files.pythonhosted.org/packages/9b/62/f87922641c7198667994dd472a91e1d9b829c95d6c29529ceb52132436ad/coverage-7.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:459443346509476170d553035e4a3eed7b860f4fe5242f02de1010501956ce87", size = 218420, upload-time = "2025-11-18T13:33:11.153Z" }, + { url = "https://files.pythonhosted.org/packages/85/dd/1cc13b2395ef15dbb27d7370a2509b4aee77890a464fb35d72d428f84871/coverage-7.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:04a79245ab2b7a61688958f7a855275997134bc84f4a03bc240cf64ff132abf6", size = 218773, upload-time = "2025-11-18T13:33:12.569Z" }, + { url = "https://files.pythonhosted.org/packages/74/40/35773cc4bb1e9d4658d4fb669eb4195b3151bef3bbd6f866aba5cd5dac82/coverage-7.12.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:09a86acaaa8455f13d6a99221d9654df249b33937b4e212b4e5a822065f12aa7", size = 260078, upload-time = "2025-11-18T13:33:14.037Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ee/231bb1a6ffc2905e396557585ebc6bdc559e7c66708376d245a1f1d330fc/coverage-7.12.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:907e0df1b71ba77463687a74149c6122c3f6aac56c2510a5d906b2f368208560", size = 262144, upload-time = "2025-11-18T13:33:15.601Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/be/32f4aa9f3bf0b56f3971001b56508352c7753915345d45fab4296a986f01/coverage-7.12.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b57e2d0ddd5f0582bae5437c04ee71c46cd908e7bc5d4d0391f9a41e812dd12", size = 264574, upload-time = "2025-11-18T13:33:17.354Z" }, + { url = "https://files.pythonhosted.org/packages/68/7c/00489fcbc2245d13ab12189b977e0cf06ff3351cb98bc6beba8bd68c5902/coverage-7.12.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:58c1c6aa677f3a1411fe6fb28ec3a942e4f665df036a3608816e0847fad23296", size = 259298, upload-time = "2025-11-18T13:33:18.958Z" }, + { url = "https://files.pythonhosted.org/packages/96/b4/f0760d65d56c3bea95b449e02570d4abd2549dc784bf39a2d4721a2d8ceb/coverage-7.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4c589361263ab2953e3c4cd2a94db94c4ad4a8e572776ecfbad2389c626e4507", size = 262150, upload-time = "2025-11-18T13:33:20.644Z" }, + { url = "https://files.pythonhosted.org/packages/c5/71/9a9314df00f9326d78c1e5a910f520d599205907432d90d1c1b7a97aa4b1/coverage-7.12.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:91b810a163ccad2e43b1faa11d70d3cf4b6f3d83f9fd5f2df82a32d47b648e0d", size = 259763, upload-time = "2025-11-18T13:33:22.189Z" }, + { url = "https://files.pythonhosted.org/packages/10/34/01a0aceed13fbdf925876b9a15d50862eb8845454301fe3cdd1df08b2182/coverage-7.12.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:40c867af715f22592e0d0fb533a33a71ec9e0f73a6945f722a0c85c8c1cbe3a2", size = 258653, upload-time = "2025-11-18T13:33:24.239Z" }, + { url = "https://files.pythonhosted.org/packages/8d/04/81d8fd64928acf1574bbb0181f66901c6c1c6279c8ccf5f84259d2c68ae9/coverage-7.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:68b0d0a2d84f333de875666259dadf28cc67858bc8fd8b3f1eae84d3c2bec455", size = 260856, upload-time = "2025-11-18T13:33:26.365Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/76/fa2a37bfaeaf1f766a2d2360a25a5297d4fb567098112f6517475eee120b/coverage-7.12.0-cp313-cp313t-win32.whl", hash = "sha256:73f9e7fbd51a221818fd11b7090eaa835a353ddd59c236c57b2199486b116c6d", size = 220936, upload-time = "2025-11-18T13:33:28.165Z" }, + { url = "https://files.pythonhosted.org/packages/f9/52/60f64d932d555102611c366afb0eb434b34266b1d9266fc2fe18ab641c47/coverage-7.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:24cff9d1f5743f67db7ba46ff284018a6e9aeb649b67aa1e70c396aa1b7cb23c", size = 222001, upload-time = "2025-11-18T13:33:29.656Z" }, + { url = "https://files.pythonhosted.org/packages/77/df/c303164154a5a3aea7472bf323b7c857fed93b26618ed9fc5c2955566bb0/coverage-7.12.0-cp313-cp313t-win_arm64.whl", hash = "sha256:c87395744f5c77c866d0f5a43d97cc39e17c7f1cb0115e54a2fe67ca75c5d14d", size = 220273, upload-time = "2025-11-18T13:33:31.415Z" }, + { url = "https://files.pythonhosted.org/packages/bf/2e/fc12db0883478d6e12bbd62d481210f0c8daf036102aa11434a0c5755825/coverage-7.12.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:a1c59b7dc169809a88b21a936eccf71c3895a78f5592051b1af8f4d59c2b4f92", size = 217777, upload-time = "2025-11-18T13:33:32.86Z" }, + { url = "https://files.pythonhosted.org/packages/1f/c1/ce3e525d223350c6ec16b9be8a057623f54226ef7f4c2fee361ebb6a02b8/coverage-7.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8787b0f982e020adb732b9f051f3e49dd5054cebbc3f3432061278512a2b1360", size = 218100, upload-time = "2025-11-18T13:33:34.532Z" }, + { url = "https://files.pythonhosted.org/packages/15/87/113757441504aee3808cb422990ed7c8bcc2d53a6779c66c5adef0942939/coverage-7.12.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5ea5a9f7dc8877455b13dd1effd3202e0bca72f6f3ab09f9036b1bcf728f69ac", size = 249151, upload-time = "2025-11-18T13:33:36.135Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/1d/9529d9bd44049b6b05bb319c03a3a7e4b0a8a802d28fa348ad407e10706d/coverage-7.12.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fdba9f15849534594f60b47c9a30bc70409b54947319a7c4fd0e8e3d8d2f355d", size = 251667, upload-time = "2025-11-18T13:33:37.996Z" }, + { url = "https://files.pythonhosted.org/packages/11/bb/567e751c41e9c03dc29d3ce74b8c89a1e3396313e34f255a2a2e8b9ebb56/coverage-7.12.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a00594770eb715854fb1c57e0dea08cce6720cfbc531accdb9850d7c7770396c", size = 253003, upload-time = "2025-11-18T13:33:39.553Z" }, + { url = "https://files.pythonhosted.org/packages/e4/b3/c2cce2d8526a02fb9e9ca14a263ca6fc074449b33a6afa4892838c903528/coverage-7.12.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5560c7e0d82b42eb1951e4f68f071f8017c824ebfd5a6ebe42c60ac16c6c2434", size = 249185, upload-time = "2025-11-18T13:33:42.086Z" }, + { url = "https://files.pythonhosted.org/packages/0e/a7/967f93bb66e82c9113c66a8d0b65ecf72fc865adfba5a145f50c7af7e58d/coverage-7.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d6c2e26b481c9159c2773a37947a9718cfdc58893029cdfb177531793e375cfc", size = 251025, upload-time = "2025-11-18T13:33:43.634Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b2/f2f6f56337bc1af465d5b2dc1ee7ee2141b8b9272f3bf6213fcbc309a836/coverage-7.12.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:6e1a8c066dabcde56d5d9fed6a66bc19a2883a3fe051f0c397a41fc42aedd4cc", size = 248979, upload-time = "2025-11-18T13:33:46.04Z" }, + { url = "https://files.pythonhosted.org/packages/f4/7a/bf4209f45a4aec09d10a01a57313a46c0e0e8f4c55ff2965467d41a92036/coverage-7.12.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:f7ba9da4726e446d8dd8aae5a6cd872511184a5d861de80a86ef970b5dacce3e", size = 248800, upload-time = "2025-11-18T13:33:47.546Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/b7/1e01b8696fb0521810f60c5bbebf699100d6754183e6cc0679bf2ed76531/coverage-7.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e0f483ab4f749039894abaf80c2f9e7ed77bbf3c737517fb88c8e8e305896a17", size = 250460, upload-time = "2025-11-18T13:33:49.537Z" }, + { url = "https://files.pythonhosted.org/packages/71/ae/84324fb9cb46c024760e706353d9b771a81b398d117d8c1fe010391c186f/coverage-7.12.0-cp314-cp314-win32.whl", hash = "sha256:76336c19a9ef4a94b2f8dc79f8ac2da3f193f625bb5d6f51a328cd19bfc19933", size = 220533, upload-time = "2025-11-18T13:33:51.16Z" }, + { url = "https://files.pythonhosted.org/packages/e2/71/1033629deb8460a8f97f83e6ac4ca3b93952e2b6f826056684df8275e015/coverage-7.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:7c1059b600aec6ef090721f8f633f60ed70afaffe8ecab85b59df748f24b31fe", size = 221348, upload-time = "2025-11-18T13:33:52.776Z" }, + { url = "https://files.pythonhosted.org/packages/0a/5f/ac8107a902f623b0c251abdb749be282dc2ab61854a8a4fcf49e276fce2f/coverage-7.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:172cf3a34bfef42611963e2b661302a8931f44df31629e5b1050567d6b90287d", size = 219922, upload-time = "2025-11-18T13:33:54.316Z" }, + { url = "https://files.pythonhosted.org/packages/79/6e/f27af2d4da367f16077d21ef6fe796c874408219fa6dd3f3efe7751bd910/coverage-7.12.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:aa7d48520a32cb21c7a9b31f81799e8eaec7239db36c3b670be0fa2403828d1d", size = 218511, upload-time = "2025-11-18T13:33:56.343Z" }, + { url = "https://files.pythonhosted.org/packages/67/dd/65fd874aa460c30da78f9d259400d8e6a4ef457d61ab052fd248f0050558/coverage-7.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:90d58ac63bc85e0fb919f14d09d6caa63f35a5512a2205284b7816cafd21bb03", size = 218771, upload-time = "2025-11-18T13:33:57.966Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/e0/7c6b71d327d8068cb79c05f8f45bf1b6145f7a0de23bbebe63578fe5240a/coverage-7.12.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ca8ecfa283764fdda3eae1bdb6afe58bf78c2c3ec2b2edcb05a671f0bba7b3f9", size = 260151, upload-time = "2025-11-18T13:33:59.597Z" }, + { url = "https://files.pythonhosted.org/packages/49/ce/4697457d58285b7200de6b46d606ea71066c6e674571a946a6ea908fb588/coverage-7.12.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:874fe69a0785d96bd066059cd4368022cebbec1a8958f224f0016979183916e6", size = 262257, upload-time = "2025-11-18T13:34:01.166Z" }, + { url = "https://files.pythonhosted.org/packages/2f/33/acbc6e447aee4ceba88c15528dbe04a35fb4d67b59d393d2e0d6f1e242c1/coverage-7.12.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5b3c889c0b8b283a24d721a9eabc8ccafcfc3aebf167e4cd0d0e23bf8ec4e339", size = 264671, upload-time = "2025-11-18T13:34:02.795Z" }, + { url = "https://files.pythonhosted.org/packages/87/ec/e2822a795c1ed44d569980097be839c5e734d4c0c1119ef8e0a073496a30/coverage-7.12.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8bb5b894b3ec09dcd6d3743229dc7f2c42ef7787dc40596ae04c0edda487371e", size = 259231, upload-time = "2025-11-18T13:34:04.397Z" }, + { url = "https://files.pythonhosted.org/packages/72/c5/a7ec5395bb4a49c9b7ad97e63f0c92f6bf4a9e006b1393555a02dae75f16/coverage-7.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:79a44421cd5fba96aa57b5e3b5a4d3274c449d4c622e8f76882d76635501fd13", size = 262137, upload-time = "2025-11-18T13:34:06.068Z" }, + { url = "https://files.pythonhosted.org/packages/67/0c/02c08858b764129f4ecb8e316684272972e60777ae986f3865b10940bdd6/coverage-7.12.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:33baadc0efd5c7294f436a632566ccc1f72c867f82833eb59820ee37dc811c6f", size = 259745, upload-time = 
"2025-11-18T13:34:08.04Z" }, + { url = "https://files.pythonhosted.org/packages/5a/04/4fd32b7084505f3829a8fe45c1a74a7a728cb251aaadbe3bec04abcef06d/coverage-7.12.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:c406a71f544800ef7e9e0000af706b88465f3573ae8b8de37e5f96c59f689ad1", size = 258570, upload-time = "2025-11-18T13:34:09.676Z" }, + { url = "https://files.pythonhosted.org/packages/48/35/2365e37c90df4f5342c4fa202223744119fe31264ee2924f09f074ea9b6d/coverage-7.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e71bba6a40883b00c6d571599b4627f50c360b3d0d02bfc658168936be74027b", size = 260899, upload-time = "2025-11-18T13:34:11.259Z" }, + { url = "https://files.pythonhosted.org/packages/05/56/26ab0464ca733fa325e8e71455c58c1c374ce30f7c04cebb88eabb037b18/coverage-7.12.0-cp314-cp314t-win32.whl", hash = "sha256:9157a5e233c40ce6613dead4c131a006adfda70e557b6856b97aceed01b0e27a", size = 221313, upload-time = "2025-11-18T13:34:12.863Z" }, + { url = "https://files.pythonhosted.org/packages/da/1c/017a3e1113ed34d998b27d2c6dba08a9e7cb97d362f0ec988fcd873dcf81/coverage-7.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:e84da3a0fd233aeec797b981c51af1cabac74f9bd67be42458365b30d11b5291", size = 222423, upload-time = "2025-11-18T13:34:15.14Z" }, + { url = "https://files.pythonhosted.org/packages/4c/36/bcc504fdd5169301b52568802bb1b9cdde2e27a01d39fbb3b4b508ab7c2c/coverage-7.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:01d24af36fedda51c2b1aca56e4330a3710f83b02a5ff3743a6b015ffa7c9384", size = 220459, upload-time = "2025-11-18T13:34:17.222Z" }, + { url = "https://files.pythonhosted.org/packages/ce/a3/43b749004e3c09452e39bb56347a008f0a0668aad37324a99b5c8ca91d9e/coverage-7.12.0-py3-none-any.whl", hash = "sha256:159d50c0b12e060b15ed3d39f87ed43d4f7f7ad40b8a534f4dd331adbb51104a", size = 209503, upload-time = "2025-11-18T13:34:18.892Z" }, ] [package.optional-dependencies] @@ -1040,82 +1007,6 @@ toml = [ { name = "tomli", marker = "python_full_version <= '3.11' or (extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] -[[package]] -name = "crc32c" -version = "2.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e3/66/7e97aa77af7cf6afbff26e3651b564fe41932599bc2d3dce0b2f73d4829a/crc32c-2.8.tar.gz", hash = "sha256:578728964e59c47c356aeeedee6220e021e124b9d3e8631d95d9a5e5f06e261c", size = 48179, upload-time = "2025-10-17T06:20:13.61Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c4/a0/28b4686a8db0bb0f77970f4c6ccede90d1d5740a1d4b4703bd54c3e75655/crc32c-2.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2c0f4eb01fe7c0a3e3f973a418e04d52101bb077dd77626fd80c658ec60aaf95", size = 66321, upload-time = "2025-10-17T06:18:53.543Z" }, - { url = "https://files.pythonhosted.org/packages/76/1f/1697f5b8b770f715ed9b264d79e36b4f77ae0527f81f3c749ef08937a32e/crc32c-2.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6baefcfbca82b1a9678455416da24f18629769a76920c640d5a538620a7d12bb", size = 62985, upload-time = "2025-10-17T06:18:54.97Z" }, - { url = "https://files.pythonhosted.org/packages/e0/e5/333cfa5ffa8d5779733aced2b984b5e5139b4a8ceaa2c6bc563e9a1092f3/crc32c-2.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d7f959fcf6c5aad1c4a653ee1a50f05760dab1d1c35d98ec4d7f0f68643f7612", size = 61517, upload-time = "2025-10-17T06:18:55.795Z" }, - { url = "https://files.pythonhosted.org/packages/e1/d8/362a009e8140dd926a153b44d56753e3aa7cb50aca243779a84adadbff11/crc32c-2.8-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9bb678507a4e4cf3f0506607b046ecc4ed1c58a19e08a3fb3c2d25441c480bf1", size = 79385, upload-time = "2025-10-17T06:18:56.598Z" }, - { url = "https://files.pythonhosted.org/packages/4a/9f/0d4ea3aa71ffb15f1285669d23024cc40779388ce32157d339dc2584491c/crc32c-2.8-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:1a16f7ffa4c242a909558565567cbba95148603717b53538ea299c98da68e7a9", size = 80965, upload-time = "2025-10-17T06:18:57.384Z" }, - { url = "https://files.pythonhosted.org/packages/20/44/d77657aaca4a2c0283f2356a3da6f8e91b003567bb8f09daaf540cbf192f/crc32c-2.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0184369aad562d801f91f454c81f56b9ecb966f6b96684c4d6cf82fc8741d2ad", size = 79993, upload-time = "2025-10-17T06:18:58.503Z" }, - { url = "https://files.pythonhosted.org/packages/ab/c0/07017a93ebf85d9408028b7e03ef96d5c6bfb14cb77cfe90d35eedcc1501/crc32c-2.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:86d2eeb5f0189bd803720abe7387019328ea34c4acde62999e5723f789bc316b", size = 79243, upload-time = "2025-10-17T06:18:59.273Z" }, - { url = "https://files.pythonhosted.org/packages/c7/1a/b3c5ac4cf2fd1f82395173d0bd8e1a15d09f0bc1eccdf10ea7f8caaccd67/crc32c-2.8-cp310-cp310-win32.whl", hash = "sha256:51da61904a9e753780a2e6011885677d601db1fa840be4b68799643a113e6f08", size = 64888, upload-time = "2025-10-17T06:19:00.089Z" }, - { url = "https://files.pythonhosted.org/packages/b6/f2/60c45fc7bb2221d3c93c7a872e921be591f40d45228fe46f879b1d8c0424/crc32c-2.8-cp310-cp310-win_amd64.whl", hash = "sha256:b2d6a1f2500daaf2e4b08f97ad0349aa2eff5faaaa5fd3350314a26eade334cd", size = 66639, upload-time = "2025-10-17T06:19:00.974Z" }, - { url = "https://files.pythonhosted.org/packages/dc/0b/5e03b22d913698e9cc563f39b9f6bbd508606bf6b8e9122cd6bf196b87ea/crc32c-2.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e560a97fbb96c9897cb1d9b5076ef12fc12e2e25622530a1afd0de4240f17e1f", size = 66329, upload-time = "2025-10-17T06:19:01.771Z" }, - { url = "https://files.pythonhosted.org/packages/6b/38/2fe0051ffe8c6a650c8b1ac0da31b8802d1dbe5fa40a84e4b6b6f5583db5/crc32c-2.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6762d276d90331a490ef7e71ffee53b9c0eb053bd75a272d786f3b08d3fe3671", size = 62988, upload-time = "2025-10-17T06:19:02.953Z" }, - { url = 
"https://files.pythonhosted.org/packages/3e/30/5837a71c014be83aba1469c58820d287fc836512a0cad6b8fdd43868accd/crc32c-2.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:60670569f5ede91e39f48fb0cb4060e05b8d8704dd9e17ede930bf441b2f73ef", size = 61522, upload-time = "2025-10-17T06:19:03.796Z" }, - { url = "https://files.pythonhosted.org/packages/ca/29/63972fc1452778e2092ae998c50cbfc2fc93e3fa9798a0278650cd6169c5/crc32c-2.8-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:711743da6ccc70b3c6718c328947b0b6f34a1fe6a6c27cc6c1d69cc226bf70e9", size = 80200, upload-time = "2025-10-17T06:19:04.617Z" }, - { url = "https://files.pythonhosted.org/packages/cb/3a/60eb49d7bdada4122b3ffd45b0df54bdc1b8dd092cda4b069a287bdfcff4/crc32c-2.8-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5eb4094a2054774f13b26f21bf56792bb44fa1fcee6c6ad099387a43ffbfb4fa", size = 81757, upload-time = "2025-10-17T06:19:05.496Z" }, - { url = "https://files.pythonhosted.org/packages/f5/63/6efc1b64429ef7d23bd58b75b7ac24d15df327e3ebbe9c247a0f7b1c2ed1/crc32c-2.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fff15bf2bd3e95780516baae935ed12be88deaa5ebe6143c53eb0d26a7bdc7b7", size = 80830, upload-time = "2025-10-17T06:19:06.621Z" }, - { url = "https://files.pythonhosted.org/packages/e1/eb/0ae9f436f8004f1c88f7429e659a7218a3879bd11a6b18ed1257aad7e98b/crc32c-2.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4c0e11e3826668121fa53e0745635baf5e4f0ded437e8ff63ea56f38fc4f970a", size = 80095, upload-time = "2025-10-17T06:19:07.381Z" }, - { url = "https://files.pythonhosted.org/packages/9e/81/4afc9d468977a4cd94a2eb62908553345009a7c0d30e74463a15d4b48ec3/crc32c-2.8-cp311-cp311-win32.whl", hash = "sha256:38f915336715d1f1353ab07d7d786f8a789b119e273aea106ba55355dfc9101d", size = 64886, upload-time = "2025-10-17T06:19:08.497Z" }, - { url = 
"https://files.pythonhosted.org/packages/d6/e8/94e839c9f7e767bf8479046a207afd440a08f5c59b52586e1af5e64fa4a0/crc32c-2.8-cp311-cp311-win_amd64.whl", hash = "sha256:60e0a765b1caab8d31b2ea80840639253906a9351d4b861551c8c8625ea20f86", size = 66639, upload-time = "2025-10-17T06:19:09.338Z" }, - { url = "https://files.pythonhosted.org/packages/b6/36/fd18ef23c42926b79c7003e16cb0f79043b5b179c633521343d3b499e996/crc32c-2.8-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:572ffb1b78cce3d88e8d4143e154d31044a44be42cb3f6fbbf77f1e7a941c5ab", size = 66379, upload-time = "2025-10-17T06:19:10.115Z" }, - { url = "https://files.pythonhosted.org/packages/7f/b8/c584958e53f7798dd358f5bdb1bbfc97483134f053ee399d3eeb26cca075/crc32c-2.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cf827b3758ee0c4aacd21ceca0e2da83681f10295c38a10bfeb105f7d98f7a68", size = 63042, upload-time = "2025-10-17T06:19:10.946Z" }, - { url = "https://files.pythonhosted.org/packages/62/e6/6f2af0ec64a668a46c861e5bc778ea3ee42171fedfc5440f791f470fd783/crc32c-2.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:106fbd79013e06fa92bc3b51031694fcc1249811ed4364ef1554ee3dd2c7f5a2", size = 61528, upload-time = "2025-10-17T06:19:11.768Z" }, - { url = "https://files.pythonhosted.org/packages/17/8b/4a04bd80a024f1a23978f19ae99407783e06549e361ab56e9c08bba3c1d3/crc32c-2.8-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6dde035f91ffbfe23163e68605ee5a4bb8ceebd71ed54bb1fb1d0526cdd125a2", size = 80028, upload-time = "2025-10-17T06:19:12.554Z" }, - { url = "https://files.pythonhosted.org/packages/21/8f/01c7afdc76ac2007d0e6a98e7300b4470b170480f8188475b597d1f4b4c6/crc32c-2.8-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e41ebe7c2f0fdcd9f3a3fd206989a36b460b4d3f24816d53e5be6c7dba72c5e1", size = 81531, upload-time = "2025-10-17T06:19:13.406Z" }, - { url = 
"https://files.pythonhosted.org/packages/32/2b/8f78c5a8cc66486be5f51b6f038fc347c3ba748d3ea68be17a014283c331/crc32c-2.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ecf66cf90266d9c15cea597d5cc86c01917cd1a238dc3c51420c7886fa750d7e", size = 80608, upload-time = "2025-10-17T06:19:14.223Z" }, - { url = "https://files.pythonhosted.org/packages/db/86/fad1a94cdeeeb6b6e2323c87f970186e74bfd6fbfbc247bf5c88ad0873d5/crc32c-2.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:59eee5f3a69ad0793d5fa9cdc9b9d743b0cd50edf7fccc0a3988a821fef0208c", size = 79886, upload-time = "2025-10-17T06:19:15.345Z" }, - { url = "https://files.pythonhosted.org/packages/d5/db/1a7cb6757a1e32376fa2dfce00c815ea4ee614a94f9bff8228e37420c183/crc32c-2.8-cp312-cp312-win32.whl", hash = "sha256:a73d03ce3604aa5d7a2698e9057a0eef69f529c46497b27ee1c38158e90ceb76", size = 64896, upload-time = "2025-10-17T06:19:16.457Z" }, - { url = "https://files.pythonhosted.org/packages/bf/8e/2024de34399b2e401a37dcb54b224b56c747b0dc46de4966886827b4d370/crc32c-2.8-cp312-cp312-win_amd64.whl", hash = "sha256:56b3b7d015247962cf58186e06d18c3d75a1a63d709d3233509e1c50a2d36aa2", size = 66645, upload-time = "2025-10-17T06:19:17.235Z" }, - { url = "https://files.pythonhosted.org/packages/e8/d8/3ae227890b3be40955a7144106ef4dd97d6123a82c2a5310cdab58ca49d8/crc32c-2.8-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:36f1e03ee9e9c6938e67d3bcb60e36f260170aa5f37da1185e04ef37b56af395", size = 66380, upload-time = "2025-10-17T06:19:18.009Z" }, - { url = "https://files.pythonhosted.org/packages/bd/8b/178d3f987cd0e049b484615512d3f91f3d2caeeb8ff336bb5896ae317438/crc32c-2.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b2f3226b94b85a8dd9b3533601d7a63e9e3e8edf03a8a169830ee8303a199aeb", size = 63048, upload-time = "2025-10-17T06:19:18.853Z" }, - { url = "https://files.pythonhosted.org/packages/f2/a1/48145ae2545ebc0169d3283ebe882da580ea4606bfb67cf4ca922ac3cfc3/crc32c-2.8-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:6e08628bc72d5b6bc8e0730e8f142194b610e780a98c58cb6698e665cb885a5b", size = 61530, upload-time = "2025-10-17T06:19:19.974Z" }, - { url = "https://files.pythonhosted.org/packages/06/4b/cf05ed9d934cc30e5ae22f97c8272face420a476090e736615d9a6b53de0/crc32c-2.8-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:086f64793c5ec856d1ab31a026d52ad2b895ac83d7a38fce557d74eb857f0a82", size = 80001, upload-time = "2025-10-17T06:19:20.784Z" }, - { url = "https://files.pythonhosted.org/packages/15/ab/4b04801739faf36345f6ba1920be5b1c70282fec52f8280afd3613fb13e2/crc32c-2.8-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bcf72ee7e0135b3d941c34bb2c26c3fc6bc207106b49fd89aaafaeae223ae209", size = 81543, upload-time = "2025-10-17T06:19:21.557Z" }, - { url = "https://files.pythonhosted.org/packages/a9/1b/6e38dde5bfd2ea69b7f2ab6ec229fcd972a53d39e2db4efe75c0ac0382ce/crc32c-2.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8a717dd9c3fd777d9bc6603717eae172887d402c4ab589d124ebd0184a83f89e", size = 80644, upload-time = "2025-10-17T06:19:22.325Z" }, - { url = "https://files.pythonhosted.org/packages/ce/45/012176ffee90059ae8ec7131019c71724ea472aa63e72c0c8edbd1fad1d7/crc32c-2.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0450bb845b3c3c7b9bdc0b4e95620ec9a40824abdc8c86d6285c919a90743c1a", size = 79919, upload-time = "2025-10-17T06:19:23.101Z" }, - { url = "https://files.pythonhosted.org/packages/f0/2b/f557629842f9dec2b3461cb3a0d854bb586ec45b814cea58b082c32f0dde/crc32c-2.8-cp313-cp313-win32.whl", hash = "sha256:765d220bfcbcffa6598ac11eb1e10af0ee4802b49fe126aa6bf79f8ddb9931d1", size = 64896, upload-time = "2025-10-17T06:19:23.88Z" }, - { url = "https://files.pythonhosted.org/packages/d0/db/fd0f698c15d1e21d47c64181a98290665a08fcbb3940cd559e9c15bda57e/crc32c-2.8-cp313-cp313-win_amd64.whl", hash = "sha256:171ff0260d112c62abcce29332986950a57bddee514e0a2418bfde493ea06bb3", size = 66646, 
upload-time = "2025-10-17T06:19:24.702Z" }, - { url = "https://files.pythonhosted.org/packages/db/b9/8e5d7054fe8e7eecab10fd0c8e7ffb01439417bdb6de1d66a81c38fc4a20/crc32c-2.8-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b977a32a3708d6f51703c8557008f190aaa434d7347431efb0e86fcbe78c2a50", size = 66203, upload-time = "2025-10-17T06:19:25.872Z" }, - { url = "https://files.pythonhosted.org/packages/55/5f/cc926c70057a63cc0c98a3c8a896eb15fc7e74d3034eadd53c94917c6cc3/crc32c-2.8-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7399b01db4adaf41da2fb36fe2408e75a8d82a179a9564ed7619412e427b26d6", size = 62956, upload-time = "2025-10-17T06:19:26.652Z" }, - { url = "https://files.pythonhosted.org/packages/a1/8a/0660c44a2dd2cb6ccbb529eb363b9280f5c766f1017bc8355ed8d695bd94/crc32c-2.8-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4379f73f9cdad31958a673d11a332ec725ca71572401ca865867229f5f15e853", size = 61442, upload-time = "2025-10-17T06:19:27.74Z" }, - { url = "https://files.pythonhosted.org/packages/f5/5a/6108d2dfc0fe33522ce83ba07aed4b22014911b387afa228808a278e27cd/crc32c-2.8-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2e68264555fab19bab08331550dab58573e351a63ed79c869d455edd3b0aa417", size = 79109, upload-time = "2025-10-17T06:19:28.535Z" }, - { url = "https://files.pythonhosted.org/packages/84/1e/c054f9e390090c197abf3d2936f4f9effaf0c6ee14569ae03d6ddf86958a/crc32c-2.8-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b48f2486727b8d0e7ccbae4a34cb0300498433d2a9d6b49cb13cb57c2e3f19cb", size = 80987, upload-time = "2025-10-17T06:19:29.305Z" }, - { url = "https://files.pythonhosted.org/packages/c8/ad/1650e5c3341e4a485f800ea83116d72965030c5d48ccc168fcc685756e4d/crc32c-2.8-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ecf123348934a086df8c8fde7f9f2d716d523ca0707c5a1367b8bb00d8134823", size = 79994, upload-time = "2025-10-17T06:19:30.109Z" }, - { url = 
"https://files.pythonhosted.org/packages/d7/3b/f2ed924b177729cbb2ab30ca2902abff653c31d48c95e7b66717a9ca9fcc/crc32c-2.8-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e636ac60f76de538f7a2c0d0f3abf43104ee83a8f5e516f6345dc283ed1a4df7", size = 79046, upload-time = "2025-10-17T06:19:30.894Z" }, - { url = "https://files.pythonhosted.org/packages/4b/80/413b05ee6ace613208b31b3670c3135ee1cf451f0e72a9c839b4946acc04/crc32c-2.8-cp313-cp313t-win32.whl", hash = "sha256:8dd4a19505e0253892e1b2f1425cc3bd47f79ae5a04cb8800315d00aad7197f2", size = 64837, upload-time = "2025-10-17T06:19:32.03Z" }, - { url = "https://files.pythonhosted.org/packages/3b/1b/85eddb6ac5b38496c4e35c20298aae627970c88c3c624a22ab33e84f16c7/crc32c-2.8-cp313-cp313t-win_amd64.whl", hash = "sha256:4bb18e4bd98fb266596523ffc6be9c5b2387b2fa4e505ec56ca36336f49cb639", size = 66574, upload-time = "2025-10-17T06:19:33.143Z" }, - { url = "https://files.pythonhosted.org/packages/aa/df/50e9079b532ff53dbfc0e66eed781374bd455af02ed5df8b56ad538de4ff/crc32c-2.8-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3a3b2e4bcf7b3ee333050e7d3ff38e2ba46ea205f1d73d8949b248aaffe937ac", size = 66399, upload-time = "2025-10-17T06:19:34.279Z" }, - { url = "https://files.pythonhosted.org/packages/5a/2e/67e3b0bc3d30e46ea5d16365cc81203286387671e22f2307eb41f19abb9c/crc32c-2.8-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:445e559e66dff16be54f8a4ef95aa6b01db799a639956d995c5498ba513fccc2", size = 63044, upload-time = "2025-10-17T06:19:35.062Z" }, - { url = "https://files.pythonhosted.org/packages/36/ea/1723b17437e4344ed8d067456382ecb1f5b535d83fdc5aaebab676c6d273/crc32c-2.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:bf3040919e17afa5782e01b1875d6a05f44b8f19c05f211d8b9f8a1deb8bbd9c", size = 61541, upload-time = "2025-10-17T06:19:36.204Z" }, - { url = 
"https://files.pythonhosted.org/packages/4c/6a/cbec8a235c5b46a01f319939b538958662159aec0ed3a74944e3a6de21f1/crc32c-2.8-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5607ab8221e1ffd411f64aa40dbb6850cf06dd2908c9debd05d371e1acf62ff3", size = 80139, upload-time = "2025-10-17T06:19:37.351Z" }, - { url = "https://files.pythonhosted.org/packages/21/31/d096722fe74b692d6e8206c27da1ea5f6b2a12ff92c54a62a6ba2f376254/crc32c-2.8-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7f5db4f16816926986d3c94253314920689706ae13a9bf4888b47336c6735ce", size = 81736, upload-time = "2025-10-17T06:19:38.16Z" }, - { url = "https://files.pythonhosted.org/packages/f6/a2/f75ef716ff7e3c22f385ba6ef30c5de80c19a21ebe699dc90824a1903275/crc32c-2.8-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:70b0153c4d418b673309d3529334d117e1074c4a3b2d7f676e430d72c14de67b", size = 80795, upload-time = "2025-10-17T06:19:38.948Z" }, - { url = "https://files.pythonhosted.org/packages/d8/94/6d647a12d96ab087d9b8eacee3da073f981987827d57c7072f89ffc7b6cd/crc32c-2.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5c8933531442042438753755a5c8a9034e4d88b01da9eb796f7e151b31a7256c", size = 80042, upload-time = "2025-10-17T06:19:39.725Z" }, - { url = "https://files.pythonhosted.org/packages/cd/dc/32b8896b40a0afee7a3c040536d0da5a73e68df2be9fadd21770fd158e16/crc32c-2.8-cp314-cp314-win32.whl", hash = "sha256:cdc83a3fe6c4e5df9457294cfd643de7d95bd4e9382c1dd6ed1e0f0f9169172c", size = 64914, upload-time = "2025-10-17T06:19:40.527Z" }, - { url = "https://files.pythonhosted.org/packages/f2/b4/4308b27d307e8ecaf8dd1dcc63bbb0e47ae1826d93faa3e62d1ee00ee2d5/crc32c-2.8-cp314-cp314-win_amd64.whl", hash = "sha256:509e10035106df66770fe24b9eb8d9e32b6fb967df17744402fb67772d8b2bc7", size = 66723, upload-time = "2025-10-17T06:19:42.449Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/d5/a19d2489fa997a143bfbbf971a5c9a43f8b1ba9e775b1fb362d8fb15260c/crc32c-2.8-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:864359a39777a07b09b28eb31337c0cc603d5c1bf0fc328c3af736a8da624ec0", size = 66201, upload-time = "2025-10-17T06:19:43.273Z" }, - { url = "https://files.pythonhosted.org/packages/98/c2/5f82f22d2c1242cb6f6fe92aa9a42991ebea86de994b8f9974d9c1d128e2/crc32c-2.8-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:14511d7cfc5d9f5e1a6c6b64caa6225c2bdc1ed00d725e9a374a3e84073ce180", size = 62956, upload-time = "2025-10-17T06:19:44.099Z" }, - { url = "https://files.pythonhosted.org/packages/9b/61/3d43d33489cf974fb78bfb3500845770e139ae6d1d83473b660bd8f79a6c/crc32c-2.8-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:918b7999b52b5dcbcea34081e9a02d46917d571921a3f209956a9a429b2e06e5", size = 61443, upload-time = "2025-10-17T06:19:44.89Z" }, - { url = "https://files.pythonhosted.org/packages/52/6d/f306ce64a352a3002f76b0fc88a1373f4541f9d34fad3668688610bab14b/crc32c-2.8-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:cc445da03fc012a5a03b71da1df1b40139729e6a5571fd4215ab40bfb39689c7", size = 79106, upload-time = "2025-10-17T06:19:45.688Z" }, - { url = "https://files.pythonhosted.org/packages/a5/b7/1f74965dd7ea762954a69d172dfb3a706049c84ffa45d31401d010a4a126/crc32c-2.8-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e3dde2ec59a8a830511d72a086ead95c0b0b7f0d418f93ea106244c5e77e350", size = 80983, upload-time = "2025-10-17T06:19:46.792Z" }, - { url = "https://files.pythonhosted.org/packages/1b/50/af93f0d91ccd61833ce77374ebfbd16f5805f5c17d18c6470976d9866d76/crc32c-2.8-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:61d51681a08b6a2a2e771b7f0cd1947fb87cb28f38ed55a01cb7c40b2ac4cdd8", size = 80009, upload-time = "2025-10-17T06:19:47.619Z" }, - { url = 
"https://files.pythonhosted.org/packages/ee/fa/94f394beb68a88258af694dab2f1284f55a406b615d7900bdd6235283bc4/crc32c-2.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:67c0716c3b1a02d5235be649487b637eed21f2d070f2b3f63f709dcd2fefb4c7", size = 79066, upload-time = "2025-10-17T06:19:48.409Z" }, - { url = "https://files.pythonhosted.org/packages/91/c6/a6050e0c64fd73c67a97da96cb59f08b05111e00b958fb87ecdce99f17ac/crc32c-2.8-cp314-cp314t-win32.whl", hash = "sha256:2e8fe863fbbd8bdb6b414a2090f1b0f52106e76e9a9c96a413495dbe5ebe492a", size = 64869, upload-time = "2025-10-17T06:19:49.197Z" }, - { url = "https://files.pythonhosted.org/packages/08/1f/c7735034e401cb1ea14f996a224518e3a3fa9987cb13680e707328a7d779/crc32c-2.8-cp314-cp314t-win_amd64.whl", hash = "sha256:20a9cfb897693eb6da19e52e2a7be2026fd4d9fc8ae318f086c0d71d5dd2d8e0", size = 66633, upload-time = "2025-10-17T06:19:50.003Z" }, - { url = "https://files.pythonhosted.org/packages/a7/1d/dd926c68eb8aac8b142a1a10b8eb62d95212c1cf81775644373fe7cceac2/crc32c-2.8-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5833f4071da7ea182c514ba17d1eee8aec3c5be927d798222fbfbbd0f5eea02c", size = 62345, upload-time = "2025-10-17T06:20:09.39Z" }, - { url = "https://files.pythonhosted.org/packages/51/be/803404e5abea2ef2c15042edca04bbb7f625044cca879e47f186b43887c2/crc32c-2.8-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:1dc4da036126ac07b39dd9d03e93e585ec615a2ad28ff12757aef7de175295a8", size = 61229, upload-time = "2025-10-17T06:20:10.236Z" }, - { url = "https://files.pythonhosted.org/packages/fc/3a/00cc578cd27ed0b22c9be25cef2c24539d92df9fa80ebd67a3fc5419724c/crc32c-2.8-pp311-pypy311_pp73-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:15905fa78344654e241371c47e6ed2411f9eeb2b8095311c68c88eccf541e8b4", size = 64108, upload-time = "2025-10-17T06:20:11.072Z" }, - { url = 
"https://files.pythonhosted.org/packages/6b/bc/0587ef99a1c7629f95dd0c9d4f3d894de383a0df85831eb16c48a6afdae4/crc32c-2.8-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c596f918688821f796434e89b431b1698396c38bf0b56de873621528fe3ecb1e", size = 64815, upload-time = "2025-10-17T06:20:11.919Z" }, - { url = "https://files.pythonhosted.org/packages/73/42/94f2b8b92eae9064fcfb8deef2b971514065bd606231f8857ff8ae02bebd/crc32c-2.8-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8d23c4fe01b3844cb6e091044bc1cebdef7d16472e058ce12d9fadf10d2614af", size = 66659, upload-time = "2025-10-17T06:20:12.766Z" }, -] - [[package]] name = "cryptography" version = "42.0.8" @@ -1207,40 +1098,40 @@ wheels = [ [[package]] name = "cython" -version = "3.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/52/82/01f0b63287cb922e5ba96c5147c30f1e51f541ce91bd178025bb3518b1ba/cython-3.2.0.tar.gz", hash = "sha256:41fdce8237baee2d961c292ed0386903dfe126f131e450a62de0fd7a5280d4b2", size = 3267264, upload-time = "2025-11-05T13:35:04.231Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/57/8d/b2e9578d960d38b1b04a278bf66e13008486aa73e73967186f2015d63d1c/cython-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ee408125b2d218ec7d7a061e09d24715fcab9bf7ea1a4ac01907c3f8ec8730b3", size = 2953775, upload-time = "2025-11-05T13:35:22.291Z" }, - { url = "https://files.pythonhosted.org/packages/19/dd/cfd684f98bac9e0f505af1cbb7998498c59d713275e920a72b40dab03bfa/cython-3.2.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c93ce307b05fcd86a5bb0e4a7d7fab238e2f0e9936636097a60bc0e21f2def30", size = 3361627, upload-time = "2025-11-05T13:35:24.519Z" }, - { url = 
"https://files.pythonhosted.org/packages/9c/c1/75acdbe9f6292514f0bb92ab1b78df5eedd7049235f4cbd194d2c6c46bfc/cython-3.2.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:191cfc2fa84642ad41a52d5abaacfb330d9a6653a465e4bf0a5681f66197a967", size = 3529751, upload-time = "2025-11-05T13:35:26.341Z" }, - { url = "https://files.pythonhosted.org/packages/f2/ce/d0468eb6d87b956902b02909f5007ad61e3839d4c07ab235b514911d869b/cython-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:a259053037ef82959b743b7fde238bd191ee43f88eb8e51101d5f3d8849f1e32", size = 2758839, upload-time = "2025-11-05T13:35:28.36Z" }, - { url = "https://files.pythonhosted.org/packages/ff/2b/904493fceda95747ba83971b40a66c8cc29ff009313429903f38ee620140/cython-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e9e4b2248dc3a98b86aeba65e9862d2cc881d072c163c0fb31b511d4d72e93c8", size = 2946248, upload-time = "2025-11-05T13:35:30.406Z" }, - { url = "https://files.pythonhosted.org/packages/89/fe/abe926699fe6c580967e30bc4035da54b5e31355ba9b1f4c0cf574228a84/cython-3.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02fb4990a83d5d6f780dda18ed8baa8d587cb6523f57b4d72bc0b41ad3766c96", size = 3236384, upload-time = "2025-11-05T13:35:32.233Z" }, - { url = "https://files.pythonhosted.org/packages/1b/36/6b6266549802234286438298d494152deb19922a94928d9dcd256659ebd1/cython-3.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a98925517819d62ea25d2cf40057df60a9bcf75fdd1d6ed3882e6ae0730d82f", size = 3372915, upload-time = "2025-11-05T13:35:34.082Z" }, - { url = "https://files.pythonhosted.org/packages/29/fa/5cf15466b428f9248e38a28515cf0fd98078ae869aa395cfb300315964c4/cython-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:4c959a5d4cd6331e8498822ba47200bd2ff4bf74517c0c91475d5bc21da3b4d5", size = 2762735, upload-time = "2025-11-05T13:35:35.806Z" }, - { url = 
"https://files.pythonhosted.org/packages/57/d3/2e6f5f2552c860bb9c00653d092103521846114f6a2ae0648ecf84c0816c/cython-3.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:511d823d9f8a1b850178ec355d6df0a1731b9c20b08ee6d1a780f68215e9013f", size = 2959932, upload-time = "2025-11-05T13:35:37.518Z" }, - { url = "https://files.pythonhosted.org/packages/dd/bf/7bdc7f231fff6780f78586f939c1740475adecaa03bf256fcb62b2353952/cython-3.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bbadeedcb2d135655bcce7380fb28c9e2a75b6810426c12b6e5a6fe6106fafb4", size = 3218588, upload-time = "2025-11-05T13:35:39.642Z" }, - { url = "https://files.pythonhosted.org/packages/be/81/7d7a81010897dc5abee59691f5fc85849dcc4c8a7687b22ed01bc8d86a7a/cython-3.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92d2394a3e3fe704210b5324eb8118333b514af72c98b1e02a6503945825b231", size = 3381940, upload-time = "2025-11-05T13:35:41.886Z" }, - { url = "https://files.pythonhosted.org/packages/4f/9d/35e7fb7b591bd9912685a772fcc773d7bb951a8feb6fb9be20addbc38928/cython-3.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:73435e56654a34ece57d4c3304a4556a8402cc4ae2d0e30f71c237a985dc5246", size = 2750886, upload-time = "2025-11-05T13:35:43.629Z" }, - { url = "https://files.pythonhosted.org/packages/5d/d0/dc4b260e8fde81b23ab4dca56948b3e69617ef470247ec6a3e09370a9849/cython-3.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d900e58e826f9a5a27b0e2b50e33473e9986a5bae375c39b0f2e19f2c545fa23", size = 2950437, upload-time = "2025-11-05T13:35:45.427Z" }, - { url = "https://files.pythonhosted.org/packages/c8/53/c322bf0486a938ad954a645866b67e978777d79183cf0a042bda6bea11de/cython-3.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a9d38cd3aab720d21fa6d6ee168228352f69aea0a95bd4fb84e8879c6ed38fbb", size = 3209331, upload-time = "2025-11-05T13:35:47.278Z" }, - { url = 
"https://files.pythonhosted.org/packages/cd/48/55d02dba0606768d3450afd088e2bbcd6f8a54977dce041c2c3c1894631c/cython-3.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92b31d0b7b0a49b3d2aa94faaf75d44a03174cff2616b341a8853c919e511d51", size = 3370974, upload-time = "2025-11-05T13:35:49.534Z" }, - { url = "https://files.pythonhosted.org/packages/ce/bd/6dab19652b68464572b7a137d07a91ebe86db2a81c35842ff5e49ef23403/cython-3.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:2847b74e76dbad612f6fc7182c12a5f78cffb0d05808fd2c4b638cf02d1aade6", size = 2746274, upload-time = "2025-11-05T13:35:51.522Z" }, - { url = "https://files.pythonhosted.org/packages/e2/db/de5331ca6489da1761078825709257e1f24e543b4040f86a2502a4b841f9/cython-3.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a0a8274959d538d12f865193dcd67bb5630906e020190c890d2b7c13d31713c6", size = 2961164, upload-time = "2025-11-05T13:35:53.826Z" }, - { url = "https://files.pythonhosted.org/packages/54/3e/64e37e419331f7c4c540ad25c0b3e6d8f44d597f21ab8861afbc66aa7e02/cython-3.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a1c800833c25195833805c7c3626a2c30b3baaaa9ba361a1af3bbc379662a8d", size = 3249627, upload-time = "2025-11-05T13:35:55.524Z" }, - { url = "https://files.pythonhosted.org/packages/9b/fc/9faedfcc2de807f77115d97a4910c260dd4693f4fa9e0e3be0d9ae89e260/cython-3.2.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:df15af08c21c18a2e848df5954d6fd3310735089b60405132fa4111e2cf7482a", size = 3375458, upload-time = "2025-11-05T13:35:57.279Z" }, - { url = "https://files.pythonhosted.org/packages/31/e0/30d449cd97ee0d6395aba18f2646b61b52ab3dc5a3851a346e2d363a7d85/cython-3.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:9d6876af2132757fff1b42a2f4eaa72482f991863160e3f0dc8f2c812b300ebf", size = 2783210, upload-time = "2025-11-05T13:35:59.54Z" }, - { url = 
"https://files.pythonhosted.org/packages/dd/6b/9e1e171fe19274465d84dffa4610d46f434b1ae945e946802db396695d67/cython-3.2.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:04821ce06598a3aa5c9e0270d98960cfe6556dedbd1418c65e4479162b8ae74a", size = 2869249, upload-time = "2025-11-05T13:36:08.944Z" }, - { url = "https://files.pythonhosted.org/packages/c4/f1/f461726f664668a96072b2a245bdfae566d68e2eb1393ec72780cc59c21e/cython-3.2.0-cp39-abi3-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:54b5b1c72a63da822b3f4739a0e31546c0a19f8e834b174906bf817ed5f9d65f", size = 3204332, upload-time = "2025-11-05T13:36:11.386Z" }, - { url = "https://files.pythonhosted.org/packages/78/d8/73c07ce64cae496e5f5a6dfe3e53574af1a8ef777e2a834d10dae8b67a4e/cython-3.2.0-cp39-abi3-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6155a6c360e32af1aaa16fa10b0119b49deeadff42a1958973324150870af1b5", size = 2851317, upload-time = "2025-11-05T13:36:13.14Z" }, - { url = "https://files.pythonhosted.org/packages/bc/d9/d9f321637b8034b5028fa5fe7d1085ffa9351fea350af6510d5cb924c014/cython-3.2.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:861258ac3878b76c57b9b5a379787d772a0bc47fec9167b43986777de542c474", size = 2987155, upload-time = "2025-11-05T13:36:15.018Z" }, - { url = "https://files.pythonhosted.org/packages/f8/b5/9f9e7d261f083b4066d734b27a7872b0c584fd4c3578196652dbf72b3f62/cython-3.2.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:85dbf955e3193893d0288105afa0fa5f4e835ff587061681f240a4f0487c44fb", size = 2884219, upload-time = "2025-11-05T13:36:17.334Z" }, - { url = "https://files.pythonhosted.org/packages/88/64/5aeb6e43e0ded9efedc5a516f87a487fdca8e434491cc352e5a805380459/cython-3.2.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:3b3f13822526726bac43275c0e92916bbcc2c30e9f559edc4c1132670b70498d", size = 3218067, upload-time = "2025-11-05T13:36:19.493Z" }, - { url = 
"https://files.pythonhosted.org/packages/c4/a0/1958f54cd79d8251a330b9c9652b2a5ceba6a3fcec10782dd03e2a23c74f/cython-3.2.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ab18d09673d219008be5b6174bcbb6dbfd50904e66371f104a8a4698b791472d", size = 3108277, upload-time = "2025-11-05T13:36:21.203Z" }, - { url = "https://files.pythonhosted.org/packages/9c/84/9b8112160cab922b97edef00616ed18771567d88b5ba9d30d1736880c345/cython-3.2.0-cp39-abi3-win32.whl", hash = "sha256:c9fd986413fc52929b916187630a9abab9f876299951488c4b905ad5346afee6", size = 2430852, upload-time = "2025-11-05T13:36:23.049Z" }, - { url = "https://files.pythonhosted.org/packages/8f/57/65d3de140b51c45dd6892846bfabdfaaa032e2418f1cb1a2f46058c1fe42/cython-3.2.0-cp39-abi3-win_arm64.whl", hash = "sha256:ee2ea79ddeb721f912e7efea039b9db059c81767ff04fbf9a995f64e1187df99", size = 2435793, upload-time = "2025-11-05T13:36:25.139Z" }, - { url = "https://files.pythonhosted.org/packages/20/58/1f798ddb7fe6bfddf85f4f97d2d4ad63a491a7b643e85c1e274d0f09138e/cython-3.2.0-py3-none-any.whl", hash = "sha256:73f7f4c75acde5b5b4df05b11fdc2705ec637b99241d1bc2f4ebf345f7a2ea90", size = 1252818, upload-time = "2025-11-05T13:35:00.391Z" }, +version = "3.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/83/36/cce2972e13e83ffe58bc73bfd9d37340b5e5113e8243841a57511c7ae1c2/cython-3.2.1.tar.gz", hash = "sha256:2be1e4d0cbdf7f4cd4d9b8284a034e1989b59fd060f6bd4d24bf3729394d2ed8", size = 3270455, upload-time = "2025-11-12T19:02:59.847Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/74/f9fe9e7034f24aef407e7816880c012d8e863bedaa6b42b9ff33e79ea139/cython-3.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f1d10b3731171a33563ba81fdcba39c229e45087269dfbe07a1c00e7dcb2537f", size = 2957374, upload-time = "2025-11-12T19:03:10.132Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/47/f9dd519117f520aaf4d723c88fd9e9139262a0379edc01e71a1e9825e082/cython-3.2.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92b814b6066d178a5057b557d372e2a03854e947e41cb9dec21db732fbd14c3c", size = 3366838, upload-time = "2025-11-12T19:03:11.742Z" }, + { url = "https://files.pythonhosted.org/packages/5d/3e/d967acfafef00056c3ba832692b9bb358ede2919f641e4a2d24828adacc6/cython-3.2.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9fc6abd0532007827d8c6143b2bfedf80c7cb89a3c1c12f058336663489ed2e", size = 3535901, upload-time = "2025-11-12T19:03:13.545Z" }, + { url = "https://files.pythonhosted.org/packages/68/79/bc46e714ecb010f80a8aa7f7eaf412c53cbabbe7489590d6aba5f4478ba5/cython-3.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:14f1ed135347587cfddcd3c3219667cac4f0ea0b66aa1c4c0187d50a1b92c222", size = 2764043, upload-time = "2025-11-12T19:03:15.584Z" }, + { url = "https://files.pythonhosted.org/packages/48/d4/ba7b9f341ec168de78bd659600e04bb7de3b2d069bf98b2178a135e88ea4/cython-3.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3cb32c650e7f4476941d1f735cae75a2067d5e3279576273bb8802e8ea907222", size = 2949720, upload-time = "2025-11-12T19:03:17.492Z" }, + { url = "https://files.pythonhosted.org/packages/ad/47/c42417f424c0b928361f48d7dd0ae72716ee21f647b73ceb16f66b98663e/cython-3.2.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a2b306813d7f28aa0a2c3e4e63ada1427a8109917532df942cd5429db228252", size = 3242127, upload-time = "2025-11-12T19:03:19.227Z" }, + { url = "https://files.pythonhosted.org/packages/e6/fc/1040460889129551649ec35be45e05169871fbcf71bd8e13c533e86f9468/cython-3.2.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0959d9a36d4f004ce63acc1474b3c606745af98b65e8ae709efd0c10988e9d6b", size = 3377094, upload-time = 
"2025-11-12T19:03:21.25Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f2/8c754298eefa40e21af0ae3592837c6e71254900d5aea1c8859e96b11de5/cython-3.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:60c62e734421365135cc2842013d883136054a26c617c001be494235edfc447a", size = 2767824, upload-time = "2025-11-12T19:03:23.317Z" }, + { url = "https://files.pythonhosted.org/packages/ee/0e/19d5041b87f98ed19c94c388607cd27c1f7458078c3bad5de2dead55b2e1/cython-3.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ea5097d97afd2ab14e98637b7033eba5146de29a5dedf89f5e946076396ab891", size = 2966736, upload-time = "2025-11-12T19:03:25.064Z" }, + { url = "https://files.pythonhosted.org/packages/84/b8/bcc36d9d2464348106984956608a52a42a01ab44ea64031207dffdebc078/cython-3.2.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a4bf12de0475bb6a21e2336a4a04dc4a2b4dd0507a2a3c703e045f3484266605", size = 3221633, upload-time = "2025-11-12T19:03:26.754Z" }, + { url = "https://files.pythonhosted.org/packages/79/20/7d4807fe4ebcef9f20f2e5f93312d0f5d02f9f76524fd4e37706d04e83f7/cython-3.2.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18c64a0f69a1b8164de70ec7efc72250c589fec21519170de21582300f6aaed9", size = 3389542, upload-time = "2025-11-12T19:03:28.656Z" }, + { url = "https://files.pythonhosted.org/packages/2a/92/b06ba6721299293bc41e89732070132c453bdbaaeabb8f8cc76851b75345/cython-3.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:5ba14907d5826d8010e82306ce279a0d3650f5b50a4813c80836a17b2213c520", size = 2755307, upload-time = "2025-11-12T19:03:30.684Z" }, + { url = "https://files.pythonhosted.org/packages/40/28/c6e36c214baeb27ae45b518552e74457536c7c964b1a55b5900b047fa467/cython-3.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b4e850fc7a2f72d19679dd083fe4d20bf66860fceabb4f3207112f240249d708", size = 2957307, upload-time = "2025-11-12T19:03:32.471Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/c8/b0b9ba64f81f2875c42aab5c0979d6454cd1ac6b3c1e2373ad552701565d/cython-3.2.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3d20ca4afe993f7dccad3aeddbf4c3536cb0fd3ad6dc7a225935a666a5655af2", size = 3210919, upload-time = "2025-11-12T19:03:34.274Z" }, + { url = "https://files.pythonhosted.org/packages/f9/33/5d9ca6abba0e77e1851b843dd1b3c4095fbc6373166935e83c4414f80e88/cython-3.2.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f5a54a757d01ca6a260b02ce5baf17d9db1c2253566ab5844ee4966ff2a69c19", size = 3373350, upload-time = "2025-11-12T19:03:35.927Z" }, + { url = "https://files.pythonhosted.org/packages/e4/29/4408c3486ff380a2d6ae0d4b71da5195efcef3c4360017113ee7d1cb7335/cython-3.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:1b81e56584727a328e00d91c164f8f0f2c59b02bf6857c3f000cd830fa571453", size = 2753425, upload-time = "2025-11-12T19:03:38.157Z" }, + { url = "https://files.pythonhosted.org/packages/f0/32/c1aa03ccadda89487ff31b90d8651c3706ce2744bf4f2c2ae213147e89bd/cython-3.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d7af6ad01c0fe1965d1d3badaeb6df53c1f37383ebae1ccb405b73f628f87713", size = 2967833, upload-time = "2025-11-12T19:03:40.233Z" }, + { url = "https://files.pythonhosted.org/packages/ff/dc/3488d3ade0635408a2ebb05561a3009e2f54616bfefd1f107088dfeb2c4c/cython-3.2.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e3ea7cd085b62acb67c0fbde5cd17a7d9e47992c965e81ec977cf9ea7c59cd65", size = 3256237, upload-time = "2025-11-12T19:03:42.005Z" }, + { url = "https://files.pythonhosted.org/packages/7b/ba/f3d35d3803c9a424fa8812893847114deb9e2440c1bc67a31ab9ec4b9355/cython-3.2.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:986aea38fdf231e78d73745f83271c5654852c822dc5141a1d3fba64429a6aa6", size = 3383100, upload-time = 
"2025-11-12T19:03:43.675Z" }, + { url = "https://files.pythonhosted.org/packages/86/dc/d72dbb2f8e7ca95d2d18fd86f32b2e385996576230e7ecddd7d250786825/cython-3.2.1-cp314-cp314-win_amd64.whl", hash = "sha256:4960e26cd34c1385f21646339f2e0361fcdd2ed3c01cdb50fe734add577ec56a", size = 2790322, upload-time = "2025-11-12T19:03:45.373Z" }, + { url = "https://files.pythonhosted.org/packages/5a/7e/1194f4ba98b981bbdca945a292e4f49e87ea09d69516b24445409e7cf611/cython-3.2.1-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:4e9167316bf6ecfea33dcca62f074605648fb93cc053ef46b5deb3e5d12fc0d3", size = 2872858, upload-time = "2025-11-12T19:03:55.074Z" }, + { url = "https://files.pythonhosted.org/packages/6b/1a/393ca8ffec7ad3f02b8e4bffaba3dba4fb62c4a1c4c0b6dbf3b80e709fe3/cython-3.2.1-cp39-abi3-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3095df6cd470064742f428c937bed7200c5123b9e19ee04aa09ec61281e565a3", size = 3209664, upload-time = "2025-11-12T19:03:56.771Z" }, + { url = "https://files.pythonhosted.org/packages/37/57/f209f64c609d3d8fac60a572e56da2f621dc1789e399c58db61d5645a31f/cython-3.2.1-cp39-abi3-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:db3f53b2d9afb206075a2605f1150aa019f0733c7795a38eccc6119c2e9c3f7b", size = 2854607, upload-time = "2025-11-12T19:03:59.413Z" }, + { url = "https://files.pythonhosted.org/packages/fc/af/1e5c73fe52423f40776130b0be914fd9f9f8dc26c4f6ea4c2ed04772d558/cython-3.2.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0fc5e7687ac8f8e2b2fb95648f43e9e074ebaa72fd5cb3d8e20e5f1e8b8e02d9", size = 2991567, upload-time = "2025-11-12T19:04:02.209Z" }, + { url = "https://files.pythonhosted.org/packages/39/2c/3ea175b6b1fdfb429f9e9c395240d894155b3c0615caced05fef43264cba/cython-3.2.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:bbb3bc152bc0de82b031c8d355418fa4890a92424209d59366c2c0bc9e6cf53c", size = 2889178, upload-time = "2025-11-12T19:04:05.272Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/88/b2ab22a3a3feac78c62354a823c5c0c33659909e9918f53aa05904532b4b/cython-3.2.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:a2022bc48ad0c2c0e0485bf0b54902913a3d81086b7d435f4437620c667799f6", size = 3223755, upload-time = "2025-11-12T19:04:07.262Z" }, + { url = "https://files.pythonhosted.org/packages/0b/56/9ba58629a03cbffb5965a3c65ccd91fa683d95d588c21a875da72fdc249b/cython-3.2.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:99fdd4ffc2dcb513f4be9ce71c6fedd895b96b1f814655b6bbab196df497b090", size = 3113456, upload-time = "2025-11-12T19:04:09.175Z" }, + { url = "https://files.pythonhosted.org/packages/56/5b/148c1a7ea5aebe460a70cad716a77e5fd0205be2de9fc5250491eb13ad8c/cython-3.2.1-cp39-abi3-win32.whl", hash = "sha256:06071f85bd5ce040464d43b2f9f287742a79f905e81b709fe904567230f1ed51", size = 2434223, upload-time = "2025-11-12T19:04:11.294Z" }, + { url = "https://files.pythonhosted.org/packages/7a/54/bb9b0c9db2a92a5e93747ca3027cfc645741411f8f1c6af2fb2a7b82df5d/cython-3.2.1-cp39-abi3-win_arm64.whl", hash = "sha256:e87c131d59480aee1ebac622b64f287c0e1d665ad1a1b7d498ac48accdb36c6b", size = 2439268, upload-time = "2025-11-12T19:04:12.931Z" }, + { url = "https://files.pythonhosted.org/packages/aa/30/373775b8d933d781d055c1dd0f110f275a101f320dab724c8c63a7c1b945/cython-3.2.1-py3-none-any.whl", hash = "sha256:cd72c46e7bffe8250c52d400e72c8d5d3086437b6aeec5b0eca99ccd337f5834", size = 1254219, upload-time = "2025-11-12T19:02:56.14Z" }, ] [[package]] @@ -1254,7 +1145,8 @@ dependencies = [ { name = "httpx" }, { name = "huggingface-hub" }, { name = "multiprocess" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, 
marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pandas" }, { name = "pyarrow" }, @@ -1291,8 +1183,7 @@ name = "deprecated" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev'" }, - { name = "wrapt", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-lts' or extra != 'extra-13-megatron-core-dev'" }, + { name = "wrapt" }, ] sdist = { url = "https://files.pythonhosted.org/packages/49/85/12f0a49a7c4ffb70572b6c2ef13c90c88fd190debda93b23f026b25f9634/deprecated-1.3.1.tar.gz", hash = "sha256:b1b50e0ff0c1fddaa5708a2c6b0a6588bb09b892825ab2b214ac9ea9d92a5223", size = 2932523, upload-time = "2025-10-30T08:19:02.757Z" } wheels = [ @@ -1340,18 +1231,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408, upload-time = "2024-04-23T18:57:14.835Z" }, ] -[[package]] -name = "donfig" -version = "0.8.1.post1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyyaml", marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/25/71/80cc718ff6d7abfbabacb1f57aaa42e9c1552bfdd01e64ddd704e4a03638/donfig-0.8.1.post1.tar.gz", hash = "sha256:3bef3413a4c1c601b585e8d297256d0c1470ea012afa6e8461dc28bfb7c23f52", size = 19506, upload-time = "2024-05-23T14:14:31.513Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/d5/c5db1ea3394c6e1732fb3286b3bd878b59507a8f77d32a2cebda7d7b7cd4/donfig-0.8.1.post1-py3-none-any.whl", 
hash = "sha256:2a3175ce74a06109ff9307d90a230f81215cbac9a751f4d1c6194644b8204f9d", size = 21592, upload-time = "2024-05-23T14:13:55.283Z" }, -] - [[package]] name = "ebmlite" version = "3.4.1" @@ -1382,14 +1261,14 @@ dependencies = [ [[package]] name = "exceptiongroup" -version = "1.3.0" +version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } +sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674, upload-time = "2025-05-10T17:42:49.33Z" }, + { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, ] [[package]] @@ -1409,7 +1288,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.121.0" +version = "0.122.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -1417,18 +1296,9 @@ dependencies = [ { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/8c/e3/77a2df0946703973b9905fd0cde6172c15e0781984320123b4f5079e7113/fastapi-0.121.0.tar.gz", hash = "sha256:06663356a0b1ee93e875bbf05a31fb22314f5bed455afaaad2b2dad7f26e98fa", size = 342412, upload-time = "2025-11-03T10:25:54.818Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/dd/2c/42277afc1ba1a18f8358561eee40785d27becab8f80a1f945c0a3051c6eb/fastapi-0.121.0-py3-none-any.whl", hash = "sha256:8bdf1b15a55f4e4b0d6201033da9109ea15632cb76cf156e7b8b4019f2172106", size = 109183, upload-time = "2025-11-03T10:25:53.27Z" }, -] - -[[package]] -name = "fasteners" -version = "0.20" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2d/18/7881a99ba5244bfc82f06017316ffe93217dbbbcfa52b887caa1d4f2a6d3/fasteners-0.20.tar.gz", hash = "sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8", size = 25087, upload-time = "2025-08-11T10:19:37.785Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/de/3ee97a4f6ffef1fb70bf20561e4f88531633bb5045dc6cebc0f8471f764d/fastapi-0.122.0.tar.gz", hash = "sha256:cd9b5352031f93773228af8b4c443eedc2ac2aa74b27780387b853c3726fb94b", size = 346436, upload-time = "2025-11-24T19:17:47.95Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/51/ac/e5d886f892666d2d1e5cb8c1a41146e1d79ae8896477b1153a21711d3b44/fasteners-0.20-py3-none-any.whl", hash = "sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7", size = 18702, upload-time = "2025-08-11T10:19:35.716Z" }, + { url = "https://files.pythonhosted.org/packages/7a/93/aa8072af4ff37b795f6bbf43dcaf61115f40f49935c7dbb180c9afc3f421/fastapi-0.122.0-py3-none-any.whl", hash = "sha256:a456e8915dfc6c8914a50d9651133bd47ec96d331c5b44600baa635538a30d67", size = 110671, upload-time = "2025-11-24T19:17:45.96Z" }, ] [[package]] @@ -1513,14 +1383,15 @@ source = { git = "https://github.com/deepseek-ai/FlashMLA?rev=9edee0c022cd093814 [[package]] name = 
"flashinfer-python" -version = "0.5.1" +version = "0.5.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "apache-tvm-ffi" }, { name = "click" }, { name = "einops" }, { name = "ninja" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "nvidia-cudnn-frontend" }, { name = "nvidia-cutlass-dsl" }, { name = "nvidia-ml-py" }, @@ -1530,9 +1401,9 @@ dependencies = [ { name = "torch", marker = "sys_platform == 'never'" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6c/bb/897c3b9d683dcf6490f70e468efb585eebcd673970b13a04ed947b491982/flashinfer_python-0.5.1.tar.gz", hash = "sha256:f12b32d88d8cc10a396456df8ab017f1c4661fbf257e14f4d2461961ec0d090e", size = 4627606, upload-time = "2025-11-04T05:55:02.376Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b4/91/cca69baeff24bb3efd12c7479a026432c8717ee47193694010494c528b22/flashinfer_python-0.5.3.tar.gz", hash = "sha256:100d59b0ede47878d2808cd3a1b9039d7a952d66338bc9f68dac192ae1b2e3f1", size = 4682367, upload-time = "2025-11-20T21:22:46.976Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/f1/33dedad087a2bc3d66244126bd5d1c79721ea22d1f2124299f9e5bdaf3b1/flashinfer_python-0.5.1-py3-none-any.whl", hash = "sha256:ec8434d21e53a0ec333734a3c61946a0f7d2f972e344aefa99ba5b87e63aa76a", size = 6932706, 
upload-time = "2025-11-04T05:55:00.335Z" }, + { url = "https://files.pythonhosted.org/packages/76/78/6dc7e7da8cb87c9965644ea0d2439457a1bc9256c45ceda0044595be4143/flashinfer_python-0.5.3-py3-none-any.whl", hash = "sha256:b601293b72f9138bad173edc28df84b9f239a013be974e2e79d4ba98aeb38cf5", size = 6998069, upload-time = "2025-11-20T21:22:45.104Z" }, ] [[package]] @@ -1820,7 +1691,7 @@ wheels = [ [[package]] name = "hatchling" -version = "1.27.0" +version = "1.28.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, @@ -1829,9 +1700,9 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "trove-classifiers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8f/8a/cc1debe3514da292094f1c3a700e4ca25442489731ef7c0814358816bb03/hatchling-1.27.0.tar.gz", hash = "sha256:971c296d9819abb3811112fc52c7a9751c8d381898f36533bb16f9791e941fd6", size = 54983, upload-time = "2024-12-15T17:08:11.894Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/8e/e480359492affde4119a131da729dd26da742c2c9b604dff74836e47eef9/hatchling-1.28.0.tar.gz", hash = "sha256:4d50b02aece6892b8cd0b3ce6c82cb218594d3ec5836dbde75bf41a21ab004c8", size = 55365, upload-time = "2025-11-27T00:31:13.766Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/08/e7/ae38d7a6dfba0533684e0b2136817d667588ae3ec984c1a4e5df5eb88482/hatchling-1.27.0-py3-none-any.whl", hash = "sha256:d3a2f3567c4f926ea39849cdf924c7e99e6686c9c8e288ae1037c8fa2a5d937b", size = 75794, upload-time = "2024-12-15T17:08:10.364Z" }, + { url = "https://files.pythonhosted.org/packages/0d/a5/48cb7efb8b4718b1a4c0c331e3364a3a33f614ff0d6afd2b93ee883d3c47/hatchling-1.28.0-py3-none-any.whl", hash = "sha256:dc48722b68b3f4bbfa3ff618ca07cdea6750e7d03481289ffa8be1521d18a961", size = 76075, upload-time = "2025-11-27T00:31:12.544Z" }, ] [[package]] @@ -1956,74 +1827,14 @@ wheels 
= [ name = "importlib-metadata" version = "8.6.1" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.12.*' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", -] dependencies = [ - { name = "zipp", marker = "extra == 'extra-13-megatron-core-dev'" }, + { name = "zipp" }, ] sdist = { url = "https://files.pythonhosted.org/packages/33/08/c1395a292bb23fd03bdf572a1357c5a733d3eecbab877641ceacab23db6e/importlib_metadata-8.6.1.tar.gz", hash = "sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580", size = 55767, upload-time = "2025-01-20T22:21:30.429Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/79/9d/0fb148dc4d6fa4a7dd1d8378168d9b4cd8d4560a6fbf6f0121c5fc34eb68/importlib_metadata-8.6.1-py3-none-any.whl", hash = 
"sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e", size = 26971, upload-time = "2025-01-20T22:21:29.177Z" }, ] -[[package]] -name = "importlib-metadata" -version = "8.7.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' 
and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 
'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", -] -dependencies = [ - { name = "zipp", marker = "extra == 'extra-13-megatron-core-lts' or extra != 'extra-13-megatron-core-dev'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = 
"sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, -] - [[package]] name = "iniconfig" version = "2.3.0" @@ -2150,7 +1961,7 @@ wheels = [ [[package]] name = "leptonai" -version = "0.26.6" +version = "0.26.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2175,7 +1986,7 @@ dependencies = [ { name = "uvicorn" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/68/b4/e29dfe5a6e63a0e55fc26115a8eef55fbbc004c7677544bbd88798e1c003/leptonai-0.26.6-py3-none-any.whl", hash = "sha256:e76846b52d6ffc186b26a1fa40ebf0432eb1d8108dda1fb2f7785a1f25c803c2", size = 2443372, upload-time = "2025-09-23T08:04:27.984Z" }, + { url = "https://files.pythonhosted.org/packages/c2/4d/2b5ab13294b23326ba1d8ef6ad703b1d9535bf72a0617030ddd6238eb925/leptonai-0.26.7-py3-none-any.whl", hash = "sha256:74996da36bf177d2b148887dd349627ab8cd78b94623d543bc91ed9ad65ba0e2", size = 2452890, upload-time = "2025-11-07T20:07:14.99Z" }, ] [[package]] @@ -2414,7 +2225,8 @@ wheels = [ name = "megatron-core" source = { editable = "." 
} dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "torch", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] @@ -2425,6 +2237,7 @@ dev = [ { name = "causal-conv1d" }, { name = "einops" }, { name = "emerging-optimizers" }, + { name = "fastapi" }, { name = "flash-linear-attention" }, { name = "flashinfer-python" }, { name = "mamba-ssm" }, @@ -2434,27 +2247,31 @@ dev = [ { name = "nvidia-modelopt", marker = "(sys_platform != 'darwin' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "nvidia-resiliency-ext" }, { name = "nvtx" }, - { name = "onnxscript", version = "0.5.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "onnxscript", version = "0.5.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.13' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "onnxscript" }, { name = "opentelemetry-api" }, - { name = "setuptools" }, - { name = "tensorstore", version = "0.1.74", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and extra == 'extra-13-megatron-core-dev') or (extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "tensorstore", version = "0.1.78", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.13' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "tensorstore", version = "0.1.78", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "tensorstore", version = "0.1.79", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "tqdm" }, - { name = "transformer-engine", marker = "extra == 'extra-13-megatron-core-dev'" }, + { name = "transformer-engine", extra = ["core-cu13", "pytorch"], marker = "extra == 'extra-13-megatron-core-dev'" }, { name = "wget" }, ] lts = [ + { name = "av" }, + { name = "causal-conv1d" }, { name = "einops" }, + { name = "fastapi" }, + { name = "flashinfer-python" }, + { name = "mamba-ssm" }, + { name = "megatron-energon", extra = ["av-decode"], marker = "extra == 'extra-13-megatron-core-lts'" }, + { name = "multi-storage-client" }, + { name = "nv-grouped-gemm" }, { name = "nvtx" }, - { name = "setuptools" }, - { name = "tensorstore", version = "0.1.74", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "tensorstore", version = "0.1.78", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.13' and extra == 'extra-13-megatron-core-lts') or (extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "onnxscript" }, + { name = "opentelemetry-api" }, + { name = "tensorstore", version = "0.1.78", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "tensorstore", version = "0.1.79", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "tqdm" }, - { name = "transformers" }, { name = "wget" }, - { name = "zarr", version = "2.18.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "zarr", version = "3.1.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] mlm = [ { name = "flask-restful" }, @@ -2489,9 +2306,6 @@ docs = [ { name = "sphinx-autodoc2" }, { name = "sphinx-copybutton" }, ] -flash-mla = [ - { name = "flash-mla" }, -] linting = [ { name = "black" }, { name = "flake8" }, @@ -2499,6 +2313,10 @@ linting = [ { name = "pylint" }, { name = "ruff" }, ] +no-pypi-wheels = [ + { name = "emerging-optimizers" }, + { name = "flash-mla" }, +] test = [ { name = "coverage" }, { name = "nemo-run" }, @@ -2512,48 +2330,54 @@ test = [ { name = "pytest-random-order" }, { name = "pyyaml" }, { name = "tensorboard" }, - { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev'" }, - { name = "wrapt", version 
= "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-lts' or extra != 'extra-13-megatron-core-dev'" }, + { name = "wrapt" }, ] [package.metadata] requires-dist = [ - { name = "av", marker = "extra == 'dev'", specifier = "<16.0.0" }, + { name = "av", marker = "extra == 'dev'" }, + { name = "av", marker = "extra == 'lts'" }, { name = "causal-conv1d", marker = "extra == 'dev'", specifier = "~=1.5" }, + { name = "causal-conv1d", marker = "extra == 'lts'", specifier = "~=1.5" }, { name = "einops", marker = "extra == 'dev'", specifier = "~=0.8" }, - { name = "einops", marker = "extra == 'lts'" }, + { name = "einops", marker = "extra == 'lts'", specifier = "~=0.8" }, { name = "emerging-optimizers", marker = "extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, + { name = "fastapi", marker = "extra == 'dev'", specifier = "~=0.50" }, + { name = "fastapi", marker = "extra == 'lts'", specifier = "~=0.50" }, { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.3.2" }, { name = "flashinfer-python", marker = "extra == 'dev'" }, + { name = "flashinfer-python", marker = "extra == 'lts'" }, { name = "flask-restful", marker = "extra == 'mlm'" }, { name = "mamba-ssm", marker = "extra == 'dev'", specifier = "~=2.2" }, + { name = "mamba-ssm", marker = "extra == 'lts'", specifier = "~=2.2" }, { name = "megatron-energon", extras = ["av-decode"], marker = "extra == 'dev'", specifier = "~=6.0" }, + { name = "megatron-energon", extras = ["av-decode"], marker = "extra == 'lts'", specifier = "~=6.0" }, { name = "multi-storage-client", marker = "extra == 'dev'", specifier = "~=0.27" }, - { name = "numpy", specifier = "<2.0.0" }, + { name = "multi-storage-client", marker = "extra == 'lts'", specifier = "~=0.27" }, + { name = "numpy" }, { name = "nv-grouped-gemm", marker = "extra == 'dev'", specifier = "~=1.1" }, - { name = "nvidia-modelopt", extras = ["torch"], marker = 
"sys_platform != 'darwin' and extra == 'dev'", specifier = ">=0.33.0a0,<0.34.0" }, - { name = "nvidia-resiliency-ext", marker = "extra == 'dev'", specifier = ">=0.4.0a0,<0.5.0" }, + { name = "nv-grouped-gemm", marker = "extra == 'lts'", specifier = "~=1.1" }, + { name = "nvidia-modelopt", extras = ["torch"], marker = "sys_platform != 'darwin' and extra == 'dev'" }, + { name = "nvidia-resiliency-ext", marker = "extra == 'dev'" }, { name = "nvtx", marker = "extra == 'dev'", specifier = "~=0.2" }, - { name = "nvtx", marker = "extra == 'lts'" }, + { name = "nvtx", marker = "extra == 'lts'", specifier = "~=0.2" }, { name = "onnxscript", marker = "extra == 'dev'" }, + { name = "onnxscript", marker = "extra == 'lts'" }, { name = "opentelemetry-api", marker = "extra == 'dev'", specifier = "~=1.33.1" }, + { name = "opentelemetry-api", marker = "extra == 'lts'", specifier = "~=1.33.1" }, { name = "packaging", specifier = ">=24.2" }, { name = "sentencepiece", marker = "extra == 'mlm'" }, - { name = "setuptools", marker = "extra == 'dev'", specifier = "<80.0.0" }, - { name = "setuptools", marker = "extra == 'lts'", specifier = "<80.0.0" }, { name = "tensorstore", marker = "extra == 'dev'", specifier = "~=0.1,!=0.1.46,!=0.1.72" }, - { name = "tensorstore", marker = "extra == 'lts'", specifier = "!=0.1.46,!=0.1.72" }, + { name = "tensorstore", marker = "extra == 'lts'", specifier = "~=0.1,!=0.1.46,!=0.1.72" }, { name = "tiktoken", marker = "extra == 'mlm'" }, { name = "torch" }, { name = "tqdm", marker = "extra == 'dev'" }, { name = "tqdm", marker = "extra == 'lts'" }, - { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'dev'", git = "https://github.com/NVIDIA/TransformerEngine.git?rev=release_v2.9" }, - { name = "transformers", marker = "extra == 'lts'" }, + { name = "transformer-engine", extras = ["core-cu13", "pytorch"], marker = "extra == 'dev'", specifier = ">=2.9.0a0,<2.10.0" }, { name = "transformers", marker = "extra == 'mlm'" }, { name = "wandb", 
marker = "extra == 'mlm'" }, { name = "wget", marker = "extra == 'dev'" }, { name = "wget", marker = "extra == 'lts'" }, - { name = "zarr", marker = "extra == 'lts'" }, ] provides-extras = ["mlm", "dev", "lts"] @@ -2580,7 +2404,6 @@ docs = [ { name = "sphinx-autodoc2" }, { name = "sphinx-copybutton" }, ] -flash-mla = [{ name = "flash-mla", git = "https://github.com/deepseek-ai/FlashMLA?rev=9edee0c022cd0938148a18e334203b0aab43aa19" }] linting = [ { name = "black", specifier = "==24.4.2" }, { name = "flake8", specifier = "==7.1.0" }, @@ -2588,6 +2411,10 @@ linting = [ { name = "pylint", specifier = "==3.2.6" }, { name = "ruff", specifier = "~=0.9.0" }, ] +no-pypi-wheels = [ + { name = "emerging-optimizers", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, + { name = "flash-mla", git = "https://github.com/deepseek-ai/FlashMLA?rev=9edee0c022cd0938148a18e334203b0aab43aa19" }, +] test = [ { name = "coverage" }, { name = "nemo-run", git = "https://github.com/NVIDIA-NeMo/Run.git?rev=01a9a8ba360f7b2908728ad0516e0ad9d936966d" }, @@ -2612,7 +2439,8 @@ dependencies = [ { name = "braceexpand" }, { name = "click" }, { name = "multi-storage-client" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "pillow" }, { name = "pyyaml" }, { name = "s3fs" }, @@ -2637,84 +2465,48 @@ av-decode = [ [[package]] name 
= "ml-dtypes" -version = "0.4.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", -] -dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.13'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fd/15/76f86faa0902836cc133939732f7611ace68cf54148487a99c539c272dc8/ml_dtypes-0.4.1.tar.gz", hash = "sha256:fad5f2de464fd09127e49b7fd1252b9006fb43d2edc1ff112d390c324af5ca7a", size = 692594, upload-time = "2024-09-13T19:07:11.624Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/56/9e/76b84f77c7afee3b116dc8407903a2d5004ba3059a8f3dcdcfa6ebf33fff/ml_dtypes-0.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1fe8b5b5e70cd67211db94b05cfd58dace592f24489b038dc6f9fe347d2e07d5", size = 397975, upload-time = "2024-09-13T19:06:44.265Z" }, - { url = "https://files.pythonhosted.org/packages/03/7b/32650e1b2a2713a5923a0af2a8503d0d4a8fc99d1e1e0a1c40e996634460/ml_dtypes-0.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c09a6d11d8475c2a9fd2bc0695628aec105f97cab3b3a3fb7c9660348ff7d24", size = 2182570, upload-time = 
"2024-09-13T19:06:46.189Z" }, - { url = "https://files.pythonhosted.org/packages/16/86/a9f7569e7e4f5395f927de38a13b92efa73f809285d04f2923b291783dd2/ml_dtypes-0.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f5e8f75fa371020dd30f9196e7d73babae2abd51cf59bdd56cb4f8de7e13354", size = 2160365, upload-time = "2024-09-13T19:06:48.198Z" }, - { url = "https://files.pythonhosted.org/packages/04/1b/9a3afb437702503514f3934ec8d7904270edf013d28074f3e700e5dfbb0f/ml_dtypes-0.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:15fdd922fea57e493844e5abb930b9c0bd0af217d9edd3724479fc3d7ce70e3f", size = 126633, upload-time = "2024-09-13T19:06:50.656Z" }, - { url = "https://files.pythonhosted.org/packages/d1/76/9835c8609c29f2214359e88f29255fc4aad4ea0f613fb48aa8815ceda1b6/ml_dtypes-0.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2d55b588116a7085d6e074cf0cdb1d6fa3875c059dddc4d2c94a4cc81c23e975", size = 397973, upload-time = "2024-09-13T19:06:51.748Z" }, - { url = "https://files.pythonhosted.org/packages/7e/99/e68c56fac5de973007a10254b6e17a0362393724f40f66d5e4033f4962c2/ml_dtypes-0.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e138a9b7a48079c900ea969341a5754019a1ad17ae27ee330f7ebf43f23877f9", size = 2185134, upload-time = "2024-09-13T19:06:53.197Z" }, - { url = "https://files.pythonhosted.org/packages/28/bc/6a2344338ea7b61cd7b46fb24ec459360a5a0903b57c55b156c1e46c644a/ml_dtypes-0.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74c6cfb5cf78535b103fde9ea3ded8e9f16f75bc07789054edc7776abfb3d752", size = 2163661, upload-time = "2024-09-13T19:06:54.519Z" }, - { url = "https://files.pythonhosted.org/packages/e8/d3/ddfd9878b223b3aa9a930c6100a99afca5cfab7ea703662e00323acb7568/ml_dtypes-0.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:274cc7193dd73b35fb26bef6c5d40ae3eb258359ee71cd82f6e96a8c948bdaa6", size = 126727, upload-time = "2024-09-13T19:06:55.897Z" }, - { url = 
"https://files.pythonhosted.org/packages/ba/1a/99e924f12e4b62139fbac87419698c65f956d58de0dbfa7c028fa5b096aa/ml_dtypes-0.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:827d3ca2097085cf0355f8fdf092b888890bb1b1455f52801a2d7756f056f54b", size = 405077, upload-time = "2024-09-13T19:06:57.538Z" }, - { url = "https://files.pythonhosted.org/packages/8f/8c/7b610bd500617854c8cc6ed7c8cfb9d48d6a5c21a1437a36a4b9bc8a3598/ml_dtypes-0.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:772426b08a6172a891274d581ce58ea2789cc8abc1c002a27223f314aaf894e7", size = 2181554, upload-time = "2024-09-13T19:06:59.196Z" }, - { url = "https://files.pythonhosted.org/packages/c7/c6/f89620cecc0581dc1839e218c4315171312e46c62a62da6ace204bda91c0/ml_dtypes-0.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:126e7d679b8676d1a958f2651949fbfa182832c3cd08020d8facd94e4114f3e9", size = 2160488, upload-time = "2024-09-13T19:07:03.131Z" }, - { url = "https://files.pythonhosted.org/packages/ae/11/a742d3c31b2cc8557a48efdde53427fd5f9caa2fa3c9c27d826e78a66f51/ml_dtypes-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:df0fb650d5c582a9e72bb5bd96cfebb2cdb889d89daff621c8fbc60295eba66c", size = 127462, upload-time = "2024-09-13T19:07:04.916Z" }, -] - -[[package]] -name = "ml-dtypes" -version = "0.5.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", -] -dependencies = [ - { name = "numpy", marker = "python_full_version < '3.13'" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/78/a7/aad060393123cfb383956dca68402aff3db1e1caffd5764887ed5153f41b/ml_dtypes-0.5.3.tar.gz", hash = "sha256:95ce33057ba4d05df50b1f3cfefab22e351868a843b3b15a46c65836283670c9", size = 692316, upload-time = "2025-07-29T18:39:19.454Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/bb/1f32124ab6d3a279ea39202fe098aea95b2d81ef0ce1d48612b6bf715e82/ml_dtypes-0.5.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0a1d68a7cb53e3f640b2b6a34d12c0542da3dd935e560fdf463c0c77f339fc20", size = 667409, upload-time = "2025-07-29T18:38:17.321Z" }, - { url = "https://files.pythonhosted.org/packages/1d/ac/e002d12ae19136e25bb41c7d14d7e1a1b08f3c0e99a44455ff6339796507/ml_dtypes-0.5.3-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cd5a6c711b5350f3cbc2ac28def81cd1c580075ccb7955e61e9d8f4bfd40d24", size = 4960702, upload-time = "2025-07-29T18:38:19.616Z" }, - { url = "https://files.pythonhosted.org/packages/dd/12/79e9954e6b3255a4b1becb191a922d6e2e94d03d16a06341ae9261963ae8/ml_dtypes-0.5.3-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdcf26c2dbc926b8a35ec8cbfad7eff1a8bd8239e12478caca83a1fc2c400dc2", size = 4933471, upload-time = "2025-07-29T18:38:21.809Z" }, - { url = "https://files.pythonhosted.org/packages/d5/aa/d1eff619e83cd1ddf6b561d8240063d978e5d887d1861ba09ef01778ec3a/ml_dtypes-0.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:aecbd7c5272c82e54d5b99d8435fd10915d1bc704b7df15e4d9ca8dc3902be61", size = 206330, upload-time = "2025-07-29T18:38:23.663Z" }, - { url = "https://files.pythonhosted.org/packages/af/f1/720cb1409b5d0c05cff9040c0e9fba73fa4c67897d33babf905d5d46a070/ml_dtypes-0.5.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4a177b882667c69422402df6ed5c3428ce07ac2c1f844d8a1314944651439458", size = 667412, upload-time = "2025-07-29T18:38:25.275Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/d5/05861ede5d299f6599f86e6bc1291714e2116d96df003cfe23cc54bcc568/ml_dtypes-0.5.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9849ce7267444c0a717c80c6900997de4f36e2815ce34ac560a3edb2d9a64cd2", size = 4964606, upload-time = "2025-07-29T18:38:27.045Z" }, - { url = "https://files.pythonhosted.org/packages/db/dc/72992b68de367741bfab8df3b3fe7c29f982b7279d341aa5bf3e7ef737ea/ml_dtypes-0.5.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c3f5ae0309d9f888fd825c2e9d0241102fadaca81d888f26f845bc8c13c1e4ee", size = 4938435, upload-time = "2025-07-29T18:38:29.193Z" }, - { url = "https://files.pythonhosted.org/packages/81/1c/d27a930bca31fb07d975a2d7eaf3404f9388114463b9f15032813c98f893/ml_dtypes-0.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:58e39349d820b5702bb6f94ea0cb2dc8ec62ee81c0267d9622067d8333596a46", size = 206334, upload-time = "2025-07-29T18:38:30.687Z" }, - { url = "https://files.pythonhosted.org/packages/1a/d8/6922499effa616012cb8dc445280f66d100a7ff39b35c864cfca019b3f89/ml_dtypes-0.5.3-cp311-cp311-win_arm64.whl", hash = "sha256:66c2756ae6cfd7f5224e355c893cfd617fa2f747b8bbd8996152cbdebad9a184", size = 157584, upload-time = "2025-07-29T18:38:32.187Z" }, - { url = "https://files.pythonhosted.org/packages/0d/eb/bc07c88a6ab002b4635e44585d80fa0b350603f11a2097c9d1bfacc03357/ml_dtypes-0.5.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:156418abeeda48ea4797db6776db3c5bdab9ac7be197c1233771e0880c304057", size = 663864, upload-time = "2025-07-29T18:38:33.777Z" }, - { url = "https://files.pythonhosted.org/packages/cf/89/11af9b0f21b99e6386b6581ab40fb38d03225f9de5f55cf52097047e2826/ml_dtypes-0.5.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1db60c154989af253f6c4a34e8a540c2c9dce4d770784d426945e09908fbb177", size = 4951313, upload-time = "2025-07-29T18:38:36.45Z" }, - { url = 
"https://files.pythonhosted.org/packages/d8/a9/b98b86426c24900b0c754aad006dce2863df7ce0bb2bcc2c02f9cc7e8489/ml_dtypes-0.5.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1b255acada256d1fa8c35ed07b5f6d18bc21d1556f842fbc2d5718aea2cd9e55", size = 4928805, upload-time = "2025-07-29T18:38:38.29Z" }, - { url = "https://files.pythonhosted.org/packages/50/c1/85e6be4fc09c6175f36fb05a45917837f30af9a5146a5151cb3a3f0f9e09/ml_dtypes-0.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:da65e5fd3eea434ccb8984c3624bc234ddcc0d9f4c81864af611aaebcc08a50e", size = 208182, upload-time = "2025-07-29T18:38:39.72Z" }, - { url = "https://files.pythonhosted.org/packages/9e/17/cf5326d6867be057f232d0610de1458f70a8ce7b6290e4b4a277ea62b4cd/ml_dtypes-0.5.3-cp312-cp312-win_arm64.whl", hash = "sha256:8bb9cd1ce63096567f5f42851f5843b5a0ea11511e50039a7649619abfb4ba6d", size = 161560, upload-time = "2025-07-29T18:38:41.072Z" }, - { url = "https://files.pythonhosted.org/packages/2d/87/1bcc98a66de7b2455dfb292f271452cac9edc4e870796e0d87033524d790/ml_dtypes-0.5.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5103856a225465371fe119f2fef737402b705b810bd95ad5f348e6e1a6ae21af", size = 663781, upload-time = "2025-07-29T18:38:42.984Z" }, - { url = "https://files.pythonhosted.org/packages/fd/2c/bd2a79ba7c759ee192b5601b675b180a3fd6ccf48ffa27fe1782d280f1a7/ml_dtypes-0.5.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4cae435a68861660af81fa3c5af16b70ca11a17275c5b662d9c6f58294e0f113", size = 4956217, upload-time = "2025-07-29T18:38:44.65Z" }, - { url = "https://files.pythonhosted.org/packages/14/f3/091ba84e5395d7fe5b30c081a44dec881cd84b408db1763ee50768b2ab63/ml_dtypes-0.5.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6936283b56d74fbec431ca57ce58a90a908fdbd14d4e2d22eea6d72bb208a7b7", size = 4933109, upload-time = "2025-07-29T18:38:46.405Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/24/054036dbe32c43295382c90a1363241684c4d6aaa1ecc3df26bd0c8d5053/ml_dtypes-0.5.3-cp313-cp313-win_amd64.whl", hash = "sha256:d0f730a17cf4f343b2c7ad50cee3bd19e969e793d2be6ed911f43086460096e4", size = 208187, upload-time = "2025-07-29T18:38:48.24Z" }, - { url = "https://files.pythonhosted.org/packages/a6/3d/7dc3ec6794a4a9004c765e0c341e32355840b698f73fd2daff46f128afc1/ml_dtypes-0.5.3-cp313-cp313-win_arm64.whl", hash = "sha256:2db74788fc01914a3c7f7da0763427280adfc9cd377e9604b6b64eb8097284bd", size = 161559, upload-time = "2025-07-29T18:38:50.493Z" }, - { url = "https://files.pythonhosted.org/packages/12/91/e6c7a0d67a152b9330445f9f0cf8ae6eee9b83f990b8c57fe74631e42a90/ml_dtypes-0.5.3-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:93c36a08a6d158db44f2eb9ce3258e53f24a9a4a695325a689494f0fdbc71770", size = 689321, upload-time = "2025-07-29T18:38:52.03Z" }, - { url = "https://files.pythonhosted.org/packages/9e/6c/b7b94b84a104a5be1883305b87d4c6bd6ae781504474b4cca067cb2340ec/ml_dtypes-0.5.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0e44a3761f64bc009d71ddb6d6c71008ba21b53ab6ee588dadab65e2fa79eafc", size = 5274495, upload-time = "2025-07-29T18:38:53.797Z" }, - { url = "https://files.pythonhosted.org/packages/5b/38/6266604dffb43378055394ea110570cf261a49876fc48f548dfe876f34cc/ml_dtypes-0.5.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdf40d2aaabd3913dec11840f0d0ebb1b93134f99af6a0a4fd88ffe924928ab4", size = 5285422, upload-time = "2025-07-29T18:38:56.603Z" }, - { url = "https://files.pythonhosted.org/packages/7c/88/8612ff177d043a474b9408f0382605d881eeb4125ba89d4d4b3286573a83/ml_dtypes-0.5.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:aec640bd94c4c85c0d11e2733bd13cbb10438fb004852996ec0efbc6cacdaf70", size = 661182, upload-time = "2025-07-29T18:38:58.414Z" }, - { url = 
"https://files.pythonhosted.org/packages/6f/2b/0569a5e88b29240d373e835107c94ae9256fb2191d3156b43b2601859eff/ml_dtypes-0.5.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bda32ce212baa724e03c68771e5c69f39e584ea426bfe1a701cb01508ffc7035", size = 4956187, upload-time = "2025-07-29T18:39:00.611Z" }, - { url = "https://files.pythonhosted.org/packages/51/66/273c2a06ae44562b104b61e6b14444da00061fd87652506579d7eb2c40b1/ml_dtypes-0.5.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c205cac07d24a29840c163d6469f61069ce4b065518519216297fc2f261f8db9", size = 4930911, upload-time = "2025-07-29T18:39:02.405Z" }, - { url = "https://files.pythonhosted.org/packages/93/ab/606be3e87dc0821bd360c8c1ee46108025c31a4f96942b63907bb441b87d/ml_dtypes-0.5.3-cp314-cp314-win_amd64.whl", hash = "sha256:cd7c0bb22d4ff86d65ad61b5dd246812e8993fbc95b558553624c33e8b6903ea", size = 216664, upload-time = "2025-07-29T18:39:03.927Z" }, - { url = "https://files.pythonhosted.org/packages/30/a2/e900690ca47d01dffffd66375c5de8c4f8ced0f1ef809ccd3b25b3e6b8fa/ml_dtypes-0.5.3-cp314-cp314-win_arm64.whl", hash = "sha256:9d55ea7f7baf2aed61bf1872116cefc9d0c3693b45cae3916897ee27ef4b835e", size = 160203, upload-time = "2025-07-29T18:39:05.671Z" }, - { url = "https://files.pythonhosted.org/packages/53/21/783dfb51f40d2660afeb9bccf3612b99f6a803d980d2a09132b0f9d216ab/ml_dtypes-0.5.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:e12e29764a0e66a7a31e9b8bf1de5cc0423ea72979f45909acd4292de834ccd3", size = 689324, upload-time = "2025-07-29T18:39:07.567Z" }, - { url = "https://files.pythonhosted.org/packages/09/f7/a82d249c711abf411ac027b7163f285487f5e615c3e0716c61033ce996ab/ml_dtypes-0.5.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:19f6c3a4f635c2fc9e2aa7d91416bd7a3d649b48350c51f7f715a09370a90d93", size = 5275917, upload-time = "2025-07-29T18:39:09.339Z" }, - { url = 
"https://files.pythonhosted.org/packages/7f/3c/541c4b30815ab90ebfbb51df15d0b4254f2f9f1e2b4907ab229300d5e6f2/ml_dtypes-0.5.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ab039ffb40f3dc0aeeeba84fd6c3452781b5e15bef72e2d10bcb33e4bbffc39", size = 5285284, upload-time = "2025-07-29T18:39:11.532Z" }, +version = "0.5.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/4a/c27b42ed9b1c7d13d9ba8b6905dece787d6259152f2309338aed29b2447b/ml_dtypes-0.5.4.tar.gz", hash = "sha256:8ab06a50fb9bf9666dd0fe5dfb4676fa2b0ac0f31ecff72a6c3af8e22c063453", size = 692314, upload-time = "2025-11-17T22:32:31.031Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/3a/c5b855752a70267ff729c349e650263adb3c206c29d28cc8ea7ace30a1d5/ml_dtypes-0.5.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b95e97e470fe60ed493fd9ae3911d8da4ebac16bd21f87ffa2b7c588bf22ea2c", size = 679735, upload-time = "2025-11-17T22:31:31.367Z" }, + { url = "https://files.pythonhosted.org/packages/41/79/7433f30ee04bd4faa303844048f55e1eb939131c8e5195a00a96a0939b64/ml_dtypes-0.5.4-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4b801ebe0b477be666696bda493a9be8356f1f0057a57f1e35cd26928823e5a", 
size = 5051883, upload-time = "2025-11-17T22:31:33.658Z" }, + { url = "https://files.pythonhosted.org/packages/10/b1/8938e8830b0ee2e167fc75a094dea766a1152bde46752cd9bfc57ee78a82/ml_dtypes-0.5.4-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:388d399a2152dd79a3f0456a952284a99ee5c93d3e2f8dfe25977511e0515270", size = 5030369, upload-time = "2025-11-17T22:31:35.595Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a3/51886727bd16e2f47587997b802dd56398692ce8c6c03c2e5bb32ecafe26/ml_dtypes-0.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:4ff7f3e7ca2972e7de850e7b8fcbb355304271e2933dd90814c1cb847414d6e2", size = 210738, upload-time = "2025-11-17T22:31:37.43Z" }, + { url = "https://files.pythonhosted.org/packages/c6/5e/712092cfe7e5eb667b8ad9ca7c54442f21ed7ca8979745f1000e24cf8737/ml_dtypes-0.5.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6c7ecb74c4bd71db68a6bea1edf8da8c34f3d9fe218f038814fd1d310ac76c90", size = 679734, upload-time = "2025-11-17T22:31:39.223Z" }, + { url = "https://files.pythonhosted.org/packages/4f/cf/912146dfd4b5c0eea956836c01dcd2fce6c9c844b2691f5152aca196ce4f/ml_dtypes-0.5.4-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc11d7e8c44a65115d05e2ab9989d1e045125d7be8e05a071a48bc76eb6d6040", size = 5056165, upload-time = "2025-11-17T22:31:41.071Z" }, + { url = "https://files.pythonhosted.org/packages/a9/80/19189ea605017473660e43762dc853d2797984b3c7bf30ce656099add30c/ml_dtypes-0.5.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:19b9a53598f21e453ea2fbda8aa783c20faff8e1eeb0d7ab899309a0053f1483", size = 5034975, upload-time = "2025-11-17T22:31:42.758Z" }, + { url = "https://files.pythonhosted.org/packages/b4/24/70bd59276883fdd91600ca20040b41efd4902a923283c4d6edcb1de128d2/ml_dtypes-0.5.4-cp311-cp311-win_amd64.whl", hash = "sha256:7c23c54a00ae43edf48d44066a7ec31e05fdc2eee0be2b8b50dd1903a1db94bb", size = 210742, upload-time = "2025-11-17T22:31:44.068Z" }, + 
{ url = "https://files.pythonhosted.org/packages/a0/c9/64230ef14e40aa3f1cb254ef623bf812735e6bec7772848d19131111ac0d/ml_dtypes-0.5.4-cp311-cp311-win_arm64.whl", hash = "sha256:557a31a390b7e9439056644cb80ed0735a6e3e3bb09d67fd5687e4b04238d1de", size = 160709, upload-time = "2025-11-17T22:31:46.557Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b8/3c70881695e056f8a32f8b941126cf78775d9a4d7feba8abcb52cb7b04f2/ml_dtypes-0.5.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a174837a64f5b16cab6f368171a1a03a27936b31699d167684073ff1c4237dac", size = 676927, upload-time = "2025-11-17T22:31:48.182Z" }, + { url = "https://files.pythonhosted.org/packages/54/0f/428ef6881782e5ebb7eca459689448c0394fa0a80bea3aa9262cba5445ea/ml_dtypes-0.5.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a7f7c643e8b1320fd958bf098aa7ecf70623a42ec5154e3be3be673f4c34d900", size = 5028464, upload-time = "2025-11-17T22:31:50.135Z" }, + { url = "https://files.pythonhosted.org/packages/3a/cb/28ce52eb94390dda42599c98ea0204d74799e4d8047a0eb559b6fd648056/ml_dtypes-0.5.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ad459e99793fa6e13bd5b7e6792c8f9190b4e5a1b45c63aba14a4d0a7f1d5ff", size = 5009002, upload-time = "2025-11-17T22:31:52.001Z" }, + { url = "https://files.pythonhosted.org/packages/f5/f0/0cfadd537c5470378b1b32bd859cf2824972174b51b873c9d95cfd7475a5/ml_dtypes-0.5.4-cp312-cp312-win_amd64.whl", hash = "sha256:c1a953995cccb9e25a4ae19e34316671e4e2edaebe4cf538229b1fc7109087b7", size = 212222, upload-time = "2025-11-17T22:31:53.742Z" }, + { url = "https://files.pythonhosted.org/packages/16/2e/9acc86985bfad8f2c2d30291b27cd2bb4c74cea08695bd540906ed744249/ml_dtypes-0.5.4-cp312-cp312-win_arm64.whl", hash = "sha256:9bad06436568442575beb2d03389aa7456c690a5b05892c471215bfd8cf39460", size = 160793, upload-time = "2025-11-17T22:31:55.358Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/a1/4008f14bbc616cfb1ac5b39ea485f9c63031c4634ab3f4cf72e7541f816a/ml_dtypes-0.5.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c760d85a2f82e2bed75867079188c9d18dae2ee77c25a54d60e9cc79be1bc48", size = 676888, upload-time = "2025-11-17T22:31:56.907Z" }, + { url = "https://files.pythonhosted.org/packages/d3/b7/dff378afc2b0d5a7d6cd9d3209b60474d9819d1189d347521e1688a60a53/ml_dtypes-0.5.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ce756d3a10d0c4067172804c9cc276ba9cc0ff47af9078ad439b075d1abdc29b", size = 5036993, upload-time = "2025-11-17T22:31:58.497Z" }, + { url = "https://files.pythonhosted.org/packages/eb/33/40cd74219417e78b97c47802037cf2d87b91973e18bb968a7da48a96ea44/ml_dtypes-0.5.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:533ce891ba774eabf607172254f2e7260ba5f57bdd64030c9a4fcfbd99815d0d", size = 5010956, upload-time = "2025-11-17T22:31:59.931Z" }, + { url = "https://files.pythonhosted.org/packages/e1/8b/200088c6859d8221454825959df35b5244fa9bdf263fd0249ac5fb75e281/ml_dtypes-0.5.4-cp313-cp313-win_amd64.whl", hash = "sha256:f21c9219ef48ca5ee78402d5cc831bd58ea27ce89beda894428bc67a52da5328", size = 212224, upload-time = "2025-11-17T22:32:01.349Z" }, + { url = "https://files.pythonhosted.org/packages/8f/75/dfc3775cb36367816e678f69a7843f6f03bd4e2bcd79941e01ea960a068e/ml_dtypes-0.5.4-cp313-cp313-win_arm64.whl", hash = "sha256:35f29491a3e478407f7047b8a4834e4640a77d2737e0b294d049746507af5175", size = 160798, upload-time = "2025-11-17T22:32:02.864Z" }, + { url = "https://files.pythonhosted.org/packages/4f/74/e9ddb35fd1dd43b1106c20ced3f53c2e8e7fc7598c15638e9f80677f81d4/ml_dtypes-0.5.4-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:304ad47faa395415b9ccbcc06a0350800bc50eda70f0e45326796e27c62f18b6", size = 702083, upload-time = "2025-11-17T22:32:04.08Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/f5/667060b0aed1aa63166b22897fdf16dca9eb704e6b4bbf86848d5a181aa7/ml_dtypes-0.5.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6a0df4223b514d799b8a1629c65ddc351b3efa833ccf7f8ea0cf654a61d1e35d", size = 5354111, upload-time = "2025-11-17T22:32:05.546Z" }, + { url = "https://files.pythonhosted.org/packages/40/49/0f8c498a28c0efa5f5c95a9e374c83ec1385ca41d0e85e7cf40e5d519a21/ml_dtypes-0.5.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:531eff30e4d368cb6255bc2328d070e35836aa4f282a0fb5f3a0cd7260257298", size = 5366453, upload-time = "2025-11-17T22:32:07.115Z" }, + { url = "https://files.pythonhosted.org/packages/8c/27/12607423d0a9c6bbbcc780ad19f1f6baa2b68b18ce4bddcdc122c4c68dc9/ml_dtypes-0.5.4-cp313-cp313t-win_amd64.whl", hash = "sha256:cb73dccfc991691c444acc8c0012bee8f2470da826a92e3a20bb333b1a7894e6", size = 225612, upload-time = "2025-11-17T22:32:08.615Z" }, + { url = "https://files.pythonhosted.org/packages/e5/80/5a5929e92c72936d5b19872c5fb8fc09327c1da67b3b68c6a13139e77e20/ml_dtypes-0.5.4-cp313-cp313t-win_arm64.whl", hash = "sha256:3bbbe120b915090d9dd1375e4684dd17a20a2491ef25d640a908281da85e73f1", size = 164145, upload-time = "2025-11-17T22:32:09.782Z" }, + { url = "https://files.pythonhosted.org/packages/72/4e/1339dc6e2557a344f5ba5590872e80346f76f6cb2ac3dd16e4666e88818c/ml_dtypes-0.5.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:2b857d3af6ac0d39db1de7c706e69c7f9791627209c3d6dedbfca8c7e5faec22", size = 673781, upload-time = "2025-11-17T22:32:11.364Z" }, + { url = "https://files.pythonhosted.org/packages/04/f9/067b84365c7e83bda15bba2b06c6ca250ce27b20630b1128c435fb7a09aa/ml_dtypes-0.5.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:805cef3a38f4eafae3a5bf9ebdcdb741d0bcfd9e1bd90eb54abd24f928cd2465", size = 5036145, upload-time = "2025-11-17T22:32:12.783Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/bb/82c7dcf38070b46172a517e2334e665c5bf374a262f99a283ea454bece7c/ml_dtypes-0.5.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14a4fd3228af936461db66faccef6e4f41c1d82fcc30e9f8d58a08916b1d811f", size = 5010230, upload-time = "2025-11-17T22:32:14.38Z" }, + { url = "https://files.pythonhosted.org/packages/e9/93/2bfed22d2498c468f6bcd0d9f56b033eaa19f33320389314c19ef6766413/ml_dtypes-0.5.4-cp314-cp314-win_amd64.whl", hash = "sha256:8c6a2dcebd6f3903e05d51960a8058d6e131fe69f952a5397e5dbabc841b6d56", size = 221032, upload-time = "2025-11-17T22:32:15.763Z" }, + { url = "https://files.pythonhosted.org/packages/76/a3/9c912fe6ea747bb10fe2f8f54d027eb265db05dfb0c6335e3e063e74e6e8/ml_dtypes-0.5.4-cp314-cp314-win_arm64.whl", hash = "sha256:5a0f68ca8fd8d16583dfa7793973feb86f2fbb56ce3966daf9c9f748f52a2049", size = 163353, upload-time = "2025-11-17T22:32:16.932Z" }, + { url = "https://files.pythonhosted.org/packages/cd/02/48aa7d84cc30ab4ee37624a2fd98c56c02326785750cd212bc0826c2f15b/ml_dtypes-0.5.4-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:bfc534409c5d4b0bf945af29e5d0ab075eae9eecbb549ff8a29280db822f34f9", size = 702085, upload-time = "2025-11-17T22:32:18.175Z" }, + { url = "https://files.pythonhosted.org/packages/5a/e7/85cb99fe80a7a5513253ec7faa88a65306be071163485e9a626fce1b6e84/ml_dtypes-0.5.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2314892cdc3fcf05e373d76d72aaa15fda9fb98625effa73c1d646f331fcecb7", size = 5355358, upload-time = "2025-11-17T22:32:19.7Z" }, + { url = "https://files.pythonhosted.org/packages/79/2b/a826ba18d2179a56e144aef69e57fb2ab7c464ef0b2111940ee8a3a223a2/ml_dtypes-0.5.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0d2ffd05a2575b1519dc928c0b93c06339eb67173ff53acb00724502cda231cf", size = 5366332, upload-time = "2025-11-17T22:32:21.193Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/44/f4d18446eacb20ea11e82f133ea8f86e2bf2891785b67d9da8d0ab0ef525/ml_dtypes-0.5.4-cp314-cp314t-win_amd64.whl", hash = "sha256:4381fe2f2452a2d7589689693d3162e876b3ddb0a832cde7a414f8e1adf7eab1", size = 236612, upload-time = "2025-11-17T22:32:22.579Z" }, + { url = "https://files.pythonhosted.org/packages/ad/3f/3d42e9a78fe5edf792a83c074b13b9b770092a4fbf3462872f4303135f09/ml_dtypes-0.5.4-cp314-cp314t-win_arm64.whl", hash = "sha256:11942cbf2cf92157db91e5022633c0d9474d4dfd813a909383bd23ce828a4b7d", size = 168825, upload-time = "2025-11-17T22:32:23.766Z" }, ] [[package]] @@ -2789,7 +2581,7 @@ wheels = [ [[package]] name = "multi-storage-client" -version = "0.33.0" +version = "0.36.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -2802,26 +2594,27 @@ dependencies = [ { name = "python-dateutil" }, { name = "pyyaml" }, { name = "tqdm" }, + { name = "tzdata" }, { name = "wcmatch" }, { name = "xattr" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/c4/6279fb7d4b8b0a7af060047d592f00f8d49c547adfebe50bcd8d0d2dc8a5/multi_storage_client-0.33.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:df52b3040ef5698c6388fa589bd63812ae0d2f967d358a792abcad5638686590", size = 5282006, upload-time = "2025-10-23T03:45:37.761Z" }, - { url = "https://files.pythonhosted.org/packages/22/3b/23d8beccd73b887c4552bf884275611255b5028388fa3317365cd56c2a93/multi_storage_client-0.33.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:370da04b1e56a601ba505a29d42fcabc19b583e10d725a37bc0c11ba3573d211", size = 5403083, upload-time = "2025-10-23T03:53:11.998Z" }, - { url = "https://files.pythonhosted.org/packages/b0/ad/dc355d05fd369da0d800e5f7de24da0393f542c5a6f775f6bcee7edcacb1/multi_storage_client-0.33.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c57749a28ec5d49440f465fd73e4e2feaab18ece9b6e57c73395308b41950f66", size = 3178432, upload-time = 
"2025-10-23T04:07:00.543Z" }, - { url = "https://files.pythonhosted.org/packages/e0/ad/97b54419d8a58f696b85504568391a627641152f80650d7d2697fc2702ed/multi_storage_client-0.33.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7d95f5fe094aab00a240bf6aa11dfe85bec293b76b3688ec3a9c33d86c751d2", size = 3351102, upload-time = "2025-10-23T03:47:47.622Z" }, - { url = "https://files.pythonhosted.org/packages/52/28/1038a68b9df1b179a61967ce9f7d2e80b9954cdb289801afecde5f7660db/multi_storage_client-0.33.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4b5a0f5a0b7684835be20ae6782070884982a86665e9bab317375a56a20294d1", size = 5281523, upload-time = "2025-10-23T04:06:36.671Z" }, - { url = "https://files.pythonhosted.org/packages/6c/c5/e18de5e2a2671efdc0a12383b8d63f523044ca453525725b3450d0179c0e/multi_storage_client-0.33.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:0db694311f90f44ee8f6f7734a14a0857738a467f2ae201649218a3ecf1f6ab2", size = 5403353, upload-time = "2025-10-23T04:07:25.941Z" }, - { url = "https://files.pythonhosted.org/packages/7e/c9/d9f65eb2370151dbbb06925f4216ee017e6cdbf7657263fd98e60944e52b/multi_storage_client-0.33.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cbe3a0b856f0b968f9fc693670a521b5a995b625351241ca008f866fdfff62a", size = 3180052, upload-time = "2025-10-23T03:57:32.797Z" }, - { url = "https://files.pythonhosted.org/packages/e7/38/08b9d84c93b19ae87caf542ae77f17dfa44a85281ba09de660ffcf3a7718/multi_storage_client-0.33.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:018e7e82255feeff973ff02563f11a30f5e507e4cbc87a2167a9568740144ef2", size = 3351389, upload-time = "2025-10-23T04:02:07.348Z" }, - { url = "https://files.pythonhosted.org/packages/6a/31/c95634a27723b5ba9d2d74158444cc5e40b151b51ae59ca196fc9993f039/multi_storage_client-0.33.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:030b3a592c6352605e9ebdb8d9303dd42daf5d171ffa684f3283d4a5c6e2edfe", size = 
5273976, upload-time = "2025-10-23T04:04:35.99Z" }, - { url = "https://files.pythonhosted.org/packages/8c/cf/82d1778d73c3baaec331da4ae8d01fa7934bcd73336aa88a08d86d080347/multi_storage_client-0.33.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:14dc0ace16d3830917427d6376d14ef62bd053fb2509f893998555ca1e9c4dcb", size = 5400735, upload-time = "2025-10-23T03:58:37.149Z" }, - { url = "https://files.pythonhosted.org/packages/fc/34/a6194ec725ef80c02de58b5ed3520bb1711807df75a27f7214effd22df34/multi_storage_client-0.33.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2821765d5c6de365b5b1dcdc7cf2ebba719ff4061fd02975639629f8aa319f6", size = 3182623, upload-time = "2025-10-23T04:03:29.551Z" }, - { url = "https://files.pythonhosted.org/packages/8f/36/7ec85178fd1dd69c278407a82acaccfb806449deda13f3dbd41f653d73bd/multi_storage_client-0.33.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f92f89480c58067fa53c178785b86e7650e16f277a61a732a8a7019173b16129", size = 3352104, upload-time = "2025-10-23T04:08:51.005Z" }, - { url = "https://files.pythonhosted.org/packages/88/ef/f2eb2efefb0e0588b29ed573b8354ecd72c38e6143da7ed5ecf53e859bf8/multi_storage_client-0.33.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed9af7e77e3cbac1f614816062b36975dcbc610bd3f8c86741d48aa18c718781", size = 5272154, upload-time = "2025-10-23T04:07:49.572Z" }, - { url = "https://files.pythonhosted.org/packages/1e/49/050aa4fccb2579d2ef5bd0d27169ec98fe85c92bba7a2c31154c491a4f75/multi_storage_client-0.33.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:c9d75e95a266ee858cf20c88ed255021552de67a40af9c8884d2fc22037dcd2b", size = 5399474, upload-time = "2025-10-23T04:09:14.545Z" }, - { url = "https://files.pythonhosted.org/packages/f6/4b/70c2df3b60c28360f185188d351e9c3958b702614963a09ffb1dc251c1ca/multi_storage_client-0.33.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:48195a2ab9e6e9a2763bde17184cad2bdef82684353e210d0d325f20cea18869", size = 3181788, upload-time = "2025-10-23T04:03:10.404Z" }, - { url = "https://files.pythonhosted.org/packages/9b/96/5008852677fdad10eb9d8dd08a6ea58c6f7e820199a3b2c56607186ac6d5/multi_storage_client-0.33.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd64403efdcee2a6efcf7bfdb01422dd174c146014563b09f44590346fd835e6", size = 3351269, upload-time = "2025-10-23T04:00:34.714Z" }, + { url = "https://files.pythonhosted.org/packages/be/5f/8011fd041f695670b339c25f059b68207c315250ccc25a08f190bff78318/multi_storage_client-0.36.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:763cdb5e24b78adf33882b1d1c0d15021cc2c0088ffc6e7b0269259f0cd45fd2", size = 5299321, upload-time = "2025-11-26T20:03:58.147Z" }, + { url = "https://files.pythonhosted.org/packages/51/06/cfd17d307fe29fbbce9f196ec1d8dda3f93fd44711c0adb282d9c393a2b2/multi_storage_client-0.36.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:eb84ea0bdffcfddf9beb7239c6d0b1950a67a0afe36ef970da70ba4ab373c0c9", size = 5420867, upload-time = "2025-11-26T20:05:32.445Z" }, + { url = "https://files.pythonhosted.org/packages/7c/7f/bf22f9c67c70d5ec2f6a7a4798cb106f3023bf25ba6c21b0ade1a53fa5b3/multi_storage_client-0.36.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff03a0213ce1377abee61e8deb87607f0ccd35c245fbaab2fee51d2e591e833e", size = 3188237, upload-time = "2025-11-26T20:01:51.354Z" }, + { url = "https://files.pythonhosted.org/packages/fb/20/c0c019b3dc7719f79c1826364fc9c3e1bbe9b00246b1d7414ce2b4defd0b/multi_storage_client-0.36.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f16e577ef4ee6f8ac481b3f2290e7b0525676efd82c71fb694ba4e6c65a8facd", size = 3363259, upload-time = "2025-11-26T20:00:10.679Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/f8/eea6be7f4258c811373dc989e8eaa23a404499c2574059f6fd876d6904e4/multi_storage_client-0.36.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6c913b132573fbd7a5ada63086d3ce2669b913b79206f86867cc674d57b9164d", size = 5299844, upload-time = "2025-11-26T20:00:32.46Z" }, + { url = "https://files.pythonhosted.org/packages/df/aa/b73441dc17097ee92e7efac5080e2cfb8fe4515dd4dc91ca351829e6b7a9/multi_storage_client-0.36.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:4dd2ccf67deae403098a5e867ce33d35ce348d2acd1a743c9ef485b3b1eea65c", size = 5424007, upload-time = "2025-11-26T19:55:30.305Z" }, + { url = "https://files.pythonhosted.org/packages/54/d6/850550de6b0dc740ced2f8fbf83f13f757860b5fdaa652e477c567c01f34/multi_storage_client-0.36.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04b31b6a5d6a3c90a592b23a4b90368fa1dcca8cb03f76a862d307f8b072c1d3", size = 3188451, upload-time = "2025-11-26T19:56:32.191Z" }, + { url = "https://files.pythonhosted.org/packages/a3/c5/93e038c0cce46cb9b1b8e19f7215ce3e7fa1af5e0a9662f36dfe47062f7e/multi_storage_client-0.36.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:252f84116f674962eabd066e16040f0304f6191c06ab09ef2ec02dbfd2c4d2ea", size = 3366554, upload-time = "2025-11-26T19:58:37.742Z" }, + { url = "https://files.pythonhosted.org/packages/28/a2/46320db394150a2f0547930b902e8ad045a084fb519f408e2c9b4ca673a0/multi_storage_client-0.36.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2058e8e8f8fd9eef033171b0bf1966596e9862c7f20c2886101ad979996c453b", size = 5293778, upload-time = "2025-11-26T20:07:11.731Z" }, + { url = "https://files.pythonhosted.org/packages/00/2d/658af3b4104c4f2aa2621469482dca8270490601e98d8f7997361499adaa/multi_storage_client-0.36.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:22b69c7f3c9ffa166f38bafa7e08f6b664a5dbee8c88d5d740bed719e6f410a1", size = 5418642, upload-time = "2025-11-26T19:58:15.717Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/2f/6441794bf8dc195d614d63ad2b7068ad7703972fd6f960d43202d29748b1/multi_storage_client-0.36.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b384fb326637e79706ff706e60f384b24fdbcc824420bb66ef615a9ef5ffb4ec", size = 3194133, upload-time = "2025-11-26T20:05:54.618Z" }, + { url = "https://files.pythonhosted.org/packages/0e/ba/b07361ff84e5bd263e299b03776382f59bd92862573c915dd705a09f3c1d/multi_storage_client-0.36.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7111567b971a68719c0eb68245d49a0a3c3bf5af2f609351446f20ac3e83c0d5", size = 3364563, upload-time = "2025-11-26T20:04:20.3Z" }, + { url = "https://files.pythonhosted.org/packages/f9/4a/cbd61589a457e2f4fbacd08b7e7dd11cdb74690857f4b40042844b1ff894/multi_storage_client-0.36.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a8137558d5f05e4722c54540e2d6067ea61e9ce3d736fa9cb5c541c7f94d1b48", size = 5293550, upload-time = "2025-11-26T20:03:36.459Z" }, + { url = "https://files.pythonhosted.org/packages/a7/3d/7499a9d537fa950a9acf11604b1f9372ed2cadd582b55f1c7cb885ce6f40/multi_storage_client-0.36.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:5394c5e040c32433b42e902d9fcf03f8a475c5c9ff1cca80743b2cb944c8af9e", size = 5417538, upload-time = "2025-11-26T20:06:16.782Z" }, + { url = "https://files.pythonhosted.org/packages/d7/c3/1b1adc3b3b8569d258a34dbedb6a8c51fc94b947b2df276e251f0f1e23a2/multi_storage_client-0.36.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:195e8c8d57d812b73efd41b96cd60825c484d317ec86379fad3e435e9365a4a6", size = 3193426, upload-time = "2025-11-26T20:00:56.034Z" }, + { url = "https://files.pythonhosted.org/packages/60/f5/f8b97a87d928057b493733760f37de70ae5ffff84b86f6efae101cdd57a2/multi_storage_client-0.36.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8402d0e1cefedf38ad9eefe8b3c56d3a44cfec7775ef711da18e7dbf72669444", size = 3363531, 
upload-time = "2025-11-26T20:02:35.296Z" }, ] [[package]] @@ -3025,7 +2818,7 @@ dependencies = [ { name = "jinja2" }, { name = "leptonai" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "networkx", version = "3.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "omegaconf" }, { name = "packaging" }, { name = "rich" }, @@ -3049,51 +2842,21 @@ wheels = [ [[package]] name = "networkx" -version = "3.5" +version = "3.6" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and 
platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and 
sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and 
sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", -] -sdist = { url = 
"https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload-time = "2025-05-29T11:35:04.961Z" }, + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version == '3.13.*' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and sys_platform == 'linux'", + "python_full_version >= '3.14' and sys_platform != 'linux'", + "python_full_version == '3.13.*' and sys_platform != 'linux'", + "python_full_version == '3.12.*' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'linux'", +] +sdist = { url = "https://files.pythonhosted.org/packages/e8/fc/7b6fd4d22c8c4dc5704430140d8b3f520531d4fe7328b8f8d03f5a7950e8/networkx-3.6.tar.gz", hash = "sha256:285276002ad1f7f7da0f7b42f004bcba70d381e936559166363707fdad3d72ad", size = 2511464, upload-time = "2025-11-24T03:03:47.158Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c7/d64168da60332c17d24c0d2f08bdf3987e8d1ae9d84b5bbd0eec2eb26a55/networkx-3.6-py3-none-any.whl", hash = "sha256:cdb395b105806062473d3be36458d8f1459a4e4b98e236a66c3a48996e07684f", size = 2063713, upload-time = "2025-11-24T03:03:45.21Z" }, ] [[package]] @@ -3138,170 +2901,373 @@ wheels = [ ] [[package]] -name = "numcodecs" -version = "0.13.1" +name = "numpy" +version = "2.2.6" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'linux'", "python_full_version < '3.11' and 
sys_platform != 'linux'", ] -dependencies = [ - { name = "numpy", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/85/56/8895a76abe4ec94ebd01eeb6d74f587bc4cddd46569670e1402852a5da13/numcodecs-0.13.1.tar.gz", hash = "sha256:a3cf37881df0898f3a9c0d4477df88133fe85185bffe57ba31bcc2fa207709bc", size = 5955215, upload-time = "2024-10-09T16:28:00.188Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/14/c0/6d72cde772bcec196b7188731d41282993b2958440f77fdf0db216f722da/numcodecs-0.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:96add4f783c5ce57cc7e650b6cac79dd101daf887c479a00a29bc1487ced180b", size = 1580012, upload-time = "2024-10-09T16:27:19.069Z" }, - { url = "https://files.pythonhosted.org/packages/94/1d/f81fc1fa9210bbea97258242393a1f9feab4f6d8fb201f81f76003005e4b/numcodecs-0.13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:237b7171609e868a20fd313748494444458ccd696062f67e198f7f8f52000c15", size = 1176919, upload-time = "2024-10-09T16:27:21.634Z" }, - { url = "https://files.pythonhosted.org/packages/16/e4/b9ec2f4dfc34ecf724bc1beb96a9f6fa9b91801645688ffadacd485089da/numcodecs-0.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96e42f73c31b8c24259c5fac6adba0c3ebf95536e37749dc6c62ade2989dca28", size = 8625842, upload-time = "2024-10-09T16:27:24.168Z" }, - { url = "https://files.pythonhosted.org/packages/fe/90/299952e1477954ec4f92813fa03e743945e3ff711bb4f6c9aace431cb3da/numcodecs-0.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:eda7d7823c9282e65234731fd6bd3986b1f9e035755f7fed248d7d366bb291ab", size = 828638, upload-time = "2024-10-09T16:27:27.063Z" }, - { url = "https://files.pythonhosted.org/packages/f0/78/34b8e869ef143e88d62e8231f4dbfcad85e5c41302a11fc5bd2228a13df5/numcodecs-0.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2eda97dd2f90add98df6d295f2c6ae846043396e3d51a739ca5db6c03b5eb666", size = 1580199, upload-time = "2024-10-09T16:27:29.336Z" }, 
- { url = "https://files.pythonhosted.org/packages/3b/cf/f70797d86bb585d258d1e6993dced30396f2044725b96ce8bcf87a02be9c/numcodecs-0.13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2a86f5367af9168e30f99727ff03b27d849c31ad4522060dde0bce2923b3a8bc", size = 1177203, upload-time = "2024-10-09T16:27:31.011Z" }, - { url = "https://files.pythonhosted.org/packages/a8/b5/d14ad69b63fde041153dfd05d7181a49c0d4864de31a7a1093c8370da957/numcodecs-0.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233bc7f26abce24d57e44ea8ebeb5cd17084690b4e7409dd470fdb75528d615f", size = 8868743, upload-time = "2024-10-09T16:27:32.833Z" }, - { url = "https://files.pythonhosted.org/packages/13/d4/27a7b5af0b33f6d61e198faf177fbbf3cb83ff10d9d1a6857b7efc525ad5/numcodecs-0.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:796b3e6740107e4fa624cc636248a1580138b3f1c579160f260f76ff13a4261b", size = 829603, upload-time = "2024-10-09T16:27:35.415Z" }, - { url = "https://files.pythonhosted.org/packages/37/3a/bc09808425e7d3df41e5fc73fc7a802c429ba8c6b05e55f133654ade019d/numcodecs-0.13.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5195bea384a6428f8afcece793860b1ab0ae28143c853f0b2b20d55a8947c917", size = 1575806, upload-time = "2024-10-09T16:27:37.804Z" }, - { url = "https://files.pythonhosted.org/packages/3a/cc/dc74d0bfdf9ec192332a089d199f1e543e747c556b5659118db7a437dcca/numcodecs-0.13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3501a848adaddce98a71a262fee15cd3618312692aa419da77acd18af4a6a3f6", size = 1178233, upload-time = "2024-10-09T16:27:40.169Z" }, - { url = "https://files.pythonhosted.org/packages/d4/ce/434e8e3970b8e92ae9ab6d9db16cb9bc7aa1cd02e17c11de6848224100a1/numcodecs-0.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da2230484e6102e5fa3cc1a5dd37ca1f92dfbd183d91662074d6f7574e3e8f53", size = 8857827, upload-time = "2024-10-09T16:27:42.743Z" }, - { url = 
"https://files.pythonhosted.org/packages/83/e7/1d8b1b266a92f9013c755b1c146c5ad71a2bff147ecbc67f86546a2e4d6a/numcodecs-0.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:e5db4824ebd5389ea30e54bc8aeccb82d514d28b6b68da6c536b8fa4596f4bca", size = 826539, upload-time = "2024-10-09T16:27:44.808Z" }, - { url = "https://files.pythonhosted.org/packages/83/8b/06771dead2cc4a8ae1ea9907737cf1c8d37a323392fa28f938a586373468/numcodecs-0.13.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7a60d75179fd6692e301ddfb3b266d51eb598606dcae7b9fc57f986e8d65cb43", size = 1571660, upload-time = "2024-10-09T16:27:47.125Z" }, - { url = "https://files.pythonhosted.org/packages/f9/ea/d925bf85f92dfe4635356018da9fe4bfecb07b1c72f62b01c1bc47f936b1/numcodecs-0.13.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3f593c7506b0ab248961a3b13cb148cc6e8355662ff124ac591822310bc55ecf", size = 1169925, upload-time = "2024-10-09T16:27:49.512Z" }, - { url = "https://files.pythonhosted.org/packages/0f/d6/643a3839d571d8e439a2c77dc4b0b8cab18d96ac808e4a81dbe88e959ab6/numcodecs-0.13.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80d3071465f03522e776a31045ddf2cfee7f52df468b977ed3afdd7fe5869701", size = 8814257, upload-time = "2024-10-09T16:27:52.059Z" }, - { url = "https://files.pythonhosted.org/packages/a6/c5/f3e56bc9b4e438a287fff738993d6d11abef368c0328a612ac2842ba9fca/numcodecs-0.13.1-cp313-cp313-win_amd64.whl", hash = "sha256:90d3065ae74c9342048ae0046006f99dcb1388b7288da5a19b3bddf9c30c3176", size = 821887, upload-time = "2024-10-09T16:27:55.039Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 21165245, upload-time = "2025-05-17T21:27:58.555Z" }, + { url = "https://files.pythonhosted.org/packages/22/c2/4b9221495b2a132cc9d2eb862e21d42a009f5a60e45fc44b00118c174bff/numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90", size = 14360048, upload-time = "2025-05-17T21:28:21.406Z" }, + { url = "https://files.pythonhosted.org/packages/fd/77/dc2fcfc66943c6410e2bf598062f5959372735ffda175b39906d54f02349/numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163", size = 5340542, upload-time = "2025-05-17T21:28:30.931Z" }, + { url = "https://files.pythonhosted.org/packages/7a/4f/1cb5fdc353a5f5cc7feb692db9b8ec2c3d6405453f982435efc52561df58/numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf", size = 6878301, upload-time = "2025-05-17T21:28:41.613Z" }, + { url = "https://files.pythonhosted.org/packages/eb/17/96a3acd228cec142fcb8723bd3cc39c2a474f7dcf0a5d16731980bcafa95/numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83", size = 14297320, upload-time = "2025-05-17T21:29:02.78Z" }, + { url = "https://files.pythonhosted.org/packages/b4/63/3de6a34ad7ad6646ac7d2f55ebc6ad439dbbf9c4370017c50cf403fb19b5/numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915", size = 16801050, upload-time = "2025-05-17T21:29:27.675Z" }, + { url = 
"https://files.pythonhosted.org/packages/07/b6/89d837eddef52b3d0cec5c6ba0456c1bf1b9ef6a6672fc2b7873c3ec4e2e/numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680", size = 15807034, upload-time = "2025-05-17T21:29:51.102Z" }, + { url = "https://files.pythonhosted.org/packages/01/c8/dc6ae86e3c61cfec1f178e5c9f7858584049b6093f843bca541f94120920/numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289", size = 18614185, upload-time = "2025-05-17T21:30:18.703Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c5/0064b1b7e7c89137b471ccec1fd2282fceaae0ab3a9550f2568782d80357/numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d", size = 6527149, upload-time = "2025-05-17T21:30:29.788Z" }, + { url = "https://files.pythonhosted.org/packages/a3/dd/4b822569d6b96c39d1215dbae0582fd99954dcbcf0c1a13c61783feaca3f/numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3", size = 12904620, upload-time = "2025-05-17T21:30:48.994Z" }, + { url = "https://files.pythonhosted.org/packages/da/a8/4f83e2aa666a9fbf56d6118faaaf5f1974d456b1823fda0a176eff722839/numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae", size = 21176963, upload-time = "2025-05-17T21:31:19.36Z" }, + { url = "https://files.pythonhosted.org/packages/b3/2b/64e1affc7972decb74c9e29e5649fac940514910960ba25cd9af4488b66c/numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a", size = 14406743, upload-time = "2025-05-17T21:31:41.087Z" }, + { url = "https://files.pythonhosted.org/packages/4a/9f/0121e375000b5e50ffdd8b25bf78d8e1a5aa4cca3f185d41265198c7b834/numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", 
hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42", size = 5352616, upload-time = "2025-05-17T21:31:50.072Z" }, + { url = "https://files.pythonhosted.org/packages/31/0d/b48c405c91693635fbe2dcd7bc84a33a602add5f63286e024d3b6741411c/numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491", size = 6889579, upload-time = "2025-05-17T21:32:01.712Z" }, + { url = "https://files.pythonhosted.org/packages/52/b8/7f0554d49b565d0171eab6e99001846882000883998e7b7d9f0d98b1f934/numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a", size = 14312005, upload-time = "2025-05-17T21:32:23.332Z" }, + { url = "https://files.pythonhosted.org/packages/b3/dd/2238b898e51bd6d389b7389ffb20d7f4c10066d80351187ec8e303a5a475/numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf", size = 16821570, upload-time = "2025-05-17T21:32:47.991Z" }, + { url = "https://files.pythonhosted.org/packages/83/6c/44d0325722cf644f191042bf47eedad61c1e6df2432ed65cbe28509d404e/numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1", size = 15818548, upload-time = "2025-05-17T21:33:11.728Z" }, + { url = "https://files.pythonhosted.org/packages/ae/9d/81e8216030ce66be25279098789b665d49ff19eef08bfa8cb96d4957f422/numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab", size = 18620521, upload-time = "2025-05-17T21:33:39.139Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fd/e19617b9530b031db51b0926eed5345ce8ddc669bb3bc0044b23e275ebe8/numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47", size = 6525866, 
upload-time = "2025-05-17T21:33:50.273Z" }, + { url = "https://files.pythonhosted.org/packages/31/0a/f354fb7176b81747d870f7991dc763e157a934c717b67b58456bc63da3df/numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303", size = 12907455, upload-time = "2025-05-17T21:34:09.135Z" }, + { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" }, + { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" }, + { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" }, + { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" }, + { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" }, + { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" }, + { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" }, + { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" }, + { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, + { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, + { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, + { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, + { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" }, + { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, + { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, + { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, + { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash 
= "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, + { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, + { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, + { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, + { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, + { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, + { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, 
upload-time = "2025-05-17T21:43:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/9e/3b/d94a75f4dbf1ef5d321523ecac21ef23a3cd2ac8b78ae2aac40873590229/numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d", size = 21040391, upload-time = "2025-05-17T21:44:35.948Z" }, + { url = "https://files.pythonhosted.org/packages/17/f4/09b2fa1b58f0fb4f7c7963a1649c64c4d315752240377ed74d9cd878f7b5/numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db", size = 6786754, upload-time = "2025-05-17T21:44:47.446Z" }, + { url = "https://files.pythonhosted.org/packages/af/30/feba75f143bdc868a1cc3f44ccfa6c4b9ec522b36458e738cd00f67b573f/numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543", size = 16643476, upload-time = "2025-05-17T21:45:11.871Z" }, + { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload-time = "2025-05-17T21:45:31.426Z" }, ] [[package]] -name = "numcodecs" -version = "0.16.3" +name = "numpy" +version = "2.3.5" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", + "python_full_version >= '3.14' and sys_platform == 'linux'", + 
"python_full_version == '3.13.*' and sys_platform == 'linux'", "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", + "python_full_version >= '3.14' and sys_platform != 'linux'", + "python_full_version == '3.13.*' and sys_platform != 'linux'", "python_full_version == '3.12.*' and sys_platform != 'linux'", "python_full_version == '3.11.*' and sys_platform == 'linux'", "python_full_version == '3.11.*' and sys_platform != 'linux'", ] +sdist = { url = "https://files.pythonhosted.org/packages/76/65/21b3bc86aac7b8f2862db1e808f1ea22b028e30a225a34a5ede9bf8678f2/numpy-2.3.5.tar.gz", hash = "sha256:784db1dcdab56bf0517743e746dfb0f885fc68d948aba86eeec2cba234bdf1c0", size = 20584950, upload-time = "2025-11-16T22:52:42.067Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/77/84dd1d2e34d7e2792a236ba180b5e8fcc1e3e414e761ce0253f63d7f572e/numpy-2.3.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de5672f4a7b200c15a4127042170a694d4df43c992948f5e1af57f0174beed10", size = 17034641, upload-time = "2025-11-16T22:49:19.336Z" }, + { url = "https://files.pythonhosted.org/packages/2a/ea/25e26fa5837106cde46ae7d0b667e20f69cbbc0efd64cba8221411ab26ae/numpy-2.3.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:acfd89508504a19ed06ef963ad544ec6664518c863436306153e13e94605c218", size = 12528324, upload-time = "2025-11-16T22:49:22.582Z" }, + { url = "https://files.pythonhosted.org/packages/4d/1a/e85f0eea4cf03d6a0228f5c0256b53f2df4bc794706e7df019fc622e47f1/numpy-2.3.5-cp311-cp311-macosx_14_0_arm64.whl", hash = 
"sha256:ffe22d2b05504f786c867c8395de703937f934272eb67586817b46188b4ded6d", size = 5356872, upload-time = "2025-11-16T22:49:25.408Z" }, + { url = "https://files.pythonhosted.org/packages/5c/bb/35ef04afd567f4c989c2060cde39211e4ac5357155c1833bcd1166055c61/numpy-2.3.5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:872a5cf366aec6bb1147336480fef14c9164b154aeb6542327de4970282cd2f5", size = 6893148, upload-time = "2025-11-16T22:49:27.549Z" }, + { url = "https://files.pythonhosted.org/packages/f2/2b/05bbeb06e2dff5eab512dfc678b1cc5ee94d8ac5956a0885c64b6b26252b/numpy-2.3.5-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3095bdb8dd297e5920b010e96134ed91d852d81d490e787beca7e35ae1d89cf7", size = 14557282, upload-time = "2025-11-16T22:49:30.964Z" }, + { url = "https://files.pythonhosted.org/packages/65/fb/2b23769462b34398d9326081fad5655198fcf18966fcb1f1e49db44fbf31/numpy-2.3.5-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8cba086a43d54ca804ce711b2a940b16e452807acebe7852ff327f1ecd49b0d4", size = 16897903, upload-time = "2025-11-16T22:49:34.191Z" }, + { url = "https://files.pythonhosted.org/packages/ac/14/085f4cf05fc3f1e8aa95e85404e984ffca9b2275a5dc2b1aae18a67538b8/numpy-2.3.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6cf9b429b21df6b99f4dee7a1218b8b7ffbbe7df8764dc0bd60ce8a0708fed1e", size = 16341672, upload-time = "2025-11-16T22:49:37.2Z" }, + { url = "https://files.pythonhosted.org/packages/6f/3b/1f73994904142b2aa290449b3bb99772477b5fd94d787093e4f24f5af763/numpy-2.3.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:396084a36abdb603546b119d96528c2f6263921c50df3c8fd7cb28873a237748", size = 18838896, upload-time = "2025-11-16T22:49:39.727Z" }, + { url = "https://files.pythonhosted.org/packages/cd/b9/cf6649b2124f288309ffc353070792caf42ad69047dcc60da85ee85fea58/numpy-2.3.5-cp311-cp311-win32.whl", hash = "sha256:b0c7088a73aef3d687c4deef8452a3ac7c1be4e29ed8bf3b366c8111128ac60c", size = 6563608, 
upload-time = "2025-11-16T22:49:42.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/44/9fe81ae1dcc29c531843852e2874080dc441338574ccc4306b39e2ff6e59/numpy-2.3.5-cp311-cp311-win_amd64.whl", hash = "sha256:a414504bef8945eae5f2d7cb7be2d4af77c5d1cb5e20b296c2c25b61dff2900c", size = 13078442, upload-time = "2025-11-16T22:49:43.99Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a7/f99a41553d2da82a20a2f22e93c94f928e4490bb447c9ff3c4ff230581d3/numpy-2.3.5-cp311-cp311-win_arm64.whl", hash = "sha256:0cd00b7b36e35398fa2d16af7b907b65304ef8bb4817a550e06e5012929830fa", size = 10458555, upload-time = "2025-11-16T22:49:47.092Z" }, + { url = "https://files.pythonhosted.org/packages/44/37/e669fe6cbb2b96c62f6bbedc6a81c0f3b7362f6a59230b23caa673a85721/numpy-2.3.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:74ae7b798248fe62021dbf3c914245ad45d1a6b0cb4a29ecb4b31d0bfbc4cc3e", size = 16733873, upload-time = "2025-11-16T22:49:49.84Z" }, + { url = "https://files.pythonhosted.org/packages/c5/65/df0db6c097892c9380851ab9e44b52d4f7ba576b833996e0080181c0c439/numpy-2.3.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee3888d9ff7c14604052b2ca5535a30216aa0a58e948cdd3eeb8d3415f638769", size = 12259838, upload-time = "2025-11-16T22:49:52.863Z" }, + { url = "https://files.pythonhosted.org/packages/5b/e1/1ee06e70eb2136797abe847d386e7c0e830b67ad1d43f364dd04fa50d338/numpy-2.3.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:612a95a17655e213502f60cfb9bf9408efdc9eb1d5f50535cc6eb365d11b42b5", size = 5088378, upload-time = "2025-11-16T22:49:55.055Z" }, + { url = "https://files.pythonhosted.org/packages/6d/9c/1ca85fb86708724275103b81ec4cf1ac1d08f465368acfc8da7ab545bdae/numpy-2.3.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3101e5177d114a593d79dd79658650fe28b5a0d8abeb8ce6f437c0e6df5be1a4", size = 6628559, upload-time = "2025-11-16T22:49:57.371Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/78/fcd41e5a0ce4f3f7b003da85825acddae6d7ecb60cf25194741b036ca7d6/numpy-2.3.5-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b973c57ff8e184109db042c842423ff4f60446239bd585a5131cc47f06f789d", size = 14250702, upload-time = "2025-11-16T22:49:59.632Z" }, + { url = "https://files.pythonhosted.org/packages/b6/23/2a1b231b8ff672b4c450dac27164a8b2ca7d9b7144f9c02d2396518352eb/numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0d8163f43acde9a73c2a33605353a4f1bc4798745a8b1d73183b28e5b435ae28", size = 16606086, upload-time = "2025-11-16T22:50:02.127Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c5/5ad26fbfbe2012e190cc7d5003e4d874b88bb18861d0829edc140a713021/numpy-2.3.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:51c1e14eb1e154ebd80e860722f9e6ed6ec89714ad2db2d3aa33c31d7c12179b", size = 16025985, upload-time = "2025-11-16T22:50:04.536Z" }, + { url = "https://files.pythonhosted.org/packages/d2/fa/dd48e225c46c819288148d9d060b047fd2a6fb1eb37eae25112ee4cb4453/numpy-2.3.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b46b4ec24f7293f23adcd2d146960559aaf8020213de8ad1909dba6c013bf89c", size = 18542976, upload-time = "2025-11-16T22:50:07.557Z" }, + { url = "https://files.pythonhosted.org/packages/05/79/ccbd23a75862d95af03d28b5c6901a1b7da4803181513d52f3b86ed9446e/numpy-2.3.5-cp312-cp312-win32.whl", hash = "sha256:3997b5b3c9a771e157f9aae01dd579ee35ad7109be18db0e85dbdbe1de06e952", size = 6285274, upload-time = "2025-11-16T22:50:10.746Z" }, + { url = "https://files.pythonhosted.org/packages/2d/57/8aeaf160312f7f489dea47ab61e430b5cb051f59a98ae68b7133ce8fa06a/numpy-2.3.5-cp312-cp312-win_amd64.whl", hash = "sha256:86945f2ee6d10cdfd67bcb4069c1662dd711f7e2a4343db5cecec06b87cf31aa", size = 12782922, upload-time = "2025-11-16T22:50:12.811Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/a6/aae5cc2ca78c45e64b9ef22f089141d661516856cf7c8a54ba434576900d/numpy-2.3.5-cp312-cp312-win_arm64.whl", hash = "sha256:f28620fe26bee16243be2b7b874da327312240a7cdc38b769a697578d2100013", size = 10194667, upload-time = "2025-11-16T22:50:16.16Z" }, + { url = "https://files.pythonhosted.org/packages/db/69/9cde09f36da4b5a505341180a3f2e6fadc352fd4d2b7096ce9778db83f1a/numpy-2.3.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d0f23b44f57077c1ede8c5f26b30f706498b4862d3ff0a7298b8411dd2f043ff", size = 16728251, upload-time = "2025-11-16T22:50:19.013Z" }, + { url = "https://files.pythonhosted.org/packages/79/fb/f505c95ceddd7027347b067689db71ca80bd5ecc926f913f1a23e65cf09b/numpy-2.3.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa5bc7c5d59d831d9773d1170acac7893ce3a5e130540605770ade83280e7188", size = 12254652, upload-time = "2025-11-16T22:50:21.487Z" }, + { url = "https://files.pythonhosted.org/packages/78/da/8c7738060ca9c31b30e9301ee0cf6c5ffdbf889d9593285a1cead337f9a5/numpy-2.3.5-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ccc933afd4d20aad3c00bcef049cb40049f7f196e0397f1109dba6fed63267b0", size = 5083172, upload-time = "2025-11-16T22:50:24.562Z" }, + { url = "https://files.pythonhosted.org/packages/a4/b4/ee5bb2537fb9430fd2ef30a616c3672b991a4129bb1c7dcc42aa0abbe5d7/numpy-2.3.5-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:afaffc4393205524af9dfa400fa250143a6c3bc646c08c9f5e25a9f4b4d6a903", size = 6622990, upload-time = "2025-11-16T22:50:26.47Z" }, + { url = "https://files.pythonhosted.org/packages/95/03/dc0723a013c7d7c19de5ef29e932c3081df1c14ba582b8b86b5de9db7f0f/numpy-2.3.5-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c75442b2209b8470d6d5d8b1c25714270686f14c749028d2199c54e29f20b4d", size = 14248902, upload-time = "2025-11-16T22:50:28.861Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/10/ca162f45a102738958dcec8023062dad0cbc17d1ab99d68c4e4a6c45fb2b/numpy-2.3.5-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11e06aa0af8c0f05104d56450d6093ee639e15f24ecf62d417329d06e522e017", size = 16597430, upload-time = "2025-11-16T22:50:31.56Z" }, + { url = "https://files.pythonhosted.org/packages/2a/51/c1e29be863588db58175175f057286900b4b3327a1351e706d5e0f8dd679/numpy-2.3.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ed89927b86296067b4f81f108a2271d8926467a8868e554eaf370fc27fa3ccaf", size = 16024551, upload-time = "2025-11-16T22:50:34.242Z" }, + { url = "https://files.pythonhosted.org/packages/83/68/8236589d4dbb87253d28259d04d9b814ec0ecce7cb1c7fed29729f4c3a78/numpy-2.3.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51c55fe3451421f3a6ef9a9c1439e82101c57a2c9eab9feb196a62b1a10b58ce", size = 18533275, upload-time = "2025-11-16T22:50:37.651Z" }, + { url = "https://files.pythonhosted.org/packages/40/56/2932d75b6f13465239e3b7b7e511be27f1b8161ca2510854f0b6e521c395/numpy-2.3.5-cp313-cp313-win32.whl", hash = "sha256:1978155dd49972084bd6ef388d66ab70f0c323ddee6f693d539376498720fb7e", size = 6277637, upload-time = "2025-11-16T22:50:40.11Z" }, + { url = "https://files.pythonhosted.org/packages/0c/88/e2eaa6cffb115b85ed7c7c87775cb8bcf0816816bc98ca8dbfa2ee33fe6e/numpy-2.3.5-cp313-cp313-win_amd64.whl", hash = "sha256:00dc4e846108a382c5869e77c6ed514394bdeb3403461d25a829711041217d5b", size = 12779090, upload-time = "2025-11-16T22:50:42.503Z" }, + { url = "https://files.pythonhosted.org/packages/8f/88/3f41e13a44ebd4034ee17baa384acac29ba6a4fcc2aca95f6f08ca0447d1/numpy-2.3.5-cp313-cp313-win_arm64.whl", hash = "sha256:0472f11f6ec23a74a906a00b48a4dcf3849209696dff7c189714511268d103ae", size = 10194710, upload-time = "2025-11-16T22:50:44.971Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/cb/71744144e13389d577f867f745b7df2d8489463654a918eea2eeb166dfc9/numpy-2.3.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:414802f3b97f3c1eef41e530aaba3b3c1620649871d8cb38c6eaff034c2e16bd", size = 16827292, upload-time = "2025-11-16T22:50:47.715Z" }, + { url = "https://files.pythonhosted.org/packages/71/80/ba9dc6f2a4398e7f42b708a7fdc841bb638d353be255655498edbf9a15a8/numpy-2.3.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5ee6609ac3604fa7780e30a03e5e241a7956f8e2fcfe547d51e3afa5247ac47f", size = 12378897, upload-time = "2025-11-16T22:50:51.327Z" }, + { url = "https://files.pythonhosted.org/packages/2e/6d/db2151b9f64264bcceccd51741aa39b50150de9b602d98ecfe7e0c4bff39/numpy-2.3.5-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:86d835afea1eaa143012a2d7a3f45a3adce2d7adc8b4961f0b362214d800846a", size = 5207391, upload-time = "2025-11-16T22:50:54.542Z" }, + { url = "https://files.pythonhosted.org/packages/80/ae/429bacace5ccad48a14c4ae5332f6aa8ab9f69524193511d60ccdfdc65fa/numpy-2.3.5-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:30bc11310e8153ca664b14c5f1b73e94bd0503681fcf136a163de856f3a50139", size = 6721275, upload-time = "2025-11-16T22:50:56.794Z" }, + { url = "https://files.pythonhosted.org/packages/74/5b/1919abf32d8722646a38cd527bc3771eb229a32724ee6ba340ead9b92249/numpy-2.3.5-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1062fde1dcf469571705945b0f221b73928f34a20c904ffb45db101907c3454e", size = 14306855, upload-time = "2025-11-16T22:50:59.208Z" }, + { url = "https://files.pythonhosted.org/packages/a5/87/6831980559434973bebc30cd9c1f21e541a0f2b0c280d43d3afd909b66d0/numpy-2.3.5-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce581db493ea1a96c0556360ede6607496e8bf9b3a8efa66e06477267bc831e9", size = 16657359, upload-time = "2025-11-16T22:51:01.991Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/91/c797f544491ee99fd00495f12ebb7802c440c1915811d72ac5b4479a3356/numpy-2.3.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:cc8920d2ec5fa99875b670bb86ddeb21e295cb07aa331810d9e486e0b969d946", size = 16093374, upload-time = "2025-11-16T22:51:05.291Z" }, + { url = "https://files.pythonhosted.org/packages/74/a6/54da03253afcbe7a72785ec4da9c69fb7a17710141ff9ac5fcb2e32dbe64/numpy-2.3.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9ee2197ef8c4f0dfe405d835f3b6a14f5fee7782b5de51ba06fb65fc9b36e9f1", size = 18594587, upload-time = "2025-11-16T22:51:08.585Z" }, + { url = "https://files.pythonhosted.org/packages/80/e9/aff53abbdd41b0ecca94285f325aff42357c6b5abc482a3fcb4994290b18/numpy-2.3.5-cp313-cp313t-win32.whl", hash = "sha256:70b37199913c1bd300ff6e2693316c6f869c7ee16378faf10e4f5e3275b299c3", size = 6405940, upload-time = "2025-11-16T22:51:11.541Z" }, + { url = "https://files.pythonhosted.org/packages/d5/81/50613fec9d4de5480de18d4f8ef59ad7e344d497edbef3cfd80f24f98461/numpy-2.3.5-cp313-cp313t-win_amd64.whl", hash = "sha256:b501b5fa195cc9e24fe102f21ec0a44dffc231d2af79950b451e0d99cea02234", size = 12920341, upload-time = "2025-11-16T22:51:14.312Z" }, + { url = "https://files.pythonhosted.org/packages/bb/ab/08fd63b9a74303947f34f0bd7c5903b9c5532c2d287bead5bdf4c556c486/numpy-2.3.5-cp313-cp313t-win_arm64.whl", hash = "sha256:a80afd79f45f3c4a7d341f13acbe058d1ca8ac017c165d3fa0d3de6bc1a079d7", size = 10262507, upload-time = "2025-11-16T22:51:16.846Z" }, + { url = "https://files.pythonhosted.org/packages/ba/97/1a914559c19e32d6b2e233cf9a6a114e67c856d35b1d6babca571a3e880f/numpy-2.3.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:bf06bc2af43fa8d32d30fae16ad965663e966b1a3202ed407b84c989c3221e82", size = 16735706, upload-time = "2025-11-16T22:51:19.558Z" }, + { url = "https://files.pythonhosted.org/packages/57/d4/51233b1c1b13ecd796311216ae417796b88b0616cfd8a33ae4536330748a/numpy-2.3.5-cp314-cp314-macosx_11_0_arm64.whl", hash 
= "sha256:052e8c42e0c49d2575621c158934920524f6c5da05a1d3b9bab5d8e259e045f0", size = 12264507, upload-time = "2025-11-16T22:51:22.492Z" }, + { url = "https://files.pythonhosted.org/packages/45/98/2fe46c5c2675b8306d0b4a3ec3494273e93e1226a490f766e84298576956/numpy-2.3.5-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:1ed1ec893cff7040a02c8aa1c8611b94d395590d553f6b53629a4461dc7f7b63", size = 5093049, upload-time = "2025-11-16T22:51:25.171Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0e/0698378989bb0ac5f1660c81c78ab1fe5476c1a521ca9ee9d0710ce54099/numpy-2.3.5-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:2dcd0808a421a482a080f89859a18beb0b3d1e905b81e617a188bd80422d62e9", size = 6626603, upload-time = "2025-11-16T22:51:27Z" }, + { url = "https://files.pythonhosted.org/packages/5e/a6/9ca0eecc489640615642a6cbc0ca9e10df70df38c4d43f5a928ff18d8827/numpy-2.3.5-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:727fd05b57df37dc0bcf1a27767a3d9a78cbbc92822445f32cc3436ba797337b", size = 14262696, upload-time = "2025-11-16T22:51:29.402Z" }, + { url = "https://files.pythonhosted.org/packages/c8/f6/07ec185b90ec9d7217a00eeeed7383b73d7e709dae2a9a021b051542a708/numpy-2.3.5-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fffe29a1ef00883599d1dc2c51aa2e5d80afe49523c261a74933df395c15c520", size = 16597350, upload-time = "2025-11-16T22:51:32.167Z" }, + { url = "https://files.pythonhosted.org/packages/75/37/164071d1dde6a1a84c9b8e5b414fa127981bad47adf3a6b7e23917e52190/numpy-2.3.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8f7f0e05112916223d3f438f293abf0727e1181b5983f413dfa2fefc4098245c", size = 16040190, upload-time = "2025-11-16T22:51:35.403Z" }, + { url = "https://files.pythonhosted.org/packages/08/3c/f18b82a406b04859eb026d204e4e1773eb41c5be58410f41ffa511d114ae/numpy-2.3.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2e2eb32ddb9ccb817d620ac1d8dae7c3f641c1e5f55f531a33e8ab97960a75b8", size = 
18536749, upload-time = "2025-11-16T22:51:39.698Z" }, + { url = "https://files.pythonhosted.org/packages/40/79/f82f572bf44cf0023a2fe8588768e23e1592585020d638999f15158609e1/numpy-2.3.5-cp314-cp314-win32.whl", hash = "sha256:66f85ce62c70b843bab1fb14a05d5737741e74e28c7b8b5a064de10142fad248", size = 6335432, upload-time = "2025-11-16T22:51:42.476Z" }, + { url = "https://files.pythonhosted.org/packages/a3/2e/235b4d96619931192c91660805e5e49242389742a7a82c27665021db690c/numpy-2.3.5-cp314-cp314-win_amd64.whl", hash = "sha256:e6a0bc88393d65807d751a614207b7129a310ca4fe76a74e5c7da5fa5671417e", size = 12919388, upload-time = "2025-11-16T22:51:45.275Z" }, + { url = "https://files.pythonhosted.org/packages/07/2b/29fd75ce45d22a39c61aad74f3d718e7ab67ccf839ca8b60866054eb15f8/numpy-2.3.5-cp314-cp314-win_arm64.whl", hash = "sha256:aeffcab3d4b43712bb7a60b65f6044d444e75e563ff6180af8f98dd4b905dfd2", size = 10476651, upload-time = "2025-11-16T22:51:47.749Z" }, + { url = "https://files.pythonhosted.org/packages/17/e1/f6a721234ebd4d87084cfa68d081bcba2f5cfe1974f7de4e0e8b9b2a2ba1/numpy-2.3.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:17531366a2e3a9e30762c000f2c43a9aaa05728712e25c11ce1dbe700c53ad41", size = 16834503, upload-time = "2025-11-16T22:51:50.443Z" }, + { url = "https://files.pythonhosted.org/packages/5c/1c/baf7ffdc3af9c356e1c135e57ab7cf8d247931b9554f55c467efe2c69eff/numpy-2.3.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d21644de1b609825ede2f48be98dfde4656aefc713654eeee280e37cadc4e0ad", size = 12381612, upload-time = "2025-11-16T22:51:53.609Z" }, + { url = "https://files.pythonhosted.org/packages/74/91/f7f0295151407ddc9ba34e699013c32c3c91944f9b35fcf9281163dc1468/numpy-2.3.5-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:c804e3a5aba5460c73955c955bdbd5c08c354954e9270a2c1565f62e866bdc39", size = 5210042, upload-time = "2025-11-16T22:51:56.213Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/3b/78aebf345104ec50dd50a4d06ddeb46a9ff5261c33bcc58b1c4f12f85ec2/numpy-2.3.5-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:cc0a57f895b96ec78969c34f682c602bf8da1a0270b09bc65673df2e7638ec20", size = 6724502, upload-time = "2025-11-16T22:51:58.584Z" }, + { url = "https://files.pythonhosted.org/packages/02/c6/7c34b528740512e57ef1b7c8337ab0b4f0bddf34c723b8996c675bc2bc91/numpy-2.3.5-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:900218e456384ea676e24ea6a0417f030a3b07306d29d7ad843957b40a9d8d52", size = 14308962, upload-time = "2025-11-16T22:52:01.698Z" }, + { url = "https://files.pythonhosted.org/packages/80/35/09d433c5262bc32d725bafc619e095b6a6651caf94027a03da624146f655/numpy-2.3.5-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09a1bea522b25109bf8e6f3027bd810f7c1085c64a0c7ce050c1676ad0ba010b", size = 16655054, upload-time = "2025-11-16T22:52:04.267Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ab/6a7b259703c09a88804fa2430b43d6457b692378f6b74b356155283566ac/numpy-2.3.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04822c00b5fd0323c8166d66c701dc31b7fbd252c100acd708c48f763968d6a3", size = 16091613, upload-time = "2025-11-16T22:52:08.651Z" }, + { url = "https://files.pythonhosted.org/packages/c2/88/330da2071e8771e60d1038166ff9d73f29da37b01ec3eb43cb1427464e10/numpy-2.3.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d6889ec4ec662a1a37eb4b4fb26b6100841804dac55bd9df579e326cdc146227", size = 18591147, upload-time = "2025-11-16T22:52:11.453Z" }, + { url = "https://files.pythonhosted.org/packages/51/41/851c4b4082402d9ea860c3626db5d5df47164a712cb23b54be028b184c1c/numpy-2.3.5-cp314-cp314t-win32.whl", hash = "sha256:93eebbcf1aafdf7e2ddd44c2923e2672e1010bddc014138b229e49725b4d6be5", size = 6479806, upload-time = "2025-11-16T22:52:14.641Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/30/d48bde1dfd93332fa557cff1972fbc039e055a52021fbef4c2c4b1eefd17/numpy-2.3.5-cp314-cp314t-win_amd64.whl", hash = "sha256:c8a9958e88b65c3b27e22ca2a076311636850b612d6bbfb76e8d156aacde2aaf", size = 13105760, upload-time = "2025-11-16T22:52:17.975Z" }, + { url = "https://files.pythonhosted.org/packages/2d/fd/4b5eb0b3e888d86aee4d198c23acec7d214baaf17ea93c1adec94c9518b9/numpy-2.3.5-cp314-cp314t-win_arm64.whl", hash = "sha256:6203fdf9f3dc5bdaed7319ad8698e685c7a3be10819f41d32a0723e611733b42", size = 10545459, upload-time = "2025-11-16T22:52:20.55Z" }, + { url = "https://files.pythonhosted.org/packages/c6/65/f9dea8e109371ade9c782b4e4756a82edf9d3366bca495d84d79859a0b79/numpy-2.3.5-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f0963b55cdd70fad460fa4c1341f12f976bb26cb66021a5580329bd498988310", size = 16910689, upload-time = "2025-11-16T22:52:23.247Z" }, + { url = "https://files.pythonhosted.org/packages/00/4f/edb00032a8fb92ec0a679d3830368355da91a69cab6f3e9c21b64d0bb986/numpy-2.3.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f4255143f5160d0de972d28c8f9665d882b5f61309d8362fdd3e103cf7bf010c", size = 12457053, upload-time = "2025-11-16T22:52:26.367Z" }, + { url = "https://files.pythonhosted.org/packages/16/a4/e8a53b5abd500a63836a29ebe145fc1ab1f2eefe1cfe59276020373ae0aa/numpy-2.3.5-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:a4b9159734b326535f4dd01d947f919c6eefd2d9827466a696c44ced82dfbc18", size = 5285635, upload-time = "2025-11-16T22:52:29.266Z" }, + { url = "https://files.pythonhosted.org/packages/a3/2f/37eeb9014d9c8b3e9c55bc599c68263ca44fdbc12a93e45a21d1d56df737/numpy-2.3.5-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:2feae0d2c91d46e59fcd62784a3a83b3fb677fead592ce51b5a6fbb4f95965ff", size = 6801770, upload-time = "2025-11-16T22:52:31.421Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/e4/68d2f474df2cb671b2b6c2986a02e520671295647dad82484cde80ca427b/numpy-2.3.5-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ffac52f28a7849ad7576293c0cb7b9f08304e8f7d738a8cb8a90ec4c55a998eb", size = 14391768, upload-time = "2025-11-16T22:52:33.593Z" }, + { url = "https://files.pythonhosted.org/packages/b8/50/94ccd8a2b141cb50651fddd4f6a48874acb3c91c8f0842b08a6afc4b0b21/numpy-2.3.5-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63c0e9e7eea69588479ebf4a8a270d5ac22763cc5854e9a7eae952a3908103f7", size = 16729263, upload-time = "2025-11-16T22:52:36.369Z" }, + { url = "https://files.pythonhosted.org/packages/2d/ee/346fa473e666fe14c52fcdd19ec2424157290a032d4c41f98127bfb31ac7/numpy-2.3.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f16417ec91f12f814b10bafe79ef77e70113a2f5f7018640e7425ff979253425", size = 12967213, upload-time = "2025-11-16T22:52:39.38Z" }, +] + +[[package]] +name = "nv-grouped-gemm" +version = "1.1.4.post6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "torch", marker = "sys_platform == 'never'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/05/79/87c45f32e661b25e0aaa1e325ba166511f57be5dff8f0fcabc12d3e73b64/nv_grouped_gemm-1.1.4.post6.tar.gz", hash = "sha256:dad6115f4b4ff7ceb0bc40ad44e923c13a24fc88cfe1e20b1a6b4c9cf24c445c", size = 26508, upload-time = "2025-10-10T18:52:29.508Z" } + +[[package]] +name = "nv-one-logger-core" +version = "2.3.1" +source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.11'" }, - { name = "typing-extensions", marker = "python_full_version >= '3.11'" }, + { name = "overrides" }, + { name = "pydantic" }, + { name = "strenum" }, + { name = "toml" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f6/48/6188e359b90a9d8a1850f2bc888c023e66f4a8b2b496820babbea414f008/numcodecs-0.16.3.tar.gz", hash = "sha256:53d705865faaf0a7927c973af3777532001c8fbb653de119c1e844608614d799", size = 6275704, upload-time = "2025-09-18T18:54:57.221Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/37/963095797035f371e0db6ea761f5aaccb624fc786af217115b423baeb0e2/nv_one_logger_core-2.3.1.tar.gz", hash = "sha256:cbb2f87604c78b96a302f32d87199902129d76153a73a20f8455a250b3246c1d", size = 52640, upload-time = "2025-10-29T21:11:55.812Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/cc/917a85972537498f2bbd7914047efc98babc8667587ceb9dcb228378978a/numcodecs-0.16.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:95c9f2a49bef10cf91ad614a761cba9bfe96656b60c12540e1080de5d909b4ca", size = 1642356, upload-time = "2025-09-18T18:54:36.402Z" }, - { url = "https://files.pythonhosted.org/packages/3b/6a/64c25a089e8537441fe67c09ecb7f3f7fb5d98cd04faf01f605d43aca41c/numcodecs-0.16.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2afe73d5ebaf9ca0cd5c83aad945da80d29a33d860a80d43a7248491d8813ff", size = 1169186, upload-time = "2025-09-18T18:54:37.838Z" }, - { url = 
"https://files.pythonhosted.org/packages/d8/a0/0de627baeb43e2045a3d4b3de99bf8b69af329a33df1ed4cda468d70c1fb/numcodecs-0.16.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:913f08194d82dcb37594e6705e6d4ae6ccd4b6571500b832fb3e4a155de1dfe8", size = 8341668, upload-time = "2025-09-18T18:54:39.444Z" }, - { url = "https://files.pythonhosted.org/packages/b6/0f/49d1f74a216149240c4b9403218111f11670bd11af0919fda357bb056bf2/numcodecs-0.16.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85a7f1cae9eb18b85709af46570bf9c60056e7155c4c8f610e8080c68124d0e5", size = 8866611, upload-time = "2025-09-18T18:54:41.168Z" }, - { url = "https://files.pythonhosted.org/packages/aa/51/03aece765108fe247717105b5131856546e5428f22a56a14ffdebd017424/numcodecs-0.16.3-cp311-cp311-win_amd64.whl", hash = "sha256:f7bb7f2c46eb7ec8a1c5f8d8fe1a72c222256dd6d6df5af9eaac7a6b905f3575", size = 806787, upload-time = "2025-09-18T18:54:42.78Z" }, - { url = "https://files.pythonhosted.org/packages/0d/78/e4b34803a3aa1d0769919695de4b133266c18c80c474d32ebc462fa1a9bd/numcodecs-0.16.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c77454d92941a335d148b0b822f5d4783103f392774d5d76283bbf7f21b49529", size = 1681108, upload-time = "2025-09-18T18:54:43.856Z" }, - { url = "https://files.pythonhosted.org/packages/25/cf/ca36f463b03a4097767d2a1c1b72f31810e8c6384e9449dd9b925203783c/numcodecs-0.16.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:270e7a33ee96bdf5c957acf25a2487002a233811a125a155c400c2f036b69c73", size = 1165589, upload-time = "2025-09-18T18:54:44.954Z" }, - { url = "https://files.pythonhosted.org/packages/ed/ae/670260c3c4b5ed34a0674561355f3d4ce7fcbdf09a667e5bc841526d271c/numcodecs-0.16.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12f43fa4a347d1dba775c4506a1c9b15b90144c258433b81f79f1c1b1a990db5", size = 8316365, upload-time = "2025-09-18T18:54:46.073Z" }, - { url = 
"https://files.pythonhosted.org/packages/bb/fa/94e022419c751a60ff0f53642ebae5ef81ed3cc3640f958588e3ad3dc18d/numcodecs-0.16.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44869ef564a50aa545215c6a0d42ba5bbc34e9715523fb2336ada3d1fb2b331d", size = 8846228, upload-time = "2025-09-18T18:54:47.858Z" }, - { url = "https://files.pythonhosted.org/packages/71/60/f23733589f3e059bf8589508acd23ffeec230bdf179f138a54f5ab16e0a6/numcodecs-0.16.3-cp312-cp312-win_amd64.whl", hash = "sha256:9aae6996172ba10c5f5111b2998709071b5aeba6b58b1ee0b26b61ed6aa7f2f4", size = 806260, upload-time = "2025-09-18T18:54:49.41Z" }, - { url = "https://files.pythonhosted.org/packages/3c/d5/d3536d06ac1e5fb848a3186958204082b68b106364c9a3669652dd786731/numcodecs-0.16.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:947406b01c20f2ce7ce2e631e7f21b782e8a9d4b57b374a41c9e7b1341a8f3a2", size = 1677129, upload-time = "2025-09-18T18:54:50.5Z" }, - { url = "https://files.pythonhosted.org/packages/e1/fd/b0513a3428dc2b38ec85eea771703ae69c49f09b9650d6c44c9105c80073/numcodecs-0.16.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7cf50e351398a34b45817974c411527629e88937b7683695e276afd65da6ed6f", size = 1159058, upload-time = "2025-09-18T18:54:51.675Z" }, - { url = "https://files.pythonhosted.org/packages/98/05/b7c127283cfb154a97abb284363825401b69302d71a28608af66f73257cc/numcodecs-0.16.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7938502fcc060ed9543814f38ca67048b33d7bd2667756e36e6b1060455b17e", size = 8260987, upload-time = "2025-09-18T18:54:52.883Z" }, - { url = "https://files.pythonhosted.org/packages/ff/46/320d960aff884bc63abaaf846ffa3de4803e83e8070b6f84c5688464839c/numcodecs-0.16.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:010d628c95be1214536fb22c0df4ced58da954b404b1fcb25ddebf64e4a3f7f3", size = 8805295, upload-time = "2025-09-18T18:54:54.698Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/ae/acc2e0f1f49ba32afa2174578f170673139248ef86f77e334f2619133867/numcodecs-0.16.3-cp313-cp313-win_amd64.whl", hash = "sha256:e83115e3c32de798c7b7164503e06aae9f9746c1cef564d029616eb44bd6cd90", size = 803204, upload-time = "2025-09-18T18:54:56.192Z" }, + { url = "https://files.pythonhosted.org/packages/ee/c4/ea91554c4fcbff66057f667690101d7a4b965605741350ac661b03fa6c46/nv_one_logger_core-2.3.1-py3-none-any.whl", hash = "sha256:0c8b77bcdac4daa1ea913bf8d4afd2a057bd5526e3654ac39f67caba157341a6", size = 63066, upload-time = "2025-10-29T21:11:52.753Z" }, ] -[package.optional-dependencies] -crc32c = [ - { name = "crc32c", marker = "python_full_version >= '3.11'" }, +[[package]] +name = "nv-one-logger-training-telemetry" +version = "2.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nv-one-logger-core" }, + { name = "strenum" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/21/016fa067967734d52f1ccf5a2a37a1a65216f2d7053bc2b85872cce956ca/nv_one_logger_training_telemetry-2.3.1.tar.gz", hash = "sha256:8c67940ea71799afaf1f46df3ba2f52f93aea26321c6f1c1d54aae02efc2a4af", size = 44435, upload-time = "2025-10-29T21:21:42.035Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/15/97e6e4ddfe5fc35bcee74a45b7c33fb73abb83713c7dfa26420b971a86c3/nv_one_logger_training_telemetry-2.3.1-py3-none-any.whl", hash = "sha256:5319443829b59378a498c3c62ac98973e14f31be675c229ff2b14e2fe109aa0b", size = 44140, upload-time = "2025-10-29T21:21:40.72Z" }, ] [[package]] -name = "numpy" -version = "1.26.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", size = 15786129, upload-time = "2024-02-06T00:26:44.495Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a7/94/ace0fdea5241a27d13543ee117cbc65868e82213fb31a8eb7fe9ff23f313/numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0", size = 20631468, upload-time = "2024-02-05T23:48:01.194Z" }, - { url = "https://files.pythonhosted.org/packages/20/f7/b24208eba89f9d1b58c1668bc6c8c4fd472b20c45573cb767f59d49fb0f6/numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a", size = 13966411, upload-time = "2024-02-05T23:48:29.038Z" }, - { url = "https://files.pythonhosted.org/packages/fc/a5/4beee6488160798683eed5bdb7eead455892c3b4e1f78d79d8d3f3b084ac/numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4", size = 14219016, upload-time = "2024-02-05T23:48:54.098Z" }, - { url = "https://files.pythonhosted.org/packages/4b/d7/ecf66c1cd12dc28b4040b15ab4d17b773b87fa9d29ca16125de01adb36cd/numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f", size = 18240889, upload-time = "2024-02-05T23:49:25.361Z" }, - { url = "https://files.pythonhosted.org/packages/24/03/6f229fe3187546435c4f6f89f6d26c129d4f5bed40552899fcf1f0bf9e50/numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a", size = 13876746, upload-time = "2024-02-05T23:49:51.983Z" }, - { url = "https://files.pythonhosted.org/packages/39/fe/39ada9b094f01f5a35486577c848fe274e374bbf8d8f472e1423a0bbd26d/numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2", size = 18078620, upload-time = "2024-02-05T23:50:22.515Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/ef/6ad11d51197aad206a9ad2286dc1aac6a378059e06e8cf22cd08ed4f20dc/numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07", size = 5972659, upload-time = "2024-02-05T23:50:35.834Z" }, - { url = "https://files.pythonhosted.org/packages/19/77/538f202862b9183f54108557bfda67e17603fc560c384559e769321c9d92/numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5", size = 15808905, upload-time = "2024-02-05T23:51:03.701Z" }, - { url = "https://files.pythonhosted.org/packages/11/57/baae43d14fe163fa0e4c47f307b6b2511ab8d7d30177c491960504252053/numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71", size = 20630554, upload-time = "2024-02-05T23:51:50.149Z" }, - { url = "https://files.pythonhosted.org/packages/1a/2e/151484f49fd03944c4a3ad9c418ed193cfd02724e138ac8a9505d056c582/numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef", size = 13997127, upload-time = "2024-02-05T23:52:15.314Z" }, - { url = "https://files.pythonhosted.org/packages/79/ae/7e5b85136806f9dadf4878bf73cf223fe5c2636818ba3ab1c585d0403164/numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e", size = 14222994, upload-time = "2024-02-05T23:52:47.569Z" }, - { url = "https://files.pythonhosted.org/packages/3a/d0/edc009c27b406c4f9cbc79274d6e46d634d139075492ad055e3d68445925/numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5", size = 18252005, upload-time = "2024-02-05T23:53:15.637Z" }, - { url = 
"https://files.pythonhosted.org/packages/09/bf/2b1aaf8f525f2923ff6cfcf134ae5e750e279ac65ebf386c75a0cf6da06a/numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a", size = 13885297, upload-time = "2024-02-05T23:53:42.16Z" }, - { url = "https://files.pythonhosted.org/packages/df/a0/4e0f14d847cfc2a633a1c8621d00724f3206cfeddeb66d35698c4e2cf3d2/numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a", size = 18093567, upload-time = "2024-02-05T23:54:11.696Z" }, - { url = "https://files.pythonhosted.org/packages/d2/b7/a734c733286e10a7f1a8ad1ae8c90f2d33bf604a96548e0a4a3a6739b468/numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20", size = 5968812, upload-time = "2024-02-05T23:54:26.453Z" }, - { url = "https://files.pythonhosted.org/packages/3f/6b/5610004206cf7f8e7ad91c5a85a8c71b2f2f8051a0c0c4d5916b76d6cbb2/numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2", size = 15811913, upload-time = "2024-02-05T23:54:53.933Z" }, - { url = "https://files.pythonhosted.org/packages/95/12/8f2020a8e8b8383ac0177dc9570aad031a3beb12e38847f7129bacd96228/numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", size = 20335901, upload-time = "2024-02-05T23:55:32.801Z" }, - { url = "https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", size = 13685868, upload-time = "2024-02-05T23:55:56.28Z" }, - { url = 
"https://files.pythonhosted.org/packages/79/f8/97f10e6755e2a7d027ca783f63044d5b1bc1ae7acb12afe6a9b4286eac17/numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", size = 13925109, upload-time = "2024-02-05T23:56:20.368Z" }, - { url = "https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed", size = 17950613, upload-time = "2024-02-05T23:56:56.054Z" }, - { url = "https://files.pythonhosted.org/packages/4c/0c/9c603826b6465e82591e05ca230dfc13376da512b25ccd0894709b054ed0/numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a", size = 13572172, upload-time = "2024-02-05T23:57:21.56Z" }, - { url = "https://files.pythonhosted.org/packages/76/8c/2ba3902e1a0fc1c74962ea9bb33a534bb05984ad7ff9515bf8d07527cadd/numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0", size = 17786643, upload-time = "2024-02-05T23:57:56.585Z" }, - { url = "https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110", size = 5677803, upload-time = "2024-02-05T23:58:08.963Z" }, - { url = "https://files.pythonhosted.org/packages/16/2e/86f24451c2d530c88daf997cb8d6ac622c1d40d19f5a031ed68a4b73a374/numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", size = 15517754, upload-time = "2024-02-05T23:58:36.364Z" }, +name = "nvidia-cublas-cu12" +version = "12.8.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = 
[ + { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, + { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 567544208, upload-time = "2025-03-07T01:53:30.535Z" }, ] [[package]] -name = "nv-grouped-gemm" -version = "1.1.4.post6" +name = "nvidia-cuda-cupti-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, + { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" }, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, + { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" }, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, + { url = 
"https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, + { url = "https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" }, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "absl-py" }, - { name = "numpy" }, - { name = "torch", marker = "sys_platform == 'never'" }, + { name = "nvidia-cublas-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, + { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, + { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/05/79/87c45f32e661b25e0aaa1e325ba166511f57be5dff8f0fcabc12d3e73b64/nv_grouped_gemm-1.1.4.post6.tar.gz", hash = "sha256:dad6115f4b4ff7ceb0bc40ad44e923c13a24fc88cfe1e20b1a6b4c9cf24c445c", size = 26508, upload-time = "2025-10-10T18:52:29.508Z" } [[package]] name = "nvidia-cudnn-frontend" -version = "1.15.0" +version = "1.16.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/cf/3cd3cc682df5488288c6043fc0977090497ff015a082ab160076fecb080a/nvidia_cudnn_frontend-1.16.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83ecbe6d1145dc208a9ae82aa0b45b2c8f74ed8a43d3a102a13eef2117e2fedd", size = 1835542, upload-time = "2025-11-07T01:28:20.133Z" }, + { url = "https://files.pythonhosted.org/packages/92/45/87f3f2d94a928be21459949b03b0b8bcea13531d30094ad84a8ae4fca761/nvidia_cudnn_frontend-1.16.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:77cb06b91877c8489363867434ba1d9936f3e10bf7ed98d82e98f5f578611920", size = 1950339, upload-time = "2025-11-07T01:31:41.69Z" }, + { url = "https://files.pythonhosted.org/packages/be/f5/1662f18084ef4441bfb3a01383cbf77194905b53474dcb51c0d0f373c74b/nvidia_cudnn_frontend-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:ee3f3886f107919dad48cbc905fa6ae9207c8d7d5a24165e55625ea96f0fe40f", size = 1367883, upload-time = "2025-11-07T01:25:17.791Z" }, + { url = "https://files.pythonhosted.org/packages/10/b7/d0a3a337f5e83f26ff79a7fd63a859181ff2911f1d905d6fbab5fc80170d/nvidia_cudnn_frontend-1.16.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c360d5840d6eb597aade9e9c8780e24aec283b8e6bc97d52881c821a35c92aa9", size = 1837573, upload-time = "2025-11-07T01:29:05.507Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/dc/465a14f2d235778405f2e84fce336d07ab045bf1c7df6404bdf8033e06a8/nvidia_cudnn_frontend-1.16.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5c4a8fc573d85a86e08b15d9bf37f729e2487298781867a492a59cde6ac295e2", size = 1952630, upload-time = "2025-11-07T01:32:00.242Z" }, + { url = "https://files.pythonhosted.org/packages/3b/89/f14435f616603a999975930c4456d6140127f6acb19a877c752beccad837/nvidia_cudnn_frontend-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:a257f10a932ffde9741f644efd3611acf77e2fd89d493d81bc6a8353c48f1ec2", size = 1368775, upload-time = "2025-11-07T01:25:42.252Z" }, + { url = "https://files.pythonhosted.org/packages/00/39/79b606e805abd67ab4fa72f752a5413a496159f10d94fbdb1d67bb5ae86c/nvidia_cudnn_frontend-1.16.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd6fdd71c0896ff2ca1809d914cbd17f2904d55863f8881f47946e1d634c7a88", size = 1839271, upload-time = "2025-11-07T01:29:53.06Z" }, + { url = "https://files.pythonhosted.org/packages/09/21/a0e0d50ba8d7b639fe635500fee0d9c0319561b1ae72176d7024ec04b439/nvidia_cudnn_frontend-1.16.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:16efb069d4bda4d3b99134f59f376cfd4d09558298bd96af778fdc7f2851e696", size = 1954062, upload-time = "2025-11-07T01:32:18.556Z" }, + { url = "https://files.pythonhosted.org/packages/ce/d6/30ae67bb9c010e9459d1211c56d73373eb4e3dd9f57f4c3c1fe0966efcb1/nvidia_cudnn_frontend-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:7b7860db03767c158accbe0b4e9c9553506513cc970ff08ed28c7761681ac466", size = 1368435, upload-time = "2025-11-07T01:26:28.022Z" }, + { url = "https://files.pythonhosted.org/packages/32/2c/b4376afef0a6342c56e82e3465c1f8f5c719f588293a50dd04019a22ae6e/nvidia_cudnn_frontend-1.16.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b6bcb3a2fbff80538958e21e2227520f082a961164865aaeedaac527f61084f9", size = 1839805, upload-time 
= "2025-11-07T01:30:31.056Z" }, + { url = "https://files.pythonhosted.org/packages/71/13/836b90354036154ab82db3861210e5736983fe1fc44bb39c146ad93b333b/nvidia_cudnn_frontend-1.16.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cbdad88b2bec5dde837f8fa7632022334cddb4756f923b5421c06a712cb59d31", size = 1953953, upload-time = "2025-11-07T01:33:03.781Z" }, + { url = "https://files.pythonhosted.org/packages/e5/30/3025f34f2c86ceef85134dc1f323f8cf2a26d3ffddc5ada48528c80bfae1/nvidia_cudnn_frontend-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:138de2bc4697fabb2eb2f0f601a7e31f8fe97874908e26e33d737276f335473c", size = 1368359, upload-time = "2025-11-07T01:26:51.561Z" }, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.3.83" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, + { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, + { url = "https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" }, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.13.1.3" +source = { registry = 
"https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.9.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, + { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, + { url = "https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" }, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, + { name 
= "nvidia-cusparse-cu12" }, + { name = "nvidia-nvjitlink-cu12" }, +] wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/3f/d7bf811f4a76f4e9aa4ef390b11217562bba06f0c77f9e14c765681ccba6/nvidia_cudnn_frontend-1.15.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b4e8c77e848502ad79f8aef6b6c699613a6b5139572aba1f55f626d7bf31b44", size = 1743761, upload-time = "2025-10-10T18:54:15.142Z" }, - { url = "https://files.pythonhosted.org/packages/3e/b8/286f7fb3f1068acf0014a851f86863ed9fec69aff79a10dcc0dfbffe0523/nvidia_cudnn_frontend-1.15.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64a926602e52268e09127cf7a227e6b3d7c6e9e2a97fb57eebe88132aec8d9c8", size = 1859188, upload-time = "2025-10-10T18:56:59.386Z" }, - { url = "https://files.pythonhosted.org/packages/e8/f7/6e55b0122ca5924f0cdbd717392d35a92f43c6ed4b6d64c7d378ee01f301/nvidia_cudnn_frontend-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:7a21ec041fa4009cc8b76b2d26ad73010ab5e005804e4df8b1c1abdba5e23cd5", size = 1296575, upload-time = "2025-10-10T18:45:45.04Z" }, - { url = "https://files.pythonhosted.org/packages/80/b8/d0f1ab5c309c513fe1e4235e860872fc7ee60876e69b30eb0a20fe8c35d8/nvidia_cudnn_frontend-1.15.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:570c2e028ff9b8293f9625b31484084a638de6fb685802194b8dfe16db5a44b4", size = 1747611, upload-time = "2025-10-10T18:54:51.427Z" }, - { url = "https://files.pythonhosted.org/packages/0e/52/5b77edb810063c10040ac34e1517ee62690c4f030f0cf68298a4608552bc/nvidia_cudnn_frontend-1.15.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21ac16e4add264839a8db570d5378bb6583bf9539649d80bc8802ded00098a20", size = 1860815, upload-time = "2025-10-10T18:57:17.393Z" }, - { url = "https://files.pythonhosted.org/packages/de/2b/1fa26eee0479ae0b40582679c1bd08eb78a0b49bb5893ec3edce2a606e9f/nvidia_cudnn_frontend-1.15.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:c1be7480e3200606c2f2f49263cc13adc72c2a38e38f31f18e9b3727d99618b2", size = 1297355, upload-time = "2025-10-10T18:46:10.171Z" }, - { url = "https://files.pythonhosted.org/packages/cb/9c/0c2340454f8c9cc4143fdbccef8218dad1e49042d62b26c1781915617c40/nvidia_cudnn_frontend-1.15.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6c2cfe2a0f94bff71614bd3add0ae077f513f7d14909c223afca01ac8056ff84", size = 1749017, upload-time = "2025-10-10T18:55:29.412Z" }, - { url = "https://files.pythonhosted.org/packages/19/b4/c35104b8fc32986111b611b3080bbcf35fd3fd6794d4aec4e068136ea628/nvidia_cudnn_frontend-1.15.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aab1098ad4c79935b6e8dc251e9145129a04a8dc6ff75eb30871aacdd1487946", size = 1865629, upload-time = "2025-10-10T18:57:35.941Z" }, - { url = "https://files.pythonhosted.org/packages/a6/d7/6534807d209a27817d101cf86745e335896e96379bf2d207195cfe9f24ab/nvidia_cudnn_frontend-1.15.0-cp312-cp312-win_amd64.whl", hash = "sha256:13e58a5b001154899f0744165716a7ad24cd7567d759a8229a9ada730a1046b2", size = 1297335, upload-time = "2025-10-10T18:46:35.069Z" }, - { url = "https://files.pythonhosted.org/packages/9b/75/5a75942aae2bb3a0c1cc44378e9f80c1213a6d7b952c8df19b8845836a34/nvidia_cudnn_frontend-1.15.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fda240405eba3c04866e30b3c1beae26ea7775af4fa4d555cd598695067d32ac", size = 1750048, upload-time = "2025-10-10T18:56:06.057Z" }, - { url = "https://files.pythonhosted.org/packages/79/70/2ed9802725cb305189dac906a67c799eeb47e4f395b97df0249a750c56fe/nvidia_cudnn_frontend-1.15.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14941c05a6484d3f05f3089cd290c9b1e6614298f37e07cd01789933932c9f28", size = 1867440, upload-time = "2025-10-10T18:57:53.964Z" }, - { url = 
"https://files.pythonhosted.org/packages/d1/04/519fd6e3ea12fe7fe98c497c4d51f6c5c87763d02e90ea3102cef32a6ef1/nvidia_cudnn_frontend-1.15.0-cp313-cp313-win_amd64.whl", hash = "sha256:7c8c6f12534b73b0cd55956c5e9419b7840a01e4c260837606112450ce1ca0d9", size = 1297324, upload-time = "2025-10-10T18:46:53.104Z" }, + { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, + { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, + { url = "https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" }, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.8.93" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, + { url = "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" }, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, + { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, + { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" }, ] [[package]] name = "nvidia-cutlass-dsl" -version = "4.2.1" +version = "4.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cuda-python" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", 
source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "typing-extensions" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/0f/1e96ce9fbe07e8c39484fae4d2cf36e328bdf434b311d88ccedccbfed7db/nvidia_cutlass_dsl-4.2.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:1628bacedde042c60c7ebb1aeccce5a82501197f5e5c4fbbf803712fa45fba59", size = 58540319, upload-time = "2025-09-23T14:38:00.634Z" }, - { url = "https://files.pythonhosted.org/packages/7c/e3/bc6071743d0ad43d837bf633139bfe1202260c28d893e30f247cf0aa8019/nvidia_cutlass_dsl-4.2.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:aec74b50f700a8ef455f15863de4cb5f1486f72b7bd4becea88624c58c555a13", size = 62233601, upload-time = "2025-09-23T14:39:50.44Z" }, - { url = "https://files.pythonhosted.org/packages/1d/2a/e65312728338e5bb00b592ce0be12b51e7594a3ef288cd8c99bc1c456968/nvidia_cutlass_dsl-4.2.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:04e605417773957405cad0ac6c2d46139a88aca07a783b4f66e1363f3a91a835", size = 58540069, upload-time = "2025-09-23T14:38:56.002Z" }, - { url = "https://files.pythonhosted.org/packages/be/f3/20eacdf9876abd892668c191003edc5d7100e45fabfa027d9f3f99d21871/nvidia_cutlass_dsl-4.2.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:874aa3620b3d3dc6598af2226fa3b78f2e7998b8656929b492259e0c9f778786", size = 62233009, upload-time = "2025-09-23T14:39:23.308Z" }, - { url = 
"https://files.pythonhosted.org/packages/1e/1d/f168a3dbd8570e5dbbe0deca217d7b374c977b4a4970ebadf3b6d0f1174f/nvidia_cutlass_dsl-4.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:10ace6e2005cb0bc04d158c7660f8ec104ab29aeffb26f1ed3bb0b5a577ccc34", size = 58535504, upload-time = "2025-09-23T14:38:29.028Z" }, - { url = "https://files.pythonhosted.org/packages/02/ab/5bcc0c8c620af5d4acbc71abce10e3eb3023e50342e6bc29b6461f72530e/nvidia_cutlass_dsl-4.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d7ddc9c1f5bb803718d736c907fac857fc606f1fce630c0b1d741935a72723b9", size = 62230361, upload-time = "2025-09-23T14:40:18.156Z" }, - { url = "https://files.pythonhosted.org/packages/cf/d5/9b79faaec3fa12c52b7de1e727af94c54184b00f280c79b667ab045550db/nvidia_cutlass_dsl-4.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c0985124a74ba435e1f756aa78e89f64c6d01e4f54de1d5a5d218ebbc1c92eff", size = 58535424, upload-time = "2025-09-23T14:37:33.064Z" }, - { url = "https://files.pythonhosted.org/packages/43/86/78c8cd3fa1a684f3976535d7ac69e54f4ede165b5abca7979fd0820f74f2/nvidia_cutlass_dsl-4.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9356604afc8f62aac46634b3a12baf8cb3f3a6f2e44e398dcfe6ec98ff1a8d1b", size = 62230122, upload-time = "2025-09-23T14:40:46.621Z" }, + { url = "https://files.pythonhosted.org/packages/75/c3/3cd4c440f386a24c348c7c67adff5e38bb2405d08579ae3ac9312fa14ee4/nvidia_cutlass_dsl-4.3.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:29d6ccb56955e6528c818591fe752a820305951a73fbb69f9a816b3e228d57f8", size = 58726035, upload-time = "2025-11-28T00:59:03.749Z" }, + { url = "https://files.pythonhosted.org/packages/35/b5/854b713e2355e6211624dfc9df65aca5ebc2a8aaae97a696def34a4b9c9a/nvidia_cutlass_dsl-4.3.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f54d98339d4fca37d39390933186c4a7987291b57129da9bf45c7746d47786af", size = 58591793, upload-time = "2025-11-28T01:03:01.473Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/24/432ab11c9da47742518e008f61c58166b3cced5d39df987155d103d5e18e/nvidia_cutlass_dsl-4.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c7b27b3faf2d3cb4e9504ad55129ac58c09aa59f3af6eaabb88f4bda010a2792", size = 58725123, upload-time = "2025-11-28T00:58:11.337Z" }, + { url = "https://files.pythonhosted.org/packages/a2/07/59509304cac496275a0a7bdae436c267829611b38e4500b2622424c9f737/nvidia_cutlass_dsl-4.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:24cfbf55aad55b3dd06ddaa340d13028b4e49b15e0e557105187a9d0bbc260db", size = 58592193, upload-time = "2025-11-28T00:59:54.448Z" }, + { url = "https://files.pythonhosted.org/packages/b2/c5/f1586c64fcf569b890da776d08a32836a3ef2450cbe9e3ac2971dbecbcce/nvidia_cutlass_dsl-4.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:025a8c7a0fb80626e2a893954ea19b2e1ece8d131078c7da12b7fabc2634d04d", size = 58726236, upload-time = "2025-11-28T00:59:29.376Z" }, + { url = "https://files.pythonhosted.org/packages/dc/5b/fe6a2db1688a690a94f8ad03706fa6db2055d82fab0c4fab764e8c89640f/nvidia_cutlass_dsl-4.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b95ce5633e09f12c8d1fcd30c5db06b8325d41b3da0875d3e8a4c110ed5b5cdf", size = 58591826, upload-time = "2025-11-28T01:00:19.559Z" }, + { url = "https://files.pythonhosted.org/packages/40/fe/5e48c63ff5a510c0edbac5167921a819c70f71daf3b6ead0e0e5346b2a42/nvidia_cutlass_dsl-4.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c8e816cc061b34e016906fa87948f2b0fa836a95f27732c14097f3ddda8286e2", size = 58725695, upload-time = "2025-11-28T01:01:32.1Z" }, + { url = "https://files.pythonhosted.org/packages/9c/ef/34b1bdd375226b818cd810145e207cceb50fd12eaa87e88a6e67820574d4/nvidia_cutlass_dsl-4.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:f71adcfb56607fc86ea621edcf9503eaa31f66f70efd7ab719c33683db082183", size = 58592065, upload-time = "2025-11-28T01:02:35.83Z" }, ] [[package]] name = "nvidia-mathdx" -version = 
"25.1.1" +version = "25.6.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/59/00/f1a73ac224d466b31b6eb09794656112e896185678720b05668777e87db3/nvidia_mathdx-25.1.1-py3-none-any.whl", hash = "sha256:4fb948fe4842d24e679f3d0c140c8a0e8e24c3c7ae5eb6e08584253ad94a198b", size = 39894902, upload-time = "2025-05-06T22:58:32.29Z" }, + { url = "https://files.pythonhosted.org/packages/20/1a/a418b8c1adc58abd87fd69414c19883af5c1b10514e3dbfcc27cde831b13/nvidia_mathdx-25.6.0-py3-none-any.whl", hash = "sha256:22e6ad5d0d005f836be5cbd14e836cf2e9ea42c82deb602707246ce8198eaa96", size = 23013087, upload-time = "2025-11-13T18:25:11.228Z" }, ] [[package]] @@ -3315,13 +3281,13 @@ wheels = [ [[package]] name = "nvidia-modelopt" -version = "0.33.1" +version = "0.39.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ninja" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "nvidia-ml-py" }, - { name = "nvidia-modelopt-core" }, { name = "packaging" }, { name = "pulp" }, { name = "pydantic" }, @@ -3332,52 +3298,76 @@ dependencies = [ { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "torch", marker = "sys_platform == 'never'" }, { name = "torchprofile" }, - { name = "torchvision", marker 
= "sys_platform == 'never'" }, { name = "tqdm" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/ca/cb/4af39357792a96f334c7877ea0380c9337aec210ff4794a7dd95beb7c349/nvidia_modelopt-0.33.1-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:6c51091683a117cd40fdb96a0ec28579f2276f6b627db7ccddc370df544e1dd7", size = 751683, upload-time = "2025-08-12T18:37:48.832Z" }, - { url = "https://files.pythonhosted.org/packages/0a/b1/fc2f468d140ef58e90fac584759d0cc449db9bc4f64668cdff750ef38fef/nvidia_modelopt-0.33.1-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:ef78a98901890f265596ec413dffac177d4a1865201d89a14f29f4fa0cf8e710", size = 751683, upload-time = "2025-08-12T18:36:59.964Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d5/b03ad3ffa28984b629a72da678fa98f912fc45bac3b514c4a70cf2a82fe3/nvidia_modelopt-0.39.0-py3-none-any.whl", hash = "sha256:32f05317c81be1ff2ffeab749e5258b7bea8e4c6e60a09c760584f25ad03f648", size = 864981, upload-time = "2025-11-13T07:35:42.761Z" }, ] [[package]] -name = "nvidia-modelopt-core" -version = "0.33.1" +name = "nvidia-nccl-cu12" +version = "2.27.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/21/d12ca11f5554340684d11958aae6c6e7755cf0aaae10a2d2c9db217228cf/nvidia_modelopt_core-0.33.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:f25f6a817609c693ee39d1bcf2d3aeef462b9769f971590133de8b1b0310885b", size = 1307716, upload-time = "2025-08-12T18:41:12.086Z" }, - { url = "https://files.pythonhosted.org/packages/eb/df/7bead24d4854274d9f2818f1ae780fc24260aab60b7b6f73e1af4f056ce5/nvidia_modelopt_core-0.33.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:195f32f06d19bc9f9d858811f1864bddcc1db6278974d98ea6309cb3553427f1", size = 1326896, upload-time = "2025-08-12T18:39:48.243Z" }, - { url = 
"https://files.pythonhosted.org/packages/a1/36/3318980c670292d827ace5ac6110ab6054d0f2d87e507382842ea9e7c78f/nvidia_modelopt_core-0.33.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:ffd008a90d8867660ae41c98002156b526e368a4cdf39e225fe20f478adce8b2", size = 1376104, upload-time = "2025-08-12T18:41:47.358Z" }, - { url = "https://files.pythonhosted.org/packages/27/97/99d1ddabe01ab262c18621619c996e1c2c119bc058607d2bc9ce7eb85fe7/nvidia_modelopt_core-0.33.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:be49121b2f74db4cb73955396a7bb83935d92232c5a20bcfd7b8e7cae68e482f", size = 1393729, upload-time = "2025-08-12T18:40:07.86Z" }, - { url = "https://files.pythonhosted.org/packages/9b/b5/ba79b1c52b634b24e45dca409f133f947217a5c7ec5c256266e4ec5fa3eb/nvidia_modelopt_core-0.33.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:1ddd9279d8312f8e972b302692a26e6180f1c9fd277232f5925a5589f42b1b76", size = 1338081, upload-time = "2025-08-12T18:40:36.156Z" }, - { url = "https://files.pythonhosted.org/packages/13/40/4427583475dfd8eb1b8c7522d75d4d059f0512ff03dcc62d6986a22ab918/nvidia_modelopt_core-0.33.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:69d5ace564f2b056c916117be2023f2b7fc01cd1501073915e6b2ced2b8a5394", size = 1363366, upload-time = "2025-08-12T18:39:28.854Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" }, + { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, +] + 
+[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, + { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, + { url = "https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.3.20" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/9d/3dd98852568fb845ec1f7902c90a22b240fe1cbabda411ccedf2fd737b7b/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b0b960da3842212758e4fa4696b94f129090b30e5122fea3c5345916545cff0", size = 124484616, upload-time = "2025-08-04T20:24:59.172Z" }, + { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" }, +] + +[[package]] +name = 
"nvidia-nvtx-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, + { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, + { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" }, ] [[package]] name = "nvidia-resiliency-ext" -version = "0.4.1" +version = "0.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "defusedxml" }, + { name = "nv-one-logger-core" }, + { name = "nv-one-logger-training-telemetry" }, { name = "nvidia-ml-py" }, { name = "packaging" }, { name = "psutil" }, - { name = "pynvml" }, { name = "pyyaml" }, { name = "torch", marker = "sys_platform == 'never'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/8c/6547d9fdea9730d4f69a19ca492ccbe221768f8473b82502a78a824acc3d/nvidia_resiliency_ext-0.4.1-cp310-cp310-manylinux_2_31_aarch64.whl", hash = "sha256:cf80599411018ebbf03da64769527dee6b37746b72b8606f919b7999633770b8", size = 442891, upload-time = "2025-07-17T03:53:38.878Z" }, - { url = 
"https://files.pythonhosted.org/packages/34/0d/520cab980949ad11bd5291784fea309bcd6654a9c97943a3a87644c1d111/nvidia_resiliency_ext-0.4.1-cp310-cp310-manylinux_2_31_x86_64.whl", hash = "sha256:0c23e621d598ba436549db83deeb3569c19df0194b89fe6169d62b6ead711be3", size = 448044, upload-time = "2025-07-17T03:48:30.851Z" }, - { url = "https://files.pythonhosted.org/packages/46/77/8cda264b262e2868a4e6ebcddaea112200b1e34b8d5a35a2fe3b4978d137/nvidia_resiliency_ext-0.4.1-cp311-cp311-manylinux_2_31_aarch64.whl", hash = "sha256:d8ca454a8b8abef72e0ff0e33914686c263414e8891471c02a9f6af9d2d6b925", size = 443649, upload-time = "2025-07-17T03:49:16.183Z" }, - { url = "https://files.pythonhosted.org/packages/3a/53/029cc7493b5833cb8dfa201f15a1e422e2e1cc6308d34c5b0a90028a73fd/nvidia_resiliency_ext-0.4.1-cp311-cp311-manylinux_2_31_x86_64.whl", hash = "sha256:dde6034f29350ac6326cdd861ceec641bdd93be0eddbf034739f4cd9452a4dd9", size = 449189, upload-time = "2025-07-17T03:52:15.24Z" }, - { url = "https://files.pythonhosted.org/packages/70/05/38d491962273c7905708762279f440520eb79f3c00b67a023497215ad023/nvidia_resiliency_ext-0.4.1-cp312-cp312-manylinux_2_31_aarch64.whl", hash = "sha256:b3bd5f01535574b16d0f38bca6e39afe3806c4a2896eee1b321cd944e00025a7", size = 444570, upload-time = "2025-07-17T03:50:58.877Z" }, - { url = "https://files.pythonhosted.org/packages/18/8b/4cb8aa2bbdf3705d3034c3f3dacdadb03b3b7dd3dc7f5200e64663fb477f/nvidia_resiliency_ext-0.4.1-cp312-cp312-manylinux_2_31_x86_64.whl", hash = "sha256:ca9f8de465af345952bedbea53c90c0e2323d88cfd830ded0e806fad91845c0e", size = 450280, upload-time = "2025-07-17T03:49:55.327Z" }, + { url = "https://files.pythonhosted.org/packages/df/18/1898cad3bdd643c6bfa5f7aee125a5ef308ab1701ab15106e3e9c66bb416/nvidia_resiliency_ext-0.5.0-cp310-cp310-manylinux_2_39_aarch64.whl", hash = "sha256:97d4b68d3949f3b8370addb474d8662d6ac5008c3c1296420cdeb93a88d6a804", size = 402915, upload-time = "2025-11-13T21:28:34.578Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/48/10fc3f278898e3b2aacc3bea65f0ac4b579e6e0e8447b467742d75adeec1/nvidia_resiliency_ext-0.5.0-cp310-cp310-manylinux_2_39_x86_64.whl", hash = "sha256:ceb04ec5a7bc9301fd6f14449bda6b0d1f37ead4fbe37aa3bf1d7b2ad5b662d4", size = 406483, upload-time = "2025-11-13T21:28:58.732Z" }, + { url = "https://files.pythonhosted.org/packages/14/17/c19dfed8d4aced307a1c1404f0917ee6c1b319db8092b3cfe2af4e76de6d/nvidia_resiliency_ext-0.5.0-cp311-cp311-manylinux_2_39_aarch64.whl", hash = "sha256:62d396356adcf898cb86a54956eeece29017a41b5872db0b364c8449d23f2f66", size = 404062, upload-time = "2025-11-13T21:29:46.873Z" }, + { url = "https://files.pythonhosted.org/packages/7f/99/b4324595171c3cdffb03cef070006ab9a3de7fca90a22403576ec6423b69/nvidia_resiliency_ext-0.5.0-cp311-cp311-manylinux_2_39_x86_64.whl", hash = "sha256:c4fcd006ef69300f753bb30d17efbb6bcee6699f044e3532209b2825d22e9977", size = 407027, upload-time = "2025-11-13T21:30:09.124Z" }, + { url = "https://files.pythonhosted.org/packages/8c/73/232d9f25558f3c6165ff1d15c980a434b47c13e8f527f999cd265859abcf/nvidia_resiliency_ext-0.5.0-cp312-cp312-manylinux_2_39_aarch64.whl", hash = "sha256:81e3d827885e90bed369e67f76dda6709dd4073c2e5fa1228df85d6987cee495", size = 403317, upload-time = "2025-11-13T21:31:24.603Z" }, + { url = "https://files.pythonhosted.org/packages/44/89/4d7f39416aa3be72ee9f1260a7af56af40f2570f5add1e039d96279a8764/nvidia_resiliency_ext-0.5.0-cp312-cp312-manylinux_2_39_x86_64.whl", hash = "sha256:eb720cd25feabef07f971d4051c7bcac2f9ec73642a9031953d2663307950cb9", size = 407963, upload-time = "2025-11-13T21:30:28.998Z" }, ] [[package]] name = "nvidia-sphinx-theme" -version = "0.0.8" +version = "0.0.9.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydata-sphinx-theme" }, @@ -3385,27 +3375,26 @@ dependencies = [ { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/dd/74/996dbc314da8ed670cd5e040d0b4b5be79ff1fc3db3fe25e63134deebe9a/nvidia_sphinx_theme-0.0.8-py3-none-any.whl", hash = "sha256:18f117aa154a3a156251a75647279c541464f3e75f7df2ae283e720cc7d0bc2c", size = 140678, upload-time = "2025-03-24T21:56:25.621Z" }, + { url = "https://files.pythonhosted.org/packages/8c/79/017fab2f7167a9a9795665f894d04f77aafceca80821b51589bb4b23ff5c/nvidia_sphinx_theme-0.0.9.post1-py3-none-any.whl", hash = "sha256:21ca60206dff2f380d7783d64bbaf71a5b9cacae53c7d0686f089c16b5a3d45a", size = 143816, upload-time = "2025-11-09T23:16:55.719Z" }, ] [[package]] name = "nvtx" -version = "0.2.13" +version = "0.2.14" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/97/02/b3fd3da4ba51764cfc0e4d2b22d5a61511fa79d825344d4704f8429c0bd6/nvtx-0.2.13.tar.gz", hash = "sha256:9db7ba135168e14e1f038866100bf8ed42d3e00b404e9bc7b6280ee3af828b92", size = 112104, upload-time = "2025-08-05T03:27:16.383Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/53/64/d27e344632116da937100a81054c88b0fd6a259de09d6778e03e8231216b/nvtx-0.2.13-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:462bdcc65a12b53bfa3e7df564ddfb72092a030a923dccd1cf88c4b771ecae3f", size = 470534, upload-time = "2025-08-04T19:36:19.389Z" }, - { url = "https://files.pythonhosted.org/packages/34/15/0b56e9b3020613d7d167bc4cdee3ba8686f6320c6aa62e85ed17b54c4dcb/nvtx-0.2.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7874534af889ab7c2c63554c73119d193d2beb7671b551b7f43de5b97ceb5971", size = 474158, upload-time = "2025-08-04T19:39:39.801Z" }, - { url = "https://files.pythonhosted.org/packages/2b/be/e00ab0d21f4fb46ad66b0eae89d9e9f7d53af65a37c3db2414a590e05e97/nvtx-0.2.13-cp310-cp310-win_amd64.whl", hash = 
"sha256:4f26d04b5ea5b96096941cb9a7115a73454e9e9d5c247bfcd34ec584559cf9dd", size = 99104, upload-time = "2025-08-04T19:24:01.775Z" }, - { url = "https://files.pythonhosted.org/packages/22/02/f74e26cedbdb136440d1234a646cedfddf9a43d19586e1ee466d6275e6b6/nvtx-0.2.13-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ad794a0c046ef268b2fb3b6812a35bb3bce5cd19207d164689943f0031ac45f", size = 522330, upload-time = "2025-08-04T19:34:49.075Z" }, - { url = "https://files.pythonhosted.org/packages/1d/55/e1e43201959dd854005c72b8a13ec86b775c349cdcb1d23423d841bbad58/nvtx-0.2.13-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5640ca4b8be2c19a8fc4ca8403d3c2598165ea27541940b4897138a7b0a717fe", size = 522841, upload-time = "2025-08-04T19:38:27.819Z" }, - { url = "https://files.pythonhosted.org/packages/a9/8c/89d1f499a4880e30e0b5bdf429cbd1d8c612d09c49c13016384ce9cd156d/nvtx-0.2.13-cp311-cp311-win_amd64.whl", hash = "sha256:be6d53143cb2bd44e04aecdb7f3b34b48ded96f3673ae41362239d9f54bcfe27", size = 99106, upload-time = "2025-08-04T19:22:49.181Z" }, - { url = "https://files.pythonhosted.org/packages/c5/73/ad21e09dc2534f1e9723bbe5871fa5f03361ac51ca4d411fea6f765b5b6a/nvtx-0.2.13-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3435cbbffa132f6aaba3abdb01e71a1b961a20858b4cb791883895a25b9305d6", size = 539358, upload-time = "2025-08-04T19:33:16.494Z" }, - { url = "https://files.pythonhosted.org/packages/12/ab/762da984e7671f7c34ae87e5b70523c3eeb4563759268bfaea07c97f32a6/nvtx-0.2.13-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:453d838dd1424a04303281ee57a73e2b8dca0e03039bc609a945861b8fe7d7d9", size = 545588, upload-time = "2025-08-04T19:37:40.64Z" }, - { url = "https://files.pythonhosted.org/packages/2a/b6/55bc5916386db70b93cbf543b1e880ead786d9ff0cdcfa262f5a2af46c74/nvtx-0.2.13-cp312-cp312-win_amd64.whl", hash = 
"sha256:0722d743e0e41e1fb866ebe6446e0cd0d268ca8671313f8da4f8c969956b74d3", size = 99123, upload-time = "2025-08-04T19:24:24.391Z" }, - { url = "https://files.pythonhosted.org/packages/41/73/98c0669d5f9387a36d56b0e62ea3919124dd8dd7582d896ed1cae2998f57/nvtx-0.2.13-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1561d2111c698b1b1075899ff9c3fa7ba83603fc27c2e8ef567de6bbbe85ce1", size = 519840, upload-time = "2025-08-04T19:34:00.877Z" }, - { url = "https://files.pythonhosted.org/packages/14/4b/21e975997def8a387543ba2bbe227551ad466781c39fc67f37f53555f37e/nvtx-0.2.13-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:edd7b729ed0211350258a21dd13422f59bc521de2b2fd21feb6c177af492f4e1", size = 524711, upload-time = "2025-08-04T19:38:03.559Z" }, - { url = "https://files.pythonhosted.org/packages/21/d7/0ca146afd875f1e02636323840960071f768b5d8ba3e7d37f2ac9192bfd9/nvtx-0.2.13-cp313-cp313-win_amd64.whl", hash = "sha256:f0524bb71443d5a1f19a6409a9a81405fc437e53c5edfc4c44b6f4504ccf46e3", size = 97317, upload-time = "2025-08-04T19:24:46.391Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ca/fa76ea4985fd8f3d8c437bffec2580b1cac7f2401671089ac842610ae466/nvtx-0.2.14-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b70b2415ab97edf19514be226d5058898922c6b6bb1d7fdd5ef92d1e086f3e0f", size = 695204, upload-time = "2025-11-27T17:28:52.688Z" }, + { url = "https://files.pythonhosted.org/packages/b9/1f/0aa62d52062d700dbed36dd2ebfddf5133c72180d448cce66545e5ccbe5d/nvtx-0.2.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23ab874f9c70e5433f39e40ca318ffcfc14fb43ed6798e6be5a30f74e4ca831f", size = 686698, upload-time = "2025-11-27T17:23:19.335Z" }, + { url = "https://files.pythonhosted.org/packages/18/c9/a12d48157221a8e939f3f7ec8f8a543e232fb9248820afb164ff9eb3eaa7/nvtx-0.2.14-cp310-cp310-win_amd64.whl", hash = 
"sha256:3a22be895546ca609e83e54614b56739200ab6f4d13e15f5685544082b1b7908", size = 119654, upload-time = "2025-11-27T17:32:08.536Z" }, + { url = "https://files.pythonhosted.org/packages/87/a6/4d473abd7c07a6d1060c0f708e21ddf46a960258532ffc897681db5c0f46/nvtx-0.2.14-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:227f6406d2fe1a4b890be17eb1f4c1f5bd4df8f7032dd1cb8c7651d379f35541", size = 732764, upload-time = "2025-11-27T17:26:21.853Z" }, + { url = "https://files.pythonhosted.org/packages/94/06/3ab72e5a463af1b95934638cb8377e99f58e5ef21a47cbf69b92267d6602/nvtx-0.2.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0664aa75b24e2ad0abdd0fa52c49e9c8a120652f2194289c85dc2d93cbc6017f", size = 724555, upload-time = "2025-11-27T17:22:36.402Z" }, + { url = "https://files.pythonhosted.org/packages/18/1d/64f6078a5ab4134af91ba294035ee1ebb3512edaaa9d60d8f0f023178620/nvtx-0.2.14-cp311-cp311-win_amd64.whl", hash = "sha256:10f5971661d61c1a90cd36c3069240452c904ecec4b3a08d0d6fdba1e5398165", size = 119660, upload-time = "2025-11-27T17:32:30.406Z" }, + { url = "https://files.pythonhosted.org/packages/8a/de/2cc15bb805b1b18317b60837b853ed023757730d0db82de291635fc88bc3/nvtx-0.2.14-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ece46f555e725db879df06549980744f89db5923a77e6f7a5aecda75292421a", size = 727708, upload-time = "2025-11-27T17:25:20.836Z" }, + { url = "https://files.pythonhosted.org/packages/81/94/b37d634fef8677ce525b5bfd2886737ea2c064bc3576fc84423973ff5b97/nvtx-0.2.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17efe5d903996bceb0c8a12cae80fa9b66bee7ee895923bd9d8ec2a5af1aabd8", size = 737691, upload-time = "2025-11-27T17:21:27.87Z" }, + { url = "https://files.pythonhosted.org/packages/ad/c1/f633aa32003050ff83626a19402f03c83990a15b4df658a7bf1b590ee83e/nvtx-0.2.14-cp312-cp312-win_amd64.whl", hash = 
"sha256:f40db4746714d525d3020c702a0df866c2335efd6a27c41e869e577402a53a4b", size = 119193, upload-time = "2025-11-27T17:31:42.943Z" }, + { url = "https://files.pythonhosted.org/packages/04/a3/603ecdfd5cd97feee59c7e51da4929e22eac8dbe68ac78df53e74152813f/nvtx-0.2.14-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8cd1f2b464675b4d3c2036b7bbaf975baa9307f0795107dc69c556c0c8d191d", size = 710057, upload-time = "2025-11-27T17:28:08.127Z" }, + { url = "https://files.pythonhosted.org/packages/97/29/945dd440e6bd459e6064f321ed425dbae7d03d39ffa97a38e5434fbcda27/nvtx-0.2.14-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6532556d81f782e24eb12c5e0c75e297493d6ab0431177c93c12bb29c523ea9e", size = 717825, upload-time = "2025-11-27T17:22:57.556Z" }, + { url = "https://files.pythonhosted.org/packages/16/3e/5d7872f2a0809237e3d524f81a7a3c7fbeb98bdc9dcec4723b75a45cd552/nvtx-0.2.14-cp313-cp313-win_amd64.whl", hash = "sha256:cd86f78ed56aede301b03e5ab8cb1aaeb8ba0b5ed683f98f87fbe474996d73f2", size = 118546, upload-time = "2025-11-27T17:30:32.549Z" }, ] [[package]] @@ -3423,141 +3412,75 @@ wheels = [ [[package]] name = "onnx" -version = "1.19.0" +version = "1.19.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.13' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy" }, + { name = "ml-dtypes" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and 
extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "protobuf" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5b/bf/b0a63ee9f3759dcd177b28c6f2cb22f2aecc6d9b3efecaabc298883caa5f/onnx-1.19.0.tar.gz", hash = "sha256:aa3f70b60f54a29015e41639298ace06adf1dd6b023b9b30f1bca91bb0db9473", size = 11949859, upload-time = "2025-08-27T02:34:27.107Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/00/b3/8a6f3b05d18dffdc7c18839bd829587c826c8513f4bdbe21ddf37dacce50/onnx-1.19.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:e927d745939d590f164e43c5aec7338c5a75855a15130ee795f492fc3a0fa565", size = 18310869, upload-time = "2025-08-27T02:32:47.346Z" }, - { url = "https://files.pythonhosted.org/packages/b9/92/550d6155ab3f2c00e95add1726397c95b4b79d6eb4928d049ff591ad4c84/onnx-1.19.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c6cdcb237c5c4202463bac50417c5a7f7092997a8469e8b7ffcd09f51de0f4a9", size = 18028144, upload-time = "2025-08-27T02:32:50.306Z" }, - { url = "https://files.pythonhosted.org/packages/79/21/9bcc715ea6d9aab3f6c583bfc59504a14777e39e0591030e7345f4e40315/onnx-1.19.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ed0b85a33deacb65baffe6ca4ce91adf2bb906fa2dee3856c3c94e163d2eb563", size = 18200923, upload-time = "2025-08-27T02:32:54.325Z" }, - { url = 
"https://files.pythonhosted.org/packages/c8/90/3a6f0741ff22270e2f4b741f440ab68ba5525ebc94775cd6f2c01f531374/onnx-1.19.0-cp310-cp310-win32.whl", hash = "sha256:89a9cefe75547aec14a796352c2243e36793bbbcb642d8897118595ab0c2395b", size = 16332097, upload-time = "2025-08-27T02:32:56.997Z" }, - { url = "https://files.pythonhosted.org/packages/4c/4c/ef61d359865712803d488672607023d36bfcd21fa008d8dc1d6ee8e8b23c/onnx-1.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:a16a82bfdf4738691c0a6eda5293928645ab8b180ab033df84080817660b5e66", size = 16451402, upload-time = "2025-08-27T02:33:00.534Z" }, - { url = "https://files.pythonhosted.org/packages/db/5c/b959b17608cfb6ccf6359b39fe56a5b0b7d965b3d6e6a3c0add90812c36e/onnx-1.19.0-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:206f00c47b85b5c7af79671e3307147407991a17994c26974565aadc9e96e4e4", size = 18312580, upload-time = "2025-08-27T02:33:03.081Z" }, - { url = "https://files.pythonhosted.org/packages/2c/ee/ac052bbbc832abe0debb784c2c57f9582444fb5f51d63c2967fd04432444/onnx-1.19.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4d7bee94abaac28988b50da675ae99ef8dd3ce16210d591fbd0b214a5930beb3", size = 18029165, upload-time = "2025-08-27T02:33:05.771Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c9/8687ba0948d46fd61b04e3952af9237883bbf8f16d716e7ed27e688d73b8/onnx-1.19.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7730b96b68c0c354bbc7857961bb4909b9aaa171360a8e3708d0a4c749aaadeb", size = 18202125, upload-time = "2025-08-27T02:33:09.325Z" }, - { url = "https://files.pythonhosted.org/packages/e2/16/6249c013e81bd689f46f96c7236d7677f1af5dd9ef22746716b48f10e506/onnx-1.19.0-cp311-cp311-win32.whl", hash = "sha256:7cb7a3ad8059d1a0dfdc5e0a98f71837d82002e441f112825403b137227c2c97", size = 16332738, upload-time = "2025-08-27T02:33:12.448Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/28/34a1e2166e418c6a78e5c82e66f409d9da9317832f11c647f7d4e23846a6/onnx-1.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:d75452a9be868bd30c3ef6aa5991df89bbfe53d0d90b2325c5e730fbd91fff85", size = 16452303, upload-time = "2025-08-27T02:33:15.176Z" }, - { url = "https://files.pythonhosted.org/packages/e6/b7/639664626e5ba8027860c4d2a639ee02b37e9c322215c921e9222513c3aa/onnx-1.19.0-cp311-cp311-win_arm64.whl", hash = "sha256:23c7959370d7b3236f821e609b0af7763cff7672a758e6c1fc877bac099e786b", size = 16425340, upload-time = "2025-08-27T02:33:17.78Z" }, - { url = "https://files.pythonhosted.org/packages/0d/94/f56f6ca5e2f921b28c0f0476705eab56486b279f04e1d568ed64c14e7764/onnx-1.19.0-cp312-cp312-macosx_12_0_universal2.whl", hash = "sha256:61d94e6498ca636756f8f4ee2135708434601b2892b7c09536befb19bc8ca007", size = 18322331, upload-time = "2025-08-27T02:33:20.373Z" }, - { url = "https://files.pythonhosted.org/packages/c8/00/8cc3f3c40b54b28f96923380f57c9176872e475face726f7d7a78bd74098/onnx-1.19.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:224473354462f005bae985c72028aaa5c85ab11de1b71d55b06fdadd64a667dd", size = 18027513, upload-time = "2025-08-27T02:33:23.44Z" }, - { url = "https://files.pythonhosted.org/packages/61/90/17c4d2566fd0117a5e412688c9525f8950d467f477fbd574e6b32bc9cb8d/onnx-1.19.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ae475c85c89bc4d1f16571006fd21a3e7c0e258dd2c091f6e8aafb083d1ed9b", size = 18202278, upload-time = "2025-08-27T02:33:26.103Z" }, - { url = "https://files.pythonhosted.org/packages/bc/6e/a9383d9cf6db4ac761a129b081e9fa5d0cd89aad43cf1e3fc6285b915c7d/onnx-1.19.0-cp312-cp312-win32.whl", hash = "sha256:323f6a96383a9cdb3960396cffea0a922593d221f3929b17312781e9f9b7fb9f", size = 16333080, upload-time = "2025-08-27T02:33:28.559Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/2e/3ff480a8c1fa7939662bdc973e41914add2d4a1f2b8572a3c39c2e4982e5/onnx-1.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:50220f3499a499b1a15e19451a678a58e22ad21b34edf2c844c6ef1d9febddc2", size = 16453927, upload-time = "2025-08-27T02:33:31.177Z" }, - { url = "https://files.pythonhosted.org/packages/57/37/ad500945b1b5c154fe9d7b826b30816ebd629d10211ea82071b5bcc30aa4/onnx-1.19.0-cp312-cp312-win_arm64.whl", hash = "sha256:efb768299580b786e21abe504e1652ae6189f0beed02ab087cd841cb4bb37e43", size = 16426022, upload-time = "2025-08-27T02:33:33.515Z" }, - { url = "https://files.pythonhosted.org/packages/be/29/d7b731f63d243f815d9256dce0dca3c151dcaa1ac59f73e6ee06c9afbe91/onnx-1.19.0-cp313-cp313-macosx_12_0_universal2.whl", hash = "sha256:9aed51a4b01acc9ea4e0fe522f34b2220d59e9b2a47f105ac8787c2e13ec5111", size = 18322412, upload-time = "2025-08-27T02:33:36.723Z" }, - { url = "https://files.pythonhosted.org/packages/58/f5/d3106becb42cb374f0e17ff4c9933a97f1ee1d6a798c9452067f7d3ff61b/onnx-1.19.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ce2cdc3eb518bb832668c4ea9aeeda01fbaa59d3e8e5dfaf7aa00f3d37119404", size = 18026565, upload-time = "2025-08-27T02:33:39.493Z" }, - { url = "https://files.pythonhosted.org/packages/83/fa/b086d17bab3900754c7ffbabfb244f8e5e5da54a34dda2a27022aa2b373b/onnx-1.19.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b546bd7958734b6abcd40cfede3d025e9c274fd96334053a288ab11106bd0aa", size = 18202077, upload-time = "2025-08-27T02:33:42.115Z" }, - { url = "https://files.pythonhosted.org/packages/35/f2/5e2dfb9d4cf873f091c3f3c6d151f071da4295f9893fbf880f107efe3447/onnx-1.19.0-cp313-cp313-win32.whl", hash = "sha256:03086bffa1cf5837430cf92f892ca0cd28c72758d8905578c2bf8ffaf86c6743", size = 16333198, upload-time = "2025-08-27T02:33:45.172Z" }, - { url = 
"https://files.pythonhosted.org/packages/79/67/b3751a35c2522f62f313156959575619b8fa66aa883db3adda9d897d8eb2/onnx-1.19.0-cp313-cp313-win_amd64.whl", hash = "sha256:1715b51eb0ab65272e34ef51cb34696160204b003566cd8aced2ad20a8f95cb8", size = 16453836, upload-time = "2025-08-27T02:33:47.779Z" }, - { url = "https://files.pythonhosted.org/packages/14/b9/1df85effc960fbbb90bb7bc36eb3907c676b104bc2f88bce022bcfdaef63/onnx-1.19.0-cp313-cp313-win_arm64.whl", hash = "sha256:6bf5acdb97a3ddd6e70747d50b371846c313952016d0c41133cbd8f61b71a8d5", size = 16425877, upload-time = "2025-08-27T02:33:50.357Z" }, - { url = "https://files.pythonhosted.org/packages/23/2b/089174a1427be9149f37450f8959a558ba20f79fca506ba461d59379d3a1/onnx-1.19.0-cp313-cp313t-macosx_12_0_universal2.whl", hash = "sha256:46cf29adea63e68be0403c68de45ba1b6acc9bb9592c5ddc8c13675a7c71f2cb", size = 18348546, upload-time = "2025-08-27T02:33:56.132Z" }, - { url = "https://files.pythonhosted.org/packages/c0/d6/3458f0e3a9dc7677675d45d7d6528cb84ad321c8670cc10c69b32c3e03da/onnx-1.19.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:246f0de1345498d990a443d55a5b5af5101a3e25a05a2c3a5fe8b7bd7a7d0707", size = 18033067, upload-time = "2025-08-27T02:33:58.661Z" }, - { url = "https://files.pythonhosted.org/packages/e4/16/6e4130e1b4b29465ee1fb07d04e8d6f382227615c28df8f607ba50909e2a/onnx-1.19.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae0d163ffbc250007d984b8dd692a4e2e4506151236b50ca6e3560b612ccf9ff", size = 18205741, upload-time = "2025-08-27T02:34:01.538Z" }, - { url = "https://files.pythonhosted.org/packages/fe/d8/f64d010fd024b2a2b11ce0c4ee179e4f8f6d4ccc95f8184961c894c22af1/onnx-1.19.0-cp313-cp313t-win_amd64.whl", hash = "sha256:7c151604c7cca6ae26161c55923a7b9b559df3344938f93ea0074d2d49e7fe78", size = 16453839, upload-time = "2025-08-27T02:34:06.515Z" }, - { url = 
"https://files.pythonhosted.org/packages/67/ec/8761048eabef4dad55af4c002c672d139b9bd47c3616abaed642a1710063/onnx-1.19.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:236bc0e60d7c0f4159300da639953dd2564df1c195bce01caba172a712e75af4", size = 18027605, upload-time = "2025-08-27T02:34:08.962Z" }, -] - -[[package]] -name = "onnx-ir" -version = "0.1.8" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", -] -dependencies = [ - { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, - { name = "numpy", marker = "python_full_version >= '3.13'" }, - { name = "onnx", marker = "python_full_version >= '3.13'" }, - { name = "typing-extensions", marker = "python_full_version >= '3.13'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/af/4a/7ea3952e556e7281b8bfe7f7fce016a13fdac85544d6d6af8ebca5cae160/onnx_ir-0.1.8.tar.gz", hash = "sha256:85ea59eaf165b2b107788193480a260e2723cfc7a1dac1bde7085fd0b7e380d7", size = 108961, upload-time = "2025-09-05T15:45:33.887Z" } -wheels = [ - { 
url = "https://files.pythonhosted.org/packages/0f/1c/3bb51fa9e278cbc655a1943c8016163d76a6e24137e73e5198ebc20fc965/onnx_ir-0.1.8-py3-none-any.whl", hash = "sha256:61a42021b6249e566ff3b89a03342bc88dce4dc2d984b97cfb060f33ef179f8a", size = 125316, upload-time = "2025-09-05T15:45:31.211Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/27/2f/c619eb65769357e9b6de9212c9a821ab39cd484448e5d6b3fb5fb0a64c6d/onnx-1.19.1.tar.gz", hash = "sha256:737524d6eb3907d3499ea459c6f01c5a96278bb3a0f2ff8ae04786fb5d7f1ed5", size = 12033525, upload-time = "2025-10-10T04:01:34.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/f3/892eea0206ed13a986239bd508c82b974387ef1b0ffd83ece0ce0725aaf6/onnx-1.19.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:7343250cc5276cf439fe623b8f92e11cf0d1eebc733ae4a8b2e86903bb72ae68", size = 18319433, upload-time = "2025-10-10T03:59:47.236Z" }, + { url = "https://files.pythonhosted.org/packages/9c/f3/c7ea4a1dfda9b9ddeff914a601ffaf5ed151b3352529f223eae74c03c8d1/onnx-1.19.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1fb8f79de7f3920bb82b537f3c6ac70c0ce59f600471d9c3eed2b5f8b079b748", size = 18043327, upload-time = "2025-10-10T03:59:50.854Z" }, + { url = "https://files.pythonhosted.org/packages/8d/eb/30159bb6a108b03f2b7521410369a5bd8d296be3fbf0b30ab7acd9ef42ad/onnx-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:92b9d2dece41cc84213dbbfd1acbc2a28c27108c53bd28ddb6d1043fbfcbd2d5", size = 18216877, upload-time = "2025-10-10T03:59:54.512Z" }, + { url = "https://files.pythonhosted.org/packages/0c/86/dc034e5a723a20ca45aa8dd76dda53c358a5f955908e1436f42c21bdfb3a/onnx-1.19.1-cp310-cp310-win32.whl", hash = "sha256:c0b1a2b6bb19a0fc9f5de7661a547136d082c03c169a5215e18ff3ececd2a82f", size = 16344116, upload-time = "2025-10-10T03:59:57.991Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/60/537f2c19050f71445ee00ed91e78a396b6189dd1fce61b29ac6a0d651c7e/onnx-1.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:1c0498c00db05fcdb3426697d330dcecc3f60020015065e2c76fa795f2c9a605", size = 16462819, upload-time = "2025-10-10T04:00:01.157Z" }, + { url = "https://files.pythonhosted.org/packages/36/07/0019c72924909e4f64b9199770630ab7b8d7914b912b03230e68f5eda7ae/onnx-1.19.1-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:17aaf5832126de0a5197a5864e4f09a764dd7681d3035135547959b4b6b77a09", size = 18320936, upload-time = "2025-10-10T04:00:04.235Z" }, + { url = "https://files.pythonhosted.org/packages/af/2f/5c47acf740dc35f0decc640844260fbbdc0efa0565657c93fd7ff30f13f3/onnx-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:01b292a4d0b197c45d8184545bbc8ae1df83466341b604187c1b05902cb9c920", size = 18044269, upload-time = "2025-10-10T04:00:07.449Z" }, + { url = "https://files.pythonhosted.org/packages/d5/61/6c457ee8c3a62a3cad0a4bfa4c5436bb3ac4df90c3551d40bee1224b5b51/onnx-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1839af08ab4a909e4af936b8149c27f8c64b96138981024e251906e0539d8bf9", size = 18218092, upload-time = "2025-10-10T04:00:11.135Z" }, + { url = "https://files.pythonhosted.org/packages/54/d5/ab832e1369505e67926a70e9a102061f89ad01f91aa296c4b1277cb81b25/onnx-1.19.1-cp311-cp311-win32.whl", hash = "sha256:0bdbb676e3722bd32f9227c465d552689f49086f986a696419d865cb4e70b989", size = 16344809, upload-time = "2025-10-10T04:00:14.634Z" }, + { url = "https://files.pythonhosted.org/packages/8b/b5/6eb4611d24b85002f878ba8476b4cecbe6f9784c0236a3c5eff85236cc0a/onnx-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:1346853df5c1e3ebedb2e794cf2a51e0f33759affd655524864ccbcddad7035b", size = 16464319, upload-time = "2025-10-10T04:00:18.235Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/ff/f0e1f06420c70e20d497fec7c94a864d069943b6312bedd4224c0ab946f8/onnx-1.19.1-cp311-cp311-win_arm64.whl", hash = "sha256:2d69c280c0e665b7f923f499243b9bb84fe97970b7a4668afa0032045de602c8", size = 16437503, upload-time = "2025-10-10T04:00:21.247Z" }, + { url = "https://files.pythonhosted.org/packages/50/07/f6c5b2cffef8c29e739616d1415aea22f7b7ef1f19c17f02b7cff71f5498/onnx-1.19.1-cp312-cp312-macosx_12_0_universal2.whl", hash = "sha256:3612193a89ddbce5c4e86150869b9258780a82fb8c4ca197723a4460178a6ce9", size = 18327840, upload-time = "2025-10-10T04:00:24.259Z" }, + { url = "https://files.pythonhosted.org/packages/93/20/0568ebd52730287ae80cac8ac893a7301c793ea1630984e2519ee92b02a9/onnx-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6c2fd2f744e7a3880ad0c262efa2edf6d965d0bd02b8f327ec516ad4cb0f2f15", size = 18042539, upload-time = "2025-10-10T04:00:27.693Z" }, + { url = "https://files.pythonhosted.org/packages/14/fd/cd7a0fd10a04f8cc5ae436b63e0022e236fe51b9dbb8ee6317fd48568c72/onnx-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:485d3674d50d789e0ee72fa6f6e174ab81cb14c772d594f992141bd744729d8a", size = 18218271, upload-time = "2025-10-10T04:00:30.495Z" }, + { url = "https://files.pythonhosted.org/packages/65/68/cc8b8c05469fe08384b446304ad7e6256131ca0463bf6962366eebec98c0/onnx-1.19.1-cp312-cp312-win32.whl", hash = "sha256:638bc56ff1a5718f7441e887aeb4e450f37a81c6eac482040381b140bd9ba601", size = 16345111, upload-time = "2025-10-10T04:00:34.982Z" }, + { url = "https://files.pythonhosted.org/packages/c7/5e/d1cb16693598a512c2cf9ffe0841d8d8fd2c83ae8e889efd554f5aa427cf/onnx-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:bc7e2e4e163e679721e547958b5a7db875bf822cad371b7c1304aa4401a7c7a4", size = 16465621, upload-time = "2025-10-10T04:00:39.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/32/da116cc61fdef334782aa7f87a1738431dd1af1a5d1a44bd95d6d51ad260/onnx-1.19.1-cp312-cp312-win_arm64.whl", hash = "sha256:17c215b1c0f20fe93b4cbe62668247c1d2294b9bc7f6be0ca9ced28e980c07b7", size = 16437505, upload-time = "2025-10-10T04:00:42.255Z" }, + { url = "https://files.pythonhosted.org/packages/b4/b8/ab1fdfe2e8502f4dc4289fc893db35816bd20d080d8370f86e74dda5f598/onnx-1.19.1-cp313-cp313-macosx_12_0_universal2.whl", hash = "sha256:4e5f938c68c4dffd3e19e4fd76eb98d298174eb5ebc09319cdd0ec5fe50050dc", size = 18327815, upload-time = "2025-10-10T04:00:45.682Z" }, + { url = "https://files.pythonhosted.org/packages/04/40/eb875745a4b92aea10e5e32aa2830f409c4d7b6f7b48ca1c4eaad96636c5/onnx-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:86e20a5984b017feeef2dbf4ceff1c7c161ab9423254968dd77d3696c38691d0", size = 18041464, upload-time = "2025-10-10T04:00:48.557Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8e/8586135f40dbe4989cec4d413164bc8fc5c73d37c566f33f5ea3a7f2b6f6/onnx-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d9c467f0f29993c12f330736af87972f30adb8329b515f39d63a0db929cb2c", size = 18218244, upload-time = "2025-10-10T04:00:51.891Z" }, + { url = "https://files.pythonhosted.org/packages/51/b5/4201254b8683129db5da3fb55aa1f7e56d0a8d45c66ce875dec21ca1ff25/onnx-1.19.1-cp313-cp313-win32.whl", hash = "sha256:65eee353a51b4e4ca3e797784661e5376e2b209f17557e04921eac9166a8752e", size = 16345330, upload-time = "2025-10-10T04:00:54.858Z" }, + { url = "https://files.pythonhosted.org/packages/69/67/c6d239afbcdbeb6805432969b908b5c9f700c96d332b34e3f99518d76caf/onnx-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:c3bc87e38b53554b1fc9ef7b275c81c6f5c93c90a91935bb0aa8d4d498a6d48e", size = 16465567, upload-time = "2025-10-10T04:00:57.893Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/fe/89f1e40f5bc54595ff0dcf5391ce19e578b528973ccc74dd99800196d30d/onnx-1.19.1-cp313-cp313-win_arm64.whl", hash = "sha256:e41496f400afb980ec643d80d5164753a88a85234fa5c06afdeebc8b7d1ec252", size = 16437562, upload-time = "2025-10-10T04:01:00.703Z" }, + { url = "https://files.pythonhosted.org/packages/86/43/b186ccbc8fe7e93643a6a6d40bbf2bb6ce4fb9469bbd3453c77e270c50ad/onnx-1.19.1-cp313-cp313t-macosx_12_0_universal2.whl", hash = "sha256:5f6274abf0fd74e80e78ecbb44bd44509409634525c89a9b38276c8af47dc0a2", size = 18355703, upload-time = "2025-10-10T04:01:03.735Z" }, + { url = "https://files.pythonhosted.org/packages/60/f1/22ee4d8b8f9fa4cb1d1b9579da3b4b5187ddab33846ec5ac744af02c0e2b/onnx-1.19.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:07dcd4d83584eb4bf8f21ac04c82643712e5e93ac2a0ed10121ec123cb127e1e", size = 18047830, upload-time = "2025-10-10T04:01:06.552Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a4/8f3d51e3a095d42cdf2039a590cff06d024f2a10efbd0b1a2a6b3825f019/onnx-1.19.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1975860c3e720db25d37f1619976582828264bdcc64fa7511c321ac4fc01add3", size = 18221126, upload-time = "2025-10-10T04:01:09.77Z" }, + { url = "https://files.pythonhosted.org/packages/4f/0d/f9d6c2237083f1aac14b37f0b03b0d81f1147a8e2af0c3828165e0a6a67b/onnx-1.19.1-cp313-cp313t-win_amd64.whl", hash = "sha256:9807d0e181f6070ee3a6276166acdc571575d1bd522fc7e89dba16fd6e7ffed9", size = 16465560, upload-time = "2025-10-10T04:01:13.212Z" }, + { url = "https://files.pythonhosted.org/packages/36/70/8418a58faa7d606d6a92cab69ae8d361b3b3969bf7e7e9a65a86d5d1b674/onnx-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b6ee83e6929d75005482d9f304c502ac7c9b8d6db153aa6b484dae74d0f28570", size = 18042812, upload-time = "2025-10-10T04:01:15.919Z" }, ] [[package]] name = "onnx-ir" version = "0.1.12" source = { registry = 
"https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", -] dependencies = [ - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, - { name = "numpy", marker = "python_full_version < '3.13'" }, - { name = "onnx", marker = "python_full_version < '3.13'" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "ml-dtypes" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "onnx" }, + { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6c/1a/2a94112a39d01a9d1490f5ef3c205d8a17fe1ca27f307b026c40d62d8e9f/onnx_ir-0.1.12.tar.gz", hash = "sha256:742e0bff875d0547724187560b3f441833191c8aa939c05f14176f4892784deb", size = 112699, upload-time = "2025-10-28T23:43:54.129Z" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/c8/36/c4df116f5dcaa82ec7944e5d25624a3811f6603fd190660b0b079ea759fb/onnx_ir-0.1.12-py3-none-any.whl", hash = "sha256:17f86faf8a53b979430bde1bc6022c7a162b0d1534550ddb17a1d37eb993e765", size = 129277, upload-time = "2025-10-28T23:43:52.493Z" }, ] -[[package]] -name = "onnxscript" -version = "0.5.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", -] -dependencies = [ - { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, - { name = "numpy", marker = "python_full_version >= '3.13'" }, - { name = "onnx", marker = "python_full_version >= '3.13'" }, - { name = "onnx-ir", version = "0.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, - { name = "packaging", marker = "python_full_version >= '3.13'" }, - { name = "typing-extensions", marker = "python_full_version >= '3.13'" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/f5/2f/0bb2b6ca727e4d5173f640527f402ab4225def4bc8d667269b83047be8c4/onnxscript-0.5.0.tar.gz", hash = "sha256:4aba215e1f80fbcd07ba0d97d6bca96797fc3e9639eacb5434d35317ce1406aa", size = 588762, upload-time = "2025-09-12T16:57:46.484Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/f7/f0eb0b10771637a8c176a3b0594c65c5ba3cea440847741297901cef2c5e/onnxscript-0.5.0-py3-none-any.whl", hash = "sha256:da33715ac8ec80e0263a5200f1ad1b3532225804c05a13a0d6ea83712b5b4a8f", size = 684685, upload-time = "2025-09-12T16:57:48.869Z" }, -] - [[package]] name = "onnxscript" version = "0.5.6" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", -] dependencies = [ - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, - { name = "numpy", marker = "python_full_version < '3.13'" }, - { name = "onnx", marker = "python_full_version < '3.13'" }, - { name = "onnx-ir", version = "0.1.12", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, - { name = "packaging", marker = "python_full_version < '3.13'" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "ml-dtypes" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 
'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "onnx" }, + { name = "onnx-ir" }, + { name = "packaging" }, + { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fb/4b/eed2199327bbf12c3443d7835893e3c4c23b1c1a4aa13efe0f7fbe0a6bf9/onnxscript-0.5.6.tar.gz", hash = "sha256:cc3338b2976daffd2af0bb6ac4866a4dca76aefface1666a0d7bc65ad9850822", size = 587017, upload-time = "2025-10-31T03:50:38.656Z" } wheels = [ @@ -3570,13 +3493,22 @@ version = "1.33.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "deprecated" }, - { name = "importlib-metadata", version = "8.6.1", source = { registry = "https://pypi.org/simple" } }, + { name = "importlib-metadata" }, ] sdist = { url = "https://files.pythonhosted.org/packages/9a/8d/1f5a45fbcb9a7d87809d460f09dc3399e3fbd31d7f3e14888345e9d29951/opentelemetry_api-1.33.1.tar.gz", hash = "sha256:1c6055fc0a2d3f23a50c7e17e16ef75ad489345fd3df1f8b8af7c0bbf8a109e8", size = 65002, upload-time = "2025-05-16T18:52:41.146Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/05/44/4c45a34def3506122ae61ad684139f0bbc4e00c39555d4f7e20e0e001c8a/opentelemetry_api-1.33.1-py3-none-any.whl", hash = "sha256:4db83ebcf7ea93e64637ec6ee6fabee45c5cbe4abd9cf3da95c43828ddb50b83", size = 65771, upload-time = "2025-05-16T18:52:17.419Z" }, ] +[[package]] +name = "overrides" +version = "7.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/36/86/b585f53236dec60aba864e050778b25045f857e17f6e5ea0ae95fe80edd2/overrides-7.7.0.tar.gz", hash = 
"sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a", size = 22812, upload-time = "2024-01-27T21:01:33.423Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49", size = 17832, upload-time = "2024-01-27T21:01:31.393Z" }, +] + [[package]] name = "packaging" version = "25.0" @@ -3591,7 +3523,8 @@ name = "pandas" version = "2.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "python-dateutil" }, { name = "pytz" }, { name = "tzdata" }, @@ -3798,14 +3731,14 @@ wheels = [ [[package]] name = "prettytable" -version = "3.16.0" +version = "3.17.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wcwidth" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/99/b1/85e18ac92afd08c533603e3393977b6bc1443043115a47bb094f3b98f94f/prettytable-3.16.0.tar.gz", hash = "sha256:3c64b31719d961bf69c9a7e03d0c1e477320906a98da63952bc6698d6164ff57", size = 66276, upload-time = "2025-03-24T19:39:04.008Z" } +sdist = { url = "https://files.pythonhosted.org/packages/79/45/b0847d88d6cfeb4413566738c8bbf1e1995fad3d42515327ff32cc1eb578/prettytable-3.17.0.tar.gz", hash = "sha256:59f2590776527f3c9e8cf9fe7b66dd215837cca96a9c39567414cbc632e8ddb0", size = 67892, upload-time = "2025-11-14T17:33:20.212Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/02/c7/5613524e606ea1688b3bdbf48aa64bafb6d0a4ac3750274c43b6158a390f/prettytable-3.16.0-py3-none-any.whl", hash = "sha256:b5eccfabb82222f5aa46b798ff02a8452cf530a352c31bddfa29be41242863aa", size = 33863, upload-time = "2025-03-24T19:39:02.359Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8c/83087ebc47ab0396ce092363001fa37c17153119ee282700c0713a195853/prettytable-3.17.0-py3-none-any.whl", hash = "sha256:aad69b294ddbe3e1f95ef8886a060ed1666a0b83018bbf56295f6f226c43d287", size = 34433, upload-time = "2025-11-14T17:33:19.093Z" }, ] [[package]] @@ -3958,17 +3891,17 @@ wheels = [ [[package]] name = "protobuf" -version = "6.33.0" +version = "6.33.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/19/ff/64a6c8f420818bb873713988ca5492cba3a7946be57e027ac63495157d97/protobuf-6.33.0.tar.gz", hash = "sha256:140303d5c8d2037730c548f8c7b93b20bb1dc301be280c378b82b8894589c954", size = 443463, upload-time = "2025-10-15T20:39:52.159Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/03/a1440979a3f74f16cab3b75b0da1a1a7f922d56a8ddea96092391998edc0/protobuf-6.33.1.tar.gz", hash = "sha256:97f65757e8d09870de6fd973aeddb92f85435607235d20b2dfed93405d00c85b", size = 443432, upload-time = "2025-11-13T16:44:18.895Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/ee/52b3fa8feb6db4a833dfea4943e175ce645144532e8a90f72571ad85df4e/protobuf-6.33.0-cp310-abi3-win32.whl", hash = "sha256:d6101ded078042a8f17959eccd9236fb7a9ca20d3b0098bbcb91533a5680d035", size = 425593, upload-time = "2025-10-15T20:39:40.29Z" }, - { url = "https://files.pythonhosted.org/packages/7b/c6/7a465f1825872c55e0341ff4a80198743f73b69ce5d43ab18043699d1d81/protobuf-6.33.0-cp310-abi3-win_amd64.whl", hash = "sha256:9a031d10f703f03768f2743a1c403af050b6ae1f3480e9c140f39c45f81b13ee", size = 436882, upload-time = "2025-10-15T20:39:42.841Z" }, - { url = 
"https://files.pythonhosted.org/packages/e1/a9/b6eee662a6951b9c3640e8e452ab3e09f117d99fc10baa32d1581a0d4099/protobuf-6.33.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:905b07a65f1a4b72412314082c7dbfae91a9e8b68a0cc1577515f8df58ecf455", size = 427521, upload-time = "2025-10-15T20:39:43.803Z" }, - { url = "https://files.pythonhosted.org/packages/10/35/16d31e0f92c6d2f0e77c2a3ba93185130ea13053dd16200a57434c882f2b/protobuf-6.33.0-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e0697ece353e6239b90ee43a9231318302ad8353c70e6e45499fa52396debf90", size = 324445, upload-time = "2025-10-15T20:39:44.932Z" }, - { url = "https://files.pythonhosted.org/packages/e6/eb/2a981a13e35cda8b75b5585aaffae2eb904f8f351bdd3870769692acbd8a/protobuf-6.33.0-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:e0a1715e4f27355afd9570f3ea369735afc853a6c3951a6afe1f80d8569ad298", size = 339159, upload-time = "2025-10-15T20:39:46.186Z" }, - { url = "https://files.pythonhosted.org/packages/21/51/0b1cbad62074439b867b4e04cc09b93f6699d78fd191bed2bbb44562e077/protobuf-6.33.0-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:35be49fd3f4fefa4e6e2aacc35e8b837d6703c37a2168a55ac21e9b1bc7559ef", size = 323172, upload-time = "2025-10-15T20:39:47.465Z" }, - { url = "https://files.pythonhosted.org/packages/07/d1/0a28c21707807c6aacd5dc9c3704b2aa1effbf37adebd8caeaf68b17a636/protobuf-6.33.0-py3-none-any.whl", hash = "sha256:25c9e1963c6734448ea2d308cfa610e692b801304ba0908d7bfa564ac5132995", size = 170477, upload-time = "2025-10-15T20:39:51.311Z" }, + { url = "https://files.pythonhosted.org/packages/06/f1/446a9bbd2c60772ca36556bac8bfde40eceb28d9cc7838755bc41e001d8f/protobuf-6.33.1-cp310-abi3-win32.whl", hash = "sha256:f8d3fdbc966aaab1d05046d0240dd94d40f2a8c62856d41eaa141ff64a79de6b", size = 425593, upload-time = "2025-11-13T16:44:06.275Z" }, + { url = "https://files.pythonhosted.org/packages/a6/79/8780a378c650e3df849b73de8b13cf5412f521ca2ff9b78a45c247029440/protobuf-6.33.1-cp310-abi3-win_amd64.whl", hash = 
"sha256:923aa6d27a92bf44394f6abf7ea0500f38769d4b07f4be41cb52bd8b1123b9ed", size = 436883, upload-time = "2025-11-13T16:44:09.222Z" }, + { url = "https://files.pythonhosted.org/packages/cd/93/26213ff72b103ae55bb0d73e7fb91ea570ef407c3ab4fd2f1f27cac16044/protobuf-6.33.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:fe34575f2bdde76ac429ec7b570235bf0c788883e70aee90068e9981806f2490", size = 427522, upload-time = "2025-11-13T16:44:10.475Z" }, + { url = "https://files.pythonhosted.org/packages/c2/32/df4a35247923393aa6b887c3b3244a8c941c32a25681775f96e2b418f90e/protobuf-6.33.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:f8adba2e44cde2d7618996b3fc02341f03f5bc3f2748be72dc7b063319276178", size = 324445, upload-time = "2025-11-13T16:44:11.869Z" }, + { url = "https://files.pythonhosted.org/packages/8e/d0/d796e419e2ec93d2f3fa44888861c3f88f722cde02b7c3488fcc6a166820/protobuf-6.33.1-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:0f4cf01222c0d959c2b399142deb526de420be8236f22c71356e2a544e153c53", size = 339161, upload-time = "2025-11-13T16:44:12.778Z" }, + { url = "https://files.pythonhosted.org/packages/1d/2a/3c5f05a4af06649547027d288747f68525755de692a26a7720dced3652c0/protobuf-6.33.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:8fd7d5e0eb08cd5b87fd3df49bc193f5cfd778701f47e11d127d0afc6c39f1d1", size = 323171, upload-time = "2025-11-13T16:44:14.035Z" }, + { url = "https://files.pythonhosted.org/packages/08/b4/46310463b4f6ceef310f8348786f3cff181cea671578e3d9743ba61a459e/protobuf-6.33.1-py3-none-any.whl", hash = "sha256:d595a9fd694fdeb061a62fbe10eb039cc1e444df81ec9bb70c7fc59ebcb1eafa", size = 170477, upload-time = "2025-11-13T16:44:17.633Z" }, ] [[package]] @@ -4092,7 +4025,7 @@ wheels = [ [[package]] name = "pydantic" -version = "2.12.4" +version = "2.12.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, @@ -4100,9 +4033,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] 
-sdist = { url = "https://files.pythonhosted.org/packages/96/ad/a17bc283d7d81837c061c49e3eaa27a45991759a1b7eae1031921c6bd924/pydantic-2.12.4.tar.gz", hash = "sha256:0f8cb9555000a4b5b617f66bfd2566264c4984b27589d3b845685983e8ea85ac", size = 821038, upload-time = "2025-11-05T10:50:08.59Z" } +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/82/2f/e68750da9b04856e2a7ec56fc6f034a5a79775e9b9a81882252789873798/pydantic-2.12.4-py3-none-any.whl", hash = "sha256:92d3d202a745d46f9be6df459ac5a064fdaa3c1c4cd8adcfa332ccf3c05f871e", size = 463400, upload-time = "2025-11-05T10:50:06.732Z" }, + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, ] [[package]] @@ -4311,51 +4244,39 @@ wheels = [ [[package]] name = "pynacl" -version = "1.6.0" +version = "1.6.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/06/c6/a3124dee667a423f2c637cfd262a54d67d8ccf3e160f3c50f622a85b7723/pynacl-1.6.0.tar.gz", hash = "sha256:cb36deafe6e2bce3b286e5d1f3e1c246e0ccdb8808ddb4550bb2792f2df298f2", size = 3505641, upload-time = "2025-09-10T23:39:22.308Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/70/24/1b639176401255605ba7c2b93a7b1eb1e379e0710eca62613633eb204201/pynacl-1.6.0-cp314-cp314t-macosx_10_10_universal2.whl", hash = 
"sha256:f46386c24a65383a9081d68e9c2de909b1834ec74ff3013271f1bca9c2d233eb", size = 384141, upload-time = "2025-09-10T23:38:28.675Z" }, - { url = "https://files.pythonhosted.org/packages/5e/7b/874efdf57d6bf172db0df111b479a553c3d9e8bb4f1f69eb3ffff772d6e8/pynacl-1.6.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:dea103a1afcbc333bc0e992e64233d360d393d1e63d0bc88554f572365664348", size = 808132, upload-time = "2025-09-10T23:38:38.995Z" }, - { url = "https://files.pythonhosted.org/packages/f3/61/9b53f5913f3b75ac3d53170cdb897101b2b98afc76f4d9d3c8de5aa3ac05/pynacl-1.6.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:04f20784083014e265ad58c1b2dd562c3e35864b5394a14ab54f5d150ee9e53e", size = 1407253, upload-time = "2025-09-10T23:38:40.492Z" }, - { url = "https://files.pythonhosted.org/packages/7c/0a/b138916b22bbf03a1bdbafecec37d714e7489dd7bcaf80cd17852f8b67be/pynacl-1.6.0-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bbcc4452a1eb10cd5217318c822fde4be279c9de8567f78bad24c773c21254f8", size = 843719, upload-time = "2025-09-10T23:38:30.87Z" }, - { url = "https://files.pythonhosted.org/packages/01/3b/17c368197dfb2c817ce033f94605a47d0cc27901542109e640cef263f0af/pynacl-1.6.0-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51fed9fe1bec9e7ff9af31cd0abba179d0e984a2960c77e8e5292c7e9b7f7b5d", size = 1445441, upload-time = "2025-09-10T23:38:33.078Z" }, - { url = "https://files.pythonhosted.org/packages/35/3c/f79b185365ab9be80cd3cd01dacf30bf5895f9b7b001e683b369e0bb6d3d/pynacl-1.6.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:10d755cf2a455d8c0f8c767a43d68f24d163b8fe93ccfaabfa7bafd26be58d73", size = 825691, upload-time = "2025-09-10T23:38:34.832Z" }, - { url = "https://files.pythonhosted.org/packages/f7/1f/8b37d25e95b8f2a434a19499a601d4d272b9839ab8c32f6b0fc1e40c383f/pynacl-1.6.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = 
"sha256:536703b8f90e911294831a7fbcd0c062b837f3ccaa923d92a6254e11178aaf42", size = 1410726, upload-time = "2025-09-10T23:38:36.893Z" }, - { url = "https://files.pythonhosted.org/packages/bd/93/5a4a4cf9913014f83d615ad6a2df9187330f764f606246b3a744c0788c03/pynacl-1.6.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6b08eab48c9669d515a344fb0ef27e2cbde847721e34bba94a343baa0f33f1f4", size = 801035, upload-time = "2025-09-10T23:38:42.109Z" }, - { url = "https://files.pythonhosted.org/packages/bf/60/40da6b0fe6a4d5fd88f608389eb1df06492ba2edca93fca0b3bebff9b948/pynacl-1.6.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5789f016e08e5606803161ba24de01b5a345d24590a80323379fc4408832d290", size = 1371854, upload-time = "2025-09-10T23:38:44.16Z" }, - { url = "https://files.pythonhosted.org/packages/44/b2/37ac1d65008f824cba6b5bf68d18b76d97d0f62d7a032367ea69d4a187c8/pynacl-1.6.0-cp314-cp314t-win32.whl", hash = "sha256:4853c154dc16ea12f8f3ee4b7e763331876316cc3a9f06aeedf39bcdca8f9995", size = 230345, upload-time = "2025-09-10T23:38:48.276Z" }, - { url = "https://files.pythonhosted.org/packages/f4/5a/9234b7b45af890d02ebee9aae41859b9b5f15fb4a5a56d88e3b4d1659834/pynacl-1.6.0-cp314-cp314t-win_amd64.whl", hash = "sha256:347dcddce0b4d83ed3f32fd00379c83c425abee5a9d2cd0a2c84871334eaff64", size = 243103, upload-time = "2025-09-10T23:38:45.503Z" }, - { url = "https://files.pythonhosted.org/packages/c9/2c/c1a0f19d720ab0af3bc4241af2bdf4d813c3ecdcb96392b5e1ddf2d8f24f/pynacl-1.6.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2d6cd56ce4998cb66a6c112fda7b1fdce5266c9f05044fa72972613bef376d15", size = 187778, upload-time = "2025-09-10T23:38:46.731Z" }, - { url = "https://files.pythonhosted.org/packages/63/37/87c72df19857c5b3b47ace6f211a26eb862ada495cc96daa372d96048fca/pynacl-1.6.0-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:f4b3824920e206b4f52abd7de621ea7a44fd3cb5c8daceb7c3612345dfc54f2e", size = 382610, upload-time = "2025-09-10T23:38:49.459Z" }, - { url = 
"https://files.pythonhosted.org/packages/0c/64/3ce958a5817fd3cc6df4ec14441c43fd9854405668d73babccf77f9597a3/pynacl-1.6.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:16dd347cdc8ae0b0f6187a2608c0af1c8b7ecbbe6b4a06bff8253c192f696990", size = 798744, upload-time = "2025-09-10T23:38:58.531Z" }, - { url = "https://files.pythonhosted.org/packages/e4/8a/3f0dd297a0a33fa3739c255feebd0206bb1df0b44c52fbe2caf8e8bc4425/pynacl-1.6.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:16c60daceee88d04f8d41d0a4004a7ed8d9a5126b997efd2933e08e93a3bd850", size = 1397879, upload-time = "2025-09-10T23:39:00.44Z" }, - { url = "https://files.pythonhosted.org/packages/41/94/028ff0434a69448f61348d50d2c147dda51aabdd4fbc93ec61343332174d/pynacl-1.6.0-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25720bad35dfac34a2bcdd61d9e08d6bfc6041bebc7751d9c9f2446cf1e77d64", size = 833907, upload-time = "2025-09-10T23:38:50.936Z" }, - { url = "https://files.pythonhosted.org/packages/52/bc/a5cff7f8c30d5f4c26a07dfb0bcda1176ab8b2de86dda3106c00a02ad787/pynacl-1.6.0-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8bfaa0a28a1ab718bad6239979a5a57a8d1506d0caf2fba17e524dbb409441cf", size = 1436649, upload-time = "2025-09-10T23:38:52.783Z" }, - { url = "https://files.pythonhosted.org/packages/7a/20/c397be374fd5d84295046e398de4ba5f0722dc14450f65db76a43c121471/pynacl-1.6.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:ef214b90556bb46a485b7da8258e59204c244b1b5b576fb71848819b468c44a7", size = 817142, upload-time = "2025-09-10T23:38:54.4Z" }, - { url = "https://files.pythonhosted.org/packages/12/30/5efcef3406940cda75296c6d884090b8a9aad2dcc0c304daebb5ae99fb4a/pynacl-1.6.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:49c336dd80ea54780bcff6a03ee1a476be1612423010472e60af83452aa0f442", size = 1401794, upload-time = "2025-09-10T23:38:56.614Z" }, - { url = 
"https://files.pythonhosted.org/packages/be/e1/a8fe1248cc17ccb03b676d80fa90763760a6d1247da434844ea388d0816c/pynacl-1.6.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:f3482abf0f9815e7246d461fab597aa179b7524628a4bc36f86a7dc418d2608d", size = 772161, upload-time = "2025-09-10T23:39:01.93Z" }, - { url = "https://files.pythonhosted.org/packages/a3/76/8a62702fb657d6d9104ce13449db221a345665d05e6a3fdefb5a7cafd2ad/pynacl-1.6.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:140373378e34a1f6977e573033d1dd1de88d2a5d90ec6958c9485b2fd9f3eb90", size = 1370720, upload-time = "2025-09-10T23:39:03.531Z" }, - { url = "https://files.pythonhosted.org/packages/6d/38/9e9e9b777a1c4c8204053733e1a0269672c0bd40852908c9ad6b6eaba82c/pynacl-1.6.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6b393bc5e5a0eb86bb85b533deb2d2c815666665f840a09e0aa3362bb6088736", size = 791252, upload-time = "2025-09-10T23:39:05.058Z" }, - { url = "https://files.pythonhosted.org/packages/63/ef/d972ce3d92ae05c9091363cf185e8646933f91c376e97b8be79ea6e96c22/pynacl-1.6.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4a25cfede801f01e54179b8ff9514bd7b5944da560b7040939732d1804d25419", size = 1362910, upload-time = "2025-09-10T23:39:06.924Z" }, - { url = "https://files.pythonhosted.org/packages/35/2c/ee0b373a1861f66a7ca8bdb999331525615061320dd628527a50ba8e8a60/pynacl-1.6.0-cp38-abi3-win32.whl", hash = "sha256:dcdeb41c22ff3c66eef5e63049abf7639e0db4edee57ba70531fc1b6b133185d", size = 226461, upload-time = "2025-09-10T23:39:11.894Z" }, - { url = "https://files.pythonhosted.org/packages/75/f7/41b6c0b9dd9970173b6acc026bab7b4c187e4e5beef2756d419ad65482da/pynacl-1.6.0-cp38-abi3-win_amd64.whl", hash = "sha256:cf831615cc16ba324240de79d925eacae8265b7691412ac6b24221db157f6bd1", size = 238802, upload-time = "2025-09-10T23:39:08.966Z" }, - { url = "https://files.pythonhosted.org/packages/8e/0f/462326910c6172fa2c6ed07922b22ffc8e77432b3affffd9e18f444dbfbb/pynacl-1.6.0-cp38-abi3-win_arm64.whl", hash = 
"sha256:84709cea8f888e618c21ed9a0efdb1a59cc63141c403db8bf56c469b71ad56f2", size = 183846, upload-time = "2025-09-10T23:39:10.552Z" }, -] - -[[package]] -name = "pynvml" -version = "13.0.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-ml-py" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5c/57/da7dc63a79f59e082e26a66ac02d87d69ea316b35b35b7a00d82f3ce3d2f/pynvml-13.0.1.tar.gz", hash = "sha256:1245991d9db786b4d2f277ce66869bd58f38ac654e38c9397d18f243c8f6e48f", size = 35226, upload-time = "2025-09-05T20:33:25.377Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/4a/cac76c174bb439a0c46c9a4413fcbea5c6cabfb01879f7bbdb9fdfaed76c/pynvml-13.0.1-py3-none-any.whl", hash = "sha256:e2b20e0a501eeec951e2455b7ab444759cf048e0e13a57b08049fa2775266aa8", size = 28810, upload-time = "2025-09-05T20:33:24.13Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/b2/46/aeca065d227e2265125aea590c9c47fbf5786128c9400ee0eb7c88931f06/pynacl-1.6.1.tar.gz", hash = "sha256:8d361dac0309f2b6ad33b349a56cd163c98430d409fa503b10b70b3ad66eaa1d", size = 3506616, upload-time = "2025-11-10T16:02:13.195Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/d6/4b2dca33ed512de8f54e5c6074aa06eaeb225bfbcd9b16f33a414389d6bd/pynacl-1.6.1-cp314-cp314t-macosx_10_10_universal2.whl", hash = "sha256:7d7c09749450c385301a3c20dca967a525152ae4608c0a096fe8464bfc3df93d", size = 389109, upload-time = "2025-11-10T16:01:28.79Z" }, + { url = "https://files.pythonhosted.org/packages/3c/30/e8dbb8ff4fa2559bbbb2187ba0d0d7faf728d17cb8396ecf4a898b22d3da/pynacl-1.6.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc734c1696ffd49b40f7c1779c89ba908157c57345cf626be2e0719488a076d3", size = 808254, upload-time = "2025-11-10T16:01:37.839Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/f9/f5449c652f31da00249638dbab065ad4969c635119094b79b17c3a4da2ab/pynacl-1.6.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3cd787ec1f5c155dc8ecf39b1333cfef41415dc96d392f1ce288b4fe970df489", size = 1407365, upload-time = "2025-11-10T16:01:40.454Z" }, + { url = "https://files.pythonhosted.org/packages/eb/2f/9aa5605f473b712065c0a193ebf4ad4725d7a245533f0cd7e5dcdbc78f35/pynacl-1.6.1-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b35d93ab2df03ecb3aa506be0d3c73609a51449ae0855c2e89c7ed44abde40b", size = 843842, upload-time = "2025-11-10T16:01:30.524Z" }, + { url = "https://files.pythonhosted.org/packages/32/8d/748f0f6956e207453da8f5f21a70885fbbb2e060d5c9d78e0a4a06781451/pynacl-1.6.1-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dece79aecbb8f4640a1adbb81e4aa3bfb0e98e99834884a80eb3f33c7c30e708", size = 1445559, upload-time = "2025-11-10T16:01:33.663Z" }, + { url = "https://files.pythonhosted.org/packages/78/d0/2387f0dcb0e9816f38373999e48db4728ed724d31accdd4e737473319d35/pynacl-1.6.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:c2228054f04bf32d558fb89bb99f163a8197d5a9bf4efa13069a7fa8d4b93fc3", size = 825791, upload-time = "2025-11-10T16:01:34.823Z" }, + { url = "https://files.pythonhosted.org/packages/18/3d/ef6fb7eb072aaf15f280bc66f26ab97e7fc9efa50fb1927683013ef47473/pynacl-1.6.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:2b12f1b97346f177affcdfdc78875ff42637cb40dcf79484a97dae3448083a78", size = 1410843, upload-time = "2025-11-10T16:01:36.401Z" }, + { url = "https://files.pythonhosted.org/packages/e3/fb/23824a017526850ee7d8a1cc4cd1e3e5082800522c10832edbbca8619537/pynacl-1.6.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e735c3a1bdfde3834503baf1a6d74d4a143920281cb724ba29fb84c9f49b9c48", size = 801140, upload-time = "2025-11-10T16:01:42.013Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/d1/ebc6b182cb98603a35635b727d62f094bc201bf610f97a3bb6357fe688d2/pynacl-1.6.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3384a454adf5d716a9fadcb5eb2e3e72cd49302d1374a60edc531c9957a9b014", size = 1371966, upload-time = "2025-11-10T16:01:43.297Z" }, + { url = "https://files.pythonhosted.org/packages/64/f4/c9d7b6f02924b1f31db546c7bd2a83a2421c6b4a8e6a2e53425c9f2802e0/pynacl-1.6.1-cp314-cp314t-win32.whl", hash = "sha256:d8615ee34d01c8e0ab3f302dcdd7b32e2bcf698ba5f4809e7cc407c8cdea7717", size = 230482, upload-time = "2025-11-10T16:01:47.688Z" }, + { url = "https://files.pythonhosted.org/packages/c4/2c/942477957fba22da7bf99131850e5ebdff66623418ab48964e78a7a8293e/pynacl-1.6.1-cp314-cp314t-win_amd64.whl", hash = "sha256:5f5b35c1a266f8a9ad22525049280a600b19edd1f785bccd01ae838437dcf935", size = 243232, upload-time = "2025-11-10T16:01:45.208Z" }, + { url = "https://files.pythonhosted.org/packages/7a/0c/bdbc0d04a53b96a765ab03aa2cf9a76ad8653d70bf1665459b9a0dedaa1c/pynacl-1.6.1-cp314-cp314t-win_arm64.whl", hash = "sha256:d984c91fe3494793b2a1fb1e91429539c6c28e9ec8209d26d25041ec599ccf63", size = 187907, upload-time = "2025-11-10T16:01:46.328Z" }, + { url = "https://files.pythonhosted.org/packages/49/41/3cfb3b4f3519f6ff62bf71bf1722547644bcfb1b05b8fdbdc300249ba113/pynacl-1.6.1-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:a6f9fd6d6639b1e81115c7f8ff16b8dedba1e8098d2756275d63d208b0e32021", size = 387591, upload-time = "2025-11-10T16:01:49.1Z" }, + { url = "https://files.pythonhosted.org/packages/18/21/b8a6563637799f617a3960f659513eccb3fcc655d5fc2be6e9dc6416826f/pynacl-1.6.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e49a3f3d0da9f79c1bec2aa013261ab9fa651c7da045d376bd306cf7c1792993", size = 798866, upload-time = "2025-11-10T16:01:55.688Z" }, + { url = 
"https://files.pythonhosted.org/packages/e8/6c/dc38033bc3ea461e05ae8f15a81e0e67ab9a01861d352ae971c99de23e7c/pynacl-1.6.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7713f8977b5d25f54a811ec9efa2738ac592e846dd6e8a4d3f7578346a841078", size = 1398001, upload-time = "2025-11-10T16:01:57.101Z" }, + { url = "https://files.pythonhosted.org/packages/9f/05/3ec0796a9917100a62c5073b20c4bce7bf0fea49e99b7906d1699cc7b61b/pynacl-1.6.1-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a3becafc1ee2e5ea7f9abc642f56b82dcf5be69b961e782a96ea52b55d8a9fc", size = 834024, upload-time = "2025-11-10T16:01:50.228Z" }, + { url = "https://files.pythonhosted.org/packages/f0/b7/ae9982be0f344f58d9c64a1c25d1f0125c79201634efe3c87305ac7cb3e3/pynacl-1.6.1-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4ce50d19f1566c391fedc8dc2f2f5be265ae214112ebe55315e41d1f36a7f0a9", size = 1436766, upload-time = "2025-11-10T16:01:51.886Z" }, + { url = "https://files.pythonhosted.org/packages/b4/51/b2ccbf89cf3025a02e044dd68a365cad593ebf70f532299f2c047d2b7714/pynacl-1.6.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:543f869140f67d42b9b8d47f922552d7a967e6c116aad028c9bfc5f3f3b3a7b7", size = 817275, upload-time = "2025-11-10T16:01:53.351Z" }, + { url = "https://files.pythonhosted.org/packages/a8/6c/dd9ee8214edf63ac563b08a9b30f98d116942b621d39a751ac3256694536/pynacl-1.6.1-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a2bb472458c7ca959aeeff8401b8efef329b0fc44a89d3775cffe8fad3398ad8", size = 1401891, upload-time = "2025-11-10T16:01:54.587Z" }, + { url = "https://files.pythonhosted.org/packages/0f/c1/97d3e1c83772d78ee1db3053fd674bc6c524afbace2bfe8d419fd55d7ed1/pynacl-1.6.1-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3206fa98737fdc66d59b8782cecc3d37d30aeec4593d1c8c145825a345bba0f0", size = 772291, upload-time = "2025-11-10T16:01:58.111Z" }, + { url = 
"https://files.pythonhosted.org/packages/4d/ca/691ff2fe12f3bb3e43e8e8df4b806f6384593d427f635104d337b8e00291/pynacl-1.6.1-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:53543b4f3d8acb344f75fd4d49f75e6572fce139f4bfb4815a9282296ff9f4c0", size = 1370839, upload-time = "2025-11-10T16:01:59.252Z" }, + { url = "https://files.pythonhosted.org/packages/30/27/06fe5389d30391fce006442246062cc35773c84fbcad0209fbbf5e173734/pynacl-1.6.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:319de653ef84c4f04e045eb250e6101d23132372b0a61a7acf91bac0fda8e58c", size = 791371, upload-time = "2025-11-10T16:02:01.075Z" }, + { url = "https://files.pythonhosted.org/packages/2c/7a/e2bde8c9d39074a5aa046c7d7953401608d1f16f71e237f4bef3fb9d7e49/pynacl-1.6.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:262a8de6bba4aee8a66f5edf62c214b06647461c9b6b641f8cd0cb1e3b3196fe", size = 1363031, upload-time = "2025-11-10T16:02:02.656Z" }, + { url = "https://files.pythonhosted.org/packages/dd/b6/63fd77264dae1087770a1bb414bc604470f58fbc21d83822fc9c76248076/pynacl-1.6.1-cp38-abi3-win32.whl", hash = "sha256:9fd1a4eb03caf8a2fe27b515a998d26923adb9ddb68db78e35ca2875a3830dde", size = 226585, upload-time = "2025-11-10T16:02:07.116Z" }, + { url = "https://files.pythonhosted.org/packages/12/c8/b419180f3fdb72ab4d45e1d88580761c267c7ca6eda9a20dcbcba254efe6/pynacl-1.6.1-cp38-abi3-win_amd64.whl", hash = "sha256:a569a4069a7855f963940040f35e87d8bc084cb2d6347428d5ad20550a0a1a21", size = 238923, upload-time = "2025-11-10T16:02:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/35/76/c34426d532e4dce7ff36e4d92cb20f4cbbd94b619964b93d24e8f5b5510f/pynacl-1.6.1-cp38-abi3-win_arm64.whl", hash = "sha256:5953e8b8cfadb10889a6e7bd0f53041a745d1b3d30111386a1bb37af171e6daf", size = 183970, upload-time = "2025-11-10T16:02:05.786Z" }, ] [[package]] @@ -4390,16 +4311,16 @@ wheels = [ [[package]] name = "pytest-asyncio" -version = "1.2.0" +version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ 
{ name = "backports-asyncio-runner", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "pytest" }, { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" } +sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" }, + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" }, ] [[package]] @@ -4595,7 +4516,7 @@ wheels = [ [[package]] name = "ray" -version = "2.49.2" +version = "2.51.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -4608,25 +4529,21 @@ dependencies = [ { name = "requests" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/99/517f224ffd073689c4905bdb185c21d9d8936d75066a96d454878f9e1e47/ray-2.49.2-cp310-cp310-macosx_12_0_arm64.whl", hash = 
"sha256:08bec467576bc030d8bd0638004e1b8e075588929349112988a4bd4928684e8c", size = 66869076, upload-time = "2025-09-19T19:14:37.371Z" }, - { url = "https://files.pythonhosted.org/packages/61/c5/c2ceba832fe3f47cfd7e11cd7cc7a1bbc2c028424c5bca70435aa4ca1dec/ray-2.49.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:3e441bf2acd7f368cf45132752066c5c3b83d88cd5f85762e703774bba4f2b6d", size = 69263514, upload-time = "2025-09-19T19:14:45.519Z" }, - { url = "https://files.pythonhosted.org/packages/63/0e/830df5a0f7e2b582422ee8ad0cdf2a2a9563aa63bb8e60be9ceec494981c/ray-2.49.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:eae07b3fed45f5b041a8bf9795cd26fad2464be5126efd447e4484905a29b677", size = 69125462, upload-time = "2025-09-19T19:14:51.029Z" }, - { url = "https://files.pythonhosted.org/packages/c0/85/a340eba596db3f66d3a338aff43942d8bac32732fb4cf4a20ed4bbbd07eb/ray-2.49.2-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:74566876af7bf4e48ea4b9b3b75b34db053d1064cc4d4b1670dc4ce78f6894af", size = 69935752, upload-time = "2025-09-19T19:14:56.191Z" }, - { url = "https://files.pythonhosted.org/packages/ac/e6/809730d87cdf762e76728ea6bb3f96e38fa2dc7ef7d572a49c0d7ebcde95/ray-2.49.2-cp310-cp310-win_amd64.whl", hash = "sha256:e6becc2026d900ca0ba07eff12a130c9d651a91290bb24d43594842b575cc4e5", size = 26246695, upload-time = "2025-09-19T19:15:00.9Z" }, - { url = "https://files.pythonhosted.org/packages/b5/63/27c7fb49513c816b825c809dd33a8570b35d511d1b5e568a4b33b0557997/ray-2.49.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:4fb9f9bf62fd5c92d22da20cd2aacb4ade1fb23033765fa9274f0a0c50bc42f6", size = 66869606, upload-time = "2025-09-19T19:15:05.838Z" }, - { url = "https://files.pythonhosted.org/packages/52/9a/9728d1e9dc5473acf0e4f67081dc323d3333c8c87a1e9260ea8878720017/ray-2.49.2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:9ece957a13985f7bbf4077f4ff0204314d7e99a941f95dff2a16b453d5376dc3", size = 69273124, upload-time = "2025-09-19T19:15:11.348Z" }, - { url = 
"https://files.pythonhosted.org/packages/38/67/93f0d6d558874a730581059eb6dfa8860991a5410502ea0685dba5e788e4/ray-2.49.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:eada9dd89ccda643a3c6c2cba7016b59898432d126e10b38fed52d74165364f4", size = 69266231, upload-time = "2025-09-19T19:15:16.92Z" }, - { url = "https://files.pythonhosted.org/packages/c1/2b/f2efd0e7bcef06d51422db1af48cc5695a3f9b40a444f9d270a2d4663252/ray-2.49.2-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:54077dde338c5ffba349a4ab61b72352a3c3be69ea5b4f1b436d98d40b312763", size = 70070382, upload-time = "2025-09-19T19:15:22.048Z" }, - { url = "https://files.pythonhosted.org/packages/d7/b5/dfe1240e13d88dc68de03ee7c617f7578ef026e8569a42f7eeeb4729c5e3/ray-2.49.2-cp311-cp311-win_amd64.whl", hash = "sha256:41e11802ebbc487380e6c21dc041cb405e69fdda717a4eafdfeea294c6c3f9ca", size = 26243798, upload-time = "2025-09-19T19:15:26.405Z" }, - { url = "https://files.pythonhosted.org/packages/01/66/0d4e518d611486244b357a6cf58a31d7d184f5558e03d5e482c335749616/ray-2.49.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:d6d612de5c6341b776fc75edeee5b698bb4af7ee84a2ff30552b32a9e6e4a772", size = 66857495, upload-time = "2025-09-19T19:15:31.427Z" }, - { url = "https://files.pythonhosted.org/packages/1a/4c/76f2c7c0946645fdd8d286a3e00e2c42130d676286de206be5d60d271218/ray-2.49.2-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:6784e076e4418222ef8ee3b6a8bfeb867d8797803b25bcfcce3bf3bc5414bef1", size = 69262599, upload-time = "2025-09-19T19:15:36.732Z" }, - { url = "https://files.pythonhosted.org/packages/da/99/23b732c0b7b2ee2ffd28bf632257fb98924a03251d251810cb637512fcab/ray-2.49.2-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:dd0d8d8641d142fafe6d83e87d3c19bd5637d21e34608d3ff69ad71ea3e2f462", size = 69287193, upload-time = "2025-09-19T19:15:42.093Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/ca/94791be5c3b68ed0df85589a8ca558334818a47bf2978000f85533245aed/ray-2.49.2-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:2ecaaa51f588ccdda2b61563a8be3843bf65dfaaa83a240588a307f4ebb82471", size = 70114942, upload-time = "2025-09-19T19:15:47.536Z" }, - { url = "https://files.pythonhosted.org/packages/e0/22/3f4b77498eefb3152a5946f9f544fcf336e7b9970c5c8af8e2d5eed13f0b/ray-2.49.2-cp312-cp312-win_amd64.whl", hash = "sha256:cba59684f031c9e778c588bc925777967e1b49bab3f00c638e4980bfdab07aec", size = 26223595, upload-time = "2025-09-19T19:15:51.803Z" }, - { url = "https://files.pythonhosted.org/packages/99/dc/a7e569bf7030e0ec50163aed731189e744ca857d74f51b24361ce426697a/ray-2.49.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:2e2fe20fa90562e73630da9ff7932d3ed6507e73291c4d9bdf566537ae9deddf", size = 66803846, upload-time = "2025-09-19T19:15:56.928Z" }, - { url = "https://files.pythonhosted.org/packages/4e/cf/6667e01f39cd28637f082273e9147f16d5f8fff34e2fb0ca60cc5da76e22/ray-2.49.2-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b2f4f0fed936faf688e87ffdcc9356c034513c00259a2f1a8589e345fcfbdbc0", size = 69208426, upload-time = "2025-09-19T19:16:02.085Z" }, - { url = "https://files.pythonhosted.org/packages/c5/84/5361bcdc9c9fb9f4abbf836801803b7df75c76c16a56493413eb154b8a34/ray-2.49.2-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:b4c7869688c518e902f7b6288edec2365ab4d28a464291e6d0a7040c7d01b5f7", size = 69198140, upload-time = "2025-09-19T19:16:07.413Z" }, - { url = "https://files.pythonhosted.org/packages/b0/0c/9e49c3da7502f18483e4deb3273a3104d501c5e9cf1664a136b8ea36df48/ray-2.49.2-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:b7d8214cff86df044fec727eeeabccc3bfc9b0271d28d61ba92c09f0d127d01d", size = 70027331, upload-time = "2025-09-19T19:16:12.968Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/4b/8ded0ecb0ed08b75af47340fac4b14b15196a76a6d733f3945cc5cb77354/ray-2.51.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e8ce218c85e9f4043c37136fc90b41343bdb844fcdc9520f21c000d1d8d49f89", size = 68039113, upload-time = "2025-11-01T03:23:30.619Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a7/aba274bd1e1014cb232ee04548cc3d7aab9b84eb13c44d71b72d189421f9/ray-2.51.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:36feb519f31c52d3b4dbcd68ffb2baf93195ceec06ea711e21559096bab95fed", size = 70340511, upload-time = "2025-11-01T03:23:38.217Z" }, + { url = "https://files.pythonhosted.org/packages/fa/42/a5712f4f8c911ea5b8b3cb406ceef18a1c1bc98490c66fa902cb72391af3/ray-2.51.1-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:8a21f5914baa3deefcb4fa5f3878e03b589c190b864fe1b80e6dc0cbfba26004", size = 71166513, upload-time = "2025-11-01T03:23:44.123Z" }, + { url = "https://files.pythonhosted.org/packages/91/1e/eeae1da4ffac6eeeeafce2d11c0b6133fd4df1b3e53bc44d61c30c05b6d9/ray-2.51.1-cp310-cp310-win_amd64.whl", hash = "sha256:a82417b89260ed751a76e9cfaef6d11392ab0da464cde1a9d07a0bb7dc272a7b", size = 26695587, upload-time = "2025-11-01T03:23:49.739Z" }, + { url = "https://files.pythonhosted.org/packages/43/66/f1e11291d9fdf0634ea763cfb167cf449773d13918bb04390e6263b7129b/ray-2.51.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:bd8211fc033be1bce9c039e474e97a9077be593020978fdcfba1d770bdc40ba5", size = 68043927, upload-time = "2025-11-01T03:23:59.655Z" }, + { url = "https://files.pythonhosted.org/packages/be/89/9a11d0addbba6143f5a34929ed1fdef51159328b9b76a877c0c7f98b2848/ray-2.51.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d2d7c8af45441ff50bc002352d31e0afec5c85dd5075bf527027178931497bce", size = 70460551, upload-time = "2025-11-01T03:24:05.77Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/67/40a8d63e4cb3ff1a1a5a12db77ca655e21cb13f10e024a9513f24ed11d98/ray-2.51.1-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:dd353010d2548bc345e46c45795f70291bb460c236aa6a3393b51a9cd861b56f", size = 71280610, upload-time = "2025-11-01T03:24:11.981Z" }, + { url = "https://files.pythonhosted.org/packages/62/97/90bcfed6b8c986f9ea24def19bbb81480575dd5fa87630eeaa4c92652507/ray-2.51.1-cp311-cp311-win_amd64.whl", hash = "sha256:606c6e0733eb18fc307c9645ea84ccbd1aad8a5ba8bad764bed54b94e926d33c", size = 26691238, upload-time = "2025-11-01T03:24:16.978Z" }, + { url = "https://files.pythonhosted.org/packages/f6/95/51e44ce79e42f02ca1c4d4c5501e6dd49f3a384c5f6324aceb4e0015988a/ray-2.51.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ef847b025ca758baee4571a1ca001d973897cad772f8e95d7f303d24c38b649e", size = 68029226, upload-time = "2025-11-01T03:24:21.928Z" }, + { url = "https://files.pythonhosted.org/packages/e2/b5/a93e39e131067edb7cba3385a609f61aaaf7aa54728cd3a7474bfbf3b0fc/ray-2.51.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:0bed9408712bad1511e65683a455302f88d94e5e5cb6a58cc4a154b61d8a0b4a", size = 70502423, upload-time = "2025-11-01T03:24:27.398Z" }, + { url = "https://files.pythonhosted.org/packages/ee/59/69b7a653ed8176fc7fd894d462ed34bb1477e7fa71700324de99179b5b7e/ray-2.51.1-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:4e786da7862cf73664977d0212a505d6d5a585beadf63e7dc1e1c129259bee20", size = 71353730, upload-time = "2025-11-01T03:24:33.495Z" }, + { url = "https://files.pythonhosted.org/packages/38/91/0c4fe7aed34baa14d9c050c88f39ff16083d555bd6dcd6c4ffb4332a6f8a/ray-2.51.1-cp312-cp312-win_amd64.whl", hash = "sha256:198fda93074a6863555f4003e9013bb2ba0cd50b59b18c02affdc294b28a2eef", size = 26674921, upload-time = "2025-11-01T03:24:38.394Z" }, + { url = "https://files.pythonhosted.org/packages/65/1c/3ebf7277d8ae5f99150a5890bff4bdc627021e3a1be7caacd075d2996c7a/ray-2.51.1-cp313-cp313-macosx_12_0_arm64.whl", 
hash = "sha256:d81547886435142dbd79bff1d4e4edf578a5f20e3b11bbd4ced49cfafbd37d27", size = 67974221, upload-time = "2025-11-01T03:24:44.118Z" }, + { url = "https://files.pythonhosted.org/packages/f6/47/13ba6c4d0e97aff94dcf8537f2832d1101c2080a0aea5c973a4de1d4d8bd/ray-2.51.1-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:3f2bd2acf9b7f4738c17d08592caaad26eafb7a4fc380ad9ab42d5f0a78f73ad", size = 70410610, upload-time = "2025-11-01T03:24:50.075Z" }, + { url = "https://files.pythonhosted.org/packages/ac/87/3cdf6d0504659d8192baa6576dd7a17ea395a4d969010274f7cc0e894281/ray-2.51.1-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:265ecd6fd6d4a695b09c686e17d58fca0c09e7198c073628ae7bf4974b03e9ca", size = 71269225, upload-time = "2025-11-01T03:24:55.929Z" }, ] [[package]] @@ -4801,124 +4718,124 @@ wheels = [ [[package]] name = "rpds-py" -version = "0.28.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/48/dc/95f074d43452b3ef5d06276696ece4b3b5d696e7c9ad7173c54b1390cd70/rpds_py-0.28.0.tar.gz", hash = "sha256:abd4df20485a0983e2ca334a216249b6186d6e3c1627e106651943dbdb791aea", size = 27419, upload-time = "2025-10-22T22:24:29.327Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/82/f8/13bb772dc7cbf2c3c5b816febc34fa0cb2c64a08e0569869585684ce6631/rpds_py-0.28.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7b6013db815417eeb56b2d9d7324e64fcd4fa289caeee6e7a78b2e11fc9b438a", size = 362820, upload-time = "2025-10-22T22:21:15.074Z" }, - { url = "https://files.pythonhosted.org/packages/84/91/6acce964aab32469c3dbe792cb041a752d64739c534e9c493c701ef0c032/rpds_py-0.28.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a4c6b05c685c0c03f80dabaeb73e74218c49deea965ca63f76a752807397207", size = 348499, upload-time = "2025-10-22T22:21:17.658Z" }, - { url = 
"https://files.pythonhosted.org/packages/f1/93/c05bb1f4f5e0234db7c4917cb8dd5e2e0a9a7b26dc74b1b7bee3c9cfd477/rpds_py-0.28.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4794c6c3fbe8f9ac87699b131a1f26e7b4abcf6d828da46a3a52648c7930eba", size = 379356, upload-time = "2025-10-22T22:21:19.847Z" }, - { url = "https://files.pythonhosted.org/packages/5c/37/e292da436f0773e319753c567263427cdf6c645d30b44f09463ff8216cda/rpds_py-0.28.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e8456b6ee5527112ff2354dd9087b030e3429e43a74f480d4a5ca79d269fd85", size = 390151, upload-time = "2025-10-22T22:21:21.569Z" }, - { url = "https://files.pythonhosted.org/packages/76/87/a4e3267131616e8faf10486dc00eaedf09bd61c87f01e5ef98e782ee06c9/rpds_py-0.28.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:beb880a9ca0a117415f241f66d56025c02037f7c4efc6fe59b5b8454f1eaa50d", size = 524831, upload-time = "2025-10-22T22:21:23.394Z" }, - { url = "https://files.pythonhosted.org/packages/e1/c8/4a4ca76f0befae9515da3fad11038f0fce44f6bb60b21fe9d9364dd51fb0/rpds_py-0.28.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6897bebb118c44b38c9cb62a178e09f1593c949391b9a1a6fe777ccab5934ee7", size = 404687, upload-time = "2025-10-22T22:21:25.201Z" }, - { url = "https://files.pythonhosted.org/packages/6a/65/118afe854424456beafbbebc6b34dcf6d72eae3a08b4632bc4220f8240d9/rpds_py-0.28.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1b553dd06e875249fd43efd727785efb57a53180e0fde321468222eabbeaafa", size = 382683, upload-time = "2025-10-22T22:21:26.536Z" }, - { url = "https://files.pythonhosted.org/packages/f7/bc/0625064041fb3a0c77ecc8878c0e8341b0ae27ad0f00cf8f2b57337a1e63/rpds_py-0.28.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:f0b2044fdddeea5b05df832e50d2a06fe61023acb44d76978e1b060206a8a476", size = 398927, upload-time = "2025-10-22T22:21:27.864Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/1a/fed7cf2f1ee8a5e4778f2054153f2cfcf517748875e2f5b21cf8907cd77d/rpds_py-0.28.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05cf1e74900e8da73fa08cc76c74a03345e5a3e37691d07cfe2092d7d8e27b04", size = 411590, upload-time = "2025-10-22T22:21:29.474Z" }, - { url = "https://files.pythonhosted.org/packages/c1/64/a8e0f67fa374a6c472dbb0afdaf1ef744724f165abb6899f20e2f1563137/rpds_py-0.28.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:efd489fec7c311dae25e94fe7eeda4b3d06be71c68f2cf2e8ef990ffcd2cd7e8", size = 559843, upload-time = "2025-10-22T22:21:30.917Z" }, - { url = "https://files.pythonhosted.org/packages/a9/ea/e10353f6d7c105be09b8135b72787a65919971ae0330ad97d87e4e199880/rpds_py-0.28.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ada7754a10faacd4f26067e62de52d6af93b6d9542f0df73c57b9771eb3ba9c4", size = 584188, upload-time = "2025-10-22T22:21:32.827Z" }, - { url = "https://files.pythonhosted.org/packages/18/b0/a19743e0763caf0c89f6fc6ba6fbd9a353b24ffb4256a492420c5517da5a/rpds_py-0.28.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c2a34fd26588949e1e7977cfcbb17a9a42c948c100cab890c6d8d823f0586457", size = 550052, upload-time = "2025-10-22T22:21:34.702Z" }, - { url = "https://files.pythonhosted.org/packages/de/bc/ec2c004f6c7d6ab1e25dae875cdb1aee087c3ebed5b73712ed3000e3851a/rpds_py-0.28.0-cp310-cp310-win32.whl", hash = "sha256:f9174471d6920cbc5e82a7822de8dfd4dcea86eb828b04fc8c6519a77b0ee51e", size = 215110, upload-time = "2025-10-22T22:21:36.645Z" }, - { url = "https://files.pythonhosted.org/packages/6c/de/4ce8abf59674e17187023933547d2018363e8fc76ada4f1d4d22871ccb6e/rpds_py-0.28.0-cp310-cp310-win_amd64.whl", hash = "sha256:6e32dd207e2c4f8475257a3540ab8a93eff997abfa0a3fdb287cae0d6cd874b8", size = 223850, upload-time = "2025-10-22T22:21:38.006Z" }, - { url = 
"https://files.pythonhosted.org/packages/a6/34/058d0db5471c6be7bef82487ad5021ff8d1d1d27794be8730aad938649cf/rpds_py-0.28.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:03065002fd2e287725d95fbc69688e0c6daf6c6314ba38bdbaa3895418e09296", size = 362344, upload-time = "2025-10-22T22:21:39.713Z" }, - { url = "https://files.pythonhosted.org/packages/5d/67/9503f0ec8c055a0782880f300c50a2b8e5e72eb1f94dfc2053da527444dd/rpds_py-0.28.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:28ea02215f262b6d078daec0b45344c89e161eab9526b0d898221d96fdda5f27", size = 348440, upload-time = "2025-10-22T22:21:41.056Z" }, - { url = "https://files.pythonhosted.org/packages/68/2e/94223ee9b32332a41d75b6f94b37b4ce3e93878a556fc5f152cbd856a81f/rpds_py-0.28.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25dbade8fbf30bcc551cb352376c0ad64b067e4fc56f90e22ba70c3ce205988c", size = 379068, upload-time = "2025-10-22T22:21:42.593Z" }, - { url = "https://files.pythonhosted.org/packages/b4/25/54fd48f9f680cfc44e6a7f39a5fadf1d4a4a1fd0848076af4a43e79f998c/rpds_py-0.28.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c03002f54cc855860bfdc3442928ffdca9081e73b5b382ed0b9e8efe6e5e205", size = 390518, upload-time = "2025-10-22T22:21:43.998Z" }, - { url = "https://files.pythonhosted.org/packages/1b/85/ac258c9c27f2ccb1bd5d0697e53a82ebcf8088e3186d5d2bf8498ee7ed44/rpds_py-0.28.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b9699fa7990368b22032baf2b2dce1f634388e4ffc03dfefaaac79f4695edc95", size = 525319, upload-time = "2025-10-22T22:21:45.645Z" }, - { url = "https://files.pythonhosted.org/packages/40/cb/c6734774789566d46775f193964b76627cd5f42ecf246d257ce84d1912ed/rpds_py-0.28.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9b06fe1a75e05e0713f06ea0c89ecb6452210fd60e2f1b6ddc1067b990e08d9", size = 404896, upload-time = "2025-10-22T22:21:47.544Z" }, - { url = 
"https://files.pythonhosted.org/packages/1f/53/14e37ce83202c632c89b0691185dca9532288ff9d390eacae3d2ff771bae/rpds_py-0.28.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac9f83e7b326a3f9ec3ef84cda98fb0a74c7159f33e692032233046e7fd15da2", size = 382862, upload-time = "2025-10-22T22:21:49.176Z" }, - { url = "https://files.pythonhosted.org/packages/6a/83/f3642483ca971a54d60caa4449f9d6d4dbb56a53e0072d0deff51b38af74/rpds_py-0.28.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:0d3259ea9ad8743a75a43eb7819324cdab393263c91be86e2d1901ee65c314e0", size = 398848, upload-time = "2025-10-22T22:21:51.024Z" }, - { url = "https://files.pythonhosted.org/packages/44/09/2d9c8b2f88e399b4cfe86efdf2935feaf0394e4f14ab30c6c5945d60af7d/rpds_py-0.28.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a7548b345f66f6695943b4ef6afe33ccd3f1b638bd9afd0f730dd255c249c9e", size = 412030, upload-time = "2025-10-22T22:21:52.665Z" }, - { url = "https://files.pythonhosted.org/packages/dd/f5/e1cec473d4bde6df1fd3738be8e82d64dd0600868e76e92dfeaebbc2d18f/rpds_py-0.28.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9a40040aa388b037eb39416710fbcce9443498d2eaab0b9b45ae988b53f5c67", size = 559700, upload-time = "2025-10-22T22:21:54.123Z" }, - { url = "https://files.pythonhosted.org/packages/8d/be/73bb241c1649edbf14e98e9e78899c2c5e52bbe47cb64811f44d2cc11808/rpds_py-0.28.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8f60c7ea34e78c199acd0d3cda37a99be2c861dd2b8cf67399784f70c9f8e57d", size = 584581, upload-time = "2025-10-22T22:21:56.102Z" }, - { url = "https://files.pythonhosted.org/packages/9c/9c/ffc6e9218cd1eb5c2c7dbd276c87cd10e8c2232c456b554169eb363381df/rpds_py-0.28.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1571ae4292649100d743b26d5f9c63503bb1fedf538a8f29a98dce2d5ba6b4e6", size = 549981, upload-time = "2025-10-22T22:21:58.253Z" }, - { url = 
"https://files.pythonhosted.org/packages/5f/50/da8b6d33803a94df0149345ee33e5d91ed4d25fc6517de6a25587eae4133/rpds_py-0.28.0-cp311-cp311-win32.whl", hash = "sha256:5cfa9af45e7c1140af7321fa0bef25b386ee9faa8928c80dc3a5360971a29e8c", size = 214729, upload-time = "2025-10-22T22:21:59.625Z" }, - { url = "https://files.pythonhosted.org/packages/12/fd/b0f48c4c320ee24c8c20df8b44acffb7353991ddf688af01eef5f93d7018/rpds_py-0.28.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd8d86b5d29d1b74100982424ba53e56033dc47720a6de9ba0259cf81d7cecaa", size = 223977, upload-time = "2025-10-22T22:22:01.092Z" }, - { url = "https://files.pythonhosted.org/packages/b4/21/c8e77a2ac66e2ec4e21f18a04b4e9a0417ecf8e61b5eaeaa9360a91713b4/rpds_py-0.28.0-cp311-cp311-win_arm64.whl", hash = "sha256:4e27d3a5709cc2b3e013bf93679a849213c79ae0573f9b894b284b55e729e120", size = 217326, upload-time = "2025-10-22T22:22:02.944Z" }, - { url = "https://files.pythonhosted.org/packages/b8/5c/6c3936495003875fe7b14f90ea812841a08fca50ab26bd840e924097d9c8/rpds_py-0.28.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6b4f28583a4f247ff60cd7bdda83db8c3f5b05a7a82ff20dd4b078571747708f", size = 366439, upload-time = "2025-10-22T22:22:04.525Z" }, - { url = "https://files.pythonhosted.org/packages/56/f9/a0f1ca194c50aa29895b442771f036a25b6c41a35e4f35b1a0ea713bedae/rpds_py-0.28.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d678e91b610c29c4b3d52a2c148b641df2b4676ffe47c59f6388d58b99cdc424", size = 348170, upload-time = "2025-10-22T22:22:06.397Z" }, - { url = "https://files.pythonhosted.org/packages/18/ea/42d243d3a586beb72c77fa5def0487daf827210069a95f36328e869599ea/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e819e0e37a44a78e1383bf1970076e2ccc4dc8c2bbaa2f9bd1dc987e9afff628", size = 378838, upload-time = "2025-10-22T22:22:07.932Z" }, - { url = 
"https://files.pythonhosted.org/packages/e7/78/3de32e18a94791af8f33601402d9d4f39613136398658412a4e0b3047327/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5ee514e0f0523db5d3fb171f397c54875dbbd69760a414dccf9d4d7ad628b5bd", size = 393299, upload-time = "2025-10-22T22:22:09.435Z" }, - { url = "https://files.pythonhosted.org/packages/13/7e/4bdb435afb18acea2eb8a25ad56b956f28de7c59f8a1d32827effa0d4514/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3fa06d27fdcee47f07a39e02862da0100cb4982508f5ead53ec533cd5fe55e", size = 518000, upload-time = "2025-10-22T22:22:11.326Z" }, - { url = "https://files.pythonhosted.org/packages/31/d0/5f52a656875cdc60498ab035a7a0ac8f399890cc1ee73ebd567bac4e39ae/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46959ef2e64f9e4a41fc89aa20dbca2b85531f9a72c21099a3360f35d10b0d5a", size = 408746, upload-time = "2025-10-22T22:22:13.143Z" }, - { url = "https://files.pythonhosted.org/packages/3e/cd/49ce51767b879cde77e7ad9fae164ea15dce3616fe591d9ea1df51152706/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8455933b4bcd6e83fde3fefc987a023389c4b13f9a58c8d23e4b3f6d13f78c84", size = 386379, upload-time = "2025-10-22T22:22:14.602Z" }, - { url = "https://files.pythonhosted.org/packages/6a/99/e4e1e1ee93a98f72fc450e36c0e4d99c35370220e815288e3ecd2ec36a2a/rpds_py-0.28.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:ad50614a02c8c2962feebe6012b52f9802deec4263946cddea37aaf28dd25a66", size = 401280, upload-time = "2025-10-22T22:22:16.063Z" }, - { url = "https://files.pythonhosted.org/packages/61/35/e0c6a57488392a8b319d2200d03dad2b29c0db9996f5662c3b02d0b86c02/rpds_py-0.28.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5deca01b271492553fdb6c7fd974659dce736a15bae5dad7ab8b93555bceb28", size = 412365, upload-time = "2025-10-22T22:22:17.504Z" }, - { url = 
"https://files.pythonhosted.org/packages/ff/6a/841337980ea253ec797eb084665436007a1aad0faac1ba097fb906c5f69c/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:735f8495a13159ce6a0d533f01e8674cec0c57038c920495f87dcb20b3ddb48a", size = 559573, upload-time = "2025-10-22T22:22:19.108Z" }, - { url = "https://files.pythonhosted.org/packages/e7/5e/64826ec58afd4c489731f8b00729c5f6afdb86f1df1df60bfede55d650bb/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:961ca621ff10d198bbe6ba4957decca61aa2a0c56695384c1d6b79bf61436df5", size = 583973, upload-time = "2025-10-22T22:22:20.768Z" }, - { url = "https://files.pythonhosted.org/packages/b6/ee/44d024b4843f8386a4eeaa4c171b3d31d55f7177c415545fd1a24c249b5d/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2374e16cc9131022e7d9a8f8d65d261d9ba55048c78f3b6e017971a4f5e6353c", size = 553800, upload-time = "2025-10-22T22:22:22.25Z" }, - { url = "https://files.pythonhosted.org/packages/7d/89/33e675dccff11a06d4d85dbb4d1865f878d5020cbb69b2c1e7b2d3f82562/rpds_py-0.28.0-cp312-cp312-win32.whl", hash = "sha256:d15431e334fba488b081d47f30f091e5d03c18527c325386091f31718952fe08", size = 216954, upload-time = "2025-10-22T22:22:24.105Z" }, - { url = "https://files.pythonhosted.org/packages/af/36/45f6ebb3210887e8ee6dbf1bc710ae8400bb417ce165aaf3024b8360d999/rpds_py-0.28.0-cp312-cp312-win_amd64.whl", hash = "sha256:a410542d61fc54710f750d3764380b53bf09e8c4edbf2f9141a82aa774a04f7c", size = 227844, upload-time = "2025-10-22T22:22:25.551Z" }, - { url = "https://files.pythonhosted.org/packages/57/91/f3fb250d7e73de71080f9a221d19bd6a1c1eb0d12a1ea26513f6c1052ad6/rpds_py-0.28.0-cp312-cp312-win_arm64.whl", hash = "sha256:1f0cfd1c69e2d14f8c892b893997fa9a60d890a0c8a603e88dca4955f26d1edd", size = 217624, upload-time = "2025-10-22T22:22:26.914Z" }, - { url = "https://files.pythonhosted.org/packages/d3/03/ce566d92611dfac0085c2f4b048cd53ed7c274a5c05974b882a908d540a2/rpds_py-0.28.0-cp313-cp313-macosx_10_12_x86_64.whl", 
hash = "sha256:e9e184408a0297086f880556b6168fa927d677716f83d3472ea333b42171ee3b", size = 366235, upload-time = "2025-10-22T22:22:28.397Z" }, - { url = "https://files.pythonhosted.org/packages/00/34/1c61da1b25592b86fd285bd7bd8422f4c9d748a7373b46126f9ae792a004/rpds_py-0.28.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:edd267266a9b0448f33dc465a97cfc5d467594b600fe28e7fa2f36450e03053a", size = 348241, upload-time = "2025-10-22T22:22:30.171Z" }, - { url = "https://files.pythonhosted.org/packages/fc/00/ed1e28616848c61c493a067779633ebf4b569eccaacf9ccbdc0e7cba2b9d/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85beb8b3f45e4e32f6802fb6cd6b17f615ef6c6a52f265371fb916fae02814aa", size = 378079, upload-time = "2025-10-22T22:22:31.644Z" }, - { url = "https://files.pythonhosted.org/packages/11/b2/ccb30333a16a470091b6e50289adb4d3ec656fd9951ba8c5e3aaa0746a67/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d2412be8d00a1b895f8ad827cc2116455196e20ed994bb704bf138fe91a42724", size = 393151, upload-time = "2025-10-22T22:22:33.453Z" }, - { url = "https://files.pythonhosted.org/packages/8c/d0/73e2217c3ee486d555cb84920597480627d8c0240ff3062005c6cc47773e/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cf128350d384b777da0e68796afdcebc2e9f63f0e9f242217754e647f6d32491", size = 517520, upload-time = "2025-10-22T22:22:34.949Z" }, - { url = "https://files.pythonhosted.org/packages/c4/91/23efe81c700427d0841a4ae7ea23e305654381831e6029499fe80be8a071/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a2036d09b363aa36695d1cc1a97b36865597f4478470b0697b5ee9403f4fe399", size = 408699, upload-time = "2025-10-22T22:22:36.584Z" }, - { url = "https://files.pythonhosted.org/packages/ca/ee/a324d3198da151820a326c1f988caaa4f37fc27955148a76fff7a2d787a9/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b8e1e9be4fa6305a16be628959188e4fd5cd6f1b0e724d63c6d8b2a8adf74ea6", size = 385720, upload-time = "2025-10-22T22:22:38.014Z" }, - { url = "https://files.pythonhosted.org/packages/19/ad/e68120dc05af8b7cab4a789fccd8cdcf0fe7e6581461038cc5c164cd97d2/rpds_py-0.28.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0a403460c9dd91a7f23fc3188de6d8977f1d9603a351d5db6cf20aaea95b538d", size = 401096, upload-time = "2025-10-22T22:22:39.869Z" }, - { url = "https://files.pythonhosted.org/packages/99/90/c1e070620042459d60df6356b666bb1f62198a89d68881816a7ed121595a/rpds_py-0.28.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d7366b6553cdc805abcc512b849a519167db8f5e5c3472010cd1228b224265cb", size = 411465, upload-time = "2025-10-22T22:22:41.395Z" }, - { url = "https://files.pythonhosted.org/packages/68/61/7c195b30d57f1b8d5970f600efee72a4fad79ec829057972e13a0370fd24/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b43c6a3726efd50f18d8120ec0551241c38785b68952d240c45ea553912ac41", size = 558832, upload-time = "2025-10-22T22:22:42.871Z" }, - { url = "https://files.pythonhosted.org/packages/b0/3d/06f3a718864773f69941d4deccdf18e5e47dd298b4628062f004c10f3b34/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0cb7203c7bc69d7c1585ebb33a2e6074492d2fc21ad28a7b9d40457ac2a51ab7", size = 583230, upload-time = "2025-10-22T22:22:44.877Z" }, - { url = "https://files.pythonhosted.org/packages/66/df/62fc783781a121e77fee9a21ead0a926f1b652280a33f5956a5e7833ed30/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7a52a5169c664dfb495882adc75c304ae1d50df552fbd68e100fdc719dee4ff9", size = 553268, upload-time = "2025-10-22T22:22:46.441Z" }, - { url = "https://files.pythonhosted.org/packages/84/85/d34366e335140a4837902d3dea89b51f087bd6a63c993ebdff59e93ee61d/rpds_py-0.28.0-cp313-cp313-win32.whl", hash = "sha256:2e42456917b6687215b3e606ab46aa6bca040c77af7df9a08a6dcfe8a4d10ca5", size = 217100, upload-time = 
"2025-10-22T22:22:48.342Z" }, - { url = "https://files.pythonhosted.org/packages/3c/1c/f25a3f3752ad7601476e3eff395fe075e0f7813fbb9862bd67c82440e880/rpds_py-0.28.0-cp313-cp313-win_amd64.whl", hash = "sha256:e0a0311caedc8069d68fc2bf4c9019b58a2d5ce3cd7cb656c845f1615b577e1e", size = 227759, upload-time = "2025-10-22T22:22:50.219Z" }, - { url = "https://files.pythonhosted.org/packages/e0/d6/5f39b42b99615b5bc2f36ab90423ea404830bdfee1c706820943e9a645eb/rpds_py-0.28.0-cp313-cp313-win_arm64.whl", hash = "sha256:04c1b207ab8b581108801528d59ad80aa83bb170b35b0ddffb29c20e411acdc1", size = 217326, upload-time = "2025-10-22T22:22:51.647Z" }, - { url = "https://files.pythonhosted.org/packages/5c/8b/0c69b72d1cee20a63db534be0df271effe715ef6c744fdf1ff23bb2b0b1c/rpds_py-0.28.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:f296ea3054e11fc58ad42e850e8b75c62d9a93a9f981ad04b2e5ae7d2186ff9c", size = 355736, upload-time = "2025-10-22T22:22:53.211Z" }, - { url = "https://files.pythonhosted.org/packages/f7/6d/0c2ee773cfb55c31a8514d2cece856dd299170a49babd50dcffb15ddc749/rpds_py-0.28.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5a7306c19b19005ad98468fcefeb7100b19c79fc23a5f24a12e06d91181193fa", size = 342677, upload-time = "2025-10-22T22:22:54.723Z" }, - { url = "https://files.pythonhosted.org/packages/e2/1c/22513ab25a27ea205144414724743e305e8153e6abe81833b5e678650f5a/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5d9b86aa501fed9862a443c5c3116f6ead8bc9296185f369277c42542bd646b", size = 371847, upload-time = "2025-10-22T22:22:56.295Z" }, - { url = "https://files.pythonhosted.org/packages/60/07/68e6ccdb4b05115ffe61d31afc94adef1833d3a72f76c9632d4d90d67954/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e5bbc701eff140ba0e872691d573b3d5d30059ea26e5785acba9132d10c8c31d", size = 381800, upload-time = "2025-10-22T22:22:57.808Z" }, - { url = 
"https://files.pythonhosted.org/packages/73/bf/6d6d15df80781d7f9f368e7c1a00caf764436518c4877fb28b029c4624af/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a5690671cd672a45aa8616d7374fdf334a1b9c04a0cac3c854b1136e92374fe", size = 518827, upload-time = "2025-10-22T22:22:59.826Z" }, - { url = "https://files.pythonhosted.org/packages/7b/d3/2decbb2976cc452cbf12a2b0aaac5f1b9dc5dd9d1f7e2509a3ee00421249/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9f1d92ecea4fa12f978a367c32a5375a1982834649cdb96539dcdc12e609ab1a", size = 399471, upload-time = "2025-10-22T22:23:01.968Z" }, - { url = "https://files.pythonhosted.org/packages/b1/2c/f30892f9e54bd02e5faca3f6a26d6933c51055e67d54818af90abed9748e/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d252db6b1a78d0a3928b6190156042d54c93660ce4d98290d7b16b5296fb7cc", size = 377578, upload-time = "2025-10-22T22:23:03.52Z" }, - { url = "https://files.pythonhosted.org/packages/f0/5d/3bce97e5534157318f29ac06bf2d279dae2674ec12f7cb9c12739cee64d8/rpds_py-0.28.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d61b355c3275acb825f8777d6c4505f42b5007e357af500939d4a35b19177259", size = 390482, upload-time = "2025-10-22T22:23:05.391Z" }, - { url = "https://files.pythonhosted.org/packages/e3/f0/886bd515ed457b5bd93b166175edb80a0b21a210c10e993392127f1e3931/rpds_py-0.28.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:acbe5e8b1026c0c580d0321c8aae4b0a1e1676861d48d6e8c6586625055b606a", size = 402447, upload-time = "2025-10-22T22:23:06.93Z" }, - { url = "https://files.pythonhosted.org/packages/42/b5/71e8777ac55e6af1f4f1c05b47542a1eaa6c33c1cf0d300dca6a1c6e159a/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8aa23b6f0fc59b85b4c7d89ba2965af274346f738e8d9fc2455763602e62fd5f", size = 552385, upload-time = "2025-10-22T22:23:08.557Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/cb/6ca2d70cbda5a8e36605e7788c4aa3bea7c17d71d213465a5a675079b98d/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7b14b0c680286958817c22d76fcbca4800ddacef6f678f3a7c79a1fe7067fe37", size = 575642, upload-time = "2025-10-22T22:23:10.348Z" }, - { url = "https://files.pythonhosted.org/packages/4a/d4/407ad9960ca7856d7b25c96dcbe019270b5ffdd83a561787bc682c797086/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bcf1d210dfee61a6c86551d67ee1031899c0fdbae88b2d44a569995d43797712", size = 544507, upload-time = "2025-10-22T22:23:12.434Z" }, - { url = "https://files.pythonhosted.org/packages/51/31/2f46fe0efcac23fbf5797c6b6b7e1c76f7d60773e525cb65fcbc582ee0f2/rpds_py-0.28.0-cp313-cp313t-win32.whl", hash = "sha256:3aa4dc0fdab4a7029ac63959a3ccf4ed605fee048ba67ce89ca3168da34a1342", size = 205376, upload-time = "2025-10-22T22:23:13.979Z" }, - { url = "https://files.pythonhosted.org/packages/92/e4/15947bda33cbedfc134490a41841ab8870a72a867a03d4969d886f6594a2/rpds_py-0.28.0-cp313-cp313t-win_amd64.whl", hash = "sha256:7b7d9d83c942855e4fdcfa75d4f96f6b9e272d42fffcb72cd4bb2577db2e2907", size = 215907, upload-time = "2025-10-22T22:23:15.5Z" }, - { url = "https://files.pythonhosted.org/packages/08/47/ffe8cd7a6a02833b10623bf765fbb57ce977e9a4318ca0e8cf97e9c3d2b3/rpds_py-0.28.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:dcdcb890b3ada98a03f9f2bb108489cdc7580176cb73b4f2d789e9a1dac1d472", size = 353830, upload-time = "2025-10-22T22:23:17.03Z" }, - { url = "https://files.pythonhosted.org/packages/f9/9f/890f36cbd83a58491d0d91ae0db1702639edb33fb48eeb356f80ecc6b000/rpds_py-0.28.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f274f56a926ba2dc02976ca5b11c32855cbd5925534e57cfe1fda64e04d1add2", size = 341819, upload-time = "2025-10-22T22:23:18.57Z" }, - { url = 
"https://files.pythonhosted.org/packages/09/e3/921eb109f682aa24fb76207698fbbcf9418738f35a40c21652c29053f23d/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fe0438ac4a29a520ea94c8c7f1754cdd8feb1bc490dfda1bfd990072363d527", size = 373127, upload-time = "2025-10-22T22:23:20.216Z" }, - { url = "https://files.pythonhosted.org/packages/23/13/bce4384d9f8f4989f1a9599c71b7a2d877462e5fd7175e1f69b398f729f4/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8a358a32dd3ae50e933347889b6af9a1bdf207ba5d1a3f34e1a38cd3540e6733", size = 382767, upload-time = "2025-10-22T22:23:21.787Z" }, - { url = "https://files.pythonhosted.org/packages/23/e1/579512b2d89a77c64ccef5a0bc46a6ef7f72ae0cf03d4b26dcd52e57ee0a/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e80848a71c78aa328fefaba9c244d588a342c8e03bda518447b624ea64d1ff56", size = 517585, upload-time = "2025-10-22T22:23:23.699Z" }, - { url = "https://files.pythonhosted.org/packages/62/3c/ca704b8d324a2591b0b0adcfcaadf9c862375b11f2f667ac03c61b4fd0a6/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f586db2e209d54fe177e58e0bc4946bea5fb0102f150b1b2f13de03e1f0976f8", size = 399828, upload-time = "2025-10-22T22:23:25.713Z" }, - { url = "https://files.pythonhosted.org/packages/da/37/e84283b9e897e3adc46b4c88bb3f6ec92a43bd4d2f7ef5b13459963b2e9c/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ae8ee156d6b586e4292491e885d41483136ab994e719a13458055bec14cf370", size = 375509, upload-time = "2025-10-22T22:23:27.32Z" }, - { url = "https://files.pythonhosted.org/packages/1a/c2/a980beab869d86258bf76ec42dec778ba98151f253a952b02fe36d72b29c/rpds_py-0.28.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:a805e9b3973f7e27f7cab63a6b4f61d90f2e5557cff73b6e97cd5b8540276d3d", size = 392014, upload-time = "2025-10-22T22:23:29.332Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/b5/b1d3c5f9d3fa5aeef74265f9c64de3c34a0d6d5cd3c81c8b17d5c8f10ed4/rpds_py-0.28.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5d3fd16b6dc89c73a4da0b4ac8b12a7ecc75b2864b95c9e5afed8003cb50a728", size = 402410, upload-time = "2025-10-22T22:23:31.14Z" }, - { url = "https://files.pythonhosted.org/packages/74/ae/cab05ff08dfcc052afc73dcb38cbc765ffc86f94e966f3924cd17492293c/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6796079e5d24fdaba6d49bda28e2c47347e89834678f2bc2c1b4fc1489c0fb01", size = 553593, upload-time = "2025-10-22T22:23:32.834Z" }, - { url = "https://files.pythonhosted.org/packages/70/80/50d5706ea2a9bfc9e9c5f401d91879e7c790c619969369800cde202da214/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:76500820c2af232435cbe215e3324c75b950a027134e044423f59f5b9a1ba515", size = 576925, upload-time = "2025-10-22T22:23:34.47Z" }, - { url = "https://files.pythonhosted.org/packages/ab/12/85a57d7a5855a3b188d024b099fd09c90db55d32a03626d0ed16352413ff/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bbdc5640900a7dbf9dd707fe6388972f5bbd883633eb68b76591044cfe346f7e", size = 542444, upload-time = "2025-10-22T22:23:36.093Z" }, - { url = "https://files.pythonhosted.org/packages/6c/65/10643fb50179509150eb94d558e8837c57ca8b9adc04bd07b98e57b48f8c/rpds_py-0.28.0-cp314-cp314-win32.whl", hash = "sha256:adc8aa88486857d2b35d75f0640b949759f79dc105f50aa2c27816b2e0dd749f", size = 207968, upload-time = "2025-10-22T22:23:37.638Z" }, - { url = "https://files.pythonhosted.org/packages/b4/84/0c11fe4d9aaea784ff4652499e365963222481ac647bcd0251c88af646eb/rpds_py-0.28.0-cp314-cp314-win_amd64.whl", hash = "sha256:66e6fa8e075b58946e76a78e69e1a124a21d9a48a5b4766d15ba5b06869d1fa1", size = 218876, upload-time = "2025-10-22T22:23:39.179Z" }, - { url = 
"https://files.pythonhosted.org/packages/0f/e0/3ab3b86ded7bb18478392dc3e835f7b754cd446f62f3fc96f4fe2aca78f6/rpds_py-0.28.0-cp314-cp314-win_arm64.whl", hash = "sha256:a6fe887c2c5c59413353b7c0caff25d0e566623501ccfff88957fa438a69377d", size = 212506, upload-time = "2025-10-22T22:23:40.755Z" }, - { url = "https://files.pythonhosted.org/packages/51/ec/d5681bb425226c3501eab50fc30e9d275de20c131869322c8a1729c7b61c/rpds_py-0.28.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7a69df082db13c7070f7b8b1f155fa9e687f1d6aefb7b0e3f7231653b79a067b", size = 355433, upload-time = "2025-10-22T22:23:42.259Z" }, - { url = "https://files.pythonhosted.org/packages/be/ec/568c5e689e1cfb1ea8b875cffea3649260955f677fdd7ddc6176902d04cd/rpds_py-0.28.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b1cde22f2c30ebb049a9e74c5374994157b9b70a16147d332f89c99c5960737a", size = 342601, upload-time = "2025-10-22T22:23:44.372Z" }, - { url = "https://files.pythonhosted.org/packages/32/fe/51ada84d1d2a1d9d8f2c902cfddd0133b4a5eb543196ab5161d1c07ed2ad/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5338742f6ba7a51012ea470bd4dc600a8c713c0c72adaa0977a1b1f4327d6592", size = 372039, upload-time = "2025-10-22T22:23:46.025Z" }, - { url = "https://files.pythonhosted.org/packages/07/c1/60144a2f2620abade1a78e0d91b298ac2d9b91bc08864493fa00451ef06e/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e1460ebde1bcf6d496d80b191d854adedcc619f84ff17dc1c6d550f58c9efbba", size = 382407, upload-time = "2025-10-22T22:23:48.098Z" }, - { url = "https://files.pythonhosted.org/packages/45/ed/091a7bbdcf4038a60a461df50bc4c82a7ed6d5d5e27649aab61771c17585/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e3eb248f2feba84c692579257a043a7699e28a77d86c77b032c1d9fbb3f0219c", size = 518172, upload-time = "2025-10-22T22:23:50.16Z" }, - { url = 
"https://files.pythonhosted.org/packages/54/dd/02cc90c2fd9c2ef8016fd7813bfacd1c3a1325633ec8f244c47b449fc868/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3bbba5def70b16cd1c1d7255666aad3b290fbf8d0fe7f9f91abafb73611a91", size = 399020, upload-time = "2025-10-22T22:23:51.81Z" }, - { url = "https://files.pythonhosted.org/packages/ab/81/5d98cc0329bbb911ccecd0b9e19fbf7f3a5de8094b4cda5e71013b2dd77e/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3114f4db69ac5a1f32e7e4d1cbbe7c8f9cf8217f78e6e002cedf2d54c2a548ed", size = 377451, upload-time = "2025-10-22T22:23:53.711Z" }, - { url = "https://files.pythonhosted.org/packages/b4/07/4d5bcd49e3dfed2d38e2dcb49ab6615f2ceb9f89f5a372c46dbdebb4e028/rpds_py-0.28.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:4b0cb8a906b1a0196b863d460c0222fb8ad0f34041568da5620f9799b83ccf0b", size = 390355, upload-time = "2025-10-22T22:23:55.299Z" }, - { url = "https://files.pythonhosted.org/packages/3f/79/9f14ba9010fee74e4f40bf578735cfcbb91d2e642ffd1abe429bb0b96364/rpds_py-0.28.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf681ac76a60b667106141e11a92a3330890257e6f559ca995fbb5265160b56e", size = 403146, upload-time = "2025-10-22T22:23:56.929Z" }, - { url = "https://files.pythonhosted.org/packages/39/4c/f08283a82ac141331a83a40652830edd3a4a92c34e07e2bbe00baaea2f5f/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1e8ee6413cfc677ce8898d9cde18cc3a60fc2ba756b0dec5b71eb6eb21c49fa1", size = 552656, upload-time = "2025-10-22T22:23:58.62Z" }, - { url = "https://files.pythonhosted.org/packages/61/47/d922fc0666f0dd8e40c33990d055f4cc6ecff6f502c2d01569dbed830f9b/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b3072b16904d0b5572a15eb9d31c1954e0d3227a585fc1351aa9878729099d6c", size = 576782, upload-time = "2025-10-22T22:24:00.312Z" }, - { url = 
"https://files.pythonhosted.org/packages/d3/0c/5bafdd8ccf6aa9d3bfc630cfece457ff5b581af24f46a9f3590f790e3df2/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b670c30fd87a6aec281c3c9896d3bae4b205fd75d79d06dc87c2503717e46092", size = 544671, upload-time = "2025-10-22T22:24:02.297Z" }, - { url = "https://files.pythonhosted.org/packages/2c/37/dcc5d8397caa924988693519069d0beea077a866128719351a4ad95e82fc/rpds_py-0.28.0-cp314-cp314t-win32.whl", hash = "sha256:8014045a15b4d2b3476f0a287fcc93d4f823472d7d1308d47884ecac9e612be3", size = 205749, upload-time = "2025-10-22T22:24:03.848Z" }, - { url = "https://files.pythonhosted.org/packages/d7/69/64d43b21a10d72b45939a28961216baeb721cc2a430f5f7c3bfa21659a53/rpds_py-0.28.0-cp314-cp314t-win_amd64.whl", hash = "sha256:7a4e59c90d9c27c561eb3160323634a9ff50b04e4f7820600a2beb0ac90db578", size = 216233, upload-time = "2025-10-22T22:24:05.471Z" }, - { url = "https://files.pythonhosted.org/packages/ae/bc/b43f2ea505f28119bd551ae75f70be0c803d2dbcd37c1b3734909e40620b/rpds_py-0.28.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f5e7101145427087e493b9c9b959da68d357c28c562792300dd21a095118ed16", size = 363913, upload-time = "2025-10-22T22:24:07.129Z" }, - { url = "https://files.pythonhosted.org/packages/28/f2/db318195d324c89a2c57dc5195058cbadd71b20d220685c5bd1da79ee7fe/rpds_py-0.28.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:31eb671150b9c62409a888850aaa8e6533635704fe2b78335f9aaf7ff81eec4d", size = 350452, upload-time = "2025-10-22T22:24:08.754Z" }, - { url = "https://files.pythonhosted.org/packages/ae/f2/1391c819b8573a4898cedd6b6c5ec5bc370ce59e5d6bdcebe3c9c1db4588/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48b55c1f64482f7d8bd39942f376bfdf2f6aec637ee8c805b5041e14eeb771db", size = 380957, upload-time = "2025-10-22T22:24:10.826Z" }, - { url = 
"https://files.pythonhosted.org/packages/5a/5c/e5de68ee7eb7248fce93269833d1b329a196d736aefb1a7481d1e99d1222/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:24743a7b372e9a76171f6b69c01aedf927e8ac3e16c474d9fe20d552a8cb45c7", size = 391919, upload-time = "2025-10-22T22:24:12.559Z" }, - { url = "https://files.pythonhosted.org/packages/fb/4f/2376336112cbfeb122fd435d608ad8d5041b3aed176f85a3cb32c262eb80/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:389c29045ee8bbb1627ea190b4976a310a295559eaf9f1464a1a6f2bf84dde78", size = 528541, upload-time = "2025-10-22T22:24:14.197Z" }, - { url = "https://files.pythonhosted.org/packages/68/53/5ae232e795853dd20da7225c5dd13a09c0a905b1a655e92bdf8d78a99fd9/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23690b5827e643150cf7b49569679ec13fe9a610a15949ed48b85eb7f98f34ec", size = 405629, upload-time = "2025-10-22T22:24:16.001Z" }, - { url = "https://files.pythonhosted.org/packages/b9/2d/351a3b852b683ca9b6b8b38ed9efb2347596973849ba6c3a0e99877c10aa/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f0c9266c26580e7243ad0d72fc3e01d6b33866cfab5084a6da7576bcf1c4f72", size = 384123, upload-time = "2025-10-22T22:24:17.585Z" }, - { url = "https://files.pythonhosted.org/packages/e0/15/870804daa00202728cc91cb8e2385fa9f1f4eb49857c49cfce89e304eae6/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:4c6c4db5d73d179746951486df97fd25e92396be07fc29ee8ff9a8f5afbdfb27", size = 400923, upload-time = "2025-10-22T22:24:19.512Z" }, - { url = "https://files.pythonhosted.org/packages/53/25/3706b83c125fa2a0bccceac951de3f76631f6bd0ee4d02a0ed780712ef1b/rpds_py-0.28.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3b695a8fa799dd2cfdb4804b37096c5f6dba1ac7f48a7fbf6d0485bcd060316", size = 413767, upload-time = 
"2025-10-22T22:24:21.316Z" }, - { url = "https://files.pythonhosted.org/packages/ef/f9/ce43dbe62767432273ed2584cef71fef8411bddfb64125d4c19128015018/rpds_py-0.28.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:6aa1bfce3f83baf00d9c5fcdbba93a3ab79958b4c7d7d1f55e7fe68c20e63912", size = 561530, upload-time = "2025-10-22T22:24:22.958Z" }, - { url = "https://files.pythonhosted.org/packages/46/c9/ffe77999ed8f81e30713dd38fd9ecaa161f28ec48bb80fa1cd9118399c27/rpds_py-0.28.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:7b0f9dceb221792b3ee6acb5438eb1f02b0cb2c247796a72b016dcc92c6de829", size = 585453, upload-time = "2025-10-22T22:24:24.779Z" }, - { url = "https://files.pythonhosted.org/packages/ed/d2/4a73b18821fd4669762c855fd1f4e80ceb66fb72d71162d14da58444a763/rpds_py-0.28.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:5d0145edba8abd3db0ab22b5300c99dc152f5c9021fab861be0f0544dc3cbc5f", size = 552199, upload-time = "2025-10-22T22:24:26.54Z" }, +version = "0.29.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/33/23b3b3419b6a3e0f559c7c0d2ca8fc1b9448382b25245033788785921332/rpds_py-0.29.0.tar.gz", hash = "sha256:fe55fe686908f50154d1dc599232016e50c243b438c3b7432f24e2895b0e5359", size = 69359, upload-time = "2025-11-16T14:50:39.532Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/7a/c5b2ff381b74bc742768e8d870f26babac4ef256ba160bdbf8d57af56461/rpds_py-0.29.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:4ae4b88c6617e1b9e5038ab3fccd7bac0842fdda2b703117b2aa99bc85379113", size = 372385, upload-time = "2025-11-16T14:47:36.287Z" }, + { url = "https://files.pythonhosted.org/packages/28/36/531f1eb4d5bed4a9c150f363a7ec4a98d2dc746151bba5473bc38ee85dec/rpds_py-0.29.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7d9128ec9d8cecda6f044001fde4fb71ea7c24325336612ef8179091eb9596b9", size = 362869, upload-time = "2025-11-16T14:47:38.196Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/df/7e9c0493a2015d9c82807a2d5f023ea9774e27a4c15b33ef1cdb7456138d/rpds_py-0.29.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d37812c3da8e06f2bb35b3cf10e4a7b68e776a706c13058997238762b4e07f4f", size = 391582, upload-time = "2025-11-16T14:47:39.746Z" }, + { url = "https://files.pythonhosted.org/packages/15/38/42a981c3592ef46fbd7e17adbf8730cc5ec87e6aa1770c658c44bbb52960/rpds_py-0.29.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:66786c3fb1d8de416a7fa8e1cb1ec6ba0a745b2b0eee42f9b7daa26f1a495545", size = 405685, upload-time = "2025-11-16T14:47:41.472Z" }, + { url = "https://files.pythonhosted.org/packages/12/45/628b8c15856c3849c3f52ec6dac93c046ed5faeed4a435af03b70525fd29/rpds_py-0.29.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b58f5c77f1af888b5fd1876c9a0d9858f6f88a39c9dd7c073a88e57e577da66d", size = 527067, upload-time = "2025-11-16T14:47:43.036Z" }, + { url = "https://files.pythonhosted.org/packages/dc/ba/6b56d09badeabd95098016d72a437d4a0fd82d4672ce92a7607df5d70a42/rpds_py-0.29.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:799156ef1f3529ed82c36eb012b5d7a4cf4b6ef556dd7cc192148991d07206ae", size = 412532, upload-time = "2025-11-16T14:47:44.484Z" }, + { url = "https://files.pythonhosted.org/packages/f1/39/2f1f3db92888314b50b8f9641f679188bd24b3665a8cb9923b7201ae8011/rpds_py-0.29.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:453783477aa4f2d9104c4b59b08c871431647cb7af51b549bbf2d9eb9c827756", size = 392736, upload-time = "2025-11-16T14:47:46.053Z" }, + { url = "https://files.pythonhosted.org/packages/60/43/3c3b1dcd827e50f2ae28786d846b8a351080d8a69a3b49bc10ae44cc39b1/rpds_py-0.29.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:24a7231493e3c4a4b30138b50cca089a598e52c34cf60b2f35cebf62f274fdea", size = 406300, upload-time = "2025-11-16T14:47:47.268Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/02/bc96021b67f8525e6bcdd68935c4543ada61e1f3dcb067ed037d68b8c6d2/rpds_py-0.29.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7033c1010b1f57bb44d8067e8c25aa6fa2e944dbf46ccc8c92b25043839c3fd2", size = 423641, upload-time = "2025-11-16T14:47:48.878Z" }, + { url = "https://files.pythonhosted.org/packages/38/e9/c435ddb602ced19a80b8277a41371734f33ad3f91cc4ceb4d82596800a3c/rpds_py-0.29.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0248b19405422573621172ab8e3a1f29141362d13d9f72bafa2e28ea0cdca5a2", size = 574153, upload-time = "2025-11-16T14:47:50.435Z" }, + { url = "https://files.pythonhosted.org/packages/84/82/dc3c32e1f89ecba8a59600d4cd65fe0ad81b6c636ccdbf6cd177fd6a7bac/rpds_py-0.29.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f9f436aee28d13b9ad2c764fc273e0457e37c2e61529a07b928346b219fcde3b", size = 600304, upload-time = "2025-11-16T14:47:51.599Z" }, + { url = "https://files.pythonhosted.org/packages/35/98/785290e0b7142470735dc1b1f68fb33aae29e5296f062c88396eedf796c8/rpds_py-0.29.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:24a16cb7163933906c62c272de20ea3c228e4542c8c45c1d7dc2b9913e17369a", size = 562211, upload-time = "2025-11-16T14:47:53.094Z" }, + { url = "https://files.pythonhosted.org/packages/30/58/4eeddcb0737c6875f3e30c65dc9d7e7a10dfd5779646a990fa602c6d56c5/rpds_py-0.29.0-cp310-cp310-win32.whl", hash = "sha256:1a409b0310a566bfd1be82119891fefbdce615ccc8aa558aff7835c27988cbef", size = 221803, upload-time = "2025-11-16T14:47:54.404Z" }, + { url = "https://files.pythonhosted.org/packages/54/77/b35a8dbdcbeb32505500547cdafaa9f8863e85f8faac50ef34464ec5a256/rpds_py-0.29.0-cp310-cp310-win_amd64.whl", hash = "sha256:c5523b0009e7c3c1263471b69d8da1c7d41b3ecb4cb62ef72be206b92040a950", size = 235530, upload-time = "2025-11-16T14:47:56.061Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/ab/7fb95163a53ab122c74a7c42d2d2f012819af2cf3deb43fb0d5acf45cc1a/rpds_py-0.29.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:9b9c764a11fd637e0322a488560533112837f5334ffeb48b1be20f6d98a7b437", size = 372344, upload-time = "2025-11-16T14:47:57.279Z" }, + { url = "https://files.pythonhosted.org/packages/b3/45/f3c30084c03b0d0f918cb4c5ae2c20b0a148b51ba2b3f6456765b629bedd/rpds_py-0.29.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3fd2164d73812026ce970d44c3ebd51e019d2a26a4425a5dcbdfa93a34abc383", size = 363041, upload-time = "2025-11-16T14:47:58.908Z" }, + { url = "https://files.pythonhosted.org/packages/e3/e9/4d044a1662608c47a87cbb37b999d4d5af54c6d6ebdda93a4d8bbf8b2a10/rpds_py-0.29.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a097b7f7f7274164566ae90a221fd725363c0e9d243e2e9ed43d195ccc5495c", size = 391775, upload-time = "2025-11-16T14:48:00.197Z" }, + { url = "https://files.pythonhosted.org/packages/50/c9/7616d3ace4e6731aeb6e3cd85123e03aec58e439044e214b9c5c60fd8eb1/rpds_py-0.29.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7cdc0490374e31cedefefaa1520d5fe38e82fde8748cbc926e7284574c714d6b", size = 405624, upload-time = "2025-11-16T14:48:01.496Z" }, + { url = "https://files.pythonhosted.org/packages/c2/e2/6d7d6941ca0843609fd2d72c966a438d6f22617baf22d46c3d2156c31350/rpds_py-0.29.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89ca2e673ddd5bde9b386da9a0aac0cab0e76f40c8f0aaf0d6311b6bbf2aa311", size = 527894, upload-time = "2025-11-16T14:48:03.167Z" }, + { url = "https://files.pythonhosted.org/packages/8d/f7/aee14dc2db61bb2ae1e3068f134ca9da5f28c586120889a70ff504bb026f/rpds_py-0.29.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a5d9da3ff5af1ca1249b1adb8ef0573b94c76e6ae880ba1852f033bf429d4588", size = 412720, upload-time = "2025-11-16T14:48:04.413Z" }, + { url = 
"https://files.pythonhosted.org/packages/2f/e2/2293f236e887c0360c2723d90c00d48dee296406994d6271faf1712e94ec/rpds_py-0.29.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8238d1d310283e87376c12f658b61e1ee23a14c0e54c7c0ce953efdbdc72deed", size = 392945, upload-time = "2025-11-16T14:48:06.252Z" }, + { url = "https://files.pythonhosted.org/packages/14/cd/ceea6147acd3bd1fd028d1975228f08ff19d62098078d5ec3eed49703797/rpds_py-0.29.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:2d6fb2ad1c36f91c4646989811e84b1ea5e0c3cf9690b826b6e32b7965853a63", size = 406385, upload-time = "2025-11-16T14:48:07.575Z" }, + { url = "https://files.pythonhosted.org/packages/52/36/fe4dead19e45eb77a0524acfdbf51e6cda597b26fc5b6dddbff55fbbb1a5/rpds_py-0.29.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:534dc9df211387547267ccdb42253aa30527482acb38dd9b21c5c115d66a96d2", size = 423943, upload-time = "2025-11-16T14:48:10.175Z" }, + { url = "https://files.pythonhosted.org/packages/a1/7b/4551510803b582fa4abbc8645441a2d15aa0c962c3b21ebb380b7e74f6a1/rpds_py-0.29.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d456e64724a075441e4ed648d7f154dc62e9aabff29bcdf723d0c00e9e1d352f", size = 574204, upload-time = "2025-11-16T14:48:11.499Z" }, + { url = "https://files.pythonhosted.org/packages/64/ba/071ccdd7b171e727a6ae079f02c26f75790b41555f12ca8f1151336d2124/rpds_py-0.29.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a738f2da2f565989401bd6fd0b15990a4d1523c6d7fe83f300b7e7d17212feca", size = 600587, upload-time = "2025-11-16T14:48:12.822Z" }, + { url = "https://files.pythonhosted.org/packages/03/09/96983d48c8cf5a1e03c7d9cc1f4b48266adfb858ae48c7c2ce978dbba349/rpds_py-0.29.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a110e14508fd26fd2e472bb541f37c209409876ba601cf57e739e87d8a53cf95", size = 562287, upload-time = "2025-11-16T14:48:14.108Z" }, + { url = 
"https://files.pythonhosted.org/packages/40/f0/8c01aaedc0fa92156f0391f39ea93b5952bc0ec56b897763858f95da8168/rpds_py-0.29.0-cp311-cp311-win32.whl", hash = "sha256:923248a56dd8d158389a28934f6f69ebf89f218ef96a6b216a9be6861804d3f4", size = 221394, upload-time = "2025-11-16T14:48:15.374Z" }, + { url = "https://files.pythonhosted.org/packages/7e/a5/a8b21c54c7d234efdc83dc034a4d7cd9668e3613b6316876a29b49dece71/rpds_py-0.29.0-cp311-cp311-win_amd64.whl", hash = "sha256:539eb77eb043afcc45314d1be09ea6d6cafb3addc73e0547c171c6d636957f60", size = 235713, upload-time = "2025-11-16T14:48:16.636Z" }, + { url = "https://files.pythonhosted.org/packages/a7/1f/df3c56219523947b1be402fa12e6323fe6d61d883cf35d6cb5d5bb6db9d9/rpds_py-0.29.0-cp311-cp311-win_arm64.whl", hash = "sha256:bdb67151ea81fcf02d8f494703fb728d4d34d24556cbff5f417d74f6f5792e7c", size = 229157, upload-time = "2025-11-16T14:48:17.891Z" }, + { url = "https://files.pythonhosted.org/packages/3c/50/bc0e6e736d94e420df79be4deb5c9476b63165c87bb8f19ef75d100d21b3/rpds_py-0.29.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a0891cfd8db43e085c0ab93ab7e9b0c8fee84780d436d3b266b113e51e79f954", size = 376000, upload-time = "2025-11-16T14:48:19.141Z" }, + { url = "https://files.pythonhosted.org/packages/3e/3a/46676277160f014ae95f24de53bed0e3b7ea66c235e7de0b9df7bd5d68ba/rpds_py-0.29.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3897924d3f9a0361472d884051f9a2460358f9a45b1d85a39a158d2f8f1ad71c", size = 360575, upload-time = "2025-11-16T14:48:20.443Z" }, + { url = "https://files.pythonhosted.org/packages/75/ba/411d414ed99ea1afdd185bbabeeaac00624bd1e4b22840b5e9967ade6337/rpds_py-0.29.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a21deb8e0d1571508c6491ce5ea5e25669b1dd4adf1c9d64b6314842f708b5d", size = 392159, upload-time = "2025-11-16T14:48:22.12Z" }, + { url = 
"https://files.pythonhosted.org/packages/8f/b1/e18aa3a331f705467a48d0296778dc1fea9d7f6cf675bd261f9a846c7e90/rpds_py-0.29.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9efe71687d6427737a0a2de9ca1c0a216510e6cd08925c44162be23ed7bed2d5", size = 410602, upload-time = "2025-11-16T14:48:23.563Z" }, + { url = "https://files.pythonhosted.org/packages/2f/6c/04f27f0c9f2299274c76612ac9d2c36c5048bb2c6c2e52c38c60bf3868d9/rpds_py-0.29.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:40f65470919dc189c833e86b2c4bd21bd355f98436a2cef9e0a9a92aebc8e57e", size = 515808, upload-time = "2025-11-16T14:48:24.949Z" }, + { url = "https://files.pythonhosted.org/packages/83/56/a8412aa464fb151f8bc0d91fb0bb888adc9039bd41c1c6ba8d94990d8cf8/rpds_py-0.29.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:def48ff59f181130f1a2cb7c517d16328efac3ec03951cca40c1dc2049747e83", size = 416015, upload-time = "2025-11-16T14:48:26.782Z" }, + { url = "https://files.pythonhosted.org/packages/04/4c/f9b8a05faca3d9e0a6397c90d13acb9307c9792b2bff621430c58b1d6e76/rpds_py-0.29.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad7bd570be92695d89285a4b373006930715b78d96449f686af422debb4d3949", size = 395325, upload-time = "2025-11-16T14:48:28.055Z" }, + { url = "https://files.pythonhosted.org/packages/34/60/869f3bfbf8ed7b54f1ad9a5543e0fdffdd40b5a8f587fe300ee7b4f19340/rpds_py-0.29.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:5a572911cd053137bbff8e3a52d31c5d2dba51d3a67ad902629c70185f3f2181", size = 410160, upload-time = "2025-11-16T14:48:29.338Z" }, + { url = "https://files.pythonhosted.org/packages/91/aa/e5b496334e3aba4fe4c8a80187b89f3c1294c5c36f2a926da74338fa5a73/rpds_py-0.29.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d583d4403bcbf10cffc3ab5cee23d7643fcc960dff85973fd3c2d6c86e8dbb0c", size = 425309, upload-time = "2025-11-16T14:48:30.691Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/68/4e24a34189751ceb6d66b28f18159922828dd84155876551f7ca5b25f14f/rpds_py-0.29.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:070befbb868f257d24c3bb350dbd6e2f645e83731f31264b19d7231dd5c396c7", size = 574644, upload-time = "2025-11-16T14:48:31.964Z" }, + { url = "https://files.pythonhosted.org/packages/8c/cf/474a005ea4ea9c3b4f17b6108b6b13cebfc98ebaff11d6e1b193204b3a93/rpds_py-0.29.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fc935f6b20b0c9f919a8ff024739174522abd331978f750a74bb68abd117bd19", size = 601605, upload-time = "2025-11-16T14:48:33.252Z" }, + { url = "https://files.pythonhosted.org/packages/f4/b1/c56f6a9ab8c5f6bb5c65c4b5f8229167a3a525245b0773f2c0896686b64e/rpds_py-0.29.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8c5a8ecaa44ce2d8d9d20a68a2483a74c07f05d72e94a4dff88906c8807e77b0", size = 564593, upload-time = "2025-11-16T14:48:34.643Z" }, + { url = "https://files.pythonhosted.org/packages/b3/13/0494cecce4848f68501e0a229432620b4b57022388b071eeff95f3e1e75b/rpds_py-0.29.0-cp312-cp312-win32.whl", hash = "sha256:ba5e1aeaf8dd6d8f6caba1f5539cddda87d511331714b7b5fc908b6cfc3636b7", size = 223853, upload-time = "2025-11-16T14:48:36.419Z" }, + { url = "https://files.pythonhosted.org/packages/1f/6a/51e9aeb444a00cdc520b032a28b07e5f8dc7bc328b57760c53e7f96997b4/rpds_py-0.29.0-cp312-cp312-win_amd64.whl", hash = "sha256:b5f6134faf54b3cb83375db0f113506f8b7770785be1f95a631e7e2892101977", size = 239895, upload-time = "2025-11-16T14:48:37.956Z" }, + { url = "https://files.pythonhosted.org/packages/d1/d4/8bce56cdad1ab873e3f27cb31c6a51d8f384d66b022b820525b879f8bed1/rpds_py-0.29.0-cp312-cp312-win_arm64.whl", hash = "sha256:b016eddf00dca7944721bf0cd85b6af7f6c4efaf83ee0b37c4133bd39757a8c7", size = 230321, upload-time = "2025-11-16T14:48:39.71Z" }, + { url = "https://files.pythonhosted.org/packages/fd/d9/c5de60d9d371bbb186c3e9bf75f4fc5665e11117a25a06a6b2e0afb7380e/rpds_py-0.29.0-cp313-cp313-macosx_10_12_x86_64.whl", 
hash = "sha256:1585648d0760b88292eecab5181f5651111a69d90eff35d6b78aa32998886a61", size = 375710, upload-time = "2025-11-16T14:48:41.063Z" }, + { url = "https://files.pythonhosted.org/packages/b3/b3/0860cdd012291dc21272895ce107f1e98e335509ba986dd83d72658b82b9/rpds_py-0.29.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:521807963971a23996ddaf764c682b3e46459b3c58ccd79fefbe16718db43154", size = 360582, upload-time = "2025-11-16T14:48:42.423Z" }, + { url = "https://files.pythonhosted.org/packages/92/8a/a18c2f4a61b3407e56175f6aab6deacdf9d360191a3d6f38566e1eaf7266/rpds_py-0.29.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a8896986efaa243ab713c69e6491a4138410f0fe36f2f4c71e18bd5501e8014", size = 391172, upload-time = "2025-11-16T14:48:43.75Z" }, + { url = "https://files.pythonhosted.org/packages/fd/49/e93354258508c50abc15cdcd5fcf7ac4117f67bb6233ad7859f75e7372a0/rpds_py-0.29.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1d24564a700ef41480a984c5ebed62b74e6ce5860429b98b1fede76049e953e6", size = 409586, upload-time = "2025-11-16T14:48:45.498Z" }, + { url = "https://files.pythonhosted.org/packages/5a/8d/a27860dae1c19a6bdc901f90c81f0d581df1943355802961a57cdb5b6cd1/rpds_py-0.29.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e6596b93c010d386ae46c9fba9bfc9fc5965fa8228edeac51576299182c2e31c", size = 516339, upload-time = "2025-11-16T14:48:47.308Z" }, + { url = "https://files.pythonhosted.org/packages/fc/ad/a75e603161e79b7110c647163d130872b271c6b28712c803c65d492100f7/rpds_py-0.29.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5cc58aac218826d054c7da7f95821eba94125d88be673ff44267bb89d12a5866", size = 416201, upload-time = "2025-11-16T14:48:48.615Z" }, + { url = "https://files.pythonhosted.org/packages/b9/42/555b4ee17508beafac135c8b450816ace5a96194ce97fefc49d58e5652ea/rpds_py-0.29.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:de73e40ebc04dd5d9556f50180395322193a78ec247e637e741c1b954810f295", size = 395095, upload-time = "2025-11-16T14:48:50.027Z" }, + { url = "https://files.pythonhosted.org/packages/cd/f0/c90b671b9031e800ec45112be42ea9f027f94f9ac25faaac8770596a16a1/rpds_py-0.29.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:295ce5ac7f0cf69a651ea75c8f76d02a31f98e5698e82a50a5f4d4982fbbae3b", size = 410077, upload-time = "2025-11-16T14:48:51.515Z" }, + { url = "https://files.pythonhosted.org/packages/3d/80/9af8b640b81fe21e6f718e9dec36c0b5f670332747243130a5490f292245/rpds_py-0.29.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1ea59b23ea931d494459c8338056fe7d93458c0bf3ecc061cd03916505369d55", size = 424548, upload-time = "2025-11-16T14:48:53.237Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0b/b5647446e991736e6a495ef510e6710df91e880575a586e763baeb0aa770/rpds_py-0.29.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f49d41559cebd608042fdcf54ba597a4a7555b49ad5c1c0c03e0af82692661cd", size = 573661, upload-time = "2025-11-16T14:48:54.769Z" }, + { url = "https://files.pythonhosted.org/packages/f7/b3/1b1c9576839ff583d1428efbf59f9ee70498d8ce6c0b328ac02f1e470879/rpds_py-0.29.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:05a2bd42768ea988294ca328206efbcc66e220d2d9b7836ee5712c07ad6340ea", size = 600937, upload-time = "2025-11-16T14:48:56.247Z" }, + { url = "https://files.pythonhosted.org/packages/6c/7b/b6cfca2f9fee4c4494ce54f7fb1b9f578867495a9aa9fc0d44f5f735c8e0/rpds_py-0.29.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:33ca7bdfedd83339ca55da3a5e1527ee5870d4b8369456b5777b197756f3ca22", size = 564496, upload-time = "2025-11-16T14:48:57.691Z" }, + { url = "https://files.pythonhosted.org/packages/b9/fb/ba29ec7f0f06eb801bac5a23057a9ff7670623b5e8013bd59bec4aa09de8/rpds_py-0.29.0-cp313-cp313-win32.whl", hash = "sha256:20c51ae86a0bb9accc9ad4e6cdeec58d5ebb7f1b09dd4466331fc65e1766aae7", size = 223126, upload-time = 
"2025-11-16T14:48:59.058Z" }, + { url = "https://files.pythonhosted.org/packages/3c/6b/0229d3bed4ddaa409e6d90b0ae967ed4380e4bdd0dad6e59b92c17d42457/rpds_py-0.29.0-cp313-cp313-win_amd64.whl", hash = "sha256:6410e66f02803600edb0b1889541f4b5cc298a5ccda0ad789cc50ef23b54813e", size = 239771, upload-time = "2025-11-16T14:49:00.872Z" }, + { url = "https://files.pythonhosted.org/packages/e4/38/d2868f058b164f8efd89754d85d7b1c08b454f5c07ac2e6cc2e9bd4bd05b/rpds_py-0.29.0-cp313-cp313-win_arm64.whl", hash = "sha256:56838e1cd9174dc23c5691ee29f1d1be9eab357f27efef6bded1328b23e1ced2", size = 229994, upload-time = "2025-11-16T14:49:02.673Z" }, + { url = "https://files.pythonhosted.org/packages/52/91/5de91c5ec7d41759beec9b251630824dbb8e32d20c3756da1a9a9d309709/rpds_py-0.29.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:37d94eadf764d16b9a04307f2ab1d7af6dc28774bbe0535c9323101e14877b4c", size = 365886, upload-time = "2025-11-16T14:49:04.133Z" }, + { url = "https://files.pythonhosted.org/packages/85/7c/415d8c1b016d5f47ecec5145d9d6d21002d39dce8761b30f6c88810b455a/rpds_py-0.29.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d472cf73efe5726a067dce63eebe8215b14beabea7c12606fd9994267b3cfe2b", size = 355262, upload-time = "2025-11-16T14:49:05.543Z" }, + { url = "https://files.pythonhosted.org/packages/3d/14/bf83e2daa4f980e4dc848aed9299792a8b84af95e12541d9e7562f84a6ef/rpds_py-0.29.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72fdfd5ff8992e4636621826371e3ac5f3e3b8323e9d0e48378e9c13c3dac9d0", size = 384826, upload-time = "2025-11-16T14:49:07.301Z" }, + { url = "https://files.pythonhosted.org/packages/33/b8/53330c50a810ae22b4fbba5e6cf961b68b9d72d9bd6780a7c0a79b070857/rpds_py-0.29.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2549d833abdf8275c901313b9e8ff8fba57e50f6a495035a2a4e30621a2f7cc4", size = 394234, upload-time = "2025-11-16T14:49:08.782Z" }, + { url = 
"https://files.pythonhosted.org/packages/cc/32/01e2e9645cef0e584f518cfde4567563e57db2257244632b603f61b40e50/rpds_py-0.29.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4448dad428f28a6a767c3e3b80cde3446a22a0efbddaa2360f4bb4dc836d0688", size = 520008, upload-time = "2025-11-16T14:49:10.253Z" }, + { url = "https://files.pythonhosted.org/packages/98/c3/0d1b95a81affae2b10f950782e33a1fd2edd6ce2a479966cac98c9a66f57/rpds_py-0.29.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:115f48170fd4296a33938d8c11f697f5f26e0472e43d28f35624764173a60e4d", size = 409569, upload-time = "2025-11-16T14:49:12.478Z" }, + { url = "https://files.pythonhosted.org/packages/fa/60/aa3b8678f3f009f675b99174fa2754302a7fbfe749162e8043d111de2d88/rpds_py-0.29.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e5bb73ffc029820f4348e9b66b3027493ae00bca6629129cd433fd7a76308ee", size = 385188, upload-time = "2025-11-16T14:49:13.88Z" }, + { url = "https://files.pythonhosted.org/packages/92/02/5546c1c8aa89c18d40c1fcffdcc957ba730dee53fb7c3ca3a46f114761d2/rpds_py-0.29.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:b1581fcde18fcdf42ea2403a16a6b646f8eb1e58d7f90a0ce693da441f76942e", size = 398587, upload-time = "2025-11-16T14:49:15.339Z" }, + { url = "https://files.pythonhosted.org/packages/6c/e0/ad6eeaf47e236eba052fa34c4073078b9e092bd44da6bbb35aaae9580669/rpds_py-0.29.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16e9da2bda9eb17ea318b4c335ec9ac1818e88922cbe03a5743ea0da9ecf74fb", size = 416641, upload-time = "2025-11-16T14:49:16.832Z" }, + { url = "https://files.pythonhosted.org/packages/1a/93/0acedfd50ad9cdd3879c615a6dc8c5f1ce78d2fdf8b87727468bb5bb4077/rpds_py-0.29.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:28fd300326dd21198f311534bdb6d7e989dd09b3418b3a91d54a0f384c700967", size = 566683, upload-time = "2025-11-16T14:49:18.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/53/8c64e0f340a9e801459fc6456821abc15b3582cb5dc3932d48705a9d9ac7/rpds_py-0.29.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2aba991e041d031c7939e1358f583ae405a7bf04804ca806b97a5c0e0af1ea5e", size = 592730, upload-time = "2025-11-16T14:49:19.767Z" }, + { url = "https://files.pythonhosted.org/packages/85/ef/3109b6584f8c4b0d2490747c916df833c127ecfa82be04d9a40a376f2090/rpds_py-0.29.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f437026dbbc3f08c99cc41a5b2570c6e1a1ddbe48ab19a9b814254128d4ea7a", size = 557361, upload-time = "2025-11-16T14:49:21.574Z" }, + { url = "https://files.pythonhosted.org/packages/ff/3b/61586475e82d57f01da2c16edb9115a618afe00ce86fe1b58936880b15af/rpds_py-0.29.0-cp313-cp313t-win32.whl", hash = "sha256:6e97846e9800a5d0fe7be4d008f0c93d0feeb2700da7b1f7528dabafb31dfadb", size = 211227, upload-time = "2025-11-16T14:49:23.03Z" }, + { url = "https://files.pythonhosted.org/packages/3b/3a/12dc43f13594a54ea0c9d7e9d43002116557330e3ad45bc56097ddf266e2/rpds_py-0.29.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f49196aec7c4b406495f60e6f947ad71f317a765f956d74bbd83996b9edc0352", size = 225248, upload-time = "2025-11-16T14:49:24.841Z" }, + { url = "https://files.pythonhosted.org/packages/89/b1/0b1474e7899371d9540d3bbb2a499a3427ae1fc39c998563fe9035a1073b/rpds_py-0.29.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:394d27e4453d3b4d82bb85665dc1fcf4b0badc30fc84282defed71643b50e1a1", size = 363731, upload-time = "2025-11-16T14:49:26.683Z" }, + { url = "https://files.pythonhosted.org/packages/28/12/3b7cf2068d0a334ed1d7b385a9c3c8509f4c2bcba3d4648ea71369de0881/rpds_py-0.29.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:55d827b2ae95425d3be9bc9a5838b6c29d664924f98146557f7715e331d06df8", size = 354343, upload-time = "2025-11-16T14:49:28.24Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/73/5afcf8924bc02a749416eda64e17ac9c9b28f825f4737385295a0e99b0c1/rpds_py-0.29.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc31a07ed352e5462d3ee1b22e89285f4ce97d5266f6d1169da1142e78045626", size = 385406, upload-time = "2025-11-16T14:49:29.943Z" }, + { url = "https://files.pythonhosted.org/packages/c8/37/5db736730662508535221737a21563591b6f43c77f2e388951c42f143242/rpds_py-0.29.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c4695dd224212f6105db7ea62197144230b808d6b2bba52238906a2762f1d1e7", size = 396162, upload-time = "2025-11-16T14:49:31.833Z" }, + { url = "https://files.pythonhosted.org/packages/70/0d/491c1017d14f62ce7bac07c32768d209a50ec567d76d9f383b4cfad19b80/rpds_py-0.29.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcae1770b401167f8b9e1e3f566562e6966ffa9ce63639916248a9e25fa8a244", size = 517719, upload-time = "2025-11-16T14:49:33.804Z" }, + { url = "https://files.pythonhosted.org/packages/d7/25/b11132afcb17cd5d82db173f0c8dab270ffdfaba43e5ce7a591837ae9649/rpds_py-0.29.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:90f30d15f45048448b8da21c41703b31c61119c06c216a1bf8c245812a0f0c17", size = 409498, upload-time = "2025-11-16T14:49:35.222Z" }, + { url = "https://files.pythonhosted.org/packages/0f/7d/e6543cedfb2e6403a1845710a5ab0e0ccf8fc288e0b5af9a70bfe2c12053/rpds_py-0.29.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44a91e0ab77bdc0004b43261a4b8cd6d6b451e8d443754cfda830002b5745b32", size = 382743, upload-time = "2025-11-16T14:49:36.704Z" }, + { url = "https://files.pythonhosted.org/packages/75/11/a4ebc9f654293ae9fefb83b2b6be7f3253e85ea42a5db2f77d50ad19aaeb/rpds_py-0.29.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:4aa195e5804d32c682e453b34474f411ca108e4291c6a0f824ebdc30a91c973c", size = 400317, upload-time = "2025-11-16T14:49:39.132Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/18/97677a60a81c7f0e5f64e51fb3f8271c5c8fcabf3a2df18e97af53d7c2bf/rpds_py-0.29.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7971bdb7bf4ee0f7e6f67fa4c7fbc6019d9850cc977d126904392d363f6f8318", size = 416979, upload-time = "2025-11-16T14:49:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/f0/69/28ab391a9968f6c746b2a2db181eaa4d16afaa859fedc9c2f682d19f7e18/rpds_py-0.29.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8ae33ad9ce580c7a47452c3b3f7d8a9095ef6208e0a0c7e4e2384f9fc5bf8212", size = 567288, upload-time = "2025-11-16T14:49:42.24Z" }, + { url = "https://files.pythonhosted.org/packages/3b/d3/0c7afdcdb830eee94f5611b64e71354ffe6ac8df82d00c2faf2bfffd1d4e/rpds_py-0.29.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:c661132ab2fb4eeede2ef69670fd60da5235209874d001a98f1542f31f2a8a94", size = 593157, upload-time = "2025-11-16T14:49:43.782Z" }, + { url = "https://files.pythonhosted.org/packages/e2/ac/a0fcbc2feed4241cf26d32268c195eb88ddd4bd862adfc9d4b25edfba535/rpds_py-0.29.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bb78b3a0d31ac1bde132c67015a809948db751cb4e92cdb3f0b242e430b6ed0d", size = 554741, upload-time = "2025-11-16T14:49:45.557Z" }, + { url = "https://files.pythonhosted.org/packages/0f/f1/fcc24137c470df8588674a677f33719d5800ec053aaacd1de8a5d5d84d9e/rpds_py-0.29.0-cp314-cp314-win32.whl", hash = "sha256:f475f103488312e9bd4000bc890a95955a07b2d0b6e8884aef4be56132adbbf1", size = 215508, upload-time = "2025-11-16T14:49:47.562Z" }, + { url = "https://files.pythonhosted.org/packages/7b/c7/1d169b2045512eac019918fc1021ea07c30e84a4343f9f344e3e0aa8c788/rpds_py-0.29.0-cp314-cp314-win_amd64.whl", hash = "sha256:b9cf2359a4fca87cfb6801fae83a76aedf66ee1254a7a151f1341632acf67f1b", size = 228125, upload-time = "2025-11-16T14:49:49.064Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/36/0cec88aaba70ec4a6e381c444b0d916738497d27f0c30406e3d9fcbd3bc2/rpds_py-0.29.0-cp314-cp314-win_arm64.whl", hash = "sha256:9ba8028597e824854f0f1733d8b964e914ae3003b22a10c2c664cb6927e0feb9", size = 221992, upload-time = "2025-11-16T14:49:50.777Z" }, + { url = "https://files.pythonhosted.org/packages/b1/fa/a2e524631717c9c0eb5d90d30f648cfba6b731047821c994acacb618406c/rpds_py-0.29.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:e71136fd0612556b35c575dc2726ae04a1669e6a6c378f2240312cf5d1a2ab10", size = 366425, upload-time = "2025-11-16T14:49:52.691Z" }, + { url = "https://files.pythonhosted.org/packages/a2/a4/6d43ebe0746ff694a30233f63f454aed1677bd50ab7a59ff6b2bb5ac61f2/rpds_py-0.29.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:76fe96632d53f3bf0ea31ede2f53bbe3540cc2736d4aec3b3801b0458499ef3a", size = 355282, upload-time = "2025-11-16T14:49:54.292Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a7/52fd8270e0320b09eaf295766ae81dd175f65394687906709b3e75c71d06/rpds_py-0.29.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9459a33f077130dbb2c7c3cea72ee9932271fb3126404ba2a2661e4fe9eb7b79", size = 384968, upload-time = "2025-11-16T14:49:55.857Z" }, + { url = "https://files.pythonhosted.org/packages/f4/7d/e6bc526b7a14e1ef80579a52c1d4ad39260a058a51d66c6039035d14db9d/rpds_py-0.29.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5c9546cfdd5d45e562cc0444b6dddc191e625c62e866bf567a2c69487c7ad28a", size = 394714, upload-time = "2025-11-16T14:49:57.343Z" }, + { url = "https://files.pythonhosted.org/packages/c0/3f/f0ade3954e7db95c791e7eaf978aa7e08a756d2046e8bdd04d08146ed188/rpds_py-0.29.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12597d11d97b8f7e376c88929a6e17acb980e234547c92992f9f7c058f1a7310", size = 520136, upload-time = "2025-11-16T14:49:59.162Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/b3/07122ead1b97009715ab9d4082be6d9bd9546099b2b03fae37c3116f72be/rpds_py-0.29.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28de03cf48b8a9e6ec10318f2197b83946ed91e2891f651a109611be4106ac4b", size = 409250, upload-time = "2025-11-16T14:50:00.698Z" }, + { url = "https://files.pythonhosted.org/packages/c9/c6/dcbee61fd1dc892aedcb1b489ba661313101aa82ec84b1a015d4c63ebfda/rpds_py-0.29.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd7951c964069039acc9d67a8ff1f0a7f34845ae180ca542b17dc1456b1f1808", size = 384940, upload-time = "2025-11-16T14:50:02.312Z" }, + { url = "https://files.pythonhosted.org/packages/47/11/914ecb6f3574cf9bf8b38aced4063e0f787d6e1eb30b181a7efbc6c1da9a/rpds_py-0.29.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:c07d107b7316088f1ac0177a7661ca0c6670d443f6fe72e836069025e6266761", size = 399392, upload-time = "2025-11-16T14:50:03.829Z" }, + { url = "https://files.pythonhosted.org/packages/f5/fd/2f4bd9433f58f816434bb934313584caa47dbc6f03ce5484df8ac8980561/rpds_py-0.29.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1de2345af363d25696969befc0c1688a6cb5e8b1d32b515ef84fc245c6cddba3", size = 416796, upload-time = "2025-11-16T14:50:05.558Z" }, + { url = "https://files.pythonhosted.org/packages/79/a5/449f0281af33efa29d5c71014399d74842342ae908d8cd38260320167692/rpds_py-0.29.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:00e56b12d2199ca96068057e1ae7f9998ab6e99cda82431afafd32f3ec98cca9", size = 566843, upload-time = "2025-11-16T14:50:07.243Z" }, + { url = "https://files.pythonhosted.org/packages/ab/32/0a6a1ccee2e37fcb1b7ba9afde762b77182dbb57937352a729c6cd3cf2bb/rpds_py-0.29.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3919a3bbecee589300ed25000b6944174e07cd20db70552159207b3f4bbb45b8", size = 593956, upload-time = "2025-11-16T14:50:09.029Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/3d/eb820f95dce4306f07a495ede02fb61bef36ea201d9137d4fcd5ab94ec1e/rpds_py-0.29.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7fa2ccc312bbd91e43aa5e0869e46bc03278a3dddb8d58833150a18b0f0283a", size = 557288, upload-time = "2025-11-16T14:50:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/e9/f8/b8ff786f40470462a252918e0836e0db903c28e88e3eec66bc4a7856ee5d/rpds_py-0.29.0-cp314-cp314t-win32.whl", hash = "sha256:97c817863ffc397f1e6a6e9d2d89fe5408c0a9922dac0329672fb0f35c867ea5", size = 211382, upload-time = "2025-11-16T14:50:12.827Z" }, + { url = "https://files.pythonhosted.org/packages/c9/7f/1a65ae870bc9d0576aebb0c501ea5dccf1ae2178fe2821042150ebd2e707/rpds_py-0.29.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2023473f444752f0f82a58dfcbee040d0a1b3d1b3c2ec40e884bd25db6d117d2", size = 225919, upload-time = "2025-11-16T14:50:14.734Z" }, + { url = "https://files.pythonhosted.org/packages/f2/ac/b97e80bf107159e5b9ba9c91df1ab95f69e5e41b435f27bdd737f0d583ac/rpds_py-0.29.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:acd82a9e39082dc5f4492d15a6b6c8599aa21db5c35aaf7d6889aea16502c07d", size = 373963, upload-time = "2025-11-16T14:50:16.205Z" }, + { url = "https://files.pythonhosted.org/packages/40/5a/55e72962d5d29bd912f40c594e68880d3c7a52774b0f75542775f9250712/rpds_py-0.29.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:715b67eac317bf1c7657508170a3e011a1ea6ccb1c9d5f296e20ba14196be6b3", size = 364644, upload-time = "2025-11-16T14:50:18.22Z" }, + { url = "https://files.pythonhosted.org/packages/99/2a/6b6524d0191b7fc1351c3c0840baac42250515afb48ae40c7ed15499a6a2/rpds_py-0.29.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3b1b87a237cb2dba4db18bcfaaa44ba4cd5936b91121b62292ff21df577fc43", size = 393847, upload-time = "2025-11-16T14:50:20.012Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/b8/c5692a7df577b3c0c7faed7ac01ee3c608b81750fc5d89f84529229b6873/rpds_py-0.29.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c3c3e8101bb06e337c88eb0c0ede3187131f19d97d43ea0e1c5407ea74c0cbf", size = 407281, upload-time = "2025-11-16T14:50:21.64Z" }, + { url = "https://files.pythonhosted.org/packages/f0/57/0546c6f84031b7ea08b76646a8e33e45607cc6bd879ff1917dc077bb881e/rpds_py-0.29.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8e54d6e61f3ecd3abe032065ce83ea63417a24f437e4a3d73d2f85ce7b7cfe", size = 529213, upload-time = "2025-11-16T14:50:23.219Z" }, + { url = "https://files.pythonhosted.org/packages/fa/c1/01dd5f444233605555bc11fe5fed6a5c18f379f02013870c176c8e630a23/rpds_py-0.29.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3fbd4e9aebf110473a420dea85a238b254cf8a15acb04b22a5a6b5ce8925b760", size = 413808, upload-time = "2025-11-16T14:50:25.262Z" }, + { url = "https://files.pythonhosted.org/packages/aa/0a/60f98b06156ea2a7af849fb148e00fbcfdb540909a5174a5ed10c93745c7/rpds_py-0.29.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80fdf53d36e6c72819993e35d1ebeeb8e8fc688d0c6c2b391b55e335b3afba5a", size = 394600, upload-time = "2025-11-16T14:50:26.956Z" }, + { url = "https://files.pythonhosted.org/packages/37/f1/dc9312fc9bec040ece08396429f2bd9e0977924ba7a11c5ad7056428465e/rpds_py-0.29.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:ea7173df5d86f625f8dde6d5929629ad811ed8decda3b60ae603903839ac9ac0", size = 408634, upload-time = "2025-11-16T14:50:28.989Z" }, + { url = "https://files.pythonhosted.org/packages/ed/41/65024c9fd40c89bb7d604cf73beda4cbdbcebe92d8765345dd65855b6449/rpds_py-0.29.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:76054d540061eda273274f3d13a21a4abdde90e13eaefdc205db37c05230efce", size = 426064, upload-time = 
"2025-11-16T14:50:30.674Z" }, + { url = "https://files.pythonhosted.org/packages/a2/e0/cf95478881fc88ca2fdbf56381d7df36567cccc39a05394beac72182cd62/rpds_py-0.29.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:9f84c549746a5be3bc7415830747a3a0312573afc9f95785eb35228bb17742ec", size = 575871, upload-time = "2025-11-16T14:50:33.428Z" }, + { url = "https://files.pythonhosted.org/packages/ea/c0/df88097e64339a0218b57bd5f9ca49898e4c394db756c67fccc64add850a/rpds_py-0.29.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:0ea962671af5cb9a260489e311fa22b2e97103e3f9f0caaea6f81390af96a9ed", size = 601702, upload-time = "2025-11-16T14:50:36.051Z" }, + { url = "https://files.pythonhosted.org/packages/87/f4/09ffb3ebd0cbb9e2c7c9b84d252557ecf434cd71584ee1e32f66013824df/rpds_py-0.29.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:f7728653900035fb7b8d06e1e5900545d8088efc9d5d4545782da7df03ec803f", size = 564054, upload-time = "2025-11-16T14:50:37.733Z" }, ] [[package]] @@ -4962,24 +4879,28 @@ wheels = [ [[package]] name = "safetensors" -version = "0.6.2" +version = "0.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ac/cc/738f3011628920e027a11754d9cae9abec1aed00f7ae860abbf843755233/safetensors-0.6.2.tar.gz", hash = "sha256:43ff2aa0e6fa2dc3ea5524ac7ad93a9839256b8703761e76e2d0b2a3fa4f15d9", size = 197968, upload-time = "2025-08-08T13:13:58.654Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/b1/3f5fd73c039fc87dba3ff8b5d528bfc5a32b597fea8e7a6a4800343a17c7/safetensors-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9c85ede8ec58f120bad982ec47746981e210492a6db876882aa021446af8ffba", size = 454797, upload-time = "2025-08-08T13:13:52.066Z" }, - { url = "https://files.pythonhosted.org/packages/8c/c9/bb114c158540ee17907ec470d01980957fdaf87b4aa07914c24eba87b9c6/safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:d6675cf4b39c98dbd7d940598028f3742e0375a6b4d4277e76beb0c35f4b843b", size = 432206, upload-time = "2025-08-08T13:13:50.931Z" }, - { url = "https://files.pythonhosted.org/packages/d3/8e/f70c34e47df3110e8e0bb268d90db8d4be8958a54ab0336c9be4fe86dac8/safetensors-0.6.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d2d2b3ce1e2509c68932ca03ab8f20570920cd9754b05063d4368ee52833ecd", size = 473261, upload-time = "2025-08-08T13:13:41.259Z" }, - { url = "https://files.pythonhosted.org/packages/2a/f5/be9c6a7c7ef773e1996dc214e73485286df1836dbd063e8085ee1976f9cb/safetensors-0.6.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:93de35a18f46b0f5a6a1f9e26d91b442094f2df02e9fd7acf224cfec4238821a", size = 485117, upload-time = "2025-08-08T13:13:43.506Z" }, - { url = "https://files.pythonhosted.org/packages/c9/55/23f2d0a2c96ed8665bf17a30ab4ce5270413f4d74b6d87dd663258b9af31/safetensors-0.6.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89a89b505f335640f9120fac65ddeb83e40f1fd081cb8ed88b505bdccec8d0a1", size = 616154, upload-time = "2025-08-08T13:13:45.096Z" }, - { url = "https://files.pythonhosted.org/packages/98/c6/affb0bd9ce02aa46e7acddbe087912a04d953d7a4d74b708c91b5806ef3f/safetensors-0.6.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc4d0d0b937e04bdf2ae6f70cd3ad51328635fe0e6214aa1fc811f3b576b3bda", size = 520713, upload-time = "2025-08-08T13:13:46.25Z" }, - { url = "https://files.pythonhosted.org/packages/fe/5d/5a514d7b88e310c8b146e2404e0dc161282e78634d9358975fd56dfd14be/safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8045db2c872db8f4cbe3faa0495932d89c38c899c603f21e9b6486951a5ecb8f", size = 485835, upload-time = "2025-08-08T13:13:49.373Z" }, - { url = 
"https://files.pythonhosted.org/packages/7a/7b/4fc3b2ba62c352b2071bea9cfbad330fadda70579f617506ae1a2f129cab/safetensors-0.6.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:81e67e8bab9878bb568cffbc5f5e655adb38d2418351dc0859ccac158f753e19", size = 521503, upload-time = "2025-08-08T13:13:47.651Z" }, - { url = "https://files.pythonhosted.org/packages/5a/50/0057e11fe1f3cead9254315a6c106a16dd4b1a19cd247f7cc6414f6b7866/safetensors-0.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b0e4d029ab0a0e0e4fdf142b194514695b1d7d3735503ba700cf36d0fc7136ce", size = 652256, upload-time = "2025-08-08T13:13:53.167Z" }, - { url = "https://files.pythonhosted.org/packages/e9/29/473f789e4ac242593ac1656fbece6e1ecd860bb289e635e963667807afe3/safetensors-0.6.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:fa48268185c52bfe8771e46325a1e21d317207bcabcb72e65c6e28e9ffeb29c7", size = 747281, upload-time = "2025-08-08T13:13:54.656Z" }, - { url = "https://files.pythonhosted.org/packages/68/52/f7324aad7f2df99e05525c84d352dc217e0fa637a4f603e9f2eedfbe2c67/safetensors-0.6.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:d83c20c12c2d2f465997c51b7ecb00e407e5f94d7dec3ea0cc11d86f60d3fde5", size = 692286, upload-time = "2025-08-08T13:13:55.884Z" }, - { url = "https://files.pythonhosted.org/packages/ad/fe/cad1d9762868c7c5dc70c8620074df28ebb1a8e4c17d4c0cb031889c457e/safetensors-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d944cea65fad0ead848b6ec2c37cc0b197194bec228f8020054742190e9312ac", size = 655957, upload-time = "2025-08-08T13:13:57.029Z" }, - { url = "https://files.pythonhosted.org/packages/59/a7/e2158e17bbe57d104f0abbd95dff60dda916cf277c9f9663b4bf9bad8b6e/safetensors-0.6.2-cp38-abi3-win32.whl", hash = "sha256:cab75ca7c064d3911411461151cb69380c9225798a20e712b102edda2542ddb1", size = 308926, upload-time = "2025-08-08T13:14:01.095Z" }, - { url = 
"https://files.pythonhosted.org/packages/2c/c3/c0be1135726618dc1e28d181b8c442403d8dbb9e273fd791de2d4384bcdd/safetensors-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:c7b214870df923cbc1593c3faee16bec59ea462758699bd3fee399d00aac072c", size = 320192, upload-time = "2025-08-08T13:13:59.467Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, + { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" }, + { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, + { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" }, + { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" }, + { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, + { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" }, + { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" }, + { url = "https://files.pythonhosted.org/packages/a7/6a/4d08d89a6fcbe905c5ae68b8b34f0791850882fc19782d0d02c65abbdf3b/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4729811a6640d019a4b7ba8638ee2fd21fa5ca8c7e7bdf0fed62068fcaac737", size = 492430, upload-time = "2025-11-19T15:18:11.884Z" }, + { url = "https://files.pythonhosted.org/packages/dd/29/59ed8152b30f72c42d00d241e58eaca558ae9dbfa5695206e2e0f54c7063/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:12f49080303fa6bb424b362149a12949dfbbf1e06811a88f2307276b0c131afd", size = 503977, upload-time = "2025-11-19T15:18:17.523Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/0b/4811bfec67fa260e791369b16dab105e4bae82686120554cc484064e22b4/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0071bffba4150c2f46cae1432d31995d77acfd9f8db598b5d1a2ce67e8440ad2", size = 623890, upload-time = "2025-11-19T15:18:22.666Z" }, + { url = "https://files.pythonhosted.org/packages/58/5b/632a58724221ef03d78ab65062e82a1010e1bef8e8e0b9d7c6d7b8044841/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:473b32699f4200e69801bf5abf93f1a4ecd432a70984df164fc22ccf39c4a6f3", size = 531885, upload-time = "2025-11-19T15:18:27.146Z" }, ] [[package]] @@ -4991,7 +4912,7 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "numpy", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" } wheels = [ @@ -5047,21 +4968,17 @@ name = "scipy" version = "1.16.3" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version == '3.13.*' and sys_platform == 
'linux'", "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", + "python_full_version >= '3.14' and sys_platform != 'linux'", + "python_full_version == '3.13.*' and sys_platform != 'linux'", "python_full_version == '3.12.*' and sys_platform != 'linux'", "python_full_version == '3.11.*' and sys_platform == 'linux'", "python_full_version == '3.11.*' and sys_platform != 'linux'", ] dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0a/ca/d8ace4f98322d01abcd52d381134344bf7b431eba7ed8b42bdea5a3c2ac9/scipy-1.16.3.tar.gz", hash = "sha256:01e87659402762f43bd2fee13370553a17ada367d42e7487800bf2916535aecb", size = 30597883, upload-time = "2025-10-28T17:38:54.068Z" } wheels = [ @@ -5193,15 +5110,15 @@ wheels = [ [[package]] name = "sentry-sdk" -version = "2.43.0" +version = "2.46.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b3/18/09875b4323b03ca9025bae7e6539797b27e4fc032998a466b4b9c3d24653/sentry_sdk-2.43.0.tar.gz", hash = "sha256:52ed6e251c5d2c084224d73efee56b007ef5c2d408a4a071270e82131d336e20", size = 368953, upload-time = "2025-10-29T11:26:08.156Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/7c/d7/c140a5837649e2bf2ec758494fde1d9a016c76777eab64e75ef38d685bbb/sentry_sdk-2.46.0.tar.gz", hash = "sha256:91821a23460725734b7741523021601593f35731808afc0bb2ba46c27b8acd91", size = 374761, upload-time = "2025-11-24T09:34:13.932Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/69/31/8228fa962f7fd8814d634e4ebece8780e2cdcfbdf0cd2e14d4a6861a7cd5/sentry_sdk-2.43.0-py2.py3-none-any.whl", hash = "sha256:4aacafcf1756ef066d359ae35030881917160ba7f6fc3ae11e0e58b09edc2d5d", size = 400997, upload-time = "2025-10-29T11:26:05.77Z" }, + { url = "https://files.pythonhosted.org/packages/4b/b6/ce7c502a366f4835b1f9c057753f6989a92d3c70cbadb168193f5fb7499b/sentry_sdk-2.46.0-py2.py3-none-any.whl", hash = "sha256:4eeeb60198074dff8d066ea153fa6f241fef1668c10900ea53a4200abc8da9b1", size = 406266, upload-time = "2025-11-24T09:34:12.114Z" }, ] [[package]] @@ -5233,11 +5150,11 @@ wheels = [ [[package]] name = "slack-sdk" -version = "3.37.0" +version = "3.39.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8e/c2/0a174a155623d7dc3ed4d1360cdf755590acdc2c3fc9ce0d2340f468909f/slack_sdk-3.37.0.tar.gz", hash = "sha256:242d6cffbd9e843af807487ff04853189b812081aeaa22f90a8f159f20220ed9", size = 241612, upload-time = "2025-10-06T23:07:20.856Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/dd/645f3eb93fce38eadbb649e85684730b1fc3906c2674ca59bddc2ca2bd2e/slack_sdk-3.39.0.tar.gz", hash = "sha256:6a56be10dc155c436ff658c6b776e1c082e29eae6a771fccf8b0a235822bbcb1", size = 247207, upload-time = "2025-11-20T15:27:57.556Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/07/fd/a502ee24d8c7d12a8f749878ae0949b8eeb50aeac22dc5a613d417a256d0/slack_sdk-3.37.0-py2.py3-none-any.whl", hash = "sha256:e108a0836eafda74d8a95e76c12c2bcb010e645d504d8497451e4c7ebb229c87", size = 302751, upload-time = "2025-10-06T23:07:19.542Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/1f/32bcf088e535c1870b1a1f2e3b916129c66fdfe565a793316317241d41e5/slack_sdk-3.39.0-py2.py3-none-any.whl", hash = "sha256:b1556b2f5b8b12b94e5ea3f56c4f2c7f04462e4e1013d325c5764ff118044fa8", size = 309850, upload-time = "2025-11-20T15:27:55.729Z" }, ] [[package]] @@ -5282,7 +5199,8 @@ version = "0.13.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" } wheels = [ @@ -5341,44 +5259,14 @@ name = "sphinx" version = "8.2.3" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and 
platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and 
sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 
'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 
'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version == '3.13.*' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and sys_platform == 'linux'", + "python_full_version >= '3.14' and sys_platform != 'linux'", + "python_full_version == '3.13.*' and sys_platform != 'linux'", + "python_full_version == '3.12.*' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'linux'", ] dependencies = [ { name = "alabaster", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, @@ -5430,44 +5318,14 @@ name = "sphinx-autobuild" version = "2025.8.25" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and 
platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and 
sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 
'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 
'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version == '3.13.*' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and sys_platform == 'linux'", + "python_full_version >= '3.14' and sys_platform != 'linux'", + "python_full_version == '3.13.*' and sys_platform != 'linux'", + "python_full_version == '3.12.*' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'linux'", ] dependencies = [ { name = "colorama", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, @@ -5565,15 +5423,24 @@ wheels = [ [[package]] name = "starlette" -version = "0.49.3" +version = "0.50.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/de/1a/608df0b10b53b0beb96a37854ee05864d182ddd4b1156a22f1ad3860425a/starlette-0.49.3.tar.gz", hash = "sha256:1c14546f299b5901a1ea0e34410575bc33bbd741377a10484a54445588d00284", size = 2655031, upload-time = "2025-11-01T15:12:26.13Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = 
"2025-11-01T15:25:27.516Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/e0/021c772d6a662f43b63044ab481dc6ac7592447605b5b35a957785363122/starlette-0.49.3-py3-none-any.whl", hash = "sha256:b579b99715fdc2980cf88c8ec96d3bf1ce16f5a8051a7c2b84ef9b1cdecaea2f", size = 74340, upload-time = "2025-11-01T15:12:24.387Z" }, + { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, +] + +[[package]] +name = "strenum" +version = "0.4.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/ad/430fb60d90e1d112a62ff57bdd1f286ec73a2a0331272febfddd21f330e1/StrEnum-0.4.15.tar.gz", hash = "sha256:878fb5ab705442070e4dd1929bb5e2249511c0bcf2b0eeacf3bcd80875c82eff", size = 23384, upload-time = "2023-06-29T22:02:58.399Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/69/297302c5f5f59c862faa31e6cb9a4cd74721cd1e052b38e464c5b402df8b/StrEnum-0.4.15-py3-none-any.whl", hash = "sha256:a30cda4af7cc6b5bf52c8055bc4bf4b2b6b14a93b574626da33df53cf7740659", size = 8851, upload-time = "2023-06-29T22:02:56.947Z" }, ] [[package]] @@ -5581,7 +5448,7 @@ name = "sympy" version = "1.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "mpmath", marker = "sys_platform != 'linux'" }, + { name = "mpmath" }, ] sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } wheels = [ @@ -5605,7 +5472,8 @@ dependencies = [ { name = "absl-py" }, { name = "grpcio" }, { name = "markdown" }, - { name = "numpy" }, + { name = "numpy", version 
= "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pillow" }, { name = "protobuf" }, @@ -5627,63 +5495,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/c6/825dab04195756cf8ff2e12698f22513b3db2f64925bdd41671bfb33aaa5/tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:ef687163c24185ae9754ed5650eb5bc4d84ff257aabdc33f0cc6f74d8ba54530", size = 6590363, upload-time = "2023-10-23T21:23:35.583Z" }, ] -[[package]] -name = "tensorstore" -version = "0.1.74" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", -] -dependencies = [ - { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, 
- { name = "numpy", marker = "python_full_version >= '3.13'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3c/b9/ea25aba62c688a87d7d7d9cc5926d602e2f9e84fa72586825486fb180b7e/tensorstore-0.1.74.tar.gz", hash = "sha256:a062875f27283d30ce4959c408c253ecb336fce8e3f9837c064e3d30cda79203", size = 6795605, upload-time = "2025-04-24T15:42:18.829Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f2/20/1e7e776dc30f2f07416223c12f9ad244ec539af5fa1fbef9320812a9a3b6/tensorstore-0.1.74-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:edfae80aceb05640ac2209a11a4b76cecd5d9c4a95c01ede8c89c8edaa90f9d5", size = 15292660, upload-time = "2025-04-24T15:41:18.253Z" }, - { url = "https://files.pythonhosted.org/packages/76/cc/81bf2d6a4caa239d38905b439864d3a8bf06b27d6d31bb2396e3f4f5cc55/tensorstore-0.1.74-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ab985d767d53e9478987c23dc7aea8f7e8aed2ef90ec8f7f939e8b399667feb1", size = 13260438, upload-time = "2025-04-24T15:41:22.596Z" }, - { url = "https://files.pythonhosted.org/packages/88/4c/a26c4c8b8e7573d2b552505cd46a658b9a68a80d88e9d3c68f16d10e4d62/tensorstore-0.1.74-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d16d1181c292ea065ebd203e823420c65e365d0407eea8f0a3dd82995da0cc65", size = 17041531, upload-time = "2025-04-24T15:41:25.492Z" }, - { url = "https://files.pythonhosted.org/packages/e4/a9/3859b1b497dacf2093e196e1d4ed3b95e8553c7d7c9fe1f88216c72253a9/tensorstore-0.1.74-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f327e813152705b5297f251824a91106e17a06fd2f6b5f6e94c6401c5937da8c", size = 18392852, upload-time = "2025-04-24T15:41:28.136Z" }, - { url = "https://files.pythonhosted.org/packages/2d/3b/b7494ea0a37dd4cd3721f104fc52d4c953354b801eb1adf08e40bc08aaa0/tensorstore-0.1.74-cp310-cp310-win_amd64.whl", hash = "sha256:e56e9690cc20463951a52a6908e18056a93ce5bcd4a881834e2b5962801a1125", size = 12429998, upload-time = "2025-04-24T15:41:30.794Z" }, 
- { url = "https://files.pythonhosted.org/packages/0d/3e/d67bb3d9bb7409469d15fb90ef5756e6ac8b835af7f27c02fc542c4b4059/tensorstore-0.1.74-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:8353e619d9140ca50fc0cb5b846e07c68462dd5015b4714752a0a664e48a03d3", size = 15294582, upload-time = "2025-04-24T15:41:33.794Z" }, - { url = "https://files.pythonhosted.org/packages/01/f4/49cb5ea8e63303fcb0a6ebf0ed546aaec63982a4abca0e9801da5e3a24e3/tensorstore-0.1.74-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3ad1bfbb257ab84de1a5c9b79a60cebb5fbb7a411ddb1c246c21c9795789ba1", size = 13261395, upload-time = "2025-04-24T15:41:36.372Z" }, - { url = "https://files.pythonhosted.org/packages/ad/7b/9c12d4687e6ff19222f12719286c13a546f1714e5dbed75d52a4267534ed/tensorstore-0.1.74-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3ad9daf4c757db41ad091a1a5502807baeb848be0937986d8766049c39c8466", size = 17042621, upload-time = "2025-04-24T15:41:39.284Z" }, - { url = "https://files.pythonhosted.org/packages/b5/07/cf0dc4540a78bc715fbcf4417c5dc708f3d12ed1664bf117f22463f411fc/tensorstore-0.1.74-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a35364804e7d71bf5e86d2dae4de04c90249b61ff71448b9713b4e72b2389bd", size = 18393581, upload-time = "2025-04-24T15:41:42.554Z" }, - { url = "https://files.pythonhosted.org/packages/ac/42/edf004c5a101e021f052ea3564250d773d7cf6458f92934456ffa967383f/tensorstore-0.1.74-cp311-cp311-win_amd64.whl", hash = "sha256:15dcb6ce282e32d005caad34d595b0be070947578448a2861c63fdd608fc7394", size = 12431849, upload-time = "2025-04-24T15:41:45.263Z" }, - { url = "https://files.pythonhosted.org/packages/a1/14/2e6d1cad744af9e9a1a78d881a908a859ad95b61b15de10397069f55fbd8/tensorstore-0.1.74-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:7218722ee5d74e4d01f357917d3b1b7b1d6b1c068aa73e3d801cb3d58fc45116", size = 15334307, upload-time = "2025-04-24T15:41:48.315Z" }, - { url = 
"https://files.pythonhosted.org/packages/b2/ac/8d572b8c6d689eb50db0252e9d35ee6278a6aed481b64d7e025cf51e32c4/tensorstore-0.1.74-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6926554a8633d0210bdba619d3996fff6a6af4214237fbca626e6ddfcc8ea39", size = 13288669, upload-time = "2025-04-24T15:41:50.808Z" }, - { url = "https://files.pythonhosted.org/packages/9d/6c/3e76d614ad70b61670686d91abaa3ddee6b01255bf2b40f050beb15b7970/tensorstore-0.1.74-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d584e468eb4ef8195f5d21a9da4780cf96c6074b87ef219b43a89efce3d503ca", size = 17031720, upload-time = "2025-04-24T15:41:55.092Z" }, - { url = "https://files.pythonhosted.org/packages/31/f3/09d7c3ad7c9517f89b5be9b4460b83333e98dce1c9ab0a52464ded0bab67/tensorstore-0.1.74-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0af2225431d59f8a2bb4db4c1519252f10ee407e6550875d78212d3d34ee743", size = 18378829, upload-time = "2025-04-24T15:41:58.167Z" }, - { url = "https://files.pythonhosted.org/packages/a7/f2/45ece38705280ed9ebf4ccaf084ed1e76e35b1eeec8c510e589978ac8dcd/tensorstore-0.1.74-cp312-cp312-win_amd64.whl", hash = "sha256:4e35f3679873cdc488aae20b9ae2cea4589c7b147a80edb07eb3f09eba47d43d", size = 12432300, upload-time = "2025-04-24T15:42:00.761Z" }, - { url = "https://files.pythonhosted.org/packages/fb/e9/a08c6a6eb7d6b4b26053d4575196a06c6fccf4e89f9bc625f81e7c91bb5d/tensorstore-0.1.74-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:f7d2c80de9ab352ca14aeca798d6650c5670725e6f8eac73f4fcc8f3147ca614", size = 15334469, upload-time = "2025-04-24T15:42:03.731Z" }, - { url = "https://files.pythonhosted.org/packages/9a/a9/64b90c6e66e0b8043e641090144c6614b0c78d9a719b9110d953d13a516d/tensorstore-0.1.74-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ceef7d2dcfd1caf61356f7eeb9a37896b4825b4be2750b00615cf5fb1ae47a8b", size = 13288791, upload-time = "2025-04-24T15:42:06.145Z" }, - { url = 
"https://files.pythonhosted.org/packages/62/e8/226cfc25d7eac00e783ff2ee4994830c4a42cd8690e207c4a8b93210f3d9/tensorstore-0.1.74-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e71637002a806bc1b0f0f05556d1c33493a43f3ab35f9632b3d48855677d93dc", size = 17031815, upload-time = "2025-04-24T15:42:09.239Z" }, - { url = "https://files.pythonhosted.org/packages/9a/09/dce8a0942d84f6bb039b5ea3e8bc6a479b1a9535cd216b0d42dd03c4f761/tensorstore-0.1.74-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c799edf9000aee68d6676e3d2f73d4e1a56fc817c47e150732f6d3bd2b1ef46d", size = 18378091, upload-time = "2025-04-24T15:42:13.546Z" }, - { url = "https://files.pythonhosted.org/packages/a6/23/5218575d25de9d8debfb3faf290a1e3b9a7b6be9e77ba07ff3a63a0bc899/tensorstore-0.1.74-cp313-cp313-win_amd64.whl", hash = "sha256:5da86437ffa1ee0f0c590c38daa2f4b548890ce66b1f470ac98714cb0eabdbf5", size = 12432635, upload-time = "2025-04-24T15:42:16.275Z" }, -] - [[package]] name = "tensorstore" version = "0.1.78" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'linux'", "python_full_version < '3.11' and sys_platform == 'linux'", "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, - { name = "numpy", marker = "python_full_version < '3.13'" }, + { name = "ml-dtypes", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/9f/ee/05eb424437f4db63331c90e4605025eedc0f71da3faff97161d5d7b405af/tensorstore-0.1.78.tar.gz", hash = "sha256:e26074ffe462394cf54197eb76d6569b500f347573cd74da3f4dd5f510a4ad7c", size = 6913502, upload-time = "2025-10-06T17:44:29.649Z" } wheels = [ @@ -5709,6 +5531,48 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/a2/dbd1af0e97d5d549051309d72c6e3f2fe81fae636f9db3692d21adc9c731/tensorstore-0.1.78-cp313-cp313-win_amd64.whl", hash = "sha256:e0073de8fa3074bc4cc92ced0210310fd89851899faf42a5ba256f0ba87d095c", size = 12711250, upload-time = "2025-10-06T17:44:27.926Z" }, ] +[[package]] +name = "tensorstore" +version = "0.1.79" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version == '3.13.*' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and sys_platform == 'linux'", + "python_full_version >= '3.14' and sys_platform != 'linux'", + "python_full_version == '3.13.*' and sys_platform != 'linux'", + "python_full_version == '3.12.*' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'linux'", +] +dependencies = [ + { name = "ml-dtypes", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/26/2c/50ab489a0862ca88d2d766130a6fec45ccd5174f0e04081d8b7b07a8aedd/tensorstore-0.1.79.tar.gz", hash = "sha256:8dad44a8a7f2952a5d0030a8bd868b3cfdff048bd40ab53e7226f3d8b0881c5e", size = 7075782, upload-time = "2025-11-11T22:05:23.824Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/a9/1695d7ea197c4568c2f02f34b203eef702ec8080422331f00a65c6fb2a37/tensorstore-0.1.79-cp311-cp311-macosx_10_14_x86_64.whl", hash = 
"sha256:11a2c62694ea9c21770bc5a09938d3d15c4b9662b738ae6e1e513c26ed96251a", size = 16466511, upload-time = "2025-11-11T22:04:18.614Z" }, + { url = "https://files.pythonhosted.org/packages/db/0e/5ce8a615c7f9ad7cf8ed4ac6e182fe0ef46fd06fef89757e49ba84a6ba9e/tensorstore-0.1.79-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5e152d334bf34fbabdfe8e5bc35b87d1f9947065924ff83c29e659308b36e948", size = 14499810, upload-time = "2025-11-11T22:04:21.725Z" }, + { url = "https://files.pythonhosted.org/packages/c0/29/2cb9552138fe84ab29421489121350e4af0502eafff31ccd9017490be0d8/tensorstore-0.1.79-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4230b8fd29795e88e441f749d881973eca8dadf33c5262b367839fb8891f79b", size = 18937510, upload-time = "2025-11-11T22:04:24.221Z" }, + { url = "https://files.pythonhosted.org/packages/42/70/d2a672a93faebdd176cd8541405cd5614b14d3d8dc812fbeaf2cf46d390a/tensorstore-0.1.79-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:83072ee0e551d6dca582e154b64c8b8066d276ec0759784e3149c28212a61f18", size = 20910324, upload-time = "2025-11-11T22:04:26.769Z" }, + { url = "https://files.pythonhosted.org/packages/91/d5/7958cbfb614c4ffa5070ae9575874d46937067c0d81a7739e67fb1d62de5/tensorstore-0.1.79-cp311-cp311-win_amd64.whl", hash = "sha256:6c98c6b74c00e00eba7969292144e471d5c45d67088f0dc08e3a4c60a15ee191", size = 13206191, upload-time = "2025-11-11T22:04:29.254Z" }, + { url = "https://files.pythonhosted.org/packages/f1/a2/a77be16b4a882ace36da0748305795f35306bdad568472f208bd89b96b9d/tensorstore-0.1.79-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:71aa9b45436d888c37b965f7b71195916d15438119b7dccb66a3b0776bfba367", size = 16485740, upload-time = "2025-11-11T22:04:33.478Z" }, + { url = "https://files.pythonhosted.org/packages/7a/e4/7fe268ec41aa70b71a1c56b1ec83346fbcbf12f4bfbefc79d14fb9c03408/tensorstore-0.1.79-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:108c0e867aa2c87d4982cc6325a2de0c4f5bd63c2bea18adb193a370c40594ce", size = 14508736, upload-time = "2025-11-11T22:04:38.613Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f1/b1248dae02598ce534834413e841f915a32ab185c36ecd05e4c67bdc8d19/tensorstore-0.1.79-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:debd435042c00be68ba1fb3cf59325a7babb3f4a3cf4744c87dde346802cbbb4", size = 18947817, upload-time = "2025-11-11T22:04:40.768Z" }, + { url = "https://files.pythonhosted.org/packages/87/4a/60e234147570e21bbab4ac70ab79dd794a5ef9a4945d36c34c1914a73205/tensorstore-0.1.79-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:608f7178ec6e4e4a3c26545b0a44f44bf83438d04bf2d960cd0e7699eaa99ef6", size = 20929832, upload-time = "2025-11-11T22:04:43.613Z" }, + { url = "https://files.pythonhosted.org/packages/f8/48/0531868bce12a2f520002e810d4200ec6f01ba33a2f27b6bd7289fbc197b/tensorstore-0.1.79-cp312-cp312-win_amd64.whl", hash = "sha256:a071c6c255b7e412957a6aa563bc4250242c7894edad06ae6358e3d30b7d88ce", size = 13211970, upload-time = "2025-11-11T22:04:46.179Z" }, + { url = "https://files.pythonhosted.org/packages/fa/0b/54a44e55836d8e8f576343134c0e3db71c6c837d39a0ac44699aba5b01df/tensorstore-0.1.79-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:1e8e2d098829919caac6a62cf568902e34789069ceddb28497d6e36ebcb95c0b", size = 16485855, upload-time = "2025-11-11T22:04:48.734Z" }, + { url = "https://files.pythonhosted.org/packages/04/59/cadb9a45896d480882476df4759cda1659c70669aff87a4d5a4a07ded084/tensorstore-0.1.79-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:29cf4336153af136ac8ac528e2ed46df19367edae7e14e37bca1a8b7c4848ef2", size = 14508277, upload-time = "2025-11-11T22:04:50.775Z" }, + { url = "https://files.pythonhosted.org/packages/e6/cb/3647bdd03c7692882ebc10c19df9ede49f290c216b2906f785edbdb53ef1/tensorstore-0.1.79-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:94d8fc9df1721b0287046aca7209fd5040889cad4202e7b73a1fdb77cd9b71c6", size = 18949307, upload-time = "2025-11-11T22:04:53.145Z" }, + { url = "https://files.pythonhosted.org/packages/20/a0/f91ac492cf2ee9f7541aefaaed4ad1258e73e33f3cd3e06cdce5859431db/tensorstore-0.1.79-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9f2dc3342e4686af98f6e259dc9fb377f1bf657b649c247bf6647bbe4f98090", size = 20930427, upload-time = "2025-11-11T22:04:55.353Z" }, + { url = "https://files.pythonhosted.org/packages/69/a6/752fd11747eb9fead715b02d389da7fb180a56172b885de0b48b20237d1e/tensorstore-0.1.79-cp313-cp313-win_amd64.whl", hash = "sha256:0fd6165f3df49abc7c9de029b2b72d74bebd2ff2481a5ced003607eb61c56d3e", size = 13212196, upload-time = "2025-11-11T22:05:00.451Z" }, + { url = "https://files.pythonhosted.org/packages/46/57/1649019893accb3f195780fec55b8bf6793343faf140040bc73f1c28d6a5/tensorstore-0.1.79-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:6f8f5a940eab434a951c2dadcc7c0516c7bef6d8b7a7144054f7a0c56152b5f5", size = 16488849, upload-time = "2025-11-11T22:05:03.014Z" }, + { url = "https://files.pythonhosted.org/packages/bf/23/2668cb120e855a6a7a8a5eb0eba30e2e7020da932a4d3fa13c6ee3c41f9f/tensorstore-0.1.79-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:97756d2cba3c5ce21e15602c2af5a02521cc0ecda7f9fb6d18da2f3bd51827f4", size = 14511448, upload-time = "2025-11-11T22:05:05.58Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0e/c38f079f3933cc284aab53d52976f6cb4f1ad43bb6a704ac27e0b710f176/tensorstore-0.1.79-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:847982652273fb7b2d694b789205747aaf3e50ae64738c5cb7b5eb03d86a9947", size = 18949282, upload-time = "2025-11-11T22:05:07.562Z" }, + { url = "https://files.pythonhosted.org/packages/6f/99/03479deea5bfd27a0d8a8c75d5f1d85417a7bbc9c6c7a90fb85b4a4e347a/tensorstore-0.1.79-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:7af9422269c2bfcdecf9dd55309060665ab9c2d7f6c892377ed32c032400feea", size = 20931601, upload-time = "2025-11-11T22:05:10.098Z" }, + { url = "https://files.pythonhosted.org/packages/26/36/2617edf6c6d6fc73b3ff96d9d0b97332adf0d0c56fa2014a226bf4f7dfa6/tensorstore-0.1.79-cp314-cp314-win_amd64.whl", hash = "sha256:bbd8c1ab7d2e3c03ded3d40bb373ee9a67668e33a564484927865ce43b210386", size = 13599766, upload-time = "2025-11-11T22:05:12.265Z" }, +] + [[package]] name = "tiktoken" version = "0.12.0" @@ -5864,48 +5728,63 @@ wheels = [ [[package]] name = "torch" -version = "2.9.0" +version = "2.9.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock", marker = "sys_platform != 'linux'" }, - { name = "fsspec", marker = "sys_platform != 'linux'" }, - { name = "jinja2", marker = "sys_platform != 'linux'" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'linux') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'linux'" }, - { name = "sympy", marker = "sys_platform != 'linux'" }, + { name = "filelock" }, + { name = "fsspec" }, + { name = "jinja2" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or 
(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "networkx", version = "3.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "setuptools", marker = "python_full_version >= '3.12'" }, + { name = "sympy" }, { name = "triton", 
marker = "sys_platform == 'never'" }, - { name = "typing-extensions", marker = "sys_platform != 'linux'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/86/245c240d2138c17ed572c943c289056c2721abab70810d772c6bf5495b28/torch-2.9.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:030bbfe367379ae6a4ae4042b6c44da25383343b8b3c68abaa9c7231efbaf2dd", size = 104213554, upload-time = "2025-10-15T15:45:59.798Z" }, - { url = "https://files.pythonhosted.org/packages/58/1d/fd1e88ae0948825efcab7dd66d12bec23f05d4d38ed81573c8d453c14c06/torch-2.9.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:51cb63902182a78e90886e8068befd8ea102af4b00e420263591a3d70c7d3c6c", size = 899795167, upload-time = "2025-10-15T15:47:12.695Z" }, - { url = "https://files.pythonhosted.org/packages/63/5a/496197b45c14982bef4e079b24c61dc108e3ab0d0cc9718dba9f54f45a46/torch-2.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:3f6aad4d2f0ee2248bac25339d74858ff846c3969b27d14ac235821f055af83d", size = 109310314, upload-time = "2025-10-15T15:46:16.633Z" }, - { url = "https://files.pythonhosted.org/packages/58/b0/2b4e647b0fc706e88eb6c253d05511865578f5f67b55fad639bf3272a4a1/torch-2.9.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:413e1654c9203733138858780e184d9fc59442f0b3b209e16f39354eb893db9b", size = 74452019, upload-time = "2025-10-15T15:46:04.296Z" }, - { url = "https://files.pythonhosted.org/packages/58/fe/334225e6330e672b36aef23d77451fa906ea12881570c08638a91331a212/torch-2.9.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c596708b5105d0b199215acf0c9be7c1db5f1680d88eddadf4b75a299259a677", size = 104230578, upload-time = "2025-10-15T15:46:08.182Z" }, - { url = "https://files.pythonhosted.org/packages/05/cc/49566caaa218872ec9a2912456f470ff92649894a4bc2e5274aa9ef87c4a/torch-2.9.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:51de31219c97c51cf4bf2be94d622e3deb5dcc526c6dc00e97c17eaec0fc1d67", size = 899815990, upload-time = "2025-10-15T15:48:03.336Z" }, - { url 
= "https://files.pythonhosted.org/packages/74/25/e9ab21d5925b642d008f139d4a3c9664fc9ee1faafca22913c080cc4c0a5/torch-2.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd515c70059afd95f48b8192733764c08ca37a1d19803af6401b5ecad7c8676e", size = 109313698, upload-time = "2025-10-15T15:46:12.425Z" }, - { url = "https://files.pythonhosted.org/packages/b3/b7/205ef3e94de636feffd64b28bb59a0dfac0771221201b9871acf9236f5ca/torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:614a185e4986326d526a91210c8fc1397e76e8cfafa78baf6296a790e53a9eec", size = 74463678, upload-time = "2025-10-15T15:46:29.779Z" }, - { url = "https://files.pythonhosted.org/packages/d1/d3/3985739f3b8e88675127bf70f82b3a48ae083e39cda56305dbd90398fec0/torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642", size = 104107898, upload-time = "2025-10-15T15:46:20.883Z" }, - { url = "https://files.pythonhosted.org/packages/a5/4b/f4bb2e6c25d0272f798cd6d7a04ed315da76cec68c602d87040c7847287f/torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6", size = 899738273, upload-time = "2025-10-15T15:50:04.188Z" }, - { url = "https://files.pythonhosted.org/packages/66/11/c1c5ba6691cda6279087c35bd626536e4fd29521fe740abf5008377a9a02/torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b", size = 109280887, upload-time = "2025-10-15T15:46:26.228Z" }, - { url = "https://files.pythonhosted.org/packages/dd/5f/b85bd8c05312d71de9402bf5868d217c38827cfd09d8f8514e5be128a52b/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:33f58e9a102a91259af289d50525c30323b5c9ae1d31322b6447c0814da68695", size = 74478983, upload-time = "2025-10-15T15:46:39.406Z" }, - { url = 
"https://files.pythonhosted.org/packages/c2/1c/90eb13833cdf4969ea9707586d7b57095c3b6e2b223a7256bf111689bcb8/torch-2.9.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c30a17fc83eeab346913e237c64b15b5ba6407fff812f6c541e322e19bc9ea0e", size = 104111330, upload-time = "2025-10-15T15:46:35.238Z" }, - { url = "https://files.pythonhosted.org/packages/0e/21/2254c54b8d523592c25ef4434769aa23e29b1e6bf5f4c0ad9e27bf442927/torch-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f25033b8667b57857dfd01458fbf2a9e6a6df1f8def23aef0dc46292f6aa642", size = 899750243, upload-time = "2025-10-15T15:48:57.459Z" }, - { url = "https://files.pythonhosted.org/packages/b7/a5/5cb94fa4fd1e78223455c23c200f30f6dc10c6d4a2bcc8f6e7f2a2588370/torch-2.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:d037f1b4ffd25013be4a7bf3651a0a910c68554956c7b2c92ebe87c76475dece", size = 109284513, upload-time = "2025-10-15T15:46:45.061Z" }, - { url = "https://files.pythonhosted.org/packages/66/e8/fc414d8656250ee46120b44836ffbb3266343db424b3e18ca79ebbf69d4f/torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e4e5b5cba837a2a8d1a497ba9a58dae46fa392593eaa13b871c42f71847503a5", size = 74830362, upload-time = "2025-10-15T15:46:48.983Z" }, - { url = "https://files.pythonhosted.org/packages/ed/5f/9474c98fc5ae0cd04b9466035428cd360e6611a86b8352a0fc2fa504acdc/torch-2.9.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:64693568f5dc4dbd5f880a478b1cea0201cc6b510d91d1bc54fea86ac5d1a637", size = 104144940, upload-time = "2025-10-15T15:47:29.076Z" }, - { url = "https://files.pythonhosted.org/packages/2d/5a/8e0c1cf57830172c109d4bd6be2708cabeaf550983eee7029291322447a0/torch-2.9.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:f8ed31ddd7d10bfb3fbe0b9fe01b1243577f13d75e6f4a0839a283915ce3791e", size = 899744054, upload-time = "2025-10-15T15:48:29.864Z" }, - { url = 
"https://files.pythonhosted.org/packages/6d/28/82c28b30fcb4b7c9cdd995763d18bbb830d6521356712faebbad92ffa61d/torch-2.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:eff527d4e4846e6f70d2afd8058b73825761203d66576a7e04ea2ecfebcb4ab8", size = 109517546, upload-time = "2025-10-15T15:47:33.395Z" }, - { url = "https://files.pythonhosted.org/packages/ff/c3/a91f96ec74347fa5fd24453fa514bc61c61ecc79196fa760b012a1873d96/torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:f8877779cf56d1ce431a7636703bdb13307f5960bb1af49716d8b179225e0e6a", size = 74480732, upload-time = "2025-10-15T15:47:38.002Z" }, - { url = "https://files.pythonhosted.org/packages/5c/73/9f70af34b334a7e0ef496ceec96b7ec767bd778ea35385ce6f77557534d1/torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7e614fae699838038d888729f82b687c03413c5989ce2a9481f9a7e7a396e0bb", size = 74433037, upload-time = "2025-10-15T15:47:41.894Z" }, - { url = "https://files.pythonhosted.org/packages/b7/84/37cf88625901934c97109e583ecc21777d21c6f54cda97a7e5bbad1ee2f2/torch-2.9.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:dfb5b8cd310ba3436c7e14e8b7833ef658cf3045e50d2bdaed23c8fc517065eb", size = 104116482, upload-time = "2025-10-15T15:47:46.266Z" }, - { url = "https://files.pythonhosted.org/packages/56/8e/ca8b17866943a8d4f4664d402ea84210aa274588b4c5d89918f5caa24eec/torch-2.9.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b3d29524993a478e46f5d598b249cd824b7ed98d7fba538bd9c4cde6c803948f", size = 899746916, upload-time = "2025-10-15T15:50:40.294Z" }, - { url = "https://files.pythonhosted.org/packages/43/65/3b17c0fbbdab6501c5b320a52a648628d0d44e7379f64e27d9eef701b6bf/torch-2.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:71c7578984f5ec0eb645eb4816ac8435fcf3e3e2ae1901bcd2f519a9cafb5125", size = 109275151, upload-time = "2025-10-15T15:49:20.715Z" }, - { url = 
"https://files.pythonhosted.org/packages/83/36/74f8c051f785500396e42f93542422422dfd874a174f21f8d955d36e5d64/torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:71d9309aee457bbe0b164bce2111cd911c4ed4e847e65d5077dbbcd3aba6befc", size = 74823353, upload-time = "2025-10-15T15:49:16.59Z" }, - { url = "https://files.pythonhosted.org/packages/62/51/dc3b4e2f9ba98ae27238f0153ca098bf9340b2dafcc67fde645d496dfc2a/torch-2.9.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c08fb654d783899e204a32cca758a7ce8a45b2d78eeb89517cc937088316f78e", size = 104140340, upload-time = "2025-10-15T15:50:19.67Z" }, - { url = "https://files.pythonhosted.org/packages/c0/8d/b00657f8141ac16af7bb6cda2e67de18499a3263b78d516b9a93fcbc98e3/torch-2.9.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ec8feb0099b2daa5728fbc7abb0b05730fd97e0f359ff8bda09865aaa7bd7d4b", size = 899731750, upload-time = "2025-10-15T15:49:36.673Z" }, - { url = "https://files.pythonhosted.org/packages/fc/29/bd361e0cbb2c79ce6450f42643aaf6919956f89923a50571b0ebfe92d142/torch-2.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:695ba920f234ad4170c9c50e28d56c848432f8f530e6bc7f88fcb15ddf338e75", size = 109503850, upload-time = "2025-10-15T15:50:24.118Z" }, + { name = "typing-extensions" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/56/9577683b23072075ed2e40d725c52c2019d71a972fab8e083763da8e707e/torch-2.9.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:1cc208435f6c379f9b8fdfd5ceb5be1e3b72a6bdf1cb46c0d2812aa73472db9e", size = 104207681, upload-time = "2025-11-12T15:19:56.48Z" }, + { url = "https://files.pythonhosted.org/packages/38/45/be5a74f221df8f4b609b78ff79dc789b0cc9017624544ac4dd1c03973150/torch-2.9.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:9fd35c68b3679378c11f5eb73220fdcb4e6f4592295277fbb657d31fd053237c", size = 899794036, upload-time = "2025-11-12T15:21:01.886Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/95/a581e8a382596b69385a44bab2733f1273d45c842f5d4a504c0edc3133b6/torch-2.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:2af70e3be4a13becba4655d6cc07dcfec7ae844db6ac38d6c1dafeb245d17d65", size = 110969861, upload-time = "2025-11-12T15:21:30.145Z" }, + { url = "https://files.pythonhosted.org/packages/ad/51/1756dc128d2bf6ea4e0a915cb89ea5e730315ff33d60c1ff56fd626ba3eb/torch-2.9.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a83b0e84cc375e3318a808d032510dde99d696a85fe9473fc8575612b63ae951", size = 74452222, upload-time = "2025-11-12T15:20:46.223Z" }, + { url = "https://files.pythonhosted.org/packages/15/db/c064112ac0089af3d2f7a2b5bfbabf4aa407a78b74f87889e524b91c5402/torch-2.9.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:62b3fd888277946918cba4478cf849303da5359f0fb4e3bfb86b0533ba2eaf8d", size = 104220430, upload-time = "2025-11-12T15:20:31.705Z" }, + { url = "https://files.pythonhosted.org/packages/56/be/76eaa36c9cd032d3b01b001e2c5a05943df75f26211f68fae79e62f87734/torch-2.9.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d033ff0ac3f5400df862a51bdde9bad83561f3739ea0046e68f5401ebfa67c1b", size = 899821446, upload-time = "2025-11-12T15:20:15.544Z" }, + { url = "https://files.pythonhosted.org/packages/47/cc/7a2949e38dfe3244c4df21f0e1c27bce8aedd6c604a587dd44fc21017cb4/torch-2.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:0d06b30a9207b7c3516a9e0102114024755a07045f0c1d2f2a56b1819ac06bcb", size = 110973074, upload-time = "2025-11-12T15:21:39.958Z" }, + { url = "https://files.pythonhosted.org/packages/1e/ce/7d251155a783fb2c1bb6837b2b7023c622a2070a0a72726ca1df47e7ea34/torch-2.9.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:52347912d868653e1528b47cafaf79b285b98be3f4f35d5955389b1b95224475", size = 74463887, upload-time = "2025-11-12T15:20:36.611Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/27/07c645c7673e73e53ded71705045d6cb5bae94c4b021b03aa8d03eee90ab/torch-2.9.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:da5f6f4d7f4940a173e5572791af238cb0b9e21b1aab592bd8b26da4c99f1cd6", size = 104126592, upload-time = "2025-11-12T15:20:41.62Z" }, + { url = "https://files.pythonhosted.org/packages/19/17/e377a460603132b00760511299fceba4102bd95db1a0ee788da21298ccff/torch-2.9.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:27331cd902fb4322252657f3902adf1c4f6acad9dcad81d8df3ae14c7c4f07c4", size = 899742281, upload-time = "2025-11-12T15:22:17.602Z" }, + { url = "https://files.pythonhosted.org/packages/b1/1a/64f5769025db846a82567fa5b7d21dba4558a7234ee631712ee4771c436c/torch-2.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:81a285002d7b8cfd3fdf1b98aa8df138d41f1a8334fd9ea37511517cedf43083", size = 110940568, upload-time = "2025-11-12T15:21:18.689Z" }, + { url = "https://files.pythonhosted.org/packages/6e/ab/07739fd776618e5882661d04c43f5b5586323e2f6a2d7d84aac20d8f20bd/torch-2.9.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:c0d25d1d8e531b8343bea0ed811d5d528958f1dcbd37e7245bc686273177ad7e", size = 74479191, upload-time = "2025-11-12T15:21:25.816Z" }, + { url = "https://files.pythonhosted.org/packages/20/60/8fc5e828d050bddfab469b3fe78e5ab9a7e53dda9c3bdc6a43d17ce99e63/torch-2.9.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c29455d2b910b98738131990394da3e50eea8291dfeb4b12de71ecf1fdeb21cb", size = 104135743, upload-time = "2025-11-12T15:21:34.936Z" }, + { url = "https://files.pythonhosted.org/packages/f2/b7/6d3f80e6918213babddb2a37b46dbb14c15b14c5f473e347869a51f40e1f/torch-2.9.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:524de44cd13931208ba2c4bde9ec7741fd4ae6bfd06409a604fc32f6520c2bc9", size = 899749493, upload-time = "2025-11-12T15:24:36.356Z" }, + { url = 
"https://files.pythonhosted.org/packages/a6/47/c7843d69d6de8938c1cbb1eba426b1d48ddf375f101473d3e31a5fc52b74/torch-2.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:545844cc16b3f91e08ce3b40e9c2d77012dd33a48d505aed34b7740ed627a1b2", size = 110944162, upload-time = "2025-11-12T15:21:53.151Z" }, + { url = "https://files.pythonhosted.org/packages/28/0e/2a37247957e72c12151b33a01e4df651d9d155dd74d8cfcbfad15a79b44a/torch-2.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5be4bf7496f1e3ffb1dd44b672adb1ac3f081f204c5ca81eba6442f5f634df8e", size = 74830751, upload-time = "2025-11-12T15:21:43.792Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f7/7a18745edcd7b9ca2381aa03353647bca8aace91683c4975f19ac233809d/torch-2.9.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:30a3e170a84894f3652434b56d59a64a2c11366b0ed5776fab33c2439396bf9a", size = 104142929, upload-time = "2025-11-12T15:21:48.319Z" }, + { url = "https://files.pythonhosted.org/packages/f4/dd/f1c0d879f2863ef209e18823a988dc7a1bf40470750e3ebe927efdb9407f/torch-2.9.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8301a7b431e51764629208d0edaa4f9e4c33e6df0f2f90b90e261d623df6a4e2", size = 899748978, upload-time = "2025-11-12T15:23:04.568Z" }, + { url = "https://files.pythonhosted.org/packages/1f/9f/6986b83a53b4d043e36f3f898b798ab51f7f20fdf1a9b01a2720f445043d/torch-2.9.1-cp313-cp313t-win_amd64.whl", hash = "sha256:2e1c42c0ae92bf803a4b2409fdfed85e30f9027a66887f5e7dcdbc014c7531db", size = 111176995, upload-time = "2025-11-12T15:22:01.618Z" }, + { url = "https://files.pythonhosted.org/packages/40/60/71c698b466dd01e65d0e9514b5405faae200c52a76901baf6906856f17e4/torch-2.9.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:2c14b3da5df416cf9cb5efab83aa3056f5b8cd8620b8fde81b4987ecab730587", size = 74480347, upload-time = "2025-11-12T15:21:57.648Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/50/c4b5112546d0d13cc9eaa1c732b823d676a9f49ae8b6f97772f795874a03/torch-2.9.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1edee27a7c9897f4e0b7c14cfc2f3008c571921134522d5b9b5ec4ebbc69041a", size = 74433245, upload-time = "2025-11-12T15:22:39.027Z" }, + { url = "https://files.pythonhosted.org/packages/81/c9/2628f408f0518b3bae49c95f5af3728b6ab498c8624ab1e03a43dd53d650/torch-2.9.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:19d144d6b3e29921f1fc70503e9f2fc572cde6a5115c0c0de2f7ca8b1483e8b6", size = 104134804, upload-time = "2025-11-12T15:22:35.222Z" }, + { url = "https://files.pythonhosted.org/packages/28/fc/5bc91d6d831ae41bf6e9e6da6468f25330522e92347c9156eb3f1cb95956/torch-2.9.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:c432d04376f6d9767a9852ea0def7b47a7bbc8e7af3b16ac9cf9ce02b12851c9", size = 899747132, upload-time = "2025-11-12T15:23:36.068Z" }, + { url = "https://files.pythonhosted.org/packages/63/5d/e8d4e009e52b6b2cf1684bde2a6be157b96fb873732542fb2a9a99e85a83/torch-2.9.1-cp314-cp314-win_amd64.whl", hash = "sha256:d187566a2cdc726fc80138c3cdb260970fab1c27e99f85452721f7759bbd554d", size = 110934845, upload-time = "2025-11-12T15:22:48.367Z" }, + { url = "https://files.pythonhosted.org/packages/bd/b2/2d15a52516b2ea3f414643b8de68fa4cb220d3877ac8b1028c83dc8ca1c4/torch-2.9.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cb10896a1f7fedaddbccc2017ce6ca9ecaaf990f0973bdfcf405439750118d2c", size = 74823558, upload-time = "2025-11-12T15:22:43.392Z" }, + { url = "https://files.pythonhosted.org/packages/86/5c/5b2e5d84f5b9850cd1e71af07524d8cbb74cba19379800f1f9f7c997fc70/torch-2.9.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:0a2bd769944991c74acf0c4ef23603b9c777fdf7637f115605a4b2d8023110c7", size = 104145788, upload-time = "2025-11-12T15:23:52.109Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/8c/3da60787bcf70add986c4ad485993026ac0ca74f2fc21410bc4eb1bb7695/torch-2.9.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:07c8a9660bc9414c39cac530ac83b1fb1b679d7155824144a40a54f4a47bfa73", size = 899735500, upload-time = "2025-11-12T15:24:08.788Z" }, + { url = "https://files.pythonhosted.org/packages/db/2b/f7818f6ec88758dfd21da46b6cd46af9d1b3433e53ddbb19ad1e0da17f9b/torch-2.9.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c88d3299ddeb2b35dcc31753305612db485ab6f1823e37fb29451c8b2732b87e", size = 111163659, upload-time = "2025-11-12T15:23:20.009Z" }, ] [[package]] @@ -5913,7 +5792,8 @@ name = "torchprofile" version = "0.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "torch", marker = "sys_platform == 'never'" }, { name = "torchvision", marker = "sys_platform == 'never'" }, ] @@ -5924,42 +5804,43 @@ wheels = [ [[package]] name = "torchvision" -version = "0.24.0" +version = "0.24.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", marker = "sys_platform != 'linux'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' 
and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "pillow", marker = "sys_platform != 'linux'" }, { name = "torch", marker = "sys_platform == 'never'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/63/5b/1404eeab00819df71a30e916c2081654366741f7838fcc4fff86b7bd9e7e/torchvision-0.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e8d5e667deff87bd66d26df6d225f46224bb0782d4f3f8f5d2f3068b5fd4492", size = 1891723, upload-time = "2025-10-15T15:51:08.5Z" }, - { url = "https://files.pythonhosted.org/packages/88/e3/1b003ecd52bd721f8304aeb66691edfbc2002747ec83d36188ad6abab506/torchvision-0.24.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a110a51c75e89807a8382b0d8034f5e180fb9319570be3389ffd3d4ac4fd57a9", size = 2418988, upload-time = "2025-10-15T15:51:25.195Z" }, - { url = "https://files.pythonhosted.org/packages/56/2e/3c19a35e62da0f606baf8f6e2ceeab1eb66aaa2f84c6528538b06b416d54/torchvision-0.24.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:81d5b12a6df1bb2cc8bdbad837b637d6ea446f2866e6d94f1b5d478856331be3", size = 8046769, upload-time = "2025-10-15T15:51:15.221Z" }, - { url = "https://files.pythonhosted.org/packages/e0/1d/e7ab614a1ace820a2366eab1532679fbe81bd9501ffd6a1b7be14936366d/torchvision-0.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:0839dbb305d34671f5a64f558782095134b04bbeff8b90f11eb80515d7d50092", size = 3686529, upload-time = "2025-10-15T15:51:20.982Z" }, - { url = 
"https://files.pythonhosted.org/packages/a3/17/54ed2ec6944ea972b461a86424c8c7f98835982c90cbc45bf59bd962863a/torchvision-0.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f771cf918351ad509a28488be475f3e9cc71a750d6b1467842bfb64863a5e986", size = 1891719, upload-time = "2025-10-15T15:51:10.384Z" }, - { url = "https://files.pythonhosted.org/packages/f8/07/0cd6776eee784742ad3cb2bfd3295383d84cb2f9e87386119333d1587f0f/torchvision-0.24.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbd63bf4ebff84c48c50123eba90526cc9f794fe45bc9f5dd07cec19e8c62bce", size = 2420513, upload-time = "2025-10-15T15:51:18.087Z" }, - { url = "https://files.pythonhosted.org/packages/1a/f4/6026c08011ddcefcbc14161c5aa9dce55c35c6b045e04ef0952e88bf4594/torchvision-0.24.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:78fe414b3bb6dbf7e6f6da6f733ba96881f6b29a9b997228de7c5f603e5ed940", size = 8048018, upload-time = "2025-10-15T15:51:13.579Z" }, - { url = "https://files.pythonhosted.org/packages/2f/b4/362b4e67ed87cee0fb4f8f0363a852eaeef527968bf62c07ed56f764d729/torchvision-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:629584b94e52f32a6278f2a35d85eeaae95fcc38730fcb765064f26c3c96df5d", size = 4027686, upload-time = "2025-10-15T15:51:19.189Z" }, - { url = "https://files.pythonhosted.org/packages/47/ef/81e4e69e02e2c4650b30e8c11c8974f946682a30e0ab7e9803a831beff76/torchvision-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c61d40bcd2e2451e932902a702ad495ba1ec6f279e90b1e15cef2bb55dc911e2", size = 1891726, upload-time = "2025-10-15T15:51:16.977Z" }, - { url = "https://files.pythonhosted.org/packages/00/7b/e3809b3302caea9a12c13f3adebe4fef127188438e719fd6c8dc93db1da6/torchvision-0.24.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b0531d1483fc322d7da0d83be52f0df860a75114ab87dbeeb9de765feaeda843", size = 2419495, upload-time = "2025-10-15T15:51:11.885Z" }, - { url = 
"https://files.pythonhosted.org/packages/7e/e6/7324ead6793075a8c75c56abeed1236d1750de16a5613cfe2ddad164a92a/torchvision-0.24.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:26b9dd9c083f8e5f7ac827de6d5b88c615d9c582dc87666770fbdf16887e4c25", size = 8050480, upload-time = "2025-10-15T15:51:24.012Z" }, - { url = "https://files.pythonhosted.org/packages/3e/ad/3c56fcd2a0d6e8afa80e115b5ade4302232ec99655220a51d05709819523/torchvision-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:060b7c50ed4b3fb0316b08e2e31bfd874ec2f63ef5ae02f81e54341ca4e88703", size = 4292225, upload-time = "2025-10-15T15:51:27.699Z" }, - { url = "https://files.pythonhosted.org/packages/4f/b5/b2008e4b77a8d6aada828dd0f6a438d8f94befa23fdd2d62fa0ac6e60113/torchvision-0.24.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:84d79cfc6457310107ce4d712de7a3d388b24484bc9aeded4a76d8f8e3a2813d", size = 1891722, upload-time = "2025-10-15T15:51:28.854Z" }, - { url = "https://files.pythonhosted.org/packages/8f/02/e2f6b0ff93ca4db5751ac9c5be43f13d5e53d9e9412324f464dca1775027/torchvision-0.24.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:fec12a269cf80f6b0b71471c8d498cd3bdd9d8e892c425bf39fecb604852c3b0", size = 2371478, upload-time = "2025-10-15T15:51:37.842Z" }, - { url = "https://files.pythonhosted.org/packages/77/85/42e5fc4f716ec7b73cf1f32eeb5c77961be4d4054b26cd6a5ff97f20c966/torchvision-0.24.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:7323a9be5e3da695605753f501cdc87824888c5655d27735cdeaa9986b45884c", size = 8050200, upload-time = "2025-10-15T15:51:46.276Z" }, - { url = "https://files.pythonhosted.org/packages/93/c2/48cb0b6b26276d2120b1e0dbc877579a748eae02b4091a7522ce54f6d5e1/torchvision-0.24.0-cp313-cp313-win_amd64.whl", hash = "sha256:08cad8b204196e945f0b2d73adee952d433db1c03645851d52b22a45f1015b13", size = 4309939, upload-time = "2025-10-15T15:51:39.002Z" }, - { url = 
"https://files.pythonhosted.org/packages/7d/d7/3dd10830b047eeb46ae6b465474258d7b4fbb7d8872dca69bd42449f5c82/torchvision-0.24.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ab956a6e588623353e0f20d4b03eb1656cb4a3c75ca4dd8b4e32e01bc43271a", size = 2028355, upload-time = "2025-10-15T15:51:22.384Z" }, - { url = "https://files.pythonhosted.org/packages/f7/cf/2d7e43409089ce7070f5336161f9216d58653ee1cb26bcb5d6c84cc2de36/torchvision-0.24.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:b1b3db80609c32a088554e8e94b4fc31f1033fe5bb4ac0673ec49c3eb03fb4da", size = 2374466, upload-time = "2025-10-15T15:51:35.382Z" }, - { url = "https://files.pythonhosted.org/packages/e9/30/8f7c328fd7e0a9665da4b6b56b1c627665c18470bfe62f3729ad3eda9aec/torchvision-0.24.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:e6635f100d455c80b43f297df4b8585a76c6a2e114802f6567ddd28d7b5479b0", size = 8217068, upload-time = "2025-10-15T15:51:36.623Z" }, - { url = "https://files.pythonhosted.org/packages/55/a2/b6f9e40e2904574c80b3bb872c66af20bbd642053e7c8e1b9e99ab396535/torchvision-0.24.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4ce158bbdc3a9086034bced0b5212888bd5b251fee6d08a9eff151d30b4b228a", size = 4273912, upload-time = "2025-10-15T15:51:33.866Z" }, - { url = "https://files.pythonhosted.org/packages/1b/24/790a39645cc8c71bf442d54a76da9bda5caeb2a44c5f7e02498649cd99d4/torchvision-0.24.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4bdfc85a5ed706421555f32cdc5e3ddb6d40bf65ef03a274ce3c176393e2904b", size = 2028335, upload-time = "2025-10-15T15:51:26.252Z" }, - { url = "https://files.pythonhosted.org/packages/b0/d7/69479a066ea773653e88eda99031e38681e9094046f87cb957af5036db0e/torchvision-0.24.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:73576a9c4a593223fbae85a64e8bbd77049abd1101893ecf3c5e981284fd58b4", size = 2371609, upload-time = "2025-10-15T15:51:29.859Z" }, - { url = 
"https://files.pythonhosted.org/packages/46/64/3c7fdb3771ec992b9445a1f7a969466b23ce2cdb14e09303b3db351a0655/torchvision-0.24.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:dd565b1b06666ff399d0801d4d1824fa570c0167a179ca700a5be232527b3c62", size = 8214918, upload-time = "2025-10-15T15:51:41.465Z" }, - { url = "https://files.pythonhosted.org/packages/58/51/abc416bc34d574ad479af738e413d9ebf93027ee92d0f4ae38f966b818f7/torchvision-0.24.0-cp314-cp314-win_amd64.whl", hash = "sha256:eb45d12ac48d757738788fd3fb8e88e647d6b2ab2424134ca87556efc72d81b5", size = 4257776, upload-time = "2025-10-15T15:51:42.642Z" }, - { url = "https://files.pythonhosted.org/packages/08/f7/261d1353c611820541ecd43046b89da3f1ae998dc786e4288b890a009883/torchvision-0.24.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:68120e7e03c31900e499a10bb7fdd63cfd67f0054c9fa108e7e27f9cd372f315", size = 2028359, upload-time = "2025-10-15T15:51:32.119Z" }, - { url = "https://files.pythonhosted.org/packages/a2/fd/615d8a86db1578345de7fa1edaf476fbcf4f057bf7e4fd898306b620c487/torchvision-0.24.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:64e54494043eecf9f57a9881c6fdea49c62282782e737c002ae8b1639e6ea80e", size = 2374469, upload-time = "2025-10-15T15:51:40.19Z" }, - { url = "https://files.pythonhosted.org/packages/04/98/bac11e8fdbf00d6c398246ff2781370aa72c99f2ac685c01ce79354c9a32/torchvision-0.24.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:75ef9546323b321a451239d886f0cb528f7e98bb294da47a3200effd4e572064", size = 8217060, upload-time = "2025-10-15T15:51:45.033Z" }, - { url = "https://files.pythonhosted.org/packages/47/6f/9fba8abc468c904570699eceeb51588f9622172b8fffa4ab11bcf15598c2/torchvision-0.24.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2efb617667950814fc8bb9437e5893861b3616e214285be33cbc364a3f42c599", size = 4358490, upload-time = "2025-10-15T15:51:43.884Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/09/d51aadf8591138e08b74c64a6eb783630c7a31ca2634416277115a9c3a2b/torchvision-0.24.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ded5e625788572e4e1c4d155d1bbc48805c113794100d70e19c76e39e4d53465", size = 1891441, upload-time = "2025-11-12T15:25:01.687Z" }, + { url = "https://files.pythonhosted.org/packages/6b/49/a35df863e7c153aad82af7505abd8264a5b510306689712ef86bea862822/torchvision-0.24.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:54ed17c3d30e718e08d8da3fd5b30ea44b0311317e55647cb97077a29ecbc25b", size = 2386226, upload-time = "2025-11-12T15:25:05.449Z" }, + { url = "https://files.pythonhosted.org/packages/49/20/f2d7cd1eea052887c1083afff0b8df5228ec93b53e03759f20b1a3c6d22a/torchvision-0.24.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f476da4e085b7307aaab6f540219617d46d5926aeda24be33e1359771c83778f", size = 8046093, upload-time = "2025-11-12T15:25:09.425Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cf/0ff4007c09903199307da5f53a192ff5d62b45447069e9ef3a19bdc5ff12/torchvision-0.24.1-cp310-cp310-win_amd64.whl", hash = "sha256:fbdbdae5e540b868a681240b7dbd6473986c862445ee8a138680a6a97d6c34ff", size = 3696202, upload-time = "2025-11-12T15:25:10.657Z" }, + { url = "https://files.pythonhosted.org/packages/e7/69/30f5f03752aa1a7c23931d2519b31e557f3f10af5089d787cddf3b903ecf/torchvision-0.24.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:056c525dc875f18fe8e9c27079ada166a7b2755cea5a2199b0bc7f1f8364e600", size = 1891436, upload-time = "2025-11-12T15:25:04.3Z" }, + { url = "https://files.pythonhosted.org/packages/0c/69/49aae86edb75fe16460b59a191fcc0f568c2378f780bb063850db0fe007a/torchvision-0.24.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:1e39619de698e2821d71976c92c8a9e50cdfd1e993507dfb340f2688bfdd8283", size = 2387757, upload-time = "2025-11-12T15:25:06.795Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/c9/1dfc3db98797b326f1d0c3f3bb61c83b167a813fc7eab6fcd2edb8c7eb9d/torchvision-0.24.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a0f106663e60332aa4fcb1ca2159ef8c3f2ed266b0e6df88de261048a840e0df", size = 8047682, upload-time = "2025-11-12T15:25:21.125Z" }, + { url = "https://files.pythonhosted.org/packages/fa/bb/cfc6a6f6ccc84a534ed1fdf029ae5716dd6ff04e57ed9dc2dab38bf652d5/torchvision-0.24.1-cp311-cp311-win_amd64.whl", hash = "sha256:a9308cdd37d8a42e14a3e7fd9d271830c7fecb150dd929b642f3c1460514599a", size = 4037588, upload-time = "2025-11-12T15:25:14.402Z" }, + { url = "https://files.pythonhosted.org/packages/f0/af/18e2c6b9538a045f60718a0c5a058908ccb24f88fde8e6f0fc12d5ff7bd3/torchvision-0.24.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e48bf6a8ec95872eb45763f06499f87bd2fb246b9b96cb00aae260fda2f96193", size = 1891433, upload-time = "2025-11-12T15:25:03.232Z" }, + { url = "https://files.pythonhosted.org/packages/9d/43/600e5cfb0643d10d633124f5982d7abc2170dfd7ce985584ff16edab3e76/torchvision-0.24.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:7fb7590c737ebe3e1c077ad60c0e5e2e56bb26e7bccc3b9d04dbfc34fd09f050", size = 2386737, upload-time = "2025-11-12T15:25:08.288Z" }, + { url = "https://files.pythonhosted.org/packages/93/b1/db2941526ecddd84884132e2742a55c9311296a6a38627f9e2627f5ac889/torchvision-0.24.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:66a98471fc18cad9064123106d810a75f57f0838eee20edc56233fd8484b0cc7", size = 8049868, upload-time = "2025-11-12T15:25:13.058Z" }, + { url = "https://files.pythonhosted.org/packages/69/98/16e583f59f86cd59949f59d52bfa8fc286f86341a229a9d15cbe7a694f0c/torchvision-0.24.1-cp312-cp312-win_amd64.whl", hash = "sha256:4aa6cb806eb8541e92c9b313e96192c6b826e9eb0042720e2fa250d021079952", size = 4302006, upload-time = "2025-11-12T15:25:16.184Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/97/ab40550f482577f2788304c27220e8ba02c63313bd74cf2f8920526aac20/torchvision-0.24.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:8a6696db7fb71eadb2c6a48602106e136c785642e598eb1533e0b27744f2cce6", size = 1891435, upload-time = "2025-11-12T15:25:28.642Z" }, + { url = "https://files.pythonhosted.org/packages/30/65/ac0a3f9be6abdbe4e1d82c915d7e20de97e7fd0e9a277970508b015309f3/torchvision-0.24.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:db2125c46f9cb25dc740be831ce3ce99303cfe60439249a41b04fd9f373be671", size = 2338718, upload-time = "2025-11-12T15:25:26.19Z" }, + { url = "https://files.pythonhosted.org/packages/10/b5/5bba24ff9d325181508501ed7f0c3de8ed3dd2edca0784d48b144b6c5252/torchvision-0.24.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:f035f0cacd1f44a8ff6cb7ca3627d84c54d685055961d73a1a9fb9827a5414c8", size = 8049661, upload-time = "2025-11-12T15:25:22.558Z" }, + { url = "https://files.pythonhosted.org/packages/5c/ec/54a96ae9ab6a0dd66d4bba27771f892e36478a9c3489fa56e51c70abcc4d/torchvision-0.24.1-cp313-cp313-win_amd64.whl", hash = "sha256:16274823b93048e0a29d83415166a2e9e0bf4e1b432668357b657612a4802864", size = 4319808, upload-time = "2025-11-12T15:25:17.318Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f3/a90a389a7e547f3eb8821b13f96ea7c0563cdefbbbb60a10e08dda9720ff/torchvision-0.24.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e3f96208b4bef54cd60e415545f5200346a65024e04f29a26cd0006dbf9e8e66", size = 2005342, upload-time = "2025-11-12T15:25:11.871Z" }, + { url = "https://files.pythonhosted.org/packages/a9/fe/ff27d2ed1b524078164bea1062f23d2618a5fc3208e247d6153c18c91a76/torchvision-0.24.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:f231f6a4f2aa6522713326d0d2563538fa72d613741ae364f9913027fa52ea35", size = 2341708, upload-time = "2025-11-12T15:25:25.08Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/b9/d6c903495cbdfd2533b3ef6f7b5643ff589ea062f8feb5c206ee79b9d9e5/torchvision-0.24.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:1540a9e7f8cf55fe17554482f5a125a7e426347b71de07327d5de6bfd8d17caa", size = 8177239, upload-time = "2025-11-12T15:25:18.554Z" }, + { url = "https://files.pythonhosted.org/packages/4f/2b/ba02e4261369c3798310483028495cf507e6cb3f394f42e4796981ecf3a7/torchvision-0.24.1-cp313-cp313t-win_amd64.whl", hash = "sha256:d83e16d70ea85d2f196d678bfb702c36be7a655b003abed84e465988b6128938", size = 4251604, upload-time = "2025-11-12T15:25:34.069Z" }, + { url = "https://files.pythonhosted.org/packages/42/84/577b2cef8f32094add5f52887867da4c2a3e6b4261538447e9b48eb25812/torchvision-0.24.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cccf4b4fec7fdfcd3431b9ea75d1588c0a8596d0333245dafebee0462abe3388", size = 2005319, upload-time = "2025-11-12T15:25:23.827Z" }, + { url = "https://files.pythonhosted.org/packages/5f/34/ecb786bffe0159a3b49941a61caaae089853132f3cd1e8f555e3621f7e6f/torchvision-0.24.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:1b495edd3a8f9911292424117544f0b4ab780452e998649425d1f4b2bed6695f", size = 2338844, upload-time = "2025-11-12T15:25:32.625Z" }, + { url = "https://files.pythonhosted.org/packages/51/99/a84623786a6969504c87f2dc3892200f586ee13503f519d282faab0bb4f0/torchvision-0.24.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ab211e1807dc3e53acf8f6638df9a7444c80c0ad050466e8d652b3e83776987b", size = 8175144, upload-time = "2025-11-12T15:25:31.355Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ba/8fae3525b233e109317ce6a9c1de922ab2881737b029a7e88021f81e068f/torchvision-0.24.1-cp314-cp314-win_amd64.whl", hash = "sha256:18f9cb60e64b37b551cd605a3d62c15730c086362b40682d23e24b616a697d41", size = 4234459, upload-time = "2025-11-12T15:25:19.859Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/33/481602c1c72d0485d4b3a6b48c9534b71c2957c9d83bf860eb837bf5a620/torchvision-0.24.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ec9d7379c519428395e4ffda4dbb99ec56be64b0a75b95989e00f9ec7ae0b2d7", size = 2005336, upload-time = "2025-11-12T15:25:27.225Z" }, + { url = "https://files.pythonhosted.org/packages/d0/7f/372de60bf3dd8f5593bd0d03f4aecf0d1fd58f5bc6943618d9d913f5e6d5/torchvision-0.24.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:af9201184c2712d808bd4eb656899011afdfce1e83721c7cb08000034df353fe", size = 2341704, upload-time = "2025-11-12T15:25:29.857Z" }, + { url = "https://files.pythonhosted.org/packages/36/9b/0f3b9ff3d0225ee2324ec663de0e7fb3eb855615ca958ac1875f22f1f8e5/torchvision-0.24.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9ef95d819fd6df81bc7cc97b8f21a15d2c0d3ac5dbfaab5cbc2d2ce57114b19e", size = 8177422, upload-time = "2025-11-12T15:25:37.357Z" }, + { url = "https://files.pythonhosted.org/packages/d6/ab/e2bcc7c2f13d882a58f8b30ff86f794210b075736587ea50f8c545834f8a/torchvision-0.24.1-cp314-cp314t-win_amd64.whl", hash = "sha256:480b271d6edff83ac2e8d69bbb4cf2073f93366516a50d48f140ccfceedb002e", size = 4335190, upload-time = "2025-11-12T15:25:35.745Z" }, ] [[package]] @@ -5971,8 +5852,7 @@ dependencies = [ { name = "docstring-parser" }, { name = "filelock" }, { name = "fsspec" }, - { name = "importlib-metadata", version = "8.6.1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev'" }, - { name = "importlib-metadata", version = "8.7.0", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-lts' or extra != 'extra-13-megatron-core-dev'" }, + { name = "importlib-metadata" }, { name = "pyre-extensions" }, { name = "pyyaml" }, { name = "tabulate" }, @@ -5997,27 +5877,70 @@ wheels = [ [[package]] name = "transformer-engine" -version = "2.9.0+70f53666" -source = { git = 
"https://github.com/NVIDIA/TransformerEngine.git?rev=release_v2.9#70f536662ae10a62a54f4ed1ba92e3314c5cfd69" } +version = "2.9.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/5c/21152e73aa46ac7c969d694ce86cdeb199024c7810b2d700e900ea4efb1a/transformer_engine-2.9.0-py3-none-any.whl", hash = "sha256:953147ed4c490e54c9884bb0d876a1341f05c5c5b7d304bf61f4740f6faee5af", size = 662107, upload-time = "2025-11-11T15:50:49.167Z" }, +] + +[package.optional-dependencies] +core-cu13 = [ + { name = "transformer-engine-cu13" }, +] +pytorch = [ + { name = "transformer-engine-torch" }, +] + +[[package]] +name = "transformer-engine-cu12" +version = "2.9.0" +source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "einops" }, - { name = "importlib-metadata", version = "8.6.1", source = { registry = "https://pypi.org/simple" } }, - { name = "onnx" }, - { name = "onnxscript", version = "0.5.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "onnxscript", version = "0.5.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.13' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "importlib-metadata" }, { name = "packaging" }, { name = "pydantic" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/af/1c449ad0c43d3d6b5c529c812a4e8338b20965ae5361a9b612c7dce21e4d/transformer_engine_cu12-2.9.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:81162874c0618f3e62eb5ffba0bb1b608b4e56d70238205b1dced7ee965d82b3", size = 303669451, upload-time = "2025-11-11T15:54:12.008Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/21/aa351994d8ade95681763df2b10770c768900ecc7f1cedbfa4e89fe1935a/transformer_engine_cu12-2.9.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:ad14981cbbd964f8e4446c35199d1bc5349ea30244e76bc57c1cceb5d469dd24", size = 304164366, upload-time = "2025-11-11T15:50:22.169Z" }, +] + +[[package]] +name = "transformer-engine-cu13" +version = "2.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, + { name = "packaging" }, + { name = "pydantic" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/b9/c1c788875848bf50faa22749107d91e92e9c0c78bb1878b99939209e40f9/transformer_engine_cu13-2.9.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:590aaeb3a4d552fe9ebc7019d43315f3e61153fcd1c5a07dc0c90bd8b278316e", size = 185010342, upload-time = "2025-11-13T22:35:04.742Z" }, + { url = "https://files.pythonhosted.org/packages/95/7f/3019c21565f63eeb79d24fa7d3bae39b5b73f21c72d7d5123d21d7ce945a/transformer_engine_cu13-2.9.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:4e869f5a0fd74aaa05a5d801a96688ed21827d23efe9774bd3038d5f2802ef46", size = 185669069, upload-time = "2025-11-13T22:35:13.709Z" }, +] + +[[package]] +name = "transformer-engine-torch" +version = "2.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "einops" }, + { name = "onnx" }, + { name = "onnxscript" }, { name = "torch", marker = "sys_platform == 'never'" }, + { name = "transformer-engine-cu12" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/a2/a3/401d741eceb8f402595e63ee0b1828d60cae988b22f2f23c9cfcc24185bd/transformer_engine_torch-2.9.0.tar.gz", hash = "sha256:abbc59f6acf635abf865085ecdf90e7d4ca9a3782bc91a9845e38adb2655a547", size = 215138, upload-time = "2025-11-11T15:49:04.258Z" } [[package]] name = "transformers" -version = "4.57.1" +version = "4.57.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { 
name = "huggingface-hub" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pyyaml" }, { name = "regex" }, @@ -6026,39 +5949,39 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d6/68/a39307bcc4116a30b2106f2e689130a48de8bd8a1e635b5e1030e46fcd9e/transformers-4.57.1.tar.gz", hash = "sha256:f06c837959196c75039809636cd964b959f6604b75b8eeec6fdfc0440b89cc55", size = 10142511, upload-time = "2025-10-14T15:39:26.18Z" } +sdist = { url = "https://files.pythonhosted.org/packages/dd/70/d42a739e8dfde3d92bb2fff5819cbf331fe9657323221e79415cd5eb65ee/transformers-4.57.3.tar.gz", hash = "sha256:df4945029aaddd7c09eec5cad851f30662f8bd1746721b34cc031d70c65afebc", size = 10139680, upload-time = "2025-11-25T15:51:30.139Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/71/d3/c16c3b3cf7655a67db1144da94b021c200ac1303f82428f2beef6c2e72bb/transformers-4.57.1-py3-none-any.whl", hash = "sha256:b10d05da8fa67dc41644dbbf9bc45a44cb86ae33da6f9295f5fbf5b7890bd267", size = 11990925, upload-time = "2025-10-14T15:39:23.085Z" }, + { url = "https://files.pythonhosted.org/packages/6a/6b/2f416568b3c4c91c96e5a365d164f8a4a4a88030aa8ab4644181fdadce97/transformers-4.57.3-py3-none-any.whl", hash = "sha256:c77d353a4851b1880191603d36acb313411d3577f6e2897814f333841f7003f4", size = 11993463, upload-time = "2025-11-25T15:51:26.493Z" }, ] [[package]] name = "triton" -version = "3.5.0" +version = "3.5.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/dd/22/507b6f58a35e05e84381630b2dc2a3cee1a7a2a7eaf4cba857c638a18a24/triton-3.5.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6f90de6a6566bb619b4c0adc9855729e1b1b5e26533fca1bf6206e96b6d277a3", size = 159827599, upload-time = "2025-10-15T19:15:43.87Z" }, - { url = "https://files.pythonhosted.org/packages/0b/eb/09e31d107a5d00eb281aa7e6635ca463e9bca86515944e399480eadb71f8/triton-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5d3b3d480debf24eaa739623c9a42446b0b77f95593d30eb1f64cd2278cc1f0", size = 170333110, upload-time = "2025-10-13T16:37:49.588Z" }, - { url = "https://files.pythonhosted.org/packages/79/f9/b6f60f978397c616fd8dacca2305759fe4f80d397b20ef72534803244bd5/triton-3.5.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8457b22148defefdcb7fa8144b05ce211b9faefad650a1ce85b23df488d5549c", size = 159926731, upload-time = "2025-10-15T19:15:49.682Z" }, - { url = "https://files.pythonhosted.org/packages/3d/78/949a04391c21956c816523678f0e5fa308eb5b1e7622d88c4e4ef5fceca0/triton-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f34bfa21c5b3a203c0f0eab28dcc1e49bd1f67d22724e77fb6665a659200a4ec", size = 170433488, upload-time = "2025-10-13T16:37:57.132Z" }, - { url = "https://files.pythonhosted.org/packages/87/9b/30988039e1e84df7554fba24e6a734d2d0e847af33cabdf9b532b3c51456/triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da21fccceafc163e3a5e857abe34351ef76345af06cabf9637a914742671f0b", size = 159946647, upload-time = "2025-10-15T19:15:56.325Z" }, - { url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = 
"2025-10-13T16:38:05.18Z" }, - { url = "https://files.pythonhosted.org/packages/cd/85/e37f1197acb04c8f3d83851d23d5d6ed5060ef74580668b112e23fdfa203/triton-3.5.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:188da5b81fa2f8322c27fec1627703eac24cb9bb7ab0dfbe9925973bc1b070d3", size = 159958970, upload-time = "2025-10-15T19:16:01.717Z" }, - { url = "https://files.pythonhosted.org/packages/6c/29/10728de8a6e932e517c10773486b8e99f85d1b1d9dd87d9a9616e1fef4a1/triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6bb9aa5519c084a333acdba443789e50012a4b851cd486c54f0b8dc2a8d3a12", size = 170487289, upload-time = "2025-10-13T16:38:11.662Z" }, - { url = "https://files.pythonhosted.org/packages/b8/1d/38258f05010ac17a7b058c022911c9cae6526e149b7397134a048cf5a6c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03127d9b33aaf979c856676b394bc059ec1d68cb6da68ae03f62dd8ad77a04ae", size = 160073012, upload-time = "2025-10-15T19:16:07.477Z" }, - { url = "https://files.pythonhosted.org/packages/5c/38/db80e48b9220c9bce872b0f616ad0446cdf554a40b85c7865cbca99ab3c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c83f2343e1a220a716c7b3ab9fccfcbe3ad4020d189549200e2d2e8d5868bed9", size = 170577179, upload-time = "2025-10-13T16:38:17.865Z" }, - { url = "https://files.pythonhosted.org/packages/91/fe/8f5771d00227f4eb1ee034f218ed427102b989366d2275fe3b3c105a3921/triton-3.5.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:468936651d383f4a6d10068d34a627505e13af55be5d002b9f27b987e7a5f0ac", size = 159957460, upload-time = "2025-10-15T19:16:12.626Z" }, - { url = "https://files.pythonhosted.org/packages/ff/60/1810655d1d856c9a4fcc90ee8966d85f552d98c53a6589f95ab2cbe27bb8/triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da0fa67ccd76c3dcfb0bffe1b1c57c685136a6bd33d141c24d9655d4185b1289", 
size = 170487949, upload-time = "2025-10-13T16:38:24.881Z" }, - { url = "https://files.pythonhosted.org/packages/78/59/99edd103958fe6e42b50b9ad8ce4f223ddf4ccf475259cf7d2b53381dc6c/triton-3.5.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7ceef21410229ac23173a28eee5cfc0e37c1dfdb8b4bc11ecda2e3ecec7c686", size = 160075629, upload-time = "2025-10-15T19:16:18.746Z" }, - { url = "https://files.pythonhosted.org/packages/fb/b7/1dec8433ac604c061173d0589d99217fe7bf90a70bdc375e745d044b8aad/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7", size = 170580176, upload-time = "2025-10-13T16:38:31.14Z" }, + { url = "https://files.pythonhosted.org/packages/d9/2e/f95e673222afa2c7f0c687d8913e98fcf2589ef0b1405de76894e37fe18f/triton-3.5.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f63e34dcb32d7bd3a1d0195f60f30d2aee8b08a69a0424189b71017e23dfc3d2", size = 159821655, upload-time = "2025-11-11T17:51:44.09Z" }, + { url = "https://files.pythonhosted.org/packages/fd/6e/676ab5019b4dde8b9b7bab71245102fc02778ef3df48218b298686b9ffd6/triton-3.5.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5fc53d849f879911ea13f4a877243afc513187bc7ee92d1f2c0f1ba3169e3c94", size = 170320692, upload-time = "2025-11-11T17:40:46.074Z" }, + { url = "https://files.pythonhosted.org/packages/dc/dc/6ce44d055f2fc2403c4ec6b3cfd3a9b25f57b7d95efadccdea91497f8e81/triton-3.5.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da47169e30a779bade679ce78df4810fca6d78a955843d2ddb11f226adc517dc", size = 159928005, upload-time = "2025-11-11T17:51:50.008Z" }, + { url = "https://files.pythonhosted.org/packages/b0/72/ec90c3519eaf168f22cb1757ad412f3a2add4782ad3a92861c9ad135d886/triton-3.5.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:61413522a48add32302353fdbaaf92daaaab06f6b5e3229940d21b5207f47579", size = 170425802, upload-time = "2025-11-11T17:40:53.209Z" }, + { url = "https://files.pythonhosted.org/packages/db/53/2bcc46879910991f09c063eea07627baef2bc62fe725302ba8f46a2c1ae5/triton-3.5.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:275a045b6ed670dd1bd005c3e6c2d61846c74c66f4512d6f33cc027b11de8fd4", size = 159940689, upload-time = "2025-11-11T17:51:55.938Z" }, + { url = "https://files.pythonhosted.org/packages/f2/50/9a8358d3ef58162c0a415d173cfb45b67de60176e1024f71fbc4d24c0b6d/triton-3.5.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d2c6b915a03888ab931a9fd3e55ba36785e1fe70cbea0b40c6ef93b20fc85232", size = 170470207, upload-time = "2025-11-11T17:41:00.253Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ba/805684a992ee32d486b7948d36aed2f5e3c643fc63883bf8bdca1c3f3980/triton-3.5.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56765ffe12c554cd560698398b8a268db1f616c120007bfd8829d27139abd24a", size = 159955460, upload-time = "2025-11-11T17:52:01.861Z" }, + { url = "https://files.pythonhosted.org/packages/27/46/8c3bbb5b0a19313f50edcaa363b599e5a1a5ac9683ead82b9b80fe497c8d/triton-3.5.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3f4346b6ebbd4fad18773f5ba839114f4826037c9f2f34e0148894cd5dd3dba", size = 170470410, upload-time = "2025-11-11T17:41:06.319Z" }, + { url = "https://files.pythonhosted.org/packages/84/1e/7df59baef41931e21159371c481c31a517ff4c2517343b62503d0cd2be99/triton-3.5.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02c770856f5e407d24d28ddc66e33cf026e6f4d360dcb8b2fabe6ea1fc758621", size = 160072799, upload-time = "2025-11-11T17:52:07.293Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/92/e97fcc6b2c27cdb87ce5ee063d77f8f26f19f06916aa680464c8104ef0f6/triton-3.5.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0b4d2c70127fca6a23e247f9348b8adde979d2e7a20391bfbabaac6aebc7e6a8", size = 170579924, upload-time = "2025-11-11T17:41:12.455Z" }, + { url = "https://files.pythonhosted.org/packages/14/f9/0430e879c1e63a1016cb843261528fd3187c872c3a9539132efc39514753/triton-3.5.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f617aa7925f9ea9968ec2e1adaf93e87864ff51549c8f04ce658f29bbdb71e2d", size = 159956163, upload-time = "2025-11-11T17:52:12.999Z" }, + { url = "https://files.pythonhosted.org/packages/a4/e6/c595c35e5c50c4bc56a7bac96493dad321e9e29b953b526bbbe20f9911d0/triton-3.5.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0637b1efb1db599a8e9dc960d53ab6e4637db7d4ab6630a0974705d77b14b60", size = 170480488, upload-time = "2025-11-11T17:41:18.222Z" }, + { url = "https://files.pythonhosted.org/packages/41/1e/63d367c576c75919e268e4fbc33c1cb33b6dc12bb85e8bfe531c2a8bd5d3/triton-3.5.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8932391d7f93698dfe5bc9bead77c47a24f97329e9f20c10786bb230a9083f56", size = 160073620, upload-time = "2025-11-11T17:52:18.403Z" }, + { url = "https://files.pythonhosted.org/packages/16/b5/b0d3d8b901b6a04ca38df5e24c27e53afb15b93624d7fd7d658c7cd9352a/triton-3.5.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bac7f7d959ad0f48c0e97d6643a1cc0fd5786fe61cb1f83b537c6b2d54776478", size = 170582192, upload-time = "2025-11-11T17:41:23.963Z" }, ] [[package]] name = "trove-classifiers" -version = "2025.9.11.17" +version = "2025.11.14.15" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/9a/778622bc06632529817c3c524c82749a112603ae2bbcf72ee3eb33a2c4f1/trove_classifiers-2025.9.11.17.tar.gz", hash = 
"sha256:931ca9841a5e9c9408bc2ae67b50d28acf85bef56219b56860876dd1f2d024dd", size = 16975, upload-time = "2025-09-11T17:07:50.97Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/a9/880cccf76af9e7b322112f52e4e2dbb3534cbe671197b8f443a42189dfc7/trove_classifiers-2025.11.14.15.tar.gz", hash = "sha256:6b60f49d40bbd895bc61d8dc414fc2f2286d70eb72ed23548db8cf94f62804ca", size = 16995, upload-time = "2025-11-14T15:23:13.78Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e1/85/a4ff8758c66f1fc32aa5e9a145908394bf9cf1c79ffd1113cfdeb77e74e4/trove_classifiers-2025.9.11.17-py3-none-any.whl", hash = "sha256:5d392f2d244deb1866556457d6f3516792124a23d1c3a463a2e8668a5d1c15dd", size = 14158, upload-time = "2025-09-11T17:07:49.886Z" }, + { url = "https://files.pythonhosted.org/packages/49/f6/73c4aa003d1237ee9bea8a46f49dc38c45dfe95af4f0da7e60678d388011/trove_classifiers-2025.11.14.15-py3-none-any.whl", hash = "sha256:d1dac259c1e908939862e3331177931c6df0a37af2c1a8debcc603d9115fcdd9", size = 14191, upload-time = "2025-11-14T15:23:12.467Z" }, ] [[package]] @@ -6144,7 +6067,7 @@ wheels = [ [[package]] name = "wandb" -version = "0.22.3" +version = "0.23.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -6158,17 +6081,17 @@ dependencies = [ { name = "sentry-sdk" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c1/d1/6b70f365ed86bd69debba8ad55dec8606fc21006e7ca703a5a091bd3b719/wandb-0.22.3.tar.gz", hash = "sha256:04468a8ab2769a46f5e384c9c4ada5da0dced005ca689a8424e4b8b5cb2a0291", size = 44337368, upload-time = "2025-10-28T23:59:10.275Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/8b/db2d44395c967cd452517311fd6ede5d1e07310769f448358d4874248512/wandb-0.23.0.tar.gz", hash = "sha256:e5f98c61a8acc3ee84583ca78057f64344162ce026b9f71cb06eea44aec27c93", size = 44413921, upload-time = "2025-11-11T21:06:30.737Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/23/02/87fb60f587ec249f784a40bd91c30de1b2b24d691ee72675d5b66c3d0728/wandb-0.22.3-py3-none-macosx_12_0_arm64.whl", hash = "sha256:81b3b6e405f38342b0a080898b7d00c5b9375432f5ba358942a09e65cdcfe781", size = 18758047, upload-time = "2025-10-28T23:58:46.56Z" }, - { url = "https://files.pythonhosted.org/packages/26/88/64081740ef2b2efc7fbcb2139a07a849e42bcb09ae0c56ae50c41bd0ad63/wandb-0.22.3-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:d29c16817cca6401b4919069ec7570c781eacb67dc0b1ff2e0096a9a59581720", size = 19798011, upload-time = "2025-10-28T23:58:49.718Z" }, - { url = "https://files.pythonhosted.org/packages/19/72/c4f922b33dbb84d1c81ee045ff8791dd14e26d79e1e9bbafff964b7043e2/wandb-0.22.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb955d73a4ba55df9adc61fafbabef5556784d33fc39c7b5c8165d2694ddeb3b", size = 18542713, upload-time = "2025-10-28T23:58:51.927Z" }, - { url = "https://files.pythonhosted.org/packages/ad/98/3ce5f6e2086d91b0c51b38ae7ff591109e7da2bb25fe1a12eec0cdbaa494/wandb-0.22.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23f3ebe41a26506117a098fdfd2706ed0e50b37899bfbefe3a0628fcbd70c69d", size = 19984910, upload-time = "2025-10-28T23:58:54.641Z" }, - { url = "https://files.pythonhosted.org/packages/5e/57/e68cb38427b60490d6ddf1b992e6c7f36be83be1079d291ce87a8d347f48/wandb-0.22.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2973462bed5d4a653b1a97cf9fc350673bb200fb356a2f4eba34beae9b87e0aa", size = 18581776, upload-time = "2025-10-28T23:58:56.975Z" }, - { url = "https://files.pythonhosted.org/packages/66/6d/543f907ce0c6b6da13628b23d19ca7282c559fd73eb47b04977b9a61d0c6/wandb-0.22.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c5c2bd18f95c1639863c527da0a5818ac6b0e5194f9c691426b265908ddd8b2c", size = 20078800, upload-time = "2025-10-28T23:58:59.217Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/91/1decaf1a6ac2017481c782e0fad7f90bc9ae4057f3d76d478cb6527f3dd3/wandb-0.22.3-py3-none-win32.whl", hash = "sha256:09ca1edfe0fd6dc30447d368acddb825668e60ee705c98594a6bbfd30d34d47e", size = 19160297, upload-time = "2025-10-28T23:59:01.536Z" }, - { url = "https://files.pythonhosted.org/packages/4c/ba/3b092634279994b0c79fe05220532822be09f3a353ae95c54e7142769db8/wandb-0.22.3-py3-none-win_amd64.whl", hash = "sha256:55403bf93872c9978433d101324f51e43e78c70c809bf6d06ca7b2760e39f497", size = 19160300, upload-time = "2025-10-28T23:59:04.06Z" }, - { url = "https://files.pythonhosted.org/packages/7f/80/4662fce9eebcc8c71f5083e9152ccaf7d43d4ca9c446e1422f9aa784a51c/wandb-0.22.3-py3-none-win_arm64.whl", hash = "sha256:49f66b05882abfa53816cc8d01b3c2435a89c5a090176802fa6928b5979d34d9", size = 17461959, upload-time = "2025-10-28T23:59:07.059Z" }, + { url = "https://files.pythonhosted.org/packages/41/61/a3220c7fa4cadfb2b2a5c09e3fa401787326584ade86d7c1f58bf1cd43bd/wandb-0.23.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:b682ec5e38fc97bd2e868ac7615a0ab4fc6a15220ee1159e87270a5ebb7a816d", size = 18992250, upload-time = "2025-11-11T21:06:03.412Z" }, + { url = "https://files.pythonhosted.org/packages/90/16/e69333cf3d11e7847f424afc6c8ae325e1f6061b2e5118d7a17f41b6525d/wandb-0.23.0-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:ec094eb71b778e77db8c188da19e52c4f96cb9d5b4421d7dc05028afc66fd7e7", size = 20045616, upload-time = "2025-11-11T21:06:07.109Z" }, + { url = "https://files.pythonhosted.org/packages/62/79/42dc6c7bb0b425775fe77f1a3f1a22d75d392841a06b43e150a3a7f2553a/wandb-0.23.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e43f1f04b98c34f407dcd2744cec0a590abce39bed14a61358287f817514a7b", size = 18758848, upload-time = "2025-11-11T21:06:09.832Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/94/d6ddb78334996ccfc1179444bfcfc0f37ffd07ee79bb98940466da6f68f8/wandb-0.23.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5847f98cbb3175caf5291932374410141f5bb3b7c25f9c5e562c1988ce0bf5", size = 20231493, upload-time = "2025-11-11T21:06:12.323Z" }, + { url = "https://files.pythonhosted.org/packages/52/4d/0ad6df0e750c19dabd24d2cecad0938964f69a072f05fbdab7281bec2b64/wandb-0.23.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6151355fd922539926e870be811474238c9614b96541773b990f1ce53368aef6", size = 18793473, upload-time = "2025-11-11T21:06:14.967Z" }, + { url = "https://files.pythonhosted.org/packages/f8/da/c2ba49c5573dff93dafc0acce691bb1c3d57361bf834b2f2c58e6193439b/wandb-0.23.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:df62e426e448ebc44269140deb7240df474e743b12d4b1f53b753afde4aa06d4", size = 20332882, upload-time = "2025-11-11T21:06:17.865Z" }, + { url = "https://files.pythonhosted.org/packages/40/65/21bfb10ee5cd93fbcaf794958863c7e05bac4bbeb1cc1b652094aa3743a5/wandb-0.23.0-py3-none-win32.whl", hash = "sha256:6c21d3eadda17aef7df6febdffdddfb0b4835c7754435fc4fe27631724269f5c", size = 19433198, upload-time = "2025-11-11T21:06:21.913Z" }, + { url = "https://files.pythonhosted.org/packages/f1/33/cbe79e66c171204e32cf940c7fdfb8b5f7d2af7a00f301c632f3a38aa84b/wandb-0.23.0-py3-none-win_amd64.whl", hash = "sha256:b50635fa0e16e528bde25715bf446e9153368428634ca7a5dbd7a22c8ae4e915", size = 19433201, upload-time = "2025-11-11T21:06:24.607Z" }, + { url = "https://files.pythonhosted.org/packages/1c/a0/5ecfae12d78ea036a746c071e4c13b54b28d641efbba61d2947c73b3e6f9/wandb-0.23.0-py3-none-win_arm64.whl", hash = "sha256:fa0181b02ce4d1993588f4a728d8b73ae487eb3cb341e6ce01c156be7a98ec72", size = 17678649, upload-time = "2025-11-11T21:06:27.289Z" }, ] [[package]] @@ -6301,7 +6224,8 @@ version = "1.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "braceexpand" }, - { name = 
"numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "pyyaml" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5a/3a/68800d92e065cf4750ebecf973b13979c0c929b439e1293012938862038d/webdataset-1.0.2.tar.gz", hash = "sha256:7f0498be827cfa46cc5430a58768a24e2c6a410676a61be1838f53d61afdaab4", size = 80090, upload-time = "2025-06-19T23:26:21.945Z" } @@ -6399,22 +6323,6 @@ wheels = [ name = "wrapt" version = "1.17.3" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == 
'3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.12.*' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", -] sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/3f/23/bb82321b86411eb51e5a5db3fb8f8032fd30bd7c2d74bfe936136b2fa1d6/wrapt-1.17.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04", size = 53482, upload-time = "2025-08-12T05:51:44.467Z" }, @@ -6480,131 +6388,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" }, ] -[[package]] -name = "wrapt" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and 
extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' 
and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 
'extra-13-megatron-core-lts'", - "python_full_version == '3.12.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", - "python_full_version < '3.11' and sys_platform != 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts'", -] -sdist = { url = "https://files.pythonhosted.org/packages/49/19/5e5bcd855d808892fe02d49219f97a50f64cd6d8313d75df3494ee97b1a3/wrapt-2.0.0.tar.gz", hash = "sha256:35a542cc7a962331d0279735c30995b024e852cf40481e384fd63caaa391cbb9", size = 81722, upload-time = "2025-10-19T23:47:54.07Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/db/ac9546e89b645e525686727f8749847485e3b45ffc4507b61c4669358638/wrapt-2.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a7cebcee61f21b1e46aa32db8d9d93826d0fbf1ad85defc2ccfb93b4adef1435", size = 77431, upload-time = "2025-10-19T23:45:25.177Z" }, - { url = "https://files.pythonhosted.org/packages/74/bc/3b57c8012bbd0d02eec5ae838681c1a819df6c5e765ebc897f52623b5eb1/wrapt-2.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:827e6e3a3a560f6ec1f5ee92d4319c21a0549384f896ec692f3201eda31ebd11", size = 60644, upload-time = "2025-10-19T23:45:27.511Z" }, - { url = "https://files.pythonhosted.org/packages/b8/6e/b5e7d47713e3d46c30ec6ae83fafd369bc34de8148668c6e3168d9301863/wrapt-2.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a91075a5383a7cbfe46aed1845ef7c3f027e8e20e7d9a8a75e36ebc9b0dd15e", size = 61526, 
upload-time = "2025-10-19T23:45:28.789Z" }, - { url = "https://files.pythonhosted.org/packages/28/8d/d5df2af58ae479785473607a3b25726c295640cdcaee830847cee339eff9/wrapt-2.0.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b6a18c813196e18146b8d041e20875bdb0cb09b94ac1d1e1146e0fa87b2deb0d", size = 113638, upload-time = "2025-10-19T23:45:31.977Z" }, - { url = "https://files.pythonhosted.org/packages/f9/b7/9501c45ab93b4d6ba396ef02fcfb55867866bc8579fff045bb54cae58423/wrapt-2.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec5028d26011a53c76bd91bb6198b30b438c6e0f7adb45f2ad84fe2655b6a104", size = 115651, upload-time = "2025-10-19T23:45:33.257Z" }, - { url = "https://files.pythonhosted.org/packages/5e/3a/bfebe2ba51cf98ae80c5dbb6fa5892ae75d1acf1a4c404eda88e28f5ab06/wrapt-2.0.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bed9b04900204721a24bcefc652ca267b01c1e8ad8bc8c0cff81558a45a3aadc", size = 112060, upload-time = "2025-10-19T23:45:30.298Z" }, - { url = "https://files.pythonhosted.org/packages/00/e7/cd50a32bed022d98f61a90e57faf782aa063f7930f57eb67eb105d3189be/wrapt-2.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:03442f2b45fa3f2b98a94a1917f52fb34670de8f96c0a009c02dbd512d855a3d", size = 114829, upload-time = "2025-10-19T23:45:34.23Z" }, - { url = "https://files.pythonhosted.org/packages/9d/2c/c709578271df0c70a27ab8f797c44c258650f24a32b452f03d7afedc070d/wrapt-2.0.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:17d0b5c42495ba142a1cee52b76414f9210591c84aae94dffda70240753bfb3c", size = 111249, upload-time = "2025-10-19T23:45:35.554Z" }, - { url = "https://files.pythonhosted.org/packages/60/ef/cb58f6eea41f129600bda68d1ae4c80b14d4e0663eec1d5220cbffe50be5/wrapt-2.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ee44215e7d13e112a8fc74e12ed1a1f41cab2bc07b11cc703f2398cd114b261c", size = 113312, upload-time = 
"2025-10-19T23:45:36.66Z" }, - { url = "https://files.pythonhosted.org/packages/59/55/97e6c4e1c175fb27f8dec717a3e36493ff0c4e50173a95f439496556910f/wrapt-2.0.0-cp310-cp310-win32.whl", hash = "sha256:fe6eafac3bc3c957ab6597a0c0654a0a308868458d00d218743e5b5fae51951c", size = 57961, upload-time = "2025-10-19T23:45:40.958Z" }, - { url = "https://files.pythonhosted.org/packages/3b/0a/898b1d81ae1f3dd9a79fd2e0330a7c8dd793982f815a318548777cb21ee5/wrapt-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9e070c3491397fba0445b8977900271eca9656570cca7c900d9b9352186703a0", size = 60311, upload-time = "2025-10-19T23:45:38.033Z" }, - { url = "https://files.pythonhosted.org/packages/44/f1/e7e92f9535f5624ee22879f09456df9d1f1ae9bb338eef711077b48e456a/wrapt-2.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:806e2e73186eb5e3546f39fb5d0405040e0088db0fc8b2f667fd1863de2b3c99", size = 58822, upload-time = "2025-10-19T23:45:39.785Z" }, - { url = "https://files.pythonhosted.org/packages/12/8f/8e4c8b6da60b4205191d588cbac448fb9ff4f5ed89f4e555dc4813ab30cf/wrapt-2.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b7e221abb6c5387819db9323dac3c875b459695057449634f1111955d753c621", size = 77433, upload-time = "2025-10-19T23:45:42.543Z" }, - { url = "https://files.pythonhosted.org/packages/22/9a/01a29ccb029aa8e78241f8b53cb89ae8826c240129abbbb6ebba3416eff9/wrapt-2.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1147a84c8fc852426580af8b6e33138461ddbc65aa459a25ea539374d32069fa", size = 60641, upload-time = "2025-10-19T23:45:43.866Z" }, - { url = "https://files.pythonhosted.org/packages/3d/ec/e058997971428b7665b5c3665a55b18bb251ea7e08d002925e3ca017c020/wrapt-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d6691d4a711504a0bc10de789842ad6ac627bed22937b10f37a1211a8ab7bb3", size = 61526, upload-time = "2025-10-19T23:45:44.839Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/c3/c82263503f554715aa1847e85dc75a69631a54e9d7ab0f1a55e34a22d44a/wrapt-2.0.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f460e1eb8e75a17c3918c8e35ba57625721eef2439ef0bcf05304ac278a65e1d", size = 114069, upload-time = "2025-10-19T23:45:47.223Z" }, - { url = "https://files.pythonhosted.org/packages/dc/97/d95e88a3a1bc2890a1aa47880c2762cf0eb6d231b5a64048e351cec6f071/wrapt-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:12c37784b77bf043bf65cc96c7195a5db474b8e54173208af076bdbb61df7b3e", size = 116109, upload-time = "2025-10-19T23:45:48.252Z" }, - { url = "https://files.pythonhosted.org/packages/dc/36/cba0bf954f2303897b80fa5342499b43f8c5201110dddf0d578d6841b149/wrapt-2.0.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:75e5c049eb583835f7a0e0e311d9dde9bfbaac723a6dd89d052540f9b2809977", size = 112500, upload-time = "2025-10-19T23:45:45.838Z" }, - { url = "https://files.pythonhosted.org/packages/d7/2b/8cb88e63bec989f641d208acb3fd198bfdbbb4ef7dfb71f0cac3c90b07a9/wrapt-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e50bcbd5b65dac21b82319fcf18486e6ac439947e9305034b00704eb7405f553", size = 115356, upload-time = "2025-10-19T23:45:49.249Z" }, - { url = "https://files.pythonhosted.org/packages/bb/60/a6d5fb94648cd430648705bef9f4241bd22ead123ead552b6d2873ad5240/wrapt-2.0.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:06b78cb6b9320f57737a52fede882640d93cface98332d1a3df0c5696ec9ae9f", size = 111754, upload-time = "2025-10-19T23:45:51.21Z" }, - { url = "https://files.pythonhosted.org/packages/d0/44/1963854edf0592ae806307899dc7bf891e76cec19e598f55845c94603a65/wrapt-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8c8349ebfc3cd98bc9105e0112dd8c8ac1f3c7cb5601f9d02248cae83a63f748", size = 113789, upload-time = "2025-10-19T23:45:52.473Z" }, - { url = 
"https://files.pythonhosted.org/packages/62/ec/4b1d76cb6d96ac511aaaa92efc57f528e57f06082a595b8b2663fcdb0f20/wrapt-2.0.0-cp311-cp311-win32.whl", hash = "sha256:028f19ec29e204fe725139d4a8b09f77ecfb64f8f02b7ab5ee822c85e330b68b", size = 57954, upload-time = "2025-10-19T23:45:57.03Z" }, - { url = "https://files.pythonhosted.org/packages/d4/cf/df8ff9bd64d4a75f9a9f6c1c93480a51904d0c9bd71c11994301c47d8a33/wrapt-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:c6961f05e58d919153ba311b397b7b904b907132b7b8344dde47865d4bb5ec89", size = 60308, upload-time = "2025-10-19T23:45:54.314Z" }, - { url = "https://files.pythonhosted.org/packages/69/d8/61e245fe387d58d84b3f913d5da9d909c4f239b887db692a05105aaf2a1b/wrapt-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:be7e316c2accd5a31dbcc230de19e2a846a325f8967fdea72704d00e38e6af06", size = 58822, upload-time = "2025-10-19T23:45:55.772Z" }, - { url = "https://files.pythonhosted.org/packages/3c/28/7f266b5bf50c3ad0c99c524d99faa0f7d6eecb045d950e7d2c9e1f0e1338/wrapt-2.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73c6f734aecb1a030d9a265c13a425897e1ea821b73249bb14471445467ca71c", size = 78078, upload-time = "2025-10-19T23:45:58.855Z" }, - { url = "https://files.pythonhosted.org/packages/06/0c/bbdcad7eb535fae9d6b0fcfa3995c364797cd8e2b423bba5559ab2d88dcf/wrapt-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b4a7f8023b8ce8a36370154733c747f8d65c8697cb977d8b6efeb89291fff23e", size = 61158, upload-time = "2025-10-19T23:46:00.096Z" }, - { url = "https://files.pythonhosted.org/packages/d3/8a/bba3e7a4ebf4d1624103ee59d97b78a1fbb08fb5753ff5d1b69f5ef5e863/wrapt-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a1cb62f686c50e9dab5983c68f6c8e9cbf14a6007935e683662898a7d892fa69", size = 61646, upload-time = "2025-10-19T23:46:01.279Z" }, - { url = 
"https://files.pythonhosted.org/packages/ff/0c/0f565294897a72493dbafe7b46229b5f09f3776795a894d6b737e98387de/wrapt-2.0.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:43dc0550ae15e33e6bb45a82a5e1b5495be2587fbaa996244b509921810ee49f", size = 121442, upload-time = "2025-10-19T23:46:04.287Z" }, - { url = "https://files.pythonhosted.org/packages/da/80/7f03501a8a078ad79b19b1a888f9192a9494e62ddf8985267902766a4f30/wrapt-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39c5b45b056d630545e40674d1f5e1b51864b3546f25ab6a4a331943de96262e", size = 123018, upload-time = "2025-10-19T23:46:06.052Z" }, - { url = "https://files.pythonhosted.org/packages/37/6b/ad0e1ff98359f13b4b0c2c52848e792841146fe79ac5f56899b9a028fc0d/wrapt-2.0.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:804e88f824b76240a1b670330637ccfd2d18b9efa3bb4f02eb20b2f64880b324", size = 117369, upload-time = "2025-10-19T23:46:02.53Z" }, - { url = "https://files.pythonhosted.org/packages/ac/6c/a90437bba8cb1ce2ed639af979515e09784678c2a7f4ffc79f2cf7de809e/wrapt-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c2c476aa3fc2b9899c3f7b20963fac4f952e7edb74a31fc92f7745389a2e3618", size = 121453, upload-time = "2025-10-19T23:46:07.747Z" }, - { url = "https://files.pythonhosted.org/packages/2c/a9/b3982f9bd15bd45857a23c48b7c36e47d05db4a4dcc5061c31f169238845/wrapt-2.0.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8d851e526891216f89fcb7a1820dad9bd503ba3468fb9635ee28e93c781aa98e", size = 116250, upload-time = "2025-10-19T23:46:09.385Z" }, - { url = "https://files.pythonhosted.org/packages/73/e2/b7a8b1afac9f791d8f5eac0d9726559f1d7ec4a2b5a6b4e67ac145b007a5/wrapt-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b95733c2360c4a8656ee93c7af78e84c0bd617da04a236d7a456c8faa34e7a2d", size = 120575, upload-time = "2025-10-19T23:46:11.882Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/0f/37920eeea96094f450ae35505d39f1135df951a2cdee0d4e01d4f843396a/wrapt-2.0.0-cp312-cp312-win32.whl", hash = "sha256:ea56817176834edf143df1109ae8fdaa087be82fdad3492648de0baa8ae82bf2", size = 58175, upload-time = "2025-10-19T23:46:15.678Z" }, - { url = "https://files.pythonhosted.org/packages/f0/db/b395f3b0c7f2c60d9219afacc54ceb699801ccf2d3d969ba556dc6d3af20/wrapt-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:3c7d3bee7be7a2665286103f4d1f15405c8074e6e1f89dac5774f9357c9a3809", size = 60415, upload-time = "2025-10-19T23:46:12.913Z" }, - { url = "https://files.pythonhosted.org/packages/86/22/33d660214548af47fc59d9eec8c0e0693bcedc5b3a0b52e8cbdd61f3b646/wrapt-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:680f707e1d26acbc60926659799b15659f077df5897a6791c7c598a5d4a211c4", size = 58911, upload-time = "2025-10-19T23:46:13.889Z" }, - { url = "https://files.pythonhosted.org/packages/18/0a/dd88abfe756b1aa79f0777e5ee4ce9e4b5dc4999bd805e9b04b52efc7b18/wrapt-2.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e2ea096db28d5eb64d381af0e93464621ace38a7003a364b6b5ffb7dd713aabe", size = 78083, upload-time = "2025-10-19T23:46:16.937Z" }, - { url = "https://files.pythonhosted.org/packages/7f/b9/8afebc1655a863bb2178b23c2d699b8743f3a7dab466904adc6155f3c858/wrapt-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c92b5a82d28491e3f14f037e1aae99a27a5e6e0bb161e65f52c0445a3fa7c940", size = 61156, upload-time = "2025-10-19T23:46:17.927Z" }, - { url = "https://files.pythonhosted.org/packages/bb/8b/f710a6528ccc52e21943f42c8cf64814cde90f9adbd3bcd58c7c274b4f75/wrapt-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:81d234718aabe632d179fac52c7f69f0f99fbaac4d4bcd670e62462bbcbfcad7", size = 61641, upload-time = "2025-10-19T23:46:19.229Z" }, - { url = 
"https://files.pythonhosted.org/packages/e4/5f/e4eabd0cc6684c5b208c2abc5c3459449c4d15be1694a9bbcf51e0e135fd/wrapt-2.0.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:db2eea83c43f84e4e41dbbb4c1de371a53166e55f900a6b130c3ef51c6345c1a", size = 121454, upload-time = "2025-10-19T23:46:21.808Z" }, - { url = "https://files.pythonhosted.org/packages/6f/c4/ec31ee17cc7866960d323609ba7402be786d211a6d713a59f776c4270bb3/wrapt-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:65f50e356c425c061e1e17fe687ff30e294fed9bf3441dc1f13ef73859c2a817", size = 123063, upload-time = "2025-10-19T23:46:23.545Z" }, - { url = "https://files.pythonhosted.org/packages/b0/2b/a4b10c3c0022e40aeae9bec009bafb049f440493f0575ebb27ecf61c32f8/wrapt-2.0.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:887f2a667e3cbfb19e204032d42ad7dedaa43972e4861dc7a3d51ae951d9b578", size = 117401, upload-time = "2025-10-19T23:46:20.433Z" }, - { url = "https://files.pythonhosted.org/packages/2a/4a/ade23a76967e1f148e461076a4d0e24a7950a5f18b394c9107fe60224ae2/wrapt-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9054829da4be461e3ad3192e4b6bbf1fc18af64c9975ce613aec191924e004dc", size = 121485, upload-time = "2025-10-19T23:46:24.85Z" }, - { url = "https://files.pythonhosted.org/packages/cb/ba/33b5f3e2edede4e1cfd259f0d9c203cf370f259bb9b215dd58fc6cbb94e9/wrapt-2.0.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b952ffd77133a5a2798ee3feb18e51b0a299d2f440961e5bb7737dbb02e57289", size = 116276, upload-time = "2025-10-19T23:46:27.006Z" }, - { url = "https://files.pythonhosted.org/packages/eb/bf/b7f95bb4529a35ca11eb95d48f9d1a563b495471f7cf404c644566fb4293/wrapt-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e25fde03c480061b8234d8ee4863eb5f40a9be4fb258ce105b364de38fc6bcf9", size = 120578, upload-time = "2025-10-19T23:46:28.679Z" }, - { url = 
"https://files.pythonhosted.org/packages/f8/71/984849df6f052592474a44aafd6b847e1cffad39b0debc5390a04aa46331/wrapt-2.0.0-cp313-cp313-win32.whl", hash = "sha256:49e982b7860d325094978292a49e0418833fc7fc42c0dc7cd0b7524d7d06ee74", size = 58178, upload-time = "2025-10-19T23:46:32.372Z" }, - { url = "https://files.pythonhosted.org/packages/f9/3b/4e1fc0f2e1355fbc55ab248311bf4c958dbbd96bd9183b9e96882cc16213/wrapt-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:6e5c86389d9964050ce50babe247d172a5e3911d59a64023b90db2b4fa00ae7c", size = 60423, upload-time = "2025-10-19T23:46:30.041Z" }, - { url = "https://files.pythonhosted.org/packages/20/0a/9384e0551f56fe361f41bb8f209a13bb9ef689c3a18264225b249849b12c/wrapt-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:b96fdaa4611e05c7231937930567d3c16782be9dbcf03eb9f60d83e57dd2f129", size = 58918, upload-time = "2025-10-19T23:46:31.056Z" }, - { url = "https://files.pythonhosted.org/packages/68/70/37b90d3ee5bf0d0dc4859306383da08b685c9a51abff6fd6b0a7c052e117/wrapt-2.0.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:f2c7b7fead096dbf1dcc455b7f59facb05de3f5bfb04f60a69f98cdfe6049e5f", size = 81980, upload-time = "2025-10-19T23:46:33.368Z" }, - { url = "https://files.pythonhosted.org/packages/95/23/0ce69cc90806b90b3ee4cfd9ad8d2ee9becc3a1aab7df3c3bfc7d0904cb6/wrapt-2.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:04c7c8393f25b11c0faa5d907dd9eb462e87e4e7ba55e308a046d7ed37f4bbe2", size = 62900, upload-time = "2025-10-19T23:46:34.415Z" }, - { url = "https://files.pythonhosted.org/packages/54/76/03ec08170c02f38f3be3646977920976b968e0b704a0693a98f95d02f4d2/wrapt-2.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a93e0f8b376c0735b2f4daf58018b4823614d2b896cb72b6641c4d3dbdca1d75", size = 63636, upload-time = "2025-10-19T23:46:35.643Z" }, - { url = 
"https://files.pythonhosted.org/packages/75/c1/04ce0511e504cdcd84cdb6980bc7d4efa38ac358e8103d6dd0cd278bfc6d/wrapt-2.0.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b42d13603da4416c43c430dbc6313c8d7ff745c40942f146ed4f6dd02c7d2547", size = 152650, upload-time = "2025-10-19T23:46:38.717Z" }, - { url = "https://files.pythonhosted.org/packages/17/06/cd2e32b5f744701189c954f9ab5eee449c86695b13f414bb8ea7a83f6d48/wrapt-2.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8bbd2472abf8c33480ad2314b1f8fac45d592aba6cc093e8839a7b2045660e6", size = 158811, upload-time = "2025-10-19T23:46:40.875Z" }, - { url = "https://files.pythonhosted.org/packages/7d/a2/a6d920695cca62563c1b969064e5cd2051344a6e330c184b6f80383d87e4/wrapt-2.0.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e64a3a1fd9a308ab9b815a2ad7a65b679730629dbf85f8fc3f7f970d634ee5df", size = 146033, upload-time = "2025-10-19T23:46:37.351Z" }, - { url = "https://files.pythonhosted.org/packages/c6/90/7fd2abe4ec646bc43cb6b0d05086be6fcf15e64f06f51fc4198804396d68/wrapt-2.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d61214525eaf88e0d0edf3d1ad5b5889863c6f88e588c6cdc6aa4ee5d1f10a4a", size = 155673, upload-time = "2025-10-19T23:46:42.582Z" }, - { url = "https://files.pythonhosted.org/packages/5f/8d/6cce7f8c41633e677ac8aa34e84b53a22a645ec2a680deb991785ca2798d/wrapt-2.0.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:04f7a5f92c5f7324a1735043cc467b1295a1c5b4e0c1395472b7c44706e3dc61", size = 144364, upload-time = "2025-10-19T23:46:44.381Z" }, - { url = "https://files.pythonhosted.org/packages/72/42/9570349e03afa9d83daf7f33ffb17e8cdc62d7e84c0d09005d0f51912efa/wrapt-2.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2356f76cb99b3de5b4e5b8210367fbbb81c7309fe39b622f5d199dd88eb7f765", size = 150275, upload-time = "2025-10-19T23:46:45.662Z" }, - { url = 
"https://files.pythonhosted.org/packages/f2/d8/448728e6fe030e5c4f1022c82cd3af1de1c672fa53d2d5b36b32a55ce7bf/wrapt-2.0.0-cp313-cp313t-win32.whl", hash = "sha256:0a921b657a224e40e4bc161b5d33934583b34f0c9c5bdda4e6ac66f9d2fcb849", size = 59867, upload-time = "2025-10-19T23:46:49.593Z" }, - { url = "https://files.pythonhosted.org/packages/8f/b1/ad812b1fe1cd85f6498dc3a3c9809a1e880d6108283b1735119bec217041/wrapt-2.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:c16f6d4eea98080f6659a8a7fc559d4a0a337ee66960659265cad2c8a40f7c0f", size = 63170, upload-time = "2025-10-19T23:46:46.87Z" }, - { url = "https://files.pythonhosted.org/packages/7f/29/c105b1e76650c82823c491952a7a8eafe09b78944f7a43f22d37ed860229/wrapt-2.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:52878edc13dc151c58a9966621d67163a80654bc6cff4b2e1c79fa62d0352b26", size = 60339, upload-time = "2025-10-19T23:46:47.862Z" }, - { url = "https://files.pythonhosted.org/packages/f8/38/0dd39f83163fd28326afba84e3e416656938df07e60a924ac4d992b30220/wrapt-2.0.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:79a53d86c2aff7b32cc77267e3a308365d1fcb881e74bc9cbe26f63ee90e37f0", size = 78242, upload-time = "2025-10-19T23:46:51.096Z" }, - { url = "https://files.pythonhosted.org/packages/08/ef/fa7a5c1d73f8690c712f9d2e4615700c6809942536dd3f441b9ba650a310/wrapt-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d731a4f22ed6ffa4cb551b4d2b0c24ff940c27a88edaf8e3490a5ee3a05aef71", size = 61207, upload-time = "2025-10-19T23:46:52.558Z" }, - { url = "https://files.pythonhosted.org/packages/23/d9/67cb93da492eb0a1cb17b7ed18220d059e58f00467ce6728b674d3441b3d/wrapt-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3e02ab8c0ac766a5a6e81cd3b6cc39200c69051826243182175555872522bd5a", size = 61748, upload-time = "2025-10-19T23:46:54.468Z" }, - { url = 
"https://files.pythonhosted.org/packages/e5/be/912bbd70cc614f491b526a1d7fe85695b283deed19287b9f32460178c54d/wrapt-2.0.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:895870602d65d7338edb3b6a717d856632ad9f14f7ff566214e4fb11f0816649", size = 120424, upload-time = "2025-10-19T23:46:57.575Z" }, - { url = "https://files.pythonhosted.org/packages/b2/e1/10df8937e7da2aa9bc3662a4b623e51a323c68f42cad7b13f0e61a700ce2/wrapt-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b9ad4fab76a0086dc364c4f17f39ad289600e73ef5c6e9ab529aff22cac1ac3", size = 122804, upload-time = "2025-10-19T23:46:59.308Z" }, - { url = "https://files.pythonhosted.org/packages/f3/60/576751b1919adab9f63168e3b5fd46c0d1565871b1cc4c2569503ccf4be6/wrapt-2.0.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e7ca0562606d7bad2736b2c18f61295d61f50cd3f4bfc51753df13614dbcce1b", size = 117398, upload-time = "2025-10-19T23:46:55.814Z" }, - { url = "https://files.pythonhosted.org/packages/ec/55/243411f360cc27bae5f8e21c16f1a8d87674c5534f4558e8a97c1e0d1c6f/wrapt-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fe089d9f5a4a3dea0108a8ae34bced114d0c4cca417bada1c5e8f42d98af9050", size = 121230, upload-time = "2025-10-19T23:47:01.347Z" }, - { url = "https://files.pythonhosted.org/packages/d6/23/2f21f692c3b3f0857cb82708ce0c341fbac55a489d4025ae4e3fd5d5de8c/wrapt-2.0.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e761f2d2f8dbc80384af3d547b522a80e67db3e319c7b02e7fd97aded0a8a678", size = 116296, upload-time = "2025-10-19T23:47:02.659Z" }, - { url = "https://files.pythonhosted.org/packages/bd/ed/678957fad212cfb1b65b2359d62f5619f5087d1d1cf296c6a996be45171c/wrapt-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:17ba1bdc52d0c783481850996aa26cea5237720769197335abea2ae6b4c23bc0", size = 119602, upload-time = "2025-10-19T23:47:03.775Z" }, - { url = 
"https://files.pythonhosted.org/packages/dc/e3/aeb4c3b052d3eed95e61babc20dcb1a512651e098cca4b84a6896585c06a/wrapt-2.0.0-cp314-cp314-win32.whl", hash = "sha256:f73318741b141223a4674ba96992aa2291b1b3f7a5e85cb3c2c964f86171eb45", size = 58649, upload-time = "2025-10-19T23:47:07.382Z" }, - { url = "https://files.pythonhosted.org/packages/aa/2a/a71c51cb211798405b59172c7df5789a5b934b18317223cf22e0c6f852de/wrapt-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8e08d4edb13cafe7b3260f31d4de033f73d3205774540cf583bffaa4bec97db9", size = 60897, upload-time = "2025-10-19T23:47:04.862Z" }, - { url = "https://files.pythonhosted.org/packages/f8/a5/acc5628035d06f69e9144cca543ca54c33b42a5a23b6f1e8fa131026db89/wrapt-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:af01695c2b7bbd8d67b869d8e3de2b123a7bfbee0185bdd138c2775f75373b83", size = 59306, upload-time = "2025-10-19T23:47:05.883Z" }, - { url = "https://files.pythonhosted.org/packages/a7/e6/1318ca07d7fcee57e4592a78dacd9d5493b8ddd971c553a62904fb2c0cf2/wrapt-2.0.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:057f02c13cce7b26c79624c06a3e1c2353e6dc9708525232232f6768118042ca", size = 81987, upload-time = "2025-10-19T23:47:08.7Z" }, - { url = "https://files.pythonhosted.org/packages/e7/bf/ffac358ddf61c3923d94a8b0e7620f2af1cd1b637a0fe4963a3919aa62b7/wrapt-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:79bdd84570267f3f43d609c892ae2d30b91ee4b8614c2cbfd311a2965f1c9bdb", size = 62902, upload-time = "2025-10-19T23:47:10.248Z" }, - { url = "https://files.pythonhosted.org/packages/b5/af/387c51f9e7b544fe95d852fc94f9f3866e3f7d7d39c2ee65041752f90bc2/wrapt-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:93c8b4f4d54fd401a817abbfc9bf482aa72fd447f8adf19ce81d035b3f5c762c", size = 63635, upload-time = "2025-10-19T23:47:11.746Z" }, - { url = 
"https://files.pythonhosted.org/packages/7c/99/d38d8c80b9cc352531d4d539a17e3674169a5cc25a7e6e5e3c27bc29893e/wrapt-2.0.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5e09ffd31001dce71c2c2a4fc201bdba9a2f9f62b23700cf24af42266e784741", size = 152659, upload-time = "2025-10-19T23:47:15.344Z" }, - { url = "https://files.pythonhosted.org/packages/5a/2a/e154432f274e22ecf2465583386c5ceffa5e0bab3947c1c5b26cc8e7b275/wrapt-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d87c285ff04e26083c4b03546e7b74df7ba4f1f32f1dcb92e9ac13c2dbb4c379", size = 158818, upload-time = "2025-10-19T23:47:17.569Z" }, - { url = "https://files.pythonhosted.org/packages/c5/7a/3a40c453300e2898e99c27495b8109ff7cd526997d12cfb8ebd1843199a4/wrapt-2.0.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e52e50ea0a72ea48d1291cf8b8aaedcc99072d9dc5baba6b820486dcf4c67da8", size = 146113, upload-time = "2025-10-19T23:47:13.026Z" }, - { url = "https://files.pythonhosted.org/packages/9e/e2/3116a9eade8bea2bf5eedba3fa420e3c7d193d4b047440330d8eaf1098de/wrapt-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fd4c95536975895f32571073446e614d5e2810b666b64955586dcddfd438fd3", size = 155689, upload-time = "2025-10-19T23:47:19.397Z" }, - { url = "https://files.pythonhosted.org/packages/43/1c/277d3fbe9d177830ab9e54fe9253f38455b75a22d639a4bd9fa092d55ae5/wrapt-2.0.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:d6ebfe9283209220ed9de80a3e9442aab8fc2be5a9bbf8491b99e02ca9349a89", size = 144403, upload-time = "2025-10-19T23:47:20.779Z" }, - { url = "https://files.pythonhosted.org/packages/d8/37/ab6ddaf182248aac5ed925725ef4c69a510594764665ecbd95bdd4481f16/wrapt-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5d3ebd784804f146b7ea55359beb138e23cc18e5a5cc2cf26ad438723c00ce3a", size = 150307, upload-time = "2025-10-19T23:47:22.604Z" }, - { url = 
"https://files.pythonhosted.org/packages/f6/d7/df9e2d8040a3af618ff9496261cf90ca4f886fd226af0f4a69ac0c020c3b/wrapt-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:9b15940ae9debc8b40b15dc57e1ce4433f7fb9d3f8761c7fab1ddd94cb999d99", size = 60557, upload-time = "2025-10-19T23:47:26.73Z" }, - { url = "https://files.pythonhosted.org/packages/b4/c2/502bd4557a3a9199ea73cc5932cf83354bd362682162f0b14164d2e90216/wrapt-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:7a0efbbc06d3e2077476a04f55859819d23206600b4c33f791359a8e6fa3c362", size = 63988, upload-time = "2025-10-19T23:47:23.826Z" }, - { url = "https://files.pythonhosted.org/packages/1f/f2/632b13942f45db7af709f346ff38b8992c8c21b004e61ab320b0dec525fe/wrapt-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:7fec8a9455c029c8cf4ff143a53b6e7c463268d42be6c17efa847ebd2f809965", size = 60584, upload-time = "2025-10-19T23:47:25.396Z" }, - { url = "https://files.pythonhosted.org/packages/00/5c/c34575f96a0a038579683c7f10fca943c15c7946037d1d254ab9db1536ec/wrapt-2.0.0-py3-none-any.whl", hash = "sha256:02482fb0df89857e35427dfb844319417e14fae05878f295ee43fa3bf3b15502", size = 43998, upload-time = "2025-10-19T23:47:52.858Z" }, -] - [[package]] name = "xattr" version = "1.3.0" @@ -6902,55 +6685,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" }, ] -[[package]] -name = "zarr" -version = "2.18.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", -] -dependencies = [ - { name = "asciitree", marker = "python_full_version < '3.11'" }, - { name = "fasteners", marker = "python_full_version < '3.11' and sys_platform != 'emscripten'" }, - { name = 
"numcodecs", version = "0.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/23/c4/187a21ce7cf7c8f00c060dd0e04c2a81139bb7b1ab178bba83f2e1134ce2/zarr-2.18.3.tar.gz", hash = "sha256:2580d8cb6dd84621771a10d31c4d777dca8a27706a1a89b29f42d2d37e2df5ce", size = 3603224, upload-time = "2024-09-04T23:20:16.595Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ed/c9/142095e654c2b97133ff71df60979422717b29738b08bc8a1709a5d5e0d0/zarr-2.18.3-py3-none-any.whl", hash = "sha256:b1f7dfd2496f436745cdd4c7bcf8d3b4bc1dceef5fdd0d589c87130d842496dd", size = 210723, upload-time = "2024-09-04T23:20:14.491Z" }, -] - -[[package]] -name = "zarr" -version = "3.1.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", - "python_full_version >= '3.14' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.13.*' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", - "python_full_version == '3.12.*' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version 
== '3.11.*' and sys_platform != 'linux'", -] -dependencies = [ - { name = "donfig", marker = "python_full_version >= '3.11'" }, - { name = "numcodecs", version = "0.16.3", source = { registry = "https://pypi.org/simple" }, extra = ["crc32c"], marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", marker = "python_full_version >= '3.11'" }, - { name = "packaging", marker = "python_full_version >= '3.11'" }, - { name = "typing-extensions", marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d6/67/14be68a7bad15eecda09b1e81fca2420f7533645fe187bf4d6104c1aad52/zarr-3.1.3.tar.gz", hash = "sha256:01342f3e26a02ed5670db608a5576fbdb8d76acb5c280bd2d0082454b1ba6f79", size = 349125, upload-time = "2025-09-18T19:32:41.688Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/71/9de7229515a53d1cc5705ca9c411530f711a2242f962214d9dbfe2741aa4/zarr-3.1.3-py3-none-any.whl", hash = "sha256:45f67f87f65f14fa453f99dd8110a5936b7ac69f3a21981d33e90407c80c302a", size = 276427, upload-time = "2025-09-18T19:32:40.042Z" }, -] - [[package]] name = "zipp" version = "3.23.0" From 98c64b29d6a2cf2a55436bb17cc0595f022bbcba Mon Sep 17 00:00:00 2001 From: Michael Wojcikiewicz Date: Thu, 27 Nov 2025 18:21:58 -0500 Subject: [PATCH 160/334] fix: use a script to do node tainting in the cicd workflow (#2421) --- .github/workflows/cicd-main.yml | 75 ++------------------------------- 1 file changed, 3 insertions(+), 72 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 7043e022c95..eff0ad2e3fe 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -204,30 +204,7 @@ jobs: - name: Taint node for job isolation if: contains(needs.is-not-external-contributor.outputs.selected_runner, 'ephemeral') shell: bash - run: | - # Verify 
prerequisites - if [ -z "$NODE_NAME" ]; then - echo "ERROR: NODE_NAME not set" - exit 1 - fi - - if ! command -v kubectl &> /dev/null; then - echo "ERROR: kubectl not found" - exit 1 - fi - - # Apply taint - JOB_ID="${GITHUB_RUN_ID}-${GITHUB_JOB}" - echo "=== Adding node taint for job isolation ===" - echo "Node: $NODE_NAME" - echo "Job ID: $JOB_ID" - - kubectl taint node "$NODE_NAME" "github.com/job-id=${JOB_ID}:NoSchedule" --overwrite=true - kubectl label node "$NODE_NAME" \ - "github.com/workflow=${GITHUB_WORKFLOW}" \ - "github.com/run-id=${GITHUB_RUN_ID}" \ - "github.com/job=${GITHUB_JOB}" \ - --overwrite=true + run: taint-node.sh - name: Checkout uses: actions/checkout@v4 @@ -389,30 +366,7 @@ jobs: - name: Taint node for job isolation if: contains(needs.is-not-external-contributor.outputs.selected_runner, 'ephemeral') shell: bash - run: | - # Verify prerequisites - if [ -z "$NODE_NAME" ]; then - echo "ERROR: NODE_NAME not set" - exit 1 - fi - - if ! command -v kubectl &> /dev/null; then - echo "ERROR: kubectl not found" - exit 1 - fi - - # Apply taint - JOB_ID="${GITHUB_RUN_ID}-${GITHUB_JOB}" - echo "=== Adding node taint for job isolation ===" - echo "Node: $NODE_NAME" - echo "Job ID: $JOB_ID" - - kubectl taint node "$NODE_NAME" "github.com/job-id=${JOB_ID}:NoSchedule" --overwrite=true - kubectl label node "$NODE_NAME" \ - "github.com/workflow=${GITHUB_WORKFLOW}" \ - "github.com/run-id=${GITHUB_RUN_ID}" \ - "github.com/job=${GITHUB_JOB}" \ - --overwrite=true + run: taint-node.sh - name: Checkout uses: actions/checkout@v4 @@ -543,30 +497,7 @@ jobs: - name: Taint node for job isolation if: contains(needs.is-not-external-contributor.outputs.selected_runner, 'ephemeral') shell: bash - run: | - # Verify prerequisites - if [ -z "$NODE_NAME" ]; then - echo "ERROR: NODE_NAME not set" - exit 1 - fi - - if ! 
command -v kubectl &> /dev/null; then - echo "ERROR: kubectl not found" - exit 1 - fi - - # Apply taint - JOB_ID="${GITHUB_RUN_ID}-${GITHUB_JOB}" - echo "=== Adding node taint for job isolation ===" - echo "Node: $NODE_NAME" - echo "Job ID: $JOB_ID" - - kubectl taint node "$NODE_NAME" "github.com/job-id=${JOB_ID}:NoSchedule" --overwrite=true - kubectl label node "$NODE_NAME" \ - "github.com/workflow=${GITHUB_WORKFLOW}" \ - "github.com/run-id=${GITHUB_RUN_ID}" \ - "github.com/job=${GITHUB_JOB}" \ - --overwrite=true + run: taint-node.sh - name: Checkout uses: actions/checkout@v4 From 03150b48272d5fc28e03cf75ff29a1286909ed5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 28 Nov 2025 16:30:50 +0000 Subject: [PATCH 161/334] Revert "[DEV] pull main Nov 25 (#2395)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 56682f80b0db4492afeee013a07187eadfa9dc8f. Signed-off-by: oliver könig --- .github/copy-pr-bot.yaml | 2 +- .github/workflows/auto-update-copy-pr-bot.yml | 6 +- .github/workflows/cicd-main.yml | 3 + .github/workflows/community-bot.yml | 3 +- .../inference/gpt/gpt_dynamic_inference.py | 238 ++-- .../gpt/gpt_dynamic_inference_12b.sh | 10 +- .../gpt/gpt_dynamic_inference_357m.sh | 10 +- .../gpt_dynamic_inference_with_coordinator.py | 206 +--- examples/inference/gpt/utils.py | 74 +- examples/post_training/modelopt/.gitignore | 1 - examples/post_training/modelopt/ADVANCED.md | 93 +- examples/post_training/modelopt/Dockerfile | 2 +- examples/post_training/modelopt/README.md | 97 +- .../post_training/modelopt/conf/arguments.sh | 3 - .../conf/moonshotai/kimi_k2_instruct.sh | 7 - .../moonshotai/kimi_k2_instruct_export.sh | 15 - .../nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh | 42 +- .../conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh | 41 - .../{Qwen => qwen}/Qwen2.5-0.5B-Instruct.sh | 0 .../{Qwen => qwen}/Qwen2.5-7B-Instruct.sh | 0 .../conf/{Qwen => qwen}/Qwen3-0.6B.sh | 0 .../conf/{Qwen => 
qwen}/Qwen3-235B-A22B.sh | 0 .../conf/{Qwen => qwen}/Qwen3-30B-A3B.sh | 0 .../modelopt/conf/{Qwen => qwen}/Qwen3-8B.sh | 0 .../post_training/modelopt/convert_model.py | 12 +- examples/post_training/modelopt/finetune.py | 7 +- examples/post_training/modelopt/finetune.sh | 3 - examples/post_training/modelopt/prune.py | 38 +- examples/post_training/modelopt/prune.sh | 36 +- .../modelopt/slurm/env_setup_template.sh | 7 - .../post_training/modelopt/slurm/sbatch.sh | 63 - examples/post_training/modelopt/validate.sh | 8 +- gpt_builders.py | 10 - .../fsdp/src/megatron_fsdp/megatron_fsdp.py | 7 +- .../core/fusions/fused_pad_routing_map.py | 3 +- .../core/inference/communication_utils.py | 3 +- .../attention_context/mamba_metadata.py | 22 +- .../contexts/dynamic_block_allocator.py | 86 +- .../inference/contexts/dynamic_context.py | 643 +++++------ .../data_parallel_inference_coordinator.py | 70 +- megatron/core/inference/engines/__init__.py | 2 +- .../core/inference/engines/dynamic_engine.py | 883 +++++--------- .../core/inference/engines/static_engine.py | 9 +- megatron/core/inference/headers.py | 27 +- megatron/core/inference/inference_client.py | 102 +- megatron/core/inference/inference_request.py | 237 +--- megatron/core/inference/sampling_params.py | 2 +- .../text_generation_controller.py | 435 +++---- megatron/core/inference/unified_memory.py | 59 +- megatron/core/inference/utils.py | 55 - megatron/core/models/backends.py | 61 - megatron/core/models/gpt/gpt_layer_specs.py | 102 +- megatron/core/models/gpt/moe_module_specs.py | 10 +- .../core/models/mamba/mamba_layer_specs.py | 16 - megatron/core/optimizer/__init__.py | 307 ++--- megatron/core/optimizer/muon.py | 41 +- megatron/core/optimizer/optimizer.py | 1 - megatron/core/optimizer/optimizer_config.py | 75 +- megatron/core/optimizer_param_scheduler.py | 31 +- megatron/core/parallel_state.py | 1 - megatron/core/process_groups_config.py | 17 - megatron/core/safe_globals.py | 2 - megatron/core/ssm/mamba_block.py | 63 +- 
.../core/ssm/mamba_hybrid_layer_allocation.py | 7 +- megatron/core/ssm/mamba_layer.py | 2 - megatron/core/ssm/mamba_mixer.py | 32 +- .../core/tensor_parallel/inference_layers.py | 151 --- .../text/libraries/huggingface_tokenizer.py | 11 +- .../text/libraries/null_tokenizer.py | 8 - .../core/tokenizers/text/text_tokenizer.py | 16 +- megatron/core/transformer/attention.py | 37 +- megatron/core/transformer/cuda_graphs.py | 17 - .../transformer/fsdp_dtensor_checkpoint.py | 2 +- .../core/transformer/moe/token_dispatcher.py | 3 +- .../core/transformer/transformer_config.py | 10 - megatron/core/utils.py | 113 +- .../legacy/data/biencoder_dataset_utils.py | 11 +- .../datasets => legacy/data}/data_samplers.py | 168 +-- megatron/legacy/data/vit_dataset.py | 14 +- megatron/post_training/algos/__init__.py | 1 + megatron/post_training/algos/distillation.py | 601 ++++++++++ megatron/post_training/checkpointing.py | 9 +- megatron/post_training/docs/distillation.md | 2 +- megatron/post_training/generate.py | 6 +- megatron/post_training/loss_func.py | 6 +- megatron/post_training/model_builder.py | 18 +- megatron/post_training/non_loss_data_func.py | 19 +- megatron/post_training/utils.py | 3 +- megatron/rl/inference/megatron.py | 102 +- megatron/rl/rl_utils.py | 85 +- megatron/training/arguments.py | 86 +- megatron/training/checkpointing.py | 17 +- megatron/training/datasets/README.md | 34 - megatron/training/datasets/fim_dataset.py | 308 ----- megatron/training/dist_signal_handler.py | 10 +- megatron/training/global_vars.py | 9 +- megatron/training/training.py | 162 ++- pretrain_gpt.py | 66 +- .../golden_values_dev_dgx_h100.json | 287 ----- .../model_config.yaml | 56 - .../golden_values_dev_dgx_h100.json | 361 +++--- .../model_config.yaml | 2 + .../golden_values_dev_dgx_h100.json | 361 +++--- .../model_config.yaml | 2 + .../model_config.yaml | 6 +- .../golden_values_dev_dgx_h100.json | 1028 ----------------- .../model_config.yaml | 59 - .../golden_values_dev_dgx_h100.json | 158 --- 
.../model_config.yaml | 58 - .../golden_values_dev_dgx_h100.json | 158 --- .../model_config.yaml | 58 - .../golden_values_dev_dgx_h100.json | 4 +- .../model_config.yaml | 5 +- .../golden_values_dev_dgx_h100.json | 314 ++--- .../model_config.yaml | 6 +- .../golden_values_dev_dgx_h100.json | 135 --- .../model_config.yaml | 72 -- .../golden_values_dev_dgx_h100.json | 2 +- .../model_config.yaml | 1 - .../model_config.yaml | 1 - .../python_scripts/auto_reminder_github.py | 29 +- ...pt-dynamic-inference-with-coordinator.yaml | 16 +- tests/test_utils/recipes/gpt.yaml | 5 - .../recipes/mamba-dynamic-inference.yaml | 61 - tests/unit_tests/data/test_fim_dataset.py | 87 -- .../contexts/test_dynamic_context.py | 251 ++-- .../inference/engines/test_dynamic_engine.py | 398 +++---- .../inference/engines/test_static_engine.py | 17 +- ...est_data_parallel_inference_coordinator.py | 471 -------- .../inference/test_wandb_logging.py | 26 +- .../test_simple_text_generation_controller.py | 96 +- tests/unit_tests/test_checkpointing.py | 45 +- .../unit_tests/test_process_groups_config.py | 23 - tests/unit_tests/test_rl_utils.py | 656 ----------- .../transformer/moe/test_token_dispatcher.py | 5 +- tools/run_inference_performance_test.py | 16 +- train_rl.py | 2 +- 137 files changed, 3400 insertions(+), 8493 deletions(-) delete mode 100644 examples/post_training/modelopt/.gitignore delete mode 100644 examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct.sh delete mode 100644 examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct_export.sh mode change 120000 => 100644 examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh delete mode 100644 examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh rename examples/post_training/modelopt/conf/{Qwen => qwen}/Qwen2.5-0.5B-Instruct.sh (100%) rename examples/post_training/modelopt/conf/{Qwen => qwen}/Qwen2.5-7B-Instruct.sh (100%) rename examples/post_training/modelopt/conf/{Qwen => 
qwen}/Qwen3-0.6B.sh (100%) rename examples/post_training/modelopt/conf/{Qwen => qwen}/Qwen3-235B-A22B.sh (100%) rename examples/post_training/modelopt/conf/{Qwen => qwen}/Qwen3-30B-A3B.sh (100%) rename examples/post_training/modelopt/conf/{Qwen => qwen}/Qwen3-8B.sh (100%) delete mode 100644 examples/post_training/modelopt/slurm/env_setup_template.sh delete mode 100644 examples/post_training/modelopt/slurm/sbatch.sh delete mode 100644 megatron/core/tensor_parallel/inference_layers.py rename megatron/{training/datasets => legacy/data}/data_samplers.py (56%) create mode 100644 megatron/post_training/algos/__init__.py create mode 100644 megatron/post_training/algos/distillation.py delete mode 100644 megatron/training/datasets/README.md delete mode 100644 megatron/training/datasets/fim_dataset.py delete mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json delete mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml delete mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json delete mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml delete mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json delete mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/model_config.yaml delete mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json delete mode 100644 tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/model_config.yaml delete mode 100644 
tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/golden_values_dev_dgx_h100.json delete mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/model_config.yaml delete mode 100644 tests/test_utils/recipes/mamba-dynamic-inference.yaml delete mode 100644 tests/unit_tests/data/test_fim_dataset.py delete mode 100644 tests/unit_tests/inference/test_data_parallel_inference_coordinator.py delete mode 100644 tests/unit_tests/test_rl_utils.py diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml index 8e703301ca7..7013df60dc2 100644 --- a/.github/copy-pr-bot.yaml +++ b/.github/copy-pr-bot.yaml @@ -1,4 +1,4 @@ enabled: true auto_sync_draft: false auto_sync_ready: true -trustees_override: ["AAnoosheh", "ArEsKay3", "Autumn1998", "BestJuly", "BoxiangW", "ChenhanYu", "FDecaYed", "HaochenYuan", "ISEEKYAN", "JRD971000", "QiZhangNV", "ShriyaRishab", "Victarry", "Wohox", "ZhiyuLi-Nvidia", "aklife97", "ananthsub", "asolergi-nv", "buptzyb", "chtruong814", "cspades", "cuichenx", "deepakn94", "dimapihtar", "duncanriach", "erhoo82", "ericharper", "fanshiqing", "gautham-kollu", "guyueh1", "hxbai", "jaredcasper", "jiemingz", "jkamalu", "jon-barker", "kanz-nv", "kevalmorabia97", "ko3n1g", "kunlunl", "kvareddy", "layalir", "lhb8125", "lmcafee-nvidia", "maanug-nv", "mathemakitten", "matthieule", "mehraakash", "mkhona-nvidia", "pablo-garay", "parthmannan", "pthombre", "rogerwaleffe", "sanandaraj5597", "santhnm2", "sbak5", "shanmugamr1992", "shifangx", "shjwudp", "sidsingh-nvidia", "skyw", "tdene", "theothermike", "thomasdhc", "trintamaki", "tylerpoon", "wdykas", "xiaoyao0115", "xuwchen", "yanring", "yaox12", "yaoyu-33", "yashaswikarnati", "yeyu-nvidia", "yobibyte", "youngeunkwon0405", "yuzhongw-nvidia", "zhongbozhu"] +trustees_override: ["AAnoosheh", "ArEsKay3", "Autumn1998", "BestJuly", "BoxiangW", "ChenhanYu", "FDecaYed", "HaochenYuan", "ISEEKYAN", "JRD971000", "QiZhangNV", "ShriyaRishab", "Victarry", 
"Wohox", "ZhiyuLi-Nvidia", "aklife97", "ananthsub", "asolergi-nv", "buptzyb", "chtruong814", "cspades", "cuichenx", "deepakn94", "dimapihtar", "duncanriach", "erhoo82", "ericharper", "fanshiqing", "gautham-kollu", "hxbai", "jaredcasper", "jiemingz", "jkamalu", "jon-barker", "kanz-nv", "kevalmorabia97", "ko3n1g", "kunlunl", "kvareddy", "layalir", "lhb8125", "lmcafee-nvidia", "maanug-nv", "mathemakitten", "matthieule", "mehraakash", "mkhona-nvidia", "pablo-garay", "parthmannan", "pthombre", "rogerwaleffe", "sanandaraj5597", "santhnm2", "sbak5", "shanmugamr1992", "shifangx", "shjwudp", "sidsingh-nvidia", "skyw", "tdene", "theothermike", "thomasdhc", "trintamaki", "tylerpoon", "wdykas", "xiaoyao0115", "xuwchen", "yanring", "yaox12", "yaoyu-33", "yashaswikarnati", "yobibyte", "youngeunkwon0405", "yuzhongw-nvidia", "zhongbozhu"] diff --git a/.github/workflows/auto-update-copy-pr-bot.yml b/.github/workflows/auto-update-copy-pr-bot.yml index b04d34251f0..969c46e3fdd 100644 --- a/.github/workflows/auto-update-copy-pr-bot.yml +++ b/.github/workflows/auto-update-copy-pr-bot.yml @@ -48,10 +48,8 @@ jobs: mv .github/copy-pr-bot.yaml.new .github/copy-pr-bot.yaml - name: Commit changes - env: - GH_TOKEN: ${{ secrets.PAT }} run: | - git remote set-url origin https://x-access-token:${GH_TOKEN}@github.com/NVIDIA/Megatron-LM.git + git remote set-url origin https://x-access-token:${{ secrets.PAT }}@github.com/NVIDIA/Megatron-LM.git git config --global user.name "GitHub Actions" git config --global user.email "github-actions[bot]@users.noreply.github.com" git add .github/copy-pr-bot.yaml @@ -60,4 +58,4 @@ jobs: exit 0 fi git commit -m "Update copy-pr-bot.yaml [skip ci]" - git push -u origin main + git push diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index eff0ad2e3fe..a5a7a82287e 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -40,6 +40,7 @@ env: jobs: is-not-external-contributor: runs-on: ubuntu-latest + 
environment: nemo-ci if: github.repository == 'NVIDIA/Megatron-LM' outputs: is_external_contributor: ${{ github.event.pull_request.user.type == 'User' }} @@ -387,6 +388,7 @@ jobs: - cicd-wait-in-queue - cicd-container-build - cicd-unit-tests-latest + environment: nemo-ci if: | ( success() @@ -566,6 +568,7 @@ jobs: && needs.pre-flight.outputs.is_ci_workload == 'false' && !cancelled() && github.repository == 'NVIDIA/Megatron-LM' + environment: nemo-ci steps: - name: Generate fake coverage report uses: actions/github-script@v6 diff --git a/.github/workflows/community-bot.yml b/.github/workflows/community-bot.yml index 1a98ece0f85..3b102894e1f 100644 --- a/.github/workflows/community-bot.yml +++ b/.github/workflows/community-bot.yml @@ -22,8 +22,7 @@ on: jobs: community-bot: uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_community_bot.yml@v0.65.10 - with: - community_project_id: ${{ vars.COMMUNITY_PROJECT_ID }} if: github.repository == 'NVIDIA/Megatron-LM' secrets: GH_TOKEN: ${{ secrets.PAT }} + environment: main diff --git a/examples/inference/gpt/gpt_dynamic_inference.py b/examples/inference/gpt/gpt_dynamic_inference.py index 1a537870020..251aa100cba 100644 --- a/examples/inference/gpt/gpt_dynamic_inference.py +++ b/examples/inference/gpt/gpt_dynamic_inference.py @@ -1,7 +1,6 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import hashlib -import io import json import math import os @@ -14,26 +13,14 @@ from tqdm import tqdm from typing import Dict, List, Tuple, Optional -sys.path.append( - os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)) -) +import torch +from tqdm import tqdm -import megatron -from examples.inference.gpt.utils import ( - Request, - add_common_inference_args, - build_dynamic_engine_setup_prefix, - build_requests, - get_curr_time, -) from megatron.core.inference.contexts.dynamic_context import ( ContextOverflowError, DynamicInferenceContext, ) -from megatron.core.inference.contexts.attention_context.mamba_metadata import ( - MambaInferenceStateConfig, -) -from megatron.core.inference.engines import DynamicInferenceEngine, EngineSuspendedError +from megatron.core.inference.engines import DynamicInferenceEngine from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import ( GPTInferenceWrapper, ) @@ -41,9 +28,10 @@ from megatron.core.inference.text_generation_controllers.text_generation_controller import ( TextGenerationController, ) +from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols from megatron.core.tokenizers.text.utils.build_tokenizer import build_tokenizer from megatron.core.transformer.module import MegatronModule -from megatron.core.utils import get_mamba_inference_state_config_from_model +from megatron.core.utils import get_attr_wrapped_model sys.path.append( os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)) @@ -65,14 +53,14 @@ build_requests, get_curr_time, ) +from megatron.training import get_args +from megatron.training import get_model as _get_model +from megatron.training import get_tokenizer, initialize_megatron from megatron.training.checkpointing import load_checkpoint -from model_provider import model_provider -from gpt_builders import gpt_builder - -torch.serialization.add_safe_globals([io.BytesIO]) 
-torch.serialization.add_safe_globals([megatron.core.rerun_state_machine.RerunState]) -torch.serialization.add_safe_globals([megatron.core.rerun_state_machine.RerunDiagnostic]) +import torch +import io +import megatron def add_dynamic_inference_args(parser: ArgumentParser) -> ArgumentParser: @@ -88,24 +76,9 @@ def add_dynamic_inference_args(parser: ArgumentParser) -> ArgumentParser: ) group.add_argument( "--termination-id", type=int, default=None, - help="Termination ID that overrides `tokenizer.eod`.", - ) - group.add_argument( - "--suspend-resume-interval", type=int, default=None, - help="Suspend and resume the dynamic engine every " - "`suspend_resume_interval` steps. This is used to tet the suspend/resume " - "system.", - ) - group.add_argument( - "--inference-repeat-n", type=int, default=1, - help="Repeat inference iterations N times for benchmarking." - ) - group.add_argument( - "--throughput-check-only", - action='store_true', - default=False, - help="If true, only run throughput check without verifying outputs." + help="Termination ID that overrides `tokenizer.eod`." ) + group.add_argument('--inference-repeat-n', type=int, default=1, help="Repeat inference iterations N times for benchmarking.") return parser @@ -152,12 +125,13 @@ def get_inference_context( requests: List[Request], sampling_params: Optional[SamplingParams] = None, calculate_max_sequence_length_from_requests: bool = True, - mamba_inference_state_config: Optional[MambaInferenceStateConfig] = None, + layer_type_list: Optional[List[str]] = None, + mamba_conv_states_shape: Optional[Tuple[int]] = None, + mamba_ssm_states_shape: Optional[Tuple[int]] = None, ): """The inference context manages the KV cache and other inference state.""" args = get_args() - # Max sequence length. if calculate_max_sequence_length_from_requests: max_gen_length = sampling_params.num_tokens_to_generate @@ -173,7 +147,7 @@ def get_inference_context( # Inference context. 
context = DynamicInferenceContext( params_dtype=args.params_dtype, - num_layers=args.num_layers // args.pipeline_model_parallel_size, + num_layers=args.num_layers, kv_channels=args.kv_channels, num_attention_heads=( args.num_query_groups if args.group_query_attention else args.num_attention_heads @@ -186,10 +160,15 @@ def get_inference_context( ), block_size_tokens=args.inference_dynamic_batching_block_size, buffer_size_gb=args.inference_dynamic_batching_buffer_size_gb, - max_tokens=args.inference_dynamic_batching_max_tokens, + buffer_guaranteed_fraction=args.inference_dynamic_batching_buffer_guaranteed_fraction, + buffer_overflow_factor=args.inference_dynamic_batching_buffer_overflow_factor, + max_requests_override=args.inference_dynamic_batching_max_requests_override, + max_tokens_override=args.inference_dynamic_batching_max_tokens_override, tensor_model_parallel_size=args.tensor_model_parallel_size, materialize_only_last_token_logits=not args.return_log_probs, - mamba_inference_state_config=mamba_inference_state_config, + layer_type_list=layer_type_list, + mamba_conv_states_shape=mamba_conv_states_shape, + mamba_ssm_states_shape=mamba_ssm_states_shape, cache_mla_latent=args.multi_latent_attention and args.cache_mla_latents, kv_lora_rank=args.kv_lora_rank if args.multi_latent_attention else None, qk_pos_emb_head_dim=args.qk_pos_emb_head_dim, @@ -271,12 +250,12 @@ def run_inference( num_requests_total = len(requests) num_requests_added = 0 num_requests_finished = 0 + step_id = 0 step_times = {"prefill": [], "decode": []} add_times = [] output_times = [] tbar = tqdm(total=num_requests_total) total_output_tokens = 0 - attempted_step_count = 0 if args.cuda_graph_impl == "local": cuda_graph_request_count_map = {r:0 for r in engine.context.cuda_graph_request_counts} else: @@ -319,37 +298,10 @@ def _add_request(): # Step inference engine (i.e., generate a token for each active request). 
# Before step, we haven't done the scheduling, so we cannot know the is_decode_only - try: - result = engine.step_modern(verbose=True) - except EngineSuspendedError as e: - result = e - pass # ignore error in order to call 'engine.resume()' below. - attempted_step_count += 1 - + result = engine.step_modern(verbose=True) # After step, we lost track of last iteration's is_decode_only, so we need to get it from the engine is_decode_only = engine.is_decode_only - - # Test suspending and resuming engine. - if args.suspend_resume_interval is not None: - - # Suspend. - if attempted_step_count % args.suspend_resume_interval == 0: - print("**** step %d/%d ... suspend." % (engine.step_count, attempted_step_count)) - engine.suspend() - - # Resume, 0+ attempted steps later. - if ( - attempted_step_count > 0 - and - (attempted_step_count - args.suspend_resume_interval // 2) - % args.suspend_resume_interval == 0 - ): - print("**** step %d/%d ... resume." % (engine.step_count, attempted_step_count)) - engine.resume() - - # If engine suspended, continue to next iter. - if isinstance(result, EngineSuspendedError): - continue + step_id += 1 # Record cuda_graph_request_count. cuda_graph_request_count = result["cuda_graph_request_count"] @@ -357,10 +309,10 @@ def _add_request(): cuda_graph_request_count_map[cuda_graph_request_count] += 1 # Update requests. - active_request_ids = result["active_request_ids"] - finished_request_records = result["finished_request_records"] + active_requests = result["active_requests"] + finished_requests = result["finished_requests"] step_time = result["step_time"] - if len(active_request_ids) > 0 or len(finished_request_records) > 0: + if len(active_requests) > 0 or len(finished_requests) > 0: if is_decode_only: step_times["decode"].append(step_time) else: @@ -368,26 +320,14 @@ def _add_request(): # Append output tokens. 
output_start = get_curr_time() - for finished_request_record in finished_request_records: - - finished_request = finished_request_record.merge(engine.controller.tokenizer) - - # Update local request object. + for finished_request in finished_requests: request = requests[finished_request.request_id] + request.output_tokens = finished_request.generated_tokens + total_output_tokens += len(request.output_tokens) request.time_end = get_curr_time() + request.output_text = finished_request.generated_text request.state = "finished" request.request_id = finished_request.request_id - - # Update prompt, in case engine has been suspended and resumed. - request.prompt_tokens = finished_request.prompt_tokens - request.prompt_text = finished_request.prompt - - # Get output tokens and text. - request.output_tokens = finished_request.generated_tokens - request.output_text = finished_request.generated_text - total_output_tokens += len(request.output_tokens) - - # Log probs. if finished_request.sampling_params.return_log_probs: request.log_probs = ( finished_request.prompt_log_probs + finished_request.generated_log_probs @@ -441,14 +381,23 @@ def main(): model = get_model() - mamba_inference_state_config = get_mamba_inference_state_config_from_model(model) + # Layer type list for hybrid models + decoder = get_attr_wrapped_model(model, "decoder") + layer_type_list = getattr(decoder, "layer_type_list", None) + if layer_type_list is not None and Symbols.MAMBA in layer_type_list: + (mamba_conv_states_shape, mamba_ssm_states_shape) = decoder.mamba_state_shapes_per_request() + else: + mamba_conv_states_shape = None + mamba_ssm_states_shape = None # Requests, context, controller. 
requests = build_requests(args, tokenizer, sampling_params) context = get_inference_context( requests, sampling_params, - mamba_inference_state_config=mamba_inference_state_config, + layer_type_list=layer_type_list, + mamba_conv_states_shape=mamba_conv_states_shape, + mamba_ssm_states_shape=mamba_ssm_states_shape, ) controller = get_inference_controller(model, context) @@ -514,9 +463,7 @@ def escape_str(s): unique_prompt_map[request.prompt_text].append(request_idx) # Print unique prompts + outputs. - text_hashes = [] for unique_idx, (prompt_text, request_idxs) in enumerate(unique_prompt_map.items()): - # ---- Prompt summary line ---- prompt_len = len(requests[request_idxs[0]].prompt_tokens) escaped_prompt_text = escape_str(prompt_text) @@ -531,20 +478,15 @@ def escape_str(s): # ---- Print each unique output ---- for output_text, output_request_idxs in output_map.items(): if output_text is not None: - # Use hash of prompt + generated text in case engine was - # suspended and resumed, which misaligns boundary between - # prompt and generated tokens. - o_hash = hashlib.sha256( - (prompt_text + output_text).encode() - ).hexdigest()[:6] + o_hash = hashlib.sha256(output_text.encode()).hexdigest()[:6] o_len = len(requests[output_request_idxs[0]].output_tokens) escaped_output_text = escape_str(output_text) + print(f" >>>> [n {len(output_request_idxs)}, l {o_len}, hash {o_hash}] {escaped_output_text}") else: o_hash = "--" o_len = 0 escaped_output_text = "--" - print(f" >>>> [n {len(output_request_idxs)}, {o_len} tokens, hash {o_hash}] {escaped_output_text}") - text_hashes.append(o_hash) + print(f" >>>> [n {len(output_request_idxs)}, {o_len} tokens, hash {o_hash}] {escaped_output_text}") # Write results to JSON. Primarily used for functional testing. if args.output_path: @@ -572,49 +514,47 @@ def escape_str(s): with open(args.output_path, "w") as fp: json.dump(json_results, fp, indent=1) - # Timing results. 
- stats = torch.cuda.memory_stats() - throughput = total_output_tokens / total_time - print("~~~") - peak_alloc_gb = stats["allocated_bytes.all.peak"] / 1024**3 - peak_resvd_gb = stats["reserved_bytes.all.peak"] / 1024**3 - - p_times = step_times["prefill"] - d_times = step_times["decode"] - - p_total = sum(p_times) - d_total = sum(d_times) - - p_count = len(p_times) - d_count = len(d_times) - - p_mean = p_total / p_count - d_mean = d_total / d_count if d_count != 0 else 0. - - # Commented out for now as the step/add/output times are not calculated correctly. - # print( - # f"{setup_prefix} … " - # f"mem {peak_alloc_gb:.1f}/{peak_resvd_gb:.1f} GB … " - # f"total time: {step_total:.3f}s … " - # f"step time: total {step_total:.3f}s " - # f"[ p {p_total:.3f}s, d {d_total:.3f}s ], " - # f"mean [ p {p_mean:.3f}s, d {d_mean:.3f}s ], " - # f"count [ p {p_count}, d {d_count} ]." - # ) - capture_str = ( - f"{engine.capture_stats['time']:.2f} sec" - if engine.capture_stats else - "--" - ) - print( - f"{setup_prefix} … " - f"throughput: {throughput:.3f} tok/s", - f"total time: {total_time:.3f}s … " - f"mem {peak_alloc_gb:.1f}/{peak_resvd_gb:.1f} GB … " - f"steps: {engine.step_count:d} … " - f"capture {capture_str} … " - ) - print("~~~") + # Timing results. + print("~~~") + peak_alloc_gb = stats["allocated_bytes.all.peak"] / 1024**3 + peak_resvd_gb = stats["reserved_bytes.all.peak"] / 1024**3 + + p_times = step_times["prefill"] + d_times = step_times["decode"] + + p_total = sum(p_times) + d_total = sum(d_times) + + p_count = len(p_times) + d_count = len(d_times) + + p_mean = p_total / p_count + d_mean = d_total / d_count + + # Commented out for now as the step/add/output times are not calculated correctly. 
+ # print( + # f"{setup_prefix} … " + # f"mem {peak_alloc_gb:.1f}/{peak_resvd_gb:.1f} GB … " + # f"total time: {step_total:.3f}s … " + # f"step time: total {step_total:.3f}s " + # f"[ p {p_total:.3f}s, d {d_total:.3f}s ], " + # f"mean [ p {p_mean:.3f}s, d {d_mean:.3f}s ], " + # f"count [ p {p_count}, d {d_count} ]." + # ) + capture_str = ( + f"{engine.capture_stats['time']:.2f} sec" + if engine.capture_stats else + "--" + ) + print( + f"{setup_prefix} … " + f"capture {capture_str} … " + f"mem {peak_alloc_gb:.1f}/{peak_resvd_gb:.1f} GB … " + f"total time: {total_time:.3f}s … " + f"steps: {engine.step_count:d} … " + f"throughput: {throughput:.3f} tok/s" + ) + print("~~~") # Stop Nsight profiler. if os.environ.get("NSIGHT_PREFIX"): diff --git a/examples/inference/gpt/gpt_dynamic_inference_12b.sh b/examples/inference/gpt/gpt_dynamic_inference_12b.sh index 20f1a29cb5b..a16fe5176d5 100644 --- a/examples/inference/gpt/gpt_dynamic_inference_12b.sh +++ b/examples/inference/gpt/gpt_dynamic_inference_12b.sh @@ -24,9 +24,13 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1 # Dynamic context. : ${BUFFER_SIZE_GB=50.} +: ${BUFFER_OVERFLOW_FACTOR=1.} +: ${BUFFER_GUARANTEED_FRACTION=0.05} # Cuda graphs. +: ${CUDA_GRAPH_IMPL=local} : ${NUM_CUDA_GRAPHS=16} +: ${CUDA_GRAPH_SHARE_IO_BUFFERS=1} # Miscellaneous. : ${USE_COORDINATOR=0} @@ -75,6 +79,8 @@ ARGS=" \ \ --inference-dynamic-batching \ --inference-dynamic-batching-buffer-size-gb ${BUFFER_SIZE_GB} \ + --inference-dynamic-batching-buffer-overflow-factor ${BUFFER_OVERFLOW_FACTOR} \ + --inference-dynamic-batching-buffer-guaranteed-fraction ${BUFFER_GUARANTEED_FRACTION} \ \ ${EXTRA_ARGS} \ " @@ -85,10 +91,6 @@ if [ "${NUM_CUDA_GRAPHS}" != "0" ]; then --cuda-graph-impl local \ --inference-dynamic-batching-num-cuda-graphs ${NUM_CUDA_GRAPHS} \ " -else - ARGS+=" \ - --cuda-graph-impl none \ - " fi # Prompts. 
diff --git a/examples/inference/gpt/gpt_dynamic_inference_357m.sh b/examples/inference/gpt/gpt_dynamic_inference_357m.sh index 215cc2bac8f..c095371714f 100644 --- a/examples/inference/gpt/gpt_dynamic_inference_357m.sh +++ b/examples/inference/gpt/gpt_dynamic_inference_357m.sh @@ -25,9 +25,13 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1 # Dynamic context. : ${BUFFER_SIZE_GB=50.} +: ${BUFFER_OVERFLOW_FACTOR=1.} +: ${BUFFER_GUARANTEED_FRACTION=0.05} # Cuda graphs. +: ${CUDA_GRAPH_IMPL=local} : ${NUM_CUDA_GRAPHS=16} +: ${CUDA_GRAPH_SHARE_IO_BUFFERS=1} # Miscellaneous. : ${USE_COORDINATOR=0} @@ -61,6 +65,8 @@ ARGS=" \ \ --inference-dynamic-batching \ --inference-dynamic-batching-buffer-size-gb ${BUFFER_SIZE_GB} \ + --inference-dynamic-batching-buffer-overflow-factor ${BUFFER_OVERFLOW_FACTOR} \ + --inference-dynamic-batching-buffer-guaranteed-fraction ${BUFFER_GUARANTEED_FRACTION} \ \ ${EXTRA_ARGS} \ " @@ -71,10 +77,6 @@ if [ "${NUM_CUDA_GRAPHS}" != "0" ]; then --cuda-graph-impl local \ --inference-dynamic-batching-num-cuda-graphs ${NUM_CUDA_GRAPHS} \ " -else - ARGS+=" \ - --cuda-graph-impl none \ - " fi # Prompts. diff --git a/examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py b/examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py index 7869002fff3..9e2b6bfa983 100644 --- a/examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py +++ b/examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py @@ -1,41 +1,26 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+from megatron.core.inference.inference_client import InferenceClient +from examples.inference.gpt.utils import add_common_inference_args import asyncio -import json -import os -import time -import torch import torch.distributed as dist -from collections import defaultdict +from examples.inference.gpt.gpt_dynamic_inference import get_model, get_inference_context, get_inference_controller, add_dynamic_inference_args +from megatron.core.inference.inference_request import DynamicInferenceRequest +from megatron.training import initialize_megatron +import torch +import os +from megatron.training import get_args, get_tokenizer +from megatron.core.inference.sampling_params import SamplingParams +from examples.inference.gpt.utils import build_requests, build_dynamic_engine_setup_prefix, Request +from megatron.core.inference.engines import DynamicInferenceEngine +import time from tqdm import tqdm from typing import List -import warnings -import logging - -from examples.inference.gpt.gpt_dynamic_inference import ( - add_dynamic_inference_args, - get_inference_context, - get_inference_controller, - get_model, -) -from examples.inference.gpt.utils import ( - Request, - build_dynamic_engine_setup_prefix, - build_requests, - add_common_inference_args -) - -from megatron.core import parallel_state -from megatron.core.inference.engines import DynamicInferenceEngine -from megatron.core.inference.inference_client import InferenceClient -from megatron.core.inference.inference_request import DynamicInferenceRequestRecord -from megatron.core.inference.sampling_params import SamplingParams -from megatron.core.utils import get_mamba_inference_state_config_from_model - -from megatron.training import get_args, get_tokenizer, initialize_megatron +import json from megatron.training.arguments import parse_args +from megatron.core import parallel_state -# pylint: disable=line-too-long +import logging logging.basicConfig(level=logging.INFO, force=True) @@ -53,45 +38,19 @@ async def main( ) # 
once you call engine.start_listening_to_data_parallel_coordinator, # the engine will start accepting requests from the data parallel coordinator. - # and processing them in an asyncio coroutine. - - await engine.start_listening_to_data_parallel_coordinator( - inference_coordinator_port=port, - launch_inference_coordinator=True, - verbose=True, + # and processing them in an asyncio coroutine. + await engine.start_listening_to_data_parallel_coordinator( + inference_coordinator_port=port, launch_inference_coordinator=True ) - - # if you want to use your own inference coordinator - + # if you want to use your own inference coordinator - # 1. set launch_inference_coordinator to False # 2. setup a router socket at tcp://MASTER_ADDR:PORT # 3. wait for data parallel groups to establish connection (BasicInferenceCoordinator.__init__) # 4. look at InferenceCoordinator.start() to see how we can route requests from users <-> data parallel groups - # based on headers. - # 5. look at InferenceClient to see how we create requests with headers. - - args = get_args() - - # Test suspend/resume intervals. - if args.suspend_resume_interval is not None: - # Since the client doesn't directly call engine.async_step here, we test - # the suspend-resume system ~4 times. - suspend_resume_interval = max(1, len(requests) // 4) - suspend_idxs = set(range( - suspend_resume_interval, - len(requests) + 1, - suspend_resume_interval, - )) - resume_idxs = set( - min(len(requests), i + suspend_resume_interval // 2) - for i in suspend_idxs - ) - else: - suspend_idxs = set() - resume_idxs = set() - - # Create client and run example. - if dist.get_rank() == 0: - client = InferenceClient(port) # submits requests to the inference coordinator + # based on headers. + # 5. look at InferenceClient to see how we create requests with headers. 
+ if dist.get_rank() == 0: + client = InferenceClient(port) # submits requests to the inference coordinator await client.start() base_arrival_time = time.time_ns() / 10**9 for request in requests: @@ -99,104 +58,61 @@ async def main( futures = [] num_requests_total = len(requests) num_requests_added = 0 - + #tbar = tqdm(total=num_requests_total) while True: current_time = time.time_ns() / 10**9 - if args.incoming_requests_per_step is None: - # Only add requests that have arrived at the current time. - while num_requests_added < num_requests_total and requests[num_requests_added].time_arrival <= current_time: - request = requests[num_requests_added] - # These add-request calls will queue up the request on a zmq socket and return - # instantaneously. They will return an asyncio future which can be awaited for - # request completion. - futures.append(client.add_request(request.prompt_text, request.sampling_params)) - num_requests_added += 1 - - # Test suspend/resume. - if num_requests_added in suspend_idxs: - client.suspend_engines() - if num_requests_added in resume_idxs: - client.resume_engines() - - else: - # Add deterministic number of requests (generally used for debugging). - for i in range(min( - args.incoming_requests_per_step, - num_requests_total - num_requests_added - )): - # Change sampling parameters to force different generation lengths. - request = requests[num_requests_added] - n = request.sampling_params.num_tokens_to_generate - request.sampling_params.num_tokens_to_generate = n + i - futures.append(client.add_request(request.prompt_text, request.sampling_params)) - num_requests_added += 1 - - # Test suspend/resume. - if num_requests_added in suspend_idxs: - client.suspend_engines() - if num_requests_added in resume_idxs: - client.resume_engines() - + # Only add requests that have arrived at the current time. 
+ while num_requests_added < num_requests_total and requests[num_requests_added].time_arrival <= current_time: + request = requests[num_requests_added] + # These add-request calls will queue up the request on a zmq socket and return + # instantaneously. They will return an asyncio future which can be awaited for + # request completion. + futures.append(client.add_request(request.prompt_text, request.sampling_params)) + num_requests_added += 1 + #tbar.update(1) if num_requests_added == num_requests_total: break - # Relinquish control since there are no more requests to add at the moment. This allows the engine to run. + # Relinquish control since there are no more requests to add at the moment. This allows the engine to run. await asyncio.sleep(0) - # While we wait for the requests to complete, the engine runs in the background. - results: List[DynamicInferenceRequestRecord] = await asyncio.gather(*futures) + results: List[DynamicInferenceRequest] = await asyncio.gather(*futures) + if dist.get_rank() == 0: # Write results to JSON. Primarily used for functional testing. if args.output_path: json_results = {} - throughputs = [] - for record in results: - req = record.merge(engine.controller.tokenizer) + for req in results: result_dict = { "input_prompt": req.prompt, "generated_text": req.generated_text.replace("\n", "\\n"), "generated_tokens": req.generated_tokens, - "latency": req.latency, # InferenceClient populates this field in the returned future. + "latency": req.latency, #InferenceClient populates this field in the returned future. 
} if req.sampling_params["return_log_probs"]: result_dict["logprobs"] = req.prompt_log_probs + req.generated_log_probs - throughput = len(req.generated_tokens) / req.latency - throughputs.append(throughput) json_results[req.request_id] = result_dict - throughput_dict = {"throughput": throughputs} - if args.throughput_check_only: - json_results = throughput_dict with open(args.output_path, "w") as fp: json.dump(json_results, fp, indent=4) else: print("Results:") - unique_prompt_map = defaultdict(list) - for record in results: - req = record.merge(engine.controller.tokenizer) - unique_prompt_map[req.prompt].append(req) - for idx, (prompt_text, reqs) in enumerate(unique_prompt_map.items()): - print(f"%d/%d. prompt '%s' ... [%d] output '%s'." % ( - idx, - len(unique_prompt_map), - prompt_text.replace("\n", "\\n"), - len(reqs), - reqs[0].generated_text.replace("\n", "\\n"), - )) - + for req in results: + print(f"rid: {req.request_id}\nprompt: {req.prompt!r}\noutput: {req.generated_text!r}\n\n") + # kill the engines and suspend the client client.stop_engines() client.stop() - + # once the stop signal eventually makes its way to each GPU, the engines will stop. await asyncio.gather(engine.engine_loop_task) - if __name__ == "__main__": - # enable inference mode in the very beginning as some fp-8 optimizations + # enable inference mode in the very beginning as some fp-8 optimizations # check for it. with torch.inference_mode(): initialize_megatron( + #parsed_args=args extra_args_provider=add_dynamic_inference_args, args_defaults={'no_load_rng': True, 'no_load_optim': True}, ) @@ -215,25 +131,17 @@ async def main( top_p=args.top_p, return_log_probs=args.return_log_probs, num_tokens_to_generate=args.num_tokens_to_generate, - termination_id=( - args.termination_id if args.termination_id is not None else tokenizer.eod - ), + termination_id=args.termination_id if args.termination_id is not None else tokenizer.eod, ) # Requests, context, conroller. 
model = get_model() - mamba_inference_state_config = get_mamba_inference_state_config_from_model(model) - requests = ( - build_requests(args, tokenizer, sampling_params) if dist.get_rank() == 0 else None - ) - - context = get_inference_context( - None, - None, - calculate_max_sequence_length_from_requests=False, - mamba_inference_state_config=mamba_inference_state_config, - ) + requests = build_requests(args, tokenizer, sampling_params) if dist.get_rank() == 0 else None + context = get_inference_context(None, + None, + calculate_max_sequence_length_from_requests=False) + controller = get_inference_controller(model, context) # Inference engine. @@ -242,19 +150,17 @@ async def main( context, enable_cuda_graph=args.cuda_graph_impl == "local", random_seed=args.seed, - enable_chunked_prefill=not args.disable_chunked_prefill, + enable_chunked_prefill=not args.disable_chunked_prefill ) + if dist.get_rank() == 0: setup_prefix = build_dynamic_engine_setup_prefix(args, model, context, requests) print("~~~") print(setup_prefix) print("~~~") + + asyncio.run(main(engine, + requests, + args.inference_coordinator_port)) - asyncio.run( - main( - engine, - requests, - args.inference_coordinator_port, - ) - ) diff --git a/examples/inference/gpt/utils.py b/examples/inference/gpt/utils.py index efd4fdab4fc..0ea1f5a3df0 100644 --- a/examples/inference/gpt/utils.py +++ b/examples/inference/gpt/utils.py @@ -1,6 +1,5 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-import copy import json import itertools import random @@ -12,12 +11,12 @@ from megatron.core.inference.inference_request import DynamicInferenceRequest from megatron.core.inference.contexts import DynamicInferenceContext -from megatron.core.inference.contexts.dynamic_context import get_mem_size_str from megatron.core.transformer.module import MegatronModule from megatron.core.inference.sampling_params import SamplingParams + def add_common_inference_args(parser: ArgumentParser) -> ArgumentParser: """Common inference arguments.""" @@ -54,12 +53,6 @@ def add_common_inference_args(parser: ArgumentParser) -> ArgumentParser: default=30, help='Number of tokens to generate for each prompt', ) - group.add_argument( - "--num-tokens-from-file", - action='store_true', - default=False, - help='Use per-prompt num_tokens_to_generate from prompt file', - ) group.add_argument( "--top-n-logprobs", type=int, @@ -72,7 +65,7 @@ def add_common_inference_args(parser: ArgumentParser) -> ArgumentParser: help="Add a deterministic number of requests per step. This arg is " "prioritized over `--incoming-requests-per-sec` below (which is non-" "deterministic). Note that the number of requests added per step is " - "additionally limited by the inference context's `max_active_requests`, " + "additionally limited by the inference context's `max_requests`, " "`max_tokens`, and KV buffer size.", ) group.add_argument( @@ -123,6 +116,12 @@ def add_common_inference_args(parser: ArgumentParser) -> ArgumentParser: '`--prompt-file` above). 
The first `--prompt-file-num-truncate` samples ' 'will be used, in order.', ) + group.add_argument( + "--inference-coordinator-port", + type=int, + help="This port will be used to setup the inference co-ordinator on node-0", + default=12346 + ) group.add_argument( "--use-flashinfer-fused-rope", action='store_true', @@ -177,7 +176,6 @@ def __init__(self, prompt_text: str, time_offset: float, tokenizer: Any, samplin self.time_end = None self.state = "not-started" self.sampling_params: SamplingParams = sampling_params if sampling_params is not None else get_default_sampling_params(tokenizer.eod) - self.sampling_params = copy.deepcopy(self.sampling_params) def __str__(self) -> str: return "state '%s'; toffset %.1e; prompt len %d; output len %d; '%s'" % ( @@ -264,27 +262,10 @@ def get_synthetic_requests( int(args.incoming_requests_per_sec * args.incoming_requests_duration), ) - # Build prompts with expected lengths. - assert ( - len(args.num_tokens_to_prompt) == 2 - and - args.num_tokens_to_prompt[1] >= args.num_tokens_to_prompt[0] - ) - max_prompt_length = args.num_tokens_to_prompt[1] - max_prompt_text = "hi " * max_prompt_length - max_prompt_tokens = tokenizer.tokenize(max_prompt_text) - prompt_lengths = [ - random.randint(*args.num_tokens_to_prompt) - for _ in time_offsets - ] - prompt_tokens_list = [ max_prompt_tokens[:l] for l in prompt_lengths ] - prompt_texts = [ tokenizer.detokenize(tt) for tt in prompt_tokens_list ] - # Init requests. - assert len(prompt_texts) == len(time_offsets) requests = [ - Request(t, o, tokenizer, sampling_params=sampling_params) - for t, o in zip(prompt_texts, time_offsets) + Request("hi " * random.randint(*args.num_tokens_to_prompt), t, tokenizer, sampling_params) + for t in time_offsets ] return requests @@ -300,18 +281,9 @@ def get_requests_from_file( # Load prompts. 
n_prompts = sum(1 for _ in open(args.prompt_file)) prompts = [] - sampling_params = get_default_sampling_params(tokenizer.eod) - sampling_params_list = [] with open(args.prompt_file) as f: for line in tqdm(f.readlines(), "read prompt file", total=n_prompts): - line_dict = json.loads(line) - prompts.append(line_dict["text"]) - - sp = copy.deepcopy(sampling_params) - if args.num_tokens_from_file: - sp.num_tokens_to_generate = line_dict["chatgpt_output_token_length"] - sampling_params_list.append(sp) - + prompts.append(json.loads(line)["text"]) if len(prompts) == args.prompt_file_num_truncate: break @@ -325,8 +297,8 @@ def get_requests_from_file( # Init requests. requests = [ - Request(p, t, tokenizer, sp) - for p, t, sp in tqdm(zip(prompts, time_offsets, sampling_params_list), "init requests", total=len(prompts)) + Request(p, t, tokenizer, sampling_params) + for p, t in tqdm(zip(prompts, time_offsets), "init requests", total=len(prompts)) ] return requests @@ -370,7 +342,7 @@ def build_dynamic_engine_setup_prefix( Args: args (Namespace): Command-line arguments for this run. - context (DynamicInferenceContext): Stores limits such as `max_active_requests`, + context (DynamicInferenceContext): Stores limits such as `max_requests`, `max_tokens`, and `gtd_request_count`. requests (List[DynamicInferenceRequest]): List of inference requests. 
@@ -380,9 +352,7 @@ def build_dynamic_engine_setup_prefix( # CUDA graph config if args.cuda_graph_impl == "local": cg_str = ( - "graphs " - f"[{len(context.cuda_graph_token_counts)}] " - f"{context.cuda_graph_token_counts[0]}:" + f"graphs {context.cuda_graph_token_counts[0]}:" f"{context.cuda_graph_token_counts[-1]}" ) else: @@ -409,10 +379,17 @@ def build_dynamic_engine_setup_prefix( ) # Buffer limits config + flw = args.inference_dynamic_batching_buffer_overflow_factor + flw_str = "no overflow" if flw is None else f"{flw:.1f}" buffer_limits_str = ( - f"bf: {get_mem_size_str(args.inference_dynamic_batching_buffer_size_gb*1024**3)}, " - f"{context.block_allocator.active_count} chunks " - f"[r {context.max_active_requests}, t {context.max_tokens}]" + f"bf {args.inference_dynamic_batching_buffer_size_gb:.0f}, {flw_str} " + f"[r {context.max_requests}, t {context.max_tokens}]" + ) + + # Guaranteed request config + guaranteed_fraction_str = ( + f"gtd {args.inference_dynamic_batching_buffer_guaranteed_fraction:.2f} " + f"[r {context.gtd_request_count}]" ) parts = [ @@ -422,6 +399,7 @@ def build_dynamic_engine_setup_prefix( uvm_str, request_str, buffer_limits_str, + guaranteed_fraction_str, ] return " | ".join(parts) diff --git a/examples/post_training/modelopt/.gitignore b/examples/post_training/modelopt/.gitignore deleted file mode 100644 index b9272bd3eb2..00000000000 --- a/examples/post_training/modelopt/.gitignore +++ /dev/null @@ -1 +0,0 @@ -!slurm* diff --git a/examples/post_training/modelopt/ADVANCED.md b/examples/post_training/modelopt/ADVANCED.md index 28aad7d7964..20b17831b70 100644 --- a/examples/post_training/modelopt/ADVANCED.md +++ b/examples/post_training/modelopt/ADVANCED.md @@ -1,93 +1,12 @@
    -# Advanced Usage +# TensorRT Model Optimizer Integration Advanced Topics -[Advanced Configuration](#advanced-configuration) | -[Slurm Examples](#slurm-examples) | -[Checkpoint Resume](#checkpoint-resume) | +[Local Examples](#getting-started-in-a-local-environment) | +[Configuration](#learn-more-about-configuration) | +[Slurm Examples](ADVANCED.md#slurm-examples) | +[Advanced Topics](ADVANCED.md) | +[Megatron-LM Integration](https://github.com/NVIDIA/Megatron-LM/tree/main/examples/post_training/modelopt)
    -## Advanced Configuration - -### Understanding Configuration Variables - -For simplicity, we use `shell` scripts and variables as arguments. Each script has at least 1 positional -argument `[model_conf]`. Some scripts may require more such as `[qformat]` is needed for -quantization. - -```sh -\ - HF_MODEL_CKPT= \ - bash quantize.sh [model_conf] [qformat] -``` - -> **❗ IMPORTANT:** `model_conf` is used to get the corresponding Megatron-LM `${MODEL_ARGS}`. For example, -> `meta-llama/Llama-3.1-8B-Instruct` or `deepseek-ai/DeepSeek-R1` are both supported. -> -> Provide the pretrained checkpoint through variable `${HF_MODEL_CKPT}` in commandline or -> in a configuration shell script. More variables (e.g. `${TP}`, `${EP}`, ...) can be provided through -> commandline but we recommend passing all variables in a separate `shell` script. - -### Using Configuration Scripts - -When `${HF_MODEL_CKPT}` is not set through the commandline, `./env_setup_template.sh` can be used -to pass all variables instead. If you have your own script, use `${SANDBOX_ENV_SETUP}`. - -```sh -\ - SANDBOX_ENV_SETUP= \ - bash quantize.sh [model_conf] [qformat] -``` - -**For Slurm execution**, you **MUST USE** `${SANDBOX_ENV_SETUP}` (default: `./env_setup_template.sh`). -Other variables are not passed through `sbatch` and `srun` automatically. - -### Common Configuration Variables - -- `HF_MODEL_CKPT`: Path to pretrained model checkpoint -- `TP`: Tensor parallelism degree -- `PP`: Pipeline parallelism degree -- `EP`: Expert parallelism degree (for MoE models) -- `ETP`: Expert tensor parallelism degree (for MoE models) -- `MLM_MODEL_SAVE`: Path to save Megatron-LM checkpoint -- `MLM_MODEL_LOAD`: Path to load Megatron-LM checkpoint -- `MLM_EXTRA_ARGS`: Additional Megatron-LM arguments (e.g., for uneven PP) - -## Slurm Examples - -For models that require multi-node, our scripts in Megatron-LM examples also support `slurm` with a sbatch wrapper. 
-Start with the example `slurm/sbatch.sh` with some minor modification or use your existing `sbatch` -script. - -Different from local environment, we only allow passing variables through a shell script (default: `env_setup_template.sh`). -Commandline variable passthrough is not supported. - -
    - -### ⭐ BF16 Kimi-K2-Instruct EAGLE3 Training - - `conf/moonshotai/kimi_k2_instruct.sh` is a config that has been tested -with 8 nodes of DGX H100 (TP=8, ETP=1, EP=64, overall 64 H100 GPUs in total). Update `HF_MODEL_CKPT` to the exact -checkpoint path in the container to start: - -```sh -export USER_FSW= -export CONTAINER_IMAGE= -export SANDBOX_ENV_SETUP=./conf/moonshotai/kimi_k2_instruct.sh -sbatch --nodes=8 slurm/sbatch.sh "eagle3.sh moonshotai/Kimi-K2-Instruct" -``` - -To export the trained EAGLE3 model, switch to `kimi_k2_instruct_export.sh`. -**We only support pipeline-parallel (PP) export.** In this case, 2 nodes are used (PP=16). - -```sh -export USER_FSW= -export CONTAINER_IMAGE= -export SANDBOX_ENV_SETUP=./conf/moonshotai/kimi_k2_instruct_export.sh -sbatch --nodes=2 slurm/sbatch.sh "export.sh moonshotai/Kimi-K2-Instruct" -``` - -## Checkpoint Resume - -WIP diff --git a/examples/post_training/modelopt/Dockerfile b/examples/post_training/modelopt/Dockerfile index e127215904d..e0b4f00021e 100644 --- a/examples/post_training/modelopt/Dockerfile +++ b/examples/post_training/modelopt/Dockerfile @@ -4,7 +4,7 @@ ARG PIP_CONSTRAINT= WORKDIR /workspace/nmm-sandbox -RUN pip install jsonlines omegaconf +RUN pip install jsonlines omegaconf pulp torchprofile RUN pip install flask flask_restful fire nltk RUN pip install tiktoken blobfile diff --git a/examples/post_training/modelopt/README.md b/examples/post_training/modelopt/README.md index 33528c30097..be455019096 100644 --- a/examples/post_training/modelopt/README.md +++ b/examples/post_training/modelopt/README.md @@ -5,21 +5,22 @@ [TensorRT Model Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer) | [Local Examples](#getting-started-in-a-local-environment) | -[Configuration](./ADVANCED.md#advanced-configuration) | -[Slurm Examples](./ADVANCED.md#slurm-examples) | -[Speculative Decoding](./speculative.md) | -[Advanced Topics](./ADVANCED.md) +[Configuration](ADVANCED.md#learn-more-about-configuration) | 
+[Slurm Examples](ADVANCED.md#slurm-examples) |
+[Speculative Decoding](speculative.md) |
+[Advanced Topics](ADVANCED.md)
 
 [TensorRT Model Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer) (**ModelOpt**, `nvidia-modelopt`)
-provides end-to-end model optimization for NVIDIA hardware including quantization (real or simulated),
-knowledge distillation, pruning, speculative decoding, and more.
+provides end-to-end model optimization for
+NVIDIA hardware including quantization (real or simulated), sparsity, knowledge distillation, pruning,
+neural architecture search, and speculative decoding.
 
 ## Major Features
 
-- Start from Hugging Face pretrained model checkpoint with on-the-fly conversion to Megatron-LM checkpoint format.
+- Start from Hugging Face pretrained model checkpoint with on-the-fly conversion.
 - Support all kinds of model parallelism (TP, EP, ETP, PP).
 - Export to TensorRT-LLM, vLLM, and SGLang ready unified checkpoint.
 
@@ -27,14 +28,11 @@ knowledge distillation, pruning, speculative decoding, and more.
 | Model (`conf/`) | Quantization | EAGLE3 | Pruning (PP only) | Distillation |
 | :---: | :---: | :---: | :---: | :---: |
-| `deepseek-ai/DeepSeek-R1` | ✅ | ✅ | - | - |
-| `meta-llama/Llama-{3.1-8B, 3.1-405B, 3.2-1B}-Instruct` | ✅ | ✅ | ✅ | ✅ |
-| `meta-llama/Llama-4-{Scout,Maverick}-17B-{16,128}E-Instruct` | ✅ | ✅ | - | - |
 | `moonshotai/Kimi-K2-Instruct` | ✅ | ✅ | - | - |
-| `nvidia/NVIDIA-Nemotron-Nano-9B-v2` | ✅ | - | ✅ | ✅ |
-| `openai/gpt-oss-{20b, 120b}` | ✅ | **Online** | ✅ | ✅ |
+| `Qwen/Qwen3-{30B-A3B, 235B-A22B}` | **WAR** | ✅ | - | - |
 | `Qwen/Qwen3-{0.6B, 8B}` | ✅ | ✅ | ✅ | ✅ |
-| `Qwen/Qwen3-{30B-A3B, 235B-A22B}` | **WAR** | ✅ | ✅ | ✅ |
+| `deepseek-ai/DeepSeek-R1` | ✅ | ✅ | - | - |
+| `meta-llama/Llama-{3.1-8B, 3.1-405B, 3.2-1B}-Instruct` | ✅ | ✅ | ✅ | ✅ |
 
 ## Getting Started in a Local Environment
 
@@ -45,10 +43,6 @@ pip install -U nvidia-modelopt
 
 Alternatively, you can install from [source](https://github.com/NVIDIA/TensorRT-Model-Optimizer) to try our latest features.
 
-> **❗ IMPORTANT:** The first positional argument (e.g. `meta-llama/Llama-3.2-1B-Instruct`) of each script
-> is the config name used to match the supported model config in `conf/`. The pretrained HF checkpoint should
-> be downloaded and provided through `${HF_MODEL_CKPT}`.
-
 
 ### ⭐ NVFP4 Quantization, Qauntization-Aware Training, and Model Export
 
 
@@ -61,7 +55,7 @@ provide `${EXPORT_DIR}` to `export.sh`.
 
 > low-precision numerical behavior (fake-quant) which can be run on GPUs with compute > 80.
 > Real low-precision paramters (e.g. `E4M3` or `E2M1`)
 > and low-precision compute (e.g. `FP8Linear`) are also supported depending on GPU compute capability.
-> **See [Adanvanced Topics](./ADVANCED.md) for details**.
+> **See [Advanced Topics](ADVANCED.md) for details**.
 
 ```sh
 \
@@ -78,6 +72,31 @@ provide `${EXPORT_DIR}` to `export.sh`.
 
 ./export.sh meta-llama/Llama-3.2-1B-Instruct
 ```
 
+> **❗ IMPORTANT:** The first positional argument (e.g. 
`meta-llama/Llama-3.2-1B-Instruct`) of each script
+> is the config name used to match the supported model config in `conf/`. The pretrained checkpoint should
+> be downloaded and provided through `${HF_MODEL_CKPT}`.
+
+Loading the saved distributed checkpoint, the quantized Megatron model can be resumed for inference
+(generate or evaluate) or training (SFT or PEFT). To read more about these features, see
+[Advanced Topics](ADVANCED.md). To learn more about the design, see our [Design]() document [WIP].
+
+```sh
+\
+    TP=1 \
+    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \
+    ./generate.sh meta-llama/Llama-3.2-1B-Instruct
+
+\
+    TP=1 \
+    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \
+    ./mmlu.sh meta-llama/Llama-3.2-1B-Instruct
+
+\
+    TP=1 \
+    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \
+    ./finetune.sh meta-llama/Llama-3.2-1B-Instruct
+```
+
 ### ⭐ Online BF16 EAGLE3 Training
 
 Online EAGLE3 training has both the target (frozen) and draft models in the memory where the `hidden_states`
@@ -100,23 +119,19 @@ deployment.
 ./export.sh meta-llama/Llama-3.2-1B-Instruct
 ```
 
-See [Adanvanced Topics](./ADVANCED.md) for a `moonshotai/Kimi-K2-Instruct` EAGLE3 training example using `slurm`.
+See [Advanced Topics](ADVANCED.md) for a `moonshotai/Kimi-K2-Instruct` EAGLE3 training example using `slurm`.
 
 ### ⭐ Pruning
 
 Checkout pruning getting started section and guidelines for configuring pruning parameters in the
 [ModelOpt pruning README](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples/pruning).
 
-Pruning is supported for GPT and Mamba models in Pipeline Parallel mode. Available pruning dimensions are:
-
+Pruning is supported for GPT and Mamba models. 
Available pruning options are: - `TARGET_FFN_HIDDEN_SIZE` - `TARGET_HIDDEN_SIZE` - `TARGET_NUM_ATTENTION_HEADS` - `TARGET_NUM_QUERY_GROUPS` - `TARGET_MAMBA_NUM_HEADS` - `TARGET_MAMBA_HEAD_DIM` -- `TARGET_NUM_MOE_EXPERTS` -- `TARGET_MOE_FFN_HIDDEN_SIZE` -- `TARGET_MOE_SHARED_EXPERT_INTERMEDIATE_SIZE` - `TARGET_NUM_LAYERS` - `LAYERS_TO_DROP` (comma separated, 1-indexed list of layer numbers to directly drop) @@ -127,44 +142,12 @@ PP=1 \ TARGET_NUM_LAYERS=24 \ HF_MODEL_CKPT= \ MLM_MODEL_SAVE=Qwen3-8B-Pruned \ -./prune.sh Qwen/Qwen3-8B +./prune.sh qwen/Qwen3-8B ``` > [!TIP] > If number of layers in the model is not divisible by pipeline parallel size (PP), you can configure uneven > PP by setting `MLM_EXTRA_ARGS="--decoder-first-pipeline-num-layers --decoder-last-pipeline-num-layers "` -> [!TIP] -> You can reuse pruning scores for pruning same model again to different architectures by setting -> `PRUNE_ARGS="--pruning-scores-path "` - -> [!NOTE] -> When loading pruned M-LM checkpoint for subsequent steps, make sure overwrite the pruned parameters in the -> default `conf/` by setting `MLM_EXTRA_ARGS`. E.g.: for loading above pruned Qwen3-8B checkpoint for mmlu, set: -> `MLM_EXTRA_ARGS="--num-layers 24"` - -### ⭐ Inference and Training - -The saved Megatron-LM distributed checkpoint (output of above scripts) can be resumed for inference -(generate or evaluate) or training (SFT or PEFT). To read more about these features, see -[Advanced Topics](./ADVANCED.md). 
- -```sh -\ - TP=1 \ - MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \ - ./generate.sh meta-llama/Llama-3.2-1B-Instruct - -\ - TP=1 \ - MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \ - ./mmlu.sh meta-llama/Llama-3.2-1B-Instruct - -\ - TP=1 \ - MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \ - ./finetune.sh meta-llama/Llama-3.2-1B-Instruct -``` - ## Advanced Usage TBD diff --git a/examples/post_training/modelopt/conf/arguments.sh b/examples/post_training/modelopt/conf/arguments.sh index 0193bf8b643..f29e0a9d989 100644 --- a/examples/post_training/modelopt/conf/arguments.sh +++ b/examples/post_training/modelopt/conf/arguments.sh @@ -1,6 +1,3 @@ -#!/bin/bash -set -e - MLM_MODEL_CFG=$1 # Bash coloring diff --git a/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct.sh b/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct.sh deleted file mode 100644 index 4f301f31c1d..00000000000 --- a/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -HF_MODEL_CKPT=/workspace/scratch/moonshotai/Kimi-K2-Instruct -TP=8 -ETP=1 -EP=64 - diff --git a/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct_export.sh b/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct_export.sh deleted file mode 100644 index 73ee80a6d93..00000000000 --- a/examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct_export.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -HF_MODEL_CKPT=/workspace/scratch/moonshotai/Kimi-K2-Instruct - -MLM_EXTRA_ARGS=" \ - --decoder-first-pipeline-num-layers 3 \ - --decoder-last-pipeline-num-layers 2 \ - --init-model-with-meta-device \ - --use-cpu-initialization \ - -" - -# Layer distribution over PP: 3, [4] * 14, 2. 
-PP=16 - diff --git a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh deleted file mode 120000 index 3771c930263..00000000000 --- a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh +++ /dev/null @@ -1 +0,0 @@ -NVIDIA-Nemotron-Nano-9B-v2.sh \ No newline at end of file diff --git a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh new file mode 100644 index 00000000000..d6ba1e1dcc4 --- /dev/null +++ b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +if [ -z ${HF_MODEL_CKPT} ]; then + HF_MODEL_CKPT=nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base + TOKENIZER_MODEL=nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base +else + TOKENIZER_MODEL=${HF_MODEL_CKPT} +fi + +MODEL_ARGS=" \ + --save-interval 100000 \ + --micro-batch-size 1 \ + --bf16 \ + --no-masked-softmax-fusion \ + --disable-bias-linear \ + --untie-embeddings-and-output-weights \ + --position-embedding-type none \ + --no-rope-fusion \ + --normalization RMSNorm \ + --squared-relu \ + --num-layers 56 \ + --hidden-size 4480 \ + --ffn-hidden-size 15680 \ + --num-attention-heads 40 \ + --kv-channels 128 \ + --group-query-attention \ + --num-query-groups 8 \ + --hybrid-override-pattern M-M-M-MM-M-M-M*-M-M-M*-M-M-M-M*-M-M-M-M*-M-MM-M-M-M-M-M- \ + --is-hybrid-model \ + --mamba-head-dim 80 \ + --mamba-num-heads 128 \ + --mamba-num-groups 8 \ + --mamba-state-dim 128 \ + --seq-length 4096 \ + --max-position-embeddings 131072 \ + --tokenizer-type HuggingFaceTokenizer \ + --make-vocab-size-divisible-by 1 \ + --use-mcore-models \ + --export-model-type MambaModel \ + --padded-vocab-size 131072 \ +" diff --git a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh 
b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh deleted file mode 100644 index d6ba1e1dcc4..00000000000 --- a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -if [ -z ${HF_MODEL_CKPT} ]; then - HF_MODEL_CKPT=nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base - TOKENIZER_MODEL=nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base -else - TOKENIZER_MODEL=${HF_MODEL_CKPT} -fi - -MODEL_ARGS=" \ - --save-interval 100000 \ - --micro-batch-size 1 \ - --bf16 \ - --no-masked-softmax-fusion \ - --disable-bias-linear \ - --untie-embeddings-and-output-weights \ - --position-embedding-type none \ - --no-rope-fusion \ - --normalization RMSNorm \ - --squared-relu \ - --num-layers 56 \ - --hidden-size 4480 \ - --ffn-hidden-size 15680 \ - --num-attention-heads 40 \ - --kv-channels 128 \ - --group-query-attention \ - --num-query-groups 8 \ - --hybrid-override-pattern M-M-M-MM-M-M-M*-M-M-M*-M-M-M-M*-M-M-M-M*-M-MM-M-M-M-M-M- \ - --is-hybrid-model \ - --mamba-head-dim 80 \ - --mamba-num-heads 128 \ - --mamba-num-groups 8 \ - --mamba-state-dim 128 \ - --seq-length 4096 \ - --max-position-embeddings 131072 \ - --tokenizer-type HuggingFaceTokenizer \ - --make-vocab-size-divisible-by 1 \ - --use-mcore-models \ - --export-model-type MambaModel \ - --padded-vocab-size 131072 \ -" diff --git a/examples/post_training/modelopt/conf/Qwen/Qwen2.5-0.5B-Instruct.sh b/examples/post_training/modelopt/conf/qwen/Qwen2.5-0.5B-Instruct.sh similarity index 100% rename from examples/post_training/modelopt/conf/Qwen/Qwen2.5-0.5B-Instruct.sh rename to examples/post_training/modelopt/conf/qwen/Qwen2.5-0.5B-Instruct.sh diff --git a/examples/post_training/modelopt/conf/Qwen/Qwen2.5-7B-Instruct.sh b/examples/post_training/modelopt/conf/qwen/Qwen2.5-7B-Instruct.sh similarity index 100% rename from examples/post_training/modelopt/conf/Qwen/Qwen2.5-7B-Instruct.sh rename to examples/post_training/modelopt/conf/qwen/Qwen2.5-7B-Instruct.sh diff 
--git a/examples/post_training/modelopt/conf/Qwen/Qwen3-0.6B.sh b/examples/post_training/modelopt/conf/qwen/Qwen3-0.6B.sh similarity index 100% rename from examples/post_training/modelopt/conf/Qwen/Qwen3-0.6B.sh rename to examples/post_training/modelopt/conf/qwen/Qwen3-0.6B.sh diff --git a/examples/post_training/modelopt/conf/Qwen/Qwen3-235B-A22B.sh b/examples/post_training/modelopt/conf/qwen/Qwen3-235B-A22B.sh similarity index 100% rename from examples/post_training/modelopt/conf/Qwen/Qwen3-235B-A22B.sh rename to examples/post_training/modelopt/conf/qwen/Qwen3-235B-A22B.sh diff --git a/examples/post_training/modelopt/conf/Qwen/Qwen3-30B-A3B.sh b/examples/post_training/modelopt/conf/qwen/Qwen3-30B-A3B.sh similarity index 100% rename from examples/post_training/modelopt/conf/Qwen/Qwen3-30B-A3B.sh rename to examples/post_training/modelopt/conf/qwen/Qwen3-30B-A3B.sh diff --git a/examples/post_training/modelopt/conf/Qwen/Qwen3-8B.sh b/examples/post_training/modelopt/conf/qwen/Qwen3-8B.sh similarity index 100% rename from examples/post_training/modelopt/conf/Qwen/Qwen3-8B.sh rename to examples/post_training/modelopt/conf/qwen/Qwen3-8B.sh diff --git a/examples/post_training/modelopt/convert_model.py b/examples/post_training/modelopt/convert_model.py index 20ee59a2fe0..9790d73fc4c 100644 --- a/examples/post_training/modelopt/convert_model.py +++ b/examples/post_training/modelopt/convert_model.py @@ -162,7 +162,17 @@ def check_arguments(): if eagle_module is not None: mcore_eagle_state_dict = torch.load(args.extra_model_path) eagle_module.load_state_dict(mcore_eagle_state_dict, strict=False) - + + # Add mask tokens for parallel draft + if unwrapped_model.eagle_config.parallel_draft_step > 1: + assert unwrapped_model.eagle_config.parallel_draft_step <= 4, "Parallel draft only supports steps less than or equal to 4." 
+ tokenizer = get_tokenizer() + for i in range(unwrapped_model.eagle_config.parallel_draft_step - 1): + mask_token = "[MASK_{}]".format(i) + tokenizer._tokenizer.add_tokens([mask_token], special_tokens=True) + token_id = tokenizer._tokenizer.convert_tokens_to_ids(mask_token) + setattr(unwrapped_model, "mask_token_{}".format(i), torch.tensor(token_id)) + elif args.algorithm == "medusa": config = {"medusa_num_heads": args.export_num_medusa_heads, "medusa_num_layers": 1} unwrapped_model = mtsp.convert(unwrapped_model, [("medusa", config)]) diff --git a/examples/post_training/modelopt/finetune.py b/examples/post_training/modelopt/finetune.py index 6489d394392..bd0569bb513 100755 --- a/examples/post_training/modelopt/finetune.py +++ b/examples/post_training/modelopt/finetune.py @@ -167,7 +167,7 @@ def __init__( hf_dataset_kwargs = SFTDataset.hf_dataset_to_kwargs.get( self.hf_dataset, {"split": "train"} ) - self._raw_samples = datasets.load_dataset(self.hf_dataset, token=os.environ.get("HF_TOKEN", None), **hf_dataset_kwargs) + self._raw_samples = datasets.load_dataset(self.hf_dataset, **hf_dataset_kwargs) self._raw_samples = self._raw_samples.shard( num_shards=self.num_shards, index=shard_index ) @@ -455,10 +455,7 @@ def non_loss_data_func(model: GPTModel): """Callback to compute the acceptance length.""" args = get_args() if not args.export_offline_model: - try: - report_draft_acceptance_length(model) - except Exception as e: - print(e) + report_draft_acceptance_length(model) diff --git a/examples/post_training/modelopt/finetune.sh b/examples/post_training/modelopt/finetune.sh index 21493697374..0579dd69157 100755 --- a/examples/post_training/modelopt/finetune.sh +++ b/examples/post_training/modelopt/finetune.sh @@ -14,7 +14,6 @@ MLM_DEFAULT_ARGS=" \ --distributed-timeout-minutes 30 \ --auto-detect-ckpt-format \ --export-te-mcore-model \ - --finetune \ " @@ -68,8 +67,6 @@ if [ -z ${MLM_EVAL_ARGS} ]; then " fi -export HF_TOKEN=${HF_TOKEN} - ${LAUNCH_SCRIPT} 
${SCRIPT_DIR}/finetune.py \ ${MODEL_ARGS} \ --tensor-model-parallel-size ${TP} \ diff --git a/examples/post_training/modelopt/prune.py b/examples/post_training/modelopt/prune.py index 6a0178a1420..7819b2ed2af 100644 --- a/examples/post_training/modelopt/prune.py +++ b/examples/post_training/modelopt/prune.py @@ -20,7 +20,6 @@ from modelopt.torch.export import import_mcore_gpt_from_hf from modelopt.torch.prune.plugins.mcore_minitron import SUPPORTED_HPARAMS -from megatron.core.parallel_state import get_pipeline_model_parallel_group, get_tensor_model_parallel_group from megatron.post_training.arguments import add_modelopt_args from megatron.post_training.checkpointing import load_modelopt_checkpoint from megatron.post_training.generate import simple_generate @@ -92,21 +91,6 @@ def add_prune_args(parser): type=int, help="Prune dimension of Mamba attention heads to this value", ) - group.add_argument( - "--target-num-moe-experts", - type=int, - help="Prune number of MoE experts to this value", - ) - group.add_argument( - "--target-moe-ffn-hidden-size", - type=int, - help="Prune MoE FFN hidden size to this value", - ) - group.add_argument( - "--target-moe-shared-expert-intermediate-size", - type=int, - help="Prune MoE shared expert intermediate size to this value", - ) group.add_argument( "--target-num-layers", type=int, @@ -120,12 +104,6 @@ def add_prune_args(parser): nargs="*", help="Drop specific model layers (1-indexed). 
Cannot be used with rest of the pruning options", ) - group.add_argument( - "--pruning-scores-path", - type=str, - default=None, - help="Path to the cache and reuse pruning scores for pruning again to different params", - ) add_modelopt_args(parser) return parser @@ -147,14 +125,6 @@ def get_calib_dataloader(calib_size=1024, max_sequence_length=512): yield dataset[i][text_column][:max_sequence_length] -def get_params(model): - params = sum(p.numel() for p in model.parameters()) - reduced_params = torch.Tensor([params]).to(device=next(model.parameters()).device) - torch.distributed.all_reduce(reduced_params, group=get_pipeline_model_parallel_group()) - torch.distributed.all_reduce(reduced_params, group=get_tensor_model_parallel_group()) - return reduced_params.item() - - if __name__ == "__main__": initialize_megatron( extra_args_provider=add_prune_args, @@ -211,7 +181,7 @@ def _hf_dataset_forword_loop_func(model): simple_generate(model, tokens.input_ids.cuda(), osl=1) if args.layers_to_drop: - mtp.mcore_minitron.drop_mcore_language_model_layers(model, layers_to_drop=args.layers_to_drop) + mtp.plugins.drop_mcore_language_model_layers(model, layers_to_drop=args.layers_to_drop) else: print_rank_0("Pruning model...") export_config = { @@ -219,22 +189,18 @@ def _hf_dataset_forword_loop_func(model): for k in SUPPORTED_HPARAMS if getattr(args, f"target_{k}", None) is not None } - config = {"forward_loop": _hf_dataset_forword_loop_func} - if args.pruning_scores_path is not None: - config["scores_path"] = args.pruning_scores_path mtp.prune( unwrapped_model, mode="mcore_minitron", constraints={"export_config": export_config}, dummy_input=None, # Not used - config=config, + config={"forward_loop": _hf_dataset_forword_loop_func}, ) # [WAR till modelopt 0.39]: Remove prune state to avoid converting again on restore which forces TP=1. 
if mto.ModeloptStateManager.has_state_for_mode_type("prune", model=unwrapped_model): mto.ModeloptStateManager.remove_state(unwrapped_model) print_rank_0(f"Pruned Model:\n {unwrapped_model}") - print_rank_0(f"Pruned Model Params: {get_params(unwrapped_model)/1e9:.2f}B") _custom_prompt_forward_loop_func(unwrapped_model) diff --git a/examples/post_training/modelopt/prune.sh b/examples/post_training/modelopt/prune.sh index 33f3e615e96..ef86260b062 100755 --- a/examples/post_training/modelopt/prune.sh +++ b/examples/post_training/modelopt/prune.sh @@ -23,27 +23,23 @@ MLM_DEFAULT_ARGS=" # Example: export LAYERS_TO_DROP="1 5 10" # Define pruning argument mappings: "env_var:cli_arg" -# List of environment variables we want to check for pruning CLI args -PRUNE_ENV_VARS=( - TARGET_FFN_HIDDEN_SIZE - TARGET_HIDDEN_SIZE - TARGET_NUM_ATTENTION_HEADS - TARGET_NUM_QUERY_GROUPS - TARGET_MAMBA_NUM_HEADS - TARGET_MAMBA_HEAD_DIM - TARGET_NUM_MOE_EXPERTS - TARGET_MOE_FFN_HIDDEN_SIZE - TARGET_MOE_SHARED_EXPERT_INTERMEDIATE_SIZE - TARGET_NUM_LAYERS - LAYERS_TO_DROP +PRUNE_ARG_MAPPINGS=( + "TARGET_FFN_HIDDEN_SIZE:--target-ffn-hidden-size" + "TARGET_HIDDEN_SIZE:--target-hidden-size" + "TARGET_NUM_ATTENTION_HEADS:--target-num-attention-heads" + "TARGET_NUM_QUERY_GROUPS:--target-num-query-groups" + "TARGET_MAMBA_NUM_HEADS:--target-mamba-num-heads" + "TARGET_MAMBA_HEAD_DIM:--target-mamba-head-dim" + "TARGET_NUM_LAYERS:--target-num-layers" + "LAYERS_TO_DROP:--layers-to-drop" ) -# Build arguments from environment variables (TARGET_NUM_LAYERS -> --target-num-layers, etc.) -PRUNE_ARGS=${PRUNE_ARGS:-""} -for env_var in "${PRUNE_ENV_VARS[@]}"; do +# Build arguments from environment variables +PRUNE_ARGS="" +for mapping in "${PRUNE_ARG_MAPPINGS[@]}"; do + env_var="${mapping%%:*}" + cli_arg="${mapping##*:}" if [ ! 
-z "${!env_var}" ]; then - # prepend --, convert to lowercase, replace _ with - - cli_arg="--$(echo "${env_var}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')" PRUNE_ARGS="${PRUNE_ARGS} ${cli_arg} ${!env_var}" fi done @@ -63,9 +59,6 @@ else LOAD_ARGS="--load ${MLM_MODEL_CKPT}" fi - -set -ex - ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/prune.py \ ${MODEL_ARGS} \ ${LOAD_ARGS} \ @@ -74,5 +67,6 @@ ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/prune.py \ --tokenizer-model ${TOKENIZER_MODEL} \ --save ${MLM_MODEL_SAVE} \ --references "${MLM_REF_LABEL}" \ + --calib-size 1024 \ ${PRUNE_ARGS} \ ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS} diff --git a/examples/post_training/modelopt/slurm/env_setup_template.sh b/examples/post_training/modelopt/slurm/env_setup_template.sh deleted file mode 100644 index 12b59f06eed..00000000000 --- a/examples/post_training/modelopt/slurm/env_setup_template.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -HF_MODEL_CKPT=/workspace/scratch/meta-llama/Llama-3.2-1B-Instruct -TP=1 -ETP=1 -EP=1 -PP=1 diff --git a/examples/post_training/modelopt/slurm/sbatch.sh b/examples/post_training/modelopt/slurm/sbatch.sh deleted file mode 100644 index 3916c5de2b5..00000000000 --- a/examples/post_training/modelopt/slurm/sbatch.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -#SBATCH -A -#SBATCH -p -#SBATCH --job-name= -#SBATCH --nodes=1 --ntasks-per-node=8 --gpus-per-node=8 -#SBATCH -t 04:00:00 -#SBATCH --exclusive --mem=0 --overcommit - -# Bash coloring -RED='\033[0;31m' -YELLOW='\033[0;33m' -GREEN='\033[0;32m' -BLUE='\033[0;34m' -PURPLE='\033[0;35m' -WHITE='\033[0;37m' - -# Predefined logging -MLM_ERROR="${RED}ERROR: ${WHITE}" -MLM_WARNING="${YELLOW}WARNING:${WHITE}" - -# CHANGE THE FOLLOWING TO YOUR DATA, MEGATRON, and CHECKPOINT DIR -if [[ -z ${USER_FSW} ]]; then - printf "${MLM_ERROR} Variable USER_FSW (read/write scratch space) must be set!\n" - exit 1 -fi - -if [ -z ${SANDBOX_DIR} ]; then - SANDBOX_DIR="$(pwd)" - printf "${MLM_WARNING} Variable SANDBOX_DIR not set! 
(default: ${SANDBOX_DIR})\n" -fi - -if [ -z ${SANDBOX_ENV_SETUP} ]; then - SANDBOX_ENV_SETUP=./env_setup_template.sh - printf "${MLM_WARNING} Variable SANDBOX_ENV_SETUP not set! (default: ${SANDBOX_ENV_SETUP})\n" -fi - -if [ -z ${CONTAINER_IMAGE} ]; then - CONTAINER_IMAGE="nvidia-modelopt-megatron:latest" - printf "${MLM_WARNING} Variable CONTAINER_IMAGE not set! (default: ${CONTAINER_IMAGE})\n" -fi - -if [ -z ${LAUNCH_SCRIPT} ]; then - LAUNCH_SCRIPT="python" - printf "${MLM_WARNING} Variable LAUNCH_SCRIPT not set! (default: ${LAUNCH_SCRIPT})\n" -fi - -# DO NOT MODIFY THE VALUES BELOW UNLESS YOU KNOW WHAT YOU ARE DOING!!! -DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` - -CONTAINER_MOUNT="${SANDBOX_DIR}:/workspace/nmm-sandbox,${USER_FSW}:/workspace/scratch" - -srun -l \ - --mpi=pmix \ - --output=%x_%j_$DATETIME.log \ - --container-image ${CONTAINER_IMAGE} \ - --container-workdir "/workspace/nmm-sandbox" \ - --container-mounts ${CONTAINER_MOUNT} \ - --export "HF_MODEL_CKPT=${HF_MODEL_CKPT},SANDBOX_ENV_SETUP=${SANDBOX_ENV_SETUP},LAUNCH_SCRIPT=${LAUNCH_SCRIPT}" \ - bash ${1} - -set +x - diff --git a/examples/post_training/modelopt/validate.sh b/examples/post_training/modelopt/validate.sh index 796231e508e..90ff4810117 100644 --- a/examples/post_training/modelopt/validate.sh +++ b/examples/post_training/modelopt/validate.sh @@ -16,9 +16,8 @@ if [ -z ${MLM_MODEL_CKPT} ]; then fi if [ -z ${PROMPTS_PATH} ]; then - PROMPT_ARGS="" -else - PROMPT_ARGS="--prompts-path ${PROMPTS_PATH}" + printf "${MLM_ERROR} Variable ${PURPLE}PROMPTS_PATH${WHITE} must be set!\n" + exit 1 fi if [ -z ${STEPS} ]; then @@ -41,7 +40,6 @@ if [ -z ${OSL} ]; then STEPS=64 fi -export HF_TOKEN=${HF_TOKEN} ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/validate.py \ ${MODEL_ARGS} \ @@ -51,9 +49,9 @@ ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/validate.py \ --pipeline-model-parallel-size ${PP} \ --tokenizer-model ${TOKENIZER_MODEL} \ --load ${MLM_MODEL_CKPT} \ + --prompts-path ${PROMPTS_PATH} \ --steps ${STEPS} \ --osl ${OSL} \ - 
${PROMPT_ARGS} \ ${GT_ARGS} \ ${SAVE_ARGS} \ ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS} diff --git a/gpt_builders.py b/gpt_builders.py index 2ef41846f2c..9fa1aff72c7 100644 --- a/gpt_builders.py +++ b/gpt_builders.py @@ -5,7 +5,6 @@ get_gpt_decoder_block_spec, get_gpt_layer_local_spec, get_gpt_layer_with_transformer_engine_spec, - get_gpt_layer_with_inference_spec, get_gpt_mtp_block_spec, get_gpt_decoder_layer_specs, ) @@ -44,7 +43,6 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None): use_te = args.transformer_impl == "transformer_engine" if args.num_experts or (args.linear_attention_type is not None): - assert not (config.transformer_impl == "inference_optimized") # Define the decoder block spec transformer_layer_spec = get_gpt_decoder_block_spec( config, @@ -54,14 +52,12 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None): vp_stage=vp_stage, ) elif args.heterogeneous_layers_config_path is not None: - assert not (config.transformer_impl == "inference_optimized") transformer_layer_spec = get_gpt_heterogeneous_layer_spec(config, use_te) else: # Define the decoder layer spec transformer_layer_spec = _get_transformer_layer_spec(use_te, config) mtp_block_spec = None if args.mtp_num_layers is not None: - assert not (config.transformer_impl == "inference_optimized") # Get GPT decoder layer specs for the model. 
if args.spec is not None: mtp_transformer_layer_spec = import_module(args.spec) @@ -124,12 +120,6 @@ def _get_transformer_layer_spec(use_te, config): use_kitchen=config.use_kitchen, fallback_to_eager_attn=config.fallback_to_eager_attn, ) - elif config.transformer_impl == "inference_optimized": - return get_gpt_layer_with_inference_spec( - args.qk_layernorm, - args.multi_latent_attention, - qk_l2_norm=args.qk_l2_norm, - ) else: return get_gpt_layer_local_spec( args.num_experts, diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index 8a63e0f5cf7..d6ef5f6210e 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -898,10 +898,9 @@ def forward_hook(_module, inputs, output): # Register pre state_dict hook to ensure that the module parameters are # distributed before saving the state_dict. - for name, module in self.named_modules(): - module.register_state_dict_pre_hook( - lambda *args, **kwargs: self._replace_param_with_distributed_if_needed() - ) + self._state_dict_pre_hook = self.module.register_state_dict_pre_hook( + lambda *args, **kwargs: self._replace_param_with_distributed_if_needed() + ) @contextmanager def no_sync(self): diff --git a/megatron/core/fusions/fused_pad_routing_map.py b/megatron/core/fusions/fused_pad_routing_map.py index 8e4d1763270..c382178b6c9 100644 --- a/megatron/core/fusions/fused_pad_routing_map.py +++ b/megatron/core/fusions/fused_pad_routing_map.py @@ -6,7 +6,7 @@ from packaging import version from megatron.core.jit import jit_fuser -from megatron.core.utils import experimental_fn, null_decorator +from megatron.core.utils import null_decorator try: import triton @@ -70,7 +70,6 @@ def _pad_routing_map_kernel( tl.store(output_row_ptr + token_indices, output_row, mask=token_mask) -@experimental_fn(introduced_with_version="0.13.0") @jit_fuser def 
fused_pad_routing_map(routing_map: torch.Tensor, pad_multiple: int) -> torch.Tensor: """Fused version of pad_routing_map. diff --git a/megatron/core/inference/communication_utils.py b/megatron/core/inference/communication_utils.py index a5bfe75fbb6..18fbb18f2f0 100644 --- a/megatron/core/inference/communication_utils.py +++ b/megatron/core/inference/communication_utils.py @@ -71,7 +71,8 @@ def broadcast_from_last_pipeline_stage( tensor.shape ), f"Expected tensor of shape {size} but got {list(tensor.shape)}" assert dtype == tensor.dtype, f"Expected tensor of type {dtype} but got {tensor.dtype}" - _is_cuda_contiguous(tensor) + _is_cuda(tensor) + assert tensor.is_contiguous() else: tensor = torch.empty(size, dtype=dtype, device=torch.cuda.current_device()) diff --git a/megatron/core/inference/contexts/attention_context/mamba_metadata.py b/megatron/core/inference/contexts/attention_context/mamba_metadata.py index ecb0296559f..e9cd99a6c48 100644 --- a/megatron/core/inference/contexts/attention_context/mamba_metadata.py +++ b/megatron/core/inference/contexts/attention_context/mamba_metadata.py @@ -1,28 +1,8 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -from dataclasses import dataclass -from typing import List, Optional, Tuple - import torch -@dataclass -class MambaInferenceStateConfig: - """Config for initializing Mamba model inference state tensors.""" - - layer_type_list: List[str] - """ - A list of strings that indicates the layer type (Mamba / Attention / MLP) for each layer. - See `megatron/core/ssm/mamba_hybrid_layer_allocation.py` for the list of symbols. 
- """ - - mamba_conv_states_shape: Tuple[int] - """Mamba conv states shape per request.""" - - mamba_ssm_states_shape: Tuple[int] - """Mamba ssm states shape per request.""" - - class MambaMetadata: """Manages the metadata tensors required for Mamba layers during inference.""" @@ -84,7 +64,7 @@ def update_cudagraph_mapping( """ self.request_to_mamba_state_idx_cudagraph_only[0:num_active_requests] = active_mamba_indices - def allocate_slot(self) -> Optional[int]: + def allocate_slot(self) -> int: """ Allocates a new slot for a request in the Mamba state buffers. diff --git a/megatron/core/inference/contexts/dynamic_block_allocator.py b/megatron/core/inference/contexts/dynamic_block_allocator.py index 026ee47d094..4baa3f5212c 100644 --- a/megatron/core/inference/contexts/dynamic_block_allocator.py +++ b/megatron/core/inference/contexts/dynamic_block_allocator.py @@ -13,86 +13,60 @@ class BlockAllocator: - Initializing a pool of block IDs - Allocating blocks from the pool - Releasing blocks back to the pool + - Managing the guaranteed block count for active requests Args: - context (DynamicInferenceContext): Dynamic inference context. - active_count (int): Total number of active blocks available in the buffer. - The full buffer size is 2*active_count, to accommodate an equal-size - space for paused requests that live on the CPU. + block_count_total (int): Total number of blocks available in the buffer. + gtd_block_count (int): Number of blocks reserved for guaranteed requests. 
""" - def __init__(self, context: "DynamicInferenceContext", total_count: int): + def __init__(self, block_count_total: int, gtd_block_count: int): + self.block_count_total = block_count_total + self.gtd_block_count = gtd_block_count - self.context = context - - active_count = (total_count - 1) // 2 # -1 for dummy_block_idx (see below) - active_count = max(1, active_count) # need at least one block - self.total_count = 2 * active_count + 1 # +1 for dummy_block_idx - self.total_avail = self.total_count - 1 # -1 for dummy_block_idx - self.active_count = active_count - self.paused_count = self.total_count - self.active_count - 1 # -1 for dummy_block_idx - self.dummy_block_idx = self.total_count - 1 + # Reserve last block ID as dummy block for decode-only inference steps + self.block_count_avail = self.block_count_total - 1 + self.dummy_block_idx = self.block_count_total - 1 # Initialize block pool as a "stack" data structure self.block_bag = torch.arange( - self.total_count, dtype=torch.int32, device=torch.cuda.current_device() - ) - - def __str__(self): - return ( - f"total avail {self.total_avail} / {self.total_count - 1}" - f"; active {self.active_count}" + self.block_count_total, dtype=torch.int32, device=torch.cuda.current_device() ) - def get_active_used(self): - """Compute number of active blocks used.""" - return ( - self.context.request_kv_block_counts[ - self.context.paused_request_count : self.context.total_request_count - ] - .sum() - .item() - ) - - def get_paused_used(self): - """Compute number of paused blocks used.""" - return ( - self.context.request_kv_block_counts[: self.context.paused_request_count].sum().item() - ) - - def get_active_avail(self): - """Compute number of active blocks available.""" - return self.active_count - self.get_active_used() - - def get_paused_avail(self): - """Compute number of paused blocks available.""" - return self.paused_count - self.get_paused_used() - - def is_memory_available(self, num_blocks: int) -> bool: + def 
is_memory_available(self, num_blocks: int, safe: bool = False) -> bool: """Check if memory blocks are available. + Use 'safe' to avoid all requests being deadlocked. A fraction of the KV cache + memory buffer is reserved to guarantee that a minimum number of active + requests can run on any given step. + Args: num_blocks (int): Number of blocks to check. + safe (bool): Include extra space for guaranteeing ability to run + requests to completion. Return: (bool) Is memory available? """ - return self.get_active_avail() >= num_blocks + if safe: + return self.block_count_avail >= num_blocks + self.gtd_block_count + else: + return self.block_count_avail >= num_blocks - def allocate_memory_blocks(self, num_blocks: int) -> Optional[Tensor]: + def allocate_memory_blocks(self, num_blocks: int = 1, safe: bool = False) -> Optional[Tensor]: """Allocate memory blocks if available, else return None. Args: num_blocks (int): Number of blocks to allocate. + safe (bool): Include extra space for guaranteeing ability to run + requests to completion. Return: (Optional[Tensor]) Allocated block IDs. """ - if self.is_memory_available(num_blocks): - self.total_avail -= num_blocks - block_ids = self.block_bag[self.total_avail : (self.total_avail + num_blocks)] - assert num_blocks == block_ids.numel() - return block_ids + if self.is_memory_available(num_blocks, safe): + self.block_count_avail -= num_blocks + return self.block_bag[self.block_count_avail : (self.block_count_avail + num_blocks)] else: return None @@ -106,8 +80,8 @@ def release_memory_blocks(self, blocks: Tensor) -> None: None """ num_blocks = blocks.size(dim=0) - self.block_bag[self.total_avail : (self.total_avail + num_blocks)] = blocks - self.total_avail += num_blocks + self.block_bag[self.block_count_avail : (self.block_count_avail + num_blocks)] = blocks + self.block_count_avail += num_blocks def reset(self) -> None: """Reset the allocator to initial state. 
@@ -115,4 +89,4 @@ def reset(self) -> None: This resets the available block count to the entire memory pool (except for the dummy block). """ - self.total_avail = self.total_count - 1 + self.block_count_avail = self.block_count_total - 1 diff --git a/megatron/core/inference/contexts/dynamic_context.py b/megatron/core/inference/contexts/dynamic_context.py index d15daa90d10..000b58200f8 100644 --- a/megatron/core/inference/contexts/dynamic_context.py +++ b/megatron/core/inference/contexts/dynamic_context.py @@ -1,6 +1,5 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -import logging import math import warnings from contextlib import nullcontext @@ -24,11 +23,14 @@ from megatron.core.inference.utils import tensor_swap from megatron.core.models.common.embeddings.rope_utils import apply_rotary_pos_emb from megatron.core.package_info import __version__ as mcore_version -from megatron.core.ssm.mamba_hybrid_layer_allocation import get_layer_maps_from_layer_type_list +from megatron.core.ssm.mamba_hybrid_layer_allocation import ( + Symbols, + get_layer_maps_from_layer_type_list, +) from megatron.core.transformer import TransformerConfig from megatron.core.utils import divide as core_divide -from .attention_context.mamba_metadata import MambaInferenceStateConfig, MambaMetadata +from .attention_context.mamba_metadata import MambaMetadata from .attention_context.mha_metadata import GraphedMHAMetadata, NonGraphedMHAMetadata from .base_context import BaseInferenceContext from .dynamic_block_allocator import BlockAllocator @@ -111,7 +113,7 @@ class BlockOverflowError(ContextOverflowError): class ActiveRequestCountOverflowError(ContextOverflowError): '''Used when `initialize_attention_state()` is called with - `num_warmup_requests > max_active_requests.''' + `num_warmup_requests > max_requests.''' def __init__(self, max_request_count, active_request_count): assert active_request_count > max_request_count @@ -122,13 +124,6 @@ def __init__(self, 
max_request_count, active_request_count): ) -class TensorStateDeallocatedError(ContextOverflowError): - """Context's tensor state is currently deallocated, such as when the engine - has been suspended.""" - - pass - - class ContextErrorFactory: """Factory class for serializing/deserializing context errors.""" @@ -180,15 +175,6 @@ class WarmupEngineMode(Enum): NON_DECODE = "non_decode" -def get_mem_size_str(n_bytes: int) -> str: - """Convert number of bytes to human-readable string.""" - for exp, suffix in ((4, "TB"), (3, "GB"), (2, "MB"), (3, "KB"), (0, "bytes")): - nquery = int(1024**exp) - if round(n_bytes / nquery) >= 1: - return "%.3g %s" % (n_bytes / nquery, suffix) - raise Exception(f"something went wrong, n_bytes={n_bytes}.") - - # pylint: disable=line-too-long class DynamicInferenceContext(BaseInferenceContext): """Inference context that is passed to the main model in order @@ -199,37 +185,64 @@ class DynamicInferenceContext(BaseInferenceContext): arbitrary sequence length may be added, paused, or removed from the context at any step. The only constraint is the maximum number of requests or tokens that the context is defined to support. For the block-level KV cache, a memory - buffer is allocated up front (size `buffer_size_gb` if `unified_memory_level` - == 0, or `2 * buffer_size_gb` if `unified_memory_level` == 1), that is - divided into blocks and dynamically assigned to requests. At any given step, - any unassigned blocks equate to unused space. + buffer is allocated up front (size `buffer_size_gb`), that is divided into + blocks and dynamically assigned to requests. At any given step, any unassigned + blocks equate to unused space. + + Additionally, a fraction of the memory buffer (`gtd_request_fraction`, i.e., + the 'guaranteed' request fraction) is reserved for guaranteeing that a + minimum number of active requests may continue to generate tokens on any step. 
+ The reason for this is that the context manages two pools of requests: 1) + active requests, and 2) paused requests. Paused requests are requests where + insufficient memory blocks remain for future assignment, and these requests + are set aside until enough memory blocks are available. Active requests are + requests that have sufficient memory blocks to proceed with their generations. + + The situation can arise where all requests eventually become paused due to all + memory blocks being assigned. In this case, there are no active requests and + thus no progress can be made. To handle this case, a fraction of the memory + buffer is reserved that only allows active requests, and no paused requests. + This fraction must be carefully tuned, as it can have an order of magnitude + impact on overall latency. Args: params_dtype (torch.dtype): Dtype used for KV cache. - num_layers (int): Number of layers on this pipeline parallel rank. + num_layers (int): Number of layers. kv_channels (int): Hidden dimension per attention head. num_attention_heads (int): Number of attention heads. max_sequence_length (int): Max possible sequence length (prompt + output) that will occur. - buffer_size_gb (float): Buffer size reserved on the GPU for the KV cache. - if `unified_memory_level` >= 1, then CPU memory is additionally - utilized, resulting in a total buffer size of `2 * buffer_size_gb`. - Regardless of total buffer size, the KV cache is conceptually divided - into 50% active requests and 50% paused requests. - max_tokens (int): Max number of tokens to use for forward passes. This is - primarily limited by prefill activation memory usage. (Defaults to - 16384). + buffer_size_gb (float): Total buffer size (GB), shared by main and + fallback contexts. block_size_tokens (int): Size of KV cache block size. + buffer_guaranteed_fraction (float): Fraction of the memory buffer that is + reserved to guarantee that one or more active requests are able to + run to completion. 
Without reserving this memory, paused requests are + able to fill the memory buffer and block execution of any requests. + buffer_overflow_factor (Optional[float]): Scaling factor over the buffer + size for auto computing `max_requests` and `max_tokens`. This scaling + factor is used for fitting more requests and tokens in the memory + buffer than it can safely hold, which in turn increases throughput. + max_requests_override (Optional[int]): If set, overrides value computed + from `buffer_overflow_factor`. + max_tokens_override (Optional[int]): If set, overrides value computed + from `buffer_overflow_factor`. tensor_model_parallel_size (Optional[int]): Tensor model parallel size. num_cuda_graphs (Optional[int]): Maximum number of cuda graphs to capture, - where the cuda graph batch sizes range from 1 to `max_active_requests` - (as computed below). Due to rounding, the actual number of cuda graphs - may not equal this argument. + where the cuda graph batch sizes range from 1 to `max_requests` (as + computed below). Due to rounding, the actual number of cuda graphs may + not equal this argument. materialize_only_last_token_logits (Optional[bool]): Whether to only materialize logits for the last token. This should be set to False if returning log probs. - mamba_inference_state_config (Optional[MambaInferenceStateConfig]): The Mamba - inference state config if the model is a hybrid model. + layer_type_list (Optional[List[str]]): A list of strings that indicates + the layer type (Mamba / Attention / MLP) for each layer. + See `megatron/core/ssm/mamba_hybrid_layer_allocation.py` for the list + of symbols. This must be provided for hybrid models. + mamba_conv_states_shape: (Optional[Tuple[int]]): Mamba conv states shape per request. + This must be provided for hybrid models. + mamba_ssm_states_shape: (Optional[Tuple[int]]): Mamba ssm states shape per request. + This must be provided for hybrid models. 
use_cuda_graphs_for_non_decode_steps (bool): If True, use cuda graphs for non-decode engine steps. unified_memory_level (Optional[int]): Set unified memory usage within the @@ -237,17 +250,10 @@ class DynamicInferenceContext(BaseInferenceContext): allocate `memory_buffer` in unified memory. Eventually, additional levels will be included to control other tensors within the context. use_flashinfer_fused_rope (bool): If True, use flashinfer's fused rope implementation. - If None, defaults to using flash-infer if available. + If None, defaults to using flash-infer if available. metrics_writer (Optional['WandbModule']): Wandb module for writing metrics. - num_request_metadata (Optional[int]): Number of metadata fields to track per request. - These represent metadata that is needed by the text generation controller, - and that must be kept in sync with active requests through update_requests. """ - DEFAULT_MAX_TOKENS = 16384 - TOKEN_ROUNDER = 64 - REQUEST_ROUNDER = 4 - def __init__( self, *, @@ -257,20 +263,24 @@ def __init__( num_attention_heads: int, max_sequence_length: int, buffer_size_gb: float, - max_tokens: int = DEFAULT_MAX_TOKENS, + buffer_guaranteed_fraction: float, block_size_tokens: int = 256, + buffer_overflow_factor: Optional[float] = None, + max_requests_override: Optional[int] = None, + max_tokens_override: Optional[int] = None, tensor_model_parallel_size: Optional[int] = None, cache_mla_latent: bool = False, kv_lora_rank: Optional[int] = None, qk_pos_emb_head_dim: Optional[int] = None, num_cuda_graphs: Optional[int] = None, materialize_only_last_token_logits: Optional[bool] = True, - mamba_inference_state_config: Optional[MambaInferenceStateConfig] = None, + layer_type_list: Optional[List[str]] = None, + mamba_conv_states_shape: Optional[Tuple[int]] = None, + mamba_ssm_states_shape: Optional[Tuple[int]] = None, use_cuda_graphs_for_non_decode_steps: bool = True, use_flashinfer_fused_rope: bool = False, - unified_memory_level: Optional[int] = 1, + 
unified_memory_level: Optional[int] = 0, metrics_writer: Optional['WandbModule'] = None, - num_request_metadata: Optional[int] = None, ): super().__init__(materialize_only_last_token_logits=materialize_only_last_token_logits) @@ -288,40 +298,36 @@ def __init__( tp_size = parallel_state.get_tensor_model_parallel_world_size() else: tp_size = tensor_model_parallel_size - self.hidden_size_per_attention_head = core_divide(projection_size, num_attention_heads) - self.num_attention_heads_per_partition = core_divide(num_attention_heads, tp_size) + hidden_size_per_attention_head = core_divide(projection_size, num_attention_heads) + num_attention_heads_per_partition = core_divide(num_attention_heads, tp_size) # Mamba states. - self.is_hybrid_model = mamba_inference_state_config is not None + self.is_hybrid_model = layer_type_list is not None and Symbols.MAMBA in layer_type_list if self.is_hybrid_model: - mamba_conv_states_shape = mamba_inference_state_config.mamba_conv_states_shape - mamba_ssm_states_shape = mamba_inference_state_config.mamba_ssm_states_shape assert ( mamba_conv_states_shape is not None ), "`mamba_conv_states_shape` must be specified for hybrid models" assert ( mamba_ssm_states_shape is not None ), "`mamba_ssm_states_shape` must be specified for hybrid models" - assert not ( - num_cuda_graphs is not None and use_cuda_graphs_for_non_decode_steps + assert ( + not use_cuda_graphs_for_non_decode_steps ), "Non-decode CUDA graphs not yet supported for hybrid models" # For hybrid models, the layer map converts the global layer index to the # corresponding attention layer index or Mamba layer index depending on the # layer type. 
- attention_layer_map, mamba_layer_map, _, _ = get_layer_maps_from_layer_type_list( - mamba_inference_state_config.layer_type_list + attention_layer_map, mamba_layer_map, _ = get_layer_maps_from_layer_type_list( + layer_type_list ) self.num_attention_layers = len(attention_layer_map) self.num_mamba_layers = len(mamba_layer_map) - self.mamba_conv_states_shape = mamba_conv_states_shape - self.mamba_ssm_states_shape = mamba_ssm_states_shape self.layer_map = attention_layer_map | mamba_layer_map else: # The layer map is the identity function for pure Transformer models. self.num_attention_layers = num_layers self.num_mamba_layers = 0 - (self.mamba_conv_states_shape, self.mamba_ssm_states_shape) = (None, None) + (mamba_conv_states_shape, mamba_ssm_states_shape) = (None, None) self.layer_map = {i: i for i in range(self.num_attention_layers)} if self.num_attention_layers == 0: @@ -334,12 +340,10 @@ def __init__( self.block_size_tokens = block_size_tokens if self.cache_mla_latent: # one vector c_t (rank) + optional RoPE phase slice - self.kv_reduced_dim = kv_lora_rank + qk_pos_emb_head_dim + kv_reduced_dim = kv_lora_rank + qk_pos_emb_head_dim + self.kv_reduced_dim = kv_reduced_dim self.block_size_bytes = ( - dtype_size_bytes - * self.num_attention_layers - * self.block_size_tokens - * self.kv_reduced_dim + dtype_size_bytes * num_layers * self.block_size_tokens * kv_reduced_dim ) else: self.block_size_bytes = ( @@ -347,18 +351,62 @@ def __init__( * 2 # key, value * self.num_attention_layers * self.block_size_tokens - * self.num_attention_heads_per_partition - * self.hidden_size_per_attention_head + * num_attention_heads_per_partition + * hidden_size_per_attention_head ) assert self.block_size_bytes > 0 + # Adjust buffer to be a multiple of block size. 
+ buffer_size_bytes = int(buffer_size_gb * 1024**3) + buffer_size_bytes_rem = buffer_size_bytes % self.block_size_bytes + buffer_size_bytes = buffer_size_bytes - buffer_size_bytes_rem + mamba_states_memory_per_request = 0 if self.is_hybrid_model: - mamba_states_memory_per_request += math.prod(self.mamba_conv_states_shape) - mamba_states_memory_per_request += math.prod(self.mamba_ssm_states_shape) + mamba_states_memory_per_request += math.prod(mamba_conv_states_shape) + mamba_states_memory_per_request += math.prod(mamba_ssm_states_shape) mamba_states_memory_per_request *= self.num_mamba_layers mamba_states_memory_per_request *= dtype_size_bytes + # Compute max_requets, max_tokens from buffer size, overflow factor, and Mamba state size. + def bytes_to_max_requests_and_tokens(n_bytes): + bytes_per_token = self.block_size_bytes / self.block_size_tokens + cost_per_request_bytes = ( + mamba_states_memory_per_request + max_sequence_length * bytes_per_token + ) + # TODO(ksanthanam): Leave room for an extra request in the event of padding + # for non-decode CUDA graphs + n_requests = n_bytes / cost_per_request_bytes + n_tokens = n_requests * max_sequence_length + n_requests = self.round_up_requests(int(n_requests), tp_size=tp_size) + n_tokens = self.round_up_tokens(int(n_tokens), tp_size=tp_size) + return n_requests, n_tokens + + self.max_requests, self.max_tokens = bytes_to_max_requests_and_tokens(buffer_size_bytes) + if buffer_overflow_factor is not None: + self.max_requests = self.round_up_requests( + int(self.max_requests * buffer_overflow_factor), tp_size=tp_size + ) + self.max_tokens = self.round_up_tokens( + int(self.max_tokens * buffer_overflow_factor / 50.0), tp_size=tp_size + ) + + if max_requests_override is not None: + self.max_requests = ( + max_requests_override + if max_requests_override < self.REQUEST_ROUNDER + else self.round_up_requests(max_requests_override, tp_size=tp_size) + ) + + if max_tokens_override is not None: + self.max_tokens = 
self.round_up_tokens(max_tokens_override, tp_size=tp_size) + + self.max_requests = min(self.max_requests, self.max_tokens) # e.g., decode only. + + # Initialize context state. + self.params_dtype = params_dtype + self.max_sequence_length = max_sequence_length + # Unified memory. self.unified_memory_level = unified_memory_level if unified_memory_level > 0: @@ -371,38 +419,6 @@ def __init__( ) self.unified_memory_level = 0 - # Initialize block allocator. - buffer_size_bytes = int(buffer_size_gb * 1024**3) - block_count_total = buffer_size_bytes // ( - self.block_size_bytes + mamba_states_memory_per_request - ) - self.block_allocator = BlockAllocator( - context=self, - total_count=( - block_count_total if self.unified_memory_level == 0 else 2 * block_count_total - ), - ) - - # Set max_total_requests, max_active_requests, max_tokens. - self.max_total_requests = self.block_allocator.total_count - 1 # -1 for dummy block - self.max_active_requests = self.block_allocator.active_count - self.max_tokens = max_tokens or self.DEFAULT_MAX_TOKENS - - assert self.max_tokens >= self.max_active_requests, ( - f"max_tokens ({self.max_tokens}) must be >= " - f"max_active_requests ({self.max_active_requests}), " - "to have consistency between cuda graph sizes and the block table size." - ) - - # Track request metadata. - if num_request_metadata is None: - num_request_metadata = len(DynamicInferenceRequest.get_metadata_labels()) - self.num_request_metadata = num_request_metadata - - # Initialize context state. - self.params_dtype = params_dtype - self.max_sequence_length = max_sequence_length - # Request and token counts. self.total_request_count = 0 self.active_token_count = 0 @@ -411,19 +427,93 @@ def __init__( self.padded_active_request_count = None self.paused_tokens = None + # Per-request state. 
+ self.request_ids = torch.full( + (self.max_requests,), -1, dtype=torch.int32, device=torch.cuda.current_device() + ) + # request_query_lengths is the input prompt tokens length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation) + self.request_query_lengths = torch.empty_like(self.request_ids) + # request_output_lengths is len(input_prompt_tokens) + num_tokens_to_generate + self.request_output_lengths = torch.empty_like(self.request_ids) + # request_kv_length_offsets is the same as query length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation) + self.request_kv_length_offsets = torch.empty_like(self.request_ids) + self.request_kv_block_counts = torch.empty_like(self.request_ids) + self.request_last_kv_block_id = torch.empty_like(self.request_ids) + # request_last_kv_block_offset represents number of tokens in the last kv block + self.request_last_kv_block_offset = torch.empty_like(self.request_ids) + + # Per-token state. + self.token_to_input_ids = torch.full( + (self.max_tokens,), 0, dtype=torch.long, device=torch.cuda.current_device() + ) + self.token_to_pos_ids = torch.full_like(self.token_to_input_ids, 0) + self.token_to_request_idx = torch.empty_like(self.token_to_input_ids) + self.token_to_block_idx = torch.empty_like(self.token_to_input_ids) + # i.e For a set of tokens A B C D E F .. and block_size 4: + # token_to_position_in_request is [0, 1, 2, 3, 4, 5] + # token_to_local_position_within_kv_block is [0 , 1, 2, 3, 0, 1, 2] + self.token_to_position_in_request = torch.empty_like(self.token_to_input_ids) + self.token_to_local_position_within_kv_block = torch.empty_like(self.token_to_input_ids) + + # Calculate the total number of chunks available in the buffer + total_mamba_states_memory = mamba_states_memory_per_request * self.max_requests + block_count_total = ( + max(0, buffer_size_bytes - total_mamba_states_memory) // self.block_size_bytes + ) + + # Memory buffer. 
+ ctx_manager = ( + torch.cuda.use_mem_pool(self.unified_memory_mempool) + if self.unified_memory_level > 0 + else nullcontext() + ) + with ctx_manager: + if cache_mla_latent: + self.memory_buffer = torch.full( + ( + self.num_attention_layers, + block_count_total, + self.block_size_tokens, + kv_reduced_dim, + ), + -1, + dtype=self.params_dtype, + device=torch.cuda.current_device(), + ) + else: + self.memory_buffer = torch.full( + ( + 2, # key and value + self.num_attention_layers, + block_count_total, + self.block_size_tokens, + num_attention_heads_per_partition, + hidden_size_per_attention_head, + ), + -1, + dtype=self.params_dtype, + device=torch.cuda.current_device(), + ) + # Block ids. self.max_kv_block_count = math.ceil(self.max_sequence_length / self.block_size_tokens) + self.request_to_kv_block_ids = torch.full( + (self.max_requests, self.max_kv_block_count), + -1, + dtype=torch.int, + device=torch.cuda.current_device(), + ) # Cuda graph token-counts (i.e., token counts used by cuda-graph steps, both decode and non-decode). self.cuda_graph_token_counts = None if num_cuda_graphs is not None: # Ensure valid num_cuda_graphs. - num_cuda_graphs = min(max(num_cuda_graphs, 1), self.max_active_requests) + num_cuda_graphs = min(max(num_cuda_graphs, 1), self.max_requests) # Cuda graph step size. cuda_graph_rounder = 8 - self.cuda_graph_step_size = self.max_active_requests / num_cuda_graphs + self.cuda_graph_step_size = self.max_requests / num_cuda_graphs self.cuda_graph_step_size = ( math.ceil(self.cuda_graph_step_size / cuda_graph_rounder) * cuda_graph_rounder ) @@ -432,17 +522,13 @@ def __init__( # Cuda graph token counts. 
if num_cuda_graphs == 1: - self.cuda_graph_token_counts = [self.max_active_requests] + self.cuda_graph_token_counts = [self.max_requests] else: self.cuda_graph_token_counts = list( - range( - self.cuda_graph_step_size, - self.max_active_requests, - self.cuda_graph_step_size, - ) + range(self.cuda_graph_step_size, self.max_requests, self.cuda_graph_step_size) ) - if self.cuda_graph_token_counts[-1] != self.max_active_requests: - self.cuda_graph_token_counts.append(self.max_active_requests) + if self.cuda_graph_token_counts[-1] != self.max_requests: + self.cuda_graph_token_counts.append(self.max_requests) self.cuda_graph_token_counts.reverse() # Set used for validating active cuda graph token count. @@ -464,205 +550,82 @@ def __init__( self.active_attn_metadata = None self.graph_attn_metadata["mha_metadata"] = GraphedMHAMetadata( - block_count_total=self.block_allocator.total_count, + block_count_total=block_count_total, max_kv_block_count=self.max_kv_block_count, - max_requests=self.max_total_requests, + max_requests=self.max_requests, block_size_tokens=self.block_size_tokens, max_seqlen=self.max_sequence_length, ) self.non_graph_attn_metadata["mha_metadata"] = NonGraphedMHAMetadata( - block_count_total=self.block_allocator.total_count, + block_count_total=block_count_total, max_kv_block_count=self.max_kv_block_count, - max_requests=self.max_total_requests, + max_requests=self.max_requests, block_size_tokens=self.block_size_tokens, max_seqlen=self.max_sequence_length, ) - # Deal with chunked prefill - self.chunked_prefill_request_id = -1 - - # FlashInfer. - if use_flashinfer_fused_rope is True: - assert HAVE_FLASHINFER, "flashinfer is not installed" - elif use_flashinfer_fused_rope is None: - use_flashinfer_fused_rope = HAVE_FLASHINFER - self.use_flashinfer_fused_rope = use_flashinfer_fused_rope - - # Allocate GPU state. - self.is_tensor_state_allocated = False - self.allocate_all_tensors(is_init=True) - - # Print info. 
- logging.info( - "DynamicInferenceContext: allocated context with active buffer size %s (%d blocks)." - % ( - get_mem_size_str(self.block_allocator.active_count * self.block_size_bytes), - self.block_allocator.active_count, - ) - ) - - def allocate_all_tensors(self, *, is_init: bool) -> None: - """Allocate GPU state. - - This method is used for both 1) initial allocation, and 2) resuming the - GPU state after a suspend. - - Args: - is_init (bool): True if this is being called from `__init__()`. - """ - - # Only allocate tensors when not using unified memory at all (level 0), - # or for initial allocation during `__init__()`. For levels 1 and 2, we do - # not perform any explicit allocations or deallocations after the initial - # call to `__init__()`. - if self.unified_memory_level != 0 and not is_init: - return - - # Mark allocated. - if self.is_tensor_state_allocated: - return - self.is_tensor_state_allocated = True - - # Validate no tensors allocated prior to this method. - for key in vars(self).keys(): - value = getattr(self, key) - assert not isinstance(value, torch.Tensor), ( - "All tensors should be allocated within `allocate_all_tensors()." - f"Please move tensor '{key}'." - ) - - # Per-request state. 
- self.request_ids = torch.full( - (self.max_total_requests,), -1, dtype=torch.int32, device=torch.cuda.current_device() - ) - # request_query_lengths is the input prompt tokens length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation) - self.request_query_lengths = torch.empty_like(self.request_ids) - # request_output_lengths is len(input_prompt_tokens) + num_tokens_to_generate - self.request_output_lengths = torch.empty_like(self.request_ids) - # request_kv_length_offsets is the same as query length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation) - self.request_kv_length_offsets = torch.empty_like(self.request_ids) - self.request_kv_block_counts = torch.empty_like(self.request_ids) - self.request_last_kv_block_id = torch.empty_like(self.request_ids) - # request_last_kv_block_offset represents number of tokens in the last kv block - self.request_last_kv_block_offset = torch.empty_like(self.request_ids) - self.request_to_kv_block_ids = torch.full( - (self.max_total_requests, self.max_kv_block_count), - -1, - dtype=torch.int, - device=torch.cuda.current_device(), - ) - - # Track request metadata. - self.request_metadata = torch.empty( - (self.max_total_requests, self.num_request_metadata), - dtype=torch.float32, - device=torch.cuda.current_device(), + # Guaranteed active requests. + # * See details in the class docstring above. `gtd_request_fraction` is + # the fraction of blocks in the memory buffer that are reserved for + # guaranteeing that some number of active requests can always proceed + # with their generations. The number of blocks defined by + # `buffer_guaranteed_fraction * block_count_total` is converted to a + # number of requests that this reserved space can safely handle + # (`gtd_request_count`). 
+ # * Note: computing the size of this guaranteed space from blocks rather + # than bytes is safer due to the non-linear impacts of a large + # `block_size_tokens` or `max_kv_block_count`. When computing from + # blocks, this space will always be less than `block_count_total`. When + # computing from bytes, this space can unexpectedly be much larger than + # `block_count_total`, resulting in stalled generations. + gtd_block_count = int(buffer_guaranteed_fraction * block_count_total) + gtd_block_count = min(gtd_block_count, block_count_total) + self.gtd_request_count = max(1, gtd_block_count // self.max_kv_block_count) + self.gtd_block_count = self.gtd_request_count * self.max_kv_block_count + + # Initialize allocator for KV memory blocks + self.block_allocator = BlockAllocator( + block_count_total=block_count_total, gtd_block_count=self.gtd_block_count ) - # Per-token state. - self.token_to_input_ids = torch.full( - (self.max_tokens,), 0, dtype=torch.long, device=torch.cuda.current_device() - ) - self.token_to_pos_ids = torch.full_like(self.token_to_input_ids, 0) - self.token_to_request_idx = torch.empty_like(self.token_to_input_ids) - self.token_to_block_idx = torch.empty_like(self.token_to_input_ids) - # i.e For a set of tokens A B C D E F .. and block_size 4: - # token_to_position_in_request is [0, 1, 2, 3, 4, 5] - # token_to_local_position_within_kv_block is [0 , 1, 2, 3, 0, 1, 2] - self.token_to_position_in_request = torch.empty_like(self.token_to_input_ids) - self.token_to_local_position_within_kv_block = torch.empty_like(self.token_to_input_ids) - - # Memory buffer. - def allocate_memory_buffer(): - """Allocate the memory buffer. 
This function is called below within - `with ctx_manager:`.""" - if self.cache_mla_latent: - self.memory_buffer = torch.full( - ( - self.num_attention_layers, - self.block_allocator.total_count, - self.block_size_tokens, - self.kv_reduced_dim, - ), - -1, - dtype=self.params_dtype, - device=torch.cuda.current_device(), - ) - else: - self.memory_buffer = torch.full( - ( - 2, # key and value - self.num_attention_layers, - self.block_allocator.total_count, - self.block_size_tokens, - self.num_attention_heads_per_partition, - self.hidden_size_per_attention_head, - ), - -1, - dtype=self.params_dtype, - device=torch.cuda.current_device(), - ) - # Optional state tensors for hybrid models - def allocate_mamba_states(): - """Allocate Mamba states. This function is called below within - `with ctx_manager:`.""" - if self.is_hybrid_model: - self.mamba_metadata = MambaMetadata(max_requests=self.max_total_requests) + if self.is_hybrid_model: + self.mamba_metadata = MambaMetadata(max_requests=self.max_requests) + + with ctx_manager: self.mamba_conv_states = torch.zeros( - (self.num_mamba_layers, self.max_total_requests) + self.mamba_conv_states_shape, + (self.num_mamba_layers, self.max_requests) + mamba_conv_states_shape, dtype=self.params_dtype, device=torch.cuda.current_device(), ) self.mamba_ssm_states = torch.zeros( - (self.num_mamba_layers, self.max_total_requests) + self.mamba_ssm_states_shape, + (self.num_mamba_layers, self.max_requests) + mamba_ssm_states_shape, dtype=self.params_dtype, device=torch.cuda.current_device(), ) - else: - self.mamba_metadata = None + else: + self.mamba_metadata = None - # Allocate `ctx_manager`-managed buffers. (For currently unknown reasons, - # `ctx_manager` can only be used once.) 
- ctx_manager = ( - torch.cuda.use_mem_pool(self.unified_memory_mempool) - if self.unified_memory_level > 0 - else nullcontext() - ) - with ctx_manager: - allocate_memory_buffer() - allocate_mamba_states() + # Store the dummy block idx reference for convenience + self.dummy_block_idx = self.block_allocator.dummy_block_idx + + # Deal with chunked prefill + self.chunked_prefill_request_id = -1 # Reset attention and Mamba state. self.reset_attention_state() self.reset_mamba_state() - def deallocate_all_tensors(self): - """Deallocate GPU state. - - This method is used for suspending the dynamic engine. - """ - - # Only deallocate tensors when not using unified memory at all (level 0). - # For levels 1 and 2, we do not perform any explicit allocations or - # deallocations after the initial call to `__init__()`. - if self.unified_memory_level != 0: - return - - # Mark deallocated. - if not self.is_tensor_state_allocated: - return - self.is_tensor_state_allocated = False + if use_flashinfer_fused_rope is True: + assert HAVE_FLASHINFER, "flashinfer is not installed" + elif use_flashinfer_fused_rope is None: + use_flashinfer_fused_rope = HAVE_FLASHINFER + self.use_flashinfer_fused_rope = use_flashinfer_fused_rope - # Delete all tensor attributes. - # TODO(@lmcafee): check that device == 'cuda'? - keys = list(vars(self).keys()) - for key in keys: - value = getattr(self, key) - if isinstance(value, torch.Tensor): - delattr(self, key) + TOKEN_ROUNDER = 64 + REQUEST_ROUNDER = 4 @classmethod def round_up_tokens(cls, value, tp_size=None): @@ -693,13 +656,13 @@ def from_config( max_batch_size: int, buffer_size_gb: float = 40, num_cuda_graphs: int = None, - mamba_inference_state_config: Optional[MambaInferenceStateConfig] = None, ): """ Instantiate a `DynamicInferenceContext` from a `TransformerConfig` and an `InferenceWrapperConfig`. 
""" # TODO: Add other necessary configs from inference_config + buffer_guaranteed_fraction = 0.1 model_config = model.config max_sequence_length = ( inference_config.inference_max_seq_length or model_config.max_sequence_length @@ -707,15 +670,16 @@ def from_config( max_sequence_length = max(max_sequence_length, max_batch_size) return cls( params_dtype=inference_config.params_dtype, - num_layers=model_config.num_layers // model_config.pipeline_model_parallel_size, + num_layers=model_config.num_layers, kv_channels=model_config.kv_channels, num_attention_heads=model_config.num_query_groups, max_sequence_length=inference_config.inference_max_seq_length, buffer_size_gb=buffer_size_gb, + buffer_guaranteed_fraction=buffer_guaranteed_fraction, materialize_only_last_token_logits=False, + max_requests_override=max_batch_size, num_cuda_graphs=num_cuda_graphs, use_flashinfer_fused_rope=None, - mamba_inference_state_config=mamba_inference_state_config, ) @classmethod @@ -856,7 +820,6 @@ def key_value_cache(self, layer_number: int) -> Tuple[Tensor, Tensor]: to blocks within the block-level memory buffer. """ attention_layer_number = self.layer_map[layer_number - 1] - if self.cache_mla_latent: return ( self.memory_buffer[attention_layer_number], @@ -1025,7 +988,7 @@ def initialize_attention_state( Args: num_warmup_tokens (Optional[int]): Number of tokens to use for warming up cuda graphs. Must be less than or equal to - `max_active_requests`. + `max_requests`. warmup_engine_mode (WarmupEngineMode): Denote whether to setup for a decode or a non-decode cuda-graph warmup. num_warmup_requests (Optional[int]): [DEPRECATED] Use num_warmup_tokens instead. 
@@ -1045,8 +1008,8 @@ def initialize_attention_state( # warmup both decode and non-decode engine steps if num_warmup_tokens is not None: - if num_warmup_tokens > self.max_active_requests: - raise ActiveRequestCountOverflowError(self.max_active_requests, num_warmup_tokens) + if num_warmup_tokens > self.max_requests: + raise ActiveRequestCountOverflowError(self.max_requests, num_warmup_tokens) if warmup_engine_mode == WarmupEngineMode.NON_DECODE: assert self.non_decode_cuda_graphs, "Set non-decode cuda graphs to True" @@ -1065,9 +1028,7 @@ def initialize_attention_state( math.ceil(active_token_count / self.cuda_graph_step_size) * self.cuda_graph_step_size ) - self.padded_active_token_count = min( - self.padded_active_token_count, self.max_active_requests - ) + self.padded_active_token_count = min(self.padded_active_token_count, self.max_requests) assert ( self.padded_active_token_count in self.cuda_graph_token_counts_set ), f"padded_active_token_count: {self.padded_active_token_count} not in cuda_graph_token_counts_set: {self.cuda_graph_token_counts_set}" @@ -1077,7 +1038,7 @@ def initialize_attention_state( if self.is_decode_only(): # For decode-only, the padded active token count cannot exceed max-requests. self.padded_active_token_count = min( - self.padded_active_token_count, self.max_active_requests + self.padded_active_token_count, self.max_requests ) # How are we calculating the padded active request count? @@ -1095,7 +1056,7 @@ def initialize_attention_state( # Update token position indexes. 
self.token_to_block_idx[self.active_token_count : self.padded_active_token_count] = ( - self.block_allocator.dummy_block_idx + self.dummy_block_idx ) self.token_to_local_position_within_kv_block[ self.active_token_count : self.padded_active_token_count @@ -1170,7 +1131,6 @@ def reset(self) -> None: self.request_last_kv_block_id.fill_(-1) self.request_last_kv_block_offset.fill_(0) self.request_to_kv_block_ids.fill_(-1) - self.request_metadata.fill_(0) # Reset token indexes. self.token_to_input_ids.fill_(0) @@ -1238,20 +1198,20 @@ def last_token_logits(self, logits: Tensor) -> Tensor: return last_token_logits - def check_availability(self, req: DynamicInferenceRequest) -> (bool, bool, bool): + def check_availability( + self, req: DynamicInferenceRequest, safe: bool = False + ) -> (bool, bool, bool): """ Check if the request can be added to the context. """ - request_can_be_added = ( - self.total_request_count - self.paused_request_count < self.max_active_requests - ) + request_can_be_added = self.total_request_count < self.max_requests request_tokens_can_be_added = ( self.active_token_count + req.remaining_prompt_length <= self.max_tokens ) blocks = math.ceil( (req.remaining_prompt_length + req.finished_chunk_token_count) / self.block_size_tokens ) - math.ceil(req.finished_chunk_token_count / self.block_size_tokens) - kv_cache_available = self.block_allocator.is_memory_available(blocks) + kv_cache_available = self.block_allocator.is_memory_available(blocks, safe=safe) return request_can_be_added, request_tokens_can_be_added, kv_cache_available def add_request(self, req: DynamicInferenceRequest, chunk_length: Optional[int] = None) -> None: @@ -1264,12 +1224,6 @@ def add_request(self, req: DynamicInferenceRequest, chunk_length: Optional[int] Return: None """ - - # If tensor state is deallocated, do not add request. - if not self.is_tensor_state_allocated: - raise TensorStateDeallocatedError(req.request_id) - - # Chunk length. 
if chunk_length is None: chunk_length = req.remaining_prompt_length @@ -1297,7 +1251,9 @@ def add_request(self, req: DynamicInferenceRequest, chunk_length: Optional[int] num_blocks_needed = overall_required_blocks - already_allocated_blocks if num_blocks_needed > 0: - new_block_ids = self.block_allocator.allocate_memory_blocks(num_blocks_needed) + new_block_ids = self.block_allocator.allocate_memory_blocks( + num_blocks_needed, safe=not is_chunked_prefill + ) if new_block_ids is None or len(new_block_ids) != num_blocks_needed: raise BlockOverflowError(req.request_id) @@ -1315,22 +1271,13 @@ def add_request(self, req: DynamicInferenceRequest, chunk_length: Optional[int] else: current_id = self.total_request_count - if current_id >= self.max_active_requests: + if current_id >= self.max_requests: raise RequestOverflowError(req.request_id) if self.active_token_count + chunk_length > self.max_tokens: raise TokenOverflowError(req.request_id) self.request_ids[current_id] = req.request_id - # Handle request metadata. - metadata = req.tracked_metadata - assert ( - len(metadata) == self.num_request_metadata - ), "Request added to context with invalid metadata length" - self.request_metadata[current_id] = torch.tensor( - metadata, dtype=torch.float32, device=self.request_metadata.device - ) - # Handle length and block assignments. 
self.request_query_lengths[current_id] = chunk_length self.request_output_lengths[current_id] = ( req.finished_chunk_token_count @@ -1395,7 +1342,6 @@ def _move_book_keeping_tensors(self, src_idxs, dst_idxs, next_tokens): self.request_kv_length_offsets[dst_idxs] = self.request_kv_length_offsets[src_idxs] self.request_query_lengths[dst_idxs] = self.request_query_lengths[src_idxs] self.request_output_lengths[dst_idxs] = self.request_output_lengths[src_idxs] - self.request_metadata[dst_idxs] = self.request_metadata[src_idxs] self.request_ids[dst_idxs] = self.request_ids[src_idxs] next_tokens[dst_idxs] = next_tokens[src_idxs] @@ -1416,7 +1362,6 @@ def _swap_book_keeping_tensors(self, src_idxs, dst_idxs, next_tokens): tensor_swap(self.request_kv_length_offsets, src_idxs, dst_idxs) tensor_swap(self.request_query_lengths, src_idxs, dst_idxs) tensor_swap(self.request_output_lengths, src_idxs, dst_idxs) - tensor_swap(self.request_metadata, src_idxs, dst_idxs) tensor_swap(self.request_ids, src_idxs, dst_idxs) tensor_swap(next_tokens, src_idxs, dst_idxs) tensor_swap(self.request_to_kv_block_ids, src_idxs, dst_idxs) @@ -1427,14 +1372,6 @@ def _swap_book_keeping_tensors(self, src_idxs, dst_idxs, next_tokens): if self.is_hybrid_model: tensor_swap(self.mamba_metadata.request_to_mamba_state_idx, src_idxs, dst_idxs) - def get_index_of_chunked_prefill_request(self) -> int: - """Get the index of the chunked prefill request in the context. - - Return: - (int) Index of the chunked prefill request, or -1 if none exists. - """ - return torch.where(self.request_ids == self.chunked_prefill_request_id)[0][0] - # TODO: see if we can compile this function def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> Tensor: """Update context state after calling engine.step(). @@ -1452,7 +1389,7 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T between these request groups. 
- 0:paused_request_count -> paused requests - paused_request_count:total_request_count -> active requests - - total_request_count:max_active_requests -> completed requests are moved here. + - total_request_count:max_requests -> completed requests are moved here. The reason for maintaining contiguous tensors rather than multiple smaller (e.g., per-group or per-request) tensors is for both 1) speed (avoid unnecessary tensor allocations), and 2) compatibility with the @@ -1476,7 +1413,6 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T Return: (Tensor) Newly paused request IDs. """ - # 1. The active token mask tells us which requests are still active and which are completed # active_request_count -> This corresponds to requests that have not reached EOD or max length # finished_request_count are requests that have reached the termination criterion @@ -1496,9 +1432,6 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T # Reset attention state. self.reset_attention_state() - # Update total_request_count. - self.total_request_count = active_request_count + self.paused_request_count - # 2. If no paused requests are present and no active requests we release memory and reset. if active_request_count + self.paused_request_count == 0: if finished_request_count > 0: @@ -1591,19 +1524,13 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T if self.chunked_prefill_request_id != -1: # find the id in request_ids that is the chunked_prefill_request_id. Only one request should be chunked. 
- active_requests_requiring_new_block[self.get_index_of_chunked_prefill_request()] = ( - 0 # chunked prefill should not be paused - ) + pos = torch.where(self.request_ids == self.chunked_prefill_request_id)[0][0] + active_requests_requiring_new_block[pos] = 0 # chunked prefill should not be paused active_requests_requiring_new_block_count = ( (active_requests_requiring_new_block == 1).sum().item() ) - if active_requests_requiring_new_block_count > 0: - newly_paused_request_ids = self.request_ids[ - torch.nonzero(active_requests_requiring_new_block) + self.paused_request_count - ] - # Swap unfinished active requests on the left side with paused requests on the right side # NOTE : We add paused request count because we concatenate # paused tokens to the left at the beginning of update requests @@ -1636,6 +1563,7 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T self._move_book_keeping_tensors( src_idxs=src_idxs, dst_idxs=dst_idxs, next_tokens=next_tokens ) + newly_paused_request_ids = self.request_ids[dst_idxs] self.paused_request_count += active_requests_requiring_new_block_count active_request_count -= active_requests_requiring_new_block_count @@ -1644,26 +1572,26 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T # We determine how many requests we can resume and resume them # Assign released blocks to paused requests. # todo: @shanmugamr, un-pause requests using FIFO, rather than LIFO. 
- resume_request_count = 0 - if self.paused_request_count > 0: - active_block_count_avail = self.block_allocator.get_active_avail() - paused_block_counts = self.request_kv_block_counts[: self.paused_request_count] - paused_block_counts = paused_block_counts.flip(dims=[0]) - paused_block_counts += 1 # +1 for newly added block - paused_block_counts_cumsum = paused_block_counts.cumsum(dim=0) + num_non_gtd_blocks = max(0, self.block_allocator.block_count_avail - self.gtd_block_count) + if num_non_gtd_blocks: + # if we have non-gtd blocks, use them. Do not dip into the gtd-block pool + resume_request_count = min(num_non_gtd_blocks, self.paused_request_count) + else: + # only dip into the gtd-block pool if we have run out of non-gtd-blocks and the active + # request count has fallen below a certain threshold. resume_request_count = min( - torch.nonzero(paused_block_counts_cumsum <= active_block_count_avail).numel(), - self.block_allocator.total_avail, + max(self.gtd_request_count - active_request_count, 0), self.paused_request_count ) self.paused_request_count -= resume_request_count active_request_count += resume_request_count assert active_request_count > 0, "active_request_count == %d." % active_request_count - # finally, swap the chunked prefill to the end of the active requests to obey the invariance + # finally, swap the chunked prefill to the end of the active requests to obey the invariant if self.chunked_prefill_request_id != -1: + pos = torch.where(self.request_ids == self.chunked_prefill_request_id)[0][0] self._swap_book_keeping_tensors( - src_idxs=torch.tensor([self.get_index_of_chunked_prefill_request()]), + src_idxs=torch.tensor([pos]), dst_idxs=torch.tensor([active_request_count + self.paused_request_count - 1]), next_tokens=next_tokens, ) @@ -1712,7 +1640,6 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T == 0 ), "The request_last_kv_block_offset should be 0 for the requests that just got resumed this step. 
" - assert resume_request_count <= self.block_allocator.total_avail block_ids = self.block_allocator.allocate_memory_blocks(resume_request_count) row_idx = torch.arange( self.paused_request_count, @@ -1834,11 +1761,11 @@ def get_kvcache_utilization_stats(self) -> dict: } """ # Total usable blocks exclude the reserved dummy block. - total_blocks = max(self.block_allocator.total_count - 1, 1) - block_count_avail = int(self.block_allocator.total_avail) + total_blocks = max(self.block_allocator.block_count_total - 1, 1) + block_count_avail = int(self.block_allocator.block_count_avail) # Overall allocated blocks in the buffer right now. - allocated_blocks = (self.block_allocator.total_count - 1) - block_count_avail + allocated_blocks = (self.block_allocator.block_count_total - 1) - block_count_avail allocated_blocks = int(max(0, allocated_blocks)) # Active unique blocks referenced by current active requests only. @@ -1860,6 +1787,7 @@ def get_kvcache_utilization_stats(self) -> dict: active_utilization = float(active_unique_blocks) / float(total_blocks) # Diagnostic helpers + num_non_gtd_blocks = max(0, block_count_avail - int(self.gtd_block_count)) total_request_count = int(self.total_request_count) return { 'total_blocks': int(total_blocks), @@ -1869,9 +1797,10 @@ def get_kvcache_utilization_stats(self) -> dict: 'active_utilization': active_utilization, 'active_request_count': int(self.get_active_request_count()), 'paused_request_count': int(self.paused_request_count), + 'gtd_block_count': int(self.gtd_block_count), 'block_count_avail': int(block_count_avail), + 'num_non_gtd_blocks': int(num_non_gtd_blocks), 'active_token_count': int(self.active_token_count), 'total_request_count': int(total_request_count), - 'max_total_requests': int(self.max_total_requests), - 'max_active_requests': int(self.max_active_requests), + 'max_requests': int(self.max_requests), } diff --git a/megatron/core/inference/data_parallel_inference_coordinator.py 
b/megatron/core/inference/data_parallel_inference_coordinator.py index e1fe7b21566..0045d5947a1 100644 --- a/megatron/core/inference/data_parallel_inference_coordinator.py +++ b/megatron/core/inference/data_parallel_inference_coordinator.py @@ -9,7 +9,7 @@ import torch -from megatron.core.inference.headers import Headers, UnknownHeaderError +from megatron.core.inference.headers import Headers try: import zmq @@ -109,8 +109,6 @@ def __init__(self, inference_coordinator_port: int, data_parallel_size: int): self.identities_of_data_parallel_ranks.append(identity) logging.info("Inference Coordinator: Connected with data parallel ranks...") self.data_parallel_rank_iterator = cycle(self.identities_of_data_parallel_ranks) - self.data_parallel_pause_acks = set() - self.data_parallel_stop_acks = set() self.request_id_to_client_id = {} self.request_id_to_client_request_id = {} @@ -153,7 +151,7 @@ def start(self): # print(f"New client connected: {sender_identity}") known_clients.add(sender_identity) self.router_socket.send_multipart( - [sender_identity, msgpack.packb([Headers.CONNECT_ACK.value], use_bin_type=True)] + [sender_identity, msgpack.packb([Headers.ACK.value], use_bin_type=True)] ) elif header == Headers.SUBMIT_REQUEST: @@ -195,13 +193,7 @@ def start(self): ), ] ) - elif header in [ - Headers.PAUSE, - Headers.UNPAUSE, - Headers.SUSPEND, - Headers.RESUME, - Headers.STOP, - ]: + elif header in [Headers.PAUSE, Headers.UNPAUSE, Headers.STOP]: # control signals for the engine # broadcast to all data parallel ranks if sender_identity not in known_clients: @@ -210,57 +202,13 @@ def start(self): self.router_socket.send_multipart( [data_parallel_rank_id, msgpack.packb([header.value], use_bin_type=True)] ) - if header == Headers.UNPAUSE: - self.data_parallel_pause_acks = set() - elif header == Headers.PAUSE_ACK: - # control signal ack from the engine - assert sender_identity in self.identities_of_data_parallel_ranks - assert sender_identity not in self.data_parallel_pause_acks 
- self.data_parallel_pause_acks.add(sender_identity) - # route to all clients only once we have gotten an ack from all data parallel ranks - if len(self.data_parallel_pause_acks) == self.data_parallel_size: - for client_id in known_clients: - self.router_socket.send_multipart( - [ - client_id, - msgpack.packb([header.value, sender_identity], use_bin_type=True), - ] - ) - for data_parallel_rank_id in self.identities_of_data_parallel_ranks: - self.router_socket.send_multipart( - [ - data_parallel_rank_id, - msgpack.packb([Headers.PAUSE_ACK.value], use_bin_type=True), - ] - ) - elif header == Headers.STOP_ACK: - # control signal ack from the engine - assert sender_identity in self.identities_of_data_parallel_ranks - assert sender_identity not in self.data_parallel_stop_acks - self.data_parallel_stop_acks.add(sender_identity) - # route to all clients only once we have gotten an ack from all data parallel ranks - if len(self.data_parallel_stop_acks) == self.data_parallel_size: - for client_id in known_clients: - self.router_socket.send_multipart( - [ - client_id, - msgpack.packb([header.value, sender_identity], use_bin_type=True), - ] - ) - for data_parallel_rank_id in self.identities_of_data_parallel_ranks: - self.router_socket.send_multipart( - [ - data_parallel_rank_id, - msgpack.packb([Headers.STOP_ACK.value], use_bin_type=True), - ] - ) elif header == Headers.ENGINE_REPLY: # This is the output of a single engine step on some data parallel rank. 
assert sender_identity in self.identities_of_data_parallel_ranks - finished_request_records = deserialized_payload[1] + finished_requests = deserialized_payload[1] - for finished_request_record in finished_request_records: - fid = finished_request_record["requests"][0]["request_id"] + for finished_request in finished_requests: + fid = finished_request["request_id"] client_identity = self.request_id_to_client_id[fid] client_request_identity = self.request_id_to_client_request_id[fid] del self.request_id_to_client_id[fid] @@ -270,15 +218,11 @@ def start(self): [ client_identity, msgpack.packb( - [header.value, client_request_identity, finished_request_record], - use_bin_type=True, + [client_request_identity, finished_request], use_bin_type=True ), ] ) - else: - raise UnknownHeaderError(header) - @classmethod def entrypoint( cls, ready_event: Event, inference_coordinator_port: int, data_parallel_size: int diff --git a/megatron/core/inference/engines/__init__.py b/megatron/core/inference/engines/__init__.py index d6a4f6eb694..9cd902d9d63 100644 --- a/megatron/core/inference/engines/__init__.py +++ b/megatron/core/inference/engines/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
from .abstract_engine import AbstractEngine -from .dynamic_engine import DynamicInferenceEngine, EngineSuspendedError +from .dynamic_engine import DynamicInferenceEngine from .static_engine import StaticInferenceEngine diff --git a/megatron/core/inference/engines/dynamic_engine.py b/megatron/core/inference/engines/dynamic_engine.py index 5fad1369308..4bff4f85fa8 100644 --- a/megatron/core/inference/engines/dynamic_engine.py +++ b/megatron/core/inference/engines/dynamic_engine.py @@ -4,13 +4,10 @@ import logging import multiprocessing import os -import socket import struct import time import warnings from collections import deque -from contextlib import contextmanager -from dataclasses import dataclass from datetime import datetime from itertools import repeat from typing import Dict, List, Optional, Tuple, Union @@ -30,19 +27,14 @@ DataParallelInferenceCoordinator, ) from megatron.core.inference.engines.abstract_engine import AbstractEngine -from megatron.core.inference.headers import Headers, UnknownHeaderError -from megatron.core.inference.inference_request import ( - DynamicInferenceRequest, - DynamicInferenceRequestRecord, - Status, -) +from megatron.core.inference.headers import Headers +from megatron.core.inference.inference_request import DynamicInferenceRequest, Status from megatron.core.inference.sampling_params import SamplingParams from megatron.core.inference.text_generation_controllers.text_generation_controller import ( TextGenerationController, ) from megatron.core.inference.utils import Counter, await_process_event -from megatron.core.transformer.cuda_graphs import delete_cuda_graphs -from megatron.core.utils import get_asyncio_loop, internal_api, trace_async_exceptions +from megatron.core.utils import get_asyncio_loop, trace_async_exceptions try: from tqdm import tqdm @@ -73,19 +65,6 @@ HAVE_WANDB = False wandb = None -try: - import psutil - - HAVE_PSUTIL = True -except ImportError: - HAVE_PSUTIL = False - - -class EngineSuspendedError(Exception): 
- """Engine is currently suspended and not performing steps.""" - - pass - def format_mem_bytes(mem_bytes): """Convert a byte count to a human-readable string in tb, gb, mb, kb, or bytes.""" @@ -96,14 +75,6 @@ def format_mem_bytes(mem_bytes): return "%d bytes" % mem_bytes -@dataclass(kw_only=True) -class RequestEntry: - """Entry in the engine's `self.requests` dict.""" - - record: DynamicInferenceRequestRecord - future: asyncio.Future - - # pylint: disable=line-too-long class DynamicInferenceEngine(AbstractEngine): """The dynamic inference engine. @@ -123,6 +94,9 @@ class DynamicInferenceEngine(AbstractEngine): batching and a dynamic block-level KV cache (similar to paged attention). random_seed (Optional[int]): Use a random seed if you want deterministic results. Defaults to None. + static_sampling (bool): If True, all requests are assumed to have the same + sampling parameters. This avoids needing to loop through all requests and + their sampling parameters every generation step, improving latency. inference_logging_step_interval (int): The step interval at which to log inference metrics to wandb. Defaults to 0, which means no logging. """ @@ -136,9 +110,17 @@ def __init__( *, track_paused_request_events: bool = False, enable_chunked_prefill: bool = True, + static_sampling: bool = False, inference_logging_step_interval: int = 0, ): + if enable_cuda_graph is not None: + warnings.warn( + "The `enable_cuda_graph` argument is deprecated and will be " + "removed in `megatron-core 0.15`. `enable_cuda_graph` is now " + "read directly from the transformer config object." + ) + assert isinstance( controller, TextGenerationController ), f"controller must be a TextGenerationController, got {type(controller)}" @@ -147,41 +129,31 @@ def __init__( ), f"context must be a DynamicInferenceContext, got {type(context)}" assert isinstance(random_seed, int), f"random_seed must be an int, got {type(random_seed)}" - # Deprecate `enable_cuda_graph`. 
- if enable_cuda_graph is not None: - warnings.warn( - "The `enable_cuda_graph` argument is deprecated and will be " - "removed in `megatron-core 0.15`. `enable_cuda_graph` is now " - "read directly from the transformer config object." - ) - self.enable_cuda_graph = enable_cuda_graph - else: - self.enable_cuda_graph = ( - controller.inference_wrapped_model.model.config.enable_cuda_graph - ) - - # Initialization options. + self.request_counter = Counter() self.controller = controller self.context = context self.random_seed = random_seed self.track_paused_request_events = track_paused_request_events + self.step_count = 0 + self.finished_request_count = 0 + self.waiting_request_ids = deque() + self.failed_request_ids = [] # deque() + self.request_counter = Counter() + self.requests: Dict[int, DynamicInferenceRequest] = {} + self.request_completion_futures: Dict[int, asyncio.Future] = {} + self.step_start_event = torch.cuda.Event(enable_timing=True) + self.step_end_event = torch.cuda.Event(enable_timing=True) + self.paused = False + self.stopped = False self.enable_chunked_prefill = enable_chunked_prefill - self.inference_logging_step_interval = inference_logging_step_interval - self.unified_memory_level = context.unified_memory_level - - if enable_cuda_graph is not None: - self.cuda_graph_impl = "local" if enable_cuda_graph else "none" - else: - self.cuda_graph_impl = controller.inference_wrapped_model.model.config.cuda_graph_impl - - # Initialize engine. 
- self.reset() + self.static_sampling = static_sampling + self.inference_logging_step_interval = inference_logging_step_interval # Configure wandb to use separate step counter for inference metrics (only once) if self.inference_logging_step_interval > 0 and self.context.metrics_writer is not None: logging.info( f"\033[1;93m[INFERENCE]\033[0m " - f"\033[1;95mLogging inference metrics to wandb (rank {self.rank})\033[0m" + f"\033[1;95mLogging inference metrics to wandb (rank {torch.distributed.get_rank()})\033[0m" ) if HAVE_WANDB and self.context.metrics_writer.__name__ == "wandb": # Make all inference/* metrics use inference_step as their x-axis @@ -202,43 +174,21 @@ def __init__( max_step = int(val) self.inference_step_offset = int(max_step) - # Create cuda graphs. - self.create_cuda_graphs() - - def reset(self) -> None: - """Reset by removing all requests and reset all state.""" - - self.context.reset() - - # Request state. - self.request_counter = Counter() - self.finished_request_count = 0 - - self.requests: Dict[int, RequestEntry] = {} - self.waiting_request_ids = deque() - self.failed_request_ids = [] + # Initialize the asyncio loop if it has not already been initialized. + # TODO: Start the engine loop here. + self._loop = get_asyncio_loop() + self._cond = asyncio.Condition() - # Timing and logging variables. - self.rank = torch.distributed.get_rank() - self.step_count = 0 - self.step_start_event = torch.cuda.Event(enable_timing=True) - self.step_end_event = torch.cuda.Event(enable_timing=True) + # Capture cuda graph. self.capture_stats = None - # Runtime state. - self._loop = get_asyncio_loop(getattr(self, "_loop", None)) - self._cond = asyncio.Condition() - self.running = asyncio.Event() - self.paused = asyncio.Event() - self.stopped = asyncio.Event() - self.received_pause: bool = False - self.received_stop: bool = False - self.suspend_signal = False - self.is_suspended = False - self.resume_request_ids = None - - # Coordinator state. 
- self.use_coordinator = False + if enable_cuda_graph is not None: + self.cuda_graph_impl = "local" if enable_cuda_graph else "none" + else: + self.cuda_graph_impl = controller.inference_wrapped_model.model.config.cuda_graph_impl + + if self.cuda_graph_impl == "local": + self.create_cuda_graphs() def create_cuda_graphs(self, reset_context: bool = True): """Create cuda graphs. @@ -249,10 +199,6 @@ def create_cuda_graphs(self, reset_context: bool = True): Args: reset_context (bool): Whether to reset the context after building cuda graphs. """ - - if self.cuda_graph_impl != "local": - return - context = self.context controller = self.controller @@ -261,7 +207,7 @@ def create_cuda_graphs(self, reset_context: bool = True): if moe_pad_experts and context.non_decode_cuda_graphs: context.non_decode_cuda_graphs = False - if self.rank == 0: + if torch.distributed.get_rank() == 0: warnings.warn( "MoE models do not support non-decode cuda graphs. " "Forcing non_decode_cuda_graphs to False." @@ -346,12 +292,10 @@ def create_cuda_graphs(self, reset_context: bool = True): self.capture_stats = capture_stats - @internal_api async def start_listening_to_data_parallel_coordinator( self, inference_coordinator_port: int, launch_inference_coordinator: bool = True, - verbose: bool = False, *, loop: Optional[asyncio.AbstractEventLoop] = None, ): @@ -362,18 +306,16 @@ async def start_listening_to_data_parallel_coordinator( `InferenceCoordinator`. It configures different ZMQ socket patterns based on the rank's role within the distributed topology. - Note that this method must be called on all ranks, as it uses blocking torch broadcasts. - The setup involves two primary roles within each data-parallel group: - 1. **MP Coordinator (TP_rank=0, PP_rank=0)**: This rank connects directly + 1. **TP Coordinator (TP_rank=0, PP_rank=0)**: This rank connects directly to the central coordinator via a ZMQ `DEALER` socket. 
It receives requests and uses a ZMQ `PUB` (publisher) socket to broadcast them - to all other ranks within its model-parallel (MP) group. - 2. **MP Workers (all other ranks)**: These ranks use ZMQ `SUB` (subscriber) - sockets to listen for requests broadcast by their local MP Coordinator. + to all other ranks within its tensor-parallel (TP) group. + 2. **TP Workers (all other ranks)**: These ranks use ZMQ `SUB` (subscriber) + sockets to listen for requests broadcast by their local TP Coordinator. - This architecture uses TCP sockets for both inter-node and intra-node broadcasts - within an MP group. + This architecture uses fast Inter-Process Communication (`ipc`) sockets for + intra-node broadcasts within a TP group. Finally, after setting up the communication channels and ensuring all ranks are synchronized, this method starts the main engine processing loop @@ -385,7 +327,12 @@ async def start_listening_to_data_parallel_coordinator( launch_inference_coordinator (bool, optional): If True, the global rank 0 process will spawn and manage the `InferenceCoordinator` process. Defaults to True. - verbose (bool): Whether to run in verbose mode. + + Note: + The current implementation uses `ipc` sockets for broadcasting requests + within a Tensor Parallel group, which limits each TP group to a single + physical node. For example, if you have 8 GPUs per node, then this will only + work with TP=[1,2,4,8] """ assert HAVE_ZMQ, ( @@ -396,25 +343,7 @@ async def start_listening_to_data_parallel_coordinator( "pip install msgpack" ) - self.zmq_context = zmq.Context().instance() - self.zmq_sockets = [] # keep track of all sockets created by this engine - - # Get world info. 
- dp_group = parallel_state.get_data_parallel_group() - dp_src = parallel_state.get_data_parallel_src_rank() - dp_size = parallel_state.get_data_parallel_world_size() - dp_rank = parallel_state.get_data_parallel_rank() - - mp_group = parallel_state.get_model_parallel_group() - mp_src = parallel_state.get_model_parallel_src_rank() - tp_rank = parallel_state.get_tensor_model_parallel_rank() - pp_rank = parallel_state.get_pipeline_model_parallel_rank() - - self.is_mp_coordinator = tp_rank == 0 and pp_rank == 0 - self.is_dp_coordinator = (dp_rank == 0) and self.is_mp_coordinator - - # Spawn a DP coordinator process and get the connection info. - if launch_inference_coordinator and self.is_dp_coordinator: + if launch_inference_coordinator and torch.distributed.get_rank() == 0: spawn_context = multiprocessing.get_context('spawn') coordinator_ready_event = spawn_context.Event() self.inference_coordinator_process = spawn_context.Process( @@ -427,223 +356,67 @@ async def start_listening_to_data_parallel_coordinator( ) self.inference_coordinator_process.start() - # Find available ports for MP and bind to them. - if self.is_mp_coordinator: - local_ip = socket.gethostname() - mp_req_sock = self.zmq_context.socket(zmq.PUB) - mp_req_sock.bind_to_random_port(f"tcp://{local_ip}") - mp_req_addr = mp_req_sock.getsockopt_string(zmq.LAST_ENDPOINT) - - mp_len_sock = self.zmq_context.socket(zmq.PUB) - mp_len_sock.bind_to_random_port(f"tcp://{local_ip}") - mp_len_addr = mp_len_sock.getsockopt_string(zmq.LAST_ENDPOINT) - else: - mp_req_addr = None - mp_len_addr = None - - # Broadcast addresses to respective ranks. - bcast = [mp_req_addr, mp_len_addr] - torch.distributed.broadcast_object_list(bcast, src=mp_src, group=mp_group) - [mp_req_addr, mp_len_addr] = bcast - + # Todo [Siddharth]: can we move this code to another file? 
+ self.zmq_context = zmq.Context() + self.zmq_sockets = [] # keep track of all sockets created by this engine ip_address_of_dp_coordinator = os.getenv('MASTER_ADDR', '127.0.0.1') - dp_addr = f"tcp://{ip_address_of_dp_coordinator}:{inference_coordinator_port}" - identity = f'mp-coord-{dp_rank}' - if self.is_mp_coordinator: + identity = f'tp-coord-{parallel_state.get_data_parallel_rank()}' + if ( + parallel_state.get_tensor_model_parallel_rank() == 0 + and parallel_state.get_pipeline_model_parallel_rank() == 0 + ): # 1. Create dealer sockets where tp_rank = 0 and pp_rank = 0 # These will receive requests from an InferenceCoordinator. self.socket_for_receiving_requests = self.zmq_context.socket(zmq.DEALER) self.socket_for_receiving_requests.setsockopt(zmq.IDENTITY, identity.encode('utf-8')) - self.socket_for_receiving_requests.connect(dp_addr) + self.socket_for_receiving_requests.connect( + f"tcp://{ip_address_of_dp_coordinator}:{inference_coordinator_port}" + ) # send empty string. this is used to register with the coordinator. self.socket_for_receiving_requests.send(b"") # 2. Create a publisher socket. This is used to publish or broadcast - # requests within the model parallel group - self.model_parallel_publisher_socket = mp_req_sock + # requests within the tensor parallel group + self.tensor_parallel_publisher_socket = self.zmq_context.socket(zmq.PUB) + self.tensor_parallel_publisher_socket.bind(f"ipc:///tmp/{identity}-tp-bcast-socket-req") # 3. Create another publisher socket to broadcast the number of messages to receive. 
- self.model_parallel_num_msgs_publisher_socket = mp_len_sock + self.tensor_parallel_num_msgs_publisher_socket = self.zmq_context.socket(zmq.PUB) + self.tensor_parallel_num_msgs_publisher_socket.bind( + f"ipc:///tmp/{identity}-tp-bcast-socket-len" + ) self.zmq_sockets += [ self.socket_for_receiving_requests, - self.model_parallel_num_msgs_publisher_socket, - self.model_parallel_publisher_socket, + self.tensor_parallel_num_msgs_publisher_socket, + self.tensor_parallel_publisher_socket, ] - # All MP ranks subscribe to the two publisher sockets - self.model_parallel_subscriber_socket = self.zmq_context.socket(zmq.SUB) - self.model_parallel_subscriber_socket.connect(mp_req_addr) - self.model_parallel_subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") - - self.model_parallel_num_msgs_subscriber_socket = self.zmq_context.socket(zmq.SUB) - self.model_parallel_num_msgs_subscriber_socket.connect(mp_len_addr) - self.model_parallel_num_msgs_subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") + # All TP ranks subscribe to the two publisher sockets + self.tensor_parallel_subscriber_socket = self.zmq_context.socket(zmq.SUB) + self.tensor_parallel_subscriber_socket.connect(f"ipc:///tmp/{identity}-tp-bcast-socket-req") + self.tensor_parallel_subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") + + self.tensor_parallel_num_msgs_subscriber_socket = self.zmq_context.socket(zmq.SUB) + self.tensor_parallel_num_msgs_subscriber_socket.connect( + f"ipc:///tmp/{identity}-tp-bcast-socket-len" + ) + self.tensor_parallel_num_msgs_subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") self.zmq_sockets += [ - self.model_parallel_subscriber_socket, - self.model_parallel_num_msgs_subscriber_socket, + self.tensor_parallel_subscriber_socket, + self.tensor_parallel_num_msgs_subscriber_socket, ] - torch.distributed.barrier(mp_group) + torch.distributed.barrier(parallel_state.get_tensor_model_parallel_group()) - if launch_inference_coordinator and self.is_dp_coordinator: + if 
launch_inference_coordinator and torch.distributed.get_rank() == 0: await await_process_event(coordinator_ready_event, self.inference_coordinator_process) logging.info("Inference co-ordinator is ready to receive requests!") # Finally run the engine infinite loop loop = get_asyncio_loop(loop) - self.engine_loop_task = loop.create_task( - self.run_engine_with_coordinator(loop=loop, verbose=verbose) - ) - - @contextmanager - @staticmethod - def suspend_resume_ctx(key: str, *, unified_memory_level: int) -> None: - """Context manager for of suspending and resuming the engine. - - This context manager records the time and memory usage when suspending - and resuming the context. TODO(@lmcafee): add argument to optionally - return nullcontext, to avoid overhead. - - Args: - key (str): Key that identifies caller (e.g., 'suspend' or 'resume'). - - Return: - None. - """ - - try: - - start_mem = torch.cuda.memory_stats() - start_time = time.time() - torch.cuda.synchronize() - - yield - - finally: - - end_time = time.time() - - end_mem = torch.cuda.memory_stats() - start_mem_alloc = start_mem["allocated_bytes.all.current"] - end_mem_alloc = end_mem["allocated_bytes.all.current"] - start_mem_res = start_mem["reserved_bytes.all.current"] - end_mem_res = end_mem["reserved_bytes.all.current"] - - rank_str = torch.distributed.get_rank() - dir_str = "deallocating" if end_mem_alloc <= start_mem_alloc else "allocating" - relative_time_str = f"{end_time - start_time:.3f} sec" - relative_mem_str = f"{abs(start_mem_alloc - end_mem_alloc) / 1024**3:.1f} gb" - - if HAVE_PSUTIL: - process = psutil.Process() - mem_info = process.memory_info() - cpu_mem_str = f"{mem_info.rss / 1024**3:.1f} gb" - else: - cpu_mem_str = "--" - - total_mem_str = ", ".join( - ( - f"cpu: {cpu_mem_str}", - f"gpu: alloc {end_mem_alloc / 1024**3:.1f} gb", - f"res {end_mem_res / 1024**3:.1f} gb", - ) - ) - logging.info( - f"[rank {rank_str}] dynamic engine {key}, " - f"unified {unified_memory_level}, " - f"{dir_str} " - 
f"{relative_mem_str} in {relative_time_str} ... " - f"abs mem usage: {total_mem_str}" - ) - - def suspend(self): - """Suspend engine by deallocating context's GPU state.""" - - # Skip if already suspended, which can happen when using the inference - # coordinator. - if self.is_suspended: - return - self.is_suspended = True - - # Deallocate context tensors. - with self.__class__.suspend_resume_ctx( - "suspended", unified_memory_level=self.unified_memory_level - ): - self.context.deallocate_all_tensors() - - # Delete cuda graphs when not using unified memory at all (level 0). For - # levels 1 and 2, the context's tensors maintain static memory addresses, - # so the cuda graphs are re-used. - if self.unified_memory_level == 0: - delete_cuda_graphs() - - # Maintain references to requests before reset. - waiting_request_ids = list(self.waiting_request_ids) - active_request_ids = set(self.requests.keys()) - set(waiting_request_ids) - self.resume_request_ids = [*active_request_ids, *waiting_request_ids] - self.waiting_request_ids.clear() - - # Suspend requests objects. - for request_id in active_request_ids: - self.requests[request_id].record.suspend(self.controller.tokenizer) - - def resume(self): - """Resume engine by reallocating context's GPU state.""" - - # Skip if not suspended, which can happen when using the inference - # coordinator. - if not self.is_suspended: - return - self.is_suspended = False - - # Resume. - with self.__class__.suspend_resume_ctx( - "resumed", unified_memory_level=self.unified_memory_level - ): - - # Allocate context tensors. - alloc_time = time.time() - torch.cuda.synchronize() - self.context.allocate_all_tensors(is_init=False) - torch.cuda.synchronize() - alloc_time = time.time() - alloc_time - - # Reset context and request data. - self.context.reset() - - # Create cuda graphs (before adding requests, to be in decode mode). - # Only create cuda graphs when not using unified memory at all (level - # 0). 
For levels 1 and 2, the context's tensors maintain static - # memory addresses, so the cuda graphs are re-used. - capture_time = time.time() - if self.unified_memory_level == 0: - self.create_cuda_graphs() - capture_time = time.time() - capture_time - - # Add requests. - add_time = time.time() - torch.cuda.synchronize() - for request_id in self.resume_request_ids: - self._add_request(self.get_request(request_id)) - torch.cuda.synchronize() - add_time = time.time() - add_time - - # Print inner timing (must be outside context manager above for correct formatting). - logging.info( - " > " - + ", ".join( - ( - f"inner timing: alloc {alloc_time:.3f}", - f"add {add_time:.3f}", - f"capture {capture_time:.3f}.", - ) - ) - ) - - # Notify event loop. - self._loop.call_soon_threadsafe(asyncio.create_task, self._notify_cond_for_new_request()) + self.engine_loop_task = loop.create_task(self.run_engine_with_coordinator(loop=loop)) @trace_async_exceptions async def _notify_cond_for_new_request(self): @@ -655,31 +428,19 @@ def has_unfinished_requests(self) -> bool: """Test if context contains unfinished requests.""" return self.context.has_unfinished_requests() or len(self.waiting_request_ids) > 0 - def get_request(self, request_id: int) -> DynamicInferenceRequest: - """Get most recent request from a request record. - - Args: - request_id (int): Request id. - - Returns: - (DynamicInferenceRequest) The most recent request in the record. - """ - return self.requests[request_id].record[-1] + def reset(self) -> None: + """Reset by removing all requests and reset all state.""" + self.context.reset() + self.waiting_request_ids.clear() + self.step_count = 0 + self.finished_request_count = 0 def _add_request( self, request: DynamicInferenceRequest ) -> asyncio.Future[DynamicInferenceRequest]: request_id = request.request_id - - # Add request to self.requests. If the engine has previously been - # suspended, then the request may already exist. 
- if request_id not in self.requests: - self.requests[request_id] = RequestEntry( - record=DynamicInferenceRequestRecord.from_request(request), - future=self._loop.create_future(), - ) - + self.requests[request_id] = request if request.status is None: request.status = Status.ACTIVE_AND_GENERATING_TOKENS @@ -695,17 +456,6 @@ def _add_request( request.sampling_params.num_tokens_to_generate = self.context.max_sequence_length - len( request.prompt_tokens ) - if request.sampling_params.termination_id is None: - try: - eod = self.controller.tokenizer.eod - except AttributeError: - if self.rank == 0: - warnings.warn( - "Termination ID not specified, and tokenizer does not define eod." - "Defaulting to not using termination id." - ) - eod = -1 - request.sampling_params.termination_id = eod if ( len(request.prompt_tokens) + request.sampling_params.num_tokens_to_generate @@ -720,10 +470,10 @@ def _add_request( if request.status != Status.FAILED: self.waiting_request_ids.append(request_id) - else: - self.failed_request_ids.append(request_id) - return self.requests[request_id].future + # Create a new asyncio Future to notify the user when the request has completed. + self.request_completion_futures[request_id] = self._loop.create_future() + return self.request_completion_futures[request_id] def add_request( self, @@ -741,6 +491,7 @@ def add_request( Return: Returns an asyncio `Future[DynamicInferenceRequest]` for the user to wait on. """ + prompt_str = None # Tokenize prompt if text. if isinstance(prompt, str): @@ -769,8 +520,8 @@ def add_request( # Initialize request. 
request = DynamicInferenceRequest( - request_id=request_id, prompt=prompt_str, + request_id=request_id, prompt_tokens=tokens, sampling_params=sampling_params, ) @@ -799,9 +550,9 @@ def post_process_requests( Returns: A list of active requests and completed requests as `DynamicInferenceRequest` objects """ - active_request_ids: list[int] = [] + active_requests: List[DynamicInferenceRequest] = [] + finished_requests: List[DynamicInferenceRequest] = [] finished_request_ids = set(finished_request_ids.tolist()) - finished_request_records: list[DynamicInferenceRequestRecord] = [] self.finished_request_count += len(finished_request_ids) log_probs_iter = log_probs if log_probs else repeat(None) @@ -809,7 +560,7 @@ def post_process_requests( for request_id, token, request_log_probs in zip( request_ids.tolist(), sample.tolist(), log_probs_iter ): - request: DynamicInferenceRequest = self.get_request(request_id) + request: DynamicInferenceRequest = self.requests[request_id] if request_id != self.context.chunked_prefill_request_id: request.generated_tokens.append(token) if request.tpot is None: @@ -843,20 +594,19 @@ def post_process_requests( if request_id in finished_request_ids: request.generated_length = len(request.generated_tokens) request.status = Status.COMPLETED - finished_entry = self.requests.pop(request_id) - finished_request = finished_entry.record[-1] + finished_request = self.requests.pop(request_id) if finished_request.prompt is None: finished_request.prompt = self.controller.tokenizer.detokenize( finished_request.prompt_tokens.tolist() ) finished_request.generated_length = len(finished_request.generated_tokens) + finished_requests.append(finished_request) finished_request.generated_text = self.controller.tokenizer.detokenize( finished_request.generated_tokens ) - finished_request_records.append(finished_entry.record) - finished_entry.future.set_result(finished_entry.record) + self.request_completion_futures[request_id].set_result(finished_request) else: - 
active_request_ids.append(request_id) + active_requests.append(request) else: # The chunked prefill produces useless tokens # so we are not appending them to the generated tokens. @@ -874,9 +624,9 @@ def post_process_requests( request.prompt_log_probs = [] request.prompt_log_probs.extend(request_log_probs) request.generated_log_probs = [] - active_request_ids.append(request_id) + active_requests.append(request) - return active_request_ids, finished_request_records + return active_requests, finished_requests def schedule_waiting_requests(self): """Tries to schedule any requests in the waiting pool.""" @@ -890,9 +640,9 @@ def schedule_non_chunked_prefill(self): Perform the same original scheduling logic for non-chunked runs """ while self.waiting_request_ids: - req = self.get_request(self.waiting_request_ids[0]) + req = self.requests[self.waiting_request_ids[0]] request_can_be_added, request_tokens_can_be_added, kv_cache_available = ( - self.context.check_availability(req) + self.context.check_availability(req, safe=True) ) if request_can_be_added and request_tokens_can_be_added and kv_cache_available: self.context.add_request(req) @@ -905,6 +655,37 @@ def schedule_non_chunked_prefill(self): else: break + def get_active_sampling_map(self) -> List[Tuple[SamplingParams, List[int]]]: + """Gets a map of sampling methods to active requests indices in the context.""" + # Get all active request IDs. + active_request_ids = self.context.request_ids[ + self.context.paused_request_count : self.context.total_request_count + ].tolist() + if self.static_sampling: + return [(next(iter(self.requests.values())).sampling_params, active_request_ids)] + + # Get a map from request_id to context array index. + context_id_map = {r: i for i, r in enumerate(active_request_ids)} + + # Create map of sampling methods to context array indices. 
+ sampling_map: List[Tuple[SamplingParams, List[int]]] = [] + for request_id, request in self.requests.items(): + if request_id not in context_id_map: + continue + context_id = context_id_map[request_id] + sp = request.sampling_params + + # Look for a pre-existing group with these sampling parameters. + for sampling, indices in sampling_map: + if sampling == sp: + indices.append(context_id) + break + # If no group exists, create a new one. + else: + sampling_map.append((sp, [context_id])) + + return sampling_map + def schedule_chunked_prefill(self): """ This function schedules chunked prefill requests. @@ -923,7 +704,7 @@ def schedule_chunked_prefill(self): can_schedule = True while self.waiting_request_ids and can_schedule: can_schedule = False - req = self.get_request(self.waiting_request_ids[0]) + req = self.requests[self.waiting_request_ids[0]] # is_continuing_chunked_prefill is True if we are scheduling next # chunk of a existing chunked prefill request @@ -935,7 +716,9 @@ def schedule_chunked_prefill(self): self.context.active_token_count + remaining_len <= self.context.max_tokens ) token_partially_can_be_added = self.context.active_token_count < self.context.max_tokens - request_can_be_added, _, kv_cache_available = self.context.check_availability(req) + request_can_be_added, _, kv_cache_available = self.context.check_availability( + req, safe=not is_continuing_chunked_prefill + ) request_can_be_added = is_continuing_chunked_prefill or request_can_be_added if request_can_be_added and kv_cache_available: @@ -964,157 +747,104 @@ def schedule_chunked_prefill(self): # chunked prefill request at the head of the waiting queue # Note that we do not need to continue check the queue, as the tokens are full - async def async_forward(self) -> Tuple[Dict, Dict, float, int]: - """Uses `asyncio` for continuous generation. - Sleeps when no requests are available, until new requests have been added. 
+ async def async_step( + self, *, verbose: Optional[bool] = False + ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]: + """ + Wrapper for controller.generate_output_tokens_dynamic_batch(), to + match vLLM API. Uses `asyncio` for continuous generation which allows this + method to sleep and wake up when new requests are available. + + Args: + sampling_params (SamplingParams): The sampling parameters. + verbose (bool): Whether to run in verbose mode. Returns: A tuple comprised of: - step_result (Optional[Dict]): The result of the step. - context_state (Dict): A tuple consisting of the state of the context. - is_decode_only, total/paused request count, active token count. - step_time (float): How long this step took. + 1. Requests that ran in the last step and are still active. + 2. Requests that ran in the last step and have now finished. + 3. The step time in seconds. """ - - # If suspended, no stepping. - if self.is_suspended: - raise EngineSuspendedError(self.step_count) - # schedule requests self.schedule_waiting_requests() - # Saving pre-step state, for printing output below. - is_decode_only = self.context.is_decode_only() - pre_step_context_state = { - "is_decode_only": is_decode_only, - "total_request_count": self.context.total_request_count, - "paused_request_count": self.context.paused_request_count, - "active_token_count": self.context.active_token_count, - } + # Previous context state, for printing output below. + prev_is_decode_only = self.context.is_decode_only() + prev_total_request_count = self.context.total_request_count + prev_paused_request_count = self.context.paused_request_count + prev_active_token_count = self.context.active_token_count + + range_push("Prefill" if not prev_is_decode_only else "Decode") # Generate tokens. - range_push("Prefill" if not is_decode_only else "Decode") - # TODO @TDE: Account for this line when overlapping forward and bookkeep. 
+ is_decode_only = self.context.is_decode_only() + # save the is_decode_only AFTER scheduling, BEFORE update self.is_decode_only = is_decode_only - self.step_start_event.record() - result = await self.controller.async_generate_output_tokens_dynamic_batch() + sampling_map = self.get_active_sampling_map() + result = await self.controller.async_generate_output_tokens_dynamic_batch(sampling_map) self.step_end_event.record() self.step_end_event.synchronize() step_time = self.step_start_event.elapsed_time(self.step_end_event) / 1e3 - self.step_count += 1 - - range_pop() - - if ( - self.inference_logging_step_interval > 0 - and step_count > 0 - and step_count % self.inference_logging_step_interval == 0 - and self.context.metrics_writer is not None - ): - kvcache_util_stats = self.context.get_kvcache_utilization_stats() - else: - kvcache_util_stats = None - - post_step_context_state = { - "waiting_request_count": len(self.waiting_request_ids), - "finished_request_count": self.finished_request_count, - "kv_stats": kvcache_util_stats, - "padded_active_token_count": self.context.padded_active_token_count, - "using_cuda_graph_this_step": self.context.using_cuda_graph_this_step(), - "total_active_block_count": self.context.block_allocator.active_count, - "total_paused_block_count": self.context.block_allocator.paused_count, - "total_active_used_blocks": self.context.block_allocator.get_active_used(), - "total_paused_used_blocks": self.context.block_allocator.get_paused_used(), - } - - context_state = {**pre_step_context_state, **post_step_context_state} - - return result, context_state, step_time, self.step_count - - async def async_bookkeep( - self, - step_result: Optional[Dict], - context_state: Dict, - step_time: float, - step_count: int, - *, - verbose: bool = False, - ): - """Uses `asyncio` for continuous bookkeeping. - - Args: - step_result (Optional[Dict]): The result of the step. - context_state (Dict): is_decode_only, total/paused request count, active token count. 
- step_time (float): How long this step took. - step_count (int): The count of the step. - verbose (bool): Whether to run in verbose mode. - Returns: - A dictionary containing: - active_requests (List): Requests that ran in the last step and are still active. - finished_requests (List): Requests that ran in the last step and have now finished. - step_time (float): The step time in seconds. - cuda_graph_request_count (int): The CUDA graph batch size matching this step. - """ # Increment finished_request_count. cuda_graph_request_count = None - if step_result is not None: - active_request_ids = step_result["active_request_ids"] - newly_paused_request_ids = step_result["newly_paused_request_ids"] - finished_request_ids = step_result["finished_request_ids"] - sample = step_result["sample"] - log_probs = step_result["log_probs"] - cuda_graph_request_count = step_result["cuda_graph_request_count"] + if result is not None: + active_request_ids = result["active_request_ids"] + newly_paused_request_ids = result["newly_paused_request_ids"] + finished_request_ids = result["finished_request_ids"] + sample = result["sample"] + log_probs = result["log_probs"] + cuda_graph_request_count = result["cuda_graph_request_count"] # Add paused events. if newly_paused_request_ids is not None and self.track_paused_request_events: newly_paused_request_ids = newly_paused_request_ids.tolist() - [self.get_request(i).add_event_pause() for i in newly_paused_request_ids] + [self.requests[i].add_event_pause() for i in newly_paused_request_ids] # Mark requests finished. - [self.get_request(i).add_event_finish() for i in finished_request_ids.tolist()] + [self.requests[i].add_event_finish() for i in finished_request_ids.tolist()] # Add finished events. 
- active_request_ids, finished_request_records = self.post_process_requests( + (active_requests, finished_requests) = self.post_process_requests( active_request_ids, finished_request_ids, step_time, sample, log_probs ) else: - active_request_ids: list[int] = [] - finished_request_records: list[DynamicInferenceRequestRecord] = [] + active_requests: List[DynamicInferenceRequest] = [] + finished_requests: List[DynamicInferenceRequest] = [] # Failed requests. for failed_request_id in self.failed_request_ids: - failed_entry = self.requests.pop(failed_request_id) - failed_request = failed_entry.record[-1] + failed_request = self.requests.pop(failed_request_id) failed_request.status = Status.FAILED failed_request.add_event_fail() - finished_request_records.append(failed_entry.record) - failed_entry.future.set_result(failed_entry.record) + finished_requests.append(failed_request) + self.request_completion_futures[failed_request_id].set_result(failed_request) self.failed_request_ids.clear() - # Handle necessary ZMQ DP coordinator communication. 
- if self.use_coordinator and self.is_mp_coordinator and finished_request_records: - payload = msgpack.packb( - [Headers.ENGINE_REPLY.value, [r.serialize() for r in finished_request_records]], - use_bin_type=True, - ) - self.socket_for_receiving_requests.send(payload) - # Log KV cache utilization stats to W&B - if context_state["kv_stats"] is not None: + if ( + self.inference_logging_step_interval > 0 + and self.step_count > 0 + and self.step_count % self.inference_logging_step_interval == 0 + and self.context.metrics_writer is not None + ): + + # Get KV cache utilization stats from dynamic context + kv_stats = self.context.get_kvcache_utilization_stats() + # Prepare metrics dictionary with all stats # Use 'inference/' prefix for all metrics to separate from training metrics metrics = { - 'inference/inference_step': int(self.inference_step_offset + int(step_count)), + 'inference/inference_step': int(self.inference_step_offset + int(self.step_count)), 'inference/step_time_s': float(step_time), 'inference/waiting_queue_len': int(len(self.waiting_request_ids)), 'inference/total_requests_dict_size': int(len(self.requests)), } # Add KV stats with inference/ prefix # Convert utilization metrics from 0-1 range to 0-100 percentage range for better visualization - for key, value in context_state["kv_stats"].items(): + for key, value in kv_stats.items(): if 'utilization' in key: # Convert to percentage (0-100) and group under kvcache_utilization metrics[f'inference/{key}'] = float(value * 100.0) @@ -1130,16 +860,15 @@ async def async_bookkeep( # Print context state. if verbose: + context = self.context mem = torch.cuda.memory_stats() - step_type = "decode" if context_state["is_decode_only"] else "non-decode" + step_type = "decode" if is_decode_only else "non-decode" output_str = ( - "* rank %d | step %d | %s ... time: %.3f%s ... " - "reqs: a %d/%d, p %d/%d, w %d, f %d ... " - "blocks: a %d/%d, p %d/%d ... " + "* step %d | %s ... time: %.3f%s ... 
" + "reqs: %d [ gtd %d, active %d, paused %d, finished %d ] ... " "mem: tensors %d, alloc %.1f gb, res %.1f gb." % ( - self.rank, - step_count, + self.step_count, datetime.now().strftime("%H:%M:%S"), step_time, ( @@ -1148,71 +877,44 @@ async def async_bookkeep( step_type, ( "DIM %d:%d" - % ( - context_state["padded_active_token_count"], - context_state["active_token_count"], - ) - if context_state["using_cuda_graph_this_step"] + % (context.padded_active_token_count, prev_active_token_count) + if self.context.using_cuda_graph_this_step() else "OFF" ), ) ), - context_state["total_request_count"] - context_state["paused_request_count"], - context_state["total_active_block_count"], - context_state["paused_request_count"], - context_state["total_paused_block_count"], - context_state["waiting_request_count"], - context_state["finished_request_count"], - context_state["total_active_used_blocks"], - context_state["total_active_block_count"], - context_state["total_paused_used_blocks"], - context_state["total_paused_block_count"], + prev_total_request_count, + context.gtd_request_count, + prev_total_request_count - prev_paused_request_count, + prev_paused_request_count, + self.finished_request_count, mem["allocation.all.current"], mem["allocated_bytes.all.current"] / (1024**3), mem["reserved_bytes.all.current"] / (1024**3), ) ) - if context_state["is_decode_only"]: + if prev_is_decode_only: output_str = f"\033[94m{output_str}\033[0m" logging.info(output_str) + self.step_count += 1 + + range_pop() return { - "active_request_ids": active_request_ids, - "finished_request_records": finished_request_records, + "active_requests": active_requests, + "finished_requests": finished_requests, "step_time": step_time, "cuda_graph_request_count": cuda_graph_request_count, } - async def async_step( - self, *, verbose: bool = False - ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]: - """ - Wrapper for controller.generate_output_tokens_dynamic_batch(), to - 
match vLLM API. Uses `asyncio` for continuous generation which allows this - method to sleep and wake up when new requests are available. - - Args: - verbose (bool): Whether to run in verbose mode. - - Returns: - A tuple comprised of: - 1. Requests that ran in the last step and are still active. - 2. Requests that ran in the last step and have now finished. - 3. The step time in seconds. - """ - last_step_data = await self.async_forward() - ret = await self.async_bookkeep(*last_step_data, verbose=verbose) - # Keep for compatibility with current test suite. - return ret - def step_modern( - self, *, verbose: bool = False + self, *, verbose: Optional[bool] = False ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]: """Synchronous wrapper for `self.async_step`.""" return self._loop.run_until_complete(self.async_step(verbose=verbose)) def step_legacy( - self, sampling_params: SamplingParams, *, verbose: bool = False + self, sampling_params: SamplingParams, *, verbose: Optional[bool] = False ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]: """Synchronous wrapper for `self.async_step`.""" warnings.warn( @@ -1220,10 +922,10 @@ def step_legacy( "0.16. Please use `step_modern()` going forward, which will eventually " "be renamed to `step()`." ) - result = self._loop.run_until_complete(self.async_step(verbose=verbose)) - active_requests = [self.get_request(i) for i in result["active_request_ids"]] - finished_requests = [r.merge() for r in result["finished_request_records"]] - return active_requests, finished_requests, result["step_time"] + result = self._loop.run_until_complete( + self.async_step(sampling_params=sampling_params, verbose=verbose) + ) + return (result["active_requests"], result["finished_requests"], result["step_time"]) # For backwards compatibility, point `step()` to `step_legacy()`. Starting in # `megatron-core` 0.16, `step_modern()` will be renamed to `step()`. 
@@ -1238,40 +940,39 @@ def generate( request_id = int(next(self.request_counter)) _ = self.add_request(request_id, prompt, sampling_params) - finished_request_records_list = [] + finished_requests_list = [] while self.has_unfinished_requests(): result = self.step_modern() - finished_request_records_list.extend(result["finished_request_records"]) + finished_requests_list.extend(result["finished_requests"]) - # Ensure requests are returned in the same order they were passed in. - finished_request_records_list.sort(key=lambda r: r.request_id) + # Ensure requests are returned in the same order they were passed in + finished_requests_list.sort(key=lambda x: x.request_id) - return finished_request_records_list + return finished_requests_list def schedule_requests(self) -> int: """Drains the ZMQ socket for a batch of requests and adds them to the engine. This method is a collective and synchronous operation that must be called - by all ranks in a Model Parallel (MP) group at the same time. It ensures + by all ranks in a Tensor Parallel (TP) group at the same time. It ensures that all ranks process the exact same batch of incoming requests and control signals. The synchronization works as follows: - 1. The MP rank 0 drains all pending messages from its subscriber socket + 1. The TP rank 0 drains all pending messages from its subscriber socket in a non-blocking manner. - 2. MP rank 0 then broadcasts the number of messages it received to all other - ranks in its MP group using a dedicated publisher socket. - 3. The other MP ranks wait to receive this count, and then receive exactly + 2. TP rank 0 then broadcasts the number of messages it received to all other + ranks in its TP group using a dedicated publisher socket. + 3. The other TP ranks wait to receive this count, and then receive exactly that many messages from their subscriber sockets. Once all ranks have the same batch of messages, they are unpacked and processed. 
New requests are added to the engine's queue, and control - signals (PAUSE, UNPAUSE, SUSPEND, RESUME, STOP) update the engine's - internal state. + signals (PAUSE, STOP, UNPAUSE) update the engine's internal state. Note: This function is synchronous and must be called collectively by all - ranks in a MP group. It should not be launched in a separate coroutine + ranks in a TP group. It should not be launched in a separate coroutine to ensure all ranks execute it in lockstep before proceeding to the next engine step. @@ -1279,9 +980,10 @@ def schedule_requests(self) -> int: int: The number of messages that were received and processed in this batch. """ + rank = parallel_state.get_tensor_model_parallel_rank() torch.cuda.nvtx.range_push("drain_zmq_socket") all_messages = [] - if self.is_mp_coordinator: + if rank == 0: while True: try: # Receive messages in a non-blocking way. @@ -1293,72 +995,37 @@ def schedule_requests(self) -> int: # First publish the number of messages to dequeue. # This is important because we want all tensor parallel ranks # to dequeue the same number of messages. - self.model_parallel_num_msgs_publisher_socket.send( + self.tensor_parallel_num_msgs_publisher_socket.send( struct.pack('!i', messages_to_dequeue) ) - # Now publish the actual messages to all model parallel ranks - if messages_to_dequeue > 0: - self.model_parallel_publisher_socket.send_multipart(all_messages) + # Now publish the actual messages to all tensor parallel ranks + for message in all_messages: + self.tensor_parallel_publisher_socket.send(message) else: - # First, receive the number of messages to dequeue from mp-rank 0 + # First, receive the number of messages to dequeue from tp-rank 0 messages_to_dequeue = struct.unpack( - '!i', self.model_parallel_num_msgs_subscriber_socket.recv() + '!i', self.tensor_parallel_num_msgs_subscriber_socket.recv() )[0] # Now, dequeue the same number of messages from the subscriber socket. 
# Note that these receives are blocking, because the messages # are guaranteed to be available after the tp-rank 0 has sent them. - if messages_to_dequeue > 0: - all_messages = self.model_parallel_subscriber_socket.recv_multipart() - else: - all_messages = [] + for _ in range(messages_to_dequeue): + all_messages.append(self.tensor_parallel_subscriber_socket.recv()) torch.cuda.nvtx.range_pop() for message in all_messages: data = msgpack.unpackb(message, raw=False) header = Headers(data[0]) - - if self.received_stop: - assert ( - header == Headers.STOP_ACK - ), "Engine is shutting down. No other messages allowed except STOP_ACK." - if header == Headers.SUBMIT_REQUEST: request_id, prompt, sampling_params = data[1:] sampling_params = SamplingParams.deserialize(sampling_params) self.add_request(request_id, prompt, sampling_params) elif header == Headers.PAUSE: - # Pause thyself. - self.received_pause = True - self.running.clear() - # Send PAUSE_ACK back to coordinator. - if self.is_mp_coordinator: - payload = msgpack.packb([Headers.PAUSE_ACK.value], use_bin_type=True) - self.socket_for_receiving_requests.send(payload) - elif header == Headers.STOP: - # Stop thyself. - self.received_stop = True - self.running.clear() - # Send STOP_ACK back to coordinator. 
- if self.is_mp_coordinator: - payload = msgpack.packb([Headers.STOP_ACK.value], use_bin_type=True) - self.socket_for_receiving_requests.send(payload) - elif header == Headers.PAUSE_ACK: - self.paused.set() - self.received_pause = False - elif header == Headers.STOP_ACK: - self.stopped.set() - self.stop() - elif header == Headers.UNPAUSE: - self.paused.clear() - self.running.set() - elif header == Headers.SUSPEND: - self.suspend_signal = True - elif header == Headers.RESUME: - self.suspend_signal = False + self.paused = True elif header == Headers.STOP: self.stopped = True - else: - raise UnknownHeaderError(header) + elif header == Headers.UNPAUSE: + self.paused = False return len(all_messages) @@ -1376,6 +1043,7 @@ def stop(self): for socket in self.zmq_sockets: socket.close() self.zmq_context.term() + parallel_state.destroy_model_parallel() @trace_async_exceptions async def run_engine( @@ -1383,20 +1051,15 @@ async def run_engine( ): """Continually steps the engine asynchronously.""" self._loop = get_asyncio_loop(loop) - self.use_coordinator = False try: while True: # Wait until there are active requests before proceeding. 
async with self._cond: await self._cond.wait_for( - lambda: ( - not self.is_suspended - and ( - self.context.get_active_request_count() > 0 - or self.waiting_request_ids - ) - ) + lambda: self.context.get_active_request_count() > 0 + or self.waiting_request_ids ) + await self.async_step(verbose=verbose) except asyncio.CancelledError: pass @@ -1407,14 +1070,14 @@ async def run_engine_with_coordinator( ): """Continually steps the engine asynchronously.""" self._loop = get_asyncio_loop(loop) - self.use_coordinator = True try: while True: self.schedule_requests() - if self.stopped.is_set(): - break + if self.stopped: + self.stop() + return - # for the cases below (no active requests, or undergoing a state-change) + # for the cases below (engine is paused or no active requests), # do not use asyncio.sleep(0) # as tp-rank=0 will flood the num_messages publisher # with "0" repeatedly. This causes some packets to drop. @@ -1426,20 +1089,10 @@ async def run_engine_with_coordinator( # todo [Siddharth]: Can this hardcoded sleep be avoided # with asyncio zmq sockets? - if self.paused.is_set() or self.received_pause or self.received_stop: - await asyncio.sleep(0.02) - continue - - # Suspend, resume. - if self.suspend_signal: - self.suspend() + if self.paused: await asyncio.sleep(0.02) continue - else: - self.resume() - - # No requests. 
if ( self.context.get_active_request_count() == 0 and len(self.waiting_request_ids) == 0 @@ -1447,7 +1100,25 @@ async def run_engine_with_coordinator( await asyncio.sleep(0.02) continue - await self.async_step(verbose=verbose) + engine_output = await self.async_step(verbose=verbose) + + is_tp0_and_pp0 = ( + parallel_state.get_tensor_model_parallel_rank() == 0 + and parallel_state.get_pipeline_model_parallel_rank() == 0 + ) + if ( + is_tp0_and_pp0 + and engine_output is not None + and engine_output["finished_requests"] + ): + payload = msgpack.packb( + [ + Headers.ENGINE_REPLY.value, + [r.serializable() for r in engine_output["finished_requests"]], + ], + use_bin_type=True, + ) + self.socket_for_receiving_requests.send(payload) except asyncio.CancelledError: pass diff --git a/megatron/core/inference/engines/static_engine.py b/megatron/core/inference/engines/static_engine.py index d4c61965d2b..dc86eb775f9 100644 --- a/megatron/core/inference/engines/static_engine.py +++ b/megatron/core/inference/engines/static_engine.py @@ -17,7 +17,7 @@ from megatron.core.inference.text_generation_controllers.text_generation_controller import ( TextGenerationController, ) -from megatron.core.utils import get_asyncio_loop, get_mamba_inference_state_config_from_model +from megatron.core.utils import get_asyncio_loop try: from tqdm import tqdm @@ -93,10 +93,6 @@ def __init__( # Store original context in case we need to fall back to legacy static engine original_context = text_generation_controller.inference_wrapped_model.inference_context - mamba_inference_state_config = get_mamba_inference_state_config_from_model( - text_generation_controller.inference_wrapped_model.model - ) - try: if not legacy: dynamic_context = DynamicInferenceContext.from_config( @@ -105,17 +101,16 @@ def __init__( max_batch_size=max_batch_size, buffer_size_gb=buffer_size_gb, num_cuda_graphs=1, - mamba_inference_state_config=mamba_inference_state_config, ) self.controller.inference_wrapped_model.inference_context 
= dynamic_context self.controller.inference_wrapped_model.prep_model_for_inference() - self.controller._init_dynamic_sampling_tensors() self.dynamic_engine = DynamicInferenceEngine( controller=self.controller, random_seed=self.random_seed, context=dynamic_context, enable_cuda_graph=True, + static_sampling=True, ) except Exception as e: # Get exception details for better debugging diff --git a/megatron/core/inference/headers.py b/megatron/core/inference/headers.py index a22d1328679..ff894cc1918 100644 --- a/megatron/core/inference/headers.py +++ b/megatron/core/inference/headers.py @@ -1,6 +1,6 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -from enum import Enum, auto +from enum import Enum class Headers(Enum): @@ -8,21 +8,10 @@ class Headers(Enum): Enum representing headers used for communication with the inference-coordinator. """ - CONNECT = auto() - CONNECT_ACK = auto() - SUBMIT_REQUEST = auto() - ENGINE_REPLY = auto() - PAUSE = auto() - PAUSE_ACK = auto() - UNPAUSE = auto() - SUSPEND = auto() - RESUME = auto() - STOP = auto() - STOP_ACK = auto() - - -class UnknownHeaderError(Exception): - """A signal with an unrecognized header was received by the coordinator.""" - - def __init_(self, header): - super().__init__(f"specialize for {header}.") + CONNECT = 0 + ACK = 1 + SUBMIT_REQUEST = 2 + ENGINE_REPLY = 3 + PAUSE = 4 + UNPAUSE = 5 + STOP = 6 diff --git a/megatron/core/inference/inference_client.py b/megatron/core/inference/inference_client.py index 8a19e226c46..53daac091b0 100644 --- a/megatron/core/inference/inference_client.py +++ b/megatron/core/inference/inference_client.py @@ -4,9 +4,9 @@ import logging import os import time -from typing import Awaitable, List, Optional, Union +from typing import List, Union -from megatron.core.inference.inference_request import DynamicInferenceRequestRecord +from megatron.core.inference.inference_request import DynamicInferenceRequest from megatron.core.inference.sampling_params import SamplingParams 
from megatron.core.utils import get_asyncio_loop, trace_async_exceptions @@ -73,11 +73,6 @@ def __init__(self, inference_coordinator_port: int): inference_coordinator_address = os.getenv('MASTER_ADDR', '127.0.0.1') socket.connect(f"tcp://{inference_coordinator_address}:{inference_coordinator_port}") - self._loop = None - self.running = asyncio.Event() - self.paused = asyncio.Event() - self.stopped = asyncio.Event() - self.socket = socket self.completion_futures = {} self.request_submission_times = {} @@ -97,55 +92,41 @@ def add_request( prompt (str): The input prompt to send to the language model. sampling_params: An object containing the sampling parameters for text generation (e.g., temperature, top_p). It must have a - `serialize()` method. + `serializable()` method. Returns: asyncio.Future: A future that will be resolved with a - `DynamicInferenceRequestRecord` object containing the completed result. + `DynamicInferenceRequest` object containing the completed result. """ - if not self.running.is_set(): - raise RuntimeError("InferenceClient is not currently running.") request_id = self.next_request_id self.next_request_id += 1 - payload = [Headers.SUBMIT_REQUEST.value, request_id, prompt, sampling_params.serialize()] + payload = [Headers.SUBMIT_REQUEST.value, request_id, prompt, sampling_params.serializable()] payload_serialized = msgpack.packb(payload, use_bin_type=True) self.socket.send(payload_serialized) assert request_id not in self.completion_futures - self.completion_futures[request_id] = self._loop.create_future() + self.completion_futures[request_id] = get_asyncio_loop().create_future() self.request_submission_times[request_id] = time.perf_counter() return self.completion_futures[request_id] @trace_async_exceptions - async def _recv_task(self): + async def _listen_for_completed_requests(self): """ Listens for completed inference requests from the coordinator. This coroutine runs in an infinite loop, continuously polling the socket - for data. 
- When a request reply is received, it unpacks the message, finds the + for replies. When a reply is received, it unpacks the message, finds the corresponding Future using the request ID, and sets the result. - Other control packets are handled appropriately. This method is started as a background task by the `start()` method. """ while True: try: - data = msgpack.unpackb(self.socket.recv(flags=zmq.NOBLOCK), raw=False) - header = Headers(data[0]) - if header == Headers.ENGINE_REPLY: - request_id, reply = data[1:] - reply['latency'] = time.perf_counter() - self.request_submission_times.pop( - request_id - ) - completion_future = self.completion_futures.pop(request_id) - if completion_future.done(): - logging.warning(f"Client: The future for {request_id} has been cancelled!") - continue - completion_future.set_result(DynamicInferenceRequestRecord.deserialize(reply)) - elif header == Headers.PAUSE_ACK: - self.paused.set() - elif header == Headers.STOP_ACK: - self.stopped.set() + request_id, reply = msgpack.unpackb(self.socket.recv(flags=zmq.NOBLOCK), raw=False) + reply['latency'] = time.perf_counter() - self.request_submission_times.pop( + request_id + ) + completion_future = self.completion_futures.pop(request_id) + completion_future.set_result(DynamicInferenceRequest.deserialize(reply)) except zmq.Again: await asyncio.sleep(0.005) continue @@ -156,15 +137,15 @@ def _connect_with_inference_coordinator(self): """ Performs the initial handshake with the inference coordinator. - Sends a CONNECT signal and waits for a CONNECT_ACK reply to ensure the + Sends a CONNECT signal and waits for an ACK reply to ensure the connection is established and acknowledged by the coordinator. 
""" payload = [Headers.CONNECT.value] self.socket.send(msgpack.packb(payload, use_bin_type=True)) reply = msgpack.unpackb(self.socket.recv(), raw=False)[0] - assert Headers(reply) == Headers.CONNECT_ACK + assert Headers(reply) == Headers.ACK - async def start(self, loop: Optional[asyncio.AbstractEventLoop] = None): + async def start(self): """ Connects to the coordinator and starts the background listener task. @@ -173,12 +154,8 @@ async def start(self, loop: Optional[asyncio.AbstractEventLoop] = None): coroutine. """ logging.info("Client: Connecting to InferenceCoordinator...") - self._loop = get_asyncio_loop(loop) - self.running.set() - self.paused.clear() - self.stopped.clear() self._connect_with_inference_coordinator() - self.listener_task = self._loop.create_task(self._recv_task()) + self.listener_task = asyncio.create_task(self._listen_for_completed_requests()) def _send_signal_to_engines(self, signal): """ @@ -191,52 +168,17 @@ def _send_signal_to_engines(self, signal): payload_serialized = msgpack.packb(payload, use_bin_type=True) self.socket.send(payload_serialized) - def pause_engines(self) -> Awaitable: - """Sends a signal to pause all inference engines. - - The signal first propagates thru the coordinator to all engines. - All engines acknowledge this signal and clear their `running` flags. - The coordinator awaits all acknowledgements before forwarding the ACK - back to the client, as well as to the engines. - The engines set their `paused` flags upon seeing the ACK. - - Returns: - Awaitable: An awaitable that resolves when all engines have paused. 
- """ - self._send_signal_to_engines(Headers.PAUSE) - return self.paused.wait() - - def unpause_engines(self) -> None: - """Sends a signal to unpause all inference engines.""" - self.paused.clear() - self.running.set() - self._send_signal_to_engines(Headers.UNPAUSE) - - def suspend_engines(self): + def pause_engines(self): """Sends a signal to pause all inference engines.""" self._send_signal_to_engines(Headers.PAUSE) - self._send_signal_to_engines(Headers.SUSPEND) - def resume_engines(self): + def unpause_engines(self): """Sends a signal to unpause all inference engines.""" - self._send_signal_to_engines(Headers.RESUME) self._send_signal_to_engines(Headers.UNPAUSE) - def stop_engines(self) -> Awaitable: - """Sends a signal to gracefully stop all inference engines. - - The signal first propagates thru the coordinator to all engines. - All engines acknowledge this signal and clear their `running` flags. - The coordinator awaits all acknowledgements before forwarding the ACK - back to the client, as well as to the engines. - The engines set their `stopped` flags upon seeing the ACK. - - Returns: - Awaitable: An awaitable that resolves when all engines have stopped. - """ + def stop_engines(self): + """Sends a signal to gracefully stop all inference engines.""" self._send_signal_to_engines(Headers.STOP) - self.running.clear() - return self.stopped.wait() def stop(self): """ diff --git a/megatron/core/inference/inference_request.py b/megatron/core/inference/inference_request.py index b58fac1b281..21ff7786d6a 100644 --- a/megatron/core/inference/inference_request.py +++ b/megatron/core/inference/inference_request.py @@ -11,18 +11,10 @@ import torch from megatron.core.inference.sampling_params import SamplingParams -from megatron.core.tokenizers import MegatronTokenizer -def serialize_tensor(tensor: torch.Tensor) -> bytes: - """Serialize tensor to bytes. - - Args: - tensor (Tensor): Tensor. - - Returns: - (bytes) Byte representation of tensor. 
- """ +def serialize_tensor(tensor): + """Serialize tensor to bytes.""" buffer = io.BytesIO() torch.save(tensor, buffer) buffer.seek(0) @@ -30,15 +22,8 @@ def serialize_tensor(tensor: torch.Tensor) -> bytes: return tensor_bytes -def deserialize_tensor(tensor_bytes: bytes) -> torch.Tensor: - """Deserialize tensor from bytes. - - Args: - tensor_bytes (bytes): Byte representation of tensor. - - Returns: - (Tensor) Tensor. - """ +def deserialize_tensor(tensor_bytes): + """Deserialize tensor from bytes.""" buffer = io.BytesIO(tensor_bytes) tensor = torch.load(buffer) return tensor @@ -91,12 +76,11 @@ def __post_init__(self): ) self.sampling_params = self.inference_parameters - def serialize(self) -> dict: - """Converts the instance into a serializable dictionary. - + def serializable(self): + """ + Converts the instance into a serializable dictionary. Returns: - (dict) A dictionary representation of the instance suitable for - serialization. + dict: A dictionary representation of the instance suitable for serialization. """ # Dataclass to dict. @@ -185,12 +169,11 @@ def __str__(self): payload_str = "" if self.payload is None else f", {type(self.payload).__name__}" return f"[{self.timestamp:.3f}] {self.type.name}{payload_str}" - def serialize(self) -> dict: - """Converts the instance into a serializable dictionary. - + def serialize(self): + """ + Converts the instance into a serializable dictionary. Returns: - (dict) A dictionary representation of the instance suitable for - serialization. + dict: A dictionary representation of the instance suitable for serialization. """ # Dataclass to dict. @@ -270,14 +253,13 @@ def __str__(self): ) ) - def serialize(self): - """Converts the instance into a serializable dictionary. - + def serializable(self): + """ + Converts the instance into a serializable dictionary. Returns: - (dict) A dictionary representation of the instance suitable for - serialization. 
+ dict: A dictionary representation of the instance suitable for serialization. """ - obj = super().serialize() + obj = super().serializable() obj["events"] = [e.serialize() for e in self.events] return obj @@ -295,39 +277,6 @@ def deserialize(cls, obj: dict) -> "DynamicInferenceRequest": request.events = [DynamicInferenceEvent.deserialize(e) for e in obj["events"]] return request - @property - def tracked_metadata(self) -> List[Any]: - """Obtain an ordered list of all request metadata to be tracked by the context. - - This consists of metadata that is used to inform text generation. - The values of such fields are tensorized and kept aligned with the current active batch. - - Note that while the general request object is mutable, this metadata is - inherently assumed to remain immutable once the request becomes active. - """ - sp = self.sampling_params - if sp.termination_id is None: - if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0: - warnings.warn( - f"DynamicInferenceRequest {self.request_id} has no termination_id set " - "in its sampling_params. Defaulting to -1." 
- ) - sp.termination_id = -1 - return [getattr(sp, field) for field in self.get_metadata_labels().keys()] - - @staticmethod - def get_metadata_labels() -> Dict[str, int]: - """Provides human-readable labels for the tracked metadata fields.""" - ret = [ - "temperature", - "top_k", - "top_p", - "termination_id", - "return_log_probs", - "skip_prompt_log_probs", - ] - return {k: v for v, k in enumerate(ret)} - def add_event(self, type: DynamicInferenceEventType, payload: Optional[Any] = None) -> None: """Add event.""" self.events.append(DynamicInferenceEvent(type=type, payload=payload)) @@ -365,158 +314,6 @@ def failed(self) -> bool: return self.status == Status.FAILED -@dataclass(kw_only=True) -class DynamicInferenceRequestRecord: - """History of DynamicInferenceRequest objects over multiple suspend and - resumes.""" - - requests: list[DynamicInferenceRequest] = field(default_factory=list) - latency: Optional[float] = None - - @classmethod - def from_request(cls, request: DynamicInferenceRequest) -> "DynamicInferenceRequestRecord": - """Initialize record from a single request. - - Args: - request (DynamicInferenceRequest): Initial request. - - Returns: - (DynamicInferenceRequestRecord) A record. - """ - record = cls() - record.requests.append(request) - return record - - def __getitem__(self, idx: int) -> DynamicInferenceRequest: - """Get request by index. - - Args: - idx (int): Request index. - - Returns: - (DynamicInferenceRequest) Request object. - """ - return self.requests[idx] - - @property - def request_id(self) -> int: - """Get request id. - - Returns: - (int) Request id. - """ - return self.requests[0].request_id - - def suspend(self, tokenizer: MegatronTokenizer): - """Suspend request by storing references to previous prompt, generations, - and sampling params. - - Args: - tokenizer (MegatronTokenizer): The tokenizer. - """ - - old_request = self[-1] - - # New prompt (concatenate prompt + generated tokens). 
- new_prompt_tokens = torch.cat( - ( - old_request.prompt_tokens, - torch.tensor( - old_request.generated_tokens, - dtype=old_request.prompt_tokens.dtype, - device=old_request.prompt_tokens.device, - ), - ), - dim=0, - ) - new_prompt_str = tokenizer.detokenize(new_prompt_tokens.tolist()) - - # New sampling params. - new_sampling_params = SamplingParams( - **{ - **asdict(old_request.sampling_params), - "num_tokens_to_generate": ( - old_request.sampling_params.num_tokens_to_generate - - len(old_request.generated_tokens) - ), - } - ) - - # New request. - new_request = DynamicInferenceRequest( - request_id=old_request.request_id, - prompt=new_prompt_str, - prompt_tokens=new_prompt_tokens, - sampling_params=new_sampling_params, - ) - self.requests.append(new_request) - - def merge(self, tokenizer: MegatronTokenizer) -> DynamicInferenceRequest: - """Merge requests into a single suspend-agnostic request object. - - Args: - tokenizer (MegatronTokenizer): The tokenizer. - - Returns: - (DynamicInferenceRequest) Merged request. - """ - - def merge_lists(key): - if getattr(self.requests[0], key) is None: - return None - else: - return [v for r in self.requests for v in getattr(r, key)] - - prompt_tokens = self.requests[0].prompt_tokens - generated_tokens = merge_lists("generated_tokens") - - # Merged request. 
- request = DynamicInferenceRequest( - request_id=self.requests[0].request_id, - prompt=tokenizer.detokenize(prompt_tokens.tolist()), - prompt_tokens=prompt_tokens, - prompt_log_probs=self.requests[0].prompt_log_probs, - prompt_top_n_logprobs=self.requests[0].prompt_top_n_logprobs, - generated_text=tokenizer.detokenize(generated_tokens), - generated_tokens=generated_tokens, - generated_length=len(generated_tokens), - generated_log_probs=merge_lists("generated_log_probs"), - generated_top_n_logprobs=merge_lists("generated_top_n_logprobs"), - sampling_params=self.requests[0].sampling_params, - tpot=merge_lists("tpot"), - status=self.requests[-1].status, - latency=self.latency, - events=merge_lists("events"), - ) - - return request - - def serialize(self) -> dict: - """Converts the instance into a serializable dictionary. - - Returns: - (dict) A dictionary representation of the instance suitable for - serialization. - """ - obj = asdict(self) - obj["requests"] = [r.serialize() for r in self.requests] - return obj - - @classmethod - def deserialize(cls, obj: dict) -> "DynamicInferenceRequestRecord": - """Deserialize record. - - Args: - obj (dict): Serialized record data. - - Returns: - (DynamicInferenceRequestRecord) Deserialized record. 
- """ - request = cls(**obj) - request.requests = [DynamicInferenceRequest.deserialize(r) for r in obj["requests"]] - return request - - @dataclass(kw_only=True) class VLMInferenceRequest(InferenceRequest): """Class for a VLM inference request""" diff --git a/megatron/core/inference/sampling_params.py b/megatron/core/inference/sampling_params.py index d85b2816c80..e215b3f134b 100644 --- a/megatron/core/inference/sampling_params.py +++ b/megatron/core/inference/sampling_params.py @@ -44,7 +44,7 @@ def add_attributes(self, attribute_value_pair: dict): for key, value in attribute_value_pair.items(): setattr(self, key, value) - def serialize(self) -> dict: + def serializable(self) -> dict: """Return a dictionary that is msgpack-serializable.""" return self.__dict__.copy() diff --git a/megatron/core/inference/text_generation_controllers/text_generation_controller.py b/megatron/core/inference/text_generation_controllers/text_generation_controller.py index 0aed3df079e..2bda1425710 100644 --- a/megatron/core/inference/text_generation_controllers/text_generation_controller.py +++ b/megatron/core/inference/text_generation_controllers/text_generation_controller.py @@ -23,11 +23,7 @@ MaxSequenceLengthOverflowError, WarmupEngineMode, ) -from megatron.core.inference.inference_request import ( - DynamicInferenceRequest, - InferenceRequest, - Status, -) +from megatron.core.inference.inference_request import InferenceRequest, Status from megatron.core.inference.model_inference_wrappers.abstract_model_inference_wrapper import ( AbstractModelInferenceWrapper, ) @@ -78,35 +74,6 @@ def __init__( self.sampling_rng = torch.Generator(device=torch.cuda.current_device()) self.sampling_rng.manual_seed(model_config.inference_sampling_seed) - if self.inference_wrapped_model.inference_context.is_dynamic_batching(): - self._init_dynamic_sampling_tensors() - - def _init_dynamic_sampling_tensors(self): - """Initialize tensors needed for dynamic sampling.""" - context = 
self.inference_wrapped_model.inference_context - max_requests = context.max_total_requests - - device = torch.cuda.current_device() - logits_dtype = self.inference_wrapped_model.inference_wrapper_config.params_dtype - # Use padded vocab size because tokenizer vocab size might pad to nearest power of 2. - vocab_size = self.inference_wrapped_model.inference_wrapper_config.padded_vocab_size - - # Initialize bookkeeping tensors. - self.sampling_logits_cuda = torch.empty( - max_requests, vocab_size, dtype=logits_dtype, device=device - ) - self.sampled_tokens_cuda = torch.empty(max_requests, dtype=torch.int64, device=device) - - self.temperature_cuda = torch.empty_like(self.sampled_tokens_cuda, dtype=torch.float) - self.top_k_cuda = torch.empty_like(self.sampled_tokens_cuda, dtype=torch.int32) - self.top_p_cuda = torch.empty_like(self.sampled_tokens_cuda, dtype=torch.float) - self.termination_id_cuda = torch.empty(max_requests, dtype=torch.int64, device=device) - self.return_log_probs_cuda = torch.empty(max_requests, dtype=torch.bool, device=device) - self.skip_prompt_log_probs_cuda = torch.empty(max_requests, dtype=torch.bool, device=device) - - # Used for inefficient torch sampling. - self.torch_sampling_buckets: List[Tensor] = [] - def tokenize_prompt(self, prompt: str, add_BOS: bool = False) -> List[int]: """Utility to tokenize the input prompts. 
@@ -210,14 +177,16 @@ def detokenize_generations( return text, prompts_plus_generations_segments - def _torch_sampling_func( + def sample_from_logits( self, last_token_logits: torch.Tensor, - temperature: float, - top_k: int, - top_p: float, + sampling_params: Optional[SamplingParams] = None, vocab_size: Optional[int] = None, - ): + generation_started: Optional[torch.Tensor] = None, + top_n_logprobs_dict: Dict[int, List[Dict[str, float]]] = None, + logits: Optional[torch.Tensor] = None, + **kwargs, + ) -> torch.Tensor: """Samples the logits to generate outputs Given the logits of the last token, this function samples it @@ -227,15 +196,26 @@ def _torch_sampling_func( Args: last_token_logits (torch.Tensor): The last token logits. A tensor of - size [batch_size, vocab_size]. - temperature (float): The temperature to use for sampling. - top_k (int): The top-k value to use for sampling. - top_p (float): The top-p value to use for sampling. - vocab_size (int): Obtained from the tokenizer. Defaults to None. + size [batch_size, vocab_size] + sampling_params (SamplingParams): The parameters to use for inference. + vocab_size (int): Obtained from the tokenizer. Defaults to None + generation_started (torch.Tensor): A boolean tensor of shape [batch_size]. True + indicates the prompt at that index has started generating tokens. + top_n_logprobs_dict (top_n_logprobs_dict): The dict to be updated Returns: sampled_logits (torch.Tensor): 1D tensor with [batch_size] elements + top_n_logprobs_this_step (torch.return_types.topk): a topk tensor with values as logits + and indices as the top k elements. None if sampling params top_n_logprobs is 0. 
""" + + if kwargs.get("common_inference_params"): + sampling_params = kwargs["common_inference_params"] + + top_p = sampling_params.top_p + top_k = sampling_params.top_k + temperature = sampling_params.temperature + assert isinstance(top_p, float) assert isinstance(top_k, int) assert not (top_k > 0 and top_p > 0.0), "Cannot have top-p and top-k both greater than zero" @@ -266,6 +246,53 @@ def modify_logits_for_top_p_filtering(logits, top_p): filter_ = filter_.scatter(1, sorted_indices, filter_) logits.masked_fill_(filter_, float("-Inf")) + if sampling_params.top_n_logprobs > 0: + # NOTE : This thing can also be clubbed with where we compute log probs + # when --return-log-probs is enabled. This is just more efficient + assert generation_started is not None + if logits is None: + batch_size = last_token_logits.shape[0] + last_token_log_probs = F.log_softmax(last_token_logits, dim=1).to(torch.float32) + top_n_logits_this_step = torch.topk( + last_token_log_probs, k=sampling_params.top_n_logprobs + ) + top_n_logprobs_this_step = top_n_logits_this_step.values.cpu() + top_n_logprobs_indices = top_n_logits_this_step.indices.cpu() + + # If we return prompt top_n_log_probs then we always append to the + # logprobs dict. Otherwise we only append for generated tokens. 
+ if sampling_params.return_prompt_top_n_logprobs: + mask = torch.ones(batch_size, dtype=torch.bool) + else: + mask = generation_started.cpu() + + self._update_top_n_logprobs_dict( + top_n_logprobs_this_step, top_n_logprobs_indices, mask, top_n_logprobs_dict + ) + else: + assert sampling_params.return_prompt_top_n_logprobs + + # Compute the prompt logprobs + batch_size, seq_length, _ = logits.shape + log_probs = F.log_softmax(logits, dim=2).to(torch.float32) + top_n_logits_this_step = torch.topk(log_probs, k=sampling_params.top_n_logprobs) + + # Move the token dimension to the front and then add each token logprobs + # individually for every request in the batch + top_n_logprobs_this_step = top_n_logits_this_step.values.permute(1, 0, 2).cpu() + top_n_logprobs_indices = top_n_logits_this_step.indices.permute(1, 0, 2).cpu() + + # We append to the logprobs dict for every prompt token + mask = torch.ones(batch_size, dtype=torch.bool) + + for i in range(seq_length): + self._update_top_n_logprobs_dict( + top_n_logprobs_this_step[i], + top_n_logprobs_indices[i], + mask, + top_n_logprobs_dict, + ) + # Greedy sampling if top_k == 1: sampled_logits = torch.argmax(last_token_logits, dim=-1) @@ -295,10 +322,10 @@ def modify_logits_for_top_p_filtering(logits, top_p): return sampled_logits - def sample_from_logits( + def sample_from_dynamic_logits( self, last_token_logits: torch.Tensor, - sampling_params: Optional[SamplingParams] = None, + active_sampling_map: List[Tuple[SamplingParams, List[int]]], vocab_size: Optional[int] = None, generation_started: Optional[torch.Tensor] = None, top_n_logprobs_dict: Dict[int, List[Dict[str, float]]] = None, @@ -308,14 +335,16 @@ def sample_from_logits( """Samples the logits to generate outputs Given the logits of the last token, this function samples it - according to the parameters defined in sampling_params + according to the parameters defined in active_sampling_map and returns the samples. 
If sampling parameters top_n_logprobs > 0 at each step it also updates the top_n_logprobs dict. Args: last_token_logits (torch.Tensor): The last token logits. A tensor of size [batch_size, vocab_size] - sampling_params (SamplingParams): The parameters to use for inference. + active_sampling_map (List[Tuple[SamplingParams, List[int]]]): A list of tuples + matching each unique set of sampling params to the context array indices + of the corresponding active requests. vocab_size (int): Obtained from the tokenizer. Defaults to None generation_started (torch.Tensor): A boolean tensor of shape [batch_size]. True indicates the prompt at that index has started generating tokens. @@ -323,65 +352,29 @@ def sample_from_logits( Returns: sampled_logits (torch.Tensor): 1D tensor with [batch_size] elements + termination_id (torch.Tensor): Tensor of shape [batch_size] with termination ids top_n_logprobs_this_step (torch.return_types.topk): a topk tensor with values as logits and indices as the top k elements. None if sampling params top_n_logprobs is 0. """ - - if kwargs.get("common_inference_params"): - sampling_params = kwargs["common_inference_params"] - - if sampling_params.top_n_logprobs > 0: - # NOTE : This thing can also be clubbed with where we compute log probs - # when --return-log-probs is enabled. 
This is just more efficient - assert generation_started is not None - if logits is None: - batch_size = last_token_logits.shape[0] - last_token_log_probs = F.log_softmax(last_token_logits, dim=1).to(torch.float32) - top_n_logits_this_step = torch.topk( - last_token_log_probs, k=sampling_params.top_n_logprobs + batch_size = last_token_logits.size(0) + new_sample = torch.zeros(batch_size, dtype=torch.int64, device=last_token_logits.device) + termination_id = torch.zeros_like(new_sample, dtype=torch.int64) + + for sampling_params, mask in active_sampling_map: + # Filter out indices that are out of bounds for the current batch + valid_mask = [i for i in mask if i < batch_size] + if valid_mask: + new_sample[valid_mask] = self.sample_from_logits( + last_token_logits[valid_mask], + sampling_params=sampling_params, + vocab_size=vocab_size, ) - top_n_logprobs_this_step = top_n_logits_this_step.values.cpu() - top_n_logprobs_indices = top_n_logits_this_step.indices.cpu() - - # If we return prompt top_n_log_probs then we always append to the - # logprobs dict. Otherwise we only append for generated tokens. 
- if sampling_params.return_prompt_top_n_logprobs: - mask = torch.ones(batch_size, dtype=torch.bool) + if sampling_params.termination_id is not None: + termination_id[valid_mask] = sampling_params.termination_id else: - mask = generation_started.cpu() - - self._update_top_n_logprobs_dict( - top_n_logprobs_this_step, top_n_logprobs_indices, mask, top_n_logprobs_dict - ) - else: - assert sampling_params.return_prompt_top_n_logprobs - - # Compute the prompt logprobs - batch_size, seq_length, _ = logits.shape - log_probs = F.log_softmax(logits, dim=2).to(torch.float32) - top_n_logits_this_step = torch.topk(log_probs, k=sampling_params.top_n_logprobs) - - # Move the token dimension to the front and then add each token logprobs - # individually for every request in the batch - top_n_logprobs_this_step = top_n_logits_this_step.values.permute(1, 0, 2).cpu() - top_n_logprobs_indices = top_n_logits_this_step.indices.permute(1, 0, 2).cpu() + termination_id[valid_mask] = self.tokenizer.eod - # We append to the logprobs dict for every prompt token - mask = torch.ones(batch_size, dtype=torch.bool) - - for i in range(seq_length): - self._update_top_n_logprobs_dict( - top_n_logprobs_this_step[i], - top_n_logprobs_indices[i], - mask, - top_n_logprobs_dict, - ) - - top_p = sampling_params.top_p - top_k = sampling_params.top_k - temperature = sampling_params.temperature - - return self._torch_sampling_func(last_token_logits, temperature, top_k, top_p, vocab_size) + return new_sample, termination_id def update_generation_status( self, @@ -542,12 +535,10 @@ def _dynamic_step_forward_logits(self, input_ids: Tensor, position_ids: Tensor) input_ids (Tensor): The input token IDs. position_ids (Tensor): The position IDs. 
""" - inference_wrapper_config = self.inference_wrapped_model.inference_wrapper_config - context = self.inference_wrapped_model.inference_context materialize_only_last_token_logits = context.materialize_only_last_token_logits - active_request_count = context.total_request_count - context.paused_request_count + inference_wrapper_config = self.inference_wrapped_model.inference_wrapper_config with torch.inference_mode(): logits = self.inference_wrapped_model.run_one_forward_step( @@ -555,8 +546,9 @@ def _dynamic_step_forward_logits(self, input_ids: Tensor, position_ids: Tensor) ) if self.model_is_pipeline_parallel: + batch_size = context.total_request_count - context.paused_request_count logits_seq_len = ( - active_request_count if materialize_only_last_token_logits else input_ids.shape[1] + batch_size if materialize_only_last_token_logits else input_ids.shape[1] ) vocab_size = inference_wrapper_config.padded_vocab_size logits_shape = [1, logits_seq_len, vocab_size] @@ -564,6 +556,8 @@ def _dynamic_step_forward_logits(self, input_ids: Tensor, position_ids: Tensor) if is_pipeline_last_stage(self.pp_group): assert logits is not None and torch.Size(logits_shape) == logits.shape + # TODO(ksanthanam): Evaluate whether it makes more sense to sample on 1 rank + # and then broadcast the sampled tokens rather than broadcasting the raw logits. logits = broadcast_from_last_pipeline_stage( logits_shape, dtype=inference_wrapper_config.params_dtype, @@ -573,95 +567,31 @@ def _dynamic_step_forward_logits(self, input_ids: Tensor, position_ids: Tensor) return logits def _dynamic_step_sample_bookkeeping( - self, - *, - backend: str = "torch", - request_metadata: Optional[Tensor] = None, - request_metadata_labels: Dict[str, int] = None, + self, active_sampling_map: List[Tuple[SamplingParams, List[int]]] ): - """Perform bookkeeping necessary to sample logits for dynamic batching. 
+ """Perform bookkeeping necessary to sample logits for dynamic batching.""" + pass - The ability to override the context's data is solely intended for - standalone use or testing, and should never be used in a running system. + def _dynamic_step_sample_logits( + self, logits: Tensor, active_sampling_map: List[Tuple[SamplingParams, List[int]]] + ) -> Tensor: + """Sample logits for dynamic batching. Args: - backend (str): The sampling backend to use. - request_metadata (Optional[Tensor]): An override for the tensor that manages all - request metadata, such as sampling parameters. By default, this metadata is - retrieved from the context. - request_metadata_labels (Optional[Dict]): An override for the map of metadata labels - to their index in the request_metadata tensor. By default, this metadata is - retrieved from the request object. - """ - assert backend in ["torch"] - context = self.inference_wrapped_model.inference_context - - if request_metadata is None: - request_metadata = context.request_metadata[ - context.paused_request_count : context.total_request_count, : - ] - if request_metadata_labels is None: - request_metadata_labels = DynamicInferenceRequest.get_metadata_labels() - active_request_count = request_metadata.size(0) - - # Shorthand these, because the torch backend needs them. - temp = request_metadata[:, request_metadata_labels["temperature"]] - top_k = request_metadata[:, request_metadata_labels["top_k"]] - top_p = request_metadata[:, request_metadata_labels["top_p"]] - - # Copy data into relevant tensors. 
- self.temperature_cuda[:active_request_count].copy_(temp, non_blocking=True) - self.top_k_cuda[:active_request_count] = top_k.to( - dtype=torch.int32, copy=True, non_blocking=True - ) - self.top_p_cuda[:active_request_count].copy_(top_p, non_blocking=True) - self.termination_id_cuda[:active_request_count] = request_metadata[ - :, request_metadata_labels["termination_id"] - ].to(dtype=torch.int64, copy=True, non_blocking=True) - self.return_log_probs_cuda[:active_request_count] = request_metadata[ - :, request_metadata_labels["return_log_probs"] - ].to(dtype=torch.bool, copy=True, non_blocking=True) - self.skip_prompt_log_probs_cuda[:active_request_count] = request_metadata[ - :, request_metadata_labels["skip_prompt_log_probs"] - ].to(dtype=torch.bool, copy=True, non_blocking=True) - - if backend == "torch": - # Bucketize the core sampling parameters. - core_params = torch.stack((temp, top_k, top_p), dim=1) - _, inv_indices, cnts = torch.unique( - core_params, dim=0, return_inverse=True, return_counts=True - ) - order = torch.argsort(inv_indices, stable=True) - sampling_buckets = torch.split(order, cnts.tolist()) - # Perform the D2H sync needed by `_torch_sampling_func` here. - group_reps = torch.stack([indices[0] for indices in sampling_buckets], dim=0) - core_params_reps = core_params[group_reps].detach().cpu() - temp_reps = core_params_reps[:, 0].tolist() - top_k_reps = core_params_reps[:, 1].to(torch.int32).tolist() - top_p_reps = core_params_reps[:, 2].tolist() - # Store the buckets and their equivalence class representatives. - self.torch_sampling_buckets = ( - (sampling_buckets[idx], temp_reps[idx], top_k_reps[idx], top_p_reps[idx]) - for idx in range(len(sampling_buckets)) - ) - - def _dynamic_step_sample_logits(self, logits: Tensor, backend: str = "torch") -> Tensor: - """Sample tokens from logits for dynamic batching. - - Args: - logits (Tensor): The logits to sample from. - backend (str): The sampling backend to use. 
+ logits (Tensor): The logits from the forward step. + active_sampling_map (List[Tuple[SamplingParams, List[int]]]): A list of tuples + matching each unique set of sampling params to the context array indices + of the corresponding active requests. Returns: - new_sample (Tensor): The sampled tokens. + new_sample (Tensor): The sampled tokens for each active request. + termination_id (int): The termination token IDs of each active request. """ - # TODO(ksanthanam): Evaluate whether it makes more sense to sample on 1 rank - # and then broadcast the sampled tokens rather than broadcasting the raw logits. - assert backend in ["torch"] - context = self.inference_wrapped_model.inference_context materialize_only_last_token_logits = context.materialize_only_last_token_logits + inference_wrapper_config = self.inference_wrapped_model.inference_wrapper_config + # Last token logits. if materialize_only_last_token_logits: # When materialize_only_last_token_logits is true, last_token_logits is @@ -669,72 +599,60 @@ def _dynamic_step_sample_logits(self, logits: Tensor, backend: str = "torch") -> last_token_logits = logits.squeeze(0) else: last_token_logits = context.last_token_logits(logits) - active_request_count = last_token_logits.size(0) - # Copy last_token_logits to contiguous buffer. - self.sampling_logits_cuda[:active_request_count].copy_(last_token_logits, non_blocking=True) - - if backend == "torch": - # Concatenate the outputs once to prevent repeated small writes. - token_list = [] - indices_list = [] - - for indices, temp, top_k, top_p in self.torch_sampling_buckets: - token_list.append( - self._torch_sampling_func( - self.sampling_logits_cuda[indices, :], temp, top_k, top_p - ) - ) - indices_list.append(indices) - # Single write to the output tensor. 
- sampled_tokens = torch.cat(token_list, dim=0) - sampled_indices = torch.cat(indices_list, dim=0) - self.sampled_tokens_cuda.index_copy_(0, sampled_indices, sampled_tokens) - return self.sampled_tokens_cuda[:active_request_count].clone() + # Sample. + # Use padded vocab size because tokenizer vocab size might not include padding + # to nearest power of 2. + vocab_size = inference_wrapper_config.padded_vocab_size + new_sample, termination_id = self.sample_from_dynamic_logits( + last_token_logits, active_sampling_map, vocab_size=vocab_size + ) + return new_sample, termination_id - def _dynamic_step_log_probs_bookkeeping(self) -> bool: + def _dynamic_step_log_probs_bookkeeping(self): """Perform bookkeeping necessary to compute log probs for dynamic batching.""" - context = self.inference_wrapped_model.inference_context - materialize_only_last_token_logits = context.materialize_only_last_token_logits + pass - active_request_count = context.total_request_count - context.paused_request_count - - to_check = self.return_log_probs_cuda[:active_request_count] - to_check &= ~self.skip_prompt_log_probs_cuda[:active_request_count] - - assert not ( - to_check.any() and materialize_only_last_token_logits - ), "Prompt log probs cannot be calculated if only last token logits are materialized." 
- - return self.return_log_probs_cuda[:active_request_count].any() - - def _dynamic_step_calculate_log_probs(self, logits: Tensor) -> Optional[Tensor]: - """Calculate log probs from logits.""" + def _dynamic_step_calculate_log_probs( + self, + logits: Tensor, + new_sample: Tensor, + active_sampling_map: List[Tuple[SamplingParams, List[int]]], + ) -> Optional[Tensor]: context = self.inference_wrapped_model.inference_context materialize_only_last_token_logits = context.materialize_only_last_token_logits - active_request_count = context.total_request_count - context.paused_request_count + log_probs = None + return_log_probs = False + for sampling_params, mask in active_sampling_map: + if sampling_params.return_log_probs: + assert ( + sampling_params.skip_prompt_log_probs + or materialize_only_last_token_logits is False + ), "Materialize only last token logits must be false for returning log probs" + return_log_probs = True - ret = context.calculate_log_probs( - logits, - self.sampled_tokens_cuda[:active_request_count], - only_last_token_logits=materialize_only_last_token_logits, - ) - return ret + if return_log_probs: + log_probs = context.calculate_log_probs( + logits, new_sample, only_last_token_logits=materialize_only_last_token_logits + ) - def _dynamic_step_context_bookkeeping(self, new_sample) -> Dict[str, Tensor]: + return log_probs + + def _dynamic_step_context_bookkeeping( + self, new_sample: Tensor, termination_id: int + ) -> Tuple[Tensor, Tensor, Tensor]: """Update the dynamic inference context after sampling. + Args: + new_sample (Tensor): The newly sampled tokens for each active request. + termination_id (int): The token ID that indicates termination. + Return: - Dict [str, Tensor]: A dictionary containing: - active_request_ids (Tensor): Current active request IDs. - newly_paused_request_ids (Tensor): Newly paused request IDs. - finished_request_ids (Tensor): Finished request IDs. + Tuple[Tensor, Tensor, Tensor]: active / paused / finished request IDs. 
""" context = self.inference_wrapped_model.inference_context - active_request_count = context.total_request_count - context.paused_request_count - # Active sequence lengths. active_request_ids = context.request_ids[ context.paused_request_count : context.total_request_count @@ -745,10 +663,9 @@ def _dynamic_step_context_bookkeeping(self, new_sample) -> Dict[str, Tensor]: # Request finished if termination_id or length >= max_sequence_length. # Note: termination_id tensor has per-request termination IDs from mixed sampling - active_request_mask = ( - self.sampled_tokens_cuda[:active_request_count] - != self.termination_id_cuda[:active_request_count] - ).byte() & torch.less(active_sequence_lengths, max_sequence_lengths).byte() + active_request_mask = (new_sample != termination_id).byte() & torch.less( + active_sequence_lengths, max_sequence_lengths + ).byte() finished_idxs = ( torch.nonzero(active_request_mask == 0, as_tuple=True)[0] + context.paused_request_count ) @@ -768,11 +685,16 @@ def _dynamic_step_context_bookkeeping(self, new_sample) -> Dict[str, Tensor]: @torch.inference_mode() async def async_generate_output_tokens_dynamic_batch( - self, skip_bookkeeping: Optional[bool] = False + self, + active_sampling_map: List[Tuple[SamplingParams, List[int]]], + skip_bookkeeping: Optional[bool] = False, ) -> Optional[Dict]: """Forward step the model and update the inference context. Args: + active_sampling_map (List[Tuple[SamplingParams, List[int]]]): A list of tuples + matching each unique set of sampling params to the context array indices + of the corresponding active requests. skip_bookkeeping (Optional[bool]): If true, skip the context bookkeeping step. Return: @@ -793,12 +715,13 @@ async def async_generate_output_tokens_dynamic_batch( if context.active_token_count == 0: return None + # This method only performs computations using CPU tensors. 
input_ids, position_ids = self._dynamic_step_context_init() - cuda_graph_request_count = ( context.padded_active_request_count if context.is_decode_only() else None ) + # This method only performs computations using GPU tensors. logits = self._dynamic_step_forward_logits(input_ids, position_ids) # This is the best place to yield control back to event loop. @@ -810,35 +733,41 @@ async def async_generate_output_tokens_dynamic_batch( # NOTE [TDE]: This will be moved once CPU and GPU methods are separated. await asyncio.sleep(0) - self._dynamic_step_sample_bookkeeping() - new_sample = self._dynamic_step_sample_logits(logits) + # This method will only perform computations using CPU tensors in the future. + self._dynamic_step_sample_bookkeeping(active_sampling_map) + # This method will only perform computations using GPU tensors in the future. + new_sample, termination_id = self._dynamic_step_sample_logits(logits, active_sampling_map) - return_log_probs = self._dynamic_step_log_probs_bookkeeping() - if return_log_probs: - log_probs = self._dynamic_step_calculate_log_probs(logits) - else: - log_probs = None + # This method will only perform computations using CPU tensors in the future. + self._dynamic_step_log_probs_bookkeeping() + # This method will only perform computations using GPU tensors in the future. + log_probs = self._dynamic_step_calculate_log_probs(logits, new_sample, active_sampling_map) + # This method only performs computations using CPU tensors. 
if skip_bookkeeping: - request_bookkeeping = {} + request_bookeeping = {} else: - request_bookkeeping = self._dynamic_step_context_bookkeeping(new_sample) + request_bookeeping = self._dynamic_step_context_bookkeeping(new_sample, termination_id) ret = { "sample": new_sample, "log_probs": log_probs, "cuda_graph_request_count": cuda_graph_request_count, } - ret.update(request_bookkeeping) + ret.update(request_bookeeping) return ret @torch.inference_mode() def generate_output_tokens_dynamic_batch( - self, loop: Optional[asyncio.AbstractEventLoop] = None + self, + active_sampling_map: List[Tuple[SamplingParams, List[int]]], + loop: Optional[asyncio.AbstractEventLoop] = None, ) -> Optional[Dict]: """Synchronous wrapper for `self.async_generate_output_tokens_dynamic_batch.""" loop = get_asyncio_loop(loop) - return loop.run_until_complete(self.async_generate_output_tokens_dynamic_batch()) + return loop.run_until_complete( + self.async_generate_output_tokens_dynamic_batch(active_sampling_map) + ) def _update_top_n_logprobs_dict( self, diff --git a/megatron/core/inference/unified_memory.py b/megatron/core/inference/unified_memory.py index e06e3022561..6e5e85ed668 100644 --- a/megatron/core/inference/unified_memory.py +++ b/megatron/core/inference/unified_memory.py @@ -56,9 +56,9 @@ def compile_allocator(): EXPORT void* managed_malloc(size_t size, int device, void* stream) { (void)stream; - int prev_device = -1; - cudaGetDevice(&prev_device); - if (device != prev_device && device >= 0) cudaSetDevice(device); + int cur = -1; + cudaGetDevice(&cur); + if (device != cur && device >= 0) cudaSetDevice(device); // cudaMallocManaged allows for more memory to be allocated than the device memory size. // The cudaMemAttachGlobal flag makes the memory accessible from both host and device. @@ -69,32 +69,13 @@ def compile_allocator(): if (device >= 0) { // cudaMemAdviseSetPreferredLocation sets the preferred location for the memory. 
// This is a hint that tries to prevent data from being migrated away from the device. - - #if CUDART_VERSION >= 13000 - // For CUDA >= 13, the cudaMemAdvise device arg is type cudaMemLocation - // instead of an int, so we setup the location and conditionally use it - // in calls to cudaMemAdvise. - cudaMemLocation location; - location.type = cudaMemLocationTypeDevice; - location.id = device; - - cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, location); - - // cudaMemAdviseSetAccessedBy ensures the memory always lives in the device's page table. - // Even if the memory has to be migrated away from the device, it still does not page fault. - // The CUDA docs claim that cudaMemAdviseSetPreferredLocation completely overrides this flag, - // but there is no harm in adding this flag as well for future-proofing. - cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, location); - #else - cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, device); - // cudaMemAdviseSetAccessedBy ensures the memory always lives in the device's page table. - // Even if the memory has to be migrated away from the device, it still does not page fault. - // The CUDA docs claim that cudaMemAdviseSetPreferredLocation completely overrides this flag, - // but there is no harm in adding this flag as well for future-proofing. - cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, device); - #endif + cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, device); + // cudaMemAdviseSetAccessedBy ensures the memory always lives in the device's page table. + // Even if the memory has to be migrated away from the device, it still does not page fault. + // The CUDA docs claim that cudaMemAdviseSetPreferredLocation completely overrides this flag, + // but there is no harm in adding this flag as well for future-proofing. 
+ cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, device); } - if (device != prev_device && prev_device >= 0) cudaSetDevice(prev_device); return ptr; } @@ -119,29 +100,13 @@ def compile_allocator(): functions=[], with_cuda=True, extra_ldflags=_extra_ldflags, - verbose=True, + verbose=False, ) _so_path = Path(_mod.__file__).as_posix() _alloc = CUDAPluggableAllocator(_so_path, "managed_malloc", "managed_free").allocator() _compilation_state = CompilationState.SUCCESS - except (RuntimeError, ImportError, OSError) as e: - warnings.warn(f"Failed to create unified memory mempool: '{e}'.") - _compilation_state = CompilationState.FAILURE - - # Synchronize failure state across ranks. (For currently unknown reasons, - # one rank can show as FAILURE while the remaining ranks show as SUCCESS.) - import torch - - local_state = torch.tensor( - [_compilation_state.value], dtype=torch.uint8, device=torch.cuda.current_device() - ) - world_states = [ - torch.empty(1, dtype=torch.uint8, device=torch.cuda.current_device()) - for _ in range(torch.distributed.get_world_size()) - ] - torch.distributed.all_gather(world_states, local_state) - world_states = set(s.item() for s in world_states) - if CompilationState.FAILURE.value in world_states: + except (RuntimeError, ImportError, OSError): + warnings.warn("Failed to create unified memory mempool.") _compilation_state = CompilationState.FAILURE diff --git a/megatron/core/inference/utils.py b/megatron/core/inference/utils.py index 55536a52088..d58f3c3a652 100644 --- a/megatron/core/inference/utils.py +++ b/megatron/core/inference/utils.py @@ -2,7 +2,6 @@ import asyncio import multiprocessing -import sys import torch @@ -162,57 +161,3 @@ async def await_process_event( raise RuntimeError( f"Process {process.name} (pid {process.pid}) has exited unexpectedly." ) - - -# Compatibility for Python < 3.13 asyncio Queue functionality. -# This is necessary because asyncio Queues are broken in Python < 3.13. 
-if sys.version_info < (3, 13): - - _SHUTDOWN_SENTINEL = object() - - class asyncio_QueueShutDown(Exception): - """Compatibility exception for Python < 3.13.""" - - pass - - class asyncio_Queue(asyncio.Queue): - """An asyncio.Queue with Python 3.13 compatibility features for Python < 3.13.""" - - def __init__(self, maxsize: int = 0): - super().__init__(maxsize) - self._is_shutdown = False - - async def get(self): - """Get an item from the queue with Python < 3.13 compatibility.""" - if self._is_shutdown and self.empty(): - raise asyncio_QueueShutDown - ret = await super().get() - if ret is _SHUTDOWN_SENTINEL: - super().put_nowait(_SHUTDOWN_SENTINEL) - super().task_done() - raise asyncio_QueueShutDown - return ret - - def put_nowait(self, item): - """Put an item into the queue without blocking""" - if self._is_shutdown: - raise asyncio_QueueShutDown - if item is _SHUTDOWN_SENTINEL: - raise ValueError(f"{item} is reserved for shutdown purposes for Python < 3.13") - super().put_nowait(item) - - def shutdown(self): - """Shutdown the queue for Python < 3.13. - - Note that the listening side of the queue can continue to get old data - off the queue even after it has already been shutdown. The listener only - shutdowns when the queue is BOTH shutdown AND empty. 
- """ - if not self._is_shutdown: - super().put_nowait(_SHUTDOWN_SENTINEL) - super().task_done() - self._is_shutdown = True - -else: - asyncio_QueueShutDown = asyncio.QueueShutDown - asyncio_Queue = asyncio.Queue diff --git a/megatron/core/models/backends.py b/megatron/core/models/backends.py index 29169285b3e..abda7c47787 100644 --- a/megatron/core/models/backends.py +++ b/megatron/core/models/backends.py @@ -22,19 +22,6 @@ LNImpl = WrappedTorchNorm HAVE_APEX = False -from megatron.core.extensions.transformer_engine import ( - TEActivationOp, - TEColumnParallelLinear, - TEDotProductAttention, - TELinear, - TENorm, -) -from megatron.core.tensor_parallel.inference_layers import ( - InferenceLayerNormColumnParallelLinear, - InferenceRowParallelLinear, -) -from megatron.core.utils import is_te_min_version - class BackendSpecProvider(Protocol): """A protocol for providing the submodules used in Spec building.""" @@ -132,51 +119,3 @@ def grouped_mlp_modules( def activation_func(self) -> type: """Which module to use for activation function""" return None - - -class InferenceSpecProvider(BackendSpecProvider): - """A protocol for providing the submodules used in Spec building.""" - - def linear(self) -> type: - """Which linear module TE backend uses""" - return TELinear - - def column_parallel_linear(self) -> type: - """Which column parallel linear module TE backend uses""" - return TEColumnParallelLinear - - def row_parallel_linear(self) -> type: - """Which row parallel linear module TE backend uses""" - return InferenceRowParallelLinear - - def fuse_layernorm_and_linear(self) -> bool: - """TE backend chooses a single module for layernorm and linear""" - return True - - def column_parallel_layer_norm_linear(self) -> Optional[type]: - """Which module for sequential layernorm and linear""" - return InferenceLayerNormColumnParallelLinear - - def layer_norm(self, rms_norm: bool = False, for_qk: bool = False) -> type: - """Which module to use for layer norm""" - if for_qk and 
not is_te_min_version("1.9.0"): - # TENorm significantly harms convergence when used - # for QKLayerNorm if TE Version < 1.9; - # we instead use the Apex implementation. - return FusedLayerNorm - return TENorm - - def core_attention(self) -> type: - """Which module to use for attention""" - return TEDotProductAttention - - def activation_func(self) -> type: - """Which module to use for activation function""" - return TEActivationOp - - def grouped_mlp_modules( - self, moe_use_grouped_gemm: bool, moe_use_legacy_grouped_gemm: bool - ) -> Tuple[type, Optional[MLPSubmodules]]: - raise NotImplementedError( - "MOE is not supported with inference optimized transformer implementation." - ) diff --git a/megatron/core/models/gpt/gpt_layer_specs.py b/megatron/core/models/gpt/gpt_layer_specs.py index 7405150c4b3..c5c9caa3d67 100755 --- a/megatron/core/models/gpt/gpt_layer_specs.py +++ b/megatron/core/models/gpt/gpt_layer_specs.py @@ -4,11 +4,7 @@ from typing import Optional, Union from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add -from megatron.core.models.backends import ( - BackendSpecProvider, - InferenceSpecProvider, - LocalSpecProvider, -) +from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider from megatron.core.models.gpt.linear_attention_module_specs import ( get_linear_attention_module_spec_for_backend, ) @@ -77,102 +73,6 @@ HAVE_APEX = False -def get_gpt_layer_with_inference_spec( - qk_layernorm: Optional[bool] = False, - multi_latent_attention: Optional[bool] = False, - qk_l2_norm: Optional[bool] = False, -) -> ModuleSpec: - """Use this spec to use inference optimized linear layers. - Args: - qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False. - multi_latent_attention (bool, optional): To use MLA. Defaults to False. - qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False. 
- """ - assert HAVE_TE, "--transformer-impl inference_optimized requires transformer engine" - backend = InferenceSpecProvider() - - mlp = get_mlp_module_spec_for_backend( - backend=backend, - num_experts=None, - moe_grouped_gemm=False, - moe_use_legacy_grouped_gemm=False, - use_te_op_fuser=False, - use_te_activation_func=False, - ) - - if multi_latent_attention: - assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA." - linear_q_up_proj = ( - backend.column_parallel_layer_norm_linear() - if qk_layernorm - else backend.column_parallel_linear() - ) - linear_kv_up_proj = ( - backend.column_parallel_layer_norm_linear() - if qk_layernorm - else backend.column_parallel_linear() - ) - return ModuleSpec( - module=TransformerLayer, - submodules=TransformerLayerSubmodules( - input_layernorm=backend.layer_norm(), - self_attention=ModuleSpec( - module=MLASelfAttention, - params={"attn_mask_type": AttnMaskType.causal}, - submodules=MLASelfAttentionSubmodules( - linear_q_proj=backend.column_parallel_linear(), - linear_q_down_proj=backend.linear(), - linear_q_up_proj=linear_q_up_proj, - linear_kv_down_proj=backend.linear(), - linear_kv_up_proj=linear_kv_up_proj, - core_attention=backend.core_attention(), - linear_proj=backend.row_parallel_linear(), - q_layernorm=IdentityOp, - kv_layernorm=IdentityOp, - ), - ), - self_attn_bda=get_bias_dropout_add, - pre_mlp_layernorm=IdentityOp, - mlp=mlp, - mlp_bda=get_bias_dropout_add, - ), - ) - else: - qk_norm = backend.layer_norm(for_qk=True) - return ModuleSpec( - module=TransformerLayer, - submodules=TransformerLayerSubmodules( - self_attention=ModuleSpec( - module=SelfAttention, - params={"attn_mask_type": AttnMaskType.causal}, - submodules=SelfAttentionSubmodules( - linear_qkv=backend.column_parallel_layer_norm_linear(), - core_attention=backend.core_attention(), - linear_proj=backend.row_parallel_linear(), - q_layernorm=( - L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) - ), - k_layernorm=( - L2Norm if 
qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) - ), - ), - ), - self_attn_bda=get_bias_dropout_add, - pre_mlp_layernorm=IdentityOp, - mlp=mlp, - mlp_bda=get_bias_dropout_add, - sharded_state_dict_keys_map={ - "mlp.0.weight": "mlp.linear_fc1.layer_norm_weight", - "mlp.0.bias": "mlp.linear_fc1.layer_norm_bias", - "mlp.1.basic_ops.0.weight": "mlp.linear_fc1.weight", - "mlp.1.basic_ops.1.bias": "mlp.linear_fc1.bias", - "mlp.3.basic_ops.0.weight": "mlp.linear_fc2.weight", - "mlp.3.basic_ops.1.bias": "mlp.linear_fc2.bias", - }, - ), - ) - - def get_gpt_layer_with_transformer_engine_spec( num_experts: Optional[int] = None, moe_grouped_gemm: Optional[bool] = False, diff --git a/megatron/core/models/gpt/moe_module_specs.py b/megatron/core/models/gpt/moe_module_specs.py index 62ee4537cfc..1de0f14efcd 100755 --- a/megatron/core/models/gpt/moe_module_specs.py +++ b/megatron/core/models/gpt/moe_module_specs.py @@ -2,13 +2,21 @@ from typing import Optional -from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider from megatron.core.transformer.mlp import MLPSubmodules from megatron.core.transformer.moe.moe_layer import MoELayer, MoESubmodules from megatron.core.transformer.moe.shared_experts import SharedExpertMLP from megatron.core.transformer.spec_utils import ModuleSpec +try: + import transformer_engine as te # pylint: disable=unused-import + + from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider + + HAVE_TE = True +except ImportError: + HAVE_TE = False + def get_moe_module_spec( use_te: Optional[bool] = True, diff --git a/megatron/core/models/mamba/mamba_layer_specs.py b/megatron/core/models/mamba/mamba_layer_specs.py index bfe38c2bbc8..8ef4a2ab3e4 100755 --- a/megatron/core/models/mamba/mamba_layer_specs.py +++ b/megatron/core/models/mamba/mamba_layer_specs.py @@ -3,11 +3,9 @@ from megatron.core.extensions.transformer_engine 
import ( TEDotProductAttention, TELayerNormColumnParallelLinear, - TENorm, TERowParallelLinear, ) from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add -from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec from megatron.core.ssm.mamba_block import MambaStack, MambaStackSubmodules from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules @@ -18,13 +16,6 @@ from megatron.core.transformer.spec_utils import ModuleSpec from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules -moe = get_moe_module_spec( - use_te=True, - num_experts=8, # Can be any positive integer (must not be None). - moe_grouped_gemm=True, - moe_use_legacy_grouped_gemm=False, -) - mamba_stack_spec = ModuleSpec( module=MambaStack, submodules=MambaStackSubmodules( @@ -73,12 +64,5 @@ mlp_bda=get_bias_dropout_add, ), ), - moe_layer=ModuleSpec( - # TODO (rwaleffe): change this to be an "MoELayer" to work with CudaGraphs? - module=TransformerLayer, - submodules=TransformerLayerSubmodules( - pre_mlp_layernorm=TENorm, mlp=moe, mlp_bda=get_bias_dropout_add - ), - ), ), ) diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 061cb25f5b8..c254b2f6882 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -1,9 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
-import copy import logging import warnings -from dataclasses import astuple -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Callable, Dict, List, Optional, Tuple import torch from torch.optim import SGD as CPUSGD @@ -50,114 +48,100 @@ MegatronOptimizer, param_group_identifier_keys, ) -from .optimizer_config import AdamOptimizerConfig, OptimizerConfig, ParamKey, SGDOptimizerConfig +from .optimizer_config import OptimizerConfig logger = logging.getLogger(__name__) -def _matches(param: torch.nn.Parameter, param_name: str, param_key: ParamKey) -> bool: - """Returns true if passed-in parameter (with name) matches `param_key`. - - Args: - param (torch.nn.Parameter): Handle to parameter object. - param_name (str): Name of parameter in underlying PyTorch module. - param_key (ParamKey): ParamKey object. - - Returns: - bool: True if parameter matches passed-in param_key. - """ - - # Check if name matches. - if isinstance(param_key.name, str): - target_names = [param_key.name] - else: - target_names = list(param_key.name) - for target_name in target_names: - if param_name in target_name: - return True - - # Check if attribute matches. - if isinstance(param_key.attr, str): - target_attrs = [param_key.attr] - else: - target_attrs = list(param_key.attr) - for target_attr in target_attrs: - if getattr(param, target_attr, False): - return True - - return False - - def _get_param_groups( model_chunks: List[MegatronModule], - config: OptimizerConfig, - config_overrides: Optional[Dict[ParamKey, OptimizerConfig]], + no_weight_decay_cond: Optional[Callable], + scale_lr_cond: Optional[Callable], + lr_mult: float, + lr: float, + min_lr: float, + decoupled_lr: Optional[float], + decoupled_min_lr: Optional[float], + default_skip_embedding_weight_decay: bool = False, ) -> List[Dict]: """Create parameter groups for optimizer. - Creates parameter groups from provided optimizer config object. 
+ Creates parameter groups based on weight decay condition (regularized vs + non regularized), learning rate scale condition (lr vs lr_mult * lr), + and whether it is expert parameters. scale_lr_cond is used during finetuning + where head of the network requires a scaled version of the base learning rate. Args: model_chunks (List[MegatronModule]): model chunks to create parameter groups for. - config (OptimizerConfig): optimizer configuration object. - config_overrides (Optional[Dict[LayerKey, OptimizerConfig]): optimizer overrides, - specified on a per-layer basis. + no_weight_decay_cond (func, optional): function to determine whether a + parameter should not perform weight decay. + scale_lr_cond (func, optional): function to determine whether a parameter + should have a scaled learning rate. + lr_mult (float): learning rate multiplier for parameters that + satisfy scale_lr_cond. + lr (float): learning rate. + min_lr (float): minimum learning rate. + decoupled_lr (Optional[float]): optional decoupled learning rate. + decoupled_min_lr (Optional[float]): optional decoupled minimum learning rate. + default_skip_embedding_weight_decay (bool): whether to skip weight decay for embedding + parameters by default, if no_weight_decay_cond is not provided. + Returns: List of parameter groups. """ - # Map (wd_mult, is_expert_parallel, param_group_hyperparameters_config) to params. - params_map = {} - configs_map = {} + use_decoupled_learning_rate = decoupled_lr is not None + # Map (wd_mult, lr_mult, is_expert_parallel, is_decoupled_lr) to params. + params_map = {} for model_chunk in model_chunks: for name, param in model_chunk.named_parameters(): if not param.requires_grad: continue - uses_default_config = False - # Get optimizer config for this parameter. 
- if config_overrides is None: - config_for_param = config - uses_default_config = True - else: - config_for_param = None - for param_key in config_overrides: - if _matches(param, name, param_key): - config_for_param = config_overrides[param_key] - break - # Fall back to default config. - if config_for_param is None: - config_for_param = config - uses_default_config = True - is_expert_parallel = not getattr(param, 'allreduce', True) - # TODO: Make sure there is a way to support old no_weight_decay_func functionality - # and default_skip_embedding_weight_decay: - # or (default_skip_embedding_weight_decay and "embedding" in name) - no_wd = name.endswith(".bias") or len(param.shape) == 1 - if not no_wd: - wd_mult = 1.0 + if no_weight_decay_cond is not None: + no_wd: bool = no_weight_decay_cond(name, param) else: - wd_mult = 0.0 - - # Create config_tuple that is hash-able. Remove timers object before - # creating config_tuple. - config_for_param_copy = copy.deepcopy(config_for_param) - config_for_param_copy.timers = None - config_tuple = astuple(config_for_param_copy) - key = (wd_mult, is_expert_parallel, config_tuple) + # Do not regularize biases and norm parameters. + # optionally, also skip weight decay for embedding parameters if requested + # (useful if you do not want embeddings to shrink to zero in training + # https://arxiv.org/abs/2312.16903) + no_wd = ( + name.endswith(".bias") + or len(param.shape) == 1 + or (default_skip_embedding_weight_decay and "embedding" in name) + ) + + if scale_lr_cond is not None: + scale_lr = scale_lr_cond(name, param) + else: + scale_lr = False + + if not no_wd and not scale_lr: + wd_mult, _lr_mult = 1.0, 1.0 + elif not no_wd and scale_lr: + wd_mult, _lr_mult = 1.0, lr_mult + elif no_wd and not scale_lr: + wd_mult, _lr_mult = 0.0, 1.0 + else: + wd_mult, _lr_mult = 0.0, lr_mult + + is_decoupled_lr = False + # For input/embedding and output layer: embedding.word_embeddings.weight / + # output_layer.weight. 
+ if use_decoupled_learning_rate and getattr( + param, 'is_embedding_or_output_parameter', False + ): + is_decoupled_lr = True + + key = (wd_mult, _lr_mult, is_expert_parallel, is_decoupled_lr) if key not in params_map: params_map[key] = [] params_map[key].append(param) - if key in configs_map: - assert (config_for_param, uses_default_config) == configs_map[key] - else: - configs_map[key] = (config_for_param, uses_default_config) - # Distributed checkpoint requires all ranks to have the same param groups, # so we need to align the param groups across ranks, otherwise we may have # runtime error when loading the checkpoint or numerical error when resuming training. @@ -171,33 +155,67 @@ def _get_param_groups( param_groups = [] for key in params_key: - wd_mult, is_expert_parallel, _ = key + wd_mult, _lr_mult, is_expert_parallel, is_decoupled_lr = key params = params_map[key] if key in params_map else [] - config, uses_default_config = None, True - if key not in configs_map: - assert params == [] - else: - config, uses_default_config = configs_map[key] - assert config is not None - - # TODO: Remove "backwards compatible" fields below eventually. param_group = { 'params': params, - 'wd_mult': wd_mult, # For backwards compatibility. - 'lr_mult': 1.0, # For backwards compatibility. + 'wd_mult': wd_mult, + 'lr_mult': _lr_mult, 'is_expert_parallel': is_expert_parallel, - 'is_decoupled_lr': False, # For backwards compatibility. - 'default_config': uses_default_config, + 'is_decoupled_lr': is_decoupled_lr, } - - # Stick relevant fields into param_group from config object. - if config is not None: - param_group['max_lr'] = config.lr - param_group['min_lr'] = config.min_lr - # TODO: Add other relevant arguments (e.g., weight decay, optimizer) - # here as well. + # Ensure param_group has required keys for matching when loading optimizer state + # See MegatronOptimizer._filter_and_reorder_param_groups. 
+ assert set(param_group.keys()) - set(param_group_identifier_keys) == {'params'} param_groups.append(param_group) + param_groups = _update_min_and_max_lr_in_param_groups( + param_groups, + lr=lr, + min_lr=min_lr, + decoupled_lr=decoupled_lr, + decoupled_min_lr=decoupled_min_lr, + ) + + return param_groups + + +def _update_min_and_max_lr_in_param_groups( + param_groups: List[Dict], + lr: float, + min_lr: float, + decoupled_lr: Optional[float], + decoupled_min_lr: Optional[float], +) -> List[Dict]: + """ + Updates `max_lr` and `min_lr` values in each parameter group, and returns new list. + By default, each group will use `lr` / `min_lr` as `max_lr` / `min_lr`. + If `decoupled_lr` is provided, then `decoupled_lr` / `decoupled_min_lr` will be used + as `max_lr` / `min_lr` for the input and output layer. + + Args: + param_groups (List): parameter groups whose 'max_lr' and `min_lr` fields need to + be adjusted. + lr (float): learning rate. + min_lr (float): minimum learning rate. + decoupled_lr (Optional[float]): optional decoupled learning rate. + decoupled_min_lr (Optional[float]): optional decoupled minimum learning rate. + + Returns: + List of adjusted parameter groups. 
+ """ + + if decoupled_min_lr is None: + decoupled_min_lr = min_lr + + for param_group in param_groups: + if param_group['is_decoupled_lr']: + assert decoupled_lr is not None + param_group['max_lr'] = decoupled_lr + param_group['min_lr'] = decoupled_min_lr + else: + param_group['max_lr'] = lr + param_group['min_lr'] = min_lr return param_groups @@ -205,9 +223,12 @@ def _get_param_groups_and_buffers( model_chunks: List[MegatronModule], model_chunk_offset: int, config: OptimizerConfig, - config_overrides: Optional[Dict[ParamKey, OptimizerConfig]], + no_weight_decay_cond: Optional[Callable], + scale_lr_cond: Optional[Callable], + lr_mult: float, filter_fn: Callable, buffer_name: str, + default_skip_embedding_weight_decay: bool = False, ) -> Tuple[List[Dict], Dict[int, List[_ParamAndGradBuffer]]]: """Returns parameter groups and buffer for optimizer. @@ -216,17 +237,33 @@ def _get_param_groups_and_buffers( groups for. model_chunk_offset (int): offset of model_chunks in global model_chunks list. config (OptimizerConfig): optimizer configuration object. - config_overrides (Optional[Dict[LayerKey, OptimizerConfig]): optimizer overrides, - specified on a per-layer basis. + no_weight_decay_cond (func, optional): function to determine whether a + parameter should not perform weight decay. + scale_lr_cond (func, optional): function to determine whether a parameter + should have a scaled learning rate. + lr_mult (float): learning rate multiplier for parameters that + satisfy scale_lr_cond. lr (float): learning rate. min_lr (float): minimum learning rate. filter_fn (callable): filtering function for param_groups. buffer_name (str): name of buffer. + default_skip_embedding_weight_decay (bool): whether to skip weight decay for + embedding parameters by default, if no_weight_decay_cond is not provided. Returns: List of parameter groups and dictionary of model chunk IDs to buffers. 
""" - param_groups = _get_param_groups(model_chunks, config, config_overrides) + param_groups = _get_param_groups( + model_chunks, + no_weight_decay_cond, + scale_lr_cond, + lr_mult, + lr=config.lr, + min_lr=config.min_lr, + decoupled_lr=config.decoupled_lr, + decoupled_min_lr=config.decoupled_min_lr, + default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, + ) param_groups = list(filter(filter_fn, param_groups)) buffers = {} for model_chunk_idx, model_chunk in enumerate(model_chunks): @@ -267,12 +304,9 @@ def _get_megatron_optimizer_based_on_param_groups( Returns: Instance of MegatronOptimizer. """ - # TODO: Logic needs to be updated to handle different optimizer types (i.e., param_groups - # passed into this function need to correspond to the same optimizer). - - # When freezing sub-models we may have no trainable parameters on a rank and + # when freezing sub-models we may have no trainable parameters on a rank and # hence an empty param_groups. However, we still need to create an optimizer - # for the purposes of grad stats reductions. + # for the purposes of grad stats reductions if param_groups: if config.optimizer_cpu_offload: if torch.__version__ < '2.3.0': @@ -442,8 +476,11 @@ def init_state_fn(opt, config=None): def get_megatron_optimizer( config: OptimizerConfig, model_chunks: List[MegatronModule], - config_overrides: Optional[Dict[ParamKey, OptimizerConfig]] = None, + no_weight_decay_cond: Optional[Callable] = None, + scale_lr_cond: Optional[Callable] = None, + lr_mult: float = 1.0, use_gloo_process_groups: bool = True, + default_skip_embedding_weight_decay: bool = False, pg_collection: Optional[ProcessGroupCollection] = None, dump_param_to_param_group_map: Optional[str] = None, ) -> MegatronOptimizer: @@ -454,11 +491,18 @@ def get_megatron_optimizer( Args: config (OptimizerConfig): optimizer configuration object. model_chunks (List[MegatronModule]): model chunks to get optimizer for. 
- config_overrides (Optional[Dict[ParamKey, OptimizerConfig]]): optional dictionary of - optimizer configuration objects to override default optimizer behavior for different - subsets of parameters (identified by ParamKey). + no_weight_decay_cond (func, optional): function to determine whether a parameter + should not perform weight decay. Defaults to None. + scale_lr_cond (func, optional): function to determine whether a parameter + should have a scaled learning rate. Defaults to None. + lr_mult (float, optional): learning rate multiplier for parameters that + satisfy scale_lr_cond. Defaults to 1.0. use_gloo_process_groups (bool): if false, disable use of Gloo process groups in underlying Megatron optimizers. + default_skip_embedding_weight_decay (bool): whether to skip weight decay for + embedding parameters by default, if no_weight_decay_cond is not provided. + This is useful if you do not want embeddings to shrink to zero in training + as recommended in https://arxiv.org/abs/2312.16903 pg_collection: Optional unified process group for distributed training. dump_param_to_param_group_map (Optional[str]): path to dump parameter to param group map. @@ -468,20 +512,6 @@ def get_megatron_optimizer( log_single_rank(logger, logging.INFO, f'Setting up optimizer with config {config}') - # TODO: Remove `optimizer` from this eventually (e.g., if we use Muon for some layers and - # Adam for other layers). This would need some more refactoring to work though (param_groups - # filtered by optimizer passed into _get_megatron_optimizer_based_on_param_groups). 
- fields_to_check_for_consistency = [ - 'overlap_param_gather_with_optimizer_step', - 'optimizer', - 'optimizer_cpu_offload', - ] - for field_name in fields_to_check_for_consistency: - field = getattr(config, field_name, None) - if config_overrides is not None: - all_configs = list(config_overrides.values()) - assert all([getattr(x, field_name, None) == field for x in all_configs]) - # Separate out first model chunk if overlapping param AG with optimizer step. if config.overlap_param_gather_with_optimizer_step: all_dense_model_chunks = [[model_chunks[0]], model_chunks[1:]] @@ -523,14 +553,17 @@ def get_megatron_optimizer( model_chunk, model_chunk_offset=model_chunk_offset, config=config, - config_overrides=config_overrides, + no_weight_decay_cond=no_weight_decay_cond, + scale_lr_cond=scale_lr_cond, + lr_mult=lr_mult, filter_fn=lambda g: True, buffer_name='buffers', + default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, ) optimizers.append( _get_megatron_optimizer_based_on_param_groups( - config=config, + config, model_chunks=model_chunk, param_groups=param_groups, per_model_buffers=buffers, @@ -559,9 +592,12 @@ def get_megatron_optimizer( dense_model_chunks, model_chunk_offset=model_chunk_offset, config=config, - config_overrides=config_overrides, + no_weight_decay_cond=no_weight_decay_cond, + scale_lr_cond=scale_lr_cond, + lr_mult=lr_mult, filter_fn=lambda g: not g['is_expert_parallel'], buffer_name='buffers', + default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, ) for model_chunk in dense_model_chunks: model_chunk.overlap_param_gather_with_optimizer_step = ( @@ -577,7 +613,7 @@ def get_megatron_optimizer( # Pass Gloo process groups into optimizer only if needed. 
optimizers.append( _get_megatron_optimizer_based_on_param_groups( - config=config, + config, model_chunks=dense_model_chunks, param_groups=param_groups, per_model_buffers=buffers, @@ -595,9 +631,12 @@ def get_megatron_optimizer( model_chunks, model_chunk_offset=0, config=config, - config_overrides=config_overrides, + no_weight_decay_cond=no_weight_decay_cond, + scale_lr_cond=scale_lr_cond, + lr_mult=lr_mult, filter_fn=lambda g: g['is_expert_parallel'], buffer_name='expert_parallel_buffers', + default_skip_embedding_weight_decay=default_skip_embedding_weight_decay, ) if dump_param_to_param_group_map is not None: for param_group in moe_param_groups: @@ -614,7 +653,7 @@ def get_megatron_optimizer( expt_data_parallel_group_gloo = None optimizers.append( _get_megatron_optimizer_based_on_param_groups( - config=config, + config, model_chunks=model_chunks, param_groups=moe_param_groups, per_model_buffers=moe_buffers, diff --git a/megatron/core/optimizer/muon.py b/megatron/core/optimizer/muon.py index 2b1f0502e46..ddf20b0abb8 100644 --- a/megatron/core/optimizer/muon.py +++ b/megatron/core/optimizer/muon.py @@ -3,7 +3,7 @@ """Megatron muon optimizer wrapper to handle tensor-parallel.""" import logging -from typing import Any, Callable, Dict, List, Literal, Optional +from typing import Any, Callable, List, Literal, Optional import torch from torch.optim.optimizer import ParamsT @@ -21,7 +21,7 @@ FP32Optimizer, MegatronOptimizer, ) -from .optimizer_config import OptimizerConfig, ParamKey +from .optimizer_config import OptimizerConfig try: from emerging_optimizers.orthogonalized_optimizers import ( @@ -166,7 +166,9 @@ def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> t def get_megatron_muon_optimizer( config: OptimizerConfig, model_chunks: List[MegatronModule], - config_overrides: Optional[Dict[ParamKey, OptimizerConfig]] = None, + no_weight_decay_cond: Optional[Callable] = None, + scale_lr_cond: Optional[Callable] = None, + lr_mult: float = 1.0, 
use_gloo_process_groups: bool = True, layer_wise_distributed_optimizer: bool = False, pg_collection: Optional[ProcessGroupCollection] = None, @@ -177,15 +179,17 @@ def get_megatron_muon_optimizer( Args: config (OptimizerConfig): optimizer configuration object. model_chunks (List[MegatronModule]): model chunks to get optimizer for. + no_weight_decay_cond (func, optional): function to determine whether a parameter + should not perform weight decay. Defaults to None. + scale_lr_cond (func, optional): function to determine whether a parameter + should have a scaled learning rate. Defaults to None. + lr_mult (float, optional): learning rate multiplier for parameters that + satisfy scale_lr_cond. Defaults to 1.0. use_gloo_process_groups (bool): if false, disable use of Gloo process groups in underlying Megatron optimizers. layer_wise_distributed_optimizer (bool): if true, use layer-wise distributed optimizer. Defaults to False. """ - # Muon currently use adam config. setting str here to call regular get for adam creation - # side effect is muon optimizer will have wrong name, i.e. config.optimizer == 'adam' - config.optimizer = 'adam' - assert HAVE_EMERGING_OPTIMIZERS, "Emerging Optimizers is not installed." # dist-optim is not supported due to strong coupling with how DDP init grad buffer @@ -242,7 +246,16 @@ def get_megatron_muon_optimizer( for param in nonlinear_params: param.requires_grad = False - linear_param_groups = _get_param_groups(model_chunks, config, config_overrides) + linear_param_groups = _get_param_groups( + model_chunks, + no_weight_decay_cond, + scale_lr_cond, + lr_mult, + lr=config.lr, + min_lr=config.min_lr, + decoupled_lr=config.decoupled_lr, + decoupled_min_lr=config.decoupled_min_lr, + ) optimizer = TensorParallelMuon( linear_param_groups, @@ -261,6 +274,13 @@ def get_megatron_muon_optimizer( mode=config.muon_tp_mode, ) + # set config here to: + # 1. get adam for rest of layer + # 2. 
avoid ChainedOptimizer check fail that assert all optimizers are same kind + # side effect is muon optimizer will have wrong name str, i.e. config.optimizer == 'adam' + # TODO(deyuf): allow user to select optimizer mix and relax ChainedOptimizer design + config.optimizer = 'adam' + # Needed for torch_dist ckpt_format, unlike torch ckpt_format # For other emerging optimizers, need to implement init_state_fn as well # TODO(boxiangw): Improve usability after optimizer refactor @@ -311,10 +331,7 @@ def adam_init_state_fn(opt, config=None): # call original get. linear params will be skipped since they're freezed chained_adam = get_megatron_optimizer( - config, - model_chunks, - config_overrides=config_overrides, - use_gloo_process_groups=use_gloo_process_groups, + config, model_chunks, no_weight_decay_cond, scale_lr_cond, lr_mult, use_gloo_process_groups ) # unfreeze everything diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 54e7f67c629..1829cb424f1 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -3,7 +3,6 @@ """Megatron optimizer.""" import copy -import logging import math import warnings from abc import ABC, abstractmethod diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index 6a4199a1f7a..8692d1e9b52 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -1,34 +1,23 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -from dataclasses import dataclass, field -from typing import Callable, Optional, Tuple, Union +from dataclasses import dataclass +from typing import Callable, Optional import torch from ..utils import is_te_min_version -@dataclass(frozen=True, slots=True) -class ParamKey: - """Key to group parameters by. All such grouped parameters can share an - optimizer config specification.""" - - # TODO: Can add layer_id here later. 
- - name: Union[str, Tuple[str]] = field(default_factory=tuple) - """Parameter name(s).""" - - attr: Union[str, Tuple[str]] = field(default_factory=tuple) - """Parameter attribute(s).""" - - @dataclass class OptimizerConfig: - """Base optimizer configuration object.""" + """Configuration for optimizer.""" ############## # General ############## + optimizer: str = 'adam' + """Optimizer to use (one of Adam, SGD, or Muon).""" + lr: Optional[float] = None """Initial learning rate. Depending on decay style and initial warmup, the learning rate at each iteration would be different. @@ -37,6 +26,14 @@ class OptimizerConfig: min_lr: Optional[float] = None """Minumum value for learning rate. The scheduler clip values below this threshold.""" + decoupled_lr: Optional[float] = None + """Separate learning rate for the input and output layer.""" + + decoupled_min_lr: Optional[float] = None + """Minimum value for learning rate for the input and output layer. The scheduler clip values + below this threshold. + """ + weight_decay: float = 0.01 """Weight decay coefficient for L2 regularization.""" @@ -81,9 +78,6 @@ class OptimizerConfig: exp_avg_sq_dtype: torch.dtype = torch.float32 """dtype of exp_avg_sq when enabling precision-aware-optimizer""" - optimizer: str = 'adam' - """Optimizer name. NOTE: Deprecated, use individual optimizer classes instead.""" - ############### # Loss scaling ############### @@ -104,10 +98,10 @@ class OptimizerConfig: hysteresis: int = 2 """Hysteresis for dynamic loss scaling.""" - ################################################################################### - # Optimizer (NOTE: Deprecated, use individual optimizer classes instead.). - ################################################################################### - # Adam. + ############## + # Optimizer + ############## + # Adam adam_beta1: float = 0.9 """First coefficient for computing running averages of gradient and its square in Adam optimizer. 
@@ -265,7 +259,6 @@ def __post_init__(self): try: import inspect - # TODO: Move this below? from transformer_engine.pytorch.optimizers import FusedAdam as Adam adam_args = inspect.signature(Adam).parameters @@ -298,35 +291,3 @@ def __post_init__(self): assert ( self.exp_avg_sq_dtype == torch.float32 ), "exp_avg_sq_dtype can only be fp32 when not using precision-aware optimizer" - - -@dataclass -class AdamOptimizerConfig(OptimizerConfig): - """Adam optimizer configuration object.""" - - optimizer: str = 'adam' - """Optimizer name.""" - - adam_beta1: float = 0.9 - """First coefficient for computing running averages of gradient and its square in Adam - optimizer. - """ - - adam_beta2: float = 0.999 - """Second coefficient for computing running averages of gradient and its square in Adam - optimizer. - """ - - adam_eps: float = 1e-08 - """Term added to the denominator to improve numerical stability in Adam optimizer.""" - - -@dataclass -class SGDOptimizerConfig(OptimizerConfig): - """SGD optimizer configuration object.""" - - optimizer: str = 'sgd' - """Optimizer name.""" - - sgd_momentum: float = 0.9 - """Momentum factor for SGD optimizer.""" diff --git a/megatron/core/optimizer_param_scheduler.py b/megatron/core/optimizer_param_scheduler.py index 9f771c612e8..da7e0787676 100644 --- a/megatron/core/optimizer_param_scheduler.py +++ b/megatron/core/optimizer_param_scheduler.py @@ -95,30 +95,19 @@ def __init__( self.step(0) log_single_rank(logger, logging.INFO, f"> learning rate decay style: {self.lr_decay_style}") - def get_wd(self, param_group: Optional[dict] = None) -> float: - """Weight decay incr functions - - Args: - param_group (dict): parameter group from the optimizer.""" - - if param_group is not None: - start_wd = param_group.get('start_wd', self.start_wd) - end_wd = param_group.get('end_wd', self.end_wd) - else: - start_wd = self.start_wd - end_wd = self.end_wd - + def get_wd(self) -> float: + """Weight decay incr functions""" if self.num_steps > 
self.wd_incr_steps: - return end_wd + return self.end_wd if self.wd_incr_style == 'constant': - assert start_wd == end_wd - return end_wd + assert self.start_wd == self.end_wd + return self.end_wd incr_ratio = float(self.num_steps) / float(self.wd_incr_steps) assert incr_ratio >= 0.0 assert incr_ratio <= 1.0 - delta_wd = end_wd - start_wd + delta_wd = self.end_wd - self.start_wd if self.wd_incr_style == 'linear': coeff = incr_ratio @@ -127,7 +116,7 @@ def get_wd(self, param_group: Optional[dict] = None) -> float: else: raise Exception(f'{self.wd_incr_style} weight decay increment style is not supported.') - return start_wd + coeff * delta_wd + return self.start_wd + coeff * delta_wd def get_lr(self, param_group: dict) -> float: """Learning rate decay functions from: @@ -202,9 +191,11 @@ def step(self, increment: int) -> None: increment (int): number of steps to increment """ self.num_steps += increment + new_wd = self.get_wd() for param_group in self.optimizer.param_groups: - param_group['lr'] = self.get_lr(param_group) - param_group['weight_decay'] = self.get_wd(param_group) * param_group.get('wd_mult', 1.0) + new_lr = self.get_lr(param_group) + param_group['lr'] = new_lr * param_group.get('lr_mult', 1.0) + param_group['weight_decay'] = new_wd * param_group.get('wd_mult', 1.0) def state_dict(self) -> dict: """Return the state dict.""" diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 1916bfff079..1e41bf9d8c2 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -1122,7 +1122,6 @@ def initialize_model_parallel( for ranks in expert_decoder_rank_generator.get_ranks('ep'): group = create_group( ranks, - timeout=timeout, pg_options=get_nccl_options("ep", nccl_comm_cfgs), group_desc="EXPERT_MODEL_PARALLEL_GROUP", ) diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index ef8f31ea150..07c922ea685 100644 --- a/megatron/core/process_groups_config.py +++ 
b/megatron/core/process_groups_config.py @@ -140,23 +140,6 @@ def __init__(self, **kwargs): else: raise ValueError(f"Unknown attribute: {key}") - def __repr__(self): - """Return a concise representation showing which process groups exist and their sizes.""" - active_pgs = [] - for field_info in fields(self): - if hasattr(self, field_info.name): - pg = getattr(self, field_info.name) - if pg is not None: - active_pgs.append(f"{field_info.name}({pg.size()})") - else: - # Field exists but is None - active_pgs.append(f"{field_info.name}(None)") - return ( - f"ProcessGroupCollection({', '.join(active_pgs)})" - if active_pgs - else "ProcessGroupCollection(empty)" - ) - @classmethod def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): """ diff --git a/megatron/core/safe_globals.py b/megatron/core/safe_globals.py index cc5eb8809e8..d2baed2a4a0 100755 --- a/megatron/core/safe_globals.py +++ b/megatron/core/safe_globals.py @@ -11,7 +11,6 @@ from numpy.dtypes import UInt32DType from megatron.core.enums import ModelType -from megatron.core.optimizer import OptimizerConfig from megatron.core.rerun_state_machine import RerunDiagnostic, RerunMode, RerunState from megatron.core.transformer.enums import AttnBackend @@ -25,7 +24,6 @@ Namespace, AttnBackend, ModelType, - OptimizerConfig, RerunDiagnostic, RerunMode, RerunState, diff --git a/megatron/core/ssm/mamba_block.py b/megatron/core/ssm/mamba_block.py index de27bb89d2e..1bcadd0af10 100644 --- a/megatron/core/ssm/mamba_block.py +++ b/megatron/core/ssm/mamba_block.py @@ -5,8 +5,10 @@ # This source code is licensed under the Apache license found in the # LICENSE file in the root directory of this source tree. 
+import math from contextlib import nullcontext from dataclasses import dataclass +from functools import partial from typing import Optional, Tuple, Union import torch @@ -21,6 +23,7 @@ from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols as LayerSymbols from megatron.core.ssm.mamba_hybrid_layer_allocation import allocate_layers +from megatron.core.tensor_parallel import get_cuda_rng_tracker from megatron.core.transformer import TransformerConfig from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import MegatronModule @@ -30,6 +33,50 @@ from megatron.core.utils import WrappedTensor, deprecate_inference_params, make_viewless_tensor +# https://github.com/huggingface/transformers/blob/c28d04e9e252a1a099944e325685f14d242ecdcd/src/transformers/models/gpt2/modeling_gpt2.py#L454 +def _init_weights( + module, + n_layer, + initializer_range=0.02, # Now only used for embedding layer. + rescale_prenorm_residual=True, + n_residuals_per_layer=1, # Change to 2 if we have MLP +): + with get_cuda_rng_tracker().fork(): + if isinstance(module, nn.Linear): + if not getattr(module.weight, "_no_reinit", False): + nn.init.normal_(module.weight, std=initializer_range) + if module.bias is not None: + if not getattr(module.bias, "_no_reinit", False): + nn.init.zeros_(module.bias) + elif isinstance(module, nn.Embedding): + nn.init.normal_(module.weight, std=initializer_range) + + for name, p in module.named_parameters(): + if name in ["conv1d.weight", "out_proj.weight"]: + nn.init.kaiming_uniform_(p, a=math.sqrt(5)) + if name in ["in_proj.weight"]: + nn.init.normal_(p, mean=0.0, std=initializer_range) + + if rescale_prenorm_residual: + # Reinitialize selected weights subject to the OpenAI GPT-2 Paper Scheme: + # > A modified initialization which accounts for the accumulation on the + # > residual path with model depth. 
Scale + # > the weights of residual layers at initialization by a factor of + # > 1/√N where N is the # of residual layers. + # > -- GPT-2 :: https://openai.com/blog/better-language-models/ + # + # Reference (Megatron-LM): + # https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/model/gpt_model.py + for name, p in module.named_parameters(): + if name in ["out_proj.weight", "fc2.weight"]: + # Special Scaled Initialization + nn.init.normal_( + p, + mean=0.0, + std=initializer_range / math.sqrt(n_residuals_per_layer * n_layer), + ) + + @dataclass class MambaStackSubmodules: """ @@ -39,7 +86,6 @@ class MambaStackSubmodules: mamba_layer: Union[ModuleSpec, type] = IdentityOp attention_layer: Union[ModuleSpec, type] = IdentityOp mlp_layer: Union[ModuleSpec, type] = IdentityOp - moe_layer: Union[ModuleSpec, type] = IdentityOp class MambaStack(MegatronModule): @@ -125,7 +171,6 @@ def __init__( config=self.config, residual_in_fp32=residual_in_fp32, layer_number=i + 1 + pp_layer_offset, - pp_layer_offset=pp_layer_offset, pg_collection=pg_collection, ) elif layer_type == LayerSymbols.ATTENTION: @@ -144,11 +189,6 @@ def __init__( layer_number=i + 1, pg_collection=pg_collection, ) - elif layer_type == LayerSymbols.MOE: - # Transformer layers apply their own pp_layer_offset - layer = build_module( - submodules.moe_layer, config=self.config, layer_number=i + 1 - ) else: assert False, "unexpected layer_type" self.layers.append(layer) @@ -164,6 +204,15 @@ def __init__( eps=self.config.layernorm_epsilon, ) + if self.config.perform_initialization: + self.apply( + partial( + _init_weights, + n_layer=self.config.num_layers, + initializer_range=self.config.init_method_std, + ) + ) + def _select_layers_for_pipeline_parallel(self, layer_type_list): num_layers_per_pipeline_rank = self.config.num_layers // self.pp_group.size() diff --git a/megatron/core/ssm/mamba_hybrid_layer_allocation.py b/megatron/core/ssm/mamba_hybrid_layer_allocation.py index fe997e2249a..7407bfe899f 100644 --- 
a/megatron/core/ssm/mamba_hybrid_layer_allocation.py +++ b/megatron/core/ssm/mamba_hybrid_layer_allocation.py @@ -28,8 +28,7 @@ class Symbols: MAMBA = "M" ATTENTION = "*" MLP = "-" - MOE = 'E' - VALID = {MAMBA, ATTENTION, MLP, MOE} + VALID = {MAMBA, ATTENTION, MLP} def _allocate_auto( @@ -173,9 +172,9 @@ def get_layer_maps_from_layer_type_list( ) -> Tuple[Dict[int, int], Dict[int, int], Dict[int, int]]: """ Returns maps from global layer index to the corresponding layer index - for each layer type in [Attention, Mamba, MLP, MoE] given a layer type list. + for each layer type in [Attention, Mamba, MLP] given a layer type list. """ - layer_types = [Symbols.ATTENTION, Symbols.MAMBA, Symbols.MLP, Symbols.MOE] + layer_types = [Symbols.ATTENTION, Symbols.MAMBA, Symbols.MLP] layer_maps = {layer_type: {} for layer_type in layer_types} for global_layer_idx, layer_type in enumerate(layer_type_list): layer_map = layer_maps[layer_type] diff --git a/megatron/core/ssm/mamba_layer.py b/megatron/core/ssm/mamba_layer.py index 6514050ac63..69d5ef21c81 100644 --- a/megatron/core/ssm/mamba_layer.py +++ b/megatron/core/ssm/mamba_layer.py @@ -61,7 +61,6 @@ def __init__( layer_number: int = 1, residual_in_fp32=False, pg_collection: ProcessGroupCollection = None, - pp_layer_offset: int = 0, ): """Initialize Mamba Layer.""" super().__init__(config) @@ -78,7 +77,6 @@ def __init__( d_model=self.config.hidden_size, layer_number=layer_number, pg_collection=pg_collection, - pp_layer_offset=pp_layer_offset, ) self.norm = build_module(submodules.norm, self.config, self.config.hidden_size) self.mamba_bda = build_module(submodules.mamba_bda) diff --git a/megatron/core/ssm/mamba_mixer.py b/megatron/core/ssm/mamba_mixer.py index 91dc266e590..b792f8a2f1f 100644 --- a/megatron/core/ssm/mamba_mixer.py +++ b/megatron/core/ssm/mamba_mixer.py @@ -162,7 +162,6 @@ def __init__( headdim=None, ngroups=None, pg_collection: ProcessGroupCollection = None, - pp_layer_offset: int = 0, ): if not HAVE_MAMBA_SSM: 
raise ImportError( @@ -184,7 +183,6 @@ def __init__( self.norm_before_gate = norm_before_gate self.chunk_size = chunk_size self.layer_number = layer_number - self.pp_layer_offset = pp_layer_offset self.cached_batch_size = None assert pg_collection is not None, "pg_collection must be provided for MambaMixer" self.pg_collection = pg_collection @@ -299,12 +297,9 @@ def __init__( setattr(self.conv1d.weight, "tensor_model_parallel", True) setattr(self.conv1d.bias, "tensor_model_parallel", True) - if self.config.perform_initialization: + if self.config.perform_initialization and self.conv_init is not None: with get_cuda_rng_tracker().fork(): - if self.conv_init is not None: - nn.init.uniform_(self.conv1d.weight, -self.conv_init, self.conv_init) - else: - nn.init.kaiming_uniform_(self.conv1d.weight, a=math.sqrt(5)) + nn.init.uniform_(self.conv1d.weight, -self.conv_init, self.conv_init) self.activation = "silu" self.act = nn.SiLU() @@ -329,6 +324,13 @@ def __init__( ) self.dt_bias = nn.Parameter(inv_dt) + # Our initialization would set all Linear.bias to zero, + # need to mark this one as _no_reinit + self.dt_bias._no_reinit = True + # Just to be explicit. 
Without this we already don't + # put wd on dt_bias because of the check + # name.endswith("bias") in param_grouping.py + self.dt_bias._no_weight_decay = True setattr(self.dt_bias, "tensor_model_parallel", True) # A parameter @@ -340,6 +342,7 @@ def __init__( A = A.uniform_(*A_init_range) A_log = torch.log(A) # Keep A_log in fp32 self.A_log = nn.Parameter(A_log) + self.A_log._no_weight_decay = True setattr(self.A_log, "tensor_model_parallel", True) # D "skip" parameter @@ -349,6 +352,7 @@ def __init__( device=torch.cuda.current_device(), ) ) # Keep in fp32 + self.D._no_weight_decay = True setattr(self.D, "tensor_model_parallel", True) if self.rmsnorm: @@ -361,7 +365,6 @@ def __init__( device=torch.cuda.current_device(), dtype=config.params_dtype, ) - setattr(self.norm.weight, "tensor_model_parallel", True) # Assume sequence parallelism: input is partitioned along d_inner and # output is partitioned along the sequence dimension @@ -455,7 +458,7 @@ def dynamic_inference(self, hidden_states: torch.Tensor, context: DynamicInferen ) assert sequence_packing_available, reason_for_no_sequence_packing - conv_state, ssm_state = context.mamba_states_cache(self.layer_number - self.pp_layer_offset) + conv_state, ssm_state = context.mamba_states_cache(self.layer_number) # Fast path: decode-only if context.is_decode_only(): @@ -501,10 +504,7 @@ def dynamic_inference(self, hidden_states: torch.Tensor, context: DynamicInferen zxBCdt_chunked_prefill = zxBCdt[ active_token_count - chunked_prefill_request_token_count : active_token_count ] - - batch_index_chunked_prefill = batch_indices[ - context.get_index_of_chunked_prefill_request() - ] + batch_index_chunked_prefill = batch_indices[context.chunked_prefill_request_id] y_prefill_chunked = self.ssm_prefill( zxBCdt_chunked_prefill, @@ -941,12 +941,6 @@ def ssm_decode( x_reshaped = rearrange(x, "b (h p) -> b h p", p=self.headdim) if not self.rmsnorm: z = rearrange(z, "b (h p) -> b h p", p=self.headdim) - - # Upcast the batch_indices to 
prevent integer overflow errors in the case of - # large max request counts. - if batch_indices is not None: - batch_indices = batch_indices.to(torch.int64) - y = selective_state_update( ssm_state, x_reshaped, diff --git a/megatron/core/tensor_parallel/inference_layers.py b/megatron/core/tensor_parallel/inference_layers.py deleted file mode 100644 index 05f7b88d095..00000000000 --- a/megatron/core/tensor_parallel/inference_layers.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - - -from typing import Callable, Optional - -import torch -import torch.distributed as dist - -from megatron.core.extensions.transformer_engine import ( - TELayerNormColumnParallelLinear, - TERowParallelLinear, -) -from megatron.core.model_parallel_config import ModelParallelConfig -from megatron.core.transformer.transformer_config import TransformerConfig -from megatron.core.utils import get_tensor_model_parallel_group_if_none - -try: - import transformer_engine.pytorch.cpp_extensions as tex - from transformer_engine.pytorch.constants import TE_DType - from transformer_engine.pytorch.distributed import ( - gather_along_first_dim, - reduce_scatter_along_first_dim, - ) - - HAVE_TE = True -except ImportError: - HAVE_TE = False - - -def _te_rms_norm_kernel(x: torch.Tensor, weight: torch.Tensor, eps: float): - x_shape = x.shape - x = x.view(-1, x.size(-1)) - out, _, _ = tex.rmsnorm_fwd( - x, weight, eps, None, None, TE_DType[x.dtype], 16, False # sm-margin # zero centered gamma - ) - out = out.view(*x_shape[:-1], -1) - return out.to(x.dtype) - - -class InferenceLayerNormColumnParallelLinear(TELayerNormColumnParallelLinear): - """ - Inference optimized version of TELayerNormColumnParallelLinear. 
- """ - - def __init__( - self, - input_size: int, - output_size: int, - *, - config: TransformerConfig, - init_method: Callable, - gather_output: bool, - bias: bool, - skip_bias_add: bool, - is_expert: bool, - skip_weight_param_allocation: bool = False, - tp_comm_buffer_name: Optional[str] = None, - tp_group: Optional[torch.distributed.ProcessGroup] = None, - ): - assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine" - super().__init__( - input_size, - output_size, - config=config, - init_method=init_method, - gather_output=gather_output, - bias=bias, - skip_bias_add=skip_bias_add, - is_expert=is_expert, - skip_weight_param_allocation=skip_weight_param_allocation, - tp_comm_buffer_name=tp_comm_buffer_name, - tp_group=tp_group, - ) - self.tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert) - self.tp_size = dist.get_world_size(self.tp_group) - - assert ( - output_size % self.tp_size == 0 - ), f"output_size ({output_size}) must be divisible by tp_size ({self.tp_size})" - - self.eps = config.layernorm_epsilon - - if self.tp_size > 1: - assert ( - config.sequence_parallel - ), "--transformer-impl=inference_optimized requires --sequence-parallel" - - @torch.no_grad() - def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Forward pass. - """ - x = _te_rms_norm_kernel(x=x, weight=self.layer_norm_weight, eps=self.eps) - if self.tp_size > 1: - x, _ = gather_along_first_dim(x, process_group=self.tp_group) - x = torch.matmul(x, self.weight.t()) - return x, None - - -class InferenceRowParallelLinear(TERowParallelLinear): - """ - Inference optimized version of TERowParallelLinear. 
- """ - - def __init__( - self, - input_size: int, - output_size: int, - *, - config: ModelParallelConfig, - init_method: Callable, - bias: bool, - input_is_parallel: bool, - skip_bias_add: bool, - is_expert: bool, - tp_comm_buffer_name: Optional[str] = None, - tp_group: Optional[torch.distributed.ProcessGroup] = None, - ): - assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine" - super().__init__( - input_size, - output_size, - config=config, - init_method=init_method, - bias=bias, - input_is_parallel=input_is_parallel, - skip_bias_add=skip_bias_add, - is_expert=is_expert, - tp_comm_buffer_name=tp_comm_buffer_name, - tp_group=tp_group, - ) - self.tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert) - self.tp_size = dist.get_world_size(self.tp_group) - assert ( - input_size % self.tp_size == 0 - ), f"input_size ({input_size}) must be divisible by tp_size ({self.tp_size})" - - if self.tp_size > 1: - assert ( - config.sequence_parallel - ), "--transformer-impl=inference_optimized requires --sequence-parallel" - - @torch.no_grad() - def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Forward pass. 
- """ - x = torch.matmul(x, self.weight.t()) - if self.tp_size > 1: - x, _ = reduce_scatter_along_first_dim(x, tp_group=self.tp_group) - return x, None diff --git a/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py b/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py index 458689fa1f4..c68b0ef89b1 100644 --- a/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py +++ b/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py @@ -69,6 +69,7 @@ def __init__( pretrained_model_name_or_path=tokenizer_path, use_fast=use_fast, trust_remote_code=trust_remote_code, + chat_template=chat_template, ) elif merges_file is None: self.tokenizer = AutoTokenizer.from_pretrained( @@ -76,6 +77,7 @@ def __init__( vocab_file=vocab_file, use_fast=use_fast, trust_remote_code=trust_remote_code, + chat_template=chat_template, ) else: self.tokenizer = AutoTokenizer.from_pretrained( @@ -84,6 +86,7 @@ def __init__( merge_files=merges_file, use_fast=use_fast, trust_remote_code=trust_remote_code, + chat_template=chat_template, ) except Exception as e: raise ValueError( @@ -91,14 +94,6 @@ def __init__( f'for {tokenizer_path}. Exception: {e}' ) - # Store the tokenizer's existing chat template if the user does not provide - # a custom chat template. Otherwise, override the default chat template with - # the user-provided template. 
- if chat_template is None: - chat_template = self.tokenizer.chat_template - else: - self.tokenizer.chat_template = chat_template - self.include_special_tokens = include_special_tokens self.original_vocab_size = len(self.tokenizer) self.chat_template = chat_template diff --git a/megatron/core/tokenizers/text/libraries/null_tokenizer.py b/megatron/core/tokenizers/text/libraries/null_tokenizer.py index 4ddf77fc774..13d56436192 100644 --- a/megatron/core/tokenizers/text/libraries/null_tokenizer.py +++ b/megatron/core/tokenizers/text/libraries/null_tokenizer.py @@ -25,14 +25,6 @@ def ids_to_text(self, ids): text = [str(x) for x in ids] return ' '.join(text) - def tokens_to_ids(self, tokens): - """Converts tokens to ids.""" - return [int(x) for x in tokens] - - def ids_to_tokens(self, ids): - """Converts ids to tokens.""" - return [str(x) for x in ids] - def offsets(self, ids: list[int], text: str) -> list[int]: """Returns offsets.""" offsets, start_idx = [], 0 diff --git a/megatron/core/tokenizers/text/text_tokenizer.py b/megatron/core/tokenizers/text/text_tokenizer.py index 4e0c624e006..2107cf9dce4 100644 --- a/megatron/core/tokenizers/text/text_tokenizer.py +++ b/megatron/core/tokenizers/text/text_tokenizer.py @@ -37,17 +37,13 @@ def __init__(self, path: str, config: dict, **kwargs) -> None: self._tokenizer = self._restore_model(**kwargs) self.additional_args = kwargs self.path = path - - config_template = config.get("chat_template", None) - tokenizer_template = getattr(self._tokenizer, "chat_template", None) - kwargs_template = kwargs.get("chat_template", None) - - if config_template is not None: - self.chat_template = config_template - elif tokenizer_template is not None: - self.chat_template = tokenizer_template + if ( + config.get("chat_template", None) is None + and kwargs.get("chat_template", None) is not None + ): + self.chat_template = kwargs.get("chat_template", None) else: - self.chat_template = kwargs_template + self.chat_template = 
config.get("chat_template", None) def _restore_model(self, **kwargs) -> MegatronTokenizerTextAbstract: """Returns tokenizer library object.""" diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 7bb9a12c697..74031f38219 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -35,7 +35,6 @@ from megatron.core.utils import ( deprecate_inference_params, divide, - get_pg_rank, get_pg_size, is_fa_min_version, is_te_min_version, @@ -159,7 +158,6 @@ def __init__( self.config = config self.layer_number = layer_number - self.attn_mask_type = attn_mask_type self.attention_type = attention_type @@ -308,19 +306,6 @@ def _allocate_memory(self, inference_max_sequence_length, batch_size, dim, dtype device=torch.cuda.current_device(), ) - def _get_pp_layer_offset_for_inference(self): - """Return the pipeline parallel layer offset for inference.""" - assert ( - self.config.virtual_pipeline_model_parallel_size is None - ), "Virtual pipeline parallelism is not supported for inference" - - # Import here to avoid circular imports - from megatron.core.transformer.transformer_layer import get_transformer_layer_offset - - return get_transformer_layer_offset( - self.config, vp_stage=None, pp_rank=get_pg_rank(self.pg_collection.pp) - ) - def _adjust_key_value_for_inference( self, inference_context: BaseInferenceContext, @@ -386,15 +371,9 @@ def _adjust_key_value_for_inference( inference_context.key_value_memory_dict[self.layer_number] ) - if ( - not inference_context.is_static_batching() or inference_context.sequence_len_offset > 0 - ) and (not self.training or not is_te_min_version("2.2.0")): + if not inference_context.is_static_batching() or inference_context.sequence_len_offset > 0: # This should mean that we are past the prompt forward_step # and so we need to turn off masking - # Note: in ModelOpt, we may use inference_context for speculative decoding - # in training. 
In that case, we do not want to turn off masking as we need - # customized attention mask for speculative decoding. - attn_mask_type = AttnMaskType.no_mask if inference_context.is_static_batching(): @@ -465,8 +444,6 @@ def _adjust_key_value_for_inference( key = inference_key_memory[:sequence_end, batch_start:batch_end, ...] value = inference_value_memory[:sequence_end, batch_start:batch_end, ...] else: - pp_layer_offset = self._get_pp_layer_offset_for_inference() - # Apply rotary embeddings before appending KV cache. if inference_context.use_flashinfer_fused_rope and (rotary_pos_cos_sin is not None): query, key = inference_context.apply_fused_qk_rotary_emb( @@ -481,23 +458,17 @@ def _adjust_key_value_for_inference( rotary_pos_emb = (q_pos_emb, None) # key rotary emb has been applied # Append key/value data tensors to cache. - inference_context.append_key_value_cache( - self.layer_number - pp_layer_offset, key, value - ) + inference_context.append_key_value_cache(self.layer_number, key, value) _, max_seqlen_q = inference_context.cu_query_lengths() if getattr(self.config, "cache_mla_latents", None) and max_seqlen_q > 1: # Doing unabsorbed MLA Attention with cached mla latents (prefill/mixed mode) - kv_cache, _, block_table = inference_context.key_value_cache( - self.layer_number - pp_layer_offset - ) + kv_cache, _, block_table = inference_context.key_value_cache(self.layer_number) # Uncompress the KV cache for prefill/mixed mode key, value = self.uncompress_kv_from_cache(kv_cache) else: # Read key/value *pointer* tensors from cache. 
- key, value, block_table = inference_context.key_value_cache( - self.layer_number - pp_layer_offset - ) + key, value, block_table = inference_context.key_value_cache(self.layer_number) return query, key, value, rotary_pos_emb, attn_mask_type, block_table @abstractmethod diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 10a739e11c0..12f15ee980a 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -368,26 +368,9 @@ def create_cudagraphs(): def delete_cuda_graphs(): """Delete all CUDA graphs.""" - # Reset runners. - for record in [ - *_CudagraphGlobalRecord.cudagraph_record, - *_CudagraphGlobalRecord.cudagraph_inference_record, - ]: - runner = record[0] - assert isinstance(runner, _CudaGraphRunner) - - runner.cudagraph_created = False - runner.fwd_graph_recorded = False - runner.bwd_graph_recorded = False - runner.fwd_graph = None - runner.bwd_graph = None - runner.fwd_mempool = None - runner.bwd_mempool = None - # Reset global tracking state _CudagraphGlobalRecord.cudagraph_created = False _CudagraphGlobalRecord.cudagraph_record = [] - _CudagraphGlobalRecord.cudagraph_inference_record = [] # TODO: Optional?: Force garbage collection to clean up memory gc.collect() diff --git a/megatron/core/transformer/fsdp_dtensor_checkpoint.py b/megatron/core/transformer/fsdp_dtensor_checkpoint.py index 04ec982e6ff..65e2f5f9dff 100644 --- a/megatron/core/transformer/fsdp_dtensor_checkpoint.py +++ b/megatron/core/transformer/fsdp_dtensor_checkpoint.py @@ -484,6 +484,6 @@ def get_global_unique_param_name(model_chunks, param): # Get EP unique parameter name num_experts = model_chunks[0].config.num_moe_experts if model_chunks else None - param_name = next(iter(handle_experts_in_state_dict({param_name: None}, num_experts).keys())) + param_name = list(handle_experts_in_state_dict({param_name: None}, num_experts).keys())[0] return param_name diff --git 
a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index 8754e938348..b2135fdb00d 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -48,8 +48,6 @@ num_global_tokens: num_local_tokens*TP*EP """ -logger = logging.getLogger(__name__) - class MoETokenDispatcher: """ @@ -1272,6 +1270,7 @@ def _pad_routing_map( # Check if there are enough tokens to pad enough_tokens_to_pad = torch.all(target_tokens_per_expert <= num_input_tokens) if not enough_tokens_to_pad: + logger = logging.getLogger(__name__) logger.warning( "Not enough tokens to pad. The total number of tokens received in this rank " "is smaller than the target number of tokens for each expert. " diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 3f8c97099da..fae2e2f5d4d 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -749,9 +749,6 @@ class TransformerConfig(ModelParallelConfig): symmetric_ar_type: Optional[str] = None """Type of symmetric all reduce to use""" - use_inference_optimized_layers: bool = False - """If True, use inference optimized transformer layers during inference.""" - mrope_section: Optional[List[int]] = None """ Multimodal rope section is for channel dimension of temporal, height and width in rope calculation. """ @@ -1877,13 +1874,6 @@ def __post_init__(self): f"for context parallelism, but got {self.cp_comm_type=} instead." 
) - if self.transformer_impl == "inference_optimized": - assert self.normalization == "RMSNorm" - assert not self.layernorm_zero_centered_gamma - assert not self.add_bias_linear - assert not self.add_qkv_bias - assert not self.use_kitchen - @dataclass class MLATransformerConfig(TransformerConfig): diff --git a/megatron/core/utils.py b/megatron/core/utils.py index 77a004a6845..9b62b18d400 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -24,7 +24,7 @@ from functools import lru_cache, reduce, wraps from importlib.metadata import version from types import TracebackType -from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union +from typing import Any, Callable, Coroutine, Dict, List, Optional, Tuple, Type, Union import numpy import torch @@ -2140,28 +2140,23 @@ def maybe_cat(a, b, dim=0, *, required=False): return xs[0] if len(xs) == 1 else torch.cat(xs, dim=dim) -_ASYNC_IO_LOOP: asyncio.AbstractEventLoop | None = None - - def get_asyncio_loop(loop: asyncio.AbstractEventLoop | None = None) -> asyncio.AbstractEventLoop: """Creates an asyncio loop if necessary and then returns the current asyncio loop.""" - global _ASYNC_IO_LOOP if loop is None: try: loop = asyncio.get_running_loop() except RuntimeError as e: - if _ASYNC_IO_LOOP is not None: - return _ASYNC_IO_LOOP - else: - _ASYNC_IO_LOOP = loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) return loop _ASYNC_TASK_STATS = defaultdict(lambda: [0, 0.0]) # cnt, total_time -def trace_async_exceptions(func: Optional[Callable] = None, *, verbose: bool = False): +def trace_async_exceptions( + func: Optional[Callable[..., Coroutine]], *, verbose: bool = False +) -> Callable[..., Coroutine]: """Decorator to be applied to every coroutine that runs in a separate task. This is needed because asyncio tasks do not propagate exceptions. 
@@ -2176,81 +2171,41 @@ async def my_coroutine(...): ``` """ - def _log_verbose(name: str, start: float) -> None: - elapsed = (time.perf_counter() - start) * 1000.0 - cnt, tot = _ASYNC_TASK_STATS[name] - _ASYNC_TASK_STATS[name] = [cnt + 1, tot + elapsed] - avg = _ASYNC_TASK_STATS[name][1] / _ASYNC_TASK_STATS[name][0] - - log10 = numpy.log10(max(cnt, 1)) - if numpy.isclose(log10, round(log10)): - logger.info( - f"{name} completed in {elapsed:.3f} ms, " - f"lifetime avg: {avg:.3f} ms, " - f"lifetime cnt: {cnt + 1}" - ) - - def _decorate(fn: Callable): - if asyncio.iscoroutinefunction(fn): - - @functools.wraps(fn) - async def wrapper(*args, **kwargs): + def _decorate(fn): + if not asyncio.iscoroutinefunction(fn): + raise TypeError("trace_async_exceptions can only be used with async functions") + + @functools.wraps(fn) + async def wrapper(*args, **kwargs): + if verbose: + start = time.perf_counter() + try: + return await fn(*args, **kwargs) + except Exception as e: + logger.error(f"Exception in async function {fn.__name__}: {e}") + traceback.print_exc() + sys.exit(1) + finally: if verbose: - start = time.perf_counter() - try: - return await fn(*args, **kwargs) - except Exception as e: - logger.error(f"Exception in async function {fn.__name__}: {e}") - traceback.print_exc() - sys.exit(1) - finally: - if verbose: - _log_verbose(fn.__qualname__, start) - - elif inspect.isasyncgenfunction(fn): - - @functools.wraps(fn) - async def wrapper(*args, **kwargs): - if verbose: - start = time.perf_counter() - agen = fn(*args, **kwargs) - try: - async for item in agen: - yield item - except Exception as e: - logger.error(f"Exception in async generator {fn.__name__}: {e}") - traceback.print_exc() - sys.exit(1) - finally: - if verbose: - _log_verbose(fn.__qualname__, start) + elapsed = (time.perf_counter() - start) * 1000.0 + name = fn.__qualname__ + cnt, tot = _ASYNC_TASK_STATS[name] + _ASYNC_TASK_STATS[name] = [cnt + 1, tot + elapsed] + avg = _ASYNC_TASK_STATS[name][1] / 
_ASYNC_TASK_STATS[name][0] + + log10 = numpy.log10(max(cnt, 1)) + if numpy.isclose(log10, round(log10)): + logger.info( + f"{name} completed in {elapsed:.3f} ms, " + f"lifetime avg: {avg:.3f} ms, " + f"lifetime cnt: {cnt + 1}" + ) - else: - raise TypeError("trace_async_exceptions must be used on async functions or generators") return wrapper return _decorate if func is None else _decorate(func) -def get_mamba_inference_state_config_from_model(model) -> Optional["MambaInferenceStateConfig"]: - """Returns Mamba inference state config from the model if it is a hybrid model.""" - from megatron.core.inference.contexts.attention_context.mamba_metadata import ( - MambaInferenceStateConfig, - ) - from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols - - decoder = get_attr_wrapped_model(model, "decoder") - layer_type_list = getattr(decoder, "layer_type_list", None) - if layer_type_list is not None and Symbols.MAMBA in layer_type_list: - (mamba_conv_states_shape, mamba_ssm_states_shape) = decoder.mamba_state_shapes_per_request() - return MambaInferenceStateConfig( - layer_type_list=layer_type_list, - mamba_conv_states_shape=mamba_conv_states_shape, - mamba_ssm_states_shape=mamba_ssm_states_shape, - ) - return None - - # ============================================================================ # Backward Compatibility Decorators # ============================================================================ diff --git a/megatron/legacy/data/biencoder_dataset_utils.py b/megatron/legacy/data/biencoder_dataset_utils.py index 6d69fabbe48..6fa391c8a22 100644 --- a/megatron/legacy/data/biencoder_dataset_utils.py +++ b/megatron/legacy/data/biencoder_dataset_utils.py @@ -5,14 +5,11 @@ import numpy as np import torch -from megatron.core import mpu, tensor_parallel -from megatron.legacy.data.dataset_utils import ( - create_masked_lm_predictions, - pad_and_convert_to_numpy, -) from megatron.training import get_args, get_tokenizer, print_rank_0 -from 
megatron.training.datasets.data_samplers import MegatronPretrainingSampler - +from megatron.core import mpu, tensor_parallel +from megatron.legacy.data.dataset_utils import create_masked_lm_predictions, \ + pad_and_convert_to_numpy +from megatron.legacy.data.data_samplers import MegatronPretrainingSampler def make_attention_mask(source_block, target_block): """ diff --git a/megatron/training/datasets/data_samplers.py b/megatron/legacy/data/data_samplers.py similarity index 56% rename from megatron/training/datasets/data_samplers.py rename to megatron/legacy/data/data_samplers.py index 1e7f47510d1..1bf1bf5ee91 100644 --- a/megatron/training/datasets/data_samplers.py +++ b/megatron/legacy/data/data_samplers.py @@ -4,17 +4,13 @@ import random - -import numpy as np import torch +import numpy as np from torch.utils.data import Dataset - +from megatron.training import get_args from megatron.core import mpu from megatron.core.datasets.utils import Split -from megatron.training import get_args -from megatron.training.dist_signal_handler import DistributedSignalHandler - def build_pretraining_data_loader(dataset, consumed_samples): """Build dataloader given an input dataset.""" @@ -22,10 +18,10 @@ def build_pretraining_data_loader(dataset, consumed_samples): if dataset is None: return None args = get_args() - - if hasattr(dataset, 'split'): + + if hasattr(dataset,'split'): split = dataset.split - elif hasattr(dataset, 'index_split'): + elif hasattr(dataset,'index_split'): split = dataset.index_split else: split = None @@ -36,8 +32,7 @@ def build_pretraining_data_loader(dataset, consumed_samples): consumed_samples=0, micro_batch_size=args.micro_batch_size, data_parallel_rank=mpu.get_data_parallel_rank(), - data_parallel_size=mpu.get_data_parallel_world_size(), - ) + data_parallel_size=mpu.get_data_parallel_world_size()) elif args.dataloader_type == 'single': # Megatron sampler batch_sampler = MegatronPretrainingSampler( @@ -45,8 +40,7 @@ def 
build_pretraining_data_loader(dataset, consumed_samples): consumed_samples=consumed_samples, micro_batch_size=args.micro_batch_size, data_parallel_rank=mpu.get_data_parallel_rank(), - data_parallel_size=mpu.get_data_parallel_world_size(), - ) + data_parallel_size=mpu.get_data_parallel_world_size()) elif args.dataloader_type == 'cyclic': batch_sampler = MegatronPretrainingRandomSampler( dataset, @@ -55,82 +49,52 @@ def build_pretraining_data_loader(dataset, consumed_samples): micro_batch_size=args.micro_batch_size, data_parallel_rank=mpu.get_data_parallel_rank(), data_parallel_size=mpu.get_data_parallel_world_size(), - data_sharding=args.data_sharding, - ) + data_sharding=args.data_sharding) elif args.dataloader_type == "external": # External dataloaders are passed through. User is expected to provide a # torch-compatible dataloader and define samplers, if needed. return dataset else: - raise Exception('{} dataloader type is not supported.'.format(args.dataloader_type)) - - def worker_init_fn(_): - DistributedSignalHandler(args.exit_signal).__enter__() + raise Exception('{} dataloader type is not supported.'.format( + args.dataloader_type)) - maybe_worker_init_fn = ( - worker_init_fn if args.exit_signal_handler and args.num_workers > 0 else None - ) # Torch dataloader. - return torch.utils.data.DataLoader( - dataset, - batch_sampler=batch_sampler, - num_workers=args.num_workers, - pin_memory=True, - persistent_workers=True if args.num_workers > 0 else False, - worker_init_fn=maybe_worker_init_fn, - ) - + return torch.utils.data.DataLoader(dataset, + batch_sampler=batch_sampler, + num_workers=args.num_workers, + pin_memory=True, + persistent_workers=True if args.num_workers > 0 else False, + ) class MegatronPretrainingSampler: - """ - Sampler for Megatron pretraining dataloaders that divides data samples across - data parallel workers. Each worker receives a contiguous chunk of data determined by - its rank and the micro batch size. 
Supports dropping the last incomplete batch if - specified, and keeps track of total and consumed samples. Designed to work with - distributed training using Megatron's data parallelism. - """ - def __init__( - self, - total_samples, - consumed_samples, - micro_batch_size, - data_parallel_rank, - data_parallel_size, - drop_last=True, - ): + def __init__(self, total_samples, consumed_samples, micro_batch_size, + data_parallel_rank, data_parallel_size, drop_last=True): # Keep a copy of input params for later use. self.total_samples = total_samples self.consumed_samples = consumed_samples self.micro_batch_size = micro_batch_size self.data_parallel_rank = data_parallel_rank - self.micro_batch_times_data_parallel_size = self.micro_batch_size * data_parallel_size + self.micro_batch_times_data_parallel_size = \ + self.micro_batch_size * data_parallel_size self.drop_last = drop_last # Sanity checks. - assert self.total_samples > 0, 'no sample to consume: {}'.format(self.total_samples) - assert ( - self.consumed_samples < self.total_samples - ), 'no samples left to consume: {}, {}'.format(self.consumed_samples, self.total_samples) + assert self.total_samples > 0, \ + 'no sample to consume: {}'.format(self.total_samples) + assert self.consumed_samples < self.total_samples, \ + 'no samples left to consume: {}, {}'.format(self.consumed_samples, + self.total_samples) assert self.micro_batch_size > 0 assert data_parallel_size > 0 - assert ( - self.data_parallel_rank < data_parallel_size - ), 'data_parallel_rank should be smaller than data size: {}, ' '{}'.format( - self.data_parallel_rank, data_parallel_size - ) + assert self.data_parallel_rank < data_parallel_size, \ + 'data_parallel_rank should be smaller than data size: {}, ' \ + '{}'.format(self.data_parallel_rank, data_parallel_size) def __len__(self): return self.total_samples def get_start_end_idx(self): - """ - Calculate the start and end indices for the current data parallel worker's - chunk within a batch. 
- - Returns: - tuple: (start_idx, end_idx) indicating the slice of the batch for this worker. - """ start_idx = self.data_parallel_rank * self.micro_batch_size end_idx = start_idx + self.micro_batch_size return start_idx, end_idx @@ -152,37 +116,17 @@ def __iter__(self): class RandomSeedDataset(Dataset): - """ - A dataset wrapper that resets the random seed before each sample. - This ensures deterministic behavior per sample by setting the RNG state - for torch, numpy, and random before accessing each underlying data sample. - The base seed is retrieved from training arguments, and can be varied per epoch - using the set_epoch method to ensure different shuffling or augmentation each epoch. - - Args: - dataset: The underlying dataset to wrap. - - Methods: - set_epoch(epoch): Change the seed offset so each epoch produces different randomization. - __getitem__(idx): Sets the seed based on the sample index and current epoch. - """ - - def __init__(self, dataset, seed): - self.base_seed = seed - self.curr_seed = seed + def __init__(self, dataset): + args = get_args() + self.base_seed = args.seed + self.curr_seed = args.seed self.dataset = dataset def __len__(self): return len(self.dataset) def set_epoch(self, epoch): - """ - Change the seed offset so each epoch produces different randomization. - - Args: - epoch: The epoch number to use as the seed offset. - """ self.curr_seed = self.base_seed + epoch def __getitem__(self, idx): @@ -194,23 +138,9 @@ def __getitem__(self, idx): class MegatronPretrainingRandomSampler: - """ - Sampler for Megatron pretraining dataloaders that performs random sampling - across data parallel workers. Supports data sharding to divide the dataset - into buckets and shuffle within each bucket. Designed to work with distributed - training using Megatron's data parallelism. 
- """ - def __init__( - self, - dataset, - total_samples, - consumed_samples, - micro_batch_size, - data_parallel_rank, - data_parallel_size, - data_sharding, - ): + def __init__(self, dataset, total_samples, consumed_samples, micro_batch_size, + data_parallel_rank, data_parallel_size, data_sharding): # Keep a copy of input params for later use. self.dataset = dataset self.total_samples = total_samples @@ -219,18 +149,19 @@ def __init__( self.data_parallel_rank = data_parallel_rank self.data_parallel_size = data_parallel_size self.data_sharding = data_sharding - self.micro_batch_times_data_parallel_size = self.micro_batch_size * data_parallel_size - self.last_batch_size = self.total_samples % self.micro_batch_times_data_parallel_size + self.micro_batch_times_data_parallel_size = \ + self.micro_batch_size * data_parallel_size + self.last_batch_size = \ + self.total_samples % self.micro_batch_times_data_parallel_size # Sanity checks. - assert self.total_samples > 0, 'no sample to consume: {}'.format(self.total_samples) + assert self.total_samples > 0, \ + 'no sample to consume: {}'.format(self.total_samples) assert self.micro_batch_size > 0 assert data_parallel_size > 0 - assert ( - self.data_parallel_rank < data_parallel_size - ), 'data_parallel_rank should be smaller than data size: {}, ' '{}'.format( - self.data_parallel_rank, data_parallel_size - ) + assert self.data_parallel_rank < data_parallel_size, \ + 'data_parallel_rank should be smaller than data size: {}, ' \ + '{}'.format(self.data_parallel_rank, data_parallel_size) def __len__(self): return self.total_samples @@ -246,9 +177,8 @@ def __iter__(self): # data sharding and random sampling if self.data_sharding: - bucket_size = ( - self.total_samples // self.micro_batch_times_data_parallel_size - ) * self.micro_batch_size + bucket_size = (self.total_samples // self.micro_batch_times_data_parallel_size) \ + * self.micro_batch_size bucket_offset = current_epoch_samples // self.data_parallel_size start_idx = 
self.data_parallel_rank * bucket_size @@ -257,13 +187,15 @@ def __iter__(self): random_idx = torch.randperm(bucket_size, generator=g).tolist() idx_range = [start_idx + x for x in random_idx[bucket_offset:]] else: - full_bucket_size = (self.total_samples // self.micro_batch_size) * self.micro_batch_size + full_bucket_size = (self.total_samples // self.micro_batch_size) \ + * self.micro_batch_size full_bucket_offset = current_epoch_samples g = torch.Generator() g.manual_seed(self.epoch) - idx_range_total = torch.randperm(full_bucket_size, generator=g).tolist() + idx_range_total = \ + torch.randperm(full_bucket_size, generator=g).tolist() idx_range_active = idx_range_total[full_bucket_offset:] - idx_range = idx_range_active[self.data_parallel_rank :: self.data_parallel_size] + idx_range = idx_range_active[self.data_parallel_rank::self.data_parallel_size] batch = [] # Last batch if not complete will be dropped. diff --git a/megatron/legacy/data/vit_dataset.py b/megatron/legacy/data/vit_dataset.py index 504075a5506..e65c536c897 100644 --- a/megatron/legacy/data/vit_dataset.py +++ b/megatron/legacy/data/vit_dataset.py @@ -1,17 +1,15 @@ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 
import os import random - import numpy as np import torch import torchvision.transforms as T -from PIL import Image, ImageFilter, ImageOps from torchvision import datasets - -from megatron.legacy.data.autoaugment import ImageNetPolicy -from megatron.legacy.data.image_folder import ImageFolder from megatron.training import get_args -from megatron.training.datasets.data_samplers import RandomSeedDataset +from megatron.legacy.data.image_folder import ImageFolder +from megatron.legacy.data.autoaugment import ImageNetPolicy +from megatron.legacy.data.data_samplers import RandomSeedDataset +from PIL import Image, ImageFilter, ImageOps class GaussianBlur(object): @@ -238,7 +236,7 @@ def build_train_valid_datasets(data_path, image_size=224): classes_fraction=args.classes_fraction, data_per_class_fraction=args.data_per_class_fraction ) - train_data = RandomSeedDataset(train_data, args.seed) + train_data = RandomSeedDataset(train_data) # validation dataset val_data_path = data_path[1] @@ -246,6 +244,6 @@ def build_train_valid_datasets(data_path, image_size=224): root=val_data_path, transform=val_transform ) - val_data = RandomSeedDataset(val_data, args.seed) + val_data = RandomSeedDataset(val_data) return train_data, val_data diff --git a/megatron/post_training/algos/__init__.py b/megatron/post_training/algos/__init__.py new file mode 100644 index 00000000000..f8011007a50 --- /dev/null +++ b/megatron/post_training/algos/__init__.py @@ -0,0 +1 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. diff --git a/megatron/post_training/algos/distillation.py b/megatron/post_training/algos/distillation.py new file mode 100644 index 00000000000..c54add0a8d7 --- /dev/null +++ b/megatron/post_training/algos/distillation.py @@ -0,0 +1,601 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+ +"""Distillation loss function(s).""" + +import logging +import re +import types +from abc import ABCMeta +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import modelopt.torch.distill as mtd +import modelopt.torch.opt as mto +import torch +import torch.nn as nn +import torch.nn.functional as F +import yaml +from torch import Tensor +from torch.nn.modules.loss import _Loss + +from megatron.core.dist_checkpointing.mapping import ShardedStateDict +from megatron.core.parallel_state import ( + get_context_parallel_group, + get_pipeline_model_parallel_world_size, + get_tensor_and_context_parallel_rank, + get_tensor_model_parallel_group, + get_virtual_pipeline_model_parallel_world_size, + is_pipeline_last_stage, +) +from megatron.core.pipeline_parallel.schedules import get_tensor_shapes +from megatron.core.transformer import MegatronModule, TransformerConfig, TransformerLayer +from megatron.core.utils import get_model_config + +logger = logging.getLogger(__name__) + + +def load_distillation_config( + config_path: Optional[str], student_cfg: TransformerConfig, teacher_cfg: TransformerConfig +) -> Dict[str, Any]: + """Read the distillation yaml config file specified by ``args.export_kd_cfg``. + + Args: + config_path: Path to user-defined distillation settings yaml file. + If `None`, uses default logits-only distillation mode for GPT models. + student_cfg: Model config for student model. + teacher_cfg: Model config for teacher model. + + WARNING: Assumes intermediate hidden sizes are always that found in the model config's ``hidden_size`` attribute. + """ + if not config_path: + logger.warning("Distillation config not provided. 
Using default.") + cfg = { + "logit_layers": ["output_layer", "output_layer"], + "intermediate_layer_pairs": [], + "skip_lm_loss": True, + "kd_loss_scale": 1.0, + } + else: + with open(config_path) as f: + cfg = yaml.safe_load(f) + + intermediate_pairs = cfg.get("intermediate_layer_pairs", []) + logit_pair = cfg["logit_layers"] + skip_lm_loss = cfg["skip_lm_loss"] + loss_scale = cfg["kd_loss_scale"] + + criterion = {} + if student_cfg.pipeline_model_parallel_size == 1 or is_pipeline_last_stage(): + criterion[tuple(logit_pair)] = LogitsKLLoss(student_cfg) + # NOTE: Projection layer shared among intermediate layer pairs. + projection_layer = ProjectionLayer(student_cfg, teacher_cfg) + + for entry in intermediate_pairs: + if len(entry) == 2: + student_layer, teacher_layer = entry + loss = "hidden_cosine" + elif len(entry) == 3: + student_layer, teacher_layer, loss = entry + + loss_fn = None + + if loss == "mse": + loss_fn = MSELoss + elif loss == "hidden_cosine": + loss_fn = HiddenStateCosineLoss + else: + assert False, f"loss passed was {loss=}" + + if get_tensor_and_context_parallel_rank() == 0: + print( + "Distillation: Adding intermediate loss between" + f" `{student_layer}` of student (hidden size {student_cfg.hidden_size}) and" + f" `{teacher_layer}` of teacher (hidden size {teacher_cfg.hidden_size})." 
+ ) + student_layer = _adjust_layer_index_for_pp(student_layer, student_cfg) + teacher_layer = _adjust_layer_index_for_pp(teacher_layer, teacher_cfg) + criterion[(student_layer, teacher_layer)] = loss_fn( + student_cfg, projection_layer=projection_layer + ) + + loss_balancer = LogitsAndIntermediatesLossBalancer( + kd_loss_scale=loss_scale, skip_original_loss=skip_lm_loss + ) + + cfg["criterion"] = criterion + cfg["loss_balancer"] = loss_balancer + + return cfg + + +def _adjust_layer_index_for_pp(submodule_name, model_cfg): + """Adjust any sequence-based layer indices found in a submodule name for Pipeline Parallelism.""" + + match = re.search(r'(?<=\.)\d+(?=\.)', submodule_name) + if not match: + return submodule_name + + offset = TransformerLayer._get_layer_offset(model_cfg) + new_layer_idx = int(match.group(0)) - offset + if new_layer_idx < 0: + raise ValueError(f"Layer {submodule_name} does not fall on final PP rank.") + + new_submodule_name = submodule_name.replace(match.group(0), str(new_layer_idx)) + if get_tensor_and_context_parallel_rank() == 0: + print( + f'Distillation: Renamed layer "{submodule_name}" on final PP rank to "{new_submodule_name}"' + ) + return new_submodule_name + + +######################################################## + + +class BaseLoss(_Loss, metaclass=ABCMeta): + """Abstract base class for Megatron distillation losses.""" + + def __init__( + self, model_config: TransformerConfig, projection_layer: Optional[nn.Module] = None + ): + """ + Constructor. + + Args: + model_config: MCore transformer config. + projection_layer: Module which projects student activations to teacher's hidden dim. 
+ """ + super().__init__() + self._config = model_config + self._projection = projection_layer + + def pre_forward(self, predictions: Tensor, targets: Tensor) -> Tuple[Tensor, Tensor]: + """Performs projection of student tensor to match teacher's size if necessary.""" + if isinstance(predictions, tuple): + # `ColumnParallelLinear` returns bias too + predictions, targets = predictions[0], targets[0] + + if self._projection is not None: + predictions = self._projection(predictions) + targets = targets.detach() + + return predictions, targets + + def post_forward(self, loss: Tensor, tp_reduce: bool = False, is_sequence_parallel: bool = False) -> Tensor: + """Reshapes tensor from [s, b] to [b, s] for upcoming loss masking.""" + loss = loss.transpose(0, 1).contiguous() + return (loss, tp_reduce, is_sequence_parallel) + + +class HiddenStateCosineLoss(BaseLoss): + """ + Calculates Cosine loss between two tensors without reducing the sequence dim. + + The tensors are assumed to be intermediate activations, so extra restrictions are in place. + """ + + def __init__( + self, model_config: TransformerConfig, projection_layer: Optional[nn.Module] = None + ): + """ + Constructor. + + Args: + model_config: MCore transformer config. + projection_layer: Module which projects student activations to teacher's hidden dim. + """ + super().__init__(model_config, projection_layer=projection_layer) + + if self._config.tensor_model_parallel_size > 1 and not self._config.sequence_parallel: + logger.warning( + "``HiddenStateCosineLoss`` only works with tensors with full hidden dim. Ensure the " + "tensor inputs meet this requirement or use `--sequence_parallel` if tensor parallel is enabled." + ) + + def forward(self, predictions: Tensor, targets: Tensor) -> Tensor: + """ + Forward function. 
+ + Args: + predictions: Student model tensors (size [s, b, h]) + targets: Teacher model tensors (size [s, b, h]) + + Returns: + Cosine loss of tensors (size [b, s]) + """ + predictions, targets = self.pre_forward(predictions, targets) + + loss = F.cosine_embedding_loss( + predictions.view(-1, predictions.size(-1)), + targets.view(-1, targets.size(-1)), + targets.new_ones(1), + reduction="none", + ) + loss = loss.view(*predictions.shape[:2]) + + # NOTE: Tensor sequence length is still split among TP ranks. + return self.post_forward(loss, is_sequence_parallel=self._config.sequence_parallel) + + +class MSELoss(BaseLoss): + """Calculates MSE loss between two tensors without reducing the sequence dim.""" + + def forward(self, predictions: Tensor, targets: Tensor) -> Tensor: + """Forward function. + + Args: + predictions: Student model tensors (size [s, b, h]) + targets: Teacher model tensors (size [s, b, h]) + + Returns: + MSE loss of tensors (size [b, s]) + """ + predictions, targets = self.pre_forward(predictions, targets) + + loss = F.mse_loss(predictions, targets, reduction="none") + loss = loss.mean(dim=-1) + + return self.post_forward(loss, is_sequence_parallel=self._config.sequence_parallel) + + +class LogitsKLLoss(BaseLoss): + """Calculates KL-Divergence loss between two logits tensors without reducing the sequence dim.""" + + def __init__( + self, model_config: TransformerConfig, temperature: float = 1.0, reverse: bool = False + ): + """ + Constructor. + + Args: + model_config: MCore transformer config. + temperature: Divide tensors by this value prior to calculating loss. + reverse: Whether to reverse the loss as KLD(teacher, student) instead of KLD(student, teacher) + """ + super().__init__(model_config) + self._temperature = temperature + self._reverse = reverse + + def forward(self, predictions: Tensor, targets: Tensor) -> Tensor: + """ + Forward function. 
+ + Args: + predictions: Student model tensors (size [s, b, h]) + targets: Teacher model tensors (size [s, b, h]) + + Returns: + KLD loss of tensors (size [b, s]) + """ + predictions, targets = self.pre_forward(predictions, targets) + + # Division by temp should happen prior to finding max for both student and teacher. + # Currently we don't use temperature in any of ours runs (temp=1.0) + output_teacher = targets.float() / self._temperature + output_student = predictions.float() / self._temperature + + # Compute local softmax, and the reweight to compute global softmax. + if self._config.tensor_model_parallel_size > 1: + + # Maximum value along vocab dimension across all GPUs. + teacher_logits_max, _ = torch.max(output_teacher, dim=-1) + torch.distributed.all_reduce( + teacher_logits_max, + op=torch.distributed.ReduceOp.MAX, + group=get_tensor_model_parallel_group(), + ) + output_teacher = output_teacher - teacher_logits_max.unsqueeze(dim=-1) + + denom_teacher = torch.sum(torch.exp(output_teacher), dim=-1) + # We can't use standard reduction function here since the computation + # that follows it isn't identical across TP ranks. + denom_teacher = all_reduce_autograd( + denom_teacher, group=get_tensor_model_parallel_group() + ) + + # Maximum value along vocab dimension across all GPUs. 
+ student_logits_max, _ = torch.max(output_student, dim=-1) + torch.distributed.all_reduce( + student_logits_max, + op=torch.distributed.ReduceOp.MAX, + group=get_tensor_model_parallel_group(), + ) + output_student = output_student - student_logits_max.unsqueeze(dim=-1).detach() + + denom_student = torch.sum(torch.exp(output_student), dim=-1) + denom_student = all_reduce_autograd( + denom_student, group=get_tensor_model_parallel_group() + ) + + slen, bsz, sharded_vocab_size = output_student.shape + student_log_prob = output_student - torch.log(denom_student).view(slen, bsz, 1).expand( + slen, bsz, sharded_vocab_size + ) + teacher_log_prob = output_teacher - torch.log(denom_teacher).view(slen, bsz, 1).expand( + slen, bsz, sharded_vocab_size + ) + + if self._reverse: + loss = torch.sum( + F.kl_div(teacher_log_prob, student_log_prob, reduction="none", log_target=True), + dim=-1, + ) + else: + loss = torch.sum( + F.kl_div(student_log_prob, teacher_log_prob, reduction="none", log_target=True), + dim=-1, + ) + + else: + if self._reverse: + loss = torch.sum( + F.kl_div( + F.log_softmax(output_teacher, dim=-1), + F.softmax(output_student, dim=-1), + reduction="none", + ), + dim=-1, + ) + else: + loss = torch.sum( + F.kl_div( + F.log_softmax(output_student, dim=-1), + F.softmax(output_teacher, dim=-1), + reduction="none", + ), + dim=-1, + ) + + return self.post_forward(loss, tp_reduce=True) + + +######################################################## + + +class LogitsAndIntermediatesLossBalancer(mtd.DistillationLossBalancer): + """ + LossBalancer implementation for Logit and Intermediate losses. + + Dynamically weighs distillation and original losses to balance during training. + """ + + def __init__(self, kd_loss_scale: float = 1.0, skip_original_loss: bool = False): + """Constructor. + + Args: + kd_loss_scale: Multiply distillation losses by this before weighing. + (Not used when `skip_original_loss` is True.) 
+ skip_original_loss: Used to signal whether the original loss should be used, regardless + of whether it was passed into ``mtd.DistillationModel.compute_kd_loss()`` or not. + """ + super().__init__() + self._kd_loss_scale = kd_loss_scale + self._skip_original_loss = skip_original_loss + + def forward(self, loss_dict: Dict[str, Tensor]) -> Tensor: + """Forward function. + + Args: + loss_dict: All individual scalar losses, passed in during ``mtd.DistillationModel.compute_kd_loss()`` + + Returns: + Aggregate total scalar loss. + """ + original_loss = loss_dict.pop(mtd.loss_balancers.STUDENT_LOSS_KEY) + for _key in loss_dict: + if _key.startswith(LogitsKLLoss.__name__): + logits_key = _key # should only be one + logits_loss = loss_dict.pop(logits_key) + intermediate_loss = sum(loss_dict.values()) / max(len(loss_dict), 1) + + if intermediate_loss > 0: + dynamic_scale = logits_loss.item() / intermediate_loss.item() + intermediate_loss_scaled = intermediate_loss * dynamic_scale + kd_loss_scale = self._kd_loss_scale / 2.0 + else: + kd_loss_scale = self._kd_loss_scale + intermediate_loss = logits_loss.new_tensor(intermediate_loss) + intermediate_loss_scaled = intermediate_loss + + if self._skip_original_loss: + total_loss = logits_loss + intermediate_loss_scaled + else: + kd_loss = (logits_loss + intermediate_loss_scaled) * kd_loss_scale + dynamic_scale = original_loss.item() / kd_loss.item() + total_loss = original_loss + kd_loss * dynamic_scale + + out_dict = { + "kd_loss": total_loss, + "logits_loss": logits_loss, + "intermediate_loss": intermediate_loss, + } + return out_dict + + +######################################################## + + +class ProjectionLayer(MegatronModule): + """Module to project student layer activations to teacher's size.""" + + def __init__(self, student_config: TransformerConfig, teacher_config: TransformerConfig): + """ + Constructor. + + Args: + student_config: Student's MCore transformer config. 
+ teacher_config: Teacher's MCore transformer config. + """ + super().__init__(config=student_config) + if student_config.hidden_size == teacher_config.hidden_size: + self._fit = nn.Identity() + else: + self._fit = nn.Linear(student_config.hidden_size, teacher_config.hidden_size) + self.apply(self._init_weights) + # Attribute below needed to reduce gradients during backward properly. + setattr(self._fit.weight, "sequence_parallel", self.config.sequence_parallel) + setattr(self._fit.bias, "sequence_parallel", self.config.sequence_parallel) + + def forward(self, student_tensor: Tensor): + """ + Forward function. + + Args: + student_tensor: Tensor to be fit to teacher size. + """ + return self._fit(student_tensor) + + def _init_weights(self, module): + """Initialize the weights.""" + if isinstance(module, nn.Linear): + module.weight.data.normal_(mean=0.0, std=0.01) + if module.bias is not None: + module.bias.data.zero_() + + +class _AllReduce(torch.autograd.Function): + """Implementation from old PyTorch `torch.distributed.nn.parallel`.""" + + @staticmethod + def forward(ctx, op, group, tensor): + ctx.group, ctx.op = group, op + tensor = tensor.clone() + torch.distributed.all_reduce(tensor, op=op, group=group) + return tensor + + @staticmethod + def backward(ctx, grad_output): + return (None, None, _AllReduce.apply(ctx.op, ctx.group, grad_output)) + + +def all_reduce_autograd( + tensor, op=torch.distributed.ReduceOp.SUM, group=torch.distributed.group.WORLD +): + """Custom all-reduce function. + + Needed instead of other all-reduce functions available when the computation following + the all-reduce call differs per rank. In KL loss, this corresponds to the different numerators. 
+    """
+    return _AllReduce.apply(op, group, tensor)
+
+
+########################################################
+
+
+def adjust_distillation_model_for_mcore(model: mtd.DistillationModel, distill_cfg: Dict[str, Any]):
+    """Extra modifications to ``mtd.DistillationModel`` required for Megatron-Core."""
+
+    # HACK: Get rid of ModelOpt Distillation state
+    # NOTE: If re-placed, above losses need modification as `TransformerConfig` has non-pickleable elements.
+    mto.ModeloptStateManager(model)._state.pop()
+
+    # HACK: Hide teacher during `sharded_state_dict` method.
+    def _sharded_state_dict(self, *args, **kwargs) -> ShardedStateDict:
+        with self.hide_teacher_model():
+            return type(self).sharded_state_dict(self, *args, **kwargs)
+
+    model.sharded_state_dict = types.MethodType(_sharded_state_dict, model)
+
+    # HACK: Skip `lm_loss` bypassing it when training if not needed for backprop.
+    def _compute_language_model_loss(self, labels, logits) -> Tensor:
+        if distill_cfg["skip_lm_loss"] and self.training:
+            return torch.zeros_like(labels)
+        return type(self).compute_language_model_loss(self, labels, logits)
+
+    model.compute_language_model_loss = types.MethodType(_compute_language_model_loss, model)
+
+    # HACK: Skip `lm_loss` always for teacher.
+    def _compute_language_model_loss(self, labels, logits) -> Tensor:
+        return torch.zeros_like(labels)
+
+    model.teacher_model.compute_language_model_loss = types.MethodType(
+        _compute_language_model_loss, model.teacher_model
+    )
+
+    # HACK: Pipeline-parallel Distillation requires splitting input tensor into student and teacher parts.
+ def _set_student_input_tensor_shape(self, shapes: List[Tuple[int]]): + self._tensor_split_idx = shapes[0][-1] + + def _set_input_tensor(self, input_tensors: List[Tensor]): + teacher_inputs = [t[..., self._tensor_split_idx:] if t is not None else t for t in input_tensors] + student_inputs = [t[..., :self._tensor_split_idx] if t is not None else t for t in input_tensors] + type(self).set_input_tensor(self.teacher_model, teacher_inputs) + type(self).set_input_tensor(self, student_inputs) + + model.set_student_input_tensor_shape = types.MethodType(_set_student_input_tensor_shape, model) + model.set_input_tensor = types.MethodType(_set_input_tensor, model) + + # HACK: Concatenate output tensors when PP>1 so they can be passed between ranks. + def _forward(self, *args, **kwargs): + if not self.training: + with self.only_student_forward(): + return type(self).forward(self, *args, **kwargs) + + with torch.no_grad(): + self._teacher_model.eval() + teacher_output = self._teacher_model(*args, **kwargs) + with self.only_student_forward(): + student_output = type(self).forward(self, *args, **kwargs) + + if not is_pipeline_last_stage(): + return torch.cat([student_output, teacher_output], dim=-1) + else: + return student_output + + model.forward = types.MethodType(_forward, model) + + +def get_tensor_shapes_adjust_fn_for_distillation( + model: Union[torch.nn.Module, List[torch.nn.Module]], + seq_length: int, + micro_batch_size: int, + decoder_seq_length: Optional[int] = None, + forward_only: bool = False, +) -> Union[Callable, None]: + if ( + forward_only + or get_pipeline_model_parallel_world_size() == 1 + or get_virtual_pipeline_model_parallel_world_size() is not None + ): + return None + # Unwrap + if isinstance(model, list): + model = model[0] + while hasattr(model, "module"): + model = model.module + if not isinstance(model, mtd.DistillationModel): + return None + + def adjust_tensor_shapes(recv_tensor_shapes: List[Tuple[int, ...]], send_tensor_shapes: List[Tuple[int, 
...]]): + teacher_config = get_model_config(model.teacher_model) + tp_group = get_tensor_model_parallel_group() + cp_group = get_context_parallel_group() + + teacher_recv_tensor_shapes = get_tensor_shapes( + seq_length=seq_length, + micro_batch_size=micro_batch_size, + decoder_seq_length=decoder_seq_length, + config=teacher_config, + tp_group=tp_group, + cp_group=cp_group, + ) + teacher_send_tensor_shapes = get_tensor_shapes( + seq_length=seq_length, + micro_batch_size=micro_batch_size, + decoder_seq_length=decoder_seq_length, + config=teacher_config, + tp_group=tp_group, + cp_group=cp_group, + ) + model.set_student_input_tensor_shape(recv_tensor_shapes) + + for i, shape in enumerate(recv_tensor_shapes): + shape = list(shape) + shape[-1] += teacher_recv_tensor_shapes[0][-1] + recv_tensor_shapes[i] = tuple(shape) + for i, shape in enumerate(send_tensor_shapes): + shape = list(shape) + shape[-1] += teacher_send_tensor_shapes[0][-1] + send_tensor_shapes[i] = tuple(shape) + + return recv_tensor_shapes, send_tensor_shapes + + return adjust_tensor_shapes diff --git a/megatron/post_training/checkpointing.py b/megatron/post_training/checkpointing.py index 143cbb9c6ab..aac59341e37 100644 --- a/megatron/post_training/checkpointing.py +++ b/megatron/post_training/checkpointing.py @@ -183,7 +183,14 @@ def _remove_prefix_state_dict_pre_hook( logger.warning(f"PyTorch version {get_torch_version()} below 2.6 detected." f" Forcing dist_ckpt_save_pre_mcore_014 behavior.") - sharded_state_dict = unwrapped_model[0].sharded_state_dict(prefix=additional_sharded_prefix) + # NOTE: singleton_local_shards only take care of the weight and bias. There are be issue when linear_fc1._amax + # is a matrix such as NVFP4 real quant, awq, and blockwise 128. 
+ if args.dist_ckpt_save_pre_mcore_014 or force_pre_mcore_014: + metadata = {"singleton_local_shards": False} + else: + metadata = {"singleton_local_shards": True} + + sharded_state_dict = unwrapped_model[0].sharded_state_dict(prefix=additional_sharded_prefix, metadata=metadata) if additional_sharded_prefix: unwrapped_model[0]._register_load_state_dict_pre_hook( diff --git a/megatron/post_training/docs/distillation.md b/megatron/post_training/docs/distillation.md index 9f0d5524176..6ca1ec18417 100644 --- a/megatron/post_training/docs/distillation.md +++ b/megatron/post_training/docs/distillation.md @@ -75,7 +75,7 @@ Model Optimizer modifies the model using the loss criterion present in the disti defines a loss function between two module attribute names of the teacher and student model, respectively. Default loss function used between logits is a KL-Divergence Loss and loss used among intermediate tensors is Cosine-Similarity, -both defined in `modelopt.torch.distill.plugins.megatron`. +both defined in `megatron/inference/algos/distillation.py`. ## Restrictions diff --git a/megatron/post_training/generate.py b/megatron/post_training/generate.py index 2a124734a30..0c5be3eceab 100644 --- a/megatron/post_training/generate.py +++ b/megatron/post_training/generate.py @@ -104,7 +104,7 @@ def simple_speculative_generate( input_ids: torch.Tensor, images: Optional[torch.Tensor] = None, osl: int = 32, - steps: int = 0, + draft_length: int = 0, eos_token_id: List[int] = [], disable_tqdm: bool = False, ): @@ -127,7 +127,7 @@ def simple_speculative_generate( # Speculative decoding forward # NOTE: PP is not yet supported. - new_token, draft_tokens = model.pseudo_speculative_generate(input_ids, steps=steps) + new_token, draft_tokens = model.pseudo_speculative_generate(input_ids, steps=draft_length) # Always accept the first token. 
input_ids = output_ids[:, : offset] @@ -138,8 +138,6 @@ def simple_speculative_generate( for i in range(draft_tokens.shape[-1]): if torch.equal(draft_tokens[:, i : i + 1], output_ids[:, offset: offset + 1]): offset += 1 - else: - break # Broadcast the accepted offset from the last rank. offset = [offset] diff --git a/megatron/post_training/loss_func.py b/megatron/post_training/loss_func.py index 9c99529172d..eb8dbca1c6a 100644 --- a/megatron/post_training/loss_func.py +++ b/megatron/post_training/loss_func.py @@ -55,18 +55,16 @@ def loss_func(loss_mask: torch.Tensor, output_tensor: torch.Tensor, model: GPTMo num_tokens = loss_mask.sum().clone().detach().to(torch.int) report = {'lm loss': torch.cat([loss_lm.clone().detach().view(1), num_tokens.view(1)])} - if args.export_kd_teacher_load: + if model.training and args.export_kd_teacher_load: # [ModelOpt]: Handle knowledge distillation losses = model.compute_kd_loss( student_loss=loss_lm, loss_reduction_fn=lambda x: _mask_loss(x, loss_mask), ) + loss = losses["kd_loss"] report["total loss"] = torch.cat([losses["kd_loss"].clone().detach().view(1), num_tokens.view(1)]) report["logits distillation loss"] = torch.cat([losses["logits_loss"].clone().detach().view(1), num_tokens.view(1)]) report["intermediate distillation loss"] = torch.cat([losses["intermediate_loss"].clone().detach().view(1), num_tokens.view(1)]) - if model.training: - loss = losses["kd_loss"] - return loss, num_tokens, report diff --git a/megatron/post_training/model_builder.py b/megatron/post_training/model_builder.py index cb2654e7107..34daa279651 100644 --- a/megatron/post_training/model_builder.py +++ b/megatron/post_training/model_builder.py @@ -7,8 +7,6 @@ from typing import Any, Dict import modelopt.torch.distill as mtd -import modelopt.torch.distill.plugins.megatron as mtd_mcore -import modelopt.torch.opt as mto import yaml from megatron.core.models.gpt import GPTModel as MCoreGPTModel @@ -20,6 +18,7 @@ from 
megatron.core.post_training.modelopt.gpt.state_dict_hooks import ( mcore_gpt_load_te_state_dict_pre_hook, ) +from megatron.post_training.algos import distillation from megatron.post_training.checkpointing import load_modelopt_checkpoint, load_modelopt_state from megatron.training import get_args, print_rank_0 from megatron.training.arguments import core_transformer_config_from_args @@ -286,7 +285,7 @@ def modelopt_gpt_mamba_builder(args, pre_process, post_process, vp_stage=None, c ), "ModelOpt Distillation currently incompatible with interleaved pipeline schedule." teacher_config = _load_teacher_model_config(args.export_kd_teacher_load) - distill_cfg = mtd_mcore.setup_distillation_config( + distill_cfg = distillation.load_distillation_config( args.export_kd_cfg, student_cfg=config, teacher_cfg=core_transformer_config_from_args(teacher_config) ) if "hybrid_override_pattern" in teacher_config and args.is_hybrid_model: @@ -298,15 +297,14 @@ def modelopt_gpt_mamba_builder(args, pre_process, post_process, vp_stage=None, c kd_config = { "teacher_model": (_teacher_provider, [teacher_config, model_kwargs], {}), - "criterion": distill_cfg.criterion, - "loss_balancer": distill_cfg.loss_balancer, + "criterion": distill_cfg["criterion"], + "loss_balancer": distill_cfg["loss_balancer"], } model = mtd.convert(model, mode=[("kd_loss", kd_config)]) - # Additional tweaks needed for MCore. - # (accounts for sharded state, pipeline parallel, and potentially skipping LM loss) - mtd_mcore.adjust_distillation_model_for_mcore(model, distill_cfg) - # Also remove KD mode state to prevent issues with re-conversion after restore. - mto.ModeloptStateManager(model).state_dict().pop() # TODO(aanoosheh): remove once fixed in ModelOpt + # Additional tweaks needed for MCore/Nemo. + # NOTE: Distillation state manually removed in this function. + # ModelOpt state restoration above will not return a `mtd.DistillationModel` for simplicity reasons. 
+ distillation.adjust_distillation_model_for_mcore(model, distill_cfg) return model diff --git a/megatron/post_training/non_loss_data_func.py b/megatron/post_training/non_loss_data_func.py index 49c29b4912c..49fb9220258 100644 --- a/megatron/post_training/non_loss_data_func.py +++ b/megatron/post_training/non_loss_data_func.py @@ -8,11 +8,10 @@ from megatron.training.utils import unwrap_model -def report_draft_acceptance_length(model, osl: int = 64, draft_steps: int = 7): +def report_draft_acceptance_length(model, osl: int = 64, draft_length: int = 7): """Report MTBench acceptance length.""" tokenizer = get_tokenizer()._tokenizer unwrapped_model = unwrap_model(model)[0] - parallel_draft_step = unwrapped_model.eagle_config.parallel_draft_step if hasattr(unwrapped_model, "eagle_config") else 1 if unwrapped_model.training: return @@ -34,15 +33,15 @@ def report_draft_acceptance_length(model, osl: int = 64, draft_steps: int = 7): conversations, return_tensors="pt", add_generation_prompt=True ).to(torch.cuda.current_device()) output_ids, actual_osl, steps = simple_speculative_generate( - unwrapped_model, input_ids, osl=osl, steps=draft_steps, disable_tqdm=True + unwrapped_model, input_ids, osl=osl, draft_length=draft_length, disable_tqdm=True ) total_osl += actual_osl total_steps += steps if torch.distributed.get_rank() == 0: al = actual_osl / steps - ar = al / (draft_steps + parallel_draft_step - 1) + ar = al / draft_length print( - "Rank {:3}/{:3} {:12} AL {:.1f} AR {:.2f} STEPS {:5}/{:5} DRAFT {:2} PARALLEL {:2}".format( + "Rank {:3}/{:3} {:12} AL {:.1f} AR {:.2f} STEPS {:5}/{:5} DRAFT {:2}".format( torch.distributed.get_rank(), torch.distributed.get_world_size(), category, @@ -50,16 +49,15 @@ def report_draft_acceptance_length(model, osl: int = 64, draft_steps: int = 7): ar, steps, actual_osl, - draft_steps, - parallel_draft_step, + draft_length, ), flush=True, ) if torch.distributed.get_rank() == 0: al = total_osl / total_steps - ar = al / (draft_steps + 
parallel_draft_step - 1) + ar = al / draft_length print( - "Rank {:3}/{:3} {:12} AL {:.1f} AR {:.2f} STEPS {:5}/{:5} DRAFT {:2} PARALLEL {:2}".format( + "Rank {:3}/{:3} {:12} AL {:.1f} AR {:.2f} STEPS {:5}/{:5} DRAFT {:2}".format( torch.distributed.get_rank(), torch.distributed.get_world_size(), "average", @@ -67,8 +65,7 @@ def report_draft_acceptance_length(model, osl: int = 64, draft_steps: int = 7): ar, total_steps, total_osl, - draft_steps, - parallel_draft_step, + draft_length, ), flush=True, ) diff --git a/megatron/post_training/utils.py b/megatron/post_training/utils.py index 4bec8c96cf1..5d9f301cd41 100644 --- a/megatron/post_training/utils.py +++ b/megatron/post_training/utils.py @@ -1,6 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -import os import torch from datasets import load_dataset @@ -35,7 +34,7 @@ def mtbench_to_oai_chat(example): example["conversations"] = conversations return example - dataset = load_dataset("HuggingFaceH4/mt_bench_prompts", split="train", token=os.environ.get("HF_TOKEN", None)) + dataset = load_dataset("HuggingFaceH4/mt_bench_prompts", split="train") return dataset.map(mtbench_to_oai_chat) def to_empty_if_meta(module: torch.nn.Module, *, device: torch.device, recurse=True): diff --git a/megatron/rl/inference/megatron.py b/megatron/rl/inference/megatron.py index ad22bd14ac9..58613b364a6 100644 --- a/megatron/rl/inference/megatron.py +++ b/megatron/rl/inference/megatron.py @@ -5,11 +5,10 @@ from argparse import Namespace from pydantic import PrivateAttr -import torch.distributed as dist from megatron.core import parallel_state -from megatron.core.inference.inference_client import InferenceClient from megatron.core.inference.contexts.dynamic_context import DynamicInferenceContext +from megatron.core.inference.coordinator import DynamicEngineCoordinator from megatron.core.inference.engines.abstract_engine import AbstractEngine from megatron.core.inference.engines.dynamic_engine import 
DynamicInferenceEngine from megatron.core.inference.engines.mcore_engine import MCoreEngine @@ -24,11 +23,9 @@ SimpleTextGenerationController, ) from megatron.core.models.gpt.gpt_model import GPTModel -from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols from megatron.core.transformer.module import MegatronModule -from megatron.core.utils import get_mamba_inference_state_config_from_model, log_single_rank +from megatron.core.utils import log_single_rank from megatron.training.global_vars import get_args, get_tokenizer -from megatron.training import get_wandb_writer from ..inference.inference_interface import ( ChatInferenceInterface, @@ -105,36 +102,38 @@ def get_dynamic_inference_engine(args: Namespace, model: MegatronModule, inferen """ tokenizer = get_tokenizer() - enable_cuda_graph = args.cuda_graph_impl == "local" + num_cuda_graphs = None + if args.enable_cuda_graph: + num_cuda_graphs = args.inference_dynamic_batching_num_cuda_graphs - mamba_inference_state_config = get_mamba_inference_state_config_from_model(model) + module = model.module.module if hasattr(model.module, "module") else model.module # Inference context. 
inference_context = DynamicInferenceContext( params_dtype=args.params_dtype, - num_layers=args.num_layers // args.pipeline_model_parallel_size, + num_layers=args.num_layers, kv_channels=args.kv_channels, num_attention_heads=( args.num_query_groups if args.group_query_attention else args.num_attention_heads ), max_sequence_length=args.inference_max_seq_length, - num_cuda_graphs=( - args.inference_dynamic_batching_num_cuda_graphs - if enable_cuda_graph - else None - ), - block_size_tokens=args.inference_dynamic_batching_block_size, + num_cuda_graphs=num_cuda_graphs, buffer_size_gb=args.inference_dynamic_batching_buffer_size_gb, - max_tokens=args.inference_dynamic_batching_max_tokens, + buffer_guaranteed_fraction=args.inference_dynamic_batching_buffer_guaranteed_fraction, + chunk_size_tokens=args.inference_dynamic_batching_chunk_size, + buffer_overflow_factor=args.inference_dynamic_batching_buffer_overflow_factor, + max_requests_override=args.inference_dynamic_batching_max_requests_override, + max_tokens_override=args.inference_dynamic_batching_max_tokens_override, tensor_model_parallel_size=args.tensor_model_parallel_size, materialize_only_last_token_logits=True, - mamba_inference_state_config=mamba_inference_state_config, - cache_mla_latent=args.multi_latent_attention and args.cache_mla_latents, - kv_lora_rank=args.kv_lora_rank if args.multi_latent_attention else None, - qk_pos_emb_head_dim=args.qk_pos_emb_head_dim, - use_cuda_graphs_for_non_decode_steps=not args.decode_only_cuda_graphs, - use_flashinfer_fused_rope=None, - unified_memory_level=args.inference_dynamic_batching_unified_memory_level, + unified_memory_kvcache=args.inference_dynamic_batching_unified_memory_kvcache, + is_hybrid_model=args.is_hybrid_model, + layer_type_list=module.decoder.layer_type_list if args.is_hybrid_model else None, + mamba_head_dim=args.mamba_head_dim, + mamba_num_groups=args.mamba_num_groups, + mamba_d_model=args.hidden_size, + mamba_d_conv=4 if args.is_hybrid_model else None, + 
mamba_d_state=args.mamba_state_dim, metrics_writer=metrics_writer, ) @@ -151,7 +150,7 @@ def get_dynamic_inference_engine(args: Namespace, model: MegatronModule, inferen return DynamicInferenceEngine( controller=text_generation_controller, context=inference_context, - enable_cuda_graph=enable_cuda_graph, + enable_cuda_graph=args.enable_cuda_graph, random_seed=args.seed, inference_logging_step_interval=inference_logging_step_interval, ) @@ -160,8 +159,9 @@ def get_dynamic_inference_engine(args: Namespace, model: MegatronModule, inferen class MegatronLocal(InferenceServer, ReturnsTokens, ReturnsRaw): """Interface to use MCoreEngine directly as an inference engine.""" - _client: InferenceClient = PrivateAttr(None) - _inference_engine: DynamicInferenceEngine = PrivateAttr(None) + _coordinator: DynamicEngineCoordinator = PrivateAttr(None) + _engine_task: asyncio.Task = PrivateAttr(None) + _kill_engine: bool = PrivateAttr(False) async def base_generate(self, request: InferenceRequest): @@ -174,29 +174,25 @@ async def base_generate(self, request: InferenceRequest): isinstance(p, str) for p in request.prompt ), "MegatronLocal only supports string prompts." 
- assert self._client is not None, "Client is not initialized" - tokenizer = get_tokenizer() sampling_params = SamplingParams( - num_tokens_to_generate=None, - num_tokens_total=request.generation_args.max_tokens, + num_tokens_to_generate=request.generation_args.max_tokens or 1024, temperature=request.generation_args.temperature or 1.0, top_k=request.generation_args.top_k or 0, top_p=request.generation_args.top_p or 0.0, - termination_id=self._inference_engine.controller.tokenizer.eod, + termination_id=self._coordinator.engine.controller.tokenizer.eod, return_log_probs=True, skip_prompt_log_probs=True, add_BOS=tokenizer.bos is not None, ) - requests = [ - self._client.add_request(prompt=prompt, sampling_params=sampling_params) + request_ids = [ + self._coordinator.schedule_request(prompt=prompt, sampling_params=sampling_params) for prompt in request.prompt ] - records = await asyncio.gather( - *requests + responses = await asyncio.gather( + *[self._coordinator.get_response(id) for id in request_ids] ) - responses = [record[-1] for record in records] return [ InferenceResponse( response=r.generated_text, @@ -233,32 +229,28 @@ async def launch(cls, model: GPTModel, **kwargs): "wandb module is available. Inference logging will be disabled.") inference_engine: DynamicInferenceEngine = get_dynamic_inference_engine(args, model, inference_logging_step_interval, metrics_writer) - await inference_engine.start_listening_to_data_parallel_coordinator(inference_coordinator_port=41521, launch_inference_coordinator=True) - if dist.get_rank() == 0: - # TODO: We have to do this only on the rank 0 process, should be fixed in the future when we have support for multiple inference clients. 
!2278 - client = InferenceClient(inference_coordinator_port=41521) - await client.start() - else: - client = None + coordinator = DynamicEngineCoordinator( + inference_engine, + inference_max_requests=inference_engine.context.max_requests, + log_level=0, + ) launched_server = cls(**kwargs) - launched_server._client = client - launched_server._inference_engine = inference_engine + launched_server._coordinator = coordinator + + loop = asyncio.get_running_loop() + + coordinator.startup(loop) return launched_server async def kill(self): - if dist.get_rank() == 0: - await self._client.stop_engines() - await self._inference_engine.stopped.wait() + await self._coordinator.shutdown() async def suspend(self): - if dist.get_rank() == 0: - await self._client.pause_engines() - await self._inference_engine.paused.wait() - - async def resume(self): - if dist.get_rank() == 0: - self._client.unpause_engines() - await self._inference_engine.running.wait() + await self._coordinator.suspend_engine() + + def resume(self): + self._coordinator.resume_engine() + class MegatronChatLocal(ChatInferenceInterface, MegatronLocal): ... 
diff --git a/megatron/rl/rl_utils.py b/megatron/rl/rl_utils.py index 11e005f74af..c0992778d57 100644 --- a/megatron/rl/rl_utils.py +++ b/megatron/rl/rl_utils.py @@ -24,7 +24,7 @@ from megatron.core import mpu from megatron.core.datasets.megatron_tokenizer import MegatronLegacyTokenizer -from megatron.core.utils import get_asyncio_loop +from megatron.core.inference.utils import get_event_loop from megatron.core.models.common.language_module.language_module import LanguageModule from megatron.core.num_microbatches_calculator import get_num_microbatches from megatron.core.optimizer import MegatronOptimizer @@ -607,11 +607,11 @@ def get_environment_rollouts( ), "n_prompts must be divisible by data_parallel_world_size" with nvtx_range("rollout-collection"): - loop = get_asyncio_loop() + loop = get_event_loop() with megatron_rl_inference_mode( model, optimizer, - args.cuda_graph_impl, + args.enable_cuda_graph, args.rl_reset_cuda_graphs, args.rl_offload_optimizer_during_inference, args.rl_offload_kv_cache_during_training, @@ -1006,7 +1006,7 @@ def prepare_trajectories( args = get_args() # Only process if we have inference_logprobs if inference_logprobs and any(lp is not None for lp in inference_logprobs): - if args.rl_use_sequence_packing: + if args.use_sequence_packing: # For sequence packing, we need to pad all logprobs to the same size padded_logprobs = [] for logprobs in inference_logprobs: @@ -1207,14 +1207,14 @@ def prepare_data_for_update( # [g, group_size] # Making an assumption that all groups are of the same size! # For packing mode, use all rollouts to compute rewards - rollouts_for_rewards = all_rollouts if args.rl_use_sequence_packing else rollouts + rollouts_for_rewards = all_rollouts if args.use_sequence_packing else rollouts rewards = torch.tensor( [[rollout.reward for rollout in group] for group in rollouts_for_rewards], device='cpu' ) # We flatten them for logging. 
with nvtx_range("prepare_trajectories"): - if args.rl_use_sequence_packing: + if args.use_sequence_packing: trajs, generation_masks, inference_logprobs = prepare_packed_trajectories( all_rollouts, tokenizer, args ) @@ -1228,14 +1228,14 @@ def prepare_data_for_update( # Sequence packing or standard processing packing_context = {} # Store all packing-related data - if args.rl_use_sequence_packing: + if args.use_sequence_packing: with nvtx_range("sequence_packing"): timers('sequence-packing-overhead', log_level=1).start() - bin_size = args.rl_sequence_packing_bin_size + bin_size = args.sequence_packing_bin_size # Create packer with max sequences per bin limit to prevent extreme imbalance - max_sequences_per_bin = getattr(args, 'rl_sequence_packing_max_sequences_per_bin', 100) + max_sequences_per_bin = getattr(args, 'sequence_packing_max_sequences_per_bin', 100) packer = SequencePacker( bin_size=bin_size, pad_token=tokenizer.pad, @@ -1276,7 +1276,7 @@ def prepare_data_for_update( world_size = mpu.get_expert_data_parallel_world_size() # Choose distribution algorithm based on args.sequence_packing_algo - packing_algo = getattr(args, 'rl_sequence_packing_algo', 'fifo') + packing_algo = getattr(args, 'sequence_packing_algo', 'fifo') if packing_algo == 'round-robin': # Round-robin assignment: rank i gets bins [i, i+world_size, i+2*world_size, ...] @@ -1596,7 +1596,7 @@ def prepare_data_for_update( ) original_loss_mask[~generation_masks] = 0.0 - if not args.rl_use_sequence_packing: + if not args.use_sequence_packing: # Use original masks if not packing attention_mask = original_attention_mask loss_mask = original_loss_mask @@ -1606,7 +1606,7 @@ def prepare_data_for_update( timers('compute-logprobs', log_level=0).start() # Before we can update the model, we need to get the logprobs for the \pi_{old} model. 
# Use packed sequences if packing is enabled for performance benefits - if args.rl_use_sequence_packing and 'packed_trajs' in packing_context: + if args.use_sequence_packing and 'packed_trajs' in packing_context: compute_trajs = packing_context['packed_trajs'] compute_position_ids = packing_context['packed_position_ids'] compute_attention_mask = packing_context['packed_attention_mask'] @@ -1661,7 +1661,7 @@ def prepare_data_for_update( if ( inference_logprobs is not None and args.rl_inference_logprobs_is_correction - and not args.rl_use_sequence_packing + and not args.use_sequence_packing ): inference_logprobs = align_unpacked_inference_logprobs( inference_logprobs=inference_logprobs, @@ -1670,14 +1670,14 @@ def prepare_data_for_update( group_stats=group_stats, ) else: - if not args.rl_use_sequence_packing: + if not args.use_sequence_packing: # Keep inference_logprobs as None instead of zeros inference_logprobs = None # For sequence packing, inference_logprobs will be handled separately # Handle packing of inference_logprobs for sequence packing mode if ( - args.rl_use_sequence_packing + args.use_sequence_packing and inference_logprobs is not None and args.rl_inference_logprobs_is_correction ): @@ -1687,7 +1687,7 @@ def prepare_data_for_update( inference_logprobs=inference_logprobs, packing_info=packing_context['packing_info'], generation_masks=generation_masks, - bin_size=args.rl_sequence_packing_bin_size, + bin_size=args.sequence_packing_bin_size, ) # Store packed inference logprobs in packing context @@ -1754,7 +1754,7 @@ def prepare_data_for_update( timers('prepare-advantages').stop() with nvtx_range("create_dataloader"): - if args.rl_use_sequence_packing: + if args.use_sequence_packing: # Store packing context in runtime state for forward_step runtime_state = get_rl_runtime_state() runtime_state.packing_context = packing_context @@ -2049,14 +2049,14 @@ def evaluate_and_print_results_rl( with megatron_rl_inference_mode( model, optimizer, - args.cuda_graph_impl, 
+ args.enable_cuda_graph, args.rl_reset_cuda_graphs, args.rl_offload_optimizer_during_inference, args.rl_offload_kv_cache_during_training, args.rl_remove_kv_cache_during_training, ) as inference_interface: - loop = get_asyncio_loop() + loop = get_event_loop() rank = torch.distributed.get_rank() if rank == 0: @@ -2230,7 +2230,7 @@ def calculate_grpo_loss( def megatron_rl_inference_mode( model: list[LanguageModule], optimizer: MegatronOptimizer, - cuda_graph_impl: str, + enable_cuda_graph: bool, reset_cuda_graphs: bool, offload_optimizer_during_inference: bool, offload_kv_cache_during_training: bool, @@ -2241,7 +2241,7 @@ def megatron_rl_inference_mode( Args: model: model to prepare. optimizer: optimizer used to train the model. - cuda_graph_impl: which cuda graph implementation to use. + enable_cuda_graph: use cuda graphs or not. reset_cuda_graphs: rebuild cuda graphs for each inference stage or not. offload_optimizer_during_inference: move optimizer to cpu during inference or not. offload_kv_cache_during_training: manually offload kv cache to host before training or not. @@ -2252,7 +2252,7 @@ def megatron_rl_inference_mode( """ args = get_args() - loop = get_asyncio_loop() + loop = get_event_loop() nvtx_range = get_nvtx_range() print(f"[{dist.get_rank()}:DP] Entering inference mode") @@ -2275,9 +2275,8 @@ def megatron_rl_inference_mode( with nvtx_range("offload-optimizer-before-inference"): optimizer.offload_to_cpu() - # TODO: Remove this if statement once a change to `toggle_cuda_graphs` makes it safe to. 
- if cuda_graph_impl != "none": - toggle_cuda_graphs(lang_module, cuda_graph_impl, reset_cuda_graphs=reset_cuda_graphs) + if enable_cuda_graph: + toggle_cuda_graphs(lang_module, True, reset_cuda_graphs=reset_cuda_graphs) inference_interface = get_inference_interface(args, loop, model) @@ -2287,28 +2286,25 @@ def megatron_rl_inference_mode( reset_cuda_graphs ), "reset_cuda_graphs must be True when offloading kv cache during training" print( - f"[{dist.get_rank()}:DP] Restoring kv cache ({inference_interface._inference_engine.context.memory_buffer.numel() / 1024**3:.2f} GB) to GPU" + f"[{dist.get_rank()}:DP] Restoring kv cache ({inference_interface._coordinator.engine.context.memory_buffer.numel() / 1024**3:.2f} GB) to GPU" ) - kv_cache = inference_interface._inference_engine.context.memory_buffer - inference_interface._inference_engine.context.memory_buffer = kv_cache.cuda() + kv_cache = inference_interface._coordinator.engine.context.memory_buffer + inference_interface._coordinator.engine.context.memory_buffer = kv_cache.cuda() elif remove_kv_cache_during_training: - if inference_interface._inference_engine.context.memory_buffer is None: - inference_interface._inference_engine.context.build_memory_buffer() + if inference_interface._coordinator.engine.context.memory_buffer is None: + inference_interface._coordinator.engine.context.build_memory_buffer() - # TODO: Improve this if statement once a change is made to CUDA graph handling. 
- cuda_graph_exists = len(_CudagraphGlobalRecord.cudagraph_inference_record) != 0 - if cuda_graph_impl != "none" and not cuda_graph_exists: + if enable_cuda_graph and not _CudagraphGlobalRecord.cudagraph_created: with nvtx_range("wait-for-decode-only"): - while not inference_interface._inference_engine.context.is_decode_only(): + while not inference_interface._coordinator.engine.context.is_decode_only(): active_requests, finished_requests, step_time = loop.run_until_complete( - inference_interface._inference_engine.async_step() + inference_interface._coordinator.engine.async_step() ) with nvtx_range("build-cuda-graphs"): - inference_interface._inference_engine.create_cuda_graphs(reset_context=True) + inference_interface._coordinator.engine.build_cuda_graphs(reset_context=False) - loop.run_until_complete(inference_interface.resume()) + inference_interface.resume() - print(f"[{dist.get_rank()}:DP] Entered inference mode") yield inference_interface with nvtx_range("suspend-engine"): @@ -2316,17 +2312,16 @@ def megatron_rl_inference_mode( with nvtx_range("offload-kv-cache-after-inference"): if offload_kv_cache_during_training: - kv_cache = inference_interface._inference_engine.context.memory_buffer + kv_cache = inference_interface._coordinator.engine.context.memory_buffer print( f"[{dist.get_rank()}:DP] Offloading kv cache ({kv_cache.numel() * kv_cache.element_size() / 1024**3:.2f} GB) to CPU" ) - inference_interface._inference_engine.context.memory_buffer = kv_cache.cpu() + inference_interface._coordinator.engine.context.memory_buffer = kv_cache.cpu() elif remove_kv_cache_during_training: - inference_interface._inference_engine.context.memory_buffer = None + inference_interface._coordinator.engine.context.memory_buffer = None - # TODO: Remove this if statement once a change to `toggle_cuda_graphs` makes it safe to. 
- if cuda_graph_impl != "none": - toggle_cuda_graphs(lang_module, 'none', reset_cuda_graphs=reset_cuda_graphs) + if enable_cuda_graph: + toggle_cuda_graphs(lang_module, False, reset_cuda_graphs=reset_cuda_graphs) if offload_optimizer_during_inference: with nvtx_range("onload-optimizer-after-inference"): @@ -2353,7 +2348,7 @@ def get_iteration_sequence_count(args): def update_sequence_packing_metrics(args): """Update bin tracking for sequence packing mode.""" - if args.rl_use_sequence_packing: + if args.use_sequence_packing: bin_count = ( mpu.get_data_parallel_world_size() * args.micro_batch_size * get_num_microbatches() ) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index be667e32419..bb1b17e9ba2 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -9,6 +9,7 @@ from pathlib import Path import re import types +import warnings import torch import torch.nn.functional as F @@ -34,7 +35,6 @@ ) from megatron.core.activations import squared_relu from megatron.core.fusions.fused_bias_geglu import quick_gelu -from megatron.training.dist_signal_handler import SIGNAL_MAP from megatron.training.utils import ( get_device_arch_version, update_use_dist_ckpt, @@ -1062,6 +1062,8 @@ def validate_args(args, defaults={}): # MoE Spec check if args.num_experts == 0: args.num_experts = None + if args.num_experts is not None: + assert args.spec is None, "Model Spec must be None when using MoEs" if args.num_experts is not None and args.moe_ffn_hidden_size is None: args.moe_ffn_hidden_size = args.ffn_hidden_size print("Warning: moe_ffn_hidden_size is not set, using ffn_hidden_size for MoE instead.") @@ -1106,20 +1108,6 @@ def validate_args(args, defaults={}): any([args.train_data_path, args.valid_data_path, args.test_data_path]) \ <= 1, "A single data source must be provided in training mode, else None" - if args.fim_data: - extra_tokens = [ - args.fim_prefix_token, - args.fim_middle_token, - args.fim_suffix_token, - 
args.fim_pad_token, - args.fim_eod_token, - ] - assert not args.mock_data, "Mock dataset is not supported with FIM dataset." - assert not args.legacy_tokenizer, "FIM dataset is not supported with legacy tokenizers." - assert args.fim_rate, "--fim-rate should be specified." - assert args.fim_spm_rate, "--fim-spm-rate should be specified." - assert all(token is not None for token in extra_tokens), "FIM extra tokens should be specified." - # Deterministic mode if args.deterministic_mode: assert not args.use_flash_attn, "Flash attention can not be used in deterministic mode." @@ -1194,6 +1182,7 @@ def validate_args(args, defaults={}): if args.inference_dynamic_batching: assert args.inference_dynamic_batching_buffer_size_gb is not None assert args.inference_dynamic_batching_block_size % 256 == 0, "block size should be a multiple of 256" + assert args.inference_dynamic_batching_buffer_guaranteed_fraction is not None # MoE upcycling check if args.moe_use_upcycling: @@ -1418,7 +1407,7 @@ def _add_transformer_engine_args(parser): help='Execute wgrad in higher precision even for FP8 runs', dest='fp8_wgrad') group.add_argument('--transformer-impl', default='transformer_engine', - choices=['local', 'transformer_engine', 'inference_optimized'], + choices=['local', 'transformer_engine'], help='Which Transformer implementation to use.') group.add_argument('--fallback-to-eager-attn', action='store_true', help='Fallback to eager attention in TE implementation. ' @@ -1527,22 +1516,34 @@ def _add_inference_args(parser): help='Enable dynamic batching mode.') group.add_argument('--inference-dynamic-batching-buffer-size-gb', type=float, default=40., - help='Amount of on-GPU memory allocated for the KV cache. ' - 'The total amount of memory allocated for the KV cache ' - '(CPU + GPU memory) depends on the value set for the ' - 'unified virtual memory (UVM) level (via ' - '`--inference-dynamic-batching-unified-memory-level`).' 
- 'If the UVM level is 0, then only GPU memory is used and ' - 'the total memory equals `buffer_size_gb`. If the UVM ' - 'level is 1, then additional memory is utilized on the ' - 'CPU and the total memory equals `2 * buffer_size_gb`.') + help='Total buffer size (GB) allocated for the block-level KV ' + 'memory.') group.add_argument('--inference-dynamic-batching-block-size', type=int, default=256, help='KV cache block size. ' 'It should be a multiple of 256') - group.add_argument('--inference-dynamic-batching-max-tokens', + group.add_argument('--inference-dynamic-batching-buffer-guaranteed-fraction', + type=float, default=0.2, + help='Space is reserved within the inference context ' + 'memory buffer to guarantee that a minimum number of ' + 'active requests will always be able to run to ' + 'completion. This is to avoid the context being deadlocked ' + 'by paused requests.') + group.add_argument('--inference-dynamic-batching-buffer-overflow-factor', + type=float, default=None, + help='Scaling factor over the memory buffer size for auto ' + 'computing `max_requests` and `max_tokens`. 
This scaling ' + 'factor is used for fitting more requests and tokens in ' + 'the memory buffer than it can safely hold, which in turn ' + 'increases throughput.') + group.add_argument('--inference-dynamic-batching-max-requests-override', + type=int, default=None, + help='If set, this overrides the max requests as computed ' + 'from `--inference-dynamic-batching-buffer-overflow-factor`.') + group.add_argument('--inference-dynamic-batching-max-tokens-override', type=int, default=None, - help='Override the inference context\'s default `max_tokens`.') + help='If set, this overrides the max tokens as computed ' + 'from `--inference-dynamic-batching-buffer-overflow-factor`.') group.add_argument('--inference-dynamic-batching-num-cuda-graphs', type=int, default=16, help='Maximum number of cuda graphs to capture, where the ' @@ -1559,7 +1560,7 @@ def _add_inference_args(parser): action='store_true', default=False, help='Only use cuda graphs for decode-only steps, not prefill and mixed steps.') group.add_argument('--inference-dynamic-batching-unified-memory-level', - type=int, default=1, choices=[0, 1], + type=int, default=0, choices=[0, 1], help='Set unified memory usage within the dynamic ' 'inference context. The levels are: 0) no unified memory, ' '1) allocate `memory_buffer` in unified memory. ' @@ -1579,8 +1580,7 @@ def _add_inference_args(parser): group.add_argument('--inference-wandb-logging-step-interval', type=int, default=0, help='Step interval for logging inference metrics to wandb. 
' 'Default to 0 to disable inference wandb logging.') - group.add_argument("--inference-coordinator-port", type=int, default=12346, - help="This port will be used to setup the inference coordinator on node-0") + return parser @@ -2273,10 +2273,7 @@ def _add_training_args(parser): help='Exit the program after this many minutes.') group.add_argument('--exit-signal-handler', action='store_true', help='Dynamically save the checkpoint and shutdown the ' - 'training if signal is received') - group.add_argument('--exit-signal', type=str, default='SIGTERM', - choices=list(SIGNAL_MAP.keys()), - help='Signal to use for exit signal handler. If not specified, defaults to SIGTERM.') + 'training if SIGTERM is received') group.add_argument('--tensorboard-dir', type=str, default=None, help='Write TensorBoard logs to this directory.') group.add_argument('--no-masked-softmax-fusion', @@ -3046,27 +3043,6 @@ def _add_data_args(parser): 'If instead this argument is set, the training flow will treat all tokens ' 'that share the same id as the pad token as true pad tokens, potentially ' 'causing severe training instability.') - group.add_argument('--fim-data', action='store_true', help='Whether to use the FIM dataset.') - group.add_argument('--fim-rate', type=float, default=0.5, - help='Probability to convert a training sample into a FIM format.') - group.add_argument('--fim-spm-rate', type=float, default=0.5, - help='Probability that the a FIM sample uses the SPM format over the PSM format.') - group.add_argument('--fim-split-sample', type=str, default=None, - help='String around which to split the sample for FIM.') - group.add_argument('--fim-fragment-rate', type=float, default=None, - help='Rate of FIM on each fragment when --fim-split-sample is not None.') - group.add_argument('--fim-no-prefix', type=str, default=None, - help='Do not apply FIM to fragments that start with this prefix') - group.add_argument('--fim-prefix-token', type=str, default='', - help='FIM prefix token') - 
group.add_argument('--fim-middle-token', type=str, default='', - help='FIM middle token') - group.add_argument('--fim-suffix-token', type=str, default='', - help='FIM suffix token') - group.add_argument('--fim-pad-token', type=str, default='', - help='FIM PAD token') - group.add_argument('--fim-eod-token', type=str, default='<|endoftext|>', - help='FIM EOD token') return parser diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index 48a2025fa63..feacccba162 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -270,7 +270,7 @@ def checkpoint_exists(checkpoints_path): def read_metadata(tracker_filename): # Read the tracker file and either set the iteration or # mark it as a release checkpoint. - iteration = -1 + iteration = 0 release = False with open_file(tracker_filename, 'r') as f: @@ -283,10 +283,7 @@ def read_metadata(tracker_filename): print_rank_0('ERROR: Invalid metadata file {}. Exiting'.format( tracker_filename)) sys.exit() - else: - # Set iteration to 0 for release checkpoints - iteration = 0 - assert iteration > -1 or release, 'error parsing metadata file {}'.format( + assert iteration > 0 or release, 'error parsing metadata file {}'.format( tracker_filename) # Get the max iteration retrieved across the ranks. @@ -1831,16 +1828,6 @@ def load_model_state_dict(module, state_dict, strict: bool): is_local_chkpt = (ckpt_type == CheckpointType.LOCAL) ft_integration.on_checkpoint_loaded(is_local_chkpt=is_local_chkpt) - # Patch checkpoint as needed if required field is not found. 
- if optimizer is not None: - log_printed = False - for param_group in optimizer.param_groups: - if 'default_config' not in param_group: - param_group['default_config'] = True - if not log_printed: - print_rank_0(">>> Inserting 'default_config' field into optimizer.param_groups...") - log_printed = True - return iteration, num_floating_point_operations_so_far diff --git a/megatron/training/datasets/README.md b/megatron/training/datasets/README.md deleted file mode 100644 index d5543c3d1b5..00000000000 --- a/megatron/training/datasets/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Data Pipeline - -## FIM dataset - -`GPTFIMDataset` extends Megatron-Core’s `GPTDataset` to support **Fill-in-the-Middle (FIM)** data augmentation. -It probabilistically converts samples into FIM format using configurable rates, with support for both PSM and SPM patterns, fragment-level splitting, and length-preserving output. - -`GPTFIMDatasetConfig` provides the configuration needed to enable this behavior. -`GPTFIMDatasetConfig` configuration object extending `GPTDatasetConfig` to enable FIM preprocessing. - -**Attributes** - -- `rate`: Probability of converting a sample into a FIM example. A value of `1.0` means FIM is always applied. a value of `0.0` means FIM is never applied. -- `spm_rate`: Probability of using the SPM FIM pattern (vs PSM). The remaining probability (`1 - spm_rate`) selects the PSM (prefix-suffix-middle) pattern instead. For example, if `spm_rate = 0.3`: 30% SPM, 70% PSM. -- `extra_tokens`: Dictionary containing the FIM special tokens: {"prefix", "middle", "suffix", "pad", "eod"}. -- `split_sample`: Optional token around which samples are split before applying FIM. If provided, the input sequence is divided at every occurrence of this token, and FIM is applied independently to each fragment. `A B C D E F G H` -> `FIM(Fragment 1) FIM(Fragment 2) FIM(Fragment 3)`. -- `fragment_rate`: Probability of applying FIM to each fragment when split_sample is used. 
-- `no_prefix`: If the decoded sequence starts with this prefix, FIM is skipped. -`GPTFIMDataset` dataset class that loads token sequences from an `IndexedDataset` and applies FIM transformations before returning each sample. - -**PSM Format** -``` -[prefix_tok] prefix [suffix_tok] suffix [middle_tok] middle -``` - -**SPM Format** -``` -[prefix_tok, suffix_tok] suffix [middle_tok] prefix middle -``` - -**Special cases:** - -- If the sequence starts with no_prefix, FIM is skipped. -- If FIM is not applied, the sample is returned unchanged. \ No newline at end of file diff --git a/megatron/training/datasets/fim_dataset.py b/megatron/training/datasets/fim_dataset.py deleted file mode 100644 index 730b7e033a1..00000000000 --- a/megatron/training/datasets/fim_dataset.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. - -from typing import Dict, Tuple, Optional -from dataclasses import dataclass, field - -import numpy as np -import logging -from megatron.core.datasets.gpt_dataset import GPTDataset, GPTDatasetConfig -from megatron.core.datasets.indexed_dataset import IndexedDataset -from megatron.core.datasets.utils import Split - -logger = logging.getLogger(__name__) - - -@dataclass -class GPTFIMDatasetConfig(GPTDatasetConfig): - """Configuration object for Megatron Core GPT FIM datasets""" - - fim_rate: float = None - """Probability to convert a training sample into a FIM format""" - - fim_spm_rate: float = None - """Probability that the a FIM sample uses the SPM format over the PSM format""" - - fim_extra_tokens: Dict = None - """FIM extra tokens. 
Should consist of prefix, middle, suffix, PAD, and EOD tokens.""" - - fim_split_sample: Optional[str] = None - """String around which to split the sample for FIM""" - - fim_fragment_rate: Optional[float] = None - """Rate of FIM on each fragment when split_sample is not None""" - - fim_no_prefix: Optional[str] = None - """Do not apply FIM to fragments that start with this prefix""" - - -class GPTFIMDataset(GPTDataset): - """The base GPT dataset - - Args: - indexed_dataset (IndexedDataset): The IndexedDataset around which to build the - MegatronDataset - - indexed_indices (np.ndarray): The set of the documents indices to expose - - num_samples (int): The number of samples to draw from the indexed dataset - - index_split (Split): The indexed_indices Split - - config (GPTFIMDatasetConfig): The GPT-specific container for all config sourced parameters - """ - - def __init__( - self, - indexed_dataset: IndexedDataset, - dataset_path: str, - indexed_indices: np.ndarray, - num_samples: int, - index_split: Split, - config: GPTFIMDatasetConfig, - ) -> None: - super().__init__( - indexed_dataset, dataset_path, indexed_indices, num_samples, index_split, config - ) - - self.np_rng = np.random.RandomState(seed=self.config.random_seed) - logger.info(f"Initialized FIM RNG with seed = {self.config.random_seed}") - # get FIM params - self.fim_rate = self.config.fim_rate - self.fim_spm_rate = self.config.fim_spm_rate - self.fragment_fim_rate = self.config.fim_fragment_rate - fim_split_sample = self.config.fim_split_sample - self.no_fim_prefix = self.config.fim_no_prefix - if fim_split_sample: - fim_split_sample_ids = self.config.tokenizer._tokenizer.tokens_to_ids(fim_split_sample) - assert isinstance(fim_split_sample_ids, int) or len(fim_split_sample_ids) == 1 - self.fim_split_sample = ( - fim_split_sample_ids - if isinstance(fim_split_sample_ids, int) - else fim_split_sample_ids[0] - ) - else: - self.fim_split_sample = None - - # get extra tokens ids - fim_tokens = 
self.config.fim_extra_tokens - fim_tokens = [ - fim_tokens["prefix"], - fim_tokens["middle"], - fim_tokens["suffix"], - fim_tokens["pad"], - fim_tokens["eod"], - ] - fim_tokens_ids = self.config.tokenizer._tokenizer.tokens_to_ids(fim_tokens) - ( - self.prefix_tok_id, - self.middle_tok_id, - self.suffix_tok_id, - self.pad_tok_id, - self.eod_tok_id, - ) = fim_tokens_ids - - def _query_document_sample_shuffle_indices(self, idx: int) -> Tuple[np.ndarray, np.ndarray]: - """Get the text (token ids) and document ids for a given index - - Args: - idx (int): The index into the dataset - - Returns: - Tuple[np.ndarray, np.ndarray]: The text ids and document ids - """ - # Do the shuffle mapping - idx = self.shuffle_index[idx] - - # Get the beginning and end documents and offsets - doc_index_beg, doc_index_beg_offset = self.sample_index[idx] - doc_index_end, doc_index_end_offset = self.sample_index[idx + 1] - - document_ids = [] - sample_parts = [] - - # Sample spans a single document - if doc_index_beg == doc_index_end: - # Add the document id - document_ids.append(self.document_index[doc_index_beg]) - - # Add the entire sample - sample_parts.append( - self.dataset.get( - self.document_index[doc_index_beg], - offset=doc_index_beg_offset, - length=doc_index_end_offset - doc_index_beg_offset + 1, - ) - ) - - # Sample spans multiple documents - else: - for i in range(doc_index_beg, doc_index_end + 1): - # Add the document id - document_ids.append(self.document_index[i]) - - # Add the sample part - offset = 0 if i > doc_index_beg else doc_index_beg_offset - length = None if i < doc_index_end else doc_index_end_offset + 1 - sample_parts.append( - self.dataset.get(self.document_index[i], offset=offset, length=length) - ) - - sample = np.concatenate(sample_parts) - - sample_len = sample.shape[0] - segment_breaks = np.argwhere(sample == self.eod_tok_id) - - if segment_breaks.shape != (0, 1): # then there is an EOD token in this example - curr_start_position = 0 - new_samples = [] - 
for loc in np.nditer(segment_breaks): - # Only permute non-empty segments. - if loc - curr_start_position > 0: - # permute {prefix, suffix, middle} or {suffix, prefix, middle} - permuted = self._fim_split_and_permute_sequence(sample[curr_start_position:loc]) - new_samples += [permuted, [self.eod_tok_id]] - - curr_start_position = loc + 1 # jump over the EOD token - # Permute the segment after the last EOD - permuted = self._fim_split_and_permute_sequence(sample[curr_start_position:]) - new_samples.append(permuted) - - sample = np.concatenate(new_samples) - else: - sample = self._fim_split_and_permute_sequence(sample) - - diff = sample.shape[0] - sample_len - if diff > 0: # too long - sample = sample[:sample_len] - elif diff < 0: # too short - sample = np.concatenate([sample, np.full((-1 * diff), self.pad_tok_id)]) - - assert sample.shape[0] == sample_len - - return (np.array(sample, dtype=np.int64), np.array(document_ids, dtype=np.int64)) - - def _fim_permute_sequence(self, sequence, rate): - return self._permute( - sequence, - rate, - self.fim_spm_rate, - self.config.tokenizer, - truncate_or_pad=False, - suffix_tok_id=self.suffix_tok_id, - prefix_tok_id=self.prefix_tok_id, - middle_tok_id=self.middle_tok_id, - pad_tok_id=self.pad_tok_id, - no_fim_prefix=self.no_fim_prefix, - ) - - def _fim_split_and_permute_sequence(self, sequence): - """ - If self.fim_split_sample is not None, split the sequence. - Then apply FIM on the fragments, or the whole sequence if self.fim_split_sample is None. - """ - if self.fim_split_sample is None: - return self._fim_permute_sequence(sequence, self.fim_rate) - # fim_split_sample is set: split the sample on this token and permute each fragment separately. - # Typically, if each sample is a repository, then we split again on the file level. - # Each fragment is a file, and we permute the files. 
- fragment_breaks = np.argwhere(sequence == self.fim_split_sample) - if fragment_breaks.shape == (0, 1): - # no split token in this sample - return self._fim_permute_sequence(sequence, self.fim_rate) - if not self.np_rng.binomial(1, self.fim_rate): - # don't do FIM preproc - return sequence - # Do FIM on each fragment - curr_start_position = 0 - new_samples = [] - for loc in np.nditer(fragment_breaks): - if loc - curr_start_position > 0: - permuted = self._fim_permute_sequence( - sequence[curr_start_position:loc], self.fragment_fim_rate - ) - new_samples += [permuted, [self.fim_split_sample]] - curr_start_position = loc + 1 # Jump over the split token - # Permute the segment after the last split token - permuted = self._fim_permute_sequence( - sequence[curr_start_position:], self.fragment_fim_rate - ) - new_samples.append(permuted) - - return np.concatenate(new_samples) - - def _permute( - self, - sample, - fim_rate, - fim_spm_rate, - tokenizer, - truncate_or_pad=True, - suffix_tok_id=None, - prefix_tok_id=None, - middle_tok_id=None, - pad_tok_id=None, - no_fim_prefix=None, - ): - """ - Take in a sample (np array w/ size (0,chunklength)) and perform a FIM transformation on it. - Maintain the same sample length (if transform creates a few extra tokens, drop them). 
- """ - if self.np_rng.binomial(1, fim_rate): # sample bernoulli dist - - contents = tokenizer._tokenizer.ids_to_text(sample) - - # Do not apply FIM if the sample starts with no_fim_prefix - if no_fim_prefix is not None and contents.startswith(no_fim_prefix): - return sample - - try: - # A boundary can be =0 (prefix will be empty) - # a boundary can be =len(contents) (suffix will be empty) - # The two boundaries can be equal (middle will be empty) - boundaries = list(self.np_rng.randint(low=0, high=len(contents) + 1, size=2)) - boundaries.sort() - except ValueError as e: - print(len(contents), contents) - print(e) - raise e - - prefix = contents[: boundaries[0]] - middle = contents[boundaries[0] : boundaries[1]] - suffix = contents[boundaries[1] :] - - prefix = np.array([*tokenizer._tokenizer.text_to_ids(prefix)], dtype=np.int64) - middle = np.array([*tokenizer._tokenizer.text_to_ids(middle)], dtype=np.int64) - suffix = np.array([*tokenizer._tokenizer.text_to_ids(suffix)], dtype=np.int64) - - # here we truncate each given segment to fit the same length as it was before - # A consequence is that we never reach the end of a file? - # we should rather truncate at the context-level - if truncate_or_pad: - # need to make same length as the input. Take the 3 sentinel tokens into account - new_length = suffix.shape[0] + prefix.shape[0] + middle.shape[0] + 3 - diff = new_length - sample.shape[0] - if diff > 0: # too long - if ( - suffix.shape[0] <= diff - ): # if there's no space to truncate the suffix: stop and report it. 
atm i should have stopped this from happening - return sample - suffix = suffix[: suffix.shape[0] - diff] - elif diff < 0: # too short - suffix = np.concatenate([suffix, np.full((-1 * diff), pad_tok_id)]) - - if self.np_rng.binomial(1, fim_spm_rate): - # SPM (variant 2 from FIM paper) - new_sample = np.concatenate( - [[prefix_tok_id, suffix_tok_id], suffix, [middle_tok_id], prefix, middle] - ) - else: - # PSM - new_sample = np.concatenate( - [[prefix_tok_id], prefix, [suffix_tok_id], suffix, [middle_tok_id], middle] - ) - - else: - # don't do FIM preproc - new_sample = sample - - return new_sample diff --git a/megatron/training/dist_signal_handler.py b/megatron/training/dist_signal_handler.py index f1f3725c8a9..f4b4fbf5c0d 100644 --- a/megatron/training/dist_signal_handler.py +++ b/megatron/training/dist_signal_handler.py @@ -3,12 +3,6 @@ import torch -SIGNAL_MAP = { - 'SIGTERM': signal.SIGTERM, - 'SIGINT': signal.SIGINT, - 'SIGUSR1': signal.SIGUSR1, - 'SIGUSR2': signal.SIGUSR2 -} def get_world_size(): if torch.distributed.is_available() and torch.distributed.is_initialized(): @@ -55,8 +49,8 @@ def all_gather_item(item, dtype, group=None, async_op=False, local_rank=None): class DistributedSignalHandler: - def __init__(self, sig: str = 'SIGTERM'): - self.sig = SIGNAL_MAP.get(sig, signal.SIGTERM) + def __init__(self, sig=signal.SIGTERM): + self.sig = sig def signals_received(self): all_received = all_gather_item( diff --git a/megatron/training/global_vars.py b/megatron/training/global_vars.py index a718877b40c..ec402263d29 100644 --- a/megatron/training/global_vars.py +++ b/megatron/training/global_vars.py @@ -11,7 +11,7 @@ from megatron.core.energy_monitor import EnergyMonitor from megatron.core.jit import disable_jit_fuser from megatron.core.num_microbatches_calculator import init_num_microbatches_calculator, unset_num_microbatches_calculator -from megatron.training.dist_signal_handler import DistributedSignalHandler +from megatron.training import 
dist_signal_handler from megatron.training.tokenizer import build_tokenizer _GLOBAL_ARGS = None @@ -74,11 +74,10 @@ def get_signal_handler(): return _GLOBAL_SIGNAL_HANDLER -def _set_signal_handler(exit_signal): - +def _set_signal_handler(): global _GLOBAL_SIGNAL_HANDLER _ensure_var_is_not_initialized(_GLOBAL_SIGNAL_HANDLER, 'signal handler') - _GLOBAL_SIGNAL_HANDLER = DistributedSignalHandler(exit_signal).__enter__() + _GLOBAL_SIGNAL_HANDLER = dist_signal_handler.DistributedSignalHandler().__enter__() @@ -111,7 +110,7 @@ def set_global_variables(args, build_tokenizer=True): set_experimental_flag(True) if args.exit_signal_handler: - _set_signal_handler(args.exit_signal) + _set_signal_handler() if args.disable_jit_fuser: disable_jit_fuser() diff --git a/megatron/training/training.py b/megatron/training/training.py index 58dcfbde734..9986f931641 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -2,7 +2,6 @@ """Pretrain utilities.""" -import copy import dataclasses from datetime import datetime, timedelta import functools @@ -12,7 +11,7 @@ import math import os import sys -from typing import Any, Optional +from typing import List, Optional import torch.distributed @@ -34,7 +33,7 @@ except ImportError: has_rl_utils = False try: - from modelopt.torch.distill.plugins.megatron import ( + from megatron.post_training.algos.distillation import ( get_tensor_shapes_adjust_fn_for_distillation, ) @@ -76,7 +75,7 @@ from megatron.core.distributed import finalize_model_grads from megatron.core.enums import ModelType -from megatron.core.optimizer import get_megatron_optimizer, AdamOptimizerConfig, SGDOptimizerConfig, OptimizerConfig, ParamKey +from megatron.core.optimizer import get_megatron_optimizer, OptimizerConfig from megatron.core.optimizer.muon import get_megatron_muon_optimizer from megatron.core.rerun_state_machine import ( get_rerun_state_machine, @@ -88,7 +87,7 @@ from megatron.training.initialize import write_args_to_tensorboard from 
megatron.training.initialize import set_jit_fusion_options from megatron.training.utils import get_batch_on_this_cp_rank, get_batch_on_this_tp_rank -from megatron.training.datasets.data_samplers import build_pretraining_data_loader +from megatron.legacy.data.data_samplers import build_pretraining_data_loader from megatron.core.optimizer_param_scheduler import OptimizerParamScheduler from megatron.core.transformer.moe import upcycling_utils from megatron.core.transformer.moe.moe_utils import track_moe_metrics @@ -162,32 +161,22 @@ def num_floating_point_operations(args, batch_size): def calculate_layer_counts(): """Calculate the number of attention, Mamba, and MLP layers.""" if args.hybrid_override_pattern: - counts = {'M': 0, '*': 0, '-': 0, 'E':0} + counts = {'M': 0, '*': 0, '-': 0} for layer_type in args.hybrid_override_pattern: if layer_type in counts: counts[layer_type] += 1 - return counts['*'], counts['M'], counts['-'], counts['E'] + return counts['*'], counts['M'], counts['-'] else: num_attn_layers = round(args.num_layers * args.hybrid_attention_ratio) num_mlp_layers = round(args.num_layers * args.hybrid_mlp_ratio) num_mamba_layers = args.num_layers - num_attn_layers - num_mlp_layers - num_moe_layers = 0 - return num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers + return num_attn_layers, num_mamba_layers, num_mlp_layers def mlp_layer_flops(batch_size, seq_len, hidden_size, expansion=4.0, swiglu=False): """Calculate FLOPs for an MLP layer.""" scale_factor = 3.0 / 2.0 if swiglu else 1.0 return 4 * expansion * scale_factor * batch_size * seq_len * hidden_size**2 - def moe_layer_flops(batch_size, seq_len, hidden_size, moe_ffn_hidden_size, - shared_expert_ffn_hidden_size, num_experts_routed_to, swiglu=False): - """Calculate FLOPs for an MoE layer.""" - scale_factor = 3.0 / 2.0 if swiglu else 1.0 - routed_flops = (4 * batch_size * seq_len * hidden_size * - moe_ffn_hidden_size * num_experts_routed_to * scale_factor) - shared_flops = 4 * batch_size * 
seq_len * hidden_size * shared_expert_ffn_hidden_size * scale_factor - return routed_flops + shared_flops - def attn_layer_flops( batch_size, seq_len, hidden_size, num_heads, gqa=True, gqa_groups=8, kv_channels=None ): @@ -226,13 +215,12 @@ def mamba_layer_flops(batch_size, seq_len, hidden_size, state_dim=16, ) def hybrid_flops(batch_size, seq_len, hidden_size, - num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers, + num_attn_layers, num_mamba_layers, num_mlp_layers, mamba_state_dim=128, mamba_head_dim=64, mamba_num_groups=8, mamba_num_heads=128, - num_attn_heads=32, gqa=True, + num_attn_heads=32,gqa=True, gqa_groups=8, kv_channels=None, mlp_expansion=4.0, swiglu=False, - moe_ffn_hidden_size=2048, shared_expert_ffn_hidden_size=2048, num_experts_routed_to=1, vocab_size=256000): """Calculate total FLOPs for the hybrid model.""" flops_fwd = ( @@ -243,8 +231,6 @@ def hybrid_flops(batch_size, seq_len, hidden_size, num_mamba_layers * mamba_layer_flops(batch_size, seq_len, hidden_size, mamba_state_dim, mamba_head_dim, mamba_num_groups, mamba_num_heads) + - num_moe_layers * moe_layer_flops(batch_size, seq_len, hidden_size, moe_ffn_hidden_size, - shared_expert_ffn_hidden_size, num_experts_routed_to, swiglu) + (2 * batch_size * seq_len * hidden_size * vocab_size) # logits computation ) return flops_fwd * 3 @@ -493,7 +479,7 @@ def transformer_flops(): # Main entrypoint for FLOPs calculation. if args.is_hybrid_model: # Calculate the number of each type of layer. - num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers = calculate_layer_counts() + num_attn_layers, num_mamba_layers, num_mlp_layers = calculate_layer_counts() # Compute hybrid model FLOPs. 
return hybrid_flops( @@ -503,7 +489,6 @@ def transformer_flops(): num_attn_layers=num_attn_layers, num_mamba_layers=num_mamba_layers, num_mlp_layers=num_mlp_layers, - num_moe_layers=num_moe_layers, mamba_state_dim=args.mamba_state_dim, mamba_head_dim=args.mamba_head_dim, mamba_num_groups=args.mamba_num_groups, @@ -514,11 +499,6 @@ def transformer_flops(): kv_channels=args.kv_channels, mlp_expansion=args.ffn_hidden_size / args.hidden_size, swiglu=args.swiglu, - moe_ffn_hidden_size=(args.moe_ffn_hidden_size if args.moe_ffn_hidden_size is not None - else args.ffn_hidden_size), - shared_expert_ffn_hidden_size=(0 if args.moe_shared_expert_intermediate_size is None - else args.moe_shared_expert_intermediate_size), - num_experts_routed_to=args.moe_router_topk, vocab_size=args.padded_vocab_size, ) else: @@ -614,6 +594,30 @@ def reorder_inner_param_groups(optimizer_state_dict): return preprocessed_common_state_dict +def get_no_weight_decay_cond(no_weight_decay_cond_type, default_skip_embedding_weight_decay): + """Get the no weight decay condition function.""" + + # Default case: no_weight_decay_cond_type is None + no_weight_decay_cond_fn = None + + if no_weight_decay_cond_type == 'apply_wd_to_qk_layernorm': + # Qwen3-Next applies weight decay to qk layernorm as a special case + def apply_wd_to_qk_layernorm_fn(name, param): + if "q_layernorm" in name or "k_layernorm" in name: + no_wd = False + else: + no_wd = ( + name.endswith(".bias") + or len(param.shape) == 1 + or (default_skip_embedding_weight_decay and "embedding" in name) + ) + return no_wd + no_weight_decay_cond_fn = apply_wd_to_qk_layernorm_fn + elif no_weight_decay_cond_type is not None: + raise ValueError(f"Invalid no_weight_decay_cond_type: {no_weight_decay_cond_type}") + + return no_weight_decay_cond_fn + def pretrain( train_valid_test_dataset_provider, model_provider, @@ -750,8 +754,15 @@ def pretrain( # Model, optimizer, and learning rate. 
timers('model-and-optimizer-setup', log_level=0).start(barrier=True) + no_weight_decay_cond = get_no_weight_decay_cond( + args.no_weight_decay_cond_type, + default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, + ) model, optimizer, opt_param_scheduler = setup_model_and_optimizer( - model_provider, model_type, checkpointing_context=checkpointing_context + model_provider, + model_type, + checkpointing_context=checkpointing_context, + no_weight_decay_cond=no_weight_decay_cond, ) timers('model-and-optimizer-setup').stop() @@ -1167,45 +1178,12 @@ def get_optimizer_param_scheduler(optimizer): return opt_param_scheduler -def get_megatron_optimizer_config(args: Any) -> OptimizerConfig: - """Return a Megatron optimizer config object from Megatron's arguments.""" - - config = None - if args.optimizer == 'adam' or 'muon' in args.optimizer: - # TODO(deyuf): Muon needs both adam + muon but get() only receive one config - # So for now we keep using adam config that's back compat with old way - kwargs = {} - for f in dataclasses.fields(AdamOptimizerConfig): - if hasattr(args, f.name): - kwargs[f.name] = getattr(args, f.name) - config = AdamOptimizerConfig(**kwargs) - elif args.optimizer == 'sgd': - kwargs = {} - for f in dataclasses.fields(SGDOptimizerConfig): - if hasattr(args, f.name): - kwargs[f.name] = getattr(args, f.name) - config = SGDOptimizerConfig(**kwargs) - else: - raise ValueError("Invalid optimizer type!") - - # Construct the appropriate config_overrides object. - # TODO: add more logic here as needed down the road. 
- if args.decoupled_lr is not None: - decoupled_param_key = ParamKey(attr="is_embedding_or_output_parameter") - decoupled_optimizer_config = copy.deepcopy(config) - decoupled_optimizer_config.lr = args.decoupled_lr - if args.decoupled_min_lr is not None: - decoupled_optimizer_config.min_lr = args.decoupled_min_lr - config_overrides = {decoupled_param_key: decoupled_optimizer_config} - else: - config_overrides = None - - return config, config_overrides - - def setup_model_and_optimizer( model_provider_func, model_type, + no_weight_decay_cond=None, + scale_lr_cond=None, + lr_mult=1.0, checkpointing_context=None, ): """Setup model and optimizer.""" @@ -1217,25 +1195,33 @@ def setup_model_and_optimizer( unwrapped_model = unwrap_model(model) one_logger and one_logger.log_metrics({"app_build_optimzer_start_time": one_logger_utils.get_timestamp_in_ms()}) - config, config_overrides = get_megatron_optimizer_config(args) + kwargs = {} + for f in dataclasses.fields(OptimizerConfig): + if hasattr(args, f.name): + kwargs[f.name] = getattr(args, f.name) + config = OptimizerConfig(**kwargs) config.timers = timers if 'muon' not in config.optimizer: - # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings - # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903 - # default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, optimizer = get_megatron_optimizer( config, model, - config_overrides=config_overrides, + no_weight_decay_cond, + scale_lr_cond, + lr_mult, use_gloo_process_groups=args.enable_gloo_process_groups, + # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings + # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903 + default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, dump_param_to_param_group_map=args.dump_param_to_param_group_map, ) else: optimizer = 
get_megatron_muon_optimizer( config, model, - config_overrides=config_overrides, + no_weight_decay_cond, + scale_lr_cond, + lr_mult, use_gloo_process_groups=args.enable_gloo_process_groups, layer_wise_distributed_optimizer='dist' in config.optimizer, ) @@ -1379,10 +1365,7 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch if has_nvidia_modelopt: # [ModelOpt]: Pipeline-parallel Distillation stacks student and teacher tensors adjust_tensor_shapes_fn = get_tensor_shapes_adjust_fn_for_distillation( - model, - seq_length=args.seq_length, - micro_batch_size=args.micro_batch_size, - decoder_seq_length=args.decoder_seq_length, + model, args.seq_length, args.micro_batch_size, args.decoder_seq_length ) else: adjust_tensor_shapes_fn = None @@ -1511,6 +1494,7 @@ def training_log( loss_dict, total_loss_dict, learning_rate, + decoupled_learning_rate, iteration, loss_scale, report_memory_flag, @@ -1615,6 +1599,8 @@ def training_log( writer.add_scalar('learning-rate vs samples', learning_rate, args.consumed_train_samples) if wandb_writer: wandb_writer.log({'learning-rate': learning_rate}, iteration) + if args.decoupled_lr is not None: + writer.add_scalar('decoupled-learning-rate', decoupled_learning_rate, iteration) if args.skipped_train_samples > 0: writer.add_scalar('skipped-train-samples', args.skipped_train_samples, iteration) if wandb_writer: @@ -1694,12 +1680,6 @@ def training_log( track_names.append("global_load_balancing_loss") if args.moe_z_loss_coeff is not None: track_names.append("z_loss") - - if args.is_hybrid_model: - layers = args.hybrid_override_pattern.count('E') - else: - layers = args.num_layers - track_moe_metrics( loss_scale=moe_loss_scale, iteration=iteration, @@ -1709,7 +1689,7 @@ def training_log( per_layer_logging=args.moe_per_layer_logging, force_initialize=True, track_names=track_names, - num_layers=layers, + num_layers=args.num_layers, moe_layer_freq=args.moe_layer_freq, mtp_num_layers=args.mtp_num_layers, ) @@ -1770,6 
+1750,14 @@ def training_log( wandb_writer.log({'power/gpu': power}, iteration) # Decoupled_learning_rate should be not None only on first and last pipeline stage. log_string += f' learning rate: {learning_rate:.6E} |' + if args.decoupled_lr is not None and ( + mpu.is_pipeline_first_stage(ignore_virtual=True) + or mpu.is_pipeline_last_stage(ignore_virtual=True) + ): + assert decoupled_learning_rate is not None + log_string += f' decoupled learning rate: {decoupled_learning_rate:.6E} |' + else: + assert decoupled_learning_rate is None log_string += f' global batch size: {batch_size:5d} |' for key in total_loss_dict: if key not in [advanced_iters_key, skipped_iters_key, nan_iters_key]: @@ -2535,15 +2523,19 @@ def get_e2e_base_metrics(): if args.log_params_norm: params_norm = calc_params_l2_norm(model) learning_rate = None + decoupled_learning_rate = None for param_group in optimizer.param_groups: if len(param_group['params']) == 0: continue - if param_group['default_config']: + if param_group['is_decoupled_lr']: + decoupled_learning_rate = param_group['lr'] + else: learning_rate = param_group['lr'] report_memory_flag = training_log( loss_dict, total_loss_dict, learning_rate, + decoupled_learning_rate, iteration, loss_scale, report_memory_flag, diff --git a/pretrain_gpt.py b/pretrain_gpt.py index 9b13d66c7a7..ecb7163ff70 100644 --- a/pretrain_gpt.py +++ b/pretrain_gpt.py @@ -20,7 +20,6 @@ from megatron.training.arguments import core_transformer_config_from_args from megatron.training import get_args, get_timers, get_tokenizer, inprocess_restart, pretrain, print_rank_0 from megatron.training.datasets.sft_dataset import SFTDataset -from megatron.training.datasets.fim_dataset import GPTFIMDataset, GPTFIMDatasetConfig from megatron.training.utils import ( get_batch_on_this_cp_rank, get_batch_on_this_tp_rank, @@ -186,49 +185,26 @@ def core_gpt_dataset_config_from_args(args): blend_per_split: Optional[List[Optional[Tuple[List[str], Optional[List[float]]]]]] blend, 
blend_per_split = get_blend_and_blend_per_split(args) - data_args = { - "random_seed": args.seed, - "sequence_length": args.seq_length, - "blend": blend, - "blend_per_split": blend_per_split, - "split": args.split, - "multiple_validation_sets": args.multiple_validation_sets, - "full_validation": args.full_validation, - "num_dataset_builder_threads": args.num_dataset_builder_threads, - "path_to_cache": args.data_cache_path, - "mmap_bin_files": args.mmap_bin_files, - "tokenizer": tokenizer, - "reset_position_ids": args.reset_position_ids, - "reset_attention_mask": args.reset_attention_mask, - "eod_mask_loss": args.eod_mask_loss, - "create_attention_mask": args.create_attention_mask_in_dataloader, - "object_storage_cache_path": args.object_storage_cache_path, - "mid_level_dataset_surplus": args.mid_level_dataset_surplus, - "allow_ambiguous_pad_tokens": args.allow_ambiguous_pad_tokens, - } - - # add FIM args to the config - if args.fim_data: - extra_tokens = { - "prefix": args.fim_prefix_token, - "middle": args.fim_middle_token, - "suffix": args.fim_suffix_token, - "pad": args.fim_pad_token, - "eod": args.fim_eod_token, - } - data_args.update( - { - "fim_rate": args.fim_rate, - "fim_spm_rate": args.fim_spm_rate, - "fim_extra_tokens": extra_tokens, - "fim_split_sample": args.fim_split_sample, - "fim_fragment_rate": args.fim_fragment_rate, - "fim_no_prefix": args.fim_no_prefix, - } - ) - return GPTFIMDatasetConfig(**data_args) - - return GPTDatasetConfig(**data_args) + return GPTDatasetConfig( + random_seed=args.seed, + sequence_length=args.seq_length, + blend=blend, + blend_per_split=blend_per_split, + split=args.split, + multiple_validation_sets=args.multiple_validation_sets, + full_validation=args.full_validation, + num_dataset_builder_threads=args.num_dataset_builder_threads, + path_to_cache=args.data_cache_path, + mmap_bin_files=args.mmap_bin_files, + tokenizer=tokenizer, + reset_position_ids=args.reset_position_ids, + reset_attention_mask=args.reset_attention_mask, 
+ eod_mask_loss=args.eod_mask_loss, + create_attention_mask=args.create_attention_mask_in_dataloader, + object_storage_cache_path=args.object_storage_cache_path, + mid_level_dataset_surplus=args.mid_level_dataset_surplus, + allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens, + ) def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None): @@ -246,8 +222,6 @@ def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None else: if args.mock_data: dataset_type = MockGPTDataset - elif args.fim_data: - dataset_type = GPTFIMDataset else: dataset_type = GPTDataset diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json deleted file mode 100644 index cd90888e65d..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json +++ /dev/null @@ -1,287 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 10.89074, - "2": 10.89234, - "3": 10.89032, - "4": 10.89221, - "5": 10.89416, - "6": 10.90226, - "7": 10.8884, - "8": 10.90211, - "9": 10.90202, - "10": 10.88512, - "11": 10.87636, - "12": 10.89499, - "13": 10.89837, - "14": 10.89182, - "15": 10.85125, - "16": 10.8534, - "17": 10.82862, - "18": 10.83653, - "19": 10.82847, - "20": 10.74583, - "21": 10.73117, - "22": 10.61256, - "23": 10.72616, - "24": 10.62932, - "25": 10.59394, - "26": 10.63357, - "27": 10.63137, - "28": 10.58201, - "29": 10.58671, - "30": 10.40936, - "31": 10.15873, - "32": 10.48319, - "33": 10.46977, - "34": 10.23978, - "35": 10.28144, - "36": 10.23894, - "37": 10.35198, - "38": 10.20565, - "39": 10.40496, - "40": 10.09271, - "41": 10.16148, - "42": 10.2231, - "43": 9.84152, - "44": 9.97329, - "45": 9.84544, - "46": 9.82102, - "47": 10.14261, 
- "48": 9.86553, - "49": 9.54033, - "50": 9.9169 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 1544.0, - "2": 1729.0, - "3": 1672.0, - "4": 1807.0, - "5": 1942.0, - "6": 1736.0, - "7": 1956.0, - "8": 1716.0, - "9": 2011.0, - "10": 1385.0, - "11": 1864.0, - "12": 1767.0, - "13": 2019.0, - "14": 1787.0, - "15": 1828.0, - "16": 1908.0, - "17": 1718.0, - "18": 1602.0, - "19": 1785.0, - "20": 1679.0, - "21": 1917.0, - "22": 1712.0, - "23": 2034.0, - "24": 1752.0, - "25": 1645.0, - "26": 1820.0, - "27": 1915.0, - "28": 1996.0, - "29": 2051.0, - "30": 1890.0, - "31": 1577.0, - "32": 1886.0, - "33": 2116.0, - "34": 1912.0, - "35": 2037.0, - "36": 1924.0, - "37": 2462.0, - "38": 2241.0, - "39": 2321.0, - "40": 2221.0, - "41": 2345.0, - "42": 2386.0, - "43": 2027.0, - "44": 2211.0, - "45": 2096.0, - "46": 2285.0, - "47": 2536.0, - "48": 2289.0, - "49": 2270.0, - "50": 2421.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 581489664.0, - "2": 581489664.0, - "3": 581489664.0, - "4": 581489664.0, - "5": 581489664.0, - "6": 581489664.0, - "7": 581489664.0, - "8": 581489664.0, - "9": 581489664.0, - "10": 581489664.0, - "11": 581489664.0, - "12": 581489664.0, - "13": 581489664.0, - "14": 581489664.0, - "15": 581489664.0, - "16": 581489664.0, - "17": 581489664.0, - "18": 581489664.0, - "19": 581489664.0, - "20": 581489664.0, - "21": 581489664.0, - "22": 581489664.0, - "23": 581489664.0, - "24": 581489664.0, - "25": 581489664.0, - "26": 581489664.0, - "27": 581489664.0, - "28": 581489664.0, - "29": 581489664.0, - "30": 581489664.0, - "31": 581489664.0, - "32": 581489664.0, - "33": 581489664.0, - "34": 581489664.0, - "35": 581489664.0, - "36": 581489664.0, - "37": 581489664.0, - "38": 581489664.0, - "39": 581489664.0, - "40": 581489664.0, - "41": 581489664.0, - "42": 581489664.0, - "43": 581489664.0, - "44": 581489664.0, - "45": 581489664.0, - "46": 
581489664.0, - "47": 581489664.0, - "48": 581489664.0, - "49": 581489664.0, - "50": 581489664.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 4605814272.0, - "2": 4702430720.0, - "3": 4702430720.0, - "4": 4702430720.0, - "5": 4702430720.0, - "6": 4702430720.0, - "7": 4702430720.0, - "8": 4702430720.0, - "9": 4702430720.0, - "10": 4702430720.0, - "11": 4702430720.0, - "12": 4702430720.0, - "13": 4702430720.0, - "14": 4702430720.0, - "15": 4702430720.0, - "16": 4702430720.0, - "17": 4702430720.0, - "18": 4702430720.0, - "19": 4702430720.0, - "20": 4702430720.0, - "21": 4702430720.0, - "22": 4702430720.0, - "23": 4702430720.0, - "24": 4702430720.0, - "25": 4702430720.0, - "26": 4702430720.0, - "27": 4702430720.0, - "28": 4702430720.0, - "29": 4702430720.0, - "30": 4702430720.0, - "31": 4702430720.0, - "32": 4702430720.0, - "33": 4702430720.0, - "34": 4702430720.0, - "35": 4702430720.0, - "36": 4702430720.0, - "37": 4702430720.0, - "38": 4702430720.0, - "39": 4702430720.0, - "40": 4702430720.0, - "41": 4702430720.0, - "42": 4702430720.0, - "43": 4702430720.0, - "44": 4702430720.0, - "45": 4702430720.0, - "46": 4702430720.0, - "47": 4702430720.0, - "48": 4702430720.0, - "49": 4702430720.0, - "50": 4702430720.0 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 6.95394, - "2": 0.0878, - "3": 0.06953, - "4": 0.07916, - "5": 0.06775, - "6": 0.07681, - "7": 0.06695, - "8": 0.0786, - "9": 0.0664, - "10": 0.08059, - "11": 0.06554, - "12": 0.07501, - "13": 0.06663, - "14": 0.06608, - "15": 0.06585, - "16": 0.06738, - "17": 0.067, - "18": 0.06553, - "19": 0.06755, - "20": 0.06723, - "21": 0.06559, - "22": 0.0664, - "23": 0.06722, - "24": 0.06553, - "25": 0.06829, - "26": 0.06873, - "27": 0.06733, - "28": 0.06731, - "29": 0.06824, - "30": 0.06696, - "31": 0.06661, - "32": 0.06587, - "33": 0.06588, - "34": 0.06564, - "35": 0.06761, - "36": 0.06655, 
- "37": 0.06712, - "38": 0.06601, - "39": 0.06661, - "40": 0.06632, - "41": 0.0691, - "42": 0.06551, - "43": 0.06839, - "44": 0.06528, - "45": 0.06744, - "46": 0.0675, - "47": 0.06698, - "48": 0.0649, - "49": 0.06596, - "50": 0.06581 - } - } -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml deleted file mode 100644 index ddc8286573b..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml +++ /dev/null @@ -1,56 +0,0 @@ -ENV_VARS: - CUDA_DEVICE_MAX_CONNECTIONS: 1 - NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 - NCCL_ALGO: Ring - CUBLAS_WORKSPACE_CONFIG: :4096:8 -MODEL_ARGS: - --num-layers: 12 - --hidden-size: 512 - --num-attention-heads: 8 - --log-params-norm: true - --log-num-zeros-in-grad: true - --log-validation-ppl-to-tensorboard: true - --log-timers-to-tensorboard: true - --tensorboard-dir: ${TENSORBOARD_PATH} - --micro-batch-size: 4 - --global-batch-size: 32 - --seq-length: 1024 - --max-position-embeddings: 1024 - --train-iters: 50 - --timing-log-level: 0 - --lr-decay-iters: 320000 - --save: ${CHECKPOINT_SAVE_PATH} - --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt - --split: 949,50,1 - --distributed-backend: nccl - --lr: 0.00015 - --lr-decay-style: cosine - --min-lr: 1.0e-5 - --weight-decay: 1e-2 - --clip-grad: 1.0 - --lr-warmup-fraction: .01 - --log-interval: 1 - --save-interval: 10000 - --eval-interval: 1000 - --eval-iters: 10 - --transformer-impl: transformer_engine - --tensor-model-parallel-size: 1 - --pipeline-model-parallel-size: 1 - --use-distributed-optimizer: true - --deterministic-mode: true - 
--no-gradient-accumulation-fusion: true - --attention-softmax-in-fp32: true - --use-mcore-models: true - --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes - --data-cache-path: ${DATA_CACHE_PATH} - --bf16: true - --attention-backend: unfused - --log-memory-to-tensorboard: true - --fim-data: true - --fim-rate: 0.5 - --fim-spm-rate: 0.5 -TEST_TYPE: regular diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json index cbc5f4fa3ae..12a9b70df83 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json @@ -1,187 +1,178 @@ { - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", - "generated_tokens": [ - 3060, - 2430, - 1636, - 2012, - 1317, - 1278, - 2362, - 1307, - 1278, - 16070, - 1044, - 1321, - 1636, - 23067, - 1455, - 1593, - 1395, - 1605, - 3140, - 5152, - 1513, - 1747, - 1046, - 2409, - 1395, - 3140, - 5152, - 1513, - 1278, - 2362 - ], - "latency": 0.2963709831237793, - "cuda_graph_request_count_map": { - "852": 0, - "840": 0, - "784": 0, - "728": 0, - "672": 0, - "616": 0, - "560": 0, - "504": 0, - "448": 0, - "392": 0, - "336": 0, - "280": 0, - "224": 0, - "168": 0, - "112": 0, - "56": 29 - }, - "step_count": 240, - "logprobs": [ - -9.362494468688965, - -2.827894449234009, - -4.557381629943848, - -1.4968647956848145, - -0.717312216758728, - -1.7262351512908936, - -2.522736072540283, - -2.1782360076904297, - -2.3603432178497314, - -6.136383533477783, - -1.4676916599273682, - -3.468963384628296, - -4.424870491027832, - -3.7345848083496094, - -2.012619972229004, - -1.8833301067352295, - -3.5708768367767334, - -6.8197832107543945, - -0.3122292757034302, - -0.9820290207862854, - -6.532033443450928, - -7.498172760009766, - -12.615165710449219, - -2.409003496170044, - -3.8550546169281006, - -0.5105050802230835, - -4.2802581787109375, - -0.06971167027950287, - -0.054025799036026, - -3.319596767425537, - -9.703240394592285, - -1.0997297763824463, - -6.224854469299316, - -5.234503269195557, - -3.934987783432007, - -2.5263679027557373, - -3.1843955516815186, - -5.880871295928955, - -1.8436813354492188, - -5.906496047973633, - -12.15787410736084, - -12.5841064453125, - -0.0819428563117981, - -2.6212656497955322, - -1.4329369068145752, - -2.885145425796509, - -1.2901865243911743, - -0.006647023372352123, - -3.5115818977355957, - -12.945953369140625, - -3.793078899383545, - -3.0094375610351562, - -5.966838836669922, - -0.8998424410820007, - -0.040962252765893936, - -1.5467679500579834, - -1.0785343647003174, - -5.73494815826416, - -0.38491737842559814, - -5.017007827758789, - -0.5568072199821472, - 
-0.5968841910362244, - -2.3609962463378906, - -13.582086563110352, - -0.09050048142671585, - -3.7264108657836914, - -1.1208789348602295, - -6.052675247192383, - -0.5848909616470337, - -3.5906238555908203, - -0.9494907855987549, - -1.5676641464233398, - -5.127577781677246, - -17.19189453125, - -6.698403835296631, - -1.0449178218841553, - -4.365664958953857, - -1.1243419647216797, - -2.2092156410217285, - -1.8081634044647217, - -0.23330983519554138, - -9.439546585083008, - -0.2947109341621399, - -7.253565788269043, - -2.3855936527252197, - -4.629369258880615, - -3.4186267852783203, - -1.9727531671524048, - -2.354729652404785, - -1.474542498588562, - -2.48478364944458, - -1.7641210556030273, - -1.1853944063186646, - -2.8624324798583984, - -0.5740103125572205, - -0.4542185962200165, - -1.4300930500030518, - -0.8807456493377686, - -0.4597663879394531, - -0.9252307415008545, - -1.648141860961914, - -0.44453874230384827, - -1.818476915359497, - -0.5714479088783264, - -1.2115143537521362, - -1.0910619497299194, - -0.0023161747958511114, - -1.3206473588943481, - -0.008621376007795334, - -0.7551823854446411, - -0.9404395818710327, - -0.07279698550701141, - -0.9365248680114746, - -0.03344438225030899, - -1.9720849990844727, - -1.3928067684173584, - -0.7453650832176208 - ] - }, - "throughput": [ - 5.425516447410972, - 95.53889537647129, - 98.64633360458717, - 100.31860128598137, - 100.41338716203114, - 100.2318180695741, - 100.30260782227111, - 100.30996418216475 - ] + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.29413437843322754, + "cuda_graph_request_count_map": { + "372": 0, + "360": 0, + "336": 0, + "312": 0, + "288": 0, + "264": 0, + "240": 0, + "216": 0, + "192": 0, + "168": 0, + "144": 0, + "120": 0, + "96": 0, + "72": 0, + "48": 0, + "24": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + -5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + -2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + 
-0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.331681251525879, + -1.5606917142868042, + -2.454296588897705, + -1.5334703922271729, + -1.2631131410598755, + -2.657367706298828, + -0.6480202078819275, + -0.4550393521785736, + -1.3625166416168213, + -0.8142069578170776, + -0.4496593475341797, + -0.9312890768051147, + -1.732723355293274, + -0.44613128900527954, + -1.6895122528076172, + -0.6082233190536499, + -1.0978344678878784, + -1.1122435331344604, + -0.002520838286727667, + -1.4072327613830566, + -0.007462364621460438, + -0.7548662424087524, + -0.9937503337860107, + -0.0675487294793129, + -0.9595617055892944, + -0.029961343854665756, + -2.205785036087036, + -1.2615025043487549, + -0.7878209352493286 + ] + }, + "throughput": [104.98559493782837, 104.98559493782837] } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml index 15a4a655049..0675b047464 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml @@ -46,6 +46,8 @@ MODEL_ARGS: --return-log-probs: true --num-tokens-to-generate: 30 
--enable-cuda-graph: true + --inference-dynamic-batching-buffer-guaranteed-fraction: 0 + --inference-dynamic-batching-buffer-overflow-factor: 0.2 --inference-dynamic-batching-buffer-size-gb: 20 --dist-ckpt-strictness: log_unexpected --inference-ckpt-non-strict: true # To handle the extra_state errors diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json index c22bb604f94..8e07dfee229 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json @@ -1,187 +1,178 @@ { - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", - "generated_tokens": [ - 3060, - 2430, - 1636, - 2012, - 1317, - 1278, - 2362, - 1307, - 1278, - 16070, - 1044, - 1321, - 1636, - 23067, - 1455, - 1593, - 1395, - 1605, - 3140, - 5152, - 1513, - 1747, - 1046, - 2409, - 1395, - 3140, - 5152, - 1513, - 1278, - 2362 - ], - "latency": 0.38181447982788086, - "cuda_graph_request_count_map": { - "852": 0, - "840": 0, - "784": 0, - "728": 0, - "672": 0, - "616": 0, - "560": 0, - "504": 0, - "448": 0, - "392": 0, - "336": 0, - "280": 0, - "224": 0, - "168": 0, - "112": 0, - "56": 29 - }, - "step_count": 240, - "logprobs": [ - -9.362494468688965, - -2.827894449234009, - -4.557381629943848, - -1.4968647956848145, - -0.717312216758728, - -1.7262351512908936, - -2.522736072540283, - -2.1782360076904297, - -2.3603432178497314, - -6.136383533477783, - -1.4676916599273682, - -3.468963384628296, - -4.424870491027832, - -3.7345848083496094, - -2.012619972229004, - -1.8833301067352295, - -3.5708768367767334, - -6.8197832107543945, - -0.3122292757034302, - -0.9820290207862854, - -6.532033443450928, - -7.498172760009766, - -12.615165710449219, - -2.409003496170044, - -3.8550546169281006, - -0.5105050802230835, - -4.2802581787109375, - -0.06971167027950287, - -0.054025799036026, - -3.319596767425537, - -9.703240394592285, - -1.0997297763824463, - -6.224854469299316, - -5.234503269195557, - -3.934987783432007, - -2.5263679027557373, - -3.1843955516815186, - -5.880871295928955, - -1.8436813354492188, - -5.906496047973633, - -12.15787410736084, - -12.5841064453125, - -0.0819428563117981, - -2.6212656497955322, - -1.4329369068145752, - -2.885145425796509, - -1.2901865243911743, - -0.006647023372352123, - -3.5115818977355957, - -12.945953369140625, - -3.793078899383545, - -3.0094375610351562, - -5.966838836669922, - -0.8998424410820007, - -0.040962252765893936, - -1.5467679500579834, - -1.0785343647003174, - -5.73494815826416, - -0.38491737842559814, - -5.017007827758789, - -0.5568072199821472, - 
-0.5968841910362244, - -2.3609962463378906, - -13.582086563110352, - -0.09050048142671585, - -3.7264108657836914, - -1.1208789348602295, - -6.052675247192383, - -0.5848909616470337, - -3.5906238555908203, - -0.9494907855987549, - -1.5676641464233398, - -5.127577781677246, - -17.19189453125, - -6.698403835296631, - -1.0449178218841553, - -4.365664958953857, - -1.1243419647216797, - -2.2092156410217285, - -1.8081634044647217, - -0.23330983519554138, - -9.439546585083008, - -0.2947109341621399, - -7.253565788269043, - -2.3855936527252197, - -4.629369258880615, - -3.4186267852783203, - -1.9727531671524048, - -2.354729652404785, - -1.474542498588562, - -2.48478364944458, - -1.7641210556030273, - -1.1853944063186646, - -2.8624324798583984, - -0.5740103125572205, - -0.4542185962200165, - -1.4300930500030518, - -0.8807456493377686, - -0.4597663879394531, - -0.9252307415008545, - -1.648141860961914, - -0.44453874230384827, - -1.818476915359497, - -0.5714479088783264, - -1.2115143537521362, - -1.0910619497299194, - -0.0023161747958511114, - -1.3206473588943481, - -0.008621376007795334, - -0.7551823854446411, - -0.9404395818710327, - -0.07279698550701141, - -0.9365248680114746, - -0.03344438225030899, - -1.9720849990844727, - -1.3928067684173584, - -0.7453650832176208 - ] - }, - "throughput": [ - 3.896181563640281, - 77.1287764739343, - 77.17674536709352, - 76.8666671960972, - 77.944911028325, - 77.95118832563914, - 78.13236085816422, - 78.0046829173943 - ] + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", + "generated_tokens": [ + 3060, + 2430, + 1636, + 2012, + 1317, + 1278, + 2362, + 1307, + 1278, + 16070, + 1044, + 1321, + 1636, + 23067, + 1455, + 1593, + 1395, + 1605, + 3140, + 5152, + 1513, + 1747, + 1046, + 2409, + 1395, + 3140, + 5152, + 1513, + 1278, + 2362 + ], + "latency": 0.3712351322174072, + "cuda_graph_request_count_map": { + "372": 0, + "360": 0, + "336": 0, + "312": 0, + "288": 0, + "264": 0, + "240": 0, + "216": 0, + "192": 0, + "168": 0, + "144": 0, + "120": 0, + "96": 0, + "72": 0, + "48": 0, + "24": 29 + }, + "step_count": 240, + "logprobs": [ + -9.362494468688965, + -2.827894449234009, + -4.557381629943848, + -1.4968647956848145, + -0.717312216758728, + -1.7262351512908936, + -2.522736072540283, + -2.1782360076904297, + -2.3603432178497314, + -6.136383533477783, + -1.4676916599273682, + -3.468963384628296, + -4.424870491027832, + -3.7345848083496094, + -2.012619972229004, + -1.8833301067352295, + -3.5708768367767334, + -6.8197832107543945, + -0.3122292757034302, + -0.9820290207862854, + -6.532033443450928, + -7.498172760009766, + -12.615165710449219, + -2.409003496170044, + -3.8550546169281006, + -0.5105050802230835, + -4.2802581787109375, + -0.06971167027950287, + -0.054025799036026, + -3.319596767425537, + -9.703240394592285, + -1.0997297763824463, + -6.224854469299316, + -5.234503269195557, + -3.934987783432007, + -2.5263679027557373, + -3.1843955516815186, + -5.880871295928955, + -1.8436813354492188, + -5.906496047973633, + -12.15787410736084, + -12.5841064453125, + -0.0819428563117981, + -2.6212656497955322, + -1.4329369068145752, + -2.885145425796509, + -1.2901865243911743, + -0.006647023372352123, + -3.5115818977355957, + -12.945953369140625, + -3.793078899383545, + -3.0094375610351562, + -5.966838836669922, + -0.8998424410820007, + -0.040962252765893936, + -1.5467679500579834, + -1.0785343647003174, + -5.73494815826416, + -0.38491737842559814, + -5.017007827758789, + -0.5568072199821472, + 
-0.5968841910362244, + -2.3609962463378906, + -13.582086563110352, + -0.09050048142671585, + -3.7264108657836914, + -1.1208789348602295, + -6.052675247192383, + -0.5848909616470337, + -3.5906238555908203, + -0.9494907855987549, + -1.5676641464233398, + -5.127577781677246, + -17.19189453125, + -6.698403835296631, + -1.0449178218841553, + -4.365664958953857, + -1.1243419647216797, + -2.2092156410217285, + -1.8081634044647217, + -0.23330983519554138, + -9.439546585083008, + -0.2947109341621399, + -7.253565788269043, + -2.3855936527252197, + -4.629369258880615, + -3.4186267852783203, + -1.9727531671524048, + -2.331681251525879, + -1.5606917142868042, + -2.454296588897705, + -1.5334703922271729, + -1.2631131410598755, + -2.657367706298828, + -0.6480202078819275, + -0.4550393521785736, + -1.3625166416168213, + -0.8142069578170776, + -0.4496593475341797, + -0.9312890768051147, + -1.732723355293274, + -0.44613128900527954, + -1.6895122528076172, + -0.6082233190536499, + -1.0978344678878784, + -1.1122435331344604, + -0.002520838286727667, + -1.4072327613830566, + -0.007462364621460438, + -0.7548662424087524, + -0.9937503337860107, + -0.0675487294793129, + -0.9595617055892944, + -0.029961343854665756, + -2.205785036087036, + -1.2615025043487549, + -0.7878209352493286 + ] + }, + "throughput": [79.88988160240554, 79.88988160240554] } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml index b368242b9af..2ba9050ceaf 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml @@ -47,6 +47,8 @@ 
MODEL_ARGS: --num-tokens-to-generate: 30 --enable-cuda-graph: true --decode-only-cuda-graphs: true + --inference-dynamic-batching-buffer-guaranteed-fraction: 0 + --inference-dynamic-batching-buffer-overflow-factor: 0.2 --inference-dynamic-batching-buffer-size-gb: 20 --dist-ckpt-strictness: log_unexpected --inference-ckpt-non-strict: true # To handle the extra_state errors diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml index 7fcf9e9cf81..a4f47d3705f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml @@ -22,8 +22,7 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 - --transformer-impl: inference_optimized - --sequence-parallel: true + --transformer-impl: transformer_engine --tensor-model-parallel-size: 1 --pipeline-model-parallel-size: 1 --deterministic-mode: true @@ -42,6 +41,9 @@ MODEL_ARGS: --top_k: 1 --return-log-probs: true --num-tokens-to-generate: 30 + --inference-dynamic-batching-max-requests-override: 8 # hardcode decode padding tokens to 7 for reproducibility + --inference-dynamic-batching-buffer-guaranteed-fraction: 0 + --inference-dynamic-batching-buffer-overflow-factor: 0.2 --inference-dynamic-batching-buffer-size-gb: 20 --dist-ckpt-strictness: log_unexpected --inference-ckpt-non-strict: true # To handle the extra_state errors diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json 
deleted file mode 100644 index 9be8a9dc0ca..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json +++ /dev/null @@ -1,1028 +0,0 @@ -{ - "throughput": [ - 94.6087716527102, - 115.85992244026639, - 138.9562527069375, - 133.18726531918395, - 81.97861561771212, - 134.30726469422635, - 86.456140428456, - 114.99456351298251, - 147.3101800153954, - 3.0364623744653003, - 124.7590786954667, - 134.2276982994434, - 3.0580463134110167, - 117.03969654341354, - 130.92134521286803, - 48.493091604204935, - 1.4498729599486508, - 128.01470907994928, - 1.8330770354872434, - 66.31842482241125, - 82.24189975425459, - 1.07058112939944, - 1.8815468970982412, - 0.9373246942729808, - 134.9963160815443, - 2.285771114682068, - 43.068220270070434, - 134.9677086822377, - 82.44946740133796, - 47.71839155542011, - 114.4199568886962, - 29.67621576315833, - 144.1589742491705, - 95.8164720809401, - 122.80562228460093, - 39.21436814433054, - 3.041180292262413, - 3.2867844729646842, - 72.43808226229888, - 0.8371525937296347, - 1.2212635079980698, - 145.6869075644325, - 42.317711349146016, - 109.1196064871946, - 73.6281770453198, - 140.4495689387567, - 1.219834296561022, - 138.66856497329005, - 23.33818821323391, - 67.82342558671365, - 130.09683254313987, - 147.60199288178146, - 0.9427431720755464, - 3.2856495013162523, - 79.12426666101076, - 86.41557345094756, - 120.17346279825053, - 137.16615251640926, - 108.93291864542198, - 110.10504114490513, - 46.19253755421628, - 0.950218846923012, - 136.50642826951463, - 142.73168666846448, - 1.2206786818073785, - 1.898581377105612, - 131.72636154091063, - 2.2842414327001976, - 89.76521170090028, - 114.66053545744656, - 58.64474290044525, - 0.8367865961030284, - 128.01767795820945, - 60.87292097103301, - 124.20016865241587, - 119.59336898055426, - 0.9425820346281929, - 93.70053305431952, - 1.0728113870213674, - 135.7596767309971, - 112.89357243644062, - 
89.2743296587299, - 137.86411291342458, - 135.6974706051771, - 102.59633828443238, - 129.82058179399326, - 139.57672703148444, - 140.5642311163746, - 78.49182953675201, - 123.40912657074227, - 82.74099904578694, - 75.5490641626476, - 93.38596238341951, - 141.19058076067225, - 1.072254167577298, - 100.8669047802279, - 132.77382347347034, - 92.29086179175866, - 137.20301032384705, - 89.57723938765776, - 67.5465256589703, - 0.9498935124108836, - 1.0716887464650027, - 0.8365472180547067, - 137.902625307774, - 132.67132600219722, - 1.45201860416265, - 1.8366476879619427, - 88.65095604379363, - 132.1806036761347, - 126.0481874394642, - 127.43750324083169, - 93.27238135265156, - 109.83884164204308, - 102.30516355984702, - 141.10387096377744, - 0.9425154448032942, - 95.04281981148903, - 103.11525529548061, - 0.8361762901534399, - 135.3171561172067, - 123.30032998064965, - 118.75691144485415, - 82.21375599642211, - 66.37216333263251, - 120.02349229491865, - 27.339414655466246, - 133.1312422227687, - 123.02377779863252, - 111.0798894329, - 58.88405247768833, - 131.31767475108893, - 40.19076958615912, - 123.58362152151858, - 130.6541142941889, - 61.39555613504246, - 43.92154495664044, - 1.037012527495492, - 127.16052127606021, - 137.06554800183082, - 85.67161160523041, - 1.0253417447981334, - 139.20903624514017, - 140.19068787455728, - 117.67416498245059, - 23.410837515725987, - 130.73052473972666, - 22.561824695346466, - 1.028901717647808, - 119.30712483977753, - 117.77548263464804, - 135.2959098119142, - 142.10193821260228, - 1.0366044325624144, - 1.0350271698893887, - 132.8943567509843, - 51.50353963446039, - 113.39559408843714, - 124.25424103796537, - 129.60407993083075, - 136.8566687186031, - 1.036163010240988, - 1.0345739017743927, - 118.72350056844492, - 32.453707095990595, - 43.851925176925825, - 139.39206855448938, - 141.0979597861742, - 132.81461728578432, - 80.95956255477945, - 133.42483643501154, - 57.27721135575491, - 81.47649794801364, - 79.39765285063396, - 
56.40255861789973, - 0.8890603607397893, - 137.59325887086797, - 118.03982850100024, - 53.04390121587005, - 88.31177924841927, - 1.0287550608831881, - 54.67393025836421, - 54.73556135447348, - 129.6143036059356, - 123.57095756116274, - 146.05184555314386, - 55.506024155977386, - 84.40666358740559, - 62.68531518105107, - 147.42894642823578, - 1.0274253590993496, - 145.9063526676371, - 76.36231256557768, - 1.035808949157935, - 136.1858098182613, - 93.13144140533397, - 54.57886608953819, - 1.0251956490815057, - 1.0270063804838983, - 67.96952180390161, - 136.90103479290272, - 78.62986077133174, - 129.97235998681177, - 70.57784076609056, - 1.028567312218149, - 69.64434330087829, - 1.0266016363366386, - 25.142311727265525, - 139.54750333578679, - 118.80547132463877, - 1.0342055876192149, - 132.79991800938092, - 88.25494664060619, - 132.4600307114398, - 1.026200775415348, - 111.33264788932784, - 1.031301270403004, - 104.45912302410692, - 1.0337771723701492, - 124.53550504281608, - 1.0283501183885058, - 126.53361938982871, - 139.83512785200963, - 102.28350299734186, - 122.68389734539087, - 139.27095111763788, - 1.0333552237490158, - 97.04945381465573, - 60.63422077140298, - 1.0248694052483192, - 96.77644543721476, - 118.38370846079931, - 1.0309087229819596, - 136.0487423665781, - 1.032932214377732, - 104.96525711514936, - 50.75370028394122, - 125.67617176346853, - 125.47392048276225, - 101.59371483024698, - 119.1183231384482, - 134.24568445137294, - 1.0323996653747745, - 119.28563313083153, - 50.183581144589674, - 107.50817556608582, - 127.4693561344537, - 116.0234844098742, - 149.0429439759437, - 127.77855747904051, - 1.0319900690130652, - 129.7400124946839, - 60.27584011696136, - 1.0245534026749026, - 113.8687773549026, - 129.9927880985222, - 41.55332067297356, - 12.991853549713621, - 144.9384518471586, - 127.77570879015505, - 79.09214991388126, - 1.0326234729165304, - 144.50618896622706, - 44.461452482592826, - 145.75357879817352, - 150.5618330832813, - 
123.17802281879979, - 147.0133924731902, - 57.07203337285457, - 140.17944630269687, - 44.5066568841284, - 150.2834791394652, - 146.37106237628518, - 135.59553639884948, - 21.91845075979551, - 1.0391172002596458, - 92.42182316100705, - 14.98578222593142, - 19.944740287073653, - 32.75622847272977, - 58.94666795839769, - 1.0428676908165904, - 97.94938911630567, - 140.5399781540016, - 36.397689902912774, - 1.0322919875583962, - 33.76444948259586, - 147.54902815924785, - 51.316830076622495, - 153.55703202636914, - 46.423895018386204, - 140.271682540213, - 1.0340651759548871, - 85.22971449383292, - 141.80480996358014, - 1.0234621691055457, - 1.0355322329825165, - 136.96321865236195, - 138.2293990177049, - 136.89440582973347, - 96.94919171687799, - 54.992986423891566, - 142.91167590864902, - 138.73615931624403, - 86.32837448704223, - 1.0424247604140402, - 127.58052889290863, - 138.2472241943501, - 1.0338260095695477, - 1.0317372756221133, - 150.59249576769173, - 1.0229533138894364, - 149.1711141084735, - 1.0419379125129562, - 1.040305113121658, - 150.13261057757276, - 62.47975017460808, - 70.20443057037575, - 76.88821624674898, - 1.0225242667788867, - 136.83301633777177, - 1.0414381555227956, - 131.6044067829552, - 1.038902005769604, - 1.0335832618537684, - 83.38230404797935, - 3.047737981863063, - 140.9843162162637, - 1.0352264324041114, - 1.0409374510445146, - 103.17228299164871, - 1.0383219913492376, - 67.5151836065632, - 126.94018489907108, - 95.29974174831813, - 1.022161551972834, - 1.0348032799350415, - 93.24855217625235, - 140.00831851627856, - 142.46553219867087, - 80.52507876480331, - 149.47939431741142, - 125.60095189608528, - 92.57991472689042, - 153.09192667088175, - 98.78787611117323, - 136.9802701171813, - 1.0378200246498124, - 79.05370338483348, - 145.63143231877774, - 107.86253722014555, - 113.1390555766259, - 150.4596904971142, - 6.010262757833046, - 138.11675690694213, - 1.0371929842524894, - 55.1702723554103, - 148.4142582794926, - 108.62464742566522, - 
142.2515578682958, - 149.5588988951372, - 1.0310870179234204, - 32.798276334675066, - 145.8363475163408, - 82.52497836005318, - 144.77105210255448, - 140.95035733017403, - 145.4844811663436, - 145.0646083055648, - 139.1641494303434, - 1.0401220454548914, - 146.10598185112948, - 1.0335329080843159, - 1.0316085392161136, - 133.98012837767038, - 129.62059667226987, - 151.2681266565858, - 1.030719335336581, - 135.9600336007384, - 1.0366589924031362, - 107.70864165999221, - 118.06361914834272, - 148.4615541738592, - 135.1206190516379, - 1.0788915925864082, - 1.0662361391973343, - 1.0784094142292293, - 145.5492563111853, - 100.1745158858024, - 89.97448812790176, - 140.13008352060388, - 8.378443606045758, - 19.841723966559687, - 31.11972559764219, - 127.75589035167928, - 144.649118240912, - 83.40454687650907, - 13.609558087727212, - 144.14916775068022, - 143.0831699051951, - 144.53789580070173, - 129.35689525213576, - 126.54760361436873, - 136.72725454688293, - 83.66753329456253, - 35.238850690537326, - 138.73588075606074, - 148.39285997484404, - 141.43706957675556, - 35.20788617289704, - 140.22918428708584, - 141.42288954532623, - 80.8071906111917, - 53.480908541665116, - 96.60869116876205, - 138.83030943256392, - 146.89537016655746, - 1.0659353965573166, - 138.66041009897964, - 138.0783824554628, - 54.95061283513892, - 1.0688789370964418, - 145.4981195236156, - 107.91672388693667, - 147.39387423946786, - 143.49840246862203, - 1.0781871694837721, - 125.37215873599833, - 46.390553110182545, - 1.0683430650310588, - 60.55314896188811, - 128.32962060837178, - 142.6648214311374, - 1.065532502621677, - 145.06202945295232, - 149.5985088362253, - 43.61426254132819, - 139.2120402464869, - 138.80120892663803, - 142.59390751862693, - 147.27000174003754, - 139.5980537408405, - 142.37081759892675, - 76.47257166426981, - 0.8663971721944621, - 1.067847671923619, - 1.0752972325757186, - 139.11225337731244, - 154.1012640338781, - 91.85315813315137, - 7.34066705730821, - 
1.0763437477764217, - 56.03391448680589, - 1.067309924884827, - 1.0747789028833068, - 1.057667310022394, - 146.4284745539176, - 142.32867288307636, - 132.81801172672715, - 142.5746724111237, - 43.178263922620026, - 140.19958418325498, - 1.0742201855279276, - 139.95237701874325, - 124.69044225989671, - 89.93275546978569, - 1.0778110524743836, - 108.03753008375865, - 0.8649825661375887, - 101.22782607000799, - 138.6615942910557, - 1.0572642952018412, - 143.509260845593, - 1.0651693329533294, - 97.454990956795, - 1.075960473594851, - 104.89429761368234, - 153.46849816095335, - 143.28204379991922, - 112.57923589922926, - 145.35468060283986, - 119.53338040876814, - 132.53105489182144, - 146.60735281445733, - 0.8648000721123511, - 132.61504628627392, - 140.81953388748138, - 1.05684091289561, - 147.29646966899597, - 1.0646855258714663, - 1.0772400203863821, - 137.87592499226204, - 101.79954304062817, - 134.45893707567646, - 1.0737967838723397, - 147.3289039421509, - 142.95955673278567, - 123.11846557585149, - 139.7223884224781, - 5.274894457437767, - 0.8646226703470901, - 135.27010135142623, - 134.53222451904563, - 140.4520894166607, - 148.6784682726068, - 148.83999547746723, - 144.76059628877204, - 146.09818079047014, - 0.8644123666240657, - 133.05795012757028, - 141.21253159110282, - 147.08086640702987, - 153.13511211461227, - 147.72437078211334, - 53.87242850230838, - 61.34701685378028, - 74.50771860339175, - 16.40780504974564, - 16.448796993269678, - 144.08505364828036, - 143.78069847853888, - 145.08382905436133, - 139.4144567792124, - 1.113422304912727, - 23.732299099149245, - 146.716938504402, - 1.1150428401994323, - 1.1070863332993708, - 147.462815334713, - 15.300506166735937, - 142.89311901203018, - 35.881455163220174, - 0.8959120615185874, - 134.50389621984408, - 79.91603718165896, - 145.31776951960734, - 153.19384567886857, - 142.494036234602, - 130.58249312188119, - 1.1128817603274543, - 56.157995916719756, - 35.81413980204931, - 116.5213087641768, - 
63.30354399512571, - 55.0117106848875, - 47.52954249314361, - 153.04709230401787, - 1.112276523473745, - 80.1523559974256, - 136.20373724941714, - 1.114673225365626, - 1.1067132158651183, - 149.29883052073288, - 145.10950784560325, - 130.53765167080937, - 1.111788125890117, - 0.8957719496064405, - 1.1050775451489783, - 17.522300994030367, - 154.45472111064055, - 152.07616582090188, - 1.1020107149905272, - 138.6808068419634, - 76.87873177159636, - 51.43702839643221, - 138.95045176064437, - 138.64177504011988, - 140.72197385602811, - 132.80947742972836, - 149.78872816785005, - 139.94034036065392, - 154.2632802491591, - 55.57148538150843, - 1.1044580058296936, - 147.1712801496827, - 77.84198065949245, - 142.38330204183904, - 151.76812011990265, - 145.19131540821485, - 147.26566215388425, - 87.12413393605841, - 1.1038403429439656, - 141.4935550752979, - 145.7397470598185, - 3.3080164659931235, - 123.0327553358976, - 146.24080278853327, - 148.10448175245884, - 29.234562433775857, - 151.30177873039895, - 135.4653748135468, - 144.3293913931314, - 148.16163203136404, - 1.1015876034201657, - 1.1114790318458536, - 136.68047783885697, - 77.72584511329579, - 125.73692105352463, - 106.98755729483561, - 96.25926845246491, - 1.109721323323522, - 141.71073652156545, - 130.22006710827588, - 145.24478945746003, - 80.67459353439743, - 1.1033551544760267, - 150.03177939272493, - 154.12875534463626, - 150.04771421074818, - 1.1010813815407388, - 1.1110434127990452, - 145.385699877379, - 86.86487551811825, - 130.16687493633253, - 143.8726181331947, - 111.91340621077623, - 146.0394914387852, - 1.1006353022455784, - 134.47903589563677, - 148.6907436994389, - 102.87151097507036, - 137.41724911494663, - 1.1146766644704549, - 143.85952373403495, - 146.92280951248307, - 1.100156488603178, - 144.04783334738536, - 148.53630346113712, - 58.74848466983248, - 147.0485685726298, - 141.32891699761203, - 142.8441702922343, - 131.04366253726744, - 128.6305301075303, - 1.1106412111686195, - 
147.90025888582002, - 0.8959265584913588, - 149.5194069726666, - 137.43649451567626, - 1.1068068376551545, - 68.05269425995475, - 138.94056631255367, - 138.43818227469507, - 69.60391199895408, - 114.83395091462887, - 151.34107787433956, - 141.57237630997332, - 146.07433910500515, - 9.941778754980154, - 131.297822968639, - 10.386636719874664, - 10.545636067043365, - 114.58677137445733, - 75.28902943071078, - 90.63452059810655, - 143.58694736923238, - 9.901118804514459, - 144.5206530902411, - 144.78737732574044, - 79.81136215142409, - 84.9314508821071, - 120.18939827456474, - 10.225253542151219, - 9.702822548173124, - 103.1188517219872, - 138.5008491242522, - 92.02238700298246, - 151.99592340131602, - 9.807595290716304, - 150.0447954775559, - 134.2614008494909, - 149.38544573345007, - 149.62298116309924, - 124.32358754465251, - 132.817456221544, - 10.50607995390264, - 9.78317681034783, - 151.07916494121415, - 146.93545537009487, - 118.45851163082196, - 145.03008316360754, - 154.4449202186591, - 146.86002069809945, - 150.6932855951215, - 110.74803327496042, - 127.40788523389726, - 150.81323854197058, - 150.0047673310006, - 149.6063654551971, - 133.87244996538675, - 10.329695475492791, - 9.414695716712222, - 106.77032789813472, - 118.34636653947105, - 123.44441062862572, - 144.9015592115516, - 153.74652990582067, - 10.065713405335144, - 129.38998560194165, - 117.69087049838025, - 99.15650839997046, - 127.90462338199198, - 147.3574863739125, - 9.696544883885949, - 9.8853852911422, - 128.35872796896587, - 145.2939860705264, - 128.72081963712404, - 94.09935653689803, - 142.8780531031409, - 130.5213122981276, - 126.89288883528536, - 153.36107852781166, - 149.17239657923582, - 9.177632630803961, - 9.387171298727486, - 109.68196882316985, - 148.55536204011432, - 152.61730207818772, - 9.648922236946333, - 132.805446535875, - 138.74295200738652, - 141.66118217831166, - 124.0399127789103, - 113.05005278683446, - 149.71230902297984, - 25.727698431920004, - 129.56419655827216, - 
130.40687823665095, - 128.46470366050013, - 150.46298369674685, - 9.22073843893938, - 110.36443029340542, - 148.23878821929193, - 10.219508495480236, - 9.615051521185155, - 9.8723813087942, - 149.91378148843256, - 9.149056684599877, - 130.37704092008303, - 114.86611671621016, - 134.53633480709703, - 131.11593468604048, - 149.74665952988033, - 136.60701891253495, - 146.50864617645632, - 9.094221140419737, - 149.69902295915708, - 126.93245475406366, - 141.2463933703881, - 10.18172163650932, - 136.76582155059438, - 155.5823388453975, - 144.68082947663285, - 142.0128061769988, - 116.20800508912414, - 101.13756407758095, - 10.050927550768915, - 10.14139856150474, - 9.573219645146107, - 146.33874064646594, - 137.22302119976462, - 132.14965518046, - 148.08190796641483, - 117.6843964457568, - 153.04352772565807, - 146.79238076404926, - 9.522740968586977, - 145.93484469600287, - 13.925952420322696, - 12.697420287309185, - 146.39122941822845, - 113.94298610788566, - 13.844109957456581, - 154.57922917096633, - 13.525210269101805, - 103.83976095796662, - 97.75660804271413, - 135.83818209343426, - 158.60060111529293, - 111.57793188874757, - 13.768524263105455, - 154.2203592546867, - 108.85242762118563, - 111.15752259030245, - 149.5942138872604, - 119.77102605185765, - 120.68065341205389, - 105.29698904913548, - 151.41465167808087, - 138.90606724001483, - 13.437371194424983, - 119.97194649055415, - 144.6223725248399, - 146.9934910169238, - 149.45319992777343, - 121.48260402443249, - 13.662736071688842, - 14.448955892498802, - 144.5545360346381, - 154.00382983055897, - 151.8635735223181, - 137.2321484611102, - 119.71487519948164, - 88.24978714231261, - 147.74815341218743, - 142.1113258863455, - 132.08775922189477, - 124.63351274554526, - 145.72256212355262, - 100.50708502243579, - 139.16363846809003, - 114.82662827063822, - 154.78307253831395, - 149.22879563842886, - 152.6744734255461, - 145.81022434241217, - 152.68018782123758, - 116.75549006136289, - 12.968595875688791, - 
6.824624970615158, - 125.05116103474757, - 147.66072487793718, - 147.5735120742967, - 139.1302141298083, - 146.48542990069834, - 12.674865288395944, - 147.88858853602966, - 6.8124480142416175, - 137.54766974463703, - 130.89979405333307, - 13.364169845161861, - 14.116086127002273, - 130.3002929300388, - 116.98398239487472, - 152.70827610346095, - 98.51470626500011, - 135.1252373635164, - 14.405992358855888, - 154.13709739001223, - 146.28661687368685, - 137.87827066214206, - 12.621081453489012, - 154.04574874294514, - 6.802625211185703, - 152.18661864386252, - 149.30257880598677, - 13.244501725269068, - 138.34068638798834, - 150.95140747506372, - 141.8441899037163, - 152.99022366652198, - 103.95004802425926, - 140.28144756248412, - 154.51222806007945, - 85.40777548962518, - 154.7067128296305, - 120.47843952303268, - 12.568053995018431, - 12.916583075889136, - 105.92477484543576, - 137.92878859711615, - 135.13853669037294, - 137.88549737290148, - 157.83019925734393, - 145.48927689323145, - 12.509532718065461, - 150.6233829715981, - 119.23669844460764, - 138.49099023171033, - 154.0870149904812, - 140.1862744667834, - 148.860174031694, - 147.54629689336036, - 12.448861769003683, - 152.4711466483636, - 102.47079224461186, - 152.40864885890767, - 156.21773232766026, - 13.139291580904986, - 150.30653960489693, - 145.43571147072188, - 132.8965387342577, - 144.85972103961666, - 125.5438694385711, - 158.07457773478276, - 14.359506122440205, - 137.7658155977229, - 153.68125116011197, - 156.57780724945528, - 12.394708947912125, - 12.874702780202174, - 110.61518572692995, - 149.4338565730422, - 149.67552030435513, - 146.20909415912828, - 9.308833539527914, - 26.176147260970783, - 8.701217384742513, - 66.92241449340185, - 105.12940849136734, - 145.25326276553395, - 139.68219350261262, - 131.60335890332783, - 150.53420884400245, - 17.552483447968918, - 99.60476667168517, - 9.003208512207522, - 8.539560747895454, - 9.946172723540226, - 150.55644446784382, - 9.608936841972842, - 
104.80864366760326, - 25.95068644438624, - 99.42592550150236, - 108.35979254469888, - 113.9171427720856, - 9.905905876631499, - 131.1684982861573, - 154.7989292174601, - 151.34753888952145, - 150.11816141981262, - 143.00557828542912, - 126.2310299151925, - 113.53830001728545, - 148.13405630794878, - 150.7564429392251, - 155.252325076404, - 18.20048176554747, - 25.725436761645142, - 8.678711562613207, - 143.3683328827327, - 127.0294451168928, - 137.50119476282134, - 10.068367539846923, - 155.64822784014916, - 153.2789382926615, - 25.46950813818654, - 142.9138107220956, - 155.10510899417167, - 107.40557834412083, - 9.871948602847068, - 144.4712732194919, - 140.17802930301565, - 9.286026243902361, - 129.1488895575147, - 124.35586045151207, - 140.1410811550992, - 96.63692877337894, - 153.62093095799207, - 156.05800033315097, - 9.587609950939838, - 140.09721428165886, - 134.898750425008, - 8.652809034763463, - 8.989448046931262, - 107.64260577858933, - 9.825071080298192, - 150.6237132142087, - 143.76058852986372, - 154.01627264735168, - 140.85322298632985, - 143.63714834446708, - 149.7259575806535, - 8.53942846683121, - 157.02635815805976, - 150.83913162907433, - 154.0283691261865, - 9.246842209481716, - 154.5851361854829, - 133.4662155767381, - 137.55396410787307, - 105.77910782321499, - 148.97953057255376, - 111.3041581371634, - 9.543858351726714, - 142.71996301994741, - 144.2417836324451, - 148.5293262803374, - 8.95331376662564, - 105.2724164655814, - 149.16646109060707, - 151.1947852118465, - 9.503293907683512, - 133.40055362812345, - 8.776394391795916, - 148.3675722527084, - 154.66946641450528, - 122.71674068416665, - 149.62192317697068, - 153.40159484208397, - 9.46860898864519, - 146.10526710538994, - 143.96020057925128, - 8.62472208077336, - 8.906885562515198, - 105.7754218686014, - 150.17957794387223, - 144.0451331512576, - 149.95461039551162, - 151.46311089131117, - 142.22104279807664, - 147.3679944003333, - 140.5394711174869, - 123.62157744638432, - 
152.32796921399395, - 156.6603241829257, - 9.43621164630811, - 158.2241383954169, - 149.33346139426692, - 144.12074054746773, - 143.1977521817863, - 8.536662624511228, - 9.785635570067782, - 147.61880087321424, - 9.402323265876474, - 159.1161790596516, - 146.56796834276156, - 147.64890403285438, - 157.70847517328534, - 114.64282143770687, - 148.5000942425868, - 10.052761003641129, - 147.38801074409378 - ] -} diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml deleted file mode 100644 index 2d65c154a0e..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml +++ /dev/null @@ -1,59 +0,0 @@ -ENV_VARS: - CUDA_DEVICE_MAX_CONNECTIONS: 1 - NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 - NCCL_ALGO: Ring - CUBLAS_WORKSPACE_CONFIG: :4096:8 -TEST_TYPE: frozen-start -MODE: inference -MODEL_ARGS: - --tiktoken-pattern: v2 - --use-mcore-models: true - --tokenizer-type: TikTokenizer - --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json - --auto-detect-ckpt-format: true - --max-tokens-to-oom: 3600000 - --inference-max-seq-length: 4096 - --attention-backend: flash - --use-checkpoint-args: true - --micro-batch-size: 1 - --no-load-optim: true - --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 0 - --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ - --distributed-backend: nccl - --log-interval: 1 - --transformer-impl: transformer_engine - --tensor-model-parallel-size: 1 - --pipeline-model-parallel-size: 1 - --ckpt-format: torch_dist - --bf16: true - --log-memory-to-tensorboard: true - --log-num-zeros-in-grad: true - --log-validation-ppl-to-tensorboard: true - --log-timers-to-tensorboard: true - --num-layers: 
24 - --hidden-size: 1152 - --num-attention-heads: 16 - --max-position-embeddings: 1024 - --seq-length: 1024 - --temperature: 1.0 - --top_k: 1 - --seed: 42 - --return-log-probs: true - --num-tokens-from-file: true - --inference-dynamic-batching-buffer-size-gb: 20 - --cuda-graph-impl: local - --cuda-graph-scope: full_iteration - --disable-chunked-prefill: true - --dist-ckpt-strictness: log_unexpected - --inference-ckpt-non-strict: true # To handle the extra_state errors - --output-path: ${TENSORBOARD_PATH} - --output-every-n-results: 32 - --prompt-file: ${DATA_PATH}/text/sharegpt-vicuna/filtered/processed.jsonl - --prompt-file-num-truncate: 1024 - --incoming-requests-per-step: 128 - --use-flashinfer-fused-rope: true - --throughput-check-only: true -METRICS: - - "generated_tokens" - - "logprobs" diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json deleted file mode 100644 index 07adf271434..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json +++ /dev/null @@ -1,158 +0,0 @@ -{ - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And that this is the place where you can be yourself, and be yourself in the most beautiful way. 
And that this is the place where you can", - "generated_tokens": [ - 3060, - 1455, - 1593, - 1395, - 1278, - 3535, - 2478, - 1636, - 1710, - 1402, - 14019, - 1044, - 1321, - 1402, - 14019, - 1294, - 1278, - 2725, - 15568, - 3039, - 1046, - 3060, - 1455, - 1593, - 1395, - 1278, - 3535, - 2478, - 1636, - 1710 - ], - "latency": 2.020272731781006, - "logprobs": [ - -9.358587265014648, - -2.7594826221466064, - -4.608366012573242, - -1.4093360900878906, - -0.6152952313423157, - -1.7217562198638916, - -2.496668815612793, - -2.0547454357147217, - -2.441960573196411, - -6.280838966369629, - -1.5643692016601562, - -3.462346076965332, - -4.428728103637695, - -3.8633861541748047, - -1.9936373233795166, - -1.8929449319839478, - -3.796365737915039, - -6.8360137939453125, - -0.2901247441768646, - -0.9246833324432373, - -6.633338928222656, - -7.166708469390869, - -12.771251678466797, - -2.198296308517456, - -3.7778120040893555, - -0.4983733296394348, - -4.381269454956055, - -0.0666784718632698, - -0.09580295532941818, - -3.2437636852264404, - -10.079947471618652, - -1.172220230102539, - -5.977442741394043, - -5.046236038208008, - -3.855658531188965, - -2.5585858821868896, - -3.356245994567871, - -5.557229518890381, - -1.6787731647491455, - -5.483290672302246, - -12.218501091003418, - -12.61402702331543, - -0.09662941098213196, - -2.5431432723999023, - -1.4071024656295776, - -2.9154715538024902, - -1.1964417695999146, - -0.006458481773734093, - -3.3625335693359375, - -13.262511253356934, - -4.314079761505127, - -2.617699146270752, - -5.987792015075684, - -0.778266429901123, - -0.048888545483350754, - -1.548882007598877, - -1.1381981372833252, - -5.627166748046875, - -0.4078553318977356, - -4.958505630493164, - -0.6187160611152649, - -0.7174848914146423, - -2.469533920288086, - -13.620073318481445, - -0.09088654816150665, - -3.526974678039551, - -1.4195809364318848, - -6.402483940124512, - -0.5898402333259583, - -3.565917491912842, - -0.8561318516731262, - -1.6140165328979492, - 
-5.370549201965332, - -17.159223556518555, - -6.583524703979492, - -0.8855001926422119, - -4.19431209564209, - -1.2012220621109009, - -2.2563133239746094, - -1.7674944400787354, - -0.22064533829689026, - -9.292220115661621, - -0.12445646524429321, - -7.29617977142334, - -2.526529312133789, - -4.071560859680176, - -3.5568013191223145, - -1.926215410232544, - -2.349026918411255, - -2.2132363319396973, - -0.3125414550304413, - -1.4718132019042969, - -2.149106740951538, - -1.0855519771575928, - -1.631832242012024, - -1.3751734495162964, - -1.9396103620529175, - -1.5293723344802856, - -0.8444125056266785, - -1.2414811849594116, - -1.9522171020507812, - -2.4338042736053467, - -1.5651824474334717, - -0.9498789310455322, - -1.8044980764389038, - -2.356677770614624, - -1.247452974319458, - -1.550165057182312, - -0.5635553598403931, - -0.6177330017089844, - -0.4778785705566406, - -0.020452087745070457, - -0.48500269651412964, - -0.23854275047779083, - -0.06543659418821335, - -0.11837350577116013, - -0.0585334412753582 - ] - } -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/model_config.yaml deleted file mode 100644 index 96d3fd0fc0c..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/model_config.yaml +++ /dev/null @@ -1,58 +0,0 @@ -ENV_VARS: - CUDA_DEVICE_MAX_CONNECTIONS: 1 - NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 - NCCL_ALGO: Ring - CUBLAS_WORKSPACE_CONFIG: :4096:8 -TEST_TYPE: frozen-start -MODE: inference -MODEL_ARGS: - --tiktoken-pattern: v2 - --use-mcore-models: true - --tokenizer-type: TikTokenizer - --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json - --auto-detect-ckpt-format: true - --max-tokens-to-oom: 3600000 - 
--inference-max-seq-length: 4096 - --attention-backend: flash - --use-checkpoint-args: true - --micro-batch-size: 1 - --no-load-optim: true - --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 0 - --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ - --distributed-backend: nccl - --log-interval: 1 - --transformer-impl: inference_optimized - --sequence-parallel: true - --tensor-model-parallel-size: 1 - --pipeline-model-parallel-size: 8 - --deterministic-mode: true - --ckpt-format: torch_dist - --bf16: true - --log-memory-to-tensorboard: true - --log-num-zeros-in-grad: true - --log-validation-ppl-to-tensorboard: true - --log-timers-to-tensorboard: true - --num-layers: 24 - --hidden-size: 1152 - --num-attention-heads: 16 - --max-position-embeddings: 1024 - --seq-length: 1024 - --temperature: 1.0 - --top_k: 1 - --return-log-probs: true - --num-tokens-to-generate: 30 - --inference-dynamic-batching-max-requests-override: 8 # hardcode decode padding tokens to 7 for reproducibility - --inference-dynamic-batching-buffer-guaranteed-fraction: 0 - --inference-dynamic-batching-buffer-overflow-factor: 0.2 - --inference-dynamic-batching-buffer-size-gb: 20 - --dist-ckpt-strictness: log_unexpected - --inference-ckpt-non-strict: true # To handle the extra_state errors - --output-path: ${TENSORBOARD_PATH} - --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." 
- --incoming-requests-per-step: 32 - --use-flashinfer-fused-rope: true - -METRICS: - - "generated_tokens" - - "logprobs" diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json deleted file mode 100644 index 55d6955055a..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json +++ /dev/null @@ -1,158 +0,0 @@ -{ - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", - "generated_tokens": [ - 3060, - 2430, - 1636, - 2012, - 1317, - 1278, - 2362, - 1307, - 1278, - 16070, - 1044, - 1321, - 1636, - 23067, - 1455, - 1593, - 1395, - 1605, - 3140, - 5152, - 1513, - 1747, - 1046, - 2409, - 1395, - 3140, - 5152, - 1513, - 1278, - 2362 - ], - "latency": 44.73653959017247, - "logprobs": [ - -9.358970642089844, - -2.7523813247680664, - -4.628502368927002, - -1.4058877229690552, - -0.6050865054130554, - -1.7354254722595215, - -2.4828507900238037, - -2.0520384311676025, - -2.4089853763580322, - -6.2649126052856445, - -1.5644135475158691, - -3.4096615314483643, - -4.358163833618164, - -3.866471767425537, - -2.0575876235961914, - -1.904883623123169, - -3.7622976303100586, - -6.835415363311768, - -0.2829523980617523, - -0.9827429056167603, - -6.655940055847168, - -7.188957214355469, - -12.757233619689941, - -2.1933951377868652, - -3.808887481689453, - -0.515199601650238, - -4.323916912078857, - -0.067625492811203, - -0.09976530075073242, - -3.228640556335449, - -10.129311561584473, - -1.1787357330322266, - -5.97692346572876, - -5.036575794219971, - -3.8267176151275635, - -2.6010468006134033, - -3.366438865661621, - -5.553505897521973, - -1.6046268939971924, - -5.442874908447266, - -12.218503952026367, - -12.597894668579102, - -0.0976092740893364, - -2.530579090118408, - -1.4139617681503296, - -2.8606526851654053, - -1.1690009832382202, - -0.0066696410067379475, - -3.361189365386963, - -13.191482543945312, - -4.413737773895264, - -2.639688491821289, - -6.0114641189575195, - -0.7672993540763855, - -0.047326065599918365, - -1.550362467765808, - -1.137772798538208, - -5.627618789672852, - -0.40103790163993835, - -4.908735275268555, - -0.5704602599143982, - -0.6625558733940125, - -2.364135503768921, - -13.609526634216309, - -0.08865148574113846, - -3.5251970291137695, - -1.3791766166687012, - -6.395696640014648, - -0.588782787322998, - -3.566770076751709, - -0.8742034435272217, - -1.5827170610427856, - 
-5.3912353515625, - -17.150842666625977, - -6.6234588623046875, - -0.885993242263794, - -4.162992477416992, - -1.1942744255065918, - -2.281689405441284, - -1.7708709239959717, - -0.22030864655971527, - -9.292593955993652, - -0.1258234828710556, - -7.346449851989746, - -2.5470826625823975, - -4.115433692932129, - -3.5646262168884277, - -1.9410749673843384, - -2.3247878551483154, - -1.523364543914795, - -2.360647678375244, - -1.708706021308899, - -1.131014108657837, - -2.944424867630005, - -0.5273782014846802, - -0.44912564754486084, - -1.753378987312317, - -0.8341047167778015, - -0.4124295711517334, - -0.9006240367889404, - -1.4890273809432983, - -0.4379286766052246, - -1.6497018337249756, - -0.5444425344467163, - -1.2305881977081299, - -1.164027214050293, - -0.002498721005395055, - -1.165798544883728, - -0.007112303748726845, - -0.718407154083252, - -0.7442683577537537, - -0.04299728572368622, - -0.8688321113586426, - -0.021008115261793137, - -2.033963680267334, - -1.2936673164367676, - -0.78721684217453 - ] - } -} diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/model_config.yaml deleted file mode 100644 index 306c12bd653..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/model_config.yaml +++ /dev/null @@ -1,58 +0,0 @@ -ENV_VARS: - CUDA_DEVICE_MAX_CONNECTIONS: 1 - NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 - NCCL_ALGO: Ring - CUBLAS_WORKSPACE_CONFIG: :4096:8 -TEST_TYPE: frozen-start -MODE: inference -MODEL_ARGS: - --tiktoken-pattern: v2 - --use-mcore-models: true - --tokenizer-type: TikTokenizer - --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json - --auto-detect-ckpt-format: true - --max-tokens-to-oom: 3600000 - --inference-max-seq-length: 4096 - 
--attention-backend: flash - --use-checkpoint-args: true - --micro-batch-size: 1 - --no-load-optim: true - --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 0 - --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ - --distributed-backend: nccl - --log-interval: 1 - --transformer-impl: inference_optimized - --sequence-parallel: true - --tensor-model-parallel-size: 2 - --pipeline-model-parallel-size: 2 - --deterministic-mode: true - --ckpt-format: torch_dist - --bf16: true - --log-memory-to-tensorboard: true - --log-num-zeros-in-grad: true - --log-validation-ppl-to-tensorboard: true - --log-timers-to-tensorboard: true - --num-layers: 24 - --hidden-size: 1152 - --num-attention-heads: 16 - --max-position-embeddings: 1024 - --seq-length: 1024 - --temperature: 1.0 - --top_k: 1 - --return-log-probs: true - --num-tokens-to-generate: 30 - --inference-dynamic-batching-max-requests-override: 8 # hardcode decode padding tokens to 7 for reproducibility - --inference-dynamic-batching-buffer-guaranteed-fraction: 0 - --inference-dynamic-batching-buffer-overflow-factor: 0.2 - --inference-dynamic-batching-buffer-size-gb: 20 - --dist-ckpt-strictness: log_unexpected - --inference-ckpt-non-strict: true # To handle the extra_state errors - --output-path: ${TENSORBOARD_PATH} - --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." 
- --incoming-requests-per-step: 32 - --use-flashinfer-fused-rope: true - -METRICS: - - "generated_tokens" - - "logprobs" diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json index f32580e937f..6ef98105cbd 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json @@ -157,5 +157,5 @@ -0.0585334412753582 ] }, - "throughput": [12.319796866345767, 12.319796866345767] -} + "throughput": [13.93210545115292, 13.93210545115292] +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml index e6b659cf46f..59186f8d532 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml @@ -41,7 +41,10 @@ MODEL_ARGS: --top_k: 1 --return-log-probs: true --num-tokens-to-generate: 30 - --inference-dynamic-batching-buffer-size-gb: 10 + --inference-dynamic-batching-max-requests-override: 8 # hardcode decode padding tokens to 7 for reproducibility + --inference-dynamic-batching-buffer-guaranteed-fraction: 0 + --inference-dynamic-batching-buffer-overflow-factor: 0.2 + --inference-dynamic-batching-buffer-size-gb: 20 --dist-ckpt-strictness: log_unexpected --inference-ckpt-non-strict: true # To handle the extra_state errors --output-path: ${TENSORBOARD_PATH} diff --git 
a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json index 4ebaf72f5e7..07adf271434 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json @@ -1,158 +1,158 @@ { - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And then you get to the end of the movie, and you realize that this is not New York at all. 
This is New York at the end", - "generated_tokens": [ - 3060, - 2430, - 1636, - 2012, - 1317, - 1278, - 2362, - 1307, - 1278, - 16070, - 1044, - 1321, - 1636, - 23067, - 1455, - 1593, - 1395, - 1605, - 3140, - 5152, - 1513, - 1747, - 1046, - 2409, - 1395, - 3140, - 5152, - 1513, - 1278, - 2362 - ], - "latency": 42.63835311005823, - "logprobs": [ - -9.358713150024414, - -2.724055767059326, - -4.5792131423950195, - -1.4844143390655518, - -0.6546129584312439, - -1.7303215265274048, - -2.4795279502868652, - -2.0776171684265137, - -2.4553134441375732, - -6.219150066375732, - -1.566371202468872, - -3.486889362335205, - -4.418787479400635, - -3.8580172061920166, - -2.0664010047912598, - -1.843908667564392, - -3.744598627090454, - -6.82543420791626, - -0.2880207300186157, - -0.9257857799530029, - -6.612694263458252, - -7.218401908874512, - -12.827808380126953, - -2.1861495971679688, - -3.8218231201171875, - -0.5008565187454224, - -4.383245468139648, - -0.06934759020805359, - -0.09667497128248215, - -3.2640299797058105, - -10.102912902832031, - -1.1498218774795532, - -5.979549407958984, - -5.0192108154296875, - -3.8367133140563965, - -2.581653356552124, - -3.4087462425231934, - -5.545716285705566, - -1.6541939973831177, - -5.547749996185303, - -12.21850872039795, - -12.582784652709961, - -0.09534379839897156, - -2.522055149078369, - -1.4054086208343506, - -2.8758127689361572, - -1.1866405010223389, - -0.005799253936856985, - -3.3871712684631348, - -13.193516731262207, - -4.389392852783203, - -2.520228862762451, - -6.023908615112305, - -0.7408540844917297, - -0.04526234790682793, - -1.5508661270141602, - -1.1332746744155884, - -5.653256416320801, - -0.4028852581977844, - -4.9457244873046875, - -0.618165135383606, - -0.6616490483283997, - -2.36385178565979, - -13.6455078125, - -0.08668932318687439, - -3.5266754627227783, - -1.3801541328430176, - -6.351947784423828, - -0.5434023141860962, - -3.5673093795776367, - -0.871107816696167, - -1.618450403213501, - -5.378700256347656, 
- -17.17119026184082, - -6.662005424499512, - -0.9221409559249878, - -4.141905784606934, - -1.2047083377838135, - -2.227570056915283, - -1.7645721435546875, - -0.21892313659191132, - -9.296550750732422, - -0.11995092779397964, - -7.402207851409912, - -2.512965679168701, - -4.100971221923828, - -3.580245018005371, - -1.9462040662765503, - -2.347074031829834, - -1.5288957357406616, - -2.4033043384552, - -1.7311294078826904, - -1.1686863899230957, - -2.938558340072632, - -0.5278136730194092, - -0.4748117923736572, - -1.749883770942688, - -0.8397680521011353, - -0.4109693169593811, - -0.9552587270736694, - -1.5238327980041504, - -0.4656376838684082, - -1.6448218822479248, - -0.5414345264434814, - -1.2422380447387695, - -1.1426063776016235, - -0.002245525596663356, - -1.252556562423706, - -0.007873333990573883, - -0.7185167670249939, - -0.7521701455116272, - -0.042445242404937744, - -0.8852499723434448, - -0.02266514115035534, - -2.0951969623565674, - -1.348037838935852, - -0.8296748399734497 - ] - } -} + "0": { + "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", + "generated_text": " And that this is the place where you can be yourself, and be yourself in the most beautiful way. 
And that this is the place where you can", + "generated_tokens": [ + 3060, + 1455, + 1593, + 1395, + 1278, + 3535, + 2478, + 1636, + 1710, + 1402, + 14019, + 1044, + 1321, + 1402, + 14019, + 1294, + 1278, + 2725, + 15568, + 3039, + 1046, + 3060, + 1455, + 1593, + 1395, + 1278, + 3535, + 2478, + 1636, + 1710 + ], + "latency": 2.020272731781006, + "logprobs": [ + -9.358587265014648, + -2.7594826221466064, + -4.608366012573242, + -1.4093360900878906, + -0.6152952313423157, + -1.7217562198638916, + -2.496668815612793, + -2.0547454357147217, + -2.441960573196411, + -6.280838966369629, + -1.5643692016601562, + -3.462346076965332, + -4.428728103637695, + -3.8633861541748047, + -1.9936373233795166, + -1.8929449319839478, + -3.796365737915039, + -6.8360137939453125, + -0.2901247441768646, + -0.9246833324432373, + -6.633338928222656, + -7.166708469390869, + -12.771251678466797, + -2.198296308517456, + -3.7778120040893555, + -0.4983733296394348, + -4.381269454956055, + -0.0666784718632698, + -0.09580295532941818, + -3.2437636852264404, + -10.079947471618652, + -1.172220230102539, + -5.977442741394043, + -5.046236038208008, + -3.855658531188965, + -2.5585858821868896, + -3.356245994567871, + -5.557229518890381, + -1.6787731647491455, + -5.483290672302246, + -12.218501091003418, + -12.61402702331543, + -0.09662941098213196, + -2.5431432723999023, + -1.4071024656295776, + -2.9154715538024902, + -1.1964417695999146, + -0.006458481773734093, + -3.3625335693359375, + -13.262511253356934, + -4.314079761505127, + -2.617699146270752, + -5.987792015075684, + -0.778266429901123, + -0.048888545483350754, + -1.548882007598877, + -1.1381981372833252, + -5.627166748046875, + -0.4078553318977356, + -4.958505630493164, + -0.6187160611152649, + -0.7174848914146423, + -2.469533920288086, + -13.620073318481445, + -0.09088654816150665, + -3.526974678039551, + -1.4195809364318848, + -6.402483940124512, + -0.5898402333259583, + -3.565917491912842, + -0.8561318516731262, + -1.6140165328979492, + 
-5.370549201965332, + -17.159223556518555, + -6.583524703979492, + -0.8855001926422119, + -4.19431209564209, + -1.2012220621109009, + -2.2563133239746094, + -1.7674944400787354, + -0.22064533829689026, + -9.292220115661621, + -0.12445646524429321, + -7.29617977142334, + -2.526529312133789, + -4.071560859680176, + -3.5568013191223145, + -1.926215410232544, + -2.349026918411255, + -2.2132363319396973, + -0.3125414550304413, + -1.4718132019042969, + -2.149106740951538, + -1.0855519771575928, + -1.631832242012024, + -1.3751734495162964, + -1.9396103620529175, + -1.5293723344802856, + -0.8444125056266785, + -1.2414811849594116, + -1.9522171020507812, + -2.4338042736053467, + -1.5651824474334717, + -0.9498789310455322, + -1.8044980764389038, + -2.356677770614624, + -1.247452974319458, + -1.550165057182312, + -0.5635553598403931, + -0.6177330017089844, + -0.4778785705566406, + -0.020452087745070457, + -0.48500269651412964, + -0.23854275047779083, + -0.06543659418821335, + -0.11837350577116013, + -0.0585334412753582 + ] + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml index 551ba8115cb..612e621534d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml @@ -22,9 +22,8 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 - --transformer-impl: inference_optimized - --sequence-parallel: true - --tensor-model-parallel-size: 8 + --transformer-impl: transformer_engine + --tensor-model-parallel-size: 1 --pipeline-model-parallel-size: 1 --deterministic-mode: true --ckpt-format: torch_dist @@ -52,7 
+51,6 @@ MODEL_ARGS: --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." --incoming-requests-per-step: 32 --use-flashinfer-fused-rope: true - METRICS: - "generated_tokens" - "logprobs" diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/golden_values_dev_dgx_h100.json deleted file mode 100644 index dccdd34a5e7..00000000000 --- a/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/golden_values_dev_dgx_h100.json +++ /dev/null @@ -1,135 +0,0 @@ -{ - "0": { - "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " Then, when you're ready, go home and watch the movie again.
    ", - "generated_tokens": [ - 6830, - 1044, - 2200, - 1636, - 6185, - 11831, - 1044, - 1974, - 4590, - 1321, - 9951, - 1278, - 16070, - 2790, - 1046, - 2 - ], - "latency": 22.701347589492798, - "cuda_graph_request_count_map": null, - "step_count": 16, - "logprobs": [ - -9.498085021972656, - -3.787536859512329, - -3.0404648780822754, - -1.7445809841156006, - -0.29672086238861084, - -1.3661342859268188, - -2.3458175659179688, - -1.83931303024292, - -1.4894113540649414, - -6.440437316894531, - -0.8176816701889038, - -1.790361762046814, - -3.6521127223968506, - -3.7014482021331787, - -1.5858951807022095, - -1.5492421388626099, - -2.844204902648926, - -6.694585800170898, - -0.06552714854478836, - -1.333437204360962, - -6.077418327331543, - -9.448220252990723, - -10.46927261352539, - -1.4987666606903076, - -4.727880001068115, - -0.7596290111541748, - -2.152517795562744, - -0.013758113607764244, - -0.040566492825746536, - -3.1010313034057617, - -8.735280990600586, - -1.5446771383285522, - -5.841436862945557, - -3.0970406532287598, - -4.0269670486450195, - -3.769413948059082, - -2.466399669647217, - -2.3482255935668945, - -0.47234833240509033, - -1.114174723625183, - -5.310229778289795, - -8.236719131469727, - -0.015452657826244831, - -2.854970932006836, - -1.2198810577392578, - -3.923705577850342, - -0.9644856452941895, - -0.0026721982285380363, - -3.096668243408203, - -11.110801696777344, - -3.688267230987549, - -2.3297765254974365, - -4.670788764953613, - -0.09854680299758911, - -0.06234245002269745, - -1.3255000114440918, - -2.169330596923828, - -4.490111827850342, - -0.4412422776222229, - -3.9356117248535156, - -0.5775455832481384, - -0.2409835010766983, - -2.9197134971618652, - -13.475022315979004, - -0.10248012840747833, - -3.5023770332336426, - -0.8544933795928955, - -5.194520473480225, - -0.32954925298690796, - -2.3026833534240723, - -0.5346049070358276, - -1.2862977981567383, - -4.881562232971191, - -15.555293083190918, - -4.919404029846191, - 
-0.22008435428142548, - -6.644532680511475, - -0.8938115239143372, - -2.1304054260253906, - -1.8866363763809204, - -0.20106904208660126, - -5.917205810546875, - -0.0056310598738491535, - -7.453446388244629, - -3.1677205562591553, - -3.706507682800293, - -2.136584520339966, - -2.9287283420562744, - -1.4792609214782715, - -2.4399306774139404, - -1.2330785989761353, - -1.9715899229049683, - -1.9578948020935059, - -0.23143476247787476, - -2.052696466445923, - -1.0413113832473755, - -1.1709030866622925, - -2.825991630554199, - -1.6848523616790771, - -2.2008259296417236, - -1.5216114521026611, - -1.2439141273498535, - -1.412055253982544 - ] - }, - "throughput": [ - 13.750125804204401, 13.955213632130931 - ] -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/model_config.yaml deleted file mode 100644 index 4ae5c719291..00000000000 --- a/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/model_config.yaml +++ /dev/null @@ -1,72 +0,0 @@ -ENV_VARS: - CUDA_DEVICE_MAX_CONNECTIONS: 1 - NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 - NCCL_ALGO: Ring - CUBLAS_WORKSPACE_CONFIG: :4096:8 -TEST_TYPE: frozen-start -MODE: inference -MODEL_ARGS: - --log-num-zeros-in-grad: true - --log-validation-ppl-to-tensorboard: true - --log-timers-to-tensorboard: true - --log-memory-to-tensorboard: true - --timing-log-level: 0 - --load: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/checkpoint - --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json - --tokenizer-type: TikTokenizer - --tiktoken-pattern: v2 - --distributed-backend: nccl - --log-interval: 1 - --transformer-impl: transformer_engine - --tensor-model-parallel-size: 1 - --pipeline-model-parallel-size: 1 - --expert-model-parallel-size: 1 - 
--use-mcore-models: true - --is-hybrid-model: true - --model-provider: mamba - --init-method-std: 0.0198 - --untie-embeddings-and-output-weights: true - --disable-bias-linear: true - --init-method-std: 0.014 - --position-embedding-type: none - --num-layers: 50 - --hidden-size: 2048 - --ffn-hidden-size: 11264 - --num-attention-heads: 16 - --kv-channels: 128 - --hybrid-override-pattern: M-M-M-M*-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M- - --spec: megatron.core.models.mamba.mamba_layer_specs mamba_stack_spec - --normalization: RMSNorm - --swiglu: true - --attention-dropout: 0.0 - --hidden-dropout: 0.0 - --seq-length: 4096 - --max-position-embeddings: 4096 - --micro-batch-size: 1 - --ckpt-format: torch_dist - --ckpt-fully-parallel-save: true - --ckpt-fully-parallel-load: true - --ckpt-assume-constant-structure: true - --dist-ckpt-strictness: log_unexpected - --bf16: true - --attention-backend: flash - --no-create-attention-mask-in-dataloader: true - --num-workers: 8 - --use-checkpoint-args: true - --no-use-tokenizer-model-from-checkpoint-args: true - --no-load-optim: true - --deterministic-mode: true - --save-interval: 2000 - --temperature: 1.0 - --top_k: 1 - --return-log-probs: true - --num-tokens-to-generate: 30 - --max-tokens-to-oom: 3600000 - --inference-max-seq-length: 4096 - --output-path: ${TENSORBOARD_PATH} - --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." 
- --incoming-requests-per-step: 32 - --inference-repeat-n: 3 -METRICS: - - "generated_tokens" - - "logprobs" diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json index d9a60d1ae11..1a9705f8181 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json @@ -174,5 +174,5 @@ -0.5394397377967834 ] }, - "throughput": [34.95064017365726, 34.95064017365726] + "throughput": [25.35687538450034, 25.35687538450034] } diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index e97dc0b56a4..0e1f9110793 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -80,7 +80,6 @@ MODEL_ARGS: --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." 
--incoming-requests-per-sec: -1 --inference-repeat-n: 8 - --inference-dynamic-batching-buffer-size-gb: 20 METRICS: - "generated_tokens" - "logprobs" diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index 6c119cc548b..1b9eaaf1f65 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -76,7 +76,6 @@ MODEL_ARGS: --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." --incoming-requests-per-sec: -1 # all requests arrive up front. --inference-repeat-n: 8 - --inference-dynamic-batching-buffer-size-gb: 20 METRICS: - "generated_tokens" - "logprobs" diff --git a/tests/test_utils/python_scripts/auto_reminder_github.py b/tests/test_utils/python_scripts/auto_reminder_github.py index 7484244b717..df75ec0542c 100644 --- a/tests/test_utils/python_scripts/auto_reminder_github.py +++ b/tests/test_utils/python_scripts/auto_reminder_github.py @@ -58,42 +58,27 @@ def get_user_email(self, username: str): try: user = self.github.get_user(username) - public_email = None # 1. Try public profile email first if user.email and not user.email.endswith("@users.noreply.github.com"): - if user.email.endswith("@nvidia.com"): - self.email_cache[username] = user.email - return user.email - else: - public_email = user.email + self.email_cache[username] = user.email + return user.email # 2. 
If no public email, check recent commits on the main repo try: # Use get_commits(author=...) which is more direct than search_commits for commit in self.repo.get_commits(author=user)[:10]: email = commit.commit.author.email - if ( - email - and not email.endswith("@users.noreply.github.com") - and email.endswith("@nvidia.com") - ): + if email and not email.endswith("@users.noreply.github.com"): self.email_cache[username] = email return email - elif ( - email - and not email.endswith("@users.noreply.github.com") - and public_email is None - ): - public_email = email except Exception as e: logger.debug(f"Could not check commits for {username}: {e}") - if public_email is None: - public_email = f"{username}@users.noreply.github.com" - - self.email_cache[username] = public_email - return public_email + # 3. Fallback to public email (even if noreply) or a constructed noreply + email = user.email or f"{username}@users.noreply.github.com" + self.email_cache[username] = email + return email except Exception as e: logger.warning(f"Could not get user object for {username}: {e}") diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml index e882d721860..1b4786e8230 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml @@ -39,7 +39,7 @@ spec: ARGUMENTS=( "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/mnt/artifacts/" + "DATA_PATH=null" "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" @@ -59,22 +59,8 @@ products: - environment: [dev] scope: [flaky] platforms: [dgx_h100] - - test_case: [gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq] - products: - - 
environment: [dev] - scope: [flaky] - platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - - test_case: [gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq] - products: - - environment: [dev] - scope: [flaky] diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index eae09a6e16a..0b068c55220 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -114,11 +114,6 @@ products: platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer] products: - environment: [dev] diff --git a/tests/test_utils/recipes/mamba-dynamic-inference.yaml b/tests/test_utils/recipes/mamba-dynamic-inference.yaml deleted file mode 100644 index 0d02ce29a54..00000000000 --- a/tests/test_utils/recipes/mamba-dynamic-inference.yaml +++ /dev/null @@ -1,61 +0,0 @@ -type: basic -format_version: 1 -maintainers: [mcore] -loggers: [stdout] -spec: - name: '{test_case}_{environment}_{platforms}' - model: hybrid - build: mcore-pyt-{environment} - nodes: 1 - gpus: 1 - n_repeat: 1 - platforms: dgx_a100 - script_setup: | - unset https_proxy - echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc - - # Checkout latest - cd /opt - rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm - git init - git remote add origin $MCORE_REPO - git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*' - git fetch origin $MCORE_MR_COMMIT - git checkout $MCORE_MR_COMMIT - git rev-parse HEAD - # Checkout backwards-ref - cd /opt - rm -rf 
/opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy - git init - git remote add origin $MCORE_REPO - git fetch origin $MCORE_BACKWARDS_COMMIT - git checkout $MCORE_BACKWARDS_COMMIT - git rev-parse HEAD - rm -rf megatron; cp -a /opt/megatron-lm/megatron ./ - script: |- - ls - cd /opt/megatron-lm - - ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/mnt/artifacts" - "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=null" - "DATA_CACHE_PATH=/workspace/data/cache" - "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference.py" - "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" - "OUTPUT_PATH={assets_dir}" - "TENSORBOARD_PATH={assets_dir}/generations_{environment}_{platforms}.json" - "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" - "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" - ) - - bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} - -products: - - test_case: [hybrid_dynamic_inference_tp1_pp1_dp8_583m] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] diff --git a/tests/unit_tests/data/test_fim_dataset.py b/tests/unit_tests/data/test_fim_dataset.py deleted file mode 100644 index 7022a4b5fa9..00000000000 --- a/tests/unit_tests/data/test_fim_dataset.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
- -import pytest -import torch - -from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder -from megatron.core.datasets.utils import compile_helpers, get_blend_from_list -from megatron.core.tokenizers import MegatronTokenizer -from megatron.training.datasets.fim_dataset import GPTFIMDataset, GPTFIMDatasetConfig -from tests.unit_tests.test_utilities import Utils - - -@pytest.mark.parametrize("spm_rate", [0.0, 1.0]) -@pytest.mark.parametrize("split_sample", [None, "python"]) -def test_fim_gpt_dataset(spm_rate, split_sample): - if torch.distributed.is_available(): - Utils.initialize_distributed() - if torch.distributed.get_rank() == 0: - compile_helpers() - torch.distributed.barrier() - else: - compile_helpers() - - tokenizer = MegatronTokenizer.from_pretrained( - tokenizer_path="/opt/data/tokenizers/huggingface", - metadata_path={"library": "huggingface"}, - additional_special_tokens=["", "", "", "", ""], - include_special_tokens=True, - ) - blend = get_blend_from_list(["/opt/data/datasets/fim/fim_text_document"]) - extra_tokens = { - "prefix": "", - "middle": "", - "suffix": "", - "pad": "", - "eod": "", - } - seq_length = 32 - rate = 1.0 - fragment_rate = 1.0 - config = GPTFIMDatasetConfig( - blend=blend, - random_seed=1234, - sequence_length=seq_length, - split="990,9,1", - tokenizer=tokenizer, - reset_position_ids=True, - reset_attention_mask=True, - eod_mask_loss=True, - fim_extra_tokens=extra_tokens, - fim_rate=rate, - fim_spm_rate=spm_rate, - fim_fragment_rate=fragment_rate, - fim_split_sample=split_sample, - ) - - datasets = BlendedMegatronDatasetBuilder( - GPTFIMDataset, [10, 10, 10], lambda: True, config - ).build() - - prefix_id = tokenizer.tokenize("")[1] - suffix_id = tokenizer.tokenize("")[1] - middle_id = tokenizer.tokenize("")[1] - - dataset = datasets[0] - assert dataset.fim_rate == rate - assert dataset.fim_spm_rate == spm_rate - assert dataset.fragment_fim_rate == fragment_rate - - tokens = 
dataset[0]["tokens"].tolist() - if split_sample: - split_sample_id = tokenizer.tokenize(split_sample)[1] - split_sample_index = tokens.index(split_sample_id) - assert prefix_id == tokens[split_sample_index + 1] - if spm_rate == 0.0: - assert prefix_id == tokens[0] - assert suffix_id in tokens - assert middle_id in tokens - assert tokens.index(suffix_id) < tokens.index(middle_id) - else: - assert prefix_id == tokens[0] - assert suffix_id == tokens[1] - assert middle_id in tokens - - -if __name__ == "__main__": - test_fim_gpt_dataset() diff --git a/tests/unit_tests/inference/contexts/test_dynamic_context.py b/tests/unit_tests/inference/contexts/test_dynamic_context.py index 1baf9034c9d..0674cdfcabd 100644 --- a/tests/unit_tests/inference/contexts/test_dynamic_context.py +++ b/tests/unit_tests/inference/contexts/test_dynamic_context.py @@ -5,9 +5,6 @@ import pytest import torch -from megatron.core.inference.contexts.attention_context.mamba_metadata import ( - MambaInferenceStateConfig, -) from megatron.core.inference.contexts.dynamic_context import ( DynamicInferenceContext, RequestOverflowError, @@ -31,8 +28,6 @@ class TestDynamicContext: def _setup_model_parallel_group(self, tensor_parallel_size, pipeline_parallel_size): - self.pp_size = pipeline_parallel_size - Utils.initialize_model_parallel( tensor_model_parallel_size=tensor_parallel_size, pipeline_model_parallel_size=pipeline_parallel_size, @@ -48,39 +43,38 @@ def _get_dynamic_context( max_sequence_length, buffer_size_gb, block_size_tokens, - max_tokens, + buffer_guaranteed_fraction, + buffer_overflow_factor, + max_requests_override, + max_tokens_override, is_hybrid_model=False, layer_type_list=None, rounder=64, ): set_rounder(rounder) - if is_hybrid_model: - if layer_type_list is None: - layer_type_list = [Symbols.MAMBA, Symbols.MLP, Symbols.ATTENTION, Symbols.MLP] - mamba_conv_states_shape = (544, 4) - mamba_ssm_states_shape = (8, 64, 16) - mamba_inference_state_config = MambaInferenceStateConfig( - 
layer_type_list, mamba_conv_states_shape, mamba_ssm_states_shape - ) - else: - mamba_inference_state_config = None + if is_hybrid_model and layer_type_list is None: + layer_type_list = [Symbols.MAMBA, Symbols.MLP, Symbols.ATTENTION, Symbols.MLP] dynamic_context = DynamicInferenceContext( params_dtype=params_dtype, - num_layers=num_layers // self.pp_size, + num_layers=num_layers, kv_channels=kv_channels, num_attention_heads=num_attention_heads, max_sequence_length=max_sequence_length, num_cuda_graphs=None, use_cuda_graphs_for_non_decode_steps=not is_hybrid_model, buffer_size_gb=buffer_size_gb, + buffer_guaranteed_fraction=buffer_guaranteed_fraction, block_size_tokens=block_size_tokens, - max_tokens=max_tokens, - mamba_inference_state_config=mamba_inference_state_config, + buffer_overflow_factor=buffer_overflow_factor, + max_requests_override=max_requests_override, + max_tokens_override=max_tokens_override, + layer_type_list=layer_type_list, + mamba_conv_states_shape=(544, 4), + mamba_ssm_states_shape=(8, 64, 16), use_flashinfer_fused_rope=None, # default to using flash-infer if available # this is for compatibility with the LTS environment - unified_memory_level=0, # unit tests currently broken with UVM ) return dynamic_context @@ -99,25 +93,28 @@ def test_initialize_dynamic_context(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, is_hybrid_model=is_hybrid_model, ) if not is_hybrid_model: - assert dynamic_context.block_allocator.total_count == 491 - assert dynamic_context.block_allocator.active_count == 245 - assert dynamic_context.max_total_requests == 490 - assert dynamic_context.max_active_requests == 245 - assert dynamic_context.max_tokens == 16384 + assert dynamic_context.gtd_block_count == 48 + assert dynamic_context.gtd_request_count == 12 + assert 
dynamic_context.block_allocator.block_count_total == 491 + assert dynamic_context.max_requests == 128 + assert dynamic_context.max_tokens == 62848 assert dynamic_context.num_mamba_layers == 0 assert dynamic_context.mamba_metadata is None else: - assert dynamic_context.block_allocator.total_count == 555 - assert dynamic_context.block_allocator.active_count == 277 - assert dynamic_context.max_total_requests == 554 - assert dynamic_context.max_active_requests == 277 - assert dynamic_context.max_tokens == 16384 + assert dynamic_context.gtd_block_count == 112 + assert dynamic_context.gtd_request_count == 28 + assert dynamic_context.block_allocator.block_count_total == 1156 + assert dynamic_context.max_requests == 320 + assert dynamic_context.max_tokens == 154176 assert dynamic_context.num_mamba_layers == 1 assert dynamic_context.mamba_metadata is not None @@ -134,8 +131,11 @@ def test_is_static_batching(self): num_attention_heads=8, max_sequence_length=512, buffer_size_gb=1.0, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, ) assert not dynamic_context.is_static_batching() @@ -150,18 +150,26 @@ def test_is_memory_available(self, is_hybrid_model): num_attention_heads=8, max_sequence_length=512, buffer_size_gb=1.0, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, is_hybrid_model=is_hybrid_model, ) - dynamic_context.block_allocator.active_count = 10 + dynamic_context.block_allocator.block_count_avail = 10 assert dynamic_context.block_allocator.is_memory_available(10) assert not dynamic_context.block_allocator.is_memory_available(11) assert dynamic_context.block_allocator.is_memory_available(1) - dynamic_context.block_allocator.active_count = 0 + dynamic_context.block_allocator.block_count_avail = 0 assert not 
dynamic_context.block_allocator.is_memory_available(1) + dynamic_context.block_allocator.block_count_avail = 10 + dynamic_context.gtd_block_count = 5 + assert dynamic_context.block_allocator.is_memory_available(6) + assert not dynamic_context.block_allocator.is_memory_available(6, safe=True) + @pytest.mark.internal @pytest.mark.parametrize("is_hybrid_model", [False, True]) def test_request_overflow(self, is_hybrid_model: bool): @@ -174,14 +182,16 @@ def test_request_overflow(self, is_hybrid_model: bool): num_attention_heads=8, max_sequence_length=128, buffer_size_gb=0.01, + buffer_guaranteed_fraction=0.1, block_size_tokens=32, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, rounder=1, is_hybrid_model=is_hybrid_model, ) - dynamic_context.max_active_requests //= 2 with pytest.raises(RequestOverflowError): - for i in range(dynamic_context.max_active_requests + 1): + for i in range(dynamic_context.max_requests + 1): dynamic_context.add_request( DynamicInferenceRequest( request_id=i, @@ -204,8 +214,11 @@ def test_token_overflow_error(self, is_hybrid_model: bool): num_attention_heads=8, max_sequence_length=512, buffer_size_gb=0.1, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=200, # setting low, but >= context.max_active_requests. 
+ buffer_overflow_factor=1.0, + max_requests_override=2, + max_tokens_override=20, # Setting a very low token limit rounder=1, is_hybrid_model=is_hybrid_model, ) @@ -214,7 +227,7 @@ def test_token_overflow_error(self, is_hybrid_model: bool): dynamic_context.add_request( DynamicInferenceRequest( request_id=1, - prompt_tokens=torch.arange(0, 225, device='cuda'), + prompt_tokens=torch.arange(0, 25, device='cuda'), sampling_params=SamplingParams( num_tokens_to_generate=dynamic_context.max_tokens - 25 ), @@ -233,8 +246,11 @@ def test_reset(self, is_hybrid_model: bool): num_attention_heads=8, max_sequence_length=128, buffer_size_gb=1.0, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, is_hybrid_model=is_hybrid_model, ) @@ -257,6 +273,7 @@ def test_reset(self, is_hybrid_model: bool): dynamic_context.token_to_position_in_request.fill_(1) dynamic_context.token_to_block_idx.fill_(1) dynamic_context.token_to_local_position_within_kv_block.fill_(1) + dynamic_context.block_allocator.block_count_avail = 5 dynamic_context.memory_buffer.fill_(1) dynamic_context.request_to_kv_block_ids.fill_(1) if is_hybrid_model: @@ -286,8 +303,8 @@ def test_reset(self, is_hybrid_model: bool): assert torch.all(dynamic_context.token_to_block_idx == -1) assert torch.all(dynamic_context.token_to_local_position_within_kv_block == 0) assert ( - dynamic_context.block_allocator.active_count - == dynamic_context.block_allocator.total_count // 2 + dynamic_context.block_allocator.block_count_avail + == dynamic_context.block_allocator.block_count_total - 1 ) assert torch.all(dynamic_context.request_to_kv_block_ids == -1) if is_hybrid_model: @@ -306,13 +323,16 @@ def test_allocate_and_release_memory_blocks(self, is_hybrid_model): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + 
max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, is_hybrid_model=is_hybrid_model, ) if is_hybrid_model: - expected_memory_blocks = [550, 551, 552, 553] + expected_memory_blocks = [1151, 1152, 1153, 1154] else: expected_memory_blocks = [486, 487, 488, 489] expected_block_count_avail = expected_memory_blocks[0] @@ -325,20 +345,20 @@ def test_allocate_and_release_memory_blocks(self, is_hybrid_model): .tolist() == expected_memory_blocks ) - assert dynamic_context.block_allocator.total_avail == expected_block_count_avail + assert dynamic_context.block_allocator.block_count_avail == expected_block_count_avail dynamic_context.block_allocator.release_memory_blocks( torch.tensor(expected_memory_blocks[-2:], device='cuda') ) - assert dynamic_context.block_allocator.total_avail == expected_block_count_avail + 2 + assert dynamic_context.block_allocator.block_count_avail == expected_block_count_avail + 2 assert ( dynamic_context.block_allocator.allocate_memory_blocks(1).item() == expected_memory_blocks[-1] ) - assert dynamic_context.block_allocator.total_avail == expected_block_count_avail + 1 + assert dynamic_context.block_allocator.block_count_avail == expected_block_count_avail + 1 # Should return None since we allocate more blocks than what we have. 
assert ( dynamic_context.block_allocator.allocate_memory_blocks( - dynamic_context.block_allocator.total_avail + 100 + dynamic_context.block_allocator.block_count_avail + 100 ) == None ) @@ -355,8 +375,11 @@ def test_add_request(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, is_hybrid_model=is_hybrid_model, ) assert dynamic_context.block_size_tokens == 128 @@ -378,7 +401,7 @@ def test_add_request(self, is_hybrid_model: bool): assert dynamic_context.request_kv_length_offsets[0] == 0 assert dynamic_context.request_kv_block_counts[0] == 2 assert dynamic_context.request_last_kv_block_id[0].item() == ( - 553 if is_hybrid_model else 489 + 1154 if is_hybrid_model else 489 ) assert dynamic_context.request_last_kv_block_offset[0].item() == 15 assert torch.all( @@ -428,8 +451,11 @@ def test_update_request(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, is_hybrid_model=is_hybrid_model, ) @@ -438,7 +464,7 @@ def test_update_request(self, is_hybrid_model: bool): dynamic_context.paused_request_count = 0 dynamic_context.total_request_count = 3 dynamic_context.request_kv_block_counts[0:3] = 1 - new_block_ids = dynamic_context.block_allocator.allocate_memory_blocks(3) + new_block_ids = dynamic_context.block_allocator.allocate_memory_blocks(3, safe=True) dynamic_context.request_to_kv_block_ids[0:3, 0] = new_block_ids if is_hybrid_model: @@ -472,8 +498,11 @@ def test_update_request(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + 
max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, is_hybrid_model=is_hybrid_model, ) @@ -491,16 +520,18 @@ def test_update_request(self, is_hybrid_model: bool): ) total_request_count = 10 - dynamic_context.block_allocator.total_avail -= 11 # We align 11 blocks to the 10 requests we have. 3rd request alone we setup like it requires 2 blocks + dynamic_context.block_allocator.block_count_avail -= 11 # We align 11 blocks to the 10 requests we have. 3rd request alone we setup like it requires 2 blocks dynamic_context.total_request_count = total_request_count dynamic_context.request_to_kv_block_ids[0:total_request_count, 0] = torch.arange( - dynamic_context.block_allocator.total_avail, - dynamic_context.block_allocator.total_avail + 10, + dynamic_context.block_allocator.block_count_avail, + dynamic_context.block_allocator.block_count_avail + 10, ) dynamic_context.request_to_kv_block_ids[3][ 1 - ] = dynamic_context.block_allocator.total_avail # Assign one extra block to request 3. + ] = ( + dynamic_context.block_allocator.block_count_avail + ) # Assign one extra block to request 3. 
dynamic_context.request_kv_length_offsets[0:total_request_count] = 10 # For 0, 1, 5, 6, the total number of tokens in last block is block size -1, so that they will all need extra blocks dynamic_context.request_kv_length_offsets[0:2] = dynamic_context.block_size_tokens - 1 @@ -586,13 +617,13 @@ def test_update_request(self, is_hybrid_model: bool): dynamic_context.request_to_kv_block_ids[0:10].cpu() == torch.tensor( [ - [543, 546, -1, -1], - [544, 543, -1, -1], - [548, 550, -1, -1], - [549, 551, -1, -1], - [547, -1, -1, -1], - [545, -1, -1, -1], - [552, -1, -1, -1], + [1144, 1147, -1, -1], + [1145, 1144, -1, -1], + [1149, 1151, -1, -1], + [1150, 1152, -1, -1], + [1148, -1, -1, -1], + [1146, -1, -1, -1], + [1153, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, -1, -1], @@ -631,19 +662,22 @@ def test_release_memory_blocks_for_finished_requests(self, is_hybrid_model): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, is_hybrid_model=is_hybrid_model, ) # Set up the initial state with 5 requests # Allocate 5 blocks for 5 requests - initial_blocks = dynamic_context.block_allocator.allocate_memory_blocks(5) + initial_blocks = dynamic_context.block_allocator.allocate_memory_blocks(5, safe=True) dynamic_context.total_request_count = 5 dynamic_context.paused_request_count = 0 # Record the available blocks before releasing memory - initial_available_blocks = dynamic_context.block_allocator.total_avail + initial_available_blocks = dynamic_context.block_allocator.block_count_avail # Assign blocks to the requests (one block per request) for i in range(5): @@ -674,7 +708,7 @@ def test_release_memory_blocks_for_finished_requests(self, is_hybrid_model): assert dynamic_context.active_token_count == 2 # Verify that 3 blocks were released by checking the available blocks - assert 
dynamic_context.block_allocator.total_avail == initial_available_blocks + 3 + assert dynamic_context.block_allocator.block_count_avail == initial_available_blocks + 3 if is_hybrid_model: # Request at position 3 now moves into finished request position 0 @@ -703,19 +737,22 @@ def test_finished_requests_with_multiple_blocks(self, is_hybrid_model): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, is_hybrid_model=is_hybrid_model, ) # Set up the initial state with 3 requests, where some use multiple blocks # Allocate 6 blocks in total for the requests - initial_blocks = dynamic_context.block_allocator.allocate_memory_blocks(6) + initial_blocks = dynamic_context.block_allocator.allocate_memory_blocks(6, safe=True) dynamic_context.total_request_count = 3 dynamic_context.paused_request_count = 0 # Record the available blocks before releasing memory - initial_available_blocks = dynamic_context.block_allocator.total_avail + initial_available_blocks = dynamic_context.block_allocator.block_count_avail # Assign blocks to the requests: # - Request 0: 1 block @@ -755,7 +792,7 @@ def test_finished_requests_with_multiple_blocks(self, is_hybrid_model): assert dynamic_context.active_token_count == 0 # Verify that all 6 blocks were released by checking the available blocks - assert dynamic_context.block_allocator.total_avail == initial_available_blocks + 6 + assert dynamic_context.block_allocator.block_count_avail == initial_available_blocks + 6 if is_hybrid_model: # All mamba states should be zeroed out @@ -776,8 +813,11 @@ def test_mamba_states_cache(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + 
buffer_overflow_factor=None, is_hybrid_model=False, ) with pytest.raises(AssertionError) as error: @@ -791,8 +831,11 @@ def test_mamba_states_cache(self, is_hybrid_model: bool): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, is_hybrid_model=is_hybrid_model, layer_type_list=[Symbols.MAMBA, Symbols.ATTENTION, Symbols.MAMBA, Symbols.ATTENTION], ) @@ -847,8 +890,11 @@ def test_calculate_and_store_log_probs(self): num_attention_heads=2, max_sequence_length=512, buffer_size_gb=0.03, + buffer_guaranteed_fraction=0.1, block_size_tokens=128, - max_tokens=None, + max_requests_override=None, + max_tokens_override=None, + buffer_overflow_factor=None, ) # Add a few requests to the context @@ -1051,3 +1097,56 @@ def test_calculate_and_store_log_probs(self): ) current_global_token_offset += expected_len + + @pytest.mark.internal + def test_unified_memory(self): + + from megatron.core.inference.unified_memory import ( + UnifiedMemoryUnsupportedError, + create_unified_mempool, + ) + + # Check UVM support. + try: + create_unified_mempool() + except UnifiedMemoryUnsupportedError: + pytest.skip("Unified memory not available due to bad environment.") + + # Setup. + self._setup_model_parallel_group(1, 1) + + # Compute number of contexts needed to fill GPU memory. + gpu_size_gb = ( + torch.cuda.get_device_properties(torch.cuda.current_device()).total_memory / 1024**3 + ) + buffer_size_gb = 20 + num_contexts = math.ceil(gpu_size_gb / buffer_size_gb) + 1 + + # Allocate enough contexts to fill GPU memory. 
+ def init_contexts(*, unified_memory_level): + contexts = [] + for i in range(num_contexts): + contexts.append( + DynamicInferenceContext( + params_dtype=torch.float32, + num_layers=4, + kv_channels=8, + num_attention_heads=2, + max_sequence_length=512, + buffer_size_gb=buffer_size_gb, + buffer_overflow_factor=1, + buffer_guaranteed_fraction=0, + unified_memory_level=unified_memory_level, + ) + ) + + # Pure GPU memory test should OOM. + try: + init_contexts(unified_memory_level=0) + except torch.OutOfMemoryError: + pass + else: + raise Exception("expected OOM.") + + # Unified memory test should succeed. + init_contexts(unified_memory_level=1) diff --git a/tests/unit_tests/inference/engines/test_dynamic_engine.py b/tests/unit_tests/inference/engines/test_dynamic_engine.py index 174bf89350b..0ac4b296746 100644 --- a/tests/unit_tests/inference/engines/test_dynamic_engine.py +++ b/tests/unit_tests/inference/engines/test_dynamic_engine.py @@ -1,10 +1,9 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import asyncio -import math import random import types -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Dict, List, Optional, Tuple import pytest @@ -13,9 +12,6 @@ from transformer_engine.pytorch.fp8 import check_fp8_support from megatron.core import parallel_state -from megatron.core.inference.contexts.attention_context.mamba_metadata import ( - MambaInferenceStateConfig, -) from megatron.core.inference.contexts.dynamic_context import ( ActiveRequestCountOverflowError, BlockOverflowError, @@ -38,7 +34,6 @@ ) from megatron.core.models.gpt.gpt_layer_specs import ( get_gpt_layer_local_spec, - get_gpt_layer_with_inference_spec, get_gpt_layer_with_transformer_engine_spec, ) from megatron.core.models.gpt.gpt_model import GPTModel @@ -49,7 +44,7 @@ from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import ( check_mamba_sequence_packing_support, - get_mamba_inference_state_config_from_model, + get_attr_wrapped_model, is_fa_min_version, is_te_min_version, ) @@ -91,7 +86,10 @@ class DynamicEngineTestConfig: context_buffer_size_gb: float = 0.1 # enough room for all tokens. context_block_size_tokens: int = 256 - context_max_tokens: Optional[int] = None + context_buffer_guaranteed_fraction: float = 0.01 + context_buffer_overflow_factor: Optional[float] = None + context_max_requests_override: Optional[int] = None + context_max_tokens_override: Optional[int] = None tensor_model_parallel_size: int = 1 pipeline_model_parallel_size: int = 1 expert_model_parallel_size: int = 1 @@ -107,14 +105,12 @@ class DynamicEngineTestConfig: skip_prompt_log_probs: bool = False cuda_graph_scope: List[str] = None force_build_cuda_graphs: bool = False - transformer_impl: str = "local" # If False, do not build cuda graphs in the tests, even if # num_cuda_graphs is set. 
# For tests concerning cuda-graph warmups, we set this to False # to avoid the overhead of building the graphs, which is not # relevant to the test. The tests only check if the required # context attributes are set correctly. - suspend_resume_interval: Optional[int] = None fp8: bool = False @@ -129,6 +125,17 @@ def __post_init__(self): assert self.num_tokens_total is not None self.max_sequence_length = self.num_tokens_total + # Update overrides if not using overflow factor. + if self.context_buffer_overflow_factor is None: + + # Enough room for all requests. + if self.context_max_requests_override is None: + self.context_max_requests_override = self.num_requests + + # Enough room for all tokens. + if self.context_max_tokens_override is None: + self.context_max_tokens_override = self.num_requests * self.max_sequence_length + if self.cuda_graph_scope is None: self.cuda_graph_scope = ["full_iteration"] @@ -140,9 +147,6 @@ class DynamicEngineTestEnv: config: DynamicEngineTestConfig requests: List[DynamicInferenceRequest] engine: DynamicInferenceEngine - mem_usage: dict = field( - default_factory=lambda: {"start": None, "end": None, "suspend_resume": {}} - ) class TestDynamicInferenceEngine: @@ -211,29 +215,34 @@ def _build_inference_context( test_config: DynamicEngineTestConfig, transformer_config: TransformerConfig, requests: List[DynamicInferenceRequest], - mamba_inference_state_config: Optional[MambaInferenceStateConfig] = None, + layer_type_list: Optional[List[str]], + mamba_conv_states_shape: Optional[Tuple[int]] = None, + mamba_ssm_states_shape: Optional[Tuple[int]] = None, ): """The inference context manages the KV cache and other inference state.""" # Inference context. 
context = DynamicInferenceContext( params_dtype=transformer_config.params_dtype, - num_layers=transformer_config.num_layers - // transformer_config.pipeline_model_parallel_size, + num_layers=transformer_config.num_layers, kv_channels=transformer_config.kv_channels, num_attention_heads=transformer_config.num_query_groups, max_sequence_length=test_config.max_sequence_length, num_cuda_graphs=test_config.num_cuda_graphs, use_cuda_graphs_for_non_decode_steps=not test_config.model_provider == "mamba", buffer_size_gb=test_config.context_buffer_size_gb, + buffer_guaranteed_fraction=test_config.context_buffer_guaranteed_fraction, block_size_tokens=test_config.context_block_size_tokens, - max_tokens=test_config.context_max_tokens, + buffer_overflow_factor=test_config.context_buffer_overflow_factor, + max_requests_override=test_config.context_max_requests_override, + max_tokens_override=test_config.context_max_tokens_override, tensor_model_parallel_size=transformer_config.tensor_model_parallel_size, - mamba_inference_state_config=mamba_inference_state_config, + layer_type_list=layer_type_list, + mamba_conv_states_shape=mamba_conv_states_shape, + mamba_ssm_states_shape=mamba_ssm_states_shape, materialize_only_last_token_logits=test_config.materialize_only_last_token_logits, use_flashinfer_fused_rope=None, # default to using flash-infer if available # this is for compatibility with the LTS environment - unified_memory_level=0, # unit tests currently broken with UVM ) return context @@ -286,26 +295,16 @@ def _build_test_env(cls, test_config): ), sequence_parallel=test_config.sequence_parallel, pipeline_dtype=torch.bfloat16, - add_bias_linear=test_config.expert_model_parallel_size == 1 - and not (test_config.transformer_impl == "inference_optimized"), + add_bias_linear=test_config.expert_model_parallel_size == 1, fp8="hybrid" if test_config.fp8 else None, fp8_recipe="tensorwise" if test_config.fp8 else None, inference_sampling_seed=test_config.random_seed, 
cuda_graph_scope=test_config.cuda_graph_scope, - transformer_impl=test_config.transformer_impl, - normalization=( - "RMSNorm" - if test_config.transformer_impl == "inference_optimized" - else "LayerNorm" - ), - # inference optimized currently only supports RMS Norm ) - if test_config.fp8 or test_config.transformer_impl == "transformer_engine": + if test_config.fp8: layer_spec = get_gpt_layer_with_transformer_engine_spec() - elif test_config.transformer_impl == "local": + else: layer_spec = get_gpt_layer_local_spec() - elif test_config.transformer_impl == "inference_optimized": - layer_spec = get_gpt_layer_with_inference_spec() # GPT model. model = GPTModel( @@ -318,13 +317,10 @@ def _build_test_env(cls, test_config): post_process=parallel_state.is_pipeline_last_stage(), ).cuda() elif test_config.model_provider == "mamba": - pp_size = test_config.pipeline_model_parallel_size # Transformer config. transformer_config = TransformerConfig( params_dtype=torch.bfloat16, - num_layers=( - 3 if pp_size == 1 else 6 - ), # 1 Mamba layer, 1 attention layer, 1 MLP layer + num_layers=3, # 1 Mamba layer, 1 attention layer, 1 MLP layer hidden_size=256, # The Mamba layer places several constraints on this mamba_num_heads=16, num_attention_heads=16, @@ -337,7 +333,7 @@ def _build_test_env(cls, test_config): ), inference_rng_tracker=True, tensor_model_parallel_size=test_config.tensor_model_parallel_size, - pipeline_model_parallel_size=pp_size, + pipeline_model_parallel_size=test_config.pipeline_model_parallel_size, expert_model_parallel_size=test_config.expert_model_parallel_size, num_moe_experts=( None @@ -350,7 +346,6 @@ def _build_test_env(cls, test_config): fp8="hybrid" if test_config.fp8 else None, fp8_recipe="tensorwise" if test_config.fp8 else None, cuda_graph_scope=test_config.cuda_graph_scope, - is_hybrid_model=True, # Needs to be set for correct out_proj init ) # Mamba model. 
@@ -373,7 +368,22 @@ def _build_test_env(cls, test_config): model.eval() - mamba_inference_state_config = get_mamba_inference_state_config_from_model(model) + # Layer type list for hybrid models + decoder = get_attr_wrapped_model(model, "decoder") + layer_type_list = getattr(decoder, "layer_type_list", None) + if test_config.model_provider == "mamba": + mamba_states_shapes = decoder.mamba_state_shapes_per_request() + if mamba_states_shapes is not None: + (mamba_conv_states_shape, mamba_ssm_states_shape) = mamba_states_shapes + else: + # A `MambaBlock` can only not have a `MambaLayer` if using pipeline parallelism + # and a particular pipeline stage was not assigned a `MambaLayer`. + assert test_config.pipeline_model_parallel_size > 1 + mamba_conv_states_shape = None + mamba_ssm_states_shape = None + else: + mamba_conv_states_shape = None + mamba_ssm_states_shape = None # Inference config. inference_config = InferenceWrapperConfig( @@ -390,7 +400,9 @@ def _build_test_env(cls, test_config): test_config=test_config, transformer_config=transformer_config, requests=requests, - mamba_inference_state_config=mamba_inference_state_config, + layer_type_list=layer_type_list, + mamba_conv_states_shape=mamba_conv_states_shape, + mamba_ssm_states_shape=mamba_ssm_states_shape, ) # Inference model wrapper. @@ -404,9 +416,7 @@ def _build_test_env(cls, test_config): # Text generation controller. text_generation_controller = TextGenerationController( inference_wrapped_model=inference_wrapped_model, - tokenizer=types.SimpleNamespace( - vocab_size=test_config.vocab_size, detokenize=lambda tokens: "tokenized_prompt" - ), + tokenizer=types.SimpleNamespace(vocab_size=test_config.vocab_size), ) # Reset global cuda graph state. @@ -425,6 +435,12 @@ def _build_test_env(cls, test_config): # Test env. 
env = DynamicEngineTestEnv(config=test_config, requests=requests, engine=engine) + # Mock the detokenize method to return predictable result + def mock_detokenize_prompt(tokens): + return "tokenized_prompt" + + env.engine.controller.tokenizer.detokenize = mock_detokenize_prompt + return env @classmethod @@ -437,31 +453,7 @@ def _run_step(cls, env): # and engine.async_step() doesn't use this sampling param's # num_tokens_to_generate. result = env.engine.step_modern(verbose=False) - - # Suspend + resume. - if ( - env.config.suspend_resume_interval is not None - and env.engine.step_count % env.config.suspend_resume_interval == 0 - ): - suspend_resume_mems = {} - suspend_resume_mems["start"] = torch.cuda.memory_stats() - env.engine.suspend() # suspend. - suspend_resume_mems["mid"] = torch.cuda.memory_stats() - env.engine.resume() # resume. - suspend_resume_mems["end"] = torch.cuda.memory_stats() - env.mem_usage["suspend_resume"][env.engine.step_count] = suspend_resume_mems - - # Nothing done? - finished_request_records = result["finished_request_records"] - if len(finished_request_records) == 0: - return - - # Append output tokens. - for finished_request_record in finished_request_records: - finished_request = finished_request_record.merge(env.engine.controller.tokenizer) - request = env.requests[finished_request.request_id] - request.output = finished_request.generated_tokens - request.status = finished_request.status + finished_requests = result["finished_requests"] @classmethod @torch.inference_mode() @@ -471,12 +463,10 @@ def _run_test(cls, **test_config_kwargs): env = cls._build_test_env(test_config) # Add requests to engine. - env.mem_usage["start"] = torch.cuda.memory_stats() for request in tqdm(env.requests, "add requests"): # Add request. env.engine._add_request(request) - request.state = "pending" # Insert gap steps between adding requests. 
for _ in range(test_config.num_gap_steps): @@ -503,20 +493,14 @@ def _run_test(cls, **test_config_kwargs): if num_tokens_total is None else num_tokens_total - len(request.prompt_tokens) ) - - # Validate the output length only if suspend_resume_interval is None. - # If it is not None, then the output length could be anything in the - # range [1, num_tokens_to_generate]. - if test_config.suspend_resume_interval is None: - assert ( - (num_tokens_to_generate is None and num_tokens_total is None) - or len(request.generated_tokens) <= num_tokens_expected - or request.status == Status.FAILED - ), ( - f"Request {request.request_id} expected to generate {num_tokens_to_generate} " - f"tokens but generated {len(request.generated_tokens)}" - ) - env.mem_usage["end"] = torch.cuda.memory_stats() + assert ( + (num_tokens_to_generate is None and num_tokens_total is None) + or len(request.generated_tokens) == num_tokens_expected + or request.status == Status.FAILED + ), ( + f"Request {request.request_id} expected to generate {num_tokens_to_generate} " + f"tokens but generated {len(request.generated_tokens)}" + ) return env @@ -534,40 +518,40 @@ def teardown_method(self, method): def test_simple(self, model_provider, num_cuda_graphs, cuda_graph_scope) -> None: """Simple test that runs without errors, and validates output.""" skip_if_mamba_sequence_packing_not_available(model_provider) - num_tokens_to_generate = 16 # Run test. env = self._run_test( - num_tokens_to_generate=num_tokens_to_generate, model_provider=model_provider, num_cuda_graphs=num_cuda_graphs, + context_max_requests_override=32, cuda_graph_scope=cuda_graph_scope, force_build_cuda_graphs=True, ) # Validate max_requests, max_tokens. - assert env.engine.context.max_tokens == DynamicInferenceContext.DEFAULT_MAX_TOKENS + assert env.engine.context.max_requests == 32 + assert env.engine.context.max_tokens == 160 - # Validate generated tokens. + # Validate output tokens. 
gpt_expected_generated_tokens = [ - [69, 85, 55, 74, 56, 89, 64, 59, 55, 67, 15, 58, 6, 37, 54, 47], - [29, 54, 33, 72, 45, 76, 41, 56, 28, 25, 17, 2, 61, 6, 98, 76], - [35, 78, 54, 16, 79, 98, 22, 5, 60, 0, 1, 76, 77, 11, 25, 7], - [25, 75, 57, 85, 81, 37, 88, 17, 71, 15, 70, 64, 50, 0, 64, 45], - [32, 5, 85, 75, 30, 68, 23, 33, 20, 26, 89, 20, 92, 97, 38, 81], - [33, 69, 32, 49, 93, 24, 33, 6, 97, 36, 37, 99], - [82, 78, 78, 65, 22, 1, 87, 42, 36, 26, 27, 56, 82, 32, 8, 80], - [], + [69, 85, 55, 74], + [29, 54, 85, 89], + [33, 30, 64, 59], + [45, 76, 33, 67], + [41, 56, 15, 58], + [28, 17, 6, 37], + [17, 2, 54, 47], + [], # this request is failed due to max sequence length overflow ] mamba_expected_generated_tokens = [ - [74, 72, 9, 59, 1, 70, 15, 89, 30, 52, 82, 70, 64, 16, 83, 5], - [25, 54, 28, 14, 87, 27, 60, 92, 28, 74, 8, 63, 60, 68, 87, 82], - [31, 21, 87, 25, 96, 13, 32, 49, 40, 54, 55, 68, 73, 2, 64, 96], - [72, 80, 35, 72, 77, 85, 98, 36, 4, 97, 37, 46, 79, 95, 83, 25], - [8, 80, 56, 4, 87, 1, 43, 98, 85, 7, 50, 38, 24, 28, 18, 80], - [9, 94, 36, 16, 87, 57, 25, 76, 64, 92, 47, 86, 73, 72, 71, 97], - [17, 5, 62, 66, 15, 52, 32, 75, 66, 18, 90, 14, 67, 37, 94, 33], + [74, 72, 83, 59], + [25, 54, 1, 70], + [28, 14, 15, 89], + [87, 27, 30, 52], + [44, 13, 82, 70], + [28, 74, 64, 16], + [8, 4, 83, 5], [], ] @@ -578,10 +562,6 @@ def test_simple(self, model_provider, num_cuda_graphs, cuda_graph_scope) -> None else: raise ValueError(f"Invalid model_provider {model_provider}") - print(f"Validating {len(env.requests)} requests.") - print(f"Expected generated tokens: {expected_generated_tokens_list}") - print(f"Actual generated tokens: {[request.generated_tokens for request in env.requests]}") - assert len(env.requests) == len(expected_generated_tokens_list) for request, expected_generated_tokens in zip(env.requests, expected_generated_tokens_list): @@ -591,6 +571,41 @@ def test_simple(self, model_provider, num_cuda_graphs, cuda_graph_scope) -> None f"expected 
({expected_generated_tokens})." ) + @pytest.mark.internal + @pytest.mark.skipif( + not is_fa_min_version("2.7.3"), reason="need latest flash attn for dynamic batching" + ) + def test_overflow_factor(self, model_provider: str = "gpt") -> None: + """Test overflow factor arg.""" + skip_if_mamba_sequence_packing_not_available(model_provider) + + # Run test. + env = self._run_test( + context_buffer_overflow_factor=0.1, + context_max_requests_override=None, + context_max_tokens_override=None, + model_provider=model_provider, + ) + + # Validate max_requests, max_tokens. + if model_provider == "gpt": + assert env.engine.context.max_requests == 420 + assert env.engine.context.max_tokens == 420 + elif model_provider == "mamba": + assert env.engine.context.max_requests == 16 + assert env.engine.context.max_tokens == 16 + + @pytest.mark.internal + @pytest.mark.skipif( + not is_fa_min_version("2.7.3"), reason="need latest flash attn for dynamic batching" + ) + @pytest.mark.parametrize("model_provider", ["gpt", "mamba"]) + def test_request_overflow(self, model_provider: str) -> None: + """Test request overflow.""" + skip_if_mamba_sequence_packing_not_available(model_provider) + + self._run_test(context_max_requests_override=4, model_provider=model_provider) + @pytest.mark.skipif( not is_fa_min_version("2.7.3"), reason="need latest flash attn for dynamic batching" ) @@ -598,11 +613,7 @@ def test_simple(self, model_provider, num_cuda_graphs, cuda_graph_scope) -> None def test_token_overflow_transient(self) -> None: """Test token overflow.""" test_config = DynamicEngineTestConfig( - num_requests=2, - min_prompt_length=512, - max_prompt_length=512, - num_tokens_to_generate=2, - context_max_tokens=900, + num_requests=2, min_prompt_length=8, max_prompt_length=8, context_max_tokens_override=12 ) env = self._build_test_env(test_config) env.engine._add_request(env.requests[0]) @@ -621,7 +632,7 @@ def test_token_overflow_transient(self) -> None: ) def 
test_token_overflow_nontransient(self) -> None: """Test token overflow (non-transient).""" - test_config = DynamicEngineTestConfig(context_max_tokens=8) + test_config = DynamicEngineTestConfig(context_max_tokens_override=8) env = self._build_test_env(test_config) try: env.engine._add_request(env.requests[0]) @@ -678,21 +689,19 @@ def test_cuda_graph_token_counts(self) -> None: # Test num_cuda_graphs. for num_cuda_graphs, expected_cuda_graph_token_counts in [ - (0, [40]), - (1, [40]), - (2, [40, 24]), - (4, [40, 32, 16]), - (8, [40, 32, 24, 16, 8]), - (16, [40, 32, 24, 16, 8]), - (64, [40, 32, 24, 16, 8]), - (1024, [40, 32, 24, 16, 8]), + (0, [64]), + (1, [64]), + (2, [64, 32]), + (4, [64, 48, 32, 16]), + (8, [64, 56, 48, 40, 32, 24, 16, 8]), + (16, [64, 56, 48, 40, 32, 24, 16, 8]), + (64, [64, 56, 48, 40, 32, 24, 16, 8]), + (1024, [64, 56, 48, 40, 32, 24, 16, 8]), ]: # Build cuda graphs (inside dynamic engine). env = self._build_test_env( - DynamicEngineTestConfig( - context_buffer_size_gb=0.01, num_cuda_graphs=num_cuda_graphs - ) + DynamicEngineTestConfig(num_requests=64, num_cuda_graphs=num_cuda_graphs) ) actual_cuda_graph_token_counts = env.engine.context.cuda_graph_token_counts assert ( @@ -712,7 +721,19 @@ def test_cuda_graph_token_counts(self) -> None: ) @pytest.mark.parametrize( "num_warmup_tokens, expected_cuda_graph_token_count", - [(1, 8), (2, 8), (4, 8), (8, 8), (10, 16), (12, 16), (16, 16)], + [ + (1, 8), + (2, 8), + (4, 8), + (8, 8), + (10, 16), + (12, 16), + (16, 16), + (20, 24), + (24, 24), + (28, 32), + (32, 32), + ], ) @torch.inference_mode() def test_cuda_graph_warmup( @@ -727,16 +748,17 @@ def test_cuda_graph_warmup( # Initialize context. 
env = self._build_test_env( - DynamicEngineTestConfig( - context_buffer_size_gb=0.0041, num_cuda_graphs=8, num_tokens_to_generate=1 - ) + DynamicEngineTestConfig(num_requests=32, num_cuda_graphs=8, num_tokens_to_generate=1) ) context = env.engine.context assert context.is_decode_only() - assert context.cuda_graph_token_counts == [16, 8], "cuda_graph_token_counts: %s." % str( - context.cuda_graph_token_counts - ) + assert context.cuda_graph_token_counts == [ + 32, + 24, + 16, + 8, + ], "cuda_graph_token_counts: %s." % str(context.cuda_graph_token_counts) context.initialize_attention_state( num_warmup_tokens=num_warmup_tokens, warmup_engine_mode=warmup_engine_mode @@ -829,10 +851,7 @@ def mock_tokenize_prompt(prompt, add_BOS=False): # Call the generate function. # It's safe to use request 0's sampling params here because all sampling # params are identical as long as use_fixed_output_lengths == False. - finished_request_records = env.engine.generate(prompts, env.requests[0].sampling_params) - finished_requests = [ - r.merge(env.engine.controller.tokenizer) for r in finished_request_records - ] + finished_requests = env.engine.generate(prompts, env.requests[0].sampling_params) # Verify results assert len(finished_requests) == len( @@ -882,11 +901,10 @@ async def test_run_engine(self): num_tokens_to_generate = env.requests[ request_id ].sampling_params.num_tokens_to_generate - request_record = fut.result() - request = request_record.merge(env.engine.controller.tokenizer) - assert request.generated_length == num_tokens_to_generate, ( + result = fut.result() + assert result.generated_length == num_tokens_to_generate, ( f"Request {request_id} expected to generate {num_tokens_to_generate} " - f"tokens but generated {request.generated_length}" + f"tokens but generated {result.generated_length}" ) engine_task.cancel() @@ -933,7 +951,6 @@ def test_return_log_probs(self): @pytest.mark.parametrize("pp_size", [1, 2]) @pytest.mark.parametrize("tp_size", [1, 2]) 
@pytest.mark.parametrize("model_provider", ["gpt", "mamba"]) - @pytest.mark.parametrize("transformer_impl", ["local", "inference_optimized"]) @torch.inference_mode() def test_parallel_inference( self, @@ -943,7 +960,6 @@ def test_parallel_inference( ep_size, sequence_parallel, materialize_only_last_token_logits, - transformer_impl, ): skip_if_mamba_sequence_packing_not_available(model_provider) @@ -959,22 +975,13 @@ def test_parallel_inference( pytest.skip(reason="Sequence parallelism requires tp_size > 1") elif tp_size > 1 and ep_size > 1 and not sequence_parallel: pytest.skip(reason="Sequence parallelism must be used with tp_size > 1 and ep_size > 1") - elif transformer_impl == "inference_optimized": - if ep_size > 1: - pytest.skip( - reason="MoE models are not supported with the inference optimized transformer." - ) - if tp_size > 1 and not sequence_parallel: - pytest.skip( - reason=( - "The inference optimized transformer requires sequence parallelism " - "when tp_size > 1." - ) - ) - if model_provider == "mamba": - pytest.skip( - reason="Mamba model is not supported with the inference optimized transformer." + elif pp_size > 1 and model_provider == "mamba": + pytest.skip( + reason=( + "Running hybrid models with pp_size > 1 and no attention on some " + "pipeline stages is not supported yet." 
) + ) env = self._run_test( model_provider=model_provider, @@ -983,7 +990,6 @@ def test_parallel_inference( expert_model_parallel_size=ep_size, sequence_parallel=sequence_parallel, materialize_only_last_token_logits=materialize_only_last_token_logits, - transformer_impl=transformer_impl, ) @pytest.mark.internal @@ -1032,7 +1038,8 @@ def test_events(self): max_prompt_length=10, num_tokens_to_generate=32, context_buffer_size_gb=0.001, # 0.001, # 8 blocks - context_max_tokens=8, + context_max_requests_override=8, + context_max_tokens_override=8, num_gap_steps=1, ) @@ -1081,58 +1088,27 @@ def test_chunked_prefill(self, model_provider: str): materialize_only_last_token_logits=False, model_provider=model_provider, context_block_size_tokens=256, - context_max_tokens=1000, + context_max_tokens_override=300, ) - @pytest.mark.internal - @pytest.mark.skipif( - not is_fa_min_version("2.7.3"), reason="need latest flash attn for dynamic batching" - ) - @pytest.mark.skip( - reason="test works in isolation, but memory dynamics change when run " - "within unt test suite." - ) - def test_suspend_resume_memory(self): - - # Run tests. - mem_usages = {} - for suspend_resume_interval in None, 8, 4, 2: # interval 1 acts funny. - - # Run test. - env = self._run_test(suspend_resume_interval=suspend_resume_interval, num_gap_steps=1) - - # Record memory usage. - mem_usages[suspend_resume_interval] = env.mem_usage - - # Clear memory to make recorded memories consistent between tests. - # TODO(@lmcafee): why is memory not automatically cleared? - # env.engine.suspend() # TODO(@lmcafee): useful? - del env - - # Utility methods. - get_alloc = lambda mem_stats: mem_stats["allocated_bytes.all.current"] - - # Validate overall 'end' memory usage. 
- golden_end_bytes = get_alloc(mem_usages[None]["end"]) - for interval, mem_usage in mem_usages.items(): - current_end_bytes = get_alloc(mem_usage["end"]) - assert math.isclose( - golden_end_bytes, current_end_bytes, rel_tol=0.01 - ), f"{current_end_bytes} != {golden_end_bytes}." - - # Validate 'suspend/resume' memory usage. - get_suspend_resume_bytes = lambda key: list( - get_alloc(list(d["suspend_resume"].values())[-1][key]) - for i, d in mem_usages.items() - if i is not None - ) - suspend_resume_mid_bytes = get_suspend_resume_bytes("mid") - suspend_resume_end_bytes = get_suspend_resume_bytes("end") - for mid_bytes in suspend_resume_mid_bytes: - assert math.isclose( - suspend_resume_mid_bytes[0], mid_bytes, rel_tol=0.01 - ), f"{mid_bytes} != {suspend_resume_mid_bytes[0]}." - for end_bytes in suspend_resume_end_bytes: - assert math.isclose( - suspend_resume_end_bytes[0], end_bytes, rel_tol=0.01 - ), f"{end_bytes} != {suspend_resume_end_bytes[0]}." + +if __name__ == "__main__": + test = TestDynamicInferenceEngine() + test.test_simple(4) + test.test_overflow_factor() + test.test_request_overflow() + test.test_token_overflow_transient() + # test.test_token_overflow_nontransient() # uncomment in megatron-core 0.16 + test.test_block_overflow() + test.test_multi_add() + test.test_fixed_output_lengths() + test.test_cuda_graph_request_counts() + test.test_cuda_graph_warmup(WarmupEngineMode.DECODE, 1, 8) + test.test_generate_function() + asyncio.run(test.test_run_engine()) + test.test_return_log_probs() + test.test_parallel_inference() + # test.test_events() # uncomment in megatron-core 0.16 + test.teardown_method(None) + print("~~~") + print("success.") diff --git a/tests/unit_tests/inference/engines/test_static_engine.py b/tests/unit_tests/inference/engines/test_static_engine.py index 40187a5eff9..699a4d1f473 100644 --- a/tests/unit_tests/inference/engines/test_static_engine.py +++ b/tests/unit_tests/inference/engines/test_static_engine.py @@ -12,11 +12,7 @@ from 
megatron.core import parallel_state from megatron.core.inference.contexts import StaticInferenceContext from megatron.core.inference.engines import StaticInferenceEngine -from megatron.core.inference.inference_request import ( - DynamicInferenceRequestRecord, - InferenceRequest, - Status, -) +from megatron.core.inference.inference_request import InferenceRequest, Status from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import ( GPTInferenceWrapper, ) @@ -192,19 +188,12 @@ def test_generate_dynamic(self, batch_size: int, num_trials: int, empty_prompt: prompts = ["" for i in range(batch_size)] else: prompts = ["sample" * (i + 1) for i in range(batch_size)] - results: List[Union[InferenceRequest, DynamicInferenceRequestRecord]] = ( - self.static_engine.generate( - prompts, sampling_params=SamplingParams(num_tokens_to_generate=10) - ) + results: List[InferenceRequest] = self.static_engine.generate( + prompts, sampling_params=SamplingParams(num_tokens_to_generate=10) ) assert len(results) == batch_size for result in results: - if isinstance(result, DynamicInferenceRequestRecord): - result = result.merge(self.static_engine.controller.tokenizer) - assert isinstance(result, InferenceRequest), ( - "expected ; found <%s>." % type(result).__name__ - ) assert ( result.status == Status.COMPLETED ), f"Status should be completed but its {result.status}" diff --git a/tests/unit_tests/inference/test_data_parallel_inference_coordinator.py b/tests/unit_tests/inference/test_data_parallel_inference_coordinator.py deleted file mode 100644 index 7b4fb4b4250..00000000000 --- a/tests/unit_tests/inference/test_data_parallel_inference_coordinator.py +++ /dev/null @@ -1,471 +0,0 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- -import asyncio -import random -import time -from collections import deque -from dataclasses import dataclass, field -from typing import Dict, List, Optional, Tuple - -import pytest -import torch.distributed as dist -from tqdm import tqdm - -from megatron.core.inference.data_parallel_inference_coordinator import ( - DataParallelInferenceCoordinator, -) -from megatron.core.inference.engines.dynamic_engine import DynamicInferenceEngine, RequestEntry -from megatron.core.inference.inference_client import InferenceClient -from megatron.core.inference.inference_request import ( - DynamicInferenceRequest, - DynamicInferenceRequestRecord, - Status, -) -from megatron.core.inference.sampling_params import SamplingParams -from megatron.core.utils import get_asyncio_loop -from tests.unit_tests.test_utilities import Utils - -try: - import zmq - - HAVE_ZMQ = True -except Exception: - HAVE_ZMQ = False - -IS_ZMQ_FLAKY = True - - -class DummyContext: - """Dummy inference context.""" - - def __init__(self): - self.active_cnt = 0 - - def get_active_request_count(self) -> int: - return self.active_cnt - - -class DummyEngine(DynamicInferenceEngine): - """Dummy inference engine that only implements coordinator-related methods.""" - - def __init__(self): - """We cannot call super().__init__() because it requires complex setup.""" - self.waiting_request_ids = deque() - self.requests: Dict[int, RequestEntry] = {} - self.suspend_signal = False - self.is_suspended = False - self._loop = get_asyncio_loop() - self.context = DummyContext() - self.running = asyncio.Event() - self.paused = asyncio.Event() - self.stopped = asyncio.Event() - self.pending_microbatch = deque() - self.received_pause: bool = False - self.received_stop: bool = False - - def add_request( - self, request_id: int, prompt: str, sampling_params: Optional[SamplingParams] = None - ) -> asyncio.Future[DynamicInferenceRequestRecord]: - """Dummy add_request.""" - - self.requests[request_id] = RequestEntry( - 
record=DynamicInferenceRequestRecord.from_request( - DynamicInferenceRequest( - prompt=prompt, - request_id=request_id, - sampling_params=sampling_params, - status=Status.WAITING_IN_QUEUE, - ) - ), - future=self._loop.create_future(), - ) - self.waiting_request_ids.append(request_id) - - return self.requests[request_id].future - - async def async_step(self, *, verbose: Optional[bool] = False) -> Dict: - """Dummy async_step.""" - # Finish "active" requests. - finished_request_records = [] - to_remove = [] - for request_id, entry in self.requests.items(): - request = entry.record[-1] - if request.status == Status.ACTIVE_AND_GENERATING_TOKENS: - request.sampling_params.num_tokens_to_generate -= 1 - if request.sampling_params.num_tokens_to_generate > 0: - continue - request.status = Status.COMPLETED - self.context.active_cnt -= 1 - finished_request_records.append(entry.record) - entry.future.set_result(entry.record) - to_remove.append(request_id) - for request_id in to_remove: - del self.requests[request_id] - - # Activate queued requests. They will "process" for 1 step. 
- active_request_ids = [] - while self.waiting_request_ids: - request_id = self.waiting_request_ids.popleft() - record = self.requests[request_id].record - record[-1].status = Status.ACTIVE_AND_GENERATING_TOKENS - self.context.active_cnt += 1 - active_request_ids.append(request_id) - - return { - "active_request_ids": active_request_ids, - "finished_request_records": finished_request_records, - "step_time": 0.01, - "cuda_graph_request_count": 1, - } - - -@dataclass -class CoordinatorTestConfig: - """Test configuration args.""" - - port: int = 46581 - mp_port: int = 49581 - launch_inference_coordinator: bool = True - stop_engines: bool = True - verify_results: bool = True - - num_requests: int = 10**1 - min_time_offset: float = 10 ** (-4) - max_time_offset: float = 10 ** (-3) - num_steps_to_finish: int = 1 - num_iterations: int = 1 - - tensor_model_parallel_size: int = 1 - pipeline_model_parallel_size: int = 1 - - -@dataclass -class CoordinatorTestEnv: - """Test environment, including requests.""" - - config: CoordinatorTestConfig - requests: List[Tuple] - engine: DummyEngine - responses: List[List[DynamicInferenceRequest]] = field(default_factory=list) - timing_data: Dict[str, Optional[float]] = field( - default_factory=lambda: { - "start_time": None, - "init_time": None, - "done_time": None, - "stop_time": None, - } - ) - - -class TestCoordinator: - - @classmethod - def _build_requests(cls, test_config: CoordinatorTestConfig) -> List[Tuple]: - ret = [] - - for _ in range(test_config.num_requests): - arrival_delta = random.uniform(test_config.min_time_offset, test_config.max_time_offset) - num_tokens = test_config.num_steps_to_finish - ret.append( - ("Hello world!", SamplingParams(num_tokens_to_generate=num_tokens), arrival_delta) - ) - return ret - - @classmethod - def _build_test_env(cls, test_config): - Utils.initialize_model_parallel( - tensor_model_parallel_size=test_config.tensor_model_parallel_size, - 
pipeline_model_parallel_size=test_config.pipeline_model_parallel_size, - ) - requests = cls._build_requests(test_config) - engine = DummyEngine() - engine.num_steps_to_finish = test_config.num_steps_to_finish - return CoordinatorTestEnv(config=test_config, requests=requests, engine=engine) - - @classmethod - async def _run_test(cls, **test_config_kwargs): - # Test environment. - test_config = CoordinatorTestConfig(**test_config_kwargs) - env = cls._build_test_env(test_config) - - # Connect each engine to their respective processes. - env.timing_data["start_time"] = time.time() - await env.engine.start_listening_to_data_parallel_coordinator( - inference_coordinator_port=test_config.port, - launch_inference_coordinator=test_config.launch_inference_coordinator, - ) - - results_success = False - shutdown_success = False - try: - if dist.get_rank() == 0: - client = InferenceClient(test_config.port) - await client.start() - env.timing_data["init_time"] = time.time() - - all_results = [] - for _ in range(test_config.num_iterations): - futures = [] - for request in tqdm(env.requests, "add_requests"): - prompt, sampling_params, arrival_delta = request - await asyncio.sleep(arrival_delta) - fut = client.add_request(prompt=prompt, sampling_params=sampling_params) - futures.append(fut) - results = await asyncio.wait_for(asyncio.gather(*futures), timeout=10.0) - all_results.append(results) - env.timing_data["done_time"] = time.time() - results_success = True - finally: - try: - if dist.get_rank() == 0: - if test_config.stop_engines: - await asyncio.wait_for(client.stop_engines(), timeout=10.0) - client.stop() - if test_config.stop_engines: - await asyncio.wait_for(env.engine.engine_loop_task, timeout=10.0) - shutdown_success = True - except: - env.engine.engine_loop_task.cancel() - - env.timing_data["stop_time"] = time.time() - - assert results_success, "Did not receive all results successfully." - assert shutdown_success, "Did not shutdown successfully." 
- if dist.get_rank() == 0: - env.responses = all_results - if test_config.verify_results: - for batch in all_results: - for record in batch: - request = record[-1] - assert request.status == Status.COMPLETED - - return env - - def teardown_method(self, method): - Utils.destroy_model_parallel() - - @pytest.mark.internal - @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") - @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") - @pytest.mark.asyncio - async def test_simple(self): - """Simple test with no TP or PP.""" - env = await self._run_test(tensor_model_parallel_size=1, pipeline_model_parallel_size=1) - - @pytest.mark.internal - @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") - @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") - @pytest.mark.asyncio - async def test_tp(self): - """Simple test with TP, but no PP.""" - env = await self._run_test(tensor_model_parallel_size=2, pipeline_model_parallel_size=1) - - @pytest.mark.internal - @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") - @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") - @pytest.mark.asyncio - async def test_pp(self): - """Simple test with no TP, but PP.""" - env = await self._run_test(tensor_model_parallel_size=1, pipeline_model_parallel_size=2) - - @pytest.mark.internal - @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") - @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") - @pytest.mark.asyncio - async def test_tp_pp(self): - """Simple test with both TP and PP.""" - env = await self._run_test(tensor_model_parallel_size=2, pipeline_model_parallel_size=2) - - @pytest.mark.internal - @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") - @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") - @pytest.mark.asyncio - async def test_pp(self): - """Simple test with no TP, but PP.""" - env = await 
self._run_test(tensor_model_parallel_size=1, pipeline_model_parallel_size=2) - - @pytest.mark.internal - @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") - @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") - @pytest.mark.asyncio - async def test_tp_pp(self): - """Simple test with both TP and PP.""" - env = await self._run_test(tensor_model_parallel_size=2, pipeline_model_parallel_size=2) - - @pytest.mark.internal - @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") - @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") - @pytest.mark.asyncio - async def test_pause(self): - """Pause/resume test.""" - test_config = CoordinatorTestConfig( - tensor_model_parallel_size=2, pipeline_model_parallel_size=1, num_requests=32 - ) - env = self._build_test_env(test_config) - - await env.engine.start_listening_to_data_parallel_coordinator( - inference_coordinator_port=test_config.port, launch_inference_coordinator=True - ) - - success = False - try: - if dist.get_rank() == 0: - # Start client as usual. - client = InferenceClient(test_config.port) - await client.start() - - ### TEST 1: Pause after all requests have finished. - futures = [] - for i, request in enumerate(env.requests[:2]): - prompt, sampling_params, _ = request - fut = client.add_request(prompt=prompt, sampling_params=sampling_params) - futures.append(fut) - # Wait a sufficient time for the requests to complete. - await asyncio.sleep(0.1) - # Get a pause awaitable. - to_pause = client.pause_engines() - awaitables = futures + [to_pause] - # Gather all awaitables; assert that the requests actually complete. - try: - await asyncio.wait_for(asyncio.gather(*awaitables), timeout=0.1) - except asyncio.TimeoutError: - pytest.fail("Simple pause did not succeed.") - - ### TEST 2: Ensure that requests can be added while paused. 
- prompt, sampling_params, _ = env.requests[2] - paused_fut = client.add_request(prompt=prompt, sampling_params=sampling_params) - with pytest.raises(asyncio.TimeoutError): - await asyncio.wait_for(paused_fut, timeout=0.1) - - ### TEST 3: Resume after pause and drain the queued requests. - client.unpause_engines() - # TODO: The system should not be incorrectly raising a cancelled error here. - with pytest.raises(asyncio.CancelledError): - await paused_fut - - ### TEST 4: Add new requests after resume. - futures = [] - for i, request in enumerate(env.requests[3:4]): - prompt, sampling_params, _ = request - fut = client.add_request(prompt=prompt, sampling_params=sampling_params) - futures.append(fut) - # Wait a sufficient time for the requests to complete. - await asyncio.sleep(0.1) - # Gather all awaitables; assert that the requests actually complete. - try: - await asyncio.wait_for(asyncio.gather(*futures), timeout=0.1) - except asyncio.TimeoutError: - pytest.fail("Simple resume did not succeed.") - - ### TEST 5: Pause while requests are being processed. - ### Note: this situation cannot occur in a synchronous system. - if False: - for request in env.engine.requests[4:6]: - request.sampling_params.num_tokens_to_generate = 100 - futures = [] - for i, request in enumerate(env.requests[4:6]): - prompt, sampling_params, _ = request - fut = client.add_request(prompt=prompt, sampling_params=sampling_params) - futures.append(fut) - # Do not wait for the requests to complete. - await client.pause_engines() - # Gather all awaitables; assert that the requests do not complete. 
- with pytest.raises(asyncio.TimeoutError): - await asyncio.wait_for(asyncio.gather(*futures), timeout=0.1) - success = True - finally: - try: - if dist.get_rank() == 0: - await asyncio.wait_for(client.stop_engines(), timeout=5.0) - client.stop() - await asyncio.wait_for(env.engine.engine_loop_task, timeout=5.0) - except asyncio.TimeoutError: - env.engine.engine_loop_task.cancel() - assert success, "Pause/resume test did not complete successfully." - - @pytest.mark.internal - @pytest.mark.skipif(not HAVE_ZMQ, reason="pyzmq is required for this test") - @pytest.mark.skipif(IS_ZMQ_FLAKY, reason="pyzmq is flaky in CI") - @pytest.mark.asyncio - async def test_throughput(self): - """Throughput test with no TP or PP.""" - import torch - import torch.distributed as dist - - env = await self._run_test( - tensor_model_parallel_size=1, - pipeline_model_parallel_size=1, - num_requests=10**4, - num_iterations=10, - min_time_offset=0.0, - max_time_offset=0.0, - ) - - flags = torch.tensor([1, 1, 1], dtype=torch.int, device=torch.cuda.current_device()) - - init_duration = golden_init_duration = None - run_duration = golden_run_duration = None - stop_duration = golden_stop_duration = None - - if dist.get_rank() == 0: - init_duration = (env.timing_data["init_time"] - env.timing_data["start_time"]) * 10**3 - golden_init_duration = 4445.64 # ms - run_duration = (env.timing_data["done_time"] - env.timing_data["init_time"]) * 10**3 - golden_run_duration = 2906.29 # ms - stop_duration = (env.timing_data["stop_time"] - env.timing_data["done_time"]) * 10**3 - golden_stop_duration = 33.17 # ms - - def clamp_to_golden_value(value, golden_value, delta=0.1): - return value > golden_value * (1 - delta) and value < golden_value * (1 + delta) - - if not clamp_to_golden_value(init_duration, golden_init_duration, delta=0.5): - flags[0] = 0 - if not clamp_to_golden_value(run_duration, golden_run_duration, delta=0.2): - flags[1] = 0 - if not clamp_to_golden_value(stop_duration, golden_stop_duration, 
delta=1.0): - flags[2] = 0 - - # Synchronize results - dist.broadcast(flags, src=0) - - if dist.get_rank() == 0: - # Print current results. - print(f"Initialization time: {init_duration:.2f} ms") - print(f"Run time: {run_duration:.2f} ms") - print(f"Stop time: {stop_duration:.2f} ms") - - assert flags[0].item() == 1, ( - f"WARNING: Init duration {init_duration:.2f}s deviates from " - f"golden value {golden_init_duration:.2f}s" - ) - assert flags[1].item() == 1, ( - f"WARNING: Run duration {run_duration:.2f}s deviates from " - f"golden value {golden_run_duration:.2f}s" - ) - assert flags[2].item() == 1, ( - f"WARNING: Stop duration {stop_duration:.2f}s deviates from " - f"golden value {golden_stop_duration:.2f}s" - ) - - print( - f"ZMQ throughput is approximately " - f"{env.config.num_requests * env.config.num_iterations / (run_duration):.2f} " - f"requests/ms" - ) - else: - assert flags[0].item() == 1 - assert flags[1].item() == 1 - assert flags[2].item() == 1 - - -if __name__ == "__main__": - test = TestCoordinator() - asyncio.run(test.test_simple()) - asyncio.run(test.test_tp()) - asyncio.run(test.test_pp()) - asyncio.run(test.test_tp_pp()) - asyncio.run(test.test_pause()) - asyncio.run(test.test_throughput()) - test.teardown_method(None) - print("~~~") - print("success.") diff --git a/tests/unit_tests/inference/test_wandb_logging.py b/tests/unit_tests/inference/test_wandb_logging.py index 1d5d054b80e..1512e805f9c 100644 --- a/tests/unit_tests/inference/test_wandb_logging.py +++ b/tests/unit_tests/inference/test_wandb_logging.py @@ -50,6 +50,7 @@ def _get_dynamic_context( max_sequence_length=512, buffer_size_gb=0.03, block_size_tokens=128, + buffer_guaranteed_fraction=0.1, metrics_writer=None, ): """Helper to create a DynamicInferenceContext.""" @@ -61,9 +62,9 @@ def _get_dynamic_context( max_sequence_length=max_sequence_length, num_cuda_graphs=None, buffer_size_gb=buffer_size_gb, + buffer_guaranteed_fraction=buffer_guaranteed_fraction, 
block_size_tokens=block_size_tokens, metrics_writer=metrics_writer, - unified_memory_level=0, # unit tests currently broken with UVM ) @pytest.mark.internal @@ -82,11 +83,12 @@ def test_get_kvcache_utilization_stats_with_requests(self): assert 'active_utilization' in stats assert 'active_request_count' in stats assert 'paused_request_count' in stats + assert 'gtd_block_count' in stats assert 'block_count_avail' in stats + assert 'num_non_gtd_blocks' in stats assert 'active_token_count' in stats assert 'total_request_count' in stats - assert 'max_total_requests' in stats - assert 'max_active_requests' in stats + assert 'max_requests' in stats # Verify values for empty context assert stats['allocated_blocks'] == 0 @@ -132,11 +134,12 @@ def test_get_kvcache_utilization_stats_with_requests(self): assert stats_after['total_blocks'] == stats['total_blocks'] assert stats_after['total_blocks'] > 0 + # Verify that gtd_block_count remains constant + assert stats_after['gtd_block_count'] == stats['gtd_block_count'] + # Verify that max_requests remains constant - assert stats_after['max_total_requests'] == stats['max_total_requests'] - assert stats_after['max_total_requests'] > 0 - assert stats_after['max_active_requests'] == stats['max_active_requests'] - assert stats_after['max_active_requests'] > 0 + assert stats_after['max_requests'] == stats['max_requests'] + assert stats_after['max_requests'] > 0 # Verify block availability decreased after allocation assert stats_after['block_count_avail'] < stats['block_count_avail'] @@ -144,7 +147,7 @@ def test_get_kvcache_utilization_stats_with_requests(self): # Verify relationship: allocated_blocks + block_count_avail + 1 (dummy) = total assert ( stats_after['allocated_blocks'] + stats_after['block_count_avail'] + 1 - == dynamic_context.block_allocator.total_count + == dynamic_context.block_allocator.block_count_total ) # Verify utilization bounds [0, 1] @@ -177,11 +180,12 @@ def test_kvcache_utilization_stats_types(self): 
'active_unique_blocks', 'active_request_count', 'paused_request_count', + 'gtd_block_count', 'block_count_avail', + 'num_non_gtd_blocks', 'active_token_count', 'total_request_count', - 'max_total_requests', - 'max_active_requests', + 'max_requests', ] for field in int_fields: @@ -236,8 +240,8 @@ def test_paused_requests_in_stats(self): max_sequence_length=128, num_cuda_graphs=None, buffer_size_gb=0.01, # Small buffer to force pausing + buffer_guaranteed_fraction=0.1, block_size_tokens=32, - unified_memory_level=0, # unit tests currently broken with UVM ) # Add multiple requests to potentially trigger pausing diff --git a/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py b/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py index ee6bc5b2468..10ffe2fdd40 100644 --- a/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py +++ b/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py @@ -80,9 +80,6 @@ def setup_model( fp8="hybrid" if fp8 else None, fp8_recipe="tensorwise" if fp8 else None, fp8_param=fp8, - tensor_model_parallel_size=tensor_model_parallel_size, - pipeline_model_parallel_size=pipeline_model_parallel_size, - pipeline_dtype=dtype, ) if dtype == torch.bfloat16: transformer_config.bf16 = True @@ -115,15 +112,15 @@ def setup_model( else: inference_context = DynamicInferenceContext( params_dtype=dtype, - num_layers=transformer_config.num_layers // pipeline_model_parallel_size, + num_layers=transformer_config.num_layers, kv_channels=transformer_config.kv_channels, num_attention_heads=transformer_config.num_attention_heads, max_sequence_length=2048, - buffer_size_gb=0.2, + buffer_size_gb=1, + buffer_guaranteed_fraction=0.1, materialize_only_last_token_logits=False, use_flashinfer_fused_rope=None, # default to using flash-infer if available # this is for compatibility with the LTS 
environment - unified_memory_level=0, # unit tests currently broken with UVM ) inference_wrapped_model = GPTInferenceWrapper( @@ -231,75 +228,41 @@ def detokenize(self, inp, skip_special_tokens=False): sampled_logits >= expected_min_value ), f"The sampled logits should all be greater than {expected_min_value} but its {sampled_logits}" - @pytest.mark.parametrize("backend", ["torch"]) - def test_sample_from_dynamic_logits(self, backend): + def test_sample_from_dynamic_logits(self): batch_size = 12 self.setup_model(torch.float32, batch_size=batch_size, static=False) self.mock_tokenizer.eod = self.vocab_size - context = self.text_generation_controller.inference_wrapped_model.inference_context - context.materialize_only_last_token_logits = True - - # Prepare sampling params in human-readable format, to aid with test maintenance. - sampling_test_cases: List[Tuple[SamplingParams, List[int]]] = [ - (SamplingParams(temperature=0.1, top_p=0.01), [9, 6, 10]), - (SamplingParams(temperature=5.0, top_k=15), [0, 3, 2]), + active_sampling_map: List[Tuple[SamplingParams, List[int]]] = [ + (SamplingParams(top_k=3), [0, 3, 2]), (SamplingParams(top_p=0.8), [4, 1, 7]), - (SamplingParams(temperature=10.0, top_k=5), [11, 5, 8]), + (SamplingParams(top_k=5), [11, 5, 8]), + # (SamplingParams(top_k=5, top_p=0.7), [11, 5, 8]), # uncomment for FlashInfer sampling + (SamplingParams(temperature=2.0), [9, 6, 10]), ] - # For non-torch backends, test simultaneous top_k and top_p sampling. - if backend != "torch": - sampling_test_cases[3][0].top_p = 0.8 - - # Convert sampling params to non-readable format. - rev_sampling_dict: List[SamplingParams] = [None] * batch_size - for sampling_params, indices in sampling_test_cases: + rev_sampling_map: List[SamplingParams] = [None] * batch_size + for sampling_params, indices in active_sampling_map: for idx in indices: - rev_sampling_dict[idx] = sampling_params - - # Prepare metadata for sample bookkeeping. 
- request_metadata_labels = DynamicInferenceRequest.get_metadata_labels() - request_metadata = torch.empty( - (batch_size, len(request_metadata_labels)), dtype=torch.float32 - ).cuda() - top_k_values = torch.Tensor([s.top_k for s in rev_sampling_dict]).cuda() - request_metadata[:, request_metadata_labels["top_k"]] = top_k_values - top_p_values = torch.Tensor([s.top_p for s in rev_sampling_dict]).cuda() - request_metadata[:, request_metadata_labels["top_p"]] = top_p_values - temp_values = torch.Tensor([s.temperature for s in rev_sampling_dict]).cuda() - request_metadata[:, request_metadata_labels["temperature"]] = temp_values - - # Bookkeeping. - self.text_generation_controller._dynamic_step_sample_bookkeeping( - request_metadata=request_metadata - ) + rev_sampling_map[idx] = sampling_params - # Sampling. - logits = torch.arange(0, self.vocab_size).repeat(batch_size, 1).unsqueeze(0).float().cuda() - sampled_logits = self.text_generation_controller._dynamic_step_sample_logits( - logits, backend=backend + last_token_logits = torch.arange(0, self.vocab_size).repeat(batch_size, 1).float().cuda() + sampled_logits, _ = self.text_generation_controller.sample_from_dynamic_logits( + last_token_logits, active_sampling_map, vocab_size=self.vocab_size ) + top_k_values = torch.Tensor([s.top_k for s in rev_sampling_map]).cuda().unsqueeze(1) + top_k_values[top_k_values == 0] = self.vocab_size + top_p_values = torch.Tensor([s.top_p for s in rev_sampling_map]).cuda().unsqueeze(1) + temp_values = torch.Tensor([s.temperature for s in rev_sampling_map]).cuda().unsqueeze(1) vocab_indices = torch.arange(self.vocab_size).cuda() - top_k_values[top_k_values == 0] = self.vocab_size assert torch.all( sampled_logits >= self.vocab_size - top_k_values ), f"The sampled logits should all be greater than {self.vocab_size - top_k_values} but its {sampled_logits}" - l = logits.squeeze(0) - sampled_l = l.div(temp_values.unsqueeze(1)).softmax(dim=-1) - top_k_mask = vocab_indices.unsqueeze(0) < 
(self.vocab_size - top_k_values.unsqueeze(1)) + l = last_token_logits[0] + sampled_l = l.div(temp_values).softmax(dim=-1) + top_k_mask = vocab_indices.unsqueeze(0) < (self.vocab_size - top_k_values) sampled_l.masked_fill_(top_k_mask, 0.0) - top_p_mask = sampled_l.cumsum(dim=-1) > top_p_values.unsqueeze(1) - - first_excluded = torch.where( - top_p_mask.any(dim=-1), - top_p_mask.float().argmax(dim=-1), - torch.full((batch_size,), self.vocab_size, device=top_p_mask.device), - ) - last_included = torch.clamp(first_excluded - 1, min=0) - start_idx = torch.clamp(self.vocab_size - top_k_values, min=0).long() - last_included = torch.max(last_included, start_idx) - expected_min_values = l.gather(1, last_included.unsqueeze(1)).squeeze(1) + expected_min_values = sampled_l[sampled_l.cumsum(dim=-1) > top_p_values].amax(dim=-1) assert torch.all( sampled_logits >= expected_min_values ), f"The sampled logits should all be greater than {expected_min_values} but its {sampled_logits}" @@ -810,15 +773,14 @@ def test_sampled_tokens_match_with_parallelism(self, static, tp_size, pp_size): ), ) ) - expected_active_requests = set(int(x) for x in active_requests.keys()) + sampling_params = SamplingParams(top_k=10, return_log_probs=True, termination_id=-1) + sampling_map = [(sampling_params, list(range(len(active_requests))))] while context.has_unfinished_requests(): - result = self.text_generation_controller.generate_output_tokens_dynamic_batch() + result = self.text_generation_controller.generate_output_tokens_dynamic_batch( + active_sampling_map=sampling_map + ) new_tokens = result["sample"] - active_ids = result["active_request_ids"].tolist() - finished_ids = result["finished_request_ids"].tolist() - assert len(new_tokens) == len(expected_active_requests) - assert set(active_ids) == expected_active_requests - expected_active_requests -= set(finished_ids) + assert len(new_tokens) == len(active_requests) for i, token in enumerate(new_tokens.tolist()): all_generated_tokens[i].append(token) 
diff --git a/tests/unit_tests/test_checkpointing.py b/tests/unit_tests/test_checkpointing.py index 4bbf54301f5..194f9721300 100644 --- a/tests/unit_tests/test_checkpointing.py +++ b/tests/unit_tests/test_checkpointing.py @@ -9,8 +9,6 @@ import torch import torch.distributed.checkpoint -from megatron.core.distributed import DistributedDataParallelConfig -from megatron.core.distributed.fsdp.mcore_fsdp_adapter import FullyShardedDataParallel from megatron.core.num_microbatches_calculator import ( init_num_microbatches_calculator, unset_num_microbatches_calculator, @@ -25,7 +23,6 @@ _load_base_checkpoint, get_checkpoint_tracker_filename, load_checkpoint, - read_metadata, save_checkpoint, ) from megatron.training.global_vars import set_args @@ -54,9 +51,6 @@ def __init__(self, state_dict): self.is_stub_optimizer = False self._called_metadata = [] - # Optimizers are expected to have this attribute for checkpointing. - self.param_groups = [] - def state_dict(self, is_loading=False): return self._state_dict @@ -117,8 +111,6 @@ def create_args(): args.retro_add_retriever = False args.ckpt_convert_update_legacy_dist_opt_format = False args.ckpt_step = None - args.swiglu = True - args.num_experts = 1 yield args @@ -199,7 +191,7 @@ def test_load_base_checkpoint( assert ckpt_type == expected_ckpt_type -@pytest.mark.parametrize("ckpt_format", ["torch", "torch_dcp", "fsdp_dtensor"]) +@pytest.mark.parametrize("ckpt_format", ["torch", "torch_dcp"]) def test_save_checkpoint(init_model_parallel, create_args, tmp_path_dist_ckpt, ckpt_format): """Test save_checkpoint.""" args = create_args @@ -215,15 +207,6 @@ def test_save_checkpoint(init_model_parallel, create_args, tmp_path_dist_ckpt, c config = TransformerConfig(num_layers=1, kv_channels=1) model = MockModel(config) optimizer = MockState({"optimizer": "optimizer_state"}) - if ckpt_format == "fsdp_dtensor": - model = FullyShardedDataParallel( - config=config, - ddp_config=DistributedDataParallelConfig( - 
use_distributed_optimizer=True, use_megatron_fsdp=True - ), - module=model, - ) - optimizer = MockState({"state": {}}) opt_param_scheduler = MockState({"opt_param_scheduler": "scheduler_state"}) num_floating_point_operations_so_far = 456 @@ -243,7 +226,7 @@ def test_save_checkpoint(init_model_parallel, create_args, tmp_path_dist_ckpt, c expected_ckpt_path = None if ckpt_format == "torch": expected_ckpt_path = ckpt_dir / "mp_rank_00" / "model_optim_rng.pt" - elif ckpt_format in ["torch_dcp", "fsdp_dtensor"]: + elif ckpt_format == "torch_dcp": expected_ckpt_path = ckpt_dir / ".metadata" assert os.path.exists(expected_ckpt_path) @@ -354,27 +337,3 @@ def test_dist_checkpoint_versioning(init_model_parallel, tmp_path_dist_ckpt, cre first_job_mock_metadata, second_job_mock_metadata, ] - - -@pytest.mark.parametrize( - "metadata_content,expected_iter,expected_release", - [ - ("456", 456, False), # Normal iteration - ("release", 0, True), # Release checkpoint should return iteration=1 - ("123", 123, False), # Another normal iteration - ], -) -def test_read_metadata_non_distributed(tmp_path, metadata_content, expected_iter, expected_release): - """Test read_metadata without torch.distributed initialized.""" - test_dir = tmp_path / "test_read_metadata_non_distributed" - test_dir.mkdir(parents=True, exist_ok=True) - tracker_file = test_dir / "latest_checkpointed_iteration.txt" - - with open(tracker_file, "w") as f: - f.write(metadata_content) - - with mock.patch('torch.distributed.is_initialized', return_value=False): - max_iter, release = read_metadata(str(tracker_file)) - - assert max_iter == expected_iter, f"Expected iteration {expected_iter}, got {max_iter}" - assert release == expected_release, f"Expected release={expected_release}, got {release}" diff --git a/tests/unit_tests/test_process_groups_config.py b/tests/unit_tests/test_process_groups_config.py index 013bc6746d4..032de47e951 100644 --- a/tests/unit_tests/test_process_groups_config.py +++ 
b/tests/unit_tests/test_process_groups_config.py @@ -67,29 +67,6 @@ def test_hierarchical_context_parallel_groups(self, mocker): assert model_pgs.hcp[0] == mock_pg1 assert model_pgs.hcp[1] == mock_pg2 - def test_repr(self, mocker): - """Test __repr__ shows active process groups and their sizes.""" - tp_size = 4 - pp_size = 2 - mock_tp = mocker.Mock(spec=dist.ProcessGroup) - mock_tp.size.return_value = tp_size - mock_pp = mocker.Mock(spec=dist.ProcessGroup) - mock_pp.size.return_value = pp_size - - # Test empty collection - empty_pgs = ProcessGroupCollection() - assert repr(empty_pgs) == "ProcessGroupCollection(empty)" - - # Test collection with process groups - model_pgs = ProcessGroupCollection() - model_pgs.tp = mock_tp - model_pgs.pp = mock_pp - - repr_str = repr(model_pgs) - assert "ProcessGroupCollection(" in repr_str - assert f"tp({tp_size})" in repr_str - assert f"pp({pp_size})" in repr_str - class TestPGConfigDefaultInitialization: diff --git a/tests/unit_tests/test_rl_utils.py b/tests/unit_tests/test_rl_utils.py deleted file mode 100644 index 5ea89ff2a02..00000000000 --- a/tests/unit_tests/test_rl_utils.py +++ /dev/null @@ -1,656 +0,0 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- -from unittest.mock import patch - -import torch - -from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig -from megatron.core.models.common.language_module.language_module import LanguageModule -from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec -from megatron.core.models.gpt.gpt_model import GPTModel -from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer -from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed -from megatron.core.transformer import TransformerConfig -from megatron.core.transformer.module import Float16Module -from megatron.rl import rl_utils -from megatron.rl.agent.api import TokenRollout -from megatron.training import arguments, global_vars -from tests.unit_tests.test_utilities import Utils - -BATCH = 2 -SEQ = 4 -VOCAB = 754 - - -class MockModel(LanguageModule): - def __init__(self, batch=BATCH, seq=SEQ, vocab=VOCAB): - self.batch = batch - self.seq = seq - self.vocab = vocab - self.config = TransformerConfig(num_attention_heads=1, num_layers=1) - - def __call__(self, x, position_ids, attention_mask, **kwargs): - del position_ids - del attention_mask - batch, seq = x.shape - mock_model_outputs = torch.ones((batch, seq, self.vocab), device=x.device) - return mock_model_outputs - - def load_state_dict(self, params): - del params - - def train(self, mode=True): - del mode - - def state_dict(self): - return {} - - -class MockTokenizer: - def __init__(self): - self.pad = 42 - self.eod = 43 - self.vocab_size = VOCAB - self.bos = None - - def detokenize(self, tokens): - return [str(tok) for tok in tokens] - - -def test_get_logprobs(): - """Test that getting logprobs at least does not crash.""" - # We use args inside of get_logprobs, we need to initialize them. 
- args = arguments.parse_args(ignore_unknown_args=True) - global_vars.set_args(args) - - tokens = torch.ones((BATCH, SEQ), dtype=torch.long) - logprobs = rl_utils.get_logprobs(MockModel(), tokens, position_ids=None, attention_mask=None) - # We chop off 1 element from the sequence dimension. - assert logprobs.shape == (BATCH, SEQ - 1) - # As we return ones as logits, all logprobs should be the same. - assert torch.all(logprobs == logprobs[0, 0]).item() - - -def test_get_logprobs_with_sequence_packing(): - """Test that getting logprobs at least does not crash.""" - # We use args inside of get_logprobs, we need to initialize them. - args = arguments.parse_args(ignore_unknown_args=True) - setattr(args, 'rl_use_sequence_packing', True) - global_vars.set_args(args) - - tokens = torch.ones((BATCH, SEQ), dtype=torch.long) - logprobs = rl_utils.get_logprobs(MockModel(), tokens, position_ids=None, attention_mask=None) - # We chop off 1 element from the sequence dimension. - assert logprobs.shape == (BATCH, SEQ - 1) - # As we return ones as logits, all logprobs should be the same. 
- assert torch.all(logprobs == logprobs[0, 0]).item() - - -def test_prepare_trajectories(): - # Make sure sequence packing is disabled for this test - import megatron.training.global_vars as global_vars - - old_args = global_vars.get_args() if global_vars.get_args() is not None else None - - # Create minimal args without sequence packing - args = type('Args', (), {})() - args.rl_use_sequence_packing = False - args.rl_inference_logprobs_is_correction = True - global_vars.set_args(args) - - tokenizer = MockTokenizer() - r1 = TokenRollout( - trajectory=[1, 2, tokenizer.eod], - reward=3.14, - generation_mask=[False, True, True], - logprobs=[0.1, 0.2, 0.3], - env_id='MEGAENV', - problem_id="2", - ) - r2 = TokenRollout( - trajectory=[1, 2, tokenizer.eod], - reward=0.14, - generation_mask=[False, True, True], - logprobs=[0.1, 0.2, 0.3], - env_id='MEGAENV', - problem_id="2", - ) - rollouts = [[r1, r2]] - seq_len = 7 - - trajs, genmask, inference_logprobs = rl_utils.prepare_trajectories(rollouts, tokenizer, seq_len) - - # Check that inference logprobs are being returned. 
- torch.testing.assert_close(inference_logprobs[0], torch.tensor([0.1, 0.2, 0.3])) - torch.testing.assert_close(inference_logprobs[1], torch.tensor([0.1, 0.2, 0.3])) - - expected_mask = torch.tensor( - [ - [False, True, True, False, False, False, False], - [False, True, True, False, False, False, False], - ] - ) - torch.testing.assert_close(genmask, expected_mask) - - expected_trajs = torch.tensor([[1, 2, 43, 42, 42, 42, 42], [1, 2, 43, 42, 42, 42, 42]]) - torch.testing.assert_close(trajs, expected_trajs) - - -def test_prepare_trajectories_with_packing(): - """Test that rollouts data is properly prepared with sequence packing enabled.""" - # Initialize args for sequence packing - args = arguments.parse_args(ignore_unknown_args=True) - setattr(args, 'micro_batch_size', 1) - setattr(args, 'global_batch_size', 1) - setattr(args, 'rl_use_sequence_packing', True) - global_vars.set_args(args) - - tokenizer = MockTokenizer() - r1 = TokenRollout( - trajectory=[1, 2, tokenizer.eod], - reward=3.14, - generation_mask=[False, True, True], - logprobs=[0.1, 0.2, 0.3], - env_id='MEGAENV', - problem_id="2", - ) - r2 = TokenRollout( - trajectory=[1, 2, 3, tokenizer.eod], - reward=0.14, - generation_mask=[False, True, True, True], - logprobs=[0.1, 0.2, 0.3, -1.2], - env_id='MEGAENV', - problem_id="2", - ) - rollouts = [[r1, r2]] - seq_len = 7 - - trajs, genmask, inference_logprobs = rl_utils.prepare_trajectories(rollouts, tokenizer, seq_len) - - # With sequence packing, inference logprobs should be padded to same length - assert isinstance(inference_logprobs, torch.Tensor) - assert inference_logprobs.shape == (2, 7) # 2 sequences, each padded to seq_len - - # Check values (padded with zeros) - torch.testing.assert_close( - inference_logprobs[0], torch.tensor([0.1, 0.2, 0.3, 0.0, 0.0, 0.0, 0.0]) - ) - torch.testing.assert_close( - inference_logprobs[1], torch.tensor([0.1, 0.2, 0.3, -1.2, 0.0, 0.0, 0.0]) - ) - - expected_mask = torch.tensor( - [ - [False, True, True, False, False, 
False, False], - [False, True, True, True, False, False, False], - ] - ) - torch.testing.assert_close(genmask, expected_mask) - - expected_trajs = torch.tensor([[1, 2, 43, 42, 42, 42, 42], [1, 2, 3, 43, 42, 42, 42]]) - torch.testing.assert_close(trajs, expected_trajs) - - -def test_grpo_loss_calculation_all_pi_eq(): - # All policies are equal: clamping is inactive, ratios are ones. - current_logprobs = torch.ones(BATCH, SEQ) - old_logprobs = torch.ones(BATCH, SEQ) - ref_logprobs = torch.ones(BATCH, SEQ) - advantages = torch.zeros(BATCH) - loss, kl_term, ratios, entropy_term, _, _ = rl_utils.calculate_grpo_loss( - current_logprobs=current_logprobs, - old_logprobs=old_logprobs, - ref_logprobs=ref_logprobs, - advantages=advantages, - clamp_eps_lower=0.1, - clamp_eps_upper=0.1, - kl_beta=0.1, - entropy_weight=0.0, - ) - torch.testing.assert_close(loss, torch.zeros_like(loss)) - torch.testing.assert_close(kl_term, torch.zeros_like(kl_term)) - torch.testing.assert_close(ratios, torch.ones_like(ratios)) - torch.testing.assert_close(entropy_term, torch.ones_like(ratios) * torch.e) - - -def test_grpo_loss_calculation_2x_ratios(): - # All policies are equal: clamping is inactive, ratios are ones. - current_logprobs = torch.ones(BATCH, SEQ) - old_logprobs = torch.ones(BATCH, SEQ) - torch.log(torch.Tensor([2])) - ref_logprobs = torch.ones(BATCH, SEQ) - advantages = torch.ones(BATCH) - loss, kl_term, ratios, _, _, _ = rl_utils.calculate_grpo_loss( - current_logprobs=current_logprobs, - old_logprobs=old_logprobs, - ref_logprobs=ref_logprobs, - advantages=advantages, - clamp_eps_lower=2.1, - clamp_eps_upper=2.1, - kl_beta=0.0, - entropy_weight=0.0, - ) - # Clamping does not affect us, as 2.1 [eps] > 2 [ratio]. - # kl_beta = 0 -> we only have the non-kl term of the loss active. - torch.testing.assert_close(loss, -torch.ones_like(loss) * 2) - # pi and pi_{ref} are the same here. 
- torch.testing.assert_close(kl_term, torch.zeros_like(kl_term)) - # Current probs are 2x more probable than old pi. - torch.testing.assert_close(ratios, torch.ones_like(ratios) * 2) - - -def test_entropy_calculation(): - # All policies are equal: clamping is inactive, ratios are ones. - current_logprobs = torch.ones(BATCH, SEQ) - old_logprobs = torch.ones(BATCH, SEQ) - ref_logprobs = torch.ones(BATCH, SEQ) - advantages = torch.zeros(BATCH) - loss, _, ratios, entropy_term, _, _ = rl_utils.calculate_grpo_loss( - current_logprobs=current_logprobs, - old_logprobs=old_logprobs, - ref_logprobs=ref_logprobs, - advantages=advantages, - clamp_eps_lower=0.1, - clamp_eps_upper=0.1, - kl_beta=0.0, - entropy_weight=1.0, - ) - torch.testing.assert_close(loss, torch.ones_like(ratios) * torch.e) - torch.testing.assert_close(entropy_term, torch.ones_like(ratios) * torch.e) - - -def test_grpo_loss_truncation(): - - # All ratios are 2 - _, _, _, _, truncated_from_above, truncated_from_below = rl_utils.calculate_grpo_loss( - current_logprobs=torch.ones(BATCH, SEQ), - old_logprobs=0.5 * torch.ones(BATCH, SEQ), - ref_logprobs=torch.ones(BATCH, SEQ), - advantages=torch.zeros(BATCH), - clamp_eps_lower=0.1, - clamp_eps_upper=0.1, - kl_beta=0.1, - entropy_weight=0.0, - ) - assert truncated_from_above.float().mean() == 1 - assert truncated_from_below.float().sum() == 0 - - # All ratios are 0.01 - _, _, _, _, truncated_from_above, truncated_from_below = rl_utils.calculate_grpo_loss( - current_logprobs=0.01 * torch.ones(BATCH, SEQ), - old_logprobs=torch.ones(BATCH, SEQ), - ref_logprobs=torch.ones(BATCH, SEQ), - advantages=torch.zeros(BATCH), - clamp_eps_lower=0.1, - clamp_eps_upper=0.1, - kl_beta=0.1, - entropy_weight=0.0, - ) - assert truncated_from_above.float().sum() == 0 - assert truncated_from_below.float().mean() == 1 - - current_logprobs = torch.tensor([[1.0, 1.0], [1.0, 1.0]]) - old_logprobs = torch.tensor([[0.5, 2.0], [0.05, 1.0]]) - _, _, _, _, truncated_from_above, 
truncated_from_below = rl_utils.calculate_grpo_loss( - current_logprobs=current_logprobs, - old_logprobs=old_logprobs, - ref_logprobs=old_logprobs, - advantages=torch.zeros(BATCH), - clamp_eps_lower=0.1, - clamp_eps_upper=0.1, - kl_beta=0.1, - entropy_weight=0.0, - ) - # ratios: [[2., 0.5],[20., 1.]] - torch.testing.assert_close(truncated_from_above, torch.tensor([[True, False], [True, False]])) - torch.testing.assert_close(truncated_from_below, torch.tensor([[False, True], [False, False]])) - - -@patch('megatron.rl.rl_utils.mpu') -def test_prepare_data_for_update(mock_mpu): - """Test that getting logprobs at least does not crash.""" - mock_mpu.get_expert_data_parallel_world_size.return_value = 0 - # We use args inside of get_logprobs, we need to initialize them. - - args = arguments.parse_args(ignore_unknown_args=True) - setattr(args, 'data_parallel_size', 1) - setattr(args, 'micro_batch_size', 2) - setattr(args, 'global_batch_size', 2) - setattr(args, 'seq_length', 4) - setattr(args, 'curr_iteration', 1) - global_vars.unset_global_variables() - global_vars.set_global_variables(args, build_tokenizer=False) - - model = MockModel() - tokenizer = MockTokenizer() - - r1 = TokenRollout( - trajectory=[1, 2, 3], - reward=3.14, - generation_mask=[False, True, True], - logprobs=[0.1, 0.2, 0.3], - env_id='MEGAENV', - problem_id="2", - ) - r2 = TokenRollout( - trajectory=[1, 2, 3, 4], - reward=0.14, - generation_mask=[False, True, True, True], - logprobs=[0.1, 0.2, 0.3, -1.2], - env_id='MEGAENV', - problem_id="2", - ) - rollouts = [[r1, r2]] - try: - data_iter = rl_utils.prepare_data_for_update([model], {}, rollouts, tokenizer) - except AssertionError as e: - # We expect trajectories to come padded there. 
- assert str(e).startswith('Rollout is not the correct length') - - r1 = TokenRollout( - trajectory=torch.Tensor([1, 2, 3, tokenizer.eod]).cuda(), - reward=3.14, - generation_mask=torch.Tensor([False, True, True, True]).cuda(), - logprobs=torch.Tensor([-0.2, -0.3, -3.2]).cuda(), - env_id='MEGAENV', - problem_id="2", - ) - r2 = TokenRollout( - trajectory=torch.Tensor([1, 2, 234, tokenizer.eod]).cuda(), - reward=0.14, - generation_mask=torch.Tensor([False, True, True, True]).cuda(), - logprobs=torch.Tensor([-0.2, -0.3, -1.2]), - env_id='MEGAENV', - problem_id="2", - ) - rollouts = [[r1, r2]] - data_iter = rl_utils.prepare_data_for_update([model], {}, rollouts, tokenizer) - - _, _, old_logprobs, _, _, _, _ = next(data_iter) - # All logits are ones in the MockModel. - # All probabilities should be uniform. - torch.testing.assert_close(old_logprobs.exp(), torch.ones_like(old_logprobs) / VOCAB) - - -def test_sequence_packing_basic(): - """Test basic sequence packing functionality.""" - # Initialize args as required by SequencePacker - args = arguments.parse_args(ignore_unknown_args=True) - setattr(args, 'seq_length', 16) - global_vars.set_args(args) - - tokenizer = MockTokenizer() - bin_size = 16 - packer = rl_utils.SequencePacker(bin_size=bin_size, pad_token=tokenizer.pad) - - # Create test sequences of varying lengths, all padded to same length - max_len = 5 - sequences = [ - torch.cat( - [ - torch.tensor([1, 2, 3, tokenizer.eod]), - torch.full((1,), tokenizer.pad, dtype=torch.long), - ] - ), # length 4 -> 5 - torch.cat( - [torch.tensor([4, 5, tokenizer.eod]), torch.full((2,), tokenizer.pad, dtype=torch.long)] - ), # length 3 -> 5 - torch.tensor([6, 7, 8, 9, tokenizer.eod]), # length 5 - torch.cat( - [torch.tensor([10, tokenizer.eod]), torch.full((3,), tokenizer.pad, dtype=torch.long)] - ), # length 2 -> 5 - ] - - generation_masks = torch.tensor( - [ - [False, True, True, True, False], # Matches padded length - [False, True, True, False, False], - [False, True, True, 
True, True], - [False, True, False, False, False], - ] - ) - - rewards = torch.tensor([1.0, 2.0, 3.0, 4.0]) - - # Pack sequences - packed_trajs, packed_position_ids, packed_attention_mask, packed_loss_mask, packing_info = ( - packer.pack_sequences(sequences, generation_masks) - ) - - # Verify packed data structure - assert packed_trajs is not None - assert packed_position_ids is not None - assert packed_attention_mask is not None - assert packed_loss_mask is not None - assert packing_info is not None - - # Check that sequences fit in bins properly - # The packer trims sequences to their actual length (removing padding) - # Actual lengths: 4, 3, 5, 2 = 14 total tokens - # With bin_size=16, this should fit in 1 bin - assert packed_trajs.shape[0] >= 1 # At least one bin - assert packed_trajs.shape[1] == bin_size - - # Verify position_ids are correct - for bin_idx in range(packed_trajs.shape[0]): - # Check that position_ids reset for each sequence in the bin - for i in range(packed_trajs.shape[1]): - if i == 0 or packed_trajs[bin_idx, i - 1] == tokenizer.eod: - # Start of a new sequence - if packed_trajs[bin_idx, i] != tokenizer.pad: - assert packed_position_ids[bin_idx, i] == 0 - - -def test_sequence_packing_with_generation_masks(): - """Test sequence packing with generation masks.""" - # Initialize args as required by SequencePacker - args = arguments.parse_args(ignore_unknown_args=True) - setattr(args, 'seq_length', 20) - global_vars.set_args(args) - - tokenizer = MockTokenizer() - bin_size = 20 - packer = rl_utils.SequencePacker(bin_size=bin_size, pad_token=tokenizer.pad) - - # Create test data with generation masks - sequences = [torch.tensor([1, 2, 3, tokenizer.eod]), torch.tensor([4, 5, 6, 7, tokenizer.eod])] - - # Pad sequences to same length for stacking - max_len = max(len(s) for s in sequences) - padded_sequences = [] - for seq in sequences: - padded = torch.cat([seq, torch.full((max_len - len(seq),), tokenizer.pad, dtype=seq.dtype)]) - 
padded_sequences.append(padded) - - generation_masks = torch.tensor( - [ - [False, True, True, True, False], # Padded to match max_len - [False, True, True, True, True], - ] - ) - - # Pack sequences - packed_trajs, packed_position_ids, packed_attention_mask, packed_loss_mask, packing_info = ( - packer.pack_sequences(padded_sequences, generation_masks) - ) - - # Verify packed tensors - assert packed_trajs.shape[0] == 1 # One bin - assert packed_trajs.shape[1] == bin_size - - # Check that loss mask is set correctly for generation tokens - # The loss mask should be 1 for generation tokens and 0 for padding/prompt - - -def test_sequence_packing_empty_bins(): - """Test that empty bins are created correctly.""" - # Initialize args if needed - args = arguments.parse_args(ignore_unknown_args=True) - setattr(args, 'seq_length', 8) - global_vars.set_args(args) - - tokenizer = MockTokenizer() - bin_size = 8 - num_empty_bins = 3 - - # Create a simple packed data structure - packed_trajs = torch.tensor( - [[1, 2, 3, tokenizer.eod, tokenizer.pad, tokenizer.pad, tokenizer.pad, tokenizer.pad]] - ) - packed_position_ids = torch.tensor([[0, 1, 2, 3, 0, 0, 0, 0]]) - packed_loss_mask = torch.tensor([[1, 1, 1, 1, 0, 0, 0, 0]], dtype=torch.float) - packed_attention_mask = torch.ones(1, bin_size, bin_size) # Simple full attention mask - - # Create empty bins - empty_trajs, empty_position_ids, empty_loss_mask, empty_attention_mask, empty_packing_info = ( - rl_utils.create_empty_bins( - num_empty_bins=num_empty_bins, - bin_size=bin_size, - packed_trajs=packed_trajs, - packed_position_ids=packed_position_ids, - packed_loss_mask=packed_loss_mask, - packed_attention_mask=packed_attention_mask, - tokenizer=tokenizer, - ) - ) - - # Verify shapes - assert empty_trajs.shape[0] == num_empty_bins - assert empty_trajs.shape[1] == bin_size - - # Check that empty bins are filled with padding - for i in range(num_empty_bins): - assert torch.all(empty_trajs[i] == tokenizer.pad) - assert 
torch.all(empty_position_ids[i] == 0) - assert torch.all(empty_loss_mask[i] == 0) - - # Verify packing info for empty bins - assert len(empty_packing_info) == num_empty_bins - for info in empty_packing_info: - assert len(info['bin_seq_indices']) == 0 # No sequences in empty bins - assert len(info['seq_starts']) == 0 # No sequence starts - - -def test_prepare_trajectories_with_sequence_packing(): - """Test prepare_trajectories with sequence packing enabled.""" - # Set up args with sequence packing - args = arguments.parse_args(ignore_unknown_args=True) - setattr(args, 'rl_use_sequence_packing', True) - setattr(args, 'rl_sequence_packing_bin_size', 16) - setattr(args, 'data_parallel_size', 1) - setattr(args, 'micro_batch_size', 2) - setattr(args, 'global_batch_size', 2) - setattr(args, 'seq_length', 16) - setattr(args, 'curr_iteration', 1) - global_vars.unset_global_variables() - global_vars.set_global_variables(args, build_tokenizer=False) - - tokenizer = MockTokenizer() - - # Create rollouts of varying lengths - r1 = TokenRollout( - trajectory=[1, 2, tokenizer.eod], - reward=3.14, - generation_mask=[False, True, True], - logprobs=[0.1, 0.2, 0.3], - env_id='MEGAENV', - problem_id="1", - ) - r2 = TokenRollout( - trajectory=[4, 5, 6, 7, tokenizer.eod], - reward=0.14, - generation_mask=[False, True, True, True, True], - logprobs=[0.4, 0.5, 0.6, 0.7, 0.8], - env_id='MEGAENV', - problem_id="2", - ) - r3 = TokenRollout( - trajectory=[8, 9, tokenizer.eod], - reward=2.71, - generation_mask=[False, True, True], - logprobs=[0.9, 1.0, 1.1], - env_id='MEGAENV', - problem_id="3", - ) - - rollouts = [[r1, r2, r3]] - seq_len = 16 - - # Call prepare_trajectories with sequence packing - trajs, genmask, inference_logprobs = rl_utils.prepare_trajectories(rollouts, tokenizer, seq_len) - - # With sequence packing enabled but called from prepare_trajectories, - # it might still return individual sequences (not packed into bins yet) - # because the actual packing happens later in 
prepare_data_for_update - assert trajs.shape[0] == 3 # Three sequences - assert trajs.shape[1] == seq_len - - # Verify that each sequence is properly padded - # Sequence 1: [1, 2, eod, pad] + padding - assert trajs[0, 0] == 1 - assert trajs[0, 1] == 2 - assert trajs[0, 2] == tokenizer.eod - assert trajs[0, 3] == tokenizer.pad - - # Sequence 2: [4, 5, 6, 7, eod, pad] + padding - assert trajs[1, 0] == 4 - assert trajs[1, 1] == 5 - assert trajs[1, 4] == tokenizer.eod - assert trajs[1, 5] == tokenizer.pad - - -def test_sequence_packing_integration(): - """Simple integration test for sequence packing - just verifies the packing works.""" - # Initialize minimal args needed for SequencePacker - args = arguments.parse_args(ignore_unknown_args=True) - setattr(args, 'seq_length', 16) - global_vars.set_args(args) - - tokenizer = MockTokenizer() - bin_size = 16 - - # Test that we can pack sequences and get expected outputs - packer = rl_utils.SequencePacker(bin_size=bin_size, pad_token=tokenizer.pad) - - # Create test data - need to pad to same length for stacking - max_len = 5 - sequences = [ - torch.cat( - [ - torch.tensor([1, 2, 3, tokenizer.eod]), - torch.full((1,), tokenizer.pad, dtype=torch.long), - ] - ), # length 4 -> 5 - torch.cat( - [torch.tensor([4, 5, tokenizer.eod]), torch.full((2,), tokenizer.pad, dtype=torch.long)] - ), # length 3 -> 5 - torch.tensor([6, 7, 8, 9, tokenizer.eod]), # length 5 - ] - generation_masks = [ - torch.tensor([False, True, True, True, False]), - torch.tensor([False, True, True, False, False]), - torch.tensor([False, True, True, True, True]), - ] - - # Pack sequences - packed_trajs, packed_position_ids, packed_attention_mask, packed_loss_mask, packing_info = ( - packer.pack_sequences(sequences, generation_masks) - ) - - # Basic assertions - assert packed_trajs is not None - assert packed_trajs.shape[1] == bin_size # Each bin should be bin_size - assert packed_position_ids.shape == packed_trajs.shape - assert packed_loss_mask.shape == 
packed_trajs.shape - - # Verify the sequences are packed correctly - # Total length: 4 + 3 + 5 = 12, should fit in 1 bin - assert packed_trajs.shape[0] == 1 - - # The packer sorts sequences by length (descending), so order is: seq3 (len 5), seq1 (len 4), seq2 (len 3) - expected_start = torch.tensor( - [6, 7, 8, 9, tokenizer.eod, 1, 2, 3, tokenizer.eod, 4, 5, tokenizer.eod] - ) - assert torch.all(packed_trajs[0, :12] == expected_start) - - # Rest should be padding - assert torch.all(packed_trajs[0, 12:] == tokenizer.pad) diff --git a/tests/unit_tests/transformer/moe/test_token_dispatcher.py b/tests/unit_tests/transformer/moe/test_token_dispatcher.py index 6a155920e2f..4b4cfa567c5 100644 --- a/tests/unit_tests/transformer/moe/test_token_dispatcher.py +++ b/tests/unit_tests/transformer/moe/test_token_dispatcher.py @@ -417,10 +417,7 @@ def is_hybrid_ep_available(): return HAVE_HYBRIDEP -@pytest.mark.skipif( - not is_deep_ep_available() and not is_hybrid_ep_available(), - reason="Deep EP and Hybrid EP are not available", -) +@pytest.mark.skipif(True, reason="Deep EP and Hybrid EP are not available") class TestFlexDispatcher: def setup_method(self, method): pass diff --git a/tools/run_inference_performance_test.py b/tools/run_inference_performance_test.py index dda2b8284b3..01e5ab58898 100644 --- a/tools/run_inference_performance_test.py +++ b/tools/run_inference_performance_test.py @@ -24,8 +24,9 @@ from megatron.core.inference.text_generation_controllers.text_generation_controller import ( TextGenerationController, ) +from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols from megatron.core.transformer.module import MegatronModule -from megatron.core.utils import get_mamba_inference_state_config_from_model +from megatron.core.utils import get_attr_wrapped_model from model_provider import model_provider sys.path.append( @@ -88,7 +89,14 @@ def get_inference_engine(args: argparse.Namespace, model: MegatronModule) -> Abs 
moe_pad_experts_for_cuda_graph_inference=args.moe_pad_experts_for_cuda_graph_inference, ) - mamba_inference_state_config = get_mamba_inference_state_config_from_model(model) + # Layer type list for hybrid models + decoder = get_attr_wrapped_model(model, "decoder") + layer_type_list = getattr(decoder, "layer_type_list", None) + if layer_type_list is not None and Symbols.MAMBA in layer_type_list: + (mamba_conv_states_shape, mamba_ssm_states_shape) = decoder.mamba_state_shapes_per_request() + else: + mamba_conv_states_shape = None + mamba_ssm_states_shape = None if args.engine_type == "static": inference_wrapped_model = GPTInferenceWrapper(model, inference_wrapper_config) @@ -121,7 +129,9 @@ def get_inference_engine(args: argparse.Namespace, model: MegatronModule) -> Abs block_size_tokens=args.inference_dynamic_batching_block_size, tensor_model_parallel_size=args.tensor_model_parallel_size, materialize_only_last_token_logits=not args.return_log_probs, - mamba_inference_state_config=mamba_inference_state_config, + layer_type_list=layer_type_list, + mamba_conv_states_shape=mamba_conv_states_shape, + mamba_ssm_states_shape=mamba_ssm_states_shape, cache_mla_latent=args.multi_latent_attention and args.cache_mla_latents, kv_lora_rank=args.kv_lora_rank if args.multi_latent_attention else None, qk_pos_emb_head_dim=args.qk_pos_emb_head_dim, diff --git a/train_rl.py b/train_rl.py index bf632d81e2c..479498d392a 100644 --- a/train_rl.py +++ b/train_rl.py @@ -191,7 +191,7 @@ def forward_step(data_iterator, model: GPTModel, loss_only: bool = False): seq_lengths = None attention_mask = None - if args.rl_use_sequence_packing: + if args.use_sequence_packing: # Get bin index from data iterator bin_tensor = batch_data[0] bin_idx = bin_tensor.item() From 6ca67bc4a345d56fc047998b32b8c807d84c7402 Mon Sep 17 00:00:00 2001 From: Li Tao Date: Mon, 1 Dec 2025 11:45:40 +0800 Subject: [PATCH 162/334] [Dev] Support packed seq in MTP (#2043) Signed-off-by: Li Tao Signed-off-by: lit --- 
megatron/core/models/gpt/gpt_model.py | 14 +- .../transformer/multi_token_prediction.py | 118 +++++++++- .../test_multi_token_prediction.py | 208 +++++++++++++++++- 3 files changed, 331 insertions(+), 9 deletions(-) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index e840fca99b3..ce1e8e76bd9 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -575,9 +575,19 @@ def _postprocess( runtime_gather_output=runtime_gather_output, ) # Calc loss for the current Multi-Token Prediction (MTP) layers. - mtp_labels, _ = roll_tensor(mtp_labels, shifts=-1, dims=-1, cp_group=self.cp_group) + mtp_labels, _ = roll_tensor( + mtp_labels, + shifts=-1, + dims=-1, + cp_group=self.cp_group, + packed_seq_params=packed_seq_params, + ) loss_mask, num_tokens = roll_tensor( - loss_mask, shifts=-1, dims=-1, cp_group=self.cp_group + loss_mask, + shifts=-1, + dims=-1, + cp_group=self.cp_group, + packed_seq_params=packed_seq_params, ) mtp_loss = self.compute_language_model_loss(mtp_labels, mtp_logits) mtp_loss = loss_mask * mtp_loss diff --git a/megatron/core/transformer/multi_token_prediction.py b/megatron/core/transformer/multi_token_prediction.py index e79af23ef04..a8f4abfcdd3 100755 --- a/megatron/core/transformer/multi_token_prediction.py +++ b/megatron/core/transformer/multi_token_prediction.py @@ -126,7 +126,7 @@ def tie_output_layer_state_dict( ) -def roll_tensor(tensor, shifts=-1, dims=-1, cp_group=None): +def roll_tensor(tensor, shifts=-1, dims=-1, cp_group=None, packed_seq_params=None): """Roll the tensor input along the sequence dimension with Context Parallelism (CP) support. 
This function extends the original roll_tensor to support Context Parallelism, which allows @@ -138,15 +138,24 @@ def roll_tensor(tensor, shifts=-1, dims=-1, cp_group=None): For CP>1: Splits tensor into chunks, performs rolling within each chunk, then exchanges boundary elements between adjacent CP ranks to maintain sequence continuity. + For packed sequences: Respects sequence boundaries when rolling to avoid mixing tokens + from different sequences. + Args: tensor (Tensor): The input tensor to roll. shifts (int): The shift of the tensor (typically -1 for MTP). dims (int): The dimension to roll (typically -1 for sequence dimension). cp_group (ProcessGroup): The context parallelism process group. If None or size=1, falls back to standard rolling behavior. + packed_seq_params (PackedSeqParams): Parameters for packed sequence processing. + If provided, respects sequence boundaries. Returns: tuple: (rolled_tensor, sum_of_rolled_tensor) """ + # Handle packed sequences cases + if packed_seq_params is not None: + return _roll_tensor_packed_seq(tensor, shifts, dims, packed_seq_params, cp_group) + # Standard rolling behavior when CP is not enabled (cp_group is None or size=1) if cp_group is None or cp_group.size() == 1: rolled_tensor = torch.roll(tensor, shifts=shifts, dims=dims) @@ -215,6 +224,91 @@ def roll_tensor(tensor, shifts=-1, dims=-1, cp_group=None): return rolled_tensor, rolled_tensor.sum() +def _roll_tensor_packed_seq(tensor, shifts, dims, packed_seq_params, cp_group=None): + """Roll tensor with packed sequence support. + This function handles rolling for packed sequences by respecting sequence boundaries + """ + + # Notice: This is a naive implementation to test the correctness, + # a better solution will only sync the boundary tokens once. + assert ( + dims == -1 or dims == tensor.dim() - 1 + ), "Packed sequence roll only supports the last dimension." + assert shifts == -1, "Packed sequence roll only supports a single-token left shift." 
+ cu_seqlens = packed_seq_params.cu_seqlens_q + assert cu_seqlens is not None, "Packed sequence parameters must provide cu_seqlens_q." + + rolled_tensor = tensor.clone() + + cp_size = cp_group.size() if cp_group is not None else 1 + if cp_size == 1: + # CP disabled: roll each packed sequence independently within its boundaries + for i in range(len(cu_seqlens) - 1): + start_idx = cu_seqlens[i] + end_idx = cu_seqlens[i + 1] + seq_slice = tensor[..., start_idx:end_idx] + rolled_seq = torch.roll(seq_slice, shifts=shifts, dims=dims) + # Zero out the last position(s) that would cross sequence boundaries + rolled_seq[..., shifts:] = 0 + rolled_tensor[..., start_idx:end_idx] = rolled_seq + return rolled_tensor, rolled_tensor.sum() + + # CP enabled: each rank owns two chunks per sequence (front and mirrored tail). + local_rank = torch.distributed.get_rank(group=cp_group) + global_ranks = torch.distributed.get_process_group_ranks(group=cp_group) + next_rank = global_ranks[(local_rank + 1) % cp_size] + prev_rank = global_ranks[(local_rank - 1) % cp_size] + + # Iterate over each sequence individually + for i in range(len(cu_seqlens) - 1): + start_idx = cu_seqlens[i] + end_idx = cu_seqlens[i + 1] + + # the idx has been multiplied by cp_size, need to divide it by cp_size to get the local idx + local_start_idx = start_idx // cp_size + local_end_idx = end_idx // cp_size + tensor_slice = rolled_tensor[..., local_start_idx:local_end_idx].clone() + + # The following code is very similar as the code in roll_tensor function + local_chunks = tensor_slice.chunk(2, dim=dims) + rolled_chunks = [torch.roll(chunk, shifts=shifts, dims=dims) for chunk in local_chunks] + + tensor_send_list = [] + tensor_recv_list = [] + for chunk in rolled_chunks: + boundary = chunk.select(dims, shifts).contiguous().clone() + tensor_send_list.append(boundary) + tensor_recv_list.append(torch.empty_like(boundary)) + + ops = [] + if local_rank != 0: + ops.append(torch.distributed.isend(tensor=tensor_send_list[0], 
dst=prev_rank)) + ops.append(torch.distributed.irecv(tensor=tensor_recv_list[1], src=prev_rank)) + else: + tensor_recv_list[1].zero_() + + if local_rank != cp_size - 1: + ops.append(torch.distributed.irecv(tensor=tensor_recv_list[0], src=next_rank)) + ops.append(torch.distributed.isend(tensor=tensor_send_list[1], dst=next_rank)) + else: + tensor_recv_list[0].copy_(tensor_send_list[1]) + + for op in ops: + op.wait() + + index = [slice(None)] * rolled_chunks[0].dim() + index[dims] = shifts + for chunk, recv in zip(rolled_chunks, tensor_recv_list): + chunk[tuple(index)] = recv + + seq_result = torch.cat(rolled_chunks, dim=dims) + + # update the rolled tensor + rolled_tensor[..., local_start_idx:local_end_idx] = seq_result + + return rolled_tensor, rolled_tensor.sum() + + class MTPLossLoggingHelper: """Helper class for logging MTP losses.""" @@ -595,6 +689,7 @@ def _get_embeddings( position_ids: torch.Tensor, embedding: Callable, hidden_states: torch.Tensor, + packed_seq_params: Optional[PackedSeqParams] = None, ): """ Preprocesses input data for the Multi-Token Prediction (MTP) layers. @@ -609,10 +704,23 @@ def _get_embeddings( from gpt model to compute the decoder input. hidden_states (torch.Tensor): hidden states tensor of shape [s, b, h] where s is the sequence length, b is the batch size, and h is the hidden size. + packed_seq_params (PackedSeqParams): Parameters for packed sequence processing. """ # Calc logits for the current Multi-Token Prediction (MTP) layers. 
- input_ids, _ = roll_tensor(input_ids, shifts=-1, dims=-1, cp_group=self.cp_group) - position_ids, _ = roll_tensor(position_ids, shifts=-1, dims=-1, cp_group=self.cp_group) + input_ids, _ = roll_tensor( + input_ids, + shifts=-1, + dims=-1, + cp_group=self.cp_group, + packed_seq_params=packed_seq_params, + ) + position_ids, _ = roll_tensor( + position_ids, + shifts=-1, + dims=-1, + cp_group=self.cp_group, + packed_seq_params=packed_seq_params, + ) # embedding decoder_input = embedding(input_ids=input_ids, position_ids=position_ids) @@ -795,15 +903,13 @@ def forward( [s, b, h], and optionally the updated context tensor if cross-attention is used. """ assert context is None, f"multi token prediction + cross attention is not yet supported." - assert ( - packed_seq_params is None - ), f"multi token prediction + sequence packing is not yet supported." input_ids, position_ids, decoder_input, hidden_states = self._get_embeddings( input_ids=input_ids, position_ids=position_ids, embedding=embedding, hidden_states=hidden_states, + packed_seq_params=packed_seq_params, ) if self.config.recompute_granularity == 'full' and self.training: diff --git a/tests/unit_tests/transformer/test_multi_token_prediction.py b/tests/unit_tests/transformer/test_multi_token_prediction.py index 9b9d2c67881..ddfa9bfba16 100644 --- a/tests/unit_tests/transformer/test_multi_token_prediction.py +++ b/tests/unit_tests/transformer/test_multi_token_prediction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import os import sys @@ -14,11 +14,14 @@ ) from megatron.core.models.gpt.gpt_model import GPTModel from megatron.core.num_microbatches_calculator import destroy_num_microbatches_calculator +from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.parallel_state import get_context_parallel_group from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed from megatron.core.transformer.multi_token_prediction import ( MTPLossLoggingHelper, MultiTokenPredictionBlock, + roll_tensor, ) from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import is_te_min_version @@ -245,6 +248,66 @@ def get_batch(self, seq_length, micro_batch_size): } return batch + def get_packed_batch(self, seq_lengths, micro_batch_size): + """ + Create a packed sequence batch with multiple sequences of varying lengths. + + Args: + seq_lengths: List of sequence lengths (e.g., [10, 15, 8] for 3 sequences) + micro_batch_size: Batch size (typically 1 for packed sequences) + + Returns: + batch: Dictionary containing packed sequences and PackedSeqParams + """ + total_seq_length = sum(seq_lengths) + + # Create packed input_ids, labels, and position_ids + input_ids_list = [] + labels_list = [] + position_ids_list = [] + + for seq_len in seq_lengths: + data = list(range(seq_len)) + input_ids_list.extend(data) + labels_list.extend([x + 1 for x in data]) + position_ids_list.extend(data) + + # Convert to tensors with shape [batch, total_seq_length] + input_ids = torch.tensor(input_ids_list, dtype=torch.int64).unsqueeze(0).cuda() + labels = torch.tensor(labels_list, dtype=torch.int64).unsqueeze(0).cuda() + position_ids = torch.tensor(position_ids_list, dtype=torch.int64).unsqueeze(0).cuda() + + # Create attention mask for packed sequences (all ones for simplicity) + attention_mask = torch.ones( + (micro_batch_size, 1, total_seq_length, total_seq_length), 
dtype=bool + ).cuda() + + # Create loss mask with shape [batch, total_seq_length] + loss_mask = torch.ones(micro_batch_size, total_seq_length).cuda() + + # Create cumulative sequence lengths for PackedSeqParams + cu_seqlens = torch.tensor( + [0] + [sum(seq_lengths[: i + 1]) for i in range(len(seq_lengths))], dtype=torch.int32 + ).cuda() + + packed_seq_params = PackedSeqParams( + cu_seqlens_q=cu_seqlens, + cu_seqlens_kv=cu_seqlens, + max_seqlen_q=max(seq_lengths), + max_seqlen_kv=max(seq_lengths), + qkv_format='thd', + ) + + batch = { + 'tokens': input_ids, + 'labels': labels, + 'loss_mask': loss_mask, + 'attention_mask': attention_mask, + 'position_ids': position_ids, + 'packed_seq_params': packed_seq_params, + } + return batch + @pytest.mark.skipif( not HAVE_TE or not is_te_min_version("2.1.0"), reason="grouped_gemm requires TransformerEngine >= 2.1.0", @@ -404,6 +467,149 @@ def test_fp8_support(self, full_recompute): loss = output.mean() loss.backward() + @pytest.mark.skipif( + not HAVE_TE or not is_te_min_version("2.1.0"), + reason="grouped_gemm requires TransformerEngine >= 2.1.0", + ) + @pytest.mark.parametrize(("tp", "cp"), [(1, 1), (2, 1), (2, 2)]) + def test_packed_sequences(self, tp, cp): + """Test MTP with packed sequences.""" + # Create args with packed sequences support + seq_lengths = [16, 24, 12] # Three sequences of different lengths + total_seq_length = sum(seq_lengths) + + args = self.create_test_args(tp, cp, total_seq_length, micro_batch_size=1) + set_args(args) + + torch.manual_seed(_SEED) + Utils.initialize_model_parallel(tensor_model_parallel_size=tp, context_parallel_size=cp) + + # Get packed batch + batch = self.get_packed_batch(seq_lengths, micro_batch_size=1) + tokens = batch['tokens'] + labels = batch['labels'] + loss_mask = batch['loss_mask'] + attention_mask = batch['attention_mask'] + position_ids = batch['position_ids'] + packed_seq_params = batch['packed_seq_params'] + + # Create model + gpt_model, optimizer, opt_param_scheduler = 
setup_model_and_optimizer( + self.model_provider, ModelType.encoder_or_decoder + ) + + # Forward pass with packed sequences + output = gpt_model[0].forward( + input_ids=tokens, + position_ids=position_ids, + attention_mask=attention_mask, + labels=labels, + loss_mask=loss_mask, + packed_seq_params=packed_seq_params, + ) + + # Verify output shape + assert output.shape[0] == 1 # batch size + assert output.shape[1] == total_seq_length + + # Verify MTP loss was computed + tracker = MTPLossLoggingHelper.tracker + assert "values" in tracker + mtp_loss = tracker['values'].clone() + assert mtp_loss.shape[0] == args.mtp_num_layers + MTPLossLoggingHelper.clean_loss_in_tracker() + + # Backward pass + loss = output.mean() + loss.backward() + + # Verify gradients exist + for name, param in gpt_model[0].named_parameters(): + assert param.main_grad is not None, f"Gradient missing for {name}" + + @pytest.mark.parametrize("cp", [1, 2]) + def test_roll_tensor_with_packed_sequences(self, cp): + """Test roll_tensor function with packed sequences, with and without CP. 
+ + For CP=1: Tests standard packed sequence rolling with verified expected values + For CP=2: Tests CP-enabled rolling executes without errors + """ + Utils.initialize_model_parallel(tensor_model_parallel_size=1, context_parallel_size=cp) + cp_group = get_context_parallel_group() if cp > 1 else None + cp_rank = torch.distributed.get_rank(group=cp_group) if cp_group is not None else 0 + + if cp == 1: + # Test case: Simple packed sequences (CP disabled) + tensor = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32).cuda() + cu_seqlens = torch.tensor([0, 3, 5], dtype=torch.int32).cuda() + + packed_seq_params = PackedSeqParams( + cu_seqlens_q=cu_seqlens, + cu_seqlens_kv=cu_seqlens, + max_seqlen_q=3, + max_seqlen_kv=3, + qkv_format='thd', + ) + + # Roll by -1 (shift left) + rolled, sum_val = roll_tensor( + tensor, shifts=-1, dims=0, cp_group=cp_group, packed_seq_params=packed_seq_params + ) + + # Expected: [2, 3, 0, 5, 0] - boundaries at indices 2 and 4 are zeroed + expected = torch.tensor([2, 3, 0, 5, 0], dtype=torch.float32).cuda() + assert torch.equal(rolled, expected), f"Expected {expected}, got {rolled}" + else: + # Test case: Packed sequences with CP=2 + # Two sequences: + # seq1 = [1, 2, 3, 4, 5, 6, 7, 8] + # seq2 = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + + if cp_rank == 0: + # CP Rank 0: first half of each sequence + tensor = torch.tensor( + [1, 2, 7, 8, 11, 12, 13, 20, 21, 22], dtype=torch.float32 + ).cuda() + expected = torch.tensor( + [2, 3, 8, 0, 12, 13, 14, 21, 22, 0], dtype=torch.float32 + ).cuda() + else: + # CP Rank 1: second half of each sequence + tensor = torch.tensor( + [3, 4, 5, 6, 14, 15, 16, 17, 18, 19], dtype=torch.float32 + ).cuda() + expected = torch.tensor( + [4, 5, 6, 7, 15, 16, 17, 18, 19, 20], dtype=torch.float32 + ).cuda() + + cu_seqlens = torch.tensor([0, 8, 20], dtype=torch.int32).cuda() + + packed_seq_params = PackedSeqParams( + cu_seqlens_q=cu_seqlens, + cu_seqlens_kv=cu_seqlens, + max_seqlen_q=6, # max(4, 6) - max local seq 
length per sequence + max_seqlen_kv=6, + qkv_format='thd', + ) + + # Roll by -1 (shift left) with CP communication + rolled, sum_val = roll_tensor( + tensor, shifts=-1, dims=0, cp_group=cp_group, packed_seq_params=packed_seq_params + ) + + # Verify the rolled tensor matches expected values + assert ( + rolled.shape == expected.shape + ), f"Shape mismatch: expected {expected.shape}, got {rolled.shape}" + assert torch.equal( + rolled, expected + ), f"CP Rank {cp_rank}: Expected\n{expected}\nbut got\n{rolled}\nDiff:\n{rolled - expected}" + + # Verify sum is correct + assert sum_val.numel() == 1, "Sum should be a scalar" + + Utils.destroy_model_parallel() + class TestMTPLossLoggingHelper: def setup_method(self, method): From 11caf01283f4b3e17f12807099a1aad04ff3a9c2 Mon Sep 17 00:00:00 2001 From: Santosh Bhavani Date: Sun, 30 Nov 2025 20:49:11 -0800 Subject: [PATCH 163/334] Fix runaway Etpt in straggler detector by resetting FLOPs accumulator (#2128) Signed-off-by: Santosh Bhavani Co-authored-by: Li Ruixiao --- megatron/training/training.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/megatron/training/training.py b/megatron/training/training.py index 9986f931641..9fe372a3780 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1929,6 +1929,7 @@ def post_training_step_callbacks( # Straggler detector. 
if iteration % args.log_interval == 0 and args.log_straggler: + # Use FLOPs accumulated since last log event and then reset the counter stimer.report(num_floating_point_operations_since_last_log_event, args.log_interval) num_floating_point_operations_since_last_log_event = 0.0 @@ -1970,6 +1971,9 @@ def post_training_step_callbacks( if args.manual_gc_interval != 0 and iteration % args.manual_gc_interval == 0: gc.collect() + # Return updated FLOPs accumulator so caller can persist the reset + return num_floating_point_operations_since_last_log_event + def checkpoint_and_decide_exit( model, @@ -2585,8 +2589,9 @@ def get_e2e_base_metrics(): energy_monitor.resume() # Miscellaneous post-training-step functions (e.g., FT heartbeats, GC). - # Some of these only happen at specific iterations. - post_training_step_callbacks( + # Some of these only happen at specific iterations. Capture updated FLOPs accumulator + # (it is reset inside the callback after logging). + num_floating_point_operations_since_last_log_event = post_training_step_callbacks( model, optimizer, opt_param_scheduler, From 92c8482e6dcd11c3666c61bb8d1f7e8d0730ed13 Mon Sep 17 00:00:00 2001 From: Robin Zhang Date: Mon, 1 Dec 2025 13:09:36 +0800 Subject: [PATCH 164/334] [Dev] feat(MoE): Refactor cuda_graph_scope - part2 (#2353) Signed-off-by: Robin Zhang --- .../text_generation_controller.py | 3 +- .../common/language_module/language_module.py | 5 +- megatron/core/models/gpt/gpt_model.py | 4 +- megatron/core/pipeline_parallel/schedules.py | 7 +- megatron/core/ssm/mamba_block.py | 3 +- megatron/core/transformer/attention.py | 4 +- megatron/core/transformer/cuda_graphs.py | 47 +++++-- megatron/core/transformer/enums.py | 12 ++ megatron/core/transformer/moe/fused_a2a.py | 8 ++ megatron/core/transformer/moe/moe_utils.py | 7 +- .../core/transformer/moe/token_dispatcher.py | 12 +- .../core/transformer/transformer_block.py | 4 +- .../core/transformer/transformer_config.py | 112 +++++++++-------- 
.../core/transformer/transformer_layer.py | 47 +++---- megatron/training/arguments.py | 18 ++- megatron/training/training.py | 9 +- .../inference/engines/test_dynamic_engine.py | 12 +- tests/unit_tests/test_fp8_param.py | 24 ++-- .../transformer/test_cuda_graphs.py | 117 ++++++++++++------ 19 files changed, 302 insertions(+), 153 deletions(-) diff --git a/megatron/core/inference/text_generation_controllers/text_generation_controller.py b/megatron/core/inference/text_generation_controllers/text_generation_controller.py index 2bda1425710..6e00f58ac23 100644 --- a/megatron/core/inference/text_generation_controllers/text_generation_controller.py +++ b/megatron/core/inference/text_generation_controllers/text_generation_controller.py @@ -29,6 +29,7 @@ ) from megatron.core.inference.sampling_params import SamplingParams from megatron.core.inference.utils import get_attention_mask, set_decode_expert_padding +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.moe.moe_layer import BaseMoELayer from megatron.core.transformer.utils import set_model_to_sequence_parallel from megatron.core.utils import get_asyncio_loop, get_model_config, unwrap_model @@ -851,7 +852,7 @@ def generate_all_output_tokens_static_batch( # Check whether CUDA graphs are enabled enable_cuda_graph = ( model_config.cuda_graph_impl == "local" - and "full_iteration" not in model_config.cuda_graph_scope + and CudaGraphScope.full_iteration not in model_config.cuda_graph_scope ) # Pad batch tokens if necessary diff --git a/megatron/core/models/common/language_module/language_module.py b/megatron/core/models/common/language_module/language_module.py index de2ecfb8011..259bb716a93 100644 --- a/megatron/core/models/common/language_module/language_module.py +++ b/megatron/core/models/common/language_module/language_module.py @@ -21,7 +21,7 @@ is_vp_last_stage, ) from megatron.core.process_groups_config import ProcessGroupCollection -from megatron.core.transformer.enums import 
AttnBackend +from megatron.core.transformer.enums import AttnBackend, CudaGraphScope from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.utils import ensure_metadata_has_dp_cp_group @@ -144,8 +144,7 @@ def compute_language_model_loss(self, labels: Tensor, logits: Tensor) -> Tensor: # Use is_cg_capturable=True for full iteration CUDA graphs to avoid torch.equal checks is_cg_capturable = ( hasattr(self.config, 'cuda_graph_scope') - and self.config.cuda_graph_scope - and 'full_iteration' in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration in self.config.cuda_graph_scope ) if is_cg_capturable and not is_te_min_version("2.7.0"): from megatron.core.utils import get_te_version diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index ce1e8e76bd9..a3d1a8bfc00 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -24,7 +24,7 @@ from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.quantization.utils import get_quant_config_or_none from megatron.core.tensor_parallel import gather_from_sequence_parallel_region -from megatron.core.transformer.enums import ModelType +from megatron.core.transformer.enums import CudaGraphScope, ModelType from megatron.core.transformer.multi_token_prediction import ( MTPLossAutoScaler, MTPLossLoggingHelper, @@ -374,7 +374,7 @@ def _preprocess( and ( ( self.config.cuda_graph_impl == "local" - and "full_iteration" not in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope ) or self.config.flash_decode ) diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index d0b912349b4..18344429c45 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -21,6 
+21,7 @@ ) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.cuda_graphs import create_cudagraphs +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.moe.router import MoEAuxLossAutoScaler from megatron.core.utils import ( drain_embedding_wgrad_compute, @@ -656,7 +657,7 @@ def forward_backward_no_pipelining( if ( hasattr(config, 'cuda_graph_impl') and config.cuda_graph_impl == "local" - and "full_iteration" not in config.cuda_graph_scope + and CudaGraphScope.full_iteration not in config.cuda_graph_scope ): create_cudagraphs() @@ -1923,7 +1924,7 @@ def pp_post_backward(input_tensor_grad, vp_stage=None): if ( hasattr(config, 'cuda_graph_impl') and config.cuda_graph_impl == "local" - and "full_iteration" not in config.cuda_graph_scope + and CudaGraphScope.full_iteration not in config.cuda_graph_scope ): create_cudagraphs() nvtx_range_pop(suffix="misc") @@ -2310,7 +2311,7 @@ def enable_grad_sync(): if ( hasattr(config, 'cuda_graph_impl') and config.cuda_graph_impl == "local" - and "full_iteration" not in config.cuda_graph_scope + and CudaGraphScope.full_iteration not in config.cuda_graph_scope ): create_cudagraphs() diff --git a/megatron/core/ssm/mamba_block.py b/megatron/core/ssm/mamba_block.py index 1bcadd0af10..3201a8bfb28 100644 --- a/megatron/core/ssm/mamba_block.py +++ b/megatron/core/ssm/mamba_block.py @@ -25,6 +25,7 @@ from megatron.core.ssm.mamba_hybrid_layer_allocation import allocate_layers from megatron.core.tensor_parallel import get_cuda_rng_tracker from megatron.core.transformer import TransformerConfig +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module @@ -294,7 +295,7 @@ def forward( ( ( self.config.cuda_graph_impl == "local" - and "full_iteration" not in 
self.config.cuda_graph_scope + and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope ) or self.config.flash_decode ) diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 74031f38219..57ba494742b 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -45,7 +45,7 @@ from ..models.common.embeddings.yarn_rotary_pos_embedding import ( _yarn_get_concentration_factor_from_config, ) -from .enums import AttnMaskType +from .enums import AttnMaskType, CudaGraphScope from .transformer_config import TransformerConfig try: @@ -828,7 +828,7 @@ def forward( if ( in_decode_mode and self.config.cuda_graph_impl == "local" - and "full_iteration" not in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope and inference_context.is_static_batching() ): raise ValueError(f"CUDA graphs must use flash decode with static batching!") diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 12f15ee980a..5b0a0333d9e 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -21,6 +21,7 @@ get_all_rng_states, get_cuda_rng_tracker, ) +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule from megatron.core.transformer.transformer_config import TransformerConfig @@ -1344,24 +1345,24 @@ def _layer_is_graphable(layer, config): from megatron.core.transformer.moe.moe_layer import MoELayer from megatron.core.transformer.transformer_layer import TransformerLayer - if isinstance(layer, MambaLayer) and 'mamba' in config.cuda_graph_scope: + if isinstance(layer, MambaLayer) and CudaGraphScope.mamba in config.cuda_graph_scope: # mamba layer. 
return True if isinstance(layer, TransformerLayer): - if 'attn' in config.cuda_graph_scope and not ( + if CudaGraphScope.attn in config.cuda_graph_scope and not ( isinstance(layer.self_attention, IdentityOp) and isinstance(layer.cross_attention, IdentityOp) ): # attn layer. return True if ( - 'moe' in config.cuda_graph_scope - or 'moe_router' in config.cuda_graph_scope - or 'moe_preprocess' in config.cuda_graph_scope + CudaGraphScope.moe in config.cuda_graph_scope + or CudaGraphScope.moe_router in config.cuda_graph_scope + or CudaGraphScope.moe_preprocess in config.cuda_graph_scope ) and isinstance(layer.mlp, MoELayer): # moe layer. return True - if 'mlp' in config.cuda_graph_scope and isinstance(layer.mlp, MLP): + if CudaGraphScope.mlp in config.cuda_graph_scope and isinstance(layer.mlp, MLP): # mlp layer. return True return False @@ -1388,7 +1389,7 @@ def __init__(self, model, config, seq_length, micro_batch_size, optimizers=[]): "Setting NCCL_GRAPH_REGISTER=0 to avoid illegal memory access when using " "CUDA Graph with PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True." ) - assert "full_iteration" not in config.cuda_graph_scope, ( + assert CudaGraphScope.full_iteration not in config.cuda_graph_scope, ( "full_iteration cuda graph is not supported for cuda_graph_impl=transformer_engine. " "Please use cuda_graph_impl=local instead." ) @@ -1529,7 +1530,7 @@ def get_rotary_pos_emb(transformer_module, transformer_input): and not isinstance(layer.self_attention, IdentityOp) and ( not self.config.cuda_graph_scope - or 'attn' in self.config.cuda_graph_scope + or CudaGraphScope.attn in self.config.cuda_graph_scope ) ) if is_te_min_version("1.10.0"): @@ -1712,3 +1713,33 @@ def cuda_graph_set_manual_hooks(self): model_chunk = self.model[chunk_number] for layer in layers: layer.setup_manual_hooks(model_chunk._make_forward_pre_hook) + + def delete_cuda_graphs(self): + """ + Delete all CUDA graphs. + """ + assert self._graphs_created, "CUDA Graphs have not been created." 
+ + graph_resettable = is_te_min_version("2.10.0") + graphs_reset, graphs_not_reset = 0, 0 + for layers in self.callables_per_chunk: + for layer in layers: + for graph in layer.cuda_graphs: + if graph_resettable: + graph.reset() + graphs_reset += 1 + else: + graphs_not_reset += 1 + layer.cuda_graphs = [] + layer.cuda_graph_manual_hooks = [] + + log_on_each_pipeline_stage( + logger=logger, + tp_group=None, + dp_cp_group=None, + level=logging.INFO, + msg=f'Rank {torch.distributed.get_rank()}: ' + f'{graphs_reset} graphs deleted with explicit reset, ' + f'{graphs_not_reset} graphs deleted without explicit reset.', + ) + self._graphs_created = False diff --git a/megatron/core/transformer/enums.py b/megatron/core/transformer/enums.py index 52b82029f90..d06d58d65f2 100644 --- a/megatron/core/transformer/enums.py +++ b/megatron/core/transformer/enums.py @@ -65,3 +65,15 @@ class AttnBackend(enum.Enum): unfused = 3 local = 4 auto = 5 + + +class CudaGraphScope(enum.Enum): + """Cuda Graph Scope - defines which parts of the model to capture.""" + + full_iteration = 1 # Captures the entire training/inference iteration + attn = 2 # Captures attention layers + mlp = 3 # Captures MLP layers (dense layers only) + moe = 4 # Captures MoE layers (drop-and-pad MoE layers only) + moe_router = 5 # Captures MoE router part + moe_preprocess = 6 # Captures MoE preprocessing part (requires moe_router) + mamba = 7 # Captures Mamba layers diff --git a/megatron/core/transformer/moe/fused_a2a.py b/megatron/core/transformer/moe/fused_a2a.py index 60b0b11a32c..045a93039b3 100644 --- a/megatron/core/transformer/moe/fused_a2a.py +++ b/megatron/core/transformer/moe/fused_a2a.py @@ -320,6 +320,14 @@ def init_hybrid_ep_buffer( ) +def reset_hybrid_ep_buffer(): + ''' + Reset the HybridEP buffer + ''' + global _hybrid_ep_buffer + _hybrid_ep_buffer = None + + class HybridEPDispatch(torch.autograd.Function): ''' Fused dispatch operation for permute + dispatch a2a + permute using the HybridEP backend diff 
--git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index d28cbfea3fe..3ed31d375e2 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -11,6 +11,7 @@ from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.cuda_graphs import is_graph_capturing +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig try: @@ -1205,13 +1206,13 @@ def maybe_raise_signal(moe_layer, **kwargs): ): if ( step_condition == "route" - and 'moe_router' in moe_layer.config.cuda_graph_scope - and 'moe_preprocess' not in moe_layer.config.cuda_graph_scope + and CudaGraphScope.moe_router in moe_layer.config.cuda_graph_scope + and CudaGraphScope.moe_preprocess not in moe_layer.config.cuda_graph_scope ): raise MoECudaGraphPartialCaptureSignal(moe_layer, "route", **kwargs) elif ( step_condition == "preprocess" - and 'moe_preprocess' in moe_layer.config.cuda_graph_scope + and CudaGraphScope.moe_preprocess in moe_layer.config.cuda_graph_scope ): raise MoECudaGraphPartialCaptureSignal(moe_layer, "preprocess", **kwargs) diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index b2135fdb00d..af8ae572adb 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -16,6 +16,7 @@ gather_from_sequence_parallel_region, reduce_scatter_to_sequence_parallel_region, ) +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.moe.fused_a2a import ( fused_combine, fused_dispatch, @@ -436,7 +437,7 @@ def __init__( } if ( config.cuda_graph_impl == "transformer_engine" - and 'moe_preprocess' in config.cuda_graph_scope + and CudaGraphScope.moe_preprocess in config.cuda_graph_scope ): 
self.cuda_dtoh_point = "before_ep_alltoall" else: @@ -1075,10 +1076,13 @@ def combine( num_permuted_tokens=self.num_permuted_tokens, pad_multiple=self.pad_multiple, ) - # Release the used handle/num_permuted_tokens which could change in each iteration + # Release the used handle/num_permuted_tokens which could change in each iteration. + # For drop_and_pad mode, we don't need to reset the num_permuted_tokens and + # num_dispatched_tokens, because their values never change. self.handle = None - self.num_permuted_tokens = None - self.num_dispatched_tokens = None + if not self.drop_and_pad: + self.num_permuted_tokens = None + self.num_dispatched_tokens = None return hidden_states def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor: diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index 6f69927e9e8..023db1fe75a 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -21,7 +21,7 @@ ) from megatron.core.pipeline_parallel.utils import is_vp_first_stage, is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection -from megatron.core.transformer.enums import LayerType +from megatron.core.transformer.enums import CudaGraphScope, LayerType from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module from megatron.core.transformer.transformer_config import TransformerConfig @@ -555,7 +555,7 @@ def _should_call_local_cudagraph(self, *args, **kwargs): kwargs.get('inference_context') is not None or kwargs.get('inference_params') is not None ) - and 'full_iteration' in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration in self.config.cuda_graph_scope ): if kwargs['inference_context'].is_static_batching(): using_cuda_graph = kwargs['inference_context'].is_decode_only() diff --git 
a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index fae2e2f5d4d..cc714e9ac15 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -9,7 +9,7 @@ from megatron.core.enums import Fp4Recipe, Fp8Recipe from megatron.core.quantization.quant_config import RecipeConfig -from megatron.core.transformer.enums import AttnBackend +from megatron.core.transformer.enums import AttnBackend, CudaGraphScope from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout from ..fusions.fused_bias_geglu import quick_gelu @@ -711,7 +711,7 @@ class TransformerConfig(ModelParallelConfig): excluding optimizer) is enabled. "transformer_engine": capture the CUDA graph using TE make_graphed_callables().""" - cuda_graph_scope: Optional[List[str]] = None + cuda_graph_scope: Optional[List[CudaGraphScope]] = None """Determines the CUDA graphs capturing scope. When cuda_graph_impl is set to "transformer_engine", valid values are "attn", "mlp", "moe", "moe_router", "moe_preprocess", "mamba". None means the full layer. @@ -1593,65 +1593,76 @@ def __post_init__(self): 'use cuda_graph_impl=transformer_engine instead.' ) self.cuda_graph_impl = "transformer_engine" + if self.cuda_graph_scope is None: self.cuda_graph_scope = [] + elif not isinstance(self.cuda_graph_scope, list): + if isinstance(self.cuda_graph_scope, CudaGraphScope): + self.cuda_graph_scope = [self.cuda_graph_scope] + else: + assert isinstance(self.cuda_graph_scope, str), ( + "cuda_graph_scope must be a string that can be converted to a list of " + f"CudaGraphScope, got {self.cuda_graph_scope}." + ) + self.cuda_graph_scope = self.cuda_graph_scope.split(',') + if all(isinstance(scope, str) for scope in self.cuda_graph_scope): + # Backward compatibility for "full" scope. Now we use an empty list instead. 
+ if "full" in self.cuda_graph_scope: + assert self.cuda_graph_scope == [ + "full" + ], "full scope cannot be used with other scopes." + warnings.warn( + "full scope is deprecated. " + "Use empty cuda_graph_scope to capture the whole layer." + ) + self.cuda_graph_scope = [] + else: + self.cuda_graph_scope = [CudaGraphScope[scope] for scope in self.cuda_graph_scope] + assert all( + isinstance(scope, CudaGraphScope) for scope in self.cuda_graph_scope + ), f"cuda_graph_scope must be a list of CudaGraphScope, got {self.cuda_graph_scope}." + if self.cuda_graph_impl != "none": assert self.cuda_graph_impl in [ "transformer_engine", "local", ], f"Invalid cuda graph implementation: {self.cuda_graph_impl}" + if self.cpu_offloading: raise ValueError("CUDA graphs not supported with CPU offloading.") - elif not isinstance(self.cuda_graph_scope, list): - assert isinstance(self.cuda_graph_scope, str), ( - "cuda_graph_scope must be a string or a list of strings, " - f"got {self.cuda_graph_scope}." - ) - self.cuda_graph_scope = [self.cuda_graph_scope] - if self.cuda_graph_impl == "local": - assert not self.cuda_graph_scope or self.cuda_graph_scope == ["full_iteration"], ( - "For local cuda graph implementation, the only valid value " - "for cuda_graph_scope is full_iteration. " - "To use other scopes, use cuda_graph_impl=transformer_engine." + assert not self.cuda_graph_scope or self.cuda_graph_scope == [ + CudaGraphScope.full_iteration + ], ( + "For local cuda graph implementation, the only valid value for " + "cuda_graph_scope is full_iteration, or an empty list to denote layerwise " + "graphs. To use other scopes, use cuda_graph_impl=transformer_engine." ) if self.cuda_graph_impl == "transformer_engine": - assert "full_iteration" not in self.cuda_graph_scope, ( + assert CudaGraphScope.full_iteration not in self.cuda_graph_scope, ( "To use full iteration cuda graph, please use " - "cuda_graph_impl=transformer_engine instead of cuda_graph_impl=local." 
+ "cuda_graph_impl=local instead of cuda_graph_impl=transformer_engine." ) - for scope in self.cuda_graph_scope: - assert scope in [ - 'attn', - 'mlp', - 'moe', - 'moe_router', - 'moe_preprocess', - 'mamba', - ], ( - "--cuda-graph-scope should be attn, mlp, moe, moe_router, moe_preprocess, " - f"or mamba, got {self.cuda_graph_scope}." - ) - assert ( - 'moe' not in self.cuda_graph_scope or 'moe_router' not in self.cuda_graph_scope + CudaGraphScope.moe not in self.cuda_graph_scope + or CudaGraphScope.moe_router not in self.cuda_graph_scope ), 'cuda_graph_scope must not contain both moe and moe_router.' - if 'moe_preprocess' in self.cuda_graph_scope: + if CudaGraphScope.moe_preprocess in self.cuda_graph_scope: assert ( - 'moe_router' in self.cuda_graph_scope + CudaGraphScope.moe_router in self.cuda_graph_scope ), 'moe_preprocess cuda graph is only supported with moe_router cuda graph.' if self.num_moe_experts is None or self.num_moe_experts <= 1: assert ( - 'moe' not in self.cuda_graph_scope - and 'moe_router' not in self.cuda_graph_scope + CudaGraphScope.moe not in self.cuda_graph_scope + and CudaGraphScope.moe_router not in self.cuda_graph_scope ), 'moe cuda graph is only supported for MoE.' else: if self.moe_layer_freq == 1 or ( isinstance(self.moe_layer_freq, list) and 0 not in self.moe_layer_freq ): - assert 'mlp' not in self.cuda_graph_scope, ( + assert CudaGraphScope.mlp not in self.cuda_graph_scope, ( 'mlp cuda graph is only supported for dense layers, ' 'but not found in the model.' ) @@ -1660,13 +1671,13 @@ def __post_init__(self): or not self.moe_pad_expert_input_to_capacity ): assert ( - 'moe' not in self.cuda_graph_scope + CudaGraphScope.moe not in self.cuda_graph_scope ), 'moe cuda graph is only supported with drop-padding MoE.' 
if self.moe_token_dispatcher_type == 'alltoall' and ( self.moe_expert_capacity_factor is not None or self.moe_router_padding_for_quantization ): - assert 'moe_preprocess' not in self.cuda_graph_scope, ( + assert CudaGraphScope.moe_preprocess not in self.cuda_graph_scope, ( 'moe_preprocess cuda graph is not supported when there are ' 'DtoH copies and synchronizations in the preprocess step.' ) @@ -1676,25 +1687,28 @@ def __post_init__(self): raise ValueError( "Full-layer CUDA graphs not supported with activation recomputation." ) - elif self.cuda_graph_scope != ['full_iteration']: + elif self.cuda_graph_scope != [CudaGraphScope.full_iteration]: # For scoped CUDA graphs, only the non-graphed parts of the layer can be # recomputed. So check if there are overlaps between the recomputed parts # and the graphed parts. - if "attn" in self.cuda_graph_scope: + if CudaGraphScope.attn in self.cuda_graph_scope: for module in self.recompute_modules: if module in ['core_attn', 'mla_up_proj']: raise ValueError( f'attn cuda graph is not supported with {module} recompute.' ) - if "mlp" in self.cuda_graph_scope and "mlp" in self.recompute_modules: + if ( + CudaGraphScope.mlp in self.cuda_graph_scope + and "mlp" in self.recompute_modules + ): raise ValueError(f'mlp cuda graph is not supported with mlp recompute.') - if "moe" in self.cuda_graph_scope: + if CudaGraphScope.moe in self.cuda_graph_scope: for module in self.recompute_modules: if module in ['moe_act', 'moe', 'shared_experts']: raise ValueError( f'moe cuda graph is not supported with {module} recompute.' 
) - if "moe_router" in self.cuda_graph_scope: + if CudaGraphScope.moe_router in self.cuda_graph_scope: for module in self.recompute_modules: if module in ['moe', 'shared_experts']: raise ValueError( @@ -1703,25 +1717,25 @@ def __post_init__(self): ) if "layernorm" in self.recompute_modules: if ( - "attn" in self.cuda_graph_scope - and "mlp" in self.cuda_graph_scope + CudaGraphScope.attn in self.cuda_graph_scope + and CudaGraphScope.mlp in self.cuda_graph_scope and ( - "moe" in self.cuda_graph_scope - or "moe_router" in self.cuda_graph_scope + CudaGraphScope.moe in self.cuda_graph_scope + or CudaGraphScope.moe_router in self.cuda_graph_scope ) ): raise ValueError( 'cuda graph is not supported with layernorm recompute.' ) - if "attn" in self.cuda_graph_scope: + if CudaGraphScope.attn in self.cuda_graph_scope: warnings.warn( "input_layernorm recompute is not supported with attention " "cudagraph. Will only recompute the pre_mlp_layernorm." ) if ( - "mlp" in self.cuda_graph_scope - or "moe" in self.cuda_graph_scope - or "moe_router" in self.cuda_graph_scope + CudaGraphScope.mlp in self.cuda_graph_scope + or CudaGraphScope.moe in self.cuda_graph_scope + or CudaGraphScope.moe_router in self.cuda_graph_scope ): warnings.warn( "pre_mlp_layernorm recompute is not supported with mlp/moe " diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index f89678e6216..3ea40577009 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -16,7 +16,7 @@ from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.cuda_graphs import is_graph_capturing -from megatron.core.transformer.enums import LayerType +from megatron.core.transformer.enums import CudaGraphScope, LayerType from megatron.core.transformer.identity_op import IdentityFuncOp, IdentityOp from 
megatron.core.transformer.mlp import MLP from megatron.core.transformer.module import GraphableMegatronModule @@ -382,18 +382,21 @@ def __init__( if "layernorm" in self.config.recompute_modules: if not isinstance(self.input_layernorm, IdentityOp) and ( self.config.cuda_graph_impl == "none" - or 'attn' not in self.config.cuda_graph_scope + or CudaGraphScope.attn not in self.config.cuda_graph_scope ): self.recompute_input_layernorm = True if self.config.fp8 or self.config.fp4: self.self_attention.set_for_recompute_input_layernorm() if not isinstance(self.pre_mlp_layernorm, IdentityOp) and ( self.config.cuda_graph_impl == "none" - or (not self.is_moe_layer and 'mlp' not in self.config.cuda_graph_scope) + or ( + not self.is_moe_layer + and CudaGraphScope.mlp not in self.config.cuda_graph_scope + ) or ( self.is_moe_layer - and 'moe' not in self.config.cuda_graph_scope - and 'moe_router' not in self.config.cuda_graph_scope + and CudaGraphScope.moe not in self.config.cuda_graph_scope + and CudaGraphScope.moe_router not in self.config.cuda_graph_scope ) ): self.recompute_pre_mlp_layernorm = True @@ -634,12 +637,13 @@ def _forward_mlp(self, hidden_states, inference_context=None): and self.config.cuda_graph_impl == "transformer_engine" and self.training and is_graph_capturing() - and 'moe_router' in self.config.cuda_graph_scope + and CudaGraphScope.moe_router in self.config.cuda_graph_scope ): assert ( not self.recompute_pre_mlp_layernorm ), "Recomputation is not supported for CUDA graph." cudagraph_outputs = self.mlp(pre_mlp_layernorm_output) + nvtx_range_pop(suffix="mlp") return cudagraph_outputs + [residual] elif self.recompute_mlp: if self.config.fp8 or self.config.fp4: @@ -694,6 +698,7 @@ def _forward_post_mlp(self, mlp_output_with_bias, residual): Returns: output (Tensor): Transformed hidden states of shape [s, b, h]. 
""" + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( fine_grained_offloading_group_commit, ) @@ -757,7 +762,7 @@ def get_layer_static_inputs(self, seq_length, micro_batch_size): static_inputs = super().get_layer_static_inputs(seq_length, micro_batch_size) if not isinstance(self.self_attention, IdentityOp) and ( - not self.config.cuda_graph_scope or 'attn' in self.config.cuda_graph_scope + not self.config.cuda_graph_scope or CudaGraphScope.attn in self.config.cuda_graph_scope ): slen_per_cp = seq_length // self.config.context_parallel_size static_inputs["attention_mask"] = ( @@ -776,18 +781,18 @@ def _get_submodules_under_cudagraphs(self): return super()._get_submodules_under_cudagraphs() submodules = [] - if 'attn' in self.config.cuda_graph_scope: + if CudaGraphScope.attn in self.config.cuda_graph_scope: submodules += [ self.input_layernorm, self.self_attention, self.pre_cross_attn_layernorm, self.cross_attention, ] - if (not self.is_moe_layer and 'mlp' in self.config.cuda_graph_scope) or ( - self.is_moe_layer and 'moe' in self.config.cuda_graph_scope + if (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) or ( + self.is_moe_layer and CudaGraphScope.moe in self.config.cuda_graph_scope ): submodules += [self.pre_mlp_layernorm, self.mlp] - elif self.is_moe_layer and 'moe_router' in self.config.cuda_graph_scope: + elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope: submodules += [self.pre_mlp_layernorm, self.mlp.router] if ( self.config.moe_shared_expert_intermediate_size is not None @@ -805,7 +810,7 @@ def _te_cuda_graph_capture(self, *args, **kwargs): 2. If context is None, it cannot be returned as output. 
""" context = None - if not self.config.cuda_graph_scope or 'attn' in self.config.cuda_graph_scope: + if not self.config.cuda_graph_scope or CudaGraphScope.attn in self.config.cuda_graph_scope: hidden_states, context = self._forward_attention(*args, **kwargs) else: if len(args) > 0: @@ -815,12 +820,12 @@ def _te_cuda_graph_capture(self, *args, **kwargs): if ( not self.config.cuda_graph_scope - or (not self.is_moe_layer and 'mlp' in self.config.cuda_graph_scope) + or (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) or ( self.is_moe_layer and ( - 'moe' in self.config.cuda_graph_scope - or 'moe_router' in self.config.cuda_graph_scope + CudaGraphScope.moe in self.config.cuda_graph_scope + or CudaGraphScope.moe_router in self.config.cuda_graph_scope ) ) ): @@ -841,7 +846,7 @@ def _te_cuda_graph_replay(self, *args, **kwargs): Hence, `inference_context` and `packed_seq_params` are excluded from input list. """ context = None - if self.config.cuda_graph_scope and 'attn' not in self.config.cuda_graph_scope: + if self.config.cuda_graph_scope and CudaGraphScope.attn not in self.config.cuda_graph_scope: hidden_states, context = self._forward_attention(*args, **kwargs) args = (hidden_states,) kwargs = {} @@ -861,13 +866,13 @@ def _te_cuda_graph_replay(self, *args, **kwargs): if ( not self.config.cuda_graph_scope - or (not self.is_moe_layer and 'mlp' in self.config.cuda_graph_scope) - or (self.is_moe_layer and 'moe' in self.config.cuda_graph_scope) + or (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) + or (self.is_moe_layer and CudaGraphScope.moe in self.config.cuda_graph_scope) ): # CUDA Graph captures the whole MLP/MoE part. CUDA Graph output is the layer output. assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output." 
output = cuda_graph_output.pop() - elif self.is_moe_layer and 'moe_router' in self.config.cuda_graph_scope: + elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope: # CUDA Graph partially captures the MoE. # The rest of the layer should go to the normal pass. shared_expert_output, routing_map, residual = None, None, None @@ -882,7 +887,7 @@ def _te_cuda_graph_replay(self, *args, **kwargs): # Split cudagraph outputs into function outputs and attribute outputs, and # process them separately. Function outputs should have three tensors. func_output, attr_outputs = cuda_graph_output[:3], cuda_graph_output[3:] - if 'moe_preprocess' in self.config.cuda_graph_scope: + if CudaGraphScope.moe_preprocess in self.config.cuda_graph_scope: hidden_states, probs, residual = func_output valid_cudagraph_attrs = self.mlp.token_dispatcher.valid_cudagraph_attrs assert len(attr_outputs) == len( @@ -989,7 +994,7 @@ def _should_call_local_cudagraph(self, *args, **kwargs): (kwargs.get('inference_context') is not None) or (kwargs.get('inference_params') is not None) ) - and 'full_iteration' not in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope ): if kwargs['inference_context'].is_static_batching(): using_cuda_graph = kwargs['inference_context'].is_decode_only() diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index bb1b17e9ba2..15576e2ceac 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -23,7 +23,7 @@ from megatron.core.rerun_state_machine import RerunStateMachine from megatron.core.transformer import MLATransformerConfig, TransformerConfig from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout -from megatron.core.transformer.enums import AttnBackend +from megatron.core.transformer.enums import AttnBackend, CudaGraphScope from megatron.core.transformer.heterogeneous.heterogeneous_config import ( 
HeterogeneousTransformerConfig, MLPConfig, @@ -772,7 +772,7 @@ def validate_args(args, defaults={}): if args.rank == 0: print('accumulate and all-reduce gradients in fp32 for ' 'bfloat16 data type.', flush=True) - if args.cuda_graph_impl == "local" and "full_iteration" in args.cuda_graph_scope: + if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope: if not args.inference_dynamic_batching: assert not args.check_for_nan_in_loss_and_grad, \ "--no-check-for-nan-in-loss-and-grad should be set with full_iteration CUDA graph" @@ -1265,6 +1265,15 @@ def validate_args(args, defaults={}): assert ( args.recompute_granularity != 'full' ), 'recompute_granularity must not be full when CUDA Graphs are enabled.' + if args.cuda_graph_scope == "full" or ( + isinstance(args.cuda_graph_scope, list) and "full" in args.cuda_graph_scope + ): + if isinstance(args.cuda_graph_scope, list): + assert args.cuda_graph_scope == ["full"], "full scope cannot be used with other scopes." + args.cuda_graph_scope = [] + warn_rank_0( + 'full scope is deprecated. Use empty cuda_graph_scope to capture the whole layer.' + ) if args.multi_latent_attention: assert not args.group_query_attention, "Group query attention is mutually exclusive with multi latent attention." @@ -1486,7 +1495,7 @@ def _add_inference_args(parser): '"none": no CUDA graph. ' '"local": capture the CUDA graph using MCore local implementation. --cuda-graph-scope=\"full_iteration\" enables whole iteration CUDA graph. ' '"transformer_engine": capture the CUDA graph using TE make_graphed_callables().') - group.add_argument('--cuda-graph-scope', nargs='+', type=str, default=[], + group.add_argument('--cuda-graph-scope', nargs='+', type=lambda scope: CudaGraphScope[scope] if scope != "full" else scope, default=[], help='Determines the CUDA graphs capturing scope. ' 'choices: "attn", "mlp", "moe", "moe_router", "moe_preprocess", "mamba", "full_iteration". 
' '"attn": captures operations in TransformerLayer._forward_attention(). ' @@ -1498,7 +1507,8 @@ def _add_inference_args(parser): '"mamba": captures the mamba layer. ' '"full_iteration": captures a whole iteration. ' 'full_iteration scope is only supported with --cuda-graph-impl=local, other scopes are only supported with --cuda-graph-impl=transformer_engine. ' - 'If not specified, the default scope is to capture the whole Transformer layer.') + 'If not specified, the default scope is to capture the whole Transformer layer. ' + 'For backward compatibility, we still allow passing "full" to specify capturing the whole layer, and convert it to an empty list.') group.add_argument('--use-legacy-static-engine', action='store_true', default=False, help='Use legacy static engine. (Current static engine uses dynamic engine under the hood)', dest='use_legacy_static_engine') diff --git a/megatron/training/training.py b/megatron/training/training.py index 9fe372a3780..555cc0ecfee 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -59,6 +59,7 @@ from megatron.training.checkpointing import checkpoint_exists from megatron.core.full_cuda_graph import FullCudaGraphWrapper from megatron.core.transformer.cuda_graphs import TECudaGraphHelper +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.module import Float16Module from megatron.core.distributed import DistributedDataParallelConfig, TorchFullyShardedDataParallelConfig from megatron.core.distributed import DistributedDataParallel as DDP @@ -2265,7 +2266,7 @@ def train( eval_iterations = 0 # Wrap forward_backward_func for Full iteration CUDA graph forward_backward_func = get_forward_backward_func() - if args.cuda_graph_impl == "local" and "full_iteration" in args.cuda_graph_scope: + if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope: forward_backward_func = FullCudaGraphWrapper(forward_backward_func, 
cuda_graph_warmup_steps=args.cuda_graph_warmup_steps) def get_e2e_base_metrics(): @@ -2614,6 +2615,10 @@ def get_e2e_base_metrics(): if should_exit: break + # Destroy CUDA Graphs. + if args.cuda_graph_impl == "transformer_engine" and cuda_graph_helper.graphs_created(): + cuda_graph_helper.delete_cuda_graphs() + one_logger_utils.track_e2e_metrics() # Flush TensorBoard, WandB writers and one-logger. @@ -2687,7 +2692,7 @@ def evaluate( eval_batch_size = args.global_batch_size eval_num_microbatches = eval_batch_size // (args.micro_batch_size * args.data_parallel_size) forward_backward_func = get_forward_backward_func() - if args.cuda_graph_impl == "local" and "full_iteration" in args.cuda_graph_scope: + if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope: forward_backward_func = FullCudaGraphWrapper(forward_backward_func, cuda_graph_warmup_steps=args.cuda_graph_warmup_steps) if eval_iters is None: diff --git a/tests/unit_tests/inference/engines/test_dynamic_engine.py b/tests/unit_tests/inference/engines/test_dynamic_engine.py index 0ac4b296746..26d3dcfbd6d 100644 --- a/tests/unit_tests/inference/engines/test_dynamic_engine.py +++ b/tests/unit_tests/inference/engines/test_dynamic_engine.py @@ -3,7 +3,7 @@ import asyncio import random import types -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple import pytest @@ -41,6 +41,7 @@ from megatron.core.models.mamba.mamba_model import MambaModel from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed from megatron.core.transformer.cuda_graphs import CudaGraphManager, _CudagraphGlobalRecord +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import ( check_mamba_sequence_packing_support, @@ -103,7 +104,9 @@ class DynamicEngineTestConfig: return_log_probs: bool = False 
materialize_only_last_token_logits: bool = True skip_prompt_log_probs: bool = False - cuda_graph_scope: List[str] = None + cuda_graph_scope: List[CudaGraphScope] = field( + default_factory=lambda: [CudaGraphScope.full_iteration] + ) force_build_cuda_graphs: bool = False # If False, do not build cuda graphs in the tests, even if # num_cuda_graphs is set. @@ -136,9 +139,6 @@ def __post_init__(self): if self.context_max_tokens_override is None: self.context_max_tokens_override = self.num_requests * self.max_sequence_length - if self.cuda_graph_scope is None: - self.cuda_graph_scope = ["full_iteration"] - @dataclass class DynamicEngineTestEnv: @@ -514,7 +514,7 @@ def teardown_method(self, method): ) @pytest.mark.parametrize("model_provider", ["gpt", "mamba"]) @pytest.mark.parametrize("num_cuda_graphs", [None, 1, 4]) - @pytest.mark.parametrize("cuda_graph_scope", [[], ["full_iteration"]]) + @pytest.mark.parametrize("cuda_graph_scope", [[], [CudaGraphScope.full_iteration]]) def test_simple(self, model_provider, num_cuda_graphs, cuda_graph_scope) -> None: """Simple test that runs without errors, and validates output.""" skip_if_mamba_sequence_packing_not_available(model_provider) diff --git a/tests/unit_tests/test_fp8_param.py b/tests/unit_tests/test_fp8_param.py index 0b8d41769ec..361698f7127 100644 --- a/tests/unit_tests/test_fp8_param.py +++ b/tests/unit_tests/test_fp8_param.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import contextlib import gc @@ -36,7 +36,10 @@ try: from transformer_engine.pytorch.tensor.utils import post_all_gather_processing - cuda_graph_supported = True + if is_te_min_version("2.10.0"): + cuda_graph_supported = True + else: + reason_for_no_cuda_graph = "Need newer TransformerEngine" except ImportError: reason_for_no_cuda_graph = "Need newer TransformerEngine" @@ -65,12 +68,16 @@ class TestFP8Param: def setup_method(self, method): self.seq_length = 512 self.micro_batch_size = 2 + self.cuda_graph_helper = None os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1' def teardown_method(self, method): Utils.destroy_model_parallel() destroy_global_vars() destroy_num_microbatches_calculator() + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None gc.collect() def model_provider( @@ -209,13 +216,12 @@ def _run_test_helper( ) assert len(gpt_model) == 1 # Assume only one model in the model provider. - cuda_graph_helper = None # Hard coded to use cuda_graph_impl="transformer_engine" cuda_graph_impl = "transformer_engine" if use_cuda_graph and cuda_graph_impl == "transformer_engine": from megatron.core.transformer.cuda_graphs import TECudaGraphHelper - cuda_graph_helper = TECudaGraphHelper( + self.cuda_graph_helper = TECudaGraphHelper( model=gpt_model, config=gpt_model[0].config, seq_length=self.seq_length, @@ -250,13 +256,13 @@ def _run_test_helper( # Capture CUDA graphs after warmup if helper is provided. # Hard coded cuda_graph_warmup_steps = 0. 
cuda_graph_warmup_steps = 0 - if cuda_graph_helper is not None and i == cuda_graph_warmup_steps: + if self.cuda_graph_helper is not None and i == cuda_graph_warmup_steps: if should_disable_forward_pre_hook(args): disable_forward_pre_hook(gpt_model, param_sync=False) - cuda_graph_helper.create_cudagraphs() + self.cuda_graph_helper.create_cudagraphs() if should_disable_forward_pre_hook(args): enable_forward_pre_hook(gpt_model) - cuda_graph_helper.cuda_graph_set_manual_hooks() + self.cuda_graph_helper.cuda_graph_set_manual_hooks() # For the mxfp8_param with reuse_grad_buf_for_mxfp8_param_ag and dp_ag_overlap, # we need to call the _copy_main_params_to_param_buffer() after the grad buffer @@ -297,6 +303,10 @@ def _run_test_helper( loss_list.append(loss.item()) + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None + return torch.tensor(loss_list) def run_test(self, tp_size, recipe, inference: bool = False, **kwargs): diff --git a/tests/unit_tests/transformer/test_cuda_graphs.py b/tests/unit_tests/transformer/test_cuda_graphs.py index 3ad0262a1cf..cee75171560 100644 --- a/tests/unit_tests/transformer/test_cuda_graphs.py +++ b/tests/unit_tests/transformer/test_cuda_graphs.py @@ -9,6 +9,7 @@ import pytest import torch +from transformer_engine.pytorch.fp8 import check_fp8_support from megatron.core import parallel_state from megatron.core.enums import ModelType @@ -25,6 +26,7 @@ TextGenerationController, ) from megatron.core.models.gpt.gpt_layer_specs import ( + get_gpt_decoder_block_spec, get_gpt_layer_local_spec, get_gpt_layer_with_transformer_engine_spec, get_gpt_mtp_block_spec, @@ -41,6 +43,8 @@ model_parallel_cuda_manual_seed, ) from megatron.core.transformer.cuda_graphs import CudaGraphManager, _CudagraphGlobalRecord +from megatron.core.transformer.enums import CudaGraphScope +from megatron.core.transformer.moe.fused_a2a import reset_hybrid_ep_buffer from 
megatron.core.transformer.transformer_block import TransformerBlock from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import is_fa_min_version, is_te_min_version @@ -54,6 +58,8 @@ from megatron.training.training import setup_model_and_optimizer from tests.unit_tests.test_utilities import Utils +fp8_available, _ = check_fp8_support() + class TestParallelTransformerBlockCudagraphs: def setup_method(self, method): @@ -747,6 +753,9 @@ class TestPartialCudaGraph: def setup_method(self, method): self.seq_length = 512 self.micro_batch_size = 2 + self.tp_size = 2 + self.cp_size = 2 + self.cuda_graph_helper = None # Store original environment variable values self.original_env = { 'CUDA_DEVICE_MAX_CONNECTIONS': os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS'), @@ -762,22 +771,28 @@ def teardown_method(self, method): os.environ.pop(key, None) else: os.environ[key] = value - Utils.destroy_model_parallel() destroy_global_vars() destroy_num_microbatches_calculator() + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None gc.collect() def model_provider( self, pre_process=True, post_process=True, - layer_spec_fn=get_gpt_layer_with_transformer_engine_spec, + layer_spec_fn=get_gpt_decoder_block_spec, **config_kwargs, ): - model_parallel_cuda_manual_seed(123) args = get_args() config = core_transformer_config_from_args(args) - transformer_layer_spec = layer_spec_fn() + transformer_layer_spec = layer_spec_fn( + config, + use_transformer_engine=True, + normalization=args.normalization, + qk_l2_norm=args.qk_l2_norm, + ) if args.mtp_num_layers: mtp_block_spec = get_gpt_mtp_block_spec( config, transformer_layer_spec, use_transformer_engine=True @@ -810,18 +825,17 @@ def create_test_args( args.num_layers = 4 args.mtp_num_layers = 1 args.vocab_size = 1024 - args.hidden_size = 128 + args.hidden_size = 512 args.num_attention_heads = 8 
args.max_position_embeddings = 512 - args.global_batch_size = self.micro_batch_size * 8 + args.global_batch_size = self.micro_batch_size * 8 // self.tp_size // self.cp_size args.micro_batch_size = self.micro_batch_size args.create_attention_mask_in_dataloader = True args.seq_length = self.seq_length - args.tensor_model_parallel_size = 2 - args.sequence_parallel = True + args.tensor_model_parallel_size = self.tp_size + args.sequence_parallel = True if self.tp_size > 1 else False args.pipeline_model_parallel_size = 1 - args.context_parallel_size = 1 - args.expert_model_parallel_size = ep_size + args.context_parallel_size = self.cp_size args.train_iters = 10 args.lr = 3e-5 args.bf16 = True @@ -836,17 +850,26 @@ def create_test_args( # MoE settings args.num_experts = 4 args.expert_model_parallel_size = ep_size + args.expert_tensor_parallel_size = 1 if ep_size > 1 else self.tp_size args.moe_shared_expert_intermediate_size = 1024 - args.moe_layer_freq = "[0,0,1,1]" + args.moe_layer_freq = [0, 0, 1, 1] args.moe_permute_fusion = True args.moe_router_fusion = True args.moe_router_topk = 2 + args.moe_router_dtype = "fp32" # CUDA graph settings args.cuda_graph_impl = cuda_graph_impl args.cuda_graph_scope = cuda_graph_scope args.cuda_graph_warmup_steps = cuda_graph_warmup_steps - args.use_te_rng_tracker = cuda_graph_impl != "none" + + # fp8 settings + if fp8_available: + args.fp8 = "e4m3" + args.fp8_recipe = "tensorwise" + args.first_last_layers_bf16 = True + args.num_layers_at_start_in_bf16 = 1 + args.num_layers_at_end_in_bf16 = 1 for key, value in kwargs.items(): assert hasattr(args, key) @@ -856,15 +879,15 @@ def create_test_args( set_global_variables(args, False) return args - def get_batch(self, seq_length, micro_batch_size): - data = list(range(seq_length)) + def get_batch(self, seq_length, micro_batch_size, cp_size): + data = list(range(seq_length // cp_size)) input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() labels = 1 + 
torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() attention_mask = torch.ones( - (micro_batch_size, 1, seq_length, seq_length), dtype=bool + (micro_batch_size, 1, seq_length // cp_size, seq_length), dtype=bool ).cuda() - loss_mask = torch.ones(seq_length).repeat((micro_batch_size, 1)).cuda() + loss_mask = torch.ones(seq_length // cp_size).repeat((micro_batch_size, 1)).cuda() return input_ids, labels, position_ids, attention_mask, loss_mask def _run_test_helper( @@ -877,12 +900,10 @@ def _run_test_helper( set_args(args) torch.manual_seed(123) - Utils.initialize_model_parallel( - tensor_model_parallel_size=2, expert_model_parallel_size=ep_size - ) + model_parallel_cuda_manual_seed(123) input_ids, labels, position_ids, attention_mask, loss_mask = self.get_batch( - self.seq_length, self.micro_batch_size + self.seq_length, self.micro_batch_size, self.cp_size ) gpt_model, optimizer, _ = setup_model_and_optimizer( @@ -890,13 +911,10 @@ def _run_test_helper( ) assert len(gpt_model) == 1 # Assume only one model in the model provider. 
- loss_list = [] - - cuda_graph_helper = None if cuda_graph_impl == "transformer_engine": from megatron.core.transformer.cuda_graphs import TECudaGraphHelper - cuda_graph_helper = TECudaGraphHelper( + self.cuda_graph_helper = TECudaGraphHelper( model=gpt_model, config=gpt_model[0].config, seq_length=self.seq_length, @@ -904,14 +922,17 @@ def _run_test_helper( optimizers=[optimizer], ) + loss_list = [] + for i in range(100): gpt_model[0].zero_grad_buffer() optimizer.zero_grad() # Capture CUDA graphs after warmup if helper is provided - if cuda_graph_helper is not None and i == cuda_graph_warmup_steps: - cuda_graph_helper.create_cudagraphs() + if self.cuda_graph_helper is not None and i == cuda_graph_warmup_steps: + self.cuda_graph_helper.create_cudagraphs() + gpt_model[0].set_is_first_microbatch() output = gpt_model[0].forward( input_ids=input_ids, position_ids=position_ids, @@ -922,7 +943,7 @@ def _run_test_helper( # Check output shapes assert output.shape[0] == self.micro_batch_size - assert output.shape[1] == self.seq_length + assert output.shape[1] == self.seq_length // self.cp_size # Verify gradients loss = output.mean() @@ -936,16 +957,29 @@ def _run_test_helper( loss_list.append(loss.item()) + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None + return torch.tensor(loss_list) @pytest.mark.skipif( - not (HAVE_TE and is_te_min_version("1.14.0")), - reason="Partial CUDA graph support requires TransformerEngine version >= 1.14.0", + not (HAVE_TE and is_te_min_version("2.10.0")), + reason="Partial CUDA graph UT support requires TransformerEngine version >= 2.10.0", ) @pytest.mark.parametrize("ep_size", [1, 4]) @pytest.mark.parametrize("moe_dropless_dispatcher", [False, True]) @pytest.mark.parametrize("moe_dispatcher_type", ["alltoall", "deepep", "hybridep"]) def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispatcher_type): + 
initialize_rng_tracker(use_te_rng_tracker=True, force_reset=True) + Utils.initialize_model_parallel( + tensor_model_parallel_size=self.tp_size, + context_parallel_size=self.cp_size, + pipeline_model_parallel_size=1, + expert_tensor_parallel_size=1 if ep_size > 1 else self.tp_size, + expert_model_parallel_size=ep_size, + ) + extra_kwargs = {} if moe_dispatcher_type == "deepep": if not is_deep_ep_available(): @@ -962,19 +996,28 @@ def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispa if not moe_dropless_dispatcher: if moe_dispatcher_type == "deepep": pytest.skip("Deep EP doesn't support drop&pad MoE") + if moe_dispatcher_type == "hybridep" and ep_size == 1: + pytest.skip("Hybrid EP doesn't support drop&pad MoE with ep_size == 1") extra_kwargs["moe_expert_capacity_factor"] = 1.0 extra_kwargs["moe_pad_expert_input_to_capacity"] = True loss_list_ref = self._run_test_helper(ep_size, "none", None, 0, **extra_kwargs) for cuda_graph_scope in [ None, - ["attn"], - ["moe"], - ["mlp", "moe_router"], - ["attn", "mlp", "moe_router", "moe_preprocess"], + [CudaGraphScope.attn], + [CudaGraphScope.moe], + [CudaGraphScope.mlp, CudaGraphScope.moe_router], + [ + CudaGraphScope.attn, + CudaGraphScope.mlp, + CudaGraphScope.moe_router, + CudaGraphScope.moe_preprocess, + ], ]: - if moe_dropless_dispatcher and (cuda_graph_scope is None or "moe" in cuda_graph_scope): - # Dropless MoE doesn't work with "moe" scope cudagraph. Skip. + if (moe_dropless_dispatcher or moe_dispatcher_type == "hybridep") and ( + cuda_graph_scope is None or CudaGraphScope.moe in cuda_graph_scope + ): + # Dropless MoE or Hybrid EP doesn't work with "moe" scope cudagraph. Skip. 
continue cuda_graph_warmup_steps = 3 loss_list = self._run_test_helper( @@ -986,6 +1029,10 @@ def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispa ) assert torch.equal(loss_list, loss_list_ref) + if moe_dispatcher_type == "hybridep": + reset_hybrid_ep_buffer() + Utils.destroy_model_parallel() + if __name__ == "__main__": From b0c96b3c99dcb4037a638f0f2a35128786a11939 Mon Sep 17 00:00:00 2001 From: Kunlun Li <94586211+kunlunl@users.noreply.github.com> Date: Mon, 1 Dec 2025 17:30:28 +0800 Subject: [PATCH 165/334] [dev] DeepSeek V3.2 support (#2154) Signed-off-by: kunlunl --- gpt_builders.py | 7 +- ...rimental_attention_variant_module_specs.py | 132 ++ megatron/core/models/gpt/gpt_layer_specs.py | 52 +- .../gpt/linear_attention_module_specs.py | 27 - megatron/core/transformer/attention.py | 1 + .../experimental_attention_variant/dsa.py | 822 +++++++++++ .../transformer/multi_latent_attention.py | 87 +- .../core/transformer/transformer_config.py | 42 +- megatron/training/arguments.py | 35 +- megatron/training/training.py | 16 +- tests/unit_tests/ssm/test_gated_delta_net.py | 4 +- .../transformer/test_attention_variant_dsa.py | 1271 +++++++++++++++++ 12 files changed, 2404 insertions(+), 92 deletions(-) create mode 100644 megatron/core/models/gpt/experimental_attention_variant_module_specs.py delete mode 100644 megatron/core/models/gpt/linear_attention_module_specs.py create mode 100644 megatron/core/transformer/experimental_attention_variant/dsa.py create mode 100644 tests/unit_tests/transformer/test_attention_variant_dsa.py diff --git a/gpt_builders.py b/gpt_builders.py index 9fa1aff72c7..61d159b9967 100644 --- a/gpt_builders.py +++ b/gpt_builders.py @@ -42,7 +42,8 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None): else: use_te = args.transformer_impl == "transformer_engine" - if args.num_experts or (args.linear_attention_type is not None): + linear_attention_variants = ["gated_delta_net"] + if args.num_experts 
or args.experimental_attention_variant in linear_attention_variants: # Define the decoder block spec transformer_layer_spec = get_gpt_decoder_block_spec( config, @@ -114,7 +115,7 @@ def _get_transformer_layer_spec(use_te, config): args.moe_grouped_gemm, args.qk_layernorm, args.multi_latent_attention, - args.linear_attention_type, + args.experimental_attention_variant, moe_use_legacy_grouped_gemm=args.moe_use_legacy_grouped_gemm, qk_l2_norm=args.qk_l2_norm, use_kitchen=config.use_kitchen, @@ -126,7 +127,7 @@ def _get_transformer_layer_spec(use_te, config): args.moe_grouped_gemm, args.qk_layernorm, args.multi_latent_attention, - args.linear_attention_type, + args.experimental_attention_variant, moe_use_legacy_grouped_gemm=args.moe_use_legacy_grouped_gemm, normalization=args.normalization, use_kitchen=config.use_kitchen, diff --git a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py new file mode 100644 index 00000000000..cbe59618baf --- /dev/null +++ b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py @@ -0,0 +1,132 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +from typing import Optional + +from megatron.core.models.backends import BackendSpecProvider +from megatron.core.ssm.gated_delta_net import GatedDeltaNet, GatedDeltaNetSubmodules +from megatron.core.transformer.enums import AttnMaskType +from megatron.core.transformer.experimental_attention_variant.dsa import ( + DSAIndexer, + DSAIndexerSubmodules, + DSAttention, + DSAttentionSubmodules, +) +from megatron.core.transformer.identity_op import IdentityOp +from megatron.core.transformer.multi_latent_attention import ( + MLASelfAttention, + MLASelfAttentionSubmodules, +) +from megatron.core.transformer.spec_utils import ModuleSpec + + +def get_gated_delta_net_module_spec_for_backend( + backend: BackendSpecProvider, normalization: Optional[str] = None +) -> ModuleSpec: + """Helper function to get module spec for Linear Attention""" + rms_norm = normalization == "RMSNorm" + attention = ModuleSpec( + module=GatedDeltaNet, + submodules=GatedDeltaNetSubmodules( + in_proj=backend.column_parallel_layer_norm_linear(), + out_norm=backend.layer_norm(rms_norm=rms_norm, for_qk=False), + out_proj=backend.row_parallel_linear(), + ), + metainfo={"fuse_input_layernorm": True}, + ) + return attention + + +def get_dsa_module_spec_for_backend( + backend: BackendSpecProvider, + qk_layernorm: Optional[bool] = False, + qk_l2_norm: Optional[bool] = False, + multi_latent_attention: Optional[bool] = False, + mla_down_proj_use_column_parallel: Optional[bool] = False, + normalization: Optional[str] = None, + fallback_to_eager_attn: Optional[bool] = False, +) -> ModuleSpec: + """Helper function to get module spec for Sparse Attention.""" + assert multi_latent_attention, "Currently only MLA supports sparse attention." + assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA." + assert fallback_to_eager_attn is False, "Fallback to eager attention is not supported with DSA." 
+ + linear_q_down_proj = ( + backend.column_parallel_linear() if mla_down_proj_use_column_parallel else backend.linear() + ) + linear_kv_down_proj = ( + backend.column_parallel_linear() if mla_down_proj_use_column_parallel else backend.linear() + ) + linear_q_up_proj = backend.column_parallel_linear() + linear_kv_up_proj = backend.column_parallel_linear() + + # Because TransformerEngine does not support sparse attention yet, we use local + # implementation whether the backend is TransformerEngine or not. + core_attention = ModuleSpec( + module=DSAttention, + submodules=DSAttentionSubmodules( + indexer=ModuleSpec( + module=DSAIndexer, + submodules=DSAIndexerSubmodules( + linear_wq_b=backend.linear(), + linear_wk=backend.linear(), + k_norm=backend.layer_norm(rms_norm=False, for_qk=True), + linear_weights_proj=backend.linear(), + ), + ) + ), + ) + + # Adjust for RMS norm. + rms_norm = normalization == "RMSNorm" + qk_norm = backend.layer_norm(rms_norm=rms_norm, for_qk=True) if qk_layernorm else IdentityOp + + attention = ModuleSpec( + module=MLASelfAttention, + params={"attn_mask_type": AttnMaskType.causal}, + submodules=MLASelfAttentionSubmodules( + linear_q_proj=backend.column_parallel_linear(), + linear_q_down_proj=linear_q_down_proj, + linear_q_up_proj=linear_q_up_proj, + linear_kv_down_proj=linear_kv_down_proj, + linear_kv_up_proj=linear_kv_up_proj, + core_attention=core_attention, + linear_proj=backend.row_parallel_linear(), + q_layernorm=qk_norm, + kv_layernorm=qk_norm, + ), + metainfo={"fuse_input_layernorm": False}, + ) + + return attention + + +def get_experimental_attention_variant_module_spec_for_backend( + backend: BackendSpecProvider, + sharded_state_dict_keys_map: dict, + experimental_attention_variant: Optional[str] = None, + qk_layernorm: Optional[bool] = False, + qk_l2_norm: Optional[bool] = False, + multi_latent_attention: Optional[bool] = False, + mla_down_proj_use_column_parallel: Optional[bool] = False, + normalization: Optional[str] = None, + 
fallback_to_eager_attn: Optional[bool] = False, +) -> ModuleSpec: + """Helper function to get module spec for Attention""" + if experimental_attention_variant == "gated_delta_net": + return get_gated_delta_net_module_spec_for_backend( + backend=backend, normalization=normalization + ) + elif experimental_attention_variant == "dsa": + return get_dsa_module_spec_for_backend( + backend=backend, + qk_layernorm=qk_layernorm, + qk_l2_norm=qk_l2_norm, + multi_latent_attention=multi_latent_attention, + mla_down_proj_use_column_parallel=mla_down_proj_use_column_parallel, + normalization=normalization, + fallback_to_eager_attn=fallback_to_eager_attn, + ) + else: + raise ValueError( + f"Invalid experimental attention variant: {experimental_attention_variant}" + ) diff --git a/megatron/core/models/gpt/gpt_layer_specs.py b/megatron/core/models/gpt/gpt_layer_specs.py index c5c9caa3d67..5395b158749 100755 --- a/megatron/core/models/gpt/gpt_layer_specs.py +++ b/megatron/core/models/gpt/gpt_layer_specs.py @@ -5,8 +5,8 @@ from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider -from megatron.core.models.gpt.linear_attention_module_specs import ( - get_linear_attention_module_spec_for_backend, +from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_experimental_attention_variant_module_spec_for_backend, ) from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec_for_backend from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules @@ -78,7 +78,7 @@ def get_gpt_layer_with_transformer_engine_spec( moe_grouped_gemm: Optional[bool] = False, qk_layernorm: Optional[bool] = False, multi_latent_attention: Optional[bool] = False, - linear_attention_type: Optional[str] = None, + experimental_attention_variant: Optional[str] = None, fp8: Optional[str] = None, # pylint: disable=unused-argument 
moe_use_legacy_grouped_gemm: Optional[bool] = False, normalization: Optional[str] = None, @@ -96,7 +96,8 @@ def get_gpt_layer_with_transformer_engine_spec( moe_grouped_gemm (bool, optional): To use Grouped GEMM. Defaults to False. qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False. multi_latent_attention (bool, optional): To use multi-latent attention. Defaults to False. - linear_attention_type (str, optional): The type of linear attention. Defaults to None. + experimental_attention_variant (str, optional): The type of experimental attention variant. + Defaults to None. fp8 (str, optional): Deprecated. For temporary Nemo compatibility. moe_use_legacy_grouped_gemm (bool, optional): Force use the legacy GroupedMLP. Defaults to False. @@ -133,7 +134,7 @@ def get_gpt_layer_with_transformer_engine_spec( attention = get_attention_module_spec_for_backend( backend=backend, sharded_state_dict_keys_map=sharded_state_dict_keys_map, - linear_attention_type=linear_attention_type, + experimental_attention_variant=experimental_attention_variant, qk_layernorm=qk_layernorm, qk_l2_norm=qk_l2_norm, multi_latent_attention=multi_latent_attention, @@ -166,7 +167,7 @@ def get_gpt_layer_local_spec( moe_grouped_gemm: Optional[bool] = False, qk_layernorm: Optional[bool] = False, multi_latent_attention: Optional[bool] = False, - linear_attention_type: Optional[str] = None, + experimental_attention_variant: Optional[str] = None, fp8: Optional[str] = None, # pylint: disable=unused-argument moe_use_legacy_grouped_gemm: Optional[bool] = False, normalization: Optional[str] = None, @@ -181,7 +182,8 @@ def get_gpt_layer_local_spec( moe_grouped_gemm (bool, optional): To use Grouped GEMM. Defaults to False. qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False. multi_latent_attention (bool, optional): To use multi-latent attention. Defaults to False. - linear_attention_type (str, optional): The type of linear attention. Defaults to None. 
+ experimental_attention_variant (str, optional): The type of experimental attention variant. + Defaults to None. fp8 (str, optional): Deprecated. For temporary Nemo compatibility. moe_use_legacy_grouped_gemm (bool, optional): Force use the legacy GroupedMLP. Defaults to False. @@ -205,15 +207,17 @@ def get_gpt_layer_local_spec( " and will be removed soon. Please update your code accordingly." ) - if linear_attention_type is not None: - raise NotImplementedError("Linear attention is not supported with local spec yet.") + if experimental_attention_variant is not None: + raise NotImplementedError( + "Experimental attention variant is not supported with local spec yet." + ) sharded_state_dict_keys_map = {} attention = get_attention_module_spec_for_backend( backend=backend, sharded_state_dict_keys_map=sharded_state_dict_keys_map, - linear_attention_type=linear_attention_type, + experimental_attention_variant=experimental_attention_variant, qk_layernorm=qk_layernorm, qk_l2_norm=qk_l2_norm, multi_latent_attention=multi_latent_attention, @@ -278,7 +282,7 @@ def get_transformer_layer_spec_for_backend( def get_attention_module_spec_for_backend( backend: BackendSpecProvider, sharded_state_dict_keys_map: dict, - linear_attention_type: Optional[str] = None, + experimental_attention_variant: Optional[str] = None, qk_layernorm: Optional[bool] = False, qk_l2_norm: Optional[bool] = False, multi_latent_attention: Optional[bool] = False, @@ -288,11 +292,17 @@ def get_attention_module_spec_for_backend( ) -> ModuleSpec: """Helper function to get module spec for Attention""" - if linear_attention_type is not None: - return get_linear_attention_module_spec_for_backend( - backend=backend, - linear_attention_type=linear_attention_type, - normalization=normalization, + if experimental_attention_variant is not None: + return get_experimental_attention_variant_module_spec_for_backend( + backend, + sharded_state_dict_keys_map, + experimental_attention_variant, + qk_layernorm, + qk_l2_norm, + 
multi_latent_attention, + mla_down_proj_use_column_parallel, + normalization, + fallback_to_eager_attn, ) # Adjust for RMS norm. @@ -526,13 +536,12 @@ def get_gpt_decoder_layer_specs( num_experts = None moe_grouped_gemm = None if attention_type == "linear_attention": - if config.linear_attention_type is None: + linear_attention_variants = ["gated_delta_net"] + if config.experimental_attention_variant not in linear_attention_variants: # Skip if there is no linear attention layer in the model. continue - linear_attention_type = config.linear_attention_type multi_latent_attention = None else: - linear_attention_type = None multi_latent_attention = config.multi_latent_attention layer_spec_key = f"{mlp_type}_{attention_type}" @@ -540,7 +549,7 @@ def get_gpt_decoder_layer_specs( num_experts=num_experts, moe_grouped_gemm=moe_grouped_gemm, multi_latent_attention=multi_latent_attention, - linear_attention_type=linear_attention_type, + experimental_attention_variant=config.experimental_attention_variant, **get_layer_spec_kwargs, ) @@ -583,7 +592,8 @@ def get_gpt_decoder_layer_specs( f"current linear attention pattern: {config.linear_attention_freq}" ) elif config.linear_attention_freq is None: - if config.linear_attention_type is None: + linear_attention_variants = ["gated_delta_net"] + if config.experimental_attention_variant not in linear_attention_variants: linear_attention_pattern = [0] * config.num_layers else: linear_attention_pattern = [1] * config.num_layers diff --git a/megatron/core/models/gpt/linear_attention_module_specs.py b/megatron/core/models/gpt/linear_attention_module_specs.py deleted file mode 100644 index 7e76d845cff..00000000000 --- a/megatron/core/models/gpt/linear_attention_module_specs.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
- -from typing import Optional - -from megatron.core.models.backends import BackendSpecProvider -from megatron.core.ssm.gated_delta_net import GatedDeltaNet, GatedDeltaNetSubmodules -from megatron.core.transformer.spec_utils import ModuleSpec - - -def get_linear_attention_module_spec_for_backend( - backend: BackendSpecProvider, linear_attention_type: str, normalization: Optional[str] = None -) -> ModuleSpec: - """Helper function to get module spec for Linear Attention""" - rms_norm = normalization == "RMSNorm" - if linear_attention_type == "gated_delta_net": - attention = ModuleSpec( - module=GatedDeltaNet, - submodules=GatedDeltaNetSubmodules( - in_proj=backend.column_parallel_layer_norm_linear(), - out_norm=backend.layer_norm(rms_norm=rms_norm, for_qk=False), - out_proj=backend.row_parallel_linear(), - ), - metainfo={"fuse_input_layernorm": True}, - ) - else: - raise ValueError(f"Invalid linear attention type: {linear_attention_type}") - return attention diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 57ba494742b..5cf22d25a4b 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -190,6 +190,7 @@ def __init__( self.key_hidden_size = self.hidden_size_per_attention_head self.val_hidden_size = self.hidden_size_per_attention_head + # TODO: This is built twice when using MLA, should be refactored. self.core_attention = build_module( submodules.core_attention, config=self.config, diff --git a/megatron/core/transformer/experimental_attention_variant/dsa.py b/megatron/core/transformer/experimental_attention_variant/dsa.py new file mode 100644 index 00000000000..fc994490b1b --- /dev/null +++ b/megatron/core/transformer/experimental_attention_variant/dsa.py @@ -0,0 +1,822 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +import copy +import math +from dataclasses import dataclass +from typing import Optional, Tuple, Union + +import torch + +from megatron.core import parallel_state +from megatron.core.models.common.embeddings import ( + RotaryEmbedding, + YarnRotaryEmbedding, + apply_rotary_pos_emb, +) +from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.tensor_parallel.mappings import gather_from_sequence_parallel_region +from megatron.core.transformer.enums import AttnMaskType +from megatron.core.transformer.module import MegatronModule +from megatron.core.transformer.spec_utils import ModuleSpec, build_module +from megatron.core.transformer.transformer_config import TransformerConfig + +try: + from fast_hadamard_transform import hadamard_transform +except ImportError: + hadamard_transform = None + + +def rotate_activation(x: torch.Tensor) -> torch.Tensor: + """Apply Hadamard rotation activation. + Reference: + https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/model.py#L424-L428 + + Args: + x: Input tensor (must be bfloat16). + + Returns: + Rotated tensor. + """ + assert ( + x.dtype == torch.bfloat16 + ), f"rotate_activation only support bf16 input, but got {x.dtype}" + assert hadamard_transform is not None, "fast_hadamard_transform is not installed." + hidden_size = x.size(-1) + return hadamard_transform(x, scale=hidden_size**-0.5) + + +class DSAIndexerLossLoggingHelper: + """Helper class for logging sparse attention indexer losses.""" + + tracker = {} + + @staticmethod + def save_loss_to_tracker( + loss: torch.Tensor, + layer_number: int, + num_layers: int, + reduce_group: torch.distributed.ProcessGroup = None, + avg_group: torch.distributed.ProcessGroup = None, + ): + """Save the indexer loss for logging. + + Args: + loss: The loss tensor. + layer_number: Layer index of the loss, 1-indexed. + num_layers: The number of total layers. 
+ reduce_group: The group for reducing the loss. + avg_group: The group for averaging the loss. + """ + # Skip indexer loss logging if layer_number is None. + if layer_number is None: + return + + tracker = DSAIndexerLossLoggingHelper.tracker + if "values" not in tracker: + tracker["values"] = torch.zeros(num_layers, device=torch.cuda.current_device()) + tracker["values"][layer_number - 1] += loss.detach() + tracker["reduce_group"] = reduce_group + tracker["avg_group"] = avg_group + + @staticmethod + def clean_loss_in_tracker(): + """Clear the indexer losses.""" + tracker = DSAIndexerLossLoggingHelper.tracker + if "values" in tracker: + tracker["values"].zero_() + tracker["reduce_group"] = None + tracker["avg_group"] = None + + @staticmethod + def reduce_loss_in_tracker(): + """Collect and reduce the indexer losses across ranks.""" + tracker = DSAIndexerLossLoggingHelper.tracker + if "values" not in tracker: + return + values = tracker["values"] + + torch.distributed.all_reduce( + values, group=parallel_state.get_pipeline_model_parallel_group() + ) + # Reduce indexer losses across ranks. + if tracker.get('reduce_group') is not None: + torch.distributed.all_reduce(values, group=tracker.get('reduce_group')) + if tracker.get('avg_group') is not None: + torch.distributed.all_reduce( + values, group=tracker['avg_group'], op=torch.distributed.ReduceOp.AVG + ) + torch.distributed.all_reduce( + values, + group=parallel_state.get_data_parallel_group(with_context_parallel=False), + op=torch.distributed.ReduceOp.AVG, + ) + + @staticmethod + def track_indexer_metrics( + loss_scale: float, + iteration: int, + writer, + wandb_writer=None, + total_loss_dict=None, + per_layer_logging: bool = False, + ): + """Track the sparse attention indexer metrics for logging. + + Args: + loss_scale: Scale factor for the loss. + iteration: Current training iteration. + writer: TensorBoard writer. + wandb_writer: Weights & Biases writer. + total_loss_dict: Dictionary to accumulate total losses. 
+ per_layer_logging: Whether to log per-layer losses. + """ + DSAIndexerLossLoggingHelper.reduce_loss_in_tracker() + tracker = DSAIndexerLossLoggingHelper.tracker + if "values" not in tracker: + return + + indexer_loss_values = tracker["values"] * loss_scale + num_layers = indexer_loss_values.shape[0] + + # Average across all layers (assuming all layers have sparse attention) + avg_indexer_loss = indexer_loss_values.sum() / num_layers + + # Log average loss + if total_loss_dict is not None: + if "indexer loss" in total_loss_dict: + total_loss_dict["indexer loss"] += avg_indexer_loss + else: + total_loss_dict["indexer loss"] = avg_indexer_loss + + if writer is not None: + writer.add_scalar("indexer loss", avg_indexer_loss, iteration) + + if wandb_writer is not None: + wandb_writer.log({"indexer loss": avg_indexer_loss}, iteration) + + DSAIndexerLossLoggingHelper.clean_loss_in_tracker() + + +def compute_dsa_indexer_loss( + index_scores: torch.Tensor, + topk_indices: torch.Tensor, + query: torch.Tensor, + key: torch.Tensor, + softmax_scale: float, + loss_coeff: float, + sparse_loss: bool, + pg_collection: ProcessGroupCollection, +) -> torch.Tensor: + """ + Compute KL divergence loss between index_scores and true attention_scores. + + This loss trains the indexer to predict which tokens are important by matching the distribution + of true attention scores. + + Reference: Section 2.1 of + https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/DeepSeek_V3_2.pdf + + Args: + index_scores: Scores predicted by indexer [batch, seqlen_q, seqlen_k]. + topk_indices: Top-k indices [batch, seqlen_q, index_topk]. + query: Query tensor [seqlen_q, batch, heads, dim]. + key: Key tensor [seqlen_k, batch, heads, dim]. + softmax_scale: Scale coefficient after q @ k^T. + loss_coeff: Coefficient for the indexer KL divergence loss. + sparse_loss: bool, whether to use sparse indexer loss. If True, only the topk + indices will be used to compute the loss. 
+ pg_collection: Process group collection, must have TP process group. + + Returns: + index_loss: KL divergence loss (scalar). + """ + sq, b, np, hn = query.size() + sk = key.size(0) + + # [sq, b, np, hn] -> [b, np, sq, hn] -> [b * np, sq, hn] + query = query.permute(1, 2, 0, 3).reshape(b * np, sq, hn) + # [sk, b, np, hn] -> [b, np, hn, sk] -> [b * np, hn, sk] + key = key.permute(1, 2, 3, 0).reshape(b * np, hn, sk) + # Compute attention scores [b * np, sq, sk] + attention_scores = torch.bmm(query.float(), key.float()) * softmax_scale + # Reshape to [b, np, sq, sk] + attention_scores = attention_scores.reshape(b, np, sq, sk) + + # causal_mask [sq, sk] + causal_mask = torch.triu( + torch.full((sq, sk), float('-inf'), dtype=torch.float32, device=attention_scores.device), + diagonal=1, + ) + # index_mask [b, sq, sk] + index_mask = torch.full( + (b, sq, sk), float("-inf"), dtype=torch.float32, device=causal_mask.device + ).scatter_(-1, topk_indices, 0) + + # [b, np, sq, skv] + [1, 1, sq, skv] -> [b, np, sq, skv] + attention_scores += causal_mask.view(1, 1, sq, sk) + if sparse_loss: + # [b, np, sq, sk] + [b, 1, sq, sk] -> [b, np, sq, sk] + attention_scores += index_mask.view(b, 1, sq, sk) + # [b, sq, sk] + [b, sq, sk] -> [b, sq, sk] + index_scores += index_mask + + # [b, np, sq, sk] -> [b, np, sq, sk] + attention_scores = torch.nn.functional.softmax(attention_scores, dim=-1, dtype=torch.float32) + # [b, sq, sk] -> [b, sq, sk] + index_scores = torch.nn.functional.softmax(index_scores, dim=-1, dtype=torch.float32) + + # Sum attention scores across heads. + # [batch, heads, seqlen_q, seqlen_k] -> [batch, seqlen_q, seqlen_k] + attention_scores = attention_scores.sum(dim=1) + if pg_collection.tp.size() > 1: + # attention scores are scattered to TP ranks in head dimension. + torch.distributed.all_reduce(attention_scores.contiguous(), group=pg_collection.tp) + # L1 normalize target on the last dimension. 
Doesn't use abs() because attention_scores are + # obtained from softmax so they are already non-negative. + attention_scores = attention_scores / attention_scores.sum(dim=-1, keepdim=True) + + # Compute KL divergence: KL(target || index) = target(x) * log(target(x) / index(x)) + # kl_per_element [b, sq, sk] + kl_per_element = attention_scores * ( + torch.log(attention_scores + 1e-10) - torch.log(index_scores + 1e-10) + ) + + # [b, sq, sk] -> [b, sq] -> [1] + # Each token has same weight in the loss. + kl_div = kl_per_element.sum(dim=-1).mean() + + # Scale by coefficient. + indexer_loss = kl_div * loss_coeff + + return indexer_loss + + +class DSAIndexerLossAutoScaler(torch.autograd.Function): + """An AutoScaler that triggers the backward pass and scales the grad for indexer loss. + + This custom autograd function attaches a KL divergence loss to the activation + to train the indexer to predict attention scores without affecting the forward pass. + """ + + main_loss_backward_scale: torch.Tensor = None + + @staticmethod + def forward(ctx, output: torch.Tensor, indexer_loss: torch.Tensor): + """Preserve the indexer_loss by storing it in the context to avoid garbage collection. + + Args: + output: The output tensor (activation). + indexer_loss: The indexer KL divergence loss tensor. + + Returns: + torch.Tensor: The output tensor unchanged. + """ + ctx.save_for_backward(indexer_loss) + return output + + @staticmethod + def backward(ctx, grad_output: torch.Tensor): + """Compute and scale the gradient for indexer loss. + + Args: + grad_output: The gradient of the output. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: The gradient of the output, scaled indexer loss + gradient. 
+ """ + (indexer_loss,) = ctx.saved_tensors + if DSAIndexerLossAutoScaler.main_loss_backward_scale is None: + DSAIndexerLossAutoScaler.main_loss_backward_scale = torch.tensor( + 1.0, device=indexer_loss.device + ) + indexer_loss_backward_scale = DSAIndexerLossAutoScaler.main_loss_backward_scale + scaled_indexer_loss_grad = torch.ones_like(indexer_loss) * indexer_loss_backward_scale + return grad_output, scaled_indexer_loss_grad + + @staticmethod + def set_loss_scale(scale: torch.Tensor): + """Set the scale of the indexer loss. + + Args: + scale: The scale value to set. + """ + if DSAIndexerLossAutoScaler.main_loss_backward_scale is None: + DSAIndexerLossAutoScaler.main_loss_backward_scale = scale + else: + DSAIndexerLossAutoScaler.main_loss_backward_scale.copy_(scale) + + +@dataclass +class DSAIndexerSubmodules: + """ + Configuration class for specifying the submodules of an DSA Indexer. + + Args: + linear_wq_b: Linear projection for query bottleneck expansion. + linear_wk: Linear projection for key. + k_norm: Layer normalization for key. + linear_weights_proj: Linear projection for attention weights. + """ + + linear_wq_b: Union[ModuleSpec, type] = None + linear_wk: Union[ModuleSpec, type] = None + k_norm: Union[ModuleSpec, type] = None + linear_weights_proj: Union[ModuleSpec, type] = None + + +@dataclass +class DSAttentionSubmodules: + """ + Configuration class for specifying the submodules of DSAttention. + + Args: + indexer: DSA Indexer module for computing sparse attention indices. + """ + + indexer: Union[ModuleSpec, type] = None + + +class DSAIndexer(MegatronModule): + """ + DSA Lightning Indexer for DeepSeek Sparse Attention. + + Computes index scores to identify the top-k most relevant key-value pairs for each query in + sparse attention. 
+ + Reference: + https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/model.py#L431-L480 + """ + + def __init__( + self, + config: TransformerConfig, + submodules: DSAIndexerSubmodules, + pg_collection: Optional[ProcessGroupCollection] = None, + ) -> None: + """Initialize the indexer. + + Args: + config (TransformerConfig): The configuration for the transformer model. + submodules (DSAIndexerSubmodules): Indexer submodules specification. + pg_collection (ProcessGroupCollection, optional): Process groups for the indexer. + """ + super().__init__(config=config) + self.hidden_size = self.config.hidden_size + self.qk_pos_emb_head_dim = self.config.qk_pos_emb_head_dim + self.q_lora_rank = ( + self.config.q_lora_rank + if self.config.q_lora_rank is not None + else self.config.hidden_size + ) + + self.index_n_heads = self.config.dsa_indexer_n_heads + self.index_head_dim = self.config.dsa_indexer_head_dim + self.index_topk = self.config.dsa_indexer_topk + + self.softmax_scale: float = self.index_head_dim**-0.5 + + if pg_collection is None: + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp']) + self.pg_collection = pg_collection + + # Initialize Position Embedding. 
+ if self.config.rope_type == 'rope': + self.rotary_pos_emb = RotaryEmbedding( + self.qk_pos_emb_head_dim, + rotary_percent=self.config.rotary_percent, + rotary_base=self.config.rotary_base, + cp_group=self.pg_collection.cp, + ) + elif self.config.rope_type == 'yarn': + self.rotary_pos_emb = YarnRotaryEmbedding( + self.qk_pos_emb_head_dim, + rotary_base=self.config.rotary_base, + scaling_factor=self.config.rotary_scaling_factor, + original_max_position_embeddings=self.config.original_max_position_embeddings, + beta_fast=self.config.beta_fast, + beta_slow=self.config.beta_slow, + mscale=self.config.mscale, + mscale_all_dim=self.config.mscale_all_dim, + cp_group=self.pg_collection.cp, + ) + else: + raise ValueError( + f'Unsupported RoPE type: {self.config.rope_type}, supported types are "rope" and ' + f'"yarn"' + ) + + self.linear_wq_b = build_module( + submodules.linear_wq_b, + self.q_lora_rank, + self.index_n_heads * self.index_head_dim, + config=self.config, + init_method=self.config.init_method, + bias=False, + skip_bias_add=False, + skip_weight_param_allocation=False, + parallel_mode="duplicated", + ) + + self.linear_wk = build_module( + submodules.linear_wk, + self.hidden_size, + self.index_head_dim, + config=self.config, + init_method=self.config.init_method, + bias=False, + skip_bias_add=False, + skip_weight_param_allocation=False, + parallel_mode="duplicated", + ) + + k_norm_config = copy.copy(self.config) + k_norm_config.normalization = "LayerNorm" + self.k_norm = build_module( + submodules.k_norm, + config=k_norm_config, + hidden_size=self.index_head_dim, + eps=self.config.layernorm_epsilon, + ) + + self.linear_weights_proj = build_module( + submodules.linear_weights_proj, + self.hidden_size, + self.index_n_heads, + config=self.config, + init_method=self.config.init_method, + bias=False, + skip_bias_add=False, + skip_weight_param_allocation=False, + parallel_mode="duplicated", + ) + + def _apply_rope(self, x: torch.Tensor, rotary_pos_emb: torch.Tensor, 
mscale: float): + """Apply RoPE to the input tensor.""" + # x_nope [seqlen, batch, *, index_head_dim - qk_pos_emb_head_dim] + # x_pe [seqlen, batch, *, qk_pos_emb_head_dim] + x_nope, x_pe = torch.split( + x, [self.index_head_dim - self.qk_pos_emb_head_dim, self.qk_pos_emb_head_dim], dim=-1 + ) + x_pe = apply_rotary_pos_emb( + x_pe, + rotary_pos_emb, + config=self.config, + cu_seqlens=None, + mscale=mscale, + cp_group=self.pg_collection.cp, + ) + # [seqlen, batch, *, index_head_dim] + x = torch.cat([x_nope, x_pe], dim=-1) + return x + + def _compute_index_scores( + self, q: torch.Tensor, weights: torch.Tensor, k: torch.Tensor + ) -> torch.Tensor: + """ + Perform index score using BF16 precision. + + Reference: + https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/kernel.py#L254-L274 + This is a BF16 implementation of the `fp8_index` logic: + 1. Compute attention scores: q @ k^T; + 2. Apply ReLU activation; + 3. Weight by attention weights; + 4. Sum across attention heads. + + Args: + q: BF16 [seqlen_q, batch, index_n_heads, index_head_dim], the query tensor. + weights: BF16 [seqlen_q, batch, index_n_heads], the attention weights. + k: BF16 [seqlen_k, batch, index_head_dim], the key tensor. + + Returns: + index_scores: FP32 [batch, seqlen_q, seqlen_k], the index scores. + """ + # Compute attention scores: q @ k^T + # [seqlen_q, batch, index_n_heads, index_head_dim] @ [seqlen_k, batch, index_head_dim]^T + # -> [seqlen_q, batch, index_n_heads, seqlen_k] + index_scores = torch.einsum('sbhd,tbd->sbht', q.float(), k.float()) + + # Apply ReLU activation. + index_scores = torch.relu(index_scores) + + # Weight each head by attention weights. + # [seqlen_q, batch, index_n_heads, seqlen_k] * [seqlen_q, batch, index_n_heads, 1] + # -> [seqlen_q, batch, index_n_heads, seqlen_k] + index_scores = index_scores * weights.unsqueeze(-1) + + # Sum across attention heads. 
+ # [seqlen_q, batch, index_n_heads, seqlen_k] -> [seqlen_q, batch, seqlen_k] + index_scores = index_scores.sum(dim=2) + + # Transpose to [batch, seqlen_q, seqlen_k]. + index_scores = index_scores.transpose(0, 1) + + return index_scores + + def forward_with_scores( + self, + x: torch.Tensor, + qr: torch.Tensor, + mask: Optional[torch.Tensor] = None, + packed_seq_params: Optional[PackedSeqParams] = None, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Forward pass for DSA Indexer that returns both index scores and top-k indices. + + This is used when KL loss is enabled to compare indexer scores with true attention scores. + + Args: + x: hidden states [seqlen, batch, hidden_size]. + qr: Low-rank query tensor [seqlen, batch, q_lora_rank]. + mask: Attention mask [batch, seqlen, seqlen]. + packed_seq_params: Packed sequence parameters for variable length sequences. + + Returns: + index_scores: Index scores [batch, seqlen, seqlen]. + topk_indices: Top-k indices [batch, seqlen, index_topk]. + """ + assert packed_seq_params is None, "Packed sequence is not supported for DSAttention" + + # ========================================= + # Prepare RoPE params + # ========================================= + rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len( + None, None, x, self.config, packed_seq_params + ) + if self.config.rope_type == "rope": + rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len, packed_seq=False) + mscale = 1.0 + else: + rotary_pos_emb, mscale = self.rotary_pos_emb(rotary_seq_len, packed_seq=False) + + # ========================================= + # Gather inputs if sp is enabled + # ========================================= + if self.config.sequence_parallel and self.pg_collection.tp.size() > 1: + x = gather_from_sequence_parallel_region(x, group=self.pg_collection.tp) + qr = gather_from_sequence_parallel_region(qr, group=self.pg_collection.tp) + + # ========================================= + # Get sequence length and batch size + # 
========================================= + seqlen, bsz, _ = x.size() + + # ========================================= + # q linear and apply rope to q + # ========================================= + # [seqlen, batch, q_lora_rank] -> [seqlen, batch, index_n_heads * index_head_dim] + q, _ = self.linear_wq_b(qr) + # [seqlen, batch, index_n_heads * index_head_dim] + # -> [seqlen, batch, index_n_heads, index_head_dim] + q = q.reshape(seqlen, bsz, self.index_n_heads, self.index_head_dim) + q = self._apply_rope(q, rotary_pos_emb, mscale) + + # ========================================= + # k linear and apply rope to k + # ========================================= + # [seqlen, batch, hidden_size] -> [seqlen, batch, index_head_dim] + k, _ = self.linear_wk(x) + k = self.k_norm(k) + # [seqlen, batch, index_head_dim] -> [seqlen, batch, 1, index_head_dim] + k = k.reshape(seqlen, bsz, 1, self.index_head_dim) + k = self._apply_rope(k, rotary_pos_emb, mscale) + # [seqlen, batch, 1, index_head_dim] -> [seqlen, batch, index_head_dim] + k = k.reshape(seqlen, bsz, self.index_head_dim) + + # ========================================= + # Rotate activation + # ========================================= + q = rotate_activation(q) + k = rotate_activation(k) + + # ========================================= + # Compute index scores + # ========================================= + # [seqlen, batch, hidden_size] -> [seqlen, batch, index_n_heads] + weights, _ = self.linear_weights_proj(x) + weights = weights * (self.index_n_heads**-0.5) * self.softmax_scale + # [batch, seqlen, seqlen] + index_scores = self._compute_index_scores(q, weights, k) + if mask is not None: + assert mask.dtype == index_scores.dtype, "Mask dtype must match index scores dtype" + index_scores = index_scores + mask + + # ========================================= + # Select top-k indices + # ========================================= + topk_k = min(self.index_topk, seqlen) + # [batch, seqlen, index_topk] + topk_indices = 
index_scores.topk(topk_k, dim=-1)[1] + + return index_scores, topk_indices + + def forward( + self, + x: torch.Tensor, + qr: torch.Tensor, + mask: Optional[torch.Tensor] = None, + packed_seq_params: Optional[PackedSeqParams] = None, + ): + """ + Forward pass for DSA Indexer. + + Args: + x: hidden states [seqlen, batch, hidden_size]. + qr: Low-rank query tensor [seqlen, batch, q_lora_rank]. + mask: Attention mask [batch, seqlen, seqlen]. + packed_seq_params: Packed sequence parameters for variable length sequences. + + Returns: + topk_indices: Top-k indices for sparse attention [batch, seqlen, index_topk]. + """ + _, topk_indices = self.forward_with_scores(x, qr, mask, packed_seq_params) + return topk_indices + + +def unfused_dsa_fn(query, key, value, topk_indices, softmax_scale): + """ + Unfused sparse attention implementation. + """ + sq, b, np, hn = query.size() + skv = key.size(0) + hnv = value.size(3) + + # =================================== + # Raw attention scores [b, np, sq, skv] + # =================================== + # [sq, b, np, hn] -> [b, np, sq, hn] -> [b * np, sq, hn] + query = query.permute(1, 2, 0, 3).reshape(b * np, sq, hn) + # [skv, b, np, hn] -> [b, np, hn, skv] -> [b * np, hn, skv] + key = key.permute(1, 2, 3, 0).reshape(b * np, hn, skv) + # Compute attention scores [b * np, sq, skv] + attention_scores = torch.bmm(query.float(), key.float()) * softmax_scale + # Reshape to [b, np, sq, skv] + attention_scores = attention_scores.reshape(b, np, sq, skv) + + # =================================== + # Apply sparse mask from indexer + # =================================== + # index_mask [b, sq, skv] + index_mask = torch.full((b, sq, skv), float("-inf"), device=attention_scores.device) + index_mask.scatter_(-1, topk_indices, 0) + # causal_mask [sq, skv] + causal_mask = torch.triu( + torch.full((sq, skv), float('-inf'), dtype=torch.float32, device=index_mask.device), + diagonal=1, + ) + # [b, sq, skv] + [1, sq, skv] -> [b, sq, skv] + index_mask += 
causal_mask.view(1, sq, skv)
+ # [b, np, sq, skv] + [b, 1, sq, skv] -> [b, np, sq, skv]
+ attention_scores += index_mask.unsqueeze(1)
+ attention_scores = torch.nn.functional.softmax(attention_scores, dim=-1, dtype=torch.float32)
+
+ # ===================================
+ # Output
+ # ===================================
+ # [skv, b, np, hnv] -> [b, np, skv, hnv] -> [b * np, skv, hnv]
+ value = value.permute(1, 2, 0, 3).reshape(b * np, skv, hnv)
+ # Reshape attention_scores: [b, np, sq, skv] -> [b * np, sq, skv]
+ attention_scores = attention_scores.reshape(b * np, sq, skv)
+ # Compute output: [b * np, sq, hnv]
+ output = torch.bmm(attention_scores.to(value.dtype), value)
+ # Reshape output: [b * np, sq, hnv] -> [b, np, sq, hnv] -> [sq, b, np, hnv]
+ output = output.reshape(b, np, sq, hnv).permute(2, 0, 1, 3).contiguous()
+ # Flatten: [sq, b, np, hnv] -> [sq, b, np * hnv]
+ output = output.reshape(sq, b, np * hnv)
+ return output
+
+
+class DSAttention(MegatronModule):
+ """
+ This module implements a sparse attention mechanism using a DSA Indexer to compute top-k
+ attention indices for reducing computational complexity.
+ + Reference: + https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/model.py#L491-L597 + """ + + def __init__( + self, + config: TransformerConfig, + submodules: DSAttentionSubmodules, + layer_number: int, + attn_mask_type: AttnMaskType, + attention_type: str, + attention_dropout: Optional[float] = None, + softmax_scale: Optional[float] = None, + k_channels: Optional[int] = None, + v_channels: Optional[int] = None, + cp_comm_type: str = "p2p", + pg_collection: ProcessGroupCollection = None, + ): + super().__init__(config=config) + + self.layer_number = layer_number + + self.indexer = build_module( + submodules.indexer, config=self.config, pg_collection=pg_collection + ) + + if softmax_scale is None: + softmax_scale = 1.0 / math.sqrt( + k_channels if k_channels is not None else config.kv_channels + ) + self.softmax_scale = softmax_scale + + def forward( + self, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + x: torch.Tensor, + qr: torch.Tensor, + attention_mask: torch.Tensor, + attn_mask_type: AttnMaskType = None, + attention_bias: torch.Tensor = None, + packed_seq_params: PackedSeqParams = None, + ): + """ + Forward pass for Sparse Attention. + + Args: + query: Query tensor [sq, b, np, hn]. + key: Key tensor [skv, b, np, hn]. + value: Value tensor [skv, b, np, hnv]. + x: Original hidden states [sq, b, hidden_size]. + qr: Low-rank query representation [sq, b, q_lora_rank]. + attention_mask: Attention mask tensor [b, 1, sq, sk]. + attn_mask_type: Type of attention mask. + attention_bias: Optional attention bias. + packed_seq_params: Packed sequence parameters. + + Returns: + output: Output tensor [sq, b, hidden_size] + """ + sq, b, np, hn = query.size() + skv = key.size(0) + hnv = value.size(3) + + # Detach x and qr to prevent gradients of indexer from flowing back to the main model. + x = x.detach() + qr = qr.detach() + + # Get a FP32 mask with -inf for masked positions. 
+ if attn_mask_type is not None: + assert attn_mask_type == AttnMaskType.causal, 'Only causal mask is supported for now' + # Generate upper triangular mask with -inf above diagonal, 0 elsewhere + # torch.triu with diagonal=1 creates upper triangular matrix (excluding main diagonal) + # float_mask [sq, skv] + float_mask = torch.triu( + torch.full((sq, skv), float('-inf'), dtype=torch.float32, device=x.device), + diagonal=1, + ) + else: + assert attention_mask.shape == (b, 1, sq, skv), 'attention_mask shape mismatch' + # [b, 1, sq, skv] -> [b, sq, skv] + mask = attention_mask.squeeze() + # float_mask [b, sq, skv] + float_mask = torch.zeros_like(mask, dtype=torch.float32).masked_fill( + mask, float('-inf') + ) + + # =================================== + # Get index scores and top-k indices + # =================================== + index_scores, topk_indices = self.indexer.forward_with_scores( + x, qr, mask=float_mask, packed_seq_params=packed_seq_params + ) + + # =================================== + # Run sparse attention kernel + # =================================== + output = unfused_dsa_fn(query, key, value, topk_indices, self.softmax_scale) + + # =================================== + # Attach indexer loss + # =================================== + if self.training and torch.is_grad_enabled(): + # Compute KL divergence loss between indexer scores and true attention scores + indexer_loss_coeff = getattr(self.config, 'dsa_indexer_loss_coeff', 0.0) + indexer_loss = compute_dsa_indexer_loss( + index_scores, + topk_indices, + query.detach(), + key.detach(), + self.softmax_scale, + indexer_loss_coeff, + getattr(self.config, "dsa_indexer_use_sparse_loss", False), + self.indexer.pg_collection, + ) + # Save indexer loss for logging + if indexer_loss_coeff > 0: + DSAIndexerLossLoggingHelper.save_loss_to_tracker( + loss=indexer_loss, + layer_number=self.layer_number, + num_layers=self.config.num_layers, + ) + # Attach loss to output + output = 
DSAIndexerLossAutoScaler.apply(output, indexer_loss) + + return output diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index 074523afd7b..3953d933b45 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -243,13 +243,28 @@ def forward( # Get the query, key and value tensors based on the type of attention - # self or cross attn. # query: [96, 1, 16, 128], key:[96, 1, 16, 128], value:[96, 1, 16, 128] - query, key, value = self.get_query_key_value_tensors( - hidden_states, - key_value_states, - position_ids, - packed_seq_params, - inference_context=inference_context, - ) + if self.config.experimental_attention_variant is None: + query, key, value = self.get_query_key_value_tensors( + hidden_states, + key_value_states, + position_ids, + packed_seq_params, + inference_context=inference_context, + ) + elif self.config.experimental_attention_variant == "dsa": + query, key, value, q_compressed, _ = self.get_query_key_value_tensors( + hidden_states, + key_value_states, + position_ids, + packed_seq_params, + inference_context=inference_context, + return_compressed_tensors=True, + ) + else: + raise ValueError( + f"Unsupported experimental attention variant: " + f"{self.config.experimental_attention_variant}" + ) # =================================================== # Adjust key, value for inference @@ -281,14 +296,34 @@ def forward( if inference_context is None or inference_context.is_static_batching(): with get_fine_grained_offloading_context(self.offload_core_attention): - core_attn_out = self.core_attention( - query, - key, - value, - attention_mask, - packed_seq_params=packed_seq_params, - attn_mask_type=attn_mask_type, - ) + if self.config.experimental_attention_variant is None: + core_attn_out = self.core_attention( + query, + key, + value, + attention_mask, + packed_seq_params=packed_seq_params, + attn_mask_type=attn_mask_type, 
+ ) + elif self.config.experimental_attention_variant == "dsa": + # For dsa we need to pass in the original hidden states and the compressed + # query representation. + core_attn_out = self.core_attention( + query, + key, + value, + x=hidden_states, + qr=q_compressed, + attention_mask=attention_mask, + attn_mask_type=attn_mask_type, + attention_bias=None, + packed_seq_params=packed_seq_params, + ) + else: + raise ValueError( + f"Unsupported attention variant: " + f"{self.config.experimental_attention_variant}" + ) elif self.cache_mla_latents: # Dynamic batching attention kernel. q, k, v = (query, key, value) @@ -494,6 +529,7 @@ def get_query_key_value_tensors( inference_context=None, *, inference_params=None, + return_compressed_tensors=False, ): """ Derives `query`, `key` and `value` tensors from `hidden_states`. @@ -603,6 +639,16 @@ def get_query_key_value_tensors( kv_compressed = kv_compressed.squeeze(1) k_pos_emb = k_pos_emb.squeeze(1) + # ========================================= + # Apply norm + # ========================================= + + if self.config.q_lora_rank is not None: + # q_compressed: [num_tokens, q_lora_rank] + q_compressed = self.q_layernorm(q_compressed) + + kv_compressed = self.kv_layernorm(kv_compressed) + # ========================================= # QKV up projection and RoPE apply # ========================================= @@ -613,7 +659,6 @@ def qkv_up_proj_and_rope_apply_for_cached_latent_kv( if self.config.q_lora_rank is not None: # q_compressed: [num_tokens, q_lora_rank] # q: [num_tokens, n * (qk_head_dim + qk_pos_emb_head_dim)] - q_compressed = self.q_layernorm(q_compressed) q, _ = self.linear_q_up_proj(q_compressed) else: # q_compressed: [num_tokens, hidden_size] @@ -623,8 +668,6 @@ def qkv_up_proj_and_rope_apply_for_cached_latent_kv( # q: [num_tokens, n, q_head_dim] q = q.view(*q.size()[:-1], self.num_attention_heads_per_partition, self.q_head_dim) - kv_compressed = self.kv_layernorm(kv_compressed) - # [num_tokens, 
qk_pos_emb_head_dim] -> [num_tokens, 1, qk_pos_emb_head_dim] k_pos_emb = torch.unsqueeze(k_pos_emb, -2) @@ -688,7 +731,6 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po if self.config.q_lora_rank is not None: # q_compressed: [num_tokens, q_lora_rank] # q: [num_tokens, n * (qk_head_dim + qk_pos_emb_head_dim)] - q_compressed = self.q_layernorm(q_compressed) q, _ = self.linear_q_up_proj(q_compressed) else: # q_compressed: [num_tokens, hidden_size] @@ -698,8 +740,6 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po # q: [num_tokens, n, q_head_dim] q = q.view(*q.size()[:-1], self.num_attention_heads_per_partition, self.q_head_dim) - kv_compressed = self.kv_layernorm(kv_compressed) - # kv: [num_tokens, n * (qk_head_dim + v_head_dim)] kv, _ = self.linear_kv_up_proj(kv_compressed) @@ -824,7 +864,10 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb ) - return query, key, value + if return_compressed_tensors: + return query, key, value, q_compressed, kv_compressed + else: + return query, key, value def uncompress_kv_from_cache(self, kv_cached): """ diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index cc714e9ac15..a3a16754977 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -233,11 +233,14 @@ class TransformerConfig(ModelParallelConfig): 16 SMs can generally achieve good bandwidth.""" #################### - # linear attention + # attention variant #################### - linear_attention_type: Optional[str] = None - """Type of linear attention to use. Currently support gated_delta_net.""" + experimental_attention_variant: Optional[str] = None + """Type of attention variant to use. 
Currently support gated_delta_net and dsa.""" + #################### + # attention variant: gated_delta_net + #################### linear_attention_freq: Optional[Union[int, List[int]]] = None """Frequency between LA (linear attention) layers and SDPA (scaled dot-product attention) layers. @@ -260,6 +263,25 @@ class TransformerConfig(ModelParallelConfig): linear_num_value_heads: Optional[int] = None """Number of value and gate heads for the gated delta net.""" + #################### + # attention variant: dsa + #################### + dsa_indexer_n_heads: Optional[int] = None + """Number of DSA indexer heads.""" + + dsa_indexer_head_dim: Optional[int] = None + """Dimension per DSA indexer head.""" + + dsa_indexer_topk: Optional[int] = None + """Number of top-k tokens to select in DSA indexer.""" + + dsa_indexer_loss_coeff: Optional[float] = None + """Coefficient for the DSA indexer KL divergence loss. Set to 0 to disable indexer loss.""" + + dsa_indexer_use_sparse_loss: Optional[bool] = None + """Whether to use sparse DSA indexer loss. If True, the indexer loss will be computed using the + top-k indices.""" + #################### # initialization #################### @@ -855,17 +877,12 @@ def __post_init__(self): f"tensor_model_parallel_size ({self.tensor_model_parallel_size})." ) - if self.linear_attention_type is not None: - supported_la_types = ["gated_delta_net"] - assert self.linear_attention_type in supported_la_types, ( - f"linear_attention_type ({self.linear_attention_type}) only support" - f" one of {supported_la_types}." - ) + if self.experimental_attention_variant in ["gated_delta_net"]: assert ( self.linear_attention_freq is not None ), f"linear_attention_freq must be set for linear attention." 
- if self.linear_attention_type == "gated_delta_net": + if self.experimental_attention_variant == "gated_delta_net": # Check required parameters assert ( self.linear_conv_kernel_dim is not None @@ -900,6 +917,11 @@ def __post_init__(self): f"Gated delta net does not support context parallel for now," f" but got {self.context_parallel_size=}." ) + elif self.experimental_attention_variant == "dsa": + assert ( + self.context_parallel_size == 1 + ), "Currently context parallelism is not supported by DSAttention!" + assert not self.apply_rope_fusion, "RoPE fusion is not supported for DSAttention" if self.fp8: # cannot support first last layer bf16 with delayed scaling diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 15576e2ceac..0cf2d006863 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -69,7 +69,7 @@ def add_megatron_arguments(parser: argparse.ArgumentParser): parser = _add_vision_args(parser) parser = _add_moe_args(parser) parser = _add_mla_args(parser) - parser = _add_linear_attention_args(parser) + parser = _add_experimental_attention_variant_args(parser) parser = _add_heterogeneous_args(parser) parser = _add_logging_args(parser) parser = _add_straggler_detector_args(parser) @@ -1194,13 +1194,21 @@ def validate_args(args, defaults={}): args.no_load_rng = True print('Warning: disabling --no-load-rng for upcycling.') + if args.linear_attention_type is not None: + print_rank_0( + '--linear-attention-type is deprecated, use --experimental-attention-variant instead.', + args.rank, + ) + args.experimental_attention_variant = args.linear_attention_type + del args.linear_attention_type + # Muon optimizercheck if 'muon' in args.optimizer: assert not args.use_distributed_optimizer, "Muon optimizer does not support distributed optimizer for now." assert not args.use_torch_fsdp2, "Muon optimizer does not support Torch-FSDP2 for now." 
assert not args.use_megatron_fsdp, "Muon optimizer does not support Megatron-FSDP for now." assert args.ckpt_format in ["torch", "torch_dist"], "Muon optimizer supports torch and torch_dist checkpoint format." - assert args.linear_attention_type is None, "Muon optimizer does not support linear attention type for now." + assert args.experimental_attention_variant is None, "Muon optimizer does not support attention variant for now." assert not args.attention_output_gate, "Muon optimizer does not support attention output gate for now." # Optimizer CPU offload check @@ -3361,10 +3369,14 @@ def _add_mla_args(parser): return parser -def _add_linear_attention_args(parser): - group = parser.add_argument_group(title="la") +def _add_experimental_attention_variant_args(parser): + group = parser.add_argument_group(title="experimental_attention_variant") + group.add_argument('--experimental-attention-variant', default=None, choices=['gated_delta_net', 'dsa'], type=str, + help='Type of attention variant to use. Currently support gated_delta_net and dsa.') + + # Linear attention group.add_argument('--linear-attention-type', default=None, choices=['gated_delta_net'], type=str, - help='Type of linear attention to use. Currently support gated_delta_net.') + help='(Deprecated, use --experimental-attention-variant instead) Type of linear attention to use. Currently support gated_delta_net.') group.add_argument('--linear-attention-freq', type=la_freq_type, default=None, help='Frequency between LA (linear attention) layers and' ' SDPA (scaled dot-product attention) layers. Accepts either: ' @@ -3384,6 +3396,19 @@ def _add_linear_attention_args(parser): help='Number of query and key heads for the gated delta net.') group.add_argument('--linear-num-value-heads', default=32, type=int, help='Number of value and gate heads for the gated delta net.') + + # DSA + group.add_argument('--dsa-indexer-n-heads', default=None, type=int, + help='Number of indexer heads for sparse attention. 
If not set, defaults to num-attention-heads.') + group.add_argument('--dsa-indexer-head-dim', default=None, type=int, + help='Dimension per indexer head for sparse attention. If not set, defaults to kv-channels.') + group.add_argument('--dsa-indexer-topk', default=None, type=int, + help='Number of top-k tokens to select in sparse attention indexer.') + group.add_argument('--dsa-indexer-loss-coeff', default=0.0, type=float, + help='Coefficient for the indexer KL divergence loss. Set to 0 to disable indexer loss.') + group.add_argument('--dsa-indexer-use-sparse-loss', action='store_true', + help='Use sparse indexer loss. If set, the indexer loss will be computed using the top-k indices.') + return parser def _add_heterogeneous_args(parser): diff --git a/megatron/training/training.py b/megatron/training/training.py index 555cc0ecfee..e88b9839d28 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -92,6 +92,7 @@ from megatron.core.optimizer_param_scheduler import OptimizerParamScheduler from megatron.core.transformer.moe import upcycling_utils from megatron.core.transformer.moe.moe_utils import track_moe_metrics +from megatron.core.transformer.experimental_attention_variant.dsa import DSAIndexerLossLoggingHelper from megatron.core.transformer.multi_token_prediction import MTPLossLoggingHelper from megatron.core.parallel_state import ( destroy_global_memory_buffer, @@ -376,7 +377,8 @@ def transformer_flops(): ) ) - if args.linear_attention_type is not None: + linear_attention_variants = ["gated_delta_net"] + if args.experimental_attention_variant in linear_attention_variants: # Calculate number of dense and MoE Transformer MLPs. 
if isinstance(args.linear_attention_freq, int): linear_attention_pattern = [ @@ -401,7 +403,7 @@ def transformer_flops(): num_linear_attention_layers = sum(linear_attention_pattern) num_standard_attention_layers = num_layers - num_linear_attention_layers - if args.linear_attention_type == "gated_delta_net": + if args.experimental_attention_variant == "gated_delta_net": # Calculate the FLOPs for the gated delta net attention. qk_head_dim = args.linear_key_head_dim v_head_dim = args.linear_value_head_dim @@ -1699,6 +1701,16 @@ def training_log( MTPLossLoggingHelper.track_mtp_metrics( mtp_loss_scale, iteration, writer, wandb_writer, total_loss_dict ) + # Track sparse attention indexer loss + if args.dsa_indexer_loss_coeff is not None and args.dsa_indexer_loss_coeff > 0: + indexer_loss_scale = 1 / get_num_microbatches() + DSAIndexerLossLoggingHelper.track_indexer_metrics( + loss_scale=indexer_loss_scale, + iteration=iteration, + writer=writer, + wandb_writer=wandb_writer, + total_loss_dict=total_loss_dict, + ) if iteration % args.log_interval == 0: if args.record_memory_history and (is_last_rank() or torch.distributed.get_backend() == 'fake'): snapshot = torch.cuda.memory._snapshot() diff --git a/tests/unit_tests/ssm/test_gated_delta_net.py b/tests/unit_tests/ssm/test_gated_delta_net.py index dbf8d203634..89a185e3755 100644 --- a/tests/unit_tests/ssm/test_gated_delta_net.py +++ b/tests/unit_tests/ssm/test_gated_delta_net.py @@ -88,7 +88,7 @@ def setup_method(self, tp_size, sp, cp_size): context_parallel_size=cp_size, ) gdn_submodules = get_gpt_layer_with_transformer_engine_spec( - linear_attention_type="gated_delta_net", normalization="RMSNorm" + experimental_attention_variant="gated_delta_net", normalization="RMSNorm" ).submodules.self_attention.submodules self.gdn = GatedDeltaNet( @@ -157,7 +157,7 @@ def test_parallel_gated_delta_net_correctness(tmp_path_dist_ckpt, tp, sp, cp): # Model initialization function def initialize_gpt_model(config, pre_process=True, 
post_process=True, vp_stage=None): layer_spec = get_gpt_layer_with_transformer_engine_spec( - linear_attention_type="gated_delta_net", normalization=normalization + experimental_attention_variant="gated_delta_net", normalization=normalization ) gpt_model = GPTModel( config=config, diff --git a/tests/unit_tests/transformer/test_attention_variant_dsa.py b/tests/unit_tests/transformer/test_attention_variant_dsa.py new file mode 100644 index 00000000000..bd106aa6f0e --- /dev/null +++ b/tests/unit_tests/transformer/test_attention_variant_dsa.py @@ -0,0 +1,1271 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +from unittest.mock import patch + +import pytest +import torch + +import megatron.core.parallel_state as parallel_state +from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec +from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed +from megatron.core.transformer import TransformerConfig +from megatron.core.transformer.enums import AttnMaskType +from megatron.core.transformer.experimental_attention_variant.dsa import ( + DSAIndexer, + DSAIndexerLossAutoScaler, + DSAIndexerSubmodules, + DSAttention, + DSAttentionSubmodules, + compute_dsa_indexer_loss, + rotate_activation, +) +from megatron.core.transformer.transformer_config import MLATransformerConfig +from tests.unit_tests.test_utilities import Utils + +try: + from fast_hadamard_transform import hadamard_transform as _hadamard_transform + + HAVE_HADAMARD = True +except ImportError: + HAVE_HADAMARD = False + _hadamard_transform = None + + +def mock_hadamard_transform(x: torch.Tensor, scale: float = 1.0) -> torch.Tensor: + """Mock implementation of hadamard_transform for testing without the library installed. + + This is a simple identity-like transformation that preserves shape and applies scaling. 
+ """ + return x * scale + + +@pytest.fixture(autouse=True) +def patch_hadamard_if_needed(): + """Automatically patch hadamard_transform in dsa module if not installed.""" + if not HAVE_HADAMARD: + with patch( + 'megatron.core.transformer.experimental_attention_variant.dsa.hadamard_transform', + mock_hadamard_transform, + ): + yield + else: + yield + + +class TestRotateActivation: + """Test rotate_activation function.""" + + @pytest.fixture(scope='function', autouse=True) + def setup_method(self): + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1 + ) + yield + Utils.destroy_model_parallel() + + def test_rotate_activation_shape(self): + """Test that rotate_activation preserves shape.""" + batch_size = 2 + seq_len = 16 + hidden_size = 128 + + x = torch.randn(seq_len, batch_size, hidden_size, dtype=torch.bfloat16).cuda() + output = rotate_activation(x) + + assert output.shape == x.shape + assert output.dtype == torch.bfloat16 + + def test_rotate_activation_dtype_check(self): + """Test that rotate_activation only accepts bfloat16.""" + x = torch.randn(16, 2, 128, dtype=torch.float32).cuda() + + with pytest.raises(AssertionError, match="only support bf16"): + rotate_activation(x) + + +@pytest.mark.parametrize("seqlen_and_topk", [[16, 32], [64, 32]]) +class TestComputeDSAIndexerLoss: + """Test compute_dsa_indexer_loss function.""" + + @pytest.fixture(scope='function', autouse=True) + def setup_method(self): + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1 + ) + self.pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp']) + yield + Utils.destroy_model_parallel() + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_indexer_loss_shape(self, seqlen_and_topk): + """Test that indexer loss returns a scalar.""" + batch_size = 2 + seqlen = seqlen_and_topk[0] + num_heads = 4 + head_dim = 128 + index_topk = 
seqlen_and_topk[1] + + # Create dummy index scores + index_scores = torch.randn(batch_size, seqlen, seqlen, dtype=torch.float32).cuda() + + # Apply causal mask to index_scores before computing topk + causal_mask = torch.triu( + torch.full( + (seqlen, seqlen), float('-inf'), dtype=torch.float32, device=index_scores.device + ), + diagonal=1, + ) + # [batch_size, seqlen, seqlen] + [seqlen, seqlen] -> [batch_size, seqlen, seqlen] + masked_index_scores = index_scores + causal_mask + + # Get topk indices from masked index_scores + topk_k = min(index_topk, seqlen) + topk_indices = masked_index_scores.topk(topk_k, dim=-1)[1] + + query = torch.randn(seqlen, batch_size, num_heads, head_dim, dtype=torch.bfloat16).cuda() + key = torch.randn(seqlen, batch_size, num_heads, head_dim, dtype=torch.bfloat16).cuda() + softmax_scale = head_dim**-0.5 + + loss = compute_dsa_indexer_loss( + index_scores=index_scores, + topk_indices=topk_indices, + query=query, + key=key, + softmax_scale=softmax_scale, + loss_coeff=1.0, + sparse_loss=False, + pg_collection=self.pg_collection, + ) + + assert loss.shape == torch.Size([]) + assert loss.dtype == torch.float32 + assert loss >= 0 # KL divergence should be non-negative + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_indexer_loss_sparse(self, seqlen_and_topk): + """Test sparse indexer loss computation.""" + batch_size = 2 + seqlen = seqlen_and_topk[0] + num_heads = 4 + head_dim = 128 + index_topk = seqlen_and_topk[1] + + # Create dummy index scores + index_scores = torch.randn(batch_size, seqlen, seqlen, dtype=torch.float32).cuda() + + # Apply causal mask to index_scores before computing topk + causal_mask = torch.triu( + torch.full( + (seqlen, seqlen), float('-inf'), dtype=torch.float32, device=index_scores.device + ), + diagonal=1, + ) + # [batch_size, seqlen, seqlen] + [seqlen, seqlen] -> [batch_size, seqlen, seqlen] + masked_index_scores = index_scores + causal_mask + + # Get topk indices 
from masked index_scores + topk_k = min(index_topk, seqlen) + topk_indices = masked_index_scores.topk(topk_k, dim=-1)[1] + + query = torch.randn(seqlen, batch_size, num_heads, head_dim, dtype=torch.bfloat16).cuda() + key = torch.randn(seqlen, batch_size, num_heads, head_dim, dtype=torch.bfloat16).cuda() + softmax_scale = head_dim**-0.5 + + loss_sparse = compute_dsa_indexer_loss( + index_scores=index_scores, + topk_indices=topk_indices, + query=query, + key=key, + softmax_scale=softmax_scale, + loss_coeff=1.0, + sparse_loss=True, + pg_collection=self.pg_collection, + ) + + loss_dense = compute_dsa_indexer_loss( + index_scores=index_scores, + topk_indices=topk_indices, + query=query, + key=key, + softmax_scale=softmax_scale, + loss_coeff=1.0, + sparse_loss=False, + pg_collection=self.pg_collection, + ) + + # Sparse loss should be different from dense loss + if seqlen > index_topk: + assert loss_sparse != loss_dense + else: + assert loss_sparse == loss_dense + assert loss_sparse >= 0 + assert loss_dense >= 0 + + +class TestDSAIndexerLossAutoScaler: + """Test DSAIndexerLossAutoScaler autograd function.""" + + @pytest.fixture(scope='function', autouse=True) + def setup_method(self): + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1 + ) + yield + Utils.destroy_model_parallel() + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_forward_pass(self): + """Test that forward pass preserves output.""" + output = torch.randn(16, 2, 128).cuda() + output.requires_grad_(True) + indexer_loss = torch.tensor(0.5).cuda() + indexer_loss.requires_grad_(True) + + result = DSAIndexerLossAutoScaler.apply(output, indexer_loss) + + assert torch.allclose(result, output, atol=0, rtol=0) + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_backward_pass(self): + """Test that backward pass triggers indexer loss backward and scales gradient correctly.""" + output = 
torch.randn(16, 2, 128).cuda() + output.requires_grad_(True) + + # Create indexer_loss with computation graph + # This simulates compute_dsa_indexer_loss which computes KL divergence + dummy_input = torch.randn(10).cuda() + dummy_input.requires_grad_(True) + indexer_loss = dummy_input.mean() + + # Set loss scale + scale = torch.tensor(2.0).cuda() + DSAIndexerLossAutoScaler.set_loss_scale(scale) + + # Apply the autograd function + result = DSAIndexerLossAutoScaler.apply(output, indexer_loss) + + # Trigger backward + main_loss = result.sum() + main_loss.backward() + + # Check that gradients flow back to output + assert output.grad is not None, "Gradient should flow back to parameters" + + # Check that indexer_loss backward was triggered + assert dummy_input.grad is not None, "Indexer loss backward should be triggered" + + # Verify the gradient is scaled correctly + expected_grad_per_element = scale.item() / len(dummy_input) + assert torch.allclose( + dummy_input.grad, + torch.full_like(dummy_input, expected_grad_per_element), + rtol=0, + atol=0, + ), f"Gradient should be scaled by loss scale, expected {expected_grad_per_element}, got {dummy_input.grad[0].item()}" + + +@pytest.mark.parametrize("seqlen", [16, 64]) +class TestDSAIndexer: + """Test DSA Indexer module basic functionality with TP=1.""" + + @pytest.fixture(scope='function', autouse=True) + def setup_method(self): + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1 + ) + torch.manual_seed(123) + model_parallel_cuda_manual_seed(123) + + # Create MLA config with sparse attention parameters + self.index_topk = 32 + self.config = MLATransformerConfig( + num_layers=2, + hidden_size=256, + num_attention_heads=16, + use_cpu_initialization=True, + bf16=True, + params_dtype=torch.bfloat16, + # MLA specific configs + q_lora_rank=64, + kv_lora_rank=64, + qk_head_dim=64, + qk_pos_emb_head_dim=32, + v_head_dim=64, + rope_type='rope', + rotary_base=10000, + rotary_percent=1.0, 
+ # Sparse attention specific configs + dsa_indexer_n_heads=8, + dsa_indexer_head_dim=64, + dsa_indexer_topk=self.index_topk, + ) + + # Create indexer submodules spec + from megatron.core.extensions.transformer_engine import TELinear, TENorm + from megatron.core.transformer.spec_utils import ModuleSpec + + indexer_submodules = DSAIndexerSubmodules( + linear_wq_b=ModuleSpec(module=TELinear), + linear_wk=ModuleSpec(module=TELinear), + k_norm=ModuleSpec(module=TENorm), + linear_weights_proj=ModuleSpec(module=TELinear), + ) + + self.pg_collection = ProcessGroupCollection.use_mpu_process_groups( + required_pgs=['tp', 'cp'] + ) + self.indexer = DSAIndexer(self.config, indexer_submodules, self.pg_collection) + + yield + Utils.destroy_model_parallel() + + def test_dsa_indexer_constructor(self, seqlen): + """Test indexer initialization.""" + assert isinstance(self.indexer, DSAIndexer) + assert self.indexer.hidden_size == 256 + assert self.indexer.index_n_heads == 8 + assert self.indexer.index_head_dim == 64 + assert self.indexer.index_topk == 32 + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_indexer_forward(self, seqlen): + """Test indexer forward pass.""" + batch_size = 2 + + self.indexer.cuda() + + # Create input tensors + x = torch.randn(seqlen, batch_size, self.config.hidden_size, dtype=torch.bfloat16).cuda() + qr = torch.randn(seqlen, batch_size, self.config.q_lora_rank, dtype=torch.bfloat16).cuda() + + # Forward pass + topk_indices = self.indexer(x, qr) + + # Check output shape + assert topk_indices.shape == (batch_size, seqlen, min(self.config.dsa_indexer_topk, seqlen)) + assert topk_indices.dtype == torch.long + assert torch.all((topk_indices >= 0) & (topk_indices < seqlen)) + # Make sure no duplicate indices are selected + assert torch.all( + torch.sort(topk_indices, dim=-1).values[:, :, 1:] + != torch.sort(topk_indices, dim=-1).values[:, :, :-1] + ) + + @pytest.mark.skipif(not torch.cuda.is_available(), 
reason="CUDA not available") + def test_dsa_indexer_forward_with_scores(self, seqlen): + """Test indexer forward pass with scores.""" + batch_size = 2 + + self.indexer.cuda() + + # Create input tensors + x = torch.randn(seqlen, batch_size, self.config.hidden_size, dtype=torch.bfloat16).cuda() + qr = torch.randn(seqlen, batch_size, self.config.q_lora_rank, dtype=torch.bfloat16).cuda() + + # Forward pass with scores + index_scores, topk_indices = self.indexer.forward_with_scores(x, qr) + + # Check output shapes + assert index_scores.shape == (batch_size, seqlen, seqlen) + assert topk_indices.shape == (batch_size, seqlen, min(self.config.dsa_indexer_topk, seqlen)) + assert index_scores.dtype == torch.float32 + assert topk_indices.dtype == torch.long + assert torch.all((topk_indices >= 0) & (topk_indices < seqlen)) + # Make sure no duplicate indices are selected + assert torch.all( + torch.sort(topk_indices, dim=-1).values[:, :, 1:] + != torch.sort(topk_indices, dim=-1).values[:, :, :-1] + ) + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_indexer_with_mask(self, seqlen): + """Test indexer with attention mask.""" + batch_size = 2 + + self.indexer.cuda() + + # Create input tensors + x = torch.randn(seqlen, batch_size, self.config.hidden_size, dtype=torch.bfloat16).cuda() + qr = torch.randn(seqlen, batch_size, self.config.q_lora_rank, dtype=torch.bfloat16).cuda() + mask = torch.triu( + torch.full((batch_size, seqlen, seqlen), float('-inf'), dtype=torch.float32).cuda(), + diagonal=1, + ) + + # Forward pass with mask + index_scores, topk_indices = self.indexer.forward_with_scores(x, qr, mask=mask) + + # Check that masked positions are not selected + # For causal mask, topk_indices[b, i, :] should all be <= i (except for the case that + # i < index_topk). 
+ for b in range(batch_size): + for i in range(seqlen): + assert torch.all(topk_indices[b, i] <= max(self.index_topk, i)) + + +class TestDSAttention: + """Test DSAttention module basic functionality with TP=1.""" + + @pytest.fixture(scope='function', autouse=True) + def setup_method(self): + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1 + ) + torch.manual_seed(123) + model_parallel_cuda_manual_seed(123) + + # Create MLA config with sparse attention parameters + self.config = MLATransformerConfig( + num_layers=2, + hidden_size=256, + num_attention_heads=16, + use_cpu_initialization=True, + bf16=True, + params_dtype=torch.bfloat16, + # MLA specific configs + q_lora_rank=64, + kv_lora_rank=64, + qk_head_dim=64, + qk_pos_emb_head_dim=32, + v_head_dim=64, + rope_type='rope', + rotary_base=10000, + rotary_percent=1.0, + # Sparse attention specific configs + dsa_indexer_n_heads=8, + dsa_indexer_head_dim=64, + dsa_indexer_topk=32, + dsa_indexer_loss_coeff=1.0, + dsa_indexer_use_sparse_loss=False, + ) + + # Create sparse attention submodules spec + from megatron.core.extensions.transformer_engine import TELinear, TENorm + from megatron.core.transformer.spec_utils import ModuleSpec + + indexer_submodules = DSAIndexerSubmodules( + linear_wq_b=ModuleSpec(module=TELinear), + linear_wk=ModuleSpec(module=TELinear), + k_norm=ModuleSpec(module=TENorm), + linear_weights_proj=ModuleSpec(module=TELinear), + ) + indexer_spec = ModuleSpec(module=DSAIndexer, submodules=indexer_submodules) + sparse_attention_submodules = DSAttentionSubmodules(indexer=indexer_spec) + + self.pg_collection = ProcessGroupCollection.use_mpu_process_groups( + required_pgs=['tp', 'cp'] + ) + + self.sparse_attention = DSAttention( + config=self.config, + submodules=sparse_attention_submodules, + layer_number=1, + attn_mask_type=AttnMaskType.causal, + attention_type='self', + pg_collection=self.pg_collection, + ) + + yield + Utils.destroy_model_parallel() + + def 
test_dsa_constructor(self): + """Test sparse attention initialization.""" + assert isinstance(self.sparse_attention, DSAttention) + assert hasattr(self.sparse_attention, 'indexer') + assert isinstance(self.sparse_attention.indexer, DSAIndexer) + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_forward(self): + """Test sparse attention forward pass.""" + seq_len = 16 + batch_size = 2 + num_heads = self.config.num_attention_heads + head_dim = self.config.hidden_size // num_heads + + self.sparse_attention.cuda() + + # Create input tensors [seq_len, batch, num_heads, head_dim] + query = ( + torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16) + .cuda() + .requires_grad_(True) + ) + key = ( + torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16) + .cuda() + .requires_grad_(True) + ) + value = ( + torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16) + .cuda() + .requires_grad_(True) + ) + + # Original hidden states and low-rank query + x = torch.randn(seq_len, batch_size, self.config.hidden_size, dtype=torch.bfloat16).cuda() + qr = torch.randn(seq_len, batch_size, self.config.q_lora_rank, dtype=torch.bfloat16).cuda() + + # Create causal attention mask + attention_mask = torch.ones(batch_size, 1, seq_len, seq_len, dtype=torch.bool).cuda() + attention_mask = torch.tril(attention_mask) + + # Forward pass + output = self.sparse_attention( + query=query, + key=key, + value=value, + x=x, + qr=qr, + attention_mask=attention_mask, + attn_mask_type=AttnMaskType.causal, + ) + + # Check output shape + assert output.shape == (seq_len, batch_size, self.config.hidden_size) + assert output.dtype == torch.bfloat16 + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_backward(self): + """Test sparse attention backward pass with indexer loss.""" + seq_len = 16 + batch_size = 2 + num_heads = self.config.num_attention_heads + 
head_dim = self.config.hidden_size // num_heads + + self.sparse_attention.train() + self.sparse_attention.cuda() + + # Create input tensors + query = ( + torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16) + .cuda() + .requires_grad_(True) + ) + key = ( + torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16) + .cuda() + .requires_grad_(True) + ) + value = ( + torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16) + .cuda() + .requires_grad_(True) + ) + + # Original hidden states and low-rank query + x = torch.randn(seq_len, batch_size, self.config.hidden_size, dtype=torch.bfloat16).cuda() + qr = torch.randn(seq_len, batch_size, self.config.q_lora_rank, dtype=torch.bfloat16).cuda() + + # Create causal attention mask + attention_mask = torch.ones(batch_size, 1, seq_len, seq_len, dtype=torch.bool).cuda() + attention_mask = torch.tril(attention_mask) + + # Forward pass + output = self.sparse_attention( + query=query, + key=key, + value=value, + x=x, + qr=qr, + attention_mask=attention_mask, + attn_mask_type=AttnMaskType.causal, + ) + + # Backward pass + loss = output.sum() + loss.backward() + + # Check that gradients are computed for inputs + assert query.grad is not None + assert key.grad is not None + assert value.grad is not None + + # Check that indexer parameters have gradients + for name, param in self.sparse_attention.indexer.named_parameters(): + if param.requires_grad: + assert param.grad is not None, f"Indexer parameter {name} has no gradient" + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_topk_selection(self): + """Test that sparse attention correctly selects top-k indices.""" + seq_len = 16 + batch_size = 2 + num_heads = self.config.num_attention_heads + head_dim = self.config.hidden_size // num_heads + + self.sparse_attention.eval() + self.sparse_attention.cuda() + + # Create input tensors + query = torch.randn(seq_len, batch_size, 
num_heads, head_dim, dtype=torch.bfloat16).cuda() + key = torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16).cuda() + value = torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16).cuda() + + # Original hidden states and low-rank query + x = torch.randn(seq_len, batch_size, self.config.hidden_size, dtype=torch.bfloat16).cuda() + qr = torch.randn(seq_len, batch_size, self.config.q_lora_rank, dtype=torch.bfloat16).cuda() + + # Create causal attention mask + attention_mask = torch.ones(batch_size, 1, seq_len, seq_len, dtype=torch.bool).cuda() + attention_mask = torch.tril(attention_mask) + + with torch.no_grad(): + # Get topk indices from indexer + _, topk_indices = self.sparse_attention.indexer.forward_with_scores(x, qr) + + # Forward pass + output = self.sparse_attention( + query=query, + key=key, + value=value, + x=x, + qr=qr, + attention_mask=attention_mask, + attn_mask_type=AttnMaskType.causal, + ) + + # Check that topk_indices are valid + assert torch.all(topk_indices >= 0) + assert torch.all(topk_indices < seq_len) + assert topk_indices.shape[2] == min(self.config.dsa_indexer_topk, seq_len) + + +# ====================================================================================== +# Tensor Parallel Consistency Tests +# ====================================================================================== + + +@pytest.mark.parametrize("tensor_model_parallel_size", [2, 4, 8]) +@pytest.mark.parametrize("sequence_parallel", [False, True]) +class TestIndexerTensorParallel: + """Test DSA Indexer with different TP sizes and SP settings, compare with TP=1 baseline.""" + + def _create_config(self, sequence_parallel=False): + """Helper to create MLA config.""" + # Get TP size from parallel_state + tensor_model_parallel_size = parallel_state.get_tensor_model_parallel_world_size() + + return MLATransformerConfig( + num_layers=2, + hidden_size=256, + num_attention_heads=16, + use_cpu_initialization=True, + bf16=True, + 
params_dtype=torch.bfloat16, + tensor_model_parallel_size=tensor_model_parallel_size, + sequence_parallel=sequence_parallel, + # MLA specific configs + q_lora_rank=64, + kv_lora_rank=64, + qk_head_dim=64, + qk_pos_emb_head_dim=32, + v_head_dim=64, + rope_type='rope', + rotary_base=10000, + rotary_percent=1.0, + # Sparse attention specific configs + dsa_indexer_n_heads=8, + dsa_indexer_head_dim=64, + dsa_indexer_topk=32, + ) + + def _create_indexer(self, config, pg_collection): + """Helper to create indexer.""" + from megatron.core.extensions.transformer_engine import TELinear, TENorm + from megatron.core.transformer.spec_utils import ModuleSpec + + indexer_submodules = DSAIndexerSubmodules( + linear_wq_b=ModuleSpec(module=TELinear), + linear_wk=ModuleSpec(module=TELinear), + k_norm=ModuleSpec(module=TENorm), + linear_weights_proj=ModuleSpec(module=TELinear), + ) + + return DSAIndexer(config, indexer_submodules, pg_collection) + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_indexer_weight_consistency(self, tensor_model_parallel_size, sequence_parallel): + """Test that indexer weights are identical across ALL GPUs.""" + Utils.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, pipeline_model_parallel_size=1 + ) + torch.manual_seed(123) + model_parallel_cuda_manual_seed(123) + + config = self._create_config(sequence_parallel=sequence_parallel) + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp']) + indexer = self._create_indexer(config, pg_collection).cuda() + + # Check that all weights are identical across ALL ranks (not just TP group) + world_size = torch.distributed.get_world_size() + world_rank = torch.distributed.get_rank() + + if world_size > 1: + for name, param in indexer.named_parameters(): + # Gather weights from ALL ranks in WORLD group + param_list = [torch.zeros_like(param.data) for _ in range(world_size)] + 
torch.distributed.all_gather(param_list, param.data) + + # All weights should be identical across all GPUs + for i in range(1, world_size): + assert torch.allclose( + param_list[0], param_list[i], rtol=0, atol=0 + ), f"Parameter {name} differs between rank 0 and rank {i} (world)" + + Utils.destroy_model_parallel() + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_indexer_forward_consistency(self, tensor_model_parallel_size, sequence_parallel): + """Test that indexer gives consistent results across different TP sizes and SP settings.""" + # First run with TP=1 to get baseline + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1 + ) + torch.manual_seed(123) + model_parallel_cuda_manual_seed(123) + + config_tp1 = self._create_config(sequence_parallel=False) # TP=1 doesn't use SP + pg_collection_tp1 = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp']) + indexer_tp1 = self._create_indexer(config_tp1, pg_collection_tp1).cuda() + + seq_len = 64 + batch_size = 2 + + # Create one common input (all ranks create same input with same seed) + x_input = torch.randn( + seq_len, batch_size, config_tp1.hidden_size, dtype=torch.bfloat16 + ).cuda() + qr_input = torch.randn( + seq_len, batch_size, config_tp1.q_lora_rank, dtype=torch.bfloat16 + ).cuda() + + # Forward pass with gradients enabled + index_scores_tp1, topk_indices_tp1 = indexer_tp1.forward_with_scores(x_input, qr_input) + + # Backward pass + loss_tp1 = index_scores_tp1.sum() + loss_tp1.backward() + + # Save gradients from TP=1 + indexer_tp1_grads = { + name: param.grad.clone().cpu() + for name, param in indexer_tp1.named_parameters() + if param.grad is not None + } + + Utils.destroy_model_parallel() + + # Now run with target TP size + Utils.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, pipeline_model_parallel_size=1 + ) + torch.manual_seed(123) + 
model_parallel_cuda_manual_seed(123) + + config_tpn = self._create_config(sequence_parallel=sequence_parallel) + pg_collection_tpn = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp']) + indexer_tpn = self._create_indexer(config_tpn, pg_collection_tpn).cuda() + + # Prepare input: split along seqlen if SP is enabled + if sequence_parallel: + tp_rank = parallel_state.get_tensor_model_parallel_rank() + seq_per_rank = seq_len // tensor_model_parallel_size + start_idx = tp_rank * seq_per_rank + end_idx = (tp_rank + 1) * seq_per_rank + x_tpn = x_input[start_idx:end_idx] + qr_tpn = qr_input[start_idx:end_idx] + else: + # No SP: all TP ranks see full input + x_tpn = x_input + qr_tpn = qr_input + + # Forward pass with gradients enabled + index_scores_tpn, topk_indices_tpn = indexer_tpn.forward_with_scores(x_tpn, qr_tpn) + + # Backward pass + loss_tpn = index_scores_tpn.sum() + loss_tpn.backward() + + # Compare forward outputs + assert index_scores_tpn.shape == index_scores_tp1.shape + assert topk_indices_tpn.shape == topk_indices_tp1.shape + + # Check that index scores are close (allow for floating point accumulation errors) + assert torch.allclose( + index_scores_tpn, index_scores_tp1, rtol=0, atol=0 + ), f"Index scores mismatch between TP=1 and TP={tensor_model_parallel_size}, SP={sequence_parallel}" + + # Check that topk indices are exactly the same + assert torch.equal( + topk_indices_tpn, topk_indices_tp1 + ), f"Top-k indices mismatch between TP=1 and TP={tensor_model_parallel_size}, SP={sequence_parallel}" + + # Compare gradients - indexer grads should be identical (duplicated weights) + for name, param in indexer_tpn.named_parameters(): + if param.grad is not None and name in indexer_tp1_grads: + assert torch.allclose( + param.grad.cpu(), indexer_tp1_grads[name], rtol=0, atol=0 + ), f"Indexer gradient {name} mismatch between TP=1 and TP={tensor_model_parallel_size}, SP={sequence_parallel}" + + Utils.destroy_model_parallel() + + 
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_indexer_gradient_sync(self, tensor_model_parallel_size, sequence_parallel): + """Test that gradients are properly synchronized within TP group.""" + Utils.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, pipeline_model_parallel_size=1 + ) + torch.manual_seed(123) + model_parallel_cuda_manual_seed(123) + + config = self._create_config(sequence_parallel=sequence_parallel) + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp']) + indexer = self._create_indexer(config, pg_collection).cuda() + + seq_len = 64 + batch_size = 2 + + # Create one common input (all ranks create same input with same seed) + x_input = torch.randn(seq_len, batch_size, config.hidden_size, dtype=torch.bfloat16).cuda() + qr_input = torch.randn(seq_len, batch_size, config.q_lora_rank, dtype=torch.bfloat16).cuda() + + # Prepare input: split along seqlen if SP is enabled + if sequence_parallel: + tp_rank = parallel_state.get_tensor_model_parallel_rank() + tp_size = parallel_state.get_tensor_model_parallel_world_size() + seq_per_rank = seq_len // tp_size + start_idx = tp_rank * seq_per_rank + end_idx = (tp_rank + 1) * seq_per_rank + x = x_input[start_idx:end_idx] + qr = qr_input[start_idx:end_idx] + else: + # No SP: all TP ranks see full input + x = x_input + qr = qr_input + + # Forward and backward + index_scores, topk_indices = indexer.forward_with_scores(x, qr) + loss = index_scores.sum() + loss.backward() + + # Check that all parameters have gradients + for name, param in indexer.named_parameters(): + if param.requires_grad: + assert param.grad is not None, f"Parameter {name} has no gradient" + + # After TP sync, check that gradients are identical within TP group + # Note: We only check TP group because DDP sync happens separately + tp_size = parallel_state.get_tensor_model_parallel_world_size() + if tp_size > 1: + for name, param in 
indexer.named_parameters(): + if param.requires_grad and param.grad is not None: + # Gather gradients from all ranks in TP group only + grad_list = [torch.zeros_like(param.grad) for _ in range(tp_size)] + torch.distributed.all_gather(grad_list, param.grad, group=pg_collection.tp) + + # All gradients should be identical within TP group after sync + for i in range(1, tp_size): + assert torch.allclose( + grad_list[0], grad_list[i], rtol=0, atol=0 + ), f"Gradient for {name} differs between TP rank 0 and rank {i} after TP sync" + + Utils.destroy_model_parallel() + + +@pytest.mark.parametrize("tensor_model_parallel_size", [2, 4]) +@pytest.mark.parametrize("sequence_parallel", [False, True]) +@pytest.mark.parametrize("use_sparse_indexer_loss", [False, True]) +class TestDSAttentionTensorParallel: + """Test DSAttention with different TP sizes, SP settings, and sparse indexer loss.""" + + def _create_config(self, sequence_parallel=False, use_sparse_indexer_loss=False): + """Helper to create MLA config.""" + # Get TP size from parallel_state + tensor_model_parallel_size = parallel_state.get_tensor_model_parallel_world_size() + + return MLATransformerConfig( + num_layers=2, + hidden_size=256, + num_attention_heads=16, + use_cpu_initialization=True, + bf16=True, + params_dtype=torch.bfloat16, + tensor_model_parallel_size=tensor_model_parallel_size, + sequence_parallel=sequence_parallel, + # MLA specific configs + q_lora_rank=64, + kv_lora_rank=64, + qk_head_dim=64, + qk_pos_emb_head_dim=32, + v_head_dim=64, + rope_type='rope', + rotary_base=10000, + rotary_percent=1.0, + # Sparse attention specific configs + dsa_indexer_n_heads=8, + dsa_indexer_head_dim=64, + dsa_indexer_topk=32, + dsa_indexer_loss_coeff=1.0, + dsa_indexer_use_sparse_loss=use_sparse_indexer_loss, + ) + + def _create_sparse_attention(self, config, pg_collection): + """Helper to create sparse attention.""" + from megatron.core.extensions.transformer_engine import TELinear, TENorm + from 
megatron.core.transformer.spec_utils import ModuleSpec + + indexer_submodules = DSAIndexerSubmodules( + linear_wq_b=ModuleSpec(module=TELinear), + linear_wk=ModuleSpec(module=TELinear), + k_norm=ModuleSpec(module=TENorm), + linear_weights_proj=ModuleSpec(module=TELinear), + ) + indexer_spec = ModuleSpec(module=DSAIndexer, submodules=indexer_submodules) + sparse_attention_submodules = DSAttentionSubmodules(indexer=indexer_spec) + + return DSAttention( + config=config, + submodules=sparse_attention_submodules, + layer_number=1, + attn_mask_type=AttnMaskType.causal, + attention_type='self', + pg_collection=pg_collection, + ) + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_weight_consistency( + self, tensor_model_parallel_size, sequence_parallel, use_sparse_indexer_loss + ): + """Test that sparse attention indexer weights are identical across ALL GPUs.""" + Utils.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, pipeline_model_parallel_size=1 + ) + torch.manual_seed(123) + model_parallel_cuda_manual_seed(123) + + config = self._create_config( + sequence_parallel=sequence_parallel, use_sparse_indexer_loss=use_sparse_indexer_loss + ) + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp']) + sparse_attention = self._create_sparse_attention(config, pg_collection).cuda() + + # Check that all indexer weights are identical across ALL ranks + world_size = torch.distributed.get_world_size() + world_rank = torch.distributed.get_rank() + + if world_size > 1: + for name, param in sparse_attention.indexer.named_parameters(): + # Gather weights from ALL ranks in WORLD group + param_list = [torch.zeros_like(param.data) for _ in range(world_size)] + torch.distributed.all_gather(param_list, param.data) + + # All weights should be identical across all GPUs + for i in range(1, world_size): + torch.testing.assert_close(param_list[0], param_list[i], rtol=0, atol=0) + + 
Utils.destroy_model_parallel() + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_forward_consistency( + self, tensor_model_parallel_size, sequence_parallel, use_sparse_indexer_loss + ): + """Test that sparse attention gives consistent results across different TP, SP, and sparse loss settings.""" + # First run with TP=1 to get baseline + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1 + ) + torch.manual_seed(123) + model_parallel_cuda_manual_seed(123) + + config_tp1 = self._create_config( + sequence_parallel=False, use_sparse_indexer_loss=use_sparse_indexer_loss + ) # TP=1 doesn't use SP + pg_collection_tp1 = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp']) + sparse_attention_tp1 = self._create_sparse_attention(config_tp1, pg_collection_tp1).cuda() + + seq_len = 64 + batch_size = 2 + num_heads = config_tp1.num_attention_heads + head_dim = config_tp1.hidden_size // num_heads + + # Create one common input (all ranks create same input with same seed) + query_input = ( + torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16) + .cuda() + .requires_grad_(True) + ) + key_input = ( + torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16) + .cuda() + .requires_grad_(True) + ) + value_input = ( + torch.randn(seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16) + .cuda() + .requires_grad_(True) + ) + x_input = torch.randn( + seq_len, batch_size, config_tp1.hidden_size, dtype=torch.bfloat16 + ).cuda() + qr_input = torch.randn( + seq_len, batch_size, config_tp1.q_lora_rank, dtype=torch.bfloat16 + ).cuda() + attention_mask = torch.ones(batch_size, 1, seq_len, seq_len, dtype=torch.bool).cuda() + attention_mask = torch.tril(attention_mask) + + # Forward pass with gradients enabled + sparse_attention_tp1.train() + output_tp1 = sparse_attention_tp1( + query=query_input, + key=key_input, + value=value_input, + 
x=x_input, + qr=qr_input, + attention_mask=attention_mask, + attn_mask_type=AttnMaskType.causal, + ) + + # Backward pass + loss_tp1 = output_tp1.sum() + loss_tp1.backward() + + # Save gradients from TP=1 + indexer_tp1_grads = { + name: param.grad.clone() + for name, param in sparse_attention_tp1.indexer.named_parameters() + if param.grad is not None + } + query_tp1_grad = query_input.grad.clone().cpu() + key_tp1_grad = key_input.grad.clone().cpu() + value_tp1_grad = value_input.grad.clone().cpu() + + Utils.destroy_model_parallel() + + # Now run with target TP size + Utils.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, pipeline_model_parallel_size=1 + ) + torch.manual_seed(123) + model_parallel_cuda_manual_seed(123) + + config_tpn = self._create_config( + sequence_parallel=sequence_parallel, use_sparse_indexer_loss=use_sparse_indexer_loss + ) + pg_collection_tpn = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp']) + sparse_attention_tpn = self._create_sparse_attention(config_tpn, pg_collection_tpn).cuda() + + # Create one common input (all ranks create same input with same seed) + query_input = torch.randn( + seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16 + ).cuda() + key_input = torch.randn( + seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16 + ).cuda() + value_input = torch.randn( + seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16 + ).cuda() + x_input = torch.randn( + seq_len, batch_size, config_tp1.hidden_size, dtype=torch.bfloat16 + ).cuda() + qr_input = torch.randn( + seq_len, batch_size, config_tp1.q_lora_rank, dtype=torch.bfloat16 + ).cuda() + attention_mask = torch.ones(batch_size, 1, seq_len, seq_len, dtype=torch.bool).cuda() + attention_mask = torch.tril(attention_mask) + + # Prepare input: split along seqlen if SP is enabled + tp_rank = parallel_state.get_tensor_model_parallel_rank() + if sequence_parallel: + seq_per_rank = seq_len // 
tensor_model_parallel_size + start_idx = tp_rank * seq_per_rank + end_idx = (tp_rank + 1) * seq_per_rank + x_tpn = x_input[start_idx:end_idx] + qr_tpn = qr_input[start_idx:end_idx] + else: + x_tpn = x_input + qr_tpn = qr_input + + query_input = query_input.detach() + key_input = key_input.detach() + value_input = value_input.detach() + head_per_rank = num_heads // tensor_model_parallel_size + start_head = tp_rank * head_per_rank + end_head = (tp_rank + 1) * head_per_rank + query_tpn = query_input[:, :, start_head:end_head, :].clone().requires_grad_(True) + key_tpn = key_input[:, :, start_head:end_head, :].clone().requires_grad_(True) + value_tpn = value_input[:, :, start_head:end_head, :].clone().requires_grad_(True) + attention_mask_tpn = attention_mask + + # Forward pass with gradients enabled + sparse_attention_tpn.train() + output_tpn = sparse_attention_tpn( + query=query_tpn, + key=key_tpn, + value=value_tpn, + x=x_tpn, + qr=qr_tpn, + attention_mask=attention_mask_tpn, + attn_mask_type=AttnMaskType.causal, + ) + + # Backward pass + loss_tpn = output_tpn.sum() + loss_tpn.backward() + + from megatron.core.tensor_parallel.mappings import gather_from_tensor_model_parallel_region + + output_tpn_gathered = gather_from_tensor_model_parallel_region( + output_tpn, group=pg_collection_tpn.tp + ) + assert output_tpn_gathered.shape == output_tp1.shape + assert torch.allclose( + output_tpn_gathered.detach(), output_tp1.detach(), rtol=0, atol=0 + ), f"Sparse attention outputs mismatch between TP=1 and TP={tensor_model_parallel_size}, SP={sequence_parallel}, sparse_loss={use_sparse_indexer_loss}" + + # 1. Check indexer gradients. + for name, param in sparse_attention_tpn.indexer.named_parameters(): + if param.grad is not None and name in indexer_tp1_grads: + torch.testing.assert_close( + param.grad, indexer_tp1_grads[name], rtol=1e-5, atol=1e-5 + ) + + # 2. 
Query/Key/Value gradients need to be gathered along num_heads dim (dim 2) if SP is enabled + # Flatten last two dims: [seq_len, batch, num_heads, head_dim] -> [seq_len, batch, num_heads * head_dim] + sq, b, nh, hd = query_tpn.grad.shape + query_grad_flat = query_tpn.grad.reshape(sq, b, nh * hd) + key_grad_flat = key_tpn.grad.reshape(sq, b, nh * hd) + value_grad_flat = value_tpn.grad.reshape(sq, b, nh * hd) + + # Gather along last dim + query_grad_gathered_flat = gather_from_tensor_model_parallel_region( + query_grad_flat, group=pg_collection_tpn.tp + ) + key_grad_gathered_flat = gather_from_tensor_model_parallel_region( + key_grad_flat, group=pg_collection_tpn.tp + ) + value_grad_gathered_flat = gather_from_tensor_model_parallel_region( + value_grad_flat, group=pg_collection_tpn.tp + ) + + # Reshape back: [seq_len, batch, num_heads * head_dim] -> [seq_len, batch, num_heads, head_dim] + query_tpn_grad_gathered = query_grad_gathered_flat.reshape(sq, b, num_heads, hd) + key_tpn_grad_gathered = key_grad_gathered_flat.reshape(sq, b, num_heads, hd) + value_tpn_grad_gathered = value_grad_gathered_flat.reshape(sq, b, num_heads, hd) + + assert torch.allclose( + query_tpn_grad_gathered.cpu(), query_tp1_grad, rtol=0, atol=0 + ), f"Query gradient mismatch between TP=1 and TP={tensor_model_parallel_size}" + assert torch.allclose( + key_tpn_grad_gathered.cpu(), key_tp1_grad, rtol=0, atol=0 + ), f"Key gradient mismatch between TP=1 and TP={tensor_model_parallel_size}" + assert torch.allclose( + value_tpn_grad_gathered.cpu(), value_tp1_grad, rtol=0, atol=0 + ), f"Value gradient mismatch between TP=1 and TP={tensor_model_parallel_size}" + + Utils.destroy_model_parallel() + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_dsa_gradient_sync( + self, tensor_model_parallel_size, sequence_parallel, use_sparse_indexer_loss + ): + """Test that indexer gradients are properly synchronized within TP group.""" + Utils.initialize_model_parallel( + 
tensor_model_parallel_size=tensor_model_parallel_size, pipeline_model_parallel_size=1 + ) + torch.manual_seed(123) + model_parallel_cuda_manual_seed(123) + + config = self._create_config( + sequence_parallel=sequence_parallel, use_sparse_indexer_loss=use_sparse_indexer_loss + ) + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp']) + sparse_attention = self._create_sparse_attention(config, pg_collection).cuda() + sparse_attention.train() + + seq_len = 64 + batch_size = 2 + num_heads = config.num_attention_heads + head_dim = config.hidden_size // num_heads + + # Create one common input (all ranks create same input with same seed) + query_input = torch.randn( + seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16 + ).cuda() + key_input = torch.randn( + seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16 + ).cuda() + value_input = torch.randn( + seq_len, batch_size, num_heads, head_dim, dtype=torch.bfloat16 + ).cuda() + x_input = torch.randn(seq_len, batch_size, config.hidden_size, dtype=torch.bfloat16).cuda() + qr_input = torch.randn(seq_len, batch_size, config.q_lora_rank, dtype=torch.bfloat16).cuda() + + # Prepare input: split along seqlen if SP is enabled + tp_rank = parallel_state.get_tensor_model_parallel_rank() + if sequence_parallel: + tp_size = parallel_state.get_tensor_model_parallel_world_size() + seq_per_rank = seq_len // tp_size + start_idx = tp_rank * seq_per_rank + end_idx = (tp_rank + 1) * seq_per_rank + x = x_input[start_idx:end_idx] + qr = qr_input[start_idx:end_idx] + else: + x = x_input + qr = qr_input + + # query, key, value should be split along num_heads dim + head_per_rank = num_heads // tensor_model_parallel_size + start_head = tp_rank * head_per_rank + end_head = (tp_rank + 1) * head_per_rank + query = query_input[:, :, start_head:end_head, :] + key = key_input[:, :, start_head:end_head, :] + value = value_input[:, :, start_head:end_head, :] + + attention_mask = torch.ones(batch_size, 
1, seq_len, seq_len, dtype=torch.bool).cuda() + attention_mask = torch.tril(attention_mask) + + query.requires_grad_(True) + key.requires_grad_(True) + value.requires_grad_(True) + + # Forward and backward + output = sparse_attention( + query=query, + key=key, + value=value, + x=x, + qr=qr, + attention_mask=attention_mask, + attn_mask_type=AttnMaskType.causal, + ) + + loss = output.sum() + loss.backward() + + # Check that gradients exist before sync + assert query.grad is not None + assert key.grad is not None + assert value.grad is not None + + # Check that indexer parameters have gradients + for name, param in sparse_attention.indexer.named_parameters(): + if param.requires_grad: + assert param.grad is not None, f"Indexer parameter {name} has no gradient" + + # Check that indexer gradients are identical within TP group + tp_size = parallel_state.get_tensor_model_parallel_world_size() + if tp_size > 1: + for name, param in sparse_attention.indexer.named_parameters(): + if param.requires_grad and param.grad is not None: + # Gather gradients from all ranks in TP group only + grad_list = [torch.zeros_like(param.grad) for _ in range(tp_size)] + torch.distributed.all_gather(grad_list, param.grad, group=pg_collection.tp) + + # All gradients should be identical within TP group after sync + for i in range(1, tp_size): + assert torch.allclose( + grad_list[0], grad_list[i], rtol=0, atol=0 + ), f"Indexer gradient for {name} differs between TP rank 0 and rank {i} after TP sync" + + Utils.destroy_model_parallel() From 71357e2ba87c012245fd018eb987a59edffcf222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 1 Dec 2025 18:27:16 +0000 Subject: [PATCH 166/334] Revert "[Dev] feat(MoE): Refactor cuda_graph_scope - part2 (#2353)" This reverts commit 92c8482e6dcd11c3666c61bb8d1f7e8d0730ed13. 
--- .../text_generation_controller.py | 3 +- .../common/language_module/language_module.py | 5 +- megatron/core/models/gpt/gpt_model.py | 4 +- megatron/core/pipeline_parallel/schedules.py | 7 +- megatron/core/ssm/mamba_block.py | 3 +- megatron/core/transformer/attention.py | 4 +- megatron/core/transformer/cuda_graphs.py | 47 ++----- megatron/core/transformer/enums.py | 12 -- megatron/core/transformer/moe/fused_a2a.py | 8 -- megatron/core/transformer/moe/moe_utils.py | 7 +- .../core/transformer/moe/token_dispatcher.py | 12 +- .../core/transformer/transformer_block.py | 4 +- .../core/transformer/transformer_config.py | 112 ++++++++--------- .../core/transformer/transformer_layer.py | 47 ++++--- megatron/training/arguments.py | 18 +-- megatron/training/training.py | 9 +- .../inference/engines/test_dynamic_engine.py | 12 +- tests/unit_tests/test_fp8_param.py | 24 ++-- .../transformer/test_cuda_graphs.py | 117 ++++++------------ 19 files changed, 153 insertions(+), 302 deletions(-) diff --git a/megatron/core/inference/text_generation_controllers/text_generation_controller.py b/megatron/core/inference/text_generation_controllers/text_generation_controller.py index 6e00f58ac23..2bda1425710 100644 --- a/megatron/core/inference/text_generation_controllers/text_generation_controller.py +++ b/megatron/core/inference/text_generation_controllers/text_generation_controller.py @@ -29,7 +29,6 @@ ) from megatron.core.inference.sampling_params import SamplingParams from megatron.core.inference.utils import get_attention_mask, set_decode_expert_padding -from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.moe.moe_layer import BaseMoELayer from megatron.core.transformer.utils import set_model_to_sequence_parallel from megatron.core.utils import get_asyncio_loop, get_model_config, unwrap_model @@ -852,7 +851,7 @@ def generate_all_output_tokens_static_batch( # Check whether CUDA graphs are enabled enable_cuda_graph = ( model_config.cuda_graph_impl == 
"local" - and CudaGraphScope.full_iteration not in model_config.cuda_graph_scope + and "full_iteration" not in model_config.cuda_graph_scope ) # Pad batch tokens if necessary diff --git a/megatron/core/models/common/language_module/language_module.py b/megatron/core/models/common/language_module/language_module.py index 259bb716a93..de2ecfb8011 100644 --- a/megatron/core/models/common/language_module/language_module.py +++ b/megatron/core/models/common/language_module/language_module.py @@ -21,7 +21,7 @@ is_vp_last_stage, ) from megatron.core.process_groups_config import ProcessGroupCollection -from megatron.core.transformer.enums import AttnBackend, CudaGraphScope +from megatron.core.transformer.enums import AttnBackend from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.utils import ensure_metadata_has_dp_cp_group @@ -144,7 +144,8 @@ def compute_language_model_loss(self, labels: Tensor, logits: Tensor) -> Tensor: # Use is_cg_capturable=True for full iteration CUDA graphs to avoid torch.equal checks is_cg_capturable = ( hasattr(self.config, 'cuda_graph_scope') - and CudaGraphScope.full_iteration in self.config.cuda_graph_scope + and self.config.cuda_graph_scope + and 'full_iteration' in self.config.cuda_graph_scope ) if is_cg_capturable and not is_te_min_version("2.7.0"): from megatron.core.utils import get_te_version diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index a3d1a8bfc00..ce1e8e76bd9 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -24,7 +24,7 @@ from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.quantization.utils import get_quant_config_or_none from megatron.core.tensor_parallel import gather_from_sequence_parallel_region -from megatron.core.transformer.enums import CudaGraphScope, ModelType +from 
megatron.core.transformer.enums import ModelType from megatron.core.transformer.multi_token_prediction import ( MTPLossAutoScaler, MTPLossLoggingHelper, @@ -374,7 +374,7 @@ def _preprocess( and ( ( self.config.cuda_graph_impl == "local" - and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope + and "full_iteration" not in self.config.cuda_graph_scope ) or self.config.flash_decode ) diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index 18344429c45..d0b912349b4 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -21,7 +21,6 @@ ) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.cuda_graphs import create_cudagraphs -from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.moe.router import MoEAuxLossAutoScaler from megatron.core.utils import ( drain_embedding_wgrad_compute, @@ -657,7 +656,7 @@ def forward_backward_no_pipelining( if ( hasattr(config, 'cuda_graph_impl') and config.cuda_graph_impl == "local" - and CudaGraphScope.full_iteration not in config.cuda_graph_scope + and "full_iteration" not in config.cuda_graph_scope ): create_cudagraphs() @@ -1924,7 +1923,7 @@ def pp_post_backward(input_tensor_grad, vp_stage=None): if ( hasattr(config, 'cuda_graph_impl') and config.cuda_graph_impl == "local" - and CudaGraphScope.full_iteration not in config.cuda_graph_scope + and "full_iteration" not in config.cuda_graph_scope ): create_cudagraphs() nvtx_range_pop(suffix="misc") @@ -2311,7 +2310,7 @@ def enable_grad_sync(): if ( hasattr(config, 'cuda_graph_impl') and config.cuda_graph_impl == "local" - and CudaGraphScope.full_iteration not in config.cuda_graph_scope + and "full_iteration" not in config.cuda_graph_scope ): create_cudagraphs() diff --git a/megatron/core/ssm/mamba_block.py b/megatron/core/ssm/mamba_block.py index 3201a8bfb28..1bcadd0af10 100644 --- 
a/megatron/core/ssm/mamba_block.py +++ b/megatron/core/ssm/mamba_block.py @@ -25,7 +25,6 @@ from megatron.core.ssm.mamba_hybrid_layer_allocation import allocate_layers from megatron.core.tensor_parallel import get_cuda_rng_tracker from megatron.core.transformer import TransformerConfig -from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module @@ -295,7 +294,7 @@ def forward( ( ( self.config.cuda_graph_impl == "local" - and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope + and "full_iteration" not in self.config.cuda_graph_scope ) or self.config.flash_decode ) diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 5cf22d25a4b..f6f40027789 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -45,7 +45,7 @@ from ..models.common.embeddings.yarn_rotary_pos_embedding import ( _yarn_get_concentration_factor_from_config, ) -from .enums import AttnMaskType, CudaGraphScope +from .enums import AttnMaskType from .transformer_config import TransformerConfig try: @@ -829,7 +829,7 @@ def forward( if ( in_decode_mode and self.config.cuda_graph_impl == "local" - and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope + and "full_iteration" not in self.config.cuda_graph_scope and inference_context.is_static_batching() ): raise ValueError(f"CUDA graphs must use flash decode with static batching!") diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 5b0a0333d9e..12f15ee980a 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -21,7 +21,6 @@ get_all_rng_states, get_cuda_rng_tracker, ) -from megatron.core.transformer.enums import CudaGraphScope from 
megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule from megatron.core.transformer.transformer_config import TransformerConfig @@ -1345,24 +1344,24 @@ def _layer_is_graphable(layer, config): from megatron.core.transformer.moe.moe_layer import MoELayer from megatron.core.transformer.transformer_layer import TransformerLayer - if isinstance(layer, MambaLayer) and CudaGraphScope.mamba in config.cuda_graph_scope: + if isinstance(layer, MambaLayer) and 'mamba' in config.cuda_graph_scope: # mamba layer. return True if isinstance(layer, TransformerLayer): - if CudaGraphScope.attn in config.cuda_graph_scope and not ( + if 'attn' in config.cuda_graph_scope and not ( isinstance(layer.self_attention, IdentityOp) and isinstance(layer.cross_attention, IdentityOp) ): # attn layer. return True if ( - CudaGraphScope.moe in config.cuda_graph_scope - or CudaGraphScope.moe_router in config.cuda_graph_scope - or CudaGraphScope.moe_preprocess in config.cuda_graph_scope + 'moe' in config.cuda_graph_scope + or 'moe_router' in config.cuda_graph_scope + or 'moe_preprocess' in config.cuda_graph_scope ) and isinstance(layer.mlp, MoELayer): # moe layer. return True - if CudaGraphScope.mlp in config.cuda_graph_scope and isinstance(layer.mlp, MLP): + if 'mlp' in config.cuda_graph_scope and isinstance(layer.mlp, MLP): # mlp layer. return True return False @@ -1389,7 +1388,7 @@ def __init__(self, model, config, seq_length, micro_batch_size, optimizers=[]): "Setting NCCL_GRAPH_REGISTER=0 to avoid illegal memory access when using " "CUDA Graph with PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True." ) - assert CudaGraphScope.full_iteration not in config.cuda_graph_scope, ( + assert "full_iteration" not in config.cuda_graph_scope, ( "full_iteration cuda graph is not supported for cuda_graph_impl=transformer_engine. " "Please use cuda_graph_impl=local instead." 
) @@ -1530,7 +1529,7 @@ def get_rotary_pos_emb(transformer_module, transformer_input): and not isinstance(layer.self_attention, IdentityOp) and ( not self.config.cuda_graph_scope - or CudaGraphScope.attn in self.config.cuda_graph_scope + or 'attn' in self.config.cuda_graph_scope ) ) if is_te_min_version("1.10.0"): @@ -1713,33 +1712,3 @@ def cuda_graph_set_manual_hooks(self): model_chunk = self.model[chunk_number] for layer in layers: layer.setup_manual_hooks(model_chunk._make_forward_pre_hook) - - def delete_cuda_graphs(self): - """ - Delete all CUDA graphs. - """ - assert self._graphs_created, "CUDA Graphs have not been created." - - graph_resettable = is_te_min_version("2.10.0") - graphs_reset, graphs_not_reset = 0, 0 - for layers in self.callables_per_chunk: - for layer in layers: - for graph in layer.cuda_graphs: - if graph_resettable: - graph.reset() - graphs_reset += 1 - else: - graphs_not_reset += 1 - layer.cuda_graphs = [] - layer.cuda_graph_manual_hooks = [] - - log_on_each_pipeline_stage( - logger=logger, - tp_group=None, - dp_cp_group=None, - level=logging.INFO, - msg=f'Rank {torch.distributed.get_rank()}: ' - f'{graphs_reset} graphs deleted with explicit reset, ' - f'{graphs_not_reset} graphs deleted without explicit reset.', - ) - self._graphs_created = False diff --git a/megatron/core/transformer/enums.py b/megatron/core/transformer/enums.py index d06d58d65f2..52b82029f90 100644 --- a/megatron/core/transformer/enums.py +++ b/megatron/core/transformer/enums.py @@ -65,15 +65,3 @@ class AttnBackend(enum.Enum): unfused = 3 local = 4 auto = 5 - - -class CudaGraphScope(enum.Enum): - """Cuda Graph Scope - defines which parts of the model to capture.""" - - full_iteration = 1 # Captures the entire training/inference iteration - attn = 2 # Captures attention layers - mlp = 3 # Captures MLP layers (dense layers only) - moe = 4 # Captures MoE layers (drop-and-pad MoE layers only) - moe_router = 5 # Captures MoE router part - moe_preprocess = 6 # Captures MoE 
preprocessing part (requires moe_router) - mamba = 7 # Captures Mamba layers diff --git a/megatron/core/transformer/moe/fused_a2a.py b/megatron/core/transformer/moe/fused_a2a.py index 045a93039b3..60b0b11a32c 100644 --- a/megatron/core/transformer/moe/fused_a2a.py +++ b/megatron/core/transformer/moe/fused_a2a.py @@ -320,14 +320,6 @@ def init_hybrid_ep_buffer( ) -def reset_hybrid_ep_buffer(): - ''' - Reset the HybridEP buffer - ''' - global _hybrid_ep_buffer - _hybrid_ep_buffer = None - - class HybridEPDispatch(torch.autograd.Function): ''' Fused dispatch operation for permute + dispatch a2a + permute using the HybridEP backend diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index 3ed31d375e2..d28cbfea3fe 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -11,7 +11,6 @@ from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.cuda_graphs import is_graph_capturing -from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig try: @@ -1206,13 +1205,13 @@ def maybe_raise_signal(moe_layer, **kwargs): ): if ( step_condition == "route" - and CudaGraphScope.moe_router in moe_layer.config.cuda_graph_scope - and CudaGraphScope.moe_preprocess not in moe_layer.config.cuda_graph_scope + and 'moe_router' in moe_layer.config.cuda_graph_scope + and 'moe_preprocess' not in moe_layer.config.cuda_graph_scope ): raise MoECudaGraphPartialCaptureSignal(moe_layer, "route", **kwargs) elif ( step_condition == "preprocess" - and CudaGraphScope.moe_preprocess in moe_layer.config.cuda_graph_scope + and 'moe_preprocess' in moe_layer.config.cuda_graph_scope ): raise MoECudaGraphPartialCaptureSignal(moe_layer, "preprocess", **kwargs) diff --git a/megatron/core/transformer/moe/token_dispatcher.py 
b/megatron/core/transformer/moe/token_dispatcher.py index af8ae572adb..b2135fdb00d 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -16,7 +16,6 @@ gather_from_sequence_parallel_region, reduce_scatter_to_sequence_parallel_region, ) -from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.moe.fused_a2a import ( fused_combine, fused_dispatch, @@ -437,7 +436,7 @@ def __init__( } if ( config.cuda_graph_impl == "transformer_engine" - and CudaGraphScope.moe_preprocess in config.cuda_graph_scope + and 'moe_preprocess' in config.cuda_graph_scope ): self.cuda_dtoh_point = "before_ep_alltoall" else: @@ -1076,13 +1075,10 @@ def combine( num_permuted_tokens=self.num_permuted_tokens, pad_multiple=self.pad_multiple, ) - # Release the used handle/num_permuted_tokens which could change in each iteration. - # For drop_and_pad mode, we don't need to reset the num_permuted_tokens and - # num_dispatched_tokens, because their values never change. 
+ # Release the used handle/num_permuted_tokens which could change in each iteration self.handle = None - if not self.drop_and_pad: - self.num_permuted_tokens = None - self.num_dispatched_tokens = None + self.num_permuted_tokens = None + self.num_dispatched_tokens = None return hidden_states def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor: diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index 023db1fe75a..6f69927e9e8 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -21,7 +21,7 @@ ) from megatron.core.pipeline_parallel.utils import is_vp_first_stage, is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection -from megatron.core.transformer.enums import CudaGraphScope, LayerType +from megatron.core.transformer.enums import LayerType from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module from megatron.core.transformer.transformer_config import TransformerConfig @@ -555,7 +555,7 @@ def _should_call_local_cudagraph(self, *args, **kwargs): kwargs.get('inference_context') is not None or kwargs.get('inference_params') is not None ) - and CudaGraphScope.full_iteration in self.config.cuda_graph_scope + and 'full_iteration' in self.config.cuda_graph_scope ): if kwargs['inference_context'].is_static_batching(): using_cuda_graph = kwargs['inference_context'].is_decode_only() diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index a3a16754977..656699ea2a2 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -9,7 +9,7 @@ from megatron.core.enums import Fp4Recipe, Fp8Recipe from megatron.core.quantization.quant_config import RecipeConfig -from 
megatron.core.transformer.enums import AttnBackend, CudaGraphScope +from megatron.core.transformer.enums import AttnBackend from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout from ..fusions.fused_bias_geglu import quick_gelu @@ -733,7 +733,7 @@ class TransformerConfig(ModelParallelConfig): excluding optimizer) is enabled. "transformer_engine": capture the CUDA graph using TE make_graphed_callables().""" - cuda_graph_scope: Optional[List[CudaGraphScope]] = None + cuda_graph_scope: Optional[List[str]] = None """Determines the CUDA graphs capturing scope. When cuda_graph_impl is set to "transformer_engine", valid values are "attn", "mlp", "moe", "moe_router", "moe_preprocess", "mamba". None means the full layer. @@ -1615,76 +1615,65 @@ def __post_init__(self): 'use cuda_graph_impl=transformer_engine instead.' ) self.cuda_graph_impl = "transformer_engine" - if self.cuda_graph_scope is None: self.cuda_graph_scope = [] - elif not isinstance(self.cuda_graph_scope, list): - if isinstance(self.cuda_graph_scope, CudaGraphScope): - self.cuda_graph_scope = [self.cuda_graph_scope] - else: - assert isinstance(self.cuda_graph_scope, str), ( - "cuda_graph_scope must be a string that can be converted to a list of " - f"CudaGraphScope, got {self.cuda_graph_scope}." - ) - self.cuda_graph_scope = self.cuda_graph_scope.split(',') - if all(isinstance(scope, str) for scope in self.cuda_graph_scope): - # Backward compatibility for "full" scope. Now we use an empty list instead. - if "full" in self.cuda_graph_scope: - assert self.cuda_graph_scope == [ - "full" - ], "full scope cannot be used with other scopes." - warnings.warn( - "full scope is deprecated. " - "Use empty cuda_graph_scope to capture the whole layer." 
- )
- self.cuda_graph_scope = []
- else:
- self.cuda_graph_scope = [CudaGraphScope[scope] for scope in self.cuda_graph_scope]
- assert all(
- isinstance(scope, CudaGraphScope) for scope in self.cuda_graph_scope
- ), f"cuda_graph_scope must be a list of CudaGraphScope, got {self.cuda_graph_scope}." if self.cuda_graph_impl != "none": assert self.cuda_graph_impl in [ "transformer_engine", "local", ], f"Invalid cuda graph implementation: {self.cuda_graph_impl}" - if self.cpu_offloading: raise ValueError("CUDA graphs not supported with CPU offloading.") + elif not isinstance(self.cuda_graph_scope, list): + assert isinstance(self.cuda_graph_scope, str), ( + "cuda_graph_scope must be a string or a list of strings, " + f"got {self.cuda_graph_scope}." + ) + self.cuda_graph_scope = [self.cuda_graph_scope] + if self.cuda_graph_impl == "local": - assert not self.cuda_graph_scope or self.cuda_graph_scope == [ - CudaGraphScope.full_iteration - ], ( - "For local cuda graph implementation, the only valid value for " - "cuda_graph_scope is full_iteration, or an empty list to denote layerwise " - "graphs. To use other scopes, use cuda_graph_impl=transformer_engine." + assert not self.cuda_graph_scope or self.cuda_graph_scope == ["full_iteration"], ( + "For local cuda graph implementation, the only valid value " + "for cuda_graph_scope is full_iteration. " + "To use other scopes, use cuda_graph_impl=transformer_engine." ) if self.cuda_graph_impl == "transformer_engine": - assert CudaGraphScope.full_iteration not in self.cuda_graph_scope, ( + assert "full_iteration" not in self.cuda_graph_scope, ( "To use full iteration cuda graph, please use " - "cuda_graph_impl=local instead of cuda_graph_impl=transformer_engine." + "cuda_graph_impl=local instead of cuda_graph_impl=transformer_engine."
) + for scope in self.cuda_graph_scope: + assert scope in [ + 'attn', + 'mlp', + 'moe', + 'moe_router', + 'moe_preprocess', + 'mamba', + ], ( + "--cuda-graph-scope should be attn, mlp, moe, moe_router, moe_preprocess, " + f"or mamba, got {self.cuda_graph_scope}." + ) + assert ( - CudaGraphScope.moe not in self.cuda_graph_scope - or CudaGraphScope.moe_router not in self.cuda_graph_scope + 'moe' not in self.cuda_graph_scope or 'moe_router' not in self.cuda_graph_scope ), 'cuda_graph_scope must not contain both moe and moe_router.' - if CudaGraphScope.moe_preprocess in self.cuda_graph_scope: + if 'moe_preprocess' in self.cuda_graph_scope: assert ( - CudaGraphScope.moe_router in self.cuda_graph_scope + 'moe_router' in self.cuda_graph_scope ), 'moe_preprocess cuda graph is only supported with moe_router cuda graph.' if self.num_moe_experts is None or self.num_moe_experts <= 1: assert ( - CudaGraphScope.moe not in self.cuda_graph_scope - and CudaGraphScope.moe_router not in self.cuda_graph_scope + 'moe' not in self.cuda_graph_scope + and 'moe_router' not in self.cuda_graph_scope ), 'moe cuda graph is only supported for MoE.' else: if self.moe_layer_freq == 1 or ( isinstance(self.moe_layer_freq, list) and 0 not in self.moe_layer_freq ): - assert CudaGraphScope.mlp not in self.cuda_graph_scope, ( + assert 'mlp' not in self.cuda_graph_scope, ( 'mlp cuda graph is only supported for dense layers, ' 'but not found in the model.' ) @@ -1693,13 +1682,13 @@ def __post_init__(self): or not self.moe_pad_expert_input_to_capacity ): assert ( - CudaGraphScope.moe not in self.cuda_graph_scope + 'moe' not in self.cuda_graph_scope ), 'moe cuda graph is only supported with drop-padding MoE.' 
if self.moe_token_dispatcher_type == 'alltoall' and ( self.moe_expert_capacity_factor is not None or self.moe_router_padding_for_quantization ): - assert CudaGraphScope.moe_preprocess not in self.cuda_graph_scope, ( + assert 'moe_preprocess' not in self.cuda_graph_scope, ( 'moe_preprocess cuda graph is not supported when there are ' 'DtoH copies and synchronizations in the preprocess step.' ) @@ -1709,28 +1698,25 @@ def __post_init__(self): raise ValueError( "Full-layer CUDA graphs not supported with activation recomputation." ) - elif self.cuda_graph_scope != [CudaGraphScope.full_iteration]: + elif self.cuda_graph_scope != ['full_iteration']: # For scoped CUDA graphs, only the non-graphed parts of the layer can be # recomputed. So check if there are overlaps between the recomputed parts # and the graphed parts. - if CudaGraphScope.attn in self.cuda_graph_scope: + if "attn" in self.cuda_graph_scope: for module in self.recompute_modules: if module in ['core_attn', 'mla_up_proj']: raise ValueError( f'attn cuda graph is not supported with {module} recompute.' ) - if ( - CudaGraphScope.mlp in self.cuda_graph_scope - and "mlp" in self.recompute_modules - ): + if "mlp" in self.cuda_graph_scope and "mlp" in self.recompute_modules: raise ValueError(f'mlp cuda graph is not supported with mlp recompute.') - if CudaGraphScope.moe in self.cuda_graph_scope: + if "moe" in self.cuda_graph_scope: for module in self.recompute_modules: if module in ['moe_act', 'moe', 'shared_experts']: raise ValueError( f'moe cuda graph is not supported with {module} recompute.' 
) - if CudaGraphScope.moe_router in self.cuda_graph_scope: + if "moe_router" in self.cuda_graph_scope: for module in self.recompute_modules: if module in ['moe', 'shared_experts']: raise ValueError( @@ -1739,25 +1725,25 @@ def __post_init__(self): ) if "layernorm" in self.recompute_modules: if ( - CudaGraphScope.attn in self.cuda_graph_scope - and CudaGraphScope.mlp in self.cuda_graph_scope + "attn" in self.cuda_graph_scope + and "mlp" in self.cuda_graph_scope and ( - CudaGraphScope.moe in self.cuda_graph_scope - or CudaGraphScope.moe_router in self.cuda_graph_scope + "moe" in self.cuda_graph_scope + or "moe_router" in self.cuda_graph_scope ) ): raise ValueError( 'cuda graph is not supported with layernorm recompute.' ) - if CudaGraphScope.attn in self.cuda_graph_scope: + if "attn" in self.cuda_graph_scope: warnings.warn( "input_layernorm recompute is not supported with attention " "cudagraph. Will only recompute the pre_mlp_layernorm." ) if ( - CudaGraphScope.mlp in self.cuda_graph_scope - or CudaGraphScope.moe in self.cuda_graph_scope - or CudaGraphScope.moe_router in self.cuda_graph_scope + "mlp" in self.cuda_graph_scope + or "moe" in self.cuda_graph_scope + or "moe_router" in self.cuda_graph_scope ): warnings.warn( "pre_mlp_layernorm recompute is not supported with mlp/moe " diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index 3ea40577009..f89678e6216 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -16,7 +16,7 @@ from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.cuda_graphs import is_graph_capturing -from megatron.core.transformer.enums import CudaGraphScope, LayerType +from megatron.core.transformer.enums import LayerType from megatron.core.transformer.identity_op import IdentityFuncOp, IdentityOp from 
megatron.core.transformer.mlp import MLP from megatron.core.transformer.module import GraphableMegatronModule @@ -382,21 +382,18 @@ def __init__( if "layernorm" in self.config.recompute_modules: if not isinstance(self.input_layernorm, IdentityOp) and ( self.config.cuda_graph_impl == "none" - or CudaGraphScope.attn not in self.config.cuda_graph_scope + or 'attn' not in self.config.cuda_graph_scope ): self.recompute_input_layernorm = True if self.config.fp8 or self.config.fp4: self.self_attention.set_for_recompute_input_layernorm() if not isinstance(self.pre_mlp_layernorm, IdentityOp) and ( self.config.cuda_graph_impl == "none" - or ( - not self.is_moe_layer - and CudaGraphScope.mlp not in self.config.cuda_graph_scope - ) + or (not self.is_moe_layer and 'mlp' not in self.config.cuda_graph_scope) or ( self.is_moe_layer - and CudaGraphScope.moe not in self.config.cuda_graph_scope - and CudaGraphScope.moe_router not in self.config.cuda_graph_scope + and 'moe' not in self.config.cuda_graph_scope + and 'moe_router' not in self.config.cuda_graph_scope ) ): self.recompute_pre_mlp_layernorm = True @@ -637,13 +634,12 @@ def _forward_mlp(self, hidden_states, inference_context=None): and self.config.cuda_graph_impl == "transformer_engine" and self.training and is_graph_capturing() - and CudaGraphScope.moe_router in self.config.cuda_graph_scope + and 'moe_router' in self.config.cuda_graph_scope ): assert ( not self.recompute_pre_mlp_layernorm ), "Recomputation is not supported for CUDA graph." cudagraph_outputs = self.mlp(pre_mlp_layernorm_output) - nvtx_range_pop(suffix="mlp") return cudagraph_outputs + [residual] elif self.recompute_mlp: if self.config.fp8 or self.config.fp4: @@ -698,7 +694,6 @@ def _forward_post_mlp(self, mlp_output_with_bias, residual): Returns: output (Tensor): Transformed hidden states of shape [s, b, h]. 
""" - from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( fine_grained_offloading_group_commit, ) @@ -762,7 +757,7 @@ def get_layer_static_inputs(self, seq_length, micro_batch_size): static_inputs = super().get_layer_static_inputs(seq_length, micro_batch_size) if not isinstance(self.self_attention, IdentityOp) and ( - not self.config.cuda_graph_scope or CudaGraphScope.attn in self.config.cuda_graph_scope + not self.config.cuda_graph_scope or 'attn' in self.config.cuda_graph_scope ): slen_per_cp = seq_length // self.config.context_parallel_size static_inputs["attention_mask"] = ( @@ -781,18 +776,18 @@ def _get_submodules_under_cudagraphs(self): return super()._get_submodules_under_cudagraphs() submodules = [] - if CudaGraphScope.attn in self.config.cuda_graph_scope: + if 'attn' in self.config.cuda_graph_scope: submodules += [ self.input_layernorm, self.self_attention, self.pre_cross_attn_layernorm, self.cross_attention, ] - if (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) or ( - self.is_moe_layer and CudaGraphScope.moe in self.config.cuda_graph_scope + if (not self.is_moe_layer and 'mlp' in self.config.cuda_graph_scope) or ( + self.is_moe_layer and 'moe' in self.config.cuda_graph_scope ): submodules += [self.pre_mlp_layernorm, self.mlp] - elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope: + elif self.is_moe_layer and 'moe_router' in self.config.cuda_graph_scope: submodules += [self.pre_mlp_layernorm, self.mlp.router] if ( self.config.moe_shared_expert_intermediate_size is not None @@ -810,7 +805,7 @@ def _te_cuda_graph_capture(self, *args, **kwargs): 2. If context is None, it cannot be returned as output. 
""" context = None - if not self.config.cuda_graph_scope or CudaGraphScope.attn in self.config.cuda_graph_scope: + if not self.config.cuda_graph_scope or 'attn' in self.config.cuda_graph_scope: hidden_states, context = self._forward_attention(*args, **kwargs) else: if len(args) > 0: @@ -820,12 +815,12 @@ def _te_cuda_graph_capture(self, *args, **kwargs): if ( not self.config.cuda_graph_scope - or (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) + or (not self.is_moe_layer and 'mlp' in self.config.cuda_graph_scope) or ( self.is_moe_layer and ( - CudaGraphScope.moe in self.config.cuda_graph_scope - or CudaGraphScope.moe_router in self.config.cuda_graph_scope + 'moe' in self.config.cuda_graph_scope + or 'moe_router' in self.config.cuda_graph_scope ) ) ): @@ -846,7 +841,7 @@ def _te_cuda_graph_replay(self, *args, **kwargs): Hence, `inference_context` and `packed_seq_params` are excluded from input list. """ context = None - if self.config.cuda_graph_scope and CudaGraphScope.attn not in self.config.cuda_graph_scope: + if self.config.cuda_graph_scope and 'attn' not in self.config.cuda_graph_scope: hidden_states, context = self._forward_attention(*args, **kwargs) args = (hidden_states,) kwargs = {} @@ -866,13 +861,13 @@ def _te_cuda_graph_replay(self, *args, **kwargs): if ( not self.config.cuda_graph_scope - or (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) - or (self.is_moe_layer and CudaGraphScope.moe in self.config.cuda_graph_scope) + or (not self.is_moe_layer and 'mlp' in self.config.cuda_graph_scope) + or (self.is_moe_layer and 'moe' in self.config.cuda_graph_scope) ): # CUDA Graph captures the whole MLP/MoE part. CUDA Graph output is the layer output. assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output." 
output = cuda_graph_output.pop() - elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope: + elif self.is_moe_layer and 'moe_router' in self.config.cuda_graph_scope: # CUDA Graph partially captures the MoE. # The rest of the layer should go to the normal pass. shared_expert_output, routing_map, residual = None, None, None @@ -887,7 +882,7 @@ def _te_cuda_graph_replay(self, *args, **kwargs): # Split cudagraph outputs into function outputs and attribute outputs, and # process them separately. Function outputs should have three tensors. func_output, attr_outputs = cuda_graph_output[:3], cuda_graph_output[3:] - if CudaGraphScope.moe_preprocess in self.config.cuda_graph_scope: + if 'moe_preprocess' in self.config.cuda_graph_scope: hidden_states, probs, residual = func_output valid_cudagraph_attrs = self.mlp.token_dispatcher.valid_cudagraph_attrs assert len(attr_outputs) == len( @@ -994,7 +989,7 @@ def _should_call_local_cudagraph(self, *args, **kwargs): (kwargs.get('inference_context') is not None) or (kwargs.get('inference_params') is not None) ) - and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope + and 'full_iteration' not in self.config.cuda_graph_scope ): if kwargs['inference_context'].is_static_batching(): using_cuda_graph = kwargs['inference_context'].is_decode_only() diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 0cf2d006863..8be173c75a0 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -23,7 +23,7 @@ from megatron.core.rerun_state_machine import RerunStateMachine from megatron.core.transformer import MLATransformerConfig, TransformerConfig from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout -from megatron.core.transformer.enums import AttnBackend, CudaGraphScope +from megatron.core.transformer.enums import AttnBackend from megatron.core.transformer.heterogeneous.heterogeneous_config import ( 
HeterogeneousTransformerConfig, MLPConfig, @@ -772,7 +772,7 @@ def validate_args(args, defaults={}): if args.rank == 0: print('accumulate and all-reduce gradients in fp32 for ' 'bfloat16 data type.', flush=True) - if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope: + if args.cuda_graph_impl == "local" and "full_iteration" in args.cuda_graph_scope: if not args.inference_dynamic_batching: assert not args.check_for_nan_in_loss_and_grad, \ "--no-check-for-nan-in-loss-and-grad should be set with full_iteration CUDA graph" @@ -1273,15 +1273,6 @@ def validate_args(args, defaults={}): assert ( args.recompute_granularity != 'full' ), 'recompute_granularity must not be full when CUDA Graphs are enabled.' - if args.cuda_graph_scope == "full" or ( - isinstance(args.cuda_graph_scope, list) and "full" in args.cuda_graph_scope - ): - if isinstance(args.cuda_graph_scope, list): - assert args.cuda_graph_scope == ["full"], "full scope cannot be used with other scopes." - args.cuda_graph_scope = [] - warn_rank_0( - 'full scope is deprecated. Use empty cuda_graph_scope to capture the whole layer.' - ) if args.multi_latent_attention: assert not args.group_query_attention, "Group query attention is mutually exclusive with multi latent attention." @@ -1503,7 +1494,7 @@ def _add_inference_args(parser): '"none": no CUDA graph. ' '"local": capture the CUDA graph using MCore local implementation. --cuda-graph-scope=\"full_iteration\" enables whole iteration CUDA graph. ' '"transformer_engine": capture the CUDA graph using TE make_graphed_callables().') - group.add_argument('--cuda-graph-scope', nargs='+', type=lambda scope: CudaGraphScope[scope] if scope != "full" else scope, default=[], + group.add_argument('--cuda-graph-scope', nargs='+', type=str, default=[], help='Determines the CUDA graphs capturing scope. ' 'choices: "attn", "mlp", "moe", "moe_router", "moe_preprocess", "mamba", "full_iteration". 
' '"attn": captures operations in TransformerLayer._forward_attention(). ' @@ -1515,8 +1506,7 @@ def _add_inference_args(parser): '"mamba": captures the mamba layer. ' '"full_iteration": captures a whole iteration. ' 'full_iteration scope is only supported with --cuda-graph-impl=local, other scopes are only supported with --cuda-graph-impl=transformer_engine. ' - 'If not specified, the default scope is to capture the whole Transformer layer. ' - 'For backward compatibility, we still allow passing "full" to specify capturing the whole layer, and convert it to an empty list.') + 'If not specified, the default scope is to capture the whole Transformer layer.') group.add_argument('--use-legacy-static-engine', action='store_true', default=False, help='Use legacy static engine. (Current static engine uses dynamic engine under the hood)', dest='use_legacy_static_engine') diff --git a/megatron/training/training.py b/megatron/training/training.py index e88b9839d28..5c9de623ce5 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -59,7 +59,6 @@ from megatron.training.checkpointing import checkpoint_exists from megatron.core.full_cuda_graph import FullCudaGraphWrapper from megatron.core.transformer.cuda_graphs import TECudaGraphHelper -from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.module import Float16Module from megatron.core.distributed import DistributedDataParallelConfig, TorchFullyShardedDataParallelConfig from megatron.core.distributed import DistributedDataParallel as DDP @@ -2278,7 +2277,7 @@ def train( eval_iterations = 0 # Wrap forward_backward_func for Full iteration CUDA graph forward_backward_func = get_forward_backward_func() - if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope: + if args.cuda_graph_impl == "local" and "full_iteration" in args.cuda_graph_scope: forward_backward_func = FullCudaGraphWrapper(forward_backward_func, 
cuda_graph_warmup_steps=args.cuda_graph_warmup_steps) def get_e2e_base_metrics(): @@ -2627,10 +2626,6 @@ def get_e2e_base_metrics(): if should_exit: break - # Destroy CUDA Graphs. - if args.cuda_graph_impl == "transformer_engine" and cuda_graph_helper.graphs_created(): - cuda_graph_helper.delete_cuda_graphs() - one_logger_utils.track_e2e_metrics() # Flush TensorBoard, WandB writers and one-logger. @@ -2704,7 +2699,7 @@ def evaluate( eval_batch_size = args.global_batch_size eval_num_microbatches = eval_batch_size // (args.micro_batch_size * args.data_parallel_size) forward_backward_func = get_forward_backward_func() - if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope: + if args.cuda_graph_impl == "local" and "full_iteration" in args.cuda_graph_scope: forward_backward_func = FullCudaGraphWrapper(forward_backward_func, cuda_graph_warmup_steps=args.cuda_graph_warmup_steps) if eval_iters is None: diff --git a/tests/unit_tests/inference/engines/test_dynamic_engine.py b/tests/unit_tests/inference/engines/test_dynamic_engine.py index 26d3dcfbd6d..0ac4b296746 100644 --- a/tests/unit_tests/inference/engines/test_dynamic_engine.py +++ b/tests/unit_tests/inference/engines/test_dynamic_engine.py @@ -3,7 +3,7 @@ import asyncio import random import types -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Dict, List, Optional, Tuple import pytest @@ -41,7 +41,6 @@ from megatron.core.models.mamba.mamba_model import MambaModel from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed from megatron.core.transformer.cuda_graphs import CudaGraphManager, _CudagraphGlobalRecord -from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import ( check_mamba_sequence_packing_support, @@ -104,9 +103,7 @@ class DynamicEngineTestConfig: return_log_probs: bool = False 
materialize_only_last_token_logits: bool = True skip_prompt_log_probs: bool = False - cuda_graph_scope: List[CudaGraphScope] = field( - default_factory=lambda: [CudaGraphScope.full_iteration] - ) + cuda_graph_scope: List[str] = None force_build_cuda_graphs: bool = False # If False, do not build cuda graphs in the tests, even if # num_cuda_graphs is set. @@ -139,6 +136,9 @@ def __post_init__(self): if self.context_max_tokens_override is None: self.context_max_tokens_override = self.num_requests * self.max_sequence_length + if self.cuda_graph_scope is None: + self.cuda_graph_scope = ["full_iteration"] + @dataclass class DynamicEngineTestEnv: @@ -514,7 +514,7 @@ def teardown_method(self, method): ) @pytest.mark.parametrize("model_provider", ["gpt", "mamba"]) @pytest.mark.parametrize("num_cuda_graphs", [None, 1, 4]) - @pytest.mark.parametrize("cuda_graph_scope", [[], [CudaGraphScope.full_iteration]]) + @pytest.mark.parametrize("cuda_graph_scope", [[], ["full_iteration"]]) def test_simple(self, model_provider, num_cuda_graphs, cuda_graph_scope) -> None: """Simple test that runs without errors, and validates output.""" skip_if_mamba_sequence_packing_not_available(model_provider) diff --git a/tests/unit_tests/test_fp8_param.py b/tests/unit_tests/test_fp8_param.py index 361698f7127..0b8d41769ec 100644 --- a/tests/unit_tests/test_fp8_param.py +++ b/tests/unit_tests/test_fp8_param.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
import contextlib import gc @@ -36,10 +36,7 @@ try: from transformer_engine.pytorch.tensor.utils import post_all_gather_processing - if is_te_min_version("2.10.0"): - cuda_graph_supported = True - else: - reason_for_no_cuda_graph = "Need newer TransformerEngine" + cuda_graph_supported = True except ImportError: reason_for_no_cuda_graph = "Need newer TransformerEngine" @@ -68,16 +65,12 @@ class TestFP8Param: def setup_method(self, method): self.seq_length = 512 self.micro_batch_size = 2 - self.cuda_graph_helper = None os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1' def teardown_method(self, method): Utils.destroy_model_parallel() destroy_global_vars() destroy_num_microbatches_calculator() - if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): - self.cuda_graph_helper.delete_cuda_graphs() - self.cuda_graph_helper = None gc.collect() def model_provider( @@ -216,12 +209,13 @@ def _run_test_helper( ) assert len(gpt_model) == 1 # Assume only one model in the model provider. + cuda_graph_helper = None # Hard coded to use cuda_graph_impl="transformer_engine" cuda_graph_impl = "transformer_engine" if use_cuda_graph and cuda_graph_impl == "transformer_engine": from megatron.core.transformer.cuda_graphs import TECudaGraphHelper - self.cuda_graph_helper = TECudaGraphHelper( + cuda_graph_helper = TECudaGraphHelper( model=gpt_model, config=gpt_model[0].config, seq_length=self.seq_length, @@ -256,13 +250,13 @@ def _run_test_helper( # Capture CUDA graphs after warmup if helper is provided. # Hard coded cuda_graph_warmup_steps = 0. 
cuda_graph_warmup_steps = 0 - if self.cuda_graph_helper is not None and i == cuda_graph_warmup_steps: + if cuda_graph_helper is not None and i == cuda_graph_warmup_steps: if should_disable_forward_pre_hook(args): disable_forward_pre_hook(gpt_model, param_sync=False) - self.cuda_graph_helper.create_cudagraphs() + cuda_graph_helper.create_cudagraphs() if should_disable_forward_pre_hook(args): enable_forward_pre_hook(gpt_model) - self.cuda_graph_helper.cuda_graph_set_manual_hooks() + cuda_graph_helper.cuda_graph_set_manual_hooks() # For the mxfp8_param with reuse_grad_buf_for_mxfp8_param_ag and dp_ag_overlap, # we need to call the _copy_main_params_to_param_buffer() after the grad buffer @@ -303,10 +297,6 @@ def _run_test_helper( loss_list.append(loss.item()) - if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): - self.cuda_graph_helper.delete_cuda_graphs() - self.cuda_graph_helper = None - return torch.tensor(loss_list) def run_test(self, tp_size, recipe, inference: bool = False, **kwargs): diff --git a/tests/unit_tests/transformer/test_cuda_graphs.py b/tests/unit_tests/transformer/test_cuda_graphs.py index cee75171560..3ad0262a1cf 100644 --- a/tests/unit_tests/transformer/test_cuda_graphs.py +++ b/tests/unit_tests/transformer/test_cuda_graphs.py @@ -9,7 +9,6 @@ import pytest import torch -from transformer_engine.pytorch.fp8 import check_fp8_support from megatron.core import parallel_state from megatron.core.enums import ModelType @@ -26,7 +25,6 @@ TextGenerationController, ) from megatron.core.models.gpt.gpt_layer_specs import ( - get_gpt_decoder_block_spec, get_gpt_layer_local_spec, get_gpt_layer_with_transformer_engine_spec, get_gpt_mtp_block_spec, @@ -43,8 +41,6 @@ model_parallel_cuda_manual_seed, ) from megatron.core.transformer.cuda_graphs import CudaGraphManager, _CudagraphGlobalRecord -from megatron.core.transformer.enums import CudaGraphScope -from megatron.core.transformer.moe.fused_a2a import reset_hybrid_ep_buffer from 
megatron.core.transformer.transformer_block import TransformerBlock from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import is_fa_min_version, is_te_min_version @@ -58,8 +54,6 @@ from megatron.training.training import setup_model_and_optimizer from tests.unit_tests.test_utilities import Utils -fp8_available, _ = check_fp8_support() - class TestParallelTransformerBlockCudagraphs: def setup_method(self, method): @@ -753,9 +747,6 @@ class TestPartialCudaGraph: def setup_method(self, method): self.seq_length = 512 self.micro_batch_size = 2 - self.tp_size = 2 - self.cp_size = 2 - self.cuda_graph_helper = None # Store original environment variable values self.original_env = { 'CUDA_DEVICE_MAX_CONNECTIONS': os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS'), @@ -771,28 +762,22 @@ def teardown_method(self, method): os.environ.pop(key, None) else: os.environ[key] = value + Utils.destroy_model_parallel() destroy_global_vars() destroy_num_microbatches_calculator() - if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): - self.cuda_graph_helper.delete_cuda_graphs() - self.cuda_graph_helper = None gc.collect() def model_provider( self, pre_process=True, post_process=True, - layer_spec_fn=get_gpt_decoder_block_spec, + layer_spec_fn=get_gpt_layer_with_transformer_engine_spec, **config_kwargs, ): + model_parallel_cuda_manual_seed(123) args = get_args() config = core_transformer_config_from_args(args) - transformer_layer_spec = layer_spec_fn( - config, - use_transformer_engine=True, - normalization=args.normalization, - qk_l2_norm=args.qk_l2_norm, - ) + transformer_layer_spec = layer_spec_fn() if args.mtp_num_layers: mtp_block_spec = get_gpt_mtp_block_spec( config, transformer_layer_spec, use_transformer_engine=True @@ -825,17 +810,18 @@ def create_test_args( args.num_layers = 4 args.mtp_num_layers = 1 args.vocab_size = 1024 - args.hidden_size = 512 + args.hidden_size = 128 args.num_attention_heads = 8 
args.max_position_embeddings = 512 - args.global_batch_size = self.micro_batch_size * 8 // self.tp_size // self.cp_size + args.global_batch_size = self.micro_batch_size * 8 args.micro_batch_size = self.micro_batch_size args.create_attention_mask_in_dataloader = True args.seq_length = self.seq_length - args.tensor_model_parallel_size = self.tp_size - args.sequence_parallel = True if self.tp_size > 1 else False + args.tensor_model_parallel_size = 2 + args.sequence_parallel = True args.pipeline_model_parallel_size = 1 - args.context_parallel_size = self.cp_size + args.context_parallel_size = 1 + args.expert_model_parallel_size = ep_size args.train_iters = 10 args.lr = 3e-5 args.bf16 = True @@ -850,26 +836,17 @@ def create_test_args( # MoE settings args.num_experts = 4 args.expert_model_parallel_size = ep_size - args.expert_tensor_parallel_size = 1 if ep_size > 1 else self.tp_size args.moe_shared_expert_intermediate_size = 1024 - args.moe_layer_freq = [0, 0, 1, 1] + args.moe_layer_freq = "[0,0,1,1]" args.moe_permute_fusion = True args.moe_router_fusion = True args.moe_router_topk = 2 - args.moe_router_dtype = "fp32" # CUDA graph settings args.cuda_graph_impl = cuda_graph_impl args.cuda_graph_scope = cuda_graph_scope args.cuda_graph_warmup_steps = cuda_graph_warmup_steps - - # fp8 settings - if fp8_available: - args.fp8 = "e4m3" - args.fp8_recipe = "tensorwise" - args.first_last_layers_bf16 = True - args.num_layers_at_start_in_bf16 = 1 - args.num_layers_at_end_in_bf16 = 1 + args.use_te_rng_tracker = cuda_graph_impl != "none" for key, value in kwargs.items(): assert hasattr(args, key) @@ -879,15 +856,15 @@ def create_test_args( set_global_variables(args, False) return args - def get_batch(self, seq_length, micro_batch_size, cp_size): - data = list(range(seq_length // cp_size)) + def get_batch(self, seq_length, micro_batch_size): + data = list(range(seq_length)) input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() labels = 1 + 
torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() attention_mask = torch.ones( - (micro_batch_size, 1, seq_length // cp_size, seq_length), dtype=bool + (micro_batch_size, 1, seq_length, seq_length), dtype=bool ).cuda() - loss_mask = torch.ones(seq_length // cp_size).repeat((micro_batch_size, 1)).cuda() + loss_mask = torch.ones(seq_length).repeat((micro_batch_size, 1)).cuda() return input_ids, labels, position_ids, attention_mask, loss_mask def _run_test_helper( @@ -900,10 +877,12 @@ def _run_test_helper( set_args(args) torch.manual_seed(123) - model_parallel_cuda_manual_seed(123) + Utils.initialize_model_parallel( + tensor_model_parallel_size=2, expert_model_parallel_size=ep_size + ) input_ids, labels, position_ids, attention_mask, loss_mask = self.get_batch( - self.seq_length, self.micro_batch_size, self.cp_size + self.seq_length, self.micro_batch_size ) gpt_model, optimizer, _ = setup_model_and_optimizer( @@ -911,10 +890,13 @@ def _run_test_helper( ) assert len(gpt_model) == 1 # Assume only one model in the model provider. 
+ loss_list = [] + + cuda_graph_helper = None if cuda_graph_impl == "transformer_engine": from megatron.core.transformer.cuda_graphs import TECudaGraphHelper - self.cuda_graph_helper = TECudaGraphHelper( + cuda_graph_helper = TECudaGraphHelper( model=gpt_model, config=gpt_model[0].config, seq_length=self.seq_length, @@ -922,17 +904,14 @@ def _run_test_helper( optimizers=[optimizer], ) - loss_list = [] - for i in range(100): gpt_model[0].zero_grad_buffer() optimizer.zero_grad() # Capture CUDA graphs after warmup if helper is provided - if self.cuda_graph_helper is not None and i == cuda_graph_warmup_steps: - self.cuda_graph_helper.create_cudagraphs() + if cuda_graph_helper is not None and i == cuda_graph_warmup_steps: + cuda_graph_helper.create_cudagraphs() - gpt_model[0].set_is_first_microbatch() output = gpt_model[0].forward( input_ids=input_ids, position_ids=position_ids, @@ -943,7 +922,7 @@ def _run_test_helper( # Check output shapes assert output.shape[0] == self.micro_batch_size - assert output.shape[1] == self.seq_length // self.cp_size + assert output.shape[1] == self.seq_length # Verify gradients loss = output.mean() @@ -957,29 +936,16 @@ def _run_test_helper( loss_list.append(loss.item()) - if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): - self.cuda_graph_helper.delete_cuda_graphs() - self.cuda_graph_helper = None - return torch.tensor(loss_list) @pytest.mark.skipif( - not (HAVE_TE and is_te_min_version("2.10.0")), - reason="Partial CUDA graph UT support requires TransformerEngine version >= 2.10.0", + not (HAVE_TE and is_te_min_version("1.14.0")), + reason="Partial CUDA graph support requires TransformerEngine version >= 1.14.0", ) @pytest.mark.parametrize("ep_size", [1, 4]) @pytest.mark.parametrize("moe_dropless_dispatcher", [False, True]) @pytest.mark.parametrize("moe_dispatcher_type", ["alltoall", "deepep", "hybridep"]) def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispatcher_type): - 
initialize_rng_tracker(use_te_rng_tracker=True, force_reset=True) - Utils.initialize_model_parallel( - tensor_model_parallel_size=self.tp_size, - context_parallel_size=self.cp_size, - pipeline_model_parallel_size=1, - expert_tensor_parallel_size=1 if ep_size > 1 else self.tp_size, - expert_model_parallel_size=ep_size, - ) - extra_kwargs = {} if moe_dispatcher_type == "deepep": if not is_deep_ep_available(): @@ -996,28 +962,19 @@ def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispa if not moe_dropless_dispatcher: if moe_dispatcher_type == "deepep": pytest.skip("Deep EP doesn't support drop&pad MoE") - if moe_dispatcher_type == "hybridep" and ep_size == 1: - pytest.skip("Hybrid EP doesn't support drop&pad MoE with ep_size == 1") extra_kwargs["moe_expert_capacity_factor"] = 1.0 extra_kwargs["moe_pad_expert_input_to_capacity"] = True loss_list_ref = self._run_test_helper(ep_size, "none", None, 0, **extra_kwargs) for cuda_graph_scope in [ None, - [CudaGraphScope.attn], - [CudaGraphScope.moe], - [CudaGraphScope.mlp, CudaGraphScope.moe_router], - [ - CudaGraphScope.attn, - CudaGraphScope.mlp, - CudaGraphScope.moe_router, - CudaGraphScope.moe_preprocess, - ], + ["attn"], + ["moe"], + ["mlp", "moe_router"], + ["attn", "mlp", "moe_router", "moe_preprocess"], ]: - if (moe_dropless_dispatcher or moe_dispatcher_type == "hybridep") and ( - cuda_graph_scope is None or CudaGraphScope.moe in cuda_graph_scope - ): - # Dropless MoE or Hybrid EP doesn't work with "moe" scope cudagraph. Skip. + if moe_dropless_dispatcher and (cuda_graph_scope is None or "moe" in cuda_graph_scope): + # Dropless MoE doesn't work with "moe" scope cudagraph. Skip. 
continue cuda_graph_warmup_steps = 3 loss_list = self._run_test_helper( @@ -1029,10 +986,6 @@ def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispa ) assert torch.equal(loss_list, loss_list_ref) - if moe_dispatcher_type == "hybridep": - reset_hybrid_ep_buffer() - Utils.destroy_model_parallel() - if __name__ == "__main__": From fdcb0a400c9967eb2c8d6803c7dd4fbc8d3ab12c Mon Sep 17 00:00:00 2001 From: Robin Zhang Date: Tue, 2 Dec 2025 11:15:30 +0800 Subject: [PATCH 167/334] Replay "[Dev] feat(MoE): Refactor cuda_graph_scope - part2 (#2353)" (#2447) --- .../text_generation_controller.py | 3 +- .../common/language_module/language_module.py | 5 +- megatron/core/models/gpt/gpt_model.py | 4 +- megatron/core/pipeline_parallel/schedules.py | 7 +- megatron/core/safe_globals.py | 3 +- megatron/core/ssm/mamba_block.py | 3 +- megatron/core/transformer/attention.py | 4 +- megatron/core/transformer/cuda_graphs.py | 47 +++++-- megatron/core/transformer/enums.py | 12 ++ megatron/core/transformer/moe/fused_a2a.py | 8 ++ megatron/core/transformer/moe/moe_utils.py | 7 +- .../core/transformer/moe/token_dispatcher.py | 12 +- .../core/transformer/transformer_block.py | 4 +- .../core/transformer/transformer_config.py | 112 +++++++++-------- .../core/transformer/transformer_layer.py | 47 +++---- megatron/training/arguments.py | 18 ++- megatron/training/training.py | 9 +- .../inference/engines/test_dynamic_engine.py | 12 +- tests/unit_tests/test_fp8_param.py | 24 ++-- .../transformer/test_cuda_graphs.py | 117 ++++++++++++------ 20 files changed, 304 insertions(+), 154 deletions(-) diff --git a/megatron/core/inference/text_generation_controllers/text_generation_controller.py b/megatron/core/inference/text_generation_controllers/text_generation_controller.py index 2bda1425710..6e00f58ac23 100644 --- a/megatron/core/inference/text_generation_controllers/text_generation_controller.py +++ b/megatron/core/inference/text_generation_controllers/text_generation_controller.py 
@@ -29,6 +29,7 @@ ) from megatron.core.inference.sampling_params import SamplingParams from megatron.core.inference.utils import get_attention_mask, set_decode_expert_padding +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.moe.moe_layer import BaseMoELayer from megatron.core.transformer.utils import set_model_to_sequence_parallel from megatron.core.utils import get_asyncio_loop, get_model_config, unwrap_model @@ -851,7 +852,7 @@ def generate_all_output_tokens_static_batch( # Check whether CUDA graphs are enabled enable_cuda_graph = ( model_config.cuda_graph_impl == "local" - and "full_iteration" not in model_config.cuda_graph_scope + and CudaGraphScope.full_iteration not in model_config.cuda_graph_scope ) # Pad batch tokens if necessary diff --git a/megatron/core/models/common/language_module/language_module.py b/megatron/core/models/common/language_module/language_module.py index de2ecfb8011..259bb716a93 100644 --- a/megatron/core/models/common/language_module/language_module.py +++ b/megatron/core/models/common/language_module/language_module.py @@ -21,7 +21,7 @@ is_vp_last_stage, ) from megatron.core.process_groups_config import ProcessGroupCollection -from megatron.core.transformer.enums import AttnBackend +from megatron.core.transformer.enums import AttnBackend, CudaGraphScope from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.utils import ensure_metadata_has_dp_cp_group @@ -144,8 +144,7 @@ def compute_language_model_loss(self, labels: Tensor, logits: Tensor) -> Tensor: # Use is_cg_capturable=True for full iteration CUDA graphs to avoid torch.equal checks is_cg_capturable = ( hasattr(self.config, 'cuda_graph_scope') - and self.config.cuda_graph_scope - and 'full_iteration' in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration in self.config.cuda_graph_scope ) if is_cg_capturable and not 
is_te_min_version("2.7.0"): from megatron.core.utils import get_te_version diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index ce1e8e76bd9..a3d1a8bfc00 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -24,7 +24,7 @@ from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.quantization.utils import get_quant_config_or_none from megatron.core.tensor_parallel import gather_from_sequence_parallel_region -from megatron.core.transformer.enums import ModelType +from megatron.core.transformer.enums import CudaGraphScope, ModelType from megatron.core.transformer.multi_token_prediction import ( MTPLossAutoScaler, MTPLossLoggingHelper, @@ -374,7 +374,7 @@ def _preprocess( and ( ( self.config.cuda_graph_impl == "local" - and "full_iteration" not in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope ) or self.config.flash_decode ) diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index d0b912349b4..18344429c45 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -21,6 +21,7 @@ ) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.cuda_graphs import create_cudagraphs +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.moe.router import MoEAuxLossAutoScaler from megatron.core.utils import ( drain_embedding_wgrad_compute, @@ -656,7 +657,7 @@ def forward_backward_no_pipelining( if ( hasattr(config, 'cuda_graph_impl') and config.cuda_graph_impl == "local" - and "full_iteration" not in config.cuda_graph_scope + and CudaGraphScope.full_iteration not in config.cuda_graph_scope ): create_cudagraphs() @@ -1923,7 +1924,7 @@ def pp_post_backward(input_tensor_grad, vp_stage=None): if ( hasattr(config, 
'cuda_graph_impl') and config.cuda_graph_impl == "local" - and "full_iteration" not in config.cuda_graph_scope + and CudaGraphScope.full_iteration not in config.cuda_graph_scope ): create_cudagraphs() nvtx_range_pop(suffix="misc") @@ -2310,7 +2311,7 @@ def enable_grad_sync(): if ( hasattr(config, 'cuda_graph_impl') and config.cuda_graph_impl == "local" - and "full_iteration" not in config.cuda_graph_scope + and CudaGraphScope.full_iteration not in config.cuda_graph_scope ): create_cudagraphs() diff --git a/megatron/core/safe_globals.py b/megatron/core/safe_globals.py index d2baed2a4a0..41239c310b0 100755 --- a/megatron/core/safe_globals.py +++ b/megatron/core/safe_globals.py @@ -12,7 +12,7 @@ from megatron.core.enums import ModelType from megatron.core.rerun_state_machine import RerunDiagnostic, RerunMode, RerunState -from megatron.core.transformer.enums import AttnBackend +from megatron.core.transformer.enums import AttnBackend, CudaGraphScope SAFE_GLOBALS = [ SimpleNamespace, @@ -23,6 +23,7 @@ UInt32DType, Namespace, AttnBackend, + CudaGraphScope, ModelType, RerunDiagnostic, RerunMode, diff --git a/megatron/core/ssm/mamba_block.py b/megatron/core/ssm/mamba_block.py index 1bcadd0af10..3201a8bfb28 100644 --- a/megatron/core/ssm/mamba_block.py +++ b/megatron/core/ssm/mamba_block.py @@ -25,6 +25,7 @@ from megatron.core.ssm.mamba_hybrid_layer_allocation import allocate_layers from megatron.core.tensor_parallel import get_cuda_rng_tracker from megatron.core.transformer import TransformerConfig +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module @@ -294,7 +295,7 @@ def forward( ( ( self.config.cuda_graph_impl == "local" - and "full_iteration" not in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope ) or 
self.config.flash_decode ) diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index f6f40027789..5cf22d25a4b 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -45,7 +45,7 @@ from ..models.common.embeddings.yarn_rotary_pos_embedding import ( _yarn_get_concentration_factor_from_config, ) -from .enums import AttnMaskType +from .enums import AttnMaskType, CudaGraphScope from .transformer_config import TransformerConfig try: @@ -829,7 +829,7 @@ def forward( if ( in_decode_mode and self.config.cuda_graph_impl == "local" - and "full_iteration" not in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope and inference_context.is_static_batching() ): raise ValueError(f"CUDA graphs must use flash decode with static batching!") diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 12f15ee980a..5b0a0333d9e 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -21,6 +21,7 @@ get_all_rng_states, get_cuda_rng_tracker, ) +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule from megatron.core.transformer.transformer_config import TransformerConfig @@ -1344,24 +1345,24 @@ def _layer_is_graphable(layer, config): from megatron.core.transformer.moe.moe_layer import MoELayer from megatron.core.transformer.transformer_layer import TransformerLayer - if isinstance(layer, MambaLayer) and 'mamba' in config.cuda_graph_scope: + if isinstance(layer, MambaLayer) and CudaGraphScope.mamba in config.cuda_graph_scope: # mamba layer. 
return True if isinstance(layer, TransformerLayer): - if 'attn' in config.cuda_graph_scope and not ( + if CudaGraphScope.attn in config.cuda_graph_scope and not ( isinstance(layer.self_attention, IdentityOp) and isinstance(layer.cross_attention, IdentityOp) ): # attn layer. return True if ( - 'moe' in config.cuda_graph_scope - or 'moe_router' in config.cuda_graph_scope - or 'moe_preprocess' in config.cuda_graph_scope + CudaGraphScope.moe in config.cuda_graph_scope + or CudaGraphScope.moe_router in config.cuda_graph_scope + or CudaGraphScope.moe_preprocess in config.cuda_graph_scope ) and isinstance(layer.mlp, MoELayer): # moe layer. return True - if 'mlp' in config.cuda_graph_scope and isinstance(layer.mlp, MLP): + if CudaGraphScope.mlp in config.cuda_graph_scope and isinstance(layer.mlp, MLP): # mlp layer. return True return False @@ -1388,7 +1389,7 @@ def __init__(self, model, config, seq_length, micro_batch_size, optimizers=[]): "Setting NCCL_GRAPH_REGISTER=0 to avoid illegal memory access when using " "CUDA Graph with PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True." ) - assert "full_iteration" not in config.cuda_graph_scope, ( + assert CudaGraphScope.full_iteration not in config.cuda_graph_scope, ( "full_iteration cuda graph is not supported for cuda_graph_impl=transformer_engine. " "Please use cuda_graph_impl=local instead." ) @@ -1529,7 +1530,7 @@ def get_rotary_pos_emb(transformer_module, transformer_input): and not isinstance(layer.self_attention, IdentityOp) and ( not self.config.cuda_graph_scope - or 'attn' in self.config.cuda_graph_scope + or CudaGraphScope.attn in self.config.cuda_graph_scope ) ) if is_te_min_version("1.10.0"): @@ -1712,3 +1713,33 @@ def cuda_graph_set_manual_hooks(self): model_chunk = self.model[chunk_number] for layer in layers: layer.setup_manual_hooks(model_chunk._make_forward_pre_hook) + + def delete_cuda_graphs(self): + """ + Delete all CUDA graphs. + """ + assert self._graphs_created, "CUDA Graphs have not been created." 
+ + graph_resettable = is_te_min_version("2.10.0") + graphs_reset, graphs_not_reset = 0, 0 + for layers in self.callables_per_chunk: + for layer in layers: + for graph in layer.cuda_graphs: + if graph_resettable: + graph.reset() + graphs_reset += 1 + else: + graphs_not_reset += 1 + layer.cuda_graphs = [] + layer.cuda_graph_manual_hooks = [] + + log_on_each_pipeline_stage( + logger=logger, + tp_group=None, + dp_cp_group=None, + level=logging.INFO, + msg=f'Rank {torch.distributed.get_rank()}: ' + f'{graphs_reset} graphs deleted with explicit reset, ' + f'{graphs_not_reset} graphs deleted without explicit reset.', + ) + self._graphs_created = False diff --git a/megatron/core/transformer/enums.py b/megatron/core/transformer/enums.py index 52b82029f90..d06d58d65f2 100644 --- a/megatron/core/transformer/enums.py +++ b/megatron/core/transformer/enums.py @@ -65,3 +65,15 @@ class AttnBackend(enum.Enum): unfused = 3 local = 4 auto = 5 + + +class CudaGraphScope(enum.Enum): + """Cuda Graph Scope - defines which parts of the model to capture.""" + + full_iteration = 1 # Captures the entire training/inference iteration + attn = 2 # Captures attention layers + mlp = 3 # Captures MLP layers (dense layers only) + moe = 4 # Captures MoE layers (drop-and-pad MoE layers only) + moe_router = 5 # Captures MoE router part + moe_preprocess = 6 # Captures MoE preprocessing part (requires moe_router) + mamba = 7 # Captures Mamba layers diff --git a/megatron/core/transformer/moe/fused_a2a.py b/megatron/core/transformer/moe/fused_a2a.py index 60b0b11a32c..045a93039b3 100644 --- a/megatron/core/transformer/moe/fused_a2a.py +++ b/megatron/core/transformer/moe/fused_a2a.py @@ -320,6 +320,14 @@ def init_hybrid_ep_buffer( ) +def reset_hybrid_ep_buffer(): + ''' + Reset the HybridEP buffer + ''' + global _hybrid_ep_buffer + _hybrid_ep_buffer = None + + class HybridEPDispatch(torch.autograd.Function): ''' Fused dispatch operation for permute + dispatch a2a + permute using the HybridEP backend diff 
--git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index d28cbfea3fe..3ed31d375e2 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -11,6 +11,7 @@ from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.cuda_graphs import is_graph_capturing +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig try: @@ -1205,13 +1206,13 @@ def maybe_raise_signal(moe_layer, **kwargs): ): if ( step_condition == "route" - and 'moe_router' in moe_layer.config.cuda_graph_scope - and 'moe_preprocess' not in moe_layer.config.cuda_graph_scope + and CudaGraphScope.moe_router in moe_layer.config.cuda_graph_scope + and CudaGraphScope.moe_preprocess not in moe_layer.config.cuda_graph_scope ): raise MoECudaGraphPartialCaptureSignal(moe_layer, "route", **kwargs) elif ( step_condition == "preprocess" - and 'moe_preprocess' in moe_layer.config.cuda_graph_scope + and CudaGraphScope.moe_preprocess in moe_layer.config.cuda_graph_scope ): raise MoECudaGraphPartialCaptureSignal(moe_layer, "preprocess", **kwargs) diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index b2135fdb00d..af8ae572adb 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -16,6 +16,7 @@ gather_from_sequence_parallel_region, reduce_scatter_to_sequence_parallel_region, ) +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.moe.fused_a2a import ( fused_combine, fused_dispatch, @@ -436,7 +437,7 @@ def __init__( } if ( config.cuda_graph_impl == "transformer_engine" - and 'moe_preprocess' in config.cuda_graph_scope + and CudaGraphScope.moe_preprocess in config.cuda_graph_scope ): 
self.cuda_dtoh_point = "before_ep_alltoall" else: @@ -1075,10 +1076,13 @@ def combine( num_permuted_tokens=self.num_permuted_tokens, pad_multiple=self.pad_multiple, ) - # Release the used handle/num_permuted_tokens which could change in each iteration + # Release the used handle/num_permuted_tokens which could change in each iteration. + # For drop_and_pad mode, we don't need to reset the num_permuted_tokens and + # num_dispatched_tokens, because their values never change. self.handle = None - self.num_permuted_tokens = None - self.num_dispatched_tokens = None + if not self.drop_and_pad: + self.num_permuted_tokens = None + self.num_dispatched_tokens = None return hidden_states def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor: diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index 6f69927e9e8..023db1fe75a 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -21,7 +21,7 @@ ) from megatron.core.pipeline_parallel.utils import is_vp_first_stage, is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection -from megatron.core.transformer.enums import LayerType +from megatron.core.transformer.enums import CudaGraphScope, LayerType from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module from megatron.core.transformer.transformer_config import TransformerConfig @@ -555,7 +555,7 @@ def _should_call_local_cudagraph(self, *args, **kwargs): kwargs.get('inference_context') is not None or kwargs.get('inference_params') is not None ) - and 'full_iteration' in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration in self.config.cuda_graph_scope ): if kwargs['inference_context'].is_static_batching(): using_cuda_graph = kwargs['inference_context'].is_decode_only() diff --git 
a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 656699ea2a2..a3a16754977 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -9,7 +9,7 @@ from megatron.core.enums import Fp4Recipe, Fp8Recipe from megatron.core.quantization.quant_config import RecipeConfig -from megatron.core.transformer.enums import AttnBackend +from megatron.core.transformer.enums import AttnBackend, CudaGraphScope from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout from ..fusions.fused_bias_geglu import quick_gelu @@ -733,7 +733,7 @@ class TransformerConfig(ModelParallelConfig): excluding optimizer) is enabled. "transformer_engine": capture the CUDA graph using TE make_graphed_callables().""" - cuda_graph_scope: Optional[List[str]] = None + cuda_graph_scope: Optional[List[CudaGraphScope]] = None """Determines the CUDA graphs capturing scope. When cuda_graph_impl is set to "transformer_engine", valid values are "attn", "mlp", "moe", "moe_router", "moe_preprocess", "mamba". None means the full layer. @@ -1615,65 +1615,76 @@ def __post_init__(self): 'use cuda_graph_impl=transformer_engine instead.' ) self.cuda_graph_impl = "transformer_engine" + if self.cuda_graph_scope is None: self.cuda_graph_scope = [] + elif not isinstance(self.cuda_graph_scope, list): + if isinstance(self.cuda_graph_scope, CudaGraphScope): + self.cuda_graph_scope = [self.cuda_graph_scope] + else: + assert isinstance(self.cuda_graph_scope, str), ( + "cuda_graph_scope must be a string that can be converted to a list of " + f"CudaGraphScope, got {self.cuda_graph_scope}." + ) + self.cuda_graph_scope = self.cuda_graph_scope.split(',') + if all(isinstance(scope, str) for scope in self.cuda_graph_scope): + # Backward compatibility for "full" scope. Now we use an empty list instead. 
+ if "full" in self.cuda_graph_scope: + assert self.cuda_graph_scope == [ + "full" + ], "full scope cannot be used with other scopes." + warnings.warn( + "full scope is deprecated. " + "Use empty cuda_graph_scope to capture the whole layer." + ) + self.cuda_graph_scope = [] + else: + self.cuda_graph_scope = [CudaGraphScope[scope] for scope in self.cuda_graph_scope] + assert all( + isinstance(scope, CudaGraphScope) for scope in self.cuda_graph_scope + ), f"cuda_graph_scope must be a list of CudaGraphScope, got {self.cuda_graph_scope}." + if self.cuda_graph_impl != "none": assert self.cuda_graph_impl in [ "transformer_engine", "local", ], f"Invalid cuda graph implementation: {self.cuda_graph_impl}" + if self.cpu_offloading: raise ValueError("CUDA graphs not supported with CPU offloading.") - elif not isinstance(self.cuda_graph_scope, list): - assert isinstance(self.cuda_graph_scope, str), ( - "cuda_graph_scope must be a string or a list of strings, " - f"got {self.cuda_graph_scope}." - ) - self.cuda_graph_scope = [self.cuda_graph_scope] - if self.cuda_graph_impl == "local": - assert not self.cuda_graph_scope or self.cuda_graph_scope == ["full_iteration"], ( - "For local cuda graph implementation, the only valid value " - "for cuda_graph_scope is full_iteration. " - "To use other scopes, use cuda_graph_impl=transformer_engine." + assert not self.cuda_graph_scope or self.cuda_graph_scope == [ + CudaGraphScope.full_iteration + ], ( + "For local cuda graph implementation, the only valid value for " + "cuda_graph_scope is full_iteration, or an empty list to denote layerwise " + "graphs. To use other scopes, use cuda_graph_impl=transformer_engine." ) if self.cuda_graph_impl == "transformer_engine": - assert "full_iteration" not in self.cuda_graph_scope, ( + assert CudaGraphScope.full_iteration not in self.cuda_graph_scope, ( "To use full iteration cuda graph, please use " - "cuda_graph_impl=transformer_engine instead of cuda_graph_impl=local." 
+ "cuda_graph_impl=local instead of cuda_graph_impl=transformer_engine." ) - for scope in self.cuda_graph_scope: - assert scope in [ - 'attn', - 'mlp', - 'moe', - 'moe_router', - 'moe_preprocess', - 'mamba', - ], ( - "--cuda-graph-scope should be attn, mlp, moe, moe_router, moe_preprocess, " - f"or mamba, got {self.cuda_graph_scope}." - ) - assert ( - 'moe' not in self.cuda_graph_scope or 'moe_router' not in self.cuda_graph_scope + CudaGraphScope.moe not in self.cuda_graph_scope + or CudaGraphScope.moe_router not in self.cuda_graph_scope ), 'cuda_graph_scope must not contain both moe and moe_router.' - if 'moe_preprocess' in self.cuda_graph_scope: + if CudaGraphScope.moe_preprocess in self.cuda_graph_scope: assert ( - 'moe_router' in self.cuda_graph_scope + CudaGraphScope.moe_router in self.cuda_graph_scope ), 'moe_preprocess cuda graph is only supported with moe_router cuda graph.' if self.num_moe_experts is None or self.num_moe_experts <= 1: assert ( - 'moe' not in self.cuda_graph_scope - and 'moe_router' not in self.cuda_graph_scope + CudaGraphScope.moe not in self.cuda_graph_scope + and CudaGraphScope.moe_router not in self.cuda_graph_scope ), 'moe cuda graph is only supported for MoE.' else: if self.moe_layer_freq == 1 or ( isinstance(self.moe_layer_freq, list) and 0 not in self.moe_layer_freq ): - assert 'mlp' not in self.cuda_graph_scope, ( + assert CudaGraphScope.mlp not in self.cuda_graph_scope, ( 'mlp cuda graph is only supported for dense layers, ' 'but not found in the model.' ) @@ -1682,13 +1693,13 @@ def __post_init__(self): or not self.moe_pad_expert_input_to_capacity ): assert ( - 'moe' not in self.cuda_graph_scope + CudaGraphScope.moe not in self.cuda_graph_scope ), 'moe cuda graph is only supported with drop-padding MoE.' 
if self.moe_token_dispatcher_type == 'alltoall' and ( self.moe_expert_capacity_factor is not None or self.moe_router_padding_for_quantization ): - assert 'moe_preprocess' not in self.cuda_graph_scope, ( + assert CudaGraphScope.moe_preprocess not in self.cuda_graph_scope, ( 'moe_preprocess cuda graph is not supported when there are ' 'DtoH copies and synchronizations in the preprocess step.' ) @@ -1698,25 +1709,28 @@ def __post_init__(self): raise ValueError( "Full-layer CUDA graphs not supported with activation recomputation." ) - elif self.cuda_graph_scope != ['full_iteration']: + elif self.cuda_graph_scope != [CudaGraphScope.full_iteration]: # For scoped CUDA graphs, only the non-graphed parts of the layer can be # recomputed. So check if there are overlaps between the recomputed parts # and the graphed parts. - if "attn" in self.cuda_graph_scope: + if CudaGraphScope.attn in self.cuda_graph_scope: for module in self.recompute_modules: if module in ['core_attn', 'mla_up_proj']: raise ValueError( f'attn cuda graph is not supported with {module} recompute.' ) - if "mlp" in self.cuda_graph_scope and "mlp" in self.recompute_modules: + if ( + CudaGraphScope.mlp in self.cuda_graph_scope + and "mlp" in self.recompute_modules + ): raise ValueError(f'mlp cuda graph is not supported with mlp recompute.') - if "moe" in self.cuda_graph_scope: + if CudaGraphScope.moe in self.cuda_graph_scope: for module in self.recompute_modules: if module in ['moe_act', 'moe', 'shared_experts']: raise ValueError( f'moe cuda graph is not supported with {module} recompute.' 
) - if "moe_router" in self.cuda_graph_scope: + if CudaGraphScope.moe_router in self.cuda_graph_scope: for module in self.recompute_modules: if module in ['moe', 'shared_experts']: raise ValueError( @@ -1725,25 +1739,25 @@ def __post_init__(self): ) if "layernorm" in self.recompute_modules: if ( - "attn" in self.cuda_graph_scope - and "mlp" in self.cuda_graph_scope + CudaGraphScope.attn in self.cuda_graph_scope + and CudaGraphScope.mlp in self.cuda_graph_scope and ( - "moe" in self.cuda_graph_scope - or "moe_router" in self.cuda_graph_scope + CudaGraphScope.moe in self.cuda_graph_scope + or CudaGraphScope.moe_router in self.cuda_graph_scope ) ): raise ValueError( 'cuda graph is not supported with layernorm recompute.' ) - if "attn" in self.cuda_graph_scope: + if CudaGraphScope.attn in self.cuda_graph_scope: warnings.warn( "input_layernorm recompute is not supported with attention " "cudagraph. Will only recompute the pre_mlp_layernorm." ) if ( - "mlp" in self.cuda_graph_scope - or "moe" in self.cuda_graph_scope - or "moe_router" in self.cuda_graph_scope + CudaGraphScope.mlp in self.cuda_graph_scope + or CudaGraphScope.moe in self.cuda_graph_scope + or CudaGraphScope.moe_router in self.cuda_graph_scope ): warnings.warn( "pre_mlp_layernorm recompute is not supported with mlp/moe " diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index f89678e6216..3ea40577009 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -16,7 +16,7 @@ from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.cuda_graphs import is_graph_capturing -from megatron.core.transformer.enums import LayerType +from megatron.core.transformer.enums import CudaGraphScope, LayerType from megatron.core.transformer.identity_op import IdentityFuncOp, IdentityOp from 
megatron.core.transformer.mlp import MLP from megatron.core.transformer.module import GraphableMegatronModule @@ -382,18 +382,21 @@ def __init__( if "layernorm" in self.config.recompute_modules: if not isinstance(self.input_layernorm, IdentityOp) and ( self.config.cuda_graph_impl == "none" - or 'attn' not in self.config.cuda_graph_scope + or CudaGraphScope.attn not in self.config.cuda_graph_scope ): self.recompute_input_layernorm = True if self.config.fp8 or self.config.fp4: self.self_attention.set_for_recompute_input_layernorm() if not isinstance(self.pre_mlp_layernorm, IdentityOp) and ( self.config.cuda_graph_impl == "none" - or (not self.is_moe_layer and 'mlp' not in self.config.cuda_graph_scope) + or ( + not self.is_moe_layer + and CudaGraphScope.mlp not in self.config.cuda_graph_scope + ) or ( self.is_moe_layer - and 'moe' not in self.config.cuda_graph_scope - and 'moe_router' not in self.config.cuda_graph_scope + and CudaGraphScope.moe not in self.config.cuda_graph_scope + and CudaGraphScope.moe_router not in self.config.cuda_graph_scope ) ): self.recompute_pre_mlp_layernorm = True @@ -634,12 +637,13 @@ def _forward_mlp(self, hidden_states, inference_context=None): and self.config.cuda_graph_impl == "transformer_engine" and self.training and is_graph_capturing() - and 'moe_router' in self.config.cuda_graph_scope + and CudaGraphScope.moe_router in self.config.cuda_graph_scope ): assert ( not self.recompute_pre_mlp_layernorm ), "Recomputation is not supported for CUDA graph." cudagraph_outputs = self.mlp(pre_mlp_layernorm_output) + nvtx_range_pop(suffix="mlp") return cudagraph_outputs + [residual] elif self.recompute_mlp: if self.config.fp8 or self.config.fp4: @@ -694,6 +698,7 @@ def _forward_post_mlp(self, mlp_output_with_bias, residual): Returns: output (Tensor): Transformed hidden states of shape [s, b, h]. 
""" + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( fine_grained_offloading_group_commit, ) @@ -757,7 +762,7 @@ def get_layer_static_inputs(self, seq_length, micro_batch_size): static_inputs = super().get_layer_static_inputs(seq_length, micro_batch_size) if not isinstance(self.self_attention, IdentityOp) and ( - not self.config.cuda_graph_scope or 'attn' in self.config.cuda_graph_scope + not self.config.cuda_graph_scope or CudaGraphScope.attn in self.config.cuda_graph_scope ): slen_per_cp = seq_length // self.config.context_parallel_size static_inputs["attention_mask"] = ( @@ -776,18 +781,18 @@ def _get_submodules_under_cudagraphs(self): return super()._get_submodules_under_cudagraphs() submodules = [] - if 'attn' in self.config.cuda_graph_scope: + if CudaGraphScope.attn in self.config.cuda_graph_scope: submodules += [ self.input_layernorm, self.self_attention, self.pre_cross_attn_layernorm, self.cross_attention, ] - if (not self.is_moe_layer and 'mlp' in self.config.cuda_graph_scope) or ( - self.is_moe_layer and 'moe' in self.config.cuda_graph_scope + if (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) or ( + self.is_moe_layer and CudaGraphScope.moe in self.config.cuda_graph_scope ): submodules += [self.pre_mlp_layernorm, self.mlp] - elif self.is_moe_layer and 'moe_router' in self.config.cuda_graph_scope: + elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope: submodules += [self.pre_mlp_layernorm, self.mlp.router] if ( self.config.moe_shared_expert_intermediate_size is not None @@ -805,7 +810,7 @@ def _te_cuda_graph_capture(self, *args, **kwargs): 2. If context is None, it cannot be returned as output. 
""" context = None - if not self.config.cuda_graph_scope or 'attn' in self.config.cuda_graph_scope: + if not self.config.cuda_graph_scope or CudaGraphScope.attn in self.config.cuda_graph_scope: hidden_states, context = self._forward_attention(*args, **kwargs) else: if len(args) > 0: @@ -815,12 +820,12 @@ def _te_cuda_graph_capture(self, *args, **kwargs): if ( not self.config.cuda_graph_scope - or (not self.is_moe_layer and 'mlp' in self.config.cuda_graph_scope) + or (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) or ( self.is_moe_layer and ( - 'moe' in self.config.cuda_graph_scope - or 'moe_router' in self.config.cuda_graph_scope + CudaGraphScope.moe in self.config.cuda_graph_scope + or CudaGraphScope.moe_router in self.config.cuda_graph_scope ) ) ): @@ -841,7 +846,7 @@ def _te_cuda_graph_replay(self, *args, **kwargs): Hence, `inference_context` and `packed_seq_params` are excluded from input list. """ context = None - if self.config.cuda_graph_scope and 'attn' not in self.config.cuda_graph_scope: + if self.config.cuda_graph_scope and CudaGraphScope.attn not in self.config.cuda_graph_scope: hidden_states, context = self._forward_attention(*args, **kwargs) args = (hidden_states,) kwargs = {} @@ -861,13 +866,13 @@ def _te_cuda_graph_replay(self, *args, **kwargs): if ( not self.config.cuda_graph_scope - or (not self.is_moe_layer and 'mlp' in self.config.cuda_graph_scope) - or (self.is_moe_layer and 'moe' in self.config.cuda_graph_scope) + or (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) + or (self.is_moe_layer and CudaGraphScope.moe in self.config.cuda_graph_scope) ): # CUDA Graph captures the whole MLP/MoE part. CUDA Graph output is the layer output. assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output." 
output = cuda_graph_output.pop() - elif self.is_moe_layer and 'moe_router' in self.config.cuda_graph_scope: + elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope: # CUDA Graph partially captures the MoE. # The rest of the layer should go to the normal pass. shared_expert_output, routing_map, residual = None, None, None @@ -882,7 +887,7 @@ def _te_cuda_graph_replay(self, *args, **kwargs): # Split cudagraph outputs into function outputs and attribute outputs, and # process them separately. Function outputs should have three tensors. func_output, attr_outputs = cuda_graph_output[:3], cuda_graph_output[3:] - if 'moe_preprocess' in self.config.cuda_graph_scope: + if CudaGraphScope.moe_preprocess in self.config.cuda_graph_scope: hidden_states, probs, residual = func_output valid_cudagraph_attrs = self.mlp.token_dispatcher.valid_cudagraph_attrs assert len(attr_outputs) == len( @@ -989,7 +994,7 @@ def _should_call_local_cudagraph(self, *args, **kwargs): (kwargs.get('inference_context') is not None) or (kwargs.get('inference_params') is not None) ) - and 'full_iteration' not in self.config.cuda_graph_scope + and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope ): if kwargs['inference_context'].is_static_batching(): using_cuda_graph = kwargs['inference_context'].is_decode_only() diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 8be173c75a0..0cf2d006863 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -23,7 +23,7 @@ from megatron.core.rerun_state_machine import RerunStateMachine from megatron.core.transformer import MLATransformerConfig, TransformerConfig from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout -from megatron.core.transformer.enums import AttnBackend +from megatron.core.transformer.enums import AttnBackend, CudaGraphScope from megatron.core.transformer.heterogeneous.heterogeneous_config import ( 
HeterogeneousTransformerConfig, MLPConfig, @@ -772,7 +772,7 @@ def validate_args(args, defaults={}): if args.rank == 0: print('accumulate and all-reduce gradients in fp32 for ' 'bfloat16 data type.', flush=True) - if args.cuda_graph_impl == "local" and "full_iteration" in args.cuda_graph_scope: + if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope: if not args.inference_dynamic_batching: assert not args.check_for_nan_in_loss_and_grad, \ "--no-check-for-nan-in-loss-and-grad should be set with full_iteration CUDA graph" @@ -1273,6 +1273,15 @@ def validate_args(args, defaults={}): assert ( args.recompute_granularity != 'full' ), 'recompute_granularity must not be full when CUDA Graphs are enabled.' + if args.cuda_graph_scope == "full" or ( + isinstance(args.cuda_graph_scope, list) and "full" in args.cuda_graph_scope + ): + if isinstance(args.cuda_graph_scope, list): + assert args.cuda_graph_scope == ["full"], "full scope cannot be used with other scopes." + args.cuda_graph_scope = [] + warn_rank_0( + 'full scope is deprecated. Use empty cuda_graph_scope to capture the whole layer.' + ) if args.multi_latent_attention: assert not args.group_query_attention, "Group query attention is mutually exclusive with multi latent attention." @@ -1494,7 +1503,7 @@ def _add_inference_args(parser): '"none": no CUDA graph. ' '"local": capture the CUDA graph using MCore local implementation. --cuda-graph-scope=\"full_iteration\" enables whole iteration CUDA graph. ' '"transformer_engine": capture the CUDA graph using TE make_graphed_callables().') - group.add_argument('--cuda-graph-scope', nargs='+', type=str, default=[], + group.add_argument('--cuda-graph-scope', nargs='+', type=lambda scope: CudaGraphScope[scope] if scope != "full" else scope, default=[], help='Determines the CUDA graphs capturing scope. ' 'choices: "attn", "mlp", "moe", "moe_router", "moe_preprocess", "mamba", "full_iteration". 
' '"attn": captures operations in TransformerLayer._forward_attention(). ' @@ -1506,7 +1515,8 @@ def _add_inference_args(parser): '"mamba": captures the mamba layer. ' '"full_iteration": captures a whole iteration. ' 'full_iteration scope is only supported with --cuda-graph-impl=local, other scopes are only supported with --cuda-graph-impl=transformer_engine. ' - 'If not specified, the default scope is to capture the whole Transformer layer.') + 'If not specified, the default scope is to capture the whole Transformer layer. ' + 'For backward compatibility, we still allow passing "full" to specify capturing the whole layer, and convert it to an empty list.') group.add_argument('--use-legacy-static-engine', action='store_true', default=False, help='Use legacy static engine. (Current static engine uses dynamic engine under the hood)', dest='use_legacy_static_engine') diff --git a/megatron/training/training.py b/megatron/training/training.py index 5c9de623ce5..e88b9839d28 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -59,6 +59,7 @@ from megatron.training.checkpointing import checkpoint_exists from megatron.core.full_cuda_graph import FullCudaGraphWrapper from megatron.core.transformer.cuda_graphs import TECudaGraphHelper +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.module import Float16Module from megatron.core.distributed import DistributedDataParallelConfig, TorchFullyShardedDataParallelConfig from megatron.core.distributed import DistributedDataParallel as DDP @@ -2277,7 +2278,7 @@ def train( eval_iterations = 0 # Wrap forward_backward_func for Full iteration CUDA graph forward_backward_func = get_forward_backward_func() - if args.cuda_graph_impl == "local" and "full_iteration" in args.cuda_graph_scope: + if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope: forward_backward_func = FullCudaGraphWrapper(forward_backward_func, 
cuda_graph_warmup_steps=args.cuda_graph_warmup_steps) def get_e2e_base_metrics(): @@ -2626,6 +2627,10 @@ def get_e2e_base_metrics(): if should_exit: break + # Destroy CUDA Graphs. + if args.cuda_graph_impl == "transformer_engine" and cuda_graph_helper.graphs_created(): + cuda_graph_helper.delete_cuda_graphs() + one_logger_utils.track_e2e_metrics() # Flush TensorBoard, WandB writers and one-logger. @@ -2699,7 +2704,7 @@ def evaluate( eval_batch_size = args.global_batch_size eval_num_microbatches = eval_batch_size // (args.micro_batch_size * args.data_parallel_size) forward_backward_func = get_forward_backward_func() - if args.cuda_graph_impl == "local" and "full_iteration" in args.cuda_graph_scope: + if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope: forward_backward_func = FullCudaGraphWrapper(forward_backward_func, cuda_graph_warmup_steps=args.cuda_graph_warmup_steps) if eval_iters is None: diff --git a/tests/unit_tests/inference/engines/test_dynamic_engine.py b/tests/unit_tests/inference/engines/test_dynamic_engine.py index 0ac4b296746..26d3dcfbd6d 100644 --- a/tests/unit_tests/inference/engines/test_dynamic_engine.py +++ b/tests/unit_tests/inference/engines/test_dynamic_engine.py @@ -3,7 +3,7 @@ import asyncio import random import types -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple import pytest @@ -41,6 +41,7 @@ from megatron.core.models.mamba.mamba_model import MambaModel from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed from megatron.core.transformer.cuda_graphs import CudaGraphManager, _CudagraphGlobalRecord +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import ( check_mamba_sequence_packing_support, @@ -103,7 +104,9 @@ class DynamicEngineTestConfig: return_log_probs: bool = False 
materialize_only_last_token_logits: bool = True skip_prompt_log_probs: bool = False - cuda_graph_scope: List[str] = None + cuda_graph_scope: List[CudaGraphScope] = field( + default_factory=lambda: [CudaGraphScope.full_iteration] + ) force_build_cuda_graphs: bool = False # If False, do not build cuda graphs in the tests, even if # num_cuda_graphs is set. @@ -136,9 +139,6 @@ def __post_init__(self): if self.context_max_tokens_override is None: self.context_max_tokens_override = self.num_requests * self.max_sequence_length - if self.cuda_graph_scope is None: - self.cuda_graph_scope = ["full_iteration"] - @dataclass class DynamicEngineTestEnv: @@ -514,7 +514,7 @@ def teardown_method(self, method): ) @pytest.mark.parametrize("model_provider", ["gpt", "mamba"]) @pytest.mark.parametrize("num_cuda_graphs", [None, 1, 4]) - @pytest.mark.parametrize("cuda_graph_scope", [[], ["full_iteration"]]) + @pytest.mark.parametrize("cuda_graph_scope", [[], [CudaGraphScope.full_iteration]]) def test_simple(self, model_provider, num_cuda_graphs, cuda_graph_scope) -> None: """Simple test that runs without errors, and validates output.""" skip_if_mamba_sequence_packing_not_available(model_provider) diff --git a/tests/unit_tests/test_fp8_param.py b/tests/unit_tests/test_fp8_param.py index 0b8d41769ec..361698f7127 100644 --- a/tests/unit_tests/test_fp8_param.py +++ b/tests/unit_tests/test_fp8_param.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import contextlib import gc @@ -36,7 +36,10 @@ try: from transformer_engine.pytorch.tensor.utils import post_all_gather_processing - cuda_graph_supported = True + if is_te_min_version("2.10.0"): + cuda_graph_supported = True + else: + reason_for_no_cuda_graph = "Need newer TransformerEngine" except ImportError: reason_for_no_cuda_graph = "Need newer TransformerEngine" @@ -65,12 +68,16 @@ class TestFP8Param: def setup_method(self, method): self.seq_length = 512 self.micro_batch_size = 2 + self.cuda_graph_helper = None os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1' def teardown_method(self, method): Utils.destroy_model_parallel() destroy_global_vars() destroy_num_microbatches_calculator() + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None gc.collect() def model_provider( @@ -209,13 +216,12 @@ def _run_test_helper( ) assert len(gpt_model) == 1 # Assume only one model in the model provider. - cuda_graph_helper = None # Hard coded to use cuda_graph_impl="transformer_engine" cuda_graph_impl = "transformer_engine" if use_cuda_graph and cuda_graph_impl == "transformer_engine": from megatron.core.transformer.cuda_graphs import TECudaGraphHelper - cuda_graph_helper = TECudaGraphHelper( + self.cuda_graph_helper = TECudaGraphHelper( model=gpt_model, config=gpt_model[0].config, seq_length=self.seq_length, @@ -250,13 +256,13 @@ def _run_test_helper( # Capture CUDA graphs after warmup if helper is provided. # Hard coded cuda_graph_warmup_steps = 0. 
cuda_graph_warmup_steps = 0 - if cuda_graph_helper is not None and i == cuda_graph_warmup_steps: + if self.cuda_graph_helper is not None and i == cuda_graph_warmup_steps: if should_disable_forward_pre_hook(args): disable_forward_pre_hook(gpt_model, param_sync=False) - cuda_graph_helper.create_cudagraphs() + self.cuda_graph_helper.create_cudagraphs() if should_disable_forward_pre_hook(args): enable_forward_pre_hook(gpt_model) - cuda_graph_helper.cuda_graph_set_manual_hooks() + self.cuda_graph_helper.cuda_graph_set_manual_hooks() # For the mxfp8_param with reuse_grad_buf_for_mxfp8_param_ag and dp_ag_overlap, # we need to call the _copy_main_params_to_param_buffer() after the grad buffer @@ -297,6 +303,10 @@ def _run_test_helper( loss_list.append(loss.item()) + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None + return torch.tensor(loss_list) def run_test(self, tp_size, recipe, inference: bool = False, **kwargs): diff --git a/tests/unit_tests/transformer/test_cuda_graphs.py b/tests/unit_tests/transformer/test_cuda_graphs.py index 3ad0262a1cf..cee75171560 100644 --- a/tests/unit_tests/transformer/test_cuda_graphs.py +++ b/tests/unit_tests/transformer/test_cuda_graphs.py @@ -9,6 +9,7 @@ import pytest import torch +from transformer_engine.pytorch.fp8 import check_fp8_support from megatron.core import parallel_state from megatron.core.enums import ModelType @@ -25,6 +26,7 @@ TextGenerationController, ) from megatron.core.models.gpt.gpt_layer_specs import ( + get_gpt_decoder_block_spec, get_gpt_layer_local_spec, get_gpt_layer_with_transformer_engine_spec, get_gpt_mtp_block_spec, @@ -41,6 +43,8 @@ model_parallel_cuda_manual_seed, ) from megatron.core.transformer.cuda_graphs import CudaGraphManager, _CudagraphGlobalRecord +from megatron.core.transformer.enums import CudaGraphScope +from megatron.core.transformer.moe.fused_a2a import reset_hybrid_ep_buffer from 
megatron.core.transformer.transformer_block import TransformerBlock from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import is_fa_min_version, is_te_min_version @@ -54,6 +58,8 @@ from megatron.training.training import setup_model_and_optimizer from tests.unit_tests.test_utilities import Utils +fp8_available, _ = check_fp8_support() + class TestParallelTransformerBlockCudagraphs: def setup_method(self, method): @@ -747,6 +753,9 @@ class TestPartialCudaGraph: def setup_method(self, method): self.seq_length = 512 self.micro_batch_size = 2 + self.tp_size = 2 + self.cp_size = 2 + self.cuda_graph_helper = None # Store original environment variable values self.original_env = { 'CUDA_DEVICE_MAX_CONNECTIONS': os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS'), @@ -762,22 +771,28 @@ def teardown_method(self, method): os.environ.pop(key, None) else: os.environ[key] = value - Utils.destroy_model_parallel() destroy_global_vars() destroy_num_microbatches_calculator() + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None gc.collect() def model_provider( self, pre_process=True, post_process=True, - layer_spec_fn=get_gpt_layer_with_transformer_engine_spec, + layer_spec_fn=get_gpt_decoder_block_spec, **config_kwargs, ): - model_parallel_cuda_manual_seed(123) args = get_args() config = core_transformer_config_from_args(args) - transformer_layer_spec = layer_spec_fn() + transformer_layer_spec = layer_spec_fn( + config, + use_transformer_engine=True, + normalization=args.normalization, + qk_l2_norm=args.qk_l2_norm, + ) if args.mtp_num_layers: mtp_block_spec = get_gpt_mtp_block_spec( config, transformer_layer_spec, use_transformer_engine=True @@ -810,18 +825,17 @@ def create_test_args( args.num_layers = 4 args.mtp_num_layers = 1 args.vocab_size = 1024 - args.hidden_size = 128 + args.hidden_size = 512 args.num_attention_heads = 8 
args.max_position_embeddings = 512 - args.global_batch_size = self.micro_batch_size * 8 + args.global_batch_size = self.micro_batch_size * 8 // self.tp_size // self.cp_size args.micro_batch_size = self.micro_batch_size args.create_attention_mask_in_dataloader = True args.seq_length = self.seq_length - args.tensor_model_parallel_size = 2 - args.sequence_parallel = True + args.tensor_model_parallel_size = self.tp_size + args.sequence_parallel = True if self.tp_size > 1 else False args.pipeline_model_parallel_size = 1 - args.context_parallel_size = 1 - args.expert_model_parallel_size = ep_size + args.context_parallel_size = self.cp_size args.train_iters = 10 args.lr = 3e-5 args.bf16 = True @@ -836,17 +850,26 @@ def create_test_args( # MoE settings args.num_experts = 4 args.expert_model_parallel_size = ep_size + args.expert_tensor_parallel_size = 1 if ep_size > 1 else self.tp_size args.moe_shared_expert_intermediate_size = 1024 - args.moe_layer_freq = "[0,0,1,1]" + args.moe_layer_freq = [0, 0, 1, 1] args.moe_permute_fusion = True args.moe_router_fusion = True args.moe_router_topk = 2 + args.moe_router_dtype = "fp32" # CUDA graph settings args.cuda_graph_impl = cuda_graph_impl args.cuda_graph_scope = cuda_graph_scope args.cuda_graph_warmup_steps = cuda_graph_warmup_steps - args.use_te_rng_tracker = cuda_graph_impl != "none" + + # fp8 settings + if fp8_available: + args.fp8 = "e4m3" + args.fp8_recipe = "tensorwise" + args.first_last_layers_bf16 = True + args.num_layers_at_start_in_bf16 = 1 + args.num_layers_at_end_in_bf16 = 1 for key, value in kwargs.items(): assert hasattr(args, key) @@ -856,15 +879,15 @@ def create_test_args( set_global_variables(args, False) return args - def get_batch(self, seq_length, micro_batch_size): - data = list(range(seq_length)) + def get_batch(self, seq_length, micro_batch_size, cp_size): + data = list(range(seq_length // cp_size)) input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() labels = 1 + 
torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() attention_mask = torch.ones( - (micro_batch_size, 1, seq_length, seq_length), dtype=bool + (micro_batch_size, 1, seq_length // cp_size, seq_length), dtype=bool ).cuda() - loss_mask = torch.ones(seq_length).repeat((micro_batch_size, 1)).cuda() + loss_mask = torch.ones(seq_length // cp_size).repeat((micro_batch_size, 1)).cuda() return input_ids, labels, position_ids, attention_mask, loss_mask def _run_test_helper( @@ -877,12 +900,10 @@ def _run_test_helper( set_args(args) torch.manual_seed(123) - Utils.initialize_model_parallel( - tensor_model_parallel_size=2, expert_model_parallel_size=ep_size - ) + model_parallel_cuda_manual_seed(123) input_ids, labels, position_ids, attention_mask, loss_mask = self.get_batch( - self.seq_length, self.micro_batch_size + self.seq_length, self.micro_batch_size, self.cp_size ) gpt_model, optimizer, _ = setup_model_and_optimizer( @@ -890,13 +911,10 @@ def _run_test_helper( ) assert len(gpt_model) == 1 # Assume only one model in the model provider. 
- loss_list = [] - - cuda_graph_helper = None if cuda_graph_impl == "transformer_engine": from megatron.core.transformer.cuda_graphs import TECudaGraphHelper - cuda_graph_helper = TECudaGraphHelper( + self.cuda_graph_helper = TECudaGraphHelper( model=gpt_model, config=gpt_model[0].config, seq_length=self.seq_length, @@ -904,14 +922,17 @@ def _run_test_helper( optimizers=[optimizer], ) + loss_list = [] + for i in range(100): gpt_model[0].zero_grad_buffer() optimizer.zero_grad() # Capture CUDA graphs after warmup if helper is provided - if cuda_graph_helper is not None and i == cuda_graph_warmup_steps: - cuda_graph_helper.create_cudagraphs() + if self.cuda_graph_helper is not None and i == cuda_graph_warmup_steps: + self.cuda_graph_helper.create_cudagraphs() + gpt_model[0].set_is_first_microbatch() output = gpt_model[0].forward( input_ids=input_ids, position_ids=position_ids, @@ -922,7 +943,7 @@ def _run_test_helper( # Check output shapes assert output.shape[0] == self.micro_batch_size - assert output.shape[1] == self.seq_length + assert output.shape[1] == self.seq_length // self.cp_size # Verify gradients loss = output.mean() @@ -936,16 +957,29 @@ def _run_test_helper( loss_list.append(loss.item()) + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None + return torch.tensor(loss_list) @pytest.mark.skipif( - not (HAVE_TE and is_te_min_version("1.14.0")), - reason="Partial CUDA graph support requires TransformerEngine version >= 1.14.0", + not (HAVE_TE and is_te_min_version("2.10.0")), + reason="Partial CUDA graph UT support requires TransformerEngine version >= 2.10.0", ) @pytest.mark.parametrize("ep_size", [1, 4]) @pytest.mark.parametrize("moe_dropless_dispatcher", [False, True]) @pytest.mark.parametrize("moe_dispatcher_type", ["alltoall", "deepep", "hybridep"]) def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispatcher_type): + 
initialize_rng_tracker(use_te_rng_tracker=True, force_reset=True) + Utils.initialize_model_parallel( + tensor_model_parallel_size=self.tp_size, + context_parallel_size=self.cp_size, + pipeline_model_parallel_size=1, + expert_tensor_parallel_size=1 if ep_size > 1 else self.tp_size, + expert_model_parallel_size=ep_size, + ) + extra_kwargs = {} if moe_dispatcher_type == "deepep": if not is_deep_ep_available(): @@ -962,19 +996,28 @@ def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispa if not moe_dropless_dispatcher: if moe_dispatcher_type == "deepep": pytest.skip("Deep EP doesn't support drop&pad MoE") + if moe_dispatcher_type == "hybridep" and ep_size == 1: + pytest.skip("Hybrid EP doesn't support drop&pad MoE with ep_size == 1") extra_kwargs["moe_expert_capacity_factor"] = 1.0 extra_kwargs["moe_pad_expert_input_to_capacity"] = True loss_list_ref = self._run_test_helper(ep_size, "none", None, 0, **extra_kwargs) for cuda_graph_scope in [ None, - ["attn"], - ["moe"], - ["mlp", "moe_router"], - ["attn", "mlp", "moe_router", "moe_preprocess"], + [CudaGraphScope.attn], + [CudaGraphScope.moe], + [CudaGraphScope.mlp, CudaGraphScope.moe_router], + [ + CudaGraphScope.attn, + CudaGraphScope.mlp, + CudaGraphScope.moe_router, + CudaGraphScope.moe_preprocess, + ], ]: - if moe_dropless_dispatcher and (cuda_graph_scope is None or "moe" in cuda_graph_scope): - # Dropless MoE doesn't work with "moe" scope cudagraph. Skip. + if (moe_dropless_dispatcher or moe_dispatcher_type == "hybridep") and ( + cuda_graph_scope is None or CudaGraphScope.moe in cuda_graph_scope + ): + # Dropless MoE or Hybrid EP doesn't work with "moe" scope cudagraph. Skip. 
continue cuda_graph_warmup_steps = 3 loss_list = self._run_test_helper( @@ -986,6 +1029,10 @@ def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispa ) assert torch.equal(loss_list, loss_list_ref) + if moe_dispatcher_type == "hybridep": + reset_hybrid_ep_buffer() + Utils.destroy_model_parallel() + if __name__ == "__main__": From 14b19b1a9f347cb860064dc40291e9de79d99e4b Mon Sep 17 00:00:00 2001 From: Robin Zhang Date: Tue, 2 Dec 2025 21:37:05 +0800 Subject: [PATCH 168/334] [Dev] Optimize TE cudagraph input memory (#2391) Signed-off-by: Robin Zhang --- megatron/core/transformer/cuda_graphs.py | 245 +++++++++++++---- .../transformer/test_cuda_graphs.py | 258 +++++++++++++++++- 2 files changed, 444 insertions(+), 59 deletions(-) diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 5b0a0333d9e..f0fb39e6500 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -1485,72 +1485,204 @@ def graphs_created(self): """ return self._graphs_created - def _get_cuda_graph_input_data(self): + def _get_sample_arguments(self, order): """ - Create the CUDA Graph capturing input data. - The data is organized per-chunk per-microbatch per-layer. + Generate sample arguments and keyword arguments for CUDA Graph capturing with + memory-optimized buffer reuse. + + This method creates static input tensors for each (layer, microbatch) pair needed + by TE's make_graphed_callables(). It optimizes memory usage by reusing input buffers + across non-overlapping forward passes based on the pipeline parallel schedule. + This optimization is essential for reducing peak memory during CUDA Graph capturing with + many microbatches, as it allows buffers to be reused instead of allocating new ones for + later microbatches. 
+ + Memory Optimization Strategy: + The 1F1B (one-forward-one-backward) interleaved schedule in pipeline parallelism + means that once a microbatch's backward pass completes, its input buffers are no + longer needed. This method tracks buffer lifecycle and reuses "consumed" buffers + (those whose backward has completed) for new forward passes with matching tensor + signatures (shape, dtype, layout). + + Example schedule: [1, 1, 1, 2, 2, 2, -2, 1, -2, 1, -2, 2, -1, 2, -1, -1, -2, -2, -1, -1] + - Positive values indicate forward passes (chunk_id = value) + - Negative values indicate backward passes (chunk_id = -value) + - When processing -2 (backward of chunk 2), its buffers become available for reuse + - The next forward with matching signature can reuse those buffers + + Args: + order (List[int]): The forward/backward execution order from + convert_schedule_table_to_order(). Positive integers represent forward passes + (1-indexed chunk ID), negative integers represent backward passes. + + Returns: + Tuple[List[Tuple], List[Dict]]: A tuple containing: + - sample_args: List of positional argument tuples for each (layer, microbatch). + Length = num_layers * num_microbatches. Elements with the same tensor + signature may share references to reduce memory allocation. + - sample_kwargs: List of keyword argument dicts for each (layer, microbatch). + Length = num_layers * num_microbatches. Elements with the same tensor + signature may share references to reduce memory allocation. + + Data Structures: + - fwd_sample_queues: Dict[chunk_id, List[Tuple[sample_keys, fwd_idx]]] + Queue of forward samples per chunk awaiting their backward pass. + - consumed_sample_queue: Dict[sample_keys, List[fwd_idx]] + Pool of buffer indices whose backward is complete, keyed by tensor signature. + - sample_keys: Tuple of (shape, dtype, layout) for args + (key, shape, dtype, layout) + for kwargs, used to match compatible buffers for reuse. 
""" + assert self.num_model_chunks == max( + order + ), "num_model_chunks must match the max chunk id in order." + assert ( + get_num_microbatches() == len(order) // self.num_model_chunks // 2 + ), "num_microbatches must match the number of microbatches in order." + + # Generate sample arguments and keyword arguments for capturing. + sample_args = [None] * (len(self.flattened_callables) * get_num_microbatches()) + sample_kwargs = [None] * (len(self.flattened_callables) * get_num_microbatches()) rotary_pos_emb_cache = {} - def get_rotary_pos_emb(transformer_module, transformer_input): - if ( - transformer_module.position_embedding_type == 'rope' - and not self.config.multi_latent_attention - ): - rotary_seq_len = transformer_module.rotary_pos_emb.get_rotary_seq_len( - None, transformer_module.decoder, transformer_input, self.config, None - ) - if rotary_seq_len not in rotary_pos_emb_cache: - rotary_pos_emb_cache[rotary_seq_len] = transformer_module.rotary_pos_emb( - rotary_seq_len + def _get_layer_static_inputs(layer, chunk_of_the_layer): + """ + Get the static inputs for a layer. + """ + assert layer in chunk_of_the_layer.decoder.layers or any( + layer is mtp_layer.transformer_layer for mtp_layer in chunk_of_the_layer.mtp.layers + ), "Layer is not in the chunk" + + def get_rotary_pos_emb(transformer_module, transformer_input): + if ( + transformer_module.position_embedding_type == 'rope' + and not self.config.multi_latent_attention + ): + rotary_seq_len = transformer_module.rotary_pos_emb.get_rotary_seq_len( + None, transformer_module.decoder, transformer_input, self.config, None ) - return rotary_pos_emb_cache[rotary_seq_len] - else: - return None + if rotary_seq_len not in rotary_pos_emb_cache: + rotary_pos_emb_cache[rotary_seq_len] = transformer_module.rotary_pos_emb( + rotary_seq_len + ) + return rotary_pos_emb_cache[rotary_seq_len] + else: + return None - # Generate sample arguments and keyword arguments for capturing. 
- sample_args = [] - sample_kwargs = [] - for chunk_number, chunk_with_decoder in enumerate(self.chunks_with_decoder): - if chunk_with_decoder is None: - continue - layers = self.callables_per_chunk[chunk_number] - for _ in range(get_num_microbatches()): - for layer in layers: - static_inputs = layer.get_layer_static_inputs( - self.seq_length, self.micro_batch_size - ) + static_inputs = layer.get_layer_static_inputs(self.seq_length, self.micro_batch_size) - from megatron.core.transformer.identity_op import IdentityOp - from megatron.core.transformer.transformer_layer import TransformerLayer + from megatron.core.transformer.identity_op import IdentityOp + from megatron.core.transformer.transformer_layer import TransformerLayer - contains_self_attn = ( - isinstance(layer, TransformerLayer) - and not isinstance(layer.self_attention, IdentityOp) - and ( - not self.config.cuda_graph_scope - or CudaGraphScope.attn in self.config.cuda_graph_scope - ) - ) - if is_te_min_version("1.10.0"): - # te.make_graphed_callables() accepts keyword arguments since 1.10.0. - hidden_states = static_inputs.pop("hidden_states") - sample_args.append((hidden_states,)) - if contains_self_attn: - rotary_pos_emb = get_rotary_pos_emb(chunk_with_decoder, hidden_states) - if rotary_pos_emb is not None: - static_inputs["rotary_pos_emb"] = rotary_pos_emb - sample_kwargs.append(static_inputs) - elif contains_self_attn: - sample_args.append( - ( - static_inputs.pop("hidden_states"), - static_inputs.pop("attention_mask"), + contains_self_attn = ( + isinstance(layer, TransformerLayer) + and not isinstance(layer.self_attention, IdentityOp) + and ( + not self.config.cuda_graph_scope + or CudaGraphScope.attn in self.config.cuda_graph_scope + ) + ) + + _sample_kwargs = {} + if is_te_min_version("1.10.0"): + # te.make_graphed_callables() accepts keyword arguments since 1.10.0. 
+ hidden_states = static_inputs.pop("hidden_states") + _sample_args = (hidden_states,) + if contains_self_attn: + rotary_pos_emb = get_rotary_pos_emb(chunk_of_the_layer, hidden_states) + if rotary_pos_emb is not None: + static_inputs["rotary_pos_emb"] = rotary_pos_emb + _sample_kwargs = static_inputs + elif contains_self_attn: + _sample_args = ( + static_inputs.pop("hidden_states"), + static_inputs.pop("attention_mask"), + ) + else: + _sample_args = (static_inputs.pop("hidden_states"),) + return _sample_args, _sample_kwargs + + # Calculate the starting index of each chunk in callables for future use. + prefix_num_layers = [0] + for model_chunk_idx in range(self.num_model_chunks): + num_layers = self.num_layers_per_chunk[model_chunk_idx] + prefix_num_layers.append(prefix_num_layers[-1] + num_layers) + + # Reorganize args and kwargs for input tensor reuse. + # fwd_sample_queues is keyed by model chunk index. The value is a queue of tuples. + # Each tuple contains the sample key signature and its fwd_idx. When we finish a backward + # chunk, we pop the corresponding fwd_idx and push to the consumed_sample_queue. + # consumed_sample_queue is keyed by the sample key signature. The value is a queue of the + # fwd_idx whose backward has been called so that we can reuse the same static buffers. + # In this way, we can reuse the same static input buffers for the non-overlapping samples + # with the same input signature. 
+ fwd_sample_queues = {} + consumed_sample_queue = {} + fwd_idx = [0] * self.num_model_chunks + for chunk_id in order: + model_chunk_idx = abs(chunk_id) - 1 + + if chunk_id > 0: + sample_start_idx = (prefix_num_layers[model_chunk_idx] * get_num_microbatches()) + ( + fwd_idx[model_chunk_idx] * self.num_layers_per_chunk[model_chunk_idx] + ) + fwd_sample_idx = [ + sample_start_idx + i for i in range(self.num_layers_per_chunk[model_chunk_idx]) + ] + if model_chunk_idx not in fwd_sample_queues: + fwd_sample_queues[model_chunk_idx] = [] + for per_callable_fwd_idx in fwd_sample_idx: + if sample_args[per_callable_fwd_idx] is None: + sample_args[per_callable_fwd_idx], sample_kwargs[per_callable_fwd_idx] = ( + _get_layer_static_inputs( + self.callables_per_chunk[model_chunk_idx][ + per_callable_fwd_idx - sample_start_idx + ], + self.chunks_with_decoder[model_chunk_idx], ) ) - else: - sample_args.append((static_inputs.pop("hidden_states"),)) + + sample_args_keys = tuple( + (t.shape, t.dtype, t.layout) for t in sample_args[per_callable_fwd_idx] + ) + sample_kwargs_keys = tuple( + (k, v.shape, v.dtype, v.layout) + for k, v in sorted(sample_kwargs[per_callable_fwd_idx].items()) + ) + sample_keys = sample_args_keys + sample_kwargs_keys + + fwd_sample_queues[model_chunk_idx].append((sample_keys, per_callable_fwd_idx)) + if consumed_sample_queue.get(sample_keys, []): + reuse_fwd_idx = consumed_sample_queue[sample_keys].pop(0) + assert ( + sample_args[reuse_fwd_idx] is not None + and sample_kwargs[reuse_fwd_idx] is not None + ), "sample_args and sample_kwargs must not be None when reusing." 
+ sample_args[per_callable_fwd_idx] = sample_args[reuse_fwd_idx] + sample_kwargs[per_callable_fwd_idx] = sample_kwargs[reuse_fwd_idx] + fwd_idx[model_chunk_idx] += 1 + else: + num_consumed_samples = min( + len(fwd_sample_queues[model_chunk_idx]), + self.num_layers_per_chunk[model_chunk_idx], + ) + for sample_keys, per_callable_fwd_idx in fwd_sample_queues[model_chunk_idx][ + :num_consumed_samples + ]: + if sample_keys not in consumed_sample_queue: + consumed_sample_queue[sample_keys] = [] + consumed_sample_queue[sample_keys].append(per_callable_fwd_idx) + fwd_sample_queues[model_chunk_idx] = fwd_sample_queues[model_chunk_idx][ + num_consumed_samples: + ] + + return sample_args, sample_kwargs + + def _get_cuda_graph_input_data(self): + """ + Create the CUDA Graph capturing input data. + The data is organized per-chunk per-microbatch per-layer. + """ # Get the PP and VPP scheduling order. from megatron.core.pipeline_parallel.schedules import ( @@ -1581,6 +1713,9 @@ def get_rotary_pos_emb(transformer_module, transformer_input): msg=f'Rank {torch.distributed.get_rank()}: ORDER {order}', ) + # Generate sample arguments and keyword arguments for capturing. 
+ sample_args, sample_kwargs = self._get_sample_arguments(order) + def get_make_graphed_callables_kwargs(): kwargs = {'num_warmup_iters': 11, 'allow_unused_input': True, '_order': order} diff --git a/tests/unit_tests/transformer/test_cuda_graphs.py b/tests/unit_tests/transformer/test_cuda_graphs.py index cee75171560..0eac7c28c6d 100644 --- a/tests/unit_tests/transformer/test_cuda_graphs.py +++ b/tests/unit_tests/transformer/test_cuda_graphs.py @@ -33,7 +33,10 @@ ) from megatron.core.models.gpt.gpt_model import GPTModel from megatron.core.models.mamba.mamba_layer_specs import mamba_stack_spec -from megatron.core.num_microbatches_calculator import destroy_num_microbatches_calculator +from megatron.core.num_microbatches_calculator import ( + destroy_num_microbatches_calculator, + init_num_microbatches_calculator, +) from megatron.core.pipeline_parallel.schedules import set_current_microbatch from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.ssm.mamba_block import MambaStack @@ -42,7 +45,11 @@ initialize_rng_tracker, model_parallel_cuda_manual_seed, ) -from megatron.core.transformer.cuda_graphs import CudaGraphManager, _CudagraphGlobalRecord +from megatron.core.transformer.cuda_graphs import ( + CudaGraphManager, + TECudaGraphHelper, + _CudagraphGlobalRecord, +) from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.moe.fused_a2a import reset_hybrid_ep_buffer from megatron.core.transformer.transformer_block import TransformerBlock @@ -735,6 +742,251 @@ def test_capture_freeze_gc(self): ) +# Global storage for comparing unique buffer counts across different num_microbatches +_unique_buffer_counts = None + + +class TestTECudaGraphHelper: + def setup_method(self, method): + # Initialize parallel state + initialize_rng_tracker(use_te_rng_tracker=True, force_reset=True) + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1 + ) + 
model_parallel_cuda_manual_seed(123) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + destroy_global_vars() + destroy_num_microbatches_calculator() + # Note: _unique_buffer_counts is intentionally NOT cleared here so we can + # compare values across parametrized test runs + + @pytest.mark.parametrize("num_microbatches", [4, 16, 64, 256]) + def test_get_cuda_graph_input_data(self, num_microbatches): + """Test _get_cuda_graph_input_data function in TECudaGraphHelper.""" + + # Set up test configuration + seq_length = 128 + micro_batch_size = 2 + num_layers = 4 + vocab_size = 1024 + hidden_size = 64 + num_attention_heads = 4 + + # Initialize num_microbatches calculator + init_num_microbatches_calculator( + rank=0, + rampup_batch_size=None, + global_batch_size=micro_batch_size * num_microbatches, + micro_batch_size=micro_batch_size, + data_parallel_size=1, + decrease_batch_size_if_needed=False, + ) + + # Create transformer config directly + transformer_config = TransformerConfig( + num_layers=num_layers, + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, + use_cpu_initialization=True, + cuda_graph_impl="transformer_engine", + use_te_rng_tracker=True, + bf16=True, + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + context_parallel_size=1, + ) + + # Create model + torch.manual_seed(123) + model_parallel_cuda_manual_seed(123) + + gpt_model = GPTModel( + config=transformer_config, + transformer_layer_spec=get_gpt_layer_with_transformer_engine_spec(), + vocab_size=vocab_size, + max_sequence_length=seq_length, + parallel_output=True, + position_embedding_type="rope", + ) + + # Move model to CUDA + gpt_model.cuda() + + # Initialize TECudaGraphHelper + cuda_graph_helper = TECudaGraphHelper( + model=[gpt_model], + config=transformer_config, + seq_length=seq_length, + micro_batch_size=micro_batch_size, + optimizers=[], + ) + + # Call _get_cuda_graph_input_data (which internally calls _get_sample_arguments) + 
sample_args, make_graphed_callables_kwargs = cuda_graph_helper._get_cuda_graph_input_data() + + # Extract sample_kwargs from the kwargs dict + # For TE >= 1.10.0, sample_kwargs should always be present + assert ( + 'sample_kwargs' in make_graphed_callables_kwargs + ), "sample_kwargs should be present in make_graphed_callables_kwargs for TE >= 1.10.0" + sample_kwargs = make_graphed_callables_kwargs['sample_kwargs'] + + # Basic checks + num_graphable_layers = len(cuda_graph_helper.flattened_callables) + expected_length = num_graphable_layers * num_microbatches + assert len(sample_args) == expected_length, ( + f"sample_args length mismatch: expected {expected_length}, " f"got {len(sample_args)}" + ) + assert len(sample_kwargs) == expected_length, ( + f"sample_kwargs length mismatch: expected {expected_length}, " + f"got {len(sample_kwargs)}" + ) + + # Check that all elements are not None + for i, (args_item, kwargs_item) in enumerate(zip(sample_args, sample_kwargs)): + assert args_item is not None, f"sample_args[{i}] is None" + assert kwargs_item is not None, f"sample_kwargs[{i}] is None" + assert isinstance(args_item, tuple), f"sample_args[{i}] should be a tuple" + assert isinstance(kwargs_item, dict), f"sample_kwargs[{i}] should be a dict" + assert len(args_item) > 0, f"sample_args[{i}] should not be empty" + # Check that hidden_states is present + assert "hidden_states" in kwargs_item or ( + len(args_item) > 0 and torch.is_tensor(args_item[0]) + ), f"sample_args[{i}] or sample_kwargs[{i}] should contain hidden_states" + + # Check tensor properties + for i, (args_item, kwargs_item) in enumerate(zip(sample_args, sample_kwargs)): + # Get hidden_states from args or kwargs + if len(args_item) > 0 and torch.is_tensor(args_item[0]): + hidden_states = args_item[0] + elif "hidden_states" in kwargs_item: + hidden_states = kwargs_item["hidden_states"] + else: + continue + + assert torch.is_tensor(hidden_states), f"hidden_states at index {i} should be a tensor" + # Check shape 
matches expected (accounting for TP/CP) + expected_seq_len = seq_length // transformer_config.context_parallel_size + if transformer_config.sequence_parallel: + expected_seq_len = expected_seq_len // transformer_config.tensor_model_parallel_size + assert hidden_states.shape[0] == expected_seq_len, ( + f"hidden_states seq_len mismatch at index {i}: " + f"expected {expected_seq_len}, got {hidden_states.shape[0]}" + ) + assert hidden_states.shape[1] == micro_batch_size, ( + f"hidden_states batch_size mismatch at index {i}: " + f"expected {micro_batch_size}, got {hidden_states.shape[1]}" + ) + assert hidden_states.shape[2] == transformer_config.hidden_size, ( + f"hidden_states hidden_size mismatch at index {i}: " + f"expected {transformer_config.hidden_size}, got {hidden_states.shape[2]}" + ) + + # Memory optimization check: verify that buffers with same signature are reused + # Create a mapping of sample_keys to indices + sample_keys_to_indices = {} + for idx, (args_item, kwargs_item) in enumerate(zip(sample_args, sample_kwargs)): + # Create sample_keys similar to the function + args_keys = tuple((t.shape, t.dtype, t.layout) for t in args_item if torch.is_tensor(t)) + kwargs_keys = tuple( + (k, v.shape, v.dtype, v.layout) + for k, v in sorted(kwargs_item.items()) + if torch.is_tensor(v) + ) + sample_keys = args_keys + kwargs_keys + + if sample_keys not in sample_keys_to_indices: + sample_keys_to_indices[sample_keys] = [] + sample_keys_to_indices[sample_keys].append(idx) + + # Check that buffers with same signature share references (memory optimization) + # The optimization reuses buffers when: + # 1. They have the same signature (shape, dtype, layout) + # 2. The backward pass of the original buffer has completed + # 3. 
A new forward pass with matching signature needs a buffer + # Count how many times each tensor is reused + unique_tensors = set() + tensor_reuse_count = {} + for idx, (args_item, kwargs_item) in enumerate(zip(sample_args, sample_kwargs)): + # Get the first tensor from args (hidden_states) + if len(args_item) > 0 and torch.is_tensor(args_item[0]): + tensor_ptr = args_item[0].data_ptr() + unique_tensors.add(tensor_ptr) + tensor_reuse_count[tensor_ptr] = tensor_reuse_count.get(tensor_ptr, 0) + 1 + + # With memory optimization, we should see some buffers reused + # (i.e., some tensors should appear multiple times) + max_reuse = max(tensor_reuse_count.values()) if tensor_reuse_count else 0 + total_entries = len(sample_args) + unique_buffer_count = len(unique_tensors) + + # Verify that memory optimization is working: + # - The number of unique buffers should be <= total entries + # - With the 1F1B schedule and multiple microbatches, we should see some buffer reuse + # - The number of unique buffers should be bounded as num_microbatches grows. 
+ assert unique_buffer_count <= total_entries, ( + f"Memory optimization check: unique_buffer_count ({unique_buffer_count}) " + f"should be <= total_entries ({total_entries})" + ) + global _unique_buffer_counts + if _unique_buffer_counts is None: + _unique_buffer_counts = unique_buffer_count + else: + assert unique_buffer_count == _unique_buffer_counts, ( + f"Unique buffer count mismatch: expected {_unique_buffer_counts}, " + f"got {unique_buffer_count}" + ) + + # Verify that buffers with the same signature can potentially be reused + # (the actual reuse depends on the schedule, but the mechanism should work) + if num_microbatches > 1 and num_graphable_layers > 0: + # Check that we have multiple entries with the same signature + has_duplicate_signatures = any( + len(indices) > 1 for indices in sample_keys_to_indices.values() + ) + assert has_duplicate_signatures, ( + "Memory optimization: expected duplicate signatures for buffer reuse, " + "but all signatures are unique" + ) + + # If we have duplicate signatures and the schedule allows it, + # some buffers should be reused (max_reuse > 1) + # Note: The exact amount of reuse depends on the schedule order + # With 1F1B interleaved schedule, we should see some reuse + if max_reuse > 1: + # Verify that reused buffers have the same signature + reused_tensors = [ptr for ptr, count in tensor_reuse_count.items() if count > 1] + assert len(reused_tensors) > 0, "Expected some reused tensors" + + # Verify that make_graphed_callables_kwargs contains expected keys + assert ( + '_order' in make_graphed_callables_kwargs + ), "make_graphed_callables_kwargs should contain '_order'" + assert ( + 'num_warmup_iters' in make_graphed_callables_kwargs + ), "make_graphed_callables_kwargs should contain 'num_warmup_iters'" + assert ( + 'allow_unused_input' in make_graphed_callables_kwargs + ), "make_graphed_callables_kwargs should contain 'allow_unused_input'" + + # Verify the order in kwargs matches expectations + order = 
make_graphed_callables_kwargs['_order'] + num_model_chunks = cuda_graph_helper.num_model_chunks + expected_order_length = num_microbatches * num_model_chunks * 2 + assert ( + len(order) == expected_order_length + ), f"Order length mismatch: expected {expected_order_length}, got {len(order)}" + + # Verify that all forward passes in order have corresponding entries in sample_args + forward_count = sum(1 for chunk_id in order if chunk_id > 0) + assert forward_count == num_microbatches * num_model_chunks, ( + f"Forward count mismatch: expected {num_microbatches * num_model_chunks}, " + f"got {forward_count}" + ) + + def is_deep_ep_available(): from megatron.core.transformer.moe.fused_a2a import HAVE_DEEP_EP @@ -912,8 +1164,6 @@ def _run_test_helper( assert len(gpt_model) == 1 # Assume only one model in the model provider. if cuda_graph_impl == "transformer_engine": - from megatron.core.transformer.cuda_graphs import TECudaGraphHelper - self.cuda_graph_helper = TECudaGraphHelper( model=gpt_model, config=gpt_model[0].config, From b0f5746735a965e67852d936a8fd0ef8928e9a81 Mon Sep 17 00:00:00 2001 From: Lifu Zhang Date: Tue, 2 Dec 2025 06:14:02 -0800 Subject: [PATCH 169/334] Fix HSDP Registering Device Mesh (#2388) Signed-off-by: Lifu Zhang Co-authored-by: Lifu Zhang Co-authored-by: Zijie Yan --- megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py index b94a332bb0d..2b8eccb69d3 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py @@ -772,6 +772,8 @@ def register_submesh(device_mesh, submesh, is_expert_parallel): # Register EP submeshes if self.expt_device_mesh is not None: + register_submesh(self.device_mesh, hsdp_submesh, True) + register_submesh(self.device_mesh, hsdp_tp_submesh, True) 
register_submesh(self.expt_device_mesh, tp_submesh, True) register_submesh(self.expt_device_mesh, fsdp_tp_submesh, True) register_submesh(self.expt_device_mesh, fsdp_submesh, True) From 5375ad418ba3362d720badfa7f495b34ba49b962 Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Tue, 2 Dec 2025 10:31:32 -0800 Subject: [PATCH 170/334] fix: update baseline (#2468) Signed-off-by: Pablo Garay --- .../workflows/check_api_backwards_compatibility_workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check_api_backwards_compatibility_workflow.yml b/.github/workflows/check_api_backwards_compatibility_workflow.yml index 5f6adec4c91..c8f247b8439 100644 --- a/.github/workflows/check_api_backwards_compatibility_workflow.yml +++ b/.github/workflows/check_api_backwards_compatibility_workflow.yml @@ -66,7 +66,7 @@ jobs: # Default baseline for automatic PR checks # Can be: branch name (e.g., 'main'), commit hash, or tag # Will be resolved to commit hash during execution - DEFAULT_BASELINE: 'c6f277a7f869274c19aace594582d9938b06abac' + DEFAULT_BASELINE: 'b0f5746735a965e67852d936a8fd0ef8928e9a81' # Tag pattern for auto-detection (e.g., 'core_r*', 'core_v*') TAG_PATTERN: 'core_v*' # Tag regex filter (e.g., '^core_v[0-9]+\.[0-9]+\.[0-9]+$' for stable versions only) From 79660b7bedd8ab18f36a712ed4c3de3d3fbc4e6a Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Tue, 2 Dec 2025 11:37:29 -0800 Subject: [PATCH 171/334] fix: Add merge_group support with pre-flight pattern (#2469) Signed-off-by: Pablo Garay --- ...k_api_backwards_compatibility_workflow.yml | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/.github/workflows/check_api_backwards_compatibility_workflow.yml b/.github/workflows/check_api_backwards_compatibility_workflow.yml index c8f247b8439..707d5f76316 100644 --- a/.github/workflows/check_api_backwards_compatibility_workflow.yml +++ b/.github/workflows/check_api_backwards_compatibility_workflow.yml @@ -3,7 
+3,12 @@ name: API Compatibility Check on: push: branches: - - "pull-request/[0-9]+" + - dev + - main + - 'pull-request/[0-9]+' + - 'deploy-release/*' + merge_group: + types: [checks_requested] # Allow manual trigger workflow_dispatch: @@ -33,17 +38,35 @@ jobs: echo "Manual trigger - will run compatibility check" exit 0 fi - - # Check if any relevant files changed - # Use merge-base to find common ancestor with dev - # This ensures we only detect changes actually made in this PR branch, - # not changes that happened in dev after the branch was created - BASE_SHA=$(git merge-base origin/dev HEAD) - echo "Comparing against merge-base: $BASE_SHA" - + + # Determine base SHA based on event type + if [ "${{ github.event_name }}" == "merge_group" ]; then + BASE_SHA="${{ github.event.merge_group.base_sha }}" + echo "Merge group event - comparing against base: $BASE_SHA" + else + # For push events, use merge-base to find common ancestor + # This ensures we only detect changes actually made in this PR branch, + # not changes that happened in dev after the branch was created + BASE_SHA=$(git merge-base origin/dev HEAD 2>/dev/null || echo "") + if [ -z "$BASE_SHA" ]; then + # Fallback for branches targeting main + BASE_SHA=$(git merge-base origin/main HEAD 2>/dev/null || echo "") + fi + echo "Push event - comparing against merge-base: $BASE_SHA" + fi + + if [ -z "$BASE_SHA" ]; then + echo "Could not determine base SHA - will run compatibility check" + echo "should_skip=false" >> $GITHUB_OUTPUT + exit 0 + fi + # Check for changes in megatron/core Python files (excluding tests and legacy) - CHANGED_FILES=$(git diff --name-only "$BASE_SHA" HEAD -- 'megatron/core/**/*.py' ':!megatron/core/tests/**' ':!megatron/legacy/**' || echo "") - + CHANGED_FILES=$(git diff --name-only "$BASE_SHA" HEAD -- \ + 'megatron/core/**/*.py' \ + ':!megatron/core/tests/**' \ + ':!megatron/legacy/**' 2>/dev/null || echo "") + if [ -z "$CHANGED_FILES" ]; then echo "should_skip=true" >> $GITHUB_OUTPUT echo 
"No relevant megatron/core files changed - will skip compatibility check" From d72b218d45e0ef7964331f06498b688f6dcf5227 Mon Sep 17 00:00:00 2001 From: Lifu Zhang Date: Wed, 3 Dec 2025 00:44:55 -0800 Subject: [PATCH 172/334] DeepSeek V3 FSDP Fix for Precision-Aware Optimizer (#2204) Signed-off-by: Lifu Zhang Co-authored-by: Lifu Zhang Co-authored-by: Jianbin Chang --- .../fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 5 +++-- megatron/training/training.py | 2 -- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index 6a294b69602..88254d89988 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -2474,8 +2474,9 @@ def update_main_grads(self): item_id, only_shard=sharded_optimizer_state ) if group.main_weight_buffer is not None: - # Convert the gradient to the main weight buffer dtype. - optimizer_grad = optimizer_grad.to(param.dtype) + if not getattr(self, "use_precision_aware_optimizer", False): + # Convert the gradient to the main weight buffer dtype. + optimizer_grad = optimizer_grad.to(param.dtype) if name not in self.dist_main_grad: # Register the gradient as a distributed tensor. 
diff --git a/megatron/training/training.py b/megatron/training/training.py index e88b9839d28..d47a8abd20e 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1086,8 +1086,6 @@ def build_model(): kwargs['pad_buckets_for_high_nccl_busbw'] = args.ddp_pad_buckets_for_high_nccl_busbw kwargs['reduce_scatter_with_fp32_accumulation'] = args.ddp_reduce_scatter_with_fp32_accumulation kwargs['average_in_collective'] = args.ddp_average_in_collective - if args.use_megatron_fsdp and args.use_precision_aware_optimizer: - kwargs["preserve_fp32_weights"] = False ddp_config = DistributedDataParallelConfig(**kwargs) # In the Megatron FSDP and DDP use path, we need to initialize the bucket size. From 436065a86b749ca3b50eebca68f55c9e690a9f63 Mon Sep 17 00:00:00 2001 From: Hongbin Liu Date: Wed, 3 Dec 2025 21:31:57 +0800 Subject: [PATCH 173/334] [Dev] fix(moe): minor refactor for fine-grained activation offloading (#2285) Signed-off-by: Hongbin Liu Co-authored-by: Zijie Yan --- .../core/extensions/transformer_engine.py | 10 +++- .../fine_grained_activation_offload.py | 48 +++---------------- megatron/core/pipeline_parallel/utils.py | 33 +++++++++++++ 3 files changed, 48 insertions(+), 43 deletions(-) diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 85732c0f7ea..9da6e85d8e9 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -2187,8 +2187,14 @@ def set_save_original_input(module): try: # pylint: disable=unused-import - from transformer_engine.pytorch import cpu_offload + from transformer_engine.pytorch import cpu_offload_v1 as cpu_offload +except ImportError: + try: + from transformer_engine.pytorch import cpu_offload + except ImportError: + cpu_offload = None +try: + # pylint: disable=unused-import from transformer_engine.pytorch.float8_tensor import Float8Tensor except ImportError: Float8Tensor = None - cpu_offload = None diff 
--git a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py index 1e280a09d35..138dcd8f7b1 100644 --- a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py +++ b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py @@ -1,12 +1,13 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -import warnings from collections import deque from contextlib import nullcontext from typing import Any import torch +from megatron.core.pipeline_parallel.utils import set_ideal_affinity_for_current_gpu + # CPU offload implementation for pipeline parallelism DEBUG = False DEBUG_RANK = 0 @@ -22,39 +23,6 @@ def debug_rank(message): print(message) -def set_ideal_affinity_for_current_gpu(): - """Set CPU affinity for the current GPU to optimize host-device transfers.""" - import uuid - - try: - import cuda.bindings.driver as cuda_driver - import cuda.bindings.runtime as cuda_runtime - except ImportError: - try: - import cuda.cuda as cuda_driver - import cuda.cudart as cuda_runtime - except ImportError: - # print("cuda-python may not be installed, skipping GPU affinity setting") - warnings.warn("cuda-python may not be installed, skipping GPU affinity setting") - return - try: - import pynvml - except ImportError: - warnings.warn("pynvml is not installed, skipping GPU affinity setting") - return - - # Get current CUDA device ID - err, device_id = cuda_runtime.cudaGetDevice() - assert err == cuda_runtime.cudaError_t.cudaSuccess - # Get device UUID - err, device_uuid = cuda_driver.cuDeviceGetUuid(device_id) - assert err == cuda_driver.CUresult.CUDA_SUCCESS - # Set CPU affinity based on GPU's NUMA node - pynvml.nvmlInit() - handle = pynvml.nvmlDeviceGetHandleByUUID("GPU-" + str(uuid.UUID(bytes=device_uuid.bytes))) - pynvml.nvmlDeviceSetCpuAffinity(handle) - - class PipelineOffloadManager: """ Singleton manager for coordinating activation offloading across 
pipeline stages. @@ -200,6 +168,8 @@ def __enter__(self): if cpu_offload is not None: cpu_offload.CPUOffloadEnabled = True + else: + raise RuntimeError("TE CPU offload is not available") self.inside_context = True torch._C._autograd._push_saved_tensors_default_hooks( @@ -213,6 +183,8 @@ def __exit__(self, *args: Any): if cpu_offload is not None: cpu_offload.CPUOffloadEnabled = False + else: + raise RuntimeError("TE CPU offload is not available") self.inside_context = False torch._C._autograd._pop_saved_tensors_default_hooks() @@ -244,24 +216,18 @@ class ChunkOffloadHandler: def offload(src_tensor, pin_memory=True): """Offload.""" debug_rank("--------offload") - from megatron.core.extensions.transformer_engine import Float8Tensor - - fp8_offload = isinstance(src_tensor, Float8Tensor) if Float8Tensor is not None else False if not src_tensor.is_contiguous(): src_tensor = src_tensor.contiguous() cpu_backup = torch.empty( src_tensor.size(), - dtype=torch.uint8 if fp8_offload else src_tensor.dtype, + dtype=src_tensor.dtype, layout=src_tensor.layout, device="cpu", pin_memory=pin_memory, ) - if fp8_offload: - cpu_backup = Float8Tensor.make_like(src_tensor, data=cpu_backup) - cpu_backup.copy_(src_tensor, non_blocking=pin_memory) state = (src_tensor.device, cpu_backup) return state diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index fae8e5466da..c50c6ac7964 100644 --- a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -80,6 +80,39 @@ def make_viewless(e): return e +def set_ideal_affinity_for_current_gpu(): + """Set CPU affinity for the current GPU to optimize host-device transfers.""" + import uuid + + try: + import cuda.bindings.driver as cuda_driver + import cuda.bindings.runtime as cuda_runtime + except ImportError: + try: + import cuda.cuda as cuda_driver + import cuda.cudart as cuda_runtime + except ImportError: + # print("cuda-python may not be installed, skipping GPU affinity 
setting") + warnings.warn("cuda-python may not be installed, skipping GPU affinity setting") + return + try: + import pynvml + except ImportError: + warnings.warn("pynvml is not installed, skipping GPU affinity setting") + return + + # Get current CUDA device ID + err, device_id = cuda_runtime.cudaGetDevice() + assert err == cuda_runtime.cudaError_t.cudaSuccess + # Get device UUID + err, device_uuid = cuda_driver.cuDeviceGetUuid(device_id) + assert err == cuda_driver.CUresult.CUDA_SUCCESS + # Set CPU affinity based on GPU's NUMA node + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByUUID("GPU-" + str(uuid.UUID(bytes=device_uuid.bytes))) + pynvml.nvmlDeviceSetCpuAffinity(handle) + + @contextmanager def stream_acquire_context(stream, event): """Stream acquire context""" From a4bee49f1460f7831e88e04e95e2b86f95185709 Mon Sep 17 00:00:00 2001 From: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> Date: Thu, 4 Dec 2025 09:38:54 -1000 Subject: [PATCH 174/334] [Dev] feat: m4 leftover changes (#2226) Signed-off-by: dimapihtar Signed-off-by: yaoyu-33 Co-authored-by: dimapihtar --- .../distributed/distributed_data_parallel.py | 64 ++++++------------- .../core/extensions/transformer_engine.py | 37 ++++++----- megatron/core/hyper_comm_grid.py | 1 - megatron/core/optimizer/__init__.py | 31 ++++++--- megatron/core/optimizer/clip_grads.py | 3 +- megatron/core/optimizer/optimizer.py | 5 +- megatron/core/pipeline_parallel/schedules.py | 17 +++-- megatron/core/tensor_parallel/layers.py | 12 ++-- megatron/core/transformer/module.py | 18 +++++- megatron/core/transformer/moe/experts.py | 13 ++-- megatron/core/transformer/moe/moe_utils.py | 28 +++++--- .../core/transformer/moe/shared_experts.py | 4 +- .../transformer/multi_latent_attention.py | 27 ++++++-- megatron/core/utils.py | 24 ++++++- megatron/training/training.py | 6 ++ 15 files changed, 181 insertions(+), 109 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py 
b/megatron/core/distributed/distributed_data_parallel.py index df1d7ae94db..e831d7cf4ec 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -6,7 +6,6 @@ import torch -from .. import parallel_state from ..config_logger import has_config_logger_enabled, log_config_to_disk from ..fp8_utils import is_float8tensor, post_all_gather_processing from ..process_groups_config import ProcessGroupCollection @@ -55,10 +54,15 @@ def __init__( # If using very large dp_sizes, make buckets larger to ensure that chunks used in NCCL # ring-reduce implementations are large enough to remain bandwidth-bound rather than # latency-bound. + # Setup process groups, handling both None and provided pg_collection values. + process_group_dict = ProcessGroupCollection.setup_process_groups_for_ddp( + pg_collection, config, ddp_config + ) + + # If bucket_size is not provided as an input, use sane default based on dp_group size. + dp_group = process_group_dict['dp_group'] if ddp_config.bucket_size is None: - ddp_config.bucket_size = max( - 40000000, 1000000 * parallel_state.get_data_parallel_world_size() - ) + ddp_config.bucket_size = max(40000000, 1000000 * dp_group.size()) # Set bucket_size to infinity if overlap_grad_reduce is False. 
if not ddp_config.overlap_grad_reduce: ddp_config.bucket_size = None @@ -70,45 +74,19 @@ def __init__( f'Setting up DistributedDataParallel with config {self.ddp_config}', ) - if pg_collection is None: - self.dp_group = parallel_state.get_data_parallel_group( - with_context_parallel=False, partial_data_parallel=False - ) - self.dp_cp_group = parallel_state.get_data_parallel_group( - with_context_parallel=True, partial_data_parallel=False - ) - self.intra_dp_cp_group = parallel_state.get_data_parallel_group( - with_context_parallel=True, partial_data_parallel=True - ) - self.expt_dp_group = parallel_state.get_expert_data_parallel_group() - self.intra_expt_dp_group = parallel_state.get_expert_data_parallel_group( - partial_expert_data_parallel=True - ) - if self.ddp_config.num_distributed_optimizer_instances > 1: - self.inter_dist_opt_group = ( - parallel_state.get_inter_distributed_optimizer_instance_group() - ) - self.tp_group = parallel_state.get_tensor_model_parallel_group() - self.pp_group = parallel_state.get_pipeline_model_parallel_group() - self.ep_group = parallel_state.get_expert_model_parallel_group() - else: - # Setup process groups using DDP-specific helper method - process_groups = ProcessGroupCollection.setup_process_groups_for_ddp( - pg_collection, config, self.ddp_config - ) - - self.dp_group = process_groups['dp_group'] - self.dp_cp_group = process_groups['dp_cp_group'] - self.intra_dp_cp_group = process_groups['intra_dp_cp_group'] - self.expt_dp_group = process_groups['expt_dp_group'] - self.intra_expt_dp_group = process_groups['intra_expt_dp_group'] - self.tp_group = process_groups['tp_group'] - self.pp_group = process_groups['pp_group'] - self.ep_group = process_groups['ep_group'] - - # Set inter_dist_opt_group if multiple optimizer instances - if self.ddp_config.num_distributed_optimizer_instances > 1: - self.inter_dist_opt_group = process_groups['inter_dist_opt_group'] + # Assign all required process groups + self.dp_group = 
process_group_dict['dp_group'] + self.dp_cp_group = process_group_dict['dp_cp_group'] + self.intra_dp_cp_group = process_group_dict['intra_dp_cp_group'] + self.expt_dp_group = process_group_dict['expt_dp_group'] + self.intra_expt_dp_group = process_group_dict['intra_expt_dp_group'] + self.tp_group = process_group_dict['tp_group'] + self.pp_group = process_group_dict['pp_group'] + self.ep_group = process_group_dict['ep_group'] + + # Set inter_dist_opt_group if multiple optimizer instances + if self.ddp_config.num_distributed_optimizer_instances > 1: + self.inter_dist_opt_group = process_group_dict['inter_dist_opt_group'] # Turn off bucketing if we are on a pipeline stage that is not the first (since # data-parallel communication on these stages is not on the critical path), or if diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 9da6e85d8e9..ab9962cfb1c 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -20,9 +20,6 @@ from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.parallel_state import ( get_context_parallel_group, - get_expert_data_parallel_rank, - get_expert_model_parallel_rank, - get_expert_model_parallel_world_size, get_hierarchical_context_parallel_groups, get_tensor_model_parallel_group, get_tensor_model_parallel_world_size, @@ -372,9 +369,10 @@ def __init__( extra_kwargs["rng_tracker_name"] = rng_tracker_name te_parallel_mode = parallel_mode + tp_group_for_te = tp_group if parallel_mode == "duplicated": # Handle non-parallel case - tp_group = None + tp_group_for_te = None tp_size = 1 explicit_expert_comm = False te_parallel_mode = None @@ -389,7 +387,7 @@ def __init__( input_size = divide(input_size, tp_size) te_parallel_mode = None tp_size = 1 - tp_group = None + tp_group_for_te = None super().__init__( in_features=input_size, @@ -397,7 +395,7 @@ def __init__( 
sequence_parallel=self.config.sequence_parallel, fuse_wgrad_accumulation=self.config.gradient_accumulation_fusion, # Pass None if not initialized for backward compatibility with the ckpt converter. - tp_group=tp_group if torch.distributed.is_initialized() else None, + tp_group=tp_group_for_te if torch.distributed.is_initialized() else None, tp_size=tp_size, get_rng_state_tracker=( get_cuda_rng_tracker if get_cuda_rng_tracker().is_initialized() else None @@ -1166,7 +1164,7 @@ def __init__( skip_bias_add: bool, is_expert: bool = False, tp_comm_buffer_name: Optional[str] = None, - tp_group: Optional[torch.distributed.ProcessGroup] = None, + pg_collection: Optional[ProcessGroupCollection] = None, ): self.config = config @@ -1197,9 +1195,14 @@ def __init__( # The comms between TP and EP group is explicitly handled by MoE token dispatcher. # So we disable comms by making TE agnostic of model parallel. - tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert) + if pg_collection is None: + pg_collection = ProcessGroupCollection.use_mpu_process_groups() + self._pg_collection = pg_collection + assert is_expert, "TEGroupedLinear only supports expert parallelism" + tp_group = pg_collection.expt_tp self._tp_group = tp_group tp_size = get_pg_size(tp_group) + tp_group_for_te = tp_group self.explicit_expert_comm = is_expert and (tp_size > 1 or self.expert_parallel) @@ -1210,7 +1213,7 @@ def __init__( input_size = divide(input_size, tp_size) parallel_mode = None tp_size = 1 - tp_group = None + tp_group_for_te = None super().__init__( num_gemms=num_gemms, @@ -1218,7 +1221,7 @@ def __init__( out_features=output_size, sequence_parallel=self.config.sequence_parallel, fuse_wgrad_accumulation=self.config.gradient_accumulation_fusion, - tp_group=tp_group if torch.distributed.is_initialized() else None, + tp_group=tp_group_for_te if torch.distributed.is_initialized() else None, tp_size=tp_size, get_rng_state_tracker=( get_cuda_rng_tracker if 
get_cuda_rng_tracker().is_initialized() else None @@ -1411,8 +1414,8 @@ def _sharded_state_dict_grouped( singleton_local_shards = (metadata or {}).get('singleton_local_shards', False) sharded_state_dict = {} full_state_dict = self.state_dict(prefix="", keep_vars=True) - num_global_experts = get_expert_model_parallel_world_size() * self.num_gemms - local_expert_indices_offset = get_expert_model_parallel_rank() * self.num_gemms + num_global_experts = get_pg_size(self._pg_collection.ep) * self.num_gemms + local_expert_indices_offset = get_pg_rank(self._pg_collection.ep) * self.num_gemms ep_axis = len(sharded_offsets) extra_states = self._split_extra_state(full_state_dict["_extra_state"]) for gemm_idx in range(self.num_gemms): @@ -1461,7 +1464,7 @@ def _sharded_state_dict_grouped( if getattr(sh_ten, "is_data_parallel_fully_shard", False): edp_replica_id = 0 else: - edp_replica_id = get_expert_data_parallel_rank() + edp_replica_id = get_pg_rank(self._pg_collection.expt_dp) sh_ten.replica_id = (*replica_id[:2], edp_replica_id) return sharded_state_dict @@ -1491,7 +1494,7 @@ def __init__( skip_bias_add: bool, is_expert: bool, tp_comm_buffer_name: Optional[str] = None, - tp_group: Optional[torch.distributed.ProcessGroup] = None, + pg_collection: Optional[ProcessGroupCollection] = None, ): super().__init__( num_gemms=num_gemms, @@ -1504,7 +1507,7 @@ def __init__( skip_bias_add=skip_bias_add, is_expert=is_expert, tp_comm_buffer_name=tp_comm_buffer_name, - tp_group=tp_group, + pg_collection=pg_collection, ) def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): @@ -1537,7 +1540,7 @@ def __init__( skip_bias_add: bool, is_expert: bool, tp_comm_buffer_name: Optional[str] = None, - tp_group: Optional[torch.distributed.ProcessGroup] = None, + pg_collection: Optional[ProcessGroupCollection] = None, ): super().__init__( num_gemms=num_gemms, @@ -1550,7 +1553,7 @@ def __init__( skip_bias_add=skip_bias_add, is_expert=is_expert, 
tp_comm_buffer_name=tp_comm_buffer_name, - tp_group=tp_group, + pg_collection=pg_collection, ) def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): diff --git a/megatron/core/hyper_comm_grid.py b/megatron/core/hyper_comm_grid.py index dce2aa16a7f..379bca69f74 100644 --- a/megatron/core/hyper_comm_grid.py +++ b/megatron/core/hyper_comm_grid.py @@ -160,7 +160,6 @@ def create_pg(self, dims: Union[str, list[str]], **kwargs: Any) -> dist.ProcessG logging.info(f"Generated process group for {unique_group_key} with enumeration {rank_enum}") self._pgs[unique_group_key] = pg - return pg def get_pg(self, dims: Union[str, list[str]]) -> dist.ProcessGroup: diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index c254b2f6882..1496cc7d17a 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -284,6 +284,7 @@ def _get_megatron_optimizer_based_on_param_groups( data_parallel_group_idx: Optional[int] = None, intra_dist_opt_group: Optional[torch.distributed.ProcessGroup] = None, distributed_optimizer_instance_id: Optional[int] = 0, + pg_collection: Optional[ProcessGroupCollection] = None, ) -> MegatronOptimizer: """Get Megatron optimizer based on parameter groups. @@ -470,6 +471,13 @@ def init_state_fn(opt, config=None): optimizer = FP32Optimizer(optimizer, config, init_state_fn) setattr(optimizer, 'grad_stats_parallel_group', model_parallel_group) + if pg_collection is None or not hasattr(pg_collection, 'tp'): + tp_group = parallel_state.get_tensor_model_parallel_group() + else: + tp_group = pg_collection.tp + # TODO(M4): plumb tp_group through optimizer constructors so this setattr disappears. 
+ setattr(optimizer, 'tp_group', tp_group) + return optimizer @@ -521,23 +529,23 @@ def get_megatron_optimizer( overlap_param_gather_with_optimizer_step_flags = [False] # Setup process groups using helper method - process_groups = ProcessGroupCollection.setup_process_groups_for_optimizer( + process_groups_dict = ProcessGroupCollection.setup_process_groups_for_optimizer( pg_collection, model_chunks, use_gloo_process_groups ) - dp_cp_group = process_groups['dp_cp_group'] - intra_dp_cp_group = process_groups['intra_dp_cp_group'] - intra_expt_dp_group = process_groups['intra_expt_dp_group'] - mp_group = process_groups['mp_group'] - expt_tp_pp_group = process_groups['expt_tp_pp_group'] - intra_dp_cp_group_gloo = process_groups['intra_dp_cp_group_gloo'] - intra_expt_dp_group_gloo = process_groups['intra_expt_dp_group_gloo'] - intra_dist_opt_group = process_groups['intra_dist_opt_group'] + dp_cp_group = process_groups_dict['dp_cp_group'] + intra_dp_cp_group = process_groups_dict['intra_dp_cp_group'] + intra_expt_dp_group = process_groups_dict['intra_expt_dp_group'] + mp_group = process_groups_dict['mp_group'] + expt_tp_pp_group = process_groups_dict['expt_tp_pp_group'] + intra_dp_cp_group_gloo = process_groups_dict['intra_dp_cp_group_gloo'] + intra_expt_dp_group_gloo = process_groups_dict['intra_expt_dp_group_gloo'] + intra_dist_opt_group = process_groups_dict['intra_dist_opt_group'] model_parallel_rank = get_pg_rank(mp_group) if get_pg_size(dp_cp_group) > get_pg_size(intra_dp_cp_group): - inter_dist_opt_group = process_groups['inter_dist_opt_group'] + inter_dist_opt_group = process_groups_dict['inter_dist_opt_group'] distributed_optimizer_instance_id = get_pg_rank(inter_dist_opt_group) else: distributed_optimizer_instance_id = 0 @@ -573,6 +581,7 @@ def get_megatron_optimizer( data_parallel_group_idx=model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, + pg_collection=pg_collection, ) ) 
model_chunk_offset += 1 @@ -623,6 +632,7 @@ def get_megatron_optimizer( data_parallel_group_idx=model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, + pg_collection=pg_collection, ) ) model_chunk_offset += 1 @@ -663,6 +673,7 @@ def get_megatron_optimizer( data_parallel_group_idx=expt_model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, + pg_collection=pg_collection, ) ) diff --git a/megatron/core/optimizer/clip_grads.py b/megatron/core/optimizer/clip_grads.py index 70117858b77..cb2f23a685f 100644 --- a/megatron/core/optimizer/clip_grads.py +++ b/megatron/core/optimizer/clip_grads.py @@ -181,6 +181,7 @@ def count_zeros_fp32( parameters: Union[List[torch.Tensor], torch.Tensor], grad_stats_parallel_group: torch.distributed.ProcessGroup, use_decoupled_grad: bool = False, + tp_group: Optional[torch.distributed.ProcessGroup] = None, ) -> float: """Counts the number of zeros in gradients associated with the passed-in list of parameters. 
@@ -218,7 +219,7 @@ def count_zeros_fp32( grad_attr = "decoupled_grad" if use_decoupled_grad else "grad" grad_not_none = hasattr(param, grad_attr) and getattr(param, grad_attr) is not None is_not_shared = param_is_not_shared(param) - is_not_tp_duplicate = param_is_not_tensor_parallel_duplicate(param) + is_not_tp_duplicate = param_is_not_tensor_parallel_duplicate(param, tp_group=tp_group) if grad_not_none and is_not_shared and is_not_tp_duplicate: grad_obj = getattr(param, grad_attr) data_parallel_group = get_data_parallel_group_if_dtensor(grad_obj, data_parallel_group) diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 1829cb424f1..8d6fb65136b 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -152,7 +152,9 @@ def get_main_grads_for_grad_norm(self) -> List[torch.Tensor]: grad = param.grad grad_not_none = grad is not None is_not_shared = param_is_not_shared(param) - is_not_tp_duplicate = tensor_parallel.param_is_not_tensor_parallel_duplicate(param) + is_not_tp_duplicate = tensor_parallel.param_is_not_tensor_parallel_duplicate( + param, getattr(self, 'tp_group', None) + ) if grad_not_none and is_not_shared and is_not_tp_duplicate: grads_for_norm.append(grad) @@ -224,6 +226,7 @@ def count_zeros(self) -> float: params, grad_stats_parallel_group=self.get_grad_stats_parallel_group(), use_decoupled_grad=self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8, + tp_group=getattr(self, 'tp_group', None), ) @abstractmethod diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index 18344429c45..97d8aefad85 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -41,7 +41,7 @@ Shape = Union[List[int], torch.Size] -def get_forward_backward_func(): +def get_forward_backward_func(pp_size: Optional[int] = None, vp_size: Optional[int] = None): """Retrieves the appropriate 
forward_backward function given the configuration of parallel_state. @@ -124,10 +124,18 @@ def forward_step(data_iterator, model): respective list of shapes. Thus it is not used in the other forward-backward functions which have different shape handling. + Args: + pp_size (Optional[int]): Pipeline model parallel size to use. + vp_size (Optional[int]): Virtual pipeline model parallel size to use. + If both pp_size and vp_size are None, both values fall back to parallel_state. + Otherwise, provided values are used as-is and None is treated as an explicit input. """ - pipeline_model_parallel_size = parallel_state.get_pipeline_model_parallel_world_size() - if pipeline_model_parallel_size > 1: - if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: + if pp_size is None and vp_size is None: + pp_size = parallel_state.get_pipeline_model_parallel_world_size() + vp_size = parallel_state.get_virtual_pipeline_model_parallel_world_size() + + if pp_size > 1: + if vp_size is not None: forward_backward_func = forward_backward_pipelining_with_interleaving else: forward_backward_func = forward_backward_pipelining_without_interleaving @@ -513,6 +521,7 @@ def forward_backward_no_pipelining( collect_non_loss_data: bool = False, first_val_step: Optional[bool] = None, adjust_tensor_shapes_fn: Optional[Callable] = None, # unused + p2p_communicator: Optional[P2PCommunicator] = None, # unused pg_collection: Optional[ProcessGroupCollection] = None, ): """Run forward and backward passes with no pipeline parallelism""" diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 221f3327e50..d3ec11aaf5c 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -86,12 +86,16 @@ dist_reduce_scatter_func = torch.distributed._reduce_scatter_base -def param_is_not_tensor_parallel_duplicate(param): +def param_is_not_tensor_parallel_duplicate(param, tp_group=None): """Returns true if the 
passed-in parameter is not a duplicate parameter on another TP rank.""" - return (hasattr(param, "tensor_model_parallel") and param.tensor_model_parallel) or ( - get_tensor_model_parallel_rank() == 0 - ) + if hasattr(param, "tensor_model_parallel") and param.tensor_model_parallel: + return True + # Prefer provided tp_group when available (new explicit path). + if tp_group is not None: + return tp_group.rank() == 0 + # Fallback to legacy global state (back-compat). + return get_tensor_model_parallel_rank() == 0 def set_tensor_model_parallel_attributes(tensor, is_parallel, dim, stride): diff --git a/megatron/core/transformer/module.py b/megatron/core/transformer/module.py index 1058a207b12..2330df91b52 100644 --- a/megatron/core/transformer/module.py +++ b/megatron/core/transformer/module.py @@ -393,7 +393,9 @@ def __init__(self, config: TransformerConfig, module: torch.nn.Module): self.config = config self.fp16 = config.fp16 self.bf16 = config.bf16 + self.vp_size = config.virtual_pipeline_model_parallel_size self.vp_stage = getattr(module, 'vp_stage', None) + self.pg_collection = getattr(module, 'pg_collection', None) if self.fp16: self.add_module('module', module.half()) @@ -438,11 +440,23 @@ def forward(self, *inputs, fp32_output=True, **kwargs): The wrapped module's outputs, potentially upcast to fp32 depending on pipeline stage and ``fp32_output``. 
""" - if parallel_state.is_pipeline_first_stage(ignore_virtual=False, vp_stage=self.vp_stage): + from megatron.core.pipeline_parallel.utils import ( + is_pp_first_stage, + is_pp_last_stage, + is_vp_first_stage, + is_vp_last_stage, + ) + + if self.pg_collection is None: + pp_group = parallel_state.get_pipeline_model_parallel_group() + else: + pp_group = self.pg_collection.pp + if is_vp_first_stage(self.vp_stage, self.vp_size) and is_pp_first_stage(pp_group): inputs = fp32_to_float16(inputs, self.float16_convertor) outputs = self.module(*inputs, **kwargs) if ( - parallel_state.is_pipeline_last_stage(ignore_virtual=False, vp_stage=self.vp_stage) + is_vp_last_stage(self.vp_stage, self.vp_size) + and is_pp_last_stage(pp_group) and fp32_output is True ): outputs = float16_to_fp32(outputs) diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index 7391bcaf123..83cf5b51ffc 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -50,6 +50,7 @@ make_sharded_object_for_checkpoint, sharded_state_dict_default, ) +from megatron.core.utils import internal_api try: import transformer_engine as te # pylint: disable=unused-import @@ -69,6 +70,8 @@ class GroupedMLP(MegatronModule): Executes multiple experts in parallel to maximize computational efficiency. """ + # TODO(M4): breaking api, switched from pass in tp_group to pass in pg_collection. + @internal_api def __init__( self, num_local_experts: int, @@ -732,6 +735,8 @@ class TEGroupedMLP(MegatronModule): Executes multiple experts in parallel to maximize computational efficiency. """ + # TODO(M4): breaking api, switched from pass in tp_group to pass in pg_collection. 
+ @internal_api def __init__( self, num_local_experts, @@ -754,7 +759,6 @@ def __init__( if self.config.gated_linear_unit: ffn_hidden_size *= 2 - # TODO(Hepteract): pass pg_collection to submodule after refactoring Linear modules self.linear_fc1 = build_module( submodules.linear_fc1, self.num_local_experts, @@ -766,7 +770,7 @@ def __init__( skip_bias_add=False, is_expert=True, tp_comm_buffer_name='fc1', - tp_group=pg_collection.expt_tp, + pg_collection=pg_collection, ) if self.config.use_te_activation_func and not (submodules.activation_func is None): @@ -774,7 +778,6 @@ def __init__( else: self.activation_func = self.config.activation_func - # TODO(Hepteract): pass pg_collection to submodule after refactoring Linear modules self.linear_fc2 = build_module( submodules.linear_fc2, self.num_local_experts, @@ -786,7 +789,7 @@ def __init__( skip_bias_add=True, is_expert=True, tp_comm_buffer_name='fc2', - tp_group=pg_collection.expt_tp, + pg_collection=pg_collection, ) self.offload_expert_fc1 = ( @@ -1040,6 +1043,8 @@ class SequentialMLP(MegatronModule): This class executes each expert sequentially. """ + # TODO(M4): breaking api, switched from pass in tp_group to pass in pg_collection. 
+ @internal_api def __init__( self, num_local_experts, diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index 3ed31d375e2..8bab8d70065 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -755,18 +755,29 @@ def clear_aux_losses_tracker(): tracker[name]["values"].zero_() -def reduce_aux_losses_tracker_across_ranks(track_names: Optional[List[str]] = None): +def reduce_aux_losses_tracker_across_ranks( + track_names: Optional[List[str]] = None, pg_collection: Optional[ProcessGroupCollection] = None +): """Collect and reduce the auxiliary losses across ranks.""" tracker = get_moe_layer_wise_logging_tracker() if track_names is None: track_names = tracker.keys() + + if pg_collection is None: + # Use parallel_state groups + pp_group = parallel_state.get_pipeline_model_parallel_group() + dp_group = parallel_state.get_data_parallel_group( + with_context_parallel=False, partial_data_parallel=False + ) + else: + pp_group = pg_collection.pp + dp_group = pg_collection.dp + for name in track_names: values = tracker[name]["values"] # TODO(Hepteract): delete the usage of the global parallel_state. # Collect aux losses across PP. - torch.distributed.all_reduce( - values, group=parallel_state.get_pipeline_model_parallel_group() - ) + torch.distributed.all_reduce(values, group=pp_group) # Reduce aux losses across ranks. if tracker[name].get('reduce_group') is not None: torch.distributed.all_reduce(values, group=tracker[name].get('reduce_group')) @@ -778,11 +789,7 @@ def reduce_aux_losses_tracker_across_ranks(track_names: Optional[List[str]] = No # The `global_load_balancing_loss` already uses `tp_dp_cp_group` in `reduce_group`, # so we don't need to reduce it again. Others use `tp_cp_group` in `reduce_group`. 
if name != "global_load_balancing_loss": - torch.distributed.all_reduce( - values, - group=parallel_state.get_data_parallel_group(with_context_parallel=False), - op=torch.distributed.ReduceOp.AVG, - ) + torch.distributed.all_reduce(values, group=dp_group, op=torch.distributed.ReduceOp.AVG) def track_moe_metrics( @@ -797,6 +804,7 @@ def track_moe_metrics( num_layers: Optional[int] = None, moe_layer_freq: Optional[Union[int, List[int]]] = None, mtp_num_layers: Optional[int] = None, + pg_collection: Optional[ProcessGroupCollection] = None, ): """Track the MoE metrics for logging.""" # Aux loss logging @@ -810,7 +818,7 @@ def track_moe_metrics( tracker[key]["values"] = torch.zeros(num_layers, device="cuda") tracker[key]["reduce_group"] = None tracker[key]["avg_group"] = None - reduce_aux_losses_tracker_across_ranks(track_names) + reduce_aux_losses_tracker_across_ranks(track_names, pg_collection=pg_collection) # Get number of MoE layers if moe_layer_freq is None: diff --git a/megatron/core/transformer/moe/shared_experts.py b/megatron/core/transformer/moe/shared_experts.py index bf2c2072af9..ab075d94e52 100644 --- a/megatron/core/transformer/moe/shared_experts.py +++ b/megatron/core/transformer/moe/shared_experts.py @@ -1,7 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. import warnings -from copy import deepcopy +from copy import copy from typing import Optional import torch @@ -43,7 +43,7 @@ def __init__( gate: bool, pg_collection: Optional[ProcessGroupCollection] = None, ): - config = deepcopy(config) + config = copy(config) assert config.add_bias_linear == False, "bias is not supported in the shared experts, " "please set '--disable-bias-linear' instead." 
diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index 3953d933b45..b65294fcc10 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -15,7 +15,7 @@ HAVE_EINOPS = False -from megatron.core import parallel_state, tensor_parallel +from megatron.core import tensor_parallel from megatron.core.models.common.embeddings import ( RotaryEmbedding, YarnRotaryEmbedding, @@ -41,7 +41,7 @@ from megatron.core.transformer.enums import AttnMaskType from megatron.core.transformer.spec_utils import ModuleSpec, build_module from megatron.core.transformer.transformer_config import MLATransformerConfig -from megatron.core.utils import deprecate_inference_params, is_te_min_version +from megatron.core.utils import deprecate_inference_params, get_pg_size, is_te_min_version try: from megatron.core.fusions.fused_mla_yarn_rope_apply import ( @@ -178,6 +178,7 @@ def __init__( skip_bias_add=True, is_expert=False, tp_comm_buffer_name='proj', + tp_group=self.pg_collection.tp, ) if ( @@ -401,6 +402,9 @@ def __init__( cp_comm_type: Optional[str] = None, pg_collection: ProcessGroupCollection = None, ): + if pg_collection is None: + pg_collection = ProcessGroupCollection.use_mpu_process_groups() + super().__init__( config=config, submodules=submodules, @@ -450,6 +454,11 @@ def __init__( is_expert=False, tp_comm_buffer_name='q_down_proj', skip_weight_param_allocation=False, + tp_group=( + pg_collection.tp + if q_down_proj_kwargs.get('parallel_mode') != 'duplicated' + else None + ), **q_down_proj_kwargs, ) @@ -464,6 +473,7 @@ def __init__( skip_bias_add=False, is_expert=False, tp_comm_buffer_name='q_up_proj', + tp_group=pg_collection.tp, ) kv_down_proj_kwargs = {} @@ -489,6 +499,11 @@ def __init__( is_expert=False, tp_comm_buffer_name='kv_down_proj', skip_weight_param_allocation=False, + tp_group=( + pg_collection.tp + if kv_down_proj_kwargs.get('parallel_mode') 
!= 'duplicated' + else None + ), **kv_down_proj_kwargs, ) @@ -503,6 +518,7 @@ def __init__( skip_bias_add=False, is_expert=False, tp_comm_buffer_name='kv_up_proj', + tp_group=pg_collection.tp, ) if self.config.q_lora_rank is not None: @@ -624,12 +640,9 @@ def get_query_key_value_tensors( kv_compressed, k_pos_emb = torch.split( kv_combined, [self.config.kv_lora_rank, self.config.qk_pos_emb_head_dim], dim=-1 ) - if ( - parallel_state.get_tensor_model_parallel_world_size() > 1 - and self.config.sequence_parallel - ): + if get_pg_size(self.tp_group) > 1 and self.config.sequence_parallel: # k_pos_emb: [s, b, qk_pos_emb_head_dim] - k_pos_emb = gather_from_sequence_parallel_region(k_pos_emb) + k_pos_emb = gather_from_sequence_parallel_region(k_pos_emb, group=self.tp_group) if packed_seq_params is not None: # If sequence packing, TE expect [t, h, d] shaped qkv input. diff --git a/megatron/core/utils.py b/megatron/core/utils.py index 9b62b18d400..431b56bd002 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -494,6 +494,10 @@ def get_tensor_model_parallel_group_if_none(tp_group, is_expert=False, check_ini if not torch.distributed.is_initialized(): return None + # if parallel_state is not initialized, pass `tp_group` thru + if not parallel_state.is_initialized(): + return tp_group + if tp_group is None: if torch.distributed.is_initialized() and torch.distributed.get_rank() == 0: warnings.warn( @@ -1942,9 +1946,17 @@ def is_submodule(module, parent_module, strict=True): ######################## -def get_batch_on_this_cp_rank(batch: Dict[str, Any]): +def get_batch_on_this_cp_rank( + batch: Dict[str, Any], cp_group: Optional[torch.distributed.ProcessGroup] = None +): """Slice batch input along sequence dimension into multiple chunks, which are parallelized across GPUs in a context parallel group. + + Args: + batch (Dict[str, Any]): Input batch tensors. + cp_group (Optional[torch.distributed.ProcessGroup]): Context-parallel process group. 
+ If provided, uses this group's size and rank. Otherwise, falls back to + the current context-parallel settings from parallel_state. """ # With causal masking, each token only attends to its prior tokens. Simply split @@ -1953,9 +1965,15 @@ def get_batch_on_this_cp_rank(batch: Dict[str, Any]): # we split sequence into 2*CP ranks. Assuming CP=2, we then get 4 chunks, chunk_0 # and chunk_3 are assigned to GPU0, chunk_1 and chunk_2 are assigned to GPU1, so # that we can get balanced workload among GPUs in a context parallel group. - cp_size = parallel_state.get_context_parallel_world_size() - if cp_size > 1: + # Determine CP topology either from provided group or from current context parallel state + if cp_group is not None: + cp_size = get_pg_size(cp_group) + cp_rank = get_pg_rank(cp_group) + else: + cp_size = parallel_state.get_context_parallel_world_size() cp_rank = parallel_state.get_context_parallel_rank() + + if cp_size > 1: for key, val in batch.items(): if val is not None: seq_dim = 1 if key != "attention_mask" else 2 diff --git a/megatron/training/training.py b/megatron/training/training.py index d47a8abd20e..99fbd453426 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -50,6 +50,7 @@ from megatron.core import mpu, tensor_parallel from megatron.core.utils import ( check_param_hashes_across_dp_replicas, + get_attr_wrapped_model, get_model_config, StragglerDetector, ) @@ -1504,6 +1505,7 @@ def training_log( params_norm, num_zeros_in_grad, max_attention_logit, + pg_collection=None, ): """Log training information such as losses, timing, ....""" args = get_args() @@ -1693,6 +1695,7 @@ def training_log( num_layers=args.num_layers, moe_layer_freq=args.moe_layer_freq, mtp_num_layers=args.mtp_num_layers, + pg_collection=pg_collection, ) if args.mtp_num_layers is not None: mtp_loss_scale = 1 / get_num_microbatches() @@ -2188,6 +2191,8 @@ def train( for model_module in model: model_module.train() + model_pg_collection = 
get_attr_wrapped_model(model[0], "pg_collection") + # Tracking loss. total_loss_dict = {} @@ -2559,6 +2564,7 @@ def get_e2e_base_metrics(): params_norm, num_zeros_in_grad, max_attention_logit, + pg_collection=model_pg_collection, ) # Evaluation. From ad5a222b2ea9727b15fed108ace31c8bbd7b5c80 Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Thu, 4 Dec 2025 15:44:30 -0800 Subject: [PATCH 175/334] feat: add decorator: experimental_api (#2546) Signed-off-by: Pablo Garay --- ...k_api_backwards_compatibility_workflow.yml | 12 ++++++- docs/api-backwards-compatibility-check.md | 31 ++++++++++++++--- megatron/core/utils.py | 33 +++++++++++++++++++ scripts/check_api_backwards_compatibility.py | 4 +-- 4 files changed, 72 insertions(+), 8 deletions(-) diff --git a/.github/workflows/check_api_backwards_compatibility_workflow.yml b/.github/workflows/check_api_backwards_compatibility_workflow.yml index 707d5f76316..551978cb84a 100644 --- a/.github/workflows/check_api_backwards_compatibility_workflow.yml +++ b/.github/workflows/check_api_backwards_compatibility_workflow.yml @@ -197,7 +197,17 @@ jobs: echo " def internal_helper_function():" echo " pass" echo "" - echo "3️⃣ USE DEPRECATION (For gradual API changes)" + echo "3️⃣ MARK AS EXPERIMENTAL API (If this is experimental code)" + echo " → Add @experimental_api decorator from megatron.core.utils" + echo "" + echo " Example:" + echo " from megatron.core.utils import experimental_api" + echo "" + echo " @experimental_api" + echo " class ExperimentalFeature:" + echo " pass" + echo "" + echo "4️⃣ USE DEPRECATION (For gradual API changes)" echo " → Add @deprecated decorator for transition period" echo " → Example:" echo " from megatron.core.utils import deprecated" diff --git a/docs/api-backwards-compatibility-check.md b/docs/api-backwards-compatibility-check.md index e2fabbf4cd2..0e78eaec669 100644 --- a/docs/api-backwards-compatibility-check.md +++ b/docs/api-backwards-compatibility-check.md @@ -26,7 +26,7 @@ The compatibility 
checker: ### ⏭️ What Gets Skipped - **Test functions** - Functions starting with `test_` -- **Exempt decorators** - Functions marked with `@internal_api` or `@deprecated` +- **Exempt decorators** - Functions marked with `@internal_api`, `@experimental_api`, or `@deprecated` - **Excluded paths** - Code in `tests/`, `experimental/`, `legacy/` ### ✅ Allowed Changes @@ -57,6 +57,8 @@ python scripts/check_api_backwards_compatibility.py --baseline core_r0.8.0 --cur If you need to make breaking changes to internal or experimental APIs: +#### Internal API (for internal implementation details) + ```python from megatron.core.utils import internal_api @@ -69,11 +71,29 @@ def experimental_feature(x, y): pass ``` -**When to use:** +**When to use `@internal_api`:** - Internal APIs not documented for external use - Experimental features explicitly marked as unstable - Functions in development that haven't been released yet +#### Experimental API (for experimental features) + +```python +from megatron.core.utils import experimental_api + +@experimental_api +def new_experimental_feature(x, y): + """ + This API is experimental and may change without notice. + """ + pass +``` + +**When to use `@experimental_api`:** +- Experimental features explicitly marked as unstable +- New APIs under active development +- Features that haven't been stabilized yet + ### Deprecating APIs For planned API changes, use the deprecation workflow: @@ -196,7 +216,7 @@ Script loads code via griffe: • Current: PR branch ↓ Apply filtering: - • Skip @internal_api and @deprecated + • Skip @internal_api, @experimental_api, and @deprecated • Skip private functions (_prefix) • Skip test/experimental paths ↓ @@ -223,6 +243,7 @@ Edit `scripts/check_api_backwards_compatibility.py`: # Add more exempt decorators EXEMPT_DECORATORS = [ "internal_api", + "experimental_api", "deprecated", ] @@ -255,11 +276,11 @@ The workflow auto-detects the latest `core_r*` tag. 
To manually specify: ### Q: Can I disable the check for my PR? -**A:** No, but you can mark specific functions as exempt using `@internal_api`. +**A:** No, but you can mark specific functions as exempt using `@internal_api` or `@experimental_api`. ### Q: What if I need to make a breaking change? -**A:** Use the `@deprecated` decorator for a gradual transition, or mark the function as exempt if it's internal/experimental. +**A:** Use the `@deprecated` decorator for a gradual transition, or mark the function as exempt using `@internal_api` (for internal code) or `@experimental_api` (for experimental features). ### Q: Does this check all of Megatron-LM? diff --git a/megatron/core/utils.py b/megatron/core/utils.py index 431b56bd002..91b15dabf74 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -2325,3 +2325,36 @@ class ExperimentalFeature: """ func._internal_api = True return func + + +def experimental_api(func: Callable) -> Callable: + """ + Mark a function or class as experimental API. + + Use this decorator for: + - Experimental features that may change without notice + - New APIs under active development + - Features that are not yet stable + + Objects marked with this decorator will be exempt from backward + compatibility checks, allowing rapid iteration during development. 
+ + Args: + func: The function or class to mark as experimental + + Returns: + The original function/class with an experimental API marker + + Example: + @experimental_api + def new_experimental_feature(): + '''This API is experimental and may change''' + pass + + @experimental_api + class ExperimentalModel: + '''This model is under active development''' + pass + """ + func._experimental_api = True + return func diff --git a/scripts/check_api_backwards_compatibility.py b/scripts/check_api_backwards_compatibility.py index 9c1f29ca890..bf5492c2962 100644 --- a/scripts/check_api_backwards_compatibility.py +++ b/scripts/check_api_backwards_compatibility.py @@ -4,7 +4,7 @@ Megatron Core API Compatibility Checker Simple checker using Griffe to find breaking changes between two versions. -Objects decorated with @internal_api or @deprecated are excluded from checks. +Objects decorated with @internal_api, @experimental_api, or @deprecated are excluded from checks. Usage: python scripts/check_api_backwards_compatibility.py --baseline core_v0.14.0 @@ -44,7 +44,7 @@ # Decorators that exempt objects from compatibility checks -EXEMPT_DECORATORS = ['internal_api', 'deprecated'] +EXEMPT_DECORATORS = ['internal_api', 'deprecated', 'experimental_api'] def has_exempt_decorator(obj: Object) -> bool: From 7d17116bf409059e20df998732b29022a8dae406 Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Thu, 4 Dec 2025 15:45:04 -0800 Subject: [PATCH 176/334] feat: API compat: ignore AttributeChangedValueBreakage (not a signature change) - dev (#2547) Signed-off-by: Pablo Garay --- ...check_api_backwards_compatibility_workflow.yml | 2 ++ scripts/check_api_backwards_compatibility.py | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/check_api_backwards_compatibility_workflow.yml b/.github/workflows/check_api_backwards_compatibility_workflow.yml index 551978cb84a..002a18194a3 100644 --- 
a/.github/workflows/check_api_backwards_compatibility_workflow.yml +++ b/.github/workflows/check_api_backwards_compatibility_workflow.yml @@ -62,7 +62,9 @@ jobs: fi # Check for changes in megatron/core Python files (excluding tests and legacy) + # Note: Using both *.py and **/*.py to match files at root and in subdirectories CHANGED_FILES=$(git diff --name-only "$BASE_SHA" HEAD -- \ + 'megatron/core/*.py' \ 'megatron/core/**/*.py' \ ':!megatron/core/tests/**' \ ':!megatron/legacy/**' 2>/dev/null || echo "") diff --git a/scripts/check_api_backwards_compatibility.py b/scripts/check_api_backwards_compatibility.py index bf5492c2962..4977b806433 100644 --- a/scripts/check_api_backwards_compatibility.py +++ b/scripts/check_api_backwards_compatibility.py @@ -46,6 +46,13 @@ # Decorators that exempt objects from compatibility checks EXEMPT_DECORATORS = ['internal_api', 'deprecated', 'experimental_api'] +# Breakage kinds to ignore (not actual API signature changes) +# AttributeChangedValueBreakage: Changing constant values (e.g., VERSION = "1.0" -> "2.0") +# is not a breaking API change - the constant still exists with the same name +IGNORED_BREAKAGE_KINDS = [ + 'AttributeChangedValueBreakage', +] + def has_exempt_decorator(obj: Object) -> bool: """Check if a Griffe object has any exempt decorator. @@ -206,9 +213,10 @@ def get_object_path(change) -> str: def should_skip_change(change, filtered_paths: set) -> bool: - """Determine if a breaking change should be skipped based on exempt decorators. + """Determine if a breaking change should be skipped. 
A change is skipped if: + - The change kind is in IGNORED_BREAKAGE_KINDS (not a signature change) - The changed object itself is in filtered_paths (exact match) - The changed object is a child of an exempt object (prefix match) @@ -219,6 +227,11 @@ def should_skip_change(change, filtered_paths: set) -> bool: Returns: bool: True if the change should be skipped (filtered out) """ + # Check if this breakage kind should be ignored (not a signature change) + change_kind = type(change).__name__ + if change_kind in IGNORED_BREAKAGE_KINDS: + return True + path = get_object_path(change) if not path: return False From 274e04d21fbcb7f53f63de992ee1217f275f1cf2 Mon Sep 17 00:00:00 2001 From: Parth Mannan <38387286+parthmannan@users.noreply.github.com> Date: Thu, 4 Dec 2025 15:49:09 -0800 Subject: [PATCH 177/334] [Dev] Hybrid Data x Context Parallelism Feature (#2054) Signed-off-by: tailaim Signed-off-by: Parth Mannan Co-authored-by: Mcore Bot Co-authored-by: tailaim Co-authored-by: kunlunl Co-authored-by: Kunlun Li <94586211+kunlunl@users.noreply.github.com> --- megatron/core/datasets/data_schedule.py | 301 ++++++++ megatron/core/datasets/gpt_dataset.py | 18 + .../core/extensions/transformer_engine.py | 20 + megatron/core/model_parallel_config.py | 19 + .../common/embeddings/rotary_pos_embedding.py | 61 +- .../embeddings/yarn_rotary_pos_embedding.py | 44 +- megatron/core/models/gpt/gpt_model.py | 12 +- megatron/core/packed_seq_params.py | 3 + megatron/core/parallel_state.py | 54 ++ .../pipeline_parallel/hybrid_cp_schedule.py | 660 ++++++++++++++++++ megatron/core/pipeline_parallel/schedules.py | 19 + megatron/core/transformer/attention.py | 4 +- .../experimental_attention_variant/dsa.py | 8 +- .../transformer/multi_latent_attention.py | 17 +- megatron/core/utils.py | 108 ++- megatron/legacy/data/data_samplers.py | 71 +- megatron/training/arguments.py | 14 + megatron/training/initialize.py | 1 + megatron/training/training.py | 34 +- megatron/training/utils.py | 96 ++- 
pretrain_gpt.py | 34 +- pretrain_mamba.py | 7 + tests/unit_tests/test_parallel_state.py | 31 + 23 files changed, 1558 insertions(+), 78 deletions(-) create mode 100644 megatron/core/datasets/data_schedule.py create mode 100644 megatron/core/pipeline_parallel/hybrid_cp_schedule.py diff --git a/megatron/core/datasets/data_schedule.py b/megatron/core/datasets/data_schedule.py new file mode 100644 index 00000000000..0f016473b6a --- /dev/null +++ b/megatron/core/datasets/data_schedule.py @@ -0,0 +1,301 @@ +# Copyright (c) 2025 NVIDIA CORPORATION. All rights reserved. + +from typing import Any, List, Optional + +import torch + +from megatron.core import parallel_state +from megatron.core.pipeline_parallel.hybrid_cp_schedule import BalancedCPScheduler +from megatron.core.process_groups_config import ProcessGroupCollection + + +class HybridCPDataLoaderWrapper: + """ + A wrapper class that wraps around an existing data_iterator. + For every __next__ call, + 1. Each DP rank pulls a batch of packed samples. + 2. Extracts the sequence lengths of each sub-sample and all-gathers across the DP group. + 3. Schedules the sub-samples to the DPxCP ranks using the BalancedCPScheduler. + 4. Based on the schedule, reroutes the sub-samples to the correct rank using all-to-all. + 5. Returns the assigned sub-samples to this rank. + + Args: + data_iterator: The original data_iterator to wrap around + config: The config object containing the max_seqlen_per_dp_cp_rank + dp_cp_group: Data parallel context parallel group. 
+ """ + + def __init__( + self, data_iterator, config, pg_collection: Optional[ProcessGroupCollection] = None + ): + self.data_iterator = data_iterator + self.config = config + if pg_collection is None: + self.dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True) + self.dp_group = parallel_state.get_data_parallel_group() + self.tp_group = parallel_state.get_tensor_model_parallel_group() + else: + self.dp_cp_group = pg_collection.dp_cp + self.dp_group = pg_collection.dp + self.tp_group = pg_collection.tp + assert ( + self.dp_cp_group is not None and self.dp_group is not None and self.tp_group is not None + ), "dp_cp_group, dp_group, tp_group must not be None when using hybrid context parallel" + + self.cp_balancing_scheduler = BalancedCPScheduler( + max_seq_len_per_rank=self.config.max_seqlen_per_dp_cp_rank, dp_cp_group=self.dp_cp_group + ) + + self.total_hdp_gpus = self.dp_cp_group.size() + + def __iter__(self): + """Return self as an iterator.""" + return self + + def get_global_seqlens(self, subsample_seqlens: torch.Tensor) -> List[int]: + """ + Gathers the sequence lengths of all subsamples from all DP ranks. + Each DP rank loads the same number of microbatches but each microbatch + may have a different number of subsamples. + + We find the number of subsamples each rank holds and then gather the + sequence lengths of all subsamples from all ranks. 
+ """ + # Collect the number of subsamples from all ranks + local_len = torch.tensor([subsample_seqlens.shape[0]], dtype=torch.int32).cuda() + dp_subsample_count = [torch.zeros_like(local_len) for _ in range(self.dp_group.size())] + torch.distributed.all_gather(dp_subsample_count, local_len, group=self.dp_group) + + # Find the max number of subsamples across all ranks and pad subsample_seqlens to max length + dp_subsample_counts = torch.stack(dp_subsample_count, dim=0).cpu().view(-1) + max_sub_samples = int(dp_subsample_counts.max().item()) + + if local_len.item() < max_sub_samples: + subsample_seqlens_padded = torch.cat( + [ + subsample_seqlens, + torch.zeros(max_sub_samples - local_len.item(), dtype=torch.int32).cuda(), + ], + dim=0, + ) + else: + subsample_seqlens_padded = subsample_seqlens + + # Gather the subsample_seqlens from all ranks + seqlens_gathered = [ + torch.empty_like(subsample_seqlens_padded) for _ in range(self.dp_group.size()) + ] + torch.distributed.all_gather( + seqlens_gathered, subsample_seqlens_padded, group=self.dp_group + ) + + # Trim each seqlens_gathered to the length of the correct sample + for dp_rank, seqlen in enumerate(seqlens_gathered): + seqlens_gathered[dp_rank] = seqlen[: dp_subsample_counts[dp_rank]] + + seqlens_gathered = torch.cat(seqlens_gathered, dim=0) + seqlens_gathered = seqlens_gathered.cpu().tolist() + + # Calculate the offsets to assign unique global ID to each subsample. + csum = torch.cumsum(dp_subsample_counts, dim=0, dtype=torch.int32) + offsets = torch.cat([torch.zeros(1, dtype=torch.int32), csum[:-1]], dim=0) + + return seqlens_gathered, offsets + + def get_global_id_seqlens(self, num_local_subsamples, offsets, seqlens_gathered): + """ + Calculates the global ID for each subsample. + + We assign a unique global ID to each subsample. + + Returns: + global_id_seqlens: list of (global_id, seqlen) tuples for scheduling. + global_ids_this_rank: list of global IDs locally present on this rank. 
+ """ + dp_rank = self.dp_group.rank() + global_ids = torch.arange(len(seqlens_gathered), dtype=torch.int32).cuda() + # Create a list of (global_id, seqlen) tuples for scheduling + global_id_seqlens = [(i, seqlens_gathered[i]) for i in range(len(global_ids))] + # Get the global IDs locally present on this rank + global_ids_this_rank = global_ids[ + offsets[dp_rank] : offsets[dp_rank] + num_local_subsamples + ] + + return global_id_seqlens, global_ids_this_rank + + def _gid_to_src_rank(self, gid: int, offsets: List[int]) -> int: + dp_src_rank = torch.bucketize(gid, offsets[1:] - 1) + # Since the torch.distributed.get_process_group_ranks + # provides the global rank, we need to consider TP + hdp_rank = ( + torch.distributed.get_process_group_ranks(self.dp_group)[dp_src_rank] + // self.tp_group.size() + ) + return hdp_rank + + def reroute_samples_to_hdp_ranks( + self, batch, global_ids_this_rank, global_id_seqlens, sample_id_groups, offsets + ): + """ + Reroutes the sub-samples to the correct rank after scheduling. + + For each key in the batch dict, we perform an all-to-all communication + to transfer the data to the correct ranks. + Since all CP ranks within a DP group have the same data, we only need + to transfer data between matching CP ranks. 
+ """ + gid2local_id = {int(gid): i for i, gid in enumerate(global_ids_this_rank)} + hdp_rank = self.dp_cp_group.rank() + dp_ranks = torch.distributed.get_process_group_ranks(self.dp_group) + # Here we actually want to get the DP group's rank within the HDP group, + # we need to consider TP + dp_ranks = [r // self.tp_group.size() for r in dp_ranks] + + data_keys = batch[0].keys() + + # Create the send plan + combined_sample_id_groups: List[List[int]] = [[] for _ in range(self.total_hdp_gpus)] + + for d in range(self.total_hdp_gpus): + for sample_id_group in sample_id_groups: + combined_sample_id_groups[d].extend(sample_id_group[d]) + + for dest_rank in range(self.total_hdp_gpus): + combined_sample_id_groups[dest_rank].sort() + + # Filter out samples that are not present on this rank + send_ids_sorted = [ + gid + for d in dp_ranks + for gid in combined_sample_id_groups[d] + if gid in global_ids_this_rank + ] + # send_counts = [len(combined_sample_id_groups[d]) for d in range(self.total_hdp_gpus)] + + send_lens_split = [0] * self.total_hdp_gpus + for dest_rank in range(self.total_hdp_gpus): + if dest_rank in dp_ranks: + send_lens_split[dest_rank] = sum( + [ + global_id_seqlens[gid][1] + for gid in combined_sample_id_groups[dest_rank] + if gid in global_ids_this_rank + ] + ) + else: + # We only need to share local data with DP ranks that have different data. 
+ send_lens_split[dest_rank] = 0 + + # Create the recv plan + recv_sample_id_groups = [[] for _ in range(self.total_hdp_gpus)] + for gid in combined_sample_id_groups[hdp_rank]: + src_rank = self._gid_to_src_rank(gid, offsets) + recv_sample_id_groups[src_rank].append(gid) + + recv_lens_split = [0] * self.total_hdp_gpus + for src_rank in range(self.total_hdp_gpus): + recv_lens_split[src_rank] = sum( + [global_id_seqlens[gid][1] for gid in recv_sample_id_groups[src_rank]] + ) + + recv_ids_sorted = [ + gid for d in range(self.total_hdp_gpus) for gid in recv_sample_id_groups[d] + ] + recv_counts = [len(recv_sample_id_groups[d]) for d in range(self.total_hdp_gpus)] + + recv_samples = [{k: None for k in data_keys} for _ in range(sum(recv_counts))] + + def _pack_sample_by_key(key: str) -> torch.Tensor: + flattened_tensors = [] + for gid in send_ids_sorted: + t = batch[gid2local_id[gid]][key].to(torch.cuda.current_device(), non_blocking=True) + flattened_tensors.append(t) + return ( + torch.cat(flattened_tensors, dim=0) + if flattened_tensors + else torch.empty(0, device=torch.cuda.current_device(), dtype=batch[0][key].dtype) + ) + + def _unpack_sample_by_key(key: str, recv_tensor: torch.Tensor): + cursor = 0 + for i, gid in enumerate(recv_ids_sorted): + sample_len = global_id_seqlens[gid][1] + recv_samples[i][key] = recv_tensor[cursor : cursor + sample_len] + cursor += sample_len + + for key in data_keys: + send_tensor = _pack_sample_by_key(key) + recv_tensor = torch.empty( + sum(recv_lens_split), device=torch.cuda.current_device(), dtype=send_tensor.dtype + ) + torch.distributed.all_to_all_single( + output=recv_tensor, + input=send_tensor, + output_split_sizes=recv_lens_split, + input_split_sizes=send_lens_split, + group=self.dp_cp_group, + ) + _unpack_sample_by_key(key, recv_tensor) + + recv_sample_with_id = { + recv_id: recv_samples[i] for i, recv_id in enumerate(recv_ids_sorted) + } + return recv_sample_with_id + + def unpack_batch(self, batch): + """ + Unpacks the 
packed samples into a list of sub-samples. + Since each sub-sample may be routed to different DPxCP ranks, + we unpack the sample here to avoid unnecessarily transferring + the entire packed sample. + """ + batch_unpacked = [] + for sample in batch: + for sub_sample in range(sample["cu_seqlens"].shape[0] - 1): + sub_sample_dict = {} + start_idx = sample["cu_seqlens"][sub_sample] + end_idx = sample["cu_seqlens"][sub_sample + 1] + if end_idx - start_idx == 0: + continue + for key in sample.keys(): + if key in ["cu_seqlens", "batch_idx", "max_seqlen"]: + continue + sub_sample_dict[key] = sample[key][start_idx:end_idx] + batch_unpacked.append(sub_sample_dict) + return batch_unpacked + + def __next__(self) -> Any: + """ + Get the next item from the dataset, pull scheduling metadata and return it. + """ + if self.data_iterator is None: + # TP0 reads from data_iterator, others receive via broadcast. + return None, None + else: + batch = next(self.data_iterator) + subsample_seqlens = [] + for sample in batch: + subsample_seqlens.extend( + [ + int(sample["cu_seqlens"][i + 1] - sample["cu_seqlens"][i]) + for i in range(0, sample["cu_seqlens"].shape[0] - 1) + ] + ) + subsample_seqlens = torch.tensor(subsample_seqlens, dtype=torch.int32).cuda() + subsample_seqlens = subsample_seqlens[subsample_seqlens != 0] + + seqlens_gathered, offsets = self.get_global_seqlens(subsample_seqlens) + + global_id_seqlens, global_ids_this_rank = self.get_global_id_seqlens( + subsample_seqlens.shape[0], offsets, seqlens_gathered + ) + + groups, sample_id_groups = self.cp_balancing_scheduler.get_groups_and_subsamples( + global_id_seqlens, self.config + ) + + batch = self.unpack_batch(batch) + samples_this_rank_with_id = self.reroute_samples_to_hdp_ranks( + batch, global_ids_this_rank, global_id_seqlens, sample_id_groups, offsets + ) + return samples_this_rank_with_id, sample_id_groups diff --git a/megatron/core/datasets/gpt_dataset.py b/megatron/core/datasets/gpt_dataset.py index 
710a4c684ff..f50a6a77f57 100644 --- a/megatron/core/datasets/gpt_dataset.py +++ b/megatron/core/datasets/gpt_dataset.py @@ -49,6 +49,24 @@ class GPTDatasetConfig(BlendedMegatronDatasetConfig): object_storage_cache_path: Optional[str] = None """Path for caching indices for s3 or msc dataloading.""" + context_parallel_size: int = 1 + """Option to enable context parallelism""" + + data_parallel_size: int = 1 + """Option to enable data parallelism""" + + sequence_parallel_size: int = 0 + """Option to indicate the sequence parallelism size when using TP + Set to 0 if sequence parallel is not enabled regardless of TP size. + """ + + hybrid_context_parallel: bool = False + """Option to enable hybrid context parallelism. When setting this to True, + each sample should be divisible by the data parallel size * context parallel size * 2. + If sequence parallel is enabled, it should be divisible by the + data parallel size * context parallel size * sequence parallel size * 2. + """ + def __post_init__(self) -> None: """Do asserts and set fields post init""" super().__post_init__() diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index ab9962cfb1c..acb93ef7853 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1005,6 +1005,7 @@ def __init__( self.kept_packed_seq_params = set( field.name for field in dataclasses.fields(PackedSeqParams) ) + if get_te_version() < PkgVersion("1.3.0"): # TE 1.3.0 introduces precomputing max_seqlen to remove unnecessary kernels and D2H # copies (#555) @@ -1055,6 +1056,25 @@ def forward( packed_seq_params: PackedSeqParams = None, ): """Forward.""" + if packed_seq_params is not None: + # If Dynamic CP group is provided, update TE DPA CP group + if packed_seq_params.cp_group is not None: + self.cp_group = packed_seq_params.cp_group + super().set_context_parallel_group( + self.cp_group, + 
torch.distributed.get_process_group_ranks(self.cp_group), + TEDotProductAttention.cp_stream, + self.cp_comm_type, + ) + # If cp_group is None but local_cp_size is provided, + # Indicates to turn off CP dynamically + elif packed_seq_params.local_cp_size is not None: + assert ( + packed_seq_params.local_cp_size == 1 + ), "local_cp_size must be == 1 if provided without cp_group" + super().set_context_parallel_group(None, None, None, self.cp_comm_type) + self.kept_packed_seq_params.discard("cp_group") + self.kept_packed_seq_params.discard("local_cp_size") packed_seq_kwargs = ( {key: getattr(packed_seq_params, key) for key in self.kept_packed_seq_params} if packed_seq_params is not None diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index e31fcd2577e..e75ff4a0273 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -6,8 +6,11 @@ import torch +from megatron.core.utils import internal_api + @dataclass +@internal_api class ModelParallelConfig: """Base configuration for Megatron Core @@ -53,6 +56,22 @@ class ModelParallelConfig: type. """ + max_seqlen_per_dp_cp_rank: Optional[int] = None + """ + Maximum sequence length per DPxCP rank. This is the maximum sequence length each rank + can handle without overflowing the memory. Typically, a good starting point is to set this + to maximum sequence length / context parallel size. + This is used to calculate the number and length of sub-samples assigned to + each rank when using hybrid_context_parallel. + """ + + hybrid_context_parallel: bool = False + """ + If true, enables hybrid context parallel. This is used to balance the workload of + each CP rank when we use packed samples with variable sequence lengths. + Please set max_seqlen_per_dp_cp_rank when using hybrid_context_parallel. 
+ """ + expert_model_parallel_size: int = 1 """Distributes Moe Experts across sub data parallel dimension.""" diff --git a/megatron/core/models/common/embeddings/rotary_pos_embedding.py b/megatron/core/models/common/embeddings/rotary_pos_embedding.py index 0d7d5e626d0..5d7b69cd34e 100644 --- a/megatron/core/models/common/embeddings/rotary_pos_embedding.py +++ b/megatron/core/models/common/embeddings/rotary_pos_embedding.py @@ -25,7 +25,7 @@ apply_rotary_pos_emb, get_pos_emb_on_this_cp_rank, ) -from megatron.core.utils import deprecate_inference_params +from megatron.core.utils import deprecate_inference_params, internal_api logger = logging.getLogger(__name__) @@ -148,13 +148,12 @@ def get_cos_sin(self, max_seq_len: int, offset: int = 0) -> (Tensor, Tensor): return cos, sin @lru_cache(maxsize=32) - def forward(self, max_seq_len: int, offset: int = 0, packed_seq: bool = False) -> Tensor: - """Forward pass of RoPE embedding. + def get_emb(self, max_seq_len: int, offset: int = 0) -> Tensor: + """Forward pass of RoPE embedding before CP sharding. Args: max_seq_len (int): Maximum size of sequence offset (int, optional): RoPE offset. Defaults to 0. - packed_seq (bool, optional): Whether to use packed sequence. Defaults to False. Returns: Tensor: Embeddings after applying RoPE. @@ -174,10 +173,35 @@ def forward(self, max_seq_len: int, offset: int = 0, packed_seq: bool = False) - ) # emb [seq_length, .., dim] emb = emb[:, None, None, :] - if self.cp_group is not None and self.cp_group.size() > 1 and not packed_seq: - # slice rotary_pos_emb along sequence dimension and select the parition of the current - # CP rank - emb = get_pos_emb_on_this_cp_rank(emb, 0, self.cp_group) + return emb + + @internal_api + def forward( + self, max_seq_len: int, offset: int = 0, packed_seq_params: Optional[PackedSeqParams] = None + ) -> Tensor: + """Forward pass of RoPE embedding. + + Args: + max_seq_len (int): Maximum size of sequence + offset (int, optional): RoPE offset. Defaults to 0. 
+ packed_seq_params (PackedSeqParams, optional): Packed sequence params. Defaults to None. + + Returns: + Tensor: Embeddings after applying RoPE. + """ + emb = self.get_emb(max_seq_len, offset) + packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' + if packed_seq_params is not None and packed_seq_params.local_cp_size is not None: + # Set CP group to dynamic CP group for CP slicing + cp_group = packed_seq_params.cp_group + else: + cp_group = self.cp_group + + if cp_group is not None and cp_group.size() > 1 and not packed_seq: + # slice rotary_pos_emb along sequence dimension + # and select the parition of the current CP rank + emb = get_pos_emb_on_this_cp_rank(emb, 0, cp_group) + return emb def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs): @@ -279,13 +303,19 @@ def __init__( else parallel_state.get_context_parallel_group(check_initialized=False) ) - def forward(self, position_ids: torch.Tensor, mrope_section: List[int]) -> Tensor: + def forward( + self, + position_ids: torch.Tensor, + mrope_section: List[int], + packed_seq_params: Optional[PackedSeqParams] = None, + ) -> Tensor: """Forward pass of multimodal RoPE embedding. Args: position_ids (torch.Tensor): A postion_id tensor with shape [3, batchsize, seqlens] mrope_section (list[int]): Multimodal rope section is for channel dimension of temporal, height and width in rope calculation. + packed_seq_params (PackedSeqParams, optional): Packed sequence params. Defaults to None. Returns: Tensor: Embeddings after applying RoPE. 
@@ -318,8 +348,17 @@ def forward(self, position_ids: torch.Tensor, mrope_section: List[int]) -> Tenso # shape (seq_length, bs, 1, 2 * dim) emb = emb[..., None, :].transpose(0, 1).contiguous() - if self.cp_group is not None and self.cp_group.size() > 1: + if packed_seq_params is not None and packed_seq_params.local_cp_size is not None: + if packed_seq_params.local_cp_size > 1: + # Set CP group to dynamic CP group for CP slicing + cp_group = packed_seq_params.cp_group + else: + # Set CP group to None to avoid CP slicing + cp_group = None + else: + cp_group = self.cp_group + if cp_group is not None and cp_group.size() > 1: # slice rotary_pos_emb along sequence dimension and select the parition of the current # CP rank - emb = get_pos_emb_on_this_cp_rank(emb, 0, self.cp_group) + emb = get_pos_emb_on_this_cp_rank(emb, 0, cp_group) return emb diff --git a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py index bcbb74b0dff..c2ef638050c 100644 --- a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +++ b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py @@ -13,6 +13,7 @@ from megatron.core.models.common.embeddings.rope_utils import get_pos_emb_on_this_cp_rank from megatron.core.models.common.embeddings.rotary_pos_embedding import RotaryEmbedding from megatron.core.transformer import TransformerConfig +from megatron.core.utils import internal_api logger = logging.getLogger(__name__) @@ -99,13 +100,12 @@ def __init__( ) @lru_cache(maxsize=32) - def forward(self, max_seq_len: int, offset: int = 0, packed_seq: bool = False) -> Tensor: + def get_emb(self, max_seq_len: int, offset: int = 0) -> Tensor: """Forward pass of Yarn Rotary Embedding. Args: max_seq_len (int): Maximum size of sequence offset (int, optional): RoPE offset. Defaults to 0. - packed_seq (bool, optional): Whether to use packed sequence. Defaults to False. 
Returns: Tensor: Embeddings after applying Yarn RoPE. @@ -151,19 +151,44 @@ def forward(self, max_seq_len: int, offset: int = 0, packed_seq: bool = False) - emb = torch.cat((freqs, freqs), dim=-1) # emb [seq_length, .., dim] emb = emb[:, None, None, :] - if self.cp_group is not None and self.cp_group.size() > 1 and not packed_seq: + return emb, _mscale + + @internal_api + def forward( + self, max_seq_len: int, offset: int = 0, packed_seq_params: Optional[PackedSeqParams] = None + ) -> Tensor: + """Forward pass of Yarn Rotary Embedding. + + Args: + max_seq_len (int): Maximum size of sequence + offset (int, optional): RoPE offset. Defaults to 0. + packed_seq_params (PackedSeqParams, optional): Packed sequence params. Defaults to None. + + Returns: + Tensor: Embeddings after applying Yarn RoPE. + """ + emb, _mscale = self.get_emb(max_seq_len, offset) + packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' + if packed_seq_params is not None and packed_seq_params.local_cp_size is not None: + # Set CP group to dynamic CP group for CP slicing + cp_group = packed_seq_params.cp_group + else: + cp_group = self.cp_group + if cp_group is not None and cp_group.size() > 1 and not packed_seq: # slice rotary_pos_emb along sequence dimension # and select the parition of the current CP rank - emb = get_pos_emb_on_this_cp_rank(emb, 0, self.cp_group) + emb = get_pos_emb_on_this_cp_rank(emb, 0, cp_group) return emb, _mscale - def _set_cos_sin_cache(self, seq_len, offset, dtype, packed_seq=False): + def _set_cos_sin_cache(self, seq_len, offset, dtype, packed_seq_params=None): self.max_seq_len_cached = seq_len self.offset_cached = offset self.dtype_cached = dtype - self.packed_seq_cached = packed_seq + self.packed_seq_cached = ( + packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' + ) - emb, _mscale = self.forward(seq_len, offset, packed_seq) + emb, _mscale = self.forward(seq_len, offset, packed_seq_params) self.register_buffer( 
"cos_cached", (emb.cos() * _mscale).to(dtype).contiguous(), persistent=False ) @@ -172,16 +197,17 @@ def _set_cos_sin_cache(self, seq_len, offset, dtype, packed_seq=False): ) def get_cached_cos_sin( - self, seq_len, offset=0, dtype=torch.get_default_dtype(), packed_seq=False + self, seq_len, offset=0, dtype=torch.get_default_dtype(), packed_seq_params=None ): """Get cached cos and sin values.""" + packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' if ( seq_len > self.max_seq_len_cached or offset != self.offset_cached or dtype != self.dtype_cached or packed_seq != self.packed_seq_cached ): - self._set_cos_sin_cache(seq_len, offset, dtype, packed_seq) + self._set_cos_sin_cache(seq_len, offset, dtype, packed_seq_params) return (self.cos_cached[:seq_len, ...], self.sin_cached[:seq_len, ...]) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index a3d1a8bfc00..70eea932683 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -344,16 +344,16 @@ def _preprocess( inference_context, self.decoder, decoder_input, self.config, packed_seq_params ) rotary_pos_emb = self.rotary_pos_emb( - rotary_seq_len, - packed_seq=packed_seq_params is not None - and packed_seq_params.qkv_format == 'thd', + rotary_seq_len, packed_seq_params=packed_seq_params ) elif self.position_embedding_type == 'yarn': if self.training or not self.config.flash_decode: rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len( inference_context, self.decoder, decoder_input, self.config, packed_seq_params ) - rotary_pos_emb, _ = self.rotary_pos_emb(rotary_seq_len) + rotary_pos_emb, _ = self.rotary_pos_emb( + rotary_seq_len, packed_seq_params=packed_seq_params + ) else: raise NotImplementedError( "Flash decoding uses precomputed cos and sin for RoPE, not implemented in " @@ -361,7 +361,9 @@ def _preprocess( ) elif self.position_embedding_type == 'mrope' and not self.config.multi_latent_attention: 
if self.training or not self.config.flash_decode: - rotary_pos_emb = self.rotary_pos_emb(position_ids, self.mrope_section) + rotary_pos_emb = self.rotary_pos_emb( + position_ids, self.mrope_section, packed_seq_params=packed_seq_params + ) else: # Flash decoding uses precomputed cos and sin for RoPE raise NotImplementedError( diff --git a/megatron/core/packed_seq_params.py b/megatron/core/packed_seq_params.py index 330d0e03471..08ebdac67d8 100644 --- a/megatron/core/packed_seq_params.py +++ b/megatron/core/packed_seq_params.py @@ -1,6 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. from dataclasses import dataclass +import torch.distributed as dist from torch import Tensor @@ -18,3 +19,5 @@ class PackedSeqParams: cu_seqlens_kv_padded: Tensor = None max_seqlen_q: int = None max_seqlen_kv: int = None + local_cp_size: int = None + cp_group: dist.ProcessGroup = None diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 1e41bf9d8c2..fd0d0d9b9d9 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -6,6 +6,7 @@ import os import warnings from datetime import timedelta +from math import log2 from typing import Callable, List, Optional import numpy as np @@ -110,6 +111,8 @@ _CONTEXT_PARALLEL_GLOBAL_RANKS = None # Hierarchical context parallel groups _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS = None +# Hybrid context parallel groups +_HYBRID_DP_CP_GROUPS = {} # Data parallel group information with context parallel combined. _DATA_PARALLEL_GROUP_WITH_CP = None @@ -410,6 +413,31 @@ def create_hierarchical_groups( return hierarchical_groups, hierarchical_groups_gloo +def create_hybrid_dp_cp_groups(rank, ranks, pg_options): + """ + Creates groups required for hybrid DPxCP. + Creates a new group for every power of 2 up to the number of DPxCP ranks. + Returns a dictionary indexed by group size. 
+ """ + hybrid_dp_cp_groups = {} + # Generate group for every power of 2 up to the number of CP ranks + # We limit the allowed group sizes in order to avoid excessive overhead. + group_sizes = [2**i for i in range(int(log2(len(ranks))))][1:] + for group_size in group_sizes: + for i in range(0, len(ranks), group_size): + group = create_group( + ranks[i : i + group_size], + pg_options=pg_options, + group_desc=f"HYBRID_DP_CP_GROUP_{group_size}", + ) + if rank in ranks[i : i + group_size]: + assert ( + group_size not in hybrid_dp_cp_groups + ), f"Rank {rank} appears in multiple Hybrid DP CP groups of size {group_size}" + hybrid_dp_cp_groups[group_size] = group + return hybrid_dp_cp_groups + + class RankGenerator(object): """A class for generating rank groups for different modes of parallelism.""" @@ -530,6 +558,7 @@ def initialize_model_parallel( create_gloo_process_groups: bool = True, high_priority_stream_groups: Optional[List[str]] = None, sharp_enabled_group: Optional[str] = None, + hybrid_context_parallel: bool = False, ) -> None: """Initialize model data parallel groups. @@ -881,6 +910,19 @@ def initialize_model_parallel( if "NCCL_COLLNET_ENABLE" in os.environ: del os.environ["NCCL_COLLNET_ENABLE"] + if hybrid_context_parallel: + global _HYBRID_DP_CP_GROUPS + for ranks_with_cp in decoder_rank_generator.get_ranks('dp-cp'): + assert ( + len(ranks_with_cp) % 2 == 0 + ), "Hybrid context parallel requires an even number of ranks" + _HYBRID_DP_CP_GROUPS.update( + create_hybrid_dp_cp_groups( + rank, ranks_with_cp, get_nccl_options("dp_cp", nccl_comm_cfgs) + ) + ) + # TODO: Are gloo groups needed for hybrid cp? 
+ for ranks in decoder_rank_generator.get_ranks('dp'): group = create_group( ranks, @@ -1395,6 +1437,18 @@ def get_hierarchical_context_parallel_groups(check_initialized=True): return _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS +def get_hybrid_data_context_parallel_groups(check_initialized=True, group_size=None): + """Get the hybrid context parallel groups the caller rank belongs to.""" + # If the group size is the same as the entire DPxCP group, return the original group + if get_data_parallel_world_size(with_context_parallel=True) == group_size: + if check_initialized: + assert _DATA_PARALLEL_GROUP_WITH_CP is not None + return _DATA_PARALLEL_GROUP_WITH_CP + if check_initialized: + assert _HYBRID_DP_CP_GROUPS is not None + return _HYBRID_DP_CP_GROUPS[group_size] + + def get_embedding_group(check_initialized=True): """Get the embedding group the caller rank belongs to.""" if check_initialized: diff --git a/megatron/core/pipeline_parallel/hybrid_cp_schedule.py b/megatron/core/pipeline_parallel/hybrid_cp_schedule.py new file mode 100644 index 00000000000..27b5fc87945 --- /dev/null +++ b/megatron/core/pipeline_parallel/hybrid_cp_schedule.py @@ -0,0 +1,660 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +from collections import deque +from functools import lru_cache +from math import ceil, log2 +from typing import Callable, List, Optional, Tuple + +import torch + +from megatron.core import parallel_state +from megatron.core.rerun_state_machine import RerunDataIterator + + +class BalancedCPScheduler: + """ + This class provides the functionality to form groups of sub-samples + such that all DPxCP ranks have a roughly balanced workload in the group. 
+ """ + + def __init__(self, max_seq_len_per_rank: int, dp_cp_group: torch.distributed.ProcessGroup): + self.max_seq_len_per_rank = max_seq_len_per_rank + self.num_subsamples = 0 + self.num_subsamples_processed = 0 + self.free_resources = [] + self.total_hdp_gpus = dp_cp_group.size() + + @lru_cache(maxsize=128) + def get_total_workload(self, seq_length: int, cp_size: Optional[int] = None): + """ + seq_length: sequence length of a sub-sample + cp_size: total number of CP ranks working on this sub-sample + + Note: + This function is used to estimate the relative workload intensity + of a sub-sample. This is not meant to be an accurate flops calculator. + + Returns: workload of a sub-sample + """ + if cp_size is None: + cp_size = self.gpus_needed(seq_length) + return (seq_length * seq_length) / cp_size + + @lru_cache(maxsize=128) + def gpus_needed(self, seq_len: int) -> int: + """ + Calculates the number of GPUs needed for a given sequence length + and max sequence length per CP rank. + This is used to determine the CP size of a sub-sample. + + The number is rounded up to the next power of 2 to match the available + hybrid context parallel process group sizes. + """ + return max(1, 2 ** ceil(log2((seq_len / self.max_seq_len_per_rank)))) + + def make_buckets_equal( + self, + sample_seqlens: List[Tuple[int, int]], # List of (sample_id, sequence_length) tuples + compute_estimator: Callable[[int], float], + ) -> List[deque]: + """ + Makes as many buckets as unique CP sizes needed. + This keeps sample IDs tethered to their sequence lengths throughout the bucketing process. 
+ """ + # Extract just the sequence lengths for determining k + seqlens = [seq_len for _, seq_len in sample_seqlens] + + # Determine k based on unique GPU categories needed + k = len({self.gpus_needed(L) for L in seqlens}) + + # Create a work target for each bucket + # This is the total work divided by the number of buckets + work = [] + for _, s in sample_seqlens: + cp_size = self.gpus_needed(s) + work.append(compute_estimator(s, cp_size)) + total_work = sum(work) + target = total_work / k + buckets, cur, cur_work = [], [], 0.0 + remaining_work = total_work + remaining_k = k + + for i, (sample_id, seq_len) in enumerate(sample_seqlens): + work = compute_estimator(seq_len) + projected = cur_work + work + + # Check if we should close this bucket + if cur and ( + projected > target * 1.1 # Too much work + or len(sample_seqlens) - i <= remaining_k - len(buckets) + ): # Need to save sequences for remaining buckets + buckets.append(deque(cur)) + cur, cur_work = [], 0.0 + remaining_work -= sum(compute_estimator(seq_len) for _, seq_len in cur) + remaining_k -= 1 + + cur.append((sample_id, seq_len)) + cur_work += work + + if cur: + buckets.append(deque(cur)) + + return buckets + + def next_hdp_group( + self, + sample_seqlens: List[Tuple[int, int]], # List of (sample_id, sequence_length) tuples + compute_estimator: Callable[[int], float], + total_gpus: int, + delta: float = 0.05, # balance slack (e.g. 5 %) + strategy: str = "dp", # "dp" or "pp" + eps_bucket: float = 0.10, # ε target for bucket balance + ) -> Tuple[List[List[int]], List[Tuple[int, int]], List[float], List[List[int]]]: + """ + Given a list of (sample_id, sequence_length) tuples, this function aims to assign + sequences in a group such that all GPUs in the DPxCP group have a roughly balanced + workload. Once each group is roughly balanced, we exit and return the + group and the leftover sequences. + + The function performs the following passes in order to form a balanced microbatch: + 1. 
We create buckets of sequences that are roughly balanced. + We try to create as many buckets as possible CP sizes. + 2. Given a bucket has sequences available, we assign the sample + a. To a new set of GPUs if there are enough free GPUs. + b. To an existing set of GPUs with the lowest load. + 3. We check if the group is balanced whenever we need to move onto a new CP size + in the same set of GPUs. + 4. We trim the group if removing the last added sequence helps improve balance. + 5. If we run out of sequences to assign and there are empty GPUs, + we redistribute work to empty GPUs by recursively increasing the CP size of a + sample until no empty GPUs are left. + + Returns (micro_batches, leftover_sample_seqlens, exec_times, sample_ids_per_gpu). + """ + if not sample_seqlens: + return ( + [[] for _ in range(total_gpus)], + [], + [0.0 for _ in range(total_gpus)], + [[] for _ in range(total_gpus)], + ) + + # Get buckets of sequences with balanced work + buckets = self.make_buckets_equal(sample_seqlens, compute_estimator) + + # Initialize tracking structures + micro_batches = [[] for _ in range(total_gpus)] + exec_times = [0.0 for _ in range(total_gpus)] + sample_ids_per_gpu = [[] for _ in range(total_gpus)] + + gpu_group_id = [None] * total_gpus + group_members = {} + group_size = {} + next_gid = 0 + + pp_cursor = 0 + prev_needed = None + check_balance = False + + while buckets: + # ---- Step 1 – pick the next sequence we COULD place ------------------ + sample_seq_tuple = bucket_idx = None + needed = None + + scan_order = ( + range(len(buckets)) + if strategy == "dp" + else [(pp_cursor + i) % len(buckets) for i in range(len(buckets))] + ) + + for idx in scan_order: + if not buckets[idx]: + continue + cand_tuple = buckets[idx][0] # This is now (sample_id, seq_len) + cand_seq_len = cand_tuple[1] + needed = self.gpus_needed(cand_seq_len) + + # (a) Do we have an *existing* group of size `needed`? 
+ candidate_gids = [gid for gid, sz in group_size.items() if sz == needed] + + # (b) Or enough completely free GPUs to start a new group? + free_ranks = [r for r, gid in enumerate(gpu_group_id) if gid is None] + if candidate_gids or len(free_ranks) >= needed: + sample_seq_tuple, bucket_idx = cand_tuple, idx + break + + # No place to put any remaining sequence – finish this micro‑batch + if sample_seq_tuple is None: + break + + # TODO[pmannan]: PP not yet supported. Add PP scheduling. + if strategy == "pp": + pp_cursor = (bucket_idx + 1) % len(buckets) + + sample_id, seq_len = sample_seq_tuple + needed = self.gpus_needed(seq_len) + if prev_needed is None: + prev_needed = needed + + # (a) Existing groups of exactly this size + candidate_gids = [gid for gid, sz in group_size.items() if sz == needed] + if candidate_gids: + best_gid, best_load = min( + ( + (gid, max(exec_times[r] for r in group_members[gid])) + for gid in candidate_gids + ), + key=lambda t: t[1], + ) + else: + best_gid, best_load = None, float("inf") + + # (b) Hypothetical **new** group from completely free GPUs + free_ranks = [r for r, gid in enumerate(gpu_group_id) if gid is None] + if len(free_ranks) >= needed: + free_sorted = sorted(free_ranks, key=lambda r: exec_times[r]) + new_members = free_sorted[:needed] + new_load = exec_times[new_members[-1]] + + if new_load < best_load: + best_gid = None + chosen_members = new_members + else: + chosen_members = group_members[best_gid] + else: + chosen_members = group_members[best_gid] + + # ---- Step 2 – if we decided to create a fresh group ---------------- + if best_gid is None: + best_gid = next_gid + next_gid += 1 + group_members[best_gid] = chosen_members + group_size[best_gid] = needed + for r in chosen_members: + gpu_group_id[r] = best_gid + + # ---- Step 3 – assign the sequence to every member of that group ------ + per_gpu_cost = compute_estimator(seq_len) + + for r in chosen_members: + micro_batches[r].append(seq_len) + exec_times[r] += 
per_gpu_cost + sample_ids_per_gpu[r].append(sample_id) + + # Remove the sequence definitively from its bucket + buckets[bucket_idx].popleft() + + # ---- Step 4 – tidy, balance‑check, maybe early‑exit ------------------ + while buckets and not buckets[0]: + buckets.pop(0) + pp_cursor %= max(1, len(buckets)) + + # TODO: Removing this helps reduce the number of groups when we have + # lots of samples with same CP size. + # But because we don't exit as soon as we get balanced, + # even if there is one group available that can take the next sample, + # we will keep adding samples to the same group. + # trim_overload() does not help because it only checks if removing the + # last added sample helps. + # We cannot check after adding every sample because there will always be imbalance + # if we don't wait for future scheduling. + + # IMPORTANT: So we need a solution here + if needed < prev_needed: + # When we get into a lower CP size in the same group, + # we can start checking for balance. There is still a gotcha here. + # Let's say we have a group of 3 GPU 0-2, then we move onto group of 2. + # We keep assigning group of 2 as we do in descending order but GPU 7/15 + # never sees a microbatch assigned to it + # until we run out of samples with CP2. + # This means we are never balanced as min(exec_times) will always be 0. + # We need a smart way of identifying that we have run out of big samples + # and if we are having to assign work to a GPU already working, + # is it because there are empty GPUs? + # Would assigning work to empty GPUs first by moving onto next CP bucket help? + # But we need to remember to come back to this CP size bucket and then + # check for balance. Maybe the scheduling algorithm should look at empty + # GPUs and find work rather than going sequence by sequence. 
+ check_balance = True + + if ( + check_balance + and buckets + and max(exec_times) - min(exec_times) <= delta * max(exec_times) + ): + break + + # Gather leftovers (flatten remaining buckets, preserve order) + leftovers = [] + for b in buckets: + for sample_seq_tuple in b: + leftovers.append(sample_seq_tuple) + + # --------------------------------------------------------------------------- + def trim_overload(): + """ + Iteratively pop the most‑recent sequence from the *most‑loaded group* + whenever doing so reduces the global slack. + """ + while True: + cur_max = max(exec_times) + cur_min = min(exec_times) + cur_slack = cur_max - cur_min + if cur_slack <= delta * cur_max: + # Slack is already within limit. + break + if cur_min == 0: + # There are empty GPUs that will be + # handled in the next step. + break + + max_r = exec_times.index(cur_max) + gid = gpu_group_id[max_r] + members = group_members[gid] + + if not micro_batches[max_r] or len(micro_batches[max_r]) <= 1: + break + + seq = micro_batches[max_r][-1] + need = group_size[gid] + per_gpu_cost = compute_estimator(seq) + + proj_times = exec_times[:] + for r in members: + proj_times[r] -= per_gpu_cost + + proj_slack = max(proj_times) - min(proj_times) + + # Check if trimming the workload helps imbalance + if proj_slack < cur_slack: + sample_id_to_remove = sample_ids_per_gpu[max_r][-1] + for r in members: + micro_batches[r].pop() + exec_times[r] -= per_gpu_cost + sample_ids_per_gpu[r].pop() + leftovers.append((sample_id_to_remove, seq)) + else: + break + + trim_overload() + + # Track samples in this group before redistribution to empty GPUs + total_work_before = sum(len(mb) for mb in micro_batches) + + # Check for empty GPUs and redistribute work + def fill_empty_gpus( + micro_batches, exec_times, sample_ids_per_gpu, group_members, group_size + ): + """ + Recursively check for empty GPUs and redistribute work by increasing + the number of GPUs sharing samples. This ensures all GPUs have work. 
+ GPUs must be allocated consecutively so we may need to push existing + work to other ranks in order to expand samples. + """ + # Find empty GPUs + empty_gpus = [i for i in range(total_gpus) if not micro_batches[i]] + if not empty_gpus: + return ( + micro_batches, + exec_times, + sample_ids_per_gpu, + group_members, + group_size, + ) # No empty GPUs, we're done + + # Find the smallest group size that exists + existing_group_sizes = set(group_size.values()) + assert ( + existing_group_sizes + ), "There should be at least one group existing, cannot reditribute, " + "try to increase 'max-seqlen-per-cp-rank'." + + min_group_size = min(existing_group_sizes) + # We have Hybrid DPxCP groups for every power of 2 of GPUs or the entire DPxCP group. + next_power = min(min_group_size * 2, total_gpus) + + # Find the first group of min_group_size that can be expanded + expandable_gid = None + expandable_members = None + expandable_new_gpus = None + + for gid, size in group_size.items(): + if size == min_group_size: + members = group_members[gid] + needed_count = next_power - min_group_size + group_start_gpu = members[0] + group_end_gpu = members[-1] + empty_gpu = [idx for idx, work in enumerate(micro_batches) if not work][0] + assert not all( + work for work in micro_batches[empty_gpu : empty_gpu + needed_count] + ), f"Empty GPUs were detected but not enough to expand." 
+ work_to_push = micro_batches[ + group_end_gpu + 1 : empty_gpu + ] # This is work of all other subsequent sub-samples + exec_times_to_push = exec_times[group_end_gpu + 1 : empty_gpu] + sample_ids_to_push = sample_ids_per_gpu[group_end_gpu + 1 : empty_gpu] + + new_micro_batches = [[]] * len(micro_batches) + new_exec_times = [0.0] * len(exec_times) + new_sample_ids_per_gpu = [[]] * len(sample_ids_per_gpu) + + # No change in work until the group selected for expansion + for i in range(group_start_gpu): + new_micro_batches[i] = micro_batches[i] + new_exec_times[i] = exec_times[i] + new_sample_ids_per_gpu[i] = sample_ids_per_gpu[i] + + # The work is distributed across the expanded group + for i in range(group_start_gpu, group_end_gpu + needed_count + 1): + new_micro_batches[i] = micro_batches[group_end_gpu] + new_exec_times[i] = self.get_total_workload( + micro_batches[group_end_gpu][0], next_power + ) + new_sample_ids_per_gpu[i] = sample_ids_per_gpu[group_end_gpu] + + # Any assigned work on expanded GPUs is pushed + for i, work in enumerate(work_to_push): + new_micro_batches[group_end_gpu + needed_count + 1 + i] = work + new_exec_times[group_end_gpu + needed_count + 1 + i] = exec_times_to_push[i] + new_sample_ids_per_gpu[group_end_gpu + needed_count + 1 + i] = ( + sample_ids_to_push[i] + ) + + group_size[gid] = next_power + group_members[gid] = list(range(members[0], members[-1] + needed_count + 1)) + for pushed_gid in group_size.keys(): + if pushed_gid > gid: + group_members[pushed_gid] = [ + x + needed_count for x in group_members[pushed_gid] + ] + + return ( + new_micro_batches, + new_exec_times, + new_sample_ids_per_gpu, + group_members, + group_size, + ) + + empty_gpus = any([not micro_batches[i] for i in range(total_gpus)]) + while empty_gpus: + micro_batches, exec_times, sample_ids_per_gpu, group_members, group_size = ( + fill_empty_gpus( + micro_batches, exec_times, sample_ids_per_gpu, group_members, group_size + ) + ) + empty_gpus = any([not micro_batches[i] 
for i in range(total_gpus)]) + + # Assert that no sample has been completely removed + total_work_after = sum(len(mb) for mb in micro_batches) + assert ( + total_work_after >= total_work_before + ), f"Samples were removed: {total_work_before} -> {total_work_after}" + + return micro_batches, leftovers, exec_times, sample_ids_per_gpu + + def get_groups_and_subsamples(self, sample_id_seqlens, config): + """ + This function recursively forms groups of sub-samples such that all DPxCP ranks + have a roughly balanced workload in the group. + """ + groups = [] + sample_id_groups = [] + # We assign a sample_id to each sub-sample in order to track assignment to each GPU. + sample_id_seqlens = sorted(sample_id_seqlens, key=lambda x: x[1], reverse=True) + while sample_id_seqlens: + mb, sample_id_seqlens, exec_times, sample_ids = self.next_hdp_group( + sample_id_seqlens, self.get_total_workload, self.total_hdp_gpus + ) + groups.append(mb) + if len(sample_ids) < self.total_hdp_gpus: + sample_ids.extend([] * (self.total_hdp_gpus - len(sample_ids))) + sample_id_groups.append(sample_ids) + + return groups, sample_id_groups + + +def hybrid_context_parallel_forward_backward( + forward_step_func, + data_iterator, + model, + num_microbatches, + input_tensor, + output_tensor_grad, + forward_data_store, + config, + collect_non_loss_data, + first_val_step, + forward_only, + no_sync_func, + total_num_tokens, + check_first_val_step, + model_type, +): + """ + Scheduler for Hybrid Context Parallel. + + This function performs the packed sample scheduling and determines + 1. The number of microbatches to schedule for each CP rank + 2. The number of groups each CP rank should execute + 3. The number of sub-samples per group each CP rank should execute + + A group is defined by a set of samples that can run across the CP domain without any barrier. + There are many reasons why we may not be able to run endless samples within a single group. 
+ For example, if we have 8 GPUs, + if GPU 0-5 are assigned a long sample that requires CP6, + GPU 6-7 are assigned a short sample that requires CP2, + The next sample which requires CP4 can be assigned GPU 4-7. + But GPU 6-7 will finish first and get deadlocked if GPU 4-5 are not participating in the group. + """ + from .schedules import backward_step, forward_step + + def _broadcast(item): + if item is not None: + torch.distributed.broadcast( + item, + parallel_state.get_tensor_model_parallel_src_rank(), + group=parallel_state.get_tensor_model_parallel_group(), + ) + + def _broadcast_num_samples_this_group(num_samples_this_group): + dev = torch.cuda.current_device() + torch.distributed.barrier() + + n = 0 if num_samples_this_group is None else int(num_samples_this_group.numel()) + n = torch.tensor([n], dtype=torch.int64, device=dev) + + _broadcast(n) + n = int(n.item()) + + assert n > 0, "there should be at least 1 sub samples in the group" + num_samples_this_group_broadcast = ( + torch.empty(n, dtype=torch.int32, device=dev) + if num_samples_this_group is None + else num_samples_this_group + ) + _broadcast(num_samples_this_group_broadcast) + return num_samples_this_group_broadcast + + def _get_new_data_iterator(sample_id_in_group, group_id): + if is_first_tp_rank: + sub_sample_id = sample_ids_this_group[sample_id_in_group] + sample = batch[sub_sample_id] + partner_cp_size = len( + [True for sample_ids in sample_id_groups[group_id] if sub_sample_id in sample_ids] + ) + sample["local_cp_size"] = torch.tensor(partner_cp_size, dtype=torch.int32) + new_data_iterator = RerunDataIterator(iter([sample])) + return new_data_iterator + else: + return None + + # We get data once per global batch and schedule the sub-samples. + # TODO(pmannan): Should we wrap the data_iterator here instead of the training.py file? 
+ hdp_rank = parallel_state.get_data_parallel_rank(with_context_parallel=True) + is_first_tp_rank = parallel_state.get_tensor_model_parallel_rank() == 0 + + if is_first_tp_rank: + data = next(data_iterator) + sample_id_groups = data[1] + batch = data[0] + else: + data, sample_id_groups, batch = None, None, None + + num_samples_this_group = None + if is_first_tp_rank: + num_samples_this_group = torch.tensor( + [len(group[hdp_rank]) for group in sample_id_groups], dtype=torch.int32, device='cuda' + ) + + num_samples_this_group = _broadcast_num_samples_this_group(num_samples_this_group) + num_samples_this_group = num_samples_this_group.cpu().numpy() + num_total_groups = num_samples_this_group.shape[0] + + current_microbatch = 0 + + # Upto last group, we don't need any sync. + with no_sync_func(): + for j in range(num_total_groups - 1): + sample_ids_this_group = sample_id_groups[j][hdp_rank] if is_first_tp_rank else None + for i in range(num_samples_this_group[j]): + # Call forward step for each sub-sample + new_data_iterator = _get_new_data_iterator(i, j) + # TODO: Find the usage of current_microbatch and is_first_microbatch and + # how that may affect my usage. + output_tensor, num_tokens = forward_step( + forward_step_func, + new_data_iterator, + model, + num_microbatches, + input_tensor, + forward_data_store, + config, + collect_non_loss_data, + is_first_microbatch=check_first_val_step( + first_val_step, forward_only, current_microbatch == 0 + ), + current_microbatch=current_microbatch, + ) + current_microbatch += 1 + total_num_tokens += num_tokens.item() + if not forward_only: + backward_step( + input_tensor, output_tensor, output_tensor_grad, model_type, config + ) + + # Create a barrier at end of each group. + # This barrier ensures that all ranks are prepared to change assigned CP group sizes and + # no rank is starting a sub-sample ahead of it's partner ranks. 
+ torch.distributed.barrier( + parallel_state.get_data_parallel_group(with_context_parallel=True) + ) + + # For the last group, we need to run the last sub-sample out of the context handler. + with no_sync_func(): + sample_ids_this_group = sample_id_groups[-1][hdp_rank] if is_first_tp_rank else None + for i in range(num_samples_this_group[-1] - 1): + new_data_iterator = _get_new_data_iterator(i, -1) + # Call forward step for each sub-sample + output_tensor, num_tokens = forward_step( + forward_step_func, + new_data_iterator, + model, + num_microbatches, + input_tensor, + forward_data_store, + config, + collect_non_loss_data, + is_first_microbatch=check_first_val_step( + first_val_step, forward_only, current_microbatch == 0 + ), + current_microbatch=current_microbatch, + ) + current_microbatch += 1 + total_num_tokens += num_tokens.item() + if not forward_only: + backward_step(input_tensor, output_tensor, output_tensor_grad, model_type, config) + + # The last sub-sample of the last group of the last microbatch is + # run out of the context handler. 
+ new_data_iterator = _get_new_data_iterator(-1, -1) + # Call forward step for each sub-sample + output_tensor, num_tokens = forward_step( + forward_step_func, + new_data_iterator, + model, + num_microbatches, + input_tensor, + forward_data_store, + config, + collect_non_loss_data, + is_first_microbatch=check_first_val_step( + first_val_step, forward_only, current_microbatch == 0 + ), + current_microbatch=current_microbatch, + ) + total_num_tokens += num_tokens.item() + if not forward_only: + backward_step(input_tensor, output_tensor, output_tensor_grad, model_type, config) + + return forward_data_store, total_num_tokens diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index 97d8aefad85..a8fdf2324f2 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -36,6 +36,7 @@ combined_1f1b_schedule_for_interleaved_pipelining, combined_1f1b_schedule_for_no_pipelining, ) +from .hybrid_cp_schedule import hybrid_context_parallel_forward_backward # Types Shape = Union[List[int], torch.Size] @@ -607,6 +608,24 @@ def forward_backward_no_pipelining( total_num_tokens, partial(check_first_val_step, first_val_step, forward_only), ) + elif config.hybrid_context_parallel: + forward_data_store, total_num_tokens = hybrid_context_parallel_forward_backward( + forward_step_func, + data_iterator, + model, + num_microbatches, + input_tensor, + output_tensor_grad, + forward_data_store, + config, + collect_non_loss_data, + first_val_step, + forward_only, + no_sync_func, + total_num_tokens, + check_first_val_step, + model_type, + ) else: with no_sync_func(): for i in range(num_microbatches - 1): diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 5cf22d25a4b..3c1c05f8c86 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -849,7 +849,7 @@ def forward( ) ) - if packed_seq_params is not None: 
+ if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd': query = query.squeeze(1) key = key.squeeze(1) value = value.squeeze(1) @@ -864,7 +864,7 @@ def forward( ): q_pos_emb, k_pos_emb = rotary_pos_emb - if packed_seq_params is not None: + if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd': if packed_seq_params.cu_seqlens_q_padded is not None: cu_seqlens_q = packed_seq_params.cu_seqlens_q_padded else: diff --git a/megatron/core/transformer/experimental_attention_variant/dsa.py b/megatron/core/transformer/experimental_attention_variant/dsa.py index fc994490b1b..353b31e9bcd 100644 --- a/megatron/core/transformer/experimental_attention_variant/dsa.py +++ b/megatron/core/transformer/experimental_attention_variant/dsa.py @@ -546,10 +546,14 @@ def forward_with_scores( None, None, x, self.config, packed_seq_params ) if self.config.rope_type == "rope": - rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len, packed_seq=False) + rotary_pos_emb = self.rotary_pos_emb( + rotary_seq_len, packed_seq_params=packed_seq_params + ) mscale = 1.0 else: - rotary_pos_emb, mscale = self.rotary_pos_emb(rotary_seq_len, packed_seq=False) + rotary_pos_emb, mscale = self.rotary_pos_emb( + rotary_seq_len, packed_seq_params=packed_seq_params + ) # ========================================= # Gather inputs if sp is enabled diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index b65294fcc10..ed90fdffa97 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -555,6 +555,11 @@ def get_query_key_value_tensors( assert ( hidden_states.ndim == 3 ), f"hidden_states should be 3D, [s, b, n*h], got {hidden_states.ndim}D" + if packed_seq_params is not None: + assert ( + packed_seq_params.local_cp_size is None + ), "hybrid_context_parallel is not supported with MLA yet and is planned for future. 
\ + Please disable hybrid_context_parallel." inference_context = deprecate_inference_params(inference_context, inference_params) @@ -571,11 +576,13 @@ def get_query_key_value_tensors( rotary_pos_sin = None packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' if self.config.rope_type == "rope": - rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len, packed_seq=packed_seq) + rotary_pos_emb = self.rotary_pos_emb( + rotary_seq_len, packed_seq_params=packed_seq_params + ) else: if self.config.apply_rope_fusion: rotary_pos_cos, rotary_pos_sin = self.rotary_pos_emb.get_cached_cos_sin( - rotary_seq_len, dtype=hidden_states.dtype, packed_seq=packed_seq + rotary_seq_len, dtype=hidden_states.dtype, packed_seq_params=packed_seq_params ) rotary_pos_emb = None assert inference_context is None, "Inference with MLA RoPE fusion is not supported" @@ -584,9 +591,11 @@ def get_query_key_value_tensors( and fused_apply_mla_rope_for_kv is not None ), "Fused MLA RoPE apply is not imported successfully" else: - rotary_pos_emb, mscale = self.rotary_pos_emb(rotary_seq_len, packed_seq=packed_seq) + rotary_pos_emb, mscale = self.rotary_pos_emb( + rotary_seq_len, packed_seq_params=packed_seq_params + ) - if packed_seq_params is not None: + if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd': if packed_seq_params.cu_seqlens_q_padded is not None: cu_seqlens_q = packed_seq_params.cu_seqlens_q_padded else: diff --git a/megatron/core/utils.py b/megatron/core/utils.py index 91b15dabf74..3a153468ae6 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -59,6 +59,15 @@ logger = logging.getLogger(__name__) +try: + # Register the TE CUDA kernels + import transformer_engine # pylint: disable=unused-import + + # Alias the PyTorch wrapper so we can call tex.* APIs + import transformer_engine_torch as tex +except ImportError: + # TE isn’t installed or the torch wrapper is missing + tex = None try: _torch_version = 
PkgVersion(torch.__version__) @@ -1976,7 +1985,7 @@ def get_batch_on_this_cp_rank( if cp_size > 1: for key, val in batch.items(): if val is not None: - seq_dim = 1 if key != "attention_mask" else 2 + seq_dim = 1 if key != 'attention_mask' else 2 val = val.view( *val.shape[0:seq_dim], 2 * cp_size, @@ -1993,6 +2002,103 @@ def get_batch_on_this_cp_rank( return batch +def get_thd_batch_on_this_cp_rank( + batch: Dict[str, Any], + cu_seqlens: torch.Tensor, + cu_seqlens_padded: torch.Tensor, + max_seqlen: torch.Tensor, + cp_group: Optional[torch.distributed.ProcessGroup] = None, +): + """Slice each sub-sample in a packed sample batch input along + sequence dimension into multiple chunks, which are parallelized + across GPUs in a context parallel group. + """ + packed_seq_params = PackedSeqParams( + qkv_format="thd", + cu_seqlens_q=cu_seqlens, + cu_seqlens_kv=cu_seqlens, + cu_seqlens_q_padded=cu_seqlens_padded, + cu_seqlens_kv_padded=cu_seqlens_padded, + max_seqlen_q=int(max_seqlen[0].item()), + max_seqlen_kv=int(max_seqlen[0].item()), + ) + + if cp_group is not None: + cp_size = get_pg_size(cp_group) + cp_rank = get_pg_rank(cp_group) + else: + cp_size = parallel_state.get_context_parallel_world_size() + cp_rank = parallel_state.get_context_parallel_rank() + if cp_size > 1: # slice batch along sequence dimension for context parallelism + assert tex is not None and is_te_min_version("1.10.0"), ( + "Please update Transformer Engine to >= 1.10 to use " + "Context Parallel with THD format data" + ) + index = tex.thd_get_partitioned_indices( + cu_seqlens_padded, batch['tokens'].size(1), cp_size, cp_rank + ) + for key, data in batch.items(): + if key in {'attention_mask', 'cu_seqlens', 'cu_seqlens_padded', 'max_seqlen'}: + continue + batch[key] = data.index_select(1, index) + + return batch, packed_seq_params + + +################################ +### hybrid context parallel ### +################################ + + +def get_batch_on_this_hybrid_cp_rank( + batch: Dict[str, Any], 
+ local_cp_size: int, + cp_group: Optional[torch.distributed.ProcessGroup] = None, +): + """Slice batch input along sequence dimension into multiple chunks, + which are parallelized across GPUs in a context parallel group. + """ + assert local_cp_size is not None + if cp_group is None: + # Get the local cp group as defined by the HybridCPDataLoaderWrapper + if local_cp_size > 1: + cp_group = parallel_state.get_hybrid_data_context_parallel_groups( + group_size=local_cp_size + ) + else: + # If cp group is provided, it must match the local cp size + # as defined by the HybridCPDataLoaderWrapper + assert cp_group.size() == local_cp_size + + # Convert [seqlen] to [1, seqlen] similar to default collate_fn + # as hybrid_context_parallel dataloader wrapper does not go through default collate_fn + for key, data in batch.items(): + if key in ['attention_mask']: + continue + batch[key] = torch.stack([data], 0) + sample_length = batch['tokens'].shape[1] + # TODO(pmannan): Take care of padding tokens here if not divisible by cp_size*2 + # Create packed_seq_params for SBHD format with cp group information.
+ packed_seq_params = PackedSeqParams( + qkv_format="sbhd", + cu_seqlens_q=torch.tensor([0, sample_length], device="cuda", pin_memory=True), + cu_seqlens_kv=torch.tensor([0, sample_length], device="cuda", pin_memory=True), + cu_seqlens_q_padded=torch.tensor([0, sample_length], device="cuda", pin_memory=True), + cu_seqlens_kv_padded=torch.tensor([0, sample_length], device="cuda", pin_memory=True), + max_seqlen_q=sample_length, + max_seqlen_kv=sample_length, + local_cp_size=local_cp_size, + cp_group=cp_group, + ) + + if cp_group is not None and cp_group.size() > 1: + # When using hybrid_context_parallel, each sub-sample of a packed sample is + # required to be divisible by CP*DP*2 or CP*DP*TP*2 (if using sequence parallel) + batch = get_batch_on_this_cp_rank(batch, cp_group) + + return batch, packed_seq_params + + ###################### ### NVTX profiling ### ###################### diff --git a/megatron/legacy/data/data_samplers.py b/megatron/legacy/data/data_samplers.py index 1bf1bf5ee91..79bdc7b193f 100644 --- a/megatron/legacy/data/data_samplers.py +++ b/megatron/legacy/data/data_samplers.py @@ -34,13 +34,22 @@ def build_pretraining_data_loader(dataset, consumed_samples): data_parallel_rank=mpu.get_data_parallel_rank(), data_parallel_size=mpu.get_data_parallel_world_size()) elif args.dataloader_type == 'single': - # Megatron sampler - batch_sampler = MegatronPretrainingSampler( - total_samples=len(dataset), - consumed_samples=consumed_samples, - micro_batch_size=args.micro_batch_size, - data_parallel_rank=mpu.get_data_parallel_rank(), - data_parallel_size=mpu.get_data_parallel_world_size()) + if args.hybrid_context_parallel: + batch_sampler = HybridCPMegatronPretrainingSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=args.micro_batch_size, + global_batch_size=args.global_batch_size, + data_parallel_rank=mpu.get_data_parallel_rank(), + data_parallel_size=mpu.get_data_parallel_world_size()) + else: + # Megatron sampler + 
batch_sampler = MegatronPretrainingSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=args.micro_batch_size, + data_parallel_rank=mpu.get_data_parallel_rank(), + data_parallel_size=mpu.get_data_parallel_world_size()) elif args.dataloader_type == 'cyclic': batch_sampler = MegatronPretrainingRandomSampler( dataset, @@ -59,11 +68,16 @@ def build_pretraining_data_loader(dataset, consumed_samples): args.dataloader_type)) # Torch dataloader. + if args.hybrid_context_parallel: + extra_kwargs = {"collate_fn": lambda x: x,} + else: + extra_kwargs = {} return torch.utils.data.DataLoader(dataset, batch_sampler=batch_sampler, num_workers=args.num_workers, pin_memory=True, persistent_workers=True if args.num_workers > 0 else False, + **extra_kwargs, ) class MegatronPretrainingSampler: @@ -114,6 +128,49 @@ def __iter__(self): start_idx, end_idx = self.get_start_end_idx() yield batch[start_idx:end_idx] +class HybridCPMegatronPretrainingSampler(MegatronPretrainingSampler): + """ + Data sampler for hybrid context parallel (Hybrid CP) format. + This data sampler pulls in the entire global batch at once across all data parallel ranks. + This helps provide the Hybrid CP Dataloader Wrapper to schedule and load balance sub-samples + of the entire global batch. 
+ """ + + def __init__(self, total_samples, consumed_samples, micro_batch_size, global_batch_size, + data_parallel_rank, data_parallel_size, drop_last=True): + super().__init__(total_samples, consumed_samples, micro_batch_size, data_parallel_rank, data_parallel_size, drop_last) + self.global_batch_size = global_batch_size + self.data_parallel_size = data_parallel_size + self.num_micro_batches = self.global_batch_size // self.micro_batch_times_data_parallel_size + + def __len__(self): + return self.total_samples + + def get_start_end_idx_global_batch(self): + start_idx = [self.data_parallel_rank * self.micro_batch_size + i * self.micro_batch_size * self.data_parallel_size for i in range(self.num_micro_batches)] + end_idx = [start_idx[i] + self.micro_batch_size for i in range(self.num_micro_batches)] + return start_idx, end_idx + + def __iter__(self): + batch = [] + # Last batch will be dropped if drop_last is not set False + for idx in range(self.consumed_samples, self.total_samples): + batch.append(idx) + if len(batch) == self.micro_batch_times_data_parallel_size * self.num_micro_batches: + start_idx, end_idx = self.get_start_end_idx_global_batch() + global_batch_idx = [] + for i in range(self.num_micro_batches): + global_batch_idx.extend(batch[start_idx[i]:end_idx[i]]) + yield global_batch_idx + batch = [] + + # Check the last partial batch and see drop_last is set + if len(batch) > 0 and not self.drop_last: + start_idx, end_idx = self.get_start_end_idx_global_batch() + global_batch_idx = [] + for i in range(self.num_micro_batches): + global_batch_idx.extend(batch[start_idx[i]:end_idx[i]]) + yield global_batch_idx class RandomSeedDataset(Dataset): diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 0cf2d006863..c413c346b69 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -956,6 +956,13 @@ def validate_args(args, defaults={}): if args.tp_comm_overlap: assert args.sequence_parallel == True, 'Tensor 
parallel communication/GEMM overlap can happen only when sequence parallelism is enabled' + if args.hybrid_context_parallel: + assert not args.pipeline_model_parallel_size > 1, 'Hybrid context parallelism not supported with pipeline parallelism' + assert not args.enable_cuda_graph, 'Hybrid context parallelism not supported with CUDA Graph' + assert not args.use_megatron_fsdp, 'Hybrid context parallelism not supported with Megatron FSDP' + assert args.dataloader_type == 'single', 'Hybrid context parallelism only supported with single dataloader type' + assert args.calculate_per_token_loss, 'Hybrid context parallelism must be used with --calculate-per-token-loss' + # disable async_tensor_model_parallel_allreduce when # model parallel memory optimization is enabled if (args.tensor_model_parallel_size > 1 or args.context_parallel_size > 1) \ @@ -2876,6 +2883,13 @@ def _add_distributed_args(parser): '--hierarchical-context-parallel-sizes 2 4 indicates every two adjacent gpus ' 'forms the first level of cp groups and the cp ranks with the same odevity ' 'forms the second level of cp groups.') + group.add_argument('--max-seqlen-per-cp-rank', type=int, default=None, + help='Maximum sequence length per CP rank. This is used to calculate the ' + 'number of sub-samples assigned to each CP rank when using heterogeneous context parallel.') + group.add_argument('--hybrid-context-parallel', action='store_true', default=False, + help='Enables hybrid context parallel. This is used to balance the workload ' + 'of each CP rank when we use packed samples with variable sequence lengths. ' + 'Requires --max-seqlen-per-cp-rank to be set.') group.add_argument('--nccl-communicator-config-path', type=str, default=None, help='Path to the yaml file with NCCL communicator ' 'configurations. 
The number of min/max thread groups and thread ' diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index 8b585fdd87b..fb9a3aa273b 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -369,6 +369,7 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s use_sharp=args.use_sharp, context_parallel_size=args.context_parallel_size, hierarchical_context_parallel_sizes=args.hierarchical_context_parallel_sizes, + hybrid_context_parallel=args.hybrid_context_parallel, expert_model_parallel_size=args.expert_model_parallel_size, num_distributed_optimizer_instances=args.num_distributed_optimizer_instances, expert_tensor_parallel_size=args.expert_tensor_parallel_size, diff --git a/megatron/training/training.py b/megatron/training/training.py index 99fbd453426..a732e3917e5 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -90,6 +90,7 @@ from megatron.training.initialize import set_jit_fusion_options from megatron.training.utils import get_batch_on_this_cp_rank, get_batch_on_this_tp_rank from megatron.legacy.data.data_samplers import build_pretraining_data_loader +from megatron.core.datasets.data_schedule import HybridCPDataLoaderWrapper from megatron.core.optimizer_param_scheduler import OptimizerParamScheduler from megatron.core.transformer.moe import upcycling_utils from megatron.core.transformer.moe.moe_utils import track_moe_metrics @@ -1451,28 +1452,14 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch for key in losses_reduced[0].keys(): val = [x[key].view(-1) for x in losses_reduced] if val[0].numel() == 2: - if args.sft: - # in mcore the normalization happens on micro batch instead of global - val = torch.vstack(val) - val = val[:, 0] / val[:, 1] - val = val.mean() - torch.distributed.all_reduce( - val, - group=mpu.get_data_parallel_group(with_context_parallel=True) - ) - val /= torch.distributed.get_world_size( - 
group=mpu.get_data_parallel_group(with_context_parallel=True) - ) - loss_reduced[key] = val - else: - # there is one dict per microbatch. in new reporting, we average - # over the total number of tokens across the global batch. - val = torch.vstack(val).sum(dim=0) - torch.distributed.all_reduce( - val, - group=mpu.get_data_parallel_group(with_context_parallel=True) - ) - loss_reduced[key] = val[0] / val[1] + # there is one dict per microbatch. in new reporting, we average + # over the total number of tokens across the global batch. + val = torch.vstack(val).sum(dim=0) + torch.distributed.all_reduce( + val, + group=mpu.get_data_parallel_group(with_context_parallel=True) + ) + loss_reduced[key] = val[0] / val[1] elif val[0].numel() == 1: # legacy behavior, we average over the number of microbatches val = torch.cat(val).mean() @@ -2173,6 +2160,9 @@ def train( energy_monitor = get_energy_monitor() one_logger = get_one_logger() + if args.hybrid_context_parallel: + train_data_iterator = iter(HybridCPDataLoaderWrapper(train_data_iterator, config)) + if args.run_workload_inspector_server: try: from workload_inspector.utils.webserver import run_server diff --git a/megatron/training/utils.py b/megatron/training/utils.py index 52a3bf36d88..4730a525271 100644 --- a/megatron/training/utils.py +++ b/megatron/training/utils.py @@ -541,19 +541,58 @@ def _broadcast(item): else data["attention_mask"].cuda(non_blocking=True) ), 'position_ids': data["position_ids"].cuda(non_blocking=True), + 'cu_seqlens': ( + None + if "cu_seqlens" not in data + else data["cu_seqlens"].cuda(non_blocking=True) + ), + 'max_seqlen': ( + None + if "max_seqlen" not in data + else data["max_seqlen"].cuda(non_blocking=True) + ), + 'local_cp_size': ( + None + if "local_cp_size" not in data + else data["local_cp_size"].cuda(non_blocking=True) + ), } + def _broadcast_cu_seqlens(cu_seqlens): + dev = torch.cuda.current_device() + n = 0 if cu_seqlens is None else int(cu_seqlens.numel()) + n_tensor = 
torch.tensor(n, dtype=torch.int64, device=dev) + _broadcast(n_tensor) + + if n == 0: + buf = torch.empty(0, dtype=torch.int32, device=dev) + else: + assert isinstance(cu_seqlens, torch.Tensor) + assert cu_seqlens.dtype == torch.int32 + assert cu_seqlens.shape[0] == 1, "micro-batch-size must be 1 for packing" + buf = cu_seqlens.to(device=dev, non_blocking=True).contiguous() + _broadcast(buf) + + if args.hybrid_context_parallel: + seq_len = torch.tensor(batch['tokens'].shape[0], dtype=torch.int32, device=torch.cuda.current_device()) + _broadcast(seq_len) + if args.pipeline_model_parallel_size == 1 or mtp_on_this_rank: _broadcast(batch['tokens']) _broadcast(batch['labels']) _broadcast(batch['loss_mask']) _broadcast(batch['attention_mask']) _broadcast(batch['position_ids']) + _broadcast_cu_seqlens(batch['cu_seqlens']) + _broadcast(batch['max_seqlen']) + _broadcast(batch['local_cp_size']) elif mpu.is_pipeline_first_stage(): _broadcast(batch['tokens']) _broadcast(batch['attention_mask']) _broadcast(batch['position_ids']) + _broadcast_cu_seqlens(batch['cu_seqlens']) + _broadcast(batch['max_seqlen']) elif mpu.is_pipeline_last_stage(): # Multi-Token Prediction (MTP) layers need tokens and position_ids to calculate embedding. 
@@ -564,42 +603,79 @@ def _broadcast(item): _broadcast(batch['attention_mask']) else: - + if args.hybrid_context_parallel: + seq_len = torch.tensor(0, dtype=torch.int32, device=torch.cuda.current_device()) + _broadcast(seq_len) + shape = (seq_len.item()) + else: + shape = (args.micro_batch_size, args.seq_length) + tokens = torch.empty( - (args.micro_batch_size, args.seq_length), + shape, dtype=torch.int64, device=torch.cuda.current_device(), ) labels = torch.empty( - (args.micro_batch_size, args.seq_length), + shape, dtype=torch.int64, device=torch.cuda.current_device(), ) loss_mask = torch.empty( - (args.micro_batch_size, args.seq_length), + shape, dtype=torch.float32, device=torch.cuda.current_device(), ) if args.create_attention_mask_in_dataloader: + shape_attention_mask = (args.micro_batch_size, 1, args.seq_length, args.seq_length) if not args.hybrid_context_parallel else (1, 1, shape[0], shape[0]) attention_mask = torch.empty( - (args.micro_batch_size, 1, args.seq_length, args.seq_length), + shape_attention_mask, dtype=torch.bool, device=torch.cuda.current_device(), ) else: attention_mask = None position_ids = torch.empty( - (args.micro_batch_size, args.seq_length), + shape, dtype=torch.int64, device=torch.cuda.current_device(), ) + cu_seqlens = None + max_seqlen = torch.empty( + 1, + dtype=torch.int32, + device=torch.cuda.current_device(), + ) if args.hybrid_context_parallel else None + local_cp_size = torch.empty( + 1, + dtype=torch.int32, + device=torch.cuda.current_device(), + ) if args.hybrid_context_parallel else None + + def _broadcast_cu_seqlens(): + dev = torch.cuda.current_device() + + n = torch.empty((), dtype=torch.int64, device=dev) + _broadcast(n) + n = int(n.item()) + + if n == 0: + cu_seqlens = torch.empty(0, dtype=torch.int32, device=dev) + else: + cu_seqlens = torch.empty((args.micro_batch_size, n), dtype=torch.int32, device=dev) + _broadcast(cu_seqlens) + + return cu_seqlens if n > 0 else None + if args.pipeline_model_parallel_size == 1 or 
mtp_on_this_rank: _broadcast(tokens) _broadcast(labels) _broadcast(loss_mask) _broadcast(attention_mask) _broadcast(position_ids) + cu_seqlens = _broadcast_cu_seqlens() + _broadcast(max_seqlen) + _broadcast(local_cp_size) elif mpu.is_pipeline_first_stage(): labels = None @@ -608,6 +684,8 @@ def _broadcast(item): _broadcast(tokens) _broadcast(attention_mask) _broadcast(position_ids) + cu_seqlens = _broadcast_cu_seqlens() + _broadcast(max_seqlen) elif mpu.is_pipeline_last_stage(): # Multi-Token Prediction (MTP) layers need tokens and position_ids to calculate embedding. @@ -615,7 +693,8 @@ def _broadcast(item): # to broadcast tokens and position_ids to all of the tensor parallel ranks on the last stage. tokens = None position_ids = None - + cu_seqlens = None + max_seqlen = None _broadcast(labels) _broadcast(loss_mask) _broadcast(attention_mask) @@ -626,6 +705,9 @@ def _broadcast(item): 'loss_mask': loss_mask, 'attention_mask': attention_mask, 'position_ids': position_ids, + 'cu_seqlens': cu_seqlens, + 'max_seqlen': max_seqlen, + 'local_cp_size': local_cp_size, } return batch diff --git a/pretrain_gpt.py b/pretrain_gpt.py index ecb7163ff70..e976f5aff79 100644 --- a/pretrain_gpt.py +++ b/pretrain_gpt.py @@ -14,9 +14,9 @@ from megatron.core.enums import ModelType from megatron.core.models.gpt import GPTModel from megatron.core.rerun_state_machine import get_rerun_state_machine +from megatron.core.utils import get_attr_wrapped_model, get_thd_batch_on_this_cp_rank, get_batch_on_this_hybrid_cp_rank, StragglerDetector from megatron.core.tokenizers.text.utils.build_tokenizer import build_tokenizer from megatron.core.transformer.multi_token_prediction import mtp_on_this_rank, get_mtp_ranks -from megatron.core.utils import StragglerDetector, get_attr_wrapped_model from megatron.training.arguments import core_transformer_config_from_args from megatron.training import get_args, get_timers, get_tokenizer, inprocess_restart, pretrain, print_rank_0 from 
megatron.training.datasets.sft_dataset import SFTDataset @@ -46,7 +46,7 @@ def get_batch(data_iterator, vp_stage: Optional[int] = None): # TODO: this is pretty hacky, find a better way if not is_first_or_last_pipeline_stage(vp_stage) and ( (not mtp_on_this_rank(config, ignore_virtual=False, vp_stage=vp_stage))): - return None, None, None, None, None + return None, None, None, None, None, None # get batches based on the TP rank you are on batch = get_batch_on_this_tp_rank( @@ -54,10 +54,24 @@ def get_batch(data_iterator, vp_stage: Optional[int] = None): mtp_on_this_rank=mtp_on_this_rank(config, ignore_virtual=False, vp_stage=vp_stage) ) - # slice batch along sequence dimension for context parallelism - batch = get_batch_on_this_cp_rank(batch) - - return batch.values() + cu_seqlens = batch.pop('cu_seqlens', None) + cu_seqlens_padded = batch.pop('cu_seqlens_padded', None) + max_seqlen = batch.pop('max_seqlen', None) + local_cp_size = batch.pop('local_cp_size', None) + if local_cp_size is not None: + local_cp_size = int(local_cp_size.item()) + + if cu_seqlens is None and local_cp_size is None: + # slice batch along sequence dimension for context parallelism + batch = get_batch_on_this_cp_rank(batch) # The implementation of this function is in MCore + packed_seq_params = None + elif local_cp_size is None: # Packed THD format + assert max_seqlen.dim() == 1 + batch, packed_seq_params = get_thd_batch_on_this_cp_rank(batch, cu_seqlens, cu_seqlens_padded, max_seqlen) + else: # Hybrid CP format + batch, packed_seq_params = get_batch_on_this_hybrid_cp_rank(batch, local_cp_size) + + return (*batch.values(), packed_seq_params) # define spiky loss as a loss that's 10x the max loss observed @@ -142,7 +156,7 @@ def forward_step(data_iterator, model: GPTModel, return_schedule_plan: bool = Fa global stimer with stimer(bdata=True): vp_stage = get_attr_wrapped_model(model, "vp_stage") - tokens, labels, loss_mask, attention_mask, position_ids = get_batch(data_iterator, vp_stage) + 
tokens, labels, loss_mask, attention_mask, position_ids, packed_seq_params = get_batch(data_iterator, vp_stage) timers('batch-generator').stop() with stimer: @@ -158,7 +172,7 @@ def forward_step(data_iterator, model: GPTModel, return_schedule_plan: bool = Fa return schedule_plan, partial(loss_func, loss_mask, model=model) else: output_tensor = model( - tokens, position_ids, attention_mask, labels=labels, loss_mask=loss_mask + tokens, position_ids, attention_mask, labels=labels, loss_mask=loss_mask, packed_seq_params=packed_seq_params ) # [ModelOpt]: model is needed to access ModelOpt distillation losses @@ -204,6 +218,10 @@ def core_gpt_dataset_config_from_args(args): object_storage_cache_path=args.object_storage_cache_path, mid_level_dataset_surplus=args.mid_level_dataset_surplus, allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens, + context_parallel_size=args.context_parallel_size, + data_parallel_size=args.data_parallel_size, + sequence_parallel_size=args.tensor_model_parallel_size*args.sequence_parallel, + hybrid_context_parallel=args.hybrid_context_parallel, ) diff --git a/pretrain_mamba.py b/pretrain_mamba.py index 45b646a6cc0..ca2008620be 100644 --- a/pretrain_mamba.py +++ b/pretrain_mamba.py @@ -44,6 +44,13 @@ def get_batch(data_iterator, vp_stage=None): # get batches based on the TP rank you are on batch = get_batch_on_this_tp_rank(data_iterator) + + # Support for Packed Sequence (Unused in this script) + cu_seqlens = batch.pop('cu_seqlens', None) + cu_seqlens_padded = batch.pop('cu_seqlens_padded', None) + max_seqlen = batch.pop('max_seqlen', None) + # Support for Hybrid Context Parallel (Unused in this script) + local_cp_size = batch.pop('local_cp_size', None) # slice batch along sequence dimension for context parallelism batch = get_batch_on_this_cp_rank(batch) diff --git a/tests/unit_tests/test_parallel_state.py b/tests/unit_tests/test_parallel_state.py index 7218ed5b6e1..0c722ee0257 100644 --- a/tests/unit_tests/test_parallel_state.py +++ 
b/tests/unit_tests/test_parallel_state.py @@ -1,5 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +from math import log2 + import pytest import torch @@ -499,3 +501,32 @@ def golden_rank_result_from_past_code( assert expert_dp_group == expert_rank_generator.get_ranks( "dp" ), f"{expert_dp_group} != {expert_rank_generator.get_ranks('dp')}." + + +@pytest.mark.parametrize( + "world_size, tp_size, cp_size, dp_size", + [(8, 1, 2, 4), (8, 1, 1, 8)], # 8 GPUs, 1 TP, 2 CP, 4 DP # 8 GPUs, 1 TP, 1 CP, 8 DP +) +def test_hybrid_dp_cp_groups(world_size, tp_size, cp_size, dp_size): + """ + Test that hybrid DPxCP groups are created correctly. + """ + Utils.destroy_model_parallel() + + # Skip if world size doesn't match + actual_world_size = torch.cuda.device_count() + if actual_world_size != world_size: + pytest.skip(f"Test requires world_size={world_size}, but got {actual_world_size}") + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp_size, + context_parallel_size=cp_size, + hybrid_context_parallel=True, + ) + + dp_cp_size = ps.get_data_parallel_world_size(with_context_parallel=True) + group_sizes = [2**i for i in range(int(log2(dp_cp_size)))][1:] + for group_size in group_sizes: + group = ps.get_hybrid_data_context_parallel_groups(group_size=group_size) + assert group.size() == group_size + + Utils.destroy_model_parallel() From 87ac13dbe71bfbf88dff81f6cfe87f9dcf8a88db Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Thu, 4 Dec 2025 15:57:03 -0800 Subject: [PATCH 178/334] update API compat check baseline to 274e04d (#2548) Signed-off-by: Pablo Garay --- .../workflows/check_api_backwards_compatibility_workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check_api_backwards_compatibility_workflow.yml b/.github/workflows/check_api_backwards_compatibility_workflow.yml index 002a18194a3..0ccaa8ccc5e 100644 --- a/.github/workflows/check_api_backwards_compatibility_workflow.yml +++ 
b/.github/workflows/check_api_backwards_compatibility_workflow.yml @@ -91,7 +91,7 @@ jobs: # Default baseline for automatic PR checks # Can be: branch name (e.g., 'main'), commit hash, or tag # Will be resolved to commit hash during execution - DEFAULT_BASELINE: 'b0f5746735a965e67852d936a8fd0ef8928e9a81' + DEFAULT_BASELINE: '274e04d21fbcb7f53f63de992ee1217f275f1cf2' # Tag pattern for auto-detection (e.g., 'core_r*', 'core_v*') TAG_PATTERN: 'core_v*' # Tag regex filter (e.g., '^core_v[0-9]+\.[0-9]+\.[0-9]+$' for stable versions only) From f0c1b55eee7dd9dd208d6b0c7b33a45dc1e9cba8 Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Thu, 4 Dec 2025 16:35:46 -0800 Subject: [PATCH 179/334] feat: mcore trigger mbridge (#2340) (#2552) Signed-off-by: Pablo Garay --- .github/workflows/trigger-mbridge-tests.yml | 183 ++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 .github/workflows/trigger-mbridge-tests.yml diff --git a/.github/workflows/trigger-mbridge-tests.yml b/.github/workflows/trigger-mbridge-tests.yml new file mode 100644 index 00000000000..b1a3aa0089d --- /dev/null +++ b/.github/workflows/trigger-mbridge-tests.yml @@ -0,0 +1,183 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +name: Trigger MBridge Tests +# Remote testing of MBridge from MCore +# Triggers MBridge CI tests with current MCore commit to verify backward compatibility + +on: + # Manual trigger only + workflow_dispatch: + inputs: + mbridge_ref: + description: 'MBridge branch/ref to trigger' + required: false + type: string + default: 'main' + run_cicd_main: + description: 'Run cicd-main.yml (full CI/CD)' + required: false + type: boolean + default: true + run_install_test: + description: 'Run install-test.yml (quick install check)' + required: false + type: boolean + default: true + test_suite: + description: 'Test suite to run (for cicd-main)' + required: false + type: choice + options: + - 'all' + - 'unit-only' + - 'functional-only' + default: 'all' + +jobs: + # First job: Get MCore commit info (shared by all matrix jobs) + get-mcore-info: + runs-on: ubuntu-latest + outputs: + sha: ${{ steps.mcore_info.outputs.sha }} + short_sha: ${{ steps.mcore_info.outputs.short_sha }} + branch: ${{ steps.mcore_info.outputs.branch }} + repo_url: ${{ steps.mcore_info.outputs.repo_url }} + steps: + - name: Checkout MCore + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Get MCore commit info + id: mcore_info + run: | + echo "sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT + echo "short_sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + echo "branch=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT + + # Get repo URL from origin remote, fallback to constructing from github context + REPO_URL=$(git remote get-url origin 2>/dev/null || echo "${{ github.server_url }}/${{ github.repository }}.git") + echo "repo_url=${REPO_URL}" >> $GITHUB_OUTPUT + + echo "📦 MCore commit: $(git rev-parse --short HEAD)" + echo "🌿 Branch: ${GITHUB_REF#refs/heads/}" + echo "📍 Repo: ${REPO_URL}" + + # Matrix job: Trigger and monitor MBridge workflows in parallel + trigger-and-monitor: + needs: [get-mcore-info] + runs-on: ubuntu-latest + continue-on-error: true # 
Don't fail workflow if monitoring times out + strategy: + fail-fast: false # Continue other matrix jobs even if one fails + matrix: + include: + - workflow: install-test.yml + name: Install Test + - workflow: cicd-main.yml + name: CI/CD Main + + name: ${{ matrix.name }} + + steps: + - name: Check if workflow should run + id: should_run + run: | + if [[ "${{ matrix.workflow }}" == "install-test.yml" && "${{ inputs.run_install_test }}" == "true" ]]; then + echo "run=true" >> $GITHUB_OUTPUT + elif [[ "${{ matrix.workflow }}" == "cicd-main.yml" && "${{ inputs.run_cicd_main }}" == "true" ]]; then + echo "run=true" >> $GITHUB_OUTPUT + else + echo "run=false" >> $GITHUB_OUTPUT + echo "⏭️ Skipping ${{ matrix.workflow }} (not enabled)" + fi + + - name: Trigger ${{ matrix.workflow }} + if: steps.should_run.outputs.run == 'true' + id: trigger + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + echo "🚀 Triggering ${{ matrix.workflow }} | MCore: ${{ needs.get-mcore-info.outputs.short_sha }} | MBridge: ${{ inputs.mbridge_ref }}" + + gh workflow run ${{ matrix.workflow }} \ + --repo NVIDIA-NeMo/Megatron-Bridge --ref ${{ inputs.mbridge_ref }} \ + --field mcore_commit=${{ needs.get-mcore-info.outputs.sha }} \ + --field mcore_branch=${{ needs.get-mcore-info.outputs.branch }} \ + --field mcore_repo=${{ needs.get-mcore-info.outputs.repo_url }} \ + --field test_suite=${{ inputs.test_suite }} \ + --field triggered_by=mcore-ci + + - name: Get run ID + if: steps.should_run.outputs.run == 'true' + id: get_run_id + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + sleep 10 # Wait for run to appear + RUN_ID=$(gh run list \ + --repo NVIDIA-NeMo/Megatron-Bridge \ + --workflow=${{ matrix.workflow }} \ + --limit 5 \ + --json databaseId,createdAt \ + --jq "sort_by(.createdAt) | reverse | .[0] | .databaseId") + + echo "run_id=${RUN_ID}" >> $GITHUB_OUTPUT + echo "📋 Run ID: ${RUN_ID}" + + cat >> $GITHUB_STEP_SUMMARY << EOF + ## 🔄 ${{ matrix.name }} Triggered + + **MCore:** \`${{ 
needs.get-mcore-info.outputs.short_sha }}\` | **MBridge:** \`${{ inputs.mbridge_ref }}\` | **Suite:** \`${{ inputs.test_suite }}\` + + - 🔄 [${{ matrix.workflow }}](https://github.com/NVIDIA-NeMo/Megatron-Bridge/actions/runs/${RUN_ID}) - Running... + - ⏳ Monitoring every 5 minutes until completion + + > **Note:** Tests run without approval when triggered from MCore + EOF + + - name: Monitor workflow + if: steps.should_run.outputs.run == 'true' + id: monitor + continue-on-error: true + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + RUN_ID="${{ steps.get_run_id.outputs.run_id }}" + echo "📊 Monitoring ${{ matrix.workflow }} (Run ID: ${RUN_ID})" + + gh run watch ${RUN_ID} --repo NVIDIA-NeMo/Megatron-Bridge --exit-status + + CONCLUSION=$(gh run view ${RUN_ID} --repo NVIDIA-NeMo/Megatron-Bridge --json conclusion --jq -r .conclusion) + echo "workflow_status=${CONCLUSION}" >> $GITHUB_ENV + echo "✅ Completed: ${CONCLUSION}" + + - name: Report results + if: always() && steps.should_run.outputs.run == 'true' + run: | + CONCLUSION="${{ env.workflow_status || 'unknown' }}" + RUN_ID="${{ steps.get_run_id.outputs.run_id }}" + + case "$CONCLUSION" in + "success") ICON="✅"; MSG="passed" ;; + "failure") ICON="❌"; MSG="failed"; EXIT_CODE=1 ;; + "cancelled") ICON="🚫"; MSG="cancelled"; EXIT_CODE=0 ;; + *) ICON="⏳"; MSG="still running or timed out"; EXIT_CODE=0 ;; + esac + + cat >> $GITHUB_STEP_SUMMARY << EOF + ## 📊 ${{ matrix.name }} Results + + ### ${ICON} ${{ matrix.workflow }} + **Status:** \`${CONCLUSION}\` + + [View full results →](https://github.com/NVIDIA-NeMo/Megatron-Bridge/actions/runs/${RUN_ID}) + + --- + *Triggered from MCore \`${{ needs.get-mcore-info.outputs.short_sha }}\`* + EOF + + echo "${ICON} ${{ matrix.name }} ${MSG}" + exit ${EXIT_CODE:-0} + From 8de5a7f192d7e63b10af3677330e0f4f6e3fbb5d Mon Sep 17 00:00:00 2001 From: Robin Zhang Date: Fri, 5 Dec 2025 09:58:26 +0800 Subject: [PATCH 180/334] [Dev] Optimize TE CUDA Graph capturing time (#2483) Signed-off-by: Robin 
Zhang --- megatron/core/transformer/cuda_graphs.py | 50 +++++++++++++--- .../transformer/test_cuda_graphs.py | 59 +++++++++++-------- 2 files changed, 74 insertions(+), 35 deletions(-) diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index f0fb39e6500..9f2bb2dd5f2 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -3,6 +3,7 @@ import gc import inspect import logging +import math import os import time from collections import defaultdict @@ -1401,6 +1402,9 @@ def __init__(self, model, config, seq_length, micro_batch_size, optimizers=[]): self.optimizers = optimizers self.num_model_chunks = len(model) + # Number of microbatches to capture. The value will be set in _get_cuda_graph_input_data(). + self.num_microbatches = None + # Get callables with captureable layers. self.chunks_with_decoder = [] self.num_layers_per_chunk = [] @@ -1536,12 +1540,12 @@ def _get_sample_arguments(self, order): order ), "num_model_chunks must match the max chunk id in order." assert ( - get_num_microbatches() == len(order) // self.num_model_chunks // 2 + self.num_microbatches == len(order) // self.num_model_chunks // 2 ), "num_microbatches must match the number of microbatches in order." # Generate sample arguments and keyword arguments for capturing. 
- sample_args = [None] * (len(self.flattened_callables) * get_num_microbatches()) - sample_kwargs = [None] * (len(self.flattened_callables) * get_num_microbatches()) + sample_args = [None] * (len(self.flattened_callables) * self.num_microbatches) + sample_kwargs = [None] * (len(self.flattened_callables) * self.num_microbatches) rotary_pos_emb_cache = {} @@ -1623,7 +1627,7 @@ def get_rotary_pos_emb(transformer_module, transformer_input): model_chunk_idx = abs(chunk_id) - 1 if chunk_id > 0: - sample_start_idx = (prefix_num_layers[model_chunk_idx] * get_num_microbatches()) + ( + sample_start_idx = (prefix_num_layers[model_chunk_idx] * self.num_microbatches) + ( fwd_idx[model_chunk_idx] * self.num_layers_per_chunk[model_chunk_idx] ) fwd_sample_idx = [ @@ -1691,14 +1695,23 @@ def _get_cuda_graph_input_data(self): get_schedule_table, ) + # If PP is not enabled, we only need to capture one microbatch. + if parallel_state.get_pipeline_model_parallel_world_size() == 1: + assert ( + self.num_model_chunks == 1 + ), "If PP is not enabled, there should be only one model chunk." + self.num_microbatches = 1 + else: + self.num_microbatches = get_num_microbatches() + _, _, num_warmup_microbatches, _ = get_pp_rank_microbatches( - get_num_microbatches(), + self.num_microbatches, self.num_model_chunks, self.config.microbatch_group_size_per_vp_stage, False, ) schedule_table = get_schedule_table( - get_num_microbatches(), + self.num_microbatches, self.num_model_chunks, self.config.microbatch_group_size_per_vp_stage, ) @@ -1717,7 +1730,21 @@ def _get_cuda_graph_input_data(self): sample_args, sample_kwargs = self._get_sample_arguments(order) def get_make_graphed_callables_kwargs(): - kwargs = {'num_warmup_iters': 11, 'allow_unused_input': True, '_order': order} + kwargs = {'allow_unused_input': True, '_order': order} + + # Calculate the number of warmup iterations per layer per microbatch inside TE + # make_graphed_callables(). There are two rules: + # 1. 
There should be at least 1 warmup iteration per layer per microbatch inside TE + # make_graphed_callables(). + # 2. There should be at least 10 warmup iterations per layer, counting the MCore warmup + # steps before going into this capture routine. + kwargs['num_warmup_iters'] = max( + 1, + math.ceil( + (10 - self.config.cuda_graph_warmup_steps * get_num_microbatches()) + / self.num_microbatches + ), + ) if is_te_min_version("2.6.0"): # Starting from TE 2.6.0, make_graphed_callables() accepts different number @@ -1780,6 +1807,8 @@ def _start_capturing(self): torch.distributed.barrier() gc.collect() torch.cuda.empty_cache() + if FREEZE_GC: + gc.freeze() _set_capture_start() log_single_rank(logger, logging.INFO, f'Start CUDA Graphs capture...') @@ -1807,6 +1836,9 @@ def _finish_capturing(self, start_time): optimizer.zero_grad() clear_aux_losses_tracker() reset_model_temporary_tensors(self.config, self.model) + + if FREEZE_GC: + gc.unfreeze() gc.collect() torch.cuda.empty_cache() @@ -1827,10 +1859,10 @@ def create_cudagraphs(self): for layers in self.callables_per_chunk: for layer_number, layer in enumerate(layers): layer.cuda_graphs = [] - for batch_number in range(get_num_microbatches()): + for batch_number in range(self.num_microbatches): layer.cuda_graphs.append( graphs[ - num_layers_accumulated * get_num_microbatches() + num_layers_accumulated * self.num_microbatches + batch_number * len(layers) + layer_number ] diff --git a/tests/unit_tests/transformer/test_cuda_graphs.py b/tests/unit_tests/transformer/test_cuda_graphs.py index 0eac7c28c6d..8133a3d2db0 100644 --- a/tests/unit_tests/transformer/test_cuda_graphs.py +++ b/tests/unit_tests/transformer/test_cuda_graphs.py @@ -742,18 +742,14 @@ def test_capture_freeze_gc(self): ) -# Global storage for comparing unique buffer counts across different num_microbatches -_unique_buffer_counts = None +# Global storage for comparing unique buffer counts across different num_microbatches, keyed by pp_size 
+_unique_buffer_counts = {} class TestTECudaGraphHelper: def setup_method(self, method): # Initialize parallel state initialize_rng_tracker(use_te_rng_tracker=True, force_reset=True) - Utils.initialize_model_parallel( - tensor_model_parallel_size=1, pipeline_model_parallel_size=1 - ) - model_parallel_cuda_manual_seed(123) def teardown_method(self, method): Utils.destroy_model_parallel() @@ -763,9 +759,14 @@ def teardown_method(self, method): # compare values across parametrized test runs @pytest.mark.parametrize("num_microbatches", [4, 16, 64, 256]) - def test_get_cuda_graph_input_data(self, num_microbatches): + @pytest.mark.parametrize("pp_size", [1, 2, 4]) + def test_get_cuda_graph_input_data(self, num_microbatches, pp_size): """Test _get_cuda_graph_input_data function in TECudaGraphHelper.""" + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=pp_size + ) + # Set up test configuration seq_length = 128 micro_batch_size = 2 @@ -794,7 +795,8 @@ def test_get_cuda_graph_input_data(self, num_microbatches): use_te_rng_tracker=True, bf16=True, tensor_model_parallel_size=1, - pipeline_model_parallel_size=1, + pipeline_model_parallel_size=pp_size, + pipeline_dtype=torch.bfloat16, context_parallel_size=1, ) @@ -835,7 +837,10 @@ def test_get_cuda_graph_input_data(self, num_microbatches): # Basic checks num_graphable_layers = len(cuda_graph_helper.flattened_callables) - expected_length = num_graphable_layers * num_microbatches + if pp_size > 1: + expected_length = num_graphable_layers * num_microbatches + else: + expected_length = num_graphable_layers assert len(sample_args) == expected_length, ( f"sample_args length mismatch: expected {expected_length}, " f"got {len(sample_args)}" ) @@ -931,17 +936,17 @@ def test_get_cuda_graph_input_data(self, num_microbatches): f"should be <= total_entries ({total_entries})" ) global _unique_buffer_counts - if _unique_buffer_counts is None: - _unique_buffer_counts = unique_buffer_count + if 
pp_size not in _unique_buffer_counts: + _unique_buffer_counts[pp_size] = unique_buffer_count else: - assert unique_buffer_count == _unique_buffer_counts, ( - f"Unique buffer count mismatch: expected {_unique_buffer_counts}, " + assert unique_buffer_count == _unique_buffer_counts[pp_size], ( + f"Unique buffer count mismatch: expected {_unique_buffer_counts[pp_size]}, " f"got {unique_buffer_count}" ) # Verify that buffers with the same signature can potentially be reused # (the actual reuse depends on the schedule, but the mechanism should work) - if num_microbatches > 1 and num_graphable_layers > 0: + if expected_length > 1: # Check that we have multiple entries with the same signature has_duplicate_signatures = any( len(indices) > 1 for indices in sample_keys_to_indices.values() @@ -955,10 +960,8 @@ def test_get_cuda_graph_input_data(self, num_microbatches): # some buffers should be reused (max_reuse > 1) # Note: The exact amount of reuse depends on the schedule order # With 1F1B interleaved schedule, we should see some reuse - if max_reuse > 1: - # Verify that reused buffers have the same signature - reused_tensors = [ptr for ptr, count in tensor_reuse_count.items() if count > 1] - assert len(reused_tensors) > 0, "Expected some reused tensors" + if pp_size > num_microbatches: + assert max_reuse > 1, "Expected some buffer reuse" # Verify that make_graphed_callables_kwargs contains expected keys assert ( @@ -974,18 +977,22 @@ def test_get_cuda_graph_input_data(self, num_microbatches): # Verify the order in kwargs matches expectations order = make_graphed_callables_kwargs['_order'] num_model_chunks = cuda_graph_helper.num_model_chunks - expected_order_length = num_microbatches * num_model_chunks * 2 + forward_count = sum(1 for chunk_id in order if chunk_id > 0) + if pp_size > 1: + # Verify that all forward passes in order have corresponding entries in sample_args + assert forward_count == num_microbatches * num_model_chunks, ( + f"Forward count mismatch: expected 
{num_microbatches * num_model_chunks}, " + f"got {forward_count}" + ) + expected_order_length = num_microbatches * num_model_chunks * 2 + else: + assert num_model_chunks == 1, "Expected only one model chunk for pp_size == 1" + assert forward_count == 1, "Expected only one forward pass for pp_size == 1" + expected_order_length = 2 assert ( len(order) == expected_order_length ), f"Order length mismatch: expected {expected_order_length}, got {len(order)}" - # Verify that all forward passes in order have corresponding entries in sample_args - forward_count = sum(1 for chunk_id in order if chunk_id > 0) - assert forward_count == num_microbatches * num_model_chunks, ( - f"Forward count mismatch: expected {num_microbatches * num_model_chunks}, " - f"got {forward_count}" - ) - def is_deep_ep_available(): from megatron.core.transformer.moe.fused_a2a import HAVE_DEEP_EP From 1f08cebac2f7e63159ad2966b3ebc6c9b7da3689 Mon Sep 17 00:00:00 2001 From: Jianbing Date: Fri, 5 Dec 2025 10:21:13 +0800 Subject: [PATCH 181/334] [Dev] Feature: linear cross entropy fusion (#2256) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jianbing Dong Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Signed-off-by: oliver könig Signed-off-by: Ananth Subramaniam Signed-off-by: dimapihtar Signed-off-by: Youngeun Kwon Signed-off-by: Youngeun Signed-off-by: Maanu Grover Signed-off-by: ykarnati Signed-off-by: Deepak Narayanan Signed-off-by: GitHub Actions Signed-off-by: Charlie Truong Signed-off-by: Zhongbo Zhu Signed-off-by: Xiaowei Ren Signed-off-by: Xin Yao Signed-off-by: Keshav Santhanam Signed-off-by: Pablo Garay Signed-off-by: Asha Anoosheh Signed-off-by: Chen Cui Signed-off-by: Li Tao Signed-off-by: lit Signed-off-by: Santosh Bhavani Signed-off-by: Robin Zhang Signed-off-by: kunlunl Co-authored-by: Jianbin Chang Co-authored-by: Deyu Fu Co-authored-by: Keval Morabia 
<28916987+kevalmorabia97@users.noreply.github.com> Co-authored-by: Yashaswi Karnati <144376261+yashaswikarnati@users.noreply.github.com> Co-authored-by: Jared Casper <155158+jaredcasper@users.noreply.github.com> Co-authored-by: Antoni-Joan Solergibert Co-authored-by: oliver könig Co-authored-by: Ananth Subramaniam Co-authored-by: Teodor-Dumitru Ene <34819528+tdene@users.noreply.github.com> Co-authored-by: Siddharth Singh <136645615+sidsingh-nvidia@users.noreply.github.com> Co-authored-by: Mcore Bot Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: Youngeun Kwon Co-authored-by: Lawrence McAfee <85179052+lmcafee-nvidia@users.noreply.github.com> Co-authored-by: Maanu Grover <109391026+maanug-nv@users.noreply.github.com> Co-authored-by: Lawrence McAfee Co-authored-by: AJ Schmidt Co-authored-by: Deepak Narayanan <2724038+deepakn94@users.noreply.github.com> Co-authored-by: helen ngo Co-authored-by: GitHub Actions Co-authored-by: Aaron Gokaslan Co-authored-by: Robert Kirby Co-authored-by: Teodor-Dumitru Ene Co-authored-by: yeyu-nvidia Co-authored-by: Abhinav Khattar Co-authored-by: Roger Waleffe Co-authored-by: Charlie Truong Co-authored-by: Tong Liu Co-authored-by: Zhongbo Zhu <42691305+zhongbozhu@users.noreply.github.com> Co-authored-by: Xiaowei Ren Co-authored-by: Xin Yao Co-authored-by: Teodor-Dumitru Ene Co-authored-by: Zijie Yan Co-authored-by: root Co-authored-by: Keshav Santhanam Co-authored-by: Pablo Garay Co-authored-by: Asha Anoosheh Co-authored-by: Kan Zhu Co-authored-by: Robert Kirby Co-authored-by: Jorge Albericio Co-authored-by: Jon Barker <19699370+jon-barker@users.noreply.github.com> Co-authored-by: Chen Cui Co-authored-by: Pablo Garay Co-authored-by: Tong Liu Co-authored-by: Michael Wojcikiewicz Co-authored-by: Li Tao Co-authored-by: Santosh Bhavani Co-authored-by: Li Ruixiao Co-authored-by: Robin Zhang Co-authored-by: Kunlun Li <94586211+kunlunl@users.noreply.github.com> --- 
.../fusions/fused_linear_cross_entropy.py | 242 +++ .../fusions/linear_cross_entropy/__init__.py | 1 + .../blackwell/__init__.py | 1 + .../blackwell/bwd_partial_dlogits.py | 667 ++++++++ .../linear_cross_entropy/blackwell/entry.py | 475 ++++++ .../blackwell/fwd_mainloop.py | 693 ++++++++ .../linear_cross_entropy/blackwell/triton.py | 248 +++ .../fusions/linear_cross_entropy/utils.py | 43 + .../common/language_module/language_module.py | 65 +- megatron/core/models/gpt/gpt_model.py | 42 +- megatron/core/models/mamba/mamba_model.py | 19 +- megatron/training/arguments.py | 2 +- .../test_fused_linear_cross_entropy.py | 1509 +++++++++++++++++ 13 files changed, 3990 insertions(+), 17 deletions(-) create mode 100644 megatron/core/fusions/fused_linear_cross_entropy.py create mode 100644 megatron/core/fusions/linear_cross_entropy/__init__.py create mode 100644 megatron/core/fusions/linear_cross_entropy/blackwell/__init__.py create mode 100644 megatron/core/fusions/linear_cross_entropy/blackwell/bwd_partial_dlogits.py create mode 100644 megatron/core/fusions/linear_cross_entropy/blackwell/entry.py create mode 100644 megatron/core/fusions/linear_cross_entropy/blackwell/fwd_mainloop.py create mode 100644 megatron/core/fusions/linear_cross_entropy/blackwell/triton.py create mode 100644 megatron/core/fusions/linear_cross_entropy/utils.py create mode 100644 tests/unit_tests/fusions/test_fused_linear_cross_entropy.py diff --git a/megatron/core/fusions/fused_linear_cross_entropy.py b/megatron/core/fusions/fused_linear_cross_entropy.py new file mode 100644 index 00000000000..b533fef7aa3 --- /dev/null +++ b/megatron/core/fusions/fused_linear_cross_entropy.py @@ -0,0 +1,242 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +""" +Linear Cross Entropy API +Fuse cross entropy with linear layer. +""" + +import typing +from functools import lru_cache + +import torch + + +class Platform: + """ + Singleton class for targeted GPU platform. 
+ """ + + _instance: typing.Optional["Platform"] = None + + def __new__(cls) -> "Platform": + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self) -> None: + if getattr(self, "_initialized", False): + return + + assert torch.cuda.is_available(), "CUDA is not available" + device = torch.cuda.current_device() + cc = torch.cuda.get_device_capability(device) + + if cc[0] == 10: + from .linear_cross_entropy.blackwell import entry as gpu_entry + + self.forward_func: typing.Callable[..., typing.Any] = gpu_entry.forward + self.backward_func: typing.Callable[..., typing.Any] = gpu_entry.backward + else: + raise ValueError(f"Unsupported architecture: {cc[0]}") + + self._initialized = True + + +@lru_cache(maxsize=1) +def _get_platform() -> Platform: + """ + Helper function to lazy initialize the platform. + """ + return Platform() + + +class LinearCrossEntropy(torch.autograd.Function): + """ + This class implements a custom autograd function for linear and cross entropy, + whose equivalent logic in PyTorch is: + ```python + def torch_entropy(hidden, weight, labels): + logits = torch.matmul(hidden, weight) + logprobs = torch.nn.functional.cross_entropy(logits, labels) + return logprobs + ``` + """ + + @staticmethod + def forward( + ctx, + hidden: torch.Tensor, + weight: torch.Tensor, + labels: torch.Tensor, + tp_group: typing.Optional[torch.distributed.ProcessGroup] = None, + reduction: typing.Literal["none", "sum", "mean"] = "mean", + ignore_index: int = -100, + sequence_parallel: bool = False, + ) -> torch.Tensor: + """ + The forward pass of the Linear Cross Entropy. + If tp_group is not None, the weight tensor to each TP rank should be + (global_vocab_size // world_size, dim). + Note that each of the ranks should get equal shards along the vocab_size dimension. 
+ + Args: + @param hidden: the input tensor with shape (num_tokens, dim) + @param weight: the lm_head weight tensor with shape (local_vocab_size, dim) + @param labels: the labels tensor with shape (num_tokens,) + @param tp_group: the distributed process group for TP. + @param reduction: Default to "mean", and can be one of "none", "sum", "mean". + @param ignore_index: The index to ignore. Default to -100. + @param sequence_parallel: Whether to use sequence parallel. Default to False. + Returns: + @return: logprobs with shape + - either (num_tokens,) when reduction is "none" + - or (1,) when reduction is "mean" or "sum" + + tp_group is None ----------------------------------> DP + B + A C + tp_group is not None & sequence_parallel is False -> TP + B0 B1 + A C0 C1 + tp_group is not None & sequence_parallel is True --> SP + B0 B1 + A0 C0 XX + A1 XX C1 + + When tp_group is not None, the weight tensor will be split along the vocab_size + dimension, which means each rank will get equal shards along the global_vocab_size + dimension. Specifically, the weight tensor to each rank will be (local_vocab_size, dim). + And there is an assumption that each rank will get the same local_vocab_size. + + When sequence_parallel is True, the hidden tensor will be split along the + sequence length dimension, which means each rank will get equal shards along + the sequence length dimension. Specifically, the hidden tensor to each rank + will be (local_num_tokens, dim). And there is an assumption that each rank + will get the same local_num_tokens. + + In TP forward pass, the hidden tensor and label tensor shall be identical + among all TP ranks, and it's user's responsibility to ensure the hidden tensor + is identical among all TP ranks. Then this operation will produce identical + logprobs among all TP ranks. 
+ + In TP backward pass, the gradient of the logprobs shall be identical among all + TP ranks, and it's user's responsibility to ensure the gradient of the logprobs + is identical among all TP ranks. Then this operation will produce distinct gradients + for the local weight tensor, and identical gradients for the hidden tensor. + + ```python + # ------------ forward pass ------------ # + hidden = tp_group.broadcast(hidden, src=0) # handled by framework + labels = tp_group.broadcast(labels, src=0) # handled by framework + logprobs = linear_cross_entropy(...) + # each rank will get the same logprobs + + # ------------ backward pass ------------ # + g_logprobs = tp_group.broadcast(g_logprobs, src=0) # handled by framework + d_hidden, d_weight = torch.autograd.grad(...) + # each rank will get the same d_hidden, + # and distinct d_weight for local weight shard + ``` + + In SP forward pass, the hidden tensor shall be split along the sequence length dimension, + and the label tensor shall be identical among all TP ranks. + Then this operation will produce identical logprobs among all TP ranks. + + In SP backward pass, the gradient of the logprobs shall be identical among all TP ranks, + Then this operation will produce distinct gradients for the local hidden tensor + and local weight tensor. + ```python + # ------------ forward pass ------------ # + hidden = global_hidden[tp_rank] # handled by framework + labels = tp_group.broadcast(labels, src=0) # handled by framework + logprobs = linear_cross_entropy(...) + # each rank will get the same logprobs + + # ------------ backward pass ------------ # + g_logprobs = tp_group.broadcast(g_logprobs, src=0) # handled by framework + d_hidden, d_weight = torch.autograd.grad(...) 
+ # each rank will get distinct local d_hidden and d_weight + ``` + """ + with torch.cuda.nvtx.range("LinearCrossEntropy-forward"): + ( + logprobs, + _maximum, + _acc, + _num_valid_tokens, + tp_rank, + tp_world_size, + global_hidden, + ) = _get_platform().forward_func( + hidden, weight, labels, tp_group, reduction, ignore_index, sequence_parallel + ) + ctx.save_for_backward(global_hidden, weight, labels, _maximum, _acc, _num_valid_tokens) + ctx.tp_group = tp_group + ctx.ignore_index = ignore_index + ctx.reduction = reduction + ctx.tp_rank = tp_rank + ctx.tp_world_size = tp_world_size + ctx.sequence_parallel = sequence_parallel + + return logprobs + + @staticmethod + def backward( + ctx, dlogprobs: torch.Tensor + ) -> typing.Tuple[torch.Tensor, torch.Tensor, None, None, None, None, None]: + """ + The backward pass of the Linear Cross Entropy. + Args: + dlogprobs (torch.Tensor): The gradient of the cross entropy, with shape + - either (num_tokens,) when reduction is "none" + - or (1,) when reduction is "mean" or "sum" + Returns: + dhidden (torch.Tensor): The gradient of the hidden. + dweight (torch.Tensor): The gradient of the weight. 
+ """ + with torch.cuda.nvtx.range("LinearCrossEntropy-backward"): + (global_hidden, weight, labels, _maximum, _accu, _num_valid_tokens) = ctx.saved_tensors + + tp_group = ctx.tp_group + ignore_index = ctx.ignore_index + reduction = ctx.reduction + tp_rank = ctx.tp_rank + tp_world_size = ctx.tp_world_size + sequence_parallel = ctx.sequence_parallel + + d_hidden, d_weight = _get_platform().backward_func( + dlogprobs, + global_hidden, + weight, + labels, + _maximum, + _accu, + _num_valid_tokens, + reduction, + ignore_index, + tp_group, + tp_rank, + tp_world_size, + sequence_parallel, + ) + + return d_hidden, d_weight, None, None, None, None, None + + +def linear_cross_entropy( + hidden: torch.Tensor, + weight: torch.Tensor, + labels: torch.Tensor, + tp_group: typing.Optional[torch.distributed.ProcessGroup] = None, + reduction: typing.Literal["none", "sum", "mean"] = "mean", + ignore_index: int = -100, + sequence_parallel: bool = False, +) -> torch.Tensor: + """ + helper function for linear cross entropy. + """ + _impl = LinearCrossEntropy.apply + return _impl(hidden, weight, labels, tp_group, reduction, ignore_index, sequence_parallel) + + +__all__ = ["linear_cross_entropy", "LinearCrossEntropy"] diff --git a/megatron/core/fusions/linear_cross_entropy/__init__.py b/megatron/core/fusions/linear_cross_entropy/__init__.py new file mode 100644 index 00000000000..b9a9591fa69 --- /dev/null +++ b/megatron/core/fusions/linear_cross_entropy/__init__.py @@ -0,0 +1 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/__init__.py b/megatron/core/fusions/linear_cross_entropy/blackwell/__init__.py new file mode 100644 index 00000000000..b9a9591fa69 --- /dev/null +++ b/megatron/core/fusions/linear_cross_entropy/blackwell/__init__.py @@ -0,0 +1 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/bwd_partial_dlogits.py b/megatron/core/fusions/linear_cross_entropy/blackwell/bwd_partial_dlogits.py new file mode 100644 index 00000000000..3178e8c6909 --- /dev/null +++ b/megatron/core/fusions/linear_cross_entropy/blackwell/bwd_partial_dlogits.py @@ -0,0 +1,667 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +import logging +from typing import Optional, Tuple, Type + +try: + import cuda.bindings.driver as cuda # type: ignore + import cutlass + import cutlass.cute as cute + import cutlass.pipeline as pipeline # type: ignore + import cutlass.utils as utils # type: ignore + import cutlass.utils.blackwell_helpers as sm100_utils # type: ignore + from cutlass.cute.nvgpu import cpasync, tcgen05 + + SM100_TMEM_CAPACITY_COLUMNS: int = 512 + + def make_thread_cooperative_group(size: int, alignment: Optional[int] = None): + """ + Create a thread cooperative group. + """ + return pipeline.CooperativeGroup( + pipeline.Agent.Thread, size, alignment=alignment if alignment is not None else size + ) + + class BwdPartialDlogits: + """ + This class implements the backward kernel for partial d_logits. 
+ """ + + def __init__( + self, + reduction: int, + acc_dtype: Type[cutlass.Numeric] = cutlass.Float32, + use_2cta_instrs: bool = False, + mma_tiler_mn: Tuple[int, int] = (128, 256), + vocab_per_split: int = 512, + ): + self.REDUCTION: cutlass.Constexpr[cutlass.Int32] = cutlass.const_expr(reduction) + self.acc_dtype = acc_dtype + self.use_2cta_instrs = use_2cta_instrs + self.mma_tiler = (*mma_tiler_mn, 1) + self.vocab_per_split = vocab_per_split + + self.cta_group = tcgen05.CtaGroup.TWO if self.use_2cta_instrs else tcgen05.CtaGroup.ONE + self.cluster_shape_mn = (2, 1) if self.use_2cta_instrs else (1, 1) + + self.smem_capacity = utils.get_smem_capacity_in_bytes("sm_100") + + self.threads_per_warp: int = 32 + + self.epi_warp_ids = (0, 1, 2, 3) + self.load_warp_ids = 4 + self.mma_warp_ids = 5 + self.empty_warp_ids = (6, 7) + + self.threads_per_cta: int = self.threads_per_warp * len( + (*self.epi_warp_ids, self.load_warp_ids, self.mma_warp_ids, *self.empty_warp_ids) + ) + self.cta_sync_barrier = pipeline.NamedBarrier( + barrier_id=1, num_threads=self.threads_per_cta + ) + + self.buffer_align_bytes: int = 1024 + self.num_regs_other: int = 32 + self.num_regs_epi: int = 192 + + def _compute_grid( + self, + problem_mnk: Tuple[int, int, int], + cluster_shape_mn: Tuple[int, int], + cta_tiler: Tuple[int, int, int], + ) -> Tuple[int, int, int]: + cluster_shape_mnk = (*cluster_shape_mn, 1) + + grid = cute.round_up( + ( + cute.ceil_div(problem_mnk[0], cta_tiler[0]), + cute.ceil_div(self.vocab_per_split, cta_tiler[1]), + 1, + ), + cluster_shape_mnk, + ) + return grid + + def _compute_stages( + self, + tiled_mma: cute.TiledMma, + mma_tiler: Tuple[int, int, int], + a_dtype: Type[cutlass.Numeric], + b_dtype: Type[cutlass.Numeric], + ): + num_acc_stage = 1 + num_ab_stage = 4 + num_epi_stage_per_tile = 4 + return num_acc_stage, num_ab_stage, num_epi_stage_per_tile + + def _setup_attributes( + self, + tiled_mma: cute.TiledMma, + a_dtype: Type[cutlass.Numeric], + b_dtype: 
Type[cutlass.Numeric], + ): + self.cluster_shape_mnk = (*self.cluster_shape_mn, 1) + self.cluster_layout_vmnk = cute.tiled_divide( + cute.make_layout(self.cluster_shape_mnk), (tiled_mma.thr_id.shape,) + ) + + mma_inst_shape_k = cute.size(tiled_mma.shape_mnk, mode=[2]) + # it requires k-mode to be 128B aligned + mma_inst_tile_k: int = 4 + self.mma_tiler = ( + self.mma_tiler[0], + self.mma_tiler[1], + mma_inst_shape_k * mma_inst_tile_k, + ) + + self.num_acc_stage, self.num_ab_stage, self.num_epi_stage_per_tile = ( + self._compute_stages(tiled_mma, self.mma_tiler, a_dtype, b_dtype) + ) + self.tmem_alloc_cols = self.num_acc_stage * self.mma_tiler[1] + assert self.tmem_alloc_cols <= SM100_TMEM_CAPACITY_COLUMNS + + self.cta_tile_shape_mnk = ( + self.mma_tiler[0] // cute.size(tiled_mma.thr_id.shape), + self.mma_tiler[1], + self.mma_tiler[2], + ) + + @cute.kernel + def kernel( + self, + split_idx: cutlass.Int32, + tiled_mma: cute.TiledMma, + tma_atom_a: cute.CopyAtom, + mA: cute.Tensor, + tma_atom_b: cute.CopyAtom, + mB: cute.Tensor, + mLabels: cute.Tensor, + mDlogprobs: cute.Tensor, + mMaximum: cute.Tensor, + mAccu: cute.Tensor, + mDlogits_partial: cute.Tensor, + scalarNumValidTokens: cute.Pointer, + ignore_index: cutlass.Int64, + a_smem_layout_staged: cute.ComposedLayout, + b_smem_layout_staged: cute.ComposedLayout, + cluster_layout_vmnk: cute.Layout, + problem_mnk: Tuple[int, int, int], + rank: cutlass.Int32, + ) -> None: + """ + The backward kernel for partial d_logits. 
+ """ + warp_idx = cute.arch.make_warp_uniform(cute.arch.warp_idx()) + tidx, _, _ = cute.arch.thread_idx() + bidx, bidy, _ = cute.arch.block_idx() + # FIXME: block swizzling applied here + pidm, pidn = bidx, bidy + + # FIXME: if 2 CTAs, modify here + cta_rank_in_cluster = 0 + block_in_cluster_coord_vmnk = cluster_layout_vmnk.get_flat_coord(cta_rank_in_cluster) + + # prefetch tma descriptors + if warp_idx == self.load_warp_ids: + cute.nvgpu.cpasync.prefetch_descriptor(tma_atom_a) + cute.nvgpu.cpasync.prefetch_descriptor(tma_atom_b) + + smem = utils.SmemAllocator() + storage = smem.allocate(self.shared_storage) + + ab_pipeline = pipeline.PipelineTmaUmma.create( + num_stages=self.num_ab_stage, + producer_group=make_thread_cooperative_group(len([self.load_warp_ids])), + consumer_group=make_thread_cooperative_group(len([self.mma_warp_ids])), + tx_count=self.tma_copy_ab_bytes, + barrier_storage=storage.load_ab_mbar_ptr.data_ptr(), + ) + ab_producer_state = pipeline.make_pipeline_state( + pipeline.PipelineUserType.Producer, self.num_ab_stage + ) + ab_consumer_state = pipeline.make_pipeline_state( + pipeline.PipelineUserType.Consumer, self.num_ab_stage + ) + + mma_pipeline = pipeline.PipelineUmmaAsync.create( + num_stages=self.num_acc_stage, + producer_group=make_thread_cooperative_group(len([self.mma_warp_ids])), + consumer_group=make_thread_cooperative_group( + self.threads_per_warp * len(self.epi_warp_ids) + ), + barrier_storage=storage.mma_mbar_ptr.data_ptr(), + ) + mma_producer_state = pipeline.make_pipeline_state( + pipeline.PipelineUserType.Producer, self.num_acc_stage + ) + mma_consumer_state = pipeline.make_pipeline_state( + pipeline.PipelineUserType.Consumer, self.num_acc_stage + ) + + tmem_dealloc_mbar_ptr = storage.tmem_dealloc_mbar_ptr.data_ptr() + if warp_idx == self.empty_warp_ids[0]: + with cute.arch.elect_one(): + cute.arch.mbarrier_init( + tmem_dealloc_mbar_ptr, self.threads_per_warp * len(self.epi_warp_ids) + ) + cute.arch.mbarrier_init_fence() + + # 
-------- tensor partition ------------ # + # swizzle o [(tileM, tileK), loopM, loopK, stage] + sA = storage.sA.get_tensor( + a_smem_layout_staged.outer, swizzle=a_smem_layout_staged.inner + ) + # swizzle o [(tileN, tileK), loopN, loopK, stage] + sB = storage.sB.get_tensor( + b_smem_layout_staged.outer, swizzle=b_smem_layout_staged.inner + ) + + # FIXME: if 2 CTAs, modify here + thr_mma = tiled_mma.get_slice(0) + # [MMA, loopM, loopK, stage] + tCsA = thr_mma.make_fragment_A(sA) + # [MMA, loopN, loopK, stage] + tCsB = thr_mma.make_fragment_B(sB) + + # [tileM, tileK, loopK] + gA = cute.local_tile( + mA, (self.cta_tile_shape_mnk[0], self.cta_tile_shape_mnk[2]), (pidm, None) + ) + # [vocab_per_split, dim] + mB_n = cute.local_tile( + mB, (self.vocab_per_split, cute.size(mB.layout.shape, mode=[1])), (split_idx, 0) + ) + # [tileN, tileK, loopK] + gB = cute.local_tile( + mB_n, (self.cta_tile_shape_mnk[1], self.cta_tile_shape_mnk[2]), (pidn, None) + ) + + a_cta_layout = cute.make_layout(cute.slice_(cluster_layout_vmnk, (0, 0, None, 0)).shape) + # just to make sure SMEM and GMEM tensor has the same size in the first rank + tCgA = thr_mma.partition_A(gA) + tCgB = thr_mma.partition_B(gB) + # [CPY, stage] & [CPY, loopK] + tTMAsA, tTMAgA = cpasync.tma_partition( + tma_atom_a, + block_in_cluster_coord_vmnk[2], # cta_coord, + a_cta_layout, + cute.group_modes(sA, 0, 3), + cute.group_modes(tCgA, 0, 3), + ) + b_cta_layout = cute.make_layout(cute.slice_(cluster_layout_vmnk, (0, None, 0, 0)).shape) + # [CPY, stage] & [CPY, loopK] + tTMAsB, tTMAgB = cpasync.tma_partition( + tma_atom_b, + block_in_cluster_coord_vmnk[1], # cta_coord + b_cta_layout, + cute.group_modes(sB, 0, 3), + cute.group_modes(tCgB, 0, 3), + ) + + # ------ Allocate TMEM ------ # + tmem_holding_buf = storage.tmem_holding_buf + if warp_idx == self.empty_warp_ids[0]: + cute.arch.alloc_tmem( + self.tmem_alloc_cols, tmem_holding_buf, is_two_cta=self.use_2cta_instrs + ) + self.cta_sync_barrier.arrive_and_wait() + tmem_ptr = 
cute.arch.retrieve_tmem_ptr( + self.acc_dtype, alignment=16, ptr_to_buffer_holding_addr=tmem_holding_buf + ) + + tmem_shape = (128, self.tmem_alloc_cols) + acc_shape = thr_mma.partition_shape_C(tmem_shape) + tCtC_fake = thr_mma.make_fragment_C(acc_shape) + # [(tileM, tileN), loopM, loopN] + tCtC = cute.make_tensor(tmem_ptr, tCtC_fake.layout) + + # ------ Empty ------ # + if warp_idx in self.empty_warp_ids: + cute.arch.warpgroup_reg_dealloc(self.num_regs_other) + + # ------ Load ------ # + if warp_idx == self.load_warp_ids: + cute.arch.warpgroup_reg_dealloc(self.num_regs_other) + + for k in cutlass.range(cute.size(gA, mode=[2])): + ab_pipeline.producer_acquire(ab_producer_state) + cute.copy( + tma_atom_a, + tTMAgA[(None, k)], + tTMAsA[(None, ab_producer_state.index)], + tma_bar_ptr=ab_pipeline.producer_get_barrier(ab_producer_state), + ) + cute.copy( + tma_atom_b, + tTMAgB[(None, k)], + tTMAsB[(None, ab_producer_state.index)], + tma_bar_ptr=ab_pipeline.producer_get_barrier(ab_producer_state), + ) + ab_pipeline.producer_commit(ab_producer_state) + ab_producer_state.advance() + + # ------ MMA ------ # + if warp_idx == self.mma_warp_ids: + cute.arch.warpgroup_reg_dealloc(self.num_regs_other) + + tiled_mma.set(tcgen05.Field.ACCUMULATE, False) + mma_pipeline.producer_acquire(mma_producer_state) + + for k in cutlass.range(cute.size(gA, mode=[2])): + ab_pipeline.consumer_wait(ab_consumer_state) + + for kblock_idx in cutlass.range(cute.size(tCsA, mode=[2]), unroll_full=True): + cute.gemm( + tiled_mma, + cute.append_ones(tCtC[(None, None, mma_producer_state.index)]), + tCsA[(None, None, kblock_idx, ab_consumer_state.index)], + tCsB[(None, None, kblock_idx, ab_consumer_state.index)], + cute.append_ones(tCtC[(None, None, mma_producer_state.index)]), + ) + tiled_mma.set(tcgen05.Field.ACCUMULATE, True) + + ab_pipeline.consumer_release(ab_consumer_state) + ab_consumer_state.advance() + + mma_pipeline.producer_commit(mma_producer_state) + mma_producer_state.advance() + + # ------ 
EPI ------ # + if warp_idx in self.epi_warp_ids: + cute.arch.warpgroup_reg_alloc(self.num_regs_epi) + + copy_atom_t2r = sm100_utils.get_tmem_load_op( + self.cta_tile_shape_mnk, + utils.LayoutEnum.ROW_MAJOR, + self.acc_dtype, + self.acc_dtype, + (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile), + self.use_2cta_instrs, + ) + # [tileM, subTileN, loopM, CntSubTileN, loopN] + tAcc_epi = cute.flat_divide( + tCtC[((None, None), 0, None)], + (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile), + ) + tiled_copy_t2r = tcgen05.make_tmem_copy( + copy_atom_t2r, tAcc_epi[(None, None, 0, 0, 0)] + ) + thr_copy_t2r = tiled_copy_t2r.get_slice(tidx) + tTMEM_load_tAcc = thr_copy_t2r.partition_S(tAcc_epi) + tTMEM_load_tAcc = cute.group_modes( + tTMEM_load_tAcc, 3, cute.rank(tTMEM_load_tAcc) - 1 + ) + + # predicates + cAcc = cute.make_identity_tensor(self.mma_tiler[:2]) + tCcAcc = thr_mma.partition_C(cAcc) + tCcAcc_epi = cute.flat_divide( + tCcAcc[((None, None), 0, None)], + (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile), + ) + tTMEM_load_cAcc = thr_copy_t2r.partition_D(tCcAcc_epi) + tTMEM_load_cAcc_shape = cute.select(tTMEM_load_cAcc.shape, mode=[0, 1, 2]) + tTMEM_load_rAcc = cute.make_fragment(tTMEM_load_cAcc_shape, self.acc_dtype) + + copy_atom_g2r_int64 = cute.make_copy_atom( + cute.nvgpu.CopyUniversalOp(), mLabels.element_type + ) + copy_atom_g2r_fp32 = cute.make_copy_atom( + cute.nvgpu.CopyUniversalOp(), mDlogprobs.element_type + ) + epilogue_thread_layout = cute.make_layout((128, 1), stride=(1, 1)) + tiled_copy_g2r_int64 = cute.make_tiled_copy_tv( + copy_atom_g2r_int64, epilogue_thread_layout, cute.make_layout((1, 1)) + ) + tiled_copy_g2r_fp32 = cute.make_tiled_copy_tv( + copy_atom_g2r_fp32, epilogue_thread_layout, cute.make_layout((1, 1)) + ) + thr_copy_g2r_int64 = tiled_copy_g2r_int64.get_slice(tidx) + thr_copy_g2r_fp32 = tiled_copy_g2r_fp32.get_slice(tidx) + + # [tileM] + gLabels = cute.local_tile(mLabels, 
(self.epi_tile[0],), (pidm,)) + gMaximum = cute.local_tile(mMaximum, (self.epi_tile[0],), (pidm,)) + gAccu = cute.local_tile(mAccu, (self.epi_tile[0],), (pidm,)) + + # slice along M direction + tMCAcc = thr_copy_g2r_int64.partition_S(cAcc)[(None, None, 0)] + # [(1, 1), 1] + tMCAcc_mask = cute.make_fragment(tMCAcc.shape, cutlass.Boolean) + # to align shape with gMax and gAccu + tMCAcc_mask = cute.append_ones(tMCAcc_mask) + tMCAcc_mask[0] = cute.elem_less( + pidm * self.epi_tile[0] + tidx, cute.size(mA, mode=[0]) + ) + # [(1, 1), 1, 1] + tMgLabels = thr_copy_g2r_int64.partition_S(cute.append_ones(gLabels)) + tMrLabels = cute.make_fragment(tMgLabels.shape, tMgLabels.element_type) + cute.copy(tiled_copy_g2r_int64, tMgLabels, tMrLabels, pred=tMCAcc_mask) + tMgMaximum = thr_copy_g2r_fp32.partition_S(cute.append_ones(gMaximum)) + tMrMaximum = cute.make_fragment(tMgMaximum.layout, tMgMaximum.element_type) + cute.copy(tiled_copy_g2r_fp32, tMgMaximum, tMrMaximum, pred=tMCAcc_mask) + tMgAccu = thr_copy_g2r_fp32.partition_S(cute.append_ones(gAccu)) + tMrAccu = cute.make_fragment(tMgAccu.layout, tMgAccu.element_type) + cute.copy(tiled_copy_g2r_fp32, tMgAccu, tMrAccu, pred=tMCAcc_mask) + + tMrDlogprobs = cute.make_fragment(tMgAccu.layout, mDlogprobs.element_type) + if cutlass.const_expr(self.REDUCTION == 2): + # mean reduction + num_valid_tokens = cute.make_tensor(scalarNumValidTokens, layout=(1,)) + tMrDlogprobs[0] = mDlogprobs[0] / num_valid_tokens[0].to(cutlass.Float32) + elif cutlass.const_expr(self.REDUCTION == 1): + # sum reduction + tMrDlogprobs[0] = mDlogprobs[0] + else: + # no reduction + gDlogprobs = cute.local_tile(mDlogprobs, (self.epi_tile[0],), (pidm,)) + tMgDlogprobs = thr_copy_g2r_fp32.partition_S(cute.append_ones(gDlogprobs)) + cute.copy(tiled_copy_g2r_fp32, tMgDlogprobs, tMrDlogprobs, pred=tMCAcc_mask) + + tMrAccu[0] = cute.arch.rcp_approx(tMrAccu[0]) + tMrDlogprobs[0] *= tMrLabels[0] != ignore_index + tMr_d_acc_exp_logits = tMrDlogprobs[0] * tMrAccu[0] + + # 
------ Partial output ------ # + # [tileM, tileN] + gDlogits_partial = cute.local_tile( + mDlogits_partial, (self.epi_tile[0], self.epi_tile[1]), (pidm, pidn) + ) + # blackwell supports STG.256 + copy_atom_r2g = cute.make_copy_atom( + cute.nvgpu.CopyUniversalOp(), + gDlogits_partial.element_type, + num_bits_per_copy=256, + ) + tiled_copy_r2g = cute.make_tiled_copy_tv( + copy_atom_r2g, epilogue_thread_layout, copy_atom_r2g.layout_dst_tv + ) + thr_copy_r2g = tiled_copy_r2g.get_slice(tidx) + + # [CPY, loopM, loopN] + tR2GCAcc = thr_copy_r2g.partition_S(cAcc) + tR2GCAcc_pred = cute.make_fragment(tR2GCAcc.shape, cutlass.Boolean) + for elem in cutlass.range(cute.size(tR2GCAcc_pred, mode=[0])): + for row in cutlass.range(cute.size(tR2GCAcc_pred, mode=[1])): + for col in cutlass.range(cute.size(tR2GCAcc_pred, mode=[2])): + tR2GCAcc_pred[elem, row, col] = cute.elem_less( + pidm * self.epi_tile[0] + tR2GCAcc[elem, row, col][0], + problem_mnk[0], + ) and cute.elem_less( + split_idx * self.vocab_per_split + + pidn * self.epi_tile[1] + + tR2GCAcc[elem, row, col][1], + problem_mnk[1], + ) + + tR2GgDlogits = thr_copy_r2g.partition_D(gDlogits_partial) + + # for type conversion + dLogits_half = cute.make_fragment(tTMEM_load_rAcc.shape, tR2GgDlogits.element_type) + dLogits_half = cute.tiled_divide( + dLogits_half, (cute.size(tR2GgDlogits, mode=[0]), 1) + ) + dLogits_half = cute.group_modes(dLogits_half, 2, cute.rank(dLogits_half)) + + mma_pipeline.consumer_wait(mma_consumer_state) + + block_vocab_left_idx: cutlass.Int64 = ( + split_idx * self.vocab_per_split + pidn * self.epi_tile[1] + ) + block_vocab_right_idx: cutlass.Int64 = min( + split_idx * self.vocab_per_split + (pidn + 1) * self.epi_tile[1], + min((split_idx + 1) * self.vocab_per_split, problem_mnk[1]), + ) + num_n_subtiles: cutlass.Int64 = cute.ceil_div( + (block_vocab_right_idx - block_vocab_left_idx), + cute.size(tTMEM_load_rAcc, mode=[0]), + ) + for n_subtile in cutlass.range(num_n_subtiles): + cute.copy( + 
tiled_copy_t2r, + tTMEM_load_tAcc[(None, None, None, n_subtile, mma_consumer_state.index)], + tTMEM_load_rAcc, + ) + + for idx in cutlass.range( + cute.size(tTMEM_load_rAcc, mode=[0]), unroll_full=True + ): + # exp_logits + tTMEM_load_rAcc[idx] = cute.exp(tTMEM_load_rAcc[idx] - tMrMaximum[0]) + + position: cutlass.Int64 = ( + rank * problem_mnk[1] + + split_idx * self.vocab_per_split + + pidn * self.epi_tile[1] + + n_subtile * cute.size(tTMEM_load_rAcc, mode=[0]) + + idx + ) + mask: cutlass.Boolean = ( + position == tMrLabels[0] and tMrLabels[0] != ignore_index + ) + # d_logits + tTMEM_load_rAcc[idx] *= tMr_d_acc_exp_logits + tTMEM_load_rAcc[idx] += mask * -tMrDlogprobs[0] + dLogits_half[idx] = tTMEM_load_rAcc[idx].to(dLogits_half.element_type) + + for idx in cutlass.range(cute.size(dLogits_half, mode=[1]), unroll_full=True): + copy_id = n_subtile * cute.size(dLogits_half, mode=[1]) + idx + cute.copy( + tiled_copy_r2g, + dLogits_half[(None, idx, None)], + tR2GgDlogits[(None, None, copy_id)], + pred=tR2GCAcc_pred[((0, None), None, copy_id)], + ) + + mma_pipeline.consumer_release(mma_consumer_state) + mma_consumer_state.advance() + + # ------ Deallocate TMEM ------ # + self.cta_sync_barrier.arrive_and_wait() + if warp_idx == self.empty_warp_ids[0]: + cute.arch.relinquish_tmem_alloc_permit() + cute.arch.dealloc_tmem( + tmem_ptr, self.tmem_alloc_cols, is_two_cta=self.use_2cta_instrs + ) + + @cute.jit + def __call__( + self, + split_idx: cutlass.Int32, + hidden: cute.Tensor, + weight: cute.Tensor, + labels: cute.Tensor, + dlogprobs: cute.Tensor, + maximum: cute.Tensor, + accu: cute.Tensor, + dlogits_partial: cute.Tensor, + scalarNumValidTokens: cute.Pointer, + ignore_index: cutlass.Int64, + rank: cutlass.Int32, + stream: cuda.CUstream, + ) -> None: + a_dtype: Type[cutlass.Numeric] = hidden.element_type + b_dtype: Type[cutlass.Numeric] = weight.element_type + + if cutlass.const_expr(hidden.element_type != weight.element_type): + raise RuntimeError( + f"data type don't 
match: {hidden.element_type} v.s. {weight.element_type}" + ) + if cutlass.const_expr(hidden.element_type not in [cutlass.Float16, cutlass.BFloat16]): + raise RuntimeError("hidden can only be FP16 or BF16") + if cutlass.const_expr(hidden.layout.shape[1] != weight.layout.shape[1]): + raise RuntimeError("K dimension doesn't match") + + problem_mnk = (hidden.layout.shape[0], weight.layout.shape[0], hidden.layout.shape[1]) + if cutlass.const_expr((problem_mnk[2] * a_dtype.width // 8) % 16 != 0): + raise RuntimeError(f"K dimension is not 16B aligned: {problem_mnk[2]}") + if cutlass.const_expr((problem_mnk[2] * b_dtype.width // 8) % 128 != 0): + raise RuntimeError(f"N dimension is not 128B aligned: {problem_mnk[1]}") + + grid = self._compute_grid( + problem_mnk=problem_mnk, + cluster_shape_mn=self.cluster_shape_mn, + cta_tiler=self.mma_tiler, + ) + + a_major_mode = utils.LayoutEnum.from_tensor(hidden).mma_major_mode() + b_major_mode = utils.LayoutEnum.from_tensor(weight).mma_major_mode() + + tiled_mma = sm100_utils.make_trivial_tiled_mma( + a_dtype, + a_major_mode, + b_major_mode, + self.acc_dtype, + self.cta_group, + self.mma_tiler[:2], + ) + self._setup_attributes(tiled_mma, a_dtype, b_dtype) + + self.epi_tile = self.cta_tile_shape_mnk[:2] + + # Swizzle o [(tileM, tileK), loopM, loopK, stage] + a_smem_layout_staged = sm100_utils.make_smem_layout_a( + tiled_mma, self.mma_tiler, a_dtype, self.num_ab_stage + ) + # Swizzle o [(tileN, tileK), loopN, loopK, stage] + b_smem_layout_staged = sm100_utils.make_smem_layout_b( + tiled_mma, self.mma_tiler, b_dtype, self.num_ab_stage + ) + tma_load_op = cpasync.CopyBulkTensorTileG2SOp(self.cta_group) + tma_store_op = cpasync.CopyBulkTensorTileS2GOp() + + # Swizzle o [(tileM, tileK), loopM, loopK] + a_smem_layout = cute.select(a_smem_layout_staged, mode=[0, 1, 2]) + tma_atom_a, tma_tensor_a = cute.nvgpu.make_tiled_tma_atom_A( + tma_load_op, + hidden, + a_smem_layout, + self.mma_tiler, + tiled_mma, + self.cluster_layout_vmnk.shape, + ) 
+ # Swizzle o [(tileN, tileK), loopN, loopK] + b_smem_layout = cute.select(b_smem_layout_staged, mode=[0, 1, 2]) + tma_atom_b, tma_tensor_b = cute.nvgpu.make_tiled_tma_atom_B( + tma_load_op, + weight, + b_smem_layout, + self.mma_tiler, + tiled_mma, + self.cluster_layout_vmnk.shape, + ) + a_copy_size = cute.size_in_bytes(a_dtype, a_smem_layout) + b_copy_size = cute.size_in_bytes(b_dtype, b_smem_layout) + self.tma_copy_ab_bytes = a_copy_size + b_copy_size + + @cute.struct + class SharedStorage: + """ + The shared storage for the backward kernel. + """ + + load_ab_mbar_ptr: cute.struct.MemRange[cutlass.Int64, self.num_ab_stage * 2] + mma_mbar_ptr: cute.struct.MemRange[cutlass.Int64, self.num_acc_stage * 2] + + tmem_dealloc_mbar_ptr: cute.struct.MemRange[cutlass.Int64, 1] + tmem_holding_buf: cutlass.Int32 + + sA: cute.struct.Align[ + cute.struct.MemRange[a_dtype, cute.cosize(a_smem_layout_staged)], + self.buffer_align_bytes, + ] + sB: cute.struct.Align[ + cute.struct.MemRange[b_dtype, cute.cosize(b_smem_layout_staged)], + self.buffer_align_bytes, + ] + + self.shared_storage = SharedStorage + + self.kernel( + split_idx, + tiled_mma, + tma_atom_a, + tma_tensor_a, + tma_atom_b, + tma_tensor_b, + labels, + dlogprobs, + maximum, + accu, + dlogits_partial, + scalarNumValidTokens, + ignore_index, + a_smem_layout_staged, + b_smem_layout_staged, + self.cluster_layout_vmnk, + problem_mnk, + rank, + ).launch( + grid=grid, + block=[self.threads_per_cta, 1, 1], + cluster=self.cluster_shape_mnk, + stream=stream, + ) + +except ImportError: + logging.warning("Cutlass or CUDA bindings not found. BwdPartialDlogits will not be available.") diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py b/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py new file mode 100644 index 00000000000..dc369a7c558 --- /dev/null +++ b/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py @@ -0,0 +1,475 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +import logging +import os +import typing +from dataclasses import dataclass, field +from functools import lru_cache + +try: + import cuda.bindings.driver as cuda # type: ignore + import cutlass + import cutlass.cute as cute + import torch + import torch.distributed as dist + import triton # type: ignore + from cutlass.cute.runtime import from_dlpack + + import megatron.core.fusions.linear_cross_entropy.utils as utils + from megatron.core.fusions.linear_cross_entropy.blackwell import ( + bwd_partial_dlogits as bwd_partial_dlogits, + ) + from megatron.core.fusions.linear_cross_entropy.blackwell import fwd_mainloop as fwd_mainloop + from megatron.core.fusions.linear_cross_entropy.blackwell import triton as triton_kernels + + @dataclass + class FwdConfig: + """ + The configuration for the forward pass. + """ + + _dedicated_stream: torch.cuda.Stream = field(default_factory=torch.cuda.Stream) + _dedicated_events: typing.List[torch.cuda.Event] = field(default_factory=list) + _initialized: bool = field(default=False) + _fwd_mainloop_kernels: typing.Dict[str, cute.kernel] = field(default_factory=dict) + _vocab_per_split: int = field( + default=int(os.environ.get("LCE_FWD_VOCAB_SPLIT_SIZE", 512 * 6)) + ) + + @dataclass + class BwdConfig: + """ + The configuration for the backward pass. + """ + + _bwd_kernel: typing.Dict[str, cute.kernel] = field(default_factory=dict) + _vocab_per_split: int = field( + default=int(os.environ.get("LCE_BWD_VOCAB_SPLIT_SIZE", 512 * 6)) + ) + _backward_method: utils.BackwardMethodEnum = field( + default=utils.BackwardMethodEnum.kDlogitsSplitN + ) + + @lru_cache(maxsize=1) + def _get_fwd_config() -> FwdConfig: + """ + Helper function to lazy initialize the forward configuration. + """ + return FwdConfig() + + @lru_cache(maxsize=1) + def _get_bwd_config() -> BwdConfig: + """ + Helper function to lazy initialize the backward configuration. 
+ """ + return BwdConfig() + + def forward( + hidden: torch.Tensor, + weight: torch.Tensor, + labels: torch.Tensor, + tp_group: typing.Optional[torch.distributed.ProcessGroup] = None, + reduction: typing.Literal["none", "sum", "mean"] = "mean", + ignore_index: int = -100, + sequence_parallel: bool = False, + ) -> typing.Tuple[ + torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, int, int, torch.Tensor + ]: + """ + forward host function + """ + tp_rank = 0 if tp_group is None else torch.distributed.get_rank(tp_group) + tp_world_size = 1 if tp_group is None else torch.distributed.get_world_size(tp_group) + in_tp_mode = (tp_group is not None) and (tp_world_size > 1) + + assert hidden.is_cuda and weight.is_cuda and labels.is_cuda + assert weight.device == hidden.device and labels.device == hidden.device + + # hidden could be [batch, seqlen, dim] or [seqlen, batch, dim] or [tokens, dim] + assert hidden.dim() == 2 or hidden.dim() == 3 + # weight must be [vocab_size, dim] + assert weight.dim() == 2 + # labels could be [batch, seqlen] or [seqlen, batch] or [tokens] + assert (hidden.dim() == 2 and labels.dim() == 1) or ( + hidden.dim() == 3 and labels.dim() == 2 + ) + assert hidden.is_contiguous() and weight.is_contiguous() and labels.is_contiguous() + + hidden_view = hidden.view(-1, hidden.shape[-1]) + labels_view = labels.view(-1) + + assert ( + sequence_parallel and hidden_view.shape[0] * tp_world_size == labels_view.shape[0] + ) or (not sequence_parallel and hidden_view.shape[0] == labels_view.shape[0]) + assert hidden_view.shape[1] == weight.shape[1] + + global_hidden = hidden + if in_tp_mode and sequence_parallel: + partial_hidden_shape = hidden.shape + global_hidden_shape = ( + partial_hidden_shape[0] * tp_world_size, + *partial_hidden_shape[1:], + ) + global_hidden = torch.empty( + global_hidden_shape, dtype=hidden.dtype, device=hidden.device + ) + dist.all_gather_into_tensor(global_hidden, hidden, group=tp_group) + assert global_hidden.is_contiguous() + 
hidden_view = global_hidden.view(-1, global_hidden.shape[-1]) + + num_tokens, dim = hidden_view.shape + vocab_size, _ = weight.shape + + if not _get_fwd_config()._initialized: + _get_fwd_config()._dedicated_stream = torch.cuda.Stream(hidden.device) + _get_fwd_config()._dedicated_events = [torch.cuda.Event() for _ in range(2)] + _get_fwd_config()._initialized = True + + REDUCTION = utils.str_to_reduction_enum(reduction) + # declare logprobs + if REDUCTION == utils.EntropyReductionEnum.kNone: + logprobs = torch.empty((num_tokens,), device=hidden.device, dtype=torch.float32) + if in_tp_mode: + logprobs.zero_() + else: + logprobs = torch.zeros((), device=hidden.device, dtype=torch.float32) + # declare auxiliary tensors + maximum = torch.empty((num_tokens,), device=hidden.device, dtype=torch.float32) + accumulate = torch.empty_like(maximum, dtype=torch.float32) + num_valid_tokens = torch.empty((), device=hidden.device, dtype=torch.int64) + assert ( + maximum.is_contiguous() + and accumulate.is_contiguous() + and num_valid_tokens.is_contiguous() + ) + # declare intermediate tensors + # NOTE: this is a parameter for tuning + num_splits = ( + vocab_size + _get_fwd_config()._vocab_per_split - 1 + ) // _get_fwd_config()._vocab_per_split + _max = torch.empty((num_tokens, num_splits), device=hidden.device, dtype=torch.float32) + _accu = torch.empty((num_tokens, num_splits), device=hidden.device, dtype=torch.float32) + if REDUCTION == utils.EntropyReductionEnum.kNone: + _logprobs = logprobs + else: + _logprobs = torch.empty((num_tokens,), device=hidden.device, dtype=torch.float32) + if in_tp_mode: + _logprobs.zero_() + assert _max.is_contiguous() and _accu.is_contiguous() and _logprobs.is_contiguous() + + triton_kernels.get_num_valid_tokens[(1,)]( + num_tokens, ignore_index, labels_view, labels_view.stride(0), num_valid_tokens + ) + + # need to compile the kernel for the first time + hidden_packed = from_dlpack( + hidden_view.detach(), assumed_align=16 + 
).mark_compact_shape_dynamic(mode=0) + weight_packed = from_dlpack(weight.detach(), assumed_align=16) + labels_packed = from_dlpack( + labels_view.detach(), assumed_align=8 + ).mark_compact_shape_dynamic(mode=0) + logprobs_packed = from_dlpack(_logprobs, assumed_align=16).mark_compact_shape_dynamic( + mode=0 + ) + _max_packed = from_dlpack(_max, assumed_align=8).mark_compact_shape_dynamic( + mode=0, stride_order=(0, 1) + ) + _accu_packed = from_dlpack(_accu, assumed_align=8).mark_compact_shape_dynamic( + mode=0, stride_order=(0, 1) + ) + cuda_stream = cuda.CUstream(torch.cuda.current_stream().cuda_stream) + + # VocabSize and Dim are fixed for a given model, + # only the number of tokens can vary + key = f"vocab_size:{vocab_size}+dim:{dim}+dtype:{hidden_view.dtype}" + if _get_fwd_config()._fwd_mainloop_kernels.get(key) is None: + fwd_mainloop_kernel = fwd_mainloop.FwdMainLoop( + vocab_per_split=_get_fwd_config()._vocab_per_split + ) + fwd_mainloop_compiled_kernel = cute.compile( + fwd_mainloop_kernel, + hidden_packed, + weight_packed, + labels_packed, + logprobs_packed, + _max_packed, + _accu_packed, + ignore_index, + tp_rank, + cuda_stream, + ) + _get_fwd_config()._fwd_mainloop_kernels[key] = fwd_mainloop_compiled_kernel + else: + fwd_mainloop_compiled_kernel = _get_fwd_config()._fwd_mainloop_kernels[key] + fwd_mainloop_compiled_kernel( + hidden_packed, + weight_packed, + labels_packed, + logprobs_packed, + _max_packed, + _accu_packed, + ignore_index, + tp_rank, + cuda_stream, + ) + + if not in_tp_mode: + + def grid(meta): + return (triton.cdiv(num_tokens, meta["BLOCK_SIZE_M"]),) + + triton_kernels.forward_dp_epilogue[grid]( + num_tokens, + num_splits, + ignore_index, + labels_view, + labels_view.stride(0), + num_valid_tokens, + _max, + _max.stride(0), + _max.stride(1), + _accu, + _accu.stride(0), + _accu.stride(1), + maximum, + maximum.stride(0), + accumulate, + maximum.stride(0), + _logprobs, + _logprobs.stride(0), + logprobs, + 
triton.language.constexpr(REDUCTION.value), + ) + else: + _max_backup = _max.clone() + dist.all_reduce(_max, op=dist.ReduceOp.MAX, group=tp_group) + + torch.cuda.current_stream().record_event(_get_fwd_config()._dedicated_events[0]) + with torch.cuda.stream(_get_fwd_config()._dedicated_stream): + _get_fwd_config()._dedicated_stream.wait_event( + _get_fwd_config()._dedicated_events[0] + ) + dist.all_reduce(_logprobs, op=dist.ReduceOp.SUM, group=tp_group) + _get_fwd_config()._dedicated_stream.record_event( + _get_fwd_config()._dedicated_events[1] + ) + + def grid(meta): + return (triton.cdiv(num_tokens, meta["BLOCK_SIZE_M"]),) + + triton_kernels.forward_tp_epilogue[grid]( + num_tokens, + num_splits, + _max, + _max.stride(0), + _max.stride(1), + _max_backup, + _max_backup.stride(0), + _max_backup.stride(1), + _accu, + _accu.stride(0), + _accu.stride(1), + maximum, + maximum.stride(0), + accumulate, + maximum.stride(0), + ) + # reduce accumulate + dist.all_reduce(accumulate, op=dist.ReduceOp.SUM, group=tp_group) + + # update logprobs + torch.cuda.current_stream().wait_event(_get_fwd_config()._dedicated_events[1]) + triton_kernels.forward_tp_epilogue_update_logprobs[grid]( + num_tokens, + ignore_index, + num_valid_tokens, + labels_view, + labels_view.stride(0), + _logprobs, + _logprobs.stride(0), + maximum, + maximum.stride(0), + accumulate, + accumulate.stride(0), + logprobs, + REDUCTION.value, + ) + + return ( + logprobs, + maximum, + accumulate, + num_valid_tokens, + tp_rank, + tp_world_size, + global_hidden, + ) + + def backward( + dlogprobs: torch.Tensor, + global_hidden: torch.Tensor, + weight: torch.Tensor, + labels: torch.Tensor, + maximum: torch.Tensor, + accu: torch.Tensor, + num_valid_tokens: torch.Tensor, + reduction: typing.Literal["none", "sum", "mean"] = "mean", + ignore_index: int = -100, + tp_group: typing.Optional[dist.ProcessGroup] = None, + tp_rank: int = 0, + tp_world_size: int = 1, + sequence_parallel: bool = False, + ) -> typing.Tuple[torch.Tensor, 
torch.Tensor]: + """ + backward host function + """ + in_tp_mode = (tp_group is not None) and (tp_world_size > 1) + + hidden_view = global_hidden.view(-1, global_hidden.shape[-1]) + labels_view = labels.view(-1) + + num_tokens, dim = hidden_view.shape + vocab_size, _ = weight.shape + + REDUCTION = utils.str_to_reduction_enum(reduction) + dlogprobs_view = dlogprobs.view(-1) + assert ( + REDUCTION == utils.EntropyReductionEnum.kNone and dlogprobs.shape == (num_tokens,) + ) or (REDUCTION != utils.EntropyReductionEnum.kNone and dlogprobs.dim() == 0) + assert dlogprobs.is_contiguous() and dlogprobs.is_cuda + + assert ( + num_valid_tokens.dim() == 0 + and num_valid_tokens.is_cuda + and num_valid_tokens.dtype == torch.int64 + ) + + d_hidden = torch.empty_like(global_hidden) + d_weight = torch.empty_like(weight) + assert d_hidden.is_contiguous() and d_weight.is_contiguous() + + # FIXME: implement different backward methods + _backward_method = _get_bwd_config()._backward_method + if _backward_method == utils.BackwardMethodEnum.kDlogitsSplitN: + vocab_per_split = _get_bwd_config()._vocab_per_split + num_splits = (vocab_size + vocab_per_split - 1) // vocab_per_split + + _d_logits = torch.empty( + (num_tokens, vocab_per_split), + device=global_hidden.device, + dtype=global_hidden.dtype, + ) + + hidden_packed = from_dlpack( + hidden_view.detach(), assumed_align=16 + ).mark_compact_shape_dynamic(mode=0) + weight_packed = from_dlpack(weight.detach(), assumed_align=16) + labels_packed = from_dlpack( + labels_view.detach(), assumed_align=8 + ).mark_compact_shape_dynamic(mode=0) + dlogprobs_packed = from_dlpack( + dlogprobs_view.detach(), assumed_align=8 + ).mark_compact_shape_dynamic(mode=0) + maximum_packed = from_dlpack( + maximum.detach(), assumed_align=8 + ).mark_compact_shape_dynamic(mode=0) + accu_packed = from_dlpack(accu.detach(), assumed_align=8).mark_compact_shape_dynamic( + mode=0 + ) + dlogits_packed = from_dlpack(_d_logits, 
assumed_align=32).mark_compact_shape_dynamic( + mode=0 + ) + scalarNumValidTokens_packed = cute.runtime.make_ptr( + cutlass.Int64, num_valid_tokens.data_ptr(), cute.AddressSpace.gmem, assumed_align=8 + ) + + stream = cuda.CUstream(torch.cuda.current_stream().cuda_stream) + + key = ( + f"vocab_size:{vocab_size}+dim:{dim}+reduction:{REDUCTION}+dtype:{hidden_view.dtype}" + ) + if _get_bwd_config()._bwd_kernel.get(key) is None: + bwd_kernel = bwd_partial_dlogits.BwdPartialDlogits( + reduction=REDUCTION.value, vocab_per_split=vocab_per_split + ) + bwd_kernel_compiled = cute.compile( + bwd_kernel, + 0, # split_idx + hidden_packed, + weight_packed, + labels_packed, + dlogprobs_packed, + maximum_packed, + accu_packed, + dlogits_packed, + scalarNumValidTokens_packed, + ignore_index, + tp_rank, + stream, + ) + _get_bwd_config()._bwd_kernel[key] = bwd_kernel_compiled + else: + bwd_kernel_compiled = _get_bwd_config()._bwd_kernel.get(key) + + for split_idx in range(num_splits): + bwd_kernel_compiled( + split_idx, + hidden_packed, + weight_packed, + labels_packed, + dlogprobs_packed, + maximum_packed, + accu_packed, + dlogits_packed, + scalarNumValidTokens_packed, + ignore_index, + tp_rank, + stream, + ) + # remove padding areas + # cublas can handle non-contiguous tensors + # therefore, we do not need to contiguous the tensor + vocab_right_bound = ( + min((split_idx + 1) * vocab_per_split, vocab_size) - split_idx * vocab_per_split + ) + valid_d_logits = _d_logits[:, :vocab_right_bound] + + torch.addmm( + input=d_hidden.view(-1, dim), + mat1=valid_d_logits, + mat2=weight[split_idx * vocab_per_split : (split_idx + 1) * vocab_per_split, :], + beta=(split_idx != 0), + alpha=1.0, + out=d_hidden.view(-1, dim), + ) + torch.matmul( + valid_d_logits.T, + hidden_view, + out=d_weight[ + split_idx * vocab_per_split : (split_idx + 1) * vocab_per_split, : + ], + ) + else: + raise NotImplementedError(f"Unsupported backward method: {_backward_method}") + + if in_tp_mode: + 
dist.all_reduce(d_hidden, op=dist.ReduceOp.SUM, group=tp_group) + if sequence_parallel: + partial_hidden_shape = ( + global_hidden.shape[0] // tp_world_size, + *global_hidden.shape[1:], + ) + partial_num_tokens = num_tokens // tp_world_size + d_hidden = d_hidden.view(-1, d_hidden.shape[-1])[ + tp_rank * partial_num_tokens : (tp_rank + 1) * partial_num_tokens, : + ] + d_hidden = d_hidden.view(partial_hidden_shape).clone() + + return d_hidden, d_weight + +except ImportError: + logging.warning( + "Cutlass or CUDA bindings not found. LinearCrossEntropy Blackwell entry " + "points will not be available." + ) diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/fwd_mainloop.py b/megatron/core/fusions/linear_cross_entropy/blackwell/fwd_mainloop.py new file mode 100644 index 00000000000..93f5b9523e7 --- /dev/null +++ b/megatron/core/fusions/linear_cross_entropy/blackwell/fwd_mainloop.py @@ -0,0 +1,693 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +""" +Implementations of the fusion lm_head(Linear) + Cross-Entropy kernel +""" + +import logging +from typing import Tuple, Type + +try: + import cuda.bindings.driver as cuda # type: ignore + import cutlass + import cutlass.cute as cute + import cutlass.pipeline as pipeline # type: ignore + import cutlass.utils as utils # type: ignore + import cutlass.utils.blackwell_helpers as sm100_utils # type: ignore + from cutlass.cute.nvgpu import cpasync, tcgen05 + + SM100_TMEM_CAPACITY_COLUMNS: int = 512 + + def make_thread_cooperative_group(size: int): + """ + Create a thread cooperative group. + """ + return pipeline.CooperativeGroup(pipeline.Agent.Thread, size, alignment=size) + + class FwdMainLoop: + """ + This class implements the mainloop for forward process. + + Traits stored as attributes. 
+ + :param acc_dtype: + """ + + def __init__( + self, + acc_dtype: Type[cutlass.Numeric] = cutlass.Float32, + use_2cta_instrs: bool = False, + mma_tiler_mn: Tuple[int, int] = (128, 256), + vocab_per_split: int = 512, + ): + """ + Configuration including: + - MMA instruction settings + - Cluster Shape + """ + self.acc_dtype: Type[cutlass.Numeric] = acc_dtype + self.use_2cta_instrs = use_2cta_instrs + # This is the shape covered by tiledMMA, not just single MMA instruction + self.mma_tiler = (*mma_tiler_mn, 1) + self.cta_tiler = (self.mma_tiler[0], vocab_per_split, self.mma_tiler[2]) + self.vocab_per_split = vocab_per_split + + self.cta_group = tcgen05.CtaGroup.TWO if self.use_2cta_instrs else tcgen05.CtaGroup.ONE + self.cluster_shape_mn = (2, 1) if self.use_2cta_instrs else (1, 1) + + self.occupancy = 1 + # query SMEM capacity + self.smem_capacity = utils.get_smem_capacity_in_bytes("sm_100") + + # the maximum columns per MMA is 256, and there is only one GEMM, so we can fully + # assign TMEM for that GEMM of different tiles. 
+ # so 512 = 2 * 256 + + self.threads_per_warp: int = 32 + # 1 warp for loading, 1 warp for issuing MMA, 1 WG for storing + self.epi_warp_ids = (0, 1, 2, 3) + self.load_warp_ids = 4 + self.mma_warp_ids = 5 + self.empty_warp_ids = (6, 7) + + self.threads_per_cta: int = self.threads_per_warp * len( + (*self.epi_warp_ids, self.load_warp_ids, self.mma_warp_ids, *self.empty_warp_ids) + ) + + self.cta_sync_barrier = pipeline.NamedBarrier( + barrier_id=1, num_threads=self.threads_per_cta + ) + self.tmem_alloc_barrier = pipeline.NamedBarrier( + barrier_id=2, num_threads=self.threads_per_cta + ) + + self.buffer_align_bytes: int = 1024 + self.num_regs_other: int = 32 + self.num_regs_epi: int = 192 + + def _compute_stages( + self, + tiled_mma: cute.TiledMma, + mma_tiler: Tuple[int, int, int], + a_dtype: Type[cutlass.Numeric], + b_dtype: Type[cutlass.Numeric], + ): + a_smem_layout_stage_one = sm100_utils.make_smem_layout_a( + tiled_mma, mma_tiler, a_dtype, 1 # only single stage + ) + b_smem_layout_stage_one = sm100_utils.make_smem_layout_b( + tiled_mma, mma_tiler, b_dtype, 1 + ) + a_bytes_per_stage = cute.size_in_bytes(a_dtype, a_smem_layout_stage_one) + b_bytes_per_stage = cute.size_in_bytes(b_dtype, b_smem_layout_stage_one) + num_acc_stage = 2 + num_a_stage = 4 + num_b_stage = 4 + num_epi_stage_per_tile = 4 + + return num_acc_stage, num_a_stage, num_b_stage, num_epi_stage_per_tile + + def _setup_attributes( + self, + tiled_mma: cute.TiledMma, + a_dtype: Type[cutlass.Numeric], + b_dtype: Type[cutlass.Numeric], + ): + self.cluster_shape_mnk = (*self.cluster_shape_mn, 1) + self.cluster_layout_vmnk = cute.tiled_divide( + cute.make_layout(self.cluster_shape_mnk), (tiled_mma.thr_id.shape,) + ) + + # this is fixed for dense MMA, k=16 + mma_inst_shape_k = cute.size(tiled_mma.shape_mnk, mode=[2]) + # 16*4 = 64; 64 * sizeof(FP16) = 128Bytes + mma_inst_tile_k: int = 4 + self.mma_tiler = ( + self.mma_tiler[0], + self.mma_tiler[1], + mma_inst_shape_k * mma_inst_tile_k, + ) + + 
self.num_acc_stage, self.num_a_stage, self.num_b_stage, self.num_epi_stage_per_tile = ( + self._compute_stages(tiled_mma, self.mma_tiler, a_dtype, b_dtype) + ) + self.tmem_alloc_cols = self.num_acc_stage * self.mma_tiler[1] + assert self.tmem_alloc_cols <= SM100_TMEM_CAPACITY_COLUMNS + + self.cta_tile_shape_mnk = ( + self.mma_tiler[0] // cute.size(tiled_mma.thr_id.shape), + self.mma_tiler[1], + self.mma_tiler[2], + ) + + @cute.kernel + def kernel( + self, + tiled_mma: cute.TiledMma, + tma_atom_a: cute.CopyAtom, + mA: cute.Tensor, + tma_atom_b: cute.CopyAtom, + mB: cute.Tensor, + mLabels: cute.Tensor, + mMax: cute.Tensor, + mAccu: cute.Tensor, + mLogprobs: cute.Tensor, + a_smem_layout_staged: cute.ComposedLayout, + b_smem_layout_staged: cute.ComposedLayout, + cluster_layout_vmnk: cute.Layout, + problem_mnk: Tuple[int, int, int], + ignore_index: cutlass.Int64, + rank: cutlass.Int32, + ): + """ + The forward kernel for the mainloop. + """ + warp_idx = cute.arch.make_warp_uniform(cute.arch.warp_idx()) + tidx, _, _ = cute.arch.thread_idx() + bidx, bidy, _ = cute.arch.block_idx() + # FIXME: block swizzling applied here + pidm, pidn = bidx, bidy + + # prefetch tma descriptors + if warp_idx == self.load_warp_ids: + cute.nvgpu.cpasync.prefetch_descriptor(tma_atom_a) + cute.nvgpu.cpasync.prefetch_descriptor(tma_atom_b) + + # declare SMEM + smem = utils.SmemAllocator() + storage = smem.allocate(self.shared_storage) + + ab_pipeline = pipeline.PipelineTmaUmma.create( + num_stages=self.num_a_stage, + producer_group=make_thread_cooperative_group(len([self.load_warp_ids])), + consumer_group=make_thread_cooperative_group(len([self.mma_warp_ids])), + tx_count=self.tma_copy_a_bytes + self.tma_copy_b_bytes, + barrier_storage=storage.load_ab_mbar_ptr.data_ptr(), + ) + ab_producer_state = pipeline.make_pipeline_state( + pipeline.PipelineUserType.Producer, self.num_a_stage + ) + ab_consumer_state = pipeline.make_pipeline_state( + pipeline.PipelineUserType.Consumer, self.num_a_stage + ) + 
+ mma_pipeline = pipeline.PipelineUmmaAsync.create( + num_stages=self.num_acc_stage, + producer_group=make_thread_cooperative_group(len([self.mma_warp_ids])), + consumer_group=make_thread_cooperative_group( + self.threads_per_warp * len(self.epi_warp_ids) + ), + barrier_storage=storage.mma_mbar_ptr.data_ptr(), + ) + mma_producer_state = pipeline.make_pipeline_state( + pipeline.PipelineUserType.Producer, self.num_acc_stage + ) + mma_consumer_state = pipeline.make_pipeline_state( + pipeline.PipelineUserType.Consumer, self.num_acc_stage + ) + + tmem_dealloc_mbar_ptr = storage.tmem_dealloc_mbar_ptr.data_ptr() + if warp_idx == self.empty_warp_ids[0]: + with cute.arch.elect_one(): + cute.arch.mbarrier_init( + tmem_dealloc_mbar_ptr, self.threads_per_warp * len(self.epi_warp_ids) + ) + cute.arch.mbarrier_init_fence() + + # -------- SMEM partition ------------ # + # swizzle o [(tileM, tileK), loopM, loopK, Stage] + sA = storage.sA.get_tensor( + a_smem_layout_staged.outer, swizzle=a_smem_layout_staged.inner + ) + # swizzle o [(tileN, tileK), loopN, loopK, stage] + sB = storage.sB.get_tensor( + b_smem_layout_staged.outer, swizzle=b_smem_layout_staged.inner + ) + + # FIXME: if 2 CTAs, modify here + thr_mma = tiled_mma.get_slice(0) + # [MMA, loopM, loopK, stage] + tCsA = thr_mma.make_fragment_A(sA) + # [MMA, loopN, loopK, stage] + tCsB = thr_mma.make_fragment_B(sB) + + # ---------- GMEM partition ----------- # + # [tileM, tileK, loopK] + gA = cute.local_tile(mA, (self.mma_tiler[0], self.mma_tiler[2]), (pidm, None)) + + # [vocab_size_per_split, dim] + mB_n = cute.local_tile( + mB, (self.vocab_per_split, cute.size(mB.layout.shape, mode=[1])), (pidn, 0) + ) + + # [tileN, tileK, loopN, loopK] + gB = cute.local_tile(mB_n, (self.mma_tiler[1], self.mma_tiler[2]), (None, None)) + + # [MMA, tileCntM, tileCntK, loopK] + tCgA = thr_mma.partition_A(gA) + # [MMA, tileCntN, tileCntK, loopN, loopK] + tCgB = thr_mma.partition_B(gB) + + a_cta_layout = 
cute.make_layout(cute.slice_(cluster_layout_vmnk, (0, 0, None, 0)).shape) + # FIXME: if 2 CTAs, modify here + cta_rank_in_cluster = 0 + block_in_cluster_coord_vmnk = cluster_layout_vmnk.get_flat_coord(cta_rank_in_cluster) + tTMAsA, tTMAgA = cpasync.tma_partition( + tma_atom_a, + block_in_cluster_coord_vmnk[2], # cta_coord, + a_cta_layout, + cute.group_modes(sA, 0, 3), # SMEM tensor + cute.group_modes(tCgA, 0, 3), # GMEM tensor + ) + b_cta_layout = cute.make_layout(cute.slice_(cluster_layout_vmnk, (0, None, 0, 0)).shape) + tTMAsB, tTMAgB = cpasync.tma_partition( + tma_atom_b, + block_in_cluster_coord_vmnk[1], # cta_coord + b_cta_layout, + cute.group_modes(sB, 0, 3), + cute.group_modes(tCgB, 0, 3), + ) + + # Allocate TMEM + tmem_holding_buf = storage.tmem_holding_buf + if warp_idx == self.empty_warp_ids[0]: + cute.arch.alloc_tmem( + self.tmem_alloc_cols, tmem_holding_buf, is_two_cta=self.use_2cta_instrs + ) + self.cta_sync_barrier.arrive_and_wait() + tmem_ptr = cute.arch.retrieve_tmem_ptr( + self.acc_dtype, alignment=16, ptr_to_buffer_holding_addr=tmem_holding_buf + ) + + # [(tileM, tileN), loopM, loopN] + tmem_shape = (128, self.tmem_alloc_cols) + acc_shape = thr_mma.partition_shape_C(tmem_shape) + tCtC_fake = thr_mma.make_fragment_C(acc_shape) + tCtC = cute.make_tensor(tmem_ptr, tCtC_fake.layout) + + block_vocab_left_idx: cutlass.Int64 = pidn * self.vocab_per_split + block_vocab_right_idx: cutlass.Int64 = min( + (pidn + 1) * self.vocab_per_split, problem_mnk[1] + ) + num_n_tiles: cutlass.Int64 = cute.ceil_div( + (block_vocab_right_idx - block_vocab_left_idx), self.mma_tiler[1] + ) + + # /////// + # empty + # /////// + if warp_idx in self.empty_warp_ids: + cute.arch.warpgroup_reg_dealloc(self.num_regs_other) + + # /////// + # load + # /////// + if warp_idx == self.load_warp_ids: + cute.arch.warpgroup_reg_dealloc(self.num_regs_other) + + for n in cutlass.range(num_n_tiles): + for k in cutlass.range(cute.size(gA, mode=[2])): + 
ab_pipeline.producer_acquire(ab_producer_state) + cute.copy( + tma_atom_a, + tTMAgA[(None, k)], + tTMAsA[(None, ab_producer_state.index)], + tma_bar_ptr=ab_pipeline.producer_get_barrier(ab_producer_state), + ) + cute.copy( + tma_atom_b, + tTMAgB[(None, n, k)], + tTMAsB[(None, ab_producer_state.index)], + tma_bar_ptr=ab_pipeline.producer_get_barrier(ab_producer_state), + ) + ab_pipeline.producer_commit(ab_producer_state) + ab_producer_state.advance() + + # /////// + # mma + # /////// + if warp_idx == self.mma_warp_ids: + cute.arch.warpgroup_reg_dealloc(self.num_regs_other) + + for n in cutlass.range(num_n_tiles): + # disable accumulate for the first tile + tiled_mma.set(tcgen05.Field.ACCUMULATE, False) + mma_pipeline.producer_acquire(mma_producer_state) + + for k in cutlass.range(cute.size(gA, mode=[2])): + ab_pipeline.consumer_wait(ab_consumer_state) + + for kblock_idx in cutlass.range( + cute.size(tCsA, mode=[2]), unroll_full=True + ): + cute.gemm( + tiled_mma, + cute.append_ones(tCtC[(None, None, mma_producer_state.index)]), + tCsA[(None, None, kblock_idx, ab_consumer_state.index)], + tCsB[(None, None, kblock_idx, ab_consumer_state.index)], + cute.append_ones(tCtC[(None, None, mma_producer_state.index)]), + ) + # enable accumulate for the next tile + tiled_mma.set(tcgen05.Field.ACCUMULATE, True) + + ab_pipeline.consumer_release(ab_consumer_state) + ab_consumer_state.advance() + + mma_pipeline.producer_commit(mma_producer_state) + mma_producer_state.advance() + + # ////////// + # epilogue + # ////////// + if warp_idx in self.epi_warp_ids: + cute.arch.warpgroup_reg_alloc(self.num_regs_epi) + + # epilog TMEM copy and partition + copy_atom_t2r = sm100_utils.get_tmem_load_op( + self.cta_tile_shape_mnk, + utils.LayoutEnum.ROW_MAJOR, # This is hard-coded + self.acc_dtype, + self.acc_dtype, + (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile), + self.use_2cta_instrs, + ) + # [tileM, subTileN, loopM, CntSubTileN, loopN] + tAcc_epi = cute.flat_divide( + 
tCtC[((None, None), 0, None)], + (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile), + ) + tiled_copy_t2r = tcgen05.make_tmem_copy( + copy_atom_t2r, tAcc_epi[(None, None, 0, 0, 0)] + ) + thr_copy_t2r = tiled_copy_t2r.get_slice(tidx) + tTMEM_load_tAcc = thr_copy_t2r.partition_S(tAcc_epi) + # [(pattern), loopM, loopN, CntTileM, CntTileN] + tTMEM_load_tAcc = cute.group_modes( + tTMEM_load_tAcc, 3, cute.rank(tTMEM_load_tAcc) - 1 + ) + + cAcc = cute.make_identity_tensor(self.mma_tiler[:2]) + tCcAcc = thr_mma.partition_C(cAcc) + # [tileM, subTileN, loopM, CntSubTileN, CntTileN] + tCcAcc_epi = cute.flat_divide( + tCcAcc[((None, None), 0, None)], + (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile), + ) + tTMEM_load_cAcc = thr_copy_t2r.partition_D(tCcAcc_epi) + tTMEM_load_cAcc_shape = cute.select(tTMEM_load_cAcc.shape, mode=[0, 1, 2]) + + # epilogue layouts + epilogue_thread_layout = cute.make_layout((128, 1)) + copy_atom_g2r = cute.make_copy_atom( + cute.nvgpu.CopyUniversalOp(), mLabels.element_type + ) + tiled_copy_g2r = cute.make_tiled_copy( + copy_atom_g2r, epilogue_thread_layout, (128, 1) + ) + thr_copy_g2r = tiled_copy_g2r.get_slice(tidx) + + copy_atom_r2g = cute.make_copy_atom(cute.nvgpu.CopyUniversalOp(), cutlass.Float32) + tiled_copy_r2g = cute.make_tiled_copy( + copy_atom_r2g, epilogue_thread_layout, (128, 1) + ) + thr_copy_r2g = tiled_copy_r2g.get_slice(tidx) + + # auxiliary tensors + # [tileM] + gLabels = cute.local_tile(mLabels, (self.epi_tile[0],), (pidm,)) + + tLabelsCAcc = thr_copy_g2r.partition_S(cAcc)[(None, None, 0)] + tLabelsCAcc_mask = cute.make_fragment(tLabelsCAcc.shape, cutlass.Boolean) + # [(1, 1), 1] + tLabelsCAcc_mask[0] = cute.elem_less(pidm * self.epi_tile[0] + tidx, problem_mnk[0]) + # to align shape with gMax and gAccu + tLabelsCAcc_mask = cute.append_ones(tLabelsCAcc_mask) + + # [(1, 1), 1, 1] + tLabelsgLabels = thr_copy_g2r.partition_S(cute.append_ones(gLabels)) + tLabelsrLabels = cute.make_fragment( + 
tLabelsgLabels.shape, tLabelsgLabels.element_type + ) + cute.copy(tiled_copy_g2r, tLabelsgLabels, tLabelsrLabels, pred=tLabelsCAcc_mask) + valid_mask: cutlass.Boolean = ( + tLabelsrLabels[0] != ignore_index + ) and tLabelsCAcc_mask[0] + + # [tileM, 1] + gMax = cute.local_tile(mMax, (self.epi_tile[0], 1), (pidm, pidn)) + # [(CPYM, CPYN), loopM, loopN] + tR2GgMax = thr_copy_r2g.partition_D(gMax) + tR2GrMax = cute.make_fragment(tR2GgMax.shape, tR2GgMax.element_type) + tR2GrMax.fill(-1e30) + + # [tileM, 1] + gAccu = cute.local_tile(mAccu, (self.epi_tile[0], 1), (pidm, pidn)) + # [(CPYM, CPYN), loopM, loopN] + tR2GgAccu = thr_copy_r2g.partition_D(gAccu) + tR2GrAccu = cute.make_fragment(tR2GgAccu.shape, tR2GgAccu.element_type) + tR2GrAccu.fill(0.0) + + # [tileM, 1] + gLogprobs = cute.append_ones( + cute.local_tile(mLogprobs, (self.epi_tile[0],), (pidm,)) + ) + # [(CPYM, CPYN), loopM, loopN] + tR2GgLogprobs = thr_copy_r2g.partition_D(gLogprobs) + tR2GrLogprobs = cute.make_fragment(tR2GgLogprobs.shape, tR2GgLogprobs.element_type) + tR2GrLogprobs.fill(0.0) + + # [(tileN // num_epi_stage_per_tile, 1), 1, 1] + tTMEM_load_rAcc = cute.make_fragment(tTMEM_load_cAcc_shape, self.acc_dtype) + + for n in cutlass.range(num_n_tiles): + mma_pipeline.consumer_wait(mma_consumer_state) + + left: cutlass.Int64 = block_vocab_left_idx + n * self.epi_tile[1] + right: cutlass.Int64 = min( + (n + 1) * self.epi_tile[1] + block_vocab_left_idx, block_vocab_right_idx + ) + num_n_subtiles: cutlass.Int64 = cute.ceil_div( + (right - left), cute.size(tTMEM_load_rAcc, mode=[0]) + ) + for n_subtile in cutlass.range(num_n_subtiles): + cute.copy( + tiled_copy_t2r, + tTMEM_load_tAcc[ + (None, None, None, n_subtile, mma_consumer_state.index) + ], + tTMEM_load_rAcc, + ) + + for idx in cutlass.range( + cute.size(tTMEM_load_rAcc, mode=[0]), unroll_full=True + ): + local_position: cutlass.Int64 = ( + n * self.epi_tile[1] + + n_subtile * cute.size(tTMEM_load_rAcc, mode=[0]) + + idx + ) + if (block_vocab_left_idx 
+ local_position) < block_vocab_right_idx: + _max_old = tR2GrMax[0] + tR2GrMax[0] = cute.arch.fmax(tR2GrMax[0], tTMEM_load_rAcc[idx]) + exp_logits = cute.exp(tTMEM_load_rAcc[idx] - tR2GrMax[0]) + coeff = cute.exp(_max_old - tR2GrMax[0]) + tR2GrAccu[0] = coeff * tR2GrAccu[0] + exp_logits + + position: cutlass.Int64 = ( + rank * problem_mnk[1] + + pidn * self.vocab_per_split + + local_position + ) + mask: cutlass.Boolean = valid_mask and ( + position == tLabelsrLabels[0] + ) + tR2GrLogprobs[0] += mask * tTMEM_load_rAcc[idx] + + mma_pipeline.consumer_release(mma_consumer_state) + mma_consumer_state.advance() + + cute.copy(tiled_copy_r2g, tR2GrMax, tR2GgMax, pred=tLabelsCAcc_mask) + cute.copy(tiled_copy_r2g, tR2GrAccu, tR2GgAccu, pred=tLabelsCAcc_mask) + + vocab_left_idx: cutlass.Int64 = rank * problem_mnk[1] + pidn * self.vocab_per_split + vocab_right_idx: cutlass.Int64 = rank * problem_mnk[1] + min( + (pidn + 1) * self.vocab_per_split, problem_mnk[1] + ) + valid: cutlass.Boolean = ( + tLabelsrLabels[0] >= vocab_left_idx and tLabelsrLabels[0] < vocab_right_idx + ) + tLabelsCAcc_mask[0] &= valid + + cute.copy(tiled_copy_r2g, tR2GrLogprobs, tR2GgLogprobs, pred=tLabelsCAcc_mask) + + # Dealloc TMEM + self.cta_sync_barrier.arrive_and_wait() + if warp_idx == self.empty_warp_ids[0]: + cute.arch.relinquish_tmem_alloc_permit() + cute.arch.dealloc_tmem( + tmem_ptr, self.tmem_alloc_cols, is_two_cta=self.use_2cta_instrs + ) + + @staticmethod + def _compute_grid( + problem_mnk: Tuple[int, int, int], + cluster_shape_mn: Tuple[int, int], + cta_tiler: Tuple[int, int, int], + num_splits: int, + ) -> Tuple[int, int, int]: + + cluster_shape = (*cluster_shape_mn, 1) + + grid = cute.round_up( + (cute.ceil_div(problem_mnk[0], cta_tiler[0]), num_splits, 1), cluster_shape + ) + return grid + + @cute.jit + def __call__( + self, + hidden: cute.Tensor, + weight: cute.Tensor, + labels: cute.Tensor, + _logprobs: cute.Tensor, + _max: cute.Tensor, + _accu: cute.Tensor, + ignore_index: 
cutlass.Int64, + rank: cutlass.Int32, + stream: cuda.CUstream, + ) -> None: + a_dtype: Type[cutlass.Numeric] = hidden.element_type + b_dtype: Type[cutlass.Numeric] = weight.element_type + + if cutlass.const_expr(hidden.element_type != weight.element_type): + raise RuntimeError( + f"data type don't match: {hidden.element_type} v.s. {weight.element_type}" + ) + if cutlass.const_expr(hidden.element_type not in [cutlass.Float16, cutlass.BFloat16]): + raise RuntimeError("hidden can only be FP16 or BF16") + if cutlass.const_expr(hidden.layout.shape[1] != weight.layout.shape[1]): + raise RuntimeError("K dimension doesn't match") + + problem_mnk = (hidden.layout.shape[0], weight.layout.shape[0], hidden.layout.shape[1]) + if cutlass.const_expr((problem_mnk[2] * a_dtype.width // 8) % 16 != 0): + raise RuntimeError(f"K dimension is not 16B aligned: {problem_mnk[2]}") + + num_splits = cute.ceil_div(problem_mnk[1], self.vocab_per_split) + + grid = self._compute_grid( + problem_mnk=problem_mnk, + cluster_shape_mn=self.cluster_shape_mn, + cta_tiler=self.cta_tiler, + num_splits=num_splits, + ) + a_major_mode = utils.LayoutEnum.from_tensor(hidden).mma_major_mode() + b_major_mode = utils.LayoutEnum.from_tensor(weight).mma_major_mode() + + tiled_mma = sm100_utils.make_trivial_tiled_mma( + a_dtype, + a_major_mode, + b_major_mode, + self.acc_dtype, + self.cta_group, + self.mma_tiler[:2], + ) + + self._setup_attributes(tiled_mma, a_dtype, b_dtype) + if cutlass.const_expr((problem_mnk[2] * a_dtype.width // 8) % 128 != 0): + raise RuntimeError(f"K dimension is not 128B aligned: {problem_mnk[2]}") + + self.epi_tile = self.mma_tiler[:2] + + # Swizzle o [(tileM, tileK), loopM, loopK, stage] + a_smem_layout_staged = sm100_utils.make_smem_layout_a( + tiled_mma, self.mma_tiler, a_dtype, self.num_a_stage + ) + # Swizzle o [(tileN, tileK), loopN, loopK, stage] + b_smem_layout_staged = sm100_utils.make_smem_layout_b( + tiled_mma, self.mma_tiler, b_dtype, self.num_b_stage + ) + + # TMA loading + 
tma_load_op = cpasync.CopyBulkTensorTileG2SOp(self.cta_group)
+            tma_store_op = cpasync.CopyBulkTensorTileS2GOp()
+
+            # Swizzle o [(tileM, tileK), loopM, loopK]
+            a_smem_layout = cute.select(a_smem_layout_staged, mode=[0, 1, 2])
+            # create tma copy atom for hidden,
+            # and the corresponding tma descriptor tensor
+            tma_atom_a, tma_desc_a = cute.nvgpu.make_tiled_tma_atom_A(
+                tma_load_op,
+                hidden,  # gmem_tensor
+                a_smem_layout,  # SMEM layout
+                self.mma_tiler,  # MMA tiler
+                tiled_mma,  # TiledMMA
+                self.cluster_layout_vmnk.shape,  # cluster_shape_vmnk
+            )
+            # Swizzle o [(tileN, tileK), loopN, loopK]
+            b_smem_layout = cute.select(b_smem_layout_staged, mode=[0, 1, 2])
+            tma_atom_b, tma_desc_b = cute.nvgpu.make_tiled_tma_atom_B(
+                tma_load_op,
+                weight,  # gmem_tensor
+                b_smem_layout,  # SMEM layout
+                self.mma_tiler,  # MMA tiler
+                tiled_mma,  # TiledMMA
+                self.cluster_layout_vmnk.shape,  # cluster_shape_vmnk
+            )
+            a_copy_size = cute.size_in_bytes(a_dtype, a_smem_layout)
+            b_copy_size = cute.size_in_bytes(b_dtype, b_smem_layout)
+            self.tma_copy_a_bytes = a_copy_size
+            self.tma_copy_b_bytes = b_copy_size
+
+            assert self.num_a_stage == self.num_b_stage
+
+            @cute.struct
+            class SharedStorage:
+                """
+                The shared storage for the forward kernel. 
+ """ + + # pipeline barriers, 2 = producer + consumer + load_ab_mbar_ptr: cute.struct.MemRange[cutlass.Int64, self.num_a_stage * 2] + mma_mbar_ptr: cute.struct.MemRange[cutlass.Int64, self.num_acc_stage * 2] + tmem_dealloc_mbar_ptr: cute.struct.MemRange[cutlass.Int64, 1] + # tmem holding buffer + tmem_holding_buf: cutlass.Int32 + # SMEM tensors + sA: cute.struct.Align[ + cute.struct.MemRange[a_dtype, cute.cosize(a_smem_layout_staged)], + self.buffer_align_bytes, + ] + sB: cute.struct.Align[ + cute.struct.MemRange[b_dtype, cute.cosize(b_smem_layout_staged)], + self.buffer_align_bytes, + ] + + self.shared_storage = SharedStorage + + # launch kernel + self.kernel( + tiled_mma, + tma_atom_a, + tma_desc_a, + tma_atom_b, + tma_desc_b, + labels, + _max, + _accu, + _logprobs, + a_smem_layout_staged, + b_smem_layout_staged, + self.cluster_layout_vmnk, + problem_mnk, + ignore_index, + rank, + ).launch( + grid=grid, + block=[self.threads_per_cta, 1, 1], + cluster=self.cluster_shape_mnk, + stream=stream, + ) + return None + +except ImportError: + logging.warning("Cutlass or CUDA Python bindings not found. FwdMainLoop will not be available.") diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/triton.py b/megatron/core/fusions/linear_cross_entropy/blackwell/triton.py new file mode 100644 index 00000000000..e025cc046f4 --- /dev/null +++ b/megatron/core/fusions/linear_cross_entropy/blackwell/triton.py @@ -0,0 +1,248 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +import triton # type: ignore +import triton.language as tl # type: ignore + +# NOTE: tl.pointer_type() is not available in Triton 3.3.0 + + +@triton.autotune( + configs=[ + triton.Config({"BLOCK_SIZE_M": 1024}, num_stages=3, num_warps=32), + triton.Config({"BLOCK_SIZE_M": 2048}, num_stages=3, num_warps=32), + ], + key=["num_tokens"], +) +@triton.jit +def get_num_valid_tokens( + num_tokens: tl.int64, + ignore_index: tl.int64, + labels_ptr, #: tl.pointer_type(tl.int64), + stride_labels: tl.int64, + num_valid_tokens_ptr, #: tl.pointer_type(tl.int64), + BLOCK_SIZE_M: tl.constexpr, +): + """ + Calculate the number of valid tokens in the labels tensor. + """ + num_pid_m: tl.int64 = tl.cdiv(num_tokens, BLOCK_SIZE_M) + + num_valid_tokens: tl.int64 = tl.zeros((), dtype=tl.int64) + for m in range(0, num_pid_m): + offs_am = m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + + labels = tl.load( + labels_ptr + offs_am * stride_labels, mask=offs_am < num_tokens, other=ignore_index + ) + + valid_labels_mask = labels != ignore_index + num_valid_tokens += (tl.sum(valid_labels_mask.to(tl.int32), axis=0)).to(tl.int64) + tl.store(num_valid_tokens_ptr, num_valid_tokens) + + +@triton.autotune( + configs=[triton.Config({"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64})], + key=["num_tokens", "num_splits"], +) +@triton.jit +def forward_dp_epilogue( + num_tokens: tl.int64, + num_splits: tl.int64, # TODO: maybe this could be a constexpr + ignore_index: tl.int64, + labels_ptr, #: tl.pointer_type(tl.int64), + stride_labels: tl.int64, + num_valid_tokens_ptr, #: tl.pointer_type(tl.int64), + max_ptr, #: tl.pointer_type(tl.float32), + stride_max_m: tl.int64, + stride_max_n: tl.int64, + accu_ptr, #: tl.pointer_type(tl.float32), + stride_accu_m: tl.int64, + stride_accu_n: tl.int64, + global_max_ptr, #: tl.pointer_type(tl.float32), + stride_global_max: tl.int64, + global_accu_ptr, #: tl.pointer_type(tl.float32), + stride_global_accu: tl.int64, + global_logprobs_ptr, #: tl.pointer_type(tl.float32), + 
stride_global_logprobs: tl.int64, + global_logprobs_scalar_ptr, #: tl.pointer_type(tl.float32), + REDUCTION: tl.constexpr, + BLOCK_SIZE_M: tl.constexpr, + BLOCK_SIZE_N: tl.constexpr, +): + """ + forward epilogue in dp + """ + pid_m = tl.program_id(axis=0) + + offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + global_max = tl.zeros((BLOCK_SIZE_M,), dtype=tl.float32) + global_accu = tl.zeros((BLOCK_SIZE_M,), dtype=tl.float32) + + for pid_n in range(0, tl.cdiv(num_splits, BLOCK_SIZE_N)): + offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) + + _max = tl.load( + max_ptr + offs_m[:, None] * stride_max_m + offs_n[None, :] * stride_max_n, + mask=(offs_m[:, None] < num_tokens) & (offs_n[None, :] < num_splits), + other=0.0, + ) + _accu = tl.load( + accu_ptr + offs_m[:, None] * stride_accu_m + offs_n[None, :] * stride_accu_n, + mask=(offs_m[:, None] < num_tokens) & (offs_n[None, :] < num_splits), + other=0.0, + ) + + # local reduction + _max_old = global_max + _local_max = tl.max(_max, axis=1, return_indices=False) + global_max = tl.maximum(global_max, _local_max) + + _scale = tl.exp(_max - global_max[:, None]) + _coeff = tl.exp(_max_old - global_max) + global_accu = _coeff * global_accu + tl.sum(_scale * _accu, axis=1) + + # store maximum + tl.store(global_max_ptr + offs_m * stride_global_max, global_max, mask=offs_m < num_tokens) + # store accumulate + tl.store(global_accu_ptr + offs_m * stride_global_accu, global_accu, mask=offs_m < num_tokens) + # update logprobs + labels = tl.load( + labels_ptr + offs_m * stride_labels, mask=offs_m < num_tokens, other=ignore_index + ) + global_logprobs_ptrs = global_logprobs_ptr + offs_m * stride_global_logprobs + global_logprobs = tl.load(global_logprobs_ptrs, mask=offs_m < num_tokens) + global_logprobs = global_max + tl.log(global_accu) - global_logprobs + label_mask = labels != ignore_index + global_logprobs = tl.where(label_mask, global_logprobs, 0.0) + + if REDUCTION == 0: # no-reduction + 
tl.store(global_logprobs_ptrs, global_logprobs, mask=offs_m < num_tokens) + elif REDUCTION == 1: # sum + global_logprobs_scalar = tl.sum(global_logprobs, axis=0) + tl.atomic_add(global_logprobs_scalar_ptr, global_logprobs_scalar) + elif REDUCTION == 2: # mean + num_valid_tokens = tl.load(num_valid_tokens_ptr) + global_logprobs_scalar = tl.fdiv( + tl.sum(global_logprobs, axis=0), num_valid_tokens.to(tl.float32) + ) + tl.atomic_add(global_logprobs_scalar_ptr, global_logprobs_scalar) + + +@triton.autotune( + configs=[triton.Config({"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64})], + key=["num_tokens", "num_splits"], +) +@triton.jit +def forward_tp_epilogue( + num_tokens: tl.int64, + num_splits: tl.int64, + reduced_max_ptr, #: tl.pointer_type(tl.float32), + stride_reduced_max_m: tl.int64, + stride_reduced_max_n: tl.int64, + original_max_ptr, #: tl.pointer_type(tl.float32), + stride_original_max_m: tl.int64, + stride_original_max_n: tl.int64, + accu_ptr, #: tl.pointer_type(tl.float32), + stride_accu_m: tl.int64, + stride_accu_n: tl.int64, + global_max_ptr, #: tl.pointer_type(tl.float32), + stride_global_max: tl.int64, + global_accu_ptr, #: tl.pointer_type(tl.float32), + stride_global_accu: tl.int64, + BLOCK_SIZE_M: tl.constexpr, + BLOCK_SIZE_N: tl.constexpr, +): + """ + forward epilogue in tp + """ + pid_m = tl.program_id(axis=0) + + offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + + global_max = tl.zeros((BLOCK_SIZE_M,), dtype=tl.float32) + global_accu = tl.zeros((BLOCK_SIZE_M,), dtype=tl.float32) + + for pid_n in range(0, tl.cdiv(num_splits, BLOCK_SIZE_N)): + offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) + + _reduced_max = tl.load( + reduced_max_ptr + + offs_m[:, None] * stride_reduced_max_m + + offs_n[None, :] * stride_reduced_max_n, + mask=(offs_m[:, None] < num_tokens) & (offs_n[None, :] < num_splits), + other=0.0, + ) + _original_max = tl.load( + original_max_ptr + + offs_m[:, None] * stride_original_max_m + + offs_n[None, :] * 
stride_original_max_n, + mask=(offs_m[:, None] < num_tokens) & (offs_n[None, :] < num_splits), + other=0.0, + ) + _accu = tl.load( + accu_ptr + offs_m[:, None] * stride_accu_m + offs_n[None, :] * stride_accu_n, + mask=(offs_m[:, None] < num_tokens) & (offs_n[None, :] < num_splits), + other=0.0, + ) + + # local reduction + _max_old = global_max + _local_max = tl.max(_reduced_max, axis=1) + global_max = tl.maximum(global_max, _local_max) + + # update accumulate + _coeff = tl.exp(_max_old - global_max) + _scale = tl.exp(_original_max - global_max[:, None]) + global_accu = _coeff * global_accu + tl.sum(_scale * _accu, axis=1) + + # store + tl.store(global_max_ptr + offs_m * stride_global_max, global_max, mask=offs_m < num_tokens) + tl.store(global_accu_ptr + offs_m * stride_global_accu, global_accu, mask=offs_m < num_tokens) + + +@triton.autotune(configs=[triton.Config({"BLOCK_SIZE_M": 16})], key=["num_tokens"]) +@triton.jit +def forward_tp_epilogue_update_logprobs( + num_tokens: tl.int64, + ignore_index: tl.int64, + num_valid_tokens_ptr, #: tl.pointer_type(tl.int64), + labels_ptr, #: tl.pointer_type(tl.int64), + stride_labels: tl.int64, + logprobs_ptr, #: tl.pointer_type(tl.float32), + stride_logprobs: tl.int64, + maximum_ptr, #: tl.pointer_type(tl.float32), + stride_maximum: tl.int64, + accumulate_ptr, #: tl.pointer_type(tl.float32), + stride_accumulate: tl.int64, + logprobs_scalar_ptr, #: tl.pointer_type(tl.float32), + REDUCTION: tl.constexpr, + BLOCK_SIZE_M: tl.constexpr, +): + """ + update logprobs in tp + """ + pid_m = tl.program_id(axis=0) + + offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + + logprobs = tl.load(logprobs_ptr + offs_m * stride_logprobs, mask=offs_m < num_tokens) + maximum = tl.load(maximum_ptr + offs_m * stride_maximum, mask=offs_m < num_tokens) + accumulate = tl.load(accumulate_ptr + offs_m * stride_accumulate, mask=offs_m < num_tokens) + + labels = tl.load( + labels_ptr + offs_m * stride_labels, mask=offs_m < num_tokens, 
other=ignore_index + ) + label_mask = labels != ignore_index + + logprobs = maximum + tl.log(accumulate) - logprobs + logprobs = tl.where(label_mask, logprobs, 0.0) + + if REDUCTION == 0: # no-reduction + tl.store(logprobs_ptr + offs_m * stride_logprobs, logprobs, mask=offs_m < num_tokens) + elif REDUCTION == 1: # sum + logprobs_scalar = tl.sum(logprobs, axis=0) + tl.atomic_add(logprobs_scalar_ptr, logprobs_scalar) + elif REDUCTION == 2: # mean + num_valid_tokens = tl.load(num_valid_tokens_ptr) + logprobs_scalar = tl.fdiv(tl.sum(logprobs, axis=0), num_valid_tokens.to(tl.float32)) + tl.atomic_add(logprobs_scalar_ptr, logprobs_scalar) diff --git a/megatron/core/fusions/linear_cross_entropy/utils.py b/megatron/core/fusions/linear_cross_entropy/utils.py new file mode 100644 index 00000000000..d077d64ab17 --- /dev/null +++ b/megatron/core/fusions/linear_cross_entropy/utils.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +import typing +from enum import Enum + + +class EntropyReductionEnum(Enum): + """ + Enum for the reduction method of cross entropy. + """ + + kNone = 0 + kSum = 1 + kMean = 2 + + +def str_to_reduction_enum(reduction: typing.Literal["none", "sum", "mean"]) -> EntropyReductionEnum: + """ + str -> EntropyReductionEnum + """ + _enum = EntropyReductionEnum.kNone + if reduction == "none": + _enum = EntropyReductionEnum.kNone + elif reduction == "sum": + _enum = EntropyReductionEnum.kSum + elif reduction == "mean": + _enum = EntropyReductionEnum.kMean + else: + raise ValueError(f"Invalid reduction: {reduction}") + return _enum + + +class BackwardMethodEnum(Enum): + """ + Enum for the backward method of linear cross entropy. 
+ """ + + # two separate kernels for d_hidden and d_weight, respectively + kTwoKernels = 0 + # calculate partial d_logits along its N dimension + kDlogitsSplitN = 1 + # fuse d_hidden and d_weight into a single kernel + kFused = 2 diff --git a/megatron/core/models/common/language_module/language_module.py b/megatron/core/models/common/language_module/language_module.py index 259bb716a93..13d74aa5271 100644 --- a/megatron/core/models/common/language_module/language_module.py +++ b/megatron/core/models/common/language_module/language_module.py @@ -1,7 +1,7 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import logging import os -from typing import Optional, Tuple +from typing import Any, Dict, Literal, Optional, Tuple import torch from torch import Tensor @@ -14,6 +14,7 @@ except: te_parallel_cross_entropy = None from megatron.core.fusions.fused_cross_entropy import fused_vocab_parallel_cross_entropy +from megatron.core.fusions.fused_linear_cross_entropy import linear_cross_entropy from megatron.core.pipeline_parallel.utils import ( is_pp_first_stage, is_pp_last_stage, @@ -125,6 +126,68 @@ def check_and_set_env_variable( check_and_set_env_variable("NVTE_FUSED_ATTN", 1, AttnBackend.auto) check_and_set_env_variable("NVTE_UNFUSED_ATTN", 1, AttnBackend.auto) + def compute_output_layer_and_language_model_loss( + self, + hidden: Tensor, + labels: Optional[Tensor], + weight: Tensor = None, + sequence_parallel_enabled: bool = False, + column_parallel_linear: torch.nn.Module = None, + col_linear_kwargs: Dict[str, Any] = {}, + reduction: Literal["none", "sum", "mean"] = "none", + ignore_index: int = -100, + ) -> Tensor: + """Computes the language model logits and loss (Cross entropy across vocabulary) + + Args: + hidden (Tensor): The hidden states from the transformer model + labels (Optional[Tensor]): The labels of dimension [batch size, seq length] + weight (Tensor): The weight tensor of shape [vocab size, hidden size]. 
+ Required if using fused linear cross entropy. + column_parallel_linear (torch.nn.Module): The column parallel linear + layer to use for computing logits when not using fused linear cross entropy. + col_linear_kwargs (Dict[str, Any]): Additional kwargs for column parallel linear layer + reduction (Optional[str]): The reduction method. Defaults to "none", and can be + one of "none", "sum", "mean". + ignore_index (Optional[int]): The index to ignore in the loss calculation. + Defaults to -100. + + Returns: + Tensor: Loss tensor of dimensions [batch size, sequence_length]. + """ + if ( + self.config.cross_entropy_loss_fusion + and self.config.cross_entropy_fusion_impl == 'linear' + ): + assert ( + weight is not None + ), "weight cannot be None when using fused linear cross entropy." + assert ( + labels is not None + ), "labels cannot be None when using fused linear cross entropy." + # [b s] => [s b] + labels = labels.transpose(0, 1).contiguous() + loss = linear_cross_entropy( + hidden, + weight, + labels, + tp_group=self.pg_collection.tp, + sequence_parallel=sequence_parallel_enabled, + reduction=reduction, + ignore_index=ignore_index, + ) + + # [s b] => [b, s] + loss = loss.view_as(labels).transpose(0, 1).contiguous() + return loss + else: + assert ( + column_parallel_linear is not None + ), "column_parallel_linear cannot be None when not using fused linear cross entropy." 
+ logits, _ = column_parallel_linear(hidden, **col_linear_kwargs) + + return self.compute_language_model_loss(labels, logits) + def compute_language_model_loss(self, labels: Tensor, logits: Tensor) -> Tensor: """Computes the language model loss (Cross entropy across vocabulary) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 70eea932683..4a6370bc49d 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -570,12 +570,6 @@ def _postprocess( # if loss_mask is not provided, use all ones as loss_mask loss_mask = torch.ones_like(mtp_labels) for mtp_layer_number in range(self.config.mtp_num_layers): - # output - mtp_logits, _ = self.output_layer( - hidden_states_list[mtp_layer_number + 1], - weight=output_weight, - runtime_gather_output=runtime_gather_output, - ) # Calc loss for the current Multi-Token Prediction (MTP) layers. mtp_labels, _ = roll_tensor( mtp_labels, @@ -591,7 +585,20 @@ def _postprocess( cp_group=self.cp_group, packed_seq_params=packed_seq_params, ) - mtp_loss = self.compute_language_model_loss(mtp_labels, mtp_logits) + + # Compute mtp loss without storing logits to save memory. 
+ mtp_loss = self.compute_output_layer_and_language_model_loss( + hidden_states_list[mtp_layer_number + 1], + labels=mtp_labels, + weight=self.shared_embedding_or_output_weight(), + sequence_parallel_enabled=self.output_layer.sequence_parallel, + column_parallel_linear=self.output_layer, + col_linear_kwargs={ + 'weight': output_weight, + 'runtime_gather_output': runtime_gather_output, + }, + ) + mtp_loss = loss_mask * mtp_loss if self.training: # TODO(shifangx): remove the use of parallel_state here @@ -636,9 +643,12 @@ def _postprocess( hidden_states.squeeze(1).unsqueeze(0) ).unsqueeze(1) - logits, _ = self.output_layer( - hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output - ) + if has_config_logger_enabled(self.config) or labels is None: + logits, _ = self.output_layer( + hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output + ) + else: + logits = None # Restore sequence parallel execution to the output layer if necessary. if sequence_parallel_override: @@ -665,7 +675,17 @@ def _postprocess( # [s b h] => [b s h] return logits.transpose(0, 1).contiguous() - loss = self.compute_language_model_loss(labels, logits) + loss = self.compute_output_layer_and_language_model_loss( + hidden_states, + labels=labels, + weight=self.shared_embedding_or_output_weight(), + sequence_parallel_enabled=self.output_layer.sequence_parallel, + column_parallel_linear=self.output_layer, + col_linear_kwargs={ + 'weight': output_weight, + 'runtime_gather_output': runtime_gather_output, + }, + ) return loss diff --git a/megatron/core/models/mamba/mamba_model.py b/megatron/core/models/mamba/mamba_model.py index 378cf7e47d6..e4074eda806 100644 --- a/megatron/core/models/mamba/mamba_model.py +++ b/megatron/core/models/mamba/mamba_model.py @@ -267,9 +267,10 @@ def forward( hidden_states.squeeze(1).unsqueeze(0) ).unsqueeze(1) - logits, _ = self.output_layer( - hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output - 
) + if labels is None: + logits, _ = self.output_layer( + hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output + ) # Restore sequence parallel execution to the output layer if necessary. if sequence_parallel_override: @@ -284,6 +285,16 @@ def forward( # [s b h] => [b s h] return logits.transpose(0, 1).contiguous() - loss = self.compute_language_model_loss(labels, logits) + loss = self.compute_output_layer_and_language_model_loss( + hidden_states, + labels, + weight=self.shared_embedding_or_output_weight(), + sequence_parallel_enabled=self.output_layer.sequence_parallel, + column_parallel_linear=self.output_layer, + col_linear_kwargs={ + "weight": output_weight, + "runtime_gather_output": runtime_gather_output, + }, + ) return loss diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index c413c346b69..2c87532c919 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -2330,7 +2330,7 @@ def _add_training_args(parser): help='Enabled fusion of cross entropy loss calculation.', dest='cross_entropy_loss_fusion') group.add_argument('--cross-entropy-fusion-impl', type=str, default='native', - choices=['native', 'te'], + choices=['native', 'te', 'linear'], help='Implementation of cross entropy loss calculation.') group.add_argument('--use-flash-attn', action='store_true', help='use FlashAttention implementation of attention. ' diff --git a/tests/unit_tests/fusions/test_fused_linear_cross_entropy.py b/tests/unit_tests/fusions/test_fused_linear_cross_entropy.py new file mode 100644 index 00000000000..3ac8e7f6200 --- /dev/null +++ b/tests/unit_tests/fusions/test_fused_linear_cross_entropy.py @@ -0,0 +1,1509 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +import contextlib +import os +import typing +from contextlib import ExitStack +from dataclasses import dataclass + +import numpy as np +import pytest +import torch +import torch.distributed as dist +from torch.utils.data import DataLoader, Dataset +from torch.utils.data.distributed import DistributedSampler + +import megatron.core.parallel_state as ps +from megatron.core.fusions.fused_linear_cross_entropy import linear_cross_entropy +from megatron.core.models.gpt.gpt_layer_specs import ( + get_gpt_decoder_block_spec, + get_gpt_mtp_block_spec, +) +from megatron.core.models.gpt.gpt_model import GPTModel +from megatron.training.utils import get_device_arch_version +from tests.unit_tests.a2a_overlap.utils import ( + deterministic_mode, + get_test_config, + get_valid_fp8_flags, + get_valid_token_dispatcher_types, +) +from tests.unit_tests.test_utilities import Utils + + +# 1. Define a standardized context to hold your distributed info +@dataclass +class DistContext: + rank: int + world_size: int + group: dist.ProcessGroup + is_chief: bool + + +# 2. Create a module-scoped fixture +# This runs ONE time per file, no matter how many test classes you have. 
+@pytest.fixture(scope="module") +def distributed_context(): + # --- PRE-CHECK --- + if "WORLD_SIZE" not in os.environ or int(os.environ["WORLD_SIZE"]) < 2: + pytest.skip("Requires torchrun with multiple GPUs (WORLD_SIZE >= 2)") + + # --- SETUP --- + is_external_init = dist.is_initialized() + + if not is_external_init: + # Initialize only if not already done (e.g., by another test runner) + dist.init_process_group( + backend="nccl", + init_method="env://", + world_size=int(os.environ["WORLD_SIZE"]), + rank=int(os.environ["RANK"]), + ) + + # Set device immediately to avoid cross-device pollution + local_rank = int(os.environ.get("LOCAL_RANK", os.environ["RANK"])) + device = torch.device(f"cuda:{local_rank}") + torch.cuda.set_device(device) + + # Gather context data + rank = dist.get_rank() + world_size = dist.get_world_size() + group = dist.group.WORLD + + print(f"[INFO]: Initialized Rank: {rank} / {world_size}") + + context = DistContext(rank=rank, world_size=world_size, group=group, is_chief=(rank == 0)) + + # Yield control to the tests + yield context + + # --- TEARDOWN --- + # Only destroy if we were the ones who initialized it + if not is_external_init: + dist.destroy_process_group() + + +class MockDataset(Dataset): + """ + Mock dataset for torchtitan GPT training tests + Generates synthetic tokenized sequences on-the-fly + """ + + def __init__( + self, + num_samples=10000, + micro_batch_size=4, + sequence_length=2048, + vocab_size=128256, + seed=42, + ): + """ + Initialize mock dataset + + Args: + num_samples: Total number of samples + sequence_length: Length of each sequence + vocab_size: Size of vocabulary + seed: Random seed for reproducibility + """ + self.num_samples = num_samples + self.micro_batch_size = micro_batch_size + self.sequence_length = sequence_length + self.vocab_size = vocab_size + self.seed = seed + + # Set numpy seed for deterministic generation + np.random.seed(seed) + + def __len__(self): + return self.num_samples + + def 
__getitem__(self, idx): + """ + Generate a single training sample + + Returns: + dict with 'tokens' and 'labels' + """ + # Use idx as seed for reproducible but varied samples + rng = np.random.RandomState(self.seed + idx) + + # Generate random token sequence + tokens = rng.randint(0, self.vocab_size, size=self.sequence_length, dtype=np.int64) + + # Labels are tokens shifted by 1 (next token prediction) + labels = rng.randint(0, self.vocab_size, size=self.sequence_length, dtype=np.int64) + + return { + 'input_ids': torch.from_numpy(tokens.copy()), + 'labels': torch.from_numpy(labels.copy()), + "attention_mask": torch.ones( + (1, self.sequence_length, self.sequence_length), dtype=bool + ), + } + + +def build_model(config): + max_seq_len = 300 + + # build layer spec + transformer_layer_spec = get_gpt_decoder_block_spec(config=config, use_transformer_engine=True) + mtp_block_spec = get_gpt_mtp_block_spec(config, transformer_layer_spec.layer_specs[-1], True) + + # build model + gpt_model = GPTModel( + config=config, + transformer_layer_spec=transformer_layer_spec, + mtp_block_spec=mtp_block_spec, + vocab_size=100, + pre_process=True, + post_process=True, + max_sequence_length=max_seq_len, + ) + return gpt_model + + +# Define a reusable context manager +@contextlib.contextmanager +def init_model_parallel(tp=1, pp=1, ep=1): + try: + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp, + pipeline_model_parallel_size=pp, + expert_model_parallel_size=ep, + ) + yield + finally: + Utils.destroy_model_parallel() + + +def init_gpt_dataloader( + dp_group, micro_batch_size=1, vocab_size=50257, sequence_length=128, batch_size=8 +): + dataset = MockDataset( + num_samples=1000, + micro_batch_size=micro_batch_size, + sequence_length=sequence_length, + vocab_size=vocab_size, + seed=42, + ) + sampler = DistributedSampler(dataset, num_replicas=dp_group.size(), rank=dp_group.rank()) + dataloader = DataLoader(dataset, batch_size=batch_size, sampler=sampler) + return dataloader 
+ + +# skip it for good +@pytest.mark.skipif( + ("WORLD_SIZE" not in os.environ or int(os.environ["WORLD_SIZE"]) < 2) or True, + reason="Requires torchrun with multiple GPUs", +) +class TestFusedLinearCrossEntropyOnGptModel: + @pytest.mark.parametrize("fp8_flag", get_valid_fp8_flags()) + @pytest.mark.parametrize("mtp_layers", [0, 1]) + @pytest.mark.parametrize("dispatcher_type", get_valid_token_dispatcher_types()) + @pytest.mark.parametrize("layer_num", [2]) + def test_gpt_model(self, mtp_layers, dispatcher_type, fp8_flag, layer_num): + with ExitStack() as stack: + gpu_count = torch.cuda.device_count() + tp = min(2, gpu_count) + ep = gpu_count // tp + stack.enter_context(init_model_parallel(tp=tp, ep=ep)) + stack.enter_context(deterministic_mode()) + + # create TransformerConfig + extra_kwargs = { + "moe_token_dispatcher_type": dispatcher_type, + "sequence_parallel": tp > 1, + "tensor_model_parallel_size": tp, + } + if dispatcher_type == "flex": + extra_kwargs["moe_enable_deepep"] = True + extra_kwargs["moe_router_dtype"] = "fp32" + if fp8_flag is not None: + extra_kwargs["fp8"] = fp8_flag[0] + extra_kwargs["fp8_recipe"] = fp8_flag[1] + if mtp_layers > 0: + extra_kwargs["mtp_num_layers"] = mtp_layers + extra_kwargs["mtp_loss_scaling_factor"] = 1.1 + + # build config + config = get_test_config(num_layers=layer_num, extra_kwargs=extra_kwargs) + config.expert_model_parallel_size = ep + + # build model + gpt_model = build_model(config) + gpt_model.cuda() + + dataloader = init_gpt_dataloader( + ps.get_data_parallel_group(), + vocab_size=gpt_model.vocab_size, + micro_batch_size=1, + sequence_length=gpt_model.max_sequence_length, + batch_size=4, + ) + # for batch in dataloder: + for batch in dataloader: + batch["position_ids"] = torch.arange( + gpt_model.max_sequence_length, dtype=torch.int64 + ) + batch = {k: v.cuda() for k, v in batch.items()} + gpt_model.zero_grad() + output = gpt_model(**batch) + loss = output.sum() + loss.backward() + + +@pytest.mark.skipif( + 
"WORLD_SIZE" in os.environ and os.environ["WORLD_SIZE"] != "1", reason="Requires single GPU" +) +@pytest.mark.skipif(get_device_arch_version() != 10, reason="Requires GPU architecture = 10") +class TestFusedLinearCrossEntropyDataParallel: + def cleanup(self): + torch.cuda.empty_cache() + torch.cuda.reset_peak_memory_stats() + import gc + + gc.collect() + torch.cuda.synchronize() + + @staticmethod + def torch_linear_cross_entropy( + hidden: torch.Tensor, + weight: torch.Tensor, + labels: torch.Tensor, + reduction: str, + ignore_index: int, + ): + # NOTE: need to convert to fp32 to fp32 accumulation, + # thus assure accuracy + logits = hidden.to(torch.float32) @ weight.T.to(torch.float32) + logprobs = torch.nn.functional.cross_entropy( + logits.view(-1, logits.shape[-1]), + labels.view(-1), + reduction=reduction, + ignore_index=ignore_index, + ) + return logprobs.to(torch.float32) + + @staticmethod + def get_problems(): + return [ + (80, 125, 64), + (80, 152064, 64), + (1024, 152064, 4096), + (4096, 152063, 8192), + ((1, 4096), 152064, 8192), + ((2, 4096), 152064, 8192), + ] + + @staticmethod + def get_ignore_index(): + return [-100, 4] + + def test_kernel_launch(self): + """ + Check if the compiled kernel can be + launched with different problem sizes + """ + self.cleanup() + + num_tokens = [15, 26, 128, 513, 2048, 8192] + vocab_size = 152064 + dim = 4096 + dtype = torch.bfloat16 + reduction = "mean" + ignore_index = -100 + + weight = torch.randn(vocab_size, dim, dtype=dtype, device="cuda").requires_grad_() + for num_token in num_tokens: + hidden = torch.randn(num_token, dim, dtype=dtype, device="cuda").requires_grad_() + labels = torch.randint(0, vocab_size, (num_token,), dtype=torch.long, device="cuda") + + logprobs = linear_cross_entropy( + hidden, weight, labels, reduction=reduction, ignore_index=ignore_index + ) + assert not torch.isnan(logprobs).any() + + gLogprobs = torch.randn_like(logprobs) + (d_hidden, d_weight) = torch.autograd.grad( + (logprobs,), 
(hidden, weight), (gLogprobs,), retain_graph=False + ) + assert not torch.isnan(d_hidden).any() + assert not torch.isnan(d_weight).any() + + @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16]) + @pytest.mark.parametrize("problem", get_problems()) + @pytest.mark.parametrize("reduction", ["none", "mean", "sum"]) + @pytest.mark.parametrize("ignore_index", get_ignore_index()) + def test_correctness(self, dtype, problem, reduction, ignore_index): + num_tokens, vocabsize, dim = problem + hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim) + labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens + + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + if ignore_index >= 0 and ignore_index < vocabsize: + pad_labels = torch.nn.functional.pad(labels, (0, 1), value=ignore_index) + labels = pad_labels[..., 1:].contiguous() + + # forward + torch_logprobs = self.torch_linear_cross_entropy( + hidden, weight, labels, reduction=reduction, ignore_index=ignore_index + ) + + custom_logprobs = linear_cross_entropy( + hidden, weight, labels, reduction=reduction, ignore_index=ignore_index + ) + + torch.testing.assert_close(torch_logprobs, custom_logprobs) + + # backward + g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1) + + (d_torch_hidden, d_torch_weight) = torch.autograd.grad( + (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + + (d_custom_hidden, d_custom_weight) = torch.autograd.grad( + (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + + torch.testing.assert_close(d_torch_hidden, d_custom_hidden, atol=1e-3, rtol=1e-3) + torch.testing.assert_close(d_torch_weight, d_custom_weight, 
atol=1e-3, rtol=1e-3) + + @pytest.mark.parametrize("problem", [((1, 4096), 129280, 7168)]) + @pytest.mark.parametrize("dtype", [torch.bfloat16]) + @pytest.mark.parametrize("reduction", ["mean"]) + @pytest.mark.parametrize("ignore_index", [-100]) + def test_performance(self, problem, dtype, reduction, ignore_index): + num_tokens, vocabsize, dim = problem + hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim) + labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens + + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + torch_fwd_latency = list() + torch_bwd_latency = list() + custom_fwd_latency = list() + custom_bwd_latency = list() + + iterations = 5 + for i in range(iterations): + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + if ignore_index >= 0 and ignore_index < vocabsize: + pad_labels = torch.nn.functional.pad(labels, (0, 1), value=ignore_index) + labels = pad_labels[..., 1:].contiguous() + + # -------- forward -------- # + start_event.record() + torch_logprobs = self.torch_linear_cross_entropy( + hidden, weight, labels, reduction=reduction, ignore_index=ignore_index + ) + end_event.record() + torch.cuda.synchronize() + torch_fwd_latency.append(start_event.elapsed_time(end_event)) + + start_event.record() + custom_logprobs = linear_cross_entropy( + hidden, weight, labels, reduction=reduction, ignore_index=ignore_index + ) + end_event.record() + torch.cuda.synchronize() + custom_fwd_latency.append(start_event.elapsed_time(end_event)) + + # -------- backward -------- # + g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1) + + start_event.record() + (d_torch_hidden, 
d_torch_weight) = torch.autograd.grad( + (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + end_event.record() + torch.cuda.synchronize() + torch_bwd_latency.append(start_event.elapsed_time(end_event)) + + start_event.record() + (d_custom_hidden, d_custom_weight) = torch.autograd.grad( + (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + end_event.record() + torch.cuda.synchronize() + custom_bwd_latency.append(start_event.elapsed_time(end_event)) + + # --- remove first latency due to warmup --- # + torch_fwd_latency = torch_fwd_latency[1:] + torch_bwd_latency = torch_bwd_latency[1:] + custom_fwd_latency = custom_fwd_latency[1:] + custom_bwd_latency = custom_bwd_latency[1:] + + print() + print(f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}:") + print( + f"[INFO]: Torch forward latency: {sum(torch_fwd_latency) / len(torch_fwd_latency):.2f} ms" + ) + print( + f"[INFO]: Custom forward latency: {sum(custom_fwd_latency) / len(custom_fwd_latency):.2f} ms" + ) + print( + f"[INFO]: Torch backward latency: {sum(torch_bwd_latency) / len(torch_bwd_latency):.2f} ms" + ) + print( + f"[INFO]: Custom backward latency: {sum(custom_bwd_latency) / len(custom_bwd_latency):.2f} ms" + ) + + @pytest.mark.parametrize("problem", [((1, 4096), 129280, 7168)]) + @pytest.mark.parametrize("dtype", [torch.bfloat16]) + @pytest.mark.parametrize("reduction", ["mean"]) + @pytest.mark.parametrize("ignore_index", [-100]) + def test_storage(self, problem, dtype, reduction, ignore_index): + num_tokens, vocabsize, dim = problem + hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim) + labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens + print() + print(f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}:") + + def torch_storage(): + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = 
( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + if ignore_index >= 0 and ignore_index < vocabsize: + pad_labels = torch.nn.functional.pad(labels, (0, 1), value=ignore_index) + labels = pad_labels[..., 1:].contiguous() + + torch.cuda.reset_peak_memory_stats() + torch_logprobs = self.torch_linear_cross_entropy( + hidden, weight, labels, reduction=reduction, ignore_index=ignore_index + ) + torch.cuda.synchronize() + torch_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + print(f"[INFO]: Torch Forward pass peak memory: {torch_max_memory:.2f} MB") + + torch.cuda.reset_peak_memory_stats() + g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1) + (d_torch_hidden, d_torch_weight) = torch.autograd.grad( + (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + torch.cuda.synchronize() + torch_backward_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + print(f"[INFO]: Torch Backward pass peak memory: {torch_backward_max_memory:.2f} MB") + + def custom_storage(): + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + if ignore_index >= 0 and ignore_index < vocabsize: + pad_labels = torch.nn.functional.pad(labels, (0, 1), value=ignore_index) + labels = pad_labels[..., 1:].contiguous() + + torch.cuda.reset_peak_memory_stats() + custom_logprobs = linear_cross_entropy( + hidden, weight, labels, reduction=reduction, ignore_index=ignore_index + ) + torch.cuda.synchronize() + custom_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + print(f"[INFO]: Custom Forward pass peak memory: {custom_max_memory:.2f} 
MB") + + torch.cuda.reset_peak_memory_stats() + g_logprobs = torch.empty_like(custom_logprobs).uniform_(-0.1, 0.1) + (d_custom_hidden, d_custom_weight) = torch.autograd.grad( + (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + torch.cuda.synchronize() + custom_backward_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + print(f"[INFO]: Custom Backward pass peak memory: {custom_backward_max_memory:.2f} MB") + + self.cleanup() + torch_storage() + self.cleanup() + custom_storage() + + +@pytest.mark.skipif( + ("WORLD_SIZE" not in os.environ or int(os.environ["WORLD_SIZE"]) < 2), # or True, + reason="Requires torchrun with multiple GPUs", +) +@pytest.mark.skipif(get_device_arch_version() != 10, reason="Requires GPU architecture = 10") +@pytest.mark.usefixtures("distributed_context") +class TestFusedLinearCrossEntropyTensorParallel: + @pytest.fixture(autouse=True) + def setup_attrs(self, distributed_context): + """ + Setup attributes for the test class. + """ + self.tp_group = distributed_context.group + self.tp_rank = distributed_context.rank + self.tp_world_size = distributed_context.world_size + self.is_chief = distributed_context.is_chief + + def cleanup(self): + torch.cuda.empty_cache() + torch.cuda.reset_peak_memory_stats() + import gc + + gc.collect() + torch.cuda.synchronize() + + @staticmethod + def torch_linear_cross_entropy_single_gpu( + hidden: torch.Tensor, + weight: torch.Tensor, + labels: torch.Tensor, + reduction: typing.Optional[str] = "mean", + ): + logits = hidden.to(torch.float32) @ weight.T.to(torch.float32) + logprobs = torch.nn.functional.cross_entropy( + logits.view(-1, logits.shape[-1]), labels.view(-1), reduction=reduction + ) + return logprobs.to(torch.float32) + + class TorchLinearCrossEntropy(torch.autograd.Function): + @staticmethod + def forward( + ctx, + hidden: torch.Tensor, + weight: torch.Tensor, + labels: torch.Tensor, + tp_group: torch.distributed.ProcessGroup, + reduction: typing.Optional[str] = 
"mean", + ): + tp_rank = 0 if tp_group is None else torch.distributed.get_rank(tp_group) + tp_world_size = 1 if tp_group is None else torch.distributed.get_world_size(tp_group) + + logits = hidden.to(torch.float32) @ weight.T.to(torch.float32) + + whole_logits = torch.empty( + (logits.shape[0], logits.shape[-1] * tp_world_size), + dtype=logits.dtype, + device=logits.device, + ) + whole_logits_ref = [ + whole_logits[..., i * logits.shape[-1] : (i + 1) * logits.shape[-1]] + for i in range(tp_world_size) + ] + dist.all_gather(whole_logits_ref, logits, group=tp_group) + + logprobs = torch.nn.functional.cross_entropy( + whole_logits.view(-1, whole_logits.shape[-1]), labels.view(-1), reduction=reduction + ) + + # If we don't preserve whole_logits, + # we need to re-compute it in the backward pass + ctx.save_for_backward(hidden, weight, labels) + ctx.tp_group = tp_group + ctx.reduction = reduction + ctx.tp_rank = tp_rank + ctx.tp_world_size = tp_world_size + + return logprobs.to(torch.float32) + + @staticmethod + def backward(ctx, g_logprobs: torch.Tensor): + hidden, weight, labels = ctx.saved_tensors + tp_group = ctx.tp_group + reduction = ctx.reduction + tp_rank = ctx.tp_rank + tp_world_size = ctx.tp_world_size + + num_tokens, dim = hidden.shape + + if reduction == "mean": + _g_logprobs = torch.broadcast_to(g_logprobs / num_tokens, (num_tokens,)) + elif reduction == "sum": + _g_logprobs = torch.broadcast_to(g_logprobs, (num_tokens,)) + else: + _g_logprobs = g_logprobs + + # re-compute whole_logits + logits = hidden.to(torch.float32) @ weight.T.to(torch.float32) + whole_logits = torch.empty( + (logits.shape[0], logits.shape[-1] * tp_world_size), + dtype=logits.dtype, + device=logits.device, + ) + whole_logits_ref = [ + whole_logits[..., i * logits.shape[-1] : (i + 1) * logits.shape[-1]] + for i in range(tp_world_size) + ] + dist.all_gather(whole_logits_ref, logits, group=tp_group) + + one_hot = torch.zeros_like(whole_logits) + one_hot.scatter_(1, 
labels.view(-1).unsqueeze(-1), 1) + + pd = torch.nn.functional.softmax(whole_logits, dim=-1) + d_logits = (pd - one_hot) * _g_logprobs.unsqueeze(-1) + d_logits = d_logits.to(hidden.dtype) + + local_size = weight.size(0) + local_d_logits = d_logits[:, tp_rank * local_size : (tp_rank + 1) * local_size] + + local_d_hidden = local_d_logits @ weight + local_d_weight = local_d_logits.T @ hidden + + dist.all_reduce(local_d_hidden, op=dist.ReduceOp.SUM, group=tp_group) + + return local_d_hidden, local_d_weight, None, None, None + + @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16]) + @pytest.mark.parametrize("reduction", ["mean", "sum", "none"]) + @pytest.mark.parametrize("problem", [(4096, 129280, 8192)]) + def test_torch_tp_vs_single_gpu(self, dtype, reduction, problem): + num_tokens, vocabsize, dim = problem + vocabsize = vocabsize // self.tp_world_size + + hidden = ( + torch.empty((num_tokens, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, (num_tokens,), dtype=torch.long, device="cuda") + + # ------------ forward pass ------------ # + dist.broadcast(hidden, src=0, group=self.tp_group) + dist.broadcast(labels, src=0, group=self.tp_group) + + # single GPU + whole_weight = torch.empty( + (vocabsize * self.tp_world_size, dim), dtype=dtype, device="cuda" + ) + whole_weight_view = [ + whole_weight[i * vocabsize : (i + 1) * vocabsize, :] for i in range(self.tp_world_size) + ] + dist.all_gather(whole_weight_view, weight, group=self.tp_group) + whole_weight = whole_weight.clone().requires_grad_() + logprobs_single_gpu = self.torch_linear_cross_entropy_single_gpu( + hidden, whole_weight, labels, reduction=reduction + ) + + # TP + logprobs_tp = self.TorchLinearCrossEntropy.apply( + hidden, weight, labels, self.tp_group, reduction + ) + 
torch.testing.assert_close(logprobs_single_gpu, logprobs_tp) + + # ------------ backward pass ------------ # + g_logprobs = torch.empty_like(logprobs_single_gpu).uniform_(-0.1, 0.1) + dist.broadcast(g_logprobs, src=0, group=self.tp_group) + + # single GPU + (d_hidden_single_gpu, d_weight_single_gpu) = torch.autograd.grad( + (logprobs_single_gpu,), (hidden, whole_weight), (g_logprobs,), retain_graph=False + ) + + # TP + (d_hidden_tp, d_weight_tp) = torch.autograd.grad( + (logprobs_tp,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + torch.testing.assert_close(d_hidden_single_gpu, d_hidden_tp, atol=1e-3, rtol=1e-3) + local_d_weight_single_gpu = d_weight_single_gpu[ + self.tp_rank * weight.shape[0] : (self.tp_rank + 1) * weight.shape[0], : + ] + torch.testing.assert_close(local_d_weight_single_gpu, d_weight_tp, atol=1e-3, rtol=1e-3) + + @staticmethod + def get_problems(): + return [ + (80, 125, 64), + (80, 152064, 64), + (1024, 152064, 4096), + (4096, 152063, 8192), + ((1, 4096), 152064, 8192), + ((2, 4096), 152064, 8192), + ] + + @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16]) + @pytest.mark.parametrize("reduction", ["mean", "sum", "none"]) + @pytest.mark.parametrize("problem", get_problems()) + def test_correctness(self, dtype, reduction, problem): + num_tokens, vocabsize, dim = problem + hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim) + labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens + + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + + # ------ forward pass ------ # + dist.broadcast(hidden, src=0, group=self.tp_group) + dist.broadcast(labels, src=0, group=self.tp_group) + + torch_logprobs = 
self.TorchLinearCrossEntropy.apply( + hidden.view(-1, dim), weight, labels, self.tp_group, reduction + ) + + custom_logprobs = linear_cross_entropy( + hidden, weight, labels, tp_group=self.tp_group, reduction=reduction + ) + + torch.testing.assert_close(torch_logprobs, custom_logprobs) + + # ------- backward pass ------- # + g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1) + dist.broadcast(g_logprobs, src=0, group=self.tp_group) + + (d_hidden_torch, d_weight_torch) = torch.autograd.grad( + (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + (d_hidden_custom, d_weight_custom) = torch.autograd.grad( + (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + torch.testing.assert_close(d_hidden_torch, d_hidden_custom, atol=1e-3, rtol=1e-3) + torch.testing.assert_close(d_weight_torch, d_weight_custom, atol=1e-4, rtol=1e-4) + + @pytest.mark.parametrize("problem", [((1, 4096), 129280, 7168)]) + @pytest.mark.parametrize("dtype", [torch.bfloat16]) + @pytest.mark.parametrize("reduction", ["mean"]) + def test_performance(self, problem, dtype, reduction): + num_tokens, vocabsize, dim = problem + hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim) + labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens + + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + torch_fwd_latency = list() + torch_bwd_latency = list() + custom_fwd_latency = list() + custom_bwd_latency = list() + + iterations = 5 + for i in range(iterations): + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + + # ------ forward pass ------ # + dist.broadcast(hidden, src=0, 
group=self.tp_group) + dist.broadcast(labels, src=0, group=self.tp_group) + + start_event.record() + torch_logprobs = self.TorchLinearCrossEntropy.apply( + hidden.view(-1, dim), weight, labels, self.tp_group, reduction + ) + end_event.record() + torch.cuda.synchronize() + torch_fwd_latency.append(start_event.elapsed_time(end_event)) + + start_event.record() + custom_logprobs = linear_cross_entropy( + hidden, weight, labels, tp_group=self.tp_group, reduction=reduction + ) + end_event.record() + torch.cuda.synchronize() + custom_fwd_latency.append(start_event.elapsed_time(end_event)) + + # ------- backward pass ------- # + g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1) + dist.broadcast(g_logprobs, src=0, group=self.tp_group) + + start_event.record() + (d_hidden_torch, d_weight_torch) = torch.autograd.grad( + (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + end_event.record() + torch.cuda.synchronize() + torch_bwd_latency.append(start_event.elapsed_time(end_event)) + + start_event.record() + (d_hidden_custom, d_weight_custom) = torch.autograd.grad( + (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + end_event.record() + torch.cuda.synchronize() + custom_bwd_latency.append(start_event.elapsed_time(end_event)) + + # --- remove first latency due to warmup --- # + torch_fwd_latency = torch_fwd_latency[1:] + torch_bwd_latency = torch_bwd_latency[1:] + custom_fwd_latency = custom_fwd_latency[1:] + custom_bwd_latency = custom_bwd_latency[1:] + + if self.is_chief: + print() + print( + f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}, TP size {self.tp_world_size}:" + ) + print( + f"[INFO]: Torch forward latency: {sum(torch_fwd_latency) / len(torch_fwd_latency):.2f} ms" + ) + print( + f"[INFO]: Custom forward latency: {sum(custom_fwd_latency) / len(custom_fwd_latency):.2f} ms" + ) + print( + f"[INFO]: Torch backward latency: {sum(torch_bwd_latency) / len(torch_bwd_latency):.2f} ms" + 
) + print( + f"[INFO]: Custom backward latency: {sum(custom_bwd_latency) / len(custom_bwd_latency):.2f} ms" + ) + + @pytest.mark.parametrize("problem", [((1, 4096), 129280, 7168)]) + @pytest.mark.parametrize("dtype", [torch.bfloat16]) + @pytest.mark.parametrize("reduction", ["mean"]) + def test_storage(self, problem, dtype, reduction): + num_tokens, vocabsize, dim = problem + hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim) + labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens + + if self.is_chief: + print() + print( + f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}, TP size {self.tp_world_size}:" + ) + + def torch_storage(): + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + + dist.broadcast(hidden, src=0, group=self.tp_group) + dist.broadcast(labels, src=0, group=self.tp_group) + + torch.cuda.reset_peak_memory_stats() + torch_logprobs = self.TorchLinearCrossEntropy.apply( + hidden.view(-1, dim), weight, labels, self.tp_group, reduction + ) + torch.cuda.synchronize() + torch_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + if self.is_chief: + print( + f"[INFO]: On GPU {self.tp_rank}, Torch Forward pass peak memory: {torch_max_memory:.2f} MB" + ) + + g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1) + dist.broadcast(g_logprobs, src=0, group=self.tp_group) + + torch.cuda.reset_peak_memory_stats() + (d_hidden_torch, d_weight_torch) = torch.autograd.grad( + (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + torch.cuda.synchronize() + torch_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + if self.is_chief: + print( + f"[INFO]: On GPU {self.tp_rank}, 
Torch Backward pass peak memory: {torch_max_memory:.2f} MB" + ) + + def custom_storage(): + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + + dist.broadcast(hidden, src=0, group=self.tp_group) + dist.broadcast(labels, src=0, group=self.tp_group) + + torch.cuda.reset_peak_memory_stats() + custom_logprobs = linear_cross_entropy( + hidden, weight, labels, tp_group=self.tp_group, reduction=reduction + ) + torch.cuda.synchronize() + custom_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + if self.is_chief: + print( + f"[INFO]: On GPU {self.tp_rank}, Custom Forward pass peak memory: {custom_max_memory:.2f} MB" + ) + + g_logprobs = torch.empty_like(custom_logprobs).uniform_(-0.1, 0.1) + dist.broadcast(g_logprobs, src=0, group=self.tp_group) + + torch.cuda.reset_peak_memory_stats() + (d_hidden_custom, d_weight_custom) = torch.autograd.grad( + (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + torch.cuda.synchronize() + custom_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + if self.is_chief: + print( + f"[INFO]: On GPU {self.tp_rank}, Custom Backward pass peak memory: {custom_max_memory:.2f} MB" + ) + + self.cleanup() + torch_storage() + self.cleanup() + custom_storage() + + +@pytest.mark.skipif( + "WORLD_SIZE" not in os.environ or int(os.environ["WORLD_SIZE"]) < 2, + reason="Requires torchrun with multiple GPUs", +) +@pytest.mark.skipif(get_device_arch_version() != 10, reason="Requires GPU architecture = 10") +@pytest.mark.usefixtures("distributed_context") +class TestFusedLinearCrossEntropySequenceParallel: + @pytest.fixture(autouse=True) + def setup_attrs(self, distributed_context): + """ + Setup attributes for the test class. 
+ """ + self.tp_group = distributed_context.group + self.tp_rank = distributed_context.rank + self.tp_world_size = distributed_context.world_size + self.is_chief = distributed_context.is_chief + + @staticmethod + def timed_barrier(timeout_s=10): + import time + + work = torch.distributed.barrier(async_op=True) + t0 = time.time() + while not work.is_completed(): + if time.time() - t0 > timeout_s: + exit(1) + time.sleep(0.05) + work.wait() + + def cleanup(self): + torch.cuda.empty_cache() + torch.cuda.reset_peak_memory_stats() + import gc + + gc.collect() + torch.cuda.synchronize() + + @staticmethod + def torch_linear_cross_entropy_single_gpu( + hidden: torch.Tensor, + weight: torch.Tensor, + labels: torch.Tensor, + reduction: typing.Optional[str] = "mean", + ): + logits = hidden.to(torch.float32) @ weight.T.to(torch.float32) + logprobs = torch.nn.functional.cross_entropy( + logits.view(-1, logits.shape[-1]), labels.view(-1), reduction=reduction + ) + return logprobs.to(torch.float32) + + class TorchLinearCrossEntropy(torch.autograd.Function): + @staticmethod + def forward( + ctx, + hidden: torch.Tensor, + weight: torch.Tensor, + labels: torch.Tensor, + tp_group: torch.distributed.ProcessGroup, + reduction: typing.Optional[str] = "mean", + ): + tp_rank = 0 if tp_group is None else torch.distributed.get_rank(tp_group) + tp_world_size = 1 if tp_group is None else torch.distributed.get_world_size(tp_group) + + whole_hidden = torch.empty( + (hidden.shape[0] * tp_world_size, hidden.shape[-1]), + dtype=hidden.dtype, + device=hidden.device, + ) + dist.all_gather_into_tensor(whole_hidden, hidden, group=tp_group) + + logits = whole_hidden.to(torch.float32) @ weight.T.to(torch.float32) + + whole_logits = torch.empty( + (logits.shape[0], logits.shape[-1] * tp_world_size), + dtype=logits.dtype, + device=logits.device, + ) + whole_logits_ref = [ + whole_logits[..., i * logits.shape[-1] : (i + 1) * logits.shape[-1]] + for i in range(tp_world_size) + ] + 
dist.all_gather(whole_logits_ref, logits, group=tp_group) + + logprobs = torch.nn.functional.cross_entropy( + whole_logits.view(-1, whole_logits.shape[-1]), labels.view(-1), reduction=reduction + ) + + # If we don't preserve whole_logits, + # we need to re-compute it in the backward pass + ctx.save_for_backward(whole_hidden, weight, labels) + ctx.tp_group = tp_group + ctx.reduction = reduction + ctx.tp_rank = tp_rank + ctx.tp_world_size = tp_world_size + + return logprobs.to(torch.float32) + + @staticmethod + def backward(ctx, g_logprobs: torch.Tensor): + whole_hidden, weight, labels = ctx.saved_tensors + tp_group = ctx.tp_group + reduction = ctx.reduction + tp_rank = ctx.tp_rank + tp_world_size = ctx.tp_world_size + + num_tokens, dim = whole_hidden.shape + + if reduction == "mean": + _g_logprobs = torch.broadcast_to(g_logprobs / num_tokens, (num_tokens,)) + elif reduction == "sum": + _g_logprobs = torch.broadcast_to(g_logprobs, (num_tokens,)) + else: + _g_logprobs = g_logprobs + + # re-compute whole_logits + logits = whole_hidden.to(torch.float32) @ weight.T.to(torch.float32) + whole_logits = torch.empty( + (logits.shape[0], logits.shape[-1] * tp_world_size), + dtype=logits.dtype, + device=logits.device, + ) + whole_logits_ref = [ + whole_logits[..., i * logits.shape[-1] : (i + 1) * logits.shape[-1]] + for i in range(tp_world_size) + ] + dist.all_gather(whole_logits_ref, logits, group=tp_group) + + one_hot = torch.zeros_like(whole_logits) + one_hot.scatter_(1, labels.view(-1).unsqueeze(-1), 1) + + pd = torch.nn.functional.softmax(whole_logits, dim=-1) + d_logits = (pd - one_hot) * _g_logprobs.unsqueeze(-1) + d_logits = d_logits.to(whole_hidden.dtype) + + local_size = weight.size(0) + local_d_logits = d_logits[:, tp_rank * local_size : (tp_rank + 1) * local_size] + + d_hidden = local_d_logits @ weight + local_d_weight = local_d_logits.T @ whole_hidden + + # dist.all_reduce( + # local_d_hidden, + # op=dist.ReduceOp.SUM, + # group=tp_group + # ) + + # split the 
local_d_hidden along the sequence length dimension + local_num_tokens = num_tokens // tp_world_size + # local_d_hidden = local_d_hidden[tp_rank * local_num_tokens : (tp_rank + 1) * local_num_tokens, :] + + local_d_hidden = torch.empty( + (local_num_tokens, dim), dtype=weight.dtype, device=weight.device + ) + dist.reduce_scatter_tensor( + local_d_hidden, d_hidden, op=dist.ReduceOp.SUM, group=tp_group + ) + return local_d_hidden, local_d_weight, None, None, None + + @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16]) + @pytest.mark.parametrize("reduction", ["mean", "sum", "none"]) + @pytest.mark.parametrize("problem", [(256, 129280, 8192)]) + def test_torch_sp_vs_single_gpu(self, dtype, reduction, problem): + num_tokens, vocabsize, dim = problem + vocabsize = vocabsize // self.tp_world_size + + hidden = ( + torch.empty((num_tokens, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint( + 0, vocabsize, (num_tokens * self.tp_world_size,), dtype=torch.long, device="cuda" + ) + + # ------------ forward pass ------------ # + dist.broadcast(labels, src=0, group=self.tp_group) + + # single GPU + whole_hidden = torch.empty( + (num_tokens * self.tp_world_size, dim), dtype=dtype, device="cuda" + ) + dist.all_gather_into_tensor(whole_hidden, hidden, group=self.tp_group) + whole_hidden = whole_hidden.clone().requires_grad_() + + whole_weight = torch.empty( + (vocabsize * self.tp_world_size, dim), dtype=dtype, device="cuda" + ) + whole_weight_view = [ + whole_weight[i * vocabsize : (i + 1) * vocabsize, :] for i in range(self.tp_world_size) + ] + dist.all_gather(whole_weight_view, weight, group=self.tp_group) + whole_weight = whole_weight.clone().requires_grad_() + logprobs_single_gpu = self.torch_linear_cross_entropy_single_gpu( + whole_hidden, whole_weight, labels, reduction=reduction + ) + + # 
TP + logprobs_tp = self.TorchLinearCrossEntropy.apply( + hidden, weight, labels, self.tp_group, reduction + ) + torch.testing.assert_close(logprobs_single_gpu, logprobs_tp) + + # ------------ backward pass ------------ # + g_logprobs = torch.empty_like(logprobs_single_gpu).uniform_(-0.1, 0.1) + dist.broadcast(g_logprobs, src=0, group=self.tp_group) + + # single GPU + (d_hidden_single_gpu, d_weight_single_gpu) = torch.autograd.grad( + (logprobs_single_gpu,), (whole_hidden, whole_weight), (g_logprobs,), retain_graph=False + ) + + # TP + (d_hidden_tp, d_weight_tp) = torch.autograd.grad( + (logprobs_tp,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + + local_d_hidden_single_gpu = d_hidden_single_gpu[ + self.tp_rank * hidden.shape[0] : (self.tp_rank + 1) * hidden.shape[0], : + ] + torch.testing.assert_close(local_d_hidden_single_gpu, d_hidden_tp, atol=1e-3, rtol=1e-3) + local_d_weight_single_gpu = d_weight_single_gpu[ + self.tp_rank * weight.shape[0] : (self.tp_rank + 1) * weight.shape[0], : + ] + torch.testing.assert_close(local_d_weight_single_gpu, d_weight_tp, atol=1e-3, rtol=1e-3) + + self.cleanup() + + @staticmethod + def get_problems(): + return [ + (80, 125, 64), + (80, 152064, 64), + (1024, 152064, 4096), + (4096, 15206, 1024), + ((1, 4096), 15206, 1024), + ((4, 1024), 15206, 1024), + ] + + @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16]) + @pytest.mark.parametrize("reduction", ["mean", "sum", "none"]) + @pytest.mark.parametrize("problem", get_problems()) + def test_correctness(self, dtype, reduction, problem): + num_tokens, vocabsize, dim = problem + hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim) + labels_shape = ( + (num_tokens * self.tp_world_size,) + if isinstance(num_tokens, int) + else (num_tokens[0] * self.tp_world_size, *num_tokens[1:]) + ) + + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + 
torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + + # ------ forward pass ------ # + dist.broadcast(labels, src=0, group=self.tp_group) + + torch_logprobs = self.TorchLinearCrossEntropy.apply( + hidden.view(-1, dim), weight, labels, self.tp_group, reduction + ) + + custom_logprobs = linear_cross_entropy( + hidden, + weight, + labels, + tp_group=self.tp_group, + reduction=reduction, + sequence_parallel=True, + ) + + torch.testing.assert_close(torch_logprobs, custom_logprobs) + + # ------- backward pass ------- # + g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1) + dist.broadcast(g_logprobs, src=0, group=self.tp_group) + + (d_hidden_torch, d_weight_torch) = torch.autograd.grad( + (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + (d_hidden_custom, d_weight_custom) = torch.autograd.grad( + (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + + # in case one GPU failed, and leading to hang + torch.testing.assert_close(d_hidden_torch, d_hidden_custom, atol=1e-3, rtol=1e-3) + torch.testing.assert_close(d_weight_torch, d_weight_custom, atol=1e-3, rtol=1e-3) + self.timed_barrier() + + self.cleanup() + + @pytest.mark.parametrize("problem", [((1, 1024), 129280, 7168)]) + @pytest.mark.parametrize("dtype", [torch.bfloat16]) + @pytest.mark.parametrize("reduction", ["mean"]) + def test_performance(self, problem, dtype, reduction): + num_tokens, vocabsize, dim = problem + hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim) + labels_shape = ( + (num_tokens * self.tp_world_size,) + if isinstance(num_tokens, int) + else (num_tokens[0] * self.tp_world_size, *num_tokens[1:]) + ) + + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + torch_fwd_latency = list() + torch_bwd_latency = list() 
+ custom_fwd_latency = list() + custom_bwd_latency = list() + + iterations = 5 + for i in range(iterations): + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + + # ------ forward pass ------ # + dist.broadcast(labels, src=0, group=self.tp_group) + + start_event.record() + torch_logprobs = self.TorchLinearCrossEntropy.apply( + hidden.view(-1, dim), weight, labels, self.tp_group, reduction + ) + end_event.record() + torch.cuda.synchronize() + torch_fwd_latency.append(start_event.elapsed_time(end_event)) + + start_event.record() + custom_logprobs = linear_cross_entropy( + hidden, + weight, + labels, + tp_group=self.tp_group, + reduction=reduction, + sequence_parallel=True, + ) + end_event.record() + torch.cuda.synchronize() + custom_fwd_latency.append(start_event.elapsed_time(end_event)) + + # ------- backward pass ------- # + g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1) + dist.broadcast(g_logprobs, src=0, group=self.tp_group) + + start_event.record() + (d_hidden_torch, d_weight_torch) = torch.autograd.grad( + (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + end_event.record() + torch.cuda.synchronize() + torch_bwd_latency.append(start_event.elapsed_time(end_event)) + + start_event.record() + (d_hidden_custom, d_weight_custom) = torch.autograd.grad( + (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + end_event.record() + torch.cuda.synchronize() + custom_bwd_latency.append(start_event.elapsed_time(end_event)) + + # --- remove first latency due to warmup --- # + torch_fwd_latency = torch_fwd_latency[1:] + torch_bwd_latency = torch_bwd_latency[1:] + custom_fwd_latency = custom_fwd_latency[1:] + custom_bwd_latency = 
custom_bwd_latency[1:] + + if self.is_chief: + print() + print( + f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}, TP size {self.tp_world_size}, Sequence Parallel: True:" + ) + print( + f"[INFO]: Torch forward latency: {sum(torch_fwd_latency) / len(torch_fwd_latency):.2f} ms" + ) + print( + f"[INFO]: Custom forward latency: {sum(custom_fwd_latency) / len(custom_fwd_latency):.2f} ms" + ) + print( + f"[INFO]: Torch backward latency: {sum(torch_bwd_latency) / len(torch_bwd_latency):.2f} ms" + ) + print( + f"[INFO]: Custom backward latency: {sum(custom_bwd_latency) / len(custom_bwd_latency):.2f} ms" + ) + + @pytest.mark.parametrize("problem", [((1, 1024), 129280, 7168)]) + @pytest.mark.parametrize("dtype", [torch.bfloat16]) + @pytest.mark.parametrize("reduction", ["mean"]) + def test_storage(self, problem, dtype, reduction): + num_tokens, vocabsize, dim = problem + hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim) + labels_shape = ( + (num_tokens * self.tp_world_size,) + if isinstance(num_tokens, int) + else (num_tokens[0] * self.tp_world_size, *num_tokens[1:]) + ) + + if self.is_chief: + print() + print( + f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}, TP size {self.tp_world_size}, Sequence Parallel: True:" + ) + + def torch_storage(): + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + + dist.broadcast(hidden, src=0, group=self.tp_group) + dist.broadcast(labels, src=0, group=self.tp_group) + + torch.cuda.reset_peak_memory_stats() + torch_logprobs = self.TorchLinearCrossEntropy.apply( + hidden.view(-1, dim), weight, labels, self.tp_group, reduction + ) + torch.cuda.synchronize() + torch_max_memory = 
torch.cuda.max_memory_allocated() / 1024 / 1024 + if self.is_chief: + print( + f"[INFO]: On GPU {self.tp_rank}, Torch Forward pass peak memory: {torch_max_memory:.2f} MB" + ) + + g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1) + dist.broadcast(g_logprobs, src=0, group=self.tp_group) + + torch.cuda.reset_peak_memory_stats() + (d_hidden_torch, d_weight_torch) = torch.autograd.grad( + (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + torch.cuda.synchronize() + torch_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + if self.is_chief: + print( + f"[INFO]: On GPU {self.tp_rank}, Torch Backward pass peak memory: {torch_max_memory:.2f} MB" + ) + + def custom_storage(): + hidden = ( + torch.empty(hidden_shape, dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + weight = ( + torch.empty((vocabsize, dim), dtype=dtype, device="cuda") + .uniform_(-0.1, 0.1) + .requires_grad_() + ) + labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda") + + dist.broadcast(hidden, src=0, group=self.tp_group) + dist.broadcast(labels, src=0, group=self.tp_group) + + torch.cuda.reset_peak_memory_stats() + custom_logprobs = linear_cross_entropy( + hidden, + weight, + labels, + tp_group=self.tp_group, + reduction=reduction, + sequence_parallel=True, + ) + torch.cuda.synchronize() + custom_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + if self.is_chief: + print( + f"[INFO]: On GPU {self.tp_rank}, Custom Forward pass peak memory: {custom_max_memory:.2f} MB" + ) + + g_logprobs = torch.empty_like(custom_logprobs).uniform_(-0.1, 0.1) + dist.broadcast(g_logprobs, src=0, group=self.tp_group) + + torch.cuda.reset_peak_memory_stats() + (d_hidden_custom, d_weight_custom) = torch.autograd.grad( + (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False + ) + torch.cuda.synchronize() + custom_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 + if self.is_chief: 
+ print( + f"[INFO]: On GPU {self.tp_rank}, Custom Backward pass peak memory: {custom_max_memory:.2f} MB" + ) + + self.cleanup() + torch_storage() + self.cleanup() + custom_storage() From 9cf6838aec19fd17be4f0c975c38e9b95621fc9c Mon Sep 17 00:00:00 2001 From: Yuzhong Wang Date: Fri, 5 Dec 2025 11:40:37 +0800 Subject: [PATCH 182/334] Fix gpt_layer_spec for frequently linear attention (#2481) Co-authored-by: Kunlun Li <94586211+kunlunl@users.noreply.github.com> --- gpt_builders.py | 6 +++-- megatron/core/datasets/retro/config/config.py | 3 +++ megatron/core/model_parallel_config.py | 4 ++-- ...rimental_attention_variant_module_specs.py | 6 +++++ megatron/core/models/gpt/gpt_layer_specs.py | 24 +++++++++++++------ megatron/core/models/retro/config.py | 3 ++- .../core/transformer/transformer_config.py | 15 ++++++++++++ megatron/training/arguments.py | 3 ++- megatron/training/training.py | 11 ++++++--- 9 files changed, 59 insertions(+), 16 deletions(-) diff --git a/gpt_builders.py b/gpt_builders.py index 61d159b9967..2850354553b 100644 --- a/gpt_builders.py +++ b/gpt_builders.py @@ -8,6 +8,9 @@ get_gpt_mtp_block_spec, get_gpt_decoder_layer_specs, ) +from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + is_linear_attention_variant, +) from megatron.core.models.gpt.heterogeneous.heterogeneous_layer_specs import ( get_gpt_heterogeneous_layer_spec, ) @@ -42,8 +45,7 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None): else: use_te = args.transformer_impl == "transformer_engine" - linear_attention_variants = ["gated_delta_net"] - if args.num_experts or args.experimental_attention_variant in linear_attention_variants: + if args.num_experts or is_linear_attention_variant(args.experimental_attention_variant): # Define the decoder block spec transformer_layer_spec = get_gpt_decoder_block_spec( config, diff --git a/megatron/core/datasets/retro/config/config.py b/megatron/core/datasets/retro/config/config.py index 
ac9ca841242..73f34a47545 100644 --- a/megatron/core/datasets/retro/config/config.py +++ b/megatron/core/datasets/retro/config/config.py @@ -5,6 +5,7 @@ from dataclasses import dataclass from megatron.core.transformer import TransformerConfig +from megatron.core.utils import experimental_api from .bert_embedders import RetroBertEmbedders from .gpt_chunk_datasets import RetroGPTChunkDatasets @@ -12,7 +13,9 @@ @dataclass +@experimental_api class RetroPreprocessingConfig(TransformerConfig): + # pylint: disable=line-too-long """Configuration object for Retro preprocessing. *Note* : Arguments prefixed with '--retro-gpt-*' or '--retro-bert-*' are diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index e75ff4a0273..129135c4cc0 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -6,11 +6,11 @@ import torch -from megatron.core.utils import internal_api +from megatron.core.utils import experimental_api @dataclass -@internal_api +@experimental_api class ModelParallelConfig: """Base configuration for Megatron Core diff --git a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py index cbe59618baf..e6d6fa03ce7 100644 --- a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py +++ b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py @@ -19,6 +19,12 @@ from megatron.core.transformer.spec_utils import ModuleSpec +def is_linear_attention_variant(experimental_attention_variant: str) -> bool: + """Check if the experimental attention variant is a linear attention variant.""" + linear_attention_variants = ["gated_delta_net"] + return experimental_attention_variant in linear_attention_variants + + def get_gated_delta_net_module_spec_for_backend( backend: BackendSpecProvider, normalization: Optional[str] = None ) -> ModuleSpec: diff --git 
a/megatron/core/models/gpt/gpt_layer_specs.py b/megatron/core/models/gpt/gpt_layer_specs.py index 5395b158749..f25408e9553 100755 --- a/megatron/core/models/gpt/gpt_layer_specs.py +++ b/megatron/core/models/gpt/gpt_layer_specs.py @@ -7,6 +7,7 @@ from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( get_experimental_attention_variant_module_spec_for_backend, + is_linear_attention_variant, ) from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec_for_backend from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules @@ -536,20 +537,29 @@ def get_gpt_decoder_layer_specs( num_experts = None moe_grouped_gemm = None if attention_type == "linear_attention": - linear_attention_variants = ["gated_delta_net"] - if config.experimental_attention_variant not in linear_attention_variants: + multi_latent_attention = None + if is_linear_attention_variant(config.experimental_attention_variant): + # There exists linear attention layer in the model. + experimental_attention_variant = config.experimental_attention_variant + else: # Skip if there is no linear attention layer in the model. continue - multi_latent_attention = None else: multi_latent_attention = config.multi_latent_attention + if is_linear_attention_variant(config.experimental_attention_variant): + # experimental_attention_variant is a linear attention variant, + # so softmax attention is regular attention layer. + experimental_attention_variant = None + else: + # Softmax attention is an experimental attention variant. 
+ experimental_attention_variant = config.experimental_attention_variant layer_spec_key = f"{mlp_type}_{attention_type}" layer_spec_dict[layer_spec_key] = get_layer_spec_fn( num_experts=num_experts, moe_grouped_gemm=moe_grouped_gemm, multi_latent_attention=multi_latent_attention, - experimental_attention_variant=config.experimental_attention_variant, + experimental_attention_variant=experimental_attention_variant, **get_layer_spec_kwargs, ) @@ -592,13 +602,13 @@ def get_gpt_decoder_layer_specs( f"current linear attention pattern: {config.linear_attention_freq}" ) elif config.linear_attention_freq is None: - linear_attention_variants = ["gated_delta_net"] - if config.experimental_attention_variant not in linear_attention_variants: + if not is_linear_attention_variant(config.experimental_attention_variant): linear_attention_pattern = [0] * config.num_layers else: linear_attention_pattern = [1] * config.num_layers warnings.warn( - "Linear attention type is specified but linear_attention_freq is None. " + f"Linear attention type {config.experimental_attention_variant} is specified " + "but linear_attention_freq is None. " "Setting linear_attention_pattern to [1] * config.num_layers as default." 
) else: diff --git a/megatron/core/models/retro/config.py b/megatron/core/models/retro/config.py index 1b486767264..4e45be30b2e 100644 --- a/megatron/core/models/retro/config.py +++ b/megatron/core/models/retro/config.py @@ -7,10 +7,11 @@ from megatron.core.transformer import TransformerConfig from megatron.core.transformer.enums import AttnBackend -from megatron.core.utils import is_te_min_version +from megatron.core.utils import experimental_api, is_te_min_version @dataclass +@experimental_api class RetroConfig(TransformerConfig): """Configuration object for Retro models.""" diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index a3a16754977..31dd5a98a58 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -11,6 +11,7 @@ from megatron.core.quantization.quant_config import RecipeConfig from megatron.core.transformer.enums import AttnBackend, CudaGraphScope from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout +from megatron.core.utils import experimental_api from ..fusions.fused_bias_geglu import quick_gelu from ..model_parallel_config import ModelParallelConfig @@ -31,6 +32,7 @@ @dataclass +@experimental_api class TransformerConfig(ModelParallelConfig): """Configuration object for megatron-core transformers. @@ -241,6 +243,10 @@ class TransformerConfig(ModelParallelConfig): #################### # attention variant: gated_delta_net #################### + linear_attention_type: Optional[str] = None + """Type of linear attention to use. + Deprecated. Use experimental_attention_variant instead.""" + linear_attention_freq: Optional[Union[int, List[int]]] = None """Frequency between LA (linear attention) layers and SDPA (scaled dot-product attention) layers. @@ -877,6 +883,14 @@ def __post_init__(self): f"tensor_model_parallel_size ({self.tensor_model_parallel_size})." 
) + if self.linear_attention_type is not None: + warnings.warn( + "linear_attention_type is deprecated, " + "use experimental_attention_variant instead." + ) + self.experimental_attention_variant = self.linear_attention_type + self.linear_attention_type = None + if self.experimental_attention_variant in ["gated_delta_net"]: assert ( self.linear_attention_freq is not None @@ -1912,6 +1926,7 @@ def __post_init__(self): @dataclass +@experimental_api class MLATransformerConfig(TransformerConfig): """Configuration object for megatron-core Multi-Latent Attention (MLA) transformers. diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 2c87532c919..757f2b63de4 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1201,6 +1201,7 @@ def validate_args(args, defaults={}): args.no_load_rng = True print('Warning: disabling --no-load-rng for upcycling.') + # Experimental attention variant check if args.linear_attention_type is not None: print_rank_0( '--linear-attention-type is deprecated, use --experimental-attention-variant instead.', @@ -1209,7 +1210,7 @@ def validate_args(args, defaults={}): args.experimental_attention_variant = args.linear_attention_type del args.linear_attention_type - # Muon optimizercheck + # Muon optimizer check if 'muon' in args.optimizer: assert not args.use_distributed_optimizer, "Muon optimizer does not support distributed optimizer for now." assert not args.use_torch_fsdp2, "Muon optimizer does not support Torch-FSDP2 for now." 
diff --git a/megatron/training/training.py b/megatron/training/training.py index a732e3917e5..f7731ab3c1a 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -48,6 +48,9 @@ from megatron.core import mpu, tensor_parallel +from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + is_linear_attention_variant, +) from megatron.core.utils import ( check_param_hashes_across_dp_replicas, get_attr_wrapped_model, @@ -379,8 +382,7 @@ def transformer_flops(): ) ) - linear_attention_variants = ["gated_delta_net"] - if args.experimental_attention_variant in linear_attention_variants: + if is_linear_attention_variant(args.experimental_attention_variant): # Calculate number of dense and MoE Transformer MLPs. if isinstance(args.linear_attention_freq, int): linear_attention_pattern = [ @@ -433,7 +435,10 @@ def transformer_flops(): ) ) else: - raise ValueError(f"Invalid linear_attention_type: {args.linear_attention_type}") + raise ValueError( + "Invalid experimental_attention_variant: " + f"{args.experimental_attention_variant}" + ) else: num_linear_attention_layers = 0 linear_self_attn_term = 0 From 89fe8953cd0f46cb1f59cdfbb8647e73a7dcbdd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20Ajroldi?= <61059403+Niccolo-Ajroldi@users.noreply.github.com> Date: Fri, 5 Dec 2025 07:16:38 +0100 Subject: [PATCH 183/334] Skip trainloader when `args.skip_train` is True (#2501) --- megatron/training/training.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/megatron/training/training.py b/megatron/training/training.py index f7731ab3c1a..c29c48d4c9f 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -2990,7 +2990,8 @@ def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider valid_ds = [valid_ds] if not isinstance(valid_ds, list) else valid_ds # Build dataloders. 
- train_dataloader = build_pretraining_data_loader(train_ds, args.consumed_train_samples) + if not args.skip_train: + train_dataloader = build_pretraining_data_loader(train_ds, args.consumed_train_samples) valid_dataloaders = [] for valid_d in valid_ds: From a6d86a6da6591fd27b77e5e732690ab65632a8a0 Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Fri, 5 Dec 2025 15:40:40 +0800 Subject: [PATCH 184/334] [DEV] fixes for muon(qwen3-next, ep multi-adam) (#2564) Signed-off-by: Deyu Fu --- megatron/core/optimizer/muon.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/megatron/core/optimizer/muon.py b/megatron/core/optimizer/muon.py index ddf20b0abb8..b6af7a3e188 100644 --- a/megatron/core/optimizer/muon.py +++ b/megatron/core/optimizer/muon.py @@ -234,9 +234,10 @@ def get_megatron_muon_optimizer( # TODO(deyuf): support MLA if 'linear_qkv.weight' in name and len(param.shape) == 2: param.is_qkv = True - # TODO(deyuf): might not be sufficient for future algorithm. revisit this conditioning - if not getattr(param, 'is_embedding_or_output_parameter', False) and not ( - len(param.shape) == 1 + # TODO(deyuf): currently only allow 2D non-embedding weight to avoid breaking + if ( + not getattr(param, 'is_embedding_or_output_parameter', False) + and len(param.shape) == 2 ): linear_params.append(param) else: @@ -339,6 +340,7 @@ def adam_init_state_fn(opt, config=None): param.requires_grad = True # chain everything together + init_fns = [muon_init_state_fn] + len(chained_adam.chained_optimizers) * [adam_init_state_fn] optimizers += chained_adam.chained_optimizers if layer_wise_distributed_optimizer: @@ -346,9 +348,6 @@ def adam_init_state_fn(opt, config=None): if reset_config_bf16: config.bf16 = True return LayerWiseDistributedOptimizer( - optimizers, - config, - pg_collection, - init_state_fn_list=[muon_init_state_fn, adam_init_state_fn], + optimizers, config, pg_collection, init_state_fn_list=init_fns ) return ChainedOptimizer(optimizers) From 
aee4a74bb69838c08c2b251b143bb9b3d5795874 Mon Sep 17 00:00:00 2001 From: HaochenYuan <106647990+HaochenYuan@users.noreply.github.com> Date: Mon, 8 Dec 2025 18:20:58 +0800 Subject: [PATCH 185/334] [Dev] remove fp16 assert in moe_grouped_gemm & EP (#2494) --- megatron/core/transformer/moe/experts.py | 1 + megatron/training/arguments.py | 3 - .../transformer/moe/test_moe_layer.py | 84 +++++++++++++++++++ 3 files changed, 85 insertions(+), 3 deletions(-) diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index 83cf5b51ffc..5eeafdd8d1d 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -238,6 +238,7 @@ def forward( permuted_probs: torch.Tensor, ): """Forward step of the GroupedMLP.""" + assert self.config.bf16, "Currently GroupedGEMM for MoE only supports bf16." if self.activation_recompute: self.activation_checkpoint = tensor_parallel.CheckpointWithoutOutput() diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 757f2b63de4..682bd94bdf9 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -900,7 +900,6 @@ def validate_args(args, defaults={}): 'residual connection in fp32 only supported when using fp16 or bf16.' if args.moe_grouped_gemm: - assert args.bf16, 'Currently GroupedGEMM for MoE only supports bf16 dtype.' dc = torch.cuda.get_device_capability() assert dc[0] >= 8, "Unsupported compute capability for GroupedGEMM kernels." @@ -1084,8 +1083,6 @@ def validate_args(args, defaults={}): assert args.num_experts is not None, "num_experts must be non None to use expert model parallelism" assert args.num_experts % args.expert_model_parallel_size == 0, \ "Number of experts should be a multiple of expert model parallel_size." - assert not args.fp16, \ - "Expert parallelism is not supported with fp16 training." 
# MoE router check if isinstance(args.moe_router_load_balancing_type, list) and len(args.moe_router_load_balancing_type) == 1: diff --git a/tests/unit_tests/transformer/moe/test_moe_layer.py b/tests/unit_tests/transformer/moe/test_moe_layer.py index 59385f757b3..2a2c995257e 100644 --- a/tests/unit_tests/transformer/moe/test_moe_layer.py +++ b/tests/unit_tests/transformer/moe/test_moe_layer.py @@ -192,3 +192,87 @@ def test_interleave_transformer_block(self, moe_layer_freq): def teardown_method(self, method): Utils.destroy_model_parallel() + + +class TestMoELayerFP16: + """Test MoE layer with FP16 precision.""" + + def setup_method(self, method): + pass + + @pytest.mark.parametrize("moe_token_dispatcher_type", ["allgather", "alltoall"]) + @pytest.mark.parametrize("num_moe_experts", [2, 4]) + @pytest.mark.parametrize("tp_size,ep_size", [(1, 1), (2, 2), (4, 2)]) + def test_moe_layer_fp16_forward_backward( + self, num_moe_experts, moe_token_dispatcher_type, tp_size, ep_size + ): + """Test MoE layer forward and backward pass with fp16 params and inputs.""" + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp_size, expert_model_parallel_size=ep_size + ) + _set_random_seed(seed_=123, data_parallel_random_init=False) + + hidden_size = 64 + sequence_length = 32 + micro_batch_size = 2 + + transformer_config = TransformerConfig( + num_layers=1, + hidden_size=hidden_size, + num_attention_heads=4, + num_moe_experts=num_moe_experts, + use_cpu_initialization=False, + moe_token_dispatcher_type=moe_token_dispatcher_type, + moe_router_load_balancing_type="aux_loss", + moe_router_topk=2, + moe_aux_loss_coeff=0.01, + moe_grouped_gemm=False, # Use SequentialMLP for fp16 test + moe_ffn_hidden_size=256, + add_bias_linear=False, + tensor_model_parallel_size=tp_size, + expert_model_parallel_size=ep_size, + sequence_parallel=tp_size > 1, + fp16=True, + params_dtype=torch.float16, + ) + + transformer_layer_spec = get_gpt_layer_local_spec( + num_experts=num_moe_experts, 
moe_grouped_gemm=False + ) + + moe_layer = MoELayer( + transformer_config, transformer_layer_spec.submodules.mlp.submodules + ).cuda() + + hidden_states = torch.randn( + sequence_length, + micro_batch_size, + hidden_size, + device=torch.cuda.current_device(), + dtype=torch.float16, + requires_grad=True, + ) + + # Forward pass + output, _ = moe_layer(hidden_states) + + assert output.dtype == torch.float16, f"Expected fp16 output, got {output.dtype}" + assert output.shape == hidden_states.shape, f"Output shape mismatch" + + # Backward pass + loss = output.sum() + loss.backward() + + assert hidden_states.grad is not None, "Input gradients should exist" + assert ( + hidden_states.grad.dtype == torch.float16 + ), f"Expected fp16 gradients, got {hidden_states.grad.dtype}" + + for name, param in moe_layer.named_parameters(): + if param.requires_grad: + assert param.grad is not None, f"Gradient for {name} should exist" + + Utils.destroy_model_parallel() + + def teardown_method(self, method): + Utils.destroy_model_parallel() From dfe4da21527a58ce7790e5310c40c8d1fe0eb664 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Mon, 8 Dec 2025 08:03:54 -0800 Subject: [PATCH 186/334] Update tp support in muon (#2385) Signed-off-by: Hao Wu --- megatron/core/optimizer/muon.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/megatron/core/optimizer/muon.py b/megatron/core/optimizer/muon.py index b6af7a3e188..ca7c8563b6f 100644 --- a/megatron/core/optimizer/muon.py +++ b/megatron/core/optimizer/muon.py @@ -8,7 +8,6 @@ import torch from torch.optim.optimizer import ParamsT -from megatron.core import parallel_state from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.module import MegatronModule from megatron.core.utils import get_pg_size, log_single_rank @@ -76,7 +75,7 @@ def scaled_orthogonalize_fn( f'{scale_mode} scale mode, extra_scale_factor={extra_scale_factor}', ) size = [grad.size(-2), grad.size(-1)] - if 
partition_dim: + if partition_dim is not None: size[partition_dim] *= get_pg_size(tp_group) orth_grad = newton_schulz_tp( grad, @@ -130,8 +129,7 @@ def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> t tp_group = None partition_dim = None if self.mode == "blockwise" else getattr(p, "partition_dim", None) if partition_dim == -1: - # llm-shower use different default value for partition_dim than TE. - # Because -1 is a valid index for ndarray, we decided to not overload it. + # emerging-optimizers use None instead of -1 to indicate no tensor parallel partition_dim = None if self.split_qkv and self.is_qkv_fn(p): # type: ignore[misc] @@ -201,8 +199,6 @@ def get_megatron_muon_optimizer( # before this function receive properly created collection if pg_collection is None: pg_collection = ProcessGroupCollection.use_mpu_process_groups() - pg_collection.dp_cp = parallel_state.get_data_parallel_group(with_context_parallel=True) - pg_collection.expt_dp = parallel_state.get_expert_data_parallel_group() log_single_rank(logger, logging.INFO, f'Setting up emerging optimizer with config {config}') From 1d462bd37dac21cfa14177405d4921eedb987052 Mon Sep 17 00:00:00 2001 From: "Dennis(Zhenhuan) Liu" Date: Mon, 8 Dec 2025 14:55:24 -0800 Subject: [PATCH 187/334] [DEV] Update GitHub MoE functional test cases (#2449) --- .../model_config.yaml | 2 +- tests/test_utils/recipes/moe.yaml | 28 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml index 81b023bd86e..d3e3baa9f14 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml @@ -64,4 +64,4 @@ MODEL_ARGS: --muon-momentum: 
0.9 --muon-extra-scale-factor: 0.2 --muon-scale-mode: spectral -TEST_TYPE: ckpt-resume +TEST_TYPE: regular diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 285d16c99f3..aea3ec97597 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -109,7 +109,7 @@ products: - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective] products: @@ -121,30 +121,30 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - # - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon] - # products: - # - environment: [dev] - # scope: [mr, mr-github, mr-slim] - # platforms: [dgx_h100] - # - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_muon] - # products: - # - environment: [dev] - # scope: [mr, mr-github, mr-slim] - # platforms: [dgx_h100] + - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon] + products: + - environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] + - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_muon] + products: + - environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] ####################################################################### # Super important mr, mr-github tests that run for both DEV and LTS per mr, mr-github 
# From 23e092f41ec8bc659020e401ddac9576c1cfed7e Mon Sep 17 00:00:00 2001 From: rj42 Date: Tue, 9 Dec 2025 13:50:31 +0300 Subject: [PATCH 188/334] Fix: don't enter branch if mtp_num_layers == 0 (#2581) Co-authored-by: Xin Yao --- megatron/core/models/gpt/gpt_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 4a6370bc49d..a1230568cbd 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -562,7 +562,8 @@ def _postprocess( if not self.post_process: return hidden_states - if self.config.mtp_num_layers is not None: + # Skip when mtp_num_layers is None or 0 + if self.config.mtp_num_layers: mtp_labels = labels.clone() hidden_states_list = torch.chunk(hidden_states, 1 + self.config.mtp_num_layers, dim=0) hidden_states = hidden_states_list[0] From c60d5c2b7ff564c9cfbaf928d182cee7a887d87c Mon Sep 17 00:00:00 2001 From: Hongbin Liu Date: Wed, 10 Dec 2025 17:27:45 +0800 Subject: [PATCH 189/334] [Dev] fix(moe): Support HybridEP and reduce memory overhead for 1F1B A2A overlap (#2201) Signed-off-by: Hongbin Liu Signed-off-by: Pingtian Li Co-authored-by: root Co-authored-by: Zijie Yan Co-authored-by: Pingtian Li --- megatron/core/model_parallel_config.py | 13 ++++ .../common/model_chunk_schedule_plan.py | 63 ++++++++++++++++--- .../core/models/gpt/fine_grained_callables.py | 48 ++++++++++---- megatron/core/pipeline_parallel/utils.py | 7 +++ .../core/transformer/transformer_config.py | 11 ++++ megatron/training/arguments.py | 2 + .../a2a_overlap/test_schedule_layer_1f1b.py | 52 +++++++++++++++ 7 files changed, 174 insertions(+), 22 deletions(-) diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index 129135c4cc0..4452bdf360b 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -265,6 +265,19 @@ class ModelParallelConfig: delay_wgrad_compute: 
bool = False """Delay the weight gradient computation to improve batch-level communication overlapping""" + ep_overlap_early_attn_memory_release: bool = False + """Enable early memory release of attention activations during EP overlap. + EP overlap can increase peak memory usage when the overlapped forward module allocates + more memory than what is freed by the backward module. This flag addresses this by + reordering the attention backward pass to occur earlier in the schedule. + Specifically: + - Without this flag: attn_bwd executes after moe_combine_fwd + - With this flag: attn_bwd executes before mlp_fwd + The earlier execution releases attention activations sooner, reducing peak memory. + Note: This may impact performance as moe_combine_fwd and moe_dispatch_bwd become + exposed (not overlapped with other computation). + """ + ################### # Pipeline Parallel ################### diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 401d9a81a97..486a498dd73 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -77,6 +77,7 @@ def __init__(self, layer, event, chunk_state, comp_stream, comm_stream, extra_ar """ from megatron.core.models.gpt.fine_grained_callables import TransformerLayerState + self.config = layer.config self.layer_state = TransformerLayerState() self.chunk_state = chunk_state self.layer = layer @@ -87,6 +88,32 @@ def __init__(self, layer, event, chunk_state, comp_stream, comm_stream, extra_ar # get callable nodes for transformer/mtp layer self._build_callable_nodes(event, comp_stream, comm_stream, extra_args) + def release_state(self): + """Release reference, this helps avoid memory leak.""" + if hasattr(self, 'attn') and self.attn is not None: + del self.attn + self.attn = None + if hasattr(self, 'post_attn') and self.post_attn is not None: + del self.post_attn + self.post_attn = 
None + if hasattr(self, 'moe_dispatch') and self.moe_dispatch is not None: + del self.moe_dispatch + self.moe_dispatch = None + if hasattr(self, 'mlp') and self.mlp is not None: + del self.mlp + self.mlp = None + if hasattr(self, 'moe_combine') and self.moe_combine is not None: + del self.moe_combine + self.moe_combine = None + if hasattr(self, 'mtp_post_process') and self.mtp_post_process is not None: + del self.mtp_post_process + self.mtp_post_process = None + if hasattr(self, 'layer_state') and self.layer_state is not None: + del self.layer_state + self.layer_state = None + if hasattr(self, 'layer'): + del self.layer + def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args): """ Builds the callable nodes for the transformer/mtp layer: @@ -114,7 +141,12 @@ def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args): self.layer.config.moe_token_dispatcher_type == "flex" and self.layer.config.moe_flex_dispatcher_backend == "deepep" ) + enable_hybridep = ( + self.layer.config.moe_token_dispatcher_type == "flex" + and self.layer.config.moe_flex_dispatcher_backend == "hybridep" + ) extra_args["enable_deepep"] = enable_deepep + extra_args["enable_hybridep"] = enable_hybridep extra_args["is_moe"] = is_moe extra_args["delay_wgrad_compute"] = self.layer.config.delay_wgrad_compute extra_args["is_mtp"] = is_mtp @@ -221,6 +253,10 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) b_layer.mlp.backward_dw() b_grad = b_layer.moe_dispatch.backward(b_grad) + if b_layer is not None and b_layer.config.ep_overlap_early_attn_memory_release: + b_grad = b_layer.post_attn.backward(b_grad) + b_grad = b_layer.attn.backward(b_grad) + if f_layer is not None: with f_layer.get_fp8_context(): f_input = f_layer.mlp.forward(f_input) @@ -230,7 +266,7 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) f_input = f_layer.moe_combine.forward(f_input) f_input = 
f_layer.mtp_post_process.forward(f_input) - if b_layer is not None: + if b_layer is not None and not b_layer.config.ep_overlap_early_attn_memory_release: b_grad = b_layer.post_attn.backward(b_grad) b_grad = b_layer.attn.backward(b_grad) @@ -372,6 +408,10 @@ def get_layer(self, i): assert i < self.num_layers() return self._transformer_layers[i] + def pop_layer(self): + """Pops the transformer layer in FILO order.""" + return self._transformer_layers.pop() + def num_layers(self): """Gets the number of transformer layers.""" return len(self._transformer_layers) @@ -450,13 +490,14 @@ def run( b_num_layers = b_schedule_plan.num_layers() if b_schedule_plan is not None else 0 overlapped_layers = min(f_num_layers, b_num_layers) + f_layer = b_layer = None # combined forward and backward pass for overlapped layers for i in range(overlapped_layers): f_layer = f_schedule_plan.get_layer(i) - b_layer = b_schedule_plan.get_layer(b_num_layers - 1 - i) - torch.cuda.nvtx.range_push(f"layer_{i}f-layer_{b_num_layers - 1 - i}b") if f_layer.layer.config.fine_grained_activation_offloading: fine_grained_offloading_set_last_layer(i == f_num_layers - 1) + b_layer = b_schedule_plan.pop_layer() + torch.cuda.nvtx.range_push(f"layer_{i}f-layer_{b_schedule_plan.num_layers()}b") f_input, b_grad = TransformerLayerSchedulePlan.run( f_layer, b_layer, @@ -464,15 +505,19 @@ def run( b_grad=b_grad, is_last_layer_in_bwd=(i == b_num_layers - 1), ) + if i < b_num_layers - 1: + b_layer.release_state() torch.cuda.nvtx.range_pop() # backward pass for the remaining layers for i in range(overlapped_layers, b_num_layers): - b_layer = b_schedule_plan.get_layer(b_num_layers - 1 - i) - torch.cuda.nvtx.range_push(f"layer_{b_num_layers - 1 - i}b") + b_layer = b_schedule_plan.pop_layer() + torch.cuda.nvtx.range_push(f"layer_{b_schedule_plan.num_layers()}b") _, b_grad = TransformerLayerSchedulePlan.run( None, b_layer, b_grad=b_grad, is_last_layer_in_bwd=(i == b_num_layers - 1) ) + if i < b_num_layers - 1: + 
b_layer.release_state() torch.cuda.nvtx.range_pop() # forward pass for the remaining layers @@ -500,7 +545,9 @@ def run( # Delay the last attn_dw in backward pass (attn_dw of the first layer) # for overlapping with the p2p comm if b_num_layers > 0: - b_schedule_plan.get_layer(0).attn.backward_dw() + assert b_layer is not None + b_layer.attn.backward_dw() + b_layer.release_state() # post process forward if f_schedule_plan is not None and f_schedule_plan.post_process is not None: @@ -513,9 +560,7 @@ def run( f_schedule_plan.wait_current_stream() if b_schedule_plan: b_schedule_plan.wait_current_stream() - - # Release reference as early as possible, this helps avoid memory leak. - if b_schedule_plan is not None: + # Release reference as early as possible, this helps avoid memory leak. b_schedule_plan.release_state() return f_input diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index 952b83f95fb..60094976a9a 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -21,6 +21,7 @@ get_mtp_layer_offset, ) from megatron.core.transformer.transformer_layer import TransformerLayer, make_viewless_tensor +from megatron.core.utils import internal_api def weak_method(method): @@ -40,13 +41,15 @@ def wrapped_func(*args, **kwarg): return wrapped_func -def should_free_input(name, is_moe, is_deepep): +@internal_api +def should_free_input(name, is_moe, enable_deepep, enable_hybridep): """Determine if the node should free its input memory. 
Args: name: Node name is_moe: Whether it's a MoE model - is_deepep: Whether it's a DeepEP model + enable_deepep: Whether to use DeepEP dispatcher + enable_hybridep: Whether to use HybridEP dispatcher Returns: bool: Whether to free input memory @@ -60,12 +63,13 @@ def should_free_input(name, is_moe, is_deepep): # The input and output of A2A are not needed anymore after the forward pass, # so we can free the input memory after the forward pass. free_input_nodes = { - "mlp": True, + "mlp": not enable_hybridep, "moe_combine": True, - # For non-deepep mode, the input is the un-dispatched tokens and probs before dispatch A2A - # and it's not needed anymore after the forward pass - # For deepep mode, they are both needed in backward pass, so they cannot be freed. - "moe_dispatch": not is_deepep, + # For non-DeepEP and non-HybridEP dispatcher mode, the input is the un-dispatched tokens + # and probs before dispatch A2A and it's not needed anymore after the forward pass + # For DeepEP and HybridEP dispatcher mode, they are both needed in backward pass + # and cannot be freed. + "moe_dispatch": not (enable_deepep or enable_hybridep), } return free_input_nodes.get(name, False) @@ -223,12 +227,13 @@ def __init__( it's the per_batch_state_context, o.w. nullcontext name (str): Node name, also used to determine memory strategy bwd_dw_callables (list): List of weight gradient functions for the layer. - extra_args (dict): Extra arguments for the node: is_moe, enable_deepep. + extra_args (dict): Extra arguments for nodes: is_moe, enable_deepep, enable_hybridep. 
""" # determine whether to free input memory is_moe = extra_args.get("is_moe", False) enable_deepep = extra_args.get("enable_deepep", False) - free_input = should_free_input(name, is_moe, enable_deepep) + enable_hybridep = extra_args.get("enable_hybridep", False) + free_input = should_free_input(name, is_moe, enable_deepep, enable_hybridep) self.delay_wgrad_compute = extra_args.get("delay_wgrad_compute", False) super().__init__( @@ -274,7 +279,13 @@ def backward_impl(self, outputs, output_grad): detached_grad = tuple([e.grad for e in self.detached]) grads = output_grad + detached_grad self.default_backward_func(outputs + self.before_detached, grads) - self._release_state() + # release the output grad memory after backward finishes, + # except when delay_wgrad_comptue is enabled, the grad should be + # kept until all modules' backward_dw has been invoked. + if self.delay_wgrad_compute: + self.output_grads = grads + self.delay_grads_release = len(self.bwd_dw_callables) > 0 + # return grads for record stream return grads @@ -285,9 +296,16 @@ def backward_dw(self): with torch.cuda.nvtx.range(f"{self.name} wgrad"): for module in self.bwd_dw_callables: module.backward_dw() + + # the output grad memory is last used in wgrad compute, should be safe to release. + assert self.delay_grads_release, "output grad memory should be valid before wgrad." + for tensor in self.output_grads: + tensor.untyped_storage().resize_(0) + self.output_grads = None + self.bwd_dw_callables = None - def _release_state(self): + def __del__(self): # Release reference as early as possible, this helps avoid memory leak. 
self.before_detached = None self.detached = None @@ -328,6 +346,10 @@ def build_transformer_layer_callables(layer: TransformerLayer): layer.config.moe_token_dispatcher_type == "flex" and layer.config.moe_flex_dispatcher_backend == "deepep" ) + enable_hybridep = ( + layer.config.moe_token_dispatcher_type == "flex" + and layer.config.moe_flex_dispatcher_backend == "hybridep" + ) def submodule_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): """ @@ -379,7 +401,7 @@ def submodule_dispatch_forward( Dispatches tokens to the experts based on the router output. """ token_dispatcher = layer.mlp.token_dispatcher - if enable_deepep: + if enable_deepep or enable_hybridep: # update token_probs to be the detached version, prevents # backward graph from connecting to attn submodule token_dispatcher._comm_manager.token_probs = probs @@ -396,7 +418,7 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): shared_expert_output = None dispatched_probs = node.layer_state.dispatched_probs token_dispatcher = layer.mlp.token_dispatcher - if enable_deepep: + if enable_deepep or enable_hybridep: # update dispatched_probs to be detached version, prevents # backward graph from connecting to dispatch submodule token_dispatcher._comm_manager.dispatched_probs = dispatched_probs diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index c50c6ac7964..52d401c79f9 100644 --- a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -182,6 +182,7 @@ def __init__( self.free_input = free_input self.inputs = None self.outputs = None + self.delay_grads_release = False def default_backward_func(self, outputs, output_grad): """Default backward function""" @@ -263,6 +264,12 @@ def _backward(self, *output_grad): for g in output_grad: if g is not None: g.record_stream(self.stream) + # Manually trigger the memory release of dgrad tensor + # to avoid delayed garbage collection. 
If + # delay_grads_release is True, dgrad is last used in + # wgrad compute and skip the release here. + if not self.delay_grads_release: + g.untyped_storage().resize_(0) grads = self.get_grad() self._release_state() diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 31dd5a98a58..fcc45a54c87 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -1843,6 +1843,11 @@ def __post_init__(self): assert ( self.mtp_num_layers is None or self.mtp_num_layers == 1 ), 'MTP layernum only supports 1 when enabling overlap_moe_expert_parallel_comm.' + if self.mtp_num_layers == 1: + assert self.pipeline_model_parallel_size > 1, ( + 'Pipeline model parallel size must be larger than 1 ' + 'when enabling overlap_moe_expert_parallel_comm with MTP layer.' + ) # Check delay_wgrad_compute compatibility if self.delay_wgrad_compute: @@ -1853,6 +1858,12 @@ def __post_init__(self): not self.moe_use_legacy_grouped_gemm ), 'delay_wgrad_compute is not supported with legacy groupedgemm implementation' + if self.ep_overlap_early_attn_memory_release: + assert self.overlap_moe_expert_parallel_comm, ( + 'overlap_moe_expert_parallel_comm must be enabled when enabling ' + 'ep_overlap_early_attn_memory_release' + ) + if self.context_parallel_size > 1 and self.cp_comm_type is not None: if isinstance(self.cp_comm_type, list): assert len(self.cp_comm_type) == self.num_layers, ( diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 682bd94bdf9..847f1531767 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -3348,6 +3348,8 @@ def _add_moe_args(parser): help='Overlap the EP A2A communication by batch-level overlapping in 1f1b stage.') group.add_argument('--delay-wgrad-compute', action='store_true', help='Delay the wgrad compute for batch-level overlapping') + group.add_argument('--ep-overlap-early-attn-memory-release', 
action='store_true', + help='Release the memory of the attention module early in EP overlap.') group.add_argument('--moe-upcycling-granularity', type=int, default=1, help='This param sepecifics how many times smaller is the expert hidden size compared with the original dense FFN hidden size. ' diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py index 3ebffb810e5..7fb97f6e586 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py @@ -347,6 +347,58 @@ def test_transformer_layer_overlap_shared_expert(self): comp_res = compare_captures(capture_ref, capture_a2a_overlap, True) assert comp_res[0], f"[rank {torch.distributed.get_rank()}] {comp_res[1]}" + @pytest.mark.skipif(not is_te_min_version("1.9.0.dev0"), reason="Requires TE >= 1.9.0.dev0") + def test_transformer_layer_overlap_early_attn_memory_release(self): + """ + Verifies all-to-all overlap optimization in transformer layer with early attn memory release + produces the same results as the reference implementation. 
+ """ + extra_kwargs = { + "moe_token_dispatcher_type": "alltoall", + "ep_overlap_early_attn_memory_release": True, + "overlap_moe_expert_parallel_comm": True, + } + overlap_config = get_test_config(extra_kwargs=extra_kwargs) + ref_config = get_test_config(extra_kwargs=extra_kwargs) + microbatches = 4 + with deterministic_mode(): + transformer_layer_spec = get_gpt_decoder_block_spec( + config=ref_config, use_transformer_engine=True + ) + gpt_model = GPTModel( + config=ref_config, + transformer_layer_spec=transformer_layer_spec, + vocab_size=100, + pre_process=True, + post_process=True, + max_sequence_length=300, + ) + + params = reset_model(gpt_model) + input_tensors = [build_data() for _ in range(microbatches)] + + fp8_context = get_fp8_context(ref_config, 0) if ref_config.fp8 else nullcontext() + with fp8_context: + capture_ref = run_transformer_layer_ref_with_capture( + gpt_model, input_tensors, microbatches + ) + del gpt_model + + gpt_model = GPTModel( + config=overlap_config, + transformer_layer_spec=transformer_layer_spec, + vocab_size=100, + pre_process=True, + post_process=True, + max_sequence_length=300, + ) + reset_model(gpt_model, params) + capture_a2a_overlap = run_transformer_layer_a2a_overlap_with_capture( + gpt_model, input_tensors, microbatches + ) + comp_res = compare_captures(capture_ref, capture_a2a_overlap, True) + assert comp_res[0], f"[rank {torch.distributed.get_rank()}] {comp_res[1]}" + @pytest.mark.skipif(not is_te_min_version("1.9.0.dev0"), reason="Requires TE >= 1.9.0.dev0") @pytest.mark.parametrize("dispatcher_type", get_valid_token_dispatcher_types()) @pytest.mark.parametrize("fp8_flag", get_valid_fp8_flags()) From 2d398b42fd4237fffb553109563d73ac099751c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 10 Dec 2025 20:28:35 -0800 Subject: [PATCH 190/334] chore: Bump baseline (#2626) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- 
...k_api_backwards_compatibility_workflow.yml | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/.github/workflows/check_api_backwards_compatibility_workflow.yml b/.github/workflows/check_api_backwards_compatibility_workflow.yml index 0ccaa8ccc5e..42db9486cac 100644 --- a/.github/workflows/check_api_backwards_compatibility_workflow.yml +++ b/.github/workflows/check_api_backwards_compatibility_workflow.yml @@ -28,7 +28,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 - + - name: Check if relevant files changed id: check_files run: | @@ -83,7 +83,7 @@ jobs: if: needs.pre-flight.outputs.should_skip != 'true' name: Check API Backward Compatibility runs-on: ubuntu-latest - + # ============================================================================ # Configuration Parameters (modify here) # ============================================================================ @@ -91,24 +91,24 @@ jobs: # Default baseline for automatic PR checks # Can be: branch name (e.g., 'main'), commit hash, or tag # Will be resolved to commit hash during execution - DEFAULT_BASELINE: '274e04d21fbcb7f53f63de992ee1217f275f1cf2' + DEFAULT_BASELINE: 'ed804b49860201e7103ce0f9c1129a330a384a65' # Tag pattern for auto-detection (e.g., 'core_r*', 'core_v*') TAG_PATTERN: 'core_v*' # Tag regex filter (e.g., '^core_v[0-9]+\.[0-9]+\.[0-9]+$' for stable versions only) TAG_REGEX_FILTER: '^core_v[0-9]+\.[0-9]+\.[0-9]+$' # ============================================================================ - + steps: - name: Checkout code uses: actions/checkout@v4 with: - fetch-depth: 0 # Need full history to access baseline ref - + fetch-depth: 0 # Need full history to access baseline ref + - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.12' - + - name: Install griffe run: | python -m pip install --upgrade pip @@ -116,7 +116,7 @@ jobs: python -c "import griffe; print('Griffe installed successfully')" python -c "from griffe import Object; 
print('Object import successful')" || echo "Object import from griffe failed" python -c "from griffe.dataclasses import Object; print('Object import from dataclasses successful')" || echo "Object import from dataclasses failed" - + - name: Determine baseline reference id: baseline run: | @@ -134,13 +134,13 @@ jobs: # BASELINE_REF="${{ env.DEFAULT_BASELINE }}" # fi fi - + # Resolve baseline to commit hash (works for branches, tags, or commit hashes) BASELINE_HASH=$(git rev-parse "$BASELINE_REF") - + echo "baseline=$BASELINE_HASH" >> $GITHUB_OUTPUT echo "Using baseline: $BASELINE_REF (resolved to commit: $BASELINE_HASH)" - + - name: Run compatibility check id: compat_check run: | @@ -148,13 +148,13 @@ jobs: python scripts/check_api_backwards_compatibility.py \ --baseline ${{ steps.baseline.outputs.baseline }} \ --verbose 2>&1 | tee compat_check_output.txt - + # Capture exit code EXIT_CODE=${PIPESTATUS[0]} echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT exit $EXIT_CODE continue-on-error: true - + - name: Fail job if breaking changes detected if: steps.compat_check.outcome == 'failure' run: | @@ -233,10 +233,10 @@ jobs: echo "🔧 Checker script: scripts/check_api_backwards_compatibility.py" echo "❓ Questions? Check the docs or ask in #megatron-core" echo "" - + echo "::error::Breaking API changes detected. Please review the output above and choose a resolution strategy." 
exit 1 - + - name: Success message if: steps.compat_check.outcome == 'success' run: | @@ -271,4 +271,3 @@ jobs: gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success" and .name != "API Backward Compatibility Check Summary") | .name' exit 1 fi - From e8a927578d0fdeb98db5d40ab7bdc81d123795f7 Mon Sep 17 00:00:00 2001 From: Tong Liu Date: Fri, 12 Dec 2025 11:48:39 +0800 Subject: [PATCH 191/334] [Dev] Use the latest Hybrid-EP (#2424) --- docker/Dockerfile.ci.dev | 2 +- megatron/core/transformer/moe/fused_a2a.py | 51 +++++-------------- .../core/transformer/moe/token_dispatcher.py | 15 ++---- 3 files changed, 18 insertions(+), 50 deletions(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index 482c6af460c..5caa6003630 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -62,7 +62,7 @@ RUN bash -ex <<"EOF" git clone --branch hybrid-ep https://github.com/deepseek-ai/DeepEP.git pushd DeepEP - git checkout 1dddd194c26911c35b4f53a148617dd73de0ffc9 + git checkout 83e0d156807f31abed4ea55c2fa6eb4b62a11b82 patch -p1 < /workspace/deepep.patch popd TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" uv pip install --no-build-isolation -v DeepEP/. 
diff --git a/megatron/core/transformer/moe/fused_a2a.py b/megatron/core/transformer/moe/fused_a2a.py index 045a93039b3..aa13b9b5b5b 100644 --- a/megatron/core/transformer/moe/fused_a2a.py +++ b/megatron/core/transformer/moe/fused_a2a.py @@ -3,6 +3,7 @@ # Copyright (c) 2025 DeepSeek # Licensed under the MIT License - https://github.com/deepseek-ai/DeepEP/blob/main/LICENSE +from megatron.core.utils import internal_api try: from deep_ep import Buffer @@ -328,6 +329,7 @@ def reset_hybrid_ep_buffer(): _hybrid_ep_buffer = None +@internal_api class HybridEPDispatch(torch.autograd.Function): ''' Fused dispatch operation for permute + dispatch a2a + permute using the HybridEP backend @@ -343,7 +345,6 @@ def forward( num_local_experts, num_sms_dispatch_api=24, num_sms_combine_api=24, - num_dispatched_tokens=None, num_permuted_tokens=None, pad_multiple=None, ): @@ -362,11 +363,9 @@ def forward( num_sms_combine_api, fp8_dispatch, ) - # Defaultly, the output token_per_expert and num_dispatched_tokens_tensor - # will be put on the CPU to avoid the potential sync in combine/backward pass, - # but if we provide the num_dispatched_tokens and num_permuted_tokens on CPU, - # we do not need to the D2H here. 
- use_host_meta = num_dispatched_tokens is None or num_permuted_tokens is None + # If we provide the num_permuted_tokens, we do not need to sync to + # wait for the data in pinned memory to be ready + non_blocking = num_permuted_tokens is not None # Process the dispatch ( dispatched_hidden, dispatched_probs, @@ -381,14 +380,12 @@ def forward( scaling_factor=None, num_of_experts_per_rank=num_local_experts, pad_multiple=pad_multiple, - num_dispatched_tokens=num_dispatched_tokens, num_permuted_tokens=num_permuted_tokens, - use_host_meta=use_host_meta, + non_blocking=non_blocking, ) ctx.handle = handle ctx.pad_multiple = pad_multiple - ctx.num_dispatched_tokens = num_dispatched_tokens return ( dispatched_hidden, dispatched_probs, @@ -404,36 +401,27 @@ def backward(ctx, grad_x, grad_probs, grad_scaling_factor, grad_tokens_per_exper ''' handle = ctx.handle combined_hidden, combined_probs = _hybrid_ep_buffer.combine_with_unpermute( - hidden=grad_x, - probs=grad_probs, - handle=handle, - pad_multiple=ctx.pad_multiple, - num_dispatched_tokens=ctx.num_dispatched_tokens, + hidden=grad_x, probs=grad_probs, handle=handle, pad_multiple=ctx.pad_multiple ) return combined_hidden, None, combined_probs, None, None, None, None, None, None, None +@internal_api class HybridEPCombine(torch.autograd.Function): ''' Fused combine operation for permute + combine a2a + permute using the HybridEP backend ''' @staticmethod - def forward( - ctx, x, handle, num_dispatched_tokens=None, num_permuted_tokens=None, pad_multiple=None - ): + def forward(ctx, x, handle, num_permuted_tokens=None, pad_multiple=None): ''' Forward pass of fused combine of the HybridEP backend ''' combined_hidden, _ = _hybrid_ep_buffer.combine_with_unpermute( - hidden=x, - handle=handle, - pad_multiple=pad_multiple, - num_dispatched_tokens=num_dispatched_tokens, + hidden=x, handle=handle, pad_multiple=pad_multiple ) ctx.handle = handle ctx.pad_multiple = pad_multiple - ctx.num_dispatched_tokens = num_dispatched_tokens ctx.num_permuted_tokens = 
num_permuted_tokens return combined_hidden @@ -448,7 +436,6 @@ def backward(ctx, grad_x): scaling_factor=None, handle=handle, pad_multiple=ctx.pad_multiple, - num_dispatched_tokens=ctx.num_dispatched_tokens, num_permuted_tokens=ctx.num_permuted_tokens, ) return dispatched_hidden, None, None, None, None @@ -456,6 +443,7 @@ def backward(ctx, grad_x): if HAVE_HYBRIDEP: + @internal_api def hybrid_ep_dispatch( x, routing_map, @@ -464,7 +452,6 @@ def hybrid_ep_dispatch( num_local_experts, num_sms_dispatch_api=24, num_sms_combine_api=24, - num_dispatched_tokens=None, num_permuted_tokens=None, pad_multiple=None, ): @@ -487,10 +474,6 @@ def hybrid_ep_dispatch( Number of SMs used by the dispatch API. num_sms_combine_api (int): Number of SMs used by the combine API. - num_dispatched_tokens (int): - Number of tokens after dispatch but before permute. HybridEP uses this - to allocate buffers. If not provided, HybridEP obtains the size from - a GPU tensor, which causes a D2H synchronization. num_permuted_tokens (int): Number of tokens after permute. HybridEP uses this to allocate buffers. If not provided, HybridEP obtains the size from a GPU tensor, @@ -507,12 +490,12 @@ def hybrid_ep_dispatch( num_local_experts, num_sms_dispatch_api, num_sms_combine_api, - num_dispatched_tokens, num_permuted_tokens, pad_multiple, ) - def hybrid_ep_combine(x, handle, num_dispatched_tokens, num_permuted_tokens, pad_multiple): + @internal_api + def hybrid_ep_combine(x, handle, num_permuted_tokens, pad_multiple): ''' Perform fused combine operation for unpermute + combine a2a + unpermute using the HybridEP backend @@ -522,10 +505,6 @@ def hybrid_ep_combine(x, handle, num_dispatched_tokens, num_permuted_tokens, pad Input hidden states to combine handle (EventHandle): Communication handle from dispatch operation - num_dispatched_tokens (int): - The number of tokens after unpermute but before combine. HybridEP uses this - to allocate buffers. 
If not provided, HybridEP obtains the size from a GPU tensor, - which causes a D2H synchronization. num_permuted_tokens (int): The number of tokens before unpermute. HybridEP uses this to allocate buffers. If not provided, HybridEP obtains the size from a GPU tensor, which causes a D2H synchronization. @@ -533,9 +512,7 @@ def hybrid_ep_combine(x, handle, num_dispatched_tokens, num_permuted_tokens, pad The alignment multiple required for FP8 GEMM. If not provided, no padding is performed. ''' - return HybridEPCombine.apply( - x, handle, num_dispatched_tokens, num_permuted_tokens, pad_multiple - ) + return HybridEPCombine.apply(x, handle, num_permuted_tokens, pad_multiple) else: hybrid_ep_dispatch = None diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index 61ef0b5f084..d0da38d6322 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -985,11 +985,8 @@ def __init__( if self.drop_and_pad: assert self.capacity_factor is not None self.capacity = None - # The up-bound for the number of tokens after dispatch op, -1 means no up-bound, - # which will cause a CPU sync - self.num_dispatched_tokens = None - # Actually the sum of tokens_per_expert, the up-bound for the number of tokens - # after permute op, -1 means no up-bound, will cause a CPU sync + # Actually the up-bound for the number of tokens + # after permute op, None means no up-bound, will cause a CPU sync self.num_permuted_tokens = None # Metadata @@ -1018,12 +1015,9 @@ def setup_metadata(self, routing_map: torch.Tensor, probs: torch.Tensor): num_experts=self.num_experts, capacity_factor=self.capacity_factor, ) - # We cannot predict the actual number of tokens after the dispatch op, - # so we set it to the worst case in drop_and_pad mode - self.num_dispatched_tokens = self.capacity * self.group.size() * self.num_local_experts # In drop_and_pad mode, the number of tokens after the 
permute op # can be computed on the CPU - self.num_permuted_tokens = self.num_dispatched_tokens + self.num_permuted_tokens = self.capacity * self.group.size() * self.num_local_experts self.tokens_per_expert = torch.full( (self.num_local_experts,), self.capacity * self.group.size(), dtype=torch.long ) @@ -1052,7 +1046,6 @@ def dispatch( num_local_experts=self.num_local_experts, num_sms_dispatch_api=self.config.moe_hybridep_num_sms, num_sms_combine_api=self.config.moe_hybridep_num_sms, - num_dispatched_tokens=self.num_dispatched_tokens, num_permuted_tokens=self.num_permuted_tokens, pad_multiple=self.pad_multiple, ) @@ -1074,7 +1067,6 @@ def combine( hidden_states = hybrid_ep_combine( x=hidden_states, handle=self.handle, - num_dispatched_tokens=self.num_dispatched_tokens, num_permuted_tokens=self.num_permuted_tokens, pad_multiple=self.pad_multiple, ) @@ -1084,7 +1076,6 @@ def combine( self.handle = None if not self.drop_and_pad: self.num_permuted_tokens = None - self.num_dispatched_tokens = None return hidden_states def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor: From 305957aa065b65d07bd5c876dd74a571c3eca409 Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Fri, 12 Dec 2025 10:04:50 -0800 Subject: [PATCH 192/334] API compat: ignore ParameterMovedBreakage for __init__ methods (#2649) Signed-off-by: Pablo Garay --- scripts/check_api_backwards_compatibility.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/scripts/check_api_backwards_compatibility.py b/scripts/check_api_backwards_compatibility.py index 4977b806433..3c66f00b619 100644 --- a/scripts/check_api_backwards_compatibility.py +++ b/scripts/check_api_backwards_compatibility.py @@ -46,13 +46,22 @@ # Decorators that exempt objects from compatibility checks EXEMPT_DECORATORS = ['internal_api', 'deprecated', 'experimental_api'] -# Breakage kinds to ignore (not actual API signature changes) +# Breakage kinds to ignore globally (not 
actual API signature changes) # AttributeChangedValueBreakage: Changing constant values (e.g., VERSION = "1.0" -> "2.0") # is not a breaking API change - the constant still exists with the same name IGNORED_BREAKAGE_KINDS = [ 'AttributeChangedValueBreakage', ] +# Breakage kinds to ignore only for __init__ methods +# ParameterMovedBreakage: Reordering parameters in __init__ is generally safe because: +# - Config dataclasses should always be initialized with keyword arguments +# - Adding fields to parent dataclasses shifts child __init__ params (inheritance artifact) +# - Nobody should call Config(4096, 32, ...) with positional args +IGNORED_FOR_INIT_METHODS = [ + 'ParameterMovedBreakage', +] + def has_exempt_decorator(obj: Object) -> bool: """Check if a Griffe object has any exempt decorator. @@ -217,6 +226,7 @@ def should_skip_change(change, filtered_paths: set) -> bool: A change is skipped if: - The change kind is in IGNORED_BREAKAGE_KINDS (not a signature change) + - The change kind is in IGNORED_FOR_INIT_METHODS and affects an __init__ method - The changed object itself is in filtered_paths (exact match) - The changed object is a child of an exempt object (prefix match) @@ -227,7 +237,7 @@ def should_skip_change(change, filtered_paths: set) -> bool: Returns: bool: True if the change should be skipped (filtered out) """ - # Check if this breakage kind should be ignored (not a signature change) + # Check if this breakage kind should be ignored globally (not a signature change) change_kind = type(change).__name__ if change_kind in IGNORED_BREAKAGE_KINDS: return True @@ -240,6 +250,12 @@ def should_skip_change(change, filtered_paths: set) -> bool: # e.g., "Class.__init__(param)" -> "Class.__init__" clean_path = path.split('(')[0] if '(' in path else path + # Check if this is a breakage kind we ignore for __init__ methods + # Config dataclasses should use keyword args, so parameter reordering is safe + if change_kind in IGNORED_FOR_INIT_METHODS: + if '.__init__' in 
clean_path: + return True + # Check exact match if clean_path in filtered_paths or path in filtered_paths: return True From e93814b4c6965c3f8639abdf690416c08937f370 Mon Sep 17 00:00:00 2001 From: Maanu Grover <109391026+maanug-nv@users.noreply.github.com> Date: Mon, 15 Dec 2025 18:01:42 -0800 Subject: [PATCH 193/334] [training migration] add training config dataclass and arg generation utility (#2651) Signed-off-by: Maanu Grover Co-authored-by: Eric Harper --- megatron/core/safe_globals.py | 2 + megatron/training/argument_utils.py | 250 +++++++++ megatron/training/arguments.py | 102 +--- megatron/training/config.py | 116 ++++ megatron/training/dist_signal_handler.py | 11 +- tests/unit_tests/test_argument_utils.py | 643 +++++++++++++++++++++++ 6 files changed, 1023 insertions(+), 101 deletions(-) create mode 100644 megatron/training/argument_utils.py create mode 100644 megatron/training/config.py create mode 100644 tests/unit_tests/test_argument_utils.py diff --git a/megatron/core/safe_globals.py b/megatron/core/safe_globals.py index ddb1dd25399..8bcfe788f60 100755 --- a/megatron/core/safe_globals.py +++ b/megatron/core/safe_globals.py @@ -3,6 +3,7 @@ from argparse import Namespace from io import BytesIO from pathlib import PosixPath +from signal import Signals from types import SimpleNamespace import torch @@ -31,6 +32,7 @@ RerunMode, RerunState, BytesIO, + Signals, ] diff --git a/megatron/training/argument_utils.py b/megatron/training/argument_utils.py new file mode 100644 index 00000000000..b9f7c7b22d1 --- /dev/null +++ b/megatron/training/argument_utils.py @@ -0,0 +1,250 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +import dataclasses +import typing +import types +from typing import Any, Optional +from argparse import ArgumentParser, _ArgumentGroup +import inspect +import itertools +import builtins +import ast +import enum +from dataclasses import Field, fields + +# TODO: support arg renames + +class TypeInferenceError(Exception): + """Custom exception type to be conditionally handled by ArgumentGroupFactory.""" + pass + +class ArgumentGroupFactory: + """Utility that adds an argument group to an ArgumentParser based on the attributes of a dataclass. + + This utility uses dataclass metadata including type annotations and docstrings to automatically + infer the type, default, and other argparse keyword arguments. + + You can override or supplement the automatically inferred argparse kwargs for any + dataclass field by providing an "argparse_meta" key in the field's metadata dict. + The value should be a dict of kwargs that will be passed to ArgumentParser.add_argument(). + These metadata kwargs take precedence over the automatically inferred values. + + Example: + @dataclass + class YourConfig: + your_attribute: int | str | None = field( + default=None, + metadata={ + "argparse_meta": { + "arg_names": ["--your-arg-name1", "--your-arg-name2"], + "type": str, + "nargs": "+", + "default": "foo", + } + }, + ) + + In this example, inferring the type automatically would fail, as Unions are + not supported. However the metadata is present, so that takes precedence. + Any keyword arguments to `ArgumentParser.add_argument()` can be included in + the "argparse_meta" dict, as well as "arg_names" for the argument flag name. + + This class can also be used as a base class and extended as needed to support dataclasses + that require some customized or additional handling. + + Args: + src_cfg_class: The source dataclass type (not instance) whose fields will be + converted into command-line arguments. 
Each field's type annotation determines + the argument type, default values become argument defaults, and field-level + docstrings are extracted to populate argument help text. + exclude: Optional list of attribute names from `src_cfg_class` to exclude from + argument generation. Useful for omitting internal fields, computed properties, + or attributes that should be configured through other means. If None, all + dataclass fields will be converted to command-line arguments. Default: None. + """ + + def __init__(self, src_cfg_class: type, exclude: Optional[list[str]] = None) -> None: + self.src_cfg_class = src_cfg_class + self.field_docstrings = self._get_field_docstrings(src_cfg_class) + self.exclude = set(exclude) if exclude is not None else set() + + def _format_arg_name(self, config_attr_name: str, prefix: Optional[str] = None) -> str: + """Convert dataclass name into appropriate argparse flag name. + + Args: + config_attr_name: dataclass attribute name + prefix: prefix string to add to the dataclass attribute name. e.g. 'no' for bool + settings that are default True. A hyphen is added after the prefix. Default: None + """ + arg_name = config_attr_name + if prefix: + arg_name = prefix + '_' + arg_name + arg_name = "--" + arg_name.replace("_", "-") + return arg_name + + def _get_enum_kwargs(self, config_type: enum.EnumMeta) -> dict[str, Any]: + """Build kwargs for Enums. + + With these settings, the user must provide a valid enum value, e.g. + 'flash', for `AttnBackend.flash`. + """ + def enum_type_handler(cli_arg): + return config_type[cli_arg] + + return {"type": enum_type_handler, "choices": list(config_type)} + + def _extract_type(self, config_type: type) -> dict[str, Any]: + """Determine the type, nargs, and choices settings for this argument. 
+ + Args: + config_type: attribute type from dataclass + """ + origin = typing.get_origin(config_type) + type_tuple = typing.get_args(config_type) + + if isinstance(config_type, type) and issubclass(config_type, enum.Enum): + return self._get_enum_kwargs(config_type) + + # Primitive type + if origin is None: + return {"type": config_type} + + if origin in [types.UnionType, typing.Union]: + # Handle Optional and Union + if type_tuple[1] == type(None): # Optional type. First element is value inside Optional[] + return self._extract_type(type_tuple[0]) + else: + raise TypeInferenceError(f"Unions not supported by argparse: {config_type}") + + elif origin is list: + if len(type_tuple) == 1: + kwargs = self._extract_type(type_tuple[0]) + kwargs["nargs"] = "+" + return kwargs + else: + raise TypeInferenceError(f"Multi-type lists not supported by argparse: {config_type}") + + elif origin is typing.Literal: + choices_types = [type(choice) for choice in type_tuple] + assert all([t == choices_types[0] for t in choices_types]), "Type of each choice in a Literal type should all be the same." + kwargs = {"type": choices_types[0], "choices": type_tuple} + return kwargs + else: + raise TypeInferenceError(f"Unsupported type: {config_type}") + + + def _build_argparse_kwargs_from_field(self, attribute: Field) -> dict[str, Any]: + """Assemble kwargs for add_argument(). 
+ + Args: + attribute: dataclass attribute + """ + argparse_kwargs = {} + argparse_kwargs["arg_names"] = [self._format_arg_name(attribute.name)] + argparse_kwargs["dest"] = attribute.name + argparse_kwargs["help"] = self.field_docstrings[attribute.name] if attribute.name in self.field_docstrings else "" + + # dataclasses specifies that both should not be set + if isinstance(attribute.default, type(dataclasses.MISSING)): + # dataclasses specified default_factory must be a zero-argument callable + argparse_kwargs["default"] = attribute.default_factory() + else: + argparse_kwargs["default"] = attribute.default + + attr_argparse_meta = None + if attribute.metadata != {} and "argparse_meta" in attribute.metadata: + # save metadata here, but update at the end so the metadata has highest precedence + attr_argparse_meta = attribute.metadata["argparse_meta"] + + + # if we cannot infer the argparse type, all of this logic may fail. we try to defer + # to the developer-specified metadata if present + try: + argparse_kwargs.update(self._extract_type(attribute.type)) + + # use store_true or store_false action for enable/disable flags, which doesn't accept a 'type' + if argparse_kwargs["type"] == bool: + argparse_kwargs["action"] = "store_true" if attribute.default == False else "store_false" + argparse_kwargs.pop("type") + + # add '--no-*' and '--disable-*' prefix if this is a store_false argument + if argparse_kwargs["action"] == "store_false": + argparse_kwargs["arg_names"] = [self._format_arg_name(attribute.name, prefix="no"), self._format_arg_name(attribute.name, prefix="disable")] + except TypeInferenceError as e: + if attr_argparse_meta is not None: + print( + f"WARNING: Inferring the appropriate argparse argument type from {self.src_cfg_class} " + f"failed for {attribute.name}: {attribute.type}.\n" + "Deferring to attribute metadata. 
If the metadata is incomplete, 'parser.add_argument()' may fail.\n" + f"Original failure: {e}" + ) + else: + raise e + + # metadata provided by field takes precedence + if attr_argparse_meta is not None: + argparse_kwargs.update(attr_argparse_meta) + + return argparse_kwargs + + def build_group(self, parser: ArgumentParser, title: Optional[str] = None) -> _ArgumentGroup: + """Entrypoint method that adds the argument group to the parser. + + Args: + parser: The parser to add arguments to + title: Title for the argument group + """ + arg_group = parser.add_argument_group(title=title, description=self.src_cfg_class.__doc__) + for attr in fields(self.src_cfg_class): + if attr.name in self.exclude or attr.init is False: + continue + + add_arg_kwargs = self._build_argparse_kwargs_from_field(attr) + + arg_names = add_arg_kwargs.pop("arg_names") + arg_group.add_argument(*arg_names, **add_arg_kwargs) + + return arg_group + + def _get_field_docstrings(self, src_cfg_class: type) -> dict[str, str]: + """Extract field-level docstrings from a dataclass by inspecting its AST. + + Recurses on parent classes of `src_cfg_class`. + + Args: + src_cfg_class: Dataclass to get docstrings from. + """ + source = inspect.getsource(src_cfg_class) + tree = ast.parse(source) + root_node = tree.body[0] + + assert isinstance(root_node, ast.ClassDef), "Provided object must be a class." + + field_docstrings = {} + + # Iterate over body of the dataclass using 2-width sliding window. + # When 'a' is an assignment expression and 'b' is a constant, the window is + # lined up with an attribute-docstring pair. The pair can be saved to our dict. 
+ for a, b in itertools.pairwise(root_node.body): + a_cond = isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) + b_cond = isinstance(b, ast.Expr) and isinstance(b.value, ast.Constant) + + if a_cond and b_cond: + # These should be guaranteed by typechecks above, but assert just in case + assert isinstance(a.target.id, str), "Dataclass attribute not in the expected format. Name is not a string." + assert isinstance(b.value.value, str), "Dataclass attribute docstring is not a string." + + # Formatting + docstring = inspect.cleandoc(b.value.value) + docstring = ' '.join(docstring.split()) + + field_docstrings[a.target.id] = docstring + + # recurse on parent class + base_classes = src_cfg_class.__bases__ + if len(base_classes) > 0: + parent_class = base_classes[0] + if parent_class.__name__ not in builtins.__dict__: + field_docstrings.update(self._get_field_docstrings(base_classes[0])) + + return field_docstrings diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 7c9e4531c6d..70d1e4b1306 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -34,7 +34,6 @@ ) from megatron.core.activations import squared_relu from megatron.core.fusions.fused_bias_geglu import quick_gelu -from megatron.training.dist_signal_handler import SIGNAL_MAP from megatron.training.utils import ( get_device_arch_version, update_use_dist_ckpt, @@ -48,6 +47,8 @@ load_quantization_recipe, ) +from megatron.training.argument_utils import ArgumentGroupFactory + def add_megatron_arguments(parser: argparse.ArgumentParser): """"Add Megatron-LM arguments to the given parser.""" @@ -2118,41 +2119,14 @@ def _add_rl_args(parser): return parser def _add_training_args(parser): - group = parser.add_argument_group(title='training') + from megatron.training.config import TrainingConfig + + train_factory = ArgumentGroupFactory(TrainingConfig) + group = train_factory.build_group(parser, "training") - group.add_argument('--micro-batch-size', 
type=int, default=None, - help='Batch size per model instance (local batch size). ' - 'Global batch size is local batch size times data ' - 'parallel size times number of micro batches.') group.add_argument('--batch-size', type=int, default=None, help='Old batch size parameter, do not use. ' 'Use --micro-batch-size instead') - group.add_argument('--global-batch-size', type=int, default=None, - help='Training batch size. If set, it should be a ' - 'multiple of micro-batch-size times data-parallel-size. ' - 'If this value is None, then ' - 'use micro-batch-size * data-parallel-size as the ' - 'global batch size. This choice will result in 1 for ' - 'number of micro-batches.') - group.add_argument('--rampup-batch-size', nargs='*', default=None, - help='Batch size ramp up with the following values:' - ' --rampup-batch-size ' - ' ' - ' ' - 'For example:' - ' --rampup-batch-size 16 8 300000 \\ ' - ' --global-batch-size 1024' - 'will start with global batch size 16 and over ' - ' (1024 - 16) / 8 = 126 intervals will increase' - 'the batch size linearly to 1024. In each interval' - 'we will use approximately 300000 / 126 = 2380 samples.') - group.add_argument('--decrease-batch-size-if-needed', action='store_true', default=False, - help='If set, decrease batch size if microbatch_size * dp_size' - 'does not divide batch_size. Useful for KSO (Keep Soldiering On)' - 'to continue making progress if number of healthy GPUs (and' - 'corresponding dp_size) does not support current batch_size.' 
- 'Old batch_size will be restored if training is re-started with' - 'dp_size that divides batch_size // microbatch_size.') group.add_argument('--recompute-activations', action='store_true', help='recompute activation to allow for training ' 'with larger models, sequences, and batch sizes.') @@ -2221,8 +2195,6 @@ def _add_training_args(parser): help='Global step to start profiling.') group.add_argument('--profile-step-end', type=int, default=12, help='Global step to stop profiling.') - group.add_argument('--iterations-to-skip', nargs='+', type=int, default=[], - help='List of iterations to skip, empty by default.') group.add_argument('--result-rejected-tracker-filename', type=str, default=None, help='Optional name of file tracking `result_rejected` events.') group.add_argument('--disable-gloo-process-groups', action='store_false', @@ -2265,47 +2237,19 @@ def _add_training_args(parser): group.add_argument('--use-cpu-initialization', action='store_true', default=None, help='If set, initialize weights on the CPU. This eliminates init differences based on tensor parallelism.') - group.add_argument('--empty-unused-memory-level', default=0, type=int, - choices=[0, 1, 2], - help='Call torch.cuda.empty_cache() each iteration ' - '(training and eval), to reduce fragmentation.' - '0=off, 1=moderate, 2=aggressive.') group.add_argument('--deterministic-mode', action='store_true', help='Choose code that has deterministic execution. This usually ' 'means slower execution, but is good for debugging and testing.') - group.add_argument('--check-weight-hash-across-dp-replicas-interval', type=int, default=None, - help='Interval to check weight hashes are same across DP replicas. 
If not specified, weight hashes not checked.') group.add_argument('--calculate-per-token-loss', action='store_true', help=('Scale cross entropy loss by the number of non-padded tokens in the ' 'global batch, versus the default behavior of assuming all tokens are non-padded.')) - group.add_argument('--train-sync-interval', type=int, default=None, - help='Training CPU-GPU synchronization interval, to ensure that CPU is not running too far ahead of GPU.') # deprecated group.add_argument('--checkpoint-activations', action='store_true', help='Checkpoint activation to allow for training ' 'with larger models, sequences, and batch sizes.') - group.add_argument('--train-iters', type=int, default=None, - help='Total number of iterations to train over all ' - 'training runs. Note that either train-iters or ' - 'train-samples should be provided.') - group.add_argument('--train-samples', type=int, default=None, - help='Total number of samples to train over all ' - 'training runs. Note that either train-iters or ' - 'train-samples should be provided.') group.add_argument('--log-interval', type=int, default=100, help='Report loss and timing interval.') - group.add_argument('--exit-interval', type=int, default=None, - help='Exit the program after the iteration is divisible ' - 'by this value.') - group.add_argument('--exit-duration-in-mins', type=int, default=None, - help='Exit the program after this many minutes.') - group.add_argument('--exit-signal-handler', action='store_true', - help='Dynamically save the checkpoint and shutdown the ' - 'training if signal is received') - group.add_argument('--exit-signal', type=str, default='SIGTERM', - choices=list(SIGNAL_MAP.keys()), - help='Signal to use for exit signal handler. 
If not specified, defaults to SIGTERM.') group.add_argument('--tensorboard-dir', type=str, default=None, help='Write TensorBoard logs to this directory.') group.add_argument('--no-masked-softmax-fusion', @@ -2399,22 +2343,6 @@ def _add_training_args(parser): '--use-legacy-models to not use core models.') group.add_argument('--use-legacy-models', action='store_true', help='Use the legacy Megatron models, not Megatron-Core models.') - group.add_argument('--manual-gc', action='store_true', - help='Disable the threshold-based default garbage ' - 'collector and trigger the garbage collection manually. ' - 'Manual garbage collection helps to align the timing of ' - 'the collection across ranks which mitigates the impact ' - 'of CPU-associated jitters. When the manual gc is enabled, ' - 'garbage collection is performed only at the start and the ' - 'end of the validation routine by default.') - group.add_argument('--manual-gc-interval', type=int, default=0, - help='Training step interval to trigger manual garbage ' - 'collection. When the value is set to 0, garbage ' - 'collection is not triggered between training steps.') - group.add_argument('--no-manual-gc-eval', action='store_false', - help='When using manual garbage collection, disable ' - 'garbage collection at the start and the end of each ' - 'evaluation run.', dest='manual_gc_eval') group.add_argument('--disable-tp-comm-split-ag', action='store_false', help='Disables the All-Gather overlap with fprop GEMM.', dest='tp_comm_split_ag') @@ -2923,20 +2851,10 @@ def _add_distributed_args(parser): def _add_validation_args(parser): - group = parser.add_argument_group(title='validation') - - group.add_argument('--full-validation', action='store_true', help='If set, each time validation occurs it uses the full validation dataset(s). 
This currently only works for GPT datasets!') - group.add_argument('--multiple-validation-sets', action='store_true', help='If set, multiple datasets listed in the validation split are evaluated independently with a separate loss for each dataset in the list. This argument requires that no weights are included in the list') - group.add_argument('--eval-iters', type=int, default=100, - help='Number of iterations to run for evaluation' - 'validation/test for.') - group.add_argument('--eval-interval', type=int, default=1000, - help='Interval between running evaluation on ' - 'validation set.') - group.add_argument("--test-mode", action="store_true", help='Run all real-time test alongside the experiment.') - group.add_argument('--skip-train', action='store_true', - default=False, help='If set, bypass the training loop, ' - 'optionally do evaluation for validation/test, and exit.') + from megatron.training.config import ValidationConfig + + val_factory = ArgumentGroupFactory(ValidationConfig) + group = val_factory.build_group(parser, "validation") return parser diff --git a/megatron/training/config.py b/megatron/training/config.py new file mode 100644 index 00000000000..d978083372d --- /dev/null +++ b/megatron/training/config.py @@ -0,0 +1,116 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +from dataclasses import dataclass, field +import signal +from typing import Literal + +@dataclass(kw_only=True) +class TrainingConfig: + """Configuration settings related to the training loop.""" + + micro_batch_size: int | None = None + """Batch size per model instance (local batch size). Global batch size is local batch size times + data parallel size times number of micro batches.""" + + global_batch_size: int | None = None + """Training batch size. If set, it should be a multiple of micro-batch-size times + data-parallel-size. If this value is None, then use micro-batch-size * data-parallel-size + as the global batch size. 
This choice will result in 1 for number of micro-batches.""" + + rampup_batch_size: list[int] | None = field(default=None, metadata={"argparse_meta": {"nargs": 3}}) + """Batch size ramp up with the following values: , , + + For example: + rampup-batch-size = [16, 8, 300000] + global-batch-size 1024 + will start with global batch size 16 and over (1024 - 16) / 8 = 126 intervals will increase + the batch size linearly to 1024. In each interval we will use approximately + 300000 / 126 = 2380 samples. + """ + + decrease_batch_size_if_needed: bool = False + """If set, decrease batch size if microbatch_size * dp_size does not + divide batch_size. Old batch_size will be restored if training is re-started + with dp_size that divides batch_size // microbatch_size.""" + + empty_unused_memory_level: Literal[0, 1, 2] = 0 + """Call torch.cuda.empty_cache() each iteration (training and eval), to reduce fragmentation. + 0=off, 1=moderate, 2=aggressive. + """ + + check_weight_hash_across_dp_replicas_interval: int | None = None + """Interval to check weight hashes are same across DP replicas. If not specified, weight hashes not checked.""" + + train_sync_interval: int | None = None + """Training CPU-GPU synchronization interval, to ensure that CPU is not running too far ahead of GPU.""" + + train_iters: int | None = None + """Total number of iterations to train over all training runs. + Note that either train_iters or train_samples should be provided. + """ + + train_samples: int | None = None + """Total number of samples to train over all training runs. 
+ Note that either train_iters or train_samples should be provided.""" + + exit_interval: int | None = None + """Exit the program after the iteration is divisible by this value.""" + + exit_duration_in_mins: int | None = None + """Exit the program after this many minutes.""" + + exit_signal_handler: bool = False + """Dynamically save the checkpoint and shutdown the training if SIGTERM is received""" + + exit_signal: signal.Signals = signal.SIGTERM + """Signal for the signal handler to detect.""" + + exit_signal_handler_for_dataloader: bool = False + """Use signal handler for dataloader workers""" + + manual_gc: bool = False + """Disable the threshold-based default garbage collector and trigger the garbage collection + manually. Manual garbage collection helps to align the timing of the collection across ranks + which mitigates the impact of CPU-associated jitters. When the manual gc is enabled, garbage + collection is performed only at the start and the end of the validation routine by default.""" + + manual_gc_interval: int = 0 + """Training step interval to trigger manual garbage collection. Values > 0 will trigger garbage + collections between training steps. + """ + + manual_gc_eval: bool = True + """When using manual garbage collection, this controls garbage collection at the start and the + end of each evaluation run. + """ + + iterations_to_skip: list[int] = field(default_factory=list) + """List of iterations to skip during training, empty by default.""" + + +@dataclass(kw_only=True) +class ValidationConfig: + """Configuration settings related to validation during or after model training.""" + + eval_iters: int | None = 100 + """Number of iterations to run for evaluation. Used for both validation and test. If not set, + evaluation will not run.""" + + eval_interval: int | None = None + """Interval between running evaluation on validation set. If not set, evaluation will not run + during training. 
+ """ + + skip_train: bool = False + """If set, bypass the training loop, perform evaluation for validation/test, and exit.""" + + test_mode: bool = False + """Run all real-time test alongside the experiment.""" + + full_validation: bool = False + """If set, each time validation occurs it uses the full validation dataset(s). This currently only works for GPT datasets!""" + + multiple_validation_sets: bool = False + """If set, multiple datasets listed in the validation split are evaluated independently with a + separate loss for each dataset in the list. This argument requires that no weights are + included in the list. + """ diff --git a/megatron/training/dist_signal_handler.py b/megatron/training/dist_signal_handler.py index f1f3725c8a9..0ecd706fdc7 100644 --- a/megatron/training/dist_signal_handler.py +++ b/megatron/training/dist_signal_handler.py @@ -3,13 +3,6 @@ import torch -SIGNAL_MAP = { - 'SIGTERM': signal.SIGTERM, - 'SIGINT': signal.SIGINT, - 'SIGUSR1': signal.SIGUSR1, - 'SIGUSR2': signal.SIGUSR2 -} - def get_world_size(): if torch.distributed.is_available() and torch.distributed.is_initialized(): world_size = torch.distributed.get_world_size() @@ -55,8 +48,8 @@ def all_gather_item(item, dtype, group=None, async_op=False, local_rank=None): class DistributedSignalHandler: - def __init__(self, sig: str = 'SIGTERM'): - self.sig = SIGNAL_MAP.get(sig, signal.SIGTERM) + def __init__(self, sig: signal.Signals = signal.SIGTERM): + self.sig = sig def signals_received(self): all_received = all_gather_item( diff --git a/tests/unit_tests/test_argument_utils.py b/tests/unit_tests/test_argument_utils.py new file mode 100644 index 00000000000..e5744c3b074 --- /dev/null +++ b/tests/unit_tests/test_argument_utils.py @@ -0,0 +1,643 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +import signal +from argparse import ArgumentError, ArgumentParser +from dataclasses import dataclass, field +from typing import Callable, Literal, Optional, Union + +import pytest + +from megatron.training.argument_utils import ArgumentGroupFactory, TypeInferenceError + + +@dataclass +class DummyConfig: + """A dummy configuration for testing.""" + + name: str = "default_name" + """Name of the configuration""" + + count: int = 42 + """Number of items""" + + learning_rate: float = 0.001 + """Learning rate for training""" + + enabled: bool = False + """Whether feature is enabled""" + + disabled_feature: bool = True + """Feature that is disabled by default""" + + enum_setting: signal.Signals = signal.SIGTERM + """Setting with enum type to test enum handling""" + + +@dataclass +class ConfigWithOptional: + """Config with optional fields.""" + + required_field: str = "required" + """A required field""" + + optional_field: Optional[int] = None + """An optional integer field""" + + optional_str: Optional[str] = "default" + """An optional string with default""" + + int_new_form: int | None = None + """Optional using new syntax""" + + str_new_form: str | None = "default" + """Optional string using new syntax""" + + +@dataclass +class ConfigWithList: + """Config with list fields.""" + + tags: list[str] = field(default_factory=list) + """List of tags""" + + numbers: list[int] = field(default_factory=lambda: [1, 2, 3]) + """List of numbers with default""" + + +@dataclass +class ConfigWithLiteral: + """Config with Literal types.""" + + mode: Literal["train", "eval", "test"] = "train" + """Operating mode""" + + precision: Literal[16, 32] = 32 + """Precision level""" + + +class TestArgumentGroupFactoryBasic: + """Test basic functionality of ArgumentGroupFactory.""" + + def test_creates_argument_group(self): + """Test that build_group creates an argument group.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(DummyConfig) + + arg_group = factory.build_group(parser, 
title="Test Group") + + assert arg_group is not None + assert arg_group.title == "Test Group" + assert arg_group.description == DummyConfig.__doc__ + + def test_all_fields_added(self): + """Test that all dataclass fields are added as arguments.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(DummyConfig) + + factory.build_group(parser, title="Test Group") + + # Parse empty args to get all defaults + args = parser.parse_args([]) + + # Check all fields exist + assert hasattr(args, 'name') + assert hasattr(args, 'count') + assert hasattr(args, 'learning_rate') + assert hasattr(args, 'enabled') + assert hasattr(args, 'disabled_feature') + + def test_default_values_preserved(self): + """Test that default values from dataclass are preserved.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(DummyConfig) + + factory.build_group(parser, title="Test Group") + args = parser.parse_args([]) + + assert args.name == "default_name" + assert args.count == 42 + assert args.learning_rate == 0.001 + assert args.enabled == False + assert args.disabled_feature == True + + def test_argument_types(self): + """Test that argument types are correctly inferred.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(DummyConfig) + + factory.build_group(parser, title="Test Group") + + # Parse with actual values + args = parser.parse_args( + ['--name', 'test_name', '--count', '100', '--learning-rate', '0.01'] + ) + + assert isinstance(args.name, str) + assert args.name == 'test_name' + assert isinstance(args.count, int) + assert args.count == 100 + assert isinstance(args.learning_rate, float) + assert args.learning_rate == 0.01 + + def test_boolean_store_true(self): + """Test that boolean fields with default False use store_true.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(DummyConfig) + + factory.build_group(parser, title="Test Group") + + # Without flag, should be False + args = parser.parse_args([]) + assert args.enabled == False + + 
# With flag, should be True + args = parser.parse_args(['--enabled']) + assert args.enabled == True + + def test_boolean_store_false(self): + """Test that boolean fields with default True use store_false with no- prefix.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(DummyConfig) + + factory.build_group(parser, title="Test Group") + + # Without flag, should be True + args = parser.parse_args([]) + assert args.disabled_feature == True + + # With --no- flag, should be False + args = parser.parse_args(['--no-disabled-feature']) + assert args.disabled_feature == False + + # With --disable- flag, should also be False + args = parser.parse_args(['--disable-disabled-feature']) + assert args.disabled_feature == False + + def test_field_docstrings_as_help(self): + """Test that field docstrings are extracted and used as help text.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(DummyConfig) + + # Check that field_docstrings were extracted + assert 'name' in factory.field_docstrings + assert factory.field_docstrings['name'] == "Name of the configuration" + assert factory.field_docstrings['count'] == "Number of items" + assert factory.field_docstrings['learning_rate'] == "Learning rate for training" + + def test_enum_handling(self): + """Test that enum types are handled correctly.""" + parser = ArgumentParser(exit_on_error=False) + factory = ArgumentGroupFactory(DummyConfig) + + factory.build_group(parser, title="Test Group") + + args = parser.parse_args([]) + assert args.enum_setting == signal.SIGTERM + + # test a different valid enum value + args = parser.parse_args(["--enum-setting", "SIGINT"]) + assert args.enum_setting == signal.SIGINT + + # test an invalid enum value + with pytest.raises(KeyError, match="sigbar"): + parser.parse_args(["--enum-setting", "sigbar"]) + + +class TestArgumentGroupFactoryExclusion: + """Test exclusion functionality.""" + + def test_exclude_single_field(self): + """Test excluding a single field.""" + parser = 
ArgumentParser() + factory = ArgumentGroupFactory(DummyConfig, exclude=['count']) + + factory.build_group(parser, title="Test Group") + args = parser.parse_args([]) + + # Excluded field should not exist + assert hasattr(args, 'name') + assert not hasattr(args, 'count') + assert hasattr(args, 'learning_rate') + + def test_exclude_multiple_fields(self): + """Test excluding multiple fields.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(DummyConfig, exclude=['count', 'learning_rate']) + + factory.build_group(parser, title="Test Group") + args = parser.parse_args([]) + + assert hasattr(args, 'name') + assert not hasattr(args, 'count') + assert not hasattr(args, 'learning_rate') + assert hasattr(args, 'enabled') + + +class TestArgumentGroupFactoryOptional: + """Test handling of Optional types.""" + + def test_optional_fields(self): + """Test that Optional fields are handled correctly.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithOptional) + + factory.build_group(parser, title="Test Group") + + # Default values + args = parser.parse_args([]) + assert args.required_field == "required" + assert args.optional_field is None + assert args.optional_str == "default" + + # Provided values + args = parser.parse_args( + ['--required-field', 'new_value', '--optional-field', '123', '--optional-str', 'custom'] + ) + assert args.required_field == "new_value" + assert args.optional_field == 123 + assert args.optional_str == "custom" + + +class TestArgumentGroupFactoryList: + """Test handling of list types.""" + + def test_list_fields_with_default_factory(self): + """Test that list fields use nargs='+'.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithList) + + factory.build_group(parser, title="Test Group") + + # Default values + args = parser.parse_args([]) + assert args.tags == [] + assert args.numbers == [1, 2, 3] + + # Provided values + args = parser.parse_args(['--tags', 'tag1', 'tag2', 'tag3', '--numbers', 
'10', '20', '30']) + assert args.tags == ['tag1', 'tag2', 'tag3'] + assert args.numbers == [10, 20, 30] + + +class TestArgumentGroupFactoryLiteral: + """Test handling of Literal types.""" + + def test_literal_fields_have_choices(self): + """Test that Literal types create choice constraints.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithLiteral) + + factory.build_group(parser, title="Test Group") + + # Default values + args = parser.parse_args([]) + assert args.mode == "train" + assert args.precision == 32 + + # Valid choices + args = parser.parse_args(['--mode', 'eval', '--precision', '16']) + assert args.mode == "eval" + assert args.precision == 16 + + def test_literal_fields_reject_invalid_choices(self): + """Test that invalid Literal choices are rejected.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithLiteral) + + factory.build_group(parser, title="Test Group") + + # Invalid choice should raise error + with pytest.raises(SystemExit): + parser.parse_args(['--mode', 'invalid']) + + with pytest.raises(SystemExit): + parser.parse_args(['--precision', '64']) + + +class TestArgumentGroupFactoryHelpers: + """Test helper methods.""" + + def test_format_arg_name_basic(self): + """Test basic argument name formatting.""" + factory = ArgumentGroupFactory(DummyConfig) + + assert factory._format_arg_name("simple") == "--simple" + assert factory._format_arg_name("with_underscore") == "--with-underscore" + assert factory._format_arg_name("multiple_under_scores") == "--multiple-under-scores" + + def test_format_arg_name_with_prefix(self): + """Test argument name formatting with prefix.""" + factory = ArgumentGroupFactory(DummyConfig) + + assert factory._format_arg_name("feature", prefix="no") == "--no-feature" + assert factory._format_arg_name("feature", prefix="disable") == "--disable-feature" + assert factory._format_arg_name("multi_word", prefix="no") == "--no-multi-word" + + def test_extract_type_primitive(self): + 
"""Test type extraction for primitive types.""" + factory = ArgumentGroupFactory(DummyConfig) + + assert factory._extract_type(int) == {"type": int} + assert factory._extract_type(str) == {"type": str} + assert factory._extract_type(float) == {"type": float} + + def test_extract_type_optional(self): + """Test type extraction for Optional types.""" + factory = ArgumentGroupFactory(DummyConfig) + + result = factory._extract_type(Optional[int]) + assert result == {"type": int} + + result = factory._extract_type(Optional[str]) + assert result == {"type": str} + + def test_extract_type_list(self): + """Test type extraction for list types.""" + factory = ArgumentGroupFactory(DummyConfig) + + result = factory._extract_type(list[int]) + assert result == {"type": int, "nargs": "+"} + + result = factory._extract_type(list[str]) + assert result == {"type": str, "nargs": "+"} + + def test_extract_type_literal(self): + """Test type extraction for Literal types.""" + factory = ArgumentGroupFactory(DummyConfig) + + result = factory._extract_type(Literal["a", "b", "c"]) + assert result == {"type": str, "choices": ("a", "b", "c")} + + result = factory._extract_type(Literal[1, 2, 3]) + assert result == {"type": int, "choices": (1, 2, 3)} + + +@dataclass +class ConfigWithArgparseMeta: + """Config with argparse_meta metadata for testing overrides.""" + + custom_help: str = field( + default="default_value", + metadata={"argparse_meta": {"help": "Custom help text from metadata"}}, + ) + """Original help text""" + + custom_type: str = field(default="100", metadata={"argparse_meta": {"type": int}}) + """Field with type override""" + + custom_default: str = field( + default="original_default", metadata={"argparse_meta": {"default": "overridden_default"}} + ) + """Field with default override""" + + custom_choices: str = field( + default="option1", + metadata={"argparse_meta": {"choices": ["option1", "option2", "option3"]}}, + ) + """Field with choices override""" + + custom_dest: str = 
field( + default="value", metadata={"argparse_meta": {"dest": "renamed_destination"}} + ) + """Field with dest override""" + + custom_action: bool = field( + default=False, + metadata={"argparse_meta": {"action": "store_const", "const": "special_value"}}, + ) + """Field with custom action override""" + + multiple_overrides: int = field( + default=42, + metadata={ + "argparse_meta": { + "type": str, + "help": "Multiple overrides applied", + "default": "999", + "dest": "multi_override_dest", + } + }, + ) + """Field with multiple metadata overrides""" + + nargs_override: str = field(default="single", metadata={"argparse_meta": {"nargs": "?"}}) + """Field with nargs override""" + + +@dataclass +class ConfigWithUnsupportedCallables: + """Config with argparse_meta metadata for testing overrides.""" + + unsupported_type: Optional[Callable] = None + """Cannot take a callable over CLI""" + + unsupported_with_metadata: Optional[Callable] = field( + default=None, metadata={"argparse_meta": {"type": int, "choices": (0, 1, 2)}} + ) + """This argument should be 0, 1, or 2. The appropriate + Callable will be set by some other logic. 
+ """ + + +@dataclass +class ConfigWithUnsupportedUnions: + """Config with argparse_meta metadata for testing overrides.""" + + unsupported_type: Union[int, str] = 0 + """Cannot infer type of a Union""" + + unsupported_with_metadata: Union[int, str] = field( + default=0, metadata={"argparse_meta": {"type": str, "choices": ("foo", "bar")}} + ) + """Metadata should take precedence over the exception caused by Union""" + + +class TestArgumentGroupFactoryArgparseMeta: + """Test argparse_meta metadata override functionality.""" + + def test_help_override(self): + """Test that argparse_meta can override help text.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithArgparseMeta) + + factory.build_group(parser, title="Test Group") + + # Find the action for this argument + for action in parser._actions: + if hasattr(action, 'dest') and action.dest == 'custom_help': + assert action.help == "Custom help text from metadata" + return + + pytest.fail("custom_help argument not found") + + def test_type_override(self): + """Test that argparse_meta can override argument type.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithArgparseMeta) + + factory.build_group(parser, title="Test Group") + + # Parse with integer value (metadata overrides type to int) + args = parser.parse_args(['--custom-type', '42']) + + # Should be parsed as int, not str + assert isinstance(args.custom_type, int) + assert args.custom_type == 42 + + def test_default_override(self): + """Test that argparse_meta can override default value.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithArgparseMeta) + + factory.build_group(parser, title="Test Group") + + # Parse with no arguments + args = parser.parse_args([]) + + # Should use metadata default, not field default + assert args.custom_default == "overridden_default" + + def test_choices_override(self): + """Test that argparse_meta can override choices.""" + parser = ArgumentParser() + factory 
= ArgumentGroupFactory(ConfigWithArgparseMeta) + + factory.build_group(parser, title="Test Group") + + # Valid choice from metadata + args = parser.parse_args(['--custom-choices', 'option2']) + assert args.custom_choices == "option2" + + # Invalid choice should fail + with pytest.raises(SystemExit): + parser.parse_args(['--custom-choices', 'invalid_option']) + + def test_dest_override(self): + """Test that argparse_meta can override destination name.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithArgparseMeta) + + factory.build_group(parser, title="Test Group") + + args = parser.parse_args(['--custom-dest', 'test_value']) + + # Should be stored in renamed destination + assert hasattr(args, 'renamed_destination') + assert args.renamed_destination == "test_value" + + def test_action_override(self): + """Test that argparse_meta can override action.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithArgparseMeta) + + factory.build_group(parser, title="Test Group") + + # With custom action=store_const and const="special_value" + args = parser.parse_args(['--custom-action']) + assert args.custom_action == "special_value" + + # Without flag, should use default + args = parser.parse_args([]) + assert args.custom_action == False + + def test_multiple_overrides(self): + """Test that multiple argparse_meta overrides work together.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithArgparseMeta) + + factory.build_group(parser, title="Test Group") + + # Parse with no arguments to check default override + args = parser.parse_args([]) + + # Check all overrides applied + assert hasattr(args, 'multi_override_dest') + assert args.multi_override_dest == "999" # default override + + # Parse with value to check type override + args = parser.parse_args(['--multiple-overrides', 'text_value']) + assert isinstance(args.multi_override_dest, str) # type override + assert args.multi_override_dest == "text_value" + + # 
Check help override was applied + for action in parser._actions: + if hasattr(action, 'dest') and action.dest == 'multi_override_dest': + assert action.help == "Multiple overrides applied" + break + + def test_nargs_override(self): + """Test that argparse_meta can override nargs.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithArgparseMeta) + + factory.build_group(parser, title="Test Group") + + # With nargs='?', argument is optional + args = parser.parse_args(['--nargs-override']) + assert args.nargs_override is None # No value provided with '?' + + # With value + args = parser.parse_args(['--nargs-override', 'provided_value']) + assert args.nargs_override == "provided_value" + + # Without flag at all, should use default + args = parser.parse_args([]) + assert args.nargs_override == "single" + + def test_metadata_takes_precedence_over_inference(self): + """Test that metadata has highest precedence over type inference.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithArgparseMeta) + + # Build kwargs for custom_type field which is str but metadata says int + from dataclasses import fields as dc_fields + + for f in dc_fields(ConfigWithArgparseMeta): + if f.name == 'custom_type': + kwargs = factory._build_argparse_kwargs_from_field(f) + # Metadata type should override inferred type + assert kwargs['type'] == int + break + + def test_unhandled_unsupported_callables(self): + """Test that an unsupported type produces a TypInferenceError.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory( + ConfigWithUnsupportedCallables, exclude=["unsupported_with_metadata"] + ) + + with pytest.raises(TypeInferenceError, match="Unsupported type"): + factory.build_group(parser, title="Test Group") + + def test_handled_unsupported_callables(self): + """Test an attribute with an unsupported type that has type info in the metadata.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory(ConfigWithUnsupportedCallables, 
exclude=["unsupported_type"]) + + factory.build_group(parser, title="Test Group") + + args = parser.parse_args(['--unsupported-with-metadata', '0']) + assert args.unsupported_with_metadata == 0 + + def test_unhandled_unsupported_unions(self): + """Test that an unsupported type produces a TypInferenceError.""" + parser = ArgumentParser() + factory = ArgumentGroupFactory( + ConfigWithUnsupportedUnions, exclude=["unsupported_with_metadata"] + ) + + with pytest.raises(TypeInferenceError, match="Unions not supported by argparse"): + factory.build_group(parser, title="Test Group") + + def test_handled_unsupported_unions(self): + """Test an attribute with an unsupported type that has type info in the metadata.""" + parser = ArgumentParser(exit_on_error=False) + factory = ArgumentGroupFactory(ConfigWithUnsupportedUnions, exclude=["unsupported_type"]) + + factory.build_group(parser, title="Test Group") + + args = parser.parse_args(['--unsupported-with-metadata', 'foo']) + assert args.unsupported_with_metadata == 'foo' + + with pytest.raises(ArgumentError, match="invalid choice"): + args = parser.parse_args(['--unsupported-with-metadata', 'baz']) From 288b8ea985221e6dc6dead2fa088b1899419f537 Mon Sep 17 00:00:00 2001 From: Robin Zhang Date: Wed, 17 Dec 2025 12:01:13 +0800 Subject: [PATCH 194/334] [Dev] Optimize TE CUDA Graph _get_sample_arguments() Time (#2568) Signed-off-by: Robin Zhang --- megatron/core/transformer/cuda_graphs.py | 76 ++++++++++++++----- .../transformer/test_cuda_graphs.py | 60 ++++++++------- 2 files changed, 89 insertions(+), 47 deletions(-) diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index bcc90dc1240..6f75d67549e 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -1643,48 +1643,82 @@ def get_rotary_pos_emb(transformer_module, transformer_input): # with the same input signature. 
fwd_sample_queues = {} consumed_sample_queue = {} + layer_sample_keys_cache = {} fwd_idx = [0] * self.num_model_chunks for chunk_id in order: model_chunk_idx = abs(chunk_id) - 1 if chunk_id > 0: + if model_chunk_idx not in fwd_sample_queues: + fwd_sample_queues[model_chunk_idx] = [] + sample_start_idx = (prefix_num_layers[model_chunk_idx] * self.num_microbatches) + ( fwd_idx[model_chunk_idx] * self.num_layers_per_chunk[model_chunk_idx] ) - fwd_sample_idx = [ - sample_start_idx + i for i in range(self.num_layers_per_chunk[model_chunk_idx]) - ] - if model_chunk_idx not in fwd_sample_queues: - fwd_sample_queues[model_chunk_idx] = [] - for per_callable_fwd_idx in fwd_sample_idx: - if sample_args[per_callable_fwd_idx] is None: + for layer_idx, layer in enumerate(self.callables_per_chunk[model_chunk_idx]): + per_callable_fwd_idx = sample_start_idx + layer_idx + + # Get sample_args and sample_kwargs for index per_callable_fwd_idx. + assert ( + sample_args[per_callable_fwd_idx] is None + and sample_kwargs[per_callable_fwd_idx] is None + ), ( + f"sample_args and sample_kwargs must be None before assigning static data, " + f"but got sample_args[{per_callable_fwd_idx}] = " + f"{sample_args[per_callable_fwd_idx]} and " + f"sample_kwargs[{per_callable_fwd_idx}] = " + f"{sample_kwargs[per_callable_fwd_idx]}." + ) + if id(layer) not in layer_sample_keys_cache: + # Have not generated the static inputs for this layer yet. So we don't + # know the input signature of this layer. Generate the static inputs, and + # cache the signature. 
sample_args[per_callable_fwd_idx], sample_kwargs[per_callable_fwd_idx] = ( _get_layer_static_inputs( - self.callables_per_chunk[model_chunk_idx][ - per_callable_fwd_idx - sample_start_idx - ], - self.chunks_with_decoder[model_chunk_idx], + layer, self.chunks_with_decoder[model_chunk_idx] ) ) - - sample_args_keys = tuple( - (t.shape, t.dtype, t.layout) for t in sample_args[per_callable_fwd_idx] - ) - sample_kwargs_keys = tuple( - (k, v.shape, v.dtype, v.layout) - for k, v in sorted(sample_kwargs[per_callable_fwd_idx].items()) - ) - sample_keys = sample_args_keys + sample_kwargs_keys + sample_args_keys = tuple( + (t.shape, t.dtype, t.layout) for t in sample_args[per_callable_fwd_idx] + ) + sample_kwargs_keys = tuple( + (k, v.shape, v.dtype, v.layout) + for k, v in sorted(sample_kwargs[per_callable_fwd_idx].items()) + ) + sample_keys = sample_args_keys + sample_kwargs_keys + layer_sample_keys_cache[id(layer)] = sample_keys + else: + # Get signature from cache. This signature will be used to see if we can + # reuse the static inputs of a previous forward pass for this forward pass. + # If not, we still need to generate the new static inputs. + sample_keys = layer_sample_keys_cache[id(layer)] fwd_sample_queues[model_chunk_idx].append((sample_keys, per_callable_fwd_idx)) if consumed_sample_queue.get(sample_keys, []): + # We can reuse the static inputs of a previous forward pass for this + # forward pass, because they are of the same input signature and the + # backward pass of the previous forward pass has completed. reuse_fwd_idx = consumed_sample_queue[sample_keys].pop(0) assert ( sample_args[reuse_fwd_idx] is not None and sample_kwargs[reuse_fwd_idx] is not None - ), "sample_args and sample_kwargs must not be None when reusing." 
+ ), ( + f"sample_args and sample_kwargs must not be None when reusing, but got " + f"sample_args[{reuse_fwd_idx}] = {sample_args[reuse_fwd_idx]} and " + f"sample_kwargs[{reuse_fwd_idx}] = {sample_kwargs[reuse_fwd_idx]}.", + ) sample_args[per_callable_fwd_idx] = sample_args[reuse_fwd_idx] sample_kwargs[per_callable_fwd_idx] = sample_kwargs[reuse_fwd_idx] + + if sample_args[per_callable_fwd_idx] is None: + # Unfortunately, no previous static inputs are available for reuse, + # sample_args is still None. Last attempt: generate the new static inputs + # for this forward pass. + sample_args[per_callable_fwd_idx], sample_kwargs[per_callable_fwd_idx] = ( + _get_layer_static_inputs( + layer, self.chunks_with_decoder[model_chunk_idx] + ) + ) fwd_idx[model_chunk_idx] += 1 else: num_consumed_samples = min( diff --git a/tests/unit_tests/transformer/test_cuda_graphs.py b/tests/unit_tests/transformer/test_cuda_graphs.py index 8133a3d2db0..7f49a559f32 100644 --- a/tests/unit_tests/transformer/test_cuda_graphs.py +++ b/tests/unit_tests/transformer/test_cuda_graphs.py @@ -742,7 +742,8 @@ def test_capture_freeze_gc(self): ) -# Global storage for comparing unique buffer counts across different num_microbatches, keyed by pp_size +# Global storage for comparing unique buffer counts across different num_microbatches, +# keyed by (pp_size, vpp_size) _unique_buffer_counts = {} @@ -758,19 +759,25 @@ def teardown_method(self, method): # Note: _unique_buffer_counts is intentionally NOT cleared here so we can # compare values across parametrized test runs - @pytest.mark.parametrize("num_microbatches", [4, 16, 64, 256]) + @pytest.mark.parametrize("num_microbatches", [16, 64, 256]) @pytest.mark.parametrize("pp_size", [1, 2, 4]) - def test_get_cuda_graph_input_data(self, num_microbatches, pp_size): + @pytest.mark.parametrize("vpp_size", [None, 2]) + def test_get_cuda_graph_input_data(self, num_microbatches, pp_size, vpp_size): """Test _get_cuda_graph_input_data function in TECudaGraphHelper.""" 
+ if vpp_size and pp_size == 1: + pytest.skip("vpp_size must be None when pp_size is 1") + Utils.initialize_model_parallel( - tensor_model_parallel_size=1, pipeline_model_parallel_size=pp_size + tensor_model_parallel_size=1, + pipeline_model_parallel_size=pp_size, + virtual_pipeline_model_parallel_size=vpp_size, ) # Set up test configuration seq_length = 128 micro_batch_size = 2 - num_layers = 4 + num_layers = 8 vocab_size = 1024 hidden_size = 64 num_attention_heads = 4 @@ -796,6 +803,7 @@ def test_get_cuda_graph_input_data(self, num_microbatches, pp_size): bf16=True, tensor_model_parallel_size=1, pipeline_model_parallel_size=pp_size, + virtual_pipeline_model_parallel_size=vpp_size, pipeline_dtype=torch.bfloat16, context_parallel_size=1, ) @@ -804,21 +812,22 @@ def test_get_cuda_graph_input_data(self, num_microbatches, pp_size): torch.manual_seed(123) model_parallel_cuda_manual_seed(123) - gpt_model = GPTModel( - config=transformer_config, - transformer_layer_spec=get_gpt_layer_with_transformer_engine_spec(), - vocab_size=vocab_size, - max_sequence_length=seq_length, - parallel_output=True, - position_embedding_type="rope", - ) - - # Move model to CUDA - gpt_model.cuda() + model = [] + for i in range(vpp_size or 1): + this_model = GPTModel( + config=transformer_config, + transformer_layer_spec=get_gpt_layer_with_transformer_engine_spec(), + vocab_size=vocab_size, + max_sequence_length=seq_length, + parallel_output=True, + position_embedding_type="rope", + vp_stage=i if vpp_size else None, + ).cuda() + model.append(this_model) # Initialize TECudaGraphHelper cuda_graph_helper = TECudaGraphHelper( - model=[gpt_model], + model=model, config=transformer_config, seq_length=seq_length, micro_batch_size=micro_batch_size, @@ -936,11 +945,13 @@ def test_get_cuda_graph_input_data(self, num_microbatches, pp_size): f"should be <= total_entries ({total_entries})" ) global _unique_buffer_counts - if pp_size not in _unique_buffer_counts: - _unique_buffer_counts[pp_size] = 
unique_buffer_count + # Use (pp_size, vpp_size) as key to track unique buffer counts per configuration + config_key = (pp_size, vpp_size) + if config_key not in _unique_buffer_counts: + _unique_buffer_counts[config_key] = unique_buffer_count else: - assert unique_buffer_count == _unique_buffer_counts[pp_size], ( - f"Unique buffer count mismatch: expected {_unique_buffer_counts[pp_size]}, " + assert unique_buffer_count == _unique_buffer_counts[config_key], ( + f"Unique buffer count mismatch: expected {_unique_buffer_counts[config_key]}, " f"got {unique_buffer_count}" ) @@ -956,11 +967,8 @@ def test_get_cuda_graph_input_data(self, num_microbatches, pp_size): "but all signatures are unique" ) - # If we have duplicate signatures and the schedule allows it, - # some buffers should be reused (max_reuse > 1) - # Note: The exact amount of reuse depends on the schedule order - # With 1F1B interleaved schedule, we should see some reuse - if pp_size > num_microbatches: + # We tested with a large number of microbatches, so we should see some buffer reuse. 
+ if pp_size > 1: assert max_reuse > 1, "Expected some buffer reuse" # Verify that make_graphed_callables_kwargs contains expected keys From 0eec631b2ea4e2ed3cb3ab847bcccf749a881d4b Mon Sep 17 00:00:00 2001 From: Yuzhong Wang Date: Wed, 17 Dec 2025 12:03:49 +0800 Subject: [PATCH 195/334] Reopen qwen3next functional test in lightweight mode (#2493) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig Co-authored-by: oliver könig --- .gitlab/stages/00.pre.yml | 10 +- .../shell_test_utils/run_ci_test.sh | 2 + .../golden_values_dev_dgx_h100.json | 287 ------------------ .../model_config.yaml | 12 +- tests/test_utils/recipes/gpt.yaml | 2 +- 5 files changed, 19 insertions(+), 294 deletions(-) delete mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/golden_values_dev_dgx_h100.json diff --git a/.gitlab/stages/00.pre.yml b/.gitlab/stages/00.pre.yml index 2210ddd7d02..ff9e4e5178b 100644 --- a/.gitlab/stages/00.pre.yml +++ b/.gitlab/stages/00.pre.yml @@ -49,7 +49,7 @@ pre:create_ci_branches: stage: .pre image: python:3.10 variables: - GIT_STRATEGY: "clone" + GIT_STRATEGY: 'clone' script: - git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/adlr/megatron-lm.git" - git switch --force-create $branch @@ -80,7 +80,7 @@ pre:create_ci_branches_dev: stage: .pre image: python:3.10 variables: - GIT_STRATEGY: "clone" + GIT_STRATEGY: 'clone' script: - git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/adlr/megatron-lm.git" - git switch --force-create $branch @@ -103,7 +103,7 @@ pre:label_merge_request: - cd gitlab-mr-labeler - go install . - cd .. 
- - go install github.com/itchyny/gojq/cmd/gojq@latest + - go install github.com/itchyny/gojq/cmd/gojq@v0.12.17 script: - set -x - | @@ -137,7 +137,7 @@ pre:maybe_cherry_pick_to_main: stage: .pre image: nentangso/alpine-git-curl-jq variables: - GIT_STRATEGY: "clone" + GIT_STRATEGY: 'clone' script: - | set -x @@ -202,7 +202,7 @@ pre:maybe_cherry_pick_commit: stage: .pre image: nentangso/alpine-git-curl-jq variables: - GIT_STRATEGY: "clone" + GIT_STRATEGY: 'clone' script: - set -x - set +e diff --git a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh index 5a6ea64f42d..968d7dafeec 100644 --- a/tests/functional_tests/shell_test_utils/run_ci_test.sh +++ b/tests/functional_tests/shell_test_utils/run_ci_test.sh @@ -51,6 +51,8 @@ set -exo pipefail # Extract settings from params file TEST_TYPE=$(cat $TRAINING_PARAMS_PATH | /usr/local/bin/yq '.TEST_TYPE') +ENABLE_LIGHTWEIGHT_MODE=$(cat $TRAINING_PARAMS_PATH | + /usr/local/bin/yq '.ENV_VARS.ENABLE_LIGHTWEIGHT_MODE // "false"') MODE=$(cat $TRAINING_PARAMS_PATH | /usr/local/bin/yq '.MODE // "pretraining"') diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/golden_values_dev_dgx_h100.json deleted file mode 100644 index e836165b1af..00000000000 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/golden_values_dev_dgx_h100.json +++ /dev/null @@ -1,287 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 10.94549, - "2": 10.94266, - "3": 10.95029, - "4": 10.92935, - "5": 10.94226, - "6": 10.94118, - "7": 10.92599, - "8": 10.93843, - "9": 10.92667, - "10": 10.95239, - "11": 10.9316, - "12": 10.93754, - "13": 10.92806, - "14": 10.93106, - "15": 10.92268, - "16": 10.93309, - "17": 10.92783, - "18": 10.93162, - "19": 10.92174, - "20": 10.9222, - "21": 10.91749, - "22": 10.89939, - 
"23": 10.91334, - "24": 10.90584, - "25": 10.89761, - "26": 10.90421, - "27": 10.90329, - "28": 10.87234, - "29": 10.89828, - "30": 10.85482, - "31": 10.74433, - "32": 10.85937, - "33": 10.87082, - "34": 10.78866, - "35": 10.80404, - "36": 10.78603, - "37": 10.83611, - "38": 10.77081, - "39": 10.85659, - "40": 10.72227, - "41": 10.72701, - "42": 10.78348, - "43": 10.58371, - "44": 10.69609, - "45": 10.60756, - "46": 10.55935, - "47": 10.72505, - "48": 10.58391, - "49": 10.40808, - "50": 10.63209 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 22806516.0, - "2": 23004070.0, - "3": 22675828.0, - "4": 23298692.0, - "5": 22793918.0, - "6": 23100284.0, - "7": 22849388.0, - "8": 23004824.0, - "9": 22919836.0, - "10": 22997154.0, - "11": 22579508.0, - "12": 22537754.0, - "13": 22996688.0, - "14": 22467402.0, - "15": 22900118.0, - "16": 22909232.0, - "17": 22897812.0, - "18": 22661628.0, - "19": 22697360.0, - "20": 22773234.0, - "21": 22818520.0, - "22": 22878406.0, - "23": 22618508.0, - "24": 22849596.0, - "25": 22897480.0, - "26": 22626820.0, - "27": 22547392.0, - "28": 22531804.0, - "29": 22606952.0, - "30": 22710502.0, - "31": 23033192.0, - "32": 22663120.0, - "33": 22637648.0, - "34": 22914116.0, - "35": 22866052.0, - "36": 22667304.0, - "37": 22575802.0, - "38": 22974080.0, - "39": 22879488.0, - "40": 22736406.0, - "41": 22737628.0, - "42": 22745946.0, - "43": 23054018.0, - "44": 22825168.0, - "45": 22753408.0, - "46": 22962704.0, - "47": 22712868.0, - "48": 23007200.0, - "49": 22805320.0, - "50": 22983010.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 739501056.0, - "2": 739501056.0, - "3": 739501056.0, - "4": 739501056.0, - "5": 739501056.0, - "6": 739501056.0, - "7": 739501056.0, - "8": 739501056.0, - "9": 739501056.0, - "10": 739501056.0, - "11": 739501056.0, - "12": 739501056.0, - "13": 739501056.0, - "14": 739501056.0, - "15": 
739501056.0, - "16": 739501056.0, - "17": 739501056.0, - "18": 739501056.0, - "19": 739501056.0, - "20": 739501056.0, - "21": 739501056.0, - "22": 739501056.0, - "23": 739501056.0, - "24": 739501056.0, - "25": 739501056.0, - "26": 739501056.0, - "27": 739501056.0, - "28": 739501056.0, - "29": 739501056.0, - "30": 739501056.0, - "31": 739501056.0, - "32": 739501056.0, - "33": 739501056.0, - "34": 739501056.0, - "35": 739501056.0, - "36": 739501056.0, - "37": 739501056.0, - "38": 739501056.0, - "39": 739501056.0, - "40": 739501056.0, - "41": 739501056.0, - "42": 739501056.0, - "43": 739501056.0, - "44": 739501056.0, - "45": 739501056.0, - "46": 739501056.0, - "47": 739501056.0, - "48": 739501056.0, - "49": 739501056.0, - "50": 739501056.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 2185745408.0, - "2": 2467083264.0, - "3": 2467083264.0, - "4": 2467083264.0, - "5": 2467083264.0, - "6": 2467083264.0, - "7": 2467083264.0, - "8": 2467083264.0, - "9": 2467083264.0, - "10": 2467083264.0, - "11": 2467083264.0, - "12": 2467083264.0, - "13": 2467083264.0, - "14": 2467083264.0, - "15": 2467083264.0, - "16": 2467083264.0, - "17": 2467083264.0, - "18": 2467083264.0, - "19": 2467083264.0, - "20": 2467083264.0, - "21": 2467083264.0, - "22": 2467083264.0, - "23": 2467083264.0, - "24": 2467083264.0, - "25": 2467083264.0, - "26": 2467083264.0, - "27": 2467083264.0, - "28": 2467083264.0, - "29": 2467083264.0, - "30": 2467083264.0, - "31": 2467083264.0, - "32": 2467083264.0, - "33": 2467083264.0, - "34": 2467083264.0, - "35": 2467083264.0, - "36": 2467083264.0, - "37": 2467083264.0, - "38": 2467083264.0, - "39": 2467083264.0, - "40": 2467083264.0, - "41": 2467083264.0, - "42": 2467083264.0, - "43": 2467083264.0, - "44": 2467083264.0, - "45": 2467083264.0, - "46": 2467083264.0, - "47": 2467083264.0, - "48": 2467083264.0, - "49": 2467083264.0, - "50": 2467083264.0 - } - }, - "iteration-time": { - "start_step": 1, 
- "end_step": 50, - "step_interval": 1, - "values": { - "1": 37.98779, - "2": 0.44183, - "3": 0.41794, - "4": 0.41574, - "5": 0.41502, - "6": 0.41403, - "7": 0.41636, - "8": 0.41731, - "9": 0.41907, - "10": 0.41341, - "11": 0.41278, - "12": 0.41269, - "13": 0.41248, - "14": 0.4133, - "15": 0.4156, - "16": 0.41652, - "17": 0.41625, - "18": 0.41902, - "19": 0.41584, - "20": 0.41729, - "21": 0.42212, - "22": 0.41334, - "23": 0.41588, - "24": 0.41641, - "25": 0.41859, - "26": 0.41721, - "27": 0.40783, - "28": 0.40735, - "29": 0.4046, - "30": 0.40445, - "31": 0.41196, - "32": 0.40703, - "33": 0.40362, - "34": 0.4043, - "35": 0.40787, - "36": 0.4094, - "37": 0.40514, - "38": 0.40653, - "39": 0.40616, - "40": 0.40471, - "41": 0.40633, - "42": 0.40318, - "43": 0.40362, - "44": 0.40095, - "45": 0.40173, - "46": 0.4018, - "47": 0.40121, - "48": 0.3989, - "49": 0.39861, - "50": 0.39894 - } - } -} diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml index 8c5838748d1..5f63de867d9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml @@ -3,6 +3,7 @@ ENV_VARS: NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 NCCL_ALGO: Ring CUBLAS_WORKSPACE_CONFIG: :4096:8 + ENABLE_LIGHTWEIGHT_MODE: true MODEL_ARGS: # Add network size args --untie-embeddings-and-output-weights: true @@ -18,13 +19,22 @@ MODEL_ARGS: --apply-layernorm-1p: true --attention-output-gate: true --no-weight-decay-cond-type: apply_wd_to_qk_layernorm - --linear-attention-type: gated_delta_net + --experimental-attention-variant: gated_delta_net --linear-attention-freq: 3 --linear-conv-kernel-dim: 4 --linear-key-head-dim: 64 --linear-value-head-dim: 64 --linear-num-key-heads: 4 --linear-num-value-heads: 8 + # Add MoE args + --num-experts: 32 + --moe-ffn-hidden-size: 64 + 
--moe-shared-expert-intermediate-size: 64 + --moe-shared-expert-gate: true + --moe-router-load-balancing-type: aux_loss + --moe-router-topk: 8 + --disable-bias-linear: true + --moe-router-dtype: fp32 # Add logging args --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index eae09a6e16a..f403ac20e3f 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -345,7 +345,7 @@ products: - test_case: [gpt3_mcore_te_tp2_pp1_gdn] products: - environment: [dev] - scope: [mr-broken, mr-github-broken] + scope: [mr, mr-github, mr-github-slim] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_mla] products: From 2ebff670288b28dd42dbd048e5e98ddbd19e89d5 Mon Sep 17 00:00:00 2001 From: Robin Zhang Date: Wed, 17 Dec 2025 19:51:32 +0800 Subject: [PATCH 196/334] [Dev] Fix CUDA RNG Tracker (#2640) Signed-off-by: Robin Zhang --- megatron/core/tensor_parallel/__init__.py | 4 + megatron/core/tensor_parallel/random.py | 78 +++++++++- megatron/core/transformer/cuda_graphs.py | 7 +- megatron/core/transformer/moe/moe_utils.py | 21 +-- megatron/training/arguments.py | 5 +- megatron/training/checkpointing.py | 15 +- .../unit_tests/tensor_parallel/test_random.py | 145 ++++++++++++++++++ 7 files changed, 249 insertions(+), 26 deletions(-) diff --git a/megatron/core/tensor_parallel/__init__.py b/megatron/core/tensor_parallel/__init__.py index afa53bdc6e1..e629e5982b1 100644 --- a/megatron/core/tensor_parallel/__init__.py +++ b/megatron/core/tensor_parallel/__init__.py @@ -28,9 +28,11 @@ from .random import ( CheckpointWithoutOutput, checkpoint, + convert_cuda_rng_state, get_cuda_rng_tracker, get_data_parallel_rng_tracker_name, get_expert_parallel_rng_tracker_name, + is_graph_safe_cuda_rng_tracker, model_parallel_cuda_manual_seed, ) from .utils import ( @@ -63,9 +65,11 @@ "scatter_to_sequence_parallel_region", # random.py "checkpoint", + "convert_cuda_rng_state", 
"get_cuda_rng_tracker", "model_parallel_cuda_manual_seed", "get_expert_parallel_rng_tracker_name", + "is_graph_safe_cuda_rng_tracker", "CheckpointWithoutOutput", # utils.py "split_tensor_along_last_dim", diff --git a/megatron/core/tensor_parallel/random.py b/megatron/core/tensor_parallel/random.py index 396e5c54a2d..617d2803c12 100644 --- a/megatron/core/tensor_parallel/random.py +++ b/megatron/core/tensor_parallel/random.py @@ -111,6 +111,41 @@ def cb(): _lazy_call(cb) +def convert_cuda_rng_state( + state: Union[torch.Tensor, torch.Generator], to_graphable: bool = False +) -> Union[torch.Tensor, torch.Generator]: + """ + Convert the cuda rng state tensor to the graphable version, + or from the graphable version to the non-graphable tensor version. + """ + if to_graphable: + if isinstance(state, torch.Tensor): + # Convert to the graphable version. + # Store current rng state. + orig_cuda_rng_state = _get_cuda_rng_state(graph_safe=False) + # Set rng state to the desired one + _set_cuda_rng_state(state, graph_safe=False) + # Get the graphable state + graphable_state = _get_cuda_rng_state(clone=True, graph_safe=True) + # And set the state to the original state we started with. + _set_cuda_rng_state(orig_cuda_rng_state, graph_safe=False) + return graphable_state + elif isinstance(state, torch.Generator): + # already graphable, just return it. + return state + else: + raise ValueError(f"Invalid state type: {type(state)}") + else: + if isinstance(state, torch.Tensor): + # already non-graphable, just return it. + return state + elif isinstance(state, torch.Generator): + # Convert to the non-graphable tensor version. + return state.get_state() + else: + raise ValueError(f"Invalid state type: {type(state)}") + + def get_expert_parallel_rng_tracker_name(): """Get the expert parallel rng tracker name""" global _EXPERT_PARALLEL_RNG_TRACKER_NAME @@ -161,6 +196,10 @@ def reset(self): # Seeds are just for book keeping and ensure no seed is set twice. 
self.seeds_ = set() + # Name of the rng state currently being used in the generator. + # The default one is "default-rng" and won't be pushed to the self.states_ dictionary. + self._current_state_name = "default-rng" + def get_states(self): """Get rng states. Copy the dictionary so we have direct pointers to the states, not just a pointer to the dictionary.""" @@ -207,10 +246,14 @@ def fork(self, name=_MODEL_PARALLEL_RNG_TRACKER_NAME): # Check if we have added the state if name not in self.states_: raise Exception('cuda rng state {} is not added'.format(name)) - # Store current rng state. + # Store current rng state and name. Store in self.states_ if it's not the default state. orig_cuda_rng_state = _get_cuda_rng_state(graph_safe=self.use_cudagraphable_rng) - # Set rng state to the desired one + orig_state_name = self._current_state_name + if orig_state_name != "default-rng": + self.states_[orig_state_name] = orig_cuda_rng_state + # Set rng state and name to the desired one. _set_cuda_rng_state(self.states_[name], graph_safe=self.use_cudagraphable_rng) + self._current_state_name = name # Record cpu RNG state cpu_rng_state = torch.get_rng_state() # Do the stuff we wanted to do. @@ -220,10 +263,19 @@ def fork(self, name=_MODEL_PARALLEL_RNG_TRACKER_NAME): # Throw a warning if cpu RNG state changed if not torch.all(cpu_rng_state == torch.get_rng_state()).item(): logging.getLogger(__name__).warning('CPU RNG state changed within GPU RNG context') + # Check if the current state name is the same as the desired state name. + if self._current_state_name != name: + raise Exception( + f'current state name {self._current_state_name} is not the same as the desired ' + f'state name {name}.' + ) # Update the current rng state for later use. self.states_[name] = _get_cuda_rng_state(graph_safe=self.use_cudagraphable_rng) - # And set the state to the original state we started with. + # And set the state and name to the original state we started with. 
+ if orig_state_name != "default-rng": + orig_cuda_rng_state = self.states_[orig_state_name] _set_cuda_rng_state(orig_cuda_rng_state, graph_safe=self.use_cudagraphable_rng) + self._current_state_name = orig_state_name # RNG tracker object. @@ -377,10 +429,24 @@ def model_parallel_cuda_manual_seed( _CUDA_RNG_STATE_TRACKER.add(_EXPERT_PARALLEL_RNG_TRACKER_NAME, expert_parallel_seed) +def is_graph_safe_cuda_rng_tracker(cuda_rng_tracker): + """Check if the cuda rng tracker is graph safe version.""" + if HAVE_TE and is_te_min_version("1.5.0"): + from megatron.core.extensions.transformer_engine import TECudaRNGStatesTracker + + if isinstance(cuda_rng_tracker, TECudaRNGStatesTracker): + return True + if getattr(cuda_rng_tracker, "use_cudagraphable_rng", False): + return True + return False + + def _get_all_rng_states(): """Get all the rng states.""" cpu_rng_state = torch.get_rng_state() - cuda_rng_state = _get_cuda_rng_state() + cuda_rng_state = _get_cuda_rng_state( + graph_safe=is_graph_safe_cuda_rng_tracker(get_cuda_rng_tracker()) + ) cuda_rng_state_tracker = get_cuda_rng_tracker().get_states() return cpu_rng_state, cuda_rng_state, cuda_rng_state_tracker @@ -388,7 +454,9 @@ def _get_all_rng_states(): def _set_all_rng_states(cpu_rng_state, cuda_rng_state, cuda_rng_state_tracker): """Set all the rng states.""" torch.set_rng_state(cpu_rng_state) - _set_cuda_rng_state(cuda_rng_state) + _set_cuda_rng_state( + cuda_rng_state, graph_safe=is_graph_safe_cuda_rng_tracker(get_cuda_rng_tracker()) + ) get_cuda_rng_tracker().set_states(cuda_rng_state_tracker) diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 6f75d67549e..27e6c65c738 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -1907,7 +1907,12 @@ def create_cudagraphs(self): # Prepare CUDA Graph capturing input data and call `make_graphed_callables`. 
sample_args, kwargs = self._get_cuda_graph_input_data() - graphs = make_graphed_callables(tuple(self.flattened_callables), sample_args, **kwargs) + if self.config.sequence_parallel: + rng_context = get_cuda_rng_tracker().fork() + else: + rng_context = nullcontext() + with rng_context: + graphs = make_graphed_callables(tuple(self.flattened_callables), sample_args, **kwargs) # Push the captured graphs to the corresponding TransformerBlock. num_layers_accumulated = 0 diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index 8bab8d70065..28cff06f5ec 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -10,9 +10,11 @@ from megatron.core.fp4_utils import get_fp4_align_size from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.tensor_parallel import get_cuda_rng_tracker, get_expert_parallel_rng_tracker_name from megatron.core.transformer.cuda_graphs import is_graph_capturing from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.utils import internal_api try: import transformer_engine as te # pylint: disable=unused-import @@ -913,6 +915,7 @@ def get_moe_layer_wise_logging_tracker(): return _MOE_LAYER_WISE_LOGGING_TRACKER +@internal_api class RandomSTE(torch.autograd.Function): """ Straight-Through Estimator(STE) function that returns random values @@ -921,26 +924,14 @@ class RandomSTE(torch.autograd.Function): This is used to generate random logits of router for load-balanced benchmark. """ - generator = None - random_logits = None - @staticmethod def forward(ctx, logits): """ Forward pass returns random logits with rank-specific seed. 
""" - if is_graph_capturing() and RandomSTE.random_logits is not None: - return RandomSTE.random_logits - - if RandomSTE.generator is None: - global_rank = torch.distributed.get_rank() - base_seed = 42 - seed = base_seed + global_rank - RandomSTE.generator = torch.Generator(device=logits.device) - RandomSTE.generator.manual_seed(seed) - - RandomSTE.random_logits = logits.clone().normal_(generator=RandomSTE.generator) - return RandomSTE.random_logits + with get_cuda_rng_tracker().fork(get_expert_parallel_rng_tracker_name()): + random_logits = logits.clone().normal_() + return random_logits @staticmethod def backward(ctx, grad_output): diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 70d1e4b1306..c157d062c53 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1277,7 +1277,10 @@ def validate_args(args, defaults={}): # CUDA Graphs if args.cuda_graph_impl != "none": - if args.transformer_impl == 'transformer_engine' and not args.te_rng_tracker: + if ( + "transformer_engine" in (args.transformer_impl, args.cuda_graph_impl) + and not args.te_rng_tracker + ): args.te_rng_tracker = True warn_rank_0("te_rng_tracker is not enabled, enabling it for CUDA graphs.", args.rank) assert ( diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index 48a2025fa63..19206312b67 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -1766,6 +1766,8 @@ def load_model_state_dict(module, state_dict, strict: bool): # rng states. if not release and not args.finetune and not args.no_load_rng and not ignore_rng_state: try: + cuda_rng_tracker = tensor_parallel.get_cuda_rng_tracker() + graph_safe_rng = tensor_parallel.is_graph_safe_cuda_rng_tracker(cuda_rng_tracker) if 'rng_state' in state_dict: if args.ckpt_format == "fsdp_dtensor": # FSDP DTensor checkpoints store rng_state in a different format. 
@@ -1791,8 +1793,10 @@ def load_model_state_dict(module, state_dict, strict: bool): # Check for empty states array if not rng_state['rng_tracker_states']: raise KeyError - tensor_parallel.get_cuda_rng_tracker().set_states( - rng_state['rng_tracker_states']) + rng_tracker_states = { + k: tensor_parallel.convert_cuda_rng_state(v, to_graphable=graph_safe_rng) + for k, v in rng_state['rng_tracker_states'].items() + } else: # backward compatability random.setstate(state_dict['random_rng_state']) np.random.set_state(state_dict['np_rng_state']) @@ -1801,8 +1805,11 @@ def load_model_state_dict(module, state_dict, strict: bool): # Check for empty states array if not state_dict['rng_tracker_states']: raise KeyError - tensor_parallel.get_cuda_rng_tracker().set_states( - state_dict['rng_tracker_states']) + rng_tracker_states = { + k: tensor_parallel.convert_cuda_rng_state(v, to_graphable=graph_safe_rng) + for k, v in state_dict['rng_tracker_states'].items() + } + cuda_rng_tracker.set_states(rng_tracker_states) except KeyError: print_rank_0('Unable to load rng state from checkpoint {}. ' 'Specify --no-load-rng or --finetune to prevent ' diff --git a/tests/unit_tests/tensor_parallel/test_random.py b/tests/unit_tests/tensor_parallel/test_random.py index 47b607b8795..a15ad83cb90 100644 --- a/tests/unit_tests/tensor_parallel/test_random.py +++ b/tests/unit_tests/tensor_parallel/test_random.py @@ -1,3 +1,5 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ import pytest import torch @@ -5,6 +7,7 @@ CheckpointWithoutOutput, CudaRNGStatesTracker, checkpoint, + convert_cuda_rng_state, get_cuda_rng_tracker, model_parallel_cuda_manual_seed, ) @@ -33,6 +36,148 @@ def test_cuda_rng_states_tracker(): assert torch.equal(rng_tracker.get_states()['state2'], rng_state) +@pytest.mark.parametrize("use_cudagraphable_rng", [True, False]) +def test_double_fork_cuda_rng_states_tracker(use_cudagraphable_rng): + rng_tracker = CudaRNGStatesTracker(use_cudagraphable_rng=use_cudagraphable_rng) + rng_tracker.add("state1", 1234) + rng_tracker.add("state2", 5678) + randn_double_fork_1 = [] + randn_double_fork_2 = [] + with rng_tracker.fork("state1"): + randn_double_fork_1.append(torch.randn(10, device="cuda")) + with rng_tracker.fork("state2"): + randn_double_fork_2.append(torch.randn(10, device="cuda")) + with rng_tracker.fork("state1"): + randn_double_fork_1.append(torch.randn(10, device="cuda")) + randn_double_fork_2.append(torch.randn(10, device="cuda")) + randn_double_fork_1.append(torch.randn(10, device="cuda")) + if use_cudagraphable_rng: + double_fork_state1 = rng_tracker.get_states()["state1"].get_state() + double_fork_state2 = rng_tracker.get_states()["state2"].get_state() + else: + double_fork_state1 = rng_tracker.get_states()["state1"] + double_fork_state2 = rng_tracker.get_states()["state2"] + + rng_tracker.reset() + rng_tracker.add("state1", 1234) + rng_tracker.add("state2", 5678) + randn_single_fork_1 = [] + randn_single_fork_2 = [] + with rng_tracker.fork("state1"): + randn_single_fork_1.append(torch.randn(10, device="cuda")) + randn_single_fork_1.append(torch.randn(10, device="cuda")) + randn_single_fork_1.append(torch.randn(10, device="cuda")) + with rng_tracker.fork("state2"): + randn_single_fork_2.append(torch.randn(10, device="cuda")) + randn_single_fork_2.append(torch.randn(10, device="cuda")) + if use_cudagraphable_rng: + single_fork_state1 = rng_tracker.get_states()["state1"].get_state() + single_fork_state2 = 
rng_tracker.get_states()["state2"].get_state() + else: + single_fork_state1 = rng_tracker.get_states()["state1"] + single_fork_state2 = rng_tracker.get_states()["state2"] + + assert torch.equal(randn_double_fork_1[0], randn_single_fork_1[0]) + assert torch.equal(randn_double_fork_1[1], randn_single_fork_1[1]) + assert torch.equal(randn_double_fork_1[2], randn_single_fork_1[2]) + assert torch.equal(randn_double_fork_2[0], randn_single_fork_2[0]) + assert torch.equal(randn_double_fork_2[1], randn_single_fork_2[1]) + assert torch.equal(double_fork_state1, single_fork_state1) + assert torch.equal(double_fork_state2, single_fork_state2) + + +def test_convert_cuda_rng_state(): + ## Get the default rng state + torch.cuda.manual_seed(999) + randn = torch.randn(10, device="cuda") + rng_state = torch.cuda.get_rng_state() + + try: + from megatron.core.extensions.transformer_engine import TECudaRNGStatesTracker + except ImportError: + TECudaRNGStatesTracker = None + + ## from non-graphable RNG to graphable RNG + # get state from non-graphable RNG + tracker = CudaRNGStatesTracker(use_cudagraphable_rng=False) + tracker.add("state1", 123) + for i in range(3): + with tracker.fork("state1"): + randn = torch.randn(10, device="cuda") + state = convert_cuda_rng_state(tracker.states_["state1"], to_graphable=True) + rand_tensors = [] + for i in range(3): + with tracker.fork("state1"): + randn = torch.randn(10, device="cuda") + rand_tensors.append(randn) + + # set state to local graph RNG + cudagraphable_tracker = CudaRNGStatesTracker(use_cudagraphable_rng=True) + cudagraphable_tracker.set_states({"state1": state.clone_state()}) + for i in range(3): + with cudagraphable_tracker.fork("state1"): + randn = torch.randn(10, device="cuda") + assert torch.equal(randn, rand_tensors[i]) + + # set state to TE RNG + if TECudaRNGStatesTracker is not None: + te_tracker = TECudaRNGStatesTracker() + te_tracker.set_states({"state1": state}) + for i in range(3): + with te_tracker.fork("state1"): + randn 
= torch.randn(10, device="cuda") + assert torch.equal(randn, rand_tensors[i]) + + ## from graphable RNG to non-graphable RNG + # get state from graphable RNG + cudagraphable_tracker = CudaRNGStatesTracker(use_cudagraphable_rng=True) + cudagraphable_tracker.add("state2", 123) + for i in range(3): + with cudagraphable_tracker.fork("state2"): + randn = torch.randn(10, device="cuda") + state = convert_cuda_rng_state(cudagraphable_tracker.states_["state2"], to_graphable=False) + rand_tensors = [] + for i in range(3): + with cudagraphable_tracker.fork("state2"): + randn = torch.randn(10, device="cuda") + rand_tensors.append(randn) + + # set state to non-graphable RNG + tracker = CudaRNGStatesTracker(use_cudagraphable_rng=False) + tracker.set_states({"state2": state}) + for i in range(3): + with tracker.fork("state2"): + randn = torch.randn(10, device="cuda") + assert torch.equal(randn, rand_tensors[i]) + + ## from TE RNG to non-graphable RNG + if TECudaRNGStatesTracker is not None: + # get state from TE RNG + cudagraphable_tracker = TECudaRNGStatesTracker() + cudagraphable_tracker.add("state3", 123) + for i in range(3): + with cudagraphable_tracker.fork("state3"): + randn = torch.randn(10, device="cuda") + state = convert_cuda_rng_state(cudagraphable_tracker.states_["state3"], to_graphable=False) + rand_tensors = [] + for i in range(3): + with cudagraphable_tracker.fork("state3"): + randn = torch.randn(10, device="cuda") + rand_tensors.append(randn) + + # set state to non-graphable RNG + tracker = CudaRNGStatesTracker(use_cudagraphable_rng=False) + tracker.set_states({"state3": state}) + for i in range(3): + with tracker.fork("state3"): + randn = torch.randn(10, device="cuda") + assert torch.equal(randn, rand_tensors[i]) + + ## After all tests, check if the default rng state is still the same. 
+ rng_state_final = torch.cuda.get_rng_state() + assert torch.equal(rng_state, rng_state_final) + + def test_model_parallel_cuda_manual_seed(): Utils.initialize_model_parallel(4, 2) model_parallel_cuda_manual_seed(0, force_reset_rng=True) From 368e580b7ad04fa5c6bfdaaf4ac05de9dbc96c07 Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Wed, 17 Dec 2025 10:25:39 -0800 Subject: [PATCH 197/334] [Dev] Mark API backwards compatibility checks as OPTIONAL (non-blocking) (#2699) --- .../check_api_backwards_compatibility_workflow.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check_api_backwards_compatibility_workflow.yml b/.github/workflows/check_api_backwards_compatibility_workflow.yml index 42db9486cac..4ba0ed2780c 100644 --- a/.github/workflows/check_api_backwards_compatibility_workflow.yml +++ b/.github/workflows/check_api_backwards_compatibility_workflow.yml @@ -81,7 +81,7 @@ jobs: check-compatibility: needs: [pre-flight] if: needs.pre-flight.outputs.should_skip != 'true' - name: Check API Backward Compatibility + name: "OPTIONAL: Check API Backward Compatibility" runs-on: ubuntu-latest # ============================================================================ @@ -245,7 +245,7 @@ jobs: api-backward-compatibility-summary: needs: [pre-flight, check-compatibility] runs-on: ubuntu-latest - name: API Backward Compatibility Check Summary + name: "OPTIONAL: API Backward Compatibility Check Summary" if: always() && !cancelled() steps: - name: Checkout @@ -257,7 +257,7 @@ jobs: GH_TOKEN: ${{ github.token }} SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.should_skip == 'true' }} run: | - FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success" and .name != "API Backward Compatibility Check Summary")] | length') || echo 0 + FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success" and 
.name != "OPTIONAL: API Backward Compatibility Check Summary")] | length') || echo 0 if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then if [ "$SKIPPING_IS_ALLOWED" == "true" ]; then @@ -268,6 +268,6 @@ jobs: exit 0 else echo "❌ Found $FAILED_JOBS failed job(s)" - gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success" and .name != "API Backward Compatibility Check Summary") | .name' + gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success" and .name != "OPTIONAL: API Backward Compatibility Check Summary") | .name' exit 1 fi From 3714d81d418c9f1bca4594fc35f9e8289f652862 Mon Sep 17 00:00:00 2001 From: Kunlun Li <94586211+kunlunl@users.noreply.github.com> Date: Thu, 18 Dec 2025 09:05:09 +0800 Subject: [PATCH 198/334] [Dev] FP8 params support for megatron-fsdp (MXFP8/Blockwise) (#2086) Signed-off-by: kunlunl Co-authored-by: jianbinc --- .../distributed/fsdp/mcore_fsdp_adapter.py | 4 + .../fsdp/src/megatron_fsdp/megatron_fsdp.py | 157 +++--- .../fsdp/src/megatron_fsdp/mixed_precision.py | 331 +++++++++++++ .../megatron_fsdp/param_and_grad_buffer.py | 450 +++++++++++++----- .../fsdp/src/megatron_fsdp/utils.py | 252 +--------- megatron/training/arguments.py | 3 + 6 files changed, 776 insertions(+), 421 deletions(-) create mode 100644 megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py diff --git a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py index 7432a7f9a36..d6384e70488 100644 --- a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +++ b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py @@ -111,6 +111,9 @@ def __init__( dist_index=self.megatron_fsdp_dist_index, calculate_per_token_loss=config.calculate_per_token_loss, init_model_with_meta_device=config.init_model_with_meta_device, + enable_fine_grained_param_gather_hook=( + config.fp8_recipe == 
"mxfp8" and ddp_config.fp8_param_gather + ), ), ) self.param_and_grad_buffer = self.module.param_and_grad_buffer @@ -123,6 +126,7 @@ def __init__( self.broadcast_params = self.module.broadcast_params self.module.state_dict_for_save_checkpoint = self.module.state_dict self.state_dict_for_save_checkpoint = self.state_dict + self.module.config = config self.sync_rng_states_across_tp_group() diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index 8a63e0f5cf7..17f7f4d1c05 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -23,6 +23,20 @@ import torch.nn as nn from torch.utils._pytree import tree_flatten, tree_map, tree_unflatten +from .mixed_precision import ( + fp8_create_transpose_cache, + fp8_discard_transpose_cache, + is_float8tensor, +) +from .param_and_grad_buffer import ( + AllGatherPipeline, + BucketingPolicy, + GradReducePipeline, + ParamAndGradBuffer, + PrefetchOrder, + override_sharded_param_methods_with_safety_checks, + to_local_if_dtensor, +) from .utils import FSDPDistributedIndex logger = logging.getLogger(__name__) @@ -34,23 +48,12 @@ from megatron.core.distributed.distributed_data_parallel_config import ( DistributedDataParallelConfig, ) - from megatron.core.fp8_utils import is_float8tensor from megatron.core.utils import is_submodule except ImportError: # Megatron-LM is not installed, use Megatron-FSDP as a standalone module. 
logger.info("Megatron Core is not installed, Megatron-FSDP will run without Megatron Core.") from .distributed_data_parallel_config import DistributedDataParallelConfig - from .utils import is_float8tensor, is_submodule - -from .param_and_grad_buffer import ( - AllGatherPipeline, - BucketingPolicy, - GradReducePipeline, - ParamAndGradBuffer, - PrefetchOrder, - override_sharded_param_methods_with_safety_checks, - to_local_if_dtensor, -) + from .utils import is_submodule class TrainingState(Enum): @@ -168,6 +171,7 @@ def __init__( nccl_ub: bool = False, fsdp_double_buffer: bool = False, disable_symmetric_registration: bool = False, + enable_fine_grained_param_gather_hook: bool = False, ): super().__init__() # If device is not specified, use the current device. @@ -217,6 +221,7 @@ def __init__( self.calculate_per_token_loss = calculate_per_token_loss self.init_model_with_meta_device = init_model_with_meta_device + self.enable_fine_grained_param_gather_hook = enable_fine_grained_param_gather_hook # Whether to constantly synchronize the model every training iteration, # which defaults to False to overlap communication with computation @@ -400,6 +405,7 @@ def all_gather_and_wait_parameters_ready( prefetch=True, prefetch_order=PrefetchOrder.FORWARD_PASS_ORDER, wait_bucket_ready=True, + bwd=False, ): """ All-gather parameters across the data parallel group and wait for @@ -426,11 +432,14 @@ def all_gather_and_wait_parameters_ready( and self.ddp_config.outer_dp_sharding_strategy != "no_shard" and (self.microbatch_count == 0 or self.model_auto_sync) ), + bwd=bwd, ) if wait_bucket_ready: for param in params: bucket_id = self.param_and_grad_buffer.param_to_param_group[param] - ag_pipeline.wait_bucket_ready(bucket_id) + ag_pipeline.wait_bucket_ready(bucket_id, bwd) + if bwd and is_float8tensor(param): + fp8_create_transpose_cache(param) for param in params: # This setting is needed to make FSDP store the weight object when used @@ -489,19 +498,17 @@ def 
_register_fsdp_hooks(self, root_module): """ fsdp_unit_modules = self.fsdp_unit_modules - def release_module_parameters(module, *unused): + def release_module_parameters(module, bwd, *unused): for param in module.parameters(): bucket_id = self.param_and_grad_buffer.param_to_param_group[param] - self.all_gather_pipeline.release_bucket(bucket_id) - + self.all_gather_pipeline.release_bucket(bucket_id, bwd) if not self.ddp_config.keep_fp8_transpose_cache: release_params_fp8_transpose_cache(module.parameters()) def release_params_fp8_transpose_cache(params): for param in params: if is_float8tensor(param): - param._transpose_invalid = True - param._transpose = None + fp8_discard_transpose_cache(param) def _grad_acc(param): """ @@ -558,12 +565,15 @@ def _post_backward(module, *unused): if self.ddp_config.data_parallel_sharding_strategy == "optim_grads_params": # Deallocate the module parameters after the backward pass, # because we have our data-parallel gradients computed. - release_module_parameters(module) + release_module_parameters(module, bwd=True) module._training_state = TrainingState.IDLE param_list = list(module.parameters()) else: param_list = list(module.parameters(recurse=False)) + if self.enable_fine_grained_param_gather_hook: + param_list = list(module.parameters(recurse=False)) + # If the parameter is shared, we do not accumulate gradients # here, as the gradients will be accumulated in the # root post-backward hook. @@ -615,6 +625,9 @@ def _pre_forward_param_unshard( # to allocate as little memory as possible for this forward pass. param_list = list(module.parameters(recurse=False)) + if self.enable_fine_grained_param_gather_hook: + param_list = list(module.parameters(recurse=False)) + # All-gather the parameters before the forward pass. 
self.all_gather_and_wait_parameters_ready( params=param_list, @@ -714,7 +727,7 @@ def _root_post_backward(*unused): if self.model_auto_sync: self.finish_grad_sync() - def _pre_backward(module: nn.Module, *unused): + def _pre_backward_param_unshard(module: nn.Module, *unused): """ Sub-module pre-backward hook to all-gather the module parameters before the backward pass. @@ -723,11 +736,19 @@ def _pre_backward(module: nn.Module, *unused): # and unsharding operations when performing activation recomputation # / gradient checkpointing. module._training_state = TrainingState.PRE_BACKWARD + if isinstance(module, tuple(fsdp_unit_modules)): - # All-gather / unshard the module parameters before the backward pass. - self.all_gather_and_wait_parameters_ready( - list(module.parameters()), prefetch_order=PrefetchOrder.BACKWARD_PASS_ORDER - ) + param_list = list(module.parameters()) + else: + param_list = list(module.parameters(recurse=False)) + + if self.enable_fine_grained_param_gather_hook: + param_list = list(module.parameters(recurse=False)) + + # All-gather / unshard the module parameters before the backward pass. + self.all_gather_and_wait_parameters_ready( + param_list, prefetch_order=PrefetchOrder.BACKWARD_PASS_ORDER, bwd=True + ) self._root_pre_backward_hook_issued = False @@ -754,7 +775,9 @@ def _root_pre_backward(module: nn.Module, *unused): for bucket_id in range(ag_pipeline.num_buckets): group = self.param_and_grad_buffer.parameter_groups[bucket_id] if group.fsdp_unit_id is not None: - ag_pipeline.bucket_can_be_released[bucket_id] = True + ag_pipeline.bucket_can_be_released[ + ag_pipeline.get_bucket_key(bucket_id, bwd=False) + ] = True # Track parameters that require gradient reduction and optimization. self._params_require_handle_grad = set() for param_group in self.param_and_grad_buffer.parameter_groups: @@ -776,8 +799,12 @@ def _post_forward(module: nn.Module, input: Any, output: Any): # during activation recomputation / gradient checkpointing. 
return output + assert isinstance( + module, tuple(fsdp_unit_modules) + ), "_post_forward hook should only be registered on FSDP unit modules." + # Release the module parameters after the forward pass to save memory. - release_module_parameters(module) + release_module_parameters(module, bwd=False) module._training_state = TrainingState.IDLE return output @@ -818,21 +845,55 @@ def forward_hook(_module, inputs, output): # on the output tensor(s). return module.register_forward_hook(forward_hook) + def _register_pre_forward_param_unshard_hook(module): + """ + Register the forward pre-hook to unshard parameters before the forward pass. + If we are not sharding anything, we do not have a model weight buffer and thus + have nothing to all-gather / un-shard. + """ + if self.ddp_config.data_parallel_sharding_strategy != "no_shard": + self.forward_pre_hooks[f"{module._get_name()} parameter unshard"] = ( + module.register_forward_pre_hook( + _pre_forward_param_unshard, prepend=True, with_kwargs=True + ) + ) + + def _register_pre_backward_param_unshard_hook(module): + """ + Register the backward pre-hook to unshard FSDP unit module parameters + immediately before the backward pass via attaching a gradient-triggered + hook to the output tensor(s) of a module during a post-forward hook. + """ + self.backward_pre_hooks[f"all-gather {module._get_name()} parameters"] = ( + create_custom_backward_hook(module, _pre_backward_param_unshard) + ) + + def _register_grad_acc_and_reduce_hook(module): + """ + Register the post-backward hook to deallocate model parameters and + reduce-scatter gradients immediately after the module backward pass + has completed to conserve memory for the subsequent backward pass. 
+ """ + self.forward_pre_hooks[f"module {name} register post-backward hook"] = ( + module.register_forward_pre_hook( + functools.partial(_register_post_backward_hook, _post_backward), + with_kwargs=True, + ) + ) + fsdp_modules = [] for name, module in root_module.named_modules(): + if self.enable_fine_grained_param_gather_hook: + _register_pre_forward_param_unshard_hook(module) + _register_pre_backward_param_unshard_hook(module) + _register_grad_acc_and_reduce_hook(module) + # Skip if the module is already registered in fsdp_modules. if any(is_submodule(module, fsdp_module) for fsdp_module in fsdp_modules): continue - # Register the forward pre-hook to unshard parameters before the forward pass. - # If we are not sharding anything, we do not have a model weight buffer and thus - # have nothing to all-gather / un-shard. - if self.ddp_config.data_parallel_sharding_strategy != "no_shard": - self.forward_pre_hooks[f"module {name} parameter unshard"] = ( - module.register_forward_pre_hook( - _pre_forward_param_unshard, prepend=True, with_kwargs=True - ) - ) + if not self.enable_fine_grained_param_gather_hook: + _register_pre_forward_param_unshard_hook(module) if isinstance(module, tuple(fsdp_unit_modules)): fsdp_modules.append(module) @@ -843,12 +904,8 @@ def forward_hook(_module, inputs, output): module.register_forward_hook(_post_forward, prepend=False) ) - # Register the backward pre-hook to unshard FSDP unit module parameters - # immediately before the backward pass via attaching a gradient-triggered - # hook to the output tensor(s) of a module during a post-forward hook. 
- self.backward_pre_hooks[f"all-gather module {name} parameters"] = ( - create_custom_backward_hook(module, _pre_backward) - ) + if not self.enable_fine_grained_param_gather_hook: + _register_pre_backward_param_unshard_hook(module) elif ( not self.ddp_config.keep_fp8_transpose_cache and self.ddp_config.data_parallel_sharding_strategy == "optim_grads_params" @@ -861,15 +918,8 @@ def forward_hook(_module, inputs, output): module.register_forward_hook(_release_module_fp8_transpose_cache, prepend=False) ) - # Register the post-backward hook to deallocate model parameters and - # reduce-scatter gradients immediately after the module backward pass - # has completed to conserve memory for the subsequent backward pass. - self.forward_pre_hooks[f"module {name} register post-backward hook"] = ( - module.register_forward_pre_hook( - functools.partial(_register_post_backward_hook, _post_backward), - with_kwargs=True, - ) - ) + if not self.enable_fine_grained_param_gather_hook: + _register_grad_acc_and_reduce_hook(module) # Register root module pre- and post-backward hooks in cases where the # forward function of root module is not called, but rather the forward @@ -986,7 +1036,7 @@ def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bo else: self.synchronize_param_gather() for bucket_id in range(self.all_gather_pipeline.num_buckets): - self.all_gather_pipeline.async_bucket_gather(bucket_id=bucket_id) + self.all_gather_pipeline.async_bucket_gather(bucket_id=bucket_id, bwd=False) group = self.param_and_grad_buffer.parameter_groups[bucket_id] if group.model_weight_buffer is None: continue @@ -994,9 +1044,10 @@ def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bo if group.model_weight_buffer.is_data_distributed: # If model weight is sharded, we wait for the all-gather to complete and # then release the bucket immediately to save memory usage. 
- self.all_gather_pipeline.wait_bucket_ready(bucket_id) + self.all_gather_pipeline.wait_bucket_ready(bucket_id, False) + for bucket_id in range(self.all_gather_pipeline.num_buckets): - self.all_gather_pipeline.wait_bucket_ready(bucket_id) + self.all_gather_pipeline.wait_bucket_ready(bucket_id, False) def start_grad_sync(self, *unused): """ diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py new file mode 100644 index 00000000000..69a049ad955 --- /dev/null +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py @@ -0,0 +1,331 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +from importlib.metadata import version +from typing import List, Optional, Tuple + +import torch +from packaging.version import Version as PkgVersion + +logger = logging.getLogger(__name__) + +# Detect if Transformer Engine is installed +try: + import transformer_engine # pylint: disable=W0611 + from transformer_engine.pytorch.module.base import TransformerEngineBaseModule + + HAVE_TE = True +except (ImportError, ModuleNotFoundError): + TransformerEngineBaseModule = None + HAVE_TE = False + logger.info("Using Megatron-FSDP without Transformer Engine.") + +# Detect the Transformer Engine version +try: + import transformer_engine as te + + if hasattr(te, "__version__"): + TE_VERSION = PkgVersion(str(te.__version__)) + else: + TE_VERSION = PkgVersion(version("transformer-engine")) +except: + TE_VERSION = None + +# Detect the FP8 tensor class +try: + from transformer_engine.pytorch.tensor import QuantizedTensor + + HAVE_TE_FP8_TENSOR_CLASS = True + FP8_TENSOR_CLASS = QuantizedTensor +except: + try: + from transformer_engine.pytorch.float8_tensor import Float8Tensor + + HAVE_TE_FP8_TENSOR_CLASS = True + FP8_TENSOR_CLASS = Float8Tensor + except: + HAVE_TE_FP8_TENSOR_CLASS = False + +# Detect the MXFP8 tensor class +try: + from transformer_engine.pytorch.tensor.mxfp8_tensor import MXFP8Tensor + + HAVE_TE_MXFP8TENSOR = True +except: + HAVE_TE_MXFP8TENSOR = False + +# Detect the Blockwise FP8 tensor class +try: + from transformer_engine.pytorch.tensor.float8_blockwise_tensor import Float8BlockwiseQTensor + + HAVE_TE_BLOCKWISE_FP8TENSOR = True +except: + HAVE_TE_BLOCKWISE_FP8TENSOR = False + +# Detect the "cast_master_weights_to_fp8" function of Transformer Engine +try: + from transformer_engine.pytorch.tensor.utils import cast_master_weights_to_fp8 + + HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8 = True +except: + HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8 = False + + # Try to import multi_tensor_apply, used in the fallback of fp8 quantization. 
+ try: + from transformer_engine.pytorch.optimizers import multi_tensor_applier, multi_tensor_scale + + multi_tensor_scale_impl = multi_tensor_scale + except ImportError: + try: + import amp_C + from apex.multi_tensor_apply import multi_tensor_applier + + multi_tensor_scale_impl = amp_C.multi_tensor_scale + except ImportError: + import warnings + + warnings.warn( + "Transformer Engine and Apex are not installed. " + "Falling back to local implementations of " + "multi_tensor_applier and multi_tensor_scale" + ) + + def local_multi_tensor_applier(op, noop_flag_buffer, tensor_lists, *args): + """Multi tensor op applier""" + return op(2048 * 32, noop_flag_buffer, tensor_lists, *args) + + def local_multi_tensor_scale(chunk_size, noop_flag, tensor_lists, scale): + """Works as a drop-in replacement for amp_C.multi_tensor_scale.""" + for src, dst in zip(tensor_lists[0], tensor_lists[1]): + dst.copy_(src * scale) + + multi_tensor_applier = local_multi_tensor_applier + multi_tensor_scale_impl = local_multi_tensor_scale + + def _multi_tensor_copy_this_to_that( + this: List[torch.Tensor], + that: List[torch.Tensor], + overflow_buf: Optional[torch.Tensor] = None, + ): + """ + Use multi-tensor-applier to copy values from one list to another. + We don't have a bfloat16 implementation so for now if the overflow_buf + is not provided, we default back to simple loop copy to be compatible + with bfloat16. + """ + if overflow_buf is not None: + overflow_buf.fill_(0) + # Scaling with factor `1.0` is equivalent to copy. 
+ multi_tensor_applier(multi_tensor_scale_impl, overflow_buf, [this, that], 1.0) + else: + for this_, that_ in zip(this, that): + that_.copy_(this_) + + +# Detect the "post_all_gather_processing" function of Transformer Engine +try: + from transformer_engine.pytorch.tensor.utils import post_all_gather_processing + + HAVE_TE_POST_ALL_GATHER_PROCESSING = True +except: + HAVE_TE_POST_ALL_GATHER_PROCESSING = False + + +def is_te_min_version(vers, check_equality=True): + """Check if minimum version of `transformer-engine` is installed.""" + if not isinstance(TE_VERSION, PkgVersion): + return False + + if check_equality: + return TE_VERSION >= PkgVersion(vers) + else: + return TE_VERSION > PkgVersion(vers) + + +def is_float8tensor(tensor: torch.Tensor) -> bool: + """Check if a tensor is a FP8 tensor.""" + return HAVE_TE and isinstance(tensor, FP8_TENSOR_CLASS) + + +def is_blockwise_float8tensor(tensor: torch.Tensor) -> bool: + """Check if a tensor is a Blockwise FP8 tensor.""" + return HAVE_TE_BLOCKWISE_FP8TENSOR and isinstance(tensor, Float8BlockwiseQTensor) + + +def fp8_need_transpose_data(tensor: torch.Tensor) -> bool: + """Check if a FP8 tensor needs transpose data.""" + return HAVE_TE_MXFP8TENSOR and isinstance(tensor, MXFP8Tensor) + + +def fp8_need_transpose_data_for_meta_device_init(module: TransformerEngineBaseModule) -> bool: + """Check if a FP8 tensor needs transpose data, for meta device init scenario.""" + return HAVE_TE_MXFP8TENSOR and module.fp8_meta["recipe"].mxfp8() + + +def fp8_discard_transpose_cache(tensor: torch.Tensor) -> None: + """Discard the transpose cache of a FP8 tensor.""" + assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" + + if hasattr(tensor, "_transpose_invalid"): + tensor._transpose_invalid = True + tensor._transpose = None + elif not fp8_need_transpose_data(tensor): + tensor.update_usage(rowwise_usage=True, columnwise_usage=False) + + +def fp8_create_transpose_cache(tensors: List[torch.Tensor]) -> None: + 
"""Create the transpose cache of a FP8 tensor.""" + if HAVE_TE_POST_ALL_GATHER_PROCESSING: + post_all_gather_processing(tensors) + else: + _fp8_create_transpose_cache_fallback(tensors) + + +def _fp8_create_transpose_cache_fallback(tensors: List[torch.Tensor]) -> None: + if not isinstance(tensors, list): + tensors = [tensors] + for tensor in tensors: + assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" + if hasattr(tensor, "_create_transpose"): + tensor._create_transpose() + else: + tensor._create_columnwise() + + +def fp8_set_raw_data(tensor: torch.Tensor, data: torch.Tensor, set_transpose: bool = False) -> None: + """Set the raw data of a Transformer Engine Float8Tensor.""" + assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" + + if set_transpose: + assert fp8_need_transpose_data(tensor), f"Type {type(tensor)} does not need transpose data" + data_attr = "_columnwise_data" + else: + data_attr = "_rowwise_data" if hasattr(tensor, "_rowwise_data") else "_data" + + old_data = getattr(tensor, data_attr) + assert old_data.dtype == data.dtype, "The data types of raw data don't match" + assert ( + old_data.shape == data.shape + ), f"Shape {old_data.shape} of old_data doesn't match {data.shape} of new_data" + setattr(tensor, data_attr, data) + + +def fp8_get_raw_data(tensor: torch.Tensor, get_transpose: bool = False) -> torch.Tensor: + """Get the underlying raw storage of a FP8 tensor.""" + assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" + + if get_transpose: + assert fp8_need_transpose_data(tensor), f"Type {type(tensor)} does not need transpose data" + data_attr = "_columnwise_data" + else: + data_attr = "_rowwise_data" if hasattr(tensor, "_rowwise_data") else "_data" + + return getattr(tensor, data_attr) + + +def fp8_dequantize(tensor: torch.Tensor) -> torch.Tensor: + """Dequantize a FP8 tensor to a higher precision.""" + assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" + 
assert is_te_min_version( + "2.0" + ), "Transformer Engine >= 2.0 is required for dequantizing parameters." + return tensor.dequantize() + + +def fp8_quantize( + model_params: List[torch.Tensor], + main_params: List[torch.Tensor], + start_offsets: List[int], + data_parallel_group: torch.distributed.ProcessGroup, + fsdp_shard_model_params: List[Tuple[torch.Tensor, Optional[torch.Tensor]]], +) -> None: + """Quantize sharded parameters to FP8.""" + if len(model_params) == 0: + return + fsdp_shard_model_params = [x[0] if x[1] is None else x for x in fsdp_shard_model_params] + + if HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8: + cast_master_weights_to_fp8( + model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params + ) + else: + _fp8_quantize_fallback( + model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params + ) + + +def _fp8_quantize_fallback( + model_params: List[torch.Tensor], + main_params: List[torch.Tensor], + start_offsets: List[int], + data_parallel_group: torch.distributed.ProcessGroup, + fsdp_shard_model_params: List[Tuple[torch.Tensor, Optional[torch.Tensor]]], +) -> None: + for model_param, main_param, start_offset, fsdp_shard_model_param in zip( + model_params, main_params, start_offsets, fsdp_shard_model_params + ): + if main_param is None: + continue + + if fsdp_shard_model_param is not None: + shard_model_param = fsdp_shard_model_param + else: + shard_model_param = model_param._data.view(-1)[ + start_offset : start_offset + main_param.numel() + ] + + quantizer = model_param._quantizer + # When not using fp8 params, the main_param (fp32) is first cast to bf16/fp16, and then + # cast to fp8 during forward. This logic keeps numerical consistency with bf16 params. 
+ main_param = main_param.to(model_param.dtype) + out = Float8Tensor( + shape=main_param.size(), + dtype=model_param.dtype, + requires_grad=False, + data=shard_model_param, + fp8_scale_inv=model_param._scale_inv, + fp8_dtype=model_param._fp8_dtype, + quantizer=quantizer, + ) + quantizer.update_quantized(main_param, out) + + amaxes = [] + scales = [] + scale_invs = [] + for model_param in model_params: + quantizer = model_param._quantizer + amaxes.append(quantizer.amax.view(1)) + scales.append(quantizer.scale.view(1)) + scale_invs.append(model_param._scale_inv.view(1)) + model_param._reset_caches() + + dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device="cuda") + + # Update scaling factors. + packed_scales = torch.empty(len(scales), dtype=torch.float32, device=scales[0].device) + packed_scale_views = [packed_scales[i].view(1) for i in range(len(scales))] + _multi_tensor_copy_this_to_that(scales, packed_scale_views, dummy_overflow_buf) + torch.reciprocal(packed_scales, out=packed_scales) + _multi_tensor_copy_this_to_that(packed_scale_views, scale_invs, dummy_overflow_buf) + + # Reduce amaxes. + # Note: Assume each param has a separate amax. 
+ packed_amaxes = torch.empty(len(amaxes), dtype=torch.float32, device=amaxes[0].device) + packed_amax_views = [packed_amaxes[i].view(1) for i in range(len(amaxes))] + _multi_tensor_copy_this_to_that(amaxes, packed_amax_views, dummy_overflow_buf) + torch.distributed.all_reduce( + packed_amaxes, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group + ) + _multi_tensor_copy_this_to_that(packed_amax_views, amaxes, dummy_overflow_buf) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index 88254d89988..b0154cb94e9 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -33,6 +33,17 @@ from torch.distributed.tensor import DTensor, Replicate, Shard from torch.distributed.tensor.device_mesh import _mesh_resources +from .mixed_precision import ( + fp8_discard_transpose_cache, + fp8_get_raw_data, + fp8_need_transpose_data, + fp8_need_transpose_data_for_meta_device_init, + fp8_quantize, + fp8_set_raw_data, + is_blockwise_float8tensor, + is_float8tensor, + is_te_min_version, +) from .uneven_dtensor import update_uneven_dtensor_chunk_metadata, validate_uneven_dtensor from .utils import ( _MODEL_PARALLEL_RNG_TRACKER_NAME, @@ -51,27 +62,15 @@ from megatron.core.distributed.distributed_data_parallel_config import ( DistributedDataParallelConfig, ) - from megatron.core.fp8_utils import ( - is_float8tensor, - modify_underlying_storage, - quantize_param_shard, - ) from megatron.core.tensor_parallel import get_cuda_rng_tracker - from megatron.core.utils import is_submodule, is_te_min_version + from megatron.core.utils import is_submodule logger.info("Detected Megatron Core, using Megatron-FSDP with Megatron.") except ImportError: # Megatron-LM is not installed, use Megatron-FSDP as a standalone module. 
from .distributed_data_parallel_config import DistributedDataParallelConfig - from .utils import ( - get_cuda_rng_tracker, - is_float8tensor, - is_submodule, - is_te_min_version, - modify_underlying_storage, - quantize_param_shard, - ) + from .utils import get_cuda_rng_tracker, is_submodule logger.info("Megatron Core is not installed, Megatron-FSDP will run without Megatron Core.") @@ -817,7 +816,7 @@ def __init__( data_parallel_group: Optional[torch.distributed.ProcessGroup] = None, dp_rank: Optional[int] = None, temporary_bucket_allocator: Optional[TemporaryBucketAllocator] = None, - is_dtype_float8: bool = False, + is_transpose_buffer: bool = False, gradient_scaling_factor: Optional[float] = None, chunk_size_factor: int = 1, mem_alloc_context: Optional[Callable] = None, @@ -850,7 +849,7 @@ def __init__( self.temporary_bucket_allocator = ( temporary_bucket_allocator if temporary_bucket_allocator else TemporaryBucketAllocator() ) - self.is_dtype_float8 = is_dtype_float8 + self.is_transpose_buffer = is_transpose_buffer self.gradient_scaling_factor = gradient_scaling_factor self.mem_alloc_context = mem_alloc_context if mem_alloc_context else nullcontext @@ -946,11 +945,11 @@ def fetch_bucket( for p in self.params: item_id = self.param_idx[p] p = to_local_if_dtensor(p) + data = self.get_item_from_bucket(bucket, item_id).view(p.shape) if is_float8tensor(p): - p._data = self.get_item_from_bucket(bucket, item_id).view(p.shape) + fp8_set_raw_data(p, data, self.is_transpose_buffer) else: - p.data = self.get_item_from_bucket(bucket, item_id).view(p.shape) - + p.data = data return bucket def free_bucket_storage(self): @@ -1119,6 +1118,9 @@ def set_item(self, item_id: int, item_data: torch.Tensor) -> None: # When fully sharded, we need to get the slice of the item to be stored in this shard. # Otherwise, we can just flatten the entire item since this buffer contains # the entire bucket. 
+ if is_float8tensor(item_data): + item_data = fp8_get_raw_data(item_data, self.is_transpose_buffer) + if self.is_data_distributed: # Get the coordinates of the slice of the item that is contained in this shard. slice_start, slice_end = self._get_item_slice_in_shard(item_id) @@ -1225,6 +1227,8 @@ class ParameterGroup: Factor determining chunk size for grouped parameter processing. model_weight_buffer (Optional[DataParallelBuffer]): Buffer used to store model weights for data-parallel operations. + transpose_weight_buffer (Optional[DataParallelBuffer]): + Buffer used to store transpose weights for data-parallel operations. main_weight_buffer (Optional[DataParallelBuffer]): Buffer used to store main model weights for data-parallel operations. main_grad_buffer (Optional[DataParallelBuffer]): @@ -1244,6 +1248,7 @@ class ParameterGroup: fsdp_unit_id: Optional[int] = None chunk_size_factor: int = 1 model_weight_buffer: Optional[DataParallelBuffer] = None + transpose_weight_buffer: Optional[DataParallelBuffer] = None main_weight_buffer: Optional[DataParallelBuffer] = None main_grad_buffer: Optional[DataParallelBuffer] = None hsdp_wbuf: Optional[DataParallelBuffer] = None @@ -1314,12 +1319,10 @@ def _does_param_require_new_bucket(param): parameter_groups = [] for name, param in module.named_parameters(): # We need this information to correctly dynamically allocate Tensors! + is_fp8 = is_float8tensor(param) + is_fp8_meta_device_init = meta_device_init_fp8_params.get(name, (False, False))[0] param_attrs = dict( - dtype=( - "float8" - if is_float8tensor(param) or meta_device_init_fp8_params.get(name, False) - else param.dtype - ), + dtype="float8" if (is_fp8 or is_fp8_meta_device_init) else param.dtype, is_expert_param=is_expert_parameter(name, param), requires_grad=param.requires_grad, fsdp_unit_id=None, @@ -1641,7 +1644,10 @@ def __init__( # to determine whether this parameter is fp8 or not. 
fp8_meta_index = m.param_init_meta[name].fp8_meta_index if m.primary_weights_in_fp8 and fp8_meta_index is not None: - meta_device_init_fp8_params[self.param_to_name[param]] = True + meta_device_init_fp8_params[self.param_to_name[param]] = ( + True, + fp8_need_transpose_data_for_meta_device_init(m), + ) # Get the parameter groups. (self.parameter_groups, self.param_to_param_group, self.bucket_to_bucket_group) = ( @@ -1725,6 +1731,7 @@ def _bytes_to_mb(bytes_val: int) -> str: numel = sum(to_local_if_dtensor(p).shape.numel() for p in group.params) buffers = { "weight": group.model_weight_buffer, + "transpose_weight": group.transpose_weight_buffer, "main_weight": group.main_weight_buffer, "grad": group.main_grad_buffer, } @@ -1794,12 +1801,18 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): self.weight_alloc = FixedPoolAllocator( name="fsdp_params", fsdp_param_groups=self.parameter_groups, size=UB_BUFFER_NUM ) + self.transpose_weight_alloc = FixedPoolAllocator( + name="fsdp_fp8_transpose_params", + fsdp_param_groups=self.parameter_groups, + size=UB_BUFFER_NUM, + ) self.main_grad_alloc = FixedPoolAllocator( name="fsdp_grads", fsdp_param_groups=self.parameter_groups, size=UB_BUFFER_NUM ) self.double_buf_units = self.weight_alloc.fsdp_double_buffer_units else: self.weight_alloc = StorageResizeBasedBucketAllocator() + self.transpose_weight_alloc = StorageResizeBasedBucketAllocator() self.main_grad_alloc = None self.double_buf_units = [] @@ -1839,8 +1852,9 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): ) # Check if the parameter group is FP8. 
one_param = group.params[0] - is_dtype_float8 = is_float8tensor(one_param) or meta_device_init_fp8_params.get( - self.param_to_name[one_param], False + is_dtype_float8 = ( + is_float8tensor(one_param) + or meta_device_init_fp8_params.get(self.param_to_name[one_param], (False, False))[0] ) if is_dtype_float8: param_dtype = torch.uint8 @@ -1849,6 +1863,16 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): param_dtype = group.params[0].dtype grad_dtype = param_dtype + # Check if the parameter group needs a transpose buffer for model weights. + # Currently, only mxfp8 needs it. + need_transpose_data = is_float8tensor(one_param) and fp8_need_transpose_data(one_param) + need_transpose_data_for_meta_device_init = meta_device_init_fp8_params.get( + self.param_to_name[one_param], (False, False) + )[1] + should_create_transpose_weight_buffer = ( + need_transpose_data or need_transpose_data_for_meta_device_init + ) + # Check if the parameter group requires a grad buffer or main weight buffer. 
should_create_grad_buffer_or_main_weight_buffer = ( not self.only_create_grad_buffer_and_main_weight_buffer_for_param_requires_grad @@ -1865,13 +1889,29 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=param_dtype, device=self.device, data_parallel_group=main_buf_dp_group, - is_dtype_float8=is_dtype_float8, + is_transpose_buffer=False, temporary_bucket_allocator=self.weight_alloc, bucket_id=group_id, chunk_size_factor=group.chunk_size_factor, mem_alloc_context=self.mem_alloc_context, **main_buf_extra_kwargs, ) + if should_create_transpose_weight_buffer: + group.transpose_weight_buffer = DataParallelBuffer( + self.ddp_config, + group.params, + is_data_distributed=is_model_weight_buffer_distributed + and main_buf_dp_group.size() > 1, + dtype=param_dtype, + device=self.device, + data_parallel_group=main_buf_dp_group, + is_transpose_buffer=True, + temporary_bucket_allocator=self.transpose_weight_alloc, + bucket_id=group_id, + chunk_size_factor=group.chunk_size_factor, + mem_alloc_context=self.mem_alloc_context, + **main_buf_extra_kwargs, + ) # Initialize the main weight buffer. 
if should_create_grad_buffer_or_main_weight_buffer and preserve_fp32_weights: @@ -1903,7 +1943,7 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=torch.float32 if grad_reduce_in_fp32 else grad_dtype, device=self.device, data_parallel_group=main_buf_dp_group, - is_dtype_float8=False, + is_transpose_buffer=False, temporary_bucket_allocator=self.main_grad_alloc, gradient_scaling_factor=gradient_scaling_factor, bucket_id=group_id, @@ -1927,7 +1967,7 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=wbuf.dtype, device=wbuf.device, data_parallel_group=hsdp_buf_dp_group, - is_dtype_float8=wbuf.is_dtype_float8, + is_transpose_buffer=False, temporary_bucket_allocator=self.weight_alloc, bucket_id=group_id, chunk_size_factor=group.chunk_size_factor, @@ -1943,6 +1983,9 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): ), ) + if group.transpose_weight_buffer is not None: + raise NotImplementedError("HSDP for transpose buffer is not implemented yet") + if should_create_grad_buffer_or_main_weight_buffer: # Initialize the HSDP grad buffer. 
gbuf = group.main_grad_buffer @@ -1954,7 +1997,7 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=gbuf.dtype, device=gbuf.device, data_parallel_group=hsdp_buf_dp_group, - is_dtype_float8=gbuf.is_dtype_float8, + is_transpose_buffer=False, temporary_bucket_allocator=self.main_grad_alloc, gradient_scaling_factor=gradient_scaling_factor, bucket_id=group_id, @@ -2037,6 +2080,20 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): torch.empty(wbuf.data_size, dtype=wbuf.dtype, device=self.device) ) bucket = wbuf.fetch_bucket() + + tbuf = group.transpose_weight_buffer + if tbuf: + with self.mem_alloc_context(): + if group.hsdp_wbuf: + raise NotImplementedError( + "HSDP for transpose buffer is not implemented yet" + ) + else: + tbuf.init_data( + torch.empty(tbuf.data_size, dtype=tbuf.dtype, device=self.device) + ) + transpose_bucket = tbuf.fetch_bucket() + mbuf = group.main_weight_buffer if mbuf: # Manually instantiate an empty tensor into the main weight buffer. @@ -2090,25 +2147,41 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): if not self.ddp_config.keep_fp8_transpose_cache: for _param in m.parameters(recurse=False): if is_float8tensor(_param): - _param._transpose_invalid = True - _param._transpose = None + fp8_discard_transpose_cache(_param) # Raise error if a meta parameter still exists after initialization. assert not p.is_meta, (self.param_to_name[p], module_reset_flag) + p_local = to_local_if_dtensor(p) + # Copy the model weight parameter tensor into the buffer. # When distributed, this shards and preserves the data across all ranks. - wbuf.set_item(item_id, to_local_if_dtensor(p)) + wbuf.set_item(item_id, p_local) + if tbuf: + tbuf.set_item(item_id, p_local) # Retrieve the newly allocated parameter data from the global bucket. # Attach the bucket-allocated parameter data to the module parameter, # to use the bucket-allocated data for autograd and NCCL. 
- new_param_data = wbuf.get_item_from_bucket(bucket, item_id).view( - to_local_if_dtensor(p).shape - ) - if is_float8tensor(p): - # Needed to instantiate FP8 parameters. Requires installing - # TransformerEngine. - modify_underlying_storage(p, new_param_data) + new_param_data = wbuf.get_item_from_bucket(bucket, item_id).view(p_local.shape) + if tbuf: + new_transpose_data = tbuf.get_item_from_bucket( + transpose_bucket, item_id + ).view(p_local.shape) + else: + new_transpose_data = None + + if is_float8tensor(p_local): + old_param_data = fp8_get_raw_data(p_local) + assert old_param_data._base is None + new_param_data.detach().copy_(old_param_data) + fp8_set_raw_data(p_local, new_param_data) + del old_param_data + if new_transpose_data is not None: + old_transpose_data = fp8_get_raw_data(p_local, True) + assert old_transpose_data._base is None + new_transpose_data.detach().copy_(old_transpose_data) + fp8_set_raw_data(p_local, new_transpose_data, True) + del old_transpose_data elif isinstance(p, DTensor): old_param_data = p._local_tensor.data p._local_tensor.data = new_param_data @@ -2146,7 +2219,12 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): # the (high-precision) main weight buffer. # Nothing else needs to be done, because the main weights # do not require autograd operations, only possibly sharding. - mbuf.set_item(item_id, to_local_if_dtensor(p)) + p_local = to_local_if_dtensor(p) + assert not is_float8tensor(p_local), ( + self.param_to_name[p], + "fp8 param should use get_high_precision_init_val method.", + ) + mbuf.set_item(item_id, p_local) if wbuf and wbuf.is_data_distributed: # Free the memory backing the temporarily-allocated bucket associated @@ -2158,6 +2236,9 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): # before forward activations and gradients are allocated in training. 
wbuf.free_bucket_storage() + if tbuf and tbuf.is_data_distributed: + tbuf.free_bucket_storage() + # Allocate the main_weight buffer and main_grad buffer data in one buffer. if self.buffer_all_in_one: with self.mem_alloc_context(): @@ -2281,6 +2362,7 @@ def _reset_parameters(self, old_params, new_params): group.params[item_id] = new_p for buf in [ group.model_weight_buffer, + group.transpose_weight_buffer, group.main_weight_buffer, group.main_grad_buffer, group.hsdp_wbuf, @@ -2328,6 +2410,7 @@ def _init_distributed_params(self): dist_main_weight = {} for pg in self.parameter_groups: wbuf = pg.model_weight_buffer + tbuf = pg.transpose_weight_buffer mbuf = pg.main_weight_buffer for item_id, orig_param in enumerate(pg.params): param_name = self.param_to_name[orig_param] @@ -2355,6 +2438,7 @@ def _init_distributed_params(self): ) dist_main_weight[param_name] = dist_param elif wbuf: + assert tbuf is None, "Transpose buffer should only exist when main params exist" dist_param = make_fsdp_dtensor( local_tensor=wbuf.get_item(item_id, only_shard=sharded_optimizer_state), param=orig_param, @@ -2524,9 +2608,54 @@ def copy_main_weights_to_model_weights(self): expert_param_quantize_kwargs = copy.deepcopy(dense_param_quantize_kwargs) data_parallel_group = None expert_data_parallel_group = None + clear_quantize_kwargs = lambda kwargs: [d.clear() for d in kwargs.values()] + + def _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs): + if len(dense_param_quantize_kwargs["model_params"]) > 0: + # If we have FP8 parameters, we need to quantize them. + fp8_quantize(data_parallel_group=data_parallel_group, **dense_param_quantize_kwargs) + + if len(expert_param_quantize_kwargs["model_params"]) > 0: + # If we have FP8 expert parameters, we need to quantize them. 
+ fp8_quantize( + data_parallel_group=expert_data_parallel_group, **expert_param_quantize_kwargs + ) + + clear_quantize_kwargs(dense_param_quantize_kwargs) + clear_quantize_kwargs(expert_param_quantize_kwargs) + + # Special handling of blockwise FP8 + BATCH_QUANT_MEMORY_LIMIT_BYTES = 5 * 1024**3 # 5 GB + blockwise_fp8_weight_buffers = [] + blockwise_fp8_param_buffers = [] + + def _batch_quantize_blockwise_fp8_params( + dense_param_quantize_kwargs, expert_param_quantize_kwargs, blockwise_fp8_param_buffers + ): + if len(blockwise_fp8_param_buffers) == 0: + return + + # Copy original param shards into their blockwise FP8 working buffers + for bufs in blockwise_fp8_param_buffers: + bufs["bucket_param"].copy_(bufs["param"]) + + # Apply FP8 quantization to blockwise FP8 parameters + _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs) + + # Copy quantized params back from working buffers to original param tensors + for bufs in blockwise_fp8_param_buffers: + bufs["param"].copy_(bufs["bucket_param"]) + blockwise_fp8_param_buffers.clear() + + # Free bucket storage for blockwise FP8 weight buffers + for wbuf in blockwise_fp8_weight_buffers: + wbuf.free_bucket_storage() + blockwise_fp8_weight_buffers.clear() + for pg in self.parameter_groups: mbuf = pg.main_weight_buffer wbuf = pg.model_weight_buffer + tbuf = pg.transpose_weight_buffer if mbuf is None: continue @@ -2542,44 +2671,88 @@ def copy_main_weights_to_model_weights(self): shard_offsets_in_fp8 = quantize_func_kwargs["start_offsets"] shard_model_params = quantize_func_kwargs["fsdp_shard_model_params"] + has_blockwise_fp8_param = False for param in pg.params: item_id = mbuf.param_idx[param] if wbuf: if wbuf.is_data_distributed or mbuf.is_data_distributed: model_param = wbuf.get_item(item_id, only_shard=True) + if tbuf: + transpose_param = tbuf.get_item(item_id, only_shard=True) + else: + transpose_param = None main_weight = mbuf.get_item(item_id, only_shard=True) else: model_param = 
wbuf.get_item(item_id) + if tbuf: + transpose_param = tbuf.get_item(item_id) + else: + transpose_param = None main_weight = mbuf.get_item(item_id) else: assert not mbuf.is_data_distributed model_param = to_local_if_dtensor(param) main_weight = mbuf.get_item(item_id) + if is_blockwise_float8tensor(param): + fp8_params.append(param) + if model_param.numel() == 0: + shard_fp32_from_fp8.append(None) + shard_offsets_in_fp8.append(None) + shard_model_params.append([None, None]) + else: + shard_fp32_from_fp8.append(main_weight) + shard_offsets_in_fp8.append(wbuf.locate_item_in_global_item(item_id)[0]) + bucket = wbuf.fetch_bucket() + b_model_param = wbuf.get_item_from_bucket(bucket, item_id)[ + slice(*wbuf.locate_item_in_global_item(item_id)) + ] + assert ( + transpose_param is None + ), "Blockwise FP8 does not support transpose param." + shard_model_params.append([b_model_param, None]) + assert b_model_param.numel() == model_param.numel(), ( + f"Blockwise FP8 bucket param numel {b_model_param.numel()} does" + f" not match model param numel {model_param.numel()}" + f" name: {self.param_to_name[param]}" + ) + blockwise_fp8_param_buffers.append( + {"bucket_param": b_model_param, "param": model_param} + ) + has_blockwise_fp8_param = True + continue + if is_float8tensor(param): fp8_params.append(param) if model_param.numel() == 0: shard_fp32_from_fp8.append(None) shard_offsets_in_fp8.append(None) - shard_model_params.append(None) + shard_model_params.append([None, None]) else: shard_fp32_from_fp8.append(main_weight) shard_offsets_in_fp8.append(wbuf.locate_item_in_global_item(item_id)[0]) - shard_model_params.append(model_param) + shard_model_params.append([model_param, transpose_param]) continue if model_param.numel() > 0: model_param.data.copy_(main_weight.view(model_param.shape)) - if len(dense_param_quantize_kwargs["model_params"]) > 0: - # If we have FP8 parameters, we need to quantize them. 
- dense_param_quantize_kwargs["data_parallel_group"] = data_parallel_group - quantize_param_shard(**dense_param_quantize_kwargs) + if has_blockwise_fp8_param: + blockwise_fp8_weight_buffers.append(wbuf) + if ( + sum([wbuf.bucket_index.size for wbuf in blockwise_fp8_weight_buffers]) + > BATCH_QUANT_MEMORY_LIMIT_BYTES + ): + _batch_quantize_blockwise_fp8_params( + dense_param_quantize_kwargs, + expert_param_quantize_kwargs, + blockwise_fp8_param_buffers, + ) - if len(expert_param_quantize_kwargs["model_params"]) > 0: - # If we have FP8 expert parameters, we need to quantize them. - expert_param_quantize_kwargs["data_parallel_group"] = expert_data_parallel_group - quantize_param_shard(**expert_param_quantize_kwargs) + _batch_quantize_blockwise_fp8_params( + dense_param_quantize_kwargs, expert_param_quantize_kwargs, blockwise_fp8_param_buffers + ) + _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs) @torch.no_grad() def copy_model_weights_to_main_weights(self): @@ -2597,6 +2770,7 @@ def copy_model_weights_to_main_weights(self): f"Master weight buffer size {mbuf.data.numel()} does not match " f"model weight buffer size {copyin_data.numel()}" ) + # TODO(mxfp8): Make sure it's not a fp8 buf? 
mbuf.data.copy_(copyin_data.data) def all_gather_parameters(self, async_op: bool = True): @@ -2614,15 +2788,18 @@ def all_gather_parameters(self, async_op: bool = True): all_gather_ops = [] for g in self.parameter_groups: - shard = g.model_weight_buffer.get_shard_from_local_buffer() - all_gather_handler = torch.distributed.all_gather_into_tensor( - output_tensor=g.model_weight_buffer.data, - input_tensor=shard, - group=g.model_weight_buffer.data_parallel_group, - async_op=async_op, - ) - if async_op: - all_gather_ops.append(all_gather_handler) + for buf in [g.model_weight_buffer, g.transpose_weight_buffer]: + if buf is None: + continue + shard = buf.get_shard_from_local_buffer() + all_gather_handler = torch.distributed.all_gather_into_tensor( + output_tensor=buf.data, + input_tensor=shard, + group=buf.data_parallel_group, + async_op=async_op, + ) + if async_op: + all_gather_ops.append(all_gather_handler) for op in all_gather_ops: op.wait() @@ -2643,7 +2820,7 @@ def reduce_scatter_gradients(self, async_op: bool = True): reduce_scatter_ops = [] for g in self.parameter_groups: gbuf = g.main_grad_buffer - if gbuf is not None: + if gbuf is None: continue scaling_factor = gbuf.gradient_scaling_factor reduce_op = gradient_reduce_preprocessing(gbuf.data, scaling_factor, self.ddp_config) @@ -3093,9 +3270,16 @@ def __init__( # Track the status of all-gather operations for each bucket. self.param_gather_event_map = {} # All buckets are initially deallocated / empty after initialization of ParamAndGradBuffer. - self.bucket_status = {i: BucketStatus.EMPTY for i in range(self.buffer.num_buckets)} + self.bucket_status = {} + for i in range(self.buffer.num_buckets): + for bwd in [False, True]: + self.bucket_status[self.get_bucket_key(i, bwd)] = BucketStatus.EMPTY + # Track whether each bucket can be deallocated. 
- self.bucket_can_be_released = {i: False for i in range(self.buffer.num_buckets)} + self.bucket_can_be_released = {} + for i in range(self.buffer.num_buckets): + for bwd in [False, True]: + self.bucket_can_be_released[self.get_bucket_key(i, bwd)] = False # Map each bucket to the bucket group it belongs to by enumerated ID. # Made to collect a subset of buckets in the same bucket group. @@ -3120,6 +3304,13 @@ def __init__( # all-gather parameters across groups. self.outer_fsdp_group_param_gather_stream = torch.cuda.Stream() + def get_bucket_key(self, bucket_id, bwd): + """Get the key for the bucket.""" + has_transpose_buffer = ( + self.buffer.parameter_groups[bucket_id].transpose_weight_buffer is not None + ) + return (bucket_id, has_transpose_buffer and bwd) + @property def num_buckets(self): """Return the number of buckets.""" @@ -3136,10 +3327,11 @@ def reset(self): UserWarning, ) while len(self.param_gather_event_map) > 0: - bucket_id = next(iter(self.param_gather_event_map)) - self.wait_bucket_ready(bucket_id) + (bucket_id, bwd) = next(iter(self.param_gather_event_map)) + self.wait_bucket_ready(bucket_id, bwd) for bucket_id in range(self.num_buckets): - self.bucket_can_be_released[bucket_id] = True + for bwd in [False, True]: + self.bucket_can_be_released[self.get_bucket_key(bucket_id, bwd)] = True self.recycle_unused_buckets() assert all([status is BucketStatus.EMPTY for status in self.bucket_status.values()]), ( @@ -3161,6 +3353,7 @@ def all_gather_params( suggested_AG_prefetch_size: Optional[int] = None, async_param_gather: bool = True, outer_fsdp_group_param_gather: bool = False, + bwd: bool = False, ): """All-gather the params. If prefetch is enabled, prefetch next buckets in the order of `prefetch_order`. @@ -3195,7 +3388,7 @@ def all_gather_params( # Do not release the buckets that are being all-gathered. 
for bucket_id in ag_buckets: - self.bucket_can_be_released[bucket_id] = False + self.bucket_can_be_released[self.get_bucket_key(bucket_id, bwd)] = False # If prefetch is enabled, we will add prefetch buckets to ag_buckets. if prefetch: @@ -3267,7 +3460,11 @@ def need_skip_prefetch(bucket_id): bucket_id = next_bucket_id(ag_buckets) # Only all-gather on buckets that have not been allocated yet. - ag_buckets = [i for i in ag_buckets if self.bucket_status[i] == BucketStatus.EMPTY] + ag_buckets = [ + bucket_id + for bucket_id in ag_buckets + if self.bucket_status[self.get_bucket_key(bucket_id, bwd)] == BucketStatus.EMPTY + ] if len(ag_buckets) == 0: return @@ -3286,6 +3483,7 @@ def need_skip_prefetch(bucket_id): self.ag_stream if self.ag_stream is not None else torch.cuda.current_stream() ) if outer_fsdp_group_param_gather: + # TODO(mxfp8): Support hsdp self.outer_fsdp_group_param_gather_stream.wait_stream(torch.cuda.current_stream()) with torch.cuda.stream(self.outer_fsdp_group_param_gather_stream): outer_fsdp_group = self.buffer.dist_index.get_outer_fsdp_group() @@ -3313,12 +3511,13 @@ def need_skip_prefetch(bucket_id): for bucket_id in buckets: # All-gather the module weights from each FSDP buffer shard # into an allocated bucket containing unsharded weights. - self.async_bucket_gather(bucket_id) + self.async_bucket_gather(bucket_id, bwd) # Replace the parameter all-gather event with coalescing event. 
for bucket_id in buckets: - _, mark_bucket_ready_to_use = self.param_gather_event_map[bucket_id] - self.param_gather_event_map[bucket_id] = ( + bucket_key = self.get_bucket_key(bucket_id, bwd) + _, mark_bucket_ready_to_use = self.param_gather_event_map[bucket_key] + self.param_gather_event_map[bucket_key] = ( coalescing_event, mark_bucket_ready_to_use, ) @@ -3326,14 +3525,16 @@ def need_skip_prefetch(bucket_id): # Wait for all-gather to finish if not async_param_gather: for bucket_id in buckets: - self.wait_bucket_ready(bucket_id) + self.wait_bucket_ready(bucket_id, bwd) - def wait_bucket_ready(self, bucket_id, empty_ok=False): + def wait_bucket_ready(self, bucket_id, bwd, empty_ok=False): """Wait for the bucket to be ready.""" - if self.bucket_status[bucket_id] == BucketStatus.READY_TO_USE: + bucket_key = self.get_bucket_key(bucket_id, bwd) + + if self.bucket_status[bucket_key] == BucketStatus.READY_TO_USE: # Already ready to use. return - if self.bucket_status[bucket_id] == BucketStatus.EMPTY: + if self.bucket_status[bucket_key] == BucketStatus.EMPTY: if empty_ok: return # Bucket shouldn't be empty, this implies that the bucket @@ -3341,48 +3542,64 @@ def wait_bucket_ready(self, bucket_id, empty_ok=False): raise ValueError(f"Bucket {bucket_id} is empty.") # Wait for asynchronous / overlapped NCCL operations to complete. - param_gather_event, mark_bucket_ready_to_use = self.param_gather_event_map.pop(bucket_id) + param_gather_event, mark_bucket_ready_to_use = self.param_gather_event_map.pop(bucket_key) param_gather_event.wait() mark_bucket_ready_to_use() @torch.no_grad() - def release_bucket(self, bucket_id: int): + def release_bucket(self, bucket_id, bwd): """Release the bucket.""" - if self.bucket_status[bucket_id] == BucketStatus.EMPTY: + # TODO(mxfp8): In some cases, there won't be ag before bwd? 
+ bucket_key = self.get_bucket_key(bucket_id, bwd) + + if self.bucket_status[bucket_key] == BucketStatus.EMPTY: return - self.wait_bucket_ready(bucket_id, empty_ok=True) - if self.bucket_status[bucket_id] == BucketStatus.COMMUNICATING: + self.wait_bucket_ready(bucket_id, bwd, empty_ok=True) + if self.bucket_status[bucket_key] == BucketStatus.COMMUNICATING: raise ValueError(f"Bucket {bucket_id} is communicating.") - wbuf = self.buffer.parameter_groups[bucket_id].model_weight_buffer - wbuf.free_bucket_storage() - self.bucket_status[bucket_id] = BucketStatus.EMPTY + if bwd and self.buffer.parameter_groups[bucket_id].transpose_weight_buffer is not None: + buf = self.buffer.parameter_groups[bucket_id].transpose_weight_buffer + else: + buf = self.buffer.parameter_groups[bucket_id].model_weight_buffer + + buf.free_bucket_storage() + self.bucket_status[bucket_key] = BucketStatus.EMPTY def recycle_unused_buckets(self): """Recycle the unused buckets.""" - for bucket_id, can_be_released in self.bucket_can_be_released.items(): + for bucket_key, can_be_released in self.bucket_can_be_released.items(): if can_be_released: - self.release_bucket(bucket_id) - self.bucket_can_be_released[bucket_id] = False + bucket_id, is_transpose_weight = bucket_key[0], bucket_key[1] + self.release_bucket(bucket_id, is_transpose_weight) + self.bucket_can_be_released[bucket_key] = False - def get_fsdp_buffer(self, bucket_id: int) -> DataParallelBuffer: + def get_fsdp_buffer(self, bucket_id: int, bwd=False) -> DataParallelBuffer: """Get the FSDP buffer with the given bucket ID.""" param_group = self.buffer.parameter_groups[bucket_id] if self.buffer.ddp_config.outer_dp_sharding_strategy != "no_shard": - return param_group.hsdp_wbuf - return param_group.model_weight_buffer + if bwd and param_group.transpose_weight_buffer is not None: + raise RuntimeError("Transpose buffer is not supported for HSDP") + else: + return param_group.hsdp_wbuf + if bwd and param_group.transpose_weight_buffer is not None: + 
return param_group.transpose_weight_buffer + else: + return param_group.model_weight_buffer @torch.no_grad() - def async_bucket_gather(self, bucket_id: int) -> None: + def async_bucket_gather(self, bucket_id, bwd) -> None: """All-gather the bucket and set the items.""" - self.bucket_can_be_released[bucket_id] = False - if self.bucket_status[bucket_id] != BucketStatus.EMPTY: + bucket_key = self.get_bucket_key(bucket_id, bwd) + + self.bucket_can_be_released[bucket_key] = False + if self.bucket_status[bucket_key] != BucketStatus.EMPTY: return - self.bucket_status[bucket_id] = BucketStatus.COMMUNICATING + self.bucket_status[bucket_key] = BucketStatus.COMMUNICATING - wbuf = self.get_fsdp_buffer(bucket_id) + wbuf = self.get_fsdp_buffer(bucket_id, bwd) # Lazy release the unused buckets. self.recycle_unused_buckets() @@ -3397,18 +3614,21 @@ def async_bucket_gather(self, bucket_id: int) -> None: async_op=True, ) - def get_closure(bucket_id): + def get_closure(bucket_id, bwd): @torch.no_grad() def mark_bucket_ready_to_use(): # Mark the bucket as ready to use - all NCCL operations are complete. - self.bucket_status[bucket_id] = BucketStatus.READY_TO_USE + self.bucket_status[self.get_bucket_key(bucket_id, bwd)] = BucketStatus.READY_TO_USE return mark_bucket_ready_to_use - mark_bucket_ready_to_use = get_closure(bucket_id) + mark_bucket_ready_to_use = get_closure(bucket_id, bwd) # Track the async all-gather operation for the bucket. 
- self.param_gather_event_map[bucket_id] = (param_gather_event, mark_bucket_ready_to_use) + self.param_gather_event_map[self.get_bucket_key(bucket_id, bwd)] = ( + param_gather_event, + mark_bucket_ready_to_use, + ) @torch.no_grad() @@ -3501,15 +3721,13 @@ def override_sharded_param_methods_with_safety_checks(params, all_gather_pipelin def override_sharded_param_to_function_closure(p, to_function): def override_sharded_param_to_function(*args, **kwargs): - bucket_id = all_gather_pipeline.buffer.param_to_param_group[p] - status = all_gather_pipeline.bucket_status[bucket_id] - if status == BucketStatus.READY_TO_USE: - return to_function(*args, **kwargs) - raise RuntimeError( - "This parameter is already shard by MCore FSDP and the " - "shared-state parameter does not support 'to' function." - "please define the dtype and device of the parameter before FSDP wrap." - ) + if p._typed_storage()._size() == 0: + warnings.warn( + "The parameter may be sharded by Megatron-FSDP, " + "no actual 'to' operation is performed." + ) + return torch.empty([]) + return to_function(*args, **kwargs) return override_sharded_param_to_function @@ -3517,15 +3735,13 @@ def override_sharded_param_to_function(*args, **kwargs): def override_sharded_param_cpu_function_closure(p, cpu_function): def override_sharded_param_cpu_function(*args, **kwargs): - bucket_id = all_gather_pipeline.buffer.param_to_param_group[p] - status = all_gather_pipeline.bucket_status[bucket_id] - if status == BucketStatus.READY_TO_USE: - return cpu_function(*args, **kwargs) - warnings.warn( - "The parameters are sharded by MCore FSDP, and no actual cpu " - "operation is performed." - ) - return torch.empty([], device="cpu") + if p._typed_storage()._size() == 0: + warnings.warn( + "The parameter may be sharded by Megatron-FSDP, " + "no actual 'cpu' operation is performed." 
+ ) + return torch.empty([], device="cpu") + return cpu_function(*args, **kwargs) return override_sharded_param_cpu_function diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py index c9679494737..3d15711275f 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py @@ -19,7 +19,7 @@ from contextlib import nullcontext from functools import reduce from importlib.metadata import version -from typing import Callable, List, Optional, Sequence, Union +from typing import Callable, Optional, Sequence, Union try: import einops @@ -79,52 +79,6 @@ def is_te_min_version(vers, check_equality=True): return te_version > PkgVersion(vers) -# Check if Transformer Engine has class for fp8 tensors. -try: - if is_te_min_version("2.0"): - # In TE2.x, QuantizedTensor is the base class for all different type of fp8 tensors, - # including fp8 tensor for delayed scaling, current scaling and mxfp8, etc. - from transformer_engine.pytorch.tensor import QuantizedTensor as FP8_TENSOR_CLASS - else: - from transformer_engine.pytorch.float8_tensor import Float8Tensor as FP8_TENSOR_CLASS - - HAVE_TE_FP8_TENSOR_CLASS = True -except (ImportError, ModuleNotFoundError): - # FP8 tensor class not found - HAVE_TE_FP8_TENSOR_CLASS = False - -try: - from transformer_engine.pytorch.optimizers import multi_tensor_applier, multi_tensor_scale - - multi_tensor_scale_impl = multi_tensor_scale -except ImportError: - try: - import amp_C - from apex.multi_tensor_apply import multi_tensor_applier - - multi_tensor_scale_impl = amp_C.multi_tensor_scale - except ImportError: - import warnings - - warnings.warn( - "Transformer Engine and Apex are not installed. 
" - "Falling back to local implementations of " - "multi_tensor_applier and multi_tensor_scale" - ) - - def local_multi_tensor_applier(op, noop_flag_buffer, tensor_lists, *args): - """Multi tensor op applier""" - return op(2048 * 32, noop_flag_buffer, tensor_lists, *args) - - def local_multi_tensor_scale(chunk_size, noop_flag, tensor_lists, scale): - """Works as a drop-in replacement for amp_C.multi_tensor_scale.""" - for src, dst in zip(tensor_lists[0], tensor_lists[1]): - dst.copy_(src * scale) - - multi_tensor_applier = local_multi_tensor_applier - multi_tensor_scale_impl = local_multi_tensor_scale - - def is_submodule(module, parent_module, strict=True): """ Check if a module is a submodule of another module. @@ -138,18 +92,6 @@ def is_submodule(module, parent_module, strict=True): return False -def is_float8tensor(tensor: torch.Tensor) -> bool: - """Check if a tensor is a Transformer Engine Float8Tensor. - - Note that in TE2.x, in order to support more recipes, the design of the fp8 tensor class has - changed. Now Float8Tensor is only used for current scaling and delayed scaling. And mxfp8 - and blockwise scaling have their own fp8 tensor classes. These different fp8 tensor classes - are both inherited from QuantizedTensor. So, for TE1.x, FP8_TENSOR_CLASS is Float8Tensor, - and for TE2.x, FP8_TENSOR_CLASS is QuantizedTensor. - """ - return HAVE_TE_FP8_TENSOR_CLASS and isinstance(tensor, FP8_TENSOR_CLASS) - - def get_mesh_names(device_mesh: Optional[DeviceMesh] = None) -> list[str]: """ Get all the sub-mesh names in the DeviceMesh. @@ -188,198 +130,6 @@ def contains_submesh( return all(submesh_name in device_mesh_names for submesh_name in submesh_names) -def _multi_tensor_copy_this_to_that( - this: List[torch.Tensor], that: List[torch.Tensor], overflow_buf: Optional[torch.Tensor] = None -): - """ - Use multi-tensor-applier to copy values from one list to another. 
- We don't have a bfloat16 implementation so for now if the overflow_buf - is not provided, we default back to simple loop copy to be compatible - with bfloat16. - """ - if overflow_buf is not None: - overflow_buf.fill_(0) - # Scaling with factor `1.0` is equivalent to copy. - multi_tensor_applier(multi_tensor_scale_impl, overflow_buf, [this, that], 1.0) - else: - for this_, that_ in zip(this, that): - that_.copy_(this_) - - -""" -The code below abstracts the functionalities needed for implementing "--fp8-param-gather" into -several functions. It provides different implementations for each function based on different -versions of TE, ensuring compatibility across various TE versions. - -Currently, there are three functions: - - modify_underlying_storage - This function is used in DDP to place all parameters into a contiguous buffer. For - non-fp8 tensors, replacing their data is simple, just using code like - "tensor.data = new_data". However, for fp8 tensors, their raw data is not stored in the - ".data" attribute, and it varies with different TE versions and different recipes. This - function provides a unified interface to replace the underlying storage of a fp8 tensor. - - quantize_param_shard - This function is used in dist-opt to cast fp32 main params to fp8 params. For non-fp8 - params, this casting is as simple as "bf16_params.copy_(fp32_main_params)"; but for fp8 - params, the casting logic varies with different TE versions and different recipes. This - function provides a unified interface to cast fp32 main params to fp8 params, and also - updates the necessary attributes (like amax, scale, scale_inv or transpose cache) of the - fp8 model params. - - correct_amax_history_if_needed - This function is used to correct the amax history of fp8 tensors. In TE1.x, some inplace - copy operations will write unwanted values to the amax_history of fp8 tensors. This function - corrects the amax_history back. For TE2.x, it's an empty function. 
- Only useful for delayed scaling. -""" -if HAVE_TE and is_te_min_version("2.2"): - # Supported TE versions: 2.2+ - from transformer_engine.pytorch.tensor import QuantizedTensor - - def _modify_underlying_storage_impl( - fp8_tensor: QuantizedTensor, new_raw_data: torch.Tensor - ) -> None: - from transformer_engine.pytorch.tensor.utils import replace_raw_data - - replace_raw_data(fp8_tensor, new_raw_data) - - def _quantize_param_shard_impl( - model_params: List[QuantizedTensor], - main_params: List[torch.Tensor], - start_offsets: List[int], - data_parallel_group: ProcessGroup, - fsdp_shard_model_params: Optional[List[torch.Tensor]] = None, - ) -> None: - if len(model_params) == 0: - return - - from transformer_engine.pytorch.tensor.utils import cast_master_weights_to_fp8 - - args = [model_params, main_params, start_offsets, data_parallel_group] - if fsdp_shard_model_params is not None: - if get_te_version() == PkgVersion("2.3.0.dev0+5fdd7bb") or is_te_min_version("2.3.0"): - args.append(fsdp_shard_model_params) - else: - raise NotImplementedError( - f"FSDP with --fp8-param-gather is not supported in TE v{get_te_version()}" - ) - cast_master_weights_to_fp8(*args) - -elif HAVE_TE and is_te_min_version("2.0"): - # Supported TE versions: 2.0 - from transformer_engine.pytorch.tensor import QuantizedTensor - from transformer_engine.pytorch.tensor.float8_tensor import Float8Tensor - - def _modify_underlying_storage_impl( - fp8_tensor: QuantizedTensor, new_raw_data: torch.Tensor - ) -> None: - old_raw_data = fp8_tensor._data - assert old_raw_data.dtype == new_raw_data.dtype - new_raw_data.detach().copy_(old_raw_data) - fp8_tensor._data = new_raw_data - del old_raw_data - - def _quantize_param_shard_impl( - model_params: List[QuantizedTensor], - main_params: List[torch.Tensor], - start_offsets: List[int], - data_parallel_group: ProcessGroup, - fsdp_shard_model_params: Optional[List[torch.Tensor]] = None, - ) -> None: - if len(model_params) == 0: - return - - if 
fsdp_shard_model_params is None: - fsdp_shard_model_params = [None] * len(model_params) - - for model_param, main_param, start_offset, fsdp_shard_model_param in zip( - model_params, main_params, start_offsets, fsdp_shard_model_params - ): - if main_param is None: - continue - - if fsdp_shard_model_param is not None: - shard_model_param = fsdp_shard_model_param - else: - shard_model_param = model_param._data.view(-1)[ - start_offset : start_offset + main_param.numel() - ] - - quantizer = model_param._quantizer - # When not using --fp8-param-gather, the main_param (fp32) is first cast to bf16/fp16, - # and then cast to fp8 during forward. - # Although it's not necessary when --fp8-param-gather is enabled, we still keep this - # logic to keep numerical consistency. So here cast the main_param to model_param.dtype. - main_param = main_param.to(model_param.dtype) - out = Float8Tensor( - shape=main_param.size(), - dtype=model_param.dtype, - requires_grad=False, - data=shard_model_param, - fp8_scale_inv=model_param._scale_inv, - fp8_dtype=model_param._fp8_dtype, - quantizer=quantizer, - ) - quantizer.update_quantized(main_param, out) - - amaxes = [] - scales = [] - scale_invs = [] - for model_param in model_params: - quantizer = model_param._quantizer - amaxes.append(quantizer.amax.view(1)) - scales.append(quantizer.scale.view(1)) - scale_invs.append(model_param._scale_inv.view(1)) - model_param._reset_caches() - - dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device="cuda") - - # Update scaling factors. - packed_scales = torch.empty(len(scales), dtype=torch.float32, device=scales[0].device) - packed_scale_views = [packed_scales[i].view(1) for i in range(len(scales))] - _multi_tensor_copy_this_to_that(scales, packed_scale_views, dummy_overflow_buf) - torch.reciprocal(packed_scales, out=packed_scales) - _multi_tensor_copy_this_to_that(packed_scale_views, scale_invs, dummy_overflow_buf) - - # Reduce amaxes. - # Note: Assume each param has a separate amax. 
- packed_amaxes = torch.empty(len(amaxes), dtype=torch.float32, device=amaxes[0].device) - packed_amax_views = [packed_amaxes[i].view(1) for i in range(len(amaxes))] - _multi_tensor_copy_this_to_that(amaxes, packed_amax_views, dummy_overflow_buf) - torch.distributed.all_reduce( - packed_amaxes, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group - ) - _multi_tensor_copy_this_to_that(packed_amax_views, amaxes, dummy_overflow_buf) - -else: - # Fallback impl if TE version is invalid or TE is not installed. - def _modify_underlying_storage_impl(*args, **kwargs): - raise RuntimeError( - "Invalid Transformer Engine version for FP8 distributed optimizer, " - "please install Transformer Engine 2.0+ or install Megatron-Core" - ) - - def _quantize_param_shard_impl(*args, **kwargs): - raise RuntimeError( - "Invalid Transformer Engine version for FP8 distributed optimizer, " - "please install Transformer Engine 2.0+ or install Megatron-Core" - ) - - -def modify_underlying_storage(tensor: torch.Tensor, new_raw_data: torch.Tensor): - """Replace the underlying raw data of a tensor with new data.""" - _modify_underlying_storage_impl(tensor, new_raw_data) - - -def quantize_param_shard( - model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params=None -): - """Cast shard fp32 main params to fp8 model params.""" - assert HAVE_TE, "Transformer Engine is required for quantizing parameters." 
- _quantize_param_shard_impl( - model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params - ) - - def _get_cuda_rng_state( device: Union[int, str, torch.device] = "cuda", clone: bool = False, graph_safe: bool = False ) -> torch.Tensor: diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index c157d062c53..b267c8a8170 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -745,6 +745,9 @@ def validate_args(args, defaults={}): assert args.ckpt_format == "fsdp_dtensor", \ "Megatron FSDP only supports fsdp_dtensor checkpoint format" + if args.use_megatron_fsdp: + args.reuse_grad_buf_for_mxfp8_param_ag = False + # Parameters dtype. args.params_dtype = torch.float if args.fp16: From a935008a5fa775e8bd5a03fb9081ddceeeaa0d13 Mon Sep 17 00:00:00 2001 From: Yuzhong Wang Date: Fri, 19 Dec 2025 12:35:45 +0800 Subject: [PATCH 199/334] [Dev] Feat(moe): Gated delta net context parallel (CP) (#2614) --- megatron/core/ssm/gated_delta_net.py | 303 +++++++++++++++--- .../core/transformer/transformer_config.py | 19 +- tests/unit_tests/ssm/test_gated_delta_net.py | 178 +--------- 3 files changed, 291 insertions(+), 209 deletions(-) diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py index dfa6e4c35e4..2b0a18b433b 100644 --- a/megatron/core/ssm/gated_delta_net.py +++ b/megatron/core/ssm/gated_delta_net.py @@ -21,6 +21,12 @@ from megatron.core.jit import jit_fuser from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.ssm.mamba_context_parallel import ( + _all_to_all_cp2hp, + _all_to_all_hp2cp, + _redo_attention_load_balancing, + _undo_attention_load_balancing, +) from megatron.core.tensor_parallel import get_cuda_rng_tracker from megatron.core.transformer import TransformerConfig from megatron.core.transformer.identity_op import IdentityOp @@ -33,9 +39,6 @@ ) from 
megatron.core.utils import deprecate_inference_params, nvtx_range_pop, nvtx_range_push -# TODO: Implement GatedDeltaNetContextParallel -# from .gated_delta_net_context_parallel import GatedDeltaNetContextParallel - try: from fla.modules.l2norm import l2norm from fla.ops.gated_delta_rule import chunk_gated_delta_rule @@ -84,6 +87,7 @@ def __init__( use_qk_l2norm: bool = True, A_init_range: Tuple[float, float] = (1, 16), pg_collection: ProcessGroupCollection = None, + **kwargs, ): """ Args: @@ -114,6 +118,7 @@ def __init__( self.use_qk_l2norm = use_qk_l2norm assert pg_collection is not None, "pg_collection must be provided for GatedDeltaNet" self.pg_collection = pg_collection + self.cp_size = self.pg_collection.cp.size() self.tp_size = self.pg_collection.tp.size() self.sp_size = self.tp_size if config.sequence_parallel else 1 @@ -129,6 +134,8 @@ def __init__( self.num_value_heads = config.linear_num_value_heads self.qk_dim = self.key_head_dim * self.num_key_heads self.v_dim = self.value_head_dim * self.num_value_heads + self.qk_dim_local_tp = self.qk_dim // self.tp_size + self.v_dim_local_tp = self.v_dim // self.tp_size # Input projection (hidden_states -> q, k, v, gate, beta, alpha) # TODO: for now, output gate is forced for GDN. 
@@ -217,8 +224,6 @@ def __init__( tp_group=self.pg_collection.tp, ) - # TODO: support CP - self.reset_parameters() def reset_parameters(self): @@ -247,17 +252,12 @@ def forward( self, hidden_states: Tensor, attention_mask: Tensor, - key_value_states: Optional[Tensor] = None, inference_context: Optional[BaseInferenceContext] = None, - rotary_pos_emb: Optional[Union[Tensor, Tuple[Tensor, Tensor]]] = None, - rotary_pos_cos: Optional[Tensor] = None, - rotary_pos_sin: Optional[Tensor] = None, - rotary_pos_cos_sin: Optional[Tensor] = None, - attention_bias: Optional[Tensor] = None, packed_seq_params: Optional[PackedSeqParams] = None, sequence_len_offset: Optional[int] = None, *, inference_params: Optional[BaseInferenceContext] = None, + **kwargs, ): """ Perform a forward pass through the GDN module. @@ -265,15 +265,8 @@ def forward( Args: hidden_states (Tensor): Hidden states. attention_mask (Tensor): Attention mask. - key_value_states (Optional[Tensor]): Key/value states (for cross attention). inference_context (Optional[BaseInferenceContext]): Inference context that manages KV cache. - rotary_pos_emb (Optional[Union[Tensor, Tuple[Tensor, Tensor]]]): Rotary - embedding tensor(s). - rotary_pos_cos (Optional[Tensor]): Rotary embedding cosine. - rotary_pos_sin (Optional[Tensor]): Rotary embedding sine. - rotary_pos_cos_sin (Optional[Tensor]): Combined rotary embedding cosine and sine. - attention_bias (Optional[Tensor]): Attention bias. packed_seq_params (Optional[PackedSeqparams]): Parameters used for THD format. sequence_len_offset (Optional[int]): Sequence length offset used for inference CUDA graphs. 
@@ -287,7 +280,7 @@ def forward( inference_context = deprecate_inference_params(inference_context, inference_params) seq_len, batch, _ = hidden_states.shape - seq_len = seq_len * self.sp_size + seq_len = seq_len * self.sp_size * self.cp_size if inference_context is not None: assert ( @@ -306,6 +299,22 @@ def forward( qkvzba, _ = self.in_proj(hidden_states) nvtx_range_pop(suffix="in_proj") + # CP All to All: CP to HP + qkvzba = tensor_a2a_cp2hp( + qkvzba, + seq_dim=0, + head_dim=-1, + cp_group=self.pg_collection.cp, + split_sections=[ + self.qk_dim_local_tp, + self.qk_dim_local_tp, + self.v_dim_local_tp, + self.v_dim_local_tp, + self.num_value_heads // self.tp_size, + self.num_value_heads // self.tp_size, + ], + ) + # Transpose: s b x --> b s x # From sbhd to bshd format qkvzba = qkvzba.transpose(0, 1) @@ -314,10 +323,10 @@ def forward( qkv, gate, beta, alpha = torch.split( qkvzba, [ - (self.qk_dim * 2 + self.v_dim) // self.tp_size, - self.v_dim // self.tp_size, - self.num_value_heads // self.tp_size, - self.num_value_heads // self.tp_size, + (self.qk_dim_local_tp * 2 + self.v_dim_local_tp) // self.cp_size, + self.v_dim_local_tp // self.cp_size, + self.num_value_heads // self.tp_size // self.cp_size, + self.num_value_heads // self.tp_size // self.cp_size, ], dim=-1, ) @@ -328,14 +337,44 @@ def forward( # Convolution on qkv qkv = qkv.transpose(1, 2).contiguous() # b, s, d -> b, d, s nvtx_range_push(suffix="conv1d") + qkv_channels_split_sections = [ + self.qk_dim_local_tp, + self.qk_dim_local_tp, + self.v_dim_local_tp, + ] + conv1d_weight = get_parameter_local_cp( + self.conv1d.weight, + dim=0, + cp_group=self.pg_collection.cp, + split_sections=qkv_channels_split_sections, + ) + conv1d_bias = ( + get_parameter_local_cp( + self.conv1d.bias, + dim=0, + cp_group=self.pg_collection.cp, + split_sections=qkv_channels_split_sections, + ) + if self.conv_bias + else None + ) if (causal_conv1d_fn is None) or self.config.deterministic_mode: - qkv = 
self.act_fn(self.conv1d(qkv)[..., :seq_len]) + conv_out = F.conv1d( + input=qkv, + weight=conv1d_weight, + bias=conv1d_bias, + stride=self.conv1d.stride, + padding=self.conv1d.padding, + dilation=self.conv1d.dilation, + groups=self.conv_dim_local_tp // self.cp_size, + ) + qkv = self.act_fn(conv_out[..., :seq_len]) else: assert self.activation in ["silu", "swish"] qkv = causal_conv1d_fn( x=qkv, - weight=self.conv1d.weight.squeeze(1), # d, 1, w -> d, w - bias=self.conv1d.bias, + weight=conv1d_weight.squeeze(1), # d, 1, w -> d, w + bias=conv1d_bias, activation=self.activation, ) nvtx_range_pop(suffix="conv1d") @@ -343,7 +382,11 @@ def forward( qkv = qkv.transpose(1, 2) # b, d, s -> b, s, d query, key, value = torch.split( qkv, - [self.qk_dim // self.tp_size, self.qk_dim // self.tp_size, self.v_dim // self.tp_size], + [ + self.qk_dim_local_tp // self.cp_size, + self.qk_dim_local_tp // self.cp_size, + self.v_dim_local_tp // self.cp_size, + ], dim=-1, ) query = query.reshape(batch, seq_len, -1, self.key_head_dim) @@ -367,7 +410,11 @@ def forward( # Calculate g and beta nvtx_range_push(suffix="g_and_beta") - g = -self.A_log.exp() * F.softplus(alpha.float() + self.dt_bias) # In fp32 + A_log_local_cp = get_parameter_local_cp(self.A_log, dim=0, cp_group=self.pg_collection.cp) + dt_bias_local_cp = get_parameter_local_cp( + self.dt_bias, dim=0, cp_group=self.pg_collection.cp + ) + g = -A_log_local_cp.exp() * F.softplus(alpha.float() + dt_bias_local_cp) # In fp32 beta = beta.sigmoid() nvtx_range_pop(suffix="g_and_beta") @@ -406,6 +453,11 @@ def forward( norm_out = norm_out.reshape(batch, seq_len, -1) norm_out = norm_out.transpose(0, 1).contiguous() + # CP all to all: HP to CP + norm_out = tensor_a2a_hp2cp( + norm_out, seq_dim=0, head_dim=-1, cp_group=self.pg_collection.cp + ) + # Output projection nvtx_range_push(suffix="out_proj") out, out_bias = self.out_proj(norm_out) @@ -479,10 +531,10 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None, tp_gr 
sharded_state_dict[f"{prefix}in_proj.weight"] = _split_tensor_factory( sharded_state_dict[f"{prefix}in_proj.weight"], [ - self.qk_dim // self.tp_size, - self.qk_dim // self.tp_size, - self.v_dim // self.tp_size, - self.v_dim // self.tp_size, + self.qk_dim_local_tp, + self.qk_dim_local_tp, + self.v_dim_local_tp, + self.v_dim_local_tp, self.num_value_heads // self.tp_size, self.num_value_heads // self.tp_size, ], @@ -502,11 +554,7 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None, tp_gr for conv_layer_name in conv_layer_name_list: sharded_state_dict[f"{prefix}{conv_layer_name}"] = _split_tensor_factory( sharded_state_dict[f"{prefix}{conv_layer_name}"], - [ - self.qk_dim // self.tp_size, - self.qk_dim // self.tp_size, - self.v_dim // self.tp_size, - ], + [self.qk_dim_local_tp, self.qk_dim_local_tp, self.v_dim_local_tp], ["query", "key", "value"], 0, ) @@ -514,6 +562,9 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None, tp_gr return sharded_state_dict +#################### +# Sharded state dict utilities +#################### def _split_tensor_factory( orig_sh_ten: ShardedTensor, split_sections: List[int], split_names: List[str], split_dim: int ) -> ShardedTensorFactory: @@ -574,6 +625,184 @@ def sh_ten_merge_fn(sub_state_dict): ) +#################### +# Context parallel utilities +#################### +def get_parameter_local_cp( + param: torch.Tensor, + dim: int, + cp_group: torch.distributed.ProcessGroup, + split_sections: Optional[List[int]] = None, +) -> torch.Tensor: + """Get the local parameter for the current context parallel rank. + + Args: + param (torch.Tensor): The entire parameter to get the local parameter for. + dim (int): The dimension to split the parameter along. Usually the dimension of head. + cp_group (torch.distributed.ProcessGroup): The context parallel group. 
+ split_sections (Optional[List[int]]): If not None, + first split the parameter along the dimension dim into sections, + then get the local hidden parallel weights separately, + finally concatenate the local hidden parallel weights along the dimension dim. + + Returns: + torch.Tensor: The local parameter for the current context parallel rank. + """ + + cp_size = cp_group.size() + cp_rank = cp_group.rank() + + # No need to split if CP size is 1. + if cp_size == 1: + return param + + # Split first if needed. + if split_sections is not None: + inputs = torch.split(param, split_sections, dim=dim) + outputs = [] + for p in inputs: + p = get_parameter_local_cp(p, dim, cp_group) + outputs.append(p) + return torch.cat(outputs, dim=dim) + + # Slice the parameter. + slices = [slice(None)] * param.dim() + dim_size = param.size(dim=dim) + slices[dim] = slice(cp_rank * dim_size // cp_size, (cp_rank + 1) * dim_size // cp_size) + param = param[slices] + return param + + +def tensor_a2a_cp2hp( + tensor: torch.Tensor, + seq_dim: int, + head_dim: int, + cp_group: torch.distributed.ProcessGroup, + split_sections: Optional[List[int]] = None, + undo_attention_load_balancing: bool = True, +): + """All-to-all context parallel to hidden parallel. + + Args: + tensor (torch.Tensor): The tensor to all-to-all. + Currently only support (seq_len, batch, head_dim) shaped tensor. + seq_dim (int): The dimension of sequence length. Currently only supports seq_dim == 0. + head_dim (int): The dimension of head. Currently only supports head_dim == -1 or 2. + cp_group (torch.distributed.ProcessGroup): The context parallel group. + split_sections (Optional[List[int]]): If not None, split the tensor along the dimension + head_dim into sections first, then do all-to-all for each section separately, + finally concatenate the separated tensors along the dimension head_dim. + undo_attention_load_balancing (bool): Whether to undo the attention load balancing of CP. 
+ + Returns: + torch.Tensor: The all-to-all tensor. + """ + + cp_size = cp_group.size() + + # No need to all-to-all if CP size is 1. + if cp_size == 1: + return tensor + + # Limitations of mamba_context_parallel._all_to_all_cp2hp. + assert seq_dim == 0, f"tensor_a2a_cp2hp only supports seq_dim == 0 for now, but got {seq_dim=}" + assert ( + head_dim == -1 or head_dim == 2 + ), f"tensor_a2a_cp2hp only supports head_dim == -1 or 2 for now, but got {head_dim=}" + assert ( + tensor.dim() == 3 + ), f"tensor_a2a_cp2hp only supports 3-d input tensor for now, but got {tensor.dim()=}" + + # Split first if needed. + if split_sections is not None: + inputs = torch.split(tensor, split_sections, dim=head_dim) + outputs = [] + for x in inputs: + x = tensor_a2a_cp2hp( + x, + seq_dim=seq_dim, + head_dim=head_dim, + cp_group=cp_group, + undo_attention_load_balancing=False, + ) + outputs.append(x) + tensor = torch.cat(outputs, dim=head_dim) + else: + tensor = _all_to_all_cp2hp(tensor, cp_group) + + # Undo attention load balancing last if needed. + if undo_attention_load_balancing: + tensor = _undo_attention_load_balancing(tensor, cp_size) + return tensor + + +def tensor_a2a_hp2cp( + tensor: torch.Tensor, + seq_dim: int, + head_dim: int, + cp_group: torch.distributed.ProcessGroup, + split_sections: Optional[List[int]] = None, + redo_attention_load_balancing: bool = True, +): + """All-to-all hidden parallel to context parallel. + + Args: + tensor (torch.Tensor): The tensor to all-to-all. + Currently only support (seq_len, batch, head_dim) shaped tensor. + seq_dim (int): The dimension of sequence length. Currently only supports seq_dim == 0. + head_dim (int): The dimension of head. Currently only supports head_dim == -1 or 2. + cp_group (torch.distributed.ProcessGroup): The context parallel group. 
+ split_sections (Optional[List[int]]): If not None, first split the tensor along the + dimension head_dim into sections, then do all-to-all for each section separately, + finally concatenate the separated tensors along the dimension head_dim. + redo_attention_load_balancing (bool): Whether to redo the attention load balancing of HP. + + Returns: + torch.Tensor: The all-to-all tensor. + """ + + cp_size = cp_group.size() + + # No need to all-to-all if CP size is 1. + if cp_size == 1: + return tensor + + # Limitations of mamba_context_parallel._all_to_all_hp2cp. + assert seq_dim == 0, f"tensor_a2a_cp2hp only supports seq_dim == 0 for now, but got {seq_dim=}" + assert ( + head_dim == -1 or head_dim == 2 + ), f"tensor_a2a_cp2hp only supports head_dim == -1 or 2 for now, but got {head_dim=}" + assert ( + tensor.dim() == 3 + ), f"tensor_a2a_cp2hp only supports 3-d input tensor for now, but got {tensor.dim()=}" + + # Redo attention load balancing first if needed. + if redo_attention_load_balancing: + tensor = _redo_attention_load_balancing(tensor, cp_size) + + # Split first if needed. 
+ if split_sections is not None: + inputs = torch.split(tensor, split_sections, dim=head_dim) + outputs = [] + for x in inputs: + x = tensor_a2a_hp2cp( + x, + seq_dim=seq_dim, + head_dim=head_dim, + cp_group=cp_group, + redo_attention_load_balancing=False, + ) + outputs.append(x) + tensor = torch.cat(outputs, dim=head_dim) + else: + tensor = _all_to_all_hp2cp(tensor, cp_group) + + return tensor + + +#################### +# Torch native gated delta rule +#################### def torch_chunk_gated_delta_rule( query, key, diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index e2705bd9f51..6493a4bcce1 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -922,17 +922,14 @@ def __post_init__(self): ) # Check tensor parallelism compatibility - assert ( - self.linear_num_key_heads % self.tensor_model_parallel_size == 0 - ), "linear_num_key_heads must be a multiple of tensor_model_parallel_size." - assert ( - self.linear_num_value_heads % self.tensor_model_parallel_size == 0 - ), "linear_num_value_heads must be a multiple of tensor_model_parallel_size." - - # Do not support yet, but coming soon. - assert self.context_parallel_size == 1, ( - f"Gated delta net does not support context parallel for now," - f" but got {self.context_parallel_size=}." + tp_cp_size = self.tensor_model_parallel_size * self.context_parallel_size + assert self.linear_num_key_heads % tp_cp_size == 0, ( + f"{self.linear_num_key_heads=} must be a multiple of " + f"({self.tensor_model_parallel_size=} * {self.context_parallel_size=})." + ) + assert self.linear_num_value_heads % tp_cp_size == 0, ( + f"{self.linear_num_value_heads=} must be a multiple of " + f"({self.tensor_model_parallel_size=} * {self.context_parallel_size=})." 
) elif self.experimental_attention_variant == "dsa": assert ( diff --git a/tests/unit_tests/ssm/test_gated_delta_net.py b/tests/unit_tests/ssm/test_gated_delta_net.py index 89a185e3755..725d18fbc06 100644 --- a/tests/unit_tests/ssm/test_gated_delta_net.py +++ b/tests/unit_tests/ssm/test_gated_delta_net.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. from functools import partial from unittest import mock @@ -28,6 +28,7 @@ init_checkpointing_mock_args, ) from tests.unit_tests.test_utilities import Utils +from tests.unit_tests.transformer.test_attention import _test_parallel_attention_correctness try: import fla @@ -39,12 +40,7 @@ @pytest.mark.parametrize( ("tp_size", "sp", "cp_size"), - [ - (1, False, 1), - (2, False, 1), - (2, True, 1), - # GDN does not support CP for now. Leave it for future work. - ], + [(1, False, 1), (2, False, 1), (2, True, 1), (1, False, 2), (2, False, 2), (2, True, 2)], ) @pytest.mark.skipif(not HAVE_FLA, reason="FLA is not installed.") @pytest.mark.internal @@ -142,50 +138,13 @@ def test_gpu_forward(self): [ (4, False, 1), # TP w/o SP (4, True, 1), # TP w/ SP - # CP does not support GDN for now. Add it once it is supported. 
+ (1, False, 2), # CP + (2, False, 2), # TP w/o SP + CP + (2, True, 2), # TP w/ SP + CP ], ) @pytest.mark.skipif(not HAVE_FLA, reason="FLA is not installed.") def test_parallel_gated_delta_net_correctness(tmp_path_dist_ckpt, tp, sp, cp): - # Constants - seed = 123 - sequence_length = 256 - micro_batch_size = 4 - hidden_size = 128 - normalization = "RMSNorm" - - # Model initialization function - def initialize_gpt_model(config, pre_process=True, post_process=True, vp_stage=None): - layer_spec = get_gpt_layer_with_transformer_engine_spec( - experimental_attention_variant="gated_delta_net", normalization=normalization - ) - gpt_model = GPTModel( - config=config, - transformer_layer_spec=layer_spec, - vocab_size=128, - max_sequence_length=sequence_length, - pre_process=pre_process, - post_process=post_process, - vp_stage=vp_stage, - ) - return gpt_model - - # Initialize baseline parallel state - Utils.initialize_model_parallel( - tensor_model_parallel_size=1, pipeline_model_parallel_size=1, context_parallel_size=1 - ) - - # Initialize input hidden states - torch.manual_seed(seed) - model_parallel_cuda_manual_seed(seed) - input_hidden_states = ( - torch.rand((sequence_length, micro_batch_size, hidden_size)) - .cuda() - .bfloat16() - .requires_grad_(True) - ) - - # Initialize transformer config transformer_config = TransformerConfig( hidden_size=128, linear_conv_kernel_dim=2, @@ -194,7 +153,7 @@ def initialize_gpt_model(config, pre_process=True, post_process=True, vp_stage=N linear_num_key_heads=4, linear_num_value_heads=8, num_layers=1, - normalization=normalization, + normalization="RMSNorm", use_cpu_initialization=True, layernorm_zero_centered_gamma=True, num_attention_heads=8, @@ -202,118 +161,15 @@ def initialize_gpt_model(config, pre_process=True, post_process=True, vp_stage=N bf16=True, ) - with TempNamedDir(tmp_path_dist_ckpt / 'test_parallel_gdn', sync=True) as ckpt_dir: - # Set argument - mock_args = parse_args(ignore_unknown_args=True) - set_args(mock_args) - 
- # Initialize baseline model - init_basic_mock_args(mock_args, 1, 1, bf16=True) - mock_args.context_parallel_size = 1 - mock_args.sequence_parallel = 1 - gpt_model = unwrap_model( - get_model(partial(initialize_gpt_model, config=transformer_config)) - ) - - # Initialize args and save checkpoint - init_checkpointing_mock_args(mock_args, ckpt_dir, False) - mock_args.no_save_optim = True - mock_args.no_save_rng = True - mock_args.no_load_optim = True - mock_args.no_load_rng = True - save_checkpoint(10, gpt_model, None, None, 0) - - # Calculate baseline output - attention = gpt_model[0].decoder.layers[0].self_attention - output_hidden_states_baseline, bias_hidden_states_baseline = attention( - input_hidden_states, attention_mask=None - ) - output_hidden_states_baseline.sum().backward() - - # Save baseline output - input_grad_baseline = input_hidden_states.grad.detach() - output_hidden_states_baseline = output_hidden_states_baseline.detach() - - # Initialize parallel model - Utils.destroy_model_parallel() - Utils.initialize_model_parallel( - tensor_model_parallel_size=tp, pipeline_model_parallel_size=1, context_parallel_size=cp - ) - torch.manual_seed(seed) - model_parallel_cuda_manual_seed(seed) - transformer_config.context_parallel_size = cp - transformer_config.tensor_model_parallel_size = tp - transformer_config.sequence_parallel = sp - init_basic_mock_args(mock_args, tp, 1, bf16=True) - mock_args.context_parallel_size = cp - mock_args.sequence_parallel = sp - gpt_model = unwrap_model( - get_model(partial(initialize_gpt_model, config=transformer_config)) - ) - with mock.patch('megatron.training.checkpointing.check_checkpoint_args'): - with mock.patch('megatron.training.checkpointing.update_num_microbatches'): - load_checkpoint(gpt_model, None, None) - - # Function to get tensor on this tp and cp rank - cp_group = parallel_state.get_context_parallel_group() - tp_rank = parallel_state.get_tensor_model_parallel_rank() - - def get_tensor_on_this_rank(tensor): - if cp > 
1: - tensor = get_tensor_on_this_cp_rank(tensor, 0, cp_group) - if tp > 1 and sp: - sp_seg = sequence_length // tp // cp - tensor = tensor[tp_rank * sp_seg : (tp_rank + 1) * sp_seg] - return tensor - - # Calculate parallel model output - input_hidden_states = get_tensor_on_this_rank(input_hidden_states) - input_hidden_states = input_hidden_states.detach().requires_grad_(True) - parallel_attention = gpt_model[0].decoder.layers[0].self_attention - output_hidden_states_parallel, bias_hidden_states_parallel = parallel_attention( - input_hidden_states, attention_mask=None - ) - output_hidden_states_parallel.sum().backward() - input_grad_parallel = input_hidden_states.grad.detach() - - # Check if the output is the same - if cp: - atol, rtol = 5e-3, 5e-3 - else: - atol, rtol = 5e-4, 5e-4 - output_hidden_states_baseline = get_tensor_on_this_rank(output_hidden_states_baseline) - input_grad_baseline = get_tensor_on_this_rank(input_grad_baseline) - - assert torch.all( - ~torch.isnan(output_hidden_states_baseline) - ), "output_hidden_states_baseline contains nan" - assert torch.all( - ~torch.isinf(output_hidden_states_baseline) - ), "output_hidden_states_baseline contains inf" - assert torch.all(~torch.isnan(input_grad_baseline)), "input_grad_baseline contains nan" - assert torch.all(~torch.isinf(input_grad_baseline)), "input_grad_baseline contains inf" - assert torch.all( - ~torch.isnan(output_hidden_states_parallel) - ), "output_hidden_states_parallel contains nan" - assert torch.all( - ~torch.isinf(output_hidden_states_parallel) - ), "output_hidden_states_parallel contains inf" - assert torch.all(~torch.isnan(input_grad_parallel)), "input_grad_parallel contains nan" - assert torch.all(~torch.isinf(input_grad_parallel)), "input_grad_parallel contains inf" + transformer_layer_spec = get_gpt_layer_with_transformer_engine_spec( + experimental_attention_variant="gated_delta_net", normalization="RMSNorm" + ) - torch.testing.assert_close( - output_hidden_states_baseline, - 
output_hidden_states_parallel, - atol=atol, - rtol=rtol, - msg=lambda msg: f"Mismatch in output_hidden_states: {msg}", - ) - torch.testing.assert_close( - input_grad_baseline, - input_grad_parallel, - atol=atol, - rtol=rtol, - msg=lambda msg: f"Mismatch in input_grad: {msg}", - ) + if cp: + atol, rtol = 5e-3, 5e-3 + else: + atol, rtol = 5e-4, 5e-4 - Utils.destroy_model_parallel() + _test_parallel_attention_correctness( + transformer_config, transformer_layer_spec, tmp_path_dist_ckpt, tp, sp, cp + ) From fd932c9df547ec9364b6edcc58983f8ddfedea64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 19 Dec 2025 15:33:49 +0100 Subject: [PATCH 200/334] ci: Gridify test configs (#2707) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig Co-authored-by: Dennis Liu --- .../test_cases/ci_base_config.yml | 14 +++ .../golden_values_dev_dgx_h100.json | 0 .../golden_values_dev_dgx_h100.json | 0 .../moe2.0/model_configs/dsv3_proxy.yaml | 85 ++++++++++++++++ .../moe2.0/model_configs/qwen3_proxy.yaml | 74 ++++++++++++++ .../moe2.0/runtime_configs/tp1pp1ep8.yaml | 41 ++++++++ .../moe2.0/runtime_configs/tp2pp2ep4.yaml | 55 +++++++++++ .../test_utils/python_scripts/merge_config.py | 92 ++++++++++++++++++ .../python_scripts/recipe_parser.py | 41 ++++++-- tests/test_utils/recipes/moe2.0.yaml | 97 +++++++++++++++++++ 10 files changed, 491 insertions(+), 8 deletions(-) create mode 100644 tests/functional_tests/test_cases/ci_base_config.yml create mode 100644 tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp1pp1ep8/golden_values_dev_dgx_h100.json create mode 100644 tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4/golden_values_dev_dgx_h100.json create mode 100644 tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml create mode 100644 tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml create mode 100644 
tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml create mode 100644 tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml create mode 100644 tests/test_utils/python_scripts/merge_config.py create mode 100644 tests/test_utils/recipes/moe2.0.yaml diff --git a/tests/functional_tests/test_cases/ci_base_config.yml b/tests/functional_tests/test_cases/ci_base_config.yml new file mode 100644 index 00000000000..739f343da9d --- /dev/null +++ b/tests/functional_tests/test_cases/ci_base_config.yml @@ -0,0 +1,14 @@ +MODEL_ARGS: + # Add logging args + --log-timers-to-tensorboard: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-params-norm: true + --log-validation-ppl-to-tensorboard: true + --log-throughput: true + --log-interval: 1 + --logging-level: 40 + --tensorboard-dir: ${TENSORBOARD_PATH} + # Add checkpointing args + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp1pp1ep8/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp1pp1ep8/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml b/tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml new file mode 100644 index 00000000000..70924aed0cc --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml @@ -0,0 +1,85 @@ +MODEL_ARGS: + # Data args + --seq-length: 4096 + --data-cache-path: ${DATA_CACHE_PATH} + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --split: 949,50,1 + # Add transformer base args + --num-layers: 16 + --hidden-size: 1024 + --normalization: RMSNorm + --norm-epsilon: 1e-6 + --disable-bias-linear: true + --max-position-embeddings: 4096 + --make-vocab-size-divisible-by: 3232 + --untie-embeddings-and-output-weights: true + # Add attention related args + --multi-latent-attention: true + --num-attention-heads: 32 + --kv-channels: 128 + --qk-layernorm: true + --position-embedding-type: rope + --rotary-base: 10000 + --q-lora-rank: 1536 + --kv-lora-rank: 512 + --qk-head-dim: 128 + --qk-pos-emb-head-dim: 64 + --v-head-dim: 128 + --rotary-scaling-factor: 40 + --mscale: 1.0 + --mscale-all-dim: 1.0 + # Add MLP related args + --swiglu: true + --ffn-hidden-size: 4096 + # Add MoE args + --num-experts: 32 + --moe-layer-freq: ([0]*1+[1]*15) + --moe-ffn-hidden-size: 1024 + --moe-shared-expert-intermediate-size: 1024 + --moe-router-load-balancing-type: seq_aux_loss + --moe-router-topk: 4 + --moe-router-pre-softmax: true + --moe-grouped-gemm: true + --moe-aux-loss-coeff: 1e-4 + --moe-router-group-topk: 2 + --moe-router-num-groups: 4 + --moe-router-topk-scaling-factor: 2.0 + --moe-router-score-function: sigmoid + --moe-router-enable-expert-bias: true + --moe-router-bias-update-rate: 1e-3 + --moe-router-dtype: fp32 + # Comment out the following MTP args to disable MTP + --mtp-num-layers: 1 + --mtp-loss-scaling-factor: 0.1 + # Add regularization args + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --clip-grad: 1.0 + --weight-decay: 0.1 + # Add learning rate args + --lr-warmup-fraction: .01 + --lr: 0.00015 + --min-lr: 1.0e-5 + --lr-decay-style: cosine + --adam-beta1: 0.9 + --adam-beta2: 0.95 + # Add validation args + --eval-iters: 32 + --eval-interval: 200 + # Add initialization args + --init-method-std: 0.02 + # Training args + --global-batch-size: 32 + --train-iters: 50 + --exit-duration-in-mins: 
230 + --no-check-for-nan-in-loss-and-grad: true + +METRICS: + - "lm loss" + - "num-zeros" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" + - "mtp_1 loss" + - "seq_load_balancing_loss" diff --git a/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml b/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml new file mode 100644 index 00000000000..46e298ec971 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml @@ -0,0 +1,74 @@ +MODEL_ARGS: + # Data args + --seq-length: 4096 + --data-cache-path: ${DATA_CACHE_PATH} + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --split: 949,50,1 + # Add transformer base args + --num-layers: 16 + --hidden-size: 1024 + --normalization: RMSNorm + --norm-epsilon: 1e-6 + --disable-bias-linear: true + --max-position-embeddings: 4096 + --make-vocab-size-divisible-by: 3232 + --untie-embeddings-and-output-weights: true + # Add attention related args + --group-query-attention: true + --num-query-groups: 4 + --kv-channels: 128 + --qk-layernorm: true + --position-embedding-type: rope + --rotary-percent: 1.0 + --rotary-base: 1000000 + # Add MLP related args + --swiglu: true + --ffn-hidden-size: 4096 + # Add MoE args + --num-experts: 32 + --moe-layer-freq: ([0]*1+[1]*15) + --moe-ffn-hidden-size: 1024 + --moe-shared-expert-intermediate-size: 1024 + --moe-router-load-balancing-type: aux_loss + --moe-router-topk: 4 + --moe-router-pre-softmax: true + --moe-grouped-gemm: true + --moe-aux-loss-coeff: 1e-4 + --moe-router-group-topk: 2 + --moe-router-num-groups: 4 + --moe-router-topk-scaling-factor: 2.0 + --moe-router-score-function: sigmoid + --moe-router-enable-expert-bias: true + --moe-router-bias-update-rate: 1e-3 + --moe-router-dtype: fp32 + # Add regularization args + --attention-dropout: 0.0 + 
--hidden-dropout: 0.0 + --clip-grad: 1.0 + --weight-decay: 0.1 + # Add learning rate args + --lr-warmup-fraction: .01 + --lr: 0.00015 + --min-lr: 1.0e-5 + --lr-decay-style: cosine + --adam-beta1: 0.9 + --adam-beta2: 0.95 + # Add validation args + --eval-iters: 32 + --eval-interval: 200 + # Add initialization args + --init-method-std: 0.02 + # Training args + --global-batch-size: 32 + --train-iters: 50 + --exit-duration-in-mins: 230 + --no-check-for-nan-in-loss-and-grad: true + +METRICS: + - "lm loss" + - "num-zeros" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" + - "load_balancing_loss" diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml new file mode 100644 index 00000000000..305e2847305 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml @@ -0,0 +1,41 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION + +MODEL_ARGS: + # Transformer Engine args + --transformer-impl: transformer_engine + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 1 + --pipeline-model-parallel-size: 1 + --expert-model-parallel-size: 8 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + --overlap-grad-reduce: true + --overlap-param-gather: true + # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN + --attention-backend: unfused # TODO: switch back to fused attention after fix + --use-mcore-models: true + --sequence-parallel: true + --micro-batch-size: 4 + # MoE training related args + --moe-token-dispatcher-type: alltoall + --moe-permute-fusion: true + --save-interval: 25 + # Add mixed precision args + --bf16: true + --exit-interval: 50 + # kernel fusion related 
args + --no-rope-fusion: true + --cross-entropy-loss-fusion: true + --cross-entropy-fusion-impl: native + # MISC + --manual-gc: true + --manual-gc-interval: 100 +TEST_TYPE: resume-ckpt diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml new file mode 100644 index 00000000000..b93862aff8c --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml @@ -0,0 +1,55 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION + +MODEL_ARGS: + # Transformer Engine args + --transformer-impl: transformer_engine + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --num-virtual-stages-per-pipeline-rank: 4 + --expert-model-parallel-size: 4 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + --overlap-grad-reduce: true + --overlap-param-gather: true + # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN + --attention-backend: unfused # TODO: switch back to fused attention after fix + --use-mcore-models: true + --sequence-parallel: true + --micro-batch-size: 4 + # MoE training related args + --moe-token-dispatcher-type: alltoall + --moe-permute-fusion: true + # Add checkpointing args + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} + --save-interval: 25 + # Add logging args + --log-timers-to-tensorboard: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-params-norm: true + --log-validation-ppl-to-tensorboard: true + --log-throughput: true + --log-interval: 1 + --logging-level: 40 + --tensorboard-dir: ${TENSORBOARD_PATH} + # Add mixed precision args + --bf16: true + --exit-interval: 50 + # 
kernel fusion related args + --no-rope-fusion: true + --cross-entropy-loss-fusion: true + --cross-entropy-fusion-impl: native + # MISC + --manual-gc: true + --manual-gc-interval: 100 +TEST_TYPE: resume-ckpt \ No newline at end of file diff --git a/tests/test_utils/python_scripts/merge_config.py b/tests/test_utils/python_scripts/merge_config.py new file mode 100644 index 00000000000..176706038b7 --- /dev/null +++ b/tests/test_utils/python_scripts/merge_config.py @@ -0,0 +1,92 @@ +""" +Merges base_config, runtime_config and model_config into one final config that the CI can launch. + +Starting Dec 19th 2025 MCore CI supports a new format of defining tests. We are decoupling the test +config into a modular system of base_config, model_config and runtime_config. This allows us to +re-use and parametrize a given model easily with multiple runtime configs, like parallelism settings. + +With this DRY principle, we simplify test maintenance and reduce the amount of code duplication. + +This refactoring is fully compliant with the original CI system as we merge the three configs into one +final config that the CI can launch. + +Precendence: Base config > Model config > Runtime config. 
+ +Usage: + +python merge_config.py \ + --model_config model_config.yaml \ + --base_config base_config.yaml \ + --runtime_config runtime_config.yaml \ + --output_config output_config.yaml +""" + +import logging + +import click +import yaml + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@click.command() +@click.option("--model_config", type=str, help="Model config to merge") +@click.option("--base_config", type=str, help="Base config to merge") +@click.option("--runtime_config", type=str, help="Run time config to merge") +@click.option("--output_config", type=str, help="Output config to merge") +def main(model_config, base_config, runtime_config, output_config): + + with open(model_config, "r") as f: + model_config = yaml.safe_load(f) + with open(base_config, "r") as f: + base_config = yaml.safe_load(f) + with open(runtime_config, "r") as f: + runtime_config = yaml.safe_load(f) + + config = {} + + # Collect all top-level keys (ENV_VARS, MODEL_ARGS, etc.) + all_keys = set(base_config.keys()) | set(model_config.keys()) | set(runtime_config.keys()) + + for key in all_keys: + base_val = base_config.get(key) + model_val = model_config.get(key) + runtime_val = runtime_config.get(key) + + # Get first non-None value to check type + first_val = base_val or model_val or runtime_val + + if isinstance(first_val, dict): + # Merge dicts + config[key] = {} + for val in [base_val, model_val, runtime_val]: + if val: + config[key].update(val) + elif isinstance(first_val, list): + # Concatenate lists (deduplicate while preserving order) + config[key] = [] + seen = set() + for val in [base_val, model_val, runtime_val]: + if val: + for item in val: + if item not in seen: + config[key].append(item) + seen.add(item) + else: + # Scalar value (string, int, bool, etc.) 
- use last defined + if runtime_val is not None: + config[key] = runtime_val + elif model_val is not None: + config[key] = model_val + else: + config[key] = base_val + + with open(output_config, "w") as f: + yaml.dump(config, f) + + logger.info(f"Config merged and saved to {output_config}") + + +if __name__ == "__main__": + main() diff --git a/tests/test_utils/python_scripts/recipe_parser.py b/tests/test_utils/python_scripts/recipe_parser.py index e26d04d6f20..a497bdbd9de 100644 --- a/tests/test_utils/python_scripts/recipe_parser.py +++ b/tests/test_utils/python_scripts/recipe_parser.py @@ -48,14 +48,34 @@ def resolve_artifact_config(cluster: str) -> str: def flatten_products(workload_manifest: dotdict) -> dotdict: """Flattens a nested dict of products""" - workload_manifest.products = [ - dict(**dict(zip(inp.keys(), values)), **{"test_case": product["test_case"][0]}) - for product in (workload_manifest.products or []) - if "products" in product - for inp in product["products"] - for values in itertools.product(*inp.values()) - ] - + expanded_products = [] + + for product in workload_manifest.products or []: + # Skip products that don't have nested product specifications + if "products" not in product: + continue + + test_case = product["test_case"][0] + + # Iterate over each input specification in the product + for inp in product["products"]: + # Generate all combinations of the input values (Cartesian product) + model_config = inp.pop("model_config", None) + runtime_config = inp.pop("runtime_config", None) + keys = inp.keys() + value_combinations = itertools.product(*inp.values()) + + # Create a flattened product dict for each combination + for values in value_combinations: + product_dict = dict(zip(keys, values)) + product_dict["test_case"] = test_case + if model_config: + product_dict["model_config"] = model_config + if runtime_config: + product_dict["runtime_config"] = runtime_config + expanded_products.append(product_dict) + + workload_manifest.products = 
expanded_products return workload_manifest @@ -98,11 +118,16 @@ def load_and_flatten(config_path: str) -> List[dotdict]: def filter_by_test_case(workload_manifests: List[dotdict], test_case: str) -> Optional[dotdict]: """Returns a workload with matching name. Raises an error if there no or more than a single workload.""" + print(len(workload_manifests)) workload_manifests = list( workload_manifest for workload_manifest in workload_manifests if workload_manifest["spec"]["test_case"] == test_case ) + print(len(workload_manifests)) + + for w in workload_manifests: + print(w["spec"]["test_case"]) if len(workload_manifests) > 1: logger.info("Duplicate test_case found!") diff --git a/tests/test_utils/recipes/moe2.0.yaml b/tests/test_utils/recipes/moe2.0.yaml new file mode 100644 index 00000000000..e3249dd6ad1 --- /dev/null +++ b/tests/test_utils/recipes/moe2.0.yaml @@ -0,0 +1,97 @@ +type: basic +format_version: 1 +maintainers: [mcore] +loggers: [stdout] +spec: + name: '{test_case}_{environment}_{platforms}' + model: moe2.0 + build: mcore-pyt-{environment} + nodes: 1 + gpus: 8 + n_repeat: 5 + platforms: dgx_a100 + script_setup: | + unset https_proxy + echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc + + # Checkout latest + cd /opt + rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm + git init + git remote add origin $MCORE_REPO + git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*' + git fetch origin $MCORE_MR_COMMIT + git checkout $MCORE_MR_COMMIT + git rev-parse HEAD + + # Checkout backwards-ref + cd /opt + rm -rf /opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy + git init + git remote add origin $MCORE_REPO + git fetch origin $MCORE_BACKWARDS_COMMIT + git checkout $MCORE_BACKWARDS_COMMIT + git rev-parse HEAD + rm -rf megatron; cp -a /opt/megatron-lm/megatron ./ + script: |- + ls + cd /opt/megatron-lm + + NAME=$(echo {test_case}_{environment} | sed 
's/dgx_h100/dgx_a100/g') + + mkdir -p $(dirname ./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml) + python ./tests/test_utils/python_scripts/merge_config.py \ + --base_config ./tests/functional_tests/test_cases/ci_base_config.yml \ + --model_config ./tests/functional_tests/test_cases/{model}/model_configs/{model_config}.yaml \ + --runtime_config ./tests/functional_tests/test_cases/{model}/runtime_configs/{runtime_config}.yaml \ + --output_config ./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml + + ARGUMENTS=( + "DATA_PATH=/mnt/artifacts" + "DATA_CACHE_PATH=/workspace/data/cache" + "OUTPUT_PATH={assets_dir}" + "TENSORBOARD_PATH={assets_dir}/tensorboard" + "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" + "TRAINING_SCRIPT_PATH=pretrain_gpt.py" + "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "N_REPEAT={n_repeat}" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" + "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" + ) + + bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} + +products: + ########################### + # Merge train tests # + ########################### + - test_case: [dsv3_tp1pp1ep8] + products: + - model_config: dsv3_proxy + runtime_config: tp1pp1ep8 + environment: [dev] + scope: [broken] + platforms: [dgx_h100] + - test_case: [dsv3_tp2pp2ep4] + products: + - model_config: dsv3_proxy + runtime_config: tp2pp2ep4 + environment: [dev] + scope: [broken] + platforms: [dgx_h100] + - test_case: [qwen3_tp1pp1ep1] + products: + - model_config: qwen3_proxy + runtime_config: tp1pp1ep1 + environment: [dev] + scope: [broken] + platforms: [dgx_h100] + - test_case: [qwen3_tp2pp2ep4] + products: + - model_config: qwen3_proxy + runtime_config: tp2pp2ep4 + environment: [dev] 
+ scope: [broken] + platforms: [dgx_h100] From 2b1fc70891cd1b45b6a02a588430253a78bdb4fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 22 Dec 2025 08:49:00 +0000 Subject: [PATCH 201/334] Revert "[dev] Add assertion for mxfp8 params without dp overlap (#2270)" This reverts commit 7968d5f98f8457297d4a73f96d8a086d84a8fa67. --- .../core/distributed/distributed_data_parallel_config.py | 8 -------- .../src/megatron_fsdp/distributed_data_parallel_config.py | 8 -------- 2 files changed, 16 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel_config.py b/megatron/core/distributed/distributed_data_parallel_config.py index e2a026d836f..3f97beab825 100644 --- a/megatron/core/distributed/distributed_data_parallel_config.py +++ b/megatron/core/distributed/distributed_data_parallel_config.py @@ -146,14 +146,6 @@ def __post_init__(self): """Check the validity of the config.""" if self.reuse_grad_buf_for_mxfp8_param_ag: assert self.fp8_param_gather, "Reuse grad buffer only when keeping params in MXFP8." - # Using mxfp8 param without overlap param gather and overlap grad reduce will cause NaN. - # TODO: Remove this assertion when the issue is fixed. 
- assert ( - self.overlap_param_gather - ), "--overlap-param-gather is required when using mxfp8 params" - assert ( - self.overlap_grad_reduce - ), "--overlap-grad-reduce is required when using mxfp8 params" if self.nccl_ub: if 'expandable_segments:True' in os.getenv('PYTORCH_CUDA_ALLOC_CONF', '').split(','): diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py index 5151ecabfb5..86826758498 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py @@ -137,14 +137,6 @@ def __post_init__(self): """Check the validity of the config.""" if self.reuse_grad_buf_for_mxfp8_param_ag: assert self.fp8_param_gather, "Reuse grad buffer only when keeping params in MXFP8." - # Using mxfp8 param without overlap param gather and overlap grad reduce will cause NaN. - # TODO: Remove this assertion when the issue is fixed. 
- assert ( - self.overlap_param_gather - ), "--overlap-param-gather is required when using mxfp8 params" - assert ( - self.overlap_grad_reduce - ), "--overlap-grad-reduce is required when using mxfp8 params" if self.nccl_ub: if 'expandable_segments:True' in os.getenv('PYTORCH_CUDA_ALLOC_CONF', '').split(','): From 4665be4dec0cd26f32e91d7fc4e1be4f1ea2132d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 22 Dec 2025 11:18:06 +0100 Subject: [PATCH 202/334] Revert "[Dev] Use the latest Hybrid-EP (#2424)" (#2732) --- docker/Dockerfile.ci.dev | 2 +- megatron/core/transformer/moe/fused_a2a.py | 51 ++++++++++++++----- .../core/transformer/moe/token_dispatcher.py | 15 ++++-- 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index 5caa6003630..482c6af460c 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -62,7 +62,7 @@ RUN bash -ex <<"EOF" git clone --branch hybrid-ep https://github.com/deepseek-ai/DeepEP.git pushd DeepEP - git checkout 83e0d156807f31abed4ea55c2fa6eb4b62a11b82 + git checkout 1dddd194c26911c35b4f53a148617dd73de0ffc9 patch -p1 < /workspace/deepep.patch popd TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" uv pip install --no-build-isolation -v DeepEP/. 
diff --git a/megatron/core/transformer/moe/fused_a2a.py b/megatron/core/transformer/moe/fused_a2a.py index aa13b9b5b5b..045a93039b3 100644 --- a/megatron/core/transformer/moe/fused_a2a.py +++ b/megatron/core/transformer/moe/fused_a2a.py @@ -3,7 +3,6 @@ # Copyright (c) 2025 DeepSeek # Licensed under the MIT License - https://github.com/deepseek-ai/DeepEP/blob/main/LICENSE -from megatron.core.utils import internal_api try: from deep_ep import Buffer @@ -329,7 +328,6 @@ def reset_hybrid_ep_buffer(): _hybrid_ep_buffer = None -@internal_api class HybridEPDispatch(torch.autograd.Function): ''' Fused dispatch operation for permute + dispatch a2a + permute using the HybridEP backend @@ -345,6 +343,7 @@ def forward( num_local_experts, num_sms_dispatch_api=24, num_sms_combine_api=24, + num_dispatched_tokens=None, num_permuted_tokens=None, pad_multiple=None, ): @@ -363,9 +362,11 @@ def forward( num_sms_combine_api, fp8_dispatch, ) - # If we provide the num_permuted_tokens, we do not need to use sync to - # wait for the data in pinned memory ready - non_blocking = num_permuted_tokens is not None + # Defaultly, the output token_per_expert and num_dispatched_tokens_tensor + # will be put on the CPU to avoid the potential sync in combine/backward pass, + # but if we provide the num_dispatched_tokens and num_permuted_tokens on CPU, + # we do not need to the D2H here. 
+ use_host_meta = num_dispatched_tokens is None or num_permuted_tokens is None # Process the dispatch ( dispatched_hidden, @@ -380,12 +381,14 @@ def forward( scaling_factor=None, num_of_experts_per_rank=num_local_experts, pad_multiple=pad_multiple, + num_dispatched_tokens=num_dispatched_tokens, num_permuted_tokens=num_permuted_tokens, - non_blocking=non_blocking, + use_host_meta=use_host_meta, ) ctx.handle = handle ctx.pad_multiple = pad_multiple + ctx.num_dispatched_tokens = num_dispatched_tokens return ( dispatched_hidden, dispatched_probs, @@ -401,27 +404,36 @@ def backward(ctx, grad_x, grad_probs, grad_scaling_factor, grad_tokens_per_exper ''' handle = ctx.handle combined_hidden, combined_probs = _hybrid_ep_buffer.combine_with_unpermute( - hidden=grad_x, probs=grad_probs, handle=handle, pad_multiple=ctx.pad_multiple + hidden=grad_x, + probs=grad_probs, + handle=handle, + pad_multiple=ctx.pad_multiple, + num_dispatched_tokens=ctx.num_dispatched_tokens, ) return combined_hidden, None, combined_probs, None, None, None, None, None, None, None -@internal_api class HybridEPCombine(torch.autograd.Function): ''' Fused combine operation for permute + combine a2a + permute using the HybridEP backend ''' @staticmethod - def forward(ctx, x, handle, num_permuted_tokens=None, pad_multiple=None): + def forward( + ctx, x, handle, num_dispatched_tokens=None, num_permuted_tokens=None, pad_multiple=None + ): ''' Forward pass of fused combine of the HybridEP backend ''' combined_hidden, _ = _hybrid_ep_buffer.combine_with_unpermute( - hidden=x, handle=handle, pad_multiple=pad_multiple + hidden=x, + handle=handle, + pad_multiple=pad_multiple, + num_dispatched_tokens=num_dispatched_tokens, ) ctx.handle = handle ctx.pad_multiple = pad_multiple + ctx.num_dispatched_tokens = num_dispatched_tokens ctx.num_permuted_tokens = num_permuted_tokens return combined_hidden @@ -436,6 +448,7 @@ def backward(ctx, grad_x): scaling_factor=None, handle=handle, pad_multiple=ctx.pad_multiple, + 
num_dispatched_tokens=ctx.num_dispatched_tokens, num_permuted_tokens=ctx.num_permuted_tokens, ) return dispatched_hidden, None, None, None, None @@ -443,7 +456,6 @@ def backward(ctx, grad_x): if HAVE_HYBRIDEP: - @internal_api def hybrid_ep_dispatch( x, routing_map, @@ -452,6 +464,7 @@ def hybrid_ep_dispatch( num_local_experts, num_sms_dispatch_api=24, num_sms_combine_api=24, + num_dispatched_tokens=None, num_permuted_tokens=None, pad_multiple=None, ): @@ -474,6 +487,10 @@ def hybrid_ep_dispatch( Number of SMs used by the dispatch API. num_sms_combine_api (int): Number of SMs used by the combine API. + num_dispatched_tokens (int): + Number of tokens after dispatch but before permute. HybridEP uses this + to allocate buffers. If not provided, HybridEP obtains the size from + a GPU tensor, which causes a D2H synchronization. num_permuted_tokens (int): Number of tokens after permute. HybridEP uses this to allocate buffers. If not provided, HybridEP obtains the size from a GPU tensor, @@ -490,12 +507,12 @@ def hybrid_ep_dispatch( num_local_experts, num_sms_dispatch_api, num_sms_combine_api, + num_dispatched_tokens, num_permuted_tokens, pad_multiple, ) - @internal_api - def hybrid_ep_combine(x, handle, num_permuted_tokens, pad_multiple): + def hybrid_ep_combine(x, handle, num_dispatched_tokens, num_permuted_tokens, pad_multiple): ''' Perform fused combine operation for unpermute + combine a2a + unpermute using the HybridEP backend @@ -505,6 +522,10 @@ def hybrid_ep_combine(x, handle, num_permuted_tokens, pad_multiple): Input hidden states to combine handle (EventHandle): Communication handle from dispatch operation + num_dispatched_tokens (int): + The number of tokens after unpermute but before combine. HybridEP uses this + to allocate buffers. If not provided, HybridEP obtains the size from a GPU tensor, + which causes a D2H synchronization. num_permuted_tokens (int): The number of tokens before unpermute. HybridEP uses this to allocate buffers. 
If not provided, HybridEP obtains the size from a GPU tensor, which causes a D2H synchronization. @@ -512,7 +533,9 @@ The alignment multiple required for FP8 GEMM. If not provided, no padding is performed. ''' - return HybridEPCombine.apply(x, handle, num_permuted_tokens, pad_multiple) + return HybridEPCombine.apply( + x, handle, num_dispatched_tokens, num_permuted_tokens, pad_multiple + ) else: hybrid_ep_dispatch = None diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index d0da38d6322..61ef0b5f084 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -985,8 +985,11 @@ def __init__( if self.drop_and_pad: assert self.capacity_factor is not None self.capacity = None - # Actually the the up-bound for the number of tokens - # after permute op, None means no up-bound, will cause a CPU sync + # The up-bound for the number of tokens after dispatch op, None means no up-bound, + # which will cause a CPU sync + self.num_dispatched_tokens = None + # Actually the sum of tokens_per_expert, the up-bound for the number of tokens + # after permute op, None means no up-bound, will cause a CPU sync self.num_permuted_tokens = None # Metadata @@ -1015,9 +1018,12 @@ def setup_metadata(self, routing_map: torch.Tensor, probs: torch.Tensor): num_experts=self.num_experts, capacity_factor=self.capacity_factor, ) + # We cannot predict the actual number of tokens after the dispatch op, + # so we set it to the worst case in drop_and_pad mode + self.num_dispatched_tokens = self.capacity * self.group.size() * self.num_local_experts # In drop_and_pad mode, the number of tokens after the permute op # can be computed on the CPU - self.num_permuted_tokens = self.capacity * self.group.size() * self.num_local_experts + self.num_permuted_tokens = self.num_dispatched_tokens self.tokens_per_expert =
torch.full( (self.num_local_experts,), self.capacity * self.group.size(), dtype=torch.long ) @@ -1046,6 +1052,7 @@ def dispatch( num_local_experts=self.num_local_experts, num_sms_dispatch_api=self.config.moe_hybridep_num_sms, num_sms_combine_api=self.config.moe_hybridep_num_sms, + num_dispatched_tokens=self.num_dispatched_tokens, num_permuted_tokens=self.num_permuted_tokens, pad_multiple=self.pad_multiple, ) @@ -1067,6 +1074,7 @@ def combine( hidden_states = hybrid_ep_combine( x=hidden_states, handle=self.handle, + num_dispatched_tokens=self.num_dispatched_tokens, num_permuted_tokens=self.num_permuted_tokens, pad_multiple=self.pad_multiple, ) @@ -1076,6 +1084,7 @@ def combine( self.handle = None if not self.drop_and_pad: self.num_permuted_tokens = None + self.num_dispatched_tokens = None return hidden_states def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor: From 46b550591ad4765a447980ff0ca615929cf8fb78 Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Tue, 23 Dec 2025 11:15:53 +0800 Subject: [PATCH 203/334] [Dev] Fix ep overlap missing final layernorm (#2691) --- megatron/core/models/gpt/fine_grained_callables.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index 60094976a9a..741a25326fb 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -170,11 +170,16 @@ def forward_impl(self, hidden_states): Returns: The logits or loss depending on whether labels are provided. - - Note: - Final layernorm now has been moved from the post-process stage to the - last decoder layer, so we don't need to run the final layer norm here. 
""" + + empty_decoder = len(self.gpt_model.decoder.layers) == 0 + layer_norm = self.gpt_model.decoder.final_layernorm + if not self.gpt_model.config.mtp_num_layers and empty_decoder and layer_norm: + hidden_states = layer_norm(hidden_states) + hidden_states = make_viewless_tensor( + inp=hidden_states, requires_grad=True, keep_graph=True + ) + # Run GPTModel._postprocess loss = self.gpt_model._postprocess( hidden_states=hidden_states, From 0b6714ec87ec256aca0bc9400985247d26f98ef0 Mon Sep 17 00:00:00 2001 From: HaochenYuan <106647990+HaochenYuan@users.noreply.github.com> Date: Wed, 24 Dec 2025 10:34:10 +0800 Subject: [PATCH 204/334] [Dev] Remove calculation of padding token in moe routing loss (#2121) Co-authored-by: Li Tao --- .../core/extensions/transformer_engine.py | 2 +- .../common/model_chunk_schedule_plan.py | 2 + .../core/models/gpt/fine_grained_callables.py | 21 +- megatron/core/models/gpt/gpt_model.py | 37 +++- megatron/core/transformer/mlp.py | 2 +- megatron/core/transformer/moe/moe_layer.py | 27 ++- megatron/core/transformer/moe/moe_utils.py | 83 ++++++-- megatron/core/transformer/moe/router.py | 167 ++++++++++++---- .../core/transformer/transformer_block.py | 15 +- .../core/transformer/transformer_layer.py | 23 ++- .../python_scripts/recipe_parser.py | 1 + .../a2a_overlap/test_schedule_chunk_1f1b.py | 116 ++++++++++- .../a2a_overlap/test_schedule_layer_1f1b.py | 4 +- .../transformer/moe/test_aux_loss.py | 189 ++++++++++++++++++ .../transformer/moe/test_routers.py | 47 +++++ 15 files changed, 646 insertions(+), 90 deletions(-) diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index acb93ef7853..546f8a59318 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1851,7 +1851,7 @@ def forward_post_hook(module, *_) -> None: "TEFusedMLP module does not support submodules with post-backward hooks" ) - def forward(self, hidden_states: 
torch.Tensor) -> Tuple[Tensor, Optional[Tensor]]: + def forward(self, hidden_states: torch.Tensor, **kwargs) -> Tuple[Tensor, Optional[Tensor]]: """Forward.""" # Construct fused impl if needed diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 486a498dd73..07bab1cb486 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -305,6 +305,7 @@ def __init__( extra_block_kwargs=None, runtime_gather_output: Optional[bool] = None, loss_mask: Optional[Tensor] = None, + padding_mask=None, ): """Initialize the schedule plan of all Transformer layers' sub-modules. @@ -347,6 +348,7 @@ def __init__( self._model_chunk_state.mtp_hidden_states = None self._model_chunk_state.loss_mask = loss_mask self._model_chunk_state.packed_seq_params = packed_seq_params + self._model_chunk_state.padding_mask = padding_mask self._model_chunk_state.extra_block_kwargs = extra_block_kwargs self._model_chunk_state.runtime_gather_output = runtime_gather_output self._model_chunk_state.model = model diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index 741a25326fb..b0923a37b80 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -120,13 +120,19 @@ def forward_impl(self): if not self.gpt_model.pre_process: self.chunk_state.decoder_input = self.gpt_model.decoder.input_tensor # Run GPTModel._preprocess - decoder_input, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, sequence_len_offset = ( - self.gpt_model._preprocess( - input_ids=self.chunk_state.input_ids, - position_ids=self.chunk_state.position_ids, - decoder_input=self.chunk_state.decoder_input, - packed_seq_params=self.chunk_state.packed_seq_params, - ) + ( + decoder_input, + rotary_pos_emb, + rotary_pos_cos, + rotary_pos_sin, + sequence_len_offset, + 
padding_mask, + ) = self.gpt_model._preprocess( + input_ids=self.chunk_state.input_ids, + position_ids=self.chunk_state.position_ids, + decoder_input=self.chunk_state.decoder_input, + packed_seq_params=self.chunk_state.packed_seq_params, + padding_mask=self.chunk_state.padding_mask, ) # Saved for later use @@ -135,6 +141,7 @@ def forward_impl(self): self.chunk_state.rotary_pos_cos = rotary_pos_cos self.chunk_state.rotary_pos_sin = rotary_pos_sin self.chunk_state.sequence_len_offset = sequence_len_offset + self.chunk_state.padding_mask = padding_mask return decoder_input diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index a1230568cbd..9e70c677226 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -284,6 +284,7 @@ def _preprocess( decoder_input: Tensor = None, inference_context: BaseInferenceContext = None, packed_seq_params: PackedSeqParams = None, + padding_mask: Optional[Tensor] = None, ): """Preprocesses inputs for the transformer decoder. 
@@ -300,7 +301,20 @@ def _preprocess( if decoder_input is not None: pass elif self.pre_process: + if padding_mask is not None: + assert padding_mask.shape == input_ids.shape, ( + f"padding_mask shape {padding_mask.shape} does not match " + f"input_ids shape {input_ids.shape}" + ) decoder_input = self.embedding(input_ids=input_ids, position_ids=position_ids) + if padding_mask is not None and self.config.sequence_parallel: + padding_mask = ( + tensor_parallel.scatter_to_sequence_parallel_region( + padding_mask.transpose(0, 1).contiguous() + ) + .transpose(0, 1) + .contiguous() + ) else: # intermediate stage of pipeline # decoder will get hidden_states from encoder.input_tensor @@ -403,6 +417,7 @@ def _preprocess( rotary_pos_cos, rotary_pos_sin, sequence_len_offset, + padding_mask, ) if rotary_pos_cos_sin is not None: # only in the case of flashinfer fused rope will we @@ -446,6 +461,7 @@ def forward( *, inference_params: Optional[BaseInferenceContext] = None, loss_mask: Optional[Tensor] = None, + padding_mask: Optional[Tensor] = None, ) -> Tensor: """Forward function of the GPT Model This function passes the input tensors through the embedding layer, and then the decoder and finally into the post @@ -456,6 +472,9 @@ def forward( Args: runtime_gather_output (bool): Gather output at runtime. Default None means `parallel_output` arg in the constructor will be used. + padding_mask (Tensor, optional): Padding mask for MoE routing. + Shape [bsz, seq_length]. True = padding (exclude), False = valid (include). + Only used for MoE layers to exclude padding tokens from routing computations. 
""" if self.config.fine_grained_activation_offloading: self.preprocess_for_fine_grained_offloading() @@ -468,13 +487,19 @@ def forward( decoder_input=decoder_input, inference_context=inference_context, packed_seq_params=packed_seq_params, + padding_mask=padding_mask, ) - (decoder_input, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, sequence_len_offset) = ( - preproc_output[:5] - ) + ( + decoder_input, + rotary_pos_emb, + rotary_pos_cos, + rotary_pos_sin, + sequence_len_offset, + padding_mask, + ) = preproc_output[:6] - rotary_pos_cos_sin = preproc_output[5] if len(preproc_output) == 6 else None + rotary_pos_cos_sin = preproc_output[6] if len(preproc_output) == 7 else None # Run decoder. hidden_states = self.decoder( @@ -487,6 +512,7 @@ def forward( rotary_pos_cos_sin=rotary_pos_cos_sin, packed_seq_params=packed_seq_params, sequence_len_offset=sequence_len_offset, + padding_mask=padding_mask, **(extra_block_kwargs or {}), ) @@ -724,6 +750,7 @@ def build_schedule_plan( runtime_gather_output: Optional[bool] = None, inference_params: Optional[BaseInferenceContext] = None, loss_mask: Optional[Tensor] = None, + padding_mask: Optional[Tensor] = None, ): """Builds a computation schedule plan for the model. @@ -749,6 +776,7 @@ def build_schedule_plan( inference_params (InferenceParams, optional): Parameters for inference. Defaults to None. loss_mask (Optional[Tensor], optional): Loss mask. Defaults to None. + padding_mask (Optional[Tensor], optional): Padding mask. Defaults to None. Returns: TransformerModelChunkSchedulePlan: The model chunk schedule plan. 
@@ -770,6 +798,7 @@ def build_schedule_plan( extra_block_kwargs, runtime_gather_output, loss_mask, + padding_mask, ) def sharded_state_dict( diff --git a/megatron/core/transformer/mlp.py b/megatron/core/transformer/mlp.py index 8dcf196da94..fbb960f4be9 100644 --- a/megatron/core/transformer/mlp.py +++ b/megatron/core/transformer/mlp.py @@ -137,7 +137,7 @@ def __init__( tp_group=tp_group, ) - def forward(self, hidden_states, per_token_scale=None): + def forward(self, hidden_states, per_token_scale=None, **kwargs): """Perform the forward pass through the MLP block.""" # [s, b, 4 * h/p] nvtx_range_push(suffix="linear_fc1") diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py index 10d10f667fe..153bac00ec1 100644 --- a/megatron/core/transformer/moe/moe_layer.py +++ b/megatron/core/transformer/moe/moe_layer.py @@ -178,13 +178,13 @@ def __init__( self.cudagraph_tensor_store = MoECudaGraphTensorStore() @maybe_skip_or_early_return_by_cudagraph("route") - def route(self, hidden_states: torch.Tensor): + def route(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): """Compute token routing for preprocessing. This method uses the router to determine which experts to send each token to, producing routing probabilities and a mapping. """ - probs, routing_map = self.router(hidden_states) + probs, routing_map = self.router(hidden_states, padding_mask=padding_mask) return probs, routing_map @maybe_skip_or_early_return_by_cudagraph("preprocess") @@ -270,7 +270,7 @@ def combine(self, output: torch.Tensor, shared_expert_output: Optional[torch.Ten output = output + shared_expert_output return output - def forward(self, hidden_states: torch.Tensor): + def forward(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): """Forward pass for the MoE layer. The forward pass comprises four main steps: @@ -280,7 +280,11 @@ def forward(self, hidden_states: torch.Tensor): 4. 
Combine: The outputs from the experts are combined and returned. Args: - hidden_states (torch.Tensor): The input tensor to the MoE layer. + hidden_states (torch.Tensor): The input tensor shape [seq_length, bsz, hidden_size]. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + used for correct auxiliary loss computation for packed sequence. + Shape = [bsz, seq_length]. True = padding (exclude), False = valid (include). + Defaults to None (all tokens are valid). Returns: A tuple containing the output tensor and the MLP bias, if any. @@ -291,11 +295,15 @@ def forward(self, hidden_states: torch.Tensor): "are enabled without also enabling sequence parallelism." ) + # Transpose from [bsz, seq_length] to [seq_length, bsz] to align with hidden_states + if padding_mask is not None: + padding_mask = padding_mask.transpose(0, 1).bool() + # MoE forward: route -> dispatch -> compute -> combine - def custom_forward(hidden_states): + def custom_forward(hidden_states, padding_mask=None): try: shared_expert_output = self.shared_experts_compute(hidden_states) - probs, routing_map = self.route(hidden_states) + probs, routing_map = self.route(hidden_states, padding_mask=padding_mask) hidden_states, probs, residual = self.preprocess(hidden_states, probs, routing_map) except MoECudaGraphPartialCaptureSignal as e: # This signal is raised from the maybe_skip_or_early_return_by_cudagraph decorator. 
@@ -318,11 +326,14 @@ def custom_forward(hidden_states): tensor_parallel.random.get_cuda_rng_tracker, parallel_state.get_tensor_model_parallel_group(), hidden_states, + padding_mask, ) else: - outputs = tensor_parallel.checkpoint(custom_forward, False, hidden_states) + outputs = tensor_parallel.checkpoint( + custom_forward, False, hidden_states, padding_mask + ) else: - outputs = custom_forward(hidden_states) + outputs = custom_forward(hidden_states, padding_mask) return outputs diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index 28cff06f5ec..f44d441c765 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -1,5 +1,4 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - import math from dataclasses import dataclass from typing import List, Optional, Union @@ -11,6 +10,7 @@ from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel import get_cuda_rng_tracker, get_expert_parallel_rng_tracker_name +from megatron.core.tensor_parallel.mappings import reduce_from_tensor_model_parallel_region from megatron.core.transformer.cuda_graphs import is_graph_capturing from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig @@ -120,18 +120,34 @@ def switch_load_balancing_loss_func( return aux_loss -def z_loss_func(logits, z_loss_coeff): +def z_loss_func(logits, z_loss_coeff, padding_mask: Optional[torch.Tensor] = None): """Encourages the router's logits to remain small to enhance stability. Please refer to the ST-MoE paper (https://arxiv.org/pdf/2202.08906.pdf) for details. Args: logits (torch.Tensor): The logits of the router. + z_loss_coeff (float): The coefficient for the z-loss. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. 
+ Shape [num_tokens]. True = padding (exclude), + False = valid (include). Defaults to None. Returns: torch.Tensor: The logits after applying the z-loss. """ + logsum = torch.logsumexp(logits, dim=-1) + z_loss_values = torch.square(logsum) + + if padding_mask is not None: + # Invert padding_mask: True (padding) -> 0, False (valid) -> 1 + valid_mask = ~padding_mask + # Only compute z_loss for valid (non-padding) tokens + z_loss_values = z_loss_values * valid_mask + # Compute mean over valid tokens only + num_valid_tokens = valid_mask.sum() + z_loss = z_loss_values.sum() / torch.clamp(num_valid_tokens, min=1.0) * z_loss_coeff + else: + z_loss = torch.mean(z_loss_values) * z_loss_coeff - z_loss = torch.mean(torch.square(torch.logsumexp(logits, dim=-1))) * z_loss_coeff return z_loss @@ -171,6 +187,28 @@ def get_capacity(num_tokens: int, num_experts: int, capacity_factor: float, min_ return capacity +def get_tokens_per_expert_and_token_count( + routing_map: torch.Tensor, + reduce_group: torch.distributed.ProcessGroup, + topk: int = None, + with_padding_mask: bool = False, +) -> torch.Tensor: + """ + Compute global_tokens_per_expert, local_num_tokens and total_num_tokens with padding mask. 
+ """ + local_tokens_per_expert = routing_map.sum(dim=0) + global_tokens_per_expert = reduce_from_tensor_model_parallel_region( + local_tokens_per_expert, reduce_group + ) + if with_padding_mask: + local_num_tokens = local_tokens_per_expert.sum() / topk + total_num_tokens = global_tokens_per_expert.sum() / topk + else: + local_num_tokens = routing_map.shape[0] + total_num_tokens = local_num_tokens * reduce_group.size() + return global_tokens_per_expert, local_num_tokens, total_num_tokens + + class MoEAuxLossAutoScaler(torch.autograd.Function): """An AutoScaler that triggers the backward pass and scales the grad for auxiliary loss.""" @@ -629,35 +667,48 @@ def compute_topk(scores, topk, num_groups=None, group_topk=None): def compute_routing_scores_for_aux_loss( - logits: torch.Tensor, topk: int, score_function: str, fused: bool = False + logits: torch.Tensor, + topk: int, + score_function: str, + fused: bool = False, + padding_mask: Optional[torch.Tensor] = None, ): """Compute routing scores based on the score function. Args: logits (torch.Tensor): The logits tensor after gating, shape: [num_tokens, num_experts]. - + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + Shape [num_tokens]. True = padding (exclude), + False = valid (include). Defaults to None. Returns: - torch.Tensor: The normalized routing scores. + Tuple[torch.Tensor, torch.Tensor]: routing_map and scores. """ if fused: if not HAVE_TE or fused_compute_score_for_moe_aux_loss is None: raise ValueError( "fused_compute_score_for_moe_aux_loss is not available. Please install TE >= 2.6.0." 
) - return fused_compute_score_for_moe_aux_loss( + routing_map, scores = fused_compute_score_for_moe_aux_loss( logits=logits, topk=topk, score_function=score_function ) - - if score_function == "softmax": - scores = torch.softmax(logits, dim=-1, dtype=torch.float32) - elif score_function == "sigmoid": - scores = torch.sigmoid(logits) - scores = scores / (scores.sum(dim=-1, keepdim=True) + 1e-20) else: - raise ValueError(f"Invalid score_function: {score_function}") + if score_function == "softmax": + scores = torch.softmax(logits, dim=-1, dtype=torch.float32) + elif score_function == "sigmoid": + scores = torch.sigmoid(logits) + scores = scores / (scores.sum(dim=-1, keepdim=True) + 1e-20) + else: + raise ValueError(f"Invalid score_function: {score_function}") + + _, top_indices = torch.topk(scores, k=topk, dim=1) + routing_map = torch.zeros_like(logits).int().scatter(1, top_indices, 1).bool() - _, top_indices = torch.topk(scores, k=topk, dim=1) - routing_map = torch.zeros_like(logits).int().scatter(1, top_indices, 1).bool() + # Apply padding mask to scores if provided + if padding_mask is not None: + # Invert padding_mask and make True indicates valid tokens + valid_mask = (~padding_mask).unsqueeze(-1) + routing_map = routing_map * valid_mask + scores = scores * valid_mask return routing_map, scores diff --git a/megatron/core/transformer/moe/router.py b/megatron/core/transformer/moe/router.py index 16fc9d9af8f..1c502e212ad 100644 --- a/megatron/core/transformer/moe/router.py +++ b/megatron/core/transformer/moe/router.py @@ -1,12 +1,11 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from abc import ABC, abstractmethod -from typing import Optional +from typing import Optional, Union import torch from megatron.core.jit import jit_fuser -from megatron.core.tensor_parallel import reduce_from_tensor_model_parallel_region from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.moe.moe_utils import ( MoEAuxLossAutoScaler, @@ -14,6 +13,7 @@ apply_random_logits, apply_router_token_dropping, compute_routing_scores_for_aux_loss, + get_tokens_per_expert_and_token_count, router_gating_linear, save_to_aux_losses_tracker, sinkhorn, @@ -268,22 +268,28 @@ def is_aux_loss_enabled(self) -> bool: return False def _apply_aux_loss( - self, probs: torch.Tensor, scores_for_aux_loss: torch.Tensor, routing_map: torch.Tensor + self, + probs: torch.Tensor, + scores_for_aux_loss: torch.Tensor, + routing_map: torch.Tensor, + with_padding_mask: bool = False, ): """Apply the auxiliary loss for the given scores and routing map.""" aux_loss_coeff = self.get_aux_loss_coeff("aux_loss") if aux_loss_coeff == 0: return probs - tokens_per_expert = routing_map.sum(dim=0) - tokens_per_expert = reduce_from_tensor_model_parallel_region( - tokens_per_expert, self.tp_cp_group - ) - num_tokens = routing_map.shape[0] - total_num_tokens = num_tokens * self.tp_cp_group.size() + global_tokens_per_expert, local_num_tokens, total_num_tokens = ( + get_tokens_per_expert_and_token_count( + routing_map=routing_map, + reduce_group=self.tp_cp_group, + topk=self.topk, + with_padding_mask=with_padding_mask, + ) + ) aux_loss = switch_load_balancing_loss_func( probs=scores_for_aux_loss, - tokens_per_expert=tokens_per_expert, + tokens_per_expert=global_tokens_per_expert, total_num_tokens=total_num_tokens, topk=self.topk, num_experts=self.config.num_moe_experts, @@ -291,7 +297,12 @@ def _apply_aux_loss( fused=self.config.moe_router_fusion, ) probs = self.attach_and_log_load_balancing_loss( - probs, aux_loss_coeff, aux_loss, "load_balancing_loss", self.tp_cp_group + probs, 
+ aux_loss_coeff, + aux_loss, + "load_balancing_loss", + self.tp_cp_group, + valid_token_count=local_num_tokens, ) return probs @@ -302,6 +313,7 @@ def _apply_seq_aux_loss( routing_map: torch.Tensor, seq_length: int, bsz: int, + with_padding_mask: bool = False, ): """Apply the sequence-level auxiliary loss for the given scores and routing map. @@ -315,17 +327,21 @@ def _apply_seq_aux_loss( return probs scores_for_aux_loss = scores_for_aux_loss.reshape(seq_length, -1) - tokens_per_expert = routing_map.reshape(seq_length, -1).sum(dim=0) - tokens_per_expert = reduce_from_tensor_model_parallel_region( - tokens_per_expert, self.tp_cp_group + routing_map = routing_map.reshape(seq_length, -1) + + global_tokens_per_expert, local_num_tokens, total_num_tokens = ( + get_tokens_per_expert_and_token_count( + routing_map=routing_map, + reduce_group=self.tp_cp_group, + with_padding_mask=with_padding_mask, + topk=self.topk * bsz, + ) ) - total_num_tokens = seq_length * self.tp_cp_group.size() - aux_loss = ( switch_load_balancing_loss_func( probs=scores_for_aux_loss, - tokens_per_expert=tokens_per_expert, + tokens_per_expert=global_tokens_per_expert, total_num_tokens=total_num_tokens, topk=self.topk, num_experts=self.config.num_moe_experts, @@ -334,31 +350,42 @@ def _apply_seq_aux_loss( ) / bsz ) + probs = self.attach_and_log_load_balancing_loss( - probs, seq_aux_loss_coeff, aux_loss, "seq_load_balancing_loss", self.tp_cp_group + probs, + seq_aux_loss_coeff, + aux_loss, + "seq_load_balancing_loss", + self.tp_cp_group, + valid_token_count=local_num_tokens, ) return probs def _apply_global_aux_loss( - self, probs: torch.Tensor, scores_for_aux_loss: torch.Tensor, routing_map: torch.Tensor + self, + probs: torch.Tensor, + scores_for_aux_loss: torch.Tensor, + routing_map: torch.Tensor, + with_padding_mask: bool = False, ): """Apply the global auxiliary loss for the given scores and routing map.""" global_aux_loss_coeff = self.get_aux_loss_coeff("global_aux_loss") if 
global_aux_loss_coeff == 0: return probs - tokens_per_expert = routing_map.sum(dim=0) - tokens_per_expert = reduce_from_tensor_model_parallel_region( - tokens_per_expert, self.tp_dp_cp_group + # Use unified function to compute tokens_per_expert and num_tokens + global_tokens_per_expert, local_num_tokens, total_num_tokens = ( + get_tokens_per_expert_and_token_count( + routing_map=routing_map, + reduce_group=self.tp_dp_cp_group, + with_padding_mask=with_padding_mask, + topk=self.topk, + ) ) - - self.global_tokens_per_expert += tokens_per_expert + self.global_tokens_per_expert += global_tokens_per_expert self.ga_steps += 1 averated_tokens_per_expert = self.global_tokens_per_expert / self.ga_steps - num_tokens = scores_for_aux_loss.shape[0] - total_num_tokens = num_tokens * self.tp_dp_cp_group.size() - global_aux_loss = switch_load_balancing_loss_func( probs=scores_for_aux_loss, tokens_per_expert=averated_tokens_per_expert, @@ -374,6 +401,7 @@ def _apply_global_aux_loss( global_aux_loss, "global_load_balancing_loss", self.tp_dp_cp_group, + valid_token_count=local_num_tokens, ) return probs @@ -384,8 +412,20 @@ def attach_and_log_load_balancing_loss( aux_loss: torch.Tensor, aux_loss_name: str, reduce_group: torch.distributed.ProcessGroup, + valid_token_count: Optional[Union[int, torch.Tensor]] = None, ): - """Attach aux loss function to activation and add to logging.""" + """Attach aux loss function to activation and add to logging. + + Args: + activation (torch.Tensor): Activation tensor to attach the aux loss to. + aux_loss_coeff (float): Coefficient for the aux loss. + aux_loss (torch.Tensor): Computed aux loss. + aux_loss_name (str): Name of the aux loss for logging. + reduce_group (torch.distributed.ProcessGroup): Process group for reduction. + valid_token_count (int or torch.Tensor, optional): Number of valid tokens excluding + padding tokens. Can be a Python int or a torch.Tensor (typically 0-d tensor). + If None, uses activation.shape[0]. Defaults to None. 
+ """ # TODO (zijiey): fix the per_layer_logging for MTP, currently it will incorrectly # add the aux loss logging value to other layer's since it is difficult to get the # correct layer_number for MTP. It does not affect the correctness of the calculation @@ -408,17 +448,22 @@ def attach_and_log_load_balancing_loss( # which scales both the main_loss gradient and aux_loss gradient by # 1/(num_local_tokens * dp_size * num_micro_batches) in finalize_model_grads function. # To correct this scaling, we need to scale the aux_loss by num_local_tokens here. - activation = MoEAuxLossAutoScaler.apply(activation, aux_loss * activation.shape[0]) + # Use valid_token_count (excluding padding) if provided, otherwise use total tokens. + num_tokens = valid_token_count if valid_token_count is not None else activation.shape[0] + activation = MoEAuxLossAutoScaler.apply(activation, aux_loss * num_tokens) else: activation = MoEAuxLossAutoScaler.apply(activation, aux_loss) return activation - def apply_z_loss(self, logits): + def apply_z_loss(self, logits, padding_mask: Optional[torch.Tensor] = None): """Encourages the router's logits to remain small to enhance stability. Please refer to the ST-MoE paper (https://arxiv.org/pdf/2202.08906.pdf) for details. Args: logits (torch.Tensor): The logits of the router. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + Shape [num_tokens]. True = padding (exclude), + False = valid (include). Defaults to None. Returns: torch.Tensor: The logits after applying the z-loss. @@ -426,7 +471,7 @@ def apply_z_loss(self, logits): if self.config.moe_z_loss_coeff is not None and self.training and torch.is_grad_enabled(): # Skip Z loss calculations when using torch.no_grad() or checkpointing. 
moe_z_loss_coeff = self.config.moe_z_loss_coeff / self.tp_cp_group.size() - z_loss = z_loss_func(logits, moe_z_loss_coeff) + z_loss = z_loss_func(logits, moe_z_loss_coeff, padding_mask=padding_mask) scale_up = 1.0 if self.calculate_per_token_loss: # The expected final scaling for z_loss gradients is @@ -436,7 +481,9 @@ def apply_z_loss(self, logits): # which scales both the main_loss gradient and z_loss gradient by # 1/(num_local_tokens * dp_size * num_micro_batches) in finalize_model_grads(). # To correct this scaling, we need to scale the z_loss by num_local_tokens here. - logits = MoEAuxLossAutoScaler.apply(logits, z_loss * logits.shape[0]) + # Count valid tokens: sum of inverted mask (False -> True = valid) + num_tokens = (~padding_mask).sum() if padding_mask is not None else logits.shape[0] + logits = MoEAuxLossAutoScaler.apply(logits, z_loss * num_tokens) else: logits = MoEAuxLossAutoScaler.apply(logits, z_loss) @@ -470,20 +517,32 @@ def apply_input_jitter(self, input: torch.Tensor): return input @jit_fuser - def _apply_expert_bias(self, routing_map: torch.Tensor): + def _apply_expert_bias( + self, routing_map: torch.Tensor, padding_mask: Optional[torch.Tensor] = None + ): """ Update expert bias and tokens_per_expert Prevent extra local tokens accumulation on evaluation or activation recomputation + + Args: + routing_map (torch.Tensor): Token to expert routing map, [num_tokens, num_experts]. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + Shape [num_tokens]. True = padding (exclude), False = valid (include). 
""" if self.enable_expert_bias and torch.is_grad_enabled(): with torch.no_grad(): + if padding_mask is not None: + routing_map = routing_map & (~padding_mask) self.local_tokens_per_expert += routing_map.sum(dim=0) - def routing(self, logits: torch.Tensor): + def routing(self, logits: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): """Top-k routing function Args: logits (torch.Tensor): Logits tensor after gating. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + Shape = [seq_length, bsz]. True=padding(exclude), + False=valid(include). Defaults to None. Returns: probs (torch.Tensor): The probabilities of token to experts assignment. @@ -493,8 +552,12 @@ def routing(self, logits: torch.Tensor): seq_length, bsz = logits.shape[:2] logits = logits.view(-1, self.config.num_moe_experts) + # Flatten padding_mask to [num_tokens] if provided + if padding_mask is not None: + padding_mask = padding_mask.reshape(-1) + # Apply Z-Loss - logits = self.apply_z_loss(logits) + logits = self.apply_z_loss(logits, padding_mask=padding_mask) # Calculate probs and routing_map for token dispatching if self.routing_type == "sinkhorn": @@ -527,18 +590,35 @@ def routing(self, logits: torch.Tensor): if self.training and torch.is_grad_enabled() and self.is_aux_loss_enabled(): # Calculate scores and routing_map for aux loss routing_map_for_aux_loss, scores_for_aux_loss = compute_routing_scores_for_aux_loss( - logits, self.topk, self.score_function, fused=self.config.moe_router_fusion + logits, + self.topk, + self.score_function, + fused=self.config.moe_router_fusion, + padding_mask=padding_mask, + ) + probs = self._apply_aux_loss( + probs, + scores_for_aux_loss, + routing_map_for_aux_loss, + with_padding_mask=padding_mask is not None, ) - probs = self._apply_aux_loss(probs, scores_for_aux_loss, routing_map_for_aux_loss) probs = self._apply_seq_aux_loss( - probs, scores_for_aux_loss, routing_map_for_aux_loss, seq_length, bsz + probs, + 
scores_for_aux_loss, + routing_map_for_aux_loss, + seq_length, + bsz, + with_padding_mask=padding_mask is not None, ) probs = self._apply_global_aux_loss( - probs, scores_for_aux_loss, routing_map_for_aux_loss + probs, + scores_for_aux_loss, + routing_map_for_aux_loss, + with_padding_mask=padding_mask is not None, ) # Optionally apply expert bias - self._apply_expert_bias(routing_map) + self._apply_expert_bias(routing_map, padding_mask=padding_mask) return probs, routing_map @@ -548,12 +628,15 @@ def reset_global_aux_loss_tracker(self): self.global_tokens_per_expert.zero_() self.ga_steps.zero_() - def forward(self, input: torch.Tensor): + def forward(self, input: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): """ Forward pass of the router. Args: input (torch.Tensor): Input tensor. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + Shape = [seq_length, bsz]. True=padding(exclude), + False=valid(include). Defaults to None. """ self._maintain_float32_expert_bias() @@ -565,7 +648,7 @@ def forward(self, input: torch.Tensor): # Apply force load balancing with random logits for benchmark logits = apply_random_logits(logits) - probs, routing_map = self.routing(logits) + probs, routing_map = self.routing(logits, padding_mask=padding_mask) return probs, routing_map diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index 023db1fe75a..cbbd7ec00eb 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -390,7 +390,6 @@ def build_layer(layer_spec, layer_number): def has_final_layernorm_in_this_stage(self): """ Check if this vpp stage contains the final layernorm. - Note: Final layernorm now has been moved from the post-process stage to the last decoder layer by using this function. 
@@ -429,12 +428,18 @@ def _checkpointed_forward( attention_bias: Tensor, packed_seq_params: PackedSeqParams, use_inner_quantization_context: bool, + padding_mask: Optional[Tensor] = None, ): """Forward method with activation checkpointing.""" def custom(start: int, end: int): def custom_forward( - hidden_states, attention_mask, context, context_mask, rotary_pos_emb + hidden_states, + attention_mask, + context, + context_mask, + rotary_pos_emb, + padding_mask=None, ): for index in range(start, end): layer = self._get_layer(index) @@ -465,6 +470,7 @@ def custom_forward( attention_bias=attention_bias, inference_context=None, packed_seq_params=packed_seq_params, + padding_mask=padding_mask, ) return hidden_states, context @@ -484,6 +490,7 @@ def checkpoint_handler(forward_func): context, context_mask, rotary_pos_emb, + padding_mask, ) else: return tensor_parallel.checkpoint( @@ -494,6 +501,7 @@ def checkpoint_handler(forward_func): context, context_mask, rotary_pos_emb, + padding_mask, ) if self.config.recompute_method == 'uniform': @@ -599,6 +607,7 @@ def forward( inference_context: Optional[BaseInferenceContext] = None, packed_seq_params: Optional[PackedSeqParams] = None, sequence_len_offset: Optional[Tensor] = None, + padding_mask: Optional[Tensor] = None, *, inference_params: Optional[BaseInferenceContext] = None, dynamic_inference_decode_only: Optional[bool] = None, @@ -708,6 +717,7 @@ def forward( attention_bias=attention_bias, packed_seq_params=packed_seq_params, use_inner_quantization_context=use_inner_quantization_context, + padding_mask=padding_mask, ) else: for l_no, layer in enumerate(self.layers): @@ -745,6 +755,7 @@ def forward( inference_context=inference_context, packed_seq_params=packed_seq_params, sequence_len_offset=sequence_len_offset, + padding_mask=padding_mask, ) if ( diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index 3ea40577009..21f38b06f30 100644 --- 
a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -457,7 +457,12 @@ def forward(self, *args, **kwargs): # runners in the cuda graph manager kwargs.pop("dynamic_inference_decode_only", None) hidden_states, context = self._forward_attention(*args, **kwargs) - output = self._forward_mlp(hidden_states, kwargs.get("inference_context", None)) + + output = self._forward_mlp( + hidden_states, + kwargs.get("inference_context", None), + padding_mask=kwargs.get("padding_mask", None), + ) return output, context def _forward_attention( @@ -474,6 +479,7 @@ def _forward_attention( inference_context: Optional[Any] = None, packed_seq_params: Optional[PackedSeqParams] = None, sequence_len_offset: Optional[Tensor] = None, + padding_mask: Optional[Tensor] = None, *, inference_params: Optional[Any] = None, ): @@ -591,12 +597,18 @@ def _forward_attention( return hidden_states, context - def _forward_mlp(self, hidden_states, inference_context=None): + def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None): """ Perform a forward pass through the feed-forward layer. Args: hidden_states (Tensor): Transformed hidden states before the MLP layernorm. + Shape [seq_length, batch_size, hidden_size]. + inference_context: Inference context for optimizations. + padding_mask (Tensor, optional): Padding mask for MoE routing. + Shape [bsz, seq_length]. True = padding (exclude), False = valid (include). + Only used for MoE layers to exclude padding tokens from aux loss computations. + The MoELayer will internally transform this to [seq_length, bsz] format. Returns: output (Tensor): Transformed hidden states of shape [s, b, h]. @@ -642,7 +654,7 @@ def _forward_mlp(self, hidden_states, inference_context=None): assert ( not self.recompute_pre_mlp_layernorm ), "Recomputation is not supported for CUDA graph." 
- cudagraph_outputs = self.mlp(pre_mlp_layernorm_output) + cudagraph_outputs = self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask) nvtx_range_pop(suffix="mlp") return cudagraph_outputs + [residual] elif self.recompute_mlp: @@ -656,10 +668,11 @@ def _forward_mlp(self, hidden_states, inference_context=None): tensor_parallel.random.get_cuda_rng_tracker, self.pg_collection.tp, pre_mlp_layernorm_output, + padding_mask=padding_mask, ) else: mlp_output_with_bias = tensor_parallel.checkpoint( - self.mlp, False, pre_mlp_layernorm_output + self.mlp, False, pre_mlp_layernorm_output, padding_mask=padding_mask ) elif should_chunk_mlp_for_prefill: # Chunk input along sequence dimension @@ -675,7 +688,7 @@ def _forward_mlp(self, hidden_states, inference_context=None): bias_output = torch.stack(bias_chunks, dim=0).sum(dim=0) if bias_chunks else None mlp_output_with_bias = (mlp_output, bias_output) else: - mlp_output_with_bias = self.mlp(pre_mlp_layernorm_output) + mlp_output_with_bias = self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask) if self.recompute_pre_mlp_layernorm: # discard the output of the pre-mlp layernorm and register the recompute diff --git a/tests/test_utils/python_scripts/recipe_parser.py b/tests/test_utils/python_scripts/recipe_parser.py index a497bdbd9de..b866fbbf5c2 100644 --- a/tests/test_utils/python_scripts/recipe_parser.py +++ b/tests/test_utils/python_scripts/recipe_parser.py @@ -1,3 +1,4 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import copy import itertools import logging diff --git a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py index 81e61a3404a..6c59dd3f9e3 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py @@ -23,7 +23,7 @@ from tests.unit_tests.test_utilities import Utils -def build_model(config): +def build_model(config, use_padding_mask=False): seq_len = 32 max_seq_len = 300 # ids = random.sample([i for i in range(max_seq_len)], seq_len) @@ -39,6 +39,12 @@ def build_model(config): "attention_mask": torch.ones((1, 1, seq_len, seq_len), dtype=bool).cuda(), } + # Optionally add padding_mask with same shape as input_ids + if use_padding_mask: + padding_mask = torch.zeros((1, seq_len), dtype=torch.bool).cuda() + padding_mask[0, -8:] = True + data["padding_mask"] = padding_mask + # build layer spec transformer_layer_spec = get_gpt_decoder_block_spec(config=config, use_transformer_engine=True) mtp_block_spec = get_gpt_mtp_block_spec(config, transformer_layer_spec.layer_specs[-1], True) @@ -48,7 +54,7 @@ def build_model(config): config=config, transformer_layer_spec=transformer_layer_spec, mtp_block_spec=mtp_block_spec, - vocab_size=100, + vocab_size=128, pre_process=True, post_process=True, max_sequence_length=max_seq_len, @@ -174,3 +180,109 @@ def test_1f1b_schedule_model_chunk(self, mtp_layers, dispatcher_type, fp8_flag, gpt_models[i] = None gc.collect() torch.cuda.empty_cache() + + @pytest.mark.skipif(not is_te_min_version("1.9.0.dev0"), reason="Requires TE >= 1.9.0.dev0") + @pytest.mark.parametrize("dispatcher_type", get_valid_token_dispatcher_types()) + @pytest.mark.parametrize("layers", [[2, 1], [1, 1]]) + @pytest.mark.parametrize("tp_size", [1, 2, 4, 8]) + def test_1f1b_schedule_model_chunk_with_padding_mask(self, dispatcher_type, layers, tp_size): + """ + Verifies all-to-all overlap optimization with padding_mask produces + the same 
results as the reference implementation with various TP/EP/CP combinations. + """ + # Re-initialize model parallel with the specified configuration + Utils.destroy_model_parallel() + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp_size, + pipeline_model_parallel_size=1, + expert_model_parallel_size=4, + expert_tensor_parallel_size=1, + ) + set_streams() + + microbatches = 1 + + gpt_models = [] + schedule_plans = [] + ref_captures = [] + datas = [] + + # create TransformerConfig + extra_kwargs = { + "moe_token_dispatcher_type": dispatcher_type, + "tensor_model_parallel_size": tp_size, + "sequence_parallel": tp_size > 1, + } + if dispatcher_type == "flex": + extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" + extra_kwargs["moe_router_dtype"] = "fp32" + with deterministic_mode(): + for layer_num in layers: + output_tensors = [] + # build config + config = get_test_config(num_layers=layer_num, extra_kwargs=extra_kwargs) + # build model with padding_mask + gpt_model, schedule_plan, data = build_model(config, use_padding_mask=True) + gpt_model.cuda() + gpt_models.append(gpt_model) + datas.append(data) + schedule_plans.append(schedule_plan) + + # run reference + for _ in range(microbatches): + loss = gpt_model.forward(**data) + loss = float16_to_fp32(loss) + loss.backward(torch.ones_like(loss)) + output_tensors.append(loss) + + capture = {"outputs": output_tensors} + for name, param in gpt_model.named_parameters(): + capture[name] = param.grad + ref_captures.append(capture) + gpt_model.zero_grad() + assert gpt_models[0].embedding is not None + assert gpt_models[1].embedding is not None + # run a2a overlap + capture_0 = {"outputs": []} + capture_1 = {"outputs": []} + a2a_captures = [capture_0, capture_1] + for i in range(microbatches): + # 1st forward + if i > 0: + assert ( + schedule_plans[0].pre_process is None + ), "pre_process should be released after backward" + schedule_plans[0] = gpt_models[0].build_schedule_plan(**datas[0]) + schedule_plans[1] 
= gpt_models[1].build_schedule_plan(**datas[1]) + f_input_0 = TransformerModelChunkSchedulePlan.run(schedule_plans[0], None) + capture_0["outputs"].append(f_input_0) + # overlap + f_input_1 = TransformerModelChunkSchedulePlan.run( + schedule_plans[1], schedule_plans[0], b_grad=torch.ones_like(f_input_0) + ) + capture_1["outputs"].append(f_input_1) + # last backward + TransformerModelChunkSchedulePlan.run( + None, schedule_plans[1], b_grad=torch.ones_like(f_input_1) + ) + for i in range(len(gpt_models)): + for name, param in gpt_models[i].named_parameters(): + a2a_captures[i][name] = param.grad + + # compare results + for i in range(len(ref_captures)): + comp_res = compare_captures(ref_captures[i], a2a_captures[i], True, True) + assert comp_res[0], f"[rank {torch.distributed.get_rank()}] {comp_res[1]}" + + # release resources is necessary, otherwise later testcases will oom + for i in range(len(schedule_plans)): + schedule_plans[i] = None + ref_captures[i] = None + a2a_captures[i] = None + for k in datas[i]: + datas[i][k] = None + datas[i] = None + gpt_models[i].zero_grad() + gpt_models[i] = None + gc.collect() + torch.cuda.empty_cache() diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py index 7fb97f6e586..5ec096e5a04 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py @@ -502,8 +502,8 @@ def test_mtp_layer_overlap(self, dispatcher_type, fp8_flag): position_ids = torch.tensor(data, dtype=torch.int64).repeat((1, 1)).cuda() attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool).cuda() # get rotary pos emb - _, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, _ = gpt_model._preprocess( - input_ids, position_ids + _, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, _, _padding_mask = ( + gpt_model._preprocess(input_ids, position_ids) ) # reset model params = reset_model(gpt_model) diff --git 
a/tests/unit_tests/transformer/moe/test_aux_loss.py b/tests/unit_tests/transformer/moe/test_aux_loss.py index b1f78582383..f5726777383 100644 --- a/tests/unit_tests/transformer/moe/test_aux_loss.py +++ b/tests/unit_tests/transformer/moe/test_aux_loss.py @@ -576,3 +576,192 @@ def test_force_balanced_aux_loss(self, tp_size, ep_size, cp_size): reduce_from_tensor_model_parallel_region(aux_loss, router.tp_cp_group) assert aux_loss.item() == 1, f"{aux_loss_type}: {aux_loss.item()}" clear_aux_losses_tracker() + + +class TestPaddingMaskAuxLoss: + """Test padding mask support in various aux loss types.""" + + def setup_model_parallel(self, tp_size=1, ep_size=1, cp_size=1, sequence_parallel=False): + """Initialize model parallel with given configuration. + + Args: + tp_size: Tensor parallel size. + ep_size: Expert parallel size. + cp_size: Context parallel size. + """ + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp_size, + pipeline_model_parallel_size=1, + context_parallel_size=cp_size, + expert_model_parallel_size=ep_size, + ) + _set_random_seed(seed_=123, data_parallel_random_init=False) + + # Store parallel configuration + self.tp_size = tp_size + self.ep_size = ep_size + self.cp_size = cp_size + + # Default configuration + self.default_transformer_config = TransformerConfig( + num_layers=1, + hidden_size=12, + num_attention_heads=8, + num_moe_experts=32, + use_cpu_initialization=True, + moe_router_load_balancing_type="aux_loss", + moe_router_topk=8, + moe_aux_loss_coeff=1.0, + bf16=True, + params_dtype=torch.bfloat16, + add_bias_linear=False, + tensor_model_parallel_size=tp_size, + expert_model_parallel_size=ep_size, + context_parallel_size=cp_size, + sequence_parallel=sequence_parallel and tp_size > 1, + ) + + def new_router(self, **kwargs): + """Create a new router with updated configuration.""" + pg_collection = get_default_pg_collection() + new_transformer_config = dataclasses.replace(self.default_transformer_config, **kwargs) + router = 
TopKRouter(config=new_transformer_config, pg_collection=pg_collection) + router.set_layer_number(0) + return router + + @pytest.mark.internal + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + @pytest.mark.parametrize("sequence_parallel", [True, False]) + @pytest.mark.parametrize("aux_loss_type", ["aux_loss", "seq_aux_loss", "global_aux_loss"]) + @pytest.mark.parametrize( + "tp_size,ep_size,cp_size", [(8, 1, 1), (4, 2, 1), (1, 1, 8), (2, 1, 4), (2, 2, 2)] + ) + def test_padding_mask_removes_padding_tokens( + self, aux_loss_type, tp_size, ep_size, cp_size, sequence_parallel + ): + """Test that padding tokens are correctly excluded from aux loss calculation.""" + # Initialize model parallel with given configuration + self.setup_model_parallel( + tp_size=tp_size, ep_size=ep_size, cp_size=cp_size, sequence_parallel=sequence_parallel + ) + + try: + clear_aux_losses_tracker() + + router = self.new_router( + moe_router_load_balancing_type=aux_loss_type, + moe_aux_loss_coeff=1.0, + moe_router_dtype="fp64", + ).cuda() + + seq_len = 32 + batch_size = 2 + hidden_size = router.config.hidden_size + + # Create input with padding + hidden_states_full = torch.randn( + (seq_len, batch_size, hidden_size), dtype=torch.bfloat16, device='cuda' + ) + + # Create padding mask: first half valid (False), second half padding (True) + # Convention: True = padding (exclude), False = valid (include) + padding_mask = torch.zeros((seq_len, batch_size), dtype=torch.bool, device='cuda') + padding_mask[seq_len // 2 :, :] = True + + # Test with padding mask + router.weight.grad = None + scores_with_mask, routing_map_with_mask = router( + hidden_states_full, padding_mask=padding_mask + ) + scores_with_mask.backward(torch.zeros_like(scores_with_mask)) + + loss_name = { + "aux_loss": "load_balancing_loss", + "seq_aux_loss": "seq_load_balancing_loss", + "global_aux_loss": "global_load_balancing_loss", + }[aux_loss_type] + + tracker = get_moe_layer_wise_logging_tracker() + 
aux_loss_with_mask = tracker[loss_name]["values"][0].clone() + grad_with_mask = router.weight.grad.clone() + + # Test without padding (with only half of the tokens) + clear_aux_losses_tracker() + router.weight.grad = None + hidden_states_valid = hidden_states_full[: seq_len // 2, :, :] + scores_without_mask, routing_map_without_mask = router(hidden_states_valid) + scores_without_mask.backward(torch.zeros_like(scores_without_mask)) + + aux_loss_without_mask = tracker[loss_name]["values"][0].clone() + grad_without_mask = router.weight.grad.clone() + + # The aux loss with mask should be close to the aux loss without mask + assert torch.equal(aux_loss_with_mask, aux_loss_without_mask) + assert torch.equal(grad_with_mask, grad_without_mask) + + clear_aux_losses_tracker() + finally: + # Always cleanup model parallel + Utils.destroy_model_parallel() + + @pytest.mark.internal + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + @pytest.mark.parametrize( + "tp_size,ep_size,cp_size", [(8, 1, 1), (4, 2, 1), (1, 1, 8), (2, 1, 4), (2, 2, 2)] + ) + def test_padding_mask_with_z_loss(self, tp_size, ep_size, cp_size): + """Test that padding mask works correctly with z_loss.""" + # Initialize model parallel with given configuration + self.setup_model_parallel(tp_size=tp_size, ep_size=ep_size, cp_size=cp_size) + + try: + clear_aux_losses_tracker() + + router = self.new_router( + moe_router_load_balancing_type="aux_loss", + moe_aux_loss_coeff=0.0, + moe_z_loss_coeff=1.0, + moe_router_dtype="fp32", + ).cuda() + + seq_len = 32 + batch_size = 2 + hidden_size = router.config.hidden_size + + # Create input + hidden_states_full = torch.randn( + (seq_len, batch_size, hidden_size), dtype=torch.bfloat16, device='cuda' + ) + + # Create padding mask: first half valid (False), second half padding (True) + # Convention: True = padding (exclude), False = valid (include) + padding_mask = torch.zeros((seq_len, batch_size), dtype=torch.bool, device='cuda') + 
padding_mask[seq_len // 2 :, :] = True + + # Test with padding mask + router.weight.grad = None + scores_with_mask, _ = router(hidden_states_full, padding_mask=padding_mask) + scores_with_mask.sum().backward() + + tracker = get_moe_layer_wise_logging_tracker() + z_loss_with_mask = tracker["z_loss"]["values"][0].clone() + grad_with_mask = router.weight.grad.clone() + + # Test without padding (with only half of the tokens) + clear_aux_losses_tracker() + router.weight.grad = None + hidden_states_valid = hidden_states_full[: seq_len // 2, :, :] + scores_without_mask, _ = router(hidden_states_valid) + scores_without_mask.sum().backward() + + z_loss_without_mask = tracker["z_loss"]["values"][0].clone() + grad_without_mask = router.weight.grad.clone() + + # The z_loss with mask should be close to the z_loss without mask + assert torch.equal(z_loss_with_mask, z_loss_without_mask) + assert torch.equal(grad_with_mask, grad_without_mask) + + clear_aux_losses_tracker() + finally: + # Always cleanup model parallel + Utils.destroy_model_parallel() diff --git a/tests/unit_tests/transformer/moe/test_routers.py b/tests/unit_tests/transformer/moe/test_routers.py index 677d938cdc7..abd1a4db2dc 100644 --- a/tests/unit_tests/transformer/moe/test_routers.py +++ b/tests/unit_tests/transformer/moe/test_routers.py @@ -125,6 +125,53 @@ def test_aux_loss(self): out.sum().mul_(0).backward() assert self.sequential_mlp.router.weight.grad.abs().sum() > 0 + @pytest.mark.internal + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_router_with_padding_mask(self): + """Test that padding mask correctly excludes padding tokens from routing.""" + self.router = self.router.cuda() + seq_len = 32 + batch_size = 2 + hidden_size = self.router.config.hidden_size + + # Create input with shape [seq_len, batch_size, hidden_size] + hidden_states = torch.randn((seq_len, batch_size, hidden_size)).cuda().bfloat16() + + # Create padding mask: first half valid (False), second 
half padding (True) + # padding_mask shape: [seq_len, batch_size] + # Convention: True = padding (exclude), False = valid (include) + padding_mask = torch.zeros((seq_len, batch_size), dtype=torch.bool, device='cuda') + padding_mask[seq_len // 2 :, :] = True # Second half is padding + + # Test forward pass with padding mask + with torch.no_grad(): + probs_with_mask, routing_map_with_mask = self.router( + hidden_states, padding_mask=padding_mask + ) + + # Test forward pass without padding mask (only valid tokens) + hidden_states_valid = hidden_states[: seq_len // 2, :, :] + probs_without_mask, routing_map_without_mask = self.router(hidden_states_valid) + + # The valid part of routing with mask should match routing without mask + probs_valid_part = probs_with_mask.reshape(seq_len, batch_size, -1)[ + : seq_len // 2, :, : + ] + probs_valid_part = probs_valid_part.reshape(-1, probs_valid_part.shape[-1]) + + # Check that shapes are as expected + assert probs_with_mask.shape == ( + seq_len * batch_size, + self.router.config.num_moe_experts, + ) + assert routing_map_with_mask.shape == ( + seq_len * batch_size, + self.router.config.num_moe_experts, + ) + + # Verify that probs for valid tokens are similar + assert torch.equal(probs_valid_part, probs_without_mask) + @pytest.mark.internal @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") def test_router_dtype(self): From 1068d775d665b9629193c5c8ec60813c4ec2b118 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Tue, 23 Dec 2025 23:04:37 -0600 Subject: [PATCH 205/334] Revert "[Dev] Remove calculation of padding token in moe routing loss (#2121)" (#2747) Signed-off-by: Charlie Truong --- .../core/extensions/transformer_engine.py | 2 +- .../common/model_chunk_schedule_plan.py | 2 - .../core/models/gpt/fine_grained_callables.py | 21 +- megatron/core/models/gpt/gpt_model.py | 37 +--- megatron/core/transformer/mlp.py | 2 +- megatron/core/transformer/moe/moe_layer.py | 27 +-- 
megatron/core/transformer/moe/moe_utils.py | 83 ++------ megatron/core/transformer/moe/router.py | 167 ++++------------ .../core/transformer/transformer_block.py | 15 +- .../core/transformer/transformer_layer.py | 23 +-- .../python_scripts/recipe_parser.py | 1 - .../a2a_overlap/test_schedule_chunk_1f1b.py | 116 +---------- .../a2a_overlap/test_schedule_layer_1f1b.py | 4 +- .../transformer/moe/test_aux_loss.py | 189 ------------------ .../transformer/moe/test_routers.py | 47 ----- 15 files changed, 90 insertions(+), 646 deletions(-) diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 546f8a59318..acb93ef7853 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1851,7 +1851,7 @@ def forward_post_hook(module, *_) -> None: "TEFusedMLP module does not support submodules with post-backward hooks" ) - def forward(self, hidden_states: torch.Tensor, **kwargs) -> Tuple[Tensor, Optional[Tensor]]: + def forward(self, hidden_states: torch.Tensor) -> Tuple[Tensor, Optional[Tensor]]: """Forward.""" # Construct fused impl if needed diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 07bab1cb486..486a498dd73 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -305,7 +305,6 @@ def __init__( extra_block_kwargs=None, runtime_gather_output: Optional[bool] = None, loss_mask: Optional[Tensor] = None, - padding_mask=None, ): """Initialize the schedule plan of all Transformer layers' sub-modules. 
@@ -348,7 +347,6 @@ def __init__( self._model_chunk_state.mtp_hidden_states = None self._model_chunk_state.loss_mask = loss_mask self._model_chunk_state.packed_seq_params = packed_seq_params - self._model_chunk_state.padding_mask = padding_mask self._model_chunk_state.extra_block_kwargs = extra_block_kwargs self._model_chunk_state.runtime_gather_output = runtime_gather_output self._model_chunk_state.model = model diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index b0923a37b80..741a25326fb 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -120,19 +120,13 @@ def forward_impl(self): if not self.gpt_model.pre_process: self.chunk_state.decoder_input = self.gpt_model.decoder.input_tensor # Run GPTModel._preprocess - ( - decoder_input, - rotary_pos_emb, - rotary_pos_cos, - rotary_pos_sin, - sequence_len_offset, - padding_mask, - ) = self.gpt_model._preprocess( - input_ids=self.chunk_state.input_ids, - position_ids=self.chunk_state.position_ids, - decoder_input=self.chunk_state.decoder_input, - packed_seq_params=self.chunk_state.packed_seq_params, - padding_mask=self.chunk_state.padding_mask, + decoder_input, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, sequence_len_offset = ( + self.gpt_model._preprocess( + input_ids=self.chunk_state.input_ids, + position_ids=self.chunk_state.position_ids, + decoder_input=self.chunk_state.decoder_input, + packed_seq_params=self.chunk_state.packed_seq_params, + ) ) # Saved for later use @@ -141,7 +135,6 @@ def forward_impl(self): self.chunk_state.rotary_pos_cos = rotary_pos_cos self.chunk_state.rotary_pos_sin = rotary_pos_sin self.chunk_state.sequence_len_offset = sequence_len_offset - self.chunk_state.padding_mask = padding_mask return decoder_input diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 9e70c677226..a1230568cbd 100644 --- 
a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -284,7 +284,6 @@ def _preprocess( decoder_input: Tensor = None, inference_context: BaseInferenceContext = None, packed_seq_params: PackedSeqParams = None, - padding_mask: Optional[Tensor] = None, ): """Preprocesses inputs for the transformer decoder. @@ -301,20 +300,7 @@ def _preprocess( if decoder_input is not None: pass elif self.pre_process: - if padding_mask is not None: - assert padding_mask.shape == input_ids.shape, ( - f"padding_mask shape {padding_mask.shape} does not match " - f"input_ids shape {input_ids.shape}" - ) decoder_input = self.embedding(input_ids=input_ids, position_ids=position_ids) - if padding_mask is not None and self.config.sequence_parallel: - padding_mask = ( - tensor_parallel.scatter_to_sequence_parallel_region( - padding_mask.transpose(0, 1).contiguous() - ) - .transpose(0, 1) - .contiguous() - ) else: # intermediate stage of pipeline # decoder will get hidden_states from encoder.input_tensor @@ -417,7 +403,6 @@ def _preprocess( rotary_pos_cos, rotary_pos_sin, sequence_len_offset, - padding_mask, ) if rotary_pos_cos_sin is not None: # only in the case of flashinfer fused rope will we @@ -461,7 +446,6 @@ def forward( *, inference_params: Optional[BaseInferenceContext] = None, loss_mask: Optional[Tensor] = None, - padding_mask: Optional[Tensor] = None, ) -> Tensor: """Forward function of the GPT Model This function passes the input tensors through the embedding layer, and then the decoder and finally into the post @@ -472,9 +456,6 @@ def forward( Args: runtime_gather_output (bool): Gather output at runtime. Default None means `parallel_output` arg in the constructor will be used. - padding_mask (Tensor, optional): Padding mask for MoE routing. - Shape [bsz, seq_length]. True = padding (exclude), False = valid (include). - Only used for MoE layers to exclude padding tokens from routing computations. 
""" if self.config.fine_grained_activation_offloading: self.preprocess_for_fine_grained_offloading() @@ -487,19 +468,13 @@ def forward( decoder_input=decoder_input, inference_context=inference_context, packed_seq_params=packed_seq_params, - padding_mask=padding_mask, ) - ( - decoder_input, - rotary_pos_emb, - rotary_pos_cos, - rotary_pos_sin, - sequence_len_offset, - padding_mask, - ) = preproc_output[:6] + (decoder_input, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, sequence_len_offset) = ( + preproc_output[:5] + ) - rotary_pos_cos_sin = preproc_output[6] if len(preproc_output) == 7 else None + rotary_pos_cos_sin = preproc_output[5] if len(preproc_output) == 6 else None # Run decoder. hidden_states = self.decoder( @@ -512,7 +487,6 @@ def forward( rotary_pos_cos_sin=rotary_pos_cos_sin, packed_seq_params=packed_seq_params, sequence_len_offset=sequence_len_offset, - padding_mask=padding_mask, **(extra_block_kwargs or {}), ) @@ -750,7 +724,6 @@ def build_schedule_plan( runtime_gather_output: Optional[bool] = None, inference_params: Optional[BaseInferenceContext] = None, loss_mask: Optional[Tensor] = None, - padding_mask: Optional[Tensor] = None, ): """Builds a computation schedule plan for the model. @@ -776,7 +749,6 @@ def build_schedule_plan( inference_params (InferenceParams, optional): Parameters for inference. Defaults to None. loss_mask (Optional[Tensor], optional): Loss mask. Defaults to None. - padding_mask (Optional[Tensor], optional): Padding mask. Defaults to None. Returns: TransformerModelChunkSchedulePlan: The model chunk schedule plan. 
@@ -798,7 +770,6 @@ def build_schedule_plan( extra_block_kwargs, runtime_gather_output, loss_mask, - padding_mask, ) def sharded_state_dict( diff --git a/megatron/core/transformer/mlp.py b/megatron/core/transformer/mlp.py index fbb960f4be9..8dcf196da94 100644 --- a/megatron/core/transformer/mlp.py +++ b/megatron/core/transformer/mlp.py @@ -137,7 +137,7 @@ def __init__( tp_group=tp_group, ) - def forward(self, hidden_states, per_token_scale=None, **kwargs): + def forward(self, hidden_states, per_token_scale=None): """Perform the forward pass through the MLP block.""" # [s, b, 4 * h/p] nvtx_range_push(suffix="linear_fc1") diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py index 153bac00ec1..10d10f667fe 100644 --- a/megatron/core/transformer/moe/moe_layer.py +++ b/megatron/core/transformer/moe/moe_layer.py @@ -178,13 +178,13 @@ def __init__( self.cudagraph_tensor_store = MoECudaGraphTensorStore() @maybe_skip_or_early_return_by_cudagraph("route") - def route(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): + def route(self, hidden_states: torch.Tensor): """Compute token routing for preprocessing. This method uses the router to determine which experts to send each token to, producing routing probabilities and a mapping. """ - probs, routing_map = self.router(hidden_states, padding_mask=padding_mask) + probs, routing_map = self.router(hidden_states) return probs, routing_map @maybe_skip_or_early_return_by_cudagraph("preprocess") @@ -270,7 +270,7 @@ def combine(self, output: torch.Tensor, shared_expert_output: Optional[torch.Ten output = output + shared_expert_output return output - def forward(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): + def forward(self, hidden_states: torch.Tensor): """Forward pass for the MoE layer. 
The forward pass comprises four main steps: @@ -280,11 +280,7 @@ def forward(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tens 4. Combine: The outputs from the experts are combined and returned. Args: - hidden_states (torch.Tensor): The input tensor shape [seq_length, bsz, hidden_size]. - padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. - used for correct auxiliary loss computation for packed sequence. - Shape = [bsz, seq_length]. True = padding (exclude), False = valid (include). - Defaults to None (all tokens are valid). + hidden_states (torch.Tensor): The input tensor to the MoE layer. Returns: A tuple containing the output tensor and the MLP bias, if any. @@ -295,15 +291,11 @@ def forward(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tens "are enabled without also enabling sequence parallelism." ) - # Transpose from [bsz, seq_length] to [seq_length, bsz] to align with hidden_states - if padding_mask is not None: - padding_mask = padding_mask.transpose(0, 1).bool() - # MoE forward: route -> dispatch -> compute -> combine - def custom_forward(hidden_states, padding_mask=None): + def custom_forward(hidden_states): try: shared_expert_output = self.shared_experts_compute(hidden_states) - probs, routing_map = self.route(hidden_states, padding_mask=padding_mask) + probs, routing_map = self.route(hidden_states) hidden_states, probs, residual = self.preprocess(hidden_states, probs, routing_map) except MoECudaGraphPartialCaptureSignal as e: # This signal is raised from the maybe_skip_or_early_return_by_cudagraph decorator. 
@@ -326,14 +318,11 @@ def custom_forward(hidden_states, padding_mask=None): tensor_parallel.random.get_cuda_rng_tracker, parallel_state.get_tensor_model_parallel_group(), hidden_states, - padding_mask, ) else: - outputs = tensor_parallel.checkpoint( - custom_forward, False, hidden_states, padding_mask - ) + outputs = tensor_parallel.checkpoint(custom_forward, False, hidden_states) else: - outputs = custom_forward(hidden_states, padding_mask) + outputs = custom_forward(hidden_states) return outputs diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index f44d441c765..28cff06f5ec 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -1,4 +1,5 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + import math from dataclasses import dataclass from typing import List, Optional, Union @@ -10,7 +11,6 @@ from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel import get_cuda_rng_tracker, get_expert_parallel_rng_tracker_name -from megatron.core.tensor_parallel.mappings import reduce_from_tensor_model_parallel_region from megatron.core.transformer.cuda_graphs import is_graph_capturing from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig @@ -120,34 +120,18 @@ def switch_load_balancing_loss_func( return aux_loss -def z_loss_func(logits, z_loss_coeff, padding_mask: Optional[torch.Tensor] = None): +def z_loss_func(logits, z_loss_coeff): """Encourages the router's logits to remain small to enhance stability. Please refer to the ST-MoE paper (https://arxiv.org/pdf/2202.08906.pdf) for details. Args: logits (torch.Tensor): The logits of the router. - z_loss_coeff (float): The coefficient for the z-loss. 
- padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. - Shape [num_tokens]. True = padding (exclude), - False = valid (include). Defaults to None. Returns: torch.Tensor: The logits after applying the z-loss. """ - logsum = torch.logsumexp(logits, dim=-1) - z_loss_values = torch.square(logsum) - - if padding_mask is not None: - # Invert padding_mask: True (padding) -> 0, False (valid) -> 1 - valid_mask = ~padding_mask - # Only compute z_loss for valid (non-padding) tokens - z_loss_values = z_loss_values * valid_mask - # Compute mean over valid tokens only - num_valid_tokens = valid_mask.sum() - z_loss = z_loss_values.sum() / torch.clamp(num_valid_tokens, min=1.0) * z_loss_coeff - else: - z_loss = torch.mean(z_loss_values) * z_loss_coeff + z_loss = torch.mean(torch.square(torch.logsumexp(logits, dim=-1))) * z_loss_coeff return z_loss @@ -187,28 +171,6 @@ def get_capacity(num_tokens: int, num_experts: int, capacity_factor: float, min_ return capacity -def get_tokens_per_expert_and_token_count( - routing_map: torch.Tensor, - reduce_group: torch.distributed.ProcessGroup, - topk: int = None, - with_padding_mask: bool = False, -) -> torch.Tensor: - """ - Compute global_tokens_per_expert, local_num_tokens and total_num_tokens with padding mask. 
- """ - local_tokens_per_expert = routing_map.sum(dim=0) - global_tokens_per_expert = reduce_from_tensor_model_parallel_region( - local_tokens_per_expert, reduce_group - ) - if with_padding_mask: - local_num_tokens = local_tokens_per_expert.sum() / topk - total_num_tokens = global_tokens_per_expert.sum() / topk - else: - local_num_tokens = routing_map.shape[0] - total_num_tokens = local_num_tokens * reduce_group.size() - return global_tokens_per_expert, local_num_tokens, total_num_tokens - - class MoEAuxLossAutoScaler(torch.autograd.Function): """An AutoScaler that triggers the backward pass and scales the grad for auxiliary loss.""" @@ -667,48 +629,35 @@ def compute_topk(scores, topk, num_groups=None, group_topk=None): def compute_routing_scores_for_aux_loss( - logits: torch.Tensor, - topk: int, - score_function: str, - fused: bool = False, - padding_mask: Optional[torch.Tensor] = None, + logits: torch.Tensor, topk: int, score_function: str, fused: bool = False ): """Compute routing scores based on the score function. Args: logits (torch.Tensor): The logits tensor after gating, shape: [num_tokens, num_experts]. - padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. - Shape [num_tokens]. True = padding (exclude), - False = valid (include). Defaults to None. + Returns: - Tuple[torch.Tensor, torch.Tensor]: routing_map and scores. + torch.Tensor: The normalized routing scores. """ if fused: if not HAVE_TE or fused_compute_score_for_moe_aux_loss is None: raise ValueError( "fused_compute_score_for_moe_aux_loss is not available. Please install TE >= 2.6.0." 
) - routing_map, scores = fused_compute_score_for_moe_aux_loss( + return fused_compute_score_for_moe_aux_loss( logits=logits, topk=topk, score_function=score_function ) - else: - if score_function == "softmax": - scores = torch.softmax(logits, dim=-1, dtype=torch.float32) - elif score_function == "sigmoid": - scores = torch.sigmoid(logits) - scores = scores / (scores.sum(dim=-1, keepdim=True) + 1e-20) - else: - raise ValueError(f"Invalid score_function: {score_function}") - _, top_indices = torch.topk(scores, k=topk, dim=1) - routing_map = torch.zeros_like(logits).int().scatter(1, top_indices, 1).bool() + if score_function == "softmax": + scores = torch.softmax(logits, dim=-1, dtype=torch.float32) + elif score_function == "sigmoid": + scores = torch.sigmoid(logits) + scores = scores / (scores.sum(dim=-1, keepdim=True) + 1e-20) + else: + raise ValueError(f"Invalid score_function: {score_function}") - # Apply padding mask to scores if provided - if padding_mask is not None: - # Invert padding_mask and make True indicates valid tokens - valid_mask = (~padding_mask).unsqueeze(-1) - routing_map = routing_map * valid_mask - scores = scores * valid_mask + _, top_indices = torch.topk(scores, k=topk, dim=1) + routing_map = torch.zeros_like(logits).int().scatter(1, top_indices, 1).bool() return routing_map, scores diff --git a/megatron/core/transformer/moe/router.py b/megatron/core/transformer/moe/router.py index 1c502e212ad..16fc9d9af8f 100644 --- a/megatron/core/transformer/moe/router.py +++ b/megatron/core/transformer/moe/router.py @@ -1,11 +1,12 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from abc import ABC, abstractmethod -from typing import Optional, Union +from typing import Optional import torch from megatron.core.jit import jit_fuser +from megatron.core.tensor_parallel import reduce_from_tensor_model_parallel_region from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.moe.moe_utils import ( MoEAuxLossAutoScaler, @@ -13,7 +14,6 @@ apply_random_logits, apply_router_token_dropping, compute_routing_scores_for_aux_loss, - get_tokens_per_expert_and_token_count, router_gating_linear, save_to_aux_losses_tracker, sinkhorn, @@ -268,28 +268,22 @@ def is_aux_loss_enabled(self) -> bool: return False def _apply_aux_loss( - self, - probs: torch.Tensor, - scores_for_aux_loss: torch.Tensor, - routing_map: torch.Tensor, - with_padding_mask: bool = False, + self, probs: torch.Tensor, scores_for_aux_loss: torch.Tensor, routing_map: torch.Tensor ): """Apply the auxiliary loss for the given scores and routing map.""" aux_loss_coeff = self.get_aux_loss_coeff("aux_loss") if aux_loss_coeff == 0: return probs - - global_tokens_per_expert, local_num_tokens, total_num_tokens = ( - get_tokens_per_expert_and_token_count( - routing_map=routing_map, - reduce_group=self.tp_cp_group, - topk=self.topk, - with_padding_mask=with_padding_mask, - ) + tokens_per_expert = routing_map.sum(dim=0) + tokens_per_expert = reduce_from_tensor_model_parallel_region( + tokens_per_expert, self.tp_cp_group ) + num_tokens = routing_map.shape[0] + total_num_tokens = num_tokens * self.tp_cp_group.size() + aux_loss = switch_load_balancing_loss_func( probs=scores_for_aux_loss, - tokens_per_expert=global_tokens_per_expert, + tokens_per_expert=tokens_per_expert, total_num_tokens=total_num_tokens, topk=self.topk, num_experts=self.config.num_moe_experts, @@ -297,12 +291,7 @@ def _apply_aux_loss( fused=self.config.moe_router_fusion, ) probs = self.attach_and_log_load_balancing_loss( - probs, - aux_loss_coeff, - aux_loss, - "load_balancing_loss", - self.tp_cp_group, - 
valid_token_count=local_num_tokens, + probs, aux_loss_coeff, aux_loss, "load_balancing_loss", self.tp_cp_group ) return probs @@ -313,7 +302,6 @@ def _apply_seq_aux_loss( routing_map: torch.Tensor, seq_length: int, bsz: int, - with_padding_mask: bool = False, ): """Apply the sequence-level auxiliary loss for the given scores and routing map. @@ -327,21 +315,17 @@ def _apply_seq_aux_loss( return probs scores_for_aux_loss = scores_for_aux_loss.reshape(seq_length, -1) - routing_map = routing_map.reshape(seq_length, -1) - - global_tokens_per_expert, local_num_tokens, total_num_tokens = ( - get_tokens_per_expert_and_token_count( - routing_map=routing_map, - reduce_group=self.tp_cp_group, - with_padding_mask=with_padding_mask, - topk=self.topk * bsz, - ) + tokens_per_expert = routing_map.reshape(seq_length, -1).sum(dim=0) + tokens_per_expert = reduce_from_tensor_model_parallel_region( + tokens_per_expert, self.tp_cp_group ) + total_num_tokens = seq_length * self.tp_cp_group.size() + aux_loss = ( switch_load_balancing_loss_func( probs=scores_for_aux_loss, - tokens_per_expert=global_tokens_per_expert, + tokens_per_expert=tokens_per_expert, total_num_tokens=total_num_tokens, topk=self.topk, num_experts=self.config.num_moe_experts, @@ -350,42 +334,31 @@ def _apply_seq_aux_loss( ) / bsz ) - probs = self.attach_and_log_load_balancing_loss( - probs, - seq_aux_loss_coeff, - aux_loss, - "seq_load_balancing_loss", - self.tp_cp_group, - valid_token_count=local_num_tokens, + probs, seq_aux_loss_coeff, aux_loss, "seq_load_balancing_loss", self.tp_cp_group ) return probs def _apply_global_aux_loss( - self, - probs: torch.Tensor, - scores_for_aux_loss: torch.Tensor, - routing_map: torch.Tensor, - with_padding_mask: bool = False, + self, probs: torch.Tensor, scores_for_aux_loss: torch.Tensor, routing_map: torch.Tensor ): """Apply the global auxiliary loss for the given scores and routing map.""" global_aux_loss_coeff = self.get_aux_loss_coeff("global_aux_loss") if global_aux_loss_coeff 
== 0: return probs - # Use unified function to compute tokens_per_expert and num_tokens - global_tokens_per_expert, local_num_tokens, total_num_tokens = ( - get_tokens_per_expert_and_token_count( - routing_map=routing_map, - reduce_group=self.tp_dp_cp_group, - with_padding_mask=with_padding_mask, - topk=self.topk, - ) + tokens_per_expert = routing_map.sum(dim=0) + tokens_per_expert = reduce_from_tensor_model_parallel_region( + tokens_per_expert, self.tp_dp_cp_group ) - self.global_tokens_per_expert += global_tokens_per_expert + + self.global_tokens_per_expert += tokens_per_expert self.ga_steps += 1 averated_tokens_per_expert = self.global_tokens_per_expert / self.ga_steps + num_tokens = scores_for_aux_loss.shape[0] + total_num_tokens = num_tokens * self.tp_dp_cp_group.size() + global_aux_loss = switch_load_balancing_loss_func( probs=scores_for_aux_loss, tokens_per_expert=averated_tokens_per_expert, @@ -401,7 +374,6 @@ def _apply_global_aux_loss( global_aux_loss, "global_load_balancing_loss", self.tp_dp_cp_group, - valid_token_count=local_num_tokens, ) return probs @@ -412,20 +384,8 @@ def attach_and_log_load_balancing_loss( aux_loss: torch.Tensor, aux_loss_name: str, reduce_group: torch.distributed.ProcessGroup, - valid_token_count: Optional[Union[int, torch.Tensor]] = None, ): - """Attach aux loss function to activation and add to logging. - - Args: - activation (torch.Tensor): Activation tensor to attach the aux loss to. - aux_loss_coeff (float): Coefficient for the aux loss. - aux_loss (torch.Tensor): Computed aux loss. - aux_loss_name (str): Name of the aux loss for logging. - reduce_group (torch.distributed.ProcessGroup): Process group for reduction. - valid_token_count (int or torch.Tensor, optional): Number of valid tokens excluding - padding tokens. Can be a Python int or a torch.Tensor (typically 0-d tensor). - If None, uses activation.shape[0]. Defaults to None. 
- """ + """Attach aux loss function to activation and add to logging.""" # TODO (zijiey): fix the per_layer_logging for MTP, currently it will incorrectly # add the aux loss logging value to other layer's since it is difficult to get the # correct layer_number for MTP. It does not affect the correctness of the calculation @@ -448,22 +408,17 @@ def attach_and_log_load_balancing_loss( # which scales both the main_loss gradient and aux_loss gradient by # 1/(num_local_tokens * dp_size * num_micro_batches) in finalize_model_grads function. # To correct this scaling, we need to scale the aux_loss by num_local_tokens here. - # Use valid_token_count (excluding padding) if provided, otherwise use total tokens. - num_tokens = valid_token_count if valid_token_count is not None else activation.shape[0] - activation = MoEAuxLossAutoScaler.apply(activation, aux_loss * num_tokens) + activation = MoEAuxLossAutoScaler.apply(activation, aux_loss * activation.shape[0]) else: activation = MoEAuxLossAutoScaler.apply(activation, aux_loss) return activation - def apply_z_loss(self, logits, padding_mask: Optional[torch.Tensor] = None): + def apply_z_loss(self, logits): """Encourages the router's logits to remain small to enhance stability. Please refer to the ST-MoE paper (https://arxiv.org/pdf/2202.08906.pdf) for details. Args: logits (torch.Tensor): The logits of the router. - padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. - Shape [num_tokens]. True = padding (exclude), - False = valid (include). Defaults to None. Returns: torch.Tensor: The logits after applying the z-loss. @@ -471,7 +426,7 @@ def apply_z_loss(self, logits, padding_mask: Optional[torch.Tensor] = None): if self.config.moe_z_loss_coeff is not None and self.training and torch.is_grad_enabled(): # Skip Z loss calculations when using torch.no_grad() or checkpointing. 
moe_z_loss_coeff = self.config.moe_z_loss_coeff / self.tp_cp_group.size() - z_loss = z_loss_func(logits, moe_z_loss_coeff, padding_mask=padding_mask) + z_loss = z_loss_func(logits, moe_z_loss_coeff) scale_up = 1.0 if self.calculate_per_token_loss: # The expected final scaling for z_loss gradients is @@ -481,9 +436,7 @@ def apply_z_loss(self, logits, padding_mask: Optional[torch.Tensor] = None): # which scales both the main_loss gradient and z_loss gradient by # 1/(num_local_tokens * dp_size * num_micro_batches) in finalize_model_grads(). # To correct this scaling, we need to scale the z_loss by num_local_tokens here. - # Count valid tokens: sum of inverted mask (False -> True = valid) - num_tokens = (~padding_mask).sum() if padding_mask is not None else logits.shape[0] - logits = MoEAuxLossAutoScaler.apply(logits, z_loss * num_tokens) + logits = MoEAuxLossAutoScaler.apply(logits, z_loss * logits.shape[0]) else: logits = MoEAuxLossAutoScaler.apply(logits, z_loss) @@ -517,32 +470,20 @@ def apply_input_jitter(self, input: torch.Tensor): return input @jit_fuser - def _apply_expert_bias( - self, routing_map: torch.Tensor, padding_mask: Optional[torch.Tensor] = None - ): + def _apply_expert_bias(self, routing_map: torch.Tensor): """ Update expert bias and tokens_per_expert Prevent extra local tokens accumulation on evaluation or activation recomputation - - Args: - routing_map (torch.Tensor): Token to expert routing map, [num_tokens, num_experts]. - padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. - Shape [num_tokens]. True = padding (exclude), False = valid (include). 
""" if self.enable_expert_bias and torch.is_grad_enabled(): with torch.no_grad(): - if padding_mask is not None: - routing_map = routing_map & (~padding_mask) self.local_tokens_per_expert += routing_map.sum(dim=0) - def routing(self, logits: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): + def routing(self, logits: torch.Tensor): """Top-k routing function Args: logits (torch.Tensor): Logits tensor after gating. - padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. - Shape = [seq_length, bsz]. True=padding(exclude), - False=valid(include). Defaults to None. Returns: probs (torch.Tensor): The probabilities of token to experts assignment. @@ -552,12 +493,8 @@ def routing(self, logits: torch.Tensor, padding_mask: Optional[torch.Tensor] = N seq_length, bsz = logits.shape[:2] logits = logits.view(-1, self.config.num_moe_experts) - # Flatten padding_mask to [num_tokens] if provided - if padding_mask is not None: - padding_mask = padding_mask.reshape(-1) - # Apply Z-Loss - logits = self.apply_z_loss(logits, padding_mask=padding_mask) + logits = self.apply_z_loss(logits) # Calculate probs and routing_map for token dispatching if self.routing_type == "sinkhorn": @@ -590,35 +527,18 @@ def routing(self, logits: torch.Tensor, padding_mask: Optional[torch.Tensor] = N if self.training and torch.is_grad_enabled() and self.is_aux_loss_enabled(): # Calculate scores and routing_map for aux loss routing_map_for_aux_loss, scores_for_aux_loss = compute_routing_scores_for_aux_loss( - logits, - self.topk, - self.score_function, - fused=self.config.moe_router_fusion, - padding_mask=padding_mask, - ) - probs = self._apply_aux_loss( - probs, - scores_for_aux_loss, - routing_map_for_aux_loss, - with_padding_mask=padding_mask is not None, + logits, self.topk, self.score_function, fused=self.config.moe_router_fusion ) + probs = self._apply_aux_loss(probs, scores_for_aux_loss, routing_map_for_aux_loss) probs = self._apply_seq_aux_loss( - probs, - 
scores_for_aux_loss, - routing_map_for_aux_loss, - seq_length, - bsz, - with_padding_mask=padding_mask is not None, + probs, scores_for_aux_loss, routing_map_for_aux_loss, seq_length, bsz ) probs = self._apply_global_aux_loss( - probs, - scores_for_aux_loss, - routing_map_for_aux_loss, - with_padding_mask=padding_mask is not None, + probs, scores_for_aux_loss, routing_map_for_aux_loss ) # Optionally apply expert bias - self._apply_expert_bias(routing_map, padding_mask=padding_mask) + self._apply_expert_bias(routing_map) return probs, routing_map @@ -628,15 +548,12 @@ def reset_global_aux_loss_tracker(self): self.global_tokens_per_expert.zero_() self.ga_steps.zero_() - def forward(self, input: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): + def forward(self, input: torch.Tensor): """ Forward pass of the router. Args: input (torch.Tensor): Input tensor. - padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. - Shape = [seq_length, bsz]. True=padding(exclude), - False=valid(include). Defaults to None. """ self._maintain_float32_expert_bias() @@ -648,7 +565,7 @@ def forward(self, input: torch.Tensor, padding_mask: Optional[torch.Tensor] = No # Apply force load balancing with random logits for benchmark logits = apply_random_logits(logits) - probs, routing_map = self.routing(logits, padding_mask=padding_mask) + probs, routing_map = self.routing(logits) return probs, routing_map diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index cbbd7ec00eb..023db1fe75a 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -390,6 +390,7 @@ def build_layer(layer_spec, layer_number): def has_final_layernorm_in_this_stage(self): """ Check if this vpp stage contains the final layernorm. + Note: Final layernorm now has been moved from the post-process stage to the last decoder layer by using this function. 
@@ -428,18 +429,12 @@ def _checkpointed_forward( attention_bias: Tensor, packed_seq_params: PackedSeqParams, use_inner_quantization_context: bool, - padding_mask: Optional[Tensor] = None, ): """Forward method with activation checkpointing.""" def custom(start: int, end: int): def custom_forward( - hidden_states, - attention_mask, - context, - context_mask, - rotary_pos_emb, - padding_mask=None, + hidden_states, attention_mask, context, context_mask, rotary_pos_emb ): for index in range(start, end): layer = self._get_layer(index) @@ -470,7 +465,6 @@ def custom_forward( attention_bias=attention_bias, inference_context=None, packed_seq_params=packed_seq_params, - padding_mask=padding_mask, ) return hidden_states, context @@ -490,7 +484,6 @@ def checkpoint_handler(forward_func): context, context_mask, rotary_pos_emb, - padding_mask, ) else: return tensor_parallel.checkpoint( @@ -501,7 +494,6 @@ def checkpoint_handler(forward_func): context, context_mask, rotary_pos_emb, - padding_mask, ) if self.config.recompute_method == 'uniform': @@ -607,7 +599,6 @@ def forward( inference_context: Optional[BaseInferenceContext] = None, packed_seq_params: Optional[PackedSeqParams] = None, sequence_len_offset: Optional[Tensor] = None, - padding_mask: Optional[Tensor] = None, *, inference_params: Optional[BaseInferenceContext] = None, dynamic_inference_decode_only: Optional[bool] = None, @@ -717,7 +708,6 @@ def forward( attention_bias=attention_bias, packed_seq_params=packed_seq_params, use_inner_quantization_context=use_inner_quantization_context, - padding_mask=padding_mask, ) else: for l_no, layer in enumerate(self.layers): @@ -755,7 +745,6 @@ def forward( inference_context=inference_context, packed_seq_params=packed_seq_params, sequence_len_offset=sequence_len_offset, - padding_mask=padding_mask, ) if ( diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index 21f38b06f30..3ea40577009 100644 --- 
a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -457,12 +457,7 @@ def forward(self, *args, **kwargs): # runners in the cuda graph manager kwargs.pop("dynamic_inference_decode_only", None) hidden_states, context = self._forward_attention(*args, **kwargs) - - output = self._forward_mlp( - hidden_states, - kwargs.get("inference_context", None), - padding_mask=kwargs.get("padding_mask", None), - ) + output = self._forward_mlp(hidden_states, kwargs.get("inference_context", None)) return output, context def _forward_attention( @@ -479,7 +474,6 @@ def _forward_attention( inference_context: Optional[Any] = None, packed_seq_params: Optional[PackedSeqParams] = None, sequence_len_offset: Optional[Tensor] = None, - padding_mask: Optional[Tensor] = None, *, inference_params: Optional[Any] = None, ): @@ -597,18 +591,12 @@ def _forward_attention( return hidden_states, context - def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None): + def _forward_mlp(self, hidden_states, inference_context=None): """ Perform a forward pass through the feed-forward layer. Args: hidden_states (Tensor): Transformed hidden states before the MLP layernorm. - Shape [seq_length, batch_size, hidden_size]. - inference_context: Inference context for optimizations. - padding_mask (Tensor, optional): Padding mask for MoE routing. - Shape [bsz, seq_length]. True = padding (exclude), False = valid (include). - Only used for MoE layers to exclude padding tokens from aux loss computations. - The MoELayer will internally transform this to [seq_length, bsz] format. Returns: output (Tensor): Transformed hidden states of shape [s, b, h]. @@ -654,7 +642,7 @@ def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None) assert ( not self.recompute_pre_mlp_layernorm ), "Recomputation is not supported for CUDA graph." 
- cudagraph_outputs = self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask) + cudagraph_outputs = self.mlp(pre_mlp_layernorm_output) nvtx_range_pop(suffix="mlp") return cudagraph_outputs + [residual] elif self.recompute_mlp: @@ -668,11 +656,10 @@ def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None) tensor_parallel.random.get_cuda_rng_tracker, self.pg_collection.tp, pre_mlp_layernorm_output, - padding_mask=padding_mask, ) else: mlp_output_with_bias = tensor_parallel.checkpoint( - self.mlp, False, pre_mlp_layernorm_output, padding_mask=padding_mask + self.mlp, False, pre_mlp_layernorm_output ) elif should_chunk_mlp_for_prefill: # Chunk input along sequence dimension @@ -688,7 +675,7 @@ def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None) bias_output = torch.stack(bias_chunks, dim=0).sum(dim=0) if bias_chunks else None mlp_output_with_bias = (mlp_output, bias_output) else: - mlp_output_with_bias = self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask) + mlp_output_with_bias = self.mlp(pre_mlp_layernorm_output) if self.recompute_pre_mlp_layernorm: # discard the output of the pre-mlp layernorm and register the recompute diff --git a/tests/test_utils/python_scripts/recipe_parser.py b/tests/test_utils/python_scripts/recipe_parser.py index b866fbbf5c2..a497bdbd9de 100644 --- a/tests/test_utils/python_scripts/recipe_parser.py +++ b/tests/test_utils/python_scripts/recipe_parser.py @@ -1,4 +1,3 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import copy import itertools import logging diff --git a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py index 6c59dd3f9e3..81e61a3404a 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py @@ -23,7 +23,7 @@ from tests.unit_tests.test_utilities import Utils -def build_model(config, use_padding_mask=False): +def build_model(config): seq_len = 32 max_seq_len = 300 # ids = random.sample([i for i in range(max_seq_len)], seq_len) @@ -39,12 +39,6 @@ def build_model(config, use_padding_mask=False): "attention_mask": torch.ones((1, 1, seq_len, seq_len), dtype=bool).cuda(), } - # Optionally add padding_mask with same shape as input_ids - if use_padding_mask: - padding_mask = torch.zeros((1, seq_len), dtype=torch.bool).cuda() - padding_mask[0, -8:] = True - data["padding_mask"] = padding_mask - # build layer spec transformer_layer_spec = get_gpt_decoder_block_spec(config=config, use_transformer_engine=True) mtp_block_spec = get_gpt_mtp_block_spec(config, transformer_layer_spec.layer_specs[-1], True) @@ -54,7 +48,7 @@ def build_model(config, use_padding_mask=False): config=config, transformer_layer_spec=transformer_layer_spec, mtp_block_spec=mtp_block_spec, - vocab_size=128, + vocab_size=100, pre_process=True, post_process=True, max_sequence_length=max_seq_len, @@ -180,109 +174,3 @@ def test_1f1b_schedule_model_chunk(self, mtp_layers, dispatcher_type, fp8_flag, gpt_models[i] = None gc.collect() torch.cuda.empty_cache() - - @pytest.mark.skipif(not is_te_min_version("1.9.0.dev0"), reason="Requires TE >= 1.9.0.dev0") - @pytest.mark.parametrize("dispatcher_type", get_valid_token_dispatcher_types()) - @pytest.mark.parametrize("layers", [[2, 1], [1, 1]]) - @pytest.mark.parametrize("tp_size", [1, 2, 4, 8]) - def test_1f1b_schedule_model_chunk_with_padding_mask(self, dispatcher_type, layers, tp_size): - """ - Verifies all-to-all overlap 
optimization with padding_mask produces - the same results as the reference implementation with various TP/EP/CP combinations. - """ - # Re-initialize model parallel with the specified configuration - Utils.destroy_model_parallel() - Utils.initialize_model_parallel( - tensor_model_parallel_size=tp_size, - pipeline_model_parallel_size=1, - expert_model_parallel_size=4, - expert_tensor_parallel_size=1, - ) - set_streams() - - microbatches = 1 - - gpt_models = [] - schedule_plans = [] - ref_captures = [] - datas = [] - - # create TransformerConfig - extra_kwargs = { - "moe_token_dispatcher_type": dispatcher_type, - "tensor_model_parallel_size": tp_size, - "sequence_parallel": tp_size > 1, - } - if dispatcher_type == "flex": - extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" - extra_kwargs["moe_router_dtype"] = "fp32" - with deterministic_mode(): - for layer_num in layers: - output_tensors = [] - # build config - config = get_test_config(num_layers=layer_num, extra_kwargs=extra_kwargs) - # build model with padding_mask - gpt_model, schedule_plan, data = build_model(config, use_padding_mask=True) - gpt_model.cuda() - gpt_models.append(gpt_model) - datas.append(data) - schedule_plans.append(schedule_plan) - - # run reference - for _ in range(microbatches): - loss = gpt_model.forward(**data) - loss = float16_to_fp32(loss) - loss.backward(torch.ones_like(loss)) - output_tensors.append(loss) - - capture = {"outputs": output_tensors} - for name, param in gpt_model.named_parameters(): - capture[name] = param.grad - ref_captures.append(capture) - gpt_model.zero_grad() - assert gpt_models[0].embedding is not None - assert gpt_models[1].embedding is not None - # run a2a overlap - capture_0 = {"outputs": []} - capture_1 = {"outputs": []} - a2a_captures = [capture_0, capture_1] - for i in range(microbatches): - # 1st forward - if i > 0: - assert ( - schedule_plans[0].pre_process is None - ), "pre_process should be released after backward" - schedule_plans[0] = 
gpt_models[0].build_schedule_plan(**datas[0]) - schedule_plans[1] = gpt_models[1].build_schedule_plan(**datas[1]) - f_input_0 = TransformerModelChunkSchedulePlan.run(schedule_plans[0], None) - capture_0["outputs"].append(f_input_0) - # overlap - f_input_1 = TransformerModelChunkSchedulePlan.run( - schedule_plans[1], schedule_plans[0], b_grad=torch.ones_like(f_input_0) - ) - capture_1["outputs"].append(f_input_1) - # last backward - TransformerModelChunkSchedulePlan.run( - None, schedule_plans[1], b_grad=torch.ones_like(f_input_1) - ) - for i in range(len(gpt_models)): - for name, param in gpt_models[i].named_parameters(): - a2a_captures[i][name] = param.grad - - # compare results - for i in range(len(ref_captures)): - comp_res = compare_captures(ref_captures[i], a2a_captures[i], True, True) - assert comp_res[0], f"[rank {torch.distributed.get_rank()}] {comp_res[1]}" - - # release resources is necessary, otherwise later testcases will oom - for i in range(len(schedule_plans)): - schedule_plans[i] = None - ref_captures[i] = None - a2a_captures[i] = None - for k in datas[i]: - datas[i][k] = None - datas[i] = None - gpt_models[i].zero_grad() - gpt_models[i] = None - gc.collect() - torch.cuda.empty_cache() diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py index 5ec096e5a04..7fb97f6e586 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py @@ -502,8 +502,8 @@ def test_mtp_layer_overlap(self, dispatcher_type, fp8_flag): position_ids = torch.tensor(data, dtype=torch.int64).repeat((1, 1)).cuda() attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool).cuda() # get rotary pos emb - _, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, _, _padding_mask = ( - gpt_model._preprocess(input_ids, position_ids) + _, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, _ = gpt_model._preprocess( + input_ids, position_ids ) # 
reset model params = reset_model(gpt_model) diff --git a/tests/unit_tests/transformer/moe/test_aux_loss.py b/tests/unit_tests/transformer/moe/test_aux_loss.py index f5726777383..b1f78582383 100644 --- a/tests/unit_tests/transformer/moe/test_aux_loss.py +++ b/tests/unit_tests/transformer/moe/test_aux_loss.py @@ -576,192 +576,3 @@ def test_force_balanced_aux_loss(self, tp_size, ep_size, cp_size): reduce_from_tensor_model_parallel_region(aux_loss, router.tp_cp_group) assert aux_loss.item() == 1, f"{aux_loss_type}: {aux_loss.item()}" clear_aux_losses_tracker() - - -class TestPaddingMaskAuxLoss: - """Test padding mask support in various aux loss types.""" - - def setup_model_parallel(self, tp_size=1, ep_size=1, cp_size=1, sequence_parallel=False): - """Initialize model parallel with given configuration. - - Args: - tp_size: Tensor parallel size. - ep_size: Expert parallel size. - cp_size: Context parallel size. - """ - Utils.initialize_model_parallel( - tensor_model_parallel_size=tp_size, - pipeline_model_parallel_size=1, - context_parallel_size=cp_size, - expert_model_parallel_size=ep_size, - ) - _set_random_seed(seed_=123, data_parallel_random_init=False) - - # Store parallel configuration - self.tp_size = tp_size - self.ep_size = ep_size - self.cp_size = cp_size - - # Default configuration - self.default_transformer_config = TransformerConfig( - num_layers=1, - hidden_size=12, - num_attention_heads=8, - num_moe_experts=32, - use_cpu_initialization=True, - moe_router_load_balancing_type="aux_loss", - moe_router_topk=8, - moe_aux_loss_coeff=1.0, - bf16=True, - params_dtype=torch.bfloat16, - add_bias_linear=False, - tensor_model_parallel_size=tp_size, - expert_model_parallel_size=ep_size, - context_parallel_size=cp_size, - sequence_parallel=sequence_parallel and tp_size > 1, - ) - - def new_router(self, **kwargs): - """Create a new router with updated configuration.""" - pg_collection = get_default_pg_collection() - new_transformer_config = 
dataclasses.replace(self.default_transformer_config, **kwargs) - router = TopKRouter(config=new_transformer_config, pg_collection=pg_collection) - router.set_layer_number(0) - return router - - @pytest.mark.internal - @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") - @pytest.mark.parametrize("sequence_parallel", [True, False]) - @pytest.mark.parametrize("aux_loss_type", ["aux_loss", "seq_aux_loss", "global_aux_loss"]) - @pytest.mark.parametrize( - "tp_size,ep_size,cp_size", [(8, 1, 1), (4, 2, 1), (1, 1, 8), (2, 1, 4), (2, 2, 2)] - ) - def test_padding_mask_removes_padding_tokens( - self, aux_loss_type, tp_size, ep_size, cp_size, sequence_parallel - ): - """Test that padding tokens are correctly excluded from aux loss calculation.""" - # Initialize model parallel with given configuration - self.setup_model_parallel( - tp_size=tp_size, ep_size=ep_size, cp_size=cp_size, sequence_parallel=sequence_parallel - ) - - try: - clear_aux_losses_tracker() - - router = self.new_router( - moe_router_load_balancing_type=aux_loss_type, - moe_aux_loss_coeff=1.0, - moe_router_dtype="fp64", - ).cuda() - - seq_len = 32 - batch_size = 2 - hidden_size = router.config.hidden_size - - # Create input with padding - hidden_states_full = torch.randn( - (seq_len, batch_size, hidden_size), dtype=torch.bfloat16, device='cuda' - ) - - # Create padding mask: first half valid (False), second half padding (True) - # Convention: True = padding (exclude), False = valid (include) - padding_mask = torch.zeros((seq_len, batch_size), dtype=torch.bool, device='cuda') - padding_mask[seq_len // 2 :, :] = True - - # Test with padding mask - router.weight.grad = None - scores_with_mask, routing_map_with_mask = router( - hidden_states_full, padding_mask=padding_mask - ) - scores_with_mask.backward(torch.zeros_like(scores_with_mask)) - - loss_name = { - "aux_loss": "load_balancing_loss", - "seq_aux_loss": "seq_load_balancing_loss", - "global_aux_loss": 
"global_load_balancing_loss", - }[aux_loss_type] - - tracker = get_moe_layer_wise_logging_tracker() - aux_loss_with_mask = tracker[loss_name]["values"][0].clone() - grad_with_mask = router.weight.grad.clone() - - # Test without padding (with only half of the tokens) - clear_aux_losses_tracker() - router.weight.grad = None - hidden_states_valid = hidden_states_full[: seq_len // 2, :, :] - scores_without_mask, routing_map_without_mask = router(hidden_states_valid) - scores_without_mask.backward(torch.zeros_like(scores_without_mask)) - - aux_loss_without_mask = tracker[loss_name]["values"][0].clone() - grad_without_mask = router.weight.grad.clone() - - # The aux loss with mask should be close to the aux loss without mask - assert torch.equal(aux_loss_with_mask, aux_loss_without_mask) - assert torch.equal(grad_with_mask, grad_without_mask) - - clear_aux_losses_tracker() - finally: - # Always cleanup model parallel - Utils.destroy_model_parallel() - - @pytest.mark.internal - @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") - @pytest.mark.parametrize( - "tp_size,ep_size,cp_size", [(8, 1, 1), (4, 2, 1), (1, 1, 8), (2, 1, 4), (2, 2, 2)] - ) - def test_padding_mask_with_z_loss(self, tp_size, ep_size, cp_size): - """Test that padding mask works correctly with z_loss.""" - # Initialize model parallel with given configuration - self.setup_model_parallel(tp_size=tp_size, ep_size=ep_size, cp_size=cp_size) - - try: - clear_aux_losses_tracker() - - router = self.new_router( - moe_router_load_balancing_type="aux_loss", - moe_aux_loss_coeff=0.0, - moe_z_loss_coeff=1.0, - moe_router_dtype="fp32", - ).cuda() - - seq_len = 32 - batch_size = 2 - hidden_size = router.config.hidden_size - - # Create input - hidden_states_full = torch.randn( - (seq_len, batch_size, hidden_size), dtype=torch.bfloat16, device='cuda' - ) - - # Create padding mask: first half valid (False), second half padding (True) - # Convention: True = padding (exclude), False = valid 
(include) - padding_mask = torch.zeros((seq_len, batch_size), dtype=torch.bool, device='cuda') - padding_mask[seq_len // 2 :, :] = True - - # Test with padding mask - router.weight.grad = None - scores_with_mask, _ = router(hidden_states_full, padding_mask=padding_mask) - scores_with_mask.sum().backward() - - tracker = get_moe_layer_wise_logging_tracker() - z_loss_with_mask = tracker["z_loss"]["values"][0].clone() - grad_with_mask = router.weight.grad.clone() - - # Test without padding (with only half of the tokens) - clear_aux_losses_tracker() - router.weight.grad = None - hidden_states_valid = hidden_states_full[: seq_len // 2, :, :] - scores_without_mask, _ = router(hidden_states_valid) - scores_without_mask.sum().backward() - - z_loss_without_mask = tracker["z_loss"]["values"][0].clone() - grad_without_mask = router.weight.grad.clone() - - # The z_loss with mask should be close to the z_loss without mask - assert torch.equal(z_loss_with_mask, z_loss_without_mask) - assert torch.equal(grad_with_mask, grad_without_mask) - - clear_aux_losses_tracker() - finally: - # Always cleanup model parallel - Utils.destroy_model_parallel() diff --git a/tests/unit_tests/transformer/moe/test_routers.py b/tests/unit_tests/transformer/moe/test_routers.py index abd1a4db2dc..677d938cdc7 100644 --- a/tests/unit_tests/transformer/moe/test_routers.py +++ b/tests/unit_tests/transformer/moe/test_routers.py @@ -125,53 +125,6 @@ def test_aux_loss(self): out.sum().mul_(0).backward() assert self.sequential_mlp.router.weight.grad.abs().sum() > 0 - @pytest.mark.internal - @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") - def test_router_with_padding_mask(self): - """Test that padding mask correctly excludes padding tokens from routing.""" - self.router = self.router.cuda() - seq_len = 32 - batch_size = 2 - hidden_size = self.router.config.hidden_size - - # Create input with shape [seq_len, batch_size, hidden_size] - hidden_states = torch.randn((seq_len, 
batch_size, hidden_size)).cuda().bfloat16() - - # Create padding mask: first half valid (False), second half padding (True) - # padding_mask shape: [seq_len, batch_size] - # Convention: True = padding (exclude), False = valid (include) - padding_mask = torch.zeros((seq_len, batch_size), dtype=torch.bool, device='cuda') - padding_mask[seq_len // 2 :, :] = True # Second half is padding - - # Test forward pass with padding mask - with torch.no_grad(): - probs_with_mask, routing_map_with_mask = self.router( - hidden_states, padding_mask=padding_mask - ) - - # Test forward pass without padding mask (only valid tokens) - hidden_states_valid = hidden_states[: seq_len // 2, :, :] - probs_without_mask, routing_map_without_mask = self.router(hidden_states_valid) - - # The valid part of routing with mask should match routing without mask - probs_valid_part = probs_with_mask.reshape(seq_len, batch_size, -1)[ - : seq_len // 2, :, : - ] - probs_valid_part = probs_valid_part.reshape(-1, probs_valid_part.shape[-1]) - - # Check that shapes are as expected - assert probs_with_mask.shape == ( - seq_len * batch_size, - self.router.config.num_moe_experts, - ) - assert routing_map_with_mask.shape == ( - seq_len * batch_size, - self.router.config.num_moe_experts, - ) - - # Verify that probs for valid tokens are similar - assert torch.equal(probs_valid_part, probs_without_mask) - @pytest.mark.internal @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") def test_router_dtype(self): From 9885ddb8e08e05786d88b28ee4698739d38a91ae Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Tue, 30 Dec 2025 11:26:53 +0800 Subject: [PATCH 206/334] [Dev] Disable ep overlap memory optimization (#2750) --- megatron/core/models/gpt/fine_grained_callables.py | 5 +++-- megatron/core/pipeline_parallel/utils.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/megatron/core/models/gpt/fine_grained_callables.py 
b/megatron/core/models/gpt/fine_grained_callables.py index 741a25326fb..a0be55c4ca1 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -304,8 +304,9 @@ def backward_dw(self): # the output grad memory is last used in wgrad compute, should be safe to release. assert self.delay_grads_release, "output grad memory should be valid before wgrad." - for tensor in self.output_grads: - tensor.untyped_storage().resize_(0) + if self.manual_release_grads: + for tensor in self.output_grads: + tensor.untyped_storage().resize_(0) self.output_grads = None self.bwd_dw_callables = None diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index 52d401c79f9..e7e416f99bd 100644 --- a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -183,6 +183,7 @@ def __init__( self.inputs = None self.outputs = None self.delay_grads_release = False + self.manual_release_grads = False def default_backward_func(self, outputs, output_grad): """Default backward function""" @@ -268,7 +269,7 @@ def _backward(self, *output_grad): # to avoid delayed garbage collection. If # delay_grads_release is True, dgrad is last used in # wgrad compute and skip the release here. 
- if not self.delay_grads_release: + if self.manual_release_grads and not self.delay_grads_release: g.untyped_storage().resize_(0) grads = self.get_grad() From 929e77f76585668b2dcfcf4c5ff4160831a14235 Mon Sep 17 00:00:00 2001 From: Youngeun Kwon Date: Tue, 30 Dec 2025 13:19:28 -0800 Subject: [PATCH 207/334] feat: Cherry-pick PR of PR!2661 for dev branch (#2757) Signed-off-by: Youngeun Kwon --- .../distributed_data_parallel_config.py | 8 ++++ megatron/core/distributed/fsdp/src/README.md | 7 ++- .../distributed_data_parallel_config.py | 8 ++++ .../megatron_fsdp/param_and_grad_buffer.py | 44 +++++++++++++++++ megatron/core/nccl_allocator.py | 48 +++++++++++++++++++ megatron/training/arguments.py | 9 +++- megatron/training/training.py | 14 ++++++ .../test_mcore_fully_sharded_data_parallel.py | 20 ++++++-- 8 files changed, 151 insertions(+), 7 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel_config.py b/megatron/core/distributed/distributed_data_parallel_config.py index 3f97beab825..eaec971c79c 100644 --- a/megatron/core/distributed/distributed_data_parallel_config.py +++ b/megatron/core/distributed/distributed_data_parallel_config.py @@ -137,6 +137,14 @@ class DistributedDataParallelConfig: when nccl_ub is set. """ + fsdp_manual_registration: bool = False + """If true, manually register the FSDP communication buffers to NCCL user buffer. + This option is only effective when use_megatron_fsdp and nccl_ub is set. + For symmetric registration with large models, the registration itself can take + a significant amount of time. This option minimizes the number of registration calls + to minimize the registration time. 
+ """ + delay_wgrad_compute: bool = False """Delay the weight gradient computation to improve batch-level communication overlapping""" diff --git a/megatron/core/distributed/fsdp/src/README.md b/megatron/core/distributed/fsdp/src/README.md index 9e036f22f67..b4d81b2b368 100644 --- a/megatron/core/distributed/fsdp/src/README.md +++ b/megatron/core/distributed/fsdp/src/README.md @@ -220,13 +220,16 @@ optimizer.load_state_dict(ckpt_state_dict["optimizer"]) - **Only effective when using Megatron-LM.** - Defaults to `False`. - `nccl_ub` will allocate and register the NCCL userbuffer for param and grad buffers. This option enables an SM-efficient NCCL algorithm that could improve the performance of overlapped computations. This flag will be much more effective when used together with SHARP if the FSDP communication includes both NVL and IB domains. Enabling this option will cause additional memory overhead due to the requirement to enable the `fsdp_double_buffer` option. - - **Only effective when using Megatron-LM.** + - **Only effective when using with Megatron-Core.** - Defaults to `False`. - By default we try to use NCCL window (symmetric) registration if it is available. If not it falls back to conventional local registraion. +- `fsdp_manual_registration` will manually register the FSDP communication buffers with the NCCL user buffer. For symmetric registration with large models, the registration itself can take a significant amount of time. This option minimizes the number of registration calls to reduce the registration time. However, with this option enabled, you need to manually call the `ParamAndGradBuffer.manual_buffer_registration()` function after the first iteration. This is already implemented in the Megatron-LM training loop. In other use cases, users are expected to call this function themselves. + - **Only effective when using with Megatron-Core.** + - This option is only effective when `nccl_ub` is enabled. + - Defaults to `False`. 
- `disable_symmetric_registration` will disable NCCL window (i.e. symmetric) registraion when using `nccl_ub`. - Dafaults to `False`. - `fsdp_double_buffer` will use persistently allocated double buffers for temporarily-defined memory needed in `MegatronFSDP` communications. Having persistent double buffers may increase peak VRAM utilization, but is required to register NCCL user buffers (`nccl_ub=True`) for `MegatronFSDP`. Currently, this is only supported for simple repetitive model structures such as GPT. - - **Only effective when using Megatron-LM.** - Defaults to `False`. Automatically overridden to `True` when `nccl_ub` is enabled. - `preproc_state_dict_for_dcp_ckpt` adds `model.state_dict()` and `optimizer.state_dict()` post-hooks that modify the model and optimizer state in preparation for `torch.distributed.checkpoint.{save,load}` ([Torch DCP](https://docs.pytorch.org/docs/stable/distributed.checkpoint.html)) checkpointing. Specifically, it adds `__create_write_items__` and `__create_chunk_list__` methods to Tensors utilized by Torch DCP to redistribute parameters when saving and loading model and optimizer checkpoints. Can be deactivated should the user need a custom distributed checkpointing strategy. - Defaults to `True`. diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py index 86826758498..f0c817e1f80 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py @@ -131,6 +131,14 @@ class DistributedDataParallelConfig: when nccl_ub is set. """ + fsdp_manual_registration: bool = False + """If true, manually register the FSDP communication buffers to NCCL user buffer. + This option is only effective when use_megatron_fsdp and nccl_ub is set. 
+ For symmetric registration with large models, the registration itself can take + a significant amount of time. This option minimizes the number of registration calls + to minimize the registration time. + """ + def __post_init__(self): import os diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index b0154cb94e9..46b97743385 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -1570,6 +1570,7 @@ def __init__( reset_parameters_for_meta_device_init_module ) self.ubr_groups = None + self.already_registered = False # User buffer registration related settings if self.ddp_config.nccl_ub: assert nccl_allocator is not None, ( @@ -1676,6 +1677,10 @@ def get_mem_alloc_context(self, groups=None, symmetric=True): groups = [self.dist_index.get_fsdp_group(is_expert_parallel=False)] if NCCL_ALLOCATOR == "MCORE": + if self.ddp_config.fsdp_manual_registration: + return functools.partial( + nccl_allocator.MemPoolAllocatorWithoutRegistration, NCCL_MEMORY_POOL + ) if len(groups) == 1: # register buffers to the default group directly using nccl memory allocator mem_alloc_context = functools.partial( @@ -1692,6 +1697,12 @@ def get_mem_alloc_context(self, groups=None, symmetric=True): symmetric=symmetric, ) elif NCCL_ALLOCATOR == "APEX": + if self.ddp_config.fsdp_manual_registration: + logging.warning( + "FSDP manual registration is not supported for APEX NCCL allocator." + "falling back to default registration. " + "Please use Megatron Core NCCL allocator for manual registration." + ) if symmetric: logging.warning( "Symmetric registration is not supported for APEX NCCL allocator." 
@@ -1715,6 +1726,39 @@ def get_mem_alloc_context(self, groups=None, symmetric=True): else: return nullcontext + def manual_buffer_registration(self): + """ + Manually register the FSDP communication buffers to NCCL user buffer. + """ + assert self.ddp_config.nccl_ub, "NCCL UBR is not enabled" + assert self.ddp_config.fsdp_double_buffer, "FSDP double buffer is not enabled" + assert self.ddp_config.fsdp_manual_registration, "FSDP manual registration is not enabled" + assert not self.already_registered, "Mem pool is already registered" + + self.already_registered = True + + global NCCL_MEMORY_POOL + torch.cuda.synchronize() + torch.distributed.barrier(async_op=False) + torch.cuda.synchronize() + + for group in self.ubr_groups: + if torch.distributed.get_rank() == 0: + logging.info( + f"[MCORE][FSDP][Manual REG] Registering mem pool to group {group}," + f"group.group_desc:{group.group_desc}, group.size(): {group.size()}" + ) + nccl_allocator.register_mem_pool( + NCCL_MEMORY_POOL, + group, + symmetric=not self.ddp_config.disable_symmetric_registration, + ) + if torch.distributed.get_rank() == 0: + logging.info( + f"[MCORE][FSDP][Manual REG] Registered mem pool to group {group}," + f"group.group_desc:{group.group_desc}, group.size(): {group.size()}" + ) + def _log_parameter_groups(self): """Compact log of FSDP parameter groups and their parameters.""" diff --git a/megatron/core/nccl_allocator.py b/megatron/core/nccl_allocator.py index b46157e9d00..8eb4047634c 100644 --- a/megatron/core/nccl_allocator.py +++ b/megatron/core/nccl_allocator.py @@ -156,6 +156,37 @@ def init() -> None: logging.info(f"[MCORE][NCCL_ALLOCATOR] Initialized NCCL Allocator") +# register_mem_pool/deregister_mem_pool are used for manual (de)registration of the memory pool. +# They are used in the case of FSDP manual registration. +def register_mem_pool(pool, group, symmetric=True): + """ + Register a memory pool to a group. + symmetric: bool, this is for future use. 
+ """ + backend = group._get_backend(torch.device("cuda", torch.cuda.current_device())) + if symmetric: + try: + backend.register_mem_pool(pool, symm=symmetric) + except TypeError: + # Older PyTorch/APIs without 'symm' keyword. + logging.warning( + f"[MCORE][NCCL_ALLOCATOR] Failed in symmetric registration." + f"Falling back to registration api without 'symm' keyword!!" + ) + backend.register_mem_pool(pool) + else: + backend.register_mem_pool(pool) + + +def deregister_mem_pool(pool, group): + """ + Deregister a memory pool from a group. + """ + backend = group._get_backend(torch.device("cuda", torch.cuda.current_device())) + if pool.snapshot(): + backend.deregister_mem_pool(pool) + + # Preserve the original APEX NCCL allocator interface for backward compatibility class nccl_mem: """ @@ -314,3 +345,20 @@ def __exit__(self, *args): f"{repr(group)}({desc}) group!!" ) self.mem_context.__exit__(*args) + + +class MemPoolAllocatorWithoutRegistration: + """ + An allocator class that uses allocates memory without registering to any communication group. + Users are expected to register the memory manually to the communication groups. 
+ """ + + def __init__(self, pool): + self.pool = pool + self.mem_context = torch.cuda.use_mem_pool(self.pool) + + def __enter__(self): + self.mem_context.__enter__() + + def __exit__(self, *args): + self.mem_context.__exit__(*args) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index b267c8a8170..0fc00bd91be 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -744,10 +744,14 @@ def validate_args(args, defaults={}): assert args.ckpt_format == "fsdp_dtensor", \ "Megatron FSDP only supports fsdp_dtensor checkpoint format" - + if args.use_megatron_fsdp: args.reuse_grad_buf_for_mxfp8_param_ag = False + if args.fsdp_manual_registration: + assert args.use_megatron_fsdp, "FSDP manual registration is only supported with Megatron FSDP" + assert args.nccl_ub, "FSDP manual registration is only supported with nccl-ub option" + # Parameters dtype. args.params_dtype = torch.float if args.fp16: @@ -2773,6 +2777,9 @@ def _add_distributed_args(parser): group.add_argument('--disable-symmetric-registration', action='store_true', dest='disable_symmetric_registration', default=False, help='Disable symmetric (window) registration for NCCL userbuffer registration.' 'This option will force to use conventional (local) userbuffer registration when use-nccl-ub is set.') + group.add_argument('--fsdp-manual-registration', action='store_true', dest='fsdp_manual_registration', + default=False, help='Manually register the FSDP communication buffers to NCCL user buffer.' 
+ 'This option is only effective when use-megatron-fsdp and use-nccl-ub is set.') group.add_argument('--use-sharp', action='store_true', help='Required to enable SHARP communication.') group.add_argument('--sharp-enabled-group', type=str, default=None, diff --git a/megatron/training/training.py b/megatron/training/training.py index 459e77e6c81..f006772bbdd 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -2517,6 +2517,20 @@ def get_e2e_base_metrics(): iteration += 1 + # If requested, manually register FSDP communication buffers after a short warmup. + if ( + getattr(args, "fsdp_manual_registration", False) + and getattr(args, "use_megatron_fsdp", False) + and iteration == start_iteration + 1 + ): + for model_chunk in model: + if isinstance(model_chunk, megatron_FSDP) and getattr( + model_chunk.ddp_config, "fsdp_manual_registration", False + ): + pad_buf = getattr(model_chunk, "param_and_grad_buffer", None) + if pad_buf is not None: + pad_buf.manual_buffer_registration() + if getattr(args, 'perform_rl_step', False) and args.rl_use_sequence_packing: iteration_sequences = rl_utils.get_iteration_sequence_count(args) # Track bins separately for packed mode diff --git a/tests/unit_tests/distributed/test_mcore_fully_sharded_data_parallel.py b/tests/unit_tests/distributed/test_mcore_fully_sharded_data_parallel.py index 3b41daf58ef..3f0cce4e40b 100644 --- a/tests/unit_tests/distributed/test_mcore_fully_sharded_data_parallel.py +++ b/tests/unit_tests/distributed/test_mcore_fully_sharded_data_parallel.py @@ -220,13 +220,16 @@ def train_step(model, optimizer, inputs): # Testing fsdp_double_buffer with and without nccl_ub @pytest.mark.parametrize( - ("dp_size", "nccl_ub", "fsdp_double_buffer"), [(8, False, True), (8, True, True)] + ("dp_size", "nccl_ub", "fsdp_double_buffer", "fsdp_manual_registration"), + [(8, False, True, False), (8, True, True, False), (8, True, True, True)], ) - def test_fsdp_user_buffer_registration(self, dp_size, nccl_ub, 
fsdp_double_buffer): + def test_fsdp_user_buffer_registration( + self, dp_size, nccl_ub, fsdp_double_buffer, fsdp_manual_registration + ): """Test that FSDP works correctly with user buffer registration. This test compares the training results of the baseline fsdp with the target fsdp config. - Baseline fsdp: nccl_ub=False, fsdp_double_buffer=False - Target fsdp: nccl_ub=[True, False], fsdp_double_buffer=[True, False] + Baseline fsdp: nccl_ub=False, fsdp_double_buffer=False, fsdp_manual_registration=False + Target fsdp: nccl_ub=[True, False], fsdp_double_buffer=[True, False], fsdp_manual_registration=[True, False] """ if not is_torch_min_version("2.4.0"): pytest.skip("Megatron FSDP requires torch >= 2.4.0") @@ -264,6 +267,7 @@ def test_fsdp_user_buffer_registration(self, dp_size, nccl_ub, fsdp_double_buffe use_megatron_fsdp=True, nccl_ub=False, fsdp_double_buffer=False, + fsdp_manual_registration=False, ) # Setup FSDP config - target fsdp config @@ -275,6 +279,7 @@ def test_fsdp_user_buffer_registration(self, dp_size, nccl_ub, fsdp_double_buffe use_megatron_fsdp=True, nccl_ub=nccl_ub, fsdp_double_buffer=fsdp_double_buffer, + fsdp_manual_registration=fsdp_manual_registration, ) # Create two identical models @@ -354,6 +359,13 @@ def train_step(model, optimizer, inputs): out1, loss1 = train_step(baseline_fsdp_model, optimizer1, input_data) out2, loss2 = train_step(target_fsdp_model, optimizer2, input_data) + # In case of manual registration, we need to manually register the buffer + # And proceed one more step to check the results + if fsdp_manual_registration: + out1, loss1 = train_step(baseline_fsdp_model, optimizer1, input_data) + target_fsdp_model.manual_buffer_registration() + out2, loss2 = train_step(target_fsdp_model, optimizer2, input_data) + testing.assert_close(out1, out2, rtol=0, atol=0) testing.assert_close(loss1, loss2, rtol=0, atol=0) From 922e8e9080611d6432276115666659301f4f874f Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Tue, 30 Dec 2025 
22:49:53 -0600 Subject: [PATCH 208/334] cp: Allow disabling external contributors (#2784) (#2786) Signed-off-by: Charlie Truong --- .github/workflows/cicd-main.yml | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index a5a7a82287e..1ce96750a36 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -52,6 +52,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.PAT }} REPO: ${{ github.repository }} + DISABLE_EXTERNAL_CONTRIBUTOR: ${{ vars.DISABLE_EXTERNAL_CONTRIBUTOR }} steps: - name: Checkout repository uses: actions/checkout@v4 @@ -86,6 +87,43 @@ jobs: # Use SSO membership check result IS_MEMBER="${{ steps.check-sso.outputs.is_member }}" + + # If external contributor is disabled, check if user is a repo collaborator or an org collaborator to NVIDIA or NVIDIA-NeMo + if [ "${{ env.DISABLE_EXTERNAL_CONTRIBUTOR }}" == "true" ] && [ "${{ steps.check-sso.outputs.is_member }}" != "true" ]; then + PR_AUTHOR=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }} + + echo "Checking if $PR_AUTHOR is a repo collaborator..." + API_URL="https://api.github.com/repos/$REPO/collaborators/$PR_AUTHOR" + REPO_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + $API_URL) + + echo "Checking if $PR_AUTHOR is an org collaborator to NVIDIA-NeMo..." + API_URL="https://api.github.com/orgs/NVIDIA-NeMo/members/$PR_AUTHOR" + ORG_NVIDIA_NEMO_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + $API_URL) + + echo "Checking if $PR_AUTHOR is an org collaborator to NVIDIA..." 
+ API_URL="https://api.github.com/orgs/NVIDIA/members/$PR_AUTHOR" + ORG_NVIDIA_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + $API_URL) + + if [ "$REPO_MEMBERSHIP_RESPONSE" -eq 204 ] || [ "$ORG_NVIDIA_NEMO_MEMBERSHIP_RESPONSE" -eq 204 ] || [ "$ORG_NVIDIA_MEMBERSHIP_RESPONSE" -eq 204 ]; then + IS_MEMBER="true" + else + exit 1 + fi + fi + + # Use SSO membership check result if [ "$IS_MEMBER" == "true" ]; then echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT else From 5455f0a010eadc81d2de48b0b94dccafd7c08a2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sat, 3 Jan 2026 18:00:06 +0100 Subject: [PATCH 209/334] build: Pin down `nvidia-nvshmem-cu13` (#2798) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- docker/Dockerfile.ci.dev | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index 482c6af460c..fa4d84bcad0 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -55,7 +55,7 @@ EOF COPY docker/patches/deepep.patch /workspace/deepep.patch RUN bash -ex <<"EOF" cd /workspace - uv pip install nvidia-nvshmem-cu13 + uv pip install nvidia-nvshmem-cu13==3.4.5 pushd /opt/venv/lib/python3.12/site-packages/nvidia/nvshmem/lib/ ln -s libnvshmem_host.so.3 libnvshmem_host.so popd From 71d5c84980aecd3be48ed4df368c70302f5560e3 Mon Sep 17 00:00:00 2001 From: Kunlun Li <94586211+kunlunl@users.noreply.github.com> Date: Mon, 5 Jan 2026 14:07:54 +0800 Subject: [PATCH 210/334] [dev] Fix bug of reuse_grad_buf_for_mxfp8_param_ag (#2801) Signed-off-by: kunlunl --- megatron/training/training.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/megatron/training/training.py b/megatron/training/training.py index f006772bbdd..91cd420c214 
100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1401,10 +1401,19 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch # For the mxfp8_param with reuse_grad_buf_for_mxfp8_param_ag and dp_ag_overlap, # we need to call the _copy_main_params_to_param_buffer() after the grad buffer # is zeroed by zero_grad_buffer() because param and grad buffer are shared. + # + # However, we should skip this on the first iteration when forward_pre_hook is disabled, + # because: + # 1. The first iteration's params are already in param.data (from init or checkpoint). + # 2. Without forward_pre_hook, finish_param_sync() won't be called to zero the grad buffer, + # so the main grads will be polluted by the main params. if args.reuse_grad_buf_for_mxfp8_param_ag and args.overlap_param_gather: - for optim_instance in optimizer.chained_optimizers: - if isinstance(optim_instance, DistributedOptimizer): - optim_instance._copy_main_params_to_param_buffer() + # Check if forward_pre_hook is enabled by checking if hooks are registered. + forward_pre_hook_enabled = len(model[0].remove_forward_pre_hook_handles) > 0 + if forward_pre_hook_enabled: + for optim_instance in optimizer.chained_optimizers: + if isinstance(optim_instance, DistributedOptimizer): + optim_instance._copy_main_params_to_param_buffer() # Forward pass. 
losses_reduced = forward_backward_func( From 8b93e0d6ef0a5ca6ef3c1993b0728447a8ddc4b8 Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Mon, 5 Jan 2026 16:08:58 +0800 Subject: [PATCH 211/334] [Dev] Partial CUDA Graph support for EP Overlap (#2168) --- .../common/model_chunk_schedule_plan.py | 40 +- .../core/models/gpt/fine_grained_callables.py | 204 ++++++---- megatron/core/pipeline_parallel/schedules.py | 105 +++++ megatron/core/pipeline_parallel/utils.py | 4 +- megatron/core/transformer/cuda_graphs.py | 84 +++- megatron/core/transformer/moe/moe_layer.py | 7 +- .../core/transformer/transformer_config.py | 15 + .../core/transformer/transformer_layer.py | 36 ++ .../test_cuda_graphed_schedule_chunk_1f1b.py | 372 ++++++++++++++++++ .../a2a_overlap/test_schedule_layer_1f1b.py | 2 +- tests/unit_tests/a2a_overlap/utils.py | 1 + .../pipeline_parallel/test_schedules.py | 48 +++ .../transformer/test_submodule_callables.py | 16 +- 13 files changed, 804 insertions(+), 130 deletions(-) create mode 100644 tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 486a498dd73..04ca580eeaa 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -17,6 +17,7 @@ get_comm_stream, get_comp_stream, ) +from megatron.core.transformer.enums import CudaGraphScope class ModelChunkState: @@ -37,23 +38,20 @@ class TransformerLayerSchedulePlan: mtp post process nodes. 
layer (TransformerLayerSchedulePlan) - ├── attn (TransformerLayerNode): attention module - ├── post_attn (TransformerLayerNode): layernorm -> router -> dispatch preprocess + ├── attn (TransformerLayerNode): attention -> router -> dispatch preprocess ├── moe_dispatch (TransformerLayerNode): dispatch All2All ├── mlp (TransformerLayerNode): mlp module ├── moe_combine (TransformerLayerNode): combine All2All └── mtp_post_process (PostProcessNode): mtp post process Note that MTP layer has the same operation and execution order with TransformerLayer regarding - post_attn, moe_dispatch, mlp, moe_combine, but contains extra operations in attn and - mtp_post_process: + moe_dispatch, mlp, moe_combine, but contains extra operations in attn and mtp_post_process: * mtp.attn wraps around transformer_layer.attn with extra norm, proj and embedding operations. * mtp.mtp_post_process contains output_layer, mtp loss operations, whereas transformer_layer.mtp_post_process is empty. """ attn = None - post_attn = None moe_dispatch = None mlp = None moe_combine = None @@ -117,7 +115,7 @@ def release_state(self): def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args): """ Builds the callable nodes for the transformer/mtp layer: - attn, post_attn, mlp, moe_dispatch and moe_combine, and mtp_post_process. + attn, mlp, moe_dispatch and moe_combine, and mtp_post_process. 
""" from megatron.core.models.gpt.fine_grained_callables import ( TransformerLayerNode, @@ -137,16 +135,7 @@ def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args): else isinstance(self.layer.mlp, MoELayer) ) - enable_deepep = ( - self.layer.config.moe_token_dispatcher_type == "flex" - and self.layer.config.moe_flex_dispatcher_backend == "deepep" - ) - enable_hybridep = ( - self.layer.config.moe_token_dispatcher_type == "flex" - and self.layer.config.moe_flex_dispatcher_backend == "hybridep" - ) - extra_args["enable_deepep"] = enable_deepep - extra_args["enable_hybridep"] = enable_hybridep + extra_args["config"] = self.layer.config extra_args["is_moe"] = is_moe extra_args["delay_wgrad_compute"] = self.layer.config.delay_wgrad_compute extra_args["is_mtp"] = is_mtp @@ -167,7 +156,6 @@ def create_node(stream, module, name): ( attn_module, - post_attn_module, moe_dispatch_module, mlp_module, moe_combine_module, @@ -179,11 +167,9 @@ def create_node(stream, module, name): self.attn = create_node(comp_stream, attn_module, "attn") self.mlp = create_node(comp_stream, mlp_module, "mlp") if is_moe: - self.post_attn = create_node(comp_stream, post_attn_module, "post_attn") self.moe_dispatch = create_node(comm_stream, moe_dispatch_module, "moe_dispatch") self.moe_combine = create_node(comm_stream, moe_combine_module, "moe_combine") else: - self.post_attn = NoopScheduleNode() self.moe_dispatch = NoopScheduleNode() self.moe_combine = NoopScheduleNode() @@ -194,6 +180,11 @@ def create_node(stream, module, name): else: self.mtp_post_process = NoopScheduleNode() + # mlp and combine may receive dgrad from attn, which is managed by cuda graph. + if CudaGraphScope.attn in self.config.cuda_graph_scope: + self.mlp.manual_grads_release = False + self.moe_combine.manual_grads_release = False + def get_fp8_context(self): """ Get the fp8 context for the transformer layer. 
@@ -216,8 +207,8 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) to maximize parallelism and efficiency. When f_layer and b_layer are not None, forward and backward pass are overlapped as follows: - comm_stream: combine_bwd | dispatch_fwd->dispatch_bwd | combine_fwd - comp_stream: attn_fwd->post_attn_fwd| mlp_bwd->mlp_bwd_dw->mlp_fwd| post_attn_bwd->attn_bwd + comm_stream: combine_bwd | dispatch_fwd->dispatch_bwd | combine_fwd + comp_stream: attn_fwd | mlp_bwd->mlp_bwd_dw->mlp_fwd| attn_bwd For MTP, mtp_post_process_fwd is executed after the combine_fwd in the comp_stream, and mtp_post_process_bwd is executed before the combine_bwd in the comp_stream. @@ -240,7 +231,6 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) if f_layer is not None: with f_layer.get_fp8_context(): f_input = f_layer.attn.forward(f_input) - f_input = f_layer.post_attn.forward(f_input) if b_layer is not None: b_grad = b_layer.mlp.backward(b_grad) @@ -254,7 +244,6 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) b_grad = b_layer.moe_dispatch.backward(b_grad) if b_layer is not None and b_layer.config.ep_overlap_early_attn_memory_release: - b_grad = b_layer.post_attn.backward(b_grad) b_grad = b_layer.attn.backward(b_grad) if f_layer is not None: @@ -267,7 +256,6 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) f_input = f_layer.mtp_post_process.forward(f_input) if b_layer is not None and not b_layer.config.ep_overlap_early_attn_memory_release: - b_grad = b_layer.post_attn.backward(b_grad) b_grad = b_layer.attn.backward(b_grad) # Delay the last attn_dw in backward pass (attn_dw of the first layer) @@ -369,6 +357,10 @@ def __init__( model, self._model_chunk_state, self._event, comp_stream ) + # preprocess may receive dgrad from attn, which is managed by cuda graph. 
+ if CudaGraphScope.attn in model.config.cuda_graph_scope: + self.pre_process.manual_grads_release = False + def _build_layer_schedule_plan(self, module, comp_stream, comm_stream): if module is None: return diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index a0be55c4ca1..ab76659d01b 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -6,14 +6,15 @@ from typing import Optional import torch +from torch import Tensor from megatron.core import tensor_parallel +from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, ) from megatron.core.pipeline_parallel.utils import ScheduleNode, make_viewless +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.module import float16_to_fp32 from megatron.core.transformer.moe.moe_layer import MoELayer from megatron.core.transformer.multi_token_prediction import ( @@ -42,14 +43,13 @@ def wrapped_func(*args, **kwarg): @internal_api -def should_free_input(name, is_moe, enable_deepep, enable_hybridep): +def should_free_input(name, is_moe, config): """Determine if the node should free its input memory. 
Args: name: Node name is_moe: Whether it's a MoE model - enable_deepep: Whether to use DeepEP dispatcher - enable_hybridep: Whether to use HybridEP dispatcher + config: TransformerConfig object Returns: bool: Whether to free input memory @@ -57,6 +57,14 @@ def should_free_input(name, is_moe, enable_deepep, enable_hybridep): # For dense layers [attn, fake, mlp, fake], the input is needed during backward pass if not is_moe: return False + enable_deepep = ( + config.moe_token_dispatcher_type == "flex" + and config.moe_flex_dispatcher_backend == "deepep" + ) + enable_hybridep = ( + config.moe_token_dispatcher_type == "flex" + and config.moe_flex_dispatcher_backend == "hybridep" + ) # Define which nodes should free input memory # Since we split the computing graph into multiple nodes, we can manually control # when and how to free the input memory. @@ -69,7 +77,10 @@ def should_free_input(name, is_moe, enable_deepep, enable_hybridep): # and probs before dispatch A2A and it's not needed anymore after the forward pass # For DeepEP and HybridEP dispatcher mode, they are both needed in backward pass # and cannot be freed. - "moe_dispatch": not (enable_deepep or enable_hybridep), + # If moe_preprocess is in cuda graph scope, tokens and probs are fixed size tensors, + # so they cannot be freed. + "moe_dispatch": not (enable_deepep or enable_hybridep) + and (CudaGraphScope.moe_preprocess not in config.cuda_graph_scope), } return free_input_nodes.get(name, False) @@ -232,13 +243,13 @@ def __init__( it's the per_batch_state_context, o.w. nullcontext name (str): Node name, also used to determine memory strategy bwd_dw_callables (list): List of weight gradient functions for the layer. - extra_args (dict): Extra arguments for nodes: is_moe, enable_deepep, enable_hybridep. + extra_args (dict): Extra arguments for the node: is_moe, config. 
""" # determine whether to free input memory + config = extra_args.get("config", None) + assert config is not None, "model config must be passed to TransformerLayerNode." is_moe = extra_args.get("is_moe", False) - enable_deepep = extra_args.get("enable_deepep", False) - enable_hybridep = extra_args.get("enable_hybridep", False) - free_input = should_free_input(name, is_moe, enable_deepep, enable_hybridep) + free_input = should_free_input(name, is_moe, config) self.delay_wgrad_compute = extra_args.get("delay_wgrad_compute", False) super().__init__( @@ -303,8 +314,8 @@ def backward_dw(self): module.backward_dw() # the output grad memory is last used in wgrad compute, should be safe to release. - assert self.delay_grads_release, "output grad memory should be valid before wgrad." - if self.manual_release_grads: + if self.manual_grads_release: + assert self.delay_grads_release, "output grad memory should be valid before wgrad." for tensor in self.output_grads: tensor.untyped_storage().resize_(0) self.output_grads = None @@ -357,11 +368,95 @@ def build_transformer_layer_callables(layer: TransformerLayer): and layer.config.moe_flex_dispatcher_backend == "hybridep" ) + class _BackwardDWWrapper: + def __init__(self): + self.graphed_backward_dw_callable = None + self.attn_dw_callable = layer.self_attention.backward_dw + if isinstance(layer.mlp, MoELayer): + self.shared_expert_dw_callable = partial( + layer.mlp.backward_dw, routed_experts=False, shared_experts=True + ) + else: + self.shared_expert_dw_callable = None + self.cuda_graph_scope = layer.config.cuda_graph_scope + + def set_graphed_backward_dw_callable(self, graphed_backward_dw_callable): + """Store the CUDA graphed backward weight gradient callable.""" + self.graphed_backward_dw_callable = graphed_backward_dw_callable + + def backward_dw(self): + """Execute weight gradients, skipping CUDA graphed components during replay.""" + is_replay = hasattr(layer, 'cuda_graphs') and layer.cuda_graphs + if 
self.shared_expert_dw_callable is not None and ( + not is_replay or CudaGraphScope.moe_router not in self.cuda_graph_scope + ): + self.shared_expert_dw_callable() + if not is_replay or CudaGraphScope.attn not in self.cuda_graph_scope: + self.attn_dw_callable() + if is_replay and self.graphed_backward_dw_callable is not None: + self.graphed_backward_dw_callable() + + attn_backward_dw_wrapper = _BackwardDWWrapper() + def submodule_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): """ - Performs same attnention forward logic as GPT Model. + Performs same attnention forward logic as GPT Model and forward pass for + computations between attention and dispatch: + pre mlp layernorm->router->dispatch preprocess """ - hidden_states, _ = layer._forward_attention( + + if hasattr(layer, 'cuda_graphs') and layer.cuda_graphs: + assert ( + CudaGraphScope.mlp not in layer.config.cuda_graph_scope + and CudaGraphScope.moe not in layer.config.cuda_graph_scope + ), ( + "Supported CUDA graph scope with EP overlap: " + "attn, moe_router, moe_preprocess, mlp, got {}".format( + layer.config.cuda_graph_scope + ) + ) + forward_func = layer._te_cuda_graph_replay + attn_backward_dw_wrapper.set_graphed_backward_dw_callable( + partial(layer.backward_dw_cudagraph, layer.current_microbatch) + ) + else: + # wrapper function that keeps consistent api with cuda graph replay + def forward_func( + hidden_states: Tensor, + attention_mask: Optional[Tensor] = None, + rotary_pos_emb: Optional[Tensor] = None, + rotary_pos_cos: Optional[Tensor] = None, + rotary_pos_sin: Optional[Tensor] = None, + packed_seq_params: Optional[PackedSeqParams] = None, + sequence_len_offset: Optional[Tensor] = None, + ): + hidden_states, _ = layer._forward_attention( + hidden_states=hidden_states, + attention_mask=attention_mask, + rotary_pos_emb=rotary_pos_emb, + rotary_pos_cos=rotary_pos_cos, + rotary_pos_sin=rotary_pos_sin, + packed_seq_params=packed_seq_params, + sequence_len_offset=sequence_len_offset, + ) + 
if not isinstance(layer.mlp, MoELayer): + return hidden_states, None, None, None + if layer.recompute_pre_mlp_layernorm: + layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() + pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( + layer.pre_mlp_layernorm, hidden_states + ) + else: + pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) + + shared_expert_output = layer.mlp.shared_experts_compute(pre_mlp_layernorm_output) + probs, routing_map = layer.mlp.route(pre_mlp_layernorm_output) + local_tokens, probs, _ = layer.mlp.preprocess( + pre_mlp_layernorm_output, probs, routing_map + ) + return hidden_states, local_tokens, probs, shared_expert_output + + hidden_states, local_tokens, probs, shared_expert_output = forward_func( hidden_states=hidden_states, attention_mask=node.chunk_state.attention_mask, rotary_pos_emb=node.chunk_state.rotary_pos_emb, @@ -370,33 +465,14 @@ def submodule_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): packed_seq_params=node.chunk_state.packed_seq_params, sequence_len_offset=node.chunk_state.sequence_len_offset, ) - return hidden_states - - def submodule_post_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): - """ - Run forward pass for computations between attention and dispatch: - pre mlp layernorm->router->dispatch preprocess - """ - if layer.offload_mlp_norm: - hidden_states = fine_grained_offloading_group_start(hidden_states, name="mlp_norm") - if layer.recompute_pre_mlp_layernorm: - layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with get_fine_grained_offloading_context(layer.offload_mlp_norm): - pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( - layer.pre_mlp_layernorm, hidden_states - ) - else: - with get_fine_grained_offloading_context(layer.offload_mlp_norm): - pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) - - probs, routing_map = layer.mlp.route(pre_mlp_layernorm_output) - 
local_tokens, probs, _ = layer.mlp.preprocess(pre_mlp_layernorm_output, probs, routing_map) + if not isinstance(layer.mlp, MoELayer): + return hidden_states # Detach here for mlp_bda residual connection node.layer_state.residual = node.detach(hidden_states) if layer.mlp.use_shared_expert and not layer.mlp.shared_expert_overlap: - # Detach here for shared expert connection - node.layer_state.pre_mlp_layernorm_output = node.detach(pre_mlp_layernorm_output) + # Detach here for shared expert connection in moe_combine + node.layer_state.shared_expert_output = node.detach(shared_expert_output) return local_tokens, probs @@ -421,7 +497,6 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): Run forward pass for computations between dispatch and combine: post dispatch->experts->combine preprocess """ - shared_expert_output = None dispatched_probs = node.layer_state.dispatched_probs token_dispatcher = layer.mlp.token_dispatcher if enable_deepep or enable_hybridep: @@ -429,10 +504,8 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): # backward graph from connecting to dispatch submodule token_dispatcher._comm_manager.dispatched_probs = dispatched_probs - pre_mlp_layernorm_output = getattr(node.layer_state, 'pre_mlp_layernorm_output', None) - shared_expert_output = layer.mlp.shared_experts_compute(pre_mlp_layernorm_output) - expert_output, mlp_bias = layer.mlp.routed_experts_compute( - dispatched_tokens, dispatched_probs, pre_mlp_layernorm_output + expert_output, _ = layer.mlp.routed_experts_compute( + dispatched_tokens, dispatched_probs, None ) if layer.recompute_pre_mlp_layernorm: @@ -442,16 +515,10 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): # release tensor reference after use node.layer_state.dispatched_probs = None node.layer_state.pre_mlp_layernorm_output = None - if shared_expert_output is None: - # Return only expert_output, since shared_expert_output causes backward 
on None - return expert_output - return expert_output, shared_expert_output - - def submodule_combine_forward( - node: ScheduleNode, - output: torch.Tensor, - shared_expert_output: Optional[torch.Tensor] = None, - ): + + return expert_output + + def submodule_combine_forward(node: ScheduleNode, output: torch.Tensor): """ # Triggers token combine and the remaining computation in the transformer layer. # The `mlp_bda` computation is placed after `mlp.combine` due to data dependency. @@ -461,10 +528,11 @@ def submodule_combine_forward( # with another microbatch's computation and expose the communication. """ residual = node.layer_state.residual - + shared_expert_output = getattr(node.layer_state, 'shared_expert_output', None) output = layer.mlp.combine(output, shared_expert_output) mlp_output_with_bias = (output, None) - + if hasattr(layer, 'cuda_graphs') and layer.cuda_graphs: + layer.mlp.cudagraph_tensor_store.clear() with layer.bias_dropout_add_exec_handler(): hidden_states = layer.mlp_bda(layer.training, layer.config.bias_dropout_fusion)( mlp_output_with_bias, residual, layer.hidden_dropout @@ -500,13 +568,12 @@ def raise_not_implemented(*args): # Build forward and backward callable functions attn_func = submodule_attn_forward - post_attn_func = submodule_post_attn_forward if is_moe else raise_not_implemented dispatch_func = submodule_dispatch_forward if is_moe else raise_not_implemented mlp_func = submodule_moe_forward if is_moe else mlp_wrapper combine_func = submodule_combine_forward if is_moe else raise_not_implemented - forward_funcs = [attn_func, post_attn_func, dispatch_func, mlp_func, combine_func, None] - backward_dw = {"attn": layer.self_attention, "mlp": layer.mlp} + forward_funcs = [attn_func, dispatch_func, mlp_func, combine_func, None] + backward_dw = {"attn": attn_backward_dw_wrapper, "mlp": layer.mlp} return forward_funcs, backward_dw @@ -518,9 +585,7 @@ def build_mtp_layer_callables(layer): """ forward_funcs, backward_dw = 
build_transformer_layer_callables(layer.transformer_layer) - attn_forward, post_attn_forward, dispatch_forward, mlp_forward, combine_forward, _ = ( - forward_funcs - ) + attn_forward, dispatch_forward, mlp_forward, combine_forward, _ = forward_funcs is_moe = isinstance(layer.transformer_layer.mlp, MoELayer) assert is_moe, "MTP layer in a2a overlap only supports MoE layer for now." @@ -581,24 +646,17 @@ def rng_context_wrapper(func, *args, **kwargs): # Build forward and backward callable functions # attn_forward already has rng context, no need to wrap attn_func = submodule_mtp_attn_forward - post_attn_func = partial(rng_context_wrapper, post_attn_forward) dispatch_func = partial(rng_context_wrapper, dispatch_forward) mlp_func = partial(rng_context_wrapper, mlp_forward) combine_func = partial(rng_context_wrapper, combine_forward) mtp_post_process_func = submodule_mtp_postprocess_forward - forward_funcs = [ - attn_func, - post_attn_func, - dispatch_func, - mlp_func, - combine_func, - mtp_post_process_func, - ] - backward_dw = { - "attn": [layer.transformer_layer.self_attention, layer.eh_proj], - "mlp": layer.transformer_layer.mlp, - } + forward_funcs = [attn_func, dispatch_func, mlp_func, combine_func, mtp_post_process_func] + if isinstance(backward_dw["attn"], list): + backward_dw["attn"].append(layer.eh_proj) + else: + backward_dw["attn"] = [backward_dw["attn"], layer.eh_proj] + return forward_funcs, backward_dw diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index a8fdf2324f2..c41a09ea594 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -2,6 +2,7 @@ import contextlib from functools import partial +from itertools import zip_longest from typing import Callable, Iterator, List, Optional, Union import torch @@ -843,6 +844,110 @@ def convert_schedule_table_to_order(num_warmup_microbatches, num_model_chunks, s return order +def 
get_overlap_moe_expert_parallel_comm_order(order, num_layers_per_chunk, capture_wgrad_graph): + """ + This functions gets the order for overlap_moe_expert_parallel_comm schedule for the original + chunk-wise order list. Each chunk is transformered to chunks with only 1 layer so that + layers between 2 chunks can now overlap with each other while following the graph order. + If capture_wgrad_graph is True, the wgrad backward graph is also added to the order by + decreasing the layer id by 0.5. + + Args: + order (List[int]): The original chunk-wise order list. Positive values represent forward + passes for chunks, negative values represent backward passes. The absolute value + indicates the chunk ID (1-indexed). + num_layers_per_chunk (List[int]): Number of graphable layers in each chunk. The length + of this list equals the number of chunks. + capture_wgrad_graph (bool): If True, weight gradient computation graphs are added to the + order by appending entries with layer_id - 0.5. + + Returns: + Tuple[List[float], List[Optional[List[int]]]]: A tuple containing: + - new_order: The layer-wise order list where each chunk is expanded to individual + layers. Positive values are forward passes, negative values are backward passes. + Values with .5 suffix indicate weight gradient computations. + - chunk_id_list: A list parallel to new_order. For forward passes, contains + [chunk_id, layer_index_within_chunk]. For backward passes, contains None. 
+ + Example: + original_order: [1, 2, -2, 1, -1, -1] + num_layers_per_chunk: [1, 2] + capture_wgrad_graph=True: + new_order: [1, 2, 3, 1, -3, -3.5, -2, -2.5, -1, -1.5, -1, -1.5] + chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None, + None, None, None, None, None, None, None] + capture_wgrad_graph=False: + new_order: [1, 2, 3, 1, -3, -2, -1, -1] + chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None, None, None, None] + """ + + def _add_order(new_order, chunk_id_list, c_id, layer_id, is_wgrad=False, index=None): + if is_wgrad: + new_order.append(layer_id - 0.5) + else: + new_order.append(layer_id) + if c_id > 0: + chunk_id_list.append([abs(c_id) - 1, index]) + else: + chunk_id_list.append(None) + + new_order = [] + chunk_id_list = [] + add_order = partial(_add_order, new_order, chunk_id_list) + first_backward_idx, last_forward_idx = None, None + for idx, c_id in enumerate(order): + if first_backward_idx is None and c_id < 0: + first_backward_idx = idx + if c_id > 0: + last_forward_idx = idx + + def get_layer_range(c_id): + num_layers = num_layers_per_chunk[abs(c_id) - 1] + num_layers_previous_chunks = sum(num_layers_per_chunk[: abs(c_id) - 1]) + if c_id > 0: + return list( + range(num_layers_previous_chunks + 1, num_layers_previous_chunks + num_layers + 1) + ) + return list(range(-num_layers_previous_chunks - num_layers, -num_layers_previous_chunks)) + + # warmup stage + for c_id in order[:first_backward_idx]: + layer_range = get_layer_range(c_id) + new_order += layer_range + chunk_id_list.extend([abs(c_id) - 1, i] for i in range(len(layer_range))) + + # 1f1b overlap stage + if first_backward_idx < last_forward_idx: + for c_id_b, c_id_f in zip( + order[first_backward_idx : last_forward_idx + 1 : 2], + order[first_backward_idx + 1 : last_forward_idx + 1 : 2], + ): + layer_range_f = get_layer_range(c_id_f) + layer_range_b = get_layer_range(c_id_b) + index = 0 + for l_b, l_f in zip_longest(layer_range_b, layer_range_f, fillvalue=0): + # always forward graph before 
backward graph + if l_f != 0: + add_order(c_id_f, l_f, index=index) + if l_b != 0: + add_order(c_id_b, l_b) + if capture_wgrad_graph and index < len(layer_range_b) - 1: + add_order(c_id_b, l_b, is_wgrad=True) + index += 1 + # last wgrad backward + if capture_wgrad_graph and layer_range_b: + add_order(c_id_b, layer_range_b[-1], is_wgrad=True) + + # cool down stage, backward graphs only + for c_id in order[last_forward_idx + 1 :]: + for l_b in get_layer_range(c_id): + add_order(c_id, l_b) + if capture_wgrad_graph: + add_order(c_id, l_b, is_wgrad=True) + + return new_order, chunk_id_list + + def forward_backward_pipelining_with_interleaving( *, forward_step_func, diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index e7e416f99bd..d38f6d702c0 100644 --- a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -182,8 +182,8 @@ def __init__( self.free_input = free_input self.inputs = None self.outputs = None + self.manual_grads_release = False self.delay_grads_release = False - self.manual_release_grads = False def default_backward_func(self, outputs, output_grad): """Default backward function""" @@ -269,7 +269,7 @@ def _backward(self, *output_grad): # to avoid delayed garbage collection. If # delay_grads_release is True, dgrad is last used in # wgrad compute and skip the release here. 
- if self.manual_release_grads and not self.delay_grads_release: + if self.manual_grads_release and not self.delay_grads_release: g.untyped_storage().resize_(0) grads = self.get_grad() diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 27e6c65c738..b566c1830dc 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -10,6 +10,7 @@ from contextlib import nullcontext from dataclasses import fields, is_dataclass from enum import Enum +from math import ceil from typing import Any, Dict, List, Optional import torch @@ -1510,7 +1511,7 @@ def graphs_created(self): """ return self._graphs_created - def _get_sample_arguments(self, order): + def _get_sample_arguments(self, order, chunk_id_list=None): """ Generate sample arguments and keyword arguments for CUDA Graph capturing with memory-optimized buffer reuse. @@ -1539,6 +1540,9 @@ def _get_sample_arguments(self, order): order (List[int]): The forward/backward execution order from convert_schedule_table_to_order(). Positive integers represent forward passes (1-indexed chunk ID), negative integers represent backward passes. + chunk_id_list (List[Tuple[int, int]]): The list of chunk IDs and layer IDs in the + order. This is useful only when overlap_moe_expert_parallel_comm is enabled, + the order maps each layers' idx to their original chunk id. Returns: Tuple[List[Tuple], List[Dict]]: A tuple containing: @@ -1560,9 +1564,11 @@ def _get_sample_arguments(self, order): assert self.num_model_chunks == max( order ), "num_model_chunks must match the max chunk id in order." - assert ( - self.num_microbatches == len(order) // self.num_model_chunks // 2 - ), "num_microbatches must match the number of microbatches in order." + if chunk_id_list is None: + # check only if 1f1b overlap is disabled. 
+ assert ( + self.num_microbatches == len(order) // self.num_model_chunks // 2 + ), "num_microbatches must match the number of microbatches in order." # Generate sample arguments and keyword arguments for capturing. sample_args = [None] * (len(self.flattened_callables) * self.num_microbatches) @@ -1645,8 +1651,8 @@ def get_rotary_pos_emb(transformer_module, transformer_input): consumed_sample_queue = {} layer_sample_keys_cache = {} fwd_idx = [0] * self.num_model_chunks - for chunk_id in order: - model_chunk_idx = abs(chunk_id) - 1 + for idx, chunk_id in enumerate(order): + model_chunk_idx = abs(ceil(chunk_id)) - 1 if chunk_id > 0: if model_chunk_idx not in fwd_sample_queues: @@ -1655,7 +1661,14 @@ def get_rotary_pos_emb(transformer_module, transformer_input): sample_start_idx = (prefix_num_layers[model_chunk_idx] * self.num_microbatches) + ( fwd_idx[model_chunk_idx] * self.num_layers_per_chunk[model_chunk_idx] ) - for layer_idx, layer in enumerate(self.callables_per_chunk[model_chunk_idx]): + if chunk_id_list: + model_chunk_idx = chunk_id_list[idx][0] + callables_curr_chunk = [ + self.callables_per_chunk[model_chunk_idx][chunk_id_list[idx][1]] + ] + else: + callables_curr_chunk = self.callables_per_chunk[model_chunk_idx] + for layer_idx, layer in enumerate(callables_curr_chunk): per_callable_fwd_idx = sample_start_idx + layer_idx # Get sample_args and sample_kwargs for index per_callable_fwd_idx. @@ -1692,7 +1705,7 @@ def get_rotary_pos_emb(transformer_module, transformer_input): # reuse the static inputs of a previous forward pass for this forward pass. # If not, we still need to generate the new static inputs. 
sample_keys = layer_sample_keys_cache[id(layer)] - + model_chunk_idx = abs(chunk_id) - 1 fwd_sample_queues[model_chunk_idx].append((sample_keys, per_callable_fwd_idx)) if consumed_sample_queue.get(sample_keys, []): # We can reuse the static inputs of a previous forward pass for this @@ -1714,13 +1727,16 @@ def get_rotary_pos_emb(transformer_module, transformer_input): # Unfortunately, no previous static inputs are available for reuse, # sample_args is still None. Last attempt: generate the new static inputs # for this forward pass. + if chunk_id_list: + model_chunk_idx = chunk_id_list[idx][0] sample_args[per_callable_fwd_idx], sample_kwargs[per_callable_fwd_idx] = ( _get_layer_static_inputs( layer, self.chunks_with_decoder[model_chunk_idx] ) ) + model_chunk_idx = abs(chunk_id) - 1 fwd_idx[model_chunk_idx] += 1 - else: + elif ceil(chunk_id) == chunk_id: num_consumed_samples = min( len(fwd_sample_queues[model_chunk_idx]), self.num_layers_per_chunk[model_chunk_idx], @@ -1734,6 +1750,9 @@ def get_rotary_pos_emb(transformer_module, transformer_input): fwd_sample_queues[model_chunk_idx] = fwd_sample_queues[model_chunk_idx][ num_consumed_samples: ] + else: + # skip register static inputs for wgrad backward graphs + continue return sample_args, sample_kwargs @@ -1746,12 +1765,16 @@ def _get_cuda_graph_input_data(self): # Get the PP and VPP scheduling order. from megatron.core.pipeline_parallel.schedules import ( convert_schedule_table_to_order, + get_overlap_moe_expert_parallel_comm_order, get_pp_rank_microbatches, get_schedule_table, ) # If PP is not enabled, we only need to capture one microbatch. - if parallel_state.get_pipeline_model_parallel_world_size() == 1: + if ( + parallel_state.get_pipeline_model_parallel_world_size() == 1 + and not self.config.overlap_moe_expert_parallel_comm + ): assert ( self.num_model_chunks == 1 ), "If PP is not enabled, there should be only one model chunk." 
@@ -1780,9 +1803,36 @@ def _get_cuda_graph_input_data(self): level=logging.DEBUG, msg=f'Rank {torch.distributed.get_rank()}: ORDER {order}', ) + chunk_id_list = None + if self.config.overlap_moe_expert_parallel_comm: + wgrad_in_graph_scope = CudaGraphScope.attn in self.config.cuda_graph_scope or ( + CudaGraphScope.moe_router in self.config.cuda_graph_scope + and self.config.moe_shared_expert_intermediate_size is not None + and not self.config.moe_shared_expert_overlap + ) + capture_wgrad_graph = self.config.delay_wgrad_compute and wgrad_in_graph_scope + order, chunk_id_list = get_overlap_moe_expert_parallel_comm_order( + order, self.num_layers_per_chunk, capture_wgrad_graph + ) + self.num_layers_per_chunk = [1] * sum(self.num_layers_per_chunk) + self.num_model_chunks = max(order) + _order_without_wgrad = [] + for c_id in order: + if ceil(c_id) != c_id: + continue + _order_without_wgrad.append(c_id) + self.num_microbatches = len(_order_without_wgrad) // self.num_model_chunks // 2 + log_on_each_pipeline_stage( + logger=logger, + tp_group=None, + dp_cp_group=None, + level=logging.DEBUG, + msg=f'Rank {torch.distributed.get_rank()}: ' + f'ORDER after overlap_moe_expert_parallel_comm {order}', + ) # Generate sample arguments and keyword arguments for capturing. 
- sample_args, sample_kwargs = self._get_sample_arguments(order) + sample_args, sample_kwargs = self._get_sample_arguments(order, chunk_id_list) def get_make_graphed_callables_kwargs(): kwargs = {'allow_unused_input': True, '_order': order} @@ -1920,13 +1970,17 @@ def create_cudagraphs(self): for layer_number, layer in enumerate(layers): layer.cuda_graphs = [] for batch_number in range(self.num_microbatches): - layer.cuda_graphs.append( - graphs[ + if self.config.overlap_moe_expert_parallel_comm: + graph_idx = ( + num_layers_accumulated + layer_number + ) * self.num_microbatches + batch_number + else: + graph_idx = ( num_layers_accumulated * self.num_microbatches + batch_number * len(layers) + layer_number - ] - ) + ) + layer.cuda_graphs.append(graphs[graph_idx]) num_layers_accumulated += len(layers) self._finish_capturing(start_time) diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py index 10d10f667fe..c8438bb2c8a 100644 --- a/megatron/core/transformer/moe/moe_layer.py +++ b/megatron/core/transformer/moe/moe_layer.py @@ -326,10 +326,11 @@ def custom_forward(hidden_states): return outputs - def backward_dw(self): + def backward_dw(self, routed_experts: bool = True, shared_experts: bool = False): """Compute weight gradients for experts and shared experts.""" - self.experts.backward_dw() - if self.use_shared_expert and not self.shared_expert_overlap: + if routed_experts: + self.experts.backward_dw() + if shared_experts and self.use_shared_expert and not self.shared_expert_overlap: self.shared_experts.backward_dw() def set_for_recompute_pre_mlp_layernorm(self): diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 6493a4bcce1..a5636d94e26 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -1849,6 +1849,16 @@ def __post_init__(self): 'when enabling overlap_moe_expert_parallel_comm with MTP 
layer.' ) + if self.cuda_graph_impl != "none": + assert ( + self.cuda_graph_impl == "transformer_engine" + and CudaGraphScope.moe not in self.cuda_graph_scope + and CudaGraphScope.mlp not in self.cuda_graph_scope + ), ( + 'CUDA graph scope on moe and mlp is not ' + 'supported with overlap_moe_expert_parallel_comm' + ) + # Check delay_wgrad_compute compatibility if self.delay_wgrad_compute: assert ( @@ -1857,6 +1867,11 @@ def __post_init__(self): assert ( not self.moe_use_legacy_grouped_gemm ), 'delay_wgrad_compute is not supported with legacy groupedgemm implementation' + if self.cuda_graph_impl == "transformer_engine": + assert is_te_min_version("2.10.0"), ( + 'TE version >= 2.10.0 is required for delay_wgrad_compute with ' + 'partial cuda graph' + ) if self.ep_overlap_early_attn_memory_release: assert self.overlap_moe_expert_parallel_comm, ( diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index 3ea40577009..db57e21c891 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -872,6 +872,10 @@ def _te_cuda_graph_replay(self, *args, **kwargs): # CUDA Graph captures the whole MLP/MoE part. CUDA Graph output is the layer output. assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output." output = cuda_graph_output.pop() + assert ( + not self.config.overlap_moe_expert_parallel_comm + ), "EP overlap must be \ + disabled when CUDA graph captures the whole MLP/MoE part." elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope: # CUDA Graph partially captures the MoE. # The rest of the layer should go to the normal pass. @@ -914,12 +918,35 @@ def _te_cuda_graph_replay(self, *args, **kwargs): residual=residual, shared_expert_output=shared_expert_output, ) + # If EP overlap is enabled, remaining of mlp will be called as fine_grained_callables + # and should be skipped here. 
+ if self.config.overlap_moe_expert_parallel_comm: + probs, routing_map = self.mlp.route(hidden_states) + hidden_states, probs, residual = self.mlp.preprocess( + hidden_states, probs, routing_map + ) + nvtx_range_pop(suffix="mlp") + return mlp_residual, hidden_states, probs, shared_expert_output mlp_output_with_bias = self.mlp(hidden_states) self.mlp.cudagraph_tensor_store.clear() nvtx_range_pop(suffix="mlp") output = self._forward_post_mlp(mlp_output_with_bias, mlp_residual) else: + # If EP overlap is enabled, needs to return same outputs as submodule.attn + if self.config.overlap_moe_expert_parallel_comm: + assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output." + mlp_residual = cuda_graph_output.pop() + if not self.is_moe_layer: + return mlp_residual, None, None, None + hidden_states = self.pre_mlp_layernorm(mlp_residual) + shared_expert_output = self.mlp.shared_experts_compute(hidden_states) + probs, routing_map = self.mlp.route(hidden_states) + hidden_states, probs, residual = self.mlp.preprocess( + hidden_states, probs, routing_map + ) + return mlp_residual, hidden_states, probs, shared_expert_output + # CUDA Graph does not capture the MLP/MoE part at all. output = self._forward_mlp(*cuda_graph_output) return output, context @@ -1007,6 +1034,15 @@ def _should_call_local_cudagraph(self, *args, **kwargs): return True return False + def backward_dw_cudagraph(self, microbatch_idx): + """ + CUDA Graph backward weight gradient computation for this layer. + """ + cg_index = microbatch_idx % len(self.cuda_graphs) + if not hasattr(self.cuda_graphs[cg_index], 'backward_dw'): + return + self.cuda_graphs[cg_index].backward_dw() + def __call__(self, *args, **kwargs): if self._should_call_local_cudagraph(*args, **kwargs): # Inference mode. 
diff --git a/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py b/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py new file mode 100644 index 00000000000..91c74fe1bb6 --- /dev/null +++ b/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py @@ -0,0 +1,372 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +import gc +import os +import sys + +import pytest +import torch + +from megatron.core.enums import ModelType +from megatron.core.models.gpt.gpt_layer_specs import ( + get_gpt_decoder_block_spec, + get_gpt_mtp_block_spec, +) +from megatron.core.models.gpt.gpt_model import GPTModel +from megatron.core.num_microbatches_calculator import destroy_num_microbatches_calculator +from megatron.core.pipeline_parallel.utils import set_streams +from megatron.core.tensor_parallel.random import HAVE_TE, model_parallel_cuda_manual_seed +from megatron.core.transformer.enums import CudaGraphScope +from megatron.core.transformer.module import float16_to_fp32 +from megatron.core.utils import is_te_min_version, unwrap_model +from megatron.training.arguments import core_transformer_config_from_args, parse_args, validate_args +from megatron.training.global_vars import ( + destroy_global_vars, + get_args, + set_args, + set_global_variables, +) +from megatron.training.training import setup_model_and_optimizer +from tests.unit_tests.test_utilities import Utils + + +def is_deep_ep_available(): + from megatron.core.transformer.moe.fused_a2a import HAVE_DEEP_EP + + return HAVE_DEEP_EP + + +def is_hybrid_ep_available(): + from megatron.core.transformer.moe.fused_a2a import HAVE_HYBRIDEP + + return HAVE_HYBRIDEP + + +def save(fn, message): + with open(fn, 'w') as f: + f.write(message) + + +class TestPartialCudaGraphedA2AOverlap: + """Test that CUDA graph outputs match ep-overlapped CUDA graph outputs for various scopes.""" + + def setup_method(self, method): + self.seq_length = 512 + self.micro_batch_size = 2 + # Store 
original environment variable values + self.original_env = { + 'CUDA_DEVICE_MAX_CONNECTIONS': os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS'), + 'NVTE_ALLOW_NONDETERMINISTIC_ALGO': os.environ.get('NVTE_ALLOW_NONDETERMINISTIC_ALGO'), + } + self.cuda_graph_helper = None + os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1' + os.environ['NVTE_ALLOW_NONDETERMINISTIC_ALGO'] = '0' + + def teardown_method(self, method): + # Restore original environment variable values + for key, value in self.original_env.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + Utils.destroy_model_parallel() + destroy_global_vars() + destroy_num_microbatches_calculator() + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None + + gc.collect() + + def model_provider( + self, + pre_process=True, + post_process=True, + layer_spec_fn=get_gpt_decoder_block_spec, + **config_kwargs, + ): + model_parallel_cuda_manual_seed(123) + args = get_args() + config = core_transformer_config_from_args(args) + transformer_layer_spec = layer_spec_fn( + config, + use_transformer_engine=True, + normalization=args.normalization, + qk_l2_norm=args.qk_l2_norm, + ) + if args.mtp_num_layers: + mtp_block_spec = get_gpt_mtp_block_spec( + config, transformer_layer_spec, use_transformer_engine=True + ) + else: + mtp_block_spec = None + return GPTModel( + config=config, + transformer_layer_spec=transformer_layer_spec, + vocab_size=args.vocab_size, + max_sequence_length=args.max_position_embeddings, + pre_process=pre_process, + post_process=post_process, + fp16_lm_cross_entropy=args.fp16_lm_cross_entropy, + parallel_output=True, + share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights, + position_embedding_type=args.position_embedding_type, + rotary_percent=args.rotary_percent, + mtp_block_spec=mtp_block_spec, + ) + + def create_test_args( + self, 
cuda_graph_impl, cuda_graph_scope, cuda_graph_warmup_steps, ep_size, **kwargs + ): + destroy_global_vars() + destroy_num_microbatches_calculator() + + sys.argv = ['test_cuda_graphs.py'] + args = parse_args() + args.num_layers = 1 + args.mtp_num_layers = None + args.vocab_size = 1024 + args.hidden_size = 128 + args.num_attention_heads = 8 + args.max_position_embeddings = 512 + args.global_batch_size = self.micro_batch_size * 8 + args.micro_batch_size = self.micro_batch_size + args.create_attention_mask_in_dataloader = True + args.seq_length = self.seq_length + args.tensor_model_parallel_size = 2 + args.sequence_parallel = True + args.pipeline_model_parallel_size = 1 + args.context_parallel_size = 1 + args.expert_model_parallel_size = ep_size + args.train_iters = 10 + args.lr = 3e-5 + args.bf16 = True + args.add_bias_linear = False + args.swiglu = True + args.use_distributed_optimizer = True + args.position_embedding_type = "rope" + args.rotary_percent = 1.0 + args.hidden_dropout = 0.0 + args.attention_dropout = 0.0 + args.untie_embeddings_and_output_weights = True + + # MoE settings + args.num_experts = 16 + args.expert_model_parallel_size = ep_size + args.moe_shared_expert_intermediate_size = 1024 + args.moe_layer_freq = kwargs.get("moe_layer_freq", "[0,0,1,1]") + args.moe_permute_fusion = True + args.moe_router_fusion = True + args.moe_router_topk = 2 + + # CUDA graph settings + args.cuda_graph_impl = cuda_graph_impl + args.cuda_graph_scope = cuda_graph_scope + args.cuda_graph_warmup_steps = cuda_graph_warmup_steps + args.use_te_rng_tracker = cuda_graph_impl != "none" + + for key, value in kwargs.items(): + assert hasattr(args, key) + setattr(args, key, value) + + validate_args(args) + set_global_variables(args, False) + return args + + def get_batch(self, seq_length, micro_batch_size): + data = list(range(seq_length)) + input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() + labels = 1 + torch.tensor(data, 
dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() + position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() + attention_mask = torch.ones( + (micro_batch_size, 1, seq_length, seq_length), dtype=bool + ).cuda() + loss_mask = torch.ones(seq_length).repeat((micro_batch_size, 1)).cuda() + return input_ids, labels, position_ids, attention_mask, loss_mask + + def _run_1f1b_helper(self, gpt_model, optimizer, data, num_iters, cuda_graph_warmup_steps): + from megatron.core.models.common.model_chunk_schedule_plan import ( + TransformerModelChunkSchedulePlan, + ) + from megatron.core.pipeline_parallel.schedules import set_current_microbatch + + schedule_plans = [] + losses = [] + set_current_microbatch(gpt_model[0], 1) + + gpt_model[0].zero_grad_buffer() + optimizer.zero_grad() + assert cuda_graph_warmup_steps > 0, "cuda_graph_warmup_steps must be greater than 0" + for fwd_mb_idx in range(num_iters + 1): + # Capture CUDA graphs after warmup if helper is provided + if self.cuda_graph_helper is not None and fwd_mb_idx == cuda_graph_warmup_steps: + self.cuda_graph_helper.create_cudagraphs() + + if fwd_mb_idx < cuda_graph_warmup_steps: + gpt_model[0].zero_grad_buffer() + optimizer.zero_grad() + output = gpt_model[0].forward(**data) + schedule_plans.append(None) + else: + if fwd_mb_idx == cuda_graph_warmup_steps: + extra_schedule_plan = unwrap_model(gpt_model[0]).build_schedule_plan(**data) + TransformerModelChunkSchedulePlan.run(extra_schedule_plan, None) + schedule_plans[-1] = extra_schedule_plan + f_schedule_plan = unwrap_model(gpt_model[0]).build_schedule_plan(**data) + b_schedule_plan = schedule_plans[-1] + schedule_plans.append(f_schedule_plan) + if b_schedule_plan is not None: + gpt_model[0].zero_grad_buffer() + optimizer.zero_grad() + output = TransformerModelChunkSchedulePlan.run( + f_schedule_plan, + b_schedule_plan, + b_grad=torch.ones_like(output) if fwd_mb_idx > 0 else None, + ) + # Check output shapes + if fwd_mb_idx < 
num_iters: + assert output is not None + assert output.shape[0] == self.micro_batch_size + assert output.shape[1] == self.seq_length + losses.append(output) + + if fwd_mb_idx < cuda_graph_warmup_steps: + output.backward(torch.ones_like(output)) + + for param in gpt_model[0].parameters(): + assert param.main_grad is not None + + update_successful, _, _ = optimizer.step() + assert update_successful + + return losses + + def _run_test_helper( + self, + ep_size, + cuda_graph_impl, + cuda_graph_scope, + cuda_graph_warmup_steps, + ep_overlap=False, + **kwargs, + ): + """Test fp8_param with gpt_model.""" + args = self.create_test_args( + cuda_graph_impl, + cuda_graph_scope, + cuda_graph_warmup_steps, + ep_size, + overlap_moe_expert_parallel_comm=ep_overlap, + **kwargs, + ) + if ep_overlap: + set_streams() + set_args(args) + torch.manual_seed(123) + Utils.initialize_model_parallel( + tensor_model_parallel_size=2, expert_model_parallel_size=ep_size + ) + + input_ids, labels, position_ids, attention_mask, loss_mask = self.get_batch( + self.seq_length, self.micro_batch_size + ) + + gpt_model, optimizer, _ = setup_model_and_optimizer( + self.model_provider, ModelType.encoder_or_decoder + ) + assert len(gpt_model) == 1 # Assume only one model in the model provider. 
+ + loss_list = [] + + if cuda_graph_impl == "transformer_engine": + from megatron.core.transformer.cuda_graphs import TECudaGraphHelper + + self.cuda_graph_helper = TECudaGraphHelper( + model=gpt_model, + config=gpt_model[0].config, + seq_length=self.seq_length, + micro_batch_size=self.micro_batch_size, + optimizers=[optimizer], + ) + + num_iters = cuda_graph_warmup_steps + 2 + data = { + "input_ids": input_ids, + "position_ids": position_ids, + "attention_mask": attention_mask, + "labels": labels, + "loss_mask": loss_mask, + } + if not ep_overlap: + for i in range(num_iters): + gpt_model[0].zero_grad_buffer() + optimizer.zero_grad() + + # Capture CUDA graphs after warmup if helper is provided + if self.cuda_graph_helper is not None and i == cuda_graph_warmup_steps: + self.cuda_graph_helper.create_cudagraphs() + + output = unwrap_model(gpt_model[0]).forward(**data) + output = float16_to_fp32(output) + + # Check output shapes + assert output.shape[0] == self.micro_batch_size + assert output.shape[1] == self.seq_length + + # Verify gradients + output.backward(torch.ones_like(output)) + for param in gpt_model[0].parameters(): + assert param.main_grad is not None + + update_successful, _, _ = optimizer.step() + assert update_successful + + loss_list.append(output) + else: + loss_list = self._run_1f1b_helper( + gpt_model, optimizer, data, num_iters, cuda_graph_warmup_steps + ) + + return loss_list + + @pytest.mark.skipif( + not (HAVE_TE and is_te_min_version("2.10.0")), + reason="Partial CUDA graph support requires TransformerEngine version >= 2.10.0", + ) + @pytest.mark.parametrize("moe_dispatcher_type", ["alltoall", "deepep"]) + def test_moe_partial_cudagraph_with_ep_overlap(self, moe_dispatcher_type): + extra_kwargs = {"moe_layer_freq": 1} + if moe_dispatcher_type == "deepep": + if not is_deep_ep_available(): + pytest.skip("Deep EP is not available") + extra_kwargs["moe_token_dispatcher_type"] = "flex" + extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" + 
extra_kwargs["moe_router_dtype"] = "fp32" + elif moe_dispatcher_type == "hybridep": + if not is_hybrid_ep_available(): + pytest.skip("Hybrid EP is not available") + extra_kwargs["moe_token_dispatcher_type"] = "flex" + extra_kwargs["moe_flex_dispatcher_backend"] = "hybridep" + else: + extra_kwargs["moe_token_dispatcher_type"] = moe_dispatcher_type + + loss_list_ref = self._run_test_helper(4, "none", None, 3, **extra_kwargs) + for cuda_graph_scope in [ + [CudaGraphScope.attn], + [CudaGraphScope.attn, CudaGraphScope.moe_router], + [CudaGraphScope.attn, CudaGraphScope.moe_router, CudaGraphScope.moe_preprocess], + ]: + cuda_graph_warmup_steps = 3 + loss_list = self._run_test_helper( + 4, + "transformer_engine", + cuda_graph_scope, + cuda_graph_warmup_steps, + ep_overlap=True, + **extra_kwargs, + ) + assert len(loss_list) == len(loss_list_ref) + for i in range(len(loss_list)): + assert torch.equal( + loss_list[i].mean(), loss_list_ref[i].mean() + ), f"scope={cuda_graph_scope}, i={i},loss_list={loss_list[i]}, loss_list_ref={loss_list_ref[i]}" + print(f"[DEBUG] Pass {cuda_graph_scope}") diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py index 7fb97f6e586..0fd2c445c9f 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py @@ -306,7 +306,7 @@ def test_transformer_layer_overlap_shared_expert(self): "moe_shared_expert_intermediate_size": 512, } overlap_config = get_test_config(extra_kwargs=extra_kwargs) - extra_kwargs["moe_shared_expert_overlap"] = True + extra_kwargs["moe_shared_expert_overlap"] = False ref_config = get_test_config(extra_kwargs=extra_kwargs) microbatches = 4 with deterministic_mode(): diff --git a/tests/unit_tests/a2a_overlap/utils.py b/tests/unit_tests/a2a_overlap/utils.py index 7db4256a849..a52843956df 100644 --- a/tests/unit_tests/a2a_overlap/utils.py +++ b/tests/unit_tests/a2a_overlap/utils.py @@ -1,3 
+1,4 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import os from contextlib import contextmanager from dataclasses import dataclass diff --git a/tests/unit_tests/pipeline_parallel/test_schedules.py b/tests/unit_tests/pipeline_parallel/test_schedules.py index b861aa2df49..86b9219fe0f 100644 --- a/tests/unit_tests/pipeline_parallel/test_schedules.py +++ b/tests/unit_tests/pipeline_parallel/test_schedules.py @@ -1,3 +1,5 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + import os import pytest @@ -127,6 +129,52 @@ def test_get_pipeline_parallel_order( for k, v in order_cnt.items(): assert -k in order_cnt and order_cnt[-k] == v + layers_per_chunk = 2 + num_layers_per_chunk = [layers_per_chunk] * num_model_chunks + # disable wgrad compute + overlapped_order, chunk_id_list = schedule.get_overlap_moe_expert_parallel_comm_order( + order, num_layers_per_chunk, False + ) + assert max(overlapped_order) == num_model_chunks * layers_per_chunk + assert len(overlapped_order) == len(order) * layers_per_chunk + assert len(chunk_id_list) == len(overlapped_order) + order_cnt = {} + accumulated_order = 0 + for o in overlapped_order: + order_cnt[o] = order_cnt.get(o, 0) + 1 + if o < 0: + assert -o in order_cnt and order_cnt[-o] >= order_cnt[o] + elif -o in order_cnt: + assert order_cnt[-o] < order_cnt[o] + accumulated_order += o + assert accumulated_order >= 0 + assert accumulated_order == 0 + + # enable wgrad compute + overlapped_order, chunk_id_list = schedule.get_overlap_moe_expert_parallel_comm_order( + order, num_layers_per_chunk, True + ) + assert max(overlapped_order) == num_model_chunks * layers_per_chunk + assert len(overlapped_order) == len(order) * layers_per_chunk * 3 // 2 + assert len(chunk_id_list) == len(overlapped_order) + from math import ceil + + order_cnt = {} + accumulated_order = 0 + prev_o = 0 + for o in overlapped_order: + if ceil(o) != o: + assert prev_o - 0.5 == o + else: + order_cnt[o] = order_cnt.get(o, 0) 
+ 1 + if o < 0: + assert -o in order_cnt and order_cnt[-o] >= order_cnt[o] + elif -o in order_cnt: + assert order_cnt[-o] < order_cnt[o] + accumulated_order += o + prev_o = o + assert accumulated_order < 0 + Utils.destroy_model_parallel() diff --git a/tests/unit_tests/transformer/test_submodule_callables.py b/tests/unit_tests/transformer/test_submodule_callables.py index 1ccb6fd5be8..73059495c06 100644 --- a/tests/unit_tests/transformer/test_submodule_callables.py +++ b/tests/unit_tests/transformer/test_submodule_callables.py @@ -64,7 +64,7 @@ def run_model_submodules_with_capture(model, input_tensors, microbatches): output_tensors = [] # get callables callables, dw = build_layer_callables(model) - attn, post_attn, dispatch, moe, combine, post_process = callables + attn, dispatch, moe, combine, post_process = callables assert post_process is None dummy_model = DummyState() dummy_model.decoder = DummyState() @@ -76,24 +76,16 @@ def run_model_submodules_with_capture(model, input_tensors, microbatches): node.chunk_state.model = dummy_model # attn fwd - hidden_states = attn(node, input_tensors[i]) - - # post attn fwd - local_tokens, probs = post_attn(node, hidden_states) + local_tokens, probs = attn(node, input_tensors[i]) # dispatch fwd dispatched_tokens = dispatch(node, local_tokens, probs) # moe fwd - expert_outputs = moe(node, dispatched_tokens) - if model.mlp.use_shared_expert: - expert_output, shared_expert_output = expert_outputs - else: - expert_output = expert_outputs - shared_expert_output = None + expert_output = moe(node, dispatched_tokens) # combine fwd - hidden_states = combine(node, expert_output, shared_expert_output) + hidden_states = combine(node, expert_output) # loss output_tensors.append(hidden_states) From c1045f6954a68599c0447f35310f80e94a07ff1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 5 Jan 2026 11:59:40 +0100 Subject: [PATCH 212/334] =?UTF-8?q?Revert=20"[Dev]=20FP8=20params=20suppor?= 
=?UTF-8?q?t=20for=20megatron-fsdp=20(MXFP8/Blockwise)=20=E2=80=A6=20(#280?= =?UTF-8?q?4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../distributed/fsdp/mcore_fsdp_adapter.py | 4 - .../fsdp/src/megatron_fsdp/megatron_fsdp.py | 157 +++--- .../fsdp/src/megatron_fsdp/mixed_precision.py | 331 ------------- .../megatron_fsdp/param_and_grad_buffer.py | 450 +++++------------- .../fsdp/src/megatron_fsdp/utils.py | 252 +++++++++- megatron/training/arguments.py | 7 - 6 files changed, 421 insertions(+), 780 deletions(-) delete mode 100644 megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py diff --git a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py index d6384e70488..7432a7f9a36 100644 --- a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +++ b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py @@ -111,9 +111,6 @@ def __init__( dist_index=self.megatron_fsdp_dist_index, calculate_per_token_loss=config.calculate_per_token_loss, init_model_with_meta_device=config.init_model_with_meta_device, - enable_fine_grained_param_gather_hook=( - config.fp8_recipe == "mxfp8" and ddp_config.fp8_param_gather - ), ), ) self.param_and_grad_buffer = self.module.param_and_grad_buffer @@ -126,7 +123,6 @@ def __init__( self.broadcast_params = self.module.broadcast_params self.module.state_dict_for_save_checkpoint = self.module.state_dict self.state_dict_for_save_checkpoint = self.state_dict - self.module.config = config self.sync_rng_states_across_tp_group() diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index 17f7f4d1c05..8a63e0f5cf7 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -23,20 +23,6 @@ import torch.nn as nn from 
torch.utils._pytree import tree_flatten, tree_map, tree_unflatten -from .mixed_precision import ( - fp8_create_transpose_cache, - fp8_discard_transpose_cache, - is_float8tensor, -) -from .param_and_grad_buffer import ( - AllGatherPipeline, - BucketingPolicy, - GradReducePipeline, - ParamAndGradBuffer, - PrefetchOrder, - override_sharded_param_methods_with_safety_checks, - to_local_if_dtensor, -) from .utils import FSDPDistributedIndex logger = logging.getLogger(__name__) @@ -48,12 +34,23 @@ from megatron.core.distributed.distributed_data_parallel_config import ( DistributedDataParallelConfig, ) + from megatron.core.fp8_utils import is_float8tensor from megatron.core.utils import is_submodule except ImportError: # Megatron-LM is not installed, use Megatron-FSDP as a standalone module. logger.info("Megatron Core is not installed, Megatron-FSDP will run without Megatron Core.") from .distributed_data_parallel_config import DistributedDataParallelConfig - from .utils import is_submodule + from .utils import is_float8tensor, is_submodule + +from .param_and_grad_buffer import ( + AllGatherPipeline, + BucketingPolicy, + GradReducePipeline, + ParamAndGradBuffer, + PrefetchOrder, + override_sharded_param_methods_with_safety_checks, + to_local_if_dtensor, +) class TrainingState(Enum): @@ -171,7 +168,6 @@ def __init__( nccl_ub: bool = False, fsdp_double_buffer: bool = False, disable_symmetric_registration: bool = False, - enable_fine_grained_param_gather_hook: bool = False, ): super().__init__() # If device is not specified, use the current device. 
@@ -221,7 +217,6 @@ def __init__( self.calculate_per_token_loss = calculate_per_token_loss self.init_model_with_meta_device = init_model_with_meta_device - self.enable_fine_grained_param_gather_hook = enable_fine_grained_param_gather_hook # Whether to constantly synchronize the model every training iteration, # which defaults to False to overlap communication with computation @@ -405,7 +400,6 @@ def all_gather_and_wait_parameters_ready( prefetch=True, prefetch_order=PrefetchOrder.FORWARD_PASS_ORDER, wait_bucket_ready=True, - bwd=False, ): """ All-gather parameters across the data parallel group and wait for @@ -432,14 +426,11 @@ def all_gather_and_wait_parameters_ready( and self.ddp_config.outer_dp_sharding_strategy != "no_shard" and (self.microbatch_count == 0 or self.model_auto_sync) ), - bwd=bwd, ) if wait_bucket_ready: for param in params: bucket_id = self.param_and_grad_buffer.param_to_param_group[param] - ag_pipeline.wait_bucket_ready(bucket_id, bwd) - if bwd and is_float8tensor(param): - fp8_create_transpose_cache(param) + ag_pipeline.wait_bucket_ready(bucket_id) for param in params: # This setting is needed to make FSDP store the weight object when used @@ -498,17 +489,19 @@ def _register_fsdp_hooks(self, root_module): """ fsdp_unit_modules = self.fsdp_unit_modules - def release_module_parameters(module, bwd, *unused): + def release_module_parameters(module, *unused): for param in module.parameters(): bucket_id = self.param_and_grad_buffer.param_to_param_group[param] - self.all_gather_pipeline.release_bucket(bucket_id, bwd) + self.all_gather_pipeline.release_bucket(bucket_id) + if not self.ddp_config.keep_fp8_transpose_cache: release_params_fp8_transpose_cache(module.parameters()) def release_params_fp8_transpose_cache(params): for param in params: if is_float8tensor(param): - fp8_discard_transpose_cache(param) + param._transpose_invalid = True + param._transpose = None def _grad_acc(param): """ @@ -565,15 +558,12 @@ def _post_backward(module, *unused): if 
self.ddp_config.data_parallel_sharding_strategy == "optim_grads_params": # Deallocate the module parameters after the backward pass, # because we have our data-parallel gradients computed. - release_module_parameters(module, bwd=True) + release_module_parameters(module) module._training_state = TrainingState.IDLE param_list = list(module.parameters()) else: param_list = list(module.parameters(recurse=False)) - if self.enable_fine_grained_param_gather_hook: - param_list = list(module.parameters(recurse=False)) - # If the parameter is shared, we do not accumulate gradients # here, as the gradients will be accumulated in the # root post-backward hook. @@ -625,9 +615,6 @@ def _pre_forward_param_unshard( # to allocate as little memory as possible for this forward pass. param_list = list(module.parameters(recurse=False)) - if self.enable_fine_grained_param_gather_hook: - param_list = list(module.parameters(recurse=False)) - # All-gather the parameters before the forward pass. self.all_gather_and_wait_parameters_ready( params=param_list, @@ -727,7 +714,7 @@ def _root_post_backward(*unused): if self.model_auto_sync: self.finish_grad_sync() - def _pre_backward_param_unshard(module: nn.Module, *unused): + def _pre_backward(module: nn.Module, *unused): """ Sub-module pre-backward hook to all-gather the module parameters before the backward pass. @@ -736,19 +723,11 @@ def _pre_backward_param_unshard(module: nn.Module, *unused): # and unsharding operations when performing activation recomputation # / gradient checkpointing. module._training_state = TrainingState.PRE_BACKWARD - if isinstance(module, tuple(fsdp_unit_modules)): - param_list = list(module.parameters()) - else: - param_list = list(module.parameters(recurse=False)) - - if self.enable_fine_grained_param_gather_hook: - param_list = list(module.parameters(recurse=False)) - - # All-gather / unshard the module parameters before the backward pass. 
- self.all_gather_and_wait_parameters_ready( - param_list, prefetch_order=PrefetchOrder.BACKWARD_PASS_ORDER, bwd=True - ) + # All-gather / unshard the module parameters before the backward pass. + self.all_gather_and_wait_parameters_ready( + list(module.parameters()), prefetch_order=PrefetchOrder.BACKWARD_PASS_ORDER + ) self._root_pre_backward_hook_issued = False @@ -775,9 +754,7 @@ def _root_pre_backward(module: nn.Module, *unused): for bucket_id in range(ag_pipeline.num_buckets): group = self.param_and_grad_buffer.parameter_groups[bucket_id] if group.fsdp_unit_id is not None: - ag_pipeline.bucket_can_be_released[ - ag_pipeline.get_bucket_key(bucket_id, bwd=False) - ] = True + ag_pipeline.bucket_can_be_released[bucket_id] = True # Track parameters that require gradient reduction and optimization. self._params_require_handle_grad = set() for param_group in self.param_and_grad_buffer.parameter_groups: @@ -799,12 +776,8 @@ def _post_forward(module: nn.Module, input: Any, output: Any): # during activation recomputation / gradient checkpointing. return output - assert isinstance( - module, tuple(fsdp_unit_modules) - ), "_post_forward hook should only be registered on FSDP unit modules." - # Release the module parameters after the forward pass to save memory. - release_module_parameters(module, bwd=False) + release_module_parameters(module) module._training_state = TrainingState.IDLE return output @@ -845,55 +818,21 @@ def forward_hook(_module, inputs, output): # on the output tensor(s). return module.register_forward_hook(forward_hook) - def _register_pre_forward_param_unshard_hook(module): - """ - Register the forward pre-hook to unshard parameters before the forward pass. - If we are not sharding anything, we do not have a model weight buffer and thus - have nothing to all-gather / un-shard. 
- """ - if self.ddp_config.data_parallel_sharding_strategy != "no_shard": - self.forward_pre_hooks[f"{module._get_name()} parameter unshard"] = ( - module.register_forward_pre_hook( - _pre_forward_param_unshard, prepend=True, with_kwargs=True - ) - ) - - def _register_pre_backward_param_unshard_hook(module): - """ - Register the backward pre-hook to unshard FSDP unit module parameters - immediately before the backward pass via attaching a gradient-triggered - hook to the output tensor(s) of a module during a post-forward hook. - """ - self.backward_pre_hooks[f"all-gather {module._get_name()} parameters"] = ( - create_custom_backward_hook(module, _pre_backward_param_unshard) - ) - - def _register_grad_acc_and_reduce_hook(module): - """ - Register the post-backward hook to deallocate model parameters and - reduce-scatter gradients immediately after the module backward pass - has completed to conserve memory for the subsequent backward pass. - """ - self.forward_pre_hooks[f"module {name} register post-backward hook"] = ( - module.register_forward_pre_hook( - functools.partial(_register_post_backward_hook, _post_backward), - with_kwargs=True, - ) - ) - fsdp_modules = [] for name, module in root_module.named_modules(): - if self.enable_fine_grained_param_gather_hook: - _register_pre_forward_param_unshard_hook(module) - _register_pre_backward_param_unshard_hook(module) - _register_grad_acc_and_reduce_hook(module) - # Skip if the module is already registered in fsdp_modules. if any(is_submodule(module, fsdp_module) for fsdp_module in fsdp_modules): continue - if not self.enable_fine_grained_param_gather_hook: - _register_pre_forward_param_unshard_hook(module) + # Register the forward pre-hook to unshard parameters before the forward pass. + # If we are not sharding anything, we do not have a model weight buffer and thus + # have nothing to all-gather / un-shard. 
+ if self.ddp_config.data_parallel_sharding_strategy != "no_shard": + self.forward_pre_hooks[f"module {name} parameter unshard"] = ( + module.register_forward_pre_hook( + _pre_forward_param_unshard, prepend=True, with_kwargs=True + ) + ) if isinstance(module, tuple(fsdp_unit_modules)): fsdp_modules.append(module) @@ -904,8 +843,12 @@ def _register_grad_acc_and_reduce_hook(module): module.register_forward_hook(_post_forward, prepend=False) ) - if not self.enable_fine_grained_param_gather_hook: - _register_pre_backward_param_unshard_hook(module) + # Register the backward pre-hook to unshard FSDP unit module parameters + # immediately before the backward pass via attaching a gradient-triggered + # hook to the output tensor(s) of a module during a post-forward hook. + self.backward_pre_hooks[f"all-gather module {name} parameters"] = ( + create_custom_backward_hook(module, _pre_backward) + ) elif ( not self.ddp_config.keep_fp8_transpose_cache and self.ddp_config.data_parallel_sharding_strategy == "optim_grads_params" @@ -918,8 +861,15 @@ def _register_grad_acc_and_reduce_hook(module): module.register_forward_hook(_release_module_fp8_transpose_cache, prepend=False) ) - if not self.enable_fine_grained_param_gather_hook: - _register_grad_acc_and_reduce_hook(module) + # Register the post-backward hook to deallocate model parameters and + # reduce-scatter gradients immediately after the module backward pass + # has completed to conserve memory for the subsequent backward pass. 
+ self.forward_pre_hooks[f"module {name} register post-backward hook"] = ( + module.register_forward_pre_hook( + functools.partial(_register_post_backward_hook, _post_backward), + with_kwargs=True, + ) + ) # Register root module pre- and post-backward hooks in cases where the # forward function of root module is not called, but rather the forward @@ -1036,7 +986,7 @@ def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bo else: self.synchronize_param_gather() for bucket_id in range(self.all_gather_pipeline.num_buckets): - self.all_gather_pipeline.async_bucket_gather(bucket_id=bucket_id, bwd=False) + self.all_gather_pipeline.async_bucket_gather(bucket_id=bucket_id) group = self.param_and_grad_buffer.parameter_groups[bucket_id] if group.model_weight_buffer is None: continue @@ -1044,10 +994,9 @@ def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bo if group.model_weight_buffer.is_data_distributed: # If model weight is sharded, we wait for the all-gather to complete and # then release the bucket immediately to save memory usage. - self.all_gather_pipeline.wait_bucket_ready(bucket_id, False) - + self.all_gather_pipeline.wait_bucket_ready(bucket_id) for bucket_id in range(self.all_gather_pipeline.num_buckets): - self.all_gather_pipeline.wait_bucket_ready(bucket_id, False) + self.all_gather_pipeline.wait_bucket_ready(bucket_id) def start_grad_sync(self, *unused): """ diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py deleted file mode 100644 index 69a049ad955..00000000000 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py +++ /dev/null @@ -1,331 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -from importlib.metadata import version -from typing import List, Optional, Tuple - -import torch -from packaging.version import Version as PkgVersion - -logger = logging.getLogger(__name__) - -# Detect if Transformer Engine is installed -try: - import transformer_engine # pylint: disable=W0611 - from transformer_engine.pytorch.module.base import TransformerEngineBaseModule - - HAVE_TE = True -except (ImportError, ModuleNotFoundError): - TransformerEngineBaseModule = None - HAVE_TE = False - logger.info("Using Megatron-FSDP without Transformer Engine.") - -# Detect the Transformer Engine version -try: - import transformer_engine as te - - if hasattr(te, "__version__"): - TE_VERSION = PkgVersion(str(te.__version__)) - else: - TE_VERSION = PkgVersion(version("transformer-engine")) -except: - TE_VERSION = None - -# Detect the FP8 tensor class -try: - from transformer_engine.pytorch.tensor import QuantizedTensor - - HAVE_TE_FP8_TENSOR_CLASS = True - FP8_TENSOR_CLASS = QuantizedTensor -except: - try: - from transformer_engine.pytorch.float8_tensor import Float8Tensor - - HAVE_TE_FP8_TENSOR_CLASS = True - FP8_TENSOR_CLASS = Float8Tensor - except: - HAVE_TE_FP8_TENSOR_CLASS = False - -# Detect the MXFP8 tensor class -try: - from transformer_engine.pytorch.tensor.mxfp8_tensor import MXFP8Tensor - - HAVE_TE_MXFP8TENSOR = True -except: - HAVE_TE_MXFP8TENSOR = False - -# Detect the Blockwise FP8 tensor class -try: - from transformer_engine.pytorch.tensor.float8_blockwise_tensor import Float8BlockwiseQTensor - - HAVE_TE_BLOCKWISE_FP8TENSOR = True 
-except: - HAVE_TE_BLOCKWISE_FP8TENSOR = False - -# Detect the "cast_master_weights_to_fp8" function of Transformer Engine -try: - from transformer_engine.pytorch.tensor.utils import cast_master_weights_to_fp8 - - HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8 = True -except: - HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8 = False - - # Try to import multi_tensor_apply, used in the fallback of fp8 quantization. - try: - from transformer_engine.pytorch.optimizers import multi_tensor_applier, multi_tensor_scale - - multi_tensor_scale_impl = multi_tensor_scale - except ImportError: - try: - import amp_C - from apex.multi_tensor_apply import multi_tensor_applier - - multi_tensor_scale_impl = amp_C.multi_tensor_scale - except ImportError: - import warnings - - warnings.warn( - "Transformer Engine and Apex are not installed. " - "Falling back to local implementations of " - "multi_tensor_applier and multi_tensor_scale" - ) - - def local_multi_tensor_applier(op, noop_flag_buffer, tensor_lists, *args): - """Multi tensor op applier""" - return op(2048 * 32, noop_flag_buffer, tensor_lists, *args) - - def local_multi_tensor_scale(chunk_size, noop_flag, tensor_lists, scale): - """Works as a drop-in replacement for amp_C.multi_tensor_scale.""" - for src, dst in zip(tensor_lists[0], tensor_lists[1]): - dst.copy_(src * scale) - - multi_tensor_applier = local_multi_tensor_applier - multi_tensor_scale_impl = local_multi_tensor_scale - - def _multi_tensor_copy_this_to_that( - this: List[torch.Tensor], - that: List[torch.Tensor], - overflow_buf: Optional[torch.Tensor] = None, - ): - """ - Use multi-tensor-applier to copy values from one list to another. - We don't have a bfloat16 implementation so for now if the overflow_buf - is not provided, we default back to simple loop copy to be compatible - with bfloat16. - """ - if overflow_buf is not None: - overflow_buf.fill_(0) - # Scaling with factor `1.0` is equivalent to copy. 
- multi_tensor_applier(multi_tensor_scale_impl, overflow_buf, [this, that], 1.0) - else: - for this_, that_ in zip(this, that): - that_.copy_(this_) - - -# Detect the "post_all_gather_processing" function of Transformer Engine -try: - from transformer_engine.pytorch.tensor.utils import post_all_gather_processing - - HAVE_TE_POST_ALL_GATHER_PROCESSING = True -except: - HAVE_TE_POST_ALL_GATHER_PROCESSING = False - - -def is_te_min_version(vers, check_equality=True): - """Check if minimum version of `transformer-engine` is installed.""" - if not isinstance(TE_VERSION, PkgVersion): - return False - - if check_equality: - return TE_VERSION >= PkgVersion(vers) - else: - return TE_VERSION > PkgVersion(vers) - - -def is_float8tensor(tensor: torch.Tensor) -> bool: - """Check if a tensor is a FP8 tensor.""" - return HAVE_TE and isinstance(tensor, FP8_TENSOR_CLASS) - - -def is_blockwise_float8tensor(tensor: torch.Tensor) -> bool: - """Check if a tensor is a Blockwise FP8 tensor.""" - return HAVE_TE_BLOCKWISE_FP8TENSOR and isinstance(tensor, Float8BlockwiseQTensor) - - -def fp8_need_transpose_data(tensor: torch.Tensor) -> bool: - """Check if a FP8 tensor needs transpose data.""" - return HAVE_TE_MXFP8TENSOR and isinstance(tensor, MXFP8Tensor) - - -def fp8_need_transpose_data_for_meta_device_init(module: TransformerEngineBaseModule) -> bool: - """Check if a FP8 tensor needs transpose data, for meta device init scenario.""" - return HAVE_TE_MXFP8TENSOR and module.fp8_meta["recipe"].mxfp8() - - -def fp8_discard_transpose_cache(tensor: torch.Tensor) -> None: - """Discard the transpose cache of a FP8 tensor.""" - assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" - - if hasattr(tensor, "_transpose_invalid"): - tensor._transpose_invalid = True - tensor._transpose = None - elif not fp8_need_transpose_data(tensor): - tensor.update_usage(rowwise_usage=True, columnwise_usage=False) - - -def fp8_create_transpose_cache(tensors: List[torch.Tensor]) -> None: - 
"""Create the transpose cache of a FP8 tensor.""" - if HAVE_TE_POST_ALL_GATHER_PROCESSING: - post_all_gather_processing(tensors) - else: - _fp8_create_transpose_cache_fallback(tensors) - - -def _fp8_create_transpose_cache_fallback(tensors: List[torch.Tensor]) -> None: - if not isinstance(tensors, list): - tensors = [tensors] - for tensor in tensors: - assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" - if hasattr(tensor, "_create_transpose"): - tensor._create_transpose() - else: - tensor._create_columnwise() - - -def fp8_set_raw_data(tensor: torch.Tensor, data: torch.Tensor, set_transpose: bool = False) -> None: - """Set the raw data of a Transformer Engine Float8Tensor.""" - assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" - - if set_transpose: - assert fp8_need_transpose_data(tensor), f"Type {type(tensor)} does not need transpose data" - data_attr = "_columnwise_data" - else: - data_attr = "_rowwise_data" if hasattr(tensor, "_rowwise_data") else "_data" - - old_data = getattr(tensor, data_attr) - assert old_data.dtype == data.dtype, "The data types of raw data don't match" - assert ( - old_data.shape == data.shape - ), f"Shape {old_data.shape} of old_data doesn't match {data.shape} of new_data" - setattr(tensor, data_attr, data) - - -def fp8_get_raw_data(tensor: torch.Tensor, get_transpose: bool = False) -> torch.Tensor: - """Get the underlying raw storage of a FP8 tensor.""" - assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" - - if get_transpose: - assert fp8_need_transpose_data(tensor), f"Type {type(tensor)} does not need transpose data" - data_attr = "_columnwise_data" - else: - data_attr = "_rowwise_data" if hasattr(tensor, "_rowwise_data") else "_data" - - return getattr(tensor, data_attr) - - -def fp8_dequantize(tensor: torch.Tensor) -> torch.Tensor: - """Dequantize a FP8 tensor to a higher precision.""" - assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" - 
assert is_te_min_version( - "2.0" - ), "Transformer Engine >= 2.0 is required for dequantizing parameters." - return tensor.dequantize() - - -def fp8_quantize( - model_params: List[torch.Tensor], - main_params: List[torch.Tensor], - start_offsets: List[int], - data_parallel_group: torch.distributed.ProcessGroup, - fsdp_shard_model_params: List[Tuple[torch.Tensor, Optional[torch.Tensor]]], -) -> None: - """Quantize sharded parameters to FP8.""" - if len(model_params) == 0: - return - fsdp_shard_model_params = [x[0] if x[1] is None else x for x in fsdp_shard_model_params] - - if HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8: - cast_master_weights_to_fp8( - model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params - ) - else: - _fp8_quantize_fallback( - model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params - ) - - -def _fp8_quantize_fallback( - model_params: List[torch.Tensor], - main_params: List[torch.Tensor], - start_offsets: List[int], - data_parallel_group: torch.distributed.ProcessGroup, - fsdp_shard_model_params: List[Tuple[torch.Tensor, Optional[torch.Tensor]]], -) -> None: - for model_param, main_param, start_offset, fsdp_shard_model_param in zip( - model_params, main_params, start_offsets, fsdp_shard_model_params - ): - if main_param is None: - continue - - if fsdp_shard_model_param is not None: - shard_model_param = fsdp_shard_model_param - else: - shard_model_param = model_param._data.view(-1)[ - start_offset : start_offset + main_param.numel() - ] - - quantizer = model_param._quantizer - # When not using fp8 params, the main_param (fp32) is first cast to bf16/fp16, and then - # cast to fp8 during forward. This logic keeps numerical consistency with bf16 params. 
- main_param = main_param.to(model_param.dtype) - out = Float8Tensor( - shape=main_param.size(), - dtype=model_param.dtype, - requires_grad=False, - data=shard_model_param, - fp8_scale_inv=model_param._scale_inv, - fp8_dtype=model_param._fp8_dtype, - quantizer=quantizer, - ) - quantizer.update_quantized(main_param, out) - - amaxes = [] - scales = [] - scale_invs = [] - for model_param in model_params: - quantizer = model_param._quantizer - amaxes.append(quantizer.amax.view(1)) - scales.append(quantizer.scale.view(1)) - scale_invs.append(model_param._scale_inv.view(1)) - model_param._reset_caches() - - dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device="cuda") - - # Update scaling factors. - packed_scales = torch.empty(len(scales), dtype=torch.float32, device=scales[0].device) - packed_scale_views = [packed_scales[i].view(1) for i in range(len(scales))] - _multi_tensor_copy_this_to_that(scales, packed_scale_views, dummy_overflow_buf) - torch.reciprocal(packed_scales, out=packed_scales) - _multi_tensor_copy_this_to_that(packed_scale_views, scale_invs, dummy_overflow_buf) - - # Reduce amaxes. - # Note: Assume each param has a separate amax. 
- packed_amaxes = torch.empty(len(amaxes), dtype=torch.float32, device=amaxes[0].device) - packed_amax_views = [packed_amaxes[i].view(1) for i in range(len(amaxes))] - _multi_tensor_copy_this_to_that(amaxes, packed_amax_views, dummy_overflow_buf) - torch.distributed.all_reduce( - packed_amaxes, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group - ) - _multi_tensor_copy_this_to_that(packed_amax_views, amaxes, dummy_overflow_buf) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index 46b97743385..cdd9d8bf0a1 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -33,17 +33,6 @@ from torch.distributed.tensor import DTensor, Replicate, Shard from torch.distributed.tensor.device_mesh import _mesh_resources -from .mixed_precision import ( - fp8_discard_transpose_cache, - fp8_get_raw_data, - fp8_need_transpose_data, - fp8_need_transpose_data_for_meta_device_init, - fp8_quantize, - fp8_set_raw_data, - is_blockwise_float8tensor, - is_float8tensor, - is_te_min_version, -) from .uneven_dtensor import update_uneven_dtensor_chunk_metadata, validate_uneven_dtensor from .utils import ( _MODEL_PARALLEL_RNG_TRACKER_NAME, @@ -62,15 +51,27 @@ from megatron.core.distributed.distributed_data_parallel_config import ( DistributedDataParallelConfig, ) + from megatron.core.fp8_utils import ( + is_float8tensor, + modify_underlying_storage, + quantize_param_shard, + ) from megatron.core.tensor_parallel import get_cuda_rng_tracker - from megatron.core.utils import is_submodule + from megatron.core.utils import is_submodule, is_te_min_version logger.info("Detected Megatron Core, using Megatron-FSDP with Megatron.") except ImportError: # Megatron-LM is not installed, use Megatron-FSDP as a standalone module. 
from .distributed_data_parallel_config import DistributedDataParallelConfig - from .utils import get_cuda_rng_tracker, is_submodule + from .utils import ( + get_cuda_rng_tracker, + is_float8tensor, + is_submodule, + is_te_min_version, + modify_underlying_storage, + quantize_param_shard, + ) logger.info("Megatron Core is not installed, Megatron-FSDP will run without Megatron Core.") @@ -816,7 +817,7 @@ def __init__( data_parallel_group: Optional[torch.distributed.ProcessGroup] = None, dp_rank: Optional[int] = None, temporary_bucket_allocator: Optional[TemporaryBucketAllocator] = None, - is_transpose_buffer: bool = False, + is_dtype_float8: bool = False, gradient_scaling_factor: Optional[float] = None, chunk_size_factor: int = 1, mem_alloc_context: Optional[Callable] = None, @@ -849,7 +850,7 @@ def __init__( self.temporary_bucket_allocator = ( temporary_bucket_allocator if temporary_bucket_allocator else TemporaryBucketAllocator() ) - self.is_transpose_buffer = is_transpose_buffer + self.is_dtype_float8 = is_dtype_float8 self.gradient_scaling_factor = gradient_scaling_factor self.mem_alloc_context = mem_alloc_context if mem_alloc_context else nullcontext @@ -945,11 +946,11 @@ def fetch_bucket( for p in self.params: item_id = self.param_idx[p] p = to_local_if_dtensor(p) - data = self.get_item_from_bucket(bucket, item_id).view(p.shape) if is_float8tensor(p): - fp8_set_raw_data(p, data, self.is_transpose_buffer) + p._data = self.get_item_from_bucket(bucket, item_id).view(p.shape) else: - p.data = data + p.data = self.get_item_from_bucket(bucket, item_id).view(p.shape) + return bucket def free_bucket_storage(self): @@ -1118,9 +1119,6 @@ def set_item(self, item_id: int, item_data: torch.Tensor) -> None: # When fully sharded, we need to get the slice of the item to be stored in this shard. # Otherwise, we can just flatten the entire item since this buffer contains # the entire bucket. 
- if is_float8tensor(item_data): - item_data = fp8_get_raw_data(item_data, self.is_transpose_buffer) - if self.is_data_distributed: # Get the coordinates of the slice of the item that is contained in this shard. slice_start, slice_end = self._get_item_slice_in_shard(item_id) @@ -1227,8 +1225,6 @@ class ParameterGroup: Factor determining chunk size for grouped parameter processing. model_weight_buffer (Optional[DataParallelBuffer]): Buffer used to store model weights for data-parallel operations. - transpose_weight_buffer (Optional[DataParallelBuffer]): - Buffer used to store transpose weights for data-parallel operations. main_weight_buffer (Optional[DataParallelBuffer]): Buffer used to store main model weights for data-parallel operations. main_grad_buffer (Optional[DataParallelBuffer]): @@ -1248,7 +1244,6 @@ class ParameterGroup: fsdp_unit_id: Optional[int] = None chunk_size_factor: int = 1 model_weight_buffer: Optional[DataParallelBuffer] = None - transpose_weight_buffer: Optional[DataParallelBuffer] = None main_weight_buffer: Optional[DataParallelBuffer] = None main_grad_buffer: Optional[DataParallelBuffer] = None hsdp_wbuf: Optional[DataParallelBuffer] = None @@ -1319,10 +1314,12 @@ def _does_param_require_new_bucket(param): parameter_groups = [] for name, param in module.named_parameters(): # We need this information to correctly dynamically allocate Tensors! - is_fp8 = is_float8tensor(param) - is_fp8_meta_device_init = meta_device_init_fp8_params.get(name, (False, False))[0] param_attrs = dict( - dtype="float8" if (is_fp8 or is_fp8_meta_device_init) else param.dtype, + dtype=( + "float8" + if is_float8tensor(param) or meta_device_init_fp8_params.get(name, False) + else param.dtype + ), is_expert_param=is_expert_parameter(name, param), requires_grad=param.requires_grad, fsdp_unit_id=None, @@ -1645,10 +1642,7 @@ def __init__( # to determine whether this parameter is fp8 or not. 
fp8_meta_index = m.param_init_meta[name].fp8_meta_index if m.primary_weights_in_fp8 and fp8_meta_index is not None: - meta_device_init_fp8_params[self.param_to_name[param]] = ( - True, - fp8_need_transpose_data_for_meta_device_init(m), - ) + meta_device_init_fp8_params[self.param_to_name[param]] = True # Get the parameter groups. (self.parameter_groups, self.param_to_param_group, self.bucket_to_bucket_group) = ( @@ -1775,7 +1769,6 @@ def _bytes_to_mb(bytes_val: int) -> str: numel = sum(to_local_if_dtensor(p).shape.numel() for p in group.params) buffers = { "weight": group.model_weight_buffer, - "transpose_weight": group.transpose_weight_buffer, "main_weight": group.main_weight_buffer, "grad": group.main_grad_buffer, } @@ -1845,18 +1838,12 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): self.weight_alloc = FixedPoolAllocator( name="fsdp_params", fsdp_param_groups=self.parameter_groups, size=UB_BUFFER_NUM ) - self.transpose_weight_alloc = FixedPoolAllocator( - name="fsdp_fp8_transpose_params", - fsdp_param_groups=self.parameter_groups, - size=UB_BUFFER_NUM, - ) self.main_grad_alloc = FixedPoolAllocator( name="fsdp_grads", fsdp_param_groups=self.parameter_groups, size=UB_BUFFER_NUM ) self.double_buf_units = self.weight_alloc.fsdp_double_buffer_units else: self.weight_alloc = StorageResizeBasedBucketAllocator() - self.transpose_weight_alloc = StorageResizeBasedBucketAllocator() self.main_grad_alloc = None self.double_buf_units = [] @@ -1896,9 +1883,8 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): ) # Check if the parameter group is FP8. 
one_param = group.params[0] - is_dtype_float8 = ( - is_float8tensor(one_param) - or meta_device_init_fp8_params.get(self.param_to_name[one_param], (False, False))[0] + is_dtype_float8 = is_float8tensor(one_param) or meta_device_init_fp8_params.get( + self.param_to_name[one_param], False ) if is_dtype_float8: param_dtype = torch.uint8 @@ -1907,16 +1893,6 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): param_dtype = group.params[0].dtype grad_dtype = param_dtype - # Check if the parameter group needs a transpose buffer for model weights. - # Currently, only mxfp8 needs it. - need_transpose_data = is_float8tensor(one_param) and fp8_need_transpose_data(one_param) - need_transpose_data_for_meta_device_init = meta_device_init_fp8_params.get( - self.param_to_name[one_param], (False, False) - )[1] - should_create_transpose_weight_buffer = ( - need_transpose_data or need_transpose_data_for_meta_device_init - ) - # Check if the parameter group requires a grad buffer or main weight buffer. 
should_create_grad_buffer_or_main_weight_buffer = ( not self.only_create_grad_buffer_and_main_weight_buffer_for_param_requires_grad @@ -1933,29 +1909,13 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=param_dtype, device=self.device, data_parallel_group=main_buf_dp_group, - is_transpose_buffer=False, + is_dtype_float8=is_dtype_float8, temporary_bucket_allocator=self.weight_alloc, bucket_id=group_id, chunk_size_factor=group.chunk_size_factor, mem_alloc_context=self.mem_alloc_context, **main_buf_extra_kwargs, ) - if should_create_transpose_weight_buffer: - group.transpose_weight_buffer = DataParallelBuffer( - self.ddp_config, - group.params, - is_data_distributed=is_model_weight_buffer_distributed - and main_buf_dp_group.size() > 1, - dtype=param_dtype, - device=self.device, - data_parallel_group=main_buf_dp_group, - is_transpose_buffer=True, - temporary_bucket_allocator=self.transpose_weight_alloc, - bucket_id=group_id, - chunk_size_factor=group.chunk_size_factor, - mem_alloc_context=self.mem_alloc_context, - **main_buf_extra_kwargs, - ) # Initialize the main weight buffer. 
if should_create_grad_buffer_or_main_weight_buffer and preserve_fp32_weights: @@ -1987,7 +1947,7 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=torch.float32 if grad_reduce_in_fp32 else grad_dtype, device=self.device, data_parallel_group=main_buf_dp_group, - is_transpose_buffer=False, + is_dtype_float8=False, temporary_bucket_allocator=self.main_grad_alloc, gradient_scaling_factor=gradient_scaling_factor, bucket_id=group_id, @@ -2011,7 +1971,7 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=wbuf.dtype, device=wbuf.device, data_parallel_group=hsdp_buf_dp_group, - is_transpose_buffer=False, + is_dtype_float8=wbuf.is_dtype_float8, temporary_bucket_allocator=self.weight_alloc, bucket_id=group_id, chunk_size_factor=group.chunk_size_factor, @@ -2027,9 +1987,6 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): ), ) - if group.transpose_weight_buffer is not None: - raise NotImplementedError("HSDP for transpose buffer is not implemented yet") - if should_create_grad_buffer_or_main_weight_buffer: # Initialize the HSDP grad buffer. 
gbuf = group.main_grad_buffer @@ -2041,7 +1998,7 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=gbuf.dtype, device=gbuf.device, data_parallel_group=hsdp_buf_dp_group, - is_transpose_buffer=False, + is_dtype_float8=gbuf.is_dtype_float8, temporary_bucket_allocator=self.main_grad_alloc, gradient_scaling_factor=gradient_scaling_factor, bucket_id=group_id, @@ -2124,20 +2081,6 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): torch.empty(wbuf.data_size, dtype=wbuf.dtype, device=self.device) ) bucket = wbuf.fetch_bucket() - - tbuf = group.transpose_weight_buffer - if tbuf: - with self.mem_alloc_context(): - if group.hsdp_wbuf: - raise NotImplementedError( - "HSDP for transpose buffer is not implemented yet" - ) - else: - tbuf.init_data( - torch.empty(tbuf.data_size, dtype=tbuf.dtype, device=self.device) - ) - transpose_bucket = tbuf.fetch_bucket() - mbuf = group.main_weight_buffer if mbuf: # Manually instantiate an empty tensor into the main weight buffer. @@ -2191,41 +2134,25 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): if not self.ddp_config.keep_fp8_transpose_cache: for _param in m.parameters(recurse=False): if is_float8tensor(_param): - fp8_discard_transpose_cache(_param) + _param._transpose_invalid = True + _param._transpose = None # Raise error if a meta parameter still exists after initialization. assert not p.is_meta, (self.param_to_name[p], module_reset_flag) - p_local = to_local_if_dtensor(p) - # Copy the model weight parameter tensor into the buffer. # When distributed, this shards and preserves the data across all ranks. - wbuf.set_item(item_id, p_local) - if tbuf: - tbuf.set_item(item_id, p_local) + wbuf.set_item(item_id, to_local_if_dtensor(p)) # Retrieve the newly allocated parameter data from the global bucket. # Attach the bucket-allocated parameter data to the module parameter, # to use the bucket-allocated data for autograd and NCCL. 
- new_param_data = wbuf.get_item_from_bucket(bucket, item_id).view(p_local.shape) - if tbuf: - new_transpose_data = tbuf.get_item_from_bucket( - transpose_bucket, item_id - ).view(p_local.shape) - else: - new_transpose_data = None - - if is_float8tensor(p_local): - old_param_data = fp8_get_raw_data(p_local) - assert old_param_data._base is None - new_param_data.detach().copy_(old_param_data) - fp8_set_raw_data(p_local, new_param_data) - del old_param_data - if new_transpose_data is not None: - old_transpose_data = fp8_get_raw_data(p_local, True) - assert old_transpose_data._base is None - new_transpose_data.detach().copy_(old_transpose_data) - fp8_set_raw_data(p_local, new_transpose_data, True) - del old_transpose_data + new_param_data = wbuf.get_item_from_bucket(bucket, item_id).view( + to_local_if_dtensor(p).shape + ) + if is_float8tensor(p): + # Needed to instantiate FP8 parameters. Requires installing + # TransformerEngine. + modify_underlying_storage(p, new_param_data) elif isinstance(p, DTensor): old_param_data = p._local_tensor.data p._local_tensor.data = new_param_data @@ -2263,12 +2190,7 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): # the (high-precision) main weight buffer. # Nothing else needs to be done, because the main weights # do not require autograd operations, only possibly sharding. - p_local = to_local_if_dtensor(p) - assert not is_float8tensor(p_local), ( - self.param_to_name[p], - "fp8 param should use get_high_precision_init_val method.", - ) - mbuf.set_item(item_id, p_local) + mbuf.set_item(item_id, to_local_if_dtensor(p)) if wbuf and wbuf.is_data_distributed: # Free the memory backing the temporarily-allocated bucket associated @@ -2280,9 +2202,6 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): # before forward activations and gradients are allocated in training. 
wbuf.free_bucket_storage() - if tbuf and tbuf.is_data_distributed: - tbuf.free_bucket_storage() - # Allocate the main_weight buffer and main_grad buffer data in one buffer. if self.buffer_all_in_one: with self.mem_alloc_context(): @@ -2406,7 +2325,6 @@ def _reset_parameters(self, old_params, new_params): group.params[item_id] = new_p for buf in [ group.model_weight_buffer, - group.transpose_weight_buffer, group.main_weight_buffer, group.main_grad_buffer, group.hsdp_wbuf, @@ -2454,7 +2372,6 @@ def _init_distributed_params(self): dist_main_weight = {} for pg in self.parameter_groups: wbuf = pg.model_weight_buffer - tbuf = pg.transpose_weight_buffer mbuf = pg.main_weight_buffer for item_id, orig_param in enumerate(pg.params): param_name = self.param_to_name[orig_param] @@ -2482,7 +2399,6 @@ def _init_distributed_params(self): ) dist_main_weight[param_name] = dist_param elif wbuf: - assert tbuf is None, "Transpose buffer should only exist when main params exist" dist_param = make_fsdp_dtensor( local_tensor=wbuf.get_item(item_id, only_shard=sharded_optimizer_state), param=orig_param, @@ -2652,54 +2568,9 @@ def copy_main_weights_to_model_weights(self): expert_param_quantize_kwargs = copy.deepcopy(dense_param_quantize_kwargs) data_parallel_group = None expert_data_parallel_group = None - clear_quantize_kwargs = lambda kwargs: [d.clear() for d in kwargs.values()] - - def _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs): - if len(dense_param_quantize_kwargs["model_params"]) > 0: - # If we have FP8 parameters, we need to quantize them. - fp8_quantize(data_parallel_group=data_parallel_group, **dense_param_quantize_kwargs) - - if len(expert_param_quantize_kwargs["model_params"]) > 0: - # If we have FP8 expert parameters, we need to quantize them. 
- fp8_quantize( - data_parallel_group=expert_data_parallel_group, **expert_param_quantize_kwargs - ) - - clear_quantize_kwargs(dense_param_quantize_kwargs) - clear_quantize_kwargs(expert_param_quantize_kwargs) - - # Special handling of blockwise FP8 - BATCH_QUANT_MEMORY_LIMIT_BYTES = 5 * 1024**3 # 5 GB - blockwise_fp8_weight_buffers = [] - blockwise_fp8_param_buffers = [] - - def _batch_quantize_blockwise_fp8_params( - dense_param_quantize_kwargs, expert_param_quantize_kwargs, blockwise_fp8_param_buffers - ): - if len(blockwise_fp8_param_buffers) == 0: - return - - # Copy original param shards into their blockwise FP8 working buffers - for bufs in blockwise_fp8_param_buffers: - bufs["bucket_param"].copy_(bufs["param"]) - - # Apply FP8 quantization to blockwise FP8 parameters - _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs) - - # Copy quantized params back from working buffers to original param tensors - for bufs in blockwise_fp8_param_buffers: - bufs["param"].copy_(bufs["bucket_param"]) - blockwise_fp8_param_buffers.clear() - - # Free bucket storage for blockwise FP8 weight buffers - for wbuf in blockwise_fp8_weight_buffers: - wbuf.free_bucket_storage() - blockwise_fp8_weight_buffers.clear() - for pg in self.parameter_groups: mbuf = pg.main_weight_buffer wbuf = pg.model_weight_buffer - tbuf = pg.transpose_weight_buffer if mbuf is None: continue @@ -2715,88 +2586,44 @@ def _batch_quantize_blockwise_fp8_params( shard_offsets_in_fp8 = quantize_func_kwargs["start_offsets"] shard_model_params = quantize_func_kwargs["fsdp_shard_model_params"] - has_blockwise_fp8_param = False for param in pg.params: item_id = mbuf.param_idx[param] if wbuf: if wbuf.is_data_distributed or mbuf.is_data_distributed: model_param = wbuf.get_item(item_id, only_shard=True) - if tbuf: - transpose_param = tbuf.get_item(item_id, only_shard=True) - else: - transpose_param = None main_weight = mbuf.get_item(item_id, only_shard=True) else: model_param = 
wbuf.get_item(item_id) - if tbuf: - transpose_param = tbuf.get_item(item_id) - else: - transpose_param = None main_weight = mbuf.get_item(item_id) else: assert not mbuf.is_data_distributed model_param = to_local_if_dtensor(param) main_weight = mbuf.get_item(item_id) - if is_blockwise_float8tensor(param): - fp8_params.append(param) - if model_param.numel() == 0: - shard_fp32_from_fp8.append(None) - shard_offsets_in_fp8.append(None) - shard_model_params.append([None, None]) - else: - shard_fp32_from_fp8.append(main_weight) - shard_offsets_in_fp8.append(wbuf.locate_item_in_global_item(item_id)[0]) - bucket = wbuf.fetch_bucket() - b_model_param = wbuf.get_item_from_bucket(bucket, item_id)[ - slice(*wbuf.locate_item_in_global_item(item_id)) - ] - assert ( - transpose_param is None - ), "Blockwise FP8 does not support transpose param." - shard_model_params.append([b_model_param, None]) - assert b_model_param.numel() == model_param.numel(), ( - f"Blockwise FP8 bucket param numel {b_model_param.numel()} does" - f" not match model param numel {model_param.numel()}" - f" name: {self.param_to_name[param]}" - ) - blockwise_fp8_param_buffers.append( - {"bucket_param": b_model_param, "param": model_param} - ) - has_blockwise_fp8_param = True - continue - if is_float8tensor(param): fp8_params.append(param) if model_param.numel() == 0: shard_fp32_from_fp8.append(None) shard_offsets_in_fp8.append(None) - shard_model_params.append([None, None]) + shard_model_params.append(None) else: shard_fp32_from_fp8.append(main_weight) shard_offsets_in_fp8.append(wbuf.locate_item_in_global_item(item_id)[0]) - shard_model_params.append([model_param, transpose_param]) + shard_model_params.append(model_param) continue if model_param.numel() > 0: model_param.data.copy_(main_weight.view(model_param.shape)) - if has_blockwise_fp8_param: - blockwise_fp8_weight_buffers.append(wbuf) - if ( - sum([wbuf.bucket_index.size for wbuf in blockwise_fp8_weight_buffers]) - > BATCH_QUANT_MEMORY_LIMIT_BYTES - ): - 
_batch_quantize_blockwise_fp8_params( - dense_param_quantize_kwargs, - expert_param_quantize_kwargs, - blockwise_fp8_param_buffers, - ) + if len(dense_param_quantize_kwargs["model_params"]) > 0: + # If we have FP8 parameters, we need to quantize them. + dense_param_quantize_kwargs["data_parallel_group"] = data_parallel_group + quantize_param_shard(**dense_param_quantize_kwargs) - _batch_quantize_blockwise_fp8_params( - dense_param_quantize_kwargs, expert_param_quantize_kwargs, blockwise_fp8_param_buffers - ) - _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs) + if len(expert_param_quantize_kwargs["model_params"]) > 0: + # If we have FP8 expert parameters, we need to quantize them. + expert_param_quantize_kwargs["data_parallel_group"] = expert_data_parallel_group + quantize_param_shard(**expert_param_quantize_kwargs) @torch.no_grad() def copy_model_weights_to_main_weights(self): @@ -2814,7 +2641,6 @@ def copy_model_weights_to_main_weights(self): f"Master weight buffer size {mbuf.data.numel()} does not match " f"model weight buffer size {copyin_data.numel()}" ) - # TODO(mxfp8): Make sure it's not a fp8 buf? 
mbuf.data.copy_(copyin_data.data) def all_gather_parameters(self, async_op: bool = True): @@ -2832,18 +2658,15 @@ def all_gather_parameters(self, async_op: bool = True): all_gather_ops = [] for g in self.parameter_groups: - for buf in [g.model_weight_buffer, g.transpose_weight_buffer]: - if buf is None: - continue - shard = buf.get_shard_from_local_buffer() - all_gather_handler = torch.distributed.all_gather_into_tensor( - output_tensor=buf.data, - input_tensor=shard, - group=buf.data_parallel_group, - async_op=async_op, - ) - if async_op: - all_gather_ops.append(all_gather_handler) + shard = g.model_weight_buffer.get_shard_from_local_buffer() + all_gather_handler = torch.distributed.all_gather_into_tensor( + output_tensor=g.model_weight_buffer.data, + input_tensor=shard, + group=g.model_weight_buffer.data_parallel_group, + async_op=async_op, + ) + if async_op: + all_gather_ops.append(all_gather_handler) for op in all_gather_ops: op.wait() @@ -2864,7 +2687,7 @@ def reduce_scatter_gradients(self, async_op: bool = True): reduce_scatter_ops = [] for g in self.parameter_groups: gbuf = g.main_grad_buffer - if gbuf is None: + if gbuf is not None: continue scaling_factor = gbuf.gradient_scaling_factor reduce_op = gradient_reduce_preprocessing(gbuf.data, scaling_factor, self.ddp_config) @@ -3314,16 +3137,9 @@ def __init__( # Track the status of all-gather operations for each bucket. self.param_gather_event_map = {} # All buckets are initially deallocated / empty after initialization of ParamAndGradBuffer. - self.bucket_status = {} - for i in range(self.buffer.num_buckets): - for bwd in [False, True]: - self.bucket_status[self.get_bucket_key(i, bwd)] = BucketStatus.EMPTY - + self.bucket_status = {i: BucketStatus.EMPTY for i in range(self.buffer.num_buckets)} # Track whether each bucket can be deallocated. 
- self.bucket_can_be_released = {} - for i in range(self.buffer.num_buckets): - for bwd in [False, True]: - self.bucket_can_be_released[self.get_bucket_key(i, bwd)] = False + self.bucket_can_be_released = {i: False for i in range(self.buffer.num_buckets)} # Map each bucket to the bucket group it belongs to by enumerated ID. # Made to collect a subset of buckets in the same bucket group. @@ -3348,13 +3164,6 @@ def __init__( # all-gather parameters across groups. self.outer_fsdp_group_param_gather_stream = torch.cuda.Stream() - def get_bucket_key(self, bucket_id, bwd): - """Get the key for the bucket.""" - has_transpose_buffer = ( - self.buffer.parameter_groups[bucket_id].transpose_weight_buffer is not None - ) - return (bucket_id, has_transpose_buffer and bwd) - @property def num_buckets(self): """Return the number of buckets.""" @@ -3371,11 +3180,10 @@ def reset(self): UserWarning, ) while len(self.param_gather_event_map) > 0: - (bucket_id, bwd) = next(iter(self.param_gather_event_map)) - self.wait_bucket_ready(bucket_id, bwd) + bucket_id = next(iter(self.param_gather_event_map)) + self.wait_bucket_ready(bucket_id) for bucket_id in range(self.num_buckets): - for bwd in [False, True]: - self.bucket_can_be_released[self.get_bucket_key(bucket_id, bwd)] = True + self.bucket_can_be_released[bucket_id] = True self.recycle_unused_buckets() assert all([status is BucketStatus.EMPTY for status in self.bucket_status.values()]), ( @@ -3397,7 +3205,6 @@ def all_gather_params( suggested_AG_prefetch_size: Optional[int] = None, async_param_gather: bool = True, outer_fsdp_group_param_gather: bool = False, - bwd: bool = False, ): """All-gather the params. If prefetch is enabled, prefetch next buckets in the order of `prefetch_order`. @@ -3432,7 +3239,7 @@ def all_gather_params( # Do not release the buckets that are being all-gathered. 
for bucket_id in ag_buckets: - self.bucket_can_be_released[self.get_bucket_key(bucket_id, bwd)] = False + self.bucket_can_be_released[bucket_id] = False # If prefetch is enabled, we will add prefetch buckets to ag_buckets. if prefetch: @@ -3504,11 +3311,7 @@ def need_skip_prefetch(bucket_id): bucket_id = next_bucket_id(ag_buckets) # Only all-gather on buckets that have not been allocated yet. - ag_buckets = [ - bucket_id - for bucket_id in ag_buckets - if self.bucket_status[self.get_bucket_key(bucket_id, bwd)] == BucketStatus.EMPTY - ] + ag_buckets = [i for i in ag_buckets if self.bucket_status[i] == BucketStatus.EMPTY] if len(ag_buckets) == 0: return @@ -3527,7 +3330,6 @@ def need_skip_prefetch(bucket_id): self.ag_stream if self.ag_stream is not None else torch.cuda.current_stream() ) if outer_fsdp_group_param_gather: - # TODO(mxfp8): Support hsdp self.outer_fsdp_group_param_gather_stream.wait_stream(torch.cuda.current_stream()) with torch.cuda.stream(self.outer_fsdp_group_param_gather_stream): outer_fsdp_group = self.buffer.dist_index.get_outer_fsdp_group() @@ -3555,13 +3357,12 @@ def need_skip_prefetch(bucket_id): for bucket_id in buckets: # All-gather the module weights from each FSDP buffer shard # into an allocated bucket containing unsharded weights. - self.async_bucket_gather(bucket_id, bwd) + self.async_bucket_gather(bucket_id) # Replace the parameter all-gather event with coalescing event. 
for bucket_id in buckets: - bucket_key = self.get_bucket_key(bucket_id, bwd) - _, mark_bucket_ready_to_use = self.param_gather_event_map[bucket_key] - self.param_gather_event_map[bucket_key] = ( + _, mark_bucket_ready_to_use = self.param_gather_event_map[bucket_id] + self.param_gather_event_map[bucket_id] = ( coalescing_event, mark_bucket_ready_to_use, ) @@ -3569,16 +3370,14 @@ def need_skip_prefetch(bucket_id): # Wait for all-gather to finish if not async_param_gather: for bucket_id in buckets: - self.wait_bucket_ready(bucket_id, bwd) + self.wait_bucket_ready(bucket_id) - def wait_bucket_ready(self, bucket_id, bwd, empty_ok=False): + def wait_bucket_ready(self, bucket_id, empty_ok=False): """Wait for the bucket to be ready.""" - bucket_key = self.get_bucket_key(bucket_id, bwd) - - if self.bucket_status[bucket_key] == BucketStatus.READY_TO_USE: + if self.bucket_status[bucket_id] == BucketStatus.READY_TO_USE: # Already ready to use. return - if self.bucket_status[bucket_key] == BucketStatus.EMPTY: + if self.bucket_status[bucket_id] == BucketStatus.EMPTY: if empty_ok: return # Bucket shouldn't be empty, this implies that the bucket @@ -3586,64 +3385,48 @@ def wait_bucket_ready(self, bucket_id, bwd, empty_ok=False): raise ValueError(f"Bucket {bucket_id} is empty.") # Wait for asynchronous / overlapped NCCL operations to complete. - param_gather_event, mark_bucket_ready_to_use = self.param_gather_event_map.pop(bucket_key) + param_gather_event, mark_bucket_ready_to_use = self.param_gather_event_map.pop(bucket_id) param_gather_event.wait() mark_bucket_ready_to_use() @torch.no_grad() - def release_bucket(self, bucket_id, bwd): + def release_bucket(self, bucket_id: int): """Release the bucket.""" - # TODO(mxfp8): In some cases, there won't be ag before bwd? 
- bucket_key = self.get_bucket_key(bucket_id, bwd) - - if self.bucket_status[bucket_key] == BucketStatus.EMPTY: + if self.bucket_status[bucket_id] == BucketStatus.EMPTY: return - self.wait_bucket_ready(bucket_id, bwd, empty_ok=True) - if self.bucket_status[bucket_key] == BucketStatus.COMMUNICATING: + self.wait_bucket_ready(bucket_id, empty_ok=True) + if self.bucket_status[bucket_id] == BucketStatus.COMMUNICATING: raise ValueError(f"Bucket {bucket_id} is communicating.") - if bwd and self.buffer.parameter_groups[bucket_id].transpose_weight_buffer is not None: - buf = self.buffer.parameter_groups[bucket_id].transpose_weight_buffer - else: - buf = self.buffer.parameter_groups[bucket_id].model_weight_buffer - - buf.free_bucket_storage() - self.bucket_status[bucket_key] = BucketStatus.EMPTY + wbuf = self.buffer.parameter_groups[bucket_id].model_weight_buffer + wbuf.free_bucket_storage() + self.bucket_status[bucket_id] = BucketStatus.EMPTY def recycle_unused_buckets(self): """Recycle the unused buckets.""" - for bucket_key, can_be_released in self.bucket_can_be_released.items(): + for bucket_id, can_be_released in self.bucket_can_be_released.items(): if can_be_released: - bucket_id, is_transpose_weight = bucket_key[0], bucket_key[1] - self.release_bucket(bucket_id, is_transpose_weight) - self.bucket_can_be_released[bucket_key] = False + self.release_bucket(bucket_id) + self.bucket_can_be_released[bucket_id] = False - def get_fsdp_buffer(self, bucket_id: int, bwd=False) -> DataParallelBuffer: + def get_fsdp_buffer(self, bucket_id: int) -> DataParallelBuffer: """Get the FSDP buffer with the given bucket ID.""" param_group = self.buffer.parameter_groups[bucket_id] if self.buffer.ddp_config.outer_dp_sharding_strategy != "no_shard": - if bwd and param_group.transpose_weight_buffer is not None: - raise RuntimeError("Transpose buffer is not supported for HSDP") - else: - return param_group.hsdp_wbuf - if bwd and param_group.transpose_weight_buffer is not None: - return 
param_group.transpose_weight_buffer - else: - return param_group.model_weight_buffer + return param_group.hsdp_wbuf + return param_group.model_weight_buffer @torch.no_grad() - def async_bucket_gather(self, bucket_id, bwd) -> None: + def async_bucket_gather(self, bucket_id: int) -> None: """All-gather the bucket and set the items.""" - bucket_key = self.get_bucket_key(bucket_id, bwd) - - self.bucket_can_be_released[bucket_key] = False - if self.bucket_status[bucket_key] != BucketStatus.EMPTY: + self.bucket_can_be_released[bucket_id] = False + if self.bucket_status[bucket_id] != BucketStatus.EMPTY: return - self.bucket_status[bucket_key] = BucketStatus.COMMUNICATING + self.bucket_status[bucket_id] = BucketStatus.COMMUNICATING - wbuf = self.get_fsdp_buffer(bucket_id, bwd) + wbuf = self.get_fsdp_buffer(bucket_id) # Lazy release the unused buckets. self.recycle_unused_buckets() @@ -3658,21 +3441,18 @@ def async_bucket_gather(self, bucket_id, bwd) -> None: async_op=True, ) - def get_closure(bucket_id, bwd): + def get_closure(bucket_id): @torch.no_grad() def mark_bucket_ready_to_use(): # Mark the bucket as ready to use - all NCCL operations are complete. - self.bucket_status[self.get_bucket_key(bucket_id, bwd)] = BucketStatus.READY_TO_USE + self.bucket_status[bucket_id] = BucketStatus.READY_TO_USE return mark_bucket_ready_to_use - mark_bucket_ready_to_use = get_closure(bucket_id, bwd) + mark_bucket_ready_to_use = get_closure(bucket_id) # Track the async all-gather operation for the bucket. 
- self.param_gather_event_map[self.get_bucket_key(bucket_id, bwd)] = ( - param_gather_event, - mark_bucket_ready_to_use, - ) + self.param_gather_event_map[bucket_id] = (param_gather_event, mark_bucket_ready_to_use) @torch.no_grad() @@ -3765,13 +3545,15 @@ def override_sharded_param_methods_with_safety_checks(params, all_gather_pipelin def override_sharded_param_to_function_closure(p, to_function): def override_sharded_param_to_function(*args, **kwargs): - if p._typed_storage()._size() == 0: - warnings.warn( - "The parameter may be sharded by Megatron-FSDP, " - "no actual 'to' operation is performed." - ) - return torch.empty([]) - return to_function(*args, **kwargs) + bucket_id = all_gather_pipeline.buffer.param_to_param_group[p] + status = all_gather_pipeline.bucket_status[bucket_id] + if status == BucketStatus.READY_TO_USE: + return to_function(*args, **kwargs) + raise RuntimeError( + "This parameter is already shard by MCore FSDP and the " + "shared-state parameter does not support 'to' function." + "please define the dtype and device of the parameter before FSDP wrap." + ) return override_sharded_param_to_function @@ -3779,13 +3561,15 @@ def override_sharded_param_to_function(*args, **kwargs): def override_sharded_param_cpu_function_closure(p, cpu_function): def override_sharded_param_cpu_function(*args, **kwargs): - if p._typed_storage()._size() == 0: - warnings.warn( - "The parameter may be sharded by Megatron-FSDP, " - "no actual 'cpu' operation is performed." - ) - return torch.empty([], device="cpu") - return cpu_function(*args, **kwargs) + bucket_id = all_gather_pipeline.buffer.param_to_param_group[p] + status = all_gather_pipeline.bucket_status[bucket_id] + if status == BucketStatus.READY_TO_USE: + return cpu_function(*args, **kwargs) + warnings.warn( + "The parameters are sharded by MCore FSDP, and no actual cpu " + "operation is performed." 
+ ) + return torch.empty([], device="cpu") return override_sharded_param_cpu_function diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py index 3d15711275f..c9679494737 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py @@ -19,7 +19,7 @@ from contextlib import nullcontext from functools import reduce from importlib.metadata import version -from typing import Callable, Optional, Sequence, Union +from typing import Callable, List, Optional, Sequence, Union try: import einops @@ -79,6 +79,52 @@ def is_te_min_version(vers, check_equality=True): return te_version > PkgVersion(vers) +# Check if Transformer Engine has class for fp8 tensors. +try: + if is_te_min_version("2.0"): + # In TE2.x, QuantizedTensor is the base class for all different type of fp8 tensors, + # including fp8 tensor for delayed scaling, current scaling and mxfp8, etc. + from transformer_engine.pytorch.tensor import QuantizedTensor as FP8_TENSOR_CLASS + else: + from transformer_engine.pytorch.float8_tensor import Float8Tensor as FP8_TENSOR_CLASS + + HAVE_TE_FP8_TENSOR_CLASS = True +except (ImportError, ModuleNotFoundError): + # FP8 tensor class not found + HAVE_TE_FP8_TENSOR_CLASS = False + +try: + from transformer_engine.pytorch.optimizers import multi_tensor_applier, multi_tensor_scale + + multi_tensor_scale_impl = multi_tensor_scale +except ImportError: + try: + import amp_C + from apex.multi_tensor_apply import multi_tensor_applier + + multi_tensor_scale_impl = amp_C.multi_tensor_scale + except ImportError: + import warnings + + warnings.warn( + "Transformer Engine and Apex are not installed. 
" + "Falling back to local implementations of " + "multi_tensor_applier and multi_tensor_scale" + ) + + def local_multi_tensor_applier(op, noop_flag_buffer, tensor_lists, *args): + """Multi tensor op applier""" + return op(2048 * 32, noop_flag_buffer, tensor_lists, *args) + + def local_multi_tensor_scale(chunk_size, noop_flag, tensor_lists, scale): + """Works as a drop-in replacement for amp_C.multi_tensor_scale.""" + for src, dst in zip(tensor_lists[0], tensor_lists[1]): + dst.copy_(src * scale) + + multi_tensor_applier = local_multi_tensor_applier + multi_tensor_scale_impl = local_multi_tensor_scale + + def is_submodule(module, parent_module, strict=True): """ Check if a module is a submodule of another module. @@ -92,6 +138,18 @@ def is_submodule(module, parent_module, strict=True): return False +def is_float8tensor(tensor: torch.Tensor) -> bool: + """Check if a tensor is a Transformer Engine Float8Tensor. + + Note that in TE2.x, in order to support more recipes, the design of the fp8 tensor class has + changed. Now Float8Tensor is only used for current scaling and delayed scaling. And mxfp8 + and blockwise scaling have their own fp8 tensor classes. These different fp8 tensor classes + are both inherited from QuantizedTensor. So, for TE1.x, FP8_TENSOR_CLASS is Float8Tensor, + and for TE2.x, FP8_TENSOR_CLASS is QuantizedTensor. + """ + return HAVE_TE_FP8_TENSOR_CLASS and isinstance(tensor, FP8_TENSOR_CLASS) + + def get_mesh_names(device_mesh: Optional[DeviceMesh] = None) -> list[str]: """ Get all the sub-mesh names in the DeviceMesh. @@ -130,6 +188,198 @@ def contains_submesh( return all(submesh_name in device_mesh_names for submesh_name in submesh_names) +def _multi_tensor_copy_this_to_that( + this: List[torch.Tensor], that: List[torch.Tensor], overflow_buf: Optional[torch.Tensor] = None +): + """ + Use multi-tensor-applier to copy values from one list to another. 
+ We don't have a bfloat16 implementation so for now if the overflow_buf + is not provided, we default back to simple loop copy to be compatible + with bfloat16. + """ + if overflow_buf is not None: + overflow_buf.fill_(0) + # Scaling with factor `1.0` is equivalent to copy. + multi_tensor_applier(multi_tensor_scale_impl, overflow_buf, [this, that], 1.0) + else: + for this_, that_ in zip(this, that): + that_.copy_(this_) + + +""" +The code below abstracts the functionalities needed for implementing "--fp8-param-gather" into +several functions. It provides different implementations for each function based on different +versions of TE, ensuring compatibility across various TE versions. + +Currently, there are three functions: + - modify_underlying_storage + This function is used in DDP to place all parameters into a contiguous buffer. For + non-fp8 tensors, replacing their data is simple, just using code like + "tensor.data = new_data". However, for fp8 tensors, their raw data is not stored in the + ".data" attribute, and it varies with different TE versions and different recipes. This + function provides a unified interface to replace the underlying storage of a fp8 tensor. + - quantize_param_shard + This function is used in dist-opt to cast fp32 main params to fp8 params. For non-fp8 + params, this casting is as simple as "bf16_params.copy_(fp32_main_params)"; but for fp8 + params, the casting logic varies with different TE versions and different recipes. This + function provides a unified interface to cast fp32 main params to fp8 params, and also + updates the necessary attributes (like amax, scale, scale_inv or transpose cache) of the + fp8 model params. + - correct_amax_history_if_needed + This function is used to correct the amax history of fp8 tensors. In TE1.x, some inplace + copy operations will write unwanted values to the amax_history of fp8 tensors. This function + corrects the amax_history back. For TE2.x, it's an empty function. 
+ Only useful for delayed scaling. +""" +if HAVE_TE and is_te_min_version("2.2"): + # Supported TE versions: 2.2+ + from transformer_engine.pytorch.tensor import QuantizedTensor + + def _modify_underlying_storage_impl( + fp8_tensor: QuantizedTensor, new_raw_data: torch.Tensor + ) -> None: + from transformer_engine.pytorch.tensor.utils import replace_raw_data + + replace_raw_data(fp8_tensor, new_raw_data) + + def _quantize_param_shard_impl( + model_params: List[QuantizedTensor], + main_params: List[torch.Tensor], + start_offsets: List[int], + data_parallel_group: ProcessGroup, + fsdp_shard_model_params: Optional[List[torch.Tensor]] = None, + ) -> None: + if len(model_params) == 0: + return + + from transformer_engine.pytorch.tensor.utils import cast_master_weights_to_fp8 + + args = [model_params, main_params, start_offsets, data_parallel_group] + if fsdp_shard_model_params is not None: + if get_te_version() == PkgVersion("2.3.0.dev0+5fdd7bb") or is_te_min_version("2.3.0"): + args.append(fsdp_shard_model_params) + else: + raise NotImplementedError( + f"FSDP with --fp8-param-gather is not supported in TE v{get_te_version()}" + ) + cast_master_weights_to_fp8(*args) + +elif HAVE_TE and is_te_min_version("2.0"): + # Supported TE versions: 2.0 + from transformer_engine.pytorch.tensor import QuantizedTensor + from transformer_engine.pytorch.tensor.float8_tensor import Float8Tensor + + def _modify_underlying_storage_impl( + fp8_tensor: QuantizedTensor, new_raw_data: torch.Tensor + ) -> None: + old_raw_data = fp8_tensor._data + assert old_raw_data.dtype == new_raw_data.dtype + new_raw_data.detach().copy_(old_raw_data) + fp8_tensor._data = new_raw_data + del old_raw_data + + def _quantize_param_shard_impl( + model_params: List[QuantizedTensor], + main_params: List[torch.Tensor], + start_offsets: List[int], + data_parallel_group: ProcessGroup, + fsdp_shard_model_params: Optional[List[torch.Tensor]] = None, + ) -> None: + if len(model_params) == 0: + return + + if 
fsdp_shard_model_params is None: + fsdp_shard_model_params = [None] * len(model_params) + + for model_param, main_param, start_offset, fsdp_shard_model_param in zip( + model_params, main_params, start_offsets, fsdp_shard_model_params + ): + if main_param is None: + continue + + if fsdp_shard_model_param is not None: + shard_model_param = fsdp_shard_model_param + else: + shard_model_param = model_param._data.view(-1)[ + start_offset : start_offset + main_param.numel() + ] + + quantizer = model_param._quantizer + # When not using --fp8-param-gather, the main_param (fp32) is first cast to bf16/fp16, + # and then cast to fp8 during forward. + # Although it's not necessary when --fp8-param-gather is enabled, we still keep this + # logic to keep numerical consistency. So here cast the main_param to model_param.dtype. + main_param = main_param.to(model_param.dtype) + out = Float8Tensor( + shape=main_param.size(), + dtype=model_param.dtype, + requires_grad=False, + data=shard_model_param, + fp8_scale_inv=model_param._scale_inv, + fp8_dtype=model_param._fp8_dtype, + quantizer=quantizer, + ) + quantizer.update_quantized(main_param, out) + + amaxes = [] + scales = [] + scale_invs = [] + for model_param in model_params: + quantizer = model_param._quantizer + amaxes.append(quantizer.amax.view(1)) + scales.append(quantizer.scale.view(1)) + scale_invs.append(model_param._scale_inv.view(1)) + model_param._reset_caches() + + dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device="cuda") + + # Update scaling factors. + packed_scales = torch.empty(len(scales), dtype=torch.float32, device=scales[0].device) + packed_scale_views = [packed_scales[i].view(1) for i in range(len(scales))] + _multi_tensor_copy_this_to_that(scales, packed_scale_views, dummy_overflow_buf) + torch.reciprocal(packed_scales, out=packed_scales) + _multi_tensor_copy_this_to_that(packed_scale_views, scale_invs, dummy_overflow_buf) + + # Reduce amaxes. + # Note: Assume each param has a separate amax. 
+ packed_amaxes = torch.empty(len(amaxes), dtype=torch.float32, device=amaxes[0].device) + packed_amax_views = [packed_amaxes[i].view(1) for i in range(len(amaxes))] + _multi_tensor_copy_this_to_that(amaxes, packed_amax_views, dummy_overflow_buf) + torch.distributed.all_reduce( + packed_amaxes, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group + ) + _multi_tensor_copy_this_to_that(packed_amax_views, amaxes, dummy_overflow_buf) + +else: + # Fallback impl if TE version is invalid or TE is not installed. + def _modify_underlying_storage_impl(*args, **kwargs): + raise RuntimeError( + "Invalid Transformer Engine version for FP8 distributed optimizer, " + "please install Transformer Engine 2.0+ or install Megatron-Core" + ) + + def _quantize_param_shard_impl(*args, **kwargs): + raise RuntimeError( + "Invalid Transformer Engine version for FP8 distributed optimizer, " + "please install Transformer Engine 2.0+ or install Megatron-Core" + ) + + +def modify_underlying_storage(tensor: torch.Tensor, new_raw_data: torch.Tensor): + """Replace the underlying raw data of a tensor with new data.""" + _modify_underlying_storage_impl(tensor, new_raw_data) + + +def quantize_param_shard( + model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params=None +): + """Cast shard fp32 main params to fp8 model params.""" + assert HAVE_TE, "Transformer Engine is required for quantizing parameters." 
+ _quantize_param_shard_impl( + model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params + ) + + def _get_cuda_rng_state( device: Union[int, str, torch.device] = "cuda", clone: bool = False, graph_safe: bool = False ) -> torch.Tensor: diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 0fc00bd91be..dd0281e61b1 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -744,13 +744,6 @@ def validate_args(args, defaults={}): assert args.ckpt_format == "fsdp_dtensor", \ "Megatron FSDP only supports fsdp_dtensor checkpoint format" - - if args.use_megatron_fsdp: - args.reuse_grad_buf_for_mxfp8_param_ag = False - - if args.fsdp_manual_registration: - assert args.use_megatron_fsdp, "FSDP manual registration is only supported with Megatron FSDP" - assert args.nccl_ub, "FSDP manual registration is only supported with nccl-ub option" # Parameters dtype. args.params_dtype = torch.float From bd0694574f82dcafc1b552214fd1937917f45b30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 5 Jan 2026 12:30:18 +0000 Subject: [PATCH 213/334] Revert "[Dev] Partial CUDA Graph support for EP Overlap (#2168)" This reverts commit 8b93e0d6ef0a5ca6ef3c1993b0728447a8ddc4b8. 
--- .../common/model_chunk_schedule_plan.py | 40 +- .../core/models/gpt/fine_grained_callables.py | 204 ++++------ megatron/core/pipeline_parallel/schedules.py | 105 ----- megatron/core/pipeline_parallel/utils.py | 4 +- megatron/core/transformer/cuda_graphs.py | 84 +--- megatron/core/transformer/moe/moe_layer.py | 7 +- .../core/transformer/transformer_config.py | 15 - .../core/transformer/transformer_layer.py | 36 -- .../test_cuda_graphed_schedule_chunk_1f1b.py | 372 ------------------ .../a2a_overlap/test_schedule_layer_1f1b.py | 2 +- tests/unit_tests/a2a_overlap/utils.py | 1 - .../pipeline_parallel/test_schedules.py | 48 --- .../transformer/test_submodule_callables.py | 16 +- 13 files changed, 130 insertions(+), 804 deletions(-) delete mode 100644 tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 04ca580eeaa..486a498dd73 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -17,7 +17,6 @@ get_comm_stream, get_comp_stream, ) -from megatron.core.transformer.enums import CudaGraphScope class ModelChunkState: @@ -38,20 +37,23 @@ class TransformerLayerSchedulePlan: mtp post process nodes. 
layer (TransformerLayerSchedulePlan) - ├── attn (TransformerLayerNode): attention -> router -> dispatch preprocess + ├── attn (TransformerLayerNode): attention module + ├── post_attn (TransformerLayerNode): layernorm -> router -> dispatch preprocess ├── moe_dispatch (TransformerLayerNode): dispatch All2All ├── mlp (TransformerLayerNode): mlp module ├── moe_combine (TransformerLayerNode): combine All2All └── mtp_post_process (PostProcessNode): mtp post process Note that MTP layer has the same operation and execution order with TransformerLayer regarding - moe_dispatch, mlp, moe_combine, but contains extra operations in attn and mtp_post_process: + post_attn, moe_dispatch, mlp, moe_combine, but contains extra operations in attn and + mtp_post_process: * mtp.attn wraps around transformer_layer.attn with extra norm, proj and embedding operations. * mtp.mtp_post_process contains output_layer, mtp loss operations, whereas transformer_layer.mtp_post_process is empty. """ attn = None + post_attn = None moe_dispatch = None mlp = None moe_combine = None @@ -115,7 +117,7 @@ def release_state(self): def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args): """ Builds the callable nodes for the transformer/mtp layer: - attn, mlp, moe_dispatch and moe_combine, and mtp_post_process. + attn, post_attn, mlp, moe_dispatch and moe_combine, and mtp_post_process. 
""" from megatron.core.models.gpt.fine_grained_callables import ( TransformerLayerNode, @@ -135,7 +137,16 @@ def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args): else isinstance(self.layer.mlp, MoELayer) ) - extra_args["config"] = self.layer.config + enable_deepep = ( + self.layer.config.moe_token_dispatcher_type == "flex" + and self.layer.config.moe_flex_dispatcher_backend == "deepep" + ) + enable_hybridep = ( + self.layer.config.moe_token_dispatcher_type == "flex" + and self.layer.config.moe_flex_dispatcher_backend == "hybridep" + ) + extra_args["enable_deepep"] = enable_deepep + extra_args["enable_hybridep"] = enable_hybridep extra_args["is_moe"] = is_moe extra_args["delay_wgrad_compute"] = self.layer.config.delay_wgrad_compute extra_args["is_mtp"] = is_mtp @@ -156,6 +167,7 @@ def create_node(stream, module, name): ( attn_module, + post_attn_module, moe_dispatch_module, mlp_module, moe_combine_module, @@ -167,9 +179,11 @@ def create_node(stream, module, name): self.attn = create_node(comp_stream, attn_module, "attn") self.mlp = create_node(comp_stream, mlp_module, "mlp") if is_moe: + self.post_attn = create_node(comp_stream, post_attn_module, "post_attn") self.moe_dispatch = create_node(comm_stream, moe_dispatch_module, "moe_dispatch") self.moe_combine = create_node(comm_stream, moe_combine_module, "moe_combine") else: + self.post_attn = NoopScheduleNode() self.moe_dispatch = NoopScheduleNode() self.moe_combine = NoopScheduleNode() @@ -180,11 +194,6 @@ def create_node(stream, module, name): else: self.mtp_post_process = NoopScheduleNode() - # mlp and combine may receive dgrad from attn, which is managed by cuda graph. - if CudaGraphScope.attn in self.config.cuda_graph_scope: - self.mlp.manual_grads_release = False - self.moe_combine.manual_grads_release = False - def get_fp8_context(self): """ Get the fp8 context for the transformer layer. 
@@ -207,8 +216,8 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) to maximize parallelism and efficiency. When f_layer and b_layer are not None, forward and backward pass are overlapped as follows: - comm_stream: combine_bwd | dispatch_fwd->dispatch_bwd | combine_fwd - comp_stream: attn_fwd | mlp_bwd->mlp_bwd_dw->mlp_fwd| attn_bwd + comm_stream: combine_bwd | dispatch_fwd->dispatch_bwd | combine_fwd + comp_stream: attn_fwd->post_attn_fwd| mlp_bwd->mlp_bwd_dw->mlp_fwd| post_attn_bwd->attn_bwd For MTP, mtp_post_process_fwd is executed after the combine_fwd in the comp_stream, and mtp_post_process_bwd is executed before the combine_bwd in the comp_stream. @@ -231,6 +240,7 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) if f_layer is not None: with f_layer.get_fp8_context(): f_input = f_layer.attn.forward(f_input) + f_input = f_layer.post_attn.forward(f_input) if b_layer is not None: b_grad = b_layer.mlp.backward(b_grad) @@ -244,6 +254,7 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) b_grad = b_layer.moe_dispatch.backward(b_grad) if b_layer is not None and b_layer.config.ep_overlap_early_attn_memory_release: + b_grad = b_layer.post_attn.backward(b_grad) b_grad = b_layer.attn.backward(b_grad) if f_layer is not None: @@ -256,6 +267,7 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) f_input = f_layer.mtp_post_process.forward(f_input) if b_layer is not None and not b_layer.config.ep_overlap_early_attn_memory_release: + b_grad = b_layer.post_attn.backward(b_grad) b_grad = b_layer.attn.backward(b_grad) # Delay the last attn_dw in backward pass (attn_dw of the first layer) @@ -357,10 +369,6 @@ def __init__( model, self._model_chunk_state, self._event, comp_stream ) - # preprocess may receive dgrad from attn, which is managed by cuda graph. 
- if CudaGraphScope.attn in model.config.cuda_graph_scope: - self.pre_process.manual_grads_release = False - def _build_layer_schedule_plan(self, module, comp_stream, comm_stream): if module is None: return diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index ab76659d01b..a0be55c4ca1 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -6,15 +6,14 @@ from typing import Optional import torch -from torch import Tensor from megatron.core import tensor_parallel -from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( fine_grained_offloading_group_commit, + fine_grained_offloading_group_start, + get_fine_grained_offloading_context, ) from megatron.core.pipeline_parallel.utils import ScheduleNode, make_viewless -from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.module import float16_to_fp32 from megatron.core.transformer.moe.moe_layer import MoELayer from megatron.core.transformer.multi_token_prediction import ( @@ -43,13 +42,14 @@ def wrapped_func(*args, **kwarg): @internal_api -def should_free_input(name, is_moe, config): +def should_free_input(name, is_moe, enable_deepep, enable_hybridep): """Determine if the node should free its input memory. 
Args: name: Node name is_moe: Whether it's a MoE model - config: TransformerConfig object + enable_deepep: Whether to use DeepEP dispatcher + enable_hybridep: Whether to use HybridEP dispatcher Returns: bool: Whether to free input memory @@ -57,14 +57,6 @@ def should_free_input(name, is_moe, config): # For dense layers [attn, fake, mlp, fake], the input is needed during backward pass if not is_moe: return False - enable_deepep = ( - config.moe_token_dispatcher_type == "flex" - and config.moe_flex_dispatcher_backend == "deepep" - ) - enable_hybridep = ( - config.moe_token_dispatcher_type == "flex" - and config.moe_flex_dispatcher_backend == "hybridep" - ) # Define which nodes should free input memory # Since we split the computing graph into multiple nodes, we can manually control # when and how to free the input memory. @@ -77,10 +69,7 @@ def should_free_input(name, is_moe, config): # and probs before dispatch A2A and it's not needed anymore after the forward pass # For DeepEP and HybridEP dispatcher mode, they are both needed in backward pass # and cannot be freed. - # If moe_preprocess is in cuda graph scope, tokens and probs are fixed size tensors, - # so they cannot be freed. - "moe_dispatch": not (enable_deepep or enable_hybridep) - and (CudaGraphScope.moe_preprocess not in config.cuda_graph_scope), + "moe_dispatch": not (enable_deepep or enable_hybridep), } return free_input_nodes.get(name, False) @@ -243,13 +232,13 @@ def __init__( it's the per_batch_state_context, o.w. nullcontext name (str): Node name, also used to determine memory strategy bwd_dw_callables (list): List of weight gradient functions for the layer. - extra_args (dict): Extra arguments for the node: is_moe, config. + extra_args (dict): Extra arguments for nodes: is_moe, enable_deepep, enable_hybridep. """ # determine whether to free input memory - config = extra_args.get("config", None) - assert config is not None, "model config must be passed to TransformerLayerNode." 
is_moe = extra_args.get("is_moe", False) - free_input = should_free_input(name, is_moe, config) + enable_deepep = extra_args.get("enable_deepep", False) + enable_hybridep = extra_args.get("enable_hybridep", False) + free_input = should_free_input(name, is_moe, enable_deepep, enable_hybridep) self.delay_wgrad_compute = extra_args.get("delay_wgrad_compute", False) super().__init__( @@ -314,8 +303,8 @@ def backward_dw(self): module.backward_dw() # the output grad memory is last used in wgrad compute, should be safe to release. - if self.manual_grads_release: - assert self.delay_grads_release, "output grad memory should be valid before wgrad." + assert self.delay_grads_release, "output grad memory should be valid before wgrad." + if self.manual_release_grads: for tensor in self.output_grads: tensor.untyped_storage().resize_(0) self.output_grads = None @@ -368,95 +357,11 @@ def build_transformer_layer_callables(layer: TransformerLayer): and layer.config.moe_flex_dispatcher_backend == "hybridep" ) - class _BackwardDWWrapper: - def __init__(self): - self.graphed_backward_dw_callable = None - self.attn_dw_callable = layer.self_attention.backward_dw - if isinstance(layer.mlp, MoELayer): - self.shared_expert_dw_callable = partial( - layer.mlp.backward_dw, routed_experts=False, shared_experts=True - ) - else: - self.shared_expert_dw_callable = None - self.cuda_graph_scope = layer.config.cuda_graph_scope - - def set_graphed_backward_dw_callable(self, graphed_backward_dw_callable): - """Store the CUDA graphed backward weight gradient callable.""" - self.graphed_backward_dw_callable = graphed_backward_dw_callable - - def backward_dw(self): - """Execute weight gradients, skipping CUDA graphed components during replay.""" - is_replay = hasattr(layer, 'cuda_graphs') and layer.cuda_graphs - if self.shared_expert_dw_callable is not None and ( - not is_replay or CudaGraphScope.moe_router not in self.cuda_graph_scope - ): - self.shared_expert_dw_callable() - if not is_replay or 
CudaGraphScope.attn not in self.cuda_graph_scope: - self.attn_dw_callable() - if is_replay and self.graphed_backward_dw_callable is not None: - self.graphed_backward_dw_callable() - - attn_backward_dw_wrapper = _BackwardDWWrapper() - def submodule_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): """ - Performs same attnention forward logic as GPT Model and forward pass for - computations between attention and dispatch: - pre mlp layernorm->router->dispatch preprocess + Performs same attnention forward logic as GPT Model. """ - - if hasattr(layer, 'cuda_graphs') and layer.cuda_graphs: - assert ( - CudaGraphScope.mlp not in layer.config.cuda_graph_scope - and CudaGraphScope.moe not in layer.config.cuda_graph_scope - ), ( - "Supported CUDA graph scope with EP overlap: " - "attn, moe_router, moe_preprocess, mlp, got {}".format( - layer.config.cuda_graph_scope - ) - ) - forward_func = layer._te_cuda_graph_replay - attn_backward_dw_wrapper.set_graphed_backward_dw_callable( - partial(layer.backward_dw_cudagraph, layer.current_microbatch) - ) - else: - # wrapper function that keeps consistent api with cuda graph replay - def forward_func( - hidden_states: Tensor, - attention_mask: Optional[Tensor] = None, - rotary_pos_emb: Optional[Tensor] = None, - rotary_pos_cos: Optional[Tensor] = None, - rotary_pos_sin: Optional[Tensor] = None, - packed_seq_params: Optional[PackedSeqParams] = None, - sequence_len_offset: Optional[Tensor] = None, - ): - hidden_states, _ = layer._forward_attention( - hidden_states=hidden_states, - attention_mask=attention_mask, - rotary_pos_emb=rotary_pos_emb, - rotary_pos_cos=rotary_pos_cos, - rotary_pos_sin=rotary_pos_sin, - packed_seq_params=packed_seq_params, - sequence_len_offset=sequence_len_offset, - ) - if not isinstance(layer.mlp, MoELayer): - return hidden_states, None, None, None - if layer.recompute_pre_mlp_layernorm: - layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - pre_mlp_layernorm_output = 
layer.pre_mlp_norm_checkpoint.checkpoint( - layer.pre_mlp_layernorm, hidden_states - ) - else: - pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) - - shared_expert_output = layer.mlp.shared_experts_compute(pre_mlp_layernorm_output) - probs, routing_map = layer.mlp.route(pre_mlp_layernorm_output) - local_tokens, probs, _ = layer.mlp.preprocess( - pre_mlp_layernorm_output, probs, routing_map - ) - return hidden_states, local_tokens, probs, shared_expert_output - - hidden_states, local_tokens, probs, shared_expert_output = forward_func( + hidden_states, _ = layer._forward_attention( hidden_states=hidden_states, attention_mask=node.chunk_state.attention_mask, rotary_pos_emb=node.chunk_state.rotary_pos_emb, @@ -465,14 +370,33 @@ def forward_func( packed_seq_params=node.chunk_state.packed_seq_params, sequence_len_offset=node.chunk_state.sequence_len_offset, ) - if not isinstance(layer.mlp, MoELayer): - return hidden_states + return hidden_states + + def submodule_post_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): + """ + Run forward pass for computations between attention and dispatch: + pre mlp layernorm->router->dispatch preprocess + """ + if layer.offload_mlp_norm: + hidden_states = fine_grained_offloading_group_start(hidden_states, name="mlp_norm") + if layer.recompute_pre_mlp_layernorm: + layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() + with get_fine_grained_offloading_context(layer.offload_mlp_norm): + pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( + layer.pre_mlp_layernorm, hidden_states + ) + else: + with get_fine_grained_offloading_context(layer.offload_mlp_norm): + pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) + + probs, routing_map = layer.mlp.route(pre_mlp_layernorm_output) + local_tokens, probs, _ = layer.mlp.preprocess(pre_mlp_layernorm_output, probs, routing_map) # Detach here for mlp_bda residual connection node.layer_state.residual = 
node.detach(hidden_states) if layer.mlp.use_shared_expert and not layer.mlp.shared_expert_overlap: - # Detach here for shared expert connection in moe_combine - node.layer_state.shared_expert_output = node.detach(shared_expert_output) + # Detach here for shared expert connection + node.layer_state.pre_mlp_layernorm_output = node.detach(pre_mlp_layernorm_output) return local_tokens, probs @@ -497,6 +421,7 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): Run forward pass for computations between dispatch and combine: post dispatch->experts->combine preprocess """ + shared_expert_output = None dispatched_probs = node.layer_state.dispatched_probs token_dispatcher = layer.mlp.token_dispatcher if enable_deepep or enable_hybridep: @@ -504,8 +429,10 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): # backward graph from connecting to dispatch submodule token_dispatcher._comm_manager.dispatched_probs = dispatched_probs - expert_output, _ = layer.mlp.routed_experts_compute( - dispatched_tokens, dispatched_probs, None + pre_mlp_layernorm_output = getattr(node.layer_state, 'pre_mlp_layernorm_output', None) + shared_expert_output = layer.mlp.shared_experts_compute(pre_mlp_layernorm_output) + expert_output, mlp_bias = layer.mlp.routed_experts_compute( + dispatched_tokens, dispatched_probs, pre_mlp_layernorm_output ) if layer.recompute_pre_mlp_layernorm: @@ -515,10 +442,16 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): # release tensor reference after use node.layer_state.dispatched_probs = None node.layer_state.pre_mlp_layernorm_output = None - - return expert_output - - def submodule_combine_forward(node: ScheduleNode, output: torch.Tensor): + if shared_expert_output is None: + # Return only expert_output, since shared_expert_output causes backward on None + return expert_output + return expert_output, shared_expert_output + + def submodule_combine_forward( + node: 
ScheduleNode, + output: torch.Tensor, + shared_expert_output: Optional[torch.Tensor] = None, + ): """ # Triggers token combine and the remaining computation in the transformer layer. # The `mlp_bda` computation is placed after `mlp.combine` due to data dependency. @@ -528,11 +461,10 @@ def submodule_combine_forward(node: ScheduleNode, output: torch.Tensor): # with another microbatch's computation and expose the communication. """ residual = node.layer_state.residual - shared_expert_output = getattr(node.layer_state, 'shared_expert_output', None) + output = layer.mlp.combine(output, shared_expert_output) mlp_output_with_bias = (output, None) - if hasattr(layer, 'cuda_graphs') and layer.cuda_graphs: - layer.mlp.cudagraph_tensor_store.clear() + with layer.bias_dropout_add_exec_handler(): hidden_states = layer.mlp_bda(layer.training, layer.config.bias_dropout_fusion)( mlp_output_with_bias, residual, layer.hidden_dropout @@ -568,12 +500,13 @@ def raise_not_implemented(*args): # Build forward and backward callable functions attn_func = submodule_attn_forward + post_attn_func = submodule_post_attn_forward if is_moe else raise_not_implemented dispatch_func = submodule_dispatch_forward if is_moe else raise_not_implemented mlp_func = submodule_moe_forward if is_moe else mlp_wrapper combine_func = submodule_combine_forward if is_moe else raise_not_implemented - forward_funcs = [attn_func, dispatch_func, mlp_func, combine_func, None] - backward_dw = {"attn": attn_backward_dw_wrapper, "mlp": layer.mlp} + forward_funcs = [attn_func, post_attn_func, dispatch_func, mlp_func, combine_func, None] + backward_dw = {"attn": layer.self_attention, "mlp": layer.mlp} return forward_funcs, backward_dw @@ -585,7 +518,9 @@ def build_mtp_layer_callables(layer): """ forward_funcs, backward_dw = build_transformer_layer_callables(layer.transformer_layer) - attn_forward, dispatch_forward, mlp_forward, combine_forward, _ = forward_funcs + attn_forward, post_attn_forward, dispatch_forward, 
mlp_forward, combine_forward, _ = ( + forward_funcs + ) is_moe = isinstance(layer.transformer_layer.mlp, MoELayer) assert is_moe, "MTP layer in a2a overlap only supports MoE layer for now." @@ -646,17 +581,24 @@ def rng_context_wrapper(func, *args, **kwargs): # Build forward and backward callable functions # attn_forward already has rng context, no need to wrap attn_func = submodule_mtp_attn_forward + post_attn_func = partial(rng_context_wrapper, post_attn_forward) dispatch_func = partial(rng_context_wrapper, dispatch_forward) mlp_func = partial(rng_context_wrapper, mlp_forward) combine_func = partial(rng_context_wrapper, combine_forward) mtp_post_process_func = submodule_mtp_postprocess_forward - forward_funcs = [attn_func, dispatch_func, mlp_func, combine_func, mtp_post_process_func] - if isinstance(backward_dw["attn"], list): - backward_dw["attn"].append(layer.eh_proj) - else: - backward_dw["attn"] = [backward_dw["attn"], layer.eh_proj] - + forward_funcs = [ + attn_func, + post_attn_func, + dispatch_func, + mlp_func, + combine_func, + mtp_post_process_func, + ] + backward_dw = { + "attn": [layer.transformer_layer.self_attention, layer.eh_proj], + "mlp": layer.transformer_layer.mlp, + } return forward_funcs, backward_dw diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index c41a09ea594..a8fdf2324f2 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -2,7 +2,6 @@ import contextlib from functools import partial -from itertools import zip_longest from typing import Callable, Iterator, List, Optional, Union import torch @@ -844,110 +843,6 @@ def convert_schedule_table_to_order(num_warmup_microbatches, num_model_chunks, s return order -def get_overlap_moe_expert_parallel_comm_order(order, num_layers_per_chunk, capture_wgrad_graph): - """ - This functions gets the order for overlap_moe_expert_parallel_comm schedule for the original - chunk-wise order 
list. Each chunk is transformered to chunks with only 1 layer so that - layers between 2 chunks can now overlap with each other while following the graph order. - If capture_wgrad_graph is True, the wgrad backward graph is also added to the order by - decreasing the layer id by 0.5. - - Args: - order (List[int]): The original chunk-wise order list. Positive values represent forward - passes for chunks, negative values represent backward passes. The absolute value - indicates the chunk ID (1-indexed). - num_layers_per_chunk (List[int]): Number of graphable layers in each chunk. The length - of this list equals the number of chunks. - capture_wgrad_graph (bool): If True, weight gradient computation graphs are added to the - order by appending entries with layer_id - 0.5. - - Returns: - Tuple[List[float], List[Optional[List[int]]]]: A tuple containing: - - new_order: The layer-wise order list where each chunk is expanded to individual - layers. Positive values are forward passes, negative values are backward passes. - Values with .5 suffix indicate weight gradient computations. - - chunk_id_list: A list parallel to new_order. For forward passes, contains - [chunk_id, layer_index_within_chunk]. For backward passes, contains None. 
- - Example: - original_order: [1, 2, -2, 1, -1, -1] - num_layers_per_chunk: [1, 2] - capture_wgrad_graph=True: - new_order: [1, 2, 3, 1, -3, -3.5, -2, -2.5, -1, -1.5, -1, -1.5] - chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None, - None, None, None, None, None, None, None] - capture_wgrad_graph=False: - new_order: [1, 2, 3, 1, -3, -2, -1, -1] - chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None, None, None, None] - """ - - def _add_order(new_order, chunk_id_list, c_id, layer_id, is_wgrad=False, index=None): - if is_wgrad: - new_order.append(layer_id - 0.5) - else: - new_order.append(layer_id) - if c_id > 0: - chunk_id_list.append([abs(c_id) - 1, index]) - else: - chunk_id_list.append(None) - - new_order = [] - chunk_id_list = [] - add_order = partial(_add_order, new_order, chunk_id_list) - first_backward_idx, last_forward_idx = None, None - for idx, c_id in enumerate(order): - if first_backward_idx is None and c_id < 0: - first_backward_idx = idx - if c_id > 0: - last_forward_idx = idx - - def get_layer_range(c_id): - num_layers = num_layers_per_chunk[abs(c_id) - 1] - num_layers_previous_chunks = sum(num_layers_per_chunk[: abs(c_id) - 1]) - if c_id > 0: - return list( - range(num_layers_previous_chunks + 1, num_layers_previous_chunks + num_layers + 1) - ) - return list(range(-num_layers_previous_chunks - num_layers, -num_layers_previous_chunks)) - - # warmup stage - for c_id in order[:first_backward_idx]: - layer_range = get_layer_range(c_id) - new_order += layer_range - chunk_id_list.extend([abs(c_id) - 1, i] for i in range(len(layer_range))) - - # 1f1b overlap stage - if first_backward_idx < last_forward_idx: - for c_id_b, c_id_f in zip( - order[first_backward_idx : last_forward_idx + 1 : 2], - order[first_backward_idx + 1 : last_forward_idx + 1 : 2], - ): - layer_range_f = get_layer_range(c_id_f) - layer_range_b = get_layer_range(c_id_b) - index = 0 - for l_b, l_f in zip_longest(layer_range_b, layer_range_f, fillvalue=0): - # always forward graph before 
backward graph - if l_f != 0: - add_order(c_id_f, l_f, index=index) - if l_b != 0: - add_order(c_id_b, l_b) - if capture_wgrad_graph and index < len(layer_range_b) - 1: - add_order(c_id_b, l_b, is_wgrad=True) - index += 1 - # last wgrad backward - if capture_wgrad_graph and layer_range_b: - add_order(c_id_b, layer_range_b[-1], is_wgrad=True) - - # cool down stage, backward graphs only - for c_id in order[last_forward_idx + 1 :]: - for l_b in get_layer_range(c_id): - add_order(c_id, l_b) - if capture_wgrad_graph: - add_order(c_id, l_b, is_wgrad=True) - - return new_order, chunk_id_list - - def forward_backward_pipelining_with_interleaving( *, forward_step_func, diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index d38f6d702c0..e7e416f99bd 100644 --- a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -182,8 +182,8 @@ def __init__( self.free_input = free_input self.inputs = None self.outputs = None - self.manual_grads_release = False self.delay_grads_release = False + self.manual_release_grads = False def default_backward_func(self, outputs, output_grad): """Default backward function""" @@ -269,7 +269,7 @@ def _backward(self, *output_grad): # to avoid delayed garbage collection. If # delay_grads_release is True, dgrad is last used in # wgrad compute and skip the release here. 
- if self.manual_grads_release and not self.delay_grads_release: + if self.manual_release_grads and not self.delay_grads_release: g.untyped_storage().resize_(0) grads = self.get_grad() diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index b566c1830dc..27e6c65c738 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -10,7 +10,6 @@ from contextlib import nullcontext from dataclasses import fields, is_dataclass from enum import Enum -from math import ceil from typing import Any, Dict, List, Optional import torch @@ -1511,7 +1510,7 @@ def graphs_created(self): """ return self._graphs_created - def _get_sample_arguments(self, order, chunk_id_list=None): + def _get_sample_arguments(self, order): """ Generate sample arguments and keyword arguments for CUDA Graph capturing with memory-optimized buffer reuse. @@ -1540,9 +1539,6 @@ def _get_sample_arguments(self, order, chunk_id_list=None): order (List[int]): The forward/backward execution order from convert_schedule_table_to_order(). Positive integers represent forward passes (1-indexed chunk ID), negative integers represent backward passes. - chunk_id_list (List[Tuple[int, int]]): The list of chunk IDs and layer IDs in the - order. This is useful only when overlap_moe_expert_parallel_comm is enabled, - the order maps each layers' idx to their original chunk id. Returns: Tuple[List[Tuple], List[Dict]]: A tuple containing: @@ -1564,11 +1560,9 @@ def _get_sample_arguments(self, order, chunk_id_list=None): assert self.num_model_chunks == max( order ), "num_model_chunks must match the max chunk id in order." - if chunk_id_list is None: - # check only if 1f1b overlap is disabled. - assert ( - self.num_microbatches == len(order) // self.num_model_chunks // 2 - ), "num_microbatches must match the number of microbatches in order." 
+ assert ( + self.num_microbatches == len(order) // self.num_model_chunks // 2 + ), "num_microbatches must match the number of microbatches in order." # Generate sample arguments and keyword arguments for capturing. sample_args = [None] * (len(self.flattened_callables) * self.num_microbatches) @@ -1651,8 +1645,8 @@ def get_rotary_pos_emb(transformer_module, transformer_input): consumed_sample_queue = {} layer_sample_keys_cache = {} fwd_idx = [0] * self.num_model_chunks - for idx, chunk_id in enumerate(order): - model_chunk_idx = abs(ceil(chunk_id)) - 1 + for chunk_id in order: + model_chunk_idx = abs(chunk_id) - 1 if chunk_id > 0: if model_chunk_idx not in fwd_sample_queues: @@ -1661,14 +1655,7 @@ def get_rotary_pos_emb(transformer_module, transformer_input): sample_start_idx = (prefix_num_layers[model_chunk_idx] * self.num_microbatches) + ( fwd_idx[model_chunk_idx] * self.num_layers_per_chunk[model_chunk_idx] ) - if chunk_id_list: - model_chunk_idx = chunk_id_list[idx][0] - callables_curr_chunk = [ - self.callables_per_chunk[model_chunk_idx][chunk_id_list[idx][1]] - ] - else: - callables_curr_chunk = self.callables_per_chunk[model_chunk_idx] - for layer_idx, layer in enumerate(callables_curr_chunk): + for layer_idx, layer in enumerate(self.callables_per_chunk[model_chunk_idx]): per_callable_fwd_idx = sample_start_idx + layer_idx # Get sample_args and sample_kwargs for index per_callable_fwd_idx. @@ -1705,7 +1692,7 @@ def get_rotary_pos_emb(transformer_module, transformer_input): # reuse the static inputs of a previous forward pass for this forward pass. # If not, we still need to generate the new static inputs. 
sample_keys = layer_sample_keys_cache[id(layer)] - model_chunk_idx = abs(chunk_id) - 1 + fwd_sample_queues[model_chunk_idx].append((sample_keys, per_callable_fwd_idx)) if consumed_sample_queue.get(sample_keys, []): # We can reuse the static inputs of a previous forward pass for this @@ -1727,16 +1714,13 @@ def get_rotary_pos_emb(transformer_module, transformer_input): # Unfortunately, no previous static inputs are available for reuse, # sample_args is still None. Last attempt: generate the new static inputs # for this forward pass. - if chunk_id_list: - model_chunk_idx = chunk_id_list[idx][0] sample_args[per_callable_fwd_idx], sample_kwargs[per_callable_fwd_idx] = ( _get_layer_static_inputs( layer, self.chunks_with_decoder[model_chunk_idx] ) ) - model_chunk_idx = abs(chunk_id) - 1 fwd_idx[model_chunk_idx] += 1 - elif ceil(chunk_id) == chunk_id: + else: num_consumed_samples = min( len(fwd_sample_queues[model_chunk_idx]), self.num_layers_per_chunk[model_chunk_idx], @@ -1750,9 +1734,6 @@ def get_rotary_pos_emb(transformer_module, transformer_input): fwd_sample_queues[model_chunk_idx] = fwd_sample_queues[model_chunk_idx][ num_consumed_samples: ] - else: - # skip register static inputs for wgrad backward graphs - continue return sample_args, sample_kwargs @@ -1765,16 +1746,12 @@ def _get_cuda_graph_input_data(self): # Get the PP and VPP scheduling order. from megatron.core.pipeline_parallel.schedules import ( convert_schedule_table_to_order, - get_overlap_moe_expert_parallel_comm_order, get_pp_rank_microbatches, get_schedule_table, ) # If PP is not enabled, we only need to capture one microbatch. - if ( - parallel_state.get_pipeline_model_parallel_world_size() == 1 - and not self.config.overlap_moe_expert_parallel_comm - ): + if parallel_state.get_pipeline_model_parallel_world_size() == 1: assert ( self.num_model_chunks == 1 ), "If PP is not enabled, there should be only one model chunk." 
@@ -1803,36 +1780,9 @@ def _get_cuda_graph_input_data(self): level=logging.DEBUG, msg=f'Rank {torch.distributed.get_rank()}: ORDER {order}', ) - chunk_id_list = None - if self.config.overlap_moe_expert_parallel_comm: - wgrad_in_graph_scope = CudaGraphScope.attn in self.config.cuda_graph_scope or ( - CudaGraphScope.moe_router in self.config.cuda_graph_scope - and self.config.moe_shared_expert_intermediate_size is not None - and not self.config.moe_shared_expert_overlap - ) - capture_wgrad_graph = self.config.delay_wgrad_compute and wgrad_in_graph_scope - order, chunk_id_list = get_overlap_moe_expert_parallel_comm_order( - order, self.num_layers_per_chunk, capture_wgrad_graph - ) - self.num_layers_per_chunk = [1] * sum(self.num_layers_per_chunk) - self.num_model_chunks = max(order) - _order_without_wgrad = [] - for c_id in order: - if ceil(c_id) != c_id: - continue - _order_without_wgrad.append(c_id) - self.num_microbatches = len(_order_without_wgrad) // self.num_model_chunks // 2 - log_on_each_pipeline_stage( - logger=logger, - tp_group=None, - dp_cp_group=None, - level=logging.DEBUG, - msg=f'Rank {torch.distributed.get_rank()}: ' - f'ORDER after overlap_moe_expert_parallel_comm {order}', - ) # Generate sample arguments and keyword arguments for capturing. 
- sample_args, sample_kwargs = self._get_sample_arguments(order, chunk_id_list) + sample_args, sample_kwargs = self._get_sample_arguments(order) def get_make_graphed_callables_kwargs(): kwargs = {'allow_unused_input': True, '_order': order} @@ -1970,17 +1920,13 @@ def create_cudagraphs(self): for layer_number, layer in enumerate(layers): layer.cuda_graphs = [] for batch_number in range(self.num_microbatches): - if self.config.overlap_moe_expert_parallel_comm: - graph_idx = ( - num_layers_accumulated + layer_number - ) * self.num_microbatches + batch_number - else: - graph_idx = ( + layer.cuda_graphs.append( + graphs[ num_layers_accumulated * self.num_microbatches + batch_number * len(layers) + layer_number - ) - layer.cuda_graphs.append(graphs[graph_idx]) + ] + ) num_layers_accumulated += len(layers) self._finish_capturing(start_time) diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py index c8438bb2c8a..10d10f667fe 100644 --- a/megatron/core/transformer/moe/moe_layer.py +++ b/megatron/core/transformer/moe/moe_layer.py @@ -326,11 +326,10 @@ def custom_forward(hidden_states): return outputs - def backward_dw(self, routed_experts: bool = True, shared_experts: bool = False): + def backward_dw(self): """Compute weight gradients for experts and shared experts.""" - if routed_experts: - self.experts.backward_dw() - if shared_experts and self.use_shared_expert and not self.shared_expert_overlap: + self.experts.backward_dw() + if self.use_shared_expert and not self.shared_expert_overlap: self.shared_experts.backward_dw() def set_for_recompute_pre_mlp_layernorm(self): diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index a5636d94e26..6493a4bcce1 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -1849,16 +1849,6 @@ def __post_init__(self): 'when enabling overlap_moe_expert_parallel_comm with MTP 
layer.' ) - if self.cuda_graph_impl != "none": - assert ( - self.cuda_graph_impl == "transformer_engine" - and CudaGraphScope.moe not in self.cuda_graph_scope - and CudaGraphScope.mlp not in self.cuda_graph_scope - ), ( - 'CUDA graph scope on moe and mlp is not ' - 'supported with overlap_moe_expert_parallel_comm' - ) - # Check delay_wgrad_compute compatibility if self.delay_wgrad_compute: assert ( @@ -1867,11 +1857,6 @@ def __post_init__(self): assert ( not self.moe_use_legacy_grouped_gemm ), 'delay_wgrad_compute is not supported with legacy groupedgemm implementation' - if self.cuda_graph_impl == "transformer_engine": - assert is_te_min_version("2.10.0"), ( - 'TE version >= 2.10.0 is required for delay_wgrad_compute with ' - 'partial cuda graph' - ) if self.ep_overlap_early_attn_memory_release: assert self.overlap_moe_expert_parallel_comm, ( diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index db57e21c891..3ea40577009 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -872,10 +872,6 @@ def _te_cuda_graph_replay(self, *args, **kwargs): # CUDA Graph captures the whole MLP/MoE part. CUDA Graph output is the layer output. assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output." output = cuda_graph_output.pop() - assert ( - not self.config.overlap_moe_expert_parallel_comm - ), "EP overlap must be \ - disabled when CUDA graph captures the whole MLP/MoE part." elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope: # CUDA Graph partially captures the MoE. # The rest of the layer should go to the normal pass. @@ -918,35 +914,12 @@ def _te_cuda_graph_replay(self, *args, **kwargs): residual=residual, shared_expert_output=shared_expert_output, ) - # If EP overlap is enabled, remaining of mlp will be called as fine_grained_callables - # and should be skipped here. 
- if self.config.overlap_moe_expert_parallel_comm: - probs, routing_map = self.mlp.route(hidden_states) - hidden_states, probs, residual = self.mlp.preprocess( - hidden_states, probs, routing_map - ) - nvtx_range_pop(suffix="mlp") - return mlp_residual, hidden_states, probs, shared_expert_output mlp_output_with_bias = self.mlp(hidden_states) self.mlp.cudagraph_tensor_store.clear() nvtx_range_pop(suffix="mlp") output = self._forward_post_mlp(mlp_output_with_bias, mlp_residual) else: - # If EP overlap is enabled, needs to return same outputs as submodule.attn - if self.config.overlap_moe_expert_parallel_comm: - assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output." - mlp_residual = cuda_graph_output.pop() - if not self.is_moe_layer: - return mlp_residual, None, None, None - hidden_states = self.pre_mlp_layernorm(mlp_residual) - shared_expert_output = self.mlp.shared_experts_compute(hidden_states) - probs, routing_map = self.mlp.route(hidden_states) - hidden_states, probs, residual = self.mlp.preprocess( - hidden_states, probs, routing_map - ) - return mlp_residual, hidden_states, probs, shared_expert_output - # CUDA Graph does not capture the MLP/MoE part at all. output = self._forward_mlp(*cuda_graph_output) return output, context @@ -1034,15 +1007,6 @@ def _should_call_local_cudagraph(self, *args, **kwargs): return True return False - def backward_dw_cudagraph(self, microbatch_idx): - """ - CUDA Graph backward weight gradient computation for this layer. - """ - cg_index = microbatch_idx % len(self.cuda_graphs) - if not hasattr(self.cuda_graphs[cg_index], 'backward_dw'): - return - self.cuda_graphs[cg_index].backward_dw() - def __call__(self, *args, **kwargs): if self._should_call_local_cudagraph(*args, **kwargs): # Inference mode. 
diff --git a/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py b/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py deleted file mode 100644 index 91c74fe1bb6..00000000000 --- a/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py +++ /dev/null @@ -1,372 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -import gc -import os -import sys - -import pytest -import torch - -from megatron.core.enums import ModelType -from megatron.core.models.gpt.gpt_layer_specs import ( - get_gpt_decoder_block_spec, - get_gpt_mtp_block_spec, -) -from megatron.core.models.gpt.gpt_model import GPTModel -from megatron.core.num_microbatches_calculator import destroy_num_microbatches_calculator -from megatron.core.pipeline_parallel.utils import set_streams -from megatron.core.tensor_parallel.random import HAVE_TE, model_parallel_cuda_manual_seed -from megatron.core.transformer.enums import CudaGraphScope -from megatron.core.transformer.module import float16_to_fp32 -from megatron.core.utils import is_te_min_version, unwrap_model -from megatron.training.arguments import core_transformer_config_from_args, parse_args, validate_args -from megatron.training.global_vars import ( - destroy_global_vars, - get_args, - set_args, - set_global_variables, -) -from megatron.training.training import setup_model_and_optimizer -from tests.unit_tests.test_utilities import Utils - - -def is_deep_ep_available(): - from megatron.core.transformer.moe.fused_a2a import HAVE_DEEP_EP - - return HAVE_DEEP_EP - - -def is_hybrid_ep_available(): - from megatron.core.transformer.moe.fused_a2a import HAVE_HYBRIDEP - - return HAVE_HYBRIDEP - - -def save(fn, message): - with open(fn, 'w') as f: - f.write(message) - - -class TestPartialCudaGraphedA2AOverlap: - """Test that CUDA graph outputs match ep-overlapped CUDA graph outputs for various scopes.""" - - def setup_method(self, method): - self.seq_length = 512 - self.micro_batch_size = 2 - # Store 
original environment variable values - self.original_env = { - 'CUDA_DEVICE_MAX_CONNECTIONS': os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS'), - 'NVTE_ALLOW_NONDETERMINISTIC_ALGO': os.environ.get('NVTE_ALLOW_NONDETERMINISTIC_ALGO'), - } - self.cuda_graph_helper = None - os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1' - os.environ['NVTE_ALLOW_NONDETERMINISTIC_ALGO'] = '0' - - def teardown_method(self, method): - # Restore original environment variable values - for key, value in self.original_env.items(): - if value is None: - os.environ.pop(key, None) - else: - os.environ[key] = value - Utils.destroy_model_parallel() - destroy_global_vars() - destroy_num_microbatches_calculator() - if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): - self.cuda_graph_helper.delete_cuda_graphs() - self.cuda_graph_helper = None - - gc.collect() - - def model_provider( - self, - pre_process=True, - post_process=True, - layer_spec_fn=get_gpt_decoder_block_spec, - **config_kwargs, - ): - model_parallel_cuda_manual_seed(123) - args = get_args() - config = core_transformer_config_from_args(args) - transformer_layer_spec = layer_spec_fn( - config, - use_transformer_engine=True, - normalization=args.normalization, - qk_l2_norm=args.qk_l2_norm, - ) - if args.mtp_num_layers: - mtp_block_spec = get_gpt_mtp_block_spec( - config, transformer_layer_spec, use_transformer_engine=True - ) - else: - mtp_block_spec = None - return GPTModel( - config=config, - transformer_layer_spec=transformer_layer_spec, - vocab_size=args.vocab_size, - max_sequence_length=args.max_position_embeddings, - pre_process=pre_process, - post_process=post_process, - fp16_lm_cross_entropy=args.fp16_lm_cross_entropy, - parallel_output=True, - share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights, - position_embedding_type=args.position_embedding_type, - rotary_percent=args.rotary_percent, - mtp_block_spec=mtp_block_spec, - ) - - def create_test_args( - self, 
cuda_graph_impl, cuda_graph_scope, cuda_graph_warmup_steps, ep_size, **kwargs - ): - destroy_global_vars() - destroy_num_microbatches_calculator() - - sys.argv = ['test_cuda_graphs.py'] - args = parse_args() - args.num_layers = 1 - args.mtp_num_layers = None - args.vocab_size = 1024 - args.hidden_size = 128 - args.num_attention_heads = 8 - args.max_position_embeddings = 512 - args.global_batch_size = self.micro_batch_size * 8 - args.micro_batch_size = self.micro_batch_size - args.create_attention_mask_in_dataloader = True - args.seq_length = self.seq_length - args.tensor_model_parallel_size = 2 - args.sequence_parallel = True - args.pipeline_model_parallel_size = 1 - args.context_parallel_size = 1 - args.expert_model_parallel_size = ep_size - args.train_iters = 10 - args.lr = 3e-5 - args.bf16 = True - args.add_bias_linear = False - args.swiglu = True - args.use_distributed_optimizer = True - args.position_embedding_type = "rope" - args.rotary_percent = 1.0 - args.hidden_dropout = 0.0 - args.attention_dropout = 0.0 - args.untie_embeddings_and_output_weights = True - - # MoE settings - args.num_experts = 16 - args.expert_model_parallel_size = ep_size - args.moe_shared_expert_intermediate_size = 1024 - args.moe_layer_freq = kwargs.get("moe_layer_freq", "[0,0,1,1]") - args.moe_permute_fusion = True - args.moe_router_fusion = True - args.moe_router_topk = 2 - - # CUDA graph settings - args.cuda_graph_impl = cuda_graph_impl - args.cuda_graph_scope = cuda_graph_scope - args.cuda_graph_warmup_steps = cuda_graph_warmup_steps - args.use_te_rng_tracker = cuda_graph_impl != "none" - - for key, value in kwargs.items(): - assert hasattr(args, key) - setattr(args, key, value) - - validate_args(args) - set_global_variables(args, False) - return args - - def get_batch(self, seq_length, micro_batch_size): - data = list(range(seq_length)) - input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() - labels = 1 + torch.tensor(data, 
dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() - position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() - attention_mask = torch.ones( - (micro_batch_size, 1, seq_length, seq_length), dtype=bool - ).cuda() - loss_mask = torch.ones(seq_length).repeat((micro_batch_size, 1)).cuda() - return input_ids, labels, position_ids, attention_mask, loss_mask - - def _run_1f1b_helper(self, gpt_model, optimizer, data, num_iters, cuda_graph_warmup_steps): - from megatron.core.models.common.model_chunk_schedule_plan import ( - TransformerModelChunkSchedulePlan, - ) - from megatron.core.pipeline_parallel.schedules import set_current_microbatch - - schedule_plans = [] - losses = [] - set_current_microbatch(gpt_model[0], 1) - - gpt_model[0].zero_grad_buffer() - optimizer.zero_grad() - assert cuda_graph_warmup_steps > 0, "cuda_graph_warmup_steps must be greater than 0" - for fwd_mb_idx in range(num_iters + 1): - # Capture CUDA graphs after warmup if helper is provided - if self.cuda_graph_helper is not None and fwd_mb_idx == cuda_graph_warmup_steps: - self.cuda_graph_helper.create_cudagraphs() - - if fwd_mb_idx < cuda_graph_warmup_steps: - gpt_model[0].zero_grad_buffer() - optimizer.zero_grad() - output = gpt_model[0].forward(**data) - schedule_plans.append(None) - else: - if fwd_mb_idx == cuda_graph_warmup_steps: - extra_schedule_plan = unwrap_model(gpt_model[0]).build_schedule_plan(**data) - TransformerModelChunkSchedulePlan.run(extra_schedule_plan, None) - schedule_plans[-1] = extra_schedule_plan - f_schedule_plan = unwrap_model(gpt_model[0]).build_schedule_plan(**data) - b_schedule_plan = schedule_plans[-1] - schedule_plans.append(f_schedule_plan) - if b_schedule_plan is not None: - gpt_model[0].zero_grad_buffer() - optimizer.zero_grad() - output = TransformerModelChunkSchedulePlan.run( - f_schedule_plan, - b_schedule_plan, - b_grad=torch.ones_like(output) if fwd_mb_idx > 0 else None, - ) - # Check output shapes - if fwd_mb_idx < 
num_iters: - assert output is not None - assert output.shape[0] == self.micro_batch_size - assert output.shape[1] == self.seq_length - losses.append(output) - - if fwd_mb_idx < cuda_graph_warmup_steps: - output.backward(torch.ones_like(output)) - - for param in gpt_model[0].parameters(): - assert param.main_grad is not None - - update_successful, _, _ = optimizer.step() - assert update_successful - - return losses - - def _run_test_helper( - self, - ep_size, - cuda_graph_impl, - cuda_graph_scope, - cuda_graph_warmup_steps, - ep_overlap=False, - **kwargs, - ): - """Test fp8_param with gpt_model.""" - args = self.create_test_args( - cuda_graph_impl, - cuda_graph_scope, - cuda_graph_warmup_steps, - ep_size, - overlap_moe_expert_parallel_comm=ep_overlap, - **kwargs, - ) - if ep_overlap: - set_streams() - set_args(args) - torch.manual_seed(123) - Utils.initialize_model_parallel( - tensor_model_parallel_size=2, expert_model_parallel_size=ep_size - ) - - input_ids, labels, position_ids, attention_mask, loss_mask = self.get_batch( - self.seq_length, self.micro_batch_size - ) - - gpt_model, optimizer, _ = setup_model_and_optimizer( - self.model_provider, ModelType.encoder_or_decoder - ) - assert len(gpt_model) == 1 # Assume only one model in the model provider. 
- - loss_list = [] - - if cuda_graph_impl == "transformer_engine": - from megatron.core.transformer.cuda_graphs import TECudaGraphHelper - - self.cuda_graph_helper = TECudaGraphHelper( - model=gpt_model, - config=gpt_model[0].config, - seq_length=self.seq_length, - micro_batch_size=self.micro_batch_size, - optimizers=[optimizer], - ) - - num_iters = cuda_graph_warmup_steps + 2 - data = { - "input_ids": input_ids, - "position_ids": position_ids, - "attention_mask": attention_mask, - "labels": labels, - "loss_mask": loss_mask, - } - if not ep_overlap: - for i in range(num_iters): - gpt_model[0].zero_grad_buffer() - optimizer.zero_grad() - - # Capture CUDA graphs after warmup if helper is provided - if self.cuda_graph_helper is not None and i == cuda_graph_warmup_steps: - self.cuda_graph_helper.create_cudagraphs() - - output = unwrap_model(gpt_model[0]).forward(**data) - output = float16_to_fp32(output) - - # Check output shapes - assert output.shape[0] == self.micro_batch_size - assert output.shape[1] == self.seq_length - - # Verify gradients - output.backward(torch.ones_like(output)) - for param in gpt_model[0].parameters(): - assert param.main_grad is not None - - update_successful, _, _ = optimizer.step() - assert update_successful - - loss_list.append(output) - else: - loss_list = self._run_1f1b_helper( - gpt_model, optimizer, data, num_iters, cuda_graph_warmup_steps - ) - - return loss_list - - @pytest.mark.skipif( - not (HAVE_TE and is_te_min_version("2.10.0")), - reason="Partial CUDA graph support requires TransformerEngine version >= 2.10.0", - ) - @pytest.mark.parametrize("moe_dispatcher_type", ["alltoall", "deepep"]) - def test_moe_partial_cudagraph_with_ep_overlap(self, moe_dispatcher_type): - extra_kwargs = {"moe_layer_freq": 1} - if moe_dispatcher_type == "deepep": - if not is_deep_ep_available(): - pytest.skip("Deep EP is not available") - extra_kwargs["moe_token_dispatcher_type"] = "flex" - extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" - 
extra_kwargs["moe_router_dtype"] = "fp32" - elif moe_dispatcher_type == "hybridep": - if not is_hybrid_ep_available(): - pytest.skip("Hybrid EP is not available") - extra_kwargs["moe_token_dispatcher_type"] = "flex" - extra_kwargs["moe_flex_dispatcher_backend"] = "hybridep" - else: - extra_kwargs["moe_token_dispatcher_type"] = moe_dispatcher_type - - loss_list_ref = self._run_test_helper(4, "none", None, 3, **extra_kwargs) - for cuda_graph_scope in [ - [CudaGraphScope.attn], - [CudaGraphScope.attn, CudaGraphScope.moe_router], - [CudaGraphScope.attn, CudaGraphScope.moe_router, CudaGraphScope.moe_preprocess], - ]: - cuda_graph_warmup_steps = 3 - loss_list = self._run_test_helper( - 4, - "transformer_engine", - cuda_graph_scope, - cuda_graph_warmup_steps, - ep_overlap=True, - **extra_kwargs, - ) - assert len(loss_list) == len(loss_list_ref) - for i in range(len(loss_list)): - assert torch.equal( - loss_list[i].mean(), loss_list_ref[i].mean() - ), f"scope={cuda_graph_scope}, i={i},loss_list={loss_list[i]}, loss_list_ref={loss_list_ref[i]}" - print(f"[DEBUG] Pass {cuda_graph_scope}") diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py index 0fd2c445c9f..7fb97f6e586 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py @@ -306,7 +306,7 @@ def test_transformer_layer_overlap_shared_expert(self): "moe_shared_expert_intermediate_size": 512, } overlap_config = get_test_config(extra_kwargs=extra_kwargs) - extra_kwargs["moe_shared_expert_overlap"] = False + extra_kwargs["moe_shared_expert_overlap"] = True ref_config = get_test_config(extra_kwargs=extra_kwargs) microbatches = 4 with deterministic_mode(): diff --git a/tests/unit_tests/a2a_overlap/utils.py b/tests/unit_tests/a2a_overlap/utils.py index a52843956df..7db4256a849 100644 --- a/tests/unit_tests/a2a_overlap/utils.py +++ b/tests/unit_tests/a2a_overlap/utils.py @@ -1,4 
+1,3 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import os from contextlib import contextmanager from dataclasses import dataclass diff --git a/tests/unit_tests/pipeline_parallel/test_schedules.py b/tests/unit_tests/pipeline_parallel/test_schedules.py index 86b9219fe0f..b861aa2df49 100644 --- a/tests/unit_tests/pipeline_parallel/test_schedules.py +++ b/tests/unit_tests/pipeline_parallel/test_schedules.py @@ -1,5 +1,3 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. - import os import pytest @@ -129,52 +127,6 @@ def test_get_pipeline_parallel_order( for k, v in order_cnt.items(): assert -k in order_cnt and order_cnt[-k] == v - layers_per_chunk = 2 - num_layers_per_chunk = [layers_per_chunk] * num_model_chunks - # disable wgrad compute - overlapped_order, chunk_id_list = schedule.get_overlap_moe_expert_parallel_comm_order( - order, num_layers_per_chunk, False - ) - assert max(overlapped_order) == num_model_chunks * layers_per_chunk - assert len(overlapped_order) == len(order) * layers_per_chunk - assert len(chunk_id_list) == len(overlapped_order) - order_cnt = {} - accumulated_order = 0 - for o in overlapped_order: - order_cnt[o] = order_cnt.get(o, 0) + 1 - if o < 0: - assert -o in order_cnt and order_cnt[-o] >= order_cnt[o] - elif -o in order_cnt: - assert order_cnt[-o] < order_cnt[o] - accumulated_order += o - assert accumulated_order >= 0 - assert accumulated_order == 0 - - # enable wgrad compute - overlapped_order, chunk_id_list = schedule.get_overlap_moe_expert_parallel_comm_order( - order, num_layers_per_chunk, True - ) - assert max(overlapped_order) == num_model_chunks * layers_per_chunk - assert len(overlapped_order) == len(order) * layers_per_chunk * 3 // 2 - assert len(chunk_id_list) == len(overlapped_order) - from math import ceil - - order_cnt = {} - accumulated_order = 0 - prev_o = 0 - for o in overlapped_order: - if ceil(o) != o: - assert prev_o - 0.5 == o - else: - order_cnt[o] = order_cnt.get(o, 0) 
+ 1 - if o < 0: - assert -o in order_cnt and order_cnt[-o] >= order_cnt[o] - elif -o in order_cnt: - assert order_cnt[-o] < order_cnt[o] - accumulated_order += o - prev_o = o - assert accumulated_order < 0 - Utils.destroy_model_parallel() diff --git a/tests/unit_tests/transformer/test_submodule_callables.py b/tests/unit_tests/transformer/test_submodule_callables.py index 73059495c06..1ccb6fd5be8 100644 --- a/tests/unit_tests/transformer/test_submodule_callables.py +++ b/tests/unit_tests/transformer/test_submodule_callables.py @@ -64,7 +64,7 @@ def run_model_submodules_with_capture(model, input_tensors, microbatches): output_tensors = [] # get callables callables, dw = build_layer_callables(model) - attn, dispatch, moe, combine, post_process = callables + attn, post_attn, dispatch, moe, combine, post_process = callables assert post_process is None dummy_model = DummyState() dummy_model.decoder = DummyState() @@ -76,16 +76,24 @@ def run_model_submodules_with_capture(model, input_tensors, microbatches): node.chunk_state.model = dummy_model # attn fwd - local_tokens, probs = attn(node, input_tensors[i]) + hidden_states = attn(node, input_tensors[i]) + + # post attn fwd + local_tokens, probs = post_attn(node, hidden_states) # dispatch fwd dispatched_tokens = dispatch(node, local_tokens, probs) # moe fwd - expert_output = moe(node, dispatched_tokens) + expert_outputs = moe(node, dispatched_tokens) + if model.mlp.use_shared_expert: + expert_output, shared_expert_output = expert_outputs + else: + expert_output = expert_outputs + shared_expert_output = None # combine fwd - hidden_states = combine(node, expert_output) + hidden_states = combine(node, expert_output, shared_expert_output) # loss output_tensors.append(hidden_states) From dfa6cc12d3a246d55f4c45847d73c9127099327b Mon Sep 17 00:00:00 2001 From: HaochenYuan <106647990+HaochenYuan@users.noreply.github.com> Date: Tue, 6 Jan 2026 15:35:49 +0800 Subject: [PATCH 214/334] [Dev] Remove calculation of padding token in moe 
routing loss (#2754) Co-authored-by: Li Tao Co-authored-by: Dennis(Zhenhuan) Liu --- .../core/extensions/transformer_engine.py | 2 +- .../common/model_chunk_schedule_plan.py | 2 + .../core/models/gpt/fine_grained_callables.py | 21 +- megatron/core/models/gpt/gpt_model.py | 37 +++- megatron/core/transformer/mlp.py | 2 +- megatron/core/transformer/moe/moe_layer.py | 27 ++- megatron/core/transformer/moe/moe_utils.py | 83 ++++++-- megatron/core/transformer/moe/router.py | 157 +++++++++++---- .../core/transformer/transformer_block.py | 15 +- .../core/transformer/transformer_layer.py | 26 ++- .../python_scripts/recipe_parser.py | 1 + .../a2a_overlap/test_schedule_chunk_1f1b.py | 116 ++++++++++- .../a2a_overlap/test_schedule_layer_1f1b.py | 4 +- .../transformer/moe/test_aux_loss.py | 189 ++++++++++++++++++ .../transformer/moe/test_routers.py | 47 +++++ 15 files changed, 640 insertions(+), 89 deletions(-) diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 151b8ad27fa..d823e42b0bc 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -2139,7 +2139,7 @@ def forward_post_hook(module, *_) -> None: "TEFusedMLP module does not support submodules with post-backward hooks" ) - def forward(self, hidden_states: torch.Tensor) -> Tuple[Tensor, Optional[Tensor]]: + def forward(self, hidden_states: torch.Tensor, **kwargs) -> Tuple[Tensor, Optional[Tensor]]: """Forward.""" # Construct fused impl if needed diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 486a498dd73..07bab1cb486 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -305,6 +305,7 @@ def __init__( extra_block_kwargs=None, runtime_gather_output: Optional[bool] = None, loss_mask: Optional[Tensor] = None, + padding_mask=None, ): 
"""Initialize the schedule plan of all Transformer layers' sub-modules. @@ -347,6 +348,7 @@ def __init__( self._model_chunk_state.mtp_hidden_states = None self._model_chunk_state.loss_mask = loss_mask self._model_chunk_state.packed_seq_params = packed_seq_params + self._model_chunk_state.padding_mask = padding_mask self._model_chunk_state.extra_block_kwargs = extra_block_kwargs self._model_chunk_state.runtime_gather_output = runtime_gather_output self._model_chunk_state.model = model diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index a0be55c4ca1..5913dfaba33 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -120,13 +120,19 @@ def forward_impl(self): if not self.gpt_model.pre_process: self.chunk_state.decoder_input = self.gpt_model.decoder.input_tensor # Run GPTModel._preprocess - decoder_input, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, sequence_len_offset = ( - self.gpt_model._preprocess( - input_ids=self.chunk_state.input_ids, - position_ids=self.chunk_state.position_ids, - decoder_input=self.chunk_state.decoder_input, - packed_seq_params=self.chunk_state.packed_seq_params, - ) + ( + decoder_input, + rotary_pos_emb, + rotary_pos_cos, + rotary_pos_sin, + sequence_len_offset, + padding_mask, + ) = self.gpt_model._preprocess( + input_ids=self.chunk_state.input_ids, + position_ids=self.chunk_state.position_ids, + decoder_input=self.chunk_state.decoder_input, + packed_seq_params=self.chunk_state.packed_seq_params, + padding_mask=self.chunk_state.padding_mask, ) # Saved for later use @@ -135,6 +141,7 @@ def forward_impl(self): self.chunk_state.rotary_pos_cos = rotary_pos_cos self.chunk_state.rotary_pos_sin = rotary_pos_sin self.chunk_state.sequence_len_offset = sequence_len_offset + self.chunk_state.padding_mask = padding_mask return decoder_input diff --git a/megatron/core/models/gpt/gpt_model.py 
b/megatron/core/models/gpt/gpt_model.py index a1230568cbd..9e70c677226 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -284,6 +284,7 @@ def _preprocess( decoder_input: Tensor = None, inference_context: BaseInferenceContext = None, packed_seq_params: PackedSeqParams = None, + padding_mask: Optional[Tensor] = None, ): """Preprocesses inputs for the transformer decoder. @@ -300,7 +301,20 @@ def _preprocess( if decoder_input is not None: pass elif self.pre_process: + if padding_mask is not None: + assert padding_mask.shape == input_ids.shape, ( + f"padding_mask shape {padding_mask.shape} does not match " + f"input_ids shape {input_ids.shape}" + ) decoder_input = self.embedding(input_ids=input_ids, position_ids=position_ids) + if padding_mask is not None and self.config.sequence_parallel: + padding_mask = ( + tensor_parallel.scatter_to_sequence_parallel_region( + padding_mask.transpose(0, 1).contiguous() + ) + .transpose(0, 1) + .contiguous() + ) else: # intermediate stage of pipeline # decoder will get hidden_states from encoder.input_tensor @@ -403,6 +417,7 @@ def _preprocess( rotary_pos_cos, rotary_pos_sin, sequence_len_offset, + padding_mask, ) if rotary_pos_cos_sin is not None: # only in the case of flashinfer fused rope will we @@ -446,6 +461,7 @@ def forward( *, inference_params: Optional[BaseInferenceContext] = None, loss_mask: Optional[Tensor] = None, + padding_mask: Optional[Tensor] = None, ) -> Tensor: """Forward function of the GPT Model This function passes the input tensors through the embedding layer, and then the decoder and finally into the post @@ -456,6 +472,9 @@ def forward( Args: runtime_gather_output (bool): Gather output at runtime. Default None means `parallel_output` arg in the constructor will be used. + padding_mask (Tensor, optional): Padding mask for MoE routing. + Shape [bsz, seq_length]. True = padding (exclude), False = valid (include). 
+ Only used for MoE layers to exclude padding tokens from routing computations. """ if self.config.fine_grained_activation_offloading: self.preprocess_for_fine_grained_offloading() @@ -468,13 +487,19 @@ def forward( decoder_input=decoder_input, inference_context=inference_context, packed_seq_params=packed_seq_params, + padding_mask=padding_mask, ) - (decoder_input, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, sequence_len_offset) = ( - preproc_output[:5] - ) + ( + decoder_input, + rotary_pos_emb, + rotary_pos_cos, + rotary_pos_sin, + sequence_len_offset, + padding_mask, + ) = preproc_output[:6] - rotary_pos_cos_sin = preproc_output[5] if len(preproc_output) == 6 else None + rotary_pos_cos_sin = preproc_output[6] if len(preproc_output) == 7 else None # Run decoder. hidden_states = self.decoder( @@ -487,6 +512,7 @@ def forward( rotary_pos_cos_sin=rotary_pos_cos_sin, packed_seq_params=packed_seq_params, sequence_len_offset=sequence_len_offset, + padding_mask=padding_mask, **(extra_block_kwargs or {}), ) @@ -724,6 +750,7 @@ def build_schedule_plan( runtime_gather_output: Optional[bool] = None, inference_params: Optional[BaseInferenceContext] = None, loss_mask: Optional[Tensor] = None, + padding_mask: Optional[Tensor] = None, ): """Builds a computation schedule plan for the model. @@ -749,6 +776,7 @@ def build_schedule_plan( inference_params (InferenceParams, optional): Parameters for inference. Defaults to None. loss_mask (Optional[Tensor], optional): Loss mask. Defaults to None. + padding_mask (Optional[Tensor], optional): Padding mask. Defaults to None. Returns: TransformerModelChunkSchedulePlan: The model chunk schedule plan. 
@@ -770,6 +798,7 @@ def build_schedule_plan( extra_block_kwargs, runtime_gather_output, loss_mask, + padding_mask, ) def sharded_state_dict( diff --git a/megatron/core/transformer/mlp.py b/megatron/core/transformer/mlp.py index 5d765484709..98e30887e7b 100644 --- a/megatron/core/transformer/mlp.py +++ b/megatron/core/transformer/mlp.py @@ -142,7 +142,7 @@ def __init__( tp_group=tp_group, ) - def forward(self, hidden_states, per_token_scale=None): + def forward(self, hidden_states, per_token_scale=None, **kwargs): """Perform the forward pass through the MLP block.""" # [s, b, 4 * h/p] nvtx_range_push(suffix="linear_fc1") diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py index 12ca61b64c1..3742d064508 100644 --- a/megatron/core/transformer/moe/moe_layer.py +++ b/megatron/core/transformer/moe/moe_layer.py @@ -206,13 +206,13 @@ def __init__( self.cudagraph_tensor_store = MoECudaGraphTensorStore() @maybe_skip_or_early_return_by_cudagraph("route") - def route(self, hidden_states: torch.Tensor): + def route(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): """Compute token routing for preprocessing. This method uses the router to determine which experts to send each token to, producing routing probabilities and a mapping. """ - probs, routing_map = self.router(hidden_states) + probs, routing_map = self.router(hidden_states, padding_mask=padding_mask) return probs, routing_map @maybe_skip_or_early_return_by_cudagraph("preprocess") @@ -308,7 +308,7 @@ def combine(self, output: torch.Tensor, shared_expert_output: Optional[torch.Ten output = output + shared_expert_output return output - def forward(self, hidden_states: torch.Tensor): + def forward(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): """Forward pass for the MoE layer. The forward pass comprises four main steps: @@ -318,7 +318,11 @@ def forward(self, hidden_states: torch.Tensor): 4. 
Combine: The outputs from the experts are combined and returned. Args: - hidden_states (torch.Tensor): The input tensor to the MoE layer. + hidden_states (torch.Tensor): The input tensor shape [seq_length, bsz, hidden_size]. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + used for correct auxiliary loss computation for packed sequence. + Shape = [bsz, seq_length]. True = padding (exclude), False = valid (include). + Defaults to None (all tokens are valid). Returns: A tuple containing the output tensor and the MLP bias, if any. @@ -329,11 +333,15 @@ def forward(self, hidden_states: torch.Tensor): "are enabled without also enabling sequence parallelism." ) + # Transpose from [bsz, seq_length] to [seq_length, bsz] to align with hidden_states + if padding_mask is not None: + padding_mask = padding_mask.transpose(0, 1).bool() + # MoE forward: route -> dispatch -> compute -> combine - def custom_forward(hidden_states): + def custom_forward(hidden_states, padding_mask=None): try: shared_expert_output = self.shared_experts_compute(hidden_states) - probs, routing_map = self.route(hidden_states) + probs, routing_map = self.route(hidden_states, padding_mask=padding_mask) hidden_states, probs, residual = self.preprocess(hidden_states, probs, routing_map) except MoECudaGraphPartialCaptureSignal as e: # This signal is raised from the maybe_skip_or_early_return_by_cudagraph decorator. 
@@ -358,11 +366,14 @@ def custom_forward(hidden_states): tensor_parallel.random.get_cuda_rng_tracker, parallel_state.get_tensor_model_parallel_group(), hidden_states, + padding_mask, ) else: - outputs = tensor_parallel.checkpoint(custom_forward, False, hidden_states) + outputs = tensor_parallel.checkpoint( + custom_forward, False, hidden_states, padding_mask + ) else: - outputs = custom_forward(hidden_states) + outputs = custom_forward(hidden_states, padding_mask) return outputs diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index 0837675507d..d915cfabb26 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -1,5 +1,4 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - import math from dataclasses import dataclass from typing import List, Optional, Union @@ -11,6 +10,7 @@ from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel import get_cuda_rng_tracker, get_expert_parallel_rng_tracker_name +from megatron.core.tensor_parallel.mappings import reduce_from_tensor_model_parallel_region from megatron.core.transformer.cuda_graphs import is_graph_capturing from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig @@ -120,18 +120,34 @@ def switch_load_balancing_loss_func( return aux_loss -def z_loss_func(logits, z_loss_coeff): +def z_loss_func(logits, z_loss_coeff, padding_mask: Optional[torch.Tensor] = None): """Encourages the router's logits to remain small to enhance stability. Please refer to the ST-MoE paper (https://arxiv.org/pdf/2202.08906.pdf) for details. Args: logits (torch.Tensor): The logits of the router. + z_loss_coeff (float): The coefficient for the z-loss. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. 
+ Shape [num_tokens]. True = padding (exclude), + False = valid (include). Defaults to None. Returns: torch.Tensor: The logits after applying the z-loss. """ + logsum = torch.logsumexp(logits, dim=-1) + z_loss_values = torch.square(logsum) + + if padding_mask is not None: + # Invert padding_mask: True (padding) -> 0, False (valid) -> 1 + valid_mask = ~padding_mask + # Only compute z_loss for valid (non-padding) tokens + z_loss_values = z_loss_values * valid_mask + # Compute mean over valid tokens only + num_valid_tokens = valid_mask.sum() + z_loss = z_loss_values.sum() / torch.clamp(num_valid_tokens, min=1.0) * z_loss_coeff + else: + z_loss = torch.mean(z_loss_values) * z_loss_coeff - z_loss = torch.mean(torch.square(torch.logsumexp(logits, dim=-1))) * z_loss_coeff return z_loss @@ -171,6 +187,28 @@ def get_capacity(num_tokens: int, num_experts: int, capacity_factor: float, min_ return capacity +def get_tokens_per_expert_and_token_count( + routing_map: torch.Tensor, + reduce_group: torch.distributed.ProcessGroup, + topk: int = None, + with_padding_mask: bool = False, +) -> torch.Tensor: + """ + Compute global_tokens_per_expert, local_num_tokens and total_num_tokens with padding mask. 
+ """ + local_tokens_per_expert = routing_map.sum(dim=0) + global_tokens_per_expert = reduce_from_tensor_model_parallel_region( + local_tokens_per_expert, reduce_group + ) + if with_padding_mask: + local_num_tokens = local_tokens_per_expert.sum() / topk + total_num_tokens = global_tokens_per_expert.sum() / topk + else: + local_num_tokens = routing_map.shape[0] + total_num_tokens = local_num_tokens * reduce_group.size() + return global_tokens_per_expert, local_num_tokens, total_num_tokens + + class MoEAuxLossAutoScaler(torch.autograd.Function): """An AutoScaler that triggers the backward pass and scales the grad for auxiliary loss.""" @@ -629,35 +667,48 @@ def compute_topk(scores, topk, num_groups=None, group_topk=None): def compute_routing_scores_for_aux_loss( - logits: torch.Tensor, topk: int, score_function: str, fused: bool = False + logits: torch.Tensor, + topk: int, + score_function: str, + fused: bool = False, + padding_mask: Optional[torch.Tensor] = None, ): """Compute routing scores based on the score function. Args: logits (torch.Tensor): The logits tensor after gating, shape: [num_tokens, num_experts]. - + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + Shape [num_tokens]. True = padding (exclude), + False = valid (include). Defaults to None. Returns: - torch.Tensor: The normalized routing scores. + Tuple[torch.Tensor, torch.Tensor]: routing_map and scores. """ if fused: if not HAVE_TE or fused_compute_score_for_moe_aux_loss is None: raise ValueError( "fused_compute_score_for_moe_aux_loss is not available. Please install TE >= 2.6.0." 
) - return fused_compute_score_for_moe_aux_loss( + routing_map, scores = fused_compute_score_for_moe_aux_loss( logits=logits, topk=topk, score_function=score_function ) - - if score_function == "softmax": - scores = torch.softmax(logits, dim=-1, dtype=torch.float32) - elif score_function == "sigmoid": - scores = torch.sigmoid(logits) - scores = scores / (scores.sum(dim=-1, keepdim=True) + 1e-20) else: - raise ValueError(f"Invalid score_function: {score_function}") + if score_function == "softmax": + scores = torch.softmax(logits, dim=-1, dtype=torch.float32) + elif score_function == "sigmoid": + scores = torch.sigmoid(logits) + scores = scores / (scores.sum(dim=-1, keepdim=True) + 1e-20) + else: + raise ValueError(f"Invalid score_function: {score_function}") + + _, top_indices = torch.topk(scores, k=topk, dim=1) + routing_map = torch.zeros_like(logits).int().scatter(1, top_indices, 1).bool() - _, top_indices = torch.topk(scores, k=topk, dim=1) - routing_map = torch.zeros_like(logits).int().scatter(1, top_indices, 1).bool() + # Apply padding mask to scores if provided + if padding_mask is not None: + # Invert padding_mask and make True indicates valid tokens + valid_mask = (~padding_mask).unsqueeze(-1) + routing_map = routing_map * valid_mask + scores = scores * valid_mask return routing_map, scores diff --git a/megatron/core/transformer/moe/router.py b/megatron/core/transformer/moe/router.py index 34d81a21ffa..bbfb01fec8b 100644 --- a/megatron/core/transformer/moe/router.py +++ b/megatron/core/transformer/moe/router.py @@ -1,12 +1,11 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from abc import ABC, abstractmethod -from typing import Optional +from typing import Optional, Union import torch from megatron.core.jit import jit_fuser -from megatron.core.tensor_parallel import reduce_from_tensor_model_parallel_region from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.moe.moe_utils import ( MoEAuxLossAutoScaler, @@ -14,6 +13,7 @@ apply_random_logits, apply_router_token_dropping, compute_routing_scores_for_aux_loss, + get_tokens_per_expert_and_token_count, router_gating_linear, save_to_aux_losses_tracker, sinkhorn, @@ -268,22 +268,28 @@ def is_aux_loss_enabled(self) -> bool: return False def _apply_aux_loss( - self, probs: torch.Tensor, scores_for_aux_loss: torch.Tensor, routing_map: torch.Tensor + self, + probs: torch.Tensor, + scores_for_aux_loss: torch.Tensor, + routing_map: torch.Tensor, + with_padding_mask: bool = False, ): """Apply the auxiliary loss for the given scores and routing map.""" aux_loss_coeff = self.get_aux_loss_coeff("aux_loss") if aux_loss_coeff == 0: return probs - tokens_per_expert = routing_map.sum(dim=0) - tokens_per_expert = reduce_from_tensor_model_parallel_region( - tokens_per_expert, self.tp_cp_group - ) - num_tokens = routing_map.shape[0] - total_num_tokens = num_tokens * self.tp_cp_group.size() + global_tokens_per_expert, local_num_tokens, total_num_tokens = ( + get_tokens_per_expert_and_token_count( + routing_map=routing_map, + reduce_group=self.tp_cp_group, + topk=self.topk, + with_padding_mask=with_padding_mask, + ) + ) aux_loss = switch_load_balancing_loss_func( probs=scores_for_aux_loss, - tokens_per_expert=tokens_per_expert, + tokens_per_expert=global_tokens_per_expert, total_num_tokens=total_num_tokens, topk=self.topk, num_experts=self.config.num_moe_experts, @@ -291,7 +297,12 @@ def _apply_aux_loss( fused=self.config.moe_router_fusion, ) probs = self.attach_and_log_load_balancing_loss( - probs, aux_loss_coeff, aux_loss, "load_balancing_loss", self.tp_cp_group + probs, 
+ aux_loss_coeff, + aux_loss, + "load_balancing_loss", + self.tp_cp_group, + valid_token_count=local_num_tokens, ) return probs @@ -302,6 +313,7 @@ def _apply_seq_aux_loss( routing_map: torch.Tensor, seq_length: int, bsz: int, + with_padding_mask: bool = False, ): """Apply the sequence-level auxiliary loss for the given scores and routing map. @@ -315,17 +327,21 @@ def _apply_seq_aux_loss( return probs scores_for_aux_loss = scores_for_aux_loss.reshape(seq_length, -1) - tokens_per_expert = routing_map.reshape(seq_length, -1).sum(dim=0) - tokens_per_expert = reduce_from_tensor_model_parallel_region( - tokens_per_expert, self.tp_cp_group + routing_map = routing_map.reshape(seq_length, -1) + + global_tokens_per_expert, local_num_tokens, total_num_tokens = ( + get_tokens_per_expert_and_token_count( + routing_map=routing_map, + reduce_group=self.tp_cp_group, + with_padding_mask=with_padding_mask, + topk=self.topk * bsz, + ) ) - total_num_tokens = seq_length * self.tp_cp_group.size() - aux_loss = ( switch_load_balancing_loss_func( probs=scores_for_aux_loss, - tokens_per_expert=tokens_per_expert, + tokens_per_expert=global_tokens_per_expert, total_num_tokens=total_num_tokens, topk=self.topk, num_experts=self.config.num_moe_experts, @@ -334,31 +350,42 @@ def _apply_seq_aux_loss( ) / bsz ) + probs = self.attach_and_log_load_balancing_loss( - probs, seq_aux_loss_coeff, aux_loss, "seq_load_balancing_loss", self.tp_cp_group + probs, + seq_aux_loss_coeff, + aux_loss, + "seq_load_balancing_loss", + self.tp_cp_group, + valid_token_count=local_num_tokens, ) return probs def _apply_global_aux_loss( - self, probs: torch.Tensor, scores_for_aux_loss: torch.Tensor, routing_map: torch.Tensor + self, + probs: torch.Tensor, + scores_for_aux_loss: torch.Tensor, + routing_map: torch.Tensor, + with_padding_mask: bool = False, ): """Apply the global auxiliary loss for the given scores and routing map.""" global_aux_loss_coeff = self.get_aux_loss_coeff("global_aux_loss") if 
global_aux_loss_coeff == 0: return probs - tokens_per_expert = routing_map.sum(dim=0) - tokens_per_expert = reduce_from_tensor_model_parallel_region( - tokens_per_expert, self.tp_dp_cp_group + # Use unified function to compute tokens_per_expert and num_tokens + global_tokens_per_expert, local_num_tokens, total_num_tokens = ( + get_tokens_per_expert_and_token_count( + routing_map=routing_map, + reduce_group=self.tp_dp_cp_group, + with_padding_mask=with_padding_mask, + topk=self.topk, + ) ) - - self.global_tokens_per_expert += tokens_per_expert + self.global_tokens_per_expert += global_tokens_per_expert self.ga_steps += 1 averated_tokens_per_expert = self.global_tokens_per_expert / self.ga_steps - num_tokens = scores_for_aux_loss.shape[0] - total_num_tokens = num_tokens * self.tp_dp_cp_group.size() - global_aux_loss = switch_load_balancing_loss_func( probs=scores_for_aux_loss, tokens_per_expert=averated_tokens_per_expert, @@ -374,6 +401,7 @@ def _apply_global_aux_loss( global_aux_loss, "global_load_balancing_loss", self.tp_dp_cp_group, + valid_token_count=local_num_tokens, reduce_group_has_dp=True, ) return probs @@ -385,6 +413,7 @@ def attach_and_log_load_balancing_loss( aux_loss: torch.Tensor, aux_loss_name: str, reduce_group: torch.distributed.ProcessGroup, + valid_token_count: Optional[Union[int, torch.Tensor]] = None, reduce_group_has_dp: bool = False, ): """Attach aux loss function to activation and add to logging. @@ -395,6 +424,9 @@ def attach_and_log_load_balancing_loss( aux_loss (torch.Tensor): The auxiliary loss tensor. aux_loss_name (str): The name of the auxiliary loss for logging. reduce_group (torch.distributed.ProcessGroup): The group for reducing the loss. + valid_token_count (int or torch.Tensor, optional): Number of valid tokens excluding + padding tokens. Can be a Python int or a torch.Tensor (typically 0-d tensor). + If None, uses activation.shape[0]. Defaults to None. reduce_group_has_dp (bool): Whether the reduce group has data parallel ranks. 
Set this to True if the reduce group has data parallel ranks. This flag is used to ensure the correct reduction in aux loss tracking. @@ -422,17 +454,22 @@ def attach_and_log_load_balancing_loss( # which scales both the main_loss gradient and aux_loss gradient by # 1/(num_local_tokens * dp_size * num_micro_batches) in finalize_model_grads function. # To correct this scaling, we need to scale the aux_loss by num_local_tokens here. - activation = MoEAuxLossAutoScaler.apply(activation, aux_loss * activation.shape[0]) + # Use valid_token_count (excluding padding) if provided, otherwise use total tokens. + num_tokens = valid_token_count if valid_token_count is not None else activation.shape[0] + activation = MoEAuxLossAutoScaler.apply(activation, aux_loss * num_tokens) else: activation = MoEAuxLossAutoScaler.apply(activation, aux_loss) return activation - def apply_z_loss(self, logits): + def apply_z_loss(self, logits, padding_mask: Optional[torch.Tensor] = None): """Encourages the router's logits to remain small to enhance stability. Please refer to the ST-MoE paper (https://arxiv.org/pdf/2202.08906.pdf) for details. Args: logits (torch.Tensor): The logits of the router. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + Shape [num_tokens]. True = padding (exclude), + False = valid (include). Defaults to None. Returns: torch.Tensor: The logits after applying the z-loss. @@ -440,7 +477,7 @@ def apply_z_loss(self, logits): if self.config.moe_z_loss_coeff is not None and self.training and torch.is_grad_enabled(): # Skip Z loss calculations when using torch.no_grad() or checkpointing. 
moe_z_loss_coeff = self.config.moe_z_loss_coeff / self.tp_cp_group.size() - z_loss = z_loss_func(logits, moe_z_loss_coeff) + z_loss = z_loss_func(logits, moe_z_loss_coeff, padding_mask=padding_mask) scale_up = 1.0 if self.calculate_per_token_loss: # The expected final scaling for z_loss gradients is @@ -450,7 +487,9 @@ def apply_z_loss(self, logits): # which scales both the main_loss gradient and z_loss gradient by # 1/(num_local_tokens * dp_size * num_micro_batches) in finalize_model_grads(). # To correct this scaling, we need to scale the z_loss by num_local_tokens here. - logits = MoEAuxLossAutoScaler.apply(logits, z_loss * logits.shape[0]) + # Count valid tokens: sum of inverted mask (False -> True = valid) + num_tokens = (~padding_mask).sum() if padding_mask is not None else logits.shape[0] + logits = MoEAuxLossAutoScaler.apply(logits, z_loss * num_tokens) else: logits = MoEAuxLossAutoScaler.apply(logits, z_loss) @@ -484,20 +523,32 @@ def apply_input_jitter(self, input: torch.Tensor): return input @jit_fuser - def _apply_expert_bias(self, routing_map: torch.Tensor): + def _apply_expert_bias( + self, routing_map: torch.Tensor, padding_mask: Optional[torch.Tensor] = None + ): """ Update expert bias and tokens_per_expert Prevent extra local tokens accumulation on evaluation or activation recomputation + + Args: + routing_map (torch.Tensor): Token to expert routing map, [num_tokens, num_experts]. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + Shape [num_tokens]. True = padding (exclude), False = valid (include). 
""" if self.enable_expert_bias and torch.is_grad_enabled(): with torch.no_grad(): + if padding_mask is not None: + routing_map = routing_map & (~padding_mask) self.local_tokens_per_expert += routing_map.sum(dim=0) - def routing(self, logits: torch.Tensor): + def routing(self, logits: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): """Top-k routing function Args: logits (torch.Tensor): Logits tensor after gating. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + Shape = [seq_length, bsz]. True=padding(exclude), + False=valid(include). Defaults to None. Returns: probs (torch.Tensor): The probabilities of token to experts assignment. @@ -507,8 +558,12 @@ def routing(self, logits: torch.Tensor): seq_length, bsz = logits.shape[:2] logits = logits.view(-1, self.config.num_moe_experts) + # Flatten padding_mask to [num_tokens] if provided + if padding_mask is not None: + padding_mask = padding_mask.reshape(-1) + # Apply Z-Loss - logits = self.apply_z_loss(logits) + logits = self.apply_z_loss(logits, padding_mask=padding_mask) # Calculate probs and routing_map for token dispatching if self.routing_type == "sinkhorn": @@ -541,18 +596,35 @@ def routing(self, logits: torch.Tensor): if self.training and torch.is_grad_enabled() and self.is_aux_loss_enabled(): # Calculate scores and routing_map for aux loss routing_map_for_aux_loss, scores_for_aux_loss = compute_routing_scores_for_aux_loss( - logits, self.topk, self.score_function, fused=self.config.moe_router_fusion + logits, + self.topk, + self.score_function, + fused=self.config.moe_router_fusion, + padding_mask=padding_mask, + ) + probs = self._apply_aux_loss( + probs, + scores_for_aux_loss, + routing_map_for_aux_loss, + with_padding_mask=padding_mask is not None, ) - probs = self._apply_aux_loss(probs, scores_for_aux_loss, routing_map_for_aux_loss) probs = self._apply_seq_aux_loss( - probs, scores_for_aux_loss, routing_map_for_aux_loss, seq_length, bsz + probs, + 
scores_for_aux_loss, + routing_map_for_aux_loss, + seq_length, + bsz, + with_padding_mask=padding_mask is not None, ) probs = self._apply_global_aux_loss( - probs, scores_for_aux_loss, routing_map_for_aux_loss + probs, + scores_for_aux_loss, + routing_map_for_aux_loss, + with_padding_mask=padding_mask is not None, ) # Optionally apply expert bias - self._apply_expert_bias(routing_map) + self._apply_expert_bias(routing_map, padding_mask=padding_mask) return probs, routing_map @@ -562,12 +634,15 @@ def reset_global_aux_loss_tracker(self): self.global_tokens_per_expert.zero_() self.ga_steps.zero_() - def forward(self, input: torch.Tensor): + def forward(self, input: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): """ Forward pass of the router. Args: input (torch.Tensor): Input tensor. + padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions. + Shape = [seq_length, bsz]. True=padding(exclude), + False=valid(include). Defaults to None. """ self._maintain_float32_expert_bias() @@ -579,7 +654,7 @@ def forward(self, input: torch.Tensor): # Apply force load balancing with random logits for benchmark logits = apply_random_logits(logits) - probs, routing_map = self.routing(logits) + probs, routing_map = self.routing(logits, padding_mask=padding_mask) return probs, routing_map diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index 023db1fe75a..cbbd7ec00eb 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -390,7 +390,6 @@ def build_layer(layer_spec, layer_number): def has_final_layernorm_in_this_stage(self): """ Check if this vpp stage contains the final layernorm. - Note: Final layernorm now has been moved from the post-process stage to the last decoder layer by using this function. 
@@ -429,12 +428,18 @@ def _checkpointed_forward( attention_bias: Tensor, packed_seq_params: PackedSeqParams, use_inner_quantization_context: bool, + padding_mask: Optional[Tensor] = None, ): """Forward method with activation checkpointing.""" def custom(start: int, end: int): def custom_forward( - hidden_states, attention_mask, context, context_mask, rotary_pos_emb + hidden_states, + attention_mask, + context, + context_mask, + rotary_pos_emb, + padding_mask=None, ): for index in range(start, end): layer = self._get_layer(index) @@ -465,6 +470,7 @@ def custom_forward( attention_bias=attention_bias, inference_context=None, packed_seq_params=packed_seq_params, + padding_mask=padding_mask, ) return hidden_states, context @@ -484,6 +490,7 @@ def checkpoint_handler(forward_func): context, context_mask, rotary_pos_emb, + padding_mask, ) else: return tensor_parallel.checkpoint( @@ -494,6 +501,7 @@ def checkpoint_handler(forward_func): context, context_mask, rotary_pos_emb, + padding_mask, ) if self.config.recompute_method == 'uniform': @@ -599,6 +607,7 @@ def forward( inference_context: Optional[BaseInferenceContext] = None, packed_seq_params: Optional[PackedSeqParams] = None, sequence_len_offset: Optional[Tensor] = None, + padding_mask: Optional[Tensor] = None, *, inference_params: Optional[BaseInferenceContext] = None, dynamic_inference_decode_only: Optional[bool] = None, @@ -708,6 +717,7 @@ def forward( attention_bias=attention_bias, packed_seq_params=packed_seq_params, use_inner_quantization_context=use_inner_quantization_context, + padding_mask=padding_mask, ) else: for l_no, layer in enumerate(self.layers): @@ -745,6 +755,7 @@ def forward( inference_context=inference_context, packed_seq_params=packed_seq_params, sequence_len_offset=sequence_len_offset, + padding_mask=padding_mask, ) if ( diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index 3ea40577009..5c310cc81e4 100644 --- 
a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -1,5 +1,6 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +import functools import logging import warnings from abc import ABC @@ -457,7 +458,12 @@ def forward(self, *args, **kwargs): # runners in the cuda graph manager kwargs.pop("dynamic_inference_decode_only", None) hidden_states, context = self._forward_attention(*args, **kwargs) - output = self._forward_mlp(hidden_states, kwargs.get("inference_context", None)) + + output = self._forward_mlp( + hidden_states, + kwargs.get("inference_context", None), + padding_mask=kwargs.get("padding_mask", None), + ) return output, context def _forward_attention( @@ -474,6 +480,7 @@ def _forward_attention( inference_context: Optional[Any] = None, packed_seq_params: Optional[PackedSeqParams] = None, sequence_len_offset: Optional[Tensor] = None, + padding_mask: Optional[Tensor] = None, *, inference_params: Optional[Any] = None, ): @@ -591,12 +598,18 @@ def _forward_attention( return hidden_states, context - def _forward_mlp(self, hidden_states, inference_context=None): + def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None): """ Perform a forward pass through the feed-forward layer. Args: hidden_states (Tensor): Transformed hidden states before the MLP layernorm. + Shape [seq_length, batch_size, hidden_size]. + inference_context: Inference context for optimizations. + padding_mask (Tensor, optional): Padding mask for MoE routing. + Shape [bsz, seq_length]. True = padding (exclude), False = valid (include). + Only used for MoE layers to exclude padding tokens from aux loss computations. + The MoELayer will internally transform this to [seq_length, bsz] format. Returns: output (Tensor): Transformed hidden states of shape [s, b, h]. 
@@ -642,7 +655,7 @@ def _forward_mlp(self, hidden_states, inference_context=None): assert ( not self.recompute_pre_mlp_layernorm ), "Recomputation is not supported for CUDA graph." - cudagraph_outputs = self.mlp(pre_mlp_layernorm_output) + cudagraph_outputs = self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask) nvtx_range_pop(suffix="mlp") return cudagraph_outputs + [residual] elif self.recompute_mlp: @@ -656,10 +669,13 @@ def _forward_mlp(self, hidden_states, inference_context=None): tensor_parallel.random.get_cuda_rng_tracker, self.pg_collection.tp, pre_mlp_layernorm_output, + padding_mask=padding_mask, ) else: mlp_output_with_bias = tensor_parallel.checkpoint( - self.mlp, False, pre_mlp_layernorm_output + functools.partial(self.mlp, padding_mask=padding_mask), + False, + pre_mlp_layernorm_output, ) elif should_chunk_mlp_for_prefill: # Chunk input along sequence dimension @@ -675,7 +691,7 @@ def _forward_mlp(self, hidden_states, inference_context=None): bias_output = torch.stack(bias_chunks, dim=0).sum(dim=0) if bias_chunks else None mlp_output_with_bias = (mlp_output, bias_output) else: - mlp_output_with_bias = self.mlp(pre_mlp_layernorm_output) + mlp_output_with_bias = self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask) if self.recompute_pre_mlp_layernorm: # discard the output of the pre-mlp layernorm and register the recompute diff --git a/tests/test_utils/python_scripts/recipe_parser.py b/tests/test_utils/python_scripts/recipe_parser.py index a497bdbd9de..b866fbbf5c2 100644 --- a/tests/test_utils/python_scripts/recipe_parser.py +++ b/tests/test_utils/python_scripts/recipe_parser.py @@ -1,3 +1,4 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import copy import itertools import logging diff --git a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py index 81e61a3404a..6c59dd3f9e3 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py @@ -23,7 +23,7 @@ from tests.unit_tests.test_utilities import Utils -def build_model(config): +def build_model(config, use_padding_mask=False): seq_len = 32 max_seq_len = 300 # ids = random.sample([i for i in range(max_seq_len)], seq_len) @@ -39,6 +39,12 @@ def build_model(config): "attention_mask": torch.ones((1, 1, seq_len, seq_len), dtype=bool).cuda(), } + # Optionally add padding_mask with same shape as input_ids + if use_padding_mask: + padding_mask = torch.zeros((1, seq_len), dtype=torch.bool).cuda() + padding_mask[0, -8:] = True + data["padding_mask"] = padding_mask + # build layer spec transformer_layer_spec = get_gpt_decoder_block_spec(config=config, use_transformer_engine=True) mtp_block_spec = get_gpt_mtp_block_spec(config, transformer_layer_spec.layer_specs[-1], True) @@ -48,7 +54,7 @@ def build_model(config): config=config, transformer_layer_spec=transformer_layer_spec, mtp_block_spec=mtp_block_spec, - vocab_size=100, + vocab_size=128, pre_process=True, post_process=True, max_sequence_length=max_seq_len, @@ -174,3 +180,109 @@ def test_1f1b_schedule_model_chunk(self, mtp_layers, dispatcher_type, fp8_flag, gpt_models[i] = None gc.collect() torch.cuda.empty_cache() + + @pytest.mark.skipif(not is_te_min_version("1.9.0.dev0"), reason="Requires TE >= 1.9.0.dev0") + @pytest.mark.parametrize("dispatcher_type", get_valid_token_dispatcher_types()) + @pytest.mark.parametrize("layers", [[2, 1], [1, 1]]) + @pytest.mark.parametrize("tp_size", [1, 2, 4, 8]) + def test_1f1b_schedule_model_chunk_with_padding_mask(self, dispatcher_type, layers, tp_size): + """ + Verifies all-to-all overlap optimization with padding_mask produces + the same 
results as the reference implementation with various TP/EP/CP combinations. + """ + # Re-initialize model parallel with the specified configuration + Utils.destroy_model_parallel() + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp_size, + pipeline_model_parallel_size=1, + expert_model_parallel_size=4, + expert_tensor_parallel_size=1, + ) + set_streams() + + microbatches = 1 + + gpt_models = [] + schedule_plans = [] + ref_captures = [] + datas = [] + + # create TransformerConfig + extra_kwargs = { + "moe_token_dispatcher_type": dispatcher_type, + "tensor_model_parallel_size": tp_size, + "sequence_parallel": tp_size > 1, + } + if dispatcher_type == "flex": + extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" + extra_kwargs["moe_router_dtype"] = "fp32" + with deterministic_mode(): + for layer_num in layers: + output_tensors = [] + # build config + config = get_test_config(num_layers=layer_num, extra_kwargs=extra_kwargs) + # build model with padding_mask + gpt_model, schedule_plan, data = build_model(config, use_padding_mask=True) + gpt_model.cuda() + gpt_models.append(gpt_model) + datas.append(data) + schedule_plans.append(schedule_plan) + + # run reference + for _ in range(microbatches): + loss = gpt_model.forward(**data) + loss = float16_to_fp32(loss) + loss.backward(torch.ones_like(loss)) + output_tensors.append(loss) + + capture = {"outputs": output_tensors} + for name, param in gpt_model.named_parameters(): + capture[name] = param.grad + ref_captures.append(capture) + gpt_model.zero_grad() + assert gpt_models[0].embedding is not None + assert gpt_models[1].embedding is not None + # run a2a overlap + capture_0 = {"outputs": []} + capture_1 = {"outputs": []} + a2a_captures = [capture_0, capture_1] + for i in range(microbatches): + # 1st forward + if i > 0: + assert ( + schedule_plans[0].pre_process is None + ), "pre_process should be released after backward" + schedule_plans[0] = gpt_models[0].build_schedule_plan(**datas[0]) + schedule_plans[1] 
= gpt_models[1].build_schedule_plan(**datas[1]) + f_input_0 = TransformerModelChunkSchedulePlan.run(schedule_plans[0], None) + capture_0["outputs"].append(f_input_0) + # overlap + f_input_1 = TransformerModelChunkSchedulePlan.run( + schedule_plans[1], schedule_plans[0], b_grad=torch.ones_like(f_input_0) + ) + capture_1["outputs"].append(f_input_1) + # last backward + TransformerModelChunkSchedulePlan.run( + None, schedule_plans[1], b_grad=torch.ones_like(f_input_1) + ) + for i in range(len(gpt_models)): + for name, param in gpt_models[i].named_parameters(): + a2a_captures[i][name] = param.grad + + # compare results + for i in range(len(ref_captures)): + comp_res = compare_captures(ref_captures[i], a2a_captures[i], True, True) + assert comp_res[0], f"[rank {torch.distributed.get_rank()}] {comp_res[1]}" + + # release resources is necessary, otherwise later testcases will oom + for i in range(len(schedule_plans)): + schedule_plans[i] = None + ref_captures[i] = None + a2a_captures[i] = None + for k in datas[i]: + datas[i][k] = None + datas[i] = None + gpt_models[i].zero_grad() + gpt_models[i] = None + gc.collect() + torch.cuda.empty_cache() diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py index 7fb97f6e586..5ec096e5a04 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py @@ -502,8 +502,8 @@ def test_mtp_layer_overlap(self, dispatcher_type, fp8_flag): position_ids = torch.tensor(data, dtype=torch.int64).repeat((1, 1)).cuda() attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool).cuda() # get rotary pos emb - _, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, _ = gpt_model._preprocess( - input_ids, position_ids + _, rotary_pos_emb, rotary_pos_cos, rotary_pos_sin, _, _padding_mask = ( + gpt_model._preprocess(input_ids, position_ids) ) # reset model params = reset_model(gpt_model) diff --git 
a/tests/unit_tests/transformer/moe/test_aux_loss.py b/tests/unit_tests/transformer/moe/test_aux_loss.py index b1f78582383..f5726777383 100644 --- a/tests/unit_tests/transformer/moe/test_aux_loss.py +++ b/tests/unit_tests/transformer/moe/test_aux_loss.py @@ -576,3 +576,192 @@ def test_force_balanced_aux_loss(self, tp_size, ep_size, cp_size): reduce_from_tensor_model_parallel_region(aux_loss, router.tp_cp_group) assert aux_loss.item() == 1, f"{aux_loss_type}: {aux_loss.item()}" clear_aux_losses_tracker() + + +class TestPaddingMaskAuxLoss: + """Test padding mask support in various aux loss types.""" + + def setup_model_parallel(self, tp_size=1, ep_size=1, cp_size=1, sequence_parallel=False): + """Initialize model parallel with given configuration. + + Args: + tp_size: Tensor parallel size. + ep_size: Expert parallel size. + cp_size: Context parallel size. + """ + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp_size, + pipeline_model_parallel_size=1, + context_parallel_size=cp_size, + expert_model_parallel_size=ep_size, + ) + _set_random_seed(seed_=123, data_parallel_random_init=False) + + # Store parallel configuration + self.tp_size = tp_size + self.ep_size = ep_size + self.cp_size = cp_size + + # Default configuration + self.default_transformer_config = TransformerConfig( + num_layers=1, + hidden_size=12, + num_attention_heads=8, + num_moe_experts=32, + use_cpu_initialization=True, + moe_router_load_balancing_type="aux_loss", + moe_router_topk=8, + moe_aux_loss_coeff=1.0, + bf16=True, + params_dtype=torch.bfloat16, + add_bias_linear=False, + tensor_model_parallel_size=tp_size, + expert_model_parallel_size=ep_size, + context_parallel_size=cp_size, + sequence_parallel=sequence_parallel and tp_size > 1, + ) + + def new_router(self, **kwargs): + """Create a new router with updated configuration.""" + pg_collection = get_default_pg_collection() + new_transformer_config = dataclasses.replace(self.default_transformer_config, **kwargs) + router = 
TopKRouter(config=new_transformer_config, pg_collection=pg_collection) + router.set_layer_number(0) + return router + + @pytest.mark.internal + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + @pytest.mark.parametrize("sequence_parallel", [True, False]) + @pytest.mark.parametrize("aux_loss_type", ["aux_loss", "seq_aux_loss", "global_aux_loss"]) + @pytest.mark.parametrize( + "tp_size,ep_size,cp_size", [(8, 1, 1), (4, 2, 1), (1, 1, 8), (2, 1, 4), (2, 2, 2)] + ) + def test_padding_mask_removes_padding_tokens( + self, aux_loss_type, tp_size, ep_size, cp_size, sequence_parallel + ): + """Test that padding tokens are correctly excluded from aux loss calculation.""" + # Initialize model parallel with given configuration + self.setup_model_parallel( + tp_size=tp_size, ep_size=ep_size, cp_size=cp_size, sequence_parallel=sequence_parallel + ) + + try: + clear_aux_losses_tracker() + + router = self.new_router( + moe_router_load_balancing_type=aux_loss_type, + moe_aux_loss_coeff=1.0, + moe_router_dtype="fp64", + ).cuda() + + seq_len = 32 + batch_size = 2 + hidden_size = router.config.hidden_size + + # Create input with padding + hidden_states_full = torch.randn( + (seq_len, batch_size, hidden_size), dtype=torch.bfloat16, device='cuda' + ) + + # Create padding mask: first half valid (False), second half padding (True) + # Convention: True = padding (exclude), False = valid (include) + padding_mask = torch.zeros((seq_len, batch_size), dtype=torch.bool, device='cuda') + padding_mask[seq_len // 2 :, :] = True + + # Test with padding mask + router.weight.grad = None + scores_with_mask, routing_map_with_mask = router( + hidden_states_full, padding_mask=padding_mask + ) + scores_with_mask.backward(torch.zeros_like(scores_with_mask)) + + loss_name = { + "aux_loss": "load_balancing_loss", + "seq_aux_loss": "seq_load_balancing_loss", + "global_aux_loss": "global_load_balancing_loss", + }[aux_loss_type] + + tracker = get_moe_layer_wise_logging_tracker() + 
aux_loss_with_mask = tracker[loss_name]["values"][0].clone() + grad_with_mask = router.weight.grad.clone() + + # Test without padding (with only half of the tokens) + clear_aux_losses_tracker() + router.weight.grad = None + hidden_states_valid = hidden_states_full[: seq_len // 2, :, :] + scores_without_mask, routing_map_without_mask = router(hidden_states_valid) + scores_without_mask.backward(torch.zeros_like(scores_without_mask)) + + aux_loss_without_mask = tracker[loss_name]["values"][0].clone() + grad_without_mask = router.weight.grad.clone() + + # The aux loss with mask should be close to the aux loss without mask + assert torch.equal(aux_loss_with_mask, aux_loss_without_mask) + assert torch.equal(grad_with_mask, grad_without_mask) + + clear_aux_losses_tracker() + finally: + # Always cleanup model parallel + Utils.destroy_model_parallel() + + @pytest.mark.internal + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + @pytest.mark.parametrize( + "tp_size,ep_size,cp_size", [(8, 1, 1), (4, 2, 1), (1, 1, 8), (2, 1, 4), (2, 2, 2)] + ) + def test_padding_mask_with_z_loss(self, tp_size, ep_size, cp_size): + """Test that padding mask works correctly with z_loss.""" + # Initialize model parallel with given configuration + self.setup_model_parallel(tp_size=tp_size, ep_size=ep_size, cp_size=cp_size) + + try: + clear_aux_losses_tracker() + + router = self.new_router( + moe_router_load_balancing_type="aux_loss", + moe_aux_loss_coeff=0.0, + moe_z_loss_coeff=1.0, + moe_router_dtype="fp32", + ).cuda() + + seq_len = 32 + batch_size = 2 + hidden_size = router.config.hidden_size + + # Create input + hidden_states_full = torch.randn( + (seq_len, batch_size, hidden_size), dtype=torch.bfloat16, device='cuda' + ) + + # Create padding mask: first half valid (False), second half padding (True) + # Convention: True = padding (exclude), False = valid (include) + padding_mask = torch.zeros((seq_len, batch_size), dtype=torch.bool, device='cuda') + 
padding_mask[seq_len // 2 :, :] = True + + # Test with padding mask + router.weight.grad = None + scores_with_mask, _ = router(hidden_states_full, padding_mask=padding_mask) + scores_with_mask.sum().backward() + + tracker = get_moe_layer_wise_logging_tracker() + z_loss_with_mask = tracker["z_loss"]["values"][0].clone() + grad_with_mask = router.weight.grad.clone() + + # Test without padding (with only half of the tokens) + clear_aux_losses_tracker() + router.weight.grad = None + hidden_states_valid = hidden_states_full[: seq_len // 2, :, :] + scores_without_mask, _ = router(hidden_states_valid) + scores_without_mask.sum().backward() + + z_loss_without_mask = tracker["z_loss"]["values"][0].clone() + grad_without_mask = router.weight.grad.clone() + + # The z_loss with mask should be close to the z_loss without mask + assert torch.equal(z_loss_with_mask, z_loss_without_mask) + assert torch.equal(grad_with_mask, grad_without_mask) + + clear_aux_losses_tracker() + finally: + # Always cleanup model parallel + Utils.destroy_model_parallel() diff --git a/tests/unit_tests/transformer/moe/test_routers.py b/tests/unit_tests/transformer/moe/test_routers.py index 677d938cdc7..abd1a4db2dc 100644 --- a/tests/unit_tests/transformer/moe/test_routers.py +++ b/tests/unit_tests/transformer/moe/test_routers.py @@ -125,6 +125,53 @@ def test_aux_loss(self): out.sum().mul_(0).backward() assert self.sequential_mlp.router.weight.grad.abs().sum() > 0 + @pytest.mark.internal + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_router_with_padding_mask(self): + """Test that padding mask correctly excludes padding tokens from routing.""" + self.router = self.router.cuda() + seq_len = 32 + batch_size = 2 + hidden_size = self.router.config.hidden_size + + # Create input with shape [seq_len, batch_size, hidden_size] + hidden_states = torch.randn((seq_len, batch_size, hidden_size)).cuda().bfloat16() + + # Create padding mask: first half valid (False), second 
half padding (True) + # padding_mask shape: [seq_len, batch_size] + # Convention: True = padding (exclude), False = valid (include) + padding_mask = torch.zeros((seq_len, batch_size), dtype=torch.bool, device='cuda') + padding_mask[seq_len // 2 :, :] = True # Second half is padding + + # Test forward pass with padding mask + with torch.no_grad(): + probs_with_mask, routing_map_with_mask = self.router( + hidden_states, padding_mask=padding_mask + ) + + # Test forward pass without padding mask (only valid tokens) + hidden_states_valid = hidden_states[: seq_len // 2, :, :] + probs_without_mask, routing_map_without_mask = self.router(hidden_states_valid) + + # The valid part of routing with mask should match routing without mask + probs_valid_part = probs_with_mask.reshape(seq_len, batch_size, -1)[ + : seq_len // 2, :, : + ] + probs_valid_part = probs_valid_part.reshape(-1, probs_valid_part.shape[-1]) + + # Check that shapes are as expected + assert probs_with_mask.shape == ( + seq_len * batch_size, + self.router.config.num_moe_experts, + ) + assert routing_map_with_mask.shape == ( + seq_len * batch_size, + self.router.config.num_moe_experts, + ) + + # Verify that probs for valid tokens are similar + assert torch.equal(probs_valid_part, probs_without_mask) + @pytest.mark.internal @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") def test_router_dtype(self): From 5823534a4078b030134e7e2d703d7817b1a64df9 Mon Sep 17 00:00:00 2001 From: Kunlun Li <94586211+kunlunl@users.noreply.github.com> Date: Wed, 7 Jan 2026 01:25:07 +0800 Subject: [PATCH 215/334] [dev] Reapply fsdp mxfp8 (#2828) Signed-off-by: jianbinc Co-authored-by: jianbinc --- .../distributed/fsdp/mcore_fsdp_adapter.py | 4 + .../fsdp/src/megatron_fsdp/megatron_fsdp.py | 157 +++--- .../fsdp/src/megatron_fsdp/mixed_precision.py | 334 +++++++++++++ .../megatron_fsdp/param_and_grad_buffer.py | 450 +++++++++++++----- .../fsdp/src/megatron_fsdp/utils.py | 252 +--------- 
megatron/training/arguments.py | 7 + 6 files changed, 783 insertions(+), 421 deletions(-) create mode 100644 megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py diff --git a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py index 7432a7f9a36..d6384e70488 100644 --- a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +++ b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py @@ -111,6 +111,9 @@ def __init__( dist_index=self.megatron_fsdp_dist_index, calculate_per_token_loss=config.calculate_per_token_loss, init_model_with_meta_device=config.init_model_with_meta_device, + enable_fine_grained_param_gather_hook=( + config.fp8_recipe == "mxfp8" and ddp_config.fp8_param_gather + ), ), ) self.param_and_grad_buffer = self.module.param_and_grad_buffer @@ -123,6 +126,7 @@ def __init__( self.broadcast_params = self.module.broadcast_params self.module.state_dict_for_save_checkpoint = self.module.state_dict self.state_dict_for_save_checkpoint = self.state_dict + self.module.config = config self.sync_rng_states_across_tp_group() diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index 5e953e8c6c2..e2cbccf4356 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -23,6 +23,20 @@ import torch.nn as nn from torch.utils._pytree import tree_flatten, tree_map, tree_unflatten +from .mixed_precision import ( + fp8_create_transpose_cache, + fp8_discard_transpose_cache, + is_float8tensor, +) +from .param_and_grad_buffer import ( + AllGatherPipeline, + BucketingPolicy, + GradReducePipeline, + ParamAndGradBuffer, + PrefetchOrder, + override_sharded_param_methods_with_safety_checks, + to_local_if_dtensor, +) from .utils import FSDPDistributedIndex logger = logging.getLogger(__name__) @@ -34,23 +48,12 @@ from 
megatron.core.distributed.distributed_data_parallel_config import ( DistributedDataParallelConfig, ) - from megatron.core.fp8_utils import is_float8tensor from megatron.core.utils import is_submodule except ImportError: # Megatron-LM is not installed, use Megatron-FSDP as a standalone module. logger.info("Megatron Core is not installed, Megatron-FSDP will run without Megatron Core.") from .distributed_data_parallel_config import DistributedDataParallelConfig - from .utils import is_float8tensor, is_submodule - -from .param_and_grad_buffer import ( - AllGatherPipeline, - BucketingPolicy, - GradReducePipeline, - ParamAndGradBuffer, - PrefetchOrder, - override_sharded_param_methods_with_safety_checks, - to_local_if_dtensor, -) + from .utils import is_submodule class TrainingState(Enum): @@ -168,6 +171,7 @@ def __init__( nccl_ub: bool = False, fsdp_double_buffer: bool = False, disable_symmetric_registration: bool = False, + enable_fine_grained_param_gather_hook: bool = False, ): super().__init__() # If device is not specified, use the current device. 
@@ -217,6 +221,7 @@ def __init__( self.calculate_per_token_loss = calculate_per_token_loss self.init_model_with_meta_device = init_model_with_meta_device + self.enable_fine_grained_param_gather_hook = enable_fine_grained_param_gather_hook # Whether to constantly synchronize the model every training iteration, # which defaults to False to overlap communication with computation @@ -406,6 +411,7 @@ def all_gather_and_wait_parameters_ready( prefetch=True, prefetch_order=PrefetchOrder.FORWARD_PASS_ORDER, wait_bucket_ready=True, + bwd=False, ): """ All-gather parameters across the data parallel group and wait for @@ -432,11 +438,14 @@ def all_gather_and_wait_parameters_ready( and self.ddp_config.outer_dp_sharding_strategy != "no_shard" and (self.microbatch_count == 0 or self.model_auto_sync) ), + bwd=bwd, ) if wait_bucket_ready: for param in params: bucket_id = self.param_and_grad_buffer.param_to_param_group[param] - ag_pipeline.wait_bucket_ready(bucket_id) + ag_pipeline.wait_bucket_ready(bucket_id, bwd) + if bwd and is_float8tensor(param): + fp8_create_transpose_cache(param) for param in params: # This setting is needed to make FSDP store the weight object when used @@ -495,19 +504,17 @@ def _register_fsdp_hooks(self, root_module): """ fsdp_unit_modules = self.fsdp_unit_modules - def release_module_parameters(module, *unused): + def release_module_parameters(module, bwd, *unused): for param in module.parameters(): bucket_id = self.param_and_grad_buffer.param_to_param_group[param] - self.all_gather_pipeline.release_bucket(bucket_id) - + self.all_gather_pipeline.release_bucket(bucket_id, bwd) if not self.ddp_config.keep_fp8_transpose_cache: release_params_fp8_transpose_cache(module.parameters()) def release_params_fp8_transpose_cache(params): for param in params: if is_float8tensor(param): - param._transpose_invalid = True - param._transpose = None + fp8_discard_transpose_cache(param) def _grad_acc(param): """ @@ -564,12 +571,15 @@ def _post_backward(module, *unused): if 
self.ddp_config.data_parallel_sharding_strategy == "optim_grads_params": # Deallocate the module parameters after the backward pass, # because we have our data-parallel gradients computed. - release_module_parameters(module) + release_module_parameters(module, bwd=True) module._training_state = TrainingState.IDLE param_list = list(module.parameters()) else: param_list = list(module.parameters(recurse=False)) + if self.enable_fine_grained_param_gather_hook: + param_list = list(module.parameters(recurse=False)) + # If the parameter is shared, we do not accumulate gradients # here, as the gradients will be accumulated in the # root post-backward hook. @@ -621,6 +631,9 @@ def _pre_forward_param_unshard( # to allocate as little memory as possible for this forward pass. param_list = list(module.parameters(recurse=False)) + if self.enable_fine_grained_param_gather_hook: + param_list = list(module.parameters(recurse=False)) + # All-gather the parameters before the forward pass. self.all_gather_and_wait_parameters_ready( params=param_list, @@ -720,7 +733,7 @@ def _root_post_backward(*unused): if self.model_auto_sync: self.finish_grad_sync() - def _pre_backward(module: nn.Module, *unused): + def _pre_backward_param_unshard(module: nn.Module, *unused): """ Sub-module pre-backward hook to all-gather the module parameters before the backward pass. @@ -729,11 +742,19 @@ def _pre_backward(module: nn.Module, *unused): # and unsharding operations when performing activation recomputation # / gradient checkpointing. module._training_state = TrainingState.PRE_BACKWARD + if isinstance(module, tuple(fsdp_unit_modules)): - # All-gather / unshard the module parameters before the backward pass. 
- self.all_gather_and_wait_parameters_ready( - list(module.parameters()), prefetch_order=PrefetchOrder.BACKWARD_PASS_ORDER - ) + param_list = list(module.parameters()) + else: + param_list = list(module.parameters(recurse=False)) + + if self.enable_fine_grained_param_gather_hook: + param_list = list(module.parameters(recurse=False)) + + # All-gather / unshard the module parameters before the backward pass. + self.all_gather_and_wait_parameters_ready( + param_list, prefetch_order=PrefetchOrder.BACKWARD_PASS_ORDER, bwd=True + ) self._root_pre_backward_hook_issued = False @@ -760,7 +781,9 @@ def _root_pre_backward(module: nn.Module, *unused): for bucket_id in range(ag_pipeline.num_buckets): group = self.param_and_grad_buffer.parameter_groups[bucket_id] if group.fsdp_unit_id is not None: - ag_pipeline.bucket_can_be_released[bucket_id] = True + ag_pipeline.bucket_can_be_released[ + ag_pipeline.get_bucket_key(bucket_id, bwd=False) + ] = True # Track parameters that require gradient reduction and optimization. self._params_require_handle_grad = set() for param_group in self.param_and_grad_buffer.parameter_groups: @@ -782,8 +805,12 @@ def _post_forward(module: nn.Module, input: Any, output: Any): # during activation recomputation / gradient checkpointing. return output + assert isinstance( + module, tuple(fsdp_unit_modules) + ), "_post_forward hook should only be registered on FSDP unit modules." + # Release the module parameters after the forward pass to save memory. - release_module_parameters(module) + release_module_parameters(module, bwd=False) module._training_state = TrainingState.IDLE return output @@ -824,21 +851,55 @@ def forward_hook(_module, inputs, output): # on the output tensor(s). return module.register_forward_hook(forward_hook) + def _register_pre_forward_param_unshard_hook(module): + """ + Register the forward pre-hook to unshard parameters before the forward pass. 
+ If we are not sharding anything, we do not have a model weight buffer and thus + have nothing to all-gather / un-shard. + """ + if self.ddp_config.data_parallel_sharding_strategy != "no_shard": + self.forward_pre_hooks[f"{module._get_name()} parameter unshard"] = ( + module.register_forward_pre_hook( + _pre_forward_param_unshard, prepend=True, with_kwargs=True + ) + ) + + def _register_pre_backward_param_unshard_hook(module): + """ + Register the backward pre-hook to unshard FSDP unit module parameters + immediately before the backward pass via attaching a gradient-triggered + hook to the output tensor(s) of a module during a post-forward hook. + """ + self.backward_pre_hooks[f"all-gather {module._get_name()} parameters"] = ( + create_custom_backward_hook(module, _pre_backward_param_unshard) + ) + + def _register_grad_acc_and_reduce_hook(module): + """ + Register the post-backward hook to deallocate model parameters and + reduce-scatter gradients immediately after the module backward pass + has completed to conserve memory for the subsequent backward pass. + """ + self.forward_pre_hooks[f"module {name} register post-backward hook"] = ( + module.register_forward_pre_hook( + functools.partial(_register_post_backward_hook, _post_backward), + with_kwargs=True, + ) + ) + fsdp_modules = [] for name, module in root_module.named_modules(): + if self.enable_fine_grained_param_gather_hook: + _register_pre_forward_param_unshard_hook(module) + _register_pre_backward_param_unshard_hook(module) + _register_grad_acc_and_reduce_hook(module) + # Skip if the module is already registered in fsdp_modules. if any(is_submodule(module, fsdp_module) for fsdp_module in fsdp_modules): continue - # Register the forward pre-hook to unshard parameters before the forward pass. - # If we are not sharding anything, we do not have a model weight buffer and thus - # have nothing to all-gather / un-shard. 
- if self.ddp_config.data_parallel_sharding_strategy != "no_shard": - self.forward_pre_hooks[f"module {name} parameter unshard"] = ( - module.register_forward_pre_hook( - _pre_forward_param_unshard, prepend=True, with_kwargs=True - ) - ) + if not self.enable_fine_grained_param_gather_hook: + _register_pre_forward_param_unshard_hook(module) if isinstance(module, tuple(fsdp_unit_modules)): fsdp_modules.append(module) @@ -849,12 +910,8 @@ def forward_hook(_module, inputs, output): module.register_forward_hook(_post_forward, prepend=False) ) - # Register the backward pre-hook to unshard FSDP unit module parameters - # immediately before the backward pass via attaching a gradient-triggered - # hook to the output tensor(s) of a module during a post-forward hook. - self.backward_pre_hooks[f"all-gather module {name} parameters"] = ( - create_custom_backward_hook(module, _pre_backward) - ) + if not self.enable_fine_grained_param_gather_hook: + _register_pre_backward_param_unshard_hook(module) elif ( not self.ddp_config.keep_fp8_transpose_cache and self.ddp_config.data_parallel_sharding_strategy == "optim_grads_params" @@ -867,15 +924,8 @@ def forward_hook(_module, inputs, output): module.register_forward_hook(_release_module_fp8_transpose_cache, prepend=False) ) - # Register the post-backward hook to deallocate model parameters and - # reduce-scatter gradients immediately after the module backward pass - # has completed to conserve memory for the subsequent backward pass. 
- self.forward_pre_hooks[f"module {name} register post-backward hook"] = ( - module.register_forward_pre_hook( - functools.partial(_register_post_backward_hook, _post_backward), - with_kwargs=True, - ) - ) + if not self.enable_fine_grained_param_gather_hook: + _register_grad_acc_and_reduce_hook(module) # Register root module pre- and post-backward hooks in cases where the # forward function of root module is not called, but rather the forward @@ -992,7 +1042,7 @@ def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bo else: self.synchronize_param_gather() for bucket_id in range(self.all_gather_pipeline.num_buckets): - self.all_gather_pipeline.async_bucket_gather(bucket_id=bucket_id) + self.all_gather_pipeline.async_bucket_gather(bucket_id=bucket_id, bwd=False) group = self.param_and_grad_buffer.parameter_groups[bucket_id] if group.model_weight_buffer is None: continue @@ -1000,9 +1050,10 @@ def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bo if group.model_weight_buffer.is_data_distributed: # If model weight is sharded, we wait for the all-gather to complete and # then release the bucket immediately to save memory usage. - self.all_gather_pipeline.wait_bucket_ready(bucket_id) + self.all_gather_pipeline.wait_bucket_ready(bucket_id, False) + for bucket_id in range(self.all_gather_pipeline.num_buckets): - self.all_gather_pipeline.wait_bucket_ready(bucket_id) + self.all_gather_pipeline.wait_bucket_ready(bucket_id, False) def start_grad_sync(self, *unused): """ diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py new file mode 100644 index 00000000000..d7156bea5c6 --- /dev/null +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py @@ -0,0 +1,334 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from importlib.metadata import version +from typing import List, Optional, Tuple + +import torch +from packaging.version import Version as PkgVersion + +logger = logging.getLogger(__name__) + +# Detect if Transformer Engine is installed +try: + import transformer_engine # pylint: disable=W0611 + from transformer_engine.pytorch.module.base import TransformerEngineBaseModule + + HAVE_TE = True +except (ImportError, ModuleNotFoundError): + TransformerEngineBaseModule = None + HAVE_TE = False + logger.info("Using Megatron-FSDP without Transformer Engine.") + +# Detect the Transformer Engine version +try: + import transformer_engine as te + + if hasattr(te, "__version__"): + TE_VERSION = PkgVersion(str(te.__version__)) + else: + TE_VERSION = PkgVersion(version("transformer-engine")) +except: + TE_VERSION = None + +# Detect the FP8 tensor class +try: + from transformer_engine.pytorch.tensor import QuantizedTensor + + HAVE_TE_FP8_TENSOR_CLASS = True + FP8_TENSOR_CLASS = QuantizedTensor +except: + try: + from transformer_engine.pytorch.float8_tensor import Float8Tensor + + HAVE_TE_FP8_TENSOR_CLASS = True + FP8_TENSOR_CLASS = Float8Tensor + except: + HAVE_TE_FP8_TENSOR_CLASS = False + +# Detect the MXFP8 tensor class +try: + from transformer_engine.pytorch.tensor.mxfp8_tensor import MXFP8Tensor + + HAVE_TE_MXFP8TENSOR = True +except: + HAVE_TE_MXFP8TENSOR = False + +# Detect the Blockwise FP8 tensor class +try: 
+ from transformer_engine.pytorch.tensor.float8_blockwise_tensor import Float8BlockwiseQTensor + + HAVE_TE_BLOCKWISE_FP8TENSOR = True +except: + HAVE_TE_BLOCKWISE_FP8TENSOR = False + +# Detect the "cast_master_weights_to_fp8" function of Transformer Engine +try: + from transformer_engine.pytorch.tensor.utils import cast_master_weights_to_fp8 + + HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8 = True +except: + HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8 = False + + # Try to import multi_tensor_apply, used in the fallback of fp8 quantization. + try: + from transformer_engine.pytorch.optimizers import multi_tensor_applier, multi_tensor_scale + + multi_tensor_scale_impl = multi_tensor_scale + except ImportError: + try: + import amp_C + from apex.multi_tensor_apply import multi_tensor_applier + + multi_tensor_scale_impl = amp_C.multi_tensor_scale + except ImportError: + import warnings + + warnings.warn( + "Transformer Engine and Apex are not installed. " + "Falling back to local implementations of " + "multi_tensor_applier and multi_tensor_scale" + ) + + def local_multi_tensor_applier(op, noop_flag_buffer, tensor_lists, *args): + """Multi tensor op applier""" + return op(2048 * 32, noop_flag_buffer, tensor_lists, *args) + + def local_multi_tensor_scale(chunk_size, noop_flag, tensor_lists, scale): + """Works as a drop-in replacement for amp_C.multi_tensor_scale.""" + for src, dst in zip(tensor_lists[0], tensor_lists[1]): + dst.copy_(src * scale) + + multi_tensor_applier = local_multi_tensor_applier + multi_tensor_scale_impl = local_multi_tensor_scale + + def _multi_tensor_copy_this_to_that( + this: List[torch.Tensor], + that: List[torch.Tensor], + overflow_buf: Optional[torch.Tensor] = None, + ): + """ + Use multi-tensor-applier to copy values from one list to another. + We don't have a bfloat16 implementation so for now if the overflow_buf + is not provided, we default back to simple loop copy to be compatible + with bfloat16. 
+ """ + if overflow_buf is not None: + overflow_buf.fill_(0) + # Scaling with factor `1.0` is equivalent to copy. + multi_tensor_applier(multi_tensor_scale_impl, overflow_buf, [this, that], 1.0) + else: + for this_, that_ in zip(this, that): + that_.copy_(this_) + + +# Detect the "post_all_gather_processing" function of Transformer Engine +try: + from transformer_engine.pytorch.tensor.utils import post_all_gather_processing + + HAVE_TE_POST_ALL_GATHER_PROCESSING = True +except: + HAVE_TE_POST_ALL_GATHER_PROCESSING = False + + +def is_te_min_version(vers, check_equality=True): + """Check if minimum version of `transformer-engine` is installed.""" + if not isinstance(TE_VERSION, PkgVersion): + return False + + if check_equality: + return TE_VERSION >= PkgVersion(vers) + else: + return TE_VERSION > PkgVersion(vers) + + +def is_float8tensor(tensor: torch.Tensor) -> bool: + """Check if a tensor is a FP8 tensor.""" + return HAVE_TE and isinstance(tensor, FP8_TENSOR_CLASS) + + +def is_blockwise_float8tensor(tensor: torch.Tensor) -> bool: + """Check if a tensor is a Blockwise FP8 tensor.""" + return HAVE_TE_BLOCKWISE_FP8TENSOR and isinstance(tensor, Float8BlockwiseQTensor) + + +def fp8_need_transpose_data(tensor: torch.Tensor) -> bool: + """Check if a FP8 tensor needs transpose data.""" + return HAVE_TE_MXFP8TENSOR and isinstance(tensor, MXFP8Tensor) + + +def fp8_need_transpose_data_for_meta_device_init(module: TransformerEngineBaseModule) -> bool: + """Check if a FP8 tensor needs transpose data, for meta device init scenario.""" + return HAVE_TE_MXFP8TENSOR and module.fp8_meta["recipe"].mxfp8() + + +def fp8_discard_transpose_cache(tensor: torch.Tensor) -> None: + """Discard the transpose cache of a FP8 tensor.""" + assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" + + if hasattr(tensor, "_transpose_invalid"): + tensor._transpose_invalid = True + tensor._transpose = None + elif not fp8_need_transpose_data(tensor): + 
tensor.update_usage(rowwise_usage=True, columnwise_usage=False) + + +def fp8_create_transpose_cache(tensors: List[torch.Tensor]) -> None: + """Create the transpose cache of a FP8 tensor.""" + if HAVE_TE_POST_ALL_GATHER_PROCESSING: + post_all_gather_processing(tensors) + else: + _fp8_create_transpose_cache_fallback(tensors) + + +def _fp8_create_transpose_cache_fallback(tensors: List[torch.Tensor]) -> None: + if not isinstance(tensors, list): + tensors = [tensors] + for tensor in tensors: + assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" + if hasattr(tensor, "_create_transpose"): + tensor._create_transpose() + else: + tensor._create_columnwise() + + +def fp8_set_raw_data(tensor: torch.Tensor, data: torch.Tensor, set_transpose: bool = False) -> None: + """Set the raw data of a Transformer Engine Float8Tensor.""" + assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" + + if set_transpose: + assert fp8_need_transpose_data(tensor), f"Type {type(tensor)} does not need transpose data" + data_attr = "_columnwise_data" + else: + data_attr = "_rowwise_data" if hasattr(tensor, "_rowwise_data") else "_data" + + old_data = getattr(tensor, data_attr) + if old_data is not None: + assert ( + old_data.dtype == data.dtype + ), f"The data types of raw data don't match {old_data.dtype} vs {data.dtype}" + assert ( + old_data.shape == data.shape + ), f"Shape {old_data.shape} of old_data doesn't match {data.shape} of new_data" + setattr(tensor, data_attr, data) + + +def fp8_get_raw_data(tensor: torch.Tensor, get_transpose: bool = False) -> torch.Tensor: + """Get the underlying raw storage of a FP8 tensor.""" + assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" + + if get_transpose: + assert fp8_need_transpose_data(tensor), f"Type {type(tensor)} does not need transpose data" + data_attr = "_columnwise_data" + else: + data_attr = "_rowwise_data" if hasattr(tensor, "_rowwise_data") else "_data" + + return 
getattr(tensor, data_attr) + + +def fp8_dequantize(tensor: torch.Tensor) -> torch.Tensor: + """Dequantize a FP8 tensor to a higher precision.""" + assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor" + assert is_te_min_version( + "2.0" + ), "Transformer Engine >= 2.0 is required for dequantizing parameters." + return tensor.dequantize() + + +def fp8_quantize( + model_params: List[torch.Tensor], + main_params: List[torch.Tensor], + start_offsets: List[int], + data_parallel_group: torch.distributed.ProcessGroup, + fsdp_shard_model_params: List[Tuple[torch.Tensor, Optional[torch.Tensor]]], +) -> None: + """Quantize sharded parameters to FP8.""" + if len(model_params) == 0: + return + fsdp_shard_model_params = [x[0] if x[1] is None else x for x in fsdp_shard_model_params] + + if HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8: + cast_master_weights_to_fp8( + model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params + ) + else: + _fp8_quantize_fallback( + model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params + ) + + +def _fp8_quantize_fallback( + model_params: List[torch.Tensor], + main_params: List[torch.Tensor], + start_offsets: List[int], + data_parallel_group: torch.distributed.ProcessGroup, + fsdp_shard_model_params: List[Tuple[torch.Tensor, Optional[torch.Tensor]]], +) -> None: + for model_param, main_param, start_offset, fsdp_shard_model_param in zip( + model_params, main_params, start_offsets, fsdp_shard_model_params + ): + if main_param is None: + continue + + if fsdp_shard_model_param is not None: + shard_model_param = fsdp_shard_model_param + else: + shard_model_param = model_param._data.view(-1)[ + start_offset : start_offset + main_param.numel() + ] + + quantizer = model_param._quantizer + # When not using fp8 params, the main_param (fp32) is first cast to bf16/fp16, and then + # cast to fp8 during forward. This logic keeps numerical consistency with bf16 params. 
+ main_param = main_param.to(model_param.dtype) + out = Float8Tensor( + shape=main_param.size(), + dtype=model_param.dtype, + requires_grad=False, + data=shard_model_param, + fp8_scale_inv=model_param._scale_inv, + fp8_dtype=model_param._fp8_dtype, + quantizer=quantizer, + ) + quantizer.update_quantized(main_param, out) + + amaxes = [] + scales = [] + scale_invs = [] + for model_param in model_params: + quantizer = model_param._quantizer + amaxes.append(quantizer.amax.view(1)) + scales.append(quantizer.scale.view(1)) + scale_invs.append(model_param._scale_inv.view(1)) + model_param._reset_caches() + + dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device="cuda") + + # Update scaling factors. + packed_scales = torch.empty(len(scales), dtype=torch.float32, device=scales[0].device) + packed_scale_views = [packed_scales[i].view(1) for i in range(len(scales))] + _multi_tensor_copy_this_to_that(scales, packed_scale_views, dummy_overflow_buf) + torch.reciprocal(packed_scales, out=packed_scales) + _multi_tensor_copy_this_to_that(packed_scale_views, scale_invs, dummy_overflow_buf) + + # Reduce amaxes. + # Note: Assume each param has a separate amax. 
+ packed_amaxes = torch.empty(len(amaxes), dtype=torch.float32, device=amaxes[0].device) + packed_amax_views = [packed_amaxes[i].view(1) for i in range(len(amaxes))] + _multi_tensor_copy_this_to_that(amaxes, packed_amax_views, dummy_overflow_buf) + torch.distributed.all_reduce( + packed_amaxes, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group + ) + _multi_tensor_copy_this_to_that(packed_amax_views, amaxes, dummy_overflow_buf) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index 64fbe84e7eb..04ea09970f4 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -32,6 +32,17 @@ from torch.distributed import _coalescing_manager from torch.distributed.tensor import DTensor, Replicate, Shard +from .mixed_precision import ( + fp8_discard_transpose_cache, + fp8_get_raw_data, + fp8_need_transpose_data, + fp8_need_transpose_data_for_meta_device_init, + fp8_quantize, + fp8_set_raw_data, + is_blockwise_float8tensor, + is_float8tensor, + is_te_min_version, +) from .uneven_dtensor import update_uneven_dtensor_chunk_metadata, validate_uneven_dtensor from .utils import ( _MODEL_PARALLEL_RNG_TRACKER_NAME, @@ -50,27 +61,15 @@ from megatron.core.distributed.distributed_data_parallel_config import ( DistributedDataParallelConfig, ) - from megatron.core.fp8_utils import ( - is_float8tensor, - modify_underlying_storage, - quantize_param_shard, - ) from megatron.core.tensor_parallel import get_cuda_rng_tracker - from megatron.core.utils import is_submodule, is_te_min_version + from megatron.core.utils import is_submodule logger.info("Detected Megatron Core, using Megatron-FSDP with Megatron.") except ImportError: # Megatron-LM is not installed, use Megatron-FSDP as a standalone module. 
from .distributed_data_parallel_config import DistributedDataParallelConfig - from .utils import ( - get_cuda_rng_tracker, - is_float8tensor, - is_submodule, - is_te_min_version, - modify_underlying_storage, - quantize_param_shard, - ) + from .utils import get_cuda_rng_tracker, is_submodule logger.info("Megatron Core is not installed, Megatron-FSDP will run without Megatron Core.") @@ -816,7 +815,7 @@ def __init__( data_parallel_group: Optional[torch.distributed.ProcessGroup] = None, dp_rank: Optional[int] = None, temporary_bucket_allocator: Optional[TemporaryBucketAllocator] = None, - is_dtype_float8: bool = False, + is_transpose_buffer: bool = False, gradient_scaling_factor: Optional[float] = None, chunk_size_factor: int = 1, mem_alloc_context: Optional[Callable] = None, @@ -849,7 +848,7 @@ def __init__( self.temporary_bucket_allocator = ( temporary_bucket_allocator if temporary_bucket_allocator else TemporaryBucketAllocator() ) - self.is_dtype_float8 = is_dtype_float8 + self.is_transpose_buffer = is_transpose_buffer self.gradient_scaling_factor = gradient_scaling_factor self.mem_alloc_context = mem_alloc_context if mem_alloc_context else nullcontext @@ -945,11 +944,11 @@ def fetch_bucket( for p in self.params: item_id = self.param_idx[p] p = to_local_if_dtensor(p) + data = self.get_item_from_bucket(bucket, item_id).view(p.shape) if is_float8tensor(p): - p._data = self.get_item_from_bucket(bucket, item_id).view(p.shape) + fp8_set_raw_data(p, data, self.is_transpose_buffer) else: - p.data = self.get_item_from_bucket(bucket, item_id).view(p.shape) - + p.data = data return bucket def free_bucket_storage(self): @@ -1118,6 +1117,9 @@ def set_item(self, item_id: int, item_data: torch.Tensor) -> None: # When fully sharded, we need to get the slice of the item to be stored in this shard. # Otherwise, we can just flatten the entire item since this buffer contains # the entire bucket. 
+ if is_float8tensor(item_data): + item_data = fp8_get_raw_data(item_data, self.is_transpose_buffer) + if self.is_data_distributed: # Get the coordinates of the slice of the item that is contained in this shard. slice_start, slice_end = self._get_item_slice_in_shard(item_id) @@ -1224,6 +1226,8 @@ class ParameterGroup: Factor determining chunk size for grouped parameter processing. model_weight_buffer (Optional[DataParallelBuffer]): Buffer used to store model weights for data-parallel operations. + transpose_weight_buffer (Optional[DataParallelBuffer]): + Buffer used to store transpose weights for data-parallel operations. main_weight_buffer (Optional[DataParallelBuffer]): Buffer used to store main model weights for data-parallel operations. main_grad_buffer (Optional[DataParallelBuffer]): @@ -1243,6 +1247,7 @@ class ParameterGroup: fsdp_unit_id: Optional[int] = None chunk_size_factor: int = 1 model_weight_buffer: Optional[DataParallelBuffer] = None + transpose_weight_buffer: Optional[DataParallelBuffer] = None main_weight_buffer: Optional[DataParallelBuffer] = None main_grad_buffer: Optional[DataParallelBuffer] = None hsdp_wbuf: Optional[DataParallelBuffer] = None @@ -1313,12 +1318,10 @@ def _does_param_require_new_bucket(param): parameter_groups = [] for name, param in module.named_parameters(): # We need this information to correctly dynamically allocate Tensors! + is_fp8 = is_float8tensor(param) + is_fp8_meta_device_init = meta_device_init_fp8_params.get(name, (False, False))[0] param_attrs = dict( - dtype=( - "float8" - if is_float8tensor(param) or meta_device_init_fp8_params.get(name, False) - else param.dtype - ), + dtype="float8" if (is_fp8 or is_fp8_meta_device_init) else param.dtype, is_expert_param=is_expert_parameter(name, param), requires_grad=param.requires_grad, fsdp_unit_id=None, @@ -1641,7 +1644,10 @@ def __init__( # to determine whether this parameter is fp8 or not. 
fp8_meta_index = m.param_init_meta[name].fp8_meta_index if m.primary_weights_in_fp8 and fp8_meta_index is not None: - meta_device_init_fp8_params[self.param_to_name[param]] = True + meta_device_init_fp8_params[self.param_to_name[param]] = ( + True, + fp8_need_transpose_data_for_meta_device_init(m), + ) # Get the parameter groups. (self.parameter_groups, self.param_to_param_group, self.bucket_to_bucket_group) = ( @@ -1768,6 +1774,7 @@ def _bytes_to_mb(bytes_val: int) -> str: numel = sum(to_local_if_dtensor(p).shape.numel() for p in group.params) buffers = { "weight": group.model_weight_buffer, + "transpose_weight": group.transpose_weight_buffer, "main_weight": group.main_weight_buffer, "grad": group.main_grad_buffer, } @@ -1837,12 +1844,18 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): self.weight_alloc = FixedPoolAllocator( name="fsdp_params", fsdp_param_groups=self.parameter_groups, size=UB_BUFFER_NUM ) + self.transpose_weight_alloc = FixedPoolAllocator( + name="fsdp_fp8_transpose_params", + fsdp_param_groups=self.parameter_groups, + size=UB_BUFFER_NUM, + ) self.main_grad_alloc = FixedPoolAllocator( name="fsdp_grads", fsdp_param_groups=self.parameter_groups, size=UB_BUFFER_NUM ) self.double_buf_units = self.weight_alloc.fsdp_double_buffer_units else: self.weight_alloc = StorageResizeBasedBucketAllocator() + self.transpose_weight_alloc = StorageResizeBasedBucketAllocator() self.main_grad_alloc = None self.double_buf_units = [] @@ -1882,8 +1895,9 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): ) # Check if the parameter group is FP8. 
one_param = group.params[0] - is_dtype_float8 = is_float8tensor(one_param) or meta_device_init_fp8_params.get( - self.param_to_name[one_param], False + is_dtype_float8 = ( + is_float8tensor(one_param) + or meta_device_init_fp8_params.get(self.param_to_name[one_param], (False, False))[0] ) if is_dtype_float8: param_dtype = torch.uint8 @@ -1892,6 +1906,16 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): param_dtype = group.params[0].dtype grad_dtype = param_dtype + # Check if the parameter group needs a transpose buffer for model weights. + # Currently, only mxfp8 needs it. + need_transpose_data = is_float8tensor(one_param) and fp8_need_transpose_data(one_param) + need_transpose_data_for_meta_device_init = meta_device_init_fp8_params.get( + self.param_to_name[one_param], (False, False) + )[1] + should_create_transpose_weight_buffer = ( + need_transpose_data or need_transpose_data_for_meta_device_init + ) + # Check if the parameter group requires a grad buffer or main weight buffer. 
should_create_grad_buffer_or_main_weight_buffer = ( not self.only_create_grad_buffer_and_main_weight_buffer_for_param_requires_grad @@ -1908,13 +1932,29 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=param_dtype, device=self.device, data_parallel_group=main_buf_dp_group, - is_dtype_float8=is_dtype_float8, + is_transpose_buffer=False, temporary_bucket_allocator=self.weight_alloc, bucket_id=group_id, chunk_size_factor=group.chunk_size_factor, mem_alloc_context=self.mem_alloc_context, **main_buf_extra_kwargs, ) + if should_create_transpose_weight_buffer: + group.transpose_weight_buffer = DataParallelBuffer( + self.ddp_config, + group.params, + is_data_distributed=is_model_weight_buffer_distributed + and main_buf_dp_group.size() > 1, + dtype=param_dtype, + device=self.device, + data_parallel_group=main_buf_dp_group, + is_transpose_buffer=True, + temporary_bucket_allocator=self.transpose_weight_alloc, + bucket_id=group_id, + chunk_size_factor=group.chunk_size_factor, + mem_alloc_context=self.mem_alloc_context, + **main_buf_extra_kwargs, + ) # Initialize the main weight buffer. 
if should_create_grad_buffer_or_main_weight_buffer and preserve_fp32_weights: @@ -1946,7 +1986,7 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=torch.float32 if grad_reduce_in_fp32 else grad_dtype, device=self.device, data_parallel_group=main_buf_dp_group, - is_dtype_float8=False, + is_transpose_buffer=False, temporary_bucket_allocator=self.main_grad_alloc, gradient_scaling_factor=gradient_scaling_factor, bucket_id=group_id, @@ -1970,7 +2010,7 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=wbuf.dtype, device=wbuf.device, data_parallel_group=hsdp_buf_dp_group, - is_dtype_float8=wbuf.is_dtype_float8, + is_transpose_buffer=False, temporary_bucket_allocator=self.weight_alloc, bucket_id=group_id, chunk_size_factor=group.chunk_size_factor, @@ -1986,6 +2026,9 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): ), ) + if group.transpose_weight_buffer is not None: + raise NotImplementedError("HSDP for transpose buffer is not implemented yet") + if should_create_grad_buffer_or_main_weight_buffer: # Initialize the HSDP grad buffer. 
gbuf = group.main_grad_buffer @@ -1997,7 +2040,7 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): dtype=gbuf.dtype, device=gbuf.device, data_parallel_group=hsdp_buf_dp_group, - is_dtype_float8=gbuf.is_dtype_float8, + is_transpose_buffer=False, temporary_bucket_allocator=self.main_grad_alloc, gradient_scaling_factor=gradient_scaling_factor, bucket_id=group_id, @@ -2080,6 +2123,20 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): torch.empty(wbuf.data_size, dtype=wbuf.dtype, device=self.device) ) bucket = wbuf.fetch_bucket() + + tbuf = group.transpose_weight_buffer + if tbuf: + with self.mem_alloc_context(): + if group.hsdp_wbuf: + raise NotImplementedError( + "HSDP for transpose buffer is not implemented yet" + ) + else: + tbuf.init_data( + torch.empty(tbuf.data_size, dtype=tbuf.dtype, device=self.device) + ) + transpose_bucket = tbuf.fetch_bucket() + mbuf = group.main_weight_buffer if mbuf: # Manually instantiate an empty tensor into the main weight buffer. @@ -2133,25 +2190,41 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): if not self.ddp_config.keep_fp8_transpose_cache: for _param in m.parameters(recurse=False): if is_float8tensor(_param): - _param._transpose_invalid = True - _param._transpose = None + fp8_discard_transpose_cache(_param) # Raise error if a meta parameter still exists after initialization. assert not p.is_meta, (self.param_to_name[p], module_reset_flag) + p_local = to_local_if_dtensor(p) + # Copy the model weight parameter tensor into the buffer. # When distributed, this shards and preserves the data across all ranks. - wbuf.set_item(item_id, to_local_if_dtensor(p)) + wbuf.set_item(item_id, p_local) + if tbuf: + tbuf.set_item(item_id, p_local) # Retrieve the newly allocated parameter data from the global bucket. # Attach the bucket-allocated parameter data to the module parameter, # to use the bucket-allocated data for autograd and NCCL. 
- new_param_data = wbuf.get_item_from_bucket(bucket, item_id).view( - to_local_if_dtensor(p).shape - ) - if is_float8tensor(p): - # Needed to instantiate FP8 parameters. Requires installing - # TransformerEngine. - modify_underlying_storage(p, new_param_data) + new_param_data = wbuf.get_item_from_bucket(bucket, item_id).view(p_local.shape) + if tbuf: + new_transpose_data = tbuf.get_item_from_bucket( + transpose_bucket, item_id + ).view(p_local.shape) + else: + new_transpose_data = None + + if is_float8tensor(p_local): + old_param_data = fp8_get_raw_data(p_local) + assert old_param_data._base is None + new_param_data.detach().copy_(old_param_data) + fp8_set_raw_data(p_local, new_param_data) + del old_param_data + if new_transpose_data is not None: + old_transpose_data = fp8_get_raw_data(p_local, True) + assert old_transpose_data._base is None + new_transpose_data.detach().copy_(old_transpose_data) + fp8_set_raw_data(p_local, new_transpose_data, True) + del old_transpose_data elif isinstance(p, DTensor): old_param_data = p._local_tensor.data p._local_tensor.data = new_param_data @@ -2189,7 +2262,12 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): # the (high-precision) main weight buffer. # Nothing else needs to be done, because the main weights # do not require autograd operations, only possibly sharding. - mbuf.set_item(item_id, to_local_if_dtensor(p)) + p_local = to_local_if_dtensor(p) + assert not is_float8tensor(p_local), ( + self.param_to_name[p], + "fp8 param should use get_high_precision_init_val method.", + ) + mbuf.set_item(item_id, p_local) if wbuf and wbuf.is_data_distributed: # Free the memory backing the temporarily-allocated bucket associated @@ -2201,6 +2279,9 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): # before forward activations and gradients are allocated in training. 
wbuf.free_bucket_storage() + if tbuf and tbuf.is_data_distributed: + tbuf.free_bucket_storage() + # Allocate the main_weight buffer and main_grad buffer data in one buffer. if self.buffer_all_in_one: with self.mem_alloc_context(): @@ -2324,6 +2405,7 @@ def _reset_parameters(self, old_params, new_params): group.params[item_id] = new_p for buf in [ group.model_weight_buffer, + group.transpose_weight_buffer, group.main_weight_buffer, group.main_grad_buffer, group.hsdp_wbuf, @@ -2371,6 +2453,7 @@ def _init_distributed_params(self): dist_main_weight = {} for pg in self.parameter_groups: wbuf = pg.model_weight_buffer + tbuf = pg.transpose_weight_buffer mbuf = pg.main_weight_buffer for item_id, orig_param in enumerate(pg.params): param_name = self.param_to_name[orig_param] @@ -2398,6 +2481,7 @@ def _init_distributed_params(self): ) dist_main_weight[param_name] = dist_param elif wbuf: + assert tbuf is None, "Transpose buffer should only exist when main params exist" dist_param = make_fsdp_dtensor( local_tensor=wbuf.get_item(item_id, only_shard=sharded_optimizer_state), param=orig_param, @@ -2567,9 +2651,54 @@ def copy_main_weights_to_model_weights(self): expert_param_quantize_kwargs = copy.deepcopy(dense_param_quantize_kwargs) data_parallel_group = None expert_data_parallel_group = None + clear_quantize_kwargs = lambda kwargs: [d.clear() for d in kwargs.values()] + + def _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs): + if len(dense_param_quantize_kwargs["model_params"]) > 0: + # If we have FP8 parameters, we need to quantize them. + fp8_quantize(data_parallel_group=data_parallel_group, **dense_param_quantize_kwargs) + + if len(expert_param_quantize_kwargs["model_params"]) > 0: + # If we have FP8 expert parameters, we need to quantize them. 
+ fp8_quantize( + data_parallel_group=expert_data_parallel_group, **expert_param_quantize_kwargs + ) + + clear_quantize_kwargs(dense_param_quantize_kwargs) + clear_quantize_kwargs(expert_param_quantize_kwargs) + + # Special handling of blockwise FP8 + BATCH_QUANT_MEMORY_LIMIT_BYTES = 5 * 1024**3 # 5 GB + blockwise_fp8_weight_buffers = [] + blockwise_fp8_param_buffers = [] + + def _batch_quantize_blockwise_fp8_params( + dense_param_quantize_kwargs, expert_param_quantize_kwargs, blockwise_fp8_param_buffers + ): + if len(blockwise_fp8_param_buffers) == 0: + return + + # Copy original param shards into their blockwise FP8 working buffers + for bufs in blockwise_fp8_param_buffers: + bufs["bucket_param"].copy_(bufs["param"]) + + # Apply FP8 quantization to blockwise FP8 parameters + _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs) + + # Copy quantized params back from working buffers to original param tensors + for bufs in blockwise_fp8_param_buffers: + bufs["param"].copy_(bufs["bucket_param"]) + blockwise_fp8_param_buffers.clear() + + # Free bucket storage for blockwise FP8 weight buffers + for wbuf in blockwise_fp8_weight_buffers: + wbuf.free_bucket_storage() + blockwise_fp8_weight_buffers.clear() + for pg in self.parameter_groups: mbuf = pg.main_weight_buffer wbuf = pg.model_weight_buffer + tbuf = pg.transpose_weight_buffer if mbuf is None: continue @@ -2585,44 +2714,88 @@ def copy_main_weights_to_model_weights(self): shard_offsets_in_fp8 = quantize_func_kwargs["start_offsets"] shard_model_params = quantize_func_kwargs["fsdp_shard_model_params"] + has_blockwise_fp8_param = False for param in pg.params: item_id = mbuf.param_idx[param] if wbuf: if wbuf.is_data_distributed or mbuf.is_data_distributed: model_param = wbuf.get_item(item_id, only_shard=True) + if tbuf: + transpose_param = tbuf.get_item(item_id, only_shard=True) + else: + transpose_param = None main_weight = mbuf.get_item(item_id, only_shard=True) else: model_param = 
wbuf.get_item(item_id) + if tbuf: + transpose_param = tbuf.get_item(item_id) + else: + transpose_param = None main_weight = mbuf.get_item(item_id) else: assert not mbuf.is_data_distributed model_param = to_local_if_dtensor(param) main_weight = mbuf.get_item(item_id) + if is_blockwise_float8tensor(param): + fp8_params.append(param) + if model_param.numel() == 0: + shard_fp32_from_fp8.append(None) + shard_offsets_in_fp8.append(None) + shard_model_params.append([None, None]) + else: + shard_fp32_from_fp8.append(main_weight) + shard_offsets_in_fp8.append(wbuf.locate_item_in_global_item(item_id)[0]) + bucket = wbuf.fetch_bucket() + b_model_param = wbuf.get_item_from_bucket(bucket, item_id)[ + slice(*wbuf.locate_item_in_global_item(item_id)) + ] + assert ( + transpose_param is None + ), "Blockwise FP8 does not support transpose param." + shard_model_params.append([b_model_param, None]) + assert b_model_param.numel() == model_param.numel(), ( + f"Blockwise FP8 bucket param numel {b_model_param.numel()} does" + f" not match model param numel {model_param.numel()}" + f" name: {self.param_to_name[param]}" + ) + blockwise_fp8_param_buffers.append( + {"bucket_param": b_model_param, "param": model_param} + ) + has_blockwise_fp8_param = True + continue + if is_float8tensor(param): fp8_params.append(param) if model_param.numel() == 0: shard_fp32_from_fp8.append(None) shard_offsets_in_fp8.append(None) - shard_model_params.append(None) + shard_model_params.append([None, None]) else: shard_fp32_from_fp8.append(main_weight) shard_offsets_in_fp8.append(wbuf.locate_item_in_global_item(item_id)[0]) - shard_model_params.append(model_param) + shard_model_params.append([model_param, transpose_param]) continue if model_param.numel() > 0: model_param.data.copy_(main_weight.view(model_param.shape)) - if len(dense_param_quantize_kwargs["model_params"]) > 0: - # If we have FP8 parameters, we need to quantize them. 
- dense_param_quantize_kwargs["data_parallel_group"] = data_parallel_group - quantize_param_shard(**dense_param_quantize_kwargs) + if has_blockwise_fp8_param: + blockwise_fp8_weight_buffers.append(wbuf) + if ( + sum([wbuf.bucket_index.size for wbuf in blockwise_fp8_weight_buffers]) + > BATCH_QUANT_MEMORY_LIMIT_BYTES + ): + _batch_quantize_blockwise_fp8_params( + dense_param_quantize_kwargs, + expert_param_quantize_kwargs, + blockwise_fp8_param_buffers, + ) - if len(expert_param_quantize_kwargs["model_params"]) > 0: - # If we have FP8 expert parameters, we need to quantize them. - expert_param_quantize_kwargs["data_parallel_group"] = expert_data_parallel_group - quantize_param_shard(**expert_param_quantize_kwargs) + _batch_quantize_blockwise_fp8_params( + dense_param_quantize_kwargs, expert_param_quantize_kwargs, blockwise_fp8_param_buffers + ) + _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs) @torch.no_grad() def copy_model_weights_to_main_weights(self): @@ -2640,6 +2813,7 @@ def copy_model_weights_to_main_weights(self): f"Master weight buffer size {mbuf.data.numel()} does not match " f"model weight buffer size {copyin_data.numel()}" ) + # TODO(mxfp8): Make sure it's not a fp8 buf? 
mbuf.data.copy_(copyin_data.data) def all_gather_parameters(self, async_op: bool = True): @@ -2657,15 +2831,18 @@ def all_gather_parameters(self, async_op: bool = True): all_gather_ops = [] for g in self.parameter_groups: - shard = g.model_weight_buffer.get_shard_from_local_buffer() - all_gather_handler = torch.distributed.all_gather_into_tensor( - output_tensor=g.model_weight_buffer.data, - input_tensor=shard, - group=g.model_weight_buffer.data_parallel_group, - async_op=async_op, - ) - if async_op: - all_gather_ops.append(all_gather_handler) + for buf in [g.model_weight_buffer, g.transpose_weight_buffer]: + if buf is None: + continue + shard = buf.get_shard_from_local_buffer() + all_gather_handler = torch.distributed.all_gather_into_tensor( + output_tensor=buf.data, + input_tensor=shard, + group=buf.data_parallel_group, + async_op=async_op, + ) + if async_op: + all_gather_ops.append(all_gather_handler) for op in all_gather_ops: op.wait() @@ -2686,7 +2863,7 @@ def reduce_scatter_gradients(self, async_op: bool = True): reduce_scatter_ops = [] for g in self.parameter_groups: gbuf = g.main_grad_buffer - if gbuf is not None: + if gbuf is None: continue scaling_factor = gbuf.gradient_scaling_factor reduce_op = gradient_reduce_preprocessing(gbuf.data, scaling_factor, self.ddp_config) @@ -3136,9 +3313,16 @@ def __init__( # Track the status of all-gather operations for each bucket. self.param_gather_event_map = {} # All buckets are initially deallocated / empty after initialization of ParamAndGradBuffer. - self.bucket_status = {i: BucketStatus.EMPTY for i in range(self.buffer.num_buckets)} + self.bucket_status = {} + for i in range(self.buffer.num_buckets): + for bwd in [False, True]: + self.bucket_status[self.get_bucket_key(i, bwd)] = BucketStatus.EMPTY + # Track whether each bucket can be deallocated. 
- self.bucket_can_be_released = {i: False for i in range(self.buffer.num_buckets)} + self.bucket_can_be_released = {} + for i in range(self.buffer.num_buckets): + for bwd in [False, True]: + self.bucket_can_be_released[self.get_bucket_key(i, bwd)] = False # Map each bucket to the bucket group it belongs to by enumerated ID. # Made to collect a subset of buckets in the same bucket group. @@ -3163,6 +3347,13 @@ def __init__( # all-gather parameters across groups. self.outer_fsdp_group_param_gather_stream = torch.cuda.Stream() + def get_bucket_key(self, bucket_id, bwd): + """Get the key for the bucket.""" + has_transpose_buffer = ( + self.buffer.parameter_groups[bucket_id].transpose_weight_buffer is not None + ) + return (bucket_id, has_transpose_buffer and bwd) + @property def num_buckets(self): """Return the number of buckets.""" @@ -3179,10 +3370,11 @@ def reset(self): UserWarning, ) while len(self.param_gather_event_map) > 0: - bucket_id = next(iter(self.param_gather_event_map)) - self.wait_bucket_ready(bucket_id) + (bucket_id, bwd) = next(iter(self.param_gather_event_map)) + self.wait_bucket_ready(bucket_id, bwd) for bucket_id in range(self.num_buckets): - self.bucket_can_be_released[bucket_id] = True + for bwd in [False, True]: + self.bucket_can_be_released[self.get_bucket_key(bucket_id, bwd)] = True self.recycle_unused_buckets() assert all([status is BucketStatus.EMPTY for status in self.bucket_status.values()]), ( @@ -3204,6 +3396,7 @@ def all_gather_params( suggested_AG_prefetch_size: Optional[int] = None, async_param_gather: bool = True, outer_fsdp_group_param_gather: bool = False, + bwd: bool = False, ): """All-gather the params. If prefetch is enabled, prefetch next buckets in the order of `prefetch_order`. @@ -3238,7 +3431,7 @@ def all_gather_params( # Do not release the buckets that are being all-gathered. 
for bucket_id in ag_buckets: - self.bucket_can_be_released[bucket_id] = False + self.bucket_can_be_released[self.get_bucket_key(bucket_id, bwd)] = False # If prefetch is enabled, we will add prefetch buckets to ag_buckets. if prefetch: @@ -3310,7 +3503,11 @@ def need_skip_prefetch(bucket_id): bucket_id = next_bucket_id(ag_buckets) # Only all-gather on buckets that have not been allocated yet. - ag_buckets = [i for i in ag_buckets if self.bucket_status[i] == BucketStatus.EMPTY] + ag_buckets = [ + bucket_id + for bucket_id in ag_buckets + if self.bucket_status[self.get_bucket_key(bucket_id, bwd)] == BucketStatus.EMPTY + ] if len(ag_buckets) == 0: return @@ -3329,6 +3526,7 @@ def need_skip_prefetch(bucket_id): self.ag_stream if self.ag_stream is not None else torch.cuda.current_stream() ) if outer_fsdp_group_param_gather: + # TODO(mxfp8): Support hsdp self.outer_fsdp_group_param_gather_stream.wait_stream(torch.cuda.current_stream()) with torch.cuda.stream(self.outer_fsdp_group_param_gather_stream): outer_fsdp_group = self.buffer.dist_index.get_outer_fsdp_group() @@ -3356,12 +3554,13 @@ def need_skip_prefetch(bucket_id): for bucket_id in buckets: # All-gather the module weights from each FSDP buffer shard # into an allocated bucket containing unsharded weights. - self.async_bucket_gather(bucket_id) + self.async_bucket_gather(bucket_id, bwd) # Replace the parameter all-gather event with coalescing event. 
for bucket_id in buckets: - _, mark_bucket_ready_to_use = self.param_gather_event_map[bucket_id] - self.param_gather_event_map[bucket_id] = ( + bucket_key = self.get_bucket_key(bucket_id, bwd) + _, mark_bucket_ready_to_use = self.param_gather_event_map[bucket_key] + self.param_gather_event_map[bucket_key] = ( coalescing_event, mark_bucket_ready_to_use, ) @@ -3369,14 +3568,16 @@ def need_skip_prefetch(bucket_id): # Wait for all-gather to finish if not async_param_gather: for bucket_id in buckets: - self.wait_bucket_ready(bucket_id) + self.wait_bucket_ready(bucket_id, bwd) - def wait_bucket_ready(self, bucket_id, empty_ok=False): + def wait_bucket_ready(self, bucket_id, bwd, empty_ok=False): """Wait for the bucket to be ready.""" - if self.bucket_status[bucket_id] == BucketStatus.READY_TO_USE: + bucket_key = self.get_bucket_key(bucket_id, bwd) + + if self.bucket_status[bucket_key] == BucketStatus.READY_TO_USE: # Already ready to use. return - if self.bucket_status[bucket_id] == BucketStatus.EMPTY: + if self.bucket_status[bucket_key] == BucketStatus.EMPTY: if empty_ok: return # Bucket shouldn't be empty, this implies that the bucket @@ -3384,48 +3585,64 @@ def wait_bucket_ready(self, bucket_id, empty_ok=False): raise ValueError(f"Bucket {bucket_id} is empty.") # Wait for asynchronous / overlapped NCCL operations to complete. - param_gather_event, mark_bucket_ready_to_use = self.param_gather_event_map.pop(bucket_id) + param_gather_event, mark_bucket_ready_to_use = self.param_gather_event_map.pop(bucket_key) param_gather_event.wait() mark_bucket_ready_to_use() @torch.no_grad() - def release_bucket(self, bucket_id: int): + def release_bucket(self, bucket_id, bwd): """Release the bucket.""" - if self.bucket_status[bucket_id] == BucketStatus.EMPTY: + # TODO(mxfp8): In some cases, there won't be ag before bwd? 
+ bucket_key = self.get_bucket_key(bucket_id, bwd) + + if self.bucket_status[bucket_key] == BucketStatus.EMPTY: return - self.wait_bucket_ready(bucket_id, empty_ok=True) - if self.bucket_status[bucket_id] == BucketStatus.COMMUNICATING: + self.wait_bucket_ready(bucket_id, bwd, empty_ok=True) + if self.bucket_status[bucket_key] == BucketStatus.COMMUNICATING: raise ValueError(f"Bucket {bucket_id} is communicating.") - wbuf = self.buffer.parameter_groups[bucket_id].model_weight_buffer - wbuf.free_bucket_storage() - self.bucket_status[bucket_id] = BucketStatus.EMPTY + if bwd and self.buffer.parameter_groups[bucket_id].transpose_weight_buffer is not None: + buf = self.buffer.parameter_groups[bucket_id].transpose_weight_buffer + else: + buf = self.buffer.parameter_groups[bucket_id].model_weight_buffer + + buf.free_bucket_storage() + self.bucket_status[bucket_key] = BucketStatus.EMPTY def recycle_unused_buckets(self): """Recycle the unused buckets.""" - for bucket_id, can_be_released in self.bucket_can_be_released.items(): + for bucket_key, can_be_released in self.bucket_can_be_released.items(): if can_be_released: - self.release_bucket(bucket_id) - self.bucket_can_be_released[bucket_id] = False + bucket_id, is_transpose_weight = bucket_key[0], bucket_key[1] + self.release_bucket(bucket_id, is_transpose_weight) + self.bucket_can_be_released[bucket_key] = False - def get_fsdp_buffer(self, bucket_id: int) -> DataParallelBuffer: + def get_fsdp_buffer(self, bucket_id: int, bwd=False) -> DataParallelBuffer: """Get the FSDP buffer with the given bucket ID.""" param_group = self.buffer.parameter_groups[bucket_id] if self.buffer.ddp_config.outer_dp_sharding_strategy != "no_shard": - return param_group.hsdp_wbuf - return param_group.model_weight_buffer + if bwd and param_group.transpose_weight_buffer is not None: + raise RuntimeError("Transpose buffer is not supported for HSDP") + else: + return param_group.hsdp_wbuf + if bwd and param_group.transpose_weight_buffer is not None: + 
return param_group.transpose_weight_buffer + else: + return param_group.model_weight_buffer @torch.no_grad() - def async_bucket_gather(self, bucket_id: int) -> None: + def async_bucket_gather(self, bucket_id, bwd) -> None: """All-gather the bucket and set the items.""" - self.bucket_can_be_released[bucket_id] = False - if self.bucket_status[bucket_id] != BucketStatus.EMPTY: + bucket_key = self.get_bucket_key(bucket_id, bwd) + + self.bucket_can_be_released[bucket_key] = False + if self.bucket_status[bucket_key] != BucketStatus.EMPTY: return - self.bucket_status[bucket_id] = BucketStatus.COMMUNICATING + self.bucket_status[bucket_key] = BucketStatus.COMMUNICATING - wbuf = self.get_fsdp_buffer(bucket_id) + wbuf = self.get_fsdp_buffer(bucket_id, bwd) # Lazy release the unused buckets. self.recycle_unused_buckets() @@ -3440,18 +3657,21 @@ def async_bucket_gather(self, bucket_id: int) -> None: async_op=True, ) - def get_closure(bucket_id): + def get_closure(bucket_id, bwd): @torch.no_grad() def mark_bucket_ready_to_use(): # Mark the bucket as ready to use - all NCCL operations are complete. - self.bucket_status[bucket_id] = BucketStatus.READY_TO_USE + self.bucket_status[self.get_bucket_key(bucket_id, bwd)] = BucketStatus.READY_TO_USE return mark_bucket_ready_to_use - mark_bucket_ready_to_use = get_closure(bucket_id) + mark_bucket_ready_to_use = get_closure(bucket_id, bwd) # Track the async all-gather operation for the bucket. 
- self.param_gather_event_map[bucket_id] = (param_gather_event, mark_bucket_ready_to_use) + self.param_gather_event_map[self.get_bucket_key(bucket_id, bwd)] = ( + param_gather_event, + mark_bucket_ready_to_use, + ) @torch.no_grad() @@ -3544,15 +3764,13 @@ def override_sharded_param_methods_with_safety_checks(params, all_gather_pipelin def override_sharded_param_to_function_closure(p, to_function): def override_sharded_param_to_function(*args, **kwargs): - bucket_id = all_gather_pipeline.buffer.param_to_param_group[p] - status = all_gather_pipeline.bucket_status[bucket_id] - if status == BucketStatus.READY_TO_USE: - return to_function(*args, **kwargs) - raise RuntimeError( - "This parameter is already shard by MCore FSDP and the " - "shared-state parameter does not support 'to' function." - "please define the dtype and device of the parameter before FSDP wrap." - ) + if p._typed_storage()._size() == 0: + warnings.warn( + "The parameter may be sharded by Megatron-FSDP, " + "no actual 'to' operation is performed." + ) + return torch.empty([]) + return to_function(*args, **kwargs) return override_sharded_param_to_function @@ -3560,15 +3778,13 @@ def override_sharded_param_to_function(*args, **kwargs): def override_sharded_param_cpu_function_closure(p, cpu_function): def override_sharded_param_cpu_function(*args, **kwargs): - bucket_id = all_gather_pipeline.buffer.param_to_param_group[p] - status = all_gather_pipeline.bucket_status[bucket_id] - if status == BucketStatus.READY_TO_USE: - return cpu_function(*args, **kwargs) - warnings.warn( - "The parameters are sharded by MCore FSDP, and no actual cpu " - "operation is performed." - ) - return torch.empty([], device="cpu") + if p._typed_storage()._size() == 0: + warnings.warn( + "The parameter may be sharded by Megatron-FSDP, " + "no actual 'cpu' operation is performed." 
+ ) + return torch.empty([], device="cpu") + return cpu_function(*args, **kwargs) return override_sharded_param_cpu_function diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py index e3e9996335e..01523929ae1 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py @@ -19,7 +19,7 @@ from contextlib import nullcontext from functools import reduce from importlib.metadata import version -from typing import Callable, List, Optional, Sequence, Union +from typing import Callable, Optional, Sequence, Union try: import einops @@ -78,52 +78,6 @@ def is_te_min_version(vers, check_equality=True): return te_version > PkgVersion(vers) -# Check if Transformer Engine has class for fp8 tensors. -try: - if is_te_min_version("2.0"): - # In TE2.x, QuantizedTensor is the base class for all different type of fp8 tensors, - # including fp8 tensor for delayed scaling, current scaling and mxfp8, etc. - from transformer_engine.pytorch.tensor import QuantizedTensor as FP8_TENSOR_CLASS - else: - from transformer_engine.pytorch.float8_tensor import Float8Tensor as FP8_TENSOR_CLASS - - HAVE_TE_FP8_TENSOR_CLASS = True -except (ImportError, ModuleNotFoundError): - # FP8 tensor class not found - HAVE_TE_FP8_TENSOR_CLASS = False - -try: - from transformer_engine.pytorch.optimizers import multi_tensor_applier, multi_tensor_scale - - multi_tensor_scale_impl = multi_tensor_scale -except ImportError: - try: - import amp_C - from apex.multi_tensor_apply import multi_tensor_applier - - multi_tensor_scale_impl = amp_C.multi_tensor_scale - except ImportError: - import warnings - - warnings.warn( - "Transformer Engine and Apex are not installed. 
" - "Falling back to local implementations of " - "multi_tensor_applier and multi_tensor_scale" - ) - - def local_multi_tensor_applier(op, noop_flag_buffer, tensor_lists, *args): - """Multi tensor op applier""" - return op(2048 * 32, noop_flag_buffer, tensor_lists, *args) - - def local_multi_tensor_scale(chunk_size, noop_flag, tensor_lists, scale): - """Works as a drop-in replacement for amp_C.multi_tensor_scale.""" - for src, dst in zip(tensor_lists[0], tensor_lists[1]): - dst.copy_(src * scale) - - multi_tensor_applier = local_multi_tensor_applier - multi_tensor_scale_impl = local_multi_tensor_scale - - def is_submodule(module, parent_module, strict=True): """ Check if a module is a submodule of another module. @@ -137,18 +91,6 @@ def is_submodule(module, parent_module, strict=True): return False -def is_float8tensor(tensor: torch.Tensor) -> bool: - """Check if a tensor is a Transformer Engine Float8Tensor. - - Note that in TE2.x, in order to support more recipes, the design of the fp8 tensor class has - changed. Now Float8Tensor is only used for current scaling and delayed scaling. And mxfp8 - and blockwise scaling have their own fp8 tensor classes. These different fp8 tensor classes - are both inherited from QuantizedTensor. So, for TE1.x, FP8_TENSOR_CLASS is Float8Tensor, - and for TE2.x, FP8_TENSOR_CLASS is QuantizedTensor. - """ - return HAVE_TE_FP8_TENSOR_CLASS and isinstance(tensor, FP8_TENSOR_CLASS) - - def get_mesh_names( device_mesh: Optional[DeviceMesh] = None, only_submesh_dims: bool = False ) -> list[str]: @@ -210,198 +152,6 @@ def contains_submesh( return all(submesh_name in device_mesh_names for submesh_name in submesh_names) -def _multi_tensor_copy_this_to_that( - this: List[torch.Tensor], that: List[torch.Tensor], overflow_buf: Optional[torch.Tensor] = None -): - """ - Use multi-tensor-applier to copy values from one list to another. 
- We don't have a bfloat16 implementation so for now if the overflow_buf - is not provided, we default back to simple loop copy to be compatible - with bfloat16. - """ - if overflow_buf is not None: - overflow_buf.fill_(0) - # Scaling with factor `1.0` is equivalent to copy. - multi_tensor_applier(multi_tensor_scale_impl, overflow_buf, [this, that], 1.0) - else: - for this_, that_ in zip(this, that): - that_.copy_(this_) - - -""" -The code below abstracts the functionalities needed for implementing "--fp8-param-gather" into -several functions. It provides different implementations for each function based on different -versions of TE, ensuring compatibility across various TE versions. - -Currently, there are three functions: - - modify_underlying_storage - This function is used in DDP to place all parameters into a contiguous buffer. For - non-fp8 tensors, replacing their data is simple, just using code like - "tensor.data = new_data". However, for fp8 tensors, their raw data is not stored in the - ".data" attribute, and it varies with different TE versions and different recipes. This - function provides a unified interface to replace the underlying storage of a fp8 tensor. - - quantize_param_shard - This function is used in dist-opt to cast fp32 main params to fp8 params. For non-fp8 - params, this casting is as simple as "bf16_params.copy_(fp32_main_params)"; but for fp8 - params, the casting logic varies with different TE versions and different recipes. This - function provides a unified interface to cast fp32 main params to fp8 params, and also - updates the necessary attributes (like amax, scale, scale_inv or transpose cache) of the - fp8 model params. - - correct_amax_history_if_needed - This function is used to correct the amax history of fp8 tensors. In TE1.x, some inplace - copy operations will write unwanted values to the amax_history of fp8 tensors. This function - corrects the amax_history back. For TE2.x, it's an empty function. 
- Only useful for delayed scaling. -""" -if HAVE_TE and is_te_min_version("2.2"): - # Supported TE versions: 2.2+ - from transformer_engine.pytorch.tensor import QuantizedTensor - - def _modify_underlying_storage_impl( - fp8_tensor: QuantizedTensor, new_raw_data: torch.Tensor - ) -> None: - from transformer_engine.pytorch.tensor.utils import replace_raw_data - - replace_raw_data(fp8_tensor, new_raw_data) - - def _quantize_param_shard_impl( - model_params: List[QuantizedTensor], - main_params: List[torch.Tensor], - start_offsets: List[int], - data_parallel_group: ProcessGroup, - fsdp_shard_model_params: Optional[List[torch.Tensor]] = None, - ) -> None: - if len(model_params) == 0: - return - - from transformer_engine.pytorch.tensor.utils import cast_master_weights_to_fp8 - - args = [model_params, main_params, start_offsets, data_parallel_group] - if fsdp_shard_model_params is not None: - if get_te_version() == PkgVersion("2.3.0.dev0+5fdd7bb") or is_te_min_version("2.3.0"): - args.append(fsdp_shard_model_params) - else: - raise NotImplementedError( - f"FSDP with --fp8-param-gather is not supported in TE v{get_te_version()}" - ) - cast_master_weights_to_fp8(*args) - -elif HAVE_TE and is_te_min_version("2.0"): - # Supported TE versions: 2.0 - from transformer_engine.pytorch.tensor import QuantizedTensor - from transformer_engine.pytorch.tensor.float8_tensor import Float8Tensor - - def _modify_underlying_storage_impl( - fp8_tensor: QuantizedTensor, new_raw_data: torch.Tensor - ) -> None: - old_raw_data = fp8_tensor._data - assert old_raw_data.dtype == new_raw_data.dtype - new_raw_data.detach().copy_(old_raw_data) - fp8_tensor._data = new_raw_data - del old_raw_data - - def _quantize_param_shard_impl( - model_params: List[QuantizedTensor], - main_params: List[torch.Tensor], - start_offsets: List[int], - data_parallel_group: ProcessGroup, - fsdp_shard_model_params: Optional[List[torch.Tensor]] = None, - ) -> None: - if len(model_params) == 0: - return - - if 
fsdp_shard_model_params is None: - fsdp_shard_model_params = [None] * len(model_params) - - for model_param, main_param, start_offset, fsdp_shard_model_param in zip( - model_params, main_params, start_offsets, fsdp_shard_model_params - ): - if main_param is None: - continue - - if fsdp_shard_model_param is not None: - shard_model_param = fsdp_shard_model_param - else: - shard_model_param = model_param._data.view(-1)[ - start_offset : start_offset + main_param.numel() - ] - - quantizer = model_param._quantizer - # When not using --fp8-param-gather, the main_param (fp32) is first cast to bf16/fp16, - # and then cast to fp8 during forward. - # Although it's not necessary when --fp8-param-gather is enabled, we still keep this - # logic to keep numerical consistency. So here cast the main_param to model_param.dtype. - main_param = main_param.to(model_param.dtype) - out = Float8Tensor( - shape=main_param.size(), - dtype=model_param.dtype, - requires_grad=False, - data=shard_model_param, - fp8_scale_inv=model_param._scale_inv, - fp8_dtype=model_param._fp8_dtype, - quantizer=quantizer, - ) - quantizer.update_quantized(main_param, out) - - amaxes = [] - scales = [] - scale_invs = [] - for model_param in model_params: - quantizer = model_param._quantizer - amaxes.append(quantizer.amax.view(1)) - scales.append(quantizer.scale.view(1)) - scale_invs.append(model_param._scale_inv.view(1)) - model_param._reset_caches() - - dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device="cuda") - - # Update scaling factors. - packed_scales = torch.empty(len(scales), dtype=torch.float32, device=scales[0].device) - packed_scale_views = [packed_scales[i].view(1) for i in range(len(scales))] - _multi_tensor_copy_this_to_that(scales, packed_scale_views, dummy_overflow_buf) - torch.reciprocal(packed_scales, out=packed_scales) - _multi_tensor_copy_this_to_that(packed_scale_views, scale_invs, dummy_overflow_buf) - - # Reduce amaxes. - # Note: Assume each param has a separate amax. 
- packed_amaxes = torch.empty(len(amaxes), dtype=torch.float32, device=amaxes[0].device) - packed_amax_views = [packed_amaxes[i].view(1) for i in range(len(amaxes))] - _multi_tensor_copy_this_to_that(amaxes, packed_amax_views, dummy_overflow_buf) - torch.distributed.all_reduce( - packed_amaxes, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group - ) - _multi_tensor_copy_this_to_that(packed_amax_views, amaxes, dummy_overflow_buf) - -else: - # Fallback impl if TE version is invalid or TE is not installed. - def _modify_underlying_storage_impl(*args, **kwargs): - raise RuntimeError( - "Invalid Transformer Engine version for FP8 distributed optimizer, " - "please install Transformer Engine 2.0+ or install Megatron-Core" - ) - - def _quantize_param_shard_impl(*args, **kwargs): - raise RuntimeError( - "Invalid Transformer Engine version for FP8 distributed optimizer, " - "please install Transformer Engine 2.0+ or install Megatron-Core" - ) - - -def modify_underlying_storage(tensor: torch.Tensor, new_raw_data: torch.Tensor): - """Replace the underlying raw data of a tensor with new data.""" - _modify_underlying_storage_impl(tensor, new_raw_data) - - -def quantize_param_shard( - model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params=None -): - """Cast shard fp32 main params to fp8 model params.""" - assert HAVE_TE, "Transformer Engine is required for quantizing parameters." 
- _quantize_param_shard_impl( - model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params - ) - - def _get_cuda_rng_state( device: Union[int, str, torch.device] = "cuda", clone: bool = False, graph_safe: bool = False ) -> torch.Tensor: diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 9d9bfcd7e90..9aba3a7cb8e 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -744,6 +744,13 @@ def validate_args(args, defaults={}): assert args.ckpt_format == "fsdp_dtensor", \ "Megatron FSDP only supports fsdp_dtensor checkpoint format" + + if args.use_megatron_fsdp: + args.reuse_grad_buf_for_mxfp8_param_ag = False + + if args.fsdp_manual_registration: + assert args.use_megatron_fsdp, "FSDP manual registration is only supported with Megatron FSDP" + assert args.nccl_ub, "FSDP manual registration is only supported with nccl-ub option" # Parameters dtype. args.params_dtype = torch.float From 1ec0beb1eb973058fad8d7a4ab9b6a0699485199 Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Wed, 7 Jan 2026 04:23:47 +0800 Subject: [PATCH 216/334] [Dev] Partial CUDA Graph support for EP Overlap (#2810) --- .../common/model_chunk_schedule_plan.py | 40 +- .../core/models/gpt/fine_grained_callables.py | 208 +++--- megatron/core/pipeline_parallel/schedules.py | 105 ++++ megatron/core/pipeline_parallel/utils.py | 4 +- megatron/core/transformer/cuda_graphs.py | 84 ++- megatron/core/transformer/moe/moe_layer.py | 7 +- .../core/transformer/transformer_config.py | 15 + .../core/transformer/transformer_layer.py | 36 ++ .../golden_values_dev_dgx_h100.json | 592 +++++++++--------- .../model_config.yaml | 5 +- .../test_cuda_graphed_schedule_chunk_1f1b.py | 372 +++++++++++ .../a2a_overlap/test_schedule_layer_1f1b.py | 2 +- tests/unit_tests/a2a_overlap/utils.py | 1 + .../pipeline_parallel/test_schedules.py | 48 ++ .../transformer/test_submodule_callables.py | 16 +- 15 files 
changed, 1109 insertions(+), 426 deletions(-) create mode 100644 tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 07bab1cb486..b8f11ed9d38 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -17,6 +17,7 @@ get_comm_stream, get_comp_stream, ) +from megatron.core.transformer.enums import CudaGraphScope class ModelChunkState: @@ -37,23 +38,20 @@ class TransformerLayerSchedulePlan: mtp post process nodes. layer (TransformerLayerSchedulePlan) - ├── attn (TransformerLayerNode): attention module - ├── post_attn (TransformerLayerNode): layernorm -> router -> dispatch preprocess + ├── attn (TransformerLayerNode): attention -> router -> dispatch preprocess ├── moe_dispatch (TransformerLayerNode): dispatch All2All ├── mlp (TransformerLayerNode): mlp module ├── moe_combine (TransformerLayerNode): combine All2All └── mtp_post_process (PostProcessNode): mtp post process Note that MTP layer has the same operation and execution order with TransformerLayer regarding - post_attn, moe_dispatch, mlp, moe_combine, but contains extra operations in attn and - mtp_post_process: + moe_dispatch, mlp, moe_combine, but contains extra operations in attn and mtp_post_process: * mtp.attn wraps around transformer_layer.attn with extra norm, proj and embedding operations. * mtp.mtp_post_process contains output_layer, mtp loss operations, whereas transformer_layer.mtp_post_process is empty. """ attn = None - post_attn = None moe_dispatch = None mlp = None moe_combine = None @@ -117,7 +115,7 @@ def release_state(self): def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args): """ Builds the callable nodes for the transformer/mtp layer: - attn, post_attn, mlp, moe_dispatch and moe_combine, and mtp_post_process. 
+ attn, mlp, moe_dispatch and moe_combine, and mtp_post_process. """ from megatron.core.models.gpt.fine_grained_callables import ( TransformerLayerNode, @@ -137,16 +135,7 @@ def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args): else isinstance(self.layer.mlp, MoELayer) ) - enable_deepep = ( - self.layer.config.moe_token_dispatcher_type == "flex" - and self.layer.config.moe_flex_dispatcher_backend == "deepep" - ) - enable_hybridep = ( - self.layer.config.moe_token_dispatcher_type == "flex" - and self.layer.config.moe_flex_dispatcher_backend == "hybridep" - ) - extra_args["enable_deepep"] = enable_deepep - extra_args["enable_hybridep"] = enable_hybridep + extra_args["config"] = self.layer.config extra_args["is_moe"] = is_moe extra_args["delay_wgrad_compute"] = self.layer.config.delay_wgrad_compute extra_args["is_mtp"] = is_mtp @@ -167,7 +156,6 @@ def create_node(stream, module, name): ( attn_module, - post_attn_module, moe_dispatch_module, mlp_module, moe_combine_module, @@ -179,11 +167,9 @@ def create_node(stream, module, name): self.attn = create_node(comp_stream, attn_module, "attn") self.mlp = create_node(comp_stream, mlp_module, "mlp") if is_moe: - self.post_attn = create_node(comp_stream, post_attn_module, "post_attn") self.moe_dispatch = create_node(comm_stream, moe_dispatch_module, "moe_dispatch") self.moe_combine = create_node(comm_stream, moe_combine_module, "moe_combine") else: - self.post_attn = NoopScheduleNode() self.moe_dispatch = NoopScheduleNode() self.moe_combine = NoopScheduleNode() @@ -194,6 +180,11 @@ def create_node(stream, module, name): else: self.mtp_post_process = NoopScheduleNode() + # mlp and combine may receive dgrad from attn, which is managed by cuda graph. + if CudaGraphScope.attn in self.config.cuda_graph_scope: + self.mlp.manual_grads_release = False + self.moe_combine.manual_grads_release = False + def get_fp8_context(self): """ Get the fp8 context for the transformer layer. 
@@ -216,8 +207,8 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) to maximize parallelism and efficiency. When f_layer and b_layer are not None, forward and backward pass are overlapped as follows: - comm_stream: combine_bwd | dispatch_fwd->dispatch_bwd | combine_fwd - comp_stream: attn_fwd->post_attn_fwd| mlp_bwd->mlp_bwd_dw->mlp_fwd| post_attn_bwd->attn_bwd + comm_stream: combine_bwd | dispatch_fwd->dispatch_bwd | combine_fwd + comp_stream: attn_fwd | mlp_bwd->mlp_bwd_dw->mlp_fwd| attn_bwd For MTP, mtp_post_process_fwd is executed after the combine_fwd in the comp_stream, and mtp_post_process_bwd is executed before the combine_bwd in the comp_stream. @@ -240,7 +231,6 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) if f_layer is not None: with f_layer.get_fp8_context(): f_input = f_layer.attn.forward(f_input) - f_input = f_layer.post_attn.forward(f_input) if b_layer is not None: b_grad = b_layer.mlp.backward(b_grad) @@ -254,7 +244,6 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) b_grad = b_layer.moe_dispatch.backward(b_grad) if b_layer is not None and b_layer.config.ep_overlap_early_attn_memory_release: - b_grad = b_layer.post_attn.backward(b_grad) b_grad = b_layer.attn.backward(b_grad) if f_layer is not None: @@ -267,7 +256,6 @@ def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False) f_input = f_layer.mtp_post_process.forward(f_input) if b_layer is not None and not b_layer.config.ep_overlap_early_attn_memory_release: - b_grad = b_layer.post_attn.backward(b_grad) b_grad = b_layer.attn.backward(b_grad) # Delay the last attn_dw in backward pass (attn_dw of the first layer) @@ -371,6 +359,10 @@ def __init__( model, self._model_chunk_state, self._event, comp_stream ) + # preprocess may receive dgrad from attn, which is managed by cuda graph. 
+ if CudaGraphScope.attn in model.config.cuda_graph_scope: + self.pre_process.manual_grads_release = False + def _build_layer_schedule_plan(self, module, comp_stream, comm_stream): if module is None: return diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index 5913dfaba33..b4879cd1e13 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -6,14 +6,17 @@ from typing import Optional import torch +from torch import Tensor from megatron.core import tensor_parallel +from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( fine_grained_offloading_group_commit, fine_grained_offloading_group_start, get_fine_grained_offloading_context, ) from megatron.core.pipeline_parallel.utils import ScheduleNode, make_viewless +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.module import float16_to_fp32 from megatron.core.transformer.moe.moe_layer import MoELayer from megatron.core.transformer.multi_token_prediction import ( @@ -42,14 +45,13 @@ def wrapped_func(*args, **kwarg): @internal_api -def should_free_input(name, is_moe, enable_deepep, enable_hybridep): +def should_free_input(name, is_moe, config): """Determine if the node should free its input memory. 
Args: name: Node name is_moe: Whether it's a MoE model - enable_deepep: Whether to use DeepEP dispatcher - enable_hybridep: Whether to use HybridEP dispatcher + config: TransformerConfig object Returns: bool: Whether to free input memory @@ -57,6 +59,14 @@ def should_free_input(name, is_moe, enable_deepep, enable_hybridep): # For dense layers [attn, fake, mlp, fake], the input is needed during backward pass if not is_moe: return False + enable_deepep = ( + config.moe_token_dispatcher_type == "flex" + and config.moe_flex_dispatcher_backend == "deepep" + ) + enable_hybridep = ( + config.moe_token_dispatcher_type == "flex" + and config.moe_flex_dispatcher_backend == "hybridep" + ) # Define which nodes should free input memory # Since we split the computing graph into multiple nodes, we can manually control # when and how to free the input memory. @@ -69,7 +79,10 @@ def should_free_input(name, is_moe, enable_deepep, enable_hybridep): # and probs before dispatch A2A and it's not needed anymore after the forward pass # For DeepEP and HybridEP dispatcher mode, they are both needed in backward pass # and cannot be freed. - "moe_dispatch": not (enable_deepep or enable_hybridep), + # If moe_preprocess is in cuda graph scope, tokens and probs are fixed size tensors, + # so they cannot be freed. + "moe_dispatch": not (enable_deepep or enable_hybridep) + and (CudaGraphScope.moe_preprocess not in config.cuda_graph_scope), } return free_input_nodes.get(name, False) @@ -239,13 +252,13 @@ def __init__( it's the per_batch_state_context, o.w. nullcontext name (str): Node name, also used to determine memory strategy bwd_dw_callables (list): List of weight gradient functions for the layer. - extra_args (dict): Extra arguments for nodes: is_moe, enable_deepep, enable_hybridep. + extra_args (dict): Extra arguments for the node: is_moe, config. 
""" # determine whether to free input memory + config = extra_args.get("config", None) + assert config is not None, "model config must be passed to TransformerLayerNode." is_moe = extra_args.get("is_moe", False) - enable_deepep = extra_args.get("enable_deepep", False) - enable_hybridep = extra_args.get("enable_hybridep", False) - free_input = should_free_input(name, is_moe, enable_deepep, enable_hybridep) + free_input = should_free_input(name, is_moe, config) self.delay_wgrad_compute = extra_args.get("delay_wgrad_compute", False) super().__init__( @@ -310,8 +323,8 @@ def backward_dw(self): module.backward_dw() # the output grad memory is last used in wgrad compute, should be safe to release. - assert self.delay_grads_release, "output grad memory should be valid before wgrad." - if self.manual_release_grads: + if self.manual_grads_release: + assert self.delay_grads_release, "output grad memory should be valid before wgrad." for tensor in self.output_grads: tensor.untyped_storage().resize_(0) self.output_grads = None @@ -364,11 +377,101 @@ def build_transformer_layer_callables(layer: TransformerLayer): and layer.config.moe_flex_dispatcher_backend == "hybridep" ) + class _BackwardDWWrapper: + def __init__(self): + self.graphed_backward_dw_callable = None + self.attn_dw_callable = layer.self_attention.backward_dw + if isinstance(layer.mlp, MoELayer): + self.shared_expert_dw_callable = partial( + layer.mlp.backward_dw, routed_experts=False, shared_experts=True + ) + else: + self.shared_expert_dw_callable = None + self.cuda_graph_scope = layer.config.cuda_graph_scope + + def set_graphed_backward_dw_callable(self, graphed_backward_dw_callable): + """Store the CUDA graphed backward weight gradient callable.""" + self.graphed_backward_dw_callable = graphed_backward_dw_callable + + def backward_dw(self): + """Execute weight gradients, skipping CUDA graphed components during replay.""" + is_replay = hasattr(layer, 'cuda_graphs') and layer.cuda_graphs + if 
self.shared_expert_dw_callable is not None and ( + not is_replay or CudaGraphScope.moe_router not in self.cuda_graph_scope + ): + self.shared_expert_dw_callable() + if not is_replay or CudaGraphScope.attn not in self.cuda_graph_scope: + self.attn_dw_callable() + if is_replay and self.graphed_backward_dw_callable is not None: + self.graphed_backward_dw_callable() + + attn_backward_dw_wrapper = _BackwardDWWrapper() + def submodule_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): """ - Performs same attnention forward logic as GPT Model. + Performs same attnention forward logic as GPT Model and forward pass for + computations between attention and dispatch: + pre mlp layernorm->router->dispatch preprocess """ - hidden_states, _ = layer._forward_attention( + + if hasattr(layer, 'cuda_graphs') and layer.cuda_graphs: + assert ( + CudaGraphScope.mlp not in layer.config.cuda_graph_scope + and CudaGraphScope.moe not in layer.config.cuda_graph_scope + ), ( + "Supported CUDA graph scope with EP overlap: " + "attn, moe_router, moe_preprocess, mlp, got {}".format( + layer.config.cuda_graph_scope + ) + ) + forward_func = layer._te_cuda_graph_replay + attn_backward_dw_wrapper.set_graphed_backward_dw_callable( + partial(layer.backward_dw_cudagraph, layer.current_microbatch) + ) + else: + # wrapper function that keeps consistent api with cuda graph replay + def forward_func( + hidden_states: Tensor, + attention_mask: Optional[Tensor] = None, + rotary_pos_emb: Optional[Tensor] = None, + rotary_pos_cos: Optional[Tensor] = None, + rotary_pos_sin: Optional[Tensor] = None, + packed_seq_params: Optional[PackedSeqParams] = None, + sequence_len_offset: Optional[Tensor] = None, + ): + hidden_states, _ = layer._forward_attention( + hidden_states=hidden_states, + attention_mask=attention_mask, + rotary_pos_emb=rotary_pos_emb, + rotary_pos_cos=rotary_pos_cos, + rotary_pos_sin=rotary_pos_sin, + packed_seq_params=packed_seq_params, + sequence_len_offset=sequence_len_offset, + ) + 
if not isinstance(layer.mlp, MoELayer): + return hidden_states, None, None, None + if layer.offload_mlp_norm: + hidden_states = fine_grained_offloading_group_start( + hidden_states, name="mlp_norm" + ) + if layer.recompute_pre_mlp_layernorm: + layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() + with get_fine_grained_offloading_context(layer.offload_mlp_norm): + pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( + layer.pre_mlp_layernorm, hidden_states + ) + else: + with get_fine_grained_offloading_context(layer.offload_mlp_norm): + pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) + + shared_expert_output = layer.mlp.shared_experts_compute(pre_mlp_layernorm_output) + probs, routing_map = layer.mlp.route(pre_mlp_layernorm_output) + local_tokens, probs, _ = layer.mlp.preprocess( + pre_mlp_layernorm_output, probs, routing_map + ) + return hidden_states, local_tokens, probs, shared_expert_output + + hidden_states, local_tokens, probs, shared_expert_output = forward_func( hidden_states=hidden_states, attention_mask=node.chunk_state.attention_mask, rotary_pos_emb=node.chunk_state.rotary_pos_emb, @@ -377,33 +480,14 @@ def submodule_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): packed_seq_params=node.chunk_state.packed_seq_params, sequence_len_offset=node.chunk_state.sequence_len_offset, ) - return hidden_states - - def submodule_post_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): - """ - Run forward pass for computations between attention and dispatch: - pre mlp layernorm->router->dispatch preprocess - """ - if layer.offload_mlp_norm: - hidden_states = fine_grained_offloading_group_start(hidden_states, name="mlp_norm") - if layer.recompute_pre_mlp_layernorm: - layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with get_fine_grained_offloading_context(layer.offload_mlp_norm): - pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( - 
layer.pre_mlp_layernorm, hidden_states - ) - else: - with get_fine_grained_offloading_context(layer.offload_mlp_norm): - pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) - - probs, routing_map = layer.mlp.route(pre_mlp_layernorm_output) - local_tokens, probs, _ = layer.mlp.preprocess(pre_mlp_layernorm_output, probs, routing_map) + if not isinstance(layer.mlp, MoELayer): + return hidden_states # Detach here for mlp_bda residual connection node.layer_state.residual = node.detach(hidden_states) if layer.mlp.use_shared_expert and not layer.mlp.shared_expert_overlap: - # Detach here for shared expert connection - node.layer_state.pre_mlp_layernorm_output = node.detach(pre_mlp_layernorm_output) + # Detach here for shared expert connection in moe_combine + node.layer_state.shared_expert_output = node.detach(shared_expert_output) return local_tokens, probs @@ -428,7 +512,6 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): Run forward pass for computations between dispatch and combine: post dispatch->experts->combine preprocess """ - shared_expert_output = None dispatched_probs = node.layer_state.dispatched_probs token_dispatcher = layer.mlp.token_dispatcher if enable_deepep or enable_hybridep: @@ -436,10 +519,8 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): # backward graph from connecting to dispatch submodule token_dispatcher._comm_manager.dispatched_probs = dispatched_probs - pre_mlp_layernorm_output = getattr(node.layer_state, 'pre_mlp_layernorm_output', None) - shared_expert_output = layer.mlp.shared_experts_compute(pre_mlp_layernorm_output) - expert_output, mlp_bias = layer.mlp.routed_experts_compute( - dispatched_tokens, dispatched_probs, pre_mlp_layernorm_output + expert_output, _ = layer.mlp.routed_experts_compute( + dispatched_tokens, dispatched_probs, None ) if layer.recompute_pre_mlp_layernorm: @@ -449,16 +530,10 @@ def submodule_moe_forward(node: ScheduleNode, 
dispatched_tokens: torch.Tensor): # release tensor reference after use node.layer_state.dispatched_probs = None node.layer_state.pre_mlp_layernorm_output = None - if shared_expert_output is None: - # Return only expert_output, since shared_expert_output causes backward on None - return expert_output - return expert_output, shared_expert_output - - def submodule_combine_forward( - node: ScheduleNode, - output: torch.Tensor, - shared_expert_output: Optional[torch.Tensor] = None, - ): + + return expert_output + + def submodule_combine_forward(node: ScheduleNode, output: torch.Tensor): """ # Triggers token combine and the remaining computation in the transformer layer. # The `mlp_bda` computation is placed after `mlp.combine` due to data dependency. @@ -468,10 +543,11 @@ def submodule_combine_forward( # with another microbatch's computation and expose the communication. """ residual = node.layer_state.residual - + shared_expert_output = getattr(node.layer_state, 'shared_expert_output', None) output = layer.mlp.combine(output, shared_expert_output) mlp_output_with_bias = (output, None) - + if hasattr(layer, 'cuda_graphs') and layer.cuda_graphs: + layer.mlp.cudagraph_tensor_store.clear() with layer.bias_dropout_add_exec_handler(): hidden_states = layer.mlp_bda(layer.training, layer.config.bias_dropout_fusion)( mlp_output_with_bias, residual, layer.hidden_dropout @@ -507,13 +583,12 @@ def raise_not_implemented(*args): # Build forward and backward callable functions attn_func = submodule_attn_forward - post_attn_func = submodule_post_attn_forward if is_moe else raise_not_implemented dispatch_func = submodule_dispatch_forward if is_moe else raise_not_implemented mlp_func = submodule_moe_forward if is_moe else mlp_wrapper combine_func = submodule_combine_forward if is_moe else raise_not_implemented - forward_funcs = [attn_func, post_attn_func, dispatch_func, mlp_func, combine_func, None] - backward_dw = {"attn": layer.self_attention, "mlp": layer.mlp} + forward_funcs = 
[attn_func, dispatch_func, mlp_func, combine_func, None] + backward_dw = {"attn": attn_backward_dw_wrapper, "mlp": layer.mlp} return forward_funcs, backward_dw @@ -525,9 +600,7 @@ def build_mtp_layer_callables(layer): """ forward_funcs, backward_dw = build_transformer_layer_callables(layer.transformer_layer) - attn_forward, post_attn_forward, dispatch_forward, mlp_forward, combine_forward, _ = ( - forward_funcs - ) + attn_forward, dispatch_forward, mlp_forward, combine_forward, _ = forward_funcs is_moe = isinstance(layer.transformer_layer.mlp, MoELayer) assert is_moe, "MTP layer in a2a overlap only supports MoE layer for now." @@ -588,24 +661,17 @@ def rng_context_wrapper(func, *args, **kwargs): # Build forward and backward callable functions # attn_forward already has rng context, no need to wrap attn_func = submodule_mtp_attn_forward - post_attn_func = partial(rng_context_wrapper, post_attn_forward) dispatch_func = partial(rng_context_wrapper, dispatch_forward) mlp_func = partial(rng_context_wrapper, mlp_forward) combine_func = partial(rng_context_wrapper, combine_forward) mtp_post_process_func = submodule_mtp_postprocess_forward - forward_funcs = [ - attn_func, - post_attn_func, - dispatch_func, - mlp_func, - combine_func, - mtp_post_process_func, - ] - backward_dw = { - "attn": [layer.transformer_layer.self_attention, layer.eh_proj], - "mlp": layer.transformer_layer.mlp, - } + forward_funcs = [attn_func, dispatch_func, mlp_func, combine_func, mtp_post_process_func] + if isinstance(backward_dw["attn"], list): + backward_dw["attn"].append(layer.eh_proj) + else: + backward_dw["attn"] = [backward_dw["attn"], layer.eh_proj] + return forward_funcs, backward_dw diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index a35ccac504a..9dc79ed11f7 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -2,6 +2,7 @@ import contextlib from functools import partial 
+from itertools import zip_longest from typing import Callable, Iterator, List, Optional, Union import torch @@ -845,6 +846,110 @@ def convert_schedule_table_to_order(num_warmup_microbatches, num_model_chunks, s return order +def get_overlap_moe_expert_parallel_comm_order(order, num_layers_per_chunk, capture_wgrad_graph): + """ + This functions gets the order for overlap_moe_expert_parallel_comm schedule for the original + chunk-wise order list. Each chunk is transformered to chunks with only 1 layer so that + layers between 2 chunks can now overlap with each other while following the graph order. + If capture_wgrad_graph is True, the wgrad backward graph is also added to the order by + decreasing the layer id by 0.5. + + Args: + order (List[int]): The original chunk-wise order list. Positive values represent forward + passes for chunks, negative values represent backward passes. The absolute value + indicates the chunk ID (1-indexed). + num_layers_per_chunk (List[int]): Number of graphable layers in each chunk. The length + of this list equals the number of chunks. + capture_wgrad_graph (bool): If True, weight gradient computation graphs are added to the + order by appending entries with layer_id - 0.5. + + Returns: + Tuple[List[float], List[Optional[List[int]]]]: A tuple containing: + - new_order: The layer-wise order list where each chunk is expanded to individual + layers. Positive values are forward passes, negative values are backward passes. + Values with .5 suffix indicate weight gradient computations. + - chunk_id_list: A list parallel to new_order. For forward passes, contains + [chunk_id, layer_index_within_chunk]. For backward passes, contains None. 
+ + Example: + original_order: [1, 2, -2, 1, -1, -1] + num_layers_per_chunk: [1, 2] + capture_wgrad_graph=True: + new_order: [1, 2, 3, 1, -3, -3.5, -2, -2.5, -1, -1.5, -1, -1.5] + chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None, + None, None, None, None, None, None, None] + capture_wgrad_graph=False: + new_order: [1, 2, 3, 1, -3, -2, -1, -1] + chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None, None, None, None] + """ + + def _add_order(new_order, chunk_id_list, c_id, layer_id, is_wgrad=False, index=None): + if is_wgrad: + new_order.append(layer_id - 0.5) + else: + new_order.append(layer_id) + if c_id > 0: + chunk_id_list.append([abs(c_id) - 1, index]) + else: + chunk_id_list.append(None) + + new_order = [] + chunk_id_list = [] + add_order = partial(_add_order, new_order, chunk_id_list) + first_backward_idx, last_forward_idx = None, None + for idx, c_id in enumerate(order): + if first_backward_idx is None and c_id < 0: + first_backward_idx = idx + if c_id > 0: + last_forward_idx = idx + + def get_layer_range(c_id): + num_layers = num_layers_per_chunk[abs(c_id) - 1] + num_layers_previous_chunks = sum(num_layers_per_chunk[: abs(c_id) - 1]) + if c_id > 0: + return list( + range(num_layers_previous_chunks + 1, num_layers_previous_chunks + num_layers + 1) + ) + return list(range(-num_layers_previous_chunks - num_layers, -num_layers_previous_chunks)) + + # warmup stage + for c_id in order[:first_backward_idx]: + layer_range = get_layer_range(c_id) + new_order += layer_range + chunk_id_list.extend([abs(c_id) - 1, i] for i in range(len(layer_range))) + + # 1f1b overlap stage + if first_backward_idx < last_forward_idx: + for c_id_b, c_id_f in zip( + order[first_backward_idx : last_forward_idx + 1 : 2], + order[first_backward_idx + 1 : last_forward_idx + 1 : 2], + ): + layer_range_f = get_layer_range(c_id_f) + layer_range_b = get_layer_range(c_id_b) + index = 0 + for l_b, l_f in zip_longest(layer_range_b, layer_range_f, fillvalue=0): + # always forward graph before 
backward graph + if l_f != 0: + add_order(c_id_f, l_f, index=index) + if l_b != 0: + add_order(c_id_b, l_b) + if capture_wgrad_graph and index < len(layer_range_b) - 1: + add_order(c_id_b, l_b, is_wgrad=True) + index += 1 + # last wgrad backward + if capture_wgrad_graph and layer_range_b: + add_order(c_id_b, layer_range_b[-1], is_wgrad=True) + + # cool down stage, backward graphs only + for c_id in order[last_forward_idx + 1 :]: + for l_b in get_layer_range(c_id): + add_order(c_id, l_b) + if capture_wgrad_graph: + add_order(c_id, l_b, is_wgrad=True) + + return new_order, chunk_id_list + + def forward_backward_pipelining_with_interleaving( *, forward_step_func, diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index e7e416f99bd..d38f6d702c0 100644 --- a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -182,8 +182,8 @@ def __init__( self.free_input = free_input self.inputs = None self.outputs = None + self.manual_grads_release = False self.delay_grads_release = False - self.manual_release_grads = False def default_backward_func(self, outputs, output_grad): """Default backward function""" @@ -269,7 +269,7 @@ def _backward(self, *output_grad): # to avoid delayed garbage collection. If # delay_grads_release is True, dgrad is last used in # wgrad compute and skip the release here. 
- if self.manual_release_grads and not self.delay_grads_release: + if self.manual_grads_release and not self.delay_grads_release: g.untyped_storage().resize_(0) grads = self.get_grad() diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 27e6c65c738..b566c1830dc 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -10,6 +10,7 @@ from contextlib import nullcontext from dataclasses import fields, is_dataclass from enum import Enum +from math import ceil from typing import Any, Dict, List, Optional import torch @@ -1510,7 +1511,7 @@ def graphs_created(self): """ return self._graphs_created - def _get_sample_arguments(self, order): + def _get_sample_arguments(self, order, chunk_id_list=None): """ Generate sample arguments and keyword arguments for CUDA Graph capturing with memory-optimized buffer reuse. @@ -1539,6 +1540,9 @@ def _get_sample_arguments(self, order): order (List[int]): The forward/backward execution order from convert_schedule_table_to_order(). Positive integers represent forward passes (1-indexed chunk ID), negative integers represent backward passes. + chunk_id_list (List[Tuple[int, int]]): The list of chunk IDs and layer IDs in the + order. This is useful only when overlap_moe_expert_parallel_comm is enabled, + the order maps each layers' idx to their original chunk id. Returns: Tuple[List[Tuple], List[Dict]]: A tuple containing: @@ -1560,9 +1564,11 @@ def _get_sample_arguments(self, order): assert self.num_model_chunks == max( order ), "num_model_chunks must match the max chunk id in order." - assert ( - self.num_microbatches == len(order) // self.num_model_chunks // 2 - ), "num_microbatches must match the number of microbatches in order." + if chunk_id_list is None: + # check only if 1f1b overlap is disabled. 
+ assert ( + self.num_microbatches == len(order) // self.num_model_chunks // 2 + ), "num_microbatches must match the number of microbatches in order." # Generate sample arguments and keyword arguments for capturing. sample_args = [None] * (len(self.flattened_callables) * self.num_microbatches) @@ -1645,8 +1651,8 @@ def get_rotary_pos_emb(transformer_module, transformer_input): consumed_sample_queue = {} layer_sample_keys_cache = {} fwd_idx = [0] * self.num_model_chunks - for chunk_id in order: - model_chunk_idx = abs(chunk_id) - 1 + for idx, chunk_id in enumerate(order): + model_chunk_idx = abs(ceil(chunk_id)) - 1 if chunk_id > 0: if model_chunk_idx not in fwd_sample_queues: @@ -1655,7 +1661,14 @@ def get_rotary_pos_emb(transformer_module, transformer_input): sample_start_idx = (prefix_num_layers[model_chunk_idx] * self.num_microbatches) + ( fwd_idx[model_chunk_idx] * self.num_layers_per_chunk[model_chunk_idx] ) - for layer_idx, layer in enumerate(self.callables_per_chunk[model_chunk_idx]): + if chunk_id_list: + model_chunk_idx = chunk_id_list[idx][0] + callables_curr_chunk = [ + self.callables_per_chunk[model_chunk_idx][chunk_id_list[idx][1]] + ] + else: + callables_curr_chunk = self.callables_per_chunk[model_chunk_idx] + for layer_idx, layer in enumerate(callables_curr_chunk): per_callable_fwd_idx = sample_start_idx + layer_idx # Get sample_args and sample_kwargs for index per_callable_fwd_idx. @@ -1692,7 +1705,7 @@ def get_rotary_pos_emb(transformer_module, transformer_input): # reuse the static inputs of a previous forward pass for this forward pass. # If not, we still need to generate the new static inputs. 
sample_keys = layer_sample_keys_cache[id(layer)] - + model_chunk_idx = abs(chunk_id) - 1 fwd_sample_queues[model_chunk_idx].append((sample_keys, per_callable_fwd_idx)) if consumed_sample_queue.get(sample_keys, []): # We can reuse the static inputs of a previous forward pass for this @@ -1714,13 +1727,16 @@ def get_rotary_pos_emb(transformer_module, transformer_input): # Unfortunately, no previous static inputs are available for reuse, # sample_args is still None. Last attempt: generate the new static inputs # for this forward pass. + if chunk_id_list: + model_chunk_idx = chunk_id_list[idx][0] sample_args[per_callable_fwd_idx], sample_kwargs[per_callable_fwd_idx] = ( _get_layer_static_inputs( layer, self.chunks_with_decoder[model_chunk_idx] ) ) + model_chunk_idx = abs(chunk_id) - 1 fwd_idx[model_chunk_idx] += 1 - else: + elif ceil(chunk_id) == chunk_id: num_consumed_samples = min( len(fwd_sample_queues[model_chunk_idx]), self.num_layers_per_chunk[model_chunk_idx], @@ -1734,6 +1750,9 @@ def get_rotary_pos_emb(transformer_module, transformer_input): fwd_sample_queues[model_chunk_idx] = fwd_sample_queues[model_chunk_idx][ num_consumed_samples: ] + else: + # skip register static inputs for wgrad backward graphs + continue return sample_args, sample_kwargs @@ -1746,12 +1765,16 @@ def _get_cuda_graph_input_data(self): # Get the PP and VPP scheduling order. from megatron.core.pipeline_parallel.schedules import ( convert_schedule_table_to_order, + get_overlap_moe_expert_parallel_comm_order, get_pp_rank_microbatches, get_schedule_table, ) # If PP is not enabled, we only need to capture one microbatch. - if parallel_state.get_pipeline_model_parallel_world_size() == 1: + if ( + parallel_state.get_pipeline_model_parallel_world_size() == 1 + and not self.config.overlap_moe_expert_parallel_comm + ): assert ( self.num_model_chunks == 1 ), "If PP is not enabled, there should be only one model chunk." 
@@ -1780,9 +1803,36 @@ def _get_cuda_graph_input_data(self): level=logging.DEBUG, msg=f'Rank {torch.distributed.get_rank()}: ORDER {order}', ) + chunk_id_list = None + if self.config.overlap_moe_expert_parallel_comm: + wgrad_in_graph_scope = CudaGraphScope.attn in self.config.cuda_graph_scope or ( + CudaGraphScope.moe_router in self.config.cuda_graph_scope + and self.config.moe_shared_expert_intermediate_size is not None + and not self.config.moe_shared_expert_overlap + ) + capture_wgrad_graph = self.config.delay_wgrad_compute and wgrad_in_graph_scope + order, chunk_id_list = get_overlap_moe_expert_parallel_comm_order( + order, self.num_layers_per_chunk, capture_wgrad_graph + ) + self.num_layers_per_chunk = [1] * sum(self.num_layers_per_chunk) + self.num_model_chunks = max(order) + _order_without_wgrad = [] + for c_id in order: + if ceil(c_id) != c_id: + continue + _order_without_wgrad.append(c_id) + self.num_microbatches = len(_order_without_wgrad) // self.num_model_chunks // 2 + log_on_each_pipeline_stage( + logger=logger, + tp_group=None, + dp_cp_group=None, + level=logging.DEBUG, + msg=f'Rank {torch.distributed.get_rank()}: ' + f'ORDER after overlap_moe_expert_parallel_comm {order}', + ) # Generate sample arguments and keyword arguments for capturing. 
- sample_args, sample_kwargs = self._get_sample_arguments(order) + sample_args, sample_kwargs = self._get_sample_arguments(order, chunk_id_list) def get_make_graphed_callables_kwargs(): kwargs = {'allow_unused_input': True, '_order': order} @@ -1920,13 +1970,17 @@ def create_cudagraphs(self): for layer_number, layer in enumerate(layers): layer.cuda_graphs = [] for batch_number in range(self.num_microbatches): - layer.cuda_graphs.append( - graphs[ + if self.config.overlap_moe_expert_parallel_comm: + graph_idx = ( + num_layers_accumulated + layer_number + ) * self.num_microbatches + batch_number + else: + graph_idx = ( num_layers_accumulated * self.num_microbatches + batch_number * len(layers) + layer_number - ] - ) + ) + layer.cuda_graphs.append(graphs[graph_idx]) num_layers_accumulated += len(layers) self._finish_capturing(start_time) diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py index 3742d064508..e44d8647bd6 100644 --- a/megatron/core/transformer/moe/moe_layer.py +++ b/megatron/core/transformer/moe/moe_layer.py @@ -377,10 +377,11 @@ def custom_forward(hidden_states, padding_mask=None): return outputs - def backward_dw(self): + def backward_dw(self, routed_experts: bool = True, shared_experts: bool = False): """Compute weight gradients for experts and shared experts.""" - self.experts.backward_dw() - if self.use_shared_expert and not self.shared_expert_overlap: + if routed_experts: + self.experts.backward_dw() + if shared_experts and self.use_shared_expert and not self.shared_expert_overlap: self.shared_experts.backward_dw() def set_for_recompute_pre_mlp_layernorm(self): diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 365c7a265eb..3a57f09f6cf 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -1869,6 +1869,16 @@ def __post_init__(self): 'when enabling 
overlap_moe_expert_parallel_comm with MTP layer.' ) + if self.cuda_graph_impl != "none": + assert ( + self.cuda_graph_impl == "transformer_engine" + and CudaGraphScope.moe not in self.cuda_graph_scope + and CudaGraphScope.mlp not in self.cuda_graph_scope + ), ( + 'CUDA graph scope on moe and mlp is not ' + 'supported with overlap_moe_expert_parallel_comm' + ) + # Check delay_wgrad_compute compatibility if self.delay_wgrad_compute: assert ( @@ -1877,6 +1887,11 @@ def __post_init__(self): assert ( not self.moe_use_legacy_grouped_gemm ), 'delay_wgrad_compute is not supported with legacy groupedgemm implementation' + if self.cuda_graph_impl == "transformer_engine": + assert is_te_min_version("2.10.0"), ( + 'TE version >= 2.10.0 is required for delay_wgrad_compute with ' + 'partial cuda graph' + ) if self.ep_overlap_early_attn_memory_release: assert self.overlap_moe_expert_parallel_comm, ( diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index 5c310cc81e4..53a1470c492 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -888,6 +888,10 @@ def _te_cuda_graph_replay(self, *args, **kwargs): # CUDA Graph captures the whole MLP/MoE part. CUDA Graph output is the layer output. assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output." output = cuda_graph_output.pop() + assert ( + not self.config.overlap_moe_expert_parallel_comm + ), "EP overlap must be \ + disabled when CUDA graph captures the whole MLP/MoE part." elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope: # CUDA Graph partially captures the MoE. # The rest of the layer should go to the normal pass. 
@@ -930,12 +934,35 @@ def _te_cuda_graph_replay(self, *args, **kwargs): residual=residual, shared_expert_output=shared_expert_output, ) + # If EP overlap is enabled, remaining of mlp will be called as fine_grained_callables + # and should be skipped here. + if self.config.overlap_moe_expert_parallel_comm: + probs, routing_map = self.mlp.route(hidden_states) + hidden_states, probs, residual = self.mlp.preprocess( + hidden_states, probs, routing_map + ) + nvtx_range_pop(suffix="mlp") + return mlp_residual, hidden_states, probs, shared_expert_output mlp_output_with_bias = self.mlp(hidden_states) self.mlp.cudagraph_tensor_store.clear() nvtx_range_pop(suffix="mlp") output = self._forward_post_mlp(mlp_output_with_bias, mlp_residual) else: + # If EP overlap is enabled, needs to return same outputs as submodule.attn + if self.config.overlap_moe_expert_parallel_comm: + assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output." + mlp_residual = cuda_graph_output.pop() + if not self.is_moe_layer: + return mlp_residual, None, None, None + hidden_states = self.pre_mlp_layernorm(mlp_residual) + shared_expert_output = self.mlp.shared_experts_compute(hidden_states) + probs, routing_map = self.mlp.route(hidden_states) + hidden_states, probs, residual = self.mlp.preprocess( + hidden_states, probs, routing_map + ) + return mlp_residual, hidden_states, probs, shared_expert_output + # CUDA Graph does not capture the MLP/MoE part at all. output = self._forward_mlp(*cuda_graph_output) return output, context @@ -1023,6 +1050,15 @@ def _should_call_local_cudagraph(self, *args, **kwargs): return True return False + def backward_dw_cudagraph(self, microbatch_idx): + """ + CUDA Graph backward weight gradient computation for this layer. 
+ """ + cg_index = microbatch_idx % len(self.cuda_graphs) + if not hasattr(self.cuda_graphs[cg_index], 'backward_dw'): + return + self.cuda_graphs[cg_index].backward_dw() + def __call__(self, *args, **kwargs): if self._should_call_local_cudagraph(*args, **kwargs): # Inference mode. diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json index e7da3fb2265..51e9d7154c9 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 11.06693, "2": 11.0602, - "3": 10.21183, - "4": 9.95418, - "5": 10.12235, - "6": 8.8232, - "7": 9.52776, - "8": 8.44297, - "9": 7.84862, - "10": 7.0731, - "11": 9.29877, - "12": 9.14048, - "13": 7.86753, - "14": 8.20366, - "15": 8.2163, - "16": 8.17366, - "17": 8.20571, - "18": 7.48715, - "19": 8.08859, - "20": 7.6351, - "21": 7.94948, - "22": 7.29052, - "23": 7.93234, - "24": 7.43607, - "25": 8.23632, - "26": 7.75037, - "27": 7.69922, - "28": 7.65432, - "29": 7.75197, - "30": 7.56043, - "31": 7.81763, - "32": 6.46365, - "33": 7.20218, - "34": 7.7734, - "35": 7.72752, - "36": 6.71703, - "37": 8.09101, - "38": 7.61439, - "39": 7.96641, - "40": 7.49902, - "41": 7.49619, - "42": 6.10035, - "43": 7.59169, - "44": 7.9135, - "45": 6.83091, - "46": 7.40862, - "47": 7.78798, - "48": 7.87259, - "49": 7.58321, - "50": 6.84073 + "3": 10.21167, + "4": 9.95277, + "5": 10.12388, + "6": 8.82369, + "7": 9.52785, + "8": 8.44289, + "9": 7.85041, + "10": 7.07093, + "11": 9.28562, + "12": 9.13324, + "13": 7.86224, + "14": 8.19705, + "15": 8.22932, + "16": 8.17783, + "17": 8.2161, + "18": 7.50358, + 
"19": 8.08893, + "20": 7.64905, + "21": 7.95183, + "22": 7.29849, + "23": 7.93348, + "24": 7.43565, + "25": 8.2385, + "26": 7.75634, + "27": 7.70075, + "28": 7.66089, + "29": 7.75606, + "30": 7.56072, + "31": 7.81859, + "32": 6.46861, + "33": 7.20532, + "34": 7.77706, + "35": 7.73113, + "36": 6.72448, + "37": 8.09344, + "38": 7.62008, + "39": 7.96872, + "40": 7.4992, + "41": 7.49916, + "42": 6.11993, + "43": 7.59389, + "44": 7.91482, + "45": 6.83633, + "46": 7.41335, + "47": 7.78887, + "48": 7.87666, + "49": 7.58746, + "50": 6.84352 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 47165232.0, - "2": 46897932.0, - "3": 49538636.0, - "4": 293970432.0, - "5": 569239168.0, - "6": 649282112.0, - "7": 1024299712.0, - "8": 745969216.0, - "9": 849837376.0, - "10": 671136704.0, - "11": 820579712.0, - "12": 808020608.0, - "13": 642603904.0, - "14": 628553728.0, - "15": 703673088.0, - "16": 861425280.0, - "17": 658078464.0, - "18": 805612544.0, - "19": 902126016.0, - "20": 890704960.0, - "21": 670006528.0, - "22": 761263488.0, - "23": 761663488.0, - "24": 767542784.0, - "25": 638744256.0, - "26": 742320640.0, - "27": 745099136.0, - "28": 720589184.0, - "29": 751754368.0, - "30": 742684032.0, - "31": 656692864.0, - "32": 790831616.0, - "33": 789798208.0, - "34": 780255872.0, - "35": 776100992.0, - "36": 736753344.0, - "37": 740480640.0, - "38": 715119872.0, - "39": 739264064.0, - "40": 723054656.0, - "41": 698221312.0, - "42": 667945792.0, - "43": 654024448.0, - "44": 651974656.0, - "45": 625754432.0, - "46": 616508224.0, - "47": 607837184.0, - "48": 581971328.0, - "49": 562630912.0, - "50": 544389376.0 + "1": 47165160.0, + "2": 46897928.0, + "3": 52684380.0, + "4": 297108064.0, + "5": 556667648.0, + "6": 661861120.0, + "7": 1027446592.0, + "8": 742822528.0, + "9": 846651648.0, + "10": 693167680.0, + "11": 826875520.0, + "12": 814304768.0, + "13": 642608768.0, + "14": 606554752.0, + "15": 728814528.0, + "16": 845696384.0, + "17": 
667529728.0, + "18": 673504384.0, + "19": 889544960.0, + "20": 890696768.0, + "21": 676302464.0, + "22": 688965120.0, + "23": 789972480.0, + "24": 761249536.0, + "25": 648185280.0, + "26": 789507392.0, + "27": 641355648.0, + "28": 805511168.0, + "29": 773780224.0, + "30": 811888960.0, + "31": 688167744.0, + "32": 834871424.0, + "33": 792944256.0, + "34": 777109568.0, + "35": 763515136.0, + "36": 733607744.0, + "37": 743626240.0, + "38": 746577024.0, + "39": 732972864.0, + "40": 735645696.0, + "41": 556711680.0, + "42": 680528384.0, + "43": 669752960.0, + "44": 667702912.0, + "45": 635197248.0, + "46": 629093120.0, + "47": 626713344.0, + "48": 600843456.0, + "49": 581506752.0, + "50": 572705728.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5249984000.0, - "2": 5250123264.0, - "3": 5250260480.0, - "4": 5249986048.0, - "5": 5250123264.0, - "6": 5250260480.0, - "7": 5250397696.0, - "8": 5250534912.0, - "9": 5250672128.0, - "10": 5250809344.0, - "11": 5250946560.0, - "12": 5251083776.0, - "13": 5251220992.0, - "14": 5251358208.0, - "15": 5251495424.0, - "16": 5251632640.0, - "17": 5251769856.0, - "18": 5251907072.0, - "19": 5252044288.0, - "20": 5252181504.0, - "21": 5252318720.0, - "22": 5252455936.0, - "23": 5252593152.0, - "24": 5252730368.0, - "25": 5252867584.0, - "26": 5253004800.0, - "27": 5253142016.0, - "28": 5253279232.0, - "29": 5253416448.0, - "30": 5253553664.0, - "31": 5253690880.0, - "32": 5253828096.0, - "33": 5253965312.0, - "34": 5254102528.0, - "35": 5254239744.0, - "36": 5254376960.0, - "37": 5254514176.0, - "38": 5254651392.0, - "39": 5254788608.0, - "40": 5254925824.0, - "41": 5255063040.0, - "42": 5255200256.0, - "43": 5255337472.0, - "44": 5255474688.0, - "45": 5255611904.0, - "46": 5255749120.0, - "47": 5255886336.0, - "48": 5256023552.0, - "49": 5256160768.0, - "50": 5256297984.0 + "1": 5275215360.0, + "2": 5275420160.0, + "3": 5275622912.0, + "4": 5275217408.0, + "5": 5275420160.0, 
+ "6": 5275622912.0, + "7": 5275825664.0, + "8": 5276028416.0, + "9": 5276231168.0, + "10": 5276433920.0, + "11": 5276636672.0, + "12": 5276839424.0, + "13": 5277042176.0, + "14": 5277244928.0, + "15": 5277447680.0, + "16": 5277650432.0, + "17": 5277853184.0, + "18": 5278055936.0, + "19": 5278258688.0, + "20": 5278461440.0, + "21": 5278664192.0, + "22": 5278866944.0, + "23": 5279069696.0, + "24": 5279272448.0, + "25": 5279475200.0, + "26": 5279677952.0, + "27": 5279880704.0, + "28": 5280083456.0, + "29": 5280286208.0, + "30": 5280488960.0, + "31": 5280691712.0, + "32": 5280894464.0, + "33": 5281097216.0, + "34": 5281299968.0, + "35": 5281502720.0, + "36": 5281705472.0, + "37": 5281908224.0, + "38": 5282110976.0, + "39": 5282313728.0, + "40": 5282516480.0, + "41": 5282719232.0, + "42": 5282921984.0, + "43": 5283124736.0, + "44": 5283327488.0, + "45": 5283530240.0, + "46": 5283732992.0, + "47": 5283935744.0, + "48": 5284138496.0, + "49": 5284341248.0, + "50": 5284544000.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6101398016.0, - "2": 8124549632.0, - "3": 8124549632.0, - "4": 8124549632.0, - "5": 8124549632.0, - "6": 8127293952.0, - "7": 8146633216.0, - "8": 8146633216.0, - "9": 8151443968.0, - "10": 8151443968.0, - "11": 8153425408.0, - "12": 8153425408.0, - "13": 8153425408.0, - "14": 8153425408.0, - "15": 8153425408.0, - "16": 8169207296.0, - "17": 8190995456.0, - "18": 8190995456.0, - "19": 8190995456.0, - "20": 8206373376.0, - "21": 8206373376.0, - "22": 8209894400.0, - "23": 8209894400.0, - "24": 8209894400.0, - "25": 8209894400.0, - "26": 8209894400.0, - "27": 8209894400.0, - "28": 8209894400.0, - "29": 8209894400.0, - "30": 8231049216.0, - "31": 8231049216.0, - "32": 8231049216.0, - "33": 8231049216.0, - "34": 8231049216.0, - "35": 8231049216.0, - "36": 8231049216.0, - "37": 8231049216.0, - "38": 8231049216.0, - "39": 8231049216.0, - "40": 8231049216.0, - "41": 8231049216.0, - "42": 
8231049216.0, - "43": 8231049216.0, - "44": 8231049216.0, - "45": 8231049216.0, - "46": 8231049216.0, - "47": 8231049216.0, - "48": 8231049216.0, - "49": 8231049216.0, - "50": 8231049216.0 + "1": 6208857600.0, + "2": 8233667072.0, + "3": 8233667072.0, + "4": 8233667072.0, + "5": 8233667072.0, + "6": 8233667072.0, + "7": 8233667072.0, + "8": 8233667072.0, + "9": 8233667072.0, + "10": 8233667072.0, + "11": 8262715904.0, + "12": 8262715904.0, + "13": 8262715904.0, + "14": 8262715904.0, + "15": 8262715904.0, + "16": 8268117504.0, + "17": 8288236032.0, + "18": 8288236032.0, + "19": 8288236032.0, + "20": 8288236032.0, + "21": 8288236032.0, + "22": 8299924992.0, + "23": 8302176768.0, + "24": 8302176768.0, + "25": 8302176768.0, + "26": 8302176768.0, + "27": 8302176768.0, + "28": 8302176768.0, + "29": 8302176768.0, + "30": 8302176768.0, + "31": 8302176768.0, + "32": 8302176768.0, + "33": 8302176768.0, + "34": 8302176768.0, + "35": 8302176768.0, + "36": 8302176768.0, + "37": 8302176768.0, + "38": 8313753088.0, + "39": 8313753088.0, + "40": 8313753088.0, + "41": 8313753088.0, + "42": 8313753088.0, + "43": 8313753088.0, + "44": 8313753088.0, + "45": 8313753088.0, + "46": 8313753088.0, + "47": 8313753088.0, + "48": 8313753088.0, + "49": 8313753088.0, + "50": 8313753088.0 } }, "mtp_1 loss": { @@ -234,54 +234,54 @@ "values": { "1": 11.07401, "2": 11.0927, - "3": 10.82643, - "4": 10.27622, - "5": 10.45336, - "6": 8.32745, - "7": 9.82615, - "8": 8.0154, - "9": 7.47567, - "10": 6.7579, - "11": 8.9295, - "12": 8.98788, - "13": 7.8023, - "14": 8.02404, - "15": 8.11201, - "16": 8.1414, - "17": 8.13011, - "18": 7.44461, - "19": 8.03519, - "20": 7.53958, - "21": 7.90042, - "22": 7.27752, - "23": 7.88457, - "24": 7.37662, - "25": 8.17118, - "26": 7.69984, - "27": 7.62511, - "28": 7.61547, - "29": 7.69882, - "30": 7.48104, - "31": 7.73945, - "32": 6.36982, - "33": 7.14012, - "34": 7.71799, - "35": 7.6339, - "36": 6.61216, - "37": 8.03046, - "38": 7.58074, - "39": 7.89628, - "40": 7.41236, 
- "41": 7.42281, - "42": 6.01575, - "43": 7.48966, - "44": 7.86842, - "45": 6.74992, - "46": 7.30434, - "47": 7.72759, - "48": 7.78813, - "49": 7.49091, - "50": 6.75731 + "3": 10.8262, + "4": 10.27574, + "5": 10.45324, + "6": 8.32758, + "7": 9.82629, + "8": 8.01538, + "9": 7.47611, + "10": 6.75851, + "11": 8.92961, + "12": 8.98772, + "13": 7.80203, + "14": 8.02221, + "15": 8.11372, + "16": 8.14498, + "17": 8.13435, + "18": 7.45035, + "19": 8.03784, + "20": 7.54246, + "21": 7.90269, + "22": 7.28093, + "23": 7.88727, + "24": 7.37587, + "25": 8.17289, + "26": 7.70083, + "27": 7.62668, + "28": 7.61747, + "29": 7.69888, + "30": 7.48586, + "31": 7.74301, + "32": 6.37542, + "33": 7.13919, + "34": 7.7198, + "35": 7.63387, + "36": 6.6127, + "37": 8.03449, + "38": 7.58334, + "39": 7.89887, + "40": 7.41168, + "41": 7.42316, + "42": 6.01689, + "43": 7.48867, + "44": 7.86976, + "45": 6.75113, + "46": 7.3054, + "47": 7.73281, + "48": 7.79017, + "49": 7.48985, + "50": 6.75753 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 89.01124, - "2": 2.6502, - "3": 2.63345, - "4": 4.59488, - "5": 2.67282, - "6": 2.75196, - "7": 2.38279, - "8": 1.95041, - "9": 2.55604, - "10": 1.89736, - "11": 1.9113, - "12": 2.59681, - "13": 1.87891, - "14": 1.89422, - "15": 1.89013, - "16": 1.88538, - "17": 1.91699, - "18": 1.88747, - "19": 1.93691, - "20": 1.88026, - "21": 1.94991, - "22": 1.90744, - "23": 1.8723, - "24": 1.87253, - "25": 1.93307, - "26": 1.93367, - "27": 1.88847, - "28": 1.93732, - "29": 1.95357, - "30": 1.93714, - "31": 1.89529, - "32": 1.87856, - "33": 1.96722, - "34": 1.88912, - "35": 1.88862, - "36": 1.88927, - "37": 1.8706, - "38": 1.85827, - "39": 1.86274, - "40": 1.9308, - "41": 1.93374, - "42": 1.88512, - "43": 1.89015, - "44": 1.90068, - "45": 1.89028, - "46": 1.89124, - "47": 1.87497, - "48": 1.86585, - "49": 1.87712, - "50": 1.95776 + "1": 64.76466, + "2": 2.42359, + "3": 2.56054, + "4": 2.61199, + "5": 2.3272, + "6": 
2.19806, + "7": 2.16133, + "8": 1.97339, + "9": 2.14238, + "10": 2.05512, + "11": 2.00856, + "12": 1.96198, + "13": 2.08656, + "14": 1.96948, + "15": 1.96059, + "16": 1.97248, + "17": 1.97639, + "18": 2.01386, + "19": 1.9606, + "20": 1.94716, + "21": 2.00286, + "22": 1.965, + "23": 2.03401, + "24": 2.00528, + "25": 2.03321, + "26": 1.95999, + "27": 1.96395, + "28": 1.98191, + "29": 1.99346, + "30": 1.97579, + "31": 1.95097, + "32": 1.95726, + "33": 1.9399, + "34": 1.99177, + "35": 1.91153, + "36": 1.97534, + "37": 1.95691, + "38": 1.96206, + "39": 1.9414, + "40": 1.96027, + "41": 1.97807, + "42": 1.98861, + "43": 1.94856, + "44": 1.96339, + "45": 1.96835, + "46": 1.99733, + "47": 1.9716, + "48": 1.96591, + "49": 1.93865, + "50": 1.95198 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml index c657b9087e7..be34eb9aec5 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml @@ -5,6 +5,9 @@ ENV_VARS: NCCL_NVLS_ENABLE: 0 PYTHONWARNINGS: ignore NCCL_DEBUG: VERSION + NVTE_FUSED_ATTN: 0 + NCCL_ALGO: ^NVLS + CUBLAS_WORKSPACE_CONFIG: ':4096:8' MODEL_ARGS: # Distributed args --distributed-timeout-minutes: 60 @@ -29,8 +32,6 @@ MODEL_ARGS: --exit-duration-in-mins: 230 --no-check-for-nan-in-loss-and-grad: true --no-rope-fusion: true - --cross-entropy-loss-fusion: true - --cross-entropy-fusion-impl: native --manual-gc: true --manual-gc-interval: 100 --recompute-granularity: selective diff --git a/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py b/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py new file mode 100644 index 
00000000000..91c74fe1bb6 --- /dev/null +++ b/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py @@ -0,0 +1,372 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +import gc +import os +import sys + +import pytest +import torch + +from megatron.core.enums import ModelType +from megatron.core.models.gpt.gpt_layer_specs import ( + get_gpt_decoder_block_spec, + get_gpt_mtp_block_spec, +) +from megatron.core.models.gpt.gpt_model import GPTModel +from megatron.core.num_microbatches_calculator import destroy_num_microbatches_calculator +from megatron.core.pipeline_parallel.utils import set_streams +from megatron.core.tensor_parallel.random import HAVE_TE, model_parallel_cuda_manual_seed +from megatron.core.transformer.enums import CudaGraphScope +from megatron.core.transformer.module import float16_to_fp32 +from megatron.core.utils import is_te_min_version, unwrap_model +from megatron.training.arguments import core_transformer_config_from_args, parse_args, validate_args +from megatron.training.global_vars import ( + destroy_global_vars, + get_args, + set_args, + set_global_variables, +) +from megatron.training.training import setup_model_and_optimizer +from tests.unit_tests.test_utilities import Utils + + +def is_deep_ep_available(): + from megatron.core.transformer.moe.fused_a2a import HAVE_DEEP_EP + + return HAVE_DEEP_EP + + +def is_hybrid_ep_available(): + from megatron.core.transformer.moe.fused_a2a import HAVE_HYBRIDEP + + return HAVE_HYBRIDEP + + +def save(fn, message): + with open(fn, 'w') as f: + f.write(message) + + +class TestPartialCudaGraphedA2AOverlap: + """Test that CUDA graph outputs match ep-overlapped CUDA graph outputs for various scopes.""" + + def setup_method(self, method): + self.seq_length = 512 + self.micro_batch_size = 2 + # Store original environment variable values + self.original_env = { + 'CUDA_DEVICE_MAX_CONNECTIONS': os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS'), + 'NVTE_ALLOW_NONDETERMINISTIC_ALGO': 
os.environ.get('NVTE_ALLOW_NONDETERMINISTIC_ALGO'), + } + self.cuda_graph_helper = None + os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1' + os.environ['NVTE_ALLOW_NONDETERMINISTIC_ALGO'] = '0' + + def teardown_method(self, method): + # Restore original environment variable values + for key, value in self.original_env.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + Utils.destroy_model_parallel() + destroy_global_vars() + destroy_num_microbatches_calculator() + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None + + gc.collect() + + def model_provider( + self, + pre_process=True, + post_process=True, + layer_spec_fn=get_gpt_decoder_block_spec, + **config_kwargs, + ): + model_parallel_cuda_manual_seed(123) + args = get_args() + config = core_transformer_config_from_args(args) + transformer_layer_spec = layer_spec_fn( + config, + use_transformer_engine=True, + normalization=args.normalization, + qk_l2_norm=args.qk_l2_norm, + ) + if args.mtp_num_layers: + mtp_block_spec = get_gpt_mtp_block_spec( + config, transformer_layer_spec, use_transformer_engine=True + ) + else: + mtp_block_spec = None + return GPTModel( + config=config, + transformer_layer_spec=transformer_layer_spec, + vocab_size=args.vocab_size, + max_sequence_length=args.max_position_embeddings, + pre_process=pre_process, + post_process=post_process, + fp16_lm_cross_entropy=args.fp16_lm_cross_entropy, + parallel_output=True, + share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights, + position_embedding_type=args.position_embedding_type, + rotary_percent=args.rotary_percent, + mtp_block_spec=mtp_block_spec, + ) + + def create_test_args( + self, cuda_graph_impl, cuda_graph_scope, cuda_graph_warmup_steps, ep_size, **kwargs + ): + destroy_global_vars() + destroy_num_microbatches_calculator() + + sys.argv = ['test_cuda_graphs.py'] + args 
= parse_args() + args.num_layers = 1 + args.mtp_num_layers = None + args.vocab_size = 1024 + args.hidden_size = 128 + args.num_attention_heads = 8 + args.max_position_embeddings = 512 + args.global_batch_size = self.micro_batch_size * 8 + args.micro_batch_size = self.micro_batch_size + args.create_attention_mask_in_dataloader = True + args.seq_length = self.seq_length + args.tensor_model_parallel_size = 2 + args.sequence_parallel = True + args.pipeline_model_parallel_size = 1 + args.context_parallel_size = 1 + args.expert_model_parallel_size = ep_size + args.train_iters = 10 + args.lr = 3e-5 + args.bf16 = True + args.add_bias_linear = False + args.swiglu = True + args.use_distributed_optimizer = True + args.position_embedding_type = "rope" + args.rotary_percent = 1.0 + args.hidden_dropout = 0.0 + args.attention_dropout = 0.0 + args.untie_embeddings_and_output_weights = True + + # MoE settings + args.num_experts = 16 + args.expert_model_parallel_size = ep_size + args.moe_shared_expert_intermediate_size = 1024 + args.moe_layer_freq = kwargs.get("moe_layer_freq", "[0,0,1,1]") + args.moe_permute_fusion = True + args.moe_router_fusion = True + args.moe_router_topk = 2 + + # CUDA graph settings + args.cuda_graph_impl = cuda_graph_impl + args.cuda_graph_scope = cuda_graph_scope + args.cuda_graph_warmup_steps = cuda_graph_warmup_steps + args.use_te_rng_tracker = cuda_graph_impl != "none" + + for key, value in kwargs.items(): + assert hasattr(args, key) + setattr(args, key, value) + + validate_args(args) + set_global_variables(args, False) + return args + + def get_batch(self, seq_length, micro_batch_size): + data = list(range(seq_length)) + input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() + labels = 1 + torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() + position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() + attention_mask = torch.ones( + (micro_batch_size, 1, seq_length, 
seq_length), dtype=bool + ).cuda() + loss_mask = torch.ones(seq_length).repeat((micro_batch_size, 1)).cuda() + return input_ids, labels, position_ids, attention_mask, loss_mask + + def _run_1f1b_helper(self, gpt_model, optimizer, data, num_iters, cuda_graph_warmup_steps): + from megatron.core.models.common.model_chunk_schedule_plan import ( + TransformerModelChunkSchedulePlan, + ) + from megatron.core.pipeline_parallel.schedules import set_current_microbatch + + schedule_plans = [] + losses = [] + set_current_microbatch(gpt_model[0], 1) + + gpt_model[0].zero_grad_buffer() + optimizer.zero_grad() + assert cuda_graph_warmup_steps > 0, "cuda_graph_warmup_steps must be greater than 0" + for fwd_mb_idx in range(num_iters + 1): + # Capture CUDA graphs after warmup if helper is provided + if self.cuda_graph_helper is not None and fwd_mb_idx == cuda_graph_warmup_steps: + self.cuda_graph_helper.create_cudagraphs() + + if fwd_mb_idx < cuda_graph_warmup_steps: + gpt_model[0].zero_grad_buffer() + optimizer.zero_grad() + output = gpt_model[0].forward(**data) + schedule_plans.append(None) + else: + if fwd_mb_idx == cuda_graph_warmup_steps: + extra_schedule_plan = unwrap_model(gpt_model[0]).build_schedule_plan(**data) + TransformerModelChunkSchedulePlan.run(extra_schedule_plan, None) + schedule_plans[-1] = extra_schedule_plan + f_schedule_plan = unwrap_model(gpt_model[0]).build_schedule_plan(**data) + b_schedule_plan = schedule_plans[-1] + schedule_plans.append(f_schedule_plan) + if b_schedule_plan is not None: + gpt_model[0].zero_grad_buffer() + optimizer.zero_grad() + output = TransformerModelChunkSchedulePlan.run( + f_schedule_plan, + b_schedule_plan, + b_grad=torch.ones_like(output) if fwd_mb_idx > 0 else None, + ) + # Check output shapes + if fwd_mb_idx < num_iters: + assert output is not None + assert output.shape[0] == self.micro_batch_size + assert output.shape[1] == self.seq_length + losses.append(output) + + if fwd_mb_idx < cuda_graph_warmup_steps: + 
output.backward(torch.ones_like(output)) + + for param in gpt_model[0].parameters(): + assert param.main_grad is not None + + update_successful, _, _ = optimizer.step() + assert update_successful + + return losses + + def _run_test_helper( + self, + ep_size, + cuda_graph_impl, + cuda_graph_scope, + cuda_graph_warmup_steps, + ep_overlap=False, + **kwargs, + ): + """Test fp8_param with gpt_model.""" + args = self.create_test_args( + cuda_graph_impl, + cuda_graph_scope, + cuda_graph_warmup_steps, + ep_size, + overlap_moe_expert_parallel_comm=ep_overlap, + **kwargs, + ) + if ep_overlap: + set_streams() + set_args(args) + torch.manual_seed(123) + Utils.initialize_model_parallel( + tensor_model_parallel_size=2, expert_model_parallel_size=ep_size + ) + + input_ids, labels, position_ids, attention_mask, loss_mask = self.get_batch( + self.seq_length, self.micro_batch_size + ) + + gpt_model, optimizer, _ = setup_model_and_optimizer( + self.model_provider, ModelType.encoder_or_decoder + ) + assert len(gpt_model) == 1 # Assume only one model in the model provider. 
+ + loss_list = [] + + if cuda_graph_impl == "transformer_engine": + from megatron.core.transformer.cuda_graphs import TECudaGraphHelper + + self.cuda_graph_helper = TECudaGraphHelper( + model=gpt_model, + config=gpt_model[0].config, + seq_length=self.seq_length, + micro_batch_size=self.micro_batch_size, + optimizers=[optimizer], + ) + + num_iters = cuda_graph_warmup_steps + 2 + data = { + "input_ids": input_ids, + "position_ids": position_ids, + "attention_mask": attention_mask, + "labels": labels, + "loss_mask": loss_mask, + } + if not ep_overlap: + for i in range(num_iters): + gpt_model[0].zero_grad_buffer() + optimizer.zero_grad() + + # Capture CUDA graphs after warmup if helper is provided + if self.cuda_graph_helper is not None and i == cuda_graph_warmup_steps: + self.cuda_graph_helper.create_cudagraphs() + + output = unwrap_model(gpt_model[0]).forward(**data) + output = float16_to_fp32(output) + + # Check output shapes + assert output.shape[0] == self.micro_batch_size + assert output.shape[1] == self.seq_length + + # Verify gradients + output.backward(torch.ones_like(output)) + for param in gpt_model[0].parameters(): + assert param.main_grad is not None + + update_successful, _, _ = optimizer.step() + assert update_successful + + loss_list.append(output) + else: + loss_list = self._run_1f1b_helper( + gpt_model, optimizer, data, num_iters, cuda_graph_warmup_steps + ) + + return loss_list + + @pytest.mark.skipif( + not (HAVE_TE and is_te_min_version("2.10.0")), + reason="Partial CUDA graph support requires TransformerEngine version >= 2.10.0", + ) + @pytest.mark.parametrize("moe_dispatcher_type", ["alltoall", "deepep"]) + def test_moe_partial_cudagraph_with_ep_overlap(self, moe_dispatcher_type): + extra_kwargs = {"moe_layer_freq": 1} + if moe_dispatcher_type == "deepep": + if not is_deep_ep_available(): + pytest.skip("Deep EP is not available") + extra_kwargs["moe_token_dispatcher_type"] = "flex" + extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" + 
extra_kwargs["moe_router_dtype"] = "fp32" + elif moe_dispatcher_type == "hybridep": + if not is_hybrid_ep_available(): + pytest.skip("Hybrid EP is not available") + extra_kwargs["moe_token_dispatcher_type"] = "flex" + extra_kwargs["moe_flex_dispatcher_backend"] = "hybridep" + else: + extra_kwargs["moe_token_dispatcher_type"] = moe_dispatcher_type + + loss_list_ref = self._run_test_helper(4, "none", None, 3, **extra_kwargs) + for cuda_graph_scope in [ + [CudaGraphScope.attn], + [CudaGraphScope.attn, CudaGraphScope.moe_router], + [CudaGraphScope.attn, CudaGraphScope.moe_router, CudaGraphScope.moe_preprocess], + ]: + cuda_graph_warmup_steps = 3 + loss_list = self._run_test_helper( + 4, + "transformer_engine", + cuda_graph_scope, + cuda_graph_warmup_steps, + ep_overlap=True, + **extra_kwargs, + ) + assert len(loss_list) == len(loss_list_ref) + for i in range(len(loss_list)): + assert torch.equal( + loss_list[i].mean(), loss_list_ref[i].mean() + ), f"scope={cuda_graph_scope}, i={i},loss_list={loss_list[i]}, loss_list_ref={loss_list_ref[i]}" + print(f"[DEBUG] Pass {cuda_graph_scope}") diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py index 5ec096e5a04..c6c4a75af99 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py @@ -306,7 +306,7 @@ def test_transformer_layer_overlap_shared_expert(self): "moe_shared_expert_intermediate_size": 512, } overlap_config = get_test_config(extra_kwargs=extra_kwargs) - extra_kwargs["moe_shared_expert_overlap"] = True + extra_kwargs["moe_shared_expert_overlap"] = False ref_config = get_test_config(extra_kwargs=extra_kwargs) microbatches = 4 with deterministic_mode(): diff --git a/tests/unit_tests/a2a_overlap/utils.py b/tests/unit_tests/a2a_overlap/utils.py index 7db4256a849..a52843956df 100644 --- a/tests/unit_tests/a2a_overlap/utils.py +++ b/tests/unit_tests/a2a_overlap/utils.py @@ -1,3 
+1,4 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import os from contextlib import contextmanager from dataclasses import dataclass diff --git a/tests/unit_tests/pipeline_parallel/test_schedules.py b/tests/unit_tests/pipeline_parallel/test_schedules.py index b861aa2df49..86b9219fe0f 100644 --- a/tests/unit_tests/pipeline_parallel/test_schedules.py +++ b/tests/unit_tests/pipeline_parallel/test_schedules.py @@ -1,3 +1,5 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + import os import pytest @@ -127,6 +129,52 @@ def test_get_pipeline_parallel_order( for k, v in order_cnt.items(): assert -k in order_cnt and order_cnt[-k] == v + layers_per_chunk = 2 + num_layers_per_chunk = [layers_per_chunk] * num_model_chunks + # disable wgrad compute + overlapped_order, chunk_id_list = schedule.get_overlap_moe_expert_parallel_comm_order( + order, num_layers_per_chunk, False + ) + assert max(overlapped_order) == num_model_chunks * layers_per_chunk + assert len(overlapped_order) == len(order) * layers_per_chunk + assert len(chunk_id_list) == len(overlapped_order) + order_cnt = {} + accumulated_order = 0 + for o in overlapped_order: + order_cnt[o] = order_cnt.get(o, 0) + 1 + if o < 0: + assert -o in order_cnt and order_cnt[-o] >= order_cnt[o] + elif -o in order_cnt: + assert order_cnt[-o] < order_cnt[o] + accumulated_order += o + assert accumulated_order >= 0 + assert accumulated_order == 0 + + # enable wgrad compute + overlapped_order, chunk_id_list = schedule.get_overlap_moe_expert_parallel_comm_order( + order, num_layers_per_chunk, True + ) + assert max(overlapped_order) == num_model_chunks * layers_per_chunk + assert len(overlapped_order) == len(order) * layers_per_chunk * 3 // 2 + assert len(chunk_id_list) == len(overlapped_order) + from math import ceil + + order_cnt = {} + accumulated_order = 0 + prev_o = 0 + for o in overlapped_order: + if ceil(o) != o: + assert prev_o - 0.5 == o + else: + order_cnt[o] = order_cnt.get(o, 0) 
+ 1 + if o < 0: + assert -o in order_cnt and order_cnt[-o] >= order_cnt[o] + elif -o in order_cnt: + assert order_cnt[-o] < order_cnt[o] + accumulated_order += o + prev_o = o + assert accumulated_order < 0 + Utils.destroy_model_parallel() diff --git a/tests/unit_tests/transformer/test_submodule_callables.py b/tests/unit_tests/transformer/test_submodule_callables.py index 1ccb6fd5be8..73059495c06 100644 --- a/tests/unit_tests/transformer/test_submodule_callables.py +++ b/tests/unit_tests/transformer/test_submodule_callables.py @@ -64,7 +64,7 @@ def run_model_submodules_with_capture(model, input_tensors, microbatches): output_tensors = [] # get callables callables, dw = build_layer_callables(model) - attn, post_attn, dispatch, moe, combine, post_process = callables + attn, dispatch, moe, combine, post_process = callables assert post_process is None dummy_model = DummyState() dummy_model.decoder = DummyState() @@ -76,24 +76,16 @@ def run_model_submodules_with_capture(model, input_tensors, microbatches): node.chunk_state.model = dummy_model # attn fwd - hidden_states = attn(node, input_tensors[i]) - - # post attn fwd - local_tokens, probs = post_attn(node, hidden_states) + local_tokens, probs = attn(node, input_tensors[i]) # dispatch fwd dispatched_tokens = dispatch(node, local_tokens, probs) # moe fwd - expert_outputs = moe(node, dispatched_tokens) - if model.mlp.use_shared_expert: - expert_output, shared_expert_output = expert_outputs - else: - expert_output = expert_outputs - shared_expert_output = None + expert_output = moe(node, dispatched_tokens) # combine fwd - hidden_states = combine(node, expert_output, shared_expert_output) + hidden_states = combine(node, expert_output) # loss output_tensors.append(hidden_states) From 0bc4114957a22d186e7c700e42b1c131b806e78b Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Wed, 7 Jan 2026 12:13:19 +0800 Subject: [PATCH 217/334] [Dev] fix EP Overlap Partial Cuda Graph Unit Test hang 
issue (#2838) --- .../a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py b/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py index 91c74fe1bb6..719bd5df18f 100644 --- a/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py @@ -71,12 +71,15 @@ def teardown_method(self, method): Utils.destroy_model_parallel() destroy_global_vars() destroy_num_microbatches_calculator() + self.delete_cuda_graphs() + + gc.collect() + + def delete_cuda_graphs(self): if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): self.cuda_graph_helper.delete_cuda_graphs() self.cuda_graph_helper = None - gc.collect() - def model_provider( self, pre_process=True, @@ -326,6 +329,8 @@ def _run_test_helper( gpt_model, optimizer, data, num_iters, cuda_graph_warmup_steps ) + self.delete_cuda_graphs() + return loss_list @pytest.mark.skipif( From 28c586e91506631835d8c5f29bf325a4e5aefddd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 8 Jan 2026 20:13:19 +0100 Subject: [PATCH 218/334] build: Bump jet-client (#2877) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- docker/Dockerfile.ci.dev | 4 +++- docker/Dockerfile.ci.nemo | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index fa4d84bcad0..3f440efcd47 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -1,3 +1,5 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ # syntax=docker/dockerfile:1.3-labs ARG FROM_IMAGE_NAME @@ -90,6 +92,6 @@ RUN --mount=type=secret,id=JET_INDEX_URLS \ LOGGER_INDEX_URL=$(cat /run/secrets/LOGGER_INDEX_URL) uv pip install --no-cache-dir --upgrade $LOGGER_INDEX_URL "one-logger" uv pip install --no-cache-dir --upgrade "setuptools<80.0.0" - uv pip install --no-cache-dir --upgrade $JET_INDEX_URLS "jet-client~=3.0" + uv pip install --no-cache-dir --upgrade $JET_INDEX_URLS "jet-client~=4.0" EOF ### diff --git a/docker/Dockerfile.ci.nemo b/docker/Dockerfile.ci.nemo index 2369602f54d..93fe23bfd6f 100644 --- a/docker/Dockerfile.ci.nemo +++ b/docker/Dockerfile.ci.nemo @@ -1,3 +1,5 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + # syntax=docker/dockerfile:1.3-labs ARG FROM_IMAGE_NAME @@ -14,7 +16,7 @@ FROM main as jet ARG JET_API_VERSION RUN --mount=type=secret,id=JET_INDEX_URLS \ JET_INDEX_URLS=$(cat /run/secrets/JET_INDEX_URLS) && \ - pip install --no-cache-dir jet-api==$JET_API_VERSION "jet-client~=3.0" --upgrade $JET_INDEX_URLS + pip install --no-cache-dir jet-api==$JET_API_VERSION "jet-client~=4.0" --upgrade $JET_INDEX_URLS ENV PATH="$PATH:/opt/jet/bin" ### From 46d1f47d74c782f45c0bcdf4da001aed982c8de9 Mon Sep 17 00:00:00 2001 From: vasunvidia <108759426+vasunvidia@users.noreply.github.com> Date: Thu, 8 Jan 2026 17:00:06 -0800 Subject: [PATCH 219/334] FP8 attention knob for nvFP4 recipe (#2818) --- megatron/core/fp4_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/megatron/core/fp4_utils.py b/megatron/core/fp4_utils.py index 4f9e7e5d026..a4cc172796b 100644 --- a/megatron/core/fp4_utils.py +++ b/megatron/core/fp4_utils.py @@ -86,7 +86,9 @@ def get_fp4_recipe(config: TransformerConfig): if is_te_min_version("2.7.0.dev0"): if config.fp4_recipe == Fp4Recipe.nvfp4: try: - fp4_recipe = transformer_engine.common.recipe.NVFP4BlockScaling() + fp4_recipe = transformer_engine.common.recipe.NVFP4BlockScaling( + fp8_dpa=config.fp8_dot_product_attention + ) except 
AttributeError: raise ValueError( """NVFP4BlockScaling recipe is not available in this version of From ed6ebff3021e5eb5fc45aa13c00c9cdca889288f Mon Sep 17 00:00:00 2001 From: Zhongbo Zhu <42691305+zhongbozhu@users.noreply.github.com> Date: Thu, 8 Jan 2026 20:00:59 -0800 Subject: [PATCH 220/334] [DEV][NVFP4][MOE] 128 Zero Padding for Grouped Quantization kernels and Cuda Graph Support (#2654) Signed-off-by: Zhongbo Zhu Co-authored-by: Xin Yao --- megatron/core/fp4_utils.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/megatron/core/fp4_utils.py b/megatron/core/fp4_utils.py index a4cc172796b..95368d7c2b7 100644 --- a/megatron/core/fp4_utils.py +++ b/megatron/core/fp4_utils.py @@ -61,13 +61,23 @@ def get_fp4_align_size(fp4_recipe: Fp4Recipe) -> int: Note that since we are also random hadamard transform for NVFP4 training, we want fused group nvfp4 quantize plus hadamard transform. Hadamard transform will leverage tensor core instructions for better performance, while group quantize kernels also - prefer a more aligned size in token dimension M. Therefore, we apply align size 64 - here for better performance in MOE. + prefer a more aligned size in token dimension M. The efficiently leverage grouped + kernels, padding needs to be 64 multiple, but 128 multiple will bring even faster. + + When it comes to MOE cuda graph support, the number of tokens for each expert should + be a buffer on device memory, which means that we don't know the token dimension for + each expertin host, therefore we cannot calculate the zero padded scaling factors shape + on host to comply with the NVFP4 GEMM scaling factor layout. However, if we have already + zero padded the tokens to 128 multiple, then there is no need for such padding, so that + host doesn't need to copy the token distribution from device to host (which will break + the CUDA graph). 
Paper link: https://arxiv.org/pdf/2509.25149 + Scaling factor layout: https://docs.nvidia.com/cuda/cublas/#d-block-scaling-factors-layout + TE NVFP4 Grouped Quantization: https://github.com/NVIDIA/TransformerEngine/pull/2411 """ # pylint: disable=unused-argument - return 64 + return 128 def dequantize_fp4_tensor(fp4_tensor: torch.Tensor) -> torch.Tensor: From ebe7079ba472894e5f6ec845ca0027e1fd0c0e10 Mon Sep 17 00:00:00 2001 From: vasunvidia <108759426+vasunvidia@users.noreply.github.com> Date: Thu, 8 Jan 2026 20:01:01 -0800 Subject: [PATCH 221/334] Add check for full_iteration scope before instantiating CudaGraphManager (#2657) Co-authored-by: Xin Yao Co-authored-by: Zijie Yan --- megatron/core/transformer/module.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/megatron/core/transformer/module.py b/megatron/core/transformer/module.py index 2330df91b52..d68f34ffd0b 100644 --- a/megatron/core/transformer/module.py +++ b/megatron/core/transformer/module.py @@ -9,6 +9,7 @@ from megatron.core import parallel_state from megatron.core.dist_checkpointing.mapping import ShardedStateDict +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.utils import ( ensure_metadata_has_dp_cp_group, @@ -167,7 +168,10 @@ def __init__(self, config: TransformerConfig, vp_stage: Optional[int] = None): assert isinstance(config, TransformerConfig), "config must be a TransformerConfig" # Enable cuda graphs. 
- if config.cuda_graph_impl == "local": + if ( + config.cuda_graph_impl == "local" + and CudaGraphScope.full_iteration not in config.cuda_graph_scope + ): from megatron.core.transformer.cuda_graphs import CudaGraphManager self.cudagraph_manager = CudaGraphManager(config, vp_stage=vp_stage) From 736da3cff027dd7f3849d1340dad0f8586b02666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 9 Jan 2026 10:06:58 +0100 Subject: [PATCH 222/334] Reapply "[Dev] Use the latest Hybrid-EP (#2423)" (#2867) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- docker/Dockerfile.ci.dev | 4 +- megatron/core/transformer/moe/fused_a2a.py | 51 +++++-------------- .../core/transformer/moe/token_dispatcher.py | 15 ++---- 3 files changed, 19 insertions(+), 51 deletions(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index 3f440efcd47..d8c1dd33942 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -62,9 +62,9 @@ RUN bash -ex <<"EOF" ln -s libnvshmem_host.so.3 libnvshmem_host.so popd - git clone --branch hybrid-ep https://github.com/deepseek-ai/DeepEP.git + git clone --branch hybrid-ep https://github.com/Autumn1998/DeepEP.git pushd DeepEP - git checkout 1dddd194c26911c35b4f53a148617dd73de0ffc9 + git checkout df375b40f24e5c495e2db36e808125266661652c patch -p1 < /workspace/deepep.patch popd TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" uv pip install --no-build-isolation -v DeepEP/. 
diff --git a/megatron/core/transformer/moe/fused_a2a.py b/megatron/core/transformer/moe/fused_a2a.py index 045a93039b3..aa13b9b5b5b 100644 --- a/megatron/core/transformer/moe/fused_a2a.py +++ b/megatron/core/transformer/moe/fused_a2a.py @@ -3,6 +3,7 @@ # Copyright (c) 2025 DeepSeek # Licensed under the MIT License - https://github.com/deepseek-ai/DeepEP/blob/main/LICENSE +from megatron.core.utils import internal_api try: from deep_ep import Buffer @@ -328,6 +329,7 @@ def reset_hybrid_ep_buffer(): _hybrid_ep_buffer = None +@internal_api class HybridEPDispatch(torch.autograd.Function): ''' Fused dispatch operation for permute + dispatch a2a + permute using the HybridEP backend @@ -343,7 +345,6 @@ def forward( num_local_experts, num_sms_dispatch_api=24, num_sms_combine_api=24, - num_dispatched_tokens=None, num_permuted_tokens=None, pad_multiple=None, ): @@ -362,11 +363,9 @@ def forward( num_sms_combine_api, fp8_dispatch, ) - # Defaultly, the output token_per_expert and num_dispatched_tokens_tensor - # will be put on the CPU to avoid the potential sync in combine/backward pass, - # but if we provide the num_dispatched_tokens and num_permuted_tokens on CPU, - # we do not need to the D2H here. 
- use_host_meta = num_dispatched_tokens is None or num_permuted_tokens is None + # If we provide the num_permuted_tokens, we do not need to use sync to + # wait for the data in pinned memory ready + non_blocking = num_permuted_tokens is not None # Process the dispatch ( dispatched_hidden, @@ -381,14 +380,12 @@ def forward( scaling_factor=None, num_of_experts_per_rank=num_local_experts, pad_multiple=pad_multiple, - num_dispatched_tokens=num_dispatched_tokens, num_permuted_tokens=num_permuted_tokens, - use_host_meta=use_host_meta, + non_blocking=non_blocking, ) ctx.handle = handle ctx.pad_multiple = pad_multiple - ctx.num_dispatched_tokens = num_dispatched_tokens return ( dispatched_hidden, dispatched_probs, @@ -404,36 +401,27 @@ def backward(ctx, grad_x, grad_probs, grad_scaling_factor, grad_tokens_per_exper ''' handle = ctx.handle combined_hidden, combined_probs = _hybrid_ep_buffer.combine_with_unpermute( - hidden=grad_x, - probs=grad_probs, - handle=handle, - pad_multiple=ctx.pad_multiple, - num_dispatched_tokens=ctx.num_dispatched_tokens, + hidden=grad_x, probs=grad_probs, handle=handle, pad_multiple=ctx.pad_multiple ) return combined_hidden, None, combined_probs, None, None, None, None, None, None, None +@internal_api class HybridEPCombine(torch.autograd.Function): ''' Fused combine operation for permute + combine a2a + permute using the HybridEP backend ''' @staticmethod - def forward( - ctx, x, handle, num_dispatched_tokens=None, num_permuted_tokens=None, pad_multiple=None - ): + def forward(ctx, x, handle, num_permuted_tokens=None, pad_multiple=None): ''' Forward pass of fused combine of the HybridEP backend ''' combined_hidden, _ = _hybrid_ep_buffer.combine_with_unpermute( - hidden=x, - handle=handle, - pad_multiple=pad_multiple, - num_dispatched_tokens=num_dispatched_tokens, + hidden=x, handle=handle, pad_multiple=pad_multiple ) ctx.handle = handle ctx.pad_multiple = pad_multiple - ctx.num_dispatched_tokens = num_dispatched_tokens ctx.num_permuted_tokens = 
num_permuted_tokens return combined_hidden @@ -448,7 +436,6 @@ def backward(ctx, grad_x): scaling_factor=None, handle=handle, pad_multiple=ctx.pad_multiple, - num_dispatched_tokens=ctx.num_dispatched_tokens, num_permuted_tokens=ctx.num_permuted_tokens, ) return dispatched_hidden, None, None, None, None @@ -456,6 +443,7 @@ def backward(ctx, grad_x): if HAVE_HYBRIDEP: + @internal_api def hybrid_ep_dispatch( x, routing_map, @@ -464,7 +452,6 @@ def hybrid_ep_dispatch( num_local_experts, num_sms_dispatch_api=24, num_sms_combine_api=24, - num_dispatched_tokens=None, num_permuted_tokens=None, pad_multiple=None, ): @@ -487,10 +474,6 @@ def hybrid_ep_dispatch( Number of SMs used by the dispatch API. num_sms_combine_api (int): Number of SMs used by the combine API. - num_dispatched_tokens (int): - Number of tokens after dispatch but before permute. HybridEP uses this - to allocate buffers. If not provided, HybridEP obtains the size from - a GPU tensor, which causes a D2H synchronization. num_permuted_tokens (int): Number of tokens after permute. HybridEP uses this to allocate buffers. If not provided, HybridEP obtains the size from a GPU tensor, @@ -507,12 +490,12 @@ def hybrid_ep_dispatch( num_local_experts, num_sms_dispatch_api, num_sms_combine_api, - num_dispatched_tokens, num_permuted_tokens, pad_multiple, ) - def hybrid_ep_combine(x, handle, num_dispatched_tokens, num_permuted_tokens, pad_multiple): + @internal_api + def hybrid_ep_combine(x, handle, num_permuted_tokens, pad_multiple): ''' Perform fused combine operation for unpermute + combine a2a + unpermute using the HybridEP backend @@ -522,10 +505,6 @@ def hybrid_ep_combine(x, handle, num_dispatched_tokens, num_permuted_tokens, pad Input hidden states to combine handle (EventHandle): Communication handle from dispatch operation - num_dispatched_tokens (int): - The number of tokens after unpermute but before combine. HybridEP uses this - to allocate buffers. 
If not provided, HybridEP obtains the size from a GPU tensor, - which causes a D2H synchronization. num_permuted_tokens (int): The number of tokens before unpermute. HybridEP uses this to allocate buffers. If not provided, HybridEP obtains the size from a GPU tensor, which causes a D2H synchronization. @@ -533,9 +512,7 @@ def hybrid_ep_combine(x, handle, num_dispatched_tokens, num_permuted_tokens, pad The alignment multiple required for FP8 GEMM. If not provided, no padding is performed. ''' - return HybridEPCombine.apply( - x, handle, num_dispatched_tokens, num_permuted_tokens, pad_multiple - ) + return HybridEPCombine.apply(x, handle, num_permuted_tokens, pad_multiple) else: hybrid_ep_dispatch = None diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index 61ef0b5f084..d0da38d6322 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -985,11 +985,8 @@ def __init__( if self.drop_and_pad: assert self.capacity_factor is not None self.capacity = None - # The up-bound for the number of tokens after dispatch op, -1 means no up-bound, - # which will cause a CPU sync - self.num_dispatched_tokens = None - # Actually the sum of tokens_per_expert, the up-bound for the number of tokens - # after permute op, -1 means no up-bound, will cause a CPU sync + # Actually the up-bound for the number of tokens + # after permute op, None means no up-bound, will cause a CPU sync self.num_permuted_tokens = None # Metadata @@ -1018,12 +1015,9 @@ def setup_metadata(self, routing_map: torch.Tensor, probs: torch.Tensor): num_experts=self.num_experts, capacity_factor=self.capacity_factor, ) - # We cannot predict the actual number of tokens after the dispatch op, - # so we set it to the worst case in drop_and_pad mode - self.num_dispatched_tokens = self.capacity * self.group.size() * self.num_local_experts # In drop_and_pad mode, the number of tokens after the
permute op # can be computed on the CPU - self.num_permuted_tokens = self.num_dispatched_tokens + self.num_permuted_tokens = self.capacity * self.group.size() * self.num_local_experts self.tokens_per_expert = torch.full( (self.num_local_experts,), self.capacity * self.group.size(), dtype=torch.long ) @@ -1052,7 +1046,6 @@ def dispatch( num_local_experts=self.num_local_experts, num_sms_dispatch_api=self.config.moe_hybridep_num_sms, num_sms_combine_api=self.config.moe_hybridep_num_sms, - num_dispatched_tokens=self.num_dispatched_tokens, num_permuted_tokens=self.num_permuted_tokens, pad_multiple=self.pad_multiple, ) @@ -1074,7 +1067,6 @@ def combine( hidden_states = hybrid_ep_combine( x=hidden_states, handle=self.handle, - num_dispatched_tokens=self.num_dispatched_tokens, num_permuted_tokens=self.num_permuted_tokens, pad_multiple=self.pad_multiple, ) @@ -1084,7 +1076,6 @@ def combine( self.handle = None if not self.drop_and_pad: self.num_permuted_tokens = None - self.num_dispatched_tokens = None return hidden_states def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor: From 9d741cf674fd29fca38988e54ae2f36505a7cc6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 13 Jan 2026 00:12:33 +0100 Subject: [PATCH 223/334] build: Main dependency bump for 26.02 (#2682) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/actions/action.yml | 1 + .gitlab/stages/01.build.yml | 8 +- docker/.ngc_version.dev | 2 +- pyproject.toml | 5 +- .../launch_nemo_run_workload.py | 1 + uv.lock | 1228 ++++++++--------- 6 files changed, 604 insertions(+), 641 deletions(-) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index 5c35385b036..a17b4a9a8c1 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -77,6 +77,7 @@ runs: export PYTHONPATH=$(pwd) export NEMORUN_HOME=$(pwd) + export NCCL_DEBUG=INFO pip install 
--no-cache-dir uv uv sync --only-group test uv run python tests/test_utils/python_scripts/launch_nemo_run_workload.py \ diff --git a/.gitlab/stages/01.build.yml b/.gitlab/stages/01.build.yml index d67225311f6..b3ab8cc5bd5 100644 --- a/.gitlab/stages/01.build.yml +++ b/.gitlab/stages/01.build.yml @@ -16,13 +16,13 @@ services: - name: docker:24.0.5-dind variables: - HEALTHCHECK_TCP_PORT: "2376" + HEALTHCHECK_TCP_PORT: '2376' timeout: 180m variables: DOCKER_HOST: tcp://docker:2376 - DOCKER_TLS_CERTDIR: "/certs" + DOCKER_TLS_CERTDIR: '/certs' DOCKER_TLS_VERIFY: 1 - DOCKER_CERT_PATH: "$DOCKER_TLS_CERTDIR/client" + DOCKER_CERT_PATH: '$DOCKER_TLS_CERTDIR/client' TAG: purpose/builder-large STAGE: jet MCORE_BACKWARDS_REF: core_r0.14.0 @@ -59,7 +59,7 @@ test:build_image: - IMAGE: CI_MCORE_DEV_IMAGE FILE: Dockerfile.ci.dev IMAGE_TYPE: dev - BASE_IMAGE: nvcr.io/nvidia/pytorch:25.09-py3 + BASE_IMAGE: nvcr.io/nvidia/pytorch:25.11-py3 - IMAGE: UTILITY_IMAGE FILE: Dockerfile.linting BASE_IMAGE: python:3.10 diff --git a/docker/.ngc_version.dev b/docker/.ngc_version.dev index 6b72812b34f..8e8108b9a9a 100644 --- a/docker/.ngc_version.dev +++ b/docker/.ngc_version.dev @@ -1 +1 @@ -nvcr.io/nvidia/pytorch:25.09-py3 \ No newline at end of file +nvcr.io/nvidia/pytorch:25.11-py3 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 120db5b2ad7..22ee405cb4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ mlm = ["flask-restful", "sentencepiece", "tiktoken", "wandb", "transformers"] dev = [ "nvidia-modelopt[torch]; sys_platform != 'darwin'", - "transformer-engine[pytorch,core_cu13]>=2.9.0a0,<2.11.0", + "transformer-engine[pytorch,core_cu13]>=2.9.0a0,<2.12.0", "nvidia-resiliency-ext", "tqdm", "einops~=0.8", @@ -174,10 +174,11 @@ override-dependencies = [ ] [tool.uv.sources] + flash_mla = [ { git = "https://github.com/deepseek-ai/FlashMLA", rev = "9edee0c022cd0938148a18e334203b0aab43aa19" }, ] -# transformer-engine = { git = 
"https://github.com/NVIDIA/TransformerEngine.git", rev = "release_v2.10" } # on `release_v2.10` +transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "release_v2.11" } nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "01a9a8ba360f7b2908728ad0516e0ad9d936966d" } emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "v0.1.0" } diff --git a/tests/test_utils/python_scripts/launch_nemo_run_workload.py b/tests/test_utils/python_scripts/launch_nemo_run_workload.py index 6e2b73e430f..26a7dbd79f5 100644 --- a/tests/test_utils/python_scripts/launch_nemo_run_workload.py +++ b/tests/test_utils/python_scripts/launch_nemo_run_workload.py @@ -115,6 +115,7 @@ def main( "ENABLE_LIGHTWEIGHT_MODE": str(enable_lightweight_mode).lower(), "N_REPEAT": "1", "CLUSTER": "dgxh100_dgxc", + "NCCL_DEBUG": "INFO", }, packager=run.Packager(), volumes=artifacts, diff --git a/uv.lock b/uv.lock index b36351849fe..15892827c83 100644 --- a/uv.lock +++ b/uv.lock @@ -75,7 +75,7 @@ wheels = [ [[package]] name = "aiohttp" -version = "3.13.2" +version = "3.13.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, @@ -87,110 +87,110 @@ dependencies = [ { name = "propcache" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1c/ce/3b83ebba6b3207a7135e5fcaba49706f8a4b6008153b4e30540c982fae26/aiohttp-3.13.2.tar.gz", hash = "sha256:40176a52c186aefef6eb3cad2cdd30cd06e3afbe88fe8ab2af9c0b90f228daca", size = 7837994, upload-time = "2025-10-28T20:59:39.937Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/34/939730e66b716b76046dedfe0842995842fa906ccc4964bba414ff69e429/aiohttp-3.13.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2372b15a5f62ed37789a6b383ff7344fc5b9f243999b0cd9b629d8bc5f5b4155", size = 736471, upload-time = "2025-10-28T20:55:27.924Z" }, - { url = 
"https://files.pythonhosted.org/packages/fd/cf/dcbdf2df7f6ca72b0bb4c0b4509701f2d8942cf54e29ca197389c214c07f/aiohttp-3.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e7f8659a48995edee7229522984bd1009c1213929c769c2daa80b40fe49a180c", size = 493985, upload-time = "2025-10-28T20:55:29.456Z" }, - { url = "https://files.pythonhosted.org/packages/9d/87/71c8867e0a1d0882dcbc94af767784c3cb381c1c4db0943ab4aae4fed65e/aiohttp-3.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:939ced4a7add92296b0ad38892ce62b98c619288a081170695c6babe4f50e636", size = 489274, upload-time = "2025-10-28T20:55:31.134Z" }, - { url = "https://files.pythonhosted.org/packages/38/0f/46c24e8dae237295eaadd113edd56dee96ef6462adf19b88592d44891dc5/aiohttp-3.13.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6315fb6977f1d0dd41a107c527fee2ed5ab0550b7d885bc15fee20ccb17891da", size = 1668171, upload-time = "2025-10-28T20:55:36.065Z" }, - { url = "https://files.pythonhosted.org/packages/eb/c6/4cdfb4440d0e28483681a48f69841fa5e39366347d66ef808cbdadddb20e/aiohttp-3.13.2-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6e7352512f763f760baaed2637055c49134fd1d35b37c2dedfac35bfe5cf8725", size = 1636036, upload-time = "2025-10-28T20:55:37.576Z" }, - { url = "https://files.pythonhosted.org/packages/84/37/8708cf678628216fb678ab327a4e1711c576d6673998f4f43e86e9ae90dd/aiohttp-3.13.2-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e09a0a06348a2dd73e7213353c90d709502d9786219f69b731f6caa0efeb46f5", size = 1727975, upload-time = "2025-10-28T20:55:39.457Z" }, - { url = "https://files.pythonhosted.org/packages/e6/2e/3ebfe12fdcb9b5f66e8a0a42dffcd7636844c8a018f261efb2419f68220b/aiohttp-3.13.2-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a09a6d073fb5789456545bdee2474d14395792faa0527887f2f4ec1a486a59d3", size = 
1815823, upload-time = "2025-10-28T20:55:40.958Z" }, - { url = "https://files.pythonhosted.org/packages/a1/4f/ca2ef819488cbb41844c6cf92ca6dd15b9441e6207c58e5ae0e0fc8d70ad/aiohttp-3.13.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b59d13c443f8e049d9e94099c7e412e34610f1f49be0f230ec656a10692a5802", size = 1669374, upload-time = "2025-10-28T20:55:42.745Z" }, - { url = "https://files.pythonhosted.org/packages/f8/fe/1fe2e1179a0d91ce09c99069684aab619bf2ccde9b20bd6ca44f8837203e/aiohttp-3.13.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:20db2d67985d71ca033443a1ba2001c4b5693fe09b0e29f6d9358a99d4d62a8a", size = 1555315, upload-time = "2025-10-28T20:55:44.264Z" }, - { url = "https://files.pythonhosted.org/packages/5a/2b/f3781899b81c45d7cbc7140cddb8a3481c195e7cbff8e36374759d2ab5a5/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:960c2fc686ba27b535f9fd2b52d87ecd7e4fd1cf877f6a5cba8afb5b4a8bd204", size = 1639140, upload-time = "2025-10-28T20:55:46.626Z" }, - { url = "https://files.pythonhosted.org/packages/72/27/c37e85cd3ece6f6c772e549bd5a253d0c122557b25855fb274224811e4f2/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:6c00dbcf5f0d88796151e264a8eab23de2997c9303dd7c0bf622e23b24d3ce22", size = 1645496, upload-time = "2025-10-28T20:55:48.933Z" }, - { url = "https://files.pythonhosted.org/packages/66/20/3af1ab663151bd3780b123e907761cdb86ec2c4e44b2d9b195ebc91fbe37/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fed38a5edb7945f4d1bcabe2fcd05db4f6ec7e0e82560088b754f7e08d93772d", size = 1697625, upload-time = "2025-10-28T20:55:50.377Z" }, - { url = "https://files.pythonhosted.org/packages/95/eb/ae5cab15efa365e13d56b31b0d085a62600298bf398a7986f8388f73b598/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:b395bbca716c38bef3c764f187860e88c724b342c26275bc03e906142fc5964f", size = 1542025, upload-time = 
"2025-10-28T20:55:51.861Z" }, - { url = "https://files.pythonhosted.org/packages/e9/2d/1683e8d67ec72d911397fe4e575688d2a9b8f6a6e03c8fdc9f3fd3d4c03f/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:204ffff2426c25dfda401ba08da85f9c59525cdc42bda26660463dd1cbcfec6f", size = 1714918, upload-time = "2025-10-28T20:55:53.515Z" }, - { url = "https://files.pythonhosted.org/packages/99/a2/ffe8e0e1c57c5e542d47ffa1fcf95ef2b3ea573bf7c4d2ee877252431efc/aiohttp-3.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:05c4dd3c48fb5f15db31f57eb35374cb0c09afdde532e7fb70a75aede0ed30f6", size = 1656113, upload-time = "2025-10-28T20:55:55.438Z" }, - { url = "https://files.pythonhosted.org/packages/0d/42/d511aff5c3a2b06c09d7d214f508a4ad8ac7799817f7c3d23e7336b5e896/aiohttp-3.13.2-cp310-cp310-win32.whl", hash = "sha256:e574a7d61cf10351d734bcddabbe15ede0eaa8a02070d85446875dc11189a251", size = 432290, upload-time = "2025-10-28T20:55:56.96Z" }, - { url = "https://files.pythonhosted.org/packages/8b/ea/1c2eb7098b5bad4532994f2b7a8228d27674035c9b3234fe02c37469ef14/aiohttp-3.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:364f55663085d658b8462a1c3f17b2b84a5c2e1ba858e1b79bff7b2e24ad1514", size = 455075, upload-time = "2025-10-28T20:55:58.373Z" }, - { url = "https://files.pythonhosted.org/packages/35/74/b321e7d7ca762638cdf8cdeceb39755d9c745aff7a64c8789be96ddf6e96/aiohttp-3.13.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4647d02df098f6434bafd7f32ad14942f05a9caa06c7016fdcc816f343997dd0", size = 743409, upload-time = "2025-10-28T20:56:00.354Z" }, - { url = "https://files.pythonhosted.org/packages/99/3d/91524b905ec473beaf35158d17f82ef5a38033e5809fe8742e3657cdbb97/aiohttp-3.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e3403f24bcb9c3b29113611c3c16a2a447c3953ecf86b79775e7be06f7ae7ccb", size = 497006, upload-time = "2025-10-28T20:56:01.85Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/d3/7f68bc02a67716fe80f063e19adbd80a642e30682ce74071269e17d2dba1/aiohttp-3.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:43dff14e35aba17e3d6d5ba628858fb8cb51e30f44724a2d2f0c75be492c55e9", size = 493195, upload-time = "2025-10-28T20:56:03.314Z" }, - { url = "https://files.pythonhosted.org/packages/98/31/913f774a4708775433b7375c4f867d58ba58ead833af96c8af3621a0d243/aiohttp-3.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2a9ea08e8c58bb17655630198833109227dea914cd20be660f52215f6de5613", size = 1747759, upload-time = "2025-10-28T20:56:04.904Z" }, - { url = "https://files.pythonhosted.org/packages/e8/63/04efe156f4326f31c7c4a97144f82132c3bb21859b7bb84748d452ccc17c/aiohttp-3.13.2-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53b07472f235eb80e826ad038c9d106c2f653584753f3ddab907c83f49eedead", size = 1704456, upload-time = "2025-10-28T20:56:06.986Z" }, - { url = "https://files.pythonhosted.org/packages/8e/02/4e16154d8e0a9cf4ae76f692941fd52543bbb148f02f098ca73cab9b1c1b/aiohttp-3.13.2-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e736c93e9c274fce6419af4aac199984d866e55f8a4cec9114671d0ea9688780", size = 1807572, upload-time = "2025-10-28T20:56:08.558Z" }, - { url = "https://files.pythonhosted.org/packages/34/58/b0583defb38689e7f06798f0285b1ffb3a6fb371f38363ce5fd772112724/aiohttp-3.13.2-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ff5e771f5dcbc81c64898c597a434f7682f2259e0cd666932a913d53d1341d1a", size = 1895954, upload-time = "2025-10-28T20:56:10.545Z" }, - { url = "https://files.pythonhosted.org/packages/6b/f3/083907ee3437425b4e376aa58b2c915eb1a33703ec0dc30040f7ae3368c6/aiohttp-3.13.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:a3b6fb0c207cc661fa0bf8c66d8d9b657331ccc814f4719468af61034b478592", size = 1747092, upload-time = "2025-10-28T20:56:12.118Z" }, - { url = "https://files.pythonhosted.org/packages/ac/61/98a47319b4e425cc134e05e5f3fc512bf9a04bf65aafd9fdcda5d57ec693/aiohttp-3.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:97a0895a8e840ab3520e2288db7cace3a1981300d48babeb50e7425609e2e0ab", size = 1606815, upload-time = "2025-10-28T20:56:14.191Z" }, - { url = "https://files.pythonhosted.org/packages/97/4b/e78b854d82f66bb974189135d31fce265dee0f5344f64dd0d345158a5973/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9e8f8afb552297aca127c90cb840e9a1d4bfd6a10d7d8f2d9176e1acc69bad30", size = 1723789, upload-time = "2025-10-28T20:56:16.101Z" }, - { url = "https://files.pythonhosted.org/packages/ed/fc/9d2ccc794fc9b9acd1379d625c3a8c64a45508b5091c546dea273a41929e/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:ed2f9c7216e53c3df02264f25d824b079cc5914f9e2deba94155190ef648ee40", size = 1718104, upload-time = "2025-10-28T20:56:17.655Z" }, - { url = "https://files.pythonhosted.org/packages/66/65/34564b8765ea5c7d79d23c9113135d1dd3609173da13084830f1507d56cf/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:99c5280a329d5fa18ef30fd10c793a190d996567667908bef8a7f81f8202b948", size = 1785584, upload-time = "2025-10-28T20:56:19.238Z" }, - { url = "https://files.pythonhosted.org/packages/30/be/f6a7a426e02fc82781afd62016417b3948e2207426d90a0e478790d1c8a4/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ca6ffef405fc9c09a746cb5d019c1672cd7f402542e379afc66b370833170cf", size = 1595126, upload-time = "2025-10-28T20:56:20.836Z" }, - { url = "https://files.pythonhosted.org/packages/e5/c7/8e22d5d28f94f67d2af496f14a83b3c155d915d1fe53d94b66d425ec5b42/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:47f438b1a28e926c37632bff3c44df7d27c9b57aaf4e34b1def3c07111fdb782", size = 
1800665, upload-time = "2025-10-28T20:56:22.922Z" }, - { url = "https://files.pythonhosted.org/packages/d1/11/91133c8b68b1da9fc16555706aa7276fdf781ae2bb0876c838dd86b8116e/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9acda8604a57bb60544e4646a4615c1866ee6c04a8edef9b8ee6fd1d8fa2ddc8", size = 1739532, upload-time = "2025-10-28T20:56:25.924Z" }, - { url = "https://files.pythonhosted.org/packages/17/6b/3747644d26a998774b21a616016620293ddefa4d63af6286f389aedac844/aiohttp-3.13.2-cp311-cp311-win32.whl", hash = "sha256:868e195e39b24aaa930b063c08bb0c17924899c16c672a28a65afded9c46c6ec", size = 431876, upload-time = "2025-10-28T20:56:27.524Z" }, - { url = "https://files.pythonhosted.org/packages/c3/63/688462108c1a00eb9f05765331c107f95ae86f6b197b865d29e930b7e462/aiohttp-3.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:7fd19df530c292542636c2a9a85854fab93474396a52f1695e799186bbd7f24c", size = 456205, upload-time = "2025-10-28T20:56:29.062Z" }, - { url = "https://files.pythonhosted.org/packages/29/9b/01f00e9856d0a73260e86dd8ed0c2234a466c5c1712ce1c281548df39777/aiohttp-3.13.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b1e56bab2e12b2b9ed300218c351ee2a3d8c8fdab5b1ec6193e11a817767e47b", size = 737623, upload-time = "2025-10-28T20:56:30.797Z" }, - { url = "https://files.pythonhosted.org/packages/5a/1b/4be39c445e2b2bd0aab4ba736deb649fabf14f6757f405f0c9685019b9e9/aiohttp-3.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:364e25edaabd3d37b1db1f0cbcee8c73c9a3727bfa262b83e5e4cf3489a2a9dc", size = 492664, upload-time = "2025-10-28T20:56:32.708Z" }, - { url = "https://files.pythonhosted.org/packages/28/66/d35dcfea8050e131cdd731dff36434390479b4045a8d0b9d7111b0a968f1/aiohttp-3.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c5c94825f744694c4b8db20b71dba9a257cd2ba8e010a803042123f3a25d50d7", size = 491808, upload-time = "2025-10-28T20:56:34.57Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/29/8e4609b93e10a853b65f8291e64985de66d4f5848c5637cddc70e98f01f8/aiohttp-3.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba2715d842ffa787be87cbfce150d5e88c87a98e0b62e0f5aa489169a393dbbb", size = 1738863, upload-time = "2025-10-28T20:56:36.377Z" }, - { url = "https://files.pythonhosted.org/packages/9d/fa/4ebdf4adcc0def75ced1a0d2d227577cd7b1b85beb7edad85fcc87693c75/aiohttp-3.13.2-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:585542825c4bc662221fb257889e011a5aa00f1ae4d75d1d246a5225289183e3", size = 1700586, upload-time = "2025-10-28T20:56:38.034Z" }, - { url = "https://files.pythonhosted.org/packages/da/04/73f5f02ff348a3558763ff6abe99c223381b0bace05cd4530a0258e52597/aiohttp-3.13.2-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:39d02cb6025fe1aabca329c5632f48c9532a3dabccd859e7e2f110668972331f", size = 1768625, upload-time = "2025-10-28T20:56:39.75Z" }, - { url = "https://files.pythonhosted.org/packages/f8/49/a825b79ffec124317265ca7d2344a86bcffeb960743487cb11988ffb3494/aiohttp-3.13.2-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e67446b19e014d37342f7195f592a2a948141d15a312fe0e700c2fd2f03124f6", size = 1867281, upload-time = "2025-10-28T20:56:41.471Z" }, - { url = "https://files.pythonhosted.org/packages/b9/48/adf56e05f81eac31edcfae45c90928f4ad50ef2e3ea72cb8376162a368f8/aiohttp-3.13.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4356474ad6333e41ccefd39eae869ba15a6c5299c9c01dfdcfdd5c107be4363e", size = 1752431, upload-time = "2025-10-28T20:56:43.162Z" }, - { url = "https://files.pythonhosted.org/packages/30/ab/593855356eead019a74e862f21523db09c27f12fd24af72dbc3555b9bfd9/aiohttp-3.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:eeacf451c99b4525f700f078becff32c32ec327b10dcf31306a8a52d78166de7", size = 1562846, upload-time = "2025-10-28T20:56:44.85Z" }, - { url = "https://files.pythonhosted.org/packages/39/0f/9f3d32271aa8dc35036e9668e31870a9d3b9542dd6b3e2c8a30931cb27ae/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8a9b889aeabd7a4e9af0b7f4ab5ad94d42e7ff679aaec6d0db21e3b639ad58d", size = 1699606, upload-time = "2025-10-28T20:56:46.519Z" }, - { url = "https://files.pythonhosted.org/packages/2c/3c/52d2658c5699b6ef7692a3f7128b2d2d4d9775f2a68093f74bca06cf01e1/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fa89cb11bc71a63b69568d5b8a25c3ca25b6d54c15f907ca1c130d72f320b76b", size = 1720663, upload-time = "2025-10-28T20:56:48.528Z" }, - { url = "https://files.pythonhosted.org/packages/9b/d4/8f8f3ff1fb7fb9e3f04fcad4e89d8a1cd8fc7d05de67e3de5b15b33008ff/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8aa7c807df234f693fed0ecd507192fc97692e61fee5702cdc11155d2e5cadc8", size = 1737939, upload-time = "2025-10-28T20:56:50.77Z" }, - { url = "https://files.pythonhosted.org/packages/03/d3/ddd348f8a27a634daae39a1b8e291ff19c77867af438af844bf8b7e3231b/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:9eb3e33fdbe43f88c3c75fa608c25e7c47bbd80f48d012763cb67c47f39a7e16", size = 1555132, upload-time = "2025-10-28T20:56:52.568Z" }, - { url = "https://files.pythonhosted.org/packages/39/b8/46790692dc46218406f94374903ba47552f2f9f90dad554eed61bfb7b64c/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9434bc0d80076138ea986833156c5a48c9c7a8abb0c96039ddbb4afc93184169", size = 1764802, upload-time = "2025-10-28T20:56:54.292Z" }, - { url = "https://files.pythonhosted.org/packages/ba/e4/19ce547b58ab2a385e5f0b8aa3db38674785085abcf79b6e0edd1632b12f/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ff15c147b2ad66da1f2cbb0622313f2242d8e6e8f9b79b5206c84523a4473248", size = 1719512, upload-time = 
"2025-10-28T20:56:56.428Z" }, - { url = "https://files.pythonhosted.org/packages/70/30/6355a737fed29dcb6dfdd48682d5790cb5eab050f7b4e01f49b121d3acad/aiohttp-3.13.2-cp312-cp312-win32.whl", hash = "sha256:27e569eb9d9e95dbd55c0fc3ec3a9335defbf1d8bc1d20171a49f3c4c607b93e", size = 426690, upload-time = "2025-10-28T20:56:58.736Z" }, - { url = "https://files.pythonhosted.org/packages/0a/0d/b10ac09069973d112de6ef980c1f6bb31cb7dcd0bc363acbdad58f927873/aiohttp-3.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:8709a0f05d59a71f33fd05c17fc11fcb8c30140506e13c2f5e8ee1b8964e1b45", size = 453465, upload-time = "2025-10-28T20:57:00.795Z" }, - { url = "https://files.pythonhosted.org/packages/bf/78/7e90ca79e5aa39f9694dcfd74f4720782d3c6828113bb1f3197f7e7c4a56/aiohttp-3.13.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7519bdc7dfc1940d201651b52bf5e03f5503bda45ad6eacf64dda98be5b2b6be", size = 732139, upload-time = "2025-10-28T20:57:02.455Z" }, - { url = "https://files.pythonhosted.org/packages/db/ed/1f59215ab6853fbaa5c8495fa6cbc39edfc93553426152b75d82a5f32b76/aiohttp-3.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:088912a78b4d4f547a1f19c099d5a506df17eacec3c6f4375e2831ec1d995742", size = 490082, upload-time = "2025-10-28T20:57:04.784Z" }, - { url = "https://files.pythonhosted.org/packages/68/7b/fe0fe0f5e05e13629d893c760465173a15ad0039c0a5b0d0040995c8075e/aiohttp-3.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5276807b9de9092af38ed23ce120539ab0ac955547b38563a9ba4f5b07b95293", size = 489035, upload-time = "2025-10-28T20:57:06.894Z" }, - { url = "https://files.pythonhosted.org/packages/d2/04/db5279e38471b7ac801d7d36a57d1230feeee130bbe2a74f72731b23c2b1/aiohttp-3.13.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1237c1375eaef0db4dcd7c2559f42e8af7b87ea7d295b118c60c36a6e61cb811", size = 1720387, upload-time = "2025-10-28T20:57:08.685Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/07/8ea4326bd7dae2bd59828f69d7fdc6e04523caa55e4a70f4a8725a7e4ed2/aiohttp-3.13.2-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:96581619c57419c3d7d78703d5b78c1e5e5fc0172d60f555bdebaced82ded19a", size = 1688314, upload-time = "2025-10-28T20:57:10.693Z" }, - { url = "https://files.pythonhosted.org/packages/48/ab/3d98007b5b87ffd519d065225438cc3b668b2f245572a8cb53da5dd2b1bc/aiohttp-3.13.2-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2713a95b47374169409d18103366de1050fe0ea73db358fc7a7acb2880422d4", size = 1756317, upload-time = "2025-10-28T20:57:12.563Z" }, - { url = "https://files.pythonhosted.org/packages/97/3d/801ca172b3d857fafb7b50c7c03f91b72b867a13abca982ed6b3081774ef/aiohttp-3.13.2-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:228a1cd556b3caca590e9511a89444925da87d35219a49ab5da0c36d2d943a6a", size = 1858539, upload-time = "2025-10-28T20:57:14.623Z" }, - { url = "https://files.pythonhosted.org/packages/f7/0d/4764669bdf47bd472899b3d3db91fffbe925c8e3038ec591a2fd2ad6a14d/aiohttp-3.13.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ac6cde5fba8d7d8c6ac963dbb0256a9854e9fafff52fbcc58fdf819357892c3e", size = 1739597, upload-time = "2025-10-28T20:57:16.399Z" }, - { url = "https://files.pythonhosted.org/packages/c4/52/7bd3c6693da58ba16e657eb904a5b6decfc48ecd06e9ac098591653b1566/aiohttp-3.13.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f2bef8237544f4e42878c61cef4e2839fee6346dc60f5739f876a9c50be7fcdb", size = 1555006, upload-time = "2025-10-28T20:57:18.288Z" }, - { url = "https://files.pythonhosted.org/packages/48/30/9586667acec5993b6f41d2ebcf96e97a1255a85f62f3c653110a5de4d346/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:16f15a4eac3bc2d76c45f7ebdd48a65d41b242eb6c31c2245463b40b34584ded", size = 1683220, upload-time = "2025-10-28T20:57:20.241Z" }, - { url = "https://files.pythonhosted.org/packages/71/01/3afe4c96854cfd7b30d78333852e8e851dceaec1c40fd00fec90c6402dd2/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:bb7fb776645af5cc58ab804c58d7eba545a97e047254a52ce89c157b5af6cd0b", size = 1712570, upload-time = "2025-10-28T20:57:22.253Z" }, - { url = "https://files.pythonhosted.org/packages/11/2c/22799d8e720f4697a9e66fd9c02479e40a49de3de2f0bbe7f9f78a987808/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:e1b4951125ec10c70802f2cb09736c895861cd39fd9dcb35107b4dc8ae6220b8", size = 1733407, upload-time = "2025-10-28T20:57:24.37Z" }, - { url = "https://files.pythonhosted.org/packages/34/cb/90f15dd029f07cebbd91f8238a8b363978b530cd128488085b5703683594/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:550bf765101ae721ee1d37d8095f47b1f220650f85fe1af37a90ce75bab89d04", size = 1550093, upload-time = "2025-10-28T20:57:26.257Z" }, - { url = "https://files.pythonhosted.org/packages/69/46/12dce9be9d3303ecbf4d30ad45a7683dc63d90733c2d9fe512be6716cd40/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:fe91b87fc295973096251e2d25a811388e7d8adf3bd2b97ef6ae78bc4ac6c476", size = 1758084, upload-time = "2025-10-28T20:57:28.349Z" }, - { url = "https://files.pythonhosted.org/packages/f9/c8/0932b558da0c302ffd639fc6362a313b98fdf235dc417bc2493da8394df7/aiohttp-3.13.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e0c8e31cfcc4592cb200160344b2fb6ae0f9e4effe06c644b5a125d4ae5ebe23", size = 1716987, upload-time = "2025-10-28T20:57:30.233Z" }, - { url = "https://files.pythonhosted.org/packages/5d/8b/f5bd1a75003daed099baec373aed678f2e9b34f2ad40d85baa1368556396/aiohttp-3.13.2-cp313-cp313-win32.whl", hash = "sha256:0740f31a60848d6edb296a0df827473eede90c689b8f9f2a4cdde74889eb2254", size = 425859, upload-time = "2025-10-28T20:57:32.105Z" 
}, - { url = "https://files.pythonhosted.org/packages/5d/28/a8a9fc6957b2cee8902414e41816b5ab5536ecf43c3b1843c10e82c559b2/aiohttp-3.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:a88d13e7ca367394908f8a276b89d04a3652044612b9a408a0bb22a5ed976a1a", size = 452192, upload-time = "2025-10-28T20:57:34.166Z" }, - { url = "https://files.pythonhosted.org/packages/9b/36/e2abae1bd815f01c957cbf7be817b3043304e1c87bad526292a0410fdcf9/aiohttp-3.13.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:2475391c29230e063ef53a66669b7b691c9bfc3f1426a0f7bcdf1216bdbac38b", size = 735234, upload-time = "2025-10-28T20:57:36.415Z" }, - { url = "https://files.pythonhosted.org/packages/ca/e3/1ee62dde9b335e4ed41db6bba02613295a0d5b41f74a783c142745a12763/aiohttp-3.13.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:f33c8748abef4d8717bb20e8fb1b3e07c6adacb7fd6beaae971a764cf5f30d61", size = 490733, upload-time = "2025-10-28T20:57:38.205Z" }, - { url = "https://files.pythonhosted.org/packages/1a/aa/7a451b1d6a04e8d15a362af3e9b897de71d86feac3babf8894545d08d537/aiohttp-3.13.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ae32f24bbfb7dbb485a24b30b1149e2f200be94777232aeadba3eecece4d0aa4", size = 491303, upload-time = "2025-10-28T20:57:40.122Z" }, - { url = "https://files.pythonhosted.org/packages/57/1e/209958dbb9b01174870f6a7538cd1f3f28274fdbc88a750c238e2c456295/aiohttp-3.13.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d7f02042c1f009ffb70067326ef183a047425bb2ff3bc434ead4dd4a4a66a2b", size = 1717965, upload-time = "2025-10-28T20:57:42.28Z" }, - { url = "https://files.pythonhosted.org/packages/08/aa/6a01848d6432f241416bc4866cae8dc03f05a5a884d2311280f6a09c73d6/aiohttp-3.13.2-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:93655083005d71cd6c072cdab54c886e6570ad2c4592139c3fb967bfc19e4694", size = 1667221, upload-time = "2025-10-28T20:57:44.869Z" }, - { url = 
"https://files.pythonhosted.org/packages/87/4f/36c1992432d31bbc789fa0b93c768d2e9047ec8c7177e5cd84ea85155f36/aiohttp-3.13.2-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0db1e24b852f5f664cd728db140cf11ea0e82450471232a394b3d1a540b0f906", size = 1757178, upload-time = "2025-10-28T20:57:47.216Z" }, - { url = "https://files.pythonhosted.org/packages/ac/b4/8e940dfb03b7e0f68a82b88fd182b9be0a65cb3f35612fe38c038c3112cf/aiohttp-3.13.2-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b009194665bcd128e23eaddef362e745601afa4641930848af4c8559e88f18f9", size = 1838001, upload-time = "2025-10-28T20:57:49.337Z" }, - { url = "https://files.pythonhosted.org/packages/d7/ef/39f3448795499c440ab66084a9db7d20ca7662e94305f175a80f5b7e0072/aiohttp-3.13.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c038a8fdc8103cd51dbd986ecdce141473ffd9775a7a8057a6ed9c3653478011", size = 1716325, upload-time = "2025-10-28T20:57:51.327Z" }, - { url = "https://files.pythonhosted.org/packages/d7/51/b311500ffc860b181c05d91c59a1313bdd05c82960fdd4035a15740d431e/aiohttp-3.13.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:66bac29b95a00db411cd758fea0e4b9bdba6d549dfe333f9a945430f5f2cc5a6", size = 1547978, upload-time = "2025-10-28T20:57:53.554Z" }, - { url = "https://files.pythonhosted.org/packages/31/64/b9d733296ef79815226dab8c586ff9e3df41c6aff2e16c06697b2d2e6775/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4ebf9cfc9ba24a74cf0718f04aac2a3bbe745902cc7c5ebc55c0f3b5777ef213", size = 1682042, upload-time = "2025-10-28T20:57:55.617Z" }, - { url = "https://files.pythonhosted.org/packages/3f/30/43d3e0f9d6473a6db7d472104c4eff4417b1e9df01774cb930338806d36b/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a4b88ebe35ce54205c7074f7302bd08a4cb83256a3e0870c72d6f68a3aaf8e49", size = 1680085, upload-time 
= "2025-10-28T20:57:57.59Z" }, - { url = "https://files.pythonhosted.org/packages/16/51/c709f352c911b1864cfd1087577760ced64b3e5bee2aa88b8c0c8e2e4972/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:98c4fb90bb82b70a4ed79ca35f656f4281885be076f3f970ce315402b53099ae", size = 1728238, upload-time = "2025-10-28T20:57:59.525Z" }, - { url = "https://files.pythonhosted.org/packages/19/e2/19bd4c547092b773caeb48ff5ae4b1ae86756a0ee76c16727fcfd281404b/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:ec7534e63ae0f3759df3a1ed4fa6bc8f75082a924b590619c0dd2f76d7043caa", size = 1544395, upload-time = "2025-10-28T20:58:01.914Z" }, - { url = "https://files.pythonhosted.org/packages/cf/87/860f2803b27dfc5ed7be532832a3498e4919da61299b4a1f8eb89b8ff44d/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5b927cf9b935a13e33644cbed6c8c4b2d0f25b713d838743f8fe7191b33829c4", size = 1742965, upload-time = "2025-10-28T20:58:03.972Z" }, - { url = "https://files.pythonhosted.org/packages/67/7f/db2fc7618925e8c7a601094d5cbe539f732df4fb570740be88ed9e40e99a/aiohttp-3.13.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:88d6c017966a78c5265d996c19cdb79235be5e6412268d7e2ce7dee339471b7a", size = 1697585, upload-time = "2025-10-28T20:58:06.189Z" }, - { url = "https://files.pythonhosted.org/packages/0c/07/9127916cb09bb38284db5036036042b7b2c514c8ebaeee79da550c43a6d6/aiohttp-3.13.2-cp314-cp314-win32.whl", hash = "sha256:f7c183e786e299b5d6c49fb43a769f8eb8e04a2726a2bd5887b98b5cc2d67940", size = 431621, upload-time = "2025-10-28T20:58:08.636Z" }, - { url = "https://files.pythonhosted.org/packages/fb/41/554a8a380df6d3a2bba8a7726429a23f4ac62aaf38de43bb6d6cde7b4d4d/aiohttp-3.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:fe242cd381e0fb65758faf5ad96c2e460df6ee5b2de1072fe97e4127927e00b4", size = 457627, upload-time = "2025-10-28T20:58:11Z" }, - { url = 
"https://files.pythonhosted.org/packages/c7/8e/3824ef98c039d3951cb65b9205a96dd2b20f22241ee17d89c5701557c826/aiohttp-3.13.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:f10d9c0b0188fe85398c61147bbd2a657d616c876863bfeff43376e0e3134673", size = 767360, upload-time = "2025-10-28T20:58:13.358Z" }, - { url = "https://files.pythonhosted.org/packages/a4/0f/6a03e3fc7595421274fa34122c973bde2d89344f8a881b728fa8c774e4f1/aiohttp-3.13.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:e7c952aefdf2460f4ae55c5e9c3e80aa72f706a6317e06020f80e96253b1accd", size = 504616, upload-time = "2025-10-28T20:58:15.339Z" }, - { url = "https://files.pythonhosted.org/packages/c6/aa/ed341b670f1bc8a6f2c6a718353d13b9546e2cef3544f573c6a1ff0da711/aiohttp-3.13.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c20423ce14771d98353d2e25e83591fa75dfa90a3c1848f3d7c68243b4fbded3", size = 509131, upload-time = "2025-10-28T20:58:17.693Z" }, - { url = "https://files.pythonhosted.org/packages/7f/f0/c68dac234189dae5c4bbccc0f96ce0cc16b76632cfc3a08fff180045cfa4/aiohttp-3.13.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e96eb1a34396e9430c19d8338d2ec33015e4a87ef2b4449db94c22412e25ccdf", size = 1864168, upload-time = "2025-10-28T20:58:20.113Z" }, - { url = "https://files.pythonhosted.org/packages/8f/65/75a9a76db8364b5d0e52a0c20eabc5d52297385d9af9c35335b924fafdee/aiohttp-3.13.2-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:23fb0783bc1a33640036465019d3bba069942616a6a2353c6907d7fe1ccdaf4e", size = 1719200, upload-time = "2025-10-28T20:58:22.583Z" }, - { url = "https://files.pythonhosted.org/packages/f5/55/8df2ed78d7f41d232f6bd3ff866b6f617026551aa1d07e2f03458f964575/aiohttp-3.13.2-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e1a9bea6244a1d05a4e57c295d69e159a5c50d8ef16aa390948ee873478d9a5", size = 1843497, upload-time = 
"2025-10-28T20:58:24.672Z" }, - { url = "https://files.pythonhosted.org/packages/e9/e0/94d7215e405c5a02ccb6a35c7a3a6cfff242f457a00196496935f700cde5/aiohttp-3.13.2-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0a3d54e822688b56e9f6b5816fb3de3a3a64660efac64e4c2dc435230ad23bad", size = 1935703, upload-time = "2025-10-28T20:58:26.758Z" }, - { url = "https://files.pythonhosted.org/packages/0b/78/1eeb63c3f9b2d1015a4c02788fb543141aad0a03ae3f7a7b669b2483f8d4/aiohttp-3.13.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7a653d872afe9f33497215745da7a943d1dc15b728a9c8da1c3ac423af35178e", size = 1792738, upload-time = "2025-10-28T20:58:29.787Z" }, - { url = "https://files.pythonhosted.org/packages/41/75/aaf1eea4c188e51538c04cc568040e3082db263a57086ea74a7d38c39e42/aiohttp-3.13.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:56d36e80d2003fa3fc0207fac644216d8532e9504a785ef9a8fd013f84a42c61", size = 1624061, upload-time = "2025-10-28T20:58:32.529Z" }, - { url = "https://files.pythonhosted.org/packages/9b/c2/3b6034de81fbcc43de8aeb209073a2286dfb50b86e927b4efd81cf848197/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:78cd586d8331fb8e241c2dd6b2f4061778cc69e150514b39a9e28dd050475661", size = 1789201, upload-time = "2025-10-28T20:58:34.618Z" }, - { url = "https://files.pythonhosted.org/packages/c9/38/c15dcf6d4d890217dae79d7213988f4e5fe6183d43893a9cf2fe9e84ca8d/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:20b10bbfbff766294fe99987f7bb3b74fdd2f1a2905f2562132641ad434dcf98", size = 1776868, upload-time = "2025-10-28T20:58:38.835Z" }, - { url = "https://files.pythonhosted.org/packages/04/75/f74fd178ac81adf4f283a74847807ade5150e48feda6aef024403716c30c/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9ec49dff7e2b3c85cdeaa412e9d438f0ecd71676fde61ec57027dd392f00c693", size = 1790660, upload-time 
= "2025-10-28T20:58:41.507Z" }, - { url = "https://files.pythonhosted.org/packages/e7/80/7368bd0d06b16b3aba358c16b919e9c46cf11587dc572091031b0e9e3ef0/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:94f05348c4406450f9d73d38efb41d669ad6cd90c7ee194810d0eefbfa875a7a", size = 1617548, upload-time = "2025-10-28T20:58:43.674Z" }, - { url = "https://files.pythonhosted.org/packages/7d/4b/a6212790c50483cb3212e507378fbe26b5086d73941e1ec4b56a30439688/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:fa4dcb605c6f82a80c7f95713c2b11c3b8e9893b3ebd2bc9bde93165ed6107be", size = 1817240, upload-time = "2025-10-28T20:58:45.787Z" }, - { url = "https://files.pythonhosted.org/packages/ff/f7/ba5f0ba4ea8d8f3c32850912944532b933acbf0f3a75546b89269b9b7dde/aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cf00e5db968c3f67eccd2778574cf64d8b27d95b237770aa32400bd7a1ca4f6c", size = 1762334, upload-time = "2025-10-28T20:58:47.936Z" }, - { url = "https://files.pythonhosted.org/packages/7e/83/1a5a1856574588b1cad63609ea9ad75b32a8353ac995d830bf5da9357364/aiohttp-3.13.2-cp314-cp314t-win32.whl", hash = "sha256:d23b5fe492b0805a50d3371e8a728a9134d8de5447dce4c885f5587294750734", size = 464685, upload-time = "2025-10-28T20:58:50.642Z" }, - { url = "https://files.pythonhosted.org/packages/9f/4d/d22668674122c08f4d56972297c51a624e64b3ed1efaa40187607a7cb66e/aiohttp-3.13.2-cp314-cp314t-win_amd64.whl", hash = "sha256:ff0a7b0a82a7ab905cbda74006318d1b12e37c797eb1b0d4eb3e316cf47f658f", size = 498093, upload-time = "2025-10-28T20:58:52.782Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload-time = "2026-01-03T17:33:05.204Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/36/d6/5aec9313ee6ea9c7cde8b891b69f4ff4001416867104580670a31daeba5b/aiohttp-3.13.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d5a372fd5afd301b3a89582817fdcdb6c34124787c70dbcc616f259013e7eef7", size = 738950, upload-time = "2026-01-03T17:29:13.002Z" }, + { url = "https://files.pythonhosted.org/packages/68/03/8fa90a7e6d11ff20a18837a8e2b5dd23db01aabc475aa9271c8ad33299f5/aiohttp-3.13.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:147e422fd1223005c22b4fe080f5d93ced44460f5f9c105406b753612b587821", size = 496099, upload-time = "2026-01-03T17:29:15.268Z" }, + { url = "https://files.pythonhosted.org/packages/d2/23/b81f744d402510a8366b74eb420fc0cc1170d0c43daca12d10814df85f10/aiohttp-3.13.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:859bd3f2156e81dd01432f5849fc73e2243d4a487c4fd26609b1299534ee1845", size = 491072, upload-time = "2026-01-03T17:29:16.922Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e1/56d1d1c0dd334cd203dd97706ce004c1aa24b34a813b0b8daf3383039706/aiohttp-3.13.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dca68018bf48c251ba17c72ed479f4dafe9dbd5a73707ad8d28a38d11f3d42af", size = 1671588, upload-time = "2026-01-03T17:29:18.539Z" }, + { url = "https://files.pythonhosted.org/packages/5f/34/8d7f962604f4bc2b4e39eb1220dac7d4e4cba91fb9ba0474b4ecd67db165/aiohttp-3.13.3-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:fee0c6bc7db1de362252affec009707a17478a00ec69f797d23ca256e36d5940", size = 1640334, upload-time = "2026-01-03T17:29:21.028Z" }, + { url = "https://files.pythonhosted.org/packages/94/1d/fcccf2c668d87337ddeef9881537baee13c58d8f01f12ba8a24215f2b804/aiohttp-3.13.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c048058117fd649334d81b4b526e94bde3ccaddb20463a815ced6ecbb7d11160", size = 1722656, upload-time = 
"2026-01-03T17:29:22.531Z" }, + { url = "https://files.pythonhosted.org/packages/aa/98/c6f3b081c4c606bc1e5f2ec102e87d6411c73a9ef3616fea6f2d5c98c062/aiohttp-3.13.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:215a685b6fbbfcf71dfe96e3eba7a6f58f10da1dfdf4889c7dd856abe430dca7", size = 1817625, upload-time = "2026-01-03T17:29:24.276Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c0/cfcc3d2e11b477f86e1af2863f3858c8850d751ce8dc39c4058a072c9e54/aiohttp-3.13.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2c184bb1fe2cbd2cefba613e9db29a5ab559323f994b6737e370d3da0ac455", size = 1672604, upload-time = "2026-01-03T17:29:26.099Z" }, + { url = "https://files.pythonhosted.org/packages/1e/77/6b4ffcbcac4c6a5d041343a756f34a6dd26174ae07f977a64fe028dda5b0/aiohttp-3.13.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:75ca857eba4e20ce9f546cd59c7007b33906a4cd48f2ff6ccf1ccfc3b646f279", size = 1554370, upload-time = "2026-01-03T17:29:28.121Z" }, + { url = "https://files.pythonhosted.org/packages/f2/f0/e3ddfa93f17d689dbe014ba048f18e0c9f9b456033b70e94349a2e9048be/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:81e97251d9298386c2b7dbeb490d3d1badbdc69107fb8c9299dd04eb39bddc0e", size = 1642023, upload-time = "2026-01-03T17:29:30.002Z" }, + { url = "https://files.pythonhosted.org/packages/eb/45/c14019c9ec60a8e243d06d601b33dcc4fd92379424bde3021725859d7f99/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:c0e2d366af265797506f0283487223146af57815b388623f0357ef7eac9b209d", size = 1649680, upload-time = "2026-01-03T17:29:31.782Z" }, + { url = "https://files.pythonhosted.org/packages/9c/fd/09c9451dae5aa5c5ed756df95ff9ef549d45d4be663bafd1e4954fd836f0/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4e239d501f73d6db1522599e14b9b321a7e3b1de66ce33d53a765d975e9f4808", size = 1692407, upload-time = 
"2026-01-03T17:29:33.392Z" }, + { url = "https://files.pythonhosted.org/packages/a6/81/938bc2ec33c10efd6637ccb3d22f9f3160d08e8f3aa2587a2c2d5ab578eb/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:0db318f7a6f065d84cb1e02662c526294450b314a02bd9e2a8e67f0d8564ce40", size = 1543047, upload-time = "2026-01-03T17:29:34.855Z" }, + { url = "https://files.pythonhosted.org/packages/f7/23/80488ee21c8d567c83045e412e1d9b7077d27171591a4eb7822586e8c06a/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:bfc1cc2fe31a6026a8a88e4ecfb98d7f6b1fec150cfd708adbfd1d2f42257c29", size = 1715264, upload-time = "2026-01-03T17:29:36.389Z" }, + { url = "https://files.pythonhosted.org/packages/e2/83/259a8da6683182768200b368120ab3deff5370bed93880fb9a3a86299f34/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af71fff7bac6bb7508956696dce8f6eec2bbb045eceb40343944b1ae62b5ef11", size = 1657275, upload-time = "2026-01-03T17:29:38.162Z" }, + { url = "https://files.pythonhosted.org/packages/3f/4f/2c41f800a0b560785c10fb316216ac058c105f9be50bdc6a285de88db625/aiohttp-3.13.3-cp310-cp310-win32.whl", hash = "sha256:37da61e244d1749798c151421602884db5270faf479cf0ef03af0ff68954c9dd", size = 434053, upload-time = "2026-01-03T17:29:40.074Z" }, + { url = "https://files.pythonhosted.org/packages/80/df/29cd63c7ecfdb65ccc12f7d808cac4fa2a19544660c06c61a4a48462de0c/aiohttp-3.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:7e63f210bc1b57ef699035f2b4b6d9ce096b5914414a49b0997c839b2bd2223c", size = 456687, upload-time = "2026-01-03T17:29:41.819Z" }, + { url = "https://files.pythonhosted.org/packages/f1/4c/a164164834f03924d9a29dc3acd9e7ee58f95857e0b467f6d04298594ebb/aiohttp-3.13.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5b6073099fb654e0a068ae678b10feff95c5cae95bbfcbfa7af669d361a8aa6b", size = 746051, upload-time = "2026-01-03T17:29:43.287Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/71/d5c31390d18d4f58115037c432b7e0348c60f6f53b727cad33172144a112/aiohttp-3.13.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cb93e166e6c28716c8c6aeb5f99dfb6d5ccf482d29fe9bf9a794110e6d0ab64", size = 499234, upload-time = "2026-01-03T17:29:44.822Z" }, + { url = "https://files.pythonhosted.org/packages/0e/c9/741f8ac91e14b1d2e7100690425a5b2b919a87a5075406582991fb7de920/aiohttp-3.13.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:28e027cf2f6b641693a09f631759b4d9ce9165099d2b5d92af9bd4e197690eea", size = 494979, upload-time = "2026-01-03T17:29:46.405Z" }, + { url = "https://files.pythonhosted.org/packages/75/b5/31d4d2e802dfd59f74ed47eba48869c1c21552c586d5e81a9d0d5c2ad640/aiohttp-3.13.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3b61b7169ababd7802f9568ed96142616a9118dd2be0d1866e920e77ec8fa92a", size = 1748297, upload-time = "2026-01-03T17:29:48.083Z" }, + { url = "https://files.pythonhosted.org/packages/1a/3e/eefad0ad42959f226bb79664826883f2687d602a9ae2941a18e0484a74d3/aiohttp-3.13.3-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:80dd4c21b0f6237676449c6baaa1039abae86b91636b6c91a7f8e61c87f89540", size = 1707172, upload-time = "2026-01-03T17:29:49.648Z" }, + { url = "https://files.pythonhosted.org/packages/c5/3a/54a64299fac2891c346cdcf2aa6803f994a2e4beeaf2e5a09dcc54acc842/aiohttp-3.13.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:65d2ccb7eabee90ce0503c17716fc77226be026dcc3e65cce859a30db715025b", size = 1805405, upload-time = "2026-01-03T17:29:51.244Z" }, + { url = "https://files.pythonhosted.org/packages/6c/70/ddc1b7169cf64075e864f64595a14b147a895a868394a48f6a8031979038/aiohttp-3.13.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5b179331a481cb5529fca8b432d8d3c7001cb217513c94cd72d668d1248688a3", size = 
1899449, upload-time = "2026-01-03T17:29:53.938Z" }, + { url = "https://files.pythonhosted.org/packages/a1/7e/6815aab7d3a56610891c76ef79095677b8b5be6646aaf00f69b221765021/aiohttp-3.13.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d4c940f02f49483b18b079d1c27ab948721852b281f8b015c058100e9421dd1", size = 1748444, upload-time = "2026-01-03T17:29:55.484Z" }, + { url = "https://files.pythonhosted.org/packages/6b/f2/073b145c4100da5511f457dc0f7558e99b2987cf72600d42b559db856fbc/aiohttp-3.13.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f9444f105664c4ce47a2a7171a2418bce5b7bae45fb610f4e2c36045d85911d3", size = 1606038, upload-time = "2026-01-03T17:29:57.179Z" }, + { url = "https://files.pythonhosted.org/packages/0a/c1/778d011920cae03ae01424ec202c513dc69243cf2db303965615b81deeea/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:694976222c711d1d00ba131904beb60534f93966562f64440d0c9d41b8cdb440", size = 1724156, upload-time = "2026-01-03T17:29:58.914Z" }, + { url = "https://files.pythonhosted.org/packages/0e/cb/3419eabf4ec1e9ec6f242c32b689248365a1cf621891f6f0386632525494/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:f33ed1a2bf1997a36661874b017f5c4b760f41266341af36febaf271d179f6d7", size = 1722340, upload-time = "2026-01-03T17:30:01.962Z" }, + { url = "https://files.pythonhosted.org/packages/7a/e5/76cf77bdbc435bf233c1f114edad39ed4177ccbfab7c329482b179cff4f4/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e636b3c5f61da31a92bf0d91da83e58fdfa96f178ba682f11d24f31944cdd28c", size = 1783041, upload-time = "2026-01-03T17:30:03.609Z" }, + { url = "https://files.pythonhosted.org/packages/9d/d4/dd1ca234c794fd29c057ce8c0566b8ef7fd6a51069de5f06fa84b9a1971c/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:5d2d94f1f5fcbe40838ac51a6ab5704a6f9ea42e72ceda48de5e6b898521da51", size = 1596024, upload-time = 
"2026-01-03T17:30:05.132Z" }, + { url = "https://files.pythonhosted.org/packages/55/58/4345b5f26661a6180afa686c473620c30a66afdf120ed3dd545bbc809e85/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2be0e9ccf23e8a94f6f0650ce06042cefc6ac703d0d7ab6c7a917289f2539ad4", size = 1804590, upload-time = "2026-01-03T17:30:07.135Z" }, + { url = "https://files.pythonhosted.org/packages/7b/06/05950619af6c2df7e0a431d889ba2813c9f0129cec76f663e547a5ad56f2/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9af5e68ee47d6534d36791bbe9b646d2a7c7deb6fc24d7943628edfbb3581f29", size = 1740355, upload-time = "2026-01-03T17:30:09.083Z" }, + { url = "https://files.pythonhosted.org/packages/3e/80/958f16de79ba0422d7c1e284b2abd0c84bc03394fbe631d0a39ffa10e1eb/aiohttp-3.13.3-cp311-cp311-win32.whl", hash = "sha256:a2212ad43c0833a873d0fb3c63fa1bacedd4cf6af2fee62bf4b739ceec3ab239", size = 433701, upload-time = "2026-01-03T17:30:10.869Z" }, + { url = "https://files.pythonhosted.org/packages/dc/f2/27cdf04c9851712d6c1b99df6821a6623c3c9e55956d4b1e318c337b5a48/aiohttp-3.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:642f752c3eb117b105acbd87e2c143de710987e09860d674e068c4c2c441034f", size = 457678, upload-time = "2026-01-03T17:30:12.719Z" }, + { url = "https://files.pythonhosted.org/packages/a0/be/4fc11f202955a69e0db803a12a062b8379c970c7c84f4882b6da17337cc1/aiohttp-3.13.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b903a4dfee7d347e2d87697d0713be59e0b87925be030c9178c5faa58ea58d5c", size = 739732, upload-time = "2026-01-03T17:30:14.23Z" }, + { url = "https://files.pythonhosted.org/packages/97/2c/621d5b851f94fa0bb7430d6089b3aa970a9d9b75196bc93bb624b0db237a/aiohttp-3.13.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a45530014d7a1e09f4a55f4f43097ba0fd155089372e105e4bff4ca76cb1b168", size = 494293, upload-time = "2026-01-03T17:30:15.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/43/4be01406b78e1be8320bb8316dc9c42dbab553d281c40364e0f862d5661c/aiohttp-3.13.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27234ef6d85c914f9efeb77ff616dbf4ad2380be0cda40b4db086ffc7ddd1b7d", size = 493533, upload-time = "2026-01-03T17:30:17.431Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a8/5a35dc56a06a2c90d4742cbf35294396907027f80eea696637945a106f25/aiohttp-3.13.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d32764c6c9aafb7fb55366a224756387cd50bfa720f32b88e0e6fa45b27dcf29", size = 1737839, upload-time = "2026-01-03T17:30:19.422Z" }, + { url = "https://files.pythonhosted.org/packages/bf/62/4b9eeb331da56530bf2e198a297e5303e1c1ebdceeb00fe9b568a65c5a0c/aiohttp-3.13.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b1a6102b4d3ebc07dad44fbf07b45bb600300f15b552ddf1851b5390202ea2e3", size = 1703932, upload-time = "2026-01-03T17:30:21.756Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f6/af16887b5d419e6a367095994c0b1332d154f647e7dc2bd50e61876e8e3d/aiohttp-3.13.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c014c7ea7fb775dd015b2d3137378b7be0249a448a1612268b5a90c2d81de04d", size = 1771906, upload-time = "2026-01-03T17:30:23.932Z" }, + { url = "https://files.pythonhosted.org/packages/ce/83/397c634b1bcc24292fa1e0c7822800f9f6569e32934bdeef09dae7992dfb/aiohttp-3.13.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2b8d8ddba8f95ba17582226f80e2de99c7a7948e66490ef8d947e272a93e9463", size = 1871020, upload-time = "2026-01-03T17:30:26Z" }, + { url = "https://files.pythonhosted.org/packages/86/f6/a62cbbf13f0ac80a70f71b1672feba90fdb21fd7abd8dbf25c0105fb6fa3/aiohttp-3.13.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:9ae8dd55c8e6c4257eae3a20fd2c8f41edaea5992ed67156642493b8daf3cecc", size = 1755181, upload-time = "2026-01-03T17:30:27.554Z" }, + { url = "https://files.pythonhosted.org/packages/0a/87/20a35ad487efdd3fba93d5843efdfaa62d2f1479eaafa7453398a44faf13/aiohttp-3.13.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:01ad2529d4b5035578f5081606a465f3b814c542882804e2e8cda61adf5c71bf", size = 1561794, upload-time = "2026-01-03T17:30:29.254Z" }, + { url = "https://files.pythonhosted.org/packages/de/95/8fd69a66682012f6716e1bc09ef8a1a2a91922c5725cb904689f112309c4/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bb4f7475e359992b580559e008c598091c45b5088f28614e855e42d39c2f1033", size = 1697900, upload-time = "2026-01-03T17:30:31.033Z" }, + { url = "https://files.pythonhosted.org/packages/e5/66/7b94b3b5ba70e955ff597672dad1691333080e37f50280178967aff68657/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c19b90316ad3b24c69cd78d5c9b4f3aa4497643685901185b65166293d36a00f", size = 1728239, upload-time = "2026-01-03T17:30:32.703Z" }, + { url = "https://files.pythonhosted.org/packages/47/71/6f72f77f9f7d74719692ab65a2a0252584bf8d5f301e2ecb4c0da734530a/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:96d604498a7c782cb15a51c406acaea70d8c027ee6b90c569baa6e7b93073679", size = 1740527, upload-time = "2026-01-03T17:30:34.695Z" }, + { url = "https://files.pythonhosted.org/packages/fa/b4/75ec16cbbd5c01bdaf4a05b19e103e78d7ce1ef7c80867eb0ace42ff4488/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:084911a532763e9d3dd95adf78a78f4096cd5f58cdc18e6fdbc1b58417a45423", size = 1554489, upload-time = "2026-01-03T17:30:36.864Z" }, + { url = "https://files.pythonhosted.org/packages/52/8f/bc518c0eea29f8406dcf7ed1f96c9b48e3bc3995a96159b3fc11f9e08321/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7a4a94eb787e606d0a09404b9c38c113d3b099d508021faa615d70a0131907ce", size = 
1767852, upload-time = "2026-01-03T17:30:39.433Z" }, + { url = "https://files.pythonhosted.org/packages/9d/f2/a07a75173124f31f11ea6f863dc44e6f09afe2bca45dd4e64979490deab1/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:87797e645d9d8e222e04160ee32aa06bc5c163e8499f24db719e7852ec23093a", size = 1722379, upload-time = "2026-01-03T17:30:41.081Z" }, + { url = "https://files.pythonhosted.org/packages/3c/4a/1a3fee7c21350cac78e5c5cef711bac1b94feca07399f3d406972e2d8fcd/aiohttp-3.13.3-cp312-cp312-win32.whl", hash = "sha256:b04be762396457bef43f3597c991e192ee7da460a4953d7e647ee4b1c28e7046", size = 428253, upload-time = "2026-01-03T17:30:42.644Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b7/76175c7cb4eb73d91ad63c34e29fc4f77c9386bba4a65b53ba8e05ee3c39/aiohttp-3.13.3-cp312-cp312-win_amd64.whl", hash = "sha256:e3531d63d3bdfa7e3ac5e9b27b2dd7ec9df3206a98e0b3445fa906f233264c57", size = 455407, upload-time = "2026-01-03T17:30:44.195Z" }, + { url = "https://files.pythonhosted.org/packages/97/8a/12ca489246ca1faaf5432844adbfce7ff2cc4997733e0af120869345643a/aiohttp-3.13.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5dff64413671b0d3e7d5918ea490bdccb97a4ad29b3f311ed423200b2203e01c", size = 734190, upload-time = "2026-01-03T17:30:45.832Z" }, + { url = "https://files.pythonhosted.org/packages/32/08/de43984c74ed1fca5c014808963cc83cb00d7bb06af228f132d33862ca76/aiohttp-3.13.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:87b9aab6d6ed88235aa2970294f496ff1a1f9adcd724d800e9b952395a80ffd9", size = 491783, upload-time = "2026-01-03T17:30:47.466Z" }, + { url = "https://files.pythonhosted.org/packages/17/f8/8dd2cf6112a5a76f81f81a5130c57ca829d101ad583ce57f889179accdda/aiohttp-3.13.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:425c126c0dc43861e22cb1c14ba4c8e45d09516d0a3ae0a3f7494b79f5f233a3", size = 490704, upload-time = "2026-01-03T17:30:49.373Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/40/a46b03ca03936f832bc7eaa47cfbb1ad012ba1be4790122ee4f4f8cba074/aiohttp-3.13.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f9120f7093c2a32d9647abcaf21e6ad275b4fbec5b55969f978b1a97c7c86bf", size = 1720652, upload-time = "2026-01-03T17:30:50.974Z" }, + { url = "https://files.pythonhosted.org/packages/f7/7e/917fe18e3607af92657e4285498f500dca797ff8c918bd7d90b05abf6c2a/aiohttp-3.13.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:697753042d57f4bf7122cab985bf15d0cef23c770864580f5af4f52023a56bd6", size = 1692014, upload-time = "2026-01-03T17:30:52.729Z" }, + { url = "https://files.pythonhosted.org/packages/71/b6/cefa4cbc00d315d68973b671cf105b21a609c12b82d52e5d0c9ae61d2a09/aiohttp-3.13.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6de499a1a44e7de70735d0b39f67c8f25eb3d91eb3103be99ca0fa882cdd987d", size = 1759777, upload-time = "2026-01-03T17:30:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e3/e06ee07b45e59e6d81498b591fc589629be1553abb2a82ce33efe2a7b068/aiohttp-3.13.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:37239e9f9a7ea9ac5bf6b92b0260b01f8a22281996da609206a84df860bc1261", size = 1861276, upload-time = "2026-01-03T17:30:56.512Z" }, + { url = "https://files.pythonhosted.org/packages/7c/24/75d274228acf35ceeb2850b8ce04de9dd7355ff7a0b49d607ee60c29c518/aiohttp-3.13.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f76c1e3fe7d7c8afad7ed193f89a292e1999608170dcc9751a7462a87dfd5bc0", size = 1743131, upload-time = "2026-01-03T17:30:58.256Z" }, + { url = "https://files.pythonhosted.org/packages/04/98/3d21dde21889b17ca2eea54fdcff21b27b93f45b7bb94ca029c31ab59dc3/aiohttp-3.13.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:fc290605db2a917f6e81b0e1e0796469871f5af381ce15c604a3c5c7e51cb730", size = 1556863, upload-time = "2026-01-03T17:31:00.445Z" }, + { url = "https://files.pythonhosted.org/packages/9e/84/da0c3ab1192eaf64782b03971ab4055b475d0db07b17eff925e8c93b3aa5/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4021b51936308aeea0367b8f006dc999ca02bc118a0cc78c303f50a2ff6afb91", size = 1682793, upload-time = "2026-01-03T17:31:03.024Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0f/5802ada182f575afa02cbd0ec5180d7e13a402afb7c2c03a9aa5e5d49060/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:49a03727c1bba9a97d3e93c9f93ca03a57300f484b6e935463099841261195d3", size = 1716676, upload-time = "2026-01-03T17:31:04.842Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8c/714d53bd8b5a4560667f7bbbb06b20c2382f9c7847d198370ec6526af39c/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3d9908a48eb7416dc1f4524e69f1d32e5d90e3981e4e37eb0aa1cd18f9cfa2a4", size = 1733217, upload-time = "2026-01-03T17:31:06.868Z" }, + { url = "https://files.pythonhosted.org/packages/7d/79/e2176f46d2e963facea939f5be2d26368ce543622be6f00a12844d3c991f/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2712039939ec963c237286113c68dbad80a82a4281543f3abf766d9d73228998", size = 1552303, upload-time = "2026-01-03T17:31:08.958Z" }, + { url = "https://files.pythonhosted.org/packages/ab/6a/28ed4dea1759916090587d1fe57087b03e6c784a642b85ef48217b0277ae/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7bfdc049127717581866fa4708791220970ce291c23e28ccf3922c700740fdc0", size = 1763673, upload-time = "2026-01-03T17:31:10.676Z" }, + { url = "https://files.pythonhosted.org/packages/e8/35/4a3daeb8b9fab49240d21c04d50732313295e4bd813a465d840236dd0ce1/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8057c98e0c8472d8846b9c79f56766bcc57e3e8ac7bfd510482332366c56c591", size = 1721120, upload-time = 
"2026-01-03T17:31:12.575Z" }, + { url = "https://files.pythonhosted.org/packages/bc/9f/d643bb3c5fb99547323e635e251c609fbbc660d983144cfebec529e09264/aiohttp-3.13.3-cp313-cp313-win32.whl", hash = "sha256:1449ceddcdbcf2e0446957863af03ebaaa03f94c090f945411b61269e2cb5daf", size = 427383, upload-time = "2026-01-03T17:31:14.382Z" }, + { url = "https://files.pythonhosted.org/packages/4e/f1/ab0395f8a79933577cdd996dd2f9aa6014af9535f65dddcf88204682fe62/aiohttp-3.13.3-cp313-cp313-win_amd64.whl", hash = "sha256:693781c45a4033d31d4187d2436f5ac701e7bbfe5df40d917736108c1cc7436e", size = 453899, upload-time = "2026-01-03T17:31:15.958Z" }, + { url = "https://files.pythonhosted.org/packages/99/36/5b6514a9f5d66f4e2597e40dea2e3db271e023eb7a5d22defe96ba560996/aiohttp-3.13.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:ea37047c6b367fd4bd632bff8077449b8fa034b69e812a18e0132a00fae6e808", size = 737238, upload-time = "2026-01-03T17:31:17.909Z" }, + { url = "https://files.pythonhosted.org/packages/f7/49/459327f0d5bcd8c6c9ca69e60fdeebc3622861e696490d8674a6d0cb90a6/aiohttp-3.13.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6fc0e2337d1a4c3e6acafda6a78a39d4c14caea625124817420abceed36e2415", size = 492292, upload-time = "2026-01-03T17:31:19.919Z" }, + { url = "https://files.pythonhosted.org/packages/e8/0b/b97660c5fd05d3495b4eb27f2d0ef18dc1dc4eff7511a9bf371397ff0264/aiohttp-3.13.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c685f2d80bb67ca8c3837823ad76196b3694b0159d232206d1e461d3d434666f", size = 493021, upload-time = "2026-01-03T17:31:21.636Z" }, + { url = "https://files.pythonhosted.org/packages/54/d4/438efabdf74e30aeceb890c3290bbaa449780583b1270b00661126b8aae4/aiohttp-3.13.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48e377758516d262bde50c2584fc6c578af272559c409eecbdd2bae1601184d6", size = 1717263, upload-time = "2026-01-03T17:31:23.296Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/f2/7bddc7fd612367d1459c5bcf598a9e8f7092d6580d98de0e057eb42697ad/aiohttp-3.13.3-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:34749271508078b261c4abb1767d42b8d0c0cc9449c73a4df494777dc55f0687", size = 1669107, upload-time = "2026-01-03T17:31:25.334Z" }, + { url = "https://files.pythonhosted.org/packages/00/5a/1aeaecca40e22560f97610a329e0e5efef5e0b5afdf9f857f0d93839ab2e/aiohttp-3.13.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:82611aeec80eb144416956ec85b6ca45a64d76429c1ed46ae1b5f86c6e0c9a26", size = 1760196, upload-time = "2026-01-03T17:31:27.394Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f8/0ff6992bea7bd560fc510ea1c815f87eedd745fe035589c71ce05612a19a/aiohttp-3.13.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2fff83cfc93f18f215896e3a190e8e5cb413ce01553901aca925176e7568963a", size = 1843591, upload-time = "2026-01-03T17:31:29.238Z" }, + { url = "https://files.pythonhosted.org/packages/e3/d1/e30e537a15f53485b61f5be525f2157da719819e8377298502aebac45536/aiohttp-3.13.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bbe7d4cecacb439e2e2a8a1a7b935c25b812af7a5fd26503a66dadf428e79ec1", size = 1720277, upload-time = "2026-01-03T17:31:31.053Z" }, + { url = "https://files.pythonhosted.org/packages/84/45/23f4c451d8192f553d38d838831ebbc156907ea6e05557f39563101b7717/aiohttp-3.13.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b928f30fe49574253644b1ca44b1b8adbd903aa0da4b9054a6c20fc7f4092a25", size = 1548575, upload-time = "2026-01-03T17:31:32.87Z" }, + { url = "https://files.pythonhosted.org/packages/6a/ed/0a42b127a43712eda7807e7892c083eadfaf8429ca8fb619662a530a3aab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:7b5e8fe4de30df199155baaf64f2fcd604f4c678ed20910db8e2c66dc4b11603", size = 1679455, upload-time = "2026-01-03T17:31:34.76Z" }, + { url = "https://files.pythonhosted.org/packages/2e/b5/c05f0c2b4b4fe2c9d55e73b6d3ed4fd6c9dc2684b1d81cbdf77e7fad9adb/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:8542f41a62bcc58fc7f11cf7c90e0ec324ce44950003feb70640fc2a9092c32a", size = 1687417, upload-time = "2026-01-03T17:31:36.699Z" }, + { url = "https://files.pythonhosted.org/packages/c9/6b/915bc5dad66aef602b9e459b5a973529304d4e89ca86999d9d75d80cbd0b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:5e1d8c8b8f1d91cd08d8f4a3c2b067bfca6ec043d3ff36de0f3a715feeedf926", size = 1729968, upload-time = "2026-01-03T17:31:38.622Z" }, + { url = "https://files.pythonhosted.org/packages/11/3b/e84581290a9520024a08640b63d07673057aec5ca548177a82026187ba73/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:90455115e5da1c3c51ab619ac57f877da8fd6d73c05aacd125c5ae9819582aba", size = 1545690, upload-time = "2026-01-03T17:31:40.57Z" }, + { url = "https://files.pythonhosted.org/packages/f5/04/0c3655a566c43fd647c81b895dfe361b9f9ad6d58c19309d45cff52d6c3b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:042e9e0bcb5fba81886c8b4fbb9a09d6b8a00245fd8d88e4d989c1f96c74164c", size = 1746390, upload-time = "2026-01-03T17:31:42.857Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/71165b26978f719c3419381514c9690bd5980e764a09440a10bb816ea4ab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2eb752b102b12a76ca02dff751a801f028b4ffbbc478840b473597fc91a9ed43", size = 1702188, upload-time = "2026-01-03T17:31:44.984Z" }, + { url = "https://files.pythonhosted.org/packages/29/a7/cbe6c9e8e136314fa1980da388a59d2f35f35395948a08b6747baebb6aa6/aiohttp-3.13.3-cp314-cp314-win32.whl", hash = "sha256:b556c85915d8efaed322bf1bdae9486aa0f3f764195a0fb6ee962e5c71ef5ce1", size = 433126, upload-time = "2026-01-03T17:31:47.463Z" 
}, + { url = "https://files.pythonhosted.org/packages/de/56/982704adea7d3b16614fc5936014e9af85c0e34b58f9046655817f04306e/aiohttp-3.13.3-cp314-cp314-win_amd64.whl", hash = "sha256:9bf9f7a65e7aa20dd764151fb3d616c81088f91f8df39c3893a536e279b4b984", size = 459128, upload-time = "2026-01-03T17:31:49.2Z" }, + { url = "https://files.pythonhosted.org/packages/6c/2a/3c79b638a9c3d4658d345339d22070241ea341ed4e07b5ac60fb0f418003/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:05861afbbec40650d8a07ea324367cb93e9e8cc7762e04dd4405df99fa65159c", size = 769512, upload-time = "2026-01-03T17:31:51.134Z" }, + { url = "https://files.pythonhosted.org/packages/29/b9/3e5014d46c0ab0db8707e0ac2711ed28c4da0218c358a4e7c17bae0d8722/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2fc82186fadc4a8316768d61f3722c230e2c1dcab4200d52d2ebdf2482e47592", size = 506444, upload-time = "2026-01-03T17:31:52.85Z" }, + { url = "https://files.pythonhosted.org/packages/90/03/c1d4ef9a054e151cd7839cdc497f2638f00b93cbe8043983986630d7a80c/aiohttp-3.13.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0add0900ff220d1d5c5ebbf99ed88b0c1bbf87aa7e4262300ed1376a6b13414f", size = 510798, upload-time = "2026-01-03T17:31:54.91Z" }, + { url = "https://files.pythonhosted.org/packages/ea/76/8c1e5abbfe8e127c893fe7ead569148a4d5a799f7cf958d8c09f3eedf097/aiohttp-3.13.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:568f416a4072fbfae453dcf9a99194bbb8bdeab718e08ee13dfa2ba0e4bebf29", size = 1868835, upload-time = "2026-01-03T17:31:56.733Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ac/984c5a6f74c363b01ff97adc96a3976d9c98940b8969a1881575b279ac5d/aiohttp-3.13.3-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:add1da70de90a2569c5e15249ff76a631ccacfe198375eead4aadf3b8dc849dc", size = 1720486, upload-time = "2026-01-03T17:31:58.65Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/9a/b7039c5f099c4eb632138728828b33428585031a1e658d693d41d07d89d1/aiohttp-3.13.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:10b47b7ba335d2e9b1239fa571131a87e2d8ec96b333e68b2a305e7a98b0bae2", size = 1847951, upload-time = "2026-01-03T17:32:00.989Z" }, + { url = "https://files.pythonhosted.org/packages/3c/02/3bec2b9a1ba3c19ff89a43a19324202b8eb187ca1e928d8bdac9bbdddebd/aiohttp-3.13.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3dd4dce1c718e38081c8f35f323209d4c1df7d4db4bab1b5c88a6b4d12b74587", size = 1941001, upload-time = "2026-01-03T17:32:03.122Z" }, + { url = "https://files.pythonhosted.org/packages/37/df/d879401cedeef27ac4717f6426c8c36c3091c6e9f08a9178cc87549c537f/aiohttp-3.13.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34bac00a67a812570d4a460447e1e9e06fae622946955f939051e7cc895cfab8", size = 1797246, upload-time = "2026-01-03T17:32:05.255Z" }, + { url = "https://files.pythonhosted.org/packages/8d/15/be122de1f67e6953add23335c8ece6d314ab67c8bebb3f181063010795a7/aiohttp-3.13.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a19884d2ee70b06d9204b2727a7b9f983d0c684c650254679e716b0b77920632", size = 1627131, upload-time = "2026-01-03T17:32:07.607Z" }, + { url = "https://files.pythonhosted.org/packages/12/12/70eedcac9134cfa3219ab7af31ea56bc877395b1ac30d65b1bc4b27d0438/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ca7f2bb6ba8348a3614c7918cc4bb73268c5ac2a207576b7afea19d3d9f64", size = 1795196, upload-time = "2026-01-03T17:32:09.59Z" }, + { url = "https://files.pythonhosted.org/packages/32/11/b30e1b1cd1f3054af86ebe60df96989c6a414dd87e27ad16950eee420bea/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:b0d95340658b9d2f11d9697f59b3814a9d3bb4b7a7c20b131df4bcef464037c0", size = 1782841, 
upload-time = "2026-01-03T17:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/88/0d/d98a9367b38912384a17e287850f5695c528cff0f14f791ce8ee2e4f7796/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1e53262fd202e4b40b70c3aff944a8155059beedc8a89bba9dc1f9ef06a1b56", size = 1795193, upload-time = "2026-01-03T17:32:13.705Z" }, + { url = "https://files.pythonhosted.org/packages/43/a5/a2dfd1f5ff5581632c7f6a30e1744deda03808974f94f6534241ef60c751/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:d60ac9663f44168038586cab2157e122e46bdef09e9368b37f2d82d354c23f72", size = 1621979, upload-time = "2026-01-03T17:32:15.965Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f0/12973c382ae7c1cccbc4417e129c5bf54c374dfb85af70893646e1f0e749/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:90751b8eed69435bac9ff4e3d2f6b3af1f57e37ecb0fbeee59c0174c9e2d41df", size = 1822193, upload-time = "2026-01-03T17:32:18.219Z" }, + { url = "https://files.pythonhosted.org/packages/3c/5f/24155e30ba7f8c96918af1350eb0663e2430aad9e001c0489d89cd708ab1/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fc353029f176fd2b3ec6cfc71be166aba1936fe5d73dd1992ce289ca6647a9aa", size = 1769801, upload-time = "2026-01-03T17:32:20.25Z" }, + { url = "https://files.pythonhosted.org/packages/eb/f8/7314031ff5c10e6ece114da79b338ec17eeff3a079e53151f7e9f43c4723/aiohttp-3.13.3-cp314-cp314t-win32.whl", hash = "sha256:2e41b18a58da1e474a057b3d35248d8320029f61d70a37629535b16a0c8f3767", size = 466523, upload-time = "2026-01-03T17:32:22.215Z" }, + { url = "https://files.pythonhosted.org/packages/b4/63/278a98c715ae467624eafe375542d8ba9b4383a016df8fdefe0ae28382a7/aiohttp-3.13.3-cp314-cp314t-win_amd64.whl", hash = "sha256:44531a36aa2264a1860089ffd4dce7baf875ee5a6079d5fb42e261c704ef7344", size = 499694, upload-time = "2026-01-03T17:32:24.546Z" }, ] [[package]] @@ -274,37 +274,37 @@ wheels = [ [[package]] name = "apache-tvm-ffi" -version = 
"0.1.6" +version = "0.1.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/45/20/8da071821b2142bdeed757d2859dede4817e0b82a96e9a4d8cfbffd49006/apache_tvm_ffi-0.1.6.tar.gz", hash = "sha256:53088126f7fce11823ddf0fb101e968a90298d79fd68829c0a981f25467a574c", size = 2387987, upload-time = "2025-12-16T19:00:33.523Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/f8/6bc29ca8945a8a0b52997fd1e564c783f5b2578b6125315ed30dd0b1d0e4/apache_tvm_ffi-0.1.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ecda748ad9139593296cde3581223e9ddf1be3feca987adea676708b98f297ac", size = 1806165, upload-time = "2025-12-16T18:59:40.928Z" }, - { url = "https://files.pythonhosted.org/packages/1c/12/310a9953d6a35c2975e0d585f5bdd936858ec6b5b9daee34dc49dd4e3e2e/apache_tvm_ffi-0.1.6-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d976e347d0e6f6695103ce90cc739c717b3623fb9fd4867ffc395e2fe006f345", size = 1965883, upload-time = "2025-12-16T18:59:42.54Z" }, - { url = "https://files.pythonhosted.org/packages/9a/e1/37326821f2976167f142d23ded0e80f15ca05408ab49d87a2151ff246c76/apache_tvm_ffi-0.1.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e6caf9fdc209c3a6f618a462fc8c0925525246f16912f6333424819f19484c06", size = 2037885, upload-time = "2025-12-16T18:59:43.846Z" }, - { url = "https://files.pythonhosted.org/packages/28/d2/614d397d69b20ccf86d07f3e02d77e0056415f82e81816905ae1d11cd6e5/apache_tvm_ffi-0.1.6-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d43d8540bc38eb7f5173f8516a7963b2b0a8cdbc3fe315600d856fe2e3ed0f6f", size = 1909586, upload-time = "2025-12-16T18:59:45.111Z" }, - { url = "https://files.pythonhosted.org/packages/1c/3a/79aac72fbf67aac585757d34a57770d17c0ee34e9e46f668ab62df5c16ce/apache_tvm_ffi-0.1.6-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:f08cb6638dd2cd2e9f1cdc5126be676632ecaf09edb1ad6d43f836baa2f02845", size = 2019954, upload-time = "2025-12-16T18:59:46.612Z" }, - { url = "https://files.pythonhosted.org/packages/73/99/857e1497bfec2e3622ec21ca706b9af6f2ec94bca162d1216855cc617752/apache_tvm_ffi-0.1.6-cp310-cp310-win_amd64.whl", hash = "sha256:017576fc9a638a37cb2fc7024a3b2f9071a54db62545daf166efc8f9c8fda8a3", size = 1777727, upload-time = "2025-12-16T18:59:47.908Z" }, - { url = "https://files.pythonhosted.org/packages/a6/d1/dc4878dcca3d244918fa815a00c558652209f68a1678280b01cd79cdcc01/apache_tvm_ffi-0.1.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:52e9213b553e729e9bcf9acb2bfa0d7e3000fc4756f86ed375827b1e4b53692f", size = 1807748, upload-time = "2025-12-16T18:59:49.709Z" }, - { url = "https://files.pythonhosted.org/packages/fb/44/9e33ca98ee36f1ddf81246d8aad64a87728e03590dae71f3a99b8647c853/apache_tvm_ffi-0.1.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9532d721f208e4b9989f0e1b3a2d785c6b26d27d3e2b378b945c60d9c29e86ce", size = 1965166, upload-time = "2025-12-16T18:59:51.239Z" }, - { url = "https://files.pythonhosted.org/packages/c0/04/f1f580c53271795b6c231e4f9d65b1b263c4288413601abf4e3b175a474e/apache_tvm_ffi-0.1.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e93fe06aa0266faec4bd63de82a77af2005dc4b793cc6dd3dcc941eb05d4ba47", size = 2037588, upload-time = "2025-12-16T18:59:52.474Z" }, - { url = "https://files.pythonhosted.org/packages/56/7c/a0fc4194742766919a4d2664a1845561b81f4488d6088835f1d1c311680a/apache_tvm_ffi-0.1.6-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c1b8ca3e79d4a37266ab9b15c8e265fd9fd7131d351302149cff0a948f37986c", size = 1909384, upload-time = "2025-12-16T18:59:54.931Z" }, - { url = "https://files.pythonhosted.org/packages/f1/e1/c228f2314ad14bc72dd80c883108b0d84988b655f7afe74b5336e38224e1/apache_tvm_ffi-0.1.6-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:4cdcba21a2425a40b72367d0a4299ee268ad1d19d5f4c2b9e55e02dadf4c2465", size = 2020174, upload-time = "2025-12-16T18:59:56.449Z" }, - { url = "https://files.pythonhosted.org/packages/5e/3a/42edbd6d5cc6eb403981e5ff0e1548a16794687d75d1dbbf04fa187adc62/apache_tvm_ffi-0.1.6-cp311-cp311-win_amd64.whl", hash = "sha256:bc9973e71c54cd77a9e9d3937534f304bc9079edc42df00598778c115380cb1c", size = 1778243, upload-time = "2025-12-16T18:59:58.077Z" }, - { url = "https://files.pythonhosted.org/packages/1f/de/4ae5dd4d493b1cea755a25d59088895486432c053cff5a3287b75e36ce54/apache_tvm_ffi-0.1.6-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:5f4c0678854dbf3bfaa37795465f570d79c68759896b04b3d31774af0a03bcb8", size = 1779381, upload-time = "2025-12-16T18:59:59.593Z" }, - { url = "https://files.pythonhosted.org/packages/2d/40/2e943cbda764c3266a6966a34e582d3f0ac6046ab6aaa756631df9afd7bf/apache_tvm_ffi-0.1.6-cp312-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:653f1d4c8ffd6bca5300fd1825a81373a5be82f31dc79353d1c476fa31cf377a", size = 1936756, upload-time = "2025-12-16T19:00:00.844Z" }, - { url = "https://files.pythonhosted.org/packages/a3/91/fc43f155b4d4363e61707655c1f4bee75af1d6dd4a76680f4956dd9846fe/apache_tvm_ffi-0.1.6-cp312-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6a2cdfa90860a80e3cfb2364ce3b66a559fa5748de8d593a203b2e5992d92bc1", size = 2013641, upload-time = "2025-12-16T19:00:02.479Z" }, - { url = "https://files.pythonhosted.org/packages/14/9b/45208f2a9c70a88fd8e65668c0628f3917625d64668800ff55a2390d7fe0/apache_tvm_ffi-0.1.6-cp312-abi3-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223ac7ac08b34a6dbabe7085f23939b4aaa70666e72ddad41015659034e095af", size = 1881149, upload-time = "2025-12-16T19:00:03.776Z" }, - { url = "https://files.pythonhosted.org/packages/7d/c5/e3ba08379127578bb3417605b61e9cd5e513184a6947ec7f3fac93d16355/apache_tvm_ffi-0.1.6-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:05cedb3ba7600dc9ae35c17b7325d44ecf02c56c3ba1b62668dca8390da7ec28", size = 1992886, upload-time = "2025-12-16T19:00:05.047Z" }, - { url = "https://files.pythonhosted.org/packages/d6/7b/4df1e523ae4bcbfbe65a3e7ef3c8810cb76e9ae44fa9b44c9fac152ecc2b/apache_tvm_ffi-0.1.6-cp312-abi3-win_amd64.whl", hash = "sha256:a6c29ba9dbc6273f4534bfc0e8a52a784f264724eb62df62daedc2b349dabe85", size = 1758454, upload-time = "2025-12-16T19:00:06.498Z" }, - { url = "https://files.pythonhosted.org/packages/65/b5/17d994698417882e3d0f4531390abfeec8eab08de3cf8117e22041a70f67/apache_tvm_ffi-0.1.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:23b1a7a7ca409189147d4c517b72676d12538fcbb1631437ad06919107ab91a3", size = 1809885, upload-time = "2025-12-16T19:00:08.028Z" }, - { url = "https://files.pythonhosted.org/packages/32/d6/32fd7385878ac4c721e23c6e01e7d914147ff175105f5f24696e5316ffb8/apache_tvm_ffi-0.1.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2720594c9d2bc5a50768b80b966ab9ef942e0f7a0aeb91e9fd7fd35703cfd944", size = 1950167, upload-time = "2025-12-16T19:00:09.365Z" }, - { url = "https://files.pythonhosted.org/packages/4d/ad/2877cc6d4c21d78783452e082b430a0d0cdcacaab6cec162d2542b753f75/apache_tvm_ffi-0.1.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d27fbdf7c0f41be14a56a043a55c056548cbc0a76031c4fb3c6157d487afdec", size = 2021788, upload-time = "2025-12-16T19:00:10.681Z" }, - { url = "https://files.pythonhosted.org/packages/57/3c/8252539e4b03305e0c78508f90441ff5a73070cdac499c40a68fb533716f/apache_tvm_ffi-0.1.6-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c258313a49e246e878391bd2d9469f287bd3089ce53dcb379eee07bb78ad0675", size = 1894013, upload-time = "2025-12-16T19:00:11.963Z" }, - { url = "https://files.pythonhosted.org/packages/07/e8/199779b4ad83e570dface5c7727f2e4a288d07bec8a7ceec21e51a5e96dc/apache_tvm_ffi-0.1.6-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", 
hash = "sha256:4378ca283d680fa4af296cc430f6e050746434f487b29724273a56c169af2282", size = 2003016, upload-time = "2025-12-16T19:00:13.569Z" }, - { url = "https://files.pythonhosted.org/packages/fc/9f/0ffac1066ffb06b4c9645a74e6423ecae25228d26bae4c0a77abd0c032a0/apache_tvm_ffi-0.1.6-cp314-cp314t-win_amd64.whl", hash = "sha256:05fc0bde38884c9973126f9c87f3d296255b46b51fa4051c693d8ee559ba14ed", size = 1818312, upload-time = "2025-12-16T19:00:15.406Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/3d/07/6fbc8fbef1d04bd290f2dcdb3091ae784ac526b62649ec52993a41c65f72/apache_tvm_ffi-0.1.7.tar.gz", hash = "sha256:737cd4a067d6c6c7ad7dd909a0708eb3dc28540299039ea636f8ff5766b122be", size = 2397940, upload-time = "2025-12-28T09:13:25.52Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/00/e6c7e0710344ccfb2a42be68e04dfd1920864c25bab4a7411a48a4809a1a/apache_tvm_ffi-0.1.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc6334f55ad8b4cb3c084dcdf33720b47665d0ea488c36a1b4f1b99445ae5a12", size = 1816700, upload-time = "2025-12-28T09:12:22.223Z" }, + { url = "https://files.pythonhosted.org/packages/84/68/82799768095fe83640f0def07eda01891c9d713a9db8770316ca460a6114/apache_tvm_ffi-0.1.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f69f1195ad7701b0a024a84914b934487a30d5975a9e5d5044c57eb9f9b0fcf7", size = 1976292, upload-time = "2025-12-28T09:12:24.623Z" }, + { url = "https://files.pythonhosted.org/packages/8a/ab/0c01ac5c3d545c04d1adf03a154f8167dc5884c0fdcbb519714107426028/apache_tvm_ffi-0.1.7-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7b6444a322279cc33ada0bb2a0482e3433c31028becda106dcb0d48c30fb2de0", size = 2048671, upload-time = "2025-12-28T09:12:26.457Z" }, + { url = "https://files.pythonhosted.org/packages/0a/e3/449fcdbe7ebd8df4b830399171fb325e7f77b2babe958c6fa6c537281e26/apache_tvm_ffi-0.1.7-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:d5e9e668620ba3b78b1c1f393dee67a63850882b0713dba31972c5f854f02860", size = 1920010, upload-time = "2025-12-28T09:12:27.81Z" }, + { url = "https://files.pythonhosted.org/packages/a2/98/737ffc4576af7d4da97f3c73bf347f69d269497cfe9ac089517af5900919/apache_tvm_ffi-0.1.7-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5f7deaa48cfd720949dd1638dfbd4cc7d5285008c7f3f342887e2bf33cf1f5be", size = 2030727, upload-time = "2025-12-28T09:12:29.38Z" }, + { url = "https://files.pythonhosted.org/packages/f1/36/8ea373c1758c812a504a856a06fc08d8761df1c0e2515e6867c22168fea7/apache_tvm_ffi-0.1.7-cp310-cp310-win_amd64.whl", hash = "sha256:c1fd70f6e7578eeec5e5d8ed0fb814b12280b724531487ff4d899edddd188d97", size = 1787864, upload-time = "2025-12-28T09:12:31.194Z" }, + { url = "https://files.pythonhosted.org/packages/0a/e7/33ece51ba1670fa77a1897745720b9c8bdac854acb0e09d45e64340948f4/apache_tvm_ffi-0.1.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:20a8847f4609f1fe61015b7547bced99eba38072ed422799fc7bd15371d6d83c", size = 1818328, upload-time = "2025-12-28T09:12:32.784Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b9/3bb4099a82b4c7198823b67067a3d206ec8a0b32204a559c5cca1bee54bd/apache_tvm_ffi-0.1.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f0e010e61d1f220ec4ce3d15053db3f8c8d9c79230ea763343fc5e4acf53ef17", size = 1975412, upload-time = "2025-12-28T09:12:34.737Z" }, + { url = "https://files.pythonhosted.org/packages/48/53/423788fb9b26460b3d7ceb8588d172dfe7ae4abcc335931fcbf08a859904/apache_tvm_ffi-0.1.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9b05155b4b60ebd3642213d0489b6ef24aff17b268960dbb5f106a39899bb8b1", size = 2047974, upload-time = "2025-12-28T09:12:36.296Z" }, + { url = "https://files.pythonhosted.org/packages/a6/30/45d4acf7f99e1fc79a8663f2111901b8031e1f9b316860af7acf4859c964/apache_tvm_ffi-0.1.7-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:cceaddc7636060231aca4ada2632814189b1169224b2b451f41984145ef615fc", size = 1919697, upload-time = "2025-12-28T09:12:38.15Z" }, + { url = "https://files.pythonhosted.org/packages/dd/bb/fa5042076bf6e7daaf9774389f99149c1851434fc0d8e4cb34aa0c4a3810/apache_tvm_ffi-0.1.7-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5769cadc42e70522e2a523f1dfe24f48dbe3bf384e63f95df251f9d572ffcf23", size = 2030760, upload-time = "2025-12-28T09:12:39.813Z" }, + { url = "https://files.pythonhosted.org/packages/fe/74/fd06e97699e9cbf36d887c5fbbc56b14e896e2652bbe1781ab84cef82a40/apache_tvm_ffi-0.1.7-cp311-cp311-win_amd64.whl", hash = "sha256:b5c7716429ce2beb0a5b00c5a3bdd90b8a5891838afb782491c576ade42ba7c4", size = 1788026, upload-time = "2025-12-28T09:12:42.142Z" }, + { url = "https://files.pythonhosted.org/packages/26/4e/43a41ac023a5989803952d527dfea6e63da71fe223f6e010d4ec71ca0526/apache_tvm_ffi-0.1.7-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:12950ca9f9f4f4436869afe17845a6bfc85cbcd8a15dfa2b16095f7e6f49d06f", size = 1790152, upload-time = "2025-12-28T09:12:43.975Z" }, + { url = "https://files.pythonhosted.org/packages/b9/d3/05ba0a63baba1e3aec0f6303c4bc567493fb1c070d9f298f929a7703c0fb/apache_tvm_ffi-0.1.7-cp312-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d0e579234ce6fb2899377335a881ecf15d0197d833e2d370c9269ea6ca578f6f", size = 1947362, upload-time = "2025-12-28T09:12:45.921Z" }, + { url = "https://files.pythonhosted.org/packages/f1/11/b69df7685d75144fd9f57e5155cdf4ff91d6617a9f8b89b1415204863da0/apache_tvm_ffi-0.1.7-cp312-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:258a4aecc16e963def8ba0ab07f585147c7e7f586156b9496bfdf34af229443d", size = 2024240, upload-time = "2025-12-28T09:12:47.337Z" }, + { url = "https://files.pythonhosted.org/packages/cf/b6/31459f4141ea8621377fecac7c29e1568d494cbf95c5aa1ddf2cbc12a8ff/apache_tvm_ffi-0.1.7-cp312-abi3-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:363701589349e11a945dabce026578203bd83cb8de71af9a066beadd77af085a", size = 1891485, upload-time = "2025-12-28T09:12:49.171Z" }, + { url = "https://files.pythonhosted.org/packages/a5/4d/d21874eda6e3ea59c5a84aa010b24b84617e3b286ad759ac5eadccb1a88c/apache_tvm_ffi-0.1.7-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fbbf87df625930bafbd979c2c510d5bd989e9171098e5bb65320d0e7336d0095", size = 2003196, upload-time = "2025-12-28T09:12:50.891Z" }, + { url = "https://files.pythonhosted.org/packages/3f/d4/37102d96e359386107f5ce3751c4e2a8c1b8df3d34f65b701810ba59465c/apache_tvm_ffi-0.1.7-cp312-abi3-win_amd64.whl", hash = "sha256:d2fb56f53e33c7ddf7d6d340d44cbc440d205f7dab4bc5ed1ad20c8fc779250f", size = 1768697, upload-time = "2025-12-28T09:12:52.394Z" }, + { url = "https://files.pythonhosted.org/packages/92/c3/aa4b950032251c24b9db7d725b86d7d683b62d9919f8a32f478c28951dc3/apache_tvm_ffi-0.1.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:dc4a02e0252599d0c4eb2d2fa91b7756f0446b3bc42479b05c140e9d336b9b8b", size = 1820520, upload-time = "2025-12-28T09:12:54.29Z" }, + { url = "https://files.pythonhosted.org/packages/19/70/55ee17b8a340ef8ffc0d6c0587ff5a0c7e7c85a94e6cb202e682838a42c7/apache_tvm_ffi-0.1.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:41e50f2c8d98d706923c70ac19fd5f605bf71b8ffa43c0c2e9e1e22c2d60d4e0", size = 1960686, upload-time = "2025-12-28T09:12:56.206Z" }, + { url = "https://files.pythonhosted.org/packages/b6/0f/ca4f7b4836e1e03386b6e486a0ba88812644723a96965a01e2072f551f2e/apache_tvm_ffi-0.1.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:835bd391c6f3388e84e36f0ea2347761992241a3953be6ebb319bf1c2ac855d8", size = 2032237, upload-time = "2025-12-28T09:12:58.113Z" }, + { url = "https://files.pythonhosted.org/packages/89/b6/35be0035f8ed9e10ae6d9ffb7e91397ba381eb734f85ff852efe56eb3012/apache_tvm_ffi-0.1.7-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash 
= "sha256:d7d8b53e94c2bc28e961934e8291a9763d7868f84f9759cbae462b77ca801e5b", size = 1904414, upload-time = "2025-12-28T09:12:59.624Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5f/1f57863c2c68389d1453fe147d89da22910a0e4f645a8be29cc8f461850f/apache_tvm_ffi-0.1.7-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e135b70c7be8627661c5ec4a466e17e1aba260ffd7c6bccfe231c9ea975875e7", size = 2013039, upload-time = "2025-12-28T09:13:01.37Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3f/08d1931c6ebca557051176d400e15c1d7f6cf9096fc02f8c90ac7ee309ac/apache_tvm_ffi-0.1.7-cp314-cp314t-win_amd64.whl", hash = "sha256:408bb2c1fa585260afd556e53d65e2735f201f358202fda2b07d08a6cbfaf91f", size = 1828344, upload-time = "2025-12-28T09:13:03.359Z" }, ] [[package]] @@ -686,11 +686,11 @@ sdist = { url = "https://files.pythonhosted.org/packages/64/cb/104778c728dc3d5ea [[package]] name = "certifi" -version = "2025.11.12" +version = "2026.1.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, ] [[package]] @@ -905,101 +905,101 @@ wheels = [ [[package]] name = "coverage" -version = "7.13.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b6/45/2c665ca77ec32ad67e25c77daf1cee28ee4558f3bc571cdbaf88a00b9f23/coverage-7.13.0.tar.gz", hash = "sha256:a394aa27f2d7ff9bc04cf703817773a59ad6dfbd577032e690f961d2460ee936", size = 820905, upload-time = "2025-12-08T13:14:38.055Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/db/08/bdd7ccca14096f7eb01412b87ac11e5d16e4cb54b6e328afc9dee8bdaec1/coverage-7.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:02d9fb9eccd48f6843c98a37bd6817462f130b86da8660461e8f5e54d4c06070", size = 217979, upload-time = "2025-12-08T13:12:14.505Z" }, - { url = "https://files.pythonhosted.org/packages/fa/f0/d1302e3416298a28b5663ae1117546a745d9d19fde7e28402b2c5c3e2109/coverage-7.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:367449cf07d33dc216c083f2036bb7d976c6e4903ab31be400ad74ad9f85ce98", size = 218496, upload-time = "2025-12-08T13:12:16.237Z" }, - { url = "https://files.pythonhosted.org/packages/07/26/d36c354c8b2a320819afcea6bffe72839efd004b98d1d166b90801d49d57/coverage-7.13.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cdb3c9f8fef0a954c632f64328a3935988d33a6604ce4bf67ec3e39670f12ae5", size = 245237, upload-time = "2025-12-08T13:12:17.858Z" }, - { url = "https://files.pythonhosted.org/packages/91/52/be5e85631e0eec547873d8b08dd67a5f6b111ecfe89a86e40b89b0c1c61c/coverage-7.13.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:d10fd186aac2316f9bbb46ef91977f9d394ded67050ad6d84d94ed6ea2e8e54e", size = 247061, upload-time = "2025-12-08T13:12:19.132Z" }, - { url = "https://files.pythonhosted.org/packages/0f/45/a5e8fa0caf05fbd8fa0402470377bff09cc1f026d21c05c71e01295e55ab/coverage-7.13.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f88ae3e69df2ab62fb0bc5219a597cb890ba5c438190ffa87490b315190bb33", size = 248928, upload-time = "2025-12-08T13:12:20.702Z" }, - { url = "https://files.pythonhosted.org/packages/f5/42/ffb5069b6fd1b95fae482e02f3fecf380d437dd5a39bae09f16d2e2e7e01/coverage-7.13.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c4be718e51e86f553bcf515305a158a1cd180d23b72f07ae76d6017c3cc5d791", size = 245931, upload-time = "2025-12-08T13:12:22.243Z" }, - { url = "https://files.pythonhosted.org/packages/95/6e/73e809b882c2858f13e55c0c36e94e09ce07e6165d5644588f9517efe333/coverage-7.13.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a00d3a393207ae12f7c49bb1c113190883b500f48979abb118d8b72b8c95c032", size = 246968, upload-time = "2025-12-08T13:12:23.52Z" }, - { url = "https://files.pythonhosted.org/packages/87/08/64ebd9e64b6adb8b4a4662133d706fbaccecab972e0b3ccc23f64e2678ad/coverage-7.13.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a7b1cd820e1b6116f92c6128f1188e7afe421c7e1b35fa9836b11444e53ebd9", size = 244972, upload-time = "2025-12-08T13:12:24.781Z" }, - { url = "https://files.pythonhosted.org/packages/12/97/f4d27c6fe0cb375a5eced4aabcaef22de74766fb80a3d5d2015139e54b22/coverage-7.13.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:37eee4e552a65866f15dedd917d5e5f3d59805994260720821e2c1b51ac3248f", size = 245241, upload-time = "2025-12-08T13:12:28.041Z" }, - { url = "https://files.pythonhosted.org/packages/0c/94/42f8ae7f633bf4c118bf1038d80472f9dade88961a466f290b81250f7ab7/coverage-7.13.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:62d7c4f13102148c78d7353c6052af6d899a7f6df66a32bddcc0c0eb7c5326f8", size = 245847, upload-time = "2025-12-08T13:12:29.337Z" }, - { url = "https://files.pythonhosted.org/packages/a8/2f/6369ca22b6b6d933f4f4d27765d313d8914cc4cce84f82a16436b1a233db/coverage-7.13.0-cp310-cp310-win32.whl", hash = "sha256:24e4e56304fdb56f96f80eabf840eab043b3afea9348b88be680ec5986780a0f", size = 220573, upload-time = "2025-12-08T13:12:30.905Z" }, - { url = "https://files.pythonhosted.org/packages/f1/dc/a6a741e519acceaeccc70a7f4cfe5d030efc4b222595f0677e101af6f1f3/coverage-7.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:74c136e4093627cf04b26a35dab8cbfc9b37c647f0502fc313376e11726ba303", size = 221509, upload-time = "2025-12-08T13:12:32.09Z" }, - { url = "https://files.pythonhosted.org/packages/f1/dc/888bf90d8b1c3d0b4020a40e52b9f80957d75785931ec66c7dfaccc11c7d/coverage-7.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0dfa3855031070058add1a59fdfda0192fd3e8f97e7c81de0596c145dea51820", size = 218104, upload-time = "2025-12-08T13:12:33.333Z" }, - { url = "https://files.pythonhosted.org/packages/8d/ea/069d51372ad9c380214e86717e40d1a743713a2af191cfba30a0911b0a4a/coverage-7.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fdb6f54f38e334db97f72fa0c701e66d8479af0bc3f9bfb5b90f1c30f54500f", size = 218606, upload-time = "2025-12-08T13:12:34.498Z" }, - { url = "https://files.pythonhosted.org/packages/68/09/77b1c3a66c2aa91141b6c4471af98e5b1ed9b9e6d17255da5eb7992299e3/coverage-7.13.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7e442c013447d1d8d195be62852270b78b6e255b79b8675bad8479641e21fd96", size = 248999, upload-time = "2025-12-08T13:12:36.02Z" }, - { url = "https://files.pythonhosted.org/packages/0a/32/2e2f96e9d5691eaf1181d9040f850b8b7ce165ea10810fd8e2afa534cef7/coverage-7.13.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ed5630d946859de835a85e9a43b721123a8a44ec26e2830b296d478c7fd4259", 
size = 250925, upload-time = "2025-12-08T13:12:37.221Z" }, - { url = "https://files.pythonhosted.org/packages/7b/45/b88ddac1d7978859b9a39a8a50ab323186148f1d64bc068f86fc77706321/coverage-7.13.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f15a931a668e58087bc39d05d2b4bf4b14ff2875b49c994bbdb1c2217a8daeb", size = 253032, upload-time = "2025-12-08T13:12:38.763Z" }, - { url = "https://files.pythonhosted.org/packages/71/cb/e15513f94c69d4820a34b6bf3d2b1f9f8755fa6021be97c7065442d7d653/coverage-7.13.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:30a3a201a127ea57f7e14ba43c93c9c4be8b7d17a26e03bb49e6966d019eede9", size = 249134, upload-time = "2025-12-08T13:12:40.382Z" }, - { url = "https://files.pythonhosted.org/packages/09/61/d960ff7dc9e902af3310ce632a875aaa7860f36d2bc8fc8b37ee7c1b82a5/coverage-7.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a485ff48fbd231efa32d58f479befce52dcb6bfb2a88bb7bf9a0b89b1bc8030", size = 250731, upload-time = "2025-12-08T13:12:41.992Z" }, - { url = "https://files.pythonhosted.org/packages/98/34/c7c72821794afc7c7c2da1db8f00c2c98353078aa7fb6b5ff36aac834b52/coverage-7.13.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:22486cdafba4f9e471c816a2a5745337742a617fef68e890d8baf9f3036d7833", size = 248795, upload-time = "2025-12-08T13:12:43.331Z" }, - { url = "https://files.pythonhosted.org/packages/0a/5b/e0f07107987a43b2def9aa041c614ddb38064cbf294a71ef8c67d43a0cdd/coverage-7.13.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:263c3dbccc78e2e331e59e90115941b5f53e85cfcc6b3b2fbff1fd4e3d2c6ea8", size = 248514, upload-time = "2025-12-08T13:12:44.546Z" }, - { url = "https://files.pythonhosted.org/packages/71/c2/c949c5d3b5e9fc6dd79e1b73cdb86a59ef14f3709b1d72bf7668ae12e000/coverage-7.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e5330fa0cc1f5c3c4c3bb8e101b742025933e7848989370a1d4c8c5e401ea753", size = 249424, upload-time = 
"2025-12-08T13:12:45.759Z" }, - { url = "https://files.pythonhosted.org/packages/11/f1/bbc009abd6537cec0dffb2cc08c17a7f03de74c970e6302db4342a6e05af/coverage-7.13.0-cp311-cp311-win32.whl", hash = "sha256:0f4872f5d6c54419c94c25dd6ae1d015deeb337d06e448cd890a1e89a8ee7f3b", size = 220597, upload-time = "2025-12-08T13:12:47.378Z" }, - { url = "https://files.pythonhosted.org/packages/c4/f6/d9977f2fb51c10fbaed0718ce3d0a8541185290b981f73b1d27276c12d91/coverage-7.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51a202e0f80f241ccb68e3e26e19ab5b3bf0f813314f2c967642f13ebcf1ddfe", size = 221536, upload-time = "2025-12-08T13:12:48.7Z" }, - { url = "https://files.pythonhosted.org/packages/be/ad/3fcf43fd96fb43e337a3073dea63ff148dcc5c41ba7a14d4c7d34efb2216/coverage-7.13.0-cp311-cp311-win_arm64.whl", hash = "sha256:d2a9d7f1c11487b1c69367ab3ac2d81b9b3721f097aa409a3191c3e90f8f3dd7", size = 220206, upload-time = "2025-12-08T13:12:50.365Z" }, - { url = "https://files.pythonhosted.org/packages/9b/f1/2619559f17f31ba00fc40908efd1fbf1d0a5536eb75dc8341e7d660a08de/coverage-7.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0b3d67d31383c4c68e19a88e28fc4c2e29517580f1b0ebec4a069d502ce1e0bf", size = 218274, upload-time = "2025-12-08T13:12:52.095Z" }, - { url = "https://files.pythonhosted.org/packages/2b/11/30d71ae5d6e949ff93b2a79a2c1b4822e00423116c5c6edfaeef37301396/coverage-7.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:581f086833d24a22c89ae0fe2142cfaa1c92c930adf637ddf122d55083fb5a0f", size = 218638, upload-time = "2025-12-08T13:12:53.418Z" }, - { url = "https://files.pythonhosted.org/packages/79/c2/fce80fc6ded8d77e53207489d6065d0fed75db8951457f9213776615e0f5/coverage-7.13.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0a3a30f0e257df382f5f9534d4ce3d4cf06eafaf5192beb1a7bd066cb10e78fb", size = 250129, upload-time = "2025-12-08T13:12:54.744Z" }, - { url = 
"https://files.pythonhosted.org/packages/5b/b6/51b5d1eb6fcbb9a1d5d6984e26cbe09018475c2922d554fd724dd0f056ee/coverage-7.13.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:583221913fbc8f53b88c42e8dbb8fca1d0f2e597cb190ce45916662b8b9d9621", size = 252885, upload-time = "2025-12-08T13:12:56.401Z" }, - { url = "https://files.pythonhosted.org/packages/0d/f8/972a5affea41de798691ab15d023d3530f9f56a72e12e243f35031846ff7/coverage-7.13.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f5d9bd30756fff3e7216491a0d6d520c448d5124d3d8e8f56446d6412499e74", size = 253974, upload-time = "2025-12-08T13:12:57.718Z" }, - { url = "https://files.pythonhosted.org/packages/8a/56/116513aee860b2c7968aa3506b0f59b22a959261d1dbf3aea7b4450a7520/coverage-7.13.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a23e5a1f8b982d56fa64f8e442e037f6ce29322f1f9e6c2344cd9e9f4407ee57", size = 250538, upload-time = "2025-12-08T13:12:59.254Z" }, - { url = "https://files.pythonhosted.org/packages/d6/75/074476d64248fbadf16dfafbf93fdcede389ec821f74ca858d7c87d2a98c/coverage-7.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9b01c22bc74a7fb44066aaf765224c0d933ddf1f5047d6cdfe4795504a4493f8", size = 251912, upload-time = "2025-12-08T13:13:00.604Z" }, - { url = "https://files.pythonhosted.org/packages/f2/d2/aa4f8acd1f7c06024705c12609d8698c51b27e4d635d717cd1934c9668e2/coverage-7.13.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:898cce66d0836973f48dda4e3514d863d70142bdf6dfab932b9b6a90ea5b222d", size = 250054, upload-time = "2025-12-08T13:13:01.892Z" }, - { url = "https://files.pythonhosted.org/packages/19/98/8df9e1af6a493b03694a1e8070e024e7d2cdc77adedc225a35e616d505de/coverage-7.13.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:3ab483ea0e251b5790c2aac03acde31bff0c736bf8a86829b89382b407cd1c3b", size = 249619, upload-time = "2025-12-08T13:13:03.236Z" }, - { url = 
"https://files.pythonhosted.org/packages/d8/71/f8679231f3353018ca66ef647fa6fe7b77e6bff7845be54ab84f86233363/coverage-7.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1d84e91521c5e4cb6602fe11ece3e1de03b2760e14ae4fcf1a4b56fa3c801fcd", size = 251496, upload-time = "2025-12-08T13:13:04.511Z" }, - { url = "https://files.pythonhosted.org/packages/04/86/9cb406388034eaf3c606c22094edbbb82eea1fa9d20c0e9efadff20d0733/coverage-7.13.0-cp312-cp312-win32.whl", hash = "sha256:193c3887285eec1dbdb3f2bd7fbc351d570ca9c02ca756c3afbc71b3c98af6ef", size = 220808, upload-time = "2025-12-08T13:13:06.422Z" }, - { url = "https://files.pythonhosted.org/packages/1c/59/af483673df6455795daf5f447c2f81a3d2fcfc893a22b8ace983791f6f34/coverage-7.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:4f3e223b2b2db5e0db0c2b97286aba0036ca000f06aca9b12112eaa9af3d92ae", size = 221616, upload-time = "2025-12-08T13:13:07.95Z" }, - { url = "https://files.pythonhosted.org/packages/64/b0/959d582572b30a6830398c60dd419c1965ca4b5fb38ac6b7093a0d50ca8d/coverage-7.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:086cede306d96202e15a4b77ace8472e39d9f4e5f9fd92dd4fecdfb2313b2080", size = 220261, upload-time = "2025-12-08T13:13:09.581Z" }, - { url = "https://files.pythonhosted.org/packages/7c/cc/bce226595eb3bf7d13ccffe154c3c487a22222d87ff018525ab4dd2e9542/coverage-7.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:28ee1c96109974af104028a8ef57cec21447d42d0e937c0275329272e370ebcf", size = 218297, upload-time = "2025-12-08T13:13:10.977Z" }, - { url = "https://files.pythonhosted.org/packages/3b/9f/73c4d34600aae03447dff3d7ad1d0ac649856bfb87d1ca7d681cfc913f9e/coverage-7.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d1e97353dcc5587b85986cda4ff3ec98081d7e84dd95e8b2a6d59820f0545f8a", size = 218673, upload-time = "2025-12-08T13:13:12.562Z" }, - { url = 
"https://files.pythonhosted.org/packages/63/ab/8fa097db361a1e8586535ae5073559e6229596b3489ec3ef2f5b38df8cb2/coverage-7.13.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:99acd4dfdfeb58e1937629eb1ab6ab0899b131f183ee5f23e0b5da5cba2fec74", size = 249652, upload-time = "2025-12-08T13:13:13.909Z" }, - { url = "https://files.pythonhosted.org/packages/90/3a/9bfd4de2ff191feb37ef9465855ca56a6f2f30a3bca172e474130731ac3d/coverage-7.13.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ff45e0cd8451e293b63ced93161e189780baf444119391b3e7d25315060368a6", size = 252251, upload-time = "2025-12-08T13:13:15.553Z" }, - { url = "https://files.pythonhosted.org/packages/df/61/b5d8105f016e1b5874af0d7c67542da780ccd4a5f2244a433d3e20ceb1ad/coverage-7.13.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f4f72a85316d8e13234cafe0a9f81b40418ad7a082792fa4165bd7d45d96066b", size = 253492, upload-time = "2025-12-08T13:13:16.849Z" }, - { url = "https://files.pythonhosted.org/packages/f3/b8/0fad449981803cc47a4694768b99823fb23632150743f9c83af329bb6090/coverage-7.13.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:11c21557d0e0a5a38632cbbaca5f008723b26a89d70db6315523df6df77d6232", size = 249850, upload-time = "2025-12-08T13:13:18.142Z" }, - { url = "https://files.pythonhosted.org/packages/9a/e9/8d68337c3125014d918cf4327d5257553a710a2995a6a6de2ac77e5aa429/coverage-7.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76541dc8d53715fb4f7a3a06b34b0dc6846e3c69bc6204c55653a85dd6220971", size = 251633, upload-time = "2025-12-08T13:13:19.56Z" }, - { url = "https://files.pythonhosted.org/packages/55/14/d4112ab26b3a1bc4b3c1295d8452dcf399ed25be4cf649002fb3e64b2d93/coverage-7.13.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6e9e451dee940a86789134b6b0ffbe31c454ade3b849bb8a9d2cca2541a8e91d", size = 249586, upload-time = 
"2025-12-08T13:13:20.883Z" }, - { url = "https://files.pythonhosted.org/packages/2c/a9/22b0000186db663b0d82f86c2f1028099ae9ac202491685051e2a11a5218/coverage-7.13.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:5c67dace46f361125e6b9cace8fe0b729ed8479f47e70c89b838d319375c8137", size = 249412, upload-time = "2025-12-08T13:13:22.22Z" }, - { url = "https://files.pythonhosted.org/packages/a1/2e/42d8e0d9e7527fba439acdc6ed24a2b97613b1dc85849b1dd935c2cffef0/coverage-7.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f59883c643cb19630500f57016f76cfdcd6845ca8c5b5ea1f6e17f74c8e5f511", size = 251191, upload-time = "2025-12-08T13:13:23.899Z" }, - { url = "https://files.pythonhosted.org/packages/a4/af/8c7af92b1377fd8860536aadd58745119252aaaa71a5213e5a8e8007a9f5/coverage-7.13.0-cp313-cp313-win32.whl", hash = "sha256:58632b187be6f0be500f553be41e277712baa278147ecb7559983c6d9faf7ae1", size = 220829, upload-time = "2025-12-08T13:13:25.182Z" }, - { url = "https://files.pythonhosted.org/packages/58/f9/725e8bf16f343d33cbe076c75dc8370262e194ff10072c0608b8e5cf33a3/coverage-7.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:73419b89f812f498aca53f757dd834919b48ce4799f9d5cad33ca0ae442bdb1a", size = 221640, upload-time = "2025-12-08T13:13:26.836Z" }, - { url = "https://files.pythonhosted.org/packages/8a/ff/e98311000aa6933cc79274e2b6b94a2fe0fe3434fca778eba82003675496/coverage-7.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:eb76670874fdd6091eedcc856128ee48c41a9bbbb9c3f1c7c3cf169290e3ffd6", size = 220269, upload-time = "2025-12-08T13:13:28.116Z" }, - { url = "https://files.pythonhosted.org/packages/cf/cf/bbaa2e1275b300343ea865f7d424cc0a2e2a1df6925a070b2b2d5d765330/coverage-7.13.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6e63ccc6e0ad8986386461c3c4b737540f20426e7ec932f42e030320896c311a", size = 218990, upload-time = "2025-12-08T13:13:29.463Z" }, - { url = 
"https://files.pythonhosted.org/packages/21/1d/82f0b3323b3d149d7672e7744c116e9c170f4957e0c42572f0366dbb4477/coverage-7.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:494f5459ffa1bd45e18558cd98710c36c0b8fbfa82a5eabcbe671d80ecffbfe8", size = 219340, upload-time = "2025-12-08T13:13:31.524Z" }, - { url = "https://files.pythonhosted.org/packages/fb/e3/fe3fd4702a3832a255f4d43013eacb0ef5fc155a5960ea9269d8696db28b/coverage-7.13.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:06cac81bf10f74034e055e903f5f946e3e26fc51c09fc9f584e4a1605d977053", size = 260638, upload-time = "2025-12-08T13:13:32.965Z" }, - { url = "https://files.pythonhosted.org/packages/ad/01/63186cb000307f2b4da463f72af9b85d380236965574c78e7e27680a2593/coverage-7.13.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f2ffc92b46ed6e6760f1d47a71e56b5664781bc68986dbd1836b2b70c0ce2071", size = 262705, upload-time = "2025-12-08T13:13:34.378Z" }, - { url = "https://files.pythonhosted.org/packages/7c/a1/c0dacef0cc865f2455d59eed3548573ce47ed603205ffd0735d1d78b5906/coverage-7.13.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0602f701057c6823e5db1b74530ce85f17c3c5be5c85fc042ac939cbd909426e", size = 265125, upload-time = "2025-12-08T13:13:35.73Z" }, - { url = "https://files.pythonhosted.org/packages/ef/92/82b99223628b61300bd382c205795533bed021505eab6dd86e11fb5d7925/coverage-7.13.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:25dc33618d45456ccb1d37bce44bc78cf269909aa14c4db2e03d63146a8a1493", size = 259844, upload-time = "2025-12-08T13:13:37.69Z" }, - { url = "https://files.pythonhosted.org/packages/cf/2c/89b0291ae4e6cd59ef042708e1c438e2290f8c31959a20055d8768349ee2/coverage-7.13.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:71936a8b3b977ddd0b694c28c6a34f4fff2e9dd201969a4ff5d5fc7742d614b0", size = 262700, upload-time = 
"2025-12-08T13:13:39.525Z" }, - { url = "https://files.pythonhosted.org/packages/bf/f9/a5f992efae1996245e796bae34ceb942b05db275e4b34222a9a40b9fbd3b/coverage-7.13.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:936bc20503ce24770c71938d1369461f0c5320830800933bc3956e2a4ded930e", size = 260321, upload-time = "2025-12-08T13:13:41.172Z" }, - { url = "https://files.pythonhosted.org/packages/4c/89/a29f5d98c64fedbe32e2ac3c227fbf78edc01cc7572eee17d61024d89889/coverage-7.13.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:af0a583efaacc52ae2521f8d7910aff65cdb093091d76291ac5820d5e947fc1c", size = 259222, upload-time = "2025-12-08T13:13:43.282Z" }, - { url = "https://files.pythonhosted.org/packages/b3/c3/940fe447aae302a6701ee51e53af7e08b86ff6eed7631e5740c157ee22b9/coverage-7.13.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f1c23e24a7000da892a312fb17e33c5f94f8b001de44b7cf8ba2e36fbd15859e", size = 261411, upload-time = "2025-12-08T13:13:44.72Z" }, - { url = "https://files.pythonhosted.org/packages/eb/31/12a4aec689cb942a89129587860ed4d0fd522d5fda81237147fde554b8ae/coverage-7.13.0-cp313-cp313t-win32.whl", hash = "sha256:5f8a0297355e652001015e93be345ee54393e45dc3050af4a0475c5a2b767d46", size = 221505, upload-time = "2025-12-08T13:13:46.332Z" }, - { url = "https://files.pythonhosted.org/packages/65/8c/3b5fe3259d863572d2b0827642c50c3855d26b3aefe80bdc9eba1f0af3b0/coverage-7.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6abb3a4c52f05e08460bd9acf04fec027f8718ecaa0d09c40ffbc3fbd70ecc39", size = 222569, upload-time = "2025-12-08T13:13:47.79Z" }, - { url = "https://files.pythonhosted.org/packages/b0/39/f71fa8316a96ac72fc3908839df651e8eccee650001a17f2c78cdb355624/coverage-7.13.0-cp313-cp313t-win_arm64.whl", hash = "sha256:3ad968d1e3aa6ce5be295ab5fe3ae1bf5bb4769d0f98a80a0252d543a2ef2e9e", size = 220841, upload-time = "2025-12-08T13:13:49.243Z" }, - { url = 
"https://files.pythonhosted.org/packages/f8/4b/9b54bedda55421449811dcd5263a2798a63f48896c24dfb92b0f1b0845bd/coverage-7.13.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:453b7ec753cf5e4356e14fe858064e5520c460d3bbbcb9c35e55c0d21155c256", size = 218343, upload-time = "2025-12-08T13:13:50.811Z" }, - { url = "https://files.pythonhosted.org/packages/59/df/c3a1f34d4bba2e592c8979f924da4d3d4598b0df2392fbddb7761258e3dc/coverage-7.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:af827b7cbb303e1befa6c4f94fd2bf72f108089cfa0f8abab8f4ca553cf5ca5a", size = 218672, upload-time = "2025-12-08T13:13:52.284Z" }, - { url = "https://files.pythonhosted.org/packages/07/62/eec0659e47857698645ff4e6ad02e30186eb8afd65214fd43f02a76537cb/coverage-7.13.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9987a9e4f8197a1000280f7cc089e3ea2c8b3c0a64d750537809879a7b4ceaf9", size = 249715, upload-time = "2025-12-08T13:13:53.791Z" }, - { url = "https://files.pythonhosted.org/packages/23/2d/3c7ff8b2e0e634c1f58d095f071f52ed3c23ff25be524b0ccae8b71f99f8/coverage-7.13.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3188936845cd0cb114fa6a51842a304cdbac2958145d03be2377ec41eb285d19", size = 252225, upload-time = "2025-12-08T13:13:55.274Z" }, - { url = "https://files.pythonhosted.org/packages/aa/ac/fb03b469d20e9c9a81093575003f959cf91a4a517b783aab090e4538764b/coverage-7.13.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2bdb3babb74079f021696cb46b8bb5f5661165c385d3a238712b031a12355be", size = 253559, upload-time = "2025-12-08T13:13:57.161Z" }, - { url = "https://files.pythonhosted.org/packages/29/62/14afa9e792383c66cc0a3b872a06ded6e4ed1079c7d35de274f11d27064e/coverage-7.13.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7464663eaca6adba4175f6c19354feea61ebbdd735563a03d1e472c7072d27bb", size = 249724, upload-time = 
"2025-12-08T13:13:58.692Z" }, - { url = "https://files.pythonhosted.org/packages/31/b7/333f3dab2939070613696ab3ee91738950f0467778c6e5a5052e840646b7/coverage-7.13.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8069e831f205d2ff1f3d355e82f511eb7c5522d7d413f5db5756b772ec8697f8", size = 251582, upload-time = "2025-12-08T13:14:00.642Z" }, - { url = "https://files.pythonhosted.org/packages/81/cb/69162bda9381f39b2287265d7e29ee770f7c27c19f470164350a38318764/coverage-7.13.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:6fb2d5d272341565f08e962cce14cdf843a08ac43bd621783527adb06b089c4b", size = 249538, upload-time = "2025-12-08T13:14:02.556Z" }, - { url = "https://files.pythonhosted.org/packages/e0/76/350387b56a30f4970abe32b90b2a434f87d29f8b7d4ae40d2e8a85aacfb3/coverage-7.13.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:5e70f92ef89bac1ac8a99b3324923b4749f008fdbd7aa9cb35e01d7a284a04f9", size = 249349, upload-time = "2025-12-08T13:14:04.015Z" }, - { url = "https://files.pythonhosted.org/packages/86/0d/7f6c42b8d59f4c7e43ea3059f573c0dcfed98ba46eb43c68c69e52ae095c/coverage-7.13.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4b5de7d4583e60d5fd246dd57fcd3a8aa23c6e118a8c72b38adf666ba8e7e927", size = 251011, upload-time = "2025-12-08T13:14:05.505Z" }, - { url = "https://files.pythonhosted.org/packages/d7/f1/4bb2dff379721bb0b5c649d5c5eaf438462cad824acf32eb1b7ca0c7078e/coverage-7.13.0-cp314-cp314-win32.whl", hash = "sha256:a6c6e16b663be828a8f0b6c5027d36471d4a9f90d28444aa4ced4d48d7d6ae8f", size = 221091, upload-time = "2025-12-08T13:14:07.127Z" }, - { url = "https://files.pythonhosted.org/packages/ba/44/c239da52f373ce379c194b0ee3bcc121020e397242b85f99e0afc8615066/coverage-7.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:0900872f2fdb3ee5646b557918d02279dc3af3dfb39029ac4e945458b13f73bc", size = 221904, upload-time = "2025-12-08T13:14:08.542Z" }, - { url = 
"https://files.pythonhosted.org/packages/89/1f/b9f04016d2a29c2e4a0307baefefad1a4ec5724946a2b3e482690486cade/coverage-7.13.0-cp314-cp314-win_arm64.whl", hash = "sha256:3a10260e6a152e5f03f26db4a407c4c62d3830b9af9b7c0450b183615f05d43b", size = 220480, upload-time = "2025-12-08T13:14:10.958Z" }, - { url = "https://files.pythonhosted.org/packages/16/d4/364a1439766c8e8647860584171c36010ca3226e6e45b1753b1b249c5161/coverage-7.13.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9097818b6cc1cfb5f174e3263eba4a62a17683bcfe5c4b5d07f4c97fa51fbf28", size = 219074, upload-time = "2025-12-08T13:14:13.345Z" }, - { url = "https://files.pythonhosted.org/packages/ce/f4/71ba8be63351e099911051b2089662c03d5671437a0ec2171823c8e03bec/coverage-7.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0018f73dfb4301a89292c73be6ba5f58722ff79f51593352759c1790ded1cabe", size = 219342, upload-time = "2025-12-08T13:14:15.02Z" }, - { url = "https://files.pythonhosted.org/packages/5e/25/127d8ed03d7711a387d96f132589057213e3aef7475afdaa303412463f22/coverage-7.13.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:166ad2a22ee770f5656e1257703139d3533b4a0b6909af67c6b4a3adc1c98657", size = 260713, upload-time = "2025-12-08T13:14:16.907Z" }, - { url = "https://files.pythonhosted.org/packages/fd/db/559fbb6def07d25b2243663b46ba9eb5a3c6586c0c6f4e62980a68f0ee1c/coverage-7.13.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f6aaef16d65d1787280943f1c8718dc32e9cf141014e4634d64446702d26e0ff", size = 262825, upload-time = "2025-12-08T13:14:18.68Z" }, - { url = "https://files.pythonhosted.org/packages/37/99/6ee5bf7eff884766edb43bd8736b5e1c5144d0fe47498c3779326fe75a35/coverage-7.13.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e999e2dcc094002d6e2c7bbc1fb85b58ba4f465a760a8014d97619330cdbbbf3", size = 265233, upload-time = "2025-12-08T13:14:20.55Z" }, - { url = 
"https://files.pythonhosted.org/packages/d8/90/92f18fe0356ea69e1f98f688ed80cec39f44e9f09a1f26a1bbf017cc67f2/coverage-7.13.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:00c3d22cf6fb1cf3bf662aaaa4e563be8243a5ed2630339069799835a9cc7f9b", size = 259779, upload-time = "2025-12-08T13:14:22.367Z" }, - { url = "https://files.pythonhosted.org/packages/90/5d/b312a8b45b37a42ea7d27d7d3ff98ade3a6c892dd48d1d503e773503373f/coverage-7.13.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22ccfe8d9bb0d6134892cbe1262493a8c70d736b9df930f3f3afae0fe3ac924d", size = 262700, upload-time = "2025-12-08T13:14:24.309Z" }, - { url = "https://files.pythonhosted.org/packages/63/f8/b1d0de5c39351eb71c366f872376d09386640840a2e09b0d03973d791e20/coverage-7.13.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:9372dff5ea15930fea0445eaf37bbbafbc771a49e70c0aeed8b4e2c2614cc00e", size = 260302, upload-time = "2025-12-08T13:14:26.068Z" }, - { url = "https://files.pythonhosted.org/packages/aa/7c/d42f4435bc40c55558b3109a39e2d456cddcec37434f62a1f1230991667a/coverage-7.13.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:69ac2c492918c2461bc6ace42d0479638e60719f2a4ef3f0815fa2df88e9f940", size = 259136, upload-time = "2025-12-08T13:14:27.604Z" }, - { url = "https://files.pythonhosted.org/packages/b8/d3/23413241dc04d47cfe19b9a65b32a2edd67ecd0b817400c2843ebc58c847/coverage-7.13.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:739c6c051a7540608d097b8e13c76cfa85263ced467168dc6b477bae3df7d0e2", size = 261467, upload-time = "2025-12-08T13:14:29.09Z" }, - { url = "https://files.pythonhosted.org/packages/13/e6/6e063174500eee216b96272c0d1847bf215926786f85c2bd024cf4d02d2f/coverage-7.13.0-cp314-cp314t-win32.whl", hash = "sha256:fe81055d8c6c9de76d60c94ddea73c290b416e061d40d542b24a5871bad498b7", size = 221875, upload-time = "2025-12-08T13:14:31.106Z" }, - { url = 
"https://files.pythonhosted.org/packages/3b/46/f4fb293e4cbe3620e3ac2a3e8fd566ed33affb5861a9b20e3dd6c1896cbc/coverage-7.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:445badb539005283825959ac9fa4a28f712c214b65af3a2c464f1adc90f5fcbc", size = 222982, upload-time = "2025-12-08T13:14:33.1Z" }, - { url = "https://files.pythonhosted.org/packages/68/62/5b3b9018215ed9733fbd1ae3b2ed75c5de62c3b55377a52cae732e1b7805/coverage-7.13.0-cp314-cp314t-win_arm64.whl", hash = "sha256:de7f6748b890708578fc4b7bb967d810aeb6fcc9bff4bb77dbca77dab2f9df6a", size = 221016, upload-time = "2025-12-08T13:14:34.601Z" }, - { url = "https://files.pythonhosted.org/packages/8d/4c/1968f32fb9a2604645827e11ff84a31e59d532e01995f904723b4f5328b3/coverage-7.13.0-py3-none-any.whl", hash = "sha256:850d2998f380b1e266459ca5b47bc9e7daf9af1d070f66317972f382d46f1904", size = 210068, upload-time = "2025-12-08T13:14:36.236Z" }, +version = "7.13.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/23/f9/e92df5e07f3fc8d4c7f9a0f146ef75446bf870351cd37b788cf5897f8079/coverage-7.13.1.tar.gz", hash = "sha256:b7593fe7eb5feaa3fbb461ac79aac9f9fc0387a5ca8080b0c6fe2ca27b091afd", size = 825862, upload-time = "2025-12-28T15:42:56.969Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/9a/3742e58fd04b233df95c012ee9f3dfe04708a5e1d32613bd2d47d4e1be0d/coverage-7.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e1fa280b3ad78eea5be86f94f461c04943d942697e0dac889fa18fff8f5f9147", size = 218633, upload-time = "2025-12-28T15:40:10.165Z" }, + { url = "https://files.pythonhosted.org/packages/7e/45/7e6bdc94d89cd7c8017ce735cf50478ddfe765d4fbf0c24d71d30ea33d7a/coverage-7.13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c3d8c679607220979434f494b139dfb00131ebf70bb406553d69c1ff01a5c33d", size = 219147, upload-time = "2025-12-28T15:40:12.069Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/38/0d6a258625fd7f10773fe94097dc16937a5f0e3e0cdf3adef67d3ac6baef/coverage-7.13.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:339dc63b3eba969067b00f41f15ad161bf2946613156fb131266d8debc8e44d0", size = 245894, upload-time = "2025-12-28T15:40:13.556Z" }, + { url = "https://files.pythonhosted.org/packages/27/58/409d15ea487986994cbd4d06376e9860e9b157cfbfd402b1236770ab8dd2/coverage-7.13.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:db622b999ffe49cb891f2fff3b340cdc2f9797d01a0a202a0973ba2562501d90", size = 247721, upload-time = "2025-12-28T15:40:15.37Z" }, + { url = "https://files.pythonhosted.org/packages/da/bf/6e8056a83fd7a96c93341f1ffe10df636dd89f26d5e7b9ca511ce3bcf0df/coverage-7.13.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1443ba9acbb593fa7c1c29e011d7c9761545fe35e7652e85ce7f51a16f7e08d", size = 249585, upload-time = "2025-12-28T15:40:17.226Z" }, + { url = "https://files.pythonhosted.org/packages/f4/15/e1daff723f9f5959acb63cbe35b11203a9df77ee4b95b45fffd38b318390/coverage-7.13.1-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c832ec92c4499ac463186af72f9ed4d8daec15499b16f0a879b0d1c8e5cf4a3b", size = 246597, upload-time = "2025-12-28T15:40:19.028Z" }, + { url = "https://files.pythonhosted.org/packages/74/a6/1efd31c5433743a6ddbc9d37ac30c196bb07c7eab3d74fbb99b924c93174/coverage-7.13.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:562ec27dfa3f311e0db1ba243ec6e5f6ab96b1edfcfc6cf86f28038bc4961ce6", size = 247626, upload-time = "2025-12-28T15:40:20.846Z" }, + { url = "https://files.pythonhosted.org/packages/6d/9f/1609267dd3e749f57fdd66ca6752567d1c13b58a20a809dc409b263d0b5f/coverage-7.13.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4de84e71173d4dada2897e5a0e1b7877e5eefbfe0d6a44edee6ce31d9b8ec09e", size = 245629, upload-time = 
"2025-12-28T15:40:22.397Z" }, + { url = "https://files.pythonhosted.org/packages/e2/f6/6815a220d5ec2466383d7cc36131b9fa6ecbe95c50ec52a631ba733f306a/coverage-7.13.1-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:a5a68357f686f8c4d527a2dc04f52e669c2fc1cbde38f6f7eb6a0e58cbd17cae", size = 245901, upload-time = "2025-12-28T15:40:23.836Z" }, + { url = "https://files.pythonhosted.org/packages/ac/58/40576554cd12e0872faf6d2c0eb3bc85f71d78427946ddd19ad65201e2c0/coverage-7.13.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:77cc258aeb29a3417062758975521eae60af6f79e930d6993555eeac6a8eac29", size = 246505, upload-time = "2025-12-28T15:40:25.421Z" }, + { url = "https://files.pythonhosted.org/packages/3b/77/9233a90253fba576b0eee81707b5781d0e21d97478e5377b226c5b096c0f/coverage-7.13.1-cp310-cp310-win32.whl", hash = "sha256:bb4f8c3c9a9f34423dba193f241f617b08ffc63e27f67159f60ae6baf2dcfe0f", size = 221257, upload-time = "2025-12-28T15:40:27.217Z" }, + { url = "https://files.pythonhosted.org/packages/e0/43/e842ff30c1a0a623ec80db89befb84a3a7aad7bfe44a6ea77d5a3e61fedd/coverage-7.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:c8e2706ceb622bc63bac98ebb10ef5da80ed70fbd8a7999a5076de3afaef0fb1", size = 222191, upload-time = "2025-12-28T15:40:28.916Z" }, + { url = "https://files.pythonhosted.org/packages/b4/9b/77baf488516e9ced25fc215a6f75d803493fc3f6a1a1227ac35697910c2a/coverage-7.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a55d509a1dc5a5b708b5dad3b5334e07a16ad4c2185e27b40e4dba796ab7f88", size = 218755, upload-time = "2025-12-28T15:40:30.812Z" }, + { url = "https://files.pythonhosted.org/packages/d7/cd/7ab01154e6eb79ee2fab76bf4d89e94c6648116557307ee4ebbb85e5c1bf/coverage-7.13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4d010d080c4888371033baab27e47c9df7d6fb28d0b7b7adf85a4a49be9298b3", size = 219257, upload-time = "2025-12-28T15:40:32.333Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/d5/b11ef7863ffbbdb509da0023fad1e9eda1c0eaea61a6d2ea5b17d4ac706e/coverage-7.13.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d938b4a840fb1523b9dfbbb454f652967f18e197569c32266d4d13f37244c3d9", size = 249657, upload-time = "2025-12-28T15:40:34.1Z" }, + { url = "https://files.pythonhosted.org/packages/f7/7c/347280982982383621d29b8c544cf497ae07ac41e44b1ca4903024131f55/coverage-7.13.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bf100a3288f9bb7f919b87eb84f87101e197535b9bd0e2c2b5b3179633324fee", size = 251581, upload-time = "2025-12-28T15:40:36.131Z" }, + { url = "https://files.pythonhosted.org/packages/82/f6/ebcfed11036ade4c0d75fa4453a6282bdd225bc073862766eec184a4c643/coverage-7.13.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef6688db9bf91ba111ae734ba6ef1a063304a881749726e0d3575f5c10a9facf", size = 253691, upload-time = "2025-12-28T15:40:37.626Z" }, + { url = "https://files.pythonhosted.org/packages/02/92/af8f5582787f5d1a8b130b2dcba785fa5e9a7a8e121a0bb2220a6fdbdb8a/coverage-7.13.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0b609fc9cdbd1f02e51f67f51e5aee60a841ef58a68d00d5ee2c0faf357481a3", size = 249799, upload-time = "2025-12-28T15:40:39.47Z" }, + { url = "https://files.pythonhosted.org/packages/24/aa/0e39a2a3b16eebf7f193863323edbff38b6daba711abaaf807d4290cf61a/coverage-7.13.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c43257717611ff5e9a1d79dce8e47566235ebda63328718d9b65dd640bc832ef", size = 251389, upload-time = "2025-12-28T15:40:40.954Z" }, + { url = "https://files.pythonhosted.org/packages/73/46/7f0c13111154dc5b978900c0ccee2e2ca239b910890e674a77f1363d483e/coverage-7.13.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e09fbecc007f7b6afdfb3b07ce5bd9f8494b6856dd4f577d26c66c391b829851", size = 249450, upload-time = 
"2025-12-28T15:40:42.489Z" }, + { url = "https://files.pythonhosted.org/packages/ac/ca/e80da6769e8b669ec3695598c58eef7ad98b0e26e66333996aee6316db23/coverage-7.13.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:a03a4f3a19a189919c7055098790285cc5c5b0b3976f8d227aea39dbf9f8bfdb", size = 249170, upload-time = "2025-12-28T15:40:44.279Z" }, + { url = "https://files.pythonhosted.org/packages/af/18/9e29baabdec1a8644157f572541079b4658199cfd372a578f84228e860de/coverage-7.13.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3820778ea1387c2b6a818caec01c63adc5b3750211af6447e8dcfb9b6f08dbba", size = 250081, upload-time = "2025-12-28T15:40:45.748Z" }, + { url = "https://files.pythonhosted.org/packages/00/f8/c3021625a71c3b2f516464d322e41636aea381018319050a8114105872ee/coverage-7.13.1-cp311-cp311-win32.whl", hash = "sha256:ff10896fa55167371960c5908150b434b71c876dfab97b69478f22c8b445ea19", size = 221281, upload-time = "2025-12-28T15:40:47.232Z" }, + { url = "https://files.pythonhosted.org/packages/27/56/c216625f453df6e0559ed666d246fcbaaa93f3aa99eaa5080cea1229aa3d/coverage-7.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:a998cc0aeeea4c6d5622a3754da5a493055d2d95186bad877b0a34ea6e6dbe0a", size = 222215, upload-time = "2025-12-28T15:40:49.19Z" }, + { url = "https://files.pythonhosted.org/packages/5c/9a/be342e76f6e531cae6406dc46af0d350586f24d9b67fdfa6daee02df71af/coverage-7.13.1-cp311-cp311-win_arm64.whl", hash = "sha256:fea07c1a39a22614acb762e3fbbb4011f65eedafcb2948feeef641ac78b4ee5c", size = 220886, upload-time = "2025-12-28T15:40:51.067Z" }, + { url = "https://files.pythonhosted.org/packages/ce/8a/87af46cccdfa78f53db747b09f5f9a21d5fc38d796834adac09b30a8ce74/coverage-7.13.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6f34591000f06e62085b1865c9bc5f7858df748834662a51edadfd2c3bfe0dd3", size = 218927, upload-time = "2025-12-28T15:40:52.814Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/a8/6e22fdc67242a4a5a153f9438d05944553121c8f4ba70cb072af4c41362e/coverage-7.13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b67e47c5595b9224599016e333f5ec25392597a89d5744658f837d204e16c63e", size = 219288, upload-time = "2025-12-28T15:40:54.262Z" }, + { url = "https://files.pythonhosted.org/packages/d0/0a/853a76e03b0f7c4375e2ca025df45c918beb367f3e20a0a8e91967f6e96c/coverage-7.13.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3e7b8bd70c48ffb28461ebe092c2345536fb18bbbf19d287c8913699735f505c", size = 250786, upload-time = "2025-12-28T15:40:56.059Z" }, + { url = "https://files.pythonhosted.org/packages/ea/b4/694159c15c52b9f7ec7adf49d50e5f8ee71d3e9ef38adb4445d13dd56c20/coverage-7.13.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c223d078112e90dc0e5c4e35b98b9584164bea9fbbd221c0b21c5241f6d51b62", size = 253543, upload-time = "2025-12-28T15:40:57.585Z" }, + { url = "https://files.pythonhosted.org/packages/96/b2/7f1f0437a5c855f87e17cf5d0dc35920b6440ff2b58b1ba9788c059c26c8/coverage-7.13.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:794f7c05af0763b1bbd1b9e6eff0e52ad068be3b12cd96c87de037b01390c968", size = 254635, upload-time = "2025-12-28T15:40:59.443Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d1/73c3fdb8d7d3bddd9473c9c6a2e0682f09fc3dfbcb9c3f36412a7368bcab/coverage-7.13.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0642eae483cc8c2902e4af7298bf886d605e80f26382124cddc3967c2a3df09e", size = 251202, upload-time = "2025-12-28T15:41:01.328Z" }, + { url = "https://files.pythonhosted.org/packages/66/3c/f0edf75dcc152f145d5598329e864bbbe04ab78660fe3e8e395f9fff010f/coverage-7.13.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f5e772ed5fef25b3de9f2008fe67b92d46831bd2bc5bdc5dd6bfd06b83b316f", size = 252566, upload-time = 
"2025-12-28T15:41:03.319Z" }, + { url = "https://files.pythonhosted.org/packages/17/b3/e64206d3c5f7dcbceafd14941345a754d3dbc78a823a6ed526e23b9cdaab/coverage-7.13.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:45980ea19277dc0a579e432aef6a504fe098ef3a9032ead15e446eb0f1191aee", size = 250711, upload-time = "2025-12-28T15:41:06.411Z" }, + { url = "https://files.pythonhosted.org/packages/dc/ad/28a3eb970a8ef5b479ee7f0c484a19c34e277479a5b70269dc652b730733/coverage-7.13.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:e4f18eca6028ffa62adbd185a8f1e1dd242f2e68164dba5c2b74a5204850b4cf", size = 250278, upload-time = "2025-12-28T15:41:08.285Z" }, + { url = "https://files.pythonhosted.org/packages/54/e3/c8f0f1a93133e3e1291ca76cbb63565bd4b5c5df63b141f539d747fff348/coverage-7.13.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f8dca5590fec7a89ed6826fce625595279e586ead52e9e958d3237821fbc750c", size = 252154, upload-time = "2025-12-28T15:41:09.969Z" }, + { url = "https://files.pythonhosted.org/packages/d0/bf/9939c5d6859c380e405b19e736321f1c7d402728792f4c752ad1adcce005/coverage-7.13.1-cp312-cp312-win32.whl", hash = "sha256:ff86d4e85188bba72cfb876df3e11fa243439882c55957184af44a35bd5880b7", size = 221487, upload-time = "2025-12-28T15:41:11.468Z" }, + { url = "https://files.pythonhosted.org/packages/fa/dc/7282856a407c621c2aad74021680a01b23010bb8ebf427cf5eacda2e876f/coverage-7.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:16cc1da46c04fb0fb128b4dc430b78fa2aba8a6c0c9f8eb391fd5103409a6ac6", size = 222299, upload-time = "2025-12-28T15:41:13.386Z" }, + { url = "https://files.pythonhosted.org/packages/10/79/176a11203412c350b3e9578620013af35bcdb79b651eb976f4a4b32044fa/coverage-7.13.1-cp312-cp312-win_arm64.whl", hash = "sha256:8d9bc218650022a768f3775dd7fdac1886437325d8d295d923ebcfef4892ad5c", size = 220941, upload-time = "2025-12-28T15:41:14.975Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/a4/e98e689347a1ff1a7f67932ab535cef82eb5e78f32a9e4132e114bbb3a0a/coverage-7.13.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cb237bfd0ef4d5eb6a19e29f9e528ac67ac3be932ea6b44fb6cc09b9f3ecff78", size = 218951, upload-time = "2025-12-28T15:41:16.653Z" }, + { url = "https://files.pythonhosted.org/packages/32/33/7cbfe2bdc6e2f03d6b240d23dc45fdaf3fd270aaf2d640be77b7f16989ab/coverage-7.13.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1dcb645d7e34dcbcc96cd7c132b1fc55c39263ca62eb961c064eb3928997363b", size = 219325, upload-time = "2025-12-28T15:41:18.609Z" }, + { url = "https://files.pythonhosted.org/packages/59/f6/efdabdb4929487baeb7cb2a9f7dac457d9356f6ad1b255be283d58b16316/coverage-7.13.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3d42df8201e00384736f0df9be2ced39324c3907607d17d50d50116c989d84cd", size = 250309, upload-time = "2025-12-28T15:41:20.629Z" }, + { url = "https://files.pythonhosted.org/packages/12/da/91a52516e9d5aea87d32d1523f9cdcf7a35a3b298e6be05d6509ba3cfab2/coverage-7.13.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fa3edde1aa8807de1d05934982416cb3ec46d1d4d91e280bcce7cca01c507992", size = 252907, upload-time = "2025-12-28T15:41:22.257Z" }, + { url = "https://files.pythonhosted.org/packages/75/38/f1ea837e3dc1231e086db1638947e00d264e7e8c41aa8ecacf6e1e0c05f4/coverage-7.13.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9edd0e01a343766add6817bc448408858ba6b489039eaaa2018474e4001651a4", size = 254148, upload-time = "2025-12-28T15:41:23.87Z" }, + { url = "https://files.pythonhosted.org/packages/7f/43/f4f16b881aaa34954ba446318dea6b9ed5405dd725dd8daac2358eda869a/coverage-7.13.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:985b7836931d033570b94c94713c6dba5f9d3ff26045f72c3e5dbc5fe3361e5a", size = 250515, upload-time = 
"2025-12-28T15:41:25.437Z" }, + { url = "https://files.pythonhosted.org/packages/84/34/8cba7f00078bd468ea914134e0144263194ce849ec3baad187ffb6203d1c/coverage-7.13.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ffed1e4980889765c84a5d1a566159e363b71d6b6fbaf0bebc9d3c30bc016766", size = 252292, upload-time = "2025-12-28T15:41:28.459Z" }, + { url = "https://files.pythonhosted.org/packages/8c/a4/cffac66c7652d84ee4ac52d3ccb94c015687d3b513f9db04bfcac2ac800d/coverage-7.13.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8842af7f175078456b8b17f1b73a0d16a65dcbdc653ecefeb00a56b3c8c298c4", size = 250242, upload-time = "2025-12-28T15:41:30.02Z" }, + { url = "https://files.pythonhosted.org/packages/f4/78/9a64d462263dde416f3c0067efade7b52b52796f489b1037a95b0dc389c9/coverage-7.13.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:ccd7a6fca48ca9c131d9b0a2972a581e28b13416fc313fb98b6d24a03ce9a398", size = 250068, upload-time = "2025-12-28T15:41:32.007Z" }, + { url = "https://files.pythonhosted.org/packages/69/c8/a8994f5fece06db7c4a97c8fc1973684e178599b42e66280dded0524ef00/coverage-7.13.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0403f647055de2609be776965108447deb8e384fe4a553c119e3ff6bfbab4784", size = 251846, upload-time = "2025-12-28T15:41:33.946Z" }, + { url = "https://files.pythonhosted.org/packages/cc/f7/91fa73c4b80305c86598a2d4e54ba22df6bf7d0d97500944af7ef155d9f7/coverage-7.13.1-cp313-cp313-win32.whl", hash = "sha256:549d195116a1ba1e1ae2f5ca143f9777800f6636eab917d4f02b5310d6d73461", size = 221512, upload-time = "2025-12-28T15:41:35.519Z" }, + { url = "https://files.pythonhosted.org/packages/45/0b/0768b4231d5a044da8f75e097a8714ae1041246bb765d6b5563bab456735/coverage-7.13.1-cp313-cp313-win_amd64.whl", hash = "sha256:5899d28b5276f536fcf840b18b61a9fce23cc3aec1d114c44c07fe94ebeaa500", size = 222321, upload-time = "2025-12-28T15:41:37.371Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/b8/bdcb7253b7e85157282450262008f1366aa04663f3e3e4c30436f596c3e2/coverage-7.13.1-cp313-cp313-win_arm64.whl", hash = "sha256:868a2fae76dfb06e87291bcbd4dcbcc778a8500510b618d50496e520bd94d9b9", size = 220949, upload-time = "2025-12-28T15:41:39.553Z" }, + { url = "https://files.pythonhosted.org/packages/70/52/f2be52cc445ff75ea8397948c96c1b4ee14f7f9086ea62fc929c5ae7b717/coverage-7.13.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:67170979de0dacac3f3097d02b0ad188d8edcea44ccc44aaa0550af49150c7dc", size = 219643, upload-time = "2025-12-28T15:41:41.567Z" }, + { url = "https://files.pythonhosted.org/packages/47/79/c85e378eaa239e2edec0c5523f71542c7793fe3340954eafb0bc3904d32d/coverage-7.13.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f80e2bb21bfab56ed7405c2d79d34b5dc0bc96c2c1d2a067b643a09fb756c43a", size = 219997, upload-time = "2025-12-28T15:41:43.418Z" }, + { url = "https://files.pythonhosted.org/packages/fe/9b/b1ade8bfb653c0bbce2d6d6e90cc6c254cbb99b7248531cc76253cb4da6d/coverage-7.13.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f83351e0f7dcdb14d7326c3d8d8c4e915fa685cbfdc6281f9470d97a04e9dfe4", size = 261296, upload-time = "2025-12-28T15:41:45.207Z" }, + { url = "https://files.pythonhosted.org/packages/1f/af/ebf91e3e1a2473d523e87e87fd8581e0aa08741b96265730e2d79ce78d8d/coverage-7.13.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb3f6562e89bad0110afbe64e485aac2462efdce6232cdec7862a095dc3412f6", size = 263363, upload-time = "2025-12-28T15:41:47.163Z" }, + { url = "https://files.pythonhosted.org/packages/c4/8b/fb2423526d446596624ac7fde12ea4262e66f86f5120114c3cfd0bb2befa/coverage-7.13.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77545b5dcda13b70f872c3b5974ac64c21d05e65b1590b441c8560115dc3a0d1", size = 265783, upload-time = "2025-12-28T15:41:49.03Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/26/ef2adb1e22674913b89f0fe7490ecadcef4a71fa96f5ced90c60ec358789/coverage-7.13.1-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a4d240d260a1aed814790bbe1f10a5ff31ce6c21bc78f0da4a1e8268d6c80dbd", size = 260508, upload-time = "2025-12-28T15:41:51.035Z" }, + { url = "https://files.pythonhosted.org/packages/ce/7d/f0f59b3404caf662e7b5346247883887687c074ce67ba453ea08c612b1d5/coverage-7.13.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d2287ac9360dec3837bfdad969963a5d073a09a85d898bd86bea82aa8876ef3c", size = 263357, upload-time = "2025-12-28T15:41:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/1a/b1/29896492b0b1a047604d35d6fa804f12818fa30cdad660763a5f3159e158/coverage-7.13.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0d2c11f3ea4db66b5cbded23b20185c35066892c67d80ec4be4bab257b9ad1e0", size = 260978, upload-time = "2025-12-28T15:41:54.589Z" }, + { url = "https://files.pythonhosted.org/packages/48/f2/971de1238a62e6f0a4128d37adadc8bb882ee96afbe03ff1570291754629/coverage-7.13.1-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:3fc6a169517ca0d7ca6846c3c5392ef2b9e38896f61d615cb75b9e7134d4ee1e", size = 259877, upload-time = "2025-12-28T15:41:56.263Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fc/0474efcbb590ff8628830e9aaec5f1831594874360e3251f1fdec31d07a3/coverage-7.13.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d10a2ed46386e850bb3de503a54f9fe8192e5917fcbb143bfef653a9355e9a53", size = 262069, upload-time = "2025-12-28T15:41:58.093Z" }, + { url = "https://files.pythonhosted.org/packages/88/4f/3c159b7953db37a7b44c0eab8a95c37d1aa4257c47b4602c04022d5cb975/coverage-7.13.1-cp313-cp313t-win32.whl", hash = "sha256:75a6f4aa904301dab8022397a22c0039edc1f51e90b83dbd4464b8a38dc87842", size = 222184, upload-time = "2025-12-28T15:41:59.763Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/a5/6b57d28f81417f9335774f20679d9d13b9a8fb90cd6160957aa3b54a2379/coverage-7.13.1-cp313-cp313t-win_amd64.whl", hash = "sha256:309ef5706e95e62578cda256b97f5e097916a2c26247c287bbe74794e7150df2", size = 223250, upload-time = "2025-12-28T15:42:01.52Z" }, + { url = "https://files.pythonhosted.org/packages/81/7c/160796f3b035acfbb58be80e02e484548595aa67e16a6345e7910ace0a38/coverage-7.13.1-cp313-cp313t-win_arm64.whl", hash = "sha256:92f980729e79b5d16d221038dbf2e8f9a9136afa072f9d5d6ed4cb984b126a09", size = 221521, upload-time = "2025-12-28T15:42:03.275Z" }, + { url = "https://files.pythonhosted.org/packages/aa/8e/ba0e597560c6563fc0adb902fda6526df5d4aa73bb10adf0574d03bd2206/coverage-7.13.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:97ab3647280d458a1f9adb85244e81587505a43c0c7cff851f5116cd2814b894", size = 218996, upload-time = "2025-12-28T15:42:04.978Z" }, + { url = "https://files.pythonhosted.org/packages/6b/8e/764c6e116f4221dc7aa26c4061181ff92edb9c799adae6433d18eeba7a14/coverage-7.13.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8f572d989142e0908e6acf57ad1b9b86989ff057c006d13b76c146ec6a20216a", size = 219326, upload-time = "2025-12-28T15:42:06.691Z" }, + { url = "https://files.pythonhosted.org/packages/4f/a6/6130dc6d8da28cdcbb0f2bf8865aeca9b157622f7c0031e48c6cf9a0e591/coverage-7.13.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d72140ccf8a147e94274024ff6fd8fb7811354cf7ef88b1f0a988ebaa5bc774f", size = 250374, upload-time = "2025-12-28T15:42:08.786Z" }, + { url = "https://files.pythonhosted.org/packages/82/2b/783ded568f7cd6b677762f780ad338bf4b4750205860c17c25f7c708995e/coverage-7.13.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d3c9f051b028810f5a87c88e5d6e9af3c0ff32ef62763bf15d29f740453ca909", size = 252882, upload-time = "2025-12-28T15:42:10.515Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/b2/9808766d082e6a4d59eb0cc881a57fc1600eb2c5882813eefff8254f71b5/coverage-7.13.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f398ba4df52d30b1763f62eed9de5620dcde96e6f491f4c62686736b155aa6e4", size = 254218, upload-time = "2025-12-28T15:42:12.208Z" }, + { url = "https://files.pythonhosted.org/packages/44/ea/52a985bb447c871cb4d2e376e401116520991b597c85afdde1ea9ef54f2c/coverage-7.13.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:132718176cc723026d201e347f800cd1a9e4b62ccd3f82476950834dad501c75", size = 250391, upload-time = "2025-12-28T15:42:14.21Z" }, + { url = "https://files.pythonhosted.org/packages/7f/1d/125b36cc12310718873cfc8209ecfbc1008f14f4f5fa0662aa608e579353/coverage-7.13.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9e549d642426e3579b3f4b92d0431543b012dcb6e825c91619d4e93b7363c3f9", size = 252239, upload-time = "2025-12-28T15:42:16.292Z" }, + { url = "https://files.pythonhosted.org/packages/6a/16/10c1c164950cade470107f9f14bbac8485f8fb8515f515fca53d337e4a7f/coverage-7.13.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:90480b2134999301eea795b3a9dbf606c6fbab1b489150c501da84a959442465", size = 250196, upload-time = "2025-12-28T15:42:18.54Z" }, + { url = "https://files.pythonhosted.org/packages/2a/c6/cd860fac08780c6fd659732f6ced1b40b79c35977c1356344e44d72ba6c4/coverage-7.13.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e825dbb7f84dfa24663dd75835e7257f8882629fc11f03ecf77d84a75134b864", size = 250008, upload-time = "2025-12-28T15:42:20.365Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/a8c58d3d38f82a5711e1e0a67268362af48e1a03df27c03072ac30feefcf/coverage-7.13.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:623dcc6d7a7ba450bbdbeedbaa0c42b329bdae16491af2282f12a7e809be7eb9", size = 251671, upload-time = "2025-12-28T15:42:22.114Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/bc/fd4c1da651d037a1e3d53e8cb3f8182f4b53271ffa9a95a2e211bacc0349/coverage-7.13.1-cp314-cp314-win32.whl", hash = "sha256:6e73ebb44dca5f708dc871fe0b90cf4cff1a13f9956f747cc87b535a840386f5", size = 221777, upload-time = "2025-12-28T15:42:23.919Z" }, + { url = "https://files.pythonhosted.org/packages/4b/50/71acabdc8948464c17e90b5ffd92358579bd0910732c2a1c9537d7536aa6/coverage-7.13.1-cp314-cp314-win_amd64.whl", hash = "sha256:be753b225d159feb397bd0bf91ae86f689bad0da09d3b301478cd39b878ab31a", size = 222592, upload-time = "2025-12-28T15:42:25.619Z" }, + { url = "https://files.pythonhosted.org/packages/f7/c8/a6fb943081bb0cc926499c7907731a6dc9efc2cbdc76d738c0ab752f1a32/coverage-7.13.1-cp314-cp314-win_arm64.whl", hash = "sha256:228b90f613b25ba0019361e4ab81520b343b622fc657daf7e501c4ed6a2366c0", size = 221169, upload-time = "2025-12-28T15:42:27.629Z" }, + { url = "https://files.pythonhosted.org/packages/16/61/d5b7a0a0e0e40d62e59bc8c7aa1afbd86280d82728ba97f0673b746b78e2/coverage-7.13.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:60cfb538fe9ef86e5b2ab0ca8fc8d62524777f6c611dcaf76dc16fbe9b8e698a", size = 219730, upload-time = "2025-12-28T15:42:29.306Z" }, + { url = "https://files.pythonhosted.org/packages/a3/2c/8881326445fd071bb49514d1ce97d18a46a980712b51fee84f9ab42845b4/coverage-7.13.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:57dfc8048c72ba48a8c45e188d811e5efd7e49b387effc8fb17e97936dde5bf6", size = 220001, upload-time = "2025-12-28T15:42:31.319Z" }, + { url = "https://files.pythonhosted.org/packages/b5/d7/50de63af51dfa3a7f91cc37ad8fcc1e244b734232fbc8b9ab0f3c834a5cd/coverage-7.13.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3f2f725aa3e909b3c5fdb8192490bdd8e1495e85906af74fe6e34a2a77ba0673", size = 261370, upload-time = "2025-12-28T15:42:32.992Z" }, + { url = 
"https://files.pythonhosted.org/packages/e1/2c/d31722f0ec918fd7453b2758312729f645978d212b410cd0f7c2aed88a94/coverage-7.13.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ee68b21909686eeb21dfcba2c3b81fee70dcf38b140dcd5aa70680995fa3aa5", size = 263485, upload-time = "2025-12-28T15:42:34.759Z" }, + { url = "https://files.pythonhosted.org/packages/fa/7a/2c114fa5c5fc08ba0777e4aec4c97e0b4a1afcb69c75f1f54cff78b073ab/coverage-7.13.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:724b1b270cb13ea2e6503476e34541a0b1f62280bc997eab443f87790202033d", size = 265890, upload-time = "2025-12-28T15:42:36.517Z" }, + { url = "https://files.pythonhosted.org/packages/65/d9/f0794aa1c74ceabc780fe17f6c338456bbc4e96bd950f2e969f48ac6fb20/coverage-7.13.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:916abf1ac5cf7eb16bc540a5bf75c71c43a676f5c52fcb9fe75a2bd75fb944e8", size = 260445, upload-time = "2025-12-28T15:42:38.646Z" }, + { url = "https://files.pythonhosted.org/packages/49/23/184b22a00d9bb97488863ced9454068c79e413cb23f472da6cbddc6cfc52/coverage-7.13.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:776483fd35b58d8afe3acbd9988d5de592ab6da2d2a865edfdbc9fdb43e7c486", size = 263357, upload-time = "2025-12-28T15:42:40.788Z" }, + { url = "https://files.pythonhosted.org/packages/7d/bd/58af54c0c9199ea4190284f389005779d7daf7bf3ce40dcd2d2b2f96da69/coverage-7.13.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b6f3b96617e9852703f5b633ea01315ca45c77e879584f283c44127f0f1ec564", size = 260959, upload-time = "2025-12-28T15:42:42.808Z" }, + { url = "https://files.pythonhosted.org/packages/4b/2a/6839294e8f78a4891bf1df79d69c536880ba2f970d0ff09e7513d6e352e9/coverage-7.13.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:bd63e7b74661fed317212fab774e2a648bc4bb09b35f25474f8e3325d2945cd7", size = 259792, upload-time = "2025-12-28T15:42:44.818Z" }, + { url 
= "https://files.pythonhosted.org/packages/ba/c3/528674d4623283310ad676c5af7414b9850ab6d55c2300e8aa4b945ec554/coverage-7.13.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:933082f161bbb3e9f90d00990dc956120f608cdbcaeea15c4d897f56ef4fe416", size = 262123, upload-time = "2025-12-28T15:42:47.108Z" }, + { url = "https://files.pythonhosted.org/packages/06/c5/8c0515692fb4c73ac379d8dc09b18eaf0214ecb76ea6e62467ba7a1556ff/coverage-7.13.1-cp314-cp314t-win32.whl", hash = "sha256:18be793c4c87de2965e1c0f060f03d9e5aff66cfeae8e1dbe6e5b88056ec153f", size = 222562, upload-time = "2025-12-28T15:42:49.144Z" }, + { url = "https://files.pythonhosted.org/packages/05/0e/c0a0c4678cb30dac735811db529b321d7e1c9120b79bd728d4f4d6b010e9/coverage-7.13.1-cp314-cp314t-win_amd64.whl", hash = "sha256:0e42e0ec0cd3e0d851cb3c91f770c9301f48647cb2877cb78f74bdaa07639a79", size = 223670, upload-time = "2025-12-28T15:42:51.218Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5f/b177aa0011f354abf03a8f30a85032686d290fdeed4222b27d36b4372a50/coverage-7.13.1-cp314-cp314t-win_arm64.whl", hash = "sha256:eaecf47ef10c72ece9a2a92118257da87e460e113b83cc0d2905cbbe931792b4", size = 221707, upload-time = "2025-12-28T15:42:53.034Z" }, + { url = "https://files.pythonhosted.org/packages/cc/48/d9f421cb8da5afaa1a64570d9989e00fb7955e6acddc5a12979f7666ef60/coverage-7.13.1-py3-none-any.whl", hash = "sha256:2016745cb3ba554469d02819d78958b571792bb68e31302610e898f80dd3a573", size = 210722, upload-time = "2025-12-28T15:42:54.901Z" }, ] [package.optional-dependencies] @@ -1095,45 +1095,45 @@ wheels = [ [[package]] name = "cython" -version = "3.2.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/39/e1/c0d92b1258722e1bc62a12e630c33f1f842fdab53fd8cd5de2f75c6449a9/cython-3.2.3.tar.gz", hash = "sha256:f13832412d633376ffc08d751cc18ed0d7d00a398a4065e2871db505258748a6", size = 3276650, upload-time = "2025-12-14T07:50:34.691Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/11/77/71c2aef97648548116ca22197c191f8293178f9d4e939e2cb4cbe912619e/cython-3.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:55c0157a5940fbf0b054508207fe0fc5cc796d0532af492c0fa35b5b41a883f7", size = 2959265, upload-time = "2025-12-14T07:50:46.035Z" }, - { url = "https://files.pythonhosted.org/packages/76/b8/bc06c6427dfe46164d36c0b35e45028d0427faac28d218e065da05edcce5/cython-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51fd1a56d0fc682c05ecc44f11927dbe28dd2867c30148557b62d7d1017a13d8", size = 3368365, upload-time = "2025-12-14T07:50:48.111Z" }, - { url = "https://files.pythonhosted.org/packages/c7/3e/7550e90ccd6493842dede63ac484181d4a254ed7332eaad01253ab789d36/cython-3.2.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1309bdce06f767e8514377f44b3a5b9e5b91e58af1348010cca10b572e1852ad", size = 3536996, upload-time = "2025-12-14T07:50:50.175Z" }, - { url = "https://files.pythonhosted.org/packages/33/94/df8d414d8fb3afd5a0350245ebc589e5bc25b655342ad7341e5cfc869cf5/cython-3.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:6b6dd6b7aca8447b2a6779b314cc402f1e4990754507a88477e535b3c8b41ad1", size = 2765625, upload-time = "2025-12-14T07:50:51.962Z" }, - { url = "https://files.pythonhosted.org/packages/c3/85/77315c92d29d782bee1b36e30b8d76ad1e731cb7ea0af17e285885f3bb68/cython-3.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c041f7e338cca2422e0924716b04fabeda57636214324fc1941396acce99e7c7", size = 2951618, upload-time = "2025-12-14T07:50:53.883Z" }, - { url = "https://files.pythonhosted.org/packages/cb/dd/a8209e0d424a0207ddb4a3097a97b667027af3cfada762d85f3bed08ccf8/cython-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:283262b8f902323ceb6ed3b643f275a2a963e7ab059f0714a467933383cbc56d", size = 3243636, upload-time = "2025-12-14T07:50:56.346Z" }, - { url = 
"https://files.pythonhosted.org/packages/1f/2d/bc1927fd7174f7928b86cc9b83589d39592b9273c8b1d2295ca0c0071984/cython-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22a624290c2883387b2c2cfb5224c15bff21432c6a2cf0c23ac8df3dcbd45e96", size = 3378528, upload-time = "2025-12-14T07:50:57.988Z" }, - { url = "https://files.pythonhosted.org/packages/ad/10/5add6a6e1721f9c36b5d5b4f3b75fa7af43196e4f2a474921a7277e31b7a/cython-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:26404441f733fd1cfb0dd9c45477f501437e7d51fad05bb402bd2feb4e127aa3", size = 2769341, upload-time = "2025-12-14T07:50:59.581Z" }, - { url = "https://files.pythonhosted.org/packages/b4/14/d16282d17c9eb2f78ca9ccd5801fed22f6c3360f5a55dbcce3c93cc70352/cython-3.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cf210228c15b5c625824d8e31d43b6fea25f9e13c81dac632f2f7d838e0229a5", size = 2968471, upload-time = "2025-12-14T07:51:01.207Z" }, - { url = "https://files.pythonhosted.org/packages/d0/3c/46304a942dac5a636701c55f5b05ec00ad151e6722cd068fe3d0993349bb/cython-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f5bf0cebeb4147e172a114437d3fce5a507595d8fdd821be792b1bb25c691514", size = 3223581, upload-time = "2025-12-14T07:51:04.336Z" }, - { url = "https://files.pythonhosted.org/packages/29/ad/15da606d71f40bcf2c405f84ca3d4195cb252f4eaa2f551fe6b2e630ee7c/cython-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d1f8700ba89c977438744f083890d87187f15709507a5489e0f6d682053b7fa0", size = 3391391, upload-time = "2025-12-14T07:51:05.998Z" }, - { url = "https://files.pythonhosted.org/packages/51/9e/045b35eb678682edc3e2d57112cf5ac3581a9ef274eb220b638279195678/cython-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:25732f3981a93407826297f4423206e5e22c3cfccfc74e37bf444453bbdc076f", size = 2756814, upload-time = "2025-12-14T07:51:07.759Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/c2/35cedff7fcbc844e4e872c6719df5ece26551e14f37d76eb41c412d778c6/cython-3.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1d097ad4686b58b8c03d760d08eca28f79878d404ef7452c49636170571654e0", size = 2959019, upload-time = "2025-12-14T07:51:09.429Z" }, - { url = "https://files.pythonhosted.org/packages/44/1b/05787f71b4834a28b19a0a3edee44537c239924f9a7d96ea38ebba365e5c/cython-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2a18f2e3bcd018416157d0a83446e29b4a31437ab79061fe5504c077e70389d0", size = 3212912, upload-time = "2025-12-14T07:51:11.512Z" }, - { url = "https://files.pythonhosted.org/packages/48/fe/f5d560e3a2eb1891d55f465d17437179d9f5fbd4f46aebf2c00d01fa5e80/cython-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73afc824896ffaf22bf8122d0a7107f0120e3188a353bdcfa92317fc0d9a87ce", size = 3375222, upload-time = "2025-12-14T07:51:13.762Z" }, - { url = "https://files.pythonhosted.org/packages/3d/b9/dcf5a68ac2ef89424657b03f751ca799861db097fa83bd52068bed198120/cython-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:9aa1a8abf3d8bb53cc19cfaa21c004afad8d4ccb17513f8aa11a788d1f525abd", size = 2754908, upload-time = "2025-12-14T07:51:15.575Z" }, - { url = "https://files.pythonhosted.org/packages/5c/07/93c65fbee4ab419767b7e54937e91cacae5c71d2d1277cc882ea3b1ce777/cython-3.2.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:80f20369d7aaf4e76cfef902025256918a5cc6eb0aed6d8783e4b1c563e4f6c4", size = 2969476, upload-time = "2025-12-14T07:51:17.213Z" }, - { url = "https://files.pythonhosted.org/packages/00/ad/736b4cbcb42740608cae1315c790dd6a4419705545f0615af4074e267ea3/cython-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60d19376252722241a3d3ec8a695c5cae4deb053486d2e5f9a40cb569a0cf984", size = 3258714, upload-time = "2025-12-14T07:51:18.925Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/74/03c08a723a319640f0bb3eaca947e009caa2eb48957ff735bfd77b0be060/cython-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4293f1861480b397809a6f021a6c12e15e918feae1c7add80c99d07af206578", size = 3384940, upload-time = "2025-12-14T07:51:20.593Z" }, - { url = "https://files.pythonhosted.org/packages/73/14/0871a0b407fa50257a79c57a608903ed50032c7619d9531451f7090a5ee3/cython-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:84330e7c8bf220a82b633678b9f99e10227c8f4c406d67c5552449ab2afedef8", size = 2791923, upload-time = "2025-12-14T07:51:22.292Z" }, - { url = "https://files.pythonhosted.org/packages/43/49/afe1e3df87a770861cf17ba39f4a91f6d22a2571010fc1890b3708360630/cython-3.2.3-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:74f482da8b605c61b4df6ff716d013f20131949cb2fa59b03e63abd36ef5bac0", size = 2874467, upload-time = "2025-12-14T07:51:31.568Z" }, - { url = "https://files.pythonhosted.org/packages/c7/da/044f725a083e28fb4de5bd33d13ec13f0753734b6ae52d4bc07434610cc8/cython-3.2.3-cp39-abi3-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0a75a04688875b275a6c875565e672325bae04327dd6ec2fc25aeb5c6cf82fce", size = 3211272, upload-time = "2025-12-14T07:51:33.673Z" }, - { url = "https://files.pythonhosted.org/packages/95/14/af02ba6e2e03279f2ca2956e3024a44faed4c8496bda8170b663dc3ba6e8/cython-3.2.3-cp39-abi3-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6b01b36c9eb1b68c25bddbeef7379f7bfc37f7c9afc044e71840ffab761a2dd0", size = 2856058, upload-time = "2025-12-14T07:51:36.015Z" }, - { url = "https://files.pythonhosted.org/packages/69/16/d254359396c2f099ab154f89b2b35f5b8b0dd21a8102c2c96a7e00291434/cython-3.2.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3829f99d611412288f44ff543e9d2b5c0c83274998b2a6680bbe5cca3539c1fd", size = 2993276, upload-time = "2025-12-14T07:51:37.863Z" }, - { url = 
"https://files.pythonhosted.org/packages/51/0e/1a071381923e896f751f8fbff2a01c5dc8860a8b9a90066f6ec8df561dc4/cython-3.2.3-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:c2365a0c79ab9c0fa86d30a4a6ba7e37fc1be9537c48b79b9d63ee7e08bf2fef", size = 2890843, upload-time = "2025-12-14T07:51:40.409Z" }, - { url = "https://files.pythonhosted.org/packages/f4/46/1e93e10766db988e6bb8e5c6f7e2e90b9e62f1ac8dee4c1a6cf1fc170773/cython-3.2.3-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:3141734fb15f8b5e9402b9240f8da8336edecae91742b41c85678c31ab68f66d", size = 3225339, upload-time = "2025-12-14T07:51:42.09Z" }, - { url = "https://files.pythonhosted.org/packages/d4/ae/c284b06ae6a9c95d5883bf8744d10466cf0df64cef041a4c80ccf9fd07bd/cython-3.2.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9a24cc653fad3adbd9cbaa638d80df3aa08a1fe27f62eb35850971c70be680df", size = 3114751, upload-time = "2025-12-14T07:51:44.088Z" }, - { url = "https://files.pythonhosted.org/packages/c6/d6/7795a4775c70256217134195f06b07233cf17b00f8905d5b3d782208af64/cython-3.2.3-cp39-abi3-win32.whl", hash = "sha256:b39dff92db70cbd95528f3b81d70e06bd6d3fc9c1dd91321e4d3b999ece3bceb", size = 2435616, upload-time = "2025-12-14T07:51:46.063Z" }, - { url = "https://files.pythonhosted.org/packages/18/9e/2a3edcb858ad74e6274448dccf32150c532bc6e423f112a71f65ff3b5680/cython-3.2.3-cp39-abi3-win_arm64.whl", hash = "sha256:18edc858e6a52de47fe03ffa97ea14dadf450e20069de0a8aef531006c4bbd93", size = 2440952, upload-time = "2025-12-14T07:51:47.943Z" }, - { url = "https://files.pythonhosted.org/packages/e5/41/54fd429ff8147475fc24ca43246f85d78fb4e747c27f227e68f1594648f1/cython-3.2.3-py3-none-any.whl", hash = "sha256:06a1317097f540d3bb6c7b81ed58a0d8b9dbfa97abf39dfd4c22ee87a6c7241e", size = 1255561, upload-time = "2025-12-14T07:50:31.217Z" }, +version = "3.2.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/91/85/7574c9cd44b69a27210444b6650f6477f56c75fee1b70d7672d3e4166167/cython-3.2.4.tar.gz", hash = "sha256:84226ecd313b233da27dc2eb3601b4f222b8209c3a7216d8733b031da1dc64e6", size = 3280291, upload-time = "2026-01-04T14:14:14.473Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/10/720e0fb84eab4c927c4dd6b61eb7993f7732dd83d29ba6d73083874eade9/cython-3.2.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02cb0cc0f23b9874ad262d7d2b9560aed9c7e2df07b49b920bda6f2cc9cb505e", size = 2960836, upload-time = "2026-01-04T14:14:51.103Z" }, + { url = "https://files.pythonhosted.org/packages/7d/3d/b26f29092c71c36e0462752885bdfb18c23c176af4de953fdae2772a8941/cython-3.2.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f136f379a4a54246facd0eb6f1ee15c3837cb314ce87b677582ec014db4c6845", size = 3370134, upload-time = "2026-01-04T14:14:53.627Z" }, + { url = "https://files.pythonhosted.org/packages/56/9e/539fb0d09e4f5251b5b14f8daf77e71fee021527f1013791038234618b6b/cython-3.2.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:35ab0632186057406ec729374c737c37051d2eacad9d515d94e5a3b3e58a9b02", size = 3537552, upload-time = "2026-01-04T14:14:56.852Z" }, + { url = "https://files.pythonhosted.org/packages/10/c6/82d19a451c050d1be0f05b1a3302267463d391db548f013ee88b5348a8e9/cython-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:ca2399dc75796b785f74fb85c938254fa10c80272004d573c455f9123eceed86", size = 2766191, upload-time = "2026-01-04T14:14:58.709Z" }, + { url = "https://files.pythonhosted.org/packages/85/cc/8f06145ec3efa121c8b1b67f06a640386ddacd77ee3e574da582a21b14ee/cython-3.2.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff9af2134c05e3734064808db95b4dd7341a39af06e8945d05ea358e1741aaed", size = 2953769, upload-time = "2026-01-04T14:15:00.361Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/b0/706cf830eddd831666208af1b3058c2e0758ae157590909c1f634b53bed9/cython-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67922c9de058a0bfb72d2e75222c52d09395614108c68a76d9800f150296ddb3", size = 3243841, upload-time = "2026-01-04T14:15:02.066Z" }, + { url = "https://files.pythonhosted.org/packages/ac/25/58893afd4ef45f79e3d4db82742fa4ff874b936d67a83c92939053920ccd/cython-3.2.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b362819d155fff1482575e804e43e3a8825332d32baa15245f4642022664a3f4", size = 3378083, upload-time = "2026-01-04T14:15:04.248Z" }, + { url = "https://files.pythonhosted.org/packages/32/e4/424a004d7c0d8a4050c81846ebbd22272ececfa9a498cb340aa44fccbec2/cython-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:1a64a112a34ec719b47c01395647e54fb4cf088a511613f9a3a5196694e8e382", size = 2769990, upload-time = "2026-01-04T14:15:06.53Z" }, + { url = "https://files.pythonhosted.org/packages/91/4d/1eb0c7c196a136b1926f4d7f0492a96c6fabd604d77e6cd43b56a3a16d83/cython-3.2.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:64d7f71be3dd6d6d4a4c575bb3a4674ea06d1e1e5e4cd1b9882a2bc40ed3c4c9", size = 2970064, upload-time = "2026-01-04T14:15:08.567Z" }, + { url = "https://files.pythonhosted.org/packages/03/1c/46e34b08bea19a1cdd1e938a4c123e6299241074642db9d81983cef95e9f/cython-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:869487ea41d004f8b92171f42271fbfadb1ec03bede3158705d16cd570d6b891", size = 3226757, upload-time = "2026-01-04T14:15:10.812Z" }, + { url = "https://files.pythonhosted.org/packages/12/33/3298a44d201c45bcf0d769659725ae70e9c6c42adf8032f6d89c8241098d/cython-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:55b6c44cd30821f0b25220ceba6fe636ede48981d2a41b9bbfe3c7902ce44ea7", size = 3388969, upload-time = 
"2026-01-04T14:15:12.45Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f3/4275cd3ea0a4cf4606f9b92e7f8766478192010b95a7f516d1b7cf22cb10/cython-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:767b143704bdd08a563153448955935844e53b852e54afdc552b43902ed1e235", size = 2756457, upload-time = "2026-01-04T14:15:14.67Z" }, + { url = "https://files.pythonhosted.org/packages/18/b5/1cfca43b7d20a0fdb1eac67313d6bb6b18d18897f82dd0f17436bdd2ba7f/cython-3.2.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:28e8075087a59756f2d059273184b8b639fe0f16cf17470bd91c39921bc154e0", size = 2960506, upload-time = "2026-01-04T14:15:16.733Z" }, + { url = "https://files.pythonhosted.org/packages/71/bb/8f28c39c342621047fea349a82fac712a5e2b37546d2f737bbde48d5143d/cython-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03893c88299a2c868bb741ba6513357acd104e7c42265809fd58dce1456a36fc", size = 3213148, upload-time = "2026-01-04T14:15:18.804Z" }, + { url = "https://files.pythonhosted.org/packages/7a/d2/16fa02f129ed2b627e88d9d9ebd5ade3eeb66392ae5ba85b259d2d52b047/cython-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f81eda419b5ada7b197bbc3c5f4494090e3884521ffd75a3876c93fbf66c9ca8", size = 3375764, upload-time = "2026-01-04T14:15:20.817Z" }, + { url = "https://files.pythonhosted.org/packages/91/3f/deb8f023a5c10c0649eb81332a58c180fad27c7533bb4aae138b5bc34d92/cython-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:83266c356c13c68ffe658b4905279c993d8a5337bb0160fa90c8a3e297ea9a2e", size = 2754238, upload-time = "2026-01-04T14:15:23.001Z" }, + { url = "https://files.pythonhosted.org/packages/ee/d7/3bda3efce0c5c6ce79cc21285dbe6f60369c20364e112f5a506ee8a1b067/cython-3.2.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d4b4fd5332ab093131fa6172e8362f16adef3eac3179fd24bbdc392531cb82fa", size = 2971496, upload-time = "2026-01-04T14:15:25.038Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/ed/1021ffc80b9c4720b7ba869aea8422c82c84245ef117ebe47a556bdc00c3/cython-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e3b5ac54e95f034bc7fb07313996d27cbf71abc17b229b186c1540942d2dc28e", size = 3256146, upload-time = "2026-01-04T14:15:26.741Z" }, + { url = "https://files.pythonhosted.org/packages/0c/51/ca221ec7e94b3c5dc4138dcdcbd41178df1729c1e88c5dfb25f9d30ba3da/cython-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90f43be4eaa6afd58ce20d970bb1657a3627c44e1760630b82aa256ba74b4acb", size = 3383458, upload-time = "2026-01-04T14:15:28.425Z" }, + { url = "https://files.pythonhosted.org/packages/79/2e/1388fc0243240cd54994bb74f26aaaf3b2e22f89d3a2cf8da06d75d46ca2/cython-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:983f9d2bb8a896e16fa68f2b37866ded35fa980195eefe62f764ddc5f9f5ef8e", size = 2791241, upload-time = "2026-01-04T14:15:30.448Z" }, + { url = "https://files.pythonhosted.org/packages/0a/8b/fd393f0923c82be4ec0db712fffb2ff0a7a131707b842c99bf24b549274d/cython-3.2.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:36bf3f5eb56d5281aafabecbaa6ed288bc11db87547bba4e1e52943ae6961ccf", size = 2875622, upload-time = "2026-01-04T14:15:39.749Z" }, + { url = "https://files.pythonhosted.org/packages/73/48/48530d9b9d64ec11dbe0dd3178a5fe1e0b27977c1054ecffb82be81e9b6a/cython-3.2.4-cp39-abi3-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6d5267f22b6451eb1e2e1b88f6f78a2c9c8733a6ddefd4520d3968d26b824581", size = 3210669, upload-time = "2026-01-04T14:15:41.911Z" }, + { url = "https://files.pythonhosted.org/packages/5e/91/4865fbfef1f6bb4f21d79c46104a53d1a3fa4348286237e15eafb26e0828/cython-3.2.4-cp39-abi3-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3b6e58f73a69230218d5381817850ce6d0da5bb7e87eb7d528c7027cbba40b06", size = 2856835, upload-time = 
"2026-01-04T14:15:43.815Z" }, + { url = "https://files.pythonhosted.org/packages/fa/39/60317957dbef179572398253f29d28f75f94ab82d6d39ea3237fb6c89268/cython-3.2.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e71efb20048358a6b8ec604a0532961c50c067b5e63e345e2e359fff72feaee8", size = 2994408, upload-time = "2026-01-04T14:15:45.422Z" }, + { url = "https://files.pythonhosted.org/packages/8d/30/7c24d9292650db4abebce98abc9b49c820d40fa7c87921c0a84c32f4efe7/cython-3.2.4-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:28b1e363b024c4b8dcf52ff68125e635cb9cb4b0ba997d628f25e32543a71103", size = 2891478, upload-time = "2026-01-04T14:15:47.394Z" }, + { url = "https://files.pythonhosted.org/packages/86/70/03dc3c962cde9da37a93cca8360e576f904d5f9beecfc9d70b1f820d2e5f/cython-3.2.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:31a90b4a2c47bb6d56baeb926948348ec968e932c1ae2c53239164e3e8880ccf", size = 3225663, upload-time = "2026-01-04T14:15:49.446Z" }, + { url = "https://files.pythonhosted.org/packages/b1/97/10b50c38313c37b1300325e2e53f48ea9a2c078a85c0c9572057135e31d5/cython-3.2.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e65e4773021f8dc8532010b4fbebe782c77f9a0817e93886e518c93bd6a44e9d", size = 3115628, upload-time = "2026-01-04T14:15:51.323Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b1/d6a353c9b147848122a0db370863601fdf56de2d983b5c4a6a11e6ee3cd7/cython-3.2.4-cp39-abi3-win32.whl", hash = "sha256:2b1f12c0e4798293d2754e73cd6f35fa5bbdf072bdc14bc6fc442c059ef2d290", size = 2437463, upload-time = "2026-01-04T14:15:53.787Z" }, + { url = "https://files.pythonhosted.org/packages/2d/d8/319a1263b9c33b71343adfd407e5daffd453daef47ebc7b642820a8b68ed/cython-3.2.4-cp39-abi3-win_arm64.whl", hash = "sha256:3b8e62049afef9da931d55de82d8f46c9a147313b69d5ff6af6e9121d545ce7a", size = 2442754, upload-time = "2026-01-04T14:15:55.382Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/fa/d3c15189f7c52aaefbaea76fb012119b04b9013f4bf446cb4eb4c26c4e6b/cython-3.2.4-py3-none-any.whl", hash = "sha256:732fc93bc33ae4b14f6afaca663b916c2fdd5dcbfad7114e17fb2434eeaea45c", size = 1257078, upload-time = "2026-01-04T14:14:12.373Z" }, ] [[package]] name = "datasets" -version = "4.4.1" +version = "4.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dill" }, @@ -1143,7 +1143,7 @@ dependencies = [ { name = "huggingface-hub" }, { name = "multiprocess" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pandas" }, { name = "pyarrow" }, @@ -1152,9 +1152,9 @@ dependencies = [ { name = "tqdm" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/93/bf/0dae295d6d1ba0b1a200a9dd216838464b5bbd05da01407cb1330b377445/datasets-4.4.1.tar.gz", hash = "sha256:80322699aa8c0bbbdb7caa87906da689c3c2e29523cff698775c67f28fdab1fc", size = 585341, upload-time = "2025-11-05T16:00:38.162Z" } +sdist = { 
url = "https://files.pythonhosted.org/packages/c4/54/9359803da96bc65439a28fbb014dc2c90b7d4d8034a93b72362b0d40191f/datasets-4.4.2.tar.gz", hash = "sha256:9de16e415c4ba4713eac0493f7c7dc74f3aa21599297f00cc6ddab409cb7b24b", size = 586474, upload-time = "2025-12-19T15:03:09.129Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/5e/6f8d874366788ad5d549e9ba258037d974dda6e004843be1bda794571701/datasets-4.4.1-py3-none-any.whl", hash = "sha256:c1163de5211e42546079ab355cc0250c7e6db16eb209ac5ac6252f801f596c44", size = 511591, upload-time = "2025-11-05T16:00:36.365Z" }, + { url = "https://files.pythonhosted.org/packages/7b/b5/fefa518c809de7bced5cddb7c21c010da66fa2ae494bda96844a280cc6ce/datasets-4.4.2-py3-none-any.whl", hash = "sha256:6f5ef3417504d9cd663c71c1b90b9a494ff4c2076a2cd6a6e40ceee6ad95befc", size = 512268, upload-time = "2025-12-19T15:03:07.087Z" }, ] [[package]] @@ -1285,7 +1285,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.125.0" +version = "0.128.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -1293,9 +1293,9 @@ dependencies = [ { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/17/71/2df15009fb4bdd522a069d2fbca6007c6c5487fce5cb965be00fc335f1d1/fastapi-0.125.0.tar.gz", hash = "sha256:16b532691a33e2c5dee1dac32feb31dc6eb41a3dd4ff29a95f9487cb21c054c0", size = 370550, upload-time = "2025-12-17T21:41:44.15Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/34/2f/ff2fcc98f500713368d8b650e1bbc4a0b3ebcdd3e050dcdaad5f5a13fd7e/fastapi-0.125.0-py3-none-any.whl", hash = 
"sha256:2570ec4f3aecf5cca8f0428aed2398b774fcdfee6c2116f86e80513f2f86a7a1", size = 112888, upload-time = "2025-12-17T21:41:41.286Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" }, ] [[package]] @@ -1315,11 +1315,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.20.1" +version = "3.20.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a7/23/ce7a1126827cedeb958fc043d61745754464eb56c5937c35bbf2b8e26f34/filelock-3.20.1.tar.gz", hash = "sha256:b8360948b351b80f420878d8516519a2204b07aefcdcfd24912a5d33127f188c", size = 19476, upload-time = "2025-12-15T23:54:28.027Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c1/e0/a75dbe4bca1e7d41307323dad5ea2efdd95408f74ab2de8bd7dba9b51a1a/filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64", size = 19510, upload-time = "2026-01-02T15:33:32.582Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/7f/a1a97644e39e7316d850784c642093c99df1290a460df4ede27659056834/filelock-3.20.1-py3-none-any.whl", hash = "sha256:15d9e9a67306188a44baa72f569d2bfd803076269365fdea0934385da4dc361a", size = 16666, upload-time = "2025-12-15T23:54:26.874Z" }, + { url = "https://files.pythonhosted.org/packages/9a/30/ab407e2ec752aa541704ed8f93c11e2a5d92c168b8a755d818b74a3c5c2d/filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8", size = 16697, upload-time = "2026-01-02T15:33:31.133Z" }, ] [[package]] @@ -1388,7 +1388,7 @@ dependencies = [ { name = "einops" }, { name = "ninja" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and 
extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "nvidia-cudnn-frontend" }, { name = "nvidia-cutlass-dsl" }, { name = "nvidia-ml-py" }, @@ -1584,14 +1584,14 @@ wheels = [ [[package]] name = "gitpython" -version = "3.1.45" +version = "3.1.46" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "gitdb" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9a/c8/dd58967d119baab745caec2f9d853297cec1989ec1d63f677d3880632b88/gitpython-3.1.45.tar.gz", hash = "sha256:85b0ee964ceddf211c41b9f27a49086010a190fd8132a24e21f362a4b36a791c", size = 215076, upload-time = "2025-07-24T03:45:54.871Z" } +sdist = { url = "https://files.pythonhosted.org/packages/df/b5/59d16470a1f0dfe8c793f9ef56fd3826093fc52b3bd96d6b9d6c26c7e27b/gitpython-3.1.46.tar.gz", hash = "sha256:400124c7d0ef4ea03f7310ac2fbf7151e09ff97f2a3288d64a440c584a29c37f", size = 215371, upload-time = "2026-01-01T15:37:32.073Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/01/61/d4b89fec821f72385526e1b9d9a3a0385dda4a72b206d28049e2c7cd39b8/gitpython-3.1.45-py3-none-any.whl", hash = 
"sha256:8908cb2e02fb3b93b7eb0f2827125cb699869470432cc885f019b8fd0fccff77", size = 208168, upload-time = "2025-07-24T03:45:52.517Z" }, + { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" }, ] [[package]] @@ -2223,7 +2223,7 @@ name = "megatron-core" source = { editable = "." } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "torch", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] @@ -2250,7 +2250,7 @@ dev = [ { name = "tensorstore", version = "0.1.78", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "tensorstore", version = "0.1.80", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "tqdm" }, - { name = "transformer-engine", extra = ["core-cu13", 
"pytorch"], marker = "extra == 'extra-13-megatron-core-dev'" }, + { name = "transformer-engine", marker = "extra == 'extra-13-megatron-core-dev'" }, { name = "wget" }, ] lts = [ @@ -2374,7 +2374,7 @@ requires-dist = [ { name = "torch" }, { name = "tqdm", marker = "extra == 'dev'" }, { name = "tqdm", marker = "extra == 'lts'" }, - { name = "transformer-engine", extras = ["core-cu13", "pytorch"], marker = "extra == 'dev'", specifier = ">=2.9.0a0,<2.11.0" }, + { name = "transformer-engine", extras = ["core-cu13", "pytorch"], marker = "extra == 'dev'", git = "https://github.com/NVIDIA/TransformerEngine.git?rev=release_v2.11" }, { name = "transformers", marker = "extra == 'mlm'" }, { name = "wandb", marker = "extra == 'mlm'" }, { name = "wget", marker = "extra == 'dev'" }, @@ -2441,7 +2441,7 @@ dependencies = [ { name = "click" }, { name = "multi-storage-client" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "pillow" }, { name = "pyyaml" }, { name = "s3fs" }, @@ -2470,7 +2470,7 @@ version = "0.5.4" source = 
{ registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0e/4a/c27b42ed9b1c7d13d9ba8b6905dece787d6259152f2309338aed29b2447b/ml_dtypes-0.5.4.tar.gz", hash = "sha256:8ab06a50fb9bf9666dd0fe5dfb4676fa2b0ac0f31ecff72a6c3af8e22c063453", size = 692314, upload-time = "2025-11-17T22:32:31.031Z" } wheels = [ @@ -2965,7 +2965,7 @@ wheels = [ [[package]] name = "numpy" -version = "2.3.5" +version = "2.4.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -2977,81 +2977,79 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform == 'linux'", "python_full_version == '3.11.*' and sys_platform != 'linux'", ] -sdist = { url = "https://files.pythonhosted.org/packages/76/65/21b3bc86aac7b8f2862db1e808f1ea22b028e30a225a34a5ede9bf8678f2/numpy-2.3.5.tar.gz", hash = 
"sha256:784db1dcdab56bf0517743e746dfb0f885fc68d948aba86eeec2cba234bdf1c0", size = 20584950, upload-time = "2025-11-16T22:52:42.067Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/77/84dd1d2e34d7e2792a236ba180b5e8fcc1e3e414e761ce0253f63d7f572e/numpy-2.3.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de5672f4a7b200c15a4127042170a694d4df43c992948f5e1af57f0174beed10", size = 17034641, upload-time = "2025-11-16T22:49:19.336Z" }, - { url = "https://files.pythonhosted.org/packages/2a/ea/25e26fa5837106cde46ae7d0b667e20f69cbbc0efd64cba8221411ab26ae/numpy-2.3.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:acfd89508504a19ed06ef963ad544ec6664518c863436306153e13e94605c218", size = 12528324, upload-time = "2025-11-16T22:49:22.582Z" }, - { url = "https://files.pythonhosted.org/packages/4d/1a/e85f0eea4cf03d6a0228f5c0256b53f2df4bc794706e7df019fc622e47f1/numpy-2.3.5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:ffe22d2b05504f786c867c8395de703937f934272eb67586817b46188b4ded6d", size = 5356872, upload-time = "2025-11-16T22:49:25.408Z" }, - { url = "https://files.pythonhosted.org/packages/5c/bb/35ef04afd567f4c989c2060cde39211e4ac5357155c1833bcd1166055c61/numpy-2.3.5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:872a5cf366aec6bb1147336480fef14c9164b154aeb6542327de4970282cd2f5", size = 6893148, upload-time = "2025-11-16T22:49:27.549Z" }, - { url = "https://files.pythonhosted.org/packages/f2/2b/05bbeb06e2dff5eab512dfc678b1cc5ee94d8ac5956a0885c64b6b26252b/numpy-2.3.5-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3095bdb8dd297e5920b010e96134ed91d852d81d490e787beca7e35ae1d89cf7", size = 14557282, upload-time = "2025-11-16T22:49:30.964Z" }, - { url = "https://files.pythonhosted.org/packages/65/fb/2b23769462b34398d9326081fad5655198fcf18966fcb1f1e49db44fbf31/numpy-2.3.5-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8cba086a43d54ca804ce711b2a940b16e452807acebe7852ff327f1ecd49b0d4", size 
= 16897903, upload-time = "2025-11-16T22:49:34.191Z" }, - { url = "https://files.pythonhosted.org/packages/ac/14/085f4cf05fc3f1e8aa95e85404e984ffca9b2275a5dc2b1aae18a67538b8/numpy-2.3.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6cf9b429b21df6b99f4dee7a1218b8b7ffbbe7df8764dc0bd60ce8a0708fed1e", size = 16341672, upload-time = "2025-11-16T22:49:37.2Z" }, - { url = "https://files.pythonhosted.org/packages/6f/3b/1f73994904142b2aa290449b3bb99772477b5fd94d787093e4f24f5af763/numpy-2.3.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:396084a36abdb603546b119d96528c2f6263921c50df3c8fd7cb28873a237748", size = 18838896, upload-time = "2025-11-16T22:49:39.727Z" }, - { url = "https://files.pythonhosted.org/packages/cd/b9/cf6649b2124f288309ffc353070792caf42ad69047dcc60da85ee85fea58/numpy-2.3.5-cp311-cp311-win32.whl", hash = "sha256:b0c7088a73aef3d687c4deef8452a3ac7c1be4e29ed8bf3b366c8111128ac60c", size = 6563608, upload-time = "2025-11-16T22:49:42.079Z" }, - { url = "https://files.pythonhosted.org/packages/aa/44/9fe81ae1dcc29c531843852e2874080dc441338574ccc4306b39e2ff6e59/numpy-2.3.5-cp311-cp311-win_amd64.whl", hash = "sha256:a414504bef8945eae5f2d7cb7be2d4af77c5d1cb5e20b296c2c25b61dff2900c", size = 13078442, upload-time = "2025-11-16T22:49:43.99Z" }, - { url = "https://files.pythonhosted.org/packages/6d/a7/f99a41553d2da82a20a2f22e93c94f928e4490bb447c9ff3c4ff230581d3/numpy-2.3.5-cp311-cp311-win_arm64.whl", hash = "sha256:0cd00b7b36e35398fa2d16af7b907b65304ef8bb4817a550e06e5012929830fa", size = 10458555, upload-time = "2025-11-16T22:49:47.092Z" }, - { url = "https://files.pythonhosted.org/packages/44/37/e669fe6cbb2b96c62f6bbedc6a81c0f3b7362f6a59230b23caa673a85721/numpy-2.3.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:74ae7b798248fe62021dbf3c914245ad45d1a6b0cb4a29ecb4b31d0bfbc4cc3e", size = 16733873, upload-time = "2025-11-16T22:49:49.84Z" }, - { url = 
"https://files.pythonhosted.org/packages/c5/65/df0db6c097892c9380851ab9e44b52d4f7ba576b833996e0080181c0c439/numpy-2.3.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee3888d9ff7c14604052b2ca5535a30216aa0a58e948cdd3eeb8d3415f638769", size = 12259838, upload-time = "2025-11-16T22:49:52.863Z" }, - { url = "https://files.pythonhosted.org/packages/5b/e1/1ee06e70eb2136797abe847d386e7c0e830b67ad1d43f364dd04fa50d338/numpy-2.3.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:612a95a17655e213502f60cfb9bf9408efdc9eb1d5f50535cc6eb365d11b42b5", size = 5088378, upload-time = "2025-11-16T22:49:55.055Z" }, - { url = "https://files.pythonhosted.org/packages/6d/9c/1ca85fb86708724275103b81ec4cf1ac1d08f465368acfc8da7ab545bdae/numpy-2.3.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3101e5177d114a593d79dd79658650fe28b5a0d8abeb8ce6f437c0e6df5be1a4", size = 6628559, upload-time = "2025-11-16T22:49:57.371Z" }, - { url = "https://files.pythonhosted.org/packages/74/78/fcd41e5a0ce4f3f7b003da85825acddae6d7ecb60cf25194741b036ca7d6/numpy-2.3.5-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b973c57ff8e184109db042c842423ff4f60446239bd585a5131cc47f06f789d", size = 14250702, upload-time = "2025-11-16T22:49:59.632Z" }, - { url = "https://files.pythonhosted.org/packages/b6/23/2a1b231b8ff672b4c450dac27164a8b2ca7d9b7144f9c02d2396518352eb/numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0d8163f43acde9a73c2a33605353a4f1bc4798745a8b1d73183b28e5b435ae28", size = 16606086, upload-time = "2025-11-16T22:50:02.127Z" }, - { url = "https://files.pythonhosted.org/packages/a0/c5/5ad26fbfbe2012e190cc7d5003e4d874b88bb18861d0829edc140a713021/numpy-2.3.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:51c1e14eb1e154ebd80e860722f9e6ed6ec89714ad2db2d3aa33c31d7c12179b", size = 16025985, upload-time = "2025-11-16T22:50:04.536Z" }, - { url = 
"https://files.pythonhosted.org/packages/d2/fa/dd48e225c46c819288148d9d060b047fd2a6fb1eb37eae25112ee4cb4453/numpy-2.3.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b46b4ec24f7293f23adcd2d146960559aaf8020213de8ad1909dba6c013bf89c", size = 18542976, upload-time = "2025-11-16T22:50:07.557Z" }, - { url = "https://files.pythonhosted.org/packages/05/79/ccbd23a75862d95af03d28b5c6901a1b7da4803181513d52f3b86ed9446e/numpy-2.3.5-cp312-cp312-win32.whl", hash = "sha256:3997b5b3c9a771e157f9aae01dd579ee35ad7109be18db0e85dbdbe1de06e952", size = 6285274, upload-time = "2025-11-16T22:50:10.746Z" }, - { url = "https://files.pythonhosted.org/packages/2d/57/8aeaf160312f7f489dea47ab61e430b5cb051f59a98ae68b7133ce8fa06a/numpy-2.3.5-cp312-cp312-win_amd64.whl", hash = "sha256:86945f2ee6d10cdfd67bcb4069c1662dd711f7e2a4343db5cecec06b87cf31aa", size = 12782922, upload-time = "2025-11-16T22:50:12.811Z" }, - { url = "https://files.pythonhosted.org/packages/78/a6/aae5cc2ca78c45e64b9ef22f089141d661516856cf7c8a54ba434576900d/numpy-2.3.5-cp312-cp312-win_arm64.whl", hash = "sha256:f28620fe26bee16243be2b7b874da327312240a7cdc38b769a697578d2100013", size = 10194667, upload-time = "2025-11-16T22:50:16.16Z" }, - { url = "https://files.pythonhosted.org/packages/db/69/9cde09f36da4b5a505341180a3f2e6fadc352fd4d2b7096ce9778db83f1a/numpy-2.3.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d0f23b44f57077c1ede8c5f26b30f706498b4862d3ff0a7298b8411dd2f043ff", size = 16728251, upload-time = "2025-11-16T22:50:19.013Z" }, - { url = "https://files.pythonhosted.org/packages/79/fb/f505c95ceddd7027347b067689db71ca80bd5ecc926f913f1a23e65cf09b/numpy-2.3.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa5bc7c5d59d831d9773d1170acac7893ce3a5e130540605770ade83280e7188", size = 12254652, upload-time = "2025-11-16T22:50:21.487Z" }, - { url = "https://files.pythonhosted.org/packages/78/da/8c7738060ca9c31b30e9301ee0cf6c5ffdbf889d9593285a1cead337f9a5/numpy-2.3.5-cp313-cp313-macosx_14_0_arm64.whl", hash = 
"sha256:ccc933afd4d20aad3c00bcef049cb40049f7f196e0397f1109dba6fed63267b0", size = 5083172, upload-time = "2025-11-16T22:50:24.562Z" }, - { url = "https://files.pythonhosted.org/packages/a4/b4/ee5bb2537fb9430fd2ef30a616c3672b991a4129bb1c7dcc42aa0abbe5d7/numpy-2.3.5-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:afaffc4393205524af9dfa400fa250143a6c3bc646c08c9f5e25a9f4b4d6a903", size = 6622990, upload-time = "2025-11-16T22:50:26.47Z" }, - { url = "https://files.pythonhosted.org/packages/95/03/dc0723a013c7d7c19de5ef29e932c3081df1c14ba582b8b86b5de9db7f0f/numpy-2.3.5-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c75442b2209b8470d6d5d8b1c25714270686f14c749028d2199c54e29f20b4d", size = 14248902, upload-time = "2025-11-16T22:50:28.861Z" }, - { url = "https://files.pythonhosted.org/packages/f5/10/ca162f45a102738958dcec8023062dad0cbc17d1ab99d68c4e4a6c45fb2b/numpy-2.3.5-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11e06aa0af8c0f05104d56450d6093ee639e15f24ecf62d417329d06e522e017", size = 16597430, upload-time = "2025-11-16T22:50:31.56Z" }, - { url = "https://files.pythonhosted.org/packages/2a/51/c1e29be863588db58175175f057286900b4b3327a1351e706d5e0f8dd679/numpy-2.3.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ed89927b86296067b4f81f108a2271d8926467a8868e554eaf370fc27fa3ccaf", size = 16024551, upload-time = "2025-11-16T22:50:34.242Z" }, - { url = "https://files.pythonhosted.org/packages/83/68/8236589d4dbb87253d28259d04d9b814ec0ecce7cb1c7fed29729f4c3a78/numpy-2.3.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51c55fe3451421f3a6ef9a9c1439e82101c57a2c9eab9feb196a62b1a10b58ce", size = 18533275, upload-time = "2025-11-16T22:50:37.651Z" }, - { url = "https://files.pythonhosted.org/packages/40/56/2932d75b6f13465239e3b7b7e511be27f1b8161ca2510854f0b6e521c395/numpy-2.3.5-cp313-cp313-win32.whl", hash = "sha256:1978155dd49972084bd6ef388d66ab70f0c323ddee6f693d539376498720fb7e", size = 6277637, 
upload-time = "2025-11-16T22:50:40.11Z" }, - { url = "https://files.pythonhosted.org/packages/0c/88/e2eaa6cffb115b85ed7c7c87775cb8bcf0816816bc98ca8dbfa2ee33fe6e/numpy-2.3.5-cp313-cp313-win_amd64.whl", hash = "sha256:00dc4e846108a382c5869e77c6ed514394bdeb3403461d25a829711041217d5b", size = 12779090, upload-time = "2025-11-16T22:50:42.503Z" }, - { url = "https://files.pythonhosted.org/packages/8f/88/3f41e13a44ebd4034ee17baa384acac29ba6a4fcc2aca95f6f08ca0447d1/numpy-2.3.5-cp313-cp313-win_arm64.whl", hash = "sha256:0472f11f6ec23a74a906a00b48a4dcf3849209696dff7c189714511268d103ae", size = 10194710, upload-time = "2025-11-16T22:50:44.971Z" }, - { url = "https://files.pythonhosted.org/packages/13/cb/71744144e13389d577f867f745b7df2d8489463654a918eea2eeb166dfc9/numpy-2.3.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:414802f3b97f3c1eef41e530aaba3b3c1620649871d8cb38c6eaff034c2e16bd", size = 16827292, upload-time = "2025-11-16T22:50:47.715Z" }, - { url = "https://files.pythonhosted.org/packages/71/80/ba9dc6f2a4398e7f42b708a7fdc841bb638d353be255655498edbf9a15a8/numpy-2.3.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5ee6609ac3604fa7780e30a03e5e241a7956f8e2fcfe547d51e3afa5247ac47f", size = 12378897, upload-time = "2025-11-16T22:50:51.327Z" }, - { url = "https://files.pythonhosted.org/packages/2e/6d/db2151b9f64264bcceccd51741aa39b50150de9b602d98ecfe7e0c4bff39/numpy-2.3.5-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:86d835afea1eaa143012a2d7a3f45a3adce2d7adc8b4961f0b362214d800846a", size = 5207391, upload-time = "2025-11-16T22:50:54.542Z" }, - { url = "https://files.pythonhosted.org/packages/80/ae/429bacace5ccad48a14c4ae5332f6aa8ab9f69524193511d60ccdfdc65fa/numpy-2.3.5-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:30bc11310e8153ca664b14c5f1b73e94bd0503681fcf136a163de856f3a50139", size = 6721275, upload-time = "2025-11-16T22:50:56.794Z" }, - { url = 
"https://files.pythonhosted.org/packages/74/5b/1919abf32d8722646a38cd527bc3771eb229a32724ee6ba340ead9b92249/numpy-2.3.5-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1062fde1dcf469571705945b0f221b73928f34a20c904ffb45db101907c3454e", size = 14306855, upload-time = "2025-11-16T22:50:59.208Z" }, - { url = "https://files.pythonhosted.org/packages/a5/87/6831980559434973bebc30cd9c1f21e541a0f2b0c280d43d3afd909b66d0/numpy-2.3.5-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce581db493ea1a96c0556360ede6607496e8bf9b3a8efa66e06477267bc831e9", size = 16657359, upload-time = "2025-11-16T22:51:01.991Z" }, - { url = "https://files.pythonhosted.org/packages/dd/91/c797f544491ee99fd00495f12ebb7802c440c1915811d72ac5b4479a3356/numpy-2.3.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:cc8920d2ec5fa99875b670bb86ddeb21e295cb07aa331810d9e486e0b969d946", size = 16093374, upload-time = "2025-11-16T22:51:05.291Z" }, - { url = "https://files.pythonhosted.org/packages/74/a6/54da03253afcbe7a72785ec4da9c69fb7a17710141ff9ac5fcb2e32dbe64/numpy-2.3.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9ee2197ef8c4f0dfe405d835f3b6a14f5fee7782b5de51ba06fb65fc9b36e9f1", size = 18594587, upload-time = "2025-11-16T22:51:08.585Z" }, - { url = "https://files.pythonhosted.org/packages/80/e9/aff53abbdd41b0ecca94285f325aff42357c6b5abc482a3fcb4994290b18/numpy-2.3.5-cp313-cp313t-win32.whl", hash = "sha256:70b37199913c1bd300ff6e2693316c6f869c7ee16378faf10e4f5e3275b299c3", size = 6405940, upload-time = "2025-11-16T22:51:11.541Z" }, - { url = "https://files.pythonhosted.org/packages/d5/81/50613fec9d4de5480de18d4f8ef59ad7e344d497edbef3cfd80f24f98461/numpy-2.3.5-cp313-cp313t-win_amd64.whl", hash = "sha256:b501b5fa195cc9e24fe102f21ec0a44dffc231d2af79950b451e0d99cea02234", size = 12920341, upload-time = "2025-11-16T22:51:14.312Z" }, - { url = 
"https://files.pythonhosted.org/packages/bb/ab/08fd63b9a74303947f34f0bd7c5903b9c5532c2d287bead5bdf4c556c486/numpy-2.3.5-cp313-cp313t-win_arm64.whl", hash = "sha256:a80afd79f45f3c4a7d341f13acbe058d1ca8ac017c165d3fa0d3de6bc1a079d7", size = 10262507, upload-time = "2025-11-16T22:51:16.846Z" }, - { url = "https://files.pythonhosted.org/packages/ba/97/1a914559c19e32d6b2e233cf9a6a114e67c856d35b1d6babca571a3e880f/numpy-2.3.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:bf06bc2af43fa8d32d30fae16ad965663e966b1a3202ed407b84c989c3221e82", size = 16735706, upload-time = "2025-11-16T22:51:19.558Z" }, - { url = "https://files.pythonhosted.org/packages/57/d4/51233b1c1b13ecd796311216ae417796b88b0616cfd8a33ae4536330748a/numpy-2.3.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:052e8c42e0c49d2575621c158934920524f6c5da05a1d3b9bab5d8e259e045f0", size = 12264507, upload-time = "2025-11-16T22:51:22.492Z" }, - { url = "https://files.pythonhosted.org/packages/45/98/2fe46c5c2675b8306d0b4a3ec3494273e93e1226a490f766e84298576956/numpy-2.3.5-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:1ed1ec893cff7040a02c8aa1c8611b94d395590d553f6b53629a4461dc7f7b63", size = 5093049, upload-time = "2025-11-16T22:51:25.171Z" }, - { url = "https://files.pythonhosted.org/packages/ce/0e/0698378989bb0ac5f1660c81c78ab1fe5476c1a521ca9ee9d0710ce54099/numpy-2.3.5-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:2dcd0808a421a482a080f89859a18beb0b3d1e905b81e617a188bd80422d62e9", size = 6626603, upload-time = "2025-11-16T22:51:27Z" }, - { url = "https://files.pythonhosted.org/packages/5e/a6/9ca0eecc489640615642a6cbc0ca9e10df70df38c4d43f5a928ff18d8827/numpy-2.3.5-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:727fd05b57df37dc0bcf1a27767a3d9a78cbbc92822445f32cc3436ba797337b", size = 14262696, upload-time = "2025-11-16T22:51:29.402Z" }, - { url = 
"https://files.pythonhosted.org/packages/c8/f6/07ec185b90ec9d7217a00eeeed7383b73d7e709dae2a9a021b051542a708/numpy-2.3.5-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fffe29a1ef00883599d1dc2c51aa2e5d80afe49523c261a74933df395c15c520", size = 16597350, upload-time = "2025-11-16T22:51:32.167Z" }, - { url = "https://files.pythonhosted.org/packages/75/37/164071d1dde6a1a84c9b8e5b414fa127981bad47adf3a6b7e23917e52190/numpy-2.3.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8f7f0e05112916223d3f438f293abf0727e1181b5983f413dfa2fefc4098245c", size = 16040190, upload-time = "2025-11-16T22:51:35.403Z" }, - { url = "https://files.pythonhosted.org/packages/08/3c/f18b82a406b04859eb026d204e4e1773eb41c5be58410f41ffa511d114ae/numpy-2.3.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2e2eb32ddb9ccb817d620ac1d8dae7c3f641c1e5f55f531a33e8ab97960a75b8", size = 18536749, upload-time = "2025-11-16T22:51:39.698Z" }, - { url = "https://files.pythonhosted.org/packages/40/79/f82f572bf44cf0023a2fe8588768e23e1592585020d638999f15158609e1/numpy-2.3.5-cp314-cp314-win32.whl", hash = "sha256:66f85ce62c70b843bab1fb14a05d5737741e74e28c7b8b5a064de10142fad248", size = 6335432, upload-time = "2025-11-16T22:51:42.476Z" }, - { url = "https://files.pythonhosted.org/packages/a3/2e/235b4d96619931192c91660805e5e49242389742a7a82c27665021db690c/numpy-2.3.5-cp314-cp314-win_amd64.whl", hash = "sha256:e6a0bc88393d65807d751a614207b7129a310ca4fe76a74e5c7da5fa5671417e", size = 12919388, upload-time = "2025-11-16T22:51:45.275Z" }, - { url = "https://files.pythonhosted.org/packages/07/2b/29fd75ce45d22a39c61aad74f3d718e7ab67ccf839ca8b60866054eb15f8/numpy-2.3.5-cp314-cp314-win_arm64.whl", hash = "sha256:aeffcab3d4b43712bb7a60b65f6044d444e75e563ff6180af8f98dd4b905dfd2", size = 10476651, upload-time = "2025-11-16T22:51:47.749Z" }, - { url = 
"https://files.pythonhosted.org/packages/17/e1/f6a721234ebd4d87084cfa68d081bcba2f5cfe1974f7de4e0e8b9b2a2ba1/numpy-2.3.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:17531366a2e3a9e30762c000f2c43a9aaa05728712e25c11ce1dbe700c53ad41", size = 16834503, upload-time = "2025-11-16T22:51:50.443Z" }, - { url = "https://files.pythonhosted.org/packages/5c/1c/baf7ffdc3af9c356e1c135e57ab7cf8d247931b9554f55c467efe2c69eff/numpy-2.3.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d21644de1b609825ede2f48be98dfde4656aefc713654eeee280e37cadc4e0ad", size = 12381612, upload-time = "2025-11-16T22:51:53.609Z" }, - { url = "https://files.pythonhosted.org/packages/74/91/f7f0295151407ddc9ba34e699013c32c3c91944f9b35fcf9281163dc1468/numpy-2.3.5-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:c804e3a5aba5460c73955c955bdbd5c08c354954e9270a2c1565f62e866bdc39", size = 5210042, upload-time = "2025-11-16T22:51:56.213Z" }, - { url = "https://files.pythonhosted.org/packages/2e/3b/78aebf345104ec50dd50a4d06ddeb46a9ff5261c33bcc58b1c4f12f85ec2/numpy-2.3.5-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:cc0a57f895b96ec78969c34f682c602bf8da1a0270b09bc65673df2e7638ec20", size = 6724502, upload-time = "2025-11-16T22:51:58.584Z" }, - { url = "https://files.pythonhosted.org/packages/02/c6/7c34b528740512e57ef1b7c8337ab0b4f0bddf34c723b8996c675bc2bc91/numpy-2.3.5-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:900218e456384ea676e24ea6a0417f030a3b07306d29d7ad843957b40a9d8d52", size = 14308962, upload-time = "2025-11-16T22:52:01.698Z" }, - { url = "https://files.pythonhosted.org/packages/80/35/09d433c5262bc32d725bafc619e095b6a6651caf94027a03da624146f655/numpy-2.3.5-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09a1bea522b25109bf8e6f3027bd810f7c1085c64a0c7ce050c1676ad0ba010b", size = 16655054, upload-time = "2025-11-16T22:52:04.267Z" }, - { url = 
"https://files.pythonhosted.org/packages/7a/ab/6a7b259703c09a88804fa2430b43d6457b692378f6b74b356155283566ac/numpy-2.3.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04822c00b5fd0323c8166d66c701dc31b7fbd252c100acd708c48f763968d6a3", size = 16091613, upload-time = "2025-11-16T22:52:08.651Z" }, - { url = "https://files.pythonhosted.org/packages/c2/88/330da2071e8771e60d1038166ff9d73f29da37b01ec3eb43cb1427464e10/numpy-2.3.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d6889ec4ec662a1a37eb4b4fb26b6100841804dac55bd9df579e326cdc146227", size = 18591147, upload-time = "2025-11-16T22:52:11.453Z" }, - { url = "https://files.pythonhosted.org/packages/51/41/851c4b4082402d9ea860c3626db5d5df47164a712cb23b54be028b184c1c/numpy-2.3.5-cp314-cp314t-win32.whl", hash = "sha256:93eebbcf1aafdf7e2ddd44c2923e2672e1010bddc014138b229e49725b4d6be5", size = 6479806, upload-time = "2025-11-16T22:52:14.641Z" }, - { url = "https://files.pythonhosted.org/packages/90/30/d48bde1dfd93332fa557cff1972fbc039e055a52021fbef4c2c4b1eefd17/numpy-2.3.5-cp314-cp314t-win_amd64.whl", hash = "sha256:c8a9958e88b65c3b27e22ca2a076311636850b612d6bbfb76e8d156aacde2aaf", size = 13105760, upload-time = "2025-11-16T22:52:17.975Z" }, - { url = "https://files.pythonhosted.org/packages/2d/fd/4b5eb0b3e888d86aee4d198c23acec7d214baaf17ea93c1adec94c9518b9/numpy-2.3.5-cp314-cp314t-win_arm64.whl", hash = "sha256:6203fdf9f3dc5bdaed7319ad8698e685c7a3be10819f41d32a0723e611733b42", size = 10545459, upload-time = "2025-11-16T22:52:20.55Z" }, - { url = "https://files.pythonhosted.org/packages/c6/65/f9dea8e109371ade9c782b4e4756a82edf9d3366bca495d84d79859a0b79/numpy-2.3.5-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f0963b55cdd70fad460fa4c1341f12f976bb26cb66021a5580329bd498988310", size = 16910689, upload-time = "2025-11-16T22:52:23.247Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/4f/edb00032a8fb92ec0a679d3830368355da91a69cab6f3e9c21b64d0bb986/numpy-2.3.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f4255143f5160d0de972d28c8f9665d882b5f61309d8362fdd3e103cf7bf010c", size = 12457053, upload-time = "2025-11-16T22:52:26.367Z" }, - { url = "https://files.pythonhosted.org/packages/16/a4/e8a53b5abd500a63836a29ebe145fc1ab1f2eefe1cfe59276020373ae0aa/numpy-2.3.5-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:a4b9159734b326535f4dd01d947f919c6eefd2d9827466a696c44ced82dfbc18", size = 5285635, upload-time = "2025-11-16T22:52:29.266Z" }, - { url = "https://files.pythonhosted.org/packages/a3/2f/37eeb9014d9c8b3e9c55bc599c68263ca44fdbc12a93e45a21d1d56df737/numpy-2.3.5-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:2feae0d2c91d46e59fcd62784a3a83b3fb677fead592ce51b5a6fbb4f95965ff", size = 6801770, upload-time = "2025-11-16T22:52:31.421Z" }, - { url = "https://files.pythonhosted.org/packages/7d/e4/68d2f474df2cb671b2b6c2986a02e520671295647dad82484cde80ca427b/numpy-2.3.5-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ffac52f28a7849ad7576293c0cb7b9f08304e8f7d738a8cb8a90ec4c55a998eb", size = 14391768, upload-time = "2025-11-16T22:52:33.593Z" }, - { url = "https://files.pythonhosted.org/packages/b8/50/94ccd8a2b141cb50651fddd4f6a48874acb3c91c8f0842b08a6afc4b0b21/numpy-2.3.5-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63c0e9e7eea69588479ebf4a8a270d5ac22763cc5854e9a7eae952a3908103f7", size = 16729263, upload-time = "2025-11-16T22:52:36.369Z" }, - { url = "https://files.pythonhosted.org/packages/2d/ee/346fa473e666fe14c52fcdd19ec2424157290a032d4c41f98127bfb31ac7/numpy-2.3.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f16417ec91f12f814b10bafe79ef77e70113a2f5f7018640e7425ff979253425", size = 12967213, upload-time = "2025-11-16T22:52:39.38Z" }, +sdist = { url = 
"https://files.pythonhosted.org/packages/a4/7a/6a3d14e205d292b738db449d0de649b373a59edb0d0b4493821d0a3e8718/numpy-2.4.0.tar.gz", hash = "sha256:6e504f7b16118198f138ef31ba24d985b124c2c469fe8467007cf30fd992f934", size = 20685720, upload-time = "2025-12-20T16:18:19.023Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/7e/7bae7cbcc2f8132271967aa03e03954fc1e48aa1f3bf32b29ca95fbef352/numpy-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:316b2f2584682318539f0bcaca5a496ce9ca78c88066579ebd11fd06f8e4741e", size = 16940166, upload-time = "2025-12-20T16:15:43.434Z" }, + { url = "https://files.pythonhosted.org/packages/0f/27/6c13f5b46776d6246ec884ac5817452672156a506d08a1f2abb39961930a/numpy-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2718c1de8504121714234b6f8241d0019450353276c88b9453c9c3d92e101db", size = 12641781, upload-time = "2025-12-20T16:15:45.701Z" }, + { url = "https://files.pythonhosted.org/packages/14/1c/83b4998d4860d15283241d9e5215f28b40ac31f497c04b12fa7f428ff370/numpy-2.4.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:21555da4ec4a0c942520ead42c3b0dc9477441e085c42b0fbdd6a084869a6f6b", size = 5470247, upload-time = "2025-12-20T16:15:47.943Z" }, + { url = "https://files.pythonhosted.org/packages/54/08/cbce72c835d937795571b0464b52069f869c9e78b0c076d416c5269d2718/numpy-2.4.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:413aa561266a4be2d06cd2b9665e89d9f54c543f418773076a76adcf2af08bc7", size = 6799807, upload-time = "2025-12-20T16:15:49.795Z" }, + { url = "https://files.pythonhosted.org/packages/ff/be/2e647961cd8c980591d75cdcd9e8f647d69fbe05e2a25613dc0a2ea5fb1a/numpy-2.4.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0feafc9e03128074689183031181fac0897ff169692d8492066e949041096548", size = 14701992, upload-time = "2025-12-20T16:15:51.615Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/fb/e1652fb8b6fd91ce6ed429143fe2e01ce714711e03e5b762615e7b36172c/numpy-2.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8fdfed3deaf1928fb7667d96e0567cdf58c2b370ea2ee7e586aa383ec2cb346", size = 16646871, upload-time = "2025-12-20T16:15:54.129Z" }, + { url = "https://files.pythonhosted.org/packages/62/23/d841207e63c4322842f7cd042ae981cffe715c73376dcad8235fb31debf1/numpy-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e06a922a469cae9a57100864caf4f8a97a1026513793969f8ba5b63137a35d25", size = 16487190, upload-time = "2025-12-20T16:15:56.147Z" }, + { url = "https://files.pythonhosted.org/packages/bc/a0/6a842c8421ebfdec0a230e65f61e0dabda6edbef443d999d79b87c273965/numpy-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:927ccf5cd17c48f801f4ed43a7e5673a2724bd2171460be3e3894e6e332ef83a", size = 18580762, upload-time = "2025-12-20T16:15:58.524Z" }, + { url = "https://files.pythonhosted.org/packages/0a/d1/c79e0046641186f2134dde05e6181825b911f8bdcef31b19ddd16e232847/numpy-2.4.0-cp311-cp311-win32.whl", hash = "sha256:882567b7ae57c1b1a0250208cc21a7976d8cbcc49d5a322e607e6f09c9e0bd53", size = 6233359, upload-time = "2025-12-20T16:16:00.938Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f0/74965001d231f28184d6305b8cdc1b6fcd4bf23033f6cb039cfe76c9fca7/numpy-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:8b986403023c8f3bf8f487c2e6186afda156174d31c175f747d8934dfddf3479", size = 12601132, upload-time = "2025-12-20T16:16:02.484Z" }, + { url = "https://files.pythonhosted.org/packages/65/32/55408d0f46dfebce38017f5bd931affa7256ad6beac1a92a012e1fbc67a7/numpy-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:3f3096405acc48887458bbf9f6814d43785ac7ba2a57ea6442b581dedbc60ce6", size = 10573977, upload-time = "2025-12-20T16:16:04.77Z" }, + { url = 
"https://files.pythonhosted.org/packages/8b/ff/f6400ffec95de41c74b8e73df32e3fff1830633193a7b1e409be7fb1bb8c/numpy-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2a8b6bb8369abefb8bd1801b054ad50e02b3275c8614dc6e5b0373c305291037", size = 16653117, upload-time = "2025-12-20T16:16:06.709Z" }, + { url = "https://files.pythonhosted.org/packages/fd/28/6c23e97450035072e8d830a3c411bf1abd1f42c611ff9d29e3d8f55c6252/numpy-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e284ca13d5a8367e43734148622caf0b261b275673823593e3e3634a6490f83", size = 12369711, upload-time = "2025-12-20T16:16:08.758Z" }, + { url = "https://files.pythonhosted.org/packages/bc/af/acbef97b630ab1bb45e6a7d01d1452e4251aa88ce680ac36e56c272120ec/numpy-2.4.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:49ff32b09f5aa0cd30a20c2b39db3e669c845589f2b7fc910365210887e39344", size = 5198355, upload-time = "2025-12-20T16:16:10.902Z" }, + { url = "https://files.pythonhosted.org/packages/c1/c8/4e0d436b66b826f2e53330adaa6311f5cac9871a5b5c31ad773b27f25a74/numpy-2.4.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:36cbfb13c152b1c7c184ddac43765db8ad672567e7bafff2cc755a09917ed2e6", size = 6545298, upload-time = "2025-12-20T16:16:12.607Z" }, + { url = "https://files.pythonhosted.org/packages/ef/27/e1f5d144ab54eac34875e79037011d511ac57b21b220063310cb96c80fbc/numpy-2.4.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35ddc8f4914466e6fc954c76527aa91aa763682a4f6d73249ef20b418fe6effb", size = 14398387, upload-time = "2025-12-20T16:16:14.257Z" }, + { url = "https://files.pythonhosted.org/packages/67/64/4cb909dd5ab09a9a5d086eff9586e69e827b88a5585517386879474f4cf7/numpy-2.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc578891de1db95b2a35001b695451767b580bb45753717498213c5ff3c41d63", size = 16363091, upload-time = "2025-12-20T16:16:17.32Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/9c/8efe24577523ec6809261859737cf117b0eb6fdb655abdfdc81b2e468ce4/numpy-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:98e81648e0b36e325ab67e46b5400a7a6d4a22b8a7c8e8bbfe20e7db7906bf95", size = 16176394, upload-time = "2025-12-20T16:16:19.524Z" }, + { url = "https://files.pythonhosted.org/packages/61/f0/1687441ece7b47a62e45a1f82015352c240765c707928edd8aef875d5951/numpy-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d57b5046c120561ba8fa8e4030fbb8b822f3063910fa901ffadf16e2b7128ad6", size = 18287378, upload-time = "2025-12-20T16:16:22.866Z" }, + { url = "https://files.pythonhosted.org/packages/d3/6f/f868765d44e6fc466467ed810ba9d8d6db1add7d4a748abfa2a4c99a3194/numpy-2.4.0-cp312-cp312-win32.whl", hash = "sha256:92190db305a6f48734d3982f2c60fa30d6b5ee9bff10f2887b930d7b40119f4c", size = 5955432, upload-time = "2025-12-20T16:16:25.06Z" }, + { url = "https://files.pythonhosted.org/packages/d4/b5/94c1e79fcbab38d1ca15e13777477b2914dd2d559b410f96949d6637b085/numpy-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:680060061adb2d74ce352628cb798cfdec399068aa7f07ba9fb818b2b3305f98", size = 12306201, upload-time = "2025-12-20T16:16:26.979Z" }, + { url = "https://files.pythonhosted.org/packages/70/09/c39dadf0b13bb0768cd29d6a3aaff1fb7c6905ac40e9aaeca26b1c086e06/numpy-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:39699233bc72dd482da1415dcb06076e32f60eddc796a796c5fb6c5efce94667", size = 10308234, upload-time = "2025-12-20T16:16:29.417Z" }, + { url = "https://files.pythonhosted.org/packages/a7/0d/853fd96372eda07c824d24adf02e8bc92bb3731b43a9b2a39161c3667cc4/numpy-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a152d86a3ae00ba5f47b3acf3b827509fd0b6cb7d3259665e63dafbad22a75ea", size = 16649088, upload-time = "2025-12-20T16:16:31.421Z" }, + { url = "https://files.pythonhosted.org/packages/e3/37/cc636f1f2a9f585434e20a3e6e63422f70bfe4f7f6698e941db52ea1ac9a/numpy-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:39b19251dec4de8ff8496cd0806cbe27bf0684f765abb1f4809554de93785f2d", size = 12364065, upload-time = "2025-12-20T16:16:33.491Z" }, + { url = "https://files.pythonhosted.org/packages/ed/69/0b78f37ca3690969beee54103ce5f6021709134e8020767e93ba691a72f1/numpy-2.4.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:009bd0ea12d3c784b6639a8457537016ce5172109e585338e11334f6a7bb88ee", size = 5192640, upload-time = "2025-12-20T16:16:35.636Z" }, + { url = "https://files.pythonhosted.org/packages/1d/2a/08569f8252abf590294dbb09a430543ec8f8cc710383abfb3e75cc73aeda/numpy-2.4.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5fe44e277225fd3dff6882d86d3d447205d43532c3627313d17e754fb3905a0e", size = 6541556, upload-time = "2025-12-20T16:16:37.276Z" }, + { url = "https://files.pythonhosted.org/packages/93/e9/a949885a4e177493d61519377952186b6cbfdf1d6002764c664ba28349b5/numpy-2.4.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f935c4493eda9069851058fa0d9e39dbf6286be690066509305e52912714dbb2", size = 14396562, upload-time = "2025-12-20T16:16:38.953Z" }, + { url = "https://files.pythonhosted.org/packages/99/98/9d4ad53b0e9ef901c2ef1d550d2136f5ac42d3fd2988390a6def32e23e48/numpy-2.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8cfa5f29a695cb7438965e6c3e8d06e0416060cf0d709c1b1c1653a939bf5c2a", size = 16351719, upload-time = "2025-12-20T16:16:41.503Z" }, + { url = "https://files.pythonhosted.org/packages/28/de/5f3711a38341d6e8dd619f6353251a0cdd07f3d6d101a8fd46f4ef87f895/numpy-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba0cb30acd3ef11c94dc27fbfba68940652492bc107075e7ffe23057f9425681", size = 16176053, upload-time = "2025-12-20T16:16:44.552Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5b/2a3753dc43916501b4183532e7ace862e13211042bceafa253afb5c71272/numpy-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:60e8c196cd82cbbd4f130b5290007e13e6de3eca79f0d4d38014769d96a7c475", size = 
18277859, upload-time = "2025-12-20T16:16:47.174Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c5/a18bcdd07a941db3076ef489d036ab16d2bfc2eae0cf27e5a26e29189434/numpy-2.4.0-cp313-cp313-win32.whl", hash = "sha256:5f48cb3e88fbc294dc90e215d86fbaf1c852c63dbdb6c3a3e63f45c4b57f7344", size = 5953849, upload-time = "2025-12-20T16:16:49.554Z" }, + { url = "https://files.pythonhosted.org/packages/4f/f1/719010ff8061da6e8a26e1980cf090412d4f5f8060b31f0c45d77dd67a01/numpy-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:a899699294f28f7be8992853c0c60741f16ff199205e2e6cdca155762cbaa59d", size = 12302840, upload-time = "2025-12-20T16:16:51.227Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5a/b3d259083ed8b4d335270c76966cb6cf14a5d1b69e1a608994ac57a659e6/numpy-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:9198f447e1dc5647d07c9a6bbe2063cc0132728cc7175b39dbc796da5b54920d", size = 10308509, upload-time = "2025-12-20T16:16:53.313Z" }, + { url = "https://files.pythonhosted.org/packages/31/01/95edcffd1bb6c0633df4e808130545c4f07383ab629ac7e316fb44fff677/numpy-2.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74623f2ab5cc3f7c886add4f735d1031a1d2be4a4ae63c0546cfd74e7a31ddf6", size = 12491815, upload-time = "2025-12-20T16:16:55.496Z" }, + { url = "https://files.pythonhosted.org/packages/59/ea/5644b8baa92cc1c7163b4b4458c8679852733fa74ca49c942cfa82ded4e0/numpy-2.4.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:0804a8e4ab070d1d35496e65ffd3cf8114c136a2b81f61dfab0de4b218aacfd5", size = 5320321, upload-time = "2025-12-20T16:16:57.468Z" }, + { url = "https://files.pythonhosted.org/packages/26/4e/e10938106d70bc21319bd6a86ae726da37edc802ce35a3a71ecdf1fdfe7f/numpy-2.4.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:02a2038eb27f9443a8b266a66911e926566b5a6ffd1a689b588f7f35b81e7dc3", size = 6641635, upload-time = "2025-12-20T16:16:59.379Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/8d/a8828e3eaf5c0b4ab116924df82f24ce3416fa38d0674d8f708ddc6c8aac/numpy-2.4.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1889b3a3f47a7b5bee16bc25a2145bd7cb91897f815ce3499db64c7458b6d91d", size = 14456053, upload-time = "2025-12-20T16:17:01.768Z" }, + { url = "https://files.pythonhosted.org/packages/68/a1/17d97609d87d4520aa5ae2dcfb32305654550ac6a35effb946d303e594ce/numpy-2.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85eef4cb5625c47ee6425c58a3502555e10f45ee973da878ac8248ad58c136f3", size = 16401702, upload-time = "2025-12-20T16:17:04.235Z" }, + { url = "https://files.pythonhosted.org/packages/18/32/0f13c1b2d22bea1118356b8b963195446f3af124ed7a5adfa8fdecb1b6ca/numpy-2.4.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6dc8b7e2f4eb184b37655195f421836cfae6f58197b67e3ffc501f1333d993fa", size = 16242493, upload-time = "2025-12-20T16:17:06.856Z" }, + { url = "https://files.pythonhosted.org/packages/ae/23/48f21e3d309fbc137c068a1475358cbd3a901b3987dcfc97a029ab3068e2/numpy-2.4.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:44aba2f0cafd287871a495fb3163408b0bd25bbce135c6f621534a07f4f7875c", size = 18324222, upload-time = "2025-12-20T16:17:09.392Z" }, + { url = "https://files.pythonhosted.org/packages/ac/52/41f3d71296a3dcaa4f456aaa3c6fc8e745b43d0552b6bde56571bb4b4a0f/numpy-2.4.0-cp313-cp313t-win32.whl", hash = "sha256:20c115517513831860c573996e395707aa9fb691eb179200125c250e895fcd93", size = 6076216, upload-time = "2025-12-20T16:17:11.437Z" }, + { url = "https://files.pythonhosted.org/packages/35/ff/46fbfe60ab0710d2a2b16995f708750307d30eccbb4c38371ea9e986866e/numpy-2.4.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b48e35f4ab6f6a7597c46e301126ceba4c44cd3280e3750f85db48b082624fa4", size = 12444263, upload-time = "2025-12-20T16:17:13.182Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/e3/9189ab319c01d2ed556c932ccf55064c5d75bb5850d1df7a482ce0badead/numpy-2.4.0-cp313-cp313t-win_arm64.whl", hash = "sha256:4d1cfce39e511069b11e67cd0bd78ceff31443b7c9e5c04db73c7a19f572967c", size = 10378265, upload-time = "2025-12-20T16:17:15.211Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ed/52eac27de39d5e5a6c9aadabe672bc06f55e24a3d9010cd1183948055d76/numpy-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c95eb6db2884917d86cde0b4d4cf31adf485c8ec36bf8696dd66fa70de96f36b", size = 16647476, upload-time = "2025-12-20T16:17:17.671Z" }, + { url = "https://files.pythonhosted.org/packages/77/c0/990ce1b7fcd4e09aeaa574e2a0a839589e4b08b2ca68070f1acb1fea6736/numpy-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:65167da969cd1ec3a1df31cb221ca3a19a8aaa25370ecb17d428415e93c1935e", size = 12374563, upload-time = "2025-12-20T16:17:20.216Z" }, + { url = "https://files.pythonhosted.org/packages/37/7c/8c5e389c6ae8f5fd2277a988600d79e9625db3fff011a2d87ac80b881a4c/numpy-2.4.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3de19cfecd1465d0dcf8a5b5ea8b3155b42ed0b639dba4b71e323d74f2a3be5e", size = 5203107, upload-time = "2025-12-20T16:17:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/e6/94/ca5b3bd6a8a70a5eec9a0b8dd7f980c1eff4b8a54970a9a7fef248ef564f/numpy-2.4.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6c05483c3136ac4c91b4e81903cb53a8707d316f488124d0398499a4f8e8ef51", size = 6538067, upload-time = "2025-12-20T16:17:24.001Z" }, + { url = "https://files.pythonhosted.org/packages/79/43/993eb7bb5be6761dde2b3a3a594d689cec83398e3f58f4758010f3b85727/numpy-2.4.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36667db4d6c1cea79c8930ab72fadfb4060feb4bfe724141cd4bd064d2e5f8ce", size = 14411926, upload-time = "2025-12-20T16:17:25.822Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/75/d4c43b61de473912496317a854dac54f1efec3eeb158438da6884b70bb90/numpy-2.4.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9a818668b674047fd88c4cddada7ab8f1c298812783e8328e956b78dc4807f9f", size = 16354295, upload-time = "2025-12-20T16:17:28.308Z" }, + { url = "https://files.pythonhosted.org/packages/b8/0a/b54615b47ee8736a6461a4bb6749128dd3435c5a759d5663f11f0e9af4ac/numpy-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1ee32359fb7543b7b7bd0b2f46294db27e29e7bbdf70541e81b190836cd83ded", size = 16190242, upload-time = "2025-12-20T16:17:30.993Z" }, + { url = "https://files.pythonhosted.org/packages/98/ce/ea207769aacad6246525ec6c6bbd66a2bf56c72443dc10e2f90feed29290/numpy-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e493962256a38f58283de033d8af176c5c91c084ea30f15834f7545451c42059", size = 18280875, upload-time = "2025-12-20T16:17:33.327Z" }, + { url = "https://files.pythonhosted.org/packages/17/ef/ec409437aa962ea372ed601c519a2b141701683ff028f894b7466f0ab42b/numpy-2.4.0-cp314-cp314-win32.whl", hash = "sha256:6bbaebf0d11567fa8926215ae731e1d58e6ec28a8a25235b8a47405d301332db", size = 6002530, upload-time = "2025-12-20T16:17:35.729Z" }, + { url = "https://files.pythonhosted.org/packages/5f/4a/5cb94c787a3ed1ac65e1271b968686521169a7b3ec0b6544bb3ca32960b0/numpy-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:3d857f55e7fdf7c38ab96c4558c95b97d1c685be6b05c249f5fdafcbd6f9899e", size = 12435890, upload-time = "2025-12-20T16:17:37.599Z" }, + { url = "https://files.pythonhosted.org/packages/48/a0/04b89db963af9de1104975e2544f30de89adbf75b9e75f7dd2599be12c79/numpy-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:bb50ce5fb202a26fd5404620e7ef820ad1ab3558b444cb0b55beb7ef66cd2d63", size = 10591892, upload-time = "2025-12-20T16:17:39.649Z" }, + { url = 
"https://files.pythonhosted.org/packages/53/e5/d74b5ccf6712c06c7a545025a6a71bfa03bdc7e0568b405b0d655232fd92/numpy-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:355354388cba60f2132df297e2d53053d4063f79077b67b481d21276d61fc4df", size = 12494312, upload-time = "2025-12-20T16:17:41.714Z" }, + { url = "https://files.pythonhosted.org/packages/c2/08/3ca9cc2ddf54dfee7ae9a6479c071092a228c68aef08252aa08dac2af002/numpy-2.4.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:1d8f9fde5f6dc1b6fc34df8162f3b3079365468703fee7f31d4e0cc8c63baed9", size = 5322862, upload-time = "2025-12-20T16:17:44.145Z" }, + { url = "https://files.pythonhosted.org/packages/87/74/0bb63a68394c0c1e52670cfff2e309afa41edbe11b3327d9af29e4383f34/numpy-2.4.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:e0434aa22c821f44eeb4c650b81c7fbdd8c0122c6c4b5a576a76d5a35625ecd9", size = 6644986, upload-time = "2025-12-20T16:17:46.203Z" }, + { url = "https://files.pythonhosted.org/packages/06/8f/9264d9bdbcf8236af2823623fe2f3981d740fc3461e2787e231d97c38c28/numpy-2.4.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40483b2f2d3ba7aad426443767ff5632ec3156ef09742b96913787d13c336471", size = 14457958, upload-time = "2025-12-20T16:17:48.017Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d9/f9a69ae564bbc7236a35aa883319364ef5fd41f72aa320cc1cbe66148fe2/numpy-2.4.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6a7664ddd9746e20b7325351fe1a8408d0a2bf9c63b5e898290ddc8f09544", size = 16398394, upload-time = "2025-12-20T16:17:50.409Z" }, + { url = "https://files.pythonhosted.org/packages/34/c7/39241501408dde7f885d241a98caba5421061a2c6d2b2197ac5e3aa842d8/numpy-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ecb0019d44f4cdb50b676c5d0cb4b1eae8e15d1ed3d3e6639f986fc92b2ec52c", size = 16241044, upload-time = "2025-12-20T16:17:52.661Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/95/cae7effd90e065a95e59fe710eeee05d7328ed169776dfdd9f789e032125/numpy-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d0ffd9e2e4441c96a9c91ec1783285d80bf835b677853fc2770a89d50c1e48ac", size = 18321772, upload-time = "2025-12-20T16:17:54.947Z" }, + { url = "https://files.pythonhosted.org/packages/96/df/3c6c279accd2bfb968a76298e5b276310bd55d243df4fa8ac5816d79347d/numpy-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:77f0d13fa87036d7553bf81f0e1fe3ce68d14c9976c9851744e4d3e91127e95f", size = 6148320, upload-time = "2025-12-20T16:17:57.249Z" }, + { url = "https://files.pythonhosted.org/packages/92/8d/f23033cce252e7a75cae853d17f582e86534c46404dea1c8ee094a9d6d84/numpy-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b1f5b45829ac1848893f0ddf5cb326110604d6df96cdc255b0bf9edd154104d4", size = 12623460, upload-time = "2025-12-20T16:17:58.963Z" }, + { url = "https://files.pythonhosted.org/packages/a4/4f/1f8475907d1a7c4ef9020edf7f39ea2422ec896849245f00688e4b268a71/numpy-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:23a3e9d1a6f360267e8fbb38ba5db355a6a7e9be71d7fce7ab3125e88bb646c8", size = 10661799, upload-time = "2025-12-20T16:18:01.078Z" }, + { url = "https://files.pythonhosted.org/packages/4b/ef/088e7c7342f300aaf3ee5f2c821c4b9996a1bef2aaf6a49cc8ab4883758e/numpy-2.4.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b54c83f1c0c0f1d748dca0af516062b8829d53d1f0c402be24b4257a9c48ada6", size = 16819003, upload-time = "2025-12-20T16:18:03.41Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ce/a53017b5443b4b84517182d463fc7bcc2adb4faa8b20813f8e5f5aeb5faa/numpy-2.4.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:aabb081ca0ec5d39591fc33018cd4b3f96e1a2dd6756282029986d00a785fba4", size = 12567105, upload-time = "2025-12-20T16:18:05.594Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/58/5ff91b161f2ec650c88a626c3905d938c89aaadabd0431e6d9c1330c83e2/numpy-2.4.0-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:8eafe7c36c8430b7794edeab3087dec7bf31d634d92f2af9949434b9d1964cba", size = 5395590, upload-time = "2025-12-20T16:18:08.031Z" }, + { url = "https://files.pythonhosted.org/packages/1d/4e/f1a084106df8c2df8132fc437e56987308e0524836aa7733721c8429d4fe/numpy-2.4.0-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:2f585f52b2baf07ff3356158d9268ea095e221371f1074fadea2f42544d58b4d", size = 6709947, upload-time = "2025-12-20T16:18:09.836Z" }, + { url = "https://files.pythonhosted.org/packages/63/09/3d8aeb809c0332c3f642da812ac2e3d74fc9252b3021f8c30c82e99e3f3d/numpy-2.4.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32ed06d0fe9cae27d8fb5f400c63ccee72370599c75e683a6358dd3a4fb50aaf", size = 14535119, upload-time = "2025-12-20T16:18:12.105Z" }, + { url = "https://files.pythonhosted.org/packages/fd/7f/68f0fc43a2cbdc6bb239160c754d87c922f60fbaa0fa3cd3d312b8a7f5ee/numpy-2.4.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:57c540ed8fb1f05cb997c6761cd56db72395b0d6985e90571ff660452ade4f98", size = 16475815, upload-time = "2025-12-20T16:18:14.433Z" }, + { url = "https://files.pythonhosted.org/packages/11/73/edeacba3167b1ca66d51b1a5a14697c2c40098b5ffa01811c67b1785a5ab/numpy-2.4.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a39fb973a726e63223287adc6dafe444ce75af952d711e400f3bf2b36ef55a7b", size = 12489376, upload-time = "2025-12-20T16:18:16.524Z" }, ] [[package]] @@ -3061,7 +3059,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "absl-py" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "torch", marker = "sys_platform == 'never'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/02/ad/046a097b63a96c1ba1d85f0031dbe7fcbdb33e6c445dfbaba2ffaefdd497/nv_grouped_gemm-1.1.4.post8.tar.gz", hash = "sha256:ab321693f0292cfd8a26dc7b6f14decd9eb00e209494de7218e4fad36191275d", size = 20821209, upload-time = "2025-12-17T02:22:38.432Z" } @@ -3151,21 +3149,21 @@ wheels = [ [[package]] name = "nvidia-cudnn-frontend" -version = "1.16.0" +version = "1.17.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/cf/3cd3cc682df5488288c6043fc0977090497ff015a082ab160076fecb080a/nvidia_cudnn_frontend-1.16.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83ecbe6d1145dc208a9ae82aa0b45b2c8f74ed8a43d3a102a13eef2117e2fedd", size = 1835542, upload-time = "2025-11-07T01:28:20.133Z" }, - { url = "https://files.pythonhosted.org/packages/92/45/87f3f2d94a928be21459949b03b0b8bcea13531d30094ad84a8ae4fca761/nvidia_cudnn_frontend-1.16.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:77cb06b91877c8489363867434ba1d9936f3e10bf7ed98d82e98f5f578611920", size = 1950339, upload-time = "2025-11-07T01:31:41.69Z" }, 
- { url = "https://files.pythonhosted.org/packages/be/f5/1662f18084ef4441bfb3a01383cbf77194905b53474dcb51c0d0f373c74b/nvidia_cudnn_frontend-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:ee3f3886f107919dad48cbc905fa6ae9207c8d7d5a24165e55625ea96f0fe40f", size = 1367883, upload-time = "2025-11-07T01:25:17.791Z" }, - { url = "https://files.pythonhosted.org/packages/10/b7/d0a3a337f5e83f26ff79a7fd63a859181ff2911f1d905d6fbab5fc80170d/nvidia_cudnn_frontend-1.16.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c360d5840d6eb597aade9e9c8780e24aec283b8e6bc97d52881c821a35c92aa9", size = 1837573, upload-time = "2025-11-07T01:29:05.507Z" }, - { url = "https://files.pythonhosted.org/packages/95/dc/465a14f2d235778405f2e84fce336d07ab045bf1c7df6404bdf8033e06a8/nvidia_cudnn_frontend-1.16.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5c4a8fc573d85a86e08b15d9bf37f729e2487298781867a492a59cde6ac295e2", size = 1952630, upload-time = "2025-11-07T01:32:00.242Z" }, - { url = "https://files.pythonhosted.org/packages/3b/89/f14435f616603a999975930c4456d6140127f6acb19a877c752beccad837/nvidia_cudnn_frontend-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:a257f10a932ffde9741f644efd3611acf77e2fd89d493d81bc6a8353c48f1ec2", size = 1368775, upload-time = "2025-11-07T01:25:42.252Z" }, - { url = "https://files.pythonhosted.org/packages/00/39/79b606e805abd67ab4fa72f752a5413a496159f10d94fbdb1d67bb5ae86c/nvidia_cudnn_frontend-1.16.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd6fdd71c0896ff2ca1809d914cbd17f2904d55863f8881f47946e1d634c7a88", size = 1839271, upload-time = "2025-11-07T01:29:53.06Z" }, - { url = "https://files.pythonhosted.org/packages/09/21/a0e0d50ba8d7b639fe635500fee0d9c0319561b1ae72176d7024ec04b439/nvidia_cudnn_frontend-1.16.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:16efb069d4bda4d3b99134f59f376cfd4d09558298bd96af778fdc7f2851e696", size = 1954062, 
upload-time = "2025-11-07T01:32:18.556Z" }, - { url = "https://files.pythonhosted.org/packages/ce/d6/30ae67bb9c010e9459d1211c56d73373eb4e3dd9f57f4c3c1fe0966efcb1/nvidia_cudnn_frontend-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:7b7860db03767c158accbe0b4e9c9553506513cc970ff08ed28c7761681ac466", size = 1368435, upload-time = "2025-11-07T01:26:28.022Z" }, - { url = "https://files.pythonhosted.org/packages/32/2c/b4376afef0a6342c56e82e3465c1f8f5c719f588293a50dd04019a22ae6e/nvidia_cudnn_frontend-1.16.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b6bcb3a2fbff80538958e21e2227520f082a961164865aaeedaac527f61084f9", size = 1839805, upload-time = "2025-11-07T01:30:31.056Z" }, - { url = "https://files.pythonhosted.org/packages/71/13/836b90354036154ab82db3861210e5736983fe1fc44bb39c146ad93b333b/nvidia_cudnn_frontend-1.16.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cbdad88b2bec5dde837f8fa7632022334cddb4756f923b5421c06a712cb59d31", size = 1953953, upload-time = "2025-11-07T01:33:03.781Z" }, - { url = "https://files.pythonhosted.org/packages/e5/30/3025f34f2c86ceef85134dc1f323f8cf2a26d3ffddc5ada48528c80bfae1/nvidia_cudnn_frontend-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:138de2bc4697fabb2eb2f0f601a7e31f8fe97874908e26e33d737276f335473c", size = 1368359, upload-time = "2025-11-07T01:26:51.561Z" }, + { url = "https://files.pythonhosted.org/packages/14/94/b224e65becfb5ab02c5b331aeb73c98f6d95cde5326d7698a2fc0d20e84a/nvidia_cudnn_frontend-1.17.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4835ee3fc350782c89cdd290088ade69464faaa5dd66ccb0b215ad481ab3b41b", size = 1911670, upload-time = "2025-12-20T00:26:36.302Z" }, + { url = "https://files.pythonhosted.org/packages/d5/05/54afda6fc47838bd68a029067d8019e6b495dca0570d7e970cbb2c3e0b32/nvidia_cudnn_frontend-1.17.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:1da7e972dbba939ad21111f1208815b8c8024cbf72aa6c1eb223b14b2049d4b6", size = 2033618, upload-time = "2025-12-20T00:24:42.991Z" }, + { url = "https://files.pythonhosted.org/packages/83/97/77ad90fac9372b0420885f16a2afaca95f78b082fa9d6a082d51a7c96bd3/nvidia_cudnn_frontend-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:21c5b2ce097f72c6510cbf974ce8ea9a31b34989dd9209d7187584a6100e57e5", size = 1440589, upload-time = "2025-12-20T00:29:17.641Z" }, + { url = "https://files.pythonhosted.org/packages/4e/4a/a903c57ef5aaa32aa074007ba4d50ed7cbc80a8092ddb84fe9d879a69bbb/nvidia_cudnn_frontend-1.17.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:961004000a2c21dd4a03f816534629105cf49125a643dbb49abbc97021e66d20", size = 1911775, upload-time = "2025-12-20T00:27:11.297Z" }, + { url = "https://files.pythonhosted.org/packages/15/20/80c4f5d62ebc58b8db8d25a2ee11f3246bb8947addea37c229540bcc05ac/nvidia_cudnn_frontend-1.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6ea44a8f2c0cfd20868b239ea13a2e0f32895dab868f6ff2bee01caf3778d273", size = 2035158, upload-time = "2025-12-20T00:25:00.9Z" }, + { url = "https://files.pythonhosted.org/packages/5f/18/c24375c8d579c53a99a2d7428397288a94c7ea411d1823e3b8dc3cef50dc/nvidia_cudnn_frontend-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:8dd6cc197a58d63da4d146a1febc1f99d425374d159f9b00628b140c65acb486", size = 1441316, upload-time = "2025-12-20T00:29:34.951Z" }, + { url = "https://files.pythonhosted.org/packages/42/d9/f58ed6292c9396f7422812a0a2d9f80cc5a623ea6c758bcb3d34d4795bb8/nvidia_cudnn_frontend-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de0c473f32d705abcf14f351615f7ffbeed7320e3499cf2195ae5689652a2592", size = 1917620, upload-time = "2025-12-20T00:27:46.179Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/eb/c641135632bd2afc21339aadee96af4c5db1460dfa07ca74836de75a590f/nvidia_cudnn_frontend-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c913c87fca691a91385287f2587575531933acfebc85c33dbcecb191886c7a53", size = 2038994, upload-time = "2025-12-20T00:25:18.9Z" }, + { url = "https://files.pythonhosted.org/packages/82/49/a92da03eb43bde90be770a43666c5ab26b4f8b15f6e46c4b0b0e84f37994/nvidia_cudnn_frontend-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0d4cfd03961592108abd1ba246e43c8bb7540aed984df860256d0bff181de98", size = 1441271, upload-time = "2025-12-20T00:29:52.056Z" }, + { url = "https://files.pythonhosted.org/packages/99/96/4d55a559dff3175599fe15d83c853f051526b91994b083ec36b12caae776/nvidia_cudnn_frontend-1.17.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3800a1fe3d41a9206281475b1c8c438b02cb7e3c7e262d13f0a101edec223cb6", size = 1917065, upload-time = "2025-12-20T00:28:21.402Z" }, + { url = "https://files.pythonhosted.org/packages/20/f6/5af63c254d7260dd1e974b2300eae9b157998b9d958f79c98ddaada0a0bf/nvidia_cudnn_frontend-1.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5adaf4a930b3be5ed019e1a25cfec7cc2bf444592a54a7639c28149b9227c2a4", size = 2039180, upload-time = "2025-12-20T00:25:36.695Z" }, + { url = "https://files.pythonhosted.org/packages/64/ee/6de6aec1e42c859134312e6d5348d6f036b2f1b825e6eae92f9a429eccc4/nvidia_cudnn_frontend-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:5c6a120fb54b157585ce6587153fc7086081af961f284f2553e01ba7c7a80c1a", size = 1441177, upload-time = "2025-12-20T00:30:09.927Z" }, ] [[package]] @@ -3240,23 +3238,23 @@ wheels = [ [[package]] name = "nvidia-cutlass-dsl" -version = "4.3.3" +version = "4.3.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cuda-python" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = 
"(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "typing-extensions" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/48/52907ac203c6de58b451511e251c8b1fc77c414dcb32aef3a0cd5194c7bd/nvidia_cutlass_dsl-4.3.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:dca550c8a7f7556a4c46bd4b85453342ae4e70600dc4aa3b5a6f1ebcd39a5ce5", size = 58734224, upload-time = "2025-12-10T09:45:22.008Z" }, - { url = "https://files.pythonhosted.org/packages/44/d7/f1936fdf697a8b76eea1f60d4bcfe41faa015e5bca925c4e767035e6857a/nvidia_cutlass_dsl-4.3.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0d92144b9f161328be4a6734911c101d03c7d5335e307112ad579d826d7ac3b1", size = 58596215, upload-time = "2025-12-10T10:35:19.436Z" }, - { url = "https://files.pythonhosted.org/packages/53/ff/41a855a356067cab074c77e79ddb308a8d3df0e74659bdc2195f5c19bb10/nvidia_cutlass_dsl-4.3.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:7d3914b3e865cf17334d3139c11d38aed1160b5855c29eaa4e3a470ea1fcfaba", size = 58731282, upload-time = "2025-12-17T09:17:36.918Z" }, - { url = 
"https://files.pythonhosted.org/packages/ef/75/79f494e08b85ea921eb376a5363a7a357db2352a6a1dfdfc659721fe94b2/nvidia_cutlass_dsl-4.3.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:046f3fc3fe3fa60f3207133e57512f2f5581ca36943f0763f3f7e8ab11180e16", size = 58596543, upload-time = "2025-12-10T10:09:13.657Z" }, - { url = "https://files.pythonhosted.org/packages/c6/d4/7c5ef53ccf75d7f99a9ea29cae9f9c0233229b75b3b22f85a4ef4f52e6ab/nvidia_cutlass_dsl-4.3.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3278526f54bddd920d8e539771e5820c6166c549a1e67813375025f39417dec6", size = 58734009, upload-time = "2025-12-10T09:23:29.305Z" }, - { url = "https://files.pythonhosted.org/packages/88/a8/a27562194cc4182c67793cd21c5dbf9468cd5a49c775a487153c6f28364c/nvidia_cutlass_dsl-4.3.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f2b25816b8bb8bc332bcbf6fc341347b5d728344cf185c65af0dd73e8503d5c7", size = 58596724, upload-time = "2025-12-10T11:01:07.228Z" }, - { url = "https://files.pythonhosted.org/packages/9d/dd/83679f3467ee5827084994c2390c97659f2cda35ad824bfa936ba56295fd/nvidia_cutlass_dsl-4.3.3-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5200ede1f51f2127c53ed5e7d38849895760469160861739813f24557e1230b8", size = 58733331, upload-time = "2025-12-10T09:03:12.607Z" }, - { url = "https://files.pythonhosted.org/packages/c6/94/1f591add7341a2ecdab76fabc0b2c7a07cadf9589bb0e78c041bd8a5a81f/nvidia_cutlass_dsl-4.3.3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:0eb90254eee0bfdc73087034cab40f1ef723c26961606d3dd68e0fd6fe11115f", size = 58597870, upload-time = "2025-12-10T11:15:48.138Z" }, + { url = "https://files.pythonhosted.org/packages/ba/1f/83e48a71e0b7bed6b33b01732ae53e9f2e61dc518ab273e56ec859bb05f1/nvidia_cutlass_dsl-4.3.4-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:118508bc84f2a55ec7af3affd379bb713edf837d593218329909db67b518e700", size = 58736512, upload-time = "2025-12-21T07:40:34.715Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/f1/21166ae0b6da766e11448d32c1e69fc60ba4023de9040f6ef9c333e7b0b5/nvidia_cutlass_dsl-4.3.4-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3fdf0603ab7ec1bf6a499fbf72cff65e73b597d6e1359286808317c69aeb7c3d", size = 58598504, upload-time = "2025-12-21T07:39:43.124Z" }, + { url = "https://files.pythonhosted.org/packages/43/01/3067eaad7454a3e36523b6814f09344afa0d36f71719072a6eecd6c87a40/nvidia_cutlass_dsl-4.3.4-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c5bd21ed877da171f115123a12aae4a920035fc47eb57c807f9fba9f3df97cf4", size = 58733573, upload-time = "2025-12-21T07:41:51.364Z" }, + { url = "https://files.pythonhosted.org/packages/86/3b/f8255a1fe6841955eea7a211bc9f30fd46bd8424ea15f361d5c09b29520a/nvidia_cutlass_dsl-4.3.4-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:671936f1df909e7de377d0cc00cb4287a3458c013d34947600423e9deb827e41", size = 58598831, upload-time = "2025-12-21T07:39:17.853Z" }, + { url = "https://files.pythonhosted.org/packages/86/ee/53d22e2e14cb763927d85f7ec9748f6af6d27a2b7f43d52de014728da10e/nvidia_cutlass_dsl-4.3.4-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:57693d87677919572ab9eefa386b3f39e8e888bc4a9db7ab8730a97e8dbe06b4", size = 58736300, upload-time = "2025-12-21T07:41:25.723Z" }, + { url = "https://files.pythonhosted.org/packages/66/f6/47489e07081cd4060f08bfa4166f8ff32beaecf71c06060d03bde88f3b6c/nvidia_cutlass_dsl-4.3.4-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a48fbff859e44dd548f8f26819d97d0595acea70e3b057c91dfdb47929015c72", size = 58599014, upload-time = "2025-12-21T07:38:51.632Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2e/3aaf6121842351ec0231d5ab9d9ebe9a6e2269e9a8f7345e02f096db1ba8/nvidia_cutlass_dsl-4.3.4-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36bde25160f461f393beba81868ef9e54d5ba2e0e7666ed3e44b6dbf788af493", size = 58735620, upload-time = "2025-12-21T07:40:59.729Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/90/1da2583bda001bf678066bc970963aad3986036ac15e95eb38447fa1b51e/nvidia_cutlass_dsl-4.3.4-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:be127f0f087028fa498f50a994c49f95b2c6a518e11e2567bc3d71528bf0a504", size = 58600158, upload-time = "2025-12-21T07:40:09.36Z" }, ] [[package]] @@ -3283,7 +3281,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ninja" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "nvidia-ml-py" }, { name = "packaging" }, { name = "pulp" }, @@ -3424,7 +3422,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ml-dtypes" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or 
(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "protobuf" }, { name = "typing-extensions" }, ] @@ -3461,7 +3459,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ml-dtypes" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "onnx" }, { name = "typing-extensions" }, ] @@ -3477,7 +3475,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ml-dtypes" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 
'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "onnx" }, { name = "onnx-ir" }, { name = "packaging" }, @@ -3525,7 +3523,7 @@ version = "2.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "python-dateutil" }, { name = "pytz" }, { name = "tzdata" }, @@ -3598,11 +3596,11 @@ wheels = [ [[package]] name = "pathspec" -version = "0.12.1" +version = "1.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", 
hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/97/39352be14d20d377a387828daf9d3f765fad1ff29bd49913d5bbf4cefe61/pathspec-1.0.0.tar.gz", hash = "sha256:9ada63a23541746b0cf7d5672a39ea77eac31dd23a80470be90df83537512131", size = 129410, upload-time = "2026-01-06T03:21:22.892Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, + { url = "https://files.pythonhosted.org/packages/05/bb/39e6768529454cc2b57e1e2fa0a0a18ff64397a16303270e215a3e03285f/pathspec-1.0.0-py3-none-any.whl", hash = "sha256:1373719036e64a2b9de3b8ddd9e30afb082a915619f07265ed76d9ae507800ae", size = 54316, upload-time = "2026-01-06T03:21:21.74Z" }, ] [[package]] @@ -3616,100 +3614,100 @@ wheels = [ [[package]] name = "pillow" -version = "12.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/08/26e68b6b5da219c2a2cb7b563af008b53bb8e6b6fcb3fa40715fcdb2523a/pillow-12.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:3adfb466bbc544b926d50fe8f4a4e6abd8c6bffd28a26177594e6e9b2b76572b", size = 5289809, upload-time = "2025-10-15T18:21:27.791Z" }, - { url = "https://files.pythonhosted.org/packages/cb/e9/4e58fb097fb74c7b4758a680aacd558810a417d1edaa7000142976ef9d2f/pillow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:1ac11e8ea4f611c3c0147424eae514028b5e9077dd99ab91e1bd7bc33ff145e1", size = 4650606, upload-time = "2025-10-15T18:21:29.823Z" }, - { url = "https://files.pythonhosted.org/packages/4b/e0/1fa492aa9f77b3bc6d471c468e62bfea1823056bf7e5e4f1914d7ab2565e/pillow-12.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d49e2314c373f4c2b39446fb1a45ed333c850e09d0c59ac79b72eb3b95397363", size = 6221023, upload-time = "2025-10-15T18:21:31.415Z" }, - { url = "https://files.pythonhosted.org/packages/c1/09/4de7cd03e33734ccd0c876f0251401f1314e819cbfd89a0fcb6e77927cc6/pillow-12.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c7b2a63fd6d5246349f3d3f37b14430d73ee7e8173154461785e43036ffa96ca", size = 8024937, upload-time = "2025-10-15T18:21:33.453Z" }, - { url = "https://files.pythonhosted.org/packages/2e/69/0688e7c1390666592876d9d474f5e135abb4acb39dcb583c4dc5490f1aff/pillow-12.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d64317d2587c70324b79861babb9c09f71fbb780bad212018874b2c013d8600e", size = 6334139, upload-time = "2025-10-15T18:21:35.395Z" }, - { url = "https://files.pythonhosted.org/packages/ed/1c/880921e98f525b9b44ce747ad1ea8f73fd7e992bafe3ca5e5644bf433dea/pillow-12.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d77153e14b709fd8b8af6f66a3afbb9ed6e9fc5ccf0b6b7e1ced7b036a228782", size = 7026074, upload-time = "2025-10-15T18:21:37.219Z" }, - { url = "https://files.pythonhosted.org/packages/28/03/96f718331b19b355610ef4ebdbbde3557c726513030665071fd025745671/pillow-12.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32ed80ea8a90ee3e6fa08c21e2e091bba6eda8eccc83dbc34c95169507a91f10", size = 6448852, upload-time = "2025-10-15T18:21:39.168Z" }, - { url = "https://files.pythonhosted.org/packages/3a/a0/6a193b3f0cc9437b122978d2c5cbce59510ccf9a5b48825096ed7472da2f/pillow-12.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:c828a1ae702fc712978bda0320ba1b9893d99be0badf2647f693cc01cf0f04fa", size = 7117058, upload-time = "2025-10-15T18:21:40.997Z" }, - { url = "https://files.pythonhosted.org/packages/a7/c4/043192375eaa4463254e8e61f0e2ec9a846b983929a8d0a7122e0a6d6fff/pillow-12.0.0-cp310-cp310-win32.whl", hash = "sha256:bd87e140e45399c818fac4247880b9ce719e4783d767e030a883a970be632275", size = 6295431, upload-time = "2025-10-15T18:21:42.518Z" }, - { url = "https://files.pythonhosted.org/packages/92/c6/c2f2fc7e56301c21827e689bb8b0b465f1b52878b57471a070678c0c33cd/pillow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:455247ac8a4cfb7b9bc45b7e432d10421aea9fc2e74d285ba4072688a74c2e9d", size = 7000412, upload-time = "2025-10-15T18:21:44.404Z" }, - { url = "https://files.pythonhosted.org/packages/b2/d2/5f675067ba82da7a1c238a73b32e3fd78d67f9d9f80fbadd33a40b9c0481/pillow-12.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:6ace95230bfb7cd79ef66caa064bbe2f2a1e63d93471c3a2e1f1348d9f22d6b7", size = 2435903, upload-time = "2025-10-15T18:21:46.29Z" }, - { url = "https://files.pythonhosted.org/packages/0e/5a/a2f6773b64edb921a756eb0729068acad9fc5208a53f4a349396e9436721/pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc", size = 5289798, upload-time = "2025-10-15T18:21:47.763Z" }, - { url = "https://files.pythonhosted.org/packages/2e/05/069b1f8a2e4b5a37493da6c5868531c3f77b85e716ad7a590ef87d58730d/pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257", size = 4650589, upload-time = "2025-10-15T18:21:49.515Z" }, - { url = "https://files.pythonhosted.org/packages/61/e3/2c820d6e9a36432503ead175ae294f96861b07600a7156154a086ba7111a/pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642", size = 6230472, upload-time = "2025-10-15T18:21:51.052Z" }, - { 
url = "https://files.pythonhosted.org/packages/4f/89/63427f51c64209c5e23d4d52071c8d0f21024d3a8a487737caaf614a5795/pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3", size = 8033887, upload-time = "2025-10-15T18:21:52.604Z" }, - { url = "https://files.pythonhosted.org/packages/f6/1b/c9711318d4901093c15840f268ad649459cd81984c9ec9887756cca049a5/pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c", size = 6343964, upload-time = "2025-10-15T18:21:54.619Z" }, - { url = "https://files.pythonhosted.org/packages/41/1e/db9470f2d030b4995083044cd8738cdd1bf773106819f6d8ba12597d5352/pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227", size = 7034756, upload-time = "2025-10-15T18:21:56.151Z" }, - { url = "https://files.pythonhosted.org/packages/cc/b0/6177a8bdd5ee4ed87cba2de5a3cc1db55ffbbec6176784ce5bb75aa96798/pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b", size = 6458075, upload-time = "2025-10-15T18:21:57.759Z" }, - { url = "https://files.pythonhosted.org/packages/bc/5e/61537aa6fa977922c6a03253a0e727e6e4a72381a80d63ad8eec350684f2/pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e", size = 7125955, upload-time = "2025-10-15T18:21:59.372Z" }, - { url = "https://files.pythonhosted.org/packages/1f/3d/d5033539344ee3cbd9a4d69e12e63ca3a44a739eb2d4c8da350a3d38edd7/pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739", size = 6298440, upload-time = "2025-10-15T18:22:00.982Z" }, - { url = 
"https://files.pythonhosted.org/packages/4d/42/aaca386de5cc8bd8a0254516957c1f265e3521c91515b16e286c662854c4/pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e", size = 6999256, upload-time = "2025-10-15T18:22:02.617Z" }, - { url = "https://files.pythonhosted.org/packages/ba/f1/9197c9c2d5708b785f631a6dfbfa8eb3fb9672837cb92ae9af812c13b4ed/pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d", size = 2436025, upload-time = "2025-10-15T18:22:04.598Z" }, - { url = "https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" }, - { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" }, - { url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" }, - { url = "https://files.pythonhosted.org/packages/88/e1/9098d3ce341a8750b55b0e00c03f1630d6178f38ac191c81c97a3b047b44/pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d", size = 8041399, upload-time = "2025-10-15T18:22:10.872Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/62/a22e8d3b602ae8cc01446d0c57a54e982737f44b6f2e1e019a925143771d/pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953", size = 6347740, upload-time = "2025-10-15T18:22:12.769Z" }, - { url = "https://files.pythonhosted.org/packages/4f/87/424511bdcd02c8d7acf9f65caa09f291a519b16bd83c3fb3374b3d4ae951/pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8", size = 7040201, upload-time = "2025-10-15T18:22:14.813Z" }, - { url = "https://files.pythonhosted.org/packages/dc/4d/435c8ac688c54d11755aedfdd9f29c9eeddf68d150fe42d1d3dbd2365149/pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79", size = 6462334, upload-time = "2025-10-15T18:22:16.375Z" }, - { url = "https://files.pythonhosted.org/packages/2b/f2/ad34167a8059a59b8ad10bc5c72d4d9b35acc6b7c0877af8ac885b5f2044/pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba", size = 7134162, upload-time = "2025-10-15T18:22:17.996Z" }, - { url = "https://files.pythonhosted.org/packages/0c/b1/a7391df6adacf0a5c2cf6ac1cf1fcc1369e7d439d28f637a847f8803beb3/pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0", size = 6298769, upload-time = "2025-10-15T18:22:19.923Z" }, - { url = "https://files.pythonhosted.org/packages/a2/0b/d87733741526541c909bbf159e338dcace4f982daac6e5a8d6be225ca32d/pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a", size = 7001107, upload-time = "2025-10-15T18:22:21.644Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/96/aaa61ce33cc98421fb6088af2a03be4157b1e7e0e87087c888e2370a7f45/pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad", size = 2436012, upload-time = "2025-10-15T18:22:23.621Z" }, - { url = "https://files.pythonhosted.org/packages/62/f2/de993bb2d21b33a98d031ecf6a978e4b61da207bef02f7b43093774c480d/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643", size = 4045493, upload-time = "2025-10-15T18:22:25.758Z" }, - { url = "https://files.pythonhosted.org/packages/0e/b6/bc8d0c4c9f6f111a783d045310945deb769b806d7574764234ffd50bc5ea/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4", size = 4120461, upload-time = "2025-10-15T18:22:27.286Z" }, - { url = "https://files.pythonhosted.org/packages/5d/57/d60d343709366a353dc56adb4ee1e7d8a2cc34e3fbc22905f4167cfec119/pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399", size = 3576912, upload-time = "2025-10-15T18:22:28.751Z" }, - { url = "https://files.pythonhosted.org/packages/a4/a4/a0a31467e3f83b94d37568294b01d22b43ae3c5d85f2811769b9c66389dd/pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5", size = 5249132, upload-time = "2025-10-15T18:22:30.641Z" }, - { url = "https://files.pythonhosted.org/packages/83/06/48eab21dd561de2914242711434c0c0eb992ed08ff3f6107a5f44527f5e9/pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b", size = 4650099, upload-time = "2025-10-15T18:22:32.73Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/bd/69ed99fd46a8dba7c1887156d3572fe4484e3f031405fcc5a92e31c04035/pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3", size = 6230808, upload-time = "2025-10-15T18:22:34.337Z" }, - { url = "https://files.pythonhosted.org/packages/ea/94/8fad659bcdbf86ed70099cb60ae40be6acca434bbc8c4c0d4ef356d7e0de/pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07", size = 8037804, upload-time = "2025-10-15T18:22:36.402Z" }, - { url = "https://files.pythonhosted.org/packages/20/39/c685d05c06deecfd4e2d1950e9a908aa2ca8bc4e6c3b12d93b9cafbd7837/pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e", size = 6345553, upload-time = "2025-10-15T18:22:38.066Z" }, - { url = "https://files.pythonhosted.org/packages/38/57/755dbd06530a27a5ed74f8cb0a7a44a21722ebf318edbe67ddbd7fb28f88/pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344", size = 7037729, upload-time = "2025-10-15T18:22:39.769Z" }, - { url = "https://files.pythonhosted.org/packages/ca/b6/7e94f4c41d238615674d06ed677c14883103dce1c52e4af16f000338cfd7/pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27", size = 6459789, upload-time = "2025-10-15T18:22:41.437Z" }, - { url = "https://files.pythonhosted.org/packages/9c/14/4448bb0b5e0f22dd865290536d20ec8a23b64e2d04280b89139f09a36bb6/pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79", size = 7130917, upload-time = "2025-10-15T18:22:43.152Z" }, - { url = 
"https://files.pythonhosted.org/packages/dd/ca/16c6926cc1c015845745d5c16c9358e24282f1e588237a4c36d2b30f182f/pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098", size = 6302391, upload-time = "2025-10-15T18:22:44.753Z" }, - { url = "https://files.pythonhosted.org/packages/6d/2a/dd43dcfd6dae9b6a49ee28a8eedb98c7d5ff2de94a5d834565164667b97b/pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905", size = 7007477, upload-time = "2025-10-15T18:22:46.838Z" }, - { url = "https://files.pythonhosted.org/packages/77/f0/72ea067f4b5ae5ead653053212af05ce3705807906ba3f3e8f58ddf617e6/pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a", size = 2435918, upload-time = "2025-10-15T18:22:48.399Z" }, - { url = "https://files.pythonhosted.org/packages/f5/5e/9046b423735c21f0487ea6cb5b10f89ea8f8dfbe32576fe052b5ba9d4e5b/pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3", size = 5251406, upload-time = "2025-10-15T18:22:49.905Z" }, - { url = "https://files.pythonhosted.org/packages/12/66/982ceebcdb13c97270ef7a56c3969635b4ee7cd45227fa707c94719229c5/pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced", size = 4653218, upload-time = "2025-10-15T18:22:51.587Z" }, - { url = "https://files.pythonhosted.org/packages/16/b3/81e625524688c31859450119bf12674619429cab3119eec0e30a7a1029cb/pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b", size = 6266564, upload-time = "2025-10-15T18:22:53.215Z" }, - { url = 
"https://files.pythonhosted.org/packages/98/59/dfb38f2a41240d2408096e1a76c671d0a105a4a8471b1871c6902719450c/pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d", size = 8069260, upload-time = "2025-10-15T18:22:54.933Z" }, - { url = "https://files.pythonhosted.org/packages/dc/3d/378dbea5cd1874b94c312425ca77b0f47776c78e0df2df751b820c8c1d6c/pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a", size = 6379248, upload-time = "2025-10-15T18:22:56.605Z" }, - { url = "https://files.pythonhosted.org/packages/84/b0/d525ef47d71590f1621510327acec75ae58c721dc071b17d8d652ca494d8/pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe", size = 7066043, upload-time = "2025-10-15T18:22:58.53Z" }, - { url = "https://files.pythonhosted.org/packages/61/2c/aced60e9cf9d0cde341d54bf7932c9ffc33ddb4a1595798b3a5150c7ec4e/pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee", size = 6490915, upload-time = "2025-10-15T18:23:00.582Z" }, - { url = "https://files.pythonhosted.org/packages/ef/26/69dcb9b91f4e59f8f34b2332a4a0a951b44f547c4ed39d3e4dcfcff48f89/pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef", size = 7157998, upload-time = "2025-10-15T18:23:02.627Z" }, - { url = "https://files.pythonhosted.org/packages/61/2b/726235842220ca95fa441ddf55dd2382b52ab5b8d9c0596fe6b3f23dafe8/pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9", size = 6306201, upload-time = "2025-10-15T18:23:04.709Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/3d/2afaf4e840b2df71344ababf2f8edd75a705ce500e5dc1e7227808312ae1/pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b", size = 7013165, upload-time = "2025-10-15T18:23:06.46Z" }, - { url = "https://files.pythonhosted.org/packages/6f/75/3fa09aa5cf6ed04bee3fa575798ddf1ce0bace8edb47249c798077a81f7f/pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47", size = 2437834, upload-time = "2025-10-15T18:23:08.194Z" }, - { url = "https://files.pythonhosted.org/packages/54/2a/9a8c6ba2c2c07b71bec92cf63e03370ca5e5f5c5b119b742bcc0cde3f9c5/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9", size = 4045531, upload-time = "2025-10-15T18:23:10.121Z" }, - { url = "https://files.pythonhosted.org/packages/84/54/836fdbf1bfb3d66a59f0189ff0b9f5f666cee09c6188309300df04ad71fa/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2", size = 4120554, upload-time = "2025-10-15T18:23:12.14Z" }, - { url = "https://files.pythonhosted.org/packages/0d/cd/16aec9f0da4793e98e6b54778a5fbce4f375c6646fe662e80600b8797379/pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a", size = 3576812, upload-time = "2025-10-15T18:23:13.962Z" }, - { url = "https://files.pythonhosted.org/packages/f6/b7/13957fda356dc46339298b351cae0d327704986337c3c69bb54628c88155/pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b", size = 5252689, upload-time = "2025-10-15T18:23:15.562Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/f5/eae31a306341d8f331f43edb2e9122c7661b975433de5e447939ae61c5da/pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad", size = 4650186, upload-time = "2025-10-15T18:23:17.379Z" }, - { url = "https://files.pythonhosted.org/packages/86/62/2a88339aa40c4c77e79108facbd307d6091e2c0eb5b8d3cf4977cfca2fe6/pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01", size = 6230308, upload-time = "2025-10-15T18:23:18.971Z" }, - { url = "https://files.pythonhosted.org/packages/c7/33/5425a8992bcb32d1cb9fa3dd39a89e613d09a22f2c8083b7bf43c455f760/pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c", size = 8039222, upload-time = "2025-10-15T18:23:20.909Z" }, - { url = "https://files.pythonhosted.org/packages/d8/61/3f5d3b35c5728f37953d3eec5b5f3e77111949523bd2dd7f31a851e50690/pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e", size = 6346657, upload-time = "2025-10-15T18:23:23.077Z" }, - { url = "https://files.pythonhosted.org/packages/3a/be/ee90a3d79271227e0f0a33c453531efd6ed14b2e708596ba5dd9be948da3/pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e", size = 7038482, upload-time = "2025-10-15T18:23:25.005Z" }, - { url = "https://files.pythonhosted.org/packages/44/34/a16b6a4d1ad727de390e9bd9f19f5f669e079e5826ec0f329010ddea492f/pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9", size = 6461416, upload-time = "2025-10-15T18:23:27.009Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/39/1aa5850d2ade7d7ba9f54e4e4c17077244ff7a2d9e25998c38a29749eb3f/pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab", size = 7131584, upload-time = "2025-10-15T18:23:29.752Z" }, - { url = "https://files.pythonhosted.org/packages/bf/db/4fae862f8fad0167073a7733973bfa955f47e2cac3dc3e3e6257d10fab4a/pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b", size = 6400621, upload-time = "2025-10-15T18:23:32.06Z" }, - { url = "https://files.pythonhosted.org/packages/2b/24/b350c31543fb0107ab2599464d7e28e6f856027aadda995022e695313d94/pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b", size = 7142916, upload-time = "2025-10-15T18:23:34.71Z" }, - { url = "https://files.pythonhosted.org/packages/0f/9b/0ba5a6fd9351793996ef7487c4fdbde8d3f5f75dbedc093bb598648fddf0/pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0", size = 2523836, upload-time = "2025-10-15T18:23:36.967Z" }, - { url = "https://files.pythonhosted.org/packages/f5/7a/ceee0840aebc579af529b523d530840338ecf63992395842e54edc805987/pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6", size = 5255092, upload-time = "2025-10-15T18:23:38.573Z" }, - { url = "https://files.pythonhosted.org/packages/44/76/20776057b4bfd1aef4eeca992ebde0f53a4dce874f3ae693d0ec90a4f79b/pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6", size = 4653158, upload-time = "2025-10-15T18:23:40.238Z" }, - { url = 
"https://files.pythonhosted.org/packages/82/3f/d9ff92ace07be8836b4e7e87e6a4c7a8318d47c2f1463ffcf121fc57d9cb/pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1", size = 6267882, upload-time = "2025-10-15T18:23:42.434Z" }, - { url = "https://files.pythonhosted.org/packages/9f/7a/4f7ff87f00d3ad33ba21af78bfcd2f032107710baf8280e3722ceec28cda/pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e", size = 8071001, upload-time = "2025-10-15T18:23:44.29Z" }, - { url = "https://files.pythonhosted.org/packages/75/87/fcea108944a52dad8cca0715ae6247e271eb80459364a98518f1e4f480c1/pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca", size = 6380146, upload-time = "2025-10-15T18:23:46.065Z" }, - { url = "https://files.pythonhosted.org/packages/91/52/0d31b5e571ef5fd111d2978b84603fce26aba1b6092f28e941cb46570745/pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925", size = 7067344, upload-time = "2025-10-15T18:23:47.898Z" }, - { url = "https://files.pythonhosted.org/packages/7b/f4/2dd3d721f875f928d48e83bb30a434dee75a2531bca839bb996bb0aa5a91/pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8", size = 6491864, upload-time = "2025-10-15T18:23:49.607Z" }, - { url = "https://files.pythonhosted.org/packages/30/4b/667dfcf3d61fc309ba5a15b141845cece5915e39b99c1ceab0f34bf1d124/pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4", size = 7158911, upload-time = "2025-10-15T18:23:51.351Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/2f/16cabcc6426c32218ace36bf0d55955e813f2958afddbf1d391849fee9d1/pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52", size = 6408045, upload-time = "2025-10-15T18:23:53.177Z" }, - { url = "https://files.pythonhosted.org/packages/35/73/e29aa0c9c666cf787628d3f0dcf379f4791fba79f4936d02f8b37165bdf8/pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a", size = 7148282, upload-time = "2025-10-15T18:23:55.316Z" }, - { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" }, - { url = "https://files.pythonhosted.org/packages/1d/b3/582327e6c9f86d037b63beebe981425d6811104cb443e8193824ef1a2f27/pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8", size = 5215068, upload-time = "2025-10-15T18:23:59.594Z" }, - { url = "https://files.pythonhosted.org/packages/fd/d6/67748211d119f3b6540baf90f92fae73ae51d5217b171b0e8b5f7e5d558f/pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a", size = 4614994, upload-time = "2025-10-15T18:24:01.669Z" }, - { url = "https://files.pythonhosted.org/packages/2d/e1/f8281e5d844c41872b273b9f2c34a4bf64ca08905668c8ae730eedc7c9fa/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197", size = 5246639, upload-time = "2025-10-15T18:24:03.403Z" }, - { url = 
"https://files.pythonhosted.org/packages/94/5a/0d8ab8ffe8a102ff5df60d0de5af309015163bf710c7bb3e8311dd3b3ad0/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c", size = 6986839, upload-time = "2025-10-15T18:24:05.344Z" }, - { url = "https://files.pythonhosted.org/packages/20/2e/3434380e8110b76cd9eb00a363c484b050f949b4bbe84ba770bb8508a02c/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e", size = 5313505, upload-time = "2025-10-15T18:24:07.137Z" }, - { url = "https://files.pythonhosted.org/packages/57/ca/5a9d38900d9d74785141d6580950fe705de68af735ff6e727cb911b64740/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76", size = 5963654, upload-time = "2025-10-15T18:24:09.579Z" }, - { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" }, +version = "12.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/02/d52c733a2452ef1ffcc123b68e6606d07276b0e358db70eabad7e40042b7/pillow-12.1.0.tar.gz", hash = "sha256:5c5ae0a06e9ea030ab786b0251b32c7e4ce10e58d983c0d5c56029455180b5b9", size = 46977283, upload-time = "2026-01-02T09:13:29.892Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/41/f73d92b6b883a579e79600d391f2e21cb0df767b2714ecbd2952315dfeef/pillow-12.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:fb125d860738a09d363a88daa0f59c4533529a90e564785e20fe875b200b6dbd", size = 5304089, upload-time = 
"2026-01-02T09:10:24.953Z" }, + { url = "https://files.pythonhosted.org/packages/94/55/7aca2891560188656e4a91ed9adba305e914a4496800da6b5c0a15f09edf/pillow-12.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cad302dc10fac357d3467a74a9561c90609768a6f73a1923b0fd851b6486f8b0", size = 4657815, upload-time = "2026-01-02T09:10:27.063Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d2/b28221abaa7b4c40b7dba948f0f6a708bd7342c4d47ce342f0ea39643974/pillow-12.1.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a40905599d8079e09f25027423aed94f2823adaf2868940de991e53a449e14a8", size = 6222593, upload-time = "2026-01-02T09:10:29.115Z" }, + { url = "https://files.pythonhosted.org/packages/71/b8/7a61fb234df6a9b0b479f69e66901209d89ff72a435b49933f9122f94cac/pillow-12.1.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:92a7fe4225365c5e3a8e598982269c6d6698d3e783b3b1ae979e7819f9cd55c1", size = 8027579, upload-time = "2026-01-02T09:10:31.182Z" }, + { url = "https://files.pythonhosted.org/packages/ea/51/55c751a57cc524a15a0e3db20e5cde517582359508d62305a627e77fd295/pillow-12.1.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f10c98f49227ed8383d28174ee95155a675c4ed7f85e2e573b04414f7e371bda", size = 6335760, upload-time = "2026-01-02T09:10:33.02Z" }, + { url = "https://files.pythonhosted.org/packages/dc/7c/60e3e6f5e5891a1a06b4c910f742ac862377a6fe842f7184df4a274ce7bf/pillow-12.1.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8637e29d13f478bc4f153d8daa9ffb16455f0a6cb287da1b432fdad2bfbd66c7", size = 7027127, upload-time = "2026-01-02T09:10:35.009Z" }, + { url = "https://files.pythonhosted.org/packages/06/37/49d47266ba50b00c27ba63a7c898f1bb41a29627ced8c09e25f19ebec0ff/pillow-12.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:21e686a21078b0f9cb8c8a961d99e6a4ddb88e0fc5ea6e130172ddddc2e5221a", size = 6449896, upload-time = "2026-01-02T09:10:36.793Z" }, 
+ { url = "https://files.pythonhosted.org/packages/f9/e5/67fd87d2913902462cd9b79c6211c25bfe95fcf5783d06e1367d6d9a741f/pillow-12.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2415373395a831f53933c23ce051021e79c8cd7979822d8cc478547a3f4da8ef", size = 7151345, upload-time = "2026-01-02T09:10:39.064Z" }, + { url = "https://files.pythonhosted.org/packages/bd/15/f8c7abf82af68b29f50d77c227e7a1f87ce02fdc66ded9bf603bc3b41180/pillow-12.1.0-cp310-cp310-win32.whl", hash = "sha256:e75d3dba8fc1ddfec0cd752108f93b83b4f8d6ab40e524a95d35f016b9683b09", size = 6325568, upload-time = "2026-01-02T09:10:41.035Z" }, + { url = "https://files.pythonhosted.org/packages/d4/24/7d1c0e160b6b5ac2605ef7d8be537e28753c0db5363d035948073f5513d7/pillow-12.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:64efdf00c09e31efd754448a383ea241f55a994fd079866b92d2bbff598aad91", size = 7032367, upload-time = "2026-01-02T09:10:43.09Z" }, + { url = "https://files.pythonhosted.org/packages/f4/03/41c038f0d7a06099254c60f618d0ec7be11e79620fc23b8e85e5b31d9a44/pillow-12.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:f188028b5af6b8fb2e9a76ac0f841a575bd1bd396e46ef0840d9b88a48fdbcea", size = 2452345, upload-time = "2026-01-02T09:10:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/43/c4/bf8328039de6cc22182c3ef007a2abfbbdab153661c0a9aa78af8d706391/pillow-12.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:a83e0850cb8f5ac975291ebfc4170ba481f41a28065277f7f735c202cd8e0af3", size = 5304057, upload-time = "2026-01-02T09:10:46.627Z" }, + { url = "https://files.pythonhosted.org/packages/43/06/7264c0597e676104cc22ca73ee48f752767cd4b1fe084662620b17e10120/pillow-12.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b6e53e82ec2db0717eabb276aa56cf4e500c9a7cec2c2e189b55c24f65a3e8c0", size = 4657811, upload-time = "2026-01-02T09:10:49.548Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/64/f9189e44474610daf83da31145fa56710b627b5c4c0b9c235e34058f6b31/pillow-12.1.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:40a8e3b9e8773876d6e30daed22f016509e3987bab61b3b7fe309d7019a87451", size = 6232243, upload-time = "2026-01-02T09:10:51.62Z" }, + { url = "https://files.pythonhosted.org/packages/ef/30/0df458009be6a4caca4ca2c52975e6275c387d4e5c95544e34138b41dc86/pillow-12.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:800429ac32c9b72909c671aaf17ecd13110f823ddb7db4dfef412a5587c2c24e", size = 8037872, upload-time = "2026-01-02T09:10:53.446Z" }, + { url = "https://files.pythonhosted.org/packages/e4/86/95845d4eda4f4f9557e25381d70876aa213560243ac1a6d619c46caaedd9/pillow-12.1.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b022eaaf709541b391ee069f0022ee5b36c709df71986e3f7be312e46f42c84", size = 6345398, upload-time = "2026-01-02T09:10:55.426Z" }, + { url = "https://files.pythonhosted.org/packages/5c/1f/8e66ab9be3aaf1435bc03edd1ebdf58ffcd17f7349c1d970cafe87af27d9/pillow-12.1.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f345e7bc9d7f368887c712aa5054558bad44d2a301ddf9248599f4161abc7c0", size = 7034667, upload-time = "2026-01-02T09:10:57.11Z" }, + { url = "https://files.pythonhosted.org/packages/f9/f6/683b83cb9b1db1fb52b87951b1c0b99bdcfceaa75febf11406c19f82cb5e/pillow-12.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d70347c8a5b7ccd803ec0c85c8709f036e6348f1e6a5bf048ecd9c64d3550b8b", size = 6458743, upload-time = "2026-01-02T09:10:59.331Z" }, + { url = "https://files.pythonhosted.org/packages/9a/7d/de833d63622538c1d58ce5395e7c6cb7e7dce80decdd8bde4a484e095d9f/pillow-12.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1fcc52d86ce7a34fd17cb04e87cfdb164648a3662a6f20565910a99653d66c18", size = 7159342, upload-time = "2026-01-02T09:11:01.82Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/40/50d86571c9e5868c42b81fe7da0c76ca26373f3b95a8dd675425f4a92ec1/pillow-12.1.0-cp311-cp311-win32.whl", hash = "sha256:3ffaa2f0659e2f740473bcf03c702c39a8d4b2b7ffc629052028764324842c64", size = 6328655, upload-time = "2026-01-02T09:11:04.556Z" }, + { url = "https://files.pythonhosted.org/packages/6c/af/b1d7e301c4cd26cd45d4af884d9ee9b6fab893b0ad2450d4746d74a6968c/pillow-12.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:806f3987ffe10e867bab0ddad45df1148a2b98221798457fa097ad85d6e8bc75", size = 7031469, upload-time = "2026-01-02T09:11:06.538Z" }, + { url = "https://files.pythonhosted.org/packages/48/36/d5716586d887fb2a810a4a61518a327a1e21c8b7134c89283af272efe84b/pillow-12.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:9f5fefaca968e700ad1a4a9de98bf0869a94e397fe3524c4c9450c1445252304", size = 2452515, upload-time = "2026-01-02T09:11:08.226Z" }, + { url = "https://files.pythonhosted.org/packages/20/31/dc53fe21a2f2996e1b7d92bf671cdb157079385183ef7c1ae08b485db510/pillow-12.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a332ac4ccb84b6dde65dbace8431f3af08874bf9770719d32a635c4ef411b18b", size = 5262642, upload-time = "2026-01-02T09:11:10.138Z" }, + { url = "https://files.pythonhosted.org/packages/ab/c1/10e45ac9cc79419cedf5121b42dcca5a50ad2b601fa080f58c22fb27626e/pillow-12.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:907bfa8a9cb790748a9aa4513e37c88c59660da3bcfffbd24a7d9e6abf224551", size = 4657464, upload-time = "2026-01-02T09:11:12.319Z" }, + { url = "https://files.pythonhosted.org/packages/ad/26/7b82c0ab7ef40ebede7a97c72d473bda5950f609f8e0c77b04af574a0ddb/pillow-12.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:efdc140e7b63b8f739d09a99033aa430accce485ff78e6d311973a67b6bf3208", size = 6234878, upload-time = "2026-01-02T09:11:14.096Z" }, + { url = 
"https://files.pythonhosted.org/packages/76/25/27abc9792615b5e886ca9411ba6637b675f1b77af3104710ac7353fe5605/pillow-12.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bef9768cab184e7ae6e559c032e95ba8d07b3023c289f79a2bd36e8bf85605a5", size = 8044868, upload-time = "2026-01-02T09:11:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/0a/ea/f200a4c36d836100e7bc738fc48cd963d3ba6372ebc8298a889e0cfc3359/pillow-12.1.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:742aea052cf5ab5034a53c3846165bc3ce88d7c38e954120db0ab867ca242661", size = 6349468, upload-time = "2026-01-02T09:11:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/11/8f/48d0b77ab2200374c66d344459b8958c86693be99526450e7aee714e03e4/pillow-12.1.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6dfc2af5b082b635af6e08e0d1f9f1c4e04d17d4e2ca0ef96131e85eda6eb17", size = 7041518, upload-time = "2026-01-02T09:11:19.389Z" }, + { url = "https://files.pythonhosted.org/packages/1d/23/c281182eb986b5d31f0a76d2a2c8cd41722d6fb8ed07521e802f9bba52de/pillow-12.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:609e89d9f90b581c8d16358c9087df76024cf058fa693dd3e1e1620823f39670", size = 6462829, upload-time = "2026-01-02T09:11:21.28Z" }, + { url = "https://files.pythonhosted.org/packages/25/ef/7018273e0faac099d7b00982abdcc39142ae6f3bd9ceb06de09779c4a9d6/pillow-12.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43b4899cfd091a9693a1278c4982f3e50f7fb7cff5153b05174b4afc9593b616", size = 7166756, upload-time = "2026-01-02T09:11:23.559Z" }, + { url = "https://files.pythonhosted.org/packages/8f/c8/993d4b7ab2e341fe02ceef9576afcf5830cdec640be2ac5bee1820d693d4/pillow-12.1.0-cp312-cp312-win32.whl", hash = "sha256:aa0c9cc0b82b14766a99fbe6084409972266e82f459821cd26997a488a7261a7", size = 6328770, upload-time = "2026-01-02T09:11:25.661Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/87/90b358775a3f02765d87655237229ba64a997b87efa8ccaca7dd3e36e7a7/pillow-12.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:d70534cea9e7966169ad29a903b99fc507e932069a881d0965a1a84bb57f6c6d", size = 7033406, upload-time = "2026-01-02T09:11:27.474Z" }, + { url = "https://files.pythonhosted.org/packages/5d/cf/881b457eccacac9e5b2ddd97d5071fb6d668307c57cbf4e3b5278e06e536/pillow-12.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:65b80c1ee7e14a87d6a068dd3b0aea268ffcabfe0498d38661b00c5b4b22e74c", size = 2452612, upload-time = "2026-01-02T09:11:29.309Z" }, + { url = "https://files.pythonhosted.org/packages/dd/c7/2530a4aa28248623e9d7f27316b42e27c32ec410f695929696f2e0e4a778/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:7b5dd7cbae20285cdb597b10eb5a2c13aa9de6cde9bb64a3c1317427b1db1ae1", size = 4062543, upload-time = "2026-01-02T09:11:31.566Z" }, + { url = "https://files.pythonhosted.org/packages/8f/1f/40b8eae823dc1519b87d53c30ed9ef085506b05281d313031755c1705f73/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:29a4cef9cb672363926f0470afc516dbf7305a14d8c54f7abbb5c199cd8f8179", size = 4138373, upload-time = "2026-01-02T09:11:33.367Z" }, + { url = "https://files.pythonhosted.org/packages/d4/77/6fa60634cf06e52139fd0e89e5bbf055e8166c691c42fb162818b7fda31d/pillow-12.1.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:681088909d7e8fa9e31b9799aaa59ba5234c58e5e4f1951b4c4d1082a2e980e0", size = 3601241, upload-time = "2026-01-02T09:11:35.011Z" }, + { url = "https://files.pythonhosted.org/packages/4f/bf/28ab865de622e14b747f0cd7877510848252d950e43002e224fb1c9ababf/pillow-12.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:983976c2ab753166dc66d36af6e8ec15bb511e4a25856e2227e5f7e00a160587", size = 5262410, upload-time = "2026-01-02T09:11:36.682Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/34/583420a1b55e715937a85bd48c5c0991598247a1fd2eb5423188e765ea02/pillow-12.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:db44d5c160a90df2d24a24760bbd37607d53da0b34fb546c4c232af7192298ac", size = 4657312, upload-time = "2026-01-02T09:11:38.535Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fd/f5a0896839762885b3376ff04878f86ab2b097c2f9a9cdccf4eda8ba8dc0/pillow-12.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6b7a9d1db5dad90e2991645874f708e87d9a3c370c243c2d7684d28f7e133e6b", size = 6232605, upload-time = "2026-01-02T09:11:40.602Z" }, + { url = "https://files.pythonhosted.org/packages/98/aa/938a09d127ac1e70e6ed467bd03834350b33ef646b31edb7452d5de43792/pillow-12.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6258f3260986990ba2fa8a874f8b6e808cf5abb51a94015ca3dc3c68aa4f30ea", size = 8041617, upload-time = "2026-01-02T09:11:42.721Z" }, + { url = "https://files.pythonhosted.org/packages/17/e8/538b24cb426ac0186e03f80f78bc8dc7246c667f58b540bdd57c71c9f79d/pillow-12.1.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e115c15e3bc727b1ca3e641a909f77f8ca72a64fff150f666fcc85e57701c26c", size = 6346509, upload-time = "2026-01-02T09:11:44.955Z" }, + { url = "https://files.pythonhosted.org/packages/01/9a/632e58ec89a32738cabfd9ec418f0e9898a2b4719afc581f07c04a05e3c9/pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6741e6f3074a35e47c77b23a4e4f2d90db3ed905cb1c5e6e0d49bff2045632bc", size = 7038117, upload-time = "2026-01-02T09:11:46.736Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a2/d40308cf86eada842ca1f3ffa45d0ca0df7e4ab33c83f81e73f5eaed136d/pillow-12.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:935b9d1aed48fcfb3f838caac506f38e29621b44ccc4f8a64d575cb1b2a88644", size = 6460151, upload-time = "2026-01-02T09:11:48.625Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/88/f5b058ad6453a085c5266660a1417bdad590199da1b32fb4efcff9d33b05/pillow-12.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5fee4c04aad8932da9f8f710af2c1a15a83582cfb884152a9caa79d4efcdbf9c", size = 7164534, upload-time = "2026-01-02T09:11:50.445Z" }, + { url = "https://files.pythonhosted.org/packages/19/ce/c17334caea1db789163b5d855a5735e47995b0b5dc8745e9a3605d5f24c0/pillow-12.1.0-cp313-cp313-win32.whl", hash = "sha256:a786bf667724d84aa29b5db1c61b7bfdde380202aaca12c3461afd6b71743171", size = 6332551, upload-time = "2026-01-02T09:11:52.234Z" }, + { url = "https://files.pythonhosted.org/packages/e5/07/74a9d941fa45c90a0d9465098fe1ec85de3e2afbdc15cc4766622d516056/pillow-12.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:461f9dfdafa394c59cd6d818bdfdbab4028b83b02caadaff0ffd433faf4c9a7a", size = 7040087, upload-time = "2026-01-02T09:11:54.822Z" }, + { url = "https://files.pythonhosted.org/packages/88/09/c99950c075a0e9053d8e880595926302575bc742b1b47fe1bbcc8d388d50/pillow-12.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:9212d6b86917a2300669511ed094a9406888362e085f2431a7da985a6b124f45", size = 2452470, upload-time = "2026-01-02T09:11:56.522Z" }, + { url = "https://files.pythonhosted.org/packages/b5/ba/970b7d85ba01f348dee4d65412476321d40ee04dcb51cd3735b9dc94eb58/pillow-12.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:00162e9ca6d22b7c3ee8e61faa3c3253cd19b6a37f126cad04f2f88b306f557d", size = 5264816, upload-time = "2026-01-02T09:11:58.227Z" }, + { url = "https://files.pythonhosted.org/packages/10/60/650f2fb55fdba7a510d836202aa52f0baac633e50ab1cf18415d332188fb/pillow-12.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7d6daa89a00b58c37cb1747ec9fb7ac3bc5ffd5949f5888657dfddde6d1312e0", size = 4660472, upload-time = "2026-01-02T09:12:00.798Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/c0/5273a99478956a099d533c4f46cbaa19fd69d606624f4334b85e50987a08/pillow-12.1.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e2479c7f02f9d505682dc47df8c0ea1fc5e264c4d1629a5d63fe3e2334b89554", size = 6268974, upload-time = "2026-01-02T09:12:02.572Z" }, + { url = "https://files.pythonhosted.org/packages/b4/26/0bf714bc2e73d5267887d47931d53c4ceeceea6978148ed2ab2a4e6463c4/pillow-12.1.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f188d580bd870cda1e15183790d1cc2fa78f666e76077d103edf048eed9c356e", size = 8073070, upload-time = "2026-01-02T09:12:04.75Z" }, + { url = "https://files.pythonhosted.org/packages/43/cf/1ea826200de111a9d65724c54f927f3111dc5ae297f294b370a670c17786/pillow-12.1.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0fde7ec5538ab5095cc02df38ee99b0443ff0e1c847a045554cf5f9af1f4aa82", size = 6380176, upload-time = "2026-01-02T09:12:06.626Z" }, + { url = "https://files.pythonhosted.org/packages/03/e0/7938dd2b2013373fd85d96e0f38d62b7a5a262af21ac274250c7ca7847c9/pillow-12.1.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ed07dca4a8464bada6139ab38f5382f83e5f111698caf3191cb8dbf27d908b4", size = 7067061, upload-time = "2026-01-02T09:12:08.624Z" }, + { url = "https://files.pythonhosted.org/packages/86/ad/a2aa97d37272a929a98437a8c0ac37b3cf012f4f8721e1bd5154699b2518/pillow-12.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f45bd71d1fa5e5749587613037b172e0b3b23159d1c00ef2fc920da6f470e6f0", size = 6491824, upload-time = "2026-01-02T09:12:10.488Z" }, + { url = "https://files.pythonhosted.org/packages/a4/44/80e46611b288d51b115826f136fb3465653c28f491068a72d3da49b54cd4/pillow-12.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:277518bf4fe74aa91489e1b20577473b19ee70fb97c374aa50830b279f25841b", size = 7190911, upload-time = "2026-01-02T09:12:12.772Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/77/eacc62356b4cf81abe99ff9dbc7402750044aed02cfd6a503f7c6fc11f3e/pillow-12.1.0-cp313-cp313t-win32.whl", hash = "sha256:7315f9137087c4e0ee73a761b163fc9aa3b19f5f606a7fc08d83fd3e4379af65", size = 6336445, upload-time = "2026-01-02T09:12:14.775Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3c/57d81d0b74d218706dafccb87a87ea44262c43eef98eb3b164fd000e0491/pillow-12.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:0ddedfaa8b5f0b4ffbc2fa87b556dc59f6bb4ecb14a53b33f9189713ae8053c0", size = 7045354, upload-time = "2026-01-02T09:12:16.599Z" }, + { url = "https://files.pythonhosted.org/packages/ac/82/8b9b97bba2e3576a340f93b044a3a3a09841170ab4c1eb0d5c93469fd32f/pillow-12.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:80941e6d573197a0c28f394753de529bb436b1ca990ed6e765cf42426abc39f8", size = 2454547, upload-time = "2026-01-02T09:12:18.704Z" }, + { url = "https://files.pythonhosted.org/packages/8c/87/bdf971d8bbcf80a348cc3bacfcb239f5882100fe80534b0ce67a784181d8/pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:5cb7bc1966d031aec37ddb9dcf15c2da5b2e9f7cc3ca7c54473a20a927e1eb91", size = 4062533, upload-time = "2026-01-02T09:12:20.791Z" }, + { url = "https://files.pythonhosted.org/packages/ff/4f/5eb37a681c68d605eb7034c004875c81f86ec9ef51f5be4a63eadd58859a/pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:97e9993d5ed946aba26baf9c1e8cf18adbab584b99f452ee72f7ee8acb882796", size = 4138546, upload-time = "2026-01-02T09:12:23.664Z" }, + { url = "https://files.pythonhosted.org/packages/11/6d/19a95acb2edbace40dcd582d077b991646b7083c41b98da4ed7555b59733/pillow-12.1.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:414b9a78e14ffeb98128863314e62c3f24b8a86081066625700b7985b3f529bd", size = 3601163, upload-time = "2026-01-02T09:12:26.338Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/36/2b8138e51cb42e4cc39c3297713455548be855a50558c3ac2beebdc251dd/pillow-12.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e6bdb408f7c9dd2a5ff2b14a3b0bb6d4deb29fb9961e6eb3ae2031ae9a5cec13", size = 5266086, upload-time = "2026-01-02T09:12:28.782Z" }, + { url = "https://files.pythonhosted.org/packages/53/4b/649056e4d22e1caa90816bf99cef0884aed607ed38075bd75f091a607a38/pillow-12.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3413c2ae377550f5487991d444428f1a8ae92784aac79caa8b1e3b89b175f77e", size = 4657344, upload-time = "2026-01-02T09:12:31.117Z" }, + { url = "https://files.pythonhosted.org/packages/6c/6b/c5742cea0f1ade0cd61485dc3d81f05261fc2276f537fbdc00802de56779/pillow-12.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e5dcbe95016e88437ecf33544ba5db21ef1b8dd6e1b434a2cb2a3d605299e643", size = 6232114, upload-time = "2026-01-02T09:12:32.936Z" }, + { url = "https://files.pythonhosted.org/packages/bf/8f/9f521268ce22d63991601aafd3d48d5ff7280a246a1ef62d626d67b44064/pillow-12.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d0a7735df32ccbcc98b98a1ac785cc4b19b580be1bdf0aeb5c03223220ea09d5", size = 8042708, upload-time = "2026-01-02T09:12:34.78Z" }, + { url = "https://files.pythonhosted.org/packages/1a/eb/257f38542893f021502a1bbe0c2e883c90b5cff26cc33b1584a841a06d30/pillow-12.1.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c27407a2d1b96774cbc4a7594129cc027339fd800cd081e44497722ea1179de", size = 6347762, upload-time = "2026-01-02T09:12:36.748Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5a/8ba375025701c09b309e8d5163c5a4ce0102fa86bbf8800eb0d7ac87bc51/pillow-12.1.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15c794d74303828eaa957ff8070846d0efe8c630901a1c753fdc63850e19ecd9", size = 7039265, upload-time = "2026-01-02T09:12:39.082Z" }, + { url = 
"https://files.pythonhosted.org/packages/cf/dc/cf5e4cdb3db533f539e88a7bbf9f190c64ab8a08a9bc7a4ccf55067872e4/pillow-12.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c990547452ee2800d8506c4150280757f88532f3de2a58e3022e9b179107862a", size = 6462341, upload-time = "2026-01-02T09:12:40.946Z" }, + { url = "https://files.pythonhosted.org/packages/d0/47/0291a25ac9550677e22eda48510cfc4fa4b2ef0396448b7fbdc0a6946309/pillow-12.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b63e13dd27da389ed9475b3d28510f0f954bca0041e8e551b2a4eb1eab56a39a", size = 7165395, upload-time = "2026-01-02T09:12:42.706Z" }, + { url = "https://files.pythonhosted.org/packages/4f/4c/e005a59393ec4d9416be06e6b45820403bb946a778e39ecec62f5b2b991e/pillow-12.1.0-cp314-cp314-win32.whl", hash = "sha256:1a949604f73eb07a8adab38c4fe50791f9919344398bdc8ac6b307f755fc7030", size = 6431413, upload-time = "2026-01-02T09:12:44.944Z" }, + { url = "https://files.pythonhosted.org/packages/1c/af/f23697f587ac5f9095d67e31b81c95c0249cd461a9798a061ed6709b09b5/pillow-12.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:4f9f6a650743f0ddee5593ac9e954ba1bdbc5e150bc066586d4f26127853ab94", size = 7176779, upload-time = "2026-01-02T09:12:46.727Z" }, + { url = "https://files.pythonhosted.org/packages/b3/36/6a51abf8599232f3e9afbd16d52829376a68909fe14efe29084445db4b73/pillow-12.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:808b99604f7873c800c4840f55ff389936ef1948e4e87645eaf3fccbc8477ac4", size = 2543105, upload-time = "2026-01-02T09:12:49.243Z" }, + { url = "https://files.pythonhosted.org/packages/82/54/2e1dd20c8749ff225080d6ba465a0cab4387f5db0d1c5fb1439e2d99923f/pillow-12.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bc11908616c8a283cf7d664f77411a5ed2a02009b0097ff8abbba5e79128ccf2", size = 5268571, upload-time = "2026-01-02T09:12:51.11Z" }, + { url = "https://files.pythonhosted.org/packages/57/61/571163a5ef86ec0cf30d265ac2a70ae6fc9e28413d1dc94fa37fae6bda89/pillow-12.1.0-cp314-cp314t-macosx_11_0_arm64.whl", 
hash = "sha256:896866d2d436563fa2a43a9d72f417874f16b5545955c54a64941e87c1376c61", size = 4660426, upload-time = "2026-01-02T09:12:52.865Z" }, + { url = "https://files.pythonhosted.org/packages/5e/e1/53ee5163f794aef1bf84243f755ee6897a92c708505350dd1923f4afec48/pillow-12.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8e178e3e99d3c0ea8fc64b88447f7cac8ccf058af422a6cedc690d0eadd98c51", size = 6269908, upload-time = "2026-01-02T09:12:54.884Z" }, + { url = "https://files.pythonhosted.org/packages/bc/0b/b4b4106ff0ee1afa1dc599fde6ab230417f800279745124f6c50bcffed8e/pillow-12.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:079af2fb0c599c2ec144ba2c02766d1b55498e373b3ac64687e43849fbbef5bc", size = 8074733, upload-time = "2026-01-02T09:12:56.802Z" }, + { url = "https://files.pythonhosted.org/packages/19/9f/80b411cbac4a732439e629a26ad3ef11907a8c7fc5377b7602f04f6fe4e7/pillow-12.1.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdec5e43377761c5dbca620efb69a77f6855c5a379e32ac5b158f54c84212b14", size = 6381431, upload-time = "2026-01-02T09:12:58.823Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b7/d65c45db463b66ecb6abc17c6ba6917a911202a07662247e1355ce1789e7/pillow-12.1.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:565c986f4b45c020f5421a4cea13ef294dde9509a8577f29b2fc5edc7587fff8", size = 7068529, upload-time = "2026-01-02T09:13:00.885Z" }, + { url = "https://files.pythonhosted.org/packages/50/96/dfd4cd726b4a45ae6e3c669fc9e49deb2241312605d33aba50499e9d9bd1/pillow-12.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:43aca0a55ce1eefc0aefa6253661cb54571857b1a7b2964bd8a1e3ef4b729924", size = 6492981, upload-time = "2026-01-02T09:13:03.314Z" }, + { url = "https://files.pythonhosted.org/packages/4d/1c/b5dc52cf713ae46033359c5ca920444f18a6359ce1020dd3e9c553ea5bc6/pillow-12.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:0deedf2ea233722476b3a81e8cdfbad786f7adbed5d848469fa59fe52396e4ef", size = 7191878, upload-time = "2026-01-02T09:13:05.276Z" }, + { url = "https://files.pythonhosted.org/packages/53/26/c4188248bd5edaf543864fe4834aebe9c9cb4968b6f573ce014cc42d0720/pillow-12.1.0-cp314-cp314t-win32.whl", hash = "sha256:b17fbdbe01c196e7e159aacb889e091f28e61020a8abeac07b68079b6e626988", size = 6438703, upload-time = "2026-01-02T09:13:07.491Z" }, + { url = "https://files.pythonhosted.org/packages/b8/0e/69ed296de8ea05cb03ee139cee600f424ca166e632567b2d66727f08c7ed/pillow-12.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27b9baecb428899db6c0de572d6d305cfaf38ca1596b5c0542a5182e3e74e8c6", size = 7182927, upload-time = "2026-01-02T09:13:09.841Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f5/68334c015eed9b5cff77814258717dec591ded209ab5b6fb70e2ae873d1d/pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831", size = 2545104, upload-time = "2026-01-02T09:13:12.068Z" }, + { url = "https://files.pythonhosted.org/packages/8b/bc/224b1d98cffd7164b14707c91aac83c07b047fbd8f58eba4066a3e53746a/pillow-12.1.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ca94b6aac0d7af2a10ba08c0f888b3d5114439b6b3ef39968378723622fed377", size = 5228605, upload-time = "2026-01-02T09:13:14.084Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ca/49ca7769c4550107de049ed85208240ba0f330b3f2e316f24534795702ce/pillow-12.1.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:351889afef0f485b84078ea40fe33727a0492b9af3904661b0abbafee0355b72", size = 4622245, upload-time = "2026-01-02T09:13:15.964Z" }, + { url = "https://files.pythonhosted.org/packages/73/48/fac807ce82e5955bcc2718642b94b1bd22a82a6d452aea31cbb678cddf12/pillow-12.1.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb0984b30e973f7e2884362b7d23d0a348c7143ee559f38ef3eaab640144204c", size = 5247593, upload-time = 
"2026-01-02T09:13:17.913Z" }, + { url = "https://files.pythonhosted.org/packages/d2/95/3e0742fe358c4664aed4fd05d5f5373dcdad0b27af52aa0972568541e3f4/pillow-12.1.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:84cabc7095dd535ca934d57e9ce2a72ffd216e435a84acb06b2277b1de2689bd", size = 6989008, upload-time = "2026-01-02T09:13:20.083Z" }, + { url = "https://files.pythonhosted.org/packages/5a/74/fe2ac378e4e202e56d50540d92e1ef4ff34ed687f3c60f6a121bcf99437e/pillow-12.1.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53d8b764726d3af1a138dd353116f774e3862ec7e3794e0c8781e30db0f35dfc", size = 5313824, upload-time = "2026-01-02T09:13:22.405Z" }, + { url = "https://files.pythonhosted.org/packages/f3/77/2a60dee1adee4e2655ac328dd05c02a955c1cd683b9f1b82ec3feb44727c/pillow-12.1.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5da841d81b1a05ef940a8567da92decaa15bc4d7dedb540a8c219ad83d91808a", size = 5963278, upload-time = "2026-01-02T09:13:24.706Z" }, + { url = "https://files.pythonhosted.org/packages/2d/71/64e9b1c7f04ae0027f788a248e6297d7fcc29571371fe7d45495a78172c0/pillow-12.1.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:75af0b4c229ac519b155028fa1be632d812a519abba9b46b20e50c6caa184f19", size = 7029809, upload-time = "2026-01-02T09:13:26.541Z" }, ] [[package]] @@ -3907,28 +3905,30 @@ wheels = [ [[package]] name = "psutil" -version = "7.1.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e1/88/bdd0a41e5857d5d703287598cbf08dad90aed56774ea52ae071bae9071b6/psutil-7.1.3.tar.gz", hash = "sha256:6c86281738d77335af7aec228328e944b30930899ea760ecf33a4dba66be5e74", size = 489059, upload-time = "2025-11-02T12:25:54.619Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/93/0c49e776b8734fef56ec9c5c57f923922f2cf0497d62e0f419465f28f3d0/psutil-7.1.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = 
"sha256:0005da714eee687b4b8decd3d6cc7c6db36215c9e74e5ad2264b90c3df7d92dc", size = 239751, upload-time = "2025-11-02T12:25:58.161Z" }, - { url = "https://files.pythonhosted.org/packages/6f/8d/b31e39c769e70780f007969815195a55c81a63efebdd4dbe9e7a113adb2f/psutil-7.1.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:19644c85dcb987e35eeeaefdc3915d059dac7bd1167cdcdbf27e0ce2df0c08c0", size = 240368, upload-time = "2025-11-02T12:26:00.491Z" }, - { url = "https://files.pythonhosted.org/packages/62/61/23fd4acc3c9eebbf6b6c78bcd89e5d020cfde4acf0a9233e9d4e3fa698b4/psutil-7.1.3-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95ef04cf2e5ba0ab9eaafc4a11eaae91b44f4ef5541acd2ee91d9108d00d59a7", size = 287134, upload-time = "2025-11-02T12:26:02.613Z" }, - { url = "https://files.pythonhosted.org/packages/30/1c/f921a009ea9ceb51aa355cb0cc118f68d354db36eae18174bab63affb3e6/psutil-7.1.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1068c303be3a72f8e18e412c5b2a8f6d31750fb152f9cb106b54090296c9d251", size = 289904, upload-time = "2025-11-02T12:26:05.207Z" }, - { url = "https://files.pythonhosted.org/packages/a6/82/62d68066e13e46a5116df187d319d1724b3f437ddd0f958756fc052677f4/psutil-7.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:18349c5c24b06ac5612c0428ec2a0331c26443d259e2a0144a9b24b4395b58fa", size = 249642, upload-time = "2025-11-02T12:26:07.447Z" }, - { url = "https://files.pythonhosted.org/packages/df/ad/c1cd5fe965c14a0392112f68362cfceb5230819dbb5b1888950d18a11d9f/psutil-7.1.3-cp313-cp313t-win_arm64.whl", hash = "sha256:c525ffa774fe4496282fb0b1187725793de3e7c6b29e41562733cae9ada151ee", size = 245518, upload-time = "2025-11-02T12:26:09.719Z" }, - { url = "https://files.pythonhosted.org/packages/2e/bb/6670bded3e3236eb4287c7bcdc167e9fae6e1e9286e437f7111caed2f909/psutil-7.1.3-cp314-cp314t-macosx_10_15_x86_64.whl", hash = 
"sha256:b403da1df4d6d43973dc004d19cee3b848e998ae3154cc8097d139b77156c353", size = 239843, upload-time = "2025-11-02T12:26:11.968Z" }, - { url = "https://files.pythonhosted.org/packages/b8/66/853d50e75a38c9a7370ddbeefabdd3d3116b9c31ef94dc92c6729bc36bec/psutil-7.1.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ad81425efc5e75da3f39b3e636293360ad8d0b49bed7df824c79764fb4ba9b8b", size = 240369, upload-time = "2025-11-02T12:26:14.358Z" }, - { url = "https://files.pythonhosted.org/packages/41/bd/313aba97cb5bfb26916dc29cf0646cbe4dd6a89ca69e8c6edce654876d39/psutil-7.1.3-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8f33a3702e167783a9213db10ad29650ebf383946e91bc77f28a5eb083496bc9", size = 288210, upload-time = "2025-11-02T12:26:16.699Z" }, - { url = "https://files.pythonhosted.org/packages/c2/fa/76e3c06e760927a0cfb5705eb38164254de34e9bd86db656d4dbaa228b04/psutil-7.1.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fac9cd332c67f4422504297889da5ab7e05fd11e3c4392140f7370f4208ded1f", size = 291182, upload-time = "2025-11-02T12:26:18.848Z" }, - { url = "https://files.pythonhosted.org/packages/0f/1d/5774a91607035ee5078b8fd747686ebec28a962f178712de100d00b78a32/psutil-7.1.3-cp314-cp314t-win_amd64.whl", hash = "sha256:3792983e23b69843aea49c8f5b8f115572c5ab64c153bada5270086a2123c7e7", size = 250466, upload-time = "2025-11-02T12:26:21.183Z" }, - { url = "https://files.pythonhosted.org/packages/00/ca/e426584bacb43a5cb1ac91fae1937f478cd8fbe5e4ff96574e698a2c77cd/psutil-7.1.3-cp314-cp314t-win_arm64.whl", hash = "sha256:31d77fcedb7529f27bb3a0472bea9334349f9a04160e8e6e5020f22c59893264", size = 245756, upload-time = "2025-11-02T12:26:23.148Z" }, - { url = "https://files.pythonhosted.org/packages/ef/94/46b9154a800253e7ecff5aaacdf8ebf43db99de4a2dfa18575b02548654e/psutil-7.1.3-cp36-abi3-macosx_10_9_x86_64.whl", hash = 
"sha256:2bdbcd0e58ca14996a42adf3621a6244f1bb2e2e528886959c72cf1e326677ab", size = 238359, upload-time = "2025-11-02T12:26:25.284Z" }, - { url = "https://files.pythonhosted.org/packages/68/3a/9f93cff5c025029a36d9a92fef47220ab4692ee7f2be0fba9f92813d0cb8/psutil-7.1.3-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:bc31fa00f1fbc3c3802141eede66f3a2d51d89716a194bf2cd6fc68310a19880", size = 239171, upload-time = "2025-11-02T12:26:27.23Z" }, - { url = "https://files.pythonhosted.org/packages/ce/b1/5f49af514f76431ba4eea935b8ad3725cdeb397e9245ab919dbc1d1dc20f/psutil-7.1.3-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3bb428f9f05c1225a558f53e30ccbad9930b11c3fc206836242de1091d3e7dd3", size = 263261, upload-time = "2025-11-02T12:26:29.48Z" }, - { url = "https://files.pythonhosted.org/packages/e0/95/992c8816a74016eb095e73585d747e0a8ea21a061ed3689474fabb29a395/psutil-7.1.3-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56d974e02ca2c8eb4812c3f76c30e28836fffc311d55d979f1465c1feeb2b68b", size = 264635, upload-time = "2025-11-02T12:26:31.74Z" }, - { url = "https://files.pythonhosted.org/packages/55/4c/c3ed1a622b6ae2fd3c945a366e64eb35247a31e4db16cf5095e269e8eb3c/psutil-7.1.3-cp37-abi3-win_amd64.whl", hash = "sha256:f39c2c19fe824b47484b96f9692932248a54c43799a84282cfe58d05a6449efd", size = 247633, upload-time = "2025-11-02T12:26:33.887Z" }, - { url = "https://files.pythonhosted.org/packages/c9/ad/33b2ccec09bf96c2b2ef3f9a6f66baac8253d7565d8839e024a6b905d45d/psutil-7.1.3-cp37-abi3-win_arm64.whl", hash = "sha256:bd0d69cee829226a761e92f28140bec9a5ee9d5b4fb4b0cc589068dbfff559b1", size = 244608, upload-time = "2025-11-02T12:26:36.136Z" }, +version = "7.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/73/cb/09e5184fb5fc0358d110fc3ca7f6b1d033800734d34cac10f4136cfac10e/psutil-7.2.1.tar.gz", hash = 
"sha256:f7583aec590485b43ca601dd9cea0dcd65bd7bb21d30ef4ddbf4ea6b5ed1bdd3", size = 490253, upload-time = "2025-12-29T08:26:00.169Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/8e/f0c242053a368c2aa89584ecd1b054a18683f13d6e5a318fc9ec36582c94/psutil-7.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ba9f33bb525b14c3ea563b2fd521a84d2fa214ec59e3e6a2858f78d0844dd60d", size = 129624, upload-time = "2025-12-29T08:26:04.255Z" }, + { url = "https://files.pythonhosted.org/packages/26/97/a58a4968f8990617decee234258a2b4fc7cd9e35668387646c1963e69f26/psutil-7.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:81442dac7abfc2f4f4385ea9e12ddf5a796721c0f6133260687fec5c3780fa49", size = 130132, upload-time = "2025-12-29T08:26:06.228Z" }, + { url = "https://files.pythonhosted.org/packages/db/6d/ed44901e830739af5f72a85fa7ec5ff1edea7f81bfbf4875e409007149bd/psutil-7.2.1-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ea46c0d060491051d39f0d2cff4f98d5c72b288289f57a21556cc7d504db37fc", size = 180612, upload-time = "2025-12-29T08:26:08.276Z" }, + { url = "https://files.pythonhosted.org/packages/c7/65/b628f8459bca4efbfae50d4bf3feaab803de9a160b9d5f3bd9295a33f0c2/psutil-7.2.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35630d5af80d5d0d49cfc4d64c1c13838baf6717a13effb35869a5919b854cdf", size = 183201, upload-time = "2025-12-29T08:26:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/fb/23/851cadc9764edcc18f0effe7d0bf69f727d4cf2442deb4a9f78d4e4f30f2/psutil-7.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:923f8653416604e356073e6e0bccbe7c09990acef442def2f5640dd0faa9689f", size = 139081, upload-time = "2025-12-29T08:26:12.483Z" }, + { url = "https://files.pythonhosted.org/packages/59/82/d63e8494ec5758029f31c6cb06d7d161175d8281e91d011a4a441c8a43b5/psutil-7.2.1-cp313-cp313t-win_arm64.whl", hash = 
"sha256:cfbe6b40ca48019a51827f20d830887b3107a74a79b01ceb8cc8de4ccb17b672", size = 134767, upload-time = "2025-12-29T08:26:14.528Z" }, + { url = "https://files.pythonhosted.org/packages/05/c2/5fb764bd61e40e1fe756a44bd4c21827228394c17414ade348e28f83cd79/psutil-7.2.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:494c513ccc53225ae23eec7fe6e1482f1b8a44674241b54561f755a898650679", size = 129716, upload-time = "2025-12-29T08:26:16.017Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d2/935039c20e06f615d9ca6ca0ab756cf8408a19d298ffaa08666bc18dc805/psutil-7.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3fce5f92c22b00cdefd1645aa58ab4877a01679e901555067b1bd77039aa589f", size = 130133, upload-time = "2025-12-29T08:26:18.009Z" }, + { url = "https://files.pythonhosted.org/packages/77/69/19f1eb0e01d24c2b3eacbc2f78d3b5add8a89bf0bb69465bc8d563cc33de/psutil-7.2.1-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93f3f7b0bb07711b49626e7940d6fe52aa9940ad86e8f7e74842e73189712129", size = 181518, upload-time = "2025-12-29T08:26:20.241Z" }, + { url = "https://files.pythonhosted.org/packages/e1/6d/7e18b1b4fa13ad370787626c95887b027656ad4829c156bb6569d02f3262/psutil-7.2.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d34d2ca888208eea2b5c68186841336a7f5e0b990edec929be909353a202768a", size = 184348, upload-time = "2025-12-29T08:26:22.215Z" }, + { url = "https://files.pythonhosted.org/packages/98/60/1672114392dd879586d60dd97896325df47d9a130ac7401318005aab28ec/psutil-7.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2ceae842a78d1603753561132d5ad1b2f8a7979cb0c283f5b52fb4e6e14b1a79", size = 140400, upload-time = "2025-12-29T08:26:23.993Z" }, + { url = "https://files.pythonhosted.org/packages/fb/7b/d0e9d4513c46e46897b46bcfc410d51fc65735837ea57a25170f298326e6/psutil-7.2.1-cp314-cp314t-win_arm64.whl", hash = 
"sha256:08a2f175e48a898c8eb8eace45ce01777f4785bc744c90aa2cc7f2fa5462a266", size = 135430, upload-time = "2025-12-29T08:26:25.999Z" }, + { url = "https://files.pythonhosted.org/packages/c5/cf/5180eb8c8bdf6a503c6919f1da28328bd1e6b3b1b5b9d5b01ae64f019616/psutil-7.2.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b2e953fcfaedcfbc952b44744f22d16575d3aa78eb4f51ae74165b4e96e55f42", size = 128137, upload-time = "2025-12-29T08:26:27.759Z" }, + { url = "https://files.pythonhosted.org/packages/c5/2c/78e4a789306a92ade5000da4f5de3255202c534acdadc3aac7b5458fadef/psutil-7.2.1-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:05cc68dbb8c174828624062e73078e7e35406f4ca2d0866c272c2410d8ef06d1", size = 128947, upload-time = "2025-12-29T08:26:29.548Z" }, + { url = "https://files.pythonhosted.org/packages/29/f8/40e01c350ad9a2b3cb4e6adbcc8a83b17ee50dd5792102b6142385937db5/psutil-7.2.1-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e38404ca2bb30ed7267a46c02f06ff842e92da3bb8c5bfdadbd35a5722314d8", size = 154694, upload-time = "2025-12-29T08:26:32.147Z" }, + { url = "https://files.pythonhosted.org/packages/06/e4/b751cdf839c011a9714a783f120e6a86b7494eb70044d7d81a25a5cd295f/psutil-7.2.1-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab2b98c9fc19f13f59628d94df5cc4cc4844bc572467d113a8b517d634e362c6", size = 156136, upload-time = "2025-12-29T08:26:34.079Z" }, + { url = "https://files.pythonhosted.org/packages/44/ad/bbf6595a8134ee1e94a4487af3f132cef7fce43aef4a93b49912a48c3af7/psutil-7.2.1-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f78baafb38436d5a128f837fab2d92c276dfb48af01a240b861ae02b2413ada8", size = 148108, upload-time = "2025-12-29T08:26:36.225Z" }, + { url = "https://files.pythonhosted.org/packages/1c/15/dd6fd869753ce82ff64dcbc18356093471a5a5adf4f77ed1f805d473d859/psutil-7.2.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:99a4cd17a5fdd1f3d014396502daa70b5ec21bf4ffe38393e152f8e449757d67", size = 147402, upload-time = "2025-12-29T08:26:39.21Z" }, + { url = "https://files.pythonhosted.org/packages/34/68/d9317542e3f2b180c4306e3f45d3c922d7e86d8ce39f941bb9e2e9d8599e/psutil-7.2.1-cp37-abi3-win_amd64.whl", hash = "sha256:b1b0671619343aa71c20ff9767eced0483e4fc9e1f489d50923738caf6a03c17", size = 136938, upload-time = "2025-12-29T08:26:41.036Z" }, + { url = "https://files.pythonhosted.org/packages/3e/73/2ce007f4198c80fcf2cb24c169884f833fe93fbc03d55d302627b094ee91/psutil-7.2.1-cp37-abi3-win_arm64.whl", hash = "sha256:0d67c1822c355aa6f7314d92018fb4268a76668a536f133599b91edd48759442", size = 133836, upload-time = "2025-12-29T08:26:43.086Z" }, ] [[package]] @@ -4245,39 +4245,37 @@ wheels = [ [[package]] name = "pynacl" -version = "1.6.1" +version = "1.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b2/46/aeca065d227e2265125aea590c9c47fbf5786128c9400ee0eb7c88931f06/pynacl-1.6.1.tar.gz", hash = "sha256:8d361dac0309f2b6ad33b349a56cd163c98430d409fa503b10b70b3ad66eaa1d", size = 3506616, upload-time = "2025-11-10T16:02:13.195Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/75/d6/4b2dca33ed512de8f54e5c6074aa06eaeb225bfbcd9b16f33a414389d6bd/pynacl-1.6.1-cp314-cp314t-macosx_10_10_universal2.whl", hash = "sha256:7d7c09749450c385301a3c20dca967a525152ae4608c0a096fe8464bfc3df93d", size = 389109, upload-time = "2025-11-10T16:01:28.79Z" }, - { url = "https://files.pythonhosted.org/packages/3c/30/e8dbb8ff4fa2559bbbb2187ba0d0d7faf728d17cb8396ecf4a898b22d3da/pynacl-1.6.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc734c1696ffd49b40f7c1779c89ba908157c57345cf626be2e0719488a076d3", size = 808254, upload-time = 
"2025-11-10T16:01:37.839Z" }, - { url = "https://files.pythonhosted.org/packages/44/f9/f5449c652f31da00249638dbab065ad4969c635119094b79b17c3a4da2ab/pynacl-1.6.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3cd787ec1f5c155dc8ecf39b1333cfef41415dc96d392f1ce288b4fe970df489", size = 1407365, upload-time = "2025-11-10T16:01:40.454Z" }, - { url = "https://files.pythonhosted.org/packages/eb/2f/9aa5605f473b712065c0a193ebf4ad4725d7a245533f0cd7e5dcdbc78f35/pynacl-1.6.1-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b35d93ab2df03ecb3aa506be0d3c73609a51449ae0855c2e89c7ed44abde40b", size = 843842, upload-time = "2025-11-10T16:01:30.524Z" }, - { url = "https://files.pythonhosted.org/packages/32/8d/748f0f6956e207453da8f5f21a70885fbbb2e060d5c9d78e0a4a06781451/pynacl-1.6.1-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dece79aecbb8f4640a1adbb81e4aa3bfb0e98e99834884a80eb3f33c7c30e708", size = 1445559, upload-time = "2025-11-10T16:01:33.663Z" }, - { url = "https://files.pythonhosted.org/packages/78/d0/2387f0dcb0e9816f38373999e48db4728ed724d31accdd4e737473319d35/pynacl-1.6.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:c2228054f04bf32d558fb89bb99f163a8197d5a9bf4efa13069a7fa8d4b93fc3", size = 825791, upload-time = "2025-11-10T16:01:34.823Z" }, - { url = "https://files.pythonhosted.org/packages/18/3d/ef6fb7eb072aaf15f280bc66f26ab97e7fc9efa50fb1927683013ef47473/pynacl-1.6.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:2b12f1b97346f177affcdfdc78875ff42637cb40dcf79484a97dae3448083a78", size = 1410843, upload-time = "2025-11-10T16:01:36.401Z" }, - { url = "https://files.pythonhosted.org/packages/e3/fb/23824a017526850ee7d8a1cc4cd1e3e5082800522c10832edbbca8619537/pynacl-1.6.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e735c3a1bdfde3834503baf1a6d74d4a143920281cb724ba29fb84c9f49b9c48", size = 801140, upload-time = "2025-11-10T16:01:42.013Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/d1/ebc6b182cb98603a35635b727d62f094bc201bf610f97a3bb6357fe688d2/pynacl-1.6.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3384a454adf5d716a9fadcb5eb2e3e72cd49302d1374a60edc531c9957a9b014", size = 1371966, upload-time = "2025-11-10T16:01:43.297Z" }, - { url = "https://files.pythonhosted.org/packages/64/f4/c9d7b6f02924b1f31db546c7bd2a83a2421c6b4a8e6a2e53425c9f2802e0/pynacl-1.6.1-cp314-cp314t-win32.whl", hash = "sha256:d8615ee34d01c8e0ab3f302dcdd7b32e2bcf698ba5f4809e7cc407c8cdea7717", size = 230482, upload-time = "2025-11-10T16:01:47.688Z" }, - { url = "https://files.pythonhosted.org/packages/c4/2c/942477957fba22da7bf99131850e5ebdff66623418ab48964e78a7a8293e/pynacl-1.6.1-cp314-cp314t-win_amd64.whl", hash = "sha256:5f5b35c1a266f8a9ad22525049280a600b19edd1f785bccd01ae838437dcf935", size = 243232, upload-time = "2025-11-10T16:01:45.208Z" }, - { url = "https://files.pythonhosted.org/packages/7a/0c/bdbc0d04a53b96a765ab03aa2cf9a76ad8653d70bf1665459b9a0dedaa1c/pynacl-1.6.1-cp314-cp314t-win_arm64.whl", hash = "sha256:d984c91fe3494793b2a1fb1e91429539c6c28e9ec8209d26d25041ec599ccf63", size = 187907, upload-time = "2025-11-10T16:01:46.328Z" }, - { url = "https://files.pythonhosted.org/packages/49/41/3cfb3b4f3519f6ff62bf71bf1722547644bcfb1b05b8fdbdc300249ba113/pynacl-1.6.1-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:a6f9fd6d6639b1e81115c7f8ff16b8dedba1e8098d2756275d63d208b0e32021", size = 387591, upload-time = "2025-11-10T16:01:49.1Z" }, - { url = "https://files.pythonhosted.org/packages/18/21/b8a6563637799f617a3960f659513eccb3fcc655d5fc2be6e9dc6416826f/pynacl-1.6.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e49a3f3d0da9f79c1bec2aa013261ab9fa651c7da045d376bd306cf7c1792993", size = 798866, upload-time = "2025-11-10T16:01:55.688Z" }, - { url = 
"https://files.pythonhosted.org/packages/e8/6c/dc38033bc3ea461e05ae8f15a81e0e67ab9a01861d352ae971c99de23e7c/pynacl-1.6.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7713f8977b5d25f54a811ec9efa2738ac592e846dd6e8a4d3f7578346a841078", size = 1398001, upload-time = "2025-11-10T16:01:57.101Z" }, - { url = "https://files.pythonhosted.org/packages/9f/05/3ec0796a9917100a62c5073b20c4bce7bf0fea49e99b7906d1699cc7b61b/pynacl-1.6.1-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a3becafc1ee2e5ea7f9abc642f56b82dcf5be69b961e782a96ea52b55d8a9fc", size = 834024, upload-time = "2025-11-10T16:01:50.228Z" }, - { url = "https://files.pythonhosted.org/packages/f0/b7/ae9982be0f344f58d9c64a1c25d1f0125c79201634efe3c87305ac7cb3e3/pynacl-1.6.1-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4ce50d19f1566c391fedc8dc2f2f5be265ae214112ebe55315e41d1f36a7f0a9", size = 1436766, upload-time = "2025-11-10T16:01:51.886Z" }, - { url = "https://files.pythonhosted.org/packages/b4/51/b2ccbf89cf3025a02e044dd68a365cad593ebf70f532299f2c047d2b7714/pynacl-1.6.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:543f869140f67d42b9b8d47f922552d7a967e6c116aad028c9bfc5f3f3b3a7b7", size = 817275, upload-time = "2025-11-10T16:01:53.351Z" }, - { url = "https://files.pythonhosted.org/packages/a8/6c/dd9ee8214edf63ac563b08a9b30f98d116942b621d39a751ac3256694536/pynacl-1.6.1-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a2bb472458c7ca959aeeff8401b8efef329b0fc44a89d3775cffe8fad3398ad8", size = 1401891, upload-time = "2025-11-10T16:01:54.587Z" }, - { url = "https://files.pythonhosted.org/packages/0f/c1/97d3e1c83772d78ee1db3053fd674bc6c524afbace2bfe8d419fd55d7ed1/pynacl-1.6.1-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3206fa98737fdc66d59b8782cecc3d37d30aeec4593d1c8c145825a345bba0f0", size = 772291, upload-time = "2025-11-10T16:01:58.111Z" }, - { url = 
"https://files.pythonhosted.org/packages/4d/ca/691ff2fe12f3bb3e43e8e8df4b806f6384593d427f635104d337b8e00291/pynacl-1.6.1-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:53543b4f3d8acb344f75fd4d49f75e6572fce139f4bfb4815a9282296ff9f4c0", size = 1370839, upload-time = "2025-11-10T16:01:59.252Z" }, - { url = "https://files.pythonhosted.org/packages/30/27/06fe5389d30391fce006442246062cc35773c84fbcad0209fbbf5e173734/pynacl-1.6.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:319de653ef84c4f04e045eb250e6101d23132372b0a61a7acf91bac0fda8e58c", size = 791371, upload-time = "2025-11-10T16:02:01.075Z" }, - { url = "https://files.pythonhosted.org/packages/2c/7a/e2bde8c9d39074a5aa046c7d7953401608d1f16f71e237f4bef3fb9d7e49/pynacl-1.6.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:262a8de6bba4aee8a66f5edf62c214b06647461c9b6b641f8cd0cb1e3b3196fe", size = 1363031, upload-time = "2025-11-10T16:02:02.656Z" }, - { url = "https://files.pythonhosted.org/packages/dd/b6/63fd77264dae1087770a1bb414bc604470f58fbc21d83822fc9c76248076/pynacl-1.6.1-cp38-abi3-win32.whl", hash = "sha256:9fd1a4eb03caf8a2fe27b515a998d26923adb9ddb68db78e35ca2875a3830dde", size = 226585, upload-time = "2025-11-10T16:02:07.116Z" }, - { url = "https://files.pythonhosted.org/packages/12/c8/b419180f3fdb72ab4d45e1d88580761c267c7ca6eda9a20dcbcba254efe6/pynacl-1.6.1-cp38-abi3-win_amd64.whl", hash = "sha256:a569a4069a7855f963940040f35e87d8bc084cb2d6347428d5ad20550a0a1a21", size = 238923, upload-time = "2025-11-10T16:02:04.401Z" }, - { url = "https://files.pythonhosted.org/packages/35/76/c34426d532e4dce7ff36e4d92cb20f4cbbd94b619964b93d24e8f5b5510f/pynacl-1.6.1-cp38-abi3-win_arm64.whl", hash = "sha256:5953e8b8cfadb10889a6e7bd0f53041a745d1b3d30111386a1bb37af171e6daf", size = 183970, upload-time = "2025-11-10T16:02:05.786Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/d9/9a/4019b524b03a13438637b11538c82781a5eda427394380381af8f04f467a/pynacl-1.6.2.tar.gz", hash = 
"sha256:018494d6d696ae03c7e656e5e74cdfd8ea1326962cc401bcf018f1ed8436811c", size = 3511692, upload-time = "2026-01-01T17:48:10.851Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/79/0e3c34dc3c4671f67d251c07aa8eb100916f250ee470df230b0ab89551b4/pynacl-1.6.2-cp314-cp314t-macosx_10_10_universal2.whl", hash = "sha256:622d7b07cc5c02c666795792931b50c91f3ce3c2649762efb1ef0d5684c81594", size = 390064, upload-time = "2026-01-01T17:31:57.264Z" }, + { url = "https://files.pythonhosted.org/packages/eb/1c/23a26e931736e13b16483795c8a6b2f641bf6a3d5238c22b070a5112722c/pynacl-1.6.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d071c6a9a4c94d79eb665db4ce5cedc537faf74f2355e4d502591d850d3913c0", size = 809370, upload-time = "2026-01-01T17:31:59.198Z" }, + { url = "https://files.pythonhosted.org/packages/87/74/8d4b718f8a22aea9e8dcc8b95deb76d4aae380e2f5b570cc70b5fd0a852d/pynacl-1.6.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe9847ca47d287af41e82be1dd5e23023d3c31a951da134121ab02e42ac218c9", size = 1408304, upload-time = "2026-01-01T17:32:01.162Z" }, + { url = "https://files.pythonhosted.org/packages/fd/73/be4fdd3a6a87fe8a4553380c2b47fbd1f7f58292eb820902f5c8ac7de7b0/pynacl-1.6.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:04316d1fc625d860b6c162fff704eb8426b1a8bcd3abacea11142cbd99a6b574", size = 844871, upload-time = "2026-01-01T17:32:02.824Z" }, + { url = "https://files.pythonhosted.org/packages/55/ad/6efc57ab75ee4422e96b5f2697d51bbcf6cdcc091e66310df91fbdc144a8/pynacl-1.6.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44081faff368d6c5553ccf55322ef2819abb40e25afaec7e740f159f74813634", size = 1446356, upload-time = "2026-01-01T17:32:04.452Z" }, + { url = "https://files.pythonhosted.org/packages/78/b7/928ee9c4779caa0a915844311ab9fb5f99585621c5d6e4574538a17dca07/pynacl-1.6.2-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = 
"sha256:a9f9932d8d2811ce1a8ffa79dcbdf3970e7355b5c8eb0c1a881a57e7f7d96e88", size = 826814, upload-time = "2026-01-01T17:32:06.078Z" }, + { url = "https://files.pythonhosted.org/packages/f7/a9/1bdba746a2be20f8809fee75c10e3159d75864ef69c6b0dd168fc60e485d/pynacl-1.6.2-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:bc4a36b28dd72fb4845e5d8f9760610588a96d5a51f01d84d8c6ff9849968c14", size = 1411742, upload-time = "2026-01-01T17:32:07.651Z" }, + { url = "https://files.pythonhosted.org/packages/f3/2f/5e7ea8d85f9f3ea5b6b87db1d8388daa3587eed181bdeb0306816fdbbe79/pynacl-1.6.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bffb6d0f6becacb6526f8f42adfb5efb26337056ee0831fb9a7044d1a964444", size = 801714, upload-time = "2026-01-01T17:32:09.558Z" }, + { url = "https://files.pythonhosted.org/packages/06/ea/43fe2f7eab5f200e40fb10d305bf6f87ea31b3bbc83443eac37cd34a9e1e/pynacl-1.6.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2fef529ef3ee487ad8113d287a593fa26f48ee3620d92ecc6f1d09ea38e0709b", size = 1372257, upload-time = "2026-01-01T17:32:11.026Z" }, + { url = "https://files.pythonhosted.org/packages/4d/54/c9ea116412788629b1347e415f72195c25eb2f3809b2d3e7b25f5c79f13a/pynacl-1.6.2-cp314-cp314t-win32.whl", hash = "sha256:a84bf1c20339d06dc0c85d9aea9637a24f718f375d861b2668b2f9f96fa51145", size = 231319, upload-time = "2026-01-01T17:32:12.46Z" }, + { url = "https://files.pythonhosted.org/packages/ce/04/64e9d76646abac2dccf904fccba352a86e7d172647557f35b9fe2a5ee4a1/pynacl-1.6.2-cp314-cp314t-win_amd64.whl", hash = "sha256:320ef68a41c87547c91a8b58903c9caa641ab01e8512ce291085b5fe2fcb7590", size = 244044, upload-time = "2026-01-01T17:32:13.781Z" }, + { url = "https://files.pythonhosted.org/packages/33/33/7873dc161c6a06f43cda13dec67b6fe152cb2f982581151956fa5e5cdb47/pynacl-1.6.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d29bfe37e20e015a7d8b23cfc8bd6aa7909c92a1b8f41ee416bbb3e79ef182b2", size = 188740, upload-time = "2026-01-01T17:32:15.083Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/7b/4845bbf88e94586ec47a432da4e9107e3fc3ce37eb412b1398630a37f7dd/pynacl-1.6.2-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:c949ea47e4206af7c8f604b8278093b674f7c79ed0d4719cc836902bf4517465", size = 388458, upload-time = "2026-01-01T17:32:16.829Z" }, + { url = "https://files.pythonhosted.org/packages/1e/b4/e927e0653ba63b02a4ca5b4d852a8d1d678afbf69b3dbf9c4d0785ac905c/pynacl-1.6.2-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8845c0631c0be43abdd865511c41eab235e0be69c81dc66a50911594198679b0", size = 800020, upload-time = "2026-01-01T17:32:18.34Z" }, + { url = "https://files.pythonhosted.org/packages/7f/81/d60984052df5c97b1d24365bc1e30024379b42c4edcd79d2436b1b9806f2/pynacl-1.6.2-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:22de65bb9010a725b0dac248f353bb072969c94fa8d6b1f34b87d7953cf7bbe4", size = 1399174, upload-time = "2026-01-01T17:32:20.239Z" }, + { url = "https://files.pythonhosted.org/packages/68/f7/322f2f9915c4ef27d140101dd0ed26b479f7e6f5f183590fd32dfc48c4d3/pynacl-1.6.2-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:46065496ab748469cdd999246d17e301b2c24ae2fdf739132e580a0e94c94a87", size = 835085, upload-time = "2026-01-01T17:32:22.24Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d0/f301f83ac8dbe53442c5a43f6a39016f94f754d7a9815a875b65e218a307/pynacl-1.6.2-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a66d6fb6ae7661c58995f9c6435bda2b1e68b54b598a6a10247bfcdadac996c", size = 1437614, upload-time = "2026-01-01T17:32:23.766Z" }, + { url = "https://files.pythonhosted.org/packages/c4/58/fc6e649762b029315325ace1a8c6be66125e42f67416d3dbd47b69563d61/pynacl-1.6.2-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:26bfcd00dcf2cf160f122186af731ae30ab120c18e8375684ec2670dccd28130", size = 818251, upload-time = "2026-01-01T17:32:25.69Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/a8/b917096b1accc9acd878819a49d3d84875731a41eb665f6ebc826b1af99e/pynacl-1.6.2-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c8a231e36ec2cab018c4ad4358c386e36eede0319a0c41fed24f840b1dac59f6", size = 1402859, upload-time = "2026-01-01T17:32:27.215Z" }, + { url = "https://files.pythonhosted.org/packages/85/42/fe60b5f4473e12c72f977548e4028156f4d340b884c635ec6b063fe7e9a5/pynacl-1.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:68be3a09455743ff9505491220b64440ced8973fe930f270c8e07ccfa25b1f9e", size = 791926, upload-time = "2026-01-01T17:32:29.314Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f9/e40e318c604259301cc091a2a63f237d9e7b424c4851cafaea4ea7c4834e/pynacl-1.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b097553b380236d51ed11356c953bf8ce36a29a3e596e934ecabe76c985a577", size = 1363101, upload-time = "2026-01-01T17:32:31.263Z" }, + { url = "https://files.pythonhosted.org/packages/48/47/e761c254f410c023a469284a9bc210933e18588ca87706ae93002c05114c/pynacl-1.6.2-cp38-abi3-win32.whl", hash = "sha256:5811c72b473b2f38f7e2a3dc4f8642e3a3e9b5e7317266e4ced1fba85cae41aa", size = 227421, upload-time = "2026-01-01T17:32:33.076Z" }, + { url = "https://files.pythonhosted.org/packages/41/ad/334600e8cacc7d86587fe5f565480fde569dfb487389c8e1be56ac21d8ac/pynacl-1.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:62985f233210dee6548c223301b6c25440852e13d59a8b81490203c3227c5ba0", size = 239754, upload-time = "2026-01-01T17:32:34.557Z" }, + { url = "https://files.pythonhosted.org/packages/29/7d/5945b5af29534641820d3bd7b00962abbbdfee84ec7e19f0d5b3175f9a31/pynacl-1.6.2-cp38-abi3-win_arm64.whl", hash = "sha256:834a43af110f743a754448463e8fd61259cd4ab5bbedcf70f9dabad1d28a394c", size = 184801, upload-time = "2026-01-01T17:32:36.309Z" }, ] [[package]] @@ -4376,15 +4374,15 @@ wheels = [ [[package]] name = "python-gitlab" -version = "7.0.0" +version = "7.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ 
{ name = "requests" }, { name = "requests-toolbelt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5e/c4/0b613303b4f0fcda69b3d2e03d0a1fb1b6b079a7c7832e03a8d92461e9fe/python_gitlab-7.0.0.tar.gz", hash = "sha256:e4d934430f64efc09e6208b782c61cc0a3389527765e03ffbef17f4323dce441", size = 400568, upload-time = "2025-10-29T15:06:02.069Z" } +sdist = { url = "https://files.pythonhosted.org/packages/31/98/0b5d0a0367b90aec818298390b60ae65e6a08989cf5140271d0ee0206882/python_gitlab-7.1.0.tar.gz", hash = "sha256:1c34da3de40ad21675d788136f73d20a60649513e692f52c5a9720434db97c46", size = 401058, upload-time = "2025-12-28T01:27:01.369Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/9e/811edc46a15f8deb828cba7ef8aab3451dc11ca72d033f3df72a5af865d9/python_gitlab-7.0.0-py3-none-any.whl", hash = "sha256:712a6c8c5e79e7e66f6dabb25d8fe7831a6b238d4a5132f8231df6b3b890ceff", size = 144415, upload-time = "2025-10-29T15:06:00.232Z" }, + { url = "https://files.pythonhosted.org/packages/14/44/70fa1e395731b6a4b1f249d5f7326f3bb6281e2cf94d6535f679239f4b93/python_gitlab-7.1.0-py3-none-any.whl", hash = "sha256:8e42030cf27674e7ec9ea1f6d2fedcaaef0a6210f5fa22c80721abaa3a4fec90", size = 144441, upload-time = "2025-12-28T01:26:59.726Z" }, ] [[package]] @@ -4517,7 +4515,7 @@ wheels = [ [[package]] name = "ray" -version = "2.51.2" +version = "2.53.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -4530,21 +4528,21 @@ dependencies = [ { name = "requests" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/ad/59270b7d1003152ef231b65c38c3721066fc970b2a2475314e7c8ee81990/ray-2.51.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:eb9b995de9ba3110373f00e77dda86f6a55a80a58114b1eae5e6daf1f5697338", size = 68040029, upload-time = "2025-11-29T00:28:25.435Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/bf/43442642cf4f29ac9ef721d9b184512ed84436e65d8244f1867e31b1ecdb/ray-2.51.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:983adacd9cecf2f74f7915560036f14c5d4fabdf6f65d959debc92820373729d", size = 70344819, upload-time = "2025-11-29T00:28:32.157Z" }, - { url = "https://files.pythonhosted.org/packages/57/78/79d8b884492b28c5d9ec99fd8750baaf30e311e79013e9f137dafee3b246/ray-2.51.2-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:572d8f7e95e506d6264c7b916fe70e765e3367d5f1bc9755bc1d73c8607a2ac6", size = 71172369, upload-time = "2025-11-29T00:28:38.511Z" }, - { url = "https://files.pythonhosted.org/packages/6a/26/632c509eda0742f6c9e8c876ebe308cfdefdd2cdd414fcb4e65c37490995/ray-2.51.2-cp310-cp310-win_amd64.whl", hash = "sha256:05d1cdd0352f9da10555899cb6212ac9a2e783b05c20c2989cae09531c1b1969", size = 26696512, upload-time = "2025-11-29T00:28:42.955Z" }, - { url = "https://files.pythonhosted.org/packages/6d/fa/4ee6a516d9de9d5fa7ecd0e59888c9ab1a2bedaec06fe9c6b91d0f9523b2/ray-2.51.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:26100d25b0ca5162e7404d57247ad697514709c6f41db7efb3d312d78a5ef292", size = 68044847, upload-time = "2025-11-29T00:28:47.902Z" }, - { url = "https://files.pythonhosted.org/packages/92/ca/06b1b761e8c4398c2818f0ac04e14c2f2937fa79bf9be6ffc74d785641fb/ray-2.51.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:1102471b4edb08605001be781f094c2291805d8e4a118ad8b59b833b12d4f13f", size = 70464861, upload-time = "2025-11-29T00:28:53.591Z" }, - { url = "https://files.pythonhosted.org/packages/7c/b0/7dda0bf542f3cf08fae67c57ec61422d4f8b3d0342d0d03057eefb93886e/ray-2.51.2-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:ad6aafbb7f67d1edbe3cad72b9e33ee99b0ed31ca7210ee8c6af9db1d1c4d850", size = 71286437, upload-time = "2025-11-29T00:28:59.26Z" }, - { url = "https://files.pythonhosted.org/packages/57/c9/31289a53bf4418b9fe71be8f7780ee520ef5f76fb5a5cdd5dcff9e41fb0b/ray-2.51.2-cp311-cp311-win_amd64.whl", 
hash = "sha256:a48e3871cc2b526bca7de84527fdf56875115829fab518cc938dd4c64e0174b9", size = 26692167, upload-time = "2025-11-29T00:29:03.786Z" }, - { url = "https://files.pythonhosted.org/packages/70/54/66fcfebd26c9747d908e2ac24f3a8a5502e84f19ea1e7a9b7f4d4a12bc34/ray-2.51.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:461b0e711f73cebc68128bca7202bef8db2c0e14dc6d49140f96549e5e752eb1", size = 68030141, upload-time = "2025-11-29T00:29:08.67Z" }, - { url = "https://files.pythonhosted.org/packages/0e/9e/7add3c78a5a3d05f9c702d247da83a8a3e30d57eae153985f48ec3309c82/ray-2.51.2-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:5c97f29574072e3568a2714a84e6948fb457ce09eefd251c919221584b2d458d", size = 70506728, upload-time = "2025-11-29T00:29:14.051Z" }, - { url = "https://files.pythonhosted.org/packages/b3/8e/5d1325619399d7eb9563e2f883f8e782fb26b39a122d6d629e54c8989a5a/ray-2.51.2-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:7b2a842744a1d4b47af8f3c0665a319736139518dd2e26fb9e18114281d8f9ea", size = 71359570, upload-time = "2025-11-29T00:29:19.508Z" }, - { url = "https://files.pythonhosted.org/packages/ba/96/ec1ee03fb1731d9e09d94d7ba6d9e47fce886d7cc79aac47e8422fe9c528/ray-2.51.2-cp312-cp312-win_amd64.whl", hash = "sha256:6b04ca7dccf540da2ab07fd7073009dfe04d9d084d705e337572272fa3e56485", size = 26675734, upload-time = "2025-11-29T00:29:24.27Z" }, - { url = "https://files.pythonhosted.org/packages/70/89/255ac2a70928a1d439c98fca9f3437cabbbebd3ac767523df608cce39197/ray-2.51.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:c9ed290667868c809eb467ad8830d887fdce10dac2c674b3d43d3b3b5f9c7b07", size = 67975149, upload-time = "2025-11-29T00:29:28.995Z" }, - { url = "https://files.pythonhosted.org/packages/d3/05/1e3bb04e263a2bc1eacd762b37a0013d18f76341de0a7199d84a5a00b372/ray-2.51.2-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:554bd393e97bed9dfa5f73f47e4fbf42aa35d81b1228081aa93ccb7cdd5d4b34", size = 70414911, upload-time = "2025-11-29T00:29:34.286Z" }, - { url 
= "https://files.pythonhosted.org/packages/c4/85/f6994a74cf5e6fa6ebc959c27ff6f1f5352b78e71b947b4b302c6bb0a203/ray-2.51.2-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:e3bf004ed23971ec5d324ed9748aed23f6645d56696a44cdbe35d331f66c4619", size = 71275062, upload-time = "2025-11-29T00:29:39.379Z" }, + { url = "https://files.pythonhosted.org/packages/2f/99/21986c7f8135dafbf7c49229c52faaa9d2d365db7d86fffe978dde8ee967/ray-2.53.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4db914a0a6dd608fa49c066929a1282745a2dbd73caee67d7b80fe684ca65bdd", size = 69473649, upload-time = "2025-12-20T16:05:40.58Z" }, + { url = "https://files.pythonhosted.org/packages/70/d9/58b5426a3f11993851db3c93841358cebdddd948153481d355b720f31f9d/ray-2.53.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:4108280d8a1cb90d7d68e5c954c35e63b8bb9a4ba15f88c5e7da0e2025647712", size = 71342662, upload-time = "2025-12-20T16:05:46.936Z" }, + { url = "https://files.pythonhosted.org/packages/c5/05/4aa32370b313481c2d1d41cb53ec786daebdb2ef665b01ef2ac43d9cf457/ray-2.53.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:4dbb5fce1364763f29741055f50abe33cf726397141f9cc0e845dd3cc963e455", size = 72188620, upload-time = "2025-12-20T16:05:52.817Z" }, + { url = "https://files.pythonhosted.org/packages/f7/c6/21efe5886898421df20078a333b0984eade7d7aa4bdc68a336f0c66db27e/ray-2.53.0-cp310-cp310-win_amd64.whl", hash = "sha256:90faf630d20b6abf3135997fb3edb5842134aff92e04ee709865db04816d97ef", size = 27200553, upload-time = "2025-12-20T16:05:57.655Z" }, + { url = "https://files.pythonhosted.org/packages/bf/64/d5c29a4b014d8b9a624203a88b67630072c1d6960425dbf7a1f0fa5d6b74/ray-2.53.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:bd3ec4c342776ddac23ae2b108c64f5939f417ccc4875900d586c7c978463269", size = 69479296, upload-time = "2025-12-20T16:06:05.111Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/41/9e19d1e5d9458a5ba157c36642e2874bcb22fddbd7c1e77b668e5afc3f3d/ray-2.53.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:a0bbb98b0b0f25a3ee075ca10171e1260e70b6bc690cd509ecd7ce1228af854d", size = 71463449, upload-time = "2025-12-20T16:06:10.983Z" }, + { url = "https://files.pythonhosted.org/packages/63/de/58c19906b0dd16ea06b4f2465b7327f5f180e6b6e1c8c9b610d7c589ea5f/ray-2.53.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:eb000c17f7301071fdd15c44c4cd3ac0f7953bb4c7c227e61719fe7048195bcd", size = 72305102, upload-time = "2025-12-20T16:06:17.989Z" }, + { url = "https://files.pythonhosted.org/packages/b1/43/72cc1cfe17d26abe62a793eab10445f9546dce24192b85a6cd0cdc47ed86/ray-2.53.0-cp311-cp311-win_amd64.whl", hash = "sha256:4a1bb3fe09ab4cd0d16ddc96b9f60c9ed83b3f93b87aa8506e0d3b746fd4e825", size = 27194174, upload-time = "2025-12-20T16:06:23.042Z" }, + { url = "https://files.pythonhosted.org/packages/b2/44/562718a634e63e8ef7985285288a167d4af62bc2a7decce3300cf937776a/ray-2.53.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:d8b95d047d947493803fb8417aea31225dcacdab15afdc75b8a238901949d457", size = 69463763, upload-time = "2025-12-20T16:06:28.685Z" }, + { url = "https://files.pythonhosted.org/packages/38/68/8e59b8413f3751fe7ce8b98ee8787d13964b47a4043587950790a9dd2151/ray-2.53.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:65e2ce58d3dc6baa3cf45824d889c1968ebde565ee54dfd80a98af8f31af8e4a", size = 71504450, upload-time = "2025-12-20T16:06:34.922Z" }, + { url = "https://files.pythonhosted.org/packages/2a/db/978a50d264565ca42e2a4bf115ec9a1f04f19ca5e620e6aa2f280747b644/ray-2.53.0-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:14f46363e9b4cf0c1c8b4d8623ec337c5bd408377831b5e5b50067930137bbca", size = 72370424, upload-time = "2025-12-20T16:06:40.821Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/6c/bba6f22a9d83ee8f236000ba315f0c197bdc79888b4fa42fd762f729cbbd/ray-2.53.0-cp312-cp312-win_amd64.whl", hash = "sha256:b828c147f9ff2f277b1d254e4fe9a746fdfaee7e313a93a97c7edf4dae9b81a4", size = 27178106, upload-time = "2025-12-20T16:06:45.594Z" }, + { url = "https://files.pythonhosted.org/packages/3d/38/450cf9cf3c490fa4cc6d470597f819444da60f85579d2b34b95ee79fcb6f/ray-2.53.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:85b472ab6fb8f1189f8cef81913fd91b24dd69b3fa7dcca7e144827bd924f6c0", size = 69409819, upload-time = "2025-12-20T16:06:50.668Z" }, + { url = "https://files.pythonhosted.org/packages/71/5e/d452970b07174d5e4f8688abae889d01321b51ced827db1f1d1cb7d56d44/ray-2.53.0-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:7196e5358dfcc8211be864f45e6dfe4827202df294af3c7a76ff8fbc080e0522", size = 71409529, upload-time = "2025-12-20T16:06:56.2Z" }, + { url = "https://files.pythonhosted.org/packages/cb/84/50b317a125617a638a64694c12f56183edd5df01828a35fa4c55c7b13c66/ray-2.53.0-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:73dbbaa7962a7f5e38aa8cf9483e0e9817205e989aa3dc859c738c2af1ae01df", size = 72283961, upload-time = "2025-12-20T16:07:05.831Z" }, ] [[package]] @@ -4991,7 +4989,7 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'linux'", ] dependencies = [ - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0a/ca/d8ace4f98322d01abcd52d381134344bf7b431eba7ed8b42bdea5a3c2ac9/scipy-1.16.3.tar.gz", hash = "sha256:01e87659402762f43bd2fee13370553a17ada367d42e7487800bf2916535aecb", size = 30597883, upload-time = "2025-10-28T17:38:54.068Z" } wheels = [ @@ -5213,7 +5211,7 @@ source = { registry = "https://pypi.org/simple" 
} dependencies = [ { name = "cffi" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" } wheels = [ @@ -5486,7 +5484,7 @@ dependencies = [ { name = "grpcio" }, { name = "markdown" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= 
'3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pillow" }, { name = "protobuf" }, @@ -5560,7 +5558,7 @@ resolution-markers = [ ] dependencies = [ { name = "ml-dtypes", marker = "python_full_version >= '3.11'" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/88/18/7b91daa9cf29dbb6bfdd603154f355c9069a9cd8c757038fe52b0f613611/tensorstore-0.1.80.tar.gz", hash = "sha256:4158fe76b96f62d12a37d7868150d836e089b5280b2bdd363c43c5d651f10e26", size = 7090032, upload-time = "2025-12-10T21:35:10.941Z" } wheels = [ @@ -5653,27 +5651,32 @@ wheels = [ [[package]] name = "tokenizers" -version = "0.22.1" +version = "0.22.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1c/46/fb6854cec3278fbfa4a75b50232c77622bc517ac886156e6afbfa4d8fc6e/tokenizers-0.22.1.tar.gz", hash = "sha256:61de6522785310a309b3407bac22d99c4db5dba349935e99e4d15ea2226af2d9", size = 363123, upload-time = "2025-09-19T09:49:23.424Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/33/f4b2d94ada7ab297328fc671fed209368ddb82f965ec2224eb1892674c3a/tokenizers-0.22.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:59fdb013df17455e5f950b4b834a7b3ee2e0271e6378ccb33aa74d178b513c73", size = 3069318, upload-time = "2025-09-19T09:49:11.848Z" }, - { url = "https://files.pythonhosted.org/packages/1c/58/2aa8c874d02b974990e89ff95826a4852a8b2a273c7d1b4411cdd45a4565/tokenizers-0.22.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8d4e484f7b0827021ac5f9f71d4794aaef62b979ab7608593da22b1d2e3c4edc", size = 2926478, upload-time = 
"2025-09-19T09:49:09.759Z" }, - { url = "https://files.pythonhosted.org/packages/1e/3b/55e64befa1e7bfea963cf4b787b2cea1011362c4193f5477047532ce127e/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d2962dd28bc67c1f205ab180578a78eef89ac60ca7ef7cbe9635a46a56422a", size = 3256994, upload-time = "2025-09-19T09:48:56.701Z" }, - { url = "https://files.pythonhosted.org/packages/71/0b/fbfecf42f67d9b7b80fde4aabb2b3110a97fac6585c9470b5bff103a80cb/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38201f15cdb1f8a6843e6563e6e79f4abd053394992b9bbdf5213ea3469b4ae7", size = 3153141, upload-time = "2025-09-19T09:48:59.749Z" }, - { url = "https://files.pythonhosted.org/packages/17/a9/b38f4e74e0817af8f8ef925507c63c6ae8171e3c4cb2d5d4624bf58fca69/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1cbe5454c9a15df1b3443c726063d930c16f047a3cc724b9e6e1a91140e5a21", size = 3508049, upload-time = "2025-09-19T09:49:05.868Z" }, - { url = "https://files.pythonhosted.org/packages/d2/48/dd2b3dac46bb9134a88e35d72e1aa4869579eacc1a27238f1577270773ff/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7d094ae6312d69cc2a872b54b91b309f4f6fbce871ef28eb27b52a98e4d0214", size = 3710730, upload-time = "2025-09-19T09:49:01.832Z" }, - { url = "https://files.pythonhosted.org/packages/93/0e/ccabc8d16ae4ba84a55d41345207c1e2ea88784651a5a487547d80851398/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afd7594a56656ace95cdd6df4cca2e4059d294c5cfb1679c57824b605556cb2f", size = 3412560, upload-time = "2025-09-19T09:49:03.867Z" }, - { url = "https://files.pythonhosted.org/packages/d0/c6/dc3a0db5a6766416c32c034286d7c2d406da1f498e4de04ab1b8959edd00/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2ef6063d7a84994129732b47e7915e8710f27f99f3a3260b8a38fc7ccd083f4", size = 
3250221, upload-time = "2025-09-19T09:49:07.664Z" }, - { url = "https://files.pythonhosted.org/packages/d7/a6/2c8486eef79671601ff57b093889a345dd3d576713ef047776015dc66de7/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ba0a64f450b9ef412c98f6bcd2a50c6df6e2443b560024a09fa6a03189726879", size = 9345569, upload-time = "2025-09-19T09:49:14.214Z" }, - { url = "https://files.pythonhosted.org/packages/6b/16/32ce667f14c35537f5f605fe9bea3e415ea1b0a646389d2295ec348d5657/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:331d6d149fa9c7d632cde4490fb8bbb12337fa3a0232e77892be656464f4b446", size = 9271599, upload-time = "2025-09-19T09:49:16.639Z" }, - { url = "https://files.pythonhosted.org/packages/51/7c/a5f7898a3f6baa3fc2685c705e04c98c1094c523051c805cdd9306b8f87e/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:607989f2ea68a46cb1dfbaf3e3aabdf3f21d8748312dbeb6263d1b3b66c5010a", size = 9533862, upload-time = "2025-09-19T09:49:19.146Z" }, - { url = "https://files.pythonhosted.org/packages/36/65/7e75caea90bc73c1dd8d40438adf1a7bc26af3b8d0a6705ea190462506e1/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a0f307d490295717726598ef6fa4f24af9d484809223bbc253b201c740a06390", size = 9681250, upload-time = "2025-09-19T09:49:21.501Z" }, - { url = "https://files.pythonhosted.org/packages/30/2c/959dddef581b46e6209da82df3b78471e96260e2bc463f89d23b1bf0e52a/tokenizers-0.22.1-cp39-abi3-win32.whl", hash = "sha256:b5120eed1442765cd90b903bb6cfef781fd8fe64e34ccaecbae4c619b7b12a82", size = 2472003, upload-time = "2025-09-19T09:49:27.089Z" }, - { url = "https://files.pythonhosted.org/packages/b3/46/e33a8c93907b631a99377ef4c5f817ab453d0b34f93529421f42ff559671/tokenizers-0.22.1-cp39-abi3-win_amd64.whl", hash = "sha256:65fd6e3fb11ca1e78a6a93602490f134d1fdeb13bcef99389d5102ea318ed138", size = 2674684, upload-time = "2025-09-19T09:49:24.953Z" }, +sdist = { url = 
"https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" }, + { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" }, + { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" }, + { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" }, + { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" }, + { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" }, + { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" }, + { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" }, + { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" }, + { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" }, + { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" }, + { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, + { url = "https://files.pythonhosted.org/packages/84/04/655b79dbcc9b3ac5f1479f18e931a344af67e5b7d3b251d2dcdcd7558592/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:753d47ebd4542742ef9261d9da92cd545b2cacbb48349a1225466745bb866ec4", size = 3282301, upload-time = "2026-01-05T10:40:34.858Z" }, + { url = "https://files.pythonhosted.org/packages/46/cd/e4851401f3d8f6f45d8480262ab6a5c8cb9c4302a790a35aa14eeed6d2fd/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e10bf9113d209be7cd046d40fbabbaf3278ff6d18eb4da4c500443185dc1896c", size = 3161308, upload-time = "2026-01-05T10:40:40.737Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/6e/55553992a89982cd12d4a66dddb5e02126c58677ea3931efcbe601d419db/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64d94e84f6660764e64e7e0b22baa72f6cd942279fdbb21d46abd70d179f0195", size = 3718964, upload-time = "2026-01-05T10:40:46.56Z" }, + { url = "https://files.pythonhosted.org/packages/59/8c/b1c87148aa15e099243ec9f0cf9d0e970cc2234c3257d558c25a2c5304e6/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f01a9c019878532f98927d2bacb79bbb404b43d3437455522a00a30718cdedb5", size = 3373542, upload-time = "2026-01-05T10:40:52.803Z" }, ] [[package]] @@ -5810,7 +5813,7 @@ version = "0.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "torch", marker = "sys_platform == 'never'" }, { name = "torchvision", marker = "sys_platform == 'never'" }, ] @@ -5825,7 +5828,7 @@ version = "0.24.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux' and extra == 
'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "pillow", marker = "sys_platform != 'linux'" }, { name = "torch", marker = "sys_platform == 'never'" }, ] @@ -5894,60 +5897,17 @@ wheels = [ [[package]] name = "transformer-engine" -version = "2.10.0" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ea/45/b3402a4931c0850ac662b532888d7cb89d5d8f22324309ae8d24557340ee/transformer_engine-2.10.0-py3-none-any.whl", hash = "sha256:a14ccf4e887409be062c0bd8c4a341df55a77baad6aea6aabfe39c24e38252e5", size = 696221, upload-time = "2025-12-02T20:53:17.688Z" }, -] - -[package.optional-dependencies] -core-cu13 = [ - { name = "transformer-engine-cu13" }, -] -pytorch = [ - { name = "transformer-engine-torch" }, -] - -[[package]] -name = "transformer-engine-cu12" -version = "2.10.0" -source = { registry = "https://pypi.org/simple" } 
-dependencies = [ - { name = "importlib-metadata" }, - { name = "packaging" }, - { name = "pydantic" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/24/3c/9f480a555c4707cd7b091c5341cc96db1af80b5bfb1a2eae834fb704283b/transformer_engine_cu12-2.10.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:ddd6f4f1f2a8f2c450ea0210d04a08a7b8ceff49a4d900f27b3858980502f21b", size = 286567840, upload-time = "2025-12-02T20:50:26.438Z" }, - { url = "https://files.pythonhosted.org/packages/29/c7/b63b6989262fcf37402a910112aaee9f3273338d9d1d854478e022f5deb7/transformer_engine_cu12-2.10.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:6766d4ea1643a2606d498aa396d4e7da1046fe01580fdef2047c2c8aa37936b0", size = 287067223, upload-time = "2025-12-02T20:52:11.248Z" }, -] - -[[package]] -name = "transformer-engine-cu13" -version = "2.10.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "importlib-metadata" }, - { name = "packaging" }, - { name = "pydantic" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/31/bf/34a93b94ec3a8e707e9c5660c76533316357e3b84d08f5cc676787a196c5/transformer_engine_cu13-2.10.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:7409c48a5478acc15b7ac88231be3c45aa9e7c9d17f4875ad31d1bc1650595dd", size = 176560075, upload-time = "2025-12-02T20:48:52.307Z" }, - { url = "https://files.pythonhosted.org/packages/48/80/1f08d928e7e0ce3f10c6cfa6871b17d13cec070dffb8b88ed9308653ac77/transformer_engine_cu13-2.10.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:b8ee7bd6cef455e07bad61d645af290940aa58856d70fda05e1f973353a85349", size = 177257305, upload-time = "2025-12-02T20:51:36.94Z" }, -] - -[[package]] -name = "transformer-engine-torch" -version = "2.10.0" -source = { registry = "https://pypi.org/simple" } +version = "2.11.0+c188b533" +source = { git = "https://github.com/NVIDIA/TransformerEngine.git?rev=release_v2.11#c188b533cc3721ca9c6bbfd26148f5cf60108c25" } dependencies = [ { name = "einops" 
}, + { name = "importlib-metadata" }, { name = "onnx" }, { name = "onnxscript" }, + { name = "packaging" }, + { name = "pydantic" }, { name = "torch", marker = "sys_platform == 'never'" }, - { name = "transformer-engine-cu12" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/18/94/609a7772569d3acdba34261be7fd30b75f5ff4e5f704117c9e0da517b079/transformer_engine_torch-2.10.0.tar.gz", hash = "sha256:71faff8e3def742553ad74b4e32d2d12e91be9acfb13d1699c89e1e18dd4ecd6", size = 220302, upload-time = "2025-12-02T20:53:57.876Z" } [[package]] name = "transformers" @@ -5957,7 +5917,7 @@ dependencies = [ { name = "filelock" }, { name = "huggingface-hub" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pyyaml" }, { name = "regex" }, @@ -6003,7 +5963,7 @@ wheels = [ [[package]] name = "typer" -version = "0.20.0" +version = "0.21.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -6011,9 +5971,9 @@ dependencies = [ { name = "shellingham" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8f/28/7c85c8032b91dbe79725b6f17d2fffc595dff06a35c7a30a37bef73a1ab4/typer-0.20.0.tar.gz", hash = "sha256:1aaf6494031793e4876fb0bacfa6a912b551cf43c1e63c800df8b1a866720c37", size = 106492, upload-time = "2025-10-20T17:03:49.445Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/36/bf/8825b5929afd84d0dabd606c67cd57b8388cb3ec385f7ef19c5cc2202069/typer-0.21.1.tar.gz", hash = "sha256:ea835607cd752343b6b2b7ce676893e5a0324082268b48f27aa058bdb7d2145d", size = 110371, upload-time = "2026-01-06T11:21:10.989Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/78/64/7713ffe4b5983314e9d436a90d5bd4f63b6054e2aca783a3cfc44cb95bbf/typer-0.20.0-py3-none-any.whl", hash = "sha256:5b463df6793ec1dca6213a3cf4c0f03bc6e322ac5e16e13ddd622a889489784a", size = 47028, upload-time = "2025-10-20T17:03:47.617Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/d9257dd49ff2ca23ea5f132edf1281a0c4f9de8a762b9ae399b670a59235/typer-0.21.1-py3-none-any.whl", hash = "sha256:7985e89081c636b88d172c2ee0cfe33c253160994d47bdfdc302defd7d1f1d01", size = 47381, upload-time = "2026-01-06T11:21:09.824Z" }, ] [[package]] @@ -6070,16 +6030,16 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.38.0" +version = "0.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 80605, upload-time = "2025-10-18T13:46:44.63Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = 
"sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, ] [[package]] @@ -6242,7 +6202,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "braceexpand" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "pyyaml" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5a/3a/68800d92e065cf4750ebecf973b13979c0c929b439e1293012938862038d/webdataset-1.0.2.tar.gz", hash = "sha256:7f0498be827cfa46cc5430a58768a24e2c6a410676a61be1838f53d61afdaab4", size = 80090, upload-time = "2025-06-19T23:26:21.945Z" } From de866fa56682b00a9e332c1116142e6173e13edb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: 
Tue, 13 Jan 2026 09:38:07 +0100 Subject: [PATCH 224/334] ci(fix): Update golden values (#2921) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_dev_dgx_h100.json | 480 ++-- .../golden_values_dev_dgx_h100.json | 480 ++-- .../golden_values_dev_dgx_h100.json | 480 ++-- .../golden_values_dev_dgx_h100.json | 480 ++-- .../golden_values_lts_dgx_a100.json | 538 +---- .../golden_values_dev_dgx_h100.json | 2050 ++++++++--------- .../golden_values_dev_dgx_h100.json | 492 ++-- .../golden_values_dev_dgx_h100.json | 446 ++-- .../golden_values_dev_dgx_h100.json | 494 ++-- .../golden_values_dev_dgx_h100.json | 472 ++-- .../golden_values_dev_dgx_h100.json | 390 ++-- .../golden_values_dev_dgx_h100.json | 1140 ++++----- 12 files changed, 3703 insertions(+), 4239 deletions(-) diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json index 02b4683ea0b..81005995dad 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json @@ -4,55 +4,55 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.86535, - "2": 10.85873, - "3": 10.86284, - "4": 10.84009, + "1": 10.86539, + "2": 10.85871, + "3": 10.86282, + "4": 10.84007, "5": 10.87856, - "6": 10.88856, - "7": 10.86532, - "8": 10.86017, - "9": 10.8599, - "10": 10.82981, - "11": 10.8895, - "12": 10.8751, - "13": 10.87423, + "6": 10.88852, + "7": 10.86536, + "8": 10.86015, + "9": 10.85991, + "10": 10.82982, + "11": 10.88947, + "12": 10.87511, + "13": 10.87422, "14": 10.89675, - "15": 10.82054, - "16": 10.82504, + "15": 10.82056, + "16": 10.82497, "17": 10.78983, "18": 10.81029, - "19": 10.80535, - "20": 10.70398, - "21": 10.66993, - "22": 10.50643, - 
"23": 10.69004, - "24": 10.56314, - "25": 10.4942, - "26": 10.56628, - "27": 10.58025, + "19": 10.80528, + "20": 10.70396, + "21": 10.6699, + "22": 10.50641, + "23": 10.69006, + "24": 10.56312, + "25": 10.49418, + "26": 10.56627, + "27": 10.58023, "28": 10.51571, - "29": 10.55299, - "30": 10.30549, - "31": 10.02245, - "32": 10.40614, + "29": 10.55296, + "30": 10.30551, + "31": 10.02244, + "32": 10.40618, "33": 10.39874, - "34": 10.13771, + "34": 10.1377, "35": 10.20184, - "36": 10.16052, - "37": 10.28973, - "38": 10.11474, + "36": 10.1605, + "37": 10.28975, + "38": 10.11483, "39": 10.361, - "40": 10.01903, + "40": 10.01905, "41": 10.07292, - "42": 10.14698, - "43": 9.74687, - "44": 9.87766, - "45": 9.74966, - "46": 9.73383, - "47": 10.07535, - "48": 9.78068, - "49": 9.44784, + "42": 10.14697, + "43": 9.74684, + "44": 9.87763, + "45": 9.74962, + "46": 9.73382, + "47": 10.07536, + "48": 9.78071, + "49": 9.44783, "50": 9.8399 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 653.0, - "2": 642.0, - "3": 630.0, - "4": 585.0, - "5": 635.0, - "6": 687.0, - "7": 615.0, - "8": 601.0, - "9": 607.0, - "10": 522.0, - "11": 637.0, - "12": 675.0, - "13": 649.0, - "14": 648.0, - "15": 640.0, - "16": 602.0, - "17": 668.0, - "18": 634.0, - "19": 593.0, - "20": 579.0, - "21": 633.0, - "22": 597.0, - "23": 756.0, - "24": 612.0, - "25": 591.0, - "26": 620.0, - "27": 700.0, - "28": 705.0, - "29": 795.0, - "30": 752.0, - "31": 628.0, - "32": 712.0, - "33": 752.0, - "34": 737.0, - "35": 741.0, - "36": 770.0, - "37": 861.0, - "38": 823.0, - "39": 812.0, - "40": 814.0, - "41": 826.0, - "42": 801.0, - "43": 769.0, - "44": 822.0, - "45": 777.0, - "46": 828.0, - "47": 878.0, - "48": 915.0, - "49": 908.0, - "50": 848.0 + "1": 572.0, + "2": 656.0, + "3": 649.0, + "4": 631.0, + "5": 658.0, + "6": 636.0, + "7": 636.0, + "8": 542.0, + "9": 653.0, + "10": 551.0, + "11": 681.0, + "12": 642.0, + "13": 624.0, + "14": 658.0, + "15": 682.0, + "16": 659.0, + "17": 620.0, + 
"18": 603.0, + "19": 634.0, + "20": 639.0, + "21": 634.0, + "22": 602.0, + "23": 731.0, + "24": 620.0, + "25": 611.0, + "26": 626.0, + "27": 683.0, + "28": 668.0, + "29": 713.0, + "30": 712.0, + "31": 616.0, + "32": 786.0, + "33": 800.0, + "34": 702.0, + "35": 684.0, + "36": 664.0, + "37": 831.0, + "38": 802.0, + "39": 919.0, + "40": 802.0, + "41": 791.0, + "42": 840.0, + "43": 718.0, + "44": 756.0, + "45": 765.0, + "46": 809.0, + "47": 839.0, + "48": 827.0, + "49": 935.0, + "50": 839.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 509641216.0, + "2": 509641216.0, + "3": 509641216.0, + "4": 509641216.0, + "5": 509641216.0, + "6": 509641216.0, + "7": 509641216.0, + "8": 509641216.0, + "9": 509641216.0, + "10": 509641216.0, + "11": 509641216.0, + "12": 509641216.0, + "13": 509641216.0, + "14": 509641216.0, + "15": 509641216.0, + "16": 509641216.0, + "17": 509641216.0, + "18": 
509641216.0, + "19": 509641216.0, + "20": 509641216.0, + "21": 509641216.0, + "22": 509641216.0, + "23": 509641216.0, + "24": 509641216.0, + "25": 509641216.0, + "26": 509641216.0, + "27": 509641216.0, + "28": 509641216.0, + "29": 509641216.0, + "30": 509641216.0, + "31": 509641216.0, + "32": 509641216.0, + "33": 509641216.0, + "34": 509641216.0, + "35": 509641216.0, + "36": 509641216.0, + "37": 509641216.0, + "38": 509641216.0, + "39": 509641216.0, + "40": 509641216.0, + "41": 509641216.0, + "42": 509641216.0, + "43": 509641216.0, + "44": 509641216.0, + "45": 509641216.0, + "46": 509641216.0, + "47": 509641216.0, + "48": 509641216.0, + "49": 509641216.0, + "50": 509641216.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 757801472.0, - "2": 933156352.0, - "3": 933156352.0, - "4": 933156352.0, - "5": 933156352.0, - "6": 933156352.0, - "7": 933156352.0, - "8": 933156352.0, - "9": 933156352.0, - "10": 933156352.0, - "11": 933156352.0, - "12": 933156352.0, - "13": 933156352.0, - "14": 933156352.0, - "15": 933156352.0, - "16": 933156352.0, - "17": 933156352.0, - "18": 933156352.0, - "19": 933156352.0, - "20": 933156352.0, - "21": 933156352.0, - "22": 933156352.0, - "23": 933156352.0, - "24": 933156352.0, - "25": 933156352.0, - "26": 933156352.0, - "27": 933156352.0, - "28": 933156352.0, - "29": 933156352.0, - "30": 933156352.0, - "31": 933156352.0, - "32": 933156352.0, - "33": 933156352.0, - "34": 933156352.0, - "35": 933156352.0, - "36": 933156352.0, - "37": 933156352.0, - "38": 933156352.0, - "39": 933156352.0, - "40": 933156352.0, - "41": 933156352.0, - "42": 933156352.0, - "43": 933156352.0, - "44": 933156352.0, - "45": 933156352.0, - "46": 933156352.0, - "47": 933156352.0, - "48": 933156352.0, - "49": 933156352.0, - "50": 933156352.0 + "1": 756751872.0, + "2": 932632064.0, + "3": 932632064.0, + "4": 932632064.0, + "5": 932632064.0, + "6": 932632064.0, + "7": 932632064.0, + "8": 932632064.0, + "9": 
932632064.0, + "10": 933679616.0, + "11": 933679616.0, + "12": 933679616.0, + "13": 933679616.0, + "14": 933679616.0, + "15": 933679616.0, + "16": 933679616.0, + "17": 933679616.0, + "18": 933679616.0, + "19": 933679616.0, + "20": 933679616.0, + "21": 933679616.0, + "22": 933679616.0, + "23": 933679616.0, + "24": 933679616.0, + "25": 933679616.0, + "26": 933679616.0, + "27": 933679616.0, + "28": 933679616.0, + "29": 933679616.0, + "30": 933679616.0, + "31": 933679616.0, + "32": 933679616.0, + "33": 933679616.0, + "34": 933679616.0, + "35": 933679616.0, + "36": 933679616.0, + "37": 933679616.0, + "38": 933679616.0, + "39": 933679616.0, + "40": 933679616.0, + "41": 933679616.0, + "42": 933679616.0, + "43": 933679616.0, + "44": 933679616.0, + "45": 933680640.0, + "46": 933680640.0, + "47": 933680640.0, + "48": 933680640.0, + "49": 933680640.0, + "50": 933680640.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 15.78036, - "2": 0.34723, - "3": 0.33492, - "4": 0.3292, - "5": 0.33036, - "6": 0.34971, - "7": 0.33848, - "8": 0.33262, - "9": 0.34028, - "10": 0.3518, - "11": 0.34239, - "12": 0.33211, - "13": 0.32961, - "14": 0.33263, - "15": 0.32808, - "16": 0.33152, - "17": 0.33313, - "18": 0.329, - "19": 0.3317, - "20": 0.33143, - "21": 0.34166, - "22": 0.33873, - "23": 0.34817, - "24": 0.3415, - "25": 0.34495, - "26": 0.32592, - "27": 0.32935, - "28": 0.33233, - "29": 0.328, - "30": 0.32746, - "31": 0.3275, - "32": 0.327, - "33": 0.32765, - "34": 0.32542, - "35": 0.32703, - "36": 0.33052, - "37": 0.33413, - "38": 0.32701, - "39": 0.32816, - "40": 0.32555, - "41": 0.33676, - "42": 0.33367, - "43": 0.33748, - "44": 0.33125, - "45": 0.32793, - "46": 0.33387, - "47": 0.32628, - "48": 0.32993, - "49": 0.32747, - "50": 0.327 + "1": 42.02117, + "2": 0.34315, + "3": 0.31657, + "4": 0.29715, + "5": 0.29109, + "6": 0.28638, + "7": 0.28745, + "8": 0.29318, + "9": 0.30075, + "10": 0.29578, + "11": 0.30101, + "12": 0.29769, + "13": 
0.2954, + "14": 0.2989, + "15": 0.29627, + "16": 0.29342, + "17": 0.29396, + "18": 0.29431, + "19": 0.29408, + "20": 0.29286, + "21": 0.29361, + "22": 0.29448, + "23": 0.29521, + "24": 0.29494, + "25": 0.29812, + "26": 0.29413, + "27": 0.2949, + "28": 0.29469, + "29": 0.29393, + "30": 0.29682, + "31": 0.2951, + "32": 0.29532, + "33": 0.29449, + "34": 0.29334, + "35": 0.29679, + "36": 0.29557, + "37": 0.29495, + "38": 0.29826, + "39": 0.29574, + "40": 0.2972, + "41": 0.29568, + "42": 0.29643, + "43": 0.29627, + "44": 0.29491, + "45": 0.29476, + "46": 0.29707, + "47": 0.35995, + "48": 0.28743, + "49": 0.28604, + "50": 0.28593 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json index f2adbef4530..873d08f92a3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json @@ -4,55 +4,55 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.86535, - "2": 10.85873, - "3": 10.86284, - "4": 10.84009, + "1": 10.86539, + "2": 10.85871, + "3": 10.86282, + "4": 10.84007, "5": 10.87856, - "6": 10.88856, - "7": 10.86532, - "8": 10.86017, - "9": 10.8599, - "10": 10.82981, - "11": 10.8895, - "12": 10.8751, - "13": 10.87423, + "6": 10.88852, + "7": 10.86536, + "8": 10.86015, + "9": 10.85991, + "10": 10.82982, + "11": 10.88947, + "12": 10.87511, + "13": 10.87422, "14": 10.89675, - "15": 10.82054, - "16": 10.82504, + "15": 10.82056, + "16": 10.82497, "17": 10.78983, "18": 10.81029, - "19": 10.80535, - "20": 10.70398, - "21": 10.66993, - "22": 10.50643, - "23": 10.69004, - "24": 10.56314, - "25": 10.4942, - "26": 10.56628, - "27": 10.58025, + "19": 
10.80528, + "20": 10.70396, + "21": 10.6699, + "22": 10.50641, + "23": 10.69006, + "24": 10.56312, + "25": 10.49418, + "26": 10.56627, + "27": 10.58023, "28": 10.51571, - "29": 10.55299, - "30": 10.30549, - "31": 10.02245, - "32": 10.40614, + "29": 10.55296, + "30": 10.30551, + "31": 10.02244, + "32": 10.40618, "33": 10.39874, - "34": 10.13771, + "34": 10.1377, "35": 10.20184, - "36": 10.16052, - "37": 10.28973, - "38": 10.11474, + "36": 10.1605, + "37": 10.28975, + "38": 10.11483, "39": 10.361, - "40": 10.01903, + "40": 10.01905, "41": 10.07292, - "42": 10.14698, - "43": 9.74687, - "44": 9.87766, - "45": 9.74966, - "46": 9.73383, - "47": 10.07535, - "48": 9.78068, - "49": 9.44784, + "42": 10.14697, + "43": 9.74684, + "44": 9.87763, + "45": 9.74962, + "46": 9.73382, + "47": 10.07536, + "48": 9.78071, + "49": 9.44783, "50": 9.8399 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 653.0, - "2": 642.0, - "3": 630.0, - "4": 585.0, - "5": 635.0, - "6": 687.0, - "7": 615.0, - "8": 601.0, - "9": 607.0, - "10": 522.0, - "11": 637.0, - "12": 675.0, - "13": 649.0, - "14": 648.0, - "15": 640.0, - "16": 602.0, - "17": 668.0, - "18": 634.0, - "19": 593.0, - "20": 579.0, - "21": 633.0, - "22": 597.0, - "23": 756.0, - "24": 612.0, - "25": 591.0, - "26": 620.0, - "27": 700.0, - "28": 705.0, - "29": 795.0, - "30": 752.0, - "31": 628.0, - "32": 712.0, - "33": 752.0, - "34": 737.0, - "35": 741.0, - "36": 770.0, - "37": 861.0, - "38": 823.0, - "39": 812.0, - "40": 814.0, - "41": 826.0, - "42": 801.0, - "43": 769.0, - "44": 822.0, - "45": 777.0, - "46": 828.0, - "47": 878.0, - "48": 915.0, - "49": 908.0, - "50": 848.0 + "1": 572.0, + "2": 656.0, + "3": 649.0, + "4": 631.0, + "5": 658.0, + "6": 636.0, + "7": 636.0, + "8": 542.0, + "9": 653.0, + "10": 551.0, + "11": 681.0, + "12": 642.0, + "13": 624.0, + "14": 658.0, + "15": 682.0, + "16": 659.0, + "17": 620.0, + "18": 603.0, + "19": 634.0, + "20": 639.0, + "21": 634.0, + "22": 602.0, + "23": 731.0, + "24": 
620.0, + "25": 611.0, + "26": 626.0, + "27": 683.0, + "28": 668.0, + "29": 713.0, + "30": 712.0, + "31": 616.0, + "32": 786.0, + "33": 800.0, + "34": 702.0, + "35": 684.0, + "36": 664.0, + "37": 831.0, + "38": 802.0, + "39": 919.0, + "40": 802.0, + "41": 791.0, + "42": 840.0, + "43": 718.0, + "44": 756.0, + "45": 765.0, + "46": 809.0, + "47": 839.0, + "48": 827.0, + "49": 935.0, + "50": 839.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 511214080.0, + "2": 511214080.0, + "3": 511214080.0, + "4": 511214080.0, + "5": 511214080.0, + "6": 511214080.0, + "7": 511214080.0, + "8": 511214080.0, + "9": 511214080.0, + "10": 511214080.0, + "11": 511214080.0, + "12": 511214080.0, + "13": 511214080.0, + "14": 511214080.0, + "15": 511214080.0, + "16": 511214080.0, + "17": 511214080.0, + "18": 511214080.0, + "19": 511214080.0, + "20": 511214080.0, + "21": 511214080.0, + "22": 511214080.0, + "23": 
511214080.0, + "24": 511214080.0, + "25": 511214080.0, + "26": 511214080.0, + "27": 511214080.0, + "28": 511214080.0, + "29": 511214080.0, + "30": 511214080.0, + "31": 511214080.0, + "32": 511214080.0, + "33": 511214080.0, + "34": 511214080.0, + "35": 511214080.0, + "36": 511214080.0, + "37": 511214080.0, + "38": 511214080.0, + "39": 511214080.0, + "40": 511214080.0, + "41": 511214080.0, + "42": 511214080.0, + "43": 511214080.0, + "44": 511214080.0, + "45": 511214080.0, + "46": 511214080.0, + "47": 511214080.0, + "48": 511214080.0, + "49": 511214080.0, + "50": 511214080.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 759898624.0, - "2": 933156352.0, - "3": 933156352.0, - "4": 933156352.0, - "5": 933156352.0, - "6": 933156352.0, - "7": 933156352.0, - "8": 933156352.0, - "9": 933156352.0, - "10": 933156352.0, - "11": 933156352.0, - "12": 933156352.0, - "13": 933156352.0, - "14": 933156352.0, - "15": 933156352.0, - "16": 933156352.0, - "17": 933156352.0, - "18": 933156352.0, - "19": 933156352.0, - "20": 933156352.0, - "21": 933156352.0, - "22": 933156352.0, - "23": 933156352.0, - "24": 933156352.0, - "25": 933156352.0, - "26": 933156352.0, - "27": 933156352.0, - "28": 933156352.0, - "29": 933156352.0, - "30": 933156352.0, - "31": 933156352.0, - "32": 933156352.0, - "33": 933156352.0, - "34": 933156352.0, - "35": 933156352.0, - "36": 933156352.0, - "37": 933156352.0, - "38": 933156352.0, - "39": 933156352.0, - "40": 933156352.0, - "41": 933156352.0, - "42": 933156352.0, - "43": 933156352.0, - "44": 933156352.0, - "45": 933156352.0, - "46": 933156352.0, - "47": 933156352.0, - "48": 933156352.0, - "49": 933156352.0, - "50": 933156352.0 + "1": 756753920.0, + "2": 935776768.0, + "3": 935777792.0, + "4": 935777792.0, + "5": 935777792.0, + "6": 935777792.0, + "7": 935777792.0, + "8": 935777792.0, + "9": 935777792.0, + "10": 935777792.0, + "11": 935777792.0, + "12": 935777792.0, + "13": 935777792.0, + "14": 
935777792.0, + "15": 935777792.0, + "16": 935777792.0, + "17": 935777792.0, + "18": 935777792.0, + "19": 935777792.0, + "20": 935777792.0, + "21": 935777792.0, + "22": 935777792.0, + "23": 935777792.0, + "24": 935777792.0, + "25": 935777792.0, + "26": 935777792.0, + "27": 935777792.0, + "28": 935777792.0, + "29": 935777792.0, + "30": 935777792.0, + "31": 935777792.0, + "32": 935777792.0, + "33": 935777792.0, + "34": 935777792.0, + "35": 935777792.0, + "36": 935777792.0, + "37": 935777792.0, + "38": 935777792.0, + "39": 935777792.0, + "40": 935777792.0, + "41": 935777792.0, + "42": 935777792.0, + "43": 935777792.0, + "44": 935777792.0, + "45": 935777792.0, + "46": 935777792.0, + "47": 935777792.0, + "48": 935777792.0, + "49": 935777792.0, + "50": 935777792.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 16.72434, - "2": 0.40342, - "3": 0.32477, - "4": 0.32459, - "5": 0.32511, - "6": 0.32478, - "7": 0.32469, - "8": 0.32479, - "9": 0.32229, - "10": 0.32534, - "11": 0.32568, - "12": 0.32325, - "13": 0.3234, - "14": 0.32735, - "15": 0.32264, - "16": 0.32664, - "17": 0.32289, - "18": 0.32328, - "19": 0.32997, - "20": 0.32955, - "21": 0.32699, - "22": 0.3292, - "23": 0.32982, - "24": 0.32452, - "25": 0.32644, - "26": 0.32596, - "27": 0.32426, - "28": 0.32527, - "29": 0.32409, - "30": 0.32549, - "31": 0.32259, - "32": 0.32488, - "33": 0.32331, - "34": 0.3242, - "35": 0.3261, - "36": 0.32048, - "37": 0.32127, - "38": 0.32479, - "39": 0.32338, - "40": 0.32137, - "41": 0.32292, - "42": 0.32202, - "43": 0.32321, - "44": 0.32105, - "45": 0.32265, - "46": 0.32148, - "47": 0.32443, - "48": 0.32158, - "49": 0.32089, - "50": 0.32389 + "1": 44.927, + "2": 0.34811, + "3": 0.31209, + "4": 0.29049, + "5": 0.28904, + "6": 0.28728, + "7": 0.28884, + "8": 0.29393, + "9": 0.28153, + "10": 0.28717, + "11": 0.28861, + "12": 0.29265, + "13": 0.29015, + "14": 0.29189, + "15": 0.29081, + "16": 0.29742, + "17": 0.29933, + "18": 0.29528, + 
"19": 0.29058, + "20": 0.29304, + "21": 0.29307, + "22": 0.29297, + "23": 0.2889, + "24": 0.29028, + "25": 0.29626, + "26": 0.29321, + "27": 0.29347, + "28": 0.29303, + "29": 0.2812, + "30": 0.28971, + "31": 0.28878, + "32": 0.28499, + "33": 0.28119, + "34": 0.27908, + "35": 0.28101, + "36": 0.2794, + "37": 0.2798, + "38": 0.27799, + "39": 0.28519, + "40": 0.28246, + "41": 0.28126, + "42": 0.28572, + "43": 0.28647, + "44": 0.28772, + "45": 0.28736, + "46": 0.29677, + "47": 0.29247, + "48": 0.29174, + "49": 0.29182, + "50": 0.29085 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json index f64661824cb..84e2331d673 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json @@ -4,55 +4,55 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.86535, - "2": 10.85873, - "3": 10.86284, - "4": 10.84009, + "1": 10.86539, + "2": 10.85871, + "3": 10.86282, + "4": 10.84007, "5": 10.87856, - "6": 10.88856, - "7": 10.86532, - "8": 10.86017, - "9": 10.8599, - "10": 10.82981, - "11": 10.8895, - "12": 10.8751, - "13": 10.87423, + "6": 10.88852, + "7": 10.86536, + "8": 10.86015, + "9": 10.85991, + "10": 10.82982, + "11": 10.88947, + "12": 10.87511, + "13": 10.87422, "14": 10.89675, - "15": 10.82054, - "16": 10.82504, + "15": 10.82056, + "16": 10.82497, "17": 10.78983, "18": 10.81029, - "19": 10.80535, - "20": 10.70398, - "21": 10.66993, - "22": 10.50643, - "23": 10.69004, - "24": 10.56314, - "25": 10.4942, - "26": 10.56628, - "27": 10.58025, + "19": 10.80528, + "20": 10.70396, + "21": 10.6699, 
+ "22": 10.50641, + "23": 10.69006, + "24": 10.56312, + "25": 10.49418, + "26": 10.56627, + "27": 10.58023, "28": 10.51571, - "29": 10.55299, - "30": 10.30549, - "31": 10.02245, - "32": 10.40614, + "29": 10.55296, + "30": 10.30551, + "31": 10.02244, + "32": 10.40618, "33": 10.39874, - "34": 10.13771, + "34": 10.1377, "35": 10.20184, - "36": 10.16052, - "37": 10.28973, - "38": 10.11474, + "36": 10.1605, + "37": 10.28975, + "38": 10.11483, "39": 10.361, - "40": 10.01903, + "40": 10.01905, "41": 10.07292, - "42": 10.14698, - "43": 9.74687, - "44": 9.87766, - "45": 9.74966, - "46": 9.73383, - "47": 10.07535, - "48": 9.78068, - "49": 9.44784, + "42": 10.14697, + "43": 9.74684, + "44": 9.87763, + "45": 9.74962, + "46": 9.73382, + "47": 10.07536, + "48": 9.78071, + "49": 9.44783, "50": 9.8399 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 653.0, - "2": 642.0, - "3": 630.0, - "4": 585.0, - "5": 635.0, - "6": 687.0, - "7": 615.0, - "8": 601.0, - "9": 607.0, - "10": 522.0, - "11": 637.0, - "12": 675.0, - "13": 649.0, - "14": 648.0, - "15": 640.0, - "16": 602.0, - "17": 668.0, - "18": 634.0, - "19": 593.0, - "20": 579.0, - "21": 633.0, - "22": 597.0, - "23": 756.0, - "24": 612.0, - "25": 591.0, - "26": 620.0, - "27": 700.0, - "28": 705.0, - "29": 795.0, - "30": 752.0, - "31": 628.0, - "32": 712.0, - "33": 752.0, - "34": 737.0, - "35": 741.0, - "36": 770.0, - "37": 861.0, - "38": 823.0, - "39": 812.0, - "40": 814.0, - "41": 826.0, - "42": 801.0, - "43": 769.0, - "44": 822.0, - "45": 777.0, - "46": 828.0, - "47": 878.0, - "48": 915.0, - "49": 908.0, - "50": 848.0 + "1": 572.0, + "2": 656.0, + "3": 649.0, + "4": 631.0, + "5": 658.0, + "6": 636.0, + "7": 636.0, + "8": 542.0, + "9": 653.0, + "10": 551.0, + "11": 681.0, + "12": 642.0, + "13": 624.0, + "14": 658.0, + "15": 682.0, + "16": 659.0, + "17": 620.0, + "18": 603.0, + "19": 634.0, + "20": 639.0, + "21": 634.0, + "22": 602.0, + "23": 731.0, + "24": 620.0, + "25": 611.0, + "26": 626.0, + "27": 
683.0, + "28": 668.0, + "29": 713.0, + "30": 712.0, + "31": 616.0, + "32": 786.0, + "33": 800.0, + "34": 702.0, + "35": 684.0, + "36": 664.0, + "37": 831.0, + "38": 802.0, + "39": 919.0, + "40": 802.0, + "41": 791.0, + "42": 840.0, + "43": 718.0, + "44": 756.0, + "45": 765.0, + "46": 809.0, + "47": 839.0, + "48": 827.0, + "49": 935.0, + "50": 839.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 511214080.0, + "2": 511214080.0, + "3": 511214080.0, + "4": 511214080.0, + "5": 511214080.0, + "6": 511214080.0, + "7": 511214080.0, + "8": 511214080.0, + "9": 511214080.0, + "10": 511214080.0, + "11": 511214080.0, + "12": 511214080.0, + "13": 511214080.0, + "14": 511214080.0, + "15": 511214080.0, + "16": 511214080.0, + "17": 511214080.0, + "18": 511214080.0, + "19": 511214080.0, + "20": 511214080.0, + "21": 511214080.0, + "22": 511214080.0, + "23": 511214080.0, + "24": 511214080.0, + "25": 
511214080.0, + "26": 511214080.0, + "27": 511214080.0, + "28": 511214080.0, + "29": 511214080.0, + "30": 511214080.0, + "31": 511214080.0, + "32": 511214080.0, + "33": 511214080.0, + "34": 511214080.0, + "35": 511214080.0, + "36": 511214080.0, + "37": 511214080.0, + "38": 511214080.0, + "39": 511214080.0, + "40": 511214080.0, + "41": 511214080.0, + "42": 511214080.0, + "43": 511214080.0, + "44": 511214080.0, + "45": 511214080.0, + "46": 511214080.0, + "47": 511214080.0, + "48": 511214080.0, + "49": 511214080.0, + "50": 511214080.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 759898624.0, - "2": 933156352.0, - "3": 933156352.0, - "4": 933156352.0, - "5": 933156352.0, - "6": 933156352.0, - "7": 933156352.0, - "8": 933156352.0, - "9": 933156352.0, - "10": 933156352.0, - "11": 933156352.0, - "12": 933156352.0, - "13": 933156352.0, - "14": 933156352.0, - "15": 933156352.0, - "16": 933156352.0, - "17": 933156352.0, - "18": 933156352.0, - "19": 933156352.0, - "20": 933156352.0, - "21": 933156352.0, - "22": 933156352.0, - "23": 933156352.0, - "24": 933156352.0, - "25": 933156352.0, - "26": 933156352.0, - "27": 933156352.0, - "28": 933156352.0, - "29": 933156352.0, - "30": 933156352.0, - "31": 933156352.0, - "32": 933156352.0, - "33": 933156352.0, - "34": 933156352.0, - "35": 933156352.0, - "36": 933156352.0, - "37": 933156352.0, - "38": 933156352.0, - "39": 933156352.0, - "40": 933156352.0, - "41": 933156352.0, - "42": 933156352.0, - "43": 933156352.0, - "44": 933156352.0, - "45": 933156352.0, - "46": 933156352.0, - "47": 933156352.0, - "48": 933156352.0, - "49": 933156352.0, - "50": 933156352.0 + "1": 759899136.0, + "2": 936824320.0, + "3": 936824832.0, + "4": 936824832.0, + "5": 936824832.0, + "6": 936824832.0, + "7": 936824832.0, + "8": 936824832.0, + "9": 936824832.0, + "10": 936824832.0, + "11": 936824832.0, + "12": 936824832.0, + "13": 936824832.0, + "14": 936824832.0, + "15": 936824832.0, + "16": 
936824832.0, + "17": 936824832.0, + "18": 936824832.0, + "19": 936824832.0, + "20": 936824832.0, + "21": 936824832.0, + "22": 936824832.0, + "23": 936824832.0, + "24": 936824832.0, + "25": 936824832.0, + "26": 936824832.0, + "27": 936824832.0, + "28": 936824832.0, + "29": 936824832.0, + "30": 936824832.0, + "31": 936824832.0, + "32": 936824832.0, + "33": 936824832.0, + "34": 936824832.0, + "35": 936824832.0, + "36": 936824832.0, + "37": 936824832.0, + "38": 936824832.0, + "39": 936824832.0, + "40": 936824832.0, + "41": 936824832.0, + "42": 936824832.0, + "43": 936824832.0, + "44": 936824832.0, + "45": 936824832.0, + "46": 936824832.0, + "47": 936824832.0, + "48": 936824832.0, + "49": 936824832.0, + "50": 936824832.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 18.71096, - "2": 0.39649, - "3": 0.33228, - "4": 0.33042, - "5": 0.33036, - "6": 0.3326, - "7": 0.33962, - "8": 0.37041, - "9": 0.33077, - "10": 0.33179, - "11": 0.33053, - "12": 0.33332, - "13": 0.33149, - "14": 0.32928, - "15": 0.33252, - "16": 0.3321, - "17": 0.32661, - "18": 0.32933, - "19": 0.32718, - "20": 0.32982, - "21": 0.32827, - "22": 0.3313, - "23": 0.32836, - "24": 0.3287, - "25": 0.33025, - "26": 0.32605, - "27": 0.33501, - "28": 0.32889, - "29": 0.32971, - "30": 0.3318, - "31": 0.33458, - "32": 0.33222, - "33": 0.33434, - "34": 0.3337, - "35": 0.33221, - "36": 0.32984, - "37": 0.32779, - "38": 0.33131, - "39": 0.33056, - "40": 0.32941, - "41": 0.32351, - "42": 0.32946, - "43": 0.32913, - "44": 0.3283, - "45": 0.32845, - "46": 0.32474, - "47": 0.33097, - "48": 0.32791, - "49": 0.33143, - "50": 0.33005 + "1": 45.68343, + "2": 0.392, + "3": 0.35818, + "4": 0.28793, + "5": 0.28609, + "6": 0.28869, + "7": 0.28726, + "8": 0.28725, + "9": 0.28787, + "10": 0.2834, + "11": 0.28813, + "12": 0.28685, + "13": 0.28453, + "14": 0.28421, + "15": 0.28504, + "16": 0.28118, + "17": 0.28123, + "18": 0.28302, + "19": 0.28937, + "20": 0.28486, + "21": 
0.28762, + "22": 0.28121, + "23": 0.28289, + "24": 0.28379, + "25": 0.28305, + "26": 0.28337, + "27": 0.28236, + "28": 0.28063, + "29": 0.27814, + "30": 0.2808, + "31": 0.27908, + "32": 0.28085, + "33": 0.28065, + "34": 0.28226, + "35": 0.28009, + "36": 0.2802, + "37": 0.28283, + "38": 0.27963, + "39": 0.28465, + "40": 0.28297, + "41": 0.28176, + "42": 0.28166, + "43": 0.2805, + "44": 0.28385, + "45": 0.28053, + "46": 0.27883, + "47": 0.28037, + "48": 0.28067, + "49": 0.27929, + "50": 0.27864 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json index cd45ff021d9..e8b9cea88e0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json @@ -4,55 +4,55 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.86535, - "2": 10.85873, - "3": 10.86284, - "4": 10.84009, + "1": 10.86539, + "2": 10.85871, + "3": 10.86282, + "4": 10.84007, "5": 10.87856, - "6": 10.88856, - "7": 10.86532, - "8": 10.86017, - "9": 10.8599, - "10": 10.82981, - "11": 10.8895, - "12": 10.8751, - "13": 10.87423, + "6": 10.88852, + "7": 10.86536, + "8": 10.86015, + "9": 10.85991, + "10": 10.82982, + "11": 10.88947, + "12": 10.87511, + "13": 10.87422, "14": 10.89675, - "15": 10.82054, - "16": 10.82504, + "15": 10.82056, + "16": 10.82497, "17": 10.78983, "18": 10.81029, - "19": 10.80535, - "20": 10.70398, - "21": 10.66993, - "22": 10.50643, - "23": 10.69004, - "24": 10.56314, - "25": 10.4942, - "26": 10.56628, - "27": 10.58025, + "19": 10.80528, + "20": 10.70396, + "21": 10.6699, + "22": 10.50641, + "23": 10.69006, + "24": 10.56312, + "25": 10.49418, + "26": 10.56627, + "27": 10.58023, "28": 10.51571, - "29": 
10.55299, - "30": 10.30549, - "31": 10.02245, - "32": 10.40614, + "29": 10.55296, + "30": 10.30551, + "31": 10.02244, + "32": 10.40618, "33": 10.39874, - "34": 10.13771, + "34": 10.1377, "35": 10.20184, - "36": 10.16052, - "37": 10.28973, - "38": 10.11474, + "36": 10.1605, + "37": 10.28975, + "38": 10.11483, "39": 10.361, - "40": 10.01903, + "40": 10.01905, "41": 10.07292, - "42": 10.14698, - "43": 9.74687, - "44": 9.87766, - "45": 9.74966, - "46": 9.73383, - "47": 10.07535, - "48": 9.78068, - "49": 9.44784, + "42": 10.14697, + "43": 9.74684, + "44": 9.87763, + "45": 9.74962, + "46": 9.73382, + "47": 10.07536, + "48": 9.78071, + "49": 9.44783, "50": 9.8399 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 653.0, - "2": 642.0, - "3": 630.0, - "4": 585.0, - "5": 635.0, - "6": 687.0, - "7": 615.0, - "8": 601.0, - "9": 607.0, - "10": 522.0, - "11": 637.0, - "12": 675.0, - "13": 649.0, - "14": 648.0, - "15": 640.0, - "16": 602.0, - "17": 668.0, - "18": 634.0, - "19": 593.0, - "20": 579.0, - "21": 633.0, - "22": 597.0, - "23": 756.0, - "24": 612.0, - "25": 591.0, - "26": 620.0, - "27": 700.0, - "28": 705.0, - "29": 795.0, - "30": 752.0, - "31": 628.0, - "32": 712.0, - "33": 752.0, - "34": 737.0, - "35": 741.0, - "36": 770.0, - "37": 861.0, - "38": 823.0, - "39": 812.0, - "40": 814.0, - "41": 826.0, - "42": 801.0, - "43": 769.0, - "44": 822.0, - "45": 777.0, - "46": 828.0, - "47": 878.0, - "48": 915.0, - "49": 908.0, - "50": 848.0 + "1": 572.0, + "2": 656.0, + "3": 649.0, + "4": 631.0, + "5": 658.0, + "6": 636.0, + "7": 636.0, + "8": 542.0, + "9": 653.0, + "10": 551.0, + "11": 681.0, + "12": 642.0, + "13": 624.0, + "14": 658.0, + "15": 682.0, + "16": 659.0, + "17": 620.0, + "18": 603.0, + "19": 634.0, + "20": 639.0, + "21": 634.0, + "22": 602.0, + "23": 731.0, + "24": 620.0, + "25": 611.0, + "26": 626.0, + "27": 683.0, + "28": 668.0, + "29": 713.0, + "30": 712.0, + "31": 616.0, + "32": 786.0, + "33": 800.0, + "34": 702.0, + "35": 684.0, + 
"36": 664.0, + "37": 831.0, + "38": 802.0, + "39": 919.0, + "40": 802.0, + "41": 791.0, + "42": 840.0, + "43": 718.0, + "44": 756.0, + "45": 765.0, + "46": 809.0, + "47": 839.0, + "48": 827.0, + "49": 935.0, + "50": 839.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 511214080.0, + "2": 511214080.0, + "3": 511214080.0, + "4": 511214080.0, + "5": 511214080.0, + "6": 511214080.0, + "7": 511214080.0, + "8": 511214080.0, + "9": 511214080.0, + "10": 511214080.0, + "11": 511214080.0, + "12": 511214080.0, + "13": 511214080.0, + "14": 511214080.0, + "15": 511214080.0, + "16": 511214080.0, + "17": 511214080.0, + "18": 511214080.0, + "19": 511214080.0, + "20": 511214080.0, + "21": 511214080.0, + "22": 511214080.0, + "23": 511214080.0, + "24": 511214080.0, + "25": 511214080.0, + "26": 511214080.0, + "27": 511214080.0, + "28": 511214080.0, + "29": 511214080.0, + "30": 511214080.0, + "31": 
511214080.0, + "32": 511214080.0, + "33": 511214080.0, + "34": 511214080.0, + "35": 511214080.0, + "36": 511214080.0, + "37": 511214080.0, + "38": 511214080.0, + "39": 511214080.0, + "40": 511214080.0, + "41": 511214080.0, + "42": 511214080.0, + "43": 511214080.0, + "44": 511214080.0, + "45": 511214080.0, + "46": 511214080.0, + "47": 511214080.0, + "48": 511214080.0, + "49": 511214080.0, + "50": 511214080.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 759895552.0, - "2": 933156352.0, - "3": 933156352.0, - "4": 933156352.0, - "5": 933156352.0, - "6": 933156352.0, - "7": 933156352.0, - "8": 933156352.0, - "9": 933156352.0, - "10": 933156352.0, - "11": 933156352.0, - "12": 933156352.0, - "13": 933156352.0, - "14": 933156352.0, - "15": 933156352.0, - "16": 933156352.0, - "17": 933156352.0, - "18": 933156352.0, - "19": 933156352.0, - "20": 933156352.0, - "21": 933156352.0, - "22": 933156352.0, - "23": 933156352.0, - "24": 933156352.0, - "25": 933156352.0, - "26": 933156352.0, - "27": 933156352.0, - "28": 933156352.0, - "29": 933156352.0, - "30": 933156352.0, - "31": 933156352.0, - "32": 933156352.0, - "33": 934201856.0, - "34": 934201856.0, - "35": 934201856.0, - "36": 934201856.0, - "37": 934201856.0, - "38": 934201856.0, - "39": 934201856.0, - "40": 934201856.0, - "41": 934201856.0, - "42": 934201856.0, - "43": 934201856.0, - "44": 934201856.0, - "45": 934201856.0, - "46": 934201856.0, - "47": 934201856.0, - "48": 934201856.0, - "49": 934201856.0, - "50": 934201856.0 + "1": 757801984.0, + "2": 935777792.0, + "3": 935777792.0, + "4": 935777792.0, + "5": 935777792.0, + "6": 935777792.0, + "7": 935777792.0, + "8": 935777792.0, + "9": 935777792.0, + "10": 935777792.0, + "11": 935777792.0, + "12": 935777792.0, + "13": 935777792.0, + "14": 935777792.0, + "15": 935777792.0, + "16": 935777792.0, + "17": 935777792.0, + "18": 935777792.0, + "19": 935777792.0, + "20": 935777792.0, + "21": 935777792.0, + "22": 
935777792.0, + "23": 935777792.0, + "24": 935777792.0, + "25": 935777792.0, + "26": 935777792.0, + "27": 935777792.0, + "28": 935777792.0, + "29": 935777792.0, + "30": 935777792.0, + "31": 935777792.0, + "32": 935777792.0, + "33": 935777792.0, + "34": 935777792.0, + "35": 935777792.0, + "36": 935777792.0, + "37": 935777792.0, + "38": 935777792.0, + "39": 935777792.0, + "40": 935777792.0, + "41": 935777792.0, + "42": 935777792.0, + "43": 935777792.0, + "44": 935777792.0, + "45": 935777792.0, + "46": 935777792.0, + "47": 935777792.0, + "48": 935777792.0, + "49": 935777792.0, + "50": 935777792.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 17.72917, - "2": 0.36269, - "3": 0.33585, - "4": 0.33878, - "5": 0.33758, - "6": 0.33453, - "7": 0.33628, - "8": 0.33416, - "9": 0.33309, - "10": 0.33521, - "11": 0.33536, - "12": 0.33148, - "13": 0.33565, - "14": 0.33401, - "15": 0.33029, - "16": 0.33788, - "17": 0.33302, - "18": 0.33337, - "19": 0.33761, - "20": 0.33672, - "21": 0.33256, - "22": 0.3374, - "23": 0.33652, - "24": 0.33672, - "25": 0.33982, - "26": 0.3335, - "27": 0.3328, - "28": 0.33835, - "29": 0.33338, - "30": 0.33371, - "31": 0.33991, - "32": 0.33259, - "33": 0.33537, - "34": 0.33777, - "35": 0.33494, - "36": 0.33504, - "37": 0.33915, - "38": 0.33462, - "39": 0.33387, - "40": 0.33791, - "41": 0.33426, - "42": 0.33834, - "43": 0.33785, - "44": 0.32761, - "45": 0.32857, - "46": 0.33205, - "47": 0.3355, - "48": 0.33535, - "49": 0.33792, - "50": 0.33613 + "1": 44.86787, + "2": 0.36349, + "3": 0.3142, + "4": 0.29456, + "5": 0.29609, + "6": 0.29566, + "7": 0.29467, + "8": 0.2899, + "9": 0.28864, + "10": 0.28994, + "11": 0.28355, + "12": 0.28608, + "13": 0.28278, + "14": 0.2823, + "15": 0.28087, + "16": 0.28237, + "17": 0.28556, + "18": 0.28363, + "19": 0.28381, + "20": 0.28356, + "21": 0.28235, + "22": 0.29036, + "23": 0.28491, + "24": 0.28322, + "25": 0.28412, + "26": 0.28352, + "27": 0.28643, + "28": 0.2853, + 
"29": 0.28809, + "30": 0.28258, + "31": 0.28114, + "32": 0.281, + "33": 0.28135, + "34": 0.27914, + "35": 0.28099, + "36": 0.28267, + "37": 0.28236, + "38": 0.28102, + "39": 0.31493, + "40": 0.28173, + "41": 0.28058, + "42": 0.28033, + "43": 0.28335, + "44": 0.28253, + "45": 0.28169, + "46": 0.28078, + "47": 0.28082, + "48": 0.2819, + "49": 0.28087, + "50": 0.28 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json index cac9c570ec1..9e26dfeeb6e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json @@ -1,537 +1 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 100, - "step_interval": 1, - "values": { - "1": 10.8583, - "2": 10.85411, - "3": 10.8543, - "4": 10.84407, - "5": 10.87282, - "6": 10.8793, - "7": 10.84658, - "8": 10.86139, - "9": 10.87078, - "10": 10.83266, - "11": 10.86332, - "12": 10.87295, - "13": 10.87798, - "14": 10.88588, - "15": 10.82104, - "16": 10.82759, - "17": 10.80303, - "18": 10.82092, - "19": 10.80032, - "20": 10.71379, - "21": 10.69818, - "22": 10.57542, - "23": 10.72119, - "24": 10.60091, - "25": 10.5476, - "26": 10.61127, - "27": 10.61393, - "28": 10.57777, - "29": 10.57888, - "30": 10.36791, - "31": 10.13451, - "32": 10.47063, - "33": 10.47371, - "34": 10.23442, - "35": 10.28457, - "36": 10.23595, - "37": 10.35351, - "38": 10.20695, - "39": 10.40581, - "40": 10.08924, - "41": 10.16388, - "42": 10.22671, - "43": 9.86336, - "44": 9.98189, - "45": 9.84555, - "46": 9.85753, - "47": 10.16884, - "48": 9.86474, - "49": 9.54712, - "50": 9.91942, - "51": 9.86179, - "52": 9.76162, - 
"53": 10.08383, - "54": 9.96743, - "55": 9.89199, - "56": 9.63777, - "57": 9.49339, - "58": 9.83897, - "59": 9.59641, - "60": 9.50823, - "61": 9.70513, - "62": 9.99499, - "63": 9.38054, - "64": 9.78296, - "65": 8.95946, - "66": 9.71045, - "67": 9.38075, - "68": 9.78884, - "69": 9.79451, - "70": 9.73441, - "71": 9.62146, - "72": 9.58792, - "73": 9.49657, - "74": 8.9434, - "75": 9.43112, - "76": 9.09716, - "77": 10.0681, - "78": 9.73005, - "79": 9.37764, - "80": 9.41097, - "81": 9.48622, - "82": 9.69669, - "83": 9.3163, - "84": 9.42182, - "85": 9.61516, - "86": 9.07553, - "87": 9.59851, - "88": 9.75046, - "89": 9.61112, - "90": 9.82373, - "91": 9.35278, - "92": 9.36495, - "93": 9.08811, - "94": 8.83656, - "95": 9.52256, - "96": 9.52793, - "97": 9.31634, - "98": 9.67876, - "99": 8.89321, - "100": 9.40801 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 100, - "step_interval": 1, - "values": { - "1": 1708.0, - "2": 1804.0, - "3": 1725.0, - "4": 1881.0, - "5": 2019.0, - "6": 2015.0, - "7": 2086.0, - "8": 1730.0, - "9": 2024.0, - "10": 1515.0, - "11": 2162.0, - "12": 1847.0, - "13": 2125.0, - "14": 2050.0, - "15": 1946.0, - "16": 2000.0, - "17": 1996.0, - "18": 1874.0, - "19": 2011.0, - "20": 1771.0, - "21": 2099.0, - "22": 1892.0, - "23": 2171.0, - "24": 1834.0, - "25": 1790.0, - "26": 1803.0, - "27": 1998.0, - "28": 2211.0, - "29": 2129.0, - "30": 2147.0, - "31": 1623.0, - "32": 2174.0, - "33": 2364.0, - "34": 2035.0, - "35": 2089.0, - "36": 2202.0, - "37": 2603.0, - "38": 2468.0, - "39": 2623.0, - "40": 2383.0, - "41": 2519.0, - "42": 2522.0, - "43": 2235.0, - "44": 2275.0, - "45": 2319.0, - "46": 2632.0, - "47": 2675.0, - "48": 2697.0, - "49": 2551.0, - "50": 2814.0, - "51": 2767.0, - "52": 2804.0, - "53": 3231.0, - "54": 2905.0, - "55": 2575.0, - "56": 3077.0, - "57": 2587.0, - "58": 3346.0, - "59": 3056.0, - "60": 2695.0, - "61": 3191.0, - "62": 2637.0, - "63": 2649.0, - "64": 3176.0, - "65": 2756.0, - "66": 3481.0, - "67": 2905.0, - "68": 3114.0, - "69": 
3133.0, - "70": 3533.0, - "71": 3225.0, - "72": 2621.0, - "73": 3297.0, - "74": 2145.0, - "75": 2799.0, - "76": 3354.0, - "77": 3466.0, - "78": 3485.0, - "79": 3464.0, - "80": 3614.0, - "81": 4011.0, - "82": 3694.0, - "83": 3201.0, - "84": 3655.0, - "85": 3597.0, - "86": 3096.0, - "87": 4103.0, - "88": 3306.0, - "89": 3839.0, - "90": 3352.0, - "91": 2980.0, - "92": 3452.0, - "93": 2967.0, - "94": 3773.0, - "95": 3589.0, - "96": 3800.0, - "97": 3412.0, - "98": 3998.0, - "99": 3483.0, - "100": 3651.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 100, - "step_interval": 1, - "values": { - "1": 232422400.0, - "2": 232422400.0, - "3": 232422400.0, - "4": 232422400.0, - "5": 232422400.0, - "6": 233470976.0, - "7": 232422400.0, - "8": 233470976.0, - "9": 232422400.0, - "10": 232422400.0, - "11": 232422400.0, - "12": 232422400.0, - "13": 232422400.0, - "14": 233470976.0, - "15": 232422400.0, - "16": 232422400.0, - "17": 232422400.0, - "18": 232422400.0, - "19": 232422400.0, - "20": 232422400.0, - "21": 232422400.0, - "22": 232422400.0, - "23": 232422400.0, - "24": 232422400.0, - "25": 232422400.0, - "26": 232422400.0, - "27": 232422400.0, - "28": 232422400.0, - "29": 232422400.0, - "30": 232422400.0, - "31": 232422400.0, - "32": 232422400.0, - "33": 232422400.0, - "34": 232422400.0, - "35": 232422400.0, - "36": 232422400.0, - "37": 232422400.0, - "38": 232422400.0, - "39": 232422400.0, - "40": 232422400.0, - "41": 232422400.0, - "42": 232422400.0, - "43": 232422400.0, - "44": 232422400.0, - "45": 232422400.0, - "46": 232422400.0, - "47": 232422400.0, - "48": 232422400.0, - "49": 233470976.0, - "50": 232422400.0, - "51": 232422400.0, - "52": 232422400.0, - "53": 232422400.0, - "54": 232422400.0, - "55": 233470976.0, - "56": 232422400.0, - "57": 233470976.0, - "58": 232422400.0, - "59": 232422400.0, - "60": 232422400.0, - "61": 232422400.0, - "62": 232422400.0, - "63": 232422400.0, - "64": 232422400.0, - "65": 232422400.0, - "66": 232422400.0, - "67": 
232422400.0, - "68": 232422400.0, - "69": 232422400.0, - "70": 232422400.0, - "71": 232422400.0, - "72": 232422400.0, - "73": 232422400.0, - "74": 232422400.0, - "75": 232422400.0, - "76": 232422400.0, - "77": 232422400.0, - "78": 232422400.0, - "79": 232422400.0, - "80": 232422400.0, - "81": 232422400.0, - "82": 232422400.0, - "83": 232422400.0, - "84": 232422400.0, - "85": 232422400.0, - "86": 232422400.0, - "87": 232422400.0, - "88": 232422400.0, - "89": 232422400.0, - "90": 232422400.0, - "91": 232422400.0, - "92": 232422400.0, - "93": 232422400.0, - "94": 232422400.0, - "95": 232422400.0, - "96": 232422400.0, - "97": 232422400.0, - "98": 232422400.0, - "99": 233470976.0, - "100": 232422400.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 100, - "step_interval": 1, - "values": { - "1": 683423744.0, - "2": 773273600.0, - "3": 773276672.0, - "4": 773276672.0, - "5": 773276672.0, - "6": 773276672.0, - "7": 773276672.0, - "8": 773276672.0, - "9": 773276672.0, - "10": 773276672.0, - "11": 773276672.0, - "12": 773276672.0, - "13": 773276672.0, - "14": 773276672.0, - "15": 773276672.0, - "16": 773276672.0, - "17": 773276672.0, - "18": 773276672.0, - "19": 773276672.0, - "20": 773276672.0, - "21": 773276672.0, - "22": 773276672.0, - "23": 773276672.0, - "24": 773276672.0, - "25": 773276672.0, - "26": 773276672.0, - "27": 773276672.0, - "28": 773276672.0, - "29": 773276672.0, - "30": 773276672.0, - "31": 773276672.0, - "32": 773276672.0, - "33": 773276672.0, - "34": 773276672.0, - "35": 773276672.0, - "36": 773276672.0, - "37": 773276672.0, - "38": 773276672.0, - "39": 773276672.0, - "40": 773276672.0, - "41": 773276672.0, - "42": 773276672.0, - "43": 773276672.0, - "44": 773276672.0, - "45": 773276672.0, - "46": 773276672.0, - "47": 773276672.0, - "48": 773276672.0, - "49": 773276672.0, - "50": 775372800.0, - "51": 775372800.0, - "52": 775372800.0, - "53": 775372800.0, - "54": 775372800.0, - "55": 775372800.0, - "56": 775372800.0, - "57": 
775372800.0, - "58": 775372800.0, - "59": 775372800.0, - "60": 775372800.0, - "61": 775372800.0, - "62": 775372800.0, - "63": 775372800.0, - "64": 775372800.0, - "65": 775372800.0, - "66": 775372800.0, - "67": 775372800.0, - "68": 775372800.0, - "69": 775372800.0, - "70": 775372800.0, - "71": 775372800.0, - "72": 775372800.0, - "73": 775372800.0, - "74": 775372800.0, - "75": 775372800.0, - "76": 775372800.0, - "77": 775372800.0, - "78": 775372800.0, - "79": 775372800.0, - "80": 775372800.0, - "81": 775372800.0, - "82": 775372800.0, - "83": 775372800.0, - "84": 775372800.0, - "85": 775372800.0, - "86": 775372800.0, - "87": 775372800.0, - "88": 775372800.0, - "89": 775372800.0, - "90": 775372800.0, - "91": 775372800.0, - "92": 775372800.0, - "93": 775372800.0, - "94": 775372800.0, - "95": 775372800.0, - "96": 775372800.0, - "97": 775372800.0, - "98": 775372800.0, - "99": 775373312.0, - "100": 775373312.0 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 100, - "step_interval": 1, - "values": { - "1": 16.23173, - "2": 0.48632, - "3": 0.3184, - "4": 0.31067, - "5": 0.31575, - "6": 0.3127, - "7": 0.3096, - "8": 0.31392, - "9": 0.31591, - "10": 0.30891, - "11": 0.31209, - "12": 0.31271, - "13": 0.30582, - "14": 0.31032, - "15": 0.30879, - "16": 0.3077, - "17": 0.30689, - "18": 0.30824, - "19": 0.30953, - "20": 0.30728, - "21": 0.31141, - "22": 0.31157, - "23": 0.30569, - "24": 0.30896, - "25": 0.30916, - "26": 0.30674, - "27": 0.31017, - "28": 0.30716, - "29": 0.30734, - "30": 0.30698, - "31": 0.30881, - "32": 0.3089, - "33": 0.30647, - "34": 0.3112, - "35": 0.311, - "36": 0.30632, - "37": 0.30856, - "38": 0.30986, - "39": 0.30502, - "40": 0.31035, - "41": 0.306, - "42": 0.30943, - "43": 0.30773, - "44": 0.30886, - "45": 0.30942, - "46": 0.30579, - "47": 0.31121, - "48": 0.31407, - "49": 0.30981, - "50": 0.30966, - "51": 0.3347, - "52": 0.35543, - "53": 0.31067, - "54": 0.30931, - "55": 0.31517, - "56": 0.30883, - "57": 0.30908, - "58": 0.31373, - "59": 
0.30746, - "60": 0.31113, - "61": 0.31473, - "62": 0.30775, - "63": 0.31034, - "64": 0.31108, - "65": 0.3103, - "66": 0.3085, - "67": 0.31036, - "68": 0.31412, - "69": 0.30947, - "70": 0.30646, - "71": 0.31133, - "72": 0.30734, - "73": 0.31043, - "74": 0.31583, - "75": 0.3074, - "76": 0.30939, - "77": 0.3182, - "78": 0.30755, - "79": 0.30953, - "80": 0.3085, - "81": 0.31023, - "82": 0.30621, - "83": 0.30705, - "84": 0.31232, - "85": 0.30864, - "86": 0.31017, - "87": 0.3124, - "88": 0.30667, - "89": 0.31086, - "90": 0.31626, - "91": 0.30744, - "92": 0.30887, - "93": 0.31054, - "94": 0.31172, - "95": 0.31164, - "96": 0.31058, - "97": 0.31089, - "98": 0.30676, - "99": 0.3105, - "100": 0.31337 - } - } -} \ No newline at end of file +{} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json index 9be8a9dc0ca..b31640a2a28 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json @@ -1,1028 +1,1028 @@ { "throughput": [ - 94.6087716527102, - 115.85992244026639, - 138.9562527069375, - 133.18726531918395, - 81.97861561771212, - 134.30726469422635, - 86.456140428456, - 114.99456351298251, - 147.3101800153954, - 3.0364623744653003, - 124.7590786954667, - 134.2276982994434, - 3.0580463134110167, - 117.03969654341354, - 130.92134521286803, - 48.493091604204935, - 1.4498729599486508, - 128.01470907994928, - 1.8330770354872434, - 66.31842482241125, - 82.24189975425459, - 1.07058112939944, - 1.8815468970982412, - 0.9373246942729808, - 134.9963160815443, - 2.285771114682068, - 43.068220270070434, - 134.9677086822377, - 
82.44946740133796, - 47.71839155542011, - 114.4199568886962, - 29.67621576315833, - 144.1589742491705, - 95.8164720809401, - 122.80562228460093, - 39.21436814433054, - 3.041180292262413, - 3.2867844729646842, - 72.43808226229888, - 0.8371525937296347, - 1.2212635079980698, - 145.6869075644325, - 42.317711349146016, - 109.1196064871946, - 73.6281770453198, - 140.4495689387567, - 1.219834296561022, - 138.66856497329005, - 23.33818821323391, - 67.82342558671365, - 130.09683254313987, - 147.60199288178146, - 0.9427431720755464, - 3.2856495013162523, - 79.12426666101076, - 86.41557345094756, - 120.17346279825053, - 137.16615251640926, - 108.93291864542198, - 110.10504114490513, - 46.19253755421628, - 0.950218846923012, - 136.50642826951463, - 142.73168666846448, - 1.2206786818073785, - 1.898581377105612, - 131.72636154091063, - 2.2842414327001976, - 89.76521170090028, - 114.66053545744656, - 58.64474290044525, - 0.8367865961030284, - 128.01767795820945, - 60.87292097103301, - 124.20016865241587, - 119.59336898055426, - 0.9425820346281929, - 93.70053305431952, - 1.0728113870213674, - 135.7596767309971, - 112.89357243644062, - 89.2743296587299, - 137.86411291342458, - 135.6974706051771, - 102.59633828443238, - 129.82058179399326, - 139.57672703148444, - 140.5642311163746, - 78.49182953675201, - 123.40912657074227, - 82.74099904578694, - 75.5490641626476, - 93.38596238341951, - 141.19058076067225, - 1.072254167577298, - 100.8669047802279, - 132.77382347347034, - 92.29086179175866, - 137.20301032384705, - 89.57723938765776, - 67.5465256589703, - 0.9498935124108836, - 1.0716887464650027, - 0.8365472180547067, - 137.902625307774, - 132.67132600219722, - 1.45201860416265, - 1.8366476879619427, - 88.65095604379363, - 132.1806036761347, - 126.0481874394642, - 127.43750324083169, - 93.27238135265156, - 109.83884164204308, - 102.30516355984702, - 141.10387096377744, - 0.9425154448032942, - 95.04281981148903, - 103.11525529548061, - 0.8361762901534399, - 135.3171561172067, - 
123.30032998064965, - 118.75691144485415, - 82.21375599642211, - 66.37216333263251, - 120.02349229491865, - 27.339414655466246, - 133.1312422227687, - 123.02377779863252, - 111.0798894329, - 58.88405247768833, - 131.31767475108893, - 40.19076958615912, - 123.58362152151858, - 130.6541142941889, - 61.39555613504246, - 43.92154495664044, - 1.037012527495492, - 127.16052127606021, - 137.06554800183082, - 85.67161160523041, - 1.0253417447981334, - 139.20903624514017, - 140.19068787455728, - 117.67416498245059, - 23.410837515725987, - 130.73052473972666, - 22.561824695346466, - 1.028901717647808, - 119.30712483977753, - 117.77548263464804, - 135.2959098119142, - 142.10193821260228, - 1.0366044325624144, - 1.0350271698893887, - 132.8943567509843, - 51.50353963446039, - 113.39559408843714, - 124.25424103796537, - 129.60407993083075, - 136.8566687186031, - 1.036163010240988, - 1.0345739017743927, - 118.72350056844492, - 32.453707095990595, - 43.851925176925825, - 139.39206855448938, - 141.0979597861742, - 132.81461728578432, - 80.95956255477945, - 133.42483643501154, - 57.27721135575491, - 81.47649794801364, - 79.39765285063396, - 56.40255861789973, - 0.8890603607397893, - 137.59325887086797, - 118.03982850100024, - 53.04390121587005, - 88.31177924841927, - 1.0287550608831881, - 54.67393025836421, - 54.73556135447348, - 129.6143036059356, - 123.57095756116274, - 146.05184555314386, - 55.506024155977386, - 84.40666358740559, - 62.68531518105107, - 147.42894642823578, - 1.0274253590993496, - 145.9063526676371, - 76.36231256557768, - 1.035808949157935, - 136.1858098182613, - 93.13144140533397, - 54.57886608953819, - 1.0251956490815057, - 1.0270063804838983, - 67.96952180390161, - 136.90103479290272, - 78.62986077133174, - 129.97235998681177, - 70.57784076609056, - 1.028567312218149, - 69.64434330087829, - 1.0266016363366386, - 25.142311727265525, - 139.54750333578679, - 118.80547132463877, - 1.0342055876192149, - 132.79991800938092, - 88.25494664060619, - 132.4600307114398, - 
1.026200775415348, - 111.33264788932784, - 1.031301270403004, - 104.45912302410692, - 1.0337771723701492, - 124.53550504281608, - 1.0283501183885058, - 126.53361938982871, - 139.83512785200963, - 102.28350299734186, - 122.68389734539087, - 139.27095111763788, - 1.0333552237490158, - 97.04945381465573, - 60.63422077140298, - 1.0248694052483192, - 96.77644543721476, - 118.38370846079931, - 1.0309087229819596, - 136.0487423665781, - 1.032932214377732, - 104.96525711514936, - 50.75370028394122, - 125.67617176346853, - 125.47392048276225, - 101.59371483024698, - 119.1183231384482, - 134.24568445137294, - 1.0323996653747745, - 119.28563313083153, - 50.183581144589674, - 107.50817556608582, - 127.4693561344537, - 116.0234844098742, - 149.0429439759437, - 127.77855747904051, - 1.0319900690130652, - 129.7400124946839, - 60.27584011696136, - 1.0245534026749026, - 113.8687773549026, - 129.9927880985222, - 41.55332067297356, - 12.991853549713621, - 144.9384518471586, - 127.77570879015505, - 79.09214991388126, - 1.0326234729165304, - 144.50618896622706, - 44.461452482592826, - 145.75357879817352, - 150.5618330832813, - 123.17802281879979, - 147.0133924731902, - 57.07203337285457, - 140.17944630269687, - 44.5066568841284, - 150.2834791394652, - 146.37106237628518, - 135.59553639884948, - 21.91845075979551, - 1.0391172002596458, - 92.42182316100705, - 14.98578222593142, - 19.944740287073653, - 32.75622847272977, - 58.94666795839769, - 1.0428676908165904, - 97.94938911630567, - 140.5399781540016, - 36.397689902912774, - 1.0322919875583962, - 33.76444948259586, - 147.54902815924785, - 51.316830076622495, - 153.55703202636914, - 46.423895018386204, - 140.271682540213, - 1.0340651759548871, - 85.22971449383292, - 141.80480996358014, - 1.0234621691055457, - 1.0355322329825165, - 136.96321865236195, - 138.2293990177049, - 136.89440582973347, - 96.94919171687799, - 54.992986423891566, - 142.91167590864902, - 138.73615931624403, - 86.32837448704223, - 1.0424247604140402, - 
127.58052889290863, - 138.2472241943501, - 1.0338260095695477, - 1.0317372756221133, - 150.59249576769173, - 1.0229533138894364, - 149.1711141084735, - 1.0419379125129562, - 1.040305113121658, - 150.13261057757276, - 62.47975017460808, - 70.20443057037575, - 76.88821624674898, - 1.0225242667788867, - 136.83301633777177, - 1.0414381555227956, - 131.6044067829552, - 1.038902005769604, - 1.0335832618537684, - 83.38230404797935, - 3.047737981863063, - 140.9843162162637, - 1.0352264324041114, - 1.0409374510445146, - 103.17228299164871, - 1.0383219913492376, - 67.5151836065632, - 126.94018489907108, - 95.29974174831813, - 1.022161551972834, - 1.0348032799350415, - 93.24855217625235, - 140.00831851627856, - 142.46553219867087, - 80.52507876480331, - 149.47939431741142, - 125.60095189608528, - 92.57991472689042, - 153.09192667088175, - 98.78787611117323, - 136.9802701171813, - 1.0378200246498124, - 79.05370338483348, - 145.63143231877774, - 107.86253722014555, - 113.1390555766259, - 150.4596904971142, - 6.010262757833046, - 138.11675690694213, - 1.0371929842524894, - 55.1702723554103, - 148.4142582794926, - 108.62464742566522, - 142.2515578682958, - 149.5588988951372, - 1.0310870179234204, - 32.798276334675066, - 145.8363475163408, - 82.52497836005318, - 144.77105210255448, - 140.95035733017403, - 145.4844811663436, - 145.0646083055648, - 139.1641494303434, - 1.0401220454548914, - 146.10598185112948, - 1.0335329080843159, - 1.0316085392161136, - 133.98012837767038, - 129.62059667226987, - 151.2681266565858, - 1.030719335336581, - 135.9600336007384, - 1.0366589924031362, - 107.70864165999221, - 118.06361914834272, - 148.4615541738592, - 135.1206190516379, - 1.0788915925864082, - 1.0662361391973343, - 1.0784094142292293, - 145.5492563111853, - 100.1745158858024, - 89.97448812790176, - 140.13008352060388, - 8.378443606045758, - 19.841723966559687, - 31.11972559764219, - 127.75589035167928, - 144.649118240912, - 83.40454687650907, - 13.609558087727212, - 144.14916775068022, - 
143.0831699051951, - 144.53789580070173, - 129.35689525213576, - 126.54760361436873, - 136.72725454688293, - 83.66753329456253, - 35.238850690537326, - 138.73588075606074, - 148.39285997484404, - 141.43706957675556, - 35.20788617289704, - 140.22918428708584, - 141.42288954532623, - 80.8071906111917, - 53.480908541665116, - 96.60869116876205, - 138.83030943256392, - 146.89537016655746, - 1.0659353965573166, - 138.66041009897964, - 138.0783824554628, - 54.95061283513892, - 1.0688789370964418, - 145.4981195236156, - 107.91672388693667, - 147.39387423946786, - 143.49840246862203, - 1.0781871694837721, - 125.37215873599833, - 46.390553110182545, - 1.0683430650310588, - 60.55314896188811, - 128.32962060837178, - 142.6648214311374, - 1.065532502621677, - 145.06202945295232, - 149.5985088362253, - 43.61426254132819, - 139.2120402464869, - 138.80120892663803, - 142.59390751862693, - 147.27000174003754, - 139.5980537408405, - 142.37081759892675, - 76.47257166426981, - 0.8663971721944621, - 1.067847671923619, - 1.0752972325757186, - 139.11225337731244, - 154.1012640338781, - 91.85315813315137, - 7.34066705730821, - 1.0763437477764217, - 56.03391448680589, - 1.067309924884827, - 1.0747789028833068, - 1.057667310022394, - 146.4284745539176, - 142.32867288307636, - 132.81801172672715, - 142.5746724111237, - 43.178263922620026, - 140.19958418325498, - 1.0742201855279276, - 139.95237701874325, - 124.69044225989671, - 89.93275546978569, - 1.0778110524743836, - 108.03753008375865, - 0.8649825661375887, - 101.22782607000799, - 138.6615942910557, - 1.0572642952018412, - 143.509260845593, - 1.0651693329533294, - 97.454990956795, - 1.075960473594851, - 104.89429761368234, - 153.46849816095335, - 143.28204379991922, - 112.57923589922926, - 145.35468060283986, - 119.53338040876814, - 132.53105489182144, - 146.60735281445733, - 0.8648000721123511, - 132.61504628627392, - 140.81953388748138, - 1.05684091289561, - 147.29646966899597, - 1.0646855258714663, - 1.0772400203863821, - 
137.87592499226204, - 101.79954304062817, - 134.45893707567646, - 1.0737967838723397, - 147.3289039421509, - 142.95955673278567, - 123.11846557585149, - 139.7223884224781, - 5.274894457437767, - 0.8646226703470901, - 135.27010135142623, - 134.53222451904563, - 140.4520894166607, - 148.6784682726068, - 148.83999547746723, - 144.76059628877204, - 146.09818079047014, - 0.8644123666240657, - 133.05795012757028, - 141.21253159110282, - 147.08086640702987, - 153.13511211461227, - 147.72437078211334, - 53.87242850230838, - 61.34701685378028, - 74.50771860339175, - 16.40780504974564, - 16.448796993269678, - 144.08505364828036, - 143.78069847853888, - 145.08382905436133, - 139.4144567792124, - 1.113422304912727, - 23.732299099149245, - 146.716938504402, - 1.1150428401994323, - 1.1070863332993708, - 147.462815334713, - 15.300506166735937, - 142.89311901203018, - 35.881455163220174, - 0.8959120615185874, - 134.50389621984408, - 79.91603718165896, - 145.31776951960734, - 153.19384567886857, - 142.494036234602, - 130.58249312188119, - 1.1128817603274543, - 56.157995916719756, - 35.81413980204931, - 116.5213087641768, - 63.30354399512571, - 55.0117106848875, - 47.52954249314361, - 153.04709230401787, - 1.112276523473745, - 80.1523559974256, - 136.20373724941714, - 1.114673225365626, - 1.1067132158651183, - 149.29883052073288, - 145.10950784560325, - 130.53765167080937, - 1.111788125890117, - 0.8957719496064405, - 1.1050775451489783, - 17.522300994030367, - 154.45472111064055, - 152.07616582090188, - 1.1020107149905272, - 138.6808068419634, - 76.87873177159636, - 51.43702839643221, - 138.95045176064437, - 138.64177504011988, - 140.72197385602811, - 132.80947742972836, - 149.78872816785005, - 139.94034036065392, - 154.2632802491591, - 55.57148538150843, - 1.1044580058296936, - 147.1712801496827, - 77.84198065949245, - 142.38330204183904, - 151.76812011990265, - 145.19131540821485, - 147.26566215388425, - 87.12413393605841, - 1.1038403429439656, - 141.4935550752979, - 
145.7397470598185, - 3.3080164659931235, - 123.0327553358976, - 146.24080278853327, - 148.10448175245884, - 29.234562433775857, - 151.30177873039895, - 135.4653748135468, - 144.3293913931314, - 148.16163203136404, - 1.1015876034201657, - 1.1114790318458536, - 136.68047783885697, - 77.72584511329579, - 125.73692105352463, - 106.98755729483561, - 96.25926845246491, - 1.109721323323522, - 141.71073652156545, - 130.22006710827588, - 145.24478945746003, - 80.67459353439743, - 1.1033551544760267, - 150.03177939272493, - 154.12875534463626, - 150.04771421074818, - 1.1010813815407388, - 1.1110434127990452, - 145.385699877379, - 86.86487551811825, - 130.16687493633253, - 143.8726181331947, - 111.91340621077623, - 146.0394914387852, - 1.1006353022455784, - 134.47903589563677, - 148.6907436994389, - 102.87151097507036, - 137.41724911494663, - 1.1146766644704549, - 143.85952373403495, - 146.92280951248307, - 1.100156488603178, - 144.04783334738536, - 148.53630346113712, - 58.74848466983248, - 147.0485685726298, - 141.32891699761203, - 142.8441702922343, - 131.04366253726744, - 128.6305301075303, - 1.1106412111686195, - 147.90025888582002, - 0.8959265584913588, - 149.5194069726666, - 137.43649451567626, - 1.1068068376551545, - 68.05269425995475, - 138.94056631255367, - 138.43818227469507, - 69.60391199895408, - 114.83395091462887, - 151.34107787433956, - 141.57237630997332, - 146.07433910500515, - 9.941778754980154, - 131.297822968639, - 10.386636719874664, - 10.545636067043365, - 114.58677137445733, - 75.28902943071078, - 90.63452059810655, - 143.58694736923238, - 9.901118804514459, - 144.5206530902411, - 144.78737732574044, - 79.81136215142409, - 84.9314508821071, - 120.18939827456474, - 10.225253542151219, - 9.702822548173124, - 103.1188517219872, - 138.5008491242522, - 92.02238700298246, - 151.99592340131602, - 9.807595290716304, - 150.0447954775559, - 134.2614008494909, - 149.38544573345007, - 149.62298116309924, - 124.32358754465251, - 132.817456221544, - 
10.50607995390264, - 9.78317681034783, - 151.07916494121415, - 146.93545537009487, - 118.45851163082196, - 145.03008316360754, - 154.4449202186591, - 146.86002069809945, - 150.6932855951215, - 110.74803327496042, - 127.40788523389726, - 150.81323854197058, - 150.0047673310006, - 149.6063654551971, - 133.87244996538675, - 10.329695475492791, - 9.414695716712222, - 106.77032789813472, - 118.34636653947105, - 123.44441062862572, - 144.9015592115516, - 153.74652990582067, - 10.065713405335144, - 129.38998560194165, - 117.69087049838025, - 99.15650839997046, - 127.90462338199198, - 147.3574863739125, - 9.696544883885949, - 9.8853852911422, - 128.35872796896587, - 145.2939860705264, - 128.72081963712404, - 94.09935653689803, - 142.8780531031409, - 130.5213122981276, - 126.89288883528536, - 153.36107852781166, - 149.17239657923582, - 9.177632630803961, - 9.387171298727486, - 109.68196882316985, - 148.55536204011432, - 152.61730207818772, - 9.648922236946333, - 132.805446535875, - 138.74295200738652, - 141.66118217831166, - 124.0399127789103, - 113.05005278683446, - 149.71230902297984, - 25.727698431920004, - 129.56419655827216, - 130.40687823665095, - 128.46470366050013, - 150.46298369674685, - 9.22073843893938, - 110.36443029340542, - 148.23878821929193, - 10.219508495480236, - 9.615051521185155, - 9.8723813087942, - 149.91378148843256, - 9.149056684599877, - 130.37704092008303, - 114.86611671621016, - 134.53633480709703, - 131.11593468604048, - 149.74665952988033, - 136.60701891253495, - 146.50864617645632, - 9.094221140419737, - 149.69902295915708, - 126.93245475406366, - 141.2463933703881, - 10.18172163650932, - 136.76582155059438, - 155.5823388453975, - 144.68082947663285, - 142.0128061769988, - 116.20800508912414, - 101.13756407758095, - 10.050927550768915, - 10.14139856150474, - 9.573219645146107, - 146.33874064646594, - 137.22302119976462, - 132.14965518046, - 148.08190796641483, - 117.6843964457568, - 153.04352772565807, - 146.79238076404926, - 9.522740968586977, 
- 145.93484469600287, - 13.925952420322696, - 12.697420287309185, - 146.39122941822845, - 113.94298610788566, - 13.844109957456581, - 154.57922917096633, - 13.525210269101805, - 103.83976095796662, - 97.75660804271413, - 135.83818209343426, - 158.60060111529293, - 111.57793188874757, - 13.768524263105455, - 154.2203592546867, - 108.85242762118563, - 111.15752259030245, - 149.5942138872604, - 119.77102605185765, - 120.68065341205389, - 105.29698904913548, - 151.41465167808087, - 138.90606724001483, - 13.437371194424983, - 119.97194649055415, - 144.6223725248399, - 146.9934910169238, - 149.45319992777343, - 121.48260402443249, - 13.662736071688842, - 14.448955892498802, - 144.5545360346381, - 154.00382983055897, - 151.8635735223181, - 137.2321484611102, - 119.71487519948164, - 88.24978714231261, - 147.74815341218743, - 142.1113258863455, - 132.08775922189477, - 124.63351274554526, - 145.72256212355262, - 100.50708502243579, - 139.16363846809003, - 114.82662827063822, - 154.78307253831395, - 149.22879563842886, - 152.6744734255461, - 145.81022434241217, - 152.68018782123758, - 116.75549006136289, - 12.968595875688791, - 6.824624970615158, - 125.05116103474757, - 147.66072487793718, - 147.5735120742967, - 139.1302141298083, - 146.48542990069834, - 12.674865288395944, - 147.88858853602966, - 6.8124480142416175, - 137.54766974463703, - 130.89979405333307, - 13.364169845161861, - 14.116086127002273, - 130.3002929300388, - 116.98398239487472, - 152.70827610346095, - 98.51470626500011, - 135.1252373635164, - 14.405992358855888, - 154.13709739001223, - 146.28661687368685, - 137.87827066214206, - 12.621081453489012, - 154.04574874294514, - 6.802625211185703, - 152.18661864386252, - 149.30257880598677, - 13.244501725269068, - 138.34068638798834, - 150.95140747506372, - 141.8441899037163, - 152.99022366652198, - 103.95004802425926, - 140.28144756248412, - 154.51222806007945, - 85.40777548962518, - 154.7067128296305, - 120.47843952303268, - 12.568053995018431, - 
12.916583075889136, - 105.92477484543576, - 137.92878859711615, - 135.13853669037294, - 137.88549737290148, - 157.83019925734393, - 145.48927689323145, - 12.509532718065461, - 150.6233829715981, - 119.23669844460764, - 138.49099023171033, - 154.0870149904812, - 140.1862744667834, - 148.860174031694, - 147.54629689336036, - 12.448861769003683, - 152.4711466483636, - 102.47079224461186, - 152.40864885890767, - 156.21773232766026, - 13.139291580904986, - 150.30653960489693, - 145.43571147072188, - 132.8965387342577, - 144.85972103961666, - 125.5438694385711, - 158.07457773478276, - 14.359506122440205, - 137.7658155977229, - 153.68125116011197, - 156.57780724945528, - 12.394708947912125, - 12.874702780202174, - 110.61518572692995, - 149.4338565730422, - 149.67552030435513, - 146.20909415912828, - 9.308833539527914, - 26.176147260970783, - 8.701217384742513, - 66.92241449340185, - 105.12940849136734, - 145.25326276553395, - 139.68219350261262, - 131.60335890332783, - 150.53420884400245, - 17.552483447968918, - 99.60476667168517, - 9.003208512207522, - 8.539560747895454, - 9.946172723540226, - 150.55644446784382, - 9.608936841972842, - 104.80864366760326, - 25.95068644438624, - 99.42592550150236, - 108.35979254469888, - 113.9171427720856, - 9.905905876631499, - 131.1684982861573, - 154.7989292174601, - 151.34753888952145, - 150.11816141981262, - 143.00557828542912, - 126.2310299151925, - 113.53830001728545, - 148.13405630794878, - 150.7564429392251, - 155.252325076404, - 18.20048176554747, - 25.725436761645142, - 8.678711562613207, - 143.3683328827327, - 127.0294451168928, - 137.50119476282134, - 10.068367539846923, - 155.64822784014916, - 153.2789382926615, - 25.46950813818654, - 142.9138107220956, - 155.10510899417167, - 107.40557834412083, - 9.871948602847068, - 144.4712732194919, - 140.17802930301565, - 9.286026243902361, - 129.1488895575147, - 124.35586045151207, - 140.1410811550992, - 96.63692877337894, - 153.62093095799207, - 156.05800033315097, - 
9.587609950939838, - 140.09721428165886, - 134.898750425008, - 8.652809034763463, - 8.989448046931262, - 107.64260577858933, - 9.825071080298192, - 150.6237132142087, - 143.76058852986372, - 154.01627264735168, - 140.85322298632985, - 143.63714834446708, - 149.7259575806535, - 8.53942846683121, - 157.02635815805976, - 150.83913162907433, - 154.0283691261865, - 9.246842209481716, - 154.5851361854829, - 133.4662155767381, - 137.55396410787307, - 105.77910782321499, - 148.97953057255376, - 111.3041581371634, - 9.543858351726714, - 142.71996301994741, - 144.2417836324451, - 148.5293262803374, - 8.95331376662564, - 105.2724164655814, - 149.16646109060707, - 151.1947852118465, - 9.503293907683512, - 133.40055362812345, - 8.776394391795916, - 148.3675722527084, - 154.66946641450528, - 122.71674068416665, - 149.62192317697068, - 153.40159484208397, - 9.46860898864519, - 146.10526710538994, - 143.96020057925128, - 8.62472208077336, - 8.906885562515198, - 105.7754218686014, - 150.17957794387223, - 144.0451331512576, - 149.95461039551162, - 151.46311089131117, - 142.22104279807664, - 147.3679944003333, - 140.5394711174869, - 123.62157744638432, - 152.32796921399395, - 156.6603241829257, - 9.43621164630811, - 158.2241383954169, - 149.33346139426692, - 144.12074054746773, - 143.1977521817863, - 8.536662624511228, - 9.785635570067782, - 147.61880087321424, - 9.402323265876474, - 159.1161790596516, - 146.56796834276156, - 147.64890403285438, - 157.70847517328534, - 114.64282143770687, - 148.5000942425868, - 10.052761003641129, - 147.38801074409378 + 41.46611265659158, + 44.4918071112372, + 46.926673665513704, + 46.30487800041612, + 45.31117511724168, + 39.48427257480573, + 41.73807567318408, + 44.986328772700176, + 46.79460518580979, + 2.1481645603133406, + 45.3304673980315, + 46.361305003734564, + 1.2216768370041928, + 35.39842883637453, + 44.9539795483452, + 39.212326267312775, + 1.0742220506708642, + 45.596949876501405, + 1.656518545685144, + 41.1853065101293, + 
45.186903991589205, + 2.733636984435035, + 1.8859234764357438, + 4.103119744826081, + 45.69245622017379, + 1.6582215083936738, + 37.954906657600475, + 46.5127757873931, + 45.29733823530308, + 23.1754689963102, + 43.44487109471452, + 33.311038622351724, + 46.400400898475304, + 43.13207624251721, + 45.26221685255157, + 38.89631907864675, + 1.0766827581902934, + 3.1955625641377354, + 41.00672778846412, + 1.225434086753332, + 0.951420354873873, + 47.29759062957134, + 37.27931328255301, + 44.02626192577354, + 44.567351509891715, + 41.19817412895097, + 1.4117117845102758, + 46.974942144500005, + 26.16803432928029, + 40.79104304470394, + 45.98186302516314, + 47.4055947551752, + 1.076201435026891, + 3.1796394093402074, + 41.23717257081556, + 42.85213590859161, + 44.28329201807133, + 46.527540336613534, + 43.08848614726634, + 44.40830753324719, + 41.37604170752994, + 0.9482378607333808, + 45.48122547719385, + 47.20316588665498, + 0.9510683482370443, + 1.9012380421663475, + 46.19550253488152, + 2.7330118039774067, + 45.74495207812405, + 34.67238053318697, + 38.85119722571936, + 1.225081100472964, + 45.15238085691014, + 40.396011557170766, + 45.488921919651816, + 45.29351001493665, + 1.0758273605231232, + 29.808026495079588, + 1.2280820949811997, + 46.586185131212794, + 42.89263913245724, + 42.15612175451927, + 46.693253798156995, + 46.57003199283068, + 46.509087816223484, + 38.12557546239959, + 45.81548305523131, + 46.07453120649211, + 40.81605463432999, + 45.228424339779814, + 42.086064813661196, + 42.78740035356858, + 45.98922633164769, + 41.28717865700289, + 1.2274351142907918, + 43.46971411790415, + 45.4498626576556, + 42.51719188567606, + 46.624215728553786, + 43.26045159027894, + 43.962414509948275, + 0.9481540147597537, + 1.2267700611313974, + 1.2246727704472544, + 45.950324312195605, + 46.02559998344755, + 1.413545795432525, + 2.1538932898075407, + 45.57032628071106, + 38.877775528665516, + 44.5660811280025, + 45.98326532911864, + 41.78435738761637, + 
44.118449498817554, + 43.11682781122976, + 46.80957208928424, + 1.0755822711089933, + 29.775928132799514, + 42.492052303926506, + 1.2241095107799485, + 45.796086216431775, + 45.258843364665246, + 44.97308057669771, + 42.89527265230854, + 43.91533758581356, + 35.81442349583988, + 30.65358830169187, + 46.3182793971083, + 44.145493159555286, + 44.2651994526335, + 40.09824843769361, + 45.68707977480025, + 39.990813212941646, + 35.79658562417175, + 44.86013694329229, + 41.83115806056866, + 37.15064410140025, + 0.996787320025337, + 45.66808620182929, + 46.6130598481811, + 45.60972037064592, + 0.9940425141246046, + 45.591900274871186, + 46.96840985185615, + 43.393354375970155, + 25.5248831966376, + 45.77235244972332, + 24.590561326831967, + 0.9773483444490005, + 34.09417278739622, + 43.586572958161206, + 46.535859932274164, + 45.946757322805404, + 0.9962165194499956, + 0.992874583950711, + 46.119932829039165, + 42.179658293228435, + 32.997191121192365, + 44.17582132320044, + 46.14366473770965, + 45.81106545186327, + 0.9957624959115234, + 0.9924622264244217, + 39.42192933951627, + 37.64229442727469, + 21.26565173458009, + 45.593412953334585, + 46.87304671516134, + 45.216027572946594, + 42.43765019133474, + 46.197382024442064, + 40.692114254409056, + 45.33796853087654, + 27.766522112160985, + 40.02641706822085, + 1.3017150918854614, + 45.591631786019235, + 44.34279696011747, + 39.28257190816356, + 43.72958684288255, + 0.9771143356157014, + 23.874882409185425, + 38.84831650281934, + 46.04825715862786, + 44.318350427904555, + 47.26086876225989, + 39.433419122254435, + 42.94084765393213, + 43.44077111651132, + 42.4775425505976, + 0.9890763303083981, + 47.353878858820345, + 40.99026973150018, + 0.9955331259047124, + 46.52810662522569, + 43.71121305319187, + 43.098140605333754, + 0.9941110054345192, + 0.9887007080233833, + 41.60423122999918, + 45.81533148936388, + 42.37614297709579, + 45.84171517205181, + 41.73162426832469, + 0.976838541947363, + 14.558863836592382, + 
0.988317986920056, + 27.41518624216025, + 46.00613760472248, + 44.605125117227445, + 0.9923556095766691, + 46.06453996269855, + 45.69598995103852, + 38.29204120955434, + 0.9879204612413145, + 45.051133494631664, + 0.974139430894493, + 43.52911731376158, + 0.9919675926934881, + 45.37964604415822, + 0.976397605350521, + 36.30289308241207, + 45.597233615462315, + 43.61071649968794, + 43.122470348017536, + 46.76087701561043, + 0.9915593888202096, + 43.301652472823534, + 43.35874933591963, + 0.9940066207204965, + 42.186091123827985, + 45.37749985977852, + 0.9738097357420213, + 46.47531110944141, + 0.9911618676375942, + 43.561154900046205, + 42.50481546978642, + 36.28178246877416, + 44.229193258120816, + 43.274122438133034, + 43.16603619055846, + 46.24123104179791, + 0.9907652867200517, + 44.808052346983644, + 42.157257924432415, + 30.810167635761594, + 44.5009455404432, + 44.803133707609575, + 46.717718944658586, + 45.328295623099564, + 0.9903649151763216, + 45.98765051561304, + 43.15949033247262, + 0.9938810855133485, + 42.5272021864534, + 46.202556875553654, + 37.69680010665373, + 13.506488443568907, + 47.084518208092895, + 45.34409129030842, + 45.528670127709155, + 1.0839758382565585, + 45.77369572816552, + 40.36600389536794, + 46.346373598961115, + 47.59928731210073, + 45.213230445194775, + 46.97741000418462, + 43.73589527028813, + 38.21138599701667, + 39.80440406603509, + 47.546574744238036, + 46.363044750837105, + 45.73935328577624, + 22.79542790283351, + 1.0852955230764447, + 46.31190530756646, + 10.103645571001175, + 20.743583307847267, + 34.08924086156784, + 40.34233471572178, + 1.0825832325439408, + 42.93380762165118, + 46.538540446937695, + 40.56431787179345, + 1.0837596134259624, + 35.02268200701654, + 47.136990718638934, + 38.591258432063235, + 47.93266376947172, + 40.53416662878643, + 46.663334136659614, + 1.0714520955139675, + 27.88935756664922, + 45.48047962233704, + 1.0758750615408978, + 1.0683190801502396, + 46.009876361978876, + 46.59268594380503, + 
46.02812612004097, + 46.372356575684854, + 22.894765755636868, + 45.64436406976758, + 46.20773355624579, + 42.364426646383905, + 1.0822510357556412, + 44.863056156314066, + 46.46090797778492, + 1.0710544669423023, + 1.083596675232654, + 46.253226306136575, + 1.075461579555405, + 46.46757181265049, + 1.081777244820761, + 1.079157130525964, + 47.44728077576711, + 44.18890905454099, + 25.69445080780143, + 41.61341063520841, + 1.0749834632245117, + 45.18278804232428, + 1.0813046939407982, + 45.584290798191994, + 1.0851558601194167, + 1.0706298125469418, + 27.277652622917802, + 3.13795203228774, + 46.596243996630385, + 1.0680343711445561, + 1.0808489429820316, + 44.07771833504717, + 1.0782837622370247, + 44.620236842054005, + 33.66037405692795, + 42.88981761147569, + 1.0745719383443746, + 1.067541523615096, + 43.3531928586852, + 46.45260807995745, + 46.301433990064965, + 45.45037480313856, + 42.01190688214572, + 43.97592120992246, + 44.22612202356458, + 46.93790632881387, + 43.35324044647867, + 46.24983553374027, + 1.0779013969854039, + 45.68642573969881, + 40.71576971597602, + 43.609256041900395, + 44.75345611987869, + 46.683440264062696, + 6.250364298356673, + 46.58797465847453, + 1.0773923535890582, + 43.82763570204923, + 41.62940460437239, + 42.91661388574536, + 46.901610347450095, + 46.61677212391794, + 1.080583826854443, + 34.07713605907777, + 46.92641126499492, + 45.79075334582258, + 40.14409222341034, + 45.361779654878845, + 46.88204342817273, + 46.35566639777504, + 46.36704829301128, + 1.079068056447631, + 46.774512434519465, + 1.0704507990204184, + 1.0837001046492374, + 44.56501843026455, + 45.92497594226974, + 46.819599375484145, + 1.0801577199815187, + 46.01182819769449, + 1.0770346495733834, + 46.950613182781744, + 30.797706097998343, + 46.18180484355316, + 46.16072338065117, + 1.1133090433838153, + 1.1264329475750274, + 1.1236172122377037, + 47.045544454610436, + 46.77875324298633, + 28.03992244253687, + 45.334641615839494, + 8.780689100623139, + 
20.7913981632672, + 32.723036948097274, + 45.13282209264667, + 46.65435200771115, + 45.96287965580367, + 9.076296968757461, + 45.4816339150996, + 46.902872519542036, + 46.16846796984993, + 45.756891597403175, + 44.88315382035088, + 46.23903054578556, + 45.83324366902273, + 17.750809391531607, + 45.20000225981293, + 47.302482301226895, + 45.60218665990497, + 36.97764728135097, + 46.59609042040382, + 46.604767462324304, + 45.96159537616419, + 22.37221435902452, + 43.859502782475616, + 46.5164446015921, + 46.29329085467359, + 1.1262112315718147, + 46.308551190848824, + 46.12319048896243, + 43.60305812792925, + 0.9422659923955576, + 45.850627271010616, + 45.017760412103506, + 46.45017372234843, + 46.681005137311296, + 1.1235052275623567, + 45.024655731975905, + 42.551907139236725, + 0.9419457570631012, + 41.1118024425248, + 45.63421048620437, + 46.022116096626675, + 1.1258383546403372, + 47.1081443735114, + 47.030126605956774, + 42.86500455064436, + 37.358353939700315, + 45.34461986882157, + 46.86806884248587, + 46.417501701989885, + 46.351389315230215, + 46.78447423742242, + 43.74686698408526, + 1.116867665232356, + 0.9417093885501255, + 1.1193255628248941, + 46.36628759364972, + 47.0182927090698, + 44.33757352470002, + 7.691634088129115, + 1.1283438070497074, + 43.879143747221455, + 0.9414915905260655, + 1.1187592356622462, + 1.1221505116978934, + 46.07747894106487, + 46.579798906537704, + 45.766896552621894, + 46.65247758283254, + 43.302159908237364, + 37.720159108605536, + 1.1182282725285237, + 46.39182837285494, + 44.636636353923784, + 43.44450203063323, + 1.1233649178804157, + 45.04855028838785, + 1.1165108506849695, + 29.25784442036365, + 44.92016113045485, + 1.1217307674387187, + 46.08594914883392, + 1.1256588113160433, + 44.33658350966423, + 1.1279641443945907, + 46.995953225218045, + 43.09174152350243, + 45.522175701238005, + 44.54660682798267, + 46.26002914896281, + 45.121721334753246, + 45.99661519970516, + 46.999367551883665, + 1.1162274151428622, + 
34.79092708982097, + 45.466303894602824, + 1.1214388358967042, + 46.3611527229414, + 1.1253775196067384, + 1.1231558495643674, + 45.46781022594765, + 46.83967784020296, + 35.37244717495285, + 1.1180685191822184, + 47.0281597759591, + 45.004932496628875, + 44.35708507257986, + 46.65855899768837, + 5.505111079406215, + 1.115802761131929, + 35.602590093008914, + 44.671751586624886, + 46.281278781026465, + 46.65874233841448, + 47.449917573209895, + 47.11754288927177, + 46.84313387306054, + 1.1152851890752418, + 26.693730551391678, + 45.574691537692864, + 47.110350441661474, + 46.950895044828556, + 47.10814947984309, + 42.35670263948847, + 43.399091167413815, + 45.65945467138436, + 10.323879128717438, + 17.406756102821927, + 46.70765041608834, + 46.265154949804675, + 46.966387230240066, + 46.58181691440536, + 1.1794390054814614, + 40.240832270343546, + 39.59688963721167, + 1.169177901708881, + 1.176889456593387, + 46.512318262726104, + 16.255791986842784, + 46.90191826875892, + 38.002332039368945, + 1.1673839996531623, + 32.855434627015846, + 43.339268319257165, + 46.75273409704357, + 46.82224515218503, + 46.7787448289983, + 46.08633464118119, + 1.1789416201176985, + 45.01880600815589, + 17.692981429746695, + 43.82069805510859, + 42.693302457425894, + 40.895519742462156, + 43.141099312595934, + 48.08036522096514, + 1.178390117026328, + 45.95511642215028, + 35.29568405980472, + 1.1687957641452225, + 1.1765143734981645, + 46.688387154545254, + 47.06125638807941, + 45.346066735128574, + 1.1777709765320192, + 1.166989666506321, + 0.9847523589742398, + 18.562855771239047, + 47.9065264813057, + 46.73354514650198, + 1.1735046304883543, + 46.412712735423334, + 45.16100408019957, + 43.83022094061403, + 35.89794593782671, + 44.97192473982221, + 46.7633180339843, + 44.329869977212624, + 47.38342947643397, + 46.79402738420473, + 47.634269098703626, + 44.0213863595159, + 0.9845269249937244, + 45.78778499348287, + 43.90149865817902, + 45.65368969409286, + 47.746456721033944, + 
47.21697228426952, + 47.01924612843149, + 46.3245200194134, + 0.9842560530393194, + 45.26992712182612, + 46.89243421872701, + 3.4924828727877877, + 45.25207572636316, + 47.25700297914972, + 46.94730150195301, + 39.12367514310055, + 42.117856976344655, + 44.28179459170351, + 46.596840500912684, + 45.392754933120926, + 1.1731165363524663, + 1.1755941425503302, + 46.46126582671268, + 45.79994582850055, + 31.36362072652773, + 43.50384100878153, + 45.440038476775335, + 1.1661505662188223, + 46.52744939333318, + 45.250414658311975, + 46.53386354717518, + 45.796239735104564, + 0.9841302985201961, + 46.27883497779145, + 47.83598353847002, + 46.607837943658275, + 1.1726681962992465, + 1.1751504766334446, + 46.84845290565303, + 46.07497571222637, + 33.33732005606778, + 45.813985387630716, + 45.57964157112892, + 46.41818933014048, + 1.1721397028860254, + 45.89252926130944, + 47.09569465450331, + 47.250364539349285, + 35.22784278442342, + 1.1688030911620526, + 46.42186257421796, + 46.25658899517002, + 1.171409947579052, + 45.16137403712752, + 47.22442045049697, + 44.82261712339744, + 32.494327996097915, + 44.219079390101115, + 46.87735465561079, + 44.699203955991905, + 45.12568915598884, + 1.1747532937483116, + 47.069832959511444, + 1.1670956785442357, + 41.217948435045656, + 44.93033926516496, + 1.1766349885441727, + 35.47522021954888, + 46.21124702140885, + 46.24628779612773, + 34.53125955420697, + 46.66578037331865, + 43.65856477535035, + 45.03361057951491, + 46.76526122602155, + 10.182019712559228, + 45.71366318720834, + 9.833945628376052, + 9.322117004081543, + 46.537564499785105, + 31.262138808373493, + 37.90592059294092, + 46.820091937863225, + 10.139423148881114, + 46.75580347295349, + 46.89455728317566, + 39.52390472502032, + 42.643467900988064, + 38.90725083946543, + 9.086630150053459, + 8.937192123351853, + 40.9872575801166, + 46.394128489242924, + 41.193529101734704, + 47.34329154675404, + 10.054610354639179, + 43.31828144588645, + 44.553079069624026, + 
46.98279134065351, + 46.830147489351724, + 45.31329233494219, + 45.552850223950976, + 9.295212965663417, + 10.01436272470524, + 43.57022598341257, + 45.70609566213184, + 43.449062338174066, + 46.855675373016474, + 47.68860594538369, + 47.09689498272573, + 47.173878516378814, + 46.069788054621185, + 38.92002107306488, + 46.38712908030891, + 47.104897416242906, + 46.938337511897245, + 45.36212980855197, + 9.7037632831636, + 9.265430506589102, + 46.11721659871563, + 38.06187391881914, + 43.25827348162763, + 46.84719251692419, + 47.03682707869591, + 9.90500846057903, + 45.68739012850455, + 43.47148156475432, + 45.23323967788647, + 39.81125388088527, + 45.95084232488125, + 8.919454342379801, + 8.706571515609426, + 45.29003523159025, + 46.867399234540684, + 45.35240769107086, + 44.80265358061401, + 41.83510960528982, + 43.92616077285124, + 44.61292075723489, + 46.86625528407582, + 47.230904823696534, + 9.643361950798496, + 9.236779459262468, + 46.27993094745158, + 43.29062809284174, + 46.53130368901898, + 8.891092687715933, + 45.323215643957305, + 46.38559644193777, + 46.8553797027437, + 45.16725651833185, + 46.26177304715086, + 43.16649621953115, + 19.53072875578119, + 44.16107832748164, + 44.46643011473998, + 45.302511702487166, + 47.59950805589659, + 9.206283803180765, + 46.31521045156664, + 42.932315734513345, + 9.081962094633843, + 8.862645496755041, + 8.681026899042758, + 47.175946890403075, + 9.613647025719098, + 45.37459772842735, + 46.657937572561956, + 40.090063197986055, + 43.91176191056239, + 47.1764939819939, + 44.932347492473085, + 46.951971869749755, + 9.588107858966847, + 46.890536209011636, + 47.457220061858926, + 41.820791051617206, + 9.051934235829219, + 45.46750284471863, + 47.1114848526844, + 46.90614671206355, + 46.81408948407702, + 44.76508972637772, + 44.94143445208981, + 10.013702243637548, + 9.016326405341099, + 8.836765675846252, + 46.724030690708, + 45.670931647965055, + 45.52105012345985, + 46.760404038674345, + 46.879394746618935, + 
44.17372013338399, + 45.75158023561404, + 8.805217872024683, + 45.797390838433785, + 13.147893146580197, + 10.47047709122617, + 46.61575812332005, + 46.51823693220529, + 4.823033237525791, + 46.77438522864306, + 12.978009554740229, + 38.60487947846694, + 42.776667803234396, + 46.400158258735026, + 47.945284694706544, + 46.56814403610221, + 4.817274157491479, + 46.62284523101857, + 43.12368820615556, + 41.32670008561977, + 47.18041683967238, + 43.946314235571926, + 44.21062282398479, + 46.19942835901387, + 43.058732279332816, + 45.38189559700182, + 12.884302510247224, + 41.31993708388949, + 46.47169213829526, + 47.19006572402318, + 47.14982705362978, + 47.06368907184152, + 4.812880414029111, + 11.16220592067454, + 46.574241250493166, + 46.97994816848278, + 47.45816665639938, + 46.13083135931701, + 44.32000975084153, + 43.41804159092183, + 42.66169852490167, + 45.48613569289166, + 44.33345445574926, + 43.452008302705025, + 46.81171828117368, + 43.10993692872848, + 45.994793877105536, + 46.800586622051604, + 44.27154316655175, + 46.105917327794614, + 47.46844284412024, + 46.26483577817879, + 47.53682651754337, + 44.570703276937955, + 13.903655242145248, + 11.480956559418479, + 39.336500908555834, + 45.90660459732642, + 46.77917515765938, + 45.088381020490885, + 46.506580602768324, + 10.416775312398924, + 46.58444309156844, + 11.387487180031048, + 40.66527760299146, + 43.83362837067986, + 12.535722984692502, + 10.862075986088263, + 45.57849071079437, + 44.54752207894966, + 47.368339209936586, + 44.99292457355705, + 40.53083756344339, + 11.0636299214144, + 47.688667053142176, + 46.49150277169404, + 45.74006902822907, + 10.33525884882965, + 47.48557960393818, + 11.308966508889716, + 43.29259854243531, + 46.1099584752184, + 12.17957601526656, + 45.17415787692287, + 47.42069363597441, + 46.61857073840612, + 47.2421945434337, + 45.43588217737557, + 40.87274833234901, + 46.70759606653805, + 36.65554403597885, + 47.00974843039727, + 44.27238095134427, + 10.215116571612004, + 
13.7852700376187, + 46.056843647274086, + 40.6532114020977, + 44.73992298080998, + 45.68916428641405, + 47.31026005200245, + 46.82535713731543, + 10.130547297609347, + 47.03536361799409, + 46.991892284267614, + 40.158116078863046, + 46.709887162762875, + 46.67477141304538, + 46.52127067854677, + 46.8876604645323, + 10.042145383707755, + 47.028109894652104, + 45.7372913308103, + 43.35504560755716, + 46.94810107337359, + 11.8541419498795, + 46.48396692070885, + 46.650791251635994, + 45.251645228092976, + 46.90500963017914, + 47.44769079351513, + 45.17830741847997, + 10.999409433497265, + 46.47750683850478, + 46.775120397902185, + 47.814786925390884, + 9.948141267257297, + 13.587316761063226, + 46.55485731583328, + 42.77962873201528, + 45.79657353014755, + 46.78648032853886, + 6.092950585496579, + 16.427217699690395, + 6.041669306781378, + 33.44834000640586, + 45.71021173581392, + 40.44649791159415, + 44.41704966518361, + 45.16867811008679, + 46.553484065254395, + 11.951659518508801, + 40.964520355583325, + 17.222473173678548, + 15.810785212495478, + 5.896598504159821, + 46.15486957962745, + 6.267247605496281, + 38.65955739206124, + 16.334240831872595, + 40.92114763036668, + 44.25538155878388, + 46.79667178943268, + 5.886210147826818, + 45.086831193223446, + 47.3009972481073, + 47.07801971653764, + 46.80397795995714, + 46.806845163101094, + 43.42411625011456, + 46.37426980773864, + 41.17909401763616, + 46.16226579941339, + 47.44507636385267, + 11.930205494257288, + 16.233747914032552, + 6.031411752952078, + 45.92910900092996, + 47.47110773753601, + 39.494621036199604, + 16.734374432604927, + 47.37802539239185, + 46.74469194379278, + 16.087259096423576, + 46.92051488410033, + 47.34732444333283, + 46.40587690730415, + 5.872780467931287, + 44.55593583365237, + 45.7052618242163, + 6.085826627872682, + 44.846431805065144, + 45.41689502907426, + 45.289189315257374, + 44.95210230627078, + 42.99904025714732, + 46.839026962763846, + 6.250954782033121, + 44.8453124032084, + 
45.278261112862296, + 6.020810288080093, + 17.182296973833214, + 46.63633652424215, + 5.866101016705892, + 46.160696572751434, + 46.32038287353405, + 46.89907461120633, + 45.95374406526204, + 46.925975948392896, + 46.42837166656114, + 15.78999329881552, + 44.465193132950446, + 46.21771478110725, + 47.314131714710484, + 6.0756954521719475, + 47.654756058723834, + 45.70610138140926, + 46.42506531228388, + 46.278376731444745, + 42.38396099575264, + 42.30031354989153, + 6.238343970049818, + 44.63197875047801, + 45.842276161134954, + 47.290515920449934, + 17.100464476837107, + 46.03336595920761, + 42.199011552033475, + 46.12151306088509, + 6.22230433569469, + 42.38409981463419, + 16.065182030558717, + 47.159068653554634, + 47.325440650358736, + 47.304702743784624, + 41.95305830151048, + 46.32090634094613, + 6.205841232502227, + 45.21525043209204, + 46.68630635575757, + 6.014917714514858, + 16.99660741175496, + 46.04707312586917, + 42.19662106675615, + 45.454018018858854, + 47.15352407193948, + 46.93603762078255, + 46.83396897378934, + 47.15013333226566, + 46.77541231643884, + 47.24502443147304, + 42.759813321329425, + 47.001201569266215, + 6.192232905623395, + 47.13098385966453, + 47.01234120088298, + 46.79153288884898, + 46.373378014241005, + 15.754365078113269, + 5.8675558701311985, + 45.42074545020536, + 6.176488223442546, + 47.27337589918247, + 46.90578973015155, + 47.16448140788897, + 47.56000914081759, + 46.62586586855627, + 41.982557140496446, + 16.770559660054925, + 47.00638722437522 ] -} +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json index 221abd48c74..a47b94faa75 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 11.04733, "2": 11.03572, - "3": 9.5878, - "4": 9.25791, - "5": 9.51585, - "6": 9.91425, - "7": 9.49022, - "8": 8.94619, - "9": 8.65195, - "10": 9.06313, - "11": 8.49654, - "12": 8.52749, - "13": 8.45919, - "14": 7.99341, - "15": 8.05353, - "16": 8.08327, - "17": 8.10021, - "18": 7.77408, - "19": 8.14992, - "20": 7.89646, - "21": 7.60027, - "22": 7.55248, - "23": 7.43137, - "24": 7.43223, - "25": 7.68057, - "26": 7.07422, - "27": 7.62201, - "28": 7.33353, - "29": 7.49795, - "30": 7.64414, - "31": 7.39519, - "32": 7.59013, - "33": 7.64569, - "34": 7.70593, - "35": 7.2143, - "36": 7.08788, - "37": 7.43168, - "38": 7.19723, - "39": 7.55557, - "40": 7.54844, - "41": 7.49611, - "42": 7.25383, - "43": 7.23801, - "44": 7.42036, - "45": 7.19742, - "46": 6.90447, - "47": 7.30251, - "48": 7.14379, - "49": 7.59525, - "50": 7.04023 + "3": 9.58776, + "4": 9.25801, + "5": 9.53164, + "6": 9.90992, + "7": 9.48661, + "8": 8.93947, + "9": 8.65725, + "10": 9.0567, + "11": 8.49436, + "12": 8.52422, + "13": 8.45295, + "14": 7.97674, + "15": 8.04629, + "16": 8.08024, + "17": 8.08398, + "18": 7.76141, + "19": 8.15001, + "20": 7.89339, + "21": 7.58212, + "22": 7.54491, + "23": 7.43428, + "24": 7.42622, + "25": 7.67267, + "26": 7.07291, + "27": 7.61503, + "28": 7.31789, + "29": 7.48965, + "30": 7.64357, + "31": 7.3927, + "32": 7.58407, + "33": 7.63624, + "34": 7.69746, + "35": 7.21377, + "36": 7.08367, + "37": 7.4245, + "38": 7.18783, + "39": 7.5498, + "40": 7.54133, + "41": 7.48816, + "42": 7.24677, + "43": 7.23194, + "44": 7.41471, + "45": 7.18838, + "46": 6.89674, + "47": 7.29904, + "48": 7.13855, + "49": 7.58882, + "50": 7.03386 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802616.0, - "2": 38543540.0, - "3": 38741560.0, - "4": 273652640.0, - "5": 246619984.0, - "6": 255713984.0, - "7": 
585904576.0, - "8": 775188544.0, - "9": 683552384.0, - "10": 678184384.0, - "11": 709420544.0, - "12": 771913024.0, - "13": 884572992.0, - "14": 805905152.0, - "15": 771490816.0, - "16": 932248832.0, - "17": 721261824.0, - "18": 683711296.0, - "19": 963724352.0, - "20": 998655872.0, - "21": 756360320.0, - "22": 969720704.0, - "23": 762708416.0, - "24": 889305088.0, - "25": 865191296.0, - "26": 828440320.0, - "27": 806905024.0, - "28": 837449408.0, - "29": 783497856.0, - "30": 772494272.0, - "31": 793774528.0, - "32": 774902528.0, - "33": 752992128.0, - "34": 721632000.0, - "35": 728225216.0, - "36": 542603008.0, - "37": 723530816.0, - "38": 677573184.0, - "39": 686397568.0, - "40": 651324224.0, - "41": 604614656.0, - "42": 582812544.0, - "43": 564189760.0, - "44": 569972864.0, - "45": 536820928.0, - "46": 334504672.0, - "47": 494444000.0, - "48": 504118016.0, - "49": 475199808.0, - "50": 350261056.0 + "1": 38802552.0, + "2": 38543496.0, + "3": 38742496.0, + "4": 276808768.0, + "5": 252900224.0, + "6": 262014400.0, + "7": 604765376.0, + "8": 778329280.0, + "9": 664674944.0, + "10": 728521920.0, + "11": 718868480.0, + "12": 787622592.0, + "13": 900296192.0, + "14": 831151488.0, + "15": 762029184.0, + "16": 938532864.0, + "17": 633234048.0, + "18": 708920704.0, + "19": 976315584.0, + "20": 986060288.0, + "21": 781551744.0, + "22": 762139648.0, + "23": 888477824.0, + "24": 851552512.0, + "25": 827443072.0, + "26": 812721088.0, + "27": 806914304.0, + "28": 802850496.0, + "29": 748894592.0, + "30": 731604672.0, + "31": 752878144.0, + "32": 762315520.0, + "33": 737258304.0, + "34": 746789888.0, + "35": 734508928.0, + "36": 674695808.0, + "37": 673198208.0, + "38": 633526912.0, + "39": 620340928.0, + "40": 613575552.0, + "41": 566869312.0, + "42": 557646592.0, + "43": 554752576.0, + "44": 547950784.0, + "45": 527374464.0, + "46": 347107200.0, + "47": 497586496.0, + "48": 497828864.0, + "49": 465758912.0, + "50": 450885792.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 
@@ "end_step": 50, "step_interval": 1, "values": { - "1": 55051542528.0, - "2": 57803964416.0, - "3": 57918414848.0, - "4": 57918414848.0, - "5": 57918414848.0, - "6": 57918414848.0, - "7": 57918414848.0, - "8": 57918414848.0, - "9": 57918414848.0, - "10": 57918414848.0, - "11": 57918414848.0, - "12": 57918414848.0, - "13": 57918414848.0, - "14": 57918414848.0, - "15": 57918414848.0, - "16": 57918414848.0, - "17": 57918414848.0, - "18": 57918414848.0, - "19": 57918414848.0, - "20": 57918414848.0, - "21": 57918414848.0, - "22": 57918414848.0, - "23": 57918414848.0, - "24": 57918414848.0, - "25": 57918414848.0, - "26": 57918414848.0, - "27": 57918414848.0, - "28": 57918414848.0, - "29": 57918414848.0, - "30": 57918414848.0, - "31": 57918414848.0, - "32": 57918414848.0, - "33": 57918414848.0, - "34": 57918414848.0, - "35": 57918414848.0, - "36": 57918414848.0, - "37": 57918414848.0, - "38": 57918414848.0, - "39": 57918414848.0, - "40": 57918414848.0, - "41": 57918414848.0, - "42": 57918414848.0, - "43": 57918414848.0, - "44": 57981075456.0, - "45": 58164338688.0, - "46": 58164338688.0, - "47": 58164338688.0, - "48": 58164338688.0, - "49": 58164338688.0, - "50": 58164338688.0 + "1": 54204293120.0, + "2": 56956715008.0, + "3": 57074692096.0, + "4": 57074692096.0, + "5": 57074692096.0, + "6": 57074692096.0, + "7": 57074692096.0, + "8": 57074692096.0, + "9": 57074692096.0, + "10": 57074692096.0, + "11": 57074692096.0, + "12": 57074692096.0, + "13": 57074692096.0, + "14": 57074692096.0, + "15": 57074692096.0, + "16": 57074692096.0, + "17": 57074692096.0, + "18": 57074692096.0, + "19": 57074692096.0, + "20": 57074692096.0, + "21": 57074692096.0, + "22": 57074692096.0, + "23": 57074692096.0, + "24": 57074692096.0, + "25": 57074692096.0, + "26": 57211289600.0, + "27": 57211289600.0, + "28": 57211289600.0, + "29": 57368535040.0, + "30": 57742073856.0, + "31": 57742073856.0, + "32": 57742073856.0, + "33": 57742073856.0, + "34": 57744101376.0, + "35": 58293194752.0, + "36": 
58293194752.0, + "37": 58293194752.0, + "38": 58293194752.0, + "39": 58293194752.0, + "40": 58293194752.0, + "41": 58293194752.0, + "42": 58293194752.0, + "43": 58293194752.0, + "44": 58293194752.0, + "45": 58293194752.0, + "46": 58293194752.0, + "47": 58293194752.0, + "48": 58293194752.0, + "49": 58293194752.0, + "50": 58293194752.0 } }, "mtp_1 loss": { @@ -234,54 +234,54 @@ "values": { "1": 11.0765, "2": 11.07404, - "3": 10.5387, - "4": 10.09807, - "5": 9.81158, - "6": 10.07371, - "7": 9.79765, - "8": 9.06972, - "9": 8.86823, - "10": 9.12665, - "11": 8.49944, - "12": 8.5346, - "13": 8.42954, - "14": 7.8522, - "15": 7.99476, - "16": 8.05407, - "17": 8.0055, - "18": 7.73795, - "19": 8.11808, - "20": 7.83141, - "21": 7.53056, - "22": 7.50549, - "23": 7.37363, - "24": 7.37845, - "25": 7.62115, - "26": 7.02061, - "27": 7.5605, - "28": 7.2695, - "29": 7.44668, - "30": 7.58971, - "31": 7.32847, - "32": 7.50861, - "33": 7.57687, - "34": 7.63939, - "35": 7.15634, - "36": 7.02394, - "37": 7.35539, - "38": 7.13177, - "39": 7.49132, - "40": 7.47677, - "41": 7.42456, - "42": 7.1802, - "43": 7.16487, - "44": 7.34808, - "45": 7.12903, - "46": 6.83012, - "47": 7.2395, - "48": 7.08268, - "49": 7.51404, - "50": 6.97693 + "3": 10.53863, + "4": 10.0981, + "5": 9.81152, + "6": 10.0744, + "7": 9.79944, + "8": 9.07176, + "9": 8.87116, + "10": 9.12759, + "11": 8.49894, + "12": 8.53114, + "13": 8.42531, + "14": 7.84784, + "15": 7.99147, + "16": 8.05102, + "17": 8.00126, + "18": 7.73217, + "19": 8.11102, + "20": 7.83055, + "21": 7.52608, + "22": 7.49979, + "23": 7.37315, + "24": 7.37265, + "25": 7.61392, + "26": 7.01833, + "27": 7.55877, + "28": 7.26822, + "29": 7.44363, + "30": 7.58581, + "31": 7.3265, + "32": 7.50876, + "33": 7.57264, + "34": 7.63783, + "35": 7.15428, + "36": 7.02086, + "37": 7.35313, + "38": 7.12909, + "39": 7.48882, + "40": 7.47518, + "41": 7.42231, + "42": 7.17726, + "43": 7.16243, + "44": 7.34345, + "45": 7.12344, + "46": 6.8279, + "47": 7.23665, + "48": 7.08061, + 
"49": 7.51184, + "50": 6.9731 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 90.94511, - "2": 1.54793, - "3": 1.33035, - "4": 2.25969, - "5": 1.82487, - "6": 1.71972, - "7": 2.15404, - "8": 1.61956, - "9": 1.77326, - "10": 1.72086, - "11": 1.01952, - "12": 1.02588, - "13": 1.02874, - "14": 1.02703, - "15": 1.03114, - "16": 1.03244, - "17": 1.03532, - "18": 1.04017, - "19": 1.03111, - "20": 1.03139, - "21": 1.03293, - "22": 1.03136, - "23": 1.03187, - "24": 1.0297, - "25": 1.03561, - "26": 1.5512, - "27": 1.03857, - "28": 1.02247, - "29": 1.03252, - "30": 1.02351, - "31": 1.02701, - "32": 1.0267, - "33": 1.02921, - "34": 1.02405, - "35": 1.02405, - "36": 1.04177, - "37": 1.0449, - "38": 1.04688, - "39": 1.05181, - "40": 1.04378, - "41": 1.0421, - "42": 1.04502, - "43": 1.0336, - "44": 1.05112, - "45": 1.04838, - "46": 1.03386, - "47": 1.04806, - "48": 1.04195, - "49": 1.04121, - "50": 1.03797 + "1": 97.95665, + "2": 1.66988, + "3": 1.35644, + "4": 2.24552, + "5": 2.14285, + "6": 1.60272, + "7": 1.5113, + "8": 2.10932, + "9": 1.69738, + "10": 1.0561, + "11": 1.04064, + "12": 1.0335, + "13": 1.03186, + "14": 1.03406, + "15": 1.05897, + "16": 1.03516, + "17": 1.04396, + "18": 1.08073, + "19": 1.06079, + "20": 1.04178, + "21": 1.03726, + "22": 1.03706, + "23": 1.03878, + "24": 1.04111, + "25": 1.04952, + "26": 1.04497, + "27": 1.04672, + "28": 1.03793, + "29": 1.03092, + "30": 1.04813, + "31": 1.03205, + "32": 1.03729, + "33": 1.02557, + "34": 1.03623, + "35": 1.04247, + "36": 1.03261, + "37": 1.03911, + "38": 1.04764, + "39": 1.0376, + "40": 1.04918, + "41": 1.03907, + "42": 1.05227, + "43": 1.04186, + "44": 1.04266, + "45": 1.03786, + "46": 1.04673, + "47": 1.05766, + "48": 1.04958, + "49": 1.05312, + "50": 1.05239 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json index 644d5284b7a..a76d8667ec6 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 11.0474, "2": 11.03765, - "3": 9.60584, - "4": 9.26463, - "5": 9.32776, - "6": 9.30982, - "7": 9.1645, - "8": 8.78939, - "9": 8.69677, - "10": 8.91589, - "11": 8.38321, - "12": 8.44094, - "13": 8.35341, - "14": 7.80742, - "15": 7.95516, - "16": 7.99761, - "17": 7.95082, - "18": 7.67707, - "19": 8.07009, - "20": 7.78393, - "21": 7.48374, - "22": 7.4799, - "23": 7.35056, - "24": 7.34597, - "25": 7.62236, - "26": 7.01653, - "27": 7.55175, - "28": 7.27173, - "29": 7.44209, - "30": 7.57394, - "31": 7.33713, - "32": 7.52234, - "33": 7.5745, - "34": 7.62003, - "35": 7.15235, - "36": 7.01753, - "37": 7.35428, - "38": 7.12808, - "39": 7.47832, - "40": 7.48784, - "41": 7.42289, - "42": 7.19117, - "43": 7.17856, - "44": 7.35808, - "45": 7.12045, - "46": 6.85278, - "47": 7.23963, - "48": 7.07274, - "49": 7.54922, - "50": 6.97811 + "3": 9.6074, + "4": 9.2648, + "5": 9.42291, + "6": 9.09511, + "7": 9.12753, + "8": 8.75686, + "9": 8.61627, + "10": 8.89295, + "11": 8.37933, + "12": 8.39932, + "13": 8.32626, + "14": 7.81437, + "15": 7.93661, + "16": 7.99492, + "17": 7.95458, + "18": 7.67733, + "19": 8.07234, + "20": 7.78815, + "21": 7.48342, + "22": 7.48177, + "23": 7.34879, + "24": 7.34465, + "25": 7.61117, + "26": 7.01605, + "27": 7.54878, + "28": 7.26655, + "29": 7.43507, + "30": 7.56529, + "31": 7.32669, + "32": 7.50645, + "33": 7.5577, + "34": 7.60977, + "35": 7.14607, + "36": 7.00597, + "37": 7.34071, + "38": 7.11796, + "39": 7.46649, + "40": 7.47443, + "41": 7.41032, + "42": 7.17365, + "43": 7.16495, + "44": 
7.34265, + "45": 7.10918, + "46": 6.83934, + "47": 7.22335, + "48": 7.05732, + "49": 7.53394, + "50": 6.95951 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802592.0, - "2": 38543572.0, - "3": 38743144.0, - "4": 270609984.0, - "5": 224754048.0, - "6": 372389344.0, - "7": 598920768.0, - "8": 850687488.0, - "9": 708853952.0, - "10": 684582272.0, - "11": 621544192.0, - "12": 630341056.0, - "13": 639368448.0, - "14": 548278592.0, - "15": 617425984.0, - "16": 702795968.0, - "17": 567344064.0, - "18": 589440000.0, - "19": 630362240.0, - "20": 669614592.0, - "21": 564495744.0, - "22": 586578304.0, - "23": 542928576.0, - "24": 511907552.0, - "25": 547508864.0, - "26": 661787712.0, - "27": 479817696.0, - "28": 466314688.0, - "29": 491018048.0, - "30": 470632640.0, - "31": 623908992.0, - "32": 523373440.0, - "33": 435529664.0, - "34": 405444992.0, - "35": 489248416.0, - "36": 322730176.0, - "37": 339782720.0, - "38": 281398720.0, - "39": 249171440.0, - "40": 343532416.0, - "41": 400160576.0, - "42": 384640608.0, - "43": 378621824.0, - "44": 374955616.0, - "45": 241150752.0, - "46": 340828096.0, - "47": 280778400.0, - "48": 284051968.0, - "49": 173319200.0, - "50": 197102384.0 + "1": 38802536.0, + "2": 38543540.0, + "3": 38739408.0, + "4": 273756736.0, + "5": 205853584.0, + "6": 284244640.0, + "7": 652227968.0, + "8": 790994816.0, + "9": 762295424.0, + "10": 665870592.0, + "11": 618336384.0, + "12": 639816192.0, + "13": 699169600.0, + "14": 620502464.0, + "15": 623699456.0, + "16": 847396864.0, + "17": 601834432.0, + "18": 642855744.0, + "19": 668078912.0, + "20": 574651008.0, + "21": 608590080.0, + "22": 599821504.0, + "23": 558380672.0, + "24": 688014720.0, + "25": 500623296.0, + "26": 532887808.0, + "27": 506526976.0, + "28": 450900800.0, + "29": 528748480.0, + "30": 445603872.0, + "31": 457250368.0, + "32": 400653888.0, + "33": 347460640.0, + "34": 268919904.0, + "35": 495515584.0, + "36": 332139008.0, + "37": 446760768.0, 
+ "38": 391328576.0, + "39": 378290400.0, + "40": 261331328.0, + "41": 368680832.0, + "42": 337485280.0, + "43": 337755968.0, + "44": 324657920.0, + "45": 216104608.0, + "46": 218159872.0, + "47": 302569184.0, + "48": 296505312.0, + "49": 280170176.0, + "50": 268486912.0 } }, "mem-allocated-bytes": { @@ -198,33 +198,33 @@ "21": 56295710720.0, "22": 56295710720.0, "23": 56295710720.0, - "24": 56295710720.0, - "25": 56502132736.0, - "26": 56578957312.0, - "27": 57159032832.0, - "28": 57159032832.0, - "29": 57159032832.0, - "30": 57159032832.0, - "31": 57159032832.0, - "32": 57159032832.0, - "33": 57159032832.0, - "34": 57159032832.0, - "35": 57159032832.0, - "36": 57159032832.0, - "37": 57159032832.0, - "38": 57159032832.0, - "39": 57159032832.0, - "40": 57159032832.0, - "41": 57159032832.0, - "42": 57296633856.0, - "43": 57314361344.0, - "44": 57498943488.0, - "45": 57649999872.0, - "46": 57649999872.0, - "47": 57649999872.0, - "48": 57649999872.0, - "49": 57649999872.0, - "50": 57649999872.0 + "24": 56738553856.0, + "25": 56738553856.0, + "26": 56777162752.0, + "27": 56777162752.0, + "28": 56777162752.0, + "29": 56777162752.0, + "30": 56777162752.0, + "31": 56777162752.0, + "32": 56777162752.0, + "33": 56777162752.0, + "34": 56824344576.0, + "35": 57080135680.0, + "36": 57331695616.0, + "37": 57331695616.0, + "38": 57577013248.0, + "39": 57577013248.0, + "40": 57577013248.0, + "41": 57577013248.0, + "42": 57577013248.0, + "43": 57587191808.0, + "44": 57596944384.0, + "45": 57705652224.0, + "46": 57790390272.0, + "47": 57790390272.0, + "48": 57790390272.0, + "49": 57790390272.0, + "50": 57790390272.0 } }, "mtp_1 loss": { @@ -234,54 +234,54 @@ "values": { "1": 11.07756, "2": 11.07651, - "3": 10.53059, - "4": 10.08643, - "5": 9.86147, - "6": 9.55598, - "7": 9.64192, - "8": 8.9278, - "9": 8.73566, - "10": 9.03281, - "11": 8.40329, - "12": 8.42578, - "13": 8.32864, - "14": 7.77688, - "15": 7.92204, - "16": 7.97443, - "17": 7.92322, - "18": 7.65613, - "19": 8.04247, - 
"20": 7.76026, - "21": 7.44933, - "22": 7.43739, - "23": 7.31015, - "24": 7.31285, - "25": 7.56522, - "26": 6.97802, - "27": 7.50958, - "28": 7.22284, - "29": 7.40631, - "30": 7.53948, - "31": 7.2872, - "32": 7.474, - "33": 7.53734, - "34": 7.59617, - "35": 7.12168, - "36": 6.98902, - "37": 7.32682, - "38": 7.10026, - "39": 7.4584, - "40": 7.44943, - "41": 7.39421, - "42": 7.15113, - "43": 7.13405, - "44": 7.31917, - "45": 7.09081, - "46": 6.80653, - "47": 7.21079, - "48": 7.0516, - "49": 7.48755, - "50": 6.95113 + "3": 10.53063, + "4": 10.08611, + "5": 9.87524, + "6": 9.55366, + "7": 9.62345, + "8": 8.91012, + "9": 8.72228, + "10": 9.02504, + "11": 8.39501, + "12": 8.42504, + "13": 8.32334, + "14": 7.76976, + "15": 7.91789, + "16": 7.97018, + "17": 7.92051, + "18": 7.65266, + "19": 8.0377, + "20": 7.76074, + "21": 7.44752, + "22": 7.43657, + "23": 7.30984, + "24": 7.31186, + "25": 7.56562, + "26": 6.97201, + "27": 7.50933, + "28": 7.2266, + "29": 7.40633, + "30": 7.53569, + "31": 7.28904, + "32": 7.47424, + "33": 7.53526, + "34": 7.59404, + "35": 7.11968, + "36": 6.9867, + "37": 7.32338, + "38": 7.09605, + "39": 7.45524, + "40": 7.44706, + "41": 7.39271, + "42": 7.14573, + "43": 7.13128, + "44": 7.31399, + "45": 7.08836, + "46": 6.80158, + "47": 7.2062, + "48": 7.0468, + "49": 7.47982, + "50": 6.94494 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 97.6542, - "2": 1.64943, - "3": 1.32578, - "4": 1.75905, - "5": 1.13768, - "6": 1.90299, - "7": 1.09961, - "8": 1.09819, - "9": 1.09778, - "10": 1.11461, - "11": 1.09709, - "12": 1.10879, - "13": 1.11446, - "14": 1.10227, - "15": 1.10064, - "16": 1.10154, - "17": 1.10307, - "18": 1.11422, - "19": 1.11171, - "20": 1.10785, - "21": 1.10391, - "22": 1.10739, - "23": 1.09617, - "24": 1.09808, - "25": 1.10211, - "26": 1.09861, - "27": 1.11235, - "28": 1.10628, - "29": 1.08834, - "30": 1.08904, - "31": 1.09002, - "32": 1.08833, - "33": 1.08496, - "34": 1.09187, - "35": 
1.09656, - "36": 1.0944, - "37": 1.0819, - "38": 1.08992, - "39": 1.10447, - "40": 1.08684, - "41": 1.0921, - "42": 1.10087, - "43": 1.09566, - "44": 1.08789, - "45": 1.09029, - "46": 1.08534, - "47": 1.08796, - "48": 1.10222, - "49": 1.09817, - "50": 1.07925 + "1": 102.52307, + "2": 1.75305, + "3": 1.36681, + "4": 1.62808, + "5": 1.13714, + "6": 1.45805, + "7": 1.6121, + "8": 1.20031, + "9": 1.09784, + "10": 1.10383, + "11": 1.10878, + "12": 1.18093, + "13": 1.43808, + "14": 1.17223, + "15": 1.11575, + "16": 1.1159, + "17": 1.11727, + "18": 1.10751, + "19": 1.11189, + "20": 1.1082, + "21": 1.10459, + "22": 1.11252, + "23": 1.10744, + "24": 1.12218, + "25": 1.09823, + "26": 1.11657, + "27": 1.08949, + "28": 1.10254, + "29": 1.10189, + "30": 1.08963, + "31": 1.10454, + "32": 1.09654, + "33": 1.08747, + "34": 1.09674, + "35": 1.09106, + "36": 1.08904, + "37": 1.1178, + "38": 1.09379, + "39": 1.10306, + "40": 1.09998, + "41": 1.08808, + "42": 1.0941, + "43": 1.0919, + "44": 1.0813, + "45": 1.08715, + "46": 1.07061, + "47": 1.07098, + "48": 1.07438, + "49": 1.07469, + "50": 1.0719 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json index f50f32bf276..c55faf839a8 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.94944, "2": 10.95158, - "3": 10.50291, - "4": 9.96373, - "5": 9.94051, - "6": 9.67323, - "7": 10.22821, - "8": 9.49736, - "9": 9.54323, - "10": 9.79347, + "3": 10.50318, + "4": 9.964, + "5": 9.94016, + "6": 9.67332, + "7": 10.23184, 
+ "8": 9.4965, + "9": 9.54631, + "10": 9.79388, "11": 9.3003, - "12": 9.40372, - "13": 9.39468, - "14": 8.84935, - "15": 9.02277, - "16": 9.06983, - "17": 9.04403, - "18": 8.75568, - "19": 9.17822, - "20": 8.86078, - "21": 8.53542, - "22": 8.54991, - "23": 8.42524, - "24": 8.37607, - "25": 8.63809, - "26": 7.96681, - "27": 8.57149, - "28": 8.19023, - "29": 8.39544, - "30": 8.67048, - "31": 8.28487, - "32": 8.43358, - "33": 8.55518, - "34": 8.65834, - "35": 8.07752, - "36": 7.94541, - "37": 8.29246, - "38": 7.97753, - "39": 8.38915, - "40": 8.35513, - "41": 8.31736, - "42": 8.05606, - "43": 8.03035, - "44": 8.23838, - "45": 8.09696, - "46": 7.61491, - "47": 8.15046, - "48": 8.0039, - "49": 8.38371, - "50": 7.81253 + "12": 9.40451, + "13": 9.39562, + "14": 8.8513, + "15": 9.02474, + "16": 9.07111, + "17": 9.04534, + "18": 8.75805, + "19": 9.1794, + "20": 8.86325, + "21": 8.5391, + "22": 8.55134, + "23": 8.42688, + "24": 8.38109, + "25": 8.63783, + "26": 7.96861, + "27": 8.57603, + "28": 8.1922, + "29": 8.3971, + "30": 8.67285, + "31": 8.28458, + "32": 8.43378, + "33": 8.55597, + "34": 8.65985, + "35": 8.07899, + "36": 7.94715, + "37": 8.29413, + "38": 7.97958, + "39": 8.39117, + "40": 8.35496, + "41": 8.31782, + "42": 8.05717, + "43": 8.03152, + "44": 8.24042, + "45": 8.0999, + "46": 7.61677, + "47": 8.15178, + "48": 8.00508, + "49": 8.38458, + "50": 7.81369 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19403592.0, - "2": 19274176.0, - "3": 20945222.0, - "4": 89687760.0, - "5": 151693248.0, - "6": 138938096.0, - "7": 164021920.0, - "8": 198936768.0, - "9": 160969488.0, - "10": 159820768.0, - "11": 216424656.0, - "12": 209851488.0, - "13": 225333088.0, - "14": 222140112.0, - "15": 231619680.0, - "16": 216080960.0, - "17": 288314816.0, - "18": 170463296.0, - "19": 167479232.0, - "20": 178590448.0, - "21": 241500624.0, - "22": 220658528.0, - "23": 197474784.0, - "24": 226071040.0, - "25": 237749008.0, - "26": 288417664.0, 
- "27": 232076720.0, - "28": 286654304.0, - "29": 258070544.0, - "30": 214923920.0, - "31": 241275712.0, - "32": 214510896.0, - "33": 203527888.0, - "34": 228752368.0, - "35": 194293392.0, - "36": 236711744.0, - "37": 162157968.0, - "38": 225545168.0, - "39": 214299328.0, - "40": 218746384.0, - "41": 163931104.0, - "42": 162458624.0, - "43": 192453632.0, - "44": 149739552.0, - "45": 175646608.0, - "46": 129510480.0, - "47": 170153408.0, - "48": 157697168.0, - "49": 92955200.0, - "50": 157824256.0 + "1": 19403652.0, + "2": 19274102.0, + "3": 19373168.0, + "4": 86562120.0, + "5": 151677296.0, + "6": 142091232.0, + "7": 167132032.0, + "8": 197337088.0, + "9": 168836496.0, + "10": 162963792.0, + "11": 211653824.0, + "12": 214575616.0, + "13": 231549168.0, + "14": 220571728.0, + "15": 250508240.0, + "16": 168968368.0, + "17": 294610112.0, + "18": 167327952.0, + "19": 156385504.0, + "20": 177007072.0, + "21": 219468816.0, + "22": 217511168.0, + "23": 194318208.0, + "24": 208788192.0, + "25": 240820928.0, + "26": 250667072.0, + "27": 235205856.0, + "28": 285071552.0, + "29": 270668736.0, + "30": 241596448.0, + "31": 256938208.0, + "32": 252232640.0, + "33": 213058752.0, + "34": 217720576.0, + "35": 172316416.0, + "36": 246137120.0, + "37": 228162320.0, + "38": 238162048.0, + "39": 211207168.0, + "40": 206162560.0, + "41": 151397232.0, + "42": 206473424.0, + "43": 175165248.0, + "44": 182768560.0, + "45": 158317856.0, + "46": 159388704.0, + "47": 152897904.0, + "48": 143548896.0, + "49": 124357696.0, + "50": 151519648.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4876471296.0, - "2": 4876535296.0, - "3": 4875369984.0, - "4": 4874512896.0, - "5": 4874505728.0, - "6": 4876898816.0, - "7": 4875386368.0, - "8": 4876464640.0, - "9": 4876400128.0, - "10": 4877448704.0, - "11": 4876193280.0, - "12": 4874407424.0, - "13": 4875226624.0, - "14": 4875415040.0, - "15": 4876397056.0, - "16": 4877806080.0, - "17": 
4876205568.0, - "18": 4876743168.0, - "19": 4875044352.0, - "20": 4877310464.0, - "21": 4875642368.0, - "22": 4874806784.0, - "23": 4875531776.0, - "24": 4878220800.0, - "25": 4875477504.0, - "26": 4877613568.0, - "27": 4875030016.0, - "28": 4875365888.0, - "29": 4876291584.0, - "30": 4876465664.0, - "31": 4874710528.0, - "32": 4875980288.0, - "33": 4874096128.0, - "34": 4875379200.0, - "35": 4875995648.0, - "36": 4876016128.0, - "37": 4874497536.0, - "38": 4875453952.0, - "39": 4875932160.0, - "40": 4876112384.0, - "41": 4875683328.0, - "42": 4877188608.0, - "43": 4875977216.0, - "44": 4878347776.0, - "45": 4876845568.0, - "46": 4875212288.0, - "47": 4876330496.0, - "48": 4875971072.0, - "49": 4875368960.0, - "50": 4875349504.0 + "1": 4875597824.0, + "2": 4875363840.0, + "3": 4874979840.0, + "4": 4874899968.0, + "5": 4875749888.0, + "6": 4876656128.0, + "7": 4875178496.0, + "8": 4874036736.0, + "9": 4876568064.0, + "10": 4876058112.0, + "11": 4876045824.0, + "12": 4874515968.0, + "13": 4875086336.0, + "14": 4874568192.0, + "15": 4875987456.0, + "16": 4874790400.0, + "17": 4875477504.0, + "18": 4875512320.0, + "19": 4876186112.0, + "20": 4875747840.0, + "21": 4874790400.0, + "22": 4876221952.0, + "23": 4874534400.0, + "24": 4875733504.0, + "25": 4875019776.0, + "26": 4875168256.0, + "27": 4874978816.0, + "28": 4875781632.0, + "29": 4876329472.0, + "30": 4875107840.0, + "31": 4874253824.0, + "32": 4874167808.0, + "33": 4876044800.0, + "34": 4875914752.0, + "35": 4874962432.0, + "36": 4875862528.0, + "37": 4877336064.0, + "38": 4875002368.0, + "39": 4874599936.0, + "40": 4874880512.0, + "41": 4875294208.0, + "42": 4875419136.0, + "43": 4875780608.0, + "44": 4874780160.0, + "45": 4875191808.0, + "46": 4875717120.0, + "47": 4874050048.0, + "48": 4875580928.0, + "49": 4875412992.0, + "50": 4875462144.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 41199984640.0, - "2": 41199984640.0, - "3": 41199984640.0, 
- "4": 41199984640.0, - "5": 41199984640.0, - "6": 41199984640.0, - "7": 41199984640.0, - "8": 41199984640.0, - "9": 41199984640.0, - "10": 41199984640.0, - "11": 41199984640.0, - "12": 41199984640.0, - "13": 41199984640.0, - "14": 41199984640.0, - "15": 41199984640.0, - "16": 41199984640.0, - "17": 41199984640.0, - "18": 41199984640.0, - "19": 41199984640.0, - "20": 41199984640.0, - "21": 41199984640.0, - "22": 41199984640.0, - "23": 41199984640.0, - "24": 41199984640.0, - "25": 41199984640.0, - "26": 41199984640.0, - "27": 41199984640.0, - "28": 41199984640.0, - "29": 41199984640.0, - "30": 41199984640.0, - "31": 41199984640.0, - "32": 41199984640.0, - "33": 41199984640.0, - "34": 41199984640.0, - "35": 41199984640.0, - "36": 41199984640.0, - "37": 41199984640.0, - "38": 41199984640.0, - "39": 41199984640.0, - "40": 41199984640.0, - "41": 41199984640.0, - "42": 41199984640.0, - "43": 41199984640.0, - "44": 41199984640.0, - "45": 41199984640.0, - "46": 41199984640.0, - "47": 41199984640.0, - "48": 41199984640.0, - "49": 41199984640.0, - "50": 41199984640.0 + "1": 41201033216.0, + "2": 41201033216.0, + "3": 41201033216.0, + "4": 41201033216.0, + "5": 41201033216.0, + "6": 41201033216.0, + "7": 41201033216.0, + "8": 41201033216.0, + "9": 41201033216.0, + "10": 41201033216.0, + "11": 41201033216.0, + "12": 41201033216.0, + "13": 41201033216.0, + "14": 41201033216.0, + "15": 41201033216.0, + "16": 41201033216.0, + "17": 41201033216.0, + "18": 41201033216.0, + "19": 41201033216.0, + "20": 41201033216.0, + "21": 41201033216.0, + "22": 41201033216.0, + "23": 41201033216.0, + "24": 41201033216.0, + "25": 41201033216.0, + "26": 41201033216.0, + "27": 41201033216.0, + "28": 41201033216.0, + "29": 41201033216.0, + "30": 41201033216.0, + "31": 41201033216.0, + "32": 41201033216.0, + "33": 41201033216.0, + "34": 41201033216.0, + "35": 41201033216.0, + "36": 41201033216.0, + "37": 41201033216.0, + "38": 41201033216.0, + "39": 41201033216.0, + "40": 41201033216.0, + "41": 
41201033216.0, + "42": 41201033216.0, + "43": 41201033216.0, + "44": 41201033216.0, + "45": 41201033216.0, + "46": 41201033216.0, + "47": 41201033216.0, + "48": 41201033216.0, + "49": 41201033216.0, + "50": 41201033216.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 86.59245, - "2": 1.11188, - "3": 0.94659, - "4": 0.89686, - "5": 1.40432, - "6": 1.06239, - "7": 1.03181, - "8": 1.07838, - "9": 0.88529, - "10": 0.87346, - "11": 0.9764, - "12": 0.87397, - "13": 0.87922, - "14": 0.87464, - "15": 0.86356, - "16": 0.88539, - "17": 0.86198, - "18": 0.86676, - "19": 0.85335, - "20": 0.85904, - "21": 0.84697, - "22": 0.84984, - "23": 0.84683, - "24": 0.85172, - "25": 0.84975, - "26": 0.86347, - "27": 0.86726, - "28": 0.84853, - "29": 0.84946, - "30": 0.85197, - "31": 0.85026, - "32": 0.84681, - "33": 0.84571, - "34": 0.85295, - "35": 0.8568, - "36": 0.84946, - "37": 0.8495, - "38": 0.84754, - "39": 0.85264, - "40": 0.8452, - "41": 0.84517, - "42": 0.84876, - "43": 0.84152, - "44": 0.84772, - "45": 0.84803, - "46": 0.84148, - "47": 0.84697, - "48": 0.84232, - "49": 0.84236, - "50": 0.84249 + "1": 84.85893, + "2": 1.16099, + "3": 0.98814, + "4": 0.90006, + "5": 1.44704, + "6": 1.12424, + "7": 1.08423, + "8": 1.07558, + "9": 1.1513, + "10": 0.88417, + "11": 1.07532, + "12": 0.88519, + "13": 0.87318, + "14": 0.87758, + "15": 0.87276, + "16": 0.8776, + "17": 0.86863, + "18": 0.87011, + "19": 0.86845, + "20": 0.86617, + "21": 0.85521, + "22": 0.86783, + "23": 0.86126, + "24": 0.85746, + "25": 0.85758, + "26": 0.86093, + "27": 0.85634, + "28": 0.85365, + "29": 0.86147, + "30": 0.86891, + "31": 0.85512, + "32": 0.85344, + "33": 0.85409, + "34": 0.85597, + "35": 0.85605, + "36": 0.84565, + "37": 0.84908, + "38": 0.85623, + "39": 0.8586, + "40": 0.87856, + "41": 0.85187, + "42": 0.86298, + "43": 0.85814, + "44": 0.85706, + "45": 0.85473, + "46": 0.85417, + "47": 0.85861, + "48": 0.85261, + "49": 0.85118, + "50": 0.84383 } } } \ 
No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json index 51e9d7154c9..bc1062ce151 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 11.06693, "2": 11.0602, - "3": 10.21167, - "4": 9.95277, - "5": 10.12388, - "6": 8.82369, - "7": 9.52785, - "8": 8.44289, - "9": 7.85041, - "10": 7.07093, - "11": 9.28562, - "12": 9.13324, - "13": 7.86224, - "14": 8.19705, - "15": 8.22932, - "16": 8.17783, - "17": 8.2161, - "18": 7.50358, - "19": 8.08893, - "20": 7.64905, - "21": 7.95183, - "22": 7.29849, - "23": 7.93348, - "24": 7.43565, - "25": 8.2385, - "26": 7.75634, - "27": 7.70075, - "28": 7.66089, - "29": 7.75606, - "30": 7.56072, - "31": 7.81859, - "32": 6.46861, - "33": 7.20532, - "34": 7.77706, - "35": 7.73113, - "36": 6.72448, - "37": 8.09344, - "38": 7.62008, - "39": 7.96872, - "40": 7.4992, - "41": 7.49916, - "42": 6.11993, - "43": 7.59389, - "44": 7.91482, - "45": 6.83633, - "46": 7.41335, - "47": 7.78887, - "48": 7.87666, - "49": 7.58746, - "50": 6.84352 + "3": 10.21173, + "4": 9.95255, + "5": 10.12502, + "6": 8.8231, + "7": 9.52825, + "8": 8.44297, + "9": 7.84977, + "10": 7.0728, + "11": 9.30154, + "12": 9.14531, + "13": 7.86583, + "14": 8.21069, + "15": 8.2169, + "16": 8.17413, + "17": 8.21514, + "18": 7.49348, + "19": 8.08414, + "20": 7.63479, + "21": 7.95116, + "22": 7.29475, + "23": 7.9358, + "24": 7.43073, + "25": 8.23819, + "26": 7.75508, + "27": 7.6991, + "28": 7.65492, + "29": 7.75272, + "30": 7.56401, + "31": 7.81794, + "32": 6.46781, + "33": 7.20433, + "34": 
7.77611, + "35": 7.72648, + "36": 6.71848, + "37": 8.09106, + "38": 7.61823, + "39": 7.96665, + "40": 7.49555, + "41": 7.49366, + "42": 6.10456, + "43": 7.59158, + "44": 7.91315, + "45": 6.83253, + "46": 7.4064, + "47": 7.78787, + "48": 7.87227, + "49": 7.58424, + "50": 6.83739 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 47165160.0, - "2": 46897928.0, - "3": 52684380.0, - "4": 297108064.0, - "5": 556667648.0, - "6": 661861120.0, - "7": 1027446592.0, - "8": 742822528.0, - "9": 846651648.0, - "10": 693167680.0, - "11": 826875520.0, - "12": 814304768.0, - "13": 642608768.0, - "14": 606554752.0, - "15": 728814528.0, - "16": 845696384.0, - "17": 667529728.0, - "18": 673504384.0, - "19": 889544960.0, - "20": 890696768.0, - "21": 676302464.0, - "22": 688965120.0, - "23": 789972480.0, - "24": 761249536.0, - "25": 648185280.0, - "26": 789507392.0, - "27": 641355648.0, - "28": 805511168.0, - "29": 773780224.0, - "30": 811888960.0, - "31": 688167744.0, - "32": 834871424.0, - "33": 792944256.0, - "34": 777109568.0, - "35": 763515136.0, - "36": 733607744.0, - "37": 743626240.0, - "38": 746577024.0, - "39": 732972864.0, - "40": 735645696.0, - "41": 556711680.0, - "42": 680528384.0, - "43": 669752960.0, - "44": 667702912.0, - "45": 635197248.0, - "46": 629093120.0, - "47": 626713344.0, - "48": 600843456.0, - "49": 581506752.0, - "50": 572705728.0 + "1": 47165248.0, + "2": 46897896.0, + "3": 52684328.0, + "4": 297102368.0, + "5": 569266880.0, + "6": 661848704.0, + "7": 1027448384.0, + "8": 752263424.0, + "9": 852974912.0, + "10": 683720576.0, + "11": 833170624.0, + "12": 814312640.0, + "13": 639456320.0, + "14": 628553664.0, + "15": 706814592.0, + "16": 848848256.0, + "17": 676948992.0, + "18": 676681088.0, + "19": 892688576.0, + "20": 890700864.0, + "21": 676293696.0, + "22": 701562304.0, + "23": 796268224.0, + "24": 786414720.0, + "25": 667072192.0, + "26": 767487552.0, + "27": 773408512.0, + "28": 758333696.0, + "29": 
770627840.0, + "30": 758410304.0, + "31": 644127616.0, + "32": 806561088.0, + "33": 811820352.0, + "34": 780254848.0, + "35": 757223808.0, + "36": 758778496.0, + "37": 753072832.0, + "38": 752875328.0, + "39": 767575744.0, + "40": 760803392.0, + "41": 742253440.0, + "42": 718278848.0, + "43": 676047424.0, + "44": 673998592.0, + "45": 635196864.0, + "46": 629090048.0, + "47": 623565376.0, + "48": 600849984.0, + "49": 578357504.0, + "50": 585291904.0 } }, "mem-allocated-bytes": { @@ -185,46 +185,46 @@ "8": 8233667072.0, "9": 8233667072.0, "10": 8233667072.0, - "11": 8262715904.0, - "12": 8262715904.0, - "13": 8262715904.0, - "14": 8262715904.0, - "15": 8262715904.0, - "16": 8268117504.0, - "17": 8288236032.0, - "18": 8288236032.0, - "19": 8288236032.0, - "20": 8288236032.0, - "21": 8288236032.0, - "22": 8299924992.0, - "23": 8302176768.0, - "24": 8302176768.0, - "25": 8302176768.0, - "26": 8302176768.0, - "27": 8302176768.0, - "28": 8302176768.0, - "29": 8302176768.0, - "30": 8302176768.0, - "31": 8302176768.0, - "32": 8302176768.0, - "33": 8302176768.0, - "34": 8302176768.0, - "35": 8302176768.0, - "36": 8302176768.0, - "37": 8302176768.0, - "38": 8313753088.0, - "39": 8313753088.0, - "40": 8313753088.0, - "41": 8313753088.0, - "42": 8313753088.0, - "43": 8313753088.0, - "44": 8313753088.0, - "45": 8313753088.0, - "46": 8313753088.0, - "47": 8313753088.0, - "48": 8313753088.0, - "49": 8313753088.0, - "50": 8313753088.0 + "11": 8262763008.0, + "12": 8262763008.0, + "13": 8262763008.0, + "14": 8262763008.0, + "15": 8262763008.0, + "16": 8273029632.0, + "17": 8282915328.0, + "18": 8282915328.0, + "19": 8284467712.0, + "20": 8294910464.0, + "21": 8294910464.0, + "22": 8303365632.0, + "23": 8303365632.0, + "24": 8303365632.0, + "25": 8303365632.0, + "26": 8303365632.0, + "27": 8303365632.0, + "28": 8303365632.0, + "29": 8303365632.0, + "30": 8328921600.0, + "31": 8328921600.0, + "32": 8328921600.0, + "33": 8328921600.0, + "34": 8342317568.0, + "35": 8352083456.0, + "36": 
8352083456.0, + "37": 8352083456.0, + "38": 8352083456.0, + "39": 8352083456.0, + "40": 8352083456.0, + "41": 8352083456.0, + "42": 8352083456.0, + "43": 8352083456.0, + "44": 8352083456.0, + "45": 8352083456.0, + "46": 8352083456.0, + "47": 8352083456.0, + "48": 8352083456.0, + "49": 8352083456.0, + "50": 8352083456.0 } }, "mtp_1 loss": { @@ -234,54 +234,54 @@ "values": { "1": 11.07401, "2": 11.0927, - "3": 10.8262, - "4": 10.27574, - "5": 10.45324, - "6": 8.32758, - "7": 9.82629, - "8": 8.01538, - "9": 7.47611, - "10": 6.75851, - "11": 8.92961, - "12": 8.98772, - "13": 7.80203, - "14": 8.02221, - "15": 8.11372, - "16": 8.14498, - "17": 8.13435, - "18": 7.45035, - "19": 8.03784, - "20": 7.54246, - "21": 7.90269, - "22": 7.28093, - "23": 7.88727, - "24": 7.37587, - "25": 8.17289, - "26": 7.70083, - "27": 7.62668, - "28": 7.61747, - "29": 7.69888, - "30": 7.48586, - "31": 7.74301, - "32": 6.37542, - "33": 7.13919, - "34": 7.7198, - "35": 7.63387, - "36": 6.6127, - "37": 8.03449, - "38": 7.58334, - "39": 7.89887, - "40": 7.41168, - "41": 7.42316, - "42": 6.01689, - "43": 7.48867, - "44": 7.86976, - "45": 6.75113, - "46": 7.3054, - "47": 7.73281, - "48": 7.79017, - "49": 7.48985, - "50": 6.75753 + "3": 10.82644, + "4": 10.27575, + "5": 10.45332, + "6": 8.3277, + "7": 9.8265, + "8": 8.01558, + "9": 7.47586, + "10": 6.7581, + "11": 8.9297, + "12": 8.98829, + "13": 7.80214, + "14": 8.02436, + "15": 8.11251, + "16": 8.14258, + "17": 8.13031, + "18": 7.44579, + "19": 8.03606, + "20": 7.54064, + "21": 7.90046, + "22": 7.27709, + "23": 7.88548, + "24": 7.37576, + "25": 8.17071, + "26": 7.69849, + "27": 7.62829, + "28": 7.61349, + "29": 7.69754, + "30": 7.47936, + "31": 7.73926, + "32": 6.37137, + "33": 7.1379, + "34": 7.71901, + "35": 7.63544, + "36": 6.61321, + "37": 8.03174, + "38": 7.58067, + "39": 7.89473, + "40": 7.41418, + "41": 7.42196, + "42": 6.01401, + "43": 7.49099, + "44": 7.86625, + "45": 6.74951, + "46": 7.30637, + "47": 7.72653, + "48": 7.78872, + "49": 
7.48917, + "50": 6.75533 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 64.76466, - "2": 2.42359, - "3": 2.56054, - "4": 2.61199, - "5": 2.3272, - "6": 2.19806, - "7": 2.16133, - "8": 1.97339, - "9": 2.14238, - "10": 2.05512, - "11": 2.00856, - "12": 1.96198, - "13": 2.08656, - "14": 1.96948, - "15": 1.96059, - "16": 1.97248, - "17": 1.97639, - "18": 2.01386, - "19": 1.9606, - "20": 1.94716, - "21": 2.00286, - "22": 1.965, - "23": 2.03401, - "24": 2.00528, - "25": 2.03321, - "26": 1.95999, - "27": 1.96395, - "28": 1.98191, - "29": 1.99346, - "30": 1.97579, - "31": 1.95097, - "32": 1.95726, - "33": 1.9399, - "34": 1.99177, - "35": 1.91153, - "36": 1.97534, - "37": 1.95691, - "38": 1.96206, - "39": 1.9414, - "40": 1.96027, - "41": 1.97807, - "42": 1.98861, - "43": 1.94856, - "44": 1.96339, - "45": 1.96835, - "46": 1.99733, - "47": 1.9716, - "48": 1.96591, - "49": 1.93865, - "50": 1.95198 + "1": 88.9425, + "2": 2.91855, + "3": 2.58352, + "4": 3.73409, + "5": 2.63585, + "6": 2.48926, + "7": 2.27523, + "8": 2.50563, + "9": 2.45577, + "10": 1.90482, + "11": 1.96806, + "12": 2.42331, + "13": 1.88872, + "14": 1.89773, + "15": 1.90418, + "16": 1.885, + "17": 1.91181, + "18": 1.89194, + "19": 1.97889, + "20": 1.88063, + "21": 1.88612, + "22": 1.90981, + "23": 1.87053, + "24": 1.87293, + "25": 1.89611, + "26": 1.96035, + "27": 1.9067, + "28": 1.91982, + "29": 1.94441, + "30": 1.88208, + "31": 1.9521, + "32": 1.89063, + "33": 1.9571, + "34": 1.93481, + "35": 1.87558, + "36": 1.88538, + "37": 1.89041, + "38": 1.97023, + "39": 1.89001, + "40": 1.87859, + "41": 1.89949, + "42": 1.88775, + "43": 1.94805, + "44": 1.90575, + "45": 1.89185, + "46": 1.87259, + "47": 1.89396, + "48": 1.8747, + "49": 1.88874, + "50": 1.91915 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json index 162edd4f113..ca64f30b0fb 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 11.01693, "2": 11.06263, - "3": 10.1782, - "4": 10.86126, - "5": 9.81699, - "6": 9.10047, - "7": 9.6123, - "8": 8.39574, - "9": 7.79397, - "10": 7.15194, - "11": 9.06709, - "12": 12.4321, - "13": 8.58689, - "14": 8.37208, - "15": 8.32207, - "16": 8.28873, - "17": 8.33948, - "18": 7.62098, - "19": 8.20737, - "20": 7.71874, - "21": 8.02566, - "22": 7.37552, - "23": 7.97218, - "24": 7.52837, - "25": 8.3433, - "26": 7.79595, - "27": 7.73606, - "28": 7.71545, - "29": 7.78466, - "30": 7.57814, - "31": 7.86251, - "32": 6.53514, - "33": 7.24722, - "34": 7.81689, - "35": 7.75181, - "36": 6.74644, - "37": 8.15937, - "38": 7.62962, - "39": 7.9886, - "40": 7.53058, - "41": 7.54209, - "42": 6.14029, - "43": 7.61626, - "44": 7.97638, - "45": 6.85528, - "46": 7.44245, - "47": 7.84386, - "48": 7.89235, - "49": 7.61461, - "50": 6.86695 + "3": 10.17828, + "4": 10.86162, + "5": 9.8171, + "6": 9.10066, + "7": 9.61216, + "8": 8.39629, + "9": 7.79624, + "10": 7.15182, + "11": 9.06686, + "12": 12.41529, + "13": 8.05859, + "14": 8.25078, + "15": 8.25932, + "16": 8.33199, + "17": 8.33144, + "18": 7.58852, + "19": 8.19681, + "20": 7.68193, + "21": 8.00256, + "22": 7.37928, + "23": 7.95036, + "24": 7.52138, + "25": 8.32313, + "26": 7.80137, + "27": 7.73067, + "28": 7.70985, + "29": 7.77487, + "30": 7.57653, + "31": 7.85303, + "32": 6.5208, + "33": 7.2477, + "34": 7.80024, + "35": 7.74614, + "36": 6.73365, + "37": 8.154, + "38": 7.62714, + "39": 7.97924, + "40": 7.524, + 
"41": 7.52079, + "42": 6.11188, + "43": 7.6025, + "44": 7.97264, + "45": 6.84479, + "46": 7.4241, + "47": 7.82528, + "48": 7.87668, + "49": 7.5987, + "50": 6.8481 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 47167904.0, - "2": 46900672.0, - "3": 81004512.0, - "4": 231040016.0, - "5": 477984896.0, - "6": 558059904.0, - "7": 958271680.0, - "8": 723959296.0, - "9": 802607040.0, - "10": 715176064.0, - "11": 657024320.0, - "12": 565795136.0, - "13": 541943680.0, - "14": 773290880.0, - "15": 810566400.0, - "16": 748195712.0, - "17": 730395008.0, - "18": 733261760.0, - "19": 729119744.0, - "20": 859242112.0, - "21": 846155136.0, - "22": 648056832.0, - "23": 774244288.0, - "24": 629192960.0, - "25": 843192448.0, - "26": 846129280.0, - "27": 804864512.0, - "28": 789783424.0, - "29": 817814656.0, - "30": 808743168.0, - "31": 662987648.0, - "32": 841163840.0, - "33": 676597440.0, - "34": 808569792.0, - "35": 804410048.0, - "36": 749336000.0, - "37": 759355904.0, - "38": 768597888.0, - "39": 758146688.0, - "40": 767096448.0, - "41": 735961920.0, - "42": 705693632.0, - "43": 694921152.0, - "44": 692872768.0, - "45": 638337792.0, - "46": 654254336.0, - "47": 655022208.0, - "48": 648030848.0, - "49": 622397184.0, - "50": 582138304.0 + "1": 47167760.0, + "2": 46900544.0, + "3": 84151152.0, + "4": 237329488.0, + "5": 471710816.0, + "6": 558040704.0, + "7": 958277696.0, + "8": 723945792.0, + "9": 812038208.0, + "10": 721441280.0, + "11": 622437632.0, + "12": 556346176.0, + "13": 633166464.0, + "14": 700920576.0, + "15": 766532480.0, + "16": 719878656.0, + "17": 673785280.0, + "18": 733291456.0, + "19": 713440768.0, + "20": 859244608.0, + "21": 836730112.0, + "22": 789566720.0, + "23": 808848960.0, + "24": 644896128.0, + "25": 852631104.0, + "26": 836696384.0, + "27": 550069504.0, + "28": 604192832.0, + "29": 761193792.0, + "30": 758412160.0, + "31": 782509568.0, + "32": 765664256.0, + "33": 745758912.0, + "34": 569510656.0, + "35": 
728914304.0, + "36": 699003840.0, + "37": 705883072.0, + "38": 705682240.0, + "39": 685787136.0, + "40": 656996352.0, + "41": 484325760.0, + "42": 633345536.0, + "43": 641441984.0, + "44": 466413888.0, + "45": 427604864.0, + "46": 566181184.0, + "47": 563795904.0, + "48": 421565312.0, + "49": 537463040.0, + "50": 494058176.0 } }, "mem-allocated-bytes": { @@ -178,53 +178,53 @@ "1": 4305060864.0, "2": 5850929152.0, "3": 5850929152.0, - "4": 5857025536.0, - "5": 5857025536.0, - "6": 5857025536.0, - "7": 5857025536.0, - "8": 5857025536.0, - "9": 5857025536.0, - "10": 5857025536.0, - "11": 5857025536.0, - "12": 5857025536.0, - "13": 5857025536.0, - "14": 5857025536.0, - "15": 5857025536.0, - "16": 5857025536.0, - "17": 5857025536.0, - "18": 5857025536.0, - "19": 5857025536.0, - "20": 5857025536.0, - "21": 5857025536.0, - "22": 5857025536.0, - "23": 5857025536.0, - "24": 5857025536.0, - "25": 5857025536.0, - "26": 5857025536.0, - "27": 5857025536.0, - "28": 5857025536.0, - "29": 5857025536.0, - "30": 5857025536.0, - "31": 5857025536.0, - "32": 5857025536.0, - "33": 5857025536.0, - "34": 5857025536.0, - "35": 5857025536.0, - "36": 5857025536.0, - "37": 5857025536.0, - "38": 5857025536.0, - "39": 5857025536.0, - "40": 5857025536.0, - "41": 5857025536.0, - "42": 5857025536.0, - "43": 5857025536.0, - "44": 5857025536.0, - "45": 5857025536.0, - "46": 5857025536.0, - "47": 5857025536.0, - "48": 5857025536.0, - "49": 5857025536.0, - "50": 5860186112.0 + "4": 5857061888.0, + "5": 5857061888.0, + "6": 5857061888.0, + "7": 5857061888.0, + "8": 5857061888.0, + "9": 5857061888.0, + "10": 5857061888.0, + "11": 5857061888.0, + "12": 5857061888.0, + "13": 5857061888.0, + "14": 5857061888.0, + "15": 5857061888.0, + "16": 5857061888.0, + "17": 5857061888.0, + "18": 5857061888.0, + "19": 5857061888.0, + "20": 5857061888.0, + "21": 5857061888.0, + "22": 5857061888.0, + "23": 5857061888.0, + "24": 5857061888.0, + "25": 5857061888.0, + "26": 5857061888.0, + "27": 5857061888.0, + "28": 
5857061888.0, + "29": 5857061888.0, + "30": 5857061888.0, + "31": 5857061888.0, + "32": 5857061888.0, + "33": 5857061888.0, + "34": 5857061888.0, + "35": 5857061888.0, + "36": 5857061888.0, + "37": 5857061888.0, + "38": 5857061888.0, + "39": 5860414976.0, + "40": 5860414976.0, + "41": 5860414976.0, + "42": 5860414976.0, + "43": 5860414976.0, + "44": 5860414976.0, + "45": 5860414976.0, + "46": 5860414976.0, + "47": 5860414976.0, + "48": 5860414976.0, + "49": 5860414976.0, + "50": 5860414976.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 89.57975, - "2": 3.08398, - "3": 3.39072, - "4": 2.95563, - "5": 3.89951, - "6": 1.99592, - "7": 2.70541, - "8": 1.95431, - "9": 1.95178, - "10": 1.95311, - "11": 2.53128, - "12": 2.03561, - "13": 2.63986, - "14": 1.9956, - "15": 1.94751, - "16": 1.94319, - "17": 1.96972, - "18": 2.07225, - "19": 1.94281, - "20": 1.9489, - "21": 1.94199, - "22": 1.95565, - "23": 1.94632, - "24": 1.94485, - "25": 1.94325, - "26": 1.96685, - "27": 2.00745, - "28": 1.94741, - "29": 1.95606, - "30": 1.95414, - "31": 2.57092, - "32": 1.95172, - "33": 1.94952, - "34": 1.95519, - "35": 1.95735, - "36": 1.94985, - "37": 1.95117, - "38": 1.96384, - "39": 1.98373, - "40": 1.98071, - "41": 1.96168, - "42": 1.97892, - "43": 1.97654, - "44": 1.95705, - "45": 1.95269, - "46": 2.02666, - "47": 1.96138, - "48": 1.9657, - "49": 1.96155, - "50": 1.96872 + "1": 92.74621, + "2": 3.05215, + "3": 3.87635, + "4": 2.96691, + "5": 3.09601, + "6": 1.94793, + "7": 2.58283, + "8": 2.00403, + "9": 1.96081, + "10": 1.955, + "11": 1.95251, + "12": 2.07845, + "13": 2.01952, + "14": 1.96206, + "15": 1.96234, + "16": 1.97406, + "17": 2.0423, + "18": 1.96841, + "19": 1.95796, + "20": 2.48713, + "21": 2.55338, + "22": 1.97633, + "23": 1.95723, + "24": 1.98425, + "25": 1.95827, + "26": 1.95919, + "27": 1.95629, + "28": 1.96685, + "29": 1.95089, + "30": 2.55672, + "31": 1.93918, + "32": 1.95892, + "33": 1.95987, + "34": 1.95394, + 
"35": 1.96053, + "36": 1.96074, + "37": 1.96542, + "38": 1.97304, + "39": 2.00073, + "40": 1.98223, + "41": 1.95986, + "42": 1.96976, + "43": 1.94793, + "44": 1.95897, + "45": 1.96904, + "46": 1.96519, + "47": 1.95996, + "48": 1.96564, + "49": 1.96485, + "50": 1.97038 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json index 06c61dd41cd..a77eac20664 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.93667, - "2": 10.93264, - "3": 10.94261, - "4": 10.94946, - "5": 10.9505, - "6": 10.94178, - "7": 10.94476, - "8": 10.93699, - "9": 10.94972, - "10": 10.93759, - "11": 10.9406, - "12": 10.93716, - "13": 10.92358, - "14": 10.93371, - "15": 10.88706, - "16": 10.87515, - "17": 10.86873, - "18": 10.86098, - "19": 10.86339, - "20": 10.78129, - "21": 10.73115, - "22": 10.60306, - "23": 10.73333, - "24": 10.61855, - "25": 10.55193, - "26": 10.62733, - "27": 10.63863, - "28": 10.59011, - "29": 10.59838, - "30": 10.37855, - "31": 10.12094, - "32": 10.4607, - "33": 10.45529, - "34": 10.20066, - "35": 10.25786, - "36": 10.20915, - "37": 10.33728, - "38": 10.1679, - "39": 10.40892, - "40": 10.05215, - "41": 10.09403, - "42": 10.17856, - "43": 9.74184, - "44": 9.89065, - "45": 9.73999, - "46": 9.72711, - "47": 10.0914, - "48": 9.75297, - "49": 9.40165, - "50": 9.83664, - "51": 9.77026, - "52": 9.65357, - "53": 10.03083, - "54": 9.87876, - "55": 9.79584, - "56": 9.53186, - "57": 9.36615, - "58": 9.75299, - "59": 9.48086, - "60": 9.40843, - "61": 9.6013, - "62": 
9.90762, - "63": 9.25801, - "64": 9.68466, - "65": 8.79874, - "66": 9.60761, - "67": 9.25475, - "68": 9.71411, - "69": 9.71658, - "70": 9.66191, - "71": 9.52462, - "72": 9.47118, - "73": 9.38807, - "74": 8.8033, - "75": 9.33989, - "76": 8.93556, - "77": 9.99334, - "78": 9.6476, - "79": 9.28161, - "80": 9.29609, - "81": 9.39641, - "82": 9.60864, - "83": 9.21675, - "84": 9.34039, - "85": 9.53003, - "86": 8.95526, - "87": 9.51627, - "88": 9.68227, - "89": 9.50564, - "90": 9.75275, - "91": 9.23417, - "92": 9.25974, - "93": 8.94473, - "94": 8.6919, - "95": 9.44561, - "96": 9.40972, - "97": 9.20069, - "98": 9.58166, - "99": 8.75941, - "100": 9.2944 + "1": 10.93691, + "2": 10.93262, + "3": 10.94243, + "4": 10.95011, + "5": 10.9502, + "6": 10.94175, + "7": 10.94469, + "8": 10.93675, + "9": 10.94939, + "10": 10.9367, + "11": 10.94082, + "12": 10.93794, + "13": 10.92338, + "14": 10.93415, + "15": 10.88723, + "16": 10.87495, + "17": 10.86864, + "18": 10.86127, + "19": 10.86341, + "20": 10.78125, + "21": 10.73131, + "22": 10.60371, + "23": 10.73309, + "24": 10.61865, + "25": 10.55175, + "26": 10.62651, + "27": 10.63921, + "28": 10.59104, + "29": 10.5981, + "30": 10.37817, + "31": 10.12235, + "32": 10.46117, + "33": 10.45537, + "34": 10.20087, + "35": 10.25661, + "36": 10.20876, + "37": 10.33662, + "38": 10.16683, + "39": 10.40916, + "40": 10.05209, + "41": 10.09427, + "42": 10.17821, + "43": 9.74204, + "44": 9.89005, + "45": 9.74011, + "46": 9.72669, + "47": 10.09152, + "48": 9.75295, + "49": 9.40186, + "50": 9.83645, + "51": 9.77036, + "52": 9.65641, + "53": 10.03067, + "54": 9.87916, + "55": 9.79619, + "56": 9.52858, + "57": 9.36596, + "58": 9.75327, + "59": 9.48259, + "60": 9.40835, + "61": 9.60202, + "62": 9.90742, + "63": 9.25777, + "64": 9.68411, + "65": 8.79911, + "66": 9.60796, + "67": 9.25427, + "68": 9.71419, + "69": 9.71666, + "70": 9.6613, + "71": 9.52439, + "72": 9.4709, + "73": 9.38862, + "74": 8.80286, + "75": 9.34004, + "76": 8.93543, + "77": 9.99337, + "78": 
9.64723, + "79": 9.28126, + "80": 9.29633, + "81": 9.39609, + "82": 9.60877, + "83": 9.21694, + "84": 9.34008, + "85": 9.53009, + "86": 8.95652, + "87": 9.51691, + "88": 9.68221, + "89": 9.50553, + "90": 9.753, + "91": 9.2347, + "92": 9.26019, + "93": 8.94568, + "94": 8.69194, + "95": 9.44616, + "96": 9.41008, + "97": 9.20125, + "98": 9.58169, + "99": 8.75946, + "100": 9.29483 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 22750340.0, - "2": 22953240.0, - "3": 22604372.0, - "4": 23266290.0, - "5": 22735592.0, - "6": 23061820.0, - "7": 22793344.0, - "8": 22960844.0, - "9": 22865576.0, - "10": 22950400.0, - "11": 22499694.0, - "12": 22456048.0, - "13": 22948070.0, - "14": 22384472.0, - "15": 22846226.0, - "16": 22856726.0, - "17": 22836416.0, - "18": 22590156.0, - "19": 22627028.0, - "20": 22712304.0, - "21": 22762708.0, - "22": 22816860.0, - "23": 22545234.0, - "24": 22794360.0, - "25": 22842012.0, - "26": 22549648.0, - "27": 22464794.0, - "28": 22453688.0, - "29": 22534550.0, - "30": 22636280.0, - "31": 22989464.0, - "32": 22594058.0, - "33": 22565896.0, - "34": 22855566.0, - "35": 22813548.0, - "36": 22595456.0, - "37": 22499328.0, - "38": 22926188.0, - "39": 22825288.0, - "40": 22675666.0, - "41": 22671440.0, - "42": 22682290.0, - "43": 23013968.0, - "44": 22764432.0, - "45": 22682616.0, - "46": 22911524.0, - "47": 23691920.0, - "48": 22954152.0, - "49": 23786644.0, - "50": 22934374.0, - "51": 23866192.0, - "52": 23807216.0, - "53": 24007492.0, - "54": 22868900.0, - "55": 23571312.0, - "56": 23954240.0, - "57": 23162470.0, - "58": 23914490.0, - "59": 22722768.0, - "60": 23813636.0, - "61": 23813616.0, - "62": 23739838.0, - "63": 23916666.0, - "64": 23899012.0, - "65": 24148300.0, - "66": 23796396.0, - "67": 25032292.0, - "68": 23675750.0, - "69": 23646956.0, - "70": 23903548.0, - "71": 24864524.0, - "72": 24767004.0, - "73": 24850716.0, - "74": 24133058.0, - "75": 24146156.0, - "76": 25025568.0, - "77": 
24358296.0, - "78": 24910078.0, - "79": 23808274.0, - "80": 24821470.0, - "81": 25020448.0, - "82": 23851480.0, - "83": 23911932.0, - "84": 25143880.0, - "85": 24823452.0, - "86": 23154428.0, - "87": 24850248.0, - "88": 24749204.0, - "89": 22506446.0, - "90": 25108540.0, - "91": 23839404.0, - "92": 23875080.0, - "93": 24769680.0, - "94": 23992436.0, - "95": 25189956.0, - "96": 23908992.0, - "97": 24713120.0, - "98": 23832428.0, - "99": 23983742.0, - "100": 24101128.0 + "1": 22750372.0, + "2": 22953180.0, + "3": 22604424.0, + "4": 23266362.0, + "5": 22735560.0, + "6": 23061884.0, + "7": 22793368.0, + "8": 22960792.0, + "9": 22865612.0, + "10": 22950328.0, + "11": 22499656.0, + "12": 22456052.0, + "13": 22948014.0, + "14": 22384498.0, + "15": 22846334.0, + "16": 22856854.0, + "17": 22836340.0, + "18": 22590220.0, + "19": 22627128.0, + "20": 22712376.0, + "21": 22762744.0, + "22": 22816900.0, + "23": 22545168.0, + "24": 22794340.0, + "25": 22841898.0, + "26": 22549680.0, + "27": 22464852.0, + "28": 22453780.0, + "29": 22534588.0, + "30": 22636160.0, + "31": 22989382.0, + "32": 22594002.0, + "33": 22566000.0, + "34": 22855476.0, + "35": 22813640.0, + "36": 22595484.0, + "37": 22499348.0, + "38": 22926172.0, + "39": 22825344.0, + "40": 22675752.0, + "41": 22671542.0, + "42": 22682408.0, + "43": 23014140.0, + "44": 22768504.0, + "45": 22679044.0, + "46": 22912572.0, + "47": 23691904.0, + "48": 24003148.0, + "49": 23786764.0, + "50": 22931654.0, + "51": 23866164.0, + "52": 23807242.0, + "53": 24007504.0, + "54": 22867916.0, + "55": 23571280.0, + "56": 23954212.0, + "57": 24211680.0, + "58": 23914512.0, + "59": 22722820.0, + "60": 23813508.0, + "61": 23796364.0, + "62": 23739896.0, + "63": 24965914.0, + "64": 23898698.0, + "65": 24150860.0, + "66": 23796512.0, + "67": 25032960.0, + "68": 23673048.0, + "69": 23644684.0, + "70": 23903614.0, + "71": 24864656.0, + "72": 24766928.0, + "73": 24850636.0, + "74": 24133166.0, + "75": 24143912.0, + "76": 25025406.0, + "77": 
24358344.0, + "78": 24910132.0, + "79": 23808164.0, + "80": 23772256.0, + "81": 25020440.0, + "82": 23851242.0, + "83": 23911824.0, + "84": 25143864.0, + "85": 24823592.0, + "86": 23153228.0, + "87": 24850332.0, + "88": 24749368.0, + "89": 22505174.0, + "90": 25108752.0, + "91": 23838548.0, + "92": 24923816.0, + "93": 24769484.0, + "94": 25041572.0, + "95": 25189350.0, + "96": 23909318.0, + "97": 23664104.0, + "98": 23832392.0, + "99": 23981812.0, + "100": 24101144.0 } }, "mem-allocated-bytes": { @@ -219,105 +219,105 @@ "step_interval": 1, "values": { "1": 773784064.0, - "2": 763563008.0, - "3": 766700544.0, - "4": 935098368.0, + "2": 776621056.0, + "3": 764709888.0, + "4": 937392128.0, "5": 935098368.0, - "6": 937392128.0, - "7": 937392128.0, - "8": 935639040.0, - "9": 937392128.0, - "10": 937392128.0, - "11": 935098368.0, - "12": 937392128.0, - "13": 937392128.0, - "14": 935098368.0, + "6": 935098368.0, + "7": 935639040.0, + "8": 937392128.0, + "9": 935098368.0, + "10": 936785920.0, + "11": 937392128.0, + "12": 935098368.0, + "13": 935098368.0, + "14": 935639040.0, "15": 937392128.0, - "16": 936785920.0, - "17": 935098368.0, + "16": 935098368.0, + "17": 935639040.0, "18": 937392128.0, "19": 937392128.0, "20": 935098368.0, - "21": 937392128.0, - "22": 936785920.0, - "23": 935098368.0, + "21": 936785920.0, + "22": 937392128.0, + "23": 936785920.0, "24": 937392128.0, - "25": 935639040.0, - "26": 937392128.0, - "27": 937392128.0, - "28": 935098368.0, + "25": 935098368.0, + "26": 935098368.0, + "27": 936245248.0, + "28": 937392128.0, "29": 937392128.0, - "30": 935639040.0, + "30": 935098368.0, "31": 935098368.0, - "32": 937392128.0, - "33": 935098368.0, + "32": 935639040.0, + "33": 936785920.0, "34": 937392128.0, - "35": 936245248.0, - "36": 935098368.0, - "37": 937392128.0, + "35": 937392128.0, + "36": 937392128.0, + "37": 935098368.0, "38": 935098368.0, - "39": 937392128.0, - "40": 937392128.0, - "41": 935098368.0, + "39": 935098368.0, + "40": 936785920.0, + "41": 
937392128.0, "42": 937392128.0, - "43": 935098368.0, + "43": 937392128.0, "44": 937392128.0, - "45": 936245248.0, + "45": 937392128.0, "46": 937392128.0, - "47": 937392128.0, + "47": 935098368.0, "48": 935098368.0, "49": 937392128.0, - "50": 935639040.0, - "51": 937392128.0, - "52": 935098368.0, - "53": 937392128.0, - "54": 936245248.0, - "55": 935098368.0, - "56": 937392128.0, + "50": 937392128.0, + "51": 935098368.0, + "52": 935639040.0, + "53": 936785920.0, + "54": 937392128.0, + "55": 937392128.0, + "56": 935098368.0, "57": 935098368.0, - "58": 937392128.0, + "58": 935098368.0, "59": 935639040.0, - "60": 937392128.0, + "60": 936245248.0, "61": 936785920.0, - "62": 937392128.0, - "63": 936785920.0, - "64": 935098368.0, + "62": 936785920.0, + "63": 937392128.0, + "64": 937392128.0, "65": 937392128.0, "66": 935098368.0, - "67": 937392128.0, - "68": 935098368.0, - "69": 937392128.0, - "70": 935098368.0, + "67": 935098368.0, + "68": 935639040.0, + "69": 936245248.0, + "70": 936785920.0, "71": 937392128.0, - "72": 935098368.0, + "72": 937392128.0, "73": 937392128.0, - "74": 936245248.0, - "75": 937392128.0, - "76": 936785920.0, + "74": 937392128.0, + "75": 935098368.0, + "76": 937392128.0, "77": 937392128.0, - "78": 936785920.0, - "79": 935098368.0, + "78": 935098368.0, + "79": 935639040.0, "80": 937392128.0, - "81": 935098368.0, - "82": 937392128.0, - "83": 935098368.0, + "81": 937392128.0, + "82": 935098368.0, + "83": 936785920.0, "84": 937392128.0, - "85": 935639040.0, - "86": 937392128.0, - "87": 937392128.0, - "88": 935098368.0, - "89": 937392128.0, + "85": 937392128.0, + "86": 935098368.0, + "87": 936785920.0, + "88": 937392128.0, + "89": 935098368.0, "90": 935639040.0, "91": 937392128.0, - "92": 936785920.0, - "93": 935098368.0, - "94": 937392128.0, + "92": 937392128.0, + "93": 937392128.0, + "94": 935098368.0, "95": 935098368.0, - "96": 937392128.0, - "97": 936785920.0, - "98": 935098368.0, - "99": 937392128.0, - "100": 935098368.0 + "96": 935639040.0, + 
"97": 936245248.0, + "98": 937392128.0, + "99": 935098368.0, + "100": 936785920.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 940788224.0, - "2": 1157431808.0, - "3": 1157431808.0, - "4": 1247832064.0, - "5": 1247832064.0, - "6": 1247832064.0, - "7": 1247832064.0, - "8": 1248165376.0, - "9": 1248165376.0, - "10": 1248305664.0, - "11": 1248305664.0, - "12": 1248305664.0, - "13": 1248305664.0, - "14": 1248979968.0, - "15": 1248979968.0, - "16": 1248979968.0, - "17": 1248979968.0, - "18": 1248979968.0, - "19": 1249688576.0, - "20": 1249688576.0, - "21": 1249688576.0, - "22": 1249688576.0, - "23": 1249688576.0, - "24": 1249688576.0, - "25": 1249688576.0, - "26": 1249688576.0, - "27": 1249688576.0, - "28": 1249688576.0, - "29": 1249688576.0, - "30": 1249688576.0, - "31": 1249688576.0, - "32": 1249688576.0, - "33": 1249688576.0, - "34": 1249688576.0, - "35": 1249688576.0, - "36": 1249688576.0, - "37": 1249688576.0, - "38": 1249688576.0, - "39": 1249688576.0, - "40": 1249688576.0, - "41": 1249688576.0, - "42": 1249688576.0, - "43": 1249688576.0, - "44": 1249688576.0, - "45": 1249688576.0, - "46": 1249688576.0, - "47": 1249688576.0, - "48": 1249688576.0, - "49": 1249688576.0, - "50": 1249688576.0, - "51": 1249688576.0, - "52": 1249688576.0, - "53": 1249688576.0, - "54": 1249688576.0, - "55": 1249688576.0, - "56": 1249688576.0, - "57": 1249688576.0, - "58": 1249688576.0, - "59": 1249688576.0, - "60": 1249688576.0, - "61": 1249688576.0, - "62": 1249688576.0, - "63": 1249688576.0, - "64": 1249688576.0, - "65": 1249688576.0, - "66": 1249688576.0, - "67": 1249688576.0, - "68": 1249688576.0, - "69": 1249688576.0, - "70": 1249688576.0, - "71": 1249688576.0, - "72": 1249688576.0, - "73": 1249688576.0, - "74": 1249688576.0, - "75": 1249688576.0, - "76": 1249688576.0, - "77": 1249688576.0, - "78": 1249688576.0, - "79": 1249688576.0, - "80": 1249688576.0, - "81": 1249688576.0, - "82": 1249688576.0, - "83": 
1249688576.0, - "84": 1249688576.0, - "85": 1249688576.0, - "86": 1249688576.0, - "87": 1249688576.0, - "88": 1249688576.0, - "89": 1249688576.0, - "90": 1249688576.0, - "91": 1249688576.0, - "92": 1249688576.0, - "93": 1249688576.0, - "94": 1249688576.0, - "95": 1249688576.0, - "96": 1249688576.0, - "97": 1249688576.0, - "98": 1249688576.0, - "99": 1249688576.0, - "100": 1249688576.0 + "1": 936453632.0, + "2": 1158617088.0, + "3": 1158617088.0, + "4": 1246761472.0, + "5": 1247365632.0, + "6": 1247365632.0, + "7": 1247765504.0, + "8": 1247765504.0, + "9": 1247765504.0, + "10": 1252415488.0, + "11": 1252415488.0, + "12": 1252415488.0, + "13": 1252415488.0, + "14": 1252415488.0, + "15": 1252415488.0, + "16": 1252415488.0, + "17": 1252415488.0, + "18": 1252415488.0, + "19": 1252415488.0, + "20": 1252415488.0, + "21": 1252415488.0, + "22": 1252415488.0, + "23": 1252415488.0, + "24": 1252415488.0, + "25": 1252415488.0, + "26": 1252415488.0, + "27": 1252415488.0, + "28": 1252415488.0, + "29": 1252415488.0, + "30": 1252415488.0, + "31": 1252415488.0, + "32": 1252415488.0, + "33": 1252415488.0, + "34": 1252415488.0, + "35": 1252415488.0, + "36": 1252415488.0, + "37": 1252415488.0, + "38": 1252415488.0, + "39": 1252415488.0, + "40": 1252415488.0, + "41": 1252415488.0, + "42": 1252415488.0, + "43": 1252415488.0, + "44": 1252415488.0, + "45": 1252415488.0, + "46": 1252415488.0, + "47": 1252415488.0, + "48": 1252415488.0, + "49": 1252415488.0, + "50": 1252415488.0, + "51": 1252415488.0, + "52": 1252415488.0, + "53": 1252415488.0, + "54": 1252415488.0, + "55": 1252415488.0, + "56": 1252415488.0, + "57": 1252415488.0, + "58": 1252415488.0, + "59": 1252415488.0, + "60": 1252415488.0, + "61": 1252415488.0, + "62": 1252415488.0, + "63": 1252415488.0, + "64": 1252415488.0, + "65": 1252415488.0, + "66": 1252415488.0, + "67": 1252415488.0, + "68": 1252415488.0, + "69": 1252415488.0, + "70": 1252415488.0, + "71": 1252415488.0, + "72": 1252415488.0, + "73": 1252415488.0, + "74": 
1252415488.0, + "75": 1252415488.0, + "76": 1252415488.0, + "77": 1252415488.0, + "78": 1252415488.0, + "79": 1252415488.0, + "80": 1252415488.0, + "81": 1252415488.0, + "82": 1252415488.0, + "83": 1252415488.0, + "84": 1252415488.0, + "85": 1252415488.0, + "86": 1252415488.0, + "87": 1252415488.0, + "88": 1252415488.0, + "89": 1252415488.0, + "90": 1252415488.0, + "91": 1252415488.0, + "92": 1252415488.0, + "93": 1252415488.0, + "94": 1252415488.0, + "95": 1252415488.0, + "96": 1252415488.0, + "97": 1252415488.0, + "98": 1252415488.0, + "99": 1252415488.0, + "100": 1252415488.0 } }, "mtp_1 loss": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.88688, - "2": 10.90482, - "3": 10.9087, - "4": 10.86893, - "5": 10.91659, - "6": 10.90568, - "7": 10.90273, - "8": 10.89003, - "9": 10.90367, - "10": 10.89165, - "11": 10.93407, - "12": 10.91649, - "13": 10.91113, - "14": 10.91972, - "15": 10.88512, - "16": 10.90762, + "1": 10.88691, + "2": 10.90544, + "3": 10.90868, + "4": 10.86912, + "5": 10.91636, + "6": 10.90651, + "7": 10.90278, + "8": 10.88975, + "9": 10.90453, + "10": 10.89162, + "11": 10.93392, + "12": 10.91634, + "13": 10.91136, + "14": 10.91999, + "15": 10.88538, + "16": 10.90717, "17": 10.87525, - "18": 10.91396, - "19": 10.90949, - "20": 10.87811, - "21": 10.87944, - "22": 10.85495, - "23": 10.87985, - "24": 10.87289, - "25": 10.85849, - "26": 10.86957, - "27": 10.87683, - "28": 10.88682, - "29": 10.88885, - "30": 10.85468, - "31": 10.79756, - "32": 10.86606, - "33": 10.87767, - "34": 10.84002, - "35": 10.84197, - "36": 10.8501, - "37": 10.85593, - "38": 10.8371, - "39": 10.86345, - "40": 10.82902, - "41": 10.83425, - "42": 10.84438, - "43": 10.78764, - "44": 10.82077, - "45": 10.78834, - "46": 10.78249, - "47": 10.82884, - "48": 10.79035, - "49": 10.71167, - "50": 10.77366, - "51": 10.76725, - "52": 10.74037, - "53": 10.80261, - "54": 10.77356, - "55": 10.76019, - "56": 10.71045, - "57": 10.66667, - "58": 10.74362, - "59": 
10.69036, - "60": 10.66502, - "61": 10.70788, - "62": 10.772, - "63": 10.61853, - "64": 10.71765, - "65": 10.49451, - "66": 10.67121, - "67": 10.57549, - "68": 10.68782, - "69": 10.68291, - "70": 10.6695, - "71": 10.64584, - "72": 10.60876, - "73": 10.56523, - "74": 10.37039, - "75": 10.51086, - "76": 10.39869, - "77": 10.75172, - "78": 10.62677, - "79": 10.46664, - "80": 10.47405, - "81": 10.51052, - "82": 10.58766, - "83": 10.43963, - "84": 10.44967, - "85": 10.55157, - "86": 10.28464, - "87": 10.51164, - "88": 10.6034, - "89": 10.50879, - "90": 10.60395, - "91": 10.38241, - "92": 10.38669, - "93": 10.22995, - "94": 10.08283, - "95": 10.42553, - "96": 10.44856, - "97": 10.32063, - "98": 10.49615, - "99": 10.04594, - "100": 10.33373 + "18": 10.91409, + "19": 10.90936, + "20": 10.87835, + "21": 10.8786, + "22": 10.85481, + "23": 10.87937, + "24": 10.87208, + "25": 10.85798, + "26": 10.86991, + "27": 10.87718, + "28": 10.88667, + "29": 10.88859, + "30": 10.85479, + "31": 10.79701, + "32": 10.86609, + "33": 10.87789, + "34": 10.8397, + "35": 10.84184, + "36": 10.85, + "37": 10.85585, + "38": 10.83714, + "39": 10.86361, + "40": 10.82866, + "41": 10.83386, + "42": 10.84447, + "43": 10.78747, + "44": 10.82127, + "45": 10.78826, + "46": 10.78323, + "47": 10.82894, + "48": 10.7901, + "49": 10.71201, + "50": 10.77359, + "51": 10.76681, + "52": 10.74029, + "53": 10.8027, + "54": 10.77345, + "55": 10.76133, + "56": 10.71153, + "57": 10.66673, + "58": 10.74318, + "59": 10.69182, + "60": 10.66418, + "61": 10.70712, + "62": 10.77164, + "63": 10.61759, + "64": 10.71667, + "65": 10.4936, + "66": 10.67118, + "67": 10.57515, + "68": 10.68716, + "69": 10.68277, + "70": 10.66908, + "71": 10.64566, + "72": 10.60905, + "73": 10.56507, + "74": 10.37106, + "75": 10.5114, + "76": 10.39856, + "77": 10.75192, + "78": 10.62708, + "79": 10.4675, + "80": 10.47474, + "81": 10.51003, + "82": 10.58819, + "83": 10.43946, + "84": 10.45015, + "85": 10.55142, + "86": 10.2831, + "87": 10.51182, + 
"88": 10.60318, + "89": 10.50948, + "90": 10.60407, + "91": 10.38208, + "92": 10.38708, + "93": 10.23019, + "94": 10.08381, + "95": 10.4259, + "96": 10.4489, + "97": 10.32133, + "98": 10.49668, + "99": 10.04795, + "100": 10.33446 } }, "iteration-time": { @@ -539,106 +539,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 67.25594, - "2": 2.02448, - "3": 1.31909, - "4": 3.51713, - "5": 0.68118, - "6": 0.68517, - "7": 0.6825, - "8": 0.66566, - "9": 0.66522, - "10": 0.67133, - "11": 0.66857, - "12": 0.66644, - "13": 0.67083, - "14": 0.66571, - "15": 0.66315, - "16": 0.66494, - "17": 0.66971, - "18": 0.67036, - "19": 0.66993, - "20": 0.66906, - "21": 0.66515, - "22": 0.66541, - "23": 0.66633, - "24": 0.66527, - "25": 0.66367, - "26": 0.66301, - "27": 0.6633, - "28": 0.66152, - "29": 0.66022, - "30": 0.66204, - "31": 0.66645, - "32": 0.66494, - "33": 0.66029, - "34": 0.66391, - "35": 0.65922, - "36": 0.66135, - "37": 0.6625, - "38": 0.65862, - "39": 0.65997, - "40": 0.68187, - "41": 0.65886, - "42": 0.65824, - "43": 0.65934, - "44": 0.65661, - "45": 0.65819, - "46": 0.66081, - "47": 0.65905, - "48": 0.66151, - "49": 0.66043, - "50": 0.65818, - "51": 0.74732, - "52": 0.65757, - "53": 0.66273, - "54": 0.65899, - "55": 0.66722, - "56": 0.65747, - "57": 0.65863, - "58": 0.66051, - "59": 0.65938, - "60": 0.65822, - "61": 0.65548, - "62": 0.65759, - "63": 0.65386, - "64": 0.65424, - "65": 0.65305, - "66": 0.65491, - "67": 0.6567, - "68": 0.65495, - "69": 0.65344, - "70": 0.65619, - "71": 0.65258, - "72": 0.65965, - "73": 0.66093, - "74": 0.65552, - "75": 0.65731, - "76": 0.6542, - "77": 0.65449, - "78": 0.65305, - "79": 0.65456, - "80": 0.65355, - "81": 0.65662, - "82": 0.65633, - "83": 0.6568, - "84": 0.65869, - "85": 0.66387, - "86": 0.66145, - "87": 0.66045, - "88": 0.66082, - "89": 0.66365, - "90": 0.66413, - "91": 0.66268, - "92": 0.6594, - "93": 0.66184, - "94": 0.65968, - "95": 0.66219, - "96": 0.66239, - "97": 0.66014, - "98": 0.66265, - "99": 0.66054, - 
"100": 0.66123 + "1": 74.16337, + "2": 1.6487, + "3": 1.45105, + "4": 4.39166, + "5": 0.72113, + "6": 0.82637, + "7": 0.7985, + "8": 0.73623, + "9": 0.7398, + "10": 0.74065, + "11": 0.73395, + "12": 0.73395, + "13": 0.79806, + "14": 0.7251, + "15": 0.7312, + "16": 0.75102, + "17": 0.72379, + "18": 0.72614, + "19": 0.73367, + "20": 0.73334, + "21": 0.72408, + "22": 0.74787, + "23": 0.75535, + "24": 0.72783, + "25": 0.7314, + "26": 0.71985, + "27": 0.7246, + "28": 0.72236, + "29": 0.71945, + "30": 0.72182, + "31": 0.72292, + "32": 0.71754, + "33": 0.7157, + "34": 0.70975, + "35": 0.72388, + "36": 0.71455, + "37": 0.71511, + "38": 0.71163, + "39": 0.71376, + "40": 0.72067, + "41": 0.71279, + "42": 0.70858, + "43": 0.7086, + "44": 0.70995, + "45": 0.70901, + "46": 0.70881, + "47": 0.71115, + "48": 0.72369, + "49": 0.73908, + "50": 0.81598, + "51": 0.73667, + "52": 0.71381, + "53": 0.72282, + "54": 0.73549, + "55": 0.70748, + "56": 0.7102, + "57": 0.70853, + "58": 0.70998, + "59": 0.71846, + "60": 0.70825, + "61": 0.70848, + "62": 0.70734, + "63": 0.7097, + "64": 0.72007, + "65": 0.71061, + "66": 0.7223, + "67": 0.71411, + "68": 0.71437, + "69": 0.70943, + "70": 0.70895, + "71": 0.71052, + "72": 0.70672, + "73": 0.72725, + "74": 0.70761, + "75": 0.7334, + "76": 0.7387, + "77": 0.72758, + "78": 0.72748, + "79": 0.73386, + "80": 0.72774, + "81": 0.71859, + "82": 0.71526, + "83": 0.75425, + "84": 0.72064, + "85": 0.72017, + "86": 0.72277, + "87": 0.73635, + "88": 0.72228, + "89": 0.73388, + "90": 0.74435, + "91": 0.7281, + "92": 0.71839, + "93": 0.71175, + "94": 0.71437, + "95": 0.71311, + "96": 0.71386, + "97": 0.71412, + "98": 0.72944, + "99": 0.7486, + "100": 0.74015 } } } \ No newline at end of file From ae3dbc04b6ec04091b85f4d7ec3acc53becbafe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 13 Jan 2026 16:01:15 +0000 Subject: [PATCH 225/334] ci(hotfix): Re-add `gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone` value MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_lts_dgx_a100.json | 538 +++++++++++++++++- 1 file changed, 537 insertions(+), 1 deletion(-) diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json index 9e26dfeeb6e..f273ff540d3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json @@ -1 +1,537 @@ -{} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.85936, + "2": 10.8548, + "3": 10.85199, + "4": 10.84317, + "5": 10.87247, + "6": 10.87857, + "7": 10.84622, + "8": 10.86369, + "9": 10.87211, + "10": 10.8311, + "11": 10.86068, + "12": 10.87273, + "13": 10.87992, + "14": 10.88657, + "15": 10.82029, + "16": 10.82684, + "17": 10.7998, + "18": 10.81985, + "19": 10.80035, + "20": 10.71399, + "21": 10.69893, + "22": 10.57449, + "23": 10.71973, + "24": 10.60285, + "25": 10.54611, + "26": 10.61041, + "27": 10.61227, + "28": 10.57731, + "29": 10.58005, + "30": 10.36705, + "31": 10.13447, + "32": 10.47127, + "33": 10.47454, + "34": 10.23198, + "35": 10.28443, + "36": 10.23436, + "37": 10.35346, + "38": 10.20696, + "39": 10.40599, + "40": 10.08972, + "41": 10.16331, + "42": 10.2256, + "43": 9.8639, + "44": 9.98246, + "45": 9.84548, + "46": 9.8581, + "47": 10.1689, + "48": 9.86658, + "49": 9.54555, + "50": 9.91937, + "51": 9.86074, + "52": 9.76116, + "53": 10.08415, + "54": 9.96563, + "55": 9.89123, + "56": 9.63923, + "57": 9.4936, + "58": 9.83871, + "59": 9.59623, + "60": 9.5091, + "61": 9.70544, + "62": 
9.99513, + "63": 9.38104, + "64": 9.78222, + "65": 8.95962, + "66": 9.71006, + "67": 9.38013, + "68": 9.78827, + "69": 9.79425, + "70": 9.73517, + "71": 9.62218, + "72": 9.58801, + "73": 9.49714, + "74": 8.94242, + "75": 9.4322, + "76": 9.09757, + "77": 10.06853, + "78": 9.73055, + "79": 9.37759, + "80": 9.41116, + "81": 9.48631, + "82": 9.69758, + "83": 9.31674, + "84": 9.42151, + "85": 9.61502, + "86": 9.07627, + "87": 9.59887, + "88": 9.75047, + "89": 9.61233, + "90": 9.82363, + "91": 9.35377, + "92": 9.36525, + "93": 9.08833, + "94": 8.83614, + "95": 9.5226, + "96": 9.52736, + "97": 9.3169, + "98": 9.67961, + "99": 8.89276, + "100": 9.40803 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1768.0, + "2": 1871.0, + "3": 1757.0, + "4": 1902.0, + "5": 2016.0, + "6": 1943.0, + "7": 1981.0, + "8": 1667.0, + "9": 1973.0, + "10": 1477.0, + "11": 2178.0, + "12": 1985.0, + "13": 2137.0, + "14": 2021.0, + "15": 1944.0, + "16": 2053.0, + "17": 1967.0, + "18": 1922.0, + "19": 2031.0, + "20": 1837.0, + "21": 2048.0, + "22": 1917.0, + "23": 2190.0, + "24": 1787.0, + "25": 1869.0, + "26": 1882.0, + "27": 2143.0, + "28": 2147.0, + "29": 2222.0, + "30": 2046.0, + "31": 1734.0, + "32": 2171.0, + "33": 2380.0, + "34": 2046.0, + "35": 2147.0, + "36": 2149.0, + "37": 2645.0, + "38": 2416.0, + "39": 2672.0, + "40": 2441.0, + "41": 2585.0, + "42": 2483.0, + "43": 2262.0, + "44": 2344.0, + "45": 2300.0, + "46": 2560.0, + "47": 2755.0, + "48": 2764.0, + "49": 2505.0, + "50": 2723.0, + "51": 2806.0, + "52": 2805.0, + "53": 3225.0, + "54": 3028.0, + "55": 2486.0, + "56": 3093.0, + "57": 2588.0, + "58": 3219.0, + "59": 3021.0, + "60": 2649.0, + "61": 3247.0, + "62": 2649.0, + "63": 2637.0, + "64": 3140.0, + "65": 3038.0, + "66": 3422.0, + "67": 2933.0, + "68": 3039.0, + "69": 3167.0, + "70": 3539.0, + "71": 3213.0, + "72": 2597.0, + "73": 3290.0, + "74": 2140.0, + "75": 2837.0, + "76": 3342.0, + "77": 3444.0, + "78": 3504.0, + "79": 
3513.0, + "80": 3733.0, + "81": 4024.0, + "82": 3670.0, + "83": 3199.0, + "84": 3539.0, + "85": 3585.0, + "86": 2979.0, + "87": 3951.0, + "88": 3286.0, + "89": 3787.0, + "90": 3341.0, + "91": 3070.0, + "92": 3410.0, + "93": 2923.0, + "94": 3868.0, + "95": 3627.0, + "96": 3787.0, + "97": 3549.0, + "98": 4026.0, + "99": 3531.0, + "100": 3649.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 232398336.0, + "2": 232398336.0, + "3": 232398336.0, + "4": 232398336.0, + "5": 232398336.0, + "6": 232398336.0, + "7": 232398336.0, + "8": 232398336.0, + "9": 232398336.0, + "10": 232398336.0, + "11": 232398336.0, + "12": 232398336.0, + "13": 232398336.0, + "14": 232398336.0, + "15": 232398336.0, + "16": 232398336.0, + "17": 232398336.0, + "18": 232398336.0, + "19": 232398336.0, + "20": 232398336.0, + "21": 232398336.0, + "22": 232398336.0, + "23": 232398336.0, + "24": 232398336.0, + "25": 232398336.0, + "26": 232398336.0, + "27": 232398336.0, + "28": 232398336.0, + "29": 232398336.0, + "30": 232398336.0, + "31": 232398336.0, + "32": 232398336.0, + "33": 232398336.0, + "34": 232398336.0, + "35": 232398336.0, + "36": 232398336.0, + "37": 232398336.0, + "38": 232398336.0, + "39": 232398336.0, + "40": 232398336.0, + "41": 232398336.0, + "42": 232398336.0, + "43": 232398336.0, + "44": 232398336.0, + "45": 232398336.0, + "46": 232398336.0, + "47": 232398336.0, + "48": 232398336.0, + "49": 232398336.0, + "50": 232398336.0, + "51": 232398336.0, + "52": 232398336.0, + "53": 232398336.0, + "54": 232398336.0, + "55": 232398336.0, + "56": 232398336.0, + "57": 232398336.0, + "58": 232398336.0, + "59": 232398336.0, + "60": 232398336.0, + "61": 232398336.0, + "62": 232398336.0, + "63": 232398336.0, + "64": 232398336.0, + "65": 232398336.0, + "66": 232398336.0, + "67": 232398336.0, + "68": 232398336.0, + "69": 232398336.0, + "70": 232398336.0, + "71": 232398336.0, + "72": 232398336.0, + "73": 232398336.0, + "74": 232398336.0, 
+ "75": 232398336.0, + "76": 232398336.0, + "77": 232398336.0, + "78": 232398336.0, + "79": 232398336.0, + "80": 232398336.0, + "81": 232398336.0, + "82": 232398336.0, + "83": 232398336.0, + "84": 232398336.0, + "85": 232398336.0, + "86": 232398336.0, + "87": 232398336.0, + "88": 232398336.0, + "89": 232398336.0, + "90": 232398336.0, + "91": 232398336.0, + "92": 232398336.0, + "93": 232398336.0, + "94": 232398336.0, + "95": 232398336.0, + "96": 232398336.0, + "97": 232398336.0, + "98": 232398336.0, + "99": 232398336.0, + "100": 232398336.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 685490688.0, + "2": 773246464.0, + "3": 773246464.0, + "4": 773246464.0, + "5": 773246464.0, + "6": 773246464.0, + "7": 773246464.0, + "8": 773246464.0, + "9": 773246464.0, + "10": 773246464.0, + "11": 773246464.0, + "12": 773246464.0, + "13": 773246464.0, + "14": 773246464.0, + "15": 773246464.0, + "16": 773246464.0, + "17": 773246464.0, + "18": 773246464.0, + "19": 773246464.0, + "20": 773246464.0, + "21": 773246464.0, + "22": 773246464.0, + "23": 773246464.0, + "24": 773246464.0, + "25": 773246464.0, + "26": 773246464.0, + "27": 773246464.0, + "28": 773246464.0, + "29": 773246464.0, + "30": 773246464.0, + "31": 773246464.0, + "32": 773246464.0, + "33": 773246464.0, + "34": 773246464.0, + "35": 773246464.0, + "36": 773246464.0, + "37": 773246464.0, + "38": 773246464.0, + "39": 773246464.0, + "40": 773246464.0, + "41": 773246464.0, + "42": 773246464.0, + "43": 773246464.0, + "44": 773246464.0, + "45": 773246464.0, + "46": 773246464.0, + "47": 773246464.0, + "48": 773246464.0, + "49": 773246464.0, + "50": 773246464.0, + "51": 773246464.0, + "52": 773246464.0, + "53": 773246464.0, + "54": 773246464.0, + "55": 773246464.0, + "56": 773246464.0, + "57": 773246464.0, + "58": 773246464.0, + "59": 773246464.0, + "60": 773246464.0, + "61": 773246464.0, + "62": 773246464.0, + "63": 773246464.0, + "64": 773246464.0, + 
"65": 773246464.0, + "66": 773246464.0, + "67": 773246464.0, + "68": 773246464.0, + "69": 773246464.0, + "70": 773246464.0, + "71": 773246464.0, + "72": 773246464.0, + "73": 773246464.0, + "74": 773246464.0, + "75": 773246464.0, + "76": 773246464.0, + "77": 773246464.0, + "78": 773246464.0, + "79": 773246464.0, + "80": 773246464.0, + "81": 773246464.0, + "82": 773246464.0, + "83": 773246464.0, + "84": 773246464.0, + "85": 773246464.0, + "86": 773246464.0, + "87": 773246464.0, + "88": 773246464.0, + "89": 773246464.0, + "90": 773246464.0, + "91": 773246464.0, + "92": 773246464.0, + "93": 773246464.0, + "94": 773246464.0, + "95": 773246464.0, + "96": 773246464.0, + "97": 773246464.0, + "98": 773246464.0, + "99": 773246464.0, + "100": 773246464.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.93671, + "2": 0.44025, + "3": 0.31978, + "4": 0.30044, + "5": 0.29939, + "6": 0.29882, + "7": 0.29791, + "8": 0.29478, + "9": 0.29711, + "10": 0.29556, + "11": 0.29815, + "12": 0.29967, + "13": 0.29479, + "14": 0.29726, + "15": 0.29661, + "16": 0.29615, + "17": 0.29592, + "18": 0.29568, + "19": 0.29536, + "20": 0.29486, + "21": 0.29478, + "22": 0.29533, + "23": 0.29472, + "24": 0.29577, + "25": 0.29612, + "26": 0.29259, + "27": 0.28753, + "28": 0.28697, + "29": 0.70578, + "30": 0.29095, + "31": 0.29056, + "32": 0.29195, + "33": 0.29198, + "34": 0.29205, + "35": 0.29049, + "36": 0.28947, + "37": 0.29052, + "38": 0.29096, + "39": 0.29096, + "40": 0.29115, + "41": 0.29128, + "42": 0.29068, + "43": 0.29094, + "44": 0.29228, + "45": 0.29059, + "46": 0.29108, + "47": 0.29102, + "48": 0.29077, + "49": 0.29062, + "50": 0.2902, + "51": 0.30007, + "52": 0.63804, + "53": 0.28911, + "54": 0.46416, + "55": 0.29262, + "56": 0.37133, + "57": 0.29216, + "58": 0.32564, + "59": 0.29296, + "60": 0.2903, + "61": 0.29162, + "62": 0.28953, + "63": 0.28969, + "64": 0.28976, + "65": 0.64598, + "66": 0.28891, + "67": 0.55309, + "68": 
0.67465, + "69": 0.35714, + "70": 0.3918, + "71": 0.2878, + "72": 0.33397, + "73": 0.41898, + "74": 0.29045, + "75": 0.31982, + "76": 0.28797, + "77": 0.34091, + "78": 0.52101, + "79": 0.29094, + "80": 0.299, + "81": 0.43963, + "82": 0.28851, + "83": 0.38734, + "84": 0.38974, + "85": 0.38902, + "86": 0.69087, + "87": 0.37076, + "88": 0.29102, + "89": 0.55341, + "90": 0.54278, + "91": 0.28909, + "92": 0.31421, + "93": 0.29166, + "94": 0.29126, + "95": 0.32114, + "96": 0.29039, + "97": 0.30171, + "98": 0.29192, + "99": 0.29197, + "100": 0.31795 + } + } +} \ No newline at end of file From 583dd584fe2d0525f88a3d6b55732bcc5c4f10cd Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Tue, 13 Jan 2026 17:28:44 -0600 Subject: [PATCH 226/334] ci: Skip broken tests after dependency update (#2935) Signed-off-by: Charlie Truong --- tests/test_utils/recipes/moe.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index d702fd1ac71..02c3f68b5f1 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -89,7 +89,7 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph] products: - environment: [dev] - scope: [mr] + scope: [mr-broken] platforms: [dgx_h100] # hang: #513 # - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] # products: @@ -151,7 +151,7 @@ products: - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr-broken, mr-github] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading] products: @@ -187,13 +187,13 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed] products: - environment: [dev] - scope: [mr] + scope: [mr-broken] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] 
products: - environment: [dev] - scope: [mr] + scope: [mr-broken] platforms: [dgx_h100] # hang: #513 - environment: [dev] - scope: [mr-slim] + scope: [mr-slim-broken] platforms: [dgx_h100] From b0a702b2813f088b7107457e8091695b0cb8e66e Mon Sep 17 00:00:00 2001 From: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> Date: Tue, 13 Jan 2026 22:50:55 -1000 Subject: [PATCH 227/334] Cherry-pick optimizer override refactor from #2723 (#2835) Signed-off-by: John St John Signed-off-by: John St. John Signed-off-by: Boxiang Wang Co-authored-by: John St John Co-authored-by: Boxiang Wang --- megatron/core/optimizer/__init__.py | 237 ++++++++++-------- megatron/core/optimizer/optimizer_config.py | 69 ++++- megatron/core/optimizer_param_scheduler.py | 69 ++++- megatron/training/training.py | 17 +- .../test_layer_wise_optimizer.py | 8 +- tests/unit_tests/optimizer/__init__.py | 1 + .../optimizer/test_optimizer_config.py | 38 +++ tests/unit_tests/test_optimizer.py | 156 +++++++++++- tests/unit_tests/test_utilities.py | 5 +- 9 files changed, 476 insertions(+), 124 deletions(-) create mode 100644 tests/unit_tests/optimizer/__init__.py create mode 100644 tests/unit_tests/optimizer/test_optimizer_config.py diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 234bee274be..b4d15daefd2 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -3,7 +3,7 @@ import logging import warnings from dataclasses import astuple -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import torch from torch.optim import SGD as CPUSGD @@ -35,6 +35,11 @@ from megatron.core import parallel_state from megatron.core.optimizer.cpu_offloading.hybrid_optimizer import HybridDeviceOptimizer +from megatron.core.optimizer_param_scheduler import ( + ParamGroupOverride, + combine_param_group_overrides, + param_group_override_to_tuple, +) from 
megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.fsdp_dtensor_checkpoint import get_global_unique_param_name @@ -50,66 +55,84 @@ MegatronOptimizer, param_group_identifier_keys, ) -from .optimizer_config import AdamOptimizerConfig, OptimizerConfig, ParamKey, SGDOptimizerConfig +from .optimizer_config import ( + AdamOptimizerConfig, + OptimizerConfig, + ParamKey, + ParamPredicate, + SGDOptimizerConfig, +) logger = logging.getLogger(__name__) -def _matches(param: torch.nn.Parameter, param_name: str, param_key: ParamKey) -> bool: - """Returns true if passed-in parameter (with name) matches `param_key`. +def get_standard_config_overrides( + decoupled_lr: float | None = None, decoupled_min_lr: float | None = None +) -> Dict[ParamKey, ParamGroupOverride]: + """Get standard config overrides for the optimizer, handling decoupled LR and common wd skips. Args: - param (torch.nn.Parameter): Handle to parameter object. - param_name (str): Name of parameter in underlying PyTorch module. - param_key (ParamKey): ParamKey object. + decoupled_lr (float | None): decoupled learning rate. + decoupled_min_lr (float | None): decoupled minimum learning rate. Returns: - bool: True if parameter matches passed-in param_key. + Dict[ParamKey, ParamGroupOverride]: standard config overrides. """ + config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]] = {} + if decoupled_lr is not None: + decoupled_lr_config: ParamGroupOverride = {"max_lr": decoupled_lr} + decoupled_param_key = ParamKey(attr="is_embedding_or_output_parameter") + if decoupled_min_lr is not None: + decoupled_lr_config["min_lr"] = decoupled_min_lr + config_overrides[decoupled_param_key] = decoupled_lr_config + + # Next construct the standard param group overrides for no weight decay on bias parameters + # as well as any length 1 parameters. 
+ param_length_1_match = ParamPredicate( + name="param_len_1", fn=lambda param: len(param.shape) == 1 + ) + param_wd_mult_key = ParamKey(name="*.bias", predicate=param_length_1_match) + config_overrides[param_wd_mult_key] = ParamGroupOverride(wd_mult=0.0) - # Check if name matches. - if isinstance(param_key.name, str): - target_names = [param_key.name] - else: - target_names = list(param_key.name) - for target_name in target_names: - if param_name in target_name: - return True - - # Check if attribute matches. - if isinstance(param_key.attr, str): - target_attrs = [param_key.attr] - else: - target_attrs = list(param_key.attr) - for target_attr in target_attrs: - if getattr(param, target_attr, False): - return True - - return False + return config_overrides def _get_param_groups( model_chunks: List[MegatronModule], config: OptimizerConfig, - config_overrides: Optional[Dict[ParamKey, OptimizerConfig]], + config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]], ) -> List[Dict]: """Create parameter groups for optimizer. Creates parameter groups from provided optimizer config object. + NOTE There can be more than one match between a ParamKey and a parameter. + What we do is merge all of the matching ParamKey overrides into a single ParamGroupOverride + for that parameter and use that as the key for that parameter. Any parameters that get + the same set of merged overrides will be mapped into the same parameter group. + Args: model_chunks (List[MegatronModule]): model chunks to create parameter groups for. config (OptimizerConfig): optimizer configuration object. - config_overrides (Optional[Dict[LayerKey, OptimizerConfig]): optimizer overrides, - specified on a per-layer basis. + config_overrides (Optional[Dict[ParamKey, ParamGroupOverride]): optimizer overrides, + specified on a per-layer basis. 
NOTE: if you want to skip applying weight decay on bias + and length 1 parameters, and also do not want to do any other overrides, set this to an + empty dictionary rather than the default value of None. Returns: List of parameter groups. """ - # Map (wd_mult, is_expert_parallel, param_group_hyperparameters_config) to params. + # Map (pg_overrides, is_expert_parallel) to params. params_map = {} - configs_map = {} + + if config_overrides is None: + # TODO remove this default behavior eventually. + # This is only needed for backwards compatibility with the old config overrides API where + # the config_overrides argument by default lead to bias parameters and length 1 parameters. + # We assume that users of decoupled LR already provide config overrides so will adapt + # to the new API. + config_overrides = get_standard_config_overrides() for model_chunk in model_chunks: for name, param in model_chunk.named_parameters(): @@ -117,47 +140,31 @@ def _get_param_groups( continue uses_default_config = False - # Get optimizer config for this parameter. - if config_overrides is None: - config_for_param = config - uses_default_config = True + # Get optimizer config overrides for this parameter. + param_overrides_list: list[ParamGroupOverride] = [] + if config_overrides is not None: + for param_key, param_override in config_overrides.items(): + if param_key.matches(param, name): + param_overrides_list.append(param_override) + + if param_overrides_list: + param_override: ParamGroupOverride | None = combine_param_group_overrides( + param_overrides_list + ) else: - config_for_param = None - for param_key in config_overrides: - if _matches(param, name, param_key): - config_for_param = config_overrides[param_key] - break - # Fall back to default config. 
- if config_for_param is None: - config_for_param = config - uses_default_config = True + param_override = None is_expert_parallel = not getattr(param, 'allreduce', True) - # TODO: Make sure there is a way to support old no_weight_decay_func functionality - # and default_skip_embedding_weight_decay: - # or (default_skip_embedding_weight_decay and "embedding" in name) - no_wd = name.endswith(".bias") or len(param.shape) == 1 - if not no_wd: - wd_mult = 1.0 - else: - wd_mult = 0.0 - - # Create config_tuple that is hash-able. Remove timers object before - # creating config_tuple. - config_for_param_copy = copy.deepcopy(config_for_param) - config_for_param_copy.timers = None - config_tuple = astuple(config_for_param_copy) - key = (wd_mult, is_expert_parallel, config_tuple) + # Create config_tuple that is hash-able, and has a consistent ordering of the keys. + param_override_tuple: tuple[tuple[str, Any], ...] | None = ( + param_group_override_to_tuple(param_override) + ) + key = (param_override_tuple, is_expert_parallel) if key not in params_map: params_map[key] = [] params_map[key].append(param) - if key in configs_map: - assert (config_for_param, uses_default_config) == configs_map[key] - else: - configs_map[key] = (config_for_param, uses_default_config) - # Distributed checkpoint requires all ranks to have the same param groups, # so we need to align the param groups across ranks, otherwise we may have # runtime error when loading the checkpoint or numerical error when resuming training. @@ -168,34 +175,47 @@ def _get_param_groups( for key in keys: if key not in params_key: params_key.append(key) - + # Need to pick one of the param_override_tuples to use for the param group. param_groups = [] - for key in params_key: - wd_mult, is_expert_parallel, _ = key + # Sort keys, None first. 
+ for key in sorted(params_key, key=lambda x: (x[0] is not None, x[0])): + param_override_tuple, is_expert_parallel = key params = params_map[key] if key in params_map else [] - config, uses_default_config = None, True - if key not in configs_map: - assert params == [] + if param_override_tuple is None: + param_override: ParamGroupOverride = {} else: - config, uses_default_config = configs_map[key] - assert config is not None + param_override: ParamGroupOverride = {k: v for (k, v) in param_override_tuple} + + # False if param_group_override is None or empty tuple or if we do not modify the + # LR schedule. + # NOTE: "default_config" is used for logging the learning rate in training.py. + # so set to True if we do not modify the learning rate. + # if param_group['default_config']: + # learning_rate = param_group['lr'] + uses_default_lr_schedule: bool = (not bool(param_override_tuple)) or not any( + ["lr" in k for k in param_override] + ) # TODO: Remove "backwards compatible" fields below eventually. + default_config: ParamGroupOverride = { + 'wd_mult': 1.0, + 'lr_mult': 1.0, + 'is_decoupled_lr': False, + # The following two fields may be important to keep even when we remove the + # above "backwards compatible" fields. + "max_lr": config.lr, # user may override this in param_override + "min_lr": config.min_lr, # user may override this in param_override + } + assert ( + "params" not in param_override + ), "'params' should not be in param_override, this is a protected key" param_group = { 'params': params, - 'wd_mult': wd_mult, # For backwards compatibility. - 'lr_mult': 1.0, # For backwards compatibility. 'is_expert_parallel': is_expert_parallel, - 'is_decoupled_lr': False, # For backwards compatibility. - 'default_config': uses_default_config, + 'default_config': uses_default_lr_schedule, + **default_config, + **param_override, # keep **param_override last so that users can override other fields. } - - # Stick relevant fields into param_group from config object. 
- if config is not None: - param_group['max_lr'] = config.lr - param_group['min_lr'] = config.min_lr - # TODO: Add other relevant arguments (e.g., weight decay, optimizer) - # here as well. param_groups.append(param_group) return param_groups @@ -205,7 +225,7 @@ def _get_param_groups_and_buffers( model_chunks: List[MegatronModule], model_chunk_offset: int, config: OptimizerConfig, - config_overrides: Optional[Dict[ParamKey, OptimizerConfig]], + config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]], filter_fn: Callable, buffer_name: str, ) -> Tuple[List[Dict], Dict[int, List[_ParamAndGradBuffer]]]: @@ -216,8 +236,8 @@ def _get_param_groups_and_buffers( groups for. model_chunk_offset (int): offset of model_chunks in global model_chunks list. config (OptimizerConfig): optimizer configuration object. - config_overrides (Optional[Dict[LayerKey, OptimizerConfig]): optimizer overrides, - specified on a per-layer basis. + config_overrides (Optional[Dict[ParamKey, ParamGroupOverride]): optimizer/scheduler + overrides, specified on the basis of ParamKey matches with each parameter. lr (float): learning rate. min_lr (float): minimum learning rate. filter_fn (callable): filtering function for param_groups. @@ -447,10 +467,37 @@ def init_state_fn(opt, config=None): return optimizer +def check_config_overrides_consistency( + config: OptimizerConfig, config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]] +): + """Check if the config overrides are consistent with the config.""" + + # TODO: Remove `optimizer` from this eventually (e.g., if we use Muon for some layers and + # Adam for other layers). This would need some more refactoring to work though (param_groups + # filtered by optimizer passed into _get_megatron_optimizer_based_on_param_groups). 
+ if config_overrides is not None: + fields_to_check_for_consistency = [ + 'overlap_param_gather_with_optimizer_step', + 'optimizer', + 'optimizer_cpu_offload', + ] + for field_name in fields_to_check_for_consistency: + base_field = getattr(config, field_name, None) + all_config_overrides = list(config_overrides.values()) + for config_override in all_config_overrides: + if field_name in config_override: + field = config_override[field_name] + if field != base_field: + raise ValueError( + f"Field {field_name} should not be overriden in a config override." + ) + return True + + def get_megatron_optimizer( config: OptimizerConfig, model_chunks: List[MegatronModule], - config_overrides: Optional[Dict[ParamKey, OptimizerConfig]] = None, + config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]] = None, use_gloo_process_groups: bool = True, pg_collection: Optional[ProcessGroupCollection] = None, dump_param_to_param_group_map: Optional[str] = None, @@ -476,19 +523,7 @@ def get_megatron_optimizer( log_single_rank(logger, logging.INFO, f'Setting up optimizer with config {config}') - # TODO: Remove `optimizer` from this eventually (e.g., if we use Muon for some layers and - # Adam for other layers). This would need some more refactoring to work though (param_groups - # filtered by optimizer passed into _get_megatron_optimizer_based_on_param_groups). - fields_to_check_for_consistency = [ - 'overlap_param_gather_with_optimizer_step', - 'optimizer', - 'optimizer_cpu_offload', - ] - for field_name in fields_to_check_for_consistency: - field = getattr(config, field_name, None) - if config_overrides is not None: - all_configs = list(config_overrides.values()) - assert all([getattr(x, field_name, None) == field for x in all_configs]) + check_config_overrides_consistency(config, config_overrides) # Separate out first model chunk if overlapping param AG with optimizer step. 
if config.overlap_param_gather_with_optimizer_step: diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index 6a4199a1f7a..679878ed954 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -1,5 +1,6 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +import fnmatch from dataclasses import dataclass, field from typing import Callable, Optional, Tuple, Union @@ -8,6 +9,30 @@ from ..utils import is_te_min_version +@dataclass(frozen=True) +class ParamPredicate: + """Wraps a matching function to make it hashable for ParamKey. + Example: + >>> shape_1_param = ParamPredicate(name="s1", fn=lambda param: len(param.shape) == 1) + >>> shape_1_param(torch.empty(10)) + True + >>> shape_1_param_copy = ParamPredicate(name="s1", fn=lambda param: len(param.shape) == 1) + >>> shape_1_param == shape_1_param_copy # name is used to match + True + >>> {shape_1_param, shape_1_param_copy} == {shape_1_param} # set hashing works properly + + NOTE: + __hash__ and __eq__ are automatically generated by @dataclass(frozen=True) + based solely on 'name' because we set compare=False/hash=False on 'fn'. + """ + + name: str + fn: Callable[[torch.nn.Parameter], bool] = field(compare=False, hash=False) + + def __call__(self, param: torch.nn.Parameter) -> bool: + return self.fn(param) + + @dataclass(frozen=True, slots=True) class ParamKey: """Key to group parameters by. All such grouped parameters can share an @@ -16,11 +41,53 @@ class ParamKey: # TODO: Can add layer_id here later. name: Union[str, Tuple[str]] = field(default_factory=tuple) - """Parameter name(s).""" + """Parameter name(s), will use unix filesystem path syntax for matching.""" attr: Union[str, Tuple[str]] = field(default_factory=tuple) """Parameter attribute(s).""" + predicate: Union[ParamPredicate, Tuple[ParamPredicate]] = field(default_factory=tuple) + """Predicate(s) to match parameters by. 
If multiple predicates are provided, any must match.""" + + def matches(self, param: torch.nn.Parameter, param_name: str) -> bool: + """Returns true if passed-in parameter (with name) matches `param_key`. + + Args: + param (torch.nn.Parameter): Handle to parameter object. + param_name (str): Name of parameter in underlying PyTorch module. + + Returns: + bool: True if parameter matches passed-in param_key. + """ + + # Check if name matches. + if isinstance(self.name, str): + target_names = [self.name] + else: + target_names = list(self.name) + for target_name in target_names: + if fnmatch.fnmatch(param_name, target_name): + return True + + # Check if attribute matches. + if isinstance(self.attr, str): + target_attrs = [self.attr] + else: + target_attrs = list(self.attr) + for target_attr in target_attrs: + if getattr(param, target_attr, False): + return True + + # Check if predicate matches. + if isinstance(self.predicate, ParamPredicate): + if self.predicate(param): + return True + else: + for predicate in self.predicate: + if predicate(param): + return True + return False + @dataclass class OptimizerConfig: diff --git a/megatron/core/optimizer_param_scheduler.py b/megatron/core/optimizer_param_scheduler.py index 9f771c612e8..7ff6fee35a7 100644 --- a/megatron/core/optimizer_param_scheduler.py +++ b/megatron/core/optimizer_param_scheduler.py @@ -3,14 +3,77 @@ """Learning rate decay and weight decay incr functions.""" import logging import math -from typing import Optional +from typing import TYPE_CHECKING, Any, Optional, TypedDict -from megatron.core.optimizer import MegatronOptimizer from megatron.core.utils import log_single_rank +if TYPE_CHECKING: + # Avoid circular import. + from megatron.core.optimizer import MegatronOptimizer + logger = logging.getLogger(__name__) +class ParamGroupOverride(TypedDict): + """Override values for a parameter group. These values may be optimizer-state/scheduler related. + + These are the values you see later in param_group.get(...) 
calls in the + OptimizerParamScheduler.get_lr and get_wd methods. If you use a custom optimizer + or scheduler, you could override those variables instead. + + Example: + >>> param_group_override = ParamGroupOverride(min_lr=1e-4, wd_mult=0.1) + >>> param_group_override == ParamGroupOverride(newvar=3) # this is ok too + + """ + + max_lr: float + min_lr: float + start_wd: float + end_wd: float + wd_mult: float + + +def param_group_override_to_tuple( + param_group_override: ParamGroupOverride | None, +) -> tuple[tuple[str, Any], ...] | None: + """Convert a param group override to a tuple for use as a key in a dictionary. + + The tuple is sorted by the keys of the param group override to handle different orderings of + the keys in different override dictionaries which still mean the same thing. + """ + if param_group_override is None: + return None + return tuple(sorted(param_group_override.items())) + + +def combine_param_group_overrides( + param_group_overrides: list[ParamGroupOverride | None], +) -> ParamGroupOverride: + """Combine a list of param group overrides into a single param group override. + + This function ensures that the overrides are not conflicting as well. 
+ + Args: + param_group_overrides (list[ParamGroupOverride]): list of param group overrides to combine + + Returns: + ParamGroupOverride: combined param group override + """ + combined_override = ParamGroupOverride() + for override in param_group_overrides: + if override is None: + continue + for key, value in override.items(): + if key in combined_override: + if combined_override[key] != value: + raise ValueError( + f"Conflicting overrides for {key}: {combined_override[key]} and {value}" + ) + combined_override[key] = value + return combined_override + + class OptimizerParamScheduler: """Anneals learning rate and weight decay @@ -38,7 +101,7 @@ class OptimizerParamScheduler: def __init__( self, - optimizer: MegatronOptimizer, + optimizer: "MegatronOptimizer", init_lr: float, max_lr: float, min_lr: float, diff --git a/megatron/training/training.py b/megatron/training/training.py index 5b171821497..845d271f62e 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -12,7 +12,7 @@ import math import os import sys -from typing import Any, Optional +from typing import Any, Optional, Dict import torch.distributed @@ -68,6 +68,7 @@ is_vp_first_stage, is_vp_last_stage, ) +from megatron.core.optimizer import get_standard_config_overrides from megatron.training.checkpointing import load_checkpoint from megatron.training.checkpointing import save_checkpoint from megatron.training.checkpointing import checkpoint_exists @@ -1245,17 +1246,9 @@ def get_megatron_optimizer_config(args: Any) -> OptimizerConfig: else: raise ValueError("Invalid optimizer type!") - # Construct the appropriate config_overrides object. - # TODO: add more logic here as needed down the road. 
- if args.decoupled_lr is not None: - decoupled_param_key = ParamKey(attr="is_embedding_or_output_parameter") - decoupled_optimizer_config = copy.deepcopy(config) - decoupled_optimizer_config.lr = args.decoupled_lr - if args.decoupled_min_lr is not None: - decoupled_optimizer_config.min_lr = args.decoupled_min_lr - config_overrides = {decoupled_param_key: decoupled_optimizer_config} - else: - config_overrides = None + # Construct the appropriate config_overrides object. This default handles many cases, but + # can be added to as needed by the user, or replaced entirely with a custom override. + config_overrides = get_standard_config_overrides(args.decoupled_lr, args.decoupled_min_lr) return config, config_overrides diff --git a/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py b/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py index 0816273dfb8..54e12b9e7b7 100644 --- a/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py +++ b/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py @@ -189,8 +189,9 @@ def test_broadcast_params(self, tp, pp): for name, param in model[0].named_parameters(): assert torch.allclose(param.data, original_params[name]) + # TODO(@boxiangw): add PP=4 back and fix the test @pytest.mark.parametrize('tp', [1, 2, 4]) - @pytest.mark.parametrize('pp', [1, 2, 4]) + @pytest.mark.parametrize('pp', [1, 2]) @pytest.mark.parametrize('bf16', [True, False]) def test_layer_wise_optimizer_save_load(self, tmp_path_dist_ckpt, tp, pp, bf16): """Test save/load of LayerWiseDistributedOptimizer checkpoints.""" @@ -317,10 +318,11 @@ def test_layer_wise_optimizer_count_zeros(self, tp, pp): num_zeros = optimizer.count_zeros() assert num_zeros >= 0 + # TODO(@boxiangw): add PP=4 back and fix the test @pytest.mark.parametrize('src_tp', [1, 2, 4]) - @pytest.mark.parametrize('src_pp', [1, 2, 4]) + @pytest.mark.parametrize('src_pp', [1, 2]) @pytest.mark.parametrize('dest_tp', [1, 2, 4]) - 
@pytest.mark.parametrize('dest_pp', [1, 2, 4]) + @pytest.mark.parametrize('dest_pp', [1, 2]) def test_layer_wise_optimizer_resharding( self, tmp_path_dist_ckpt, src_tp, src_pp, dest_tp, dest_pp ): diff --git a/tests/unit_tests/optimizer/__init__.py b/tests/unit_tests/optimizer/__init__.py new file mode 100644 index 00000000000..b5dff7b5663 --- /dev/null +++ b/tests/unit_tests/optimizer/__init__.py @@ -0,0 +1 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. diff --git a/tests/unit_tests/optimizer/test_optimizer_config.py b/tests/unit_tests/optimizer/test_optimizer_config.py new file mode 100644 index 00000000000..0ecb877ed27 --- /dev/null +++ b/tests/unit_tests/optimizer/test_optimizer_config.py @@ -0,0 +1,38 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +import torch + +from megatron.core.optimizer.optimizer_config import ParamKey, ParamPredicate + + +def test_paramkey_matches(): + len_1_predicate = ParamPredicate(name="param_len_1", fn=lambda param: len(param.shape) == 1) + endswith_bias = ParamKey(name="*.bias") + has_dotbias = ParamKey(name="*.bias*") + len_1_param = ParamKey(predicate=len_1_predicate) + has_bias_or_len1_param = ParamKey(name="*.bias", predicate=len_1_predicate) + has_attr = ParamKey(attr="is_embedding_or_output_parameter") + + assert endswith_bias.matches(torch.nn.Parameter(torch.empty(10, 10)), "interesting.bias") + assert not endswith_bias.matches( + torch.nn.Parameter(torch.empty(10, 10)), "something.bias.other" + ) + assert has_dotbias.matches(torch.nn.Parameter(torch.empty(10)), "random.biasstuff") + assert not has_dotbias.matches(torch.nn.Parameter(torch.empty(10, 10)), "random_bias_name") + assert len_1_param.matches(torch.nn.Parameter(torch.empty(10)), "interesting.bias") + assert not len_1_param.matches(torch.nn.Parameter(torch.empty(10, 10)), "interesting_bias") + assert has_bias_or_len1_param.matches( + torch.nn.Parameter(torch.empty(10, 10)), "interesting.bias" + ) + 
assert has_bias_or_len1_param.matches(torch.nn.Parameter(torch.empty(10)), "interesting_bias") + assert not has_bias_or_len1_param.matches( + torch.nn.Parameter(torch.empty(10, 10)), "random_bias_name" + ) + p_with_attr = torch.nn.Parameter(torch.empty(10, 10)) + setattr(p_with_attr, "is_embedding_or_output_parameter", True) + assert has_attr.matches(p_with_attr, "interesting.bias") + assert not has_attr.matches(torch.nn.Parameter(torch.empty(10, 10)), "interesting.bias") + + # We expect that if the return of the attribute is False, it should not match even if + # it has the attribute. + setattr(p_with_attr, "is_embedding_or_output_parameter", False) + assert not has_attr.matches(p_with_attr, "interesting.bias") diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py index f74414c449b..4f914b56f7c 100644 --- a/tests/unit_tests/test_optimizer.py +++ b/tests/unit_tests/test_optimizer.py @@ -1,6 +1,7 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import os +from unittest.mock import patch import pytest import torch @@ -12,7 +13,16 @@ from transformer_engine.pytorch.fp8 import fp8_autocast from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig -from megatron.core.optimizer import ChainedOptimizer, OptimizerConfig, get_megatron_optimizer +from megatron.core.optimizer import ( + ChainedOptimizer, + OptimizerConfig, + ParamKey, + ParamPredicate, + _get_param_groups, + check_config_overrides_consistency, + get_megatron_optimizer, +) +from megatron.core.optimizer_param_scheduler import ParamGroupOverride from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer import TransformerConfig from megatron.core.utils import is_te_min_version, is_torch_min_version @@ -24,7 +34,7 @@ from transformer_engine.pytorch.fp8 import check_fp8_block_scaling_support fp8_block_scaling_available, reason_for_no_fp8_block_scaling = check_fp8_block_scaling_support() - from transformer_engine.common.recipe import Float8BlockScaling, Format + from transformer_engine.common.recipe import DelayedScaling, Float8BlockScaling, Format except: fp8_block_scaling_available = False reason_for_no_fp8_block_scaling = "FP8 block scaled GEMM requires Hopper and CUDA >= 12.9." @@ -54,6 +64,148 @@ def forward(self, x): return x +@patch('torch.distributed.get_world_size', return_value=1) +@patch( + 'torch.distributed.all_gather_object', lambda output_list, obj: output_list.__setitem__(0, obj) +) +def test_get_param_groups_no_overrides(mock_get_world_size): + net = Net() + # NOTE: to get no overrides, supply an empty dictionary rather than None. 
+ param_groups = _get_param_groups([net], OptimizerConfig(optimizer='adam', lr=0.01), {}) + assert len(param_groups) == 1 + pg0 = param_groups[0] + assert pg0.keys() == { + 'params', + 'is_expert_parallel', + 'default_config', + 'wd_mult', + 'lr_mult', + 'is_decoupled_lr', + 'max_lr', + 'min_lr', + } + assert pg0['params'] == list(net.parameters()) + assert pg0['is_expert_parallel'] == False + assert pg0['default_config'] == True + assert pg0['wd_mult'] == 1.0 + assert pg0['lr_mult'] == 1.0 + assert pg0['is_decoupled_lr'] == False + assert pg0['max_lr'] == 0.01 # from the optimizer config default for lr + assert pg0['min_lr'] is None # from the optimizer config default. + + +@patch('torch.distributed.get_world_size', return_value=1) +@patch( + 'torch.distributed.all_gather_object', lambda output_list, obj: output_list.__setitem__(0, obj) +) +def test_get_param_groups_default_overrides(mock_get_world_size): + """Test that the default overrides are applied to the parameter groups.""" + net = Net() + # NOTE: to get legacy default overrides, supply None. 
+ opt_config = OptimizerConfig(optimizer='adam', lr=0.01) + check_config_overrides_consistency(opt_config, None) + param_groups = _get_param_groups([net], opt_config, None) + assert len(param_groups) == 2 + pg0, pg1 = param_groups + wd_mults = {pg0['wd_mult'], pg1['wd_mult']} + assert wd_mults == {1.0, 0.0} + + +@patch('torch.distributed.get_world_size', return_value=1) +@patch( + 'torch.distributed.all_gather_object', lambda output_list, obj: output_list.__setitem__(0, obj) +) +def test_get_param_groups_with_overrides(mock_get_world_size): + net = Net() + config_overrides = { + ParamKey( + name="*.bias", + predicate=ParamPredicate(name="param_len_1", fn=lambda param: len(param.shape) == 1), + ): ParamGroupOverride(wd_mult=0.0) + } + opt_config = OptimizerConfig(optimizer='adam', lr=0.01) + check_config_overrides_consistency(opt_config, config_overrides) + param_groups = _get_param_groups([net], opt_config, config_overrides) + assert len(param_groups) == 2 + p_set = set(net.parameters()) + + assert p_set == set(param_groups[0]['params']) | set(param_groups[1]['params']) + assert len(p_set) == len(param_groups[0]['params']) + len(param_groups[1]['params']) + assert param_groups[0]['wd_mult'] == 0.0 or param_groups[1]['wd_mult'] == 0.0 + assert param_groups[0]['wd_mult'] == 1.0 or param_groups[1]['wd_mult'] == 1.0 + assert len(param_groups[0]['params']) > 0 and len(param_groups[1]['params']) > 0 + + +@patch('torch.distributed.get_world_size', return_value=1) +@patch( + 'torch.distributed.all_gather_object', lambda output_list, obj: output_list.__setitem__(0, obj) +) +def test_get_param_groups_multiple_matches(mock_get_world_size): + net = Net() + + param_groups = _get_param_groups( + [net], + OptimizerConfig(optimizer='adam', lr=0.01), + { + ParamKey(name="*.bias"): ParamGroupOverride(min_lr=1e-4, wd_mult=0.0), + ParamKey( + predicate=ParamPredicate(name="param_len_1", fn=lambda param: len(param.shape) == 1) + ): ParamGroupOverride(wd_mult=0.0, min_lr=1e-4), + }, + ) 
+ config_overrides = { + ParamKey( + name="*.bias", + predicate=ParamPredicate(name="param_len_1", fn=lambda param: len(param.shape) == 1), + ): ParamGroupOverride(min_lr=1e-4, wd_mult=0.0) + } + opt_config = OptimizerConfig(optimizer='adam', lr=0.01) + check_config_overrides_consistency(opt_config, config_overrides) + param_groups2 = _get_param_groups([net], opt_config, config_overrides) + assert len(param_groups) == 2 + assert param_groups == param_groups2 + + +@patch('torch.distributed.get_world_size', return_value=1) +@patch( + 'torch.distributed.all_gather_object', lambda output_list, obj: output_list.__setitem__(0, obj) +) +def test_get_param_groups_overlapping_matches(mock_get_world_size): + """In this test, we see if we can have two matches that create three param groups.""" + net = Net() + # We expect that all convolution parameters will have wd_mult=0.0 + # However the conv1 related parameters will additionally have a different LR schedule. + # this should create three param groups (no match, conv1 (both wd_mult=0.0 and LR schedule), conv2 (only wd_mult=0.0)) + config_overrides = { + ParamKey(name="*conv*"): ParamGroupOverride(wd_mult=0.0), + ParamKey(name="*conv1*"): ParamGroupOverride(min_lr=10, max_lr=20), + } + opt_config = OptimizerConfig(optimizer='adam', lr=0.01) + check_config_overrides_consistency(opt_config, config_overrides) + param_groups = _get_param_groups([net], opt_config, config_overrides) + assert len(param_groups) == 3 + p_set = set(net.parameters()) + assert p_set == set(param_groups[0]['params']) | set(param_groups[1]['params']) | set( + param_groups[2]['params'] + ) + assert len(p_set) == len(param_groups[0]['params']) + len(param_groups[1]['params']) + len( + param_groups[2]['params'] + ) + assert ( + param_groups[0]['wd_mult'] == 1.0 + ), "We expect the first param group to be the None one, which should have wd_mult=1.0" + assert ( + param_groups[1]['wd_mult'] == 0.0 + ), "We expect the second param group to be the conv1 one, which 
should have wd_mult=0.0" + assert ( + param_groups[2]['wd_mult'] == 0.0 + ), "We expect the third param group to be the conv2 one, which should have wd_mult=0.0" + assert param_groups[1]['min_lr'] == 10 + assert param_groups[1]['max_lr'] == 20 + assert param_groups[2]['min_lr'] is None + assert param_groups[2]['max_lr'] == 0.01 + + def test_chained_optimizer(): net = Net() optimizer_1 = Adam(list(net.parameters())[:2], lr=0.01) diff --git a/tests/unit_tests/test_utilities.py b/tests/unit_tests/test_utilities.py index f16f88f7865..39c78efb2b9 100644 --- a/tests/unit_tests/test_utilities.py +++ b/tests/unit_tests/test_utilities.py @@ -1,3 +1,4 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import os from datetime import timedelta @@ -27,8 +28,8 @@ def __init__( class Utils: - world_size = int(os.environ['WORLD_SIZE']) - rank = int(os.environ['LOCAL_RANK']) + world_size = int(os.environ.get('WORLD_SIZE', '1')) + rank = int(os.environ.get('LOCAL_RANK', '0')) inited = False store = None From 1964d396810b72fde6706cc61831cafe1b868b50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 14 Jan 2026 12:16:01 +0000 Subject: [PATCH 228/334] ci(hotfix): Disable gpt_grpo_tp1_pp1_dp8_583m_throughputtest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- tests/test_utils/recipes/gpt-grpo.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_utils/recipes/gpt-grpo.yaml b/tests/test_utils/recipes/gpt-grpo.yaml index 76f1ea2d3a9..90e9815c5fe 100644 --- a/tests/test_utils/recipes/gpt-grpo.yaml +++ b/tests/test_utils/recipes/gpt-grpo.yaml @@ -54,11 +54,11 @@ spec: bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} products: - - test_case: [gpt_grpo_tp1_pp1_dp8_583m_throughputtest] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] + # - test_case: 
[gpt_grpo_tp1_pp1_dp8_583m_throughputtest] + # products: + # - environment: [dev] + # scope: [mr] + # platforms: [dgx_h100] - test_case: [gpt_grpo_tp1_pp1_dp8_583m_throughputtest_github] products: - environment: [dev] From 383505c753fff5a21723c7182a40c198f610481d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 14 Jan 2026 17:01:44 +0100 Subject: [PATCH 229/334] [dev]: ci: Onboard GB200 (#2922) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab-ci.yml | 223 +++++++------- .gitlab/scripts/build.sh | 24 +- .gitlab/stages/01.build.yml | 81 ++++- .gitlab/stages/03.integration-tests.yml | 31 ++ .gitlab/stages/04.functional-tests.yml | 33 ++ docker/Dockerfile.ci.dev | 11 +- megatron/core/datasets/Makefile | 2 +- .../shell_test_utils/_run_training.sh | 4 +- .../golden_values_dev_dgx_gb200.json | 287 ++++++++++++++++++ .../python_scripts/launch_jet_workload.py | 6 +- .../python_scripts/recipe_parser.py | 14 +- .../test_utils/recipes/_build-mcore-dev.yaml | 2 +- .../test_utils/recipes/_build-mcore-lts.yaml | 2 +- tests/test_utils/recipes/gpt-gb200.yaml | 73 +++++ tests/test_utils/recipes/gpt.yaml | 2 +- 15 files changed, 645 insertions(+), 150 deletions(-) create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json create mode 100644 tests/test_utils/recipes/gpt-gb200.yaml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 53574fdea22..a238f2c9999 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,16 +1,16 @@ .merge_train_rule: &merge_train_rule - UNIT_TEST: "yes" + UNIT_TEST: 'yes' UNIT_TEST_REPEAT: 1 UNIT_TEST_TIMEOUT: 30 - INTEGRATION_TEST: "no" + INTEGRATION_TEST: 'no' INTEGRATION_TEST_SCOPE: mr - FUNCTIONAL_TEST: "yes" + FUNCTIONAL_TEST: 'yes' FUNCTIONAL_TEST_SCOPE: mr-slim FUNCTIONAL_TEST_REPEAT: 1 FUNCTIONAL_TEST_TIME_LIMIT: 2700 - 
CLUSTER_A100: "" - CLUSTER_H100: "" - PUBLISH: "no" + CLUSTER_A100: '' + CLUSTER_H100: '' + PUBLISH: 'no' workflow: rules: @@ -35,30 +35,30 @@ workflow: # For push to main - if: $CI_PIPELINE_SOURCE == 'push' && ($CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "dev" || $CI_COMMIT_BRANCH =~ /^core_/) variables: - UNIT_TEST: "no" - INTEGRATION_TEST: "no" - FUNCTIONAL_TEST: "yes" + UNIT_TEST: 'no' + INTEGRATION_TEST: 'no' + FUNCTIONAL_TEST: 'yes' FUNCTIONAL_TEST_SCOPE: mr FUNCTIONAL_TEST_REPEAT: 5 - FUNCTIONAL_TEST_RECORD_CHECKPOINTS: "no" + FUNCTIONAL_TEST_RECORD_CHECKPOINTS: 'no' FUNCTIONAL_TEST_TIME_LIMIT: 3600 - CLUSTER_A100: "" - CLUSTER_H100: "" - PUBLISH: "no" + CLUSTER_A100: '' + CLUSTER_H100: '' + PUBLISH: 'no' auto_cancel: on_new_commit: interruptible # For merge-trains that need to be fast-tracked - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train' && $CI_MERGE_REQUEST_LABELS =~ /fast-track/ variables: - UNIT_TEST: "yes" + UNIT_TEST: 'yes' UNIT_TEST_REPEAT: 1 UNIT_TEST_TIMEOUT: 30 - INTEGRATION_TEST: "no" - FUNCTIONAL_TEST: "no" - CLUSTER_A100: "" - CLUSTER_H100: "" - PUBLISH: "no" + INTEGRATION_TEST: 'no' + FUNCTIONAL_TEST: 'no' + CLUSTER_A100: '' + CLUSTER_H100: '' + PUBLISH: 'no' # For normal merge-trains - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train' @@ -67,75 +67,75 @@ workflow: # For MRs with integration suite - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run tests/ variables: - UNIT_TEST: "yes" + UNIT_TEST: 'yes' UNIT_TEST_REPEAT: 1 UNIT_TEST_TIMEOUT: 30 - INTEGRATION_TEST: "yes" + INTEGRATION_TEST: 'yes' INTEGRATION_TEST_SCOPE: mr - FUNCTIONAL_TEST: "no" + FUNCTIONAL_TEST: 'no' FUNCTIONAL_TEST_SCOPE: mr-slim FUNCTIONAL_TEST_REPEAT: 1 FUNCTIONAL_TEST_TIME_LIMIT: 2700 - CLUSTER_A100: "" - CLUSTER_H100: "" - PUBLISH: "no" + CLUSTER_A100: '' + CLUSTER_H100: '' + PUBLISH: 'no' # For MRs with nightly - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run nightly/ variables: 
- UNIT_TEST: "yes" + UNIT_TEST: 'yes' UNIT_TEST_REPEAT: 1 UNIT_TEST_TIMEOUT: 30 - INTEGRATION_TEST: "no" - FUNCTIONAL_TEST: "yes" + INTEGRATION_TEST: 'no' + FUNCTIONAL_TEST: 'yes' FUNCTIONAL_TEST_SCOPE: nightly FUNCTIONAL_TEST_REPEAT: 5 - FUNCTIONAL_TEST_RECORD_CHECKPOINTS: "no" + FUNCTIONAL_TEST_RECORD_CHECKPOINTS: 'no' FUNCTIONAL_TEST_TIME_LIMIT: 2700 - CLUSTER_A100: "" - CLUSTER_H100: "" - PUBLISH: "no" + CLUSTER_A100: '' + CLUSTER_H100: '' + PUBLISH: 'no' # For MRs with weekly - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run weekly/ variables: - UNIT_TEST: "yes" + UNIT_TEST: 'yes' UNIT_TEST_REPEAT: 1 UNIT_TEST_TIMEOUT: 30 - INTEGRATION_TEST: "no" - FUNCTIONAL_TEST: "yes" + INTEGRATION_TEST: 'no' + FUNCTIONAL_TEST: 'yes' FUNCTIONAL_TEST_SCOPE: weekly FUNCTIONAL_TEST_REPEAT: 1 - FUNCTIONAL_TEST_RECORD_CHECKPOINTS: "no" + FUNCTIONAL_TEST_RECORD_CHECKPOINTS: 'no' FUNCTIONAL_TEST_TIME_LIMIT: 9000 - CLUSTER_A100: "" - CLUSTER_H100: "" - PUBLISH: "no" + CLUSTER_A100: '' + CLUSTER_H100: '' + PUBLISH: 'no' # For MRs with heavy suite - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run functional tests/ variables: - UNIT_TEST: "yes" + UNIT_TEST: 'yes' UNIT_TEST_REPEAT: 1 UNIT_TEST_TIMEOUT: 30 - INTEGRATION_TEST: "no" - FUNCTIONAL_TEST: "yes" + INTEGRATION_TEST: 'no' + FUNCTIONAL_TEST: 'yes' FUNCTIONAL_TEST_SCOPE: mr FUNCTIONAL_TEST_REPEAT: 1 FUNCTIONAL_TEST_TIME_LIMIT: 2700 - CLUSTER_A100: "" - CLUSTER_H100: "" - PUBLISH: "no" + CLUSTER_A100: '' + CLUSTER_H100: '' + PUBLISH: 'no' # Default MRs - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' variables: - UNIT_TEST: "yes" + UNIT_TEST: 'yes' UNIT_TEST_REPEAT: 1 UNIT_TEST_TIMEOUT: 30 - INTEGRATION_TEST: "no" - FUNCTIONAL_TEST: "no" - PUBLISH: "no" + INTEGRATION_TEST: 'no' + FUNCTIONAL_TEST: 'no' + PUBLISH: 'no' - when: never @@ -157,104 +157,109 @@ default: variables: BUILD: - value: "yes" + value: 'yes' UNIT_TEST: - value: "yes" + value: 
'yes' options: - - "yes" - - "no" + - 'yes' + - 'no' description: To run the funtional test suite UNIT_TEST_REPEAT: - value: "1" - description: "Number of repetitions" + value: '1' + description: 'Number of repetitions' UNIT_TEST_TIMEOUT: - value: "30" + value: '30' description: Timeout (minutes) for Unit tests (all repeats) INTEGRATION_TEST: - value: "yes" + value: 'yes' options: - - "yes" - - "no" + - 'yes' + - 'no' description: To run the integration test suite INTEGRATION_TEST_SCOPE: - value: "mr" + value: 'mr' options: - - "mr" - - "nightly" - - "weekly" - - "pre-release" - - "release" - description: "Testsuite to run (only for INTEGRATION_TEST=yes)" + - 'mr' + - 'nightly' + - 'weekly' + - 'pre-release' + - 'release' + description: 'Testsuite to run (only for INTEGRATION_TEST=yes)' INTEGRATION_TEST_TIME_LIMIT: - value: "900" - description: "Timeout in seconds per test" + value: '900' + description: 'Timeout in seconds per test' INTEGRATION_TEST_CASES: - value: "all" + value: 'all' description: "Comma-separated list of test_cases to run. Use 'all' to run the full suite." FUNCTIONAL_TEST: - value: "yes" + value: 'yes' options: - - "yes" - - "no" + - 'yes' + - 'no' description: To run the funtional test suite FUNCTIONAL_TEST_SCOPE: - value: "mr" + value: 'mr' options: - - "mr" - - "nightly" - - "weekly" - - "pre-release" - - "release" - description: "Testsuite to run (only for FUNCTIONAL_TEST=yes)" + - 'mr' + - 'nightly' + - 'weekly' + - 'pre-release' + - 'release' + description: 'Testsuite to run (only for FUNCTIONAL_TEST=yes)' FUNCTIONAL_TEST_REPEAT: - value: "5" - description: "Number of repetitions per test" + value: '5' + description: 'Number of repetitions per test' FUNCTIONAL_TEST_TIME_LIMIT: - value: "2700" - description: "Timeout in seconds per test" + value: '2700' + description: 'Timeout in seconds per test' FUNCTIONAL_TEST_CASES: - value: "all" + value: 'all' description: "Comma-separated list of test_cases to run. Use 'all' to run the full suite." 
FUNCTIONAL_TEST_NAME: - description: "Name of functional test run (only for pre-release and release)" - value: "$$CI_COMMIT_SHA" + description: 'Name of functional test run (only for pre-release and release)' + value: '$$CI_COMMIT_SHA' FUNCTIONAL_TEST_RECORD_CHECKPOINTS: - value: "no" - description: "Record golden checkpoints" + value: 'no' + description: 'Record golden checkpoints' options: - - "yes" - - "no" + - 'yes' + - 'no' CLUSTER_A100: - value: "dgxa100_dracooci" + value: 'dgxa100_dracooci' options: - - "dgxa100_dracooci" - - "dgxa100_dracooci-ord" - description: "Cluster for A100 workloads" + - 'dgxa100_dracooci' + - 'dgxa100_dracooci-ord' + description: 'Cluster for A100 workloads' CLUSTER_H100: - value: "dgxh100_coreweave" + value: 'dgxh100_coreweave' options: - - "dgxh100_coreweave" - - "dgxh100_eos" - description: "Cluster for H100 workloads" + - 'dgxh100_coreweave' + - 'dgxh100_eos' + description: 'Cluster for H100 workloads' + CLUSTER_GB200: + value: 'dgxgb200_oci-hsg' + options: + - 'dgxgb200_oci-hsg' + description: 'Cluster for H100 workloads' PUBLISH: - value: "no" + value: 'no' options: - - "yes" - - "no" + - 'yes' + - 'no' description: Build and publish a wheel to PyPi PUBLISH_COMMIT: - value: "$$CI_COMMIT_SHA" + value: '$$CI_COMMIT_SHA' description: Which commit to publish PUBLISH_VERSION_BUMP_BRANCH: - value: "$$CI_COMMIT_BRANCH" + value: '$$CI_COMMIT_BRANCH' description: Which branch to target for version bump PUBLISH_SCOPE: - value: "code-freeze" + value: 'code-freeze' options: - - "code-freeze" - - "release" - - "review-reminder" - - "upgrade-dependencies" + - 'code-freeze' + - 'release' + - 'review-reminder' + - 'upgrade-dependencies' description: Type of publish (freeze or final release) # CI wide variables @@ -262,7 +267,7 @@ variables: CI_MCORE_DEV_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_ci_dev CI_NEMO_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/nemo_ci UTILITY_IMAGE: 
${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_utility - TE_GIT_REF: "" + TE_GIT_REF: '' include: - .gitlab/stages/00.pre.yml diff --git a/.gitlab/scripts/build.sh b/.gitlab/scripts/build.sh index e64434e834d..8359731e3d7 100644 --- a/.gitlab/scripts/build.sh +++ b/.gitlab/scripts/build.sh @@ -22,15 +22,21 @@ ADDITIONAL_PARAMS=() if [[ "$CI_COMMIT_BRANCH" == "ci-rebuild-mcore-nemo-image" || "$CI_COMMIT_BRANCH" == "main" || "$CI_COMMIT_BRANCH" == "dev" ]]; then ADDITIONAL_PARAMS+=("--pull") - ADDITIONAL_PARAMS+=("--cache-to type=registry,ref=${IMAGE}-buildcache:main,mode=max") - ADDITIONAL_PARAMS+=("-t ${IMAGE}:${CI_COMMIT_BRANCH}") -elif [[ -n "$CI_MERGE_REQUEST_IID" ]]; then - ADDITIONAL_PARAMS+=("--cache-to type=registry,ref=${IMAGE}-buildcache:${CI_MERGE_REQUEST_IID},mode=max") - ADDITIONAL_PARAMS+=("-t ${IMAGE}:${CI_MERGE_REQUEST_IID}") +fi + +CI_COMMIT_BRANCH=$(echo "$CI_COMMIT_BRANCH" | tr '/' '-' | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9._-]/-/g') +ADDITIONAL_PARAMS+=("--cache-to type=registry,ref=${IMAGE}-buildcache:${CI_COMMIT_BRANCH}-${PLATFORM},mode=max") +ADDITIONAL_PARAMS+=("--cache-from type=registry,ref=${IMAGE}-buildcache:${CI_COMMIT_BRANCH}-${PLATFORM}") +ADDITIONAL_PARAMS+=("-t ${IMAGE}:${CI_COMMIT_BRANCH}-${PLATFORM}") + +if [[ -n "$CI_MERGE_REQUEST_IID" ]]; then + ADDITIONAL_PARAMS+=("--cache-to type=registry,ref=${IMAGE}-buildcache:${CI_MERGE_REQUEST_IID}-${PLATFORM},mode=max") + ADDITIONAL_PARAMS+=("--cache-from type=registry,ref=${IMAGE}-buildcache:${CI_MERGE_REQUEST_IID}-${PLATFORM}") + ADDITIONAL_PARAMS+=("-t ${IMAGE}:${CI_MERGE_REQUEST_IID}-${PLATFORM}") fi if [[ "$CI_COMMIT_BRANCH" == "ci-nightly" ]]; then - ADDITIONAL_PARAMS+=("-t ${IMAGE}:nightly") + ADDITIONAL_PARAMS+=("-t ${IMAGE}:nightly-${PLATFORM}") fi if [[ -n "$TE_GIT_REF" ]]; then @@ -46,13 +52,11 @@ DOCKER_BUILDKIT=1 docker build \ --secret id=LOGGER_INDEX_URL \ --target $STAGE \ -f docker/$FILE \ - -t ${IMAGE}:${CI_PIPELINE_ID} \ + -t 
${IMAGE}:${CI_PIPELINE_ID}-${PLATFORM} \ --builder=container \ --build-arg JET_API_VERSION=$JET_API_VERSION \ - --cache-from type=registry,ref=${IMAGE}-buildcache:${CI_MERGE_REQUEST_IID} \ - --cache-from type=registry,ref=${IMAGE}-buildcache:dev \ - --cache-from type=registry,ref=${IMAGE}-buildcache:main \ --build-arg FROM_IMAGE_NAME=$BASE_IMAGE \ + --provenance=false \ --push \ --progress plain \ ${ADDITIONAL_PARAMS[@]} . diff --git a/.gitlab/stages/01.build.yml b/.gitlab/stages/01.build.yml index b3ab8cc5bd5..20252e7d045 100644 --- a/.gitlab/stages/01.build.yml +++ b/.gitlab/stages/01.build.yml @@ -9,21 +9,20 @@ extends: [.build_rules, .dind_rules] stage: build tags: - - arch/amd64 + - arch/${PLATFORM} - origin/jet-fleet - env/prod - - ${TAG} + - purpose/builder-large services: - name: docker:24.0.5-dind variables: - HEALTHCHECK_TCP_PORT: '2376' + HEALTHCHECK_TCP_PORT: "2376" timeout: 180m variables: DOCKER_HOST: tcp://docker:2376 - DOCKER_TLS_CERTDIR: '/certs' + DOCKER_TLS_CERTDIR: "/certs" DOCKER_TLS_VERIFY: 1 - DOCKER_CERT_PATH: '$DOCKER_TLS_CERTDIR/client' - TAG: purpose/builder-large + DOCKER_CERT_PATH: "$DOCKER_TLS_CERTDIR/client" STAGE: jet MCORE_BACKWARDS_REF: core_r0.14.0 KUBERNETES_SERVICE_MEMORY_REQUEST: 90Gi @@ -48,7 +47,7 @@ reports: dotenv: build.env -test:build_image: +test:pre_build_image: extends: [.build_image] parallel: matrix: @@ -56,13 +55,30 @@ test:build_image: FILE: Dockerfile.ci.dev IMAGE_TYPE: lts BASE_IMAGE: nvcr.io/nvidia/pytorch:25.09-py3 + PLATFORM: amd64 + - IMAGE: CI_MCORE_LTS_IMAGE + FILE: Dockerfile.ci.dev + IMAGE_TYPE: lts + BASE_IMAGE: nvcr.io/nvidia/pytorch:25.09-py3 + PLATFORM: arm64 - IMAGE: CI_MCORE_DEV_IMAGE FILE: Dockerfile.ci.dev IMAGE_TYPE: dev BASE_IMAGE: nvcr.io/nvidia/pytorch:25.11-py3 + PLATFORM: amd64 + - IMAGE: CI_MCORE_DEV_IMAGE + FILE: Dockerfile.ci.dev + IMAGE_TYPE: dev + BASE_IMAGE: nvcr.io/nvidia/pytorch:25.11-py3 + PLATFORM: arm64 + - IMAGE: UTILITY_IMAGE + FILE: Dockerfile.linting + BASE_IMAGE: python:3.10 
+ PLATFORM: amd64 - IMAGE: UTILITY_IMAGE FILE: Dockerfile.linting BASE_IMAGE: python:3.10 + PLATFORM: arm64 test:build_nemo_image: extends: [.build_image] @@ -70,6 +86,57 @@ test:build_nemo_image: IMAGE: CI_NEMO_IMAGE FILE: Dockerfile.ci.nemo BASE_IMAGE: nvcr.io/nvidian/nemo:nightly + PLATFORM: amd64 rules: - if: $FUNCTIONAL_TEST == "yes" || $INTEGRATION_TEST == "yes" || $CI_COMMIT_BRANCH == "ci-rebuild-mcore-nemo-image" when: on_success + +test:build_image: + needs: [test:pre_build_image] + extends: [.build_rules, .dind_rules] + parallel: + matrix: + - IMAGE: CI_MCORE_LTS_IMAGE + - IMAGE: CI_MCORE_DEV_IMAGE + - IMAGE: UTILITY_IMAGE + stage: build + tags: + - arch/amd64 + - origin/jet-fleet + - env/prod + - purpose/builder-large + services: + - name: docker:24.0.5-dind + variables: + HEALTHCHECK_TCP_PORT: "2376" + timeout: 180m + variables: + DOCKER_HOST: tcp://docker:2376 + DOCKER_TLS_CERTDIR: "/certs" + DOCKER_TLS_VERIFY: 1 + DOCKER_CERT_PATH: "$DOCKER_TLS_CERTDIR/client" + STAGE: jet + MCORE_BACKWARDS_REF: core_r0.14.0 + KUBERNETES_SERVICE_MEMORY_REQUEST: 90Gi + KUBERNETES_SERVICE_MEMORY_LIMIT: 90Gi + SHARED_PATH: /builds/$CI_PROJECT_PATH/shared + script: + - | + set -x + + env + eval "IMAGE=\$$IMAGE" + + docker manifest create ${IMAGE}:${CI_PIPELINE_ID} \ + ${IMAGE}:${CI_PIPELINE_ID}-amd64 \ + ${IMAGE}:${CI_PIPELINE_ID}-arm64 + + docker manifest push ${IMAGE}:${CI_PIPELINE_ID} + - echo "MCORE_MR_COMMIT=$CI_COMMIT_SHA" | tee -a build.env + - echo "MCORE_BACKWARDS_COMMIT=$MCORE_BACKWARDS_COMMIT" | tee -a build.env + - cat build.env + retry: + max: 2 + artifacts: + reports: + dotenv: build.env diff --git a/.gitlab/stages/03.integration-tests.yml b/.gitlab/stages/03.integration-tests.yml index 824721b9fb1..d28ecd8e137 100644 --- a/.gitlab/stages/03.integration-tests.yml +++ b/.gitlab/stages/03.integration-tests.yml @@ -43,6 +43,7 @@ integration:configure: - | A100_CLUSTER=$([[ "$CLUSTER_A100" != "" ]] && echo $CLUSTER_A100 || echo $DEFAULT_A100_CLUSTER) 
H100_CLUSTER=$([[ "$CLUSTER_H100" != "" ]] && echo $CLUSTER_H100 || echo $DEFAULT_H100_CLUSTER) + GB200_CLUSTER=$([[ "$CLUSTER_GB200" != "" ]] && echo $CLUSTER_GB200 || echo $DEFAULT_GB200_CLUSTER) - | ARGS=( "--scope $INTEGRATION_TEST_SCOPE" @@ -88,12 +89,30 @@ integration:configure: --platform dgx_h100 \ --cluster $H100_CLUSTER \ --output-path "functional-test-job-lts-H100.yaml" + - | + export PYTHONPATH=$(pwd) + python tests/test_utils/python_scripts/generate_jet_trigger_job.py \ + ${ARGS[@]} \ + --environment lts \ + --platform dgx_gb2100 \ + --cluster $GB200_CLUSTER \ + --output-path "functional-test-job-lts-GB200.yaml" + - | + export PYTHONPATH=$(pwd) + python tests/test_utils/python_scripts/generate_jet_trigger_job.py \ + ${ARGS[@]} \ + --environment lts \ + --platform dgx_gb200 \ + --cluster $GB200_CLUSTER \ + --output-path "functional-test-job-lts-GB200.yaml" artifacts: paths: - functional-test-job-lts-A100.yaml - functional-test-job-lts-H100.yaml - functional-test-job-dev-H100.yaml - functional-test-job-dev-A100.yaml + - functional-test-job-lts-GB200.yaml + - functional-test-job-dev-GB200.yaml - tests/test_utils/local_recipes .integration_run: @@ -132,6 +151,12 @@ integration:run_lts_dgx_h100: ENVIRONMENT: lts CLUSTER: H100 +integration:run_lts_dgx_gb200: + extends: [.integration_run] + variables: + ENVIRONMENT: lts + CLUSTER: GB200 + integration:run_dev_dgx_a100: extends: [.integration_run] variables: @@ -143,3 +168,9 @@ integration:run_dev_dgx_h100: variables: ENVIRONMENT: dev CLUSTER: H100 + +integration:run_dev_dgx_gb200: + extends: [.integration_run] + variables: + ENVIRONMENT: dev + CLUSTER: GB200 diff --git a/.gitlab/stages/04.functional-tests.yml b/.gitlab/stages/04.functional-tests.yml index eee5a9b80fe..d32ff86a344 100644 --- a/.gitlab/stages/04.functional-tests.yml +++ b/.gitlab/stages/04.functional-tests.yml @@ -50,6 +50,7 @@ functional:configure: - | A100_CLUSTER=$([[ "$CLUSTER_A100" != "" ]] && echo $CLUSTER_A100 || echo 
$DEFAULT_A100_CLUSTER) H100_CLUSTER=$([[ "$CLUSTER_H100" != "" ]] && echo $CLUSTER_H100 || echo $DEFAULT_H100_CLUSTER) + GB200_CLUSTER=$([[ "$CLUSTER_GB200" != "" ]] && echo $CLUSTER_GB200 || echo $DEFAULT_GB200_CLUSTER) - | RECORD_CHECKPOINTS=$([[ "$CI_MERGE_REQUEST_LABELS" == *"Record checkpoints"* || "$FUNCTIONAL_TEST_RECORD_CHECKPOINTS" == "yes" ]] && echo "true" || echo "false") - | @@ -113,12 +114,32 @@ functional:configure: --cluster $H100_CLUSTER \ --output-path "functional-test-job-lts-H100.yaml" \ ${RELEASE_ARGS[@]} + - | + export PYTHONPATH=$(pwd) + python tests/test_utils/python_scripts/generate_jet_trigger_job.py \ + ${ARGS[@]} \ + --environment dev \ + --platform dgx_gb200 \ + --cluster $GB200_CLUSTER \ + --output-path "functional-test-job-dev-GB200.yaml" \ + ${RELEASE_ARGS[@]} + - | + export PYTHONPATH=$(pwd) + python tests/test_utils/python_scripts/generate_jet_trigger_job.py \ + ${ARGS[@]} \ + --environment lts \ + --platform dgx_gb200 \ + --cluster $GB200_CLUSTER \ + --output-path "functional-test-job-lts-GB200.yaml" \ + ${RELEASE_ARGS[@]} artifacts: paths: - functional-test-job-lts-A100.yaml - functional-test-job-lts-H100.yaml - functional-test-job-dev-A100.yaml - functional-test-job-dev-H100.yaml + - functional-test-job-lts-GB200.yaml + - functional-test-job-dev-GB200.yaml - tests/test_utils/local_recipes .functional_run: @@ -157,6 +178,12 @@ functional:run_lts_dgx_h100: ENVIRONMENT: lts CLUSTER: H100 +functional:run_lts_dgx_gb200: + extends: [.functional_run] + variables: + ENVIRONMENT: lts + CLUSTER: GB200 + functional:run_dev_dgx_a100: extends: [.functional_run] variables: @@ -169,6 +196,12 @@ functional:run_dev_dgx_h100: ENVIRONMENT: dev CLUSTER: H100 +functional:run_dev_dgx_gb200: + extends: [.functional_run] + variables: + ENVIRONMENT: dev + CLUSTER: GB200 + functional:run_nemo: extends: [.functional_tests_rules] trigger: diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index d8c1dd33942..4e1a4de55e8 100644 --- 
a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -17,10 +17,17 @@ ENV UV_LINK_MODE=copy RUN bash -ex <<"EOF" apt-get update - apt-get install -y --no-install-recommends gettext python3-venv psmisc + apt-get install -y --no-install-recommends gettext python3-venv psmisc uuid-runtime apt-get clean python -m venv /opt/jet - wget https://github.com/mikefarah/yq/releases/download/v${YQ_VERSION}/yq_linux_amd64 -O /usr/local/bin/yq + ARCH=$(uname -m) + case "${ARCH}" in \ + "x86_64") YQ_ARCH=amd64 ;; \ + "aarch64") YQ_ARCH=arm64 ;; \ + "armv7l") YQ_ARCH=arm ;; \ + *) echo "Unsupported architecture: ${ARCH}" && exit 1 ;; \ + esac + wget https://github.com/mikefarah/yq/releases/download/v${YQ_VERSION}/yq_linux_${YQ_ARCH} -O /usr/local/bin/yq chmod a+x /usr/local/bin/yq curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh EOF diff --git a/megatron/core/datasets/Makefile b/megatron/core/datasets/Makefile index e745f52399b..16f251bf903 100644 --- a/megatron/core/datasets/Makefile +++ b/megatron/core/datasets/Makefile @@ -1,4 +1,4 @@ -CXXFLAGS += -O3 -Wall -shared -std=c++11 -fPIC -fdiagnostics-color +CXXFLAGS += -O3 -Wall -shared -std=c++17 -fPIC -fdiagnostics-color CPPFLAGS += $(shell python3 -m pybind11 --includes) LIBNAME = helpers_cpp diff --git a/tests/functional_tests/shell_test_utils/_run_training.sh b/tests/functional_tests/shell_test_utils/_run_training.sh index 1d0e77a3477..72fd187d19d 100644 --- a/tests/functional_tests/shell_test_utils/_run_training.sh +++ b/tests/functional_tests/shell_test_utils/_run_training.sh @@ -159,7 +159,7 @@ MASTER_PORT=${MASTER_PORT:-6000} NUM_NODES=${NUM_NODES:-${SLURM_NNODES:-1}} GPUS_PER_NODE=${GPUS_PER_NODE:-8} NODE_RANK=${SLURM_NODEID:-${SLURM_NODEID:-0}} -LAST_RANK=7 +LAST_RANK=$((GPUS_PER_NODE - 1)) export LOG_DIR=$OUTPUT_PATH/logs/$REPEAT mkdir -p $LOG_DIR @@ -170,7 +170,7 @@ DISTRIBUTED_ARGS=( --master_port $MASTER_PORT --node_rank $NODE_RANK --log-dir $LOG_DIR - --tee "0:3,7:3" + --tee "0:3,$LAST_RANK:3" 
--redirects "3" ) diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..f023ed07c99 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82558, + "2": 10.83322, + "3": 10.82737, + "4": 10.79588, + "5": 10.85708, + "6": 10.86392, + "7": 10.8269, + "8": 10.82588, + "9": 10.83699, + "10": 10.79719, + "11": 10.87851, + "12": 10.85797, + "13": 10.85368, + "14": 10.87548, + "15": 10.79177, + "16": 10.80301, + "17": 10.7745, + "18": 10.80399, + "19": 10.79365, + "20": 10.69588, + "21": 10.6855, + "22": 10.53152, + "23": 10.70658, + "24": 10.57319, + "25": 10.51545, + "26": 10.59076, + "27": 10.60738, + "28": 10.57025, + "29": 10.58904, + "30": 10.34674, + "31": 10.07736, + "32": 10.46317, + "33": 10.45705, + "34": 10.19923, + "35": 10.25593, + "36": 10.21246, + "37": 10.34689, + "38": 10.18008, + "39": 10.40796, + "40": 10.07602, + "41": 10.12935, + "42": 10.21132, + "43": 9.81692, + "44": 9.94027, + "45": 9.817, + "46": 9.80608, + "47": 10.12473, + "48": 9.84047, + "49": 9.50975, + "50": 9.88932 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1691.0, + "2": 1553.0, + "3": 1673.0, + "4": 1760.0, + "5": 1852.0, + "6": 1861.0, + "7": 1852.0, + "8": 1755.0, + "9": 1952.0, + "10": 1427.0, + "11": 1857.0, + "12": 1820.0, + "13": 1948.0, + "14": 1828.0, + "15": 1913.0, + "16": 1881.0, + "17": 1770.0, + "18": 1683.0, + "19": 1784.0, + "20": 1714.0, + "21": 
1969.0, + "22": 1701.0, + "23": 1972.0, + "24": 1545.0, + "25": 1537.0, + "26": 1650.0, + "27": 1770.0, + "28": 1889.0, + "29": 1946.0, + "30": 2031.0, + "31": 1511.0, + "32": 1848.0, + "33": 2009.0, + "34": 1749.0, + "35": 1978.0, + "36": 1926.0, + "37": 2358.0, + "38": 2036.0, + "39": 2202.0, + "40": 2015.0, + "41": 2184.0, + "42": 2304.0, + "43": 2079.0, + "44": 2042.0, + "45": 2082.0, + "46": 2206.0, + "47": 2417.0, + "48": 2284.0, + "49": 2231.0, + "50": 2430.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 552193536.0, + "2": 552193536.0, + "3": 552193536.0, + "4": 553242112.0, + "5": 552193536.0, + "6": 553242112.0, + "7": 553242112.0, + "8": 552193536.0, + "9": 552193536.0, + "10": 552193536.0, + "11": 553242112.0, + "12": 552193536.0, + "13": 552193536.0, + "14": 552193536.0, + "15": 552193536.0, + "16": 553242112.0, + "17": 553242112.0, + "18": 552193536.0, + "19": 553242112.0, + "20": 552193536.0, + "21": 552193536.0, + "22": 552193536.0, + "23": 552193536.0, + "24": 552193536.0, + "25": 552193536.0, + "26": 552193536.0, + "27": 552193536.0, + "28": 552193536.0, + "29": 552193536.0, + "30": 552193536.0, + "31": 552193536.0, + "32": 552193536.0, + "33": 552193536.0, + "34": 552193536.0, + "35": 552193536.0, + "36": 552193536.0, + "37": 552193536.0, + "38": 552193536.0, + "39": 552193536.0, + "40": 552193536.0, + "41": 552193536.0, + "42": 552193536.0, + "43": 552193536.0, + "44": 552193536.0, + "45": 553242112.0, + "46": 552193536.0, + "47": 552193536.0, + "48": 552193536.0, + "49": 552193536.0, + "50": 552193536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3798208000.0, + "2": 3942086144.0, + "3": 3942086144.0, + "4": 3942086144.0, + "5": 3942086144.0, + "6": 3942086144.0, + "7": 3942086144.0, + "8": 3942086144.0, + "9": 3942086144.0, + "10": 3942086144.0, + "11": 3942086144.0, + "12": 3942086144.0, + "13": 
3942086144.0, + "14": 3942086144.0, + "15": 3942086144.0, + "16": 3942086144.0, + "17": 3942086144.0, + "18": 3942086144.0, + "19": 3942086144.0, + "20": 3942086144.0, + "21": 3942086144.0, + "22": 3942086144.0, + "23": 3942086144.0, + "24": 3942086144.0, + "25": 3942086144.0, + "26": 3942086144.0, + "27": 3942086144.0, + "28": 3942086144.0, + "29": 3942086144.0, + "30": 3942086144.0, + "31": 3942086144.0, + "32": 3942086144.0, + "33": 3942086144.0, + "34": 3942086144.0, + "35": 3942086144.0, + "36": 3942086144.0, + "37": 3942086144.0, + "38": 3942086144.0, + "39": 3942086144.0, + "40": 3942086144.0, + "41": 3942086144.0, + "42": 3942086144.0, + "43": 3942086144.0, + "44": 3942086144.0, + "45": 3942086144.0, + "46": 3942086144.0, + "47": 3942086144.0, + "48": 3942086144.0, + "49": 3942086144.0, + "50": 3942086144.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.06303, + "2": 0.15398, + "3": 0.27325, + "4": 0.13945, + "5": 0.25021, + "6": 0.16329, + "7": 0.27717, + "8": 0.18718, + "9": 0.12007, + "10": 0.21402, + "11": 0.2385, + "12": 0.61603, + "13": 0.24413, + "14": 0.18837, + "15": 0.14999, + "16": 0.12555, + "17": 0.24832, + "18": 0.1361, + "19": 0.13136, + "20": 0.27497, + "21": 0.22444, + "22": 0.11923, + "23": 0.11996, + "24": 0.25718, + "25": 0.20275, + "26": 0.35028, + "27": 0.11968, + "28": 0.23901, + "29": 0.12079, + "30": 0.12184, + "31": 0.21733, + "32": 0.28054, + "33": 0.11829, + "34": 0.17717, + "35": 0.1215, + "36": 0.27112, + "37": 0.22357, + "38": 0.12158, + "39": 0.12105, + "40": 0.12099, + "41": 0.21658, + "42": 0.22641, + "43": 0.12146, + "44": 0.1201, + "45": 0.253, + "46": 0.12142, + "47": 0.23268, + "48": 0.13569, + "49": 0.1302, + "50": 0.24153 + } + } +} \ No newline at end of file diff --git a/tests/test_utils/python_scripts/launch_jet_workload.py b/tests/test_utils/python_scripts/launch_jet_workload.py index 6ecd98a06c1..7f60ceb12d6 100644 --- 
a/tests/test_utils/python_scripts/launch_jet_workload.py +++ b/tests/test_utils/python_scripts/launch_jet_workload.py @@ -8,6 +8,7 @@ import signal import sys import time +import uuid import zipfile from typing import Dict, List, Optional @@ -111,15 +112,12 @@ def launch_and_wait_for_completion( "HF_HUB_CACHE": "/lustre/fsw/coreai_dlalgo_mcore/hf_hub", "TRANSFORMERS_OFFLINE": "1", "CLUSTER": cluster, + "RUN_ID": str(uuid.uuid4()), } } } } }, - "outputs": { - "enabled": True, - "artifacts_storages": [recipe_parser.resolve_artifact_config(cluster)], - }, }, wait_for_validation=True, max_wait_time=(60 * 60), diff --git a/tests/test_utils/python_scripts/recipe_parser.py b/tests/test_utils/python_scripts/recipe_parser.py index b866fbbf5c2..c6e7c5517e8 100644 --- a/tests/test_utils/python_scripts/recipe_parser.py +++ b/tests/test_utils/python_scripts/recipe_parser.py @@ -24,6 +24,8 @@ class dotdict(dict): def resolve_cluster_config(cluster: str) -> str: if cluster == "dgxh100_eos": return "eos" + if cluster == "dgxgb200_oci-hsg": + return "oci-hsg" if cluster == "dgxa100_dracooci": return "draco-oci-iad" if cluster == "dgxa100_dracooci-ord": @@ -35,18 +37,6 @@ def resolve_cluster_config(cluster: str) -> str: raise ValueError(f"Unknown cluster {cluster} provided.") -def resolve_artifact_config(cluster: str) -> str: - if cluster == "dgxh100_eos": - return "eos_lustre" - if cluster == "dgxa100_dracooci": - return "draco-oci_lustre" - if cluster == "dgxa100_dracooci-ord": - return "draco-oci-ord_lustre" - if cluster == "dgxh100_coreweave": - return "coreweave_lustre" - raise ValueError(f"Unknown cluster {cluster} provided.") - - def flatten_products(workload_manifest: dotdict) -> dotdict: """Flattens a nested dict of products""" expanded_products = [] diff --git a/tests/test_utils/recipes/_build-mcore-dev.yaml b/tests/test_utils/recipes/_build-mcore-dev.yaml index 123250d7469..d82417ea5e3 100644 --- a/tests/test_utils/recipes/_build-mcore-dev.yaml +++ 
b/tests/test_utils/recipes/_build-mcore-dev.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [maanug] spec: name: mcore-pyt-dev - platforms: [linux/amd64] + platforms: [linux/amd64,linux/arm64] source: # The image tag will be added via `jet-tests.yaml` # Tags are one of {buildcache, $CI_PIPELINE_ID} diff --git a/tests/test_utils/recipes/_build-mcore-lts.yaml b/tests/test_utils/recipes/_build-mcore-lts.yaml index d017b71c101..8efa6faa1e5 100644 --- a/tests/test_utils/recipes/_build-mcore-lts.yaml +++ b/tests/test_utils/recipes/_build-mcore-lts.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [maanug] spec: name: mcore-pyt-lts - platforms: [linux/amd64] + platforms: [linux/amd64,linux/arm64] source: # The image tag will be added via `jet-tests.yaml` # Tags are one of {buildcache, $CI_PIPELINE_ID} diff --git a/tests/test_utils/recipes/gpt-gb200.yaml b/tests/test_utils/recipes/gpt-gb200.yaml new file mode 100644 index 00000000000..c32d141bbf4 --- /dev/null +++ b/tests/test_utils/recipes/gpt-gb200.yaml @@ -0,0 +1,73 @@ +type: basic +format_version: 1 +maintainers: [mcore] +loggers: [stdout] +spec: + name: "{test_case}_{environment}_{platforms}" + model: gpt + build: mcore-pyt-{environment} + nodes: 2 + gpus: 4 + n_repeat: 5 + platforms: dgx_a100 + script_setup: | + unset https_proxy + echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc + + # Checkout latest + cd /opt + rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm + git init + git remote add origin $MCORE_REPO + git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*' + git fetch origin $MCORE_MR_COMMIT + git checkout $MCORE_MR_COMMIT + git rev-parse HEAD + + # Checkout backwards-ref + cd /opt + rm -rf /opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy + git init + git remote add origin $MCORE_REPO + git fetch origin $MCORE_BACKWARDS_COMMIT + git checkout $MCORE_BACKWARDS_COMMIT + git rev-parse HEAD + rm -rf megatron; 
cp -a /opt/megatron-lm/megatron ./ + script: |- + ls + cd /opt/megatron-lm + + NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') + export GPUS_PER_NODE={gpus} + + ARGUMENTS=( + "DATA_PATH=/mnt/artifacts" + "DATA_CACHE_PATH=/lustre/fsw/coreai_dlalgo_mcore/mcore_ci/data/$RUN_ID/cache/" + "OUTPUT_PATH={assets_dir}" + "TENSORBOARD_PATH={assets_dir}/tensorboard" + "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/" + "TRAINING_SCRIPT_PATH=pretrain_gpt.py" + "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "N_REPEAT={n_repeat}" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" + "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" + ) + + set +x + bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} + exit_code=$? + echo "Exit code: $exit_code" + rm -rf /lustre/fsw/coreai_dlalgo_mcore/mcore_ci/data/$RUN_ID || true + set -x + exit $exit_code + +products: + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] + products: + - environment: [lts] + scope: [mr] + - environment: [dev] + scope: [mr, mr-github, mr-github-slim] + platforms: [dgx_gb200] diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index f403ac20e3f..eab62026381 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: '{test_case}_{environment}_{platforms}' + name: "{test_case}_{environment}_{platforms}" model: gpt build: mcore-pyt-{environment} nodes: 1 From ab3ae8a08cc6a221f91926ac489ee5d911e33ed8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 14 Jan 2026 18:08:52 +0000 Subject: [PATCH 230/334] ci(hotfix): Repair recipe 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- tests/test_utils/recipes/gpt-gb200.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_utils/recipes/gpt-gb200.yaml b/tests/test_utils/recipes/gpt-gb200.yaml index c32d141bbf4..750017b70a7 100644 --- a/tests/test_utils/recipes/gpt-gb200.yaml +++ b/tests/test_utils/recipes/gpt-gb200.yaml @@ -66,8 +66,6 @@ spec: products: - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] products: - - environment: [lts] - scope: [mr] - environment: [dev] scope: [mr, mr-github, mr-github-slim] platforms: [dgx_gb200] From dce8e88e7ad709dc270d16bf4bc84b3b56fe490a Mon Sep 17 00:00:00 2001 From: Juntao Wang Date: Thu, 15 Jan 2026 12:06:28 +0800 Subject: [PATCH 231/334] Fix clip_qk for virtual pipeline size > 1 (#2776) Co-authored-by: Xin Yao --- megatron/core/optimizer/qk_clip.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/megatron/core/optimizer/qk_clip.py b/megatron/core/optimizer/qk_clip.py index 72127f94712..26b5787cd50 100644 --- a/megatron/core/optimizer/qk_clip.py +++ b/megatron/core/optimizer/qk_clip.py @@ -22,6 +22,11 @@ def clip_qk(model, log_max_only=False) -> float: for model_chunk in model: for transformer_layer in model_chunk.module.module.decoder.layers: if hasattr(transformer_layer.self_attention, 'clip_qk'): + if ( + transformer_layer.self_attention.core_attention.current_max_attn_logits + is None + ): + continue torch.distributed.all_reduce( transformer_layer.self_attention.core_attention.current_max_attn_logits, op=torch.distributed.ReduceOp.MAX, From 748ab80ed7bda06a6ec4730ff2eb8e9923153818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 15 Jan 2026 08:49:58 +0000 Subject: [PATCH 232/334] ci(hotfix): GB200 to nightly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- 
tests/test_utils/recipes/gpt-gb200.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils/recipes/gpt-gb200.yaml b/tests/test_utils/recipes/gpt-gb200.yaml index 750017b70a7..70b89e31a0e 100644 --- a/tests/test_utils/recipes/gpt-gb200.yaml +++ b/tests/test_utils/recipes/gpt-gb200.yaml @@ -67,5 +67,5 @@ products: - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] products: - environment: [dev] - scope: [mr, mr-github, mr-github-slim] + scope: [nightly] platforms: [dgx_gb200] From a32b1985da4d645ceeabae725ef72c110817b987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 15 Jan 2026 16:42:23 +0100 Subject: [PATCH 233/334] ci(fix): GB200 racecondition (#2962) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- tests/functional_tests/shell_test_utils/run_ci_test.sh | 3 ++- tests/test_utils/recipes/gpt-gb200.yaml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh index 693970d3b67..00daaea69e2 100644 --- a/tests/functional_tests/shell_test_utils/run_ci_test.sh +++ b/tests/functional_tests/shell_test_utils/run_ci_test.sh @@ -69,6 +69,7 @@ mkdir -p $CHECKPOINT_SAVE_PATH mkdir -p $CHECKPOINT_LOAD_PATH || true _CHECKPOINT_LOAD_PATH=$CHECKPOINT_LOAD_PATH _CHECKPOINT_SAVE_PATH=$CHECKPOINT_SAVE_PATH +_TENSORBOARD_PATH=$TENSORBOARD_PATH SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) ROOT_DIR=$(realpath $SCRIPT_DIR/../../../) @@ -130,11 +131,11 @@ for i in $(seq 1 $N_REPEAT); do if [[ $i -gt 1 ]]; then rm -rf $CHECKPOINT_SAVE_PATH/* rm -rf /tmp/checkpoints/* - rm -rf $TENSORBOARD_PATH/* fi # First run never loads from a checkpoint export RUN_NUMBER=1 + export TENSORBOARD_PATH=$_TENSORBOARD_PATH/$i/ export REPEAT=$i export 
CHECKPOINT_SAVE_PATH=$_CHECKPOINT_SAVE_PATH export TRAINING_EXIT_CODE=0 diff --git a/tests/test_utils/recipes/gpt-gb200.yaml b/tests/test_utils/recipes/gpt-gb200.yaml index 70b89e31a0e..fd3a8b1605c 100644 --- a/tests/test_utils/recipes/gpt-gb200.yaml +++ b/tests/test_utils/recipes/gpt-gb200.yaml @@ -67,5 +67,5 @@ products: - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] products: - environment: [dev] - scope: [nightly] + scope: [mr] platforms: [dgx_gb200] From 7c6c4e9b753a78c3ac2e740cb9c715eb599de1e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 15 Jan 2026 18:44:21 +0000 Subject: [PATCH 234/334] Revert "ci(fix): GB200 racecondition (#2962)" This reverts commit a32b1985da4d645ceeabae725ef72c110817b987. --- tests/functional_tests/shell_test_utils/run_ci_test.sh | 3 +-- tests/test_utils/recipes/gpt-gb200.yaml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh index 00daaea69e2..693970d3b67 100644 --- a/tests/functional_tests/shell_test_utils/run_ci_test.sh +++ b/tests/functional_tests/shell_test_utils/run_ci_test.sh @@ -69,7 +69,6 @@ mkdir -p $CHECKPOINT_SAVE_PATH mkdir -p $CHECKPOINT_LOAD_PATH || true _CHECKPOINT_LOAD_PATH=$CHECKPOINT_LOAD_PATH _CHECKPOINT_SAVE_PATH=$CHECKPOINT_SAVE_PATH -_TENSORBOARD_PATH=$TENSORBOARD_PATH SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) ROOT_DIR=$(realpath $SCRIPT_DIR/../../../) @@ -131,11 +130,11 @@ for i in $(seq 1 $N_REPEAT); do if [[ $i -gt 1 ]]; then rm -rf $CHECKPOINT_SAVE_PATH/* rm -rf /tmp/checkpoints/* + rm -rf $TENSORBOARD_PATH/* fi # First run never loads from a checkpoint export RUN_NUMBER=1 - export TENSORBOARD_PATH=$_TENSORBOARD_PATH/$i/ export REPEAT=$i export CHECKPOINT_SAVE_PATH=$_CHECKPOINT_SAVE_PATH export TRAINING_EXIT_CODE=0 diff --git 
a/tests/test_utils/recipes/gpt-gb200.yaml b/tests/test_utils/recipes/gpt-gb200.yaml index fd3a8b1605c..70b89e31a0e 100644 --- a/tests/test_utils/recipes/gpt-gb200.yaml +++ b/tests/test_utils/recipes/gpt-gb200.yaml @@ -67,5 +67,5 @@ products: - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] products: - environment: [dev] - scope: [mr] + scope: [nightly] platforms: [dgx_gb200] From 619115a902a2c74c3e9f200bdbbaadf10723952f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 16 Jan 2026 01:20:07 +0100 Subject: [PATCH 235/334] ci: Fix GB200 change (#2969) (#2974) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- tests/functional_tests/shell_test_utils/run_ci_test.sh | 5 +++++ tests/test_utils/recipes/gpt.yaml | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh index 693970d3b67..20267536a0f 100644 --- a/tests/functional_tests/shell_test_utils/run_ci_test.sh +++ b/tests/functional_tests/shell_test_utils/run_ci_test.sh @@ -69,6 +69,7 @@ mkdir -p $CHECKPOINT_SAVE_PATH mkdir -p $CHECKPOINT_LOAD_PATH || true _CHECKPOINT_LOAD_PATH=$CHECKPOINT_LOAD_PATH _CHECKPOINT_SAVE_PATH=$CHECKPOINT_SAVE_PATH +_TENSORBOARD_PATH=$TENSORBOARD_PATH SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) ROOT_DIR=$(realpath $SCRIPT_DIR/../../../) @@ -135,6 +136,10 @@ for i in $(seq 1 $N_REPEAT); do # First run never loads from a checkpoint export RUN_NUMBER=1 + DIR=$(dirname "$_TENSORBOARD_PATH") + FILE=$(basename "$_TENSORBOARD_PATH") + export TENSORBOARD_PATH=$DIR/$i/$FILE + mkdir -p $(dirname $TENSORBOARD_PATH) export REPEAT=$i export CHECKPOINT_SAVE_PATH=$_CHECKPOINT_SAVE_PATH export TRAINING_EXIT_CODE=0 diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml 
index eab62026381..90eddc55c27 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -462,7 +462,7 @@ products: - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] products: - environment: [lts] - scope: [mr] + scope: [nightly] - environment: [dev] scope: [mr, mr-github, mr-github-slim] platforms: [dgx_h100] @@ -472,11 +472,11 @@ products: scope: [mr] platforms: [dgx_h100] - environment: [lts] - scope: [mr] + scope: [nightly] - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: - environment: [lts] - scope: [mr] + scope: [nightly] - environment: [dev] scope: [mr, mr-github, mr-github-slim] platforms: [dgx_h100] @@ -486,7 +486,7 @@ products: scope: [mr] platforms: [dgx_h100] - environment: [lts] - scope: [mr] + scope: [nightly] # - test_case: [gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone] # products: # - environment: [dev] From b3950164bcf3294f03a0f315d4274b98e7b97adf Mon Sep 17 00:00:00 2001 From: Robin Zhang Date: Fri, 16 Jan 2026 09:38:58 +0800 Subject: [PATCH 236/334] [Dev] TE cudagraph recompute (#2694) Signed-off-by: Robin Zhang Co-authored-by: Xin Yao --- .../core/models/gpt/fine_grained_callables.py | 6 +- megatron/core/tensor_parallel/random.py | 5 + megatron/core/transformer/cuda_graphs.py | 6 +- megatron/core/transformer/moe/moe_layer.py | 15 +- megatron/core/transformer/moe/moe_utils.py | 68 ++++----- .../core/transformer/transformer_config.py | 104 ++++++-------- .../core/transformer/transformer_layer.py | 132 +++++++++++------- megatron/training/arguments.py | 3 - 8 files changed, 172 insertions(+), 167 deletions(-) diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index b4879cd1e13..71c5c19749c 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ 
-466,7 +466,7 @@ def forward_func( shared_expert_output = layer.mlp.shared_experts_compute(pre_mlp_layernorm_output) probs, routing_map = layer.mlp.route(pre_mlp_layernorm_output) - local_tokens, probs, _ = layer.mlp.preprocess( + local_tokens, probs = layer.mlp.preprocess( pre_mlp_layernorm_output, probs, routing_map ) return hidden_states, local_tokens, probs, shared_expert_output @@ -519,9 +519,7 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): # backward graph from connecting to dispatch submodule token_dispatcher._comm_manager.dispatched_probs = dispatched_probs - expert_output, _ = layer.mlp.routed_experts_compute( - dispatched_tokens, dispatched_probs, None - ) + expert_output, _ = layer.mlp.routed_experts_compute(dispatched_tokens, dispatched_probs) if layer.recompute_pre_mlp_layernorm: # discard the output of the pre-mlp layernorm and register the recompute diff --git a/megatron/core/tensor_parallel/random.py b/megatron/core/tensor_parallel/random.py index 617d2803c12..5d5389a52d2 100644 --- a/megatron/core/tensor_parallel/random.py +++ b/megatron/core/tensor_parallel/random.py @@ -627,6 +627,11 @@ def checkpoint(self, run_function, *args): def _recompute(self, _): """Used as a hook to recompute the output.""" + + if self.ctx is None: + # The recomputation has been triggered already. Just return. 
+ return + if not torch.autograd._is_checkpoint_valid(): raise RuntimeError( "Checkpointing is not compatible with .grad(), " diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index b566c1830dc..ec02555233b 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -1835,7 +1835,11 @@ def _get_cuda_graph_input_data(self): sample_args, sample_kwargs = self._get_sample_arguments(order, chunk_id_list) def get_make_graphed_callables_kwargs(): - kwargs = {'allow_unused_input': True, '_order': order} + kwargs = { + 'allow_unused_input': True, + '_order': order, + 'retain_graph_in_backward': self.config.cuda_graph_retain_backward_graph, + } # Calculate the number of warmup iterations per layer per microbatch inside TE # make_graphed_callables(). There are two rules: diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py index e44d8647bd6..e17cebcf1f9 100644 --- a/megatron/core/transformer/moe/moe_layer.py +++ b/megatron/core/transformer/moe/moe_layer.py @@ -24,6 +24,7 @@ ) from megatron.core.transformer.spec_utils import ModuleSpec, build_module from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.utils import internal_api try: import transformer_engine as te # pylint: disable=unused-import @@ -222,9 +223,8 @@ def preprocess( """Preprocess token routing for dispatch. This method preprocesses the hidden states and routing probabilities for the token - dispatcher. The original hidden states are returned as a residual connection. + dispatcher. """ - residual = hidden_states # Project the hidden_states from hidden dimension down to latent dimenion. 
if self.config.moe_latent_size: assert ( @@ -234,7 +234,7 @@ def preprocess( hidden_states, probs = self.token_dispatcher.dispatch_preprocess( hidden_states, routing_map, probs ) - return hidden_states, probs, residual + return hidden_states, probs def dispatch(self, hidden_states: torch.Tensor, probs: torch.Tensor): """Dispatches tokens to assigned expert ranks via communication. @@ -273,9 +273,8 @@ def shared_experts_compute(self, hidden_states: torch.Tensor): return shared_expert_output - def routed_experts_compute( - self, hidden_states: torch.Tensor, probs: torch.Tensor, residual: torch.Tensor - ): + @internal_api + def routed_experts_compute(self, hidden_states: torch.Tensor, probs: torch.Tensor): """Computes the output of the routed experts on the dispatched tokens. This method first post-processes the dispatched input to get permuted tokens @@ -342,7 +341,7 @@ def custom_forward(hidden_states, padding_mask=None): try: shared_expert_output = self.shared_experts_compute(hidden_states) probs, routing_map = self.route(hidden_states, padding_mask=padding_mask) - hidden_states, probs, residual = self.preprocess(hidden_states, probs, routing_map) + hidden_states, probs = self.preprocess(hidden_states, probs, routing_map) except MoECudaGraphPartialCaptureSignal as e: # This signal is raised from the maybe_skip_or_early_return_by_cudagraph decorator. # It means we should early-return from the MoE layer forward pass. 
@@ -352,7 +351,7 @@ def custom_forward(hidden_states, padding_mask=None): return e.get_early_return_outputs(hidden_states, shared_expert_output) dispatched_input, probs = self.dispatch(hidden_states, probs) - output, mlp_bias = self.routed_experts_compute(dispatched_input, probs, residual) + output, mlp_bias = self.routed_experts_compute(dispatched_input, probs) assert mlp_bias is None, f"mlp_bias is not supported for {type(self.token_dispatcher)}" output = self.combine(output, shared_expert_output) diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index d915cfabb26..d38b06b2704 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -1,4 +1,5 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +import functools import math from dataclasses import dataclass from typing import List, Optional, Union @@ -1142,17 +1143,24 @@ def get_early_return_outputs( """ Get the CUDA graph early return outputs for the MoE layer, including the intermediate tensors and the intermediate attributes of the token dispatcher. + + The returned output tensors are in the order of: + - routed experts path outputs + - hidden states, probs, and routing map for capturing router + - hidden states and probs for capturing router and preprocess + - intermediate attributes of the token dispatcher (if capturing the preprocess step) + - shared expert path output (if exists) """ if self.return_step == "route": # Capturing the router step returns three intermediate tensors: # hidden states, routing probabilities, and routing map. outputs = [hidden_states, self.kwargs['probs'], self.kwargs['routing_map']] elif self.return_step == "preprocess": - # Capturing the preprocess step returns three intermediate tensors: - # hidden states, routing probabilities, and residual connection. 
+ # Capturing the preprocess step returns two intermediate tensors: + # hidden states and routing probabilities. # It also returns the intermediate attributes of the token dispatcher, recorded in # "token_dispatcher.cudagraph_attrs". - outputs = [self.kwargs['hidden_states'], self.kwargs['probs'], self.kwargs['residual']] + outputs = [self.kwargs['hidden_states'], self.kwargs['probs']] valid_cudagraph_attrs = [] for attr_name in self.moe_layer.token_dispatcher.cudagraph_attrs: hier_attr_name = attr_name.split('.') @@ -1180,6 +1188,7 @@ def get_early_return_outputs( return outputs +@internal_api @dataclass class MoECudaGraphTensorStore: """Storage for tensors used in CUDA graph replay for MoE layers. @@ -1192,8 +1201,6 @@ class MoECudaGraphTensorStore: probs (Optional[torch.Tensor]): The routing probabilities for each token-expert pair. routing_map (Optional[torch.Tensor]): The sparse mapping indicating which experts were selected for each token. Used to skip the normal router step. - residual (Optional[torch.Tensor]): The residual connection tensor before routing. - Used to skip the normal preprocess step. shared_expert_output (Optional[torch.Tensor]): The output from shared experts computation. Used to skip the normal shared expert computation step. 
""" @@ -1201,7 +1208,6 @@ class MoECudaGraphTensorStore: hidden_states: Optional[torch.Tensor] = None probs: Optional[torch.Tensor] = None routing_map: Optional[torch.Tensor] = None - residual: Optional[torch.Tensor] = None shared_expert_output: Optional[torch.Tensor] = None def is_empty(self) -> bool: @@ -1212,13 +1218,7 @@ def is_empty(self) -> bool: """ return all( getattr(self, field_name) is None - for field_name in [ - 'hidden_states', - 'probs', - 'routing_map', - 'residual', - 'shared_expert_output', - ] + for field_name in ['hidden_states', 'probs', 'routing_map', 'shared_expert_output'] ) def set(self, **kwargs): @@ -1228,7 +1228,6 @@ def set(self, **kwargs): 'hidden_states', 'probs', 'routing_map', - 'residual', 'shared_expert_output', ], f"Invalid field name: {field_name}" if value is not None: @@ -1239,13 +1238,7 @@ def set(self, **kwargs): def clear(self): """Reset all stored tensors to None.""" - for field_name in [ - 'hidden_states', - 'probs', - 'routing_map', - 'residual', - 'shared_expert_output', - ]: + for field_name in ['hidden_states', 'probs', 'routing_map', 'shared_expert_output']: setattr(self, field_name, None) @@ -1288,6 +1281,8 @@ def maybe_raise_signal(moe_layer, **kwargs): raise MoECudaGraphPartialCaptureSignal(moe_layer, "preprocess", **kwargs) def decorator(func): + + @functools.wraps(func) def wrapped_func(moe_layer, *args, **kwargs): """ Check if we should skip executing the original function based on the current @@ -1316,46 +1311,39 @@ def wrapped_func(moe_layer, *args, **kwargs): # Don't skip the router. assert ( moe_layer.cudagraph_tensor_store.routing_map is None - and moe_layer.cudagraph_tensor_store.residual is None - ), "both routing_map and residual must be None if probs is None" + ), "routing_map must be None if probs is None" probs, routing_map = func(moe_layer, *args, **kwargs) # Maybe early return after the router. 
maybe_raise_signal(moe_layer, probs=probs, routing_map=routing_map) else: # Skip the router and get value from store. - assert ( - moe_layer.cudagraph_tensor_store.routing_map is not None - or moe_layer.cudagraph_tensor_store.residual is not None - ), "either routing_map or residual must be given if probs is given" probs, routing_map = ( moe_layer.cudagraph_tensor_store.probs, moe_layer.cudagraph_tensor_store.routing_map, ) return probs, routing_map elif step_condition == "preprocess": - if moe_layer.cudagraph_tensor_store.residual is None: + if ( + moe_layer.cudagraph_tensor_store.is_empty() + or moe_layer.cudagraph_tensor_store.routing_map is not None + ): # Don't skip the preprocess. - hidden_states, probs, residual = func(moe_layer, *args, **kwargs) + hidden_states, probs = func(moe_layer, *args, **kwargs) # Maybe early return after the preprocess. - maybe_raise_signal( - moe_layer, hidden_states=hidden_states, probs=probs, residual=residual - ) + maybe_raise_signal(moe_layer, hidden_states=hidden_states, probs=probs) else: # Skip the preprocess and get value from store. 
assert ( - moe_layer.cudagraph_tensor_store.probs is not None - ), "probs must not be None if residual is not None" - assert ( - moe_layer.cudagraph_tensor_store.routing_map is None - ), "routing_map must be None if residual is not None" - hidden_states, probs, residual = ( + moe_layer.cudagraph_tensor_store.hidden_states is not None + and moe_layer.cudagraph_tensor_store.probs is not None + ), "hidden_states and probs must be given in moe_preprocess cudagraph replay" + hidden_states, probs = ( moe_layer.cudagraph_tensor_store.hidden_states, moe_layer.cudagraph_tensor_store.probs, - moe_layer.cudagraph_tensor_store.residual, ) - return hidden_states, probs, residual + return hidden_states, probs return wrapped_func diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 3a57f09f6cf..df11daeb095 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -723,11 +723,11 @@ class TransformerConfig(ModelParallelConfig): determines the scope of graph capture.""" cuda_graph_use_single_mempool: bool = False - """When set to true, cudagraphs will be captured inside a single mempool, in which all - cudagraphs may only be used once per step. If false, cudagraphs may be reused across - microbatches. Enabling may reduce cudagraph memory overheads due to memory fragmentation, - however may greatly increase the number of cudagraphs created when the number of microbatches - is high.""" + """[For `local` implementation only] When set to true, cudagraphs will be captured inside a + single mempool, in which all cudagraphs may only be used once per step. If false, cudagraphs may + be reused across microbatches. 
Enabling may reduce cudagraph memory overheads due to memory + fragmentation, however may greatly increase the number of cudagraphs created when the number of + microbatches is high.""" cuda_graph_retain_backward_graph: bool = False """When set to true, cudagraph backward passes will be graph captured with 'retain_grad=True' @@ -1739,64 +1739,46 @@ def __post_init__(self): ) if self.recompute_granularity: - if self.recompute_granularity != "selective" or not self.cuda_graph_scope: - raise ValueError( - "Full-layer CUDA graphs not supported with activation recomputation." - ) - elif self.cuda_graph_scope != [CudaGraphScope.full_iteration]: - # For scoped CUDA graphs, only the non-graphed parts of the layer can be - # recomputed. So check if there are overlaps between the recomputed parts - # and the graphed parts. - if CudaGraphScope.attn in self.cuda_graph_scope: - for module in self.recompute_modules: - if module in ['core_attn', 'mla_up_proj']: - raise ValueError( - f'attn cuda graph is not supported with {module} recompute.' - ) + if self.recompute_granularity != "selective": + assert self.cuda_graph_scope == [ + CudaGraphScope.full_iteration + ], "full recompute is only supported with full iteration CUDA graph." + else: + # The recompute module should be inside or outside of the graph scope. + # Recompute module coverring graph scope is not allowed. + if "moe" in self.recompute_modules: + assert ( + CudaGraphScope.moe_router not in self.cuda_graph_scope + ), "moe recompute is not supported with moe_router CUDA graph." + # Graphed recompute module doesn't accept random number. 
if ( - CudaGraphScope.mlp in self.cuda_graph_scope - and "mlp" in self.recompute_modules + not self.cuda_graph_scope + or CudaGraphScope.full_iteration in self.cuda_graph_scope ): - raise ValueError(f'mlp cuda graph is not supported with mlp recompute.') - if CudaGraphScope.moe in self.cuda_graph_scope: - for module in self.recompute_modules: - if module in ['moe_act', 'moe', 'shared_experts']: - raise ValueError( - f'moe cuda graph is not supported with {module} recompute.' - ) - if CudaGraphScope.moe_router in self.cuda_graph_scope: - for module in self.recompute_modules: - if module in ['moe', 'shared_experts']: - raise ValueError( - f'moe_router cuda graph is not supported with {module} ' - 'recompute.' - ) - if "layernorm" in self.recompute_modules: - if ( - CudaGraphScope.attn in self.cuda_graph_scope - and CudaGraphScope.mlp in self.cuda_graph_scope - and ( - CudaGraphScope.moe in self.cuda_graph_scope - or CudaGraphScope.moe_router in self.cuda_graph_scope - ) - ): - raise ValueError( - 'cuda graph is not supported with layernorm recompute.' - ) - if CudaGraphScope.attn in self.cuda_graph_scope: - warnings.warn( - "input_layernorm recompute is not supported with attention " - "cudagraph. Will only recompute the pre_mlp_layernorm." - ) - if ( - CudaGraphScope.mlp in self.cuda_graph_scope - or CudaGraphScope.moe in self.cuda_graph_scope - or CudaGraphScope.moe_router in self.cuda_graph_scope - ): - warnings.warn( - "pre_mlp_layernorm recompute is not supported with mlp/moe " - "cudagraph. Will only recompute the input_layernorm." - ) + full_cudagraph = True + else: + full_cudagraph = False + if self.attention_dropout != 0.0: + assert ( + not full_cudagraph and CudaGraphScope.attn not in self.cuda_graph_scope + ) or "core_attn" not in self.recompute_modules, ( + "attention dropout is not supported with graphed attention " + "recomputation." 
+ ) + if self.hidden_dropout != 0.0: + assert ( + (not full_cudagraph and CudaGraphScope.mlp not in self.cuda_graph_scope) + or "mlp" not in self.recompute_modules + ) and ( + (not full_cudagraph and CudaGraphScope.moe not in self.cuda_graph_scope) + or "moe" not in self.recompute_modules + ), "hidden dropout is not supported with graphed MLP/MoE recomputation." + if self.moe_input_jitter_eps is not None: + assert ( + not full_cudagraph and CudaGraphScope.moe not in self.cuda_graph_scope + ) or "moe" not in self.recompute_modules, ( + "moe_input_jitter_eps is not supported with graphed moe recomputation." + ) if self.moe_token_dispatcher_type in ["allgather"]: if self.variable_seq_lengths is True: diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index 53a1470c492..ce90aaf357a 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -381,24 +381,55 @@ def __init__( self.recompute_mlp = False if self.config.recompute_granularity == 'selective': if "layernorm" in self.config.recompute_modules: - if not isinstance(self.input_layernorm, IdentityOp) and ( - self.config.cuda_graph_impl == "none" - or CudaGraphScope.attn not in self.config.cuda_graph_scope - ): + if not isinstance(self.input_layernorm, IdentityOp): self.recompute_input_layernorm = True if self.config.fp8 or self.config.fp4: self.self_attention.set_for_recompute_input_layernorm() - if not isinstance(self.pre_mlp_layernorm, IdentityOp) and ( - self.config.cuda_graph_impl == "none" - or ( + + def can_recompute_pre_mlp_layernorm_for_cudagraph(): + if ( not self.is_moe_layer - and CudaGraphScope.mlp not in self.config.cuda_graph_scope - ) - or ( - self.is_moe_layer - and CudaGraphScope.moe not in self.config.cuda_graph_scope - and CudaGraphScope.moe_router not in self.config.cuda_graph_scope + or CudaGraphScope.moe_router not in self.config.cuda_graph_scope + ): + # Not a MoE layer, or not 
capturing the router part. + return True + if ( + self.config.moe_shared_expert_intermediate_size is not None + and self.config.moe_shared_expert_overlap + ): + # If shared expert overlap is used, we cannot make the pre-mlp layernorm + # recomputation, because the shared expert takes the layernorm output as + # input, and it is outside of the CUDA graph scope. + log_single_rank( + logger, + logging.WARNING, + "pre_mlp_layernorm recompute is not supported with moe router " + "cudagraph + shared expert overlap. Disabling pre_mlp_layernorm " + "recompute.", + ) + return False + if CudaGraphScope.moe_preprocess in self.config.cuda_graph_scope and ( + self.config.moe_token_dispatcher_type == "alltoall" + or self.config.moe_latent_size + ): + # Only when capturing the preprocess part and using alltoall token + # dispatcher or latent MoE can we make the pre-mlp layernorm recomputation. + # Because in other cases the layernorm output returns directly as one of the + # outputs of the cudagraph, which will be allocated a static buffer, thus + # not able to be released. + return True + log_single_rank( + logger, + logging.WARNING, + "pre_mlp_layernorm recompute is only supported with moe router + " + "preprocess cudagraph will alltoall token dispatcher or latent MoE. " + "Disabling pre_mlp_layernorm recompute.", ) + return False + + if ( + not isinstance(self.pre_mlp_layernorm, IdentityOp) + and can_recompute_pre_mlp_layernorm_for_cudagraph() ): self.recompute_pre_mlp_layernorm = True if self.config.fp8 or self.config.fp4: @@ -645,20 +676,7 @@ def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None) and not isinstance(self.mlp, IdentityOp) ) - if ( - self.is_moe_layer - and self.config.cuda_graph_impl == "transformer_engine" - and self.training - and is_graph_capturing() - and CudaGraphScope.moe_router in self.config.cuda_graph_scope - ): - assert ( - not self.recompute_pre_mlp_layernorm - ), "Recomputation is not supported for CUDA graph." 
- cudagraph_outputs = self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask) - nvtx_range_pop(suffix="mlp") - return cudagraph_outputs + [residual] - elif self.recompute_mlp: + if self.recompute_mlp: if self.config.fp8 or self.config.fp4: # import here to avoid circular import from megatron.core.extensions.transformer_engine import te_checkpoint @@ -701,7 +719,23 @@ def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None) ) nvtx_range_pop(suffix="mlp") - return self._forward_post_mlp(mlp_output_with_bias, residual) + if ( + self.is_moe_layer + and self.config.cuda_graph_impl == "transformer_engine" + and self.training + and is_graph_capturing() + and CudaGraphScope.moe_router in self.config.cuda_graph_scope + ): + if self.recompute_pre_mlp_layernorm: + # Register the recompute hooks to all the cudagraph output tensors, because some + # tensors are in parallel execution paths and they all need pre_mlp_layernorm to be + # recomputed in backward pass. For example, the router path and the shared expert + # path. So only register in one path is risky. + for tensor in mlp_output_with_bias[1:]: + self.pre_mlp_norm_checkpoint.discard_output_and_register_recompute(tensor) + return list(mlp_output_with_bias) + [residual] + else: + return self._forward_post_mlp(mlp_output_with_bias, residual) def _forward_post_mlp(self, mlp_output_with_bias, residual): """ @@ -895,20 +929,19 @@ def _te_cuda_graph_replay(self, *args, **kwargs): elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope: # CUDA Graph partially captures the MoE. # The rest of the layer should go to the normal pass. - shared_expert_output, routing_map, residual = None, None, None - mlp_residual = cuda_graph_output.pop() + shared_expert_output, routing_map = None, None + # residual is the last element in the CUDA graph output. 
+ residual = cuda_graph_output.pop() if ( self.config.moe_shared_expert_intermediate_size is not None and not self.config.moe_shared_expert_overlap ): - # The shared expert output is the fourth element in the CUDA graph output. + # The shared expert output is the second-to-last element in the CUDA graph output. shared_expert_output = cuda_graph_output.pop() - # Split cudagraph outputs into function outputs and attribute outputs, and - # process them separately. Function outputs should have three tensors. - func_output, attr_outputs = cuda_graph_output[:3], cuda_graph_output[3:] if CudaGraphScope.moe_preprocess in self.config.cuda_graph_scope: - hidden_states, probs, residual = func_output + # CUDA graph output is [hidden_states, probs] + attribute outputs. + (hidden_states, probs), attr_outputs = cuda_graph_output[:2], cuda_graph_output[2:] valid_cudagraph_attrs = self.mlp.token_dispatcher.valid_cudagraph_attrs assert len(attr_outputs) == len( valid_cudagraph_attrs @@ -920,8 +953,12 @@ def _te_cuda_graph_replay(self, *args, **kwargs): attr = getattr(attr, name) setattr(attr, hier_attr_name[-1], attr_outputs[i]) else: - hidden_states, probs, routing_map = func_output - assert not attr_outputs, "cuda_graph_attr_outputs should be empty" + # CUDA graph output is [hidden_states, probs, routing_map]. + assert len(cuda_graph_output) == 3, ( + "CUDA graph output should be [hidden_states, probs, routing_map], " + f"but got {len(cuda_graph_output)} elements" + ) + hidden_states, probs, routing_map = cuda_graph_output # Resume the MoELayer forward pass from the end of the CUDA graph scope.
# The MoE layer will skip redundant computations when we pass in the calculated values @@ -931,37 +968,32 @@ def _te_cuda_graph_replay(self, *args, **kwargs): hidden_states=hidden_states, probs=probs, routing_map=routing_map, - residual=residual, shared_expert_output=shared_expert_output, ) # If EP overlap is enabled, remaining of mlp will be called as fine_grained_callables # and should be skipped here. if self.config.overlap_moe_expert_parallel_comm: probs, routing_map = self.mlp.route(hidden_states) - hidden_states, probs, residual = self.mlp.preprocess( - hidden_states, probs, routing_map - ) + hidden_states, probs = self.mlp.preprocess(hidden_states, probs, routing_map) nvtx_range_pop(suffix="mlp") - return mlp_residual, hidden_states, probs, shared_expert_output + return residual, hidden_states, probs, shared_expert_output mlp_output_with_bias = self.mlp(hidden_states) self.mlp.cudagraph_tensor_store.clear() nvtx_range_pop(suffix="mlp") - output = self._forward_post_mlp(mlp_output_with_bias, mlp_residual) + output = self._forward_post_mlp(mlp_output_with_bias, residual) else: # If EP overlap is enabled, needs to return same outputs as submodule.attn if self.config.overlap_moe_expert_parallel_comm: assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output." 
- mlp_residual = cuda_graph_output.pop() + residual = cuda_graph_output.pop() if not self.is_moe_layer: - return mlp_residual, None, None, None - hidden_states = self.pre_mlp_layernorm(mlp_residual) + return residual, None, None, None + hidden_states = self.pre_mlp_layernorm(residual) shared_expert_output = self.mlp.shared_experts_compute(hidden_states) probs, routing_map = self.mlp.route(hidden_states) - hidden_states, probs, residual = self.mlp.preprocess( - hidden_states, probs, routing_map - ) - return mlp_residual, hidden_states, probs, shared_expert_output + hidden_states, probs = self.mlp.preprocess(hidden_states, probs, routing_map) + return residual, hidden_states, probs, shared_expert_output # CUDA Graph does not capture the MLP/MoE part at all. output = self._forward_mlp(*cuda_graph_output) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 9aba3a7cb8e..5f9e7350c18 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1322,9 +1322,6 @@ def validate_args(args, defaults={}): "Setting NCCL_GRAPH_REGISTER=0 to avoid illegal memory access when using " "CUDA Graph with PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True." ) - assert ( - args.recompute_granularity != 'full' - ), 'recompute_granularity must not be full when CUDA Graphs are enabled.' 
if args.cuda_graph_scope == "full" or ( isinstance(args.cuda_graph_scope, list) and "full" in args.cuda_graph_scope ): From b927e1fa1a90e218c64129280531d34377a66c72 Mon Sep 17 00:00:00 2001 From: xuwchen <79835960+xuwchen@users.noreply.github.com> Date: Fri, 16 Jan 2026 14:55:03 +0800 Subject: [PATCH 237/334] [Dev] docs(megatron-fsdp): add Megatron-FSDP user guide (#2397) --- docs/api-guide/custom_fsdp.md | 2 + docs/discussions/README.md | 10 +- .../sbatch_checkpoint_convert.sh | 50 ++++ .../sbatch_mfsdp_deepseek_v3.sh | 223 ++++++++++++++++++ .../megatron-fsdp-user-guide.md | 116 +++++++++ 5 files changed, 397 insertions(+), 4 deletions(-) create mode 100644 docs/discussions/megatron-fsdp-user-guide/example-scripts/sbatch_checkpoint_convert.sh create mode 100644 docs/discussions/megatron-fsdp-user-guide/example-scripts/sbatch_mfsdp_deepseek_v3.sh create mode 100644 docs/discussions/megatron-fsdp-user-guide/megatron-fsdp-user-guide.md diff --git a/docs/api-guide/custom_fsdp.md b/docs/api-guide/custom_fsdp.md index e265de8ae4b..faa262ee7fa 100644 --- a/docs/api-guide/custom_fsdp.md +++ b/docs/api-guide/custom_fsdp.md @@ -13,6 +13,8 @@ Add these flag to enable MCore custom FSDP. --use-distributed-optimizer ``` +For a practical guide covering required configurations, checkpoint conversion, and example scripts, see the [Megatron-FSDP User Guide](../../discussions/megatron-fsdp-user-guide/megatron-fsdp-user-guide.md). + ## Key Features - **Sharding Strategy**: Efficiently shards optimizer states, gradients, and parameters to reduce memory consumption. 
diff --git a/docs/discussions/README.md b/docs/discussions/README.md index 26a2a8e1648..81b1a58d5b0 100644 --- a/docs/discussions/README.md +++ b/docs/discussions/README.md @@ -6,14 +6,16 @@ This directory contains in-depth guides, tutorials, and discussions about optimi ### Performance Optimization -- **[Optimizing DeepSeek-V3 Training Performance on NVIDIA GB200 NVL72](deepseek-v3-gb200-optimization/deepseek-v3-gb200-optimization.md)** - - A comprehensive guide on optimizing DeepSeek-V3 model training on NVIDIA GB200 NVL72 systems, covering profiling techniques, performance bottlenecks, and optimization strategies. - - **[A Guide to Reproduce DeepSeek-V3 Pre-training Performance on GB200](deepseek-v3-gb200-optimization/deepseek-v3-gb200-reproduce-guide.md)** A detailed guide on how to reproduce the DeepSeek-V3 pre-training performance on GB200, incluing the dockerfile, package requirements and training scripts. +### Training Guides + +- **[Megatron-FSDP User Guide](megatron-fsdp-user-guide/megatron-fsdp-user-guide.md)** + + A practical guide to enable Megatron-FSDP training, including a quick-start example for DeepSeek-V3, required and recommended configurations, and instructions for checkpoint conversion from torch_dist to fsdp_dtensor. 
+ ## Contributing If you'd like to contribute a guide or tutorial, please follow this structure: diff --git a/docs/discussions/megatron-fsdp-user-guide/example-scripts/sbatch_checkpoint_convert.sh b/docs/discussions/megatron-fsdp-user-guide/example-scripts/sbatch_checkpoint_convert.sh new file mode 100644 index 00000000000..9f302c93f8f --- /dev/null +++ b/docs/discussions/megatron-fsdp-user-guide/example-scripts/sbatch_checkpoint_convert.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Configuration: Set these paths before running the script +MEGATRON_PATH=${MEGATRON_PATH:-"your_own_megatron_path"} # Path to Megatron-LM repository +CONTAINER_IMAGE=${CONTAINER_IMAGE:-"your_own_container_image"} # Path to .sqsh or docker image url +OUTPUT_PATH=${OUTPUT_PATH:-"your_own_output_path"} # Path for SLURM logs + +# Checkpoint conversion command +# Note: Update the checkpoint paths in the command below +RUN_CMD=" +cd ${MEGATRON_PATH}; +git rev-parse HEAD; +export PYTHONPATH=${MEGATRON_PATH}:${PYTHONPATH}; +python3 tools/checkpoint/checkpoint_inspector.py \ + convert-torch-dist-to-fsdp-dtensor --swiglu \ + your_own_path_to_input_torch_dist_checkpoint \ + your_own_path_to_output_fsdp_dtensor_checkpoint \ + --param-to-param-group-map-json your_own_path_to_param_to_param_group_map.json" + +# SLURM settings +SLURM_LOGS="${OUTPUT_PATH}/slurm_logs" +mkdir -p ${SLURM_LOGS} || { + echo "Error: Failed to create SLURM logs directory ${SLURM_LOGS}" + exit 1 +} + +# Submit SLURM job +# Note: Update SBATCH parameters below according to your cluster configuration +set +e +sbatch <&1 | tee ${SLURM_LOGS}/\${SLURM_JOB_ID}.log + +EOF +set -e diff --git a/docs/discussions/megatron-fsdp-user-guide/example-scripts/sbatch_mfsdp_deepseek_v3.sh b/docs/discussions/megatron-fsdp-user-guide/example-scripts/sbatch_mfsdp_deepseek_v3.sh new file mode 100644 index 00000000000..7b93d25d943 --- /dev/null +++ b/docs/discussions/megatron-fsdp-user-guide/example-scripts/sbatch_mfsdp_deepseek_v3.sh @@ -0,0 +1,223 @@ 
+#!/bin/bash + +export NCCL_IB_SL=1 +export NCCL_IB_TIMEOUT=19 +export NVTE_FWD_LAYERNORM_SM_MARGIN=16 +export NVTE_BWD_LAYERNORM_SM_MARGIN=16 +export NCCL_P2P_NET_CHUNKSIZE=2097152 +export TORCH_NCCL_AVOID_RECORD_STREAMS=1 +export PYTHONWARNINGS=ignore +export TRITON_CACHE_DIR=/tmp/triton_cache_$SLURM_NODEID + +# Configuration: Set these variables before running the script +MEGATRON_PATH=${MEGATRON_PATH:-"your_own_megatron_path"} # Path to Megatron-LM repository +CONTAINER_IMAGE=${CONTAINER_IMAGE:-"your_own_container_image"} # Path to .sqsh or docker image url +OUTPUT_PATH=${OUTPUT_PATH:-"your_own_output_path"} # Path for output logs and checkpoints +DATA_PATH=${DATA_PATH:-"your_own_data_path"} +USE_MEGATRON_FSDP=${USE_MEGATRON_FSDP:-1} +SHARDING_STRATEGY=${SHARDING_STRATEGY:-"optim_grads_params"} +PROFILE=${PROFILE:-0} +WANDB=${WANDB:-1} + +TP=${TP:-1} +EP=${EP:-8} +MBS=${MBS:-4} +GBS=${GBS:-2048} +COMMENT=${COMMENT:-"hybridep-selective-recompute"} + +PRETRAIN_ARGS=( + --distributed-timeout-minutes 60 + --tensor-model-parallel-size ${TP} + --expert-model-parallel-size ${EP} + --expert-tensor-parallel-size 1 + --context-parallel-size 1 + --use-distributed-optimizer + --overlap-grad-reduce + --overlap-param-gather + --use-mcore-models + --sequence-parallel + --use-flash-attn + --disable-bias-linear + --micro-batch-size ${MBS} + --global-batch-size ${GBS} + --train-samples 585937500 + --exit-duration-in-mins 220 + --no-check-for-nan-in-loss-and-grad + --manual-gc + --manual-gc-interval 10 + --recompute-granularity selective + --recompute-modules mlp moe mla_up_proj layernorm + --transformer-impl transformer_engine + --seq-length 4096 + --data-cache-path ${OUTPUT_PATH}/cache + --tokenizer-type HuggingFaceTokenizer + --tokenizer-model deepseek-ai/DeepSeek-V3 + --data-path ${DATA_PATH} + --split 99,1,0 + --no-mmap-bin-files + --no-create-attention-mask-in-dataloader + --num-workers 6 + --num-layers 61 + --hidden-size 7168 + --ffn-hidden-size 18432 + 
--num-attention-heads 128 + --kv-channels 128 + --max-position-embeddings 4096 + --position-embedding-type rope + --rotary-base 10000 + --make-vocab-size-divisible-by 3232 + --normalization RMSNorm + --norm-epsilon 1e-6 + --swiglu + --untie-embeddings-and-output-weights + --multi-latent-attention + --attention-dropout 0.0 + --hidden-dropout 0.0 + --clip-grad 1.0 + --weight-decay 0.1 + --qk-layernorm + --lr-decay-samples 584765624 + --lr-warmup-samples 1536000 + --lr-warmup-init 3.9e-7 + --lr 3.9e-6 + --min-lr 3.9e-7 + --lr-decay-style cosine + --adam-beta1 0.9 + --adam-beta2 0.95 + --num-experts 256 + --moe-layer-freq [0]*3+[1]*58 + --moe-ffn-hidden-size 2048 + --moe-shared-expert-intermediate-size 2048 + --moe-router-load-balancing-type seq_aux_loss + --moe-router-topk 8 + --moe-token-dispatcher-type flex + --moe-flex-dispatcher-backend hybridep + --moe-router-pre-softmax + --moe-grouped-gemm + --moe-aux-loss-coeff 1e-4 + --moe-router-group-topk 4 + --moe-router-num-groups 8 + --moe-router-topk-scaling-factor 2.5 + --moe-router-score-function sigmoid + --moe-router-enable-expert-bias + --moe-router-bias-update-rate 1e-3 + --moe-router-dtype fp32 + --moe-permute-fusion + --moe-router-force-load-balancing + --q-lora-rank 1536 + --kv-lora-rank 512 + --qk-head-dim 128 + --qk-pos-emb-head-dim 64 + --v-head-dim 128 + --rotary-scaling-factor 40 + --mscale 1.0 + --mscale-all-dim 1.0 + --mtp-num-layers 1 + --mtp-loss-scaling-factor 0.1 + --eval-iters 32 + --eval-interval 100 + --auto-detect-ckpt-format + --load ${OUTPUT_PATH}/checkpoints + --save ${OUTPUT_PATH}/checkpoints + --save-interval 100 + --dist-ckpt-strictness log_all + --init-method-std 0.02 + --log-timers-to-tensorboard + --log-memory-to-tensorboard + --log-num-zeros-in-grad + --log-params-norm + --log-validation-ppl-to-tensorboard + --log-throughput + --log-interval 1 + --logging-level 40 + --tensorboard-dir ${OUTPUT_PATH}/tensorboard + --bf16 + --enable-experimental +) + +if [ "${USE_MEGATRON_FSDP}" = 1 ]; 
then + unset CUDA_DEVICE_MAX_CONNECTIONS + PRETRAIN_ARGS=( + "${PRETRAIN_ARGS[@]}" + --use-megatron-fsdp + --data-parallel-sharding-strategy ${SHARDING_STRATEGY} + --no-gradient-accumulation-fusion + --use-distributed-optimizer + --calculate-per-token-loss + --init-model-with-meta-device + --ckpt-format fsdp_dtensor + --grad-reduce-in-bf16 + --fsdp-double-buffer + --use-nccl-ub + ) +fi + +# Profiling command +if [ "${PROFILE}" = 1 ]; then + PROFILE_CMD="nsys profile --sample=none --cpuctxsw=none --trace=cuda,nvtx,cublas,cudnn \ + --capture-range=cudaProfilerApi \ + --capture-range-end=stop \ + --cuda-graph-trace=node \ + --cuda-memory-usage=true \ + -f true -x true \ + -o ${OUTPUT_PATH}/nsys/Megatron-FSDP-Deepseek-V3-TP${TP}EP${EP}-MBS${MBS}GBS${GBS}-${COMMENT}" + PRETRAIN_ARGS=( + "${PRETRAIN_ARGS[@]}" + --profile + --profile-step-start 10 + --profile-step-end 12 + --profile-ranks 0 + ) + echo "PROFILE_CMD=" + echo $PROFILE_CMD +else + PROFILE_CMD="" +fi + +if [ "${WANDB}" = 1 ]; then + export WANDB_API_KEY=${WANDB_API_KEY:-"your_own_wandb_api_key"} + PRETRAIN_ARGS=( + "${PRETRAIN_ARGS[@]}" + --wandb-project your_own_wandb_project + --wandb-exp-name DeepSeek-V3-TP${TP}EP${EP}-MBS${MBS}GBS${GBS}-${COMMENT} + ) +fi + +TRAINING_CMD=" +cd ${MEGATRON_PATH}; +git rev-parse HEAD; +export PYTHONPATH=${MEGATRON_PATH}:${PYTHONPATH}; +${PROFILE_CMD} python ${MEGATRON_PATH}/pretrain_gpt.py ${PRETRAIN_ARGS[@]}" + +# SLURM settings +SLURM_LOGS="${OUTPUT_PATH}/slurm_logs" +mkdir -p ${SLURM_LOGS} || { + echo "Error: Failed to create SLURM logs directory ${SLURM_LOGS}" + exit 1 +} + +# Submit SLURM job +# Note: Update SBATCH parameters below according to your cluster configuration +set +e +sbatch <&1 | tee ${SLURM_LOGS}/\${SLURM_JOB_ID}.log + +EOF +set -e diff --git a/docs/discussions/megatron-fsdp-user-guide/megatron-fsdp-user-guide.md b/docs/discussions/megatron-fsdp-user-guide/megatron-fsdp-user-guide.md new file mode 100644 index 00000000000..c2354ad07f0 --- /dev/null +++ 
b/docs/discussions/megatron-fsdp-user-guide/megatron-fsdp-user-guide.md @@ -0,0 +1,116 @@ +# Megatron-FSDP User Guide + +## Table of Contents + +- [Megatron-FSDP Quick Start](#megatron-fsdp-quick-start) +- [Checkpoint Conversion from 3D-Parallel to Megatron-FSDP](#checkpoint-conversion-from-3d-parallel-to-megatron-fsdp) + +## Megatron-FSDP Quick Start + +We recommend using the latest [NVIDIA NeMo Framework Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo/tags), which provides a tested software stack and optimized performance. + +For your reference, we provide an example launch script for DeepSeek-V3: [`sbatch_mfsdp_deepseek_v3.sh`](./example-scripts/sbatch_mfsdp_deepseek_v3.sh). + +### Required Configurations + +To enable Megatron-FSDP, add the following required flags to your training script: + +```bash +--use-megatron-fsdp +--data-parallel-sharding-strategy optim_grads_params +--no-gradient-accumulation-fusion +--use-distributed-optimizer +--ckpt-format fsdp_dtensor +``` + +### Recommended Configurations + +We also recommend adding the following configurations to further improve performance: + +```bash +unset CUDA_DEVICE_MAX_CONNECTIONS +``` +```bash +--calculate-per-token-loss +--init-model-with-meta-device +--grad-reduce-in-bf16 +--fsdp-double-buffer +--use-nccl-ub +``` + +💡 **Detailed explanations of these configurations are provided below.** + +#### 1. Disable `CUDA_DEVICE_MAX_CONNECTIONS` + +To ensure full parallelization of FSDP communication and computation, disable the CUDA_DEVICE_MAX_CONNECTIONS environment variable. This step avoids potential bubbles in the CUDA stream. (But it may slow down TP and CP to some extent.) + +#### 2. Add `--calculate-per-token-loss` + +For gradients sharding mode optimization, include the `--calculate-per-token-loss` flag in your training script. This improves performance by reducing the frequency of gradient scaling, which is also a sizable drain on SM resources. + +#### 3. 
Add `--init-model-with-meta-device` + +Allows model initialization using meta device, followed by layer-by-layer initialization of distributed model weight buffers via the `Module.reset_parameters` API, facilitating the initialization of extremely large models. + +#### 4. Add `--grad-reduce-in-bf16` + +Enables gradient reduction in BF16 precision instead of FP32, reducing communication volume and accelerating the backward pass. + +#### 5. Add `--fsdp-double-buffer` + +Uses persistently allocated double buffers for temporarily-defined memory needed in `MegatronFSDP` communications. While having persistent double buffers may increase peak VRAM utilization, it is necessary to register NCCL user buffers (`nccl_ub=True`) for `MegatronFSDP`. Currently, this is supported only for simple repetitive model structures such as GPT. + +- **Only effective when using Megatron-LM.** +- Defaults to `False`. Automatically overridden to `True` when `nccl_ub` is enabled. + +#### 6. Add `--use-nccl-ub` + +Allocates and [registers NCCL user buffers](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/bufferreg.html#) for param and grad buffers. This option enables an SM-efficient NCCL algorithm that could improve the performance of overlapped computations. This flag will be much more effective when used together with [SHARP](https://docs.nvidia.com/networking/display/sharpv3130) if the FSDP communication includes both NVL and IB domains. Enabling this option will cause additional memory overhead due to the requirement to enable the `fsdp_double_buffer` option. + +- **Only effective when using Megatron-LM.** +- Defaults to `False`. +- By default we try to use NCCL window (symmetric) registration if it is available. If not it falls back to conventional local registration. 
+- **Incompatible with PyTorch's segmentable allocator:** Do not set `PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True` when using `--use-nccl-ub`, as this will cause a runtime error due to compatibility issues with the `torch.cuda.MemPool` API. + +## Checkpoint Conversion from 3D-Parallel to Megatron-FSDP + +Megatron-FSDP introduces `fsdp_dtensor`, a DTensor-based distributed checkpoint format that serves as its standard. To help you smoothly transition from 3D-Parallel to Megatron-FSDP, we provide a script for converting checkpoints from the `torch_dist` format to the `fsdp_dtensor` format. Using DeepSeek-V3 as an example, the detailed conversion process is described below. + +### Step 1: Generate 3D-Parallel Checkpoint with `param_to_param_group_map` + +Run your 3D-parallel + EP training script to generate a `torch_dist` checkpoint along with a directory containing `param_to_param_group_map` files. Add the following flag to your training script: + +```bash +--dump-param-to-param-group-map /path/to/param_to_param_group_map +``` + +If you already have a `torch_dist` checkpoint, simply specify the `--dump-param-to-param-group-map /path/to/param_to_param_group_map` flag and run a very short experiment-this will create the `param_to_param_group_map` you need without full pretraining. + +### Step 2: Export `param_to_param_group_map` to a JSON File + +Convert the `param_to_param_group_map` into a JSON file for easier processing by running: + +```bash +python tools/checkpoint/checkpoint_inspector.py print-torch-dcp-in-json /path/to/param_to_param_group_map +``` + +This will create a `param_to_param_group_map.json` file in the `/path/to/param_to_param_group_map` directory. 
+ + ### Step 3: Convert Checkpoint from `torch_dist` to `fsdp_dtensor` + + Convert your `torch_dist` checkpoint to the `fsdp_dtensor` format using the `param_to_param_group_map` JSON file: + + ```bash + torchrun --nproc_per_node=8 --nnodes=1 \ + tools/checkpoint/checkpoint_inspector.py \ + convert-torch-dist-to-fsdp-dtensor --swiglu \ + /path/to/input_torch_dist_checkpoint \ + /path/to/output_fsdp_dtensor_checkpoint \ + --param-to-param-group-map-json /path/to/param_to_param_group_map.json + ``` + + **Note:** For multi-node conversion tasks, please refer to the example script: [`sbatch_checkpoint_convert.sh`](./example-scripts/sbatch_checkpoint_convert.sh). + + ### Step 4: Launch Megatron-FSDP Training + + Start your Megatron-FSDP training job using the converted `fsdp_dtensor` checkpoint. \ No newline at end of file From 6b157e007138c28f5ea25d79a7f4661800f3f8b4 Mon Sep 17 00:00:00 2001 From: hx Date: Fri, 16 Jan 2026 23:17:05 +0800 Subject: [PATCH 238/334] [Dev] Optimizer State and Master Weight Offloading (#2760) Co-authored-by: Xin Yao --- .../optimizer_state_offloader.py | 315 ++++++++++++++++ megatron/core/optimizer/distrib_optimizer.py | 25 ++ megatron/core/optimizer/optimizer_config.py | 6 + megatron/training/arguments.py | 13 + megatron/training/training.py | 30 +- .../test_optimizer_state_offloading.py | 337 ++++++++++++++++++ 6 files changed, 725 insertions(+), 1 deletion(-) create mode 100644 megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py create mode 100644 tests/unit_tests/test_optimizer_state_offloading.py diff --git a/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py b/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py new file mode 100644 index 00000000000..81fd116c8ba --- /dev/null +++ b/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py @@ -0,0 +1,315 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ +"""Optimizer state offloading class.""" + +from typing import TYPE_CHECKING, Dict, List, Tuple + +import torch + +if TYPE_CHECKING: + from megatron.core.optimizer.distrib_optimizer import DistributedOptimizer + + +class OptimizerStateOffloader: + """ + Manages offloading of optimizer states and master weights to CPU. + Used with DistributedOptimizer to reduce GPU memory usage. + + Supports overlapped D2H/H2D transfers using CUDA streams. + + Master weights can be stored in two locations: + - In adam optimizer state (when use_precision_aware_optimizer_no_fp8_or_ds_fp8 is True) + - In mcore's shard_fp32_from_float16_groups + """ + + OPTIMIZER_STATE_KEYS = ('exp_avg', 'exp_avg_sq') + MASTER_WEIGHT_KEY = 'master_param' + + def __init__(self, distrib_optimizer: "DistributedOptimizer"): + """ + Args: + distrib_optimizer: The DistributedOptimizer to offload states and master weights from. + """ + self.dist_optimizer = distrib_optimizer + self.adam_optimizer = distrib_optimizer.optimizer + + # Only support TE FusedAdam optimizer for now. 
+ try: + from transformer_engine.pytorch.optimizers import FusedAdam + + assert isinstance(self.adam_optimizer, FusedAdam), ( + f"OptimizerStateOffloader requires TE FusedAdam optimizer, " + f"but got {type(self.adam_optimizer).__name__}" + ) + except ImportError: + raise ImportError( + "OptimizerStateOffloader requires transformer_engine.pytorch.optimizers.FusedAdam" + ) + + # Check if master weights are stored in adam optimizer state + self.optimizer_contains_master_weights = self.adam_optimizer.master_weights + + # CUDA streams for async transfers + self._d2h_stream = torch.cuda.Stream() + self._h2d_stream = torch.cuda.Stream() + + # CPU buffers for optimizer states: {param: {key: cpu_tensor}} + self._opt_state_cpu_buffers: Dict[torch.Tensor, Dict[str, torch.Tensor]] = {} + + # CPU buffers for mcore master weights, matching the structure of source groups + # List[List[cpu_tensor]] + self._shard_fp32_from_float16_cpu_buffers: List[List[torch.Tensor]] = [] + + # State tracking + self._offloaded = False + self._offloaded_state_keys: Tuple[str, ...] = () + self._offloaded_mcore_master_weights = False + + # Track whether optimizer states (exp_avg, exp_avg_sq) have been initialized. + # These are lazily initialized by FusedAdam during the first optimizer.step(). + # Master weights (shard_fp32_from_float16_groups) are available from the start. + self._optimizer_states_initialized = False + + def mark_optimizer_states_initialized(self): + """ + Mark that optimizer states (exp_avg, exp_avg_sq) are now available. + Should be called after the first optimizer.step() completes. + """ + self._optimizer_states_initialized = True + + def _get_state_keys_to_offload( + self, offload_optimizer_states: bool, offload_master_weights: bool + ) -> Tuple[str, ...]: + """Get the state keys in FusedAdam to offload based on configuration.""" + keys = [] + # Skip optimizer states offloading if they haven't been initialized yet. 
+ # Optimizer states are lazily initialized by FusedAdam during the first optimizer.step(). + if self._optimizer_states_initialized: + if offload_optimizer_states: + keys.extend(self.OPTIMIZER_STATE_KEYS) + if offload_master_weights and self.optimizer_contains_master_weights: + keys.append(self.MASTER_WEIGHT_KEY) + return tuple(keys) + + def _ensure_state_cpu_buffer( + self, param: torch.Tensor, state_key: str, gpu_tensor: torch.Tensor, pin_memory: bool = True + ) -> torch.Tensor: + """Get or create a CPU buffer for a state tensor.""" + if param not in self._opt_state_cpu_buffers: + self._opt_state_cpu_buffers[param] = {} + + if state_key not in self._opt_state_cpu_buffers[param]: + cpu_buffer = torch.empty( + gpu_tensor.size(), + dtype=gpu_tensor.dtype, + layout=gpu_tensor.layout, + device='cpu', + pin_memory=pin_memory, + ) + self._opt_state_cpu_buffers[param][state_key] = cpu_buffer + + return self._opt_state_cpu_buffers[param][state_key] + + def _offload_shard_groups( + self, + shard_groups: List[List[torch.Tensor]], + cpu_buffers: List[List[torch.Tensor]], + pin_memory: bool = True, + ): + """Offload a shard group to CPU buffers.""" + # Initialize CPU buffers on first call + if len(cpu_buffers) == 0: + for group in shard_groups: + group_buffers = [] + for gpu_tensor in group: + cpu_buffer = torch.empty( + gpu_tensor.size(), + dtype=gpu_tensor.dtype, + layout=gpu_tensor.layout, + device='cpu', + pin_memory=pin_memory, + ) + group_buffers.append(cpu_buffer) + cpu_buffers.append(group_buffers) + + # Copy D2H + for group_idx, group in enumerate(shard_groups): + for param_idx, gpu_tensor in enumerate(group): + cpu_buffer = cpu_buffers[group_idx][param_idx] + cpu_buffer.copy_(gpu_tensor, non_blocking=pin_memory) + gpu_tensor.record_stream(self._d2h_stream) + + def _offload_states( + self, + offload_optimizer_states: bool, + offload_master_weights: bool, + use_pin_memory: bool = True, + ): + """Offload optimizer states and/or master weights to CPU.""" + # Offload 
states from adam optimizer + self._offloaded_state_keys = self._get_state_keys_to_offload( + offload_optimizer_states, offload_master_weights + ) + states = self.adam_optimizer.state + + for param, param_state in states.items(): + for state_key in self._offloaded_state_keys: + if state_key not in param_state: + continue + + gpu_tensor = param_state[state_key] + if not isinstance(gpu_tensor, torch.Tensor) or not gpu_tensor.is_cuda: + continue + + cpu_buffer = self._ensure_state_cpu_buffer( + param, state_key, gpu_tensor, use_pin_memory + ) + cpu_buffer.copy_(gpu_tensor, non_blocking=use_pin_memory) + gpu_tensor.record_stream(self._d2h_stream) + + # Offload mcore master weights if not in optimizer state + if offload_master_weights and not self.optimizer_contains_master_weights: + self._offload_shard_groups( + self.dist_optimizer.shard_fp32_from_float16_groups, + self._shard_fp32_from_float16_cpu_buffers, + use_pin_memory, + ) + self._offloaded_mcore_master_weights = True + + def _release_states(self): + """Replace optimizer state GPU tensors with CPU tensors to free GPU memory.""" + states = self.adam_optimizer.state + + for param, param_state in states.items(): + if param not in self._opt_state_cpu_buffers: + continue + + for state_key in self._offloaded_state_keys: + if state_key not in self._opt_state_cpu_buffers[param]: + continue + + param_state[state_key].untyped_storage().resize_(0) + + if self._offloaded_mcore_master_weights: + for group in self.dist_optimizer.shard_fp32_from_float16_groups: + for gpu_tensor in group: + gpu_tensor.untyped_storage().resize_(0) + + def _reload_shard_groups( + self, + shard_groups: List[List[torch.Tensor]], + cpu_buffers: List[List[torch.Tensor]], + is_allocate_stage: bool, + ): + """Reload shard groups from CPU to GPU.""" + for group_idx, group in enumerate(shard_groups): + for param_idx, _ in enumerate(group): + cpu_buffer = cpu_buffers[group_idx][param_idx] + if is_allocate_stage: + 
shard_groups[group_idx][param_idx].untyped_storage().resize_( + cpu_buffer.untyped_storage().size() + ) + else: + shard_groups[group_idx][param_idx].copy_( + cpu_buffer, non_blocking=cpu_buffer.is_pinned() + ) + + def _reload_states(self, is_allocate_stage: bool): + """ + Reload optimizer states and/or master weights from CPU to GPU. + + If is_allocate_stage is True, only allocate GPU memory for the states and master weights, + but do not copy the data from CPU to GPU. Otherwise, copy the data from CPU to GPU. + The two processes are separated to make sure that the GPU memory is allocated on the + default stream to avoid fragmentation. + """ + # Reload states to adam optimizer + states = self.adam_optimizer.state + + for param, param_state in states.items(): + if param not in self._opt_state_cpu_buffers: + continue + + for state_key in self._offloaded_state_keys: + if state_key not in self._opt_state_cpu_buffers[param]: + continue + + cpu_buffer = self._opt_state_cpu_buffers[param][state_key] + if is_allocate_stage: + param_state[state_key].untyped_storage().resize_( + cpu_buffer.untyped_storage().size() + ) + else: + param_state[state_key].copy_(cpu_buffer, non_blocking=cpu_buffer.is_pinned()) + + # Reload mcore master weights if not in optimizer state + if self._offloaded_mcore_master_weights: + self._reload_shard_groups( + self.dist_optimizer.shard_fp32_from_float16_groups, + self._shard_fp32_from_float16_cpu_buffers, + is_allocate_stage, + ) + + def offload(self, offload_optimizer_states: bool = True, offload_master_weights: bool = True): + """ + Offload optimizer states and/or master weights to CPU. + Starts async D2H transfer that can overlap with other operations. + + Args: + offload_optimizer_states: Whether to offload exp_avg, exp_avg_sq. + offload_master_weights: Whether to offload master weights. + """ + if not offload_optimizer_states and not offload_master_weights: + return + + # Wait for current stream finishing updating the optimizer states. 
+ self._d2h_stream.wait_stream(torch.cuda.current_stream()) + + with torch.cuda.stream(self._d2h_stream): + self._offload_states(offload_optimizer_states, offload_master_weights) + + self._offloaded = True + + def release_gpu_memory(self): + """ + Release GPU memory for optimizer states and master weights after D2H copy completes. + + This is separated from offload() to allow delayed GPU memory release, + which is needed for mxfp8 + overlap_param_gather case where master weights + must remain on GPU until after _copy_main_params_to_param_buffer() is called. + """ + if not self._offloaded: + return + + self._release_states() + + def reload(self): + """ + Reload optimizer states and/or master weights from CPU to GPU. + Call before optimizer.step() to ensure states are on GPU. + """ + if not self._offloaded: + return + + # Allocate GPU memory on the current stream to avoid fragmentation. + self._reload_states(is_allocate_stage=True) + + self._h2d_stream.wait_stream(self._d2h_stream) + self._h2d_stream.wait_stream(torch.cuda.current_stream()) + + # Reload states on the h2d stream to overlap with other operations. + with torch.cuda.stream(self._h2d_stream): + self._reload_states(is_allocate_stage=False) + + self._offloaded_state_keys = () + self._offloaded_mcore_master_weights = False + self._offloaded = False + + def sync_before_step(self): + """ + Wait for H2D reload to complete before optimizer.step(). + Must be called to ensure states are on GPU before optimizer uses them. + + This is separated from reload() to make it possible to move the reload ahead of time. 
+ """ + torch.cuda.current_stream().wait_stream(self._h2d_stream) diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index 6e093f96f7e..9536bc4f9ef 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -49,6 +49,7 @@ from ..fp8_utils import dequantize_fp8_tensor, is_float8tensor, quantize_param_shard from ..transformer.fsdp_dtensor_checkpoint import handle_experts_in_state_dict from ..transformer.module import MegatronModule +from .cpu_offloading.optimizer_state_offloader import OptimizerStateOffloader from .grad_scaler import MegatronGradScaler from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper, param_group_identifier_keys from .optimizer_config import OptimizerConfig @@ -604,6 +605,10 @@ def __init__( self.optimizer.param_groups = [g["orig_group"] for g in self.opt_group_ranges] self.optimizer.load_state_dict(self.optimizer.state_dict()) + self._state_offloader: Optional[OptimizerStateOffloader] = None + if self.config.offload_optimizer_states: + self._state_offloader = OptimizerStateOffloader(self) + def _get_model_param_range_map(self, param: torch.nn.Parameter): """ Given a model param, get the index sub-range of the param that this @@ -2580,6 +2585,8 @@ def step_with_ready_grads(self) -> bool: Under the hood, either launch synchronous param all-gathers or get ready to launch asynchorous all-gathers that get overlapped with the next forward pass. 
""" + if self._state_offloader is not None: + self._state_offloader.sync_before_step() update_successful = super().step_with_ready_grads() timers = self.config.timers @@ -2600,4 +2607,22 @@ def step_with_ready_grads(self) -> bool: if timers is not None: timers('params-all-gather').stop() + if self._state_offloader is not None: + self._state_offloader.mark_optimizer_states_initialized() + return update_successful + + def offload_states(self): + """Offload states to CPU.""" + if self._state_offloader is not None: + self._state_offloader.offload() + + def reload_offloaded_states(self): + """Start async reload of offloaded states.""" + if self._state_offloader is not None: + self._state_offloader.reload() + + def release_offloaded_gpu_states(self): + """Release GPU memory after D2H completes. For delayed release case.""" + if self._state_offloader is not None: + self._state_offloader.release_gpu_memory() diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index 679878ed954..1813488d7bd 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -266,6 +266,12 @@ class OptimizerConfig: pin_cpu_params: bool = True """If True, pin the optimizer parameters to CPU memory.""" + offload_optimizer_states: bool = False + """ + If True, offload optimizer states to CPU after each optimizer step and + reload them before the next optimizer step. + """ + ################ # Miscellaneous ################ diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 5f9e7350c18..8a70772cc3d 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1271,6 +1271,11 @@ def validate_args(args, defaults={}): "must be used in conjunction with `--fp8-recipe delayed`." 
) + if args.offload_optimizer_states: + assert args.use_distributed_optimizer, "offload_optimizer_states is only supported with distributed optimizer" + assert args.optimizer == 'adam', "offload_optimizer_states is only supported with adam optimizer" + assert not args.use_megatron_fsdp, "offload_optimizer_states does not support Megatron-FSDP for now." + if args.non_persistent_ckpt_type == "local": assert args.non_persistent_local_ckpt_dir is not None, "Tried to use local checkpointing without specifying --local-ckpt-dir!" if args.replication: @@ -2386,6 +2391,14 @@ def _add_training_args(parser): help='Disable pinning of CPU memory for gradients.') group.add_argument('--no-pin-cpu-params', action='store_false', dest='pin_cpu_params', help='Disable pinning of CPU memory for parameters.') + group.add_argument('--offload-optimizer-states', + action='store_true', + dest='offload_optimizer_states', + help='Offload optimizer states to CPU after each optimizer step and ' + 'reload them before the next optimizer step. ' + 'Only support TE FusedAdam optimizer.' + 'Note that this still uses pure GPU optimizer instead of ' + 'HybridDeviceOptimizer for --optimizer-cpu-offload.') group.add_argument('--dataloader-type', type=str, default=None, choices=['single', 'cyclic', 'external'], help='Single pass vs multiple pass data loader') diff --git a/megatron/training/training.py b/megatron/training/training.py index 845d271f62e..8aff2556d14 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1425,6 +1425,12 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch rerun_state_machine = get_rerun_state_machine() while rerun_state_machine.should_run_forward_backward(data_iterator): + # Offload optimizer states to CPU if enabled. + if args.offload_optimizer_states: + for optim_instance in optimizer.chained_optimizers: + if isinstance(optim_instance, DistributedOptimizer): + optim_instance.offload_states() + # Set grad to zero. 
for model_chunk in model: model_chunk.zero_grad_buffer() @@ -1458,6 +1464,14 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch if isinstance(optim_instance, DistributedOptimizer): optim_instance._copy_main_params_to_param_buffer() + # Release GPU memory for offloaded optimizer states. + # This needs to be done after _copy_main_params_to_param_buffer(). + # Separate offload and release to allow early D2H transfer to overlap with other operations. + if args.offload_optimizer_states: + for optim_instance in optimizer.chained_optimizers: + if isinstance(optim_instance, DistributedOptimizer): + optim_instance.release_offloaded_gpu_states() + # Forward pass. losses_reduced = forward_backward_func( forward_step_func=forward_step_func, @@ -2305,7 +2319,21 @@ def train( config.param_sync_func = [model_chunk.start_param_sync for model_chunk in model] if len(model) == 1: config.param_sync_func = config.param_sync_func[0] - config.finalize_model_grads_func = finalize_model_grads + + # Wrap finalize_model_grads to reload offloaded optimizer states before grad finalization. + # This allows H2D transfer to overlap with grad all-reduce. 
+ if args.offload_optimizer_states: + + def finalize_model_grads_with_state_reload(*fmg_args, **fmg_kwargs): + # Reload offloaded states for all DistributedOptimizer instances + for optim_instance in optimizer.chained_optimizers: + if isinstance(optim_instance, DistributedOptimizer): + optim_instance.reload_offloaded_states() + return finalize_model_grads(*fmg_args, **fmg_kwargs) + + config.finalize_model_grads_func = finalize_model_grads_with_state_reload + else: + config.finalize_model_grads_func = finalize_model_grads if args.log_energy: energy_monitor.setup() diff --git a/tests/unit_tests/test_optimizer_state_offloading.py b/tests/unit_tests/test_optimizer_state_offloading.py new file mode 100644 index 00000000000..baaab355182 --- /dev/null +++ b/tests/unit_tests/test_optimizer_state_offloading.py @@ -0,0 +1,337 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +"""Unit tests for OptimizerStateOffloader.""" + +import pytest +import torch +import torch.nn as nn + +from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig +from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer +from megatron.core.transformer import TransformerConfig +from tests.unit_tests.test_utilities import Utils + +try: + from transformer_engine.pytorch.optimizers import FusedAdam # noqa: F401 + + TE_FUSED_ADAM_AVAILABLE = True +except ImportError: + TE_FUSED_ADAM_AVAILABLE = False + + +class SimpleModel(nn.Module): + """Simple model for testing.""" + + def __init__(self, hidden_size=256): + super().__init__() + self.fc1 = nn.Linear(hidden_size, hidden_size) + self.fc2 = nn.Linear(hidden_size, hidden_size) + + def forward(self, x): + return self.fc2(torch.relu(self.fc1(x))) + + +def create_model_and_optimizer(hidden_size=256, offload_optimizer_states=True, **optimizer_kwargs): + """Helper to create model and optimizer for tests.""" + model = SimpleModel(hidden_size=hidden_size).bfloat16().cuda() + ddp_config = 
DistributedDataParallelConfig(use_distributed_optimizer=True) + model = DistributedDataParallel( + TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + ) + + default_config = dict( + optimizer='adam', + bf16=True, + lr=0.001, + use_distributed_optimizer=True, + offload_optimizer_states=offload_optimizer_states, + ) + default_config.update(optimizer_kwargs) + + optimizer_config = OptimizerConfig(**default_config) + optim = get_megatron_optimizer(optimizer_config, [model]) + return model, optim + + +def run_forward_backward_step(model, optim, hidden_size=256): + """Run a single forward-backward-step cycle.""" + input_tensor = torch.randn(8, hidden_size, dtype=torch.bfloat16, device='cuda') + output = model(input_tensor) + output.sum().backward() + optim.step() + optim.zero_grad() + + +# ============================================================================= +# Test 1: Basic OptimizerStateOffloader Initialization +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +def test_offloader_initialization(): + """Test that OptimizerStateOffloader initializes correctly.""" + Utils.initialize_model_parallel() + model, optim = create_model_and_optimizer() + dist_optim = optim.chained_optimizers[0] + + # Offloader is created in __init__ when offload_optimizer_states=True + assert dist_optim._state_offloader is not None + offloader = dist_optim._state_offloader + + # Verify offloader properties + assert offloader.adam_optimizer is not None + assert offloader._d2h_stream is not None + assert offloader._h2d_stream is not None + assert offloader._offloaded is False + + # Before first step, optimizer states are not initialized yet + assert offloader._optimizer_states_initialized is False + + # Run one step to initialize optimizer states + run_forward_backward_step(model, optim) + + # After first step, optimizer states should be marked as 
initialized + assert offloader._optimizer_states_initialized is True + Utils.destroy_model_parallel() + + +# ============================================================================= +# Test 2: Early Master Weight Offloading Before First Step +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +def test_early_master_weight_offloading(): + """Test that master weights can be offloaded before the first optimizer step.""" + Utils.initialize_model_parallel() + model, optim = create_model_and_optimizer() + dist_optim = optim.chained_optimizers[0] + + # Offloader is created in __init__ + assert dist_optim._state_offloader is not None + offloader = dist_optim._state_offloader + + # Before first step, optimizer states are not initialized + assert offloader._optimizer_states_initialized is False + + # Capture original master weights before offload + original_master_weights = [] + for group in dist_optim.shard_fp32_from_float16_groups: + group_weights = [tensor.clone() for tensor in group] + original_master_weights.append(group_weights) + + # Offload before first step - should only offload master weights + offloader.offload() + offloader.release_gpu_memory() + torch.cuda.synchronize() + + # Verify master weights were offloaded (storage resized to 0) + for group in dist_optim.shard_fp32_from_float16_groups: + for tensor in group: + assert tensor.untyped_storage().size() == 0, "Master weight should be offloaded" + + # Reload master weights + offloader.reload() + offloader.sync_before_step() + + # Verify master weights match after reload + for group_idx, group in enumerate(dist_optim.shard_fp32_from_float16_groups): + for param_idx, tensor in enumerate(group): + original = original_master_weights[group_idx][param_idx] + torch.testing.assert_close( + tensor, + original, + msg=f"Master weight [{group_idx}][{param_idx}] mismatch after offload/reload", + ) + + # Now run 
a step and verify optimizer states can be offloaded after + run_forward_backward_step(model, optim) + assert offloader._optimizer_states_initialized is True + + Utils.destroy_model_parallel() + + +# ============================================================================= +# Test 3: Offload and Reload Correctness +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +@pytest.mark.parametrize("offload_optimizer_states", [True, False]) +@pytest.mark.parametrize("offload_master_weights", [True, False]) +def test_offload_reload_correctness(offload_optimizer_states, offload_master_weights): + """Test that offload/reload preserves optimizer state values.""" + if not offload_optimizer_states and not offload_master_weights: + pytest.skip("At least one offload type required") + + Utils.initialize_model_parallel() + model, optim = create_model_and_optimizer() + dist_optim = optim.chained_optimizers[0] + + # Run steps to build up optimizer state + for _ in range(3): + run_forward_backward_step(model, optim) + + offloader = dist_optim._state_offloader + + # Capture original states before offload + original_states = {} + for param, state in offloader.adam_optimizer.state.items(): + original_states[param] = { + k: v.clone() for k, v in state.items() if isinstance(v, torch.Tensor) + } + + # Offload + offloader.offload( + offload_optimizer_states=offload_optimizer_states, + offload_master_weights=offload_master_weights, + ) + + # Release GPU memory + offloader.release_gpu_memory() + torch.cuda.synchronize() + + # Reload + offloader.reload() + offloader.sync_before_step() + + # Verify states match after reload + for param, state in offloader.adam_optimizer.state.items(): + if param in original_states: + for key, original_tensor in original_states[param].items(): + if key in state and isinstance(state[key], torch.Tensor): + reloaded_tensor = state[key] + assert 
reloaded_tensor.device.type == 'cuda', f"State {key} should be on GPU" + torch.testing.assert_close( + reloaded_tensor, + original_tensor, + msg=f"State {key} mismatch after offload/reload", + ) + Utils.destroy_model_parallel() + + +# ============================================================================= +# Test 4: GPU Memory Release Verification +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +def test_gpu_memory_release(): + """Test that GPU memory is actually freed after release_gpu_memory().""" + Utils.initialize_model_parallel() + # Use larger model for measurable memory impact + model, optim = create_model_and_optimizer(hidden_size=1024) + dist_optim = optim.chained_optimizers[0] + + # Initialize optimizer states + run_forward_backward_step(model, optim, hidden_size=1024) + + offloader = dist_optim._state_offloader + + # Measure memory before offload + torch.cuda.synchronize() + torch.cuda.empty_cache() + memory_before = torch.cuda.memory_allocated() + + # Offload and release + offloader.offload() + offloader.release_gpu_memory() + + # Wait for async operations + torch.cuda.synchronize() + torch.cuda.empty_cache() + memory_after = torch.cuda.memory_allocated() + + # Memory should decrease + memory_freed = memory_before - memory_after + assert memory_freed > 0, f"Expected memory to be freed, but got {memory_freed} bytes difference" + Utils.destroy_model_parallel() + + +# ============================================================================= +# Test 5: Multiple Offload/Reload Cycles +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +def test_multiple_offload_reload_cycles(): + """Test that multiple offload/reload cycles work correctly.""" + Utils.initialize_model_parallel() + model, optim = create_model_and_optimizer() + 
dist_optim = optim.chained_optimizers[0] + + # Initialize + run_forward_backward_step(model, optim) + + offloader = dist_optim._state_offloader + + # Run multiple cycles + for cycle in range(5): + # Offload + offloader.offload() + offloader.release_gpu_memory() + + # Reload + offloader.reload() + offloader.sync_before_step() + + # Run optimizer step + run_forward_backward_step(model, optim) + + # Verify model can still produce valid outputs + input_tensor = torch.randn(8, 256, dtype=torch.bfloat16, device='cuda') + output = model(input_tensor) + assert not output.isnan().any(), "Model output contains NaN after multiple cycles" + Utils.destroy_model_parallel() + + +# ============================================================================= +# Test 6: Training Correctness with Offloading +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +def test_training_correctness_with_offloading(): + """Test that training with offloading produces same results as without.""" + Utils.initialize_model_parallel() + torch.manual_seed(42) + + # Model 1: with offloading + model1, optim1 = create_model_and_optimizer(offload_optimizer_states=True, lr=0.01) + + # Model 2: without offloading (reference) + torch.manual_seed(42) + model2, optim2 = create_model_and_optimizer(offload_optimizer_states=False, lr=0.01) + + # Train both models + n_steps = 10 + torch.manual_seed(123) + dist_optim1 = optim1.chained_optimizers[0] + + # Offloader is created in __init__ when offload_optimizer_states=True + assert dist_optim1._state_offloader is not None + offloader = dist_optim1._state_offloader + + for step in range(n_steps): + input_tensor = torch.randn(8, 256, dtype=torch.bfloat16, device='cuda') + + # Model 1 with offloading + # Offload states (master weights can be offloaded from the start, + # optimizer states will be skipped until after first step) + offloader.offload() + 
offloader.release_gpu_memory() + + output1 = model1(input_tensor) + loss1 = output1.sum() + loss1.backward() + + offloader.reload() + offloader.sync_before_step() + optim1.step() + optim1.zero_grad() + + # Model 2 without offloading + output2 = model2(input_tensor) + loss2 = output2.sum() + loss2.backward() + optim2.step() + optim2.zero_grad() + + # Compare final model weights + for (n1, p1), (n2, p2) in zip(model1.named_parameters(), model2.named_parameters()): + torch.testing.assert_close( + p1.data, + p2.data, + atol=1e-5, + rtol=1e-4, + msg=f"Parameter {n1} mismatch between offloaded and non-offloaded training", + ) + Utils.destroy_model_parallel() From 8ac3a9f43c1034c63547c01434c97835febb5234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 16 Jan 2026 17:28:06 +0100 Subject: [PATCH 239/334] Revert "[Dev] Optimizer State and Master Weight Offloading (#2760)" (#2984) --- .../optimizer_state_offloader.py | 315 ---------------- megatron/core/optimizer/distrib_optimizer.py | 25 -- megatron/core/optimizer/optimizer_config.py | 6 - megatron/training/arguments.py | 13 - megatron/training/training.py | 30 +- .../test_optimizer_state_offloading.py | 337 ------------------ 6 files changed, 1 insertion(+), 725 deletions(-) delete mode 100644 megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py delete mode 100644 tests/unit_tests/test_optimizer_state_offloading.py diff --git a/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py b/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py deleted file mode 100644 index 81fd116c8ba..00000000000 --- a/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py +++ /dev/null @@ -1,315 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
- -"""Optimizer state offloading class.""" - -from typing import TYPE_CHECKING, Dict, List, Tuple - -import torch - -if TYPE_CHECKING: - from megatron.core.optimizer.distrib_optimizer import DistributedOptimizer - - -class OptimizerStateOffloader: - """ - Manages offloading of optimizer states and master weights to CPU. - Used with DistributedOptimizer to reduce GPU memory usage. - - Supports overlapped D2H/H2D transfers using CUDA streams. - - Master weights can be stored in two locations: - - In adam optimizer state (when use_precision_aware_optimizer_no_fp8_or_ds_fp8 is True) - - In mcore's shard_fp32_from_float16_groups - """ - - OPTIMIZER_STATE_KEYS = ('exp_avg', 'exp_avg_sq') - MASTER_WEIGHT_KEY = 'master_param' - - def __init__(self, distrib_optimizer: "DistributedOptimizer"): - """ - Args: - distrib_optimizer: The DistributedOptimizer to offload states and master weights from. - """ - self.dist_optimizer = distrib_optimizer - self.adam_optimizer = distrib_optimizer.optimizer - - # Only support TE FusedAdam optimizer for now. 
- try: - from transformer_engine.pytorch.optimizers import FusedAdam - - assert isinstance(self.adam_optimizer, FusedAdam), ( - f"OptimizerStateOffloader requires TE FusedAdam optimizer, " - f"but got {type(self.adam_optimizer).__name__}" - ) - except ImportError: - raise ImportError( - "OptimizerStateOffloader requires transformer_engine.pytorch.optimizers.FusedAdam" - ) - - # Check if master weights are stored in adam optimizer state - self.optimizer_contains_master_weights = self.adam_optimizer.master_weights - - # CUDA streams for async transfers - self._d2h_stream = torch.cuda.Stream() - self._h2d_stream = torch.cuda.Stream() - - # CPU buffers for optimizer states: {param: {key: cpu_tensor}} - self._opt_state_cpu_buffers: Dict[torch.Tensor, Dict[str, torch.Tensor]] = {} - - # CPU buffers for mcore master weights, matching the structure of source groups - # List[List[cpu_tensor]] - self._shard_fp32_from_float16_cpu_buffers: List[List[torch.Tensor]] = [] - - # State tracking - self._offloaded = False - self._offloaded_state_keys: Tuple[str, ...] = () - self._offloaded_mcore_master_weights = False - - # Track whether optimizer states (exp_avg, exp_avg_sq) have been initialized. - # These are lazily initialized by FusedAdam during the first optimizer.step(). - # Master weights (shard_fp32_from_float16_groups) are available from the start. - self._optimizer_states_initialized = False - - def mark_optimizer_states_initialized(self): - """ - Mark that optimizer states (exp_avg, exp_avg_sq) are now available. - Should be called after the first optimizer.step() completes. - """ - self._optimizer_states_initialized = True - - def _get_state_keys_to_offload( - self, offload_optimizer_states: bool, offload_master_weights: bool - ) -> Tuple[str, ...]: - """Get the state keys in FusedAdam to offload based on configuration.""" - keys = [] - # Skip optimizer states offloading if they haven't been initialized yet. 
- # Optimizer states are lazily initialized by FusedAdam during the first optimizer.step(). - if self._optimizer_states_initialized: - if offload_optimizer_states: - keys.extend(self.OPTIMIZER_STATE_KEYS) - if offload_master_weights and self.optimizer_contains_master_weights: - keys.append(self.MASTER_WEIGHT_KEY) - return tuple(keys) - - def _ensure_state_cpu_buffer( - self, param: torch.Tensor, state_key: str, gpu_tensor: torch.Tensor, pin_memory: bool = True - ) -> torch.Tensor: - """Get or create a CPU buffer for a state tensor.""" - if param not in self._opt_state_cpu_buffers: - self._opt_state_cpu_buffers[param] = {} - - if state_key not in self._opt_state_cpu_buffers[param]: - cpu_buffer = torch.empty( - gpu_tensor.size(), - dtype=gpu_tensor.dtype, - layout=gpu_tensor.layout, - device='cpu', - pin_memory=pin_memory, - ) - self._opt_state_cpu_buffers[param][state_key] = cpu_buffer - - return self._opt_state_cpu_buffers[param][state_key] - - def _offload_shard_groups( - self, - shard_groups: List[List[torch.Tensor]], - cpu_buffers: List[List[torch.Tensor]], - pin_memory: bool = True, - ): - """Offload a shard group to CPU buffers.""" - # Initialize CPU buffers on first call - if len(cpu_buffers) == 0: - for group in shard_groups: - group_buffers = [] - for gpu_tensor in group: - cpu_buffer = torch.empty( - gpu_tensor.size(), - dtype=gpu_tensor.dtype, - layout=gpu_tensor.layout, - device='cpu', - pin_memory=pin_memory, - ) - group_buffers.append(cpu_buffer) - cpu_buffers.append(group_buffers) - - # Copy D2H - for group_idx, group in enumerate(shard_groups): - for param_idx, gpu_tensor in enumerate(group): - cpu_buffer = cpu_buffers[group_idx][param_idx] - cpu_buffer.copy_(gpu_tensor, non_blocking=pin_memory) - gpu_tensor.record_stream(self._d2h_stream) - - def _offload_states( - self, - offload_optimizer_states: bool, - offload_master_weights: bool, - use_pin_memory: bool = True, - ): - """Offload optimizer states and/or master weights to CPU.""" - # Offload 
states from adam optimizer - self._offloaded_state_keys = self._get_state_keys_to_offload( - offload_optimizer_states, offload_master_weights - ) - states = self.adam_optimizer.state - - for param, param_state in states.items(): - for state_key in self._offloaded_state_keys: - if state_key not in param_state: - continue - - gpu_tensor = param_state[state_key] - if not isinstance(gpu_tensor, torch.Tensor) or not gpu_tensor.is_cuda: - continue - - cpu_buffer = self._ensure_state_cpu_buffer( - param, state_key, gpu_tensor, use_pin_memory - ) - cpu_buffer.copy_(gpu_tensor, non_blocking=use_pin_memory) - gpu_tensor.record_stream(self._d2h_stream) - - # Offload mcore master weights if not in optimizer state - if offload_master_weights and not self.optimizer_contains_master_weights: - self._offload_shard_groups( - self.dist_optimizer.shard_fp32_from_float16_groups, - self._shard_fp32_from_float16_cpu_buffers, - use_pin_memory, - ) - self._offloaded_mcore_master_weights = True - - def _release_states(self): - """Replace optimizer state GPU tensors with CPU tensors to free GPU memory.""" - states = self.adam_optimizer.state - - for param, param_state in states.items(): - if param not in self._opt_state_cpu_buffers: - continue - - for state_key in self._offloaded_state_keys: - if state_key not in self._opt_state_cpu_buffers[param]: - continue - - param_state[state_key].untyped_storage().resize_(0) - - if self._offloaded_mcore_master_weights: - for group in self.dist_optimizer.shard_fp32_from_float16_groups: - for gpu_tensor in group: - gpu_tensor.untyped_storage().resize_(0) - - def _reload_shard_groups( - self, - shard_groups: List[List[torch.Tensor]], - cpu_buffers: List[List[torch.Tensor]], - is_allocate_stage: bool, - ): - """Reload shard groups from CPU to GPU.""" - for group_idx, group in enumerate(shard_groups): - for param_idx, _ in enumerate(group): - cpu_buffer = cpu_buffers[group_idx][param_idx] - if is_allocate_stage: - 
shard_groups[group_idx][param_idx].untyped_storage().resize_( - cpu_buffer.untyped_storage().size() - ) - else: - shard_groups[group_idx][param_idx].copy_( - cpu_buffer, non_blocking=cpu_buffer.is_pinned() - ) - - def _reload_states(self, is_allocate_stage: bool): - """ - Reload optimizer states and/or master weights from CPU to GPU. - - If is_allocate_stage is True, only allocate GPU memory for the states and master weights, - but do not copy the data from CPU to GPU. Otherwise, copy the data from CPU to GPU. - The two processes are separated to make sure that the GPU memory is allocated on the - default stream to avoid fragmentation. - """ - # Reload states to adam optimizer - states = self.adam_optimizer.state - - for param, param_state in states.items(): - if param not in self._opt_state_cpu_buffers: - continue - - for state_key in self._offloaded_state_keys: - if state_key not in self._opt_state_cpu_buffers[param]: - continue - - cpu_buffer = self._opt_state_cpu_buffers[param][state_key] - if is_allocate_stage: - param_state[state_key].untyped_storage().resize_( - cpu_buffer.untyped_storage().size() - ) - else: - param_state[state_key].copy_(cpu_buffer, non_blocking=cpu_buffer.is_pinned()) - - # Reload mcore master weights if not in optimizer state - if self._offloaded_mcore_master_weights: - self._reload_shard_groups( - self.dist_optimizer.shard_fp32_from_float16_groups, - self._shard_fp32_from_float16_cpu_buffers, - is_allocate_stage, - ) - - def offload(self, offload_optimizer_states: bool = True, offload_master_weights: bool = True): - """ - Offload optimizer states and/or master weights to CPU. - Starts async D2H transfer that can overlap with other operations. - - Args: - offload_optimizer_states: Whether to offload exp_avg, exp_avg_sq. - offload_master_weights: Whether to offload master weights. - """ - if not offload_optimizer_states and not offload_master_weights: - return - - # Wait for current stream finishing updating the optimizer states. 
- self._d2h_stream.wait_stream(torch.cuda.current_stream()) - - with torch.cuda.stream(self._d2h_stream): - self._offload_states(offload_optimizer_states, offload_master_weights) - - self._offloaded = True - - def release_gpu_memory(self): - """ - Release GPU memory for optimizer states and master weights after D2H copy completes. - - This is separated from offload() to allow delayed GPU memory release, - which is needed for mxfp8 + overlap_param_gather case where master weights - must remain on GPU until after _copy_main_params_to_param_buffer() is called. - """ - if not self._offloaded: - return - - self._release_states() - - def reload(self): - """ - Reload optimizer states and/or master weights from CPU to GPU. - Call before optimizer.step() to ensure states are on GPU. - """ - if not self._offloaded: - return - - # Allocate GPU memory on the current stream to avoid fragmentation. - self._reload_states(is_allocate_stage=True) - - self._h2d_stream.wait_stream(self._d2h_stream) - self._h2d_stream.wait_stream(torch.cuda.current_stream()) - - # Reload states on the h2d stream to overlap with other operations. - with torch.cuda.stream(self._h2d_stream): - self._reload_states(is_allocate_stage=False) - - self._offloaded_state_keys = () - self._offloaded_mcore_master_weights = False - self._offloaded = False - - def sync_before_step(self): - """ - Wait for H2D reload to complete before optimizer.step(). - Must be called to ensure states are on GPU before optimizer uses them. - - This is separated from reload() to make it possible to move the reload ahead of time. 
- """ - torch.cuda.current_stream().wait_stream(self._h2d_stream) diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index 9536bc4f9ef..6e093f96f7e 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -49,7 +49,6 @@ from ..fp8_utils import dequantize_fp8_tensor, is_float8tensor, quantize_param_shard from ..transformer.fsdp_dtensor_checkpoint import handle_experts_in_state_dict from ..transformer.module import MegatronModule -from .cpu_offloading.optimizer_state_offloader import OptimizerStateOffloader from .grad_scaler import MegatronGradScaler from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper, param_group_identifier_keys from .optimizer_config import OptimizerConfig @@ -605,10 +604,6 @@ def __init__( self.optimizer.param_groups = [g["orig_group"] for g in self.opt_group_ranges] self.optimizer.load_state_dict(self.optimizer.state_dict()) - self._state_offloader: Optional[OptimizerStateOffloader] = None - if self.config.offload_optimizer_states: - self._state_offloader = OptimizerStateOffloader(self) - def _get_model_param_range_map(self, param: torch.nn.Parameter): """ Given a model param, get the index sub-range of the param that this @@ -2585,8 +2580,6 @@ def step_with_ready_grads(self) -> bool: Under the hood, either launch synchronous param all-gathers or get ready to launch asynchorous all-gathers that get overlapped with the next forward pass. 
""" - if self._state_offloader is not None: - self._state_offloader.sync_before_step() update_successful = super().step_with_ready_grads() timers = self.config.timers @@ -2607,22 +2600,4 @@ def step_with_ready_grads(self) -> bool: if timers is not None: timers('params-all-gather').stop() - if self._state_offloader is not None: - self._state_offloader.mark_optimizer_states_initialized() - return update_successful - - def offload_states(self): - """Offload states to CPU.""" - if self._state_offloader is not None: - self._state_offloader.offload() - - def reload_offloaded_states(self): - """Start async reload of offloaded states.""" - if self._state_offloader is not None: - self._state_offloader.reload() - - def release_offloaded_gpu_states(self): - """Release GPU memory after D2H completes. For delayed release case.""" - if self._state_offloader is not None: - self._state_offloader.release_gpu_memory() diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index 1813488d7bd..679878ed954 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -266,12 +266,6 @@ class OptimizerConfig: pin_cpu_params: bool = True """If True, pin the optimizer parameters to CPU memory.""" - offload_optimizer_states: bool = False - """ - If True, offload optimizer states to CPU after each optimizer step and - reload them before the next optimizer step. - """ - ################ # Miscellaneous ################ diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 8a70772cc3d..5f9e7350c18 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1271,11 +1271,6 @@ def validate_args(args, defaults={}): "must be used in conjunction with `--fp8-recipe delayed`." 
) - if args.offload_optimizer_states: - assert args.use_distributed_optimizer, "offload_optimizer_states is only supported with distributed optimizer" - assert args.optimizer == 'adam', "offload_optimizer_states is only supported with adam optimizer" - assert not args.use_megatron_fsdp, "offload_optimizer_states does not support Megatron-FSDP for now." - if args.non_persistent_ckpt_type == "local": assert args.non_persistent_local_ckpt_dir is not None, "Tried to use local checkpointing without specifying --local-ckpt-dir!" if args.replication: @@ -2391,14 +2386,6 @@ def _add_training_args(parser): help='Disable pinning of CPU memory for gradients.') group.add_argument('--no-pin-cpu-params', action='store_false', dest='pin_cpu_params', help='Disable pinning of CPU memory for parameters.') - group.add_argument('--offload-optimizer-states', - action='store_true', - dest='offload_optimizer_states', - help='Offload optimizer states to CPU after each optimizer step and ' - 'reload them before the next optimizer step. ' - 'Only support TE FusedAdam optimizer.' - 'Note that this still uses pure GPU optimizer instead of ' - 'HybridDeviceOptimizer for --optimizer-cpu-offload.') group.add_argument('--dataloader-type', type=str, default=None, choices=['single', 'cyclic', 'external'], help='Single pass vs multiple pass data loader') diff --git a/megatron/training/training.py b/megatron/training/training.py index 8aff2556d14..845d271f62e 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1425,12 +1425,6 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch rerun_state_machine = get_rerun_state_machine() while rerun_state_machine.should_run_forward_backward(data_iterator): - # Offload optimizer states to CPU if enabled. - if args.offload_optimizer_states: - for optim_instance in optimizer.chained_optimizers: - if isinstance(optim_instance, DistributedOptimizer): - optim_instance.offload_states() - # Set grad to zero. 
for model_chunk in model: model_chunk.zero_grad_buffer() @@ -1464,14 +1458,6 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch if isinstance(optim_instance, DistributedOptimizer): optim_instance._copy_main_params_to_param_buffer() - # Release GPU memory for offloaded optimizer states. - # This needs to be done after _copy_main_params_to_param_buffer(). - # Separate offload and release to allow early D2H transfer to overlap with other operations. - if args.offload_optimizer_states: - for optim_instance in optimizer.chained_optimizers: - if isinstance(optim_instance, DistributedOptimizer): - optim_instance.release_offloaded_gpu_states() - # Forward pass. losses_reduced = forward_backward_func( forward_step_func=forward_step_func, @@ -2319,21 +2305,7 @@ def train( config.param_sync_func = [model_chunk.start_param_sync for model_chunk in model] if len(model) == 1: config.param_sync_func = config.param_sync_func[0] - - # Wrap finalize_model_grads to reload offloaded optimizer states before grad finalization. - # This allows H2D transfer to overlap with grad all-reduce. 
- if args.offload_optimizer_states: - - def finalize_model_grads_with_state_reload(*fmg_args, **fmg_kwargs): - # Reload offloaded states for all DistributedOptimizer instances - for optim_instance in optimizer.chained_optimizers: - if isinstance(optim_instance, DistributedOptimizer): - optim_instance.reload_offloaded_states() - return finalize_model_grads(*fmg_args, **fmg_kwargs) - - config.finalize_model_grads_func = finalize_model_grads_with_state_reload - else: - config.finalize_model_grads_func = finalize_model_grads + config.finalize_model_grads_func = finalize_model_grads if args.log_energy: energy_monitor.setup() diff --git a/tests/unit_tests/test_optimizer_state_offloading.py b/tests/unit_tests/test_optimizer_state_offloading.py deleted file mode 100644 index baaab355182..00000000000 --- a/tests/unit_tests/test_optimizer_state_offloading.py +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. - -"""Unit tests for OptimizerStateOffloader.""" - -import pytest -import torch -import torch.nn as nn - -from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig -from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer -from megatron.core.transformer import TransformerConfig -from tests.unit_tests.test_utilities import Utils - -try: - from transformer_engine.pytorch.optimizers import FusedAdam # noqa: F401 - - TE_FUSED_ADAM_AVAILABLE = True -except ImportError: - TE_FUSED_ADAM_AVAILABLE = False - - -class SimpleModel(nn.Module): - """Simple model for testing.""" - - def __init__(self, hidden_size=256): - super().__init__() - self.fc1 = nn.Linear(hidden_size, hidden_size) - self.fc2 = nn.Linear(hidden_size, hidden_size) - - def forward(self, x): - return self.fc2(torch.relu(self.fc1(x))) - - -def create_model_and_optimizer(hidden_size=256, offload_optimizer_states=True, **optimizer_kwargs): - """Helper to create model and optimizer for tests.""" - model = 
SimpleModel(hidden_size=hidden_size).bfloat16().cuda() - ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=True) - model = DistributedDataParallel( - TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model - ) - - default_config = dict( - optimizer='adam', - bf16=True, - lr=0.001, - use_distributed_optimizer=True, - offload_optimizer_states=offload_optimizer_states, - ) - default_config.update(optimizer_kwargs) - - optimizer_config = OptimizerConfig(**default_config) - optim = get_megatron_optimizer(optimizer_config, [model]) - return model, optim - - -def run_forward_backward_step(model, optim, hidden_size=256): - """Run a single forward-backward-step cycle.""" - input_tensor = torch.randn(8, hidden_size, dtype=torch.bfloat16, device='cuda') - output = model(input_tensor) - output.sum().backward() - optim.step() - optim.zero_grad() - - -# ============================================================================= -# Test 1: Basic OptimizerStateOffloader Initialization -# ============================================================================= -@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") -def test_offloader_initialization(): - """Test that OptimizerStateOffloader initializes correctly.""" - Utils.initialize_model_parallel() - model, optim = create_model_and_optimizer() - dist_optim = optim.chained_optimizers[0] - - # Offloader is created in __init__ when offload_optimizer_states=True - assert dist_optim._state_offloader is not None - offloader = dist_optim._state_offloader - - # Verify offloader properties - assert offloader.adam_optimizer is not None - assert offloader._d2h_stream is not None - assert offloader._h2d_stream is not None - assert offloader._offloaded is False - - # Before first step, optimizer states are not initialized yet - assert offloader._optimizer_states_initialized is False - - # Run one step to initialize optimizer states - run_forward_backward_step(model, optim) 
- - # After first step, optimizer states should be marked as initialized - assert offloader._optimizer_states_initialized is True - Utils.destroy_model_parallel() - - -# ============================================================================= -# Test 2: Early Master Weight Offloading Before First Step -# ============================================================================= -@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") -def test_early_master_weight_offloading(): - """Test that master weights can be offloaded before the first optimizer step.""" - Utils.initialize_model_parallel() - model, optim = create_model_and_optimizer() - dist_optim = optim.chained_optimizers[0] - - # Offloader is created in __init__ - assert dist_optim._state_offloader is not None - offloader = dist_optim._state_offloader - - # Before first step, optimizer states are not initialized - assert offloader._optimizer_states_initialized is False - - # Capture original master weights before offload - original_master_weights = [] - for group in dist_optim.shard_fp32_from_float16_groups: - group_weights = [tensor.clone() for tensor in group] - original_master_weights.append(group_weights) - - # Offload before first step - should only offload master weights - offloader.offload() - offloader.release_gpu_memory() - torch.cuda.synchronize() - - # Verify master weights were offloaded (storage resized to 0) - for group in dist_optim.shard_fp32_from_float16_groups: - for tensor in group: - assert tensor.untyped_storage().size() == 0, "Master weight should be offloaded" - - # Reload master weights - offloader.reload() - offloader.sync_before_step() - - # Verify master weights match after reload - for group_idx, group in enumerate(dist_optim.shard_fp32_from_float16_groups): - for param_idx, tensor in enumerate(group): - original = original_master_weights[group_idx][param_idx] - torch.testing.assert_close( - tensor, - original, - msg=f"Master weight 
[{group_idx}][{param_idx}] mismatch after offload/reload", - ) - - # Now run a step and verify optimizer states can be offloaded after - run_forward_backward_step(model, optim) - assert offloader._optimizer_states_initialized is True - - Utils.destroy_model_parallel() - - -# ============================================================================= -# Test 3: Offload and Reload Correctness -# ============================================================================= -@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") -@pytest.mark.parametrize("offload_optimizer_states", [True, False]) -@pytest.mark.parametrize("offload_master_weights", [True, False]) -def test_offload_reload_correctness(offload_optimizer_states, offload_master_weights): - """Test that offload/reload preserves optimizer state values.""" - if not offload_optimizer_states and not offload_master_weights: - pytest.skip("At least one offload type required") - - Utils.initialize_model_parallel() - model, optim = create_model_and_optimizer() - dist_optim = optim.chained_optimizers[0] - - # Run steps to build up optimizer state - for _ in range(3): - run_forward_backward_step(model, optim) - - offloader = dist_optim._state_offloader - - # Capture original states before offload - original_states = {} - for param, state in offloader.adam_optimizer.state.items(): - original_states[param] = { - k: v.clone() for k, v in state.items() if isinstance(v, torch.Tensor) - } - - # Offload - offloader.offload( - offload_optimizer_states=offload_optimizer_states, - offload_master_weights=offload_master_weights, - ) - - # Release GPU memory - offloader.release_gpu_memory() - torch.cuda.synchronize() - - # Reload - offloader.reload() - offloader.sync_before_step() - - # Verify states match after reload - for param, state in offloader.adam_optimizer.state.items(): - if param in original_states: - for key, original_tensor in original_states[param].items(): - if key in state and 
isinstance(state[key], torch.Tensor): - reloaded_tensor = state[key] - assert reloaded_tensor.device.type == 'cuda', f"State {key} should be on GPU" - torch.testing.assert_close( - reloaded_tensor, - original_tensor, - msg=f"State {key} mismatch after offload/reload", - ) - Utils.destroy_model_parallel() - - -# ============================================================================= -# Test 4: GPU Memory Release Verification -# ============================================================================= -@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") -def test_gpu_memory_release(): - """Test that GPU memory is actually freed after release_gpu_memory().""" - Utils.initialize_model_parallel() - # Use larger model for measurable memory impact - model, optim = create_model_and_optimizer(hidden_size=1024) - dist_optim = optim.chained_optimizers[0] - - # Initialize optimizer states - run_forward_backward_step(model, optim, hidden_size=1024) - - offloader = dist_optim._state_offloader - - # Measure memory before offload - torch.cuda.synchronize() - torch.cuda.empty_cache() - memory_before = torch.cuda.memory_allocated() - - # Offload and release - offloader.offload() - offloader.release_gpu_memory() - - # Wait for async operations - torch.cuda.synchronize() - torch.cuda.empty_cache() - memory_after = torch.cuda.memory_allocated() - - # Memory should decrease - memory_freed = memory_before - memory_after - assert memory_freed > 0, f"Expected memory to be freed, but got {memory_freed} bytes difference" - Utils.destroy_model_parallel() - - -# ============================================================================= -# Test 5: Multiple Offload/Reload Cycles -# ============================================================================= -@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") -def test_multiple_offload_reload_cycles(): - """Test that multiple offload/reload cycles work correctly.""" - 
Utils.initialize_model_parallel() - model, optim = create_model_and_optimizer() - dist_optim = optim.chained_optimizers[0] - - # Initialize - run_forward_backward_step(model, optim) - - offloader = dist_optim._state_offloader - - # Run multiple cycles - for cycle in range(5): - # Offload - offloader.offload() - offloader.release_gpu_memory() - - # Reload - offloader.reload() - offloader.sync_before_step() - - # Run optimizer step - run_forward_backward_step(model, optim) - - # Verify model can still produce valid outputs - input_tensor = torch.randn(8, 256, dtype=torch.bfloat16, device='cuda') - output = model(input_tensor) - assert not output.isnan().any(), "Model output contains NaN after multiple cycles" - Utils.destroy_model_parallel() - - -# ============================================================================= -# Test 6: Training Correctness with Offloading -# ============================================================================= -@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") -def test_training_correctness_with_offloading(): - """Test that training with offloading produces same results as without.""" - Utils.initialize_model_parallel() - torch.manual_seed(42) - - # Model 1: with offloading - model1, optim1 = create_model_and_optimizer(offload_optimizer_states=True, lr=0.01) - - # Model 2: without offloading (reference) - torch.manual_seed(42) - model2, optim2 = create_model_and_optimizer(offload_optimizer_states=False, lr=0.01) - - # Train both models - n_steps = 10 - torch.manual_seed(123) - dist_optim1 = optim1.chained_optimizers[0] - - # Offloader is created in __init__ when offload_optimizer_states=True - assert dist_optim1._state_offloader is not None - offloader = dist_optim1._state_offloader - - for step in range(n_steps): - input_tensor = torch.randn(8, 256, dtype=torch.bfloat16, device='cuda') - - # Model 1 with offloading - # Offload states (master weights can be offloaded from the start, - # 
optimizer states will be skipped until after first step) - offloader.offload() - offloader.release_gpu_memory() - - output1 = model1(input_tensor) - loss1 = output1.sum() - loss1.backward() - - offloader.reload() - offloader.sync_before_step() - optim1.step() - optim1.zero_grad() - - # Model 2 without offloading - output2 = model2(input_tensor) - loss2 = output2.sum() - loss2.backward() - optim2.step() - optim2.zero_grad() - - # Compare final model weights - for (n1, p1), (n2, p2) in zip(model1.named_parameters(), model2.named_parameters()): - torch.testing.assert_close( - p1.data, - p2.data, - atol=1e-5, - rtol=1e-4, - msg=f"Parameter {n1} mismatch between offloaded and non-offloaded training", - ) - Utils.destroy_model_parallel() From bd8411c39332651120ce7505bb64b37d73075801 Mon Sep 17 00:00:00 2001 From: Nan Zheng <80790206+nanz-nv@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:36:47 +0800 Subject: [PATCH 240/334] Forced load imbalance (#2917) Co-authored-by: Dennis(Zhenhuan) Liu --- megatron/core/transformer/moe/moe_utils.py | 54 ++++++++++++++++++- megatron/core/transformer/moe/router.py | 7 +++ .../core/transformer/transformer_config.py | 7 +++ megatron/training/arguments.py | 6 +++ 4 files changed, 73 insertions(+), 1 deletion(-) diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index d38b06b2704..60878155fd4 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -10,7 +10,11 @@ from megatron.core.fp4_utils import get_fp4_align_size from megatron.core.fp8_utils import get_fp8_align_size from megatron.core.process_groups_config import ProcessGroupCollection -from megatron.core.tensor_parallel import get_cuda_rng_tracker, get_expert_parallel_rng_tracker_name +from megatron.core.tensor_parallel import ( + get_cuda_rng_tracker, + get_data_parallel_rng_tracker_name, + get_expert_parallel_rng_tracker_name, +) from megatron.core.tensor_parallel.mappings 
import reduce_from_tensor_model_parallel_region from megatron.core.transformer.cuda_graphs import is_graph_capturing from megatron.core.transformer.enums import CudaGraphScope @@ -1021,6 +1025,54 @@ def apply_random_logits(logits): return RandomSTE.apply(logits) +@internal_api +class RandomSTEShared(torch.autograd.Function): + """ + STE that generates random values with shared seed across all ranks. + When std < 0, caches and reuses values per layer. + """ + + _cache = {} + + @staticmethod + def forward(ctx, logits, std, layer_number): + """Forward pass: apply random bias to logits.""" + # Check cache if reuse mode (negative std) + if std < 0 and layer_number in RandomSTEShared._cache: + return logits + RandomSTEShared._cache[layer_number] + + # Generate random bias with shared seed across all ranks + with get_cuda_rng_tracker().fork(get_data_parallel_rng_tracker_name()): + bias = torch.empty(logits.shape[-1], device=logits.device, dtype=logits.dtype).normal_( + std=abs(std) + ) + + # Cache if reuse mode + if std < 0 and layer_number is not None: + RandomSTEShared._cache[layer_number] = bias + + return logits + bias + + @staticmethod + def backward(ctx, grad_output): + """Backward pass: pass through gradients.""" + return grad_output, None, None + + +def apply_biased_logits(logits, std, layer_number=None): + """ + Apply random bias to logits. All ranks get the same random values. + + Args: + logits: Input logits tensor [num_tokens, num_experts] + std: Standard deviation for random bias. If negative, generate once + per layer and reuse (using abs(std) as actual std). + layer_number: Layer number for caching when std is negative. + """ + logits = apply_random_logits(logits) + return RandomSTEShared.apply(logits, std, layer_number) + + class RouterGatingLinearFunction(torch.autograd.Function): """ Autograd function for router gating linear. 
diff --git a/megatron/core/transformer/moe/router.py b/megatron/core/transformer/moe/router.py index bbfb01fec8b..003043bc18d 100644 --- a/megatron/core/transformer/moe/router.py +++ b/megatron/core/transformer/moe/router.py @@ -10,6 +10,7 @@ from megatron.core.transformer.moe.moe_utils import ( MoEAuxLossAutoScaler, ProcessGroupCollection, + apply_biased_logits, apply_random_logits, apply_router_token_dropping, compute_routing_scores_for_aux_loss, @@ -654,6 +655,12 @@ def forward(self, input: torch.Tensor, padding_mask: Optional[torch.Tensor] = No # Apply force load balancing with random logits for benchmark logits = apply_random_logits(logits) + if self.config.moe_router_force_biased is not None: + # Apply biased logits with shared random bias across all ranks + logits = apply_biased_logits( + logits, self.config.moe_router_force_biased, self.layer_number + ) + probs, routing_map = self.routing(logits, padding_mask=padding_mask) return probs, routing_map diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index df11daeb095..18cea44c51f 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -616,6 +616,13 @@ class TransformerConfig(ModelParallelConfig): """[Experimental] Force load balancing with random logits for MoE router, supports naive topk and group-limited topk. This is an experimental feature and only for benchmark.""" + moe_router_force_biased: Optional[float] = None + """[Experimental] Apply random expert bias in normal distribution with specified std + to router logits. Shared seed across all ranks ensures identical bias. + If positive, generates new random bias each forward pass. + If negative, generates bias once per layer and reuses it (abs value is std). 
+ This is an experimental feature for benchmarking purposes.""" + moe_grouped_gemm: bool = False """When there are multiple experts per rank, compress multiple local (potentially small) gemms in a single kernel launch to improve the utilization and performance by leveraging the Grouped diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 5f9e7350c18..096d63985d9 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -3332,6 +3332,12 @@ def _add_moe_args(parser): 'The default value 1e-3 is same as that used in DeepSeekV3.') group.add_argument('--moe-router-force-load-balancing', action='store_true', help='[Experimental] Force override routing to balance token distribution using random logits for MoE routers, supporting naive top-k and group-limited top-k. This experimental feature is for benchmarking purposes only!') + group.add_argument('--moe-router-force-biased', type=float, default=None, + help='[Experimental] Apply random expert bias in normal distribution with specified std to router logits. ' + 'Shared seed across all ranks ensures identical bias. ' + 'If positive, generates new random bias each forward pass. ' + 'If negative, generates bias once per layer and reuses it (abs value is std). ' + 'This experimental feature is for benchmarking purposes only!') group.add_argument('--moe-router-padding-for-quantization', action='store_true', help='Pad the routing_map to make sure the number of tokens each expert received ' 'is a multiple of 16/32 for FP8/FP4 precision. 
It is suggested to enable this for ' From 0a2e01fdcade766a9d1ebd0119387ba159358b61 Mon Sep 17 00:00:00 2001 From: hx Date: Mon, 19 Jan 2026 15:51:50 +0800 Subject: [PATCH 241/334] [Dev] [Reapply] Optimizer State and Master Weight Offloading (#2987) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Xin Yao Co-authored-by: oliver könig --- .../optimizer_state_offloader.py | 315 ++++++++++++++++ megatron/core/optimizer/distrib_optimizer.py | 26 ++ megatron/core/optimizer/optimizer_config.py | 6 + megatron/training/arguments.py | 13 + megatron/training/training.py | 30 +- .../test_optimizer_state_offloading.py | 337 ++++++++++++++++++ 6 files changed, 726 insertions(+), 1 deletion(-) create mode 100644 megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py create mode 100644 tests/unit_tests/test_optimizer_state_offloading.py diff --git a/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py b/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py new file mode 100644 index 00000000000..81fd116c8ba --- /dev/null +++ b/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py @@ -0,0 +1,315 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +"""Optimizer state offloading class.""" + +from typing import TYPE_CHECKING, Dict, List, Tuple + +import torch + +if TYPE_CHECKING: + from megatron.core.optimizer.distrib_optimizer import DistributedOptimizer + + +class OptimizerStateOffloader: + """ + Manages offloading of optimizer states and master weights to CPU. + Used with DistributedOptimizer to reduce GPU memory usage. + + Supports overlapped D2H/H2D transfers using CUDA streams. 
+ + Master weights can be stored in two locations: + - In adam optimizer state (when use_precision_aware_optimizer_no_fp8_or_ds_fp8 is True) + - In mcore's shard_fp32_from_float16_groups + """ + + OPTIMIZER_STATE_KEYS = ('exp_avg', 'exp_avg_sq') + MASTER_WEIGHT_KEY = 'master_param' + + def __init__(self, distrib_optimizer: "DistributedOptimizer"): + """ + Args: + distrib_optimizer: The DistributedOptimizer to offload states and master weights from. + """ + self.dist_optimizer = distrib_optimizer + self.adam_optimizer = distrib_optimizer.optimizer + + # Only support TE FusedAdam optimizer for now. + try: + from transformer_engine.pytorch.optimizers import FusedAdam + + assert isinstance(self.adam_optimizer, FusedAdam), ( + f"OptimizerStateOffloader requires TE FusedAdam optimizer, " + f"but got {type(self.adam_optimizer).__name__}" + ) + except ImportError: + raise ImportError( + "OptimizerStateOffloader requires transformer_engine.pytorch.optimizers.FusedAdam" + ) + + # Check if master weights are stored in adam optimizer state + self.optimizer_contains_master_weights = self.adam_optimizer.master_weights + + # CUDA streams for async transfers + self._d2h_stream = torch.cuda.Stream() + self._h2d_stream = torch.cuda.Stream() + + # CPU buffers for optimizer states: {param: {key: cpu_tensor}} + self._opt_state_cpu_buffers: Dict[torch.Tensor, Dict[str, torch.Tensor]] = {} + + # CPU buffers for mcore master weights, matching the structure of source groups + # List[List[cpu_tensor]] + self._shard_fp32_from_float16_cpu_buffers: List[List[torch.Tensor]] = [] + + # State tracking + self._offloaded = False + self._offloaded_state_keys: Tuple[str, ...] = () + self._offloaded_mcore_master_weights = False + + # Track whether optimizer states (exp_avg, exp_avg_sq) have been initialized. + # These are lazily initialized by FusedAdam during the first optimizer.step(). + # Master weights (shard_fp32_from_float16_groups) are available from the start. 
+ self._optimizer_states_initialized = False + + def mark_optimizer_states_initialized(self): + """ + Mark that optimizer states (exp_avg, exp_avg_sq) are now available. + Should be called after the first optimizer.step() completes. + """ + self._optimizer_states_initialized = True + + def _get_state_keys_to_offload( + self, offload_optimizer_states: bool, offload_master_weights: bool + ) -> Tuple[str, ...]: + """Get the state keys in FusedAdam to offload based on configuration.""" + keys = [] + # Skip optimizer states offloading if they haven't been initialized yet. + # Optimizer states are lazily initialized by FusedAdam during the first optimizer.step(). + if self._optimizer_states_initialized: + if offload_optimizer_states: + keys.extend(self.OPTIMIZER_STATE_KEYS) + if offload_master_weights and self.optimizer_contains_master_weights: + keys.append(self.MASTER_WEIGHT_KEY) + return tuple(keys) + + def _ensure_state_cpu_buffer( + self, param: torch.Tensor, state_key: str, gpu_tensor: torch.Tensor, pin_memory: bool = True + ) -> torch.Tensor: + """Get or create a CPU buffer for a state tensor.""" + if param not in self._opt_state_cpu_buffers: + self._opt_state_cpu_buffers[param] = {} + + if state_key not in self._opt_state_cpu_buffers[param]: + cpu_buffer = torch.empty( + gpu_tensor.size(), + dtype=gpu_tensor.dtype, + layout=gpu_tensor.layout, + device='cpu', + pin_memory=pin_memory, + ) + self._opt_state_cpu_buffers[param][state_key] = cpu_buffer + + return self._opt_state_cpu_buffers[param][state_key] + + def _offload_shard_groups( + self, + shard_groups: List[List[torch.Tensor]], + cpu_buffers: List[List[torch.Tensor]], + pin_memory: bool = True, + ): + """Offload a shard group to CPU buffers.""" + # Initialize CPU buffers on first call + if len(cpu_buffers) == 0: + for group in shard_groups: + group_buffers = [] + for gpu_tensor in group: + cpu_buffer = torch.empty( + gpu_tensor.size(), + dtype=gpu_tensor.dtype, + layout=gpu_tensor.layout, + device='cpu', + 
pin_memory=pin_memory, + ) + group_buffers.append(cpu_buffer) + cpu_buffers.append(group_buffers) + + # Copy D2H + for group_idx, group in enumerate(shard_groups): + for param_idx, gpu_tensor in enumerate(group): + cpu_buffer = cpu_buffers[group_idx][param_idx] + cpu_buffer.copy_(gpu_tensor, non_blocking=pin_memory) + gpu_tensor.record_stream(self._d2h_stream) + + def _offload_states( + self, + offload_optimizer_states: bool, + offload_master_weights: bool, + use_pin_memory: bool = True, + ): + """Offload optimizer states and/or master weights to CPU.""" + # Offload states from adam optimizer + self._offloaded_state_keys = self._get_state_keys_to_offload( + offload_optimizer_states, offload_master_weights + ) + states = self.adam_optimizer.state + + for param, param_state in states.items(): + for state_key in self._offloaded_state_keys: + if state_key not in param_state: + continue + + gpu_tensor = param_state[state_key] + if not isinstance(gpu_tensor, torch.Tensor) or not gpu_tensor.is_cuda: + continue + + cpu_buffer = self._ensure_state_cpu_buffer( + param, state_key, gpu_tensor, use_pin_memory + ) + cpu_buffer.copy_(gpu_tensor, non_blocking=use_pin_memory) + gpu_tensor.record_stream(self._d2h_stream) + + # Offload mcore master weights if not in optimizer state + if offload_master_weights and not self.optimizer_contains_master_weights: + self._offload_shard_groups( + self.dist_optimizer.shard_fp32_from_float16_groups, + self._shard_fp32_from_float16_cpu_buffers, + use_pin_memory, + ) + self._offloaded_mcore_master_weights = True + + def _release_states(self): + """Replace optimizer state GPU tensors with CPU tensors to free GPU memory.""" + states = self.adam_optimizer.state + + for param, param_state in states.items(): + if param not in self._opt_state_cpu_buffers: + continue + + for state_key in self._offloaded_state_keys: + if state_key not in self._opt_state_cpu_buffers[param]: + continue + + param_state[state_key].untyped_storage().resize_(0) + + if 
self._offloaded_mcore_master_weights: + for group in self.dist_optimizer.shard_fp32_from_float16_groups: + for gpu_tensor in group: + gpu_tensor.untyped_storage().resize_(0) + + def _reload_shard_groups( + self, + shard_groups: List[List[torch.Tensor]], + cpu_buffers: List[List[torch.Tensor]], + is_allocate_stage: bool, + ): + """Reload shard groups from CPU to GPU.""" + for group_idx, group in enumerate(shard_groups): + for param_idx, _ in enumerate(group): + cpu_buffer = cpu_buffers[group_idx][param_idx] + if is_allocate_stage: + shard_groups[group_idx][param_idx].untyped_storage().resize_( + cpu_buffer.untyped_storage().size() + ) + else: + shard_groups[group_idx][param_idx].copy_( + cpu_buffer, non_blocking=cpu_buffer.is_pinned() + ) + + def _reload_states(self, is_allocate_stage: bool): + """ + Reload optimizer states and/or master weights from CPU to GPU. + + If is_allocate_stage is True, only allocate GPU memory for the states and master weights, + but do not copy the data from CPU to GPU. Otherwise, copy the data from CPU to GPU. + The two processes are separated to make sure that the GPU memory is allocated on the + default stream to avoid fragmentation. 
+ """ + # Reload states to adam optimizer + states = self.adam_optimizer.state + + for param, param_state in states.items(): + if param not in self._opt_state_cpu_buffers: + continue + + for state_key in self._offloaded_state_keys: + if state_key not in self._opt_state_cpu_buffers[param]: + continue + + cpu_buffer = self._opt_state_cpu_buffers[param][state_key] + if is_allocate_stage: + param_state[state_key].untyped_storage().resize_( + cpu_buffer.untyped_storage().size() + ) + else: + param_state[state_key].copy_(cpu_buffer, non_blocking=cpu_buffer.is_pinned()) + + # Reload mcore master weights if not in optimizer state + if self._offloaded_mcore_master_weights: + self._reload_shard_groups( + self.dist_optimizer.shard_fp32_from_float16_groups, + self._shard_fp32_from_float16_cpu_buffers, + is_allocate_stage, + ) + + def offload(self, offload_optimizer_states: bool = True, offload_master_weights: bool = True): + """ + Offload optimizer states and/or master weights to CPU. + Starts async D2H transfer that can overlap with other operations. + + Args: + offload_optimizer_states: Whether to offload exp_avg, exp_avg_sq. + offload_master_weights: Whether to offload master weights. + """ + if not offload_optimizer_states and not offload_master_weights: + return + + # Wait for current stream finishing updating the optimizer states. + self._d2h_stream.wait_stream(torch.cuda.current_stream()) + + with torch.cuda.stream(self._d2h_stream): + self._offload_states(offload_optimizer_states, offload_master_weights) + + self._offloaded = True + + def release_gpu_memory(self): + """ + Release GPU memory for optimizer states and master weights after D2H copy completes. + + This is separated from offload() to allow delayed GPU memory release, + which is needed for mxfp8 + overlap_param_gather case where master weights + must remain on GPU until after _copy_main_params_to_param_buffer() is called. 
+ """ + if not self._offloaded: + return + + self._release_states() + + def reload(self): + """ + Reload optimizer states and/or master weights from CPU to GPU. + Call before optimizer.step() to ensure states are on GPU. + """ + if not self._offloaded: + return + + # Allocate GPU memory on the current stream to avoid fragmentation. + self._reload_states(is_allocate_stage=True) + + self._h2d_stream.wait_stream(self._d2h_stream) + self._h2d_stream.wait_stream(torch.cuda.current_stream()) + + # Reload states on the h2d stream to overlap with other operations. + with torch.cuda.stream(self._h2d_stream): + self._reload_states(is_allocate_stage=False) + + self._offloaded_state_keys = () + self._offloaded_mcore_master_weights = False + self._offloaded = False + + def sync_before_step(self): + """ + Wait for H2D reload to complete before optimizer.step(). + Must be called to ensure states are on GPU before optimizer uses them. + + This is separated from reload() to make it possible to move the reload ahead of time. + """ + torch.cuda.current_stream().wait_stream(self._h2d_stream) diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index 6e093f96f7e..2f5876fa48a 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -49,6 +49,7 @@ from ..fp8_utils import dequantize_fp8_tensor, is_float8tensor, quantize_param_shard from ..transformer.fsdp_dtensor_checkpoint import handle_experts_in_state_dict from ..transformer.module import MegatronModule +from .cpu_offloading.optimizer_state_offloader import OptimizerStateOffloader from .grad_scaler import MegatronGradScaler from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper, param_group_identifier_keys from .optimizer_config import OptimizerConfig @@ -516,6 +517,8 @@ def __init__( "due to checkpointing requirements." 
) + self._state_offloader: Optional[OptimizerStateOffloader] = None + # when freezing sub-models we have no real optimizer # but still need a stub DistributedOptimizer class if optimizer is None: @@ -604,6 +607,9 @@ def __init__( self.optimizer.param_groups = [g["orig_group"] for g in self.opt_group_ranges] self.optimizer.load_state_dict(self.optimizer.state_dict()) + if self.config.offload_optimizer_states: + self._state_offloader = OptimizerStateOffloader(self) + def _get_model_param_range_map(self, param: torch.nn.Parameter): """ Given a model param, get the index sub-range of the param that this @@ -2580,6 +2586,8 @@ def step_with_ready_grads(self) -> bool: Under the hood, either launch synchronous param all-gathers or get ready to launch asynchorous all-gathers that get overlapped with the next forward pass. """ + if self._state_offloader is not None: + self._state_offloader.sync_before_step() update_successful = super().step_with_ready_grads() timers = self.config.timers @@ -2600,4 +2608,22 @@ def step_with_ready_grads(self) -> bool: if timers is not None: timers('params-all-gather').stop() + if self._state_offloader is not None: + self._state_offloader.mark_optimizer_states_initialized() + return update_successful + + def offload_states(self): + """Offload states to CPU.""" + if self._state_offloader is not None: + self._state_offloader.offload() + + def reload_offloaded_states(self): + """Start async reload of offloaded states.""" + if self._state_offloader is not None: + self._state_offloader.reload() + + def release_offloaded_gpu_states(self): + """Release GPU memory after D2H completes. 
For delayed release case.""" + if self._state_offloader is not None: + self._state_offloader.release_gpu_memory() diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index 679878ed954..1813488d7bd 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -266,6 +266,12 @@ class OptimizerConfig: pin_cpu_params: bool = True """If True, pin the optimizer parameters to CPU memory.""" + offload_optimizer_states: bool = False + """ + If True, offload optimizer states to CPU after each optimizer step and + reload them before the next optimizer step. + """ + ################ # Miscellaneous ################ diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 096d63985d9..a65f1cd6469 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1271,6 +1271,11 @@ def validate_args(args, defaults={}): "must be used in conjunction with `--fp8-recipe delayed`." ) + if args.offload_optimizer_states: + assert args.use_distributed_optimizer, "offload_optimizer_states is only supported with distributed optimizer" + assert args.optimizer == 'adam', "offload_optimizer_states is only supported with adam optimizer" + assert not args.use_megatron_fsdp, "offload_optimizer_states does not support Megatron-FSDP for now." + if args.non_persistent_ckpt_type == "local": assert args.non_persistent_local_ckpt_dir is not None, "Tried to use local checkpointing without specifying --local-ckpt-dir!" 
if args.replication: @@ -2386,6 +2391,14 @@ def _add_training_args(parser): help='Disable pinning of CPU memory for gradients.') group.add_argument('--no-pin-cpu-params', action='store_false', dest='pin_cpu_params', help='Disable pinning of CPU memory for parameters.') + group.add_argument('--offload-optimizer-states', + action='store_true', + dest='offload_optimizer_states', + help='Offload optimizer states to CPU after each optimizer step and ' + 'reload them before the next optimizer step. ' + 'Only support TE FusedAdam optimizer.' + 'Note that this still uses pure GPU optimizer instead of ' + 'HybridDeviceOptimizer for --optimizer-cpu-offload.') group.add_argument('--dataloader-type', type=str, default=None, choices=['single', 'cyclic', 'external'], help='Single pass vs multiple pass data loader') diff --git a/megatron/training/training.py b/megatron/training/training.py index 845d271f62e..8aff2556d14 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1425,6 +1425,12 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch rerun_state_machine = get_rerun_state_machine() while rerun_state_machine.should_run_forward_backward(data_iterator): + # Offload optimizer states to CPU if enabled. + if args.offload_optimizer_states: + for optim_instance in optimizer.chained_optimizers: + if isinstance(optim_instance, DistributedOptimizer): + optim_instance.offload_states() + # Set grad to zero. for model_chunk in model: model_chunk.zero_grad_buffer() @@ -1458,6 +1464,14 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch if isinstance(optim_instance, DistributedOptimizer): optim_instance._copy_main_params_to_param_buffer() + # Release GPU memory for offloaded optimizer states. + # This needs to be done after _copy_main_params_to_param_buffer(). + # Separate offload and release to allow early D2H transfer to overlap with other operations. 
+ if args.offload_optimizer_states: + for optim_instance in optimizer.chained_optimizers: + if isinstance(optim_instance, DistributedOptimizer): + optim_instance.release_offloaded_gpu_states() + # Forward pass. losses_reduced = forward_backward_func( forward_step_func=forward_step_func, @@ -2305,7 +2319,21 @@ def train( config.param_sync_func = [model_chunk.start_param_sync for model_chunk in model] if len(model) == 1: config.param_sync_func = config.param_sync_func[0] - config.finalize_model_grads_func = finalize_model_grads + + # Wrap finalize_model_grads to reload offloaded optimizer states before grad finalization. + # This allows H2D transfer to overlap with grad all-reduce. + if args.offload_optimizer_states: + + def finalize_model_grads_with_state_reload(*fmg_args, **fmg_kwargs): + # Reload offloaded states for all DistributedOptimizer instances + for optim_instance in optimizer.chained_optimizers: + if isinstance(optim_instance, DistributedOptimizer): + optim_instance.reload_offloaded_states() + return finalize_model_grads(*fmg_args, **fmg_kwargs) + + config.finalize_model_grads_func = finalize_model_grads_with_state_reload + else: + config.finalize_model_grads_func = finalize_model_grads if args.log_energy: energy_monitor.setup() diff --git a/tests/unit_tests/test_optimizer_state_offloading.py b/tests/unit_tests/test_optimizer_state_offloading.py new file mode 100644 index 00000000000..baaab355182 --- /dev/null +++ b/tests/unit_tests/test_optimizer_state_offloading.py @@ -0,0 +1,337 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +"""Unit tests for OptimizerStateOffloader.""" + +import pytest +import torch +import torch.nn as nn + +from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig +from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer +from megatron.core.transformer import TransformerConfig +from tests.unit_tests.test_utilities import Utils + +try: + from transformer_engine.pytorch.optimizers import FusedAdam # noqa: F401 + + TE_FUSED_ADAM_AVAILABLE = True +except ImportError: + TE_FUSED_ADAM_AVAILABLE = False + + +class SimpleModel(nn.Module): + """Simple model for testing.""" + + def __init__(self, hidden_size=256): + super().__init__() + self.fc1 = nn.Linear(hidden_size, hidden_size) + self.fc2 = nn.Linear(hidden_size, hidden_size) + + def forward(self, x): + return self.fc2(torch.relu(self.fc1(x))) + + +def create_model_and_optimizer(hidden_size=256, offload_optimizer_states=True, **optimizer_kwargs): + """Helper to create model and optimizer for tests.""" + model = SimpleModel(hidden_size=hidden_size).bfloat16().cuda() + ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=True) + model = DistributedDataParallel( + TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + ) + + default_config = dict( + optimizer='adam', + bf16=True, + lr=0.001, + use_distributed_optimizer=True, + offload_optimizer_states=offload_optimizer_states, + ) + default_config.update(optimizer_kwargs) + + optimizer_config = OptimizerConfig(**default_config) + optim = get_megatron_optimizer(optimizer_config, [model]) + return model, optim + + +def run_forward_backward_step(model, optim, hidden_size=256): + """Run a single forward-backward-step cycle.""" + input_tensor = torch.randn(8, hidden_size, dtype=torch.bfloat16, device='cuda') + output = model(input_tensor) + output.sum().backward() + optim.step() + optim.zero_grad() + + +# ============================================================================= +# 
Test 1: Basic OptimizerStateOffloader Initialization +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +def test_offloader_initialization(): + """Test that OptimizerStateOffloader initializes correctly.""" + Utils.initialize_model_parallel() + model, optim = create_model_and_optimizer() + dist_optim = optim.chained_optimizers[0] + + # Offloader is created in __init__ when offload_optimizer_states=True + assert dist_optim._state_offloader is not None + offloader = dist_optim._state_offloader + + # Verify offloader properties + assert offloader.adam_optimizer is not None + assert offloader._d2h_stream is not None + assert offloader._h2d_stream is not None + assert offloader._offloaded is False + + # Before first step, optimizer states are not initialized yet + assert offloader._optimizer_states_initialized is False + + # Run one step to initialize optimizer states + run_forward_backward_step(model, optim) + + # After first step, optimizer states should be marked as initialized + assert offloader._optimizer_states_initialized is True + Utils.destroy_model_parallel() + + +# ============================================================================= +# Test 2: Early Master Weight Offloading Before First Step +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +def test_early_master_weight_offloading(): + """Test that master weights can be offloaded before the first optimizer step.""" + Utils.initialize_model_parallel() + model, optim = create_model_and_optimizer() + dist_optim = optim.chained_optimizers[0] + + # Offloader is created in __init__ + assert dist_optim._state_offloader is not None + offloader = dist_optim._state_offloader + + # Before first step, optimizer states are not initialized + assert offloader._optimizer_states_initialized is 
False + + # Capture original master weights before offload + original_master_weights = [] + for group in dist_optim.shard_fp32_from_float16_groups: + group_weights = [tensor.clone() for tensor in group] + original_master_weights.append(group_weights) + + # Offload before first step - should only offload master weights + offloader.offload() + offloader.release_gpu_memory() + torch.cuda.synchronize() + + # Verify master weights were offloaded (storage resized to 0) + for group in dist_optim.shard_fp32_from_float16_groups: + for tensor in group: + assert tensor.untyped_storage().size() == 0, "Master weight should be offloaded" + + # Reload master weights + offloader.reload() + offloader.sync_before_step() + + # Verify master weights match after reload + for group_idx, group in enumerate(dist_optim.shard_fp32_from_float16_groups): + for param_idx, tensor in enumerate(group): + original = original_master_weights[group_idx][param_idx] + torch.testing.assert_close( + tensor, + original, + msg=f"Master weight [{group_idx}][{param_idx}] mismatch after offload/reload", + ) + + # Now run a step and verify optimizer states can be offloaded after + run_forward_backward_step(model, optim) + assert offloader._optimizer_states_initialized is True + + Utils.destroy_model_parallel() + + +# ============================================================================= +# Test 3: Offload and Reload Correctness +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +@pytest.mark.parametrize("offload_optimizer_states", [True, False]) +@pytest.mark.parametrize("offload_master_weights", [True, False]) +def test_offload_reload_correctness(offload_optimizer_states, offload_master_weights): + """Test that offload/reload preserves optimizer state values.""" + if not offload_optimizer_states and not offload_master_weights: + pytest.skip("At least one offload type required") + + 
Utils.initialize_model_parallel() + model, optim = create_model_and_optimizer() + dist_optim = optim.chained_optimizers[0] + + # Run steps to build up optimizer state + for _ in range(3): + run_forward_backward_step(model, optim) + + offloader = dist_optim._state_offloader + + # Capture original states before offload + original_states = {} + for param, state in offloader.adam_optimizer.state.items(): + original_states[param] = { + k: v.clone() for k, v in state.items() if isinstance(v, torch.Tensor) + } + + # Offload + offloader.offload( + offload_optimizer_states=offload_optimizer_states, + offload_master_weights=offload_master_weights, + ) + + # Release GPU memory + offloader.release_gpu_memory() + torch.cuda.synchronize() + + # Reload + offloader.reload() + offloader.sync_before_step() + + # Verify states match after reload + for param, state in offloader.adam_optimizer.state.items(): + if param in original_states: + for key, original_tensor in original_states[param].items(): + if key in state and isinstance(state[key], torch.Tensor): + reloaded_tensor = state[key] + assert reloaded_tensor.device.type == 'cuda', f"State {key} should be on GPU" + torch.testing.assert_close( + reloaded_tensor, + original_tensor, + msg=f"State {key} mismatch after offload/reload", + ) + Utils.destroy_model_parallel() + + +# ============================================================================= +# Test 4: GPU Memory Release Verification +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +def test_gpu_memory_release(): + """Test that GPU memory is actually freed after release_gpu_memory().""" + Utils.initialize_model_parallel() + # Use larger model for measurable memory impact + model, optim = create_model_and_optimizer(hidden_size=1024) + dist_optim = optim.chained_optimizers[0] + + # Initialize optimizer states + run_forward_backward_step(model, optim, 
hidden_size=1024) + + offloader = dist_optim._state_offloader + + # Measure memory before offload + torch.cuda.synchronize() + torch.cuda.empty_cache() + memory_before = torch.cuda.memory_allocated() + + # Offload and release + offloader.offload() + offloader.release_gpu_memory() + + # Wait for async operations + torch.cuda.synchronize() + torch.cuda.empty_cache() + memory_after = torch.cuda.memory_allocated() + + # Memory should decrease + memory_freed = memory_before - memory_after + assert memory_freed > 0, f"Expected memory to be freed, but got {memory_freed} bytes difference" + Utils.destroy_model_parallel() + + +# ============================================================================= +# Test 5: Multiple Offload/Reload Cycles +# ============================================================================= +@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +def test_multiple_offload_reload_cycles(): + """Test that multiple offload/reload cycles work correctly.""" + Utils.initialize_model_parallel() + model, optim = create_model_and_optimizer() + dist_optim = optim.chained_optimizers[0] + + # Initialize + run_forward_backward_step(model, optim) + + offloader = dist_optim._state_offloader + + # Run multiple cycles + for cycle in range(5): + # Offload + offloader.offload() + offloader.release_gpu_memory() + + # Reload + offloader.reload() + offloader.sync_before_step() + + # Run optimizer step + run_forward_backward_step(model, optim) + + # Verify model can still produce valid outputs + input_tensor = torch.randn(8, 256, dtype=torch.bfloat16, device='cuda') + output = model(input_tensor) + assert not output.isnan().any(), "Model output contains NaN after multiple cycles" + Utils.destroy_model_parallel() + + +# ============================================================================= +# Test 6: Training Correctness with Offloading +# ============================================================================= 
+@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam") +def test_training_correctness_with_offloading(): + """Test that training with offloading produces same results as without.""" + Utils.initialize_model_parallel() + torch.manual_seed(42) + + # Model 1: with offloading + model1, optim1 = create_model_and_optimizer(offload_optimizer_states=True, lr=0.01) + + # Model 2: without offloading (reference) + torch.manual_seed(42) + model2, optim2 = create_model_and_optimizer(offload_optimizer_states=False, lr=0.01) + + # Train both models + n_steps = 10 + torch.manual_seed(123) + dist_optim1 = optim1.chained_optimizers[0] + + # Offloader is created in __init__ when offload_optimizer_states=True + assert dist_optim1._state_offloader is not None + offloader = dist_optim1._state_offloader + + for step in range(n_steps): + input_tensor = torch.randn(8, 256, dtype=torch.bfloat16, device='cuda') + + # Model 1 with offloading + # Offload states (master weights can be offloaded from the start, + # optimizer states will be skipped until after first step) + offloader.offload() + offloader.release_gpu_memory() + + output1 = model1(input_tensor) + loss1 = output1.sum() + loss1.backward() + + offloader.reload() + offloader.sync_before_step() + optim1.step() + optim1.zero_grad() + + # Model 2 without offloading + output2 = model2(input_tensor) + loss2 = output2.sum() + loss2.backward() + optim2.step() + optim2.zero_grad() + + # Compare final model weights + for (n1, p1), (n2, p2) in zip(model1.named_parameters(), model2.named_parameters()): + torch.testing.assert_close( + p1.data, + p2.data, + atol=1e-5, + rtol=1e-4, + msg=f"Parameter {n1} mismatch between offloaded and non-offloaded training", + ) + Utils.destroy_model_parallel() From 8abc08640a3dfc11510d2849f358d65784507fca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 19 Jan 2026 15:51:08 +0100 Subject: [PATCH 242/334] ci(fix): CI_COMMIT_BRANCH on forks (#2982) (#2989) 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/scripts/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab/scripts/build.sh b/.gitlab/scripts/build.sh index 8359731e3d7..9bcf5d45712 100644 --- a/.gitlab/scripts/build.sh +++ b/.gitlab/scripts/build.sh @@ -20,6 +20,8 @@ docker buildx create --name container --driver=docker-container --use tls-enviro ADDITIONAL_PARAMS=() +CI_COMMIT_BRANCH="${CI_COMMIT_BRANCH:-$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME}" + if [[ "$CI_COMMIT_BRANCH" == "ci-rebuild-mcore-nemo-image" || "$CI_COMMIT_BRANCH" == "main" || "$CI_COMMIT_BRANCH" == "dev" ]]; then ADDITIONAL_PARAMS+=("--pull") fi From 5b17f19fc7d0ed6e00aabb1a3154769d276c68fe Mon Sep 17 00:00:00 2001 From: "Dennis(Zhenhuan) Liu" Date: Tue, 20 Jan 2026 00:56:53 +0800 Subject: [PATCH 243/334] [Dev] Update MoE readme. (#2808) Co-authored-by: Zijie Yan --- megatron/core/transformer/moe/README.md | 931 +++++++++++++++--------- 1 file changed, 584 insertions(+), 347 deletions(-) diff --git a/megatron/core/transformer/moe/README.md b/megatron/core/transformer/moe/README.md index a44daea38e2..71dfa17fda0 100644 --- a/megatron/core/transformer/moe/README.md +++ b/megatron/core/transformer/moe/README.md @@ -1,159 +1,396 @@ # Megatron Core MoE -Megatron-Core MoE provides comprehensive parallelism strategies, seamlessly integrating Expert Parallelism with tensor, data, sequence, and pipeline parallelism. With MCore v0.9, we've achieved remarkable performance of **468 TFLOPS** for Mixtral 8X7B bf16 training. Additionally, we support state-of-the-art MoE model architectures including DeepSeek-V3 and Qwen-MoE. - -### What's New -- **Support for DeepSeek-V3 architecture** - - Enable TP for MLA and DeepSeek-V3 - - Enable CP for MLA and DeepSeek-V3 - - Requires TransformerEngine >= 2.5.0 - - Many thanks to [SuperCB](https://github.com/SuperCB) from Xiaohongshu Inc. 
and [RandMist](https://github.com/RandMist) from WeChat Infra Department, Tencent Inc. for their contributions. - - Support aux-loss-free load balancing strategy - - Support node-limited routing - - Support Multi-Token Prediction (MTP) - - Batch-level overlapping to hide EP-A2A communication -- **Support DeepSeek's DeepEP for efficient token dispatching and combining** -- Support HybridEP for efficient token dispatching and combining within intra-node and MNNVL scenarios. -- Add fusion for token permutation and unpermutation -- Support Uneven virtual pipeline parallel split -- Support output-discarding checkpointing on some submodules - -### Parallelism -- **Expert Parallelism** - - A specific method of parallelism for MoE models, where experts are partitioned onto different workers and each worker processes a different batch of training samples, each worker process one or more experts for each MoE layer. -- **3D Parallelism**: Data Parallelism, Tensor Parallelism, Pipeline Parallelism - - Note: When using MoE with expert parallelism and tensor parallelism, sequence parallelism must be enabled. -- **Context Parallelism**: - - Split the sequence dimension to support long context training. -- **Richer parallel mappings**: EP can be combined with DP/TP/PP/CP for handling larger MoE variants. -- **MoE Parallel Folding**: Support for setting different parallelism strategies for Attention and MoE components, enabling more flexible and efficient model sharding. See detailed documentation below. 
-- **Full distributed optimizer support.** - -### Router and Load Balancing -- Router type: - - Top-K MLP router -- Load Balancing algorithms: - - Sinkhorn (S-BASE) - - Aux loss / Load balancing loss - - Aux-loss-free load balancing strategy -- CUDA fused routing and load balancing kernels +Megatron Core MoE is a production-ready framework for training large-scale Mixture-of-Experts models, providing the foundational architecture, performance optimizations, and best practices that guide MoE framework development across the industry. + +## Table of Contents + +- [What's New](#whats-new) +- [Overview of MCore MoE Supported Features and Architectures](#overview-of-mcore-moe-supported-features-and-architectures) +- [Quick Start Guide](#quick-start-guide) + - [Basic MoE Training](#basic-moe-training-in-megatron-lm) + - [Pre-defined Configs for Popular Models](#use-the-pre-defined-config-to-train-the-popular-moe-models) + - [General Performance Tips](#general-performance-tips) +- [Best Practices for High Performance MoE Training](#best-practices-to-achieve-high-performance-on-moe-training) + - [Step 1: Find Feasible Parallel Mapping](#step-1-find-the-feasible-parallel-mapping-under-the-memory-capacity-of-the-gpu) + - [Step 2: Select Optimal Parallelism Strategy](#step-2-select-optimal-parallelism-strategy) + - [Step 3: Enable Performance Features](#step-3-enable-performance-features-based-on-profiling-bottlenecks) +- [Feature Documentation](#feature-documentation) + - [Router and Load Balancing](#router-and-load-balancing) + - [Token Dispatching](#token-dispatching) + - [Upcycling](#upcycling) +- [Training Optimizations](#training-optimizations) + - [MoE Parallel Folding](#moe-parallel-folding) + - [Memory Optimization](#memory-optimization) + - [Communication Optimization](#communication-optimization) + - [Compute Optimization](#compute-optimization) + - [FP8 Training](#fp8-training) + - [CUDA Graph](#cuda-graph) +- [MoE Arguments Reference](#moe-arguments-reference) +- 
[Examples](#examples) +- [Contributing](#contributing) +- [Citation](#citation) + +## What's New +For latest features and architectures, please refer to the [MCore dev roadmap](https://github.com/NVIDIA/Megatron-LM/issues/1729). + +### 🔥 [MCore dev] (2026/01) +- 🚀 Pipeline-aware fine-grained activation offloading +- 🚀 Qwen3-Next model support +- 🚀 Muon and Layer-wise distributed optimizer + +### 🔥 [MCore v0.15] (2025/11) +- 🚀 Add HybridEP backend to Flex Dispatcher(GB200, B200, H100 supported) +- 🚀 Support FSDP with EP for MoE models + +### 🔥 [MCore v0.14] (2025/09) +- 🚀 Batch-level overlapping to hide EP-A2A communication (--overlap-moe-expert-parallel-comm --delay-wgrad-compute) +- 🚀 FP8 support for Fine-grained Recomputations +- Router fusion kernels for MoE models (--moe-router-fusion) +- Context Parallelism (CP) support for MTP and MLA + +### 🔥 [MCore v0.13] (2025/07) +- Support bf16 dtype for optimizer states to use precision-aware optimizer in TransformerEngine (--use-precision-aware-optimizer) +- Flexible Asymmetric Virtual Pipeline Parallelism with Custom Pipeline Layout (--pipeline-model-parallel-layout) +- Add Hybrid Shard Data-Parallel support for MoE models (--num-distributed-optimizer-instances) +- Fine-grained recomputation to reduce activation memory. (--recompute-modules with --recompute-granularity selective) +- Memory efficient token permutation by moving the probs multiplication from unpermutation to activation function of GroupedMLP. 
+ +### 🔥 [MCore v0.12] (2025/05) +- Support DeepSeek's DeepEP for efficient token dispatching (--moe-token-dispatcher-type flex --moe-enable-deepep) +- Support Multi-Token Prediction (MTP) (--mtp-num-layers 1) +- CUDA Graph support for dropless MoE models with attention only capture (--te-rng-track --external-cuda-graph --cuda-graph-scope attn) + +## Overview of MCore MoE Supported Features and Architectures + +### Model Support +- ✅ **DeepSeek** + - ✅ DeepSeek-V2 + - ✅ DeepSeek-V3, including MTP +- ✅ **Qwen** + - ✅ Qwen2-57B-A14B + - ✅ Qwen3-30B-A3B + - ✅ Qwen3-235B-A22B +- ✅ **Mixtral** + - ✅ Mixtral-8x7B + - ✅ Mixtral-8x22B + +### Core MoE Functionality +- ✅ Token dropless MoE (dMoE) - Advanced routing without token dropping +- ✅ Top-K Router with flexible K selection +- ✅ Load balancing losses for expert utilization optimization + +### Advanced Parallelism +- ✅ Expert Parallel (EP) with 3D parallelism integration +- ✅ Full parallelism combo: EP + DP + TP + PP + SP support +- ✅ Context Parallel (CP) for long sequence MoE training +- ✅ Parallel Folding Heterogeneous Parallelism Mappings for Efficient Large-Scale MoE Model Training +- ✅ Distributed Optimizer for MoE (ZeRO-1 equivalent) ### Performance Optimizations -- (Experimental) **DeepEP** is integrated for efficient token communication in large-scale MoE training. 
-- GroupedGEMM when num local experts > 1 - - Supported dtype: bf16 - - Performance improvements for larger MoE models -- Enable `--tp-comm-overlap` for MoE -- FP8 training support - -### Token Dispatch Mechanism -- Dropless / No token drop -- Token drop, with or without padding to capacity -- Token permutation / Unpermutation fusion +- ✅ Memory Efficient token permutation +- ✅ Fine-grained Recomputations (mla, moe, mlp, moe_act, norm) +- ✅ MLA TP Support for Mixture of Linear Attention +- ✅ GroupedGEMM and GA Fusion +- ✅ DP/PP/TP Communication Overlapping +- ✅ Overlapped Shared Expert execution +- ✅ Router Fusion optimizations +- ✅ Token (un)permutation Fusion kernels +- ✅ cuDNN fused Attention integration + +### Hardware & Precision Support +- ✅ DeepEP support for H100 and B200 +- ✅ GroupedGEMM including FP8/MXFP8 support +- ✅ FP8 weights with BF16 optimizer states +- ✅ FP8 training full support + +### Developer Experience +- ✅ MoE Model Zoo with pre-training best practices +- ✅ Distributed Checkpointing for MoE models +- ✅ Upcycling Support for model scaling +- ✅ MCore2HF Converter for ecosystem compatibility +- ✅ Layer-wise logging for detailed monitoring +- ✅ Runtime Upcycling capabilities + +## Quick Start Guide + +### Basic MoE Training in Megatron-LM + +To train a top-2 MoE model with 8 experts and auxiliary loss, add the following arguments to your megatron training script: -### Ease of use -- Checkpoint converter for Mixtral models, see the [example](https://github.com/NVIDIA/Megatron-LM/tree/main/examples/mixtral) for details. 
-- MoE Layer Frequency to customize the hybrid MoE/Dense layer architecture
-- Distributed checkpoining
-- Per-layer logging
-- Upcycling Support
+```bash
+## Set MoE hidden size
+--num-experts 8
+--moe-shared-expert-intermediate-size 2048
+## Set router config
+--moe-router-load-balancing-type aux_loss
+--moe-router-topk 2
+--moe-aux-loss-coeff 1e-2
+## Set token dispatcher
+--moe-token-dispatcher-type alltoall
+```

-# User Guide
+Detailed documentation for each feature is available in the [Feature Documentation](#feature-documentation) section.

-## Usage
+### Use the pre-defined configs to train the popular MoE models
+We have provided some pre-defined configs to train the popular MoE models in the [Megatron-MoE-Model-Zoo](https://github.com/yanring/Megatron-MoE-ModelZoo/tree/main) repository. You can use them as a reference to configure your training script. Currently we have added the configs for Mixtral 8x7B, Mixtral 8x22B, DeepSeek-V3, Qwen3-30B-A3B, Qwen3-235B-A22B.

-### Quick Start
-To train a top-2 MoE model with 8 experts and auxiliary loss, include the following arguments:
+### General Performance Tips
+#### Training arguments
+The following flags are general performance flags that can help to achieve higher performance on almost all workloads. Check if you have enabled all of them in your training script.

 ```bash
---num-experts 8
---expert-model-parallel-size 8
+## Enable DeepEP token dispatcher
+--moe-token-dispatcher-type flex
+--moe-flex-dispatcher-backend deepep
+## Enable GroupedGEMM
 --moe-grouped-gemm
+## Enable fusion kernels
+--moe-router-fusion
 --moe-permute-fusion
---moe-router-load-balancing-type aux_loss # options: aux_loss, sinkhorn, none. Default is aux_loss.
---moe-router-topk 2
---moe-aux-loss-coeff 1e-2
+--cross-entropy-loss-fusion
+--cross-entropy-fusion-impl te
+
+## Communication optimization
 --use-distributed-optimizer
---moe-token-dispatcher-type alltoall
-```
+--overlap-param-gather
+--overlap-grad-reduce
+--tp-comm-overlap

-To enable the token drop mechanism, such as GShard and SwitchTransformer, include the following arguments:
+## Enable manual gc to prevent Python jitter
+--manual-gc
+--manual-gc-interval 10
+```
+#### Environment variables
+Below are some environment variables that can be useful.
 ```bash
---moe-expert-capacity-factor 1.0
---moe-pad-expert-input-to-capacity # Optional
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True # Enable expandable segments to prevent memory fragmentation
+export NCCL_NVLS_ENABLE=0 # Disable NVLS to prevent memory overhead
 ```
+#### Dependencies
+- Use the latest version of [TransformerEngine](https://github.com/NVIDIA/TransformerEngine).
+- Use the latest [NGC PyTorch Docker Image](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch)

-The following figure illustrates differenting dropping strategies in MCore:
-
-
-
-1. The default dropless strategy will not drop or pad any token.
-2. By setting `--moe-expert-capacity-factor`, the tokens exceed the capacity of expert will be dropped based on their selected probabilities.
- The dropping is performed before the token exchange operation between EP ranks when EP > 1.
- The formula of capacity is `capacity = num_tokens_per_rank * topk * capacity_factor / num_experts`.
-3. By setting `--moe-pad-expert-input-to-capacity`, the experts with tokens less than capacity will be padded to the capacity.
-
-### Fine-tuning Mixtral Models
-Megatron-Core has full support for Mixtral MoE models, and we provide the checkpoint converter for Mixtral models from huggingface format to MCore format.
- - -### Distributed Checkpointing -MCore v0.7 introduced fully parallel and asynchronous saving capabilities to distributed checkpointing, -which addresses the issues of low efficiency in the traditional checkpoint saving methods. -It also solved the problem of incompatibility between checkpoints of different parallel mappings in the traditional format. -With the new distributed checkpointing solution, MCore can achieve flexible parallelism configurations by saving and loading the unified format checkpoints. -Compared to native PyTorch solution, MCore achieves up to 50x reduction in checkpointing overhead. - -From MCore v0.8, MoE supports Distributed Checkpointing, which means users can save and load with any combination of parallelism and it is currently available, including expert parallel. -1. Loading weight and distributed optimizer states with TPxCPxEPxPP resharding with SequentialMLP is supported in version 0.8. -2. GroupedMLP weight resharding is supported in version 0.8.0 and optimizer state resharding is supported in version 0.10.0. Switching between GroupedMLP/SequentialMLP when loading and saving is partially supported. -3. TEGroupedMLP has fully support on distributed checkpointing and is fully exchangable with SequentialMLP in version 0.9.0. -4. Optimizer state resharding cannot do across EP=1 with EP>1 due to the different optimizer type. - -Usage -- `--ckpt-format torch_dist` The main argument, it will attempt to save and load using distributed checkpointing. -- `--auto-detect-ckpt-format` With this, it can load both distributed checkpointing and legacy checkpointing. 
- -Checkpoint compatibility across SequentialMLP, GroupedMLP, and TEGroupedMLP: -```text - ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ - │ GroupedMLP │ │ SequentialMLP │ │ TEGroupedMLP │ - │ │ │ │ │ │ - │ │ │ │ │ │ - │ ┌───────────┐ │ │ ┌───────────┐ │ │ ┌───────────┐ │ - │ │legacy ckpt│ │ │ │legacy ckpt│ │ │ │legacy ckpt│ │ - │ └─────┬─────┘ │ │ └─────┬─────┘ │ │ └─────┬─────┘ │ - │ ▼ │ │ ▼ │ │ ▼ │ - │ ┌─────────┐ │ │ ┌─────────┐ │ │ ┌─────────┐ │ - │ │dist ckpt│ │ │ │dist ckpt│ │ │ │dist ckpt│ │ -┌──►│ │ weight │ │◄────────►│ │ weight │ │◄────────►│ │ weight │ │◄──┐ -│ │ └─────────┘ │ │ └─────────┘ │ │ └─────────┘ │ │ -└───┼───────────────┼──────────┼───────────────┼──────────┼───────────────┼───┘ - │┌─────────────┐│ │┌─────────────┐│ │┌─────────────┐│ - ││ dist ckpt ││ ││ dist ckpt ││ ││ dist ckpt ││ - ││optim states ││ ││optim states ││◄────────►││optim states ││ - │└─────────────┘│ │└─────────────┘│ │└─────────────┘│ - └───────────────┘ └───────────────┘ └───────────────┘ -``` +## Best Practices to achieve high performance on MoE training + +Distributed training involves complex trade-offs between **communication**, **memory**, and **computation**, making it challenging to find an optimal parallelism configuration. This section provides a systematic workflow to help you identify the best parallel mapping for your model and hardware. + +### Step 1: Find the feasible parallel mapping under the memory capacity of the GPU +To find the best parallel mapping, we need to first know the feasible parallel mapping for the model under the memory capacity of the GPU. +The consumption of memory consists of three parts: +- Activation memory +- Weight and gradient memory +- Optimizer states memory +Different parallel strategies will shard these tensor memory in different ways. 
+ +| Parallel Strategy | Peak Activation Memory | Weight Memory | Optimizer states | Communication (Per-Layer) | +|:-----------------:|:-------------------------------:|:--------------:|:---------------------------------:|:-------------------------:| +| TP | 1/N (with SP on) | 1/N | 1/N | High | +| EP | ~1 (varies with EP balancing) | 1/N in MoELayer| 1/N | Medium | +| PP | 1 (>1 with virtual pipeline) | 1/N | 1/N | Medium | +| CP | 1/N | 1 | 1/N (with distributed optimizer) | Medium | +| DP | 1 | 1 | 1/N (with distributed optimizer) | Low | + +We provide the argument of `--fake-init-process-group` to emulate distributed training on one GPU. This is useful to find the feasible parallel mapping under the memory capacity of the GPU. See https://github.com/NVIDIA/Megatron-LM/pull/2254 for detailed usage. + +### Step 2: Select Optimal Parallelism Strategy + +The optimal parallelism configuration varies based on **model architecture**, **sequence length**, and **hardware platform**. Below are general guidelines to help you achieve high throughput. 
+ +#### Guideline 1: Minimize Model Parallelism, Maximize Data Parallelism + +| Aspect | Recommendation | +|--------|----------------| +| **Goal** | Keep TP/EP/PP as small as possible while avoiding OOM | +| **Why** | Model parallelism introduces communication overhead that hurts performance | +| **How** | Use distributed optimizer (`--use-distributed-optimizer`) to shard optimizer states across DP ranks, freeing memory for larger DP size | + +#### Guideline 2: Keep EP and TP Communication Within NVLink Domain + +| Aspect | Recommendation | +|--------|----------------| +| **Goal** | Ensure EP×TP fits within a single node (typically 8 GPUs) | +| **Why** | EP and TP are communication-intensive; NVLink provides much higher bandwidth than cross-node interconnects | +| **Scaling** | When scaling beyond one node, prefer PP over expanding TP/EP across nodes | + +**Note:** +For very large MoE models like DeepSeek-V3, the EP communication may exceed the NVLink bandwidth. In this case, consider using 1F1B A2A Overlap to overlap the EP communication. 
+ +#### Guideline 3: Use Pipeline Parallelism (PP) for Multi-Node Scaling + +| Aspect | Recommendation | +|--------|----------------| +| **Goal** | Use PP to distribute layers across nodes while keeping EP×TP within NVLink | +| **VPP** | Enable Virtual Pipeline Parallelism to reduce pipeline bubbles when `PP ≥ 2` | +| **Config** | Set `--num-layers-per-virtual-pipeline-stage` to control VPP size | + +**VPP Size Tuning:** +- Valid values: all divisors of `num_layers / PP_size` +- Example: `num_layers=24, PP=4` → valid VPP sizes: `{1, 2, 3, 6}` +- Trade-off: Larger VPP = fewer bubbles but more P2P communications +- Recommendation: A middle value often gives the best balance + +#### Guideline 4: Prefer EP over TP for Expert Layers + +| EP Advantages | Details | +|---------------|---------| +| **Better GEMM efficiency** | Larger local matrix sizes improve GPU utilization | +| **Lower communication** | EP has less communication overhead than TP for MoE layers | +| **Simpler computation graph** | Easier to overlap communication with computation | +| **Token permutation** | When `EP = num_experts`, local token permutation is eliminated | + +**Example:** For Mixtral 8x7B, `EP8×TP1` outperforms `EP4×TP2`. + +#### Guideline 5: Enable Context Parallelism (CP) for Long Sequences + +| Aspect | Recommendation | +|--------|----------------| +| **When to use** | Sequence length ≥ 8K tokens | +| **Key factor** | CP efficiency depends on overlapping communication with computation | +| **Config** | Set `--context-parallel-size` to partition sequences across GPUs | + +### Step 3: Enable Performance Features Based on Profiling Bottlenecks + +After establishing a working parallel configuration, profile your training to identify bottlenecks and apply targeted optimizations. + +#### Memory Bottleneck + +**Symptom**: Forced to use full recomputation or excessively large parallelism degrees to avoid OOM. 
+ +**Solutions**: +| Optimization | Overhead | Config | Reference | +|--------------|----------|--------|---------| +| Selective Recomputation | Low | `--recompute-granularity selective --recompute-modules ...` | [Fine-grained Recomputation](#fine-grained-recomputation) | +| Activation Offloading | Medium | `--fine-grained-activation-offloading --offload-modules ...` | [Fine-grained Activation Offloading](#fine-grained-activation-offloading) | +| Optimizer Offloading | Medium | `--optimizer-cpu-offload` | --- | + +#### Communication Bottleneck + +**Symptom**: Profiling shows significant time spent in collective operations. + +**Solutions**: Identify which communication is the bottleneck and enable corresponding overlap: +| Communication Type | Overlap Config | +|--------------------|----------------| +| DP gradient reduce | `--overlap-grad-reduce` | +| DP param gather | `--overlap-param-gather` | +| TP communication | `--tp-comm-overlap` | +| EP All-to-All | `--overlap-moe-expert-parallel-comm --delay-wgrad-compute` | +| PP send/recv | Enable VPP with `--num-layers-per-virtual-pipeline-stage` | + +#### CPU Overhead Bottleneck + +**Symptom**: Nsight Systems timeline shows gaps between GPU kernels where CPU cannot launch kernels fast enough. + +**Solutions**: +| Optimization | Config | +|--------------|--------| +| Disable Python GC | `--manual-gc --manual-gc-interval 100` | +| Enable CUDA Graphs | `--cuda-graph-impl transformer_engine --cuda-graph-scope attn moe_router moe_preprocess` | +| Reduce kernel launches | Decrease TP size or increase micro-batch size | + +#### Computation Bottleneck + +**Symptom**: GPU utilization is low despite no communication or CPU bottlenecks. 
+
+**Solutions**:
+| Optimization | Config |
+|--------------|--------|
+| Enable kernel fusions | `--moe-router-fusion --moe-grouped-gemm --moe-permute-fusion` |
+| Use FP8 precision | `--fp8-format e4m3 --fp8-recipe blockwise` |
+
+
+## Feature Documentation
+
+### Router and Load Balancing
+
+Routers determine which expert(s) handle each token. A lightweight MLP scores every token and applies `softmax` or `sigmoid` to compute routing probabilities. The router then selects the top-K experts for each token.
+
+> **Note**: The router logits are better kept in **FP32** or **FP64** rather than BF16 via `--moe-router-dtype fp32`. At high expert counts, FP32 precision yields better accuracy because output hidden states of experts are multiplied by router scores and accumulated to get the final output.
+
+#### Router Types
+
+| Router Types | Description | Config |
+|-------------|-------------|----------|
+| **Top-K Router** | Standard routing with configurable K, uses softmax for probability computation | --moe-router-topk 8 |
+| **Group Top-K Router** | Selects top-K expert groups, then routes experts in selected groups | --moe-router-num-groups 8 --moe-router-group-topk 4 |
+| **Router score function** | Score function to calculate the probs from output logits of router | --moe-router-score-function softmax/sigmoid |
+
+#### Load Balancing Strategies
+
+| Strategy | Description | Config |
+|----------|-------------|--------|
+| **aux_loss** | Auxiliary loss for balancing expert usage on a micro-batch | `--moe-router-load-balancing-type aux_loss` |
+| **seq_aux_loss** | Sequence-level auxiliary loss for balancing expert usage on each sequence| `--moe-router-load-balancing-type seq_aux_loss` |
+| **global_aux_loss** | Global auxiliary loss for balancing expert usage on a global batch across all ranks | `--moe-router-load-balancing-type global_aux_loss` |
+| **sinkhorn** | Optimal transport formulation for balancing expert usage | `--moe-router-load-balancing-type 
sinkhorn` | +| **aux loss free** | Dynamic bias-based load balancing strategy without auxiliary loss | `--moe-router-enable-expert-bias --moe-router-bias-update-rate 1e-3`| +| **none** | No load balancing | `--moe-router-load-balancing-type none` | + +### Token Dispatching + +After routing, tokens are **dispatched** to the GPU hosting the assigned expert. After expert computation, tokens are sent back and **combined** to restore the original sequence. + +| Dispatcher | Description | Best For | Config | +|------------|-------------|----------|--------| +| **alltoall** | NCCL-based All-to-All communication for token exchange | Standard EP > 1 setups | `--moe-token-dispatcher-type alltoall` | +| **FlexDispatcher with [DeepEP](https://github.com/deepseek-ai/DeepEP) backend** | Removes redundant tokens during cross-node communication, fuses intra/inter-node communication into single kernel | Cross-node EP, fine-grained MoE (DeepSeek-V3) | `--moe-token-dispatcher-type flex --moe-flex-dispatcher-backend deepep` | +| **FlexDispatcher with [HybridEP](https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep) backend** | NVIDIA's optimized dispatcher using TMA and IBGDA, fewer SMs, native MNNVL support | GB200 NVL72, Multi-Node NVLink | `--moe-token-dispatcher-type flex --moe-flex-dispatcher-backend hybridep` | +| **allgather** | Gathers all tokens to each GPU, no inter-GPU token movement | TP-only setups, small EP, large Top-K | `--moe-token-dispatcher-type allgather` | + +### Upcycling +Use `--moe-use-upcycling` to enable upcycling, which loads the dense model from the `--load` directory, converts it to an MoE model at runtime, and starts training. The converted model is saved to the `--save` path before training begins. Upcycling is built on distributed checkpointing, supporting parallel modes different from existing dense checkpoints, such as arbitrary expert parallelism during upcycling. 
+ +In addition to the default upcycling strategy, we also support granular upcycling strategy which is a more state-of-the-art upcycling strategy from [our recent research work](https://arxiv.org/abs/2410.07524). For the default upcycling strategy, we duplicate the existing MLP to multiple experts, with each expert starting from a copy of the MLP. For the granular upcycling strategy, we use `--moe-upcycling-granularity` to specify how many times smaller is the expert hidden size compared with the original dense FFN hidden size. For using granular upcycling strategy, please set `--moe-upcycling-granularity` as a positive integer. If this param is set to 1, it means using the default upcycling strategy. + +Note: The MoE model structure is defined through script arguments. All MoE-related arguments (such as `--num-experts`) can be customized; however, other model structure arguments must be consistent with those of the dense model. For granular upcycling strategy, the moe's FFN hidden size should be set as dense FFN hidden size divided by `--moe-upcycling-granularity`. + +## Training Optimizations +MoE training faces three fundamental performance bottlenecks: **Memory Wall**, **Communication Wall**, and **Compute Efficiency Wall**. The following optimizations address each of these challenges. + +### MoE Parallel Folding +**The Problem with Traditional Approaches:** +- Prior MoE frameworks constrain **EP ≤ DP** (Expert Parallelism must be a sub-group of Data Parallelism), which severely limits scalability. 
+- Applying the same TP/CP to both attention and MoE is suboptimal: + - High TP benefits attention but hurts MoE (small per-expert dims make TP overhead prohibitive) + - High CP benefits long-context attention but is unnecessary for MoE (tokens processed independently) + +**MoE Parallel Folding** is Megatron Core's solution that **decouples attention and MoE parallelism**: + +| Parallelism Group | Attention Layers | MoE Layers | +|-------------------|------------------|------------| +| **Dimensions** | TP × CP × DP × PP | ETP × EP × EDP × PP | + +#### Key Benefits + +1. **Breaks the EP ≤ DP Constraint** + - Traditional: TP=4, CP=2, DP=8, PP=4 → max EP=8 + - With Folding: Same attention config, but MoE uses ETP=1, EP=64, EDP=1 → 8× more expert parallelism -Best practices for distributed checkpointing: -1. Convert a legacy checkpoint to a distributed checkpoint. To achieve this, we can add both `--ckpt-format torch_dist --auto-detect-ckpt-format`, then it will load the legacy one and save as the distributed checkpoint format later when the training progress tries to save checkpoints. -2. Convert checkpoint of the legacy GroupedMLP to TEGroupedMLP. This is only supported for the weight parts. To achieve this, we can use the above method to convert the legacy checkpoint to a distributed checkpoint of the legacy GroupedMLP. After updating the libraries and using TEGroupedMLP, we can directly load the previously saved checkpoint by adding argument `--no-load-optim`. +2. **Reduces Minimum GPU Requirements** + - Traditional CP=8, EP=8 requires at least 64 GPUs + - With Folding: CP and EP are folded together, only 8 GPUs needed -### Shared Experts -MCore v0.9 introduced the shared expert feature. We can enable this feature by setting suitable `--moe-shared-expert-intermediate-size`. +3. 
**Enables Independent Optimization** + - Use high TP for attention (memory efficiency) + - Use ETP=1 for MoE (better GEMM efficiency, less communication) -The parallelism patterns of the shared experts follow the settings of the dense part, i.e., the attention module. The shared experts are not distributed but replicated in EP ranks. +4. **Keeps High-Bandwidth Communication in NVLink Domain** + - Both CP and EP communication can remain within NVLink domain -We also have an experimental feature that tries to overlap the communications and computations in the shared experts and the dispatcher. -We can set `--moe-shared-expert-overlap` and use `alltoall` dispatcher to enable it. -The overlapping relies on the envirionment setting `CUDA_DEVICE_MAX_CONNECTIONS=1`. -The `AllGather` and `ReduceScatter` communications in the shared experts are overlapped with `permute`/`unpermute` in the dispatcher. -The `MLP` computation part in the shared experts are overlapped with the `AlltoAll` communications in the dispatcher. -Both the forward and the backward pass can overlap. But to get the overlapping in the backward pass, the PyTorch version should `>= 2.2.0`. +> **Reference**: [MoE Parallel Folding: Heterogeneous Parallelism Mappings for Efficient Large-Scale MoE Model Training](https://arxiv.org/abs/2504.14960) -### Checkpointing +### Memory Optimization + +Memory optimization is critical for large-scale MoE training, as MoE models maintain all expert parameters even though only a subset is activated per token. 
+ +| Optimization | Description | Config | +|--------------|-------------|--------| +| **Fine-grained Recomputation** | Selectively recomputes specific modules (e.g., `mla_up_proj`, `layernorm`, `moe_act`) instead of full layers | `--recompute-granularity selective --recompute-modules mla_up_proj layernorm moe_act` | +| **Fine-grained Activation Offloading** | Offloads activations to CPU memory, overlapping D2H/H2D transfers with computation | See `docs/source/api-guide/fine_grained_activation_offloading.md` | +| **Precision-aware Optimizer** | Stores optimizer states (exp_avg, exp_avg_sq) in BF16 instead of FP32, reducing optimizer memory by 50% | `--use-precision-aware-optimizer --exp-avg-dtype bf16 --exp-avg-sq-dtype bf16` | +| **Optimizer Offloading** | Offloads optimizer states to CPU memory. | `--optimizer-cpu-offload` | + +#### Fine-grained Recomputation A new output-discarding checkpointing method is also supported. This method discards the output memory of certain submodules during the forward pass and recomputes them during the backward pass, which can save memory compared to standard checkpointing. This can be enabled for specific submodules using the `--recompute-granularity selective --recompute-modules [submodule1, submodule2, ...]` argument. The supported submodules are: * `moe_act`: Recompute the GroupedMLP activation function. @@ -163,137 +400,214 @@ A new output-discarding checkpointing method is also supported. This method disc * `mlp`: Recompute the dense MLP submodule (uses standard checkpointing rather than output-discarding) which is useful for hybrid-models like DeepSeek-V3. * `moe`: Recompute the MoE layer submodule (uses standard checkpointing rather than output-discarding). -### Upcycling -Use `--moe-use-upcycling` to enable upcycling, which loads the dense model from the `--load` directory, converts it to an MoE model at runtime, and starts training. The converted model is saved to the `--save` path before training begins. 
Upcycling is built on distributed checkpointing, supporting parallel modes different from existing dense checkpoints, such as arbitrary expert parallelism during upcycling. +#### Fine-grained Activation Offloading -In addition to the default upcycling strategy, we also support granular upcycling strategy which is a more state-of-the-art upcycling strategy from [our recent research work](https://arxiv.org/abs/2410.07524). For the default upcycling strategy, we duplicate the existing MLP to multiple experts, with each expert starting from a copy of the MLP. For the granular upcycling strategy, we use `--moe-upcycling-granularity` to specify how many times smaller is the expert hidden size compared with the original dense FFN hidden size. For using granular upcycling strategy, please set `--moe-upcycling-granularity` as a positive integer. If this param is set to 1, it means using the default upcycling strategy. +Unlike recomputation (which trades compute for memory), offloading trades **GPU-CPU bandwidth for memory**: activations are transferred to CPU during forward pass and retrieved during backward pass. The key is hiding transfer latency behind computation using asynchronous D2H/H2D transfers. -Note: The MoE model structure is defined through script arguments. All MoE-related arguments (such as `--num-experts`) can be customized; however, other model structure arguments must be consistent with those of the dense model. For granular upcycling strategy, the moe's FFN hidden size should be set as dense FFN hidden size divided by `--moe-upcycling-granularity`. 
+**Key Features:** +- **Module-level granularity**: Target specific modules rather than entire layers +- **Computation-offloading overlap**: Asynchronous transfers via independent CUDA streams +- **Compatible with PP/VPP**: Works with pipeline parallelism and fine-grained recomputation -### Leverage DeepSeek's DeepEP for High-Performance Cross-Node Token Dispatching -- [DeepSeek-DeepEP](https://github.com/deepseek-ai/deepep) provides a highly optimized implementation for MoE token dispatching and combining operations, specifically designed for large-scale MoE training scenarios. -- DeepEP is particularly recommended for training large-scale, fine-grained MoE architectures such as DeepSeek-V3 and other advanced MoE models. -- To enable DeepEP in your training configuration, simply set `--moe-token-dispatcher-type=flex` and `--moe-flex-dispatcher-backend=deepep` in your command line arguments. +**Usage** +```bash +--fine-grained-activation-offloading +--offload-modules expert_fc1 moe_act # Choices: attn_norm, core_attn, attn_proj, mlp_norm, expert_fc1, moe_act +``` -### Integrate HybridEP for High-Performance Intra-Node Token Dispatching -- [HybridEP](https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep) is developed by NVIDIA as an optimized solution for large-scale MoE (Mixture of Experts) all-to-all communication. It is designed to leverage NVIDIA GPU hardware capabilities, significantly reducing Streaming Multiprocessor (SM) resource usage. -- HybridEP currently supports intra-node and multi-node NVLink scenarios. -- To enable HybridEP, set `--moe-token-dispatcher-type=flex` and - `--moe-flex-dispatcher-backend=hybridep` in your command line arguments. +For more details, see `docs/source/api-guide/fine_grained_activation_offloading.md` -### CUDA Graph Support -CUDA Graph functionality can be enabled through the `--cuda-graph-impl` option. There are two implementations: +### Communication Optimization -1. 
`--cuda-graph-impl=local`: Captures cuda graphs using the MCore-internal cuda graph manager. -2. `--cuda-graph-impl=transformer_engine`: Captures cuda graphs using the TE `make_graphed_callables()` interface. +Distributed training introduces communication overhead from various parallelism strategies. Megatron Core supports overlapping communication with computation to hide latency and improve throughput. -To use `--cuda-graph-impl=transformer_engine`, the user should call related methods `TECudaGraphHelper.create_cudagraphs()` and `TECudaGraphHelper.cuda_graph_set_manual_hooks()` in the training script. Please refer to the usage in `megatron/training/training.py`. +#### Data Parallel (DP) Communication Overlap -For MoE models, certain configurations may prevent CUDA Graph capture of MoE layers. Specifically, when `--moe-expert-capacity-factor` and `--moe-pad-expert-input-to-capacity` are not set, the resulting dynamic shapes make MoE layers uncapturable. In such cases, you can still leverage CUDA Graphs for the attention layers (operations in `TransformerLayer._forward_attention()`) by setting `--cuda-graph-scope=attn`, while leaving the MoE layers (operations in `TransformerLayer._forward_mlp()`) unmodified. See the argument description for more usage of `--cuda-graph-scope`. +With distributed optimizer, DP introduces **reduce-scatter** (gradients) and **all-gather** (parameters) communications, chunked by Transformer layer granularity. 
+ +| Optimization | Description | Config | +|--------------|-------------|--------| +| **Gradient Reduce Overlap** | Overlaps gradient reduce-scatter with backward computation | `--overlap-grad-reduce` | +| **Param Gather Overlap** | Overlaps parameter all-gather with forward computation | `--overlap-param-gather` | +| **BF16 Gradient Reduce** | Reduces gradients in BF16 instead of FP32 for better performance | `--grad-reduce-in-fp32 false` (via mixed precision config) | +| **FP8 Param Gather** | Conducts parameter all-gather in FP8, reducing overhead by 50% | `--fp8-param-gather` | + +#### Tensor Parallel (TP) Communication Overlap + +TP with sequence parallelism introduces activation all-gather and reduce-scatter operations. Communications are overlapped in **bulk** (no dependency) or **pipelined** (with dependency) fashion. + +| Optimization | Description | Config | +|--------------|-------------|--------| +| **TP Comm Overlap** | Enables bulk and pipelined TP communication overlap | `--tp-comm-overlap` | + +> **Requirements**: `tensor_model_parallel_size >= 2` and `--sequence-parallel` + +#### Pipeline Parallel (PP) Communication Overlap + +PP introduces P2P activation sends/receives between pipeline stages. Overlap is automatic in the 1F1B pipelining phase when VPP is enabled. + +| Optimization | Description | Config | +|--------------|-------------|--------| +| **P2P Comm Overlap** | Overlaps PP P2P communications with non-dependent computations | `--overlap-p2p-comm` (auto-enabled with VPP) | +| **VPP for Better Overlap** | Increases overlap opportunities by reducing layers per virtual stage | `--num-layers-per-virtual-pipeline-stage` | + +#### Expert Parallel (EP) Communication Overlap + +EP All-to-All can consume 30-40% of training time without optimization. These features hide or reduce EP communication overhead. 
+ +| Optimization | Description | Config | +|--------------|-------------|--------| +| **EP A2A Overlap** | Overlaps All-to-All with computation by merging FWD-BWD passes of adjacent microbatches | `--overlap-moe-expert-parallel-comm --delay-wgrad-compute` | +| **Shared Expert Overlap** | Runs shared expert computation concurrently with EP token transfer | `--moe-shared-expert-overlap` | +> **Requirements for EP A2A Overlap**: `expert_model_parallel_size > 1`, CUDA_DEVICE_MAX_CONNECTIONS > 1. -### Batch-Level EP-A2A hidding -Enable A2A overlap across different batches inspired by the DSv3 DualPipe implmentation. \ -**Features** -- Hide ep a2a communication by batch-level overlapping -- Split weight gradient and activation gradient computations for better overlap with communications -- Support interleaved pipelined parallelism -- Support FP8 training -- Support MTP (`-mtp-num-layers 1` only, multiple MTP layers are not supported yet.) +### Compute Optimization +Fine-grained MoE produces many small operations that can underutilize GPU resources. These optimizations reduce kernel launch overhead and improve GPU utilization. 
+ +| Optimization | Description | Config | +|--------------|-------------|--------| +| **Grouped GEMM** | Batches multiple expert GEMM operations into a single kernel call, improving GPU utilization | `--moe-grouped-gemm` | +| **Router Fusion** | Fuses router projection, top-k selection, softmax, and auxiliary loss into fewer kernels | `--moe-router-fusion` | +| **Permute Fusion** | Fuses token permutation/unpermutation operations into optimized single kernels | `--moe-permute-fusion` | +| **FP8 Training** | Uses FP8 Tensor Core operations for faster GEMMs on Hopper/Blackwell GPUs | `--fp8 --fp8-recipe blockwise` | + + +### FP8 Training + +FP8 training provides benefits across all three performance walls: + +| Wall | FP8 Benefit | Impact | +|------|-------------|--------| +| **Compute** | Faster Tensor Core GEMMs | FP8 ops on Hopper/Blackwell are faster than BF16 | +| **Memory** | 50% activation reduction | Stores linear layer inputs in FP8 instead of BF16 | +| **Communication** | 50% parameter all-gather | With FP8 primary weights (except MXFP8) | + +#### FP8 Recipes + +| Recipe | Scaling Granularity | Format | Platform | Use Case | +|--------|---------------------|--------|----------|----------| +| **Per-tensor** | Whole tensor | E4M3/E5M2 hybrid | Hopper, Blackwell | Conservative, initial experimentation | +| **Blockwise** | 1×128 (activations), 128×128 (weights) | E4M3 | Hopper | **Production-proven** (DeepSeek-V3, Minimax-M2) | +| **MXFP8** | 1×32 | E4M3 + E8M0 scaling | Blackwell | Native hardware support on GB200 | + +> **Recommendation**: Use **blockwise FP8** on Hopper for production training. It has been validated at scale on DeepSeek-V3 class models. 
+ +#### MoE-Specific FP8 Optimizations + +| Optimization | Description | Config | +|--------------|-------------|--------| +| **Routing Map Padding** | Pads routing map (not tokens) to align M dimension to 16/32, avoiding per-tensor padding overhead | `--moe-router-padding-for-fp8` | +| **FP8 Primary Weights** | Casts FP32 master weights directly to FP8, eliminating BF16 intermediate copy | `--fp8-param-gather` (Need additional `--reuse-grad-buf-for-mxfp8-param-ag` for MXFP8) | + + +#### Example Configuration -**Usage** ```bash -# Add the following flags to your training scripts ---overlap-moe-expert-parallel-comm -# [optional] only works with specific TE version ---delay-wgrad-compute +# Blockwise FP8 on Hopper (recommended for production) +--fp8-format e4m3 +--fp8-recipe blockwise +--fp8-param-gather +--moe-router-padding-for-fp8 + +# MXFP8 on Blackwell +--fp8-format e4m3 +--fp8-recipe mxfp8 +--moe-router-padding-for-fp8 +--fp8-param-gather +--reuse-grad-buf-for-mxfp8-param-ag ``` -### Fine-grained Activation Offloading (collaborated with rednote) -Offload the input activation at the granularity of modules +> **Note**: For blockwise and MXFP8 recipes with current scaling, training loss curves show negligible difference compared to BF16 baselines. -**Usage** -```bash -# Enable fine-grained activation offloading ---fine-grained-activation-offloading -# Specify which modules are going to offload its input -# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". ---offload-modules expert_fc1 -``` -For more details, please refer to the ```docs/source/api-guide/fine_grained_activation_offloading.md``` - -### MoE Related Arguments -| Item | Description | -| --- | --- | -| --num-experts | Number of Experts in MoE (None means no MoE) | -| --expert-model-parallel-size | Degree of expert model parallelism. Default is 1. | -| --moe-ffn-hidden-size | MoE Feed-Forward Network hidden size. Default is None. | - -
    - View all MoE related arguments. - -| Item | Description | -| --- | --- | -| --num-experts | Number of Experts in MoE (None means no MoE) | -| --expert-model-parallel-size | Degree of expert model parallelism. Default is 1. | -| --moe-ffn-hidden-size | MoE Feed-Forward Network hidden size. Default is None. | -| --expert-tensor-parallel-size | Degree of tensor model parallelism of expert layer. Default is same to --tensor-model-parallel-size. | -| --moe-layer-freq | Frequency between MoE layers and Dense layers. Accepts either: 1) An integer N for 1:N ratio (one expert layer for every N-1 dense layers), 2) A string "N" for the same ratio, or 3) A string with Python list expression for custom patterns like `([1]*3+[0]*1)*3` which gives [1,1,1,0,1,1,1,0,1,1,1,0] where 1=expert layer and 0=dense layer. Examples: `([0]+[1]*23)` for 1 dense layer followed by 23 experts layers, `([1]*3+[0]*2)*2` for three expert layers followed by two dense layers, repeated twice. Default is 1. | -| --moe-grouped-gemm | When there are multiple experts per rank, launch multiple local GEMM kernels in multiple streams to improve the utilization and performance with GroupedLinear in TransformerEngine. | -| --moe-router-load-balancing-type | Determines the load balancing strategy for the router. "aux_loss" corresponds to the load balancing loss used in GShard and SwitchTransformer; "seq_aux_loss" corresponds to the load balancing loss used in DeepSeekV2 and DeepSeekV3, which computes the loss for each individual sample; "sinkhorn" corresponds to the balancing algorithm used in S-BASE, and "none" implies no load balancing. The default is "aux_loss". | -| --moe-router-dtype | Data type for routing computation and expert output weighted averaging. Options are 'fp32' and 'fp64'. This can improve numerical stability, particularly when using a large number of experts. The throughput/memory impact should be negligible when used with --moe-permute-fusion. Default is None (no dtype promotion). 
| -| --moe-router-topk | Number of experts to route to for each token. The default is 2. | -| --moe-router-score-function | Score function for MoE routing. Can be "softmax" or "sigmoid". Default is "softmax". | -| --moe-router-pre-softmax | Enable pre-softmax routing for MoE, which means softmax is before the top-k selection. By default, softmax is done after top-k. | -| --moe-router-num-groups | Number of groups to divide experts into for group-limited routing. When using group-limited routing: 1) Experts are divided into equal-sized groups, 2) For each token, a subset of groups are selected based on routing scores (sum of top-2 expert scores within each group), 3) From these selected groups, moe_router_topk experts are chosen. Two common use cases: 1) Device-limited routing: Set equal to expert parallel size (EP) to limit each token to experts on a subset of devices (See DeepSeek-V2: https://arxiv.org/pdf/2405.04434) 2) Node-limited routing: Set equal to number of nodes in EP group to limit each token to experts on a subset of nodes (See DeepSeek-V3: https://arxiv.org/pdf/2412.19437)) | -| --moe-router-group-topk | Number of selected groups for group-limited routing. | -| --moe-router-topk-scaling-factor | Scaling factor for routing score in top-k selection, only works when --moe-router-pre-softmax enabled. Defaults to None, which means no scaling. | -| --moe-router-enable-expert-bias | TopK routing with dynamic per-expert bias in the aux-loss-free load balancing strategy. The routing decision is based on the sum of the routing scores and the expert bias. See https://arxiv.org/abs/2408.15664 for details. | -| --moe-router-fusion | Enable fusion for MoE TopK routing and aux-loss computation. This is only supported in TransformerEngine 2.7.0 and above. 
| -| --moe-router-bias-update-rate | The expert bias is updated based on the number of assigned tokens to each expert in a global batch, where the bias is increased for experts with less assigned tokens and decreased for experts with more assigned tokens. Default is 1e-3 same as that used in DeepSeekV3. | -| --moe-router-force-load-balancing | (Experimental) Force override routing to balance token distribution using random logits for MoE routers, supporting naive top-k and group-limited top-k. This experimental feature is for benchmarking purposes only! | -| --moe-router-padding-for-quantization | Pad the routing_map to make sure the number of tokens each expert received is a multiple of 16/32 for FP8/FP4 precision. It is suggested to enable this for dropless training with FP8 precision when num_local_experts > 1. This is a more efficient way to pad for FP8 which eliminates the explicit padding in the GroupedMLP layer. | -| --moe-aux-loss-coeff | Scaling coefficient for the aux loss: a starting value of 1e-2 is recommended. Default is 0.0. | -| --moe-z-loss-coeff | Scaling coefficient for the z-loss: a starting value of 1e-3 is recommended. Default is None. | -| --moe-input-jitter-eps | Add noise to the input tensor by applying jitter with a specified epsilon value. Default is None. | -| --moe-token-dispatcher-type | Determines the token dispatcher type. Choices are "allgather", "alltoall". Default is "allgather". We recommend using 'alltoall' if expert parallelism is applied. We have upgraded the "alltoall" dispatcher in place during MCore v0.9, while the original implementation renamed as "alltoall_seq" is retained until MCore v0.13.| -| --moe-flex-dispatcher-backend | (Experimental) Select the backend for the flex token dispatcher. Supported options: "deepep", "hybridep". Enables efficient token dispatching and combining for MoE models. | -| --moe-per-layer-logging | Enable per-layer logging for MoE, currently supports auxiliary loss and z loss. 
| -| --moe-expert-capacity-factor | The capacity factor for each expert, None means no token will be dropped. Default is None. | -| --moe-pad-expert-input-to-capacity | Pads the input for each expert to match the expert capacity length, effective only after the --moe-expert-capacity-factor is set. | -| --moe-token-drop-policy | The policy to drop tokens. Can be either "probs" or "position". If "probs", the tokens with the lowest probabilities will be dropped. If "position", tokens at the end of each batch will be dropped. | -| --moe-layer-recompute | Enable activation checkpointing for moe_layer, should be used when memory is not sufficient. | -| --moe-permute-fusion | Fuse token rearrangement ops during token dispatching. | -| --moe-shared-expert-intermediate-size | Set shared expert total ffn hidden size. It should be equal to `num_shared_experts * ffn_size_of_each_shared_expert` if there are multiple shared experts. None means no shared expert. | -| --moe-shared-expert-overlap | (Experimental, may change) If this is set, the communications/computations in the shared experts and the dispatcher will overlap (The `alltoall` dispatcher is needed.) Otherwise, the shared expert runs after the routed experts. | -| --moe-use-upcycling | Load the dense model checkpoint, convert it into an MoE model at runtime and start training. The converted model will be saved to the path specified by `--save` before training begins. Upcycling is implemented on the top of distributed checkpointing, so it supports parallel modes different from the dense model.| -| --overlap-moe-expert-parallel-comm | Enable batch-level overlapping in 1f1b stage. | -| --delay-wgrad-compute | Enable split dgrad and wgrad for `overlap-moe-expert-parallel-comm` execution. Increasing room to hide communication latency by more finegrained control. 
| -| --pipeline-model-parallel-layout | (Experimental, may change) A string containing a Python list expression that defines a custom pipeline model parallel layout. | -| --moe-upcycling-granularity | This param sepecifics how many times smaller is the expert hidden size compared with the original dense FFN hidden size. For using granular upcycling strategy, please set this param as a positive integer. If this param is set to 1, it means using the default upcycling strategy.| +### CUDA Graph +CUDA Graph functionality can be enabled through the `--cuda-graph-impl` option. There are two implementations: -
    +1. `--cuda-graph-impl=local`: Captures cuda graphs using the MCore-internal cuda graph manager. +2. `--cuda-graph-impl=transformer_engine`: Captures cuda graphs using the TE `make_graphed_callables()` interface. -## MoE training example: -
    -Click here.
+To use `--cuda-graph-impl=transformer_engine`, the user should call related methods `TECudaGraphHelper.create_cudagraphs()` and `TECudaGraphHelper.cuda_graph_set_manual_hooks()` in the training script. Please refer to the usage in `megatron/training/training.py`.
+
+For MoE models, certain configurations may prevent CUDA Graph capture of MoE layers. Specifically, when `--moe-expert-capacity-factor` and `--moe-pad-expert-input-to-capacity` are not set, the resulting dynamic shapes make MoE layers uncapturable. In such cases, you can still leverage CUDA Graphs for the attention layers (operations in `TransformerLayer._forward_attention()`) by setting `--cuda-graph-scope=attn`, while leaving the MoE layers (operations in `TransformerLayer._forward_mlp()`) unmodified. See the argument description for more usage of `--cuda-graph-scope`.
+## MoE Arguments Reference
+### Core Arguments
+| Argument | Description | Default |
+|----------|-------------|---------|
+| --num-experts | Number of Experts in MoE | None |
+| --expert-model-parallel-size | Degree of expert model parallelism | 1 |
+| --moe-ffn-hidden-size | MoE FFN hidden size | FFN hidden size of the dense model |
+| --expert-tensor-parallel-size | Expert layer tensor parallelism | Same as TP (Recommended to set to 1 for fine-grained MoE models) |
+| --moe-layer-freq | MoE layer frequency pattern | 1 |
+
+### Router Arguments
+| Argument | Description | Default |
+|----------|-------------|---------|
+| --moe-router-load-balancing-type | Load balancing: aux_loss, sinkhorn, seq_aux_loss, none | aux_loss |
+| --moe-router-topk | Number of experts per token | 2 |
+| --moe-router-score-function | Score function: softmax, sigmoid | softmax |
+| --moe-router-pre-softmax | Softmax before top-k | False |
+| --moe-router-num-groups | Groups for group-limited routing | None |
+| --moe-router-group-topk | Selected groups in group-limited routing | None |
+| --moe-router-enable-expert-bias | Dynamic per-expert 
bias | False | +| --moe-router-bias-update-rate | Bias update rate | 1e-3 | +| --moe-router-fusion | Enable router fusion | False | +| --moe-router-dtype | Router precision: fp32, fp64 | None | +| --moe-router-padding-for-fp8 | Pad for FP8 alignment | False | + +### Loss and Regularization +| Argument | Description | Default | +|----------|-------------|---------| +| --moe-aux-loss-coeff | Auxiliary loss coefficient | 0.0 | +| --moe-z-loss-coeff | Z-loss coefficient | None | +| --moe-input-jitter-eps | Input jitter epsilon | None | + +### Token Dispatching +| Argument | Description | Default | +|----------|-------------|---------| +| --moe-token-dispatcher-type | Dispatcher: allgather, alltoall, flex | allgather | +| --moe-enable-deepep | Enable DeepEP (with flex) | False | +| --moe-expert-capacity-factor | Capacity factor | None | +| --moe-pad-expert-input-to-capacity | Pad to capacity | False | +| --moe-token-drop-policy | Drop policy: probs, position | probs | +| --moe-permute-fusion | Fuse permutation ops | False | + +### Performance Optimization +| Argument | Description | Default | +|----------|-------------|---------| +| --moe-grouped-gemm | Use GroupedGEMM | False | +| --overlap-moe-expert-parallel-comm | Batch-level EP overlap | False | +| --delay-wgrad-compute | Split dgrad/wgrad compute | False | +| --moe-shared-expert-intermediate-size | Shared expert FFN size | None | +| --moe-shared-expert-overlap | Overlap shared expert | False | + +### Memory and Checkpointing +| Argument | Description | Default | +|----------|-------------|---------| +| --moe-layer-recompute | Recompute MoE layer | False | +| --moe-use-upcycling | Enable upcycling | False | +| --moe-upcycling-granularity | Upcycling granularity | 1 | + +### Miscellaneous +| Argument | Description | Default | +|----------|-------------|---------| +| --moe-per-layer-logging | Per-layer logging | False | +| --moe-router-force-load-balancing | Force load balancing (experimental) | False | + +## 
Examples ```bash #!/bin/bash # Runs Mixtral 8x7B model on 32 H100/A100 GPUs -# The Dropless MoE suffers from an imbalanced token distribution at the early stage of training (the first few hundred iterations), which may lead to poor performance and out-of-memory (OOM) issues. -# To check the performance of a Dropless MoE model, we should run the model for at least 500 iterations or resume from trained checkpoints. export CUDA_DEVICE_MAX_CONNECTIONS=1 GPUS_PER_NODE=8 -# Change for multinode config MASTER_ADDR=${MASTER_ADDR:-"localhost"} MASTER_PORT=${MASTER_PORT:-"6000"} -NNODES=${NNODES:-"1"} +NNODES=${NNODES:-"4"} NODE_RANK=${RANK:-"0"} WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES)) @@ -333,11 +647,12 @@ MODEL_ARGS=( MOE_ARGS=( --num-experts 8 --expert-model-parallel-size 8 - --moe-router-load-balancing-type aux_loss # options: aux_loss, sinkhorn, None. Default is aux_loss. + --moe-router-load-balancing-type aux_loss --moe-router-topk 2 --moe-aux-loss-coeff 1e-2 --moe-grouped-gemm --moe-permute-fusion + --moe-token-dispatcher-type alltoall ) DATA_ARGS=( @@ -372,24 +687,17 @@ MODEL_PARALLEL_ARGS=( ) LOGGING_ARGS=( - --log-interval 1 \ - --save-interval 10000 \ - --eval-interval 1000 \ - --eval-iters 10 \ - --save $CHECKPOINT_PATH \ - --load $CHECKPOINT_PATH \ - --tensorboard-dir "${CHECKPOINT_PATH}/tensorboard" \ - --no-load-optim \ - --no-load-rng + --log-interval 1 + --save-interval 10000 + --eval-interval 1000 + --eval-iters 10 + --save $CHECKPOINT_PATH + --load $CHECKPOINT_PATH + --tensorboard-dir "${CHECKPOINT_PATH}/tensorboard" + --ckpt-format torch_dist + --auto-detect-ckpt-format ) -if [ -n "${WANDB_API_KEY}" ]; then - LOGGING_ARGS+=( - --wandb-project ${WANDB_PROJECT:-"Mixtral-Finetuning"} - --wandb-exp-name ${WANDB_NAME:-"Mixtral_8x7B"} - ) -fi - torchrun ${DISTRIBUTED_ARGS[@]} pretrain_gpt.py \ ${MODEL_ARGS[@]} \ ${MOE_ARGS[@]} \ @@ -398,107 +706,36 @@ torchrun ${DISTRIBUTED_ARGS[@]} pretrain_gpt.py \ ${MODEL_PARALLEL_ARGS[@]} \ ${LOGGING_ARGS[@]} ``` +
    -# Performance Best Practice +## Contributing -### Tuning Guide of Parallel Mappings +We welcome contributions! Please see [CONTRIBUTING.md](../../../../CONTRIBUTING.md) for guidelines. -To find a good parallel mapping that help you achieve a high throughput of a new model, there are some general rule that could help. Here is an overview of properties in different aspects for each parallel strategy. +## Support -| Parallel Strategy | Peak Activation Memory | Weight Memory | Optimizer states | Communication (Per-Layer) | -|:-----------------:|:-------------------------------:|:--------------:|:---------------------------------:|:-------------------------:| -| TP | 1/N (with SP on) | 1/N | 1/N | High | -| EP | 1 | 1/N in MoELayer| 1/N | Medium | -| PP | 1 (>1 with virtual pipeline) | 1/N | 1/N | Medium | -| CP | 1/N | 1 | 1/N (with distributed optimizer) | Medium | -| DP | 1 | 1 | 1/N (with distributed optimizer) | Low | +- GitHub Issues: [Report bugs or request features](https://github.com/NVIDIA/Megatron-LM/issues) +- Documentation: [Full documentation](https://docs.nvidia.com/megatron-core/developer-guide/latest/index.html) -For a specific model, the best parallel mapping varies based on the model architecture, trained sequence length and the hardware platform. -Here we provide some general rules to get better performance: -1. Keep the model parallism size as small as possible. - - For the large language models, model parallism is often required to prevent OOM, but it will bring communication overhead and hurt performance. - - With distributed optimizer, master weights and optimizer states will be sharded across all DP ranks with slight communication overhead. - So try to reduce the model parallism size and increase data parallism size when there are lots of free GPU memory during training. -2. Ensure the EPxTP communication winthin the NVLink domain. 
- - Communications of EP and TP should remain within the NVLink domain as much as possible, as both are communication-intensive. - - If the model is too large and requires scaling across multiple nodes, consider PP before TP and EP. See item 3 for details. -3. Use Pipeline Parallelism to scale the model further. - - Enable Virtual Pipeline Parallelism(VPP) to reduce pp bubbles when PP_size >= 2 by setting `num_layers_per_virtual_pipeline_stage`. - - VPP_size tuning: the legal values of vpp_size are all common divisors of num_layers/pp_size, E.g., num_layers=24, pp_size=4, then we can pick vpp_size from {1, 2, 3, 6}. The larger the vpp_size, the lower the pipeline bubbles, while the larger number of P2P communications between each PP stages. Empirically a value in the middle often gives the best trade-off. `VPP_size=num_layers / PP_size / num_layers_per_virtual_pipeline_stage` -4. Prefer EP over TP for the expert layer when possible: - - TP saves more memory than EP, but EP can achieve better GEMM efficiency and less communication overhead than TP. - - If EP size increased to the number of expert, the local token permutation/un-permutation for experts computation are omitted. - - Simplify the computation graph of MoE layers, more convenient for performing potential comm-computation overlapping. - - In practice, EP8TP1 is better than EP4TP2 for 8x7B. -5. Enable Context Parallelism for long context training. - - The efficiency of CP largely depends on whether its communication can be overlapped with computation. - - Empirically, use CP when sequence length >= 8K. -### MoE Parallel Folding +## Citation -MoE Parallel Folding separates the MoE related parallel groups from Dense groups. -1. Traditional MoE parallel groups are entangled with dense by using a 5-dimension parallel group generator with default order `tp-cp-ep-dp-pp`. The EP group in MoE is a sub-group of DP in Attention. -2. 
With MoE Parallel Folding, we use a parallel group generator with `tp-cp-dp-pp` for Attention, and another with `tp-ep-dp-pp` for MoE. The EPxTP group in MoE is a sub-group of DPxCPxTP in Attention. - -By setting `--expert-tensor-parallel-size`, we can set MoE-specific TP size. - -#### Advantages of MoE Parallel Folding -1. The CP and EP group are folded together by defualt, such that: - 1. It reduces the minimal required GPUs to turn on both CP and EP. For example, the traditional way with (CP=8, EP=8) needs at least 64 GPUs, for now it only requires 8 GPUs. - 2. The CP and EP communication can be both put in the NVLink domain. -2. We can set different TP sizes for Attention and MoE part. - 1. For MoE, EP is often more efficient than TP. But in the traditional way, only using EP can get OOM for most models. - 2. With MoE parallel folding, we can turn on TP for Attention part and setting TP=1 for MoE models, which often gets better MFU. - -### End-to-End Training Practice -**Use the latest NVIDIA PyTorch or NeMo Docker Image** -- [NGC PyTorch Image](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) -- [NGC NeMo Image](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo) - -**Token Dispatcher Choices** -- Token Dispatcher sends tokens to the designated expert, involves tensor rearangement and communications. -- Dispatcher `allgather` is the default option. It achieves better performance and efficiency when only tensor parallelism is used or when the Top-k value is very large. -- Dispatcher `alltoall` is recommended if expert parallelism is applied. -- Dispatcher `flex` is a new dispatcher decouples communication group from model parallelism. It supports two backends(DeepEP and HybridEP) selectable via `--moe-flex-dispatcher-backend`. - -**Enable Communication Overlap** -- Enable `--overlap-param-gather` and `--overlap-grad-reduce` with distributed optimizer. -- Enable `--tp-comm-overlap` when TP>1. 
-- Enable p2p comm overlap when PP > 1 by setting `num_layers_per_virtual_pipeline_stage`. - -**Enable GroupedGEMM when num_local_experts>1 with `--moe-grouped-gemm`** -- GroupedGEMM has higher efficiency than vanilla sequential GEMMs for each expert. -- Recommend to use the TE version of Grouped GEMM (by upgrading to MCore v0.8 and TE v1.9), which support Gradient Accumulation Fusion and FP8 Training. - -**OOM Caused by Token Distribution Imbalance when Training From Scratch** -MoE suffers from a severe load imbalance issue when the router is under-trained, leading to the model easily running out of memory (OOM), which typically occurs in the first 100~300 steps when training from scratch. -Therefore, there are two recommended ways during the first 200 steps to avoid the OOM problem, which can be removed after the token distribution is more stable: -1. Increase the `expert-tensor-parallel-size` and decrease `expert-model-parallel-size` to replace EP with TP in MoELayer, this can prevent the load imbalancing between EP ranks. Since current ETP implementation has some memeory overhead, you can further enable activation recomputation only for MoE Layer by adding `--moe-layer-recompute`. -2. Setting capacity factor to a relatively small number like 1.0 by adding `--moe-token-capacity-factor 1.0`. - -**Leverage DeepSeek's DeepEP for High-Performance Cross-Node Token Dispatching** -- The primary advantage of DeepEP is its cross-node token communication efficiency, which delivers substantial performance improvements when deploying expert parallelism across multiple nodes with large TopK values. -- To enable DeepEP in your training configuration, simply set `--moe-token-dispatcher-type=flex` and `--moe-enable-deepep` in your command line arguments. - -**FP8 Training Best Practice** -- Using latest version of [TransformerEngine](https://github.com/NVIDIA/TransformerEngine). -- Enable router padding with `--moe-router-padding-for-quantization` to reduce padding overhead. 
-- Enable native FP8 weights with `--fp8-param-gather` to reduce weights memory cost. - -### Reference Best Parallel Mapping - -Here are the reference parallel mappings of MCore v0.8 for Mixtral 8x7B and 8x22B models: -| Model | Vocab Size| Dispatcher | Precision | #GPUs | SEQ LEN | TP | EP | PP | VP | MBS | GBS | -|:-----------------------:|:---------:|:----------:|:---------:|:-----:|:-------:|:--:|:--:|:--:|:--:|:---:|:---:| -| Mixtral 8x7B(Dropless) | 32K | All-to-All | BF16 | 64 | 4096 | 1 | 8 | 4 | 8 | 1 | 256 | -| Mixtral 8x22B(Dropless) | 32K | All-to-All | BF16 | 128 | 4096 | 4 | 2 | 8 | 7 | 1 | 256 | - -Detailed Benchmark Information: -Server: -- 8xH100 80GB HBM3 -- NVLink 4th Generation -- InfiniBand 8x400 Gbit/s - -Docker Image: -- PyTorch 24.09 with TransformerEngine v1.11 +If you use Megatron-Core MoE in your research, please cite: + +```bibtex + +@article{megatron-lm, + title={Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism}, + author={Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan}, + journal={arXiv preprint arXiv:1909.08053}, + year={2019} +} + +@article{moe-parallel-folding, + title={MoE Parallel Folding: Heterogeneous Parallelism Mappings for Efficient Large-Scale MoE Model Training with Megatron Core}, + author={Liu, Dennis and Yan, Zijie and Yao, Xin and Liu, Tong and Korthikanti, Vijay and Wu, Evan and Fan, Shiqing and Deng, Gao and Bai, Hongxiao and Chang, Jianbin and Aithal, Ashwath and Andersch, Michael and Shoeybi, Mohammad and Yao, Jiajie and Zhou, Chandler and Wu, David and Li, Xipeng and Yang, June}, + year={2025}, + journal={arXiv preprint arXiv:2504.14960}, +} +``` From 9ea50a9d500c187798571d42ffaafe1bb77758c5 Mon Sep 17 00:00:00 2001 From: litianjian <45817262+litianjian@users.noreply.github.com> Date: Tue, 20 Jan 2026 22:52:05 +0800 Subject: [PATCH 244/334] feat: add routing replay for Mcore (#2693) Co-authored-by: litianjian 
Co-authored-by: Zijie Yan --- docs/source/api-guide/router_replay.md | 176 ++++++++++++++++++ megatron/core/transformer/moe/moe_utils.py | 16 +- megatron/core/transformer/moe/router.py | 6 + .../core/transformer/moe/router_replay.py | 161 ++++++++++++++++ .../core/transformer/transformer_config.py | 3 + megatron/training/arguments.py | 3 + .../transformer/moe/test_router_replay.py | 95 ++++++++++ 7 files changed, 459 insertions(+), 1 deletion(-) create mode 100644 docs/source/api-guide/router_replay.md create mode 100644 megatron/core/transformer/moe/router_replay.py create mode 100644 tests/unit_tests/transformer/moe/test_router_replay.py diff --git a/docs/source/api-guide/router_replay.md b/docs/source/api-guide/router_replay.md new file mode 100644 index 00000000000..334a29c78a6 --- /dev/null +++ b/docs/source/api-guide/router_replay.md @@ -0,0 +1,176 @@ +# Design Document: MoE Router Replay Feature + +### 1. Overview + +This document provides a detailed description of the "Router Replay" feature implemented within the Megatron-LM Core for Mixture-of-Experts (MoE) models. + +This feature is designed to enhance determinism and analyzability in MoE model training and inference. It enables the model to load routing decisions from a predefined file and enforce their use during the forward pass, thereby bypassing the real-time routing computation. + +### 2. Motivation + +* **Determinism & Reproducibility**: In distributed training, MoE routing decisions can exhibit minor variations due to factors like floating-point precision. By replaying a fixed routing table, the MoE computation path is guaranteed to be identical across runs, which facilitates debugging and reproducing experimental results. +* **Performance Profiling**: The router's own computation (e.g., logits calculation, top-k selection) incurs overhead. 
In replay mode, this part of the computation can be completely skipped, allowing for more precise isolation and profiling of performance bottlenecks within the Expert Layers themselves. +* **Debugging Aid**: When issues arise in the model, fixing the routing decisions helps to isolate variables, making it easier to determine whether the problem lies with the routing mechanism or the expert computations. + +### 3. Design and Architecture + +The design follows the principles of being non-intrusive and on-demand, with the core idea of activating the replay logic only when explicitly requested by the user. + +* **Core Components**: + * `RouterReplay` (located in `megatron/core/transformer/moe/router_replay.py`): A utility class for replaying MoE routing decisions. When enabled via the `enable_routing_replay` flag, a separate instance of `RouterReplay` is created for each MoE layer's router. Each instance is responsible for loading routing data and providing the deterministic routing decisions for its corresponding layer during the forward pass. + * `enable_routing_replay` (located in `megatron/core/transformer/transformer_config.py`): A boolean global configuration flag that serves as the sole entry point for enabling this feature. + +* **Workflow**: + The feature supports different modes, such as recording and replaying, controlled by a `RouterReplayAction`. + + 1. **Enabling the Feature**: The user sets `enable_routing_replay` to `True` in the model configuration. + 2. **Initialization**: When `enable_routing_replay` is true, each `TopKRouter` creates its own `RouterReplay` instance. + 3. **Mode Configuration**: The user must programmatically set the desired router replay action (e.g., `record`, `forward_replay`, `backward_replay`) on the `RouterReplay` instances. + 4. **Execution Flow (within a mini-batch)**: + * **Forward Pass**: + * For each micro-batch, the `topk_routing_with_score_function` checks the `router_replay_action`. 
+ * **In `record` mode**: The dynamically computed `top-k` expert indices are captured and stored.
+ * **In `forward_replay` mode**: The function retrieves pre-loaded expert indices from `target_topk_idx`. These indices are used for the forward computation and are also appended to the `replay_backward_list` to prepare for the backward pass.
+ * **Backward Pass**:
+ * For each micro-batch (processed in reverse order in pipeline parallelism), the `router_replay_action` is checked again.
+ * **In `backward_replay` mode**: The function retrieves the expert indices for the corresponding micro-batch by popping them from the `replay_backward_list`. This mode is intended for training recomputation (e.g., activation checkpointing and pipeline recompute) so the same routing decisions are used during recompute/backward as in forward, ensuring determinism and correctness.
+
+### 4. Implementation Details
+
+The implementation cleanly separates the replay logic from the router's core computation.
+
+* **`megatron/core/transformer/transformer_config.py`**:
+ * Adds the configuration option `enable_routing_replay: bool = False`.
+
+* **`megatron/core/transformer/moe/moe_utils.py`**:
+ * Works with the `RouterReplay` class (defined in `megatron/core/transformer/moe/router_replay.py`) to manage the state for recording and replaying routing decisions for a single MoE layer.
+ * `target_topk_idx`: An attribute holding the expert indices for the current micro-batch during forward replay mode.
+ * `recorded_topk_idx`: An attribute for storing the computed expert indices when in record mode.
+ * `replay_backward_list`: A list that accumulates the `top-k` indices used during the forward passes of a mini-batch. This list is consumed in FIFO order during the backward pass to ensure correctness under pipeline parallelism.
+ * `set_target_indices()`: A method to load the replay indices into `target_topk_idx` for the forward pass.
+ * `record_indices()`: A method to save the computed indices. 
+ * The `topk_routing_with_score_function` is modified to contain the core logic. It checks the `router_replay_action` on the `router_replay` instance and accordingly performs one of the following actions: computes and records indices, replays indices from `target_topk_idx` (for forward), replays indices from `replay_backward_list` (for backward), or falls through to the default dynamic routing. + +#### Training recompute usage +- During forward replay, `set_target_indices()` prepares `replay_backward_list` so each micro-batch’s indices are available for recomputation. +- During recompute/backward, set action to `REPLAY_BACKWARD` so indices are consumed in FIFO order to mirror the forward sequence. + +### 5. Usage Guide + +1. **Enable & Instantiate** + - Create one `RouterReplay` instance per MoE router layer when building the model. + - Optionally use the global helpers to set/clear actions across all layers. +2. **Record Routing Decisions** + - Set action: `RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD)`. + - Run the model; retrieve per-layer indices via `RouterReplay.get_recorded_data()` and persist. +3. **Forward Replay** + - Load indices and distribute: `RouterReplay.set_replay_data(list_of_tensors)`. + - Set action: `RouterReplay.set_global_router_replay_action(RouterReplayAction.REPLAY_FORWARD)`. + - Run the model; dynamic top‑k is bypassed and target indices are used. +4. **Backward Replay** + - For training recomputation (activation checkpointing or pipeline recompute), set action: `REPLAY_BACKWARD` during recomputation. + - Per micro‑batch indices are consumed from `replay_backward_list` in FIFO order. +5. **Cleanup** + - Use `RouterReplay.clear_global_indices()`, `RouterReplay.clear_global_router_replay_action()`, and `RouterReplay.clear_global_router_replay_instances()` to restore default behavior and prevent memory leaks. 
+ +#### Quick usage with `topk_routing_with_score_function` + +```python +import torch +from megatron.core.transformer.moe.router_replay import RouterReplay, RouterReplayAction +from megatron.core.transformer.moe.moe_utils import topk_routing_with_score_function + +rr = RouterReplay() + +# Record +RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD) +logits = torch.randn(8, 16) +probs_rec, routing_map_rec = topk_routing_with_score_function( + logits=logits, topk=2, use_pre_softmax=False, score_function="softmax", router_replay=rr, +) +recorded = rr.get_recorded_indices() +torch.save(recorded, "/tmp/replay.pt") + +# Forward replay +rr.clear_router_replay_action() +rr.set_router_replay_action(RouterReplayAction.REPLAY_FORWARD) +target = torch.load("/tmp/replay.pt") +rr.set_target_indices(target) +probs_rep, routing_map_rep = topk_routing_with_score_function( + logits=logits, topk=2, use_pre_softmax=False, score_function="softmax", router_replay=rr, +) + +RouterReplay.clear_global_router_replay_action() +RouterReplay.clear_global_indices() +RouterReplay.clear_global_router_replay_instances() +``` + +### 6. 
Minimal Demo + +Here is a minimal code example showing how to use RouterReplay for recording and replaying: + +```python +import torch +import torch.distributed as dist +from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.transformer.moe.router import TopKRouter +from megatron.core.transformer.moe.router_replay import RouterReplay, RouterReplayAction + + +# Initialize distributed training +if not dist.is_initialized(): + dist.init_process_group(backend="nccl") + +# Create a transformer config with RouterReplay enabled +config = TransformerConfig( + num_experts=8, + expert_model_parallel_size=1, + num_top_k=2, + enable_routing_replay=True +) + +# Create a TopKRouter instance +router = TopKRouter(config) + +# Generate sample input (batch_size, sequence_length, hidden_size) +logits = torch.randn(16, 32, 8).to(torch.cuda.current_device()) + +# ----------------- +# 1. Recording Mode +# ----------------- +print("=== Recording Mode ===") +# Set global router replay action to RECORD +RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD) + +# Perform routing +routing_output = router.forward(logits) +print(f"Recorded top-k indices shape: {routing_output.top_k_idx.shape}") + +# ----------------- +# 2. 
Forward Replay Mode +# ----------------- +print("\n=== Forward Replay Mode ===") +# Save recorded indices to a file +torch.save(routing_output.top_k_idx, "/tmp/replay.pt") + +# Load indices from file and set as target for replay +replay_indices = torch.load("/tmp/replay.pt") +for router_instance in RouterReplay.global_router_replay_instances: + router_instance.target_topk_idx = replay_indices + +# Set global router replay action to REPLAY_FORWARD +RouterReplay.set_global_router_replay_action(RouterReplayAction.REPLAY_FORWARD) + +# Perform routing again - this will use the replayed indices +replay_routing_output = router.forward(logits) +print(f"Replayed top-k indices shape: {replay_routing_output.top_k_idx.shape}") +print(f"Are indices the same? {torch.equal(routing_output.top_k_idx, replay_routing_output.top_k_idx)}") + + +# Clean up +RouterReplay.clear_global_router_replay_action() +RouterReplay.clear_global_indices() +RouterReplay.clear_global_router_replay_instances() +if dist.is_initialized(): + dist.destroy_process_group() +``` diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index 60878155fd4..e5e06f05758 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -18,6 +18,7 @@ from megatron.core.tensor_parallel.mappings import reduce_from_tensor_model_parallel_region from megatron.core.transformer.cuda_graphs import is_graph_capturing from megatron.core.transformer.enums import CudaGraphScope +from megatron.core.transformer.moe.router_replay import RouterReplay from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import internal_api @@ -580,6 +581,7 @@ def topk_routing_with_score_function( score_function: str = "softmax", expert_bias: Optional[torch.Tensor] = None, fused: bool = False, + router_replay: Optional['RouterReplay'] = None, ): """Compute the routing probabilities and map for top-k selection with score 
function. Args: @@ -591,6 +593,9 @@ def topk_routing_with_score_function( scaling_factor (float): Scaling factor of routing score in top-k selection. score_function (str): The score function to use. Can be either "softmax" or "sigmoid". expert_bias (torch.Tensor): The bias added to logits for expert routing. + router_replay (Optional['RouterReplay']): For debugging and development, allows for + deterministic routing by replaying a previously + recorded routing sequence. Returns: Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - routing_probs (torch.Tensor): A tensor of shape [num_tokens, num_experts] containing @@ -617,7 +622,7 @@ def topk_routing_with_score_function( expert_bias=expert_bias, ) - def compute_topk(scores, topk, num_groups=None, group_topk=None): + def _compute_topk(scores, topk, num_groups=None, group_topk=None): if group_topk: return group_limited_topk( scores=scores, @@ -630,6 +635,15 @@ def compute_topk(scores, topk, num_groups=None, group_topk=None): else: return torch.topk(scores, k=topk, dim=1) + def compute_topk(scores, topk, num_groups=None, group_topk=None): + # Default behavior if no replay is active + if router_replay is None: + return _compute_topk(scores, topk, num_groups=num_groups, group_topk=group_topk) + else: + return router_replay.get_replay_topk( + scores, topk, num_groups, group_topk, _compute_topk + ) + if score_function == "softmax": if use_pre_softmax: scores = torch.softmax(logits, dim=-1, dtype=torch.float32).type_as(logits) diff --git a/megatron/core/transformer/moe/router.py b/megatron/core/transformer/moe/router.py index 003043bc18d..01238e425d9 100644 --- a/megatron/core/transformer/moe/router.py +++ b/megatron/core/transformer/moe/router.py @@ -22,6 +22,7 @@ topk_routing_with_score_function, z_loss_func, ) +from megatron.core.transformer.moe.router_replay import RouterReplay from megatron.core.transformer.transformer_config import TransformerConfig @@ -202,6 +203,10 @@ def __init__( self.global_tokens_per_expert = 
None
+        self.ga_steps = None
 
+        self.router_replay = None
+        if self.config.enable_routing_replay:
+            self.router_replay = RouterReplay()
+
     def _maintain_float32_expert_bias(self):
         """
         Maintain the expert bias in float32.
@@ -580,6 +585,7 @@ def routing(self, logits: torch.Tensor, padding_mask: Optional[torch.Tensor] = N
             score_function=self.score_function,
             expert_bias=self.expert_bias,
             fused=self.config.moe_router_fusion,
+            router_replay=self.router_replay,
         )
 
         # Apply token dropping to probs and routing_map.
diff --git a/megatron/core/transformer/moe/router_replay.py b/megatron/core/transformer/moe/router_replay.py
new file mode 100644
index 00000000000..b6b8e26a0a6
--- /dev/null
+++ b/megatron/core/transformer/moe/router_replay.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+from enum import Enum
+from typing import Callable, List, Optional, Tuple
+
+import torch
+
+
+class RouterReplayAction(Enum):
+    """
+    An Enum to define the actions for router replay.
+    """
+
+    RECORD = "record"  # Record the topk indices for replay
+    REPLAY_FORWARD = "replay_forward"  # Replay the recorded topk indices for forward pass
+    REPLAY_BACKWARD = "replay_backward"  # Replay topk indices for re-compute during backward pass
+
+
+class RouterReplay:
+    """
+    A class to manage the recording and replaying of MoE routing decisions.
+    It holds all router instances and provides static methods to globally
+    control recording and replaying.
+    """
+
+    # Static variable to hold all router instances, one per MoE layer.
+    global_router_replay_instances: List['RouterReplay'] = []
+
+    @staticmethod
+    def set_replay_data(all_layers_topk_indices: List[torch.Tensor]):
+        """
+        Distributes the topk indices for all layers to their respective RouterReplay instances.
+        :param all_layers_topk_indices: A list of tensors, where each tensor contains the
+                                        topk indices for a specific layer. The order
+                                        must match the instantiation order of the routers.
+ """ + if len(all_layers_topk_indices) != len(RouterReplay.global_router_replay_instances): + raise ValueError( + f"The number of replay tensors ({len(all_layers_topk_indices)}) " + f"does not match instances ({len(RouterReplay.global_router_replay_instances)})." + ) + for i, router_instance in enumerate(RouterReplay.global_router_replay_instances): + router_instance.set_target_indices(all_layers_topk_indices[i]) + + @staticmethod + def get_recorded_data() -> List[torch.Tensor]: + """ + Collects the recorded topk indices from all RouterReplay instances. + :return: A list of tensors, each containing the recorded topk indices for a layer. + """ + return [ + router.get_recorded_indices() for router in RouterReplay.global_router_replay_instances + ] + + @staticmethod + def clear_global_indices(): + """Clears the recorded and target topk indices in all instances.""" + for router in RouterReplay.global_router_replay_instances: + router.clear_indices() + + @staticmethod + def set_global_router_replay_action(router_replay_action: RouterReplayAction): + """Sets the router replay action for all router instances.""" + for router in RouterReplay.global_router_replay_instances: + router.set_router_replay_action(router_replay_action) + + @staticmethod + def clear_global_router_replay_action(): + """Clears the router replay action for all router instances.""" + for router in RouterReplay.global_router_replay_instances: + router.clear_router_replay_action() + + @staticmethod + def clear_global_router_replay_instances(): + """Clear the global list of router replay instances to prevent memory leaks.""" + RouterReplay.global_router_replay_instances.clear() + + def __init__(self): + """Initializes a RouterReplay instance for a specific layer.""" + self.target_topk_idx: Optional[torch.Tensor] = None # Target topk indices for replay + self.recorded_topk_idx: Optional[torch.Tensor] = None # Recorded topk indices for replay + self.router_replay_action: Optional[RouterReplayAction] = ( + 
None # Router replay action for this layer + ) + self.replay_backward_list: List[torch.Tensor] = ( + [] + ) # List of tensors for backward pass replay + RouterReplay.global_router_replay_instances.append(self) + + def set_target_indices(self, topk_indices: torch.Tensor): + """Sets the target topk indices for replay.""" + self.target_topk_idx = topk_indices + self.replay_backward_list.append(topk_indices) + + def get_recorded_indices(self) -> Optional[torch.Tensor]: + """Returns the recorded topk indices.""" + return self.recorded_topk_idx + + def record_indices(self, topk_indices: torch.Tensor): + """Records the topk indices.""" + self.recorded_topk_idx = topk_indices + + def clear_indices(self): + """Clears the recorded and target topk indices.""" + self.recorded_topk_idx = None + self.target_topk_idx = None + self.replay_backward_list = [] + + def set_router_replay_action(self, router_replay_action: RouterReplayAction): + """Sets the router replay action for this layer.""" + self.router_replay_action = router_replay_action + + def clear_router_replay_action(self): + """Clears the router replay action for this layer.""" + self.router_replay_action = None + + def get_replay_topk( + self, + scores: torch.Tensor, + topk: int, + num_groups: Optional[int] = None, + group_topk: Optional[int] = None, + default_compute_topk: Callable[ + [torch.Tensor, int, Optional[int], Optional[int]], Tuple[torch.Tensor, torch.Tensor] + ] = None, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + A wrapper for top-k computation that handles different replay actions. + + Args: + scores (torch.Tensor): The scores to compute top-k on. + topk (int): The number of top elements to select. + num_groups (Optional[int]): Number of expert groups for group-limited routing. + group_topk (Optional[int]): Number of groups to select for each token. + default_compute_topk (Callable): The default top-k computation function, which + should return a tuple of (values, indices). 
+ + Returns: + Tuple[torch.Tensor, torch.Tensor]: A tuple containing the top-k values and indices. + """ + if self.router_replay_action == RouterReplayAction.RECORD: + probs, top_indices = default_compute_topk( + scores, topk, num_groups=num_groups, group_topk=group_topk + ) + self.record_indices(top_indices) + return probs, top_indices + elif self.router_replay_action == RouterReplayAction.REPLAY_FORWARD: + top_indices = self.target_topk_idx + # Ensure indices are on the correct device + top_indices = top_indices.to(scores.device) + # Gather the scores for the replayed indices to get the probabilities + probs = scores.gather(1, top_indices) + return probs, top_indices + elif self.router_replay_action == RouterReplayAction.REPLAY_BACKWARD: + top_indices = self.replay_backward_list.pop(0) + # Ensure indices are on the correct device + top_indices = top_indices.to(scores.device) + # Gather the scores for the replayed indices to get the probabilities + probs = scores.gather(1, top_indices) + return probs, top_indices + else: + return default_compute_topk(scores, topk, num_groups, group_topk) diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 18cea44c51f..875d8a92049 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -551,6 +551,9 @@ class TransformerConfig(ModelParallelConfig): moe_router_topk: int = 2 """Number of experts to route to for each token.""" + enable_routing_replay: bool = False + """Enable routing replay for MoE.""" + moe_router_topk_limited_devices: Optional[int] = None """Number of EP ranks to consider for each token in group-limited routing, DEPRECATED and replaced by moe_router_num_groups and moe_router_group_topk. 
diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index a65f1cd6469..7744869f80e 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -3325,6 +3325,9 @@ def _add_moe_args(parser): help='Score function for MoE TopK routing. Can be "softmax" or "sigmoid".') group.add_argument('--moe-router-topk', type=int, default=2, help='Number of experts to route to for each token. The default is 2.') + group.add_argument('--enable-routing-replay', action='store_true', + help='Enable routing replay for MoE routers. When enabled, the router will ' + 'use a pre-defined routing table instead of computing it on the fly.') group.add_argument('--moe-router-pre-softmax', action='store_true', help='Enable pre-softmax routing for MoE, which means softmax is before the top-k selection. By default, softmax is done after top-k.') group.add_argument('--moe-router-num-groups', type=int, default=None, diff --git a/tests/unit_tests/transformer/moe/test_router_replay.py b/tests/unit_tests/transformer/moe/test_router_replay.py new file mode 100644 index 00000000000..840fc0fd269 --- /dev/null +++ b/tests/unit_tests/transformer/moe/test_router_replay.py @@ -0,0 +1,95 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+import pytest +import torch + +from megatron.core.transformer.moe.moe_utils import topk_routing_with_score_function +from megatron.core.transformer.moe.router_replay import RouterReplay, RouterReplayAction + + +def setup_function(): + RouterReplay.global_router_replay_instances.clear() + + +def teardown_function(): + RouterReplay.global_router_replay_instances.clear() + + +def test_record_mode_with_topk_routing_softmax_post(): + rr = RouterReplay() + rr.set_router_replay_action(RouterReplayAction.RECORD) + logits = torch.randn(4, 6) + probs, routing_map = topk_routing_with_score_function( + logits=logits, topk=2, use_pre_softmax=False, router_replay=rr, score_function="softmax" + ) + recorded = rr.get_recorded_indices() + expected_idx = torch.topk(logits, k=2, dim=1).indices + assert recorded is not None + assert torch.equal(recorded, expected_idx) + assert probs.shape == (4, 6) + assert routing_map.shape == (4, 6) + assert routing_map.sum(dim=1).eq(2).all() + + +def test_replay_forward_with_topk_routing_softmax_pre(): + rr = RouterReplay() + rr.set_router_replay_action(RouterReplayAction.REPLAY_FORWARD) + logits = torch.randn(3, 5) + target = torch.tensor([[1, 2], [0, 3], [2, 4]], dtype=torch.long) + rr.set_target_indices(target) + probs, routing_map = topk_routing_with_score_function( + logits=logits, topk=2, use_pre_softmax=True, router_replay=rr, score_function="softmax" + ) + assert routing_map.sum(dim=1).eq(2).all() + scores = torch.softmax(logits, dim=-1) + assert torch.equal(probs.gather(1, target), scores.gather(1, target)) + + +def test_replay_forward_with_topk_routing_softmax_post(): + rr = RouterReplay() + rr.set_router_replay_action(RouterReplayAction.REPLAY_FORWARD) + logits = torch.randn(3, 6) + target = torch.tensor([[1, 2], [0, 5], [3, 4]], dtype=torch.long) + rr.set_target_indices(target) + probs, routing_map = topk_routing_with_score_function( + logits=logits, topk=2, use_pre_softmax=False, router_replay=rr, score_function="softmax" + ) + 
selected = torch.softmax(logits.gather(1, target), dim=-1) + assert torch.equal(probs.gather(1, target), selected) + assert routing_map.sum(dim=1).eq(2).all() + + +def test_global_set_get_clear_indices(): + r1 = RouterReplay() + r2 = RouterReplay() + t1 = torch.tensor([[0, 1]], dtype=torch.long) + t2 = torch.tensor([[1, 0]], dtype=torch.long) + RouterReplay.set_replay_data([t1, t2]) + assert torch.equal(r1.target_topk_idx, t1) + assert torch.equal(r2.target_topk_idx, t2) + r1.record_indices(t1) + r2.record_indices(t2) + rec = RouterReplay.get_recorded_data() + assert len(rec) == 2 + assert torch.equal(rec[0], t1) + assert torch.equal(rec[1], t2) + RouterReplay.clear_global_indices() + assert r1.target_topk_idx is None and r2.target_topk_idx is None + assert r1.get_recorded_indices() is None and r2.get_recorded_indices() is None + + +def test_global_action_set_and_clear(): + r1 = RouterReplay() + r2 = RouterReplay() + RouterReplay.set_global_router_replay_action(RouterReplayAction.REPLAY_FORWARD) + assert r1.router_replay_action == RouterReplayAction.REPLAY_FORWARD + assert r2.router_replay_action == RouterReplayAction.REPLAY_FORWARD + RouterReplay.clear_global_router_replay_action() + assert r1.router_replay_action is None and r2.router_replay_action is None + + +def test_set_replay_data_length_mismatch(): + _ = RouterReplay() + with pytest.raises(ValueError): + RouterReplay.set_replay_data( + [torch.tensor([[0, 1]], dtype=torch.long), torch.tensor([[1, 0]], dtype=torch.long)] + ) From ac9f665c149e8114a9e8fb2294f7e1dd825b4c25 Mon Sep 17 00:00:00 2001 From: Yuzhong Wang Date: Wed, 21 Jan 2026 13:49:18 +0800 Subject: [PATCH 245/334] [dev] feat(moe): Support apply wd to qk layernorm for Qwen3-Next (#2825) Signed-off-by: John St. John Co-authored-by: John St. 
John Co-authored-by: Deepak Narayanan <2724038+deepakn94@users.noreply.github.com> --- megatron/core/optimizer/__init__.py | 44 ++++++---- megatron/core/optimizer/optimizer_config.py | 57 +++++++++++++ megatron/core/ssm/gated_delta_net.py | 2 +- megatron/training/arguments.py | 25 ++++-- megatron/training/training.py | 2 +- .../model_config.yaml | 2 +- tests/unit_tests/test_optimizer.py | 82 ++++++++++++++++++- 7 files changed, 186 insertions(+), 28 deletions(-) diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index b4d15daefd2..11aa6c49585 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -60,40 +60,48 @@ OptimizerConfig, ParamKey, ParamPredicate, + ParamWithNamePredicate, SGDOptimizerConfig, ) logger = logging.getLogger(__name__) -def get_standard_config_overrides( - decoupled_lr: float | None = None, decoupled_min_lr: float | None = None -) -> Dict[ParamKey, ParamGroupOverride]: +def get_standard_config_overrides(config: OptimizerConfig) -> Dict[ParamKey, ParamGroupOverride]: """Get standard config overrides for the optimizer, handling decoupled LR and common wd skips. Args: - decoupled_lr (float | None): decoupled learning rate. - decoupled_min_lr (float | None): decoupled minimum learning rate. + config (OptimizerConfig): optimizer configuration object. Returns: Dict[ParamKey, ParamGroupOverride]: standard config overrides. """ config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]] = {} - if decoupled_lr is not None: - decoupled_lr_config: ParamGroupOverride = {"max_lr": decoupled_lr} - decoupled_param_key = ParamKey(attr="is_embedding_or_output_parameter") - if decoupled_min_lr is not None: - decoupled_lr_config["min_lr"] = decoupled_min_lr - config_overrides[decoupled_param_key] = decoupled_lr_config + # First, figure out how we are going to do wd skipping. The two main approaches are: + # 1. The classic megatron approach of skipping all len 1 and bias parameters. 
+ # 2. The Qwen3-Next approach of doing 1, other than qk layernorm parameters. + if config.apply_wd_to_qk_layernorm: + shape_1_not_qkln_param = ParamWithNamePredicate( + name="s1_not_qkln", + fn=lambda param, name: (len(param.shape) == 1 or name.endswith(".bias")) + and not ("q_layernorm." in name or "k_layernorm." in name), + ) + param_wd_mult_key = ParamKey(with_name_predicate=shape_1_not_qkln_param) + else: + param_length_1_match = ParamPredicate( + name="param_len_1", fn=lambda param: len(param.shape) == 1 + ) + param_wd_mult_key = ParamKey(name="*.bias", predicate=param_length_1_match) - # Next construct the standard param group overrides for no weight decay on bias parameters - # as well as any length 1 parameters. - param_length_1_match = ParamPredicate( - name="param_len_1", fn=lambda param: len(param.shape) == 1 - ) - param_wd_mult_key = ParamKey(name="*.bias", predicate=param_length_1_match) config_overrides[param_wd_mult_key] = ParamGroupOverride(wd_mult=0.0) + if config.decoupled_lr is not None: + decoupled_lr_config: ParamGroupOverride = {"max_lr": config.decoupled_lr} + decoupled_param_key = ParamKey(attr="is_embedding_or_output_parameter") + if config.decoupled_min_lr is not None: + decoupled_lr_config["min_lr"] = config.decoupled_min_lr + config_overrides[decoupled_param_key] = decoupled_lr_config + return config_overrides @@ -132,7 +140,7 @@ def _get_param_groups( # the config_overrides argument by default lead to bias parameters and length 1 parameters. # We assume that users of decoupled LR already provide config overrides so will adapt # to the new API. 
-    config_overrides = get_standard_config_overrides()
+    config_overrides = get_standard_config_overrides(config=config)
 
     for model_chunk in model_chunks:
         for name, param in model_chunk.named_parameters():
diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py
index 1813488d7bd..a1429b7a170 100644
--- a/megatron/core/optimizer/optimizer_config.py
+++ b/megatron/core/optimizer/optimizer_config.py
@@ -33,6 +33,34 @@ def __call__(self, param: torch.nn.Parameter) -> bool:
         return self.fn(param)
 
 
+@dataclass(frozen=True)
+class ParamWithNamePredicate:
+    """Wraps a matching function to make it hashable for ParamKey.
+    Example:
+        >>> shape_1_not_qkln_param = ParamWithNamePredicate(
+                name="s1_not_qkln",
+                fn=lambda param, name: (
+                    (len(param.shape) == 1 or name.endswith(".bias"))
+                    and not ("q_layernorm." in name or "k_layernorm." in name)
+                )
+            )
+        >>> shape_1_not_qkln_param(torch.empty(10), "interesting.bias")
+        True
+        >>> shape_1_not_qkln_param(torch.empty(10), "interesting.q_layernorm.bias")
+        False
+
+    NOTE:
+        __hash__ and __eq__ are automatically generated by @dataclass(frozen=True)
+        based solely on 'name' because we set compare=False/hash=False on 'fn'.
+    """
+
+    name: str
+    fn: Callable[[torch.nn.Parameter, str], bool] = field(compare=False, hash=False)
+
+    def __call__(self, param: torch.nn.Parameter, name: str) -> bool:
+        return self.fn(param, name)
+
+
 @dataclass(frozen=True, slots=True)
 class ParamKey:
     """Key to group parameters by. All such grouped parameters can share an
@@ -49,6 +77,15 @@ class ParamKey:
     predicate: Union[ParamPredicate, Tuple[ParamPredicate]] = field(default_factory=tuple)
     """Predicate(s) to match parameters by. If multiple predicates are provided, any must match."""
 
+    with_name_predicate: Union[ParamWithNamePredicate, Tuple[ParamWithNamePredicate]] = field(
+        default_factory=tuple
+    )
+    """
+    Predicate(s) to match parameters with their name. If multiple predicates are provided,
+    any must match.
This is useful if you need to filter out some parameters from an otherwise + positive match by their name. + """ + def matches(self, param: torch.nn.Parameter, param_name: str) -> bool: """Returns true if passed-in parameter (with name) matches `param_key`. @@ -86,6 +123,15 @@ def matches(self, param: torch.nn.Parameter, param_name: str) -> bool: for predicate in self.predicate: if predicate(param): return True + + # Check if with_name_predicate matches. + if isinstance(self.with_name_predicate, ParamWithNamePredicate): + if self.with_name_predicate(param, param_name): + return True + else: + for predicate in self.with_name_predicate: + if predicate(param, param_name): + return True return False @@ -104,9 +150,20 @@ class OptimizerConfig: min_lr: Optional[float] = None """Minumum value for learning rate. The scheduler clip values below this threshold.""" + decoupled_lr: Optional[float] = None + """Separate learning rate for the input and output layer.""" + + decoupled_min_lr: Optional[float] = None + """Minimum value for learning rate for the input and output layer. The scheduler clip values + below this threshold. 
+ """ + weight_decay: float = 0.01 """Weight decay coefficient for L2 regularization.""" + apply_wd_to_qk_layernorm: bool = False + """If true, apply weight decay to qk layernorm as a special case.""" + ############## # Precision ############## diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py index 2b0a18b433b..a08d043bdb3 100644 --- a/megatron/core/ssm/gated_delta_net.py +++ b/megatron/core/ssm/gated_delta_net.py @@ -246,7 +246,7 @@ def reset_parameters(self): dtype=self.config.params_dtype, device=torch.cuda.current_device(), ).uniform_(*self.A_init_range) - self.A_log.data.copy_(A) + self.A_log.data.copy_(torch.log(A)) def forward( self, diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 7744869f80e..c85228e1136 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -911,6 +911,17 @@ def validate_args(args, defaults={}): dc = torch.cuda.get_device_capability() assert dc[0] >= 8, "Unsupported compute capability for GroupedGEMM kernels." + if args.no_weight_decay_cond_type is not None: + print_rank_0( + 'WARNING: --no-weight-decay-cond-type is deprecated. Please use --apply-wd-to-qk-layernorm instead.', + args.rank, + ) + if args.no_weight_decay_cond_type == "apply_wd_to_qk_layernorm": + args.apply_wd_to_qk_layernorm = True + else: + raise ValueError(f"Invalid no_weight_decay_cond_type: {args.no_weight_decay_cond_type}") + args.no_weight_decay_cond_type = None + if args.weight_decay_incr_style == 'constant': assert args.start_weight_decay is None assert args.end_weight_decay is None @@ -2083,12 +2094,8 @@ def _add_regularization_args(parser): group.add_argument('--weight-decay-incr-style', type=str, default='constant', choices=['constant', 'linear', 'cosine'], help='Weight decay increment function.') - group.add_argument('--no-weight-decay-cond-type', type=str, choices=['apply_wd_to_qk_layernorm'], - help='Type of no weight decay condition. 
Choices: ' - 'None (default): param no weight decay if and only if it is 1D; or it is bias; ' - 'or it is embedding and embedding_init_method_std is not None. ' - '"apply_wd_to_qk_layernorm": In addition to the default rules, ' - 'apply weight decay to qk layernorm as a special case.') + group.add_argument('--apply-wd-to-qk-layernorm', action='store_true', + help='Apply weight decay to qk layernorm as a special case.') group.add_argument('--clip-grad', type=float, default=1.0, help='Gradient clipping based on global L2 norm.') group.add_argument('--adam-beta1', type=float, default=0.9, @@ -2123,6 +2130,12 @@ def _add_regularization_args(parser): group.add_argument('--muon-extra-scale-factor', type=float, default=1.0, help='Additional scale factor for the muon update') + group.add_argument('--no-weight-decay-cond-type', type=str, choices=['apply_wd_to_qk_layernorm'], + help='Type of no weight decay condition. Choices: ' + 'None (default): apply weight decay to 1D weights and biases.' + '"apply_wd_to_qk_layernorm": additionally apply weight decay to ' + 'qk layernorm as a special case.' + 'DEPRECATED. Please use --apply-wd-to-qk-layernorm instead. ') return parser diff --git a/megatron/training/training.py b/megatron/training/training.py index 8aff2556d14..60156e1f227 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1248,7 +1248,7 @@ def get_megatron_optimizer_config(args: Any) -> OptimizerConfig: # Construct the appropriate config_overrides object. This default handles many cases, but # can be added to as needed by the user, or replaced entirely with a custom override. 
- config_overrides = get_standard_config_overrides(args.decoupled_lr, args.decoupled_min_lr) + config_overrides = get_standard_config_overrides(config=config) return config, config_overrides diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml index 5f63de867d9..37933a0e0a7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml @@ -18,7 +18,7 @@ MODEL_ARGS: --no-rope-fusion: true #TODO: We can remove this once upgrading to the DEV container --apply-layernorm-1p: true --attention-output-gate: true - --no-weight-decay-cond-type: apply_wd_to_qk_layernorm + --apply-wd-to-qk-layernorm: true --experimental-attention-variant: gated_delta_net --linear-attention-freq: 3 --linear-conv-kernel-dim: 4 diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py index 4f914b56f7c..1f5bbc3f14c 100644 --- a/tests/unit_tests/test_optimizer.py +++ b/tests/unit_tests/test_optimizer.py @@ -21,6 +21,7 @@ _get_param_groups, check_config_overrides_consistency, get_megatron_optimizer, + get_standard_config_overrides, ) from megatron.core.optimizer_param_scheduler import ParamGroupOverride from megatron.core.process_groups_config import ProcessGroupCollection @@ -45,7 +46,7 @@ class Net(nn.Module): - def __init__(self): + def __init__(self, add_layernorm=False): super().__init__() self.conv1 = nn.Conv2d(3, 6, 5) self.pool = nn.MaxPool2d(2, 2) @@ -53,6 +54,10 @@ def __init__(self): self.fc1 = nn.Linear(16 * 5 * 5, 120) self.fc2 = nn.Linear(120, 84) self.fc3 = nn.Linear(84, 10) + if add_layernorm: + self.q_layernorm = nn.LayerNorm(10, bias=False) + self.k_layernorm = nn.LayerNorm(10, bias=False) + self.layernorm = nn.LayerNorm(10, bias=False) def forward(self, x): x = self.pool(F.relu(self.conv1(x))) @@ -206,6 +211,81 
@@ def test_get_param_groups_overlapping_matches(mock_get_world_size): assert param_groups[2]['max_lr'] == 0.01 +@patch('torch.distributed.get_world_size', return_value=1) +@patch( + 'torch.distributed.all_gather_object', lambda output_list, obj: output_list.__setitem__(0, obj) +) +def test_get_param_groups_with_standard_config_overrides(apply_wd_to_qk_layernorm: bool): + """In this test, we see if the standard config overrides are applied correctly.""" + + # Initialize the model with layernorm + net = Net() + + config = OptimizerConfig(optimizer='adam', lr=0.01) + config_overrides = get_standard_config_overrides(config=config) + param_groups = _get_param_groups([net], config, config_overrides) + + assert len(param_groups) == 2 + p_set = set(net.parameters()) + + assert p_set == set(param_groups[0]['params']) | set(param_groups[1]['params']) + assert len(p_set) == len(param_groups[0]['params']) + len(param_groups[1]['params']) + assert param_groups[0]['wd_mult'] == 0.0 or param_groups[1]['wd_mult'] == 0.0 + assert param_groups[0]['wd_mult'] == 1.0 or param_groups[1]['wd_mult'] == 1.0 + assert len(param_groups[0]['params']) > 0 and len(param_groups[1]['params']) > 0 + + # Both param groups should have 5 parameters. 
+ # Param group A (wd_mult=1.0): conv1.weight, conv2.weight, fc1.weight, fc2.weight, fc3.weight + # Param group B (wd_mult=0.0): conv1.bias, conv2.bias, fc1.bias, fc2.bias, fc3.bias + assert len(param_groups[0]['params']) == 5, ( + f"Expected 5 parameters in the first param group, " + f"but got {len(param_groups[0]['params'])}" + ) + assert len(param_groups[1]['params']) == 5, ( + f"Expected 5 parameters in the second param group, " + f"but got {len(param_groups[1]['params'])}" + ) + + +@patch('torch.distributed.get_world_size', return_value=1) +@patch( + 'torch.distributed.all_gather_object', lambda output_list, obj: output_list.__setitem__(0, obj) +) +def test_get_param_groups_appling_wd_to_qk_layernorm(apply_wd_to_qk_layernorm: bool): + """In this test, we see if the `apply_wd_to_qk_layernorm` config is applied correctly.""" + + # Initialize the model with layernorm + net = Net(add_layernorm=True) + + config = OptimizerConfig( + optimizer='adam', lr=0.01, apply_wd_to_qk_layernorm=apply_wd_to_qk_layernorm + ) + config_overrides = get_standard_config_overrides(config=config) + param_groups = _get_param_groups([net], config, config_overrides) + + assert len(param_groups) == 2 + p_set = set(net.parameters()) + + assert p_set == set(param_groups[0]['params']) | set(param_groups[1]['params']) + assert len(p_set) == len(param_groups[0]['params']) + len(param_groups[1]['params']) + assert param_groups[0]['wd_mult'] == 1.0 + assert param_groups[1]['wd_mult'] == 0.0 + + # There are two param groups, having 7, and 6 parameters respectively. 
+ # Param group A (wd_mult=1.0): conv1.weight, conv2.weight, fc1.weight, fc2.weight, fc3.weight, + # q_layernorm.weight, k_layernorm.weight + # Param group B (wd_mult=0.0): conv1.bias, conv2.bias, fc1.bias, fc2.bias, fc3.bias, + # layernorm.weight + assert len(param_groups[0]['params']) == 7, ( + f"Expected 5 parameters in the first param group, " + f"but got {len(param_groups[0]['params'])}" + ) + assert len(param_groups[1]['params']) == 6, ( + f"Expected 6 parameters in the second param group, " + f"but got {len(param_groups[1]['params'])}" + ) + + def test_chained_optimizer(): net = Net() optimizer_1 = Adam(list(net.parameters())[:2], lr=0.01) From 6e2153b9e3c7a71c07bdb1aa417bef0177809f01 Mon Sep 17 00:00:00 2001 From: Yuzhong Wang Date: Wed, 21 Jan 2026 14:19:46 +0800 Subject: [PATCH 246/334] [dev] feat(moe): Cherry-pick #1989 back to dev (#3011) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig Co-authored-by: oliver könig --- gpt_builders.py | 27 +- ...rimental_attention_variant_module_specs.py | 467 +++++++++++++-- megatron/core/models/gpt/gpt_layer_specs.py | 530 +++++++----------- megatron/core/ssm/gated_delta_net.py | 4 +- .../dot_product_attention_context_parallel.py | 3 + megatron/core/transformer/spec_utils.py | 1 + .../core/transformer/transformer_config.py | 43 +- megatron/training/arguments.py | 26 +- megatron/training/checkpointing.py | 8 +- megatron/training/training.py | 66 ++- .../test_modelopt_module_spec.py | 1 + tests/unit_tests/ssm/test_gated_delta_net.py | 33 +- .../unit_tests/transformer/test_attention.py | 43 +- 13 files changed, 749 insertions(+), 503 deletions(-) diff --git a/gpt_builders.py b/gpt_builders.py index 293475b06b6..0be64edaab6 100644 --- a/gpt_builders.py +++ b/gpt_builders.py @@ -10,7 +10,8 @@ get_gpt_decoder_layer_specs, ) from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( - is_linear_attention_variant, + 
get_transformer_block_with_experimental_attention_variant_spec, + get_transformer_layer_with_experimental_attention_variant_spec, ) from megatron.core.models.gpt.heterogeneous.heterogeneous_layer_specs import ( get_gpt_heterogeneous_layer_spec, @@ -46,7 +47,13 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None, pg_ else: use_te = args.transformer_impl == "transformer_engine" - if args.num_experts or is_linear_attention_variant(args.experimental_attention_variant): + if args.experimental_attention_variant is not None: + transformer_layer_spec = ( + get_transformer_block_with_experimental_attention_variant_spec( + config=config, vp_stage=vp_stage + ) + ) + elif args.num_experts: assert not (config.transformer_impl == "inference_optimized") # Define the decoder block spec transformer_layer_spec = get_gpt_decoder_block_spec( @@ -70,9 +77,19 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None, pg_ mtp_transformer_layer_spec = import_module(args.spec) else: # Define the decoder block spec - decoder_layer_specs = get_gpt_decoder_layer_specs( - config, use_transformer_engine=use_te, normalization=args.normalization, qk_l2_norm=args.qk_l2_norm, vp_stage=vp_stage - ) + if args.experimental_attention_variant is not None: + decoder_layer_specs = ( + get_transformer_layer_with_experimental_attention_variant_spec( + config=config + ) + ) + else: + decoder_layer_specs = get_gpt_decoder_layer_specs( + config, + use_transformer_engine=use_te, + normalization=args.normalization, + qk_l2_norm=args.qk_l2_norm, + ) mtp_transformer_layer_spec = decoder_layer_specs[-1] # Use spec of the last layer in decoder block as spec of the transformer layer in MTP mtp_block_spec = get_gpt_mtp_block_spec( diff --git a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py index e6d6fa03ce7..7649a0b2165 100644 --- 
a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py +++ b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py @@ -1,10 +1,11 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. -from typing import Optional +from typing import List, Optional +from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add from megatron.core.models.backends import BackendSpecProvider from megatron.core.ssm.gated_delta_net import GatedDeltaNet, GatedDeltaNetSubmodules -from megatron.core.transformer.enums import AttnMaskType +from megatron.core.transformer.enums import AttnMaskType, LayerType from megatron.core.transformer.experimental_attention_variant.dsa import ( DSAIndexer, DSAIndexerSubmodules, @@ -17,19 +18,50 @@ MLASelfAttentionSubmodules, ) from megatron.core.transformer.spec_utils import ModuleSpec +from megatron.core.transformer.transformer_block import ( + TransformerBlockSubmodules, + get_num_layers_to_build, +) +from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.transformer.transformer_layer import ( + TransformerLayer, + TransformerLayerSubmodules, + get_transformer_layer_offset, +) +try: + import transformer_engine as te # type: ignore[import-untyped] # pylint: disable=unused-import -def is_linear_attention_variant(experimental_attention_variant: str) -> bool: - """Check if the experimental attention variant is a linear attention variant.""" - linear_attention_variants = ["gated_delta_net"] - return experimental_attention_variant in linear_attention_variants + from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider + + HAVE_TE = True +except ImportError: + HAVE_TE = False + +try: + import nvidia_kitchen # type: ignore[import-not-found] # pylint: disable=unused-import + + from megatron.core.extensions.kitchen import KitchenSpecProvider + HAVE_KITCHEN = True +except 
ImportError: + HAVE_KITCHEN = False -def get_gated_delta_net_module_spec_for_backend( - backend: BackendSpecProvider, normalization: Optional[str] = None + +########## +# Experimental Attention Variant Module Specs +########## + + +def get_gated_delta_net_module_spec( + config: TransformerConfig, backend: BackendSpecProvider = None ) -> ModuleSpec: - """Helper function to get module spec for Linear Attention""" - rms_norm = normalization == "RMSNorm" + """Build module spec for GatedDeltaNet attention.""" + + if backend is None: + backend = _get_backend_spec_provider(config=config) + + rms_norm = config.normalization == "RMSNorm" attention = ModuleSpec( module=GatedDeltaNet, submodules=GatedDeltaNetSubmodules( @@ -43,27 +75,22 @@ def get_gated_delta_net_module_spec_for_backend( def get_dsa_module_spec_for_backend( - backend: BackendSpecProvider, - qk_layernorm: Optional[bool] = False, - qk_l2_norm: Optional[bool] = False, - multi_latent_attention: Optional[bool] = False, - mla_down_proj_use_column_parallel: Optional[bool] = False, - normalization: Optional[str] = None, - fallback_to_eager_attn: Optional[bool] = False, + config: TransformerConfig, backend: BackendSpecProvider = None ) -> ModuleSpec: """Helper function to get module spec for Sparse Attention.""" - assert multi_latent_attention, "Currently only MLA supports sparse attention." - assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA." - assert fallback_to_eager_attn is False, "Fallback to eager attention is not supported with DSA." + assert config.multi_latent_attention, "Currently only MLA supports sparse attention." + assert config.qk_l2_norm is False, "qk_l2_norm is not supported with MLA." 
- linear_q_down_proj = ( - backend.column_parallel_linear() if mla_down_proj_use_column_parallel else backend.linear() + linear_q_up_proj = ( + backend.column_parallel_layer_norm_linear() + if config.qk_layernorm + else backend.column_parallel_linear() ) - linear_kv_down_proj = ( - backend.column_parallel_linear() if mla_down_proj_use_column_parallel else backend.linear() + linear_kv_up_proj = ( + backend.column_parallel_layer_norm_linear() + if config.qk_layernorm + else backend.column_parallel_linear() ) - linear_q_up_proj = backend.column_parallel_linear() - linear_kv_up_proj = backend.column_parallel_linear() # Because TransformerEngine does not support sparse attention yet, we use local # implementation whether the backend is TransformerEngine or not. @@ -82,23 +109,19 @@ def get_dsa_module_spec_for_backend( ), ) - # Adjust for RMS norm. - rms_norm = normalization == "RMSNorm" - qk_norm = backend.layer_norm(rms_norm=rms_norm, for_qk=True) if qk_layernorm else IdentityOp - attention = ModuleSpec( module=MLASelfAttention, params={"attn_mask_type": AttnMaskType.causal}, submodules=MLASelfAttentionSubmodules( linear_q_proj=backend.column_parallel_linear(), - linear_q_down_proj=linear_q_down_proj, + linear_q_down_proj=backend.linear(), linear_q_up_proj=linear_q_up_proj, - linear_kv_down_proj=linear_kv_down_proj, + linear_kv_down_proj=backend.linear(), linear_kv_up_proj=linear_kv_up_proj, core_attention=core_attention, linear_proj=backend.row_parallel_linear(), - q_layernorm=qk_norm, - kv_layernorm=qk_norm, + q_layernorm=IdentityOp, + kv_layernorm=IdentityOp, ), metainfo={"fuse_input_layernorm": False}, ) @@ -106,33 +129,359 @@ def get_dsa_module_spec_for_backend( return attention -def get_experimental_attention_variant_module_spec_for_backend( - backend: BackendSpecProvider, - sharded_state_dict_keys_map: dict, - experimental_attention_variant: Optional[str] = None, - qk_layernorm: Optional[bool] = False, - qk_l2_norm: Optional[bool] = False, - 
multi_latent_attention: Optional[bool] = False, - mla_down_proj_use_column_parallel: Optional[bool] = False, - normalization: Optional[str] = None, - fallback_to_eager_attn: Optional[bool] = False, +def get_experimental_attention_variant_module_spec( + config: TransformerConfig, backend: BackendSpecProvider = None ) -> ModuleSpec: - """Helper function to get module spec for Attention""" - if experimental_attention_variant == "gated_delta_net": - return get_gated_delta_net_module_spec_for_backend( - backend=backend, normalization=normalization + """Helper function to get module spec for experimental attention variant""" + + if backend is None: + backend = _get_backend_spec_provider(config=config) + + if config.experimental_attention_variant == "gated_delta_net": + return get_gated_delta_net_module_spec(config=config, backend=backend) + else: + raise ValueError( + f"Invalid experimental attention variant: {config.experimental_attention_variant}" ) - elif experimental_attention_variant == "dsa": - return get_dsa_module_spec_for_backend( - backend=backend, - qk_layernorm=qk_layernorm, - qk_l2_norm=qk_l2_norm, - multi_latent_attention=multi_latent_attention, - mla_down_proj_use_column_parallel=mla_down_proj_use_column_parallel, - normalization=normalization, - fallback_to_eager_attn=fallback_to_eager_attn, + + +########## +# Experimental GPT Decoder Block Spec +########## + + +def get_transformer_layer_with_experimental_attention_variant_spec( + config: TransformerConfig, backend: BackendSpecProvider = None +) -> List[ModuleSpec]: + """Build transformer layer specs with experimental attention variants (e.g., linear attention). + + This function is for constructing a heterogeneous transformer that supports mixing different + attention mechanisms (experimental vs standard) and MLP types (MoE vs dense) across layers. + **Note that, this API is a experimental API in the short term, and might be deprecated in the + future. 
In the long run, we will move to a new design that better support hybrid models.** + + Key Design: + 1. Attention and MLP patterns: The attention pattern and MLP pattern are orthogonal + and determined independently. This allows flexible combinations (e.g., linear attention + with MoE, or standard attention with dense MLP). + - Attention pattern: derived from `config.linear_attention_freq` or + `config.experimental_attention_variant`. + - MLP pattern: derived from `config.moe_layer_freq`. + + 2. Per-Layer Spec Construction: Iterates through layers, constructing transformer + layer specs based on attention and MLP patterns. + + Args: + config: Transformer configuration containing model hyperparameters and feature flags. + + Returns: + List[ModuleSpec] containing per-layer specs. + + Note: + Currently only supports transformer_engine backend. Kitchen backend can be used as a + wrapper with TE fallback for unsupported operations. + """ + + if backend is None: + backend = _get_backend_spec_provider(config=config) + + # Get attention patterns and specs + experimental_attention_pattern = [0] * config.num_layers + if is_linear_attention_variant(config.experimental_attention_variant): + experimental_attention_pattern = get_linear_attention_pattern(config=config) + elif config.experimental_attention_variant is not None: + experimental_attention_pattern = [1] * config.num_layers + + if 1 in experimental_attention_pattern: + experimental_attention_spec = get_experimental_attention_variant_module_spec( + config=config, backend=backend + ) + else: + experimental_attention_spec = None + + if 0 in experimental_attention_pattern: + standard_attention_spec = _get_self_attention_module_spec(config=config, backend=backend) + else: + standard_attention_spec = None + + # Get MLP patterns and specs + if config.num_moe_experts is not None: + moe_layer_pattern = get_moe_layer_pattern(config=config) + else: + moe_layer_pattern = [0] * config.num_layers + + if 1 in moe_layer_pattern: + 
moe_layer_spec = _get_moe_module_spec(config=config, backend=backend) + else: + moe_layer_spec = None + + if 0 in moe_layer_pattern: + dense_mlp_layer_spec = _get_dense_mlp_module_spec(config=config, backend=backend) + else: + dense_mlp_layer_spec = None + + # Get GPT decoder block layer specs + rms_norm = config.normalization == "RMSNorm" + layer_specs = [] + for layer_number in range(config.num_layers): + attention = ( + experimental_attention_spec + if experimental_attention_pattern[layer_number] == 1 + else standard_attention_spec + ) + mlp = moe_layer_spec if moe_layer_pattern[layer_number] == 1 else dense_mlp_layer_spec + input_layernorm = ( + IdentityOp + if attention.metainfo["fuse_input_layernorm"] + else backend.layer_norm(rms_norm=rms_norm, for_qk=False) + ) + pre_mlp_layernorm = ( + IdentityOp + if mlp.metainfo["fuse_pre_mlp_layernorm"] + else backend.layer_norm(rms_norm=rms_norm, for_qk=False) + ) + + layer_specs.append( + ModuleSpec( + module=TransformerLayer, + submodules=TransformerLayerSubmodules( + input_layernorm=input_layernorm, + self_attention=attention, + self_attn_bda=get_bias_dropout_add, + pre_mlp_layernorm=pre_mlp_layernorm, + mlp=mlp, + mlp_bda=get_bias_dropout_add, + ), + ) + ) + + return layer_specs + + +def get_transformer_block_with_experimental_attention_variant_spec( + config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None +) -> TransformerBlockSubmodules: + """Build transformer block spec with experimental attention variants (e.g., linear attention). + + This function constructs a heterogeneous transformer block that supports mixing different + attention mechanisms (experimental vs standard) and MLP types (MoE vs dense) across layers. + **Note that, this API is a experimental API in the short term, and might be deprecated in the + future. 
In the long run, we will move to a new design that better support hybrid models.** + + Constructing transformer layer specs by + `get_transformer_layer_with_experimental_attention_variant_spec` and then slicing the + layer specs to only include the layers that are built in this pipeline stage. + + Args: + config: Transformer configuration containing model hyperparameters and feature flags. + vp_stage: Virtual pipeline stage index for interleaved pipeline parallelism. + pp_rank: Pipeline model parallel rank. + + Returns: + TransformerBlockSubmodules containing per-layer specs and final layer norm. + + Note: + Currently only supports transformer_engine backend. Kitchen backend can be used as a + wrapper with TE fallback for unsupported operations. + """ + + backend = _get_backend_spec_provider(config=config) + + layer_specs = get_transformer_layer_with_experimental_attention_variant_spec( + config=config, backend=backend + ) + + # Slice the layer specs to only include the layers that are built in this pipeline stage. 
+ if config.pipeline_model_parallel_layout is not None: + local_layer_ids = config.pipeline_model_parallel_layout.get_layer_id_list( + layer_type=LayerType.decoder, vp_stage=vp_stage, pp_rank=pp_rank + ) + else: + offset = get_transformer_layer_offset(config, vp_stage=vp_stage, pp_rank=pp_rank) + num_layers_to_build = get_num_layers_to_build(config, vp_stage=vp_stage, pp_rank=pp_rank) + local_layer_ids = range(offset, offset + num_layers_to_build) + + layer_specs = [layer_specs[layer_id] for layer_id in local_layer_ids] + + # Get GPT decoder block spec + rms_norm = config.normalization == "RMSNorm" + gpt_decoder_block_spec = TransformerBlockSubmodules( + layer_specs=layer_specs, layer_norm=backend.layer_norm(rms_norm=rms_norm, for_qk=False) + ) + + return gpt_decoder_block_spec + + +########## +# Utilities +########## + + +def is_linear_attention_variant(experimental_attention_variant: Optional[str]) -> bool: + """Check if the experimental attention variant is a linear attention variant.""" + linear_attention_variants = ["gated_delta_net"] + return experimental_attention_variant in linear_attention_variants + + +def get_moe_layer_pattern(config: TransformerConfig) -> List[int]: + """Parse config.moe_layer_freq to get per-layer MoE pattern (1=MoE, 0=dense). + + - int N: one MoE layer every N layers (e.g., N=2 -> [1,0,1,0,...]) + - list: use directly as the pattern.""" + + if isinstance(config.moe_layer_freq, int): + # [1,0,0,...,0,1,0,0,...,0,...] 
+ moe_layer_pattern = [ + 1 if (i % config.moe_layer_freq == 0) else 0 for i in range(config.num_layers) + ] + elif isinstance(config.moe_layer_freq, list): + moe_layer_pattern = config.moe_layer_freq + assert len(moe_layer_pattern) == config.num_layers, ( + f"Invalid length of moe_layer_pattern: {len(moe_layer_pattern)}, " + f"expected {config.num_layers}, " + f"current moe layer pattern: {config.moe_layer_freq}" ) else: raise ValueError( - f"Invalid experimental attention variant: {experimental_attention_variant}" + f"Invalid moe_layer_freq: {type(config.moe_layer_freq)}, {config.moe_layer_freq}" + ) + return moe_layer_pattern + + +def get_linear_attention_pattern(config: TransformerConfig) -> List[int]: + """Parse config.linear_attention_freq to get per-layer attention pattern (1=LA, 0=SDPA). + + - int N: one SDPA layer every N layers (e.g., N=4 -> [1,1,1,0,1,1,1,0,...]) + - list: use directly as the pattern.""" + + if isinstance(config.linear_attention_freq, int): + linear_attention_pattern = [ + # [1,1,...,1,0,1,1,...,1,0,...] + 0 if ((i + 1) % config.linear_attention_freq == 0) else 1 + for i in range(config.num_layers) + ] + elif isinstance(config.linear_attention_freq, list): + linear_attention_pattern = config.linear_attention_freq + assert len(linear_attention_pattern) == config.num_layers, ( + f"Invalid length of linear_attention_pattern: {len(linear_attention_pattern)}, " + f"expected {config.num_layers}, " + f"current linear attention pattern: {config.linear_attention_freq}" + ) + elif config.linear_attention_freq is None: + if not is_linear_attention_variant(config.experimental_attention_variant): + linear_attention_pattern = [0] * config.num_layers + else: + # This should be caught by config validation, but raise here as a safety check + raise ValueError( + f"Linear attention type {config.experimental_attention_variant} is specified " + "but linear_attention_freq is None. " + "Please set linear_attention_freq to specify the LA/SDPA layer pattern." 
+ ) + else: + raise ValueError( + f"Invalid linear_attention_freq: {type(config.linear_attention_freq)}," + f" {config.linear_attention_freq}" + ) + return linear_attention_pattern + + +def _get_backend_spec_provider(config: TransformerConfig) -> BackendSpecProvider: + """Get backend spec provider for experimental attention variant.""" + + assert config.transformer_impl == "transformer_engine", ( + "Experimental GPT decoder block spec only supports " + "transformer engine implementation for now." + ) + backend: BackendSpecProvider = ( + KitchenSpecProvider( + fallback=TESpecProvider(fallback_to_eager_attn=config.fallback_to_eager_attn), + use_kitchen_attention=config.use_kitchen_attention, + kitchen_attention_backend=config.kitchen_attention_backend, ) + if config.use_kitchen + else TESpecProvider() + ) + return backend + + +########## +# Spec functions for non-experimental self attention and MLP layer. +########## + + +def _get_self_attention_module_spec( + config: TransformerConfig, backend: BackendSpecProvider = None +) -> ModuleSpec: + """Get non-experimental self-attention module spec. + For hybrid models that mix experimental and non-experimental attention architectures. 
+ + Warning: This function may be deprecated in the future.""" + + if backend is None: + backend = _get_backend_spec_provider(config=config) + + from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec + + layer_spec = get_gpt_layer_with_transformer_engine_spec( + num_experts=config.num_moe_experts, + moe_grouped_gemm=config.moe_grouped_gemm, + qk_layernorm=config.qk_layernorm, + multi_latent_attention=config.multi_latent_attention, + moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm, + qk_l2_norm=config.qk_l2_norm, + use_kitchen=config.use_kitchen, + use_te_activation_func=config.use_te_activation_func, + fallback_to_eager_attn=config.fallback_to_eager_attn, + use_kitchen_attention=config.use_kitchen_attention, + kitchen_attention_backend=config.kitchen_attention_backend, + ) + attn_spec = layer_spec.submodules.self_attention + if config.multi_latent_attention: + attn_spec.metainfo["fuse_input_layernorm"] = False + else: + attn_spec.metainfo["fuse_input_layernorm"] = backend.fuse_layernorm_and_linear() + + return attn_spec + + +def _get_dense_mlp_module_spec( + config: TransformerConfig, backend: BackendSpecProvider = None +) -> ModuleSpec: + """Get dense MLP module spec. + For hybrid models that mix dense MLP and experimental attention architectures. + + Warning: This function may be deprecated in the future.""" + + if backend is None: + backend = _get_backend_spec_provider(config=config) + + from megatron.core.models.gpt.gpt_layer_specs import get_mlp_module_spec_for_backend + + mlp_spec = get_mlp_module_spec_for_backend(backend=backend, num_experts=None) + mlp_spec.metainfo["fuse_pre_mlp_layernorm"] = backend.fuse_layernorm_and_linear() + + return mlp_spec + + +def _get_moe_module_spec( + config: TransformerConfig, backend: BackendSpecProvider = None +) -> ModuleSpec: + """Get MoE module spec. + For hybrid models that mix MoE and experimental attention architectures. 
+ + Warning: This function may be deprecated in the future.""" + + if backend is None: + backend = _get_backend_spec_provider(config=config) + + from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec_for_backend + + moe_spec = get_moe_module_spec_for_backend( + backend=backend, + num_experts=config.num_moe_experts, + moe_grouped_gemm=config.moe_grouped_gemm, + moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm, + use_te_activation_func=config.use_te_activation_func, + ) + moe_spec.metainfo["fuse_pre_mlp_layernorm"] = False + return moe_spec diff --git a/megatron/core/models/gpt/gpt_layer_specs.py b/megatron/core/models/gpt/gpt_layer_specs.py index 1db3b939530..70f0a8244ca 100755 --- a/megatron/core/models/gpt/gpt_layer_specs.py +++ b/megatron/core/models/gpt/gpt_layer_specs.py @@ -9,13 +9,8 @@ InferenceSpecProvider, LocalSpecProvider, ) -from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( - get_experimental_attention_variant_module_spec_for_backend, - is_linear_attention_variant, -) from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec_for_backend from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules -from megatron.core.transformer.dot_product_attention import DotProductAttention from megatron.core.transformer.enums import AttnMaskType, LayerType from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.mlp import MLP, MLPSubmodules @@ -45,7 +40,7 @@ from megatron.core.utils import is_te_min_version try: - import transformer_engine as te # type: ignore[import-untyped] # pylint: disable=unused-import + import transformer_engine as te # pylint: disable=unused-import from megatron.core.extensions.transformer_engine import TEFusedMLP, TENorm from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider @@ -55,7 +50,7 @@ HAVE_TE = False try: - import nvidia_kitchen # type: ignore[import-not-found] # 
pylint: disable=unused-import + import nvidia_kitchen # pylint: disable=unused-import from megatron.core.extensions.kitchen import KitchenSpecProvider @@ -64,7 +59,7 @@ HAVE_KITCHEN = False try: - import apex # type: ignore[import-untyped] # pylint: disable=unused-import + import apex # pylint: disable=unused-import from megatron.core.fusions.fused_layer_norm import FusedLayerNorm @@ -181,10 +176,8 @@ def get_gpt_layer_with_transformer_engine_spec( moe_grouped_gemm: Optional[bool] = False, qk_layernorm: Optional[bool] = False, multi_latent_attention: Optional[bool] = False, - experimental_attention_variant: Optional[str] = None, fp8: Optional[str] = None, # pylint: disable=unused-argument moe_use_legacy_grouped_gemm: Optional[bool] = False, - normalization: Optional[str] = None, qk_l2_norm: Optional[bool] = False, use_te_op_fuser: Optional[bool] = False, use_kitchen: bool = False, @@ -200,15 +193,10 @@ def get_gpt_layer_with_transformer_engine_spec( num_experts (int, optional): Number of experts. Defaults to None. moe_grouped_gemm (bool, optional): To use Grouped GEMM. Defaults to False. qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False. - multi_latent_attention (bool, optional): To use multi-latent attention. Defaults to False. - experimental_attention_variant (str, optional): The type of experimental attention variant. - Defaults to None. fp8 (str, optional): Deprecated. For temporary Nemo compatibility. moe_use_legacy_grouped_gemm (bool, optional): Force use the legacy GroupedMLP. Defaults to False. - normalization (str, optional): The normalization to use. Defaults to None. qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False. - use_kitchen (bool, optional): To use KitchenSpecProvider. Defaults to False. use_te_op_fuser (bool, optional): Use Transformer Engine's operation-based API, which may enable certain operation fusions. Defaults to False. 
@@ -236,23 +224,8 @@ def get_gpt_layer_with_transformer_engine_spec( else: backend = TESpecProvider(fallback_to_eager_attn=fallback_to_eager_attn) - sharded_state_dict_keys_map = {} - - attention = get_attention_module_spec_for_backend( - backend=backend, - sharded_state_dict_keys_map=sharded_state_dict_keys_map, - experimental_attention_variant=experimental_attention_variant, - qk_layernorm=qk_layernorm, - qk_l2_norm=qk_l2_norm, - multi_latent_attention=multi_latent_attention, - mla_down_proj_use_column_parallel=False, - normalization=normalization, - fallback_to_eager_attn=fallback_to_eager_attn, - ) - mlp = get_mlp_module_spec_for_backend( backend=backend, - sharded_state_dict_keys_map=sharded_state_dict_keys_map, num_experts=num_experts, moe_grouped_gemm=moe_grouped_gemm, moe_use_legacy_grouped_gemm=moe_use_legacy_grouped_gemm, @@ -260,13 +233,77 @@ def get_gpt_layer_with_transformer_engine_spec( use_te_activation_func=use_te_activation_func, ) - return get_transformer_layer_spec_for_backend( - backend=backend, - attention=attention, - mlp=mlp, - sharded_state_dict_keys_map=sharded_state_dict_keys_map, - normalization=normalization, - ) + if multi_latent_attention: + assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA." 
+ linear_q_up_proj = ( + backend.column_parallel_layer_norm_linear() + if qk_layernorm + else backend.column_parallel_linear() + ) + linear_kv_up_proj = ( + backend.column_parallel_layer_norm_linear() + if qk_layernorm + else backend.column_parallel_linear() + ) + return ModuleSpec( + module=TransformerLayer, + submodules=TransformerLayerSubmodules( + input_layernorm=backend.layer_norm(), + self_attention=ModuleSpec( + module=MLASelfAttention, + params={"attn_mask_type": AttnMaskType.causal}, + submodules=MLASelfAttentionSubmodules( + linear_q_proj=backend.column_parallel_linear(), + linear_q_down_proj=backend.linear(), + linear_q_up_proj=linear_q_up_proj, + linear_kv_down_proj=backend.linear(), + linear_kv_up_proj=linear_kv_up_proj, + core_attention=backend.core_attention(), + linear_proj=backend.row_parallel_linear(), + q_layernorm=IdentityOp, + kv_layernorm=IdentityOp, + ), + ), + self_attn_bda=get_bias_dropout_add, + pre_mlp_layernorm=backend.layer_norm() if num_experts else IdentityOp, + mlp=mlp, + mlp_bda=get_bias_dropout_add, + ), + ) + else: + qk_norm = backend.layer_norm(for_qk=True) + return ModuleSpec( + module=TransformerLayer, + submodules=TransformerLayerSubmodules( + self_attention=ModuleSpec( + module=SelfAttention, + params={"attn_mask_type": AttnMaskType.causal}, + submodules=SelfAttentionSubmodules( + linear_qkv=backend.column_parallel_layer_norm_linear(), + core_attention=backend.core_attention(), + linear_proj=backend.row_parallel_linear(), + q_layernorm=( + L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) + ), + k_layernorm=( + L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) + ), + ), + ), + self_attn_bda=get_bias_dropout_add, + pre_mlp_layernorm=backend.layer_norm() if num_experts else IdentityOp, + mlp=mlp, + mlp_bda=get_bias_dropout_add, + sharded_state_dict_keys_map={ + "mlp.0.weight": "mlp.linear_fc1.layer_norm_weight", + "mlp.0.bias": "mlp.linear_fc1.layer_norm_bias", + "mlp.1.basic_ops.0.weight": 
"mlp.linear_fc1.weight", + "mlp.1.basic_ops.1.bias": "mlp.linear_fc1.bias", + "mlp.3.basic_ops.0.weight": "mlp.linear_fc2.weight", + "mlp.3.basic_ops.1.bias": "mlp.linear_fc2.bias", + }, + ), + ) def get_gpt_layer_local_spec( @@ -274,7 +311,6 @@ def get_gpt_layer_local_spec( moe_grouped_gemm: Optional[bool] = False, qk_layernorm: Optional[bool] = False, multi_latent_attention: Optional[bool] = False, - experimental_attention_variant: Optional[str] = None, fp8: Optional[str] = None, # pylint: disable=unused-argument moe_use_legacy_grouped_gemm: Optional[bool] = False, normalization: Optional[str] = None, @@ -290,15 +326,10 @@ def get_gpt_layer_local_spec( num_experts (int, optional): Number of experts. Defaults to None. moe_grouped_gemm (bool, optional): To use Grouped GEMM. Defaults to False. qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False. - multi_latent_attention (bool, optional): To use multi-latent attention. Defaults to False. - experimental_attention_variant (str, optional): The type of experimental attention variant. - Defaults to None. fp8 (str, optional): Deprecated. For temporary Nemo compatibility. moe_use_legacy_grouped_gemm (bool, optional): Force use the legacy GroupedMLP. Defaults to False. - normalization (str, optional): The normalization to use. Defaults to None. qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False. - use_kitchen (bool, optional): To use KitchenSpecProvider. Defaults to False. Returns: ModuleSpec: Module specification with Megatron-Core modules @@ -313,6 +344,13 @@ def get_gpt_layer_local_spec( ) else: backend = LocalSpecProvider() + # Adjust for RMS norm. 
+ if normalization == "RMSNorm": + layer_norm = backend.layer_norm(rms_norm=True, for_qk=False) + qk_norm = backend.layer_norm(rms_norm=True, for_qk=True) + else: + layer_norm = backend.layer_norm(rms_norm=False, for_qk=False) + qk_norm = backend.layer_norm(rms_norm=False, for_qk=True) if fp8 is not None: warnings.warn( @@ -320,25 +358,6 @@ def get_gpt_layer_local_spec( " and will be removed soon. Please update your code accordingly." ) - if experimental_attention_variant is not None: - raise NotImplementedError( - "Experimental attention variant is not supported with local spec yet." - ) - - sharded_state_dict_keys_map = {} - - attention = get_attention_module_spec_for_backend( - backend=backend, - sharded_state_dict_keys_map=sharded_state_dict_keys_map, - experimental_attention_variant=experimental_attention_variant, - qk_layernorm=qk_layernorm, - qk_l2_norm=qk_l2_norm, - multi_latent_attention=multi_latent_attention, - mla_down_proj_use_column_parallel=True, - normalization=normalization, - fallback_to_eager_attn=False, - ) - mlp = get_mlp_module_spec_for_backend( backend=backend, num_experts=num_experts, @@ -346,170 +365,63 @@ def get_gpt_layer_local_spec( moe_use_legacy_grouped_gemm=moe_use_legacy_grouped_gemm, ) - return get_transformer_layer_spec_for_backend( - backend=backend, - attention=attention, - mlp=mlp, - sharded_state_dict_keys_map=sharded_state_dict_keys_map, - normalization=normalization, - ) - - -def get_transformer_layer_spec_for_backend( - backend: BackendSpecProvider, - attention: ModuleSpec, - mlp: ModuleSpec, - sharded_state_dict_keys_map: Optional[dict] = None, - normalization: Optional[str] = None, -) -> ModuleSpec: - """Helper function to get module spec for TransformerLayer""" - - rms_norm = normalization == "RMSNorm" - - input_layernorm = ( - IdentityOp - if attention.metainfo["fuse_input_layernorm"] - else backend.layer_norm(rms_norm=rms_norm, for_qk=False) - ) - pre_mlp_layernorm = ( - IdentityOp - if 
mlp.metainfo["fuse_pre_mlp_layernorm"] - else backend.layer_norm(rms_norm=rms_norm, for_qk=False) - ) - - transformer_layer = ModuleSpec( - module=TransformerLayer, - submodules=TransformerLayerSubmodules( - input_layernorm=input_layernorm, - self_attention=attention, - self_attn_bda=get_bias_dropout_add, - pre_mlp_layernorm=pre_mlp_layernorm, - mlp=mlp, - mlp_bda=get_bias_dropout_add, - sharded_state_dict_keys_map=sharded_state_dict_keys_map, - ), - ) - return transformer_layer - - -def get_attention_module_spec_for_backend( - backend: BackendSpecProvider, - sharded_state_dict_keys_map: dict, - experimental_attention_variant: Optional[str] = None, - qk_layernorm: Optional[bool] = False, - qk_l2_norm: Optional[bool] = False, - multi_latent_attention: Optional[bool] = False, - mla_down_proj_use_column_parallel: Optional[bool] = False, - normalization: Optional[str] = None, - fallback_to_eager_attn: Optional[bool] = False, -) -> ModuleSpec: - """Helper function to get module spec for Attention""" - - if experimental_attention_variant is not None: - return get_experimental_attention_variant_module_spec_for_backend( - backend, - sharded_state_dict_keys_map, - experimental_attention_variant, - qk_layernorm, - qk_l2_norm, - multi_latent_attention, - mla_down_proj_use_column_parallel, - normalization, - fallback_to_eager_attn, - ) - - # Adjust for RMS norm. - rms_norm = normalization == "RMSNorm" - qk_norm = backend.layer_norm(rms_norm=rms_norm, for_qk=True) - - core_attention = backend.core_attention() if not fallback_to_eager_attn else DotProductAttention if multi_latent_attention: assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA." 
- linear_q_down_proj = ( - backend.column_parallel_linear() - if mla_down_proj_use_column_parallel - else backend.linear() - ) - linear_kv_down_proj = ( - backend.column_parallel_linear() - if mla_down_proj_use_column_parallel - else backend.linear() - ) - linear_q_up_proj = ( - backend.column_parallel_layer_norm_linear() - if qk_layernorm and backend.fuse_layernorm_and_linear() - else backend.column_parallel_linear() - ) - linear_kv_up_proj = ( - backend.column_parallel_layer_norm_linear() - if qk_layernorm and backend.fuse_layernorm_and_linear() - else backend.column_parallel_linear() - ) - qk_norm = ( - backend.layer_norm(rms_norm=rms_norm, for_qk=True) - if qk_layernorm and not backend.fuse_layernorm_and_linear() - else IdentityOp - ) - attention = ModuleSpec( - module=MLASelfAttention, - params={"attn_mask_type": AttnMaskType.causal}, - submodules=MLASelfAttentionSubmodules( - linear_q_proj=backend.column_parallel_linear(), - linear_q_down_proj=linear_q_down_proj, - linear_q_up_proj=linear_q_up_proj, - linear_kv_down_proj=linear_kv_down_proj, - linear_kv_up_proj=linear_kv_up_proj, - core_attention=core_attention, - linear_proj=backend.row_parallel_linear(), - q_layernorm=qk_norm, - kv_layernorm=qk_norm, + return ModuleSpec( + module=TransformerLayer, + submodules=TransformerLayerSubmodules( + input_layernorm=layer_norm, + self_attention=ModuleSpec( + module=MLASelfAttention, + params={"attn_mask_type": AttnMaskType.causal}, + submodules=MLASelfAttentionSubmodules( + linear_q_proj=backend.column_parallel_linear(), + linear_q_down_proj=backend.column_parallel_linear(), + linear_q_up_proj=backend.column_parallel_linear(), + linear_kv_down_proj=backend.column_parallel_linear(), + linear_kv_up_proj=backend.column_parallel_linear(), + core_attention=backend.core_attention(), + linear_proj=backend.row_parallel_linear(), + q_layernorm=qk_norm if qk_layernorm else IdentityOp, + kv_layernorm=qk_norm if qk_layernorm else IdentityOp, + ), + ), + 
self_attn_bda=get_bias_dropout_add, + pre_mlp_layernorm=layer_norm, + mlp=mlp, + mlp_bda=get_bias_dropout_add, ), - metainfo={"fuse_input_layernorm": False}, ) else: - linear_qkv = ( - backend.column_parallel_layer_norm_linear() - if backend.fuse_layernorm_and_linear() - else backend.column_parallel_linear() - ) - if qk_l2_norm: - qk_norm = L2Norm - elif qk_layernorm: - qk_norm = backend.layer_norm(rms_norm=rms_norm, for_qk=True) - else: - qk_norm = IdentityOp - attention = ModuleSpec( - module=SelfAttention, - params={"attn_mask_type": AttnMaskType.causal}, - submodules=SelfAttentionSubmodules( - linear_qkv=linear_qkv, - core_attention=core_attention, - linear_proj=backend.row_parallel_linear(), - q_layernorm=qk_norm, - k_layernorm=qk_norm, - ), - metainfo={"fuse_input_layernorm": backend.fuse_layernorm_and_linear()}, - ) - if backend.fuse_layernorm_and_linear(): - sharded_state_dict_keys_map.update( - { - "mlp.0.weight": "mlp.linear_fc1.layer_norm_weight", - "mlp.0.bias": "mlp.linear_fc1.layer_norm_bias", - "mlp.1.basic_ops.0.weight": "mlp.linear_fc1.weight", - "mlp.1.basic_ops.1.bias": "mlp.linear_fc1.bias", - "mlp.3.basic_ops.0.weight": "mlp.linear_fc2.weight", - "mlp.3.basic_ops.1.bias": "mlp.linear_fc2.bias", - } - ) - else: - sharded_state_dict_keys_map.update( - { + return ModuleSpec( + module=TransformerLayer, + submodules=TransformerLayerSubmodules( + input_layernorm=layer_norm, + self_attention=ModuleSpec( + module=SelfAttention, + params={"attn_mask_type": AttnMaskType.causal}, + submodules=SelfAttentionSubmodules( + linear_qkv=backend.column_parallel_linear(), + core_attention=backend.core_attention(), + linear_proj=backend.row_parallel_linear(), + q_layernorm=( + L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) + ), + k_layernorm=( + L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp) + ), + ), + ), + self_attn_bda=get_bias_dropout_add, + pre_mlp_layernorm=layer_norm, + mlp=mlp, + mlp_bda=get_bias_dropout_add, + 
sharded_state_dict_keys_map={ "input_layernorm.": "self_attention.linear_qkv.layer_norm_", "pre_mlp_layernorm.": "mlp.linear_fc1.layer_norm_", - } - ) - - return attention + }, + ), + ) def _get_mlp_module_spec( @@ -568,7 +480,6 @@ def get_mlp_module_spec( def get_mlp_module_spec_for_backend( backend: BackendSpecProvider, - sharded_state_dict_keys_map: Optional[dict] = None, num_experts: Optional[int] = None, moe_grouped_gemm: Optional[bool] = False, moe_use_legacy_grouped_gemm: Optional[bool] = False, @@ -586,16 +497,13 @@ def get_mlp_module_spec_for_backend( if backend.fuse_layernorm_and_linear(): linear_fc1 = backend.column_parallel_layer_norm_linear() assert linear_fc1 is not None - fuse_pre_mlp_layernorm = True else: linear_fc1 = backend.column_parallel_linear() - fuse_pre_mlp_layernorm = False return ModuleSpec( module=module, submodules=MLPSubmodules( linear_fc1=linear_fc1, linear_fc2=linear_fc2, activation_func=activation_func ), - metainfo={"fuse_pre_mlp_layernorm": fuse_pre_mlp_layernorm}, ) else: # Mixture of experts with modules in megatron core. @@ -613,76 +521,61 @@ def get_gpt_decoder_layer_specs( use_transformer_engine: bool, normalization: Optional[str] = None, qk_l2_norm: Optional[bool] = False, - vp_stage: Optional[int] = None, - pp_rank: Optional[int] = None, ) -> TransformerBlockSubmodules: - """Helper function to get GPT block spec. 
- - Return a list of transformer layer spec of the current pipeline stage.""" - - get_layer_spec_kwargs = { - "qk_layernorm": config.qk_layernorm, - "moe_use_legacy_grouped_gemm": config.moe_use_legacy_grouped_gemm, - "qk_l2_norm": qk_l2_norm, - "use_kitchen": config.use_kitchen, - "normalization": normalization, - "use_kitchen_attention": config.use_kitchen_attention, - "kitchen_attention_backend": config.kitchen_attention_backend, - } + """GPT block spec.""" + assert config.experimental_attention_variant is None, ( + "Experimental attention variant is not supported with get_gpt_decoder_layer_specs, " + f"but got {config.experimental_attention_variant=}." + ) + if use_transformer_engine: - layer_norm_impl = TENorm - get_layer_spec_kwargs["use_te_activation_func"] = config.use_te_activation_func - get_layer_spec_kwargs['fallback_to_eager_attn'] = config.fallback_to_eager_attn - get_layer_spec_fn = get_gpt_layer_with_transformer_engine_spec + dense_layer_spec = get_gpt_layer_with_transformer_engine_spec( + num_experts=None, + moe_grouped_gemm=False, + qk_layernorm=config.qk_layernorm, + multi_latent_attention=config.multi_latent_attention, + moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm, + qk_l2_norm=qk_l2_norm, + use_kitchen=config.use_kitchen, + use_te_activation_func=config.use_te_activation_func, + ) + moe_layer_spec = get_gpt_layer_with_transformer_engine_spec( + num_experts=config.num_moe_experts, + moe_grouped_gemm=config.moe_grouped_gemm, + qk_layernorm=config.qk_layernorm, + multi_latent_attention=config.multi_latent_attention, + moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm, + qk_l2_norm=qk_l2_norm, + use_kitchen=config.use_kitchen, + use_te_activation_func=config.use_te_activation_func, + ) else: - layer_norm_impl = LNImpl - get_layer_spec_fn = get_gpt_layer_local_spec - - layer_spec_dict = {} - for mlp_type in ["dense", "moe"]: - for attention_type in ["softmax_attention", "linear_attention"]: - if mlp_type == "moe": - 
if config.moe_layer_freq is None: - # Skip if there is no MoE layer in the model. - continue - num_experts = config.num_moe_experts - moe_grouped_gemm = config.moe_grouped_gemm - else: - num_experts = None - moe_grouped_gemm = None - if attention_type == "linear_attention": - multi_latent_attention = None - if is_linear_attention_variant(config.experimental_attention_variant): - # There exists linear attention layer in the model. - experimental_attention_variant = config.experimental_attention_variant - else: - # Skip if there is no linear attention layer in the model. - continue - else: - multi_latent_attention = config.multi_latent_attention - if is_linear_attention_variant(config.experimental_attention_variant): - # experimental_attention_variant is a linear attention variant, - # so softmax attention is regular attention layer. - experimental_attention_variant = None - else: - # Softmax attention is an experimental attention variant. - experimental_attention_variant = config.experimental_attention_variant - - layer_spec_key = f"{mlp_type}_{attention_type}" - layer_spec_dict[layer_spec_key] = get_layer_spec_fn( - num_experts=num_experts, - moe_grouped_gemm=moe_grouped_gemm, - multi_latent_attention=multi_latent_attention, - experimental_attention_variant=experimental_attention_variant, - **get_layer_spec_kwargs, - ) + dense_layer_spec = get_gpt_layer_local_spec( + num_experts=None, + moe_grouped_gemm=False, + qk_layernorm=config.qk_layernorm, + multi_latent_attention=config.multi_latent_attention, + moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm, + normalization=normalization, + qk_l2_norm=qk_l2_norm, + use_kitchen=config.use_kitchen, + ) + moe_layer_spec = get_gpt_layer_local_spec( + num_experts=config.num_moe_experts, + moe_grouped_gemm=config.moe_grouped_gemm, + qk_layernorm=config.qk_layernorm, + multi_latent_attention=config.multi_latent_attention, + moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm, + 
normalization=normalization, + qk_l2_norm=qk_l2_norm, + use_kitchen=config.use_kitchen, + ) # Parse config.moe_layer_freq to determine the pattern of expert/dense layers. # 0 stands for dense layers, 1 stands for expert layers. # For integer N: Creates a pattern with one expert layer every N layers. # For string pattern: Evaluates the str directly (e.g. "[1,0,1]" for alternating expert/dense). if isinstance(config.moe_layer_freq, int): - # [1,0,0,...,0,1,0,0,...,0,...] moe_layer_pattern = [ 1 if (i % config.moe_layer_freq == 0) else 0 for i in range(config.num_layers) ] @@ -698,50 +591,15 @@ def get_gpt_decoder_layer_specs( f"Invalid moe_layer_freq: {type(config.moe_layer_freq)}, {config.moe_layer_freq}" ) - # Parse config.linear_attention_freq to determine the pattern of expert/dense layers. - # 0 stands for SDPA layers, 1 stands for LA layers. - # For integer N: Creates a pattern with (N-1) LA layers and 1 SDPA layer every N layers. - # For string pattern: Evaluates the str directly (e.g. "[1,0,1]" for alternating LA/SDPA). - if isinstance(config.linear_attention_freq, int): - linear_attention_pattern = [ - # [1,1,...,1,0,1,1,...,1,0,...] - 0 if ((i + 1) % config.linear_attention_freq == 0) else 1 - for i in range(config.num_layers) - ] - elif isinstance(config.linear_attention_freq, list): - linear_attention_pattern = config.linear_attention_freq - assert len(linear_attention_pattern) == config.num_layers, ( - f"Invalid length of linear_attention_pattern: {len(linear_attention_pattern)}, " - f"expected {config.num_layers}, " - f"current linear attention pattern: {config.linear_attention_freq}" - ) - elif config.linear_attention_freq is None: - if not is_linear_attention_variant(config.experimental_attention_variant): - linear_attention_pattern = [0] * config.num_layers - else: - linear_attention_pattern = [1] * config.num_layers - warnings.warn( - f"Linear attention type {config.experimental_attention_variant} is specified " - "but linear_attention_freq is None. 
" - "Setting linear_attention_pattern to [1] * config.num_layers as default." - ) - else: - raise ValueError( - f"Invalid linear_attention_freq: {type(config.linear_attention_freq)}," - f" {config.linear_attention_freq}" - ) - # Create the layer specs for the model. layer_specs = [] for layer_number in range(config.num_layers): - mlp_type = "moe" if moe_layer_pattern[layer_number] else "dense" - attention_type = ( - "linear_attention" if linear_attention_pattern[layer_number] else "softmax_attention" - ) - layer_spec_key = f"{mlp_type}_{attention_type}" - if layer_spec_key not in layer_spec_dict: - raise ValueError(f"Invalid layer spec key: {layer_spec_key}") - layer_specs.append(layer_spec_dict[layer_spec_key]) + if moe_layer_pattern[layer_number] == 1: + layer_specs.append(moe_layer_spec) + elif moe_layer_pattern[layer_number] == 0: + layer_specs.append(dense_layer_spec) + else: + raise ValueError(f"Invalid layer pattern: {moe_layer_pattern}") return layer_specs @@ -758,13 +616,16 @@ def get_gpt_decoder_block_spec( layer_specs = get_gpt_decoder_layer_specs( config, use_transformer_engine, normalization, qk_l2_norm ) + # Slice the layer specs to only include the layers that are built in this pipeline stage. # Note: MCore layer_number starts at 1 num_layers_to_build = get_num_layers_to_build(config, vp_stage=vp_stage, pp_rank=pp_rank) if config.pipeline_model_parallel_layout is not None: layout = config.pipeline_model_parallel_layout - assert isinstance(layout, PipelineParallelLayerLayout) + assert isinstance( + layout, PipelineParallelLayerLayout + ), f"Invalid pipeline model parallel layout: {layout}" local_layer_specs = [ layer_specs[layer_id] for layer_id in layout.get_layer_id_list( @@ -775,11 +636,11 @@ def get_gpt_decoder_block_spec( offset = get_transformer_layer_offset(config, vp_stage=vp_stage, pp_rank=pp_rank) local_layer_specs = layer_specs[offset : offset + num_layers_to_build] + # Block spec. 
if use_transformer_engine: layer_norm_impl = TENorm else: layer_norm_impl = LNImpl - # Block spec. block_spec = TransformerBlockSubmodules( layer_specs=local_layer_specs, layer_norm=layer_norm_impl ) @@ -796,22 +657,17 @@ def get_gpt_mtp_block_spec( ) -> MultiTokenPredictionBlockSubmodules: """GPT Multi-Token Prediction (MTP) block spec.""" if use_transformer_engine: - backend: BackendSpecProvider = ( - KitchenSpecProvider( + if config.use_kitchen: + backend: BackendSpecProvider = KitchenSpecProvider( fallback=TESpecProvider(fallback_to_eager_attn=config.fallback_to_eager_attn), use_kitchen_attention=config.use_kitchen_attention, kitchen_attention_backend=config.kitchen_attention_backend, ) - if config.use_kitchen - else TESpecProvider(fallback_to_eager_attn=config.fallback_to_eager_attn) - ) + else: + backend = TESpecProvider(fallback_to_eager_attn=config.fallback_to_eager_attn) else: backend = ( - KitchenSpecProvider( - fallback=LocalSpecProvider(), - use_kitchen_attention=config.use_kitchen_attention, - kitchen_attention_backend=config.kitchen_attention_backend, - ) + KitchenSpecProvider(fallback=LocalSpecProvider()) if config.use_kitchen else LocalSpecProvider() ) diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py index a08d043bdb3..16dc3a79ebb 100644 --- a/megatron/core/ssm/gated_delta_net.py +++ b/megatron/core/ssm/gated_delta_net.py @@ -104,7 +104,9 @@ def __init__( """ if not HAVE_FLA: - raise ImportError("FLA is not installed. Please install it with `pip install fla`.") + raise ImportError( + "FLA is not installed. Please install it with `pip install flash-linear-attention`." 
+ ) super().__init__(config) diff --git a/megatron/core/transformer/dot_product_attention_context_parallel.py b/megatron/core/transformer/dot_product_attention_context_parallel.py index 89659a1d743..aaf08d40ade 100644 --- a/megatron/core/transformer/dot_product_attention_context_parallel.py +++ b/megatron/core/transformer/dot_product_attention_context_parallel.py @@ -185,6 +185,9 @@ def forward(ctx, q, k, v, attention_mask, attention_dropout, softmax_scale, pg): comm.all_gather(kv_buffer_copy[1], v_0) # Prepare attention bias + assert ( + attention_mask is not None + ), "Attention mask is required for the native attention function with context parallelism" attn_bias = to_zz_mask_attn_bias( attention_mask, cp_size, nheads, nheads_k, heads_k_stride, q.device, q.dtype ) diff --git a/megatron/core/transformer/spec_utils.py b/megatron/core/transformer/spec_utils.py index 24df1add0eb..dbd2e08bccb 100644 --- a/megatron/core/transformer/spec_utils.py +++ b/megatron/core/transformer/spec_utils.py @@ -46,6 +46,7 @@ def import_module(module_path: Tuple[str]): return vars(module)[name] +# pylint: disable=missing-function-docstring def get_module(spec_or_module: Union[ModuleSpec, type], **additional_kwargs): """Retrieve the module class or function specified by a ModuleSpec or return it as is if already provided. diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 875d8a92049..8f5462ff55b 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -194,6 +194,9 @@ class TransformerConfig(ModelParallelConfig): qk_layernorm: bool = False """Whether to apply `normalization` type of normalization to the query and key embeddings.""" + qk_l2_norm: bool = False + """Whether to apply llama 4-style qk L2 norm.""" + qk_clip: bool = False """Whether to clip the query and key weights. 
Needed for Muon MLA Model training.""" @@ -234,7 +237,26 @@ class TransformerConfig(ModelParallelConfig): """Type of attention variant to use. Currently support gated_delta_net and dsa.""" #################### - # attention variant: gated_delta_net + # DSA + #################### + dsa_indexer_n_heads: Optional[int] = None + """Number of DSA indexer heads.""" + + dsa_indexer_head_dim: Optional[int] = None + """Dimension per DSA indexer head.""" + + dsa_indexer_topk: Optional[int] = None + """Number of top-k tokens to select in DSA indexer.""" + + dsa_indexer_loss_coeff: Optional[float] = None + """Coefficient for the DSA indexer KL divergence loss. Set to 0 to disable indexer loss.""" + + dsa_indexer_use_sparse_loss: Optional[bool] = None + """Whether to use sparse DSA indexer loss. If True, the indexer loss will be computed using the + top-k indices.""" + + #################### + # linear attention #################### linear_attention_type: Optional[str] = None """Type of linear attention to use. @@ -262,25 +284,6 @@ class TransformerConfig(ModelParallelConfig): linear_num_value_heads: Optional[int] = None """Number of value and gate heads for the gated delta net.""" - #################### - # attention variant: dsa - #################### - dsa_indexer_n_heads: Optional[int] = None - """Number of DSA indexer heads.""" - - dsa_indexer_head_dim: Optional[int] = None - """Dimension per DSA indexer head.""" - - dsa_indexer_topk: Optional[int] = None - """Number of top-k tokens to select in DSA indexer.""" - - dsa_indexer_loss_coeff: Optional[float] = None - """Coefficient for the DSA indexer KL divergence loss. Set to 0 to disable indexer loss.""" - - dsa_indexer_use_sparse_loss: Optional[bool] = None - """Whether to use sparse DSA indexer loss. 
If True, the indexer loss will be computed using the - top-k indices.""" - #################### # initialization #################### diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index c85228e1136..027449b1729 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -2467,7 +2467,6 @@ def _add_training_args(parser): 'which only ensures bitwise identical results when the same inputs are processed in the same batch configuration. ' 'This will significantly affect speed of training and inference as the kernels are not full optimized.') - return parser @@ -3454,7 +3453,17 @@ def _add_experimental_attention_variant_args(parser): group = parser.add_argument_group(title="experimental_attention_variant") group.add_argument('--experimental-attention-variant', default=None, choices=['gated_delta_net', 'dsa'], type=str, help='Type of attention variant to use. Currently support gated_delta_net and dsa.') - + # DSA + group.add_argument('--dsa-indexer-n-heads', default=None, type=int, + help='Number of indexer heads for sparse attention. If not set, defaults to num-attention-heads.') + group.add_argument('--dsa-indexer-head-dim', default=None, type=int, + help='Dimension per indexer head for sparse attention. If not set, defaults to kv-channels.') + group.add_argument('--dsa-indexer-topk', default=None, type=int, + help='Number of top-k tokens to select in sparse attention indexer.') + group.add_argument('--dsa-indexer-loss-coeff', default=0.0, type=float, + help='Coefficient for the indexer KL divergence loss. Set to 0 to disable indexer loss.') + group.add_argument('--dsa-indexer-use-sparse-loss', action='store_true', + help='Use sparse indexer loss. 
If set, the indexer loss will be computed using the top-k indices.') # Linear attention group.add_argument('--linear-attention-type', default=None, choices=['gated_delta_net'], type=str, help='(Deprecated, use --experimental-attention-variant instead) Type of linear attention to use. Currently support gated_delta_net.') @@ -3477,19 +3486,6 @@ def _add_experimental_attention_variant_args(parser): help='Number of query and key heads for the gated delta net.') group.add_argument('--linear-num-value-heads', default=32, type=int, help='Number of value and gate heads for the gated delta net.') - - # DSA - group.add_argument('--dsa-indexer-n-heads', default=None, type=int, - help='Number of indexer heads for sparse attention. If not set, defaults to num-attention-heads.') - group.add_argument('--dsa-indexer-head-dim', default=None, type=int, - help='Dimension per indexer head for sparse attention. If not set, defaults to kv-channels.') - group.add_argument('--dsa-indexer-topk', default=None, type=int, - help='Number of top-k tokens to select in sparse attention indexer.') - group.add_argument('--dsa-indexer-loss-coeff', default=0.0, type=float, - help='Coefficient for the indexer KL divergence loss. Set to 0 to disable indexer loss.') - group.add_argument('--dsa-indexer-use-sparse-loss', action='store_true', - help='Use sparse indexer loss. 
If set, the indexer loss will be computed using the top-k indices.') - return parser def _add_heterogeneous_args(parser): diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index 77b17b07e13..f7ff7cd2775 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -1472,13 +1472,13 @@ def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', ckpt_args = state_dict.get("args") if not hasattr(ckpt_args, "tensor_model_parallel_size"): - print_rank_0("WARNING: TP size not found in checkpoint args, using 0 as default.") + print_rank_0("WARNING: TP size not found in checkpoint args, using 1 as default.") if not hasattr(ckpt_args, "pipeline_model_parallel_size"): - print_rank_0("WARNING: PP size not found in checkpoint args, using 0 as default.") + print_rank_0("WARNING: PP size not found in checkpoint args, using 1 as default.") ckpt_tp_pp = ( - getattr(ckpt_args, "tensor_model_parallel_size", 0), - getattr(ckpt_args, "pipeline_model_parallel_size", 0), + getattr(ckpt_args, "tensor_model_parallel_size", 1), + getattr(ckpt_args, "pipeline_model_parallel_size", 1), ) run_tp_pp = ( args.tensor_model_parallel_size, diff --git a/megatron/training/training.py b/megatron/training/training.py index 60156e1f227..5c52f907fc6 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -332,18 +332,15 @@ def transformer_flops(): if args.moe_shared_expert_intermediate_size is None else args.moe_shared_expert_intermediate_size ) - # SwiGLU. - gated_linear_multiplier = 3 / 2 if args.swiglu else 1 - # The 12x term below comes from the following factors; for more details, see - # "APPENDIX: FLOATING-POINT OPERATIONS" in https://arxiv.org/abs/2104.04473. # - 3x: Each GEMM in the model needs to be performed 3 times (forward pass, # backward wgrad [weight gradient], backward dgrad [data gradient]). 
- # - 2x: GEMMs of a particular size are stacked twice in the standard Transformer model - # architectures implemented in this codebase (e.g., h->ffn_h GEMM and ffn_h->h GEMM - # in MLP layer). + forward_backward_expansion_factor = 3 # - 2x: A GEMM of a m*n tensor with a n*k tensor requires 2mnk floating-point operations. - expansion_factor = 3 * 2 * 2 + fma_expansion_factor = 2 + # - 3x (SwiGLU enabled): h->2*ffn_h GEMM and ffn_h->h GEMM are stacked. + # - 2x (SwiGLU disabled): h->ffn_h GEMM and ffn_h->h GEMM are stacked. + ffn_expansion_factor = 3 if args.swiglu else 2 if args.multi_latent_attention: assert not args.group_query_attention @@ -374,8 +371,8 @@ def transformer_flops(): + 1 ) standard_self_attn_term = ( - 3 - * 2 # fwd(1) + bwd(2) *FMA + forward_backward_expansion_factor + * fma_expansion_factor * ( ## q lora + rope + q norm q_term @@ -402,13 +399,19 @@ def transformer_flops(): query_projection_size = args.kv_channels * args.num_attention_heads key_projection_size = args.kv_channels * args.num_query_groups value_projection_size = args.kv_channels * args.num_query_groups + gate_projection_size = query_projection_size if args.attention_output_gate else 0 standard_self_attn_term = ( - 3 - * 2 # fwd(1) + bwd(2) *FMA + forward_backward_expansion_factor + * fma_expansion_factor * ( ## qkv proj args.hidden_size - * (query_projection_size + key_projection_size + value_projection_size) + * ( + query_projection_size + + key_projection_size + + value_projection_size + + gate_projection_size + ) ## core attention + query_projection_size * args.seq_length @@ -436,7 +439,12 @@ def transformer_flops(): f"current linear attention pattern: {args.linear_attention_freq}" ) elif args.linear_attention_freq is None: - linear_attention_pattern = [1] * num_layers + # This should be caught by config validation, but raise here as a safety check + raise ValueError( + f"Linear attention type {args.experimental_attention_variant} is specified " + "but linear_attention_freq is 
None. " + "Please set linear_attention_freq to specify the LA/SDPA layer pattern." + ) else: raise ValueError( f"Invalid linear_attention_freq: {type(args.linear_attention_freq)}," @@ -454,8 +462,8 @@ def transformer_flops(): qk_dim = qk_head_dim * num_qk_heads v_dim = v_head_dim * num_v_heads linear_self_attn_term = ( - 3 - * 2 # fwd(1) + bwd(2) *FMA + forward_backward_expansion_factor + * fma_expansion_factor * ( ## in proj args.hidden_size @@ -492,25 +500,25 @@ def transformer_flops(): * args.seq_length * ( # MLP - expansion_factor - * num_layers + forward_backward_expansion_factor + * fma_expansion_factor * args.hidden_size * ( # dense layer (deepseek v2, v3 style) - (args.ffn_hidden_size * gated_linear_multiplier) - * (num_dense_layers / num_layers) + (args.ffn_hidden_size * ffn_expansion_factor) + * num_dense_layers # routed experts - + (moe_ffn_hidden_size * num_experts_routed_to * gated_linear_multiplier) - * (num_moe_layers / num_layers) + + (moe_ffn_hidden_size * num_experts_routed_to * ffn_expansion_factor) + * num_moe_layers # Shared Experts. - + (shared_expert_ffn_hidden_size * gated_linear_multiplier) - * (num_moe_layers / num_layers) + + (shared_expert_ffn_hidden_size * ffn_expansion_factor) + * num_moe_layers ) # Self Attention + self_attn_term # MTP norms and proj - + 3 - * 2 + + forward_backward_expansion_factor + * fma_expansion_factor * mtp_num_layers * ( # MTP eh norm + final nrom @@ -519,7 +527,11 @@ def transformer_flops(): + 2 * args.hidden_size * args.hidden_size ) # Logit. 
- + 3 * 2 * args.hidden_size * args.padded_vocab_size * (mtp_num_layers + 1) + + forward_backward_expansion_factor + * fma_expansion_factor + * args.hidden_size + * args.padded_vocab_size + * (mtp_num_layers + 1) # MTP + final logit ) ) return total_floating_point_operations diff --git a/tests/unit_tests/post_training/test_modelopt_module_spec.py b/tests/unit_tests/post_training/test_modelopt_module_spec.py index ec80fcb1a72..dac96785bc0 100644 --- a/tests/unit_tests/post_training/test_modelopt_module_spec.py +++ b/tests/unit_tests/post_training/test_modelopt_module_spec.py @@ -173,6 +173,7 @@ def setup_method(self, method): moe_ffn_hidden_size=128, moe_shared_expert_intermediate_size=128, qk_layernorm=True, + qk_l2_norm=True, use_cpu_initialization=True, ) default_spec = get_gpt_decoder_block_spec( diff --git a/tests/unit_tests/ssm/test_gated_delta_net.py b/tests/unit_tests/ssm/test_gated_delta_net.py index 725d18fbc06..81f8eed0574 100644 --- a/tests/unit_tests/ssm/test_gated_delta_net.py +++ b/tests/unit_tests/ssm/test_gated_delta_net.py @@ -11,7 +11,10 @@ from megatron.core.models.common.embeddings.rope_utils import ( get_pos_emb_on_this_cp_rank as get_tensor_on_this_cp_rank, ) -from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec +from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_experimental_attention_variant_module_spec, + get_transformer_block_with_experimental_attention_variant_spec, +) from megatron.core.models.gpt.gpt_model import GPTModel from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.ssm.gated_delta_net import GatedDeltaNet @@ -82,10 +85,13 @@ def setup_method(self, tp_size, sp, cp_size): tensor_model_parallel_size=tp_size, sequence_parallel=sp, context_parallel_size=cp_size, + experimental_attention_variant="gated_delta_net", + linear_attention_freq=[1], + transformer_impl="transformer_engine", ) - gdn_submodules = 
get_gpt_layer_with_transformer_engine_spec( - experimental_attention_variant="gated_delta_net", normalization="RMSNorm" - ).submodules.self_attention.submodules + gdn_submodules = get_experimental_attention_variant_module_spec( + config=self.transformer_config + ).submodules self.gdn = GatedDeltaNet( self.transformer_config, @@ -159,10 +165,13 @@ def test_parallel_gated_delta_net_correctness(tmp_path_dist_ckpt, tp, sp, cp): num_attention_heads=8, activation_func=F.silu, bf16=True, + experimental_attention_variant="gated_delta_net", + linear_attention_freq=[1], + transformer_impl="transformer_engine", ) - transformer_layer_spec = get_gpt_layer_with_transformer_engine_spec( - experimental_attention_variant="gated_delta_net", normalization="RMSNorm" + transformer_layer_spec = get_transformer_block_with_experimental_attention_variant_spec( + config=transformer_config, vp_stage=None, pp_rank=0 ) if cp: @@ -171,5 +180,15 @@ def test_parallel_gated_delta_net_correctness(tmp_path_dist_ckpt, tp, sp, cp): atol, rtol = 5e-4, 5e-4 _test_parallel_attention_correctness( - transformer_config, transformer_layer_spec, tmp_path_dist_ckpt, tp, sp, cp + transformer_config=transformer_config, + transformer_layer_spec=transformer_layer_spec, + tmp_path_dist_ckpt=tmp_path_dist_ckpt, + atol=atol, + rtol=rtol, + tp=tp, + sp=sp, + cp=cp, + seed=123, + sequence_length=256, + micro_batch_size=4, ) diff --git a/tests/unit_tests/transformer/test_attention.py b/tests/unit_tests/transformer/test_attention.py index cd7ca916091..b5f2857d622 100644 --- a/tests/unit_tests/transformer/test_attention.py +++ b/tests/unit_tests/transformer/test_attention.py @@ -875,6 +875,7 @@ def get_tensor_on_this_rank(tensor): Utils.destroy_model_parallel() +# TODO(yuzhongw): Add test case for fallback_to_eager_attn @pytest.mark.parametrize("apply_rope_fusion", [False, True]) @pytest.mark.parametrize( ("tp", "sp", "cp"), @@ -887,25 +888,15 @@ def get_tensor_on_this_rank(tensor): ], ) 
@pytest.mark.parametrize("qk_layernorm", [False, True]) -@pytest.mark.parametrize("fallback_to_eager_attn", [False, True]) @pytest.mark.parametrize("output_gate", [False, True]) def test_parallel_attention_correctness( - tmp_path_dist_ckpt, - apply_rope_fusion, - tp, - sp, - cp, - qk_layernorm, - fallback_to_eager_attn, - output_gate, + tmp_path_dist_ckpt, apply_rope_fusion, tp, sp, cp, qk_layernorm, output_gate ): transformer_config = TransformerConfig( num_layers=1, hidden_size=128, num_attention_heads=4, - context_parallel_size=1, - tensor_model_parallel_size=1, - sequence_parallel=False, + normalization="RMSNorm", bf16=True, qk_layernorm=qk_layernorm, apply_rope_fusion=apply_rope_fusion, @@ -914,24 +905,20 @@ def test_parallel_attention_correctness( attention_dropout=0.0, ) - transformer_layer_spec = get_gpt_layer_with_transformer_engine_spec( - fallback_to_eager_attn=fallback_to_eager_attn, - normalization="RMSNorm", - qk_layernorm=qk_layernorm, - ) - if cp > 1: - if qk_layernorm: - atol, rtol = 2e-2, 2e-2 - else: - atol, rtol = 5e-3, 5e-3 - else: - if qk_layernorm: - atol, rtol = 1e-2, 1e-2 - else: - atol, rtol = 2e-3, 2e-3 + transformer_layer_spec = get_gpt_layer_with_transformer_engine_spec(qk_layernorm=qk_layernorm) + atol, rtol = 1e-2, 1e-2 _test_parallel_attention_correctness( - transformer_config, transformer_layer_spec, tmp_path_dist_ckpt, tp, sp, cp + transformer_config, + transformer_layer_spec, + tmp_path_dist_ckpt, + atol=atol, + rtol=rtol, + tp=tp, + sp=sp, + cp=cp, + seed=123, + sequence_length=256, ) From 68e5fec01969afbb7cd466a40909a2d2fc6da91d Mon Sep 17 00:00:00 2001 From: Hongbin Liu Date: Thu, 22 Jan 2026 11:26:06 +0800 Subject: [PATCH 247/334] [Dev]feat(moe): code refactor for fine grained activation offloading (#2905) Signed-off-by: Hongbin Liu Signed-off-by: root Co-authored-by: root --- .../fine_grained_activation_offloading.md | 2 +- .../offloading_and_recomputing.png | Bin .../common/model_chunk_schedule_plan.py | 9 +- 
.../core/models/gpt/fine_grained_callables.py | 20 +- megatron/core/models/gpt/gpt_model.py | 10 +- .../fine_grained_activation_offload.py | 1037 ++++++++++++++--- megatron/core/pipeline_parallel/schedules.py | 19 +- megatron/core/pipeline_parallel/utils.py | 25 +- megatron/core/transformer/attention.py | 27 +- megatron/core/transformer/moe/experts.py | 29 +- .../transformer/multi_latent_attention.py | 68 +- .../transformer/multi_token_prediction.py | 5 - .../core/transformer/transformer_block.py | 8 - .../core/transformer/transformer_layer.py | 29 +- megatron/training/arguments.py | 3 + megatron/training/training.py | 7 +- .../golden_values_dev_dgx_h100.json | 102 +- .../model_config.yaml | 2 +- .../golden_values_dev_dgx_h100.json | 102 +- .../model_config.yaml | 7 +- ...test_fine_grained_activation_offloading.py | 720 +++++++++--- 21 files changed, 1638 insertions(+), 593 deletions(-) rename docs/{source => }/images/fine_grained_activation_offloading/offloading_and_recomputing.png (100%) diff --git a/docs/api-guide/fine_grained_activation_offloading.md b/docs/api-guide/fine_grained_activation_offloading.md index 969098263fc..53211d1d06c 100644 --- a/docs/api-guide/fine_grained_activation_offloading.md +++ b/docs/api-guide/fine_grained_activation_offloading.md @@ -28,4 +28,4 @@ Currently, the supported offloading modules are `"attn_norm", "core_attn", "attn - For other modules, use offloading to reduce memory footprint; - Make sure the offloading/reloading could be overlapped with computing; -![Fine-grained Activation Offloading and Fine-grained Recomputation](../images/fine_grained_activation_offloading/offloading_and_recomputing.png) +![Fine-grained Activation Offloading and Fine-grained Recomputation](../../images/fine_grained_activation_offloading/offloading_and_recomputing.png) diff --git a/docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png b/docs/images/fine_grained_activation_offloading/offloading_and_recomputing.png 
similarity index 100% rename from docs/source/images/fine_grained_activation_offloading/offloading_and_recomputing.png rename to docs/images/fine_grained_activation_offloading/offloading_and_recomputing.png diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index b8f11ed9d38..0c29423edab 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. from contextlib import nullcontext from typing import Optional @@ -8,9 +8,6 @@ from megatron.core.enums import Fp8Recipe from megatron.core.fp8_utils import get_fp8_context -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_set_last_layer, -) from megatron.core.pipeline_parallel.utils import ( AbstractSchedulePlan, NoopScheduleNode, @@ -488,8 +485,6 @@ def run( # combined forward and backward pass for overlapped layers for i in range(overlapped_layers): f_layer = f_schedule_plan.get_layer(i) - if f_layer.layer.config.fine_grained_activation_offloading: - fine_grained_offloading_set_last_layer(i == f_num_layers - 1) b_layer = b_schedule_plan.pop_layer() torch.cuda.nvtx.range_push(f"layer_{i}f-layer_{b_schedule_plan.num_layers()}b") f_input, b_grad = TransformerLayerSchedulePlan.run( @@ -518,8 +513,6 @@ def run( for i in range(overlapped_layers, f_num_layers): f_layer = f_schedule_plan.get_layer(i) torch.cuda.nvtx.range_push(f"layer_{i}f") - if f_layer.layer.config.fine_grained_activation_offloading: - fine_grained_offloading_set_last_layer(i == f_num_layers - 1) f_input, _ = TransformerLayerSchedulePlan.run(f_layer, None, f_input=f_input) torch.cuda.nvtx.range_pop() diff --git a/megatron/core/models/gpt/fine_grained_callables.py 
b/megatron/core/models/gpt/fine_grained_callables.py index 71c5c19749c..5a365b015b2 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -11,9 +11,7 @@ from megatron.core import tensor_parallel from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, + FineGrainedActivationOffloadingInterface as off_interface, ) from megatron.core.pipeline_parallel.utils import ScheduleNode, make_viewless from megatron.core.transformer.enums import CudaGraphScope @@ -450,18 +448,18 @@ def forward_func( ) if not isinstance(layer.mlp, MoELayer): return hidden_states, None, None, None - if layer.offload_mlp_norm: - hidden_states = fine_grained_offloading_group_start( - hidden_states, name="mlp_norm" - ) if layer.recompute_pre_mlp_layernorm: layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with get_fine_grained_offloading_context(layer.offload_mlp_norm): + with off_interface( + layer.offload_mlp_norm, hidden_states, "mlp_norm" + ) as hidden_states: pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( layer.pre_mlp_layernorm, hidden_states ) else: - with get_fine_grained_offloading_context(layer.offload_mlp_norm): + with off_interface( + layer.offload_mlp_norm, hidden_states, "mlp_norm" + ) as hidden_states: pre_mlp_layernorm_output = layer.pre_mlp_layernorm(hidden_states) shared_expert_output = layer.mlp.shared_experts_compute(pre_mlp_layernorm_output) @@ -550,8 +548,10 @@ def submodule_combine_forward(node: ScheduleNode, output: torch.Tensor): hidden_states = layer.mlp_bda(layer.training, layer.config.bias_dropout_fusion)( mlp_output_with_bias, residual, layer.hidden_dropout ) + # Delay the offload of the mlp norm until after the mlp_bda has been computed + # because the 
residual is needed in the mlp_bda. if layer.offload_mlp_norm: - (hidden_states,) = fine_grained_offloading_group_commit( + hidden_states = off_interface.group_commit( hidden_states, name="mlp_norm", forced_released_tensors=[residual] ) output = make_viewless_tensor( diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 9e70c677226..16462d6e426 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -19,7 +19,7 @@ from megatron.core.models.common.language_module.language_module import LanguageModule from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_init_chunk_handler, + FineGrainedActivationOffloadingInterface as off_interface, ) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.quantization.utils import get_quant_config_or_none @@ -431,20 +431,20 @@ def _preprocess( def preprocess_for_fine_grained_offloading(self): """Preprocess for fine-grained activation offloading.""" - fine_grained_offloading_init_chunk_handler( + off_interface.init_chunk_handler( vp_size=self.config.virtual_pipeline_model_parallel_size, vp_stage=self.vp_stage, min_offloaded_tensor_size=self.config.min_offloaded_tensor_size, ) if self.disable_param_offloading: for param in self.decoder.parameters(): - param.offloading_activation = False + off_interface.mark_not_offloadable(param) if self.mtp_process: for param in self.mtp.parameters(): - param.offloading_activation = False + off_interface.mark_not_offloadable(param) if self.post_process: for param in self.output_layer.parameters(): - param.offloading_activation = False + off_interface.mark_not_offloadable(param) self.disable_param_offloading = False def forward( diff --git a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py 
index 138dcd8f7b1..9996c9b57a4 100644 --- a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py +++ b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py @@ -2,16 +2,16 @@ from collections import deque from contextlib import nullcontext -from typing import Any +from typing import Any, Dict, Tuple import torch -from megatron.core.pipeline_parallel.utils import set_ideal_affinity_for_current_gpu - # CPU offload implementation for pipeline parallelism DEBUG = False DEBUG_RANK = 0 +from megatron.core.transformer.cuda_graphs import is_graph_capturing + def debug_rank(message): """Print debug message for a specific rank when DEBUG is enabled.""" @@ -23,6 +23,362 @@ def debug_rank(message): print(message) +def print_offload_summary_table(total_offload_bytes: Dict[str, int]): + """ + Print an ASCII table summarizing offload bytes across all ranks. + + Gathers offload data from all ranks and prints a formatted table on rank 0, + with rows representing ranks and columns representing groups. + + Args: + total_offload_bytes: Dict mapping group names to offload bytes for this rank. 
+ """ + # pylint: disable=bad-builtin + assert torch.distributed.is_initialized() + rank = torch.distributed.get_rank() + world_size = torch.distributed.get_world_size() + + # Gather all group names across ranks + local_names = list(total_offload_bytes.keys()) + all_names_list = [None] * world_size + torch.distributed.all_gather_object(all_names_list, local_names) + all_group_names = sorted(set(name for names in all_names_list for name in names)) + + # Gather offload bytes from all ranks: each rank sends a list of bytes per group + local_bytes = [total_offload_bytes.get(name, 0) for name in all_group_names] + all_bytes_list = [None] * world_size + torch.distributed.all_gather_object(all_bytes_list, local_bytes) + + # Print ASCII table on rank 0 + if rank == 0: + # Calculate column widths + col_width = max(12, max((len(name) for name in all_group_names), default=8) + 2) + rank_col_width = max(6, len(f"Rank {world_size - 1}") + 2) + + # Build header + header = "Rank".ljust(rank_col_width) + header += "".join(name.rjust(col_width) for name in all_group_names) + header += "Total".rjust(col_width) + separator = "-" * len(header) + + print("\n" + "=" * len(header)) + print("Activation Offload Summary (MB)".center(len(header))) + print("=" * len(header)) + print(header) + print(separator) + + # Build rows for each rank + grand_total = 0 + col_totals = [0] * len(all_group_names) + for r in range(world_size): + row_bytes = all_bytes_list[r] + row_total = sum(row_bytes) + grand_total += row_total + for i, b in enumerate(row_bytes): + col_totals[i] += b + row_str = f"Rank {r}".ljust(rank_col_width) + for b in row_bytes: + row_str += f"{b / (1024 * 1024):.2f}".rjust(col_width) + row_str += f"{row_total / (1024 * 1024):.2f}".rjust(col_width) + print(row_str) + + # Print totals row + print(separator) + totals_row = "Total".ljust(rank_col_width) + for ct in col_totals: + totals_row += f"{ct / (1024 * 1024):.2f}".rjust(col_width) + totals_row += f"{grand_total / (1024 * 
1024):.2f}".rjust(col_width) + print(totals_row) + print("=" * len(header) + "\n") + + torch.distributed.barrier() + + +class GPUTensorPool: + """ + GPU memory pool for efficient allocation and deallocation of tensors. + + Features: + - Supports multiple tensor shapes and dtypes, each with its own pool + - Dynamic allocation: tensors are created on-demand during allocation + - Efficient reuse: freed tensors are returned to the pool for reuse + - Uses queue-based management for O(1) allocation and deallocation + + Example: + pool = GPUTensorPool(device='cuda:0') + tensor = pool.allocate((128, 512), dtype=torch.float32) + # ... use tensor ... + pool.free(tensor, (128, 512), dtype=torch.float32) + """ + + def __init__(self, device: str = 'cuda', pin_memory: bool = False): + """ + Initialize GPU tensor pool. + + Args: + device: GPU device, default 'cuda' + pin_memory: Whether to use pinned memory (mainly for CPU tensors) + """ + self.device = torch.device(device) + self.pin_memory = pin_memory + + # Maintain a separate pool for each (shape, dtype) combination + # Structure: {(shape, dtype): {'free': deque, 'all': list, 'allocated_count': int}} + self._pools: Dict[Tuple, Dict[str, Any]] = {} + + # Statistics + self._stats = { + 'total_allocated': 0, # Total number of tensors ever allocated + 'current_in_use': 0, # Number of tensors currently in use + 'allocation_requests': 0, # Number of allocation requests + 'free_requests': 0, # Number of free requests + 'pool_hits': 0, # Number of times a tensor was reused from pool + 'pool_misses': 0, # Number of times a new tensor was created + } + + debug_rank("GPUTensorPool: Initialized with dynamic allocation") + + def _get_pool_key(self, shape: Tuple, dtype: torch.dtype) -> Tuple: + """Generate a unique key for the pool based on shape and dtype.""" + return (shape, dtype) + + @staticmethod + def _calculate_memory_size(shape: Tuple, dtype: torch.dtype) -> int: + """Calculate memory size in bytes.""" + element_size = 
torch.tensor([], dtype=dtype).element_size() + numel = 1 + for dim in shape: + numel *= dim + return numel * element_size + + def allocate(self, shape: Tuple, dtype: torch.dtype = torch.float32) -> torch.Tensor: + """ + Allocate a tensor with the specified shape and dtype. + + Args: + shape: Shape of the tensor + dtype: Data type of the tensor, default torch.float32 + + Returns: + Allocated tensor + """ + self._stats['allocation_requests'] += 1 + + pool_key = self._get_pool_key(shape, dtype) + + # Create pool for this (shape, dtype) if it doesn't exist + if pool_key not in self._pools: + self._pools[pool_key] = { + 'free': deque(), # Queue of available tensors + 'all': [], # List of all tensors (for tracking) + 'allocated_count': 0, # Number of allocated tensors + } + + pool = self._pools[pool_key] + + # Try to reuse a tensor from the pool + if len(pool['free']) > 0: + tensor = pool['free'].popleft() + self._stats['pool_hits'] += 1 + debug_rank( + f"GPUTensorPool.allocate: Reused tensor from pool, " + f"shape={shape}, dtype={dtype}, " + f"remaining in pool={len(pool['free'])}" + ) + else: + # Allocate a new tensor + tensor = torch.empty(shape, dtype=dtype, device=self.device, pin_memory=self.pin_memory) + pool['all'].append(tensor) + self._stats['total_allocated'] += 1 + self._stats['pool_misses'] += 1 + + memory_mb = self._calculate_memory_size(shape, dtype) / (1024**2) + debug_rank( + f"GPUTensorPool.allocate: Created new tensor, " + f"shape={shape}, dtype={dtype}, " + f"memory={memory_mb:.2f} MB, " + f"total_created={len(pool['all'])}" + ) + + pool['allocated_count'] += 1 + self._stats['current_in_use'] += 1 + + return tensor + + def free(self, tensor: torch.Tensor): + """ + Return a tensor to the pool for reuse. 
+ + Args: + tensor: Tensor to free + + Raises: + ValueError: If tensor doesn't belong to this pool + """ + self._stats['free_requests'] += 1 + + shape = tensor.shape + dtype = tensor.dtype + + pool_key = self._get_pool_key(shape, dtype) + + if pool_key not in self._pools: + raise ValueError( + f"No pool exists for shape={shape}, dtype={dtype}. " + f"Available pools: {list(self._pools.keys())}" + ) + + pool = self._pools[pool_key] + + # Verify tensor belongs to this pool (use identity check, not value comparison) + tensor_found = any(tensor is t for t in pool['all']) + if not tensor_found: + raise ValueError( + f"Attempting to free a tensor that doesn't belong to this pool " + f"(shape={shape}, dtype={dtype})" + ) + + # Return tensor to the free queue + pool['free'].append(tensor) + pool['allocated_count'] -= 1 + self._stats['current_in_use'] -= 1 + + debug_rank( + f"GPUTensorPool.free: shape={shape}, dtype={dtype}, " + f"available in pool={len(pool['free'])}" + ) + + def get_pool_status(self, shape: Tuple = None, dtype: torch.dtype = None) -> Dict[str, Any]: + """ + Get the status of the memory pool. 
+ + Args: + shape: If specified along with dtype, return status for that specific pool + dtype: Data type (required if shape is specified) + + Returns: + Dictionary containing status information + """ + if shape is not None: + if dtype is None: + raise ValueError("dtype must be specified when shape is provided") + + pool_key = self._get_pool_key(shape, dtype) + + if pool_key not in self._pools: + raise ValueError(f"No pool exists for shape={shape}, dtype={dtype}") + + pool = self._pools[pool_key] + total_count = len(pool['all']) + + return { + 'shape': shape, + 'dtype': dtype, + 'total_count': total_count, + 'allocated_count': pool['allocated_count'], + 'free_count': len(pool['free']), + 'utilization': ( + pool['allocated_count'] / total_count * 100 if total_count > 0 else 0 + ), + } + else: + # Return status for all pools + status = {'global_stats': self._stats.copy(), 'pools': {}} + + for pool_key in self._pools: + shape, dtype = pool_key + status['pools'][pool_key] = self.get_pool_status(shape, dtype) + + return status + + def reset(self): + """Reset the pool, marking all tensors as available.""" + debug_rank("GPUTensorPool: Resetting pool...") + + for pool_key, pool in self._pools.items(): + # Clear and refill the free queue + pool['free'].clear() + for tensor in pool['all']: + pool['free'].append(tensor) + pool['allocated_count'] = 0 + + self._stats['current_in_use'] = 0 + debug_rank("GPUTensorPool: Reset complete") + + def clear(self): + """Clear the pool and release all GPU memory.""" + debug_rank("GPUTensorPool: Clearing pool...") + + for pool_key, pool in self._pools.items(): + # Clear all references, allowing PyTorch GC to reclaim memory + pool['free'].clear() + pool['all'].clear() + + self._pools.clear() + self._stats['current_in_use'] = 0 + + # Trigger GPU cache cleanup + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + debug_rank("GPUTensorPool: Clear complete") + + def __del__(self): + """Destructor to ensure resources are released.""" + 
self.clear() + + +class OffloadTensorGroup: + """ + A group of tensors to be offloaded together. + """ + + def __init__(self, name): + self._name = name + self._tensors = {} + self._offload_event = torch.cuda.Event() + self._reload_event = torch.cuda.Event() + self.offload = True + self.total_offload_bytes = 0 + self.total_tensor_count = 0 + # Using memory pool is for the compatibility with cuda graph. + # Shapes of tensors for expert_fc1 and moe_act are not known in advance, + # so we do not use CPU pool for them. + if name == "expert_fc1" or name == "moe_act": + self.use_cpu_pool = False + else: + self.use_cpu_pool = True + + def push_tensor(self, tag, tensor): + """Push a tensor to the group.""" + self._tensors[tag] = tensor + + def pop_tensor(self, tag): + """Pop a tensor from the group.""" + return self._tensors.pop(tag) + + def record_offload_event(self, stream): + """Record the offload event.""" + self._offload_event.record(stream) + + def wait_offload_event(self, stream): + """Wait for the offload event.""" + stream.wait_event(self._offload_event) + + def record_reload_event(self, stream): + """Record the reload event.""" + self._reload_event.record(stream) + + def wait_reload_event(self, stream): + """Wait for the reload event.""" + stream.wait_event(self._reload_event) + + def update_offload_info(self, tensor): + """Update the offload information.""" + self.total_offload_bytes += tensor.numel() * tensor.element_size() + self.total_tensor_count += 1 + + class PipelineOffloadManager: """ Singleton manager for coordinating activation offloading across pipeline stages. 
@@ -39,6 +395,12 @@ def get_instance(cls): cls.OFFLOAD_MGR = PipelineOffloadManager() return cls.OFFLOAD_MGR + @classmethod + def reset_instance(cls): + """Reset the singleton instance of PipelineOffloadManager.""" + cls.OFFLOAD_MGR = None + cls.OFFLOAD_MGR = PipelineOffloadManager() + def __init__(self): """Initialize the manager with queues and dedicated CUDA streams.""" # Queue to store chunk handlers for backward pass @@ -48,6 +410,27 @@ def __init__(self): # allocate streams and events for synchronization self._d2h_stream = torch.cuda.Stream() self._h2d_stream = torch.cuda.Stream() + # Shared CPU tensor pool for all chunks to improve reuse efficiency + self._cpu_tensor_pool = GPUTensorPool(device="cpu", pin_memory=True) + + # Whether the manager is in warmup phase. + self._is_warmup = True + # Cache OffloadChunkHandler objects for each virtual pipeline stage and each forward pass. + self._cached_chunks_forward = [] + # Cache OffloadChunkHandler objects for each virtual pipeline stage and each backward pass. + self._cached_chunks_backward = [] + # Index of the current backward chunk in the cached chunks backward. + self._cached_chunks_index_backward = 0 + # Index of the current forward chunk in the cached chunks forward. + self._cached_chunks_index_forward = 0 + + self.do_offload = True + + # Do not offload the last X groups so that the reloading won't block the computing stream. + self._offload_margin = 0 + # Sometimes we need to delay the offloading and launch it later. + # The delayed offload groups are stored in a queue. 
+ self._delayed_offload_groups = [] self.reset() @property @@ -60,14 +443,52 @@ def h2d_stream(self): """Get the host-to-device (CPU to GPU) transfer stream.""" return self._h2d_stream + @property + def cpu_tensor_pool(self): + """Get the shared CPU tensor pool.""" + return self._cpu_tensor_pool + + def push_offload_groups(self, group_hook, forced_released_tensors): + """Push the offload groups to the delayed queue.""" + debug_rank(f"pushing offload groups to the delayed queue") + self._delayed_offload_groups.append((group_hook, forced_released_tensors)) + + def flush_delayed_groups(self): + """Flush the delayed groups.""" + debug_rank("flushing delayed groups") + # Flush the delayed groups in reverse order to maintain the order of the groups. + for group_hook, forced_released_tensors in reversed(self._delayed_offload_groups): + group_hook(forced_released_tensors) + self._delayed_offload_groups = [] + def reset(self): """Reset manager state for a new training iteration.""" - set_ideal_affinity_for_current_gpu() self._inside_context = False self._cur_forward_chunk = None self._cur_backward_chunk = None - # Track the first microbatch of the last virtual pipeline stage - self._is_first_last_vpp_chunk = True + # Reset CPU tensor pool to reuse all CPU tensors for next iteration + if hasattr(self, '_cpu_tensor_pool'): + self._cpu_tensor_pool.reset() + + # Call post_warmup_callback after warmup to collect the offload information. 
+ if self._is_warmup and len(self._cached_chunks_forward) > 0: + self.post_warmup_callback() + self._cached_chunks_index_backward = 0 + self._cached_chunks_index_forward = 0 + + for chunk in self._cached_chunks_forward: + chunk.reset() + self._delayed_offload_groups = [] + + @property + def offload_summary_bytes(self) -> Dict[str, int]: + """Offload summary bytes per group collected after warmup.""" + return self._offload_summary_bytes + + @property + def offload_summary_total_bytes(self) -> int: + """Total offloaded bytes collected after warmup.""" + return self._offload_summary_total_bytes def flush(self): """Flush all staged chunks to the backward queue in reverse order.""" @@ -84,33 +505,107 @@ def flush(self): for i in range(self._vpp): self._stages[i] = [] + def disable_offload(self): + """Disable the offload.""" + debug_rank("disable_offload") + self.do_offload = False + for chunk in self._cached_chunks_forward: + chunk.do_offload = False + + def enable_offload(self): + """Enable the offload.""" + debug_rank("enable_offload") + self.do_offload = True + for chunk in self._cached_chunks_forward: + chunk.do_offload = True + + def post_warmup_callback(self): + """Callback after warmup.""" + # pylint: disable=bad-builtin + debug_rank("post_warmup_callback") + self._is_warmup = False + assert len(self._cached_chunks_forward) == len( + self._cached_chunks_backward + ), "Cached chunks forward and backward must have the same length" + for chunk in self._cached_chunks_forward: + chunk.is_warmup = False + assert ( + chunk in self._cached_chunks_backward + ), "Chunk not found in cached chunks backward" + # Update the offload margin to the maximum number of deduplicated groups + self._offload_margin = max(self._offload_margin, chunk.get_max_deduplicated_groups()) + debug_rank(f"offload margin {self._offload_margin}") + # Find the last group with the same name in the cached chunks backward + last_group_with_same_name = {} + for chunk_idx, chunk in 
enumerate(reversed(self._cached_chunks_backward)): + for group in chunk.offload_groups: + last_group_with_same_name[group._name] = group + # Mark the last group with the same name as not offloadable to make sure + # the reloading won't block the main stream. + for name, group in last_group_with_same_name.items(): + if self._offload_margin > 0: + group.offload = False + self._offload_margin -= 1 + debug_rank(f"setting offload to false for group {name} at chunk index {chunk_idx}") + else: + break + debug_rank(f"offload margin {self._offload_margin}") + assert self._offload_margin == 0, "Offload margin is not 0" + # Dump the offload information + total_tensor_count = {} + total_offload_bytes = {} + for chunk in self._cached_chunks_forward: + for group in chunk.offload_groups: + if group.offload: + if group._name not in total_tensor_count: + total_tensor_count[group._name] = 0 + total_tensor_count[group._name] += group.total_tensor_count + if group._name not in total_offload_bytes: + total_offload_bytes[group._name] = 0 + total_offload_bytes[group._name] += group.total_offload_bytes + # Stop statistics at the first backward chunk after which 1F1B is running, + # where the memory cost will not increase anymore. + if chunk is self._cached_chunks_backward[0]: + break + # Cache summary for downstream consumers (e.g., unit tests). 
+ self._offload_summary_bytes = dict(total_offload_bytes) + self._offload_summary_total_bytes = int(sum(total_offload_bytes.values())) + print_offload_summary_table(total_offload_bytes) + def push(self, handler): """Add a chunk handler to the backward queue.""" debug_rank(f"pushing handler {handler}") self._queue.append(handler) + if self._is_warmup: + self._cached_chunks_backward.append(handler) - def pop(self): - """Remove and set the next non-empty chunk as the current backward chunk.""" - assert self.size(), "Cannot pop from empty queue" - while self._queue: - self._cur_backward_chunk = self._queue.popleft() - if not self._cur_backward_chunk.is_empty_chunk(): + def pop_backward_chunk(self, name=None): + """Get the next non-empty backward chunk containing the group with the given name.""" + self._cur_backward_chunk = None + debug_rank(f"popping backward chunk {self._cached_chunks_index_backward}") + debug_rank(f"cached chunks backward {self._cached_chunks_backward}") + for idx, handler in enumerate( + self._cached_chunks_backward[self._cached_chunks_index_backward :] + ): + self._cached_chunks_index_backward += 1 + if not handler.is_empty_chunk(name): + self._cur_backward_chunk = ( + handler # set the first non-empty chunk as the current backward chunk + ) + debug_rank(f"handler {handler} at index {idx} is not empty") break - debug_rank(f"popping handler {self._cur_backward_chunk}") - - def front(self): - """Get the first non-empty chunk handler without removing it from the queue.""" - if not self.size(): - return None - for chunk_handler in self._queue: - if not chunk_handler.is_empty_chunk(): - return chunk_handler + assert self._cur_backward_chunk is not None, "No non-empty chunk found" + + def front_backward_chunk(self, name=None): + """Get the first non-empty backward chunk containing the group with the given name.""" + for idx, handler in enumerate( + self._cached_chunks_backward[self._cached_chunks_index_backward :] + ): + if not 
handler.is_empty_chunk(name): + debug_rank(f"front handler {handler} at index {idx}") + return handler return None - def size(self): - """Return the number of chunk handlers in the queue.""" - return len(self._queue) - def init_model_chunk_offload_handler( self, vp_size, vp_stage, min_offloaded_tensor_size=1024 * 1024 ): @@ -122,8 +617,11 @@ def init_model_chunk_offload_handler( vp_stage: Virtual pipeline stage index (None means stage 0) min_offloaded_tensor_size: Minimum tensor size (in elements) to offload """ + if not self._is_warmup: + return + + vp_size = 1 if vp_size is None else vp_size if self._stages is None: - vp_size = 1 if vp_size is None else vp_size self._vpp = vp_size self._stages = [[] for _ in range(vp_size)] @@ -132,26 +630,34 @@ def init_model_chunk_offload_handler( else: cur_vpp_rank = vp_stage - is_first_last_vpp_chunk = self._is_first_last_vpp_chunk # Flush staged chunks when reaching the last virtual pipeline stage if cur_vpp_rank == self._vpp - 1: self.flush() - # Determine if this is the first microbatch of the last virtual pipeline stage - is_first_last_vpp_chunk = is_first_last_vpp_chunk and (cur_vpp_rank == self._vpp - 1) - cur_chunk = ChunkOffloadHandler(is_first_last_vpp_chunk, min_offloaded_tensor_size) + # Use shared CPU tensor pool for better reuse across chunks + cur_chunk = ChunkOffloadHandler(min_offloaded_tensor_size, self._cpu_tensor_pool) + debug_rank(f"init_model_chunk_offload_handler {cur_chunk}") self._stages[cur_vpp_rank].append(cur_chunk) # For the last stage, push immediately and flush if cur_vpp_rank == self._vpp - 1: - self._is_first_last_vpp_chunk = False self.push(cur_chunk) self.flush() self._cur_forward_chunk = cur_chunk cur_chunk.vpp_rank = cur_vpp_rank - - def set_last_layer(self, is_last_layer): - """Mark whether the current forward chunk is processing the last layer.""" - self._cur_forward_chunk.is_last_layer = is_last_layer + self._cached_chunks_forward.append(cur_chunk) + + def pop_forward_chunk(self, 
name=None): + """Get the next forward pass chunk handler.""" + debug_rank(f"pop_forward_chunk {self._cur_forward_chunk}") + if not self.do_offload: + return self._cur_forward_chunk + while not self._is_warmup and ( + self._cur_forward_chunk is None or self._cur_forward_chunk.finish_all_groups(name) + ): + self._cur_forward_chunk = self._cached_chunks_forward[self._cached_chunks_index_forward] + self._cached_chunks_index_forward += 1 + debug_rank(f"new cur_forward_chunk {self._cur_forward_chunk}") + return self._cur_forward_chunk def cur_forward_chunk(self): """Get the current forward pass chunk handler.""" @@ -161,9 +667,16 @@ def cur_backward_chunk(self): """Get the current backward pass chunk handler.""" return self._cur_backward_chunk + def mark_not_offloadable(self, tensor: torch.Tensor): + """Mark the current forward chunk as not offloadable.""" + if tensor is not None: + tensor.offloading_activation = False + def __enter__(self): """Enter context manager to enable activation offloading hooks.""" debug_rank("----__enter__") + if self._cur_forward_chunk is None or not self.cur_forward_chunk().do_offload: + return from megatron.core.extensions.transformer_engine import cpu_offload if cpu_offload is not None: @@ -179,6 +692,8 @@ def __enter__(self): def __exit__(self, *args: Any): """Exit context manager and restore original tensor saving behavior.""" debug_rank("----__exit__") + if self._cur_forward_chunk is None or not self.cur_forward_chunk().do_offload: + return from megatron.core.extensions.transformer_engine import cpu_offload if cpu_offload is not None: @@ -212,69 +727,103 @@ class ChunkOffloadHandler: Manages tensor groups, coordinates asynchronous GPU-CPU transfers, and handles synchronization. 
""" - @staticmethod - def offload(src_tensor, pin_memory=True): + def offload(self, src_tensor, pin_memory=True, use_cpu_pool=True): """Offload.""" debug_rank("--------offload") if not src_tensor.is_contiguous(): src_tensor = src_tensor.contiguous() - cpu_backup = torch.empty( - src_tensor.size(), - dtype=src_tensor.dtype, - layout=src_tensor.layout, - device="cpu", - pin_memory=pin_memory, - ) + if use_cpu_pool: + cpu_backup = self.cpu_tensor_pool.allocate(src_tensor.shape, dtype=src_tensor.dtype) + else: + cpu_backup = torch.empty( + src_tensor.shape, dtype=src_tensor.dtype, device="cpu", pin_memory=pin_memory + ) cpu_backup.copy_(src_tensor, non_blocking=pin_memory) - state = (src_tensor.device, cpu_backup) + state = (src_tensor.device, cpu_backup, use_cpu_pool) return state - @staticmethod - def reload(state, non_blocking=None): + def reload(self, state, non_blocking=None): """Reload.""" debug_rank("------reload") - dev, cpu_backup = state + dev, cpu_backup, use_cpu_pool = state if non_blocking is None: non_blocking = cpu_backup.is_pinned() - return cpu_backup.to(dev, non_blocking=non_blocking) + gpu_tensor = torch.empty( + cpu_backup.size(), dtype=cpu_backup.dtype, layout=cpu_backup.layout, device=dev + ) + gpu_tensor.copy_(cpu_backup, non_blocking=non_blocking) + if use_cpu_pool: + self.cpu_tensor_pool.free(cpu_backup) + return gpu_tensor - def __init__(self, is_first_last_vpp_chunk, min_offloaded_tensor_size): - # Data Structure to maintain reference to activation tensors - self._tensor_tag_to_state = {} - # Mark the first microbatch of the last virtual pipeline stage - self._is_first_last_vpp_chunk = is_first_last_vpp_chunk + def __init__(self, min_offloaded_tensor_size, cpu_tensor_pool): + self.do_offload = True # Group management for batching offload/reload operations + self.offload_groups = [] self._offloaded_group_index = 0 + # Groups to be offloaded. self._groups_to_offload = [] + # Groups to be reloaded. 
self._groups_to_reload = [] + # Tensor count for the current group. self._tensor_count_current_group = 0 - + # Maximum number of groups to offload or reload. + self._max_group_size = 0 + # Groups being reloaded. + self._reloading_group = [] # Counter for special torch tensor types (FakeTensor, FunctionalTensor) self.torch_tensor_count = 0 self.d2h_stream = PipelineOffloadManager.get_instance().d2h_stream self.h2d_stream = PipelineOffloadManager.get_instance().h2d_stream - self._offload_events = {} - self._reload_events = {} self.min_offloaded_tensor_size = min_offloaded_tensor_size - self.is_last_layer = False + self.cpu_tensor_pool = cpu_tensor_pool + self.is_warmup = True + + def reset(self): + """Reset the chunk offload handler.""" + self._offloaded_group_index = 0 + self._groups_to_offload = [] + self._groups_to_reload = [] + self._tensor_count_current_group = 0 + self._reloading_group = [] - def is_empty_chunk(self): + def find_group_with_name(self, name: str, start_index: int = 0): + """Find the group with the given name starting from the given index.""" + return next( + (group for group in self.offload_groups[start_index:] if group._name == name), None + ) + + def is_empty_chunk(self, name=None): """Check if this chunk has no tensors to manage.""" - return len(self._tensor_tag_to_state) == 0 + debug_rank(f"------is_empty_chunk {self._max_group_size}") + if name is not None: + return self.find_group_with_name(name) is None + return self._max_group_size == 0 - def is_first_last_layer(self): - """ - Check if this is the last layer of the first microbatch of the last vp stage. - These tensors should not be offloaded to avoid unnecessary overhead. 
- """ + def finish_all_groups(self, name=None) -> bool: + """Finish all groups.""" debug_rank( - f"------is_first_last_layer {self._is_first_last_vpp_chunk} {self.is_last_layer}" + f"------finish_all_groups {self} {self._max_group_size} {self._offloaded_group_index}" ) - return self._is_first_last_vpp_chunk and self.is_last_layer + # TODO: check if this is correct + # Mark it as finished when there are no groups to offload or reload + if ( + len(self._groups_to_reload) == 0 + and len(self._groups_to_offload) == 0 + and self._offloaded_group_index > 0 + ): + return True + assert name is not None, "Name is required" + return self.find_group_with_name(name, self._offloaded_group_index) is None + + def find_next_group(self, name=None): + """Find the next group with the given name.""" + assert name is not None, "Name is required" + return self.find_group_with_name(name, self._offloaded_group_index) def tensor_push(self, tensor): """Push tensor to the offload handler.""" @@ -285,26 +834,20 @@ def tensor_push(self, tensor): torch._subclasses.functional_tensor.FunctionalTensor, ), ) + assert not torch_stray_tensor, "Stray tensor should not be offloaded" - if not torch_stray_tensor: - # Assign unique tag based on group index and position within group - tensor_tag = (self._offloaded_group_index, self._tensor_count_current_group) - self._tensor_count_current_group += 1 - assert tensor_tag not in self._tensor_tag_to_state, "Duplicate tensor tag" - self._tensor_tag_to_state[tensor_tag] = tensor - else: - # Use negative group ID for special tensor types - tensor_tag = (-1, self.torch_tensor_count) - self.torch_tensor_count += 1 - self._tensor_tag_to_state[tensor_tag] = tensor + # Assign unique tag based on group index and position within group + tensor_tag = (self._offloaded_group_index, self._tensor_count_current_group) + self._tensor_count_current_group += 1 + self.offload_groups[self._offloaded_group_index - 1].push_tensor(tensor_tag, tensor) debug_rank(f"--------tensor_push 
{tensor_tag}") return tensor_tag def tensor_pop(self, tensor_tag): """Pop tensor from the offload handler.""" debug_rank(f"--------tensor_pop {tensor_tag}") - assert tensor_tag in self._tensor_tag_to_state, f"Tag {tensor_tag} not found" - tensor = self._tensor_tag_to_state.pop(tensor_tag) + group_id, idx = tensor_tag + tensor = self.offload_groups[group_id - 1].pop_tensor(tensor_tag) # If tensor is offloaded (stored as tuple), reload it if isinstance(tensor, tuple): tensor = self.reload(tensor) @@ -313,6 +856,9 @@ def tensor_pop(self, tensor_tag): def tensor_need_offloading_checker(self, tensor): """Check if the tensor needs to be offloaded.""" + debug_rank( + f"tensor_need_offloading_checker {getattr(tensor, 'offloading_activation', None)}" + ) if tensor.numel() < self.min_offloaded_tensor_size: return False # Respect tensor's offload preference if specified @@ -320,83 +866,82 @@ def tensor_need_offloading_checker(self, tensor): return False return True - def bulk_offload_group(self, group_to_offload): + def bulk_offload_group(self): """offload a group of tensors recorded in tensor_push().""" debug_rank("------bulk_offload_group") - assert not self.is_first_last_layer(), "Should not offload first-last layer" - group_id_to_offload, name = group_to_offload - torch.cuda.nvtx.range_push("activation offloading " + name) + group_to_offload = self._groups_to_offload[-1] + torch.cuda.nvtx.range_push("activation offloading " + group_to_offload._name) with torch.cuda.stream(self.d2h_stream): - for tensor_tag, state in self._tensor_tag_to_state.items(): - group_id, _ = tensor_tag - if group_id == group_id_to_offload: - debug_rank(f"------tensor_tag {tensor_tag}") - debug_rank(f"------group_to_offload {group_to_offload}") - assert not isinstance(state, tuple), "Tensor already offloaded" - tensor_on_device = state - if self.tensor_need_offloading_checker(tensor_on_device): - state = self.offload(tensor_on_device) - event = torch.cuda.Event() - event.record(self.d2h_stream) - 
self._offload_events[name] = event - tensor_on_device.record_stream(self.d2h_stream) - self._tensor_tag_to_state[tensor_tag] = state + for tensor_tag, tensor_on_device in group_to_offload._tensors.items(): + if self.tensor_need_offloading_checker(tensor_on_device): + state = self.offload( + tensor_on_device, use_cpu_pool=group_to_offload.use_cpu_pool + ) + if self.is_warmup: + group_to_offload.update_offload_info(tensor_on_device) + tensor_on_device.record_stream(self.d2h_stream) + group_to_offload.push_tensor(tensor_tag, state) + group_to_offload.record_offload_event(self.d2h_stream) + self._groups_to_offload.pop() torch.cuda.nvtx.range_pop() - def get_offload_event(self, name): - """Get the CUDA event for a named offload operation.""" - return self._offload_events.get(name, None) - - def get_reload_event(self, name): - """Get the CUDA event for a named reload operation.""" - return self._reload_events.get(name, None) + def get_max_deduplicated_groups(self): + """Get the maximum number of deduplicated groups.""" + count_modules = [] + for group in self.offload_groups: + if group._name not in count_modules: + count_modules.append(group._name) + return len(count_modules) - def bulk_reload_group(self, group_to_reload): + def bulk_reload_group(self): """Bulk reload group.""" debug_rank("----bulk_reload_group") - found_reload_group = False - group_id_to_reload, name = group_to_reload - torch.cuda.nvtx.range_push("activation reloading " + name) + group_to_reload = self._groups_to_reload[-1] + torch.cuda.nvtx.range_push("activation reloading " + group_to_reload._name) with torch.cuda.stream(self.h2d_stream): - for tensor_label, state in self._tensor_tag_to_state.items(): - group_id, _ = tensor_label - if group_id == group_id_to_reload: - debug_rank(f"----tensor_label {tensor_label}") - found_reload_group = True - event = self.get_offload_event(name) - # Only reload if tensor was offloaded (stored as tuple) - if isinstance(state, tuple): - # Wait for offload to complete 
before reloading - torch.cuda.current_stream().wait_event(event) - recovered_tensor = self.reload(state) - event.record(self.h2d_stream) - self._reload_events[name] = event - debug_rank(f"----recovered_tensor {recovered_tensor.shape}") - self._tensor_tag_to_state[tensor_label] = recovered_tensor + # Wait for offload to complete before reloading + if not is_graph_capturing(): + group_to_reload.wait_offload_event(self.h2d_stream) + for tensor_tag, state in group_to_reload._tensors.items(): + # Only reload if tensor was offloaded (stored as tuple) + if isinstance(state, tuple): + recovered_tensor = self.reload(state) + debug_rank(f"----recovered_tensor {recovered_tensor.shape}") + group_to_reload.push_tensor(tensor_tag, recovered_tensor) + group_to_reload.record_reload_event(self.h2d_stream) + self._groups_to_reload.pop() + # Add the group to the reloading group to wait for the reload event. + self._reloading_group.append(group_to_reload) torch.cuda.nvtx.range_pop() - return found_reload_group def pre_reload_last_layer(self): """Pre-reload the last layer of this chunk to hide reload latency.""" debug_rank("pre_reload_last_layer") - assert not self._is_first_last_vpp_chunk, "Should not pre-reload first chunk" debug_rank(f"len(self._groups_to_reload) {len(self._groups_to_reload)}") if len(self._groups_to_reload) > 0: # Reload the last group (last layer) early - if self.bulk_reload_group(self._groups_to_reload[-1]): - self._groups_to_reload.pop() + self.bulk_reload_group() def should_bulk_offload(self): """Determine if the current group should be offloaded.""" - # Don't offload the first backward chunk's last layer - if self.is_first_last_layer(): + assert len(self._groups_to_offload) > 0, "No groups to offload" + group = self._groups_to_offload[-1] + debug_rank(f"should_bulk_offload {self.is_warmup} {group.offload}") + # Don't offload if the chunk is not in warmup stage + if self.is_warmup: + return True + # Don't offload if the group is marked as not offloadable + if 
not group.offload: return False # Check if next backward chunk is this chunk (for last pipeline stage) - next_backward_chunk = PipelineOffloadManager.get_instance().front() + next_backward_chunk = PipelineOffloadManager.get_instance().front_backward_chunk( + group._name + ) if next_backward_chunk is not None and next_backward_chunk is self: - # Don't offload last layer if it's about to be used immediately - if self.is_last_layer: + # Don't offload the last group with the same name if it's about to be used immediately + if self.find_next_group(group._name) is None: + debug_rank(f"next group {group._name} is not found") return False return True @@ -405,9 +950,8 @@ def bulk_offload(self, forced_released_tensors): """Offload a group of tensors and optionally release their GPU memory.""" debug_rank("----bulk_offload") if self.should_bulk_offload(): - group_to_offload = self._groups_to_offload.pop() - self._groups_to_reload.append(group_to_offload) - self.bulk_offload_group(group_to_offload) + self._groups_to_reload.append(self._groups_to_offload[-1]) + self.bulk_offload_group() # Manually release tensors not auto-freed by torch GC if len(forced_released_tensors) > 0: cur_stream = torch.cuda.current_stream() @@ -419,6 +963,8 @@ def bulk_offload(self, forced_released_tensors): def on_group_commit_forward(self, forced_released_tensors): """Called at the end of a layer group's forward pass to trigger offloading.""" + if not self.do_offload: + return debug_rank("--on_group_commit_forward") # Wait for compute to finish before starting offload self.d2h_stream.wait_stream(torch.cuda.current_stream()) @@ -429,13 +975,16 @@ def bulk_reload(self): debug_rank("--bulk_reload") if len(self._groups_to_reload) > 0: # Reload the next layer group - if self.bulk_reload_group(self._groups_to_reload[-1]): - debug_rank(f"--bulk_reload_group {self._groups_to_reload}") - self._groups_to_reload.pop() + self.bulk_reload_group() else: # Pre-load the last layer of the next backward chunk to hide 
latency - next_backward_chunk = PipelineOffloadManager.get_instance().front() - if next_backward_chunk is not None: + next_backward_chunk = PipelineOffloadManager.get_instance().front_backward_chunk() + # Don't pre-reload the last layer if the next backward chunk hasn't finished fprop yet. + if ( + next_backward_chunk is not None + and next_backward_chunk._offloaded_group_index + == next_backward_chunk._max_group_size + ): next_backward_chunk.pre_reload_last_layer() def on_group_commit_backward(self, name): @@ -443,40 +992,70 @@ def on_group_commit_backward(self, name): Called at the end of a layer group's backward pass. Ensures correct chunk is active and synchronizes reloads. """ + if not self.do_offload: + return debug_rank("--on_group_commit_backward") cur_backward_chunk = PipelineOffloadManager.get_instance().cur_backward_chunk() # Switch to this chunk if it's not already current if cur_backward_chunk is not self: - PipelineOffloadManager.get_instance().pop() + PipelineOffloadManager.get_instance().pop_backward_chunk(name) cur_backward_chunk = PipelineOffloadManager.get_instance().cur_backward_chunk() - assert cur_backward_chunk is self, "Chunk mismatch" + assert cur_backward_chunk is self, f"Chunk mismatch {cur_backward_chunk} {self}" # Wait for reload to complete before using tensors - event = self.get_reload_event(name) - if event is not None: - torch.cuda.current_stream().wait_event(event) - self._offloaded_group_index = self._offloaded_group_index - 1 + if not is_graph_capturing() and len(self._reloading_group) > 0: + for reloading_group in self._reloading_group: + if reloading_group._name == name: + reloading_group.wait_reload_event(torch.cuda.current_stream()) + self._reloading_group.remove(reloading_group) + break def on_group_start_forward(self, name): """ Called at the start of a layer group's forward pass. Increments group index and prepares for offloading. 
""" - debug_rank(f"--on_group_start_forward") + if not self.do_offload: + return + debug_rank(f"--on_group_start_forward {name}") self._offloaded_group_index = self._offloaded_group_index + 1 + if self.is_warmup: + self.offload_groups.append(OffloadTensorGroup(name)) + self._max_group_size = max(self._max_group_size, self._offloaded_group_index) + debug_rank(f"max group size {self._max_group_size}") + else: + for group in self.offload_groups[self._offloaded_group_index - 1 :]: + if group._name == name: + break + self._offloaded_group_index = self._offloaded_group_index + 1 self._tensor_count_current_group = 0 - self._groups_to_offload.append((self._offloaded_group_index, name)) + self._groups_to_offload.append(self.offload_groups[self._offloaded_group_index - 1]) + debug_rank(f"groups to offload {self._groups_to_offload}") def on_group_start_backward(self): """ Called at the start of a layer group's backward pass. Triggers reloading of tensors from CPU. """ - debug_rank("--on_group_start_backward") + if not self.do_offload: + return + debug_rank(f"--on_group_start_backward {self}") # Wait for compute to finish before starting reload self.h2d_stream.wait_stream(torch.cuda.current_stream()) self.bulk_reload() +def fine_grained_offloading_disable_offload(): + """Disable the offload.""" + debug_rank("fine_grained_offloading_disable_offload") + PipelineOffloadManager.get_instance().disable_offload() + + +def fine_grained_offloading_enable_offload(): + """Enable the offload.""" + debug_rank("fine_grained_offloading_enable_offload") + PipelineOffloadManager.get_instance().enable_offload() + + class FineGrainedOffloadingGroupCommitFunction(torch.autograd.Function): """ Identity operation that marks the end of a layer group for offload synchronization. 
@@ -484,19 +1063,18 @@ class FineGrainedOffloadingGroupCommitFunction(torch.autograd.Function): """ @staticmethod - def forward(ctx, *args): + def forward(ctx, tensor, cur_forward_chunk, name, forced_released_tensors, delay_offload): # pylint: disable=missing-function-docstring debug_rank("FineGrainedOffloadingGroupCommitFunction forward") - forced_released_tensors = args[-1] - name = args[-2] - cpu_offload_handler = args[-3] - tensor = args[:-3] - cpu_offload_handler.on_group_commit_forward(forced_released_tensors) - ctx.cpu_offload_handler = cpu_offload_handler + if delay_offload: + PipelineOffloadManager.get_instance().push_offload_groups( + cur_forward_chunk.on_group_commit_forward, forced_released_tensors + ) + else: + cur_forward_chunk.on_group_commit_forward(forced_released_tensors) + ctx.cpu_offload_handler = cur_forward_chunk ctx.name = name - - # return the identical tensor return tensor @staticmethod @@ -506,19 +1084,49 @@ def backward(ctx, *grad_output): cpu_offload_handler = ctx.cpu_offload_handler cpu_offload_handler.on_group_commit_backward(ctx.name) - return grad_output + (None, None, None) + return grad_output + (None, None, None, None) -def fine_grained_offloading_group_commit(*tensor, name, forced_released_tensors=[]): +def fine_grained_offloading_group_commit( + tensor, name, forced_released_tensors=None, delay_offload=False +): """ Specify the tensors to be released after offloading. forced_released_tensors is a list of tensors to be released after offloading. The tensors will be untyped_storage().resize_(0) after offloading. Note: specify the tensors only when they are not automatically released by torch gc. """ + # Be permissive: callers may pass a tuple/list of outputs (e.g., (q, k, v)). + # We only need to insert a single identity op into the autograd graph; applying + # it to the first tensor output is sufficient and keeps callers' code minimal. 
+ if forced_released_tensors is None: + forced_released_tensors = [] + if isinstance(tensor, tuple): + if len(tensor) == 0: + return tensor + committed0 = fine_grained_offloading_group_commit( + tensor[0], + name=name, + forced_released_tensors=forced_released_tensors, + delay_offload=delay_offload, + ) + return (committed0,) + tensor[1:] + if isinstance(tensor, list): + if len(tensor) == 0: + return tensor + committed0 = fine_grained_offloading_group_commit( + tensor[0], + name=name, + forced_released_tensors=forced_released_tensors, + delay_offload=delay_offload, + ) + return [committed0] + tensor[1:] + cur_forward_chunk = PipelineOffloadManager.get_instance().cur_forward_chunk() + if cur_forward_chunk is None: + return tensor return FineGrainedOffloadingGroupCommitFunction.apply( - *tensor, cur_forward_chunk, name, forced_released_tensors + tensor, cur_forward_chunk, name, forced_released_tensors, delay_offload ) @@ -544,32 +1152,105 @@ def backward(ctx, grad_output): debug_rank("FineGrainedOffloadingGroupStartFunction backward") cpu_offload_handler = ctx.cpu_offload_handler cpu_offload_handler.on_group_start_backward() - return grad_output, None, None + return grad_output, None, None, None def fine_grained_offloading_group_start(tensor, name=None): """Mark the start of a layer group and prepare for offload/reload.""" - cur_forward_chunk = PipelineOffloadManager.get_instance().cur_forward_chunk() + cur_forward_chunk = PipelineOffloadManager.get_instance().pop_forward_chunk(name=name) + if cur_forward_chunk is None: + return tensor return FineGrainedOffloadingGroupStartFunction.apply(tensor, cur_forward_chunk, name) -def get_fine_grained_offloading_context(flag): - """Get the fine-grained offload context""" - return PipelineOffloadManager.get_instance() if flag else nullcontext() +class FineGrainedOffloadingBackwardRecordFunction(torch.autograd.Function): + """ + Identity operation that marks the end of a layer group for offload synchronization. 
+ Triggers offload during forward and synchronizes reload during backward. + """ + @staticmethod + def forward(ctx, tensor, event: torch.cuda.Event) -> torch.Tensor: + """Forward pass for cuda graph capture.""" + ctx.event = event + return tensor + + @staticmethod + def backward(ctx, grad_output): + """Record the backward event and wait for the h2d stream on cuda graph stream.""" + h2d_stream = PipelineOffloadManager.get_instance().h2d_stream + torch.cuda.current_stream().record_event(ctx.event) + torch.cuda.current_stream().wait_stream(h2d_stream) + return grad_output, None -def fine_grained_offloading_set_last_layer(is_last_layer): - """Set the last layer flag.""" - PipelineOffloadManager.get_instance().set_last_layer(is_last_layer) +class FineGrainedActivationOffloadingInterface: + """Interface for fine-grained activation offloading.""" -def fine_grained_offloading_init_chunk_handler(vp_size, vp_stage, min_offloaded_tensor_size): - """Initialize the chunk handler, called at the start of a microbatch forward pass.""" - PipelineOffloadManager.get_instance().init_model_chunk_offload_handler( - vp_size, vp_stage, min_offloaded_tensor_size - ) + def __init__(self, offload: bool, tensor: torch.Tensor, name: str): + self.offload = offload + self.tensor = tensor + self.name = name + def __enter__(self): + """Enter context manager to enable activation offloading hooks.""" + if self.offload: + self.tensor = fine_grained_offloading_group_start(self.tensor, self.name) + PipelineOffloadManager.get_instance().__enter__() + return self.tensor -def fine_grained_offloading_reset(): - """Reset the chunk handler, called at the start of a training iteration.""" - PipelineOffloadManager.get_instance().reset() + def __exit__(self, *args: Any): + """Exit context manager to disable activation offloading hooks.""" + if self.offload: + PipelineOffloadManager.get_instance().__exit__() + + @staticmethod + def init_chunk_handler(vp_size, vp_stage, min_offloaded_tensor_size): + """Initialize 
the chunk handler, called at the start of a microbatch forward pass.""" + PipelineOffloadManager.get_instance().init_model_chunk_offload_handler( + vp_size, vp_stage, min_offloaded_tensor_size + ) + + @staticmethod + def get_context(flag): + """Get the fine-grained offload context""" + return PipelineOffloadManager.get_instance() if flag else nullcontext() + + @staticmethod + def group_commit(tensor, name, forced_released_tensors=None, delay_offload=False): + """Group commit the tensors.""" + return fine_grained_offloading_group_commit( + tensor, name, forced_released_tensors, delay_offload + ) + + @staticmethod + def mark_not_offloadable(tensor: torch.Tensor): + """Mark the tensor as not offloadable.""" + PipelineOffloadManager.get_instance().mark_not_offloadable(tensor) + + @staticmethod + def forward_record(event: torch.cuda.Event) -> None: + """Record the forward event for cuda graph capture.""" + d2h_stream = PipelineOffloadManager.get_instance().d2h_stream + torch.cuda.current_stream().record_event(event) + torch.cuda.current_stream().wait_stream(d2h_stream) + + @staticmethod + def backward_record(tensor, event: torch.cuda.Event) -> torch.Tensor: + """Record the backward event for cuda graph capture.""" + return FineGrainedOffloadingBackwardRecordFunction.apply(tensor, event) + + @staticmethod + def reset(): + """Reset the chunk handler.""" + PipelineOffloadManager.get_instance().reset() + + @staticmethod + def reset_instance(): + """Reset the singleton instance.""" + PipelineOffloadManager.reset_instance() + + @staticmethod + def flush_delayed_groups(): + """Flush the delayed groups.""" + PipelineOffloadManager.get_instance().flush_delayed_groups() diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index 9dc79ed11f7..dadbd199ab7 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -11,7 +11,7 @@ from megatron.core import parallel_state from 
megatron.core.enums import ModelType from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_reset, + FineGrainedActivationOffloadingInterface as off_interface, ) from megatron.core.pipeline_parallel.p2p_communication import P2PCommunicator from megatron.core.pipeline_parallel.utils import ( @@ -581,9 +581,6 @@ def forward_backward_no_pipelining( if config.timers is not None: config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time) - if not forward_only and config.fine_grained_activation_offloading: - fine_grained_offloading_reset() - no_sync_func = config.no_sync_func if no_sync_func is None: no_sync_func = contextlib.nullcontext @@ -682,6 +679,9 @@ def forward_backward_no_pipelining( pg_collection=pg_collection, ) + if not forward_only and config.fine_grained_activation_offloading: + off_interface.reset() + if config.timers is not None: config.timers('forward-backward').stop() @@ -1042,9 +1042,6 @@ def forward_backward_pipelining_with_interleaving( adjust_tensor_shapes_fn is None ), "adjust_tensor_shapes_fn is not supported for interleaved pipeline parallelism" - if not forward_only and config.fine_grained_activation_offloading: - fine_grained_offloading_reset() - if config.overlap_p2p_comm and config.batch_p2p_comm: raise ValueError("Can not use both overlap_p2p_comm and batch_p2p_comm") @@ -2049,6 +2046,8 @@ def pp_post_backward(input_tensor_grad, vp_stage=None): pg_collection=pg_collection, ) + if not forward_only and config.fine_grained_activation_offloading: + off_interface.reset() # Restore config.grad_sync_func and config.param_sync_func. 
if forward_only: config.grad_sync_func, config.param_sync_func = grad_sync_func, param_sync_func @@ -2190,9 +2189,6 @@ def forward_backward_pipelining_without_interleaving( if config.timers is not None: config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time) - if not forward_only and config.fine_grained_activation_offloading: - fine_grained_offloading_reset() - # Disable async grad reductions no_sync_func = config.no_sync_func if no_sync_func is None: @@ -2440,6 +2436,9 @@ def enable_grad_sync(): pg_collection=pg_collection, ) + if not forward_only and config.fine_grained_activation_offloading: + off_interface.reset() + if config.timers is not None: config.timers('forward-backward').stop() diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index d38f6d702c0..bda6334fc4b 100644 --- a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -1,5 +1,6 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+import logging from abc import ABC, abstractmethod from contextlib import contextmanager from typing import Callable, Optional @@ -7,7 +8,9 @@ import torch from torch.autograd import Variable -from megatron.core.utils import get_pg_rank, get_pg_size, make_viewless_tensor +from megatron.core.utils import get_pg_rank, get_pg_size, log_single_rank, make_viewless_tensor + +logger = logging.getLogger(__name__) def is_pp_first_stage(pp_group: torch.distributed.ProcessGroup): @@ -87,19 +90,13 @@ def set_ideal_affinity_for_current_gpu(): try: import cuda.bindings.driver as cuda_driver import cuda.bindings.runtime as cuda_runtime - except ImportError: + except: try: import cuda.cuda as cuda_driver import cuda.cudart as cuda_runtime - except ImportError: - # print("cuda-python may not be installed, skipping GPU affinity setting") - warnings.warn("cuda-python may not be installed, skipping GPU affinity setting") - return - try: - import pynvml - except ImportError: - warnings.warn("pynvml is not installed, skipping GPU affinity setting") - return + except: + raise RuntimeError("Please install cuda-python to enable GPU affinity setting") + import pynvml # Get current CUDA device ID err, device_id = cuda_runtime.cudaGetDevice() @@ -112,6 +109,12 @@ def set_ideal_affinity_for_current_gpu(): handle = pynvml.nvmlDeviceGetHandleByUUID("GPU-" + str(uuid.UUID(bytes=device_uuid.bytes))) pynvml.nvmlDeviceSetCpuAffinity(handle) + log_single_rank( + logger, + logging.WARNING, + f"Set CPU affinity for all GPUs for optimal host-device transfer performance", + ) + @contextmanager def stream_acquire_context(stream, event): diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 0c5309a5876..c3c7dad250a 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -25,9 +25,7 @@ get_tensor_model_parallel_world_size, ) from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - 
fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, + FineGrainedActivationOffloadingInterface as off_interface, ) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel.mappings import all_gather_last_dim_from_tensor_parallel_region @@ -830,14 +828,13 @@ def forward( if output_gate: assert split_qkv, "output_gate is not supported for unsplit mixed_qkv tensor." - if self.offload_qkv_linear: - hidden_states = fine_grained_offloading_group_start(hidden_states, name="qkv_linear") - with get_fine_grained_offloading_context(self.offload_qkv_linear): + with off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") as hidden_states: qkv_output = self.get_query_key_value_tensors( hidden_states, key_value_states, output_gate=output_gate, split_qkv=split_qkv ) if self.offload_qkv_linear: - (qkv_output,) = fine_grained_offloading_group_commit( + # `qkv_output` may be a tuple; commit supports tuple/list and will keep structure. + qkv_output = off_interface.group_commit( qkv_output, name="qkv_linear", forced_released_tensors=[] ) @@ -989,11 +986,11 @@ def forward( packed_seq_params=packed_seq_params, ) else: - if self.offload_core_attention and self.training: - query = fine_grained_offloading_group_start(query, name="core_attn") if inference_context is None or inference_context.is_static_batching(): # Static batching attention kernel. 
- with get_fine_grained_offloading_context(self.offload_core_attention): + with off_interface( + self.offload_core_attention and self.training, query, "core_attn" + ) as query: core_attn_out = self.core_attention( query, key, @@ -1023,7 +1020,7 @@ def forward( ) core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)') if self.offload_core_attention and self.training: - (core_attn_out,) = fine_grained_offloading_group_commit( + core_attn_out = off_interface.group_commit( core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] ) @@ -1046,13 +1043,11 @@ def forward( # ================= nvtx_range_push(suffix="linear_proj") - if self.offload_attn_proj: - core_attn_out = fine_grained_offloading_group_start(core_attn_out, name="attn_proj") - with get_fine_grained_offloading_context(self.offload_attn_proj): + with off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") as core_attn_out: output, bias = self.linear_proj(core_attn_out) if self.offload_attn_proj: - output, bias = fine_grained_offloading_group_commit( - output, bias, name="attn_proj", forced_released_tensors=[core_attn_out] + output = off_interface.group_commit( + output, name="attn_proj", forced_released_tensors=[core_attn_out] ) nvtx_range_pop(suffix="linear_proj") diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index aec5ac00bab..615e12e09d6 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -26,9 +26,7 @@ from megatron.core.fusions.fused_weighted_squared_relu import weighted_squared_relu_impl from megatron.core.jit import jit_fuser from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, + FineGrainedActivationOffloadingInterface as off_interface, ) from megatron.core.tensor_parallel.layers import ( _initialize_affine_weight_cpu, 
@@ -662,7 +660,7 @@ def __init__( set_save_original_input(self.linear_fc2) # This is to avoid the CPU overhead of multiple d2h copies - if self.offload_expert_fc1 and not (self.config.fp8 or self.config.fp4): + if self.offload_expert_fc1: from megatron.core.extensions.transformer_engine import set_save_original_input set_save_original_input(self.linear_fc1) @@ -731,18 +729,15 @@ def forward( # Probs already applied, so reset to 1. permuted_probs = torch.ones_like(permuted_probs) - if self.offload_expert_fc1: - permuted_local_hidden_states = fine_grained_offloading_group_start( - permuted_local_hidden_states, name="expert_fc1" - ) - with get_fine_grained_offloading_context(self.offload_expert_fc1): + with off_interface( + self.offload_expert_fc1, permuted_local_hidden_states, "expert_fc1" + ) as permuted_local_hidden_states: fc1_output, bias_parallel = self.linear_fc1( permuted_local_hidden_states, tokens_per_expert ) if self.offload_expert_fc1: - fc1_output, bias_parallel = fine_grained_offloading_group_commit( + fc1_output = off_interface.group_commit( fc1_output, - bias_parallel, name="expert_fc1", forced_released_tensors=[permuted_local_hidden_states], ) @@ -805,24 +800,24 @@ def glu(x): intermediate_parallel = intermediate_parallel.to(original_dtype) return intermediate_parallel - if self.offload_moe_act: - fc1_output = fine_grained_offloading_group_start(fc1_output, name="moe_act") - if self.activation_recompute: self.activation_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with get_fine_grained_offloading_context(self.offload_moe_act): + with off_interface(self.offload_moe_act, fc1_output, "moe_act") as fc1_output: bias_act_output = self.activation_checkpoint.checkpoint( bias_act_func, fc1_output, bias_parallel, permuted_probs ) else: - with get_fine_grained_offloading_context(self.offload_moe_act): + with off_interface(self.offload_moe_act, fc1_output, "moe_act") as fc1_output: bias_act_output = bias_act_func(fc1_output, bias_parallel, 
permuted_probs) output, output_bias = self.linear_fc2(bias_act_output, tokens_per_expert) if self.activation_recompute: self.activation_checkpoint.discard_output_and_register_recompute(output) + + # Delay the offload of the moe act until after the linear_fc2 has been computed + # to make sure the fc1_output is reloaded to GPU before recomputing moe_act. if self.offload_moe_act: - (output,) = fine_grained_offloading_group_commit( + output = off_interface.group_commit( output, name="moe_act", forced_released_tensors=[fc1_output] ) diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index ed90fdffa97..9689056e325 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -23,9 +23,7 @@ apply_rotary_pos_emb, ) from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, + FineGrainedActivationOffloadingInterface as off_interface, ) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel.layers import ColumnParallelLinear @@ -244,27 +242,32 @@ def forward( # Get the query, key and value tensors based on the type of attention - # self or cross attn. 
# query: [96, 1, 16, 128], key:[96, 1, 16, 128], value:[96, 1, 16, 128] - if self.config.experimental_attention_variant is None: - query, key, value = self.get_query_key_value_tensors( - hidden_states, - key_value_states, - position_ids, - packed_seq_params, - inference_context=inference_context, - ) - elif self.config.experimental_attention_variant == "dsa": - query, key, value, q_compressed, _ = self.get_query_key_value_tensors( - hidden_states, - key_value_states, - position_ids, - packed_seq_params, - inference_context=inference_context, - return_compressed_tensors=True, - ) - else: - raise ValueError( - f"Unsupported experimental attention variant: " - f"{self.config.experimental_attention_variant}" + with off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") as hidden_states: + if self.config.experimental_attention_variant is None: + query, key, value = self.get_query_key_value_tensors( + hidden_states, + key_value_states, + position_ids, + packed_seq_params, + inference_context=inference_context, + ) + elif self.config.experimental_attention_variant == "dsa": + query, key, value, q_compressed, _ = self.get_query_key_value_tensors( + hidden_states, + key_value_states, + position_ids, + packed_seq_params, + inference_context=inference_context, + return_compressed_tensors=True, + ) + else: + raise ValueError( + f"Unsupported experimental attention variant: " + f"{self.config.experimental_attention_variant}" + ) + if self.offload_qkv_linear: + query = off_interface.group_commit( + query, name="qkv_linear", forced_released_tensors=[hidden_states] ) # =================================================== @@ -292,11 +295,10 @@ def forward( query, key, value, attention_mask, packed_seq_params=packed_seq_params ) else: - if self.offload_core_attention and self.training: - query = fine_grained_offloading_group_start(query, name="core_attn") - if inference_context is None or inference_context.is_static_batching(): - with 
get_fine_grained_offloading_context(self.offload_core_attention): + with off_interface( + self.offload_core_attention and self.training, query, "core_attn" + ) as query: if self.config.experimental_attention_variant is None: core_attn_out = self.core_attention( query, @@ -346,7 +348,7 @@ def forward( if not inference_context.is_decode_only(): core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)') if self.offload_core_attention and self.training: - (core_attn_out,) = fine_grained_offloading_group_commit( + core_attn_out = off_interface.group_commit( core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] ) @@ -374,13 +376,11 @@ def forward( # ================= # Output. [sq, b, h] # ================= - if self.offload_attn_proj: - core_attn_out = fine_grained_offloading_group_start(core_attn_out, name="attn_proj") - with get_fine_grained_offloading_context(self.offload_attn_proj): + with off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") as core_attn_out: output, bias = self.linear_proj(core_attn_out) if self.offload_attn_proj: - output, bias = fine_grained_offloading_group_commit( - output, bias, name="attn_proj", forced_released_tensors=[core_attn_out] + output = off_interface.group_commit( + output, name="attn_proj", forced_released_tensors=[core_attn_out] ) return output, bias diff --git a/megatron/core/transformer/multi_token_prediction.py b/megatron/core/transformer/multi_token_prediction.py index bde3149f5f4..8d5c479aa59 100755 --- a/megatron/core/transformer/multi_token_prediction.py +++ b/megatron/core/transformer/multi_token_prediction.py @@ -13,9 +13,6 @@ from megatron.core.fp8_utils import get_fp8_context from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider from megatron.core.packed_seq_params import PackedSeqParams -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_set_last_layer, -) from megatron.core.process_groups_config 
import ProcessGroupCollection from megatron.core.tensor_parallel import ( gather_from_tensor_model_parallel_region, @@ -1114,8 +1111,6 @@ def forward( hidden_states_list = list(torch.chunk(hidden_states, 1 + offset, dim=0)) hidden_states = hidden_states_list[offset] for layer_number in range(len(self.layers)): - if self.config.fine_grained_activation_offloading: - fine_grained_offloading_set_last_layer(layer_number == len(self.layers) - 1) (hidden_states, input_ids, position_ids) = self.layers[layer_number]( input_ids=input_ids, position_ids=position_ids, diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index cbbd7ec00eb..b28a66400e0 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -16,9 +16,6 @@ from megatron.core.fusions.fused_layer_norm import FusedLayerNorm from megatron.core.inference.contexts import BaseInferenceContext from megatron.core.packed_seq_params import PackedSeqParams -from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_set_last_layer, -) from megatron.core.pipeline_parallel.utils import is_vp_first_stage, is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.enums import CudaGraphScope, LayerType @@ -736,11 +733,6 @@ def forward( else: inner_quantization_context = nullcontext() - if self.config.fine_grained_activation_offloading: - fine_grained_offloading_set_last_layer( - l_no == self.num_layers_per_pipeline_rank - 1 - ) - with self.offload_context, inner_quantization_context: hidden_states, context = layer( hidden_states=hidden_states, diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index ce90aaf357a..a486b6ed3d5 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -543,9 +543,7 @@ 
def _forward_attention( otherwise None. """ from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, + FineGrainedActivationOffloadingInterface as off_interface, ) inference_context = deprecate_inference_params(inference_context, inference_params) @@ -553,17 +551,15 @@ def _forward_attention( # Residual connection. residual = hidden_states - if self.offload_attn_norm: - hidden_states = fine_grained_offloading_group_start(hidden_states, name="attn_norm") # Optional Input Layer norm if self.recompute_input_layernorm: self.input_layernorm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with get_fine_grained_offloading_context(self.offload_attn_norm): + with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states: input_layernorm_output = self.input_layernorm_checkpoint.checkpoint( self.input_layernorm, hidden_states ) else: - with get_fine_grained_offloading_context(self.offload_attn_norm): + with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states: input_layernorm_output = self.input_layernorm(hidden_states) # Self attention. @@ -598,8 +594,10 @@ def _forward_attention( ) nvtx_range_pop(suffix="self_attn_bda") + # Delay the offload of the attention norm until after the self_attn_bda has been computed + # because the residual is needed in the self_attn_bda. 
if self.offload_attn_norm: - (hidden_states,) = fine_grained_offloading_group_commit( + hidden_states = off_interface.group_commit( hidden_states, name="attn_norm", forced_released_tensors=[residual] ) @@ -647,24 +645,21 @@ def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None) """ from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_start, - get_fine_grained_offloading_context, + FineGrainedActivationOffloadingInterface as off_interface, ) # Residual connection. residual = hidden_states - if self.offload_mlp_norm: - hidden_states = fine_grained_offloading_group_start(hidden_states, name="mlp_norm") # Optional Layer norm post the cross-attention. if self.recompute_pre_mlp_layernorm: self.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with get_fine_grained_offloading_context(self.offload_mlp_norm): + with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states: pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint( self.pre_mlp_layernorm, hidden_states ) else: - with get_fine_grained_offloading_context(self.offload_mlp_norm): + with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states: pre_mlp_layernorm_output = self.pre_mlp_layernorm(hidden_states) nvtx_range_push(suffix="mlp") @@ -750,7 +745,7 @@ def _forward_post_mlp(self, mlp_output_with_bias, residual): """ from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - fine_grained_offloading_group_commit, + FineGrainedActivationOffloadingInterface as off_interface, ) # TODO: could we move `bias_dropout_add_exec_handler` itself @@ -761,8 +756,10 @@ def _forward_post_mlp(self, mlp_output_with_bias, residual): mlp_output_with_bias, residual, self.hidden_dropout ) nvtx_range_pop(suffix="mlp_bda") + # Delay the offload of the mlp norm until after the mlp_bda has been computed + # because the residual is needed in the mlp_bda. 
if self.offload_mlp_norm: - (hidden_states,) = fine_grained_offloading_group_commit( + hidden_states = off_interface.group_commit( hidden_states, name="mlp_norm", forced_released_tensors=[residual] ) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 027449b1729..b94b5b45544 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1312,6 +1312,9 @@ def validate_args(args, defaults={}): if args.fine_grained_activation_offloading: assert args.transformer_impl == 'transformer_engine', \ "Fine-grained activation offloading is only supported with transformer_engine implementation" + if is_te_min_version("2.10.0"): + assert os.getenv("NVTE_CPU_OFFLOAD_V1", "0") == "1", \ + "For fine-grained activation offloading with TE >= 2.10.0, NVTE_CPU_OFFLOAD_V1 should be set to 1 to avoid offloading weights." if args.mtp_num_layers: assert not args.use_legacy_models, "The legacy Megatron models does not support Multi-Token Prediction (MTP)." diff --git a/megatron/training/training.py b/megatron/training/training.py index 5c52f907fc6..13ad0025e43 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -728,11 +728,16 @@ def pretrain( args = get_args() timers = get_timers() + if args.fine_grained_activation_offloading: + from megatron.core.pipeline_parallel.utils import ( + set_ideal_affinity_for_current_gpu + ) + set_ideal_affinity_for_current_gpu() + if args.batch_invariant_mode: print_rank_0("Enabling batch invariant mode globally",flush=True) enable_batch_invariant_mode() - if args.log_progress: append_to_progress_log("Starting job") diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json index bc1062ce151..038ed2be724 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5275215360.0, - "2": 5275420160.0, - "3": 5275622912.0, - "4": 5275217408.0, - "5": 5275420160.0, - "6": 5275622912.0, - "7": 5275825664.0, - "8": 5276028416.0, - "9": 5276231168.0, - "10": 5276433920.0, - "11": 5276636672.0, - "12": 5276839424.0, - "13": 5277042176.0, - "14": 5277244928.0, - "15": 5277447680.0, - "16": 5277650432.0, - "17": 5277853184.0, - "18": 5278055936.0, - "19": 5278258688.0, - "20": 5278461440.0, - "21": 5278664192.0, - "22": 5278866944.0, - "23": 5279069696.0, - "24": 5279272448.0, - "25": 5279475200.0, - "26": 5279677952.0, - "27": 5279880704.0, - "28": 5280083456.0, - "29": 5280286208.0, - "30": 5280488960.0, - "31": 5280691712.0, - "32": 5280894464.0, - "33": 5281097216.0, - "34": 5281299968.0, - "35": 5281502720.0, - "36": 5281705472.0, - "37": 5281908224.0, - "38": 5282110976.0, - "39": 5282313728.0, - "40": 5282516480.0, - "41": 5282719232.0, - "42": 5282921984.0, - "43": 5283124736.0, - "44": 5283327488.0, - "45": 5283530240.0, - "46": 5283732992.0, - "47": 5283935744.0, - "48": 5284138496.0, - "49": 5284341248.0, - "50": 5284544000.0 + "1": 5283616256.0, + "2": 5288015360.0, + "3": 5288218112.0, + "4": 5288420864.0, + "5": 5288623616.0, + "6": 5287812608.0, + "7": 5288015360.0, + "8": 5288218112.0, + "9": 5287711232.0, + "10": 5287913984.0, + "11": 5288116736.0, + "12": 5288319488.0, + "13": 5288522240.0, + "14": 5288724992.0, + "15": 5288927744.0, + "16": 5289130496.0, + "17": 5289333248.0, + "18": 5289536000.0, + "19": 5289738752.0, + "20": 5289941504.0, + "21": 5290144256.0, + "22": 5290347008.0, + "23": 5290549760.0, + "24": 5290752512.0, + "25": 5290955264.0, + "26": 5291158016.0, + "27": 
5291360768.0, + "28": 5291563520.0, + "29": 5291766272.0, + "30": 5291969024.0, + "31": 5292171776.0, + "32": 5292374528.0, + "33": 5292577280.0, + "34": 5292780032.0, + "35": 5292982784.0, + "36": 5293185536.0, + "37": 5293388288.0, + "38": 5293591040.0, + "39": 5293793792.0, + "40": 5293996544.0, + "41": 5294199296.0, + "42": 5294402048.0, + "43": 5294604800.0, + "44": 5294807552.0, + "45": 5295010304.0, + "46": 5295213056.0, + "47": 5295415808.0, + "48": 5295618560.0, + "49": 5295821312.0, + "50": 5296024064.0 } }, "mem-max-allocated-bytes": { @@ -341,4 +341,4 @@ "50": 1.91915 } } -} \ No newline at end of file +} diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml index be34eb9aec5..38528836659 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml @@ -5,6 +5,7 @@ ENV_VARS: NCCL_NVLS_ENABLE: 0 PYTHONWARNINGS: ignore NCCL_DEBUG: VERSION + NVTE_CPU_OFFLOAD_V1: 1 NVTE_FUSED_ATTN: 0 NCCL_ALGO: ^NVLS CUBLAS_WORKSPACE_CONFIG: ':4096:8' @@ -134,7 +135,6 @@ TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular METRICS: # - "iteration-time" - "lm loss" - - "num-zeros" - "mem-allocated-bytes" - "mem-max-allocated-bytes" - "mtp_1 loss" diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json index ca64f30b0fb..9cc2fa69da7 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4305058304.0, - "2": 4305059840.0, - "3": 4305059840.0, - "4": 4305059840.0, - "5": 4305059840.0, - "6": 4305059840.0, - "7": 4305059840.0, - "8": 4305059840.0, - "9": 4305059840.0, - "10": 4305059840.0, - "11": 4305059840.0, - "12": 4305059840.0, - "13": 4305059840.0, - "14": 4305059840.0, - "15": 4305059840.0, - "16": 4305059840.0, - "17": 4305059840.0, - "18": 4305059840.0, - "19": 4305059840.0, - "20": 4305059840.0, - "21": 4305059840.0, - "22": 4305059840.0, - "23": 4305059840.0, - "24": 4305059840.0, - "25": 4305059840.0, - "26": 4305059840.0, - "27": 4305059840.0, - "28": 4305059840.0, - "29": 4305059840.0, - "30": 4305059840.0, - "31": 4305059840.0, - "32": 4305059840.0, - "33": 4305059840.0, - "34": 4305059840.0, - "35": 4305059840.0, - "36": 4305059840.0, - "37": 4305059840.0, - "38": 4305059840.0, - "39": 4305059840.0, - "40": 4305059840.0, - "41": 4305059840.0, - "42": 4305059840.0, - "43": 4305059840.0, - "44": 4305059840.0, - "45": 4305059840.0, - "46": 4305059840.0, - "47": 4305059840.0, - "48": 4305059840.0, - "49": 4305059840.0, - "50": 4305059840.0 + "1": 4313446912.0, + "2": 4313448448.0, + "3": 4313448448.0, + "4": 4313448448.0, + "5": 4313448448.0, + "6": 4313448448.0, + "7": 4313448448.0, + "8": 4313448448.0, + "9": 4313448448.0, + "10": 4313448448.0, + "11": 4313448448.0, + "12": 4313448448.0, + "13": 4313448448.0, + "14": 4313448448.0, + "15": 4313448448.0, + "16": 4313448448.0, + "17": 4313448448.0, + "18": 4313448448.0, + "19": 4313448448.0, + "20": 4313448448.0, + "21": 4313448448.0, + "22": 4313448448.0, + "23": 4313448448.0, + "24": 4313448448.0, + "25": 
4313448448.0, + "26": 4313448448.0, + "27": 4313448448.0, + "28": 4313448448.0, + "29": 4313448448.0, + "30": 4313448448.0, + "31": 4313448448.0, + "32": 4313448448.0, + "33": 4313448448.0, + "34": 4313448448.0, + "35": 4313448448.0, + "36": 4313448448.0, + "37": 4313448448.0, + "38": 4313448448.0, + "39": 4313448448.0, + "40": 4313448448.0, + "41": 4313448448.0, + "42": 4313448448.0, + "43": 4313448448.0, + "44": 4313448448.0, + "45": 4313448448.0, + "46": 4313448448.0, + "47": 4313448448.0, + "48": 4313448448.0, + "49": 4313448448.0, + "50": 4313448448.0 } }, "mem-max-allocated-bytes": { @@ -284,4 +284,4 @@ "50": 1.97038 } } -} \ No newline at end of file +} diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml index 5b177ed116d..d1fcd8fd4b7 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml @@ -5,6 +5,10 @@ ENV_VARS: NCCL_NVLS_ENABLE: 0 PYTHONWARNINGS: ignore NCCL_DEBUG: VERSION + NVTE_CPU_OFFLOAD_V1: 1 + NVTE_FUSED_ATTN: 0 + NCCL_ALGO: ^NVLS + CUBLAS_WORKSPACE_CONFIG: ':4096:8' MODEL_ARGS: # Distributed args --distributed-timeout-minutes: 60 @@ -29,8 +33,6 @@ MODEL_ARGS: --exit-duration-in-mins: 230 --no-check-for-nan-in-loss-and-grad: true --no-rope-fusion: true - --cross-entropy-loss-fusion: true - --cross-entropy-fusion-impl: native --manual-gc: true --manual-gc-interval: 100 --recompute-granularity: selective @@ -129,6 +131,5 @@ TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular METRICS: # - "iteration-time" - "lm loss" - - "num-zeros" - "mem-allocated-bytes" - 
"mem-max-allocated-bytes" diff --git a/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py index 7c1b7f1fe4b..558c6934a0c 100644 --- a/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py +++ b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py @@ -1,187 +1,573 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import gc +import os +from contextlib import nullcontext +from typing import Dict, List, Optional, Tuple import pytest import torch -EPSILON = 0.1 +from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec +from megatron.core.models.gpt.gpt_model import GPTModel +from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + FineGrainedActivationOffloadingInterface as off_interface, +) +from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed +from megatron.core.transformer.enums import AttnBackend +from megatron.core.transformer.transformer_config import MLATransformerConfig, TransformerConfig +from megatron.core.utils import is_te_min_version +from tests.unit_tests.test_utilities import Utils -# Skip all tests if CUDA is not available -cuda_available = torch.cuda.is_available() +# Tolerance for memory expectation check (GPU allocator jitter etc). 
+EPSILON = 0.30 +EPSILON_A2A = 0.30 +DELTA = 20 # MiB -def _reset_cuda_memory(): +def _reset_cuda_memory() -> None: gc.collect() - if cuda_available: + if torch.cuda.is_available(): torch.cuda.empty_cache() + torch.cuda.synchronize() + + +def _build_gpt_model( + *, + seed: int, + num_layers: int, + hidden_size: int, + num_attention_heads: int, + vocab_size: int, + seq_length: int, + num_experts: Optional[int], + fine_grained_activation_offloading: bool, + offload_modules: Optional[List[str]], + min_offloaded_tensor_size: int, + is_mla: bool, +) -> GPTModel: + """Build a GPTModel that uses TE-based transformer layer spec.""" + model_parallel_cuda_manual_seed(seed) + torch.manual_seed(seed) + ConfigClass = MLATransformerConfig if is_mla else TransformerConfig + transformer_config = ConfigClass( + num_layers=num_layers, + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, + use_cpu_initialization=True, + attention_backend=AttnBackend.unfused, + bf16=True, + # Recompute + recompute_modules=["layernorm", "moe_act"] if num_experts is not None else ["layernorm"], + recompute_granularity="selective", + # MoE + num_moe_experts=num_experts, + moe_grouped_gemm=(num_experts is not None), + # Fine-grained activation offloading + fine_grained_activation_offloading=fine_grained_activation_offloading, + offload_modules=offload_modules, + min_offloaded_tensor_size=min_offloaded_tensor_size, + ) + gpt_model = GPTModel( + config=transformer_config, + transformer_layer_spec=get_gpt_layer_with_transformer_engine_spec( + num_experts=num_experts, + moe_grouped_gemm=num_experts is not None, + moe_use_legacy_grouped_gemm=False, + multi_latent_attention=is_mla, + ), + vocab_size=vocab_size, + max_sequence_length=seq_length, + ).bfloat16() + return gpt_model + + +def _make_gpt_inputs( + *, seq_length: int, micro_batch_size: int, device: torch.device +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + data = list(range(seq_length)) + input_ids = torch.tensor(data, 
dtype=torch.int64).repeat((micro_batch_size, 1)).to(device) + position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).to(device) + attention_mask = torch.ones((micro_batch_size, 1, seq_length, seq_length), dtype=bool).to( + device + ) + return input_ids, position_ids, attention_mask + + +def _run_one_iter_and_capture( + model: GPTModel, + *, + input_ids: torch.Tensor, + position_ids: torch.Tensor, + attention_mask: torch.Tensor, + enable_offload_reset: bool, +) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], int]: + """ + Run a single forward+backward iteration. + + Returns: + - logits (CPU float32) + - selected grads (CPU float32) + - peak_memory_allocated (bytes) during the iteration + """ + + if enable_offload_reset: + off_interface.reset() + + # for p in model.parameters(): + # if p.grad is not None: + # p.grad = None + + torch.cuda.reset_peak_memory_stats() + logits = model(input_ids=input_ids, position_ids=position_ids, attention_mask=attention_mask) + loss = logits.float().sum() + loss.backward() + torch.cuda.synchronize() + peak_bytes = int(torch.cuda.max_memory_allocated()) + + # capture all gradients for correctness + grads: Dict[str, torch.Tensor] = {} + for name, p in model.named_parameters(): + grads[name] = p.grad.detach().float().cpu() if p.grad is not None else None + + return logits.detach().float().cpu(), grads, peak_bytes + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is required for offloading tests.") +@pytest.mark.parametrize( + "is_moe, is_mla, offload_modules", + [ + # Dense GPT modules + (False, True, ["attn_norm"]), + (True, False, ["qkv_linear"]), + (True, False, ["core_attn"]), + # # attn_proj depends on core_attn (validated in TransformerConfig.__post_init__) + (True, True, ["core_attn", "attn_proj"]), + (True, False, ["mlp_norm"]), + (True, False, ["expert_fc1"]), + (True, False, ["moe_act"]), + ], +) +def test_gpt_fine_grained_activation_offloading_correctness_and_memory( + is_moe: 
bool, is_mla: bool, offload_modules: List[str] +): + """ + Initialize a GPTModel and verify: + - forward output correctness under each offload_modules setting + - backward gradient correctness (subset) + - peak GPU memory is reduced roughly as expected (based on recorded offload bytes) + """ + # setup distributed/model-parallel (same pattern as other UTs) + os.environ.pop("NVTE_FUSED_ATTN", None) + os.environ.pop("NVTE_FLASH_ATTN", None) + os.environ.pop("NVTE_UNFUSED_ATTN", None) + # os.environ["NVTE_FLASH_ATTN"] = "1" + Utils.initialize_model_parallel(tensor_model_parallel_size=1, pipeline_model_parallel_size=1) + + seed = 123 + # Choose shapes large enough to make memory deltas stable but still fast. + num_experts = 4 if is_moe else None + num_layers = 8 + hidden_size = 2048 if num_experts is None else 1024 + num_attention_heads = 16 if hidden_size >= 2048 else 8 + vocab_size = 1024 + seq_length = 1024 + micro_batch_size = 2 + device = torch.device("cuda") + + input_ids, position_ids, attention_mask = _make_gpt_inputs( + seq_length=seq_length, micro_batch_size=micro_batch_size, device=device + ) + from megatron.core.pipeline_parallel import fine_grained_activation_offload as off -class ToyModel(torch.nn.Module): - def __init__(self, hidden_size: int = 2048, num_layers: int = 4, dtype=torch.bfloat16): - super().__init__() - layers = [] - for _ in range(num_layers): - layers.append( - torch.nn.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device="cuda") + off_interface.reset_instance() + + try: + # 1) Baseline run (no offloading) + _reset_cuda_memory() + base_model = _build_gpt_model( + seed=seed, + num_layers=num_layers, + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, + vocab_size=vocab_size, + seq_length=seq_length, + num_experts=num_experts, + fine_grained_activation_offloading=False, + offload_modules=None, + min_offloaded_tensor_size=1024 * 1024, + is_mla=is_mla, + ).cuda() + base_model.train() + + # Warmup baseline once for 
allocator stability + _run_one_iter_and_capture( + base_model, + input_ids=input_ids, + position_ids=position_ids, + attention_mask=attention_mask, + enable_offload_reset=False, + ) + _reset_cuda_memory() + base_logits, base_grads, base_peak = _run_one_iter_and_capture( + base_model, + input_ids=input_ids, + position_ids=position_ids, + attention_mask=attention_mask, + enable_offload_reset=False, + ) + # Free baseline model GPU memory before offload path + del base_model + _reset_cuda_memory() + + # 2) Offload run (warmup to record bytes + steady-state measurement) + off_model = _build_gpt_model( + seed=seed, + num_layers=num_layers, + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, + vocab_size=vocab_size, + seq_length=seq_length, + num_experts=num_experts, + fine_grained_activation_offloading=True, + offload_modules=offload_modules, + min_offloaded_tensor_size=1024, # force offloading for UT determinism + is_mla=is_mla, + ).cuda() + off_model.train() + + # Warmup 1 iter to populate cached chunks, then reset to finish warmup bookkeeping. + _run_one_iter_and_capture( + off_model, + input_ids=input_ids, + position_ids=position_ids, + attention_mask=attention_mask, + enable_offload_reset=True, + ) + # Reset once more to trigger post_warmup_callback and apply steady-state offload decisions. 
+ off_interface.reset() + + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + PipelineOffloadManager, + ) + + mgr = PipelineOffloadManager.get_instance() + expected_offload_bytes = int( + sum(mgr.offload_summary_bytes.get(k, 0) for k in offload_modules) + ) + expected_offload_mib = expected_offload_bytes / (1024**2) + + _reset_cuda_memory() + off_logits, off_grads, off_peak = _run_one_iter_and_capture( + off_model, + input_ids=input_ids, + position_ids=position_ids, + attention_mask=attention_mask, + enable_offload_reset=True, + ) + del off_model + _reset_cuda_memory() + + # 3) Correctness checks (forward + selected grads) + assert torch.allclose(off_logits, base_logits, rtol=1e-3, atol=1e-3) + assert set(off_grads.keys()) == set(base_grads.keys()) + for name, gb in base_grads.items(): + go = off_grads[name] + if gb is None or go is None: + assert gb is None and go is None, f"Grad None mismatch for {name}" + continue + assert torch.allclose(go, gb, rtol=1e-3, atol=1e-3), f"Grad mismatch for {name}" + + # 4) Memory checks (peak allocated over forward+backward) + saved_mib = (base_peak - off_peak) / (1024**2) + assert saved_mib > 0.0, ( + f"Expected GPU peak memory reduction for offload_modules={offload_modules}, " + f"but got saved={saved_mib:.2f}MiB (base={base_peak/(1024**2):.2f}MiB, " + f"off={off_peak/(1024**2):.2f}MiB)" + ) + + # If expectation is large enough, enforce approximate match. + # For tiny expectations, allocator noise may dominate; we only require a positive reduction. 
+ if expected_offload_mib >= 2.0: + rel_err = abs(saved_mib - expected_offload_mib) / max(expected_offload_mib, 1e-6) + abs_err = abs(saved_mib - expected_offload_mib) + assert rel_err <= EPSILON and abs_err <= DELTA, ( + f"Memory saving mismatch for offload_modules={offload_modules}: " + f"saved={saved_mib:.2f}MiB expected~={expected_offload_mib:.2f}MiB " + f"(rel_err={rel_err:.2f}, abs_err={abs_err:.2f})" ) - self.net = torch.nn.Sequential(*layers).to(device="cuda", dtype=dtype) - self.hidden_size = hidden_size - self.num_layers = num_layers - self.dtype = dtype - - # Prevent weights/bias from being considered activation tensors for offload; - # ensure we only count activation tensors (inputs x) in memory accounting. - for p in self.parameters(): - try: - setattr(p, "offloading_activation", False) - except Exception: - pass - - def forward(self, x, use_offload: bool = False): - from megatron.core.pipeline_parallel import fine_grained_activation_offload as off - - if use_offload: - # Initialize a new chunk (microbatch) and enable offload context. - with off.get_fine_grained_offloading_context(True): - off.fine_grained_offloading_init_chunk_handler( - vp_size=1, vp_stage=None, min_offloaded_tensor_size=1 - ) - for i, layer in enumerate(self.net): - # Group by module; with this linear-only model, each group corresponds to a layer. 
- off.fine_grained_offloading_set_last_layer(i == len(self.net) - 1) - x = off.fine_grained_offloading_group_start(x, name=f"layer_{i}") - x = layer(x) - # Commit the group; returns a tuple of tensors - (x,) = off.fine_grained_offloading_group_commit( - x, name=f"layer_{i}", forced_released_tensors=[] - ) - return x - # Baseline path (no offload hooks) - with ( - torch.autocast(device_type="cuda", dtype=self.dtype) - if self.dtype in (torch.float16, torch.bfloat16) - else torch.cuda.amp.autocast(enabled=False) - ): - for layer in self.net: - x = layer(x) - return x - - -@pytest.fixture(autouse=True) -def _monkeypatch_offload_deps(monkeypatch): - # Avoid requiring torch.distributed initialization and NVML in tests - import megatron.core.pipeline_parallel.fine_grained_activation_offload as off - - monkeypatch.setattr(off, "debug_rank", lambda *args, **kwargs: None, raising=False) - monkeypatch.setattr(off, "set_ideal_affinity_for_current_gpu", lambda: None, raising=False) - # Ensure a clean state each test - off.fine_grained_offloading_reset() - yield - off.fine_grained_offloading_reset() - - -def test_fine_grained_activation_offload_memory_reduction(): - torch.manual_seed(1234) - # Use a linear-only stack so theoretical saved memory equals sum of per-layer input x bytes. 
- model = ToyModel(hidden_size=2048, num_layers=8, dtype=torch.bfloat16).eval() - - # Create input - inp = torch.randn( - (2048, model.hidden_size), device="cuda", dtype=torch.bfloat16, requires_grad=True + print( + f"Rank {torch.distributed.get_rank()}: Saved {saved_mib:.2f}MiB, expected {expected_offload_mib:.2f}MiB" + ) + finally: + Utils.destroy_model_parallel() + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is required for offloading tests.") +@pytest.mark.skipif( + not is_te_min_version("1.9.0.dev0"), + reason="EP A2A overlap requires TE 1.9.0.dev0+ in this repo's tests.", +) +@pytest.mark.parametrize( + "dispatcher_backend, is_mla, offload_modules", + [ + ("alltoall", True, ["attn_norm"]), + ("alltoall", True, ["core_attn"]), + ("alltoall", True, ["attn_norm", "core_attn", "attn_proj"]), + ("alltoall", True, ["mlp_norm"]), + ("alltoall", False, ["expert_fc1"]), + ("alltoall", False, ["moe_act"]), + ("alltoall", False, ["mlp_norm", "expert_fc1", "moe_act"]), + ( + "alltoall", + True, + ["attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act"], + ), + ( + "alltoall", + False, + ["attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act"], + ), + ], +) +def test_fine_grained_activation_offload_with_ep_a2a_overlap_compatibility( + dispatcher_backend: str, is_mla: bool, offload_modules: List[str] +): + """ + Compatibility test for: + - fine-grained activation offloading + - EP all-to-all overlap (overlap_moe_expert_parallel_comm) + - memory saving roughly matches expected offload bytes (when expectation is large enough) + + The EP A2A overlap initialization pattern is aligned with + `tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py`. 
+ """ + from megatron.core.models.common.model_chunk_schedule_plan import ( + TransformerModelChunkSchedulePlan, ) + from megatron.core.pipeline_parallel.utils import set_streams + from tests.unit_tests.a2a_overlap.utils import deterministic_mode + + # EP overlap requires distributed initialization with EP groups. + ep_size = 4 + if Utils.world_size % ep_size != 0: + pytest.skip( + f"Skipping: WORLD_SIZE={Utils.world_size} must be divisible by ep_size={ep_size}." + ) + + seed = 123 + num_experts = 8 # must be divisible by ep_size + if num_experts % ep_size != 0: + pytest.skip( + f"Skipping: num_moe_experts={num_experts} must be divisible by ep_size={ep_size}." + ) + + # Small shapes to keep this compatibility test fast. + num_layers = 8 + hidden_size = 1024 + num_attention_heads = 16 + vocab_size = 1024 + seq_length = 1024 + micro_batch_size = 2 + device = torch.device("cuda") - # Warmup to stabilize allocator behavior - _reset_cuda_memory() - out = model(inp, use_offload=False) - (out.sum()).backward() - torch.cuda.synchronize() - _reset_cuda_memory() - - # Baseline memory measurement (no offload) - _reset_cuda_memory() - inp_baseline = inp.detach().clone().requires_grad_(True) - baseline_mem_before = torch.cuda.memory_allocated() / (1024**2) - out_base = model(inp_baseline, use_offload=False) - baseline_mem_after = (torch.cuda.memory_allocated() - out_base.nbytes) / (1024**2) - (out_base.sum()).backward() - torch.cuda.synchronize() - baseline_delta = baseline_mem_after - baseline_mem_before - - # Offload memory measurement from megatron.core.pipeline_parallel import fine_grained_activation_offload as off - off.fine_grained_offloading_reset() - _reset_cuda_memory() - inp_off = inp.detach().clone().requires_grad_(True) - offload_mem_before = torch.cuda.memory_allocated() / (1024**2) - out_off = model(inp_off, use_offload=True) - offload_mem_after = (torch.cuda.memory_allocated() - out_off.nbytes) / (1024**2) - (out_off.sum()).backward() - torch.cuda.synchronize() - 
offload_delta = offload_mem_after - offload_mem_before - - # Offload should reduce peak cached memory usage after forward - assert ( - offload_delta < baseline_delta - ), f"offload did not reduce memory: off={offload_delta:.2f}MiB base={baseline_delta:.2f}MiB" - - # Theoretical savings: storing per-layer input x (same shape each layer). - bytes_per_elem = inp.element_size() # 2 for bfloat16 - input_bytes = inp.numel() * bytes_per_elem - # -2 because the first and last activations are not offloaded - expected_saved_mib = (model.num_layers - 2) * (input_bytes / (1024**2)) - - # Actual savings ≈ baseline_delta - offload_delta (both exclude output tensor memory). - actual_saved_mib = baseline_delta - offload_delta - - # Allow slack for allocator jitter and extra intermediates; magnitudes should match. - rel_err = abs(actual_saved_mib - expected_saved_mib) / max(expected_saved_mib, 1e-6) - assert ( - rel_err <= EPSILON - ), f"saved mismatch: actual={actual_saved_mib:.2f}MiB expected~={expected_saved_mib:.2f}MiB (rel_err={rel_err:.2f})" - - -def test_fine_grained_activation_offload_output_and_grad_consistency(): - torch.manual_seed(2025) - hidden = 1024 - layers = 3 - - # Create identical models by resetting seed - torch.manual_seed(2025) - model_base = ToyModel(hidden_size=hidden, num_layers=layers, dtype=torch.bfloat16).train() - torch.manual_seed(2025) - model_off = ToyModel(hidden_size=hidden, num_layers=layers, dtype=torch.bfloat16).train() - - # Same input and target - inp = torch.randn((32, hidden), device="cuda", dtype=torch.bfloat16, requires_grad=True) - target = torch.randn_like(inp) - - # Baseline forward/backward - out_base = model_base(inp, use_offload=False) - loss_base = torch.nn.functional.mse_loss(out_base, target) - loss_base.backward() - grads_base = [ - p.grad.detach().clone() if p.grad is not None else None for p in model_base.parameters() - ] - - # Offload forward/backward - from megatron.core.pipeline_parallel import 
fine_grained_activation_offload as off + def _make_schedule_inputs() -> Dict[str, torch.Tensor]: + data = list(range(seq_length)) + input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).to(device) + position_ids = ( + torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).to(device) + ) + attention_mask = torch.ones((micro_batch_size, 1, seq_length, seq_length), dtype=bool).to( + device + ) + labels = input_ids.clone() + return { + "input_ids": input_ids, + "labels": labels, + "position_ids": position_ids, + "attention_mask": attention_mask, + } + + def _capture_params(model: torch.nn.Module) -> Dict[str, torch.Tensor]: + params: Dict[str, torch.Tensor] = {} + for name, p in model.named_parameters(): + params[name] = p.detach().clone() + return params + + def _restore_params(model: torch.nn.Module, params: Dict[str, torch.Tensor]) -> None: + for name, p in model.named_parameters(): + p.data.copy_(params[name]) + + def _build_overlap_moe_gpt( + *, enable_offload: bool, is_mla: bool, dispatcher_backend: str + ) -> GPTModel: + model_parallel_cuda_manual_seed(seed) + torch.manual_seed(seed) + ConfigClass = MLATransformerConfig if is_mla else TransformerConfig + transformer_config = ConfigClass( + num_layers=num_layers, + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, + use_cpu_initialization=True, + attention_backend=AttnBackend.unfused, + # Recompute + recompute_modules=["layernorm", "moe_act"], + recompute_granularity="selective", + bf16=True, + # MoE + EP overlap + num_moe_experts=num_experts, + moe_grouped_gemm=True, + expert_model_parallel_size=ep_size, + moe_token_dispatcher_type="alltoall" if dispatcher_backend == "alltoall" else "flex", + moe_flex_dispatcher_backend=dispatcher_backend, + moe_router_dtype="fp32" if dispatcher_backend == "hybridep" else "fp64", + overlap_moe_expert_parallel_comm=True, + delay_wgrad_compute=True, + # Fine-grained activation offloading + 
fine_grained_activation_offloading=enable_offload, + offload_modules=offload_modules if enable_offload else None, + min_offloaded_tensor_size=1024, # force offloading to exercise the code path + ) + return ( + GPTModel( + config=transformer_config, + transformer_layer_spec=get_gpt_layer_with_transformer_engine_spec( + num_experts=num_experts, + moe_grouped_gemm=True, + moe_use_legacy_grouped_gemm=False, + multi_latent_attention=is_mla, + ), + vocab_size=vocab_size, + max_sequence_length=seq_length, + ) + .bfloat16() + .cuda() + ) + + def _run_schedule_1f1b_two_microbatches( + model: GPTModel, *, enable_offload_reset: bool + ) -> Tuple[List[torch.Tensor], Dict[str, torch.Tensor], int]: + """ + Run a minimal 1F1B schedule (2 microbatches) using ModelChunkSchedulePlan.run(). + This is the execution path that exercises EP A2A overlap scheduling. + """ + if enable_offload_reset: + off_interface.reset() + + data0 = _make_schedule_inputs() + data1 = _make_schedule_inputs() + plan0 = model.build_schedule_plan(**data0) + + torch.cuda.reset_peak_memory_stats() + out0 = TransformerModelChunkSchedulePlan.run(plan0, None) + plan1 = model.build_schedule_plan(**data1) + out1 = TransformerModelChunkSchedulePlan.run(plan1, plan0, b_grad=torch.ones_like(out0)) + TransformerModelChunkSchedulePlan.run(None, plan1, b_grad=torch.ones_like(out1)) + torch.cuda.synchronize() + peak_bytes = int(torch.cuda.max_memory_allocated()) + + # capture outputs and grads + outputs = [out0.detach().float().cpu(), out1.detach().float().cpu()] + grads: Dict[str, torch.Tensor] = {} + for name, p in model.named_parameters(): + grads[name] = p.grad.detach().float().cpu() if p.grad is not None else None + return outputs, grads, peak_bytes + + # setup distributed/model-parallel + os.environ.pop("NVTE_FUSED_ATTN", None) + os.environ.pop("NVTE_FLASH_ATTN", None) + os.environ.pop("NVTE_UNFUSED_ATTN", None) + + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + 
expert_model_parallel_size=ep_size, + ) + set_streams() + + off_interface.reset_instance() + + try: + with deterministic_mode(): + # Baseline: EP overlap on, offload off. + _reset_cuda_memory() + base_model = _build_overlap_moe_gpt( + enable_offload=False, is_mla=is_mla, dispatcher_backend=dispatcher_backend + ) + base_model.train() + base_params = _capture_params(base_model) + # Warmup once for allocator stability / graph caching + _run_schedule_1f1b_two_microbatches(base_model, enable_offload_reset=False) + _reset_cuda_memory() + base_outs, base_grads, base_peak = _run_schedule_1f1b_two_microbatches( + base_model, enable_offload_reset=False + ) + del base_model + _reset_cuda_memory() + + # Offload: EP overlap on, fine-grained offload on. + off_model = _build_overlap_moe_gpt( + enable_offload=True, is_mla=is_mla, dispatcher_backend=dispatcher_backend + ) + _restore_params(off_model, base_params) + off_model.train() + # Warmup once to populate cached chunks, then reset to apply steady-state offload decisions. 
+ off_interface.reset() + _run_schedule_1f1b_two_microbatches(off_model, enable_offload_reset=False) + off_interface.reset() + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + PipelineOffloadManager, + ) + + mgr = PipelineOffloadManager.get_instance() + expected_offload_bytes = int( + sum(mgr.offload_summary_bytes.get(k, 0) for k in offload_modules) + ) + expected_offload_mib = expected_offload_bytes / (1024**2) - off.fine_grained_offloading_reset() - out_off = model_off(inp.detach().clone().requires_grad_(True), use_offload=True) - loss_off = torch.nn.functional.mse_loss(out_off, target) - loss_off.backward() - grads_off = [ - p.grad.detach().clone() if p.grad is not None else None for p in model_off.parameters() - ] - - # Compare outputs - assert torch.allclose(out_off.float(), out_base.float(), rtol=1e-3, atol=1e-3) - - # Compare gradients parameter-wise - for gb, go in zip(grads_base, grads_off): - if gb is None and go is None: - continue - assert gb is not None and go is not None - assert torch.allclose(go.float(), gb.float(), rtol=1e-3, atol=1e-3) + _reset_cuda_memory() + off_outs, off_grads, off_peak = _run_schedule_1f1b_two_microbatches( + off_model, enable_offload_reset=True + ) + del off_model + _reset_cuda_memory() + + # Correctness (forward outputs + all grads) + assert len(off_outs) == len(base_outs) == 2 + for i in range(2): + assert torch.allclose(off_outs[i], base_outs[i], rtol=1e-3, atol=1e-3) + assert set(off_grads.keys()) == set(base_grads.keys()) + for name, gb in base_grads.items(): + go = off_grads[name] + if gb is None or go is None: + assert gb is None and go is None, f"Grad None mismatch for {name}" + continue + assert torch.allclose( + go, gb, rtol=1e-3, atol=1e-3 + ), f"Rank {torch.distributed.get_rank()}: Grad mismatch for {name}" + + # Memory checks (peak allocated during the scheduled 1F1B run) + saved_mib = (base_peak - off_peak) / (1024**2) + assert saved_mib > 0.0, ( + f"Expected GPU peak memory 
reduction for offload_modules={offload_modules}, " + f"but got saved={saved_mib:.2f}MiB (base={base_peak/(1024**2):.2f}MiB, " + f"off={off_peak/(1024**2):.2f}MiB)" + ) + # If expectation is large enough, enforce approximate match. + if expected_offload_mib >= 2.0: + rel_err = abs(saved_mib - expected_offload_mib) / max(expected_offload_mib, 1e-6) + abs_err = abs(saved_mib - expected_offload_mib) + print( + f"Rank {torch.distributed.get_rank()}: Saved {saved_mib:.2f}MiB, expected {expected_offload_mib:.2f}MiB" + ) + if abs_err > DELTA: + assert rel_err <= EPSILON_A2A, ( + f"Memory saving mismatch for offload_modules={offload_modules}: " + f"saved={saved_mib:.2f}MiB expected~={expected_offload_mib:.2f}MiB " + f"(rel_err={rel_err:.2f}, abs_err={abs_err:.2f})" + ) + finally: + Utils.destroy_model_parallel() From 6807df4ff4f97e1b56b978877b891328a25b8b7a Mon Sep 17 00:00:00 2001 From: Hongbin Liu Date: Thu, 22 Jan 2026 21:10:35 +0800 Subject: [PATCH 248/334] [Dev] [fix] Bug fix for offloading in evaluate() (#3041) Signed-off-by: Hongbin Liu --- .../core/pipeline_parallel/fine_grained_activation_offload.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py index 9996c9b57a4..01c3a0c3aa0 100644 --- a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py +++ b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py @@ -654,6 +654,9 @@ def pop_forward_chunk(self, name=None): while not self._is_warmup and ( self._cur_forward_chunk is None or self._cur_forward_chunk.finish_all_groups(name) ): + if self._cached_chunks_index_forward >= len(self._cached_chunks_forward): + self._cur_forward_chunk = None + break self._cur_forward_chunk = self._cached_chunks_forward[self._cached_chunks_index_forward] self._cached_chunks_index_forward += 1 debug_rank(f"new cur_forward_chunk {self._cur_forward_chunk}") From 
b3bba3f45d62d4655b2fb32b9d7e9538861cec5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 26 Jan 2026 20:29:29 +0100 Subject: [PATCH 249/334] ci: Log node name (#3081) (#3082) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/actions/action.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index a17b4a9a8c1..5fba1ca1241 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -49,6 +49,10 @@ inputs: runs: using: 'composite' steps: + - name: Print node name + shell: bash -x -e -u -o pipefail {0} + run: echo "node_name=$NODE_NAME" | tee -a "$GITHUB_OUTPUT" + - name: Checkout repository uses: actions/checkout@v2 From a4e3fb3400fb8be8e2d2090b823ecac20da48b46 Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Tue, 27 Jan 2026 13:49:25 +0800 Subject: [PATCH 250/334] [dev] pull main 260122 (#3045) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Robin Zhang Signed-off-by: oliver könig Signed-off-by: Charlie Truong Signed-off-by: Maanu Grover Signed-off-by: Jennifer Chen Signed-off-by: Antoni-Joan Solergibert Signed-off-by: Lifu Zhang Signed-off-by: Keshav Santhanam Signed-off-by: Youngeun Kwon Signed-off-by: Hongbin Liu Signed-off-by: Pingtian Li Signed-off-by: John St. 
John Signed-off-by: John St John Signed-off-by: kunlunl Signed-off-by: jianbinc Signed-off-by: Deepak Narayanan Signed-off-by: dimapihtar Signed-off-by: Zhongbo Zhu Signed-off-by: Boxiang Wang Signed-off-by: Deyu Fu Signed-off-by: Hao Wu Signed-off-by: Asha Anoosheh Signed-off-by: Li Tao Signed-off-by: lit Signed-off-by: Hongbin Liu Signed-off-by: root Signed-off-by: tailaim Signed-off-by: Parth Mannan Signed-off-by: Cory Ye Signed-off-by: Jimmy Zhang Signed-off-by: Jieming Zhang Signed-off-by: Dong Hyuk Chang Co-authored-by: Philip Petrakian Co-authored-by: github-actions[bot] Co-authored-by: Siddharth Singh <136645615+sidsingh-nvidia@users.noreply.github.com> Co-authored-by: Teodor-Dumitru Ene Co-authored-by: Robin Zhang Co-authored-by: Jared Casper <155158+jaredcasper@users.noreply.github.com> Co-authored-by: oliver könig Co-authored-by: Lawrence McAfee <85179052+lmcafee-nvidia@users.noreply.github.com> Co-authored-by: Santosh Bhavani Co-authored-by: Charlie Truong Co-authored-by: Maanu Grover <109391026+maanug-nv@users.noreply.github.com> Co-authored-by: Teodor-Dumitru Ene <34819528+tdene@users.noreply.github.com> Co-authored-by: wdykas <73254672+wdykas@users.noreply.github.com> Co-authored-by: William Dykas Co-authored-by: root Co-authored-by: root Co-authored-by: HaochenYuan <106647990+HaochenYuan@users.noreply.github.com> Co-authored-by: Philip Petrakian Co-authored-by: Jenny Chen Co-authored-by: Antoni-Joan Solergibert Co-authored-by: Deepak Narayanan <2724038+deepakn94@users.noreply.github.com> Co-authored-by: Lifu Zhang Co-authored-by: Lifu Zhang Co-authored-by: Shanmugam Ramasamy <111910568+shanmugamr1992@users.noreply.github.com> Co-authored-by: Shanmugam Ramasamy Co-authored-by: Keshav Santhanam Co-authored-by: Kan Zhu Co-authored-by: helen ngo Co-authored-by: Youngeun Kwon Co-authored-by: Nick Schank Co-authored-by: Eric Harper Co-authored-by: Nick Schank Co-authored-by: wineandchord Co-authored-by: Xin Yao Co-authored-by: Chenhan D. 
Yu <5185878+ChenhanYu@users.noreply.github.com> Co-authored-by: Hongbin Liu Co-authored-by: Pingtian Li Co-authored-by: John St. John Co-authored-by: kwyss-nvidia Co-authored-by: ankurv-nvidia Co-authored-by: Deepak Narayanan Co-authored-by: Jon Barker Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: Yuzhong Wang Co-authored-by: Kunlun Li <94586211+kunlunl@users.noreply.github.com> Co-authored-by: jianbinc Co-authored-by: Cory Ye <44509866+cspades@users.noreply.github.com> Co-authored-by: shanmugamr1992 Co-authored-by: yobi byte Co-authored-by: Chen Cui Co-authored-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> Co-authored-by: Mcore Bot Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: dimapihtar Co-authored-by: Zhongbo Zhu <42691305+zhongbozhu@users.noreply.github.com> Co-authored-by: Zijie Yan Co-authored-by: Hao Wu Co-authored-by: Boxiang Wang Co-authored-by: mikail Co-authored-by: root Co-authored-by: root Co-authored-by: root Co-authored-by: Asha Anoosheh Co-authored-by: Hexin Wang <160587990+hexinw-nvidia@users.noreply.github.com> Co-authored-by: Russell Hewett Co-authored-by: Li Tao Co-authored-by: shifangx Co-authored-by: Deepak Joshi <151525321+Deepak-J0shi@users.noreply.github.com> Co-authored-by: Hongbin Liu Co-authored-by: root Co-authored-by: John Kamalu Co-authored-by: Brandon Norick Co-authored-by: Pingtian Li <158665726+Wohox@users.noreply.github.com> Co-authored-by: Duncan Riach <33532941+duncanriach@users.noreply.github.com> Co-authored-by: xuwchen <79835960+xuwchen@users.noreply.github.com> Co-authored-by: John St. 
John Co-authored-by: Parth Mannan <38387286+parthmannan@users.noreply.github.com> Co-authored-by: tailaim Co-authored-by: kunlunl Co-authored-by: Jimmy Zhang <133159885+jiemingz@users.noreply.github.com> Co-authored-by: Yashaswi Karnati <144376261+yashaswikarnati@users.noreply.github.com> Co-authored-by: Dong Hyuk Chang --- .github/CODEOWNERS | 2 + .github/ISSUE_TEMPLATE/bug_report.md | 5 +- .github/ISSUE_TEMPLATE/feature_request.md | 3 + .github/ISSUE_TEMPLATE/question.md | 3 +- .github/ISSUE_TEMPLATE/regression.md | 3 +- .github/actions/action.yml | 31 +- .github/copy-pr-bot.yaml | 2 +- .github/oncall_schedule.json | 52 +- .github/pull_request_template.md | 4 +- .github/scripts/oncall_manager.py | 194 +- .github/scripts/sync_team_usergroups.py | 527 ++ .../workflows/_build_test_publish_wheel.yml | 20 +- .github/workflows/_release_library.yml | 8 +- .github/workflows/auto-update-copy-pr-bot.yml | 5 +- .github/workflows/cicd-main.yml | 54 +- .github/workflows/oncall-assign.yml | 20 +- .github/workflows/oncall-rotation.yml | 7 +- .github/workflows/sync-team-usergroups.yml | 39 + .gitlab/scripts/build.sh | 3 + CONTRIBUTING.md | 67 +- docker/Dockerfile.ci.dev | 8 +- docker/Dockerfile.ci.nemo | 1 - docker/common/install.sh | 2 +- docker/common/install_source_wheels.sh | 2 +- docs/advanced/index.md | 5 + docs/api-guide/core/datasets.md | 4 + .../{ => core}/dist_checkpointing.md | 0 .../dist_checkpointing.strategies.md | 0 docs/api-guide/{ => core}/distributed.md | 0 docs/api-guide/{ => core}/fusions.md | 0 docs/api-guide/core/index.md | 16 + .../api-guide/{ => core}/pipeline_parallel.md | 0 docs/api-guide/{ => core}/tensor_parallel.md | 0 docs/api-guide/{ => core}/transformer.md | 0 docs/api-guide/datasets.md | 5 - docs/api-guide/datasets_readme.md | 3 - docs/api-guide/index.md | 26 +- docs/api-guide/internal/index.md | 10 + .../num_microbatches_calculator.md | 0 .../optimizer_param_scheduler.md | 0 docs/api-guide/models/index.md | 12 + docs/api-guide/{ => 
models}/models.bert.md | 0 docs/api-guide/{ => models}/models.gpt.md | 0 docs/api-guide/{ => models}/models.md | 0 docs/api-guide/{ => models}/models.t5.md | 0 docs/api-guide/moe.md | 5 - docs/api-guide/optimizer_cpu_offload.md | 5 - docs/api-guide/tokenizers.md | 137 - docs/conf.py | 2 +- docs/developer/contribute.md | 61 + docs/developer/generate_docs.md | 13 + docs/developer/oncall.md | 48 + docs/developer/submit.md | 16 + docs/discussions/README.md | 8 +- docs/get-started/quickstart.md | 69 + docs/index.md | 84 +- docs/llama_mistral.md | 16 +- docs/models/index.md | 17 + docs/models/llms.md | 57 + docs/models/multimodal.md | 61 + docs/user-guide/data-preparation.md | 70 + .../features}/context_parallel.md | 4 +- .../features}/custom_fsdp.md | 10 +- .../features}/dist_optimizer.md | 4 +- .../fine_grained_activation_offloading.md | 31 + docs/user-guide/features/index.md | 17 + docs/user-guide/features/megatron_energon.md | 132 + docs/user-guide/features/megatron_rl.md | 46 + docs/user-guide/features/moe.md | 12 + .../features}/multi_latent_attention.md | 0 .../features}/multi_token_prediction.md | 27 +- .../features/optimizer_cpu_offload.md | 4 + .../features}/pipeline_parallel_layout.md | 0 docs/user-guide/features/tokenizers.md | 230 + docs/user-guide/index.md | 7 +- docs/user-guide/parallelism-guide.md | 211 + docs/user-guide/training-examples.md | 146 + .../inference/gpt/gpt_dynamic_inference.py | 27 +- .../gpt/gpt_dynamic_inference_12b.sh | 5 + .../gpt/gpt_dynamic_inference_357m.sh | 5 + .../gpt_dynamic_inference_with_coordinator.py | 2 +- .../inference/gpt/gpt_static_inference.py | 8 +- examples/inference/gpt/utils.py | 29 +- examples/multimodal/layer_specs.py | 37 +- .../Dockerfile | 2 +- examples/multimodal/nvlm/internvit.py | 21 +- examples/multimodal/radio/radio_g.py | 17 +- examples/post_training/modelopt/Dockerfile | 2 +- examples/post_training/modelopt/README.md | 6 +- .../conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh | 1 + 
.../nvidia/Nemotron-H-47B-Reasoning-128K.sh | 39 + .../conf/nvidia/Nemotron-H-4B-Instruct.sh | 1 + .../conf/nvidia/Nemotron-H-56B-Base-8K.sh | 41 + .../conf/nvidia/Nemotron-H-8B-Base-8K.sh | 1 + .../conf/nvidia/Nemotron-Mini-4B-Instruct.sh | 1 + examples/post_training/modelopt/convert.sh | 0 .../post_training/modelopt/convert_model.py | 42 +- examples/post_training/modelopt/export.py | 27 +- examples/post_training/modelopt/export.sh | 0 examples/post_training/modelopt/finetune.py | 90 +- examples/post_training/modelopt/generate.py | 8 + examples/post_training/modelopt/generate.sh | 0 examples/post_training/modelopt/mmlu.py | 59 +- examples/post_training/modelopt/mmlu.sh | 0 .../modelopt/offline_feature_extract.sh | 0 examples/post_training/modelopt/prune.py | 52 +- examples/post_training/modelopt/quantize.py | 205 +- examples/post_training/modelopt/quantize.sh | 0 .../post_training/modelopt/requirements.txt | 5 +- .../post_training/modelopt/speculative.md | 2 +- examples/post_training/modelopt/validate.sh | 0 examples/rl/environment_configs/gsm8k.yaml | 1 + .../rl/environment_configs/gsm8k_nanov3.yaml | 10 + examples/rl/environments/math/gsm8k_agent.py | 19 +- examples/rl/environments/math/math_agent.py | 118 +- examples/rl/model_configs/common.sh | 1 + .../rl/model_configs/nemotron5p5_12b_H.sh | 4 +- examples/rl/model_configs/nemotron6_3b_moe.sh | 128 + .../rl/model_configs/qwen3_30b_a3b_moe.sh | 97 + gpt_builders.py | 21 +- mamba_builders.py | 1 + megatron/core/QuickStart.md | 2 +- megatron/core/datasets/blended_dataset.py | 35 +- .../blended_megatron_dataset_builder.py | 50 +- .../blended_megatron_dataset_config.py | 21 + megatron/core/datasets/gpt_dataset.py | 101 +- megatron/core/datasets/helpers.cpp | 3 +- megatron/core/datasets/indexed_dataset.py | 182 +- megatron/core/datasets/readme.md | 24 +- .../strategies/async_utils.py | 9 + .../dist_checkpointing/strategies/base.py | 15 +- .../strategies/tensorstore.py | 149 - .../strategies/two_stage.py | 266 - 
.../dist_checkpointing/strategies/zarr.py | 357 -- megatron/core/distributed/fsdp/src/README.md | 163 +- .../fsdp/src/megatron_fsdp/fully_shard.py | 20 +- .../fsdp/src/megatron_fsdp/megatron_fsdp.py | 42 +- .../fsdp/src/megatron_fsdp/mixed_precision.py | 26 + .../megatron_fsdp/param_and_grad_buffer.py | 43 +- .../core/distributed/param_and_grad_buffer.py | 52 +- megatron/core/extensions/kitchen.py | 22 +- .../core/extensions/transformer_engine.py | 50 +- megatron/core/hyper_comm_grid.py | 16 + .../core/inference/batch_dimensions_utils.py | 90 +- .../torch_symm_triton/__init__.py | 1 + .../torch_symm_triton/fused_collectives.py | 280 + .../torch_symm_triton/multimem_asm.py | 54 + .../attention_context/mamba_metadata.py | 180 +- .../attention_context/triton/tensor_ops.py | 462 ++ .../contexts/dynamic_block_allocator.py | 30 +- .../inference/contexts/dynamic_context.py | 564 +- .../engines/async_zmq_communicator.py | 14 +- .../core/inference/engines/dynamic_engine.py | 152 +- megatron/core/inference/inference_client.py | 7 +- megatron/core/inference/inference_request.py | 24 +- .../abstract_model_inference_wrapper.py | 12 +- .../gpt/gpt_inference_wrapper.py | 6 +- megatron/core/inference/sampling_params.py | 7 +- .../text_generation_controller.py | 42 +- .../dynamic_text_gen_server/__init__.py | 3 + .../endpoints/__init__.py | 10 + .../endpoints/chat_completions.py | 158 + .../endpoints/common.py | 14 + .../endpoints/completions.py | 214 + .../dynamic_text_gen_server/flask_server.py | 76 + .../dynamic_text_gen_server/tokenization.py | 110 + megatron/core/inference/unified_memory.py | 336 +- megatron/core/models/T5/t5_spec.py | 22 +- megatron/core/models/backends.py | 4 +- megatron/core/models/bert/bert_layer_specs.py | 10 +- .../common/embeddings/rotary_pos_embedding.py | 33 +- .../embeddings/yarn_rotary_pos_embedding.py | 35 +- .../common/model_chunk_schedule_plan.py | 9 +- ...rimental_attention_variant_module_specs.py | 60 +- 
.../core/models/gpt/fine_grained_callables.py | 108 +- megatron/core/models/gpt/gpt_layer_specs.py | 7 +- megatron/core/models/gpt/gpt_model.py | 41 +- .../heterogeneous_layer_specs.py | 14 +- megatron/core/models/mamba/mamba_model.py | 25 +- .../core/models/multimodal/llava_model.py | 31 +- megatron/core/models/retro/decoder_spec.py | 13 +- megatron/core/models/retro/encoder_spec.py | 13 +- .../core/optimizer/cpu_offloading/README.md | 2 +- megatron/core/optimizer/muon.py | 3 +- megatron/core/optimizer/optimizer_config.py | 3 +- megatron/core/parallel_state.py | 2 +- .../fine_grained_activation_offload.py | 29 +- megatron/core/pipeline_parallel/utils.py | 4 +- megatron/core/rerun_state_machine.py | 7 +- megatron/core/resharding/__init__.py | 16 + .../core/resharding/copy_services/__init__.py | 7 + .../core/resharding/copy_services/base.py | 25 + .../copy_services/gloo_copy_service.py | 146 + .../copy_services/nccl_copy_service.py | 126 + megatron/core/resharding/execution.py | 66 + megatron/core/resharding/planner.py | 345 + megatron/core/resharding/refit.py | 85 + megatron/core/resharding/utils.py | 361 ++ megatron/core/ssm/mamba_block.py | 9 +- megatron/core/ssm/mamba_context_parallel.py | 120 +- megatron/core/ssm/mamba_layer.py | 12 +- megatron/core/ssm/mamba_mixer.py | 379 +- .../core/tensor_parallel/inference_layers.py | 108 +- megatron/core/tensor_parallel/layers.py | 11 +- megatron/core/timers.py | 11 + megatron/core/transformer/attention.py | 346 +- megatron/core/transformer/cuda_graphs.py | 190 +- .../core/transformer/dot_product_attention.py | 16 +- .../experimental_attention_variant/dsa.py | 10 +- megatron/core/transformer/identity_op.py | 20 +- megatron/core/transformer/mlp.py | 9 +- megatron/core/transformer/module.py | 42 +- megatron/core/transformer/moe/README.md | 20 + megatron/core/transformer/moe/experts.py | 53 +- megatron/core/transformer/moe/fused_a2a.py | 1 - megatron/core/transformer/moe/moe_layer.py | 47 +- 
megatron/core/transformer/moe/moe_utils.py | 15 +- megatron/core/transformer/moe/router.py | 1 - .../core/transformer/moe/shared_experts.py | 2 +- .../core/transformer/moe/token_dispatcher.py | 7 +- .../transformer/multi_latent_attention.py | 95 +- .../transformer/multi_token_prediction.py | 58 +- .../pipeline_parallel_layer_layout.py | 2 +- megatron/core/transformer/spec_utils.py | 27 +- .../core/transformer/transformer_block.py | 31 +- .../core/transformer/transformer_config.py | 24 +- .../core/transformer/transformer_layer.py | 118 +- megatron/core/typed_torch.py | 50 + megatron/core/utils.py | 37 +- megatron/post_training/arguments.py | 24 +- megatron/post_training/checkpointing.py | 5 +- megatron/post_training/model_builder.py | 5 +- megatron/post_training/utils.py | 80 +- megatron/rl/agent/api.py | 5 + megatron/rl/agent/weighted_multi_task.py | 8 +- megatron/rl/inference/megatron.py | 33 +- megatron/rl/parallel_utils.py | 171 + megatron/rl/rl_utils.py | 283 +- megatron/rl/sequence_packing_utils.py | 46 +- .../inference/inference_interface_server.py | 4 +- megatron/training/__init__.py | 2 +- megatron/training/arguments.py | 284 +- megatron/training/checkpointing.py | 37 +- megatron/training/common_config.py | 56 + megatron/training/datasets/data_samplers.py | 17 +- megatron/training/datasets/sft_dataset.py | 197 +- megatron/training/ft_integration.py | 25 +- megatron/training/initialize.py | 6 + megatron/training/resilience_config.py | 24 + megatron/training/tokenizer/sft_tokenizer.py | 5 - megatron/training/training.py | 354 +- .../{config.py => training_config.py} | 79 + megatron/training/utils.py | 60 +- pretrain_gpt.py | 30 +- pretrain_mamba.py | 162 +- pyproject.toml | 11 +- .../test_grpo_training_loop.py | 38 +- .../test_inference_regular_pipeline.py | 70 +- .../shell_test_utils/run_batch_ci_tests.sh | 255 + .../shell_test_utils/run_ci_test.sh | 30 +- .../shell_test_utils/start_interactive_job.sh | 1 + .../golden_values_dev_dgx_h100.json | 420 +- 
.../golden_values_dev_dgx_h100.json | 428 +- .../golden_values_dev_dgx_h100.json | 412 +- .../golden_values_dev_dgx_h100.json | 340 +- .../golden_values_dev_dgx_h100.json | 910 +-- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_h100.json | 836 +-- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_h100.json | 468 +- .../golden_values_dev_dgx_gb200.json | 162 + .../golden_values_dev_dgx_h100.json | 28 +- .../golden_values_dev_dgx_gb200.json | 162 + .../golden_values_dev_dgx_h100.json | 26 +- .../golden_values_dev_dgx_h100.json | 42 + .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 100 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_lts_dgx_a100.json | 100 +- .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- 
.../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_lts_dgx_a100.json | 100 +- .../golden_values_dev_dgx_gb200.json | 298 +- .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_lts_dgx_a100.json | 298 +- .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 916 +-- .../golden_values_lts_dgx_a100.json | 540 +- .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 202 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 202 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_lts_dgx_a100.json | 198 +- .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 100 +- .../golden_values_lts_dgx_a100.json | 100 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_lts_dgx_a100.json | 298 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_lts_dgx_a100.json | 298 +- .../model_config.yaml 
| 2 +- .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 972 +-- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 538 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 402 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 488 +- .../golden_values_lts_dgx_a100.json | 307 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 482 +- .../golden_values_lts_dgx_a100.json | 288 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 476 +- .../golden_values_lts_dgx_a100.json | 288 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 490 +- .../golden_values_lts_dgx_a100.json | 288 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 474 +- .../golden_values_lts_dgx_a100.json | 315 +- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 336 +- .../golden_values_dev_dgx_h100_2nd.json | 287 + .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 976 +-- 
.../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 538 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 970 +-- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 613 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 613 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 994 +-- .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 400 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 400 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 
598 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 764 +-- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 200 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 200 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 248 +- .../golden_values_lts_dgx_a100.json | 110 +- .../golden_values_dev_dgx_a100.json | 538 +- .../golden_values_dev_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_lts_dgx_a100.json | 352 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 287 + .../golden_values_dev_dgx_h100.json | 244 +- .../golden_values_lts_dgx_a100.json | 212 +- .../golden_values_dev_dgx_a100.json | 485 +- .../golden_values_dev_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_lts_dgx_a100.json | 348 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 434 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 232 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 537 ++ .../golden_values_dev_dgx_h100.json | 434 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_lts_dgx_a100.json | 206 +- .../golden_values_lts_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_gb200.json | 1 + .../golden_values_dev_dgx_h100.json | 228 +- .../golden_values_dev_dgx_h100.json | 225 +- .../golden_values_dev_dgx_h100.json | 3 
+- .../golden_values_dev_dgx_h100.json | 234 +- .../model_config.yaml | 4 + .../golden_values_dev_dgx_h100.json | 414 +- .../model_config.yaml | 3 + .../golden_values_dev_dgx_h100.json | 362 +- .../env_config.yaml | 5 + .../golden_values_dev_dgx_h100.json | 173 + .../model_config.yaml | 83 + .../env_config.yaml | 5 + .../golden_values_dev_dgx_h100.json | 287 + .../model_config.yaml | 80 + .../env_config.yaml | 5 + .../golden_values_dev_dgx_h100.json | 173 + .../model_config.yaml | 84 + .../model_config.yaml | 1 + .../golden_values_dev_dgx_h100.json | 5586 +++++++++++++++++ .../model_config.yaml | 76 + .../golden_values_dev_dgx_h100.json | 402 +- .../golden_values_dev_dgx_h100.json | 498 +- .../golden_values_dev_dgx_h100.json | 500 +- .../golden_values_dev_dgx_a100.json | 315 +- .../golden_values_dev_dgx_h100.json | 498 +- .../golden_values_lts_dgx_a100.json | 288 +- .../golden_values_dev_dgx_a100.json | 288 +- .../golden_values_dev_dgx_h100.json | 500 +- .../golden_values_lts_dgx_a100.json | 498 +- .../golden_values_dev_dgx_h100.json | 996 +-- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_lts_dgx_a100.json | 102 +- .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_lts_dgx_a100.json | 102 +- .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_dev_dgx_h100.json | 490 +- .../golden_values_dev_dgx_h100.json | 396 +- .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_a100.json | 288 +- .../golden_values_dev_dgx_h100.json | 440 +- .../golden_values_dev_dgx_a100.json | 288 +- .../golden_values_dev_dgx_h100.json | 440 +- .../golden_values_dev_dgx_a100.json | 288 +- .../golden_values_dev_dgx_h100.json | 300 +- 
.../golden_values_lts_dgx_a100.json | 300 +- .../golden_values_dev_dgx_a100.json | 288 +- .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_lts_dgx_a100.json | 300 +- .../golden_values_dev_dgx_a100.json | 288 +- .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_lts_dgx_a100.json | 352 +- .../golden_values_dev_dgx_h100.json | 996 +-- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_h100.json | 986 +-- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_h100.json | 200 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../model_config.yaml | 2 +- .../golden_values_dev_dgx_h100.json | 494 +- .../model_config.yaml | 2 +- .../golden_values_dev_dgx_h100.json | 398 +- .../model_config.yaml | 3 +- .../golden_values_dev_dgx_h100.json | 994 +-- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_h100.json | 598 +- .../golden_values_dev_dgx_h100_2nd.json | 644 ++ .../env_config.yaml | 5 + .../golden_values_dev_dgx_h100.json | 62 + .../model_config.yaml | 131 + .../model_config.yaml | 1 + .../golden_values_dev_dgx_h100.json | 300 +- .../golden_values_dev_dgx_h100.json | 486 +- .../golden_values_dev_dgx_h100.json | 52 +- .../golden_values_dev_dgx_a100.json | 538 +- .../golden_values_dev_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_h100.json | 200 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_a100.json | 538 +- .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_a100.json | 538 +- .../golden_values_dev_dgx_h100.json | 876 +-- .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100.json | 600 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_a100.json | 538 +- .../golden_values_dev_dgx_h100.json | 200 +- .../golden_values_dev_dgx_a100.json | 538 +- .../golden_values_dev_dgx_a100_2nd.json | 537 ++ .../golden_values_dev_dgx_h100.json | 200 +- 
.../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../golden_values_dev_dgx_a100.json | 538 +- .../golden_values_dev_dgx_h100.json | 200 +- .../golden_values_dev_dgx_h100.json | 200 +- .../golden_values_dev_dgx_h100.json | 200 +- .../golden_values_dev_dgx_h100_2nd.json | 537 ++ .../python_scripts/auto_reminder_github.py | 4 +- .../python_scripts/download_golden_values.py | 47 +- .../python_scripts/recipe_parser.py | 2 +- tests/test_utils/recipes/gpt-gb200.yaml | 287 +- tests/test_utils/recipes/gpt-grpo.yaml | 19 +- .../recipes/mamba-dynamic-inference.yaml | 7 +- .../recipes/mamba-static-inference.yaml | 4 +- tests/test_utils/recipes/moe-grpo.yaml | 61 + tests/test_utils/recipes/moe.yaml | 82 +- tests/unit_tests/conftest.py | 8 - tests/unit_tests/data/test_builder.py | 234 + .../models/test_bert_model.py | 4 +- .../test_layer_wise_optimizer.py | 13 +- .../test_pipeline_parallel_layout.py | 1 + .../dist_checkpointing/test_serialization.py | 18 - tests/unit_tests/dist_checkpointing/utils.py | 1 + .../fsdp/test_mfsdp_fully_shard.py | 164 +- .../test_grad_sync_with_expert_parallel.py | 43 +- .../distributed/test_param_and_grad_buffer.py | 32 +- .../attention_metadata/test_tensor_ops.py | 302 + .../contexts/test_dynamic_context.py | 105 +- .../inference/engines/test_dynamic_engine.py | 38 +- tests/unit_tests/inference/test_stop_words.py | 226 + .../inference/test_wandb_logging.py | 12 +- .../test_simple_text_generation_controller.py | 2 + tests/unit_tests/models/test_gpt_model.py | 112 +- tests/unit_tests/models/test_mamba_model.py | 189 +- .../unit_tests/models/test_mamba_moe_model.py | 572 ++ .../pipeline_parallel/test_pipeline_layout.py | 4 +- .../pipeline_parallel/test_schedules.py | 10 +- .../test_modelopt_model_builder.py | 68 + .../unit_tests/resharding/test_model_swap.py | 278 + tests/unit_tests/test_checkpointing.py | 1 + tests/unit_tests/test_fp8_param.py | 22 +- tests/unit_tests/test_optimizer.py | 1 + tests/unit_tests/test_rl_utils.py | 5 +- 
tests/unit_tests/test_training.py | 3 + .../transformer/moe/test_aux_loss.py | 9 +- .../transformer/moe/test_routers.py | 8 +- .../transformer/moe/test_token_dispatcher.py | 13 +- .../unit_tests/transformer/test_attention.py | 7 +- .../transformer/test_cuda_graphs.py | 5 +- .../test_multi_latent_attention.py | 72 +- tools/build_sequences_per_dataset.py | 117 + tools/run_dynamic_text_generation_server.py | 109 + tools/run_inference_performance_test.py | 1 + train_rl.py | 8 + uv.lock | 1104 ++-- 624 files changed, 121207 insertions(+), 32930 deletions(-) create mode 100644 .github/scripts/sync_team_usergroups.py create mode 100644 .github/workflows/sync-team-usergroups.yml create mode 100644 docs/advanced/index.md create mode 100644 docs/api-guide/core/datasets.md rename docs/api-guide/{ => core}/dist_checkpointing.md (100%) rename docs/api-guide/{ => core}/dist_checkpointing.strategies.md (100%) rename docs/api-guide/{ => core}/distributed.md (100%) rename docs/api-guide/{ => core}/fusions.md (100%) create mode 100644 docs/api-guide/core/index.md rename docs/api-guide/{ => core}/pipeline_parallel.md (100%) rename docs/api-guide/{ => core}/tensor_parallel.md (100%) rename docs/api-guide/{ => core}/transformer.md (100%) delete mode 100644 docs/api-guide/datasets.md delete mode 100644 docs/api-guide/datasets_readme.md create mode 100644 docs/api-guide/internal/index.md rename docs/api-guide/{ => internal}/num_microbatches_calculator.md (100%) rename docs/api-guide/{ => internal}/optimizer_param_scheduler.md (100%) create mode 100644 docs/api-guide/models/index.md rename docs/api-guide/{ => models}/models.bert.md (100%) rename docs/api-guide/{ => models}/models.gpt.md (100%) rename docs/api-guide/{ => models}/models.md (100%) rename docs/api-guide/{ => models}/models.t5.md (100%) delete mode 100644 docs/api-guide/moe.md delete mode 100644 docs/api-guide/optimizer_cpu_offload.md delete mode 100644 docs/api-guide/tokenizers.md create mode 100644 
docs/developer/contribute.md create mode 100644 docs/developer/generate_docs.md create mode 100644 docs/developer/oncall.md create mode 100644 docs/developer/submit.md create mode 100644 docs/get-started/quickstart.md create mode 100644 docs/models/index.md create mode 100644 docs/models/llms.md create mode 100644 docs/models/multimodal.md create mode 100644 docs/user-guide/data-preparation.md rename docs/{api-guide => user-guide/features}/context_parallel.md (97%) rename docs/{api-guide => user-guide/features}/custom_fsdp.md (98%) rename docs/{api-guide => user-guide/features}/dist_optimizer.md (95%) create mode 100644 docs/user-guide/features/fine_grained_activation_offloading.md create mode 100644 docs/user-guide/features/index.md create mode 100644 docs/user-guide/features/megatron_energon.md create mode 100644 docs/user-guide/features/megatron_rl.md create mode 100644 docs/user-guide/features/moe.md rename docs/{api-guide => user-guide/features}/multi_latent_attention.md (100%) rename docs/{api-guide => user-guide/features}/multi_token_prediction.md (57%) create mode 100644 docs/user-guide/features/optimizer_cpu_offload.md rename docs/{api-guide => user-guide/features}/pipeline_parallel_layout.md (100%) create mode 100644 docs/user-guide/features/tokenizers.md create mode 100644 docs/user-guide/parallelism-guide.md create mode 100644 docs/user-guide/training-examples.md create mode 100644 examples/post_training/modelopt/conf/nvidia/Nemotron-H-47B-Reasoning-128K.sh create mode 100644 examples/post_training/modelopt/conf/nvidia/Nemotron-H-56B-Base-8K.sh mode change 100644 => 100755 examples/post_training/modelopt/convert.sh mode change 100644 => 100755 examples/post_training/modelopt/export.sh mode change 100644 => 100755 examples/post_training/modelopt/generate.sh mode change 100644 => 100755 examples/post_training/modelopt/mmlu.sh mode change 100644 => 100755 examples/post_training/modelopt/offline_feature_extract.sh mode change 100644 => 100755 
examples/post_training/modelopt/quantize.sh mode change 100644 => 100755 examples/post_training/modelopt/validate.sh create mode 100644 examples/rl/environment_configs/gsm8k_nanov3.yaml create mode 100644 examples/rl/model_configs/nemotron6_3b_moe.sh create mode 100644 examples/rl/model_configs/qwen3_30b_a3b_moe.sh delete mode 100644 megatron/core/dist_checkpointing/strategies/tensorstore.py delete mode 100644 megatron/core/dist_checkpointing/strategies/two_stage.py delete mode 100644 megatron/core/dist_checkpointing/strategies/zarr.py create mode 100644 megatron/core/inference/communication/torch_symm_triton/fused_collectives.py create mode 100644 megatron/core/inference/contexts/attention_context/triton/tensor_ops.py create mode 100644 megatron/core/inference/text_generation_server/dynamic_text_gen_server/__init__.py create mode 100644 megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/__init__.py create mode 100644 megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/chat_completions.py create mode 100644 megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/common.py create mode 100644 megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/completions.py create mode 100644 megatron/core/inference/text_generation_server/dynamic_text_gen_server/flask_server.py create mode 100644 megatron/core/inference/text_generation_server/dynamic_text_gen_server/tokenization.py create mode 100644 megatron/core/resharding/__init__.py create mode 100644 megatron/core/resharding/copy_services/__init__.py create mode 100644 megatron/core/resharding/copy_services/base.py create mode 100644 megatron/core/resharding/copy_services/gloo_copy_service.py create mode 100644 megatron/core/resharding/copy_services/nccl_copy_service.py create mode 100644 megatron/core/resharding/execution.py create mode 100644 megatron/core/resharding/planner.py create mode 100644 
megatron/core/resharding/refit.py create mode 100644 megatron/core/resharding/utils.py create mode 100644 megatron/core/typed_torch.py create mode 100644 megatron/rl/parallel_utils.py create mode 100644 megatron/training/common_config.py create mode 100644 megatron/training/resilience_config.py rename megatron/training/{config.py => training_config.py} (57%) create mode 100755 tests/functional_tests/shell_test_utils/run_batch_ci_tests.sh create mode 100644 tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/golden_values_dev_dgx_h100.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_gb200.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100_2nd.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100_2nd.json 
create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_gb200.json 
create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_gb200.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100_2nd.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json create mode 100644 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgx_gb200.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/env_config.yaml create mode 100644 tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/model_config.yaml create mode 100644 tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/env_config.yaml create mode 100644 
tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/golden_values_dev_dgx_h100.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/model_config.yaml create mode 100644 tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/env_config.yaml create mode 100644 tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/model_config.yaml create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/golden_values_dev_dgx_h100.json create mode 100644 tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/model_config.yaml create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json create mode 100644 
tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/env_config.yaml create mode 100644 tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/golden_values_dev_dgx_h100.json create mode 100644 tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/model_config.yaml create mode 100644 tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100_2nd.json create mode 100644 tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json create mode 100644 tests/test_utils/recipes/moe-grpo.yaml create mode 100644 tests/unit_tests/inference/contexts/attention_metadata/test_tensor_ops.py create mode 100644 tests/unit_tests/inference/test_stop_words.py create mode 100644 tests/unit_tests/models/test_mamba_moe_model.py create mode 100644 tests/unit_tests/post_training/test_modelopt_model_builder.py create mode 100644 tests/unit_tests/resharding/test_model_swap.py 
create mode 100644 tools/build_sequences_per_dataset.py create mode 100644 tools/run_dynamic_text_generation_server.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7613dc59da5..5b2db410381 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,5 +1,7 @@ * @NVIDIA/core-nemo @NVIDIA/core-devtech +megatron/core/transformer/cuda_graphs.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/cuda-graphs + .gitlab/ @NVIDIA/ci .github/ @NVIDIA/ci .gitlab-ci.yml @NVIDIA/ci diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 10eef953d5d..9662160da10 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -9,7 +9,8 @@ assignees: '' **Describe the bug** -A clear and concise description of what the bug is. +A clear and concise description of what the bug is. Tag the [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall) +to get oncall's attention to this issue. **Steps/Code to reproduce bug** @@ -25,4 +26,4 @@ A clear and concise description of what you expected to happen. **Additional context** -Add any other context about the problem here. +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 7334f687d1b..b0da6789a8e 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -10,6 +10,9 @@ assignees: '' **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] +Tag the [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall) +to get oncall's attention to this issue. + **Describe the solution you'd like** A clear and concise description of what you want to happen. 
diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md index b3d89a0ac1a..899ff44d6a6 100644 --- a/.github/ISSUE_TEMPLATE/question.md +++ b/.github/ISSUE_TEMPLATE/question.md @@ -9,4 +9,5 @@ assignees: '' --- **Your question** -Ask a clear and concise question about Megatron-LM. +Ask a clear and concise question about Megatron-LM. Tag the [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall) +to get oncall's attention to this issue. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/regression.md b/.github/ISSUE_TEMPLATE/regression.md index 10078d23a6e..180db633cb8 100644 --- a/.github/ISSUE_TEMPLATE/regression.md +++ b/.github/ISSUE_TEMPLATE/regression.md @@ -8,7 +8,8 @@ assignees: '' --- **Describe the regression** -A clear and concise description of what the regression is. +A clear and concise description of what the regression is. Tag the [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall) +to get oncall's attention to this issue. **To Reproduce** Steps to reproduce the behavior. The easier it is to reproduce the faster it will get maintainer attention. diff --git a/.github/actions/action.yml b/.github/actions/action.yml index 5fba1ca1241..dfc6d79688e 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -11,28 +11,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-name: 'Test Template' -description: 'Template for running NeMo tests in a containerized environment' +name: "Test Template" +description: "Template for running NeMo tests in a containerized environment" inputs: container-image: - description: 'Container image to use for test' + description: "Container image to use for test" required: true timeout: - description: 'Max runtime of test in minutes' + description: "Max runtime of test in minutes" required: false - default: '30' + default: "30" script: - description: 'Test script to execute' + description: "Test script to execute" required: true is-optional: - description: 'Pass this job on failure.' + description: "Pass this job on failure." required: false - default: 'false' + default: "false" is_unit_test: - description: 'Upload coverage as unit test' + description: "Upload coverage as unit test" required: false - default: 'false' + default: "false" tag: description: Latest or legacy test suite required: true @@ -43,11 +43,14 @@ inputs: description: Model to launch required: false PAT: - description: 'GitHub Personal Access Token' + description: "GitHub Personal Access Token" + required: true + is_ci_workload: + description: "Is CI workload" required: true runs: - using: 'composite' + using: "composite" steps: - name: Print node name shell: bash -x -e -u -o pipefail {0} @@ -124,9 +127,11 @@ runs: id: has-run-functional-tests-label env: GH_TOKEN: ${{ github.token }} + IS_CI_WORKLOAD: ${{ inputs.is_ci_workload }} run: | PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} - HAS_RUN_FUNCTIONAL_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run functional tests")') || echo "false" + HAS_RUN_FUNCTIONAL_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. 
== "Run functional tests")') || echo "$IS_CI_WORKLOAD" + HAS_RUN_FUNCTIONAL_TESTS_LABEL=${HAS_RUN_FUNCTIONAL_TESTS_LABEL:-$IS_CI_WORKLOAD} echo "main=$HAS_RUN_FUNCTIONAL_TESTS_LABEL" | tee -a $GITHUB_OUTPUT - name: Create run-script (e2e test) diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml index 8e703301ca7..8e92aabe027 100644 --- a/.github/copy-pr-bot.yaml +++ b/.github/copy-pr-bot.yaml @@ -1,4 +1,4 @@ enabled: true auto_sync_draft: false auto_sync_ready: true -trustees_override: ["AAnoosheh", "ArEsKay3", "Autumn1998", "BestJuly", "BoxiangW", "ChenhanYu", "FDecaYed", "HaochenYuan", "ISEEKYAN", "JRD971000", "QiZhangNV", "ShriyaRishab", "Victarry", "Wohox", "ZhiyuLi-Nvidia", "aklife97", "ananthsub", "asolergi-nv", "buptzyb", "chtruong814", "cspades", "cuichenx", "deepakn94", "dimapihtar", "duncanriach", "erhoo82", "ericharper", "fanshiqing", "gautham-kollu", "guyueh1", "hxbai", "jaredcasper", "jiemingz", "jkamalu", "jon-barker", "kanz-nv", "kevalmorabia97", "ko3n1g", "kunlunl", "kvareddy", "layalir", "lhb8125", "lmcafee-nvidia", "maanug-nv", "mathemakitten", "matthieule", "mehraakash", "mkhona-nvidia", "pablo-garay", "parthmannan", "pthombre", "rogerwaleffe", "sanandaraj5597", "santhnm2", "sbak5", "shanmugamr1992", "shifangx", "shjwudp", "sidsingh-nvidia", "skyw", "tdene", "theothermike", "thomasdhc", "trintamaki", "tylerpoon", "wdykas", "xiaoyao0115", "xuwchen", "yanring", "yaox12", "yaoyu-33", "yashaswikarnati", "yeyu-nvidia", "yobibyte", "youngeunkwon0405", "yuzhongw-nvidia", "zhongbozhu"] +trustees_override: ["AAnoosheh", "ArEsKay3", "Autumn1998", "BestJuly", "BoxiangW", "ChenhanYu", "FDecaYed", "HaochenYuan", "ISEEKYAN", "JRD971000", "Phlip79", "QiZhangNV", "ShriyaRishab", "Victarry", "Wohox", "ZhiyuLi-Nvidia", "ahmadki", "aklife97", "ananthsub", "asolergi-nv", "buptzyb", "chtruong814", "cspades", "cuichenx", "deepakn94", "dimapihtar", "duncanriach", "erhoo82", "ericharper", "fanshiqing", "frsun-nvda", "gautham-kollu", "gdengk", "guyueh1", 
"hxbai", "jalbericiola", "jaredcasper", "jenchen13", "jiemingz", "jingqiny-99", "jkamalu", "jon-barker", "jstjohn", "kanz-nv", "kevalmorabia97", "ko3n1g", "kunlunl", "kvareddy", "layalir", "lhb8125", "lmcafee-nvidia", "maanug-nv", "mathemakitten", "matthieule", "mehraakash", "mkhona-nvidia", "pablo-garay", "parthmannan", "pthombre", "rogerwaleffe", "sanandaraj5597", "santhnm2", "sbak5", "shanmugamr1992", "shifangx", "shjwudp", "sidsingh-nvidia", "skyw", "sudhakarsingh27", "tdene", "theothermike", "thomasdhc", "trintamaki", "tylerpoon", "wdykas", "xiaoyao0115", "xuwchen", "yanring", "yaox12", "yaoyu-33", "yashaswikarnati", "yeyu-nvidia", "yobibyte", "youngeunkwon0405", "yuzhongw-nvidia", "zhongbozhu"] diff --git a/.github/oncall_schedule.json b/.github/oncall_schedule.json index 7dd43875219..5a9f35f5b5a 100644 --- a/.github/oncall_schedule.json +++ b/.github/oncall_schedule.json @@ -1,2 +1,50 @@ -[] - +[ + { + "user": "maanug-nv", + "date": "2026-01-21" + }, + { + "user": "dimapihtar", + "date": "2026-01-28" + }, + { + "user": "gautham-kollu", + "date": "2026-02-04" + }, + { + "user": "janEbert", + "date": "2026-02-11" + }, + { + "user": "Phlip79", + "date": "2026-02-18" + }, + { + "user": "asolergi-nv", + "date": "2026-02-25" + }, + { + "user": "BoxiangW", + "date": "2026-03-04" + }, + { + "user": "maanug-nv", + "date": "2026-03-11" + }, + { + "user": "dimapihtar", + "date": "2026-03-18" + }, + { + "user": "gautham-kollu", + "date": "2026-03-25" + }, + { + "user": "janEbert", + "date": "2026-04-01" + }, + { + "user": "maanug-nv", + "date": "2026-04-08" + } +] diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 7f7dedd27ad..5cd5138eb69 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,7 +1,7 @@ # What does this PR do ? -:warning: For major changes (either in lines of code or in its impact), please make sure to first share discuss a design-doc with the team. 
+:warning: For major changes (either in lines of code or in its impact), please make sure to first share a design doc with the team. If you're unsure what's the best way to do so, contact the @mcore-oncall. ## Contribution process @@ -31,6 +31,8 @@ The following process is enforced via the CODEOWNERS file for changes into `mega
    For MRs into `main` branch +Feel free to message or comment the @mcore-oncall to help accelerate your merge into main. The less complex your PR is, the faster it will be approved and merged! + #### (Step 1): Add PR label `Expert Review` #### (Step 2): Collect the expert reviewers reviews diff --git a/.github/scripts/oncall_manager.py b/.github/scripts/oncall_manager.py index 4bb415eb7d4..332fcb1c8cc 100644 --- a/.github/scripts/oncall_manager.py +++ b/.github/scripts/oncall_manager.py @@ -19,13 +19,21 @@ import argparse from datetime import datetime, timedelta, timezone +from slack_sdk import WebClient +from slack_sdk.errors import SlackApiError + # Constants GITHUB_API_URL = "https://api.github.com" SCHEDULE_FILE = ".github/oncall_schedule.json" ROTATION_TEAM_SLUG = "mcore-oncall-rotation" -ACTIVE_ONCALL_TEAM_SLUG = "megatron-oncall" +ACTIVE_ONCALL_TEAM_SLUG = "mcore-oncall" +SLACK_USERGROUP_HANDLE = "mcore-oncall" TARGET_WEEKS = 12 +# Caches for email and Slack lookups +_email_cache = {} +_slack_id_cache = {} + def get_headers(): token = os.environ.get("GH_TOKEN") if not token: @@ -55,7 +63,7 @@ def get_team_members(org, team_slug): url = f"{GITHUB_API_URL}/orgs/{org}/teams/{team_slug}/members" headers = get_headers() - members = [] + members = set() page = 1 while True: resp = requests.get(f"{url}?per_page=100&page={page}", headers=headers) @@ -67,13 +75,166 @@ def get_team_members(org, team_slug): if not data: break - members.extend([m['login'] for m in data]) + members.update([m['login'] for m in data]) if len(data) < 100: break page += 1 return members +def get_user_email(username): + """Get user's email from GitHub, prioritizing @nvidia.com emails. + + Checks in order: + 1. Public profile email + 2. Recent commits in the repository + """ + if username in _email_cache: + return _email_cache[username] + + headers = get_headers() + public_email = None + + try: + # 1. 
Try to get user's public profile email first + resp = requests.get(f"{GITHUB_API_URL}/users/{username}", headers=headers) + if resp.status_code == 200: + user_data = resp.json() + email = user_data.get('email') + if email and not email.endswith("@users.noreply.github.com"): + if email.endswith("@nvidia.com"): + _email_cache[username] = email + return email + # Store non-nvidia email as fallback + public_email = email + + # 2. Check recent commits in the repository for @nvidia.com email + repo_env = os.environ.get("GITHUB_REPOSITORY", "NVIDIA/Megatron-LM") + commits_url = f"{GITHUB_API_URL}/repos/{repo_env}/commits?author={username}&per_page=10" + resp = requests.get(commits_url, headers=headers) + + if resp.status_code == 200: + commits = resp.json() + for commit in commits: + # Get email from commit author + commit_data = commit.get('commit', {}) + author_data = commit_data.get('author', {}) + email = author_data.get('email') + + if email and not email.endswith("@users.noreply.github.com"): + if email.endswith("@nvidia.com"): + _email_cache[username] = email + print(f"Found @nvidia.com email for {username} from commits: {email}") + return email + elif public_email is None: + public_email = email + + # 3. 
Use public email if found, otherwise fallback + if public_email: + _email_cache[username] = public_email + print(f"Using public email for {username}: {public_email}") + return public_email + + # Fallback to noreply email + fallback = f"{username}@users.noreply.github.com" + _email_cache[username] = fallback + print(f"Warning: No email found for {username}, using fallback: {fallback}") + return fallback + + except Exception as e: + print(f"Warning: Could not get email for {username}: {e}") + fallback = f"{username}@users.noreply.github.com" + _email_cache[username] = fallback + return fallback + +def get_slack_client(): + """Get Slack WebClient if token is available.""" + slack_token = os.environ.get("SLACK_TOKEN") + if not slack_token: + return None + + return WebClient(token=slack_token) + +def get_slack_user_id(slack_client, email): + """Get Slack user ID from email.""" + if not slack_client: + return None + + if email in _slack_id_cache: + return _slack_id_cache[email] + + try: + response = slack_client.users_lookupByEmail(email=email) + user_id = response["user"]["id"] + _slack_id_cache[email] = user_id + return user_id + except SlackApiError as e: + print(f"Warning: Could not find Slack user for {email}: {e.response['error']}") + _slack_id_cache[email] = None + return None + +def get_slack_usergroup_id(slack_client, handle): + """Get Slack usergroup ID from handle.""" + if not slack_client: + return None + + try: + response = slack_client.usergroups_list(include_users=True) + for usergroup in response.get("usergroups", []): + if usergroup.get("handle") == handle: + return usergroup.get("id"), usergroup.get("users", []) + print(f"Warning: Slack usergroup '{handle}' not found") + return None, [] + except SlackApiError as e: + print(f"Warning: Could not list Slack usergroups: {e.response['error']}") + return None, [] + +def update_slack_usergroup(new_oncall_username, old_members_usernames): + """ + Updates the Slack usergroup to contain only the new oncall user. 
+ Adds new oncall first, then removes old members (usergroups need at least one member). + """ + slack_client = get_slack_client() + if not slack_client: + print("Slack token not configured, skipping Slack usergroup update") + return + + # Get the new oncall's email and Slack user ID + new_email = get_user_email(new_oncall_username) + new_slack_id = get_slack_user_id(slack_client, new_email) + + if not new_slack_id: + print(f"Could not find Slack user ID for {new_oncall_username} ({new_email}), skipping Slack update") + return + + # Get the usergroup ID and current members + usergroup_id, current_slack_members = get_slack_usergroup_id(slack_client, SLACK_USERGROUP_HANDLE) + + if not usergroup_id: + print(f"Could not find Slack usergroup '{SLACK_USERGROUP_HANDLE}', skipping Slack update") + return + + try: + # Step 1: Add new oncall first (include current members to avoid removing anyone yet) + # This ensures usergroup always has at least one member + if new_slack_id not in current_slack_members: + updated_members = list(set(current_slack_members + [new_slack_id])) + slack_client.usergroups_users_update( + usergroup=usergroup_id, + users=updated_members + ) + print(f"Added {new_oncall_username} to Slack usergroup '{SLACK_USERGROUP_HANDLE}'") + + # Step 2: Now set the usergroup to contain only the new oncall + slack_client.usergroups_users_update( + usergroup=usergroup_id, + users=[new_slack_id] + ) + print(f"Updated Slack usergroup '{SLACK_USERGROUP_HANDLE}' to contain only {new_oncall_username}") + + except SlackApiError as e: + print(f"Failed to update Slack usergroup: {e.response['error']}") + def load_schedule(): if not os.path.exists(SCHEDULE_FILE): return [] @@ -111,14 +272,19 @@ def update_active_oncall_team(org, new_oncall): print(f"Failed to add {new_oncall} to {ACTIVE_ONCALL_TEAM_SLUG}: {resp.status_code} {resp.text}") # 3. 
Remove everyone else + old_members = [] for member in current_members: - if member != new_oncall: + if member not in [new_oncall, 'svcnvidia-nemo-ci']: + old_members.append(member) url = f"{GITHUB_API_URL}/orgs/{org}/teams/{ACTIVE_ONCALL_TEAM_SLUG}/memberships/{member}" resp = requests.delete(url, headers=get_headers()) if resp.status_code == 204: print(f"Removed {member} from {ACTIVE_ONCALL_TEAM_SLUG}") else: print(f"Failed to remove {member} from {ACTIVE_ONCALL_TEAM_SLUG}: {resp.status_code} {resp.text}") + + # 4. Update Slack usergroup (add new oncall first, then remove old members) + update_slack_usergroup(new_oncall, old_members) def rotate_schedule(repo_owner, dry_run=False): schedule = load_schedule() @@ -182,6 +348,9 @@ def ensure_schedule_filled(schedule, repo_owner): if not members: print(f"Warning: No team members found in {ROTATION_TEAM_SLUG}.") return + if 'svcnvidia-nemo-ci' in members: + members.remove('svcnvidia-nemo-ci') + members = list(members) members.sort() # Deterministic order @@ -222,25 +391,16 @@ def ensure_schedule_filled(schedule, repo_owner): print(f"Appended: {new_entry}") def assign_reviewer(pr_number): - """Assigns the current oncall as the reviewer for the PR.""" - schedule = load_schedule() - if not schedule: - print("Error: Schedule is empty. 
Cannot assign reviewer.") - sys.exit(1) - - current_entry = schedule[0] - current_oncall = current_entry['user'] - print(f"Current oncall: {current_oncall} (Since {current_entry['date']})") - + """Assigns the mcore-oncall team as the reviewer for the PR.""" owner, repo = get_repo_info() url = f"{GITHUB_API_URL}/repos/{owner}/{repo}/pulls/{pr_number}/requested_reviewers" - # We can assign the user directly - data = {"reviewers": [current_oncall]} + # Assign the oncall team as reviewer + data = {"team_reviewers": [ACTIVE_ONCALL_TEAM_SLUG]} resp = requests.post(url, headers=get_headers(), json=data) if resp.status_code in [201, 200]: - print(f"Successfully requested review from {current_oncall}") + print(f"Successfully requested review from team NVIDIA/{ACTIVE_ONCALL_TEAM_SLUG}") else: print(f"Failed to request review: {resp.status_code} {resp.text}") sys.exit(1) diff --git a/.github/scripts/sync_team_usergroups.py b/.github/scripts/sync_team_usergroups.py new file mode 100644 index 00000000000..429387fc6de --- /dev/null +++ b/.github/scripts/sync_team_usergroups.py @@ -0,0 +1,527 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Syncs GitHub team membership to Slack user groups. + +This script reads members from GitHub teams and updates the corresponding +Slack user groups to match. 
+""" + +import os +import sys +import argparse +import requests + +from slack_sdk import WebClient +from slack_sdk.errors import SlackApiError + +# Constants +GITHUB_API_URL = "https://api.github.com" +PARENT_TEAM_SLUG = "mcore-reviewers" + +# Caches for email and Slack lookups +_email_cache = {} +_slack_id_cache = {} +_usergroups_cache = None + + +def get_headers(): + """Get GitHub API headers with authentication.""" + token = os.environ.get("GH_TOKEN") + if not token: + token = os.environ.get("GITHUB_TOKEN") + + if not token: + print("Error: GH_TOKEN or GITHUB_TOKEN not set") + sys.exit(1) + + return { + "Authorization": f"token {token}", + "Accept": "application/vnd.github.v3+json", + } + + +def get_org(): + """Returns the organization from GITHUB_REPOSITORY env var or default.""" + repo_env = os.environ.get("GITHUB_REPOSITORY", "NVIDIA/Megatron-LM") + return repo_env.split("/")[0] + + +def github_team_to_slack_usergroup(team_slug): + """Convert a GitHub team slug to a Slack usergroup handle. 
+ + Rules: + - Base pattern: "test" -> "mcore-test" + - Remove "core-" prefix: "core-test" -> "mcore-test" + - Remove "megatron-" prefix: "megatron-test" -> "mcore-test" + - Remove "-and-": "test1-and-test2" -> "mcore-test1-test2" + - Shorten "mixture-of-experts" to "moe" + - Shorten "pipeline-parallelism" to "pp" + - Shorten "reinforcement-learning" to "rl" + """ + name = team_slug + + # Apply shortenings first (before removing prefixes) + name = name.replace("mixture-of-experts", "moe") + name = name.replace("pipeline-parallelism", "pp") + name = name.replace("reinforcement-learning", "rl") + + # Remove prefixes + if name.startswith("core-"): + name = name[5:] # Remove "core-" + elif name.startswith("megatron-"): + name = name[9:] # Remove "megatron-" + + # Remove "-and-" + name = name.replace("-and-", "-") + + return f"mcore-{name}" + + +def get_child_teams(org, parent_team_slug): + """Fetches child teams of a parent GitHub team.""" + # First get the team ID + url = f"{GITHUB_API_URL}/orgs/{org}/teams/{parent_team_slug}" + headers = get_headers() + + resp = requests.get(url, headers=headers) + if resp.status_code != 200: + print(f"Error fetching parent team '{parent_team_slug}': {resp.status_code} {resp.text}") + return [] + + parent_team_id = resp.json().get("id") + if not parent_team_id: + print(f"Error: Could not get ID for team '{parent_team_slug}'") + return [] + + # Now fetch child teams + url = f"{GITHUB_API_URL}/orgs/{org}/teams/{parent_team_slug}/teams" + child_teams = [] + page = 1 + + while True: + resp = requests.get(f"{url}?per_page=100&page={page}", headers=headers) + if resp.status_code != 200: + print(f"Error fetching child teams: {resp.status_code} {resp.text}") + return child_teams + + data = resp.json() + if not data: + break + + child_teams.extend([team["slug"] for team in data]) + if len(data) < 100: + break + page += 1 + + return child_teams + + +def get_team_members(org, team_slug): + """Fetches members of the GitHub team.""" + url = 
f"{GITHUB_API_URL}/orgs/{org}/teams/{team_slug}/members" + headers = get_headers() + + members = set() + page = 1 + while True: + resp = requests.get(f"{url}?per_page=100&page={page}", headers=headers) + if resp.status_code == 404: + print(f"Warning: Team '{team_slug}' not found in org '{org}'") + return set() + if resp.status_code != 200: + print(f"Error fetching team members: {resp.status_code} {resp.text}") + return set() + + data = resp.json() + if not data: + break + + members.update([m["login"] for m in data]) + if len(data) < 100: + break + page += 1 + + return members + + +def get_user_email(username): + """Get user's email from GitHub, prioritizing @nvidia.com emails. + + Checks in order: + 1. Public profile email + 2. Recent commits in the repository + """ + if username in _email_cache: + return _email_cache[username] + + headers = get_headers() + public_email = None + + try: + # 1. Try to get user's public profile email first + resp = requests.get(f"{GITHUB_API_URL}/users/{username}", headers=headers) + if resp.status_code == 200: + user_data = resp.json() + email = user_data.get('email') + if email and not email.endswith("@users.noreply.github.com"): + if email.endswith("@nvidia.com"): + _email_cache[username] = email + return email + # Store non-nvidia email as fallback + public_email = email + + # 2. 
Check recent commits in the repository for @nvidia.com email + repo_env = os.environ.get("GITHUB_REPOSITORY", "NVIDIA/Megatron-LM") + commits_url = f"{GITHUB_API_URL}/repos/{repo_env}/commits?author={username}&per_page=10" + resp = requests.get(commits_url, headers=headers) + + if resp.status_code == 200: + commits = resp.json() + for commit in commits: + # Get email from commit author + commit_data = commit.get('commit', {}) + author_data = commit_data.get('author', {}) + email = author_data.get('email') + + if email and not email.endswith("@users.noreply.github.com"): + if email.endswith("@nvidia.com"): + _email_cache[username] = email + print(f"Found @nvidia.com email for {username} from commits") + return email + elif public_email is None: + public_email = email + + # 3. Use public email if found, otherwise fallback + if public_email: + _email_cache[username] = public_email + print(f"Using public email for {username}: {public_email}") + return public_email + + # Fallback to noreply email + fallback = f"{username}@users.noreply.github.com" + _email_cache[username] = fallback + print(f"Warning: No email found for {username}, using fallback: {fallback}") + return fallback + + except Exception as e: + print(f"Warning: Could not get email for {username}: {e}") + fallback = f"{username}@users.noreply.github.com" + _email_cache[username] = fallback + return fallback + + +def get_slack_client(): + """Get Slack WebClient if token is available.""" + slack_token = os.environ.get("SLACK_TOKEN") + if not slack_token: + return None + + return WebClient(token=slack_token) + + +def get_slack_user_id(slack_client, email): + """Get Slack user ID from email.""" + if not slack_client: + return None + + if email in _slack_id_cache: + return _slack_id_cache[email] + + try: + response = slack_client.users_lookupByEmail(email=email) + user_id = response["user"]["id"] + _slack_id_cache[email] = user_id + return user_id + except SlackApiError as e: + print(f"Warning: Could not find 
Slack user for {email}: {e.response['error']}") + _slack_id_cache[email] = None + return None + + +def fetch_all_usergroups(slack_client): + """Fetch all Slack usergroups once and cache them.""" + global _usergroups_cache + + if _usergroups_cache is not None: + return _usergroups_cache + + if not slack_client: + _usergroups_cache = {} + return _usergroups_cache + + try: + print("Fetching Slack usergroups...") + response = slack_client.usergroups_list(include_users=True) + _usergroups_cache = {} + for usergroup in response.get("usergroups", []): + handle = usergroup.get("handle") + if handle: + _usergroups_cache[handle] = { + "id": usergroup.get("id"), + "users": usergroup.get("users", []), + } + print(f"Fetched {len(_usergroups_cache)} usergroups") + return _usergroups_cache + except SlackApiError as e: + print(f"Warning: Could not list Slack usergroups: {e.response['error']}") + _usergroups_cache = {} + return _usergroups_cache + + +def get_slack_usergroup_id(slack_client, handle): + """Get Slack usergroup ID from handle.""" + usergroups = fetch_all_usergroups(slack_client) + + if handle in usergroups: + return usergroups[handle]["id"], usergroups[handle]["users"] + + return None, [] + + +def github_team_to_usergroup_name(team_slug): + """Convert a GitHub team slug to a Slack usergroup display name. + + Example: "test3" -> "Megatron Core Experts: Test3" + """ + # Title case each word separated by hyphens, then join with spaces + words = team_slug.split("-") + title_cased = " ".join(word.capitalize() for word in words) + return f"Megatron Core Experts: {title_cased}" + + +def create_slack_usergroup(slack_client, handle, team_slug): + """Create a new Slack usergroup. 
+ + Args: + slack_client: Slack WebClient instance + handle: The usergroup handle (e.g., "mcore-test") + team_slug: The GitHub team slug (used for name and description) + + Returns: + The usergroup ID if created successfully, None otherwise + """ + global _usergroups_cache + + name = github_team_to_usergroup_name(team_slug) + description = f'Expert review group "{team_slug}"' + + try: + print(f"Creating Slack usergroup '@{handle}' with name '{name}'...") + response = slack_client.usergroups_create( + name=name, + handle=handle, + description=description, + ) + usergroup = response.get("usergroup", {}) + usergroup_id = usergroup.get("id") + + if usergroup_id: + # Update cache with new usergroup + if _usergroups_cache is not None: + _usergroups_cache[handle] = { + "id": usergroup_id, + "users": [], + } + print(f"Successfully created Slack usergroup '@{handle}'") + return usergroup_id + else: + print(f"Error: Usergroup created but no ID returned") + return None + + except SlackApiError as e: + print(f"Error creating Slack usergroup '@{handle}': {e.response['error']}") + return None + + +def sync_team_to_usergroup(team_slug, usergroup_handle, dry_run=False): + """Sync a GitHub team to a Slack usergroup.""" + print(f"\n{'='*60}") + print(f"Syncing GitHub team '{team_slug}' -> Slack usergroup '@{usergroup_handle}'") + print(f"{'='*60}") + + org = get_org() + slack_client = get_slack_client() + + if not slack_client: + print("Error: Slack token not configured") + return False + + # 1. Get GitHub team members + members = get_team_members(org, team_slug) + if not members: + print(f"No members found in GitHub team '{team_slug}'") + return False + + # Filter out service accounts + members = {m for m in members if not m.startswith("svc")} + print(f"GitHub team members ({len(members)}): {sorted(members)}") + + # 2. 
Get Slack user IDs for each member + slack_user_ids = [] + missing_users = [] + + for username in sorted(members): + email = get_user_email(username) + slack_id = get_slack_user_id(slack_client, email) + if slack_id: + slack_user_ids.append(slack_id) + else: + missing_users.append((username, email, "not found in Slack")) + + if missing_users: + print(f"\nWarning: Could not resolve {len(missing_users)} users:") + for username, email, reason in missing_users: + print(f" - {username}: {reason}" + (f" (tried {email})" if email else "")) + + if not slack_user_ids: + print(f"Error: No Slack users found for team '{team_slug}'") + return False + + # 3. Get current Slack usergroup membership (or create if it doesn't exist) + usergroup_id, current_members = get_slack_usergroup_id(slack_client, usergroup_handle) + + if not usergroup_id: + print(f"Slack usergroup '@{usergroup_handle}' not found, creating it...") + if dry_run: + print(f"Dry run: Would create usergroup '@{usergroup_handle}'") + current_members = [] + else: + usergroup_id = create_slack_usergroup(slack_client, usergroup_handle, team_slug) + if not usergroup_id: + print(f"Error: Failed to create Slack usergroup '@{usergroup_handle}'") + return False + current_members = [] + + # 4. Compare and update + current_set = set(current_members) + new_set = set(slack_user_ids) + + to_add = new_set - current_set + to_remove = current_set - new_set + + print(f"\nCurrent usergroup members: {len(current_members)}") + print(f"New members to set: {len(slack_user_ids)}") + print(f" Adding: {len(to_add)} users") + print(f" Removing: {len(to_remove)} users") + + if current_set == new_set: + print("No changes needed - usergroup is already in sync") + return True + + if dry_run: + print(f"\nDry run: Would update '@{usergroup_handle}' with {len(slack_user_ids)} members") + return True + + # 5. 
Update the usergroup + try: + slack_client.usergroups_users_update( + usergroup=usergroup_id, users=slack_user_ids + ) + print(f"\nSuccessfully updated '@{usergroup_handle}' with {len(slack_user_ids)} members") + return True + except SlackApiError as e: + print(f"Error updating usergroup: {e.response['error']}") + return False + + +def get_team_to_usergroup_mapping(): + """Fetch child teams of mcore-reviewers and generate the mapping.""" + org = get_org() + child_teams = get_child_teams(org, PARENT_TEAM_SLUG) + + if not child_teams: + print(f"Error: No child teams found under '{PARENT_TEAM_SLUG}'") + return {} + + mapping = {} + for team_slug in child_teams: + usergroup_handle = github_team_to_slack_usergroup(team_slug) + mapping[team_slug] = usergroup_handle + + return mapping + + +def sync_all_teams(dry_run=False): + """Sync all GitHub teams under mcore-reviewers to their Slack usergroups.""" + print(f"Fetching child teams of '{PARENT_TEAM_SLUG}'...") + team_to_usergroup = get_team_to_usergroup_mapping() + + if not team_to_usergroup: + return False + + print(f"Found {len(team_to_usergroup)} teams to sync") + print("\nTeam to usergroup mapping:") + for team, usergroup in sorted(team_to_usergroup.items()): + print(f" {team} -> @{usergroup}") + + results = {"success": [], "failed": []} + + for team_slug, usergroup_handle in team_to_usergroup.items(): + success = sync_team_to_usergroup(team_slug, usergroup_handle, dry_run=dry_run) + if success: + results["success"].append(team_slug) + else: + results["failed"].append(team_slug) + + # Summary + print(f"\n{'='*60}") + print("SYNC SUMMARY") + print(f"{'='*60}") + print(f"Successful: {len(results['success'])}") + print(f"Failed: {len(results['failed'])}") + + if results["failed"]: + print(f"\nFailed teams: {', '.join(results['failed'])}") + return False + + return True + + +def main(): + parser = argparse.ArgumentParser( + description="Sync GitHub team membership to Slack user groups" + ) + parser.add_argument( + 
"--dry-run", + action="store_true", + help="Show what would be done without making changes", + ) + parser.add_argument( + "--list", + action="store_true", + help="List all configured team-to-usergroup mappings", + ) + + args = parser.parse_args() + + if args.list: + print(f"Fetching child teams of '{PARENT_TEAM_SLUG}'...") + team_to_usergroup = get_team_to_usergroup_mapping() + if not team_to_usergroup: + sys.exit(1) + print("\nTeam-to-usergroup mappings:") + print(f"{'GitHub Team':<35} {'Slack Usergroup':<30}") + print("-" * 65) + for team, usergroup in sorted(team_to_usergroup.items()): + print(f"{team:<35} @{usergroup:<29}") + return + + success = sync_all_teams(dry_run=args.dry_run) + sys.exit(0 if success else 1) + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/_build_test_publish_wheel.yml b/.github/workflows/_build_test_publish_wheel.yml index 1367dbdeb72..9e9062827de 100644 --- a/.github/workflows/_build_test_publish_wheel.yml +++ b/.github/workflows/_build_test_publish_wheel.yml @@ -74,7 +74,7 @@ jobs: rm LICENSE || true docker run --rm -v $(pwd):/workspace -w /workspace $IMAGE bash -c '\ for python_version in cp310 cp311 cp312 cp313; do \ - /opt/python/${python_version}-${python_version}/bin/pip install --upgrade "setuptools>=80.0.0" build; \ + /opt/python/${python_version}-${python_version}/bin/pip install --upgrade "setuptools<80.0.0,>=77.0.0" build; \ done && \ for python_version in cp310 cp311 cp312 cp313; do \ /opt/python/${python_version}-${python_version}/bin/python -m build; \ @@ -157,7 +157,7 @@ jobs: - PACKAGE: megatron-core PLATFORM: amd64 - PACKAGE: megatron-fsdp - IMAGE: quay.io/pypa/manylinux_2_28_x86_64 + PLATFORM: amd64 env: PACKAGE: ${{ matrix.PACKAGE }} steps: @@ -173,7 +173,19 @@ jobs: TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} TWINE_REPOSITORY: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && 'pypi' || 'testpypi' }} + PLATFORM: ${{ 
matrix.PLATFORM }} run: | - ls -al dist/$PACKAGE* + + # Delete sdist for arm64 since we already upload it with amd64. + if [ "$PLATFORM" == "arm64" ]; then + rm dist/*.tar.gz + fi + + ls -al dist/ pip install twine - twine upload -r $TWINE_REPOSITORY -u $TWINE_USERNAME -p $TWINE_PASSWORD dist/$PACKAGE* + twine upload \ + --verbose \ + -r $TWINE_REPOSITORY \ + -u $TWINE_USERNAME \ + -p $TWINE_PASSWORD \ + dist/* diff --git a/.github/workflows/_release_library.yml b/.github/workflows/_release_library.yml index c166a58c21e..d39ee505c2a 100644 --- a/.github/workflows/_release_library.yml +++ b/.github/workflows/_release_library.yml @@ -60,6 +60,7 @@ jobs: with: dry-run: true ref: ${{ inputs.release-ref }} + no-publish: true secrets: TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} @@ -74,7 +75,7 @@ jobs: ) && !cancelled() outputs: - version: ${{ needs.bump-version-mcore.outputs.release-version }} + release-version: ${{ steps.bump-version-mcore.outputs.release-version }} env: IS_DRY_RUN: ${{ inputs.dry-run }} steps: @@ -92,6 +93,7 @@ jobs: SRC_DIR: '' PYPROJECT_NAME: 'megatron.core' run: | + set +u cd ${{ github.run_id }} PACKAGE_INFO_FILE="$SRC_DIR${PYPROJECT_NAME//.//}/package_info.py" @@ -101,7 +103,7 @@ jobs: PATCH=$(cat $PACKAGE_INFO_FILE | awk '/^PATCH = /' | awk -F"= " '{print $2}') PRERELEASE=$(cat $PACKAGE_INFO_FILE | awk '/^PRE_RELEASE = /' | awk -F"= " '{print $2}' | tr -d '"' | tr -d "'") - echo "release-version=$MAJOR.$MINOR.$NEXT_PATCH$NEXT_PRERELEASE" | tee -a "$GITHUB_OUTPUT" + echo "release-version=$MAJOR.$MINOR.$PATCH$PRERELEASE" | tee -a "$GITHUB_OUTPUT" if [[ "$PRERELEASE" != "" ]]; then if [[ "$PRERELEASE" == *rc* ]]; then @@ -130,6 +132,8 @@ jobs: SRC_DIR: 'megatron/core/distributed/fsdp/src/' PYPROJECT_NAME: 'megatron_fsdp' run: | + set +u + cd ${{ github.run_id }} PACKAGE_INFO_FILE="$SRC_DIR${PYPROJECT_NAME//.//}/package_info.py" diff --git a/.github/workflows/auto-update-copy-pr-bot.yml 
b/.github/workflows/auto-update-copy-pr-bot.yml index b04d34251f0..5f6f1ade9e8 100644 --- a/.github/workflows/auto-update-copy-pr-bot.yml +++ b/.github/workflows/auto-update-copy-pr-bot.yml @@ -3,7 +3,7 @@ name: Auto Update Copy PR Bot on: workflow_dispatch: schedule: - - cron: "0 0 * * *" + - cron: '0 0 * * *' jobs: auto-update-copy-pr-bot: @@ -13,6 +13,9 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v3 + with: + token: ${{ secrets.PAT }} + ref: main - name: Fetch list of members in mcore-reviewers team shell: bash -euxo pipefail {0} diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index b8ca3d29047..aea7186cdf0 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -20,8 +20,8 @@ on: branches: - dev - main - - 'pull-request/[0-9]+' - - 'deploy-release/*' + - "pull-request/[0-9]+" + - "deploy-release/*" merge_group: types: [checks_requested] workflow_dispatch: @@ -130,42 +130,6 @@ jobs: echo "is_maintainer=false" | tee -a $GITHUB_OUTPUT fi - - name: Find Comment - uses: peter-evans/find-comment@v4 - if: startsWith(github.ref, 'refs/heads/pull-request/') - id: fc - with: - issue-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} - repository: ${{ github.repository }} - body-includes: '' - - - name: Delete comment - uses: actions/github-script@v7 - if: startsWith(github.ref, 'refs/heads/pull-request/') && steps.fc.outputs.comment-id != '' - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - await github.rest.issues.deleteComment({ - owner: context.repo.owner, - repo: context.repo.repo, - comment_id: ${{ steps.fc.outputs.comment-id }} - }) - - - name: Write pull request comment - if: startsWith(github.ref, 'refs/heads/pull-request/') && steps.check-membership.outputs.is_maintainer == 'false' - uses: peter-evans/create-or-update-comment@v5 - with: - issue-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} - repository: ${{ 
github.repository }} - body: | - - - Thank you for your contribution! - - NVIDIA Megatron-LM is currently transitioning to development on Github. We will aim to review your PR after we complete our transition and stabilize our Github development process. - - Thank you for your understanding. - pre-flight: needs: [is-not-external-contributor] if: github.repository == 'NVIDIA/Megatron-LM' @@ -383,7 +347,7 @@ jobs: - cicd-container-build - cicd-parse-unit-tests runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }} - name: '${{ matrix.bucket }} - latest' + name: "${{ matrix.bucket }} - latest" if: | ( success() @@ -405,9 +369,10 @@ jobs: test_case: ${{ matrix.bucket }} tag: latest timeout: ${{ matrix.timeout || 30 }} - is_unit_test: 'true' + is_unit_test: "true" PAT: ${{ secrets.PAT }} container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }} + is_ci_workload: ${{ needs.pre-flight.outputs.is_ci_workload }} cicd-parse-integration-tests: runs-on: ubuntu-latest @@ -449,9 +414,11 @@ jobs: id: has-run-functional-tests-label env: GH_TOKEN: ${{ secrets.PAT }} + IS_CI_WORKLOAD: ${{ needs.pre-flight.outputs.is_ci_workload }} run: | PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} - HAS_RUN_FUNCTIONAL_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run functional tests")') || echo "false" + HAS_RUN_FUNCTIONAL_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. 
== "Run functional tests")') + HAS_RUN_FUNCTIONAL_TESTS_LABEL=${HAS_RUN_FUNCTIONAL_TESTS_LABEL:-$IS_CI_WORKLOAD} echo "main=$HAS_RUN_FUNCTIONAL_TESTS_LABEL" | tee -a $GITHUB_OUTPUT - name: Parse functional tests @@ -510,7 +477,7 @@ jobs: - cicd-parse-integration-tests - cicd-unit-tests-latest runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }} - name: '${{ matrix.model }}/${{ matrix.test_case }} - latest' + name: "${{ matrix.model }}/${{ matrix.test_case }} - latest" env: PIP_DISABLE_PIP_VERSION_CHECK: 1 PIP_NO_PYTHON_VERSION_WARNING: 1 @@ -533,9 +500,10 @@ jobs: model: ${{ matrix.model }} tag: latest timeout: ${{ matrix.timeout || 30 }} - is_unit_test: 'false' + is_unit_test: "false" PAT: ${{ secrets.PAT }} container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }} + is_ci_workload: ${{ needs.pre-flight.outputs.is_ci_workload }} Nemo_CICD_Test: needs: diff --git a/.github/workflows/oncall-assign.yml b/.github/workflows/oncall-assign.yml index f15004b7c83..d4cc47d5f9e 100644 --- a/.github/workflows/oncall-assign.yml +++ b/.github/workflows/oncall-assign.yml @@ -15,8 +15,10 @@ name: Oncall Assign on: - pull_request: - types: [labeled, ready_for_review] + pull_request_target: + types: [opened, ready_for_review] + branches: + - main permissions: pull-requests: write @@ -25,12 +27,7 @@ permissions: jobs: assign-reviewer: runs-on: ubuntu-latest - if: > - !github.event.pull_request.draft && - ( - (github.event.action == 'labeled' && github.event.label.name == 'Oncall Review') || - (github.event.action == 'ready_for_review' && contains(github.event.pull_request.labels.*.name, 'Oncall Review')) - ) + if: ${{ !github.event.pull_request.draft }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -40,8 +37,11 @@ jobs: with: python-version: '3.10' + - name: Install dependencies + run: pip install requests slack-sdk + - name: Assign Reviewer env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.PAT }} run: | - uv run 
python .github/scripts/oncall_manager.py assign --pr ${{ github.event.pull_request.number }} + python .github/scripts/oncall_manager.py assign --pr ${{ github.event.pull_request.number }} diff --git a/.github/workflows/oncall-rotation.yml b/.github/workflows/oncall-rotation.yml index ba688320723..46a45810ad1 100644 --- a/.github/workflows/oncall-rotation.yml +++ b/.github/workflows/oncall-rotation.yml @@ -25,6 +25,7 @@ permissions: jobs: rotate-schedule: + environment: main runs-on: ubuntu-latest steps: - name: Checkout code @@ -41,8 +42,11 @@ jobs: env: # Token to read org team members. Needs read:org scope. GH_TOKEN: ${{ secrets.NVIDIA_MCORE_ONCALL_TOKEN || secrets.PAT || secrets.GITHUB_TOKEN }} + # Slack token for updating the Slack usergroup + SLACK_TOKEN: ${{ secrets.ONCALL_SLACK_TOKEN }} run: | - uv run python .github/scripts/oncall_manager.py rotate + pip install --no-cache-dir uv + uv run --with slack-sdk python .github/scripts/oncall_manager.py rotate - name: Commit and Push changes run: | @@ -50,5 +54,6 @@ jobs: git config --global user.email "github-actions[bot]@users.noreply.github.com" git add .github/oncall_schedule.json git commit -m "chore: rotate oncall schedule" || echo "No changes to commit" + git pull --rebase git push origin HEAD:main diff --git a/.github/workflows/sync-team-usergroups.yml b/.github/workflows/sync-team-usergroups.yml new file mode 100644 index 00000000000..8b08182dceb --- /dev/null +++ b/.github/workflows/sync-team-usergroups.yml @@ -0,0 +1,39 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Sync GitHub Teams to Slack User Groups + +on: + workflow_dispatch: + +jobs: + sync-usergroups: + environment: main + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Sync Teams to User Groups + env: + GH_TOKEN: ${{ secrets.NVIDIA_MCORE_ONCALL_TOKEN || secrets.PAT || secrets.GITHUB_TOKEN }} + SLACK_TOKEN: ${{ secrets.ONCALL_SLACK_TOKEN }} + run: | + pip install --no-cache-dir uv + uv run --with slack-sdk python .github/scripts/sync_team_usergroups.py diff --git a/.gitlab/scripts/build.sh b/.gitlab/scripts/build.sh index 9bcf5d45712..0f34b838384 100644 --- a/.gitlab/scripts/build.sh +++ b/.gitlab/scripts/build.sh @@ -29,6 +29,9 @@ fi CI_COMMIT_BRANCH=$(echo "$CI_COMMIT_BRANCH" | tr '/' '-' | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9._-]/-/g') ADDITIONAL_PARAMS+=("--cache-to type=registry,ref=${IMAGE}-buildcache:${CI_COMMIT_BRANCH}-${PLATFORM},mode=max") ADDITIONAL_PARAMS+=("--cache-from type=registry,ref=${IMAGE}-buildcache:${CI_COMMIT_BRANCH}-${PLATFORM}") +ADDITIONAL_PARAMS+=("--cache-from type=registry,ref=${IMAGE}-buildcache:main-${PLATFORM}") +ADDITIONAL_PARAMS+=("--cache-from type=registry,ref=${IMAGE}-buildcache:dev-${PLATFORM}") + ADDITIONAL_PARAMS+=("-t ${IMAGE}:${CI_COMMIT_BRANCH}-${PLATFORM}") if [[ -n "$CI_MERGE_REQUEST_IID" ]]; then diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 615227600cc..6b128dce590 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,66 +1,3 @@ -# Contributing to 
Megatron-LM +# Contributing to Megatron -This document outlines the processes and policies for issues and pull requests by non-NVIDIA contributors to the Megatron-LM github repository. - -Everyone is welcome to contribute to the project but development of Megatron-LM continues internally at NVIDIA. When contributing it important to ensure that changes are in line with the project direction. Small changes to fix bugs are welcomed and appreciated. If proposing large architectural changes or changes for stylistic reasons open an issue first so we can discuss it. - -PRs will first be pulled into NVIDIA's internal Megatron-LM repo and then pushed back out to the open github repo with proper credit given to the committers. - -## Issue policy - -Please do file any bugs you find, keeping the following in mind: - -- If filing a bug, i.e. you have found something that doesn't work as expected, use the BUG template. -- If you've found a regression in speed or accuracy use the REGRESSION template. -- If you are requesting a new feature or modification of an existing feature use the ENHANCEMENT template. -- If opening an issue to ask a question no template is needed but please make your question as clear and concise as possible. -- One issue per bug. Putting multiple things in the same issue makes both discussion and completion unnecessarily complicated. -- Your bug is mostly likely to get attention from the development team quickly if we can easily reproduce it. -- Use proper spelling, grammar, and punctuation. -- Write in an authoritative and technical tone. - -## Code submission policy - -Here are some dos & don'ts to try and stick to: - -### Do: - -- Format new code in a style that is consistent with the file being changed. Megatron-LM doesn't (yet) have a style guide or enforced formatting. -- Split your changes into separate, atomic commits i.e. A commit per feature or fix. -- Make sure your commits are rebased on the master branch. 
-- Write the commit message subject line in the imperative mood ("Change the default argument for X", not "Changed the default argument for X"). -- Write your commit messages in proper English, with care and punctuation. -- Check the spelling of your code, comments and commit messages. - -### Don't: - -- Submit code that's incompatible with the project licence. -- Touch anything outside the stated scope of the PR. This includes formatting changes to code not relevant to the PR. -- Iterate excessively on your design across multiple commits. -- Include commented-out code. -- Attempt large architectural changes without first opening an issue to discuss. - -## Issue and Pull Request Q&A (Updated Jul 2023) - -### I've submitted an issue and PR. When can I expect to get some feedback? - -Megatron-LM is developed and maintained by a small team of researchers. We will endeavour to read and acknowledge all new issues and PRs within a week. A few rules of thumb: -- Reproducible bugs/regressions and bug/regression fixes are likely to get the attention of maintainers the quickest. -- Issues requesting an enhancement may only recieve acknowlegement that they've been read and may be closed with a "wontfix" label if they're not inline with the project direction. If they are acknowledged and remain open you can assume the maintainers agree they're a desirable feature. -- Support requests, i.e. requests for help running the code, have the lowest priority and will be responded to as maintainer time permits. - -### If my issue or PR isn't getting attention, how long should I wait before pinging one of the project maintainers? - -One week if there is no acknowledgement of the intial request. - -### Who are the project maintainers I should ping? - -The corresponding maintainers at this time are @jaredcasper and @jon-barker. - -### Is there a policy for issues and PRs that haven't been touched in X days? Should they be closed? 
- -Yes, starting in July 2023 we have a bot that will mark untouched PRs as "stale" after 60 days. - -We have a long backlog of issues and PRs dating back 3.5 years. We are trying to triage these now by working backwards. Older issues we believe may still be relevant may recieve a request to re-test them with the latest code. If there's no response they may be closed. Again, if you they should be re-opened then just respond with a comment to that effect. - -Thank-you! \ No newline at end of file +Visit our [contributing page](https://docs.nvidia.com/megatron-core/developer-guide/latest/developer/contribute.html). \ No newline at end of file diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index 4e1a4de55e8..b43b7286506 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -1,5 +1,4 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. - # syntax=docker/dockerfile:1.3-labs ARG FROM_IMAGE_NAME @@ -69,9 +68,9 @@ RUN bash -ex <<"EOF" ln -s libnvshmem_host.so.3 libnvshmem_host.so popd - git clone --branch hybrid-ep https://github.com/Autumn1998/DeepEP.git + git clone --branch hybrid-ep https://github.com/deepseek-ai/DeepEP.git pushd DeepEP - git checkout df375b40f24e5c495e2db36e808125266661652c + git checkout 83e0d156807f31abed4ea55c2fa6eb4b62a11b82 patch -p1 < /workspace/deepep.patch popd TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" uv pip install --no-build-isolation -v DeepEP/. @@ -80,7 +79,6 @@ EOF COPY assets/ /opt/data/ ENV UV_PYTHON=$UV_PROJECT_ENVIRONMENT/bin/python -COPY . 
/opt/megatron-lm/ ##### For NVIDIANS only ##### FROM main as jet @@ -98,7 +96,7 @@ RUN --mount=type=secret,id=JET_INDEX_URLS \ JET_INDEX_URLS=$(cat /run/secrets/JET_INDEX_URLS) LOGGER_INDEX_URL=$(cat /run/secrets/LOGGER_INDEX_URL) uv pip install --no-cache-dir --upgrade $LOGGER_INDEX_URL "one-logger" - uv pip install --no-cache-dir --upgrade "setuptools<80.0.0" + uv pip install --no-cache-dir --upgrade "setuptools<80.0.0,>=77.0.0" uv pip install --no-cache-dir --upgrade $JET_INDEX_URLS "jet-client~=4.0" EOF ### diff --git a/docker/Dockerfile.ci.nemo b/docker/Dockerfile.ci.nemo index 93fe23bfd6f..b00349e101a 100644 --- a/docker/Dockerfile.ci.nemo +++ b/docker/Dockerfile.ci.nemo @@ -1,5 +1,4 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. - # syntax=docker/dockerfile:1.3-labs ARG FROM_IMAGE_NAME diff --git a/docker/common/install.sh b/docker/common/install.sh index 761244a1068..01003c0e7aa 100644 --- a/docker/common/install.sh +++ b/docker/common/install.sh @@ -136,7 +136,7 @@ main() { . $UV_PROJECT_ENVIRONMENT/bin/activate pip install --pre --no-cache-dir --upgrade pip - pip install --pre --no-cache-dir torch pybind11 wheel_stub ninja wheel packaging "setuptools>=77.0.0" + pip install --pre --no-cache-dir torch pybind11 wheel_stub ninja wheel packaging "setuptools<80.0.0,>=77.0.0" pip install --pre --no-cache-dir --no-build-isolation . 
fi diff --git a/docker/common/install_source_wheels.sh b/docker/common/install_source_wheels.sh index 1308e604822..2f144a6ff0a 100644 --- a/docker/common/install_source_wheels.sh +++ b/docker/common/install_source_wheels.sh @@ -54,4 +54,4 @@ uv pip install --no-cache-dir \ $MAMBA_WHEEL \ $CAUSALCONV1D_WHEEL \ $GROUPEDGEMM_WHEEL \ - "setuptools<80.0.0" + "setuptools<80.0.0,>=77.0.0" diff --git a/docs/advanced/index.md b/docs/advanced/index.md new file mode 100644 index 00000000000..573cb0ee81a --- /dev/null +++ b/docs/advanced/index.md @@ -0,0 +1,5 @@ +# Discussions + +In-depth technical discussions and optimization guides: + +- [Optimizing DeepSeek-V3 Training on GB200 NVL72](https://github.com/NVIDIA/Megatron-LM/blob/dev/docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-reproduce-guide.md) - Achieving 970 TFLOPS/GPU with MXFP8, kernel optimizations, and HybridEP diff --git a/docs/api-guide/core/datasets.md b/docs/api-guide/core/datasets.md new file mode 100644 index 00000000000..e97e99ae1db --- /dev/null +++ b/docs/api-guide/core/datasets.md @@ -0,0 +1,4 @@ +# datasets package + +```{include} ../../../megatron/core/datasets/readme.md +``` diff --git a/docs/api-guide/dist_checkpointing.md b/docs/api-guide/core/dist_checkpointing.md similarity index 100% rename from docs/api-guide/dist_checkpointing.md rename to docs/api-guide/core/dist_checkpointing.md diff --git a/docs/api-guide/dist_checkpointing.strategies.md b/docs/api-guide/core/dist_checkpointing.strategies.md similarity index 100% rename from docs/api-guide/dist_checkpointing.strategies.md rename to docs/api-guide/core/dist_checkpointing.strategies.md diff --git a/docs/api-guide/distributed.md b/docs/api-guide/core/distributed.md similarity index 100% rename from docs/api-guide/distributed.md rename to docs/api-guide/core/distributed.md diff --git a/docs/api-guide/fusions.md b/docs/api-guide/core/fusions.md similarity index 100% rename from docs/api-guide/fusions.md rename to 
docs/api-guide/core/fusions.md diff --git a/docs/api-guide/core/index.md b/docs/api-guide/core/index.md new file mode 100644 index 00000000000..150fd72cb1e --- /dev/null +++ b/docs/api-guide/core/index.md @@ -0,0 +1,16 @@ +# Core APIs + +Low-level API reference for core Megatron components. + +```{toctree} +:maxdepth: 2 + +transformer +tensor_parallel +pipeline_parallel +fusions +distributed +datasets +dist_checkpointing +dist_checkpointing.strategies +``` diff --git a/docs/api-guide/pipeline_parallel.md b/docs/api-guide/core/pipeline_parallel.md similarity index 100% rename from docs/api-guide/pipeline_parallel.md rename to docs/api-guide/core/pipeline_parallel.md diff --git a/docs/api-guide/tensor_parallel.md b/docs/api-guide/core/tensor_parallel.md similarity index 100% rename from docs/api-guide/tensor_parallel.md rename to docs/api-guide/core/tensor_parallel.md diff --git a/docs/api-guide/transformer.md b/docs/api-guide/core/transformer.md similarity index 100% rename from docs/api-guide/transformer.md rename to docs/api-guide/core/transformer.md diff --git a/docs/api-guide/datasets.md b/docs/api-guide/datasets.md deleted file mode 100644 index d4a5d63a3d5..00000000000 --- a/docs/api-guide/datasets.md +++ /dev/null @@ -1,5 +0,0 @@ -# datasets package - -```{include} datasets_readme.md -``` - diff --git a/docs/api-guide/datasets_readme.md b/docs/api-guide/datasets_readme.md deleted file mode 100644 index e94bc86f85f..00000000000 --- a/docs/api-guide/datasets_readme.md +++ /dev/null @@ -1,3 +0,0 @@ -```{include} ../../megatron/core/datasets/readme.md -``` - diff --git a/docs/api-guide/index.md b/docs/api-guide/index.md index d38fc1092ba..851114d98e8 100644 --- a/docs/api-guide/index.md +++ b/docs/api-guide/index.md @@ -1,25 +1,11 @@ # API Guide +API reference documentation for Megatron Core components. 
+ ```{toctree} -:maxdepth: 2 +:maxdepth: 3 -models -tensor_parallel -context_parallel -pipeline_parallel -custom_fsdp -fusions -transformer -moe -dist_checkpointing -dist_optimizer -distributed -datasets -multi_latent_attention -num_microbatches_calculator -optimizer_param_scheduler -optimizer_cpu_offload -multi_token_prediction -tokenizers +models/index +core/index +internal/index ``` - diff --git a/docs/api-guide/internal/index.md b/docs/api-guide/internal/index.md new file mode 100644 index 00000000000..c216a976c77 --- /dev/null +++ b/docs/api-guide/internal/index.md @@ -0,0 +1,10 @@ +# Internal Utilities + +Internal utility APIs. + +```{toctree} +:maxdepth: 2 + +num_microbatches_calculator +optimizer_param_scheduler +``` diff --git a/docs/api-guide/num_microbatches_calculator.md b/docs/api-guide/internal/num_microbatches_calculator.md similarity index 100% rename from docs/api-guide/num_microbatches_calculator.md rename to docs/api-guide/internal/num_microbatches_calculator.md diff --git a/docs/api-guide/optimizer_param_scheduler.md b/docs/api-guide/internal/optimizer_param_scheduler.md similarity index 100% rename from docs/api-guide/optimizer_param_scheduler.md rename to docs/api-guide/internal/optimizer_param_scheduler.md diff --git a/docs/api-guide/models/index.md b/docs/api-guide/models/index.md new file mode 100644 index 00000000000..c6279d2409a --- /dev/null +++ b/docs/api-guide/models/index.md @@ -0,0 +1,12 @@ +# Model APIs + +API reference for Megatron Core model implementations. 
+ +```{toctree} +:maxdepth: 2 + +models +models.gpt +models.bert +models.t5 +``` diff --git a/docs/api-guide/models.bert.md b/docs/api-guide/models/models.bert.md similarity index 100% rename from docs/api-guide/models.bert.md rename to docs/api-guide/models/models.bert.md diff --git a/docs/api-guide/models.gpt.md b/docs/api-guide/models/models.gpt.md similarity index 100% rename from docs/api-guide/models.gpt.md rename to docs/api-guide/models/models.gpt.md diff --git a/docs/api-guide/models.md b/docs/api-guide/models/models.md similarity index 100% rename from docs/api-guide/models.md rename to docs/api-guide/models/models.md diff --git a/docs/api-guide/models.t5.md b/docs/api-guide/models/models.t5.md similarity index 100% rename from docs/api-guide/models.t5.md rename to docs/api-guide/models/models.t5.md diff --git a/docs/api-guide/moe.md b/docs/api-guide/moe.md deleted file mode 100644 index a4a66430119..00000000000 --- a/docs/api-guide/moe.md +++ /dev/null @@ -1,5 +0,0 @@ -# Mixture of Experts package - -```{include} ../../megatron/core/transformer/moe/README.md -``` - diff --git a/docs/api-guide/optimizer_cpu_offload.md b/docs/api-guide/optimizer_cpu_offload.md deleted file mode 100644 index c495b222622..00000000000 --- a/docs/api-guide/optimizer_cpu_offload.md +++ /dev/null @@ -1,5 +0,0 @@ -# Optimizer CPU offload package - -```{include} ../../megatron/core/optimizer/cpu_offloading/README.md -``` - diff --git a/docs/api-guide/tokenizers.md b/docs/api-guide/tokenizers.md deleted file mode 100644 index 5aaf9866f1e..00000000000 --- a/docs/api-guide/tokenizers.md +++ /dev/null @@ -1,137 +0,0 @@ -# New Tokenizer System - -## Key Differences from the Old Tokenizer System - -### 1. Hugging Face–style API - -We now have a `MegatronTokenizer` class that provides a familiar, simple API similar to Hugging Face’s: - -`.from_pretrained()` – Load a tokenizer from a directory or file, automatically detecting the type and settings. 
- -`.write_metadata()` – Save tokenizer configuration (metadata) so that it can be reused without re-specifying parameters. - -This eliminates the need for long initialization arguments and hard-coded settings in training scripts. - -### 2. Tokenizer Metadata - -A metadata file (JSON) now stores all essential tokenizer configuration in one place: - - Tokenizer library (e.g., HuggingFace, SentencePiece, TikToken, etc.) - - Chat templates - - Tokenizer class - -Benefits: - - You only need to set these parameters once. - - No more passing multiple CLI arguments for tokenizer settings. - - Easy sharing — just copy the tokenizer directory with its metadata file. - -### 3. Library Classes Are Now Internal - -In the old system, you had to know which tokenizer library to use (`SentencePieceTokenizer`, `HuggingFaceTokenizer`, etc.) and instantiate it manually. - -In the new system: - - The library is automatically detected from the metadata. - - The correct tokenizer implementation is chosen under the hood. - - Users don’t need to manually manage tokenizer classes. - -### 3. Support for Model-specific Tokenizer Classes - -The system now supports: - - Built-in LLM-specific tokenizers. - - Custom tokenizers: You can create your own tokenizer class by inheriting from `MegatronTokenizerText` and specify it in the `tokenizer_class` field in the metadata file. - - This allows advanced customization while keeping defaults simple for most users. - -### 4. Usage - -**Creating and Saving Metadata** - -```python -from megatron.core.tokenizers import MegatronTokenizer - -# The metadata will be stored as a file named tokenizer_metadata.json inside the tokenizer’s directory. 
-MegatronTokenizer.write_metadata( - tokenizer_path="/path/to/tokenizer.model", - tokenizer_library="sentencepiece", - chat_template="chat template in jinja format", -) - -# To use custom tokenizer class -from megatron.core.tokenizers.text import MegatronTokenizerText - -class CustomTokenizer(MegatronTokenizerText): - ... - -MegatronTokenizer.write_metadata( - tokenizer_path="/path/to/tokenizer.model", - tokenizer_library="sentencepiece", - chat_template="chat template in jinja format", - tokenizer_class=CustomTokenizer, -) - -# To save metadata to another dir -MegatronTokenizer.write_metadata( - tokenizer_path="/path/to/tokenizer.model", - tokenizer_library="sentencepiece", - metadata_path="/path/to/save/metadata.json", -) - -``` - -**Restoring the tokenizer** - -```python -from megatron.core.tokenizers import MegatronTokenizer - -MegatronTokenizer.from_pretrained( - tokenizer_path="/path/to/tokenizer.model", -) - -# If metadata is not in tokenizer’s dir -MegatronTokenizer.from_pretrained( - tokenizer_path="/path/to/tokenizer.model", - metadata_path="/path/to/metadata.json", -) - -# Pass metadata as dict -MegatronTokenizer.from_pretrained( - tokenizer_path="GPT2BPETokenizer", - metadata_path={"library": "megatron"}, - vocab_file="/path/to/vocab.txt", -) - -# Pass additional params -MegatronTokenizer.from_pretrained( - tokenizer_path="/path/to/tokenizer/model.json", - metadata_path={"library": "tiktoken"}, - pattern="v2", - num_special_tokens=1000, -) - -# Null tokenzier -MegatronTokenizer.from_pretrained( - metadata_path={"library": "null"}, - vocab_size=131072, -) - -``` - -### 4. Megatron-LM pretraining compatibility - -New tokenizer system is compatible with megatron-lm pretrain script. If `--tokenizer-metadata` is not specified, a default metadata file will be generated automatically. - -```bash -# Null tokenizer -torchrun --nproc_per_node=1 pretrain_gpt.py \ - ... 
\ - --tokenizer-type NullTokenizer \ - --vocab-size 131072 - -# HuggingFace tokenizer with specified metadata -torchrun --nproc_per_node=1 pretrain_gpt.py \ - ... \ - --tokenizer-type HuggingFaceTokenizer \ - --tokenizer-model meta-llama/Meta-Llama-3-8B \ - --tokenizer-metadata /path/to/metadata.json - -``` - -The Megatron-LM pretraining script still supports the legacy tokenizer system. To enable it, simply add the `--legacy-tokenizer` flag. diff --git a/docs/conf.py b/docs/conf.py index ac93ac35aaf..a64da441084 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,7 +24,7 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = "Megatron-LM" +project = "Megatron Core" copyright = "2025, NVIDIA Corporation" author = "NVIDIA Corporation" release = "latest" diff --git a/docs/developer/contribute.md b/docs/developer/contribute.md new file mode 100644 index 00000000000..859b5562f4b --- /dev/null +++ b/docs/developer/contribute.md @@ -0,0 +1,61 @@ +# Contributing to Megatron-LM + +This document outlines the processes and policies for issues and pull requests by non-NVIDIA contributors to the Megatron-LM GitHub repository. + +Everyone is welcome to contribute to the project! We recently migrated from using an internal repo to doing all development directly from the GitHub repository. + +When contributing it is important to ensure that changes are in line with the project direction. Small changes to fix bugs are welcomed and appreciated. If proposing large architectural changes or changes for stylistic reasons open an issue first so we can discuss it. + +## Issue policy + +Please do file any bugs you find, keeping the following in mind: + +- If filing a bug, i.e. you have found something that doesn't work as expected, use the BUG template. +- If you've found a regression in speed or accuracy use the REGRESSION template. 
+- If you are requesting a new feature or modification of an existing feature use the ENHANCEMENT template. +- If opening an issue to ask a question no template is needed but please make your question as clear and concise as possible. +- One issue per bug. Putting multiple things in the same issue makes both discussion and completion unnecessarily complicated. +- Your bug is mostly likely to get attention from the development team quickly if we can easily reproduce it. +- Use proper spelling, grammar, and punctuation. +- Write in an authoritative and technical tone. + +## Code submission policy + +### Do + +- Format new code in a style that is consistent with the file being changed. Megatron-LM doesn't (yet) have a style guide or enforced formatting. +- Split your changes into separate, atomic commits i.e. A commit per feature or fix. +- Make sure your commits are rebased on the master branch. +- Write the commit message subject line in the imperative mood ("Change the default argument for X", not "Changed the default argument for X"). +- Write your commit messages in proper English, with care and punctuation. +- Check the spelling of your code, comments and commit messages. + +### Don't + +- Submit code that's incompatible with the project licence. +- Touch anything outside the stated scope of the PR. This includes formatting changes to code not relevant to the PR. +- Iterate excessively on your design across multiple commits. +- Include commented-out code. +- Attempt large architectural changes without first opening an issue to discuss. + +## Issue and Pull Request Q&A + +### I've submitted an issue and PR. When can I expect to get some feedback? + +You should receive a response within 2 business days. + +### I need help, who should I ping? + +Use [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall). + +### If my issue or PR isn't getting attention, what should I do? 
+ +After 2 business days, tag the user [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall). + +### Is there a policy for issues and PRs that haven't been touched in X days? Should they be closed? + +Yes, we have a bot that will mark untouched PRs as "stale" after 60 days. + +We have a long backlog of issues and PRs dating back years. We are trying to triage these now by working backwards. Older issues we believe may still be relevant may recieve a request to re-test them with the latest code. If there's no response they may be closed. Again, if you they should be re-opened then just respond with a comment to that effect. + +Thank you! \ No newline at end of file diff --git a/docs/developer/generate_docs.md b/docs/developer/generate_docs.md new file mode 100644 index 00000000000..52fa288122d --- /dev/null +++ b/docs/developer/generate_docs.md @@ -0,0 +1,13 @@ +# Generating Docs Locally + +To generate docs locally, use the following commands: + +``` +cd docs +uv run --only-group docs sphinx-autobuild . _build/html --port 8080 --host 127.0.0.1 +``` + +Docs will be generated at . + +**Recommended:** set the environment variable `SKIP_AUTODOC=true` when generating docs +to skip the generation of `apidocs`. \ No newline at end of file diff --git a/docs/developer/oncall.md b/docs/developer/oncall.md new file mode 100644 index 00000000000..b88da7bb6df --- /dev/null +++ b/docs/developer/oncall.md @@ -0,0 +1,48 @@ +# Oncall Overview + +During your oncall week, you will be assigned to all PRs marked “Ready for +Review”. From a high-level, your responsibilities include: + +- Review all new PRs +- Accelerate the review process +- Ensure issues and discussion questions are answered + +## PR Responsibilities + +Below is the checklist that the oncall needs to go through for each PR. + +- Should the PR remain a single PR? 
+ - Each PR should have at most 1 expert reviewer, although there will be some outlier cases +- Label PR as “complexity: low”, “complexity: medium”, or “complexity: high” depending on complexity + - Expert reviewers have final say, oncall just sets the initial complexity level + - Initial complexity level guideline + - Low: <100 lines changed + - Medium: 100 < lines changed < 500 + - High: > 500 lines changed +- Does this PR have proper testing coverage? + - If new logic is added, is the new logic tested? +- Should the PR add documentation for any new features? +- Does the PR conform to our style guidelines? + - Code structure + - Cleanliness + - Comments + - File structure +- Do all tests pass? + - Oncall will need to kick off testing suite for external reviewers + - Comment “/ok to test commit_id” to kick off testing suite +- Add the “Expert Review” label + - Select an expert reviewer from each expert group as a reviewer. If you’re unsure who to select, pick a “maintainer” or manager. + - **Expert reviewers should review within 1 business day.** Message the assigned reviewer if it is taking longer. The reviewer either needs to review the PR or suggest an alternate reviewer. + - If the reviewer is not responding after 2 business days, escalate to the reviewer's manager. +- Add the “Final Review” label after experts approve + - Final reviewers should review within 1 business day. Message the assigned reviewer if it is taking longer. + - If the reviewer is not responding after 2 business days, escalate to the reviewer's manager. + +## Issues and Discussion Questions + +If you do not know the answer to an issue or discussion question: that's ok! **Delegate to someone who does.** + +On a daily basis, track the following: + +- [new issues](https://github.com/NVIDIA/Megatron-LM/issues): check to see if there are any new issues before they become out of SLA!
+- [out of SLA issues](https://github.com/orgs/NVIDIA-NeMo/projects/20/views/4?sliceBy%5Bvalue%5D=NVIDIA%2FMegatron-LM): useful dashboard that tracks all out of SLA issues diff --git a/docs/developer/submit.md b/docs/developer/submit.md new file mode 100644 index 00000000000..a096312d21e --- /dev/null +++ b/docs/developer/submit.md @@ -0,0 +1,16 @@ +# How to Submit a PR + +## Step 1: Add PR label `Expert Review` + +## Step 2: Collect the expert reviewers' reviews + +1. Attach the `Expert Review` label when your PR is ready for review. +2. GitHub auto-assigns expert reviewers based on your changes. They will get notified and pick up your PR soon. + +:warning: Only proceed to the next step once all reviewers have approved, merge conflicts are resolved and the CI is passing. +Final Review might get declined if these requirements are not fulfilled. + +## Step 3: Final Review + +1. Add `Final Review` label +2. GitHub auto-assigns final reviewers based on your changes. They will get notified and pick up your PR soon. diff --git a/docs/discussions/README.md b/docs/discussions/README.md index 81b1a58d5b0..4ac3c4e3254 100644 --- a/docs/discussions/README.md +++ b/docs/discussions/README.md @@ -4,12 +4,6 @@ This directory contains in-depth guides, tutorials, and discussions about optimi ## Available Guides -### Performance Optimization - -- **[A Guide to Reproduce DeepSeek-V3 Pre-training Performance on GB200](deepseek-v3-gb200-optimization/deepseek-v3-gb200-reproduce-guide.md)** - - A detailed guide on how to reproduce the DeepSeek-V3 pre-training performance on GB200, incluing the dockerfile, package requirements and training scripts. - ### Training Guides - **[Megatron-FSDP User Guide](megatron-fsdp-user-guide/megatron-fsdp-user-guide.md)** @@ -25,4 +19,4 @@ If you'd like to contribute a guide or tutorial, please follow this structure: 3. Create an images directory: `docs/discussions/your-guide-name/images/` 4.
Update this README.md with a link to your guide -Each guide should be self-contained with its own images and supporting files. +Each guide should be self-contained with its own images and supporting files. \ No newline at end of file diff --git a/docs/get-started/quickstart.md b/docs/get-started/quickstart.md new file mode 100644 index 00000000000..36a923e6ad2 --- /dev/null +++ b/docs/get-started/quickstart.md @@ -0,0 +1,69 @@ +# Quick Start + +## Installation + +Install Megatron Core with pip: + +```bash +# 1. Install Megatron Core with required dependencies +pip install --no-build-isolation megatron-core[mlm,dev] + +# 2. Clone repository for examples +git clone https://github.com/NVIDIA/Megatron-LM.git +cd Megatron-LM +pip install --no-build-isolation .[mlm,dev] +``` + +That's it! You're ready to start training. + +## Your First Training Run + +### Simple Training Example + +```bash +# Distributed training example (2 GPUs, mock data) +torchrun --nproc_per_node=2 examples/run_simple_mcore_train_loop.py +``` + +### LLaMA-3 Training Example + +```bash +# 8 GPUs, FP8 precision, mock data +./examples/llama/train_llama3_8b_fp8.sh +``` + +## Data Preparation + +### JSONL Data Format + +```json +{"text": "Your training text here..."} +{"text": "Another training sample..."} +``` + +### Basic Preprocessing + +```bash +python tools/preprocess_data.py \ + --input data.jsonl \ + --output-prefix processed_data \ + --tokenizer-type HuggingFaceTokenizer \ + --tokenizer-model /path/to/tokenizer.model \ + --workers 8 \ + --append-eod +``` + +### Key Arguments + +- `--input`: Path to input JSON/JSONL file +- `--output-prefix`: Prefix for output binary files (.bin and .idx) +- `--tokenizer-type`: Tokenizer type (`HuggingFaceTokenizer`, `GPT2BPETokenizer`, etc.) 
+- `--tokenizer-model`: Path to tokenizer model file +- `--workers`: Number of parallel workers for processing +- `--append-eod`: Add end-of-document token + +## Next Steps + +- Explore [Parallelism Strategies](../user-guide/parallelism-guide.md) to scale your training +- Learn about [Data Preparation](../user-guide/data-preparation.md) best practices +- Check out [Advanced Features](../user-guide/features/index.md) for advanced capabilities diff --git a/docs/index.md b/docs/index.md index 30e89f2c96f..88760513f23 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,21 +1,85 @@ -# Megatron User Guide +# Megatron Core User Guide -**Megatron Core** is a Python library that has the core components required to build your language models. -A reference implementation of Megatron Core can be found in [NeMo](https://github.com/NVIDIA/NeMo/tree/main) It offers a *simple* and -*intuitive* API. +**Megatron Core** is a GPU-optimized library for training large language models at scale. It provides modular, composable building blocks for creating custom training frameworks with state-of-the-art parallelism strategies and performance optimizations. + +Megatron Core offers a flexible, reusable foundation for building large-scale transformer training systems. **Megatron-LM** serves as a reference implementation demonstrating how to use Megatron Core components to train models with billions to trillions of parameters across distributed GPU clusters. + +## Key Features + +* Composable transformer building blocks (attention, MLP, etc.) +* Advanced parallelism strategies (TP, PP, DP, EP, CP) +* Pipeline schedules and distributed optimizers +* Mixed precision support (FP16, BF16, FP8) +* GPU-optimized kernels and memory management +* High-performance dataloaders and dataset utilities +* Model architectures (LLaMA, Qwen, DeepSeek, GPT, Mamba, etc.) 
```{toctree} -:maxdepth: 1 -:caption: User Guide +:maxdepth: 2 +:hidden: +:caption: Get Started + +get-started/quickstart +``` + +```{toctree} +:maxdepth: 2 +:hidden: +:caption: Basic Usage + +user-guide/data-preparation +user-guide/training-examples +user-guide/parallelism-guide +``` + +```{toctree} +:maxdepth: 2 +:hidden: +:caption: Supported Models + +models/index +``` + +```{toctree} +:maxdepth: 2 +:hidden: +:caption: Advanced Features -user-guide/index +user-guide/features/moe +user-guide/features/context_parallel +user-guide/features/custom_fsdp +user-guide/features/dist_optimizer +user-guide/features/optimizer_cpu_offload +user-guide/features/pipeline_parallel_layout +user-guide/features/megatron_energon +user-guide/features/megatron_rl +user-guide/features/tokenizers ``` ```{toctree} :maxdepth: 1 -:caption: API Guide +:hidden: +:caption: Developer Guide -api-guide/index -apidocs/index.rst +developer/contribute +developer/submit +developer/oncall +developer/generate_docs ``` +```{toctree} +:maxdepth: 2 +:hidden: +:caption: Discussions + +advanced/index +``` + +```{toctree} +:maxdepth: 2 +:hidden: +:caption: API Reference + +api-guide/index +apidocs/index.rst +``` \ No newline at end of file diff --git a/docs/llama_mistral.md b/docs/llama_mistral.md index 5dd61866e87..a79bb2c4bf9 100644 --- a/docs/llama_mistral.md +++ b/docs/llama_mistral.md @@ -1,6 +1,6 @@ # Llama, Mistral and other Llama-like model support in Megatron-LM -NOTE: In order to simplify code we now only support converting llama-3.x and mistral checkpoints downloaded from Huggingface. +NOTE: In order to simplify code we now only support converting llama-3.x and mistral checkpoints downloaded from Hugging Face. For converting other models, see [Megatron Bridge](models/index.md). 
The [Llama-2](https://ai.meta.com/llama/) and [Llama-3.x](https://llama.meta.com/) family of models are an open-source set of pretrained & finetuned (for chat) models that have achieved strong results across a wide set of benchmarks. At their times of release, both Llama-2 and Llama-3 models achieved among the best results for open-source models, and were competitive with leading closed-source models (see https://arxiv.org/pdf/2307.09288.pdf and https://ai.meta.com/blog/meta-llama-3/). @@ -28,15 +28,15 @@ Architecturally Llama-2, Llama-3 and Mistral-7b are very similar. As such Megatr - [MMLU](#mmlu) - [Llama-3.x](#llama-3x) - [Download Huggingface checkpoints](#download-huggingface-checkpoints) - - [Convert checkpoint format](#convert-checkpoint-format-1) - - [Huggingface format](#huggingface-format-1) + - [Convert checkpoint format](#convert-checkpoint-format) + - [Huggingface format](#huggingface-format) - [(Optional) Validate checkpoints](#optional-validate-checkpoints) - - [Launch model](#launch-model-1) + - [Launch model](#launch-model) - [Mistral-7b](#mistral-7b) - - [Download Huggingface checkpoints](#download-huggingface-checkpoints-2) - - [Convert checkpoint format](#convert-checkpoint-format-3) - - [(Optional) Validate checkpoints](#optional-validate-checkpoints-2) - - [Launch model](#launch-model-3) + - [Download Huggingface checkpoints](#download-huggingface-checkpoints) + - [Convert checkpoint format](#convert-checkpoint-format) + - [(Optional) Validate checkpoints](#optional-validate-checkpoints) + - [Launch model](#launch-model) - [Other Llama-like model support](#other-llama-like-model-support) - [Known numerical differences](#known-numerical-differences) - [Using legacy model format](#using-legacy-model-format) diff --git a/docs/models/index.md b/docs/models/index.md new file mode 100644 index 00000000000..6fabd1f582c --- /dev/null +++ b/docs/models/index.md @@ -0,0 +1,17 @@ +# Supported Models + +Megatron Core supports a wide range of language 
and multimodal models with optimized implementations for large-scale training. + +## Model Conversion + +For converting HuggingFace models to Megatron format, use [Megatron Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge), the official standalone converter. Megatron Bridge supports an extensive list of models including LLaMA, Mistral, Mixtral, Qwen, DeepSeek, Gemma, Phi, Nemotron, and many more. + +See the [Megatron Bridge supported models list](https://github.com/NVIDIA-NeMo/Megatron-Bridge?tab=readme-ov-file#supported-models) for the complete and up-to-date list of supported models. + +```{toctree} +:maxdepth: 1 + +llms +multimodal +../llama_mistral +``` diff --git a/docs/models/llms.md b/docs/models/llms.md new file mode 100644 index 00000000000..1464b934f9d --- /dev/null +++ b/docs/models/llms.md @@ -0,0 +1,57 @@ +# Language Models + +Megatron Core supports the following language model architectures for large-scale training. + +## Converting HuggingFace Models + +Use [**Megatron Bridge**](https://github.com/NVIDIA-NeMo/Megatron-Bridge) to convert HuggingFace models to Megatron format. Megatron Bridge is the official standalone converter with support for an extensive list of models including LLaMA, Mistral, Mixtral, Qwen, DeepSeek, Gemma, Phi, Nemotron, and many more. + +See the [Megatron Bridge supported models list](https://github.com/NVIDIA-NeMo/Megatron-Bridge?tab=readme-ov-file#supported-models) for the complete and up-to-date list. 
+ +## Decoder-Only Models + +| Model | Description | Key Features | +|-------|-------------|--------------| +| **GPT** | Generative Pre-trained Transformer | Standard autoregressive LM, foundational architecture | +| **LLaMA** | Meta's LLaMA family | Efficient architecture with RoPE, SwiGLU, RMSNorm | +| **Mistral** | Mistral AI models | Sliding window attention, efficient inference | +| **Mixtral** | Sparse Mixture-of-Experts | 8x7B MoE architecture for efficient scaling | +| **Qwen** | Alibaba's Qwen series | HuggingFace integration, multilingual support | +| **Mamba** | State Space Model | Subquadratic sequence length scaling, efficient long context | + +## Encoder-Only Models + +| Model | Description | Key Features | +|-------|-------------|--------------| +| **BERT** | Bidirectional Encoder Representations | Masked language modeling, classification tasks | + +## Encoder-Decoder Models + +| Model | Description | Key Features | +|-------|-------------|--------------| +| **T5** | Text-to-Text Transfer Transformer | Unified text-to-text framework, sequence-to-sequence | + +## Retrieval-Augmented Models + +| Model | Description | Key Features | +|-------|-------------|--------------| +| **RETRO** | Retrieval-Enhanced Transformer | Retrieval-augmented generation, knowledge grounding | + +## Example Scripts + +Training examples for these models can be found in the `examples/` directory: +- `examples/gpt3/` - GPT-3 training scripts +- `examples/llama/` - LLaMA training scripts +- `examples/mixtral/` - Mixtral MoE training +- `examples/mamba/` - Mamba training scripts +- `examples/bert/` - BERT training scripts +- `examples/t5/` - T5 training scripts +- `examples/retro/` - RETRO training scripts + +## Model Implementation + +All language models are built using Megatron Core's composable transformer blocks, enabling: +- Flexible parallelism strategies (TP, PP, DP, EP, CP) +- Mixed precision training (FP16, BF16, FP8) +- Distributed checkpointing +- Efficient memory 
management diff --git a/docs/models/multimodal.md b/docs/models/multimodal.md new file mode 100644 index 00000000000..66ed8ccd9cb --- /dev/null +++ b/docs/models/multimodal.md @@ -0,0 +1,61 @@ +# Multimodal Models + +Megatron Core supports multimodal models that combine language with vision, audio, and other modalities for comprehensive multimodal understanding. + +## MIMO: Multimodal In/Out Framework + +**MIMO (Multimodal In/Out Model)** is an experimental framework in Megatron Core that supports arbitrary combinations of modalities including vision, audio, and text. MIMO provides a flexible architecture for building custom multimodal models. + +> **Note**: MIMO is experimental and under active development. The API may change in future releases. + +**Key Features:** +- Arbitrary modality combinations (vision, audio, text, etc.) +- Flexible encoder architecture for different input modalities +- Unified embedding space across modalities +- Support for both vision-language and audio-vision-language models + +See [examples/mimo](https://github.com/NVIDIA/Megatron-LM/tree/main/examples/mimo) for training scripts and examples. 
+ +## Vision-Language Models + +| Model | Description | Vision Encoder | Language Model | +|-------|-------------|----------------|----------------| +| **LLaVA** | Visual instruction tuning | CLIP ViT-L/14 | Mistral-7B / LLaMA | +| **NVLM** | NVIDIA Vision-Language Model | CLIP / Custom ViT | LLaMA-based | +| **LLaMA 3.1 Nemotron Nano VL** | Efficient multimodal model | Vision Transformer | LLaMA 3.1 8B | + +## Vision Encoders + +| Model | Description | Key Features | +|-------|-------------|--------------| +| **CLIP ViT** | OpenAI's CLIP Vision Transformer | Image-text alignment, multiple scales (L/14@336px) | +| **RADIO** | Resolution-Agnostic Dynamic Image Optimization | Flexible resolution handling, efficient vision encoding | + +## Diffusion Models + +For multimodal diffusion models (image generation, text-to-image, etc.), see [NeMo Diffusion Models](https://github.com/NVIDIA-NeMo/NeMo/tree/main/nemo/collections/diffusion). NeMo provides production-ready implementations of: +- Stable Diffusion variants +- Text-to-image generation +- Image-to-image translation +- ControlNet and other conditioning mechanisms + +## Multimodal Features + +- **Image-Text Alignment**: Pre-training on image-caption pairs +- **Visual Instruction Tuning**: Fine-tuning on instruction-following datasets +- **Flexible Vision Encoders**: Support for different ViT architectures and resolutions +- **Combined Checkpointing**: Unified checkpoints combining vision and language models +- **Efficient Training**: Full parallelism support (TP, PP, DP) for both vision and language components + +## Example Scripts + +Multimodal training examples can be found in the following directories: + +**MIMO Framework:** +- `examples/mimo/` - Multimodal In/Out training with support for vision-language and audio-vision-language models + +**Specific Multimodal Models:** +- `examples/multimodal/` - LLaVA-style training with Mistral + CLIP +- `examples/multimodal/nvlm/` - NVLM training scripts +- 
`examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/` - Nemotron VL training +- `examples/multimodal/radio/` - RADIO vision encoder integration diff --git a/docs/user-guide/data-preparation.md b/docs/user-guide/data-preparation.md new file mode 100644 index 00000000000..3ff5eedba89 --- /dev/null +++ b/docs/user-guide/data-preparation.md @@ -0,0 +1,70 @@ +# Data Preparation + +Preparing your data correctly is essential for successful training with Megatron Core. + +## Data Format + +Megatron Core expects training data in JSONL (JSON Lines) format, where each line is a JSON object: + +```json +{"text": "Your training text here..."} +{"text": "Another training sample..."} +{"text": "More training data..."} +``` + +## Preprocessing Data + +Use the `preprocess_data.py` tool to convert your JSONL data into Megatron's binary format: + +```bash +python tools/preprocess_data.py \ + --input data.jsonl \ + --output-prefix processed_data \ + --tokenizer-type HuggingFaceTokenizer \ + --tokenizer-model /path/to/tokenizer.model \ + --workers 8 \ + --append-eod +``` + +### Key Arguments + +| Argument | Description | +|----------|-------------| +| `--input` | Path to input JSON/JSONL file | +| `--output-prefix` | Prefix for output binary files (.bin and .idx) | +| `--tokenizer-type` | Tokenizer type (`HuggingFaceTokenizer`, `GPT2BPETokenizer`, etc.) 
| +| `--tokenizer-model` | Path to tokenizer model file | +| `--workers` | Number of parallel workers for processing | +| `--append-eod` | Add end-of-document token | + +## Output Files + +The preprocessing tool generates two files: +- `processed_data.bin` - Binary file containing tokenized sequences +- `processed_data.idx` - Index file for fast random access + +## Using Preprocessed Data + +Reference your preprocessed data in training scripts: + +```bash +--data-path processed_data \ +--split 949,50,1 # Train/validation/test split +``` + +## Common Tokenizers + +### HuggingFace Tokenizers + +```bash +--tokenizer-type HuggingFaceTokenizer \ +--tokenizer-model /path/to/tokenizer.model +``` + +### GPT-2 BPE Tokenizer + +```bash +--tokenizer-type GPT2BPETokenizer \ +--vocab-file gpt2-vocab.json \ +--merge-file gpt2-merges.txt +``` diff --git a/docs/api-guide/context_parallel.md b/docs/user-guide/features/context_parallel.md similarity index 97% rename from docs/api-guide/context_parallel.md rename to docs/user-guide/features/context_parallel.md index f81a6097abe..841c16326b3 100644 --- a/docs/api-guide/context_parallel.md +++ b/docs/user-guide/features/context_parallel.md @@ -2,7 +2,7 @@ ## Context parallelism overview -```{figure} ../images/context_parallel/CP_overview.png +```{figure} ../../images/context_parallel/CP_overview.png :alt: cp_overview :align: center @@ -15,7 +15,7 @@ For example, in Figure 1, assuming sequence length is 8K, each GPU processes 4K ## Context parallelism benefits -```{figure} ../images/context_parallel/CP_results.png +```{figure} ../../images/context_parallel/CP_results.png :alt: cp_results :align: center diff --git a/docs/api-guide/custom_fsdp.md b/docs/user-guide/features/custom_fsdp.md similarity index 98% rename from docs/api-guide/custom_fsdp.md rename to docs/user-guide/features/custom_fsdp.md index faa262ee7fa..2f81eb0c5ef 100644 --- a/docs/api-guide/custom_fsdp.md +++ b/docs/user-guide/features/custom_fsdp.md @@ -1,6 +1,6 @@ 
-**NOTE: In M-Core 0.14, the custom FSDP refactored its checkpoint implementation to use DTensor-based torch distributed checkpointing. The custom FSDP was also renamed Megatron FSDP. The relevant sections of this document are no longer applicable.** +# Megatron FSDP -# MCore Custom Fully Sharded Data Parallel (FSDP) +**NOTE: In M-Core 0.14, the custom FSDP refactored its checkpoint implementation to use DTensor-based torch distributed checkpointing. The custom FSDP was also renamed Megatron FSDP. The relevant sections of this document are no longer applicable.** ## How to use ? @@ -54,7 +54,7 @@ The design of Custom FSDP draws inspiration from PyTorch FSDP [Zhao, Yanli, et a > When training with FSDP, the GPU memory footprint is smaller than when training with DDP across all workers. This makes the training of some very large models feasible by allowing larger models or batch sizes to fit on device. This comes with the cost of increased communication volume. The communication overhead is reduced by internal optimizations like overlapping communication and computation. -![FSDP workflow](../images/custom_fsdp/FSDP_workflow.png) +![FSDP workflow](../../images/custom_fsdp/FSDP_workflow.png) *Notice that the unit processed in workflow here is the “FSDP instance 1: N layers”, where an FSDP instance is the smallest FSDP processing unit (also a PyTorch module), which means that we can safely release this module weights after using it (executing the forward or backward of this module), and there will be no other computations computations relying on these weights. This capability is the foundation of FSDP's layer-by-layer execution and memory-saving strategy. An FSDP instance is also referred to as an **FSDP Unit**.* @@ -78,13 +78,13 @@ In backward path One way to view FSDP’s sharding is to decompose the DDP gradient all-reduce into reduce-scatter and all-gather. 
Specifically, during the backward pass, FSDP reduces and scatters gradients, ensuring that each rank possesses a shard of the gradients. Then it updates the corresponding shard of the parameters in the optimizer step. Finally, in the subsequent forward pass, it performs an all-gather operation to collect and combine the updated parameter shards. -![FSDP Allreduce](../images/custom_fsdp/FSDP_Allreduce.png) +![FSDP Allreduce](../../images/custom_fsdp/FSDP_Allreduce.png) ### 2. Custom FSDP underlying data structure To implement the FSDP functionality described above, the custom FSDP is designed with the following Python classes and data structure: -![MCore Custom FSDP Class Diagram](../images/custom_fsdp/MCore_Custom_FSDP_Class_Diagram.png) +![MCore Custom FSDP Class Diagram](../../images/custom_fsdp/MCore_Custom_FSDP_Class_Diagram.png) ### 3. The custom FSDP interface: FullyShardedDataParallel diff --git a/docs/api-guide/dist_optimizer.md b/docs/user-guide/features/dist_optimizer.md similarity index 95% rename from docs/api-guide/dist_optimizer.md rename to docs/user-guide/features/dist_optimizer.md index 34f42d5343f..ddb6079885c 100644 --- a/docs/api-guide/dist_optimizer.md +++ b/docs/user-guide/features/dist_optimizer.md @@ -16,11 +16,11 @@ The figures below illustrate the distributed optimizer's sharding scheme, and th ## Data flow -![Data flow](../images/distrib_optimizer/data_flow.png) +![Data flow](../../images/distrib_optimizer/data_flow.png) ## Sharding scheme -![Sharding scheme](../images/distrib_optimizer/sharding_scheme.png) +![Sharding scheme](../../images/distrib_optimizer/sharding_scheme.png) ## Key steps diff --git a/docs/user-guide/features/fine_grained_activation_offloading.md b/docs/user-guide/features/fine_grained_activation_offloading.md new file mode 100644 index 00000000000..53211d1d06c --- /dev/null +++ b/docs/user-guide/features/fine_grained_activation_offloading.md @@ -0,0 +1,31 @@ +# Fine-grained Activation Offloading (collaborated with 
rednote) + +Memory capacity is more and more important with the rise of extremely sparse MoE models like DeepSeek-V3 and Qwen3-235B. Fine-grained recomputing reduces the memory footprint at the cost of extra recomputation, while offloading could utilize the host-device bandwidth to achieve nearly zero overhead. Fine-grained Activation Offloading targets offloading activations at the granularity of specific modules, so that we can calibrate the amount of offloaded activations to maximize the training throughput. + +Currently, the supported offloading modules are `"attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act"`, which could work with fine-grained recomputation to release almost all activations of a transformer layer. + +**Features** +* Support PP=1/PP/Interleaved PP +* Compatible with fine-grained recomputation +* Support FP8 +* Support MTP +* Support mixed dense & moe layer +* Support A2A Overlap +* Support CUDA Graph + * (Temporary) CUDA graph scope cannot contain the offloading modules + +**Usage** +```bash +# Enable fine-grained activation offloading +--fine-grained-activation-offloading + +# Specify which modules are going to offload their input +# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act".
+--offload-modules expert_fc1 +``` +**Compatible with Fine-grained Recomputation** +- For modules with minor perf overhead like layernorm or moe_act, use recomputing to reduce memory footprint; +- For other modules, use offloading to reduce memory footprint; +- Make sure the offloading/reloading could be overlapped with computing; + +![Fine-grained Activation Offloading and Fine-grained Recomputation](../../images/fine_grained_activation_offloading/offloading_and_recomputing.png) diff --git a/docs/user-guide/features/index.md b/docs/user-guide/features/index.md new file mode 100644 index 00000000000..7730443e91b --- /dev/null +++ b/docs/user-guide/features/index.md @@ -0,0 +1,17 @@ +# Advanced Features + +Advanced feature guides for key Megatron Core capabilities. + +```{toctree} +:maxdepth: 2 + +moe +context_parallel +custom_fsdp +dist_optimizer +optimizer_cpu_offload +pipeline_parallel_layout +tokenizers +megatron_energon +megatron_rl +``` diff --git a/docs/user-guide/features/megatron_energon.md b/docs/user-guide/features/megatron_energon.md new file mode 100644 index 00000000000..d08bde21e38 --- /dev/null +++ b/docs/user-guide/features/megatron_energon.md @@ -0,0 +1,132 @@ +# Megatron Energon + +Advanced multimodal dataloader for efficient loading of text, images, video, and audio at scale. 
+ +## Overview + +[**Megatron Energon**](https://github.com/NVIDIA/Megatron-Energon) is purpose-built for large-scale multimodal training with: + +- **Multimodal support** - Text, images, video, audio +- **Distributed loading** - Optimized for multi-node training +- **Data blending** - Mix datasets with configurable weights +- **WebDataset format** - Efficient streaming from cloud storage +- **State management** - Save and restore training position + +## Installation + +```bash +pip install megatron-energon +``` + +## Key Features + +### Data Processing + +- **Packing** - Optimize sequence length utilization +- **Grouping** - Smart batching of similar-length sequences +- **Joining** - Combine multiple dataset sources +- **Object storage** - Stream from S3, GCS, Azure Blob Storage + +### Production-Ready + +- Distributed loading across workers and nodes +- Checkpoint data loading state +- Memory-efficient streaming +- Parallel data loading with prefetching + +## Basic Usage + +```python +from megatron.energon import get_train_dataset, get_loader, WorkerConfig + +# Create dataset +ds = get_train_dataset( + '/path/to/dataset', + batch_size=32, + shuffle_buffer_size=1000, + worker_config=WorkerConfig.default_worker_config(), +) + +# Create loader and iterate +for batch in get_loader(ds): + # Training step + pass +``` + +## Multimodal Example + +```python +# Load image-text dataset +ds = get_train_dataset( + '/path/to/multimodal/dataset', + batch_size=32, + worker_config=WorkerConfig(num_workers=8, prefetch_factor=2), +) + +for batch in get_loader(ds): + images = batch['image'] # Image tensors + texts = batch['text'] # Text captions + # Process batch +``` + +## Dataset Blending + +Mix multiple datasets with custom weights: + +```python +from megatron.energon import Blender + +blended_ds = Blender([ + ('/path/to/dataset1', 0.6), # 60% + ('/path/to/dataset2', 0.3), # 30% + ('/path/to/dataset3', 0.1), # 10% +]) +``` + +## Configuration + +### Worker Configuration + 
+```python +WorkerConfig( + num_workers=8, # Parallel workers + prefetch_factor=2, # Batches to prefetch per worker + persistent_workers=True, # Keep workers alive between epochs +) +``` + +### Common Parameters + +| Parameter | Description | +|-----------|-------------| +| `batch_size` | Samples per batch | +| `shuffle_buffer_size` | Buffer size for randomization | +| `max_samples_per_sequence` | Max samples to pack into one sequence | +| `worker_config` | Worker configuration for parallel loading | + +## Integration with Megatron-LM + +```python +from megatron.energon import get_train_dataset, get_loader +from megatron.training import get_args + +args = get_args() + +train_ds = get_train_dataset( + args.data_path, + batch_size=args.micro_batch_size, +) + +for iteration, batch in enumerate(get_loader(train_ds)): + loss = train_step(batch) +``` + +## Resources + +- **[Megatron Energon GitHub](https://github.com/NVIDIA/Megatron-Energon)** - Documentation and examples +- **[Multimodal Examples](https://github.com/NVIDIA/Megatron-LM/tree/main/examples/multimodal)** - Megatron-LM multimodal training + +## Next Steps + +- Check [Multimodal Models](../../models/multimodal.md) for supported architectures +- See [Training Examples](../training-examples.md) for integration examples diff --git a/docs/user-guide/features/megatron_rl.md b/docs/user-guide/features/megatron_rl.md new file mode 100644 index 00000000000..128b41bdaf5 --- /dev/null +++ b/docs/user-guide/features/megatron_rl.md @@ -0,0 +1,46 @@ +# Megatron RL + +Reinforcement learning library for post-training large language models at scale. + +## Overview + +[**Megatron RL**](https://github.com/NVIDIA/Megatron-LM/tree/dev/megatron/rl) adds native reinforcement learning capabilities to Megatron-LM for large-scale RL-based post-training of foundation models. + +> **Note**: Megatron RL is under active development and primarily designed for research teams exploring RL post-training on modern NVIDIA hardware. 
For production deployments, use [**NeMo RL**](https://github.com/NVIDIA-NeMo/RL). + +## Key Features + +- **Decoupled Design** - Clean separation between agent/environment logic and RL implementation +- **Flexible Inference** - Support for Megatron, OpenAI, and HuggingFace inference backends +- **Trainer/Evaluator** - Manages rollout generation and coordinates with inference systems +- **Megatron Integration** - Native integration with Megatron Core inference system + +## Architecture + +### Components + +**Agents & Environments** +- Accept inference handles +- Return experience rollouts with rewards +- Implement custom RL logic + +**Trainer/Evaluator** +- Controls rollout generation +- Coordinates with inference systems +- Manages training loops + +**Inference Interface** +- Provides `.generate(prompt, **generation_args)` endpoint +- Supports multiple backends (Megatron, OpenAI, HuggingFace) + +## Use Cases + +- RLHF (Reinforcement Learning from Human Feedback) +- Custom reward-based fine-tuning +- Policy optimization for specific tasks +- Research on RL post-training techniques + +## Resources + +- **[Megatron RL GitHub](https://github.com/NVIDIA/Megatron-LM/tree/dev/megatron/rl)** - Source code and documentation +- **[Megatron Core Inference](../../api-guide/core/transformer.md)** - Native inference integration diff --git a/docs/user-guide/features/moe.md b/docs/user-guide/features/moe.md new file mode 100644 index 00000000000..56aca8c6999 --- /dev/null +++ b/docs/user-guide/features/moe.md @@ -0,0 +1,12 @@ +# Mixture of Experts + +```{toctree} +:maxdepth: 1 +:caption: MoE Features + +multi_token_prediction +multi_latent_attention +``` + +```{include} ../../../megatron/core/transformer/moe/README.md +``` diff --git a/docs/api-guide/multi_latent_attention.md b/docs/user-guide/features/multi_latent_attention.md similarity index 100% rename from docs/api-guide/multi_latent_attention.md rename to docs/user-guide/features/multi_latent_attention.md diff --git 
a/docs/api-guide/multi_token_prediction.md b/docs/user-guide/features/multi_token_prediction.md similarity index 57% rename from docs/api-guide/multi_token_prediction.md rename to docs/user-guide/features/multi_token_prediction.md index 4059fa5326e..891bf4c93c5 100644 --- a/docs/api-guide/multi_token_prediction.md +++ b/docs/user-guide/features/multi_token_prediction.md @@ -3,7 +3,7 @@ Multi-Token Prediction (MTP) extends the prediction scope to multiple future tokens at each position. On the one hand, an MTP objective densifies the training signals and may improve data efficiency. On the other hand, MTP may enable the model to pre-plan its representations for better prediction of future tokens. In this implementation of MTP, we sequentially predict additional tokens and keep the complete causal chain at each prediction depth. The following figure illustrates our implementation of MTP in [DeepSeek-V3](https://github.com/deepseek-ai/DeepSeek-V3/). -![MTP_implementation](../images/multi_token_prediction/MTP_implementation.png) +![MTP_implementation](../../images/multi_token_prediction/MTP_implementation.png) The k-th MTP module consists of a shared embedding layer, a projection matrix, a Transformer block, and a shared output head. For the i-th input token at the (k - 1)-th prediction depth, we first combine the representation of the i-th token and the embedding of the (i + k)-th token with the linear projection. The combined representation serves as the input of the Transformer block at the k-th depth to produce the output representation. @@ -18,6 +18,31 @@ We can train GPTModel like models with Multi-Token Prediction (MTP) by setting m | mtp_num_layers | Number of Multi-Token Prediction (MTP) Layers. MTP extends the prediction scope to multiple future tokens at each position. This MTP implementation sequentially predicts additional tokens by using D sequential modules to predict D additional tokens. Default is None. 
| | mtp_loss_scaling_factor | Scaling factor of Multi-Token Prediction (MTP) loss. We compute the average of the MTP losses across all depths, and multiply it by the scaling factor to obtain the overall MTP loss, which serves as an additional training objective. Default is 0.1. | +## Pipeline Parallel Layout for MTP + +MTP supports flexible placement of MTP layers across pipeline stages using a custom `pipeline_model_parallel_layout`. By default, all MTP layers are placed on the last pipeline stage, but you can customize their placement. + +### MTP Standalone Mode + +When MTP layers are placed in a separate virtual pipeline (vpp) stage that is not on the last pipeline rank, the `mtp_standalone` flag is automatically set to `True`. This mode enables MTP to run independently in its own pipeline stage. + +### Layout Format + +Use `m` to represent MTP layers in the pipeline layout string. For example: +- `"E|t*3|(t|)*5mL"` - MTP in the last stage +- `"E|t*3|(t|)*4tm|L"` - MTP in the second-to-last stage with a decoder layer +- `"E|t*3|(t|)*3tt|m|L"` - MTP in a standalone stage (second-to-last) with no other layers + +### Constraints + +- All MTP layers must be placed in the same virtual pipeline stage. +- MTP layers cannot be placed on the first pipeline rank. + +## Implementation Notes + +- For models with MTP layers, the final layernorm is placed in the stage that contains the last decoder layer, rather than in the post-process stage. This may cause small numerical differences in gradient norm reduction when final layernorm is placed in different pipeline stages in deterministic mode. Bitwise alignment can be achieved by disabling gradient norm clipping. +- MTP loss is computed in the post-processing stage. + ## Precautions Please do not use Context Parallel (CP), or arbitrary AttnMaskType, or learned absolute position embedding type with MTP. These use cases are not yet supported. 
diff --git a/docs/user-guide/features/optimizer_cpu_offload.md b/docs/user-guide/features/optimizer_cpu_offload.md new file mode 100644 index 00000000000..408d7f6a788 --- /dev/null +++ b/docs/user-guide/features/optimizer_cpu_offload.md @@ -0,0 +1,4 @@ +# Optimizer CPU Offload + +```{include} ../../../megatron/core/optimizer/cpu_offloading/README.md +``` diff --git a/docs/api-guide/pipeline_parallel_layout.md b/docs/user-guide/features/pipeline_parallel_layout.md similarity index 100% rename from docs/api-guide/pipeline_parallel_layout.md rename to docs/user-guide/features/pipeline_parallel_layout.md diff --git a/docs/user-guide/features/tokenizers.md b/docs/user-guide/features/tokenizers.md new file mode 100644 index 00000000000..0aecf8df8a7 --- /dev/null +++ b/docs/user-guide/features/tokenizers.md @@ -0,0 +1,230 @@ +# Tokenizers + +Megatron Core provides a unified tokenizer system with a HuggingFace-style API for easy tokenizer management and configuration. + +## Overview + +The `MegatronTokenizer` class offers a simple, familiar API for loading and managing tokenizers: + +- **Automatic detection** - Load any tokenizer type without specifying the library +- **Metadata-based configuration** - Store tokenizer settings in JSON for easy reuse +- **HuggingFace-compatible API** - Familiar `.from_pretrained()` interface +- **Custom tokenizer support** - Extend with model-specific tokenization logic + +## Key Features + +### Unified API + +Use the same API regardless of tokenizer backend (SentencePiece, HuggingFace, TikToken, etc.): + +```python +from megatron.core.tokenizers import MegatronTokenizer + +tokenizer = MegatronTokenizer.from_pretrained("/path/to/tokenizer") +``` + +### Tokenizer Metadata + +Configuration is stored in a JSON metadata file containing: +- Tokenizer library (HuggingFace, SentencePiece, TikToken, etc.) 
+- Chat templates +- Custom tokenizer class +- Special token configurations + +**Benefits:** +- Set configuration once, reuse everywhere +- No repeated CLI arguments +- Easy sharing - just copy the tokenizer directory + +### Automatic Library Detection + +The correct tokenizer implementation is automatically selected: +- No need to specify `SentencePieceTokenizer`, `HuggingFaceTokenizer`, etc. +- Library type detected from metadata +- Seamless switching between tokenizer backends + +## Basic Usage + +### Creating Tokenizer Metadata + +Save tokenizer configuration for reuse: + +```python +from megatron.core.tokenizers import MegatronTokenizer + +# Create metadata for a SentencePiece tokenizer +MegatronTokenizer.write_metadata( + tokenizer_path="/path/to/tokenizer.model", + tokenizer_library="sentencepiece", + chat_template="{% for message in messages %}{{ message.content }}{% endfor %}", +) +``` + +The metadata is saved as `tokenizer_metadata.json` in the tokenizer directory. + +### Loading a Tokenizer + +Load from a directory with metadata: + +```python +from megatron.core.tokenizers import MegatronTokenizer + +# Load with auto-detected configuration +tokenizer = MegatronTokenizer.from_pretrained("/path/to/tokenizer.model") +``` + +### Loading with Custom Metadata Path + +If metadata is stored separately: + +```python +tokenizer = MegatronTokenizer.from_pretrained( + tokenizer_path="/path/to/tokenizer.model", + metadata_path="/path/to/custom/metadata.json", +) +``` + +### Loading with Inline Metadata + +Pass metadata as a dictionary: + +```python +tokenizer = MegatronTokenizer.from_pretrained( + tokenizer_path="GPT2BPETokenizer", + metadata_path={"library": "megatron"}, + vocab_file="/path/to/vocab.txt", +) +``` + +## Advanced Usage + +### Custom Tokenizer Classes + +Create model-specific tokenization logic: + +```python +from megatron.core.tokenizers.text import MegatronTokenizerText + +class CustomTokenizer(MegatronTokenizerText): + def encode(self, text): + # 
Custom encoding logic + return super().encode(text) + + def decode(self, tokens): + # Custom decoding logic + return super().decode(tokens) + +# Save metadata with custom class +MegatronTokenizer.write_metadata( + tokenizer_path="/path/to/tokenizer.model", + tokenizer_library="sentencepiece", + tokenizer_class=CustomTokenizer, +) +``` + +### TikToken Tokenizers + +Configure TikToken-based tokenizers: + +```python +tokenizer = MegatronTokenizer.from_pretrained( + tokenizer_path="/path/to/tokenizer/model.json", + metadata_path={"library": "tiktoken"}, + pattern="v2", + num_special_tokens=1000, +) +``` + +### Null Tokenizer + +Use a null tokenizer for testing or non-text models: + +```python +tokenizer = MegatronTokenizer.from_pretrained( + metadata_path={"library": "null"}, + vocab_size=131072, +) +``` + +## Integration with Megatron-LM + +### Using with Training Scripts + +The tokenizer system integrates seamlessly with Megatron-LM training: + +```bash +# Null tokenizer for testing +torchrun --nproc_per_node=8 pretrain_gpt.py \ + --tokenizer-type NullTokenizer \ + --vocab-size 131072 \ + ... +``` + +```bash +# HuggingFace tokenizer with metadata +torchrun --nproc_per_node=8 pretrain_gpt.py \ + --tokenizer-type HuggingFaceTokenizer \ + --tokenizer-model meta-llama/Meta-Llama-3-8B \ + --tokenizer-metadata /path/to/metadata.json \ + ... +``` + +### Auto-Generated Metadata + +If `--tokenizer-metadata` is not specified, a default metadata file is generated automatically based on the tokenizer type. + +### Legacy Tokenizer Support + +The old tokenizer system is still supported for backward compatibility: + +```bash +torchrun --nproc_per_node=8 pretrain_gpt.py \ + --legacy-tokenizer \ + ... +``` + +## Supported Tokenizer Libraries + +| Library | Description | Use Case | +|---------|-------------|----------| +| **HuggingFace** | Transformers tokenizers | Most modern LLMs (LLaMA, Mistral, etc.) 
| +| **SentencePiece** | Google's tokenizer | GPT-style models, custom vocabularies | +| **TikToken** | OpenAI's tokenizer | GPT-3.5/GPT-4 style tokenization | +| **Megatron** | Built-in tokenizers | Legacy GPT-2 BPE | +| **Null** | No-op tokenizer | Testing, non-text modalities | + +## Common Tokenizer Types + +### LLaMA / Mistral + +```python +MegatronTokenizer.write_metadata( + tokenizer_path="/path/to/llama/tokenizer.model", + tokenizer_library="sentencepiece", +) +``` + +### GPT-2 + +```python +MegatronTokenizer.write_metadata( + tokenizer_path="GPT2BPETokenizer", + tokenizer_library="megatron", + vocab_file="/path/to/gpt2-vocab.json", + merge_file="/path/to/gpt2-merges.txt", +) +``` + +## Best Practices + +1. **Always save metadata** - Create metadata once, reuse across training runs +2. **Use HuggingFace tokenizers** - When possible, for modern LLM compatibility +3. **Test tokenization** - Verify encode/decode before starting training +4. **Version control metadata** - Include `tokenizer_metadata.json` in your experiment configs +5. **Share tokenizer directories** - Include both model files and metadata for reproducibility + +## Next Steps + +- **Prepare Data**: See [Data Preparation](../data-preparation.md) for preprocessing with tokenizers +- **Train Models**: Use tokenizers in [Training Examples](../training-examples.md) +- **Supported Models**: Check [Language Models](../../models/llms.md) for model-specific tokenizers diff --git a/docs/user-guide/index.md b/docs/user-guide/index.md index 153d4972416..bbe85451582 100644 --- a/docs/user-guide/index.md +++ b/docs/user-guide/index.md @@ -1,9 +1,14 @@ # User Guide +Comprehensive guides for using Megatron Core and Megatron-LM. 
+ ```{toctree} :maxdepth: 2 quickstart msc_integration +data-preparation +training-examples +parallelism-guide +features/index ``` - diff --git a/docs/user-guide/parallelism-guide.md b/docs/user-guide/parallelism-guide.md new file mode 100644 index 00000000000..2baf518ae85 --- /dev/null +++ b/docs/user-guide/parallelism-guide.md @@ -0,0 +1,211 @@ +# Parallelism Strategies Guide + +Megatron Core supports multiple parallelism strategies that can be combined to efficiently train models from billions to trillions of parameters across thousands of GPUs. + +## Overview + +| Strategy | What it parallelizes | Best for | +|----------|---------------------|----------| +| **Data Parallelism (DP)** | Batch dimension | Standard training, most common | +| **Tensor Parallelism (TP)** | Individual layers | Large layers, GPU memory constraints | +| **Pipeline Parallelism (PP)** | Model depth | Very deep models | +| **Context Parallelism (CP)** | Sequence length | Long sequences (8K+ tokens) | +| **Expert Parallelism (EP)** | MoE experts | Mixture-of-Experts models | + +## Data Parallelism (DP) + +Replicate the model across GPUs and split the batch. + +### Standard Data Parallel (DDP) + +```bash +torchrun --nproc_per_node=8 pretrain_gpt.py \ + --data-parallel-sharding-strategy no_shard +``` + +Each GPU has a full copy of the model and processes a portion of the batch. + +### Fully Sharded Data Parallel (FSDP) + +Shard model parameters, gradients, and optimizer states to reduce memory: + +```bash +# Megatron FSDP (~15% faster than PyTorch FSDP2) +--use-megatron-fsdp \ +--data-parallel-sharding-strategy optim_grads_params +``` + +**Sharding strategies:** +- `optim` - Shard optimizer states only (ZeRO-1) +- `optim_grads` - Shard gradients + optimizer (ZeRO-2) +- `optim_grads_params` - Shard parameters + gradients + optimizer (ZeRO-3) + +## Tensor Parallelism (TP) + +Split individual model layers across GPUs. Recommended for large hidden dimensions. 
+ +```bash +--tensor-model-parallel-size 4 # 4-way tensor parallelism +--sequence-parallel # Enable sequence parallelism (recommended) +``` + +**When to use:** +- Model layers don't fit on single GPU +- Large hidden dimensions (4096+) +- Usually combined with DP and PP + +## Pipeline Parallelism (PP) + +Split model layers across GPUs vertically (by depth). + +```bash +--pipeline-model-parallel-size 8 # 8 pipeline stages +--num-layers-per-virtual-pipeline-stage 4 # Virtual pipeline for load balancing +``` + +**When to use:** +- Very deep models (50+ layers) +- Combine with TP for large models +- Helps distribute memory across GPUs + +## Context Parallelism (CP) + +Split long sequences across GPUs for efficient long-context training. + +```bash +--context-parallel-size 2 # 2-way context parallelism +--cp-comm-type p2p # Communication type +``` + +**When to use:** +- Long sequences (8K+ tokens) +- Reduces activation memory +- Can combine with TP, PP, DP + +**→ [Context Parallelism Deep Dive](features/context_parallel.md)** - Detailed guide with performance analysis + +## Expert Parallelism (EP) + +Distribute experts across GPUs in Mixture-of-Experts models. 
+ +```bash +--expert-model-parallel-size 8 # 8-way expert parallelism +--num-experts 64 # 64 experts per MoE layer +--moe-grouped-gemm # Optimize expert computation +``` + +**Important:** When combining EP with TP, you **must enable Sequence Parallelism**: + +```bash +--tensor-model-parallel-size 4 +--expert-model-parallel-size 8 +--sequence-parallel # Required when using TP + EP +``` + +## Parallelism Selection Guide + +Recommended configurations based on [NVIDIA NeMo production setups](https://github.com/NVIDIA/NeMo/tree/main/scripts/performance/recommended_model_configs): + +### Language Models + +| Model | Size | GPUs | TP | PP | CP | EP | Configuration Notes | +|-------|------|------|----|----|----|----|---------------------| +| **LLaMA-3** | 8B | 8 | 1 | 1 | 2 | 1 | CP=2 for long context (8K seqlen) | +| **LLaMA-3** | 70B | 64 | 4 | 4 | 2 | 1 | Balanced TP+PP for 70B scale | +| **LLaMA-3.1** | 405B | 1024 | 8 | 8 | 2 | 1 | 3D parallelism (TP+PP+CP) | +| **GPT-3** | 175B | 128-512 | 4 | 8 | 1 | 1 | Standard large model config | + +### Mixture-of-Experts Models + +| Model | Size | GPUs | TP | PP | CP | EP | Configuration Notes | +|-------|------|------|----|----|----|----|---------------------| +| **Mixtral** | 8x7B | 64 | 1 | 4 | 1 | 8 | EP=8 for 8 experts | +| **Mixtral** | 8x22B | 256 | 4 | 4 | 1 | 8 | TP+PP+EP for large MoE | +| **DeepSeek-V3** | 671B | 1024 | 2 | 16 | 1 | 64 | Massive MoE with 256 experts | + +## Combining Strategies + +### Total GPU Count + +The total number of GPUs is calculated as: + +``` +Total GPUs = TP × PP × CP × EP × DP +``` + +### Example: LLaMA-3 70B on 64 GPUs + +```bash +# TP=4, PP=4, CP=2, DP=2 => 4 × 4 × 2 × 2 = 64 GPUs +torchrun --nproc_per_node=8 pretrain_gpt.py \ + --tensor-model-parallel-size 4 \ + --pipeline-model-parallel-size 4 \ + --context-parallel-size 2 \ + --num-layers 80 \ + --hidden-size 8192 \ + --num-attention-heads 64 \ + --seq-length 8192 \ + --micro-batch-size 1 \ + --global-batch-size 512 \ + --bf16 +``` + 
+## Performance Optimizations + +### Communication Overlap + +Enable overlapping of communication with computation: + +```bash +--overlap-grad-reduce # Overlap gradient reduction with backward pass +--overlap-param-gather # Overlap parameter gathering with forward pass +--tp-comm-overlap # Overlap TP communication +``` + +### Distributed Optimizer + +Recommended for all multi-GPU training: + +```bash +--use-distributed-optimizer +``` + +Benefits: +- Faster checkpointing +- Reduced memory when combined with FSDP +- Better performance at scale + +### Sequence Parallelism + +Always enable when using TP: + +```bash +--sequence-parallel +``` + +Reduces activation memory by sharding sequence dimension in LayerNorm and Dropout. + +## Choosing the Right Strategy + +### Start Simple +1. Begin with **Data Parallelism** (DP) only +2. Add **Tensor Parallelism** (TP) if model doesn't fit +3. Add **Pipeline Parallelism** (PP) for very large models +4. Add **Context Parallelism** (CP) for long sequences + +### Memory Constraints +- Use **FSDP** to reduce memory per GPU +- Use **TP** to split large layers +- Use **PP** to split model depth +- Enable **activation checkpointing** for extreme cases + +### Communication Bottlenecks +- Reduce **TP** degree (increases memory per GPU) +- Increase **PP** degree (may reduce efficiency) +- Use **CP** instead of larger TP for long sequences + +## Next Steps + +- **API Reference**: See [Tensor Parallel](../api-guide/core/tensor_parallel.md) and [Pipeline Parallel](../api-guide/core/pipeline_parallel.md) API documentation +- **Advanced Features**: Explore [Megatron FSDP](features/custom_fsdp.md) and [Distributed Optimizer](features/dist_optimizer.md) +- **Performance Tuning**: Check [NVIDIA NeMo Performance Guide](https://docs.nvidia.com/nemo-framework/user-guide/latest/performance/performance-guide.html) diff --git a/docs/user-guide/training-examples.md b/docs/user-guide/training-examples.md new file mode 100644 index 00000000000..2824c608c36 
--- /dev/null +++ b/docs/user-guide/training-examples.md @@ -0,0 +1,146 @@ +# Training Examples + +Get started with Megatron Core training using these practical examples. + +## Simple Training Example + +The simplest way to get started is with the basic training loop using mock data: + +```bash +# Distributed training on 2 GPUs with mock data +torchrun --nproc_per_node=2 examples/run_simple_mcore_train_loop.py +``` + +This example: +- Runs on 2 GPUs +- Uses generated mock data (no data preparation needed) +- Demonstrates basic distributed training setup +- Perfect for testing your installation + +## LLaMA-3 Training Examples + +### LLaMA-3 8B with FP8 + +Train LLaMA-3 8B model with FP8 mixed precision on 8 GPUs: + +```bash +./examples/llama/train_llama3_8b_fp8.sh +``` + +**Configuration:** +- 8 GPUs +- FP8 mixed precision (requires Hopper/Ada/Blackwell GPUs) +- Mock data for quick testing + +### Custom LLaMA Training + +For training with your own data: + +```bash +torchrun --nproc_per_node=8 pretrain_gpt.py \ + --tensor-model-parallel-size 1 \ + --pipeline-model-parallel-size 1 \ + --num-layers 32 \ + --hidden-size 4096 \ + --num-attention-heads 32 \ + --seq-length 2048 \ + --max-position-embeddings 2048 \ + --micro-batch-size 4 \ + --global-batch-size 32 \ + --train-iters 100000 \ + --lr 3.0e-4 \ + --min-lr 3.0e-5 \ + --lr-decay-style cosine \ + --lr-warmup-iters 2000 \ + --weight-decay 0.1 \ + --clip-grad 1.0 \ + --bf16 \ + --data-path /path/to/your/preprocessed_data \ + --split 949,50,1 \ + --save /path/to/checkpoints \ + --load /path/to/checkpoints \ + --log-interval 10 \ + --save-interval 1000 \ + --eval-interval 1000 +``` + +## GPT-3 Training Example + +Train a GPT-3 style model: + +```bash +torchrun --nproc_per_node=8 pretrain_gpt.py \ + --tensor-model-parallel-size 2 \ + --pipeline-model-parallel-size 2 \ + --num-layers 24 \ + --hidden-size 2048 \ + --num-attention-heads 16 \ + --seq-length 1024 \ + --max-position-embeddings 1024 \ + --micro-batch-size 2 \ 
+ --global-batch-size 16 \ + --train-iters 100000 \ + --lr 1.5e-4 \ + --min-lr 1.0e-5 \ + --lr-decay-style cosine \ + --lr-warmup-iters 1000 \ + --weight-decay 0.1 \ + --clip-grad 1.0 \ + --fp16 \ + --data-path /path/to/preprocessed_data \ + --split 949,50,1 \ + --save /path/to/checkpoints \ + --load /path/to/checkpoints +``` + +## Key Training Arguments + +### Model Architecture + +| Argument | Description | +|----------|-------------| +| `--num-layers` | Number of transformer layers | +| `--hidden-size` | Hidden dimension size | +| `--num-attention-heads` | Number of attention heads | +| `--seq-length` | Sequence length for training | + +### Training Configuration + +| Argument | Description | +|----------|-------------| +| `--micro-batch-size` | Batch size per GPU | +| `--global-batch-size` | Total batch size across all GPUs | +| `--train-iters` | Number of training iterations | + +### Learning Rate + +| Argument | Description | +|----------|-------------| +| `--lr` | Peak learning rate | +| `--min-lr` | Minimum learning rate | +| `--lr-decay-style` | LR schedule (cosine, linear, constant) | +| `--lr-warmup-iters` | Warmup iterations | + +### Mixed Precision + +| Argument | Description | +|----------|-------------| +| `--fp16` | FP16 mixed precision | +| `--bf16` | BF16 mixed precision (recommended) | +| `--fp8-hybrid` | FP8 mixed precision (Hopper/Ada/Blackwell) | + +### Data and Checkpointing + +| Argument | Description | +|----------|-------------| +| `--data-path` | Path to preprocessed data | +| `--split` | Train/validation/test split (e.g., 949,50,1) | +| `--save` | Checkpoint save directory | +| `--load` | Checkpoint load directory | +| `--save-interval` | Save checkpoint every N iterations | + +## Next Steps + +- **Optimize Performance**: See [Advanced Features](features/index.md) for FSDP, distributed optimizer, and other optimizations +- **Scale Up**: Learn about [Parallelism Strategies](parallelism-guide.md) to train larger models across more GPUs +- 
**Prepare Data**: Follow the [Data Preparation](data-preparation.md) guide to process your own datasets diff --git a/examples/inference/gpt/gpt_dynamic_inference.py b/examples/inference/gpt/gpt_dynamic_inference.py index 6c2a539ce7e..679dd78b42b 100644 --- a/examples/inference/gpt/gpt_dynamic_inference.py +++ b/examples/inference/gpt/gpt_dynamic_inference.py @@ -26,6 +26,7 @@ build_dynamic_engine_setup_prefix, build_requests, get_curr_time, + get_global_peak_memory_stats_bytes, ) from megatron.core.inference.contexts.dynamic_context import ( ContextOverflowError, @@ -174,9 +175,11 @@ def get_inference_context( ), block_size_tokens=args.inference_dynamic_batching_block_size, buffer_size_gb=args.inference_dynamic_batching_buffer_size_gb, + paused_buffer_size_gb=args.inference_dynamic_batching_paused_buffer_size_gb, max_requests=args.inference_dynamic_batching_max_requests, max_tokens=args.inference_dynamic_batching_max_tokens, tensor_model_parallel_size=args.tensor_model_parallel_size, + pipeline_model_parallel_size=args.pipeline_model_parallel_size, materialize_only_last_token_logits=not args.return_log_probs, mamba_inference_state_config=mamba_inference_state_config, cache_mla_latent=args.multi_latent_attention and args.cache_mla_latents, @@ -368,6 +371,7 @@ def _add_request(): request.time_end = get_curr_time() request.state = "finished" request.request_id = finished_request.request_id + request.events = finished_request.events # Update prompt, in case engine has been suspended and resumed. request.prompt_tokens = finished_request.prompt_tokens.tolist() @@ -436,6 +440,10 @@ def main(): else: tokenizer = build_tokenizer(args) + # Reset peak memory stats so functional tests measure this run and not + # whatever happened earlier during initialization. + torch.cuda.reset_peak_memory_stats() + # Sampling params. 
sampling_params = SamplingParams( temperature=args.temperature, @@ -446,6 +454,7 @@ def main(): num_tokens_to_generate=args.num_tokens_to_generate, termination_id=args.termination_id if args.termination_id is not None else tokenizer.eod, top_n_logprobs=args.top_n_logprobs, + stop_words=args.stop_words, ) model = get_model() @@ -495,6 +504,8 @@ def main(): # Reset engine. engine.reset() + torch.cuda.reset_peak_memory_stats() + # Trial. t = get_curr_time() result = run_inference(requests, engine) @@ -514,8 +525,9 @@ def main(): f"request.state == '{request.state}' != 'finished'." ) - # Print unique prompts + outputs. + peak_mem_stats = get_global_peak_memory_stats_bytes() + # Print unique prompts + outputs. if torch.distributed.get_rank() == 0: def escape_str(s): return s.replace("\n", "\\n") @@ -534,7 +546,7 @@ def escape_str(s): # ---- Prompt summary line ---- prompt_len = len(requests[request_idxs[0]].prompt_tokens) escaped_prompt_text = escape_str(prompt_text) - print(f"{unique_idx+1}/{len(unique_prompt_map)} [n {len(request_idxs)}, l {prompt_len}] {escaped_prompt_text}") + print(f"\n{unique_idx+1}/{len(unique_prompt_map)} [n {len(request_idxs)}, l {prompt_len}] {escaped_prompt_text}") # ---- Group all outputs for this prompt ---- output_map = defaultdict(list) @@ -544,6 +556,12 @@ def escape_str(s): # ---- Print each unique output ---- for output_text, output_request_idxs in output_map.items(): + evicted = False + for idx in output_request_idxs: + for event in requests[idx].events: + if event.type.name == "EVICT": + evicted = True + break if output_text is not None: # Use hash of prompt + generated text in case engine was # suspended and resumed, which misaligns boundary between @@ -557,7 +575,7 @@ def escape_str(s): o_hash = "--" o_len = 0 escaped_output_text = "--" - print(f" >>>> [n {len(output_request_idxs)}, {o_len} tokens, hash {o_hash}] {escaped_output_text}") + print(f" >>>> [n {len(output_request_idxs)}, {o_len} tokens, hash {o_hash}{', ' if evicted 
else ''}] {escaped_output_text}") text_hashes.append(o_hash) # Write results to JSON. Primarily used for functional testing. @@ -587,6 +605,9 @@ def escape_str(s): # Track system-level throughput as a test / debug metric if args.record_throughput: json_results["throughput"] = throughputs + # Attach peak memory metrics; the functional test only validates these + # if the fields exist in the golden values. + json_results.update(peak_mem_stats) print(f' Saving results to {args.output_path}') with open(args.output_path, "w") as fp: diff --git a/examples/inference/gpt/gpt_dynamic_inference_12b.sh b/examples/inference/gpt/gpt_dynamic_inference_12b.sh index 20f1a29cb5b..4991d9d5177 100644 --- a/examples/inference/gpt/gpt_dynamic_inference_12b.sh +++ b/examples/inference/gpt/gpt_dynamic_inference_12b.sh @@ -97,6 +97,11 @@ if [[ -v PROMPTS ]]; then --prompts ${PROMPTS} \ --num-tokens-to-generate ${NUM_TOKENS_TO_GENERATE} \ " +elif [[ -v PROMPT_FILE ]]; then + ARGS+=" \ + --prompt-file ${PROMPT_FILE} \ + --num-tokens-to-generate ${NUM_TOKENS_TO_GENERATE} \ + " else ARGS+=" \ --num-tokens-to-prompt ${NUM_TOKENS_TO_PROMPT} \ diff --git a/examples/inference/gpt/gpt_dynamic_inference_357m.sh b/examples/inference/gpt/gpt_dynamic_inference_357m.sh index 215cc2bac8f..44abb575c63 100644 --- a/examples/inference/gpt/gpt_dynamic_inference_357m.sh +++ b/examples/inference/gpt/gpt_dynamic_inference_357m.sh @@ -83,6 +83,11 @@ if [[ -v PROMPTS ]]; then --prompts ${PROMPTS} \ --num-tokens-to-generate ${NUM_TOKENS_TO_GENERATE} \ " +elif [[ -v PROMPT_FILE ]]; then + ARGS+=" \ + --prompt-file ${PROMPT_FILE} \ + --num-tokens-to-generate ${NUM_TOKENS_TO_GENERATE} \ + " else ARGS+=" \ --num-tokens-to-prompt ${NUM_TOKENS_TO_PROMPT} \ diff --git a/examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py b/examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py index 9ca4517f650..f354b122a7e 100644 --- a/examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py +++ 
b/examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py @@ -258,4 +258,4 @@ async def main( # Stop Nsight profiler. if os.environ.get("NSIGHT_PREFIX"): - torch.cuda.cudart().cudaProfilerStop() + torch.cuda.cudart().cudaProfilerStop() \ No newline at end of file diff --git a/examples/inference/gpt/gpt_static_inference.py b/examples/inference/gpt/gpt_static_inference.py index 32a49191b19..03a60927ab2 100644 --- a/examples/inference/gpt/gpt_static_inference.py +++ b/examples/inference/gpt/gpt_static_inference.py @@ -104,7 +104,13 @@ def get_inference_engine(args: Namespace, model: MegatronModule) -> StaticInfere text_generation_controller = TextGenerationController( inference_wrapped_model=inference_wrapped_model, tokenizer=tokenizer ) - return StaticInferenceEngine(text_generation_controller=text_generation_controller, legacy=True) + engine_kwargs = { + "text_generation_controller" : text_generation_controller, + "legacy" : args.use_legacy_static_engine, + } + if not args.use_legacy_static_engine: + engine_kwargs["buffer_size_gb"] = args.inference_dynamic_batching_buffer_size_gb + return StaticInferenceEngine(**engine_kwargs) async def generate( diff --git a/examples/inference/gpt/utils.py b/examples/inference/gpt/utils.py index 84ad7b0cb7b..a04b856c0a6 100644 --- a/examples/inference/gpt/utils.py +++ b/examples/inference/gpt/utils.py @@ -72,7 +72,7 @@ def add_common_inference_args(parser: ArgumentParser) -> ArgumentParser: help="Add a deterministic number of requests per step. This arg is " "prioritized over `--incoming-requests-per-sec` below (which is non-" "deterministic). 
Note that the number of requests added per step is " - "additionally limited by the inference context's `max_active_requests`, " + "additionally limited by the inference context's `max_requests`, " "`max_tokens`, and KV buffer size.", ) group.add_argument( @@ -102,6 +102,15 @@ def add_common_inference_args(parser: ArgumentParser) -> ArgumentParser: default=False, help='Skip prompt log probs.', ) + group.add_argument( + "--stop-words", + metavar='WORD', + type=str, + nargs='+', + default=None, + help='Stop words to terminate generation. Each word should be quoted and ' + 'separated by space. Example: --stop-words "\\n\\n" "END" "###"', + ) group.add_argument( "--output-path", type=str, @@ -384,7 +393,7 @@ def build_dynamic_engine_setup_prefix( Args: args (Namespace): Command-line arguments for this run. - context (DynamicInferenceContext): Stores limits such as `max_active_requests`, + context (DynamicInferenceContext): Stores limits such as `max_requests`, `max_tokens`, and `gtd_request_count`. requests (List[DynamicInferenceRequest]): List of inference requests. @@ -421,7 +430,7 @@ def build_dynamic_engine_setup_prefix( buffer_limits_str = ( f"bf: {get_mem_size_str(args.inference_dynamic_batching_buffer_size_gb*1024**3)}, " f"{context.block_allocator.active_count} chunks " - f"[r {context.max_active_requests}, t {context.max_tokens}]" + f"[r {context.max_requests}, t {context.max_tokens}]" ) parts = [ @@ -434,3 +443,17 @@ def build_dynamic_engine_setup_prefix( ] return " | ".join(parts) + + +def get_global_peak_memory_stats_bytes() -> dict: + """Peak allocated CUDA memory aggregated across ranks (MAX), in bytes. + + Uses `torch.cuda.max_memory_allocated()` and assumes peak stats were reset + before the benchmark run. 
+ """ + peak_alloc = int(torch.cuda.max_memory_allocated()) + if torch.distributed.is_available() and torch.distributed.is_initialized(): + t = torch.tensor([peak_alloc], device="cuda", dtype=torch.int64) + torch.distributed.all_reduce(t, op=torch.distributed.ReduceOp.MAX) + peak_alloc = int(t[0].item()) + return {"mem-max-allocated-bytes": peak_alloc} \ No newline at end of file diff --git a/examples/multimodal/layer_specs.py b/examples/multimodal/layer_specs.py index 4c50ecea10a..56821f2cec6 100644 --- a/examples/multimodal/layer_specs.py +++ b/examples/multimodal/layer_specs.py @@ -2,6 +2,10 @@ import torch from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add +from megatron.core.ssm.mamba_block import MambaStack, MambaStackSubmodules +from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules +from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules +from megatron.core.ssm.mlp_layer import MLPLayer from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules from megatron.core.transformer.dot_product_attention import DotProductAttention @@ -10,10 +14,7 @@ from megatron.core.transformer.mlp import MLP, MLPSubmodules from megatron.core.transformer.spec_utils import ModuleSpec from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules -from megatron.core.ssm.mamba_block import MambaStack, MambaStackSubmodules -from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules -from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules -from megatron.core.ssm.mlp_layer import MLPLayer +from megatron.core.typed_torch import not_none try: from megatron.core.extensions.transformer_engine import ( @@ -26,6 +27,13 @@ HAVE_TE = True except ImportError: + ( + TEColumnParallelLinear, + TEDotProductAttention, + TELayerNormColumnParallelLinear, 
+ TENorm, + TERowParallelLinear, + ) = (None, None, None, None, None) HAVE_TE = False try: @@ -54,12 +62,8 @@ def get_layer_spec(is_vit, normalization) -> ModuleSpec: norm = TENorm else: version = torch.__version__.split('.') - version_geq_2_4 = ( - int(TORCH_VERSION[0]) > 2 - or ( - int(TORCH_VERSION[0]) == 2 - and int(TORCH_VERSION[1]) >= 4 - ) + version_geq_2_4 = int(TORCH_VERSION[0]) > 2 or ( + int(TORCH_VERSION[0]) == 2 and int(TORCH_VERSION[1]) >= 4 ) assert version_geq_2_4, "Torch version >= 2.4.0 is required for RMSNorm" if HAVE_APEX: @@ -108,8 +112,8 @@ def get_layer_spec_te(is_vit=False, padding=False) -> ModuleSpec: module=SelfAttention, params={"attn_mask_type": attn_mask_type}, submodules=SelfAttentionSubmodules( - linear_qkv=TELayerNormColumnParallelLinear, - core_attention=TEDotProductAttention, + linear_qkv=not_none(TELayerNormColumnParallelLinear), + core_attention=not_none(TEDotProductAttention), linear_proj=TERowParallelLinear, q_layernorm=IdentityOp, k_layernorm=IdentityOp, @@ -122,6 +126,7 @@ def get_layer_spec_te(is_vit=False, padding=False) -> ModuleSpec: ), ) + def get_mamba_layer_spec_te(padding=False) -> ModuleSpec: attn_mask_type = AttnMaskType.causal # Padding mask is needed for e.g. Context Parallel. 
@@ -153,8 +158,8 @@ def get_mamba_layer_spec_te(padding=False) -> ModuleSpec: module=SelfAttention, params={"attn_mask_type": attn_mask_type}, submodules=SelfAttentionSubmodules( - linear_qkv=TELayerNormColumnParallelLinear, - core_attention=TEDotProductAttention, + linear_qkv=not_none(TELayerNormColumnParallelLinear), + core_attention=not_none(TEDotProductAttention), linear_proj=TERowParallelLinear, ), ), @@ -170,7 +175,8 @@ def get_mamba_layer_spec_te(padding=False) -> ModuleSpec: mlp=ModuleSpec( module=MLP, submodules=MLPSubmodules( - linear_fc1=TELayerNormColumnParallelLinear, linear_fc2=TERowParallelLinear + linear_fc1=TELayerNormColumnParallelLinear, + linear_fc2=TERowParallelLinear, ), ), mlp_bda=get_bias_dropout_add, @@ -179,6 +185,7 @@ def get_mamba_layer_spec_te(padding=False) -> ModuleSpec: ), ) + def get_mlp_module_spec(use_te: bool = True) -> ModuleSpec: # Dense MLP w/ or w/o TE modules. return ModuleSpec( diff --git a/examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/Dockerfile b/examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/Dockerfile index 7f30dc6c156..186046ab8c3 100644 --- a/examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/Dockerfile +++ b/examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/Dockerfile @@ -36,5 +36,5 @@ RUN pip install fairscale fire blobfile # Use --no-deps for the following to avoid outdated and unnecessary dependencies. 
RUN pip install mmf --no-deps RUN pip install open_clip_torch open-flamingo[eval] --no-deps -RUN pip install zarr "tensorstore==0.1.45" +RUN pip install "tensorstore==0.1.45" RUN pip install git+https://github.com/NVIDIA/Megatron-Energon.git#egg=megatron-energon[av_decode] diff --git a/examples/multimodal/nvlm/internvit.py b/examples/multimodal/nvlm/internvit.py index 62f3bdccd85..9df9af23f05 100644 --- a/examples/multimodal/nvlm/internvit.py +++ b/examples/multimodal/nvlm/internvit.py @@ -14,7 +14,10 @@ import torch -from megatron.core.utils import divide +from examples.multimodal.layer_scaling import ( + LayerScalingTransformerLayer, + get_bias_dropout_add_layer_scaling, +) from megatron.core.extensions.transformer_engine import ( TEColumnParallelLinear, TEDotProductAttention, @@ -35,9 +38,7 @@ from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules from megatron.core.transformer.utils import make_sharded_tensors_for_checkpoint - -from examples.multimodal.layer_scaling import LayerScalingTransformerLayer, get_bias_dropout_add_layer_scaling - +from megatron.core.utils import divide try: import apex @@ -128,10 +129,14 @@ def _gather_var(self, input_, max_dim): if rank < valid_ranks: # Ranks without any dummy attention heads. var = input_.sum(-1, keepdim=True) - elif rank == valid_ranks: # The only rank which may contain 'residual_heads' dummy attention heads. + elif ( + rank == valid_ranks + ): # The only rank which may contain 'residual_heads' dummy attention heads. var = input_[..., :max_dim].sum(-1, keepdim=True) else: - var = input_.sum(-1, keepdim=True) * 0.0 # All heads in these ranks are dummy heads: Zero-out. + var = ( + input_.sum(-1, keepdim=True) * 0.0 + ) # All heads in these ranks are dummy heads: Zero-out. 
tensor_list = [torch.empty_like(var) for _ in range(world_size)] tensor_list[rank] = var @@ -175,8 +180,7 @@ def __init__( # Need to override linear_qkv, q_layernorm and k_layernorm. qkv_bias = False - self.linear_qkv = build_module( - submodules.linear_qkv, + self.linear_qkv = submodules.linear_qkv( self.config.hidden_size, self.query_projection_size + 2 * self.kv_projection_size, config=self.config, @@ -256,6 +260,7 @@ def get_internvit_layer_spec(use_te) -> ModuleSpec: ), ) + def get_internvit300M_layer_spec(use_te) -> ModuleSpec: mlp = get_mlp_module_spec(use_te) # no norm diff --git a/examples/multimodal/radio/radio_g.py b/examples/multimodal/radio/radio_g.py index 3ce793be75d..f139632df86 100644 --- a/examples/multimodal/radio/radio_g.py +++ b/examples/multimodal/radio/radio_g.py @@ -3,6 +3,10 @@ import torch +from examples.multimodal.layer_scaling import ( + LayerScalingTransformerLayer, + get_bias_dropout_add_layer_scaling, +) from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules from megatron.core.transformer.dot_product_attention import DotProductAttention @@ -11,7 +15,7 @@ from megatron.core.transformer.mlp import MLP, MLPSubmodules from megatron.core.transformer.spec_utils import ModuleSpec from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules -from examples.multimodal.layer_scaling import LayerScalingTransformerLayer, get_bias_dropout_add_layer_scaling +from megatron.core.typed_torch import not_none try: from megatron.core.extensions.transformer_engine import ( @@ -24,6 +28,13 @@ HAVE_TE = True except ImportError: + ( + TEColumnParallelLinear, + TEDotProductAttention, + TELayerNormColumnParallelLinear, + TENorm, + TERowParallelLinear, + ) = (None, None, None, None, None) HAVE_TE = False try: @@ -113,8 +124,8 @@ def get_radio_g_layer_spec_te() -> ModuleSpec: module=SelfAttention, 
params={"attn_mask_type": attn_mask_type}, submodules=SelfAttentionSubmodules( - linear_qkv=TELayerNormColumnParallelLinear, - core_attention=TEDotProductAttention, + linear_qkv=not_none(TELayerNormColumnParallelLinear), + core_attention=not_none(TEDotProductAttention), linear_proj=TERowParallelLinear, q_layernorm=IdentityOp, k_layernorm=IdentityOp, diff --git a/examples/post_training/modelopt/Dockerfile b/examples/post_training/modelopt/Dockerfile index e127215904d..f44306ef08e 100644 --- a/examples/post_training/modelopt/Dockerfile +++ b/examples/post_training/modelopt/Dockerfile @@ -4,7 +4,7 @@ ARG PIP_CONSTRAINT= WORKDIR /workspace/nmm-sandbox -RUN pip install jsonlines omegaconf +RUN pip install omegaconf RUN pip install flask flask_restful fire nltk RUN pip install tiktoken blobfile diff --git a/examples/post_training/modelopt/README.md b/examples/post_training/modelopt/README.md index 600531223d6..48e679e4e31 100644 --- a/examples/post_training/modelopt/README.md +++ b/examples/post_training/modelopt/README.md @@ -210,4 +210,8 @@ The saved Megatron-LM distributed checkpoint (output of above scripts) can be re ``` ## Advanced Usage -TBD +To contribute, please ping [@NVIDIA/post-training](https://github.com/orgs/NVIDIA/teams/post-training) team members. We format the examples with +``` +uvx black@24.10.0 . +uvx isort . 
+``` diff --git a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh index d6ba1e1dcc4..a2212483008 100644 --- a/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh +++ b/examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh @@ -8,6 +8,7 @@ else fi MODEL_ARGS=" \ + --trust-remote-code \ --save-interval 100000 \ --micro-batch-size 1 \ --bf16 \ diff --git a/examples/post_training/modelopt/conf/nvidia/Nemotron-H-47B-Reasoning-128K.sh b/examples/post_training/modelopt/conf/nvidia/Nemotron-H-47B-Reasoning-128K.sh new file mode 100644 index 00000000000..ad07c1061c5 --- /dev/null +++ b/examples/post_training/modelopt/conf/nvidia/Nemotron-H-47B-Reasoning-128K.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +if [ -z ${HF_MODEL_CKPT} ]; then + HF_MODEL_CKPT=nvidia/Nemotron-H-47B-Reasoning-128K + TOKENIZER_MODEL=nvidia/Nemotron-H-47B-Reasoning-128K +else + TOKENIZER_MODEL=${HF_MODEL_CKPT} +fi + +MODEL_ARGS=" \ + --trust-remote-code \ + --save-interval 100000 \ + --micro-batch-size 1 \ + --bf16 \ + --attention-backend flash \ + --disable-bias-linear \ + --untie-embeddings-and-output-weights \ + --position-embedding-type none \ + --normalization RMSNorm \ + --squared-relu \ + --num-layers 98 \ + --hidden-size 8192 \ + --ffn-hidden-size 30720 \ + --num-attention-heads 64 \ + --kv-channels 128 \ + --group-query-attention \ + --num-query-groups 8 \ + --hybrid-override-pattern M-M-M-M-M-M-M-M-M*-M-M-M-M-M-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M-M-M---MM---M-M*-M-M-M-M-M- \ + --is-hybrid-model \ + --mamba-head-dim 64 \ + --mamba-num-heads 256 \ + --mamba-num-groups 8 \ + --mamba-state-dim 256 \ + --seq-length 8192 \ + --max-position-embeddings 8192 \ + --tokenizer-type HuggingFaceTokenizer \ + --use-mcore-models \ + --export-model-type MambaModel \ +" diff --git a/examples/post_training/modelopt/conf/nvidia/Nemotron-H-4B-Instruct.sh 
b/examples/post_training/modelopt/conf/nvidia/Nemotron-H-4B-Instruct.sh index 4f32fbd63ad..4ba91dbd8c6 100644 --- a/examples/post_training/modelopt/conf/nvidia/Nemotron-H-4B-Instruct.sh +++ b/examples/post_training/modelopt/conf/nvidia/Nemotron-H-4B-Instruct.sh @@ -8,6 +8,7 @@ else fi MODEL_ARGS=" \ + --trust-remote-code \ --save-interval 100000 \ --micro-batch-size 1 \ --bf16 \ diff --git a/examples/post_training/modelopt/conf/nvidia/Nemotron-H-56B-Base-8K.sh b/examples/post_training/modelopt/conf/nvidia/Nemotron-H-56B-Base-8K.sh new file mode 100644 index 00000000000..8377f0f11d6 --- /dev/null +++ b/examples/post_training/modelopt/conf/nvidia/Nemotron-H-56B-Base-8K.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +if [ -z ${HF_MODEL_CKPT} ]; then + HF_MODEL_CKPT=nvidia/Nemotron-H-56B-Base-8K + TOKENIZER_MODEL=nvidia/Nemotron-H-56B-Base-8K +else + TOKENIZER_MODEL=${HF_MODEL_CKPT} +fi + +MODEL_ARGS=" \ + --trust-remote-code \ + --save-interval 100000 \ + --micro-batch-size 1 \ + --attention-backend flash \ + --is-hybrid-model \ + --hybrid-override-pattern M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M- \ + --mamba-state-dim 256 \ + --tiktoken-pattern v2 \ + --use-mcore-models \ + --untie-embeddings-and-output-weights \ + --disable-bias-linear \ + --init-method-std 0.0099 \ + --position-embedding-type none \ + --squared-relu \ + --num-layers 118 \ + --hidden-size 8192 \ + --num-attention-heads 64 \ + --group-query-attention \ + --num-query-groups 8 \ + --ffn-hidden-size 32768 \ + --kv-channels 128 \ + --normalization RMSNorm \ + --exit-duration-in-mins 230 \ + --tensor-model-parallel-size 8 \ + --pipeline-model-parallel-size 1 \ + --seq-length 8192 \ + --max-position-embeddings 8192 \ + --tokenizer-type HuggingFaceTokenizer \ + --bf16 \ + --export-model-type MambaModel \ + " diff --git a/examples/post_training/modelopt/conf/nvidia/Nemotron-H-8B-Base-8K.sh 
b/examples/post_training/modelopt/conf/nvidia/Nemotron-H-8B-Base-8K.sh index bfcb8ee0b02..b04bf76f360 100644 --- a/examples/post_training/modelopt/conf/nvidia/Nemotron-H-8B-Base-8K.sh +++ b/examples/post_training/modelopt/conf/nvidia/Nemotron-H-8B-Base-8K.sh @@ -8,6 +8,7 @@ else fi MODEL_ARGS=" \ + --trust-remote-code \ --save-interval 100000 \ --micro-batch-size 1 \ --bf16 \ diff --git a/examples/post_training/modelopt/conf/nvidia/Nemotron-Mini-4B-Instruct.sh b/examples/post_training/modelopt/conf/nvidia/Nemotron-Mini-4B-Instruct.sh index 7ef969b059d..d2c4cda36b2 100644 --- a/examples/post_training/modelopt/conf/nvidia/Nemotron-Mini-4B-Instruct.sh +++ b/examples/post_training/modelopt/conf/nvidia/Nemotron-Mini-4B-Instruct.sh @@ -8,6 +8,7 @@ else fi MODEL_ARGS=" \ + --trust-remote-code \ --save-interval 100000 \ --micro-batch-size 1 \ --bf16 \ diff --git a/examples/post_training/modelopt/convert.sh b/examples/post_training/modelopt/convert.sh old mode 100644 new mode 100755 diff --git a/examples/post_training/modelopt/convert_model.py b/examples/post_training/modelopt/convert_model.py index 6b8801dd26f..53ae25f8d92 100644 --- a/examples/post_training/modelopt/convert_model.py +++ b/examples/post_training/modelopt/convert_model.py @@ -19,7 +19,11 @@ from megatron.post_training.arguments import add_modelopt_args from megatron.post_training.checkpointing import load_modelopt_checkpoint from megatron.post_training.model_builder import modelopt_gpt_mamba_builder -from megatron.post_training.utils import report_current_memory_info, to_empty_if_meta +from megatron.post_training.utils import ( + modelopt_version_at_least, + report_current_memory_info, + to_empty_if_meta, +) from megatron.training import get_args, get_tokenizer from megatron.training.checkpointing import save_checkpoint from megatron.training.initialize import initialize_megatron @@ -50,14 +54,11 @@ def add_convert_args(parser): help='Chosing between different speculative decoding algorithms. 
Default is None.', ) group.add_argument( - '--export-num-medusa-heads', - type=int, - default=0, - help='Number of Medusa heads for speculative decoding.', - ) - group.add_argument( - "--eagle-config", type=str, default=None, help="EAGLE architecture config. If not given, " \ - "a default config will be use. If provided, it will overwrite the default config." + "--eagle-config", + type=str, + default=None, + help="EAGLE architecture config. If not given, " + "a default config will be use. If provided, it will overwrite the default config.", ) add_modelopt_args(parser) @@ -121,7 +122,9 @@ def check_arguments(): UserWarning, ) - model = get_model(functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False) + model = get_model( + functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False + ) report_current_memory_info() unwrapped_model = unwrap_model(model)[0] @@ -130,12 +133,14 @@ def check_arguments(): import_dtype = torch.float16 if args.fp16 else torch.bfloat16 unwrapped_model = unwrap_model(model)[0] workspace_dir = os.environ.get("MLM_WORK_DIR", "/tmp") - print_rank_0("Import model from Hugging Face checkpoint in dtype {}.".format(str(import_dtype))) + print_rank_0( + "Import model from Hugging Face checkpoint in dtype {}.".format(str(import_dtype)) + ) + import_kwargs = {"dtype": import_dtype} + if modelopt_version_at_least("0.41.0"): + import_kwargs.update({"trust_remote_code": args.trust_remote_code}) import_mcore_gpt_from_hf( - unwrapped_model, - args.pretrained_model_path, - workspace_dir, - dtype = import_dtype, + unwrapped_model, args.pretrained_model_path, workspace_dir, **import_kwargs ) elif args.load is not None: _ = load_modelopt_checkpoint(model) @@ -143,10 +148,10 @@ def check_arguments(): if args.algorithm in ("eagle1", "eagle3"): mtsp_config = ALGO_TO_CONFIG[args.algorithm] if args.eagle_config: - with open(args.eagle_config)as f: + with open(args.eagle_config) as f: eagle_config = json.load(f) 
mtsp_config["config"]["eagle_architecture_config"].update(eagle_config) - + if args.export_offline_model: mtsp_config["config"]["eagle_offline"] = True @@ -157,12 +162,11 @@ def check_arguments(): if eagle_module is not None: mcore_eagle_state_dict = torch.load(args.extra_model_path) eagle_module.load_state_dict(mcore_eagle_state_dict, strict=False) - + elif args.algorithm == "medusa": config = {"medusa_num_heads": args.export_num_medusa_heads, "medusa_num_layers": 1} unwrapped_model = mtsp.convert(unwrapped_model, [("medusa", config)]) - print_rank_0(f"Converted Model:\n {model}") torch.distributed.barrier() diff --git a/examples/post_training/modelopt/export.py b/examples/post_training/modelopt/export.py index 8794c4c738c..0aa625b875d 100644 --- a/examples/post_training/modelopt/export.py +++ b/examples/post_training/modelopt/export.py @@ -14,6 +14,7 @@ from megatron.post_training.arguments import add_modelopt_args from megatron.post_training.checkpointing import load_modelopt_checkpoint from megatron.post_training.model_builder import modelopt_gpt_mamba_builder +from megatron.post_training.utils import modelopt_version_at_least from megatron.training import get_args, get_model from megatron.training.initialize import initialize_megatron from megatron.training.utils import unwrap_model @@ -65,7 +66,9 @@ def add_modelopt_export_args(parser): UserWarning, ) - model = get_model(functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False) + model = get_model( + functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False + ) # Materialize the model from meta device to cpu before loading the checkpoint. unwrapped_model = unwrap_model(model)[0] @@ -76,16 +79,18 @@ def add_modelopt_export_args(parser): # Decide whether we are exporting only the extra_modules (e.g. EAGLE3). # Only the last pp stage may have extra_modules, hence broadcast from the last rank. 
- export_extra_modules = hasattr(unwrapped_model, "eagle_module") or hasattr(unwrapped_model, "medusa_heads") + export_extra_modules = hasattr(unwrapped_model, "eagle_module") or hasattr( + unwrapped_model, "medusa_heads" + ) torch.distributed.broadcast_object_list( - [export_extra_modules], - src=torch.distributed.get_world_size() - 1, + [export_extra_modules], src=torch.distributed.get_world_size() - 1 ) - mtex.export_mcore_gpt_to_hf( - unwrapped_model, - args.pretrained_model_name, - export_extra_modules=export_extra_modules, - dtype=torch.bfloat16, - export_dir=args.export_dir, - ) + export_kwargs = { + "export_extra_modules": export_extra_modules, + "dtype": torch.bfloat16, + "export_dir": args.export_dir, + } + if modelopt_version_at_least("0.41.0"): + export_kwargs.update({"trust_remote_code": args.trust_remote_code}) + mtex.export_mcore_gpt_to_hf(unwrapped_model, args.pretrained_model_name, **export_kwargs) diff --git a/examples/post_training/modelopt/export.sh b/examples/post_training/modelopt/export.sh old mode 100644 new mode 100755 diff --git a/examples/post_training/modelopt/finetune.py b/examples/post_training/modelopt/finetune.py index 6489d394392..19ece4ef299 100755 --- a/examples/post_training/modelopt/finetune.py +++ b/examples/post_training/modelopt/finetune.py @@ -8,8 +8,6 @@ from functools import partial from typing import Any, Dict, Optional -import jsonlines - sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))) import datasets @@ -110,13 +108,21 @@ class SFTDataset(torch.utils.data.Dataset): "Open-Orca/OpenOrca": "{{ messages['question'] + ' ' + messages['response'] + ' ' }}", } + @classmethod + def _wildcard_get(cls, directory: Dict[str, Any], name: str, default_value=None): + ret = default_value + for key, val in directory.items(): + if key in name: + ret = val + break + return ret + def __init__( self, num_packed_samples: int, - data_path: Optional[str], + hf_dataset: str, tokenizer: 
transformers.PreTrainedTokenizerBase, seq_length: int, - hf_dataset: Optional[str] = None, num_shards: int = 1, shard_index: int = 0, ): @@ -129,20 +135,20 @@ def __init__( until the packed dataset has sufficient length. Args: - data_path: Path to the json or jsonl file num_packed_samples: total number of packed samples (cyclic access) - tokenizer: hf tokenizer + hf_dataset: Huggingface dataset name or local path + tokenizer: Huggingface PreTrainedTokenizer instance seq_length: max sequence length - hf_dataset: not supported yet + num_shards: number of shards for distributed training + shard_index: shard index for distributed training """ if not isinstance(tokenizer, transformers.PreTrainedTokenizerBase): raise ValueError("SFTDataset only supports transformers.PreTrainedTokenizerBase!") self.num_packed_samples = num_packed_samples - self.data_path = data_path + self.hf_dataset = hf_dataset self.tokenizer = tokenizer self.seq_length = seq_length - self.hf_dataset = hf_dataset self.data_transformation = lambda data: data self.num_shards = num_shards self.shard_index = shard_index @@ -155,42 +161,32 @@ def __init__( REMOVE_THINK_CHAT_TEMPLATE, "" ) - if data_path is not None: - if data_path.endswith(".json"): - self._raw_samples = json.load(open(data_path)) - elif data_path.endswith(".jsonl"): - with jsonlines.open(data_path, mode='r') as reader: - self._raw_samples = [obj for obj in reader] - else: - raise ValueError("data_path must be json or jsonl") - elif self.hf_dataset is not None: - hf_dataset_kwargs = SFTDataset.hf_dataset_to_kwargs.get( - self.hf_dataset, {"split": "train"} - ) - self._raw_samples = datasets.load_dataset(self.hf_dataset, token=os.environ.get("HF_TOKEN", None), **hf_dataset_kwargs) - self._raw_samples = self._raw_samples.shard( - num_shards=self.num_shards, index=shard_index - ) - - print( - "Rank {:3}/{:3} creates SFT data shard {:3}/{:3} with {:10} raw samples".format( - torch.distributed.get_rank(), - torch.distributed.get_world_size(), - 
self.shard_index, - self.num_shards, - len(self._raw_samples), - ), - flush=True, - ) + hf_dataset_kwargs = SFTDataset.hf_dataset_to_kwargs.get( + self.hf_dataset, {"split": "train"} + ) + self._raw_samples = datasets.load_dataset(self.hf_dataset, token=os.environ.get("HF_TOKEN", None), **hf_dataset_kwargs) + self._raw_samples = self._raw_samples.shard( + num_shards=self.num_shards, index=shard_index + ) - else: - raise ValueError("Either hf_dataset or data_path must be provided!") + print( + "Rank {:3}/{:3} creates SFT data shard {:3}/{:3} with {:10} raw samples".format( + torch.distributed.get_rank(), + torch.distributed.get_world_size(), + self.shard_index, + self.num_shards, + len(self._raw_samples), + ), + flush=True, + ) if self.tokenizer.chat_template is None: self.tokenizer.chat_template = SFTDataset.hf_dataset_to_prompt_template elif self.hf_dataset is not None: - self.data_transformation = SFTDataset.hf_dataset_to_conversation.get( - self.hf_dataset, lambda data: data + self.data_transformation = SFTDataset._wildcard_get( + SFTDataset.hf_dataset_to_conversation, + self.hf_dataset, + default_value=lambda data: data, ) if self.tokenizer.chat_template is None: @@ -361,23 +357,17 @@ def train_valid_test_sft_datasets_provider(train_val_test_num_samples): print_rank_0("> finished creating offline SFT datasets ...") else: kwargs = { + "hf_dataset": args.finetune_hf_dataset, "tokenizer": tokenizer._tokenizer, "seq_length": args.seq_length, # Optional kwargs - "hf_dataset": args.finetune_hf_dataset, "num_shards": mpu.get_expert_data_parallel_world_size(), "shard_index": mpu.get_expert_data_parallel_rank(), } - data_path = [ - args.train_data_path[0] if args.train_data_path else None, - args.valid_data_path[0] if args.valid_data_path else None, - args.test_data_path[0] if args.test_data_path else None, - ] - - train_ds = SFTDataset(train_val_test_num_samples[0], data_path[0], **kwargs) - valid_ds = SFTDataset(train_val_test_num_samples[1], data_path[1], **kwargs) - 
test_ds = SFTDataset(train_val_test_num_samples[2], data_path[2], **kwargs) + train_ds = SFTDataset(train_val_test_num_samples[0], **kwargs) + valid_ds = SFTDataset(train_val_test_num_samples[1], **kwargs) + test_ds = SFTDataset(train_val_test_num_samples[2], **kwargs) print_rank_0("> finished creating SFT datasets ...") diff --git a/examples/post_training/modelopt/generate.py b/examples/post_training/modelopt/generate.py index a773ea89f00..63d3f241f59 100644 --- a/examples/post_training/modelopt/generate.py +++ b/examples/post_training/modelopt/generate.py @@ -20,6 +20,8 @@ from megatron.training.utils import print_rank_0, unwrap_model from model_provider import model_provider +import modelopt.torch.quantization as mtq + warnings.filterwarnings('once') @@ -129,6 +131,12 @@ def get_conversations(example): unwrapped_model = unwrap_model(model)[0] unwrapped_model.eval() + # Fold the scalars into weight for speedup. + # [TODO]: fold_weight current assumes all weight_quantizer has weight allocated; + # however, this is not the case when share_embeddings_and_output_weights is False. 
+ if getattr(unwrapped_model, "share_embeddings_and_output_weights", False): + mtq.fold_weight(unwrapped_model) + for idx, example in enumerate(dataset): if idx > args.fraction * len(dataset): break diff --git a/examples/post_training/modelopt/generate.sh b/examples/post_training/modelopt/generate.sh old mode 100644 new mode 100755 diff --git a/examples/post_training/modelopt/mmlu.py b/examples/post_training/modelopt/mmlu.py index 1446afc8392..d475ac9fb30 100644 --- a/examples/post_training/modelopt/mmlu.py +++ b/examples/post_training/modelopt/mmlu.py @@ -5,11 +5,14 @@ import os import sys import warnings +import datasets +import logging +import torch.distributed as dist sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))) import torch -from datasets import load_dataset +from diskcache import Cache from megatron.post_training.arguments import add_modelopt_args from megatron.post_training.checkpointing import load_modelopt_checkpoint @@ -18,10 +21,13 @@ from megatron.post_training.utils import report_current_memory_info from megatron.training import get_args, get_model, get_tokenizer, initialize_megatron from megatron.training.utils import print_rank_0, unwrap_model +import modelopt.torch.quantization as mtq from model_provider import model_provider -warnings.filterwarnings('ignore') +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) # set to debug if you need more logging +warnings.filterwarnings('ignore') def add_mmlu_args(parser): """Add additional arguments for ModelOpt text generation PTQ.""" @@ -30,6 +36,8 @@ def add_mmlu_args(parser): group.add_argument("--fraction", type=float, default=1.0, help="Fraction of dataset to use.") group.add_argument("--lower-bound", type=float, default=None) group.add_argument("--no-subject-prompt", action="store_true", help="Use empty prompt instead of subject-based prompt.") + group.add_argument("--mmlu-dataset", type=str, default="cais/mmlu", help="The default dataset to use 
is cais/mmlu from the HG hub.") + group.add_argument("--cache-dir", type=str, default=None) add_modelopt_args(parser) return parser @@ -134,7 +142,7 @@ def generate_prompt(test_example, dev_examples, few_shots=0, no_subject_prompt=F ) args = get_args() - + cache = Cache(args.cache_dir) # Meta device initialization for ParallelLinear only works if using cpu initialization. # Meta device initialization is used such that models can be materialized in low-precision # directly when ModelOpt real quant is used. Otherwise, the model is first initialized @@ -152,6 +160,12 @@ def generate_prompt(test_example, dev_examples, few_shots=0, no_subject_prompt=F model = get_model(functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False) report_current_memory_info() + # Materialize the model from meta device to gpu before loading the checkpoint. + unwrapped_model = unwrap_model(model)[0] + unwrapped_model.eval() + unwrapped_model.to_empty(device="cuda") + report_current_memory_info() + disable_tqdm = args.disable_tqdm or torch.distributed.get_rank() > 0 tokenizer = get_tokenizer()._tokenizer @@ -160,29 +174,42 @@ def generate_prompt(test_example, dev_examples, few_shots=0, no_subject_prompt=F load_modelopt_checkpoint(model, strict=not args.untie_embeddings_and_output_weights) print_rank_0("Done loading checkpoint") - unwrapped_model = unwrap_model(model)[0] - unwrapped_model.eval() + # Fold the scalars into weight for speedup. + # [TODO]: fold_weight current assumes all weight_quantizer has weight allocated; + # however, this is not the case when share_embeddings_and_output_weights is False. 
+ if not getattr(unwrapped_model, "share_embeddings_and_output_weights", False): + mtq.fold_weight(unwrapped_model) all_subjects = get_all_subjects() all_correct = {} for subject in all_subjects: - test_data = load_dataset("cais/mmlu", subject, split="test") - dev_data = load_dataset("cais/mmlu", subject, split="dev") + test_data = datasets.load_dataset(args.mmlu_dataset, subject, split="test") + dev_data = datasets.load_dataset(args.mmlu_dataset, subject, split="dev") correct = [] for idx, test_example in enumerate(test_data): if idx > args.fraction * len(test_data): break - prompt = generate_prompt(test_example, dev_data, few_shots=0, no_subject_prompt=args.no_subject_prompt) label = ["A", "B", "C", "D"][test_example["answer"]] - tokens = tokenizer(prompt, return_tensors="pt") - with torch.no_grad(): - generated_ids = simple_generate( - unwrapped_model, tokens.input_ids.cuda(), osl=2, disable_tqdm=disable_tqdm - ) - predict = tokenizer.batch_decode(generated_ids)[0].strip() + prompt = generate_prompt(test_example, dev_data, few_shots=0, no_subject_prompt=args.no_subject_prompt) + cache_key = f"{args.load}_{subject}_{prompt}" # model name, subject, prompt + + if cache_key in cache: + predict = cache[cache_key] + if dist.get_rank() == 0: + logger.debug(f"Cache hit for {args.load}_{subject}") + else: + tokens = tokenizer(prompt, return_tensors="pt") + with torch.no_grad(): + generated_ids = simple_generate( + unwrapped_model, tokens.input_ids.cuda(), osl=2, disable_tqdm=disable_tqdm + ) + predict = tokenizer.batch_decode(generated_ids)[0].strip() + if torch.distributed.get_rank() == 0: + cache.add(cache_key, predict) + correct += [True] if predict.startswith(label) else [False] all_correct[subject] = correct @@ -207,5 +234,5 @@ def generate_prompt(test_example, dev_examples, few_shots=0, no_subject_prompt=F flush=True, ) - if args.lower_bound is not None: - assert sum(avg_correct) / len(avg_correct) > args.lower_bound + if args.lower_bound is not None: + assert 
sum(avg_correct) / len(avg_correct) > args.lower_bound diff --git a/examples/post_training/modelopt/mmlu.sh b/examples/post_training/modelopt/mmlu.sh old mode 100644 new mode 100755 diff --git a/examples/post_training/modelopt/offline_feature_extract.sh b/examples/post_training/modelopt/offline_feature_extract.sh old mode 100644 new mode 100755 diff --git a/examples/post_training/modelopt/prune.py b/examples/post_training/modelopt/prune.py index 41d7e499ab8..2671b6badd9 100644 --- a/examples/post_training/modelopt/prune.py +++ b/examples/post_training/modelopt/prune.py @@ -20,12 +20,18 @@ from modelopt.torch.export import import_mcore_gpt_from_hf from modelopt.torch.prune.plugins.mcore_minitron import SUPPORTED_HPARAMS -from megatron.core.parallel_state import get_pipeline_model_parallel_group, get_tensor_model_parallel_group +from megatron.core.parallel_state import ( + get_pipeline_model_parallel_group, + get_tensor_model_parallel_group, +) from megatron.post_training.arguments import add_modelopt_args from megatron.post_training.checkpointing import load_modelopt_checkpoint from megatron.post_training.generate import simple_generate from megatron.post_training.model_builder import modelopt_gpt_mamba_builder -from megatron.post_training.utils import report_current_memory_info +from megatron.post_training.utils import ( + modelopt_version_at_least, + report_current_memory_info, +) from megatron.training import get_args, get_model, get_tokenizer, initialize_megatron from megatron.training.checkpointing import save_checkpoint from megatron.training.utils import print_rank_0, unwrap_model @@ -38,10 +44,7 @@ def add_prune_args(parser): """Add additional arguments for ModelOpt pruning.""" group = parser.add_argument_group(title="ModelOpt pruning") group.add_argument( - "--calib-size", - type=int, - default=1024, - help="Samples to use for pruning calibration.", + "--calib-size", type=int, default=1024, help="Samples to use for pruning calibration." 
) group.add_argument( "--prompts", @@ -56,21 +59,14 @@ def add_prune_args(parser): help="Reference texts. Please use | to separate different batches.", ) group.add_argument( - "--pretrained-model-path", - type=str, - default=None, - help="HuggingFace pretrained model", + "--pretrained-model-path", type=str, default=None, help="HuggingFace pretrained model" ) # Pruning parameters group.add_argument( - "--target-ffn-hidden-size", - type=int, - help="Prune MLP FFN hidden size to this value", + "--target-ffn-hidden-size", type=int, help="Prune MLP FFN hidden size to this value" ) group.add_argument( - "--target-hidden-size", - type=int, - help="Prune hidden size (embedding dim) to this value", + "--target-hidden-size", type=int, help="Prune hidden size (embedding dim) to this value" ) group.add_argument( "--target-num-attention-heads", @@ -93,14 +89,10 @@ def add_prune_args(parser): help="Prune dimension of Mamba attention heads to this value", ) group.add_argument( - "--target-num-moe-experts", - type=int, - help="Prune number of MoE experts to this value", + "--target-num-moe-experts", type=int, help="Prune number of MoE experts to this value" ) group.add_argument( - "--target-moe-ffn-hidden-size", - type=int, - help="Prune MoE FFN hidden size to this value", + "--target-moe-ffn-hidden-size", type=int, help="Prune MoE FFN hidden size to this value" ) group.add_argument( "--target-moe-shared-expert-intermediate-size", @@ -169,7 +161,9 @@ def get_params(model): check_arguments(args) tokenizer = get_tokenizer()._tokenizer - model = get_model(functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False) + model = get_model( + functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False + ) unwrapped_model = unwrap_model(model)[0] report_current_memory_info() @@ -181,11 +175,11 @@ def get_params(model): if args.pretrained_model_path is not None: import_dtype = torch.float16 if args.fp16 else torch.bfloat16 workspace_dir = 
os.environ.get("MLM_WORK_DIR", "/tmp") + import_kwargs = {"dtype": import_dtype} + if modelopt_version_at_least("0.41.0"): + import_kwargs.update({"trust_remote_code": args.trust_remote_code}) import_mcore_gpt_from_hf( - unwrapped_model, - args.pretrained_model_path, - workspace_dir, - dtype=import_dtype, + unwrapped_model, args.pretrained_model_path, workspace_dir, **import_kwargs ) def _custom_prompt_forward_loop_func(model): @@ -211,7 +205,9 @@ def _hf_dataset_forword_loop_func(model): simple_generate(model, tokens.input_ids.cuda(), osl=1) if args.layers_to_drop: - mtp.mcore_minitron.drop_mcore_language_model_layers(model, layers_to_drop=args.layers_to_drop) + mtp.mcore_minitron.drop_mcore_language_model_layers( + model, layers_to_drop=args.layers_to_drop + ) else: print_rank_0("Pruning model...") export_config = { diff --git a/examples/post_training/modelopt/quantize.py b/examples/post_training/modelopt/quantize.py index 737aed68b6a..635c18ee545 100644 --- a/examples/post_training/modelopt/quantize.py +++ b/examples/post_training/modelopt/quantize.py @@ -2,26 +2,49 @@ """Sample Generate GPT.""" +import copy import functools import os import sys import warnings import torch +import torch.distributed from datasets import load_dataset from tqdm import tqdm sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))) import modelopt.torch.quantization as mtq + +try: + import modelopt.torch.quantization.plugins.psx_formats as mtq_psx +except ImportError: + mtq_psx = None + warnings.warn( + "psx_formats is not installed. PSX formats quantization configs will not be available." + ) + +try: + import modelopt.torch.quantization.plugins.luts as mtq_luts +except ImportError: + mtq_luts = None + warnings.warn("luts is not installed. 
LUTs quantization configs will not be available.") + + from modelopt.torch.export import import_mcore_gpt_from_hf +from megatron.core import parallel_state from megatron.core.transformer.moe.router import TopKRouter from megatron.post_training.arguments import add_modelopt_args from megatron.post_training.checkpointing import load_modelopt_checkpoint from megatron.post_training.generate import simple_generate from megatron.post_training.model_builder import modelopt_gpt_mamba_builder -from megatron.post_training.utils import report_current_memory_info +from megatron.post_training.utils import ( + modelopt_version_at_least, + print_distributed_quant_summary, + report_current_memory_info, +) from megatron.training import get_args, get_model, get_tokenizer, initialize_megatron from megatron.training.checkpointing import save_checkpoint from megatron.training.utils import print_rank_0, unwrap_model @@ -29,7 +52,7 @@ warnings.filterwarnings("ignore") - +# TODO deprecate these aliases in the next release QUANT_CFG_CHOICES = { "int8_sq": mtq.INT8_SMOOTHQUANT_CFG, "fp8": mtq.FP8_DEFAULT_CFG, @@ -38,6 +61,23 @@ "w4a8_awq": mtq.W4A8_AWQ_BETA_CFG, "nvfp4": mtq.NVFP4_DEFAULT_CFG, } +for k in mtq.config.choices: + QUANT_CFG_CHOICES[k] = getattr(mtq, k) + +KV_QUANT_CFG_CHOICES = { + "none": "none", + "fp8": "FP8_KV_CFG", + "fp8_affine": "FP8_AFFINE_KV_CFG", + "nvfp4": "NVFP4_KV_CFG", + "nvfp4_affine": "NVFP4_AFFINE_KV_CFG", + "nvfp4_rotate": "NVFP4_KV_ROTATE_CFG", +} + +if mtq_psx is not None: + QUANT_CFG_CHOICES.update({k: getattr(mtq_psx, k) for k in mtq_psx.choices}) + +if mtq_luts is not None: + QUANT_CFG_CHOICES.update({k: getattr(mtq_luts, k) for k in mtq_luts.choices}) def add_text_generate_ptq_args(parser): @@ -46,6 +86,12 @@ def add_text_generate_ptq_args(parser): group.add_argument( "--calib-size", type=int, default=512, help="Samples to use for ptq calibration." 
) + group.add_argument( + "--calib-dataset", + type=str, + default="abisee/cnn_dailymail", + help="The default calibration dataset is cnn_dailymail from HF hub.", + ) group.add_argument( "--prompts", type=str, @@ -61,26 +107,30 @@ def add_text_generate_ptq_args(parser): group.add_argument( "--pretrained-model-path", type=str, default=None, help="HuggingFace pretrained model" ) - group.add_argument( - "--compress", - action="store_true", - help="Enable real low-bit quantization.", - ) + group.add_argument("--compress", action="store_true", help="Enable real low-bit quantization.") group.add_argument( "--disable-qkv-quant", action="store_true", help="Disable q, k, v linear from being quantized.", ) - group.add_argument( - "--weight-only", - action="store_true", - help="Disable input quantization.", - ) + group.add_argument("--weight-only", action="store_true", help="Disable input quantization.") group.add_argument( "--force-all-expert-routing", action="store_true", help="Forcing all experts to be routed during the calibration.", ) + group.add_argument( + "--num-first-layers-to-skip-quant", + type=int, + default=None, + help="Number of first layers to skip quantization.", + ) + group.add_argument( + "--num-last-layers-to-skip-quant", + type=int, + default=None, + help="Number of last layers to skip quantization.", + ) add_modelopt_args(parser) return parser @@ -97,6 +147,62 @@ def check_arguments(): args.moe_grouped_gemm = False +def _is_first_layers(name: str, num_layers: int = 1, num_layers_to_disable: int = 1) -> bool: + if "layers." not in name: + return False + try: + layer_idx = int(name.split("layers.")[-1].split(".")[0]) + except ValueError: + return False + return layer_idx < num_layers_to_disable + + +def _is_last_layers(name: str, num_layers: int = 1, num_layers_to_disable: int = 1) -> bool: + if "layers."
not in name: + return False + try: + layer_idx = int(name.split("layers.")[-1].split(".")[0]) + except ValueError: + return False + return layer_idx >= num_layers - num_layers_to_disable + + +def get_first_layers_disabled_config(config, num_layers: int = 1, num_layers_to_disable: int = 1): + """Get a config for `mtq.quantize` with first `num_layers_to_disable` layers disabled. + + The layers to disable are the first `num_layers_to_disable` layers. + """ + config = copy.deepcopy(config) + quant_cfg = config.get("quant_cfg", {}) + quant_cfg.update( + { + functools.partial( + _is_first_layers, num_layers=num_layers, num_layers_to_disable=num_layers_to_disable + ): {"enable": False} + } + ) + config["quant_cfg"] = quant_cfg + return config + + +def get_last_layers_disabled_config(config, num_layers: int = 1, num_layers_to_disable: int = 1): + """Get a config for `mtq.quantize` with last `num_layers_to_disable` layers disabled. + + The layers to disable are the last `num_layers_to_disable` layers. + """ + config = copy.deepcopy(config) + quant_cfg = config.get("quant_cfg", {}) + quant_cfg.update( + { + functools.partial( + _is_last_layers, num_layers=num_layers, num_layers_to_disable=num_layers_to_disable + ): {"enable": False} + } + ) + config["quant_cfg"] = quant_cfg + return config + + + def get_modelopt_torch_quantization_config(): """Return a quantization config.""" args = get_args() @@ -108,8 +214,6 @@ def get_modelopt_torch_quantization_config(): "axis": None, "enable": True, } - # Disable mamba-mixer quantization for now.
- mtq_config["quant_cfg"]["*mixer.*"] = {"enable": False} if args.export_quant_cfg == "fp8": # Enable Medusa heads and kv-cache quantization mtq_config["quant_cfg"]["*medusa_heads**"] = fp8_config @@ -125,17 +229,38 @@ def get_modelopt_torch_quantization_config(): # Customization if args.disable_qkv_quant: mtq_config["quant_cfg"]["*self_attention*"] = {"enable": False} - if args.export_kv_cache_quant and not args.compress: - mtq_config["quant_cfg"]["*linear_qkv.output_quantizer"] = fp8_config + + # KV Cache Quantization + enable_quant_kv_cache = args.export_kv_cache_quant != "none" + if enable_quant_kv_cache and not args.compress: + kv_cache_quant_cfg = getattr(mtq, KV_QUANT_CFG_CHOICES[args.export_kv_cache_quant])[ + "quant_cfg" + ] + mtq_config = mtq.utils.update_quant_cfg_with_kv_cache_quant(mtq_config, kv_cache_quant_cfg) + + # Weight Only Quantization if args.weight_only: mtq_config["quant_cfg"]["*input_quantizer"] = {"enable": False} + if args.num_first_layers_to_skip_quant is not None: + mtq_config = get_first_layers_disabled_config( + mtq_config, + num_layers=args.num_layers, + num_layers_to_disable=args.num_first_layers_to_skip_quant, + ) + if args.num_last_layers_to_skip_quant is not None: + mtq_config = get_last_layers_disabled_config( + mtq_config, + num_layers=args.num_layers, + num_layers_to_disable=args.num_last_layers_to_skip_quant, + ) return mtq_config def get_calib_dataloader(calib_size=512, max_sequence_length=512): """Return a dataloader for calibration.""" - dataset = load_dataset("cnn_dailymail", name="3.0.0", split="train") + args = get_args() + dataset = load_dataset(args.calib_dataset, name="3.0.0", split="train") text_column = "article" calib_size = min(len(dataset), calib_size) @@ -158,7 +283,9 @@ def get_calib_dataloader(calib_size=512, max_sequence_length=512): args = get_args() tokenizer = get_tokenizer()._tokenizer - model = get_model(functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False) + model = 
get_model( + functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False + ) report_current_memory_info() @@ -168,14 +295,15 @@ def get_calib_dataloader(calib_size=512, max_sequence_length=512): if args.pretrained_model_path is not None: from modelopt.torch.export import import_mcore_gpt_from_hf + import_dtype = torch.float16 if args.fp16 else torch.bfloat16 unwrapped_model = unwrap_model(model)[0] workspace_dir = os.environ.get("MLM_WORK_DIR", "/tmp") + import_kwargs = {"dtype": import_dtype} + if modelopt_version_at_least("0.41.0"): + import_kwargs.update({"trust_remote_code": args.trust_remote_code}) import_mcore_gpt_from_hf( - unwrapped_model, - args.pretrained_model_path, - workspace_dir, - dtype=import_dtype, + unwrapped_model, args.pretrained_model_path, workspace_dir, **import_kwargs ) def _custom_prompt_forward_loop_func(model): @@ -196,23 +324,20 @@ def _custom_prompt_forward_loop_func(model): def _hf_dataset_forword_loop_func(model): dataloader = get_calib_dataloader(args.calib_size) - if args.force_all_expert_routing: - for name, module in model.named_modules(): - if isinstance(module, TopKRouter): - module.topk = module.num_experts - for prompt in tqdm(dataloader, total=args.calib_size, disable=torch.distributed.get_rank()): tokens = tokenizer(prompt, return_tensors="pt") generated_ids = simple_generate(model, tokens.input_ids.cuda(), osl=1) - if args.force_all_expert_routing: - for name, module in model.named_modules(): - if isinstance(module, TopKRouter): - module.topk = module.config.moe_router_topk - unwrapped_model = unwrap_model(model)[0] - if args.export_quant_cfg in QUANT_CFG_CHOICES: + if args.force_all_expert_routing: + warnings.warn( + "--force-all-expert-routing will be deprecated in the next release and is no longer needed." 
+ ) + + if args.export_quant_cfg is not None: + if args.export_quant_cfg not in QUANT_CFG_CHOICES: + raise ValueError(f"Unsupported quantization config {args.export_quant_cfg}.") print_rank_0("Quantizing the model...") mtq_config = get_modelopt_torch_quantization_config() ptq_forward_loop_func = _hf_dataset_forword_loop_func @@ -230,19 +355,9 @@ def _hf_dataset_forword_loop_func(model): mtq.compress(unwrapped_model) print_rank_0("Weights are now compressed to low-bit!") - print_rank_0(f"Fake Quantized Model:\n {unwrapped_model}") - - if torch.distributed.get_rank() == 0: - for k, v in unwrapped_model.state_dict().items(): - if "amax" not in k and "_scale" not in k: - continue - if isinstance(v, torch.Tensor): - v_amax = torch.max(torch.abs(v.clone().detach().to(torch.bfloat16))) - print("{:80} {:32} {:32} max {:.4e}".format(k, str(v.dtype), str(v.shape), v_amax)) - else: - print("{:80}".format(k)) + print_distributed_quant_summary(model, "Quantized Model:") _custom_prompt_forward_loop_func(unwrapped_model) - if args.save is not None and args.export_quant_cfg in QUANT_CFG_CHOICES: + if args.save is not None: save_checkpoint(1, model, None, None, 0, release=True) diff --git a/examples/post_training/modelopt/quantize.sh b/examples/post_training/modelopt/quantize.sh old mode 100644 new mode 100755 diff --git a/examples/post_training/modelopt/requirements.txt b/examples/post_training/modelopt/requirements.txt index dd1f47ef6c8..3c763e01cc1 100644 --- a/examples/post_training/modelopt/requirements.txt +++ b/examples/post_training/modelopt/requirements.txt @@ -1,9 +1,6 @@ +diskcache datasets -jsonlines nvidia-modelopt omegaconf -pulp tensorstore!=0.1.46,!=0.1.72 -torchprofile transformers -zarr diff --git a/examples/post_training/modelopt/speculative.md b/examples/post_training/modelopt/speculative.md index 064d56768cc..6ea9dea9478 100755 --- a/examples/post_training/modelopt/speculative.md +++ b/examples/post_training/modelopt/speculative.md @@ -43,7 +43,7 @@ and 
acceptance rate (AR). For simplicity and efficiency, we use `vllm serve --quantization modelopt` to host an quantized endpoint and we feed multi-turn conversation data to synthesize the assistant output. See ModelOpt's example (https://github.com/NVIDIA/Model-Optimizer/tree/main/speculative_decoding) -for more details. The final output is stored as jsonlines in an OpenAI chat completion format. +for more details. The final output is stored as `jsonlines` in an OpenAI chat completion format. ### Quantization-Aware Training (QAT) diff --git a/examples/post_training/modelopt/validate.sh b/examples/post_training/modelopt/validate.sh old mode 100644 new mode 100755 diff --git a/examples/rl/environment_configs/gsm8k.yaml b/examples/rl/environment_configs/gsm8k.yaml index ae0a319d9df..dc0f34dd4ca 100644 --- a/examples/rl/environment_configs/gsm8k.yaml +++ b/examples/rl/environment_configs/gsm8k.yaml @@ -1,5 +1,6 @@ - agent_type: examples.rl.environments.math.gsm8k_agent.GSM8KAgent agent_args: answer_format: "boxed" + format_reward: 0.5 weight: 1.0 evaluation_only: false diff --git a/examples/rl/environment_configs/gsm8k_nanov3.yaml b/examples/rl/environment_configs/gsm8k_nanov3.yaml new file mode 100644 index 00000000000..30403ed052b --- /dev/null +++ b/examples/rl/environment_configs/gsm8k_nanov3.yaml @@ -0,0 +1,10 @@ +- agent_type: examples.rl.environments.math.gsm8k_agent.GSM8KAgent + agent_args: + answer_format: "boxed" + format_reward: 0.5 + assistant_suffix: "Assistant: " + chat_mode: true + negative_reward: 0.0 + partial_end_reward: 0.75 + weight: 1.0 + evaluation_only: false diff --git a/examples/rl/environments/math/gsm8k_agent.py b/examples/rl/environments/math/gsm8k_agent.py index 348ba655dbb..3bb39bc09f9 100644 --- a/examples/rl/environments/math/gsm8k_agent.py +++ b/examples/rl/environments/math/gsm8k_agent.py @@ -23,8 +23,23 @@ class GSM8KAgent(MathAgent): - def __init__(self, answer_format: str = "boxed", format_reward: float = 0.0, **kwargs): - 
super().__init__(format_reward=format_reward, answer_format=answer_format, **kwargs) + def __init__(self, + answer_format: str = "boxed", + chat_mode: bool = False, + assistant_suffix: str = "Assistant: Let me solve this step by step.\n", + format_reward: float = 0.0, + negative_reward: float = 0.0, + partial_end_reward: float = 0.0, + **kwargs): + super().__init__( + answer_format=answer_format, + chat_mode=chat_mode, + assistant_suffix=assistant_suffix, + format_reward=format_reward, + negative_reward=negative_reward, + partial_end_reward=partial_end_reward, + **kwargs + ) self.env_id: str = "gsm8k" def reformat_datum(self, datum: dict) -> dict: diff --git a/examples/rl/environments/math/math_agent.py b/examples/rl/environments/math/math_agent.py index d63e3f25623..67feb3b4adb 100644 --- a/examples/rl/environments/math/math_agent.py +++ b/examples/rl/environments/math/math_agent.py @@ -21,15 +21,38 @@ MATHVERIFY_AVAILABLE ), "math_verify is not installed but now required. Install it using `pip install math-verify` to continue." -NEGATIVE_REWARD = 0.0 - - class MathAgent(RewardOnlyAgent): - def __init__(self, format_reward: float = 0.0, answer_format: str = "tagged", **kwargs): + def __init__(self, + format_reward: float = 0.0, + answer_format: str = "tagged", + assistant_suffix: str = "Assistant: Let me solve this step by step.\n", + chat_mode: bool = False, + negative_reward: float = 0.0, + partial_end_reward: float = 0.0, + **kwargs): + """ + Args: + format_reward (float): Reward given when the answer is in the expected format, + even if the answer is incorrect or is missing the end-of-text token. + answer_format (str): Which answer format is expected: "tagged" for tags, + or "boxed" for \boxed{} LaTeX formatting. + assistant_suffix (str): The suffix string included in the assistant's response, typically to + guide the assistant's output format and "persona". For example, "Let me solve this step by step." 
+ chat_mode (bool): If True, agent operates in a chat (conversational) context. + negative_reward (float): Reward assigned for a clearly incorrect or unparseable answer. + partial_end_reward (float): Reward when the answer is correct but an expected end token is not matched exactly. + **kwargs: Additional arguments for the base RewardOnlyAgent. + """ super().__init__(**kwargs) + assert answer_format in ["tagged", "boxed"], "Invalid answer format" + self.format_reward = format_reward self.answer_format = answer_format + self.assistant_suffix = assistant_suffix + self.chat_mode = chat_mode + self.negative_reward = negative_reward + self.partial_end_reward = partial_end_reward def compute_score(self, response: str, golden: dict, golden_key: str = "answer") -> float: """Take a response and a golden answer and return a score. Supports tagged or boxed answers. @@ -37,32 +60,70 @@ def compute_score(self, response: str, golden: dict, golden_key: str = "answer") Uses the final answer in the response string to compute the score. """ # Allow <answer></answer> tags or \boxed{} tags (this is a bit of cheating in favor of deepseek distilled models I think) - for pattern in [ - r'<answer>(.*?)</answer>', - r"\\boxed\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}", - ]: - match = re.finditer(pattern, response, re.DOTALL) - matches = list(match) - if matches: - final_answer = matches[-1].group(1).strip() - break - else: - # Did not format the answer correctly - return NEGATIVE_REWARD + matched_format = None + end_tokens = ["<|end_of_text|>", "<|endoftext|>", "</s>"] - try: - parsed_answer = parse(final_answer) - except ValueError as e: - print("Failed to parse the answer.") - traceback.print_stack() - return NEGATIVE_REWARD + # Only an answer immediately followed by a known end token yields 1.0 reward.
+ answer_tag_pattern = r'<answer>(.*?)</answer>' + answer_tag_match = list(re.finditer(answer_tag_pattern, response, re.DOTALL)) + if answer_tag_match: + # Only consider the last occurrence + last_match = answer_tag_match[-1] + final_answer = last_match.group(1).strip() + after = response[last_match.end():].lstrip() # strip whitespace between </answer> and token - correct_answer = verify(str(golden[golden_key]), parsed_answer) - if correct_answer: - return 1.0 + try: + parsed_answer = parse(final_answer) + except ValueError as e: + print("Failed to parse the answer.") + traceback.print_stack() + return self.negative_reward + + correct_answer = verify(str(golden[golden_key]), parsed_answer) + if correct_answer: + # Accept either <|end_of_text|> or <|endoftext|> as valid terminators, for flexibility. + for token in end_tokens: + if after.startswith(token): + return 1.0 + # If the end token is present later (extra text before it), give partial credit. + for token in end_tokens: + if token in after: + return self.partial_end_reward + # If a correct answer but missing immediate end, give format reward (not NEGATIVE_REWARD).
+ return self.format_reward + else: + # Incorrect answer, regardless of format/end-of-text + return self.format_reward else: - # Formatting is correct but the answer is incorrect - return self.format_reward + # Fallback: check boxed answer format for diagnostic/format reward as before + boxed_pattern = r"\\boxed\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}" + boxed_match = list(re.finditer(boxed_pattern, response, re.DOTALL)) + if boxed_match: + last_match = boxed_match[-1] + final_answer = last_match.group(1).strip() + after = response[last_match.end():].lstrip() + try: + parsed_answer = parse(final_answer) + except ValueError as e: + print("Failed to parse the answer.") + traceback.print_stack() + return self.negative_reward + + correct_answer = verify(str(golden[golden_key]), parsed_answer) + if correct_answer: + for token in end_tokens: + if after.startswith(token): + return 1.0 + for token in end_tokens: + if token in after: + return self.partial_end_reward + return self.format_reward + else: + # Formatting is correct but the answer is incorrect + return self.format_reward + else: + # Did not format the answer correctly + return self.negative_reward def make_prefix(self, problem_key: str = "problem", **kwargs) -> str: """Take a string math problem and return the prompt. Supports requesting tagged or boxed answers. Supports chat mode prompts.""" @@ -80,6 +141,5 @@ def make_prefix(self, problem_key: str = "problem", **kwargs) -> str: The question will be a word math problem. Show your work in tags. {answer_format} User: {kwargs[problem_key]} - Assistant: Let me solve this step by step. 
- """ + {self.assistant_suffix}""" return prefix diff --git a/examples/rl/model_configs/common.sh b/examples/rl/model_configs/common.sh index fb168f1f153..4f6ca0e18cf 100644 --- a/examples/rl/model_configs/common.sh +++ b/examples/rl/model_configs/common.sh @@ -24,6 +24,7 @@ COMMON_OPTIONS="\ --log-timers-to-tensorboard \ --save-retain-interval 120 \ --inference-dynamic-batching-num-cuda-graphs 1 \ + --inference-dynamic-batching-unified-memory-level 1 \ --adam-beta1 0.9 \ --adam-beta2 ${ADAM_BETA2:-0.95} \ --adam-eps 1e-8 \ diff --git a/examples/rl/model_configs/nemotron5p5_12b_H.sh b/examples/rl/model_configs/nemotron5p5_12b_H.sh index a6248618e5e..9e97051e087 100644 --- a/examples/rl/model_configs/nemotron5p5_12b_H.sh +++ b/examples/rl/model_configs/nemotron5p5_12b_H.sh @@ -137,6 +137,6 @@ MODEL_OPTIONS="\ --straggler-minmax-count 16 \ --check-weight-hash-across-dp-replicas-interval 20000 \ --rerun-mode disabled \ - --grpo-default-temperature 0.9 \ - --grpo-default-top-p 0.95 \ + --rl-default-temperature 0.9 \ + --rl-default-top-p 0.95 \ " diff --git a/examples/rl/model_configs/nemotron6_3b_moe.sh b/examples/rl/model_configs/nemotron6_3b_moe.sh new file mode 100644 index 00000000000..8efe0b2debb --- /dev/null +++ b/examples/rl/model_configs/nemotron6_3b_moe.sh @@ -0,0 +1,128 @@ +#!/bin/bash +TP=${TP:-2} +PP=${PP:-1} +EP=${EP:-32} +NODES_REQUIRED=${NODES_REQUIRED:-4} +LLM="nemotron6_3b_moe" + +ROOT_DIR="/lustre/fsw/portfolios/llmservice/projects/llmservice_nlp_fm/nemotron6" + +CHECKPOINT="${ROOT_DIR}/3b_hybrid_moe/checkpoints/phase2_lc_reinit_emb/" + +TOKENIZER_MODEL="${ROOT_DIR}/tokenizers/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json" + +echo "Using Nemotron6 3B MOE model checkpoint" +SCRIPT_PATH="${BASH_SOURCE[0]}" +source $(dirname $SCRIPT_PATH)/common.sh + +# In all cases, one can override those values. +# However, running without envs will give you some +# good perf out of the box for established envs. 
+if [ "$(basename "$ENV_CONFIG")" = "dapo.yaml" ]; then + echo "Using DAPO environment config" + GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2} + GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.28} + MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32} + GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16} + GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64} + GRPO_ITERATIONS=${GRPO_ITERATIONS:-1} + GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"} + TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-1024} + MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1} + MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-11999} + EXIT_INTERVAL=${EXIT_INTERVAL:-20} + CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-20} +else + # Some default values if config is unsupported. + echo "Undetected environment config, using default values" + GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2} + GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.28} + MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64} + GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-2} + GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-16} + GRPO_ITERATIONS=${GRPO_ITERATIONS:-1} + GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"} + TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-32} + MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1} + MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-1024} + EXIT_INTERVAL=${EXIT_INTERVAL:-20} + CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-20} +fi + +ENV_DEPENDENT="\ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $TRAINING_BATCH_SIZE \ + --grpo-group-size $GRPO_GROUP_SIZE \ + --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \ + --grpo-iterations $GRPO_ITERATIONS \ + --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \ + --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \ + --grpo-kl-beta $GRPO_KL_BETA \ + --langrl-env-config $ENV_CONFIG " + +MODEL_OPTIONS="\ + --rl-skip-bos-token \ + --no-rl-use-sequence-packing \ + --rl-partial-rollouts \ + --rl-offload-optimizer-during-inference \ + --moe-pad-experts-for-cuda-graph-inference \ + --inference-dynamic-batching-max-tokens 8192 \ + --inference-dynamic-batching-max-requests 128 \ +
--inference-dynamic-batching-num-cuda-graphs 2 \ + --decode-only-cuda-graphs \ + --cuda-graph-impl local \ + --cuda-graph-scope full \ + --use-checkpoint-args \ + --enable-experimental \ + --cross-entropy-loss-fusion \ + --cross-entropy-fusion-impl native \ + --moe-aux-loss-coeff 0.0 \ + --moe-router-dtype fp64 \ + --moe-router-load-balancing-type aux_loss \ + --moe-router-score-function sigmoid \ + --moe-token-dispatcher-type alltoall \ + --moe-router-enable-expert-bias \ + --moe-router-topk-scaling-factor 2.5 \ + --disable-gloo-process-groups \ + --rl-default-top-k -1 \ + --rl-default-temperature 1.0 \ + --rl-default-top-p 1.0 \ + --rl-inference-logprobs-is-correction \ + --rl-importance-sampling-truncation-coef 10.0 \ + --seq-length $MAX_SEQ_LENGTH \ + --inference-max-seq-length $MAX_SEQ_LENGTH \ + --inference-max-batch-size $MAX_INFERENCE_BS \ + --pretrained-checkpoint $CHECKPOINT \ + --distributed-timeout-minutes 60 \ + --use-mcore-models \ + --no-mmap-bin-files \ + --disable-bias-linear \ + --norm-epsilon 1e-5 \ + --init-method-std 0.014 \ + --exit-duration-in-mins 5750 \ + --max-position-embeddings $MAX_SEQ_LENGTH \ + --tensor-model-parallel-size $TP \ + --pipeline-model-parallel-size $PP \ + --expert-model-parallel-size $EP \ + --expert-tensor-parallel-size 1 \ + --weight-decay 0.01 \ + --clip-grad 1.0 \ + --tiktoken-pattern v2 \ + --tokenizer-type TikTokenizer \ + --tokenizer-model ${TOKENIZER_MODEL} \ + --dist-ckpt-strictness log_unexpected + --ckpt-format torch_dist \ + --ckpt-fully-parallel-save \ + --ckpt-fully-parallel-load \ + --use-distributed-optimizer \ + --overlap-grad-reduce \ + --overlap-param-gather \ + --no-create-attention-mask-in-dataloader \ + --lr 3e-6 \ + --min-lr 3e-6 \ + --lr-decay-style constant \ + --lr-warmup-samples 640 \ + --lr-warmup-init 0.3e-7 \ + --no-load-optim \ + --no-load-rng \ + " diff --git a/examples/rl/model_configs/qwen3_30b_a3b_moe.sh b/examples/rl/model_configs/qwen3_30b_a3b_moe.sh new file mode 100644 index 
00000000000..775a9587ba4 --- /dev/null +++ b/examples/rl/model_configs/qwen3_30b_a3b_moe.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +TP=${TP:-4} +PP=${PP:-1} +NODES_REQUIRED=${NODES_REQUIRED:-1} + +echo "Using Qwen3-30B-A3B model checkpoint" +SCRIPT_PATH="${BASH_SOURCE[0]}" +source $(dirname $SCRIPT_PATH)/common.sh + +# Default values +GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2} +GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2} +MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32} +GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16} +GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64} +GRPO_ITERATIONS=${GRPO_ITERATIONS:-1} +GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"} +TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-256} +MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1} +MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-8192} +EXIT_INTERVAL=${EXIT_INTERVAL:-20} +CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-20} + +ENV_DEPENDENT="\ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $TRAINING_BATCH_SIZE \ + --grpo-group-size $GRPO_GROUP_SIZE \ + --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \ + --grpo-iterations $GRPO_ITERATIONS \ + --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \ + --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \ + --grpo-kl-beta $GRPO_KL_BETA \ + --langrl-env-config $ENV_CONFIG " + + +MODEL_OPTIONS=" +--seq-length $MAX_SEQ_LENGTH \ +--inference-max-seq-length $MAX_SEQ_LENGTH \ +--inference-max-batch-size $MAX_INFERENCE_BS \ +--pretrained-checkpoint $CHECKPOINT \ +--no-use-tokenizer-model-from-checkpoint-args \ +--seq-length 8192 \ +--inference-max-seq-length 8192 \ +--bf16 \ +--tensor-model-parallel-size $TP \ +--pipeline-model-parallel-size $PP \ +--expert-model-parallel-size $EP \ +--attention-backend flash \ +--transformer-impl transformer_engine \ +--te-rng-tracker \ +--tokenizer-type HuggingFaceTokenizer \ +--tokenizer-model Qwen/Qwen3-30B-A3B \ +--untie-embeddings-and-output-weights \ +--num-layers 48 \ +--hidden-size 2048 \ +--ffn-hidden-size 6144 \ +--num-attention-heads 32 \ +--kv-channels 128 \ 
+--max-position-embeddings 8192 \ +--group-query-attention \ +--num-query-groups 4 \ +--normalization RMSNorm \ +--norm-epsilon 1e-6 \ +--position-embedding-type rope \ +--rotary-percent 1.0 \ +--rotary-base 1000000 \ +--use-rotary-position-embeddings \ +--swiglu \ +--disable-bias-linear \ +--num-experts 128 \ +--moe-router-topk 8 \ +--moe-ffn-hidden-size 768 \ +--moe-aux-loss-coeff 0.001 \ +--moe-router-load-balancing-type aux_loss \ +--attention-dropout 0.0 \ +--hidden-dropout 0.0 \ +--no-masked-softmax-fusion \ +--attention-softmax-in-fp32 \ +--vocab-size 151936 \ +--make-vocab-size-divisible-by 128 \ +--dist-ckpt-strictness log_unexpected \ +--qk-layernorm \ +--moe-token-dispatcher-type alltoall \ +--moe-layer-freq 1 \ +--optimizer adam \ +--adam-beta1 0.9 \ +--adam-beta2 0.999 \ +--adam-eps 1e-8 \ +--lr 1e-6 \ +--min-lr 1e-7 \ +--lr-warmup-samples 0 \ +--clip-grad 1.0 \ +--weight-decay 0.01 \ +--no-load-optim \ +--ckpt-format torch_dist +" diff --git a/gpt_builders.py b/gpt_builders.py index 0be64edaab6..a86d3af100b 100644 --- a/gpt_builders.py +++ b/gpt_builders.py @@ -11,7 +11,6 @@ ) from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( get_transformer_block_with_experimental_attention_variant_spec, - get_transformer_layer_with_experimental_attention_variant_spec, ) from megatron.core.models.gpt.heterogeneous.heterogeneous_layer_specs import ( get_gpt_heterogeneous_layer_spec, @@ -77,19 +76,13 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None, pg_ mtp_transformer_layer_spec = import_module(args.spec) else: # Define the decoder block spec - if args.experimental_attention_variant is not None: - decoder_layer_specs = ( - get_transformer_layer_with_experimental_attention_variant_spec( - config=config - ) - ) - else: - decoder_layer_specs = get_gpt_decoder_layer_specs( - config, - use_transformer_engine=use_te, - normalization=args.normalization, - qk_l2_norm=args.qk_l2_norm, - ) + decoder_layer_specs = 
get_gpt_decoder_layer_specs( + config, + use_transformer_engine=use_te, + normalization=args.normalization, + qk_l2_norm=args.qk_l2_norm, + vp_stage=vp_stage, + ) mtp_transformer_layer_spec = decoder_layer_specs[-1] # Use spec of the last layer in decoder block as spec of the transformer layer in MTP mtp_block_spec = get_gpt_mtp_block_spec( diff --git a/mamba_builders.py b/mamba_builders.py index 40e8480d635..6a792ba6ea5 100644 --- a/mamba_builders.py +++ b/mamba_builders.py @@ -16,6 +16,7 @@ def mamba_builder(args, pre_process, post_process, vp_stage=None, config=None, p if config.transformer_impl == "inference_optimized": mamba_stack_spec = mamba_inference_stack_spec + assert not config.inference_fuse_tp_communication, "inference_fuse_tp_communication is not supported for Mamba" elif args.spec is not None: mamba_stack_spec = import_module(args.spec) else: diff --git a/megatron/core/QuickStart.md b/megatron/core/QuickStart.md index dedde653db1..4aa966a164d 100644 --- a/megatron/core/QuickStart.md +++ b/megatron/core/QuickStart.md @@ -21,7 +21,7 @@ This guide for Megatron Core walks you through the following tasks: git clone https://github.com/NVIDIA/Megatron-LM.git cd Megatron-LM - pip install -U setuptools packaging + pip install -U "setuptools<80.0.0,>=77.0.0" packaging pip install --no-build-isolation .[dev] ``` diff --git a/megatron/core/datasets/blended_dataset.py b/megatron/core/datasets/blended_dataset.py index e5c1915bc2b..802a9770506 100644 --- a/megatron/core/datasets/blended_dataset.py +++ b/megatron/core/datasets/blended_dataset.py @@ -86,9 +86,23 @@ def __init__( self.dataset_index, self.dataset_sample_index = self._build_indices() def __len__(self) -> int: + if self.config.defer_npy_index_mmap: + size = sum(self.weights) + if self.size is not None: + size = self.size + return size + return self.dataset_index.shape[0] def __getitem__(self, idx: int) -> Dict[str, Union[int, numpy.ndarray]]: + if self.dataset_index is None: + self.dataset_index = 
numpy.load( + self.path_to_dataset_index, allow_pickle=True, mmap_mode="r" + ) + self.dataset_sample_index = numpy.load( + self.path_to_dataset_sample_index, allow_pickle=True, mmap_mode="r" + ) + dataset_id = self.dataset_index[idx] dataset_sample_id = self.dataset_sample_index[idx] return {"dataset_id": dataset_id, **self.datasets[dataset_id][dataset_sample_id]} @@ -103,6 +117,15 @@ def _build_indices(self) -> Tuple[numpy.ndarray, numpy.ndarray]: Returns: Tuple[numpy.ndarray, numpy.ndarray]: The dataset index and the dataset sample index """ + if self.config.defer_npy_index_mmap: + # NOTE(asolergi-nv): Direct path to lazy memmap the indexes + get_path_to = lambda suffix: os.path.join( + self.config.path_to_cache, + f"{self.unique_description_hash}-{type(self).__name__}-{self.split.name}-{suffix}", + ) + self.path_to_dataset_index = get_path_to("dataset_index.npy") + self.path_to_dataset_sample_index = get_path_to("dataset_sample_index.npy") + return None, None path_to_cache = self.config.path_to_cache @@ -114,10 +137,14 @@ def _build_indices(self) -> Tuple[numpy.ndarray, numpy.ndarray]: path_to_description = get_path_to("description.txt") path_to_dataset_index = get_path_to("dataset_index.npy") path_to_dataset_sample_index = get_path_to("dataset_sample_index.npy") - cache_hit = all( - map( - os.path.isfile, - [path_to_description, path_to_dataset_index, path_to_dataset_sample_index], + cache_hit = ( + True + if self.config.fast_cache_load + else all( + map( + os.path.isfile, + [path_to_description, path_to_dataset_index, path_to_dataset_sample_index], + ) ) ) else: diff --git a/megatron/core/datasets/blended_megatron_dataset_builder.py b/megatron/core/datasets/blended_megatron_dataset_builder.py index 5ad3f7389a2..6cb75ab5104 100644 --- a/megatron/core/datasets/blended_megatron_dataset_builder.py +++ b/megatron/core/datasets/blended_megatron_dataset_builder.py @@ -10,6 +10,7 @@ from megatron.core.datasets.blended_dataset import BlendedDataset from 
megatron.core.datasets.blended_megatron_dataset_config import BlendedMegatronDatasetConfig +from megatron.core.datasets.gpt_dataset import GPTDatasetConfig from megatron.core.datasets.megatron_dataset import LowLevelDataset, MegatronDataset from megatron.core.datasets.utils import Split, normalize from megatron.core.utils import log_single_rank @@ -215,7 +216,14 @@ def _build_blended_dataset_splits(self) -> List[Optional[TopLevelDataset]]: blended_datasets[i] = self.build_generic_dataset( BlendedDataset, self.is_built_on_rank, - True, # synchronize_ranks, default behavior to build on rank-0 first + ( + False + if ( + isinstance(self.config, GPTDatasetConfig) + and self.config.fast_cache_load + ) + else True + ), # synchronize_ranks, default behavior to build on rank-0 first. Set to False if we are using --dataloader-fast-cache-load # pylint: disable=C0301 megatron_datasets[i], weights_i, size_i, @@ -306,7 +314,14 @@ def _build_blended_dataset_splits(self) -> List[Optional[TopLevelDataset]]: blended_datasets[i] = self.build_generic_dataset( BlendedDataset, self.is_built_on_rank, - True, # synchronize_ranks, default behavior to build on rank-0 first + ( + False + if ( + isinstance(self.config, GPTDatasetConfig) + and self.config.fast_cache_load + ) + else True + ), # synchronize_ranks, default behavior to build on rank-0 first. 
Set to False if we are using --dataloader-fast-cache-load # pylint: disable=C0301
                 megatron_datasets,
                 weights,
                 size,
@@ -364,7 +379,10 @@ def _threading_helper(
         megatron_datasets = [[] for _ in range(len(Split))]
         num_dataset_builder_threads = self.config.num_dataset_builder_threads

-        if torch.distributed.is_initialized():
+        # NOTE(asolergi-nv): Skip rank-0 first dataset building if we are using --dataloader-fast-cache-load # pylint: disable=C0301
+        if torch.distributed.is_initialized() and not (
+            isinstance(self.config, GPTDatasetConfig) and self.config.fast_cache_load
+        ):
             rank = torch.distributed.get_rank()

             # First, build on rank 0
             if rank == 0:
@@ -420,6 +438,14 @@ def _build_megatron_dataset_splits(
         Returns:
             List[Optional[MidLevelDataset]]: The MidLevelDataset (or None) per split
         """
+        synchronize_ranks = (
+            False
+            if (
+                synchronize_ranks
+                and (isinstance(self.config, GPTDatasetConfig) and self.config.fast_cache_load)
+            )
+            else synchronize_ranks
+        )  # NOTE(asolergi-nv): Set synchronize_ranks to False if we are using --dataloader-fast-cache-load # pylint: disable=C0301
         # short-cut if we are not building on this rank
         if torch.distributed.is_initialized() and not self.is_built_on_rank():
             for i in range(len(Split)):
@@ -432,14 +458,6 @@ def _build_megatron_dataset_splits(

         # Build the split indices for the low level dataset
         num_elements = self.cls.numel_low_level_dataset(low_level_dataset)
-        split_indices = []
-        for i, _ in enumerate(Split):
-            if split[i] is not None:
-                beg = int(round(split[i][0] * float(num_elements)))
-                end = int(round(split[i][1] * float(num_elements)))
-                split_indices.append(numpy.arange(start=beg, stop=end, step=1, dtype=numpy.int32))
-            else:
-                split_indices.append(None)

         # Build the mid level dataset
         mid_level_datasets = []
@@ -447,6 +465,14 @@ def _build_megatron_dataset_splits(
             if split[i] is None:
                 mid_level_datasets.append(None)
             else:
+                indexed_indices = None
+                if not (
+                    isinstance(self.config, GPTDatasetConfig) and
self.config.fast_cache_load + ): # NOTE(asolergi-nv): Skip indexed_indices building if we are using --dataloader-fast-cache-load # pylint: disable=C0301 + beg = int(round(split[i][0] * float(num_elements))) + end = int(round(split[i][1] * float(num_elements))) + indexed_indices = numpy.arange(start=beg, stop=end, step=1, dtype=numpy.int32) + mid_level_datasets.append( self.build_generic_dataset( self.cls, @@ -454,7 +480,7 @@ def _build_megatron_dataset_splits( synchronize_ranks, low_level_dataset, dataset_path, - split_indices[i], + indexed_indices, sizes[i], _split, self.config, diff --git a/megatron/core/datasets/blended_megatron_dataset_config.py b/megatron/core/datasets/blended_megatron_dataset_config.py index eed5b97abad..cee7f333bb8 100644 --- a/megatron/core/datasets/blended_megatron_dataset_config.py +++ b/megatron/core/datasets/blended_megatron_dataset_config.py @@ -88,8 +88,29 @@ class BlendedMegatronDatasetConfig: incorrect tokenizer - this option may be set to True. This is typically not recommended. """ + fast_cache_load: bool = False + """Option to use the fast cache loading path. Requires all the dataset caches to be built.""" + + defer_npy_index_mmap: bool = False + """Option to defer the mmap of the dataset indexes until the first access. + Requires all the dataset caches to be built. + """ + def __post_init__(self) -> None: """Do asserts and set fields post init""" + if self.fast_cache_load: + assert ( + self.path_to_cache is not None + ), "--data-cache-path must be provided when using --dataloader-fast-cache-load." + assert ( + self.blend is None + ), f"--dataloader-fast-cache-load and --data-path cannot be used together. \ + Use --per-split-data-args-path or --train-data-path, --valid-data-path and \ + --test-data-path instead." + if self.defer_npy_index_mmap: + assert ( + self.path_to_cache is not None + ), "--data-cache-path must be provided when using --dataloader-defer-npy-index-mmap." 
if self.blend_per_split is not None and any(self.blend_per_split): assert self.blend is None, "blend and blend_per_split are incompatible" assert self.split is None, "split and blend_per_split are incompatible" diff --git a/megatron/core/datasets/gpt_dataset.py b/megatron/core/datasets/gpt_dataset.py index a2d39a6d688..3549db88001 100644 --- a/megatron/core/datasets/gpt_dataset.py +++ b/megatron/core/datasets/gpt_dataset.py @@ -3,7 +3,8 @@ import logging import os import time -from dataclasses import dataclass +from dataclasses import dataclass, field +from math import ceil from typing import Dict, Optional, Tuple import numpy @@ -67,6 +68,17 @@ class GPTDatasetConfig(BlendedMegatronDatasetConfig): data parallel size * context parallel size * sequence parallel size * 2. """ + sequences_per_dataset: Optional[Dict[str, int]] = None + """If provided, the sequence and document counts for each dataset. + Check --per-dataset-sequences-path + """ + + token_dtype_code: Optional[int] = field(init=False, default=None) + """The dtype code for the token ids. 4 for int32, 8 for uint16.""" + + context_parallel_size: Optional[int] = None + """The size of the context parallel group. Needed for padding in packed sequences.""" + def __post_init__(self) -> None: """Do asserts and set fields post init""" super().__post_init__() @@ -77,6 +89,17 @@ def __post_init__(self) -> None: assert self.reset_attention_mask is not None assert self.eod_mask_loss is not None + self.token_dtype_code = ( + None + if self.tokenizer.vocab_size is None + else (4 if self.tokenizer.vocab_size > numpy.iinfo(numpy.uint16).max + 1 else 8) + ) + if self.sequences_per_dataset is not None: + assert ( + self.token_dtype_code is not None + ), "Tokenizer vocab size is not set, deactivate --per-dataset-sequences-path or \ + fix the tokenizer." 
+ class GPTDataset(MegatronDataset): """The base GPT dataset @@ -161,7 +184,17 @@ def build_low_level_dataset(dataset_path: str, config: GPTDatasetConfig) -> Inde path_to_idx_cache=config.object_storage_cache_path ), ) - return IndexedDataset(dataset_path, multimodal=False, mmap=config.mmap_bin_files) + sequences_per_dataset = None + if config.sequences_per_dataset: + sequences_per_dataset = config.sequences_per_dataset[dataset_path] + return IndexedDataset( + dataset_path, + multimodal=False, + mmap=config.mmap_bin_files, + fast_cache_load=config.fast_cache_load, + sequences_per_dataset=sequences_per_dataset, + dtype_code=config.token_dtype_code, + ) def __len__(self) -> int: """Abstract method implementation @@ -169,6 +202,27 @@ def __len__(self) -> int: Returns: int: The length of the dataset """ + if self.config.defer_npy_index_mmap: + # NOTE(asolergi-nv): We need the number of samples of every GPTDataset to build/hit the BlendedDataset cache # pylint: disable=C0301 + # NOTE(asolergi-nv): Uses logic from megatron/core/datasets/helpers.cpp::build_sample_idx to compute the number of samples # pylint: disable=C0301 + num_tokens_per_epoch = self._get_num_tokens_per_epoch() + num_epochs = self._get_num_epochs(num_tokens_per_epoch) + + drop_last_partial_sequence = True + if self.index_split == Split.valid: + drop_last_partial_sequence = self.config.drop_last_partial_validation_sequence + + if drop_last_partial_sequence: + return ( + num_epochs * num_tokens_per_epoch - self.config.add_extra_token_to_sequence + ) // self.config.sequence_length + else: + return ceil( + float( + num_epochs * num_tokens_per_epoch - self.config.add_extra_token_to_sequence + ) + / self.config.sequence_length + ) return self.sample_index.shape[0] - 1 def __getitem__(self, idx: Optional[int]) -> Dict[str, torch.Tensor]: @@ -255,6 +309,18 @@ def _query_document_sample_shuffle_indices( Returns: Tuple[numpy.ndarray, numpy.ndarray]: The text ids and document ids """ + if self.shuffle_index is 
None: + # NOTE(asolergi-nv): Lazy memmap the indexes + self.shuffle_index = numpy.load( + self.path_to_shuffle_index, allow_pickle=True, mmap_mode='r' + ) + self.sample_index = numpy.load( + self.path_to_sample_index, allow_pickle=True, mmap_mode='r' + ) + self.document_index = numpy.load( + self.path_to_document_index, allow_pickle=True, mmap_mode='r' + ) + # Do the shuffle mapping idx = self.shuffle_index[idx] @@ -336,6 +402,15 @@ def _build_document_sample_shuffle_indices( Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]: The document index, the sample index, and the shuffle index """ + if self.config.defer_npy_index_mmap: + # NOTE(asolergi-nv): Direct path to lazy memmap the indexes + base = f"{self.unique_description_hash}-{type(self).__name__}-{self.index_split.name}" + get_path_to = lambda affix: os.path.join(self.config.path_to_cache, f"{base}-{affix}") + self.path_to_document_index = get_path_to("document_index.npy") + self.path_to_sample_index = get_path_to("sample_index.npy") + self.path_to_shuffle_index = get_path_to("shuffle_index.npy") + return None, None, None + path_to_cache = self.config.path_to_cache if path_to_cache is None and not self.config.mock: path_to_cache = os.path.join( @@ -349,15 +424,19 @@ def _build_document_sample_shuffle_indices( path_to_document_index = get_path_to("document_index.npy") path_to_sample_index = get_path_to("sample_index.npy") path_to_shuffle_index = get_path_to("shuffle_index.npy") - cache_hit = all( - map( - os.path.isfile, - [ - path_to_description, - path_to_document_index, - path_to_sample_index, - path_to_shuffle_index, - ], + cache_hit = ( + True + if self.config.fast_cache_load + else all( + map( + os.path.isfile, + [ + path_to_description, + path_to_document_index, + path_to_sample_index, + path_to_shuffle_index, + ], + ) ) ) else: diff --git a/megatron/core/datasets/helpers.cpp b/megatron/core/datasets/helpers.cpp index bfb2958da51..1f587618d84 100644 --- a/megatron/core/datasets/helpers.cpp +++ 
b/megatron/core/datasets/helpers.cpp @@ -166,7 +166,8 @@ py::array_t build_sample_idx( // Remove bound checks. auto sizes = sizes_.unchecked<1>(); auto document_idx = document_idx_.unchecked<1>(); - + + // NOTE(asolergi-nv): This is the logic used to compute the number of samples in the GPTDataset when leveraging defer_npy_index_mmap // Build the sample idx as a contiguous 1-D array of type T. int64_t num_samples = 0; if (drop_last_partial_sequence == true) { diff --git a/megatron/core/datasets/indexed_dataset.py b/megatron/core/datasets/indexed_dataset.py index 74f0f4205b0..76de4cca8d2 100644 --- a/megatron/core/datasets/indexed_dataset.py +++ b/megatron/core/datasets/indexed_dataset.py @@ -13,6 +13,7 @@ import time from abc import ABC, abstractmethod from collections.abc import Iterable +from datetime import datetime from enum import Enum from functools import lru_cache from itertools import accumulate @@ -236,26 +237,45 @@ class _IndexReader(object): idx_path (str): The path to the index file multimodal (bool): Whether the dataset is multimodal + + sequences_per_dataset (Optional[Tuple[int, int]]): The sequences per dataset. + + dtype_code (int): The dtype code of the tokenized documents. 
""" - def __init__(self, idx_path: str, multimodal: bool) -> None: + def __init__( + self, + idx_path: str, + multimodal: bool, + sequences_per_dataset: Optional[Tuple[int, int]] = None, + dtype_code: int = None, + ) -> None: log_single_rank(logger, logging.INFO, f"Load the {type(self).__name__} from {idx_path}") - with open(idx_path, "rb") as stream: - header = stream.read(9) - assert header == _INDEX_HEADER, f"bad header, cannot read: {idx_path}" + if sequences_per_dataset: + self.dtype = DType.dtype_from_code(dtype_code) + self.dtype_size = DType.size(self.dtype) + self.sequence_count = sequences_per_dataset[0] + self.document_count = sequences_per_dataset[1] + offset = 34 # 9 bytes from the header + 8 bytes from the version + # + 1 bytes for the dtype code + 8 bytes for the sequence count + # + 8 bytes for the document count = 34 bytes + else: + with open(idx_path, "rb") as stream: + header = stream.read(9) + assert header == _INDEX_HEADER, f"bad header, cannot read: {idx_path}" - version = struct.unpack(" None: t_end = time.time() log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds") - assert self.sequence_lengths.shape[0] == len(self) - assert self.sequence_lengths.shape[0] == self.sequence_count - assert self.sequence_lengths.shape[0] == self.document_indices[-1] - log_single_rank(logger, logging.INFO, f"> total number of sequences: {len(self)}") log_single_rank( logger, @@ -419,8 +435,14 @@ class _FileBinReader(_BinReader): bin_path (str): The path to the data (.bin) file. """ - def __init__(self, bin_path: str) -> None: + def __init__( + self, bin_path: str, num_max_retries: int = 3, sleep_duration_start: int = 10 + ) -> None: self._bin_path = bin_path + # Retry-specific parameters. With default arguments, sleep for 10, 20, 40 seconds + # between retries. 
+ self.num_max_retries = num_max_retries + self.sleep_duration_start = sleep_duration_start def read(self, dtype: Type[numpy.number], count: int, offset: int) -> numpy.ndarray: """Read bytes into a numpy array. @@ -436,17 +458,43 @@ def read(self, dtype: Type[numpy.number], count: int, offset: int) -> numpy.ndar numpy.ndarray: An array with `count` items and data-type `dtype` constructed from reading bytes from the data file starting at `offset`. """ - sequence = numpy.empty(count, dtype=dtype) - if MultiStorageClientFeature.is_enabled(): - msc = MultiStorageClientFeature.import_package() - with msc.open(self._bin_path, mode="rb", buffering=0) as bin_buffer_file: - bin_buffer_file.seek(offset) - bin_buffer_file.readinto(sequence) - else: - with open(self._bin_path, mode="rb", buffering=0) as bin_buffer_file: - bin_buffer_file.seek(offset) - bin_buffer_file.readinto(sequence) - return sequence + + def _read(): + """Helper method to read `count` bytes from self._bin_path at provided offset.""" + sequence = numpy.empty(count, dtype=dtype) + if MultiStorageClientFeature.is_enabled(): + msc = MultiStorageClientFeature.import_package() + with msc.open(self._bin_path, mode="rb", buffering=0) as bin_buffer_file: + bin_buffer_file.seek(offset) + bin_buffer_file.readinto(sequence) + else: + with open(self._bin_path, mode="rb", buffering=0) as bin_buffer_file: + bin_buffer_file.seek(offset) + bin_buffer_file.readinto(sequence) + return sequence + + sleep_duration = self.sleep_duration_start + for i in range(self.num_max_retries + 1): + try: + return _read() + except Exception as e: + time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') + if i == self.num_max_retries: + logger.warning( + f"[{time_str}] {self.num_max_retries+1} total tries to read data item " + f"failed; going to abort and re-raise exception \"{e}\"..." + ) + # Re-raise exception if in last iteration of for loop. 
+ raise e + logger.warning( + f"[{time_str}] Attempt {i+1}/{self.num_max_retries+1} to read data item " + f"failed with exception \"{e}\"; going to sleep for {sleep_duration} " + "seconds and then re-try..." + ) + time.sleep(sleep_duration) + sleep_duration = sleep_duration * 2 + + raise RuntimeError("Should not reach here!") class _S3BinReader(_BinReader): @@ -575,6 +623,12 @@ class IndexedDataset(torch.utils.data.Dataset): `object_storage_config.path_to_idx_cache` and streams data from the data (.bin) file in `object_storage_config.bin_chunk_nbytes` blocks. Note that `mmap` must be disabled for S3 data loading. Defaults to None. + + fast_cache_load (bool): Whether to use the fast cache mode. + + sequences_per_dataset (Optional[Tuple[int, int]]): The sequences per dataset. + + dtype_code (int): The dtype code of the tokenized documents. """ def __init__( @@ -584,6 +638,9 @@ def __init__( mmap: bool = True, object_storage_config: Optional[ObjectStorageConfig] = None, s3_config: Optional[S3Config] = None, + fast_cache_load: bool = False, + sequences_per_dataset: Optional[Tuple[int, int]] = None, + dtype_code: int = None, ) -> None: super().__init__() self.path_prefix: str @@ -603,7 +660,20 @@ def __init__( cache_idx_path = get_index_cache_path(idx_path, object_storage_config) cache_index_file(idx_path, cache_idx_path) - self.initialize(path_prefix, multimodal, mmap, object_storage_config) + self.initialize( + path_prefix, + multimodal, + mmap, + object_storage_config, + fast_cache_load, + sequences_per_dataset, + dtype_code, + ) + + if not fast_cache_load: + assert self.index.sequence_lengths.shape[0] == self.index.document_indices[-1] + assert self.index.sequence_lengths.shape[0] == len(self.index) + assert self.index.sequence_lengths.shape[0] == self.index.sequence_count def initialize( self, @@ -611,6 +681,9 @@ def initialize( multimodal: bool, mmap: bool, object_storage_config: Optional[ObjectStorageConfig], + fast_cache_load: bool = False, + 
sequences_per_dataset: Optional[Tuple[int, int]] = None, + dtype_code: int = None, ) -> None: """Initialize the dataset @@ -626,18 +699,27 @@ def initialize( object_storage_config (Optional[ObjectStorageConfig]): See IndexedDataset docstring for details. + + fast_cache_load (bool): Whether to use the fast cache mode. + + sequences_per_dataset (Optional[Tuple[int, int]]): The sequences per dataset. + + dtype_code (int): The dtype code of the tokenized documents. """ idx_path = get_idx_path(path_prefix) bin_path = get_bin_path(path_prefix) - if object_storage_config is None: - assert os.path.exists(idx_path) and os.path.exists( - bin_path - ), "One or both of the .idx and .bin files cannot be found at the " - f"path prefix {path_prefix}" + if object_storage_config is None and not fast_cache_load: + assert os.path.exists(idx_path) and os.path.exists(bin_path), ( + "One or both of the .idx and .bin files cannot be found at the " + f"path prefix {path_prefix}" + ) self.path_prefix = path_prefix self.multimodal = multimodal self.mmap = mmap self.object_storage_config = object_storage_config + self.fast_cache_load = fast_cache_load + self.sequences_per_dataset = sequences_per_dataset + self.dtype_code = dtype_code if mmap: assert not object_storage_config self.bin_reader = _MMapBinReader(bin_path) @@ -649,7 +731,7 @@ def initialize( idx_path = get_index_cache_path(get_idx_path(path_prefix), object_storage_config) else: self.bin_reader = _FileBinReader(bin_path) - self.index = _IndexReader(idx_path, self.multimodal) + self.index = _IndexReader(idx_path, self.multimodal, sequences_per_dataset, dtype_code) def __getstate__(self) -> Tuple[str, bool, bool, Optional[ObjectStorageConfig]]: """Get the state during pickling @@ -657,7 +739,15 @@ def __getstate__(self) -> Tuple[str, bool, bool, Optional[ObjectStorageConfig]]: Returns: Tuple[str, bool, bool, Optional[ObjectStorageConfig]]: The state tuple """ - return self.path_prefix, self.multimodal, self.mmap, 
self.object_storage_config + return ( + self.path_prefix, + self.multimodal, + self.mmap, + self.object_storage_config, + self.fast_cache_load, + self.sequences_per_dataset, + self.dtype_code, + ) def __setstate__(self, state: Tuple[str, bool, bool, Optional[ObjectStorageConfig]]) -> None: """Set the state during un-pickling @@ -665,8 +755,24 @@ def __setstate__(self, state: Tuple[str, bool, bool, Optional[ObjectStorageConfi Args: state (Tuple[str, bool, bool, Optional[ObjectStorageConfig]]): The state tuple """ - path_prefix, multimodal, mmap, object_storage_config = state - self.initialize(path_prefix, multimodal, mmap, object_storage_config) + ( + path_prefix, + multimodal, + mmap, + object_storage_config, + fast_cache_load, + sequences_per_dataset, + dtype_code, + ) = state + self.initialize( + path_prefix, + multimodal, + mmap, + object_storage_config, + fast_cache_load, + sequences_per_dataset, + dtype_code, + ) def __del__(self) -> None: """Clean up the object""" diff --git a/megatron/core/datasets/readme.md b/megatron/core/datasets/readme.md index 12ade943b53..452bf24e4a2 100644 --- a/megatron/core/datasets/readme.md +++ b/megatron/core/datasets/readme.md @@ -9,11 +9,11 @@ Data preprocessing is built around the following classes: At the moment, an end-to-end data preprocessing implementation is left to the user. See the class docstring(s) for more details. -#### IndexedDatasetBuilder +### IndexedDatasetBuilder The `IndexedDatasetBuilder` is capable of building and merging `IndexedDataset` instances. -#### IndexedDataset +### IndexedDataset The `IndexedDataset` class is the lowest-level data interface in Megatron Core. Internally, an `IndexedDataset` instance references two binaries: the data file (`.bin`) contains document/sequence data and the index file (`.idx`) contains document/sequence metadata. @@ -42,32 +42,32 @@ Building the data loaders is a distributed-aware process built around the follow See the class docstrings for more details. 
-#### BlendedMegatronDatasetConfig (extendable) +### BlendedMegatronDatasetConfig (extendable) The `BlendedMegatronDatasetConfig` class parameterizes the `BlendedMegatronDatasetBuilder` and in turn the `MegatronDataset` and `BlendedDataset`. Different training/inference regimes will require different extensions e.g. the `GPTDatasetConfig` -#### BlendedMegatronDatasetBuilder +### BlendedMegatronDatasetBuilder The `BlendedMegatronDatasetBuilder` class builds the highest-level data interfaces in Megatron Core. **NB:** All ranks should attempt to build the dataset via the `BlendedMegatronDatasetBuilder` or the program will hang. Which ranks follow through on their attempts can be controlled via the `BlendedMegatronDatasetConfig`. -#### IndexedDataset +### IndexedDataset The `IndexedDataset` class is the lowest-level data interface in Megatron Core. The `IndexedDataset` should already exist on disk before attempting to build any of the high-level data interfaces. -#### MegatronDataset (extendable) +### MegatronDataset (extendable) The `MegatronDataset` abstract class is a high-level data interface in Megatron Core. It is an abstraction built upon the `IndexedDataset`. Different training/inference regimes will require different extensions e.g. the `GPTDataset` -#### BlendedDataset +### BlendedDataset The `BlendedDataset` class is a high-level data interface in Megatron Core. It is an abstraction built upon the `MegatronDataset`. @@ -191,3 +191,13 @@ To query the `BlendedDataset` for the _k_-th sample we do the following ``` To save time during initialization, each index is built/cached sequentially on one process rank and subsequently loaded in parallel on other process ranks. The cached indices are unique to a hash generated in the `BlendedDataset.__init__` function. 
+
+## Fast DataLoader initialization
+
+Especially for large-scale runs, DataLoader initialization can take several minutes, since it involves opening and memory-mapping multiple files and can significantly stress the filesystem. To speed up this process, we have developed the following three optimizations, controlled by configuration flags:
+
+ - `--dataloader-fast-cache-load`: This option assumes that the dataset cache already exists in the specified `--data-cache-path`. When enabled, it speeds up the creation process by removing synchronization points and file check assertions.
+
+ - `--dataloader-defer-npy-index-mmap`: This option also assumes that the dataset cache already exists in the specified `--data-cache-path`. When enabled, it defers the memory mapping of the dataset indexes (.npy files) until their first access. We recommend using this configuration together with `--num-workers` > 0 so that the DataLoader prefetches the next batches of data, thereby hiding the cost of index memory mapping.
+
+ - `--per-dataset-sequences-path`: With this configuration, we specify the JSON file generated by the `tools/build_sequences_per_dataset.py` script. This script generates a single file containing the required metadata from all the specified file prefixes. This configuration is especially useful when dealing with hundreds to thousands of file prefixes, since it requires only a single `open` operation instead of one per file prefix.
\ No newline at end of file
diff --git a/megatron/core/dist_checkpointing/strategies/async_utils.py b/megatron/core/dist_checkpointing/strategies/async_utils.py
index 4c1aab1b1d7..94af4beef54 100644
--- a/megatron/core/dist_checkpointing/strategies/async_utils.py
+++ b/megatron/core/dist_checkpointing/strategies/async_utils.py
@@ -466,9 +466,18 @@ def async_loop(
            to get aligned with the training rank's logging level
         """

+    # Set logger.
logger = logging.getLogger(__name__) logger.setLevel(log_level) logger.info(f"PersistentAsyncCaller: persistent ckpt worker for {rank} has started") + + # Set CUDA device to appropriate local_rank to ensure allocations / CUDA contexts + # in this new process are on the right device, and device 0 on the node does not + # take on undue memory burden from other devices on node (default behavior without + # this line). + torch.cuda.set_device(rank % torch.cuda.device_count()) + + # Start busy loop waiting for and executing checkpoint saves. while True: item = queue.get() if isinstance(item, str) and item == 'DONE': diff --git a/megatron/core/dist_checkpointing/strategies/base.py b/megatron/core/dist_checkpointing/strategies/base.py index 4ecc0948b18..53422b362f6 100644 --- a/megatron/core/dist_checkpointing/strategies/base.py +++ b/megatron/core/dist_checkpointing/strategies/base.py @@ -30,19 +30,10 @@ def get_default_strategy(action: StrategyAction, backend: str, version: int): """Retrieves a default strategy for a given action, backend and version.""" error_hint: str = "" try: - if backend == 'zarr': - error_hint = ' Please install `zarr` and `tensorstore!=0.1.46` packages' - from .tensorstore import register_default_tensorstore_strategies + error_hint = ' Please use PyTorch version >=2.1' + from .torch import register_default_torch_strategies - register_default_tensorstore_strategies() - from .zarr import register_default_zarr_strategies - - register_default_zarr_strategies() - elif backend == 'torch_dist': - error_hint = ' Please use PyTorch version >=2.1' - from .torch import register_default_torch_strategies - - register_default_torch_strategies() + register_default_torch_strategies() except ImportError as e: raise CheckpointingException( f'Cannot import a default strategy for: {(action.value, backend, version)}. 
' diff --git a/megatron/core/dist_checkpointing/strategies/tensorstore.py b/megatron/core/dist_checkpointing/strategies/tensorstore.py deleted file mode 100644 index 6472c9d58f9..00000000000 --- a/megatron/core/dist_checkpointing/strategies/tensorstore.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. - -"""Strategies using TensorStore to load and save Zarr arrays.""" - -from functools import partial -from itertools import starmap -from logging import getLogger -from pathlib import Path -from typing import Union - -import torch - -from ..core import CheckpointingException -from ..dict_utils import dict_list_map_inplace -from ..mapping import ShardedStateDict, ShardedTensor -from .base import LoadShardedStrategy, StrategyAction, register_default_strategy -from .zarr import load_zarr_based_sharded_metadata, postprocess_numpy_array - -try: - import tensorstore as ts - - HAVE_TENSORSTORE = True -except ImportError: - from unittest.mock import MagicMock - - ts = MagicMock() - HAVE_TENSORSTORE = False - - -logger = getLogger(__name__) - - -def register_default_tensorstore_strategies(): - """Register default strategies leveraging tensorstore.""" - register_default_strategy( - StrategyAction.LOAD_SHARDED, "zarr", 1, TensorStoreLoadShardedStrategy() - ) - - -class TensorStoreLoadShardedStrategy(LoadShardedStrategy): - """Load strategy for Zarr backend using `tensorstore` for loading.""" - - def __init__(self, load_directly_on_device: bool = False): - super().__init__() - self.load_directly_on_device = load_directly_on_device - - def load(self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Union[str, Path]): - if isinstance(checkpoint_dir, str): - checkpoint_dir = Path(checkpoint_dir) - - if torch.distributed.get_rank() == 0: - print(f"Loading distributed checkpoint with {self.__class__.__name__}") - if self.load_directly_on_device: - print(f"Loading distributed checkpoint directly on the GPU") - load_fn = 
partial( - _load_from_array, - checkpoint_dir=checkpoint_dir, - load_directly_on_device=self.load_directly_on_device, - ) - dict_list_map_inplace(load_fn, sharded_state_dict) - return sharded_state_dict - - def load_tensors_metadata(self, checkpoint_dir: Union[str, Path]): - if isinstance(checkpoint_dir, str): - checkpoint_dir = Path(checkpoint_dir) - - def get_ts_shape_dtype(path): - arr = open_ts_array(path) - return arr.shape, arr.dtype.numpy_dtype - - return load_zarr_based_sharded_metadata(checkpoint_dir, get_ts_shape_dtype) - - def check_backend_compatibility(self, loaded_version): - pass # TODO - - def check_version_compatibility(self, loaded_version): - pass # TODO - - -def merge_global_slice_with_shape(global_slice, actual_shape, key): - """Intersects the global slice with the actual shape (prevent overflow).""" - - def _merge_slice(dim_slice, dim_size): - if isinstance(dim_slice, slice): - assert ( - dim_slice.start < dim_size - ), f"Got empty slice for ShardedTensor {key} ({dim_slice}, {dim_size})" - if dim_slice.stop > dim_size: - dim_slice = slice(dim_slice.start, dim_size, dim_slice.step) - return dim_slice - - assert len(global_slice) == len(actual_shape), (global_slice, actual_shape, key) - return tuple(starmap(_merge_slice, zip(global_slice, actual_shape))) - - -def _load_from_array( - sharded_tensor: ShardedTensor, - checkpoint_dir: Path, - load_directly_on_device: bool = False, - apply_flattened_range: bool = True, -): - x = _load_regular_chunk(sharded_tensor, checkpoint_dir) - ten = postprocess_numpy_array(x, sharded_tensor, apply_flattened_range) - if load_directly_on_device: - sharded_tensor.data.data.copy_(ten) - return sharded_tensor.data - else: - return ten - - -def _load_regular_chunk(sharded_tensor: ShardedTensor, checkpoint_dir: Path): - assert isinstance(sharded_tensor, ShardedTensor), type(sharded_tensor) - arr = open_ts_array(checkpoint_dir / sharded_tensor.key) - if sharded_tensor.global_shape == arr.shape: - x = ( - 
arr[sharded_tensor.global_slice()].read().result() - ) # flattened tensors loading is delayed - elif sharded_tensor.allow_shape_mismatch: - global_slice = merge_global_slice_with_shape( - sharded_tensor.global_slice(), arr.shape, sharded_tensor.key - ) - x = arr[global_slice].read().result() # flattened tensors loading is delayed - else: - _msg = ( - f"Global shape mismatch for loaded ({arr.shape})" - f" and expected ({sharded_tensor.global_shape}) tensor" - f" for key {sharded_tensor.key}" - ) - raise CheckpointingException(_msg) - return x - - -def open_ts_array(arr_path: Path): - """Opens a Zarr file array with Tensorstore with basic setting. - - Args: - arr_path (Path): path to a Zarr (Tensorstore) array - """ - if not HAVE_TENSORSTORE: - raise RuntimeError( - "tensorstore is required, please install it with `pip install tensorstore`" - ) - spec = {"driver": "zarr", "metadata_key": ".zarray", "kvstore": {}} - spec["kvstore"] = {"driver": "file", "path": str(arr_path)} - try: - arr = ts.open(ts.Spec(spec), open=True).result() - except Exception as e: - raise CheckpointingException(f"Array {arr_path} could not be loaded. Error: {e}") from e - return arr diff --git a/megatron/core/dist_checkpointing/strategies/two_stage.py b/megatron/core/dist_checkpointing/strategies/two_stage.py deleted file mode 100644 index 481758b55b5..00000000000 --- a/megatron/core/dist_checkpointing/strategies/two_stage.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. - -""" 2-stage checkpoint loading. 
""" -import time -from collections import defaultdict -from dataclasses import dataclass -from functools import partial, wraps -from itertools import chain -from logging import getLogger -from operator import attrgetter, itemgetter -from pathlib import Path -from typing import List, Optional, Tuple, Union - -import torch - -from ..dict_utils import dict_list_map_inplace, map_reduce, nested_values -from ..mapping import ShardedStateDict, ShardedTensor -from .base import LoadShardedStrategy -from .tensorstore import _load_from_array, open_ts_array -from .zarr import load_zarr_based_sharded_metadata - -_import_trigger = None - - -timers = defaultdict(list) - -logger = getLogger(__name__) -logger.warning( - 'megatron.core.dist_checkpointing.two_stage module is deprecated' - ' and will be removed in Megatron-Core v0.12. Please use' - ' FullyParallelLoadStrategyWrapper to accomplish a parallelized checkpoint load.' -) - - -def timed(verbose=True): - """Timing decorator.""" - - def timed_dec(fn): - name = fn.__name__ - - @wraps(fn) - def wrapped(*args, **kwargs): - if verbose: - logger.debug(f'{name} init') - start = time.time() - ret = fn(*args, **kwargs) - took = time.time() - start - if verbose: - logger.debug(f'{name} took {took}s') - timers[name].append(took) - return ret - - return wrapped - - return timed_dec - - -@dataclass -class _ShardedTensorMetadata: - global_rank: int - sharded_tensor_no_data: ShardedTensor - dist_group_rank: Tuple[int] # id of distributed group - dist_group_ranks: Tuple[int] # id of distributed group - data_size: Optional[int] = None # bytes - - -def sharded_tensor_chunk_id(sharded_tensor: ShardedTensor): - """Id of a sharded tensor.""" - return (sharded_tensor.key, sharded_tensor.global_offset) - - -class TwoStageDataParallelLoadShardedStrategy(LoadShardedStrategy): - """Loads one checkpoint replica from storage and broadcasts to other nodes. 
- - This strategy loads checkpoint from storage on minimal set of nodes - and distributes the checkpoint to other nodes with torch.distributed. - Loading is performed with tensorstore. - - Steps: - 0. (optional) create Gloo distributed groups - 1. Exchange ShardedTensors metadata between all nodes - 2. Align needed tensors within DP groups - 3. For each globally unique tensor: - 3.a) on one of the ranks load it from storage to CPU and move to CUDA - 3.b) allocate CUDA tensor on other ranks - 3.c) broadcast within DP group - 3.d) copy tensor content to the model param location - 3.e) free tensor buffers from a) and b) - - Notes: - 1. Loading and broadcasting is done sequentially to avoid both host and device OOMs - 2. There is a lot of overlap potential between all three steps done for each tensor: - 2.a) loading from storage to numpy - 2.b) moving CPU tensors to CUDA - 2.c) broadcast - """ - - def __init__(self, data_parallel_group, cpu_transfer=True): - super().__init__() - - self.cpu_transfer = cpu_transfer - self.data_parallel_group_orig = data_parallel_group - self.data_parallel_group = None if cpu_transfer else data_parallel_group - self.dp_group_ranks = tuple( - sorted(torch.distributed.get_process_group_ranks(data_parallel_group)) - ) - self.dp_group_rank = self.data_parallel_group_orig.rank() - self.global_rank = torch.distributed.get_rank() - - def load(self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Path): - """Main load method.""" - self.maybe_init_gloo_group() - all_tensors_sorted = self._build_load_plan(sharded_state_dict) - self._exchange_loaded_tensors(all_tensors_sorted, sharded_state_dict, checkpoint_dir) - # TODO: fix hang in summarize_load_times - # self.summarize_load_times() - return sharded_state_dict - - def summarize_load_times(self): - """Summarize load times.""" - torch.distributed.barrier() - logger.info('Checkpoint loading finished. 
Summary:') - # TODO: `timers` keys are not guaranteed to be the same across ranks which causes hangs - for key, times in sorted(timers.items()): - times_sum = sum(times) - max_times = torch.tensor([times_sum], device='cuda') - avg_times = torch.tensor([times_sum], device='cuda') - torch.distributed.all_reduce(max_times, op=torch.distributed.ReduceOp.MAX) - torch.distributed.all_reduce(avg_times, op=torch.distributed.ReduceOp.SUM) - avg_times /= torch.distributed.get_world_size() - if torch.distributed.get_rank() == 0: - logger.info(f'{key}: max {max_times[0]}, avg {avg_times[0]}') - - @timed(verbose=False) - def load_tensor_from_storage(self, checkpoint_dir, ten_meta: _ShardedTensorMetadata): - """Load tensor from storage.""" - logger.debug(f'_load_from_array({ten_meta.sharded_tensor_no_data.key}) init') - ret = _load_from_array( - ten_meta.sharded_tensor_no_data, - checkpoint_dir, - load_directly_on_device=False, - apply_flattened_range=False, - ) - logger.debug(f'_load_from_array({ten_meta.sharded_tensor_no_data.key}) DONE') - return ret - - @timed() - def maybe_init_gloo_group(self): - """Create Gloo groups.""" - if not self.cpu_transfer: - return - all_groups = [None] * torch.distributed.get_world_size() - torch.distributed.all_gather_object(all_groups, self.dp_group_ranks) - all_groups = set(tuple(sorted(gr)) for gr in all_groups) - for group_ranks in sorted(all_groups): - # "two_stage" module will be deprecated, so not replace new_group() - # with ...parallel_state.create_group() func setting group_desc here. 
- gloo_pg = torch.distributed.new_group(ranks=group_ranks, backend='gloo') - if self.global_rank in group_ranks: - self.data_parallel_group = gloo_pg - assert self.dp_group_rank == self.data_parallel_group.rank() - - def check_backend_compatibility(self, loaded_version): - pass # TODO - - def check_version_compatibility(self, loaded_version): - pass # TODO - - @timed() - def _build_load_plan( - self, sharded_state_dict: ShardedStateDict - ) -> List[_ShardedTensorMetadata]: - local_meta = [ - _ShardedTensorMetadata( - self.global_rank, - sharded_ten.without_data(), - self.dp_group_rank, - self.dp_group_ranks, - ) - for sharded_ten in nested_values(sharded_state_dict) - ] - all_meta = [None] * self.data_parallel_group.size() - torch.distributed.all_gather_object(all_meta, local_meta, group=self.data_parallel_group) - all_meta = list(chain.from_iterable(all_meta)) - all_tensors_sorted = self.deduplicate_chunks(all_meta) - return all_tensors_sorted - - @timed() - def deduplicate_chunks(self, ten_metas: List[_ShardedTensorMetadata]): - """Group tensors by chunk and then pick the tensor with the lowest rank. - - NOTE: with proper loading overlap, loading from randomized ranks - (instead of the smallest one) could be beneficial here. 
- """ - ten_metas = map_reduce( - ten_metas, - key_fn=lambda meta: sharded_tensor_chunk_id(meta.sharded_tensor_no_data), - reduce_fn=partial(min, key=attrgetter('dist_group_rank')), - ) - all_metas_sorted = list(map(itemgetter(1), sorted(ten_metas.items()))) - return all_metas_sorted - - @timed() - def _exchange_loaded_tensors( - self, ten_metas: List[_ShardedTensorMetadata], sharded_state_dict, checkpoint_dir - ): - logger.debug(f'_exchange_loaded_tensors, num ten_metas: {len(ten_metas)}') - for ten_meta in ten_metas: - - src_rank = torch.distributed.get_global_rank( - self.data_parallel_group, ten_meta.dist_group_rank - ) - - if self.dp_group_rank == ten_meta.dist_group_rank: - exchange_tensor = self.load_tensor_from_storage(checkpoint_dir, ten_meta) - if not self.cpu_transfer: - exchange_tensor = exchange_tensor.cuda() - else: - # TODO: for non-flattened ranges we could reuse the buffer from the start here - exchange_tensor = torch.empty( - ten_meta.sharded_tensor_no_data.local_shape, - device='cpu' if self.cpu_transfer else 'cuda', - dtype=ten_meta.sharded_tensor_no_data.dtype, - ) - - logger.debug( - f'exchange {ten_meta.sharded_tensor_no_data.key}, {exchange_tensor.shape}\ -({exchange_tensor.numel()}), broadcast({src_rank} -> {self.dp_group_ranks})' - ) - torch.distributed.broadcast( - exchange_tensor, group=self.data_parallel_group, src=src_rank - ) - self._distribute_data_to_state_dict(ten_meta, exchange_tensor, sharded_state_dict) - logger.debug(f'exchange {ten_meta.sharded_tensor_no_data.key} done') - - # free buffer memory - exchange_tensor = None - - @timed(verbose=False) - def _distribute_data_to_state_dict( - self, - ten_meta: _ShardedTensorMetadata, - loaded_ten: torch.Tensor, - sharded_state_dict: ShardedStateDict, - ): - tensor_key = sharded_tensor_chunk_id(ten_meta.sharded_tensor_no_data) - - def _fill_in_data(t: Union[ShardedTensor, torch.Tensor]): - if not isinstance(t, ShardedTensor) or sharded_tensor_chunk_id(t) != tensor_key: - # already 
filled-in or key not matching - return t - sharded_tensor: ShardedTensor = t - x = loaded_ten - - # Reuse existing buffer - sharded_tensor.data.data.copy_(x) - return sharded_tensor.data - - dict_list_map_inplace(_fill_in_data, sharded_state_dict) - - def load_tensors_metadata(self, checkpoint_dir: Path): - def get_ts_shape_dtype(path): - arr = open_ts_array(path) - return arr.shape, arr.dtype.numpy_dtype - - return load_zarr_based_sharded_metadata(checkpoint_dir, get_ts_shape_dtype) diff --git a/megatron/core/dist_checkpointing/strategies/zarr.py b/megatron/core/dist_checkpointing/strategies/zarr.py deleted file mode 100644 index cea21ddea16..00000000000 --- a/megatron/core/dist_checkpointing/strategies/zarr.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. - -"""Strategies using Zarr as an underlying format.""" - -import logging -import os -from functools import partial -from logging import getLogger -from pathlib import Path -from typing import Callable, List, Optional, Tuple, Union - -import numpy as np -import torch - -from ..core import CheckpointingException -from ..dict_utils import dict_list_map_inplace, nested_values -from ..mapping import ShardedStateDict, ShardedTensor, is_main_replica -from .base import ( - LoadShardedStrategy, - SaveShardedStrategy, - StrategyAction, - register_default_strategy, -) - -logger = logging.getLogger(__name__) - -try: - import zarr - - HAVE_ZARR = True -except ImportError: - from unittest.mock import MagicMock - - zarr = MagicMock() - HAVE_ZARR = False - - -numpy_to_torch_dtype_dict = { - np.dtype("bool"): torch.bool, - np.dtype("uint8"): torch.uint8, - np.dtype("int8"): torch.int8, - np.dtype("int16"): torch.int16, - np.dtype("int32"): torch.int32, - np.dtype("int64"): torch.int64, - np.dtype("float16"): torch.float16, - np.dtype("float32"): torch.float32, - np.dtype("float64"): torch.float64, - np.dtype("complex64"): torch.complex64, - np.dtype("complex128"): 
torch.complex128, -} - -torch_to_numpy_dtype_dict = {v: k for k, v in numpy_to_torch_dtype_dict.items()} - - -try: - # Register a bfloat16 type with this import - import tensorstore # pylint: disable=unused-import - - HAS_BFLOAT16 = True - numpy_to_torch_dtype_dict[np.dtype("bfloat16")] = torch.bfloat16 - torch_to_numpy_dtype_dict[torch.bfloat16] = np.dtype("bfloat16") -except ImportError: - HAS_BFLOAT16 = False - -logger = getLogger(__name__) - - -def register_default_zarr_strategies(): - """Register default strategies related to Zarr backend.""" - register_default_strategy( - StrategyAction.SAVE_SHARDED, "zarr", 1, ZarrSaveShardedStrategy("zarr", 1) - ) - - -class ZarrSaveShardedStrategy(SaveShardedStrategy): - """Save strategy for Zarr backend.""" - - def __init__(self, backend: str, version: int): - super().__init__(backend, version) - raise CheckpointingException( - "`zarr` distributed checkpoint backend is no longer supported. " - "Please switch to PyTorch Distributed format (`torch_dist`)." - ) - - def save(self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Union[str, Path]): - if isinstance(checkpoint_dir, str): - checkpoint_dir = Path(checkpoint_dir) - - sharded_tensors = list(nested_values(sharded_state_dict)) - arrays = _create_or_open_zarr_arrays(sharded_tensors, checkpoint_dir) - for ten, arr in zip(sharded_tensors, arrays): - _save_to_existing_array(ten, arr) - torch.distributed.barrier() - - -def _create_or_open_zarr_arrays( - sharded_tensors: List[ShardedTensor], checkpoint_dir: Path -) -> List[Optional[zarr.Array]]: - """Returns list of zarr arrays corresponding to given tensors. 
- - For a sharded tensors that: - a) is main replica and represents the first chunk (all offsets 0), creates the Zarr array - b) is main replica but not the first chunk, - opens the arrays created in (a) (possibly by other process) - c) otherwise, sets the corresponding array to None since it won't be used - - Args: - sharded_tensors (List[ShardedTensor]): sharded tensors from a given rank - that will be saved to checkpoint - checkpoint_dir (Path): checkpoint in which the arrays will be created - """ - if not HAVE_ZARR: - raise RuntimeError("zarr is required, please install it with `pip install zarr`") - - arrays = [] - for ten in sharded_tensors: - arr = _create_zarr_array(ten, checkpoint_dir) if _should_create_array(ten) else None - arrays.append(arr) - - torch.distributed.barrier() - # Open arrays created above by other processes - for arr_idx, ten in enumerate(sharded_tensors): - if arrays[arr_idx] is not None: - # array created by this process - assert _should_create_array(ten), ten - continue - if not is_main_replica(ten.replica_id): - # this array won't be needed for saving and can stay None - continue - open_kwargs = {} - if ten.flattened_range is not None: - open_kwargs["synchronizer"] = zarr.ProcessSynchronizer( - str(checkpoint_dir / f"{ten.key}.sync") - ) - arrays[arr_idx] = _open_zarr_array_verbose(checkpoint_dir / ten.key, "r+", **open_kwargs) - return arrays - - -def _should_create_array(ten: ShardedTensor): - return ( - is_main_replica(ten.replica_id) - and set(ten.global_offset) == {0} - and (ten.flattened_range is None or ten.flattened_range.start == 0) - ) - - -def _save_to_existing_array(sharded_tensor: ShardedTensor, arr: Optional[zarr.Array]): - if not is_main_replica(sharded_tensor.replica_id): - return - assert arr is not None - x = sharded_tensor.data - x = x.detach().cpu() - torch.cuda.synchronize() - if x.dtype == torch.bfloat16: - x = x.float() - x = x.numpy() - x = x.astype("bfloat16") - else: - x = x.numpy() - - if 
sharded_tensor.flattened_range is None: - arr[sharded_tensor.global_slice()] = x - else: - arr.set_coordinate_selection(sharded_tensor.global_coordinates(), x) - - -def _create_zarr_array(sharded_tensor: ShardedTensor, checkpoint_dir: Path): - np_dtype = torch_to_numpy_dtype_dict[sharded_tensor.dtype] - try: - arr = zarr.create( - sharded_tensor.global_shape, - dtype=np_dtype, - store=checkpoint_dir / sharded_tensor.key, - chunks=sharded_tensor.max_allowed_chunks(), - compressor=None, - fill_value=None, - write_empty_chunks=True, - synchronizer=( - zarr.ProcessSynchronizer(str(checkpoint_dir / f'{sharded_tensor.key}.sync')) - if sharded_tensor.flattened_range is not None - else None - ), - ) - logger.debug(f"Created a new Zarr array at {checkpoint_dir / sharded_tensor.key}") - except zarr.errors.ContainsArrayError as e: - raise CheckpointingException( - f"Array {checkpoint_dir / sharded_tensor.key} already exists" - ) from e - - if HAS_BFLOAT16 and np_dtype == np.dtype("bfloat16"): - arr._dtype = np_dtype - zarray = arr.store[".zarray"] - arr.store[".zarray"] = zarray.replace(b" exp_sh: - assert False, ( - f"Expected shape ({exp_sh}) smaller than actual ({x_sh})" - f" for {repr(expected_sharded_ten)}" - ) - else: - pad_args.extend((0, exp_sh - x_sh)) - # TODO: behavior control with envvar is for testing purposes only, remove it - if not int(os.environ.get("DIST_CKPT_PAD_REPLICATE", 0)): - return torch.nn.functional.pad(x, pad_args) - - # unsqueeze and squeeze to get shapes supported by cudnn - logger.info(f"Replicating last row for {expected_sharded_ten.key}") - if x.dtype == torch.bfloat16: - return ( - torch.nn.functional.pad(x.float().unsqueeze(0), pad_args, mode="replicate") - .squeeze(0) - .bfloat16() - ) - return torch.nn.functional.pad(x.unsqueeze(0), pad_args, mode="replicate").squeeze(0) - - -def load_zarr_based_sharded_metadata( - checkpoint_dir: Path, get_shape_dtype_fn: Callable[[str], Tuple[Tuple[int], np.dtype]] -) -> ShardedStateDict: - """Load 
metadata of Zarr arrays. - - Args: - checkpoint_dir (str): checkpoint root directory - get_shape_dtype_fn (str -> ((int, ...), np.dtype)): a function returning - an array shape and dtype for a given Zarr array path - """ - - sharded_state_dict = {} - for subdir in checkpoint_dir.iterdir(): - if not subdir.is_dir() or not (subdir / ".zarray").exists() or subdir.suffix == ".sync": - continue - key = subdir.name - arr_shape, arr_dtype = get_shape_dtype_fn(str(subdir)) - - sharded_state_dict[key] = ShardedTensor( - key, - None, - numpy_to_torch_dtype_dict[arr_dtype], - arr_shape, - arr_shape, - tuple(0 for _ in arr_shape), - tuple(1 for _ in arr_shape), - ) - return sharded_state_dict diff --git a/megatron/core/distributed/fsdp/src/README.md b/megatron/core/distributed/fsdp/src/README.md index d51797fd51d..bc4cdaa078e 100644 --- a/megatron/core/distributed/fsdp/src/README.md +++ b/megatron/core/distributed/fsdp/src/README.md @@ -116,9 +116,13 @@ fully_shard(model) # Your model is now ready for distributed training! ``` -## `fully_shard` / `MegatronFSDP` API - Advanced Features +### `torch.compile` Compatibility -Megatron-FSDP's `fully_shard_*` API has a comprehensive set of arguments for fine-tuning your model's performance: +Megatron-FSDP is compatible with `torch.compile`, but this feature is still experimental and may introduce performance regressions in some workloads. + +## 📖 Megatron-FSDP Comprehensive Walkthrough + +### Import `megatron_fsdp`. ```python import torch @@ -126,10 +130,16 @@ from megatron_fsdp import ( fully_shard_model, fully_shard_optimizer, ) +``` + +### Set up a distributed environment using `DeviceMesh`. + +`DeviceMesh` simplifies the construction of complex arrangements of devices +to support various parallelisms. + +```python +from torch.distributed.device_mesh import DeviceMesh -""" -Megatron-FSDP DeviceMesh Distributed Environment -""" # Initialize DeviceMesh. 
device_mesh = torch.distributed.device_mesh.init_device_mesh( "cuda", @@ -144,20 +154,22 @@ device_mesh[("dp_shard", "cp")]._flatten("dp_shard_cp") # Only required if using HSDP. Otherwise, don't pass hybrid_fsdp_group. device_mesh[("dp_outer", "dp_shard", "cp")]._flatten("hsdp") hsdp_group = device_mesh["hsdp"].get_group() + # Initialize DeviceMesh for expert parallel (EP) modules when using FSDP + EP. -expert_device_mesh = torch.distributed.device_mesh.init_device_mesh( - "cuda", - mesh_shape=(expt_dp_shard_size, expt_tp_size), - mesh_dim_names=("dp_shard", "tp"), +expt_device_mesh = DeviceMesh.from_group( + [expt_dp_group, expt_tp_group], + device_type="cuda", + mesh=expt_mesh.tolist(), + mesh_dim_names=["dp_shard_cp", "tp"], ) +``` -""" -Fully-shard the model for Megatron-FSDP. This wraps the model in a MegatronFSDP -class that schedules the sharding lifecycle of the model parameters and gradients -during training and inference. +### Convert models into fully-sharded `MegatronFSDP` models with `fully_shard_model`. -The original `torch.nn.Module` can be accessed at `MegatronFSDP.module`. -""" +This wraps the model in a MegatronFSDP class that schedules the sharding +lifecycle of the model parameters and gradients during training and inference. + +```python model = fully_shard_model( # PyTorch (Root) Module model, @@ -192,25 +204,43 @@ model = fully_shard_model( # Preprocess state dict for DCP checkpointing. Required for Torch Distributed Checkpoint. preproc_state_dict_for_dcp_ckpt=True, ) +``` + +The original `torch.nn.Module` can be accessed at `MegatronFSDP.module`. + +### Initialize and fully-shard your optimizer on the `MegatronFSDP` model. -# Initialize your optimizer on the Megatron-FSDP model distributed Parameter(s). 
-# If your optimizer has already been initialized, either use the `fully_shard` -# entrypoint, or use `optimizer.add_param_group({"params": model.parameters()})` -# after resetting your optimizer state via `optimizer.param_groups.clear()` -# and `optimizer.state.clear()`. +Initialize your optimizer on the Megatron-FSDP model distributed `Parameter`(s). +If your optimizer has already been initialized, either use the `fully_shard` +entrypoint, or use `optimizer.add_param_group({"params": model.parameters()})` +after resetting your optimizer state via `optimizer.param_groups.clear()` +and `optimizer.state.clear()`. + +```python optimizer = torch.optim.Optimizer(model.parameters()) +``` -""" -Fully-shard your optimizer, which just modifies your `optimizer.step()`, `optimizer.zero_grad()`, -and distributed optimizer parameters to punctually trigger scheduled FSDP operations for Megatron-FSDP. +`fully_shard_optimizer` modifies your `optimizer.step()`, `optimizer.zero_grad()`, +and distributed optimizer parameters to punctually trigger scheduled FSDP operations +for Megatron-FSDP. + +```python +fully_shard_optimizer( + # PyTorch Optimizer + optimizer, + # Preprocess state dict for DCP checkpointing. + # Required for Torch Distributed Checkpoint. + preproc_state_dict_for_dcp_ckpt=True, +) +``` -These operations can be customized precisely via extended arguments to `step()` and `zero_grad()`: +Extended arguments to `step()` and `zero_grad()` control these FSDP operations: +```python optimizer.step( ..., - # Sync all gradients before the optimizer step. Not necessary and disabled - # automatically when `sync_model_each_microbatch=True` in MegatronFSDP, in - # which case we already synchronize gradients every step but lose performance. + # Sync all gradients before the optimizer step. Alternatively enabled using + # `sync_model_each_microbatch=True` in MegatronFSDP. 
sync_grad_before_optimizer_step=True, # After `optimizer.step()`, install optimized weights into MegatronFSDP's buffers. install_optimized_model_weights=True, @@ -221,19 +251,20 @@ These operations can be customized precisely via extended arguments to `step()` # Also zero out MegatronFSDP's gradient accumulation buffers. zero_grad_buffer=True ) -""" -fully_shard_optimizer( - # PyTorch Optimizer - optimizer, - # Preprocess state dict for DCP checkpointing. Required for Torch Distributed Checkpoint. - preproc_state_dict_for_dcp_ckpt=True, -) +``` -""" -Megatron-FSDP Model Checkpointing -""" +### `MegatronFSDP` Distributed Checkpointing + +Distributed checkpoints can be saved and loaded using Torch DCP. Alternatively, +you can load non-distributed checkpoints before fully-sharding your model with +any existing checkpoint utility compatible with PyTorch Modules. + +```python # Save model and optimizer state. -torch.distributed.checkpoint.save({"model": model.state_dict(), "optimizer": optimizer.state_dict()}, checkpoint_id=str(CKPT_DIR)) +torch.distributed.checkpoint.save( + {"model": model.state_dict(), "optimizer": optimizer.state_dict()}, + checkpoint_id=str(CKPT_DIR) +) # Load model and optimizer state. ckpt_state_dict = {"model": model.state_dict(), "optimizer": optimizer.state_dict()} @@ -245,6 +276,10 @@ model.load_state_dict(ckpt_state_dict["model"], strict=False) optimizer.load_state_dict(ckpt_state_dict["optimizer"]) ``` +## ⚙️ `fully_shard` / `MegatronFSDP` API - Advanced Features + +Megatron-FSDP's `fully_shard_*` API has a comprehensive set of arguments for fine-tuning your model's performance. + - `fsdp_unit_modules` is a list of sub-module classes or `str` import-paths associated with modules that you want `MegatronFSDP` to fully-shard. - Required if `1`, `2`, or `3` are specified as the sharding strategy. Defaults to `None`, in which case Megatron-FSDP will replicate the parameters similar to DDP. 
- `zero_dp_strategy` (and `outer_dp_sharding_strategy`) configure different degrees of zero-redundancy data parallelism as described in [ZeRO (Zero Redundancy Optimizer)](https://arxiv.org/abs/1910.02054). It reduces CUDA memory utilization during model training by distributing model parameters, gradients, and optimizer states across multiple devices in the DP `ProcessGroup`, and collectively communicating subsets of parameters and gradients to specific devices when needed for computation or differentiation. More aggressive sharding strategies will entail more communication overhead, with `no_shard` being the least memory efficient but most communication efficient, and `optim_grads_params` being the most memory efficient but least communication efficient. `outer_dp_sharding_strategy` has the same options, except for the (required) "outer" DP group (`dp_outer_dim` / `hybrid_fsdp_group`) when using [Hybrid-Sharded Data Parallelism (HSDP)](https://arxiv.org/pdf/2304.11277), and only `no_shard` (DP Replication) and `optim` (Optimizer State Hybrid Sharding, requires `zero_dp_strategy='optim_grads_params`) are supported. @@ -276,20 +311,62 @@ optimizer.load_state_dict(ckpt_state_dict["optimizer"]) - Both default to `True`. - `sync_model_each_microbatch` will trigger a `wait` (`MegatronFSDP.finish_grad_sync()`) on gradient reduction, parameter de-allocation, and optimizer parameter / gradient installation (in preparation for `optimizer.step()`) after every forward-backward pass. When using HSDP, parameters and gradients will be all-gathered and reduced respectively on the "outer" DP group each training step instead of each optimization cycle. This behavior is desirable for a transparent and user-friendly sharded training loop where post-backward transformations on the gradient and a clean compute / memory state are necessary between training iterations, but damages performance in situations where optimization is delayed (e.g. 
gradient accumulation) where the communications of the previous training iteration can be overlapped with the compute of the next training iteration. Will also override `is_last_microbatch` / `microbatch_count` logic in `MegatronFSDP`. - Defaults to `True` for `fully_shard`, but defaults to `False` when using the `MegatronFSDP` class directly. -- `keep_fp8_transpose_cache_when_using_custom_fsdp` will keep the fp8 transpose cache when using `MegatronFSDP`. This option will cause (number of parameter $\times$ 1 Byte) of memory overhead, but can skip the weight transpose operation in the backward propagation. This feature will not give any benefit from the Blackwell architecture. - - **Only effective when using Megatron-LM.** +- `enable_fine_grained_param_gather` modifies FSDP to all-gather parameters with per-Module granularity instead of collectively unsharding all sub-modules of a unit module in Megatron-FSDP. + - Defaults to `False`. +- `keep_fp8_transpose_cache` will keep the fp8 transpose cache when using `MegatronFSDP`. This option will cause (number of parameter $\times$ 1 Byte) of memory overhead, but can skip the weight transpose operation in the backward propagation. This feature will not give any benefit from the Blackwell architecture. - Defaults to `False`. - `nccl_ub` will allocate and register the NCCL userbuffer for param and grad buffers. This option enables an SM-efficient NCCL algorithm that could improve the performance of overlapped computations. This flag will be much more effective when used together with SHARP if the FSDP communication includes both NVL and IB domains. Enabling this option will cause additional memory overhead due to the requirement to enable the `fsdp_double_buffer` option. - **Only effective when using with Megatron-Core.** - Defaults to `False`. - - By default we try to use NCCL window (symmetric) registration if it is available. If not it falls back to conventional local registraion. 
+ - By default we try to use NCCL window (symmetric) registration if it is available. If not it falls back to conventional local registration. - `fsdp_manual_registration` will manually register the FSDP communication buffers with the NCCL user buffer. For symmetric registration with large models, the registration itself can take a significant amount of time. This option minimizes the number of registration calls to reduce the registration time. However, with this option enabled, you need to manually call the `ParamAndGradBuffer.manual_buffer_registration()` function after the first iteration. This is already implemented in the Megatron-LM training loop. In other use cases, users are expected to call this function themselves. - **Only effective when using with Megatron-Core.** - This option is only effective when `nccl_ub` is enabled. - Defaults to `False`. -- `disable_symmetric_registration` will disable NCCL window (i.e. symmetric) registraion when using `nccl_ub`. - - Dafaults to `False`. +- `disable_symmetric_registration` will disable NCCL window (i.e. symmetric) registration when using `nccl_ub`. + - Defaults to `False`. - `fsdp_double_buffer` will use persistently allocated double buffers for temporarily-defined memory needed in `MegatronFSDP` communications. Having persistent double buffers may increase peak VRAM utilization, but is required to register NCCL user buffers (`nccl_ub=True`) for `MegatronFSDP`. Currently, this is only supported for simple repetitive model structures such as GPT. - Defaults to `False`. Automatically overridden to `True` when `nccl_ub` is enabled. - `preproc_state_dict_for_dcp_ckpt` adds `model.state_dict()` and `optimizer.state_dict()` post-hooks that modify the model and optimizer state in preparation for `torch.distributed.checkpoint.{save,load}` ([Torch DCP](https://docs.pytorch.org/docs/stable/distributed.checkpoint.html)) checkpointing. 
Specifically, it adds `__create_write_items__` and `__create_chunk_list__` methods to Tensors utilized by Torch DCP to redistribute parameters when saving and loading model and optimizer checkpoints. Can be deactivated should the user need a custom distributed checkpointing strategy. - Defaults to `True`. + +## 🧮 Using Megatron-FSDP with [`TransformerEngine`](https://github.com/NVIDIA/TransformerEngine) + +Megatron-FSDP natively supports mixed-precision activations and parameter sharding in conjunction with [TransformerEngine](https://github.com/NVIDIA/TransformerEngine). + +- Within the [`transformer_engine.pytorch.autocast(recipe: transformer_engine.common.recipe.Recipe)`](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/api/pytorch.html#transformer_engine.pytorch.autocast) context, model activations are converted based on the recipe. +- Within the [`transformer_engine.pytorch.quantized_model_init(recipe: transformer_engine.common.recipe.Recipe)`](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/api/pytorch.html#transformer_engine.pytorch.quantized_model_init) context, TransformerEngine native modules (e.g. [`transformer_engine.pytorch.TransformerLayer`](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/api/pytorch.html#transformer_engine.pytorch.TransformerLayer)) have their parameters converted based on the recipe. + - Requires FP8 model activations, i.e. `transformer_engine.pytorch.autocast`. + +```python +# FP8 Recipe +fp8_recipe = transformer_engine.common.recipe.MXFP8BlockScaling( + fp8_format=transformer_engine.common.recipe.Format.HYBRID, +) + +# Construct TransformerEngine model with FP8 parameters. +with transformer_engine.pytorch.quantized_model_init( + recipe=fp8_recipe, + # Needed for FP8 parameters with Megatron-FSDP. + preserve_high_precision_init_val=True, +): + te_model = transformer_engine.pytorch.TransformerLayer(...) + +# Fully-shard the model. 
+mfsdp_model = fully_shard_model( + module=te_model, + fsdp_unit_modules=[te.pytorch.TransformerLayer], + # Only FSDP / ZeRO-3 supports FP8 parameters. + zero_dp_strategy=3, + # Needed for FP8 parameters. (Default is already True.) + preserve_fp32_weights=True, + # Needed for select FP8 recipes. + keep_fp8_transpose_cache=True, +) + +# Evaluate and differentiate the model with FP8 activations. +with transformer_engine.pytorch.autocast(recipe=fp8_recipe): + mfsdp_model(x).sum().backward() +``` + +ℹ️ `TransformerEngine` kernels have a fair bit of configuration constraints when using FP8-quantized parameters, such as using fused QKV parameters or defining activations and parameters with shapes compatible to FP8 CuBLAS kernels on supported hardware from NVIDIA. To properly initialize `TransformerLayer`, you can refer to the toy model used in our FP8 unit tests: `Megatron-LM/tests/unit_tests/distributed/fsdp/test_mfsdp_fully_shard.py::TestMegatronFsdpFullyShard::test_fully_shard_te_quantized`. \ No newline at end of file diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py index c3e50e769bf..df210f15f05 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py @@ -97,6 +97,7 @@ def fully_shard_model( nccl_ub: bool = False, fsdp_double_buffer: bool = False, disable_symmetric_registration: bool = False, + enable_fine_grained_param_gather: bool = False, ) -> torch.nn.Module: """ Fully-shard the model for Megatron-FSDP. This wraps the model in a MegatronFSDP @@ -232,6 +233,13 @@ class that schedules the sharding lifecycle of the model parameters and gradient disable_symmetric_registration (bool): Whether to disable symmetric (window) registration for NCCL UB registration. This option forces conventional (local) UB registration when nccl_ub is set. + Defaults to False. 
+ + enable_fine_grained_param_gather (bool): + Whether to enable "fine-grained" param all-gather, which can improve performance + when using MXFP8 parameters with activation recomputation. Specifically, it + unshards parameters per-Module instead of unsharding all sub-modules of an FSDP + unit module simultaneously. Defaults to False. Returns: model (MegatronFSDP): The wrapped Megatron-FSDP model configured for FSDP. @@ -241,14 +249,17 @@ class that schedules the sharding lifecycle of the model parameters and gradient if device_mesh is None: if dp_shard_dim is None: dp_shard_dim = "fsdp" + if tp_dim is None: + # Trivial TP dimension to seamlessly support TransformerEngine. + tp_dim = "tp" # Deactivate DP-Outer, which needs to be consistent with Expert DeviceMesh. dp_outer_dim = None hybrid_fsdp_group = None outer_dp_sharding_strategy = ShardingStrategy.NO_SHARD device_mesh = init_device_mesh( device_type="cuda", - mesh_shape=(torch.distributed.get_world_size(),), - mesh_dim_names=(dp_shard_dim,), + mesh_shape=(torch.distributed.get_world_size(), 1), + mesh_dim_names=(dp_shard_dim, tp_dim), ) # Parse zero_dp_strategy and outer_dp_sharding_strategy. @@ -293,7 +304,7 @@ class that schedules the sharding lifecycle of the model parameters and gradient if _outer_fsdp_sharding and zero_dp_strategy != "optim_grads_params": # If sharding on outer DP using HSDP, then we must use HSDP buffers and # we must be fully-sharding on inner DP. HSDP is an extension of FSDP. - # FIXME(@shjwudp, @cspades): This is an unexpected lack of support. + # TODO(@shjwudp, @cspades): Requires various modifications to support. 
raise ValueError( f"Sharding with Hybrid (Fully) Sharded Data Parallel (HSDP) requires " "zero_dp_strategy to use FSDP ('optim_grads_params', 3), because " @@ -358,6 +369,7 @@ class that schedules the sharding lifecycle of the model parameters and gradient calculate_per_token_loss=calculate_per_token_loss, init_model_with_meta_device=init_model_with_meta_device, sync_model_each_microbatch=sync_model_each_microbatch, + enable_fine_grained_param_gather_hook=enable_fine_grained_param_gather, ) # Register a state dict post-hook to add Torch DCP metadata for writing checkpoints. @@ -529,6 +541,7 @@ def fully_shard( nccl_ub: bool = False, fsdp_double_buffer: bool = False, disable_symmetric_registration: bool = False, + enable_fine_grained_param_gather: bool = False, ) -> tuple[MegatronFSDP, torch.optim.Optimizer]: """ Fully shard the model and the optimizer for Megatron-FSDP. @@ -575,6 +588,7 @@ def fully_shard( nccl_ub=nccl_ub, fsdp_double_buffer=fsdp_double_buffer, disable_symmetric_registration=disable_symmetric_registration, + enable_fine_grained_param_gather=enable_fine_grained_param_gather, ) # Extend optimizer methods to support Megatron-FSDP operations. diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index e2cbccf4356..c1c11721f7e 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -139,6 +139,9 @@ class MegatronFSDP(torch.nn.Module): disable_symmetric_registration (bool): Whether to disable symmetric (window) registration for NCCL userbuffer registration. This option will force to use conventional (local) userbuffer registration when nccl_ub is set. + enable_fine_grained_param_gather (bool): Whether to enable "fine-grained" param all-gather, + which can improve performance when using MXFP8 parameters with activation recomputation. 
+ Examples: >>> model = GPTModel(config) >>> model = MegatronFSDP( @@ -541,6 +544,7 @@ def _grad_acc(param): param.main_grad = param.get_main_grad() if param.grad is not None: # Copy the gradient into the allocated main gradient bucket. + # It will be reduce-scattered and accumulated into gbuf. param.main_grad.copy_(to_local_if_dtensor(param.grad)) del param.grad else: @@ -550,6 +554,7 @@ def _grad_acc(param): if not param.grad_added_to_main_grad: if param.grad is not None: # Add the gradient into the allocated main gradient bucket. + # For unsharded gradients, this is gradient accumulation. param.main_grad = param.get_main_grad() param.main_grad.add_(to_local_if_dtensor(param.grad)) del param.grad @@ -611,6 +616,7 @@ def _post_backward(module, *unused): ), ) + @torch.compiler.disable def _pre_forward_param_unshard( module: nn.Module, args: Tuple[Any, ...], kwargs: Dict[str, Any] ): @@ -642,6 +648,7 @@ def _pre_forward_param_unshard( ) return args, kwargs + @torch.compiler.disable def _register_post_backward_hook( post_backward_hook: callable, module: nn.Module, @@ -652,9 +659,8 @@ def _register_post_backward_hook( Pre-forward hook utilized to attach a gradient reduction post-backward hook to the module. """ - # Register the backward function to reduce gradients after the backward pass. - # And for optim_grads_params, we need to release the parameters after the backward pass. if not torch.is_grad_enabled(): + # No gradients / backward pass, don't attach the post-backward hook. return args, kwargs # Preprocess the input arguments. @@ -673,10 +679,10 @@ def _register_post_backward_hook( """ Bootstrapped identity autograd function that attaches a post-backward - "hook" to the module to trigger model resharding / deallocation and - gradient reduce-scatter immediately after the module backward pass has - completed to deallocate this layer's model and gradient memory before - the subsequent backward pass. 
+ "hook" to the module to trigger model compute parameter deallocation + and gradient reduce-scatter immediately after the module backward pass + has completed to shard this layer's model and gradient memory after + the current backward pass stage is complete. """ inp_tensors = RegisterFSDPBackwardFunction.apply( functools.partial(post_backward_hook, module), *inp_tensors @@ -733,14 +739,13 @@ def _root_post_backward(*unused): if self.model_auto_sync: self.finish_grad_sync() + @torch.compiler.disable def _pre_backward_param_unshard(module: nn.Module, *unused): """ Sub-module pre-backward hook to all-gather the module parameters before the backward pass. """ - # Set the module's training state to PRE_BACKWARD to skip resharding - # and unsharding operations when performing activation recomputation - # / gradient checkpointing. + # Set the module's training state to PRE_BACKWARD. module._training_state = TrainingState.PRE_BACKWARD if isinstance(module, tuple(fsdp_unit_modules)): @@ -759,12 +764,13 @@ def _pre_backward_param_unshard(module: nn.Module, *unused): self._root_pre_backward_hook_issued = False def _root_pre_backward(module: nn.Module, *unused): - """Marks the module's training state as 'pre_backward' before the + """Marks the module's training state as PRE_BACKWARD before the backprop, this function is registered on the root module. - This marking enables us to determine whether forward pass needs to - perform reshard/unshard operations in activation recomputation - scenarios. + This root pre-backward hook informs all modules to skip forward + pre-fetching in the pre-forward hooks (for activation recomputation) + and skip weight deallocation / resharding in the post-forward hooks + during the backward pass, which are instead performed by backward hooks. 
""" if self._root_pre_backward_hook_issued: return @@ -773,7 +779,7 @@ def _root_pre_backward(module: nn.Module, *unused): if self.ddp_config.data_parallel_sharding_strategy == "optim_grads_params": for module in root_module.modules(): if isinstance(module, tuple(fsdp_unit_modules)): - # Set PRE_BACKWARD state to skip resharding and unsharding operations + # Set PRE_BACKWARD state to skip resharding and forward pre-fetching # when performing activation recomputation / gradient checkpointing. module._training_state = TrainingState.PRE_BACKWARD # set all param buckets can be released @@ -796,6 +802,7 @@ def _root_pre_backward(module: nn.Module, *unused): # the backward pass. torch.autograd.Variable._execution_engine.queue_callback(_root_post_backward) + @torch.compiler.disable def _post_forward(module: nn.Module, input: Any, output: Any): # When composed with module-hook-based activation recomputation, the # post-backward hook is responsible for resharding the module parameters @@ -815,6 +822,7 @@ def _post_forward(module: nn.Module, input: Any, output: Any): return output + @torch.compiler.disable def _release_module_fp8_transpose_cache(module: nn.Module, *unused): release_params_fp8_transpose_cache(module.parameters(recurse=False)) @@ -824,6 +832,7 @@ def create_custom_backward_hook(module, custom_backward_handler): to the output tensor(s) of a module during a post-forward hook. """ + @torch.compiler.disable def forward_hook(_module, inputs, output): # Replace the output to avoid the output tensor being the same as # the input tensor, which makes it impossible to identify which @@ -934,10 +943,7 @@ def _register_grad_acc_and_reduce_hook(module): if len(list(module.parameters())) != len(list(root_module.parameters())): # Only attach to root sub-module. continue - # Add a pre-backward hook to reshard / deallocate model parameters prior - # to the backward pass. - # Furthermore, add a gradient-triggered post-backward hook to reduce-scatter - # leftover gradients. 
+ # Install the root pre-backward hook. self.backward_pre_hooks[f"{name} _root_pre_backward"] = create_custom_backward_hook( module, _root_pre_backward ) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py index d7156bea5c6..d2797d98079 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +from contextlib import nullcontext from importlib.metadata import version from typing import List, Optional, Tuple @@ -43,6 +44,19 @@ except: TE_VERSION = None +# Detect the quantized_model_init or fp8_model_init context manager. +if HAVE_TE: + try: + from transformer_engine.pytorch import quantized_model_init + + QUANTIZED_MODEL_INIT_CLASS = quantized_model_init + except: + # Fallback to original FP8 model init. + from transformer_engine.pytorch import fp8_model_init + + QUANTIZED_MODEL_INIT_CLASS = fp8_model_init +else: + QUANTIZED_MODEL_INIT_CLASS = nullcontext # Detect the FP8 tensor class try: from transformer_engine.pytorch.tensor import QuantizedTensor @@ -332,3 +346,15 @@ def _fp8_quantize_fallback( packed_amaxes, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group ) _multi_tensor_copy_this_to_that(packed_amax_views, amaxes, dummy_overflow_buf) + + +def get_quantized_model_init_context_cls(): + """ + Get the TransformerEngine model parameter quantization context manager. + """ + if QUANTIZED_MODEL_INIT_CLASS is nullcontext: + logger.warning( + f"quantized_model_init / fp8_model_init context was requested but does not exist. " + f"Verify TransformerEngine is installed (TE_INSTALLED={HAVE_TE})." 
+ ) + return QUANTIZED_MODEL_INIT_CLASS diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index 04ea09970f4..0865ff8e647 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -39,6 +39,7 @@ fp8_need_transpose_data_for_meta_device_init, fp8_quantize, fp8_set_raw_data, + get_quantized_model_init_context_cls, is_blockwise_float8tensor, is_float8tensor, is_te_min_version, @@ -74,7 +75,6 @@ logger.info("Megatron Core is not installed, Megatron-FSDP will run without Megatron Core.") try: - from transformer_engine.pytorch import fp8_model_init from transformer_engine.pytorch.module.base import TransformerEngineBaseModule HAVE_TE = True @@ -2641,7 +2641,12 @@ def num_buckets(self): @torch.no_grad() def copy_main_weights_to_model_weights(self): - """Update the model weights from the main weights.""" + """ + Update the model weights from the main weights. + + If FP8 parameters are utilized, this function will quantize the high-precision + main weights prior to installation into the model compute weight buffers. + """ dense_param_quantize_kwargs = { "model_params": [], "main_params": [], @@ -2737,6 +2742,12 @@ def _batch_quantize_blockwise_fp8_params( model_param = to_local_if_dtensor(param) main_weight = mbuf.get_item(item_id) + # TODO(@kunlunl, @cspades): Currently, we only support FP8 parameters + # for FSDP, i.e. fully-sharded compute parameters with a high-precision + # main weight buffer. Would it be possible to add if branches here to + # quantize the original param (no_shard) or wbuf data (optim, optim_grads) + # for a seamless user experience and coverage for ZeRO-1 and ZeRO-2? 
+ if is_blockwise_float8tensor(param): fp8_params.append(param) if model_param.numel() == 0: @@ -2768,6 +2779,7 @@ def _batch_quantize_blockwise_fp8_params( if is_float8tensor(param): fp8_params.append(param) if model_param.numel() == 0: + # Empty parameter. shard_fp32_from_fp8.append(None) shard_offsets_in_fp8.append(None) shard_model_params.append([None, None]) @@ -3164,7 +3176,7 @@ def _bucket_group_gradient_reduce( # Scale gradients. scaling_factor = gbuf.gradient_scaling_factor reduce_op = gradient_reduce_preprocessing( - gbuf.data, scaling_factor, gbuf.ddp_config + bucket.data, scaling_factor, gbuf.ddp_config ) if not gbuf.is_data_distributed: # All-reduce the gradients on every rank. No scattering @@ -3731,11 +3743,26 @@ def __init__(self, init_param_with_fp8=False, with_cuda_rng_tracker=False): def __enter__(self): self.stack = ExitStack() if self.init_param_with_fp8: - assert HAVE_TE - args = {"enabled": True} - if "preserve_high_precision_init_val" in inspect.signature(fp8_model_init).parameters: - args["preserve_high_precision_init_val"] = True - self.stack.enter_context(fp8_model_init(**args)) + # FIXME(@cspades): This appears to be a legacy dependency that is not needed for + # more recent versions of TransformerEngine, which only requires this context during + # TransformerEngineBaseModule.__init__. Should be removed if backwards compatibility + # is confirmed, because overwrites the quantized_model_init context specified by user. + assert ( + HAVE_TE + ), "TransformerEngine is required for using FP8 parameters with Megatron-FSDP." + # Retrieve import for quantized_model_init (new) or fp8_model_init (old). + # Will be nullcontext if TE is not installed. + te_quantized_model_init_cls = get_quantized_model_init_context_cls() + if te_quantized_model_init_cls is not nullcontext: + # Enable TE quantized parameter context manager. 
+ args = {"enabled": True} + if ( + "preserve_high_precision_init_val" + in inspect.signature(te_quantized_model_init_cls).parameters + ): + # Required for Megatron-FSDP + FP8 parameters. + args["preserve_high_precision_init_val"] = True + self.stack.enter_context(te_quantized_model_init_cls(**args)) if self.with_cuda_rng_tracker: # Megatron / TE RNG tracker needs to be initialized and seeded by the user or FW diff --git a/megatron/core/distributed/param_and_grad_buffer.py b/megatron/core/distributed/param_and_grad_buffer.py index d34fdebaf75..50cf3e0ea37 100644 --- a/megatron/core/distributed/param_and_grad_buffer.py +++ b/megatron/core/distributed/param_and_grad_buffer.py @@ -142,9 +142,7 @@ def __init__( self.data_parallel_group = collective_group # State for bookkeeping: params is the set of parameters this bucket group is - # responsible for, params_with_grad is the set of parameters with grads - # available. When overlap_grad_reduce is True, communication (all-reduce - # or reduce-scatter) is issued when params_with_grad equals params. + # responsible for, param_to_bucket maps params to the corresponding bucket. self.param_to_bucket = {} self.params = set() for bucket in self.buckets: @@ -165,7 +163,22 @@ def __init__( if self.ddp_config.reduce_scatter_with_fp32_accumulation: dist_reduce_scatter_func = reduce_scatter_with_fp32_accumulation - self.reset() + # per_param_grad_ready_counts is a dict mapping parameters to number of times + # `register_grad_ready` is called for that parameter *when + # self.is_last_microbatch is True*. Should be 1 for most params but could be greater + # than 1 if control flow passes through the same parameter multiple times. We lazily + # populate this in the first batch, hence the .is_first_batch attribute. + # When overlap_grad_reduce is True, communication (all-reduce or reduce-scatter) + # is issued when per_param_grad_ready_counts equals golden_per_param_grad_ready_counts. 
+ # In other words, communication is dispatched as soon as all gradients in this bucket + # are *ready*, as marked by the backward hook. + # The set of keys in per_param_grad_ready_counts should be equal to `params`. + self.golden_per_param_grad_ready_counts = {} + self.per_param_grad_ready_counts = {} + self.is_last_microbatch = True + self.is_first_batch = True + + # Other metadata to keep track of collectives. self.param_gather_handle = None self.param_gather_dispatched = False self.grad_reduce_handle = None @@ -182,7 +195,12 @@ def reset(self): """ Reset metadata in bucket group in preparation for the next iteration of training. """ - self.params_with_grad = set() + if self.is_first_batch and len(self.per_param_grad_ready_counts) > 0: + # Record golden per_param_grad_ready_counts. + assert len(self.per_param_grad_ready_counts) == len(self.params) + self.golden_per_param_grad_ready_counts = self.per_param_grad_ready_counts + self.is_first_batch = False + self.per_param_grad_ready_counts = {} self.is_last_microbatch = True def check_grads(self, check_for_nan_or_inf, check_for_large): @@ -346,6 +364,11 @@ def start_grad_sync(self): communication call. When ddp_config.overlap_grad_reduce is set to False, makes synchronous call. """ + if self.is_first_batch and self.grad_reduce_handle is not None: + # Make this start_grad_sync call a no-op if in first batch and collective has + # already been dispatched. + return + assert ( self.grad_reduce_handle is None ), "Should not have multiple communication calls outstanding at once" @@ -485,6 +508,11 @@ def finish_grad_sync(self): if not self.ddp_config.overlap_grad_reduce: self.start_grad_sync() return + # If first batch, start asynchronous communication here. register_grad_ready() launches + # asynchronous communication only once self.golden_per_param_grad_ready_counts is + # populated at the end of this first batch. 
+ if self.is_first_batch: + self.start_grad_sync() # When using multiple DistOpt instances, we don't need to sync here as we launch # communications on a separate communication stream. if self.ddp_config.num_distributed_optimizer_instances > 1: @@ -492,7 +520,8 @@ def finish_grad_sync(self): return assert self.grad_reduce_handle is not None, ( f"Communication call has not been issued for this bucket " - f"({len(self.params_with_grad)}/{len(self.params)} params have grad available)" + f"({len(self.per_param_grad_ready_counts)}/{len(self.params)} " + "params have grad available)" ) self.grad_reduce_handle.wait() self.grad_reduce_handle = None @@ -510,11 +539,14 @@ def register_grad_ready(self, param: torch.nn.Parameter): ), "register_grad_ready() should only be called when overlap_grad_reduce is True" if self.is_last_microbatch: assert param in self.param_to_bucket, "Param is not in the bucket group" - assert param not in self.params_with_grad, "Cannot set grad twice" - self.params_with_grad.add(param) + if param not in self.per_param_grad_ready_counts: + self.per_param_grad_ready_counts[param] = 0 + self.per_param_grad_ready_counts[param] += 1 # If all params in bucket group have grads available, issue communication call. 
- if len(self.params_with_grad) == len(self.params): - self.start_grad_sync() + if not self.is_first_batch: + if self.per_param_grad_ready_counts == self.golden_per_param_grad_ready_counts: + assert len(self.per_param_grad_ready_counts) == len(self.params) + self.start_grad_sync() class _ParamAndGradBuffer: diff --git a/megatron/core/extensions/kitchen.py b/megatron/core/extensions/kitchen.py index 998d864614f..ad9be01fb60 100644 --- a/megatron/core/extensions/kitchen.py +++ b/megatron/core/extensions/kitchen.py @@ -1431,9 +1431,9 @@ def forward( query: Tensor, key: Tensor, value: Tensor, - attention_mask: Tensor, - attn_mask_type: AttnMaskType = None, - attention_bias: Tensor = None, + attention_mask: Optional[Tensor], + attn_mask_type: Optional[AttnMaskType] = None, + attention_bias: Optional[Tensor] = None, packed_seq_params: Optional[PackedSeqParams] = None, ): """Forward.""" @@ -1581,11 +1581,11 @@ def forward( query: Tensor, key: Tensor, value: Tensor, - attention_mask: Tensor, - attn_mask_type: AttnMaskType = None, - attention_bias: Tensor = None, + attention_mask: Optional[Tensor], + attn_mask_type: Optional[AttnMaskType] = None, + attention_bias: Optional[Tensor] = None, packed_seq_params: Optional[PackedSeqParams] = None, - ): + ) -> Tensor: """Forward.""" assert self.init_finished, "Must call finish_init before forward." assert packed_seq_params is None, ( @@ -1725,7 +1725,7 @@ def __init__( self.use_kitchen_attention = use_kitchen_attention self.kitchen_attention_backend = kitchen_attention_backend - def column_parallel_linear(self) -> type: + def column_parallel_linear(self) -> type[KitchenColumnParallelLinear]: """Which column parallel linear module kitchen backend uses""" return KitchenColumnParallelLinear @@ -1744,7 +1744,7 @@ def fuse_layernorm_and_linear(self) -> bool: # explicitly about whether to include a norm. 
return self.fallback.fuse_layernorm_and_linear() - def column_parallel_layer_norm_linear(self) -> Optional[type]: + def column_parallel_layer_norm_linear(self) -> type[KitchenLayerNormColumnParallelLinear]: """Which module for sequential layernorm and linear""" return KitchenLayerNormColumnParallelLinear @@ -1752,7 +1752,9 @@ def layer_norm(self, rms_norm: bool = False, for_qk: bool = False) -> type: """Which module to use for layer norm""" return self.fallback.layer_norm(rms_norm=rms_norm, for_qk=for_qk) - def core_attention(self) -> type: + def core_attention( + self, + ) -> type[KitchenDotProductAttention] | type[KitchenFlashAttention] | type: """Which module to use for attention""" if not self.use_kitchen_attention: log_single_rank( diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index d823e42b0bc..ef8527e9e5e 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -8,7 +8,7 @@ import pickle import warnings from contextlib import nullcontext -from typing import Any, Callable, Dict, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Tuple import torch import torch.nn.functional as F @@ -64,10 +64,17 @@ HAVE_TE = True except ImportError: - from unittest.mock import MagicMock + if TYPE_CHECKING: + # For type checking, treat transformer_engine as always available. 
+ import transformer_engine as te + from transformer_engine.pytorch.fp8 import FP8GlobalStateManager, fp8_autocast - te = MagicMock() - HAVE_TE = False + HAVE_TE = True + else: + from unittest.mock import MagicMock + + te = MagicMock() + HAVE_TE = False _TE_CONFIG_TYPE_KEY = "transformer_engine_config_type" @@ -719,6 +726,7 @@ def __init__( skip_weight_param_allocation: bool = False, tp_comm_buffer_name: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + stride: int = 1, ): if not HAVE_TE: raise ImportError( @@ -810,6 +818,8 @@ def __init__( ), "Must have at least TE version 2.3 or higher to use symmetric memory all reduce" extra_kwargs["symmetric_ar_type"] = self.config.symmetric_ar_type + self.stride = stride + super().__init__( in_features=input_size, out_features=output_size, @@ -835,6 +845,11 @@ def __init__( ) self.te_quant_params: Optional[TEQuantizationParams] = None + # Set proper partition_stride + setattr(self.weight, 'partition_stride', stride) + if bias and hasattr(self, 'bias') and self.bias is not None: + setattr(self.bias, 'partition_stride', stride) + if config.use_cpu_initialization: output_size_per_partition = divide(output_size, self.tp_size) _ = _initialize_affine_weight_cpu( @@ -844,7 +859,7 @@ def __init__( output_size_per_partition, 0, init_method=condition_init_method(config, init_method), - stride=1, + stride=stride, return_master_weight=False, rank=self.tp_rank, world_size=self.tp_size, @@ -854,7 +869,7 @@ def __init__( self.bias = Parameter( torch.empty(output_size_per_partition, dtype=config.params_dtype) ) - set_tensor_model_parallel_attributes(self.bias, True, 0, 1) + set_tensor_model_parallel_attributes(self.bias, True, 0, stride) with torch.no_grad(): self.bias.zero_() setattr(self.bias, "allreduce", True) @@ -934,6 +949,7 @@ def __init__( skip_weight_param_allocation: bool = False, tp_comm_buffer_name: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + stride: int = 1, 
): if not HAVE_TE: raise ImportError( @@ -947,6 +963,7 @@ def __init__( self._tp_group = tp_group world_size = get_pg_size(tp_group) rank = get_pg_rank(tp_group) + self.stride = stride super().__init__( input_size=input_size, @@ -967,6 +984,11 @@ def __init__( tp_group=tp_group, ) + # Set proper partition_stride + setattr(self.weight, 'partition_stride', stride) + if bias and hasattr(self, 'bias') and self.bias is not None: + setattr(self.bias, 'partition_stride', stride) + if config.use_cpu_initialization: output_size_per_partition = divide(output_size, world_size) _ = _initialize_affine_weight_cpu( @@ -976,7 +998,7 @@ def __init__( output_size_per_partition, 0, init_method=condition_init_method(config, init_method), - stride=1, + stride=stride, return_master_weight=False, rank=rank, world_size=world_size, @@ -986,7 +1008,7 @@ def __init__( self.bias = Parameter( torch.empty(output_size_per_partition, dtype=config.params_dtype) ) - set_tensor_model_parallel_attributes(self.bias, True, 0, 1) + set_tensor_model_parallel_attributes(self.bias, True, 0, stride) with torch.no_grad(): self.bias.zero_() setattr(self.bias, "allreduce", True) @@ -1137,8 +1159,8 @@ def __init__( k_channels: Optional[int] = None, v_channels: Optional[int] = None, num_splits: Optional[int] = None, - cp_comm_type: str = "p2p", - pg_collection: ProcessGroupCollection = None, + cp_comm_type: Optional[str] = "p2p", + pg_collection: Optional[ProcessGroupCollection] = None, ): if not HAVE_TE: raise ImportError( @@ -1313,12 +1335,12 @@ def forward( query: Tensor, key: Tensor, value: Tensor, - attention_mask: Tensor, + attention_mask: Optional[Tensor], attn_mask_type: AttnMaskType, - attention_bias: Tensor = None, - packed_seq_params: PackedSeqParams = None, + attention_bias: Optional[Tensor] = None, + packed_seq_params: Optional[PackedSeqParams] = None, num_splits: Optional[int] = None, - ): + ) -> torch.Tensor: """Forward.""" if packed_seq_params is not None: # If Dynamic CP group is provided, 
update TE DPA CP group diff --git a/megatron/core/hyper_comm_grid.py b/megatron/core/hyper_comm_grid.py index 379bca69f74..401d4a1c927 100644 --- a/megatron/core/hyper_comm_grid.py +++ b/megatron/core/hyper_comm_grid.py @@ -177,6 +177,22 @@ def get_pg(self, dims: Union[str, list[str]]) -> dist.ProcessGroup: return self._pgs[unique_group_key] + def get_rank_enum(self, dims: Union[str, list[str]]) -> list[list[int]]: + r"""Get the rank enumeration for the requested dimension(s). + + This is the exact enumeration that would be used by create_pg for the same + dims. It is useful for creating additional groups whose membership is derived from + the grid (e.g., embedding/position-embedding groups derived from PP groups). + + Args: + dims: Dimension name or list of dimension names. + + Returns: + List of rank lists (one per subgroup). + """ + ordered_dims, _ = self._order_dims(dims) + return self._gen_rank_enum(ordered_dims) + def _gen_rank_enum(self, dims: list[str]) -> list[list[int]]: r"""Generate rank enumeration before calling new_subgroups_by_enumeration diff --git a/megatron/core/inference/batch_dimensions_utils.py b/megatron/core/inference/batch_dimensions_utils.py index 41f00b1f162..e6ea32a6df8 100644 --- a/megatron/core/inference/batch_dimensions_utils.py +++ b/megatron/core/inference/batch_dimensions_utils.py @@ -14,7 +14,7 @@ import torch -from megatron.core import parallel_state +from megatron.core.utils import get_pg_size @dataclass(order=True, frozen=True) @@ -25,6 +25,7 @@ class InferenceBatchDimensions: token_count : number of total input tokens prefill_req_count : number of prefill requests decode_req_count : number of decode requests + has_explicit_chunked_prefill_req : whether the batch has an explicit chunked prefill request The batch dimensions are ordered by token_count, then by prefill_req_count, then by decode_req_count. 
@@ -34,6 +35,7 @@ class InferenceBatchDimensions: token_count: int = 0 prefill_req_count: int = 0 decode_req_count: int = 0 + has_explicit_chunked_prefill_req: bool = False def __str__(self): """ @@ -53,6 +55,9 @@ def is_applicable_for_batch_dim( for prefill or decode requests. Otherwise, prefill slots can only be used for prefill requests. """ + if real_batch_dim.has_explicit_chunked_prefill_req != self.has_explicit_chunked_prefill_req: + return False + if real_batch_dim.prefill_req_count == 0: return ( self.token_count >= real_batch_dim.token_count @@ -99,6 +104,10 @@ def is_valid(self, max_requests: int, max_sequence_length: int) -> bool: if self.token_count > self.prefill_req_count * max_sequence_length + self.decode_req_count: return False + # Check if there is an invalid chunked prefill request. + if self.prefill_req_count == 0 and self.has_explicit_chunked_prefill_req: + return False + return True def __hash__(self): @@ -106,7 +115,14 @@ def __hash__(self): Returns a hash of the batch dimension. In cuda graph quick matching, the batch dimension is used as a key in a dictionary. 
""" - return hash((self.token_count, self.prefill_req_count, self.decode_req_count)) + return hash( + ( + self.token_count, + self.prefill_req_count, + self.decode_req_count, + self.has_explicit_chunked_prefill_req, + ) + ) def __eq__(self, other: "InferenceBatchDimensions") -> bool: """ @@ -114,10 +130,16 @@ def __eq__(self, other: "InferenceBatchDimensions") -> bool: """ if other is None: return False - return (self.token_count, self.prefill_req_count, self.decode_req_count) == ( + return ( + self.token_count, + self.prefill_req_count, + self.decode_req_count, + self.has_explicit_chunked_prefill_req, + ) == ( other.token_count, other.prefill_req_count, other.decode_req_count, + other.has_explicit_chunked_prefill_req, ) @property @@ -129,41 +151,68 @@ def req_count(self) -> int: @staticmethod def adjust_batch_dims_for_expert_parallelism( - local_batch_dims, decode_only_cuda_graphs: bool - ) -> "InferenceBatchDimensions": + local_batch_dims, + strict: bool, + decode_only_cuda_graphs: bool, + ep_group: Optional[torch.distributed.ProcessGroup] = None, + ) -> Optional["InferenceBatchDimensions"]: """Adjusted cuda graph batch dimensions for expert parallelism. We take the max token count across expert model parallel group. + + Args: + local_batch_dims: The local batch dimensions to adjust. + strict: Whether to use strict matching for batch dimensions. + decode_only_cuda_graphs: Whether CUDA graphs are only used for decode steps. + ep_group: Optional expert parallel process group. If None, uses global parallel state. + When using different EP sizes for inference vs training, pass the + inference EP group explicitly. + Return: (InferenceBatchDimensions) A new InferenceBatchDimensions object with - adjusted dimensions. + adjusted dimensions, or None if eager mode should be used. 
""" - - ep_size = parallel_state.get_expert_model_parallel_world_size() + ep_size = get_pg_size(ep_group) if ep_size <= 1: return local_batch_dims - - expert_model_parallel_group = parallel_state.get_expert_model_parallel_group() # all reduce local work across expert model parallel group + has_explicit_chunked_prefill_req = local_batch_dims.has_explicit_chunked_prefill_req is_non_decode = local_batch_dims.prefill_req_count > 0 - sync_tensor = torch.tensor( - [local_batch_dims.token_count, int(is_non_decode)], + [ + local_batch_dims.token_count, + int(is_non_decode), + int(has_explicit_chunked_prefill_req), + ], dtype=torch.int32, device=torch.cuda.current_device(), ) - torch.distributed.all_reduce( - sync_tensor, op=torch.distributed.ReduceOp.MAX, group=expert_model_parallel_group - ) + + torch.distributed.all_reduce(sync_tensor, op=torch.distributed.ReduceOp.MAX, group=ep_group) + sync_tensor = sync_tensor.cpu() is_any_ep_rank_in_non_decode = sync_tensor[1].item() == 1 - if decode_only_cuda_graphs and is_any_ep_rank_in_non_decode: + any_ep_rank_has_explicit_chunked_prefill_req = sync_tensor[2].item() == 1 + + # We force eager mode for scenarios where some ranks will run with CUDA graphs + # while others will not. Without this check, the all-to-all communication in the + # expert routing layer would pad up to the maximum capacity only for the ranks that + # are using CUDA graphs in this step, leading to a NCCL hang. + # This can happen in the following cases: + # 1. If we only allow decode CUDA graphs but some ranks are running non-decode batches + # 2. 
Some ranks are running explicit chunked prefill requests + # (graphs are not recorded for batches with explicit chunked prefill requests) + if ( + decode_only_cuda_graphs and is_any_ep_rank_in_non_decode + ) or any_ep_rank_has_explicit_chunked_prefill_req: return None # indicate no match, run in eager mode + assert not has_explicit_chunked_prefill_req adjusted_batch_dim = InferenceBatchDimensions( token_count=int(sync_tensor[0].item()), prefill_req_count=local_batch_dims.prefill_req_count, decode_req_count=local_batch_dims.decode_req_count, + has_explicit_chunked_prefill_req=False, ) return adjusted_batch_dim @@ -400,6 +449,7 @@ def match_graph_config( cuda_graph_batch_dimensions_list: List[InferenceBatchDimensions], strict: bool = False, decode_only_cuda_graphs: bool = False, + ep_group: Optional[torch.distributed.ProcessGroup] = None, ) -> Optional[InferenceBatchDimensions]: """ Matches the best CUDA graph batch dimension for the given real batch dimension. @@ -412,6 +462,9 @@ def match_graph_config( decode_only_cuda_graphs: Used by expert parallel matching. If this is true, and one of the EP ranks is running a non-decode step, we elect to run in eager mode instead of matching a decode-only cuda graph. + ep_group: Optional expert parallel process group. If None, uses global parallel state. + When using different EP sizes for inference vs training, pass the + inference EP group explicitly. 
Returns: The best matching CUDA graph batch dimension, or None if no applicable match is found """ @@ -421,7 +474,10 @@ def match_graph_config( return None adjusted_batch_dim = InferenceBatchDimensions.adjust_batch_dims_for_expert_parallelism( - real_batch_dim, decode_only_cuda_graphs + real_batch_dim, + strict=strict, + decode_only_cuda_graphs=decode_only_cuda_graphs, + ep_group=ep_group, ) if adjusted_batch_dim is None: diff --git a/megatron/core/inference/communication/torch_symm_triton/__init__.py b/megatron/core/inference/communication/torch_symm_triton/__init__.py index 17e42a67768..ca58663d9ec 100644 --- a/megatron/core/inference/communication/torch_symm_triton/__init__.py +++ b/megatron/core/inference/communication/torch_symm_triton/__init__.py @@ -1,3 +1,4 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. from .collectives import multimem_all_gather, multimem_reduce_scatter +from .fused_collectives import fused_multimem_rs_add_norm_ag diff --git a/megatron/core/inference/communication/torch_symm_triton/fused_collectives.py b/megatron/core/inference/communication/torch_symm_triton/fused_collectives.py new file mode 100644 index 00000000000..875a8ff8d96 --- /dev/null +++ b/megatron/core/inference/communication/torch_symm_triton/fused_collectives.py @@ -0,0 +1,280 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +import torch + +from .barrier import symm_mem_sync +from .multimem_asm import add_v8_bf16_from_u32, asm_rsqrt, ld_128, st_128 +from .utils import sync_threads + +try: + import triton + import triton.language as tl +except ImportError: + from unittest.mock import MagicMock + + from megatron.core.utils import null_decorator + + triton = MagicMock() + tl = MagicMock() + triton.jit = null_decorator + + +@triton.jit +def unpack_bf16x2(x, mask): + """ + Unpack x, which is in bf16x2 packed format stored in uint32, + into two float32 tensors representing the high and low bf16 values. 
+ + Args: + x: tl.uint32 tensor containing packed bf16x2 values. + mask: boolean mask tensor, 1 denotes that x is valid. + Returns: + x_hi: float32 tensor containing the high bf16 values. + x_lo: float32 tensor containing the low bf16 values. + """ + x = x * mask + x_hi = (x >> 16).cast(tl.uint16).cast(tl.bfloat16, bitcast=True).cast(tl.float32) + x_lo = x.cast(tl.uint16).cast(tl.bfloat16, bitcast=True).cast(tl.float32) + return x_hi, x_lo + + +@triton.jit +def sum_sq(x, y, z, w, mask): + """ + First computes the squared sum of 8 bf16 values + packed in x, y, z, w. Then does an SM-wide + reduction to get the total sqaured sum. + Args: + x, y, z, w: tl.uint32 tensors containing packed bf16x2 values. + mask: boolean mask tensor, 1 denotes that x,y,z,w are valid. + Returns: + sq_sum: float32 scalar, the total squared sum. + """ + x_hi, x_lo = unpack_bf16x2(x, mask) + y_hi, y_lo = unpack_bf16x2(y, mask) + z_hi, z_lo = unpack_bf16x2(z, mask) + w_hi, w_lo = unpack_bf16x2(w, mask) + # thread local sum + sq_sum = ( + x_hi * x_hi + + x_lo * x_lo + + y_hi * y_hi + + y_lo * y_lo + + z_hi * z_hi + + z_lo * z_lo + + w_hi * w_hi + + w_lo * w_lo + ) + # sm-wide reduction + sq_sum = tl.sum(sq_sum) + return sq_sum + + +@triton.jit +def apply_norm(x, y, z, w, wx, wy, wz, ww, rrms, mask): + """ + Apply RMS norm to the input bf16x2 tensors x,y,z,w using + the rms norm weights wx,wy,wz,ww and the reciprocal + root mean square rrms. 
+ """ + # todo: try converting to pure ASM code + x_hi, x_lo = unpack_bf16x2(x, mask) + y_hi, y_lo = unpack_bf16x2(y, mask) + z_hi, z_lo = unpack_bf16x2(z, mask) + w_hi, w_lo = unpack_bf16x2(w, mask) + wx_hi, wx_lo = unpack_bf16x2(wx, mask) + wy_hi, wy_lo = unpack_bf16x2(wy, mask) + wz_hi, wz_lo = unpack_bf16x2(wz, mask) + ww_hi, ww_lo = unpack_bf16x2(ww, mask) + + x_hi = (x_hi * rrms * wx_hi).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast( + tl.uint32 + ) << 16 + x_lo = (x_lo * rrms * wx_lo).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(tl.uint32) + y_hi = (y_hi * rrms * wy_hi).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast( + tl.uint32 + ) << 16 + y_lo = (y_lo * rrms * wy_lo).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(tl.uint32) + z_hi = (z_hi * rrms * wz_hi).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast( + tl.uint32 + ) << 16 + z_lo = (z_lo * rrms * wz_lo).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(tl.uint32) + w_hi = (w_hi * rrms * ww_hi).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast( + tl.uint32 + ) << 16 + w_lo = (w_lo * rrms * ww_lo).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(tl.uint32) + # pack back to bf16x2, to be used by nvls multicast store. 
+ x = x_hi | x_lo + y = y_hi | y_lo + z = z_hi | z_lo + w = w_hi | w_lo + return x, y, z, w + + +@triton.jit +def _multimem_reduce_scatter_residual_add_kernel( + residual_output_ptr, + residual_input_ptr, + rms_norm_weights_ptr, + multicast_ptr, # points to symmetric memory buffer + signal_pad_ptrs, + num_tokens, + eps, + HIDDEN_SIZE: tl.constexpr, + BLOCK_SIZE: tl.constexpr, + NUMEL_PER_THREAD: tl.constexpr, + RANK: tl.constexpr, + WORLD_SIZE: tl.constexpr, +): + symm_mem_sync( + signal_pad_ptrs, + None, + RANK, + WORLD_SIZE, + hasPreviousMemAccess=False, + hasSubsequentMemAccess=False, + ) + sync_threads() + + pid = tl.program_id(axis=0) + tid = tl.arange(0, BLOCK_SIZE) + + tokens_per_rank = tl.cdiv(num_tokens, WORLD_SIZE) + numel_per_token = tl.cdiv(HIDDEN_SIZE, NUMEL_PER_THREAD) + numel_per_rank = tokens_per_rank * numel_per_token + + # each program handles 1 token at a time + program_offset = pid * numel_per_token + thread_mask = tid < numel_per_token + + for token_offset in range(pid, tokens_per_rank, tl.num_programs(axis=0)): + # Step 1: - reduce-scatter + residual add for this token + collect sq sum + program_offset = token_offset * numel_per_token + sq_sum_ = 0.0 + for thread_offset in range(0, numel_per_token, BLOCK_SIZE): + offsets = program_offset + thread_offset + tid + mask = (offsets < numel_per_rank) & (thread_mask) + multicast_ptrs = ( + multicast_ptr.to(tl.pointer_type(tl.uint64)) + (RANK * numel_per_rank + offsets) * 2 + ) + res_out_ptrs = residual_output_ptr.to(tl.pointer_type(tl.uint64)) + offsets * 2 + res_in_ptrs = residual_input_ptr.to(tl.pointer_type(tl.uint64)) + offsets * 2 + # reduce-scatter + (x, y, z, w) = ld_128(multicast_ptrs, mask=mask, multicast_op=True) + # load residual + (rx, ry, rz, rw) = ld_128(res_in_ptrs, mask=mask, multicast_op=False) + # add residual + (x, y, z, w) = add_v8_bf16_from_u32(x, y, z, w, rx, ry, rz, rw) + # store residual + st_128(res_out_ptrs, x, y, z, w, mask=mask, multicast_op=False) + # update squared sum 
for computing the norm later + sq_sum_ += sum_sq(x, y, z, w, mask=mask) + + # sum_sq is now the sum of squares for this token + # it is a SM-wide reduction, so no need to sync_threads() + mean_sq = sq_sum_ / HIDDEN_SIZE + rrms = asm_rsqrt(mean_sq, eps) + + # Step 2 - apply-rms-norm + all-gather + for thread_offset in range(0, numel_per_token, BLOCK_SIZE): + offsets = program_offset + thread_offset + tid + # first offset is a token offset + # second offset is a hidden-dim offset (in units of 128-bit) + mask = (offsets < numel_per_rank) & (thread_mask) + + multicast_ptrs = ( + multicast_ptr.to(tl.pointer_type(tl.uint64)) + (RANK * numel_per_rank + offsets) * 2 + ) + res_out_ptrs = residual_output_ptr.to(tl.pointer_type(tl.uint64)) + offsets * 2 + + rms_norm_weights_ptrs = ( + rms_norm_weights_ptr.to(tl.pointer_type(tl.uint64)) + (thread_offset + tid) * 2 + ) + + (rx, ry, rz, rw) = ld_128(res_out_ptrs, mask=mask, multicast_op=False) + (wx, wy, wz, ww) = ld_128(rms_norm_weights_ptrs, mask=mask, multicast_op=False) + (nx, ny, nz, nw) = apply_norm(rx, ry, rz, rw, wx, wy, wz, ww, rrms, mask) + st_128(multicast_ptrs, nx, ny, nz, nw, mask=mask, multicast_op=True) + + sync_threads() + symm_mem_sync( + signal_pad_ptrs, + None, + RANK, + WORLD_SIZE, + hasPreviousMemAccess=True, + hasSubsequentMemAccess=True, + ) + + +def fused_multimem_rs_add_norm_ag( + residual_output_tensor: torch.Tensor, + input_tensor: torch.Tensor, + symm_mem_hdl, + residual_input_tensor: torch.Tensor, + rms_norm_weights: torch.Tensor, + eps: float, +) -> torch.Tensor: + """ + Calls a multicast reduce-scatter + residual add + rms norm + all-gather + triton kernel. Writes out the output of the residual add to residual_output_tensor. + The output of the full kernel is written in-place to the symmetric memory buffer. + input_tensor must be a symmetric memory buffer. + Args: + residual_output_tensor: torch.Tensor to write the output of the residual add. 
+ input_tensor: torch.Tensor, symmetric memory buffer to read the input from. + symm_mem_hdl: _SymmetricMemory handle for the symmetric memory buffer. + residual_input_tensor: torch.Tensor, the residual input to be added. + rms_norm_weights: torch.Tensor, the weights for rms norm. + eps: float, epsilon value for rms norm. + Returns: + residual_output_tensor: torch.Tensor, the output of the full fused operation. + """ + WARP_SIZE = 32 + MAX_NUM_BLOCKS = 128 + MAX_BLOCK_SIZE = 1024 + BYTES_PER_THREAD = 16 + + assert input_tensor.dtype == torch.bfloat16, "Only bfloat16 is supported for now." + assert residual_output_tensor.dtype == torch.bfloat16, "Only bfloat16 is supported for now." + assert residual_input_tensor.dtype == torch.bfloat16, "Only bfloat16 is supported for now." + + # this evaluates to 128 for bf16. + # each thread will process 128 bits (8 bf16 values) at a time. + numel_per_thread = BYTES_PER_THREAD // residual_input_tensor.element_size() + + assert ( + input_tensor.numel() % numel_per_thread == 0 + ), "The number of elements must be 128-bit aligned." 
+ + num_threads = triton.cdiv(input_tensor.numel() // numel_per_thread, symm_mem_hdl.world_size) + + if num_threads < MAX_BLOCK_SIZE: + block_size = 1 + while block_size < num_threads: + block_size *= 2 + num_warps = block_size // WARP_SIZE + num_blocks = 1 + else: + block_size = MAX_BLOCK_SIZE + num_warps = MAX_BLOCK_SIZE // WARP_SIZE + num_blocks = min(triton.cdiv(num_threads, MAX_BLOCK_SIZE), MAX_NUM_BLOCKS) + + hsize = input_tensor.size(-1) + _multimem_reduce_scatter_residual_add_kernel[(num_blocks, 1, 1)]( + residual_output_tensor.data_ptr(), + residual_input_tensor.data_ptr(), + rms_norm_weights.data_ptr(), + symm_mem_hdl.multicast_ptr, + symm_mem_hdl.signal_pad_ptrs_dev, + input_tensor.numel() // hsize, + eps=eps, + HIDDEN_SIZE=hsize, + BLOCK_SIZE=block_size, + NUMEL_PER_THREAD=numel_per_thread, + RANK=symm_mem_hdl.rank, + WORLD_SIZE=symm_mem_hdl.world_size, + num_warps=num_warps, + ) + + return residual_output_tensor diff --git a/megatron/core/inference/communication/torch_symm_triton/multimem_asm.py b/megatron/core/inference/communication/torch_symm_triton/multimem_asm.py index cf85ce57f61..774c3f6d2bf 100644 --- a/megatron/core/inference/communication/torch_symm_triton/multimem_asm.py +++ b/megatron/core/inference/communication/torch_symm_triton/multimem_asm.py @@ -157,3 +157,57 @@ def st_128(ptr, x, y, z, w, mask, multicast_op): is_pure=False, pack=1, ) + + +@triton.jit +def add_v8_bf16_from_u32( + a0, + a1, + a2, + a3, # First vector of 8 bf16s, packed in 4 uint32s + b0, + b1, + b2, + b3, # Second vector of 8 bf16s, packed in 4 uint32s +): + """ + Adds two vectors of 8 bfloat16 numbers. + Each vector is passed as four tl.uint32 tensors. + Returns the result as a tuple of four tl.uint32 tensors. 
+ """ + return tl.inline_asm_elementwise( + """ + { + add.bf16x2 $0, $4, $8; + add.bf16x2 $1, $5, $9; + add.bf16x2 $2, $6, $10; + add.bf16x2 $3, $7, $11; + } + """, + # 8 outputs (=r), 8 inputs (r) + "=r,=r,=r,=r,r,r,r,r,r,r,r,r", + args=[a0, a1, a2, a3, b0, b1, b2, b3], + dtype=(tl.uint32, tl.uint32, tl.uint32, tl.uint32), + is_pure=True, + pack=1, + ) + + +@triton.jit +def asm_rsqrt(x, eps): + """ + Computes the reciprocal square root of a float32 number using inline assembly. + """ + return tl.inline_asm_elementwise( + """ + { + add.f32 $1, $1, $2; + rsqrt.approx.f32 $0, $1; + } + """, + "=f, f, f", + args=[x, eps], + dtype=(tl.float32), + is_pure=True, + pack=1, + ) diff --git a/megatron/core/inference/contexts/attention_context/mamba_metadata.py b/megatron/core/inference/contexts/attention_context/mamba_metadata.py index ecb0296559f..6cf45aeb9e1 100644 --- a/megatron/core/inference/contexts/attention_context/mamba_metadata.py +++ b/megatron/core/inference/contexts/attention_context/mamba_metadata.py @@ -5,10 +5,18 @@ import torch +from megatron.core.inference.batch_dimensions_utils import InferenceBatchDimensions + @dataclass class MambaInferenceStateConfig: - """Config for initializing Mamba model inference state tensors.""" + """ + Config for initializing Mamba model inference state tensors. + + Note that we maintain separate metadata for decode, regular prefill, and + chunked prefill requests because the Mamba kernels do not yet support mixing + these. Once the kernels have been updated we can simplify this code. + """ layer_type_list: List[str] """ @@ -26,7 +34,7 @@ class MambaInferenceStateConfig: class MambaMetadata: """Manages the metadata tensors required for Mamba layers during inference.""" - def __init__(self, max_requests: int): + def __init__(self, max_requests: int, max_tokens: int): """ Initializes the Mamba slot allocator. @@ -34,15 +42,50 @@ def __init__(self, max_requests: int): max_requests (int): The maximum number of concurrent requests. 
""" self.max_requests = max_requests + self.max_tokens = max_tokens + self.device = torch.cuda.current_device() - # Metadata for mapping requests to slots in the static Mamba state buffer + # Map from requests to slots in the static Mamba state buffer self.request_to_mamba_state_idx = torch.full( (self.max_requests,), -1, dtype=torch.int32, device=torch.cuda.current_device() ) - # Separate mapping used only for CUDA graph compatibility - self.request_to_mamba_state_idx_cudagraph_only = torch.full( - (self.max_requests,), -1, dtype=torch.int32, device=torch.cuda.current_device() + # Map from requests to slots in the static Mamba state buffer for active decode requests + self._batch_indices_decode_buffer = torch.full( + (self.max_requests,), -1, dtype=torch.int32, device=self.device + ) + + # Map from requests to slots in the static Mamba state buffer for active prefill requests + self._batch_indices_prefill_buffer = torch.full( + (self.max_requests,), -1, dtype=torch.int32, device=self.device + ) + + # Map from the active chunked prefill request to its slot in the static Mamba state buffer + self._batch_indices_chunked_prefill_buffer = torch.full( + (1,), -1, dtype=torch.int32, device=self.device + ) + + # Map from token id to request id for active prefill requests + self._seq_idx_buffer = torch.full( + (1, self.max_tokens), -1, dtype=torch.int32, device=self.device + ) + + # Cumulative sequence lengths for active prefill requests + self._cu_seqlens_buffer = torch.zeros( + (self.max_requests + 1,), dtype=torch.int32, device=self.device + ) + + # Tuple of (active decode request count, active prefill request count) + self._device_decode_prefill_buffer = torch.zeros( + (2,), dtype=torch.int32, device=self.device + ) + + # Tuple of ( + # total prefill sequence length excluding chunked prefill, + # chunked prefill sequence length + # ) + self._device_chunked_prefill_buffer = torch.zeros( + (2,), dtype=torch.int32, device=self.device ) # Allocator for Mamba state slots @@ 
-56,7 +99,8 @@ def reset(self) -> None: Resets all Mamba states and frees all allocated slots. """ self.request_to_mamba_state_idx.fill_(-1) - self.request_to_mamba_state_idx_cudagraph_only.fill_(-1) + + self.reset_varlen_metadata() # Re-initialize the free slot pool self.mamba_state_free_slots = torch.arange( @@ -64,14 +108,23 @@ def reset(self) -> None: ) self.mamba_state_free_slot_count = self.max_requests - def reset_cudagraph_mapping(self) -> None: - """ - Resets only the CUDA graph mapping tensor. - """ - self.request_to_mamba_state_idx_cudagraph_only.fill_(-1) + def reset_varlen_metadata(self) -> None: + """Resets varlen metadata.""" + self.batch_indices_decode = None + self.batch_indices_prefill = None + self.batch_indices_chunked_prefill = None + self.cu_seqlens = None + self.seq_idx = None + self.device_decode_prefill = None + self.device_chunked_prefill = None - def update_cudagraph_mapping( - self, active_mamba_indices: torch.Tensor, num_active_requests: int + def update( + self, + active_mamba_indices: torch.Tensor, + token_to_request_idx: torch.Tensor, + cu_seqlens: torch.Tensor, + batch_dimensions: InferenceBatchDimensions, + padded_batch_dimensions: InferenceBatchDimensions, ) -> None: """ Updates the dedicated CUDA graph mapping tensor with the indices @@ -82,7 +135,104 @@ def update_cudagraph_mapping( for active requests. num_active_requests (int): The number of active requests. 
""" - self.request_to_mamba_state_idx_cudagraph_only[0:num_active_requests] = active_mamba_indices + real_decode_count = batch_dimensions.decode_req_count + real_prefill_count = batch_dimensions.prefill_req_count + real_token_count = batch_dimensions.token_count + has_explicit_chunked_prefill_req = batch_dimensions.has_explicit_chunked_prefill_req + + padded_decode_count = padded_batch_dimensions.decode_req_count + padded_prefill_count = padded_batch_dimensions.prefill_req_count + padded_token_count = padded_batch_dimensions.token_count + assert ( + has_explicit_chunked_prefill_req + == padded_batch_dimensions.has_explicit_chunked_prefill_req + ) + + if padded_decode_count > 0: + # Update decode indices + self._batch_indices_decode_buffer[:real_decode_count].copy_( + active_mamba_indices[:real_decode_count] + ) + if padded_decode_count > real_decode_count: + self._batch_indices_decode_buffer[real_decode_count:padded_decode_count] = -1 + self.batch_indices_decode = self._batch_indices_decode_buffer[:padded_decode_count] + + # Determine if we have a chunked prefill request and adjust counts for regular prefill + regular_prefill_count = real_prefill_count + if has_explicit_chunked_prefill_req: + # The last prefill request is the chunked one + regular_prefill_count -= 1 + chunked_req_idx = real_decode_count + regular_prefill_count + + # Update chunked prefill indices + self._batch_indices_chunked_prefill_buffer[0] = active_mamba_indices[chunked_req_idx] + self.batch_indices_chunked_prefill = self._batch_indices_chunked_prefill_buffer + else: + self.batch_indices_chunked_prefill = None + + if padded_prefill_count > 0: + # Update prefill indices (excluding chunked prefill from regular prefill buffer) + if regular_prefill_count > 0: + self._batch_indices_prefill_buffer[:regular_prefill_count].copy_( + active_mamba_indices[ + real_decode_count : real_decode_count + regular_prefill_count + ] + ) + + if padded_prefill_count > regular_prefill_count: + 
self._batch_indices_prefill_buffer[regular_prefill_count:padded_prefill_count] = -1 + + self.batch_indices_prefill = self._batch_indices_prefill_buffer[:padded_prefill_count] + + # Update seq_idx + end_regular_prefill_token_idx = cu_seqlens[real_decode_count + regular_prefill_count] + + # The length of tokens belonging to regular prefill requests (excluding decode tokens) + seq_len = end_regular_prefill_token_idx - real_decode_count + + if seq_len > 0: + self._seq_idx_buffer[:, :seq_len].copy_( + token_to_request_idx[real_decode_count:end_regular_prefill_token_idx] + - real_decode_count + ) + + if padded_token_count > seq_len: + self._seq_idx_buffer[:, seq_len:padded_token_count] = -1 + self.seq_idx = self._seq_idx_buffer[:, :padded_token_count] + + # Update cu_seqlens + self._cu_seqlens_buffer[0] = 0 + if regular_prefill_count > 0: + self._cu_seqlens_buffer[1 : regular_prefill_count + 1].copy_( + cu_seqlens[ + real_decode_count + 1 : real_decode_count + regular_prefill_count + 1 + ] + - real_decode_count + ) + + # Pad the rest with the last value (effectively length 0 segments) + last_val = self._cu_seqlens_buffer[regular_prefill_count] + self._cu_seqlens_buffer[regular_prefill_count + 1 : padded_prefill_count + 1].fill_( + last_val + ) + self.cu_seqlens = self._cu_seqlens_buffer[: padded_prefill_count + 1] + + if padded_decode_count > 0 and padded_prefill_count > 0: + self._device_decode_prefill_buffer[0] = real_decode_count + self._device_decode_prefill_buffer[1] = regular_prefill_count + self.device_decode_prefill = self._device_decode_prefill_buffer + + # If using chunked prefill for this batch, store the number of regular prefill tokens + # and the number of tokens in the chunked prefill request + if has_explicit_chunked_prefill_req: + chunked_prefill_token_count = ( + cu_seqlens[real_decode_count + real_prefill_count] + - cu_seqlens[real_decode_count + real_prefill_count - 1] + ) + assert self.cu_seqlens is not None + self._device_chunked_prefill_buffer[0] = 
self.cu_seqlens[regular_prefill_count] + self._device_chunked_prefill_buffer[1] = chunked_prefill_token_count + self.device_chunked_prefill = self._device_chunked_prefill_buffer def allocate_slot(self) -> Optional[int]: """ diff --git a/megatron/core/inference/contexts/attention_context/triton/tensor_ops.py b/megatron/core/inference/contexts/attention_context/triton/tensor_ops.py new file mode 100644 index 00000000000..2f3210488f5 --- /dev/null +++ b/megatron/core/inference/contexts/attention_context/triton/tensor_ops.py @@ -0,0 +1,462 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +from typing import Optional + +import torch +import triton # type: ignore +import triton.language as tl # type: ignore + + +@triton.jit +def _tensor_get_slice_after_kernel( + INPUT_TENSOR, + OUTPUT_TENSOR, + POS_ON_DEVICE, + INPUT_BATCH_SIZE: tl.constexpr, + OUTPUT_BATCH_SIZE: tl.constexpr, + ROW_SIZE: tl.constexpr, + BLOCK_SIZE: tl.constexpr, +): + """Kernel to copy rows from INPUT_TENSOR[pos_on_device:] into OUTPUT_TENSOR.""" + + pid = tl.program_id(0) + pos_on_device = tl.load(POS_ON_DEVICE) + copy_size = INPUT_BATCH_SIZE - pos_on_device + + if pid < copy_size and pid < OUTPUT_BATCH_SIZE: + input_idx = pos_on_device + pid + + if input_idx < INPUT_BATCH_SIZE: + row_offsets = tl.arange(0, BLOCK_SIZE) + row_mask = row_offsets < ROW_SIZE + + input_ptr = INPUT_TENSOR + input_idx * ROW_SIZE + row_offsets + output_ptr = OUTPUT_TENSOR + pid * ROW_SIZE + row_offsets + + input_data = tl.load(input_ptr, mask=row_mask, other=0.0) + tl.store(output_ptr, input_data, mask=row_mask) + + +@triton.jit +def _tensor_merge_kernel( + TENSOR_A, + TENSOR_B, + OUTPUT_TENSOR, + POS_ON_DEVICE, + TENSOR_B_BATCH_SIZE: tl.constexpr, + ROW_SIZE: tl.constexpr, + BLOCK_SIZE: tl.constexpr, + OUTPUT_BATCH_SIZE: tl.constexpr, + IS_INPLACE: tl.constexpr, +): + """ + Kernel to merge rows from tensor_a and tensor_b into output_tensor. 
+ + - output[:pos_on_device] = tensor_a[:pos_on_device] + - output[pos_on_device:pos_on_device + tensor_b_batch] = tensor_b[:tensor_b_batch] + """ + + pid = tl.program_id(0) + pos_on_device = tl.load(POS_ON_DEVICE) + + if pid < pos_on_device: + if not IS_INPLACE: + row_offsets = tl.arange(0, BLOCK_SIZE) + row_mask = row_offsets < ROW_SIZE + + tensor_a_ptr = TENSOR_A + pid * ROW_SIZE + row_offsets + output_ptr = OUTPUT_TENSOR + pid * ROW_SIZE + row_offsets + + tensor_a_data = tl.load(tensor_a_ptr, mask=row_mask, other=0.0) + tl.store(output_ptr, tensor_a_data, mask=row_mask) + + elif pid < pos_on_device + TENSOR_B_BATCH_SIZE and pid < OUTPUT_BATCH_SIZE: + tensor_b_idx = pid - pos_on_device + + if tensor_b_idx < TENSOR_B_BATCH_SIZE: + row_offsets = tl.arange(0, BLOCK_SIZE) + row_mask = row_offsets < ROW_SIZE + + tensor_b_ptr = TENSOR_B + tensor_b_idx * ROW_SIZE + row_offsets + output_ptr = OUTPUT_TENSOR + pid * ROW_SIZE + row_offsets + + tensor_b_data = tl.load(tensor_b_ptr, mask=row_mask, other=0.0) + tl.store(output_ptr, tensor_b_data, mask=row_mask) + + +@triton.jit +def _tensor_masked_update_kernel_2d( + STATES_PTR, + IDX_PTR, + NEW_STATES_PTR, + stride_state_b, + stride_state_d0, + stride_new_b, + stride_new_d0, + ROW_SIZE, + BLOCK_SIZE: tl.constexpr, +): + """Kernel to update values in a 2D states tensor using a mask.""" + pid_batch = tl.program_id(0).to(tl.int64) + pid_row_chunk = tl.program_id(1).to(tl.int64) + + target_idx = tl.load(IDX_PTR + pid_batch) + if target_idx == -1: + return + + row_start_offset = pid_row_chunk * BLOCK_SIZE + row_offsets = row_start_offset + tl.arange(0, BLOCK_SIZE) + mask = row_offsets < ROW_SIZE + + # 2D Calculation: base + batch * stride0 + col * stride1 + dst_ptr = ( + STATES_PTR + + (target_idx.to(tl.int64) * stride_state_b) + + (row_offsets.to(tl.int64) * stride_state_d0) + ) + src_ptr = ( + NEW_STATES_PTR + + (pid_batch * stride_new_b.to(tl.int64)) + + (row_offsets.to(tl.int64) * stride_new_d0) + ) + + val = tl.load(src_ptr, 
mask=mask) + tl.store(dst_ptr, val, mask=mask) + + +@triton.jit +def _tensor_masked_update_kernel_3d( + STATES_PTR, + IDX_PTR, + NEW_STATES_PTR, + stride_state_b, + stride_state_d0, + stride_state_d1, + stride_new_b, + stride_new_d0, + stride_new_d1, + SIZE_D0, + SIZE_D1, # Dimensions of the non-batch axes + ROW_SIZE, # Total elements per batch item (D0 * D1) + BLOCK_SIZE: tl.constexpr, +): + """Kernel to update values in a 3D states tensor using a mask.""" + pid_batch = tl.program_id(0).to(tl.int64) + pid_row_chunk = tl.program_id(1).to(tl.int64) + + target_idx = tl.load(IDX_PTR + pid_batch) + if target_idx == -1: + return + + # Linear index within the "row" (flattened 3D volume) + row_start_offset = pid_row_chunk * BLOCK_SIZE + flat_offsets = row_start_offset + tl.arange(0, BLOCK_SIZE) + mask = flat_offsets < ROW_SIZE + + # Reconstruct 3D coordinates from linear index + # Given shape (batch, D0, D1) + # idx_d1 = flat_idx % D1 + # idx_d0 = flat_idx // D1 + idx_d1 = flat_offsets % SIZE_D1.to(tl.int64) + idx_d0 = flat_offsets // SIZE_D1.to(tl.int64) + + # Calculate pointers using specific strides + dst_offset = ( + (target_idx.to(tl.int64) * stride_state_b.to(tl.int64)) + + (idx_d0 * stride_state_d0) + + (idx_d1 * stride_state_d1) + ) + + src_offset = ( + (pid_batch * stride_new_b.to(tl.int64)) + + (idx_d0 * stride_new_d0) + + (idx_d1 * stride_new_d1) + ) + + dst_ptr = STATES_PTR + dst_offset + src_ptr = NEW_STATES_PTR + src_offset + + val = tl.load(src_ptr, mask=mask) + tl.store(dst_ptr, val, mask=mask) + + +@triton.jit +def _tensor_masked_update_kernel_4d( + STATES_PTR, + IDX_PTR, + NEW_STATES_PTR, + stride_state_b, + stride_state_d0, + stride_state_d1, + stride_state_d2, + stride_new_b, + stride_new_d0, + stride_new_d1, + stride_new_d2, + SIZE_D0, + SIZE_D1, + SIZE_D2, # Dimensions (C, H, W) + ROW_SIZE, # Total elements (C * H * W) + BLOCK_SIZE: tl.constexpr, +): + """Kernel to update values in a 4D states tensor using a mask.""" + pid_batch = 
tl.program_id(0).to(tl.int64) + pid_row_chunk = tl.program_id(1).to(tl.int64) + + target_idx = tl.load(IDX_PTR + pid_batch) + if target_idx == -1: + return + + # Linear index + row_start_offset = pid_row_chunk * BLOCK_SIZE + flat_offsets = row_start_offset + tl.arange(0, BLOCK_SIZE) + mask = flat_offsets < ROW_SIZE + + # Reconstruct 4D coordinates from linear index + # Given shape (batch, D0, D1, D2) + # idx_d2 = flat % D2 + # temp = flat // D2 + # idx_d1 = temp % D1 + # idx_d0 = temp // D1 + + idx_d2 = flat_offsets % SIZE_D2.to(tl.int64) + temp = flat_offsets // SIZE_D2.to(tl.int64) + idx_d1 = temp % SIZE_D1.to(tl.int64) + idx_d0 = temp // SIZE_D1.to(tl.int64) + + # Calculate pointers using specific strides + dst_offset = ( + (target_idx.to(tl.int64) * stride_state_b.to(tl.int64)) + + (idx_d0 * stride_state_d0) + + (idx_d1 * stride_state_d1) + + (idx_d2 * stride_state_d2) + ) + + src_offset = ( + (pid_batch * stride_new_b.to(tl.int64)) + + (idx_d0 * stride_new_d0) + + (idx_d1 * stride_new_d1) + + (idx_d2 * stride_new_d2) + ) + + dst_ptr = STATES_PTR + dst_offset + src_ptr = NEW_STATES_PTR + src_offset + + val = tl.load(src_ptr, mask=mask) + tl.store(dst_ptr, val, mask=mask) + + +def _compute_row_size(tensor): + if tensor.ndim == 1: + return 1 + + row_size = 1 + for dim in tensor.shape[1:]: + row_size *= dim + return row_size + + +def tensor_get_slice_after(input_tensor, output_tensor, pos_on_device, check_bounds: bool = False): + """ + Copy from input_tensor[pos_on_device:] to output_tensor[:copy_size]. 
+ """ + + assert ( + input_tensor.device == output_tensor.device + ), "Input and output tensors must be on the same device" + assert ( + input_tensor.dtype == output_tensor.dtype + ), "Input and output tensors must have the same dtype" + assert ( + input_tensor.is_contiguous() and output_tensor.is_contiguous() + ), "Input and output tensors must be contiguous" + + if check_bounds: + assert ( + input_tensor.ndim == output_tensor.ndim + ), "Input and output tensors must have the same number of dimensions" + + for i in range(1, input_tensor.ndim): + assert ( + input_tensor.shape[i] == output_tensor.shape[i] + ), f"Dimension {i} must match between input and output tensors" + + pos_on_device_val = pos_on_device[0].item() + assert ( + 0 <= pos_on_device_val <= input_tensor.shape[0] + ), "pos_on_device must be between 0 and input_tensor.shape[0]" + + copy_size = input_tensor.shape[0] - pos_on_device_val + assert ( + copy_size <= output_tensor.shape[0] + ), f"Copy size ({copy_size}) exceeds output_tensor batch size ({output_tensor.shape[0]})" + + input_batch_size = input_tensor.shape[0] + output_batch_size = output_tensor.shape[0] + + row_size = _compute_row_size(input_tensor) + block_size = triton.next_power_of_2(row_size) + + grid = (input_batch_size,) if input_batch_size > 0 else (1,) + + if input_batch_size > 0: + _tensor_get_slice_after_kernel[grid]( + input_tensor, + output_tensor, + POS_ON_DEVICE=pos_on_device, + INPUT_BATCH_SIZE=input_batch_size, + OUTPUT_BATCH_SIZE=output_batch_size, + ROW_SIZE=row_size, + BLOCK_SIZE=block_size, + ) + + +def tensor_merge( + tensor_a: torch.Tensor, + tensor_b: torch.Tensor, + pos_on_device: torch.Tensor, + output_tensor: Optional[torch.Tensor] = None, + check_bounds: bool = False, +): + """ + Merge tensor_a and tensor_b. + + If output_tensor is None, the operation is performed in-place on tensor_a. 
+ """ + + is_inplace = False + if output_tensor is None: + output_tensor = tensor_a + is_inplace = True + + assert ( + tensor_a.device == tensor_b.device == output_tensor.device + ), "All tensors must be on the same device" + assert ( + tensor_a.dtype == tensor_b.dtype == output_tensor.dtype + ), "All tensors must have the same dtype" + assert ( + tensor_a.is_contiguous() and tensor_b.is_contiguous() and output_tensor.is_contiguous() + ), "All tensors must be contiguous" + + if check_bounds: + assert ( + tensor_a.ndim == tensor_b.ndim == output_tensor.ndim + ), "All tensors must have the same number of dimensions" + + for i in range(1, tensor_a.ndim): + assert ( + tensor_a.shape[i] == tensor_b.shape[i] == output_tensor.shape[i] + ), f"Dimension {i} must match across all tensors" + + assert ( + output_tensor.shape[0] >= tensor_a.shape[0] + ), "output_tensor batch size must be >= tensor_a batch size" + + pos_on_device_val = pos_on_device[0].item() + assert ( + 0 <= pos_on_device_val <= tensor_a.shape[0] + ), "pos_on_device must be between 0 and tensor_a batch size" + + tensor_b_batch_size = tensor_b.shape[0] + output_batch_size = output_tensor.shape[0] + + row_size = _compute_row_size(tensor_a) + block_size = triton.next_power_of_2(row_size) + + grid = (output_batch_size,) + + _tensor_merge_kernel[grid]( + tensor_a, + tensor_b, + output_tensor, + POS_ON_DEVICE=pos_on_device, + TENSOR_B_BATCH_SIZE=tensor_b_batch_size, + ROW_SIZE=row_size, + BLOCK_SIZE=block_size, + OUTPUT_BATCH_SIZE=output_batch_size, + IS_INPLACE=is_inplace, + ) + + +def tensor_masked_update(states: torch.Tensor, idx: torch.Tensor, new_states: torch.Tensor): + """ + Update `states` to `new_states` at `idx`, but ignore any -1 values in `idx`. + Works for 2D, 3D, or 4D tensors. + + Args: + states: (N, ...) - Destination tensor (2D, 3D, or 4D) + idx: (B,) - Indices to update. -1 means skip. + new_states: (B, ...) - Source tensor. 
Must match states shape[1:] + """ + assert states.is_cuda and idx.is_cuda and new_states.is_cuda + assert idx.ndim == 1 + assert states.shape[1:] == new_states.shape[1:], "State dimensions must match" + + ndim = states.ndim + assert ndim in [2, 3, 4], "Only 2D, 3D, and 4D tensors are supported" + + n_updates = idx.shape[0] + + row_size = 1 + for dim in states.shape[1:]: + row_size *= dim + + BLOCK_SIZE = 1024 + grid = lambda meta: (n_updates, triton.cdiv(row_size, meta["BLOCK_SIZE"])) + + if ndim == 2: + _tensor_masked_update_kernel_2d[grid]( + STATES_PTR=states, + IDX_PTR=idx, + NEW_STATES_PTR=new_states, + stride_state_b=states.stride(0), + stride_state_d0=states.stride(1), + stride_new_b=new_states.stride(0), + stride_new_d0=new_states.stride(1), + ROW_SIZE=row_size, + BLOCK_SIZE=BLOCK_SIZE, + ) + + elif ndim == 3: + # Shapes: (N, D0, D1) + _tensor_masked_update_kernel_3d[grid]( + STATES_PTR=states, + IDX_PTR=idx, + NEW_STATES_PTR=new_states, + # Strides + stride_state_b=states.stride(0), + stride_state_d0=states.stride(1), + stride_state_d1=states.stride(2), + stride_new_b=new_states.stride(0), + stride_new_d0=new_states.stride(1), + stride_new_d1=new_states.stride(2), + # Dims + SIZE_D0=states.shape[1], + SIZE_D1=states.shape[2], + ROW_SIZE=row_size, + BLOCK_SIZE=BLOCK_SIZE, + ) + + elif ndim == 4: + # Shapes: (N, D0, D1, D2) + _tensor_masked_update_kernel_4d[grid]( + STATES_PTR=states, + IDX_PTR=idx, + NEW_STATES_PTR=new_states, + # Strides + stride_state_b=states.stride(0), + stride_state_d0=states.stride(1), + stride_state_d1=states.stride(2), + stride_state_d2=states.stride(3), + stride_new_b=new_states.stride(0), + stride_new_d0=new_states.stride(1), + stride_new_d1=new_states.stride(2), + stride_new_d2=new_states.stride(3), + # Dims + SIZE_D0=states.shape[1], + SIZE_D1=states.shape[2], + SIZE_D2=states.shape[3], + ROW_SIZE=row_size, + BLOCK_SIZE=BLOCK_SIZE, + ) diff --git a/megatron/core/inference/contexts/dynamic_block_allocator.py 
b/megatron/core/inference/contexts/dynamic_block_allocator.py index b8b473097f4..8207a17550c 100644 --- a/megatron/core/inference/contexts/dynamic_block_allocator.py +++ b/megatron/core/inference/contexts/dynamic_block_allocator.py @@ -16,21 +16,20 @@ class BlockAllocator: Args: context (DynamicInferenceContext): Dynamic inference context. - active_count (int): Total number of active blocks available in the buffer. - The full buffer size is 2*active_count, to accommodate an equal-size - space for paused requests that live on the CPU. + total_count (int): Total number of blocks in the buffer. + paused_count (int): Number of paused blocks in the buffer. Must be less + than `total_count`. """ - def __init__(self, context: "DynamicInferenceContext", total_count: int): + def __init__(self, context: "DynamicInferenceContext", total_count: int, paused_count: int): self.context = context - active_count = (total_count - 1) // 2 # -1 for dummy_block_idx (see below) - active_count = max(1, active_count) # need at least one block - self.total_count = 2 * active_count + 1 # +1 for dummy_block_idx - self.total_avail = self.total_count - 1 # -1 for dummy_block_idx - self.active_count = active_count - self.paused_count = self.total_count - self.active_count - 1 # -1 for dummy_block_idx + self.total_count = total_count + self.total_avail = total_count - 1 # -1 for dummy_block_idx (see below) + self.paused_count = paused_count + self.active_count = total_count - paused_count - 1 # -1 for dummy_block_idx + assert self.active_count >= 1 # ensures paused_count < total_count - 1 self.dummy_block_idx = self.total_count - 1 # Initialize block pool as a "stack" data structure @@ -40,10 +39,15 @@ def __init__(self, context: "DynamicInferenceContext", total_count: int): def __str__(self): return ( - f"total avail {self.total_avail} / {self.total_count - 1}" - f"; active {self.active_count}" + f"using: total {self.get_total_used()}/{self.total_count - 1}" + f"; active 
{self.get_active_used()}/{self.active_count}" + f"; paused {self.get_paused_used()}/{self.paused_count}" ) + def get_total_used(self): + """Compute number of total blocks used.""" + return self.total_count - self.total_avail - 1 + def get_active_used(self): """Compute number of active blocks used.""" return ( @@ -77,7 +81,7 @@ def is_memory_available(self, num_blocks: int) -> bool: Return: (bool) Is memory available? """ - return self.get_active_avail() >= num_blocks + return self.total_avail >= num_blocks def allocate_memory_blocks(self, num_blocks: int) -> Optional[Tensor]: """Allocate memory blocks if available, else return None. diff --git a/megatron/core/inference/contexts/dynamic_context.py b/megatron/core/inference/contexts/dynamic_context.py index 6e70d71fe26..b4e50ff6c8c 100644 --- a/megatron/core/inference/contexts/dynamic_context.py +++ b/megatron/core/inference/contexts/dynamic_context.py @@ -28,10 +28,11 @@ from megatron.core.inference.utils import tensor_swap from megatron.core.models.common.embeddings.rope_utils import apply_rotary_pos_emb from megatron.core.package_info import __version__ as mcore_version +from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.ssm.mamba_hybrid_layer_allocation import get_layer_maps_from_layer_type_list from megatron.core.transformer import TransformerConfig from megatron.core.utils import divide as core_divide -from megatron.core.utils import internal_api +from megatron.core.utils import get_attr_wrapped_model, get_pg_size, internal_api from .attention_context.mamba_metadata import MambaInferenceStateConfig, MambaMetadata from .attention_context.mha_metadata import GraphedMHAMetadata, NonGraphedMHAMetadata @@ -116,7 +117,7 @@ class BlockOverflowError(ContextOverflowError): class ActiveRequestCountOverflowError(ContextOverflowError): '''Used when `initialize_attention_state()` is called with - `num_warmup_requests > max_active_requests.''' + `num_warmup_requests > max_requests.''' 
def __init__(self, max_request_count, active_request_count): assert active_request_count > max_request_count @@ -174,7 +175,7 @@ def deserialize(cls, obj: dict) -> ContextOverflowError: "ActiveRequestCountOverflowError": ActiveRequestCountOverflowError, }[obj["type"]] error = ContextOverflowError(**{k: v for k, v in obj.items() if k != "type"}) - error.__class__ = error_cls # todo (@lmcafe): better/safer alternative? + error.__class__ = error_cls # todo (@lmcafee): better/safer alternative? return error @@ -199,9 +200,9 @@ class DynamicInferenceContext(BaseInferenceContext): at any step. The only constraint is the maximum number of requests or tokens that the context is defined to support. For the block-level KV cache, a memory buffer is allocated up front (size `buffer_size_gb` if `unified_memory_level` - == 0, or `2 * buffer_size_gb` if `unified_memory_level` == 1), that is - divided into blocks and dynamically assigned to requests. At any given step, - any unassigned blocks equate to unused space. + == 0, or `buffer_size_gb + paused_buffer_size_gb` if `unified_memory_level` == + 1), that is divided into blocks and dynamically assigned to requests. At any + given step, any unassigned blocks equate to unused space. Args: params_dtype (torch.dtype): Dtype used for KV cache. @@ -212,9 +213,14 @@ class DynamicInferenceContext(BaseInferenceContext): that will occur. buffer_size_gb (float): Buffer size reserved on the GPU for the KV cache. if `unified_memory_level` >= 1, then CPU memory is additionally - utilized, resulting in a total buffer size of `2 * buffer_size_gb`. - Regardless of total buffer size, the KV cache is conceptually divided - into 50% active requests and 50% paused requests. + utilized, resulting in a total buffer size of `buffer_size_gb + + paused_buffer_size_gb`. + paused_buffer_size_gb (float | None): Portion of buffer reserved for + paused requests. 
Active requests are paused when there are not enough + active blocks available to continue generating a request. The total + buffer size (active + paused) depends on `unified_memory_level` (uvm): + - uvm 0: buffer_size_gb (paused buffer is inclusive) + - uvm 1: buffer_size_gb + paused_buffer_size_gb max_requests (int): Max number of active requests to use for decode-only forward passes. This value is primarily limited by the combination of `buffer_size_gb` and `max_sequence_length`. @@ -224,7 +230,7 @@ class DynamicInferenceContext(BaseInferenceContext): block_size_tokens (int): Size of KV cache block size. tensor_model_parallel_size (Optional[int]): Tensor model parallel size. num_cuda_graphs (Optional[int]): Maximum number of cuda graphs to capture, - where the cuda graph batch sizes range from 1 to `max_active_requests` + where the cuda graph batch sizes range from 1 to `max_requests` (as computed below). Due to rounding, the actual number of cuda graphs may not equal this argument. materialize_only_last_token_logits (Optional[bool]): Whether to only @@ -259,10 +265,13 @@ def __init__( num_attention_heads: int, max_sequence_length: int, buffer_size_gb: float, + paused_buffer_size_gb: float | None = None, max_requests: int = None, max_tokens: int = DEFAULT_MAX_TOKENS, block_size_tokens: int = 256, tensor_model_parallel_size: Optional[int] = None, + pipeline_model_parallel_size: Optional[int] = None, + pg_collection: Optional[ProcessGroupCollection] = None, cache_mla_latent: bool = False, kv_lora_rank: Optional[int] = None, qk_pos_emb_head_dim: Optional[int] = None, @@ -271,11 +280,12 @@ def __init__( mamba_inference_state_config: Optional[MambaInferenceStateConfig] = None, use_cuda_graphs_for_non_decode_steps: bool = True, use_flashinfer_fused_rope: bool = False, - unified_memory_level: Optional[int] = 1, + unified_memory_level: Optional[int] = 0, cuda_graph_max_tokens: Optional[int] = None, cuda_graph_mixed_prefill_count: Optional[int] = 16, metrics_writer: 
Optional['WandbModule'] = None, request_metadata_types: Optional[List[Tuple[str, torch.dtype, bool]]] = None, + persist_cuda_graphs: Optional[bool] = False, ): super().__init__(materialize_only_last_token_logits=materialize_only_last_token_logits) @@ -290,7 +300,7 @@ def __init__( warnings.warn( "`cuda_graph_max_tokens` is deprecated and will be removed in a future release. " "The context now automatically sets the max tokens for cuda graphs based on " - "`max_active_requests`.", + "`max_requests`.", DeprecationWarning, ) @@ -299,12 +309,44 @@ def __init__( # Per partition num heads and hidden size. projection_size = kv_channels * num_attention_heads if tensor_model_parallel_size is None: - tp_size = parallel_state.get_tensor_model_parallel_world_size() + tp_size = ( + get_pg_size(pg_collection.tp) + if pg_collection is not None + else parallel_state.get_tensor_model_parallel_world_size() + ) else: tp_size = tensor_model_parallel_size self.hidden_size_per_attention_head = core_divide(projection_size, num_attention_heads) self.num_attention_heads_per_partition = core_divide(num_attention_heads, tp_size) + if pipeline_model_parallel_size is None: + pp_size = ( + get_pg_size(pg_collection.pp) + if pg_collection is not None + else parallel_state.get_pipeline_model_parallel_world_size() + ) + else: + pp_size = pipeline_model_parallel_size + + # Cache the PP group we should use for PP collectives inside the context. + # If the model provides a pg_collection with a pp group, prefer it. 
+ # Otherwise: + # - for PP=1 we don't need a PP group at all + # - for PP>1 we require Megatron parallel_state to be initialized + if pg_collection is not None and get_pg_size(pg_collection.pp) > 1: + self.pipeline_parallel_group = pg_collection.pp + elif pp_size > 1: + self.pipeline_parallel_group = parallel_state.get_pipeline_model_parallel_group() + else: + self.pipeline_parallel_group = None + + if pg_collection is not None: + self.expert_model_parallel_group = pg_collection.ep + elif parallel_state.get_expert_model_parallel_world_size() > 1: + self.expert_model_parallel_group = parallel_state.get_expert_model_parallel_group() + else: + self.expert_model_parallel_group = None + # Mamba states. self.is_hybrid_model = mamba_inference_state_config is not None if self.is_hybrid_model: @@ -316,9 +358,6 @@ def __init__( assert ( mamba_ssm_states_shape is not None ), "`mamba_ssm_states_shape` must be specified for hybrid models" - assert not ( - num_cuda_graphs is not None and use_cuda_graphs_for_non_decode_steps - ), "Non-decode CUDA graphs not yet supported for hybrid models" # For hybrid models, the layer map converts the global layer index to the # corresponding attention layer index or Mamba layer index depending on the @@ -375,6 +414,7 @@ def __init__( # Unified memory. self.unified_memory_level = unified_memory_level + self.persist_cuda_graphs = persist_cuda_graphs if unified_memory_level > 0: try: self.unified_memory_mempool = create_unified_mempool() @@ -387,36 +427,38 @@ def __init__( # Initialize block allocator. buffer_size_bytes = int(buffer_size_gb * 1024**3) - block_count_total = buffer_size_bytes // ( + paused_buffer_size_bytes = ( + 0 if paused_buffer_size_gb is None else int(paused_buffer_size_gb * 1024**3) + ) + # TODO: Add parameter to control fraction of memory assigned to KV cache + # versus Mamba state. 
+ block_count = buffer_size_bytes // (self.block_size_bytes + mamba_states_memory_per_request) + block_count = max(2, block_count) # need >= 1 active block + 1 dummy block + paused_block_count = paused_buffer_size_bytes // ( self.block_size_bytes + mamba_states_memory_per_request ) + + # If using pipeline parallelism synchronize the total block count in case the + # pipeline stages have different layer allocations. Non-uniform block counts + # can lead to some ranks pausing requests earlier than other ranks + # (i.e., divergence in the scheduling behavior). + if pp_size > 1: + block_count_tensor = torch.tensor( + block_count, dtype=torch.int32, device=torch.cuda.current_device() + ) + torch.distributed.all_reduce( + block_count_tensor, + op=torch.distributed.ReduceOp.MIN, + group=self.pipeline_parallel_group, + ) + block_count = block_count_tensor.item() + self.block_allocator = BlockAllocator( context=self, total_count=( - block_count_total if self.unified_memory_level == 0 else 2 * block_count_total + block_count if self.unified_memory_level == 0 else block_count + paused_block_count ), - ) - - # Set max_total_requests, max_active_requests, max_tokens. - self.max_total_requests = self.block_allocator.total_count - 1 # -1 for dummy block - max_active_requests = self.block_allocator.active_count // tp_size * tp_size - self.max_active_requests = ( - max_active_requests // self.REQUEST_ROUNDER * self.REQUEST_ROUNDER - ) - self.max_tokens = max_tokens or self.DEFAULT_MAX_TOKENS - - # User-specified max_requests. 
- if max_requests is not None: - assert max_requests <= self.max_active_requests, ( - f"User-specified `max_requests` {max_requests} > " - f"`max_active_requests` {self.max_active_requests}" - ) - self.max_active_requests = max_requests - - assert self.max_tokens >= self.max_active_requests, ( - f"max_tokens ({self.max_tokens}) must be >= " - f"max_active_requests ({self.max_active_requests}), " - "to have consistency between cuda graph sizes and the block table size." + paused_count=paused_block_count, ) # Track request metadata. @@ -445,6 +487,24 @@ def __init__( # Block ids. self.max_kv_block_count = math.ceil(self.max_sequence_length / self.block_size_tokens) + # Set max_requests, max_tokens. + if max_requests is None: + # Maximize compute utilization by defaulting to 1 block per request. + self.max_requests = self.block_allocator.total_count - 1 # -1 for dummy block + self.max_requests = self.max_requests // tp_size * tp_size + self.max_requests = self.max_requests // self.REQUEST_ROUNDER * self.REQUEST_ROUNDER + else: + # User can control request overflow via max_requests. + self.max_requests = max_requests + + self.max_tokens = max_tokens or self.DEFAULT_MAX_TOKENS + + assert self.max_tokens >= self.max_requests, ( + f"max_tokens ({self.max_tokens}) must be >= " + f"max_requests ({self.max_requests}), " + "to have consistency between cuda graph sizes and the block table size." 
+ ) + # Attention metadata initialization (tensors are now handled by MHAMetadata classes) self.num_prefill_requests = 0 @@ -455,7 +515,7 @@ def __init__( self.graph_attn_metadata["mha_metadata"] = GraphedMHAMetadata( block_count_total=self.block_allocator.total_count, max_kv_block_count=self.max_kv_block_count, - max_requests=self.max_total_requests, + max_requests=self.max_requests, block_size_tokens=self.block_size_tokens, max_seqlen=self.max_sequence_length, ) @@ -463,20 +523,19 @@ def __init__( self.non_graph_attn_metadata["mha_metadata"] = NonGraphedMHAMetadata( block_count_total=self.block_allocator.total_count, max_kv_block_count=self.max_kv_block_count, - max_requests=self.max_total_requests, + max_requests=self.max_requests, block_size_tokens=self.block_size_tokens, max_seqlen=self.max_sequence_length, ) # CUDA graph config list - is_expert_parallel = parallel_state.get_expert_model_parallel_world_size() > 1 self.cuda_graph_batch_dimensions_list, self.cuda_graph_token_counts = ( CUDAGraphBatchDimensionBuilder.generate_cuda_graph_batch_dimensions_list( tp_size=tp_size, num_cuda_graphs=num_cuda_graphs, - cuda_graph_max_tokens=self.max_active_requests, + cuda_graph_max_tokens=self.max_requests, cuda_graph_mixed_prefill_count=cuda_graph_mixed_prefill_count, - max_requests=self.max_active_requests, + max_requests=self.max_requests, max_tokens=self.max_tokens, max_sequence_length=self.max_sequence_length, use_cuda_graphs_for_non_decode_steps=use_cuda_graphs_for_non_decode_steps, @@ -487,6 +546,7 @@ def __init__( self.use_cuda_graphs_for_non_decode_steps = use_cuda_graphs_for_non_decode_steps # Deal with chunked prefill self.chunked_prefill_request_id = -1 + self.has_explicit_chunked_prefill_req = False # FlashInfer. if use_flashinfer_fused_rope is True: @@ -541,7 +601,7 @@ def allocate_all_tensors(self, *, is_init: bool) -> None: # Per-request state. 
self.request_ids = torch.full( - (self.max_total_requests,), -1, dtype=torch.int32, device=torch.cuda.current_device() + (self.max_requests,), -1, dtype=torch.int32, device=torch.cuda.current_device() ) # request_query_lengths is the input prompt tokens length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation) self.request_query_lengths = torch.empty_like(self.request_ids) @@ -554,7 +614,7 @@ def allocate_all_tensors(self, *, is_init: bool) -> None: # request_last_kv_block_offset represents number of tokens in the last kv block self.request_last_kv_block_offset = torch.empty_like(self.request_ids) self.request_to_kv_block_ids = torch.full( - (self.max_total_requests, self.max_kv_block_count), + (self.max_requests, self.max_kv_block_count), -1, dtype=torch.int, device=torch.cuda.current_device(), @@ -563,7 +623,7 @@ def allocate_all_tensors(self, *, is_init: bool) -> None: # Track request metadata. self.request_metadata = { label: torch.empty( - (self.max_total_requests,), dtype=dtype, device=torch.cuda.current_device() + (self.max_requests,), dtype=dtype, device=torch.cuda.current_device() ) for label, dtype, _ in self.request_metadata_types } @@ -615,14 +675,16 @@ def allocate_mamba_states(): """Allocate Mamba states. 
This function is called below within `with ctx_manager:`.""" if self.is_hybrid_model: - self.mamba_metadata = MambaMetadata(max_requests=self.max_total_requests) + self.mamba_metadata = MambaMetadata( + max_requests=self.max_requests, max_tokens=self.max_tokens + ) self.mamba_conv_states = torch.empty( - (self.num_mamba_layers, self.max_total_requests) + self.mamba_conv_states_shape, + (self.num_mamba_layers, self.max_requests) + self.mamba_conv_states_shape, dtype=self.params_dtype, device=torch.cuda.current_device(), ) self.mamba_ssm_states = torch.empty( - (self.num_mamba_layers, self.max_total_requests) + self.mamba_ssm_states_shape, + (self.num_mamba_layers, self.max_requests) + self.mamba_ssm_states_shape, dtype=self.params_dtype, device=torch.cuda.current_device(), ) @@ -700,28 +762,51 @@ def from_config( buffer_size_gb: float = 40, num_cuda_graphs: int = None, mamba_inference_state_config: Optional[MambaInferenceStateConfig] = None, + unified_memory_level: int = 0, ): """ Instantiate a `DynamicInferenceContext` from a `TransformerConfig` and an `InferenceWrapperConfig`. """ # TODO: Add other necessary configs from inference_config - model_config = model.config - max_sequence_length = ( - inference_config.inference_max_seq_length or model_config.max_sequence_length - ) + # Max sequence length. + position_embedding_type = get_attr_wrapped_model(model, "position_embedding_type") + model_max_seq_len = get_attr_wrapped_model(model, "max_sequence_length") + inf_max_seq_len = inference_config.inference_max_seq_length + + if position_embedding_type == "learned_absolute": + # When using absolute position embeddings, it is critical that the + # context's `max_sequence_length` is less than or equal to the model's + # `max_sequence_length`. Otherwise, the context's `position_ids` will + # contain ids greater than the dimension of the position embedding + # tensor, which will result in an index error. 
+ if inf_max_seq_len: + max_sequence_length = min(model_max_seq_len, inf_max_seq_len) + else: + max_sequence_length = model_max_seq_len + assert max_batch_size <= model_max_seq_len + else: + max_sequence_length = ( + inference_config.inference_max_seq_length or model_config.max_sequence_length + ) max_sequence_length = max(max_sequence_length, max_batch_size) + + # Context. + model_config = model.config return cls( params_dtype=inference_config.params_dtype, num_layers=model_config.num_layers // model_config.pipeline_model_parallel_size, kv_channels=model_config.kv_channels, num_attention_heads=model_config.num_query_groups, - max_sequence_length=inference_config.inference_max_seq_length, + tensor_model_parallel_size=model_config.tensor_model_parallel_size, + pipeline_model_parallel_size=model_config.pipeline_model_parallel_size, + max_sequence_length=max_sequence_length, buffer_size_gb=buffer_size_gb, materialize_only_last_token_logits=False, num_cuda_graphs=num_cuda_graphs, use_flashinfer_fused_rope=None, mamba_inference_state_config=mamba_inference_state_config, + unified_memory_level=unified_memory_level, ) @classmethod @@ -1001,7 +1086,7 @@ def reset_attention_state(self) -> None: self.active_attn_metadata = None if self.is_hybrid_model: - self.mamba_metadata.reset_cudagraph_mapping() + self.mamba_metadata.reset_varlen_metadata() def reset_mamba_state(self) -> None: """Reset state used within Mamba layers.""" @@ -1017,7 +1102,7 @@ def add_dummy_requests_parallel( return num_new_requests = len(requests) - if self.total_request_count + num_new_requests > self.max_active_requests: + if self.total_request_count + num_new_requests > self.max_requests: raise RequestOverflowError(requests[-1].request_id) lengths: List[int] = [] @@ -1226,12 +1311,15 @@ def initialize_attention_state( token_count=self.active_token_count, prefill_req_count=self.num_prefill_requests, decode_req_count=self.num_decode_requests, + 
has_explicit_chunked_prefill_req=self.has_explicit_chunked_prefill_req, ) self.batch_dimensions = batch_dimensions best_graph = CUDAGraphBatchDimensionBuilder.match_graph_config( batch_dimensions, self.cuda_graph_batch_dimensions_list, + strict=self.is_hybrid_model, decode_only_cuda_graphs=(not self.use_cuda_graphs_for_non_decode_steps), + ep_group=self.expert_model_parallel_group, ) self._using_cuda_graph_this_step = best_graph is not None @@ -1242,14 +1330,14 @@ def initialize_attention_state( if self.is_decode_only(): padded_token_count = min( self.max_tokens, - self.max_active_requests, + self.max_requests, self.round_up_tokens(self.active_token_count), ) padded_decode_req_count = padded_token_count padded_prefill_req_count = 0 else: target_padding_req_count = min( - self.max_active_requests, + self.max_requests, self.round_up_requests(self.total_request_count - self.paused_request_count), ) padded_decode_req_count = self.num_decode_requests @@ -1258,9 +1346,11 @@ def initialize_attention_state( token_count=padded_token_count, prefill_req_count=padded_prefill_req_count, decode_req_count=padded_decode_req_count, + has_explicit_chunked_prefill_req=self.has_explicit_chunked_prefill_req, ) self.padded_active_token_count = self.padded_batch_dimensions.token_count self.padded_active_request_count = self.padded_batch_dimensions.req_count + self.padding_slice = slice(self.active_token_count, self.padded_active_token_count) # Update token position indexes. self.token_to_block_idx[self.active_token_count : self.padded_active_token_count] = ( @@ -1287,6 +1377,8 @@ def initialize_attention_state( attn_dimensions = batch_dimensions if self.using_cuda_graph_this_step(): + assert not self.has_explicit_chunked_prefill_req + # Treat some decode requests as prefill requests to fit the cuda graph batch dimension. 
if batch_dimensions.decode_req_count > self.padded_batch_dimensions.decode_req_count: total_req = batch_dimensions.req_count @@ -1296,6 +1388,7 @@ def initialize_attention_state( token_count=batch_dimensions.token_count, prefill_req_count=adjusted_prefill_req_count, decode_req_count=adjusted_decode_req_count, + has_explicit_chunked_prefill_req=False, ) self.active_attn_metadata["mha_metadata"].update( @@ -1306,15 +1399,19 @@ def initialize_attention_state( padded_batch_dimensions=self.padded_batch_dimensions, ) - # Create Mamba state block table if it's a hybrid model if self.is_hybrid_model: - active_mamba_indices = self.mamba_metadata.request_to_mamba_state_idx[ - self.paused_request_count : self.total_request_count + active_mamba_indices_view = self.mamba_metadata.request_to_mamba_state_idx[active_slice] + token_to_request_idx_view = self.token_to_request_idx[: self.active_token_count] + cu_seqlens = self.active_attn_metadata["mha_metadata"].state_data[ + "cu_query_seq_lengths" ] - if self.is_decode_only() or self.using_cuda_graph_this_step(): - self.mamba_metadata.update_cudagraph_mapping( - active_mamba_indices, self.total_request_count - self.paused_request_count - ) + self.mamba_metadata.update( + active_mamba_indices_view, + token_to_request_idx_view, + cu_seqlens, + batch_dimensions=attn_dimensions, + padded_batch_dimensions=self.padded_batch_dimensions, + ) def reset(self) -> None: """Reset entire context. @@ -1370,6 +1467,7 @@ def reset(self) -> None: # Reset chunked prefill state self.chunked_prefill_request_id = -1 + self.has_explicit_chunked_prefill_req = False self.num_prefill_requests = 0 self._using_cuda_graph_this_step = False self.padded_batch_dimensions = InferenceBatchDimensions( @@ -1430,11 +1528,10 @@ def check_availability(self, req: DynamicInferenceRequest) -> (bool, bool, bool) Check if the request can be added to the context. 
"""
        request_can_be_added = (
-            self.total_request_count - self.paused_request_count < self.max_active_requests
+            self.total_request_count < self.max_requests
            and self.paused_request_count == 0
        )
        request_tokens_can_be_added = (
            self.active_token_count + req.remaining_prompt_length <= self.max_tokens
-            and self.paused_request_count == 0
        )
        blocks = math.ceil(
            (req.remaining_prompt_length + req.finished_chunk_token_count) / self.block_size_tokens
@@ -1503,7 +1600,7 @@ def add_request(self, req: DynamicInferenceRequest, chunk_length: Optional[int]
        else:
            current_id = self.total_request_count

-        if current_id >= self.max_active_requests:
+        if current_id >= self.max_requests:
            raise RequestOverflowError(req.request_id)

        if self.active_token_count + chunk_length > self.max_tokens:
@@ -1631,7 +1728,217 @@ def get_index_of_chunked_prefill_request(self) -> int:
        """
        return torch.where(self.request_ids == self.chunked_prefill_request_id)[0][0]

-    # TODO: see if we can compile this function
+    def release_memory_blocks_from_request_indexes(self, request_indexes) -> None:
+        """Release memory blocks used by the given request idxs.
+
+        Args:
+            request_indexes (torch.Tensor): Request indexes. (*Note*, NOT request
+                ids.)
+        """
+        kv_blocks_assigned = self.request_to_kv_block_ids[request_indexes]
+        non_zero_values_in_kv_memory = kv_blocks_assigned[kv_blocks_assigned != -1]
+        self.block_allocator.release_memory_blocks(non_zero_values_in_kv_memory)
+
+        # Reset the KV blocks for finished requests.
+        # Note: do not use fill_() (or add_() and similar inplace ops) here.
+        # The combination of indexing with a tensor (like finished_idxs) and
+        # fill_()/add_() creates a clone and updates it instead of the original
+        # tensor.
+        self.request_to_kv_block_ids[request_indexes] = -1
+
+        # Free Mamba slots.
+ if self.is_hybrid_model: + self.mamba_metadata.free_slots(request_indexes) + + def resume_paused_requests( + self, + active_request_count: int, + newly_paused_request_ids: torch.Tensor, + next_tokens: torch.Tensor, + ) -> tuple[int, int, torch.Tensor]: + """Resume as many paused requests as we have space for in the active buffer. + + Args: + active_request_count (int): Number of active requests. + newly_paused_request_ids (torch.Tensor): List of newly paused request ids. + next_tokens (torch.Tensor): Sampled tokens. + + Returns: + (tuple[int, torch.Tensor]) active_request_count, newly_paused_request_ids. + """ + + # Assign released blocks to paused requests. + # todo: @shanmugamr, un-pause requests using FIFO, rather than LIFO. + resume_request_count = 0 + if self.paused_request_count > 0: + active_block_count_avail = self.block_allocator.get_active_avail() + paused_block_counts = self.request_kv_block_counts[: self.paused_request_count] + # Flip counts before cumsum, since paused requests are resumed from + # the right-most index, so we must count resumed blocks starting from + # the right side. + paused_block_counts = paused_block_counts.flip(dims=[0]) + # Add +1 to all block counts, since any time a paused request is + # resumed, it will be starting a new memory block. For background, + # pausing happens after a request has generated the final token of a + # memory block (i.e., token 256 of that block), which means the very + # next token (whenever that request gets unpaused) will be in a new + # block. So, when we resume a paused request, we have to account for + # the fact that it will need an extra block beyond the ones that it + # has already used. 
+ paused_block_counts += 1 # +1 for newly added block + paused_block_counts_cumsum = paused_block_counts.cumsum(dim=0) + resume_request_count = min( + torch.nonzero(paused_block_counts_cumsum <= active_block_count_avail).numel(), + self.block_allocator.total_avail, + ) + + self.paused_request_count -= resume_request_count + active_request_count += resume_request_count + + # Resume requests by assigning blocks and updating bookkeeping tensors. + if resume_request_count > 0: + assert torch.all( + self.request_last_kv_block_offset[ + self.paused_request_count : (self.paused_request_count + resume_request_count) + ] + == self.block_size_tokens - 1 + ), "The request_last_kv_block_offset should be 0 for the requests that just got resumed this step." + + assert resume_request_count <= self.block_allocator.total_avail + block_ids = self.block_allocator.allocate_memory_blocks(resume_request_count) + row_idx = torch.arange( + self.paused_request_count, + self.paused_request_count + resume_request_count, + device=torch.cuda.current_device(), + ) + col_idx = self.request_kv_block_counts[ + self.paused_request_count : (self.paused_request_count + resume_request_count) + ] + self.request_to_kv_block_ids[row_idx, col_idx] = block_ids + self.request_kv_block_counts[ + self.paused_request_count : (self.paused_request_count + resume_request_count) + ] += 1 + self.request_last_kv_block_id[ + self.paused_request_count : (self.paused_request_count + resume_request_count) + ] = block_ids + + # Remove resumed requests from newly_paused_request_ids. We do this by + # truncating the end of newly_paused_request_ids, which works because we + # resume requests in LIFO order. If resume_request_count > + # len(newly_paused_request_ids), this means that none of the paused + # requests are newly paused during this update. 
+ if newly_paused_request_ids is not None and resume_request_count > 0: + newly_paused_request_ids = newly_paused_request_ids[:-resume_request_count] + + return active_request_count, newly_paused_request_ids + + def evict_overflow_paused_requests( + self, active_request_count: int, next_tokens: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor]: + """Evict requests that overflow the paused buffer. + + Args: + active_request_count (int): Number of active requests. + next_tokens (torch.Tensor): Sampled tokens. + + Returns: + (torch.Tensor) Evicted request ids. + """ + + # Overflow paused block count. + overflow_paused_block_count = ( + self.block_allocator.get_paused_used() - self.block_allocator.paused_count + ) + + # Nothing to evict? + if overflow_paused_block_count <= 0: + return None + + # Overflow paused block count. + paused_block_counts = self.request_kv_block_counts[: self.paused_request_count] + paused_block_counts_cumsum = paused_block_counts.cumsum(dim=0) + valid_paused_request_count = torch.nonzero( + paused_block_counts_cumsum <= self.block_allocator.paused_count + ).numel() + overflow_paused_request_count = self.paused_request_count - valid_paused_request_count + + # Nothing to evict? (Similar to checking overflow_paused_block_count + # above, but here we allow up to one paused request to overflow into the + # active buffer. + if overflow_paused_request_count == 0: + return None + + # Evict request count. (Flip paused_block_counts because evictions are + # counted from the right-most paused requests. 
+ paused_block_counts = paused_block_counts[-overflow_paused_request_count:].flip(dims=[0]) + paused_block_counts_cumsum = paused_block_counts.cumsum(dim=0) + remaining_paused_request_counts = torch.arange( + overflow_paused_request_count - 1, + -1, + -1, + dtype=paused_block_counts_cumsum.dtype, + device=torch.cuda.current_device(), + ) + net_block_counts = paused_block_counts_cumsum - remaining_paused_request_counts + evict_request_count = torch.nonzero(net_block_counts >= 0)[0].item() + 1 + + # Eviction index range. + evict_start_idx = self.paused_request_count - evict_request_count + evict_end_idx = self.paused_request_count + evict_request_idxs = torch.arange( + evict_start_idx, evict_end_idx, device=torch.cuda.current_device() + ) + evict_request_ids = self.request_ids[evict_start_idx:evict_end_idx].clone() + + # Release memory. + self.release_memory_blocks_from_request_indexes(evict_request_idxs) + + # Move evicted requests to the right of active requests, while minimizing + # movement. + if evict_request_count < active_request_count: + # Swap all evicted requests with right-most active requests. + src_idxs = torch.arange( + self.paused_request_count - evict_request_count, + self.paused_request_count, + device=torch.cuda.current_device(), + ) + dst_idxs = torch.arange( + self.total_request_count - evict_request_count, + self.total_request_count, + device=torch.cuda.current_device(), + ) + else: + # Swap all active requests with left-most evicted requests. + src_idxs = torch.arange( + self.paused_request_count - evict_request_count, + self.paused_request_count - evict_request_count + active_request_count, + device=torch.cuda.current_device(), + ) + dst_idxs = torch.arange( + self.paused_request_count, + self.paused_request_count + active_request_count, + device=torch.cuda.current_device(), + ) + + # Swap evicted and active requests. 
+ self._swap_book_keeping_tensors( + src_idxs=src_idxs, dst_idxs=dst_idxs, next_tokens=next_tokens + ) + + # Update tracking vars. + self.paused_request_count -= evict_request_count + self.total_request_count -= evict_request_count + + # Reset unused block ids. + evict_slice = slice( + self.total_request_count, self.total_request_count + evict_request_count + ) + self.request_to_kv_block_ids[evict_slice] = -1 + if self.is_hybrid_model: + self.mamba_metadata.request_to_mamba_state_idx[evict_slice] = -1 + + return evict_request_ids + def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> Tensor: """Update context state after calling engine.step(). @@ -1648,7 +1955,7 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T between these request groups. - 0:paused_request_count -> paused requests - paused_request_count:total_request_count -> active requests - - total_request_count:max_active_requests -> completed requests are moved here. + - total_request_count:max_requests -> completed requests are moved here. The reason for maintaining contiguous tensors rather than multiple smaller (e.g., per-group or per-request) tensors is for both 1) speed (avoid unnecessary tensor allocations), and 2) compatibility with the @@ -1660,10 +1967,9 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T 3. Concatenate the paused tokens to the active tokens 4. For the finished requests we release memory blocks and move them to the right 5. We identify requests that require a new block and add them to the paused requests (i.e move them left) - 6. We determine how many requests we can resume and resume them + 6. Resume paused requests & evict overflowing paused requests. 7. We make changes to the request book keeping tesnsors and setup the tokens for next iteration - 8. We resume those requests by assigning blocks and updating bookkeeping tensors - 9. 
We make relevant changes to the token bookkeeping tensors + 8. We make relevant changes to the token bookkeeping tensors Args: active_requests_mask (Tensor): 1D Mask tensor marking active requests. @@ -1682,6 +1988,7 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T active_requests_mask[-1] = ( 1 # must keep this, next iteration will add a new chunk to it ) + self.has_explicit_chunked_prefill_req = False active_request_count = (active_requests_mask == 1).sum().item() finished_request_count = (active_requests_mask == 0).sum().item() @@ -1703,12 +2010,7 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T torch.nonzero(active_requests_mask == 0, as_tuple=True)[0] + self.paused_request_count ) - kv_blocks_assigned = self.request_to_kv_block_ids[finished_idxs] - non_zero_values_in_kv_memory = kv_blocks_assigned[kv_blocks_assigned != -1] - self.block_allocator.release_memory_blocks(non_zero_values_in_kv_memory) - - if self.is_hybrid_model: - self.mamba_metadata.free_slots(finished_idxs) + self.release_memory_blocks_from_request_indexes(finished_idxs) # Reset request/token counts. self.request_to_kv_block_ids.fill_(-1) @@ -1717,7 +2019,6 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T # Reset Mamba state. self.reset_mamba_state() - return # 3. Concatenate the paused tokens to the active tokens if present. @@ -1735,19 +2036,7 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T torch.nonzero(active_requests_mask == 0, as_tuple=True)[0] + self.paused_request_count ) - kv_blocks_assigned = self.request_to_kv_block_ids[finished_idxs] - non_zero_values_in_kv_memory = kv_blocks_assigned[kv_blocks_assigned != -1] - self.block_allocator.release_memory_blocks(non_zero_values_in_kv_memory) - - # Reset the KV blocks for finished requests. - # Note: do not use fill_() (or add_() and similar inplace ops) here. 
- # The combinition of indexing with a tensor (like finished_idxs) and fill_()/add_() creates a clone - # and updates it instead of the original tensor. - self.request_to_kv_block_ids[finished_idxs] = -1 - - if self.is_hybrid_model: - # Get the Mamba state indices for finished requests and free them - self.mamba_metadata.free_slots(finished_idxs) + self.release_memory_blocks_from_request_indexes(finished_idxs) if active_request_count > 0: finished_idxs_on_left = ( @@ -1788,9 +2077,9 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T if self.chunked_prefill_request_id != -1: # find the id in request_ids that is the chunked_prefill_request_id. Only one request should be chunked. - active_requests_requiring_new_block[self.get_index_of_chunked_prefill_request()] = ( - 0 # chunked prefill should not be paused - ) + active_requests_requiring_new_block[ + self.get_index_of_chunked_prefill_request() - self.paused_request_count + ] = 0 # chunked prefill should not be paused active_requests_requiring_new_block_count = ( (active_requests_requiring_new_block == 1).sum().item() @@ -1839,41 +2128,33 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T # 6. Now that we have the requests in following order [Paused, Active, Finished] # We determine how many requests we can resume and resume them - # Assign released blocks to paused requests. - # todo: @shanmugamr, un-pause requests using FIFO, rather than LIFO. 
- resume_request_count = 0 - if self.paused_request_count > 0: - active_block_count_avail = self.block_allocator.get_active_avail() - paused_block_counts = self.request_kv_block_counts[: self.paused_request_count] - paused_block_counts = paused_block_counts.flip(dims=[0]) - paused_block_counts += 1 # +1 for newly added block - paused_block_counts_cumsum = paused_block_counts.cumsum(dim=0) - resume_request_count = min( - torch.nonzero(paused_block_counts_cumsum <= active_block_count_avail).numel(), - self.block_allocator.total_avail, - ) - self.paused_request_count -= resume_request_count - active_request_count += resume_request_count + # 6.a. First, resume temporarily paused requests. + active_request_count, newly_paused_request_ids = self.resume_paused_requests( + active_request_count, newly_paused_request_ids, next_tokens + ) + + # 6.b. Evict requests that overflow the paused buffer. + evict_request_ids = self.evict_overflow_paused_requests(active_request_count, next_tokens) + + # 6.c. Resume any additional requests. + active_request_count, newly_paused_request_ids = self.resume_paused_requests( + active_request_count, newly_paused_request_ids, next_tokens + ) + assert active_request_count > 0, "active_request_count == %d." % active_request_count - # finally, swap the chunked prefill to the end of the active requests to obey the invariance + # 6.d. Swap the chunked prefill request to the end of the active requests + # to obey the invariance. if self.chunked_prefill_request_id != -1: self._swap_book_keeping_tensors( src_idxs=torch.tensor([self.get_index_of_chunked_prefill_request()]), - dst_idxs=torch.tensor([active_request_count + self.paused_request_count - 1]), + dst_idxs=torch.tensor([self.total_request_count - 1]), next_tokens=next_tokens, ) - # Remove resumed requests from newly_paused_request_ids. We do this by - # truncating the end of newly_paused_request_ids, which works because we - # resume requests in LIFO order. 
If resume_request_count > - # len(newly_paused_request_ids), this means that none of the paused - # requests are newly paused during this update. - if newly_paused_request_ids is not None and resume_request_count > 0: - newly_paused_request_ids = newly_paused_request_ids[:-resume_request_count] # 7. We make changes to the request book keeping tesnsors and setup the tokens for next iteration - self.total_request_count = active_request_count + self.paused_request_count + assert self.total_request_count == active_request_count + self.paused_request_count # All these active requests are in decode phase, so they need only 1 token per request self.active_token_count = active_request_count @@ -1900,34 +2181,7 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T + 1 ) % self.block_size_tokens - # 8. We resume those requests by assigning blocks and updating bookkeeping tensors - if resume_request_count > 0: - assert torch.all( - self.request_last_kv_block_offset[ - self.paused_request_count : (self.paused_request_count + resume_request_count) - ] - == 0 - ), "The request_last_kv_block_offset should be 0 for the requests that just got resumed this step. " - - assert resume_request_count <= self.block_allocator.total_avail - block_ids = self.block_allocator.allocate_memory_blocks(resume_request_count) - row_idx = torch.arange( - self.paused_request_count, - self.paused_request_count + resume_request_count, - device=torch.cuda.current_device(), - ) - col_idx = self.request_kv_block_counts[ - self.paused_request_count : (self.paused_request_count + resume_request_count) - ] - self.request_to_kv_block_ids[row_idx, col_idx] = block_ids - self.request_kv_block_counts[ - self.paused_request_count : (self.paused_request_count + resume_request_count) - ] += 1 - self.request_last_kv_block_id[ - self.paused_request_count : (self.paused_request_count + resume_request_count) - ] = block_ids - - # 9. 
We make relevant changes to the token bookkeeping tensors + # 8. We make relevant changes to the token bookkeeping tensors self.token_to_request_idx[: self.active_token_count] = torch.arange( self.paused_request_count, self.total_request_count, device=torch.cuda.current_device() ) @@ -1942,7 +2196,10 @@ def update_requests(self, active_requests_mask: Tensor, new_tokens: Tensor) -> T self.request_last_kv_block_offset[self.paused_request_count : self.total_request_count] ) - return newly_paused_request_ids + return { + "newly_paused_request_ids": newly_paused_request_ids, + "evict_request_ids": evict_request_ids, + } def calculate_log_probs( self, logits: Tensor, new_tokens: Tensor, only_last_token_logits: Optional[bool] = False @@ -2073,8 +2330,7 @@ def get_kvcache_utilization_stats(self) -> dict: 'block_count_avail': int(block_count_avail), 'active_token_count': int(self.active_token_count), 'total_request_count': int(total_request_count), - 'max_total_requests': int(self.max_total_requests), - 'max_active_requests': int(self.max_active_requests), + 'max_requests': int(self.max_requests), } def maybe_initialize_symmetric_memory(self): diff --git a/megatron/core/inference/engines/async_zmq_communicator.py b/megatron/core/inference/engines/async_zmq_communicator.py index be326192154..7076bb283bd 100644 --- a/megatron/core/inference/engines/async_zmq_communicator.py +++ b/megatron/core/inference/engines/async_zmq_communicator.py @@ -6,8 +6,6 @@ import torch.distributed as dist -from megatron.core import parallel_state - try: import zmq @@ -39,6 +37,8 @@ def __init__(self, zmq_context: zmq.Context, process_group: dist.ProcessGroup): self.rank = dist.get_rank(process_group) self.world_size = dist.get_world_size(process_group) self.is_leader = self.rank == 0 + # Get the global rank of the leader (first rank in the process group) + src_rank = dist.get_process_group_ranks(process_group)[0] if self.is_leader: local_ip = socket.gethostname() @@ -52,18 +52,12 @@ def 
__init__(self, zmq_context: zmq.Context, process_group: dist.ProcessGroup): # Share the socket addresses with all peers dist.broadcast_object_list( - [gather_socket_addr, bcast_socket_addr], - src=parallel_state.get_expert_model_parallel_src_rank(), - group=process_group, + [gather_socket_addr, bcast_socket_addr], src=src_rank, group=process_group ) else: bcast_output = [None, None] - dist.broadcast_object_list( - bcast_output, - src=parallel_state.get_expert_model_parallel_src_rank(), - group=process_group, - ) + dist.broadcast_object_list(bcast_output, src=src_rank, group=process_group) gather_socket_addr, bcast_socket_addr = bcast_output self.gather_sock = zmq_context.socket(zmq.PUSH) self.gather_sock.connect(gather_socket_addr) diff --git a/megatron/core/inference/engines/dynamic_engine.py b/megatron/core/inference/engines/dynamic_engine.py index cee73b1e833..906b46efed5 100644 --- a/megatron/core/inference/engines/dynamic_engine.py +++ b/megatron/core/inference/engines/dynamic_engine.py @@ -184,6 +184,7 @@ def __init__( self.enable_chunked_prefill = enable_chunked_prefill self.inference_logging_step_interval = inference_logging_step_interval self.unified_memory_level = context.unified_memory_level + self.persist_cuda_graphs = context.persist_cuda_graphs if enable_cuda_graph is not None: self.cuda_graph_impl = "local" if enable_cuda_graph else "none" @@ -193,6 +194,11 @@ def __init__( # Initialize engine. self.reset() + # Set callback for getting stop word finished request IDs + self.controller.set_stop_word_finished_ids_callback( + self._get_and_clear_stop_word_finished_ids + ) + # Configure wandb to use separate step counter for inference metrics (only once) if self.inference_logging_step_interval > 0 and self.context.metrics_writer is not None: logging.info( @@ -229,10 +235,15 @@ def reset(self) -> None: # Request state. 
self.request_counter = Counter() self.finished_request_count = 0 + self.evicted_request_count = 0 self.requests: Dict[int, RequestEntry] = {} self.waiting_request_ids = deque() self.failed_request_ids = [] + # Track requests that should stop due to stop words (detected in post_process_requests) + self.stop_word_finished_request_ids: set[int] = set() + # Track requests currently being finished due to stop words (to skip extra token) + self.stop_word_being_finished_ids: set[int] = set() # Timing and logging variables. self.rank = torch.distributed.get_rank() @@ -557,10 +568,10 @@ def suspend(self): ): self.context.deallocate_all_tensors() - # Delete cuda graphs when not using unified memory at all (level 0). For - # levels 1 and 2, the context's tensors maintain static memory addresses, - # so the cuda graphs are re-used. - if self.unified_memory_level == 0: + # Delete cuda graphs when not using unified memory at all (level 0) and + # `--rl-training-cuda-graphs` is not passed. For UVM levels 1 and 2, the context's tensors + # maintain static memory addresses, so the cuda graphs are re-used. + if self.unified_memory_level == 0 and not self.persist_cuda_graphs: delete_cuda_graphs() # Maintain references to requests before reset. @@ -571,7 +582,7 @@ def suspend(self): # Suspend requests objects. for request_id in active_request_ids: - self.requests[request_id].record.suspend() + self.requests[request_id].record.checkpoint() def resume(self): """Resume engine by reallocating context's GPU state.""" @@ -602,7 +613,7 @@ def resume(self): # 0). For levels 1 and 2, the context's tensors maintain static # memory addresses, so the cuda graphs are re-used. 
capture_time = time.time() - if self.unified_memory_level == 0: + if self.unified_memory_level == 0 and not self.persist_cuda_graphs: self.create_cuda_graphs() capture_time = time.time() - capture_time @@ -708,7 +719,7 @@ def _add_request( if ( len(request.prompt_tokens) + request.sampling_params.num_tokens_to_generate > self.context.max_sequence_length - ): + ) or (request.sampling_params.num_tokens_to_generate < 0): request.status = Status.FAILED request.add_event_error_nontransient(MaxSequenceLengthOverflowError(request_id)) @@ -716,6 +727,14 @@ def _add_request( request.status = Status.FAILED request.add_event_error_nontransient(TokenOverflowError(request_id)) + # Tokenize stop words if provided + if request.sampling_params.stop_words: + stop_word_ids = [ + self.controller.tokenize_prompt(stop_word, add_BOS=False) + for stop_word in request.sampling_params.stop_words + ] + request.stop_word_ids = stop_word_ids + if request.status != Status.FAILED: self.waiting_request_ids.append(request_id) else: @@ -780,6 +799,7 @@ def post_process_requests( self, request_ids: torch.Tensor, finished_request_ids: torch.Tensor, + evict_request_ids: torch.Tensor, step_time: float, sample: torch.Tensor, log_probs: torch.Tensor, @@ -791,6 +811,7 @@ def post_process_requests( Args: request_ids (torch.Tensor): A list of request_ids finished_request_ids (torch.Tensor): A list of finished request ids + evict_request_ids (torch.Tensor): A list of evicted request ids. 
step_time (float): The latency of the last step sample: (torch.Tensor): The newly generated tokens for each request log_probs: (List): Log probs for each request @@ -804,6 +825,8 @@ def post_process_requests( finished_request_ids = set(finished_request_ids.tolist()) finished_request_records: list[DynamicInferenceRequestRecord] = [] self.finished_request_count += len(finished_request_ids) + if evict_request_ids is not None: + self.evicted_request_count += evict_request_ids.numel() log_probs_iter = log_probs if log_probs else repeat(None) @@ -812,12 +835,19 @@ def post_process_requests( ): request: DynamicInferenceRequest = self.get_request(request_id) if request_id != self.context.chunked_prefill_request_id: - request.generated_tokens.append(token) - if request.tpot is None: - request.tpot = [] - request.tpot.append(step_time) + # Skip appending token for requests being finished due to stop words + # (they already have their final token from the previous step) + if request_id not in self.stop_word_being_finished_ids: + request.generated_tokens.append(token) + if request.tpot is None: + request.tpot = [] + request.tpot.append(step_time) + + # Check for stop words (after token is appended) + stop_word_hit = self._check_stop_words_for_request_post_append(request) if request_id in finished_request_ids: + # Request finished by normal means (termination_id, max_length, or stop word from previous step) request.generated_length = len(request.generated_tokens) request.status = Status.COMPLETED finished_entry = self.requests.pop(request_id) @@ -825,6 +855,11 @@ def post_process_requests( finished_request.generated_length = len(finished_request.generated_tokens) finished_request_records.append(finished_entry.record) finished_entry.future.set_result(finished_entry.record) + elif stop_word_hit: + # Stop word detected - mark for removal in next step's bookkeeping + # Don't pop yet; let the next step handle it properly via callback + 
self.stop_word_finished_request_ids.add(request_id) + active_request_ids.append(request_id) else: active_request_ids.append(request_id) else: @@ -911,8 +946,79 @@ def post_process_requests( else: request.generated_top_n_logprobs.append(logit_dict) + # Handle evicted requests. + if evict_request_ids is not None and evict_request_ids.numel() > 0: + + evict_request_ids = evict_request_ids.tolist() + + # Insert into waiting_request_ids after any chunk prefill request. + self.waiting_request_ids.extendleft(evict_request_ids) + if self.context.chunked_prefill_request_id != -1: + chunked_prefill_id = self.waiting_request_ids[len(evict_request_ids)] + del self.waiting_request_ids[len(evict_request_ids)] + self.waiting_request_ids.appendleft(chunked_prefill_id) + + # Checkpoint requests (i.e., prompt += generations) + add eviction event. + for request_id in evict_request_ids: + self.requests[request_id].record.checkpoint() + self.get_request(request_id).add_event_evict() + + # Clear the stop word being finished set after processing + self.stop_word_being_finished_ids.clear() + return active_request_ids, finished_request_records + def _get_and_clear_stop_word_finished_ids(self, active_request_ids: list[int]) -> set[int]: + """Get and clear the set of request IDs that should be finished due to stop words. + + This callback is called from the controller during bookkeeping to get request IDs + that were detected as hitting stop words in the previous step's post_process_requests. + + Args: + active_request_ids: List of currently active request IDs. + + Returns: + Set of request IDs from active_request_ids that should be marked as finished. 
+ """ + if not self.stop_word_finished_request_ids: + return set() + + # Find which stop word finished IDs are in the current active requests + result = self.stop_word_finished_request_ids & set(active_request_ids) + # Move to "being finished" set so post_process_requests can skip the extra token + self.stop_word_being_finished_ids = result + # Clear the IDs that we're returning (they'll be marked as finished) + self.stop_word_finished_request_ids -= result + return result + + def _check_stop_words_for_request_post_append(self, request: DynamicInferenceRequest) -> bool: + """Check if a request should stop due to stop words (after token is appended). + + This method is called from post_process_requests after the token has already + been appended to request.generated_tokens. + + Args: + request: The request to check. + + Returns: + bool: True if the generated sequence ends with a stop word, False otherwise. + """ + # Check if request has stop words configured + if request.stop_word_ids is None or len(request.stop_word_ids) == 0: + return False + + generated_tokens = request.generated_tokens + + # Check if the sequence ends with any stop word + for stop_word_ids in request.stop_word_ids: + stop_len = len(stop_word_ids) + if len(generated_tokens) >= stop_len: + # Check if the last stop_len tokens match the stop word + if list(generated_tokens[-stop_len:]) == stop_word_ids: + return True + + return False + def schedule_waiting_requests(self): """Tries to schedule any requests in the waiting pool.""" if self.enable_chunked_prefill: @@ -975,6 +1081,12 @@ def schedule_chunked_prefill(self): if request_can_be_added and kv_cache_available: if token_fully_can_be_added: + # For Mamba models we need to ensure that the last prefill chunk + # is still tagged as a chunked prefill request. 
+ self.context.has_explicit_chunked_prefill_req = ( + self.context.is_hybrid_model + and self.context.chunked_prefill_request_id == req.request_id + ) self.context.chunked_prefill_request_id = -1 self.context.add_request(req) self._loop.call_soon_threadsafe( @@ -985,7 +1097,10 @@ def schedule_chunked_prefill(self): # Fully scheduled, so we remove from waiting pool self.waiting_request_ids.popleft() # Only this case we keep checking the rest of the waiting queue - can_schedule = True + # We break early for Mamba models running a final prefill chunk + # so that no additional requests are scheduled beyond the chunked + # prefill request. + can_schedule = not self.context.has_explicit_chunked_prefill_req elif token_partially_can_be_added: chunk_length = self.context.max_tokens - self.context.active_token_count self.context.add_request(req, chunk_length=chunk_length) @@ -993,6 +1108,7 @@ def schedule_chunked_prefill(self): self._loop.create_task, self._notify_cond_for_new_request() ) self.context.chunked_prefill_request_id = req.request_id + self.context.has_explicit_chunked_prefill_req = self.context.is_hybrid_model req.remaining_prompt_tokens = req.remaining_prompt_tokens[chunk_length:] req.finished_chunk_token_count += chunk_length # Still have tokens to prefill, so we break and keep the @@ -1022,7 +1138,7 @@ async def async_forward(self) -> Tuple[Dict, Dict, float, int]: is_decode_only = self.context.is_decode_only() pre_step_context_state = { "is_decode_only": is_decode_only, - "max_active_requests": self.context.max_active_requests, + "max_requests": self.context.max_requests, "total_request_count": self.context.total_request_count, "paused_request_count": self.context.paused_request_count, "active_token_count": self.context.active_token_count, @@ -1055,6 +1171,7 @@ async def async_forward(self) -> Tuple[Dict, Dict, float, int]: post_step_context_state = { "waiting_request_count": len(self.waiting_request_ids), "finished_request_count": 
self.finished_request_count, + "evicted_request_count": self.evicted_request_count, "kv_stats": kvcache_util_stats, "padded_active_token_count": self.context.padded_active_token_count, "using_cuda_graph_this_step": self.context.using_cuda_graph_this_step(), @@ -1091,8 +1208,9 @@ async def async_bookkeep( if step_result is not None: active_request_ids = step_result["active_request_ids"] - newly_paused_request_ids = step_result["newly_paused_request_ids"] finished_request_ids = step_result["finished_request_ids"] + newly_paused_request_ids = step_result.get("newly_paused_request_ids") + evict_request_ids = step_result.get("evict_request_ids") sample = step_result["sample"] log_probs = step_result["log_probs"] top_n_logprobs = step_result.get("top_n_logprobs", None) @@ -1109,6 +1227,7 @@ async def async_bookkeep( (active_request_ids, finished_request_records) = self.post_process_requests( active_request_ids, finished_request_ids, + evict_request_ids, step_time, sample, log_probs, @@ -1184,7 +1303,7 @@ async def async_bookkeep( step_type = "decode" if context_state["is_decode_only"] else "non-decode" output_str = ( "* rank %d | step %d | %s ... time: %.3f%s ... " - "reqs: a %d/%d, p %d, w %d, f %d ... " + "reqs: a %d/%d, p %d, w %d, f %d, e %d ... " "blocks: a %d/%d, p %d/%d ... " "mem: tensors %d, alloc %.1f gb, res %.1f gb." 
% ( @@ -1205,10 +1324,11 @@ async def async_bookkeep( ) ), context_state["total_request_count"] - context_state["paused_request_count"], - context_state["max_active_requests"], + context_state["max_requests"], context_state["paused_request_count"], context_state["waiting_request_count"], context_state["finished_request_count"], + context_state["evicted_request_count"], context_state["total_active_used_blocks"], context_state["total_active_block_count"], context_state["total_paused_used_blocks"], diff --git a/megatron/core/inference/inference_client.py b/megatron/core/inference/inference_client.py index 8a19e226c46..8659368b9fa 100644 --- a/megatron/core/inference/inference_client.py +++ b/megatron/core/inference/inference_client.py @@ -111,7 +111,7 @@ def add_request( payload_serialized = msgpack.packb(payload, use_bin_type=True) self.socket.send(payload_serialized) assert request_id not in self.completion_futures - self.completion_futures[request_id] = self._loop.create_future() + self.completion_futures[request_id] = asyncio.get_running_loop().create_future() self.request_submission_times[request_id] = time.perf_counter() return self.completion_futures[request_id] @@ -141,7 +141,10 @@ async def _recv_task(self): if completion_future.done(): logging.warning(f"Client: The future for {request_id} has been cancelled!") continue - completion_future.set_result(DynamicInferenceRequestRecord.deserialize(reply)) + completed_request = DynamicInferenceRequestRecord.deserialize(reply) + completion_future.get_loop().call_soon_threadsafe( + completion_future.set_result, completed_request + ) elif header == Headers.PAUSE_ACK: self.paused.set() elif header == Headers.STOP_ACK: diff --git a/megatron/core/inference/inference_request.py b/megatron/core/inference/inference_request.py index 458fbad387f..8bd0dd0aff4 100644 --- a/megatron/core/inference/inference_request.py +++ b/megatron/core/inference/inference_request.py @@ -140,6 +140,7 @@ class DynamicInferenceEventType(Enum): ADD 
= auto() PAUSE = auto() + EVICT = auto() FINISH = auto() FAIL = auto() ERROR_TRANSIENT = auto() @@ -154,6 +155,7 @@ class DynamicInferenceEvent: - request added - request paused + - request evicted - request finished - request failed - request error (transient) @@ -246,6 +248,7 @@ class DynamicInferenceRequest(InferenceRequest): remaining_prompt_tokens: Optional[torch.Tensor] = None latency: Optional[float] = None finished_chunk_token_count = 0 + stop_word_ids: Optional[List[List[int]]] = None # Tokenized stop words (populated internally) def __post_init__(self): self.sampling_params = copy.deepcopy(self.sampling_params) @@ -349,6 +352,10 @@ def add_event_pause(self): """Add 'pause' event.""" return self.add_event(DynamicInferenceEventType.PAUSE) + def add_event_evict(self): + """Add 'evict' event.""" + return self.add_event(DynamicInferenceEventType.EVICT) + def add_event_finish(self): """Add 'finish' event.""" return self.add_event(DynamicInferenceEventType.FINISH) @@ -376,8 +383,8 @@ def failed(self) -> bool: @dataclass(kw_only=True) class DynamicInferenceRequestRecord: - """History of DynamicInferenceRequest objects over multiple suspend and - resumes.""" + """History of DynamicInferenceRequest objects over multiple request + checkpoints.""" requests: list[DynamicInferenceRequest] = field(default_factory=list) latency: Optional[float] = None @@ -416,9 +423,9 @@ def request_id(self) -> int: """ return self.requests[0].request_id - def suspend(self, tokenizer: MegatronTokenizer | None = None): - """Suspend request by storing references to previous prompt, generations, - and sampling params. + def checkpoint(self, tokenizer: MegatronTokenizer | None = None): + """Maintain reference to previous request, and then append a new request + that concatenates the previous prompt and generations. Args: tokenizer (MegatronTokenizer | None): (Deprecated) Tokenizer. 
@@ -459,7 +466,7 @@ def suspend(self, tokenizer: MegatronTokenizer | None = None): self.requests.append(new_request) def merge(self, tokenizer: MegatronTokenizer | None = None) -> DynamicInferenceRequest: - """Merge requests into a single suspend-agnostic request object. + """Merge requests into a single checkpoint-agnostic request object. Args: tokenizer (MegatronTokenizer | None): (Deprecated) Tokenizer. @@ -477,7 +484,10 @@ def merge_lists(key): prompt_tokens = self.requests[0].prompt_tokens prompt_text = self.requests[0].prompt generated_tokens = merge_lists("generated_tokens") - generated_text = "".join(r.generated_text for r in self.requests) + try: + generated_text = "".join(r.generated_text for r in self.requests) + except TypeError as e: # generally means r.generated_text is None + generated_text = None # Merged request. request = DynamicInferenceRequest( diff --git a/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py b/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py index 0d7d15b4510..6a17de685bf 100644 --- a/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +++ b/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py @@ -7,7 +7,6 @@ import torch -from megatron.core import parallel_state from megatron.core.fp8_utils import prepare_model_for_fp8_inference from megatron.core.inference.communication_utils import ( is_pipeline_first_stage, @@ -73,10 +72,7 @@ def __init__( self.inference_context = inference_context if pg_collection is None: - pg_collection = ProcessGroupCollection( - tp=parallel_state.get_tensor_model_parallel_group(), - pp=parallel_state.get_pipeline_model_parallel_group(), - ) + pg_collection = ProcessGroupCollection.use_mpu_process_groups() self.tp_group = pg_collection.tp self.pp_group = pg_collection.pp @@ -173,7 +169,7 @@ def dummy_forward(self): for the all-to-all communication.""" # we use 
num_dummy_tokens equal to tensor model parallel size # so that the dummy forward pass will work with sequence parallel - num_dummy_tokens = parallel_state.get_tensor_model_parallel_world_size() + num_dummy_tokens = self.tp_size tokens = torch.zeros( (1, num_dummy_tokens), dtype=torch.long, device=torch.cuda.current_device() ) @@ -382,9 +378,7 @@ def run_one_forward_step( torch.Tensor: The output logits of shape [batch_size, seq_len, padded_vocab_size]. The logits are returned only in the last pipeline stage for PP models. """ # Check if we are in a PP model - if not ( - parallel_state.is_pipeline_first_stage() and parallel_state.is_pipeline_last_stage() - ): + if not (is_pipeline_first_stage(self.pp_group) and is_pipeline_last_stage(self.pp_group)): tokens = inference_input["tokens"] current_batch_size, seq_len = self._get_batch_size_and_seq_len( tokens, recv_buffer_seq_len diff --git a/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py b/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py index 430126816a7..ba89fbc2f6c 100644 --- a/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +++ b/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py @@ -12,6 +12,7 @@ ) from megatron.core.inference.utils import get_attention_mask from megatron.core.models.gpt import GPTModel +from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.enums import AttnBackend from megatron.core.utils import get_model_config @@ -28,6 +29,8 @@ class GPTInferenceWrapper(AbstractModelInferenceWrapper): size, etc. inference_context (BaseInferenceContext): Manages KV cache, and tracks sequence/token/batch offsets. + pg_collection (ProcessGroupCollection): Process groups for model communication. + If not provided, defaults to global parallel state groups. 
""" def __init__( @@ -35,8 +38,9 @@ def __init__( model: GPTModel, inference_wrapper_config: InferenceWrapperConfig, inference_context: Optional[BaseInferenceContext] = None, + pg_collection: Optional[ProcessGroupCollection] = None, ): - super().__init__(model, inference_wrapper_config, inference_context) + super().__init__(model, inference_wrapper_config, inference_context, pg_collection) def prep_inference_input(self, prompts_tokens: torch.Tensor) -> Dict[str, Any]: """Prepares the inference input data. diff --git a/megatron/core/inference/sampling_params.py b/megatron/core/inference/sampling_params.py index 6a4c5736706..ba1acae4c57 100644 --- a/megatron/core/inference/sampling_params.py +++ b/megatron/core/inference/sampling_params.py @@ -2,7 +2,7 @@ import warnings from dataclasses import dataclass -from typing import Optional +from typing import List, Optional @dataclass @@ -30,6 +30,9 @@ class SamplingParams: top_n_logprobs: int = 0 return_prompt_top_n_logprobs: bool = False # Deprecated field for backwards compatibility add_BOS: bool = False + stop_words: Optional[List[str]] = ( + None # List of strings that will stop generation when produced + ) def __post_init__(self): """Ensure backward compatibility for return_prompt_top_n_logprobs. 
@@ -48,7 +51,7 @@ def _sync_prompt_logprobs_fields(self): DeprecationWarning, ) assert ( - self.skip_prompt_log_probs + not self.skip_prompt_log_probs ), "return_prompt_top_n_logprobs requires skip_prompt_log_probs to be False" if self.top_n_logprobs > 0: self.return_prompt_top_n_logprobs = not self.skip_prompt_log_probs diff --git a/megatron/core/inference/text_generation_controllers/text_generation_controller.py b/megatron/core/inference/text_generation_controllers/text_generation_controller.py index 15b19835121..a5233983ed0 100644 --- a/megatron/core/inference/text_generation_controllers/text_generation_controller.py +++ b/megatron/core/inference/text_generation_controllers/text_generation_controller.py @@ -78,10 +78,24 @@ def __init__( if self.inference_wrapped_model.inference_context.is_dynamic_batching(): self._init_dynamic_sampling_tensors() + def set_stop_word_finished_ids_callback(self, callback): + """Set a callback to get request IDs that should be marked as finished due to stop words. + + The callback should have signature: callback(active_request_ids: List[int]) -> Set[int] + Returns a set of request IDs from active_request_ids that should be marked as finished. + + Args: + callback: Function that returns request IDs to mark as finished. 
+ """ + self._get_stop_word_finished_ids_callback = callback + def _init_dynamic_sampling_tensors(self): """Initialize tensors needed for dynamic sampling.""" context = self.inference_wrapped_model.inference_context - max_requests = context.max_total_requests + max_requests = context.max_requests + + # Callback to get request IDs that should be marked as finished due to stop words + self._get_stop_word_finished_ids_callback = None device = torch.cuda.current_device() logits_dtype = self.inference_wrapped_model.inference_wrapper_config.params_dtype @@ -475,13 +489,16 @@ def unpad_input_prompt_tokens( return padded_batch_prompt_tokens[:original_batch_size] def _dynamic_step_context_init( - self, construct_graph_dimensions: Optional[InferenceBatchDimensions] = None + self, + construct_graph_dimensions: Optional[InferenceBatchDimensions] = None, + is_dummy_forward: bool = False, ): """Initializes the inference context for dynamic batching. Args: construct_graph_dimensions (Optional[InferenceBatchDimensions]): The graph config to use for constructing the cuda graphs. + is_dummy_forward (bool): Whether we are running an expert parallel dummy forward pass Return: input_ids (Tensor): The active input IDs. @@ -534,7 +551,9 @@ def _dynamic_step_context_init( ) # Get flat tokens, position ids. - if construct_graph_dimensions is not None: + # If we are running a dummy forward step we want to use the token count agreed upon + # by all EP ranks rather than the minimum number of tokens. + if construct_graph_dimensions is not None and not is_dummy_forward: return context.current_input_and_position_ids( num_warmup_tokens=construct_graph_dimensions.token_count ) @@ -766,7 +785,8 @@ def dummy_forward(self): # a dummy cuda graph. 
input_ids, position_ids = self._dynamic_step_context_init( # try to use the smallest cuda-graph config for dummy forward - construct_graph_dimensions=min(context.cuda_graph_batch_dimensions_list) + construct_graph_dimensions=min(context.cuda_graph_batch_dimensions_list), + is_dummy_forward=True, ) # _dynamic_step_context_init tries to find a cuda-graph that is compatible @@ -812,6 +832,16 @@ def _dynamic_step_context_bookkeeping(self) -> Dict[str, Tensor]: self._sampled_tokens_cuda[:active_request_count] != self._request_metadata["termination_id"][active_request_slice] ).byte() & torch.less(active_sequence_lengths, max_sequence_lengths).byte() + + # Mark requests as finished if they hit stop words (detected in previous step's post_process_requests) + if self._get_stop_word_finished_ids_callback is not None: + request_ids_list = active_request_ids.tolist() + stop_word_finished_ids = self._get_stop_word_finished_ids_callback(request_ids_list) + if stop_word_finished_ids: + for idx, request_id in enumerate(request_ids_list): + if request_id in stop_word_finished_ids: + active_request_mask[idx] = 0 + finished_idxs = ( torch.nonzero(active_request_mask == 0, as_tuple=True)[0] + context.paused_request_count ) @@ -821,12 +851,12 @@ def _dynamic_step_context_bookkeeping(self) -> Dict[str, Tensor]: new_sample_copy = self._sampled_tokens_cuda[:active_request_count].clone() # Update requests. 
- newly_paused_request_ids = context.update_requests(active_request_mask, new_sample_copy) + update_result = context.update_requests(active_request_mask, new_sample_copy) return { "active_request_ids": active_request_ids, - "newly_paused_request_ids": newly_paused_request_ids, "finished_request_ids": finished_request_ids, + **(update_result or {}), } @torch.inference_mode() diff --git a/megatron/core/inference/text_generation_server/dynamic_text_gen_server/__init__.py b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/__init__.py new file mode 100644 index 00000000000..3ad54686261 --- /dev/null +++ b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + +from .flask_server import run_flask_server diff --git a/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/__init__.py b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/__init__.py new file mode 100644 index 00000000000..1945fd10dba --- /dev/null +++ b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + + +try: + from .chat_completions import bp as ChatCompletions + from .completions import bp as Completions + + __all__ = [Completions, ChatCompletions] +except ImportError: + __all__ = [] diff --git a/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/chat_completions.py b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/chat_completions.py new file mode 100644 index 00000000000..0c3379bc53f --- /dev/null +++ b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/chat_completions.py @@ -0,0 +1,158 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +import asyncio +import logging +import time + +from megatron.core.inference.sampling_params import SamplingParams + +logger = logging.getLogger(__name__) + +try: + from flask import Blueprint, current_app, jsonify, request + + bp = Blueprint('chat_completions_api', __name__) + + @bp.route('/chat/completions', methods=['POST']) + @bp.route('/v1/chat/completions', methods=['POST']) + async def chat_completions(): + """Handles async POST requests for chat completions.""" + client = current_app.config['client'] + tokenizer = current_app.config['tokenizer'] + + req = request.get_json() + + # --- 1. Parse Messages --- + messages = req.get("messages") + if not messages: + return "Missing 'messages' field", 400 + if not isinstance(messages, list): + return "'messages' must be a list", 400 + + try: + prompt_tokens = tokenizer.apply_chat_template( + messages, tokenize=True, add_generation_prompt=True + ) + except AttributeError: + return ( + "Tokenizer does not support 'apply_chat_template'. " + "Chat completions requires a tokenizer with a configured chat template." + ), 500 + except Exception as e: + return f"Error processing 'messages': {e}", 500 + + # --- 2. Parse Sampling Params --- + try: + temperature = float(req.get("temperature", 1.0)) + top_p = float(req.get("top_p", 1.0)) + top_k = int(req.get("top_k", 0)) + n = int(req.get("n", 1)) # Number of choices to generate + + if temperature == 0.0: + top_k = 1 + top_p = 0.0 + + # Check for 'logprobs' (bool) and 'top_logprobs' (int) + return_log_probs = bool(req.get("logprobs", False)) + top_n_logprobs = int(req.get("top_logprobs", 0)) if return_log_probs else 0 + + sampling_params = SamplingParams( + temperature=temperature, + top_k=top_k, + top_p=top_p, + return_log_probs=return_log_probs, + top_n_logprobs=top_n_logprobs, + num_tokens_to_generate=int(req.get("max_tokens", 16)), + ) + except ValueError as e: + return f"Invalid sampling parameter: {e}", 400 + + # --- 3. 
Send Requests to Engine --- + # For chat, we run the *same* prompt 'n' times. + tasks = [] + for _ in range(n): + per_req_params = SamplingParams( + temperature=sampling_params.temperature, + top_k=sampling_params.top_k, + top_p=sampling_params.top_p, + return_log_probs=sampling_params.return_log_probs, + top_n_logprobs=sampling_params.top_n_logprobs, + num_tokens_to_generate=sampling_params.num_tokens_to_generate, + ) + tasks.append(client.add_request(prompt_tokens, per_req_params)) + + start_time = time.perf_counter() + try: + batch_results = await asyncio.gather(*tasks) + except Exception as e: + return f"Error during inference: {e}", 500 + + logger.info( + f"Batch of {len(tasks)} requests (n={n}) processed in " + f"{time.perf_counter() - start_time:.2f}s" + ) + + # --- 4. Format OpenAI Response --- + choices = [] + total_completion_tokens = 0 + prompt_token_count = len(prompt_tokens) # Calculated once + + request_idx = 0 + for record in batch_results: + for result in record.requests: + text_output = result.generated_text + + logprobs_content = None + if sampling_params.return_log_probs: + token_logprobs = getattr(result, 'log_probs', []) + tokens = [tokenizer.detokenize([tok]) for tok in result.generated_tokens] + + # Get top_n_logprobs if available + generated_top_n_logprobs = getattr(result, 'generated_top_n_logprobs', None) + + logprobs_content = [] + for i, (tok, lp) in enumerate(zip(tokens, token_logprobs)): + # Build top_logprobs list for this token position + top_logprobs_list = [] + if generated_top_n_logprobs and i < len(generated_top_n_logprobs): + top_n_dict = generated_top_n_logprobs[i] + for token_str, logprob in top_n_dict.items(): + top_logprobs_list.append( + { + "token": token_str, + "logprob": logprob, + "bytes": list(token_str.encode("utf-8")), + } + ) + + entry = { + "token": tok, + "logprob": lp, + "bytes": list(tok.encode("utf-8")), + "top_logprobs": top_logprobs_list, + } + logprobs_content.append(entry) + + choice_data = { + "index": 0, 
+ "message": {"role": "assistant", "content": text_output}, + # 'logprobs' in chat API is an object containing 'content' + "logprobs": {"content": logprobs_content} if logprobs_content else None, + "finish_reason": "length", # Original code hardcoded this. + } + choices.append(choice_data) + total_completion_tokens += len(result.generated_tokens) + request_idx += 0 + + response = { + "choices": choices, + "usage": { + "prompt_tokens": prompt_token_count, + "completion_tokens": total_completion_tokens, + "total_tokens": prompt_token_count + total_completion_tokens, + }, + } + return jsonify(response) + +except ImportError as e: + logger.warning(f"Could not import flask: {e}") diff --git a/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/common.py b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/common.py new file mode 100644 index 00000000000..6efdba5cdb2 --- /dev/null +++ b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/common.py @@ -0,0 +1,14 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + +import threading + +import torch + +GENERATE_NUM = 0 +LOCK = threading.Lock() + + +def send_do_generate(): + """Broadcasts a message to perform a generation to all tensor parallel ranks.""" + choice = torch.tensor([GENERATE_NUM], dtype=torch.long, device=torch.cuda.current_device()) + torch.distributed.broadcast(choice, 0) diff --git a/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/completions.py b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/completions.py new file mode 100644 index 00000000000..b749205cdfd --- /dev/null +++ b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/completions.py @@ -0,0 +1,214 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+ +import asyncio +import logging +import time + +from megatron.core.inference.sampling_params import SamplingParams + +logger = logging.getLogger(__name__) + + +try: + from flask import Blueprint, current_app, jsonify, request + + bp = Blueprint('completions_api', __name__) + + @bp.route('/completions', methods=['POST']) + @bp.route('/v1/completions', methods=['POST']) + async def completions(): + """Handles async POST requests for completions.""" + client = current_app.config['client'] + tokenizer = current_app.config['tokenizer'] + + req = request.get_json() + + # --- 1. Parse Prompt --- + prompt_data = req.get("prompt") + if not prompt_data: + return "Missing 'prompt' field", 400 + + try: + if isinstance(prompt_data, str): + prompts_as_tokens = [tokenizer.tokenize(prompt_data)] + prompts_as_strings = [prompt_data] + elif isinstance(prompt_data, list): + if not prompt_data: + return "'prompt' list is empty", 400 + if all(isinstance(p, str) for p in prompt_data): + prompts_as_tokens = [tokenizer.tokenize(p) for p in prompt_data] + prompts_as_strings = prompt_data + elif all(isinstance(p, int) for p in prompt_data): + prompts_as_tokens = [prompt_data] + prompts_as_strings = [tokenizer.detokenize(prompt_data)] + elif all( + isinstance(p, list) and all(isinstance(t, int) for t in p) for p in prompt_data + ): + prompts_as_tokens = prompt_data + prompts_as_strings = [tokenizer.detokenize(p) for p in prompt_data] + else: + return ( + ( + "Invalid 'prompt' format. Must be str, list[str], " + "list[int], or list[list[int]]" + ), + 400, + ) + else: + return "Invalid 'prompt' type. Must be str or list", 400 + except Exception as e: + return f"Error tokenizing prompt: {e}", 500 + + # --- 2. 
Parse Sampling Params --- + try: + temperature = float(req.get("temperature", 1.0)) + top_p = float(req.get("top_p", 1.0)) + top_k = int(req.get("top_k", 0)) + echo = bool(req.get("echo", False)) + + if temperature == 0.0: + top_k = 1 + top_p = 0.0 + + # Parse logprobs - can be an integer (number of top logprobs to return) or None + logprobs_param = req.get("logprobs", None) + + if logprobs_param is not None: + top_n_logprobs = int(logprobs_param) + return_log_probs = True + else: + top_n_logprobs = 0 + return_log_probs = False + + # When echo=True and logprobs are requested, we need prompt logprobs + # skip_prompt_log_probs=False ensures the engine computes logprobs for prompt tokens + skip_prompt_log_probs = not (echo and return_log_probs) + + sampling_params = SamplingParams( + temperature=temperature, + top_k=top_k, + top_p=top_p, + return_log_probs=return_log_probs, + top_n_logprobs=top_n_logprobs, + skip_prompt_log_probs=skip_prompt_log_probs, + num_tokens_to_generate=int(req.get("max_tokens", 16)), + ) + except ValueError as e: + return f"Invalid sampling parameter: {e}", 400 + + # --- 3. Send Requests to Engine --- + tasks = [] + for prompt_tokens in prompts_as_tokens: + per_req_params = SamplingParams( + temperature=sampling_params.temperature, + top_k=sampling_params.top_k, + top_p=sampling_params.top_p, + return_log_probs=sampling_params.return_log_probs, + top_n_logprobs=sampling_params.top_n_logprobs, + skip_prompt_log_probs=sampling_params.skip_prompt_log_probs, + num_tokens_to_generate=sampling_params.num_tokens_to_generate, + ) + tasks.append(client.add_request(prompt_tokens, per_req_params)) + + start_time = time.perf_counter() + try: + batch_results = await asyncio.gather(*tasks) + except Exception as e: + return f"Error during inference: {e}", 500 + + logger.info( + f"Batch of {len(tasks)} requests processed in {time.perf_counter() - start_time:.2f}s" + ) + + # --- 4. 
Format Response (matching old_completions.py) --- + choices = [] + + request_idx = 0 + for record in batch_results: + for result in record.requests: + full_text = result.generated_text or "" + text_output = (prompts_as_strings[request_idx] + full_text) if echo else full_text + + logprobs_data = None + if sampling_params.return_log_probs: + # Get prompt tokens and logprobs + prompt_tokens_list = [] + if result.prompt_tokens is not None: + if hasattr(result.prompt_tokens, 'tolist'): + prompt_tokens_list = result.prompt_tokens.tolist() + else: + prompt_tokens_list = list(result.prompt_tokens) + + prompt_log_probs = getattr(result, 'prompt_log_probs', None) or [] + prompt_top_n_logprobs = getattr(result, 'prompt_top_n_logprobs', None) or [] + + # Get generated tokens and logprobs + generated_tokens_list = ( + list(result.generated_tokens) if result.generated_tokens else [] + ) + generated_log_probs = getattr(result, 'generated_log_probs', None) or [] + generated_top_n_logprobs = ( + getattr(result, 'generated_top_n_logprobs', None) or [] + ) + + if echo: + # When echo=True, include prompt tokens and their logprobs + # Prompt logprobs are for tokens [1:] (first token has no logprob) + all_token_ids = prompt_tokens_list + generated_tokens_list + tokens = [tokenizer.detokenize([tok]) for tok in all_token_ids] + + # Build token_logprobs: [None] for first token, then prompt logprobs, + # then generated logprobs + token_logprobs = [None] + list(prompt_log_probs) + list(generated_log_probs) + + # Build top_logprobs: [None] for first token, then prompt top_n, + # then generated top_n + top_logprobs = None + if prompt_top_n_logprobs or generated_top_n_logprobs: + top_logprobs = ( + [None] + + list(prompt_top_n_logprobs) + + list(generated_top_n_logprobs) + ) + + # Calculate text_offset: cumulative character positions starting from 0 + text_offset = [] + current_offset = 0 + for tok_str in tokens: + text_offset.append(current_offset) + current_offset += len(tok_str) + else: + # 
When echo=False, only return generated tokens and their logprobs + tokens = [tokenizer.detokenize([tok]) for tok in generated_tokens_list] + + # Prepend [None] to match OpenAI format + token_logprobs = [None] + list(generated_log_probs) + + # Build top_logprobs + top_logprobs = None + if generated_top_n_logprobs: + top_logprobs = [None] + list(generated_top_n_logprobs) + + # Calculate text_offset for generated tokens only + text_offset = [] + current_offset = 0 + for tok_str in tokens: + text_offset.append(current_offset) + current_offset += len(tok_str) + + logprobs_data = { + "token_logprobs": token_logprobs, + "tokens": tokens, + "text_offset": text_offset, + "top_logprobs": top_logprobs, + } + + choices.append( + {"index": request_idx, "text": text_output, "logprobs": logprobs_data} + ) + request_idx += 1 + + return jsonify({"choices": choices}) + +except ImportError as e: + logger.warning(f"Could not import flask: {e}") diff --git a/megatron/core/inference/text_generation_server/dynamic_text_gen_server/flask_server.py b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/flask_server.py new file mode 100644 index 00000000000..2b0469b340a --- /dev/null +++ b/megatron/core/inference/text_generation_server/dynamic_text_gen_server/flask_server.py @@ -0,0 +1,76 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
# --- megatron/.../dynamic_text_gen_server flask server module (new file, tail visible from imports) ---

import logging
import socket
from contextlib import contextmanager

try:
    from flask import Flask
    from hypercorn.asyncio import serve
    from hypercorn.config import Config

    HAS_FLASK = True
except ImportError:
    # Flask/Hypercorn are optional; run_flask_server() reports the problem lazily.
    HAS_FLASK = False

import megatron.core.inference.text_generation_server.dynamic_text_gen_server.endpoints as endpoints
from megatron.core.inference.inference_client import InferenceClient
from megatron.core.utils import trace_async_exceptions

logger = logging.getLogger(__name__)


@contextmanager
def temp_log_level(level, logger=None):
    """Temporarily set ``logger`` (root logger by default) to ``level``.

    The previous level is restored on exit, even if the body raises.

    Args:
        level (int): Logging level to apply inside the context.
        logger (logging.Logger, optional): Logger to adjust. Defaults to the
            root logger.
    """
    logger = logger or logging.getLogger()
    old_level = logger.level
    logger.setLevel(level)
    try:
        yield
    finally:
        logger.setLevel(old_level)


@trace_async_exceptions
async def run_flask_server(coordinator_port: int, tokenizer, rank: int, flask_port: int):
    """Initialize and run the async Flask text-generation server.

    Connects an ``InferenceClient`` to the coordinator, registers all endpoint
    blueprints, and serves the app with Hypercorn until cancelled. The client
    is stopped on shutdown regardless of how ``serve`` exits.

    Args:
        coordinator_port (int): Port of the inference coordinator to connect to.
        tokenizer: Tokenizer instance exposed to the endpoint blueprints.
        rank (int): Rank of this process (used for logging only).
        flask_port (int): Port to bind the HTTP server on.

    Raises:
        RuntimeError: If Flask/Hypercorn are not installed.
    """
    if not HAS_FLASK:
        raise RuntimeError("Flask not available")

    try:
        hostname = socket.gethostname()
    except Exception as e:
        # Best-effort: the hostname is only used for the startup log message.
        logger.warning(f"Could not get hostname: {e}")
        hostname = "0.0.0.0"

    inference_client = InferenceClient(coordinator_port)
    await inference_client.start()
    logger.info(f"Rank {rank}: InferenceClient connected.")

    app = Flask(__name__)

    # Store client and tokenizer in app config for Blueprints to use.
    app.config['client'] = inference_client
    app.config['tokenizer'] = tokenizer

    # Register all blueprints from the 'endpoints' package.
    for endpoint in endpoints.__all__:
        app.register_blueprint(endpoint)

    @app.route('/')
    def health_check():
        return "Megatron Dynamic Inference Server is running."

    config = Config()
    config.bind = [f"0.0.0.0:{flask_port}"]

    # Force logging level to INFO to ensure that hostname is printed.
    with temp_log_level(logging.INFO, logger):
        logger.info(f"Starting Flask server on http://{hostname}:{flask_port}")

    try:
        await serve(app, config)
    finally:
        # Always tear down the client, even if serve() is cancelled or fails.
        await inference_client.stop()
        logger.info(f"Rank {rank}: Flask server and client shut down.")


# --- megatron/core/inference/text_generation_server/dynamic_text_gen_server/tokenization.py (new file) ---
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Tokenization utilities."""

import torch

from megatron.core import parallel_state
from megatron.core.inference.communication_utils import broadcast_int_list, broadcast_tensor


def tokenize_prompts(
    tokenizer, prompts=None, tokens_to_generate=None, add_BOS=None, rank=0, data_parallel=False
):
    """Tokenize prompts and make them available on all ranks.

    Only the rank matching ``rank`` builds the padded token tensor and length
    tensor; both are then broadcast (sizes first, then data) to the other
    ranks.

    Args:
        tokenizer: Tokenizer with a ``tokenize`` method and an ``eod``/``eos_id``
            attribute.
        prompts (list[str], optional): Prompts to tokenize. Required on the
            source rank.
        tokens_to_generate (int, optional): Number of tokens to generate.
            Required on the source rank.
        add_BOS (bool, optional): Whether to prepend the EOD token to each prompt.
        rank (int): Rank that performs the tokenization and acts as the
            broadcast source.
        data_parallel (bool): Broadcast tokens across a single data parallel model replica.

    Returns:
        tuple: ``(tokens, lengths)`` CUDA long tensors — ``tokens`` padded to
        ``max_prompt_len + tokens_to_generate`` columns, ``lengths`` holding
        the unpadded prompt lengths.
    """
    # On all ranks set to None so we can pass them to functions.
    sizes_list = None
    prompts_tokens_cuda_long_tensor = None
    prompts_length_cuda_long_tensor = None

    # On the specified rank, build the above.
    src_rank = torch.distributed.get_rank()
    if data_parallel:
        src_rank = parallel_state.get_data_parallel_src_rank()

    if src_rank == rank:
        assert prompts is not None
        assert tokens_to_generate is not None
        # Tensor of tokens padded and their unpadded length.
        prompts_tokens_cuda_long_tensor, prompts_length_cuda_long_tensor = (
            _tokenize_prompts_and_batch(tokenizer, prompts, tokens_to_generate, add_BOS)
        )
        # We need the sizes of these tensors for the broadcast.
        sizes_list = [
            prompts_tokens_cuda_long_tensor.size(0),  # Batch size
            prompts_tokens_cuda_long_tensor.size(1),  # Sequence length
        ]

    # First, broadcast the sizes.
    sizes_tensor = broadcast_int_list(
        2, int_list=sizes_list, rank=rank, data_parallel=data_parallel
    )

    # Now that we have the sizes, we can broadcast the tokens
    # and length tensors.
    sizes = sizes_tensor.tolist()
    prompts_tokens_cuda_long_tensor = broadcast_tensor(
        sizes,
        torch.int64,
        tensor=prompts_tokens_cuda_long_tensor,
        rank=rank,
        data_parallel=data_parallel,
    )
    prompts_length_cuda_long_tensor = broadcast_tensor(
        sizes[0],
        torch.int64,
        tensor=prompts_length_cuda_long_tensor,
        rank=rank,
        data_parallel=data_parallel,
    )

    return prompts_tokens_cuda_long_tensor, prompts_length_cuda_long_tensor


def _tokenize_prompts_and_batch(tokenizer, prompts, tokens_to_generate, add_BOS):
    """Given a set of prompts and number of tokens to generate:
    - tokenize prompts
    - set the sequence length to be the max of length of prompts
      plus the number of tokens we would like to generate
    - pad all the sequences to this length so we can convert them
      into a 2D tensor.
    """
    # Tokenize all the prompts.
    if hasattr(tokenizer, 'eod'):
        eod_token = tokenizer.eod
    elif hasattr(tokenizer, 'eos_id'):
        eod_token = tokenizer.eos_id
    else:
        raise AttributeError('No eod token found in Tokenizer')
    if add_BOS:
        # Note: the EOD token doubles as the BOS token here.
        prompts_tokens = [[eod_token] + tokenizer.tokenize(prompt) for prompt in prompts]
    else:
        prompts_tokens = [tokenizer.tokenize(prompt) for prompt in prompts]

    # Now we have a list of lists of tokens where each list has a different
    # size. We want to extend this list to:
    #   - incorporate the tokens that need to be generated
    #   - make all the sequences equal length.
    # Get the prompts length.
    prompts_length = [len(prompt_tokens) for prompt_tokens in prompts_tokens]
    # Get the max prompts length.
    max_prompt_len = max(prompts_length)
    # Number of tokens in each sample of the batch.
    samples_length = max_prompt_len + tokens_to_generate
    # Now update the list of lists to be of the same size: samples_length.
    for prompt_tokens, prompt_length in zip(prompts_tokens, prompts_length):
        padding_size = samples_length - prompt_length
        prompt_tokens.extend([eod_token] * padding_size)

    # Now we are in a structured format, we can convert to tensors.
    prompts_tokens_tensor = torch.tensor(prompts_tokens, dtype=torch.long, device='cuda')
    prompts_length_tensor = torch.tensor(prompts_length, dtype=torch.long, device='cuda')

    return prompts_tokens_tensor, prompts_length_tensor
+_ctypes_lib = None # ctypes handle to compiled extension +_ctypes_lock = threading.Lock() +_compilation_error: str | None = None # store last failure reason for better error messages @contextmanager @@ -74,11 +81,19 @@ def _handler(signum, frame): def compile_allocator(): """Attempt to compile UVM allocator.""" - global _compilation_state, _alloc, _mod + global _compilation_state, _alloc, _mod, _so_path, _ctypes_lib, _compilation_error if _compilation_state != CompilationState.UNATTEMPTED: return + if not _has_mem_pool: + _compilation_state = CompilationState.FAILURE + _compilation_error = ( + "PyTorch does not expose CUDA MemPool on this build/version. " + "UVM mempool requires torch.cuda.MemPool or torch.cuda.memory.MemPool." + ) + return + _mempool_c_src = r""" #include #include @@ -134,6 +149,59 @@ def compile_allocator(): (void)size; (void)device; (void)stream; if (ptr) cudaFree(ptr); } + + // Prefetch managed memory to a device (or to CPU with cudaCpuDeviceId == -1). + EXPORT int managed_prefetch(void* ptr, size_t size, int device, void* stream) { + cudaStream_t s = (cudaStream_t)stream; + cudaError_t err; + #if CUDART_VERSION >= 13000 + cudaMemLocation location; + if (device == (int)-1) { + location.type = cudaMemLocationTypeHost; + location.id = 0; + } else { + location.type = cudaMemLocationTypeDevice; + location.id = device; + } + err = cudaMemPrefetchAsync(ptr, (size_t)size, location, 0, s); + #else + err = cudaMemPrefetchAsync(ptr, (size_t)size, device, s); + #endif + return (int)err; + } + + // Update preferred location advice for managed memory (GPU device id, or CPU with cudaCpuDeviceId == -1). 
+ EXPORT int managed_advise_preferred_location(void* ptr, size_t size, int device) { + cudaError_t err; + #if CUDART_VERSION >= 13000 + cudaMemLocation location; + if (device == (int)-1) { + location.type = cudaMemLocationTypeHost; + location.id = 0; + } else { + location.type = cudaMemLocationTypeDevice; + location.id = device; + } + err = cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, location); + #else + err = cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, device); + #endif + return (int)err; + } + + // Ensure a device is in the page table for this managed region. + EXPORT int managed_advise_accessed_by(void* ptr, size_t size, int device) { + cudaError_t err; + #if CUDART_VERSION >= 13000 + cudaMemLocation location; + location.type = cudaMemLocationTypeDevice; + location.id = device; + err = cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, location); + #else + err = cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, device); + #endif + return (int)err; + } """ # Define a timeout of 30s for how long the build is allowed to run. @@ -160,14 +228,16 @@ def compile_allocator(): _cpa = CUDAPluggableAllocator(_so_path, "managed_malloc", "managed_free") _alloc = _cpa.allocator() _compilation_state = CompilationState.SUCCESS + _compilation_error = None except (RuntimeError, ImportError, OSError, UnifiedMemoryCompileTimeoutError) as e: + _compilation_error = str(e) warnings.warn(f"Failed to create unified memory mempool: '{e}'.") _compilation_state = CompilationState.FAILURE + _so_path = None + _ctypes_lib = None # Synchronize failure state across ranks. (For currently unknown reasons, # one rank can show as FAILURE while the remaining ranks show as SUCCESS.) - import torch - local_state = torch.tensor( [_compilation_state.value], dtype=torch.uint8, device=torch.cuda.current_device() ) @@ -193,6 +263,264 @@ def create_unified_mempool() -> "MemPool": # Return mempool. 
# NOTE(review): the patch hunk editing the tail of create_unified_mempool() is
# only partially visible at this point in the dump and is not reproduced here.


def _get_ctypes_lib() -> "ctypes.CDLL":
    """Return a ctypes handle to the compiled UVM extension (.so).

    Triggers allocator compilation on first use (via ``compile_allocator``) and
    caches the ``CDLL`` handle, configuring argtypes/restype for the exported
    helpers exactly once.

    Raises:
        UnifiedMemoryUnsupportedError: If compilation did not succeed or the
            shared-object path is unknown.
    """
    global _ctypes_lib
    compile_allocator()
    if _compilation_state != CompilationState.SUCCESS or _so_path is None:
        raise UnifiedMemoryUnsupportedError()
    if _ctypes_lib is not None:
        return _ctypes_lib
    with _ctypes_lock:
        # Double-checked locking: another thread may have loaded it already.
        if _ctypes_lib is None:
            _ctypes_lib = ctypes.CDLL(_so_path)
            # Configure argtypes/restype for exported helpers.
            _ctypes_lib.managed_prefetch.argtypes = [
                ctypes.c_void_p,
                ctypes.c_size_t,
                ctypes.c_int,
                ctypes.c_void_p,
            ]
            _ctypes_lib.managed_prefetch.restype = ctypes.c_int
            _ctypes_lib.managed_advise_preferred_location.argtypes = [
                ctypes.c_void_p,
                ctypes.c_size_t,
                ctypes.c_int,
            ]
            _ctypes_lib.managed_advise_preferred_location.restype = ctypes.c_int
            _ctypes_lib.managed_advise_accessed_by.argtypes = [
                ctypes.c_void_p,
                ctypes.c_size_t,
                ctypes.c_int,
            ]
            _ctypes_lib.managed_advise_accessed_by.restype = ctypes.c_int
    return _ctypes_lib


def prefetch_managed_tensor(tensor, *, device: int, stream=None) -> None:
    """Prefetch a CUDA tensor allocated from the UVM mempool to a specific device.

    This uses `cudaMemPrefetchAsync` to physically migrate the pages backing the tensor.
    The virtual address (pointer) remains unchanged, making this safe for use with
    recorded CUDA graphs.

    Args:
        tensor (torch.Tensor): CUDA tensor allocated from the UVM mempool.
        device (int): Target device ID. Use -1 (cudaCpuDeviceId) to prefetch to CPU.
        stream (torch.cuda.Stream, optional): Stream to use for the asynchronous prefetch.
            Defaults to the current stream.

    Raises:
        TypeError: If ``tensor`` is not a ``torch.Tensor``.
        ValueError: If ``tensor`` is not on a CUDA device.
        RuntimeError: If the underlying CUDA call fails.
    """
    if tensor is None:
        return
    if not isinstance(tensor, torch.Tensor):
        raise TypeError("prefetch_managed_tensor expects a torch.Tensor")
    if tensor.numel() == 0:
        return  # nothing to migrate
    if not tensor.is_cuda:
        raise ValueError("prefetch_managed_tensor expects a CUDA tensor")

    lib = _get_ctypes_lib()
    nbytes = tensor.nbytes
    if stream is None:
        stream = torch.cuda.current_stream()
    # torch.cuda.Stream exposes a cuda_stream integer handle.
    stream_ptr = ctypes.c_void_p(int(stream.cuda_stream))
    err = lib.managed_prefetch(
        ctypes.c_void_p(int(tensor.data_ptr())), ctypes.c_size_t(nbytes), int(device), stream_ptr
    )
    if err != 0:
        raise RuntimeError(f"cudaMemPrefetchAsync failed with cudaError={err}")


def advise_managed_tensor_preferred_location(tensor, *, device: int) -> None:
    """Set the preferred physical location hint for a managed tensor.

    This uses `cudaMemAdviseSetPreferredLocation`. It tells the CUDA driver where the
    pages should ideally reside. Unlike prefetch, this is a hint and does not
    immediately trigger migration unless the driver decides it is necessary.

    Args:
        tensor (torch.Tensor): CUDA tensor allocated from the UVM mempool.
        device (int): Preferred device ID. Use -1 (cudaCpuDeviceId) for CPU.

    Raises:
        TypeError: If ``tensor`` is not a ``torch.Tensor``.
        ValueError: If ``tensor`` is not on a CUDA device.
        RuntimeError: If the underlying CUDA call fails.
    """
    if tensor is None:
        return
    if not isinstance(tensor, torch.Tensor):
        raise TypeError("advise_managed_tensor_preferred_location expects a torch.Tensor")
    if tensor.numel() == 0:
        return
    if not tensor.is_cuda:
        raise ValueError("advise_managed_tensor_preferred_location expects a CUDA tensor")

    lib = _get_ctypes_lib()
    nbytes = tensor.nbytes
    err = lib.managed_advise_preferred_location(
        ctypes.c_void_p(int(tensor.data_ptr())), ctypes.c_size_t(nbytes), int(device)
    )
    if err != 0:
        # Bug fix: the original message named the wrong advice
        # (cudaMemAdviseSetAccessedBy); this function sets the preferred location.
        raise RuntimeError(f"cudaMemAdviseSetPreferredLocation failed with cudaError={err}")


def advise_managed_tensor_accessed_by(tensor, *, device: int) -> None:
    """Hint that a specific device will access the managed tensor.

    This uses `cudaMemAdviseSetAccessedBy`. It ensures that the mapping for this
    memory region is established in the page tables of the specified device,
    reducing page fault latency when the device first touches the data.

    Args:
        tensor (torch.Tensor): CUDA tensor allocated from the UVM mempool.
        device (int): Device ID that will access the tensor. Must be a GPU ID.

    Raises:
        TypeError: If ``tensor`` is not a ``torch.Tensor``.
        ValueError: If ``tensor`` is not on a CUDA device.
        RuntimeError: If the underlying CUDA call fails.
    """
    if tensor is None:
        return
    if not isinstance(tensor, torch.Tensor):
        raise TypeError("advise_managed_tensor_accessed_by expects a torch.Tensor")
    if tensor.numel() == 0:
        return
    if not tensor.is_cuda:
        raise ValueError("advise_managed_tensor_accessed_by expects a CUDA tensor")

    lib = _get_ctypes_lib()
    nbytes = tensor.nbytes
    err = lib.managed_advise_accessed_by(
        ctypes.c_void_p(int(tensor.data_ptr())), ctypes.c_size_t(nbytes), int(device)
    )
    if err != 0:
        raise RuntimeError(f"cudaMemAdviseSetAccessedBy failed with cudaError={err}")


def prefetch_managed_module_parameters(
    module, *, device: int, include_buffers: bool = False
) -> int:
    """Prefetch all UVM-allocated parameters (and optionally buffers) of a module.

    Iterates through all parameters of the module and initiates an asynchronous
    migration to the target device. This is typically used to offload weights to
    CPU during training or prefetch them to GPU before inference.

    Args:
        module (torch.nn.Module): The module containing UVM parameters.
        device (int): Target device ID (-1 for CPU).
        include_buffers (bool, optional): Whether to also prefetch module buffers.
            Defaults to False.

    Returns:
        int: The total number of bytes for which prefetch was initiated.

    Raises:
        RuntimeError: If a prefetch fails; the tensor's name/shape/dtype are
            attached to the chained error for diagnosis.
    """
    if module is None:
        return 0

    # Avoid duplicate prefetch on shared tensors.
    seen_ptrs: set[int] = set()
    total_nbytes = 0
    stream = torch.cuda.current_stream()

    for name, p in module.named_parameters(recurse=True):
        if p is None:
            continue
        t = p.data
        if not isinstance(t, torch.Tensor) or not t.is_cuda or t.numel() == 0:
            continue
        ptr = int(t.data_ptr())
        if ptr in seen_ptrs:
            continue
        seen_ptrs.add(ptr)
        nbytes = t.nbytes
        # Bug fix: prefetch_managed_tensor returns None and raises on failure, so
        # the original `err = ...; if err:` branch was dead code. Chain instead.
        try:
            prefetch_managed_tensor(t, device=device, stream=stream)
        except RuntimeError as e:
            raise RuntimeError(
                f"cudaMemPrefetchAsync failed for parameter '{name}': "
                f"shape={tuple(t.shape)}, dtype={t.dtype}, device={t.device}, "
                f"data_ptr=0x{t.data_ptr():x}, nbytes={nbytes}. "
                "This tensor is not UVM-allocated."
            ) from e
        total_nbytes += nbytes

    if include_buffers:
        for name, b in module.named_buffers(recurse=True):
            if b is None:
                continue
            if not isinstance(b, torch.Tensor) or not b.is_cuda or b.numel() == 0:
                continue
            ptr = int(b.data_ptr())
            if ptr in seen_ptrs:
                continue
            seen_ptrs.add(ptr)
            nbytes = b.nbytes
            try:
                prefetch_managed_tensor(b, device=device, stream=stream)
            except RuntimeError as e:
                raise RuntimeError(
                    f"cudaMemPrefetchAsync failed for buffer '{name}': "
                    f"shape={tuple(b.shape)}, dtype={b.dtype}, device={b.device}, "
                    f"data_ptr=0x{b.data_ptr():x}, nbytes={nbytes}. "
                    "This tensor is not UVM-allocated."
                ) from e
            total_nbytes += nbytes

    return total_nbytes


def advise_managed_module_parameters_preferred_location(
    module, *, device: int, include_buffers: bool = False
) -> None:
    """Set the preferred physical location hint for all UVM parameters in a module.

    Args:
        module (torch.nn.Module): The module containing UVM parameters.
        device (int): Preferred device ID (-1 for CPU).
        include_buffers (bool, optional): Whether to also advise on module buffers.
            Defaults to False.

    Raises:
        RuntimeError: If an advise call fails; the tensor's name/shape/dtype are
            attached to the chained error for diagnosis.
    """
    if module is None:
        return

    seen_ptrs: set[int] = set()
    for name, p in module.named_parameters(recurse=True):
        if p is None:
            continue
        t = p.data
        if not isinstance(t, torch.Tensor) or not t.is_cuda or t.numel() == 0:
            continue
        ptr = int(t.data_ptr())
        if ptr in seen_ptrs:
            continue
        seen_ptrs.add(ptr)
        # Bug fix: the advise helper returns None and raises on failure, so the
        # original `err = ...; if err:` branch was dead code. Chain instead.
        try:
            advise_managed_tensor_preferred_location(t, device=device)
        except RuntimeError as e:
            raise RuntimeError(
                f"cudaMemAdviseSetPreferredLocation failed for param '{name}': "
                f"shape={tuple(t.shape)}, dtype={t.dtype}, device={t.device}, "
                f"data_ptr=0x{t.data_ptr():x}, nbytes={t.nbytes}. "
                "This tensor is not UVM-allocated."
            ) from e

    if include_buffers:
        for name, b in module.named_buffers(recurse=True):
            if b is None:
                continue
            if not isinstance(b, torch.Tensor) or not b.is_cuda or b.numel() == 0:
                continue
            ptr = int(b.data_ptr())
            if ptr in seen_ptrs:
                continue
            seen_ptrs.add(ptr)
            try:
                advise_managed_tensor_preferred_location(b, device=device)
            except RuntimeError as e:
                raise RuntimeError(
                    f"cudaMemAdviseSetPreferredLocation failed for buf '{name}': "
                    f"shape={tuple(b.shape)}, dtype={b.dtype}, device={b.device}, "
                    f"data_ptr=0x{b.data_ptr():x}, nbytes={b.nbytes}. "
                    "This tensor is not UVM-allocated."
                ) from e
linear_kv=TEColumnParallelLinear, - core_attention=TEDotProductAttention, + linear_q=not_none(TEColumnParallelLinear), + linear_kv=not_none(TEColumnParallelLinear), + core_attention=not_none(TEDotProductAttention), linear_proj=TERowParallelLinear, ), ), diff --git a/megatron/core/models/backends.py b/megatron/core/models/backends.py index 29169285b3e..7f84599a04c 100644 --- a/megatron/core/models/backends.py +++ b/megatron/core/models/backends.py @@ -153,7 +153,7 @@ def fuse_layernorm_and_linear(self) -> bool: """TE backend chooses a single module for layernorm and linear""" return True - def column_parallel_layer_norm_linear(self) -> Optional[type]: + def column_parallel_layer_norm_linear(self) -> type[InferenceLayerNormColumnParallelLinear]: """Which module for sequential layernorm and linear""" return InferenceLayerNormColumnParallelLinear @@ -166,7 +166,7 @@ def layer_norm(self, rms_norm: bool = False, for_qk: bool = False) -> type: return FusedLayerNorm return TENorm - def core_attention(self) -> type: + def core_attention(self) -> type[TEDotProductAttention]: """Which module to use for attention""" return TEDotProductAttention diff --git a/megatron/core/models/bert/bert_layer_specs.py b/megatron/core/models/bert/bert_layer_specs.py index 69cec788b2c..8415ef02cc5 100644 --- a/megatron/core/models/bert/bert_layer_specs.py +++ b/megatron/core/models/bert/bert_layer_specs.py @@ -10,6 +10,7 @@ from megatron.core.transformer.mlp import MLP, MLPSubmodules from megatron.core.transformer.spec_utils import ModuleSpec from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules +from megatron.core.typed_torch import not_none try: import transformer_engine as te # pylint: disable=unused-import @@ -22,6 +23,11 @@ HAVE_TE = True except ImportError: + (TEDotProductAttention, TELayerNormColumnParallelLinear, TERowParallelLinear) = ( + None, + None, + None, + ) HAVE_TE = False try: @@ -57,8 +63,8 @@ def 
get_bert_layer_with_transformer_engine_spec(): module=SelfAttention, params={"attn_mask_type": AttnMaskType.padding}, submodules=SelfAttentionSubmodules( - linear_qkv=TELayerNormColumnParallelLinear, - core_attention=TEDotProductAttention, + linear_qkv=not_none(TELayerNormColumnParallelLinear), + core_attention=not_none(TEDotProductAttention), linear_proj=TERowParallelLinear, q_layernorm=IdentityOp, k_layernorm=IdentityOp, diff --git a/megatron/core/models/common/embeddings/rotary_pos_embedding.py b/megatron/core/models/common/embeddings/rotary_pos_embedding.py index 5d7b69cd34e..05a7e8f60bb 100644 --- a/megatron/core/models/common/embeddings/rotary_pos_embedding.py +++ b/megatron/core/models/common/embeddings/rotary_pos_embedding.py @@ -147,7 +147,6 @@ def get_cos_sin(self, max_seq_len: int, offset: int = 0) -> (Tensor, Tensor): sin = torch.sin(freqs) return cos, sin - @lru_cache(maxsize=32) def get_emb(self, max_seq_len: int, offset: int = 0) -> Tensor: """Forward pass of RoPE embedding before CP sharding. @@ -175,28 +174,30 @@ def get_emb(self, max_seq_len: int, offset: int = 0) -> Tensor: emb = emb[:, None, None, :] return emb + @lru_cache(maxsize=32) @internal_api def forward( - self, max_seq_len: int, offset: int = 0, packed_seq_params: Optional[PackedSeqParams] = None + self, + max_seq_len: int, + offset: int = 0, + packed_seq: bool = False, + cp_group: Optional[torch.distributed.ProcessGroup] = None, ) -> Tensor: """Forward pass of RoPE embedding. Args: max_seq_len (int): Maximum size of sequence offset (int, optional): RoPE offset. Defaults to 0. - packed_seq_params (PackedSeqParams, optional): Packed sequence params. Defaults to None. + packed_seq (bool, optional): Whether to use packed sequence. Defaults to False. + cp_group (torch.distributed.ProcessGroup, optional): Context parallel group. + Defaults to None. Returns: Tensor: Embeddings after applying RoPE. 
""" emb = self.get_emb(max_seq_len, offset) - packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' - if packed_seq_params is not None and packed_seq_params.local_cp_size is not None: - # Set CP group to dynamic CP group for CP slicing - cp_group = packed_seq_params.cp_group - else: + if cp_group is None: cp_group = self.cp_group - if cp_group is not None and cp_group.size() > 1 and not packed_seq: # slice rotary_pos_emb along sequence dimension # and select the parition of the current CP rank @@ -307,7 +308,7 @@ def forward( self, position_ids: torch.Tensor, mrope_section: List[int], - packed_seq_params: Optional[PackedSeqParams] = None, + cp_group: Optional[torch.distributed.ProcessGroup] = None, ) -> Tensor: """Forward pass of multimodal RoPE embedding. @@ -315,7 +316,8 @@ def forward( position_ids (torch.Tensor): A postion_id tensor with shape [3, batchsize, seqlens] mrope_section (list[int]): Multimodal rope section is for channel dimension of temporal, height and width in rope calculation. - packed_seq_params (PackedSeqParams, optional): Packed sequence params. Defaults to None. + cp_group (torch.distributed.ProcessGroup, optional): Context parallel group. + Defaults to None. Returns: Tensor: Embeddings after applying RoPE. 
@@ -348,14 +350,7 @@ def forward( # shape (seq_length, bs, 1, 2 * dim) emb = emb[..., None, :].transpose(0, 1).contiguous() - if packed_seq_params is not None and packed_seq_params.local_cp_size is not None: - if packed_seq_params.local_cp_size > 1: - # Set CP group to dynamic CP group for CP slicing - cp_group = packed_seq_params.cp_group - else: - # Set CP group to None to avoid CP slicing - cp_group = None - else: + if cp_group is None: cp_group = self.cp_group if cp_group is not None and cp_group.size() > 1: # slice rotary_pos_emb along sequence dimension and select the parition of the current diff --git a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py index 7b224ec56c0..bc5a9c5fa3f 100644 --- a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +++ b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py @@ -99,11 +99,10 @@ def __init__( self.original_max_position_embeddings, offset=0, dtype=torch.get_default_dtype() ) - # clear the lru_cache for the get_emb method. If not cleared, the cache of get_emb + # clear the lru_cache for the forward method. If not cleared, the cache of forward # method causes a memory leak in NeMo-RL. - self.get_emb.cache_clear() + self.forward.cache_clear() - @lru_cache(maxsize=32) def get_emb(self, max_seq_len: int, offset: int = 0) -> Tensor: """Forward pass of Yarn Rotary Embedding. @@ -157,26 +156,29 @@ def get_emb(self, max_seq_len: int, offset: int = 0) -> Tensor: emb = emb[:, None, None, :] return emb, _mscale + @lru_cache(maxsize=32) @internal_api def forward( - self, max_seq_len: int, offset: int = 0, packed_seq_params: Optional[PackedSeqParams] = None + self, + max_seq_len: int, + offset: int = 0, + packed_seq: bool = False, + cp_group: Optional[torch.distributed.ProcessGroup] = None, ) -> Tensor: """Forward pass of Yarn Rotary Embedding. 
Args: max_seq_len (int): Maximum size of sequence offset (int, optional): RoPE offset. Defaults to 0. - packed_seq_params (PackedSeqParams, optional): Packed sequence params. Defaults to None. + packed_seq (bool, optional): Whether to use packed sequence. Defaults to False. + cp_group (torch.distributed.ProcessGroup, optional): Context parallel group. + Defaults to None. Returns: Tensor: Embeddings after applying Yarn RoPE. """ emb, _mscale = self.get_emb(max_seq_len, offset) - packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' - if packed_seq_params is not None and packed_seq_params.local_cp_size is not None: - # Set CP group to dynamic CP group for CP slicing - cp_group = packed_seq_params.cp_group - else: + if cp_group is None: cp_group = self.cp_group if cp_group is not None and cp_group.size() > 1 and not packed_seq: # slice rotary_pos_emb along sequence dimension @@ -184,15 +186,13 @@ def forward( emb = get_pos_emb_on_this_cp_rank(emb, 0, cp_group) return emb, _mscale - def _set_cos_sin_cache(self, seq_len, offset, dtype, packed_seq_params=None): + def _set_cos_sin_cache(self, seq_len, offset, dtype, packed_seq=False, cp_group=None): self.max_seq_len_cached = seq_len self.offset_cached = offset self.dtype_cached = dtype - self.packed_seq_cached = ( - packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' - ) + self.packed_seq_cached = packed_seq - emb, _mscale = self.forward(seq_len, offset, packed_seq_params) + emb, _mscale = self.forward(seq_len, offset, packed_seq=packed_seq, cp_group=cp_group) self.register_buffer( "cos_cached", (emb.cos() * _mscale).to(dtype).contiguous(), persistent=False ) @@ -201,17 +201,16 @@ def _set_cos_sin_cache(self, seq_len, offset, dtype, packed_seq_params=None): ) def get_cached_cos_sin( - self, seq_len, offset=0, dtype=torch.get_default_dtype(), packed_seq_params=None + self, seq_len, offset=0, dtype=torch.get_default_dtype(), packed_seq=False, cp_group=None ): """Get cached 
cos and sin values.""" - packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' if ( seq_len > self.max_seq_len_cached or offset != self.offset_cached or dtype != self.dtype_cached or packed_seq != self.packed_seq_cached ): - self._set_cos_sin_cache(seq_len, offset, dtype, packed_seq_params) + self._set_cos_sin_cache(seq_len, offset, dtype, packed_seq, cp_group) return (self.cos_cached[:seq_len, ...], self.sin_cached[:seq_len, ...]) diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 0c29423edab..d11e53d7fc2 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -35,7 +35,7 @@ class TransformerLayerSchedulePlan: mtp post process nodes. layer (TransformerLayerSchedulePlan) - ├── attn (TransformerLayerNode): attention -> router -> dispatch preprocess + ├── attn (TransformerLayerNode): attention -> layernorm -> router -> dispatch preprocess ├── moe_dispatch (TransformerLayerNode): dispatch All2All ├── mlp (TransformerLayerNode): mlp module ├── moe_combine (TransformerLayerNode): combine All2All @@ -88,9 +88,6 @@ def release_state(self): if hasattr(self, 'attn') and self.attn is not None: del self.attn self.attn = None - if hasattr(self, 'post_attn') and self.post_attn is not None: - del self.post_attn - self.post_attn = None if hasattr(self, 'moe_dispatch') and self.moe_dispatch is not None: del self.moe_dispatch self.moe_dispatch = None @@ -356,10 +353,6 @@ def __init__( model, self._model_chunk_state, self._event, comp_stream ) - # preprocess may receive dgrad from attn, which is managed by cuda graph. 
- if CudaGraphScope.attn in model.config.cuda_graph_scope: - self.pre_process.manual_grads_release = False - def _build_layer_schedule_plan(self, module, comp_stream, comm_stream): if module is None: return diff --git a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py index 7649a0b2165..a7cc7cc0a55 100644 --- a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py +++ b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py @@ -123,7 +123,6 @@ def get_dsa_module_spec_for_backend( q_layernorm=IdentityOp, kv_layernorm=IdentityOp, ), - metainfo={"fuse_input_layernorm": False}, ) return attention @@ -150,12 +149,12 @@ def get_experimental_attention_variant_module_spec( ########## -def get_transformer_layer_with_experimental_attention_variant_spec( - config: TransformerConfig, backend: BackendSpecProvider = None -) -> List[ModuleSpec]: - """Build transformer layer specs with experimental attention variants (e.g., linear attention). +def get_transformer_block_with_experimental_attention_variant_spec( + config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None +) -> TransformerBlockSubmodules: + """Build transformer block spec with experimental attention variants (e.g., linear attention). - This function is for constructing a heterogeneous transformer that supports mixing different + This function constructs a heterogeneous transformer block that supports mixing different attention mechanisms (experimental vs standard) and MLP types (MoE vs dense) across layers. **Note that, this API is a experimental API in the short term, and might be deprecated in the future. In the long run, we will move to a new design that better support hybrid models.** @@ -171,19 +170,22 @@ def get_transformer_layer_with_experimental_attention_variant_spec( 2. 
Per-Layer Spec Construction: Iterates through layers, constructing transformer layer specs based on attention and MLP patterns. + 3. Pipeline Slicing: Extracts layer specs for the current pipeline stage. + Args: config: Transformer configuration containing model hyperparameters and feature flags. + vp_stage: Virtual pipeline stage index for interleaved pipeline parallelism. + pp_rank: Pipeline model parallel rank. Returns: - List[ModuleSpec] containing per-layer specs. + TransformerBlockSubmodules containing per-layer specs and final layer norm. Note: Currently only supports transformer_engine backend. Kitchen backend can be used as a wrapper with TE fallback for unsupported operations. """ - if backend is None: - backend = _get_backend_spec_provider(config=config) + backend = _get_backend_spec_provider(config=config) # Get attention patterns and specs experimental_attention_pattern = [0] * config.num_layers @@ -255,42 +257,6 @@ def get_transformer_layer_with_experimental_attention_variant_spec( ) ) - return layer_specs - - -def get_transformer_block_with_experimental_attention_variant_spec( - config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None -) -> TransformerBlockSubmodules: - """Build transformer block spec with experimental attention variants (e.g., linear attention). - - This function constructs a heterogeneous transformer block that supports mixing different - attention mechanisms (experimental vs standard) and MLP types (MoE vs dense) across layers. - **Note that, this API is a experimental API in the short term, and might be deprecated in the - future. In the long run, we will move to a new design that better support hybrid models.** - - Constructing transformer layer specs by - `get_transformer_layer_with_experimental_attention_variant_spec` and then slicing the - layer specs to only include the layers that are built in this pipeline stage. 
- - Args: - config: Transformer configuration containing model hyperparameters and feature flags. - vp_stage: Virtual pipeline stage index for interleaved pipeline parallelism. - pp_rank: Pipeline model parallel rank. - - Returns: - TransformerBlockSubmodules containing per-layer specs and final layer norm. - - Note: - Currently only supports transformer_engine backend. Kitchen backend can be used as a - wrapper with TE fallback for unsupported operations. - """ - - backend = _get_backend_spec_provider(config=config) - - layer_specs = get_transformer_layer_with_experimental_attention_variant_spec( - config=config, backend=backend - ) - # Slice the layer specs to only include the layers that are built in this pipeline stage. if config.pipeline_model_parallel_layout is not None: local_layer_ids = config.pipeline_model_parallel_layout.get_layer_id_list( @@ -304,7 +270,6 @@ def get_transformer_block_with_experimental_attention_variant_spec( layer_specs = [layer_specs[layer_id] for layer_id in local_layer_ids] # Get GPT decoder block spec - rms_norm = config.normalization == "RMSNorm" gpt_decoder_block_spec = TransformerBlockSubmodules( layer_specs=layer_specs, layer_norm=backend.layer_norm(rms_norm=rms_norm, for_qk=False) ) @@ -394,7 +359,7 @@ def _get_backend_spec_provider(config: TransformerConfig) -> BackendSpecProvider ) backend: BackendSpecProvider = ( KitchenSpecProvider( - fallback=TESpecProvider(fallback_to_eager_attn=config.fallback_to_eager_attn), + fallback=TESpecProvider(), use_kitchen_attention=config.use_kitchen_attention, kitchen_attention_backend=config.kitchen_attention_backend, ) @@ -431,7 +396,6 @@ def _get_self_attention_module_spec( qk_l2_norm=config.qk_l2_norm, use_kitchen=config.use_kitchen, use_te_activation_func=config.use_te_activation_func, - fallback_to_eager_attn=config.fallback_to_eager_attn, use_kitchen_attention=config.use_kitchen_attention, kitchen_attention_backend=config.kitchen_attention_backend, ) diff --git 
a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index 5a365b015b2..6f2f6b1cb80 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -15,7 +15,7 @@ ) from megatron.core.pipeline_parallel.utils import ScheduleNode, make_viewless from megatron.core.transformer.enums import CudaGraphScope -from megatron.core.transformer.module import float16_to_fp32 +from megatron.core.transformer.module import GraphableMegatronModule, float16_to_fp32 from megatron.core.transformer.moe.moe_layer import MoELayer from megatron.core.transformer.multi_token_prediction import ( MultiTokenPredictionLayer, @@ -321,8 +321,8 @@ def backward_dw(self): module.backward_dw() # the output grad memory is last used in wgrad compute, should be safe to release. - if self.manual_grads_release: - assert self.delay_grads_release, "output grad memory should be valid before wgrad." + assert self.delay_grads_release, "output grad memory should be valid before wgrad." + if self.manual_release_grads: for tensor in self.output_grads: tensor.untyped_storage().resize_(0) self.output_grads = None @@ -338,6 +338,55 @@ def __del__(self): self.submodule = None +class _BackwardDWWrapper: + """Wrapper for managing backward weight gradient computation of attn module. + + This class handles the execution of weight gradient computations for transformer layers, + coordinating between CUDA graphed and non-graphed components. It is used when + overlap_moe_expert_parallel_comm and delay_wgrad_compute are enabled to manage + the delayed weight gradient computation in MoE models. + + The wrapper stores references to the attention and shared expert backward weight gradient + callables, and determines which components should be executed based on whether CUDA graphs + are being replayed and which scopes are covered by the graphs. 
+ """ + + def __init__(self, layer): + assert isinstance( + layer, GraphableMegatronModule + ), "cuda graphed ep overlap only supports GraphableMegatronModule." + assert isinstance( + layer, TransformerLayer + ), "cuda graphed ep overlap only supports TransformerLayer for now." + self.layer = layer + self.graphed_backward_dw_callable = None + self.attn_dw_callable = layer.self_attention.backward_dw + if layer.is_moe_layer: + self.shared_expert_dw_callable = partial( + layer.mlp.backward_dw, routed_experts=False, shared_experts=True + ) + else: + self.shared_expert_dw_callable = None + self.cuda_graph_scope = layer.config.cuda_graph_scope + + def backward_dw(self): + """Execute weight gradients, skipping CUDA graphed components during replay.""" + is_replay = hasattr(self.layer, 'cuda_graphs') and self.layer.cuda_graphs + if self.shared_expert_dw_callable is not None and ( + not is_replay or CudaGraphScope.moe_router not in self.cuda_graph_scope + ): + self.shared_expert_dw_callable() + if not is_replay or CudaGraphScope.attn not in self.cuda_graph_scope: + self.attn_dw_callable() + if is_replay and self.graphed_backward_dw_callable is not None: + self.graphed_backward_dw_callable() + self.layer = None + + def set_graphed_backward_dw_callable(self, graphed_backward_dw_callable): + """Store the CUDA graphed backward weight gradient callable.""" + self.graphed_backward_dw_callable = graphed_backward_dw_callable + + def build_transformer_layer_callables(layer: TransformerLayer): """Create callables for transformer layer nodes. 
Divides the transformer layer's operations into a sequence of smaller, independent @@ -375,36 +424,6 @@ def build_transformer_layer_callables(layer: TransformerLayer): and layer.config.moe_flex_dispatcher_backend == "hybridep" ) - class _BackwardDWWrapper: - def __init__(self): - self.graphed_backward_dw_callable = None - self.attn_dw_callable = layer.self_attention.backward_dw - if isinstance(layer.mlp, MoELayer): - self.shared_expert_dw_callable = partial( - layer.mlp.backward_dw, routed_experts=False, shared_experts=True - ) - else: - self.shared_expert_dw_callable = None - self.cuda_graph_scope = layer.config.cuda_graph_scope - - def set_graphed_backward_dw_callable(self, graphed_backward_dw_callable): - """Store the CUDA graphed backward weight gradient callable.""" - self.graphed_backward_dw_callable = graphed_backward_dw_callable - - def backward_dw(self): - """Execute weight gradients, skipping CUDA graphed components during replay.""" - is_replay = hasattr(layer, 'cuda_graphs') and layer.cuda_graphs - if self.shared_expert_dw_callable is not None and ( - not is_replay or CudaGraphScope.moe_router not in self.cuda_graph_scope - ): - self.shared_expert_dw_callable() - if not is_replay or CudaGraphScope.attn not in self.cuda_graph_scope: - self.attn_dw_callable() - if is_replay and self.graphed_backward_dw_callable is not None: - self.graphed_backward_dw_callable() - - attn_backward_dw_wrapper = _BackwardDWWrapper() - def submodule_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): """ Performs same attnention forward logic as GPT Model and forward pass for @@ -412,20 +431,13 @@ def submodule_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor): pre mlp layernorm->router->dispatch preprocess """ - if hasattr(layer, 'cuda_graphs') and layer.cuda_graphs: - assert ( - CudaGraphScope.mlp not in layer.config.cuda_graph_scope - and CudaGraphScope.moe not in layer.config.cuda_graph_scope - ), ( - "Supported CUDA graph scope with EP overlap: " - 
"attn, moe_router, moe_preprocess, mlp, got {}".format( - layer.config.cuda_graph_scope - ) - ) + if ( + isinstance(layer, GraphableMegatronModule) + and hasattr(layer, 'cuda_graphs') + and layer.cuda_graphs + ): + layer.set_te_cuda_graph_backward_dw_wrapper() forward_func = layer._te_cuda_graph_replay - attn_backward_dw_wrapper.set_graphed_backward_dw_callable( - partial(layer.backward_dw_cudagraph, layer.current_microbatch) - ) else: # wrapper function that keeps consistent api with cuda graph replay def forward_func( @@ -585,8 +597,10 @@ def raise_not_implemented(*args): mlp_func = submodule_moe_forward if is_moe else mlp_wrapper combine_func = submodule_combine_forward if is_moe else raise_not_implemented + layer.init_backward_dw_wrapper() + forward_funcs = [attn_func, dispatch_func, mlp_func, combine_func, None] - backward_dw = {"attn": attn_backward_dw_wrapper, "mlp": layer.mlp} + backward_dw = {"attn": layer.backward_dw_wrapper, "mlp": layer.mlp} return forward_funcs, backward_dw diff --git a/megatron/core/models/gpt/gpt_layer_specs.py b/megatron/core/models/gpt/gpt_layer_specs.py index 70f0a8244ca..3bd0c7fe6ab 100755 --- a/megatron/core/models/gpt/gpt_layer_specs.py +++ b/megatron/core/models/gpt/gpt_layer_specs.py @@ -193,6 +193,7 @@ def get_gpt_layer_with_transformer_engine_spec( num_experts (int, optional): Number of experts. Defaults to None. moe_grouped_gemm (bool, optional): To use Grouped GEMM. Defaults to False. qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False. + multi_latent_attention (bool, optional): To use MLA. Defaults to False. fp8 (str, optional): Deprecated. For temporary Nemo compatibility. moe_use_legacy_grouped_gemm (bool, optional): Force use the legacy GroupedMLP. Defaults to False. @@ -326,6 +327,7 @@ def get_gpt_layer_local_spec( num_experts (int, optional): Number of experts. Defaults to None. moe_grouped_gemm (bool, optional): To use Grouped GEMM. Defaults to False. 
qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False. + multi_latent_attention (bool, optional): To use MLA. Defaults to False. fp8 (str, optional): Deprecated. For temporary Nemo compatibility. moe_use_legacy_grouped_gemm (bool, optional): Force use the legacy GroupedMLP. Defaults to False. @@ -616,7 +618,6 @@ def get_gpt_decoder_block_spec( layer_specs = get_gpt_decoder_layer_specs( config, use_transformer_engine, normalization, qk_l2_norm ) - # Slice the layer specs to only include the layers that are built in this pipeline stage. # Note: MCore layer_number starts at 1 num_layers_to_build = get_num_layers_to_build(config, vp_stage=vp_stage, pp_rank=pp_rank) @@ -636,6 +637,10 @@ def get_gpt_decoder_block_spec( offset = get_transformer_layer_offset(config, vp_stage=vp_stage, pp_rank=pp_rank) local_layer_specs = layer_specs[offset : offset + num_layers_to_build] + if use_transformer_engine: + layer_norm_impl = TENorm + else: + layer_norm_impl = LNImpl # Block spec. 
if use_transformer_engine: layer_norm_impl = TENorm diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 16462d6e426..3c65621a060 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -35,7 +35,11 @@ from megatron.core.transformer.spec_utils import ModuleSpec from megatron.core.transformer.transformer_block import TransformerBlock from megatron.core.transformer.transformer_config import TransformerConfig -from megatron.core.utils import WrappedTensor, deprecate_inference_params +from megatron.core.utils import ( + WrappedTensor, + deprecate_inference_params, + is_using_quantization_scales, +) class GPTModel(LanguageModule): @@ -358,7 +362,10 @@ def _preprocess( inference_context, self.decoder, decoder_input, self.config, packed_seq_params ) rotary_pos_emb = self.rotary_pos_emb( - rotary_seq_len, packed_seq_params=packed_seq_params + rotary_seq_len, + packed_seq=packed_seq_params is not None + and packed_seq_params.qkv_format == 'thd', + cp_group=packed_seq_params.cp_group if packed_seq_params is not None else None, ) elif self.position_embedding_type == 'yarn': if self.training or not self.config.flash_decode: @@ -366,7 +373,10 @@ def _preprocess( inference_context, self.decoder, decoder_input, self.config, packed_seq_params ) rotary_pos_emb, _ = self.rotary_pos_emb( - rotary_seq_len, packed_seq_params=packed_seq_params + rotary_seq_len, + packed_seq=packed_seq_params is not None + and packed_seq_params.qkv_format == 'thd', + cp_group=packed_seq_params.cp_group if packed_seq_params is not None else None, ) else: raise NotImplementedError( @@ -376,7 +386,9 @@ def _preprocess( elif self.position_embedding_type == 'mrope' and not self.config.multi_latent_attention: if self.training or not self.config.flash_decode: rotary_pos_emb = self.rotary_pos_emb( - position_ids, self.mrope_section, packed_seq_params=packed_seq_params + position_ids, + self.mrope_section, + 
cp_group=packed_seq_params.cp_group if packed_seq_params is not None else None, ) else: # Flash decoding uses precomputed cos and sin for RoPE @@ -405,11 +417,19 @@ def _preprocess( else: sequence_len_offset = None - # Wrap decoder_input to allow the decoder (TransformerBlock) to delete the - # reference held by this caller function, enabling early garbage collection for - # inference. Skip wrapping if decoder_input is logged after decoder completion. - if in_inference_mode and not has_config_logger_enabled(self.config): - decoder_input = WrappedTensor(decoder_input) + if in_inference_mode: + # Clear the outputs for padding tokens when using dynamic batching with + # quantization scales to avoid corrupting amax calculations + if inference_context.is_dynamic_batching() and is_using_quantization_scales( + self.config + ): + decoder_input[inference_context.padding_slice] = 0.0 + + # Wrap decoder_input to allow the decoder (TransformerBlock) to delete the + # reference held by this caller function, enabling early garbage collection for + # inference. Skip wrapping if decoder_input is logged after decoder completion. 
+ if not has_config_logger_enabled(self.config): + decoder_input = WrappedTensor(decoder_input) preproc_output = ( decoder_input, @@ -588,8 +608,7 @@ def _postprocess( if not self.post_process: return hidden_states - # Skip when mtp_num_layers is None or 0 - if self.config.mtp_num_layers: + if self.config.mtp_num_layers is not None: mtp_labels = labels.clone() hidden_states_list = torch.chunk(hidden_states, 1 + self.config.mtp_num_layers, dim=0) hidden_states = hidden_states_list[0] diff --git a/megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py b/megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py index b1c2fb79a11..5e9687b09a3 100644 --- a/megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py +++ b/megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py @@ -27,6 +27,7 @@ TransformerLayerSubmodules, get_transformer_layer_offset, ) +from megatron.core.typed_torch import not_none from megatron.core.utils import is_te_min_version try: @@ -44,6 +45,13 @@ HAVE_TE = True except ImportError: + ( + TEDotProductAttention, + TELayerNormColumnParallelLinear, + TENorm, + TERowParallelLinear, + TELayerNormColumnParallelLinearGathered, + ) = (None, None, None, None, None) HAVE_TE = False from megatron.core.transformer.torch_norm import WrappedTorchNorm @@ -110,8 +118,10 @@ def _get_heterogenous_attention_spec( module=SelfAttention, params={"attn_mask_type": AttnMaskType.causal}, submodules=SelfAttentionSubmodules( - linear_qkv=TELayerNormColumnParallelLinear if use_te else ColumnParallelLinear, - core_attention=TEDotProductAttention if use_te else DotProductAttention, + linear_qkv=( + not_none(TELayerNormColumnParallelLinear) if use_te else ColumnParallelLinear + ), + core_attention=not_none(TEDotProductAttention) if use_te else DotProductAttention, linear_proj=TERowParallelLinear if use_te else RowParallelLinear, q_layernorm=ln, k_layernorm=ln, diff --git a/megatron/core/models/mamba/mamba_model.py 
b/megatron/core/models/mamba/mamba_model.py index e4074eda806..0acca7e8713 100644 --- a/megatron/core/models/mamba/mamba_model.py +++ b/megatron/core/models/mamba/mamba_model.py @@ -10,13 +10,18 @@ from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding from megatron.core.models.common.embeddings.rotary_pos_embedding import RotaryEmbedding from megatron.core.models.common.language_module.language_module import LanguageModule +from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.quantization.utils import get_quant_config_or_none from megatron.core.tensor_parallel import gather_from_sequence_parallel_region from megatron.core.transformer import TransformerConfig from megatron.core.transformer.enums import ModelType from megatron.core.transformer.spec_utils import ModuleSpec, build_module -from megatron.core.utils import WrappedTensor, deprecate_inference_params +from megatron.core.utils import ( + WrappedTensor, + deprecate_inference_params, + is_using_quantization_scales, +) class MambaModel(LanguageModule): @@ -179,6 +184,7 @@ def forward( runtime_gather_output: Optional[bool] = None, *, inference_params: Optional[BaseInferenceContext] = None, + packed_seq_params: Optional[PackedSeqParams] = None, ) -> Tensor: """Forward function of the Mamba model. 
This function passes the input tensors through the embedding layer, and then the decoder and finally into the post @@ -201,6 +207,15 @@ def forward( pass elif self.pre_process: decoder_input = self.embedding(input_ids=input_ids, position_ids=position_ids) + + # Clear the outputs for padding tokens when using dynamic batching with + # quantization scales to avoid corrupting amax calculations + if ( + in_inference_mode + and inference_context.is_dynamic_batching() + and is_using_quantization_scales(self.config) + ): + decoder_input[inference_context.padding_slice] = 0.0 else: # intermediate stage of pipeline # decoder will get hidden_states from encoder.input_tensor @@ -209,9 +224,12 @@ def forward( rotary_pos_emb = None if self.position_embedding_type == 'rope': rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len( - inference_context, self.decoder, decoder_input, self.config + inference_context, self.decoder, decoder_input, self.config, packed_seq_params + ) + rotary_pos_emb = self.rotary_pos_emb( + rotary_seq_len, + packed_seq=packed_seq_params is not None and packed_seq_params.qkv_format == 'thd', ) - rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len) # Wrap decoder_input to allow the decoder (MambaBlock) to delete the # reference held by this caller function, enabling early garbage collection @@ -235,6 +253,7 @@ def forward( attention_mask=attention_mask, inference_context=inference_context, rotary_pos_emb=rotary_pos_emb, + packed_seq_params=packed_seq_params, ) if not self.post_process: diff --git a/megatron/core/models/multimodal/llava_model.py b/megatron/core/models/multimodal/llava_model.py index dae9a02b780..af0bcf6e9fd 100644 --- a/megatron/core/models/multimodal/llava_model.py +++ b/megatron/core/models/multimodal/llava_model.py @@ -924,27 +924,16 @@ def forward( ) ) - if isinstance(self.language_model, MambaModel): - output = self.language_model( - input_ids=None, - position_ids=None, - attention_mask=attention_mask, - 
decoder_input=combined_embeddings, - labels=new_labels, - inference_context=inference_context, - runtime_gather_output=runtime_gather_output, - ) - else: - output = self.language_model( - input_ids=None, - position_ids=None, - attention_mask=attention_mask, - decoder_input=combined_embeddings, - labels=new_labels, - inference_context=inference_context, - runtime_gather_output=runtime_gather_output, - packed_seq_params=packed_seq_params, - ) + output = self.language_model( + input_ids=None, + position_ids=None, + attention_mask=attention_mask, + decoder_input=combined_embeddings, + labels=new_labels, + inference_context=inference_context, + runtime_gather_output=runtime_gather_output, + packed_seq_params=packed_seq_params, + ) return output, new_loss_mask diff --git a/megatron/core/models/retro/decoder_spec.py b/megatron/core/models/retro/decoder_spec.py index 6539348143f..c872a4f77e1 100644 --- a/megatron/core/models/retro/decoder_spec.py +++ b/megatron/core/models/retro/decoder_spec.py @@ -23,6 +23,7 @@ TransformerBlockSubmodules, get_num_layers_to_build, ) +from megatron.core.typed_torch import not_none try: import apex # pylint: disable=unused-import @@ -52,6 +53,12 @@ HAVE_TE = True except ImportError: + (TEColumnParallelLinear, TEDotProductAttention, TENorm, TERowParallelLinear) = ( + None, + None, + None, + None, + ) HAVE_TE = False @@ -79,9 +86,9 @@ def get_retro_decoder_layer_te_spec( module=RetroDecoderCrossAttention, params={"encoder_block_spec": encoder_block_spec}, submodules=CrossAttentionSubmodules( - linear_q=TEColumnParallelLinear, - linear_kv=TEColumnParallelLinear, - core_attention=TEDotProductAttention, + linear_q=not_none(TEColumnParallelLinear), + linear_kv=not_none(TEColumnParallelLinear), + core_attention=not_none(TEDotProductAttention), linear_proj=TERowParallelLinear, ), ) diff --git a/megatron/core/models/retro/encoder_spec.py b/megatron/core/models/retro/encoder_spec.py index a7cb76ca19b..0b5b94409a2 100644 --- 
a/megatron/core/models/retro/encoder_spec.py +++ b/megatron/core/models/retro/encoder_spec.py @@ -19,6 +19,7 @@ from megatron.core.transformer.enums import AttnMaskType from megatron.core.transformer.mlp import MLP, MLPSubmodules from megatron.core.transformer.transformer_block import TransformerBlockSubmodules +from megatron.core.typed_torch import not_none try: import transformer_engine as te # pylint: disable=unused-import @@ -32,6 +33,12 @@ HAVE_TE = True except ImportError: + (TEColumnParallelLinear, TEDotProductAttention, TENorm, TERowParallelLinear) = ( + None, + None, + None, + None, + ) HAVE_TE = False try: @@ -68,9 +75,9 @@ def get_retro_encoder_layer_te_spec() -> ModuleSpec: module=RetroEncoderCrossAttention, params={"attn_mask_type": AttnMaskType.padding}, submodules=CrossAttentionSubmodules( - linear_q=TEColumnParallelLinear, - linear_kv=TEColumnParallelLinear, - core_attention=TEDotProductAttention, + linear_q=not_none(TEColumnParallelLinear), + linear_kv=not_none(TEColumnParallelLinear), + core_attention=not_none(TEDotProductAttention), linear_proj=TERowParallelLinear, ), ) diff --git a/megatron/core/optimizer/cpu_offloading/README.md b/megatron/core/optimizer/cpu_offloading/README.md index 1486226aa86..68bfba54ba2 100644 --- a/megatron/core/optimizer/cpu_offloading/README.md +++ b/megatron/core/optimizer/cpu_offloading/README.md @@ -8,6 +8,6 @@ Add these flags to enable optimizer cpu offload in MCore. --use-precision-aware-optimizer ``` -## Configuration Recommendataions +## Configuration Recommendations Gradient copy from GPU to CPU, CPU optimizer step, and subsequent parameter copy from CPU to GPU can be time-consuming operations, and it is recommended to use the flag `--overlap-cpu-optimizer-d2h-h2d` to execute them concurrently. 
diff --git a/megatron/core/optimizer/muon.py b/megatron/core/optimizer/muon.py
index b909ab61a8a..33b9b78b836 100644
--- a/megatron/core/optimizer/muon.py
+++ b/megatron/core/optimizer/muon.py
@@ -8,6 +8,7 @@
 import torch
 from torch.optim.optimizer import ParamsT
 
+from megatron.core.optimizer_param_scheduler import ParamGroupOverride
 from megatron.core.process_groups_config import ProcessGroupCollection
 from megatron.core.transformer.module import MegatronModule
 from megatron.core.utils import get_pg_size, log_single_rank
@@ -164,7 +165,7 @@ def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> t
 def get_megatron_muon_optimizer(
     config: OptimizerConfig,
     model_chunks: List[MegatronModule],
-    config_overrides: Optional[Dict[ParamKey, OptimizerConfig]] = None,
+    config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]] = None,
     use_gloo_process_groups: bool = True,
     layer_wise_distributed_optimizer: bool = False,
     pg_collection: Optional[ProcessGroupCollection] = None,
diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py
index a1429b7a170..94163102eb3 100644
--- a/megatron/core/optimizer/optimizer_config.py
+++ b/megatron/core/optimizer/optimizer_config.py
@@ -254,7 +254,8 @@ class OptimizerConfig:
     sgd_momentum: float = 0.9
     """Momentum factor for SGD optimizer."""
 
-    # Muon
+    # Muon.
+    # TODO: move muon configs to its own `MuonConfig`.
muon_momentum: float = 0.95 """The momentum used by the internal SGD.""" diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 7aa867fd98f..c5a73600ee1 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -554,6 +554,7 @@ def initialize_model_parallel( use_sharp: bool = False, context_parallel_size: int = 1, hierarchical_context_parallel_sizes: Optional[List[int]] = None, + hybrid_context_parallel: bool = False, expert_model_parallel_size: int = 1, num_distributed_optimizer_instances: int = 1, expert_tensor_parallel_size: Optional[int] = None, @@ -565,7 +566,6 @@ def initialize_model_parallel( create_gloo_process_groups: bool = True, high_priority_stream_groups: Optional[List[str]] = None, sharp_enabled_group: Optional[str] = None, - hybrid_context_parallel: bool = False, ) -> None: """Initialize model data parallel groups. diff --git a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py index 01c3a0c3aa0..1d2545b682d 100644 --- a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py +++ b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py @@ -1133,6 +1133,12 @@ def fine_grained_offloading_group_commit( ) +def fine_grained_offloading_group_flush_delayed_groups(): + """Flush the delayed groups.""" + debug_rank("fine_grained_offloading_group_flush_delayed_groups") + PipelineOffloadManager.get_instance().flush_delayed_groups() + + class FineGrainedOffloadingGroupStartFunction(torch.autograd.Function): """ Identity operation that marks the start of a layer group for offload/reload. 
@@ -1166,6 +1172,13 @@ def fine_grained_offloading_group_start(tensor, name=None): return FineGrainedOffloadingGroupStartFunction.apply(tensor, cur_forward_chunk, name) +def fine_grained_offloading_forward_record(event: torch.cuda.Event) -> None: + """Record the forward event for cuda graph capture.""" + d2h_stream = PipelineOffloadManager.get_instance().d2h_stream + torch.cuda.current_stream().record_event(event) + torch.cuda.current_stream().wait_stream(d2h_stream) + + class FineGrainedOffloadingBackwardRecordFunction(torch.autograd.Function): """ Identity operation that marks the end of a layer group for offload synchronization. @@ -1187,6 +1200,11 @@ def backward(ctx, grad_output): return grad_output, None +def fine_grained_offloading_backward_record(tensor, event: torch.cuda.Event) -> torch.Tensor: + """Record the backward event for cuda graph capture.""" + return FineGrainedOffloadingBackwardRecordFunction.apply(tensor, event) + + class FineGrainedActivationOffloadingInterface: """Interface for fine-grained activation offloading.""" @@ -1238,12 +1256,6 @@ def forward_record(event: torch.cuda.Event) -> None: torch.cuda.current_stream().record_event(event) torch.cuda.current_stream().wait_stream(d2h_stream) - @staticmethod - def backward_record(tensor, event: torch.cuda.Event) -> torch.Tensor: - """Record the backward event for cuda graph capture.""" - return FineGrainedOffloadingBackwardRecordFunction.apply(tensor, event) - - @staticmethod def reset(): """Reset the chunk handler.""" PipelineOffloadManager.get_instance().reset() @@ -1252,8 +1264,3 @@ def reset(): def reset_instance(): """Reset the singleton instance.""" PipelineOffloadManager.reset_instance() - - @staticmethod - def flush_delayed_groups(): - """Flush the delayed groups.""" - PipelineOffloadManager.get_instance().flush_delayed_groups() diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index bda6334fc4b..03c5f01f443 100644 --- 
a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -185,8 +185,8 @@ def __init__( self.free_input = free_input self.inputs = None self.outputs = None - self.manual_grads_release = False self.delay_grads_release = False + self.manual_release_grads = False def default_backward_func(self, outputs, output_grad): """Default backward function""" @@ -272,7 +272,7 @@ def _backward(self, *output_grad): # to avoid delayed garbage collection. If # delay_grads_release is True, dgrad is last used in # wgrad compute and skip the release here. - if self.manual_grads_release and not self.delay_grads_release: + if self.manual_release_grads and not self.delay_grads_release: g.untyped_storage().resize_(0) grads = self.get_grad() diff --git a/megatron/core/rerun_state_machine.py b/megatron/core/rerun_state_machine.py index b0f45647944..9ce7259d09c 100644 --- a/megatron/core/rerun_state_machine.py +++ b/megatron/core/rerun_state_machine.py @@ -507,7 +507,7 @@ def train_step(data_iterator, ...): """ # If reruns are disabled, still validate the result and throw a RuntimeError if it is - # rejected. This is a backward-compatible behavior. + # rejected when fatal. This is a backward-compatible behavior for infs and NaNs. if self.mode == RerunMode.DISABLED: result_rejected: bool = rejection_func(result) if result_rejected: @@ -522,7 +522,10 @@ def train_step(data_iterator, ...): f"iteration {self.current_iteration}: " f"Unexpected result {result} (message='{message}')" ) - raise RuntimeError(full_message) + if fatal: + raise RuntimeError(full_message) + else: + logger.warning(full_message) return if comparison_func is None: diff --git a/megatron/core/resharding/__init__.py b/megatron/core/resharding/__init__.py new file mode 100644 index 00000000000..d06484eef37 --- /dev/null +++ b/megatron/core/resharding/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+from .execution import execute_reshard_plan +from .planner import build_centralized_reshard_plan +from .refit import reshard_model_weights, swap_model_weights +from .utils import ParameterMetadata, ReshardPlan, ShardingDescriptor, TransferOp + +__all__ = [ + "build_centralized_reshard_plan", + "execute_reshard_plan", + "swap_model_weights", + "reshard_model_weights", + "ParameterMetadata", + "ShardingDescriptor", + "TransferOp", + "ReshardPlan", +] diff --git a/megatron/core/resharding/copy_services/__init__.py b/megatron/core/resharding/copy_services/__init__.py new file mode 100644 index 00000000000..15986e4d28e --- /dev/null +++ b/megatron/core/resharding/copy_services/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +from __future__ import annotations + +from .base import CopyService +from .nccl_copy_service import NCCLCopyService + +__all__ = ["CopyService", "NCCLCopyService"] diff --git a/megatron/core/resharding/copy_services/base.py b/megatron/core/resharding/copy_services/base.py new file mode 100644 index 00000000000..d7b9205ba83 --- /dev/null +++ b/megatron/core/resharding/copy_services/base.py @@ -0,0 +1,25 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +from __future__ import annotations + +from abc import ABC, abstractmethod + +import torch + + +class CopyService(ABC): + """Abstract interface for submitting and executing batched P2P copy operations.""" + + @abstractmethod + def submit_send(self, src_tensor: torch.Tensor, dest_rank: int): + """Register a tensor send from the current rank to ``dest_rank``.""" + ... + + @abstractmethod + def submit_recv(self, dest_tensor: torch.Tensor, src_rank: int): + """Register a tensor receive into ``dest_tensor`` from ``src_rank``.""" + ... + + @abstractmethod + def run(self): + """Execute all previously submitted send/recv operations as a single batch.""" + ... 
diff --git a/megatron/core/resharding/copy_services/gloo_copy_service.py b/megatron/core/resharding/copy_services/gloo_copy_service.py new file mode 100644 index 00000000000..95f9d454682 --- /dev/null +++ b/megatron/core/resharding/copy_services/gloo_copy_service.py @@ -0,0 +1,146 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import List, Tuple + +import torch +import torch.distributed as dist + +from .base import CopyService + +logger = logging.getLogger(__name__) + + +@dataclass +class SendOp: + """Simple container describing a single send operation.""" + + task_id: int | None + tensor: torch.Tensor + dest_rank: int + + +@dataclass +class RecvOp: + """Simple container describing a single receive operation.""" + + task_id: int | None + tensor: torch.Tensor + src_rank: int + + +class GlooCopyService(CopyService): + """ + CopyService implementation that routes refit traffic over a CPU/Gloo + process group instead of NCCL. 
+ """ + + def __init__(self): + self.rank = dist.get_rank() + self.world_size = dist.get_world_size() + self.gloo_pg = dist.new_group(backend="gloo") + self.send_ops: List[SendOp] = [] + self.recv_ops: List[Tuple[RecvOp, torch.Tensor]] = [] + self._copy_stream = torch.cuda.Stream() + logger.info(f"GlooCopyService initialized on rank {self.rank} with {self.world_size} ranks") + + def submit_send(self, src_tensor: torch.Tensor, dest_rank: int): + self.send_ops.append(SendOp(task_id=None, tensor=src_tensor, dest_rank=dest_rank)) + + def submit_send_with_id(self, task_id: int, src_tensor: torch.Tensor, dest_rank: int): + """Submit a send operation with a unique task identifier.""" + self.send_ops.append(SendOp(task_id=task_id, tensor=src_tensor, dest_rank=dest_rank)) + + def submit_recv(self, dest_tensor: torch.Tensor, src_rank: int): + """Submit a receive operation.""" + # Allocate a CPU buffer that matches the destination view; we'll + # copy into dest_tensor after the Gloo recv completes. + cpu_buffer = torch.empty_like(dest_tensor, device="cpu").contiguous() + self.recv_ops.append( + (RecvOp(task_id=None, tensor=cpu_buffer, src_rank=src_rank), dest_tensor) + ) + + def submit_recv_with_id(self, task_id: int, dest_tensor: torch.Tensor, src_rank: int): + """Submit a receive operation with a unique task identifier.""" + cpu_buffer = torch.empty_like(dest_tensor, device="cpu").contiguous() + self.recv_ops.append( + (RecvOp(task_id=task_id, tensor=cpu_buffer, src_rank=src_rank), dest_tensor) + ) + + def run(self): + total_ops = len(self.send_ops) + len(self.recv_ops) + logger.info( + f"GlooCopyService rank {self.rank}: executing batched communication: " + f"{len(self.send_ops)} sends + {len(self.recv_ops)} recvs = {total_ops} ops" + ) + + p2p_ops: List[dist.P2POp] = [] + + # Short-circuit self transfers into local device copies. 
+ local_sends = [op for op in self.send_ops if op.dest_rank == self.rank] + remote_sends = [op for op in self.send_ops if op.dest_rank != self.rank] + local_recvs = [(recv, dst) for (recv, dst) in self.recv_ops if recv.src_rank == self.rank] + remote_recvs = [(recv, dst) for (recv, dst) in self.recv_ops if recv.src_rank != self.rank] + + if local_sends or local_recvs: + local_sends_by_id = {op.task_id: op for op in local_sends} + if None in local_sends_by_id: + raise RuntimeError( + "GlooCopyService: local send missing task_id; " + "use submit_send_with_id/submit_recv_with_id for local copies" + ) + local_recvs_by_id = {recv.task_id: (recv, dst) for (recv, dst) in local_recvs} + if None in local_recvs_by_id: + raise RuntimeError( + "GlooCopyService: local recv missing task_id; " + "use submit_send_with_id/submit_recv_with_id for local copies" + ) + if len(local_sends_by_id) != len(local_sends) or len(local_recvs_by_id) != len( + local_recvs + ): + raise RuntimeError( + f"GlooCopyService: unmatched local ops on rank {self.rank}: " + f"{len(local_sends)} local sends vs {len(local_recvs)} local recvs" + ) + for task_id, (recv_op, dst_tensor) in local_recvs_by_id.items(): + send_op = local_sends_by_id.get(task_id) + if send_op is None: + raise RuntimeError( + f"GlooCopyService: missing local send for task_id={task_id} " + f"on rank {self.rank}" + ) + with torch.no_grad(): + src_tensor = send_op.tensor + if dst_tensor.device != src_tensor.device: + dst_tensor.copy_(src_tensor.to(dst_tensor.device)) + else: + dst_tensor.copy_(src_tensor) + + # Build Gloo P2P ops over CPU tensors. For sends we clone to CPU; + # for recvs we use the preallocated CPU buffers. 
+ for op in remote_sends: + cpu_tensor = op.tensor.detach().to("cpu").contiguous() + p2p_ops.append(dist.P2POp(dist.isend, cpu_tensor, op.dest_rank, group=self.gloo_pg)) + for recv, _dst_tensor in remote_recvs: + p2p_ops.append(dist.P2POp(dist.irecv, recv.tensor, recv.src_rank, group=self.gloo_pg)) + + if p2p_ops: + reqs = dist.batch_isend_irecv(p2p_ops) + for req in reqs: + req.wait() + + # Copy received CPU buffers back into the original destination tensors. + for recv, dst_tensor in remote_recvs: + if dst_tensor.is_cuda: + dst_tensor.copy_(recv.tensor.to(dst_tensor.device)) + else: + dst_tensor.copy_(recv.tensor) + + if self._copy_stream is not None: + torch.cuda.current_stream().wait_stream(self._copy_stream) + + logger.info("GlooCopyService: batched communication completed") + self.send_ops.clear() + self.recv_ops.clear() diff --git a/megatron/core/resharding/copy_services/nccl_copy_service.py b/megatron/core/resharding/copy_services/nccl_copy_service.py new file mode 100644 index 00000000000..43556f02986 --- /dev/null +++ b/megatron/core/resharding/copy_services/nccl_copy_service.py @@ -0,0 +1,126 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import List + +import torch +import torch.distributed as dist + +from .base import CopyService + +logger = logging.getLogger(__name__) + + +@dataclass +class SendOp: + """Simple container describing a single NCCL send operation.""" + + task_id: int | None + tensor: torch.Tensor + dest_rank: int + + +@dataclass +class RecvOp: + """Simple container describing a single NCCL receive operation.""" + + task_id: int | None + tensor: torch.Tensor + src_rank: int + + +class NCCLCopyService(CopyService): + """ + Thin wrapper around torch.distributed batch_isend_irecv to submit and execute + a batch of point-to-point sends and recvs. 
+ """ + + def __init__(self): + self.rank = dist.get_rank() + self.world_size = dist.get_world_size() + self.send_ops: List[SendOp] = [] + self.recv_ops: List[RecvOp] = [] + # Dedicated stream for local (same-rank) copies to avoid unnecessary + # serialization with work on the default stream. + self._copy_stream = torch.cuda.Stream() + logger.info(f"NCCLCopyService initialized with {self.world_size} ranks") + + def submit_send(self, src_tensor: torch.Tensor, dest_rank: int): + self.send_ops.append(SendOp(task_id=None, tensor=src_tensor, dest_rank=dest_rank)) + + def submit_send_with_id(self, task_id: int, src_tensor: torch.Tensor, dest_rank: int): + """Submit a send operation with a unique task identifier.""" + self.send_ops.append(SendOp(task_id=task_id, tensor=src_tensor, dest_rank=dest_rank)) + + def submit_recv(self, dest_tensor: torch.Tensor, src_rank: int): + """Submit a receive operation.""" + self.recv_ops.append(RecvOp(task_id=None, tensor=dest_tensor, src_rank=src_rank)) + + def submit_recv_with_id(self, task_id: int, dest_tensor: torch.Tensor, src_rank: int): + """Submit a receive operation with a unique task identifier.""" + self.recv_ops.append(RecvOp(task_id=task_id, tensor=dest_tensor, src_rank=src_rank)) + + def run(self): + total_ops = len(self.send_ops) + len(self.recv_ops) + logger.info( + "Executing batched communication: %d sends + %d recvs = %d ops", + len(self.send_ops), + len(self.recv_ops), + total_ops, + ) + + local_sends = [op for op in self.send_ops if op.dest_rank == self.rank] + remote_sends = [op for op in self.send_ops if op.dest_rank != self.rank] + local_recvs = [op for op in self.recv_ops if op.src_rank == self.rank] + remote_recvs = [op for op in self.recv_ops if op.src_rank != self.rank] + + if local_sends or local_recvs: + local_sends_by_id = {op.task_id: op for op in local_sends} + if None in local_sends_by_id: + raise RuntimeError( + "NCCLCopyService: local send missing task_id; " + "use 
submit_send_with_id/submit_recv_with_id for local copies" + ) + local_recvs_by_id = {op.task_id: op for op in local_recvs} + if None in local_recvs_by_id: + raise RuntimeError( + "NCCLCopyService: local recv missing task_id; " + "use submit_send_with_id/submit_recv_with_id for local copies" + ) + if len(local_sends_by_id) != len(local_sends) or len(local_recvs_by_id) != len( + local_recvs + ): + raise RuntimeError( + f"NCCLCopyService: unmatched local ops on rank {self.rank}: " + f"{len(local_sends)} local sends vs {len(local_recvs)} local recvs" + ) + for task_id, recv_op in local_recvs_by_id.items(): + send_op = local_sends_by_id.get(task_id) + if send_op is None: + raise RuntimeError( + f"NCCLCopyService: missing local send for task_id={task_id} " + f"on rank {self.rank}" + ) + with torch.no_grad(): + with torch.cuda.stream(self._copy_stream): + recv_op.tensor.copy_(send_op.tensor) + + p2p_ops = [] + for op in remote_sends: + p2p_ops.append(dist.P2POp(dist.isend, op.tensor, op.dest_rank)) + for op in remote_recvs: + p2p_ops.append(dist.P2POp(dist.irecv, op.tensor, op.src_rank)) + + if p2p_ops: + reqs = dist.batch_isend_irecv(p2p_ops) + for req in reqs: + req.wait() + + # Make sure the copy stream is finished + torch.cuda.current_stream().wait_stream(self._copy_stream) + + logger.info("Batched communication completed") + self.send_ops.clear() + self.recv_ops.clear() diff --git a/megatron/core/resharding/execution.py b/megatron/core/resharding/execution.py new file mode 100644 index 00000000000..6a7779406d0 --- /dev/null +++ b/megatron/core/resharding/execution.py @@ -0,0 +1,66 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+from __future__ import annotations + +import logging +from typing import List, Tuple + +import torch +import torch.distributed as dist + +from .copy_services.base import CopyService +from .utils import ReshardPlan + +logger = logging.getLogger(__name__) + + +def execute_reshard_plan( + plan: ReshardPlan, + src_module: torch.nn.Module, + dst_module: torch.nn.Module, + service: CopyService, +) -> None: + """ + Execute a reshard plan (from centralized controller). + A communication service must be provided to abstract transport. + Expected service API: submit_send(tensor, dest_rank), submit_recv(tensor, src_rank), run(). + """ + + src_params = {name: p for name, p in src_module.named_parameters(recurse=True)} + dst_params = {name: p for name, p in dst_module.named_parameters(recurse=True)} + submit_send_with_id = getattr(service, "submit_send_with_id", None) + submit_recv_with_id = getattr(service, "submit_recv_with_id", None) + + # Submit sends + for op in plan.send_ops: + src_param = src_params.get(op.param_name) + if src_param is not None: + src_view = src_param.data[op.my_slice].contiguous() + if submit_send_with_id is not None and op.task_id is not None: + submit_send_with_id(op.task_id, src_view, op.peer_rank) + else: + service.submit_send(src_view, op.peer_rank) + + # Submit recvs + recv_writebacks: List[Tuple[torch.Tensor, torch.nn.Parameter, tuple[slice, ...]]] = [] + for op in plan.recv_ops: + dst_param = dst_params.get(op.param_name) + if dst_param is not None: + dst_slice_view = dst_param.data[op.my_slice] + recv_buffer = torch.empty_like(dst_slice_view.contiguous()) + if submit_recv_with_id is not None and op.task_id is not None: + submit_recv_with_id(op.task_id, recv_buffer, op.peer_rank) + else: + service.submit_recv(recv_buffer, op.peer_rank) + recv_writebacks.append((recv_buffer, dst_param, op.my_slice)) + + # Execute + logger.info(f"Executing {len(plan.send_ops)} sends + {len(plan.recv_ops)} recvs") + service.run() + dist.barrier() + + # Write back 
received buffers into their destination parameter slices + for recv_buffer, dst_param, dst_slice in recv_writebacks: + with torch.no_grad(): + dst_param.data[dst_slice].copy_(recv_buffer) + + logger.info("Reshard complete") diff --git a/megatron/core/resharding/planner.py b/megatron/core/resharding/planner.py new file mode 100644 index 00000000000..31045fbfc01 --- /dev/null +++ b/megatron/core/resharding/planner.py @@ -0,0 +1,345 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +from __future__ import annotations + +import logging +import math + +import torch +import torch.distributed as dist + +from .utils import ( + ParameterMetadata, + ReshardPlan, + ShardingDescriptor, + TransferOp, + _build_layer_module_prefix_map, + _get_rank_in_group, + extract_param_metadata, + select_src_metadata_balanced, +) + +logger = logging.getLogger(__name__) + + +def _build_descriptors_for_param( + src_metadata: ParameterMetadata, dst_metadata: ParameterMetadata +) -> list[ShardingDescriptor]: + """Construct sharding descriptors (currently TP) for this parameter based on actual layout. + Guard TP descriptor with size conservation so we don't mis-classify replicated tensors. 
+ """ + descriptors: list[ShardingDescriptor] = [] + + # TP descriptor: allow when either side participates in TP + if src_metadata.is_tp or dst_metadata.is_tp: + # Prefer destination partition_dim, else source + tp_dim = dst_metadata.partition_dim if dst_metadata.is_tp else src_metadata.partition_dim + src_tp_ranks = src_metadata.tensor_parallel_group_ranks + dst_tp_ranks = dst_metadata.tensor_parallel_group_ranks + if src_tp_ranks is None or dst_tp_ranks is None: + # Not enough context to build TP descriptor + return descriptors + src_stride = src_metadata.partition_stride if src_metadata.is_tp else 1 + dst_stride = dst_metadata.partition_stride if dst_metadata.is_tp else 1 + + # Size conservation check on partition dim + src_world = len(src_tp_ranks) + dst_world = len(dst_tp_ranks) + src_local = src_metadata.shape[tp_dim] + dst_local = dst_metadata.shape[tp_dim] + if src_world * src_local != dst_world * dst_local: + raise RuntimeError( + f"Cannot build TP descriptor for {dst_metadata.name} dim{tp_dim}: " + f"src_world*src_local={src_world}*{src_local} != {dst_world}*{dst_local}. " + "This usually means the param is marked TP but is effectively replicated on that " + "dim or partition_dim/metadata is inconsistent between source and destination." + ) + + descriptors.append( + ShardingDescriptor( + name="tp", + dim=tp_dim, + src_stride=src_stride, + dst_stride=dst_stride, + src_dim_ranks=src_tp_ranks, + dst_dim_ranks=dst_tp_ranks, + ) + ) + return descriptors + + +def _plan_multi_dim_lcm( + param_name: str, + src_metadata: ParameterMetadata, + dst_metadata: ParameterMetadata, + descriptors: list[ShardingDescriptor], + my_global_rank: int, +) -> list[tuple[int, tuple[slice, ...], tuple[slice, ...]]]: + """ + TP-only planner using LCM tiling to support strides on source/destination. 
+ - Requires exactly one TP descriptor + - Supports arbitrary integer strides (contiguous micro-tiles) + """ + if not descriptors: + return [] + if len(descriptors) != 1: + raise NotImplementedError( + f"{param_name}: _plan_multi_dim_lcm supports TP-only (one descriptor)" + ) + if descriptors[0].name != "tp": + raise NotImplementedError(f"{param_name}: _plan_multi_dim_lcm expects TP descriptor") + d = descriptors[0] + if my_global_rank not in d.dst_dim_ranks: + return [] + src_shape = tuple(src_metadata.shape) + dst_shape = tuple(dst_metadata.shape) + dim = d.dim + src_world = len(d.src_dim_ranks) + dst_world = len(d.dst_dim_ranks) + src_local = src_shape[dim] + dst_local = dst_shape[dim] + if src_world * src_local != dst_world * dst_local: + raise RuntimeError( + f"{param_name}: size mismatch on TP dim{dim} " + f"(src_world={src_world}, src_local={src_local}, " + f"dst_world={dst_world}, dst_local={dst_local})" + ) + # LCM tiling with strides + Ns = src_world * max(1, d.src_stride) + Nd = dst_world * max(1, d.dst_stride) + full_len = dst_local * dst_world + g = math.gcd(Ns, Nd) + L = (Ns // g) * Nd + if full_len % L != 0: + raise RuntimeError( + f"{param_name}: TP dim{dim} full_len {full_len} not divisible by LCM {L} " + f"(Ns={Ns}, Nd={Nd})" + ) + unit = full_len // L # micro-tile length + cps = L // Ns # micro-tiles per source segment + cpd = L // Nd # micro-tiles per destination segment + seg_src = cps * unit # contiguous length per source segment + seg_dst = cpd * unit # contiguous length per destination segment + dst_local_rank = _get_rank_in_group(my_global_rank, d.dst_dim_ranks) + ops: list[tuple[int, tuple[slice, ...], tuple[slice, ...]]] = [] + # Sweep destination segments owned by this rank (handle destination stride) + for k in range(max(1, d.dst_stride)): + g_dst_seg = dst_local_rank + k * dst_world + # Within this segment, enumerate the cpd micro-tiles + for off in range(cpd): + g_micro = g_dst_seg * cpd + off + s_idx = g_micro // cps + in_seg = 
g_micro % cps
+            src_owner_in_dim = s_idx % src_world
+            src_global_rank = d.src_dim_ranks[src_owner_in_dim]
+            src_local_seg_idx = s_idx // src_world
+            src_start = src_local_seg_idx * seg_src + in_seg * unit
+            dst_start = k * seg_dst + off * unit
+            # Build full N-D slices
+            src_slice = [slice(None)] * len(src_shape)
+            dst_slice = [slice(None)] * len(dst_shape)
+            src_slice[dim] = slice(src_start, src_start + unit)
+            dst_slice[dim] = slice(dst_start, dst_start + unit)
+            ops.append((src_global_rank, tuple(src_slice), tuple(dst_slice)))
+
+    # Stable order by destination offset
+    def dst_key(op):
+        _, _, dsl = op
+        s = dsl[dim]
+        return s.start if isinstance(s, slice) else 0
+
+    ops.sort(key=dst_key)
+    return ops
+
+
+def _finalize_dp_transfers(
+    param_name: str,
+    src_metadata: ParameterMetadata,
+    dst_metadata: ParameterMetadata,
+    my_global_rank: int,
+) -> list[tuple[int, tuple[slice, ...], tuple[slice, ...]]]:
+    """Return receiver-side transfer for a parameter that is not TP-sharded.
+
+    This is reached when we cannot build a TP sharding descriptor for the parameter
+    (i.e., it is effectively replicated with respect to sharding). We use this when the
+    destination and source models have no TP or the parameter is replicated on all ranks
+    such as layernorm. If the source and destination DP groups match, we return a local
+    full-tensor copy; otherwise we pick a source rank from the source DP group in a
+    deterministic round-robin manner based on the receiver's global rank for better load
+    distribution.
+ """ + dst_dp_ranks = dst_metadata.data_parallel_group_ranks + src_dp_ranks = src_metadata.data_parallel_group_ranks + if my_global_rank not in dst_dp_ranks: + return [] + + dst_shape = dst_metadata.shape + + # Same DP layout - local copy + if src_dp_ranks == dst_dp_ranks: + full_slice = tuple(slice(None) for _ in range(len(dst_shape))) + return [(my_global_rank, full_slice, full_slice)] + + # Different DP groups - use round-robin based on destination global rank for + # better load balancing across source ranks. This ensures that destination + # ranks are distributed across source ranks even when they have the same + # position within their respective DP groups. + src_global_rank = src_dp_ranks[my_global_rank % len(src_dp_ranks)] + full_slice = tuple(slice(None) for _ in range(len(dst_shape))) + return [(src_global_rank, full_slice, full_slice)] + + +def _determine_source_ranks_for_dst_param( + param_name: str, + src_metadata: ParameterMetadata, + dst_metadata: ParameterMetadata, + my_global_rank: int, +) -> list[tuple[int, tuple[slice, ...], tuple[slice, ...]]]: + """Route to dimension-specific planner based on parameter sharding type.""" + + # Regular TP/DP planning with EP-resolved metadata + descriptors = _build_descriptors_for_param(src_metadata=src_metadata, dst_metadata=dst_metadata) + if descriptors: + return _plan_multi_dim_lcm( + param_name=param_name, + src_metadata=src_metadata, + dst_metadata=dst_metadata, + descriptors=descriptors, + my_global_rank=my_global_rank, + ) + # DP / replicated fallback + return _finalize_dp_transfers(param_name, src_metadata, dst_metadata, my_global_rank) + + +def build_centralized_reshard_plan( + src_module: torch.nn.Module, dst_module: torch.nn.Module, num_experts: int = None +) -> ReshardPlan: + """ + Centralized planning: Rank 0 builds complete plan for all ranks, then scatters. 
+ """ + my_global_rank = dist.get_rank() + world_size = dist.get_world_size() + + # Get process groups + src_pg = getattr(src_module, "pg_collection", None) + dst_pg = getattr(dst_module, "pg_collection", None) + if src_pg is None or dst_pg is None: + raise ValueError("Both modules must have pg_collection") + + # Gather param metadata from all ranks + my_src_params = {name: p for name, p in src_module.named_parameters(recurse=True)} + my_dst_params = {name: p for name, p in dst_module.named_parameters(recurse=True)} + + # Build PP layer prefix maps to be used for parameter name rewriting + src_layer_prefix_map = _build_layer_module_prefix_map(src_module) + dst_layer_prefix_map = _build_layer_module_prefix_map(dst_module) + + my_src_metadata = [ + extract_param_metadata( + p, + name, + my_global_rank, + src_pg, + num_experts=num_experts, + layer_module_prefix_map=src_layer_prefix_map, + ) + for name, p in my_src_params.items() + ] + my_dst_metadata = [ + extract_param_metadata( + p, + name, + my_global_rank, + dst_pg, + num_experts=num_experts, + layer_module_prefix_map=dst_layer_prefix_map, + ) + for name, p in my_dst_params.items() + ] + + all_src_metadata_by_rank = [None] * world_size + all_dst_metadata_by_rank = [None] * world_size + dist.all_gather_object(all_src_metadata_by_rank, my_src_metadata) + dist.all_gather_object(all_dst_metadata_by_rank, my_dst_metadata) + + # Parameter to metadata maps keyed by resolved_name + src_param_metadata_by_rank = {} + dst_param_metadata_by_rank = {} + src_param_metadata: dict[str, list[ParameterMetadata]] = {} + + for rank_id, rank_metadata_list in enumerate(all_src_metadata_by_rank): + src_param_metadata_by_rank[rank_id] = {m.resolved_name: m for m in rank_metadata_list} + for rank_id, rank_metadata_list in enumerate(all_dst_metadata_by_rank): + dst_param_metadata_by_rank[rank_id] = {m.resolved_name: m for m in rank_metadata_list} + for rank_metadata_list in all_src_metadata_by_rank: + for metadata in rank_metadata_list: + 
key = metadata.resolved_name + if key not in src_param_metadata: + src_param_metadata[key] = [] + src_param_metadata[key].append(metadata) + + # Build the plan on global rank 0 and broadcast to all ranks + if my_global_rank == 0: + plans_for_all_ranks = {r: ReshardPlan([], []) for r in range(world_size)} + # Global monotonically increasing ID for non-local transfers. + # This is shared between the corresponding send/recv ops so that + # NVSHMEM can build schedule. + next_task_id = 0 + + # Pipeline-parallel (PP) "mapping" is handled implicitly. + # Each rank contributes metadata only for the parameters it actually owns + # (i.e., the module partitioning for its PP stage). When PP sizes differ + # between source and destination, we don't compute an explicit stage-to-stage + # mapping here; instead, we iterate destination ranks and plan copies for the + # parameters present on those ranks. Any source rank that has the same logical + # parameter (matched by resolved_name) can serve as a sender (with DP balancing), + # and TP slicing is applied when applicable. + for dst_rank in range(world_size): + dst_rank_params = dst_param_metadata_by_rank.get(dst_rank, {}) + for resolved_name, dst_metadata in dst_rank_params.items(): + src_meta_list = src_param_metadata.get(resolved_name) + if not src_meta_list: + raise RuntimeError( + f"Destination parameter '{resolved_name}' on rank {dst_rank} " + "not found in source model." 
+ ) + # Choose a representative source metadata with DP round-robin balancing + src_metadata = select_src_metadata_balanced(src_meta_list, dst_metadata, dst_rank) + sources = _determine_source_ranks_for_dst_param( + resolved_name, src_metadata, dst_metadata, dst_rank + ) + for src_rank, src_slice, dst_slice in sources: + task_id = next_task_id + next_task_id += 1 + + plans_for_all_ranks[dst_rank].recv_ops.append( + TransferOp( + param_name=dst_metadata.name, + peer_rank=src_rank, + is_send=False, + my_slice=dst_slice, + peer_slice=src_slice, + task_id=task_id, + ) + ) + plans_for_all_ranks[src_rank].send_ops.append( + TransferOp( + param_name=src_metadata.name, + peer_rank=dst_rank, + is_send=True, + my_slice=src_slice, + peer_slice=dst_slice, + task_id=task_id, + ) + ) + plans_list = [plans_for_all_ranks[r] for r in range(world_size)] + else: + plans_list = [None] * world_size + torch.distributed.broadcast_object_list(plans_list, src=0) + my_plan = plans_list[my_global_rank] + + logger.info( + f"Rank {my_global_rank}: Received plan - {len(my_plan.recv_ops)} recvs, " + f"{len(my_plan.send_ops)} sends" + ) + + return my_plan diff --git a/megatron/core/resharding/refit.py b/megatron/core/resharding/refit.py new file mode 100644 index 00000000000..491a42b9116 --- /dev/null +++ b/megatron/core/resharding/refit.py @@ -0,0 +1,85 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +from __future__ import annotations + +""" +High-level refit/reshard orchestration: +- swap_model_weights: public API; accepts a backend name or CopyService and delegates. +- reshard_model_weights: transport-agnostic core; builds/caches plan and executes. +""" + +from typing import Any, Literal, Optional, Union + +from megatron.core import parallel_state +from megatron.core.models.common.language_module.language_module import LanguageModule +from megatron.core.utils import unwrap_model + +from . 
import build_centralized_reshard_plan, execute_reshard_plan +from .copy_services.base import CopyService +from .copy_services.gloo_copy_service import GlooCopyService +from .copy_services.nccl_copy_service import NCCLCopyService + +# Supported refit backend names +RefitBackendName = Literal["nccl", "gloo"] + + +def swap_model_weights( + src_model: LanguageModule, + target_model: LanguageModule, + refit_method: Union[RefitBackendName, CopyService], +): + """ + Orchestrate weight swap/refit. + - refit_method can be: + * a string backend name (one of the supported refit backends), or + * a CopyService instance. + """ + if isinstance(refit_method, CopyService): + service = refit_method + reshard_model_weights(src_model, target_model, service=service) + elif isinstance(refit_method, str): + if refit_method == "nccl": + service = NCCLCopyService() + reshard_model_weights(src_model, target_model, service=service) + elif refit_method == "gloo": + # Debug / fallback backend: run refit over CPU/Gloo instead of NCCL. 
+ service = GlooCopyService() + reshard_model_weights(src_model, target_model, service=service) + else: + raise ValueError(f"Unknown refit_method '{refit_method}'") + else: + raise TypeError("refit_method must be a str backend name or a CopyService instance") + + +def reshard_model_weights( + src_model: LanguageModule, target_model: LanguageModule, service: CopyService +): + """Reshard and copy model weights from ``src_model`` to ``target_model`` using ``service``.""" + # Handle list-wrapped modules used throughout training utils + src_lm = src_model[0] if isinstance(src_model, (list, tuple)) else src_model + tgt_lm = target_model[0] if isinstance(target_model, (list, tuple)) else target_model + + num_experts = src_lm.config.num_moe_experts + + # Unwrap to get owning modules (with parameters and pg_collection) + src_core = unwrap_model(src_lm) + tgt_core = unwrap_model(tgt_lm) + + # Ensure pg_collection exists + if not hasattr(src_core, "pg_collection") or src_core.pg_collection is None: + raise RuntimeError("Source model missing pg_collection required for NCCL reshard") + if not hasattr(tgt_core, "pg_collection") or tgt_core.pg_collection is None: + raise RuntimeError("Target model missing pg_collection required for NCCL reshard") + + # Fill missing DP group on the source using Megatron's parallel state if not provided + if getattr(src_core.pg_collection, "dp", None) is None: + src_core.pg_collection.dp = parallel_state.get_data_parallel_group() + + # caching plan for reuse + cached_plan: Optional[Any] = getattr(tgt_core, "_cached_reshard_plan", None) + if cached_plan is None: + plan = build_centralized_reshard_plan(src_core, tgt_core, num_experts=num_experts) + setattr(tgt_core, "_cached_reshard_plan", plan) + else: + plan = cached_plan + + execute_reshard_plan(plan, src_core, tgt_core, service=service) diff --git a/megatron/core/resharding/utils.py b/megatron/core/resharding/utils.py new file mode 100644 index 00000000000..7fc9e9ad3a7 --- /dev/null +++ 
b/megatron/core/resharding/utils.py @@ -0,0 +1,361 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import Mapping, Optional + +import torch +import torch.distributed as dist + +# ----------------------------------------------------------------------------- +# Dataclasses used by the planner +# ----------------------------------------------------------------------------- + + +@dataclass +class TransferOp: + """Single logical send/recv operation used in a reshard plan.""" + + param_name: str + peer_rank: int # Who to send to / receive from + is_send: bool # True=send, False=recv + + # Slice information (for when we execute the plan) + my_slice: tuple[slice, ...] # My tensor slice + peer_slice: tuple[slice, ...] # Peer's tensor slice (for reference) + + # Optional global task identifier for advanced backends (e.g., NVSHMEM) + # When present, this ID is shared between the matching send/recv ops + # across ranks and can be used to build richer communication schedules. + task_id: int | None = None + + +@dataclass +class ParameterMetadata: + """Metadata for a parameter (used when param is on different rank).""" + + name: str + shape: tuple[int, ...] + dtype: torch.dtype + element_size: int + + # TP sharding info + is_tp: bool = False + partition_dim: int = 0 + partition_stride: int = 1 + + # EP sharding info (fused/grouped MoE) + is_ep: bool = False + num_experts: Optional[int] = None + + # Which rank owns this param + owner_rank: int = -1 + + tensor_parallel_group_ranks: list[int] | None = None + expert_parallel_group_ranks: list[int] | None = None + data_parallel_group_ranks: list[int] | None = None + pipeline_parallel_group_ranks: list[int] | None = None + + # Canonical name for matching parameters across models with different EP/PP configurations. 
+ # + # - EP (expert parallel): each rank owns a subset of experts with local indices + # (e.g., rank 1 has "weight0" locally, but it's actually global expert 4). The raw param + # name can't be used to match across source/destination because the same local name refers + # to different global experts on different ranks. `resolved_name` remaps local expert indices + # to global indices (e.g., "layer.experts.weight0" on rank 1 → "layer.experts.weight4"). + # + # - PP (pipeline parallel): transformer blocks are often named with rank-local indices + # (e.g., PP stage 1 may have "decoder.layers.0" even though that corresponds to global + # layer 16). For reshard/refit across different PP partitionings (e.g., PP2 ↔ PP1), + # `resolved_name` may be further canonicalized to global layer indices. + # + # For non-EP and non-PP cases, resolved_name == name. + resolved_name: Optional[str] = None + # The global expert index this parameter belongs to (e.g., 4 for global expert 4). + # Computed alongside resolved_name; None for non-EP or fused expert tensors. 
+ global_expert_index: Optional[int] = None + + +@dataclass +class ShardingDescriptor: + """Descriptor for a sharded dimension for a parameter.""" + + name: str # "tp" | "ep" | custom label + dim: int + src_stride: int + dst_stride: int + src_dim_ranks: list[int] + dst_dim_ranks: list[int] + + +@dataclass +class ReshardPlan: + """Reshard plan - operations for this rank.""" + + send_ops: list[TransferOp] + recv_ops: list[TransferOp] + + def __str__(self): + return f"ReshardPlan(sends={len(self.send_ops)}, recvs={len(self.recv_ops)})" + + +# ----------------------------------------------------------------------------- +# EP + Metadata helpers +# ----------------------------------------------------------------------------- + + +def _get_rank_in_group(global_rank: int, group_ranks: list[int]) -> int: + try: + return group_ranks.index(global_rank) + except ValueError: + raise ValueError( + f"Rank {global_rank} not found in process group {group_ranks}. " + f"This likely indicates a configuration mismatch." + ) + + +def _detect_expert_index_from_param_name(param_name: str) -> Optional[int]: + """Extract expert index from parameter name for TEGroupedMLP per-expert tensors.""" + for part in param_name.split('.'): + if ( + part.startswith('weight') + and len(part) > len('weight') + and part[len('weight') :].isdigit() + ): + return int(part[len('weight') :]) + if part.startswith('bias') and len(part) > len('bias') and part[len('bias') :].isdigit(): + return int(part[len('bias') :]) + return None + + +def assign_ep_resolved_name_inplace( + meta: ParameterMetadata, *, base_name: str | None = None +) -> None: + """ + EP-only canonicalization for per-expert parameters. + + Under Expert Parallelism (EP), each rank owns a subset of experts with local indices + (e.g., rank 1 has "weight0" locally, but it's actually global expert 4). 
The raw param + name can't be used to match across source/destination because the same local name refers + to different global experts on different ranks. This function remaps local expert indices + to global indices in `resolved_name` and sets `global_expert_index`. + + Effects: + - Sets meta.resolved_name (defaults to base_name/meta.name for non-EP). + - Sets meta.global_expert_index for per-expert parameters; otherwise leaves it as None. + """ + base = meta.name if base_name is None else base_name + meta.resolved_name = base + meta.global_expert_index = None + if not meta.is_ep: + return + + local_idx = _detect_expert_index_from_param_name(base) + if local_idx is None: + # Fused experts tensor: leave name as-is; TP planner will handle slicing + return + ep_group = meta.expert_parallel_group_ranks + ep_size = len(ep_group) + ep_local_rank = ep_group.index(meta.owner_rank) + experts_per_rank = meta.num_experts // ep_size + global_idx = ep_local_rank * experts_per_rank + local_idx + meta.global_expert_index = global_idx + + # Replace trailing integer in "weightK"/"biasK" with global_idx + parts = base.split('.') + new_parts = [] + for p in parts: + if p.startswith('weight') and len(p) > len('weight') and p[len('weight') :].isdigit(): + new_parts.append('weight' + str(global_idx)) + elif p.startswith('bias') and len(p) > len('bias') and p[len('bias') :].isdigit(): + new_parts.append('bias' + str(global_idx)) + else: + new_parts.append(p) + meta.resolved_name = '.'.join(new_parts) + + +def assign_resolved_name_inplace( + meta: ParameterMetadata, + *, + layer_module_prefix_map: Mapping[str, str] | None = None, + base_name: str | None = None, +) -> None: + """Set meta.resolved_name so the planner can match the same weights across models. + + It rewrites PP layer indices to global layer indices (when layer_module_prefix_map is + provided) and + rewrites EP per-expert indices (weightK/biasK) to global expert indices. 
+ """ + name = meta.name if base_name is None else base_name + if layer_module_prefix_map: + name = _resolve_global_layer_number_in_name(name, layer_module_prefix_map) + assign_ep_resolved_name_inplace(meta, base_name=name) + + +def _build_layer_module_prefix_map(module: torch.nn.Module) -> dict[str, str]: + """Build a mapping local_module_prefix -> global_module_prefix for PP layer modules. + + Megatron assigns a global, 1-indexed layer_number to each transformer layer module at + construction time (including PP/VPP/layout offsets). We convert that to the 0-indexed naming + convention used in parameter names and build a map such as: + + - "decoder.layers.0" → "decoder.layers.16" (if layer_number == 17) + """ + prefix_map: dict[str, str] = {} + for module_name, submodule in module.named_modules(): + if not module_name: + continue + layer_number = getattr(submodule, 'layer_number', None) + if not isinstance(layer_number, int): + continue + parts = module_name.split('.') + if not parts[-1].isdigit(): + continue + parts[-1] = str(layer_number - 1) # convert 1-indexed to 0-indexed + prefix_map[module_name] = '.'.join(parts) + return prefix_map + + +def _resolve_global_layer_number_in_name( + name: str, layer_module_prefix_map: Mapping[str, str] +) -> str: + """Rewrite a parameter name to use global layer indices (PP-aware). + + Given a parameter name like decoder.layers.0.self_attention..., this function rewrites + the decoder.layers.0 prefix to the corresponding global layer index using the owning + layer module's layer_number. + + Implementation: + - Build a {local_prefix -> global_prefix} map once (outside the per-parameter loop). + - Perform a longest-prefix match replacement so we only rewrite the module path portion. 
+ """ + if not layer_module_prefix_map: + return name + + parts = name.split('.') + for i in range(len(parts), 0, -1): + prefix = '.'.join(parts[:i]) + mapped = layer_module_prefix_map.get(prefix) + if mapped is None: + continue + rest = '.'.join(parts[i:]) + return mapped if not rest else mapped + '.' + rest + return name + + +def extract_param_metadata( + param: torch.nn.Parameter, + param_name: str, + owner_rank: int, + pg_collection, + num_experts: Optional[int] = None, + layer_module_prefix_map: Mapping[str, str] | None = None, +) -> ParameterMetadata: + """Extract metadata from a parameter for cross-rank communication.""" + # TP flags from attributes (set by Megatron linear layers) + is_tp = bool(getattr(param, 'tensor_model_parallel', False)) + partition_dim = int(getattr(param, 'partition_dim', 0)) + partition_stride = int(getattr(param, 'partition_stride', 1)) + + # SwiGLU/GLU compatibility: For gated linear units, fc1 stores interleaved [gate, up] portions + # and requires partition_stride=2 for correct resharding. New models set this at construction + # time (MLP sets partition_stride=2 on weight when gated_linear_unit=True). For legacy models + # where stride=1 was left as default, we apply stride=2 as a fallback for fc1 parameters. + # This is safe because: (1) gated models need it, and (2) non-gated models have smaller fc1 + # and stride doesn't affect single-block transfers. 
+ # if 'mlp.linear_fc1' in param_name and is_tp and partition_stride == 1: + # partition_stride = 2 + + # EP detection: Megatron convention - expert params are not allreduced + is_ep = not bool(getattr(param, 'allreduce', True)) + + tensor_parallel_group_ranks: list[int] | None = None + expert_parallel_group_ranks: list[int] | None = None + data_parallel_group_ranks: list[int] | None = None + pipeline_parallel_group_ranks: list[int] | None = None + + if is_ep: + expert_parallel_group_ranks = dist.get_process_group_ranks(pg_collection.ep) + # For MoE params, prefer expert TP group when available, else regular TP + if is_tp and hasattr(pg_collection, 'expt_tp') and pg_collection.expt_tp is not None: + tensor_parallel_group_ranks = dist.get_process_group_ranks(pg_collection.expt_tp) + elif is_tp and hasattr(pg_collection, 'tp') and pg_collection.tp is not None: + tensor_parallel_group_ranks = dist.get_process_group_ranks(pg_collection.tp) + data_parallel_group_ranks = dist.get_process_group_ranks(pg_collection.dp) + elif is_tp: + # Non-EP: use regular TP group + if hasattr(pg_collection, 'tp') and pg_collection.tp is not None: + tensor_parallel_group_ranks = dist.get_process_group_ranks(pg_collection.tp) + data_parallel_group_ranks = dist.get_process_group_ranks(pg_collection.dp) + else: + data_parallel_group_ranks = dist.get_process_group_ranks(pg_collection.dp) + + if hasattr(pg_collection, 'pp') and pg_collection.pp is not None: + pipeline_parallel_group_ranks = dist.get_process_group_ranks(pg_collection.pp) + else: + pipeline_parallel_group_ranks = list(range(dist.get_world_size())) + + meta = ParameterMetadata( + name=param_name, + shape=tuple(param.shape), + dtype=param.dtype, + element_size=param.element_size(), + is_tp=is_tp, + partition_dim=partition_dim, + partition_stride=partition_stride, + is_ep=is_ep, + num_experts=num_experts, + owner_rank=owner_rank, + tensor_parallel_group_ranks=tensor_parallel_group_ranks, + 
expert_parallel_group_ranks=expert_parallel_group_ranks, + data_parallel_group_ranks=data_parallel_group_ranks, + pipeline_parallel_group_ranks=pipeline_parallel_group_ranks, + ) + assign_resolved_name_inplace( + meta, layer_module_prefix_map=layer_module_prefix_map, base_name=param_name + ) + + return meta + + +def select_src_metadata_balanced( + src_meta_list: list[ParameterMetadata], dst_metadata: ParameterMetadata, dst_rank: int +) -> ParameterMetadata: + """Choose a representative source `ParameterMetadata` for a destination rank. + + Multiple source data-parallel (DP) groups may hold the same logical parameter. + To avoid always reading from the same group, we: + - bucket `src_meta_list` by their DP group (tuple of ranks) + - if there is only one bucket, just return the first entry + - otherwise, use the destination rank's global rank to select a source + DP group in a round-robin fashion, ensuring even distribution of load + across all source DP groups. + """ + if not src_meta_list: + raise ValueError("src_meta_list must be non-empty") + + # Group source metadata by their DP group layout so we can balance across groups. + # (dp_rank0, dp_rank1, ...) -> [ParameterMetadata for that DP group] + grouped_by_dp: dict[tuple[int, ...], list[ParameterMetadata]] = {} + for meta in src_meta_list: + dp_group = tuple(meta.data_parallel_group_ranks or []) + grouped_by_dp.setdefault(dp_group, []).append(meta) + + # Fast path: only one DP layout present; no balancing necessary. + if len(grouped_by_dp) == 1: + return src_meta_list[0] + + # Use the destination rank's global rank to select a source DP group in a + # round-robin fashion. This ensures that even when multiple destination ranks + # have the same DP index (e.g., ranks 0,1,2,3 all being at position 0 in their + # respective DP groups), they still get distributed across different source + # DP groups based on their global rank. 
+ sorted_dp_groups = sorted(grouped_by_dp.keys()) + chosen_group = sorted_dp_groups[dst_rank % len(sorted_dp_groups)] + + # Within the chosen group, any representative metadata works; use the first. + return grouped_by_dp[chosen_group][0] + + +logger = logging.getLogger(__name__) diff --git a/megatron/core/ssm/mamba_block.py b/megatron/core/ssm/mamba_block.py index 29e9b123674..9e41aca8253 100644 --- a/megatron/core/ssm/mamba_block.py +++ b/megatron/core/ssm/mamba_block.py @@ -18,6 +18,7 @@ from megatron.core.extensions.transformer_engine import TENorm from megatron.core.fp8_utils import get_fp8_context from megatron.core.inference.contexts import BaseInferenceContext +from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols as LayerSymbols from megatron.core.ssm.mamba_hybrid_layer_allocation import allocate_layers @@ -148,7 +149,10 @@ def __init__( elif layer_type == LayerSymbols.MOE: # Transformer layers apply their own pp_layer_offset layer = build_module( - submodules.moe_layer, config=self.config, layer_number=i + 1 + submodules.moe_layer, + config=self.config, + layer_number=i + 1, + pg_collection=pg_collection, ) else: assert False, "unexpected layer_type" @@ -206,6 +210,7 @@ def forward( rotary_pos_emb: Optional[Tensor] = None, *, inference_params: Optional[BaseInferenceContext] = None, + packed_seq_params: Optional[PackedSeqParams] = None, ): """ Forward function of the MambaStack class. 
@@ -287,12 +292,14 @@ def forward( inference_context=inference_context, rotary_pos_emb=rotary_pos_emb, sequence_len_offset=sequence_len_offset, + packed_seq_params=packed_seq_params, ) else: # MambaLayer hidden_states = layer( hidden_states=hidden_states, attention_mask=attention_mask, inference_context=inference_context, + packed_seq_params=packed_seq_params, ) # The attention layer (currently a simplified transformer layer) diff --git a/megatron/core/ssm/mamba_context_parallel.py b/megatron/core/ssm/mamba_context_parallel.py index d59d451fba8..3925f8bd8df 100644 --- a/megatron/core/ssm/mamba_context_parallel.py +++ b/megatron/core/ssm/mamba_context_parallel.py @@ -1,10 +1,14 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +from typing import Optional + import torch import torch.nn as nn import torch.nn.functional as F +from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.tensor_parallel import all_to_all +from megatron.core.utils import is_te_min_version try: from einops import repeat @@ -13,6 +17,16 @@ except ImportError: HAVE_EINOPS = False +try: + # Register the TE CUDA kernels + import transformer_engine # pylint: disable=unused-import + + # Alias the PyTorch wrapper so we can call tex.* APIs + import transformer_engine_torch as tex +except ImportError: + # TE isn’t installed or the torch wrapper is missing + tex = None + class MambaContextParallel: """ @@ -116,7 +130,9 @@ def __init__( # and also `nheads_local_tpcp = nheads_local_tp // cp_size` whilst ngroups_local_tpcp is # either 1 or `ngroups_local_tp // cp_size` - def pre_conv_ssm(self, input_: torch.Tensor) -> torch.Tensor: + def pre_conv_ssm( + self, input_: torch.Tensor, packed_seq_params: Optional[PackedSeqParams] = None + ) -> torch.Tensor: """Method to be applied before the convolution and SSM""" if self.cp_size == 1: return input_ @@ -171,17 +187,20 @@ def pre_conv_ssm(self, input_: torch.Tensor) -> torch.Tensor: output = torch.cat([z, x, B, C, dt], 
dim=-1) # TODO(duncan): for hybrid models, consider isolating load-balancing to attention layers - output = _undo_attention_load_balancing(output, self.cp_size) + output = _undo_attention_load_balancing(output, self.cp_size, packed_seq_params) return output - def post_conv_ssm(self, input_: torch.Tensor) -> torch.Tensor: + def post_conv_ssm( + self, input_: torch.Tensor, packed_seq_params: Optional[PackedSeqParams] = None + ) -> torch.Tensor: """Method to be applied after the convolution and SSM""" if self.cp_size == 1: return input_ else: return _all_to_all_hp2cp( - _redo_attention_load_balancing(input_, self.cp_size), self.cp_group + _redo_attention_load_balancing(input_, self.cp_size, packed_seq_params), + self.cp_group, ) def conv1d(self, input_: torch.Tensor) -> torch.Tensor: @@ -357,33 +376,78 @@ def _all_to_all_hp2cp( return output -def _undo_attention_load_balancing(input_: torch.Tensor, cp_size: int) -> torch.Tensor: +def _undo_attention_load_balancing( + input_: torch.Tensor, cp_size: int, packed_seq_params: Optional[PackedSeqParams] = None +) -> torch.Tensor: """ - Undoes the context parallel attention load balancing - For example, for cp_size=3, converts 162534 to 123456 for sequential - processing by the convolution and SSM. + Undoes the context parallel attention load balancing. + For example (non-packed), for cp_size=3, converts 162534 to 123456 for + sequential processing by the convolution and SSM. 
""" - num_chunks_div_2 = cp_size - num_chunks = num_chunks_div_2 * 2 - chunks = torch.chunk(input_, chunks=num_chunks, dim=0) - order = [2 * i for i in range(num_chunks_div_2)] + [ - num_chunks - 2 * i - 1 for i in range(num_chunks_div_2) - ] - reordered_chunks = [chunks[i] for i in order] - return torch.cat(reordered_chunks, dim=0) + if packed_seq_params is None: + num_chunks_div_2 = cp_size + num_chunks = num_chunks_div_2 * 2 + chunks = torch.chunk(input_, chunks=num_chunks, dim=0) + order = [2 * i for i in range(num_chunks_div_2)] + [ + num_chunks - 2 * i - 1 for i in range(num_chunks_div_2) + ] + reordered_chunks = [chunks[i] for i in order] + return torch.cat(reordered_chunks, dim=0) + else: + assert tex is not None and is_te_min_version("1.10.0"), ( + "Please update Transformer Engine to >= 1.10 to use " + "Context Parallel with THD format data" + ) + if packed_seq_params.cu_seqlens_q_padded is not None: + cu_seqlens = packed_seq_params.cu_seqlens_q_padded + else: + cu_seqlens = packed_seq_params.cu_seqlens_q + total_tokens = input_.size(0) + assert total_tokens % cp_size == 0 + seqlen_per_rank = total_tokens // cp_size + output = torch.empty_like(input_) + for cp_rank in range(cp_size): + start = cp_rank * seqlen_per_rank + end = start + seqlen_per_rank + index = tex.thd_get_partitioned_indices(cu_seqlens, total_tokens, cp_size, cp_rank) + output[index] = input_[start:end] + return output -def _redo_attention_load_balancing(input_: torch.Tensor, cp_size: int) -> torch.Tensor: +def _redo_attention_load_balancing( + input_: torch.Tensor, cp_size: int, packed_seq_params: Optional[PackedSeqParams] = None +) -> torch.Tensor: """ - Redo the context parallel attention load balancing - For example, for cp_size=3, converts 123456 to 162534 for efficient - processing by attention. + Redo the context parallel attention load balancing. + For example (non-packed), for cp_size=3, converts 123456 to 162534 for + efficient processing by attention. 
""" - num_chunks_div_2 = cp_size - num_chunks = num_chunks_div_2 * 2 - chunks = torch.chunk(input_, chunks=num_chunks, dim=0) - order = [None] * num_chunks - order[::2] = range(num_chunks_div_2) # order[even] - order[1::2] = reversed(range(num_chunks_div_2, num_chunks)) # order[odd] - reordered_chunks = [chunks[i] for i in order] - return torch.cat(reordered_chunks, dim=0) + if packed_seq_params is None: + num_chunks_div_2 = cp_size + num_chunks = num_chunks_div_2 * 2 + chunks = torch.chunk(input_, chunks=num_chunks, dim=0) + order = [None] * num_chunks + order[::2] = range(num_chunks_div_2) # order[even] + order[1::2] = reversed(range(num_chunks_div_2, num_chunks)) # order[odd] + reordered_chunks = [chunks[i] for i in order] + return torch.cat(reordered_chunks, dim=0) + else: + assert tex is not None and is_te_min_version("1.10.0"), ( + "Please update Transformer Engine to >= 1.10 to use " + "Context Parallel with THD format data" + ) + if packed_seq_params.cu_seqlens_q_padded is not None: + cu_seqlens = packed_seq_params.cu_seqlens_q_padded + else: + cu_seqlens = packed_seq_params.cu_seqlens_q + total_tokens = input_.size(0) + assert total_tokens % cp_size == 0 + seqlen_per_rank = total_tokens // cp_size + index = torch.empty(total_tokens, device=input_.device, dtype=torch.int32) + for cp_rank in range(cp_size): + start = cp_rank * seqlen_per_rank + end = start + seqlen_per_rank + index[start:end] = tex.thd_get_partitioned_indices( + cu_seqlens, total_tokens, cp_size, cp_rank + ) + return input_.index_select(0, index) diff --git a/megatron/core/ssm/mamba_layer.py b/megatron/core/ssm/mamba_layer.py index 6514050ac63..48ea84566d5 100644 --- a/megatron/core/ssm/mamba_layer.py +++ b/megatron/core/ssm/mamba_layer.py @@ -14,6 +14,7 @@ from megatron.core.dist_checkpointing.mapping import ShardedStateDict from megatron.core.dist_checkpointing.utils import apply_prefix_mapping from megatron.core.inference.contexts import BaseInferenceContext +from 
megatron.core.packed_seq_params import PackedSeqParams from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import GraphableMegatronModule @@ -96,6 +97,7 @@ def forward( rotary_pos_emb: Optional[Tensor] = None, # Not used in MambaLayer *, inference_params: Optional[BaseInferenceContext] = None, + packed_seq_params: Optional[PackedSeqParams] = None, ): """ Perform a forward pass through the Mamba layer. @@ -124,7 +126,9 @@ def forward( hidden_states = hidden_states.to(dtype=self.config.params_dtype) hidden_states = self.norm(hidden_states) - mixer_out_with_bias = self.mixer(hidden_states, inference_context=inference_context) + mixer_out_with_bias = self.mixer( + hidden_states, inference_context=inference_context, packed_seq_params=packed_seq_params + ) with self.bias_dropout_add_exec_handler(): hidden_states = self.mamba_bda( @@ -176,11 +180,11 @@ def _should_call_local_cudagraph(self, *args, **kwargs): # Training and validation mode CUDA graphs if hasattr(self, 'cudagraph_manager') and kwargs.get('inference_context') is None: return True - # Inference mode. 
CUDA graphs are used in the decode phase only, when attn mask is None elif not self.training and ( hasattr(self, 'cudagraph_manager') and kwargs.get('attention_mask') is None - and kwargs['inference_context'].is_decode_only() + and kwargs.get('inference_context') is not None ): - return True + using_cuda_graph = kwargs['inference_context'].using_cuda_graph_this_step() + return using_cuda_graph return False diff --git a/megatron/core/ssm/mamba_mixer.py b/megatron/core/ssm/mamba_mixer.py index c9ccf826ad0..1910c96cb11 100644 --- a/megatron/core/ssm/mamba_mixer.py +++ b/megatron/core/ssm/mamba_mixer.py @@ -19,6 +19,12 @@ from megatron.core.dist_checkpointing import ShardedTensor from megatron.core.dist_checkpointing.mapping import ReplicaId, ShardedTensorFactory from megatron.core.inference.contexts import BaseInferenceContext, DynamicInferenceContext +from megatron.core.inference.contexts.attention_context.triton.tensor_ops import ( + tensor_get_slice_after, + tensor_masked_update, + tensor_merge, +) +from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel import get_cuda_rng_tracker from megatron.core.transformer import TransformerConfig @@ -30,10 +36,10 @@ sharded_state_dict_default, ) from megatron.core.utils import ( - check_mamba_sequence_packing_support, deprecate_inference_params, + is_causal_conv1d_min_version, + is_mamba_min_version, log_single_rank, - maybe_cat, ) from .mamba_context_parallel import MambaContextParallel @@ -405,6 +411,7 @@ def forward( inference_context=None, *, inference_params: Optional[BaseInferenceContext] = None, + packed_seq_params: Optional[PackedSeqParams] = None, ): """ hidden_states: (nL, B, D) / (L B D) @@ -420,148 +427,181 @@ def forward( if in_inference_mode: if inference_context.is_dynamic_batching(): - return self.dynamic_inference(hidden_states, inference_context) + return self._dynamic_inference(hidden_states, 
inference_context) else: assert inference_context.is_static_batching() assert not self.config.sequence_parallel conv_state, ssm_state = self._get_states_from_cache(inference_context, batch) if inference_context.seqlen_offset > 0: # The states are updated inplace - out, out_bias = self.decode(hidden_states, conv_state, ssm_state) + out, out_bias = self._decode(hidden_states, conv_state, ssm_state) return out, out_bias zxBCdt, _ = self.in_proj(hidden_states) - zxBCdt = self.cp.pre_conv_ssm(zxBCdt) + zxBCdt = self.cp.pre_conv_ssm(zxBCdt, packed_seq_params) if in_inference_mode or not self.use_mem_eff_path: # TODO(ksanthanam): Consider deprecating this path for training - y = self.ssm_prefill(zxBCdt, conv_state=conv_state, ssm_state=ssm_state) + assert packed_seq_params is None, ( + "Training with packed sequences is not supported " + "in the non-memory-efficient code path." + ) + y = self._ssm_prefill(zxBCdt, conv_state=conv_state, ssm_state=ssm_state) else: assert ssm_state is None - y = self.ssm_training(zxBCdt) + y = self._ssm_training(zxBCdt, packed_seq_params) out, out_bias = self.out_proj(y) return out, out_bias - def dynamic_inference(self, hidden_states: torch.Tensor, context: DynamicInferenceContext): + def _dynamic_inference(self, hidden_states: torch.Tensor, context: DynamicInferenceContext): """ Executes dynamic inference by separating decode and prefill requests and running them independently. Also runs the chunked prefill request independently if it exists. 
""" sequence_packing_available, reason_for_no_sequence_packing = ( - check_mamba_sequence_packing_support() + _check_mamba_sequence_packing_support(for_inference_not_training=True) ) assert sequence_packing_available, reason_for_no_sequence_packing conv_state, ssm_state = context.mamba_states_cache(self.layer_number - self.pp_layer_offset) - # Fast path: decode-only - if context.is_decode_only(): - batch_indices = context.mamba_metadata.request_to_mamba_state_idx_cudagraph_only[ - : context.padded_active_token_count - ] - out, out_bias = self.decode( - hidden_states, conv_state, ssm_state, batch_indices=batch_indices - ) - return out, out_bias + padded_dims = context.padded_batch_dimensions + + token_count = padded_dims.token_count + decode_req_count = padded_dims.decode_req_count + prefill_req_count = padded_dims.prefill_req_count + has_explicit_chunked_prefill_req = padded_dims.has_explicit_chunked_prefill_req - # Compute input projection before splitting into prefill and decode - # to ensure sequence parallel all-gather. + # Input projection zxBCdt, _ = self.in_proj(hidden_states) - # Compute split between decode and prefill. - seq_idx, cu_seqlens, return_varlen_states = self._get_varlen_generation_state(context) - active_query_lengths = context.request_query_lengths[ - context.paused_request_count : context.total_request_count - ] - batch_indices = context.mamba_metadata.request_to_mamba_state_idx - - # First request with query len > 1 is prefill-start. - first_prefill_token_idx = torch.nonzero(active_query_lengths > 1)[0].int() - - # Process decode requests if there are any. 
- if first_prefill_token_idx > 0: - zxBCdt_decode = zxBCdt[:first_prefill_token_idx] - batch_indices_decode = batch_indices[:first_prefill_token_idx] - y_decode = self.ssm_decode( - zxBCdt_decode.transpose(0, 1), conv_state, ssm_state, batch_indices_decode + if decode_req_count > 0 and prefill_req_count == 0: + # Decode-only + y = self._ssm_decode( + zxBCdt.transpose(0, 1), + conv_state, + ssm_state, + context.mamba_metadata.batch_indices_decode, ).transpose(0, 1) + elif decode_req_count == 0 and (prefill_req_count > 0 or has_explicit_chunked_prefill_req): + if prefill_req_count > 0: + # Prefill only (regular prefill requests) + y_prefill = self._ssm_prefill( + zxBCdt, + conv_state=conv_state, + ssm_state=ssm_state, + seq_idx=context.mamba_metadata.seq_idx, + cu_seqlens=context.mamba_metadata.cu_seqlens, + return_varlen_states=True, + batch_indices=context.mamba_metadata.batch_indices_prefill, + ) + if has_explicit_chunked_prefill_req: + # Prefill only (chunked prefill request) + zxBCdt_chunked_prefill = torch.empty_like(zxBCdt) + tensor_get_slice_after( + zxBCdt, + zxBCdt_chunked_prefill, + context.mamba_metadata.device_chunked_prefill, + check_bounds=False, + ) + y_chunked_prefill = self._ssm_prefill( + zxBCdt_chunked_prefill[: context.mamba_metadata.device_chunked_prefill[1]], + conv_state=conv_state, + ssm_state=ssm_state, + batch_indices=context.mamba_metadata.batch_indices_chunked_prefill, + is_chunked_prefill=True, + ) + if prefill_req_count > 0 and has_explicit_chunked_prefill_req: + # Merge regular prefill and chunked prefill parts + tensor_merge( + y_prefill, y_chunked_prefill, context.mamba_metadata.device_chunked_prefill + ) + y = y_prefill + elif prefill_req_count > 0: + # Prefill-only without chunked prefill + y = y_prefill + else: + # Prefill-only with only chunked prefill + y = y_chunked_prefill else: - y_decode = None - - active_token_count = context.active_token_count - active_request_count = context.get_active_request_count() - 
padded_active_token_count = context.padded_active_token_count - - # Process the chunked prefill request if it exists. - if context.chunked_prefill_request_id != -1: - chunked_prefill_request_token_count = active_query_lengths[-1] - zxBCdt_chunked_prefill = zxBCdt[ - active_token_count - chunked_prefill_request_token_count : active_token_count - ] - - batch_index_chunked_prefill = batch_indices[ - context.get_index_of_chunked_prefill_request() - ] - - y_prefill_chunked = self.ssm_prefill( - zxBCdt_chunked_prefill, - conv_state=conv_state[batch_index_chunked_prefill].unsqueeze(0), - ssm_state=ssm_state[batch_index_chunked_prefill].unsqueeze(0), - is_chunked_prefill=True, - ) - - # Remove the chunked prefill request from the request / token counts so - # the subsequent prefill computation ignores the chunked prefill request. - active_token_count -= chunked_prefill_request_token_count - active_request_count -= 1 - else: - y_prefill_chunked = None - - # Process non-chunked prefill requests if there are any. 
- if (remaining_prefill_tokens := active_token_count - first_prefill_token_idx) > 0: - zxBCdt_prefill = zxBCdt[first_prefill_token_idx:active_token_count] - cu_seqlens_prefill = F.pad( - cu_seqlens[first_prefill_token_idx + 1 : active_request_count + 1] - - first_prefill_token_idx, - (1, 0), + # Mix of decode and prefill + zxBCdt_prefill = torch.empty_like(zxBCdt) + tensor_get_slice_after( + zxBCdt, + zxBCdt_prefill, + context.mamba_metadata.device_decode_prefill, + check_bounds=False, ) - seq_idx_prefill = ( - seq_idx[:, first_prefill_token_idx:active_token_count] - first_prefill_token_idx + # Decode requests + y_decode = self._ssm_decode( + zxBCdt[:decode_req_count].transpose(0, 1), + conv_state, + ssm_state, + context.mamba_metadata.batch_indices_decode, + ).transpose(0, 1) + y_prefill, y_chunked_prefill = None, None + if prefill_req_count > 0: + # Regular prefill requests + y_prefill = self._ssm_prefill( + zxBCdt_prefill, + conv_state=conv_state, + ssm_state=ssm_state, + seq_idx=context.mamba_metadata.seq_idx, + cu_seqlens=context.mamba_metadata.cu_seqlens, + return_varlen_states=True, + batch_indices=context.mamba_metadata.batch_indices_prefill, + ) + if has_explicit_chunked_prefill_req: + # Chunked prefill request + zxBCdt_chunked_prefill = torch.empty_like(zxBCdt_prefill) + tensor_get_slice_after( + zxBCdt_prefill, + zxBCdt_chunked_prefill, + context.mamba_metadata.device_chunked_prefill, + check_bounds=False, + ) + y_chunked_prefill = self._ssm_prefill( + zxBCdt_chunked_prefill[: context.mamba_metadata.device_chunked_prefill[1]], + conv_state=conv_state, + ssm_state=ssm_state, + batch_indices=context.mamba_metadata.batch_indices_chunked_prefill, + is_chunked_prefill=True, + ) + if prefill_req_count > 0 and has_explicit_chunked_prefill_req: + # Merge regular prefill and chunked prefill parts + assert y_prefill is not None + assert y_chunked_prefill is not None + tensor_merge( + y_prefill, y_chunked_prefill, context.mamba_metadata.device_chunked_prefill + ) + 
elif has_explicit_chunked_prefill_req: + # Chunked prefill only + assert y_prefill is None + assert y_chunked_prefill is not None + y_prefill = y_chunked_prefill + else: + # Regular prefill only; y_prefill is already set, nothing more to be done + assert y_prefill is not None + # Merge decode and prefill parts + y = torch.empty( + [token_count, 1, y_prefill.shape[-1]], + dtype=y_prefill.dtype, + device=y_prefill.device, ) - batch_indices_prefill = batch_indices[first_prefill_token_idx:active_request_count] - - y_prefill = self.ssm_prefill( - zxBCdt_prefill, - conv_state=conv_state, - ssm_state=ssm_state, - seq_idx=seq_idx_prefill, - cu_seqlens=cu_seqlens_prefill, - return_varlen_states=return_varlen_states, - batch_indices=batch_indices_prefill, + tensor_merge( + y_decode, y_prefill, context.mamba_metadata.device_decode_prefill, output_tensor=y ) - else: - y_prefill = None - - # Assemble the final output by concatenating the decode output, - # non-chunked prefill output, and chunked prefill output together. - y_prefill = maybe_cat(y_prefill, y_prefill_chunked, required=True) - y = maybe_cat(y_decode, y_prefill, required=True) - - # Add padding tokens back if necessary. Note that we use the context active token count - # in case we modified the local count for chunked prefill above. - if (num_padding_tokens := padded_active_token_count - context.active_token_count) > 0: - y = torch.cat((y, y.new_zeros(num_padding_tokens, *y.shape[1:])), dim=0) - # The output projection will perform the sequence parallel reduce-scatter if necessary. 
+ # Output projection out, out_bias = self.out_proj(y) return out, out_bias - def decode( + def _decode( self, hidden_states, conv_state, ssm_state, batch_indices: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, torch.Tensor]: """Performs inference step for decoding.""" @@ -582,7 +622,7 @@ def decode( assert self.cp.cp_size == 1, "Context parallel not supported for Mamba inferenece decode" - y = self.ssm_decode( + y = self._ssm_decode( zxBCdt, conv_state=conv_state, ssm_state=ssm_state, batch_indices=batch_indices ) @@ -595,7 +635,9 @@ def decode( return out, out_bias - def ssm_training(self, zxBCdt: torch.Tensor) -> torch.Tensor: + def _ssm_training( + self, zxBCdt: torch.Tensor, packed_seq_params: Optional[PackedSeqParams] = None + ) -> torch.Tensor: """ Performs SSM computation for training step. @@ -614,6 +656,14 @@ def ssm_training(self, zxBCdt: torch.Tensor) -> torch.Tensor: if self.conv1d.bias is not None: self.conv1d.bias.data_ptr() + seq_idx = None + if packed_seq_params is not None: + sequence_packing_available, reason_for_no_sequence_packing = ( + _check_mamba_sequence_packing_support(for_inference_not_training=False) + ) + assert sequence_packing_available, reason_for_no_sequence_packing + seq_idx = self._create_packed_seq_idx(packed_seq_params, zxBCdt.shape[1]) + y = mamba_split_conv1d_scan_combined( zxBCdt, rearrange(self.cp.get_conv1d_weight(), "d 1 w -> d w"), @@ -630,17 +680,48 @@ def ssm_training(self, zxBCdt: torch.Tensor) -> torch.Tensor: headdim=None if self.D_has_hdim else self.headdim, ngroups=self.cp.ngroups_local_tpcp, norm_before_gate=self.norm_before_gate, + seq_idx=seq_idx, ) y = rearrange(y, "b l d -> l b d").contiguous() - y = self.cp.post_conv_ssm(y) + y = self.cp.post_conv_ssm(y, packed_seq_params) if self.rmsnorm: y = self.norm(y) return y - def ssm_prefill( + def _create_packed_seq_idx(self, packed_seq_params: PackedSeqParams, total_tokens: int): + """ + If total_tokens is 16 (for example), this method takes 
packed_seq_params.cu_seqlens_q_padded + (or cu_seqlens_q) which is of the form [0, 5, 7, 11] and returns a tensor of the form + [0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3], + which is [0]*(5-0) + [1]*(7-5) + [2]*(11-7) + [3]*(16-11) + In the above example, there are three sequences in the pack. + In general, the output has an additional sequence index (e.g. 0, 1, 2, 3) so that any tokens + beyond the last padded input sequence are accounted for as an extra sequence. However, If + cu_seqlens_q_padded[-1] == max_seqlen then this additional sequence index will not be + included. + """ + # Example: [0, 5, 7, 11] -> [0, 5, 7, 11, 16] + if packed_seq_params.cu_seqlens_q_padded is not None: + cu_seqlens = packed_seq_params.cu_seqlens_q_padded + else: + cu_seqlens = packed_seq_params.cu_seqlens_q + total_tokens_tensor = torch.tensor( + [total_tokens], dtype=cu_seqlens.dtype, device=cu_seqlens.device + ) + cu_seqlens_with_max = torch.cat([cu_seqlens, total_tokens_tensor]) + # Example: [0, 5, 7, 11, 16] -> [5, 2, 4, 5] + seq_lengths = cu_seqlens_with_max[1:] - cu_seqlens_with_max[:-1] + # Example: [5, 2, 4, 5] -> [0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3] + seq_idx = torch.repeat_interleave( + torch.arange(seq_lengths.numel(), device=cu_seqlens.device), seq_lengths + ) + seq_idx = seq_idx.to(torch.int32).unsqueeze(0) # Add a batch dimension + return seq_idx + + def _ssm_prefill( self, zxBCdt: torch.Tensor, conv_state: Optional[torch.Tensor], @@ -691,12 +772,14 @@ def ssm_prefill( ) # Compute short convolution + initial_conv_state = None if conv_state is not None and is_dynamic_batching: # xBC should have shape (b l d) for causal_conv1d_varlen_states assert batch_indices is not None - conv_state[batch_indices] = causal_conv1d_varlen_states( + conv_varlen_states = causal_conv1d_varlen_states( xBC.squeeze(0), cu_seqlens, state_len=conv_state.shape[-1] ) + tensor_masked_update(conv_state, batch_indices, conv_varlen_states) # Maintain channels-last memory layout to use 
seq_idx for causal_conv1d_fn # See https://github.com/Dao-AILab/causal-conv1d/blob/69e6dadc28b169a4c49cb86b586f64ee90242c70/csrc/causal_conv1d.cpp#L174 # pylint: disable=line-too-long @@ -704,7 +787,14 @@ def ssm_prefill( elif is_chunked_prefill: # Maintain channels-last memory layout to use initial_states for causal_conv1d_fn # See https://github.com/Dao-AILab/causal-conv1d/blob/69e6dadc28b169a4c49cb86b586f64ee90242c70/csrc/causal_conv1d.cpp#L200 # pylint: disable=line-too-long + assert batch_indices is not None + initial_conv_state = ( + conv_state[batch_indices, :, 1:].permute(0, 2, 1).contiguous().transpose(1, 2) + ) xBC = xBC.transpose(1, 2) + tensor_masked_update( + conv_state, batch_indices, F.pad(xBC, (self.d_conv - xBC.shape[-1], 0)) + ) else: # transpose: b l pd --> b pd l xBC = rearrange(xBC, "b l d -> b d l").contiguous() @@ -720,12 +810,6 @@ def ssm_prefill( xBC = self.act(self.cp.conv1d(xBC)[..., :seqlen]) else: assert self.activation in ["silu", "swish"] - if is_chunked_prefill: - initial_conv_state = ( - conv_state[:, :, 1:].permute(0, 2, 1).contiguous().transpose(1, 2) - ) - else: - initial_conv_state = None xBC = causal_conv1d_fn( x=xBC, weight=rearrange(self.cp.get_conv1d_weight(), "d 1 w -> d w"), @@ -764,7 +848,7 @@ def ssm_prefill( ), "Context parallel not supported for use_mem_eff_path==False and rmsnorm==False" if is_chunked_prefill: - initial_ssm_state = ssm_state + initial_ssm_state = ssm_state[batch_indices] else: initial_ssm_state = None @@ -797,12 +881,16 @@ def ssm_prefill( if return_varlen_states: assert batch_indices is not None - y, _, varlen_states = y + y, _, ssm_varlen_states = y # This has to be varlen_states, NOT last_state # See reference implementation: # https://github.com/state-spaces/mamba/blob/e0761ece1db07e0949dd88b4f4cd440420a19fd9/mamba_ssm/modules/mamba2.py#L267 # pylint: disable=line-too-long - ssm_state[batch_indices] = varlen_states + tensor_masked_update(ssm_state, batch_indices, ssm_varlen_states) + elif 
is_chunked_prefill: + assert batch_indices is not None + y, last_state = y + tensor_masked_update(ssm_state, batch_indices, last_state) else: y, last_state = y ssm_state.copy_(last_state) @@ -817,7 +905,7 @@ def ssm_prefill( return y - def ssm_decode( + def _ssm_decode( self, zxBCdt: torch.Tensor, conv_state: torch.Tensor, @@ -969,46 +1057,6 @@ def ssm_decode( # Restore sequence dimension return y.unsqueeze(0) - def _get_varlen_generation_state( - self, inference_context: Optional[BaseInferenceContext] = None - ) -> Tuple[torch.Tensor, torch.Tensor, bool]: - """Constructs the variable length generation state for non-decode dynamic inference. - - The returned state includes the following: - `seq_idx` (Tensor): A map from token idx to request idx. - `cu_seqlens` (Tensor): The cumulative sequence lengths. - `return_varlen_states` (bool): Whether to return a varlen states tensor for - `mamba_chunk_scan_combined`. - - Returns empty state for training, static inference, or decode-only dynamic inference. - - Args: - inference_context (InferenceContext): The inference context. - - Returns: - A tuple of (`seq_idx`, `cu_seqlens`, `return_varlen_states`) - """ - - if ( - inference_context is None - or not inference_context.is_dynamic_batching() - or inference_context.is_decode_only() - ): - return None, None, False - - active_token_count = inference_context.active_token_count - seq_idx = ( - inference_context.token_to_request_idx[:active_token_count] - .clone() - .to(torch.int32) - .unsqueeze(0) - ) - - # Get the list of cumulative sequence lengths for active requests. 
- cu_seqlens, _ = inference_context.cu_query_lengths() - - return seq_idx, cu_seqlens, True - def mamba_state_shapes_per_request(self) -> Tuple[Tuple[int], Tuple[int]]: """Returns the Mamba conv and ssm states shapes per request.""" conv_states_shape = (self.conv1d.weight.shape[0], self.d_conv) @@ -1081,7 +1129,7 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): module_sharded_sd = make_sharded_tensors_for_checkpoint( module_sd, f"{prefix}{name}.", - {f"weight": 0, f"bias": 0}, + {"weight": 0, "bias": 0}, sharded_offsets, tp_group=self.tp_group, dp_cp_group=metadata['dp_cp_group'], @@ -1202,3 +1250,22 @@ def sh_ten_merge_fn(sub_state_dict): return ShardedTensorFactory( orig_sh_ten.key, orig_sh_ten.data, sh_ten_build_fn, sh_ten_merge_fn, orig_sh_ten.replica_id ) + + +def _check_mamba_sequence_packing_support( + for_inference_not_training: bool = True, +) -> Tuple[bool, Optional[str]]: + """Checks whether `causal_conv1d` and `mamba_ssm` support sequence packing.""" + if for_inference_not_training: + # https://github.com/Dao-AILab/causal-conv1d/commit/d87608f78f87d1288a7821d9e6ff4b10a8d5bf07 + conv1d_min = "1.5.3.post1" + # https://github.com/state-spaces/mamba/commit/4f77d5306e19f5c7ae37665a44c3e61e24cafcb5 + mamba_min = "2.2.6.post3" + else: + conv1d_min = "1.4.0" + mamba_min = "2.0.0" + if not is_causal_conv1d_min_version(conv1d_min): + return False, f"causal_conv1d >= {conv1d_min} is required" + elif not is_mamba_min_version(mamba_min): + return False, f"mamba_ssm >= {mamba_min} is required" + return True, None diff --git a/megatron/core/tensor_parallel/inference_layers.py b/megatron/core/tensor_parallel/inference_layers.py index ddba1961042..9c1adbc6717 100644 --- a/megatron/core/tensor_parallel/inference_layers.py +++ b/megatron/core/tensor_parallel/inference_layers.py @@ -1,7 +1,5 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- - -from typing import Callable, Optional +from typing import Callable, Optional, Tuple import torch import torch.distributed as dist @@ -11,6 +9,7 @@ TERowParallelLinear, ) from megatron.core.inference.communication.torch_symm_triton import ( + fused_multimem_rs_add_norm_ag, multimem_all_gather, multimem_reduce_scatter, ) @@ -58,6 +57,7 @@ def __init__( bias: bool, skip_bias_add: bool, is_expert: bool, + stride: int = 1, skip_weight_param_allocation: bool = False, tp_comm_buffer_name: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, @@ -72,6 +72,7 @@ def __init__( bias=bias, skip_bias_add=skip_bias_add, is_expert=is_expert, + stride=stride, skip_weight_param_allocation=skip_weight_param_allocation, tp_comm_buffer_name=tp_comm_buffer_name, tp_group=tp_group, @@ -90,7 +91,24 @@ def __init__( config.sequence_parallel ), "--transformer-impl=inference_optimized requires --sequence-parallel" - def _all_gather(self, x: torch.Tensor) -> None: + # Boolean to be toggled externally for skipping norm and all-gather. + # This is used when enabling fused reduce-scatter + add + rms-norm + all-gather + # in tensor parallelism. In this case, the preceeding RowParallelLinear layer + # has already applied the rms-norm and all-gather. + self.skip_norm_and_all_gather = False + + def _maybe_allocate_symmetric_buffer(self, x: torch.Tensor): + """ + Attempt to allocate symmetric memory buffer for all-gather. + """ + symm_mem_buffer_dims = list(x.size()) + symm_mem_buffer_dims[0] *= self.tp_size + symm_mem_buffer = get_global_symmetric_memory_buffer().maybe_get_tensor( + symm_mem_buffer_dims, dtype=x.dtype + ) + return symm_mem_buffer + + def _all_gather(self, x: torch.Tensor, symm_mem_buffer: dict) -> None: """ Attempt an NVLS all-gather into symmetric memory. If not possible, revert to torch dist (NCCL) all-gather. @@ -102,17 +120,11 @@ def _all_gather(self, x: torch.Tensor) -> None: is_bf16 = x.dtype == torch.bfloat16 # 2. 
check if hopper or newer is_hopper_or_newer = torch.cuda.get_device_properties(x.device).major >= 9 - # 3. attempt to ask for symmetric memory - symm_mem_buffer_dims = list(x.size()) - symm_mem_buffer_dims[0] *= self.tp_size - symm_mem_buffer = get_global_symmetric_memory_buffer().maybe_get_tensor( - symm_mem_buffer_dims, dtype=x.dtype - ) + # 3. check if symmetric memory buffer is available has_enough_symmetric_memory = symm_mem_buffer["handle"] is not None can_use_custom_nvls_collectives = ( is_bf16 and is_hopper_or_newer and has_enough_symmetric_memory ) - if can_use_custom_nvls_collectives: # do multimem all gather multimem_all_gather(symm_mem_buffer["tensor"], x, symm_mem_buffer["handle"]) @@ -123,13 +135,29 @@ def _all_gather(self, x: torch.Tensor) -> None: return x @torch.no_grad() - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, None]: """ Forward pass. """ - x = _te_rms_norm_kernel(x=x, weight=self.layer_norm_weight, eps=self.eps) - x = self._all_gather(x) + # Necessary conditions to ensure we are executing the fused rs-add-rmsnorm-ag + # in the preceeding RowParallelLinear layer. + # 1. skip_norm_and_all_gather is True + # 2. tp_size > 1 + # 3. 
enough symmetric memory is available - if available it already has the output + symm_mem_buffer = self._maybe_allocate_symmetric_buffer(x) + is_in_fused_mode = ( + self.skip_norm_and_all_gather + and self.tp_size > 1 + and symm_mem_buffer["handle"] is not None + ) + if is_in_fused_mode: + x = symm_mem_buffer["tensor"] + else: + x = _te_rms_norm_kernel(x=x, weight=self.layer_norm_weight, eps=self.eps) + x = self._all_gather(x, symm_mem_buffer) + x = torch.matmul(x, self.weight.t()) + return x, None @@ -176,7 +204,12 @@ def __init__( config.sequence_parallel ), "--transformer-impl=inference_optimized requires --sequence-parallel" - def _matmul_reduce_scatter(self, x): + # Placeholder for next layer norm weights for fused + # reduce-scatter + add + rms-norm + all-gather + self.next_layer_norm_weights = None + self.config = config + + def _matmul_reduce_scatter(self, x, residual=None): """ Multiplies x by the weight matrix and performs a reduce-scatter. It will first try to write the matmul output to symmetric memory @@ -202,19 +235,52 @@ def _matmul_reduce_scatter(self, x): torch.matmul(x, self.weight.t(), out=symm_mem_buffer["tensor"]) x = symm_mem_buffer["tensor"] # perform nvls reduce-scatter - output_dims = list(x.size()) - output_dims[0] = x.size(0) // self.tp_size - output = torch.empty(output_dims, dtype=x.dtype, device=x.device) - multimem_reduce_scatter(output, x, symm_mem_buffer["handle"]) - return output + if self.next_layer_norm_weights is None: + output_dims = list(x.size()) + output_dims[0] = x.size(0) // self.tp_size + output = torch.empty(output_dims, dtype=x.dtype, device=x.device) + multimem_reduce_scatter(output, x, symm_mem_buffer["handle"]) + return output + else: + assert hasattr(self, "residual"), ( + "For fused reduce-scatter + add + rms-norm + all-gather, " + "residual must be set via _set_residual()" + ) + residual = self.residual + fused_multimem_rs_add_norm_ag( + residual, + symm_mem_buffer["tensor"], + symm_mem_buffer["handle"], + 
residual, + self.next_layer_norm_weights, + self.config.layernorm_epsilon, + ) + # 1. Residual has the output of the reduce-scatter + residual add + # Care must be taken in the model definition, so as to not apply the + # residual again. + # 2. The output of the full reduce-scatter + add + rms-norm + all-gather is + # written into symm_mem_buffer["tensor"] and will be accessible there. + return residual else: # revert to torch dist (NCCL) reduce-scatter x = torch.matmul(x, self.weight.t()) x, _ = reduce_scatter_along_first_dim(x, tp_group=self.tp_group) return x + def _set_next_layer_norm_weights(self, weights: torch.Tensor): + """ + Set next layer norm weights for fused reduce-scatter + add + rms-norm + all-gather. + """ + self.next_layer_norm_weights = weights + + def _set_residual(self, residual: torch.Tensor): + """ + Set residual for fused reduce-scatter + add + rms-norm + all-gather. + """ + self.residual = residual + @torch.no_grad() - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x: torch.Tensor, residual: Optional[torch.Tensor] = None) -> torch.Tensor: """ Forward pass. 
""" diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index d3ec11aaf5c..b2b254dec32 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -821,7 +821,7 @@ def __init__( embedding_activation_buffer: Optional[List[torch.Tensor]] = None, grad_output_buffer: Optional[List[torch.Tensor]] = None, is_expert: bool = False, - tp_comm_buffer_name: str = None, # Not used + tp_comm_buffer_name: Optional[str] = None, # Not used disable_grad_reduce: bool = False, tp_group: Optional[torch.distributed.ProcessGroup] = None, ): @@ -972,7 +972,7 @@ def forward( input_: torch.Tensor, weight: Optional[torch.Tensor] = None, runtime_gather_output: Optional[bool] = None, - ): + ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: """Forward of ColumnParallelLinear Args: @@ -1066,6 +1066,13 @@ def forward( output_bias = self.bias if self.skip_bias_add else None return output, output_bias + def backward_dw(self) -> None: + """Compute weight gradients during the backward pass if delay_wgrad_compute is enabled. + + Not supported - does nothing. + """ + pass + def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): """Sharding along axis 0, bias sharded""" state_dict = self.state_dict(prefix="", keep_vars=True) diff --git a/megatron/core/timers.py b/megatron/core/timers.py index 95365cc2802..78204ad243f 100644 --- a/megatron/core/timers.py +++ b/megatron/core/timers.py @@ -174,6 +174,17 @@ def reset(self): self._elapsed = 0.0 self._started = False + def set_elapsed(self, value): + """Directly set the elapsed time. + + This is useful for injecting pre-computed timing values (e.g., startup + timestamps) into the timer so they can be reported via timers.log(). + + Args: + value (float): The elapsed time value in seconds. + """ + self._elapsed = value + def elapsed(self, reset=True, barrier=False): """Calculates the elapsed time and restarts timer. 
diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index c3c7dad250a..bc5e4e2ee0d 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -1,9 +1,11 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +from __future__ import annotations import copy +import inspect from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import NoReturn, Optional, Tuple, Union +from typing import Callable, Optional, Protocol, Tuple, Union import torch from torch import Tensor @@ -32,6 +34,7 @@ from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module +from megatron.core.typed_torch import apply_module, not_none from megatron.core.utils import ( deprecate_inference_params, divide, @@ -39,6 +42,7 @@ get_pg_size, is_fa_min_version, is_te_min_version, + is_using_quantization_scales, nvtx_range_pop, nvtx_range_push, ) @@ -113,14 +117,107 @@ HAVE_FUSED_QKV_ROPE = False +class LinearQkv(Protocol): + """Protocol for linear_qkv modules.""" + + def forward(self, input: Tensor, /) -> tuple[Tensor, object]: + """Applies linear_qkv.""" + ... + + def backward_dw(self) -> None: + """Backward pass for the linear_qkv module.""" + ... + + +class LinearQkvBuilder(Protocol): + """Protocol for building linear_qkv layers.""" + + def __call__( + self, + input_size: int, + output_size: int, + /, + *, + config: TransformerConfig, + init_method: Callable[[torch.Tensor], None], + gather_output: bool, + bias: bool, + skip_bias_add: bool, + is_expert: bool, + tp_comm_buffer_name: str, + tp_group: torch.distributed.ProcessGroup | None = None, + ) -> LinearQkv: ... 
+ + +class LinearLayer(Protocol): + """Protocol for linear_q and linear_kv modules.""" + + def forward(self, input: Tensor, /) -> Tuple[Tensor, object]: + """Applies linear_q/linear_kv.""" + ... + + +class LinearLayerBuilder(Protocol): + """Protocol for building linear_q and linear_kv layers.""" + + def __call__( + self, + input_size: int, + output_size: int, + /, + *, + config: TransformerConfig, + init_method: Callable[[torch.Tensor], None], + gather_output: bool, + bias: bool, + skip_bias_add: bool, + is_expert: bool, + ) -> LinearLayer: ... + + +class CoreAttention(Protocol): + """Protocol for core_attention modules.""" + + def forward( + self, + query: Tensor, + key: Tensor, + value: Tensor, + attention_mask: Optional[Tensor], + /, + *, + attn_mask_type: AttnMaskType, + attention_bias: Optional[Tensor], + packed_seq_params: Optional[PackedSeqParams], + ) -> Tensor: + """Applies dot product attention.""" + ... + + +class CoreAttentionBuilder(Protocol): + """Protocol for building core_attention layers.""" + + def __call__( + self, + *, + config: TransformerConfig, + layer_number: int, + attn_mask_type: AttnMaskType, + attention_type: str, + cp_comm_type: Optional[str], + softmax_scale: Optional[float], + pg_collection: Optional[ProcessGroupCollection], + ) -> CoreAttention: ... + + @dataclass class SelfAttentionSubmodules: """ Configuration class for specifying the submodules of a self-attention. """ - linear_qkv: Union[ModuleSpec, type] = None - core_attention: Union[ModuleSpec, type] = None + linear_qkv: LinearQkvBuilder + core_attention: CoreAttentionBuilder linear_proj: Union[ModuleSpec, type] = None q_layernorm: Union[ModuleSpec, type] = None k_layernorm: Union[ModuleSpec, type] = None @@ -132,9 +229,9 @@ class CrossAttentionSubmodules: Configuration class for specifying the submodules of a cross-attention. 
""" - linear_q: Union[ModuleSpec, type] = None - linear_kv: Union[ModuleSpec, type] = None - core_attention: Union[ModuleSpec, type] = None + linear_q: LinearLayerBuilder + linear_kv: LinearLayerBuilder + core_attention: CoreAttentionBuilder linear_proj: Union[ModuleSpec, type] = None @@ -152,8 +249,8 @@ def __init__( layer_number: int, attn_mask_type: AttnMaskType, attention_type: str, - cp_comm_type: str = None, - pg_collection: ProcessGroupCollection = None, + cp_comm_type: str | None = None, + pg_collection: ProcessGroupCollection | None = None, ): super().__init__(config=config) @@ -164,6 +261,9 @@ def __init__( self.attention_type = attention_type self.batch_invariant_mode = config.batch_invariant_mode + assert self.config.kv_channels is not None + assert self.config.num_query_groups is not None + # For normal attention without groups, num_query_groups == num_attention_heads, # so these two will be the same self.query_projection_size = self.config.kv_channels * self.config.num_attention_heads @@ -216,8 +316,7 @@ def __init__( tmp_config.num_query_groups = world_size else: tmp_config = self.config - self.core_attention = build_module( - submodules.core_attention, + self.core_attention = submodules.core_attention( config=tmp_config, layer_number=self.layer_number, attn_mask_type=self.attn_mask_type, @@ -300,7 +399,7 @@ def custom_forward(*inputs): attention_mask = inputs[3] attn_mask_type = inputs[5] attn_mask_type = AttnMaskType(attn_mask_type.item()) - output_ = self.core_attention( + output_ = apply_module(self.core_attention)( query, key, value, @@ -358,7 +457,7 @@ def _adjust_key_value_for_inference( sequence_len_offset: Optional[int] = None, *, inference_params: Optional[BaseInferenceContext] = None, - ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: + ) -> tuple[Tensor, Tensor, Tensor, Tensor, AttnMaskType, Tensor]: """ Saves the generated key and value tensors to the end of the buffers in inference_context. 
Returns the full size keys and values from the provided inference_context, as well as @@ -526,7 +625,15 @@ def _adjust_key_value_for_inference( @abstractmethod def get_query_key_value_tensors( - self, hidden_states, key_value_states, output_gate, split_qkv=True + self, + hidden_states: Tensor, + key_value_states: Tensor | None, + output_gate: bool = False, + split_qkv: bool = True, + ) -> ( + tuple[Tensor, Tensor, Tensor, Tensor] + | tuple[Tensor, Tensor, Tensor] + | tuple[Tensor, list[int]] ): """ This method needs to be implemented based on whether the derived class @@ -544,7 +651,7 @@ def flash_decode( rotary_cos: Tensor, rotary_sin: Tensor, rotary_interleaved: bool = False, - ) -> (Tensor, Tensor): + ) -> tuple[Tensor, Tensor]: """ The flash decoding kernel will do the following in a single execution: 1. Compute RoPE embedding with precomputed cos & sin tensors @@ -579,6 +686,74 @@ def flash_decode( ) return out + def _flash_attention_3_forward_wrapper( + self, + q: Tensor, + k: Tensor, + v: Tensor, + max_seqlen_q, + max_seqlen_k, + cu_seqlens_q, + seqlens_k, + block_table, + softmax_scale, + ): + """ + Wrapper for calling the FA3 _flash_attn_forward function. + Handles argument conversion for different versions of the _flash_attn_forward API. 
+ """ + candidate_kwargs = { + "q": q, + "k": k, + "v": v, + "k_new": None, + "v_new": None, + "qv": None, + "out": None, + "out_": None, + "cu_seqlens_q": cu_seqlens_q, + "cu_seqlens_k": None, + "cu_seqlens_k_new": None, + "seqused_q": None, + "seqused_k": seqlens_k, + "max_seqlen_q": max_seqlen_q, + "max_seqlen_k": max_seqlen_k, + "page_table": block_table, + "kv_batch_idx": None, + "leftpad_k": None, + "rotary_cos": None, + "rotary_sin": None, + "seqlens_rotary": None, + "q_descale": None, + "k_descale": None, + "v_descale": None, + "softmax_scale": softmax_scale, + "causal": True, + "attention_chunk": 0, + "softcap": 0.0, + "window_size": (-1, -1), + "window_size_left": -1, + "window_size_right": -1, + "rotary_interleaved": True, + "scheduler_metadata": None, + "num_splits": 0 if not self.batch_invariant_mode else 1, + "pack_gqa": None, + "sm_margin": 0, + } + + # Parse the expect argument names from the function signature + if inspect.isfunction(_flash_attn_forward): + sig = inspect.signature(_flash_attn_forward) + else: + assert isinstance(_flash_attn_forward, torch._library.custom_ops.CustomOpDef) + sig = inspect.signature(_flash_attn_forward._init_fn) + valid_kwargs = set(sig.parameters.keys()) + final_kwargs = {k: candidate_kwargs[k] for k in valid_kwargs if k in candidate_kwargs} + + output_total, *unused = _flash_attn_forward(**final_kwargs) + + return output_total + def flash_decode_and_prefill( self, q: Tensor, @@ -590,6 +765,7 @@ def flash_decode_and_prefill( cu_seqlens_k, seqlens_k, block_table, + is_decode_only, ) -> Tensor: """Flash attention kernel for mixed decode and prefill samples. @@ -603,6 +779,7 @@ def flash_decode_and_prefill( cu_seqlens_k (Tensor): Cumulative key sequence lengths. seqlens_k (Tensor): key sequence lengths. block_table (Tensor): KV cache block ids for all samples. + is_decode_only (bool): True if batch is decode only. Return: (Tensor) Attention output. 
""" @@ -611,7 +788,7 @@ def flash_decode_and_prefill( assert block_table is not None # Flash attn kernel. - if max_seqlen_q > 1: + if not is_decode_only: q = q.squeeze(1) if getattr(self, "softmax_scale", None) is not None: softmax_scale = self.softmax_scale @@ -620,40 +797,16 @@ def flash_decode_and_prefill( if HAVE_FA3: # TODO(ksanthanam): Replace with call to flash_attn_varlen_func once # it accepts block_table - output_total, *unused = _flash_attn_forward( - q=q, - k=k, - v=v, - k_new=None, - v_new=None, - qv=None, - out=None, - cu_seqlens_q=cu_seqlens_q, - cu_seqlens_k=None, - cu_seqlens_k_new=None, - seqused_q=None, - seqused_k=seqlens_k, - max_seqlen_q=max_seqlen_q, - max_seqlen_k=max_seqlen_k, - page_table=block_table, - kv_batch_idx=None, - leftpad_k=None, - rotary_cos=None, - rotary_sin=None, - seqlens_rotary=None, - q_descale=None, - k_descale=None, - v_descale=None, - softmax_scale=softmax_scale, - causal=True, - window_size=(-1, -1), - attention_chunk=0, - softcap=0.0, - rotary_interleaved=True, - scheduler_metadata=None, - num_splits=0 if not self.batch_invariant_mode else 1, - pack_gqa=None, - sm_margin=0, + output_total = self._flash_attention_3_forward_wrapper( + q, + k, + v, + max_seqlen_q, + max_seqlen_k, + cu_seqlens_q, + seqlens_k, + block_table, + softmax_scale, ) else: assert ( @@ -735,7 +888,7 @@ def forward( sequence_len_offset: Optional[int] = None, *, inference_params: Optional[BaseInferenceContext] = None, - ) -> Tuple[Tensor, Tensor]: + ) -> tuple[Tensor, Tensor]: """ Perform a forward pass through the attention module. 
@@ -830,22 +983,29 @@ def forward( with off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") as hidden_states: qkv_output = self.get_query_key_value_tensors( - hidden_states, key_value_states, output_gate=output_gate, split_qkv=split_qkv + hidden_states, + key_value_states, + split_qkv=split_qkv, + output_gate=self.config.attention_output_gate, ) if self.offload_qkv_linear: # `qkv_output` may be a tuple; commit supports tuple/list and will keep structure. qkv_output = off_interface.group_commit( qkv_output, name="qkv_linear", forced_released_tensors=[] ) - attn_mask_type = self.attn_mask_type block_table = None gate = None - if output_gate and split_qkv: - query, key, value, gate = qkv_output - elif split_qkv: - query, key, value = qkv_output + if split_qkv: + if self.config.attention_output_gate: + query, key, value, gate = qkv_output + else: + query, key, value = qkv_output + mixed_qkv = qkv_split_arg_list = None else: + assert ( + not self.config.attention_output_gate + ), "attention_output_gate is not supported for unsplit mixed_qkv tensor." 
mixed_qkv, qkv_split_arg_list = qkv_output nvtx_range_pop(suffix="qkv") @@ -991,7 +1151,7 @@ def forward( with off_interface( self.offload_core_attention and self.training, query, "core_attn" ) as query: - core_attn_out = self.core_attention( + core_attn_out = apply_module(self.core_attention)( query, key, value, @@ -1017,8 +1177,15 @@ def forward( cu_kv_lengths, kv_lengths, block_table, + inference_context.is_decode_only(), ) core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)') + + # Clear the outputs for padding tokens when using quantization scales + # to avoid corrupting amax calculations + if is_using_quantization_scales(self.config): + core_attn_out[inference_context.padding_slice] = 0.0 + if self.offload_core_attention and self.training: core_attn_out = off_interface.group_commit( core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] @@ -1041,7 +1208,6 @@ def forward( # ================= # Output. [sq, b, h] # ================= - nvtx_range_push(suffix="linear_proj") with off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") as core_attn_out: output, bias = self.linear_proj(core_attn_out) @@ -1086,9 +1252,9 @@ def __init__( config: TransformerConfig, submodules: SelfAttentionSubmodules, layer_number: int, - attn_mask_type=AttnMaskType.padding, - cp_comm_type: str = None, - pg_collection: ProcessGroupCollection = None, + attn_mask_type: AttnMaskType = AttnMaskType.padding, + cp_comm_type: str | None = None, + pg_collection: ProcessGroupCollection | None = None, ): super().__init__( config=config, @@ -1103,12 +1269,11 @@ def __init__( self.linear_qkv_out_dim = self.query_projection_size + 2 * self.kv_projection_size if self.config.attention_output_gate: self.linear_qkv_out_dim += self.config.kv_channels * self.config.num_attention_heads - self.linear_qkv = build_module( - submodules.linear_qkv, + self.linear_qkv = submodules.linear_qkv( self.config.hidden_size, self.linear_qkv_out_dim, config=self.config, - 
init_method=self.config.init_method, + init_method=not_none(self.config.init_method), gather_output=False, bias=self.config.add_bias_linear or self.config.add_qkv_bias, skip_bias_add=False, @@ -1209,16 +1374,24 @@ def _compare(srcs, tgts, names, parallelism): ) def get_query_key_value_tensors( - self, hidden_states, key_value_states=None, output_gate=False, split_qkv=True + self, + hidden_states: Tensor, + key_value_states: Tensor | None = None, + output_gate: bool = False, + split_qkv: bool = True, + ) -> ( + tuple[Tensor, Tensor, Tensor, Tensor] + | tuple[Tensor, Tensor, Tensor] + | tuple[Tensor, list[int]] ): """ - Derives `query`, `key`, `value` tensors from `hidden_states`. + Derives `query`, `key` and `value` tensors from `hidden_states`. If `output_gate` is True, then also derives `gate` tensor. If `split_qkv=False`, then the unsplit mixed_qkv tensor is returned. """ # If no output gate: Attention heads [sq, b, h] --> [sq, b, ng * (np/ng + 2) * hn)] # If have output gate: Attention heads [sq, b, h] --> [sq, b, ng * (2 * np/ng + 2) * hn)] - mixed_qkv, _ = self.linear_qkv(hidden_states) + mixed_qkv, _ = apply_module(self.linear_qkv)(hidden_states) num_query_heads_per_group = ( self.num_attention_heads_per_partition // self.num_query_groups_per_partition ) @@ -1226,8 +1399,7 @@ def get_query_key_value_tensors( if output_gate: num_qkv_heads_per_group += num_query_heads_per_group - # If no output gate: [sq, b, hp] --> [sq, b, ng, (np/ng + 2) * hn] - # If have output gate: [sq, b, hp] --> [sq, b, ng, (2 * np/ng + 2) * hn] + assert self.config.num_query_groups is not None if self.config.num_query_groups < self.world_size: # Note that weights are interleaved in the following manner: # q1 q2 k1 v1 | q3 q4 k2 v2 | q5 q6 k3 v3 | ... 
@@ -1248,7 +1420,8 @@ def get_query_key_value_tensors( size = mixed_qkv.size()[-1] // self.config.num_query_groups mixed_qkv = mixed_qkv[:, :, idx * size : (idx + 1) * size] - # [sq, b, hp] --> [sq, b, ng, (np/ng + 2) * hn] + # If no output gate: [sq, b, hp] --> [sq, b, ng, (np/ng + 2) * hn] + # If have output gate: [sq, b, hp] --> [sq, b, ng, (2 * np/ng + 2) * hn] new_tensor_shape = mixed_qkv.size()[:-1] + ( self.num_query_groups_per_partition, num_qkv_heads_per_group * self.hidden_size_per_attention_head, @@ -1322,7 +1495,7 @@ def get_query_key_value_tensors( return query, key, value - def backward_dw(self) -> NoReturn: + def backward_dw(self) -> None: """Execute weight update operations""" self._backward_qkv_proj() self._backward_output_proj() @@ -1451,9 +1624,9 @@ def __init__( config: TransformerConfig, submodules: CrossAttentionSubmodules, layer_number: int, - attn_mask_type=AttnMaskType.padding, - cp_comm_type: str = None, - pg_collection: ProcessGroupCollection = None, + attn_mask_type: AttnMaskType = AttnMaskType.padding, + cp_comm_type: str | None = None, + pg_collection: ProcessGroupCollection | None = None, ): super().__init__( config=config, @@ -1469,24 +1642,22 @@ def __init__( raise ValueError("Group query attention is not currently supported in cross attention.") assert self.query_projection_size == self.kv_projection_size - self.linear_q = build_module( - submodules.linear_q, + self.linear_q = submodules.linear_q( self.config.hidden_size, self.query_projection_size, config=self.config, - init_method=self.config.init_method, + init_method=not_none(self.config.init_method), gather_output=False, bias=self.config.add_bias_linear, skip_bias_add=False, is_expert=False, ) - self.linear_kv = build_module( - submodules.linear_kv, + self.linear_kv = submodules.linear_kv( self.config.hidden_size, 2 * self.kv_projection_size, config=self.config, - init_method=self.config.init_method, + init_method=not_none(self.config.init_method), gather_output=False, 
bias=self.config.add_bias_linear, skip_bias_add=False, @@ -1494,8 +1665,12 @@ def __init__( ) def get_query_key_value_tensors( - self, hidden_states, key_value_states, output_gate=False, split_qkv=True - ): + self, + hidden_states: Tensor, + key_value_states: Optional[Tensor], + output_gate: bool = False, + split_qkv: bool = True, + ) -> Tuple[Tensor, Tensor, Tensor]: """ Derives `query` tensor from `hidden_states`, and `key`/`value` tensors from `key_value_states`. @@ -1503,8 +1678,11 @@ def get_query_key_value_tensors( assert not output_gate, "Output gate is not supported in cross attention for now." assert split_qkv, "split_qkv must be True for CrossAttention" + assert not output_gate, "Output gate is not supported in cross attention for now." + + assert key_value_states is not None, "key_value_states cannot be None for CrossAttention" # Attention heads [sk, b, h] --> [sk, b, (np * 2 * hn)] - mixed_kv, _ = self.linear_kv(key_value_states) + mixed_kv, _ = apply_module(self.linear_kv)(key_value_states) # [sk, b, (np * 2 * hn)] --> [sk, b, np, 2 * hn] new_tensor_shape = mixed_kv.size()[:-1] + ( @@ -1517,7 +1695,7 @@ def get_query_key_value_tensors( (key, value) = tensor_parallel.split_tensor_along_last_dim(mixed_kv, 2) # Attention head [sq, b, h] --> [sq, b, hp] - query, _ = self.linear_q(hidden_states) + query, _ = apply_module(self.linear_q)(hidden_states) # [sq, b, hp] --> [sq, b, np, hn] new_tensor_shape = query.size()[:-1] + ( diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index ec02555233b..1e3e3edc558 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -10,6 +10,8 @@ from contextlib import nullcontext from dataclasses import fields, is_dataclass from enum import Enum +from functools import partial +from itertools import zip_longest from math import ceil from typing import Any, Dict, List, Optional @@ -102,6 +104,7 @@ def __init__(self, arg): self.shape = 
arg.shape self.dtype = arg.dtype self.device = arg.device + self.value = arg.data_ptr() else: self.value = arg @@ -177,6 +180,44 @@ def _determine_if_first_last_layer_of_this_vp_chunk(base_module): ) +def _clone_nested_tensors(value: Any) -> Any: + """Recursively clone tensors inside nested containers.""" + if torch.is_tensor(value): + return value.clone() + if isinstance(value, (tuple, list)): + return type(value)(_clone_nested_tensors(v) for v in value) + if isinstance(value, dict): + return {k: _clone_nested_tensors(v) for k, v in value.items()} + if isinstance(value, set): + raise TypeError( + "Sets of tensors are unsupported in cudagraph helpers; use list/tuple instead" + ) + return value + + +def _ensure_generator_state_is_cudagraph_safe(gen: torch.Generator) -> torch.Generator: + """Make generator state safe for CUDA graph capture/replay. + + Generator state tensors can become inference tensors if created under `torch.inference_mode()`. + CUDA graph capture may later attempt in-place updates on that state; this fails for inference + tensors. Fix the generator *in-place* (preserving identity) by cloning its state outside + inference mode and setting it back. + """ + with torch.inference_mode(mode=False): + if hasattr(gen, "graphsafe_get_state"): + state = gen.graphsafe_get_state() + else: + state = gen.get_state() + + cloned_state = _clone_nested_tensors(state) + if hasattr(gen, "graphsafe_set_state"): + gen.graphsafe_set_state(cloned_state) + else: + gen.set_state(cloned_state) + + return gen + + class _CudagraphGlobalRecord: """A global datastructure that records of the ordering of all _CudaGraphRunner's first fwd or bwd passes. 'create_cudagraphs' will use this to create @@ -684,8 +725,12 @@ def create_fwd_graph(self, args, kwargs, clone_inputs=True): self.fwd_graph = torch.cuda.CUDAGraph() # For cases with multiple active RNG states, e.g. TP. 
- for _, state in get_all_rng_states().items(): - self.fwd_graph.register_generator_state(state) + rng_states = get_all_rng_states() + with torch.inference_mode(mode=False): + for gen in rng_states.values(): + self.fwd_graph.register_generator_state( + _ensure_generator_state_is_cudagraph_safe(gen) + ) # warmup again as case graph capture mode may execute a different codepath for _ in range(self.num_warmup_steps): @@ -707,6 +752,15 @@ def create_fwd_graph(self, args, kwargs, clone_inputs=True): with self.get_quantization_context(): torch.cuda.synchronize() + # Register default CUDA generators ourselves (fixed in-place to have normal tensors) + # before capture begins, to avoid inference-tensor state issues during capture. + with torch.inference_mode(mode=False): + for device_idx in range(torch.cuda.device_count()): + default_gen = torch.cuda.default_generators[device_idx] + self.fwd_graph.register_generator_state( + _ensure_generator_state_is_cudagraph_safe(default_gen) + ) + with torch.cuda.graph( self.fwd_graph, pool=self.fwd_mempool, capture_error_mode="thread_local" ): @@ -1764,8 +1818,6 @@ def _get_cuda_graph_input_data(self): # Get the PP and VPP scheduling order. from megatron.core.pipeline_parallel.schedules import ( - convert_schedule_table_to_order, - get_overlap_moe_expert_parallel_comm_order, get_pp_rank_microbatches, get_schedule_table, ) @@ -2028,3 +2080,133 @@ def delete_cuda_graphs(self): f'{graphs_not_reset} graphs deleted without explicit reset.', ) self._graphs_created = False + + +def convert_schedule_table_to_order(num_warmup_microbatches, num_model_chunks, schedule_table): + """Convert a tunable schedule lookup table to the te.make_graphed_callables() accepted + order format. 
For example, the tunable schedule table for PP2 N3M5 with VP2 is as below: + virtual_microbatch_id | 0 1 2 3 4 5 6 7 8 9 + microbatch_id | 0 1 2 0 1 2 3 4 3 4 + model_chunk_id | 0 0 0 1 1 1 0 0 1 1 + + Then the forward backward separated order is: + forward | 1 1 1 2 2 2 1 1 2 2 + backward | -2 -2 -2 -1 -1 -1 -2 -2 -1 -1 + + If num_warmup_microbatches is 5, the output order is: + 1 1 1 2 2 2 -2 1 -2 1 -2 2 -1 2 -1 -1 -2 -2 -1 -1 + """ + _, model_chunk_id_table = zip(*schedule_table) + forward_order = [chunk_id + 1 for chunk_id in model_chunk_id_table] + backward_order = [chunk_id - num_model_chunks for chunk_id in model_chunk_id_table] + order = forward_order[:num_warmup_microbatches] + for i in range(num_warmup_microbatches, len(forward_order)): + order.append(forward_order[i]) + order.append(backward_order[i - num_warmup_microbatches]) + if num_warmup_microbatches > 0: + order.extend(backward_order[-num_warmup_microbatches:]) + return order + + +def get_overlap_moe_expert_parallel_comm_order(order, num_layers_per_chunk, capture_wgrad_graph): + """ + This functions gets the order for overlap_moe_expert_parallel_comm schedule for the original + chunk-wise order list. Each chunk is transformered to chunks with only 1 layer so that + layers between 2 chunks can now overlap with each other while following the graph order. + If capture_wgrad_graph is True, the wgrad backward graph is also added to the order by + decreasing the layer id by 0.5. + + Args: + order (List[int]): The original chunk-wise order list. Positive values represent forward + passes for chunks, negative values represent backward passes. The absolute value + indicates the chunk ID (1-indexed). + num_layers_per_chunk (List[int]): Number of graphable layers in each chunk. The length + of this list equals the number of chunks. + capture_wgrad_graph (bool): If True, weight gradient computation graphs are added to the + order by appending entries with layer_id - 0.5. 
+ + Returns: + Tuple[List[float], List[Optional[List[int]]]]: A tuple containing: + - new_order: The layer-wise order list where each chunk is expanded to individual + layers. Positive values are forward passes, negative values are backward passes. + Values with .5 suffix indicate weight gradient computations. + - chunk_id_list: A list parallel to new_order. For forward passes, contains + [chunk_id, layer_index_within_chunk]. For backward passes, contains None. + + Example: + original_order: [1, 2, -2, 1, -1, -1] + num_layers_per_chunk: [1, 2] + capture_wgrad_graph=True: + new_order: [1, 2, 3, 1, -3, -3.5, -2, -2.5, -1, -1.5, -1, -1.5] + chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None, + None, None, None, None, None, None, None] + capture_wgrad_graph=False: + new_order: [1, 2, 3, 1, -3, -2, -1, -1] + chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None, None, None, None] + """ + + def _add_order(new_order, chunk_id_list, c_id, layer_id, is_wgrad=False, index=None): + if is_wgrad: + new_order.append(layer_id - 0.5) + else: + new_order.append(layer_id) + if c_id > 0: + chunk_id_list.append([abs(c_id) - 1, index]) + else: + chunk_id_list.append(None) + + new_order = [] + chunk_id_list = [] + add_order = partial(_add_order, new_order, chunk_id_list) + first_backward_idx, last_forward_idx = None, None + for idx, c_id in enumerate(order): + if first_backward_idx is None and c_id < 0: + first_backward_idx = idx + if c_id > 0: + last_forward_idx = idx + + def get_layer_range(c_id): + num_layers = num_layers_per_chunk[abs(c_id) - 1] + num_layers_previous_chunks = sum(num_layers_per_chunk[: abs(c_id) - 1]) + if c_id > 0: + return list( + range(num_layers_previous_chunks + 1, num_layers_previous_chunks + num_layers + 1) + ) + return list(range(-num_layers_previous_chunks - num_layers, -num_layers_previous_chunks)) + + # warmup stage + for c_id in order[:first_backward_idx]: + layer_range = get_layer_range(c_id) + new_order += layer_range + chunk_id_list.extend([abs(c_id) 
- 1, i] for i in range(len(layer_range))) + + # 1f1b overlap stage + if first_backward_idx < last_forward_idx: + for c_id_b, c_id_f in zip( + order[first_backward_idx : last_forward_idx + 1 : 2], + order[first_backward_idx + 1 : last_forward_idx + 1 : 2], + ): + layer_range_f = get_layer_range(c_id_f) + layer_range_b = get_layer_range(c_id_b) + index = 0 + for l_b, l_f in zip_longest(layer_range_b, layer_range_f, fillvalue=0): + # always forward graph before backward graph + if l_f != 0: + add_order(c_id_f, l_f, index=index) + if l_b != 0: + add_order(c_id_b, l_b) + if capture_wgrad_graph and index < len(layer_range_b) - 1: + add_order(c_id_b, l_b, is_wgrad=True) + index += 1 + # last wgrad backward + if capture_wgrad_graph and layer_range_b: + add_order(c_id_b, layer_range_b[-1], is_wgrad=True) + + # cool down stage, backward graphs only + for c_id in order[last_forward_idx + 1 :]: + for l_b in get_layer_range(c_id): + add_order(c_id, l_b) + if capture_wgrad_graph: + add_order(c_id, l_b, is_wgrad=True) + + return new_order, chunk_id_list diff --git a/megatron/core/transformer/dot_product_attention.py b/megatron/core/transformer/dot_product_attention.py index 7102440552a..26622839c14 100644 --- a/megatron/core/transformer/dot_product_attention.py +++ b/megatron/core/transformer/dot_product_attention.py @@ -48,10 +48,10 @@ def __init__( layer_number: int, attn_mask_type: AttnMaskType, attention_type: str, - attention_dropout: float = None, - softmax_scale: float = None, - cp_comm_type: str = None, - pg_collection: ProcessGroupCollection = None, + attention_dropout: Optional[float] = None, + softmax_scale: Optional[float] = None, + cp_comm_type: Optional[str] = None, + pg_collection: Optional[ProcessGroupCollection] = None, ): super().__init__(config=config) @@ -150,9 +150,9 @@ def forward( query: Tensor, key: Tensor, value: Tensor, - attention_mask: Tensor, - attn_mask_type: AttnMaskType = None, - attention_bias: Tensor = None, + attention_mask: Optional[Tensor], + 
attn_mask_type: Optional[AttnMaskType] = None, + attention_bias: Optional[Tensor] = None, packed_seq_params: Optional[PackedSeqParams] = None, ): """Forward.""" @@ -272,7 +272,7 @@ def forward( def sharded_state_dict( self, prefix: str = '', - sharded_offsets: Tuple[Tuple[int, int, int]] = (), + sharded_offsets: Tuple[Tuple[int, int, int], ...] = (), metadata: Optional[dict] = None, ) -> ShardedStateDict: """Sharded state dict for the learnable softmax offset parameter""" diff --git a/megatron/core/transformer/experimental_attention_variant/dsa.py b/megatron/core/transformer/experimental_attention_variant/dsa.py index 353b31e9bcd..88b4713dc60 100644 --- a/megatron/core/transformer/experimental_attention_variant/dsa.py +++ b/megatron/core/transformer/experimental_attention_variant/dsa.py @@ -546,14 +546,10 @@ def forward_with_scores( None, None, x, self.config, packed_seq_params ) if self.config.rope_type == "rope": - rotary_pos_emb = self.rotary_pos_emb( - rotary_seq_len, packed_seq_params=packed_seq_params - ) + rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len, packed_seq=False) mscale = 1.0 else: - rotary_pos_emb, mscale = self.rotary_pos_emb( - rotary_seq_len, packed_seq_params=packed_seq_params - ) + rotary_pos_emb, mscale = self.rotary_pos_emb(rotary_seq_len, packed_seq=False) # ========================================= # Gather inputs if sp is enabled @@ -734,9 +730,9 @@ def forward( query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, + attention_mask: torch.Tensor, x: torch.Tensor, qr: torch.Tensor, - attention_mask: torch.Tensor, attn_mask_type: AttnMaskType = None, attention_bias: torch.Tensor = None, packed_seq_params: PackedSeqParams = None, diff --git a/megatron/core/transformer/identity_op.py b/megatron/core/transformer/identity_op.py index 5d9388ffcc6..6d42beb5a8f 100644 --- a/megatron/core/transformer/identity_op.py +++ b/megatron/core/transformer/identity_op.py @@ -1,16 +1,24 @@ # Copyright (c) 2023, NVIDIA CORPORATION. 
All rights reserved. +from typing import TypeVar + import torch +T = TypeVar('T') + class IdentityOp(torch.nn.Module): """ This is a placeholder for IdentityOp(x) -> x """ - def __init__(self, *args, **kwargs): + def __init__(self, *args: object, **kwargs: object): super().__init__() - def forward(self, x, *args, **kwargs): + def forward(self, x: T, *args: object, **kwargs: object) -> T: + """Forward pass. + + Returns x unchanged. + """ return x @@ -21,8 +29,12 @@ class IdentityFuncOp(IdentityOp): return a function at runtime based on passed arguments """ - def __init__(self, *args, **kwargs): + def __init__(self, *args: object, **kwargs: object): super().__init__() - def forward(self, *args, **kwargs): + def forward(self, *args: object, **kwargs: object): + """Forward pass. + + Returns a function which returns its first argument unchanged, and discards all others. + """ return super().forward diff --git a/megatron/core/transformer/mlp.py b/megatron/core/transformer/mlp.py index 98e30887e7b..2bc3949a421 100644 --- a/megatron/core/transformer/mlp.py +++ b/megatron/core/transformer/mlp.py @@ -79,7 +79,7 @@ def __init__( submodules: MLPSubmodules, is_expert: bool = False, input_size: Optional[int] = None, - ffn_hidden_size: int = None, + ffn_hidden_size: Optional[int] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, ): super().__init__(config=config) @@ -102,8 +102,13 @@ def __init__( # If this is a gated linear unit we double the output width # see https://arxiv.org/pdf/2002.05202.pdf + # For GLU/SwiGLU, use stride=2 because each TP rank stores interleaved [gate, up] portions. + # This is critical for correct weight resharding across different TP sizes. if self.config.gated_linear_unit: ffn_hidden_size *= 2 + fc1_stride = 2 + else: + fc1_stride = 1 # Use moe_latent_size only for routed experts. 'is_expert' is false for # shared_experts. 
@@ -121,6 +126,7 @@ def __init__( is_expert=is_expert, tp_comm_buffer_name="fc1", tp_group=tp_group, + stride=fc1_stride, ) if self.config.use_te_activation_func and not (submodules.activation_func is None): @@ -227,6 +233,7 @@ def glu(x): # [s, b, h] nvtx_range_push(suffix="linear_fc2") + output, output_bias = self.linear_fc2(intermediate_parallel) nvtx_range_pop(suffix="linear_fc2") diff --git a/megatron/core/transformer/module.py b/megatron/core/transformer/module.py index d68f34ffd0b..fc849da85c8 100644 --- a/megatron/core/transformer/module.py +++ b/megatron/core/transformer/module.py @@ -1,6 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. """Megatron Module.""" +from functools import partial from typing import Optional, Tuple import torch @@ -9,7 +10,6 @@ from megatron.core import parallel_state from megatron.core.dist_checkpointing.mapping import ShardedStateDict -from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.utils import ( ensure_metadata_has_dp_cp_group, @@ -58,7 +58,7 @@ def state_dict_for_save_checkpoint(self, prefix: str = '', keep_vars: bool = Fal def sharded_state_dict( self, prefix: str = '', - sharded_offsets: Tuple[Tuple[int, int, int]] = (), + sharded_offsets: Tuple[Tuple[int, int, int], ...] = (), metadata: Optional[dict] = None, ) -> ShardedStateDict: """Default implementation for sharded state dict for distributed checkpointing. @@ -168,10 +168,7 @@ def __init__(self, config: TransformerConfig, vp_stage: Optional[int] = None): assert isinstance(config, TransformerConfig), "config must be a TransformerConfig" # Enable cuda graphs. 
- if ( - config.cuda_graph_impl == "local" - and CudaGraphScope.full_iteration not in config.cuda_graph_scope - ): + if config.cuda_graph_impl == "local": from megatron.core.transformer.cuda_graphs import CudaGraphManager self.cudagraph_manager = CudaGraphManager(config, vp_stage=vp_stage) @@ -188,6 +185,39 @@ def __init__(self, config: TransformerConfig, vp_stage: Optional[int] = None): # triggered before CUDA Graph running. This is required to ensure the correct param # all-gather overlap with forward compute. self.cuda_graph_manual_hooks = [] + # _CudaGraphBackwardDWWrapper object used to manage the wgrad backward computation. + # The `backward_dw` func api is the same as `TransformerLayerNode.backward_dw` and + # calls wgrad computation in attention module (contains attn and shared expert) + # according to CUDA graph scope. + self.cuda_graph_backward_dw_wrapper = None + + def init_backward_dw_wrapper(self): + """Initialize the backward_dw_wrapper.""" + from megatron.core.models.gpt.fine_grained_callables import _BackwardDWWrapper + + config = getattr(self, 'config', None) + assert config is not None, ( + "TransformerLayer must be initialized before calling " "`init_backward_dw_wrapper`." + ) + self.backward_dw_wrapper = _BackwardDWWrapper(self) + + def set_te_cuda_graph_backward_dw_wrapper(self): + """Replace the backward_dw callable with dw cuda graph.""" + assert ( + self.backward_dw_wrapper is not None + ), "`backward_dw_wrapper` must be set when cuda graphs are enabled for ep overlap." + self.backward_dw_wrapper.set_graphed_backward_dw_callable( + partial(self._te_cuda_graph_backward_dw_graph, self.current_microbatch) + ) + + def _te_cuda_graph_backward_dw_graph(self, microbatch_idx): + """ + CUDA Graph backward weight gradient computation for current layer. 
+ """ + cg_index = microbatch_idx % len(self.cuda_graphs) + if not hasattr(self.cuda_graphs[cg_index], 'backward_dw'): + return + self.cuda_graphs[cg_index].backward_dw() def get_layer_static_inputs(self, seq_length, micro_batch_size): """ diff --git a/megatron/core/transformer/moe/README.md b/megatron/core/transformer/moe/README.md index 71dfa17fda0..154c3e56a29 100644 --- a/megatron/core/transformer/moe/README.md +++ b/megatron/core/transformer/moe/README.md @@ -718,6 +718,26 @@ We welcome contributions! Please see [CONTRIBUTING.md](../../../../CONTRIBUTING. - GitHub Issues: [Report bugs or request features](https://github.com/NVIDIA/Megatron-LM/issues) - Documentation: [Full documentation](https://docs.nvidia.com/megatron-core/developer-guide/latest/index.html) +## Tuning Guide of Parallel Mappings +For a specific model, the best parallel mapping varies based on the model architecture, trained sequence length, and the hardware platform. +Here we provide some general rules to get better performance: +1. Keep the model parallelism size as small as possible. + - For large language models, model parallelism is often required to prevent OOM, but it adds communication overhead. + - With distributed optimizer, master weights and optimizer states are sharded across DP ranks with slight communication overhead. + - Reduce model parallelism size and increase data parallelism size when there is available GPU memory. +2. Ensure the EPxTP communication stays within the NVLink domain. + - Communications of EP and TP should remain within the NVLink domain as much as possible, as both are communication-intensive. + - If the model is too large and requires scaling across multiple nodes, consider PP before TP and EP. See item 3 for details. +3. Use Pipeline Parallelism to scale the model further. + - Enable Virtual Pipeline Parallelism (VPP) to reduce PP bubbles when PP size >= 2 by setting `num_layers_per_virtual_pipeline_stage`. 
+ - VPP size tuning: the legal values of vpp_size are all common divisors of num_layers/pp_size. For example, num_layers=24 and pp_size=4 gives vpp_size in {1, 2, 3, 6}. +4. Prefer EP over TP for the expert layer when possible: + - TP saves more memory than EP, but EP can achieve better GEMM efficiency and less communication overhead than TP. + - If EP size increases to the number of experts, local token permutation/un-permutation for expert computation is omitted. + - In practice, EP8TP1 is better than EP4TP2 for 8x7B. +5. Enable Context Parallelism for long-context training. + - The efficiency of CP largely depends on whether its communication can be overlapped with computation. + - Empirically, use CP when sequence length >= 8K. ## Citation diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index 615e12e09d6..d8e75342226 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -48,7 +48,6 @@ make_sharded_object_for_checkpoint, sharded_state_dict_default, ) -from megatron.core.utils import deprecated, internal_api try: import transformer_engine as te # pylint: disable=unused-import @@ -64,51 +63,6 @@ logger = logging.getLogger(__name__) -@deprecated( - version="0.16", - removal_version="0.17", - alternative=None, - reason="pg_collection is being passed to sub-module", -) -def expert_dist_ckpt_decorator(func): - """Decorator of shared_state_dict in expert layer for distributed checkpoint. - Since !1940, the TP size for Expert layer can be different with Attention. - To make distributed checkpoint work in such cases, we use a decorator to - replace the default TP parallel states with expert-TP parallel states. 
- """ - - logger.warning("expert_dist_ckpt_decorator is deprecated and will be removed in version 0.17.") - - @wraps(func) - def wrapper(*args, **kwargs): - # Store original states - original_rank = parallel_state._MPU_TENSOR_MODEL_PARALLEL_RANK - original_size = parallel_state._MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE - original_group = parallel_state._TENSOR_MODEL_PARALLEL_GROUP - try: - # Set new states - parallel_state._MPU_TENSOR_MODEL_PARALLEL_RANK = ( - parallel_state.get_expert_tensor_parallel_rank() - ) - parallel_state._MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE = ( - parallel_state.get_expert_tensor_parallel_world_size() - ) - parallel_state._TENSOR_MODEL_PARALLEL_GROUP = ( - parallel_state.get_expert_tensor_parallel_group() - ) - - # Execute the function - result = func(*args, **kwargs) - finally: - # Restore original states - parallel_state._MPU_TENSOR_MODEL_PARALLEL_RANK = original_rank - parallel_state._MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE = original_size - parallel_state._TENSOR_MODEL_PARALLEL_GROUP = original_group - return result - - return wrapper - - class GroupedMLP(MegatronModule): """An efficient implementation of the Experts layer using GroupedGEMM. @@ -116,7 +70,6 @@ class GroupedMLP(MegatronModule): """ # TODO(M4): breaking api, switched from pass in tp_group to pass in pg_collection. - @internal_api def __init__( self, num_local_experts: int, @@ -286,7 +239,7 @@ def forward( permuted_probs: torch.Tensor, ): """Forward step of the GroupedMLP.""" - assert self.config.bf16, "Currently GroupedGEMM for MoE only supports bf16." + assert self.config.bf16, "Currently GroupedMLP for MoE only supports bf16." if self.activation_recompute: self.activation_checkpoint = tensor_parallel.CheckpointWithoutOutput() @@ -580,7 +533,6 @@ class TEGroupedMLP(MegatronModule): """ # TODO(M4): breaking api, switched from pass in tp_group to pass in pg_collection. 
- @internal_api def __init__( self, num_local_experts, @@ -820,12 +772,12 @@ def glu(x): output = off_interface.group_commit( output, name="moe_act", forced_released_tensors=[fc1_output] ) + output = self._apply_bias(output, output_bias, tokens_per_expert, permuted_probs) # upad and concat the output if self.config.fp8 or self.config.fp4: output = self.quantization_unpadding(output, actual_tokens_per_expert) - output = self._apply_bias(output, output_bias, tokens_per_expert, permuted_probs) output_bias = None return output, output_bias @@ -889,7 +841,6 @@ class SequentialMLP(MegatronModule): """ # TODO(M4): breaking api, switched from pass in tp_group to pass in pg_collection. - @internal_api def __init__( self, num_local_experts, diff --git a/megatron/core/transformer/moe/fused_a2a.py b/megatron/core/transformer/moe/fused_a2a.py index aa13b9b5b5b..39f50a4a670 100644 --- a/megatron/core/transformer/moe/fused_a2a.py +++ b/megatron/core/transformer/moe/fused_a2a.py @@ -329,7 +329,6 @@ def reset_hybrid_ep_buffer(): _hybrid_ep_buffer = None -@internal_api class HybridEPDispatch(torch.autograd.Function): ''' Fused dispatch operation for permute + dispatch a2a + permute using the HybridEP backend diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py index e17cebcf1f9..2b88616c027 100644 --- a/megatron/core/transformer/moe/moe_layer.py +++ b/megatron/core/transformer/moe/moe_layer.py @@ -1,8 +1,10 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+from __future__ import annotations + from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Optional, Union +from typing import Optional, Protocol, Union import torch @@ -24,6 +26,7 @@ ) from megatron.core.transformer.spec_utils import ModuleSpec, build_module from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.typed_torch import apply_module from megatron.core.utils import internal_api try: @@ -36,12 +39,40 @@ HAVE_TE = False +class RouterInterface(Protocol): + """Interface for the router used in an MoELayer.""" + + def forward(self, input: torch.Tensor, /) -> tuple[torch.Tensor, torch.Tensor]: + """Forward pass of the router. + + Returns: + A tuple of (probabilities, routing_map). + """ + ... + + def set_layer_number(self, layer_number: int) -> None: + """Set the layer number for the router. + + Called from transformer_layer during initialization. + """ + ... + + +class RouterBuilder(Protocol): + """Protocol for building a Router.""" + + def __call__( + self, /, *, config: TransformerConfig, pg_collection: ProcessGroupCollection | None + ) -> RouterInterface: ... + + @dataclass class MoESubmodules: """MoE Layer Submodule spec""" experts: Union[ModuleSpec, type] = None shared_experts: Union[ModuleSpec, type] = None + router: RouterBuilder = TopKRouter class BaseMoELayer(MegatronModule, ABC): @@ -78,7 +109,7 @@ def __init__( local_expert_indices_offset + i for i in range(self.num_local_experts) ] assert all(map(lambda x: x < self.config.num_moe_experts, self.local_expert_indices)) - self.router: TopKRouter = None + self.router: RouterInterface = None self.experts = None self.shared_experts = None self.token_dispatcher: Optional[MoETokenDispatcher] = None @@ -129,7 +160,8 @@ def __init__( self.tp_group = pg_collection.tp # Initialize router. 
- self.router = TopKRouter(config=self.config, pg_collection=pg_collection) + self.router = submodules.router(config=self.config, pg_collection=pg_collection) + self.tp_group = pg_collection.tp # Initialize latent projections. if self.config.moe_latent_size: @@ -213,7 +245,7 @@ def route(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tensor This method uses the router to determine which experts to send each token to, producing routing probabilities and a mapping. """ - probs, routing_map = self.router(hidden_states, padding_mask=padding_mask) + probs, routing_map = apply_module(self.router)(hidden_states, padding_mask=padding_mask) return probs, routing_map @maybe_skip_or_early_return_by_cudagraph("preprocess") @@ -307,6 +339,13 @@ def combine(self, output: torch.Tensor, shared_expert_output: Optional[torch.Ten output = output + shared_expert_output return output + def router_and_preprocess(self, hidden_states: torch.Tensor): + """This method is a combined method of route and preprocess. Deprecated.""" + + probs, routing_map = self.route(hidden_states) + hidden_states, probs, residual = self.preprocess(hidden_states, probs, routing_map) + return hidden_states, probs, residual + def forward(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tensor] = None): """Forward pass for the MoE layer. 
diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index e5e06f05758..4ad65963674 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -218,7 +218,7 @@ def get_tokens_per_expert_and_token_count( class MoEAuxLossAutoScaler(torch.autograd.Function): """An AutoScaler that triggers the backward pass and scales the grad for auxiliary loss.""" - main_loss_backward_scale: torch.Tensor = None + main_loss_backward_scale: Optional[torch.Tensor] = None @staticmethod def forward(ctx, output: torch.Tensor, aux_loss: torch.Tensor): @@ -359,8 +359,8 @@ def unpermute( permuted_tokens: torch.Tensor, sorted_indices: torch.Tensor, restore_shape: torch.Size, - probs: torch.Tensor = None, - routing_map: torch.Tensor = None, + probs: Optional[torch.Tensor] = None, + routing_map: Optional[torch.Tensor] = None, fused: bool = False, drop_and_pad: bool = False, ): @@ -801,8 +801,8 @@ def save_to_aux_losses_tracker( loss: torch.Tensor, layer_number: int, num_layers: int, - reduce_group: torch.distributed.ProcessGroup = None, - avg_group: torch.distributed.ProcessGroup = None, + reduce_group: Optional[torch.distributed.ProcessGroup] = None, + avg_group: Optional[torch.distributed.ProcessGroup] = None, reduce_group_has_dp: bool = False, ): """Save the auxiliary loss for logging. @@ -868,9 +868,7 @@ def reduce_aux_losses_tracker_across_ranks( # does not have 'dp' attribute, do it manually. 
if not tracker[name].get('reduce_group_has_dp', False): torch.distributed.all_reduce( - values, - group=parallel_state.get_data_parallel_group(with_context_parallel=False), - op=torch.distributed.ReduceOp.AVG, + values, group=dp_group, op=torch.distributed.ReduceOp.AVG ) if tracker[name].get('avg_group') is not None: torch.distributed.all_reduce( @@ -910,7 +908,6 @@ def track_moe_metrics( tracker[key]["reduce_group"] = None tracker[key]["avg_group"] = None tracker[key]["reduce_group_has_dp"] = False - reduce_aux_losses_tracker_across_ranks(track_names, pg_collection=pg_collection) # Get number of MoE layers diff --git a/megatron/core/transformer/moe/router.py b/megatron/core/transformer/moe/router.py index 01238e425d9..8c1b6637f88 100644 --- a/megatron/core/transformer/moe/router.py +++ b/megatron/core/transformer/moe/router.py @@ -484,7 +484,6 @@ def apply_z_loss(self, logits, padding_mask: Optional[torch.Tensor] = None): # Skip Z loss calculations when using torch.no_grad() or checkpointing. moe_z_loss_coeff = self.config.moe_z_loss_coeff / self.tp_cp_group.size() z_loss = z_loss_func(logits, moe_z_loss_coeff, padding_mask=padding_mask) - scale_up = 1.0 if self.calculate_per_token_loss: # The expected final scaling for z_loss gradients is # 1/(num_micro_batches * dp_size). 
diff --git a/megatron/core/transformer/moe/shared_experts.py b/megatron/core/transformer/moe/shared_experts.py index 3cb34a36f26..35066b1a8b0 100644 --- a/megatron/core/transformer/moe/shared_experts.py +++ b/megatron/core/transformer/moe/shared_experts.py @@ -122,7 +122,7 @@ def __init__( if self.stream is None: self.stream = torch.cuda.Stream() - def forward(self, hidden_states): + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: """Forward function""" output, _ = super().forward(hidden_states) if self.use_shared_expert_gate: diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index d0da38d6322..1921038105a 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -437,13 +437,12 @@ def __init__( "before_finish": 3, "no_sync": 4, } + self.cuda_dtoh_point = "before_permutation_1" if ( config.cuda_graph_impl == "transformer_engine" and CudaGraphScope.moe_preprocess in config.cuda_graph_scope ): self.cuda_dtoh_point = "before_ep_alltoall" - else: - self.cuda_dtoh_point = "before_permutation_1" if MoEAlltoAllTokenDispatcher.cuda_dtoh_stream is None: MoEAlltoAllTokenDispatcher.cuda_dtoh_stream = torch.cuda.Stream() @@ -863,7 +862,7 @@ def _maybe_update_cuda_sync_point(self, point: str): self.cuda_sync_point = point def _maybe_dtoh_and_synchronize( - self, point: str, tokens_per_expert: torch.Tensor = None + self, point: str, tokens_per_expert: Optional[torch.Tensor] = None ) -> torch.Tensor: """ Move all possible GPU tensors to CPU and make a synchronization at the expected point. 
@@ -1433,7 +1432,7 @@ def dispatch_preprocess( def token_dispatch( self, hidden_states: torch.Tensor, - probs: torch.Tensor = None, + probs: Optional[torch.Tensor] = None, async_finish: bool = True, allocate_on_comm_stream: bool = True, ): diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index 9689056e325..cd3db50a35b 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -90,12 +90,12 @@ class MultiLatentAttention(Attention): def __init__( self, config: MLATransformerConfig, - submodules: Union[MLASelfAttentionSubmodules], + submodules: MLASelfAttentionSubmodules, layer_number: int, attn_mask_type: AttnMaskType, attention_type: str, cp_comm_type: Optional[str] = None, - pg_collection: ProcessGroupCollection = None, + pg_collection: Optional[ProcessGroupCollection] = None, ) -> None: super().__init__( @@ -106,6 +106,7 @@ def __init__( attn_mask_type=attn_mask_type, pg_collection=pg_collection, ) + self.config: MLATransformerConfig self.query_projection_size = self.config.v_head_dim * self.config.num_attention_heads @@ -243,28 +244,13 @@ def forward( # self or cross attn. 
# query: [96, 1, 16, 128], key:[96, 1, 16, 128], value:[96, 1, 16, 128] with off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") as hidden_states: - if self.config.experimental_attention_variant is None: - query, key, value = self.get_query_key_value_tensors( - hidden_states, - key_value_states, - position_ids, - packed_seq_params, - inference_context=inference_context, - ) - elif self.config.experimental_attention_variant == "dsa": - query, key, value, q_compressed, _ = self.get_query_key_value_tensors( - hidden_states, - key_value_states, - position_ids, - packed_seq_params, - inference_context=inference_context, - return_compressed_tensors=True, - ) - else: - raise ValueError( - f"Unsupported experimental attention variant: " - f"{self.config.experimental_attention_variant}" - ) + query, key, value, q_compressed, kv_compressed = self.get_query_key_value_tensors( + hidden_states, + key_value_states, + position_ids, + packed_seq_params, + inference_context=inference_context, + ) if self.offload_qkv_linear: query = off_interface.group_commit( query, name="qkv_linear", forced_released_tensors=[hidden_states] @@ -296,37 +282,24 @@ def forward( ) else: if inference_context is None or inference_context.is_static_batching(): + extra_kwargs = {} + if self.config.experimental_attention_variant == "dsa": + # For dsa we need to pass in the original hidden states and the compressed + # query representation. + extra_kwargs["x"] = hidden_states + extra_kwargs["qr"] = q_compressed with off_interface( self.offload_core_attention and self.training, query, "core_attn" ) as query: - if self.config.experimental_attention_variant is None: - core_attn_out = self.core_attention( - query, - key, - value, - attention_mask, - packed_seq_params=packed_seq_params, - attn_mask_type=attn_mask_type, - ) - elif self.config.experimental_attention_variant == "dsa": - # For dsa we need to pass in the original hidden states and the compressed - # query representation. 
- core_attn_out = self.core_attention( - query, - key, - value, - x=hidden_states, - qr=q_compressed, - attention_mask=attention_mask, - attn_mask_type=attn_mask_type, - attention_bias=None, - packed_seq_params=packed_seq_params, - ) - else: - raise ValueError( - f"Unsupported attention variant: " - f"{self.config.experimental_attention_variant}" - ) + core_attn_out = self.core_attention( + query, + key, + value, + attention_mask, + packed_seq_params=packed_seq_params, + attn_mask_type=attn_mask_type, + **extra_kwargs, + ) elif self.cache_mla_latents: # Dynamic batching attention kernel. q, k, v = (query, key, value) @@ -400,7 +373,7 @@ def __init__( layer_number: int, attn_mask_type=AttnMaskType.padding, cp_comm_type: Optional[str] = None, - pg_collection: ProcessGroupCollection = None, + pg_collection: Optional[ProcessGroupCollection] = None, ): if pg_collection is None: pg_collection = ProcessGroupCollection.use_mpu_process_groups() @@ -545,7 +518,6 @@ def get_query_key_value_tensors( inference_context=None, *, inference_params=None, - return_compressed_tensors=False, ): """ Derives `query`, `key` and `value` tensors from `hidden_states`. 
@@ -576,13 +548,11 @@ def get_query_key_value_tensors( rotary_pos_sin = None packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' if self.config.rope_type == "rope": - rotary_pos_emb = self.rotary_pos_emb( - rotary_seq_len, packed_seq_params=packed_seq_params - ) + rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len, packed_seq=packed_seq) else: if self.config.apply_rope_fusion: rotary_pos_cos, rotary_pos_sin = self.rotary_pos_emb.get_cached_cos_sin( - rotary_seq_len, dtype=hidden_states.dtype, packed_seq_params=packed_seq_params + rotary_seq_len, dtype=hidden_states.dtype, packed_seq=packed_seq ) rotary_pos_emb = None assert inference_context is None, "Inference with MLA RoPE fusion is not supported" @@ -591,9 +561,7 @@ def get_query_key_value_tensors( and fused_apply_mla_rope_for_kv is not None ), "Fused MLA RoPE apply is not imported successfully" else: - rotary_pos_emb, mscale = self.rotary_pos_emb( - rotary_seq_len, packed_seq_params=packed_seq_params - ) + rotary_pos_emb, mscale = self.rotary_pos_emb(rotary_seq_len, packed_seq=packed_seq) if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd': if packed_seq_params.cu_seqlens_q_padded is not None: @@ -886,10 +854,7 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb ) - if return_compressed_tensors: - return query, key, value, q_compressed, kv_compressed - else: - return query, key, value + return query, key, value, q_compressed, kv_compressed def uncompress_kv_from_cache(self, kv_cached): """ diff --git a/megatron/core/transformer/multi_token_prediction.py b/megatron/core/transformer/multi_token_prediction.py index 8d5c479aa59..b0476155ad9 100755 --- a/megatron/core/transformer/multi_token_prediction.py +++ b/megatron/core/transformer/multi_token_prediction.py @@ -1,5 +1,6 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+import warnings from contextlib import nullcontext from dataclasses import dataclass from typing import Callable, List, Optional, Union @@ -57,8 +58,8 @@ def tie_word_embeddings_state_dict( sharded_state_dict: ShardedStateDict, word_emb_weight: Tensor, word_emb_weight_key: str, - tp_group: torch.distributed.ProcessGroup = None, - dp_cp_group: torch.distributed.ProcessGroup = None, + tp_group: torch.distributed.ProcessGroup, + dp_cp_group: torch.distributed.ProcessGroup, ) -> None: """tie the embedding of the mtp processing stage in a given sharded state dict. @@ -92,8 +93,8 @@ def tie_output_layer_state_dict( sharded_state_dict: ShardedStateDict, output_layer_weight: Tensor, output_layer_weight_key: str, - tp_group: torch.distributed.ProcessGroup = None, - dp_cp_group: torch.distributed.ProcessGroup = None, + tp_group: torch.distributed.ProcessGroup, + dp_cp_group: torch.distributed.ProcessGroup, ) -> None: """tie the output layer of the mtp processing stage in a given sharded state dict. @@ -316,8 +317,8 @@ def save_loss_to_tracker( loss: torch.Tensor, layer_number: int, num_layers: int, - reduce_group: torch.distributed.ProcessGroup = None, - avg_group: torch.distributed.ProcessGroup = None, + reduce_group: Optional[torch.distributed.ProcessGroup] = None, + avg_group: Optional[torch.distributed.ProcessGroup] = None, ): """Save the mtp loss for logging. Args: @@ -505,9 +506,6 @@ def get_mtp_ranks(pp_ranks: List[int], config: TransformerConfig) -> List[int]: def get_mtp_layer_offset(config: TransformerConfig, vp_stage: Optional[int] = None) -> int: """Get the offset of the MTP layer.""" - # TODO(shifangx): Currently, we only support put all of MTP layers - # on the last pipeline stage, so the offset is always 0. - # We will support more flexible MTP placement in the future. 
if config.pipeline_model_parallel_size > 1: if config.pipeline_model_parallel_layout: offset = config.pipeline_model_parallel_layout.get_layer_offset( @@ -866,15 +864,15 @@ def forward( position_ids: Tensor, hidden_states: Tensor, attention_mask: Tensor, - context: Tensor = None, - context_mask: Tensor = None, - rotary_pos_emb: Tensor = None, - rotary_pos_cos: Tensor = None, - rotary_pos_sin: Tensor = None, - attention_bias: Tensor = None, - inference_params: InferenceParams = None, - packed_seq_params: PackedSeqParams = None, - sequence_len_offset: Tensor = None, + context: Optional[Tensor] = None, + context_mask: Optional[Tensor] = None, + rotary_pos_emb: Optional[Tensor] = None, + rotary_pos_cos: Optional[Tensor] = None, + rotary_pos_sin: Optional[Tensor] = None, + attention_bias: Optional[Tensor] = None, + inference_params: Optional[InferenceParams] = None, + packed_seq_params: Optional[PackedSeqParams] = None, + sequence_len_offset: Optional[Tensor] = None, embedding=None, ): """ @@ -977,7 +975,7 @@ class MultiTokenPredictionBlockSubmodules: projection matrix, transformer block, shared output head). 
""" - layer_specs: List[ModuleSpec] = None + layer_specs: Optional[List[ModuleSpec]] = None def _get_mtp_block_submodules( @@ -1033,7 +1031,7 @@ def __init__( config: TransformerConfig, spec: Union[TransformerBlockSubmodules, ModuleSpec], vp_stage: Optional[int] = None, - pg_collection: ProcessGroupCollection = None, + pg_collection: Optional[ProcessGroupCollection] = None, ): super().__init__(config=config) self.submodules = _get_mtp_block_submodules(config, spec) @@ -1082,16 +1080,16 @@ def forward( position_ids: Tensor, hidden_states: Tensor, attention_mask: Tensor, - context: Tensor = None, - context_mask: Tensor = None, - rotary_pos_emb: Tensor = None, - rotary_pos_cos: Tensor = None, - rotary_pos_sin: Tensor = None, - attention_bias: Tensor = None, - inference_params: InferenceParams = None, - packed_seq_params: PackedSeqParams = None, - sequence_len_offset: Tensor = None, - extra_block_kwargs: dict = None, + context: Optional[Tensor] = None, + context_mask: Optional[Tensor] = None, + rotary_pos_emb: Optional[Tensor] = None, + rotary_pos_cos: Optional[Tensor] = None, + rotary_pos_sin: Optional[Tensor] = None, + attention_bias: Optional[Tensor] = None, + inference_params: Optional[InferenceParams] = None, + packed_seq_params: Optional[PackedSeqParams] = None, + sequence_len_offset: Optional[Tensor] = None, + extra_block_kwargs: Optional[dict] = None, embedding=None, ) -> Tensor: """ diff --git a/megatron/core/transformer/pipeline_parallel_layer_layout.py b/megatron/core/transformer/pipeline_parallel_layer_layout.py index 3ff2d6d4464..7a8195e1bee 100644 --- a/megatron/core/transformer/pipeline_parallel_layer_layout.py +++ b/megatron/core/transformer/pipeline_parallel_layer_layout.py @@ -130,7 +130,7 @@ def validate_layer_layout(self, num_layers: int, mtp_num_layers: int): ), "All of the MTP layers must be in the same one virtual pipeline stage" for vpp_rank in range(self.virtual_pipeline_model_parallel_size - 1): assert LayerType.mtp not in 
self.layout[0][vpp_rank], ( - f"Corrently we restrict that the MTP should not be in the first pp rank." + f"Currently we restrict that the MTP should not be in the first pp rank." f"But got {self.layout[0]} for the first pp rank." ) ## Detect MTP standalone usage. diff --git a/megatron/core/transformer/spec_utils.py b/megatron/core/transformer/spec_utils.py index dbd2e08bccb..5639737d6c8 100644 --- a/megatron/core/transformer/spec_utils.py +++ b/megatron/core/transformer/spec_utils.py @@ -3,7 +3,9 @@ import logging import types from dataclasses import dataclass, field -from typing import Tuple, Union +from typing import Any, Tuple, Union + +logger = logging.getLogger(__name__) logger = logging.getLogger(__name__) @@ -27,9 +29,18 @@ class ModuleSpec: module: Union[Tuple, type] params: dict = field(default_factory=lambda: {}) - submodules: type = None + submodules: object = None metainfo: dict = field(default_factory=lambda: {}) + def __call__(self, *args: Any, **kwargs: Any) -> Any: + """Builds an instance of the module from the spec. + + Args: + *args: Positional arguments to be passed to the module init. + **kwargs: Keyword arguments to be passed to the module init. + """ + return build_module(self, *args, **kwargs) + def import_module(module_path: Tuple[str]): """Import a named object from a module in the context of this function. @@ -48,9 +59,7 @@ def import_module(module_path: Tuple[str]): # pylint: disable=missing-function-docstring def get_module(spec_or_module: Union[ModuleSpec, type], **additional_kwargs): - """Retrieve the module class or function specified by a ModuleSpec or - return it as is if already provided. 
- """ + """Returns or imports the provided module.""" # If a module clas is already provided return it as is if isinstance(spec_or_module, (type, types.FunctionType)): return spec_or_module @@ -64,7 +73,13 @@ def get_module(spec_or_module: Union[ModuleSpec, type], **additional_kwargs): def build_module(spec_or_module: Union[ModuleSpec, type], *args, **kwargs): - """Build a module from a ModuleSpec or return it as is if already provided.""" + """Builds an instance of the module from the spec. + + Args: + spec_or_module: The module spec or module class to build. + *args: Positional arguments to be passed to the module init. + **kwargs: Keyword arguments to be passed to the module init. + """ # If the passed `spec_or_module` is # a `Function`, then return it as it is # NOTE: to support an already initialized module add the following condition diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index b28a66400e0..f222a2c3a6b 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -218,7 +218,7 @@ class TransformerBlockSubmodules: or instance of the layer normalization to be applied. """ - layer_specs: List[ModuleSpec] = None + layer_specs: Optional[List[ModuleSpec]] = None layer_norm: Optional[Union[ModuleSpec, torch.nn.Module]] = None @@ -273,7 +273,7 @@ def __init__( post_layer_norm: bool = True, pre_process: bool = True, post_process: bool = True, - pg_collection: ProcessGroupCollection = None, + pg_collection: Optional[ProcessGroupCollection] = None, vp_stage: Optional[int] = None, ): super().__init__(config=config) @@ -384,6 +384,9 @@ def build_layer(layer_spec, layer_number): else: self.final_layernorm = None # Either this or nn.Identity + if self.config.inference_fuse_tp_communication: + self._setup_fused_tp_communication() + def has_final_layernorm_in_this_stage(self): """ Check if this vpp stage contains the final layernorm. 
@@ -412,6 +415,30 @@ def has_final_layernorm_in_this_stage(self): and self.post_layer_norm ) + def _setup_fused_tp_communication(self): + """Setup fused TP communication for all layers. + We have a fused reduce-scatter + add + layer-norm + all-gather operation. + We call this kernel from within row parallel linear layers. + But layer-norm needs the layer norm weights from the + successive column parallel linear layer. + This function is used to pass those weights to the respective layers. + """ + + for i in range(len(self.layers)): + current_layer = self.layers[i] + + # Get next layer's QKV norm weights (None for last layer) + if i < len(self.layers) - 1: + next_qkv_norm_weights = self.layers[i + 1].get_qkv_layer_norm_weights() + else: + next_qkv_norm_weights = None + + # Configure all fused TP communication settings in one call + current_layer.configure_fused_tp_inference( + skip_qkv_norm_and_all_gather=(i > 0), + fc2_next_layer_norm_weights=next_qkv_norm_weights, + ) + def _get_layer(self, layer_number: int): return self.layers[layer_number] diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 8f5462ff55b..0c23d0761de 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -251,7 +251,7 @@ class TransformerConfig(ModelParallelConfig): dsa_indexer_loss_coeff: Optional[float] = None """Coefficient for the DSA indexer KL divergence loss. Set to 0 to disable indexer loss.""" - dsa_indexer_use_sparse_loss: Optional[bool] = None + dsa_indexer_use_sparse_loss: bool = False """Whether to use sparse DSA indexer loss. If True, the indexer loss will be computed using the top-k indices.""" @@ -261,7 +261,6 @@ class TransformerConfig(ModelParallelConfig): linear_attention_type: Optional[str] = None """Type of linear attention to use. Deprecated. 
Use experimental_attention_variant instead.""" - linear_attention_freq: Optional[Union[int, List[int]]] = None """Frequency between LA (linear attention) layers and SDPA (scaled dot-product attention) layers. @@ -523,7 +522,8 @@ class TransformerConfig(ModelParallelConfig): in the hidden_states gradient.""" moe_shared_expert_gate: bool = False - """Enable gate for shared expert.""" + """Enable gate for shared expert. Only effective when + moe-shared-expert-intermediate-size is set.""" moe_shared_expert_overlap: bool = False """Enable overlapping between shared expert computations and dispatcher communications. @@ -762,10 +762,12 @@ class TransformerConfig(ModelParallelConfig): excluding optimizer) is enabled. "transformer_engine": capture the CUDA graph using TE make_graphed_callables().""" - cuda_graph_scope: Optional[List[CudaGraphScope]] = None + cuda_graph_scope: Union[str, CudaGraphScope, List[str], List[CudaGraphScope]] = "full" """Determines the CUDA graphs capturing scope. When cuda_graph_impl is set to "transformer_engine", valid values are "attn", "mlp", "moe", - "moe_router", "moe_preprocess", "mamba". None means the full layer. + "moe_router", "moe_preprocess", "mamba". "full" or an empty list means the full layer. "full" + is actually deprecated, but for backward compatibility, we still use "full" as the default + value. It will be transformed to an empty list in __post_init__. When cuda_graph_impl is set to "local", "full_iteration" can be specified as cuda_graph_scope to enable whole iteration CUDA graph. All other values enable layerwise CUDA graph.""" @@ -810,6 +812,9 @@ class TransformerConfig(ModelParallelConfig): use_inference_optimized_layers: bool = False """If True, use inference optimized transformer layers during inference.""" + inference_fuse_tp_communication: bool = False + """ If true, uses a fused reduce-scatter-residual-norm-allgather kernel during inference. 
""" + mrope_section: Optional[List[int]] = None """ Multimodal rope section is for channel dimension of temporal, height and width in rope calculation. """ @@ -856,7 +861,6 @@ class TransformerConfig(ModelParallelConfig): fallback_to_eager_attn: bool = False """Whether to fallback to eager attention in TE implementation. Suggested for when desired features are not available in TE implementation.""" - ##################################### # Fine-grained Activation Offloading ##################################### @@ -1744,7 +1748,7 @@ def __post_init__(self): ), 'moe cuda graph is only supported with drop-padding MoE.' if self.moe_token_dispatcher_type == 'alltoall' and ( self.moe_expert_capacity_factor is not None - or self.moe_router_padding_for_quantization + or self.moe_router_padding_for_fp8 ): assert CudaGraphScope.moe_preprocess not in self.cuda_graph_scope, ( 'moe_preprocess cuda graph is not supported when there are ' @@ -1972,6 +1976,12 @@ def __post_init__(self): assert not self.add_qkv_bias assert not self.use_kitchen + if self.inference_fuse_tp_communication: + assert self.transformer_impl == "inference_optimized", ( + "inference_fuse_tp_communication is only supported " + "for inference_optimized transformer implementation." 
+ ) + if self.batch_invariant_mode: assert ( self.attention_backend == AttnBackend.flash diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index a486b6ed3d5..ed7076ef588 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -562,6 +562,15 @@ def _forward_attention( with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states: input_layernorm_output = self.input_layernorm(hidden_states) + using_fused_tp_inference_kernel = (not self.training) and ( + self.config.inference_fuse_tp_communication + ) + + if using_fused_tp_inference_kernel: + # Set the residual for fused reduce-scatter + add + layer-norm + all-gather + # operation in attention's out_proj (linear_proj) + self._set_proj_residual(residual) + # Self attention. nvtx_range_push(suffix="self_attention") attention_output_with_bias = self.self_attention( @@ -588,10 +597,16 @@ def _forward_attention( # TODO: could we move `bias_dropout_add_exec_handler` itself # inside the module provided in the `bias_dropout_add_spec` module? nvtx_range_push(suffix="self_attn_bda") - with self.bias_dropout_add_exec_handler(): - hidden_states = self.self_attn_bda(self.training, self.config.bias_dropout_fusion)( - attention_output_with_bias, residual, self.hidden_dropout - ) + if using_fused_tp_inference_kernel: + # In inference optimized transformer layer, there is no bias and dropout + # The remaining residual add is already handled inside the + # self attention module. 
+ hidden_states = attention_output_with_bias[0] + else: + with self.bias_dropout_add_exec_handler(): + hidden_states = self.self_attn_bda(self.training, self.config.bias_dropout_fusion)( + attention_output_with_bias, residual, self.hidden_dropout + ) nvtx_range_pop(suffix="self_attn_bda") # Delay the offload of the attention norm until after the self_attn_bda has been computed @@ -669,6 +684,11 @@ def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None) and inference_context is not None and not inference_context.is_decode_only() and not isinstance(self.mlp, IdentityOp) + and not self.config.transformer_impl == "inference_optimized" + ) + + using_fused_tp_inference_kernel = (not self.training) and ( + self.config.inference_fuse_tp_communication ) if self.recompute_mlp: @@ -704,6 +724,10 @@ def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None) bias_output = torch.stack(bias_chunks, dim=0).sum(dim=0) if bias_chunks else None mlp_output_with_bias = (mlp_output, bias_output) else: + if using_fused_tp_inference_kernel: + # Set the residual for fused reduce-scatter + add + layer-norm + all-gather + # operation in MLP's fc2. + self._set_fc2_residual(residual) mlp_output_with_bias = self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask) if self.recompute_pre_mlp_layernorm: @@ -748,13 +772,23 @@ def _forward_post_mlp(self, mlp_output_with_bias, residual): FineGrainedActivationOffloadingInterface as off_interface, ) + using_fused_tp_inference_kernel = (not self.training) and ( + self.config.inference_fuse_tp_communication + ) + # TODO: could we move `bias_dropout_add_exec_handler` itself # inside the module provided in the `bias_dropout_add_spec` module? 
nvtx_range_push(suffix="mlp_bda") - with self.bias_dropout_add_exec_handler(): - hidden_states = self.mlp_bda(self.training, self.config.bias_dropout_fusion)( - mlp_output_with_bias, residual, self.hidden_dropout - ) + if using_fused_tp_inference_kernel: + # In inference optimized transformer layer, there is no bias and dropout + # The remaining residual add is already handled inside the + # MLP module. + hidden_states = mlp_output_with_bias[0] + else: + with self.bias_dropout_add_exec_handler(): + hidden_states = self.mlp_bda(self.training, self.config.bias_dropout_fusion)( + mlp_output_with_bias, residual, self.hidden_dropout + ) nvtx_range_pop(suffix="mlp_bda") # Delay the offload of the mlp norm until after the mlp_bda has been computed # because the residual is needed in the mlp_bda. @@ -798,6 +832,66 @@ def sharded_state_dict( apply_prefix_mapping(sharded_state_dict, prefixed_map) return sharded_state_dict + def configure_fused_tp_inference( + self, + skip_qkv_norm_and_all_gather: bool = False, + fc2_next_layer_norm_weights: Optional[Tensor] = None, + ): + """ + Configure settings for fused TP communication in inference mode. + + Args: + skip_qkv_norm (bool): Whether to skip norm and all-gather for linear_qkv. + fc2_next_layer_norm_weights (Optional[Tensor]): Next layer's QKV norm weights + for current layer's MLP FC2. 
+ """ + self.self_attention.linear_qkv.skip_norm_and_all_gather = skip_qkv_norm_and_all_gather + + # Use current layer's own MLP FC1 norm weights for attention's/mixer's out_proj + mlp_fc1_weights = self.get_mlp_layer_norm_weights() + self._set_proj_next_layer_norm_weights(mlp_fc1_weights) + + self.mlp.linear_fc1.skip_norm_and_all_gather = True + # Use next layer's attention norm weights for current layer's MLP FC2 + self._set_fc2_next_layer_norm_weights(fc2_next_layer_norm_weights) + + def _set_proj_next_layer_norm_weights(self, weights: Tensor): + """Set next layer norm weights for attention/mixer's linear_proj.""" + self.self_attention.linear_proj._set_next_layer_norm_weights(weights) + + def _set_fc2_next_layer_norm_weights(self, weights: Optional[Tensor]): + """Set next layer norm weights for MLP FC2.""" + if weights is None: + # Create dummy tensor for last layer (same shape as fc1 norm weights) + weights = torch.empty_like(self.get_mlp_layer_norm_weights()) + self.mlp.linear_fc2._set_next_layer_norm_weights(weights) + + def _set_proj_residual(self, residual: Tensor): + """Set residual for attention's/mixer's out_proj (linear_proj).""" + self.self_attention.linear_proj._set_residual(residual) + + def _set_fc2_residual(self, residual: Tensor): + """Set residual for MLP FC2.""" + self.mlp.linear_fc2._set_residual(residual) + + def get_mlp_layer_norm_weights(self) -> Tensor: + """ + Get the MLP FC1 layer norm weights. + + Returns: + Tensor: The layer norm weight data. + """ + return self.mlp.linear_fc1.layer_norm_weight.data + + def get_qkv_layer_norm_weights(self) -> Tensor: + """ + Get the QKV layer norm weights. + + Returns: + Tensor: The layer norm weight data. + """ + return self.self_attention.linear_qkv.layer_norm_weight.data + def get_layer_static_inputs(self, seq_length, micro_batch_size): """ Get the static inputs for the transformer layer. 
Besides the hidden_states that is @@ -1099,3 +1193,11 @@ def __call__(self, *args, **kwargs): 'inference_context' ].is_decode_only() return super().__call__(*args, **kwargs) + + def get_layer_norm_weights(self): + """ + Get the weights of all layernorms (attention and MLP) in the transformer layer. + Returns: + List[Tensor]: A list of layernorm weight tensors. + """ + return diff --git a/megatron/core/typed_torch.py b/megatron/core/typed_torch.py new file mode 100644 index 00000000000..bcbf388facc --- /dev/null +++ b/megatron/core/typed_torch.py @@ -0,0 +1,50 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +"""Utilities for improved type hinting with torch interfaces.""" +from __future__ import annotations + +from collections.abc import Callable +from typing import Generic, ParamSpec, Protocol, TypeVar + +import torch + +P = ParamSpec('P') +R_co = TypeVar('R_co', covariant=True) +T = TypeVar('T') + + +class _Module(Generic[P, R_co], Protocol): + """Protocol allowing us to unwrap `forward`.""" + + def forward(self, *args: P.args, **kwargs: P.kwargs) -> R_co: + """Forward method of the matching torch.nn.Module.""" + ... + + +def apply_module(m: _Module[P, R_co], *, check_subclass: bool = True) -> Callable[P, R_co]: + """Returns the provided module unchanged, but with correct type hints. + + Args: + m: An instance of a subclass of `torch.nn.Module`. + check_subclass: If `True`, checks that `m` is a subclass of + `torch.nn.Module` and raises a `TypeError` if not. + + Returns: + That module unchanged, but with correct type hints. + """ + if check_subclass and not issubclass(type(m), torch.nn.Module): + raise TypeError(f'{type(m)} is not a subclass of torch.nn.Module') + return m # type: ignore + + +def not_none(value: T | None) -> T: + """Asserts that the provided value is not None and returns it. + + Args: + value: An optional value. + + Returns: + The provided value, guaranteed to be not None. 
+ """ + if value is None: + raise ValueError('Expected value to be not None') + return value diff --git a/megatron/core/utils.py b/megatron/core/utils.py index 62ce07586be..f1c8a42913b 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -479,15 +479,6 @@ def is_causal_conv1d_min_version(version, check_equality=True): return get_causal_conv1d_version() > PkgVersion(version) -def check_mamba_sequence_packing_support() -> Tuple[bool, Optional[str]]: - """Checks whether `causal_conv1d` and `mamba_ssm` support sequence packing.""" - if not is_causal_conv1d_min_version("1.5.3.post1"): - return False, "causal_conv1d >= 1.5.3.post1 is required" - elif not is_mamba_min_version("2.2.6.post3"): - return False, "mamba_ssm >= 2.2.6.post3 is required" - return True, None - - def ensure_divisibility(numerator, denominator): """Ensure that numerator is divisible by the denominator.""" assert numerator % denominator == 0, "{} is not divisible by {}".format(numerator, denominator) @@ -2099,7 +2090,8 @@ def get_thd_batch_on_this_cp_rank( cu_seqlens: torch.Tensor, cu_seqlens_padded: torch.Tensor, max_seqlen: torch.Tensor, - cp_group: Optional[torch.distributed.ProcessGroup] = None, + cp_size: Optional[int] = None, + cp_rank: Optional[int] = None, ): """Slice each sub-sample in a packed sample batch input along sequence dimension into multiple chunks, which are parallelized @@ -2115,12 +2107,8 @@ def get_thd_batch_on_this_cp_rank( max_seqlen_kv=int(max_seqlen[0].item()), ) - if cp_group is not None: - cp_size = get_pg_size(cp_group) - cp_rank = get_pg_rank(cp_group) - else: - cp_size = parallel_state.get_context_parallel_world_size() - cp_rank = parallel_state.get_context_parallel_rank() + cp_size = get_context_parallel_world_size() if cp_size is None else cp_size + cp_rank = get_context_parallel_rank() if cp_rank is None else cp_rank if cp_size > 1: # slice batch along sequence dimension for context parallelism assert tex is not None and is_te_min_version("1.10.0"), ( 
"Please update Transformer Engine to >= 1.10 to use " @@ -2186,7 +2174,7 @@ def get_batch_on_this_hybrid_cp_rank( if cp_group is not None and cp_group.size() > 1: # When using hybrid_context_parallel, each sub-sample of a packed sample is # required to be divisible by CP*DP*2 or CP*DP*TP*2 (if using sequence parallel) - batch = get_batch_on_this_cp_rank(batch, cp_group) + batch = get_batch_on_this_cp_rank(batch, cp_group=cp_group) return batch, packed_seq_params @@ -2346,16 +2334,6 @@ def unwrap_model(model, module_instances=None): return unwrapped_model -def maybe_cat(a, b, dim=0, *, required=False): - """Concatenates `a` and `b` along `dim` if `a` and `b` exist.""" - xs = [t for t in (a, b) if t is not None] - if not xs: - if required: - raise ValueError("both tensors are None") - return None - return xs[0] if len(xs) == 1 else torch.cat(xs, dim=dim) - - _ASYNC_IO_LOOP: asyncio.AbstractEventLoop | None = None @@ -2374,6 +2352,11 @@ def get_asyncio_loop(loop: asyncio.AbstractEventLoop | None = None) -> asyncio.A return loop +def is_using_quantization_scales(config): + """Returns whether the model is using quantization scales based on the config.""" + return getattr(config, "fp8", False) or getattr(config, "fp4", False) + + _ASYNC_TASK_STATS = defaultdict(lambda: [0, 0.0]) # cnt, total_time diff --git a/megatron/post_training/arguments.py b/megatron/post_training/arguments.py index 73a5135f0c3..845fe9f17c3 100644 --- a/megatron/post_training/arguments.py +++ b/megatron/post_training/arguments.py @@ -28,12 +28,12 @@ def add_modelopt_args(parser): action="store_true", help="Forcing local DotProductAttention; otherwise TEDotProductAttention is used.", ) - # Quantization group.add_argument( "--export-kv-cache-quant", - action="store_true", - help="Whether or not to perform KV-cache quantization.", + help="Type of KV cache quantization to perform.", + choices=["none", "fp8", "fp8_affine", "nvfp4", "nvfp4_affine", "nvfp4_rotate"], + default="none", ) group.add_argument( 
"--export-real-quant-cfg", @@ -46,21 +46,9 @@ def add_modelopt_args(parser): "--export-quant-cfg", type=str, default=None, - choices=[ - "int8_sq", - "fp8", - "fp8_real_quant", - "fp8_blockwise", - "fp8_blockwise_real_quant", - "fp8_blockwise_32", - "int4_awq", - "w4a8_awq", - "nvfp4", - "None", - ], - help="Specify a quantization config from the supported choices.", + # TODO replace choices with mtq.config.choices after deprecating the shorter aliases + help="Specify a quantization config from mtq.config.choices.", ) - # Knowledge Distillation group.add_argument( '--export-kd-cfg', @@ -85,7 +73,7 @@ def add_modelopt_args(parser): '--export-kd-teacher-ckpt-format', type=str, default=None, - choices=['torch', 'torch_dist', 'zarr', 'torch_dcp'], + choices=['torch', 'torch_dist', 'torch_dcp'], help="Checkpoint format of teacher model, if different from student's.", ) diff --git a/megatron/post_training/checkpointing.py b/megatron/post_training/checkpointing.py index f3e18b70e3b..47aa87b4967 100644 --- a/megatron/post_training/checkpointing.py +++ b/megatron/post_training/checkpointing.py @@ -13,6 +13,7 @@ from megatron.training import get_args from megatron.training.checkpointing import _load_base_checkpoint, load_checkpoint from megatron.training.utils import print_rank_0, unwrap_model +from .utils import print_distributed_quant_summary logger = logging.getLogger(__name__) @@ -176,6 +177,7 @@ def _remove_prefix_state_dict_pre_hook( ) model_state_dict = state_dict["model"] unwrapped_model[0].load_state_dict(model_state_dict, strict=False) + print_distributed_quant_summary(unwrapped_model[0]) elif sharded_load_dir is not None and optimizer is None and opt_param_scheduler is None: sharded_state_dict_metadata = dist_checkpointing.load_content_metadata(sharded_load_dir) sharded_state_dict = unwrapped_model[0].sharded_state_dict( @@ -190,5 +192,6 @@ def _remove_prefix_state_dict_pre_hook( sharded_state_dict, sharded_load_dir, strict=args.dist_ckpt_strictness ) 
unwrapped_model[0].load_state_dict(model_state_dict, strict=False) + print_distributed_quant_summary(unwrapped_model[0]) else: - _ = load_checkpoint(model, optimizer, opt_param_scheduler, strict=strict, load_arg=load_arg) + _ = load_checkpoint(model, optimizer, opt_param_scheduler, strict=strict, load_arg=load_arg) \ No newline at end of file diff --git a/megatron/post_training/model_builder.py b/megatron/post_training/model_builder.py index 422d9441dd0..71111ced069 100644 --- a/megatron/post_training/model_builder.py +++ b/megatron/post_training/model_builder.py @@ -24,6 +24,8 @@ from megatron.training import get_args, print_rank_0 from megatron.training.arguments import core_transformer_config_from_args +from megatron.post_training.utils import print_distributed_quant_summary + def count_parameters_in_layer(model, layer_name): num_params = 0 @@ -334,5 +336,6 @@ def modelopt_gpt_mamba_builder( mtd_mcore.adjust_distillation_model_for_mcore(model, distill_cfg) # Also remove KD mode state to prevent issues with re-conversion after restore. mto.ModeloptStateManager(model).state_dict().pop() # TODO(aanoosheh): remove once fixed in ModelOpt - + + print_distributed_quant_summary(model) return model diff --git a/megatron/post_training/utils.py b/megatron/post_training/utils.py index 4bec8c96cf1..b24ba291127 100644 --- a/megatron/post_training/utils.py +++ b/megatron/post_training/utils.py @@ -1,9 +1,45 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+import inspect import os + +import modelopt +import modelopt.torch.quantization as mtq import torch -from datasets import load_dataset +from modelopt.torch.quantization.utils import is_quantized +from packaging.version import Version + +from megatron.core import parallel_state +from megatron.training.utils import unwrap_model + + +def modelopt_version_higher_than(target_version: str): + """Check if Model-Optimizer is greater than this version.""" + info = "rank {:3}/{:3} checking if nvidia-modelopt-{} is higher than {}".format( + torch.distributed.get_rank(), + torch.distributed.get_world_size(), + str(modelopt.__version__), + target_version, + ) + print(info) + return Version(modelopt.__version__) > Version(target_version) + +def modelopt_version_at_least(target_version: str): + """Check if Model-Optimizer is greater or equal than this version.""" + info = "rank {:3}/{:3} checking if nvidia-modelopt-{} is at least {}".format( + torch.distributed.get_rank(), + torch.distributed.get_world_size(), + str(modelopt.__version__), + target_version, + ) + print(info) + return Version(modelopt.__version__) >= Version(target_version) + +def function_has_parameter(function, argument_name: str) -> bool: + """Check if a function has a specific argument.""" + sig = inspect.signature(function) + return argument_name in sig.parameters def get_current_memory_info(): """Get current memory usage.""" @@ -26,6 +62,7 @@ def report_current_memory_info(): def get_mtbench_chat_data(): """Return a MTBench dataset.""" + from datasets import load_dataset def mtbench_to_oai_chat(example): """Convert MTBench data to OpenAI chat completion format.""" @@ -35,12 +72,15 @@ def mtbench_to_oai_chat(example): example["conversations"] = conversations return example - dataset = load_dataset("HuggingFaceH4/mt_bench_prompts", split="train", token=os.environ.get("HF_TOKEN", None)) + dataset = load_dataset( + "HuggingFaceH4/mt_bench_prompts", split="train", token=os.environ.get("HF_TOKEN", None) + ) return 
dataset.map(mtbench_to_oai_chat) + def to_empty_if_meta(module: torch.nn.Module, *, device: torch.device, recurse=True): """Move tensors to device if not meta device; otherwise materialize with empty_like(). - + Args: module: The target module to apply this transformation. device: The desired device of the parameters @@ -55,6 +95,34 @@ def _empty_like_if_meta(tensor: torch.Tensor, *, device: torch.device): else: return tensor.to(device) - module._apply( - lambda t: _empty_like_if_meta(t, device=device), recurse=recurse - ) + module._apply(lambda t: _empty_like_if_meta(t, device=device), recurse=recurse) + + +def print_distributed_quant_summary(model, msg=""): + from megatron.core import parallel_state + from megatron.training import print_rank_0 + from megatron.training.utils import unwrap_model + + unwrapped_model = unwrap_model(model) + if isinstance(unwrapped_model, list): + unwrapped_model = unwrapped_model[0] + + if not is_quantized(unwrapped_model): + return + + print_rank_0(f"{msg}\nQuantization summary of unwrapped model: {unwrapped_model}\n{'_'*80}") + + if not torch.distributed.is_initialized(): + mtq.print_quant_summary(unwrapped_model) + return + + # Only print from unique TP ranks of [0, 1] + if parallel_state.get_data_parallel_rank( + with_context_parallel=True + ) == 0 and parallel_state.get_tensor_model_parallel_rank() in [0, 1]: + TP_rank = parallel_state.get_tensor_model_parallel_rank() + EP_rank = parallel_state.get_expert_model_parallel_rank() + PP_rank = parallel_state.get_pipeline_model_parallel_rank() + print(f"\nTP rank {TP_rank}, EP rank {EP_rank}, PP rank {PP_rank}") + print("_" * 80) + mtq.print_quant_summary(unwrapped_model) diff --git a/megatron/rl/agent/api.py b/megatron/rl/agent/api.py index 37100ece444..34efa68d85a 100644 --- a/megatron/rl/agent/api.py +++ b/megatron/rl/agent/api.py @@ -174,6 +174,11 @@ class GroupedRolloutGenerator(Agent, ABC): parallel_generation_tasks: int = 512 buffer_size: int = 10 + def __init__(self, *, 
parallel_generation_tasks: int | None = None, **kwargs): + super().__init__(**kwargs) + if parallel_generation_tasks is not None: + self.parallel_generation_tasks = parallel_generation_tasks + @abstractmethod async def group_rollout(self, request: GroupedRolloutRequest) -> list[Rollout]: ... diff --git a/megatron/rl/agent/weighted_multi_task.py b/megatron/rl/agent/weighted_multi_task.py index 8596ad6adcd..4690d9f1600 100644 --- a/megatron/rl/agent/weighted_multi_task.py +++ b/megatron/rl/agent/weighted_multi_task.py @@ -66,7 +66,9 @@ def __init__(self, agent_configs: list[AgentConfig]): self.weights.append(config.weight / total_weight) @classmethod - def from_config(cls, config: list[dict[str, Any]]) -> 'WeightedMultiTask': + def from_config( + cls, config: list[dict[str, Any]], *, parallel_generation_tasks: int | None = None + ) -> 'WeightedMultiTask': """Create a WeightedMultiTask from a config list. Args: @@ -82,13 +84,15 @@ def from_config(cls, config: list[dict[str, Any]]) -> 'WeightedMultiTask': for entry in config: if not all(k in entry for k in ['agent_type', 'agent_args', 'weight']): raise ValueError(f"Missing required keys in config entry: {entry}") + agent_args = entry.get('agent_args', {}) + agent_args['parallel_generation_tasks'] = parallel_generation_tasks # Import and instantiate the agent class agent_type = import_class(entry['agent_type']) agent_configs.append( AgentConfig( agent_type=agent_type, - agent_args=entry['agent_args'], + agent_args=agent_args, weight=float(entry['weight']), evaluation_only=entry.get('evaluation_only', False), ) diff --git a/megatron/rl/inference/megatron.py b/megatron/rl/inference/megatron.py index e67900e20a8..73ab5024a64 100644 --- a/megatron/rl/inference/megatron.py +++ b/megatron/rl/inference/megatron.py @@ -80,8 +80,12 @@ def get_static_inference_engine(args: Namespace, model: MegatronModule) -> Abstr ) inference_wrapped_model = GPTInferenceWrapper(model, inference_wrapper_config) + pg_collection = 
get_attr_wrapped_model(model, "pg_collection") + pp_group = pg_collection.pp text_generation_controller = SimpleTextGenerationController( - inference_wrapped_model=inference_wrapped_model, tokenizer=tokenizer + inference_wrapped_model=inference_wrapped_model, + tokenizer=tokenizer, + pp_group=pp_group, ) return MCoreEngine( text_generation_controller=text_generation_controller, @@ -119,20 +123,25 @@ def get_dynamic_inference_engine( mamba_inference_state_config = get_mamba_inference_state_config_from_model(model) - # DynamicInferenceContext must use the inference model's TP size, not the - # training TP size from global args. The inference model may have a custom - # ProcessGroupCollection with a different TP size. + # DynamicInferenceContext must use the inference model's TP / PP size, not the + # training TP / PP size from global args. The inference model may have a custom + # ProcessGroupCollection with a different TP / PP size. pg_collection = get_attr_wrapped_model(model, "pg_collection") tp_group = getattr(pg_collection, 'tp', None) if pg_collection is not None else None if tp_group is not None: inference_tp_size = get_pg_size(tp_group) else: inference_tp_size = args.tensor_model_parallel_size + pp_group = getattr(pg_collection, 'pp', None) if pg_collection is not None else None + if pp_group is not None: + inference_pp_size = get_pg_size(pp_group) + else: + inference_pp_size = args.pipeline_model_parallel_size # Inference context. 
inference_context = DynamicInferenceContext( params_dtype=args.params_dtype, - num_layers=args.num_layers // args.pipeline_model_parallel_size, + num_layers=args.num_layers // inference_pp_size, kv_channels=args.kv_channels, num_attention_heads=( args.num_query_groups if args.group_query_attention else args.num_attention_heads @@ -143,8 +152,9 @@ def get_dynamic_inference_engine( ), block_size_tokens=args.inference_dynamic_batching_block_size, buffer_size_gb=args.inference_dynamic_batching_buffer_size_gb, + max_requests=args.inference_dynamic_batching_max_requests, max_tokens=args.inference_dynamic_batching_max_tokens, - tensor_model_parallel_size=inference_tp_size, + pg_collection=pg_collection, # TP/PP sizes are derived from the model's pg_collection. materialize_only_last_token_logits=True, mamba_inference_state_config=mamba_inference_state_config, cache_mla_latent=args.multi_latent_attention and args.cache_mla_latents, @@ -156,16 +166,20 @@ def get_dynamic_inference_engine( cuda_graph_max_tokens=args.inference_dynamic_batching_cuda_graph_max_tokens, cuda_graph_mixed_prefill_count=args.inference_dynamic_batching_cuda_graph_mixed_prefill_count, metrics_writer=metrics_writer, + persist_cuda_graphs=args.rl_training_cuda_graphs ) - inference_wrapped_model = GPTInferenceWrapper(model, args, inference_context) + inference_wrapped_model = GPTInferenceWrapper(model, args, inference_context, pg_collection=pg_collection) inference_wrapped_model.model_is_pipeline_parallel = not ( is_pp_first_stage(pg_collection.pp) and is_pp_last_stage(pg_collection.pp) ) + pp_group = getattr(pg_collection, "pp", None) text_generation_controller = SimpleTextGenerationController( - inference_wrapped_model=inference_wrapped_model, tokenizer=tokenizer + inference_wrapped_model=inference_wrapped_model, + tokenizer=tokenizer, + pp_group=pp_group, ) return DynamicInferenceEngine( @@ -199,6 +213,7 @@ async def base_generate(self, request: InferenceRequest): assert self._client is not None, 
"Client is not initialized" tokenizer = get_tokenizer() + args = get_args() sampling_params = SamplingParams( num_tokens_to_generate=None, @@ -209,7 +224,7 @@ async def base_generate(self, request: InferenceRequest): termination_id=self._inference_engine.controller.tokenizer.eod, return_log_probs=True, skip_prompt_log_probs=True, - add_BOS=tokenizer.bos is not None, + add_BOS=(not args.rl_skip_bos_token and tokenizer.bos is not None), ) requests = [ self._client.add_request(prompt=prompt, sampling_params=sampling_params) diff --git a/megatron/rl/parallel_utils.py b/megatron/rl/parallel_utils.py new file mode 100644 index 00000000000..9cab73daba9 --- /dev/null +++ b/megatron/rl/parallel_utils.py @@ -0,0 +1,171 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + +""" +Utilities for building process groups for RL inference models with custom parallelism. +""" + +from typing import Optional + +import torch.distributed as dist + +from megatron.core import mpu +from megatron.core.hyper_comm_grid import HyperCommGrid +from megatron.core.process_groups_config import ProcessGroupCollection + + +def build_inference_pg_collection( + world_size: int, + tp_size: Optional[int] = None, + pp_size: Optional[int] = None, + cp_size: Optional[int] = None, + ep_size: Optional[int] = None, + expt_tp_size: Optional[int] = None, + use_tp_pp_dp_mapping: bool = False, +) -> ProcessGroupCollection: + """ + Build a ProcessGroupCollection for an RL inference model with custom parallelism. + + Uses two HyperCommGrids matching the structure of mpu: + - decoder_grid: for dense/attention layers (tp, cp, dp, pp) + - expert_grid: for MoE expert layers (expt_tp, ep, expt_dp, pp) + + Args: + world_size: Total world size (number of ranks). + tp_size: Tensor model parallel size. Defaults to training's TP size. + pp_size: Pipeline parallel size. Defaults to training's PP size. + cp_size: Context parallel size. Defaults to training's CP size. + ep_size: Expert parallel size. 
Defaults to training's EP size. + expt_tp_size: Expert tensor parallel size. Defaults to training's expert TP size. + use_tp_pp_dp_mapping: If True, use 'tp-pp-dp' order; otherwise 'tp-dp-pp'. + + Returns: + ProcessGroupCollection configured for the inference model. + """ + # Use current MPU values as defaults + if tp_size is None: + tp_size = mpu.get_tensor_model_parallel_world_size() + if cp_size is None: + cp_size = mpu.get_context_parallel_world_size() + if pp_size is None: + pp_size = mpu.get_pipeline_model_parallel_world_size() + if ep_size is None: + ep_size = mpu.get_expert_model_parallel_world_size() + if expt_tp_size is None: + expt_tp_size = mpu.get_expert_tensor_parallel_world_size() + + + # Compute DP size for dense layers (same formula as mpu) + # world = tp × cp × dp × pp + dp_size = world_size // (tp_size * cp_size * pp_size) + assert dp_size >= 1 and (tp_size * cp_size * dp_size * pp_size) == world_size, ( + f"World size ({world_size}) must be divisible by tp*cp*pp ({tp_size * cp_size * pp_size})" + ) + + # Compute expert DP size (same formula as mpu) + # world = expt_tp × ep × expt_dp × pp + expt_dp_size = world_size // (expt_tp_size * ep_size * pp_size) + assert expt_dp_size >= 1 and (expt_tp_size * ep_size * expt_dp_size * pp_size) == world_size, ( + f"World size ({world_size}) must be divisible by expt_tp*ep*pp ({expt_tp_size * ep_size * pp_size})" + ) + + rank = dist.get_rank() + + # ==================== + # Create decoder grid for dense/attention layers + # Matches mpu's decoder_rank_generator with ep=1 + # ==================== + if use_tp_pp_dp_mapping: + # Order: tp-cp-pp-dp + decoder_grid = HyperCommGrid( + [tp_size, cp_size, pp_size, dp_size], + ["tp", "cp", "pp", "dp"] + ) + else: + # Order: tp-cp-dp-pp (default) + decoder_grid = HyperCommGrid( + [tp_size, cp_size, dp_size, pp_size], + ["tp", "cp", "dp", "pp"] + ) + + # Create dense layer groups from decoder_grid + tp_group = decoder_grid.create_pg("tp") + cp_group = 
decoder_grid.create_pg("cp") + pp_group = decoder_grid.create_pg("pp") + dp_group = decoder_grid.create_pg("dp") + mp_group = decoder_grid.create_pg(["tp", "pp"]) + tp_cp_group = decoder_grid.create_pg(["tp", "cp"]) + dp_cp_group = decoder_grid.create_pg(["cp", "dp"]) + tp_dp_cp_group = decoder_grid.create_pg(["tp", "cp", "dp"]) + + # ==================== + # Create expert grid for MoE expert layers + # Matches mpu's expert_decoder_rank_generator with cp=1 + # ==================== + if use_tp_pp_dp_mapping: + # Order: tp-ep-pp-dp + expert_grid = HyperCommGrid( + [expt_tp_size, ep_size, pp_size, expt_dp_size], + ["tp", "ep", "pp", "dp"] + ) + else: + # Order: tp-ep-dp-pp (default) + expert_grid = HyperCommGrid( + [expt_tp_size, ep_size, expt_dp_size, pp_size], + ["tp", "ep", "dp", "pp"] + ) + + # Verify PP groups match between decoder and expert grids (required by mpu) + decoder_pp_enum = decoder_grid.get_rank_enum("pp") + expert_pp_enum = expert_grid.get_rank_enum("pp") + assert decoder_pp_enum == expert_pp_enum, ( + f"PP groups must match between decoder and expert grids. 
" + f"Decoder: {decoder_pp_enum}, Expert: {expert_pp_enum}" + ) + + # Create expert layer groups from expert_grid + ep_group = expert_grid.create_pg("ep") + expt_tp_group = expert_grid.create_pg("tp") + expt_dp_group = expert_grid.create_pg("dp") + tp_ep_group = expert_grid.create_pg(["tp", "ep"]) + tp_ep_pp_group = expert_grid.create_pg(["tp", "ep", "pp"]) + + # ==================== + # Embedding groups (derived from PP groups) + # ==================== + embd_group = None + pos_embd_group = None + + pp_rank_enum = decoder_grid.get_rank_enum("pp") + for pp_ranks in pp_rank_enum: + # Embedding is on first and last PP stage + if len(pp_ranks) == 1: + embd_ranks = [pp_ranks[0]] + else: + embd_ranks = [pp_ranks[0], pp_ranks[-1]] + group = dist.new_group(ranks=embd_ranks) + if rank in embd_ranks: + embd_group = group + + # Position embedding is only on first PP stage + pos_embd_ranks = [pp_ranks[0]] + group = dist.new_group(ranks=pos_embd_ranks) + if rank in pos_embd_ranks: + pos_embd_group = group + + return ProcessGroupCollection( + tp=tp_group, + cp=cp_group, + pp=pp_group, + ep=ep_group, + embd=embd_group, + pos_embd=pos_embd_group, + dp=dp_group, + tp_cp=tp_cp_group, + mp=mp_group, + expt_tp=expt_tp_group, + expt_dp=expt_dp_group, + tp_ep=tp_ep_group, + tp_ep_pp=tp_ep_pp_group, + dp_cp=dp_cp_group, + tp_dp_cp=tp_dp_cp_group, + ) diff --git a/megatron/rl/rl_utils.py b/megatron/rl/rl_utils.py index 13e49aeae61..25e63408f48 100644 --- a/megatron/rl/rl_utils.py +++ b/megatron/rl/rl_utils.py @@ -4,6 +4,7 @@ # Keep this to make the env registered. 
import itertools +import json import logging import math import pickle @@ -26,17 +27,17 @@ from megatron.core.full_cuda_graph import FullCudaGraphWrapper from megatron.core.models.common.language_module.language_module import LanguageModule from megatron.core.optimizer import MegatronOptimizer -from megatron.core.parallel_state import ( - get_pipeline_model_parallel_group, - get_pipeline_model_parallel_last_rank, - get_tensor_model_parallel_src_rank, - get_tensor_model_parallel_world_size, - is_pipeline_last_stage, -) from megatron.core.pipeline_parallel import get_forward_backward_func +from megatron.core.pipeline_parallel.utils import is_pp_last_stage, get_pp_last_rank from megatron.core.rerun_state_machine import RerunDataIterator from megatron.core.transformer.cuda_graphs import _CudagraphGlobalRecord +from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.utils import toggle_cuda_graphs +from megatron.core.resharding.refit import swap_model_weights +from megatron.core.inference.unified_memory import ( + advise_managed_module_parameters_preferred_location, + prefetch_managed_module_parameters, +) from megatron.core.utils import get_asyncio_loop, log_single_rank from megatron.rl.sequence_packing_utils import ( get_microbatch_dataloader, @@ -70,17 +71,142 @@ get_wandb_writer, ) from megatron.training.tokenizer.tokenizer import CustomTikTokenizer, _HuggingFaceTokenizer -from megatron.training.utils import get_ltor_masks_and_position_ids, get_nvtx_range +from megatron.training.utils import ( + get_ltor_masks_and_position_ids, + get_nvtx_range, + print_rank_0, + unwrap_model, +) +from megatron.core.utils import get_pg_rank, get_pg_size, get_attr_wrapped_model +from megatron.core.process_groups_config import ProcessGroupCollection from wandb import wandb_run from megatron.core.transformer.custom_layers.batch_invariant_kernels import ( is_batch_invariant_mode_enabled, ) + logger = logging.getLogger(__name__) # Global variable to 
store packing context for forward_step _GLOBAL_PACKING_CONTEXT = None + +def _maybe_prefetch_separate_inference_model_weights(model_core, *, to_cpu: bool) -> None: + """Prefetch RL *separate inference model* weights to CPU/GPU (UVM-only path). + + Gated only by user args; this assumes the separate inference model was allocated with UVM when enabled. + """ + args = get_args() + if not args.rl_offload_inference_model_weights_when_idle: + return + if args.rl_inference_model_unified_memory_level != 1: + return + + device = -1 if to_cpu else int(torch.cuda.current_device()) + # Note: include_buffers=False because buffers created with explicit device= in register_buffer() + # are not allocated via the UVM mempool and will fail UVM operations. Only parameters are UVM-allocated. + advise_managed_module_parameters_preferred_location(model_core, device=device, include_buffers=False) + nbytes = prefetch_managed_module_parameters(model_core, device=device, include_buffers=False) + # Ensure pages are resident before we enter CUDA-graph capture / inference, or before training continues. + torch.cuda.synchronize() + + if to_cpu: + print_rank_0(f"[Rank 0] offloaded {nbytes / 1024**2:.2f} MB of separate RL inference model weights to CPU (other ranks may vary)") + else: + print_rank_0(f"[Rank 0] prefetched {nbytes / 1024**2:.2f} MB of separate RL inference model weights to GPU (other ranks may vary)") + + +def verify_model_weights_swap( + train_model: LanguageModule, + inference_model: LanguageModule, + seq_len: int = 8, + batch_size: int = 2, + atol: float = 1e-4, + rtol: float = 1e-4, +) -> None: + """Verify that the inference model produces the same forward pass outputs + as the training model after the weights have been swapped. + + This function should be called after swap_model_weights to ensure the weight + transfer was successful. It runs a forward pass on both models and asserts + the outputs match. This is meant for debugging purposes only. 
+ + Args: + train_model: The training model (source of weights). + inference_model: The inference model (target of weights). + seq_len: Sequence length for test input. + batch_size: Batch size for test input. + atol: Absolute tolerance for comparing outputs. + rtol: Relative tolerance for comparing outputs. + + Raises: + AssertionError: If forward pass outputs do not match within tolerance. + """ + args = get_args() + + # Unwrap models to get the core module + train_lm = train_model[0] if isinstance(train_model, (list, tuple)) else train_model + inf_lm = inference_model[0] if isinstance(inference_model, (list, tuple)) else inference_model + + train_core = unwrap_model(train_lm) + inf_core = unwrap_model(inf_lm) + + actual_vocab_size = getattr(args, 'padded_vocab_size', 128256) + actual_seq_len = min(seq_len, getattr(args, 'seq_length', seq_len)) + device = torch.device(f"cuda:{torch.cuda.current_device()}") + + # Generate deterministic test input - same across ALL ranks + torch.manual_seed(1234) + test_tokens = torch.randint( + low=0, high=actual_vocab_size, size=(batch_size, actual_seq_len), + device=device, dtype=torch.long + ) + test_position_ids = ( + torch.arange(actual_seq_len, device=device, dtype=torch.long) + .unsqueeze(0) + .expand(batch_size, -1) + ) + test_attention_mask = torch.ones( + (batch_size, 1, actual_seq_len, actual_seq_len), device=device, dtype=torch.bool + ) + + # Save and restore training state + train_was_training = train_core.training + inf_was_training = inf_core.training + + train_core.eval() + inf_core.eval() + + try: + with torch.no_grad(): + train_output = train_lm( + test_tokens, test_position_ids, test_attention_mask, + runtime_gather_output=True + ) + + inf_output = inf_lm( + test_tokens, test_position_ids, test_attention_mask, + runtime_gather_output=True + ) + + # Only check on ranks that have output (last PP stage) + if train_output is not None and inf_output is not None: + assert train_output.shape == inf_output.shape, ( + 
f"Output shape mismatch: train={train_output.shape}, infer={inf_output.shape}" + ) + + max_diff = (train_output - inf_output).abs().max().item() + assert torch.allclose(train_output, inf_output, atol=atol, rtol=rtol), ( + f"Forward pass outputs do not match: max_diff={max_diff:.6e}, atol={atol}, rtol={rtol}" + ) + + finally: + # Restore training state + if train_was_training: + train_core.train() + if inf_was_training: + inf_core.train() + GroupedRollouts = list[list[TokenRollout | Rollout]] @@ -237,7 +363,7 @@ def align_unpacked_inference_logprobs( return padded_inference_logprobs -def get_agent(args): +def get_agent(args, parallel_generation_tasks: int | None = None): """Get an agent based on environment configuration. If args.langrl_env_config is provided, uses weighted environment selection. @@ -246,7 +372,10 @@ def get_agent(args): with open(args.langrl_env_config, 'r') as f: config = yaml.safe_load(f) - return WeightedMultiTask.from_config(config) + return WeightedMultiTask.from_config( + config, + parallel_generation_tasks=parallel_generation_tasks, + ) _INFERENCE_INTERFACE = None @@ -294,16 +423,17 @@ def get_inference_interface(args, loop, model): def get_rollout_generator(args, inference_interface, n_prompts, samples_per_group): global _ROLLOUT_GENERATOR if not args.rl_partial_rollouts or _ROLLOUT_GENERATOR is None: - agent = get_agent(args) + agent = get_agent(args, parallel_generation_tasks=args.rl_parallel_generation_tasks) # Collect Rollouts request = GroupedRolloutRequest( num_groups=-1 if args.rl_partial_rollouts else n_prompts, rollouts_per_group=samples_per_group, inference_interface=inference_interface, generation_args={ - 'temperature': args.grpo_default_temperature, + 'temperature': args.rl_default_temperature, 'max_tokens': args.inference_max_seq_length, - 'top_p': args.grpo_default_top_p, + 'top_p': args.rl_default_top_p, + 'top_k': args.rl_default_top_k, }, filter_groups_with_same_reward=args.grpo_filter_groups_with_same_reward, ) @@ -312,12 
+442,13 @@ def get_rollout_generator(args, inference_interface, n_prompts, samples_per_grou def get_environment_rollouts( - model: LanguageModule, optimizer: MegatronOptimizer, n_prompts: int, samples_per_group: int + model: LanguageModule, inference_model: LanguageModule, optimizer: MegatronOptimizer, n_prompts: int, samples_per_group: int ): """Sample environment rollouts from an LLM. Args: model: Model to sample from. + inference_model: Inference model to use for inference. n_prompts: Number of prompts to sample for across *all* data parallel workers. samples_per_group: Amount of trajectories per prompt. @@ -327,14 +458,38 @@ def get_environment_rollouts( args = get_args() nvtx_range = get_nvtx_range() + # If we have separate training and inference models we need to refit weights from the training model to the inference model. + if inference_model is not None: + if args.rl_offload_optimizer_during_inference: + with nvtx_range("offload-optimizer-before-refit"): + optimizer.offload_to_cpu() + torch.cuda.empty_cache() + + # If the separate inference model weights were prefetched to CPU while idle, bring them + # back to GPU before refit/copy and before any CUDA-graph'd inference. 
+ with nvtx_range("prefetch-inference-model-weights-to-gpu"): + inf_core = unwrap_model(inference_model[0]) + _maybe_prefetch_separate_inference_model_weights(inf_core, to_cpu=False) + swap_model_weights(model, inference_model, args.refit_method) + if args.rl_verify_model_weights_swap: + verify_model_weights_swap( + train_model=model, + inference_model=inference_model, + atol=.1, + rtol=5e-4, + ) + else: + inference_model = model + + inference_pg_collection = get_attr_wrapped_model(inference_model[0], "pg_collection") assert ( - n_prompts % mpu.get_expert_data_parallel_world_size() == 0 + n_prompts % get_pg_size(inference_pg_collection.ep) == 0 ), "n_prompts must be divisible by data_parallel_world_size" with nvtx_range("rollout-collection"): loop = get_asyncio_loop() with megatron_rl_inference_mode( - model, + inference_model, optimizer, args.cuda_graph_impl, args.rl_reset_cuda_graphs, @@ -378,7 +533,7 @@ def get_environment_rollouts( torch.distributed.broadcast_object_list(rollouts, src=0) logger.debug(f"Got rollouts on rank {rank}") - if lang_rl_log_dir and rank == get_tensor_model_parallel_src_rank(): + if lang_rl_log_dir and rank == get_pg_rank(inference_pg_collection.tp): with open( lang_rl_log_dir + f'/rollouts_rank{rank}_iteration{args.curr_iteration}_' @@ -483,7 +638,10 @@ def get_logprobs(model, tokens, position_ids, no_grad=False, sequence_packing=Fa ) model.config.flash_decode = flash_decode - if not is_pipeline_last_stage(): + pg_collection = get_attr_wrapped_model(model, "pg_collection") + pp_group = pg_collection.pp + + if not is_pp_last_stage(pp_group): return logits_or_hidden_states else: logits = logits_or_hidden_states @@ -792,9 +950,14 @@ def prepare_trajectories( inference_logprobs = None # Some sanity checks regarding the tokenization - assert ( - tokenizer.bos is None or (trajs[:, 0] == tokenizer.bos).all() - ), "First token should be bos" + if not args.rl_skip_bos_token: + assert ( + tokenizer.bos is None or (trajs[:, 0] == 
tokenizer.bos).all() + ), "First token should be bos" + else: + assert ( + tokenizer.bos is None or (trajs[:, 0] != tokenizer.bos).all() + ), "First token should not be bos" assert ( tokenizer.bos is None or (trajs[:, 1] != tokenizer.bos).all() ), "Second token should not be bos" @@ -828,8 +991,15 @@ def prepare_data_for_update( args = get_args() wandb_writer = get_wandb_writer() tb_writer = get_tensorboard_writer() - nvtx_range = get_nvtx_range() + nvtx_range = get_nvtx_range() runtime_state = get_rl_runtime_state() + + if args.cuda_graph_impl != "none" and not args.rl_training_cuda_graphs: + lang_module = ( + model[0].module.module if hasattr(model[0].module, "module") else model[0].module + ) + toggle_cuda_graphs(lang_module, "none", reset_cuda_graphs=False) + model = model[0] dtype = torch.bfloat16 if args.bf16 else (torch.float16 if args.fp16 else torch.float32) @@ -862,11 +1032,13 @@ def prepare_data_for_update( # Now split the rollouts across the data parallel ranks for training # This needs to be done at this point because we are about to calculate logprobs - if (expert_data_parallel_world_size := mpu.get_expert_data_parallel_world_size()) > 0: - data_split_size = len(rollouts) // expert_data_parallel_world_size + # Note :- For EP, do not use the expert data parallel group here. Always + # use the regular data parallel group. + if (data_parallel_world_size := mpu.get_data_parallel_world_size()) > 0: + data_split_size = len(rollouts) // data_parallel_world_size data_split_range = ( - mpu.get_expert_data_parallel_rank() * data_split_size, - (mpu.get_expert_data_parallel_rank() + 1) * data_split_size, + mpu.get_data_parallel_rank() * data_split_size, + (mpu.get_data_parallel_rank() + 1) * data_split_size, ) rollouts = rollouts[data_split_range[0] : data_split_range[1]] # First we calculate them on a global level and then we split and recalculate on a local level. 
@@ -931,12 +1103,17 @@ def prepare_data_for_update( # Wrap forward_backward_func for Full iteration CUDA graph forward_backward_func = get_forward_backward_func() - if args.enable_cuda_graph and args.cuda_graph_scope == "full_iteration": + if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope: forward_backward_func = FullCudaGraphWrapper( forward_backward_func, cuda_graph_warmup_steps=args.cuda_graph_warmup_steps ) def logprobs_forward_step(data_iterator, model): + + # Avoid self.training checks which will trigger cudagraph capture; this path reuses + # the forward pass from training after it has been captured on the 1st iteration. + model.eval() + if args.rl_use_sequence_packing: # When using sequence packing, the data iterator returns a tuple with a single element, the bin index. bin_tensor = next(data_iterator)[0] @@ -952,7 +1129,7 @@ def logprobs_forward_step(data_iterator, model): b_trajs = b_trajs.cuda() b_posids = b_posids.cuda() - return ( + logprobs = ( get_logprobs( model, b_trajs, @@ -964,10 +1141,16 @@ def logprobs_forward_step(data_iterator, model): None, ) + model.train() + return logprobs + dtype = ( torch.bfloat16 if args.bf16 else (torch.float16 if args.fp16 else torch.float32) ) + pg_collection = get_attr_wrapped_model(model, "pg_collection") + pp_group = pg_collection.pp + def _compute_logprobs_batch(): """Compute logprobs for all batches in the data loader.""" logprobs_list = [] @@ -984,10 +1167,10 @@ def _compute_logprobs_batch(): forward_only=True, adjust_tensor_shapes_fn=None, ) - if is_pipeline_last_stage(): + if is_pp_last_stage(pp_group): logprobs_list.append(output_tensor[0].detach()) - if is_pipeline_last_stage(): + if is_pp_last_stage(pp_group): logprobs = torch.concat(logprobs_list, dim=0) assert logprobs.dtype == dtype else: @@ -998,11 +1181,9 @@ def _compute_logprobs_batch(): device=torch.cuda.current_device(), ) - dist.broadcast( - logprobs, - src=get_pipeline_model_parallel_last_rank(), - 
group=get_pipeline_model_parallel_group(), - ) + # Only PP>1 needs a broadcast from the last stage; for PP=1 the output is already local. + if get_pg_size(pp_group) > 1: + dist.broadcast(logprobs, src=get_pp_last_rank(pp_group), group=pp_group) return logprobs.cpu() with torch.no_grad(), nvtx_range("compute_old_logprobs", time=True): @@ -1107,6 +1288,7 @@ def _compute_logprobs_batch(): def get_rollout_data_iterator( model: LanguageModule, + inference_model: LanguageModule | None, optimizer: MegatronOptimizer, iteration: int, ref_state_dict: Dict[str, torch.Tensor], @@ -1116,7 +1298,7 @@ def get_rollout_data_iterator( tokenizer = get_tokenizer() buffered_rollouts = get_environment_rollouts( - model, optimizer, args.grpo_prompts_per_step, args.grpo_group_size + model, inference_model, optimizer, args.grpo_prompts_per_step, args.grpo_group_size ) buffered_rollouts = prepare_data_for_update(model, ref_state_dict, buffered_rollouts, tokenizer) @@ -1125,6 +1307,7 @@ def get_rollout_data_iterator( def setup_grpo_data_iterator( model: LanguageModule, + inference_model: LanguageModule | None, optimizer: MegatronOptimizer, iteration: int, ref_state_dict: Dict[str, torch.Tensor], @@ -1146,13 +1329,18 @@ def setup_grpo_data_iterator( args = get_args() runtime_state = get_rl_runtime_state() + if inference_model is not None: + inference_pg_collection = unwrap_model(inference_model[0]).pg_collection + else: + inference_pg_collection = ProcessGroupCollection.use_mpu_process_groups() + # We collect new rollouts when we've gone over the collected data 'grpo_iterations' times. 
if ( buffered_rollouts is None or iteration == runtime_state.last_collection_iteration + (args.grpo_iterations * runtime_state.global_batches_per_collection) ): - train_data_iterator = get_rollout_data_iterator(model, optimizer, iteration, ref_state_dict) + train_data_iterator = get_rollout_data_iterator(model,inference_model, optimizer, iteration, ref_state_dict) runtime_state.reset_iteration_counters(iteration) else: train_data_iterator = buffered_rollouts @@ -1205,9 +1393,10 @@ def evaluate_and_print_results_rl( validation=True, rank_info=None, generation_args={ - 'temperature': args.grpo_default_temperature, + 'temperature': args.rl_default_temperature, 'max_tokens': args.seq_length, - 'top_p': args.grpo_default_top_p, + 'top_p': args.rl_default_top_p, + 'top_k': args.rl_default_top_k, }, ) evaluation_responses = loop.run_until_complete(agent.run_evaluation(request)) @@ -1400,6 +1589,11 @@ def megatron_rl_inference_mode( lang_module = model[0].module.module if hasattr(model[0].module, "module") else model[0].module lang_module.eval() + # If this is a separate RL inference model allocated with UVM, ensure weights are resident on GPU + # before any CUDA-graph capture/replay or inference. + with nvtx_range("prefetch-inference-model-weights-to-gpu"): + model_core = unwrap_model(model[0]) + _maybe_prefetch_separate_inference_model_weights(model_core, to_cpu=False) rotary_module = getattr(lang_module, "rotary_pos_emb", None) # Vanilla RotaryEmbedding module has lru_cache decorator which breaks RL training @@ -1415,7 +1609,7 @@ def megatron_rl_inference_mode( optimizer.offload_to_cpu() # TODO: Remove this if statement once a change to `toggle_cuda_graphs` makes it safe to. 
- if cuda_graph_impl != "none": + if cuda_graph_impl != "none" and not args.rl_training_cuda_graphs: toggle_cuda_graphs(lang_module, cuda_graph_impl, reset_cuda_graphs=reset_cuda_graphs) inference_interface = get_inference_interface(args, loop, model) @@ -1464,9 +1658,14 @@ def megatron_rl_inference_mode( inference_interface._inference_engine.context.memory_buffer = None # TODO: Remove this if statement once a change to `toggle_cuda_graphs` makes it safe to. - if cuda_graph_impl != "none": + if cuda_graph_impl != "none" and not args.rl_training_cuda_graphs: toggle_cuda_graphs(lang_module, 'none', reset_cuda_graphs=reset_cuda_graphs) + # If this is a separate RL inference model, prefetch weights back to CPU so they don't consume + # GPU memory during training. + with nvtx_range("prefetch-inference-model-weights-to-cpu"): + _maybe_prefetch_separate_inference_model_weights(model_core, to_cpu=True) + if offload_optimizer_during_inference: with nvtx_range("onload-optimizer-after-inference"): optimizer.restore_from_cpu() @@ -1480,9 +1679,11 @@ def megatron_rl_inference_mode( def rl_inference_interface_shutdown(): + global _INFERENCE_INTERFACE if _INFERENCE_INTERFACE is not None: loop = get_asyncio_loop() loop.run_until_complete(_INFERENCE_INTERFACE.kill()) + _INFERENCE_INTERFACE = None else: logger.warning("No inference interface to shutdown. 
This should not happen.") diff --git a/megatron/rl/sequence_packing_utils.py b/megatron/rl/sequence_packing_utils.py index 56a89262454..a5703a4580c 100644 --- a/megatron/rl/sequence_packing_utils.py +++ b/megatron/rl/sequence_packing_utils.py @@ -155,8 +155,8 @@ def log_packing_efficiency(packing_context: PackingContext): total_capacity = packed_trajs.shape[0] * packed_trajs.shape[1] packing_efficiency = my_tokens / total_capacity if total_capacity > 0 else 0 avg_seq_length = total_tokens / len(packing_info.seq_lengths) - rank = mpu.get_expert_data_parallel_rank() - expert_data_parallel_world_size = mpu.get_expert_data_parallel_world_size() + rank = mpu.get_data_parallel_rank() + data_parallel_world_size = mpu.get_data_parallel_world_size() log_single_rank(logger, logging.INFO, f"[Sequence Packing] Statistics:") log_single_rank( @@ -412,8 +412,12 @@ def get_default_packed_seq_params(seq_length: int, device: torch.device) -> Pack Returns: PackedSeqParams configured as a single unpacked sequence. """ - # Single sequence spanning the full length = no actual packing - cu_seqlens = torch.tensor([0, seq_length], dtype=torch.int32, device=device) + + args = get_args() + + # Pad to the maximum number of sequences in the bin for the attention kernel. 
+ cu_seqlens = torch.full((args.rl_sequence_packing_max_sequences_per_bin,), seq_length, dtype=torch.int32, device=device) + cu_seqlens[0] = 0 return PackedSeqParams( qkv_format='thd', @@ -429,19 +433,25 @@ def create_packed_seq_params(packing_context: PackingContext): cached_packed_seq_params = [] packing_info = packing_context.packing_info bin_size = packing_context.bin_size + max_sequences_per_bin = packing_context.packer.max_sequences_per_bin device = packing_context.packed_trajs.device for bin_idx in range(len(packing_context.packed_trajs)): params = create_packed_seq_params_for_bin( packing_info=packing_info, bin_idx=bin_idx, bin_size=bin_size, + max_sequences_per_bin=max_sequences_per_bin, device=device, ) cached_packed_seq_params.append(params) return cached_packed_seq_params def create_packed_seq_params_for_bin( - packing_info: PackingInfo, bin_idx: int, bin_size: int, device: torch.device + packing_info: PackingInfo, + bin_idx: int, + bin_size: int, + max_sequences_per_bin: int, + device: torch.device ) -> Optional[PackedSeqParams]: """Create PackedSeqParams for a single bin to enable proper attention masking in TE. 
@@ -453,6 +463,7 @@ def create_packed_seq_params_for_bin( packing_info: PackingInfo object containing packing metadata from SequencePacker bin_idx: Index of the bin to create params for bin_size: Size of the bin (padded sequence length) + max_sequences_per_bin: Maximum number of sequences per bin device: Device to create tensors on Returns: @@ -475,8 +486,8 @@ def create_packed_seq_params_for_bin( # Pad cu_seqlens to bin_size by repeating the last value (creates zero-length ghost sequences) # This ensures a fixed tensor size for CUDA graph compatibility - if len(cu_seqlens) < bin_size: - out = cu_seqlens.new_full((bin_size,), bin_size) + if len(cu_seqlens) < max_sequences_per_bin: + out = cu_seqlens.new_full((max_sequences_per_bin,), bin_size) out[:len(cu_seqlens)] = cu_seqlens cu_seqlens = out @@ -750,7 +761,7 @@ def pack_sequences( # (it depends on the original trajectories passed to pack_sequences) # Invert attention mask, before inversion: (True = attend, False = mask) - attention_mask = ~attention_mask + attention_mask.bitwise_not_() # Create the PackingInfo dataclass packing_info = PackingInfo( @@ -790,8 +801,8 @@ def distribute_packed_bins( packing_info: PackingInfo, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, PackingInfo]: """Distribute packed bins across the data parallel ranks.""" - rank = mpu.get_expert_data_parallel_rank() - world_size = mpu.get_expert_data_parallel_world_size() + rank = mpu.get_data_parallel_rank() + world_size = mpu.get_data_parallel_world_size() tokenizer = get_tokenizer() # Distribute packed bins across data parallel ranks @@ -957,32 +968,32 @@ def distribute_packed_bins( def pack_all_trajectories(trajs, generation_masks, inference_logprobs, global_advantages, bin_size, max_sequences_per_bin, packing_algo): tokenizer = get_tokenizer() - expert_data_parallel_world_size = mpu.get_expert_data_parallel_world_size() + data_parallel_world_size = mpu.get_data_parallel_world_size() nvtx_range = get_nvtx_range() with 
nvtx_range("regather_trajectories", time=True): # Regather trajectories from all ranks for packing trajs = trajs.cuda() - trajs_list = [torch.empty_like(trajs) for _ in range(expert_data_parallel_world_size)] + trajs_list = [torch.empty_like(trajs) for _ in range(data_parallel_world_size)] torch.distributed.all_gather( - trajs_list, trajs, group=mpu.get_expert_data_parallel_group() + trajs_list, trajs, group=mpu.get_data_parallel_group() ) trajs = torch.cat(trajs_list, dim=0) # Gather all generation masks generation_masks = generation_masks.cuda() - masks_list = [torch.empty_like(generation_masks) for _ in range(expert_data_parallel_world_size)] + masks_list = [torch.empty_like(generation_masks) for _ in range(data_parallel_world_size)] torch.distributed.all_gather( - masks_list, generation_masks, group=mpu.get_expert_data_parallel_group() + masks_list, generation_masks, group=mpu.get_data_parallel_group() ) generation_masks = torch.cat(masks_list, dim=0) # Gather inference logprobs if present if inference_logprobs is not None: inference_logprobs = inference_logprobs.cuda() - logprobs_list = [torch.empty_like(inference_logprobs) for _ in range(expert_data_parallel_world_size)] + logprobs_list = [torch.empty_like(inference_logprobs) for _ in range(data_parallel_world_size)] torch.distributed.all_gather( - logprobs_list, inference_logprobs, group=mpu.get_expert_data_parallel_group() + logprobs_list, inference_logprobs, group=mpu.get_data_parallel_group() ) inference_logprobs = torch.cat(logprobs_list, dim=0) @@ -1037,6 +1048,7 @@ def pack_all_trajectories(trajs, generation_masks, inference_logprobs, global_ad packing_info=packing_info, bin_idx=bin_idx, bin_size=bin_size, + max_sequences_per_bin=max_sequences_per_bin, device=packed_trajs.device, ) for bin_idx in range(len(packed_trajs)) ] diff --git a/megatron/rl/server/inference/inference_interface_server.py b/megatron/rl/server/inference/inference_interface_server.py index 4abdf85cfcb..ba595c3ca0e 100644 --- 
a/megatron/rl/server/inference/inference_interface_server.py +++ b/megatron/rl/server/inference/inference_interface_server.py @@ -93,6 +93,6 @@ async def suspend(self): if isinstance(self._inference_interface, InferenceServer): await self._inference_interface.suspend() - def resume(self): + async def resume(self): if isinstance(self._inference_interface, InferenceServer): - self._inference_interface.resume() + await self._inference_interface.resume() diff --git a/megatron/training/__init__.py b/megatron/training/__init__.py index 46cf5b5c9bc..3546dfd5761 100644 --- a/megatron/training/__init__.py +++ b/megatron/training/__init__.py @@ -11,7 +11,7 @@ from .global_vars import get_adlr_autoresume from .global_vars import get_timers from .initialize import initialize_megatron -from .training import pretrain, get_model, get_train_valid_test_num_samples +from .training import pretrain, get_model, get_train_valid_test_num_samples, set_startup_timestamps from .utils import (print_rank_0, is_last_rank, diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index b94b5b45544..54c7eeaa3fd 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -419,6 +419,27 @@ def validate_args(args, defaults={}): assert not (args.rl_partial_rollouts and args.rl_remove_kv_cache_during_training), \ "Cannot use both partial-rollouts and remove-kv-cache-during-training" + assert not ( + args.rl_offload_inference_model_weights_when_idle + and args.rl_inference_model_unified_memory_level != 1 + ), ( + "--rl-offload-inference-model-weights-when-idle requires " + "--rl-inference-model-unified-memory-level=1." + ) + + # When using different EP sizes for inference and training (EP refit), the legacy + # GroupedMLP is not supported. Only SequentialMLP or TEGroupedMLP can be used. 
+ if ( + args.rl_inference_expert_model_parallel_size is not None + and args.rl_inference_expert_model_parallel_size != args.expert_model_parallel_size + ): + assert not args.moe_use_legacy_grouped_gemm, ( + "Legacy GroupedMLP (--moe-use-legacy-grouped-gemm) is not supported when using " + "different expert parallelism sizes for inference and training. " + "Use SequentialMLP (default when --moe-grouped-gemm is not set) or " + "TEGroupedMLP (--moe-grouped-gemm without --moe-use-legacy-grouped-gemm)." + ) + args.grpo_samples_per_iteration = args.grpo_prompts_per_step * args.grpo_group_size num_generated_samples_per_inference_iteration = ( args.grpo_samples_per_iteration * args.grpo_iterations) @@ -545,6 +566,12 @@ def validate_args(args, defaults={}): for elt in [args.train_data_path, args.valid_data_path, args.test_data_path]) is False or \ args.per_split_data_args_path is None + if args.phase_transition_iterations: + args.phase_transition_iterations = sorted( + int(x.strip()) for x in args.phase_transition_iterations.split(",") + ) + assert args.rampup_batch_size is None, "multi-phase training does not support batch size ramp-up" + # Batch size. assert args.micro_batch_size is not None assert args.micro_batch_size > 0 @@ -744,9 +771,8 @@ def validate_args(args, defaults={}): assert args.ckpt_format == "fsdp_dtensor", \ "Megatron FSDP only supports fsdp_dtensor checkpoint format" - - if args.use_megatron_fsdp: - args.reuse_grad_buf_for_mxfp8_param_ag = False + + args.reuse_grad_buf_for_mxfp8_param_ag = False if args.fsdp_manual_registration: assert args.use_megatron_fsdp, "FSDP manual registration is only supported with Megatron FSDP" @@ -900,6 +926,8 @@ def validate_args(args, defaults={}): if args.save_retain_interval is not None: assert args.save_retain_interval > 0 assert args.save_retain_interval % args.save_interval == 0 + if args.log_memory_interval is not None: + assert args.log_memory_interval % args.log_interval == 0 # Mixed precision checks. 
if args.fp16_lm_cross_entropy: assert args.fp16, 'lm cross entropy in fp16 only support in fp16 mode.' @@ -1264,6 +1292,9 @@ def validate_args(args, defaults={}): # Muon optimizer check if 'muon' in args.optimizer: + # TODO: remove these checks once we support them + assert not args.overlap_grad_reduce, "Muon optimizer does not support overlap grad reduce for now." + assert not args.overlap_param_gather, "Muon optimizer does not support overlap param gather for now." assert not args.use_distributed_optimizer, "Muon optimizer does not support distributed optimizer for now." assert not args.use_torch_fsdp2, "Muon optimizer does not support Torch-FSDP2 for now." assert not args.use_megatron_fsdp, "Muon optimizer does not support Megatron-FSDP for now." @@ -1549,11 +1580,6 @@ def _add_transformer_engine_args(parser): help='Keep the compute param in fp4 (do not use any other intermediate ' 'dtype) and perform the param all-gather in fp4.', dest='fp4_param') - group.add_argument('--te-rng-tracker', action='store_true', default=False, - help='Use the Transformer Engine version of the random number generator. ' - 'Required for CUDA graphs support.') - group.add_argument('--inference-rng-tracker', action='store_true', default=False, - help='Use a random number generator configured for inference.') group.add_argument('--te-precision-config-file', default=None, help='Configuration file to select per-module precision overrides. ' 'See TransformerEngineMixedPrecision.md') @@ -1638,7 +1664,14 @@ def _add_inference_args(parser): 'If the UVM level is 0, then only GPU memory is used and ' 'the total memory equals `buffer_size_gb`. 
If the UVM ' 'level is 1, then additional memory is utilized on the ' - 'CPU and the total memory equals `2 * buffer_size_gb`.') + 'CPU and the total memory equals `buffer_size_gb + ' + 'paused_buffer_size_gb`.') + group.add_argument('--inference-dynamic-batching-paused-buffer-size-gb', + type=float, default=None, + help='Amount of memory reserved for paused requests in ' + 'the dynamic inference context. Active requests are ' + 'paused when there are not enough active blocks available ' + 'to continue generating a request.') group.add_argument('--inference-dynamic-batching-block-size', type=int, default=256, help='KV cache block size. ' @@ -1667,7 +1700,7 @@ def _add_inference_args(parser): action='store_true', default=False, help='Only use cuda graphs for decode-only steps, not prefill and mixed steps.') group.add_argument('--inference-dynamic-batching-unified-memory-level', - type=int, default=1, choices=[0, 1], + type=int, default=0, choices=[0, 1], help='Set unified memory usage within the dynamic ' 'inference context. The levels are: 0) no unified memory, ' '1) allocate `memory_buffer` in unified memory. 
' @@ -1682,8 +1715,12 @@ def _add_inference_args(parser): group.add_argument('--mlp-chunks-for-prefill', type=int, default=1, help='Number of chunks along sequence dimension for MLP ' 'computation during prefill') - group.add_argument('--disable-chunked-prefill', default=False, action="store_true", - help='Disable chunked prefill (chunked prefill is enabled by default).') + # TODO(ksanthanam): Clean this up in future PR + group.add_argument('--enable-chunked-prefill', dest='disable_chunked_prefill', + action='store_false', default=True, + help="Enable chunked prefill (disabled by default)") + group.add_argument('--disable-chunked-prefill', dest='disable_chunked_prefill', + action='store_true', help=argparse.SUPPRESS) group.add_argument('--inference-dynamic-batching-cuda-graph-max-tokens', type=int, default=16384, help='Maximum number of tokens to capture in a cuda graph.') @@ -1697,6 +1734,9 @@ def _add_inference_args(parser): required=False, default=False, help='Enable inference wandb logging.') group.add_argument("--inference-coordinator-port", type=int, default=12346, help="This port will be used to setup the inference coordinator on node-0") + group.add_argument("--inference-fuse-tp-communication", action="store_true", default=False, + help="Use the fused communication kernel for tensor parallelism during inference. 
This " + "kernel fuses reduce-scatter + residual-add + rms-norm + all-gather into one operation.") return parser @@ -2090,13 +2130,6 @@ def _add_regularization_args(parser): help='Dropout probability for hidden state transformer.') group.add_argument('--weight-decay', type=float, default=0.01, help='Weight decay coefficient for L2 regularization.') - group.add_argument('--start-weight-decay', type=float, - help='Initial weight decay coefficient for L2 regularization.') - group.add_argument('--end-weight-decay', type=float, - help='End of run weight decay coefficient for L2 regularization.') - group.add_argument('--weight-decay-incr-style', type=str, default='constant', - choices=['constant', 'linear', 'cosine'], - help='Weight decay increment function.') group.add_argument('--apply-wd-to-qk-layernorm', action='store_true', help='Apply weight decay to qk layernorm as a special case.') group.add_argument('--clip-grad', type=float, default=1.0, @@ -2169,10 +2202,6 @@ def _add_rl_args(parser): help="Entropy term weight in GRPO loss.") group.add_argument('--grpo-filter-groups-with-same-reward', action='store_true', help="Filter groups with same reward.") - group.add_argument('--grpo-default-temperature', type=float, default=1.0, - help="Default temperature for model inference.") - group.add_argument('--grpo-default-top-p', type=float, default=0, - help="Default top-p for model inference.") group.add_argument('--langrl-inference-server-type', type=str, choices=['inplace_megatron', 'inplace_megatron_chat'], default='inplace_megatron', help="Type of inference server to use.") @@ -2181,6 +2210,12 @@ def _add_rl_args(parser): group.add_argument('--langrl-external-server', action=argparse.BooleanOptionalAction, required=False, default=False) group.add_argument('--langrl-env-config', type=str, default=None, help="Path to YAML config file for RL environment configuration.") + group.add_argument('--rl-default-temperature', type=float, default=1.0, + help="Default temperature for 
model inference.") + group.add_argument('--rl-default-top-p', type=float, default=0, + help="Default top-p for model inference.") + group.add_argument('--rl-default-top-k', type=int, default=-1, + help="Default top-k for model inference.") group.add_argument('--rl-offload-optimizer-during-inference', action='store_true', help='Offload optimizer state to CPU during inference/rollout to save GPU memory') group.add_argument('--rl-offload-kv-cache-during-training', action=argparse.BooleanOptionalAction, default=False, @@ -2206,10 +2241,77 @@ def _add_rl_args(parser): help='Algorithm for distributing packed bins across ranks. ' 'fifo: first-in-first-out sequential distribution, ' 'round-robin: distribute bins cyclically across ranks for better load balancing') + group.add_argument('--rl-training-cuda-graphs', action=argparse.BooleanOptionalAction, type=bool, + default=False, + help='If set, do not call `delete_cuda_graphs` or `toggle_cuda_graphs` when the inference engine is suspended. ' + 'Use only when all training and inference cudagraphs and the KV cache fit on device.') + group.add_argument('--rl-inference-tensor-model-parallel-size', type=int, default=None, + help='Degree of tensor model parallelism for inference for RL.') + group.add_argument( + '--rl-inference-pipeline-model-parallel-size', + type=int, + default=None, + help='Degree of pipeline model parallelism for inference for RL.', + ) + group.add_argument( + '--rl-inference-expert-model-parallel-size', + type=int, + default=None, + help='Degree of expert model parallelism for inference for RL.', + ) + group.add_argument( + '--rl-inference-expert-tensor-model-parallel-size', + type=int, + default=None, + help='Degree of expert tensor model parallelism for inference for RL. ' + 'For MoE models, this controls the TP size for expert layers specifically. 
' + 'Defaults to training expert_tensor_parallel_size if not specified.', + ) + group.add_argument( + '--rl-inference-model-unified-memory-level', + type=int, + default=0, + choices=[0, 1], + help=( + 'Allocate the separate RL inference model parameters from a unified virtual memory (UVM) ' + 'CUDA mempool. Level 0 disables UVM (default). Level 1 enables UVM allocation so the ' + 'inference model weights can be prefetched to CPU when idle while keeping CUDA-graph-safe ' + 'device pointers.' + ), + ) + group.add_argument( + '--rl-offload-inference-model-weights-when-idle', + action=argparse.BooleanOptionalAction, + required=False, + default=False, + help=( + 'When using a separate RL inference model with UVM-enabled parameters, prefetch its weights ' + 'to CPU when not doing rollout inference, and prefetch back to GPU right before inference. ' + 'Requires --rl-inference-model-unified-memory-level=1.' + ), + ) + group.add_argument('--refit-method', type=str, default='gloo', + choices=['nccl', 'gloo'], + help=('Method to refit the model weights between training and inference models during RL. ' + 'nccl: use NCCLCopyService to refit using NCCL; ' + 'gloo: use GlooCopyService over CPU; ' + )) + group.add_argument('--rl-verify-model-weights-swap', action=argparse.BooleanOptionalAction, default=False, + help='If set, verify that the model weights were correctly transferred by comparing forward pass outputs on' + 'the first swap of model weights.') + + group.add_argument('--rl-parallel-generation-tasks', type=int, default=512, + help='Number of parallel generation tasks for RL inference.') + group.add_argument('--rl-skip-bos-token', action=argparse.BooleanOptionalAction, type=bool, default=False, + help='Skip BOS token at the beginning of the sequences. 
Default is False.') return parser def _add_training_args(parser): - from megatron.training.config import TrainingConfig + from megatron.training.training_config import TrainingConfig + from megatron.training.common_config import ProfilingConfig + + prof_factory = ArgumentGroupFactory(ProfilingConfig, exclude=["record_shapes", "nvtx_ranges"]) + prof_group = prof_factory.build_group(parser, "profiling") train_factory = ArgumentGroupFactory(TrainingConfig) group = train_factory.build_group(parser, "training") @@ -2231,9 +2333,6 @@ def _add_training_args(parser): group.add_argument('--no-check-for-nan-in-loss-and-grad', action='store_false', help='Check for NaNs in loss and grad', dest='check_for_nan_in_loss_and_grad') - group.add_argument('--check-for-spiky-loss', action='store_true', - help='Check for spiky loss', - dest='check_for_spiky_loss') group.add_argument('--check-for-large-grads', action='store_true', help='Check for unexpectedly large grads', dest='check_for_large_grads') @@ -2274,32 +2373,11 @@ def _add_training_args(parser): group.add_argument('--no-clone-scatter-output-in-embedding', action='store_false', help='If not set, clone the output of the scatter in embedding layer to GC original tensor.', dest='clone_scatter_output_in_embedding') - group.add_argument('--profile', action='store_true', - help='Enable nsys profiling. When using this option, nsys ' - 'options should be specified in commandline. 
An example ' - 'nsys commandline is `nsys profile -s none -t nvtx,cuda ' - '-o --force-overwrite true ' - '--capture-range=cudaProfilerApi ' - '--capture-range-end=stop`.') - group.add_argument('--profile-step-start', type=int, default=10, - help='Global step to start profiling.') - group.add_argument('--profile-step-end', type=int, default=12, - help='Global step to stop profiling.') group.add_argument('--result-rejected-tracker-filename', type=str, default=None, help='Optional name of file tracking `result_rejected` events.') group.add_argument('--disable-gloo-process-groups', action='store_false', dest='enable_gloo_process_groups', help='Disables creation and usage of Gloo process groups.') - group.add_argument('--use-pytorch-profiler', action='store_true', - help='Use the built-in pytorch profiler. ' - 'Useful if you wish to view profiles in tensorboard.', - dest='use_pytorch_profiler') - group.add_argument('--profile-ranks', nargs='+', type=int, default=[0], - help='Global ranks to profile.') - group.add_argument('--record-memory-history', action="store_true", default=False, - help='Record memory history in last rank.') - group.add_argument('--memory-snapshot-path', type=str, default="snapshot.pickle", - help='Specifies where to dump the memory history pickle.') group.add_argument('--tp-comm-overlap', action='store_true', help='Enables the ' ' overlap of Tensor parallel communication and GEMM kernels.') group.add_argument('--tp-comm-overlap-cfg', type=str, default=None, @@ -2340,6 +2418,10 @@ def _add_training_args(parser): 'with larger models, sequences, and batch sizes.') group.add_argument('--log-interval', type=int, default=100, help='Report loss and timing interval.') + group.add_argument('--log-memory-interval', type=int, default=None, + help='Report memory interval.') + group.add_argument('--log-device-memory-used', action='store_true', + help='Log device memory used (as reported by nvidia-smi).') group.add_argument('--tensorboard-dir', type=str, 
default=None, help='Write TensorBoard logs to this directory.') group.add_argument('--no-masked-softmax-fusion', @@ -2461,45 +2543,33 @@ def _add_training_args(parser): help='The submodules to offload its input. Choices: "attn_norm", "qkv_linear", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act".') group.add_argument('--min-offloaded-tensor-size', type=int, default=1024*1024, help='The minimum size of the tensor to be offloaded.') - group.add_argument('--disable-jit-fuser', action='store_true', - help='Disable the JIT fuser.') group.add_argument('--batch-invariant-mode', action='store_true', help='Use batch-invariant kernels for deterministic forward execution regardless ' 'of batch size. Ensures bitwise identical results when the same inputs are ' 'processed in different batch configurations. This is more strict than deterministic-mode ' 'which only ensures bitwise identical results when the same inputs are processed in the same batch configuration. ' 'This will significantly affect speed of training and inference as the kernels are not full optimized.') + group.add_argument('--disable-jit-fuser', action='store_true', + help='Disable the JIT fuser.') return parser def _add_rerun_machine_args(parser): - group = parser.add_argument_group(title='rerun engine') - - group.add_argument('--error-injection-rate', type=int, default=0, - help='Rate at which to inject unexpected results, ' - 'e.g. 1000 means once every 1000 result validations') - group.add_argument('--error-injection-type', type=str, default='transient_error', - choices=['correct_result', 'transient_error', 'persistent_error'], - help='Type of error to inject. 
') - group.add_argument('--rerun-mode', type=str, default='validate_results', - choices=['disabled', 'validate_results', 'report_stats'], - help='Use re-run engine to validate results (default) ' - 'or to emit stats on variability of computations due to ' - 'non-deterministic algorithms.') + from megatron.training.resilience_config import RerunStateMachineConfig + + rerun_factory = ArgumentGroupFactory(RerunStateMachineConfig, exclude=["check_for_nan_in_loss"]) + group = rerun_factory.build_group(parser, "rerun engine") return parser def _add_initialization_args(parser): - group = parser.add_argument_group(title='initialization') - - group.add_argument('--seed', type=int, default=1234, - help='Random seed used for python, numpy, ' - 'pytorch, and cuda.') - group.add_argument('--data-parallel-random-init', action='store_true', - help='Enable random initialization of params ' - 'across data parallel ranks') + from megatron.training.common_config import RNGConfig + + rng_factory = ArgumentGroupFactory(RNGConfig) + group = rng_factory.build_group(parser, "RNG and initialization") + group.add_argument('--init-method-std', type=float, default=0.02, help='Standard deviation of the zero mean normal ' 'distribution used for weight initialization.') @@ -2520,59 +2590,21 @@ def _add_initialization_args(parser): def _add_learning_rate_args(parser): - group = parser.add_argument_group(title='learning rate') + from megatron.training.training_config import SchedulerConfig + + sched_factory = ArgumentGroupFactory(SchedulerConfig, exclude=["no_weight_decay_cond_type"]) + group = sched_factory.build_group(parser, title="learning rate and weight decay") group.add_argument('--lr', type=float, default=None, help='Initial learning rate. 
Depending on decay style ' 'and initial warmup, the learning rate at each ' 'iteration would be different.') - group.add_argument('--lr-decay-style', type=str, default='linear', - choices=['constant', 'linear', 'cosine', 'inverse-square-root', 'WSD'], - help='Learning rate decay function.') - group.add_argument('--lr-wsd-decay-style', type=str, default='exponential', - choices=['exponential', 'linear', 'cosine', 'minus_sqrt'], - help='Decay style for the annealing phase of WSD'), - group.add_argument('--lr-decay-iters', type=int, default=None, - help='number of iterations to decay learning rate over,' - ' If None defaults to `--train-iters`') - group.add_argument('--lr-decay-samples', type=int, default=None, - help='number of samples to decay learning rate over,' - ' If None defaults to `--train-samples`') - group.add_argument('--lr-wsd-decay-samples', type=int, default=None, - help='number of samples for the annealing phase in the wsd schedule') - group.add_argument('--lr-wsd-decay-iters', type=int, default=None, - help='number of iterations for the annealing phase in the wsd schedule') - group.add_argument('--lr-warmup-fraction', type=float, default=None, - help='fraction of lr-warmup-(iters/samples) to use ' - 'for warmup (as a float)') - group.add_argument('--lr-warmup-iters', type=int, default=0, - help='number of iterations to linearly warmup ' - 'learning rate over.') - group.add_argument('--lr-warmup-samples', type=int, default=0, - help='number of samples to linearly warmup ' - 'learning rate over.') - group.add_argument('--lr-warmup-init', type=float, default=0.0, - help='Initial value for learning rate warmup. The ' - 'scheduler starts warmup from this value.') group.add_argument('--warmup', type=int, default=None, help='Old lr warmup argument, do not use. Use one of the' '--lr-warmup-* arguments above') group.add_argument('--min-lr', type=float, default=0.0, help='Minimum value for learning rate. 
The scheduler' 'clip values below this threshold.') - group.add_argument('--override-opt_param-scheduler', '--override-opt-param-scheduler', - action='store_true', - help='Reset the values of the scheduler (learning rate,' - 'warmup iterations, minimum learning rate, maximum ' - 'number of iterations, and decay style from input ' - 'arguments and ignore values from checkpoints. Note' - 'that all the above values will be reset.') - group.add_argument('--use-checkpoint-opt_param-scheduler', '--use-checkpoint-opt-param-scheduler', - action='store_true', - help='Use checkpoint to set the values of the scheduler ' - '(learning rate, warmup iterations, minimum learning ' - 'rate, maximum number of iterations, and decay style ' - 'from checkpoint and ignore input arguments.') group.add_argument('--decoupled-lr', type=float, default=None, help='Separate learning rate for the input and output layer') group.add_argument('--decoupled-min-lr', type=float, default=None, @@ -2659,13 +2691,13 @@ def _add_checkpointing_args(parser): dest='dist_ckpt_format_deprecated', help='Deprecated: see --ckpt-format.') group.add_argument('--ckpt-format', default='torch_dist', - choices=['torch', 'torch_dist', 'zarr', 'torch_dcp', 'fsdp_dtensor'], + choices=['torch', 'torch_dist', 'torch_dcp', 'fsdp_dtensor'], help='Checkpoint format to use. torch is the format used by torch.save/load.' ' torch_dist is a megatron built-in distributed checkpointing format.' ' torch_dcp is the torch.distributed.checkpoint format.' 
' fsdp_dtensor is a torch DCP native, Megatron FSDP training-specific checkpoint format.') group.add_argument('--ckpt-convert-format', default=None, - choices=['torch', 'torch_dist', 'zarr'], + choices=['torch', 'torch_dist'], help='Checkpoint format for conversion.') group.add_argument('--ckpt-convert-save', default=None, help='Save directory for converted checkpoint.') @@ -2954,7 +2986,7 @@ def _add_distributed_args(parser): def _add_validation_args(parser): - from megatron.training.config import ValidationConfig + from megatron.training.training_config import ValidationConfig val_factory = ArgumentGroupFactory(ValidationConfig) group = val_factory.build_group(parser, "validation") @@ -3030,6 +3062,10 @@ def _add_data_args(parser): '(3) a list of prefixes e.g. prefix1 prefix2. ' 'For (3), weights are inferred from the lengths of the contributing datasets. ' 'This argument is exclusive to the other independent --*-data-path arguments.') + group.add_argument('--phase-transition-iterations', type=str, default=None, + help='Comma-separated list of iterations where phase ' + 'transitions occur. Requires fixed global batch size across phases. ' + 'Does not support batch size ramp-up.') group.add_argument('--split', type=str, default=None, help='Comma-separated list of proportions for training,' ' validation, and test split. For example the split ' @@ -3055,6 +3091,12 @@ def _add_data_args(parser): 'we pass in a file path from which we read those arguments. ' 'This is useful when the list of data is too big. Format is a ' 'json file with `train`, `valid, `test` keys') + group.add_argument('--per-dataset-sequences-path', default=None, + help='Path to a json file with the sequences per dataset. Check the tools/build_sequences_per_dataset.py script to build this file.') + group.add_argument('--dataloader-fast-cache-load', action='store_true', + help='Option to use the fast cache loading path when building the datasets. 
Requires all the dataset caches to be built and stored in --data-cache-path.') + group.add_argument('--dataloader-defer-npy-index-mmap', action='store_true', + help='Defer the mmap of the dataset indexes (.npy files) until the first access. Requires all the dataset caches to be built and stored in --data-cache-path.') group.add_argument('--data-cache-path', default=None, help='Path to a directory to hold cached index files.') group.add_argument('--no-mmap-bin-files', action='store_false', @@ -3301,7 +3343,13 @@ def _add_moe_args(parser): group.add_argument('--moe-shared-expert-intermediate-size', type=int, default=None, help='Shared expert total ffn hidden size. ' 'It should be equal to "num_shared_experts * ffn_size_of_each_shared_expert" if there are multiple shared experts. ' - 'None means no shared expert.') + 'None means no shared expert. ' + 'By default, the shared experts execute before the router. However, when ' + '--moe-shared-expert-overlap or --overlap-moe-expert-parallel-comm is set, ' + 'the shared experts execute after the router, before the routed experts. ' + 'This makes the gradients from the router and the shared experts added in ' + 'different orders to the hidden_states, causing minor numerical differences ' + 'in the hidden_states gradient.') group.add_argument('--moe-shared-expert-gate', action='store_true', help='Enable gate for shared expert. Only effective when moe-shared-expert-intermediate-size is set.') group.add_argument('--moe-shared-expert-overlap', action='store_true', @@ -3463,7 +3511,7 @@ def _add_experimental_attention_variant_args(parser): help='Dimension per indexer head for sparse attention. 
If not set, defaults to kv-channels.') group.add_argument('--dsa-indexer-topk', default=None, type=int, help='Number of top-k tokens to select in sparse attention indexer.') - group.add_argument('--dsa-indexer-loss-coeff', default=0.0, type=float, + group.add_argument('--dsa-indexer-loss-coeff', default=None, type=float, help='Coefficient for the indexer KL divergence loss. Set to 0 to disable indexer loss.') group.add_argument('--dsa-indexer-use-sparse-loss', action='store_true', help='Use sparse indexer loss. If set, the indexer loss will be computed using the top-k indices.') diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index f7ff7cd2775..b6a1b7abee0 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -58,11 +58,13 @@ # [ModelOpt]: Import try: from modelopt.torch.opt.plugins import save_modelopt_state, save_sharded_modelopt_state + from megatron.post_training.utils import print_distributed_quant_summary has_nvidia_modelopt = True except Exception: has_nvidia_modelopt = False _CHECKPOINT_VERSION = None +_LOADED_ITERATION = None logger = getLogger(__name__) _NON_PERSISTENT_CKPT_SUBDIR = 'non_persistent' @@ -80,6 +82,22 @@ def get_checkpoint_version(): return _CHECKPOINT_VERSION +def set_loaded_iteration(value): + """Set the iteration that was loaded from checkpoint. + + This is stored separately from args to avoid polluting the checkpoint + with runtime state (args is saved in checkpoints). 
+ """ + global _LOADED_ITERATION + _LOADED_ITERATION = value + + +def get_loaded_iteration(): + """Get the iteration that was loaded from checkpoint, or None if no checkpoint was loaded.""" + global _LOADED_ITERATION + return _LOADED_ITERATION + + def check_checkpoint_args(checkpoint_args): """Ensure fixed arguments for a model are the same for the input arguments and the one retrieved from checkpoint.""" @@ -112,6 +130,8 @@ def _compare(arg_name, old_arg_name=None, default=None): _compare('tokenizer_type') if args.data_parallel_random_init: _compare('data_parallel_random_init') + if args.phase_transition_iterations: + _compare('global_batch_size') if get_checkpoint_version() < 3.0: _compare('tensor_model_parallel_size', old_arg_name='model_parallel_size') @@ -514,6 +534,14 @@ def save_checkpoint(iteration, model, optimizer, opt_param_scheduler, num_floati if not optimizer.is_stub_optimizer: optimizer.save_state_dict_to_file(optim_checkpoint_name) + # LayerWiseDistributedOptimizer save optimizer state to file on different ranks + if getattr(args, "optimizer", "adam").startswith("dist_") and args.ckpt_format == 'torch': + dp_rank = mpu.get_data_parallel_rank() + optim_checkpoint_name = os.path.join(os.path.dirname(checkpoint_name), f"layer_wise_optimizer_{dp_rank}.pt") + ensure_directory_exists(optim_checkpoint_name) + if not optimizer.is_stub_optimizer: + optimizer.save_state_dict_to_file(optim_checkpoint_name) + async_save_request = None if args.async_save: if ckpt_type == CheckpointType.LEGACY: @@ -1129,6 +1157,10 @@ def _load_base_checkpoint( if getattr(args, "ckpt_step", None): iteration = args.ckpt_step + # Record the iteration loaded (stored separately from args to avoid + # polluting checkpoints, since args is saved in checkpoints). 
+ set_loaded_iteration(iteration) + if non_persistent_iteration != -1: # there is a non-persistent checkpoint if non_persistent_iteration >= iteration: return _load_non_persistent_base_checkpoint( @@ -1837,7 +1869,10 @@ def load_model_state_dict(module, state_dict, strict: bool): f'[ t {mpu.get_tensor_model_parallel_rank() + 1}/{mpu.get_tensor_model_parallel_world_size()}, ' f'p {mpu.get_pipeline_model_parallel_rank() + 1}/{mpu.get_pipeline_model_parallel_world_size()} ] ' f'at iteration {iteration}') - + + if has_nvidia_modelopt: + print_distributed_quant_summary(model, msg="After loading checkpoint") + # Additional callback for wandb (last rank) if not torch.distributed.is_initialized() \ or is_last_rank(): diff --git a/megatron/training/common_config.py b/megatron/training/common_config.py new file mode 100644 index 00000000000..d1096e91154 --- /dev/null +++ b/megatron/training/common_config.py @@ -0,0 +1,56 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +from dataclasses import dataclass, field + +@dataclass(kw_only=True) +class RNGConfig: + """Configuration settings for random number generation.""" + + seed: int = 1234 + """Random seed used for python, numpy, pytorch, and cuda.""" + + te_rng_tracker: bool = False + """Use the Transformer Engine version of the random number generator. + Required for CUDA graphs support.""" + + inference_rng_tracker: bool = False + """Use a random number generator configured for inference.""" + + data_parallel_random_init: bool = False + """Enable random initialization of params across data parallel ranks""" + + +@dataclass(kw_only=True) +class ProfilingConfig: + """Configuration settings for profiling the training process.""" + + use_nsys_profiler: bool = field(default=False, metadata={"argparse_meta": {"arg_names": ["--profile"], "dest": "profile"}}) + """Enable nsys profiling. When using this option, nsys options should be specified in + commandline. 
An example nsys commandline is + `nsys profile -s none -t nvtx,cuda -o --force-overwrite true + --capture-range=cudaProfilerApi --capture-range-end=stop`. + """ + + profile_step_start: int = 10 + """Global step to start profiling.""" + + profile_step_end: int = 12 + """Global step to stop profiling.""" + + use_pytorch_profiler: bool = False + """Use the built-in pytorch profiler. Useful if you wish to view profiles in tensorboard.""" + + profile_ranks: list[int] = field(default_factory=lambda: [0]) + """Global ranks to profile.""" + + record_memory_history: bool = False + """Record memory history in last rank.""" + + memory_snapshot_path: str = "snapshot.pickle" + """Specifies where to dump the memory history pickle.""" + + record_shapes: bool = False + """Record shapes of tensors.""" + + nvtx_ranges: bool = False + """Enable NVTX range annotations for profiling. When enabled, inserts NVTX markers + to categorize execution in profiler output.""" diff --git a/megatron/training/datasets/data_samplers.py b/megatron/training/datasets/data_samplers.py index d33250520dd..ca4cc1b36a3 100644 --- a/megatron/training/datasets/data_samplers.py +++ b/megatron/training/datasets/data_samplers.py @@ -83,14 +83,15 @@ def worker_init_fn(_): extra_kwargs = {"collate_fn": lambda x: x,} else: extra_kwargs = {} - return torch.utils.data.DataLoader(dataset, - batch_sampler=batch_sampler, - num_workers=args.num_workers, - pin_memory=True, - persistent_workers=True if args.num_workers > 0 else False, - worker_init_fn=maybe_worker_init_fn, - **extra_kwargs, - ) + return torch.utils.data.DataLoader( + dataset, + batch_sampler=batch_sampler, + num_workers=args.num_workers, + pin_memory=True, + persistent_workers=True if args.num_workers > 0 else False, + worker_init_fn=maybe_worker_init_fn, + **extra_kwargs, + ) class MegatronPretrainingSampler: """ diff --git a/megatron/training/datasets/sft_dataset.py b/megatron/training/datasets/sft_dataset.py index e4d8a6faf24..2cbc4e424eb 100644 --- 
a/megatron/training/datasets/sft_dataset.py +++ b/megatron/training/datasets/sft_dataset.py @@ -1,5 +1,7 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +import atexit, json +from collections import Counter from typing import Any, Dict, Optional import numpy as np @@ -25,6 +27,9 @@ class SFTLowLevelDataset: {"role": "user", "content": "something1"}, {"role": "assistant", "content": "something2"}, ] + A jsonl line can contain multiple conversations packed together into on list. Each + conversation starts with the system role, and conversations can have multiple turns + of the user and assistant roles. """ def __init__(self, dataset_path: str) -> None: @@ -68,79 +73,131 @@ def build_low_level_dataset(dataset_path: str, config: GPTDatasetConfig) -> LowL def __len__(self) -> int: return self.num_samples + def _split_conversations(self, merged_conversations): + split_conversations = [] + current = [] + for msg in merged_conversations: + # Whenever we see a new system message, start a new conversation + if msg["role"] == "system": + if current: # If previously accumulating a conversation, then store it + split_conversations.append(current) + current = [msg] # Then start the new conversation + else: + current.append(msg) # Continue accumulating the current conversation + if current: # Store any remaining conversation + split_conversations.append(current) + return split_conversations + def __getitem__(self, idx: int) -> Dict[str, Any]: tokenizer = self.config.tokenizer - max_seq_len = self.config.sequence_length - - conversation_list = self.dataset[int(self.indices[idx % len(self.indices)])] - tokens, target = tokenizer.tokenize_conversation( - conversation_list, return_target=True, add_generation_prompt=False - ) - - force_eod_length = int(tokenizer.force_eod) - - if len(tokens) > max_seq_len - force_eod_length: - tokens = tokens[: max_seq_len - force_eod_length] - target = target[: max_seq_len - force_eod_length] - - # padding - num_tokens = len(tokens) 
+ force_eod_length - padding_len = max_seq_len - num_tokens - assert padding_len >= 0 - filler = [tokenizer.eod] * force_eod_length + [tokenizer.pad] * (padding_len + 1) - - tokens = np.array(tokens.tolist() + filler, dtype=np.int64) - target = np.array(target.tolist() + filler, dtype=np.int64) - - tokens = torch.tensor(tokens) - target = torch.tensor(target) - - tokens = tokens[:-1].contiguous() - target = target[1:].contiguous() - - loss_mask, position_ids, attention_mask = self._get_ltor_masks_and_position_ids( - max_seq_len, target, tokenizer.pad - ) - - if self.config.create_attention_mask: - ret = { - 'tokens': tokens, - 'labels': target, - 'attention_mask': attention_mask, - 'loss_mask': loss_mask, - 'position_ids': position_ids, - } - else: - ret = { - 'tokens': tokens, - 'labels': target, - 'loss_mask': loss_mask, - 'position_ids': position_ids, - } - - return ret - - def _get_ltor_masks_and_position_ids(self, max_seq_len, target, pad_token): - """Build masks and position id for left to right model for SFT""" - - assert not self.config.reset_position_ids and not self.config.reset_attention_mask + pack_length = self.config.sequence_length + + merged_conversations = self.dataset[int(self.indices[idx % len(self.indices)])] + split_conversations = self._split_conversations(merged_conversations) + + def extend_with_padding(tokens, targets, positions, pad_len): + tokens.extend([pad] * pad_len) + targets.extend([pad] * pad_len) + positions.extend(range(positions[-1]+1, positions[-1]+1+pad_len)) + + pack_tokens = [] + pack_targets = [] + pack_positions = [] + cu_seqlens = [0] + eod = tokenizer.eod + pad = tokenizer.pad + # TODO(duncan): Track number of convs dropped and/or truncated and amount of end-padding + for conversation in split_conversations: + + tokens, targets = tokenizer.tokenize_conversation( + conversation, return_target=True, add_generation_prompt=False + ) - # Position ids. 
- position_ids = torch.arange(max_seq_len, dtype=torch.long) + tokens_list = tokens.tolist() + targets_list = targets.tolist() + + # Add EOD, unless it's already present + if tokens_list[-1] != eod: + tokens_list.append(eod) + targets_list.append(eod) + + pack_tokens.extend(tokens_list) + pack_targets.extend(targets_list) + + assert not self.config.reset_position_ids + pack_positions.extend(range(len(tokens_list))) + + if self.config.context_parallel_size > 1: + pad_granularity = self.config.context_parallel_size * 2 + mod_token_count = len(pack_tokens) % pad_granularity + if mod_token_count != 0: + pad_len = pad_granularity - mod_token_count + extend_with_padding(pack_tokens, pack_targets, pack_positions, pad_len) + + # TODO(duncan): Consider also padding to multiple of number of tokens here. This might + # be needed for efficiency (and potentially set via command-line argument). + + cu_seqlens.append(len(pack_tokens)) + + # Handle any necessary truncation + if len(pack_tokens) >= pack_length + 1: # +1 here to account for later alignment + truncate_left_not_right = True # TODO(duncan): plumb this switch in + if truncate_left_not_right: # Retain existing eod + max_body = pack_length + pack_tokens = pack_tokens[-max_body:] + pack_targets = pack_targets[-max_body:] + pack_tokens.append(pad) + pack_targets.append(pad) + else: # Truncate right (need to add eod) + max_body = pack_length - 1 + pack_tokens = pack_tokens[:max_body] + pack_targets = pack_targets[:max_body] + pack_tokens.extend([eod, pad]) + pack_targets.extend([eod, pad]) + pack_positions = pack_positions[:pack_length+1] + # Note len({pack_tokens, pack_targets, pack_positions}) should be pack_length + 1 + cu_seqlens[-1] = len(pack_tokens) - 1 + break + + # Handle any necessary padding + if len(pack_tokens) < pack_length + 1: # +1 here to account for later alignment + pad_len = pack_length + 1 - len(pack_tokens) + extend_with_padding(pack_tokens, pack_targets, pack_positions, pad_len) + # Note 
len({pack_tokens, pack_targets, pack_positions}) should be pack_length + 1 + cu_seqlens[-1] = len(pack_tokens) - 1 + + assert len(pack_tokens) == pack_length + 1 + assert len(pack_targets) == pack_length + 1 + assert len(pack_positions) == pack_length + 1 + + # Align and convert to tensors + input_ids = torch.tensor(pack_tokens[:-1], dtype=torch.int64) + labels = torch.tensor(pack_targets[1:], dtype=torch.int64) + position_ids = torch.tensor(pack_positions[:-1], dtype=torch.int64) # Loss mask. - loss_mask = torch.ones(max_seq_len, dtype=torch.float) - loss_mask[target == pad_token] = 0.0 # mask paddings - loss_mask[target == IGNORE_INDEX] = 0.0 # mask prompts - - if self.config.create_attention_mask: - attention_mask = torch.tril( - torch.ones((seq_length, seq_length), device=data.device) - ).unsqueeze(0) - # Convert attention mask to binary: - attention_mask = attention_mask < 0.5 - else: - attention_mask = None - - return loss_mask, position_ids, attention_mask + loss_mask = torch.ones(pack_length, dtype=torch.float32) + loss_mask[labels == pad] = 0.0 # Mask paddings + loss_mask[labels == IGNORE_INDEX] = 0.0 # mask prompts + + # TODO(duncan): Optionally create an attention mask + assert not self.config.create_attention_mask and not self.config.reset_attention_mask + # attention_mask = None + + assert len(cu_seqlens) >= 2 + cu_seqlens = torch.tensor(cu_seqlens, dtype=torch.int32) + # Calculating max_seqlen here, rather than incrementally above, because of possible + # effects of truncation and padding + adjacent_diffs = cu_seqlens[1:] - cu_seqlens[:-1] + max_seqlen = adjacent_diffs.max() # max_seqlen is a 0-D tensor + + return { + 'tokens': input_ids, + 'labels': labels, + # 'attention_mask': attention_mask, # PyTorch collate cannot handle NoneType + 'loss_mask': loss_mask, + 'position_ids': position_ids, + 'cu_seqlens': cu_seqlens, + 'max_seqlen': max_seqlen, + } diff --git a/megatron/training/ft_integration.py b/megatron/training/ft_integration.py index 
e10e3da995c..670cf492602 100644 --- a/megatron/training/ft_integration.py +++ b/megatron/training/ft_integration.py @@ -45,6 +45,7 @@ import torch +from . import arguments from . import global_vars from .utils import is_rank0, print_rank_0 @@ -72,25 +73,22 @@ def get_rank_monitor_client() -> Optional[Any]: return _GLOBAL_RANK_MONITOR_CLIENT -def setup(args: argparse.Namespace) -> None: - """Initialize fault tolerance - - Args: - args (argparse.Namespace): parsed Megatron-LM command line arguments +def setup() -> None: + """Initialize fault tolerance before initialize_megatron""" + args = arguments.parse_args(ignore_unknown_args=True) + if not args.enable_ft_package: + return - Raises: - ValueError: if invalid config is provided - """ + # Initialize fault tolerance from nvidia_resiliency_ext.fault_tolerance import RankMonitorClient - print_rank_0(f"FT: initializing...") + if os.environ.get("RANK") == "0": + print("FT: initializing...", flush=True) checkpoint_dir = args.save if not checkpoint_dir: raise ValueError("checkpointing save dir must be set to enable fault tolerance") - if is_rank0() and not os.path.exists(checkpoint_dir): - # MLM checkpoint dir will be needed for saving FT state. - # it can happen before the checkpointing, so create it in advance + if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir, exist_ok=True) cli = RankMonitorClient() @@ -109,7 +107,8 @@ def setup(args: argparse.Namespace) -> None: cli.init_workload_monitoring() _load_state_if_exists() - print_rank_0(f"FT: initialized. Timeouts={cli.section_timeouts}") + if os.environ.get("RANK") == "0": + print(f"FT: initialized. 
Timeouts={cli.section_timeouts}", flush=True) cli.start_section("setup") global _is_setup_section_open diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index 1a119b127e4..00fa9ad5088 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -22,6 +22,7 @@ RerunMode, initialize_rerun_state_machine, ) +from megatron.core.transformer.custom_layers.batch_invariant_kernels import enable_batch_invariant_mode from megatron.core.utils import get_te_version, is_te_min_version, is_torch_min_version from megatron.legacy import fused_kernels from megatron.training import get_adlr_autoresume, get_args, get_tensorboard_writer @@ -114,6 +115,11 @@ def state_restore_func(state_dict): ), result_rejected_tracker_filename=args.result_rejected_tracker_filename, ) + + if args.batch_invariant_mode: + if args.rank == 0: + print("Enabling batch invariant mode globally", flush=True) + enable_batch_invariant_mode() # torch.distributed initialization def finish_mpu_init(): diff --git a/megatron/training/resilience_config.py b/megatron/training/resilience_config.py new file mode 100644 index 00000000000..13929c25660 --- /dev/null +++ b/megatron/training/resilience_config.py @@ -0,0 +1,24 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +from dataclasses import dataclass +from typing import Literal + +@dataclass(kw_only=True) +class RerunStateMachineConfig: + """Configuration for the rerun state machine used for result validation or stats.""" + + error_injection_rate: int = 0 + """Rate at which to inject unexpected results, e.g. 1000 means + once every 1000 result validations""" + + error_injection_type: Literal["correct_result", "transient_error", "persistent_error"] = "transient_error" + """Type of error to inject. 
""" + + rerun_mode: Literal["disabled", "validate_results", "report_stats"] = "validate_results" + """Use re-run engine to validate results (default) or to emit stats + on variability of computations due to non-deterministic algorithms.""" + + check_for_nan_in_loss: bool = True + """Check for NaN in the loss.""" + + check_for_spiky_loss: bool = False + """Check for spiky loss.""" diff --git a/megatron/training/tokenizer/sft_tokenizer.py b/megatron/training/tokenizer/sft_tokenizer.py index f525352e892..274c6f6c944 100644 --- a/megatron/training/tokenizer/sft_tokenizer.py +++ b/megatron/training/tokenizer/sft_tokenizer.py @@ -160,11 +160,6 @@ def get_special_tokens(self): """Get special tokens.""" return self._tokenizer.get_added_vocab() - @property - def force_eod(self): - """To force an EOD at the end of every data sample in SFT.""" - return self._prompt_format == "nemotron-h-aligned" - @property def pad(self): """Pad token ID.""" diff --git a/megatron/training/training.py b/megatron/training/training.py index 13ad0025e43..ab4679b5e30 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1,6 +1,35 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. """Pretrain utilities.""" +import time +# The earliest we can measure the start time. +_TRAIN_START_TIME = time.time() + +# Startup timestamps for tracking program initialization phases +_STARTUP_TIMESTAMPS = { + 'program_start': None, # Set by entry script before imports + 'main_entry': None, # Set by entry script at start of __main__ + 'pretrain_entry': None, # Set at top of pretrain() +} + + +def set_startup_timestamps(program_start=None, main_entry=None): + """Set startup timestamps from the entry script. + + Call this after imports but before calling pretrain() to register + the program start time and main entry time. + + Args: + program_start: Timestamp captured at very start of program, before any imports. 
+ main_entry: Timestamp captured right after entering __main__ block. + """ + global _TRAIN_START_TIME, _STARTUP_TIMESTAMPS + if program_start is not None: + _TRAIN_START_TIME = program_start + _STARTUP_TIMESTAMPS['program_start'] = program_start + if main_entry is not None: + _STARTUP_TIMESTAMPS['main_entry'] = main_entry + import copy import dataclasses @@ -12,6 +41,7 @@ import math import os import sys +from contextlib import nullcontext from typing import Any, Optional, Dict import torch.distributed @@ -22,10 +52,9 @@ # Make default logging level INFO, but filter out all log messages not from MCore. logging.basicConfig(handlers=[CustomHandler()], level=logging.INFO) from .theoretical_memory_usage import report_theoretical_memory -import time -# The earliest we can measure the start time. -_TRAIN_START_TIME = time.time() +_LEGACY_TRAIN_START_TIME = time.time() # NOTE(asolergi-nv): Legacy timestamp + import torch try: @@ -33,6 +62,7 @@ has_rl_utils = True except ImportError: has_rl_utils = False +from megatron.rl.parallel_utils import build_inference_pg_collection try: from modelopt.torch.distill.plugins.megatron import ( get_tensor_shapes_adjust_fn_for_distillation, @@ -72,6 +102,7 @@ from megatron.training.checkpointing import load_checkpoint from megatron.training.checkpointing import save_checkpoint from megatron.training.checkpointing import checkpoint_exists +from megatron.training.checkpointing import get_loaded_iteration from megatron.core.full_cuda_graph import FullCudaGraphWrapper from megatron.core.transformer.cuda_graphs import TECudaGraphHelper from megatron.core.transformer.enums import CudaGraphScope @@ -80,7 +111,6 @@ from megatron.core.distributed import DistributedDataParallel as DDP from megatron.core.distributed.fsdp.mcore_fsdp_adapter import FullyShardedDataParallel as megatron_FSDP from megatron.core.optimizer.optimizer import param_group_identifier_keys -from megatron.core.transformer.custom_layers.batch_invariant_kernels import 
enable_batch_invariant_mode from megatron.core.optimizer.qk_clip import clip_qk @@ -118,6 +148,8 @@ destroy_model_parallel, update_pg_timeout ) +from megatron.core.inference.unified_memory import create_unified_mempool +from megatron.core.resharding.refit import swap_model_weights from megatron.core.pipeline_parallel import get_forward_backward_func from megatron.core.num_microbatches_calculator import ( @@ -172,13 +204,16 @@ def destroy_global_state(): destroy_rerun_state_machine() -def print_datetime(string): - """Note that this call will sync across all ranks.""" +def print_datetime(string, override_timestamp=None): + """Note that this call will sync across all ranks. Use override_timestamp if provided; + otherwise use current timestamp.""" torch.distributed.barrier() - time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + if override_timestamp is None: + time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') + else: + time_str = datetime.fromtimestamp(override_timestamp).strftime('%Y-%m-%d %H:%M:%S.%f') print_rank_0(f'[{string}] datetime: {time_str} ') - def num_floating_point_operations(args, batch_size): def calculate_layer_counts(): """Calculate the number of attention, Mamba, and MLP layers.""" @@ -711,11 +746,21 @@ def pretrain( inprocess_call_wrapper: an optional instance of inprocess.CallWrapper, it is automatically injected when in-process restart is in use """ + # Capture timestamp right at top of pretrain, before initialize_megatron + global _STARTUP_TIMESTAMPS + _STARTUP_TIMESTAMPS['pretrain_entry'] = time.time() if inprocess_call_wrapper is not None: iteration = inprocess_call_wrapper.iteration store = torch.distributed.PrefixStore(str(iteration), store) + timestamp_after_inprocess_setup = time.time() + + # Early fault tolerance setup - must be done before initialize_megatron + # to enable monitoring of the initialization process + ft_integration.setup() + timestamp_after_in_job_setup = time.time() + # Initalize and get arguments, timers, 
and Tensorboard writer. initialize_megatron( extra_args_provider=extra_args_provider, @@ -725,6 +770,8 @@ def pretrain( store=store, ) + timestamp_after_initialize_megatron = time.time() + args = get_args() timers = get_timers() @@ -733,38 +780,83 @@ def pretrain( set_ideal_affinity_for_current_gpu ) set_ideal_affinity_for_current_gpu() - if args.batch_invariant_mode: - print_rank_0("Enabling batch invariant mode globally",flush=True) + print_rank_0("Enabling batch invariant mode globally", flush=True) enable_batch_invariant_mode() if args.log_progress: append_to_progress_log("Starting job") - # Initialize fault tolerance - # NOTE: ft_integration functions other than `setup` are no-op if the FT is not initialized - if args.enable_ft_package: - ft_integration.setup(args) - ft_integration.maybe_setup_simulated_fault() - # Set pytorch JIT layer fusion options and warmup JIT functions. set_jit_fusion_options() - # Adjust the startup time so it reflects the largest value. + timestamp_after_set_jit_fusion_options = time.time() + + # Adjust the startup time so it reflects the global minimum. # This will be closer to what scheduler will see (outside of - # image ... launches. - global _TRAIN_START_TIME - start_time_tensor = torch.tensor([_TRAIN_START_TIME], dtype=torch.double, device='cuda') + # image ... launches). 
+ program_start = _STARTUP_TIMESTAMPS.get('program_start') + main_entry = _STARTUP_TIMESTAMPS.get('main_entry') + pretrain_entry = _STARTUP_TIMESTAMPS.get('pretrain_entry') + + # Initialize program_start_global with a fallback value in case set_startup_timestamps() wasn't called + program_start_global = _TRAIN_START_TIME + if _STARTUP_TIMESTAMPS['program_start'] is not None: + program_start_global = torch.tensor([_STARTUP_TIMESTAMPS['program_start']], dtype=torch.double, device='cuda') + torch.distributed.all_reduce(program_start_global, op=torch.distributed.ReduceOp.MIN) + program_start_global = program_start_global.item() + set_startup_timestamps(program_start=program_start_global) + + global _LEGACY_TRAIN_START_TIME + start_time_tensor = torch.tensor([_LEGACY_TRAIN_START_TIME], dtype=torch.double, device='cuda') torch.distributed.all_reduce(start_time_tensor, op=torch.distributed.ReduceOp.MIN) - _TRAIN_START_TIME = start_time_tensor.item() + _LEGACY_TRAIN_START_TIME = start_time_tensor.item() + + # Capture megatron init end time (matches original time.time() placement) + megatron_init_end = time.time() app_metrics = {} - app_metrics['app_start_time'] = round(_TRAIN_START_TIME * 1000.0) - app_metrics['app_model_init_start_time'] = round(_TRAIN_START_TIME * 1000.0) + app_metrics['app_start_time'] = round(program_start_global * 1000.0) + app_metrics['app_model_init_start_time'] = round(program_start_global * 1000.0) + # Print basic megatron init time (using global min start) + # NOTE(asolergi-nv): This is not entirely accurate, but we keep it for backwards compatibility. 
print_rank_0( - 'time to initialize megatron (seconds): {:.3f}'.format(time.time() - _TRAIN_START_TIME) + 'time to initialize megatron (seconds): {:.3f}'.format(megatron_init_end - _LEGACY_TRAIN_START_TIME) ) + + # Note, not entirely accurate as rank 0 might not be the first or last to hit these timestamps + print_datetime('after in-process setup and before initialize_megatron', timestamp_after_inprocess_setup) + print_datetime('after in-job setup and before initialize_megatron', timestamp_after_in_job_setup) + + if program_start is not None and main_entry is not None and pretrain_entry is not None: + # Inject startup deltas into timers + startup_timers = { + 'startup-program-entry-spread': program_start - program_start_global, # Local program start timestamp vs the global earliest program start timestamp + 'startup-library-setup': main_entry - program_start, # Local library imports + 'startup-program-setup': pretrain_entry - main_entry, # Local __main__ entry to pretrain entry + 'startup-in-process-setup': timestamp_after_inprocess_setup - pretrain_entry, # Local in-process setup + 'startup-in-job-setup': timestamp_after_in_job_setup - timestamp_after_inprocess_setup, # Local in-job setup + 'startup-initialize-megatron': timestamp_after_initialize_megatron - timestamp_after_in_job_setup, # Local initialize megatron + 'startup-set-jit-fusion-options': timestamp_after_set_jit_fusion_options - timestamp_after_initialize_megatron, # Local set JIT fusion options + 'all-reduce-start-timestamps-tensor': megatron_init_end - timestamp_after_set_jit_fusion_options, # 2x All-reduce, first collective call + 'startup-megatron-init-local': megatron_init_end - pretrain_entry, # Local megatron init + 'startup-megatron-init-global': megatron_init_end - program_start_global, # Local megatron init vs the global earliest program start timestamp + } + for name, delta in startup_timers.items(): + timers(name, log_level=0).set_elapsed(delta) + timers.log(list(startup_timers.keys()), 
barrier=True) + + # Print rank 0's absolute timestamps + startup_timestamps = { + 'before library-setup': program_start, + 'after library-setup': main_entry, + 'before megatron-init': pretrain_entry, + } + for name, ts in startup_timestamps.items(): + ts_str = datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S.%f') + print_rank_0(f'[{name}] datetime: {ts_str}') + print_datetime('after megatron is initialized') app_metrics['app_model_init_finish_time'] = one_logger_utils.get_timestamp_in_ms() @@ -815,6 +907,70 @@ def pretrain( print_datetime('after model, optimizer, and learning rate ' 'scheduler are built') config = get_model_config(model[0]) + # Build a separate inference model for RL if requested. + inference_model = None + if args.perform_rl_step: + if ( + args.rl_inference_tensor_model_parallel_size is not None + or args.rl_inference_pipeline_model_parallel_size is not None + or args.rl_inference_expert_model_parallel_size is not None + or args.rl_inference_expert_tensor_model_parallel_size is not None + ): + print_rank_0( + "Building separate RL inference model with custom parallelism: " + f"TP={args.rl_inference_tensor_model_parallel_size}, " + f"PP={args.rl_inference_pipeline_model_parallel_size}, " + f"EP={args.rl_inference_expert_model_parallel_size}, " + f"ExptTP={args.rl_inference_expert_tensor_model_parallel_size}" + ) + inference_pg_collection = build_inference_pg_collection( + args.world_size, + tp_size=args.rl_inference_tensor_model_parallel_size, + pp_size=args.rl_inference_pipeline_model_parallel_size, + ep_size=args.rl_inference_expert_model_parallel_size, + expt_tp_size=args.rl_inference_expert_tensor_model_parallel_size, + use_tp_pp_dp_mapping=args.use_tp_pp_dp_mapping, + ) + + # Build an isolated inference config so training config remains unchanged + inference_config = copy.deepcopy(config) + if args.rl_inference_tensor_model_parallel_size is not None: + inference_config.tensor_model_parallel_size = 
args.rl_inference_tensor_model_parallel_size + if args.rl_inference_pipeline_model_parallel_size is not None: + inference_config.pipeline_model_parallel_size = ( + args.rl_inference_pipeline_model_parallel_size + ) + if args.rl_inference_expert_model_parallel_size is not None: + inference_config.expert_model_parallel_size = ( + args.rl_inference_expert_model_parallel_size + ) + if args.rl_inference_expert_tensor_model_parallel_size is not None: + inference_config.expert_tensor_parallel_size = ( + args.rl_inference_expert_tensor_model_parallel_size + ) + + # Optionally allocate the RL inference model weights from a unified virtual memory (UVM) + # mempool so we can prefetch weights to CPU when idle while keeping CUDA-graph-safe pointers. + uvm_mempool = None + uvm_level = args.rl_inference_model_unified_memory_level + if uvm_level and uvm_level > 0: + uvm_mempool = create_unified_mempool() + + mempool_ctx = ( + torch.cuda.use_mem_pool(uvm_mempool) if uvm_mempool is not None else nullcontext() + ) + with mempool_ctx: + inference_model = get_model( + model_provider, + model_type, + wrap_with_ddp=False, + pg_collection=inference_pg_collection, + config=inference_config, + ) + inference_model[0].eval() + + + # Data stuff. app_metrics['app_build_dataiters_start_time'] = one_logger_utils.get_timestamp_in_ms() timers('train/valid/test-data-iterators-setup', log_level=0).start(barrier=True) @@ -889,6 +1045,7 @@ def pretrain( config, checkpointing_context, non_loss_data_func, + inference_model, ) print_datetime('after training is done') @@ -917,8 +1074,18 @@ def pretrain( if args.do_valid: prefix = f'iteration {iteration} on validation set' if getattr(args, 'perform_rl_step', False): + rl_eval_model = model + if inference_model is not None: + inf_core = unwrap_model(inference_model[0]) + # If separate inference and training models, swap training weights + # back to the inference model for RL evaluation. 
+ rl_utils._maybe_prefetch_separate_inference_model_weights(inf_core, to_cpu=False) + swap_model_weights(model, inference_model, args.refit_method) + rl_eval_model = inference_model rl_utils.evaluate_and_print_results_rl( - valid_data_iterator, model, optimizer, + valid_data_iterator, + rl_eval_model, + optimizer, iteration, write_to_tensorboard=not args.skip_train ) else: @@ -1015,7 +1182,6 @@ def get_model(model_provider_func, model_type=ModelType.encoder_or_decoder, wrap # For distillation ckpts without ModelOpt state args.modelopt_enabled = True - # Build model. def build_model(): if ( @@ -1054,6 +1220,7 @@ def build_model(): model.model_type = model_type return model + if args.init_model_with_meta_device: with torch.device('meta'): model = build_model() @@ -1103,12 +1270,8 @@ def build_model(): # Materialize tensors on meta device (GPU allocation) if not using FSDP2 and not using Megatron FSDP. if args.init_model_with_meta_device and not args.use_torch_fsdp2 and not args.use_megatron_fsdp: - #for model_module in model: model = [to_empty_if_meta_device(model_module, device=torch.device("cuda")) for model_module in model] - - - # Before TE2.x: The model_module.bfloat16()/model_module.half() above will call the inplace # copy of TE's Float8Tensor, which will write an unwanted value (amax calculated # from the current fp8 param) to its amax_history. The below function will correct @@ -1163,8 +1326,13 @@ def build_model(): # Set bucket_size to infinity if overlap_grad_reduce is False. if not ddp_config.overlap_grad_reduce: ddp_config.bucket_size = None - - with torch.cuda.stream(torch.cuda.Stream()): + # Setup stream for ddp initialization. The side-stream may be necessary for cuda graph + # capture support with DDP, but we sync it with the current stream to avoid races. 
+ ddp_stream = torch.cuda.Stream() + # Wait for the default stream to complete before starting ddp_stream + ddp_stream.wait_stream(torch.cuda.current_stream()) + # Make ddp_stream start after whatever the default stream already queued + with torch.cuda.stream(ddp_stream): model = [ DP( config=config, @@ -1177,6 +1345,9 @@ def build_model(): ) for (model_chunk_idx, model_chunk) in enumerate(model) ] + # End of setup_stream + # Critical: ensure side-stream work completes before touching params on default stream + torch.cuda.current_stream().wait_stream(ddp_stream) # Broadcast params from data parallel src rank to other data parallel ranks. if args.data_parallel_random_init: @@ -1310,8 +1481,8 @@ def setup_model_and_optimizer( use_gloo_process_groups=args.enable_gloo_process_groups, layer_wise_distributed_optimizer='dist' in config.optimizer, ) - opt_param_scheduler = get_optimizer_param_scheduler(optimizer) + one_logger and one_logger.log_metrics({"app_build_optimzer_finish_time": one_logger_utils.get_timestamp_in_ms()}) if args.moe_use_upcycling: @@ -1524,7 +1695,7 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch log_max_attention_logit = 0 if args.qk_clip or args.log_max_attention_logit: log_max_attention_logit = clip_qk(model, log_max_only=not args.qk_clip) - + timers('optimizer').stop() # when freezing sub-models we may have a mixture of successful and unsucessful ranks, @@ -1600,6 +1771,7 @@ def training_log( num_zeros_in_grad, max_attention_logit, pg_collection=None, + is_first_iteration=False, ): """Log training information such as losses, timing, ....""" args = get_args() @@ -1609,6 +1781,9 @@ def training_log( one_logger = get_one_logger() energy_monitor = get_energy_monitor() + # On first iteration, log stats but don't reset accumulators so normal interval stats remain accurate. + should_reset = not is_first_iteration + # Advanced, skipped, and Nan iterations. 
advanced_iters_key = 'advanced iterations' skipped_iters_key = 'skipped iterations' @@ -1764,6 +1939,8 @@ def training_log( writer.add_scalar('max_attention_logit', max_attention_logit, iteration) if wandb_writer: wandb_writer.log({'max_attention_logit': max_attention_logit}, iteration) + + # Log MoE metrics. if args.num_experts is not None: moe_loss_scale = 1 / get_num_microbatches() track_names = [] @@ -1795,12 +1972,14 @@ def training_log( mtp_num_layers=args.mtp_num_layers, pg_collection=pg_collection, ) + + # Log MTP metrics. if args.mtp_num_layers is not None: mtp_loss_scale = 1 / get_num_microbatches() MTPLossLoggingHelper.track_mtp_metrics( mtp_loss_scale, iteration, writer, wandb_writer, total_loss_dict ) - # Track sparse attention indexer loss + # Track sparse attention indexer loss. if args.dsa_indexer_loss_coeff is not None and args.dsa_indexer_loss_coeff > 0: indexer_loss_scale = 1 / get_num_microbatches() DSAIndexerLossLoggingHelper.track_indexer_metrics( @@ -1810,7 +1989,8 @@ def training_log( wandb_writer=wandb_writer, total_loss_dict=total_loss_dict, ) - if iteration % args.log_interval == 0: + # Dump memory snapshot and print metrics to stdout. 
+ if iteration % args.log_interval == 0 or is_first_iteration: if args.record_memory_history and (is_last_rank() or torch.distributed.get_backend() == 'fake'): snapshot = torch.cuda.memory._snapshot() from pickle import dump @@ -1818,7 +1998,7 @@ def training_log( with open(args.memory_snapshot_path, 'wb') as f: dump(snapshot, f) - elapsed_time = timers('interval-time').elapsed(barrier=True) + elapsed_time = timers('interval-time').elapsed(barrier=True, reset=should_reset) elapsed_time_per_iteration = elapsed_time / total_iterations throughput = num_floating_point_operations(args, batch_size) / ( @@ -1827,7 +2007,10 @@ def training_log( one_logger_utils.track_e2e_metrics(args.log_throughput, throughput) - if args.log_timers_to_tensorboard: + # We log to stdout after the first iteration (controlled by `is_first_iteration`) + # to document initialization overhead. Log statistics to TensorBoard and + # WandB according to the regular schedule. + if args.log_timers_to_tensorboard and not is_first_iteration: if writer: writer.add_scalar('iteration-time', elapsed_time_per_iteration, iteration) if wandb_writer: @@ -1870,7 +2053,8 @@ def training_log( ) if avg > 0.0: log_string += ' {}: {:.6E} |'.format(key, avg) - total_loss_dict[key] = torch.tensor([0.0], dtype=torch.float, device='cuda') + if should_reset: + total_loss_dict[key] = torch.tensor([0.0], dtype=torch.float, device='cuda') log_string += f' loss scale: {loss_scale:.1f} |' if grad_norm is not None: log_string += f' grad norm: {grad_norm:.3f} |' @@ -1882,25 +2066,32 @@ def training_log( total_loss_dict[skipped_iters_key] ) log_string += ' number of nan iterations: {:3d} |'.format(total_loss_dict[nan_iters_key]) - total_loss_dict[advanced_iters_key] = 0 - total_loss_dict[skipped_iters_key] = 0 - total_loss_dict[nan_iters_key] = 0 + if should_reset: + total_loss_dict[advanced_iters_key] = 0 + total_loss_dict[skipped_iters_key] = 0 + total_loss_dict[nan_iters_key] = 0 print_rank_last(log_string) + 
reported_memory_in_this_iteration = False if report_memory_flag: # Report memory after optimizer state has been initialized. if torch.distributed.get_rank() == 0: num_microbatches = get_num_microbatches() report_theoretical_memory(args, num_microbatches=num_microbatches, verbose=True) report_memory(f'(after {iteration} iterations)') - if iteration > 1: + reported_memory_in_this_iteration = True + loaded_iteration = max(get_loaded_iteration() or 0, 0) + if iteration > (loaded_iteration + 1): # Make sure the memory after the second iteration is reported to include optimizer state memory. report_memory_flag = False - # Write timers to wandb, don't reset the counts + if args.log_memory_interval is not None and iteration % args.log_memory_interval == 0 and \ + not reported_memory_in_this_iteration: + report_memory(f'(after {iteration} iterations)') + # Write timers to wandb, don't reset the counts. if args.log_timers_to_tensorboard: timers.write(timers_to_log, writer, iteration, normalizer=args.log_interval, reset=False) timers.write(timers_to_log, wandb_writer, iteration, normalizer=args.log_interval, reset=False) # Log timers to stdout - timers.log(timers_to_log, normalizer=args.log_interval) + timers.log(timers_to_log, normalizer=args.log_interval, reset=should_reset) return report_memory_flag @@ -1955,6 +2146,9 @@ def force_param_sync(model_chunks: list[DDP]) -> None: assert isinstance(model_chunk, DDP) model_chunk.start_param_sync(force_sync=True) +# Only report memory for first 3 checkpoint saves. 
+num_checkpoints_memory_reported = 0 +MAX_NUM_CHECKPOINTS_MEMORY_REPORTED = 3 def save_checkpoint_and_time( iteration, @@ -1983,6 +2177,14 @@ def save_checkpoint_and_time( one_logger_utils.track_e2e_metrics() if should_disable_forward_pre_hook(args): force_param_sync(model) + + global num_checkpoints_memory_reported, MAX_NUM_CHECKPOINTS_MEMORY_REPORTED + should_report_memory = num_checkpoints_memory_reported < MAX_NUM_CHECKPOINTS_MEMORY_REPORTED + + if should_report_memory: + # Track memory before checkpoint save. + report_memory(f"(before save_checkpoint for iteration {iteration})") + # Save checkpoint. save_checkpoint( iteration, model, @@ -1994,6 +2196,11 @@ def save_checkpoint_and_time( train_data_iterator=train_data_iterator, preprocess_common_state_dict_fn=preprocess_common_state_dict, ) + if should_report_memory: + # Track memory after checkpoint save. + report_memory(f"(after save_checkpoint for iteration {iteration})") + num_checkpoints_memory_reported += 1 + if args.fp8: # Run garbage collection after checkpoint saving to free memory from # dequantized bf16 tensors that were temporarily created during fp8 @@ -2171,7 +2378,13 @@ def checkpoint_and_decide_exit( return True # Exit based on iterations. 
- if args.exit_interval and iteration % args.exit_interval == 0: + if ( + args.exit_interval + and iteration % args.exit_interval == 0 + ) or ( + args.phase_transition_iterations + and iteration in args.phase_transition_iterations + ): if args.save and not saved_checkpoint: save_checkpoint_and_time( iteration, @@ -2200,6 +2413,7 @@ def train( config, checkpointing_context, non_loss_data_func, + inference_model=None, ): """Training function: run train_step desired number of times, run validation, checkpoint.""" args = get_args() @@ -2362,6 +2576,7 @@ def finalize_model_grads_with_state_reload(*fmg_args, **fmg_kwargs): pre_hook_enabled = False should_exit = False exit_code = 0 + is_first_iteration = True if args.manual_gc: # Disable the default garbage collector and perform the collection manually. @@ -2556,7 +2771,7 @@ def get_e2e_base_metrics(): if getattr(args, 'perform_rl_step', False): with torch.no_grad(): train_data_iterator = rl_utils.setup_grpo_data_iterator( - model, optimizer, iteration, ref_state_dict, buffered_rollouts + model, inference_model, optimizer, iteration, ref_state_dict, buffered_rollouts ) # Buffered rollouts are used as a state container for setups when # we use previously-generated data for an update. @@ -2688,7 +2903,9 @@ def get_e2e_base_metrics(): num_zeros_in_grad, max_attention_logit, pg_collection=model_pg_collection, + is_first_iteration=is_first_iteration, ) + is_first_iteration = False # Evaluation. if args.eval_interval and iteration % args.eval_interval == 0 and args.do_valid: @@ -2704,8 +2921,23 @@ def get_e2e_base_metrics(): prefix = f'iteration {iteration}' timers('eval-time', log_level=0).start(barrier=True) if getattr(args, 'perform_rl_step', False): - rl_utils.evaluate_and_print_results_rl(valid_data_iterator, model, optimizer, - iteration, write_to_tensorboard=True) + rl_eval_model = model + # If separate inference and training models, swap training weights + # back to the inference model for RL evaluation. 
+ if inference_model is not None: + inf_core = unwrap_model(inference_model[0]) + rl_utils._maybe_prefetch_separate_inference_model_weights( + inf_core, to_cpu=False + ) + swap_model_weights(model, inference_model, args.refit_method) + rl_eval_model = inference_model + rl_utils.evaluate_and_print_results_rl( + valid_data_iterator, + rl_eval_model, + optimizer, + iteration, + write_to_tensorboard=True, + ) else: evaluate_and_print_results(prefix, forward_step_func, valid_data_iterator, model, @@ -2774,8 +3006,6 @@ def get_e2e_base_metrics(): # a persistent async worker if persistent ckpt worker is enabled maybe_finalize_async_save(blocking=True, terminate=True) ft_integration.on_checkpointing_end(is_async_finalization=True) - if args.enable_ft_package and ft_integration.get_rank_monitor_client() is not None: - ft_integration.get_rank_monitor_client().shutdown_workload_monitoring() if args.log_energy: energy_monitor.lap() @@ -3073,7 +3303,17 @@ def get_train_valid_test_num_samples(): eval_samples = eval_iters * args.global_batch_size test_samples = args.eval_iters * args.global_batch_size - return (train_samples, eval_samples, test_samples) + # Get train_samples in current phase. 
+ if args.phase_transition_iterations: + phase_transition_samples = [0] + [t * args.global_batch_size for t in args.phase_transition_iterations] + [args.train_samples] + current_sample = args.iteration * args.global_batch_size + last_transition_sample = max(s for s in phase_transition_samples if s <= current_sample) + next_transition_sample = min(s for s in phase_transition_samples if s > current_sample) + train_samples_in_current_phase = next_transition_sample - last_transition_sample + else: + train_samples_in_current_phase = train_samples + + return (train_samples_in_current_phase, eval_samples, test_samples) def build_train_valid_test_datasets(build_train_valid_test_datasets_provider, train_valid_test_num_samples=None, vp_stage=None): @@ -3103,6 +3343,7 @@ def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider assert ( args.train_samples is None ), 'Only backward compatiblity support for iteration-based training' + args.consumed_train_samples = args.iteration * args.global_batch_size if args.iteration > 0 and args.consumed_valid_samples == 0: if args.train_samples is None: @@ -3110,6 +3351,13 @@ def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider (args.iteration // args.eval_interval) * args.eval_iters * args.global_batch_size ) + # Get consumed train samples in this phase. 
+ if args.phase_transition_iterations: + last_transition = max(iteration for iteration in (0, *args.phase_transition_iterations) if iteration <= args.iteration) + consumed_train_samples_in_current_phase = (args.iteration - last_transition) * args.global_batch_size + else: + consumed_train_samples_in_current_phase = args.consumed_train_samples + # Rely on distributed-aware core datasets, temporary is_distributed = getattr(build_train_valid_test_datasets_provider, "is_distributed", False) @@ -3136,7 +3384,7 @@ def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider if args.skip_train: train_dataloader = None else: - train_dataloader = build_pretraining_data_loader(train_ds, args.consumed_train_samples) + train_dataloader = build_pretraining_data_loader(train_ds, consumed_train_samples_in_current_phase) valid_dataloaders = [] for valid_d in valid_ds: if args.skip_train or args.full_validation: diff --git a/megatron/training/config.py b/megatron/training/training_config.py similarity index 57% rename from megatron/training/config.py rename to megatron/training/training_config.py index d978083372d..d91972cf3c6 100644 --- a/megatron/training/config.py +++ b/megatron/training/training_config.py @@ -114,3 +114,82 @@ class ValidationConfig: separate loss for each dataset in the list. This argument requires that no weights are included in the list. """ + + +@dataclass(kw_only=True) +class SchedulerConfig: + """Configuration settings for the learning rate scheduler and weight decay.""" + + # ---------------- Learning rate config. 
---------------- + lr_decay_style: Literal["constant", "linear", "cosine", "inverse-square-root", "WSD"] = "linear" + """Learning rate decay function.""" + + lr_wsd_decay_style: Literal["exponential", "linear", "cosine", "minus_sqrt"] = "exponential" + """Decay style for the annealing phase of WSD""" + + lr_decay_iters: int | None = None + """number of iterations to decay learning rate over, If None defaults to train iters""" + + lr_decay_samples: int | None = None + """number of samples to decay learning rate over, If None defaults to train samples""" + + lr_wsd_decay_iters: int | None = None + """number of iterations for the annealing phase in the wsd schedule""" + + lr_wsd_decay_samples: int | None = None + """number of samples for the annealing phase in the wsd schedule""" + + lr_warmup_fraction: float | None = None + """fraction of lr-warmup-(iters/samples) to use for warmup (as a float)""" + + lr_warmup_iters: int = 0 + """number of iterations to linearly warmup learning rate over.""" + + lr_warmup_samples: int = 0 + """number of samples to linearly warmup learning rate over.""" + + lr_warmup_init: float = 0.0 + """Initial value for learning rate warmup. The scheduler starts warmup from this value.""" + + lr_decay_steps: int | None = field(init=False, default=None) + """number of samples to decay learning rate over. Calculated at runtime from + lr_decay_iters or lr_decay_samples. + """ + + lr_warmup_steps: int | None = field(init=False, default=None) + """number of samples to warmup learning rate over. Calculated at runtime from + lr_warmup_fraction, lr_warmup_iters, or lr_warmup_samples. 
+ """ + + override_opt_param_scheduler: bool = field(default=False, metadata={"argparse_meta": {"arg_names": ["--override-opt_param-scheduler", "--override-opt-param-scheduler"]}}) + """Reset the values of the scheduler (learning rate, warmup iterations, minimum learning rate, + maximum number of iterations, and decay style) from input arguments and ignore values from + checkpoints. Note that all the above values will be reset.""" + + use_checkpoint_opt_param_scheduler: bool = field(default=False, metadata={"argparse_meta": {"arg_names": ["--use-checkpoint-opt_param-scheduler", "--use-checkpoint-opt-param-scheduler"]}}) + """Use checkpoint to set the values of the scheduler (learning rate, warmup iterations, + minimum learning rate, maximum number of iterations, and decay style) from checkpoint + and ignore input arguments.""" + + # ---------------- Regularization config. ---------------- + + start_weight_decay: float | None = None + """Initial weight decay coefficient for L2 regularization.""" + + end_weight_decay: float | None = None + """End of run weight decay coefficient for L2 regularization.""" + + weight_decay_incr_style: Literal["constant", "linear", "cosine"] = "constant" + """Weight decay increment function.""" + + no_weight_decay_cond_type: Literal["qwen3_next"] | None = None + """Type of no weight decay condition. Choices: + None (default): param no weight decay if and only if it is 1D; or it is bias; + or it is embedding and embedding_init_method_std is not None. + "qwen3_next": In addition to the default rules, apply weight decay to qk layernorm as a special case.""" + + wd_incr_steps: int | None = field(init=False, default=None) + """Number of samples to increment weight decay over. Calculated at runtime.""" + + wsd_decay_steps: int | None = field(init=False, default=None) + """Number of samples to decay WSD weight decay. 
Calculated at runtime.""" diff --git a/megatron/training/utils.py b/megatron/training/utils.py index 4730a525271..06e5e6b8b26 100644 --- a/megatron/training/utils.py +++ b/megatron/training/utils.py @@ -277,15 +277,16 @@ def logical_and_across_model_parallel_group(input: bool) -> bool: def report_memory(name): """Simple GPU memory report.""" + args = get_args() mega_bytes = 1024.0 * 1024.0 string = name + ' memory (MB)' - string += ' | allocated: {}'.format(torch.cuda.memory_allocated() / mega_bytes) - string += ' | max allocated: {}'.format(torch.cuda.max_memory_allocated() / mega_bytes) - string += ' | reserved: {}'.format(torch.cuda.memory_reserved() / mega_bytes) - string += ' | max reserved: {}'.format(torch.cuda.max_memory_reserved() / mega_bytes) - if is_torch_min_version("2.6.0"): + string += f" | allocated: {torch.cuda.memory_allocated() / mega_bytes:.2f}" + string += f" | max allocated: {torch.cuda.max_memory_allocated() / mega_bytes:.2f}" + string += f" | reserved: {torch.cuda.memory_reserved() / mega_bytes:.2f}" + string += f" | max reserved: {torch.cuda.max_memory_reserved() / mega_bytes:.2f}" + if args.log_device_memory_used and is_torch_min_version("2.6.0"): # device usage is not supported in torch < 2.6.0 - string += ' | device usage: {}'.format(torch.cuda.device_memory_used() / mega_bytes) + string += f" | total device memory used: {torch.cuda.device_memory_used() / mega_bytes:.2f}" if mpu.get_data_parallel_rank() == 0: print("[Rank {}] {}".format(torch.distributed.get_rank(), string), flush=True) @@ -602,6 +603,25 @@ def _broadcast_cu_seqlens(cu_seqlens): _broadcast(batch['loss_mask']) _broadcast(batch['attention_mask']) + def _broadcast_cu_seqlens(cu_seqlens): + dev = torch.cuda.current_device() + + n = 0 if cu_seqlens is None else int(cu_seqlens.numel()) + n_tensor = torch.tensor(n, dtype=torch.int64, device=dev) + _broadcast(n_tensor) + + if n == 0: + buf = torch.empty(0, dtype=torch.int32, device=dev) + else: + assert isinstance(cu_seqlens, 
torch.Tensor) + assert cu_seqlens.dtype == torch.int32 + assert cu_seqlens.shape[0] == 1, "micro-batch-size must be 1 for packing" + buf = cu_seqlens.to(device=dev, non_blocking=True).contiguous() + _broadcast(buf) + + _broadcast_cu_seqlens(batch['cu_seqlens']) + _broadcast(batch['max_seqlen']) + else: if args.hybrid_context_parallel: seq_len = torch.tensor(0, dtype=torch.int32, device=torch.cuda.current_device()) @@ -639,6 +659,15 @@ def _broadcast_cu_seqlens(cu_seqlens): dtype=torch.int64, device=torch.cuda.current_device(), ) + cu_seqlens = None + if args.sft: + max_seqlen = torch.empty( + 1, + dtype=torch.int32, + device=torch.cuda.current_device(), + ) + else: + max_seqlen = None cu_seqlens = None max_seqlen = torch.empty( @@ -695,10 +724,29 @@ def _broadcast_cu_seqlens(): position_ids = None cu_seqlens = None max_seqlen = None + _broadcast(labels) _broadcast(loss_mask) _broadcast(attention_mask) + def _broadcast_cu_seqlens(): + dev = torch.cuda.current_device() + + n = torch.empty((), dtype=torch.int64, device=dev) + _broadcast(n) + n = int(n.item()) + + if n == 0: + cu_seqlens = torch.empty(0, dtype=torch.int32, device=dev) + else: + cu_seqlens = torch.empty((args.micro_batch_size, n), dtype=torch.int32, device=dev) + _broadcast(cu_seqlens) + + return cu_seqlens if n > 0 else None + + cu_seqlens = _broadcast_cu_seqlens() + _broadcast(max_seqlen) + batch = { 'tokens': tokens, 'labels': labels, diff --git a/pretrain_gpt.py b/pretrain_gpt.py index cfb5e1b5f1f..07ef0a20f0c 100644 --- a/pretrain_gpt.py +++ b/pretrain_gpt.py @@ -2,6 +2,11 @@ """Pretrain and SFT GPT.""" +# Capture the true program start time BEFORE any heavy imports +import time +_PROGRAM_START_TIME = time.time() + +import json from functools import partial from typing import List, Optional, Tuple @@ -17,9 +22,19 @@ from megatron.core.utils import get_attr_wrapped_model, get_thd_batch_on_this_cp_rank, get_batch_on_this_hybrid_cp_rank, StragglerDetector from 
megatron.core.tokenizers.text.utils.build_tokenizer import build_tokenizer from megatron.core.transformer.multi_token_prediction import mtp_on_this_rank, get_mtp_ranks +from megatron.training import ( + get_args, + get_timers, + get_tokenizer, + inprocess_restart, + pretrain, + print_rank_0, + set_startup_timestamps, +) from megatron.training.arguments import core_transformer_config_from_args -from megatron.training import get_args, get_timers, get_tokenizer, inprocess_restart, pretrain, print_rank_0 from megatron.training.datasets.sft_dataset import SFTDataset +from megatron.core.transformer.multi_token_prediction import mtp_on_this_rank, get_mtp_ranks +from megatron.training.arguments import core_transformer_config_from_args from megatron.training.datasets.fim_dataset import GPTFIMDataset, GPTFIMDatasetConfig from megatron.training.utils import ( get_batch_on_this_cp_rank, @@ -200,6 +215,11 @@ def core_gpt_dataset_config_from_args(args): blend_per_split: Optional[List[Optional[Tuple[List[str], Optional[List[float]]]]]] blend, blend_per_split = get_blend_and_blend_per_split(args) + sequences_per_dataset = None + if args.per_dataset_sequences_path is not None: + with open(args.per_dataset_sequences_path, "r") as f: + sequences_per_dataset = json.load(f) + data_args = { "random_seed": args.seed, "sequence_length": args.seq_length, @@ -219,6 +239,9 @@ def core_gpt_dataset_config_from_args(args): "object_storage_cache_path": args.object_storage_cache_path, "mid_level_dataset_surplus": args.mid_level_dataset_surplus, "allow_ambiguous_pad_tokens": args.allow_ambiguous_pad_tokens, + "fast_cache_load": args.dataloader_fast_cache_load, + "sequences_per_dataset": sequences_per_dataset, + "defer_npy_index_mmap": args.dataloader_defer_npy_index_mmap, "context_parallel_size": args.context_parallel_size, "data_parallel_size": args.data_parallel_size, "sequence_parallel_size": args.tensor_model_parallel_size*args.sequence_parallel, @@ -297,6 +320,11 @@ def 
get_embedding_ranks(pp_ranks: List[int]): if __name__ == "__main__": + # Timestamp right after entering __main__ block (after all imports/library setup) + _MAIN_ENTRY_TIME = time.time() + + # Register startup timestamps for timing report in pretrain() + set_startup_timestamps(program_start=_PROGRAM_START_TIME, main_entry=_MAIN_ENTRY_TIME) # Temporary for transition to core datasets train_valid_test_datasets_provider.is_distributed = True diff --git a/pretrain_mamba.py b/pretrain_mamba.py index ca2008620be..bd46dce212f 100644 --- a/pretrain_mamba.py +++ b/pretrain_mamba.py @@ -1,6 +1,11 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. """Pretrain and SFT Mamba.""" +# Capture the true program start time BEFORE any heavy imports +import time +_PROGRAM_START_TIME = time.time() + +import json from functools import partial from typing import List, Optional, Tuple @@ -11,11 +16,24 @@ from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder from megatron.core.datasets.gpt_dataset import GPTDataset, GPTDatasetConfig, MockGPTDataset from megatron.core.enums import ModelType +from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.parallel_state import ( + get_context_parallel_rank, + get_context_parallel_world_size, +) from megatron.core.models.mamba import MambaModel from megatron.core.rerun_state_machine import get_rerun_state_machine from megatron.core.tokenizers.text.utils.build_tokenizer import build_tokenizer -from megatron.core.utils import StragglerDetector, get_attr_wrapped_model -from megatron.training import get_args, get_timers, get_tokenizer, inprocess_restart, pretrain, print_rank_0 +from megatron.core.utils import get_attr_wrapped_model, is_te_min_version, StragglerDetector +from megatron.training import ( + get_args, + get_timers, + get_tokenizer, + inprocess_restart, + pretrain, + print_rank_0, + set_startup_timestamps, +) from megatron.training.datasets.sft_dataset import 
SFTDataset from megatron.training.utils import ( get_batch_on_this_cp_rank, @@ -32,19 +50,39 @@ except ImportError: has_nvidia_modelopt = False +try: + # Register the TE CUDA kernels + import transformer_engine # pylint: disable=unused-import + + # Alias the PyTorch wrapper so we can call tex.* APIs + import transformer_engine_torch as tex +except ImportError: + # TE isn’t installed or the torch wrapper is missing + tex = None + stimer = StragglerDetector() def get_batch(data_iterator, vp_stage=None): """Generate a batch.""" - # TODO: this is pretty hacky, find a better way - if not is_first_or_last_pipeline_stage(vp_stage): - return None, None, None, None, None + empty_batch = { + 'tokens': None, + 'labels': None, + 'loss_mask': None, + 'attention_mask': None, + 'position_ids': None, + 'cu_seqlens': None, + 'max_seqlen': None, + } + + # TODO(duncan): Is there a more efficient way to access is_packed_sequence here? + is_packed_sequence = get_args().sft # SFT always uses packed sequence + if not is_first_or_last_pipeline_stage(vp_stage) and not is_packed_sequence: + return empty_batch.values() - # get batches based on the TP rank you are on batch = get_batch_on_this_tp_rank(data_iterator) - + # Support for Packed Sequence (Unused in this script) cu_seqlens = batch.pop('cu_seqlens', None) cu_seqlens_padded = batch.pop('cu_seqlens_padded', None) @@ -52,8 +90,51 @@ def get_batch(data_iterator, vp_stage=None): # Support for Hybrid Context Parallel (Unused in this script) local_cp_size = batch.pop('local_cp_size', None) - # slice batch along sequence dimension for context parallelism - batch = get_batch_on_this_cp_rank(batch) + if cu_seqlens is not None: + assert ( + cu_seqlens.dim() == 2 and cu_seqlens.shape[0] == 1 + ), "micro-batch-size must be 1 for packing" + cu_seqlens = cu_seqlens[0] + batch['cu_seqlens'] = cu_seqlens + + max_seqlen = batch['max_seqlen'] + assert max_seqlen.dim() == 1 + # TODO(duncan): can this be kept as a 0-D tensor? 
+ batch['max_seqlen'] = int(max_seqlen[0].item()) + + if mpu.is_pipeline_first_stage(ignore_virtual=(vp_stage is None), vp_stage=vp_stage): + total_tokens = batch['tokens'].size(1) + elif mpu.is_pipeline_last_stage(ignore_virtual=(vp_stage is None), vp_stage=vp_stage): + total_tokens = batch['labels'].size(1) + else: # packed sequence + empty_batch['cu_seqlens'] = cu_seqlens + empty_batch['max_seqlen'] = max_seqlen + return empty_batch.values() + + if cu_seqlens is None: + # slice batch along sequence dimension for context parallelism + batch = get_batch_on_this_cp_rank(batch) # The implementation of this function is in MCore + else: # Packed THD format + cp_size = get_context_parallel_world_size() + if cp_size > 1: # slice batch along sequence dimension for context parallelism + assert tex is not None and is_te_min_version("1.10.0"), ( + "Please update Transformer Engine to >= 1.10 to use " + "Context Parallel with THD format data" + ) + cp_rank = get_context_parallel_rank() + index = tex.thd_get_partitioned_indices( + cu_seqlens, + total_tokens, + cp_size, + cp_rank, + ) + for key, data in batch.items(): + if key in {'attention_mask', 'cu_seqlens', 'max_seqlen'}: + continue + if data is not None: + # On first PP rank, labels and loss_mask can be None. + # On last PP rank, tokens and position_ids can be None. + batch[key] = data.index_select(1, index) return batch.values() @@ -130,22 +211,57 @@ def forward_step(data_iterator, model: MambaModel): # Get the batch. 
timers('batch-generator', log_level=2).start() + global stimer + with stimer(bdata=True): vp_stage = get_attr_wrapped_model(model, "vp_stage") - tokens, labels, loss_mask, attention_mask, position_ids = get_batch(data_iterator, vp_stage) + ( + tokens, + labels, + loss_mask, + attention_mask, + position_ids, + cu_seqlens, + max_seqlen, + ) = get_batch(data_iterator, vp_stage) + + if cu_seqlens is None: + packed_seq_params = None + else: + # TODO(duncan): This class seems overly complex for what needs to be conveyed + packed_seq_params = PackedSeqParams( + qkv_format="thd", + cu_seqlens_q=cu_seqlens, + cu_seqlens_kv=cu_seqlens, + cu_seqlens_q_padded=None, + cu_seqlens_kv_padded=None, + max_seqlen_q=max_seqlen, + max_seqlen_kv=max_seqlen, + ) + timers('batch-generator').stop() with stimer: - output_tensor = model(tokens, position_ids, attention_mask, - labels=labels) + output_tensor = model( + tokens, + position_ids, + attention_mask, + labels=labels, + packed_seq_params=packed_seq_params, + ) # [ModelOpt]: model is needed to access ModelOpt distillation losses return output_tensor, partial(loss_func, loss_mask, model=model) -def is_dataset_built_on_rank(vp_stage=None): - return is_first_or_last_pipeline_stage(vp_stage) and mpu.get_tensor_model_parallel_rank() == 0 +def is_dataset_built_on_rank(vp_stage=None, is_packed_sequence=False): + if mpu.get_tensor_model_parallel_rank() != 0: + return False + elif is_packed_sequence: + return True + else: + return is_first_or_last_pipeline_stage(vp_stage) def core_gpt_dataset_config_from_args(args): @@ -159,6 +275,11 @@ def core_gpt_dataset_config_from_args(args): blend_per_split: Optional[List[Optional[Tuple[List[str], Optional[List[float]]]]]] blend, blend_per_split = get_blend_and_blend_per_split(args) + sequences_per_dataset = None + if args.per_dataset_sequences_path is not None: + with open(args.per_dataset_sequences_path, "r") as f: + sequences_per_dataset = json.load(f) + return GPTDatasetConfig( random_seed=args.seed, 
sequence_length=args.seq_length, @@ -176,6 +297,10 @@ def core_gpt_dataset_config_from_args(args): object_storage_cache_path=args.object_storage_cache_path, mid_level_dataset_surplus=args.mid_level_dataset_surplus, allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens, + fast_cache_load=args.dataloader_fast_cache_load, + sequences_per_dataset=sequences_per_dataset, + defer_npy_index_mmap=args.dataloader_defer_npy_index_mmap, + context_parallel_size=args.context_parallel_size, ) @@ -188,8 +313,10 @@ def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None args = get_args() config = core_gpt_dataset_config_from_args(args) + is_packed_sequence = False if args.sft: dataset_type = SFTDataset + is_packed_sequence = True # SFT always uses packed sequence else: if args.mock_data: dataset_type = MockGPTDataset @@ -201,7 +328,7 @@ def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None train_ds, valid_ds, test_ds = BlendedMegatronDatasetBuilder( dataset_type, train_val_test_num_samples, - partial(is_dataset_built_on_rank, vp_stage=vp_stage), + partial(is_dataset_built_on_rank, vp_stage=vp_stage, is_packed_sequence=is_packed_sequence), config ).build() @@ -211,6 +338,11 @@ def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None if __name__ == "__main__": + # Timestamp right after entering __main__ block (after all imports/library setup) + _MAIN_ENTRY_TIME = time.time() + + # Register startup timestamps for timing report in pretrain() + set_startup_timestamps(program_start=_PROGRAM_START_TIME, main_entry=_MAIN_ENTRY_TIME) # Temporary for transition to core datasets train_valid_test_datasets_provider.is_distributed = True diff --git a/pyproject.toml b/pyproject.toml index 22ee405cb4f..800c2d88900 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,13 +81,13 @@ dev = [ "nv-grouped-gemm~=1.1", "megatron-energon[av_decode]~=6.0", "av", - "flashinfer-python", + "flashinfer-python~=0.5.0", "wget", 
"onnxscript", "flash-linear-attention~=0.3.2", - "emerging_optimizers", "fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0 "datasets", + "emerging_optimizers", ] lts = [ @@ -102,11 +102,12 @@ lts = [ "nv-grouped-gemm~=1.1", "megatron-energon[av_decode]~=6.0", "av", - "flashinfer-python", + "flashinfer-python~=0.5.0", "wget", "onnxscript", "fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0 "datasets", + "emerging_optimizers", ] [dependency-groups] @@ -134,13 +135,13 @@ docs = [ "nvidia-sphinx-theme", # Our NVIDIA theme ] build = [ - "setuptools<80.0.0", + "setuptools<80.0.0,>=77.0.0", "packaging>=24.2", "hatchling", "pybind11", "Cython>=3.0.0", "torch", - "nvidia-mathdx", # for TE + "nvidia-mathdx", # for TE ] linting = [ "ruff~=0.9.0", diff --git a/tests/functional_tests/python_test_utils/test_grpo_training_loop.py b/tests/functional_tests/python_test_utils/test_grpo_training_loop.py index 12e5da3fbad..1b6eedd4fdb 100644 --- a/tests/functional_tests/python_test_utils/test_grpo_training_loop.py +++ b/tests/functional_tests/python_test_utils/test_grpo_training_loop.py @@ -26,7 +26,13 @@ def test_grpo_training_loop(golden_values_path: str, test_values_path: str) -> N # Handle JSONL output, assume only one line in this case. 
output_current = json.loads(output_current) - assert set(output_groundtruth.keys()).issuperset( + # Allow current run to have extra metrics not in golden values + # (only compare metrics defined in golden values) + extra_in_current = set(output_current.keys()) - set(output_groundtruth.keys()) + if extra_in_current: + logger.info(f"Ignoring extra metrics in current run: {extra_in_current}") + + assert set(output_groundtruth.keys()).issubset( set(output_current.keys()) ), f"Some IDs from groundtruth are missing in current: {output_groundtruth.keys()} vs {output_current.keys()}" if set(output_groundtruth.keys()) != set(output_current.keys()): @@ -56,3 +62,33 @@ def test_grpo_training_loop(golden_values_path: str, test_values_path: str) -> N ) output_groundtruth.pop('iteration-time') + + if "lm-loss" in output_groundtruth.keys(): + + # Require exact matching of all lm-loss values. + golden_lm_loss_values = output_groundtruth["lm-loss"]['values'] + current_lm_loss_values = output_current["lm-loss"]['values'] + + assert golden_lm_loss_values == current_lm_loss_values, ( + f"LM loss values do not exactly match.\n" + f"Golden: {golden_lm_loss_values}\n" + f"Current: {current_lm_loss_values}\n" + f"Please update golden values in the functional tests if this is expected." + ) + + output_groundtruth.pop('lm-loss') + + if "num-zeros" in output_groundtruth.keys(): + + # Require exact matching of all lm-loss values. + golden_num_zeros_values = output_groundtruth["num-zeros"]['values'] + current_num_zeros_values = output_current["num-zeros"]['values'] + + assert golden_num_zeros_values == current_num_zeros_values, ( + f"LM loss values do not exactly match.\n" + f"Golden: {golden_num_zeros_values}\n" + f"Current: {current_num_zeros_values}\n" + f"Please update golden values in the functional tests if this is expected." 
+ ) + + output_groundtruth.pop('num-zeros') diff --git a/tests/functional_tests/python_test_utils/test_inference_regular_pipeline.py b/tests/functional_tests/python_test_utils/test_inference_regular_pipeline.py index ae57db10e55..346b464b79d 100644 --- a/tests/functional_tests/python_test_utils/test_inference_regular_pipeline.py +++ b/tests/functional_tests/python_test_utils/test_inference_regular_pipeline.py @@ -8,6 +8,32 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +_NON_REQUEST_TOP_LEVEL_KEYS = { + # System-level metrics + "throughput", + # Peak memory metrics (added by inference scripts; optionally checked if present in golden values) + "mem-max-allocated-bytes", +} + + +def _median_as_float(value): + """Convert scalar or list metric to a single float (median). + + For list metrics (e.g., per-request throughput), treat the first element as + warmup if length > 1, matching existing throughput behavior. + """ + if isinstance(value, list): + assert len(value) > 0, "Metric list is empty." + values = [float(v) for v in value] + if len(values) > 1: + values = values[1:] + return float(median(values)) + return float(value) + + +def _bytes_to_gib(num_bytes: float) -> float: + return float(num_bytes) / (1024.0**3) + def test_inference_pipeline(golden_values_path: str, test_values_path: str) -> None: @@ -26,12 +52,17 @@ def test_inference_pipeline(golden_values_path: str, test_values_path: str) -> N # Handle JSONL output, assume only one line in this case. 
output_current = json.loads(output_current) - assert set(output_groundtruth.keys()).issuperset( - set(output_current.keys()) - ), f"Some IDs from groundtruth are missing in current: {output_groundtruth.keys()} vs {output_current.keys()}" - if set(output_groundtruth.keys()) != set(output_current.keys()): + groundtruth_request_ids = set(output_groundtruth.keys()) - _NON_REQUEST_TOP_LEVEL_KEYS + current_request_ids = set(output_current.keys()) - _NON_REQUEST_TOP_LEVEL_KEYS + + assert groundtruth_request_ids.issuperset(current_request_ids), ( + "Some request IDs from groundtruth are missing in current or current has unexpected IDs: " + f"{sorted(groundtruth_request_ids)} vs {sorted(current_request_ids)}" + ) + if groundtruth_request_ids != current_request_ids: logger.warning( - f"Some IDs from groundtruth are missing in output, only the subset of ids in groundtruth will be tested: {output_groundtruth.keys()} vs {output_current.keys()}" + "Some request IDs from groundtruth are missing in output; only the subset of ids in groundtruth will be tested: " + f"{sorted(groundtruth_request_ids)} vs {sorted(current_request_ids)}" ) assert len(output_groundtruth) > 0, "No test performed for output" @@ -54,6 +85,35 @@ def test_inference_pipeline(golden_values_path: str, test_values_path: str) -> N output_groundtruth.pop('throughput') + # Peak memory regression checks (optional: only if present in golden values). + if "mem-max-allocated-bytes" in output_groundtruth: + assert "mem-max-allocated-bytes" in output_current, ( + f"Golden values include mem-max-allocated-bytes but current output does not. " + "Ensure the inference script records memory metrics to the output JSON." + ) + sampled = _median_as_float(output_current["mem-max-allocated-bytes"]) + golden = _median_as_float(output_groundtruth["mem-max-allocated-bytes"]) + assert golden > 0, f"Golden mem_max_allocated_bytes must be > 0, got {golden}." 
+ + low = 0.95 * golden + high = 1.05 * golden + + if sampled < low: + raise AssertionError( + f"Memory is too low for mem-max-allocated-bytes: " + f"expected within 5% of {golden:.0f} bytes ({_bytes_to_gib(golden):.3f} GiB) " + f"but got {sampled:.0f} bytes ({_bytes_to_gib(sampled):.3f} GiB). " + "This is >5% lower than expected; please update golden values in the functional tests." + ) + if sampled > high: + raise AssertionError( + f"Memory is too high for mem-max-allocated-bytes: " + f"expected within ±5% of {golden:.0f} bytes ({_bytes_to_gib(golden):.3f} GiB) " + f"but got {sampled:.0f} bytes ({_bytes_to_gib(sampled):.3f} GiB). " + "This is >5% higher than expected; this is likely a regression." + ) + output_groundtruth.pop("mem-max-allocated-bytes") + for request_id, groundtruth_results in output_groundtruth.items(): current_results = output_current[request_id] diff --git a/tests/functional_tests/shell_test_utils/run_batch_ci_tests.sh b/tests/functional_tests/shell_test_utils/run_batch_ci_tests.sh new file mode 100755 index 00000000000..9c99726555c --- /dev/null +++ b/tests/functional_tests/shell_test_utils/run_batch_ci_tests.sh @@ -0,0 +1,255 @@ +#!/bin/bash +# +# Script to submit batch jobs to run test scripts across different compute nodes +# +# Usage: +# ./run_batch_ci_tests.sh [num_jobs] [partition] +# +# Arguments: +# test_script - Path to test script in test_cases/ (required) +# num_jobs - Number of jobs to submit (default: 10) +# partition - Slurm partition to use (default: interactive) +# +# Examples: +# ./run_batch_ci_tests.sh test_cases/moe/gpt_grpo_tp4tp2_pp1_ep4ep2_dp8_throughputtest.sh +# ./run_batch_ci_tests.sh test_cases/gpt/gpt3_mcore_te_tp2_pp2.sh 5 +# ./run_batch_ci_tests.sh test_cases/bert/bert_mcore_tp2_pp2.sh 10 batch_block1 +# +# To list available test scripts: +# ./run_batch_ci_tests.sh --list +# ./run_batch_ci_tests.sh --list moe # List only moe tests +# ./run_batch_ci_tests.sh --list gpt # List only gpt tests +# + +set -e + +# 
Function to list available test scripts +list_tests() { + local filter="${1:-}" + echo "Available test scripts in test_cases/:" + echo + if [ -n "$filter" ]; then + # List tests in specific subdirectory + if [ -d "test_cases/$filter" ]; then + find "test_cases/$filter" -name "*.sh" -type f | sort + else + echo "No test_cases/$filter directory found." + echo "Available subdirectories:" + ls -d test_cases/*/ 2>/dev/null | sed 's|test_cases/||g; s|/||g' | xargs -I {} echo " {}" + exit 1 + fi + else + # List all tests grouped by subdirectory + for dir in test_cases/*/; do + if [ -d "$dir" ]; then + subdir=$(basename "$dir") + echo "=== $subdir ===" + find "$dir" -name "*.sh" -type f | sort | sed 's|^| |' + echo + fi + done + fi + exit 0 +} + +# Handle --list option +if [ "${1:-}" = "--list" ]; then + list_tests "${2:-}" +fi + +# Configuration (same as start_ci_interactive.sh) +export DATASET_DIR=/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_mcore/mcore_ci +export TGT_IMAGE=gitlab-master.nvidia.com/adlr/megatron-lm/mcore_ci_dev:main +export ACCOUNT=llmservice_fm_text + +# The test script to run inside the container (first argument, required) +TEST_SCRIPT="${1:-}" + +if [ -z "$TEST_SCRIPT" ]; then + echo "ERROR: Test script path is required" + echo + echo "Usage: $0 [num_jobs] [partition]" + echo + echo "Run '$0 --list' to see available test scripts" + exit 1 +fi + +# Number of jobs to submit (second argument, default 10) +NUM_JOBS=${2:-10} + +# Partition (third argument, default to same as interactive - change if needed) +# Common batch partition names: batch, batch_block1, dgx_batch, etc. +export PARTITION=${3:-interactive} + +# Verify test script exists +if [ ! 
-f "$TEST_SCRIPT" ]; then + echo "ERROR: Test script not found: $TEST_SCRIPT" + echo "Make sure you run this from the megatron-rl directory" + echo + echo "Run '$0 --list' to see available test scripts" + exit 1 +fi + +# Extract test name from script path for job naming +# e.g., "test_cases/moe/gpt_grpo_tp4tp2_pp1_ep4ep2_dp8_throughputtest.sh" -> "gpt_grpo_tp4tp2_pp1_ep4ep2_dp8_throughputtest" +TEST_NAME=$(basename "$TEST_SCRIPT" .sh) + +# Output directory for logs (include test name for clarity) +LOG_DIR="$(pwd)/batch_test_logs_${TEST_NAME}_$(date +%Y%m%d_%H%M%S)" +mkdir -p "$LOG_DIR" + +# Container mounts +CONTAINER_MOUNTS="$DATASET_DIR:/mnt/artifacts,$(pwd):/opt/megatron-lm" + +echo "=============================================" +echo "Batch CI Test Submission" +echo "=============================================" +echo "Test Script: $TEST_SCRIPT" +echo "Test Name: $TEST_NAME" +echo "Partition: $PARTITION" +echo "Account: $ACCOUNT" +echo "Image: $TGT_IMAGE" +echo "Dataset Dir: $DATASET_DIR" +echo "Num Jobs: $NUM_JOBS" +echo "Log Dir: $LOG_DIR" +echo "=============================================" +echo + +# Submit jobs +# Truncate test name if too long for job name (max ~64 chars typically) +SHORT_TEST_NAME="${TEST_NAME:0:50}" + +for i in $(seq 1 $NUM_JOBS); do + JOB_NAME="${SHORT_TEST_NAME}_run_${i}" + + sbatch \ + --job-name="$JOB_NAME" \ + --partition="$PARTITION" \ + --account="$ACCOUNT" \ + --nodes=1 \ + --gpus-per-task=8 \ + --time=1:00:00 \ + --exclusive \ + --output="$LOG_DIR/${JOB_NAME}_%j.out" \ + --error="$LOG_DIR/${JOB_NAME}_%j.err" \ + --export=ALL \ + --wrap="srun \ + --container-image=$TGT_IMAGE \ + --container-workdir=/opt/megatron-lm \ + --container-mounts=$CONTAINER_MOUNTS \ + --no-container-mount-home \ + bash -c 'cd /opt/megatron-lm && time bash $TEST_SCRIPT'" + + echo "Submitted job $i: $JOB_NAME" +done + +echo +echo "=============================================" +echo "All $NUM_JOBS jobs submitted!" 
+echo "Monitor with: squeue -u \$USER" +echo "Logs will be written to: $LOG_DIR" +echo "=============================================" + +# Create a helper script to check results +cat > "$LOG_DIR/check_results.sh" << 'CHECKEOF' +#!/bin/bash +# Check the results of all batch test runs + +LOG_DIR="$(dirname "$0")" +echo "Checking results in: $LOG_DIR" +echo + +total=0 +passed=0 +failed=0 +pending=0 + +# Match any .out file that ends with _run_N_JOBID.out pattern +for outfile in "$LOG_DIR"/*_run_*.out; do + if [ -f "$outfile" ]; then + total=$((total + 1)) + jobname=$(basename "$outfile" .out) + + # Check if file is empty (job still running or not started) + if [ ! -s "$outfile" ]; then + echo "PENDING: $jobname (no output yet)" + pending=$((pending + 1)) + continue + fi + + # Check for success: look for "This test wrote results into" which indicates completion + if grep -q "This test wrote results into" "$outfile" 2>/dev/null; then + # Check for errors/failures + if grep -Ei "FAILED|AssertionError|Exception:|Traceback" "$outfile" 2>/dev/null | grep -v "grep" > /dev/null; then + echo "FAILED: $jobname" + failed=$((failed + 1)) + else + # Extract timing info + timing=$(grep -E "^real\s" "$outfile" 2>/dev/null | head -1 || echo "") + echo "PASSED: $jobname $timing" + passed=$((passed + 1)) + fi + else + # Job might still be running or crashed early + if grep -qi "error\|failed\|exception\|traceback" "$outfile" 2>/dev/null; then + echo "FAILED: $jobname (error in output)" + failed=$((failed + 1)) + else + echo "RUNNING: $jobname (incomplete output)" + pending=$((pending + 1)) + fi + fi + fi +done + +echo +echo "=============================================" +echo "Summary:" +echo " Passed: $passed" +echo " Failed: $failed" +echo " Pending: $pending" +echo " Total: $total" +echo "=============================================" + +if [ $failed -gt 0 ]; then + exit 1 +elif [ $pending -gt 0 ]; then + exit 2 +else + exit 0 +fi +CHECKEOF +chmod +x "$LOG_DIR/check_results.sh" + 
+# Create a script to show node info for each job +cat > "$LOG_DIR/show_nodes.sh" << 'NODEEOF' +#!/bin/bash +# Show which node each job ran on + +LOG_DIR="$(dirname "$0")" +echo "Node assignments for batch tests:" +echo + +# Match any .out file that ends with _run_N_JOBID.out pattern +for outfile in "$LOG_DIR"/*_run_*.out; do + if [ -f "$outfile" ]; then + jobname=$(basename "$outfile" .out) + jobid=$(echo "$outfile" | grep -oP '\d+(?=\.out)') + + # Try to get node from sacct or from output file + node=$(sacct -j "$jobid" --format=NodeList --noheader 2>/dev/null | head -1 | tr -d ' ') + if [ -z "$node" ]; then + node="unknown" + fi + + echo "$jobname (job $jobid): $node" + fi +done +NODEEOF +chmod +x "$LOG_DIR/show_nodes.sh" + +echo "After jobs complete:" +echo " - Run '$LOG_DIR/check_results.sh' to check results" +echo " - Run '$LOG_DIR/show_nodes.sh' to see which nodes were used" +echo +echo "To run other tests, use: $0 --list to see available test scripts" diff --git a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh index 20267536a0f..4c002232941 100644 --- a/tests/functional_tests/shell_test_utils/run_ci_test.sh +++ b/tests/functional_tests/shell_test_utils/run_ci_test.sh @@ -8,6 +8,9 @@ ulimit -Sn $(ulimit -Hn) # Increase soft limit for number of processes to match hard limit ulimit -Su $(ulimit -Hu) +# Set umask to 0002 to allow group read/write permissions +umask 0002 + set +x for ARGUMENT in "$@"; do # Split on first = only, preserving any subsequent = signs in the value @@ -53,6 +56,8 @@ TEST_TYPE=$(cat $TRAINING_PARAMS_PATH | /usr/local/bin/yq '.TEST_TYPE') ENABLE_LIGHTWEIGHT_MODE=$(cat $TRAINING_PARAMS_PATH | /usr/local/bin/yq '.ENV_VARS.ENABLE_LIGHTWEIGHT_MODE // "false"') +N_REPEAT=$(cat $TRAINING_PARAMS_PATH | + /usr/local/bin/yq '.ENV_VARS.N_REPEAT // "'$N_REPEAT'"') MODE=$(cat $TRAINING_PARAMS_PATH | /usr/local/bin/yq '.MODE // "pretraining"') @@ -128,10 +133,16 @@ SKIP_PYTEST=$(cat 
$TRAINING_PARAMS_PATH | export RECORD_CHECKPOINTS=${RECORD_CHECKPOINTS:-"false"} for i in $(seq 1 $N_REPEAT); do + # Move TB logs into a repeat-specific directory + DIR=$(dirname "$_TENSORBOARD_PATH") + FILE=$(basename "$_TENSORBOARD_PATH") + export TENSORBOARD_PATH=$DIR/$i/$FILE + mkdir -p $(dirname $TENSORBOARD_PATH) + if [[ $i -gt 1 ]]; then - rm -rf $CHECKPOINT_SAVE_PATH/* - rm -rf /tmp/checkpoints/* - rm -rf $TENSORBOARD_PATH/* + rm -rf $CHECKPOINT_SAVE_PATH/* || true + rm -rf /tmp/checkpoints/* || true + rm -rf $TENSORBOARD_PATH/* || true fi # First run never loads from a checkpoint @@ -202,15 +213,18 @@ for i in $(seq 1 $N_REPEAT); do echo "No frozen checkpoint found. Will skip second run." export CHECKPOINT_SAVE_PATH=$_CHECKPOINT_SAVE_PATH - rm -rf "$CHECKPOINT_SAVE_PATH/iter_0000$TRAIN_ITERS" + if [[ $NODE_RANK -eq 0 ]]; then + rm -rf "$CHECKPOINT_SAVE_PATH/iter_0000$TRAIN_ITERS" + fi echo $((TRAIN_ITERS / 2)) >$CHECKPOINT_SAVE_PATH/latest_checkpointed_iteration.txt break fi if [[ "$TEST_TYPE" == "ckpt-resume" && "$TRAINING_EXIT_CODE" -eq 0 ]]; then export CHECKPOINT_LOAD_PATH=$CHECKPOINT_SAVE_PATH - - rm -rf "$CHECKPOINT_LOAD_PATH/iter_$(printf "%07d\n" "$TRAIN_ITERS")" + if [[ $NODE_RANK -eq 0 ]]; then + rm -rf "$CHECKPOINT_LOAD_PATH/iter_$(printf "%07d\n" "$TRAIN_ITERS")" + fi echo $((TRAIN_ITERS / 2)) >$CHECKPOINT_LOAD_PATH/latest_checkpointed_iteration.txt export RUN_NUMBER=2 @@ -227,7 +241,9 @@ for i in $(seq 1 $N_REPEAT); do bash $ROOT_DIR/tests/functional_tests/shell_test_utils/_run_training.sh || TRAINING_EXIT_CODE=$? 
export CHECKPOINT_SAVE_PATH=$_CHECKPOINT_SAVE_PATH - rm -rf "$CHECKPOINT_SAVE_PATH/iter_0000$TRAIN_ITERS" + if [[ $NODE_RANK -eq 0 ]]; then + rm -rf "$CHECKPOINT_SAVE_PATH/iter_0000$TRAIN_ITERS" + fi echo $((TRAIN_ITERS / 2)) >$CHECKPOINT_SAVE_PATH/latest_checkpointed_iteration.txt fi diff --git a/tests/functional_tests/shell_test_utils/start_interactive_job.sh b/tests/functional_tests/shell_test_utils/start_interactive_job.sh index 13067e7c0ea..cd0b16f93df 100644 --- a/tests/functional_tests/shell_test_utils/start_interactive_job.sh +++ b/tests/functional_tests/shell_test_utils/start_interactive_job.sh @@ -87,6 +87,7 @@ SRUN_CMD="srun \ --container-image=$IMAGE \ --container-workdir=/opt/megatron-lm \ --container-mounts=$CONTAINER_MOUNTS \ + --no-container-mount-home \ --nodes=1 \ $(if [ "$NO_GPUS_PER_TASK" = "FALSE" ]; then echo "--gpus-per-task=8"; fi) \ --time=$TIME \ diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json index df02cb774f4..b9b1236875c 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json @@ -24,36 +24,36 @@ "18": 10.44272, "19": 10.43057, "20": 10.44534, - "21": 10.41778, - "22": 10.38667, - "23": 10.39322, - "24": 10.37847, - "25": 10.35474, - "26": 10.35955, - "27": 10.34527, - "28": 10.33539, - "29": 10.25416, - "30": 10.23011, - "31": 10.14092, - "32": 10.13601, - "33": 10.13944, - "34": 10.11377, - "35": 10.0888, - "36": 10.09247, - "37": 10.06836, - "38": 10.04664, - "39": 9.97584, - "40": 9.93781, - "41": 9.90867, - "42": 9.84873, - "43": 9.8577, - "44": 9.79259, - "45": 9.8035, - "46": 9.7029, - "47": 9.73432, + "21": 10.41771, + "22": 10.38656, + "23": 10.39328, + "24": 10.37849, + "25": 10.35466, + "26": 10.35965, + "27": 10.34523, + "28": 10.33556, + 
"29": 10.25418, + "30": 10.23008, + "31": 10.14093, + "32": 10.13603, + "33": 10.13936, + "34": 10.11381, + "35": 10.08888, + "36": 10.09238, + "37": 10.06851, + "38": 10.0466, + "39": 9.97582, + "40": 9.93764, + "41": 9.90872, + "42": 9.84882, + "43": 9.85772, + "44": 9.7925, + "45": 9.80329, + "46": 9.70285, + "47": 9.73423, "48": 9.70106, - "49": 9.69981, - "50": 9.70258 + "49": 9.69966, + "50": 9.70252 } }, "num-zeros": { @@ -80,37 +80,37 @@ "17": 2409.0, "18": 2345.0, "19": 2374.0, - "20": 2739.0, - "21": 2030.0, - "22": 2819.0, - "23": 2763.0, - "24": 2731.0, - "25": 2429.0, - "26": 2817.0, - "27": 2944.0, - "28": 2741.0, - "29": 2639.0, - "30": 2723.0, - "31": 2158.0, - "32": 2242.0, - "33": 2046.0, - "34": 2139.0, - "35": 2492.0, - "36": 2641.0, - "37": 2853.0, - "38": 2705.0, - "39": 2807.0, - "40": 3333.0, - "41": 1762.0, - "42": 1410.0, - "43": 1558.0, - "44": 2384.0, - "45": 3170.0, - "46": 2664.0, - "47": 2641.0, - "48": 3490.0, - "49": 2928.0, - "50": 2487.0 + "20": 2743.0, + "21": 2039.0, + "22": 2925.0, + "23": 2630.0, + "24": 2821.0, + "25": 2366.0, + "26": 2633.0, + "27": 2921.0, + "28": 2760.0, + "29": 2635.0, + "30": 2614.0, + "31": 2073.0, + "32": 2275.0, + "33": 2130.0, + "34": 2185.0, + "35": 2312.0, + "36": 2789.0, + "37": 2937.0, + "38": 2652.0, + "39": 2929.0, + "40": 3348.0, + "41": 1812.0, + "42": 1441.0, + "43": 1726.0, + "44": 2437.0, + "45": 3263.0, + "46": 2813.0, + "47": 2668.0, + "48": 3411.0, + "49": 3174.0, + "50": 2441.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3404871168.0, - "2": 3404871168.0, - "3": 3404871168.0, - "4": 3404871168.0, - "5": 3404871168.0, - "6": 3404871168.0, - "7": 3404871168.0, - "8": 3404871168.0, - "9": 3404871168.0, - "10": 3404871168.0, - "11": 3404871168.0, - "12": 3404871168.0, - "13": 3404871168.0, - "14": 3404871168.0, - "15": 3404871168.0, - "16": 3404871168.0, - "17": 3404871168.0, - "18": 3404871168.0, - "19": 3404871168.0, - "20": 
3404871168.0, - "21": 3404871168.0, - "22": 3404871168.0, - "23": 3404871168.0, - "24": 3404871168.0, - "25": 3404871168.0, - "26": 3404871168.0, - "27": 3404871168.0, - "28": 3404871168.0, - "29": 3404871168.0, - "30": 3404871168.0, - "31": 3404871168.0, - "32": 3404871168.0, - "33": 3404871168.0, - "34": 3404871168.0, - "35": 3404871168.0, - "36": 3404871168.0, - "37": 3404871168.0, - "38": 3404871168.0, - "39": 3404871168.0, - "40": 3404871168.0, - "41": 3404871168.0, - "42": 3404871168.0, - "43": 3404871168.0, - "44": 3404871168.0, - "45": 3404871168.0, - "46": 3404871168.0, - "47": 3404871168.0, - "48": 3404871168.0, - "49": 3404871168.0, - "50": 3404871168.0 + "1": 3405920768.0, + "2": 3405920768.0, + "3": 3405920768.0, + "4": 3405920768.0, + "5": 3405920768.0, + "6": 3405920768.0, + "7": 3405920768.0, + "8": 3405920768.0, + "9": 3405920768.0, + "10": 3405920768.0, + "11": 3405920768.0, + "12": 3405920768.0, + "13": 3405920768.0, + "14": 3405920768.0, + "15": 3405920768.0, + "16": 3405920768.0, + "17": 3405920768.0, + "18": 3405920768.0, + "19": 3405920768.0, + "20": 3405920768.0, + "21": 3405920768.0, + "22": 3405920768.0, + "23": 3405920768.0, + "24": 3405920768.0, + "25": 3405920768.0, + "26": 3405920768.0, + "27": 3405920768.0, + "28": 3405920768.0, + "29": 3405920768.0, + "30": 3405920768.0, + "31": 3405920768.0, + "32": 3405920768.0, + "33": 3405920768.0, + "34": 3405920768.0, + "35": 3405920768.0, + "36": 3405920768.0, + "37": 3405920768.0, + "38": 3405920768.0, + "39": 3405920768.0, + "40": 3405920768.0, + "41": 3405920768.0, + "42": 3405920768.0, + "43": 3405920768.0, + "44": 3405920768.0, + "45": 3405920768.0, + "46": 3405920768.0, + "47": 3405920768.0, + "48": 3405920768.0, + "49": 3405920768.0, + "50": 3405920768.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4194526208.0, - "2": 5660965888.0, - "3": 5660965888.0, - "4": 5660965888.0, - "5": 5660965888.0, - "6": 5660965888.0, - "7": 
5660965888.0, - "8": 5660965888.0, - "9": 5660965888.0, - "10": 5660965888.0, - "11": 5660965888.0, - "12": 5660965888.0, - "13": 5660965888.0, - "14": 5660965888.0, - "15": 5660965888.0, - "16": 5660965888.0, - "17": 5660965888.0, - "18": 5660965888.0, - "19": 5660965888.0, - "20": 5660965888.0, - "21": 5660965888.0, - "22": 5660965888.0, - "23": 5660965888.0, - "24": 5660965888.0, - "25": 5660965888.0, - "26": 5660965888.0, - "27": 5660965888.0, - "28": 5660965888.0, - "29": 5660965888.0, - "30": 5660965888.0, - "31": 5660965888.0, - "32": 5660965888.0, - "33": 5660965888.0, - "34": 5660965888.0, - "35": 5660965888.0, - "36": 5660965888.0, - "37": 5660965888.0, - "38": 5660965888.0, - "39": 5660965888.0, - "40": 5660965888.0, - "41": 5660965888.0, - "42": 5660965888.0, - "43": 5660965888.0, - "44": 5660965888.0, - "45": 5660965888.0, - "46": 5660965888.0, - "47": 5660965888.0, - "48": 5660965888.0, - "49": 5660965888.0, - "50": 5660965888.0 + "1": 4195575808.0, + "2": 5662015488.0, + "3": 5662015488.0, + "4": 5662015488.0, + "5": 5662015488.0, + "6": 5662015488.0, + "7": 5662015488.0, + "8": 5662015488.0, + "9": 5662015488.0, + "10": 5662015488.0, + "11": 5662015488.0, + "12": 5662015488.0, + "13": 5662015488.0, + "14": 5662015488.0, + "15": 5662015488.0, + "16": 5662015488.0, + "17": 5662015488.0, + "18": 5662015488.0, + "19": 5662015488.0, + "20": 5662015488.0, + "21": 5662015488.0, + "22": 5662015488.0, + "23": 5662015488.0, + "24": 5662015488.0, + "25": 5662015488.0, + "26": 5662015488.0, + "27": 5662015488.0, + "28": 5662015488.0, + "29": 5662015488.0, + "30": 5662015488.0, + "31": 5662015488.0, + "32": 5662015488.0, + "33": 5662015488.0, + "34": 5662015488.0, + "35": 5662015488.0, + "36": 5662015488.0, + "37": 5662015488.0, + "38": 5662015488.0, + "39": 5662015488.0, + "40": 5662015488.0, + "41": 5662015488.0, + "42": 5662015488.0, + "43": 5662015488.0, + "44": 5662015488.0, + "45": 5662015488.0, + "46": 5662015488.0, + "47": 5662015488.0, + "48": 
5662015488.0, + "49": 5662015488.0, + "50": 5662015488.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.44279, - "2": 0.55345, - "3": 0.53909, - "4": 0.52187, - "5": 0.52958, - "6": 0.5241, - "7": 0.5353, - "8": 0.51946, - "9": 0.52732, - "10": 0.52759, - "11": 0.51849, - "12": 0.52326, - "13": 0.52472, - "14": 0.52577, - "15": 0.51817, - "16": 0.51922, - "17": 0.51686, - "18": 0.5248, - "19": 0.51945, - "20": 0.74697, - "21": 0.51544, - "22": 0.52412, - "23": 0.66206, - "24": 0.51781, - "25": 0.52429, - "26": 0.52068, - "27": 0.62432, - "28": 0.52016, - "29": 0.52217, - "30": 0.51949, - "31": 0.69033, - "32": 0.52127, - "33": 0.52602, - "34": 0.6403, - "35": 0.51723, - "36": 0.52445, - "37": 0.51746, - "38": 0.52296, - "39": 0.52159, - "40": 0.6718, - "41": 0.58171, - "42": 0.7393, - "43": 0.54277, - "44": 0.81615, - "45": 0.52284, - "46": 0.71947, - "47": 0.52219, - "48": 0.51866, - "49": 0.51764, - "50": 0.51841 + "1": 9.33953, + "2": 0.53319, + "3": 0.47492, + "4": 0.43971, + "5": 0.43812, + "6": 0.43852, + "7": 0.4386, + "8": 0.43696, + "9": 0.4374, + "10": 0.43581, + "11": 0.71474, + "12": 0.44321, + "13": 0.73975, + "14": 0.44195, + "15": 0.43796, + "16": 0.43687, + "17": 0.43648, + "18": 0.43733, + "19": 0.43826, + "20": 0.44179, + "21": 1.02916, + "22": 0.7107, + "23": 0.70393, + "24": 0.904, + "25": 0.43822, + "26": 0.43864, + "27": 0.46131, + "28": 0.44753, + "29": 0.43372, + "30": 0.43644, + "31": 0.45145, + "32": 0.44608, + "33": 0.43714, + "34": 0.43395, + "35": 0.43358, + "36": 0.43471, + "37": 0.43343, + "38": 0.43378, + "39": 0.43774, + "40": 0.43399, + "41": 0.43662, + "42": 0.43501, + "43": 0.43703, + "44": 0.44084, + "45": 0.43443, + "46": 0.43652, + "47": 0.84278, + "48": 0.44024, + "49": 0.4409, + "50": 0.43833 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json index 0d85e13b23b..30fa7e80d5a 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json @@ -21,39 +21,39 @@ "15": 10.52714, "16": 10.50594, "17": 10.5009, - "18": 10.51023, - "19": 10.493, - "20": 10.48862, - "21": 10.47473, - "22": 10.42799, - "23": 10.42684, - "24": 10.4036, - "25": 10.39991, - "26": 10.38461, - "27": 10.38216, - "28": 10.36877, - "29": 10.32192, - "30": 10.2204, - "31": 10.17094, - "32": 10.12605, - "33": 10.10628, - "34": 10.09438, - "35": 10.07042, - "36": 10.07481, - "37": 10.03644, - "38": 10.01812, - "39": 9.96852, - "40": 9.93082, - "41": 9.87316, - "42": 9.81842, - "43": 9.8156, - "44": 9.73841, - "45": 9.7628, - "46": 9.67691, - "47": 9.68688, + "18": 10.51024, + "19": 10.49283, + "20": 10.48852, + "21": 10.47463, + "22": 10.42802, + "23": 10.42674, + "24": 10.40359, + "25": 10.39998, + "26": 10.38464, + "27": 10.38236, + "28": 10.36891, + "29": 10.32202, + "30": 10.22049, + "31": 10.17103, + "32": 10.12583, + "33": 10.10622, + "34": 10.09458, + "35": 10.07043, + "36": 10.07484, + "37": 10.03646, + "38": 10.0182, + "39": 9.9686, + "40": 9.93086, + "41": 9.87312, + "42": 9.8185, + "43": 9.81546, + "44": 9.73852, + "45": 9.76279, + "46": 9.67679, + "47": 9.68692, "48": 9.66292, "49": 9.67587, - "50": 9.67446 + "50": 9.67447 } }, "num-zeros": { @@ -78,39 +78,39 @@ "15": 2607.0, "16": 2411.0, "17": 2529.0, - "18": 2418.0, - "19": 2363.0, - "20": 2323.0, - "21": 2401.0, - "22": 2588.0, - "23": 2338.0, - "24": 2305.0, - "25": 2702.0, - "26": 2370.0, - "27": 2462.0, - "28": 2407.0, - "29": 2240.0, - "30": 2850.0, - "31": 2882.0, - "32": 2837.0, - "33": 2645.0, - "34": 2874.0, - "35": 2913.0, - "36": 3000.0, - "37": 3122.0, - "38": 2680.0, - "39": 2216.0, - "40": 2211.0, - "41": 3456.0, - "42": 3624.0, - 
"43": 3364.0, - "44": 4026.0, - "45": 4145.0, - "46": 2924.0, - "47": 1942.0, - "48": 3363.0, - "49": 3532.0, - "50": 3710.0 + "18": 2392.0, + "19": 2417.0, + "20": 2269.0, + "21": 2382.0, + "22": 2652.0, + "23": 2420.0, + "24": 2251.0, + "25": 2616.0, + "26": 2433.0, + "27": 2470.0, + "28": 2335.0, + "29": 2270.0, + "30": 2689.0, + "31": 2960.0, + "32": 2808.0, + "33": 2659.0, + "34": 2932.0, + "35": 2926.0, + "36": 3103.0, + "37": 3227.0, + "38": 2634.0, + "39": 2132.0, + "40": 2236.0, + "41": 3589.0, + "42": 3470.0, + "43": 3467.0, + "44": 4038.0, + "45": 4173.0, + "46": 2993.0, + "47": 1996.0, + "48": 3318.0, + "49": 3662.0, + "50": 3572.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2061524480.0, - "2": 2061524480.0, - "3": 2061524480.0, - "4": 2061524480.0, - "5": 2061524480.0, - "6": 2061524480.0, - "7": 2061524480.0, - "8": 2061524480.0, - "9": 2061524480.0, - "10": 2061524480.0, - "11": 2061524480.0, - "12": 2061524480.0, - "13": 2061524480.0, - "14": 2061524480.0, - "15": 2061524480.0, - "16": 2061524480.0, - "17": 2061524480.0, - "18": 2061524480.0, - "19": 2061524480.0, - "20": 2061524480.0, - "21": 2061524480.0, - "22": 2061524480.0, - "23": 2061524480.0, - "24": 2061524480.0, - "25": 2061524480.0, - "26": 2061524480.0, - "27": 2061524480.0, - "28": 2061524480.0, - "29": 2061524480.0, - "30": 2061524480.0, - "31": 2061524480.0, - "32": 2061524480.0, - "33": 2061524480.0, - "34": 2061524480.0, - "35": 2061524480.0, - "36": 2061524480.0, - "37": 2061524480.0, - "38": 2061524480.0, - "39": 2061524480.0, - "40": 2061524480.0, - "41": 2061524480.0, - "42": 2061524480.0, - "43": 2061524480.0, - "44": 2061524480.0, - "45": 2061524480.0, - "46": 2061524480.0, - "47": 2061524480.0, - "48": 2061524480.0, - "49": 2061524480.0, - "50": 2061524480.0 + "1": 2062574080.0, + "2": 2062574080.0, + "3": 2062574080.0, + "4": 2062574080.0, + "5": 2062574080.0, + "6": 2062574080.0, + "7": 2062574080.0, + "8": 
2062574080.0, + "9": 2062574080.0, + "10": 2062574080.0, + "11": 2062574080.0, + "12": 2062574080.0, + "13": 2062574080.0, + "14": 2062574080.0, + "15": 2062574080.0, + "16": 2062574080.0, + "17": 2062574080.0, + "18": 2062574080.0, + "19": 2062574080.0, + "20": 2062574080.0, + "21": 2062574080.0, + "22": 2062574080.0, + "23": 2062574080.0, + "24": 2062574080.0, + "25": 2062574080.0, + "26": 2062574080.0, + "27": 2062574080.0, + "28": 2062574080.0, + "29": 2062574080.0, + "30": 2062574080.0, + "31": 2062574080.0, + "32": 2062574080.0, + "33": 2062574080.0, + "34": 2062574080.0, + "35": 2062574080.0, + "36": 2062574080.0, + "37": 2062574080.0, + "38": 2062574080.0, + "39": 2062574080.0, + "40": 2062574080.0, + "41": 2062574080.0, + "42": 2062574080.0, + "43": 2062574080.0, + "44": 2062574080.0, + "45": 2062574080.0, + "46": 2062574080.0, + "47": 2062574080.0, + "48": 2062574080.0, + "49": 2062574080.0, + "50": 2062574080.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4385424896.0, - "2": 5245672960.0, - "3": 5245672960.0, - "4": 5245672960.0, - "5": 5245672960.0, - "6": 5245672960.0, - "7": 5245672960.0, - "8": 5245672960.0, - "9": 5245672960.0, - "10": 5245672960.0, - "11": 5245672960.0, - "12": 5245672960.0, - "13": 5245672960.0, - "14": 5245672960.0, - "15": 5245672960.0, - "16": 5245672960.0, - "17": 5245672960.0, - "18": 5245672960.0, - "19": 5245672960.0, - "20": 5245672960.0, - "21": 5245672960.0, - "22": 5245672960.0, - "23": 5245672960.0, - "24": 5245672960.0, - "25": 5245672960.0, - "26": 5245672960.0, - "27": 5245672960.0, - "28": 5245672960.0, - "29": 5245672960.0, - "30": 5245672960.0, - "31": 5245672960.0, - "32": 5245672960.0, - "33": 5245672960.0, - "34": 5245672960.0, - "35": 5245672960.0, - "36": 5245672960.0, - "37": 5245672960.0, - "38": 5245672960.0, - "39": 5245672960.0, - "40": 5245672960.0, - "41": 5245672960.0, - "42": 5245672960.0, - "43": 5245672960.0, - "44": 5245672960.0, 
- "45": 5245672960.0, - "46": 5245672960.0, - "47": 5245672960.0, - "48": 5245672960.0, - "49": 5245672960.0, - "50": 5245672960.0 + "1": 4386474496.0, + "2": 5246722560.0, + "3": 5246722560.0, + "4": 5246722560.0, + "5": 5246722560.0, + "6": 5246722560.0, + "7": 5246722560.0, + "8": 5246722560.0, + "9": 5246722560.0, + "10": 5246722560.0, + "11": 5246722560.0, + "12": 5246722560.0, + "13": 5246722560.0, + "14": 5246722560.0, + "15": 5246722560.0, + "16": 5246722560.0, + "17": 5246722560.0, + "18": 5246722560.0, + "19": 5246722560.0, + "20": 5246722560.0, + "21": 5246722560.0, + "22": 5246722560.0, + "23": 5246722560.0, + "24": 5246722560.0, + "25": 5246722560.0, + "26": 5246722560.0, + "27": 5246722560.0, + "28": 5246722560.0, + "29": 5246722560.0, + "30": 5246722560.0, + "31": 5246722560.0, + "32": 5246722560.0, + "33": 5246722560.0, + "34": 5246722560.0, + "35": 5246722560.0, + "36": 5246722560.0, + "37": 5246722560.0, + "38": 5246722560.0, + "39": 5246722560.0, + "40": 5246722560.0, + "41": 5246722560.0, + "42": 5246722560.0, + "43": 5246722560.0, + "44": 5246722560.0, + "45": 5246722560.0, + "46": 5246722560.0, + "47": 5246722560.0, + "48": 5246722560.0, + "49": 5246722560.0, + "50": 5246722560.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 14.48983, - "2": 0.782, - "3": 0.71913, - "4": 0.71541, - "5": 0.71528, - "6": 0.7219, - "7": 0.72729, - "8": 0.72714, - "9": 0.7634, - "10": 0.71523, - "11": 0.72303, - "12": 1.34179, - "13": 0.93338, - "14": 0.72484, - "15": 0.70784, - "16": 0.72443, - "17": 0.72151, - "18": 0.71102, - "19": 1.13624, - "20": 1.56469, - "21": 1.66622, - "22": 0.9574, - "23": 0.69921, - "24": 0.70477, - "25": 0.73932, - "26": 0.74798, - "27": 0.72633, - "28": 0.72782, - "29": 0.73646, - "30": 0.73665, - "31": 0.74301, - "32": 0.73363, - "33": 0.71952, - "34": 0.7406, - "35": 0.71103, - "36": 0.70026, - "37": 0.71087, - "38": 0.88272, - "39": 0.71279, - "40": 0.92123, - "41": 1.20193, 
- "42": 0.72924, - "43": 0.70749, - "44": 0.72158, - "45": 0.71169, - "46": 1.23637, - "47": 1.13432, - "48": 1.26896, - "49": 1.13682, - "50": 1.21366 + "1": 12.53778, + "2": 0.64042, + "3": 0.57704, + "4": 0.56942, + "5": 0.55857, + "6": 1.5214, + "7": 0.8799, + "8": 0.58802, + "9": 0.58845, + "10": 0.91566, + "11": 1.66597, + "12": 1.31669, + "13": 0.9054, + "14": 0.55959, + "15": 0.55349, + "16": 0.56731, + "17": 0.54994, + "18": 0.56124, + "19": 0.54032, + "20": 0.54467, + "21": 0.56577, + "22": 0.59073, + "23": 0.55848, + "24": 0.5515, + "25": 0.56783, + "26": 0.58223, + "27": 0.56278, + "28": 0.55385, + "29": 0.54473, + "30": 0.54779, + "31": 0.54239, + "32": 0.53324, + "33": 0.54812, + "34": 0.57008, + "35": 0.56814, + "36": 0.55146, + "37": 0.56138, + "38": 0.80574, + "39": 0.5919, + "40": 0.83084, + "41": 0.9006, + "42": 0.82734, + "43": 0.98233, + "44": 1.08635, + "45": 1.33415, + "46": 1.29362, + "47": 1.03481, + "48": 1.02838, + "49": 0.56104, + "50": 0.57748 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json index 1352649be85..7a21f7ae2f9 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json @@ -25,35 +25,35 @@ "19": 10.44113, "20": 10.45448, "21": 10.43454, - "22": 10.40592, - "23": 10.39961, - "24": 10.37579, - "25": 10.38182, - "26": 10.35147, + "22": 10.40591, + "23": 10.39975, + "24": 10.37583, + "25": 10.38168, + "26": 10.3515, "27": 10.35388, - "28": 10.34937, - "29": 10.28711, - "30": 10.21159, - "31": 10.1726, - "32": 10.13421, - "33": 10.14744, - "34": 10.10737, - "35": 10.10581, - "36": 10.08735, + "28": 10.34965, + "29": 10.28701, + "30": 10.21143, + "31": 10.17272, + "32": 10.13416, + "33": 10.14725, + "34": 10.10738, + 
"35": 10.10592, + "36": 10.08739, "37": 10.08157, - "38": 10.07233, - "39": 10.00094, - "40": 9.98143, - "41": 9.92541, - "42": 9.87527, - "43": 9.88711, - "44": 9.80642, - "45": 9.82325, - "46": 9.73785, - "47": 9.74817, - "48": 9.71609, - "49": 9.74484, - "50": 9.72982 + "38": 10.07245, + "39": 10.00093, + "40": 9.98138, + "41": 9.92543, + "42": 9.87534, + "43": 9.88716, + "44": 9.80646, + "45": 9.82342, + "46": 9.73786, + "47": 9.74811, + "48": 9.71614, + "49": 9.74493, + "50": 9.73 } }, "num-zeros": { @@ -82,35 +82,35 @@ "19": 2547.0, "20": 2850.0, "21": 1990.0, - "22": 2884.0, - "23": 2857.0, - "24": 2685.0, - "25": 2514.0, - "26": 2958.0, - "27": 2673.0, - "28": 2723.0, - "29": 2571.0, - "30": 2858.0, - "31": 2157.0, - "32": 2357.0, - "33": 2242.0, - "34": 2464.0, - "35": 2544.0, - "36": 2933.0, - "37": 3293.0, - "38": 2730.0, - "39": 2795.0, - "40": 3310.0, - "41": 1816.0, - "42": 1467.0, - "43": 1817.0, - "44": 2633.0, - "45": 3576.0, - "46": 3015.0, - "47": 2805.0, - "48": 3071.0, - "49": 2974.0, - "50": 2267.0 + "22": 2964.0, + "23": 2695.0, + "24": 2772.0, + "25": 2524.0, + "26": 2977.0, + "27": 2627.0, + "28": 2776.0, + "29": 2514.0, + "30": 2843.0, + "31": 2070.0, + "32": 2362.0, + "33": 2211.0, + "34": 2574.0, + "35": 2499.0, + "36": 2943.0, + "37": 3347.0, + "38": 2628.0, + "39": 2781.0, + "40": 3335.0, + "41": 1800.0, + "42": 1598.0, + "43": 1719.0, + "44": 2631.0, + "45": 3492.0, + "46": 2988.0, + "47": 2784.0, + "48": 2951.0, + "49": 2907.0, + "50": 2113.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1784014336.0, - "2": 1784014336.0, - "3": 1784014336.0, - "4": 1784014336.0, - "5": 1784014336.0, - "6": 1784014336.0, - "7": 1784014336.0, - "8": 1784014336.0, - "9": 1784014336.0, - "10": 1784014336.0, - "11": 1784014336.0, - "12": 1784014336.0, - "13": 1784014336.0, - "14": 1784014336.0, - "15": 1784014336.0, - "16": 1784014336.0, - "17": 1784014336.0, - "18": 1784014336.0, - "19": 
1784014336.0, - "20": 1784014336.0, - "21": 1784014336.0, - "22": 1784014336.0, - "23": 1784014336.0, - "24": 1784014336.0, - "25": 1784014336.0, - "26": 1784014336.0, - "27": 1784014336.0, - "28": 1784014336.0, - "29": 1784014336.0, - "30": 1784014336.0, - "31": 1784014336.0, - "32": 1784014336.0, - "33": 1784014336.0, - "34": 1784014336.0, - "35": 1784014336.0, - "36": 1784014336.0, - "37": 1784014336.0, - "38": 1784014336.0, - "39": 1784014336.0, - "40": 1784014336.0, - "41": 1784014336.0, - "42": 1784014336.0, - "43": 1784014336.0, - "44": 1784014336.0, - "45": 1784014336.0, - "46": 1784014336.0, - "47": 1784014336.0, - "48": 1784014336.0, - "49": 1784014336.0, - "50": 1784014336.0 + "1": 1785063936.0, + "2": 1785063936.0, + "3": 1785063936.0, + "4": 1785063936.0, + "5": 1785063936.0, + "6": 1785063936.0, + "7": 1785063936.0, + "8": 1785063936.0, + "9": 1785063936.0, + "10": 1785063936.0, + "11": 1785063936.0, + "12": 1785063936.0, + "13": 1785063936.0, + "14": 1785063936.0, + "15": 1785063936.0, + "16": 1785063936.0, + "17": 1785063936.0, + "18": 1785063936.0, + "19": 1785063936.0, + "20": 1785063936.0, + "21": 1785063936.0, + "22": 1785063936.0, + "23": 1785063936.0, + "24": 1785063936.0, + "25": 1785063936.0, + "26": 1785063936.0, + "27": 1785063936.0, + "28": 1785063936.0, + "29": 1785063936.0, + "30": 1785063936.0, + "31": 1785063936.0, + "32": 1785063936.0, + "33": 1785063936.0, + "34": 1785063936.0, + "35": 1785063936.0, + "36": 1785063936.0, + "37": 1785063936.0, + "38": 1785063936.0, + "39": 1785063936.0, + "40": 1785063936.0, + "41": 1785063936.0, + "42": 1785063936.0, + "43": 1785063936.0, + "44": 1785063936.0, + "45": 1785063936.0, + "46": 1785063936.0, + "47": 1785063936.0, + "48": 1785063936.0, + "49": 1785063936.0, + "50": 1785063936.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2365860864.0, - "2": 3108323328.0, - "3": 3108323328.0, - "4": 3108323328.0, - "5": 3108323328.0, - 
"6": 3108842496.0, - "7": 3108842496.0, - "8": 3108842496.0, - "9": 3108842496.0, - "10": 3108842496.0, - "11": 3108842496.0, - "12": 3108842496.0, - "13": 3108842496.0, - "14": 3108842496.0, - "15": 3108842496.0, - "16": 3108842496.0, - "17": 3108842496.0, - "18": 3108842496.0, - "19": 3108842496.0, - "20": 3108842496.0, - "21": 3108842496.0, - "22": 3108842496.0, - "23": 3108842496.0, - "24": 3108842496.0, - "25": 3108842496.0, - "26": 3108842496.0, - "27": 3108842496.0, - "28": 3108842496.0, - "29": 3108842496.0, - "30": 3108842496.0, - "31": 3108842496.0, - "32": 3108842496.0, - "33": 3108842496.0, - "34": 3108842496.0, - "35": 3108842496.0, - "36": 3108842496.0, - "37": 3108842496.0, - "38": 3108842496.0, - "39": 3108842496.0, - "40": 3108842496.0, - "41": 3108842496.0, - "42": 3108842496.0, - "43": 3108842496.0, - "44": 3108842496.0, - "45": 3108842496.0, - "46": 3108842496.0, - "47": 3108842496.0, - "48": 3108842496.0, - "49": 3108842496.0, - "50": 3108842496.0 + "1": 2366910464.0, + "2": 3109894144.0, + "3": 3109894144.0, + "4": 3109894144.0, + "5": 3109894144.0, + "6": 3109894144.0, + "7": 3109894144.0, + "8": 3109894144.0, + "9": 3109894144.0, + "10": 3109894144.0, + "11": 3109894144.0, + "12": 3109894144.0, + "13": 3109894144.0, + "14": 3109894144.0, + "15": 3109897216.0, + "16": 3109897216.0, + "17": 3109897216.0, + "18": 3109897216.0, + "19": 3109897216.0, + "20": 3109897216.0, + "21": 3109897216.0, + "22": 3109897216.0, + "23": 3109897216.0, + "24": 3109897216.0, + "25": 3109897216.0, + "26": 3109897216.0, + "27": 3109897216.0, + "28": 3109897216.0, + "29": 3109897216.0, + "30": 3109897216.0, + "31": 3109897216.0, + "32": 3109897216.0, + "33": 3109897216.0, + "34": 3109897216.0, + "35": 3109897216.0, + "36": 3109897216.0, + "37": 3109897216.0, + "38": 3109897216.0, + "39": 3109897216.0, + "40": 3109897216.0, + "41": 3109897216.0, + "42": 3109897216.0, + "43": 3109897216.0, + "44": 3109897216.0, + "45": 3109897216.0, + "46": 3109897216.0, + "47": 
3109897216.0, + "48": 3109897216.0, + "49": 3109897216.0, + "50": 3109897216.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.98661, - "2": 1.05916, - "3": 1.01721, - "4": 1.02611, - "5": 1.02779, - "6": 1.11252, - "7": 1.0176, - "8": 1.02427, - "9": 1.02561, - "10": 1.01845, - "11": 1.02419, - "12": 1.01745, - "13": 1.01224, - "14": 1.02388, - "15": 1.03687, - "16": 1.01886, - "17": 1.01708, - "18": 1.01143, - "19": 1.01902, - "20": 1.49878, - "21": 1.47537, - "22": 1.01801, - "23": 1.05158, - "24": 1.03481, - "25": 1.01773, - "26": 1.01186, - "27": 1.02203, - "28": 1.01824, - "29": 1.01865, - "30": 1.02165, - "31": 1.0184, - "32": 1.02106, - "33": 1.04655, - "34": 1.03129, - "35": 1.01893, - "36": 1.02153, - "37": 1.02154, - "38": 1.0213, - "39": 1.14846, - "40": 1.02149, - "41": 1.01905, - "42": 1.02038, - "43": 1.03126, - "44": 1.04155, - "45": 1.01649, - "46": 1.01742, - "47": 1.02406, - "48": 1.27122, - "49": 1.15085, - "50": 1.10861 + "1": 10.5121, + "2": 1.00958, + "3": 0.92732, + "4": 0.90421, + "5": 0.90504, + "6": 0.89943, + "7": 0.90319, + "8": 1.1748, + "9": 1.95208, + "10": 0.92148, + "11": 0.91859, + "12": 0.92137, + "13": 0.92531, + "14": 1.25591, + "15": 0.92418, + "16": 0.91961, + "17": 0.90838, + "18": 0.90766, + "19": 0.90747, + "20": 0.9061, + "21": 0.93723, + "22": 0.90644, + "23": 0.91067, + "24": 1.66749, + "25": 0.91188, + "26": 0.91194, + "27": 0.988, + "28": 0.92516, + "29": 0.91117, + "30": 1.435, + "31": 0.89868, + "32": 0.90735, + "33": 1.29737, + "34": 1.32235, + "35": 0.91506, + "36": 0.91851, + "37": 0.92715, + "38": 0.92769, + "39": 0.92632, + "40": 1.26827, + "41": 1.07193, + "42": 1.07217, + "43": 0.98674, + "44": 1.07179, + "45": 1.09756, + "46": 1.10568, + "47": 0.92215, + "48": 0.92051, + "49": 0.92335, + "50": 0.92251 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json index bf20b2b00e3..d034c6bf7d8 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json @@ -44,15 +44,15 @@ "38": 10.07257, "39": 10.0013, "40": 9.9816, - "41": 9.92549, - "42": 9.87529, - "43": 9.88742, - "44": 9.80641, - "45": 9.82342, - "46": 9.73815, - "47": 9.74831, - "48": 9.71619, - "49": 9.74504, + "41": 9.92551, + "42": 9.87537, + "43": 9.88725, + "44": 9.80659, + "45": 9.82349, + "46": 9.73821, + "47": 9.74829, + "48": 9.71628, + "49": 9.74489, "50": 9.73004 } }, @@ -100,17 +100,17 @@ "37": 3305.0, "38": 2682.0, "39": 2805.0, - "40": 3425.0, - "41": 1812.0, - "42": 1481.0, - "43": 1726.0, - "44": 2575.0, - "45": 3438.0, - "46": 2960.0, - "47": 2792.0, - "48": 3107.0, - "49": 2854.0, - "50": 2145.0 + "40": 3430.0, + "41": 1767.0, + "42": 1516.0, + "43": 1798.0, + "44": 2790.0, + "45": 3578.0, + "46": 3016.0, + "47": 2890.0, + "48": 3065.0, + "49": 2914.0, + "50": 2208.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1767237120.0, - "2": 1767237120.0, - "3": 1767237120.0, - "4": 1767237120.0, - "5": 1767237120.0, - "6": 1767237120.0, - "7": 1767237120.0, - "8": 1767237120.0, - "9": 1767237120.0, - "10": 1767237120.0, - "11": 1767237120.0, - "12": 1767237120.0, - "13": 1767237120.0, - "14": 1767237120.0, - "15": 1767237120.0, - "16": 1767237120.0, - "17": 1767237120.0, - "18": 1767237120.0, - "19": 1767237120.0, - "20": 1767237120.0, - "21": 1767237120.0, - "22": 1767237120.0, - "23": 1767237120.0, - "24": 1767237120.0, - "25": 1767237120.0, - "26": 1767237120.0, - "27": 1767237120.0, - "28": 1767237120.0, - "29": 1767237120.0, - "30": 1767237120.0, - "31": 1767237120.0, - "32": 1767237120.0, - "33": 1767237120.0, - "34": 1767237120.0, - 
"35": 1767237120.0, - "36": 1767237120.0, - "37": 1767237120.0, - "38": 1767237120.0, - "39": 1767237120.0, - "40": 1767237120.0, - "41": 1767237120.0, - "42": 1767237120.0, - "43": 1767237120.0, - "44": 1767237120.0, - "45": 1767237120.0, - "46": 1767237120.0, - "47": 1767237120.0, - "48": 1767237120.0, - "49": 1767237120.0, - "50": 1767237120.0 + "1": 1768285696.0, + "2": 1768285696.0, + "3": 1768285696.0, + "4": 1768285696.0, + "5": 1768285696.0, + "6": 1768285696.0, + "7": 1768285696.0, + "8": 1768285696.0, + "9": 1768285696.0, + "10": 1768285696.0, + "11": 1768285696.0, + "12": 1768285696.0, + "13": 1768285696.0, + "14": 1768285696.0, + "15": 1768285696.0, + "16": 1768285696.0, + "17": 1768285696.0, + "18": 1768285696.0, + "19": 1768285696.0, + "20": 1768285696.0, + "21": 1768285696.0, + "22": 1768285696.0, + "23": 1768285696.0, + "24": 1768285696.0, + "25": 1768285696.0, + "26": 1768285696.0, + "27": 1768285696.0, + "28": 1768285696.0, + "29": 1768285696.0, + "30": 1768285696.0, + "31": 1768285696.0, + "32": 1768285696.0, + "33": 1768285696.0, + "34": 1768285696.0, + "35": 1768285696.0, + "36": 1768285696.0, + "37": 1768285696.0, + "38": 1768285696.0, + "39": 1768285696.0, + "40": 1768285696.0, + "41": 1768285696.0, + "42": 1768285696.0, + "43": 1768285696.0, + "44": 1768285696.0, + "45": 1768285696.0, + "46": 1768285696.0, + "47": 1768285696.0, + "48": 1768285696.0, + "49": 1768285696.0, + "50": 1768285696.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2336500736.0, - "2": 3079487488.0, - "3": 3079487488.0, - "4": 3079487488.0, - "5": 3079487488.0, - "6": 3079487488.0, - "7": 3079487488.0, - "8": 3079487488.0, - "9": 3079487488.0, - "10": 3079487488.0, - "11": 3079487488.0, - "12": 3079487488.0, - "13": 3079487488.0, - "14": 3079487488.0, - "15": 3079487488.0, - "16": 3079487488.0, - "17": 3079487488.0, - "18": 3079487488.0, - "19": 3079487488.0, - "20": 3079487488.0, - "21": 3079487488.0, - 
"22": 3079487488.0, - "23": 3079487488.0, - "24": 3079487488.0, - "25": 3079487488.0, - "26": 3079487488.0, - "27": 3079487488.0, - "28": 3079487488.0, - "29": 3079487488.0, - "30": 3079487488.0, - "31": 3079487488.0, - "32": 3079487488.0, - "33": 3079487488.0, - "34": 3079487488.0, - "35": 3079487488.0, - "36": 3079487488.0, - "37": 3079487488.0, - "38": 3079487488.0, - "39": 3079487488.0, - "40": 3079487488.0, - "41": 3079487488.0, - "42": 3079487488.0, - "43": 3079487488.0, - "44": 3079487488.0, - "45": 3079487488.0, - "46": 3079487488.0, - "47": 3079487488.0, - "48": 3079487488.0, - "49": 3079487488.0, - "50": 3079487488.0 + "1": 2337549312.0, + "2": 3080536064.0, + "3": 3082107392.0, + "4": 3082107392.0, + "5": 3082107392.0, + "6": 3082107392.0, + "7": 3082107392.0, + "8": 3082107392.0, + "9": 3082107392.0, + "10": 3082107392.0, + "11": 3082107392.0, + "12": 3082107392.0, + "13": 3082107392.0, + "14": 3082107392.0, + "15": 3082107392.0, + "16": 3082108928.0, + "17": 3082108928.0, + "18": 3082108928.0, + "19": 3082108928.0, + "20": 3082108928.0, + "21": 3082108928.0, + "22": 3082108928.0, + "23": 3082108928.0, + "24": 3082108928.0, + "25": 3082108928.0, + "26": 3082108928.0, + "27": 3082108928.0, + "28": 3082108928.0, + "29": 3082108928.0, + "30": 3082108928.0, + "31": 3082108928.0, + "32": 3082108928.0, + "33": 3082108928.0, + "34": 3082108928.0, + "35": 3082108928.0, + "36": 3082108928.0, + "37": 3082108928.0, + "38": 3082108928.0, + "39": 3082108928.0, + "40": 3082108928.0, + "41": 3082108928.0, + "42": 3082108928.0, + "43": 3082108928.0, + "44": 3082108928.0, + "45": 3082108928.0, + "46": 3082108928.0, + "47": 3082108928.0, + "48": 3082108928.0, + "49": 3082108928.0, + "50": 3082108928.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.68301, - "2": 0.87796, - "3": 0.84756, - "4": 0.85513, - "5": 0.85643, - "6": 0.85366, - "7": 0.8468, - "8": 0.84974, - "9": 0.84989, - "10": 0.8464, - "11": 0.84369, - 
"12": 0.84972, - "13": 0.84311, - "14": 0.85648, - "15": 1.1084, - "16": 0.8827, - "17": 0.87952, - "18": 0.88554, - "19": 0.82673, - "20": 0.82222, - "21": 1.06414, - "22": 1.09134, - "23": 1.02591, - "24": 0.82601, - "25": 0.82277, - "26": 0.81844, - "27": 0.82627, - "28": 0.82854, - "29": 0.82653, - "30": 0.82247, - "31": 0.82906, - "32": 0.82363, - "33": 0.82944, - "34": 0.82401, - "35": 0.82902, - "36": 0.83537, - "37": 0.8265, - "38": 0.82728, - "39": 0.82087, - "40": 0.82525, - "41": 0.82691, - "42": 1.14473, - "43": 0.97566, - "44": 0.82343, - "45": 0.82956, - "46": 0.82572, - "47": 0.83635, - "48": 0.94255, - "49": 0.99753, - "50": 1.10127 + "1": 10.51798, + "2": 0.89864, + "3": 0.7978, + "4": 0.74774, + "5": 0.73987, + "6": 0.74277, + "7": 0.76779, + "8": 0.74313, + "9": 1.58315, + "10": 0.73453, + "11": 0.73215, + "12": 0.72957, + "13": 0.72967, + "14": 0.73868, + "15": 0.73216, + "16": 1.10392, + "17": 0.73363, + "18": 0.73647, + "19": 0.76464, + "20": 0.73565, + "21": 0.72858, + "22": 0.72652, + "23": 0.72858, + "24": 0.74508, + "25": 0.74166, + "26": 0.7704, + "27": 1.15428, + "28": 1.146, + "29": 0.73283, + "30": 0.73304, + "31": 0.73237, + "32": 0.7343, + "33": 0.73304, + "34": 0.72879, + "35": 0.73286, + "36": 1.74169, + "37": 1.10377, + "38": 0.73148, + "39": 0.73227, + "40": 0.73028, + "41": 0.73026, + "42": 1.15127, + "43": 1.11655, + "44": 0.73185, + "45": 1.17599, + "46": 1.07292, + "47": 0.72983, + "48": 0.72804, + "49": 0.73205, + "50": 0.72929 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json index dc5d31f8f8b..4302b8e40ca 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json @@ -25,85 +25,85 @@ "19": 10.44113, "20": 10.45448, "21": 10.43454, - "22": 10.40592, - "23": 10.39961, - "24": 10.37579, - "25": 10.38182, - "26": 10.35147, + "22": 10.40591, + "23": 10.39975, + "24": 10.37583, + "25": 10.38168, + "26": 10.3515, "27": 10.35388, - "28": 10.34937, - "29": 10.28711, - "30": 10.21159, - "31": 10.1726, - "32": 10.13421, - "33": 10.14744, - "34": 10.10737, - "35": 10.10581, - "36": 10.08735, + "28": 10.34965, + "29": 10.28701, + "30": 10.21143, + "31": 10.17272, + "32": 10.13416, + "33": 10.14725, + "34": 10.10738, + "35": 10.10592, + "36": 10.08739, "37": 10.08157, - "38": 10.07233, - "39": 10.00094, - "40": 9.98143, - "41": 9.92541, - "42": 9.87527, - "43": 9.88711, - "44": 9.80642, - "45": 9.82325, - "46": 9.73785, - "47": 9.74817, - "48": 9.71609, - "49": 9.74484, - "50": 9.72982, - "51": 9.71485, - "52": 9.66475, - "53": 9.60919, - "54": 9.62705, - "55": 9.61012, - "56": 9.617, - "57": 9.56786, - "58": 9.52731, - "59": 9.51668, - "60": 9.51865, + "38": 10.07245, + "39": 10.00093, + "40": 9.98138, + "41": 9.92543, + "42": 9.87534, + "43": 9.88716, + "44": 9.80646, + "45": 9.82342, + "46": 9.73786, + "47": 9.74811, + "48": 9.71614, + "49": 9.74493, + "50": 9.73, + "51": 9.71492, + "52": 9.66464, + "53": 9.60912, + "54": 9.62726, + "55": 9.6101, + "56": 9.61721, + "57": 9.56794, + "58": 9.52741, + "59": 9.51674, + "60": 9.51863, "61": 9.53132, - "62": 9.45016, - "63": 9.45725, - "64": 9.43435, - "65": 9.45801, - "66": 9.4368, - "67": 9.3968, - "68": 9.36474, - "69": 9.4095, - "70": 9.376, - "71": 9.41716, - "72": 9.42574, - "73": 9.37581, - "74": 9.41547, - "75": 9.37891, - "76": 9.28017, - "77": 9.32205, - "78": 9.35754, - "79": 9.32162, - "80": 9.31486, - "81": 9.2678, - "82": 9.34178, - "83": 9.32145, - "84": 9.24785, - "85": 9.35023, - "86": 9.22392, - "87": 9.3062, - "88": 9.29891, - "89": 9.22716, - "90": 9.28483, - 
"91": 9.23109, - "92": 9.27463, - "93": 9.19241, - "94": 9.23984, - "95": 9.28006, - "96": 9.17526, - "97": 9.21894, - "98": 9.17192, - "99": 9.16446, - "100": 9.14816 + "62": 9.45018, + "63": 9.4572, + "64": 9.43437, + "65": 9.45816, + "66": 9.43669, + "67": 9.39678, + "68": 9.36478, + "69": 9.40956, + "70": 9.37595, + "71": 9.41738, + "72": 9.42564, + "73": 9.37611, + "74": 9.41543, + "75": 9.3788, + "76": 9.28012, + "77": 9.32212, + "78": 9.35744, + "79": 9.3215, + "80": 9.31497, + "81": 9.26785, + "82": 9.34183, + "83": 9.32151, + "84": 9.24796, + "85": 9.35033, + "86": 9.224, + "87": 9.30611, + "88": 9.29894, + "89": 9.22704, + "90": 9.28479, + "91": 9.2311, + "92": 9.27474, + "93": 9.19219, + "94": 9.23969, + "95": 9.28, + "96": 9.17525, + "97": 9.21888, + "98": 9.1721, + "99": 9.16455, + "100": 9.1482 } }, "num-zeros": { @@ -132,85 +132,85 @@ "19": 2547.0, "20": 2850.0, "21": 1990.0, - "22": 2884.0, - "23": 2857.0, - "24": 2685.0, - "25": 2514.0, - "26": 2958.0, - "27": 2673.0, - "28": 2723.0, - "29": 2571.0, - "30": 2858.0, - "31": 2157.0, - "32": 2357.0, - "33": 2242.0, - "34": 2464.0, - "35": 2544.0, - "36": 2933.0, - "37": 3293.0, - "38": 2730.0, - "39": 2795.0, - "40": 3310.0, - "41": 1816.0, - "42": 1467.0, - "43": 1817.0, - "44": 2633.0, - "45": 3576.0, - "46": 3015.0, - "47": 2805.0, - "48": 3071.0, - "49": 2974.0, - "50": 2267.0, - "51": 1923.0, - "52": 2515.0, - "53": 3615.0, - "54": 3426.0, - "55": 3436.0, - "56": 4411.0, - "57": 4095.0, - "58": 4308.0, - "59": 1687.0, - "60": 2431.0, - "61": 2151.0, - "62": 3986.0, - "63": 3558.0, - "64": 4286.0, - "65": 3052.0, - "66": 1720.0, - "67": 1910.0, - "68": 4193.0, - "69": 4347.0, - "70": 4596.0, - "71": 2078.0, - "72": 4406.0, - "73": 4062.0, - "74": 3358.0, - "75": 4606.0, - "76": 2187.0, - "77": 4854.0, - "78": 4098.0, - "79": 2652.0, - "80": 3776.0, - "81": 3550.0, - "82": 3031.0, - "83": 5345.0, - "84": 4396.0, - "85": 4354.0, - "86": 3332.0, - "87": 4815.0, - "88": 3303.0, - "89": 4611.0, - "90": 
4346.0, - "91": 4361.0, - "92": 3502.0, - "93": 5624.0, - "94": 3733.0, - "95": 4728.0, - "96": 3534.0, - "97": 3873.0, - "98": 4525.0, - "99": 4329.0, - "100": 3365.0 + "22": 2964.0, + "23": 2695.0, + "24": 2772.0, + "25": 2524.0, + "26": 2977.0, + "27": 2627.0, + "28": 2776.0, + "29": 2514.0, + "30": 2843.0, + "31": 2070.0, + "32": 2362.0, + "33": 2211.0, + "34": 2574.0, + "35": 2499.0, + "36": 2943.0, + "37": 3347.0, + "38": 2628.0, + "39": 2781.0, + "40": 3335.0, + "41": 1800.0, + "42": 1598.0, + "43": 1719.0, + "44": 2631.0, + "45": 3492.0, + "46": 2988.0, + "47": 2784.0, + "48": 2951.0, + "49": 2907.0, + "50": 2113.0, + "51": 1961.0, + "52": 2445.0, + "53": 3654.0, + "54": 3489.0, + "55": 3419.0, + "56": 4364.0, + "57": 4145.0, + "58": 4155.0, + "59": 1699.0, + "60": 2358.0, + "61": 2070.0, + "62": 4094.0, + "63": 3516.0, + "64": 4287.0, + "65": 2891.0, + "66": 1733.0, + "67": 1914.0, + "68": 4420.0, + "69": 4479.0, + "70": 4656.0, + "71": 2135.0, + "72": 4476.0, + "73": 4048.0, + "74": 3199.0, + "75": 4735.0, + "76": 2218.0, + "77": 4952.0, + "78": 4158.0, + "79": 2657.0, + "80": 3846.0, + "81": 3472.0, + "82": 2979.0, + "83": 5364.0, + "84": 4430.0, + "85": 4249.0, + "86": 3509.0, + "87": 4817.0, + "88": 3434.0, + "89": 4711.0, + "90": 4448.0, + "91": 4374.0, + "92": 3507.0, + "93": 5549.0, + "94": 3635.0, + "95": 4540.0, + "96": 3659.0, + "97": 3756.0, + "98": 4513.0, + "99": 4491.0, + "100": 3445.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1784014336.0, - "2": 1784014336.0, - "3": 1784014336.0, - "4": 1784014336.0, - "5": 1784014336.0, - "6": 1784014336.0, - "7": 1784014336.0, - "8": 1784014336.0, - "9": 1784014336.0, - "10": 1784014336.0, - "11": 1784014336.0, - "12": 1784014336.0, - "13": 1784014336.0, - "14": 1784014336.0, - "15": 1784014336.0, - "16": 1784014336.0, - "17": 1784014336.0, - "18": 1784014336.0, - "19": 1784014336.0, - "20": 1784014336.0, - "21": 1784014336.0, - "22": 
1784014336.0, - "23": 1784014336.0, - "24": 1784014336.0, - "25": 1784014336.0, - "26": 1784014336.0, - "27": 1784014336.0, - "28": 1784014336.0, - "29": 1784014336.0, - "30": 1784014336.0, - "31": 1784014336.0, - "32": 1784014336.0, - "33": 1784014336.0, - "34": 1784014336.0, - "35": 1784014336.0, - "36": 1784014336.0, - "37": 1784014336.0, - "38": 1784014336.0, - "39": 1784014336.0, - "40": 1784014336.0, - "41": 1784014336.0, - "42": 1784014336.0, - "43": 1784014336.0, - "44": 1784014336.0, - "45": 1784014336.0, - "46": 1784014336.0, - "47": 1784014336.0, - "48": 1784014336.0, - "49": 1784014336.0, - "50": 1784014336.0, - "51": 1784014336.0, - "52": 1784014336.0, - "53": 1784014336.0, - "54": 1784014336.0, - "55": 1784014336.0, - "56": 1784014336.0, - "57": 1784014336.0, - "58": 1784014336.0, - "59": 1784014336.0, - "60": 1784014336.0, - "61": 1784014336.0, - "62": 1784014336.0, - "63": 1784014336.0, - "64": 1784014336.0, - "65": 1784014336.0, - "66": 1784014336.0, - "67": 1784014336.0, - "68": 1784014336.0, - "69": 1784014336.0, - "70": 1784014336.0, - "71": 1784014336.0, - "72": 1784014336.0, - "73": 1784014336.0, - "74": 1784014336.0, - "75": 1784014336.0, - "76": 1784014336.0, - "77": 1784014336.0, - "78": 1784014336.0, - "79": 1784014336.0, - "80": 1784014336.0, - "81": 1784014336.0, - "82": 1784014336.0, - "83": 1784014336.0, - "84": 1784014336.0, - "85": 1784014336.0, - "86": 1784014336.0, - "87": 1784014336.0, - "88": 1784014336.0, - "89": 1784014336.0, - "90": 1784014336.0, - "91": 1784014336.0, - "92": 1784014336.0, - "93": 1784014336.0, - "94": 1784014336.0, - "95": 1784014336.0, - "96": 1784014336.0, - "97": 1784014336.0, - "98": 1784014336.0, - "99": 1784014336.0, - "100": 1784014336.0 + "1": 1785063936.0, + "2": 1785063936.0, + "3": 1785063936.0, + "4": 1785063936.0, + "5": 1785063936.0, + "6": 1785063936.0, + "7": 1785063936.0, + "8": 1785063936.0, + "9": 1785063936.0, + "10": 1785063936.0, + "11": 1785063936.0, + "12": 1785063936.0, + "13": 
1785063936.0, + "14": 1785063936.0, + "15": 1785063936.0, + "16": 1785063936.0, + "17": 1785063936.0, + "18": 1785063936.0, + "19": 1785063936.0, + "20": 1785063936.0, + "21": 1785063936.0, + "22": 1785063936.0, + "23": 1785063936.0, + "24": 1785063936.0, + "25": 1785063936.0, + "26": 1785063936.0, + "27": 1785063936.0, + "28": 1785588224.0, + "29": 1785063936.0, + "30": 1785063936.0, + "31": 1785063936.0, + "32": 1785063936.0, + "33": 1785063936.0, + "34": 1785063936.0, + "35": 1785063936.0, + "36": 1785063936.0, + "37": 1785063936.0, + "38": 1785063936.0, + "39": 1785063936.0, + "40": 1785063936.0, + "41": 1785063936.0, + "42": 1785063936.0, + "43": 1785063936.0, + "44": 1785063936.0, + "45": 1785063936.0, + "46": 1785063936.0, + "47": 1785063936.0, + "48": 1785063936.0, + "49": 1785063936.0, + "50": 1785063936.0, + "51": 1785063936.0, + "52": 1785063936.0, + "53": 1785063936.0, + "54": 1785063936.0, + "55": 1785063936.0, + "56": 1785063936.0, + "57": 1785063936.0, + "58": 1785063936.0, + "59": 1785063936.0, + "60": 1785063936.0, + "61": 1785063936.0, + "62": 1785063936.0, + "63": 1785063936.0, + "64": 1785063936.0, + "65": 1785063936.0, + "66": 1785063936.0, + "67": 1785063936.0, + "68": 1785063936.0, + "69": 1785063936.0, + "70": 1785063936.0, + "71": 1785063936.0, + "72": 1785063936.0, + "73": 1785063936.0, + "74": 1785063936.0, + "75": 1785063936.0, + "76": 1785063936.0, + "77": 1785063936.0, + "78": 1785063936.0, + "79": 1785063936.0, + "80": 1785063936.0, + "81": 1785063936.0, + "82": 1785063936.0, + "83": 1785063936.0, + "84": 1785063936.0, + "85": 1785063936.0, + "86": 1785063936.0, + "87": 1785063936.0, + "88": 1785063936.0, + "89": 1785063936.0, + "90": 1785063936.0, + "91": 1785063936.0, + "92": 1785063936.0, + "93": 1785063936.0, + "94": 1785063936.0, + "95": 1785063936.0, + "96": 1785063936.0, + "97": 1785063936.0, + "98": 1785063936.0, + "99": 1785063936.0, + "100": 1785063936.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 
100, "step_interval": 1, "values": { - "1": 2365860864.0, - "2": 3108323328.0, - "3": 3108323328.0, - "4": 3108323328.0, - "5": 3108323328.0, - "6": 3108323328.0, - "7": 3108323328.0, - "8": 3108323328.0, - "9": 3108323328.0, - "10": 3108323328.0, - "11": 3108323328.0, - "12": 3108323328.0, - "13": 3108323328.0, - "14": 3108323328.0, - "15": 3108323328.0, - "16": 3108323328.0, - "17": 3108323328.0, - "18": 3108323328.0, - "19": 3108323328.0, - "20": 3108323328.0, - "21": 3108323328.0, - "22": 3108323328.0, - "23": 3108323328.0, - "24": 3108323328.0, - "25": 3108323328.0, - "26": 3108323328.0, - "27": 3108323328.0, - "28": 3108323328.0, - "29": 3108323328.0, - "30": 3108323328.0, - "31": 3108323328.0, - "32": 3108323328.0, - "33": 3108323328.0, - "34": 3108323328.0, - "35": 3108323328.0, - "36": 3108323328.0, - "37": 3108323328.0, - "38": 3108323328.0, - "39": 3108323328.0, - "40": 3108323328.0, - "41": 3108323328.0, - "42": 3108323328.0, - "43": 3108323328.0, - "44": 3108323328.0, - "45": 3108323328.0, - "46": 3108323328.0, - "47": 3108323328.0, - "48": 3108323328.0, - "49": 3108323328.0, - "50": 3108323328.0, - "51": 3108323328.0, - "52": 3108323328.0, - "53": 3108323328.0, - "54": 3108323328.0, - "55": 3108323328.0, - "56": 3108323328.0, - "57": 3108842496.0, - "58": 3108842496.0, - "59": 3108842496.0, - "60": 3108842496.0, - "61": 3108842496.0, - "62": 3108842496.0, - "63": 3108842496.0, - "64": 3108842496.0, - "65": 3108842496.0, - "66": 3108842496.0, - "67": 3108842496.0, - "68": 3108842496.0, - "69": 3108842496.0, - "70": 3108842496.0, - "71": 3108842496.0, - "72": 3108842496.0, - "73": 3108842496.0, - "74": 3108842496.0, - "75": 3108844544.0, - "76": 3108844544.0, - "77": 3108844544.0, - "78": 3108844544.0, - "79": 3108844544.0, - "80": 3108844544.0, - "81": 3108844544.0, - "82": 3108844544.0, - "83": 3108844544.0, - "84": 3108844544.0, - "85": 3108844544.0, - "86": 3108844544.0, - "87": 3108844544.0, - "88": 3108844544.0, - "89": 3108844544.0, - "90": 
3108844544.0, - "91": 3108844544.0, - "92": 3108844544.0, - "93": 3108844544.0, - "94": 3108844544.0, - "95": 3108844544.0, - "96": 3108844544.0, - "97": 3108844544.0, - "98": 3108844544.0, - "99": 3108844544.0, - "100": 3108844544.0 + "1": 2366910464.0, + "2": 3109372928.0, + "3": 3109372928.0, + "4": 3109372928.0, + "5": 3109372928.0, + "6": 3109892608.0, + "7": 3109892608.0, + "8": 3111465472.0, + "9": 3111465472.0, + "10": 3111465472.0, + "11": 3111465472.0, + "12": 3111470080.0, + "13": 3111470080.0, + "14": 3111470080.0, + "15": 3111470080.0, + "16": 3111470080.0, + "17": 3111470080.0, + "18": 3111470080.0, + "19": 3111470080.0, + "20": 3111470080.0, + "21": 3111470080.0, + "22": 3111470080.0, + "23": 3111470080.0, + "24": 3111470080.0, + "25": 3111470080.0, + "26": 3111470080.0, + "27": 3111470080.0, + "28": 3111470080.0, + "29": 3111470080.0, + "30": 3111470080.0, + "31": 3111470080.0, + "32": 3111470080.0, + "33": 3111470080.0, + "34": 3111470080.0, + "35": 3111470080.0, + "36": 3111988224.0, + "37": 3111988224.0, + "38": 3111988224.0, + "39": 3111988224.0, + "40": 3111988224.0, + "41": 3111988224.0, + "42": 3111988224.0, + "43": 3111988224.0, + "44": 3111988224.0, + "45": 3111988224.0, + "46": 3111988224.0, + "47": 3111988224.0, + "48": 3111988224.0, + "49": 3111988224.0, + "50": 3111988224.0, + "51": 3111988224.0, + "52": 3111988224.0, + "53": 3111988224.0, + "54": 3111988224.0, + "55": 3111988224.0, + "56": 3111988224.0, + "57": 3111988224.0, + "58": 3111988224.0, + "59": 3111988224.0, + "60": 3111988224.0, + "61": 3111988224.0, + "62": 3111988224.0, + "63": 3111988224.0, + "64": 3111988224.0, + "65": 3111988224.0, + "66": 3111988224.0, + "67": 3111988224.0, + "68": 3111988224.0, + "69": 3111988224.0, + "70": 3111988224.0, + "71": 3111988224.0, + "72": 3111988224.0, + "73": 3111988224.0, + "74": 3111988224.0, + "75": 3111988224.0, + "76": 3111988224.0, + "77": 3111988224.0, + "78": 3111988224.0, + "79": 3111988224.0, + "80": 3111988224.0, + "81": 
3111988224.0, + "82": 3111988224.0, + "83": 3111988224.0, + "84": 3111988224.0, + "85": 3111988224.0, + "86": 3111988224.0, + "87": 3111988224.0, + "88": 3111988224.0, + "89": 3111988224.0, + "90": 3111988224.0, + "91": 3111988224.0, + "92": 3111988224.0, + "93": 3111988224.0, + "94": 3111988224.0, + "95": 3111988224.0, + "96": 3111988224.0, + "97": 3111988224.0, + "98": 3111988224.0, + "99": 3111988224.0, + "100": 3111988224.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 11.84806, - "2": 1.03522, - "3": 1.00793, - "4": 1.00939, - "5": 1.00929, - "6": 1.01517, - "7": 1.01009, - "8": 1.01561, - "9": 1.02131, - "10": 1.01787, - "11": 1.01149, - "12": 1.0128, - "13": 1.01358, - "14": 1.01768, - "15": 1.23565, - "16": 1.01096, - "17": 1.19479, - "18": 1.01674, - "19": 1.01808, - "20": 1.23016, - "21": 1.01908, - "22": 1.11536, - "23": 1.0888, - "24": 1.02965, - "25": 1.03972, - "26": 1.00766, - "27": 1.00981, - "28": 1.01339, - "29": 1.01801, - "30": 1.01655, - "31": 1.01796, - "32": 1.01286, - "33": 1.01823, - "34": 1.00604, - "35": 1.01493, - "36": 1.01106, - "37": 1.00783, - "38": 1.01573, - "39": 1.01525, - "40": 1.09842, - "41": 1.39919, - "42": 1.22658, - "43": 1.00841, - "44": 0.99932, - "45": 1.00156, - "46": 1.18473, - "47": 1.01528, - "48": 1.00768, - "49": 1.00498, - "50": 0.9957, - "51": 1.29149, - "52": 1.10051, - "53": 1.00264, - "54": 1.00531, - "55": 1.30558, - "56": 0.99836, - "57": 1.00645, - "58": 1.00413, - "59": 1.00106, - "60": 1.00076, - "61": 1.32205, - "62": 1.00795, - "63": 1.2523, - "64": 1.01369, - "65": 1.01151, - "66": 1.01484, - "67": 1.00831, - "68": 1.01849, - "69": 1.01821, - "70": 1.01316, - "71": 1.01068, - "72": 1.01792, - "73": 1.47417, - "74": 1.01143, - "75": 1.14077, - "76": 1.01286, - "77": 1.08819, - "78": 1.01005, - "79": 1.0069, - "80": 1.01196, - "81": 1.0882, - "82": 1.00417, - "83": 1.29479, - "84": 1.0044, - "85": 1.0103, - "86": 1.00862, - "87": 1.01863, - 
"88": 1.2549, - "89": 1.0075, - "90": 1.00874, - "91": 1.0111, - "92": 1.01049, - "93": 1.01084, - "94": 1.01043, - "95": 1.01246, - "96": 1.01317, - "97": 1.09821, - "98": 1.01406, - "99": 1.00578, - "100": 1.09442 + "1": 11.18542, + "2": 0.99156, + "3": 0.93327, + "4": 0.90681, + "5": 0.90504, + "6": 0.90415, + "7": 0.90281, + "8": 1.14692, + "9": 1.44306, + "10": 0.89873, + "11": 0.90113, + "12": 0.89984, + "13": 1.24688, + "14": 0.90399, + "15": 0.90327, + "16": 0.89945, + "17": 0.90194, + "18": 0.89984, + "19": 0.89878, + "20": 0.89865, + "21": 0.90167, + "22": 0.90176, + "23": 0.90423, + "24": 2.02738, + "25": 0.90411, + "26": 0.90354, + "27": 0.90203, + "28": 1.26668, + "29": 0.89854, + "30": 1.45828, + "31": 0.90574, + "32": 0.90137, + "33": 1.70784, + "34": 0.89924, + "35": 0.90059, + "36": 0.90525, + "37": 0.90801, + "38": 0.90691, + "39": 0.9048, + "40": 1.47233, + "41": 0.91116, + "42": 1.22468, + "43": 1.0011, + "44": 1.22804, + "45": 1.12037, + "46": 1.00115, + "47": 0.91003, + "48": 0.91208, + "49": 0.91545, + "50": 0.91, + "51": 0.91471, + "52": 0.91238, + "53": 0.90865, + "54": 0.91588, + "55": 0.91889, + "56": 0.91882, + "57": 0.92072, + "58": 0.9202, + "59": 0.92355, + "60": 0.92097, + "61": 0.91924, + "62": 0.91496, + "63": 0.91648, + "64": 0.91615, + "65": 0.91333, + "66": 0.91743, + "67": 0.9094, + "68": 0.91122, + "69": 0.90894, + "70": 0.91968, + "71": 0.92199, + "72": 0.91976, + "73": 0.92156, + "74": 0.91995, + "75": 0.90852, + "76": 0.90983, + "77": 1.19595, + "78": 0.9092, + "79": 1.16564, + "80": 1.06882, + "81": 0.90637, + "82": 0.90812, + "83": 0.91, + "84": 0.90847, + "85": 0.88526, + "86": 0.87691, + "87": 0.88881, + "88": 0.87995, + "89": 0.9042, + "90": 0.90269, + "91": 0.90587, + "92": 0.90035, + "93": 0.89985, + "94": 0.90093, + "95": 0.90088, + "96": 0.89612, + "97": 0.89401, + "98": 0.89773, + "99": 0.90081, + "100": 0.8988 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..a5b9c2f1ab2 --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.71492, + "52": 9.66464, + "53": 9.60912, + "54": 9.62726, + "55": 9.6101, + "56": 9.61721, + "57": 9.56794, + "58": 9.52741, + "59": 9.51674, + "60": 9.51863, + "61": 9.53132, + "62": 9.45018, + "63": 9.4572, + "64": 9.43437, + "65": 9.45816, + "66": 9.43669, + "67": 9.39678, + "68": 9.36478, + "69": 9.40956, + "70": 9.37595, + "71": 9.41738, + "72": 9.42564, + "73": 9.37611, + "74": 9.41543, + "75": 9.3788, + "76": 9.28012, + "77": 9.32212, + "78": 9.35744, + "79": 9.3215, + "80": 9.31497, + "81": 9.26785, + "82": 9.34183, + "83": 9.32151, + "84": 9.24796, + "85": 9.35033, + "86": 9.224, + "87": 9.30611, + "88": 9.29894, + "89": 9.22704, + "90": 9.28479, + "91": 9.2311, + "92": 9.27474, + "93": 9.19219, + "94": 
9.23969, + "95": 9.28, + "96": 9.17525, + "97": 9.21888, + "98": 9.1721, + "99": 9.16455, + "100": 9.1482 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1961.0, + "52": 2445.0, + "53": 3654.0, + "54": 3489.0, + "55": 3419.0, + "56": 4364.0, + "57": 4145.0, + "58": 4155.0, + "59": 1699.0, + "60": 2358.0, + "61": 2070.0, + "62": 4094.0, + "63": 3516.0, + "64": 4287.0, + "65": 2891.0, + "66": 1733.0, + "67": 1914.0, + "68": 4420.0, + "69": 4479.0, + "70": 4656.0, + "71": 2135.0, + "72": 4476.0, + "73": 4048.0, + "74": 3199.0, + "75": 4735.0, + "76": 2218.0, + "77": 4952.0, + "78": 4158.0, + "79": 2657.0, + "80": 3846.0, + "81": 3472.0, + "82": 2979.0, + "83": 5364.0, + "84": 4430.0, + "85": 4249.0, + "86": 3509.0, + "87": 4817.0, + "88": 3434.0, + "89": 4711.0, + "90": 4448.0, + "91": 4374.0, + "92": 3507.0, + "93": 5549.0, + "94": 3635.0, + "95": 4540.0, + "96": 3659.0, + "97": 3756.0, + "98": 4513.0, + "99": 4491.0, + "100": 3445.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + 
"11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1786112512.0, + "52": 1786112512.0, + "53": 1786112512.0, + "54": 1786112512.0, + "55": 1786112512.0, + "56": 1786112512.0, + "57": 1786112512.0, + "58": 1786112512.0, + "59": 1786112512.0, + "60": 1786112512.0, + "61": 1786112512.0, + "62": 1786112512.0, + "63": 1786112512.0, + "64": 1786112512.0, + "65": 1786112512.0, + "66": 1786112512.0, + "67": 1786112512.0, + "68": 1786112512.0, + "69": 1786112512.0, + "70": 1786112512.0, + "71": 1786112512.0, + "72": 1786112512.0, + "73": 1786112512.0, + "74": 1786112512.0, + "75": 1786112512.0, + "76": 1786112512.0, + "77": 1786112512.0, + "78": 1786112512.0, + "79": 1786112512.0, + "80": 1786112512.0, + "81": 1786112512.0, + "82": 1786112512.0, + "83": 1786112512.0, + "84": 1786112512.0, + "85": 1786112512.0, + "86": 1786112512.0, + "87": 1786112512.0, + "88": 1786112512.0, + "89": 1786112512.0, + "90": 1786112512.0, + "91": 1786112512.0, + "92": 1786112512.0, + "93": 1786112512.0, + "94": 1786112512.0, + "95": 1786112512.0, + "96": 1786112512.0, + "97": 1786112512.0, + "98": 1786112512.0, + "99": 1786112512.0, + "100": 1786112512.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + 
"14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3110419456.0, + "52": 3110421504.0, + "53": 3110421504.0, + "54": 3110421504.0, + "55": 3110421504.0, + "56": 3110421504.0, + "57": 3110421504.0, + "58": 3110421504.0, + "59": 3110421504.0, + "60": 3110421504.0, + "61": 3110421504.0, + "62": 3110421504.0, + "63": 3110421504.0, + "64": 3110421504.0, + "65": 3110421504.0, + "66": 3110421504.0, + "67": 3110421504.0, + "68": 3110421504.0, + "69": 3110421504.0, + "70": 3110421504.0, + "71": 3110421504.0, + "72": 3110421504.0, + "73": 3110421504.0, + "74": 3110421504.0, + "75": 3110421504.0, + "76": 3110421504.0, + "77": 3110421504.0, + "78": 3110421504.0, + "79": 3110421504.0, + "80": 3110421504.0, + "81": 3110421504.0, + "82": 3110421504.0, + "83": 3110421504.0, + "84": 3110421504.0, + "85": 3110421504.0, + "86": 3110421504.0, + "87": 3110421504.0, + "88": 3110421504.0, + "89": 3110421504.0, + "90": 3110421504.0, + "91": 3110421504.0, + "92": 3110421504.0, + "93": 3110421504.0, + "94": 3110421504.0, + "95": 3110421504.0, + "96": 3110421504.0, + "97": 3110421504.0, + "98": 3110421504.0, + "99": 3110421504.0, + "100": 3110421504.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 
"nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.75043, + "52": 1.0039, + "53": 0.95516, + "54": 0.91159, + "55": 0.90836, + "56": 0.94785, + "57": 1.16936, + "58": 1.19663, + "59": 1.28755, + "60": 0.88429, + "61": 0.8835, + "62": 0.91894, + "63": 0.88317, + "64": 0.89119, + "65": 0.88844, + "66": 1.26569, + "67": 0.88764, + "68": 0.88401, + "69": 0.89243, + "70": 0.8883, + "71": 0.89113, + "72": 0.91101, + "73": 0.89072, + "74": 2.04797, + "75": 0.90184, + "76": 0.93408, + "77": 1.2869, + "78": 0.95072, + "79": 0.96458, + "80": 0.90559, + "81": 0.95787, + "82": 0.90855, + "83": 1.71942, + "84": 0.94521, + "85": 0.88307, + "86": 0.88152, + "87": 0.89039, + "88": 0.88803, + "89": 0.90894, + "90": 0.89894, + "91": 1.05886, + "92": 1.19588, + "93": 1.37335, + "94": 0.8898, + "95": 1.07004, + "96": 0.88806, + "97": 0.89083, + "98": 0.90547, + "99": 0.94317, + "100": 0.90081 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json index 27a34e32198..fe766022589 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json @@ -44,66 +44,66 @@ "38": 10.07257, "39": 10.0013, "40": 9.9816, - 
"41": 9.92549, - "42": 9.87529, - "43": 9.88742, - "44": 9.80641, - "45": 9.82342, - "46": 9.73815, - "47": 9.74831, - "48": 9.71619, - "49": 9.74504, + "41": 9.92551, + "42": 9.87537, + "43": 9.88725, + "44": 9.80659, + "45": 9.82349, + "46": 9.73821, + "47": 9.74829, + "48": 9.71628, + "49": 9.74489, "50": 9.73004, - "51": 9.71503, - "52": 9.66484, - "53": 9.60935, - "54": 9.62735, - "55": 9.61036, - "56": 9.61745, + "51": 9.71501, + "52": 9.66488, + "53": 9.60917, + "54": 9.62733, + "55": 9.61022, + "56": 9.61723, "57": 9.56794, - "58": 9.52742, - "59": 9.51685, - "60": 9.51873, - "61": 9.53147, - "62": 9.45024, - "63": 9.45733, - "64": 9.43455, - "65": 9.4582, - "66": 9.43694, - "67": 9.39693, - "68": 9.36491, - "69": 9.40957, - "70": 9.37605, - "71": 9.41735, - "72": 9.42581, - "73": 9.37614, - "74": 9.41544, - "75": 9.37897, - "76": 9.28015, - "77": 9.32215, - "78": 9.35752, - "79": 9.32154, - "80": 9.31496, - "81": 9.26776, - "82": 9.34189, - "83": 9.32163, - "84": 9.24791, - "85": 9.35021, - "86": 9.22383, - "87": 9.30627, - "88": 9.29884, + "58": 9.52733, + "59": 9.51677, + "60": 9.5188, + "61": 9.53149, + "62": 9.45031, + "63": 9.45717, + "64": 9.43441, + "65": 9.45812, + "66": 9.43672, + "67": 9.39687, + "68": 9.36469, + "69": 9.40964, + "70": 9.37606, + "71": 9.41737, + "72": 9.42585, + "73": 9.37601, + "74": 9.4154, + "75": 9.37896, + "76": 9.28004, + "77": 9.32212, + "78": 9.35755, + "79": 9.3216, + "80": 9.31491, + "81": 9.26783, + "82": 9.342, + "83": 9.32159, + "84": 9.24786, + "85": 9.35018, + "86": 9.22384, + "87": 9.30618, + "88": 9.29905, "89": 9.22708, - "90": 9.28475, - "91": 9.23116, - "92": 9.27477, - "93": 9.1922, - "94": 9.23984, - "95": 9.27996, - "96": 9.17534, - "97": 9.21892, - "98": 9.1719, - "99": 9.1646, - "100": 9.14809 + "90": 9.28498, + "91": 9.23123, + "92": 9.27487, + "93": 9.19233, + "94": 9.23985, + "95": 9.28002, + "96": 9.17532, + "97": 9.21898, + "98": 9.17203, + "99": 9.16444, + "100": 9.14821 } }, "num-zeros": { @@ 
-150,67 +150,67 @@ "37": 3305.0, "38": 2682.0, "39": 2805.0, - "40": 3425.0, - "41": 1812.0, - "42": 1481.0, - "43": 1726.0, - "44": 2575.0, - "45": 3438.0, - "46": 2960.0, - "47": 2792.0, - "48": 3107.0, - "49": 2854.0, - "50": 2145.0, - "51": 1964.0, - "52": 2437.0, - "53": 3823.0, - "54": 3427.0, - "55": 3392.0, - "56": 4421.0, - "57": 4003.0, - "58": 4224.0, - "59": 1816.0, - "60": 2520.0, - "61": 2106.0, - "62": 4011.0, - "63": 3637.0, - "64": 4375.0, - "65": 3080.0, - "66": 1753.0, - "67": 1913.0, - "68": 4407.0, - "69": 4475.0, - "70": 4419.0, - "71": 2152.0, - "72": 4399.0, - "73": 4134.0, - "74": 3315.0, - "75": 4815.0, - "76": 2322.0, - "77": 5019.0, - "78": 4171.0, - "79": 2788.0, - "80": 3831.0, - "81": 3411.0, - "82": 3004.0, - "83": 5145.0, - "84": 4399.0, - "85": 4295.0, - "86": 3410.0, - "87": 4880.0, - "88": 3350.0, - "89": 4659.0, - "90": 4370.0, - "91": 4273.0, - "92": 3325.0, - "93": 5509.0, - "94": 3804.0, - "95": 4711.0, - "96": 3631.0, - "97": 3774.0, - "98": 4477.0, - "99": 4459.0, - "100": 3220.0 + "40": 3430.0, + "41": 1767.0, + "42": 1516.0, + "43": 1798.0, + "44": 2790.0, + "45": 3578.0, + "46": 3016.0, + "47": 2890.0, + "48": 3065.0, + "49": 2914.0, + "50": 2208.0, + "51": 1900.0, + "52": 2483.0, + "53": 3763.0, + "54": 3478.0, + "55": 3412.0, + "56": 4400.0, + "57": 4019.0, + "58": 4253.0, + "59": 1805.0, + "60": 2457.0, + "61": 2045.0, + "62": 3994.0, + "63": 3650.0, + "64": 4466.0, + "65": 2968.0, + "66": 1837.0, + "67": 1961.0, + "68": 4347.0, + "69": 4441.0, + "70": 4452.0, + "71": 2131.0, + "72": 4523.0, + "73": 4105.0, + "74": 3300.0, + "75": 4651.0, + "76": 2216.0, + "77": 4932.0, + "78": 4218.0, + "79": 2784.0, + "80": 3824.0, + "81": 3472.0, + "82": 2976.0, + "83": 5282.0, + "84": 4464.0, + "85": 4344.0, + "86": 3460.0, + "87": 4774.0, + "88": 3426.0, + "89": 4600.0, + "90": 4360.0, + "91": 4283.0, + "92": 3362.0, + "93": 5633.0, + "94": 3676.0, + "95": 4610.0, + "96": 3449.0, + "97": 3751.0, + "98": 4524.0, + "99": 4399.0, + 
"100": 3295.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1767237120.0, - "2": 1767237120.0, - "3": 1767237120.0, - "4": 1767237120.0, - "5": 1767237120.0, - "6": 1767237120.0, - "7": 1767237120.0, - "8": 1767237120.0, - "9": 1767237120.0, - "10": 1767237120.0, - "11": 1767237120.0, - "12": 1767237120.0, - "13": 1767237120.0, - "14": 1767237120.0, - "15": 1767237120.0, - "16": 1767237120.0, - "17": 1767237120.0, - "18": 1767237120.0, - "19": 1767237120.0, - "20": 1767237120.0, - "21": 1767237120.0, - "22": 1767237120.0, - "23": 1767237120.0, - "24": 1767237120.0, - "25": 1767237120.0, - "26": 1767237120.0, - "27": 1767237120.0, - "28": 1767237120.0, - "29": 1767237120.0, - "30": 1767237120.0, - "31": 1767237120.0, - "32": 1767237120.0, - "33": 1767237120.0, - "34": 1767237120.0, - "35": 1767237120.0, - "36": 1767237120.0, - "37": 1767237120.0, - "38": 1767237120.0, - "39": 1767237120.0, - "40": 1767237120.0, - "41": 1767237120.0, - "42": 1767237120.0, - "43": 1767237120.0, - "44": 1767237120.0, - "45": 1767237120.0, - "46": 1767237120.0, - "47": 1767237120.0, - "48": 1767237120.0, - "49": 1767237120.0, - "50": 1767237120.0, - "51": 1767237120.0, - "52": 1767237120.0, - "53": 1767237120.0, - "54": 1767237120.0, - "55": 1767237120.0, - "56": 1767237120.0, - "57": 1767237120.0, - "58": 1767237120.0, - "59": 1767237120.0, - "60": 1767237120.0, - "61": 1767237120.0, - "62": 1767237120.0, - "63": 1767237120.0, - "64": 1767237120.0, - "65": 1767237120.0, - "66": 1767237120.0, - "67": 1767237120.0, - "68": 1767237120.0, - "69": 1767237120.0, - "70": 1767237120.0, - "71": 1767237120.0, - "72": 1767237120.0, - "73": 1767237120.0, - "74": 1767237120.0, - "75": 1767237120.0, - "76": 1767237120.0, - "77": 1767237120.0, - "78": 1767237120.0, - "79": 1767237120.0, - "80": 1767237120.0, - "81": 1767237120.0, - "82": 1767237120.0, - "83": 1767237120.0, - "84": 1767237120.0, - "85": 1767237120.0, - "86": 1767237120.0, 
- "87": 1767237120.0, - "88": 1767237120.0, - "89": 1767237120.0, - "90": 1767237120.0, - "91": 1767237120.0, - "92": 1767237120.0, - "93": 1767237120.0, - "94": 1767237120.0, - "95": 1767237120.0, - "96": 1767237120.0, - "97": 1767237120.0, - "98": 1767237120.0, - "99": 1767237120.0, - "100": 1767237120.0 + "1": 1768285696.0, + "2": 1768285696.0, + "3": 1768285696.0, + "4": 1768285696.0, + "5": 1768285696.0, + "6": 1768285696.0, + "7": 1768285696.0, + "8": 1768285696.0, + "9": 1768285696.0, + "10": 1768285696.0, + "11": 1768285696.0, + "12": 1768285696.0, + "13": 1768285696.0, + "14": 1768285696.0, + "15": 1768285696.0, + "16": 1768285696.0, + "17": 1768285696.0, + "18": 1768285696.0, + "19": 1768285696.0, + "20": 1768285696.0, + "21": 1768285696.0, + "22": 1768285696.0, + "23": 1768285696.0, + "24": 1768285696.0, + "25": 1768285696.0, + "26": 1768285696.0, + "27": 1768285696.0, + "28": 1768285696.0, + "29": 1768285696.0, + "30": 1768285696.0, + "31": 1768285696.0, + "32": 1768285696.0, + "33": 1768285696.0, + "34": 1768285696.0, + "35": 1768285696.0, + "36": 1768285696.0, + "37": 1768285696.0, + "38": 1768285696.0, + "39": 1768285696.0, + "40": 1768285696.0, + "41": 1768285696.0, + "42": 1768285696.0, + "43": 1768285696.0, + "44": 1768285696.0, + "45": 1768285696.0, + "46": 1768285696.0, + "47": 1768285696.0, + "48": 1768285696.0, + "49": 1768285696.0, + "50": 1768285696.0, + "51": 1768285696.0, + "52": 1768285696.0, + "53": 1768285696.0, + "54": 1768285696.0, + "55": 1768285696.0, + "56": 1768285696.0, + "57": 1768285696.0, + "58": 1768285696.0, + "59": 1768285696.0, + "60": 1768285696.0, + "61": 1768285696.0, + "62": 1768285696.0, + "63": 1768285696.0, + "64": 1768285696.0, + "65": 1768285696.0, + "66": 1768285696.0, + "67": 1768285696.0, + "68": 1768285696.0, + "69": 1768285696.0, + "70": 1768285696.0, + "71": 1768285696.0, + "72": 1768285696.0, + "73": 1768285696.0, + "74": 1769334272.0, + "75": 1768285696.0, + "76": 1768285696.0, + "77": 1768285696.0, + 
"78": 1768285696.0, + "79": 1768285696.0, + "80": 1768285696.0, + "81": 1768285696.0, + "82": 1768285696.0, + "83": 1768285696.0, + "84": 1768285696.0, + "85": 1768285696.0, + "86": 1768285696.0, + "87": 1768285696.0, + "88": 1768285696.0, + "89": 1768285696.0, + "90": 1768285696.0, + "91": 1768285696.0, + "92": 1768285696.0, + "93": 1768285696.0, + "94": 1768285696.0, + "95": 1768285696.0, + "96": 1768285696.0, + "97": 1768285696.0, + "98": 1768285696.0, + "99": 1768285696.0, + "100": 1768285696.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2336500736.0, - "2": 3079487488.0, - "3": 3079487488.0, - "4": 3079487488.0, - "5": 3079487488.0, - "6": 3079487488.0, - "7": 3079487488.0, - "8": 3079487488.0, - "9": 3079487488.0, - "10": 3079487488.0, - "11": 3079487488.0, - "12": 3079487488.0, - "13": 3079487488.0, - "14": 3079487488.0, - "15": 3079487488.0, - "16": 3079487488.0, - "17": 3079487488.0, - "18": 3079487488.0, - "19": 3079487488.0, - "20": 3079487488.0, - "21": 3079487488.0, - "22": 3079487488.0, - "23": 3079487488.0, - "24": 3079487488.0, - "25": 3079487488.0, - "26": 3079487488.0, - "27": 3079487488.0, - "28": 3079487488.0, - "29": 3079487488.0, - "30": 3079487488.0, - "31": 3079487488.0, - "32": 3079487488.0, - "33": 3079487488.0, - "34": 3079487488.0, - "35": 3079487488.0, - "36": 3079487488.0, - "37": 3079487488.0, - "38": 3079487488.0, - "39": 3079487488.0, - "40": 3079487488.0, - "41": 3079487488.0, - "42": 3079487488.0, - "43": 3079487488.0, - "44": 3079487488.0, - "45": 3079487488.0, - "46": 3079487488.0, - "47": 3079487488.0, - "48": 3079487488.0, - "49": 3079487488.0, - "50": 3079487488.0, - "51": 3079487488.0, - "52": 3079487488.0, - "53": 3079487488.0, - "54": 3079487488.0, - "55": 3079487488.0, - "56": 3079487488.0, - "57": 3079487488.0, - "58": 3079487488.0, - "59": 3079487488.0, - "60": 3079487488.0, - "61": 3079487488.0, - "62": 3079487488.0, - "63": 3079487488.0, - "64": 
3079487488.0, - "65": 3079487488.0, - "66": 3079487488.0, - "67": 3079487488.0, - "68": 3079487488.0, - "69": 3079487488.0, - "70": 3079487488.0, - "71": 3079487488.0, - "72": 3079487488.0, - "73": 3079487488.0, - "74": 3079487488.0, - "75": 3079487488.0, - "76": 3079487488.0, - "77": 3079487488.0, - "78": 3079487488.0, - "79": 3079487488.0, - "80": 3079487488.0, - "81": 3079487488.0, - "82": 3079487488.0, - "83": 3079487488.0, - "84": 3079487488.0, - "85": 3079487488.0, - "86": 3079487488.0, - "87": 3079487488.0, - "88": 3079487488.0, - "89": 3079487488.0, - "90": 3079487488.0, - "91": 3079487488.0, - "92": 3079487488.0, - "93": 3079487488.0, - "94": 3079487488.0, - "95": 3079487488.0, - "96": 3079487488.0, - "97": 3079487488.0, - "98": 3079487488.0, - "99": 3079487488.0, - "100": 3079487488.0 + "1": 2337549312.0, + "2": 3080536064.0, + "3": 3080536064.0, + "4": 3080536064.0, + "5": 3080536064.0, + "6": 3080536064.0, + "7": 3080536064.0, + "8": 3080536064.0, + "9": 3080536064.0, + "10": 3080536064.0, + "11": 3080536064.0, + "12": 3080536064.0, + "13": 3080536064.0, + "14": 3080536064.0, + "15": 3080536064.0, + "16": 3080536064.0, + "17": 3080536064.0, + "18": 3080536064.0, + "19": 3080536064.0, + "20": 3080536064.0, + "21": 3080536064.0, + "22": 3080536064.0, + "23": 3082107392.0, + "24": 3082107392.0, + "25": 3082107392.0, + "26": 3082107392.0, + "27": 3082107392.0, + "28": 3082107392.0, + "29": 3082107392.0, + "30": 3082107392.0, + "31": 3082107392.0, + "32": 3082107392.0, + "33": 3082107392.0, + "34": 3082107392.0, + "35": 3082107392.0, + "36": 3082107392.0, + "37": 3082107392.0, + "38": 3082107392.0, + "39": 3082107392.0, + "40": 3082107392.0, + "41": 3082107392.0, + "42": 3082107392.0, + "43": 3082107392.0, + "44": 3082107392.0, + "45": 3082107392.0, + "46": 3082107392.0, + "47": 3082107392.0, + "48": 3082107392.0, + "49": 3082107392.0, + "50": 3082107392.0, + "51": 3082107392.0, + "52": 3082107392.0, + "53": 3082107392.0, + "54": 3082107392.0, + "55": 
3082107392.0, + "56": 3082107392.0, + "57": 3082107392.0, + "58": 3082107392.0, + "59": 3082107392.0, + "60": 3082107392.0, + "61": 3082107392.0, + "62": 3082107392.0, + "63": 3082107392.0, + "64": 3082107392.0, + "65": 3082107392.0, + "66": 3082107392.0, + "67": 3082107392.0, + "68": 3082107392.0, + "69": 3082107392.0, + "70": 3082107392.0, + "71": 3082107392.0, + "72": 3082107392.0, + "73": 3082107392.0, + "74": 3082108928.0, + "75": 3082108928.0, + "76": 3082108928.0, + "77": 3082108928.0, + "78": 3082108928.0, + "79": 3082108928.0, + "80": 3082108928.0, + "81": 3082108928.0, + "82": 3082108928.0, + "83": 3082108928.0, + "84": 3082108928.0, + "85": 3082108928.0, + "86": 3082108928.0, + "87": 3082108928.0, + "88": 3082108928.0, + "89": 3082108928.0, + "90": 3082108928.0, + "91": 3082108928.0, + "92": 3082108928.0, + "93": 3082108928.0, + "94": 3082108928.0, + "95": 3082108928.0, + "96": 3082108928.0, + "97": 3082108928.0, + "98": 3082108928.0, + "99": 3082108928.0, + "100": 3082108928.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 11.74907, - "2": 0.85881, - "3": 0.84325, - "4": 0.84358, - "5": 0.84379, - "6": 0.84251, - "7": 0.84123, - "8": 0.8499, - "9": 0.8999, - "10": 0.92522, - "11": 0.94116, - "12": 0.85793, - "13": 0.84568, - "14": 0.84264, - "15": 0.84084, - "16": 0.84084, - "17": 0.83843, - "18": 0.8412, - "19": 0.84178, - "20": 1.1044, - "21": 1.21871, - "22": 1.25946, - "23": 0.85008, - "24": 0.91404, - "25": 0.84787, - "26": 0.84792, - "27": 0.85174, - "28": 0.84996, - "29": 0.84337, - "30": 0.84498, - "31": 0.8486, - "32": 0.84203, - "33": 0.84451, - "34": 0.85648, - "35": 0.83537, - "36": 0.84205, - "37": 0.83563, - "38": 0.84541, - "39": 0.84231, - "40": 0.84639, - "41": 0.84365, - "42": 0.84512, - "43": 0.84437, - "44": 0.84299, - "45": 0.85866, - "46": 0.84237, - "47": 0.84617, - "48": 1.18328, - "49": 0.88875, - "50": 0.96388, - "51": 0.98149, - "52": 0.89905, - "53": 0.84382, - "54": 
0.85382, - "55": 0.84338, - "56": 0.84282, - "57": 0.92404, - "58": 0.84627, - "59": 0.83811, - "60": 0.83802, - "61": 0.85109, - "62": 0.83231, - "63": 0.83505, - "64": 1.15842, - "65": 1.1324, - "66": 0.83972, - "67": 0.82896, - "68": 0.82596, - "69": 0.83118, - "70": 0.84229, - "71": 0.8328, - "72": 0.82924, - "73": 0.83555, - "74": 0.83422, - "75": 0.90796, - "76": 0.85077, - "77": 1.07568, - "78": 1.30938, - "79": 1.12037, - "80": 0.82751, - "81": 0.83544, - "82": 0.88688, - "83": 1.16362, - "84": 0.83207, - "85": 0.83917, - "86": 1.14681, - "87": 1.17025, - "88": 0.82985, - "89": 0.82492, - "90": 0.90586, - "91": 0.83299, - "92": 0.83139, - "93": 0.83405, - "94": 0.83756, - "95": 0.83351, - "96": 0.83063, - "97": 0.83499, - "98": 0.84617, - "99": 0.83623, - "100": 0.84014 + "1": 10.24286, + "2": 0.82679, + "3": 0.79409, + "4": 0.76435, + "5": 0.77118, + "6": 0.74558, + "7": 0.74667, + "8": 0.77701, + "9": 1.97605, + "10": 0.75455, + "11": 0.74398, + "12": 0.74114, + "13": 0.7501, + "14": 0.74704, + "15": 0.74029, + "16": 1.1307, + "17": 0.73862, + "18": 0.73445, + "19": 0.73384, + "20": 0.73927, + "21": 0.74153, + "22": 0.73755, + "23": 0.76958, + "24": 0.7377, + "25": 0.73987, + "26": 0.77483, + "27": 1.30185, + "28": 0.76, + "29": 0.75644, + "30": 0.77716, + "31": 0.83125, + "32": 0.80226, + "33": 0.74041, + "34": 0.74334, + "35": 1.17386, + "36": 1.53868, + "37": 0.77003, + "38": 0.76358, + "39": 0.77015, + "40": 0.77216, + "41": 0.76865, + "42": 1.214, + "43": 1.04802, + "44": 0.758, + "45": 1.27424, + "46": 1.12734, + "47": 0.7573, + "48": 0.74875, + "49": 0.74989, + "50": 0.75416, + "51": 0.75904, + "52": 0.75338, + "53": 0.75124, + "54": 0.73937, + "55": 0.74096, + "56": 0.75129, + "57": 0.75097, + "58": 0.74724, + "59": 0.74661, + "60": 0.74245, + "61": 0.74378, + "62": 0.74491, + "63": 0.74147, + "64": 0.74756, + "65": 0.74511, + "66": 0.74967, + "67": 0.7462, + "68": 0.74176, + "69": 0.74258, + "70": 0.74323, + "71": 0.74412, + "72": 0.74522, + 
"73": 0.74053, + "74": 0.74312, + "75": 0.74157, + "76": 1.12862, + "77": 0.74522, + "78": 1.08987, + "79": 0.94746, + "80": 0.877, + "81": 0.74472, + "82": 0.74142, + "83": 0.74342, + "84": 0.7418, + "85": 0.74017, + "86": 0.7399, + "87": 0.73594, + "88": 0.73916, + "89": 0.73537, + "90": 0.75037, + "91": 0.7341, + "92": 0.73469, + "93": 0.7333, + "94": 0.73221, + "95": 0.73055, + "96": 0.73133, + "97": 0.73591, + "98": 0.74108, + "99": 0.74467, + "100": 0.73711 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..de97d194787 --- /dev/null +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.71501, + "52": 9.66488, + "53": 9.60917, + "54": 9.62733, + "55": 9.61022, + "56": 9.61723, + "57": 9.56794, + "58": 9.52733, + "59": 9.51677, + "60": 9.5188, + "61": 9.53149, + 
"62": 9.45031, + "63": 9.45717, + "64": 9.43441, + "65": 9.45812, + "66": 9.43672, + "67": 9.39687, + "68": 9.36469, + "69": 9.40964, + "70": 9.37606, + "71": 9.41737, + "72": 9.42585, + "73": 9.37601, + "74": 9.4154, + "75": 9.37896, + "76": 9.28004, + "77": 9.32212, + "78": 9.35755, + "79": 9.3216, + "80": 9.31491, + "81": 9.26783, + "82": 9.342, + "83": 9.32159, + "84": 9.24786, + "85": 9.35018, + "86": 9.22384, + "87": 9.30618, + "88": 9.29905, + "89": 9.22708, + "90": 9.28498, + "91": 9.23123, + "92": 9.27487, + "93": 9.19233, + "94": 9.23985, + "95": 9.28002, + "96": 9.17532, + "97": 9.21898, + "98": 9.17203, + "99": 9.16444, + "100": 9.14821 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1900.0, + "52": 2483.0, + "53": 3763.0, + "54": 3478.0, + "55": 3412.0, + "56": 4400.0, + "57": 4019.0, + "58": 4253.0, + "59": 1805.0, + "60": 2457.0, + "61": 2045.0, + "62": 3994.0, + "63": 3650.0, + "64": 4466.0, + "65": 2968.0, + "66": 1837.0, + "67": 1961.0, + "68": 4347.0, + "69": 4441.0, + "70": 4452.0, + "71": 2131.0, + "72": 4523.0, + "73": 4105.0, + "74": 3300.0, + "75": 4651.0, + "76": 2216.0, + "77": 4932.0, + "78": 4218.0, + "79": 2784.0, + "80": 3824.0, + "81": 3472.0, + 
"82": 2976.0, + "83": 5282.0, + "84": 4464.0, + "85": 4344.0, + "86": 3460.0, + "87": 4774.0, + "88": 3426.0, + "89": 4600.0, + "90": 4360.0, + "91": 4283.0, + "92": 3362.0, + "93": 5633.0, + "94": 3676.0, + "95": 4610.0, + "96": 3449.0, + "97": 3751.0, + "98": 4524.0, + "99": 4399.0, + "100": 3295.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1769334272.0, + "52": 1769334272.0, + "53": 1769334272.0, + "54": 1769334272.0, + "55": 1769334272.0, + "56": 1769334272.0, + "57": 1769334272.0, + "58": 1769334272.0, + "59": 1769334272.0, + "60": 1769334272.0, + "61": 1769334272.0, + "62": 1769334272.0, + "63": 1769334272.0, + "64": 1769334272.0, + "65": 1769334272.0, + "66": 1769334272.0, + "67": 1769334272.0, + "68": 1769334272.0, + "69": 1769334272.0, + "70": 1769334272.0, + "71": 1769334272.0, + "72": 1769334272.0, + "73": 1769334272.0, + "74": 1769334272.0, + "75": 1769334272.0, + "76": 1769334272.0, + "77": 1769334272.0, + "78": 1769334272.0, + "79": 1769334272.0, + "80": 1769334272.0, + "81": 1769334272.0, + "82": 1769334272.0, + "83": 1769334272.0, + "84": 1769334272.0, + "85": 1769334272.0, + "86": 1769334272.0, + "87": 1769334272.0, + "88": 1769334272.0, + "89": 
1769334272.0, + "90": 1769334272.0, + "91": 1769334272.0, + "92": 1769334272.0, + "93": 1769334272.0, + "94": 1769334272.0, + "95": 1769334272.0, + "96": 1769334272.0, + "97": 1769334272.0, + "98": 1769334272.0, + "99": 1769334272.0, + "100": 1769334272.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3081058304.0, + "52": 3081060352.0, + "53": 3081060352.0, + "54": 3081060352.0, + "55": 3081060352.0, + "56": 3081060352.0, + "57": 3081060352.0, + "58": 3081060352.0, + "59": 3081060352.0, + "60": 3081060352.0, + "61": 3081060352.0, + "62": 3081060352.0, + "63": 3081060352.0, + "64": 3081060352.0, + "65": 3081060352.0, + "66": 3081060352.0, + "67": 3081060352.0, + "68": 3081060352.0, + "69": 3081060352.0, + "70": 3081060352.0, + "71": 3081060352.0, + "72": 3081060352.0, + "73": 3081060352.0, + "74": 3081060352.0, + "75": 3081060352.0, + "76": 3081060352.0, + "77": 3081060352.0, + "78": 3081060352.0, + "79": 3081060352.0, + "80": 3081060352.0, + "81": 3081060352.0, + "82": 3081060352.0, + "83": 3081060352.0, + "84": 3081060352.0, + "85": 3081060352.0, + "86": 3081060352.0, + "87": 3081060352.0, + "88": 3081060352.0, + "89": 3081060352.0, + "90": 3081060352.0, + "91": 
3081060352.0, + "92": 3081060352.0, + "93": 3081060352.0, + "94": 3081060352.0, + "95": 3081060352.0, + "96": 3081060352.0, + "97": 3081060352.0, + "98": 3081060352.0, + "99": 3081060352.0, + "100": 3081060352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.15551, + "52": 0.8598, + "53": 0.74904, + "54": 0.7512, + "55": 0.75011, + "56": 0.7593, + "57": 1.36317, + "58": 1.3678, + "59": 0.75114, + "60": 0.74624, + "61": 0.74824, + "62": 0.75285, + "63": 0.75097, + "64": 0.7539, + "65": 1.11179, + "66": 0.7482, + "67": 0.75224, + "68": 0.75225, + "69": 0.73791, + "70": 0.74141, + "71": 0.74372, + "72": 0.74097, + "73": 1.17879, + "74": 1.13369, + "75": 0.75135, + "76": 0.74737, + "77": 0.7455, + "78": 0.74472, + "79": 1.10005, + "80": 0.74804, + "81": 0.75235, + "82": 2.07286, + "83": 0.74595, + "84": 0.75659, + "85": 0.74796, + "86": 0.73902, + "87": 0.73952, + "88": 0.73743, + "89": 0.74161, + "90": 0.94861, + "91": 0.94405, + "92": 1.05613, + "93": 1.27634, + "94": 0.80928, + "95": 0.77886, + "96": 1.11223, + "97": 0.73925, + "98": 0.773, + "99": 0.74424, + "100": 0.78256 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json index 88adf60a26e..bc0ee3bcb1e 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json @@ -12,48 +12,48 @@ "6": 10.41563, "7": 10.42859, "8": 10.42079, - "9": 10.43014, - "10": 10.40859, - "11": 10.43501, - "12": 10.4025, - "13": 10.42274, - "14": 10.41249, - "15": 10.40948, - "16": 10.40806, - "17": 10.3892, - "18": 10.38857, - "19": 10.37147, - "20": 10.40453, - "21": 10.36615, - "22": 10.34963, - "23": 10.35388, - "24": 10.30136, - "25": 10.31117, - "26": 10.30241, - "27": 10.2821, - "28": 10.27928, - "29": 10.23928, - "30": 10.14742, - "31": 10.10532, - "32": 10.09426, - "33": 10.09032, - "34": 10.06437, - "35": 10.04643, - "36": 10.03306, - "37": 10.00505, - "38": 10.00274, - "39": 9.91418, - "40": 9.91103, - "41": 9.86562, - "42": 9.78095, - "43": 9.79496, - "44": 9.73077, - "45": 9.7428, - "46": 9.63829, - "47": 9.6868, - "48": 9.637, - "49": 9.6554, - "50": 9.65776 + "9": 10.43013, + "10": 10.4087, + "11": 10.43493, + "12": 10.40244, + "13": 10.42282, + "14": 10.41239, + "15": 10.40952, + "16": 10.40789, + "17": 10.38944, + "18": 10.38859, + "19": 10.37154, + "20": 10.40445, + "21": 10.36609, + "22": 10.34962, + "23": 10.354, + "24": 10.30131, + "25": 10.3111, + "26": 10.30252, + "27": 10.28202, + "28": 10.27924, + "29": 10.23941, + "30": 10.14739, + "31": 10.10547, + "32": 10.09424, + "33": 10.09034, + "34": 10.0645, + "35": 10.04644, + "36": 10.03308, + "37": 10.00522, + "38": 10.00297, + "39": 9.91428, + "40": 9.91112, + "41": 9.86566, + "42": 9.78083, + "43": 9.79476, + "44": 9.73084, + "45": 9.74269, + "46": 9.63796, + "47": 9.68694, + "48": 9.63705, + "49": 9.65524, + "50": 9.65788 } }, "num-zeros": { @@ -69,48 +69,48 @@ "6": 
2985.0, "7": 3208.0, "8": 3314.0, - "9": 3134.0, - "10": 3124.0, - "11": 3913.0, - "12": 3008.0, - "13": 3108.0, - "14": 3652.0, - "15": 3267.0, - "16": 3662.0, - "17": 3680.0, - "18": 3708.0, - "19": 3375.0, - "20": 3449.0, - "21": 3115.0, - "22": 3545.0, - "23": 3516.0, - "24": 3789.0, - "25": 3570.0, - "26": 3719.0, - "27": 2808.0, - "28": 3823.0, - "29": 3626.0, - "30": 4136.0, - "31": 2541.0, - "32": 3945.0, - "33": 3501.0, - "34": 3795.0, - "35": 3652.0, - "36": 4269.0, - "37": 4152.0, - "38": 3787.0, - "39": 3873.0, - "40": 4661.0, - "41": 2846.0, - "42": 1556.0, - "43": 2809.0, - "44": 4030.0, - "45": 4724.0, - "46": 4587.0, - "47": 3120.0, - "48": 4366.0, - "49": 3839.0, - "50": 3146.0 + "9": 3210.0, + "10": 3297.0, + "11": 2833.0, + "12": 2982.0, + "13": 3178.0, + "14": 3705.0, + "15": 3252.0, + "16": 3615.0, + "17": 3789.0, + "18": 3620.0, + "19": 3327.0, + "20": 3539.0, + "21": 3129.0, + "22": 3597.0, + "23": 3595.0, + "24": 2781.0, + "25": 3585.0, + "26": 3607.0, + "27": 4015.0, + "28": 3836.0, + "29": 3716.0, + "30": 4150.0, + "31": 3472.0, + "32": 3024.0, + "33": 3553.0, + "34": 3793.0, + "35": 3757.0, + "36": 4205.0, + "37": 4221.0, + "38": 3819.0, + "39": 3866.0, + "40": 3554.0, + "41": 2883.0, + "42": 2592.0, + "43": 2856.0, + "44": 3173.0, + "45": 4948.0, + "46": 4572.0, + "47": 4077.0, + "48": 4355.0, + "49": 3885.0, + "50": 3266.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1661765632.0, - "2": 1661765632.0, - "3": 1661765632.0, - "4": 1661765632.0, - "5": 1661765632.0, - "6": 1661765632.0, - "7": 1661765632.0, - "8": 1661765632.0, - "9": 1661765632.0, - "10": 1661765632.0, - "11": 1661765632.0, - "12": 1661765632.0, - "13": 1661765632.0, - "14": 1661765632.0, - "15": 1661765632.0, - "16": 1661765632.0, - "17": 1661765632.0, - "18": 1661765632.0, - "19": 1661765632.0, - "20": 1661765632.0, - "21": 1661765632.0, - "22": 1661765632.0, - "23": 1661765632.0, - "24": 1661765632.0, - 
"25": 1661765632.0, - "26": 1661765632.0, - "27": 1661765632.0, - "28": 1661765632.0, - "29": 1661765632.0, - "30": 1661765632.0, - "31": 1661765632.0, - "32": 1661765632.0, - "33": 1661765632.0, - "34": 1661765632.0, - "35": 1661765632.0, - "36": 1661765632.0, - "37": 1661765632.0, - "38": 1661765632.0, - "39": 1661765632.0, - "40": 1661765632.0, - "41": 1661765632.0, - "42": 1661765632.0, - "43": 1661765632.0, - "44": 1661765632.0, - "45": 1661765632.0, - "46": 1661765632.0, - "47": 1661765632.0, - "48": 1661765632.0, - "49": 1661765632.0, - "50": 1661765632.0 + "1": 1662815232.0, + "2": 1662815232.0, + "3": 1662815232.0, + "4": 1662815232.0, + "5": 1662815232.0, + "6": 1662815232.0, + "7": 1662815232.0, + "8": 1662815232.0, + "9": 1662815232.0, + "10": 1662815232.0, + "11": 1662815232.0, + "12": 1662815232.0, + "13": 1662815232.0, + "14": 1662815232.0, + "15": 1662815232.0, + "16": 1662815232.0, + "17": 1662815232.0, + "18": 1662815232.0, + "19": 1662815232.0, + "20": 1662815232.0, + "21": 1662815232.0, + "22": 1662815232.0, + "23": 1662815232.0, + "24": 1662815232.0, + "25": 1662815232.0, + "26": 1662815232.0, + "27": 1662815232.0, + "28": 1662815232.0, + "29": 1662815232.0, + "30": 1662815232.0, + "31": 1662815232.0, + "32": 1662815232.0, + "33": 1662815232.0, + "34": 1662815232.0, + "35": 1662815232.0, + "36": 1662815232.0, + "37": 1662815232.0, + "38": 1662815232.0, + "39": 1662815232.0, + "40": 1662815232.0, + "41": 1662815232.0, + "42": 1662815232.0, + "43": 1662815232.0, + "44": 1662815232.0, + "45": 1662815232.0, + "46": 1662815232.0, + "47": 1662815232.0, + "48": 1662815232.0, + "49": 1662815232.0, + "50": 1662815232.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2506479104.0, - "2": 3205449216.0, - "3": 3205449216.0, - "4": 3205449216.0, - "5": 3205449216.0, - "6": 3205449216.0, - "7": 3205449216.0, - "8": 3205449216.0, - "9": 3205449216.0, - "10": 3205449216.0, - "11": 3205449216.0, - 
"12": 3205449216.0, - "13": 3205449216.0, - "14": 3205449216.0, - "15": 3205449216.0, - "16": 3205449216.0, - "17": 3205449216.0, - "18": 3205449216.0, - "19": 3205449216.0, - "20": 3205449216.0, - "21": 3205449216.0, - "22": 3205449216.0, - "23": 3205449216.0, - "24": 3205449216.0, - "25": 3205449216.0, - "26": 3205449216.0, - "27": 3205449216.0, - "28": 3205449216.0, - "29": 3205449216.0, - "30": 3205449216.0, - "31": 3205449216.0, - "32": 3205449216.0, - "33": 3205449216.0, - "34": 3205449216.0, - "35": 3205449216.0, - "36": 3205449216.0, - "37": 3205449216.0, - "38": 3205449216.0, - "39": 3205449216.0, - "40": 3205449216.0, - "41": 3205449216.0, - "42": 3205449216.0, - "43": 3205449216.0, - "44": 3205449216.0, - "45": 3205449216.0, - "46": 3205449216.0, - "47": 3205449216.0, - "48": 3205449216.0, - "49": 3205449216.0, - "50": 3205449216.0 + "1": 2507528704.0, + "2": 3206498816.0, + "3": 3206498816.0, + "4": 3206498816.0, + "5": 3206498816.0, + "6": 3206498816.0, + "7": 3206498816.0, + "8": 3206498816.0, + "9": 3206498816.0, + "10": 3206498816.0, + "11": 3206498816.0, + "12": 3206498816.0, + "13": 3206498816.0, + "14": 3206498816.0, + "15": 3206498816.0, + "16": 3206498816.0, + "17": 3206498816.0, + "18": 3206498816.0, + "19": 3206498816.0, + "20": 3206498816.0, + "21": 3206498816.0, + "22": 3206498816.0, + "23": 3206498816.0, + "24": 3206498816.0, + "25": 3206498816.0, + "26": 3206498816.0, + "27": 3206498816.0, + "28": 3206498816.0, + "29": 3206498816.0, + "30": 3206498816.0, + "31": 3206498816.0, + "32": 3206498816.0, + "33": 3206498816.0, + "34": 3206498816.0, + "35": 3206498816.0, + "36": 3206498816.0, + "37": 3206498816.0, + "38": 3206498816.0, + "39": 3206498816.0, + "40": 3206498816.0, + "41": 3206498816.0, + "42": 3206498816.0, + "43": 3206498816.0, + "44": 3206498816.0, + "45": 3206498816.0, + "46": 3206498816.0, + "47": 3206498816.0, + "48": 3206498816.0, + "49": 3206498816.0, + "50": 3206498816.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ 
"end_step": 50, "step_interval": 1, "values": { - "1": 10.29331, - "2": 1.82828, - "3": 1.75745, - "4": 1.75149, - "5": 1.76912, - "6": 1.75888, - "7": 1.75313, - "8": 1.75423, - "9": 1.74482, - "10": 1.84387, - "11": 2.01499, - "12": 1.74448, - "13": 1.75425, - "14": 2.09351, - "15": 1.77765, - "16": 1.76841, - "17": 1.75495, - "18": 2.05727, - "19": 1.77481, - "20": 2.11285, - "21": 1.77659, - "22": 1.75669, - "23": 1.75872, - "24": 2.1065, - "25": 2.02543, - "26": 1.84773, - "27": 1.76632, - "28": 1.76482, - "29": 1.75732, - "30": 1.75335, - "31": 1.75453, - "32": 1.80627, - "33": 1.757, - "34": 1.75719, - "35": 1.75478, - "36": 1.76009, - "37": 1.75602, - "38": 1.75806, - "39": 1.75609, - "40": 1.75247, - "41": 1.75179, - "42": 1.75873, - "43": 1.77534, - "44": 1.80833, - "45": 1.74663, - "46": 1.75048, - "47": 1.7473, - "48": 1.75253, - "49": 1.76783, - "50": 1.75365 + "1": 10.8403, + "2": 1.75656, + "3": 1.70317, + "4": 1.66346, + "5": 1.6703, + "6": 1.66753, + "7": 2.21547, + "8": 1.68918, + "9": 1.77005, + "10": 1.75261, + "11": 1.77153, + "12": 1.65933, + "13": 1.65337, + "14": 2.37845, + "15": 2.04839, + "16": 2.07092, + "17": 1.67053, + "18": 1.6729, + "19": 1.65463, + "20": 1.67298, + "21": 1.66273, + "22": 1.64743, + "23": 1.64351, + "24": 1.63695, + "25": 1.66076, + "26": 1.66885, + "27": 1.64423, + "28": 1.64773, + "29": 1.64565, + "30": 1.64171, + "31": 1.63705, + "32": 1.64216, + "33": 1.64504, + "34": 1.64255, + "35": 1.64762, + "36": 1.64913, + "37": 1.63831, + "38": 1.65213, + "39": 1.66065, + "40": 1.63954, + "41": 1.63964, + "42": 1.64408, + "43": 1.64113, + "44": 1.65016, + "45": 1.63618, + "46": 1.65229, + "47": 1.64761, + "48": 1.76963, + "49": 1.62535, + "50": 1.63142 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_gb200.json new file mode 100644 index 
00000000000..4770792474b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_gb200.json @@ -0,0 +1,162 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 12.59654, + "2": 12.60484, + "3": 12.59799, + "4": 12.59687, + "5": 12.59285, + "6": 12.59259, + "7": 12.58011, + "8": 12.54308, + "9": 12.51049, + "10": 12.49679, + "11": 12.32875, + "12": 12.29944, + "13": 12.2346, + "14": 12.23325, + "15": 11.81699, + "16": 11.80131, + "17": 11.76433, + "18": 11.73986, + "19": 11.6089, + "20": 11.50642, + "21": 11.26938, + "22": 11.37967, + "23": 11.288, + "24": 11.16331, + "25": 10.99891 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 521037632.0, + "2": 521666368.0, + "3": 520934816.0, + "4": 521227264.0, + "5": 520996064.0, + "6": 521371840.0, + "7": 521420352.0, + "8": 521057344.0, + "9": 521461504.0, + "10": 521178624.0, + "11": 522279104.0, + "12": 521439616.0, + "13": 521475712.0, + "14": 522445376.0, + "15": 521592960.0, + "16": 521416448.0, + "17": 521026496.0, + "18": 521277760.0, + "19": 521154656.0, + "20": 521134784.0, + "21": 522907648.0, + "22": 521590304.0, + "23": 521352384.0, + "24": 521424640.0, + "25": 523543808.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 24540168192.0, + "2": 24540168192.0, + "3": 24540168192.0, + "4": 24540168192.0, + "5": 24540168192.0, + "6": 24540168192.0, + "7": 24540168192.0, + "8": 24540168192.0, + "9": 24540168192.0, + "10": 24540168192.0, + "11": 24540168192.0, + "12": 24540168192.0, + "13": 24540168192.0, + "14": 24540168192.0, + "15": 24540168192.0, + "16": 24540168192.0, + "17": 24540168192.0, + "18": 24540168192.0, + "19": 24540168192.0, + "20": 24540168192.0, + "21": 24540168192.0, + "22": 24540168192.0, + "23": 24540168192.0, + "24": 24540168192.0, + "25": 24540168192.0 + } + }, + 
"mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 52729765888.0, + "2": 60518424576.0, + "3": 60518424576.0, + "4": 60518424576.0, + "5": 60518424576.0, + "6": 60518424576.0, + "7": 60518424576.0, + "8": 60518424576.0, + "9": 60518424576.0, + "10": 60518424576.0, + "11": 60518424576.0, + "12": 60518424576.0, + "13": 60518424576.0, + "14": 60518424576.0, + "15": 60518424576.0, + "16": 60518424576.0, + "17": 60518424576.0, + "18": 60518424576.0, + "19": 60518424576.0, + "20": 60518424576.0, + "21": 60518424576.0, + "22": 60518424576.0, + "23": 60518424576.0, + "24": 60518424576.0, + "25": 60518424576.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": "nan", + "2": 5.8109, + "3": "nan", + "4": 0.8316, + "5": "nan", + "6": 0.83072, + "7": "nan", + "8": 0.82637, + "9": "nan", + "10": 0.823, + "11": "nan", + "12": 0.82386, + "13": "nan", + "14": 0.82343, + "15": "nan", + "16": 0.82487, + "17": "nan", + "18": 0.82227, + "19": "nan", + "20": 0.82121, + "21": "nan", + "22": 0.82248, + "23": "nan", + "24": 0.81939, + "25": "nan" + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json index 478f889b21c..2ed3bf0784f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json @@ -88,7 +88,7 @@ "18": 24540168192.0, "19": 24540168192.0, "20": 24540168192.0, - "21": 24540389376.0, + "21": 24540168192.0, "22": 24540168192.0, "23": 24540168192.0, "24": 24540168192.0, @@ -100,7 +100,7 @@ "end_step": 25, "step_interval": 1, "values": { - "1": 52730810368.0, + "1": 52730814464.0, "2": 60518424576.0, "3": 60518424576.0, 
"4": 60518424576.0, @@ -133,29 +133,29 @@ "step_interval": 1, "values": { "1": "nan", - "2": 10.03336, + "2": 11.06832, "3": "nan", - "4": 1.18525, + "4": 1.16152, "5": "nan", - "6": 1.18158, + "6": 1.15069, "7": "nan", - "8": 1.18536, + "8": 1.15402, "9": "nan", - "10": 1.18428, + "10": 1.15412, "11": "nan", - "12": 1.18625, + "12": 1.15321, "13": "nan", - "14": 1.18256, + "14": 1.15624, "15": "nan", - "16": 1.18023, + "16": 1.1571, "17": "nan", - "18": 1.18227, + "18": 1.15577, "19": "nan", - "20": 1.18284, + "20": 1.15939, "21": "nan", - "22": 1.18238, + "22": 1.15675, "23": "nan", - "24": 1.18151, + "24": 1.15533, "25": "nan" } } diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..5c13c9d624f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_gb200.json @@ -0,0 +1,162 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 12.61164, + "2": 12.60596, + "3": 12.60278, + "4": 12.59692, + "5": 12.5956, + "6": 12.59777, + "7": 12.58051, + "8": 12.53845, + "9": 12.51222, + "10": 12.49859, + "11": 12.32384, + "12": 12.29418, + "13": 12.23141, + "14": 12.22824, + "15": 11.82221, + "16": 11.80412, + "17": 11.76119, + "18": 11.73708, + "19": 11.61309, + "20": 11.50147, + "21": 11.26475, + "22": 11.37638, + "23": 11.28398, + "24": 11.1565, + "25": 10.99865 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 523049152.0, + "2": 523677792.0, + "3": 522947712.0, + "4": 523241632.0, + "5": 523021120.0, + "6": 523374368.0, + "7": 523437888.0, + "8": 523083584.0, + "9": 523470432.0, + "10": 523196128.0, + "11": 524297728.0, + "12": 523455584.0, + "13": 523501312.0, + "14": 524479392.0, + "15": 523634048.0, + "16": 
523462624.0, + "17": 523079392.0, + "18": 523360448.0, + "19": 523209952.0, + "20": 523228480.0, + "21": 524938432.0, + "22": 523660512.0, + "23": 523415872.0, + "24": 523485056.0, + "25": 525638592.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 20663463936.0, + "2": 20663463936.0, + "3": 20663463936.0, + "4": 20663463936.0, + "5": 20663463936.0, + "6": 20663463936.0, + "7": 20663463936.0, + "8": 20663463936.0, + "9": 20663463936.0, + "10": 20663463936.0, + "11": 20663463936.0, + "12": 20663463936.0, + "13": 20663463936.0, + "14": 20663463936.0, + "15": 20663463936.0, + "16": 20663463936.0, + "17": 20663463936.0, + "18": 20663463936.0, + "19": 20663463936.0, + "20": 20663463936.0, + "21": 20663463936.0, + "22": 20663463936.0, + "23": 20663463936.0, + "24": 20663463936.0, + "25": 20663463936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": 51363229696.0, + "2": 58217480192.0, + "3": 58217480192.0, + "4": 58217480192.0, + "5": 58217480192.0, + "6": 58217480192.0, + "7": 58217480192.0, + "8": 58217480192.0, + "9": 58217480192.0, + "10": 58217480192.0, + "11": 58217480192.0, + "12": 58217480192.0, + "13": 58217480192.0, + "14": 58217480192.0, + "15": 58217480192.0, + "16": 58217480192.0, + "17": 58217480192.0, + "18": 58217480192.0, + "19": 58217480192.0, + "20": 58217480192.0, + "21": 58217480192.0, + "22": 58217480192.0, + "23": 58217480192.0, + "24": 58217480192.0, + "25": 58217480192.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 25, + "step_interval": 1, + "values": { + "1": "nan", + "2": 3.75605, + "3": "nan", + "4": 1.05448, + "5": "nan", + "6": 1.24087, + "7": "nan", + "8": 0.89299, + "9": "nan", + "10": 0.89376, + "11": "nan", + "12": 0.8965, + "13": "nan", + "14": 0.89831, + "15": "nan", + "16": 0.89733, + "17": "nan", + "18": 1.02538, + "19": "nan", + "20": 0.89305, + "21": "nan", + "22": 0.89255, 
+ "23": "nan", + "24": 0.91075, + "25": "nan" + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json index 0847af86737..a05cc0a0778 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json @@ -100,7 +100,7 @@ "end_step": 25, "step_interval": 1, "values": { - "1": 50289545216.0, + "1": 50289487872.0, "2": 57143791616.0, "3": 57143791616.0, "4": 57143791616.0, @@ -133,29 +133,29 @@ "step_interval": 1, "values": { "1": "nan", - "2": 6.11084, + "2": 5.99154, "3": "nan", - "4": 1.11678, + "4": 1.10664, "5": "nan", - "6": 1.11532, + "6": 1.10108, "7": "nan", - "8": 1.11539, + "8": 1.09852, "9": "nan", - "10": 1.1161, + "10": 1.10395, "11": "nan", - "12": 1.11723, + "12": 1.13133, "13": "nan", - "14": 1.11756, + "14": 1.1009, "15": "nan", - "16": 1.11596, + "16": 1.10173, "17": "nan", - "18": 1.11605, + "18": 1.10058, "19": "nan", - "20": 1.11783, + "20": 1.10006, "21": "nan", - "22": 1.11636, + "22": 1.10081, "23": "nan", - "24": 1.11585, + "24": 1.09852, "25": "nan" } } diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..d501eb20ca1 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/golden_values_dev_dgx_h100.json @@ -0,0 +1,42 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 1, + "step_interval": 1, + "values": { + "1": 10.86791 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 1, + "step_interval": 1, + "values": { + "1": 152866448.0 + } + }, + 
"mem-allocated-bytes": { + "start_step": 1, + "end_step": 1, + "step_interval": 1, + "values": { + "1": 67277201408.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 1, + "step_interval": 1, + "values": { + "1": 67277205504.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 1, + "step_interval": 1, + "values": { + "1": 14.45281 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..7650494228d --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.77536, + "2": 10.78444, + "3": 10.78593, + "4": 10.7484, + "5": 10.81554, + "6": 10.82691, + "7": 10.78469, + "8": 10.77764, + "9": 10.78351, + "10": 10.74241, + "11": 10.83031, + "12": 10.80335, + "13": 10.81653, + "14": 10.82186, + "15": 10.74223, + "16": 10.75087, + "17": 10.71888, + "18": 10.74308, + "19": 10.7407, + "20": 10.63713, + "21": 10.6277, + "22": 10.48435, + "23": 10.65701, + "24": 10.52682, + "25": 10.47546, + "26": 10.54091, + "27": 10.55554, + "28": 10.52147, + "29": 10.53465, + "30": 10.30892, + "31": 10.06663, + "32": 10.41746, + "33": 10.42487, + "34": 10.1739, + "35": 10.22475, + "36": 10.18282, + "37": 10.29689, + "38": 10.14801, + "39": 10.36934, + "40": 10.04004, + "41": 10.10752, + "42": 10.18198, + "43": 9.79649, + "44": 9.91071, + "45": 9.79715, + "46": 9.79411, + "47": 10.11365, + "48": 9.82516, + "49": 9.50416, + "50": 9.88698 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1625.0, + "2": 1666.0, + "3": 1695.0, + "4": 
1746.0, + "5": 1977.0, + "6": 1839.0, + "7": 1894.0, + "8": 1665.0, + "9": 1929.0, + "10": 1436.0, + "11": 1794.0, + "12": 1845.0, + "13": 1976.0, + "14": 1931.0, + "15": 1971.0, + "16": 2095.0, + "17": 1805.0, + "18": 1764.0, + "19": 1753.0, + "20": 1693.0, + "21": 1872.0, + "22": 1669.0, + "23": 2113.0, + "24": 1589.0, + "25": 1679.0, + "26": 1667.0, + "27": 1779.0, + "28": 2025.0, + "29": 1940.0, + "30": 1885.0, + "31": 1623.0, + "32": 1978.0, + "33": 2203.0, + "34": 1947.0, + "35": 2040.0, + "36": 2002.0, + "37": 2346.0, + "38": 2100.0, + "39": 2479.0, + "40": 2258.0, + "41": 2347.0, + "42": 2331.0, + "43": 2125.0, + "44": 2126.0, + "45": 2130.0, + "46": 2342.0, + "47": 2550.0, + "48": 2401.0, + "49": 2216.0, + "50": 2456.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 581489664.0, + "2": 581489664.0, + "3": 581489664.0, + "4": 581489664.0, + "5": 581489664.0, + "6": 581489664.0, + "7": 581489664.0, + "8": 581489664.0, + "9": 581489664.0, + "10": 581489664.0, + "11": 581489664.0, + "12": 581489664.0, + "13": 581489664.0, + "14": 581489664.0, + "15": 581489664.0, + "16": 581489664.0, + "17": 581489664.0, + "18": 581489664.0, + "19": 581489664.0, + "20": 581489664.0, + "21": 581489664.0, + "22": 581489664.0, + "23": 581489664.0, + "24": 581489664.0, + "25": 581489664.0, + "26": 581489664.0, + "27": 581489664.0, + "28": 581489664.0, + "29": 581489664.0, + "30": 581489664.0, + "31": 581489664.0, + "32": 581489664.0, + "33": 581489664.0, + "34": 581489664.0, + "35": 581489664.0, + "36": 581489664.0, + "37": 581489664.0, + "38": 581489664.0, + "39": 581489664.0, + "40": 581489664.0, + "41": 581489664.0, + "42": 581489664.0, + "43": 581489664.0, + "44": 581489664.0, + "45": 581489664.0, + "46": 581489664.0, + "47": 581489664.0, + "48": 581489664.0, + "49": 581489664.0, + "50": 581489664.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { 
+ "1": 4605814272.0, + "2": 4702430720.0, + "3": 4702430720.0, + "4": 4702430720.0, + "5": 4702430720.0, + "6": 4702430720.0, + "7": 4702430720.0, + "8": 4702430720.0, + "9": 4702430720.0, + "10": 4702430720.0, + "11": 4702430720.0, + "12": 4702430720.0, + "13": 4702430720.0, + "14": 4702430720.0, + "15": 4702430720.0, + "16": 4702430720.0, + "17": 4702430720.0, + "18": 4702430720.0, + "19": 4702430720.0, + "20": 4702430720.0, + "21": 4702430720.0, + "22": 4702430720.0, + "23": 4702430720.0, + "24": 4702430720.0, + "25": 4702430720.0, + "26": 4702430720.0, + "27": 4702430720.0, + "28": 4702430720.0, + "29": 4702430720.0, + "30": 4702430720.0, + "31": 4702430720.0, + "32": 4702430720.0, + "33": 4702430720.0, + "34": 4702430720.0, + "35": 4702430720.0, + "36": 4702430720.0, + "37": 4702430720.0, + "38": 4702430720.0, + "39": 4702430720.0, + "40": 4702430720.0, + "41": 4702430720.0, + "42": 4702430720.0, + "43": 4702430720.0, + "44": 4702430720.0, + "45": 4702430720.0, + "46": 4702430720.0, + "47": 4702430720.0, + "48": 4702430720.0, + "49": 4702430720.0, + "50": 4702430720.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5.51862, + "2": 0.11775, + "3": 0.11561, + "4": 0.1042, + "5": 0.10208, + "6": 0.09598, + "7": 0.09542, + "8": 0.095, + "9": 0.09404, + "10": 0.09596, + "11": 0.09825, + "12": 0.09507, + "13": 0.0943, + "14": 0.09595, + "15": 0.09454, + "16": 0.09354, + "17": 0.09423, + "18": 0.09638, + "19": 0.09698, + "20": 0.09656, + "21": 0.09629, + "22": 0.09731, + "23": 0.09913, + "24": 0.09535, + "25": 0.09314, + "26": 0.09324, + "27": 0.09374, + "28": 0.0992, + "29": 0.09647, + "30": 0.11416, + "31": 0.09524, + "32": 0.09418, + "33": 0.09544, + "34": 0.09428, + "35": 0.09432, + "36": 0.09584, + "37": 0.096, + "38": 0.09539, + "39": 0.09482, + "40": 0.09568, + "41": 0.09682, + "42": 0.0964, + "43": 0.09675, + "44": 0.09583, + "45": 0.09482, + "46": 0.09426, + "47": 0.09537, + "48": 0.09383, + 
"49": 0.09397, + "50": 0.09592 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json index cd90888e65d..036b53dabb1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6.95394, - "2": 0.0878, - "3": 0.06953, - "4": 0.07916, - "5": 0.06775, - "6": 0.07681, - "7": 0.06695, - "8": 0.0786, - "9": 0.0664, - "10": 0.08059, - "11": 0.06554, - "12": 0.07501, - "13": 0.06663, - "14": 0.06608, - "15": 0.06585, - "16": 0.06738, - "17": 0.067, - "18": 0.06553, - "19": 0.06755, - "20": 0.06723, - "21": 0.06559, - "22": 0.0664, - "23": 0.06722, - "24": 0.06553, - "25": 0.06829, - "26": 0.06873, - "27": 0.06733, - "28": 0.06731, - "29": 0.06824, - "30": 0.06696, - "31": 0.06661, - "32": 0.06587, - "33": 0.06588, - "34": 0.06564, - "35": 0.06761, - "36": 0.06655, - "37": 0.06712, - "38": 0.06601, - "39": 0.06661, - "40": 0.06632, - "41": 0.0691, - "42": 0.06551, - "43": 0.06839, - "44": 0.06528, - "45": 0.06744, - "46": 0.0675, - "47": 0.06698, - "48": 0.0649, - "49": 0.06596, - "50": 0.06581 + "1": 6.80579, + "2": 0.08104, + "3": 0.07547, + "4": 0.05731, + "5": 0.06226, + "6": 0.05988, + "7": 0.06566, + "8": 0.06635, + "9": 0.06593, + "10": 0.06639, + "11": 0.06591, + "12": 0.06568, + "13": 0.06504, + "14": 0.06232, + "15": 0.06162, + "16": 0.05614, + "17": 0.06083, + "18": 0.05789, + "19": 0.05867, + "20": 0.05574, + "21": 0.06043, + "22": 0.05778, + "23": 0.06166, + "24": 0.05671, + "25": 0.05765, + "26": 0.05638, + "27": 0.05601, + "28": 0.05637, + "29": 0.05497, + 
"30": 0.05757, + "31": 0.05556, + "32": 0.05715, + "33": 0.05761, + "34": 0.05779, + "35": 0.05996, + "36": 0.05761, + "37": 0.06454, + "38": 0.0575, + "39": 0.05802, + "40": 0.05752, + "41": 0.05904, + "42": 0.05622, + "43": 0.0555, + "44": 0.05785, + "45": 0.0578, + "46": 0.05758, + "47": 0.05729, + "48": 0.05652, + "49": 0.05619, + "50": 0.05705 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..0405b9dc312 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.77472, + "2": 10.7834, + "3": 10.783, + "4": 10.74952, + "5": 10.8207, + "6": 10.8234, + "7": 10.79076, + "8": 10.78002, + "9": 10.78621, + "10": 10.74365, + "11": 10.8322, + "12": 10.80441, + "13": 10.8213, + "14": 10.82574, + "15": 10.74146, + "16": 10.75035, + "17": 10.72535, + "18": 10.74231, + "19": 10.7445, + "20": 10.63706, + "21": 10.63104, + "22": 10.48032, + "23": 10.65993, + "24": 10.5253, + "25": 10.47539, + "26": 10.54133, + "27": 10.5547, + "28": 10.521, + "29": 10.53614, + "30": 10.30519, + "31": 10.06487, + "32": 10.41559, + "33": 10.42241, + "34": 10.1741, + "35": 10.22337, + "36": 10.18522, + "37": 10.30398, + "38": 10.14967, + "39": 10.37031, + "40": 10.04015, + "41": 10.10913, + "42": 10.17951, + "43": 9.79734, + "44": 9.90801, + "45": 9.79837, + "46": 9.79661, + "47": 10.12063, + "48": 9.82076, + "49": 9.50507, + "50": 9.88047 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1603.0, + "2": 1689.0, + "3": 1616.0, + "4": 1774.0, + "5": 
2059.0, + "6": 1983.0, + "7": 2102.0, + "8": 1640.0, + "9": 1877.0, + "10": 1435.0, + "11": 1981.0, + "12": 1898.0, + "13": 1949.0, + "14": 1797.0, + "15": 1923.0, + "16": 1993.0, + "17": 1804.0, + "18": 1793.0, + "19": 1808.0, + "20": 1658.0, + "21": 1881.0, + "22": 1744.0, + "23": 2029.0, + "24": 1621.0, + "25": 1550.0, + "26": 1686.0, + "27": 1794.0, + "28": 1927.0, + "29": 1974.0, + "30": 1884.0, + "31": 1610.0, + "32": 1934.0, + "33": 2098.0, + "34": 1840.0, + "35": 2033.0, + "36": 2052.0, + "37": 2302.0, + "38": 2119.0, + "39": 2421.0, + "40": 2242.0, + "41": 2339.0, + "42": 2362.0, + "43": 2065.0, + "44": 2186.0, + "45": 2266.0, + "46": 2378.0, + "47": 2504.0, + "48": 2503.0, + "49": 2303.0, + "50": 2494.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 581489664.0, + "2": 581489664.0, + "3": 581489664.0, + "4": 581489664.0, + "5": 581489664.0, + "6": 581489664.0, + "7": 581489664.0, + "8": 581489664.0, + "9": 581489664.0, + "10": 581489664.0, + "11": 581489664.0, + "12": 581489664.0, + "13": 581489664.0, + "14": 581489664.0, + "15": 581489664.0, + "16": 581489664.0, + "17": 581489664.0, + "18": 581489664.0, + "19": 581489664.0, + "20": 581489664.0, + "21": 581489664.0, + "22": 581489664.0, + "23": 581489664.0, + "24": 581489664.0, + "25": 581489664.0, + "26": 581489664.0, + "27": 581489664.0, + "28": 581489664.0, + "29": 581489664.0, + "30": 581489664.0, + "31": 581489664.0, + "32": 581489664.0, + "33": 581489664.0, + "34": 581489664.0, + "35": 581489664.0, + "36": 581489664.0, + "37": 581489664.0, + "38": 581489664.0, + "39": 581489664.0, + "40": 581489664.0, + "41": 581489664.0, + "42": 581489664.0, + "43": 581489664.0, + "44": 581489664.0, + "45": 581489664.0, + "46": 581489664.0, + "47": 581489664.0, + "48": 581489664.0, + "49": 581489664.0, + "50": 581489664.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 
4605814272.0, + "2": 4702430720.0, + "3": 4702430720.0, + "4": 4702430720.0, + "5": 4702430720.0, + "6": 4702430720.0, + "7": 4702430720.0, + "8": 4702430720.0, + "9": 4702430720.0, + "10": 4702430720.0, + "11": 4702430720.0, + "12": 4702430720.0, + "13": 4702430720.0, + "14": 4702430720.0, + "15": 4702430720.0, + "16": 4702430720.0, + "17": 4702430720.0, + "18": 4702430720.0, + "19": 4702430720.0, + "20": 4702430720.0, + "21": 4702430720.0, + "22": 4702430720.0, + "23": 4702430720.0, + "24": 4702430720.0, + "25": 4702430720.0, + "26": 4702430720.0, + "27": 4702430720.0, + "28": 4702430720.0, + "29": 4702430720.0, + "30": 4702430720.0, + "31": 4702430720.0, + "32": 4702430720.0, + "33": 4702430720.0, + "34": 4702430720.0, + "35": 4702430720.0, + "36": 4702430720.0, + "37": 4702430720.0, + "38": 4702430720.0, + "39": 4702430720.0, + "40": 4702430720.0, + "41": 4702430720.0, + "42": 4702430720.0, + "43": 4702430720.0, + "44": 4702430720.0, + "45": 4702430720.0, + "46": 4702430720.0, + "47": 4702430720.0, + "48": 4702430720.0, + "49": 4702430720.0, + "50": 4702430720.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5.87663, + "2": 0.11967, + "3": 0.10376, + "4": 0.09966, + "5": 0.0967, + "6": 0.09666, + "7": 0.09702, + "8": 0.09962, + "9": 0.10053, + "10": 0.10019, + "11": 0.09818, + "12": 0.37487, + "13": 0.10166, + "14": 0.10015, + "15": 0.10189, + "16": 0.09883, + "17": 0.10229, + "18": 0.09859, + "19": 0.09957, + "20": 0.09987, + "21": 0.09747, + "22": 0.09678, + "23": 0.09865, + "24": 0.09988, + "25": 0.11712, + "26": 0.11559, + "27": 0.11626, + "28": 0.11634, + "29": 0.11701, + "30": 0.13544, + "31": 0.13258, + "32": 0.12643, + "33": 0.12858, + "34": 0.18682, + "35": 0.12702, + "36": 0.09639, + "37": 0.09478, + "38": 0.09349, + "39": 0.09417, + "40": 0.09272, + "41": 0.09563, + "42": 0.09369, + "43": 0.09427, + "44": 0.09501, + "45": 0.09141, + "46": 0.09367, + "47": 0.0929, + "48": 0.09322, + "49": 
0.09223, + "50": 0.0936 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_h100.json index db410897813..5718cc22850 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 581488640.0, - "2": 581488640.0, - "3": 581488640.0, - "4": 581488640.0, - "5": 581488640.0, - "6": 581488640.0, - "7": 581488640.0, - "8": 581488640.0, - "9": 581488640.0, - "10": 581488640.0, - "11": 581488640.0, - "12": 581488640.0, - "13": 581488640.0, - "14": 581488640.0, - "15": 581488640.0, - "16": 581488640.0, - "17": 581488640.0, - "18": 581488640.0, - "19": 581488640.0, - "20": 581488640.0, - "21": 581488640.0, - "22": 581488640.0, - "23": 581488640.0, - "24": 581488640.0, - "25": 581488640.0, - "26": 581488640.0, - "27": 581488640.0, - "28": 581488640.0, - "29": 581488640.0, - "30": 581488640.0, - "31": 581488640.0, - "32": 581488640.0, - "33": 581488640.0, - "34": 581488640.0, - "35": 581488640.0, - "36": 581488640.0, - "37": 581488640.0, - "38": 581488640.0, - "39": 581488640.0, - "40": 581488640.0, - "41": 581488640.0, - "42": 581488640.0, - "43": 581488640.0, - "44": 581488640.0, - "45": 581488640.0, - "46": 581488640.0, - "47": 581488640.0, - "48": 581488640.0, - "49": 581488640.0, - "50": 581488640.0 + "1": 581489664.0, + "2": 581489664.0, + "3": 581489664.0, + "4": 581489664.0, + "5": 581489664.0, + "6": 581489664.0, + "7": 581489664.0, + "8": 581489664.0, + "9": 581489664.0, + "10": 581489664.0, + "11": 581489664.0, + "12": 581489664.0, + "13": 
581489664.0, + "14": 581489664.0, + "15": 581489664.0, + "16": 581489664.0, + "17": 581489664.0, + "18": 581489664.0, + "19": 581489664.0, + "20": 581489664.0, + "21": 581489664.0, + "22": 581489664.0, + "23": 581489664.0, + "24": 581489664.0, + "25": 581489664.0, + "26": 581489664.0, + "27": 581489664.0, + "28": 581489664.0, + "29": 581489664.0, + "30": 581489664.0, + "31": 581489664.0, + "32": 581489664.0, + "33": 581489664.0, + "34": 581489664.0, + "35": 581489664.0, + "36": 581489664.0, + "37": 581489664.0, + "38": 581489664.0, + "39": 581489664.0, + "40": 581489664.0, + "41": 581489664.0, + "42": 581489664.0, + "43": 581489664.0, + "44": 581489664.0, + "45": 581489664.0, + "46": 581489664.0, + "47": 581489664.0, + "48": 581489664.0, + "49": 581489664.0, + "50": 581489664.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4605813248.0, - "2": 4702429696.0, - "3": 4702429696.0, - "4": 4702429696.0, - "5": 4702429696.0, - "6": 4702429696.0, - "7": 4702429696.0, - "8": 4702429696.0, - "9": 4702429696.0, - "10": 4702429696.0, - "11": 4702429696.0, - "12": 4702429696.0, - "13": 4702429696.0, - "14": 4702429696.0, - "15": 4702429696.0, - "16": 4702429696.0, - "17": 4702429696.0, - "18": 4702429696.0, - "19": 4702429696.0, - "20": 4702429696.0, - "21": 4702429696.0, - "22": 4702429696.0, - "23": 4702429696.0, - "24": 4702429696.0, - "25": 4702429696.0, - "26": 4702429696.0, - "27": 4702429696.0, - "28": 4702429696.0, - "29": 4702429696.0, - "30": 4702429696.0, - "31": 4702429696.0, - "32": 4702429696.0, - "33": 4702429696.0, - "34": 4702429696.0, - "35": 4702429696.0, - "36": 4702429696.0, - "37": 4702429696.0, - "38": 4702429696.0, - "39": 4702429696.0, - "40": 4702429696.0, - "41": 4702429696.0, - "42": 4702429696.0, - "43": 4702429696.0, - "44": 4702429696.0, - "45": 4702429696.0, - "46": 4702429696.0, - "47": 4702429696.0, - "48": 4702429696.0, - "49": 4702429696.0, - "50": 4702429696.0 + "1": 
4605814272.0, + "2": 4702430720.0, + "3": 4702430720.0, + "4": 4702430720.0, + "5": 4702430720.0, + "6": 4702430720.0, + "7": 4702430720.0, + "8": 4702430720.0, + "9": 4702430720.0, + "10": 4702430720.0, + "11": 4702430720.0, + "12": 4702430720.0, + "13": 4702430720.0, + "14": 4702430720.0, + "15": 4702430720.0, + "16": 4702430720.0, + "17": 4702430720.0, + "18": 4702430720.0, + "19": 4702430720.0, + "20": 4702430720.0, + "21": 4702430720.0, + "22": 4702430720.0, + "23": 4702430720.0, + "24": 4702430720.0, + "25": 4702430720.0, + "26": 4702430720.0, + "27": 4702430720.0, + "28": 4702430720.0, + "29": 4702430720.0, + "30": 4702430720.0, + "31": 4702430720.0, + "32": 4702430720.0, + "33": 4702430720.0, + "34": 4702430720.0, + "35": 4702430720.0, + "36": 4702430720.0, + "37": 4702430720.0, + "38": 4702430720.0, + "39": 4702430720.0, + "40": 4702430720.0, + "41": 4702430720.0, + "42": 4702430720.0, + "43": 4702430720.0, + "44": 4702430720.0, + "45": 4702430720.0, + "46": 4702430720.0, + "47": 4702430720.0, + "48": 4702430720.0, + "49": 4702430720.0, + "50": 4702430720.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6.7331, - "2": 0.09599, - "3": 0.08799, - "4": 0.08582, - "5": 0.08478, - "6": 0.08513, - "7": 0.07688, - "8": 0.07429, - "9": 0.07778, - "10": 0.07515, - "11": 0.07987, - "12": 0.07525, - "13": 0.07727, - "14": 0.07535, - "15": 0.07896, - "16": 0.07509, - "17": 0.07751, - "18": 0.076, - "19": 0.07647, - "20": 0.07502, - "21": 0.07467, - "22": 0.07544, - "23": 0.0742, - "24": 0.07536, - "25": 0.07588, - "26": 0.07381, - "27": 0.07407, - "28": 0.075, - "29": 0.07424, - "30": 0.07454, - "31": 0.07482, - "32": 0.07526, - "33": 0.07493, - "34": 0.07437, - "35": 0.07447, - "36": 0.07482, - "37": 0.07454, - "38": 0.07501, - "39": 0.07495, - "40": 0.07481, - "41": 0.07433, - "42": 0.07467, - "43": 0.0754, - "44": 0.07543, - "45": 0.07498, - "46": 0.07457, - "47": 0.07378, - "48": 0.07477, - "49": 0.07465, - 
"50": 0.07444 + "1": 8.63401, + "2": 0.09023, + "3": 0.07348, + "4": 0.05746, + "5": 0.05663, + "6": 0.05755, + "7": 0.0574, + "8": 0.05838, + "9": 0.05585, + "10": 0.05739, + "11": 0.05576, + "12": 0.0561, + "13": 0.05582, + "14": 0.05815, + "15": 0.05615, + "16": 0.05649, + "17": 0.05732, + "18": 0.05614, + "19": 0.05614, + "20": 0.0565, + "21": 0.05624, + "22": 0.05712, + "23": 0.05601, + "24": 0.05772, + "25": 0.05612, + "26": 0.05714, + "27": 0.05571, + "28": 0.05803, + "29": 0.0562, + "30": 0.05628, + "31": 0.05602, + "32": 0.05667, + "33": 0.05631, + "34": 0.05631, + "35": 0.05623, + "36": 0.0565, + "37": 0.05737, + "38": 0.05733, + "39": 0.05988, + "40": 0.05739, + "41": 0.05719, + "42": 0.05699, + "43": 0.05608, + "44": 0.05867, + "45": 0.05838, + "46": 0.05842, + "47": 0.05635, + "48": 0.05732, + "49": 0.0569, + "50": 0.05736 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json index dd30f7144c7..5e28e46bf28 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4.57734, - "2": 0.12447, - "3": 0.1105, - "4": 0.11652, - "5": 0.11171, - "6": 0.10268, - "7": 0.0964, - "8": 0.09397, - "9": 0.09475, - "10": 0.09372, - "11": 0.09325, - "12": 0.09309, - "13": 0.09305, - "14": 0.09354, - "15": 0.09324, - "16": 0.09342, - "17": 0.09327, - "18": 0.09347, - "19": 0.09283, - "20": 0.09308, - "21": 0.09266, - "22": 0.09487, - "23": 0.09318, - "24": 0.09338, - "25": 0.09306, - "26": 0.09374, - "27": 0.09386, - "28": 0.09412, - "29": 0.09395, 
- "30": 0.09393, - "31": 0.09439, - "32": 0.09481, - "33": 0.09338, - "34": 0.09466, - "35": 0.0936, - "36": 0.09463, - "37": 0.09316, - "38": 0.09572, - "39": 0.09295, - "40": 0.09592, - "41": 0.09322, - "42": 0.09468, - "43": 0.09488, - "44": 0.09323, - "45": 0.09265, - "46": 0.09574, - "47": 0.09267, - "48": 0.09592, - "49": 0.09356, - "50": 0.09502 + "1": 3.16333, + "2": 0.12429, + "3": 0.10327, + "4": 0.09373, + "5": 0.09355, + "6": 0.0921, + "7": 0.09247, + "8": 0.09175, + "9": 0.08988, + "10": 0.09206, + "11": 0.0907, + "12": 0.09062, + "13": 0.09067, + "14": 0.09178, + "15": 0.09006, + "16": 0.09058, + "17": 0.09113, + "18": 0.08975, + "19": 0.08958, + "20": 0.08974, + "21": 0.0895, + "22": 0.08967, + "23": 0.08965, + "24": 0.08985, + "25": 0.08964, + "26": 0.09069, + "27": 0.08964, + "28": 0.08972, + "29": 0.08977, + "30": 0.08994, + "31": 0.0898, + "32": 0.08953, + "33": 0.09044, + "34": 0.09062, + "35": 0.09102, + "36": 0.09102, + "37": 0.09125, + "38": 0.09035, + "39": 0.09141, + "40": 0.09069, + "41": 0.0916, + "42": 0.09094, + "43": 0.09103, + "44": 0.09176, + "45": 0.09169, + "46": 0.09186, + "47": 0.09119, + "48": 0.09112, + "49": 0.09072, + "50": 0.09246 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..b280d123468 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.77447, + "2": 10.78365, + "3": 10.78346, + "4": 10.74822, + "5": 10.81983, + "6": 10.82303, + "7": 10.79055, + "8": 10.77956, + "9": 10.78595, + "10": 10.74453, + "11": 10.83267, + "12": 
10.80426, + "13": 10.82082, + "14": 10.82567, + "15": 10.74206, + "16": 10.74904, + "17": 10.7252, + "18": 10.74176, + "19": 10.74412, + "20": 10.63678, + "21": 10.63055, + "22": 10.47962, + "23": 10.65976, + "24": 10.52477, + "25": 10.47552, + "26": 10.54117, + "27": 10.55491, + "28": 10.52139, + "29": 10.536, + "30": 10.3053, + "31": 10.0644, + "32": 10.41569, + "33": 10.42199, + "34": 10.17393, + "35": 10.22403, + "36": 10.18498, + "37": 10.30417, + "38": 10.14995, + "39": 10.37042, + "40": 10.03994, + "41": 10.10953, + "42": 10.17937, + "43": 9.79747, + "44": 9.90812, + "45": 9.79809, + "46": 9.7966, + "47": 10.12109, + "48": 9.82083, + "49": 9.50495, + "50": 9.88025, + "51": 9.83614, + "52": 9.72315, + "53": 10.05318, + "54": 9.93747, + "55": 9.87384, + "56": 9.60449, + "57": 9.4523, + "58": 9.8188, + "59": 9.5772, + "60": 9.48534, + "61": 9.68548, + "62": 9.97906, + "63": 9.36419, + "64": 9.76203, + "65": 8.94097, + "66": 9.69475, + "67": 9.36656, + "68": 9.77745, + "69": 9.79001, + "70": 9.72374, + "71": 9.62037, + "72": 9.57423, + "73": 9.48575, + "74": 8.92729, + "75": 9.41651, + "76": 9.07747, + "77": 10.05444, + "78": 9.71914, + "79": 9.37306, + "80": 9.40003, + "81": 9.47844, + "82": 9.69867, + "83": 9.31155, + "84": 9.41457, + "85": 9.61163, + "86": 9.07418, + "87": 9.5939, + "88": 9.74928, + "89": 9.5985, + "90": 9.82761, + "91": 9.33631, + "92": 9.35805, + "93": 9.08552, + "94": 8.82786, + "95": 9.5303, + "96": 9.52663, + "97": 9.30483, + "98": 9.67007, + "99": 8.89606, + "100": 9.40702 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1531.0, + "2": 1722.0, + "3": 1589.0, + "4": 1870.0, + "5": 1992.0, + "6": 1894.0, + "7": 1954.0, + "8": 1697.0, + "9": 1855.0, + "10": 1477.0, + "11": 1889.0, + "12": 1848.0, + "13": 1973.0, + "14": 1877.0, + "15": 2015.0, + "16": 1943.0, + "17": 1772.0, + "18": 1764.0, + "19": 1782.0, + "20": 1678.0, + "21": 1906.0, + "22": 1738.0, + "23": 2057.0, + "24": 
1597.0, + "25": 1567.0, + "26": 1762.0, + "27": 1932.0, + "28": 1987.0, + "29": 1936.0, + "30": 1965.0, + "31": 1554.0, + "32": 1846.0, + "33": 2148.0, + "34": 1872.0, + "35": 1985.0, + "36": 1906.0, + "37": 2245.0, + "38": 2119.0, + "39": 2495.0, + "40": 2274.0, + "41": 2236.0, + "42": 2318.0, + "43": 2068.0, + "44": 2120.0, + "45": 2265.0, + "46": 2447.0, + "47": 2584.0, + "48": 2296.0, + "49": 2252.0, + "50": 2568.0, + "51": 2650.0, + "52": 2700.0, + "53": 2863.0, + "54": 2676.0, + "55": 2390.0, + "56": 2753.0, + "57": 2430.0, + "58": 2919.0, + "59": 2831.0, + "60": 2428.0, + "61": 2932.0, + "62": 2724.0, + "63": 2579.0, + "64": 2987.0, + "65": 2506.0, + "66": 2886.0, + "67": 2871.0, + "68": 2870.0, + "69": 3001.0, + "70": 3294.0, + "71": 3043.0, + "72": 2614.0, + "73": 3054.0, + "74": 2024.0, + "75": 2507.0, + "76": 3020.0, + "77": 3253.0, + "78": 3230.0, + "79": 3210.0, + "80": 3252.0, + "81": 3614.0, + "82": 3395.0, + "83": 2919.0, + "84": 3296.0, + "85": 3320.0, + "86": 2865.0, + "87": 3931.0, + "88": 3240.0, + "89": 3428.0, + "90": 3127.0, + "91": 2815.0, + "92": 3098.0, + "93": 2796.0, + "94": 3324.0, + "95": 3428.0, + "96": 3541.0, + "97": 3216.0, + "98": 3705.0, + "99": 3184.0, + "100": 3073.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 581489664.0, + "2": 581489664.0, + "3": 581489664.0, + "4": 581489664.0, + "5": 581489664.0, + "6": 581489664.0, + "7": 581489664.0, + "8": 581489664.0, + "9": 581489664.0, + "10": 581489664.0, + "11": 581489664.0, + "12": 581489664.0, + "13": 581489664.0, + "14": 581489664.0, + "15": 581489664.0, + "16": 581489664.0, + "17": 581489664.0, + "18": 581489664.0, + "19": 581489664.0, + "20": 581489664.0, + "21": 581489664.0, + "22": 581489664.0, + "23": 581489664.0, + "24": 581489664.0, + "25": 581489664.0, + "26": 581489664.0, + "27": 581489664.0, + "28": 581489664.0, + "29": 581489664.0, + "30": 581489664.0, + "31": 581489664.0, + "32": 581489664.0, + 
"33": 581489664.0, + "34": 581489664.0, + "35": 581489664.0, + "36": 581489664.0, + "37": 581489664.0, + "38": 581489664.0, + "39": 581489664.0, + "40": 581489664.0, + "41": 581489664.0, + "42": 581489664.0, + "43": 581489664.0, + "44": 581489664.0, + "45": 581489664.0, + "46": 581489664.0, + "47": 581489664.0, + "48": 581489664.0, + "49": 581489664.0, + "50": 581489664.0, + "51": 581489664.0, + "52": 581489664.0, + "53": 581489664.0, + "54": 581489664.0, + "55": 581489664.0, + "56": 581489664.0, + "57": 581489664.0, + "58": 581489664.0, + "59": 581489664.0, + "60": 581489664.0, + "61": 581489664.0, + "62": 581489664.0, + "63": 581489664.0, + "64": 581489664.0, + "65": 581489664.0, + "66": 581489664.0, + "67": 581489664.0, + "68": 581489664.0, + "69": 581489664.0, + "70": 581489664.0, + "71": 581489664.0, + "72": 581489664.0, + "73": 581489664.0, + "74": 581489664.0, + "75": 581489664.0, + "76": 581489664.0, + "77": 581489664.0, + "78": 581489664.0, + "79": 581489664.0, + "80": 581489664.0, + "81": 581489664.0, + "82": 581489664.0, + "83": 581489664.0, + "84": 581489664.0, + "85": 581489664.0, + "86": 581489664.0, + "87": 581489664.0, + "88": 581489664.0, + "89": 581489664.0, + "90": 581489664.0, + "91": 581489664.0, + "92": 581489664.0, + "93": 581489664.0, + "94": 581489664.0, + "95": 581489664.0, + "96": 581489664.0, + "97": 581489664.0, + "98": 581489664.0, + "99": 581489664.0, + "100": 581489664.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2644459008.0, + "2": 2741075456.0, + "3": 2741075456.0, + "4": 2741075456.0, + "5": 2741075456.0, + "6": 2741075456.0, + "7": 2741075456.0, + "8": 2741075456.0, + "9": 2741075456.0, + "10": 2741075456.0, + "11": 2741075456.0, + "12": 2741075456.0, + "13": 2741075456.0, + "14": 2741075456.0, + "15": 2741075456.0, + "16": 2741075456.0, + "17": 2741075456.0, + "18": 2741075456.0, + "19": 2741075456.0, + "20": 2741075456.0, + "21": 2741075456.0, + 
"22": 2741075456.0, + "23": 2741075456.0, + "24": 2741075456.0, + "25": 2741075456.0, + "26": 2741075456.0, + "27": 2741075456.0, + "28": 2741075456.0, + "29": 2741075456.0, + "30": 2741075456.0, + "31": 2741075456.0, + "32": 2741075456.0, + "33": 2741075456.0, + "34": 2741075456.0, + "35": 2741075456.0, + "36": 2741075456.0, + "37": 2741075456.0, + "38": 2741075456.0, + "39": 2741075456.0, + "40": 2741075456.0, + "41": 2741075456.0, + "42": 2741075456.0, + "43": 2741075456.0, + "44": 2741075456.0, + "45": 2741075456.0, + "46": 2741075456.0, + "47": 2741075456.0, + "48": 2741075456.0, + "49": 2741075456.0, + "50": 2741075456.0, + "51": 2741075456.0, + "52": 2741075456.0, + "53": 2741075456.0, + "54": 2741075456.0, + "55": 2741075456.0, + "56": 2741075456.0, + "57": 2741075456.0, + "58": 2741075456.0, + "59": 2741075456.0, + "60": 2741075456.0, + "61": 2741075456.0, + "62": 2741075456.0, + "63": 2741075456.0, + "64": 2741075456.0, + "65": 2741075456.0, + "66": 2741075456.0, + "67": 2741075456.0, + "68": 2741075456.0, + "69": 2741075456.0, + "70": 2741075456.0, + "71": 2741075456.0, + "72": 2741075456.0, + "73": 2741075456.0, + "74": 2741075456.0, + "75": 2741075456.0, + "76": 2741075456.0, + "77": 2741075456.0, + "78": 2741075456.0, + "79": 2741075456.0, + "80": 2741075456.0, + "81": 2741075456.0, + "82": 2741075456.0, + "83": 2741075456.0, + "84": 2741075456.0, + "85": 2741075456.0, + "86": 2741075456.0, + "87": 2741075456.0, + "88": 2741075456.0, + "89": 2741075456.0, + "90": 2741075456.0, + "91": 2741075456.0, + "92": 2741075456.0, + "93": 2741075456.0, + "94": 2741075456.0, + "95": 2741075456.0, + "96": 2741075456.0, + "97": 2741075456.0, + "98": 2741075456.0, + "99": 2741075456.0, + "100": 2741075456.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5.16302, + "2": 0.10393, + "3": 0.10318, + "4": 0.08757, + "5": 0.08719, + "6": 0.08686, + "7": 0.08532, + "8": 0.0858, + "9": 0.08669, + "10": 
0.08615, + "11": 0.08684, + "12": 0.08786, + "13": 0.15333, + "14": 0.08821, + "15": 0.18235, + "16": 0.08981, + "17": 0.08651, + "18": 0.0876, + "19": 0.08798, + "20": 0.08911, + "21": 0.08738, + "22": 0.08768, + "23": 0.08719, + "24": 0.087, + "25": 0.08861, + "26": 0.08768, + "27": 0.08826, + "28": 0.08976, + "29": 0.0886, + "30": 0.08951, + "31": 0.08933, + "32": 0.08963, + "33": 0.09543, + "34": 0.10061, + "35": 0.10664, + "36": 0.09906, + "37": 0.11365, + "38": 0.82081, + "39": 0.08864, + "40": 0.08743, + "41": 0.08722, + "42": 0.08656, + "43": 0.09145, + "44": 0.08801, + "45": 0.17031, + "46": 0.0894, + "47": 0.08943, + "48": 0.08707, + "49": 0.08683, + "50": 0.08738, + "51": 0.11089, + "52": 0.08833, + "53": 0.08713, + "54": 0.08847, + "55": 0.09031, + "56": 0.08636, + "57": 0.08753, + "58": 0.08716, + "59": 0.08699, + "60": 0.08807, + "61": 0.6943, + "62": 0.09219, + "63": 0.08631, + "64": 0.0882, + "65": 0.08874, + "66": 0.08909, + "67": 0.08792, + "68": 0.08836, + "69": 0.08825, + "70": 0.08851, + "71": 0.08764, + "72": 0.08728, + "73": 0.08806, + "74": 0.08749, + "75": 0.09031, + "76": 0.08768, + "77": 0.08844, + "78": 0.08914, + "79": 0.08957, + "80": 0.08909, + "81": 0.08925, + "82": 0.09031, + "83": 0.08817, + "84": 0.08786, + "85": 0.08912, + "86": 0.08785, + "87": 0.08907, + "88": 0.08837, + "89": 0.08812, + "90": 0.0872, + "91": 0.08931, + "92": 0.0876, + "93": 0.16836, + "94": 0.09054, + "95": 0.09081, + "96": 0.09078, + "97": 0.09068, + "98": 0.09042, + "99": 0.09008, + "100": 0.08863 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json index 686e980d509..131bcbe928e 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 581488640.0, - "2": 581488640.0, - "3": 581488640.0, - "4": 581488640.0, - "5": 581488640.0, - "6": 581488640.0, - "7": 581488640.0, - "8": 581488640.0, - "9": 581488640.0, - "10": 581488640.0, - "11": 581488640.0, - "12": 581488640.0, - "13": 581488640.0, - "14": 581488640.0, - "15": 581488640.0, - "16": 581488640.0, - "17": 581488640.0, - "18": 581488640.0, - "19": 581488640.0, - "20": 581488640.0, - "21": 581488640.0, - "22": 581488640.0, - "23": 581488640.0, - "24": 581488640.0, - "25": 581488640.0, - "26": 581488640.0, - "27": 581488640.0, - "28": 581488640.0, - "29": 581488640.0, - "30": 581488640.0, - "31": 581488640.0, - "32": 581488640.0, - "33": 581488640.0, - "34": 581488640.0, - "35": 581488640.0, - "36": 581488640.0, - "37": 581488640.0, - "38": 581488640.0, - "39": 581488640.0, - "40": 581488640.0, - "41": 581488640.0, - "42": 581488640.0, - "43": 581488640.0, - "44": 581488640.0, - "45": 581488640.0, - "46": 581488640.0, - "47": 581488640.0, - "48": 581488640.0, - "49": 581488640.0, - "50": 581488640.0, - "51": 581488640.0, - "52": 581488640.0, - "53": 581488640.0, - "54": 581488640.0, - "55": 581488640.0, - "56": 581488640.0, - "57": 581488640.0, - "58": 581488640.0, - "59": 581488640.0, - "60": 581488640.0, - "61": 581488640.0, - "62": 581488640.0, - "63": 581488640.0, - "64": 581488640.0, - "65": 581488640.0, - "66": 581488640.0, - "67": 581488640.0, - "68": 581488640.0, - "69": 581488640.0, - "70": 581488640.0, - "71": 581488640.0, - "72": 581488640.0, - "73": 581488640.0, - "74": 581488640.0, - "75": 581488640.0, - "76": 581488640.0, - "77": 581488640.0, - "78": 581488640.0, - "79": 581488640.0, - "80": 
581488640.0, - "81": 581488640.0, - "82": 581488640.0, - "83": 581488640.0, - "84": 581488640.0, - "85": 581488640.0, - "86": 581488640.0, - "87": 581488640.0, - "88": 581488640.0, - "89": 581488640.0, - "90": 581488640.0, - "91": 581488640.0, - "92": 581488640.0, - "93": 581488640.0, - "94": 581488640.0, - "95": 581488640.0, - "96": 581488640.0, - "97": 581488640.0, - "98": 581488640.0, - "99": 581488640.0, - "100": 581488640.0 + "1": 581489664.0, + "2": 581489664.0, + "3": 581489664.0, + "4": 581489664.0, + "5": 581489664.0, + "6": 581489664.0, + "7": 581489664.0, + "8": 581489664.0, + "9": 581489664.0, + "10": 581489664.0, + "11": 581489664.0, + "12": 581489664.0, + "13": 581489664.0, + "14": 581489664.0, + "15": 581489664.0, + "16": 581489664.0, + "17": 581489664.0, + "18": 581489664.0, + "19": 581489664.0, + "20": 581489664.0, + "21": 581489664.0, + "22": 581489664.0, + "23": 581489664.0, + "24": 581489664.0, + "25": 581489664.0, + "26": 581489664.0, + "27": 581489664.0, + "28": 581489664.0, + "29": 581489664.0, + "30": 581489664.0, + "31": 581489664.0, + "32": 581489664.0, + "33": 581489664.0, + "34": 581489664.0, + "35": 581489664.0, + "36": 581489664.0, + "37": 581489664.0, + "38": 581489664.0, + "39": 581489664.0, + "40": 581489664.0, + "41": 581489664.0, + "42": 581489664.0, + "43": 581489664.0, + "44": 581489664.0, + "45": 581489664.0, + "46": 581489664.0, + "47": 581489664.0, + "48": 581489664.0, + "49": 581489664.0, + "50": 581489664.0, + "51": 581489664.0, + "52": 581489664.0, + "53": 581489664.0, + "54": 581489664.0, + "55": 581489664.0, + "56": 581489664.0, + "57": 581489664.0, + "58": 581489664.0, + "59": 581489664.0, + "60": 581489664.0, + "61": 581489664.0, + "62": 581489664.0, + "63": 581489664.0, + "64": 581489664.0, + "65": 581489664.0, + "66": 581489664.0, + "67": 581489664.0, + "68": 581489664.0, + "69": 581489664.0, + "70": 581489664.0, + "71": 581489664.0, + "72": 581489664.0, + "73": 581489664.0, + "74": 581489664.0, + "75": 581489664.0, 
+ "76": 581489664.0, + "77": 581489664.0, + "78": 581489664.0, + "79": 581489664.0, + "80": 581489664.0, + "81": 581489664.0, + "82": 581489664.0, + "83": 581489664.0, + "84": 581489664.0, + "85": 581489664.0, + "86": 581489664.0, + "87": 581489664.0, + "88": 581489664.0, + "89": 581489664.0, + "90": 581489664.0, + "91": 581489664.0, + "92": 581489664.0, + "93": 581489664.0, + "94": 581489664.0, + "95": 581489664.0, + "96": 581489664.0, + "97": 581489664.0, + "98": 581489664.0, + "99": 581489664.0, + "100": 581489664.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2594126336.0, - "2": 2690742784.0, - "3": 2690742784.0, - "4": 2690742784.0, - "5": 2690742784.0, - "6": 2690742784.0, - "7": 2690742784.0, - "8": 2690742784.0, - "9": 2690742784.0, - "10": 2690742784.0, - "11": 2690742784.0, - "12": 2690742784.0, - "13": 2690742784.0, - "14": 2690742784.0, - "15": 2690742784.0, - "16": 2690742784.0, - "17": 2690742784.0, - "18": 2690742784.0, - "19": 2690742784.0, - "20": 2690742784.0, - "21": 2690742784.0, - "22": 2690742784.0, - "23": 2690742784.0, - "24": 2690742784.0, - "25": 2690742784.0, - "26": 2690742784.0, - "27": 2690742784.0, - "28": 2690742784.0, - "29": 2690742784.0, - "30": 2690742784.0, - "31": 2690742784.0, - "32": 2690742784.0, - "33": 2690742784.0, - "34": 2690742784.0, - "35": 2690742784.0, - "36": 2690742784.0, - "37": 2690742784.0, - "38": 2690742784.0, - "39": 2690742784.0, - "40": 2690742784.0, - "41": 2690742784.0, - "42": 2690742784.0, - "43": 2690742784.0, - "44": 2690742784.0, - "45": 2690742784.0, - "46": 2690742784.0, - "47": 2690742784.0, - "48": 2690742784.0, - "49": 2690742784.0, - "50": 2690742784.0, - "51": 2690742784.0, - "52": 2690742784.0, - "53": 2690742784.0, - "54": 2690742784.0, - "55": 2690742784.0, - "56": 2690742784.0, - "57": 2690742784.0, - "58": 2690742784.0, - "59": 2690742784.0, - "60": 2690742784.0, - "61": 2690742784.0, - "62": 2690742784.0, - "63": 
2690742784.0, - "64": 2690742784.0, - "65": 2690742784.0, - "66": 2690742784.0, - "67": 2690742784.0, - "68": 2690742784.0, - "69": 2690742784.0, - "70": 2690742784.0, - "71": 2690742784.0, - "72": 2690742784.0, - "73": 2690742784.0, - "74": 2690742784.0, - "75": 2690742784.0, - "76": 2690742784.0, - "77": 2690742784.0, - "78": 2690742784.0, - "79": 2690742784.0, - "80": 2690742784.0, - "81": 2690742784.0, - "82": 2690742784.0, - "83": 2690742784.0, - "84": 2690742784.0, - "85": 2690742784.0, - "86": 2690742784.0, - "87": 2690742784.0, - "88": 2690742784.0, - "89": 2690742784.0, - "90": 2690742784.0, - "91": 2690742784.0, - "92": 2690742784.0, - "93": 2690742784.0, - "94": 2690742784.0, - "95": 2690742784.0, - "96": 2690742784.0, - "97": 2690742784.0, - "98": 2690742784.0, - "99": 2690742784.0, - "100": 2690742784.0 + "1": 2594127360.0, + "2": 2690743808.0, + "3": 2690743808.0, + "4": 2690743808.0, + "5": 2690743808.0, + "6": 2690743808.0, + "7": 2690743808.0, + "8": 2690743808.0, + "9": 2690743808.0, + "10": 2690743808.0, + "11": 2690743808.0, + "12": 2690743808.0, + "13": 2690743808.0, + "14": 2690743808.0, + "15": 2690743808.0, + "16": 2690743808.0, + "17": 2690743808.0, + "18": 2690743808.0, + "19": 2690743808.0, + "20": 2690743808.0, + "21": 2690743808.0, + "22": 2690743808.0, + "23": 2690743808.0, + "24": 2690743808.0, + "25": 2690743808.0, + "26": 2690743808.0, + "27": 2690743808.0, + "28": 2690743808.0, + "29": 2690743808.0, + "30": 2690743808.0, + "31": 2690743808.0, + "32": 2690743808.0, + "33": 2690743808.0, + "34": 2690743808.0, + "35": 2690743808.0, + "36": 2690743808.0, + "37": 2690743808.0, + "38": 2690743808.0, + "39": 2690743808.0, + "40": 2690743808.0, + "41": 2690743808.0, + "42": 2690743808.0, + "43": 2690743808.0, + "44": 2690743808.0, + "45": 2690743808.0, + "46": 2690743808.0, + "47": 2690743808.0, + "48": 2690743808.0, + "49": 2690743808.0, + "50": 2690743808.0, + "51": 2690743808.0, + "52": 2690743808.0, + "53": 2690743808.0, + "54": 
2690743808.0, + "55": 2690743808.0, + "56": 2690743808.0, + "57": 2690743808.0, + "58": 2690743808.0, + "59": 2690743808.0, + "60": 2690743808.0, + "61": 2690743808.0, + "62": 2690743808.0, + "63": 2690743808.0, + "64": 2690743808.0, + "65": 2690743808.0, + "66": 2690743808.0, + "67": 2690743808.0, + "68": 2690743808.0, + "69": 2690743808.0, + "70": 2690743808.0, + "71": 2690743808.0, + "72": 2690743808.0, + "73": 2690743808.0, + "74": 2690743808.0, + "75": 2690743808.0, + "76": 2690743808.0, + "77": 2690743808.0, + "78": 2690743808.0, + "79": 2690743808.0, + "80": 2690743808.0, + "81": 2690743808.0, + "82": 2690743808.0, + "83": 2690743808.0, + "84": 2690743808.0, + "85": 2690743808.0, + "86": 2690743808.0, + "87": 2690743808.0, + "88": 2690743808.0, + "89": 2690743808.0, + "90": 2690743808.0, + "91": 2690743808.0, + "92": 2690743808.0, + "93": 2690743808.0, + "94": 2690743808.0, + "95": 2690743808.0, + "96": 2690743808.0, + "97": 2690743808.0, + "98": 2690743808.0, + "99": 2690743808.0, + "100": 2690743808.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 7.46673, - "2": 0.07879, - "3": 0.06822, - "4": 0.06744, - "5": 0.06664, - "6": 0.06786, - "7": 0.06766, - "8": 0.06659, - "9": 0.06797, - "10": 0.07184, - "11": 0.07288, - "12": 0.07188, - "13": 0.07026, - "14": 0.06821, - "15": 0.06667, - "16": 0.06656, - "17": 0.06764, - "18": 0.06816, - "19": 0.06695, - "20": 0.06832, - "21": 0.06808, - "22": 0.06822, - "23": 0.06838, - "24": 0.06731, - "25": 0.06857, - "26": 0.06706, - "27": 0.06819, - "28": 0.06784, - "29": 0.06785, - "30": 0.06735, - "31": 0.0685, - "32": 0.07005, - "33": 0.07122, - "34": 0.07241, - "35": 0.07067, - "36": 0.06981, - "37": 0.06934, - "38": 0.06771, - "39": 0.06805, - "40": 0.06824, - "41": 0.06831, - "42": 0.06733, - "43": 0.06819, - "44": 0.06816, - "45": 0.06847, - "46": 0.0674, - "47": 0.06856, - "48": 0.07158, - "49": 0.07079, - "50": 0.0717, - "51": 0.08179, - "52": 0.07272, - 
"53": 0.06939, - "54": 0.06631, - "55": 0.07046, - "56": 0.09852, - "57": 0.06464, - "58": 0.06466, - "59": 0.06537, - "60": 0.06301, - "61": 0.06361, - "62": 0.06551, - "63": 0.06563, - "64": 0.0749, - "65": 0.0748, - "66": 0.07507, - "67": 0.07552, - "68": 0.07573, - "69": 0.07066, - "70": 0.0658, - "71": 0.0647, - "72": 0.06444, - "73": 0.06462, - "74": 0.06543, - "75": 0.06609, - "76": 0.06503, - "77": 0.06499, - "78": 0.0644, - "79": 0.06439, - "80": 0.06417, - "81": 0.06401, - "82": 0.06575, - "83": 0.06494, - "84": 0.06442, - "85": 0.06396, - "86": 0.06422, - "87": 0.06484, - "88": 0.06512, - "89": 0.06426, - "90": 0.06481, - "91": 0.06476, - "92": 0.06383, - "93": 0.06456, - "94": 0.06292, - "95": 0.0638, - "96": 0.06392, - "97": 0.06356, - "98": 0.06355, - "99": 0.06439, - "100": 0.06428 + "1": 6.85919, + "2": 0.0831, + "3": 0.08065, + "4": 0.05861, + "5": 0.04976, + "6": 0.05045, + "7": 0.04972, + "8": 0.04911, + "9": 0.04965, + "10": 0.04942, + "11": 0.04916, + "12": 0.04915, + "13": 0.04939, + "14": 0.04993, + "15": 0.04987, + "16": 0.04906, + "17": 0.05015, + "18": 0.04924, + "19": 0.05168, + "20": 0.04963, + "21": 0.05051, + "22": 0.04948, + "23": 0.05006, + "24": 0.04939, + "25": 0.05019, + "26": 0.04951, + "27": 0.05048, + "28": 0.04917, + "29": 0.05015, + "30": 0.04921, + "31": 0.04969, + "32": 0.04894, + "33": 0.04941, + "34": 0.04938, + "35": 0.04927, + "36": 0.04942, + "37": 0.04944, + "38": 0.04973, + "39": 0.04957, + "40": 0.05016, + "41": 0.04968, + "42": 0.05042, + "43": 0.0523, + "44": 0.04956, + "45": 0.04948, + "46": 0.05093, + "47": 0.0493, + "48": 0.0498, + "49": 0.05177, + "50": 0.05032, + "51": 0.05749, + "52": 0.05013, + "53": 0.0512, + "54": 0.04935, + "55": 0.04891, + "56": 0.04976, + "57": 0.04984, + "58": 0.04964, + "59": 0.05274, + "60": 0.04962, + "61": 0.05096, + "62": 0.04934, + "63": 0.04971, + "64": 0.0503, + "65": 0.05028, + "66": 0.04991, + "67": 0.04926, + "68": 0.04848, + "69": 0.0493, + "70": 0.04943, + "71": 0.04943, 
+ "72": 0.04852, + "73": 0.04928, + "74": 0.04895, + "75": 0.04995, + "76": 0.04877, + "77": 0.0492, + "78": 0.04886, + "79": 0.04938, + "80": 0.04894, + "81": 0.04892, + "82": 0.05016, + "83": 0.04964, + "84": 0.04956, + "85": 0.04881, + "86": 0.04999, + "87": 0.04908, + "88": 0.04838, + "89": 0.04957, + "90": 0.04882, + "91": 0.04993, + "92": 0.05004, + "93": 0.05003, + "94": 0.04961, + "95": 0.05132, + "96": 0.05071, + "97": 0.04952, + "98": 0.04851, + "99": 0.05027, + "100": 0.04988 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..4519bd52155 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85447, + "52": 9.73936, + "53": 10.07426, + "54": 9.96915, + "55": 9.88574, + "56": 9.62437, + "57": 9.4823, + "58": 9.83483, + "59": 
9.58732, + "60": 9.50245, + "61": 9.69343, + "62": 9.98806, + "63": 9.39103, + "64": 9.78021, + "65": 8.94515, + "66": 9.70494, + "67": 9.37251, + "68": 9.78329, + "69": 9.79058, + "70": 9.74454, + "71": 9.62301, + "72": 9.58458, + "73": 9.50513, + "74": 8.94312, + "75": 9.42524, + "76": 9.07601, + "77": 10.06353, + "78": 9.72308, + "79": 9.37502, + "80": 9.40453, + "81": 9.47794, + "82": 9.69667, + "83": 9.3072, + "84": 9.41526, + "85": 9.61293, + "86": 9.07195, + "87": 9.5884, + "88": 9.74762, + "89": 9.59982, + "90": 9.81672, + "91": 9.3379, + "92": 9.35605, + "93": 9.07425, + "94": 8.8351, + "95": 9.5184, + "96": 9.52391, + "97": 9.30923, + "98": 9.66743, + "99": 8.88419, + "100": 9.39924 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2598.0, + "52": 2547.0, + "53": 2957.0, + "54": 2750.0, + "55": 2372.0, + "56": 2569.0, + "57": 2395.0, + "58": 2901.0, + "59": 2741.0, + "60": 2430.0, + "61": 2868.0, + "62": 2651.0, + "63": 2507.0, + "64": 3014.0, + "65": 2683.0, + "66": 2935.0, + "67": 2783.0, + "68": 2725.0, + "69": 2788.0, + "70": 3152.0, + "71": 3026.0, + "72": 2415.0, + "73": 3122.0, + "74": 1967.0, + "75": 2581.0, + "76": 3010.0, + "77": 3294.0, + "78": 3166.0, + "79": 
3150.0, + "80": 3246.0, + "81": 3566.0, + "82": 3285.0, + "83": 2817.0, + "84": 3269.0, + "85": 3425.0, + "86": 2819.0, + "87": 3577.0, + "88": 3004.0, + "89": 3323.0, + "90": 3023.0, + "91": 2661.0, + "92": 3066.0, + "93": 2691.0, + "94": 3305.0, + "95": 3403.0, + "96": 3377.0, + "97": 3242.0, + "98": 3697.0, + "99": 3112.0, + "100": 3199.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 581489664.0, + "52": 581489664.0, + "53": 581489664.0, + "54": 581489664.0, + "55": 581489664.0, + "56": 581489664.0, + "57": 581489664.0, + "58": 581489664.0, + "59": 581489664.0, + "60": 581489664.0, + "61": 581489664.0, + "62": 581489664.0, + "63": 581489664.0, + "64": 581489664.0, + "65": 581489664.0, + "66": 581489664.0, + "67": 581489664.0, + "68": 581489664.0, + "69": 581489664.0, + "70": 581489664.0, + "71": 581489664.0, + "72": 581489664.0, + "73": 581489664.0, + "74": 581489664.0, + "75": 581489664.0, + "76": 581489664.0, + "77": 581489664.0, + "78": 581489664.0, + "79": 581489664.0, + "80": 581489664.0, + "81": 581489664.0, + "82": 581489664.0, + "83": 581489664.0, + "84": 581489664.0, + "85": 581489664.0, + "86": 581489664.0, + "87": 581489664.0, + "88": 581489664.0, + 
"89": 581489664.0, + "90": 581489664.0, + "91": 581489664.0, + "92": 581489664.0, + "93": 581489664.0, + "94": 581489664.0, + "95": 581489664.0, + "96": 581489664.0, + "97": 581489664.0, + "98": 581489664.0, + "99": 581489664.0, + "100": 581489664.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2658236928.0, + "52": 2691792384.0, + "53": 2691792384.0, + "54": 2691792384.0, + "55": 2691792384.0, + "56": 2691792384.0, + "57": 2691792384.0, + "58": 2691792384.0, + "59": 2691792384.0, + "60": 2691792384.0, + "61": 2691792384.0, + "62": 2691792384.0, + "63": 2691792384.0, + "64": 2691792384.0, + "65": 2691792384.0, + "66": 2691792384.0, + "67": 2691792384.0, + "68": 2691792384.0, + "69": 2691792384.0, + "70": 2691792384.0, + "71": 2691792384.0, + "72": 2691792384.0, + "73": 2691792384.0, + "74": 2691792384.0, + "75": 2691792384.0, + "76": 2691792384.0, + "77": 2691792384.0, + "78": 2691792384.0, + "79": 2691792384.0, + "80": 2691792384.0, + "81": 2691792384.0, + "82": 2691792384.0, + "83": 2691792384.0, + "84": 2691792384.0, + "85": 2691792384.0, + "86": 2691792384.0, + "87": 2691792384.0, + "88": 2691792384.0, + "89": 2691792384.0, + "90": 2691792384.0, + "91": 
2691792384.0, + "92": 2691792384.0, + "93": 2691792384.0, + "94": 2691792384.0, + "95": 2691792384.0, + "96": 2691792384.0, + "97": 2691792384.0, + "98": 2691792384.0, + "99": 2691792384.0, + "100": 2691792384.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 6.24535, + "52": 0.08446, + "53": 0.05106, + "54": 0.05053, + "55": 0.05025, + "56": 0.06328, + "57": 0.05006, + "58": 0.04939, + "59": 0.04895, + "60": 0.05032, + "61": 0.05024, + "62": 0.04926, + "63": 0.051, + "64": 0.04994, + "65": 0.0516, + "66": 0.05582, + "67": 0.05024, + "68": 0.04967, + "69": 0.04945, + "70": 0.05103, + "71": 0.04971, + "72": 0.0494, + "73": 0.05144, + "74": 0.0497, + "75": 0.05084, + "76": 0.05125, + "77": 0.05002, + "78": 0.04992, + "79": 0.05192, + "80": 0.05131, + "81": 0.05007, + "82": 0.05145, + "83": 0.05065, + "84": 0.05098, + "85": 0.05005, + "86": 0.05133, + "87": 0.05031, + "88": 0.05145, + "89": 0.05038, + "90": 0.49172, + "91": 0.05261, + "92": 0.05313, + "93": 0.05042, + "94": 0.05061, + "95": 0.05207, + "96": 0.04992, + "97": 0.04998, + "98": 0.05103, + "99": 0.05004, + "100": 0.05054 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json index 9dad9972e22..b6e4891b3bb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 4.10688, - "2": 0.11397, - "3": 0.08797, - "4": 0.08663, - "5": 0.08687, - "6": 0.08702, - "7": 0.08653, - "8": 0.08674, - "9": 0.08696, - "10": 0.08678, - "11": 0.08635, - "12": 0.08637, - "13": 0.08738, - "14": 0.08674, - "15": 0.08706, - "16": 0.08684, - "17": 0.08681, - "18": 0.08601, - "19": 0.08591, - "20": 0.08645, - "21": 0.08634, - "22": 0.08598, - "23": 0.08618, - "24": 0.08622, - "25": 0.08632, - "26": 0.08621, - "27": 0.08644, - "28": 0.08581, - "29": 0.08622, - "30": 0.08652, - "31": 0.08679, - "32": 0.08526, - "33": 0.08525, - "34": 0.08525, - "35": 0.08519, - "36": 0.08535, - "37": 0.08568, - "38": 0.0852, - "39": 0.08521, - "40": 0.08523, - "41": 0.08535, - "42": 0.08486, - "43": 0.08614, - "44": 0.08491, - "45": 0.08554, - "46": 0.08508, - "47": 0.08524, - "48": 0.08608, - "49": 0.08565, - "50": 0.08559, - "51": 0.10342, - "52": 0.09048, - "53": 0.08707, - "54": 0.08719, - "55": 0.08631, - "56": 0.11667, - "57": 0.08592, - "58": 0.08517, - "59": 0.08612, - "60": 0.08514, - "61": 0.0855, - "62": 0.08527, - "63": 0.08586, - "64": 0.08556, - "65": 0.08633, - "66": 0.08532, - "67": 0.08593, - "68": 0.08563, - "69": 0.08537, - "70": 0.08538, - "71": 0.08507, - "72": 0.08593, - "73": 0.08623, - "74": 0.08561, - "75": 0.08536, - "76": 0.08551, - "77": 0.08526, - "78": 0.0859, - "79": 0.08518, - "80": 0.08601, - "81": 0.08574, - "82": 
0.08618, - "83": 0.08532, - "84": 0.08505, - "85": 0.08545, - "86": 0.08554, - "87": 0.08542, - "88": 0.08575, - "89": 0.0861, - "90": 0.08516, - "91": 0.08552, - "92": 0.08581, - "93": 0.08558, - "94": 0.08577, - "95": 0.08708, - "96": 0.08574, - "97": 0.08543, - "98": 0.0855, - "99": 0.08537, - "100": 0.08541 + "1": 3.22526, + "2": 0.19893, + "3": 0.09313, + "4": 0.08045, + "5": 0.08171, + "6": 0.08058, + "7": 0.08022, + "8": 0.07981, + "9": 0.0808, + "10": 0.08068, + "11": 0.08073, + "12": 0.08318, + "13": 0.08514, + "14": 0.08404, + "15": 0.08382, + "16": 0.08982, + "17": 0.08387, + "18": 0.08342, + "19": 0.08359, + "20": 0.07926, + "21": 0.08037, + "22": 0.08041, + "23": 0.08187, + "24": 0.08232, + "25": 0.08012, + "26": 0.08081, + "27": 0.08072, + "28": 0.08454, + "29": 0.08003, + "30": 0.07895, + "31": 0.08312, + "32": 0.08109, + "33": 0.08106, + "34": 0.07905, + "35": 0.08145, + "36": 0.08345, + "37": 0.07972, + "38": 0.07895, + "39": 0.0795, + "40": 0.07971, + "41": 0.08032, + "42": 0.07938, + "43": 0.0806, + "44": 0.07956, + "45": 0.07918, + "46": 0.07961, + "47": 0.07937, + "48": 0.08049, + "49": 0.07875, + "50": 0.07866, + "51": 0.08212, + "52": 0.07853, + "53": 0.07869, + "54": 0.07753, + "55": 0.0774, + "56": 0.07699, + "57": 0.07754, + "58": 0.07721, + "59": 0.07784, + "60": 0.07727, + "61": 0.07709, + "62": 0.07721, + "63": 0.07751, + "64": 0.07763, + "65": 0.07813, + "66": 0.07898, + "67": 0.07875, + "68": 0.07868, + "69": 0.0789, + "70": 0.07834, + "71": 0.07782, + "72": 0.07816, + "73": 0.0785, + "74": 0.0787, + "75": 0.07812, + "76": 0.07812, + "77": 0.07845, + "78": 0.07888, + "79": 0.07811, + "80": 0.07836, + "81": 0.07854, + "82": 0.07902, + "83": 0.07769, + "84": 0.07776, + "85": 0.07749, + "86": 0.07824, + "87": 0.07761, + "88": 0.07812, + "89": 0.07814, + "90": 0.07827, + "91": 0.07825, + "92": 0.07856, + "93": 0.07779, + "94": 0.07786, + "95": 0.07734, + "96": 0.07776, + "97": 0.07809, + "98": 0.07855, + "99": 0.07768, + "100": 0.08111 } 
} } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..c941dc70aab --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.83459, + "52": 9.73231, + "53": 10.04881, + "54": 9.93895, + "55": 9.86297, + "56": 9.613, + "57": 9.46964, + "58": 9.81136, + "59": 9.57107, + "60": 9.48153, + "61": 9.67881, + "62": 9.96579, + "63": 9.35276, + "64": 9.75644, + "65": 8.93769, + "66": 9.68152, + "67": 9.35669, + "68": 9.76806, + "69": 9.7739, + "70": 9.71012, + "71": 9.60009, + "72": 9.56796, + "73": 9.47739, + "74": 8.93177, + "75": 9.40721, + "76": 9.06847, + "77": 10.0464, + "78": 9.70984, + "79": 9.35731, + "80": 9.38978, + "81": 9.4662, + "82": 9.68056, + "83": 9.29144, + "84": 9.40194, + "85": 9.59734, + "86": 9.06207, + "87": 9.57921, + "88": 9.73262, 
+ "89": 9.58838, + "90": 9.80354, + "91": 9.31991, + "92": 9.35013, + "93": 9.06378, + "94": 8.81909, + "95": 9.50572, + "96": 9.51068, + "97": 9.29244, + "98": 9.65579, + "99": 8.87401, + "100": 9.38837 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2452.0, + "52": 2576.0, + "53": 2914.0, + "54": 2741.0, + "55": 2408.0, + "56": 2650.0, + "57": 2264.0, + "58": 2853.0, + "59": 2757.0, + "60": 2509.0, + "61": 3076.0, + "62": 2709.0, + "63": 2563.0, + "64": 3041.0, + "65": 2687.0, + "66": 3089.0, + "67": 2767.0, + "68": 2930.0, + "69": 2911.0, + "70": 3286.0, + "71": 3105.0, + "72": 2507.0, + "73": 3063.0, + "74": 2022.0, + "75": 2763.0, + "76": 3002.0, + "77": 3382.0, + "78": 3470.0, + "79": 3109.0, + "80": 3357.0, + "81": 3798.0, + "82": 3348.0, + "83": 2763.0, + "84": 3271.0, + "85": 3245.0, + "86": 2587.0, + "87": 3650.0, + "88": 3103.0, + "89": 3471.0, + "90": 3086.0, + "91": 3050.0, + "92": 3368.0, + "93": 2828.0, + "94": 3495.0, + "95": 3424.0, + "96": 3559.0, + "97": 3289.0, + "98": 3727.0, + "99": 3275.0, + "100": 3401.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + 
"4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 552128512.0, + "52": 552128512.0, + "53": 552128512.0, + "54": 552128512.0, + "55": 552128512.0, + "56": 552128512.0, + "57": 552128512.0, + "58": 552128512.0, + "59": 552128512.0, + "60": 552128512.0, + "61": 552128512.0, + "62": 552128512.0, + "63": 552128512.0, + "64": 552128512.0, + "65": 552128512.0, + "66": 552128512.0, + "67": 552128512.0, + "68": 552128512.0, + "69": 552128512.0, + "70": 552128512.0, + "71": 552128512.0, + "72": 552128512.0, + "73": 552128512.0, + "74": 552128512.0, + "75": 552128512.0, + "76": 552128512.0, + "77": 552128512.0, + "78": 552128512.0, + "79": 552128512.0, + "80": 552128512.0, + "81": 552128512.0, + "82": 552128512.0, + "83": 552128512.0, + "84": 552128512.0, + "85": 552128512.0, + "86": 552128512.0, + "87": 552128512.0, + "88": 552128512.0, + "89": 552128512.0, + "90": 552128512.0, + "91": 552128512.0, + "92": 552128512.0, + "93": 552128512.0, + "94": 552128512.0, + "95": 552128512.0, + "96": 552128512.0, + "97": 552128512.0, + "98": 552128512.0, + "99": 552128512.0, + "100": 552128512.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": 
"nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2679207424.0, + "52": 2712762880.0, + "53": 2712762880.0, + "54": 2712762880.0, + "55": 2712762880.0, + "56": 2712762880.0, + "57": 2712762880.0, + "58": 2712762880.0, + "59": 2712762880.0, + "60": 2712762880.0, + "61": 2712762880.0, + "62": 2712762880.0, + "63": 2712762880.0, + "64": 2712762880.0, + "65": 2712762880.0, + "66": 2712762880.0, + "67": 2712762880.0, + "68": 2712762880.0, + "69": 2712762880.0, + "70": 2712762880.0, + "71": 2712762880.0, + "72": 2712762880.0, + "73": 2712762880.0, + "74": 2712762880.0, + "75": 2712762880.0, + "76": 2712762880.0, + "77": 2712762880.0, + "78": 2712762880.0, + "79": 2712762880.0, + "80": 2712762880.0, + "81": 2712762880.0, + "82": 2712762880.0, + "83": 2712762880.0, + "84": 2712762880.0, + "85": 2712762880.0, + "86": 2712762880.0, + "87": 2712762880.0, + "88": 2712762880.0, + "89": 2712762880.0, + "90": 2712762880.0, + "91": 2712762880.0, + "92": 2712762880.0, + "93": 2712762880.0, + "94": 2712762880.0, + "95": 2712762880.0, + "96": 2712762880.0, + "97": 2712762880.0, + "98": 2712762880.0, + "99": 2712762880.0, + "100": 2712762880.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + 
"14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.18495, + "52": 0.6276, + "53": 0.08049, + "54": 0.07972, + "55": 0.08135, + "56": 0.07856, + "57": 0.08351, + "58": 0.07967, + "59": 0.08019, + "60": 0.0792, + "61": 0.07924, + "62": 0.07905, + "63": 0.08021, + "64": 0.07964, + "65": 0.07981, + "66": 0.07892, + "67": 0.07984, + "68": 0.07904, + "69": 0.07969, + "70": 0.07923, + "71": 0.07928, + "72": 0.07969, + "73": 0.07956, + "74": 0.08002, + "75": 0.07918, + "76": 0.07955, + "77": 0.07938, + "78": 0.08006, + "79": 0.07935, + "80": 0.07959, + "81": 0.08018, + "82": 0.07963, + "83": 0.07952, + "84": 0.07938, + "85": 0.07915, + "86": 0.07965, + "87": 0.07999, + "88": 0.07951, + "89": 0.08006, + "90": 0.0794, + "91": 0.07948, + "92": 0.07896, + "93": 0.07977, + "94": 0.07916, + "95": 0.07921, + "96": 0.07884, + "97": 0.0796, + "98": 0.07923, + "99": 0.07955, + "100": 0.07931 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..ccf3054dcf0 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 
100, + "step_interval": 1, + "values": { + "1": 10.77447, + "2": 10.78365, + "3": 10.78346, + "4": 10.74822, + "5": 10.81983, + "6": 10.82303, + "7": 10.79055, + "8": 10.77956, + "9": 10.78595, + "10": 10.74453, + "11": 10.83267, + "12": 10.80426, + "13": 10.82082, + "14": 10.82568, + "15": 10.74205, + "16": 10.74901, + "17": 10.72521, + "18": 10.74178, + "19": 10.74415, + "20": 10.63672, + "21": 10.63053, + "22": 10.47964, + "23": 10.65979, + "24": 10.52478, + "25": 10.47552, + "26": 10.54115, + "27": 10.55498, + "28": 10.52138, + "29": 10.53601, + "30": 10.3053, + "31": 10.06443, + "32": 10.41576, + "33": 10.42199, + "34": 10.17396, + "35": 10.22407, + "36": 10.18503, + "37": 10.30413, + "38": 10.14998, + "39": 10.37038, + "40": 10.03991, + "41": 10.1095, + "42": 10.17936, + "43": 9.79751, + "44": 9.90816, + "45": 9.79806, + "46": 9.79659, + "47": 10.1211, + "48": 9.82086, + "49": 9.50494, + "50": 9.88025, + "51": 9.83617, + "52": 9.72317, + "53": 10.05321, + "54": 9.93744, + "55": 9.87386, + "56": 9.60451, + "57": 9.45231, + "58": 9.81883, + "59": 9.57722, + "60": 9.48536, + "61": 9.68547, + "62": 9.97907, + "63": 9.36417, + "64": 9.76205, + "65": 8.94102, + "66": 9.69479, + "67": 9.36657, + "68": 9.77743, + "69": 9.78996, + "70": 9.72377, + "71": 9.62042, + "72": 9.57421, + "73": 9.48574, + "74": 8.92728, + "75": 9.41652, + "76": 9.07749, + "77": 10.05445, + "78": 9.71913, + "79": 9.37304, + "80": 9.40003, + "81": 9.47846, + "82": 9.69869, + "83": 9.31156, + "84": 9.41458, + "85": 9.61162, + "86": 9.07419, + "87": 9.59392, + "88": 9.74925, + "89": 9.59851, + "90": 9.82763, + "91": 9.33629, + "92": 9.35804, + "93": 9.08549, + "94": 8.8279, + "95": 9.53033, + "96": 9.52662, + "97": 9.30484, + "98": 9.67007, + "99": 8.89604, + "100": 9.407 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1531.0, + "2": 1722.0, + "3": 1589.0, + "4": 1870.0, + "5": 1992.0, + "6": 1894.0, + "7": 1954.0, + "8": 1697.0, + "9": 
1855.0, + "10": 1477.0, + "11": 1889.0, + "12": 1848.0, + "13": 1885.0, + "14": 1934.0, + "15": 1984.0, + "16": 1934.0, + "17": 1820.0, + "18": 1643.0, + "19": 1735.0, + "20": 1682.0, + "21": 1974.0, + "22": 1733.0, + "23": 1932.0, + "24": 1650.0, + "25": 1603.0, + "26": 1762.0, + "27": 1846.0, + "28": 1899.0, + "29": 2020.0, + "30": 1941.0, + "31": 1620.0, + "32": 1902.0, + "33": 2053.0, + "34": 1891.0, + "35": 1988.0, + "36": 1990.0, + "37": 2382.0, + "38": 2143.0, + "39": 2445.0, + "40": 2284.0, + "41": 2265.0, + "42": 2272.0, + "43": 2112.0, + "44": 2088.0, + "45": 2332.0, + "46": 2345.0, + "47": 2550.0, + "48": 2419.0, + "49": 2250.0, + "50": 2509.0, + "51": 2708.0, + "52": 2707.0, + "53": 2812.0, + "54": 2620.0, + "55": 2399.0, + "56": 2790.0, + "57": 2301.0, + "58": 3008.0, + "59": 2863.0, + "60": 2465.0, + "61": 2808.0, + "62": 2607.0, + "63": 2442.0, + "64": 2977.0, + "65": 2646.0, + "66": 3061.0, + "67": 2818.0, + "68": 2891.0, + "69": 3036.0, + "70": 3160.0, + "71": 3064.0, + "72": 2618.0, + "73": 2978.0, + "74": 2000.0, + "75": 2580.0, + "76": 2967.0, + "77": 3281.0, + "78": 3131.0, + "79": 3108.0, + "80": 3217.0, + "81": 3614.0, + "82": 3411.0, + "83": 2834.0, + "84": 3191.0, + "85": 3306.0, + "86": 2806.0, + "87": 3808.0, + "88": 3237.0, + "89": 3425.0, + "90": 3202.0, + "91": 2829.0, + "92": 3105.0, + "93": 2882.0, + "94": 3303.0, + "95": 3310.0, + "96": 3499.0, + "97": 3211.0, + "98": 3741.0, + "99": 3167.0, + "100": 3049.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1260800512.0, + "2": 1260800512.0, + "3": 1260800512.0, + "4": 1260800512.0, + "5": 1260800512.0, + "6": 1260800512.0, + "7": 1260800512.0, + "8": 1260800512.0, + "9": 1260800512.0, + "10": 1260800512.0, + "11": 1260800512.0, + "12": 1260800512.0, + "13": 1260800512.0, + "14": 1260800512.0, + "15": 1260800512.0, + "16": 1260800512.0, + "17": 1260800512.0, + "18": 1260800512.0, + "19": 1260800512.0, + "20": 
1260800512.0, + "21": 1260800512.0, + "22": 1260800512.0, + "23": 1260800512.0, + "24": 1260800512.0, + "25": 1260800512.0, + "26": 1260800512.0, + "27": 1260800512.0, + "28": 1260800512.0, + "29": 1260800512.0, + "30": 1260800512.0, + "31": 1260800512.0, + "32": 1260800512.0, + "33": 1260800512.0, + "34": 1260800512.0, + "35": 1260800512.0, + "36": 1260800512.0, + "37": 1260800512.0, + "38": 1260800512.0, + "39": 1260800512.0, + "40": 1260800512.0, + "41": 1260800512.0, + "42": 1260800512.0, + "43": 1260800512.0, + "44": 1260800512.0, + "45": 1260800512.0, + "46": 1260800512.0, + "47": 1260800512.0, + "48": 1260800512.0, + "49": 1260800512.0, + "50": 1260800512.0, + "51": 1260800512.0, + "52": 1260800512.0, + "53": 1260800512.0, + "54": 1260800512.0, + "55": 1260800512.0, + "56": 1260800512.0, + "57": 1260800512.0, + "58": 1260800512.0, + "59": 1260800512.0, + "60": 1260800512.0, + "61": 1260800512.0, + "62": 1260800512.0, + "63": 1260800512.0, + "64": 1260800512.0, + "65": 1260800512.0, + "66": 1260800512.0, + "67": 1260800512.0, + "68": 1260800512.0, + "69": 1260800512.0, + "70": 1260800512.0, + "71": 1260800512.0, + "72": 1260800512.0, + "73": 1260800512.0, + "74": 1260800512.0, + "75": 1260800512.0, + "76": 1260800512.0, + "77": 1260800512.0, + "78": 1260800512.0, + "79": 1260800512.0, + "80": 1260800512.0, + "81": 1260800512.0, + "82": 1260800512.0, + "83": 1260800512.0, + "84": 1260800512.0, + "85": 1260800512.0, + "86": 1260800512.0, + "87": 1260800512.0, + "88": 1260800512.0, + "89": 1260800512.0, + "90": 1260800512.0, + "91": 1260800512.0, + "92": 1260800512.0, + "93": 1260800512.0, + "94": 1260800512.0, + "95": 1260800512.0, + "96": 1260800512.0, + "97": 1260800512.0, + "98": 1260800512.0, + "99": 1260800512.0, + "100": 1260800512.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2013853696.0, + "2": 2562382848.0, + "3": 2562382848.0, + "4": 2562382848.0, + "5": 2562382848.0, + "6": 
2562382848.0, + "7": 2562382848.0, + "8": 2562382848.0, + "9": 2562382848.0, + "10": 2562382848.0, + "11": 2562382848.0, + "12": 2562382848.0, + "13": 2562382848.0, + "14": 2562382848.0, + "15": 2562382848.0, + "16": 2562382848.0, + "17": 2562382848.0, + "18": 2562382848.0, + "19": 2562382848.0, + "20": 2562382848.0, + "21": 2562382848.0, + "22": 2562382848.0, + "23": 2562382848.0, + "24": 2562382848.0, + "25": 2562382848.0, + "26": 2562382848.0, + "27": 2562382848.0, + "28": 2562382848.0, + "29": 2562382848.0, + "30": 2562382848.0, + "31": 2562382848.0, + "32": 2562382848.0, + "33": 2562382848.0, + "34": 2562382848.0, + "35": 2562382848.0, + "36": 2562382848.0, + "37": 2562382848.0, + "38": 2562382848.0, + "39": 2562382848.0, + "40": 2562382848.0, + "41": 2562382848.0, + "42": 2562382848.0, + "43": 2562382848.0, + "44": 2562382848.0, + "45": 2562382848.0, + "46": 2562382848.0, + "47": 2562382848.0, + "48": 2562382848.0, + "49": 2562382848.0, + "50": 2562382848.0, + "51": 2562382848.0, + "52": 2562382848.0, + "53": 2562382848.0, + "54": 2562382848.0, + "55": 2562382848.0, + "56": 2562382848.0, + "57": 2562382848.0, + "58": 2562382848.0, + "59": 2562382848.0, + "60": 2562382848.0, + "61": 2562382848.0, + "62": 2562382848.0, + "63": 2562382848.0, + "64": 2562382848.0, + "65": 2562382848.0, + "66": 2562382848.0, + "67": 2562382848.0, + "68": 2562382848.0, + "69": 2562382848.0, + "70": 2562382848.0, + "71": 2562382848.0, + "72": 2562382848.0, + "73": 2562382848.0, + "74": 2562382848.0, + "75": 2562382848.0, + "76": 2562382848.0, + "77": 2562382848.0, + "78": 2562382848.0, + "79": 2562382848.0, + "80": 2562382848.0, + "81": 2562382848.0, + "82": 2562382848.0, + "83": 2562382848.0, + "84": 2562382848.0, + "85": 2562382848.0, + "86": 2562382848.0, + "87": 2562382848.0, + "88": 2562382848.0, + "89": 2562382848.0, + "90": 2562382848.0, + "91": 2562382848.0, + "92": 2562382848.0, + "93": 2562382848.0, + "94": 2562382848.0, + "95": 2562382848.0, + "96": 2562382848.0, + "97": 
2562382848.0, + "98": 2562382848.0, + "99": 2562382848.0, + "100": 2562382848.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4.35869, + "2": 0.14764, + "3": 0.13356, + "4": 0.12382, + "5": 0.1223, + "6": 0.1198, + "7": 0.12014, + "8": 0.12507, + "9": 0.12529, + "10": 0.13316, + "11": 0.13102, + "12": 0.13889, + "13": 0.13638, + "14": 0.14898, + "15": 0.16074, + "16": 0.14966, + "17": 0.17711, + "18": 0.17201, + "19": 0.14817, + "20": 0.14956, + "21": 0.17491, + "22": 0.29045, + "23": 0.49855, + "24": 0.12704, + "25": 0.12527, + "26": 0.12833, + "27": 0.12762, + "28": 0.12497, + "29": 0.1258, + "30": 0.12747, + "31": 0.1272, + "32": 0.12749, + "33": 0.12753, + "34": 0.12763, + "35": 0.12697, + "36": 0.12734, + "37": 0.12802, + "38": 0.12925, + "39": 0.1278, + "40": 0.1273, + "41": 0.1284, + "42": 0.12646, + "43": 0.12669, + "44": 0.12781, + "45": 0.12751, + "46": 0.12772, + "47": 0.12712, + "48": 0.12664, + "49": 0.12679, + "50": 0.13142, + "51": 0.13902, + "52": 0.12241, + "53": 0.12407, + "54": 0.12462, + "55": 0.1225, + "56": 0.12498, + "57": 0.12564, + "58": 0.12627, + "59": 0.12399, + "60": 0.12468, + "61": 0.12629, + "62": 0.12645, + "63": 0.12377, + "64": 0.12505, + "65": 0.1271, + "66": 0.12603, + "67": 0.12556, + "68": 0.12634, + "69": 0.1332, + "70": 0.13504, + "71": 0.13164, + "72": 0.13511, + "73": 0.14002, + "74": 0.14488, + "75": 0.14064, + "76": 0.14236, + "77": 0.14155, + "78": 0.14042, + "79": 0.14188, + "80": 0.14414, + "81": 0.14147, + "82": 0.14264, + "83": 0.14126, + "84": 0.1423, + "85": 0.14311, + "86": 0.144, + "87": 0.1445, + "88": 0.14401, + "89": 0.14198, + "90": 0.14227, + "91": 0.14119, + "92": 0.14076, + "93": 0.14281, + "94": 0.14283, + "95": 0.1438, + "96": 0.14188, + "97": 0.14623, + "98": 0.14374, + "99": 0.14361, + "100": 0.14481 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json index df5117f4d8f..05b11c3c8ee 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1261848064.0, - "2": 1261848064.0, - "3": 1261848064.0, - "4": 1261848064.0, - "5": 1261848064.0, - "6": 1261848064.0, - "7": 1261848064.0, - "8": 1261848064.0, - "9": 1261848064.0, - "10": 1261848064.0, - "11": 1261848064.0, - "12": 1261848064.0, - "13": 1261848064.0, - "14": 1261848064.0, - "15": 1261848064.0, - "16": 1261848064.0, - "17": 1261848064.0, - "18": 1261848064.0, - "19": 1261848064.0, - "20": 1261848064.0, - "21": 1261848064.0, - "22": 1261848064.0, - "23": 1261848064.0, - "24": 1261848064.0, - "25": 1261848064.0, - "26": 1261848064.0, - "27": 1261848064.0, - "28": 1261848064.0, - "29": 1261848064.0, - "30": 1261848064.0, - "31": 1261848064.0, - "32": 1261848064.0, - "33": 1261848064.0, - "34": 1261848064.0, - "35": 1261848064.0, - "36": 1261848064.0, - "37": 1261848064.0, - "38": 1261848064.0, - "39": 1261848064.0, - "40": 1261848064.0, - "41": 1261848064.0, - "42": 1261848064.0, - "43": 1261848064.0, - "44": 1261848064.0, - "45": 1261848064.0, - "46": 1261848064.0, - "47": 1261848064.0, - "48": 1261848064.0, - "49": 1261848064.0, - "50": 1261848064.0, - "51": 1261848064.0, - "52": 1261848064.0, - "53": 1261848064.0, - "54": 1261848064.0, - "55": 1261848064.0, - "56": 1261848064.0, - "57": 1261848064.0, - "58": 1261848064.0, - "59": 1261848064.0, - "60": 1261848064.0, - "61": 1261848064.0, - "62": 
1261848064.0, - "63": 1261848064.0, - "64": 1261848064.0, - "65": 1261848064.0, - "66": 1261848064.0, - "67": 1261848064.0, - "68": 1261848064.0, - "69": 1261848064.0, - "70": 1261848064.0, - "71": 1261848064.0, - "72": 1261848064.0, - "73": 1261848064.0, - "74": 1261848064.0, - "75": 1261848064.0, - "76": 1261848064.0, - "77": 1261848064.0, - "78": 1261848064.0, - "79": 1261848064.0, - "80": 1261848064.0, - "81": 1261848064.0, - "82": 1261848064.0, - "83": 1261848064.0, - "84": 1261848064.0, - "85": 1261848064.0, - "86": 1261848064.0, - "87": 1261848064.0, - "88": 1261848064.0, - "89": 1261848064.0, - "90": 1261848064.0, - "91": 1261848064.0, - "92": 1261848064.0, - "93": 1261848064.0, - "94": 1261848064.0, - "95": 1261848064.0, - "96": 1261848064.0, - "97": 1261848064.0, - "98": 1261848064.0, - "99": 1261848064.0, - "100": 1261848064.0 + "1": 1261849088.0, + "2": 1261849088.0, + "3": 1261849088.0, + "4": 1261849088.0, + "5": 1261849088.0, + "6": 1261849088.0, + "7": 1261849088.0, + "8": 1261849088.0, + "9": 1261849088.0, + "10": 1261849088.0, + "11": 1261849088.0, + "12": 1261849088.0, + "13": 1261849088.0, + "14": 1261849088.0, + "15": 1261849088.0, + "16": 1261849088.0, + "17": 1261849088.0, + "18": 1261849088.0, + "19": 1261849088.0, + "20": 1261849088.0, + "21": 1261849088.0, + "22": 1261849088.0, + "23": 1261849088.0, + "24": 1261849088.0, + "25": 1261849088.0, + "26": 1261849088.0, + "27": 1261849088.0, + "28": 1261849088.0, + "29": 1261849088.0, + "30": 1261849088.0, + "31": 1261849088.0, + "32": 1261849088.0, + "33": 1261849088.0, + "34": 1261849088.0, + "35": 1261849088.0, + "36": 1261849088.0, + "37": 1261849088.0, + "38": 1261849088.0, + "39": 1261849088.0, + "40": 1261849088.0, + "41": 1261849088.0, + "42": 1261849088.0, + "43": 1261849088.0, + "44": 1261849088.0, + "45": 1261849088.0, + "46": 1261849088.0, + "47": 1261849088.0, + "48": 1261849088.0, + "49": 1261849088.0, + "50": 1261849088.0, + "51": 1261849088.0, + "52": 1261849088.0, + "53": 
1261849088.0, + "54": 1261849088.0, + "55": 1261849088.0, + "56": 1261849088.0, + "57": 1261849088.0, + "58": 1261849088.0, + "59": 1261849088.0, + "60": 1261849088.0, + "61": 1261849088.0, + "62": 1261849088.0, + "63": 1261849088.0, + "64": 1261849088.0, + "65": 1261849088.0, + "66": 1261849088.0, + "67": 1261849088.0, + "68": 1261849088.0, + "69": 1261849088.0, + "70": 1261849088.0, + "71": 1261849088.0, + "72": 1261849088.0, + "73": 1261849088.0, + "74": 1261849088.0, + "75": 1261849088.0, + "76": 1261849088.0, + "77": 1261849088.0, + "78": 1261849088.0, + "79": 1261849088.0, + "80": 1261849088.0, + "81": 1261849088.0, + "82": 1261849088.0, + "83": 1261849088.0, + "84": 1261849088.0, + "85": 1261849088.0, + "86": 1261849088.0, + "87": 1261849088.0, + "88": 1261849088.0, + "89": 1261849088.0, + "90": 1261849088.0, + "91": 1261849088.0, + "92": 1261849088.0, + "93": 1261849088.0, + "94": 1261849088.0, + "95": 1261849088.0, + "96": 1261849088.0, + "97": 1261849088.0, + "98": 1261849088.0, + "99": 1261849088.0, + "100": 1261849088.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2013852672.0, - "2": 2563430400.0, - "3": 2563430400.0, - "4": 2563430400.0, - "5": 2563430400.0, - "6": 2563430400.0, - "7": 2563430400.0, - "8": 2563430400.0, - "9": 2563430400.0, - "10": 2563430400.0, - "11": 2563430400.0, - "12": 2563430400.0, - "13": 2563430400.0, - "14": 2563430400.0, - "15": 2563430400.0, - "16": 2563430400.0, - "17": 2563430400.0, - "18": 2563430400.0, - "19": 2563430400.0, - "20": 2563430400.0, - "21": 2563430400.0, - "22": 2563430400.0, - "23": 2563430400.0, - "24": 2563430400.0, - "25": 2563430400.0, - "26": 2563430400.0, - "27": 2563430400.0, - "28": 2563430400.0, - "29": 2563430400.0, - "30": 2563430400.0, - "31": 2563430400.0, - "32": 2563430400.0, - "33": 2563430400.0, - "34": 2563430400.0, - "35": 2563430400.0, - "36": 2563430400.0, - "37": 2563430400.0, - "38": 2563430400.0, - "39": 
2563430400.0, - "40": 2563430400.0, - "41": 2563430400.0, - "42": 2563430400.0, - "43": 2563430400.0, - "44": 2563430400.0, - "45": 2563430400.0, - "46": 2563430400.0, - "47": 2563430400.0, - "48": 2563430400.0, - "49": 2563430400.0, - "50": 2563430400.0, - "51": 2563430400.0, - "52": 2563430400.0, - "53": 2563430400.0, - "54": 2563430400.0, - "55": 2563430400.0, - "56": 2563430400.0, - "57": 2563430400.0, - "58": 2563430400.0, - "59": 2563430400.0, - "60": 2563430400.0, - "61": 2563430400.0, - "62": 2563430400.0, - "63": 2563430400.0, - "64": 2563430400.0, - "65": 2563430400.0, - "66": 2563430400.0, - "67": 2563430400.0, - "68": 2563430400.0, - "69": 2563430400.0, - "70": 2563430400.0, - "71": 2563430400.0, - "72": 2563430400.0, - "73": 2563430400.0, - "74": 2563430400.0, - "75": 2563430400.0, - "76": 2563430400.0, - "77": 2563430400.0, - "78": 2563430400.0, - "79": 2563430400.0, - "80": 2563430400.0, - "81": 2563430400.0, - "82": 2563430400.0, - "83": 2563430400.0, - "84": 2563430400.0, - "85": 2563430400.0, - "86": 2563430400.0, - "87": 2563430400.0, - "88": 2563430400.0, - "89": 2563430400.0, - "90": 2563430400.0, - "91": 2563430400.0, - "92": 2563430400.0, - "93": 2563430400.0, - "94": 2563430400.0, - "95": 2563430400.0, - "96": 2563430400.0, - "97": 2563430400.0, - "98": 2563430400.0, - "99": 2563430400.0, - "100": 2563430400.0 + "1": 2013853696.0, + "2": 2563431424.0, + "3": 2563431424.0, + "4": 2563431424.0, + "5": 2563431424.0, + "6": 2563431424.0, + "7": 2563431424.0, + "8": 2563431424.0, + "9": 2563431424.0, + "10": 2563431424.0, + "11": 2563431424.0, + "12": 2563431424.0, + "13": 2563431424.0, + "14": 2563431424.0, + "15": 2563431424.0, + "16": 2563431424.0, + "17": 2563431424.0, + "18": 2563431424.0, + "19": 2563431424.0, + "20": 2563431424.0, + "21": 2563431424.0, + "22": 2563431424.0, + "23": 2563431424.0, + "24": 2563431424.0, + "25": 2563431424.0, + "26": 2563431424.0, + "27": 2563431424.0, + "28": 2563431424.0, + "29": 2563431424.0, + "30": 
2563431424.0, + "31": 2563431424.0, + "32": 2563431424.0, + "33": 2563431424.0, + "34": 2563431424.0, + "35": 2563431424.0, + "36": 2563431424.0, + "37": 2563431424.0, + "38": 2563431424.0, + "39": 2563431424.0, + "40": 2563431424.0, + "41": 2563431424.0, + "42": 2563431424.0, + "43": 2563431424.0, + "44": 2563431424.0, + "45": 2563431424.0, + "46": 2563431424.0, + "47": 2563431424.0, + "48": 2563431424.0, + "49": 2563431424.0, + "50": 2563431424.0, + "51": 2563431424.0, + "52": 2563431424.0, + "53": 2563431424.0, + "54": 2563431424.0, + "55": 2563431424.0, + "56": 2563431424.0, + "57": 2563431424.0, + "58": 2563431424.0, + "59": 2563431424.0, + "60": 2563431424.0, + "61": 2563431424.0, + "62": 2563431424.0, + "63": 2563431424.0, + "64": 2563431424.0, + "65": 2563431424.0, + "66": 2563431424.0, + "67": 2563431424.0, + "68": 2563431424.0, + "69": 2563431424.0, + "70": 2563431424.0, + "71": 2563431424.0, + "72": 2563431424.0, + "73": 2563431424.0, + "74": 2563431424.0, + "75": 2563431424.0, + "76": 2563431424.0, + "77": 2563431424.0, + "78": 2563431424.0, + "79": 2563431424.0, + "80": 2563431424.0, + "81": 2563431424.0, + "82": 2563431424.0, + "83": 2563431424.0, + "84": 2563431424.0, + "85": 2563431424.0, + "86": 2563431424.0, + "87": 2563431424.0, + "88": 2563431424.0, + "89": 2563431424.0, + "90": 2563431424.0, + "91": 2563431424.0, + "92": 2563431424.0, + "93": 2563431424.0, + "94": 2563431424.0, + "95": 2563431424.0, + "96": 2563431424.0, + "97": 2563431424.0, + "98": 2563431424.0, + "99": 2563431424.0, + "100": 2563431424.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 6.36321, - "2": 0.1218, - "3": 0.11132, - "4": 0.10707, - "5": 0.0969, - "6": 0.09387, - "7": 0.09166, - "8": 0.09482, - "9": 0.09368, - "10": 0.09371, - "11": 0.0914, - "12": 0.09315, - "13": 0.09323, - "14": 0.09407, - "15": 0.09341, - "16": 0.09525, - "17": 0.09338, - "18": 0.09247, - "19": 0.09648, - "20": 0.09425, - "21": 0.09329, - 
"22": 0.09356, - "23": 0.09379, - "24": 0.09405, - "25": 0.0935, - "26": 0.09238, - "27": 0.09612, - "28": 0.09315, - "29": 0.09297, - "30": 0.09342, - "31": 0.09294, - "32": 0.09287, - "33": 0.09256, - "34": 0.09461, - "35": 0.09355, - "36": 0.09517, - "37": 0.09434, - "38": 0.0956, - "39": 0.09435, - "40": 0.09568, - "41": 0.09615, - "42": 0.09244, - "43": 0.09364, - "44": 0.09376, - "45": 0.09258, - "46": 0.09268, - "47": 0.09255, - "48": 0.09424, - "49": 0.09573, - "50": 0.09436, - "51": 0.0945, - "52": 0.09894, - "53": 0.09918, - "54": 0.09823, - "55": 0.09863, - "56": 0.09834, - "57": 0.09709, - "58": 0.09303, - "59": 0.09404, - "60": 0.09192, - "61": 0.09198, - "62": 0.09274, - "63": 0.09166, - "64": 0.09147, - "65": 0.09327, - "66": 0.11015, - "67": 0.09684, - "68": 0.09642, - "69": 0.09562, - "70": 0.0924, - "71": 0.09384, - "72": 0.09189, - "73": 0.09372, - "74": 0.09193, - "75": 0.09409, - "76": 0.09252, - "77": 0.09275, - "78": 0.09475, - "79": 0.0945, - "80": 0.10107, - "81": 0.09197, - "82": 0.09204, - "83": 0.09353, - "84": 0.09326, - "85": 0.09194, - "86": 0.1029, - "87": 0.09285, - "88": 0.09168, - "89": 0.09478, - "90": 0.09254, - "91": 0.0921, - "92": 0.09246, - "93": 0.09207, - "94": 0.09324, - "95": 0.09431, - "96": 0.09195, - "97": 0.09285, - "98": 0.09175, - "99": 0.09153, - "100": 0.11457 + "1": 4.95057, + "2": 0.11272, + "3": 0.10006, + "4": 0.0754, + "5": 0.07446, + "6": 0.07499, + "7": 0.07451, + "8": 0.07507, + "9": 0.07406, + "10": 0.07462, + "11": 0.07387, + "12": 0.07421, + "13": 0.07426, + "14": 0.075, + "15": 0.07429, + "16": 0.07394, + "17": 0.07476, + "18": 0.07498, + "19": 0.07455, + "20": 0.07456, + "21": 0.07463, + "22": 0.07473, + "23": 0.07475, + "24": 0.0743, + "25": 0.07447, + "26": 0.07414, + "27": 0.07438, + "28": 0.07665, + "29": 0.07618, + "30": 0.07525, + "31": 0.07718, + "32": 0.07452, + "33": 0.07632, + "34": 0.07594, + "35": 0.0752, + "36": 0.07788, + "37": 0.07472, + "38": 0.07514, + "39": 0.07557, + "40": 0.07528, 
+ "41": 0.07668, + "42": 0.07829, + "43": 0.07561, + "44": 0.07525, + "45": 0.07522, + "46": 0.08858, + "47": 0.09212, + "48": 0.07649, + "49": 0.07761, + "50": 0.07534, + "51": 0.0797, + "52": 0.07601, + "53": 0.07588, + "54": 0.07564, + "55": 0.07643, + "56": 0.07613, + "57": 0.07562, + "58": 0.07558, + "59": 0.07588, + "60": 0.07563, + "61": 0.07585, + "62": 0.07578, + "63": 0.07559, + "64": 0.07502, + "65": 0.07586, + "66": 0.07503, + "67": 0.0755, + "68": 0.07448, + "69": 0.07531, + "70": 0.07481, + "71": 0.07524, + "72": 0.07712, + "73": 0.07539, + "74": 0.07566, + "75": 0.07497, + "76": 0.07458, + "77": 0.07476, + "78": 0.07547, + "79": 0.07542, + "80": 0.07549, + "81": 0.07589, + "82": 0.07548, + "83": 0.07513, + "84": 0.07494, + "85": 0.07468, + "86": 0.07522, + "87": 0.07487, + "88": 0.07533, + "89": 0.07545, + "90": 0.07496, + "91": 0.07533, + "92": 0.07435, + "93": 0.07549, + "94": 0.07465, + "95": 0.07523, + "96": 0.07531, + "97": 0.07697, + "98": 0.0768, + "99": 0.07605, + "100": 0.07588 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..2a8709b9af2 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + 
"24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.8545, + "52": 9.7393, + "53": 10.07426, + "54": 9.96913, + "55": 9.88574, + "56": 9.62438, + "57": 9.48229, + "58": 9.83484, + "59": 9.58731, + "60": 9.50243, + "61": 9.6934, + "62": 9.988, + "63": 9.39105, + "64": 9.78022, + "65": 8.94516, + "66": 9.70492, + "67": 9.37249, + "68": 9.78328, + "69": 9.79057, + "70": 9.74451, + "71": 9.62298, + "72": 9.58457, + "73": 9.50511, + "74": 8.94308, + "75": 9.42524, + "76": 9.07602, + "77": 10.06352, + "78": 9.72307, + "79": 9.37497, + "80": 9.40454, + "81": 9.4779, + "82": 9.69669, + "83": 9.30714, + "84": 9.41525, + "85": 9.61295, + "86": 9.07198, + "87": 9.58834, + "88": 9.7476, + "89": 9.59984, + "90": 9.81672, + "91": 9.33791, + "92": 9.35608, + "93": 9.07423, + "94": 8.83511, + "95": 9.51841, + "96": 9.52391, + "97": 9.30922, + "98": 9.66746, + "99": 8.88421, + "100": 9.39923 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", 
+ "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2543.0, + "52": 2613.0, + "53": 2945.0, + "54": 2713.0, + "55": 2503.0, + "56": 2692.0, + "57": 2338.0, + "58": 2961.0, + "59": 2620.0, + "60": 2367.0, + "61": 2909.0, + "62": 2728.0, + "63": 2399.0, + "64": 2909.0, + "65": 2605.0, + "66": 2983.0, + "67": 2793.0, + "68": 2663.0, + "69": 2833.0, + "70": 3135.0, + "71": 2997.0, + "72": 2464.0, + "73": 3088.0, + "74": 1970.0, + "75": 2556.0, + "76": 3064.0, + "77": 3231.0, + "78": 3097.0, + "79": 3035.0, + "80": 3301.0, + "81": 3599.0, + "82": 3215.0, + "83": 2757.0, + "84": 3130.0, + "85": 3380.0, + "86": 2742.0, + "87": 3723.0, + "88": 3066.0, + "89": 3264.0, + "90": 3198.0, + "91": 2718.0, + "92": 3070.0, + "93": 2624.0, + "94": 3301.0, + "95": 3431.0, + "96": 3358.0, + "97": 3142.0, + "98": 3704.0, + "99": 3107.0, + "100": 3089.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1261849088.0, + "52": 1261849088.0, + "53": 1261849088.0, + "54": 1261849088.0, + "55": 1261849088.0, + "56": 1261849088.0, + "57": 1261849088.0, + "58": 1261849088.0, + "59": 1261849088.0, + "60": 1261849088.0, + "61": 1261849088.0, + "62": 
1261849088.0, + "63": 1261849088.0, + "64": 1261849088.0, + "65": 1261849088.0, + "66": 1261849088.0, + "67": 1261849088.0, + "68": 1261849088.0, + "69": 1261849088.0, + "70": 1261849088.0, + "71": 1261849088.0, + "72": 1261849088.0, + "73": 1261849088.0, + "74": 1261849088.0, + "75": 1261849088.0, + "76": 1261849088.0, + "77": 1261849088.0, + "78": 1261849088.0, + "79": 1261849088.0, + "80": 1261849088.0, + "81": 1261849088.0, + "82": 1261849088.0, + "83": 1261849088.0, + "84": 1261849088.0, + "85": 1261849088.0, + "86": 1261849088.0, + "87": 1261849088.0, + "88": 1261849088.0, + "89": 1261849088.0, + "90": 1261849088.0, + "91": 1261849088.0, + "92": 1261849088.0, + "93": 1261849088.0, + "94": 1261849088.0, + "95": 1261849088.0, + "96": 1261849088.0, + "97": 1261849088.0, + "98": 1261849088.0, + "99": 1261849088.0, + "100": 1261849088.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2530924544.0, + "52": 2564480000.0, + "53": 2564480000.0, + "54": 2564480000.0, + "55": 2564480000.0, + "56": 2564480000.0, + "57": 2564480000.0, + "58": 2564480000.0, + "59": 2564480000.0, + "60": 2564480000.0, + "61": 2564480000.0, + "62": 2564480000.0, + "63": 2564480000.0, + "64": 
2564480000.0, + "65": 2564480000.0, + "66": 2564480000.0, + "67": 2564480000.0, + "68": 2564480000.0, + "69": 2564480000.0, + "70": 2564480000.0, + "71": 2564480000.0, + "72": 2564480000.0, + "73": 2564480000.0, + "74": 2564480000.0, + "75": 2564480000.0, + "76": 2564480000.0, + "77": 2564480000.0, + "78": 2564480000.0, + "79": 2564480000.0, + "80": 2564480000.0, + "81": 2564480000.0, + "82": 2564480000.0, + "83": 2564480000.0, + "84": 2564480000.0, + "85": 2564480000.0, + "86": 2564480000.0, + "87": 2564480000.0, + "88": 2564480000.0, + "89": 2564480000.0, + "90": 2564480000.0, + "91": 2564480000.0, + "92": 2564480000.0, + "93": 2564480000.0, + "94": 2564480000.0, + "95": 2564480000.0, + "96": 2564480000.0, + "97": 2564480000.0, + "98": 2564480000.0, + "99": 2564480000.0, + "100": 2564480000.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.87745, + "52": 0.09791, + "53": 0.07996, + "54": 0.07698, + "55": 0.07921, + "56": 0.07768, + "57": 0.07938, + "58": 0.077, + "59": 0.0799, + "60": 0.07696, + "61": 0.07996, + "62": 0.07691, + "63": 0.08005, + "64": 0.0814, + "65": 0.07853, + "66": 0.07696, + "67": 0.07866, + "68": 0.07694, + "69": 0.07801, + "70": 0.07717, + "71": 
0.07878, + "72": 0.07724, + "73": 0.18173, + "74": 0.09573, + "75": 0.07905, + "76": 0.0777, + "77": 0.07736, + "78": 0.08065, + "79": 0.07839, + "80": 0.08069, + "81": 0.0787, + "82": 0.07798, + "83": 0.08482, + "84": 0.07927, + "85": 0.08138, + "86": 0.08293, + "87": 0.08143, + "88": 0.07796, + "89": 0.07668, + "90": 0.07901, + "91": 0.07807, + "92": 0.0798, + "93": 0.0768, + "94": 0.07634, + "95": 0.07708, + "96": 0.07653, + "97": 0.0783, + "98": 0.07633, + "99": 0.07617, + "100": 0.07786 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100.json index bd7ca46935f..3d5c6f6dc4b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 3.43734, - "2": 0.14648, - "3": 0.12542, - "4": 0.12603, - "5": 0.12388, - "6": 0.12524, - "7": 0.12279, - "8": 0.1239, - "9": 0.12244, - "10": 0.12336, - "11": 0.12345, - "12": 0.12322, - "13": 0.12318, - "14": 0.12381, - "15": 0.12343, - "16": 0.12319, - "17": 0.12276, - "18": 0.12324, - "19": 0.12355, - "20": 0.12315, - "21": 0.12294, - "22": 0.12326, - "23": 0.12303, - "24": 0.12294, - "25": 0.12286, - "26": 0.12388, - "27": 0.12341, - "28": 0.12301, - "29": 0.12267, - "30": 0.12315, - "31": 0.12371, - "32": 0.12359, - "33": 0.12298, - "34": 0.12283, - "35": 0.12266, - "36": 0.12356, - "37": 0.12377, - "38": 0.12388, - "39": 0.12525, - "40": 0.12501, - "41": 0.12357, - "42": 0.12376, - "43": 0.12304, - "44": 0.12342, - "45": 0.12284, - "46": 0.12332, - "47": 0.12324, - 
"48": 0.12279, - "49": 0.12276, - "50": 0.12391, - "51": 0.12862, - "52": 0.12214, - "53": 0.12006, - "54": 0.12101, - "55": 0.12062, - "56": 0.12088, - "57": 0.121, - "58": 0.12034, - "59": 0.12049, - "60": 0.12066, - "61": 0.11974, - "62": 0.11979, - "63": 0.12196, - "64": 0.12149, - "65": 0.12119, - "66": 0.12067, - "67": 0.12079, - "68": 0.12104, - "69": 0.12025, - "70": 0.12059, - "71": 0.12069, - "72": 0.12102, - "73": 0.12115, - "74": 0.1208, - "75": 0.12051, - "76": 0.12011, - "77": 0.11958, - "78": 0.12095, - "79": 0.11983, - "80": 0.12106, - "81": 0.1203, - "82": 0.12062, - "83": 0.12021, - "84": 0.12036, - "85": 0.12053, - "86": 0.12119, - "87": 0.12057, - "88": 0.12092, - "89": 0.12271, - "90": 0.12095, - "91": 0.1204, - "92": 0.12052, - "93": 0.12075, - "94": 0.12025, - "95": 0.12129, - "96": 0.12087, - "97": 0.12098, - "98": 0.12136, - "99": 0.12046, - "100": 0.12064 + "1": 2.58038, + "2": 0.24481, + "3": 0.14335, + "4": 0.12008, + "5": 0.11519, + "6": 0.11576, + "7": 0.11592, + "8": 0.11621, + "9": 0.11509, + "10": 0.11622, + "11": 0.11438, + "12": 0.12519, + "13": 0.11661, + "14": 0.11675, + "15": 0.11585, + "16": 0.11602, + "17": 0.11511, + "18": 0.11563, + "19": 0.1151, + "20": 0.11612, + "21": 0.11576, + "22": 0.11985, + "23": 0.11629, + "24": 0.11712, + "25": 0.11544, + "26": 0.11643, + "27": 0.1158, + "28": 0.1159, + "29": 0.11547, + "30": 0.11692, + "31": 0.11579, + "32": 0.11621, + "33": 0.11916, + "34": 0.11636, + "35": 0.11562, + "36": 0.11659, + "37": 0.11547, + "38": 0.11647, + "39": 0.1158, + "40": 0.11627, + "41": 0.11596, + "42": 0.11632, + "43": 0.11615, + "44": 0.11641, + "45": 0.11517, + "46": 0.117, + "47": 0.11569, + "48": 0.11641, + "49": 0.1153, + "50": 0.11761, + "51": 0.12112, + "52": 0.11688, + "53": 0.11745, + "54": 0.11527, + "55": 0.1155, + "56": 0.11515, + "57": 0.1278, + "58": 0.11901, + "59": 0.11522, + "60": 0.11514, + "61": 0.11577, + "62": 0.1152, + "63": 0.11508, + "64": 0.11441, + "65": 0.11536, + "66": 0.11387, + 
"67": 0.11491, + "68": 0.11494, + "69": 0.11516, + "70": 0.11427, + "71": 0.11457, + "72": 0.11443, + "73": 0.11522, + "74": 0.1147, + "75": 0.11473, + "76": 0.11408, + "77": 0.11464, + "78": 0.11499, + "79": 0.11494, + "80": 0.11435, + "81": 0.11479, + "82": 0.11427, + "83": 0.11504, + "84": 0.11412, + "85": 0.11455, + "86": 0.11473, + "87": 0.11484, + "88": 0.1137, + "89": 0.11543, + "90": 0.11349, + "91": 0.11471, + "92": 0.114, + "93": 0.11498, + "94": 0.11434, + "95": 0.11497, + "96": 0.11416, + "97": 0.11454, + "98": 0.1143, + "99": 0.1145, + "100": 0.11459 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..4402397bfe1 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.83457, + "52": 9.73232, + "53": 
10.0488, + "54": 9.93895, + "55": 9.863, + "56": 9.613, + "57": 9.46966, + "58": 9.81135, + "59": 9.57107, + "60": 9.48155, + "61": 9.6788, + "62": 9.96581, + "63": 9.35273, + "64": 9.75648, + "65": 8.93771, + "66": 9.68153, + "67": 9.35671, + "68": 9.76807, + "69": 9.7739, + "70": 9.71016, + "71": 9.60009, + "72": 9.56793, + "73": 9.4774, + "74": 8.93177, + "75": 9.4072, + "76": 9.06849, + "77": 10.0464, + "78": 9.70988, + "79": 9.35733, + "80": 9.38975, + "81": 9.4662, + "82": 9.68058, + "83": 9.2914, + "84": 9.40191, + "85": 9.59735, + "86": 9.06209, + "87": 9.57922, + "88": 9.73259, + "89": 9.58836, + "90": 9.80354, + "91": 9.31991, + "92": 9.35011, + "93": 9.06382, + "94": 8.81909, + "95": 9.50568, + "96": 9.51071, + "97": 9.29241, + "98": 9.65578, + "99": 8.87401, + "100": 9.38833 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2508.0, + "52": 2495.0, + "53": 2856.0, + "54": 2692.0, + "55": 2482.0, + "56": 2614.0, + "57": 2283.0, + "58": 2894.0, + "59": 2659.0, + "60": 2561.0, + "61": 3006.0, + "62": 2671.0, + "63": 2488.0, + "64": 3092.0, + "65": 2622.0, + "66": 3108.0, + "67": 2741.0, + "68": 2942.0, + "69": 2983.0, + "70": 3347.0, + "71": 3034.0, + "72": 2438.0, + "73": 
3075.0, + "74": 1931.0, + "75": 2722.0, + "76": 2960.0, + "77": 3387.0, + "78": 3268.0, + "79": 3079.0, + "80": 3404.0, + "81": 3674.0, + "82": 3192.0, + "83": 2791.0, + "84": 3224.0, + "85": 3237.0, + "86": 2646.0, + "87": 3840.0, + "88": 3114.0, + "89": 3410.0, + "90": 3184.0, + "91": 3073.0, + "92": 3396.0, + "93": 2711.0, + "94": 3530.0, + "95": 3387.0, + "96": 3530.0, + "97": 3277.0, + "98": 3775.0, + "99": 3421.0, + "100": 3350.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1232487936.0, + "52": 1232487936.0, + "53": 1232487936.0, + "54": 1232487936.0, + "55": 1232487936.0, + "56": 1232487936.0, + "57": 1232487936.0, + "58": 1232487936.0, + "59": 1232487936.0, + "60": 1232487936.0, + "61": 1232487936.0, + "62": 1232487936.0, + "63": 1232487936.0, + "64": 1232487936.0, + "65": 1232487936.0, + "66": 1232487936.0, + "67": 1232487936.0, + "68": 1232487936.0, + "69": 1232487936.0, + "70": 1232487936.0, + "71": 1232487936.0, + "72": 1232487936.0, + "73": 1232487936.0, + "74": 1232487936.0, + "75": 1232487936.0, + "76": 1232487936.0, + "77": 1232487936.0, + "78": 1232487936.0, + "79": 1232487936.0, + "80": 1232487936.0, + "81": 1232487936.0, + "82": 1232487936.0, + 
"83": 1232487936.0, + "84": 1232487936.0, + "85": 1232487936.0, + "86": 1232487936.0, + "87": 1232487936.0, + "88": 1232487936.0, + "89": 1232487936.0, + "90": 1232487936.0, + "91": 1232487936.0, + "92": 1232487936.0, + "93": 1232487936.0, + "94": 1232487936.0, + "95": 1232487936.0, + "96": 1232487936.0, + "97": 1232487936.0, + "98": 1232487936.0, + "99": 1232487936.0, + "100": 1232487936.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2501563392.0, + "52": 2535118848.0, + "53": 2535118848.0, + "54": 2535118848.0, + "55": 2535118848.0, + "56": 2535118848.0, + "57": 2535118848.0, + "58": 2535118848.0, + "59": 2535118848.0, + "60": 2535118848.0, + "61": 2535118848.0, + "62": 2535118848.0, + "63": 2535118848.0, + "64": 2535118848.0, + "65": 2535118848.0, + "66": 2535118848.0, + "67": 2535118848.0, + "68": 2535118848.0, + "69": 2535118848.0, + "70": 2535118848.0, + "71": 2535118848.0, + "72": 2535118848.0, + "73": 2535118848.0, + "74": 2535118848.0, + "75": 2535118848.0, + "76": 2535118848.0, + "77": 2535118848.0, + "78": 2535118848.0, + "79": 2535118848.0, + "80": 2535118848.0, + "81": 2535118848.0, + "82": 2535118848.0, + "83": 2535118848.0, + "84": 2535118848.0, + 
"85": 2535118848.0, + "86": 2535118848.0, + "87": 2535118848.0, + "88": 2535118848.0, + "89": 2535118848.0, + "90": 2535118848.0, + "91": 2535118848.0, + "92": 2535118848.0, + "93": 2535118848.0, + "94": 2535118848.0, + "95": 2535118848.0, + "96": 2535118848.0, + "97": 2535118848.0, + "98": 2535118848.0, + "99": 2535118848.0, + "100": 2535118848.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4.25367, + "52": 0.13205, + "53": 0.11484, + "54": 0.11811, + "55": 0.11596, + "56": 0.11581, + "57": 0.11498, + "58": 0.11563, + "59": 0.11477, + "60": 0.11575, + "61": 0.11498, + "62": 0.11551, + "63": 0.11663, + "64": 0.11428, + "65": 0.11448, + "66": 0.11417, + "67": 0.11362, + "68": 0.11442, + "69": 0.11406, + "70": 0.11487, + "71": 0.11375, + "72": 0.11459, + "73": 0.11365, + "74": 0.11414, + "75": 0.11435, + "76": 0.11545, + "77": 0.11362, + "78": 0.11443, + "79": 0.11286, + "80": 0.11385, + "81": 0.11272, + "82": 0.11354, + "83": 0.11294, + "84": 0.11396, + "85": 0.11272, + "86": 0.11396, + "87": 0.11339, + "88": 0.11475, + "89": 0.11779, + "90": 0.11386, + "91": 0.11507, + "92": 0.11404, + "93": 0.11335, + "94": 0.11449, + "95": 0.11323, + "96": 0.11451, + "97": 0.11365, + 
"98": 0.11398, + "99": 0.11453, + "100": 0.11417 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..5470d60dcdb --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.77012, + "2": 10.78244, + "3": 10.77833, + "4": 10.75145, + "5": 10.80955, + "6": 10.8223, + "7": 10.80193, + "8": 10.78868, + "9": 10.79503, + "10": 10.71341, + "11": 10.85003, + "12": 10.80071, + "13": 10.8263, + "14": 10.84293, + "15": 10.7559, + "16": 10.75248, + "17": 10.70854, + "18": 10.74761, + "19": 10.74709, + "20": 10.64388, + "21": 10.60456, + "22": 10.43295, + "23": 10.66573, + "24": 10.50049, + "25": 10.43605, + "26": 10.51463, + "27": 10.54136, + "28": 10.51359, + "29": 10.53716, + "30": 10.25964, + "31": 9.97634, + "32": 10.39958, + "33": 10.38607, + "34": 10.11016, + "35": 10.1741, + "36": 10.11553, + "37": 10.26008, + "38": 10.07462, + "39": 10.32873, + "40": 9.96852, + "41": 10.05099, + "42": 10.12726, + "43": 9.70798, + "44": 9.83287, + "45": 9.70538, + "46": 9.7134, + "47": 10.05872, + "48": 9.74565, + "49": 9.40522, + "50": 9.80891, + "51": 9.76757, + "52": 9.64732, + "53": 9.995, + "54": 9.88603, + "55": 9.81763, + "56": 9.53914, + "57": 9.38192, + "58": 9.75896, + "59": 9.52106, + "60": 9.42443, + "61": 9.63665, + "62": 9.92974, + "63": 9.29595, + "64": 9.70631, + "65": 8.88066, + "66": 9.64072, + "67": 9.32146, + "68": 9.73692, + "69": 9.75346, + "70": 9.68289, + "71": 9.58117, + "72": 9.52491, + "73": 9.44094, + "74": 8.86077, + "75": 9.36671, + "76": 9.01691, + "77": 
10.02224, + "78": 9.68354, + "79": 9.33325, + "80": 9.3582, + "81": 9.43786, + "82": 9.66102, + "83": 9.26223, + "84": 9.37189, + "85": 9.56652, + "86": 9.04493, + "87": 9.5575, + "88": 9.70541, + "89": 9.55092, + "90": 9.79196, + "91": 9.29173, + "92": 9.31225, + "93": 9.0433, + "94": 8.78683, + "95": 9.49525, + "96": 9.48391, + "97": 9.25966, + "98": 9.62611, + "99": 8.85031, + "100": 9.36043 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1660.0, + "2": 1892.0, + "3": 1805.0, + "4": 1861.0, + "5": 2134.0, + "6": 1964.0, + "7": 2077.0, + "8": 1755.0, + "9": 1942.0, + "10": 1516.0, + "11": 1981.0, + "12": 1962.0, + "13": 2092.0, + "14": 1940.0, + "15": 2030.0, + "16": 1975.0, + "17": 2081.0, + "18": 1925.0, + "19": 1890.0, + "20": 1806.0, + "21": 1992.0, + "22": 1833.0, + "23": 2082.0, + "24": 1806.0, + "25": 1834.0, + "26": 1935.0, + "27": 1987.0, + "28": 2157.0, + "29": 2045.0, + "30": 1959.0, + "31": 1733.0, + "32": 2011.0, + "33": 2149.0, + "34": 2014.0, + "35": 2131.0, + "36": 2027.0, + "37": 2337.0, + "38": 2210.0, + "39": 2454.0, + "40": 2335.0, + "41": 2379.0, + "42": 2359.0, + "43": 2101.0, + "44": 2280.0, + "45": 2138.0, + "46": 2297.0, + "47": 2454.0, + "48": 2586.0, + "49": 2213.0, + "50": 2414.0, + "51": 2613.0, + "52": 2647.0, + "53": 2908.0, + "54": 2580.0, + "55": 2486.0, + "56": 2687.0, + "57": 2577.0, + "58": 2824.0, + "59": 2720.0, + "60": 2410.0, + "61": 2744.0, + "62": 2536.0, + "63": 2652.0, + "64": 2918.0, + "65": 2742.0, + "66": 2927.0, + "67": 2920.0, + "68": 2652.0, + "69": 3019.0, + "70": 2996.0, + "71": 2835.0, + "72": 2664.0, + "73": 3211.0, + "74": 2311.0, + "75": 2658.0, + "76": 3155.0, + "77": 3051.0, + "78": 3073.0, + "79": 3116.0, + "80": 3191.0, + "81": 3237.0, + "82": 3218.0, + "83": 2689.0, + "84": 3294.0, + "85": 3209.0, + "86": 2558.0, + "87": 3462.0, + "88": 3287.0, + "89": 3201.0, + "90": 3331.0, + "91": 3183.0, + "92": 3201.0, + "93": 2942.0, + "94": 3274.0, + 
"95": 3132.0, + "96": 3200.0, + "97": 3054.0, + "98": 3544.0, + "99": 3387.0, + "100": 3192.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 917459968.0, + "2": 917459968.0, + "3": 917459968.0, + "4": 917459968.0, + "5": 917459968.0, + "6": 917459968.0, + "7": 917459968.0, + "8": 917459968.0, + "9": 917459968.0, + "10": 917459968.0, + "11": 917459968.0, + "12": 917459968.0, + "13": 917459968.0, + "14": 917459968.0, + "15": 917459968.0, + "16": 917459968.0, + "17": 917459968.0, + "18": 917459968.0, + "19": 917459968.0, + "20": 917459968.0, + "21": 917459968.0, + "22": 917459968.0, + "23": 917459968.0, + "24": 917459968.0, + "25": 917459968.0, + "26": 917459968.0, + "27": 917459968.0, + "28": 917459968.0, + "29": 917459968.0, + "30": 917459968.0, + "31": 917459968.0, + "32": 917459968.0, + "33": 917459968.0, + "34": 917459968.0, + "35": 917459968.0, + "36": 917459968.0, + "37": 917459968.0, + "38": 917459968.0, + "39": 917459968.0, + "40": 917459968.0, + "41": 917459968.0, + "42": 917459968.0, + "43": 917459968.0, + "44": 917459968.0, + "45": 917459968.0, + "46": 917459968.0, + "47": 917459968.0, + "48": 917459968.0, + "49": 917459968.0, + "50": 917459968.0, + "51": 917459968.0, + "52": 917459968.0, + "53": 917459968.0, + "54": 917459968.0, + "55": 917459968.0, + "56": 917459968.0, + "57": 917459968.0, + "58": 917459968.0, + "59": 917459968.0, + "60": 917459968.0, + "61": 917459968.0, + "62": 917459968.0, + "63": 917459968.0, + "64": 917459968.0, + "65": 917459968.0, + "66": 917459968.0, + "67": 917459968.0, + "68": 917459968.0, + "69": 917459968.0, + "70": 917459968.0, + "71": 917459968.0, + "72": 917459968.0, + "73": 917459968.0, + "74": 917459968.0, + "75": 917459968.0, + "76": 917459968.0, + "77": 917459968.0, + "78": 917459968.0, + "79": 917459968.0, + "80": 917459968.0, + "81": 917459968.0, + "82": 917459968.0, + "83": 917459968.0, + "84": 917459968.0, + "85": 917459968.0, + "86": 
917459968.0, + "87": 917459968.0, + "88": 917459968.0, + "89": 917459968.0, + "90": 917459968.0, + "91": 917459968.0, + "92": 917459968.0, + "93": 917459968.0, + "94": 917459968.0, + "95": 917459968.0, + "96": 917459968.0, + "97": 917459968.0, + "98": 917459968.0, + "99": 917459968.0, + "100": 917459968.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2262889472.0, + "2": 2621306880.0, + "3": 2621306880.0, + "4": 2621306880.0, + "5": 2621306880.0, + "6": 2621306880.0, + "7": 2621306880.0, + "8": 2621306880.0, + "9": 2621306880.0, + "10": 2621306880.0, + "11": 2621306880.0, + "12": 2621306880.0, + "13": 2621306880.0, + "14": 2621306880.0, + "15": 2621306880.0, + "16": 2621306880.0, + "17": 2621306880.0, + "18": 2621306880.0, + "19": 2621306880.0, + "20": 2621306880.0, + "21": 2621306880.0, + "22": 2621306880.0, + "23": 2621306880.0, + "24": 2621306880.0, + "25": 2621306880.0, + "26": 2621306880.0, + "27": 2621306880.0, + "28": 2621306880.0, + "29": 2621306880.0, + "30": 2621306880.0, + "31": 2621306880.0, + "32": 2621306880.0, + "33": 2621306880.0, + "34": 2621306880.0, + "35": 2621306880.0, + "36": 2621306880.0, + "37": 2621306880.0, + "38": 2621306880.0, + "39": 2621306880.0, + "40": 2621306880.0, + "41": 2621306880.0, + "42": 2621306880.0, + "43": 2621306880.0, + "44": 2621306880.0, + "45": 2621306880.0, + "46": 2621306880.0, + "47": 2621306880.0, + "48": 2621306880.0, + "49": 2621306880.0, + "50": 2621306880.0, + "51": 2621306880.0, + "52": 2621306880.0, + "53": 2621306880.0, + "54": 2621306880.0, + "55": 2621306880.0, + "56": 2621306880.0, + "57": 2621306880.0, + "58": 2621306880.0, + "59": 2621306880.0, + "60": 2621306880.0, + "61": 2621306880.0, + "62": 2621306880.0, + "63": 2621306880.0, + "64": 2621306880.0, + "65": 2621306880.0, + "66": 2621306880.0, + "67": 2621306880.0, + "68": 2621306880.0, + "69": 2621306880.0, + "70": 2621306880.0, + "71": 2621306880.0, + "72": 2621306880.0, + 
"73": 2621306880.0, + "74": 2621306880.0, + "75": 2621306880.0, + "76": 2621306880.0, + "77": 2621306880.0, + "78": 2621306880.0, + "79": 2621306880.0, + "80": 2621306880.0, + "81": 2621306880.0, + "82": 2621306880.0, + "83": 2621306880.0, + "84": 2621306880.0, + "85": 2621306880.0, + "86": 2621306880.0, + "87": 2621306880.0, + "88": 2621306880.0, + "89": 2621306880.0, + "90": 2621306880.0, + "91": 2621306880.0, + "92": 2621306880.0, + "93": 2621306880.0, + "94": 2621306880.0, + "95": 2621306880.0, + "96": 2621306880.0, + "97": 2621306880.0, + "98": 2621306880.0, + "99": 2621306880.0, + "100": 2621306880.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 3.09607, + "3": 0.15089, + "4": 0.16387, + "5": 0.13417, + "6": 0.12738, + "7": 0.12788, + "8": 0.132, + "9": 0.28261, + "10": 0.12697, + "11": 0.13182, + "12": 0.13355, + "13": 0.13045, + "14": 0.13241, + "15": 0.1311, + "16": 0.13108, + "17": 0.13531, + "18": 0.13102, + "19": 0.13307, + "20": 0.13285, + "21": 0.13577, + "22": 0.13581, + "23": 0.13315, + "24": 0.13227, + "25": 0.13286, + "26": 0.13451, + "27": 0.13303, + "28": 0.13253, + "29": 0.29925, + "30": 0.13379, + "31": 0.13315, + "32": 0.13374, + "33": 0.13205, + "34": 0.13144, + "35": 0.13199, + "36": 0.13191, + "37": 0.13367, + "38": 0.13204, + "39": 0.13375, + "40": 0.1347, + "41": 0.13056, + "42": 0.13244, + "43": 0.13361, + "44": 0.13216, + "45": 0.13279, + "46": 0.12873, + "47": 0.13055, + "48": 0.13334, + "49": 0.1341, + "50": 0.13588, + "51": 0.1385, + "52": 0.12954, + "53": 0.1309, + "54": 0.12956, + "55": 0.12942, + "56": 0.12835, + "57": 0.13126, + "58": 0.13085, + "59": 0.17194, + "60": 0.12864, + "61": 0.13121, + "62": 0.13254, + "63": 0.17379, + "64": 0.1288, + "65": 0.13106, + "66": 0.13033, + "67": 0.13051, + "68": 0.12867, + "69": 0.13001, + "70": 0.12842, + "71": 0.13086, + "72": 0.13042, + "73": 0.13305, + "74": 0.13253, + "75": 0.13136, + "76": 0.13325, + "77": 
0.13253, + "78": 0.13157, + "79": 0.13256, + "80": 0.13095, + "81": 0.13101, + "82": 0.13389, + "83": 0.13228, + "84": 0.13283, + "85": 0.13274, + "86": 0.13308, + "87": 0.13089, + "88": 0.13159, + "89": 0.13218, + "90": 0.13253, + "91": 0.13284, + "92": 0.13376, + "93": 0.13307, + "94": 0.13459, + "95": 0.13415, + "96": 0.13629, + "97": 0.13635, + "98": 0.1381, + "99": 0.13441, + "100": 0.1359 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json index b0474f2f8ec..1f743e8c2e8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 921653248.0, - "2": 921653248.0, - "3": 921653248.0, - "4": 921653248.0, - "5": 921653248.0, - "6": 921653248.0, - "7": 921653248.0, - "8": 921653248.0, - "9": 921653248.0, - "10": 921653248.0, - "11": 921653248.0, - "12": 921653248.0, - "13": 921653248.0, - "14": 921653248.0, - "15": 921653248.0, - "16": 921653248.0, - "17": 921653248.0, - "18": 921653248.0, - "19": 921653248.0, - "20": 921653248.0, - "21": 921653248.0, - "22": 921653248.0, - "23": 921653248.0, - "24": 921653248.0, - "25": 921653248.0, - "26": 921653248.0, - "27": 921653248.0, - "28": 921653248.0, - "29": 921653248.0, - "30": 921653248.0, - "31": 921653248.0, - "32": 921653248.0, - "33": 921653248.0, - "34": 921653248.0, - "35": 921653248.0, - "36": 921653248.0, - "37": 921653248.0, - "38": 921653248.0, - "39": 921653248.0, - "40": 921653248.0, - "41": 921653248.0, - "42": 921653248.0, - "43": 921653248.0, - "44": 921653248.0, - 
"45": 921653248.0, - "46": 921653248.0, - "47": 921653248.0, - "48": 921653248.0, - "49": 921653248.0, - "50": 921653248.0, - "51": 921653248.0, - "52": 921653248.0, - "53": 921653248.0, - "54": 921653248.0, - "55": 921653248.0, - "56": 921653248.0, - "57": 921653248.0, - "58": 921653248.0, - "59": 921653248.0, - "60": 921653248.0, - "61": 921653248.0, - "62": 921653248.0, - "63": 921653248.0, - "64": 921653248.0, - "65": 921653248.0, - "66": 921653248.0, - "67": 921653248.0, - "68": 921653248.0, - "69": 921653248.0, - "70": 921653248.0, - "71": 921653248.0, - "72": 921653248.0, - "73": 921653248.0, - "74": 921653248.0, - "75": 921653248.0, - "76": 921653248.0, - "77": 921653248.0, - "78": 921653248.0, - "79": 921653248.0, - "80": 921653248.0, - "81": 921653248.0, - "82": 921653248.0, - "83": 921653248.0, - "84": 921653248.0, - "85": 921653248.0, - "86": 921653248.0, - "87": 921653248.0, - "88": 921653248.0, - "89": 921653248.0, - "90": 921653248.0, - "91": 921653248.0, - "92": 921653248.0, - "93": 921653248.0, - "94": 921653248.0, - "95": 921653248.0, - "96": 921653248.0, - "97": 921653248.0, - "98": 921653248.0, - "99": 921653248.0, - "100": 921653248.0 + "1": 917459968.0, + "2": 917459968.0, + "3": 917459968.0, + "4": 917459968.0, + "5": 917459968.0, + "6": 917459968.0, + "7": 917459968.0, + "8": 917459968.0, + "9": 917459968.0, + "10": 917459968.0, + "11": 917459968.0, + "12": 917459968.0, + "13": 917459968.0, + "14": 917459968.0, + "15": 917459968.0, + "16": 917459968.0, + "17": 917459968.0, + "18": 917459968.0, + "19": 917459968.0, + "20": 917459968.0, + "21": 917459968.0, + "22": 917459968.0, + "23": 917459968.0, + "24": 917459968.0, + "25": 917459968.0, + "26": 917459968.0, + "27": 917459968.0, + "28": 917459968.0, + "29": 917459968.0, + "30": 917459968.0, + "31": 917459968.0, + "32": 917459968.0, + "33": 917459968.0, + "34": 917459968.0, + "35": 917459968.0, + "36": 917459968.0, + "37": 917459968.0, + "38": 917459968.0, + "39": 917459968.0, + "40": 
917459968.0, + "41": 917459968.0, + "42": 917459968.0, + "43": 917459968.0, + "44": 917459968.0, + "45": 917459968.0, + "46": 917459968.0, + "47": 917459968.0, + "48": 917459968.0, + "49": 917459968.0, + "50": 917459968.0, + "51": 917459968.0, + "52": 917459968.0, + "53": 917459968.0, + "54": 917459968.0, + "55": 917459968.0, + "56": 917459968.0, + "57": 917459968.0, + "58": 917459968.0, + "59": 917459968.0, + "60": 917459968.0, + "61": 917459968.0, + "62": 917459968.0, + "63": 917459968.0, + "64": 917459968.0, + "65": 917459968.0, + "66": 917459968.0, + "67": 917459968.0, + "68": 917459968.0, + "69": 917459968.0, + "70": 917459968.0, + "71": 917459968.0, + "72": 917459968.0, + "73": 917459968.0, + "74": 917459968.0, + "75": 917459968.0, + "76": 917459968.0, + "77": 917459968.0, + "78": 917459968.0, + "79": 917459968.0, + "80": 917459968.0, + "81": 917459968.0, + "82": 917459968.0, + "83": 917459968.0, + "84": 917459968.0, + "85": 917459968.0, + "86": 917459968.0, + "87": 917459968.0, + "88": 917459968.0, + "89": 917459968.0, + "90": 917459968.0, + "91": 917459968.0, + "92": 917459968.0, + "93": 917459968.0, + "94": 917459968.0, + "95": 917459968.0, + "96": 917459968.0, + "97": 917459968.0, + "98": 917459968.0, + "99": 917459968.0, + "100": 917459968.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2237722624.0, - "2": 2600334336.0, - "3": 2600334336.0, - "4": 2600334336.0, - "5": 2600334336.0, - "6": 2600334336.0, - "7": 2600334336.0, - "8": 2600334336.0, - "9": 2600334336.0, - "10": 2600334336.0, - "11": 2600334336.0, - "12": 2600334336.0, - "13": 2600334336.0, - "14": 2600334336.0, - "15": 2600334336.0, - "16": 2600334336.0, - "17": 2600334336.0, - "18": 2600334336.0, - "19": 2600334336.0, - "20": 2600334336.0, - "21": 2600334336.0, - "22": 2600334336.0, - "23": 2600334336.0, - "24": 2600334336.0, - "25": 2600334336.0, - "26": 2600334336.0, - "27": 2600334336.0, - "28": 2600334336.0, - "29": 
2600334336.0, - "30": 2600334336.0, - "31": 2600334336.0, - "32": 2600334336.0, - "33": 2600334336.0, - "34": 2600334336.0, - "35": 2600334336.0, - "36": 2600334336.0, - "37": 2600334336.0, - "38": 2600334336.0, - "39": 2600334336.0, - "40": 2600334336.0, - "41": 2600334336.0, - "42": 2600334336.0, - "43": 2600334336.0, - "44": 2600334336.0, - "45": 2600334336.0, - "46": 2600334336.0, - "47": 2600334336.0, - "48": 2600334336.0, - "49": 2600334336.0, - "50": 2600334336.0, - "51": 2600334336.0, - "52": 2600334336.0, - "53": 2600334336.0, - "54": 2600334336.0, - "55": 2600334336.0, - "56": 2600334336.0, - "57": 2600334336.0, - "58": 2600334336.0, - "59": 2600334336.0, - "60": 2600334336.0, - "61": 2600334336.0, - "62": 2600334336.0, - "63": 2600334336.0, - "64": 2600334336.0, - "65": 2600334336.0, - "66": 2600334336.0, - "67": 2600334336.0, - "68": 2600334336.0, - "69": 2600334336.0, - "70": 2600334336.0, - "71": 2600334336.0, - "72": 2600334336.0, - "73": 2600334336.0, - "74": 2600334336.0, - "75": 2600334336.0, - "76": 2600334336.0, - "77": 2600334336.0, - "78": 2600334336.0, - "79": 2600334336.0, - "80": 2600334336.0, - "81": 2600334336.0, - "82": 2600334336.0, - "83": 2600334336.0, - "84": 2600334336.0, - "85": 2600334336.0, - "86": 2600334336.0, - "87": 2600334336.0, - "88": 2600334336.0, - "89": 2600334336.0, - "90": 2600334336.0, - "91": 2600334336.0, - "92": 2600334336.0, - "93": 2600334336.0, - "94": 2600334336.0, - "95": 2600334336.0, - "96": 2600334336.0, - "97": 2600334336.0, - "98": 2600334336.0, - "99": 2600334336.0, - "100": 2600334336.0 + "1": 2236675072.0, + "2": 2596141056.0, + "3": 2596141056.0, + "4": 2596141056.0, + "5": 2596141056.0, + "6": 2596141056.0, + "7": 2596141056.0, + "8": 2596141056.0, + "9": 2596141056.0, + "10": 2596141056.0, + "11": 2596141056.0, + "12": 2596141056.0, + "13": 2596141056.0, + "14": 2596141056.0, + "15": 2596141056.0, + "16": 2596141056.0, + "17": 2596141056.0, + "18": 2596141056.0, + "19": 2596141056.0, + "20": 
2596141056.0, + "21": 2596141056.0, + "22": 2596141056.0, + "23": 2596141056.0, + "24": 2596141056.0, + "25": 2596141056.0, + "26": 2596141056.0, + "27": 2596141056.0, + "28": 2596141056.0, + "29": 2596141056.0, + "30": 2596141056.0, + "31": 2596141056.0, + "32": 2596141056.0, + "33": 2596141056.0, + "34": 2596141056.0, + "35": 2596141056.0, + "36": 2596141056.0, + "37": 2596141056.0, + "38": 2596141056.0, + "39": 2596141056.0, + "40": 2596141056.0, + "41": 2596141056.0, + "42": 2596141056.0, + "43": 2596141056.0, + "44": 2596141056.0, + "45": 2596141056.0, + "46": 2596141056.0, + "47": 2596141056.0, + "48": 2596141056.0, + "49": 2596141056.0, + "50": 2596141056.0, + "51": 2596141056.0, + "52": 2596141056.0, + "53": 2596141056.0, + "54": 2596141056.0, + "55": 2596141056.0, + "56": 2596141056.0, + "57": 2596141056.0, + "58": 2596141056.0, + "59": 2596141056.0, + "60": 2596141056.0, + "61": 2596141056.0, + "62": 2596141056.0, + "63": 2596141056.0, + "64": 2596141056.0, + "65": 2596141056.0, + "66": 2596141056.0, + "67": 2596141056.0, + "68": 2596141056.0, + "69": 2596141056.0, + "70": 2596141056.0, + "71": 2596141056.0, + "72": 2596141056.0, + "73": 2596141056.0, + "74": 2596141056.0, + "75": 2596141056.0, + "76": 2596141056.0, + "77": 2596141056.0, + "78": 2596141056.0, + "79": 2596141056.0, + "80": 2596141056.0, + "81": 2596141056.0, + "82": 2596141056.0, + "83": 2596141056.0, + "84": 2596141056.0, + "85": 2596141056.0, + "86": 2596141056.0, + "87": 2596141056.0, + "88": 2596141056.0, + "89": 2596141056.0, + "90": 2596141056.0, + "91": 2596141056.0, + "92": 2596141056.0, + "93": 2596141056.0, + "94": 2596141056.0, + "95": 2596141056.0, + "96": 2596141056.0, + "97": 2596141056.0, + "98": 2596141056.0, + "99": 2596141056.0, + "100": 2596141056.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.39748, - "2": 0.11699, - "3": 0.10324, - "4": 0.10602, - "5": 0.10273, - "6": 0.10169, - "7": 0.10402, - "8": 
0.10582, - "9": 0.10893, - "10": 0.10156, - "11": 0.10006, - "12": 0.10034, - "13": 0.10111, - "14": 0.10835, - "15": 0.10198, - "16": 0.10295, - "17": 0.10379, - "18": 0.10096, - "19": 0.10678, - "20": 0.10208, - "21": 0.10213, - "22": 0.10179, - "23": 0.10357, - "24": 0.10282, - "25": 0.09979, - "26": 0.10143, - "27": 0.10197, - "28": 0.10127, - "29": 0.10116, - "30": 0.10243, - "31": 0.10107, - "32": 0.10147, - "33": 0.10181, - "34": 0.1038, - "35": 0.10095, - "36": 0.09889, - "37": 0.09992, - "38": 0.10001, - "39": 0.10006, - "40": 0.10004, - "41": 0.09886, - "42": 0.09836, - "43": 0.09974, - "44": 0.10016, - "45": 0.10004, - "46": 0.09945, - "47": 0.0989, - "48": 0.09882, - "49": 0.09906, - "50": 0.09893, - "51": 0.10108, - "52": 0.10571, - "53": 0.10114, - "54": 0.09935, - "55": 0.09893, - "56": 0.09871, - "57": 0.10568, - "58": 0.09952, - "59": 0.10185, - "60": 0.09937, - "61": 0.09902, - "62": 0.10469, - "63": 0.10029, - "64": 0.09881, - "65": 0.09927, - "66": 0.09932, - "67": 0.10538, - "68": 0.09988, - "69": 0.10144, - "70": 0.09918, - "71": 0.10686, - "72": 0.09922, - "73": 0.09936, - "74": 0.09915, - "75": 0.09862, - "76": 0.1068, - "77": 0.09885, - "78": 0.09998, - "79": 0.1002, - "80": 0.09911, - "81": 0.10038, - "82": 0.09931, - "83": 0.09871, - "84": 0.09987, - "85": 0.09983, - "86": 0.10014, - "87": 0.0994, - "88": 0.09924, - "89": 0.10058, - "90": 0.10033, - "91": 0.10009, - "92": 0.10037, - "93": 0.09877, - "94": 0.09968, - "95": 0.10011, - "96": 0.09929, - "97": 0.09969, - "98": 0.09929, - "99": 0.10037, - "100": 0.10155 + "1": 7.66848, + "2": 0.11896, + "3": 0.09977, + "4": 0.07967, + "5": 0.07964, + "6": 0.07997, + "7": 0.08012, + "8": 0.07951, + "9": 0.08093, + "10": 0.07978, + "11": 0.07959, + "12": 0.0801, + "13": 0.08014, + "14": 0.08001, + "15": 0.08005, + "16": 0.0803, + "17": 0.0801, + "18": 0.07861, + "19": 0.07885, + "20": 0.07921, + "21": 0.07891, + "22": 0.07852, + "23": 0.07915, + "24": 0.07938, + "25": 0.08, + "26": 0.0813, + 
"27": 0.07978, + "28": 0.07899, + "29": 0.0798, + "30": 0.08028, + "31": 0.07891, + "32": 0.07911, + "33": 0.07848, + "34": 0.07925, + "35": 0.07821, + "36": 0.07899, + "37": 0.07887, + "38": 0.07866, + "39": 0.07853, + "40": 0.08169, + "41": 0.07849, + "42": 0.07836, + "43": 0.0786, + "44": 0.07878, + "45": 0.07828, + "46": 0.07805, + "47": 0.07784, + "48": 0.07807, + "49": 0.0787, + "50": 0.0789, + "51": 0.09689, + "52": 0.08417, + "53": 0.08482, + "54": 0.08198, + "55": 0.07942, + "56": 0.07871, + "57": 0.07976, + "58": 0.07956, + "59": 0.08, + "60": 0.0792, + "61": 0.07836, + "62": 0.07989, + "63": 0.0809, + "64": 0.08148, + "65": 0.08043, + "66": 0.07986, + "67": 0.08023, + "68": 0.07899, + "69": 0.07929, + "70": 0.08168, + "71": 0.08127, + "72": 0.0786, + "73": 0.07921, + "74": 0.07909, + "75": 0.0791, + "76": 0.07958, + "77": 0.07852, + "78": 0.07999, + "79": 0.07999, + "80": 0.08194, + "81": 0.07923, + "82": 0.07928, + "83": 0.07876, + "84": 0.07871, + "85": 0.08021, + "86": 0.07922, + "87": 0.07979, + "88": 0.0797, + "89": 0.08029, + "90": 0.15516, + "91": 0.11731, + "92": 0.11011, + "93": 0.14646, + "94": 0.08003, + "95": 0.08107, + "96": 0.07984, + "97": 0.07889, + "98": 0.07881, + "99": 0.07894, + "100": 0.07813 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..01ab2714529 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": 
"nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.80381, + "52": 9.68202, + "53": 10.02345, + "54": 9.91634, + "55": 9.82456, + "56": 9.56974, + "57": 9.42672, + "58": 9.78081, + "59": 9.53243, + "60": 9.44593, + "61": 9.64254, + "62": 9.94293, + "63": 9.31764, + "64": 9.72548, + "65": 8.88739, + "66": 9.65691, + "67": 9.31749, + "68": 9.73495, + "69": 9.74866, + "70": 9.69625, + "71": 9.57689, + "72": 9.52422, + "73": 9.45595, + "74": 8.88269, + "75": 9.37584, + "76": 9.01136, + "77": 10.02287, + "78": 9.67963, + "79": 9.33172, + "80": 9.35826, + "81": 9.43394, + "82": 9.65054, + "83": 9.25503, + "84": 9.3714, + "85": 9.5623, + "86": 9.03489, + "87": 9.54614, + "88": 9.69785, + "89": 9.54656, + "90": 9.77624, + "91": 9.2884, + "92": 9.30662, + "93": 9.02647, + "94": 8.78837, + "95": 9.48027, + "96": 9.47974, + "97": 9.25611, + "98": 9.61949, + "99": 8.83824, + "100": 9.35135 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + 
"31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2514.0, + "52": 2513.0, + "53": 2894.0, + "54": 2656.0, + "55": 2348.0, + "56": 2506.0, + "57": 2501.0, + "58": 2770.0, + "59": 2681.0, + "60": 2434.0, + "61": 2776.0, + "62": 2596.0, + "63": 2617.0, + "64": 3012.0, + "65": 2657.0, + "66": 2947.0, + "67": 3089.0, + "68": 2818.0, + "69": 2909.0, + "70": 3025.0, + "71": 2924.0, + "72": 2702.0, + "73": 2947.0, + "74": 2306.0, + "75": 2791.0, + "76": 3093.0, + "77": 3107.0, + "78": 3134.0, + "79": 3205.0, + "80": 3123.0, + "81": 3290.0, + "82": 3172.0, + "83": 2719.0, + "84": 3328.0, + "85": 3255.0, + "86": 2546.0, + "87": 3472.0, + "88": 3068.0, + "89": 2953.0, + "90": 3300.0, + "91": 3154.0, + "92": 3061.0, + "93": 2889.0, + "94": 3535.0, + "95": 3078.0, + "96": 3181.0, + "97": 3135.0, + "98": 3569.0, + "99": 3319.0, + "100": 3223.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 917459968.0, + "52": 917459968.0, + "53": 
917459968.0, + "54": 917459968.0, + "55": 917459968.0, + "56": 917459968.0, + "57": 917459968.0, + "58": 917459968.0, + "59": 917459968.0, + "60": 917459968.0, + "61": 917459968.0, + "62": 917459968.0, + "63": 917459968.0, + "64": 917459968.0, + "65": 917459968.0, + "66": 917459968.0, + "67": 917459968.0, + "68": 917459968.0, + "69": 917459968.0, + "70": 917459968.0, + "71": 917459968.0, + "72": 917459968.0, + "73": 917459968.0, + "74": 917459968.0, + "75": 917459968.0, + "76": 917459968.0, + "77": 917459968.0, + "78": 917459968.0, + "79": 917459968.0, + "80": 917459968.0, + "81": 917459968.0, + "82": 917459968.0, + "83": 917459968.0, + "84": 917459968.0, + "85": 917459968.0, + "86": 917459968.0, + "87": 917459968.0, + "88": 917459968.0, + "89": 917459968.0, + "90": 917459968.0, + "91": 917459968.0, + "92": 917459968.0, + "93": 917459968.0, + "94": 917459968.0, + "95": 917459968.0, + "96": 917459968.0, + "97": 917459968.0, + "98": 917459968.0, + "99": 917459968.0, + "100": 917459968.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2596140032.0, + "52": 2596141056.0, + "53": 2596141056.0, + "54": 2596141056.0, + "55": 2596141056.0, + "56": 2596141056.0, + "57": 
2596141056.0, + "58": 2596141056.0, + "59": 2596141056.0, + "60": 2596141056.0, + "61": 2596141056.0, + "62": 2596141056.0, + "63": 2596141056.0, + "64": 2596141056.0, + "65": 2596141056.0, + "66": 2596141056.0, + "67": 2596141056.0, + "68": 2596141056.0, + "69": 2596141056.0, + "70": 2596141056.0, + "71": 2596141056.0, + "72": 2596141056.0, + "73": 2596141056.0, + "74": 2596141056.0, + "75": 2596141056.0, + "76": 2596141056.0, + "77": 2596141056.0, + "78": 2596141056.0, + "79": 2596141056.0, + "80": 2596141056.0, + "81": 2596141056.0, + "82": 2596141056.0, + "83": 2596141056.0, + "84": 2596141056.0, + "85": 2596141056.0, + "86": 2596141056.0, + "87": 2596141056.0, + "88": 2596141056.0, + "89": 2596141056.0, + "90": 2596141056.0, + "91": 2596141056.0, + "92": 2596141056.0, + "93": 2596141056.0, + "94": 2596141056.0, + "95": 2596141056.0, + "96": 2596141056.0, + "97": 2596141056.0, + "98": 2596141056.0, + "99": 2596141056.0, + "100": 2596141056.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.16514, + "52": 0.11315, + "53": 0.08114, + "54": 0.08317, + "55": 0.08019, + "56": 0.08314, + "57": 0.08621, + "58": 0.08016, + "59": 0.07921, + "60": 0.08005, + "61": 0.08103, + 
"62": 0.10234, + "63": 0.1001, + "64": 0.4876, + "65": 0.08127, + "66": 0.079, + "67": 0.07859, + "68": 0.08085, + "69": 0.07943, + "70": 0.07842, + "71": 0.07899, + "72": 0.07958, + "73": 0.07925, + "74": 0.08017, + "75": 0.07902, + "76": 0.08039, + "77": 0.07802, + "78": 0.07857, + "79": 0.07907, + "80": 0.07806, + "81": 0.07858, + "82": 0.08046, + "83": 0.07775, + "84": 0.07777, + "85": 0.07752, + "86": 0.07844, + "87": 0.07834, + "88": 0.07837, + "89": 0.07893, + "90": 0.07826, + "91": 0.07839, + "92": 0.07815, + "93": 0.07767, + "94": 0.0784, + "95": 0.07785, + "96": 0.07909, + "97": 0.07789, + "98": 0.0771, + "99": 0.07799, + "100": 0.08104 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..13a8f84be7c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.7692, + "2": 10.78173, + "3": 10.77785, + "4": 10.75155, + "5": 10.80909, + "6": 10.8218, + "7": 10.80242, + "8": 10.78781, + "9": 10.7948, + "10": 10.713, + "11": 10.85088, + "12": 10.80067, + "13": 10.82614, + "14": 10.84338, + "15": 10.75514, + "16": 10.75194, + "17": 10.70801, + "18": 10.74736, + "19": 10.74723, + "20": 10.64347, + "21": 10.60434, + "22": 10.43223, + "23": 10.66534, + "24": 10.50025, + "25": 10.43523, + "26": 10.51418, + "27": 10.5415, + "28": 10.51383, + "29": 10.53731, + "30": 10.25937, + "31": 9.97666, + "32": 10.39972, + "33": 10.38587, + "34": 10.11012, + "35": 10.17419, + "36": 10.11601, + "37": 10.26042, + "38": 
10.0751, + "39": 10.32912, + "40": 9.9687, + "41": 10.05131, + "42": 10.12745, + "43": 9.70822, + "44": 9.83332, + "45": 9.70556, + "46": 9.7136, + "47": 10.05915, + "48": 9.7456, + "49": 9.40552, + "50": 9.80892, + "51": 9.76773, + "52": 9.64757, + "53": 9.99521, + "54": 9.88624, + "55": 9.81783, + "56": 9.53944, + "57": 9.38198, + "58": 9.75913, + "59": 9.52125, + "60": 9.42463, + "61": 9.63669, + "62": 9.93001, + "63": 9.29627, + "64": 9.70638, + "65": 8.88076, + "66": 9.64079, + "67": 9.32154, + "68": 9.737, + "69": 9.75369, + "70": 9.68294, + "71": 9.58129, + "72": 9.52492, + "73": 9.44113, + "74": 8.86077, + "75": 9.3667, + "76": 9.01682, + "77": 10.0224, + "78": 9.68369, + "79": 9.33323, + "80": 9.35819, + "81": 9.43805, + "82": 9.66108, + "83": 9.26227, + "84": 9.37195, + "85": 9.56661, + "86": 9.04515, + "87": 9.55767, + "88": 9.70545, + "89": 9.55104, + "90": 9.79186, + "91": 9.29174, + "92": 9.31247, + "93": 9.04313, + "94": 8.7869, + "95": 9.49543, + "96": 9.48418, + "97": 9.25973, + "98": 9.62635, + "99": 8.85054, + "100": 9.36076 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1750.0, + "2": 1874.0, + "3": 1769.0, + "4": 1936.0, + "5": 2122.0, + "6": 2095.0, + "7": 2027.0, + "8": 1845.0, + "9": 2127.0, + "10": 1456.0, + "11": 1996.0, + "12": 1715.0, + "13": 2108.0, + "14": 1919.0, + "15": 2047.0, + "16": 1932.0, + "17": 2016.0, + "18": 1872.0, + "19": 1921.0, + "20": 1768.0, + "21": 1953.0, + "22": 1836.0, + "23": 2100.0, + "24": 1817.0, + "25": 1809.0, + "26": 1841.0, + "27": 2005.0, + "28": 2109.0, + "29": 2055.0, + "30": 1949.0, + "31": 1736.0, + "32": 2070.0, + "33": 2162.0, + "34": 1964.0, + "35": 2007.0, + "36": 2021.0, + "37": 2425.0, + "38": 2329.0, + "39": 2430.0, + "40": 2340.0, + "41": 2324.0, + "42": 2289.0, + "43": 2097.0, + "44": 2349.0, + "45": 2282.0, + "46": 2442.0, + "47": 2459.0, + "48": 2414.0, + "49": 2282.0, + "50": 2385.0, + "51": 2647.0, + "52": 2648.0, + "53": 2878.0, 
+ "54": 2654.0, + "55": 2580.0, + "56": 2658.0, + "57": 2547.0, + "58": 2739.0, + "59": 2779.0, + "60": 2349.0, + "61": 2741.0, + "62": 2617.0, + "63": 2512.0, + "64": 2800.0, + "65": 2697.0, + "66": 2966.0, + "67": 2952.0, + "68": 2833.0, + "69": 3029.0, + "70": 2977.0, + "71": 2813.0, + "72": 2664.0, + "73": 3085.0, + "74": 2292.0, + "75": 2810.0, + "76": 3025.0, + "77": 3025.0, + "78": 3037.0, + "79": 3181.0, + "80": 3234.0, + "81": 3273.0, + "82": 3294.0, + "83": 2707.0, + "84": 3332.0, + "85": 3336.0, + "86": 2585.0, + "87": 3448.0, + "88": 3239.0, + "89": 3137.0, + "90": 3341.0, + "91": 3188.0, + "92": 3246.0, + "93": 2823.0, + "94": 3358.0, + "95": 3202.0, + "96": 3118.0, + "97": 3163.0, + "98": 3645.0, + "99": 3345.0, + "100": 3201.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 917459968.0, + "2": 917459968.0, + "3": 917459968.0, + "4": 917459968.0, + "5": 917459968.0, + "6": 917459968.0, + "7": 917459968.0, + "8": 917459968.0, + "9": 917459968.0, + "10": 917459968.0, + "11": 917459968.0, + "12": 917459968.0, + "13": 917459968.0, + "14": 917459968.0, + "15": 917459968.0, + "16": 917459968.0, + "17": 917459968.0, + "18": 917459968.0, + "19": 917459968.0, + "20": 917459968.0, + "21": 917459968.0, + "22": 917459968.0, + "23": 917459968.0, + "24": 917459968.0, + "25": 917459968.0, + "26": 917459968.0, + "27": 917459968.0, + "28": 917459968.0, + "29": 917459968.0, + "30": 917459968.0, + "31": 917459968.0, + "32": 917459968.0, + "33": 917459968.0, + "34": 917459968.0, + "35": 917459968.0, + "36": 917459968.0, + "37": 917459968.0, + "38": 917459968.0, + "39": 917459968.0, + "40": 917459968.0, + "41": 917459968.0, + "42": 917459968.0, + "43": 917459968.0, + "44": 917459968.0, + "45": 917459968.0, + "46": 917459968.0, + "47": 917459968.0, + "48": 917459968.0, + "49": 917459968.0, + "50": 917459968.0, + "51": 917459968.0, + "52": 917459968.0, + "53": 917459968.0, + "54": 917459968.0, + "55": 
917459968.0, + "56": 917459968.0, + "57": 917459968.0, + "58": 917459968.0, + "59": 917459968.0, + "60": 917459968.0, + "61": 917459968.0, + "62": 917459968.0, + "63": 917459968.0, + "64": 917459968.0, + "65": 917459968.0, + "66": 917459968.0, + "67": 917459968.0, + "68": 917459968.0, + "69": 917459968.0, + "70": 917459968.0, + "71": 917459968.0, + "72": 917459968.0, + "73": 917459968.0, + "74": 917459968.0, + "75": 917459968.0, + "76": 917459968.0, + "77": 917459968.0, + "78": 917459968.0, + "79": 917459968.0, + "80": 917459968.0, + "81": 917459968.0, + "82": 917459968.0, + "83": 917459968.0, + "84": 917459968.0, + "85": 917459968.0, + "86": 917459968.0, + "87": 917459968.0, + "88": 917459968.0, + "89": 917459968.0, + "90": 917459968.0, + "91": 917459968.0, + "92": 917459968.0, + "93": 917459968.0, + "94": 917459968.0, + "95": 917459968.0, + "96": 917459968.0, + "97": 917459968.0, + "98": 917459968.0, + "99": 917459968.0, + "100": 917459968.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2266035200.0, + "2": 2624452608.0, + "3": 2624452608.0, + "4": 2624452608.0, + "5": 2624452608.0, + "6": 2624452608.0, + "7": 2624452608.0, + "8": 2624452608.0, + "9": 2624452608.0, + "10": 2624452608.0, + "11": 2624452608.0, + "12": 2624452608.0, + "13": 2624452608.0, + "14": 2624452608.0, + "15": 2624452608.0, + "16": 2624452608.0, + "17": 2624452608.0, + "18": 2624452608.0, + "19": 2624452608.0, + "20": 2624452608.0, + "21": 2624452608.0, + "22": 2624452608.0, + "23": 2624452608.0, + "24": 2624452608.0, + "25": 2624452608.0, + "26": 2624452608.0, + "27": 2624452608.0, + "28": 2624452608.0, + "29": 2624452608.0, + "30": 2624452608.0, + "31": 2624452608.0, + "32": 2624452608.0, + "33": 2624452608.0, + "34": 2624452608.0, + "35": 2624452608.0, + "36": 2624452608.0, + "37": 2624452608.0, + "38": 2624452608.0, + "39": 2624452608.0, + "40": 2624452608.0, + "41": 2624452608.0, + "42": 2624452608.0, + "43": 
2624452608.0, + "44": 2624452608.0, + "45": 2624452608.0, + "46": 2624452608.0, + "47": 2624452608.0, + "48": 2624452608.0, + "49": 2624452608.0, + "50": 2624452608.0, + "51": 2624452608.0, + "52": 2624452608.0, + "53": 2624452608.0, + "54": 2624452608.0, + "55": 2624452608.0, + "56": 2624452608.0, + "57": 2624452608.0, + "58": 2624452608.0, + "59": 2624452608.0, + "60": 2624452608.0, + "61": 2624452608.0, + "62": 2624452608.0, + "63": 2624452608.0, + "64": 2624452608.0, + "65": 2624452608.0, + "66": 2624452608.0, + "67": 2624452608.0, + "68": 2624452608.0, + "69": 2624452608.0, + "70": 2624452608.0, + "71": 2624452608.0, + "72": 2624452608.0, + "73": 2624452608.0, + "74": 2624452608.0, + "75": 2624452608.0, + "76": 2624452608.0, + "77": 2624452608.0, + "78": 2624452608.0, + "79": 2624452608.0, + "80": 2624452608.0, + "81": 2624452608.0, + "82": 2624452608.0, + "83": 2624452608.0, + "84": 2624452608.0, + "85": 2624452608.0, + "86": 2624452608.0, + "87": 2624452608.0, + "88": 2624452608.0, + "89": 2624452608.0, + "90": 2624452608.0, + "91": 2624452608.0, + "92": 2624452608.0, + "93": 2624452608.0, + "94": 2624452608.0, + "95": 2624452608.0, + "96": 2624452608.0, + "97": 2624452608.0, + "98": 2624452608.0, + "99": 2624452608.0, + "100": 2624452608.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.53475, + "2": 0.15984, + "3": 0.14377, + "4": 0.29674, + "5": 0.13063, + "6": 0.13043, + "7": 0.13235, + "8": 0.13474, + "9": 0.13363, + "10": 0.13339, + "11": 0.13581, + "12": 0.13012, + "13": 0.13019, + "14": 0.13252, + "15": 0.13313, + "16": 0.13357, + "17": 0.13327, + "18": 0.13417, + "19": 0.1331, + "20": 0.1329, + "21": 0.13223, + "22": 0.32857, + "23": 0.13492, + "24": 0.13386, + "25": 0.13206, + "26": 0.13477, + "27": 0.13149, + "28": 0.13502, + "29": 0.13417, + "30": 0.13168, + "31": 0.13588, + "32": 0.13436, + "33": 0.13143, + "34": 0.13205, + "35": 0.13068, + "36": 0.13116, + "37": 0.13337, + "38": 
0.22586, + "39": 0.13222, + "40": 0.13032, + "41": 0.13333, + "42": 0.13093, + "43": 0.13146, + "44": 0.17904, + "45": 0.13291, + "46": 0.13299, + "47": 0.13217, + "48": 0.19742, + "49": 0.24879, + "50": 0.13041, + "51": 0.17217, + "52": 0.14728, + "53": 0.14883, + "54": 0.15217, + "55": 0.15333, + "56": 0.15162, + "57": 0.14349, + "58": 0.5576, + "59": 0.13842, + "60": 0.13366, + "61": 0.13505, + "62": 0.13481, + "63": 0.13475, + "64": 0.13594, + "65": 0.13184, + "66": 0.13558, + "67": 0.1672, + "68": 0.13268, + "69": 0.13176, + "70": 0.13495, + "71": 0.13816, + "72": 0.13681, + "73": 0.13679, + "74": 0.13748, + "75": 0.13564, + "76": 0.13376, + "77": 0.13018, + "78": 0.13137, + "79": 0.13475, + "80": 0.1358, + "81": 0.1337, + "82": 0.13153, + "83": 0.13119, + "84": 0.13428, + "85": 0.15135, + "86": 0.13542, + "87": 0.13379, + "88": 0.13317, + "89": 0.13159, + "90": 0.1344, + "91": 0.13415, + "92": 0.1338, + "93": 0.13311, + "94": 0.13567, + "95": 0.13426, + "96": 0.13525, + "97": 0.13575, + "98": 0.133, + "99": 0.13093, + "100": 0.13623 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100.json index 8655a61eb9b..b1c227e9ae3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 921653248.0, - "2": 921653248.0, - "3": 921653248.0, - "4": 921653248.0, - "5": 921653248.0, - "6": 921653248.0, - "7": 921653248.0, - "8": 921653248.0, - "9": 
921653248.0, - "10": 921653248.0, - "11": 921653248.0, - "12": 921653248.0, - "13": 921653248.0, - "14": 921653248.0, - "15": 921653248.0, - "16": 921653248.0, - "17": 921653248.0, - "18": 921653248.0, - "19": 921653248.0, - "20": 921653248.0, - "21": 921653248.0, - "22": 921653248.0, - "23": 921653248.0, - "24": 921653248.0, - "25": 921653248.0, - "26": 921653248.0, - "27": 921653248.0, - "28": 921653248.0, - "29": 921653248.0, - "30": 921653248.0, - "31": 921653248.0, - "32": 921653248.0, - "33": 921653248.0, - "34": 921653248.0, - "35": 921653248.0, - "36": 921653248.0, - "37": 921653248.0, - "38": 921653248.0, - "39": 921653248.0, - "40": 921653248.0, - "41": 921653248.0, - "42": 921653248.0, - "43": 921653248.0, - "44": 921653248.0, - "45": 921653248.0, - "46": 921653248.0, - "47": 921653248.0, - "48": 921653248.0, - "49": 921653248.0, - "50": 921653248.0, - "51": 921653248.0, - "52": 921653248.0, - "53": 921653248.0, - "54": 921653248.0, - "55": 921653248.0, - "56": 921653248.0, - "57": 921653248.0, - "58": 921653248.0, - "59": 921653248.0, - "60": 921653248.0, - "61": 921653248.0, - "62": 921653248.0, - "63": 921653248.0, - "64": 921653248.0, - "65": 921653248.0, - "66": 921653248.0, - "67": 921653248.0, - "68": 921653248.0, - "69": 921653248.0, - "70": 921653248.0, - "71": 921653248.0, - "72": 921653248.0, - "73": 921653248.0, - "74": 921653248.0, - "75": 921653248.0, - "76": 921653248.0, - "77": 921653248.0, - "78": 921653248.0, - "79": 921653248.0, - "80": 921653248.0, - "81": 921653248.0, - "82": 921653248.0, - "83": 921653248.0, - "84": 921653248.0, - "85": 921653248.0, - "86": 921653248.0, - "87": 921653248.0, - "88": 921653248.0, - "89": 921653248.0, - "90": 921653248.0, - "91": 921653248.0, - "92": 921653248.0, - "93": 921653248.0, - "94": 921653248.0, - "95": 921653248.0, - "96": 921653248.0, - "97": 921653248.0, - "98": 921653248.0, - "99": 921653248.0, - "100": 921653248.0 + "1": 917459968.0, + "2": 917459968.0, + "3": 917459968.0, + "4": 
917459968.0, + "5": 917459968.0, + "6": 917459968.0, + "7": 917459968.0, + "8": 917459968.0, + "9": 917459968.0, + "10": 917459968.0, + "11": 917459968.0, + "12": 917459968.0, + "13": 917459968.0, + "14": 917459968.0, + "15": 917459968.0, + "16": 917459968.0, + "17": 917459968.0, + "18": 917459968.0, + "19": 917459968.0, + "20": 917459968.0, + "21": 917459968.0, + "22": 917459968.0, + "23": 917459968.0, + "24": 917459968.0, + "25": 917459968.0, + "26": 917459968.0, + "27": 917459968.0, + "28": 917459968.0, + "29": 917459968.0, + "30": 917459968.0, + "31": 917459968.0, + "32": 917459968.0, + "33": 917459968.0, + "34": 917459968.0, + "35": 917459968.0, + "36": 917459968.0, + "37": 917459968.0, + "38": 917459968.0, + "39": 917459968.0, + "40": 917459968.0, + "41": 917459968.0, + "42": 917459968.0, + "43": 917459968.0, + "44": 917459968.0, + "45": 917459968.0, + "46": 917459968.0, + "47": 917459968.0, + "48": 917459968.0, + "49": 917459968.0, + "50": 917459968.0, + "51": 917459968.0, + "52": 917459968.0, + "53": 917459968.0, + "54": 917459968.0, + "55": 917459968.0, + "56": 917459968.0, + "57": 917459968.0, + "58": 917459968.0, + "59": 917459968.0, + "60": 917459968.0, + "61": 917459968.0, + "62": 917459968.0, + "63": 917459968.0, + "64": 917459968.0, + "65": 917459968.0, + "66": 917459968.0, + "67": 917459968.0, + "68": 917459968.0, + "69": 917459968.0, + "70": 917459968.0, + "71": 917459968.0, + "72": 917459968.0, + "73": 917459968.0, + "74": 917459968.0, + "75": 917459968.0, + "76": 917459968.0, + "77": 917459968.0, + "78": 917459968.0, + "79": 917459968.0, + "80": 917459968.0, + "81": 917459968.0, + "82": 917459968.0, + "83": 917459968.0, + "84": 917459968.0, + "85": 917459968.0, + "86": 917459968.0, + "87": 917459968.0, + "88": 917459968.0, + "89": 917459968.0, + "90": 917459968.0, + "91": 917459968.0, + "92": 917459968.0, + "93": 917459968.0, + "94": 917459968.0, + "95": 917459968.0, + "96": 917459968.0, + "97": 917459968.0, + "98": 917459968.0, + "99": 
917459968.0, + "100": 917459968.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2240868352.0, - "2": 2603480064.0, - "3": 2603480064.0, - "4": 2603480064.0, - "5": 2603480064.0, - "6": 2603480064.0, - "7": 2603480064.0, - "8": 2603480064.0, - "9": 2603480064.0, - "10": 2603480064.0, - "11": 2603480064.0, - "12": 2603480064.0, - "13": 2603480064.0, - "14": 2603480064.0, - "15": 2603480064.0, - "16": 2603480064.0, - "17": 2603480064.0, - "18": 2603480064.0, - "19": 2603480064.0, - "20": 2603480064.0, - "21": 2603480064.0, - "22": 2603480064.0, - "23": 2603480064.0, - "24": 2603480064.0, - "25": 2603480064.0, - "26": 2603480064.0, - "27": 2603480064.0, - "28": 2603480064.0, - "29": 2603480064.0, - "30": 2603480064.0, - "31": 2603480064.0, - "32": 2603480064.0, - "33": 2603480064.0, - "34": 2603480064.0, - "35": 2603480064.0, - "36": 2603480064.0, - "37": 2603480064.0, - "38": 2603480064.0, - "39": 2603480064.0, - "40": 2603480064.0, - "41": 2603480064.0, - "42": 2603480064.0, - "43": 2603480064.0, - "44": 2603480064.0, - "45": 2603480064.0, - "46": 2603480064.0, - "47": 2603480064.0, - "48": 2603480064.0, - "49": 2603480064.0, - "50": 2603480064.0, - "51": 2603480064.0, - "52": 2603480064.0, - "53": 2603480064.0, - "54": 2603480064.0, - "55": 2603480064.0, - "56": 2603480064.0, - "57": 2603480064.0, - "58": 2603480064.0, - "59": 2603480064.0, - "60": 2603480064.0, - "61": 2603480064.0, - "62": 2603480064.0, - "63": 2603480064.0, - "64": 2603480064.0, - "65": 2603480064.0, - "66": 2603480064.0, - "67": 2603480064.0, - "68": 2603480064.0, - "69": 2603480064.0, - "70": 2603480064.0, - "71": 2603480064.0, - "72": 2603480064.0, - "73": 2603480064.0, - "74": 2603480064.0, - "75": 2603480064.0, - "76": 2603480064.0, - "77": 2603480064.0, - "78": 2603480064.0, - "79": 2603480064.0, - "80": 2603480064.0, - "81": 2603480064.0, - "82": 2603480064.0, - "83": 2603480064.0, - "84": 2603480064.0, - "85": 
2603480064.0, - "86": 2603480064.0, - "87": 2603480064.0, - "88": 2603480064.0, - "89": 2603480064.0, - "90": 2603480064.0, - "91": 2603480064.0, - "92": 2603480064.0, - "93": 2603480064.0, - "94": 2603480064.0, - "95": 2603480064.0, - "96": 2603480064.0, - "97": 2603480064.0, - "98": 2603480064.0, - "99": 2603480064.0, - "100": 2603480064.0 + "1": 2239820800.0, + "2": 2599286784.0, + "3": 2599286784.0, + "4": 2599286784.0, + "5": 2599286784.0, + "6": 2599286784.0, + "7": 2599286784.0, + "8": 2599286784.0, + "9": 2599286784.0, + "10": 2599286784.0, + "11": 2599286784.0, + "12": 2599286784.0, + "13": 2599286784.0, + "14": 2599286784.0, + "15": 2599286784.0, + "16": 2599286784.0, + "17": 2599286784.0, + "18": 2599286784.0, + "19": 2599286784.0, + "20": 2599286784.0, + "21": 2599286784.0, + "22": 2599286784.0, + "23": 2599286784.0, + "24": 2599286784.0, + "25": 2599286784.0, + "26": 2599286784.0, + "27": 2599286784.0, + "28": 2599286784.0, + "29": 2599286784.0, + "30": 2599286784.0, + "31": 2599286784.0, + "32": 2599286784.0, + "33": 2599286784.0, + "34": 2599286784.0, + "35": 2599286784.0, + "36": 2599286784.0, + "37": 2599286784.0, + "38": 2599286784.0, + "39": 2599286784.0, + "40": 2599286784.0, + "41": 2599286784.0, + "42": 2599286784.0, + "43": 2599286784.0, + "44": 2599286784.0, + "45": 2599286784.0, + "46": 2599286784.0, + "47": 2599286784.0, + "48": 2599286784.0, + "49": 2599286784.0, + "50": 2599286784.0, + "51": 2599286784.0, + "52": 2599286784.0, + "53": 2599286784.0, + "54": 2599286784.0, + "55": 2599286784.0, + "56": 2599286784.0, + "57": 2599286784.0, + "58": 2599286784.0, + "59": 2599286784.0, + "60": 2599286784.0, + "61": 2599286784.0, + "62": 2599286784.0, + "63": 2599286784.0, + "64": 2599286784.0, + "65": 2599286784.0, + "66": 2599286784.0, + "67": 2599286784.0, + "68": 2599286784.0, + "69": 2599286784.0, + "70": 2599286784.0, + "71": 2599286784.0, + "72": 2599286784.0, + "73": 2599286784.0, + "74": 2599286784.0, + "75": 2599286784.0, + "76": 
2599286784.0, + "77": 2599286784.0, + "78": 2599286784.0, + "79": 2599286784.0, + "80": 2599286784.0, + "81": 2599286784.0, + "82": 2599286784.0, + "83": 2599286784.0, + "84": 2599286784.0, + "85": 2599286784.0, + "86": 2599286784.0, + "87": 2599286784.0, + "88": 2599286784.0, + "89": 2599286784.0, + "90": 2599286784.0, + "91": 2599286784.0, + "92": 2599286784.0, + "93": 2599286784.0, + "94": 2599286784.0, + "95": 2599286784.0, + "96": 2599286784.0, + "97": 2599286784.0, + "98": 2599286784.0, + "99": 2599286784.0, + "100": 2599286784.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.33977, - "2": 0.14663, - "3": 0.12463, - "4": 0.11901, - "5": 0.118, - "6": 0.11842, - "7": 0.11849, - "8": 0.11649, - "9": 0.11703, - "10": 0.11655, - "11": 0.11646, - "12": 0.11802, - "13": 0.11742, - "14": 0.1167, - "15": 0.11429, - "16": 0.11654, - "17": 0.11533, - "18": 0.11853, - "19": 0.1171, - "20": 0.11735, - "21": 0.11515, - "22": 0.11632, - "23": 0.11865, - "24": 0.11706, - "25": 0.11644, - "26": 0.11684, - "27": 0.11688, - "28": 0.11839, - "29": 0.11706, - "30": 0.11761, - "31": 0.11696, - "32": 0.11567, - "33": 0.1149, - "34": 0.11395, - "35": 0.11367, - "36": 0.11567, - "37": 0.11646, - "38": 0.11392, - "39": 0.11516, - "40": 0.11529, - "41": 0.11559, - "42": 0.11519, - "43": 0.11808, - "44": 0.11599, - "45": 0.11605, - "46": 0.11502, - "47": 0.11651, - "48": 0.11713, - "49": 0.11667, - "50": 0.11432, - "51": 0.12857, - "52": 0.12187, - "53": 0.11684, - "54": 0.11222, - "55": 0.11538, - "56": 0.11241, - "57": 0.11229, - "58": 0.11087, - "59": 0.11183, - "60": 0.11124, - "61": 0.11009, - "62": 0.11052, - "63": 0.11585, - "64": 0.11262, - "65": 0.11148, - "66": 0.11248, - "67": 0.11274, - "68": 0.11394, - "69": 0.11397, - "70": 0.11233, - "71": 0.11354, - "72": 0.11589, - "73": 0.11373, - "74": 0.11483, - "75": 0.11512, - "76": 0.11378, - "77": 0.11431, - "78": 0.11374, - "79": 0.11521, - "80": 0.11486, - "81": 
0.11364, - "82": 0.11419, - "83": 0.11439, - "84": 0.11589, - "85": 0.11422, - "86": 0.11458, - "87": 0.11184, - "88": 0.11418, - "89": 0.11264, - "90": 0.11169, - "91": 0.11452, - "92": 0.11215, - "93": 0.11431, - "94": 0.11145, - "95": 0.11129, - "96": 0.11113, - "97": 0.11365, - "98": 0.11127, - "99": 0.11136, - "100": 0.11229 + "1": 7.15273, + "2": 0.12761, + "3": 0.108, + "4": 0.08804, + "5": 0.08914, + "6": 0.0872, + "7": 0.0865, + "8": 0.09025, + "9": 0.09224, + "10": 0.08785, + "11": 0.08842, + "12": 0.08678, + "13": 0.08768, + "14": 0.08732, + "15": 0.08754, + "16": 0.08689, + "17": 0.08745, + "18": 0.08749, + "19": 0.08681, + "20": 0.08755, + "21": 0.08798, + "22": 0.08687, + "23": 0.0869, + "24": 0.08666, + "25": 0.08694, + "26": 0.08728, + "27": 0.08672, + "28": 0.09131, + "29": 0.09876, + "30": 0.09345, + "31": 0.0871, + "32": 0.08745, + "33": 0.0868, + "34": 0.08664, + "35": 0.08688, + "36": 0.08685, + "37": 0.08807, + "38": 0.08807, + "39": 0.09095, + "40": 0.08728, + "41": 0.08918, + "42": 0.0874, + "43": 0.08812, + "44": 0.08765, + "45": 0.08765, + "46": 0.08695, + "47": 0.08967, + "48": 0.08734, + "49": 0.08707, + "50": 0.08818, + "51": 0.09801, + "52": 0.09366, + "53": 0.09478, + "54": 0.09027, + "55": 0.08632, + "56": 0.0857, + "57": 0.08636, + "58": 0.08585, + "59": 0.08632, + "60": 0.08559, + "61": 0.08575, + "62": 0.08716, + "63": 0.08612, + "64": 0.08569, + "65": 0.0876, + "66": 0.08587, + "67": 0.0862, + "68": 0.08594, + "69": 0.0858, + "70": 0.08668, + "71": 0.08553, + "72": 0.08961, + "73": 0.09562, + "74": 0.09156, + "75": 0.0901, + "76": 0.08615, + "77": 0.08562, + "78": 0.08664, + "79": 0.08569, + "80": 0.08621, + "81": 0.08562, + "82": 0.08601, + "83": 0.08551, + "84": 0.08569, + "85": 0.08622, + "86": 0.08639, + "87": 0.08581, + "88": 0.08569, + "89": 0.08624, + "90": 0.086, + "91": 0.08602, + "92": 0.08575, + "93": 0.08626, + "94": 0.0869, + "95": 0.0867, + "96": 0.0872, + "97": 0.08727, + "98": 0.08652, + "99": 0.0867, + "100": 
0.08593 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..57da3647845 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.80375, + "52": 9.68218, + "53": 10.02348, + "54": 9.91595, + "55": 9.82442, + "56": 9.56994, + "57": 9.42628, + "58": 9.78075, + "59": 9.53254, + "60": 9.44561, + "61": 9.64249, + "62": 9.94298, + "63": 9.31745, + "64": 9.7256, + "65": 8.88735, + "66": 9.65711, + "67": 9.31747, + "68": 9.73506, + "69": 9.74863, + "70": 9.69601, + "71": 9.57682, + "72": 9.52425, + "73": 9.4558, + "74": 8.8826, + "75": 9.37563, + "76": 9.01106, + "77": 10.02278, + "78": 9.6796, + "79": 9.33171, + "80": 9.35836, + "81": 9.43399, + "82": 9.65055, + "83": 9.2551, + 
"84": 9.37131, + "85": 9.56237, + "86": 9.0351, + "87": 9.54617, + "88": 9.69806, + "89": 9.54657, + "90": 9.77627, + "91": 9.28858, + "92": 9.30652, + "93": 9.02646, + "94": 8.7883, + "95": 9.48041, + "96": 9.47962, + "97": 9.25545, + "98": 9.61947, + "99": 8.83854, + "100": 9.35116 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2508.0, + "52": 2483.0, + "53": 2959.0, + "54": 2554.0, + "55": 2408.0, + "56": 2452.0, + "57": 2528.0, + "58": 2594.0, + "59": 2750.0, + "60": 2563.0, + "61": 2794.0, + "62": 2495.0, + "63": 2493.0, + "64": 2965.0, + "65": 2569.0, + "66": 2877.0, + "67": 2969.0, + "68": 2803.0, + "69": 2944.0, + "70": 3001.0, + "71": 2867.0, + "72": 2714.0, + "73": 3017.0, + "74": 2281.0, + "75": 2774.0, + "76": 2983.0, + "77": 2955.0, + "78": 3148.0, + "79": 3076.0, + "80": 2992.0, + "81": 3255.0, + "82": 3212.0, + "83": 2809.0, + "84": 3266.0, + "85": 3188.0, + "86": 2616.0, + "87": 3492.0, + "88": 3130.0, + "89": 3020.0, + "90": 3238.0, + "91": 3106.0, + "92": 3183.0, + "93": 2960.0, + "94": 3492.0, + "95": 3112.0, + "96": 3256.0, + "97": 3055.0, + "98": 3558.0, + "99": 3196.0, + "100": 3109.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, 
+ "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 917459968.0, + "52": 917459968.0, + "53": 917459968.0, + "54": 917459968.0, + "55": 917459968.0, + "56": 917459968.0, + "57": 917459968.0, + "58": 917459968.0, + "59": 917459968.0, + "60": 917459968.0, + "61": 917459968.0, + "62": 917459968.0, + "63": 917459968.0, + "64": 917459968.0, + "65": 917459968.0, + "66": 917459968.0, + "67": 917459968.0, + "68": 917459968.0, + "69": 917459968.0, + "70": 917459968.0, + "71": 917459968.0, + "72": 917459968.0, + "73": 917459968.0, + "74": 917459968.0, + "75": 917459968.0, + "76": 917459968.0, + "77": 917459968.0, + "78": 917459968.0, + "79": 917459968.0, + "80": 917459968.0, + "81": 917459968.0, + "82": 917459968.0, + "83": 917459968.0, + "84": 917459968.0, + "85": 917459968.0, + "86": 917459968.0, + "87": 917459968.0, + "88": 917459968.0, + "89": 917459968.0, + "90": 917459968.0, + "91": 917459968.0, + "92": 917459968.0, + "93": 917459968.0, + "94": 917459968.0, + "95": 917459968.0, + "96": 917459968.0, + "97": 917459968.0, + "98": 917459968.0, + "99": 917459968.0, + "100": 917459968.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + 
"5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2599285760.0, + "52": 2599286784.0, + "53": 2599286784.0, + "54": 2599286784.0, + "55": 2599286784.0, + "56": 2599286784.0, + "57": 2599286784.0, + "58": 2599286784.0, + "59": 2599286784.0, + "60": 2599286784.0, + "61": 2599286784.0, + "62": 2599286784.0, + "63": 2599286784.0, + "64": 2599286784.0, + "65": 2599286784.0, + "66": 2599286784.0, + "67": 2599286784.0, + "68": 2599286784.0, + "69": 2599286784.0, + "70": 2599286784.0, + "71": 2599286784.0, + "72": 2599286784.0, + "73": 2599286784.0, + "74": 2599286784.0, + "75": 2599286784.0, + "76": 2599286784.0, + "77": 2599286784.0, + "78": 2599286784.0, + "79": 2599286784.0, + "80": 2599286784.0, + "81": 2599286784.0, + "82": 2599286784.0, + "83": 2599286784.0, + "84": 2599286784.0, + "85": 2599286784.0, + "86": 2599286784.0, + "87": 2599286784.0, + "88": 2599286784.0, + "89": 2599286784.0, + "90": 2599286784.0, + "91": 2599286784.0, + "92": 2599286784.0, + "93": 2599286784.0, + "94": 2599286784.0, + "95": 2599286784.0, + "96": 2599286784.0, + "97": 2599286784.0, + "98": 2599286784.0, + "99": 2599286784.0, + "100": 2599286784.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", 
+ "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.45, + "52": 0.1176, + "53": 0.08802, + "54": 0.08699, + "55": 0.08722, + "56": 0.08722, + "57": 0.09047, + "58": 0.08702, + "59": 0.08774, + "60": 0.08696, + "61": 0.08697, + "62": 0.08669, + "63": 0.08744, + "64": 0.08973, + "65": 0.08942, + "66": 0.08847, + "67": 0.0878, + "68": 0.0868, + "69": 0.08686, + "70": 0.08743, + "71": 0.08699, + "72": 0.08754, + "73": 0.08641, + "74": 0.08819, + "75": 0.08738, + "76": 0.50165, + "77": 0.08865, + "78": 0.08729, + "79": 0.0866, + "80": 0.08763, + "81": 0.08755, + "82": 0.08768, + "83": 0.0877, + "84": 0.08704, + "85": 0.08686, + "86": 0.0893, + "87": 0.08757, + "88": 0.08695, + "89": 0.08918, + "90": 0.08715, + "91": 0.08682, + "92": 0.08819, + "93": 0.08755, + "94": 0.08919, + "95": 0.08702, + "96": 0.0863, + "97": 0.08852, + "98": 0.08865, + "99": 0.08679, + "100": 0.08757 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json index 48aee8d379f..80a7902517d 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.51973, - "2": 0.20593, - "3": 0.14945, - "4": 0.14775, - "5": 0.14785, - "6": 0.14767, - "7": 0.14754, - "8": 0.14649, - "9": 0.14636, - "10": 0.14713, - "11": 0.14628, - "12": 0.14658, - "13": 0.14581, - "14": 0.14652, - "15": 0.14657, - "16": 0.14585, - "17": 0.14783, - "18": 0.1469, - "19": 0.14603, - "20": 0.14662, - "21": 0.14635, - "22": 0.1461, - "23": 0.14688, - "24": 0.14579, - "25": 0.14587, - "26": 0.14836, - "27": 0.14598, - "28": 0.1458, - "29": 0.14604, - "30": 0.14624, - "31": 0.14719, - "32": 0.14625, - "33": 0.14582, - "34": 0.14603, - "35": 0.14619, - "36": 0.14587, - "37": 0.14585, - "38": 0.14625, - "39": 0.14572, - "40": 0.14629, - "41": 0.14561, - "42": 0.14587, - "43": 0.14672, - "44": 0.14572, - "45": 0.14618, - "46": 0.14622, - "47": 0.14572, - "48": 0.14538, - "49": 0.14571, - "50": 0.1457, - "51": 0.1553, - "52": 0.14793, - "53": 0.14797, - "54": 0.14774, - "55": 0.14702, - "56": 0.15765, - "57": 0.1544, - "58": 0.15368, - "59": 0.15399, - "60": 0.15366, - "61": 0.15362, - "62": 0.15351, - "63": 0.15339, - "64": 0.15353, - "65": 0.15154, - "66": 0.14531, - "67": 0.14661, - "68": 0.14599, - "69": 0.14546, - "70": 0.14633, - "71": 0.14568, - "72": 0.1461, - "73": 0.14601, - "74": 0.1469, - "75": 0.14561, - "76": 0.14575, - "77": 0.14581, - "78": 0.14634, - "79": 0.14619, - "80": 0.14627, - "81": 0.146, - "82": 0.14559, - "83": 0.14618, - "84": 0.14683, - "85": 0.14582, - "86": 0.1462, - "87": 0.14574, - "88": 0.14574, - "89": 0.14516, - "90": 0.14556, - "91": 0.146, - "92": 0.14702, - "93": 0.14541, - "94": 0.14625, - "95": 0.14586, - "96": 0.1455, - "97": 
0.14559, - "98": 0.14614, - "99": 0.15005, - "100": 0.14598 + "1": 6.65648, + "2": 0.19179, + "3": 0.15416, + "4": 0.14165, + "5": 0.14069, + "6": 0.14005, + "7": 0.14441, + "8": 0.14847, + "9": 0.14867, + "10": 0.15034, + "11": 0.14788, + "12": 0.14812, + "13": 0.14762, + "14": 0.14827, + "15": 0.14673, + "16": 0.14725, + "17": 0.14727, + "18": 0.14703, + "19": 0.14722, + "20": 0.14733, + "21": 0.14692, + "22": 0.14653, + "23": 0.14777, + "24": 0.14694, + "25": 0.14763, + "26": 0.1471, + "27": 0.14674, + "28": 0.14635, + "29": 0.14703, + "30": 0.14621, + "31": 0.14691, + "32": 0.14767, + "33": 0.14672, + "34": 0.14669, + "35": 0.14593, + "36": 0.14589, + "37": 0.14687, + "38": 0.14638, + "39": 0.14701, + "40": 0.14657, + "41": 0.14668, + "42": 0.14663, + "43": 0.14455, + "44": 0.13873, + "45": 0.13973, + "46": 0.13942, + "47": 0.13835, + "48": 0.13884, + "49": 0.13842, + "50": 0.13788, + "51": 0.14634, + "52": 0.14143, + "53": 0.13935, + "54": 0.14449, + "55": 0.13995, + "56": 0.14005, + "57": 0.13884, + "58": 0.13823, + "59": 0.13958, + "60": 0.13806, + "61": 0.13998, + "62": 0.1391, + "63": 0.13808, + "64": 0.1378, + "65": 0.13831, + "66": 0.13766, + "67": 0.13871, + "68": 0.13842, + "69": 0.13825, + "70": 0.14322, + "71": 0.13773, + "72": 0.13739, + "73": 0.1379, + "74": 0.13895, + "75": 0.14238, + "76": 0.14002, + "77": 0.13711, + "78": 0.13768, + "79": 0.13786, + "80": 0.13681, + "81": 0.13744, + "82": 0.13817, + "83": 0.13649, + "84": 0.13687, + "85": 0.13779, + "86": 0.14075, + "87": 0.13645, + "88": 0.1389, + "89": 0.13781, + "90": 0.13671, + "91": 0.13682, + "92": 0.13637, + "93": 0.13642, + "94": 0.13696, + "95": 0.13741, + "96": 0.1363, + "97": 0.13656, + "98": 0.13634, + "99": 0.13708, + "100": 0.14224 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100_2nd.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..2287a0ab752 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.79157, + "52": 9.68731, + "53": 10.02181, + "54": 9.90398, + "55": 9.82389, + "56": 9.57081, + "57": 9.40818, + "58": 9.77678, + "59": 9.52729, + "60": 9.44284, + "61": 9.64071, + "62": 9.94046, + "63": 9.31099, + "64": 9.72506, + "65": 8.8916, + "66": 9.6525, + "67": 9.31718, + "68": 9.73957, + "69": 9.74304, + "70": 9.67942, + "71": 9.56228, + "72": 9.53149, + "73": 9.44531, + "74": 8.88431, + "75": 9.3677, + "76": 9.02482, + "77": 10.01647, + "78": 9.6813, + "79": 9.32719, + "80": 9.3577, + "81": 9.43335, + "82": 9.64804, + "83": 9.25573, + "84": 9.36738, + "85": 9.56091, + "86": 9.03567, + "87": 9.54622, + "88": 9.70041, + "89": 9.54992, + "90": 9.77126, + "91": 9.28801, + "92": 9.31055, + "93": 9.03195, + "94": 8.78121, + "95": 9.48115, + "96": 
9.4759, + "97": 9.2489, + "98": 9.61705, + "99": 8.8368, + "100": 9.35043 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2482.0, + "52": 2570.0, + "53": 2835.0, + "54": 2589.0, + "55": 2450.0, + "56": 2744.0, + "57": 2429.0, + "58": 2684.0, + "59": 2748.0, + "60": 2464.0, + "61": 2995.0, + "62": 2518.0, + "63": 2570.0, + "64": 2843.0, + "65": 2648.0, + "66": 2842.0, + "67": 2954.0, + "68": 2833.0, + "69": 3027.0, + "70": 2993.0, + "71": 3010.0, + "72": 2597.0, + "73": 3002.0, + "74": 2325.0, + "75": 2882.0, + "76": 3143.0, + "77": 3062.0, + "78": 3272.0, + "79": 3303.0, + "80": 3280.0, + "81": 3517.0, + "82": 3283.0, + "83": 2834.0, + "84": 3365.0, + "85": 3288.0, + "86": 2562.0, + "87": 3493.0, + "88": 3388.0, + "89": 3102.0, + "90": 3230.0, + "91": 3154.0, + "92": 3263.0, + "93": 2967.0, + "94": 3520.0, + "95": 3175.0, + "96": 3317.0, + "97": 2999.0, + "98": 3549.0, + "99": 3248.0, + "100": 3227.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + 
"13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 888098816.0, + "52": 888098816.0, + "53": 888098816.0, + "54": 888098816.0, + "55": 888098816.0, + "56": 888098816.0, + "57": 888098816.0, + "58": 888098816.0, + "59": 888098816.0, + "60": 888098816.0, + "61": 888098816.0, + "62": 888098816.0, + "63": 888098816.0, + "64": 888098816.0, + "65": 888098816.0, + "66": 888098816.0, + "67": 888098816.0, + "68": 888098816.0, + "69": 888098816.0, + "70": 888098816.0, + "71": 888098816.0, + "72": 888098816.0, + "73": 888098816.0, + "74": 888098816.0, + "75": 888098816.0, + "76": 888098816.0, + "77": 888098816.0, + "78": 888098816.0, + "79": 888098816.0, + "80": 888098816.0, + "81": 888098816.0, + "82": 888098816.0, + "83": 888098816.0, + "84": 888098816.0, + "85": 888098816.0, + "86": 888098816.0, + "87": 888098816.0, + "88": 888098816.0, + "89": 888098816.0, + "90": 888098816.0, + "91": 888098816.0, + "92": 888098816.0, + "93": 888098816.0, + "94": 888098816.0, + "95": 888098816.0, + "96": 888098816.0, + "97": 888098816.0, + "98": 888098816.0, + "99": 888098816.0, + "100": 888098816.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + 
"19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2595090432.0, + "52": 2595091456.0, + "53": 2595091456.0, + "54": 2595091456.0, + "55": 2595091456.0, + "56": 2595091456.0, + "57": 2595091456.0, + "58": 2595091456.0, + "59": 2595091456.0, + "60": 2595091456.0, + "61": 2595091456.0, + "62": 2595091456.0, + "63": 2595091456.0, + "64": 2595091456.0, + "65": 2595091456.0, + "66": 2595091456.0, + "67": 2595091456.0, + "68": 2595091456.0, + "69": 2595091456.0, + "70": 2595091456.0, + "71": 2595091456.0, + "72": 2595091456.0, + "73": 2595091456.0, + "74": 2595091456.0, + "75": 2595091456.0, + "76": 2595091456.0, + "77": 2595091456.0, + "78": 2595091456.0, + "79": 2595091456.0, + "80": 2595091456.0, + "81": 2595091456.0, + "82": 2595091456.0, + "83": 2595091456.0, + "84": 2595091456.0, + "85": 2595091456.0, + "86": 2595091456.0, + "87": 2595091456.0, + "88": 2595091456.0, + "89": 2595091456.0, + "90": 2595091456.0, + "91": 2595091456.0, + "92": 2595091456.0, + "93": 2595091456.0, + "94": 2595091456.0, + "95": 2595091456.0, + "96": 2595091456.0, + "97": 2595091456.0, + "98": 2595091456.0, + "99": 2595091456.0, + "100": 2595091456.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": 
"nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.7416, + "52": 0.17157, + "53": 0.14456, + "54": 0.14361, + "55": 0.14299, + "56": 0.14258, + "57": 0.14257, + "58": 0.14319, + "59": 0.14348, + "60": 0.1429, + "61": 0.14295, + "62": 0.1431, + "63": 0.1419, + "64": 0.14379, + "65": 0.59005, + "66": 0.15082, + "67": 0.14226, + "68": 0.14098, + "69": 0.14096, + "70": 0.1413, + "71": 0.14073, + "72": 0.14094, + "73": 0.14097, + "74": 0.14117, + "75": 0.14054, + "76": 0.14081, + "77": 0.14153, + "78": 0.59387, + "79": 0.14301, + "80": 0.14139, + "81": 0.14173, + "82": 0.1418, + "83": 0.14133, + "84": 0.14096, + "85": 0.14024, + "86": 0.14063, + "87": 0.14049, + "88": 0.14117, + "89": 0.14144, + "90": 0.14055, + "91": 0.14175, + "92": 0.14246, + "93": 0.14114, + "94": 0.14391, + "95": 0.14119, + "96": 0.14114, + "97": 0.14158, + "98": 0.1408, + "99": 0.14214, + "100": 0.14462 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..4143efc2988 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.83582, + "2": 10.83571, + "3": 10.83523, + "4": 10.79949, + "5": 10.84909, + "6": 
10.86563, + "7": 10.82789, + "8": 10.8363, + "9": 10.83997, + "10": 10.79865, + "11": 10.8677, + "12": 10.84994, + "13": 10.85915, + "14": 10.86874, + "15": 10.80173, + "16": 10.79183, + "17": 10.77353, + "18": 10.78739, + "19": 10.78983, + "20": 10.68446, + "21": 10.6784, + "22": 10.5257, + "23": 10.70726, + "24": 10.56551, + "25": 10.51602, + "26": 10.58017, + "27": 10.58981, + "28": 10.54551, + "29": 10.57726, + "30": 10.34051, + "31": 10.07051, + "32": 10.44503, + "33": 10.44293, + "34": 10.19391, + "35": 10.24261, + "36": 10.19236, + "37": 10.32969, + "38": 10.16551, + "39": 10.38729, + "40": 10.05174, + "41": 10.12191, + "42": 10.19259, + "43": 9.8069, + "44": 9.92475, + "45": 9.80639, + "46": 9.80145, + "47": 10.12104, + "48": 9.83127, + "49": 9.50404, + "50": 9.87954, + "51": 9.83807, + "52": 9.72058, + "53": 10.0568, + "54": 9.95032, + "55": 9.88328, + "56": 9.60431, + "57": 9.45518, + "58": 9.81927, + "59": 9.58262, + "60": 9.48844, + "61": 9.68577, + "62": 9.97779, + "63": 9.36765, + "64": 9.75913, + "65": 8.9376, + "66": 9.69257, + "67": 9.36621, + "68": 9.78303, + "69": 9.79318, + "70": 9.72699, + "71": 9.62875, + "72": 9.58004, + "73": 9.487, + "74": 8.92041, + "75": 9.41128, + "76": 9.07564, + "77": 10.05848, + "78": 9.72184, + "79": 9.3732, + "80": 9.40079, + "81": 9.4792, + "82": 9.69754, + "83": 9.31037, + "84": 9.41777, + "85": 9.61194, + "86": 9.07155, + "87": 9.59661, + "88": 9.74709, + "89": 9.59667, + "90": 9.82915, + "91": 9.33725, + "92": 9.3564, + "93": 9.08552, + "94": 8.82807, + "95": 9.52842, + "96": 9.52611, + "97": 9.30632, + "98": 9.66808, + "99": 8.89461, + "100": 9.40666 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1536.0, + "2": 1592.0, + "3": 1551.0, + "4": 1769.0, + "5": 1824.0, + "6": 1800.0, + "7": 1734.0, + "8": 1619.0, + "9": 1829.0, + "10": 1355.0, + "11": 1911.0, + "12": 1721.0, + "13": 1913.0, + "14": 1708.0, + "15": 1919.0, + "16": 1938.0, + "17": 1740.0, + 
"18": 1676.0, + "19": 1743.0, + "20": 1535.0, + "21": 1797.0, + "22": 1661.0, + "23": 1887.0, + "24": 1666.0, + "25": 1633.0, + "26": 1676.0, + "27": 1740.0, + "28": 1991.0, + "29": 1918.0, + "30": 1806.0, + "31": 1588.0, + "32": 1863.0, + "33": 2126.0, + "34": 1812.0, + "35": 1976.0, + "36": 1875.0, + "37": 2301.0, + "38": 2131.0, + "39": 2351.0, + "40": 2130.0, + "41": 2391.0, + "42": 2255.0, + "43": 1975.0, + "44": 2138.0, + "45": 2208.0, + "46": 2364.0, + "47": 2564.0, + "48": 2337.0, + "49": 2142.0, + "50": 2423.0, + "51": 2546.0, + "52": 2590.0, + "53": 2879.0, + "54": 2697.0, + "55": 2316.0, + "56": 2549.0, + "57": 2261.0, + "58": 2904.0, + "59": 2740.0, + "60": 2434.0, + "61": 2801.0, + "62": 2663.0, + "63": 2502.0, + "64": 2948.0, + "65": 2644.0, + "66": 2961.0, + "67": 2813.0, + "68": 2686.0, + "69": 2912.0, + "70": 3096.0, + "71": 2854.0, + "72": 2454.0, + "73": 3081.0, + "74": 1933.0, + "75": 2465.0, + "76": 3012.0, + "77": 3163.0, + "78": 2997.0, + "79": 3089.0, + "80": 3187.0, + "81": 3500.0, + "82": 3339.0, + "83": 2705.0, + "84": 3205.0, + "85": 3033.0, + "86": 2818.0, + "87": 3671.0, + "88": 3190.0, + "89": 3336.0, + "90": 3320.0, + "91": 2698.0, + "92": 3072.0, + "93": 2750.0, + "94": 3397.0, + "95": 3317.0, + "96": 3290.0, + "97": 3116.0, + "98": 3732.0, + "99": 3049.0, + "100": 2974.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 745146880.0, + "2": 745146880.0, + "3": 745146880.0, + "4": 745146880.0, + "5": 745146880.0, + "6": 745146880.0, + "7": 745146880.0, + "8": 745146880.0, + "9": 745146880.0, + "10": 745146880.0, + "11": 745146880.0, + "12": 745146880.0, + "13": 745146880.0, + "14": 745146880.0, + "15": 745146880.0, + "16": 745146880.0, + "17": 745146880.0, + "18": 745146880.0, + "19": 745146880.0, + "20": 745146880.0, + "21": 745146880.0, + "22": 745146880.0, + "23": 745146880.0, + "24": 745146880.0, + "25": 745146880.0, + "26": 745146880.0, + "27": 745146880.0, + 
"28": 745146880.0, + "29": 745146880.0, + "30": 745146880.0, + "31": 745146880.0, + "32": 745146880.0, + "33": 745146880.0, + "34": 745146880.0, + "35": 745146880.0, + "36": 745146880.0, + "37": 745146880.0, + "38": 745146880.0, + "39": 745146880.0, + "40": 745146880.0, + "41": 745146880.0, + "42": 745146880.0, + "43": 745146880.0, + "44": 745146880.0, + "45": 745146880.0, + "46": 745146880.0, + "47": 745146880.0, + "48": 745146880.0, + "49": 745146880.0, + "50": 745146880.0, + "51": 745146880.0, + "52": 745146880.0, + "53": 745146880.0, + "54": 745146880.0, + "55": 745146880.0, + "56": 745146880.0, + "57": 745146880.0, + "58": 745146880.0, + "59": 745146880.0, + "60": 745146880.0, + "61": 745146880.0, + "62": 745146880.0, + "63": 745146880.0, + "64": 745146880.0, + "65": 745146880.0, + "66": 745146880.0, + "67": 745146880.0, + "68": 745146880.0, + "69": 745146880.0, + "70": 745146880.0, + "71": 745146880.0, + "72": 745146880.0, + "73": 745146880.0, + "74": 745146880.0, + "75": 745146880.0, + "76": 745146880.0, + "77": 745146880.0, + "78": 745146880.0, + "79": 745146880.0, + "80": 745146880.0, + "81": 745146880.0, + "82": 745146880.0, + "83": 745146880.0, + "84": 745146880.0, + "85": 745146880.0, + "86": 745146880.0, + "87": 745146880.0, + "88": 745146880.0, + "89": 745146880.0, + "90": 745146880.0, + "91": 745146880.0, + "92": 745146880.0, + "93": 745146880.0, + "94": 745146880.0, + "95": 745146880.0, + "96": 745146880.0, + "97": 745146880.0, + "98": 745146880.0, + "99": 745146880.0, + "100": 745146880.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1939785728.0, + "2": 2222434304.0, + "3": 2222434304.0, + "4": 2222434304.0, + "5": 2222434304.0, + "6": 2222434304.0, + "7": 2222434304.0, + "8": 2222434304.0, + "9": 2222434304.0, + "10": 2222434304.0, + "11": 2222434304.0, + "12": 2222434304.0, + "13": 2222434304.0, + "14": 2222434304.0, + "15": 2222434304.0, + "16": 2222434304.0, + "17": 
2222434304.0, + "18": 2222434304.0, + "19": 2222434304.0, + "20": 2222434304.0, + "21": 2222434304.0, + "22": 2222434304.0, + "23": 2222434304.0, + "24": 2222434304.0, + "25": 2222434304.0, + "26": 2222434304.0, + "27": 2222434304.0, + "28": 2222434304.0, + "29": 2222434304.0, + "30": 2222434304.0, + "31": 2222434304.0, + "32": 2222434304.0, + "33": 2222434304.0, + "34": 2222434304.0, + "35": 2222434304.0, + "36": 2222434304.0, + "37": 2222434304.0, + "38": 2222434304.0, + "39": 2222434304.0, + "40": 2222434304.0, + "41": 2222434304.0, + "42": 2222434304.0, + "43": 2222434304.0, + "44": 2222434304.0, + "45": 2222434304.0, + "46": 2222434304.0, + "47": 2222434304.0, + "48": 2222434304.0, + "49": 2222434304.0, + "50": 2222434304.0, + "51": 2222434304.0, + "52": 2222434304.0, + "53": 2222434304.0, + "54": 2222434304.0, + "55": 2222434304.0, + "56": 2222434304.0, + "57": 2222434304.0, + "58": 2222434304.0, + "59": 2222434304.0, + "60": 2222434304.0, + "61": 2222434304.0, + "62": 2222434304.0, + "63": 2222434304.0, + "64": 2222434304.0, + "65": 2222434304.0, + "66": 2222434304.0, + "67": 2222434304.0, + "68": 2222434304.0, + "69": 2222434304.0, + "70": 2222434304.0, + "71": 2222434304.0, + "72": 2222434304.0, + "73": 2222434304.0, + "74": 2222434304.0, + "75": 2222434304.0, + "76": 2222434304.0, + "77": 2222434304.0, + "78": 2222434304.0, + "79": 2222434304.0, + "80": 2222434304.0, + "81": 2222434304.0, + "82": 2222434304.0, + "83": 2222434304.0, + "84": 2222434304.0, + "85": 2222434304.0, + "86": 2222434304.0, + "87": 2222434304.0, + "88": 2222434304.0, + "89": 2222434304.0, + "90": 2222434304.0, + "91": 2222434304.0, + "92": 2222434304.0, + "93": 2222434304.0, + "94": 2222434304.0, + "95": 2222434304.0, + "96": 2222434304.0, + "97": 2222434304.0, + "98": 2222434304.0, + "99": 2222434304.0, + "100": 2222434304.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5.52284, + "2": 0.157, + "3": 0.14283, + "4": 
0.12717, + "5": 0.23804, + "6": 0.12672, + "7": 0.23745, + "8": 0.12054, + "9": 0.21684, + "10": 0.11896, + "11": 0.13284, + "12": 0.11855, + "13": 0.11845, + "14": 0.11744, + "15": 0.11809, + "16": 0.11959, + "17": 0.11704, + "18": 0.22382, + "19": 0.30417, + "20": 0.13849, + "21": 0.11644, + "22": 0.24942, + "23": 0.11902, + "24": 0.11673, + "25": 0.11881, + "26": 0.11714, + "27": 0.26517, + "28": 0.11796, + "29": 0.11692, + "30": 0.1177, + "31": 0.1199, + "32": 0.11855, + "33": 0.20894, + "34": 0.1189, + "35": 0.11946, + "36": 0.11731, + "37": 0.11585, + "38": 0.22438, + "39": 0.11586, + "40": 0.31661, + "41": 0.27224, + "42": 0.11828, + "43": 0.11576, + "44": 0.31558, + "45": 0.11735, + "46": 0.11931, + "47": 0.2329, + "48": 0.20057, + "49": 0.11638, + "50": 0.14553, + "51": 0.15092, + "52": 0.12868, + "53": 0.29978, + "54": 0.13487, + "55": 0.1206, + "56": 0.117, + "57": 0.117, + "58": 0.11712, + "59": 0.11789, + "60": 0.11693, + "61": 0.11525, + "62": 0.24109, + "63": 0.11906, + "64": 0.12054, + "65": 0.11805, + "66": 0.11831, + "67": 0.11744, + "68": 0.11454, + "69": 0.39474, + "70": 0.11683, + "71": 0.117, + "72": 0.11875, + "73": 0.28446, + "74": 0.22373, + "75": 0.11573, + "76": 0.1177, + "77": 0.11707, + "78": 0.24184, + "79": 0.11755, + "80": 0.11784, + "81": 0.21803, + "82": 0.11787, + "83": 0.23349, + "84": 0.22596, + "85": 0.11587, + "86": 0.11507, + "87": 0.16522, + "88": 0.24306, + "89": 0.12003, + "90": 0.23071, + "91": 0.12051, + "92": 0.12072, + "93": 0.11991, + "94": 0.22186, + "95": 0.12105, + "96": 0.12128, + "97": 0.11916, + "98": 0.12303, + "99": 0.1197, + "100": 0.1207 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100.json index 72743900cff..5b2aa3ce19c 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 746194432.0, - "2": 746194432.0, - "3": 746194432.0, - "4": 746194432.0, - "5": 746194432.0, - "6": 746194432.0, - "7": 746194432.0, - "8": 746194432.0, - "9": 746194432.0, - "10": 746194432.0, - "11": 746194432.0, - "12": 746194432.0, - "13": 746194432.0, - "14": 746194432.0, - "15": 746194432.0, - "16": 746194432.0, - "17": 746194432.0, - "18": 746194432.0, - "19": 746194432.0, - "20": 746194432.0, - "21": 746194432.0, - "22": 746194432.0, - "23": 746194432.0, - "24": 746194432.0, - "25": 746194432.0, - "26": 746194432.0, - "27": 746194432.0, - "28": 746194432.0, - "29": 746194432.0, - "30": 746194432.0, - "31": 746194432.0, - "32": 746194432.0, - "33": 746194432.0, - "34": 746194432.0, - "35": 746194432.0, - "36": 746194432.0, - "37": 746194432.0, - "38": 746194432.0, - "39": 746194432.0, - "40": 746194432.0, - "41": 746194432.0, - "42": 746194432.0, - "43": 746194432.0, - "44": 746194432.0, - "45": 746194432.0, - "46": 746194432.0, - "47": 746194432.0, - "48": 746194432.0, - "49": 746194432.0, - "50": 746194432.0, - "51": 746194432.0, - "52": 746194432.0, - "53": 746194432.0, - "54": 746194432.0, - "55": 746194432.0, - "56": 746194432.0, - "57": 746194432.0, - "58": 746194432.0, - "59": 746194432.0, - "60": 746194432.0, - "61": 746194432.0, - "62": 746194432.0, - "63": 746194432.0, - "64": 746194432.0, - "65": 746194432.0, - "66": 746194432.0, - "67": 746194432.0, - "68": 746194432.0, - "69": 746194432.0, - "70": 746194432.0, - "71": 746194432.0, - "72": 746194432.0, - "73": 746194432.0, - "74": 746194432.0, - "75": 746194432.0, - "76": 746194432.0, - "77": 746194432.0, - "78": 746194432.0, - "79": 746194432.0, - "80": 
746194432.0, - "81": 746194432.0, - "82": 746194432.0, - "83": 746194432.0, - "84": 746194432.0, - "85": 746194432.0, - "86": 746194432.0, - "87": 746194432.0, - "88": 746194432.0, - "89": 746194432.0, - "90": 746194432.0, - "91": 746194432.0, - "92": 746194432.0, - "93": 746194432.0, - "94": 746194432.0, - "95": 746194432.0, - "96": 746194432.0, - "97": 746194432.0, - "98": 746194432.0, - "99": 746194432.0, - "100": 746194432.0 + "1": 747244032.0, + "2": 747244032.0, + "3": 747244032.0, + "4": 747244032.0, + "5": 747244032.0, + "6": 747244032.0, + "7": 747244032.0, + "8": 747244032.0, + "9": 747244032.0, + "10": 747244032.0, + "11": 747244032.0, + "12": 747244032.0, + "13": 747244032.0, + "14": 747244032.0, + "15": 747244032.0, + "16": 747244032.0, + "17": 747244032.0, + "18": 747244032.0, + "19": 747244032.0, + "20": 747244032.0, + "21": 747244032.0, + "22": 747244032.0, + "23": 747244032.0, + "24": 747244032.0, + "25": 747244032.0, + "26": 747244032.0, + "27": 747244032.0, + "28": 747244032.0, + "29": 747244032.0, + "30": 747244032.0, + "31": 747244032.0, + "32": 747244032.0, + "33": 747244032.0, + "34": 747244032.0, + "35": 747244032.0, + "36": 747244032.0, + "37": 747244032.0, + "38": 747244032.0, + "39": 747244032.0, + "40": 747244032.0, + "41": 747244032.0, + "42": 747244032.0, + "43": 747244032.0, + "44": 747244032.0, + "45": 747244032.0, + "46": 747244032.0, + "47": 747244032.0, + "48": 747244032.0, + "49": 747244032.0, + "50": 747244032.0, + "51": 747244032.0, + "52": 747244032.0, + "53": 747244032.0, + "54": 747244032.0, + "55": 747244032.0, + "56": 747244032.0, + "57": 747244032.0, + "58": 747244032.0, + "59": 747244032.0, + "60": 747244032.0, + "61": 747244032.0, + "62": 747244032.0, + "63": 747244032.0, + "64": 747244032.0, + "65": 747244032.0, + "66": 747244032.0, + "67": 747244032.0, + "68": 747244032.0, + "69": 747244032.0, + "70": 747244032.0, + "71": 747244032.0, + "72": 747244032.0, + "73": 747244032.0, + "74": 747244032.0, + "75": 747244032.0, 
+ "76": 747244032.0, + "77": 747244032.0, + "78": 747244032.0, + "79": 747244032.0, + "80": 747244032.0, + "81": 747244032.0, + "82": 747244032.0, + "83": 747244032.0, + "84": 747244032.0, + "85": 747244032.0, + "86": 747244032.0, + "87": 747244032.0, + "88": 747244032.0, + "89": 747244032.0, + "90": 747244032.0, + "91": 747244032.0, + "92": 747244032.0, + "93": 747244032.0, + "94": 747244032.0, + "95": 747244032.0, + "96": 747244032.0, + "97": 747244032.0, + "98": 747244032.0, + "99": 747244032.0, + "100": 747244032.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1926153216.0, - "2": 2209851392.0, - "3": 2209851392.0, - "4": 2209851392.0, - "5": 2209851392.0, - "6": 2209851392.0, - "7": 2209851392.0, - "8": 2209851392.0, - "9": 2209851392.0, - "10": 2209851392.0, - "11": 2209851392.0, - "12": 2209851392.0, - "13": 2209851392.0, - "14": 2209851392.0, - "15": 2209851392.0, - "16": 2209851392.0, - "17": 2209851392.0, - "18": 2209851392.0, - "19": 2209851392.0, - "20": 2209851392.0, - "21": 2209851392.0, - "22": 2209851392.0, - "23": 2209851392.0, - "24": 2209851392.0, - "25": 2209851392.0, - "26": 2209851392.0, - "27": 2209851392.0, - "28": 2209851392.0, - "29": 2209851392.0, - "30": 2209851392.0, - "31": 2209851392.0, - "32": 2209851392.0, - "33": 2209851392.0, - "34": 2209851392.0, - "35": 2209851392.0, - "36": 2209851392.0, - "37": 2209851392.0, - "38": 2209851392.0, - "39": 2209851392.0, - "40": 2209851392.0, - "41": 2209851392.0, - "42": 2209851392.0, - "43": 2209851392.0, - "44": 2209851392.0, - "45": 2209851392.0, - "46": 2209851392.0, - "47": 2209851392.0, - "48": 2209851392.0, - "49": 2209851392.0, - "50": 2209851392.0, - "51": 2209851392.0, - "52": 2209851392.0, - "53": 2209851392.0, - "54": 2209851392.0, - "55": 2209851392.0, - "56": 2209851392.0, - "57": 2209851392.0, - "58": 2209851392.0, - "59": 2209851392.0, - "60": 2209851392.0, - "61": 2209851392.0, - "62": 2209851392.0, - "63": 
2209851392.0, - "64": 2209851392.0, - "65": 2209851392.0, - "66": 2209851392.0, - "67": 2209851392.0, - "68": 2209851392.0, - "69": 2209851392.0, - "70": 2209851392.0, - "71": 2209851392.0, - "72": 2209851392.0, - "73": 2209851392.0, - "74": 2209851392.0, - "75": 2209851392.0, - "76": 2209851392.0, - "77": 2209851392.0, - "78": 2209851392.0, - "79": 2209851392.0, - "80": 2209851392.0, - "81": 2209851392.0, - "82": 2209851392.0, - "83": 2209851392.0, - "84": 2209851392.0, - "85": 2209851392.0, - "86": 2209851392.0, - "87": 2209851392.0, - "88": 2209851392.0, - "89": 2209851392.0, - "90": 2209851392.0, - "91": 2209851392.0, - "92": 2209851392.0, - "93": 2209851392.0, - "94": 2209851392.0, - "95": 2209851392.0, - "96": 2209851392.0, - "97": 2209851392.0, - "98": 2209851392.0, - "99": 2209851392.0, - "100": 2209851392.0 + "1": 1927202816.0, + "2": 2211948544.0, + "3": 2211948544.0, + "4": 2211948544.0, + "5": 2211948544.0, + "6": 2211948544.0, + "7": 2211948544.0, + "8": 2211948544.0, + "9": 2211948544.0, + "10": 2211948544.0, + "11": 2211948544.0, + "12": 2211948544.0, + "13": 2211948544.0, + "14": 2211948544.0, + "15": 2211948544.0, + "16": 2211948544.0, + "17": 2211948544.0, + "18": 2211948544.0, + "19": 2211948544.0, + "20": 2211948544.0, + "21": 2211948544.0, + "22": 2211948544.0, + "23": 2211948544.0, + "24": 2211948544.0, + "25": 2211948544.0, + "26": 2211948544.0, + "27": 2211948544.0, + "28": 2211948544.0, + "29": 2211948544.0, + "30": 2211948544.0, + "31": 2211948544.0, + "32": 2211948544.0, + "33": 2211948544.0, + "34": 2211948544.0, + "35": 2211948544.0, + "36": 2211948544.0, + "37": 2211948544.0, + "38": 2211948544.0, + "39": 2211948544.0, + "40": 2211948544.0, + "41": 2211948544.0, + "42": 2211948544.0, + "43": 2211948544.0, + "44": 2211948544.0, + "45": 2211948544.0, + "46": 2211948544.0, + "47": 2211948544.0, + "48": 2211948544.0, + "49": 2211948544.0, + "50": 2211948544.0, + "51": 2211948544.0, + "52": 2211948544.0, + "53": 2211948544.0, + "54": 
2211948544.0, + "55": 2211948544.0, + "56": 2211948544.0, + "57": 2211948544.0, + "58": 2211948544.0, + "59": 2211948544.0, + "60": 2211948544.0, + "61": 2211948544.0, + "62": 2211948544.0, + "63": 2211948544.0, + "64": 2211948544.0, + "65": 2211948544.0, + "66": 2211948544.0, + "67": 2211948544.0, + "68": 2211948544.0, + "69": 2211948544.0, + "70": 2211948544.0, + "71": 2211948544.0, + "72": 2211948544.0, + "73": 2211948544.0, + "74": 2211948544.0, + "75": 2211948544.0, + "76": 2211948544.0, + "77": 2211948544.0, + "78": 2211948544.0, + "79": 2211948544.0, + "80": 2211948544.0, + "81": 2211948544.0, + "82": 2211948544.0, + "83": 2211948544.0, + "84": 2211948544.0, + "85": 2211948544.0, + "86": 2211948544.0, + "87": 2211948544.0, + "88": 2211948544.0, + "89": 2211948544.0, + "90": 2211948544.0, + "91": 2211948544.0, + "92": 2211948544.0, + "93": 2211948544.0, + "94": 2211948544.0, + "95": 2211948544.0, + "96": 2211948544.0, + "97": 2211948544.0, + "98": 2211948544.0, + "99": 2211948544.0, + "100": 2211948544.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 12.71973, - "2": 0.14026, - "3": 0.11862, - "4": 0.10675, - "5": 0.10706, - "6": 0.10639, - "7": 0.10733, - "8": 0.10668, - "9": 0.10876, - "10": 0.10818, - "11": 0.10917, - "12": 0.1083, - "13": 0.10781, - "14": 0.10774, - "15": 0.10649, - "16": 0.10734, - "17": 0.10691, - "18": 0.10561, - "19": 0.10658, - "20": 0.10698, - "21": 0.10786, - "22": 0.10799, - "23": 0.10759, - "24": 0.10883, - "25": 0.10795, - "26": 0.10754, - "27": 0.10823, - "28": 0.10763, - "29": 0.10845, - "30": 0.10831, - "31": 0.10745, - "32": 0.10718, - "33": 0.10787, - "34": 0.10797, - "35": 0.1082, - "36": 0.10752, - "37": 0.10829, - "38": 0.10875, - "39": 0.10866, - "40": 0.1088, - "41": 0.10879, - "42": 0.10749, - "43": 0.10899, - "44": 0.10725, - "45": 0.10697, - "46": 0.10761, - "47": 0.10683, - "48": 0.10976, - "49": 0.10965, - "50": 0.10766, - "51": 0.123, - "52": 0.11396, - 
"53": 0.10816, - "54": 0.10864, - "55": 0.12449, - "56": 0.1076, - "57": 0.10895, - "58": 0.10793, - "59": 0.10902, - "60": 0.10551, - "61": 0.10575, - "62": 0.10761, - "63": 0.10614, - "64": 0.10584, - "65": 0.10699, - "66": 0.1077, - "67": 0.10786, - "68": 0.10744, - "69": 0.10671, - "70": 0.10786, - "71": 0.10765, - "72": 0.10586, - "73": 0.10669, - "74": 0.10611, - "75": 0.10692, - "76": 0.10782, - "77": 0.10601, - "78": 0.10616, - "79": 0.10555, - "80": 0.10728, - "81": 0.10656, - "82": 0.10848, - "83": 0.10786, - "84": 0.10935, - "85": 0.11246, - "86": 0.11271, - "87": 0.10885, - "88": 0.10616, - "89": 0.10731, - "90": 0.10705, - "91": 0.10547, - "92": 0.10622, - "93": 0.10619, - "94": 0.10678, - "95": 0.10769, - "96": 0.10574, - "97": 0.10691, - "98": 0.10682, - "99": 0.10685, - "100": 0.10542 + "1": 8.42141, + "2": 0.12821, + "3": 0.10969, + "4": 0.08528, + "5": 0.08609, + "6": 0.08514, + "7": 0.08511, + "8": 0.08614, + "9": 0.0853, + "10": 0.08556, + "11": 0.08506, + "12": 0.08648, + "13": 0.08513, + "14": 0.08524, + "15": 0.08502, + "16": 0.08679, + "17": 0.08617, + "18": 0.08799, + "19": 0.08587, + "20": 0.08552, + "21": 0.08665, + "22": 0.08551, + "23": 0.08517, + "24": 0.08535, + "25": 0.08579, + "26": 0.08526, + "27": 0.08602, + "28": 0.08519, + "29": 0.08544, + "30": 0.08512, + "31": 0.0856, + "32": 0.08591, + "33": 0.08561, + "34": 0.08518, + "35": 0.08492, + "36": 0.08517, + "37": 0.08548, + "38": 0.08494, + "39": 0.08594, + "40": 0.08522, + "41": 0.08599, + "42": 0.0854, + "43": 0.08536, + "44": 0.0855, + "45": 0.08648, + "46": 0.088, + "47": 0.08639, + "48": 0.08682, + "49": 0.08646, + "50": 0.08529, + "51": 0.09801, + "52": 0.08949, + "53": 0.08726, + "54": 0.08702, + "55": 0.08687, + "56": 0.08692, + "57": 0.08726, + "58": 0.0871, + "59": 0.08762, + "60": 0.08729, + "61": 0.08712, + "62": 0.0868, + "63": 0.08725, + "64": 0.08676, + "65": 0.08718, + "66": 0.08682, + "67": 0.08754, + "68": 0.08695, + "69": 0.08788, + "70": 0.08724, + "71": 
0.08705, + "72": 0.08759, + "73": 0.08826, + "74": 0.0871, + "75": 0.08684, + "76": 0.08689, + "77": 0.08656, + "78": 0.08667, + "79": 0.08705, + "80": 0.08727, + "81": 0.0879, + "82": 0.08956, + "83": 0.08661, + "84": 0.08671, + "85": 0.08761, + "86": 0.08652, + "87": 0.08663, + "88": 0.08663, + "89": 0.08687, + "90": 0.08718, + "91": 0.0868, + "92": 0.08665, + "93": 0.08695, + "94": 0.08685, + "95": 0.08671, + "96": 0.08669, + "97": 0.08742, + "98": 0.08628, + "99": 0.08628, + "100": 0.08651 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..17f5f4ed8eb --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85075, + "52": 9.73181, + "53": 10.06388, + "54": 9.95432, + "55": 9.87204, + "56": 9.61823, + "57": 9.47467, + 
"58": 9.82802, + "59": 9.57962, + "60": 9.49074, + "61": 9.68473, + "62": 9.99245, + "63": 9.38364, + "64": 9.77766, + "65": 8.94008, + "66": 9.70099, + "67": 9.3605, + "68": 9.77766, + "69": 9.78865, + "70": 9.73813, + "71": 9.61811, + "72": 9.58068, + "73": 9.4964, + "74": 8.93812, + "75": 9.42081, + "76": 9.07416, + "77": 10.06077, + "78": 9.71952, + "79": 9.37088, + "80": 9.39874, + "81": 9.47802, + "82": 9.69299, + "83": 9.30276, + "84": 9.41548, + "85": 9.60883, + "86": 9.07461, + "87": 9.58826, + "88": 9.74392, + "89": 9.5951, + "90": 9.81217, + "91": 9.33796, + "92": 9.3534, + "93": 9.07315, + "94": 8.83127, + "95": 9.51524, + "96": 9.52183, + "97": 9.31012, + "98": 9.66532, + "99": 8.88179, + "100": 9.39375 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2509.0, + "52": 2528.0, + "53": 2851.0, + "54": 2747.0, + "55": 2333.0, + "56": 2724.0, + "57": 2315.0, + "58": 2754.0, + "59": 2774.0, + "60": 2336.0, + "61": 2912.0, + "62": 2415.0, + "63": 2341.0, + "64": 2837.0, + "65": 2661.0, + "66": 3000.0, + "67": 2779.0, + "68": 2691.0, + "69": 2793.0, + "70": 3183.0, + "71": 2962.0, + "72": 2393.0, + "73": 2997.0, + "74": 1935.0, + "75": 2463.0, + "76": 3065.0, + "77": 3184.0, 
+ "78": 3154.0, + "79": 3127.0, + "80": 3286.0, + "81": 3386.0, + "82": 3128.0, + "83": 2608.0, + "84": 3079.0, + "85": 3260.0, + "86": 2687.0, + "87": 3591.0, + "88": 3035.0, + "89": 3165.0, + "90": 3166.0, + "91": 2690.0, + "92": 2897.0, + "93": 2630.0, + "94": 3348.0, + "95": 3349.0, + "96": 3288.0, + "97": 3055.0, + "98": 3516.0, + "99": 3035.0, + "100": 3109.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 746195456.0, + "52": 746195456.0, + "53": 746195456.0, + "54": 746195456.0, + "55": 746195456.0, + "56": 746195456.0, + "57": 746195456.0, + "58": 746195456.0, + "59": 746195456.0, + "60": 746195456.0, + "61": 746195456.0, + "62": 746195456.0, + "63": 746195456.0, + "64": 746195456.0, + "65": 746195456.0, + "66": 746195456.0, + "67": 746195456.0, + "68": 746195456.0, + "69": 746195456.0, + "70": 746195456.0, + "71": 746195456.0, + "72": 746195456.0, + "73": 746195456.0, + "74": 746195456.0, + "75": 746195456.0, + "76": 746195456.0, + "77": 746195456.0, + "78": 746195456.0, + "79": 746195456.0, + "80": 746195456.0, + "81": 746195456.0, + "82": 746195456.0, + "83": 746195456.0, + "84": 746195456.0, + "85": 746195456.0, + "86": 746195456.0, + "87": 746195456.0, + 
"88": 746195456.0, + "89": 746195456.0, + "90": 746195456.0, + "91": 746195456.0, + "92": 746195456.0, + "93": 746195456.0, + "94": 746195456.0, + "95": 746195456.0, + "96": 746195456.0, + "97": 746195456.0, + "98": 746195456.0, + "99": 746195456.0, + "100": 746195456.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2210899968.0, + "52": 2210900992.0, + "53": 2210900992.0, + "54": 2210900992.0, + "55": 2210900992.0, + "56": 2210900992.0, + "57": 2210900992.0, + "58": 2210900992.0, + "59": 2210900992.0, + "60": 2210900992.0, + "61": 2210900992.0, + "62": 2210900992.0, + "63": 2210900992.0, + "64": 2210900992.0, + "65": 2210900992.0, + "66": 2210900992.0, + "67": 2210900992.0, + "68": 2210900992.0, + "69": 2210900992.0, + "70": 2210900992.0, + "71": 2210900992.0, + "72": 2210900992.0, + "73": 2210900992.0, + "74": 2210900992.0, + "75": 2210900992.0, + "76": 2210900992.0, + "77": 2210900992.0, + "78": 2210900992.0, + "79": 2210900992.0, + "80": 2210900992.0, + "81": 2210900992.0, + "82": 2210900992.0, + "83": 2210900992.0, + "84": 2210900992.0, + "85": 2210900992.0, + "86": 2210900992.0, + "87": 2210900992.0, + "88": 2210900992.0, + "89": 2210900992.0, + "90": 
2210900992.0, + "91": 2210900992.0, + "92": 2210900992.0, + "93": 2210900992.0, + "94": 2210900992.0, + "95": 2210900992.0, + "96": 2210900992.0, + "97": 2210900992.0, + "98": 2210900992.0, + "99": 2210900992.0, + "100": 2210900992.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 8.15802, + "52": 0.13009, + "53": 0.08915, + "54": 0.089, + "55": 0.08861, + "56": 0.08871, + "57": 0.08895, + "58": 0.08939, + "59": 0.08862, + "60": 0.08875, + "61": 0.08835, + "62": 0.09029, + "63": 0.09034, + "64": 0.08922, + "65": 0.08953, + "66": 0.09166, + "67": 0.08868, + "68": 0.08954, + "69": 0.08916, + "70": 0.08982, + "71": 0.08837, + "72": 0.0903, + "73": 0.08971, + "74": 0.09129, + "75": 0.09221, + "76": 0.08837, + "77": 0.0912, + "78": 0.08894, + "79": 0.08857, + "80": 0.089, + "81": 0.0893, + "82": 0.08924, + "83": 0.08842, + "84": 0.08918, + "85": 0.08897, + "86": 0.08832, + "87": 0.08827, + "88": 0.08998, + "89": 0.08959, + "90": 0.08882, + "91": 0.08911, + "92": 0.08926, + "93": 0.08845, + "94": 0.08884, + "95": 0.08981, + "96": 0.08858, + "97": 0.09088, + "98": 0.09007, + "99": 0.08931, + "100": 0.09003 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..29e5fc62d41 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.83582, + "2": 10.83571, + "3": 10.83523, + "4": 10.79949, + "5": 10.84909, + "6": 10.86563, + "7": 10.82789, + "8": 10.8363, + "9": 10.83997, + "10": 10.79865, + "11": 10.8677, + "12": 10.84994, + "13": 10.85915, + "14": 10.86874, + "15": 10.80173, + "16": 10.79183, + "17": 10.77353, + "18": 10.78739, + "19": 10.78983, + "20": 10.68446, + "21": 10.6784, + "22": 10.5257, + "23": 10.70726, + "24": 10.56551, + "25": 10.51602, + "26": 10.58017, + "27": 10.58981, + "28": 10.54551, + "29": 10.57726, + "30": 10.34051, + "31": 10.07051, + "32": 10.44503, + "33": 10.44293, + "34": 10.19391, + "35": 10.24261, + "36": 10.19236, + "37": 10.32969, + "38": 10.16551, + "39": 10.38729, + "40": 10.05174, + "41": 10.12191, + "42": 10.19259, + "43": 9.8069, + "44": 9.92475, + "45": 9.80639, + "46": 9.80145, + "47": 10.12104, + "48": 9.83127, + "49": 9.50404, + "50": 9.87954, + "51": 9.83807, + "52": 9.72058, + "53": 10.0568, + "54": 9.95032, + "55": 9.88328, + "56": 9.60431, + "57": 9.45518, + "58": 9.81927, + "59": 9.58262, + "60": 9.48844, + "61": 9.68577, + "62": 9.97779, + "63": 9.36765, + "64": 9.75913, + "65": 8.9376, + "66": 9.69257, + "67": 9.36621, + "68": 9.78303, + "69": 9.79318, + "70": 9.72699, + "71": 9.62875, + "72": 9.58004, + "73": 9.487, + "74": 8.92041, + "75": 9.41128, + "76": 9.07564, + "77": 10.05848, + "78": 9.72184, + "79": 9.3732, + "80": 9.40079, + 
"81": 9.4792, + "82": 9.69754, + "83": 9.31037, + "84": 9.41777, + "85": 9.61194, + "86": 9.07155, + "87": 9.59661, + "88": 9.74709, + "89": 9.59667, + "90": 9.82915, + "91": 9.33725, + "92": 9.3564, + "93": 9.08552, + "94": 8.82807, + "95": 9.52842, + "96": 9.52611, + "97": 9.30632, + "98": 9.66808, + "99": 8.89461, + "100": 9.40666 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1536.0, + "2": 1592.0, + "3": 1551.0, + "4": 1769.0, + "5": 1824.0, + "6": 1800.0, + "7": 1734.0, + "8": 1619.0, + "9": 1829.0, + "10": 1355.0, + "11": 1911.0, + "12": 1721.0, + "13": 1913.0, + "14": 1708.0, + "15": 1919.0, + "16": 1938.0, + "17": 1740.0, + "18": 1676.0, + "19": 1743.0, + "20": 1535.0, + "21": 1797.0, + "22": 1661.0, + "23": 1887.0, + "24": 1666.0, + "25": 1633.0, + "26": 1676.0, + "27": 1740.0, + "28": 1991.0, + "29": 1918.0, + "30": 1806.0, + "31": 1588.0, + "32": 1863.0, + "33": 2126.0, + "34": 1812.0, + "35": 1976.0, + "36": 1875.0, + "37": 2301.0, + "38": 2131.0, + "39": 2351.0, + "40": 2130.0, + "41": 2391.0, + "42": 2255.0, + "43": 1975.0, + "44": 2138.0, + "45": 2208.0, + "46": 2364.0, + "47": 2564.0, + "48": 2337.0, + "49": 2142.0, + "50": 2423.0, + "51": 2546.0, + "52": 2590.0, + "53": 2879.0, + "54": 2697.0, + "55": 2316.0, + "56": 2549.0, + "57": 2261.0, + "58": 2904.0, + "59": 2740.0, + "60": 2434.0, + "61": 2801.0, + "62": 2663.0, + "63": 2502.0, + "64": 2948.0, + "65": 2644.0, + "66": 2961.0, + "67": 2813.0, + "68": 2686.0, + "69": 2912.0, + "70": 3096.0, + "71": 2854.0, + "72": 2454.0, + "73": 3081.0, + "74": 1933.0, + "75": 2465.0, + "76": 3012.0, + "77": 3163.0, + "78": 2997.0, + "79": 3089.0, + "80": 3187.0, + "81": 3500.0, + "82": 3339.0, + "83": 2705.0, + "84": 3205.0, + "85": 3033.0, + "86": 2818.0, + "87": 3671.0, + "88": 3190.0, + "89": 3336.0, + "90": 3320.0, + "91": 2698.0, + "92": 3072.0, + "93": 2750.0, + "94": 3397.0, + "95": 3317.0, + "96": 3290.0, + "97": 3116.0, + "98": 3732.0, + 
"99": 3049.0, + "100": 2974.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 745146880.0, + "2": 745146880.0, + "3": 745146880.0, + "4": 745146880.0, + "5": 745146880.0, + "6": 745146880.0, + "7": 745146880.0, + "8": 745146880.0, + "9": 745146880.0, + "10": 745146880.0, + "11": 745146880.0, + "12": 745146880.0, + "13": 745146880.0, + "14": 745146880.0, + "15": 745146880.0, + "16": 745146880.0, + "17": 745146880.0, + "18": 745146880.0, + "19": 745146880.0, + "20": 745146880.0, + "21": 745146880.0, + "22": 745146880.0, + "23": 745146880.0, + "24": 745146880.0, + "25": 745146880.0, + "26": 745146880.0, + "27": 745146880.0, + "28": 745146880.0, + "29": 745146880.0, + "30": 745146880.0, + "31": 745146880.0, + "32": 745146880.0, + "33": 745146880.0, + "34": 745146880.0, + "35": 745146880.0, + "36": 745146880.0, + "37": 745146880.0, + "38": 745146880.0, + "39": 745146880.0, + "40": 745146880.0, + "41": 745146880.0, + "42": 745146880.0, + "43": 745146880.0, + "44": 745146880.0, + "45": 745146880.0, + "46": 745146880.0, + "47": 745146880.0, + "48": 745146880.0, + "49": 745146880.0, + "50": 745146880.0, + "51": 745146880.0, + "52": 745146880.0, + "53": 745146880.0, + "54": 745146880.0, + "55": 745146880.0, + "56": 745146880.0, + "57": 745146880.0, + "58": 745146880.0, + "59": 745146880.0, + "60": 745146880.0, + "61": 745146880.0, + "62": 745146880.0, + "63": 745146880.0, + "64": 745146880.0, + "65": 745146880.0, + "66": 745146880.0, + "67": 745146880.0, + "68": 745146880.0, + "69": 745146880.0, + "70": 745146880.0, + "71": 745146880.0, + "72": 745146880.0, + "73": 745146880.0, + "74": 745146880.0, + "75": 745146880.0, + "76": 745146880.0, + "77": 745146880.0, + "78": 745146880.0, + "79": 745146880.0, + "80": 745146880.0, + "81": 745146880.0, + "82": 745146880.0, + "83": 745146880.0, + "84": 745146880.0, + "85": 745146880.0, + "86": 745146880.0, + "87": 745146880.0, + "88": 745146880.0, + "89": 745146880.0, 
+ "90": 745146880.0, + "91": 745146880.0, + "92": 745146880.0, + "93": 745146880.0, + "94": 745146880.0, + "95": 745146880.0, + "96": 745146880.0, + "97": 745146880.0, + "98": 745146880.0, + "99": 745146880.0, + "100": 745146880.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1939785728.0, + "2": 2222434304.0, + "3": 2222434304.0, + "4": 2222434304.0, + "5": 2222434304.0, + "6": 2222434304.0, + "7": 2222434304.0, + "8": 2222434304.0, + "9": 2222434304.0, + "10": 2222434304.0, + "11": 2222434304.0, + "12": 2222434304.0, + "13": 2222434304.0, + "14": 2222434304.0, + "15": 2222434304.0, + "16": 2222434304.0, + "17": 2222434304.0, + "18": 2222434304.0, + "19": 2222434304.0, + "20": 2222434304.0, + "21": 2222434304.0, + "22": 2222434304.0, + "23": 2222434304.0, + "24": 2222434304.0, + "25": 2222434304.0, + "26": 2222434304.0, + "27": 2222434304.0, + "28": 2222434304.0, + "29": 2222434304.0, + "30": 2222434304.0, + "31": 2222434304.0, + "32": 2222434304.0, + "33": 2222434304.0, + "34": 2222434304.0, + "35": 2222434304.0, + "36": 2222434304.0, + "37": 2222434304.0, + "38": 2222434304.0, + "39": 2222434304.0, + "40": 2222434304.0, + "41": 2222434304.0, + "42": 2222434304.0, + "43": 2222434304.0, + "44": 2222434304.0, + "45": 2222434304.0, + "46": 2222434304.0, + "47": 2222434304.0, + "48": 2222434304.0, + "49": 2222434304.0, + "50": 2222434304.0, + "51": 2222434304.0, + "52": 2222434304.0, + "53": 2222434304.0, + "54": 2222434304.0, + "55": 2222434304.0, + "56": 2222434304.0, + "57": 2222434304.0, + "58": 2222434304.0, + "59": 2222434304.0, + "60": 2222434304.0, + "61": 2222434304.0, + "62": 2222434304.0, + "63": 2222434304.0, + "64": 2222434304.0, + "65": 2222434304.0, + "66": 2222434304.0, + "67": 2222434304.0, + "68": 2222434304.0, + "69": 2222434304.0, + "70": 2222434304.0, + "71": 2222434304.0, + "72": 2222434304.0, + "73": 2222434304.0, + "74": 2222434304.0, + "75": 2222434304.0, + "76": 
2222434304.0, + "77": 2222434304.0, + "78": 2222434304.0, + "79": 2222434304.0, + "80": 2222434304.0, + "81": 2222434304.0, + "82": 2222434304.0, + "83": 2222434304.0, + "84": 2222434304.0, + "85": 2222434304.0, + "86": 2222434304.0, + "87": 2222434304.0, + "88": 2222434304.0, + "89": 2222434304.0, + "90": 2222434304.0, + "91": 2222434304.0, + "92": 2222434304.0, + "93": 2222434304.0, + "94": 2222434304.0, + "95": 2222434304.0, + "96": 2222434304.0, + "97": 2222434304.0, + "98": 2222434304.0, + "99": 2222434304.0, + "100": 2222434304.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5.46714, + "2": 0.15122, + "3": 0.25092, + "4": 0.27729, + "5": 0.24367, + "6": 0.26751, + "7": 0.2545, + "8": 0.12105, + "9": 0.24325, + "10": 0.12174, + "11": 0.12002, + "12": 0.12125, + "13": 0.25244, + "14": 0.11939, + "15": 0.24088, + "16": 0.11887, + "17": 0.44496, + "18": 0.16003, + "19": 0.25508, + "20": 0.21211, + "21": 0.3079, + "22": 0.12004, + "23": 0.43335, + "24": 0.12, + "25": 0.12101, + "26": 0.12096, + "27": 0.29192, + "28": 0.19864, + "29": 0.26692, + "30": 0.11884, + "31": 0.12045, + "32": 0.12079, + "33": 0.12032, + "34": 0.12022, + "35": 0.21894, + "36": 0.11918, + "37": 0.22006, + "38": 0.34871, + "39": 0.12088, + "40": 0.12089, + "41": 0.12159, + "42": 0.18229, + "43": 0.16394, + "44": 0.11984, + "45": 0.12064, + "46": 0.12128, + "47": 0.17743, + "48": 0.1593, + "49": 0.12034, + "50": 0.11831, + "51": 0.13446, + "52": 0.12243, + "53": 0.11866, + "54": 0.11939, + "55": 0.20902, + "56": 0.13705, + "57": 0.11709, + "58": 0.11749, + "59": 0.11871, + "60": 0.22163, + "61": 0.11825, + "62": 0.22086, + "63": 0.11702, + "64": 0.11919, + "65": 0.12009, + "66": 0.19788, + "67": 0.42941, + "68": 0.11868, + "69": 0.22718, + "70": 0.20618, + "71": 0.13003, + "72": 0.134, + "73": 0.13466, + "74": 0.14293, + "75": 0.22299, + "76": 0.12996, + "77": 0.13433, + "78": 0.13652, + "79": 0.1285, + "80": 0.13881, + "81": 
0.13014, + "82": 0.12942, + "83": 0.22639, + "84": 0.1185, + "85": 0.22799, + "86": 0.23089, + "87": 0.11774, + "88": 0.22926, + "89": 0.12055, + "90": 0.11828, + "91": 0.25019, + "92": 0.11977, + "93": 0.1173, + "94": 0.11879, + "95": 0.1161, + "96": 0.34968, + "97": 0.11818, + "98": 0.21965, + "99": 0.12107, + "100": 0.11838 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100.json index 50639a30816..4fffaabca8a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 746194432.0, - "2": 746194432.0, - "3": 746194432.0, - "4": 746194432.0, - "5": 746194432.0, - "6": 746194432.0, - "7": 746194432.0, - "8": 746194432.0, - "9": 746194432.0, - "10": 746194432.0, - "11": 746194432.0, - "12": 746194432.0, - "13": 746194432.0, - "14": 746194432.0, - "15": 746194432.0, - "16": 746194432.0, - "17": 746194432.0, - "18": 746194432.0, - "19": 746194432.0, - "20": 746194432.0, - "21": 746194432.0, - "22": 746194432.0, - "23": 746194432.0, - "24": 746194432.0, - "25": 746194432.0, - "26": 746194432.0, - "27": 746194432.0, - "28": 746194432.0, - "29": 746194432.0, - "30": 746194432.0, - "31": 746194432.0, - "32": 746194432.0, - "33": 746194432.0, - "34": 746194432.0, - "35": 746194432.0, - "36": 746194432.0, - "37": 746194432.0, - "38": 746194432.0, - "39": 746194432.0, - "40": 746194432.0, - "41": 746194432.0, - "42": 746194432.0, - "43": 746194432.0, - "44": 746194432.0, - "45": 
746194432.0, - "46": 746194432.0, - "47": 746194432.0, - "48": 746194432.0, - "49": 746194432.0, - "50": 746194432.0, - "51": 746194432.0, - "52": 746194432.0, - "53": 746194432.0, - "54": 746194432.0, - "55": 746194432.0, - "56": 746194432.0, - "57": 746194432.0, - "58": 746194432.0, - "59": 746194432.0, - "60": 746194432.0, - "61": 746194432.0, - "62": 746194432.0, - "63": 746194432.0, - "64": 746194432.0, - "65": 746194432.0, - "66": 746194432.0, - "67": 746194432.0, - "68": 746194432.0, - "69": 746194432.0, - "70": 746194432.0, - "71": 746194432.0, - "72": 746194432.0, - "73": 746194432.0, - "74": 746194432.0, - "75": 746194432.0, - "76": 746194432.0, - "77": 746194432.0, - "78": 746194432.0, - "79": 746194432.0, - "80": 746194432.0, - "81": 746194432.0, - "82": 746194432.0, - "83": 746194432.0, - "84": 746194432.0, - "85": 746194432.0, - "86": 746194432.0, - "87": 746194432.0, - "88": 746194432.0, - "89": 746194432.0, - "90": 746194432.0, - "91": 746194432.0, - "92": 746194432.0, - "93": 746194432.0, - "94": 746194432.0, - "95": 746194432.0, - "96": 746194432.0, - "97": 746194432.0, - "98": 746194432.0, - "99": 746194432.0, - "100": 746194432.0 + "1": 747244032.0, + "2": 747244032.0, + "3": 747244032.0, + "4": 747244032.0, + "5": 747244032.0, + "6": 747244032.0, + "7": 747244032.0, + "8": 747244032.0, + "9": 747244032.0, + "10": 747244032.0, + "11": 747244032.0, + "12": 747244032.0, + "13": 747244032.0, + "14": 747244032.0, + "15": 747244032.0, + "16": 747244032.0, + "17": 747244032.0, + "18": 747244032.0, + "19": 747244032.0, + "20": 747244032.0, + "21": 747244032.0, + "22": 747244032.0, + "23": 747244032.0, + "24": 747244032.0, + "25": 747244032.0, + "26": 747244032.0, + "27": 747244032.0, + "28": 747244032.0, + "29": 747244032.0, + "30": 747244032.0, + "31": 747244032.0, + "32": 747244032.0, + "33": 747244032.0, + "34": 747244032.0, + "35": 747244032.0, + "36": 747244032.0, + "37": 747244032.0, + "38": 747244032.0, + "39": 747244032.0, + "40": 747244032.0, 
+ "41": 747244032.0, + "42": 747244032.0, + "43": 747244032.0, + "44": 747244032.0, + "45": 747244032.0, + "46": 747244032.0, + "47": 747244032.0, + "48": 747244032.0, + "49": 747244032.0, + "50": 747244032.0, + "51": 747244032.0, + "52": 747244032.0, + "53": 747244032.0, + "54": 747244032.0, + "55": 747244032.0, + "56": 747244032.0, + "57": 747244032.0, + "58": 747244032.0, + "59": 747244032.0, + "60": 747244032.0, + "61": 747244032.0, + "62": 747244032.0, + "63": 747244032.0, + "64": 747244032.0, + "65": 747244032.0, + "66": 747244032.0, + "67": 747244032.0, + "68": 747244032.0, + "69": 747244032.0, + "70": 747244032.0, + "71": 747244032.0, + "72": 747244032.0, + "73": 747244032.0, + "74": 747244032.0, + "75": 747244032.0, + "76": 747244032.0, + "77": 747244032.0, + "78": 747244032.0, + "79": 747244032.0, + "80": 747244032.0, + "81": 747244032.0, + "82": 747244032.0, + "83": 747244032.0, + "84": 747244032.0, + "85": 747244032.0, + "86": 747244032.0, + "87": 747244032.0, + "88": 747244032.0, + "89": 747244032.0, + "90": 747244032.0, + "91": 747244032.0, + "92": 747244032.0, + "93": 747244032.0, + "94": 747244032.0, + "95": 747244032.0, + "96": 747244032.0, + "97": 747244032.0, + "98": 747244032.0, + "99": 747244032.0, + "100": 747244032.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1926153216.0, - "2": 2209851392.0, - "3": 2209851392.0, - "4": 2209851392.0, - "5": 2209851392.0, - "6": 2209851392.0, - "7": 2209851392.0, - "8": 2209851392.0, - "9": 2209851392.0, - "10": 2209851392.0, - "11": 2209851392.0, - "12": 2209851392.0, - "13": 2209851392.0, - "14": 2209851392.0, - "15": 2209851392.0, - "16": 2209851392.0, - "17": 2209851392.0, - "18": 2209851392.0, - "19": 2209851392.0, - "20": 2209851392.0, - "21": 2209851392.0, - "22": 2209851392.0, - "23": 2209851392.0, - "24": 2209851392.0, - "25": 2209851392.0, - "26": 2209851392.0, - "27": 2209851392.0, - "28": 2209851392.0, - "29": 2209851392.0, - 
"30": 2209851392.0, - "31": 2209851392.0, - "32": 2209851392.0, - "33": 2209851392.0, - "34": 2209851392.0, - "35": 2209851392.0, - "36": 2209851392.0, - "37": 2209851392.0, - "38": 2209851392.0, - "39": 2209851392.0, - "40": 2209851392.0, - "41": 2209851392.0, - "42": 2209851392.0, - "43": 2209851392.0, - "44": 2209851392.0, - "45": 2209851392.0, - "46": 2209851392.0, - "47": 2209851392.0, - "48": 2209851392.0, - "49": 2209851392.0, - "50": 2209851392.0, - "51": 2209851392.0, - "52": 2209851392.0, - "53": 2209851392.0, - "54": 2209851392.0, - "55": 2209851392.0, - "56": 2209851392.0, - "57": 2209851392.0, - "58": 2209851392.0, - "59": 2209851392.0, - "60": 2209851392.0, - "61": 2209851392.0, - "62": 2209851392.0, - "63": 2209851392.0, - "64": 2209851392.0, - "65": 2209851392.0, - "66": 2209851392.0, - "67": 2209851392.0, - "68": 2209851392.0, - "69": 2209851392.0, - "70": 2209851392.0, - "71": 2209851392.0, - "72": 2209851392.0, - "73": 2209851392.0, - "74": 2209851392.0, - "75": 2209851392.0, - "76": 2209851392.0, - "77": 2209851392.0, - "78": 2209851392.0, - "79": 2209851392.0, - "80": 2209851392.0, - "81": 2209851392.0, - "82": 2209851392.0, - "83": 2209851392.0, - "84": 2209851392.0, - "85": 2209851392.0, - "86": 2209851392.0, - "87": 2209851392.0, - "88": 2209851392.0, - "89": 2209851392.0, - "90": 2209851392.0, - "91": 2209851392.0, - "92": 2209851392.0, - "93": 2209851392.0, - "94": 2209851392.0, - "95": 2209851392.0, - "96": 2209851392.0, - "97": 2209851392.0, - "98": 2209851392.0, - "99": 2209851392.0, - "100": 2209851392.0 + "1": 1927202816.0, + "2": 2211948544.0, + "3": 2211948544.0, + "4": 2211948544.0, + "5": 2211948544.0, + "6": 2211948544.0, + "7": 2211948544.0, + "8": 2211948544.0, + "9": 2211948544.0, + "10": 2211948544.0, + "11": 2211948544.0, + "12": 2211948544.0, + "13": 2211948544.0, + "14": 2211948544.0, + "15": 2211948544.0, + "16": 2211948544.0, + "17": 2211948544.0, + "18": 2211948544.0, + "19": 2211948544.0, + "20": 2211948544.0, + "21": 
2211948544.0, + "22": 2211948544.0, + "23": 2211948544.0, + "24": 2211948544.0, + "25": 2211948544.0, + "26": 2211948544.0, + "27": 2211948544.0, + "28": 2211948544.0, + "29": 2211948544.0, + "30": 2211948544.0, + "31": 2211948544.0, + "32": 2211948544.0, + "33": 2211948544.0, + "34": 2211948544.0, + "35": 2211948544.0, + "36": 2211948544.0, + "37": 2211948544.0, + "38": 2211948544.0, + "39": 2211948544.0, + "40": 2211948544.0, + "41": 2211948544.0, + "42": 2211948544.0, + "43": 2211948544.0, + "44": 2211948544.0, + "45": 2211948544.0, + "46": 2211948544.0, + "47": 2211948544.0, + "48": 2211948544.0, + "49": 2211948544.0, + "50": 2211948544.0, + "51": 2211948544.0, + "52": 2211948544.0, + "53": 2211948544.0, + "54": 2211948544.0, + "55": 2211948544.0, + "56": 2211948544.0, + "57": 2211948544.0, + "58": 2211948544.0, + "59": 2211948544.0, + "60": 2211948544.0, + "61": 2211948544.0, + "62": 2211948544.0, + "63": 2211948544.0, + "64": 2211948544.0, + "65": 2211948544.0, + "66": 2211948544.0, + "67": 2211948544.0, + "68": 2211948544.0, + "69": 2211948544.0, + "70": 2211948544.0, + "71": 2211948544.0, + "72": 2211948544.0, + "73": 2211948544.0, + "74": 2211948544.0, + "75": 2211948544.0, + "76": 2211948544.0, + "77": 2211948544.0, + "78": 2211948544.0, + "79": 2211948544.0, + "80": 2211948544.0, + "81": 2211948544.0, + "82": 2211948544.0, + "83": 2211948544.0, + "84": 2211948544.0, + "85": 2211948544.0, + "86": 2211948544.0, + "87": 2211948544.0, + "88": 2211948544.0, + "89": 2211948544.0, + "90": 2211948544.0, + "91": 2211948544.0, + "92": 2211948544.0, + "93": 2211948544.0, + "94": 2211948544.0, + "95": 2211948544.0, + "96": 2211948544.0, + "97": 2211948544.0, + "98": 2211948544.0, + "99": 2211948544.0, + "100": 2211948544.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 12.88983, - "2": 0.12288, - "3": 0.10944, - "4": 0.10822, - "5": 0.10919, - "6": 0.10835, - "7": 0.11035, - "8": 0.10879, - "9": 0.11001, - 
"10": 0.11009, - "11": 0.10945, - "12": 0.10868, - "13": 0.1086, - "14": 0.10899, - "15": 0.10852, - "16": 0.10822, - "17": 0.10818, - "18": 0.10877, - "19": 0.10888, - "20": 0.10828, - "21": 0.109, - "22": 0.108, - "23": 0.10722, - "24": 0.10731, - "25": 0.1075, - "26": 0.10744, - "27": 0.10843, - "28": 0.10831, - "29": 0.10841, - "30": 0.10718, - "31": 0.10837, - "32": 0.10773, - "33": 0.10792, - "34": 0.10698, - "35": 0.10976, - "36": 0.10758, - "37": 0.10825, - "38": 0.10781, - "39": 0.10912, - "40": 0.10847, - "41": 0.10786, - "42": 0.10767, - "43": 0.10761, - "44": 0.1076, - "45": 0.1078, - "46": 0.10992, - "47": 0.1061, - "48": 0.10654, - "49": 0.10566, - "50": 0.1066, - "51": 0.11234, - "52": 0.11065, - "53": 0.10795, - "54": 0.10668, - "55": 0.10678, - "56": 0.10889, - "57": 0.10802, - "58": 0.12482, - "59": 0.10666, - "60": 0.10637, - "61": 0.10776, - "62": 0.10743, - "63": 0.10782, - "64": 0.10634, - "65": 0.10744, - "66": 0.10859, - "67": 0.10949, - "68": 0.1075, - "69": 0.10803, - "70": 0.10688, - "71": 0.10797, - "72": 0.10752, - "73": 0.10816, - "74": 0.10734, - "75": 0.10832, - "76": 0.10815, - "77": 0.10868, - "78": 0.10839, - "79": 0.1074, - "80": 0.10866, - "81": 0.11122, - "82": 0.11035, - "83": 0.1101, - "84": 0.1122, - "85": 0.10866, - "86": 0.10915, - "87": 0.10842, - "88": 0.10723, - "89": 0.10849, - "90": 0.10814, - "91": 0.10833, - "92": 0.10719, - "93": 0.10725, - "94": 0.10754, - "95": 0.10758, - "96": 0.1082, - "97": 0.10768, - "98": 0.10708, - "99": 0.10785, - "100": 0.10841 + "1": 8.07511, + "2": 0.14681, + "3": 0.10596, + "4": 0.08711, + "5": 0.0876, + "6": 0.08568, + "7": 0.08664, + "8": 0.08587, + "9": 0.08577, + "10": 0.08621, + "11": 0.08632, + "12": 0.08547, + "13": 0.08657, + "14": 0.086, + "15": 0.08713, + "16": 0.08626, + "17": 0.0867, + "18": 0.08636, + "19": 0.08698, + "20": 0.08625, + "21": 0.08785, + "22": 0.08871, + "23": 0.08659, + "24": 0.08847, + "25": 0.09629, + "26": 0.09476, + "27": 0.08553, + "28": 0.08477, + 
"29": 0.08431, + "30": 0.08434, + "31": 0.08557, + "32": 0.08544, + "33": 0.08488, + "34": 0.08582, + "35": 0.08395, + "36": 0.08398, + "37": 0.08559, + "38": 0.08441, + "39": 0.08418, + "40": 0.08528, + "41": 0.0861, + "42": 0.08685, + "43": 0.08626, + "44": 0.08751, + "45": 0.08791, + "46": 0.087, + "47": 0.08684, + "48": 0.08803, + "49": 0.08859, + "50": 0.09019, + "51": 0.10254, + "52": 0.09302, + "53": 0.10544, + "54": 0.08758, + "55": 0.0856, + "56": 0.08575, + "57": 0.08685, + "58": 0.08631, + "59": 0.08389, + "60": 0.08441, + "61": 0.08423, + "62": 0.08509, + "63": 0.08726, + "64": 0.08594, + "65": 0.08568, + "66": 0.08392, + "67": 0.08415, + "68": 0.0849, + "69": 0.08418, + "70": 0.08396, + "71": 0.08448, + "72": 0.08498, + "73": 0.08408, + "74": 0.08475, + "75": 0.08328, + "76": 0.08384, + "77": 0.08424, + "78": 0.08463, + "79": 0.0841, + "80": 0.08431, + "81": 0.08441, + "82": 0.0848, + "83": 0.08442, + "84": 0.08437, + "85": 0.08486, + "86": 0.08464, + "87": 0.0837, + "88": 0.0844, + "89": 0.08503, + "90": 0.08351, + "91": 0.0839, + "92": 0.08423, + "93": 0.08472, + "94": 0.08463, + "95": 0.08455, + "96": 0.08373, + "97": 0.08396, + "98": 0.08358, + "99": 0.08466, + "100": 0.08402 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..b60cbfef0c0 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": 
"nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85075, + "52": 9.73181, + "53": 10.06388, + "54": 9.95432, + "55": 9.87204, + "56": 9.61823, + "57": 9.47467, + "58": 9.82802, + "59": 9.57962, + "60": 9.49074, + "61": 9.68473, + "62": 9.99245, + "63": 9.38364, + "64": 9.77766, + "65": 8.94008, + "66": 9.70099, + "67": 9.3605, + "68": 9.77766, + "69": 9.78865, + "70": 9.73813, + "71": 9.61811, + "72": 9.58068, + "73": 9.4964, + "74": 8.93812, + "75": 9.42081, + "76": 9.07416, + "77": 10.06077, + "78": 9.71952, + "79": 9.37088, + "80": 9.39874, + "81": 9.47802, + "82": 9.69299, + "83": 9.30276, + "84": 9.41548, + "85": 9.60883, + "86": 9.07461, + "87": 9.58826, + "88": 9.74392, + "89": 9.5951, + "90": 9.81217, + "91": 9.33796, + "92": 9.3534, + "93": 9.07315, + "94": 8.83127, + "95": 9.51524, + "96": 9.52183, + "97": 9.31012, + "98": 9.66532, + "99": 8.88179, + "100": 9.39375 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + 
"30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2509.0, + "52": 2528.0, + "53": 2851.0, + "54": 2747.0, + "55": 2333.0, + "56": 2724.0, + "57": 2315.0, + "58": 2754.0, + "59": 2774.0, + "60": 2336.0, + "61": 2912.0, + "62": 2415.0, + "63": 2341.0, + "64": 2837.0, + "65": 2661.0, + "66": 3000.0, + "67": 2779.0, + "68": 2691.0, + "69": 2793.0, + "70": 3183.0, + "71": 2962.0, + "72": 2393.0, + "73": 2997.0, + "74": 1935.0, + "75": 2463.0, + "76": 3065.0, + "77": 3184.0, + "78": 3154.0, + "79": 3127.0, + "80": 3286.0, + "81": 3386.0, + "82": 3128.0, + "83": 2608.0, + "84": 3079.0, + "85": 3260.0, + "86": 2687.0, + "87": 3591.0, + "88": 3035.0, + "89": 3165.0, + "90": 3166.0, + "91": 2690.0, + "92": 2897.0, + "93": 2630.0, + "94": 3348.0, + "95": 3349.0, + "96": 3288.0, + "97": 3055.0, + "98": 3516.0, + "99": 3035.0, + "100": 3109.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 746195456.0, + "52": 
746195456.0, + "53": 746195456.0, + "54": 746195456.0, + "55": 746195456.0, + "56": 746195456.0, + "57": 746195456.0, + "58": 746195456.0, + "59": 746195456.0, + "60": 746195456.0, + "61": 746195456.0, + "62": 746195456.0, + "63": 746195456.0, + "64": 746195456.0, + "65": 746195456.0, + "66": 746195456.0, + "67": 746195456.0, + "68": 746195456.0, + "69": 746195456.0, + "70": 746195456.0, + "71": 746195456.0, + "72": 746195456.0, + "73": 746195456.0, + "74": 746195456.0, + "75": 746195456.0, + "76": 746195456.0, + "77": 746195456.0, + "78": 746195456.0, + "79": 746195456.0, + "80": 746195456.0, + "81": 746195456.0, + "82": 746195456.0, + "83": 746195456.0, + "84": 746195456.0, + "85": 746195456.0, + "86": 746195456.0, + "87": 746195456.0, + "88": 746195456.0, + "89": 746195456.0, + "90": 746195456.0, + "91": 746195456.0, + "92": 746195456.0, + "93": 746195456.0, + "94": 746195456.0, + "95": 746195456.0, + "96": 746195456.0, + "97": 746195456.0, + "98": 746195456.0, + "99": 746195456.0, + "100": 746195456.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2210899968.0, + "52": 2210900992.0, + "53": 2210900992.0, + "54": 2210900992.0, + "55": 2210900992.0, + "56": 
2210900992.0, + "57": 2210900992.0, + "58": 2210900992.0, + "59": 2210900992.0, + "60": 2210900992.0, + "61": 2210900992.0, + "62": 2210900992.0, + "63": 2210900992.0, + "64": 2210900992.0, + "65": 2210900992.0, + "66": 2210900992.0, + "67": 2210900992.0, + "68": 2210900992.0, + "69": 2210900992.0, + "70": 2210900992.0, + "71": 2210900992.0, + "72": 2210900992.0, + "73": 2210900992.0, + "74": 2210900992.0, + "75": 2210900992.0, + "76": 2210900992.0, + "77": 2210900992.0, + "78": 2210900992.0, + "79": 2210900992.0, + "80": 2210900992.0, + "81": 2210900992.0, + "82": 2210900992.0, + "83": 2210900992.0, + "84": 2210900992.0, + "85": 2210900992.0, + "86": 2210900992.0, + "87": 2210900992.0, + "88": 2210900992.0, + "89": 2210900992.0, + "90": 2210900992.0, + "91": 2210900992.0, + "92": 2210900992.0, + "93": 2210900992.0, + "94": 2210900992.0, + "95": 2210900992.0, + "96": 2210900992.0, + "97": 2210900992.0, + "98": 2210900992.0, + "99": 2210900992.0, + "100": 2210900992.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 8.5499, + "52": 0.12372, + "53": 0.09645, + "54": 0.09114, + "55": 0.08966, + "56": 0.09034, + "57": 0.08956, + "58": 0.09056, + "59": 0.09042, + "60": 0.0897, + 
"61": 0.09016, + "62": 0.09046, + "63": 0.08857, + "64": 0.08779, + "65": 0.08907, + "66": 0.08837, + "67": 0.08806, + "68": 0.08776, + "69": 0.08756, + "70": 0.08787, + "71": 0.08828, + "72": 0.08894, + "73": 0.08812, + "74": 0.08757, + "75": 0.08963, + "76": 0.09209, + "77": 0.0916, + "78": 0.09224, + "79": 0.09091, + "80": 0.08695, + "81": 0.0874, + "82": 0.08839, + "83": 0.08746, + "84": 0.09295, + "85": 0.09, + "86": 0.09021, + "87": 0.09075, + "88": 0.08904, + "89": 0.08839, + "90": 0.08875, + "91": 0.08852, + "92": 0.08796, + "93": 0.08905, + "94": 0.08832, + "95": 0.08897, + "96": 0.08836, + "97": 0.08869, + "98": 0.08858, + "99": 0.08878, + "100": 0.08832 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100.json index 1246b8727ef..6e5f31a169a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 6.43574, - "2": 0.18308, - "3": 0.16294, - "4": 0.15632, - "5": 0.15517, - "6": 0.15061, - "7": 0.15109, - "8": 0.1538, - "9": 0.15077, - "10": 0.15142, - "11": 0.15024, - "12": 0.15039, - "13": 0.14987, - "14": 0.15044, - "15": 0.1495, - "16": 0.15003, - "17": 0.14988, - "18": 0.1497, - "19": 0.15459, - "20": 0.15076, - "21": 0.1498, - "22": 0.15044, - "23": 0.15051, - "24": 0.15062, - "25": 0.14953, - "26": 0.15047, - "27": 0.14851, - "28": 0.14802, - "29": 0.14861, - "30": 0.1485, - "31": 0.1498, - "32": 0.14871, - "33": 0.1485, - "34": 0.14707, - "35": 0.14796, - 
"36": 0.14719, - "37": 0.15012, - "38": 0.14804, - "39": 0.1487, - "40": 0.14779, - "41": 0.14844, - "42": 0.1496, - "43": 0.15014, - "44": 0.14977, - "45": 0.1478, - "46": 0.14891, - "47": 0.14844, - "48": 0.1488, - "49": 0.14931, - "50": 0.14761, - "51": 0.15888, - "52": 0.1517, - "53": 0.14904, - "54": 0.17961, - "55": 0.14804, - "56": 0.1496, - "57": 0.1487, - "58": 0.14801, - "59": 0.14729, - "60": 0.14749, - "61": 0.14745, - "62": 0.1471, - "63": 0.14817, - "64": 0.1497, - "65": 0.14753, - "66": 0.14753, - "67": 0.14859, - "68": 0.14714, - "69": 0.14776, - "70": 0.14847, - "71": 0.14829, - "72": 0.14858, - "73": 0.14828, - "74": 0.14783, - "75": 0.14793, - "76": 0.14768, - "77": 0.14752, - "78": 0.14931, - "79": 0.15045, - "80": 0.14813, - "81": 0.1489, - "82": 0.1475, - "83": 0.14844, - "84": 0.1489, - "85": 0.14809, - "86": 0.14835, - "87": 0.14718, - "88": 0.14876, - "89": 0.14859, - "90": 0.1479, - "91": 0.14803, - "92": 0.14798, - "93": 0.14876, - "94": 0.14705, - "95": 0.14837, - "96": 0.14805, - "97": 0.14837, - "98": 0.14721, - "99": 0.14843, - "100": 0.14828 + "1": 3.59409, + "2": 0.17465, + "3": 0.16266, + "4": 0.1495, + "5": 0.14527, + "6": 0.14428, + "7": 0.14381, + "8": 0.14313, + "9": 0.14427, + "10": 0.14389, + "11": 0.1443, + "12": 0.14275, + "13": 0.1429, + "14": 0.14279, + "15": 0.14378, + "16": 0.14358, + "17": 0.14299, + "18": 0.14217, + "19": 0.14256, + "20": 0.14345, + "21": 0.14367, + "22": 0.14305, + "23": 0.14257, + "24": 0.14186, + "25": 0.1423, + "26": 0.14156, + "27": 0.14279, + "28": 0.14152, + "29": 0.14248, + "30": 0.14222, + "31": 0.14276, + "32": 0.14268, + "33": 0.14313, + "34": 0.14133, + "35": 0.14312, + "36": 0.14147, + "37": 0.14217, + "38": 0.14071, + "39": 0.14226, + "40": 0.14163, + "41": 0.14393, + "42": 0.14189, + "43": 0.14266, + "44": 0.14185, + "45": 0.1438, + "46": 0.14173, + "47": 0.14272, + "48": 0.14379, + "49": 0.14245, + "50": 0.1422, + "51": 0.1491, + "52": 0.16902, + "53": 0.14276, + "54": 0.14121, + "55": 
0.14203, + "56": 0.14111, + "57": 0.14215, + "58": 0.14121, + "59": 0.14274, + "60": 0.14079, + "61": 0.14212, + "62": 0.14078, + "63": 0.14277, + "64": 0.14264, + "65": 0.14256, + "66": 0.14207, + "67": 0.14426, + "68": 0.14138, + "69": 0.14293, + "70": 0.1423, + "71": 0.14265, + "72": 0.14181, + "73": 0.14253, + "74": 0.14239, + "75": 0.1436, + "76": 0.14184, + "77": 0.14185, + "78": 0.14261, + "79": 0.14322, + "80": 0.14295, + "81": 0.14304, + "82": 0.14307, + "83": 0.14253, + "84": 0.14179, + "85": 0.14257, + "86": 0.14198, + "87": 0.15027, + "88": 0.14143, + "89": 0.14408, + "90": 0.14207, + "91": 0.14351, + "92": 0.14216, + "93": 0.14223, + "94": 0.14137, + "95": 0.14285, + "96": 0.14202, + "97": 0.14246, + "98": 0.1411, + "99": 0.14199, + "100": 0.14181 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..34c3b02116b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + 
"39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.84055, + "52": 9.73438, + "53": 10.05482, + "54": 9.94058, + "55": 9.87124, + "56": 9.61045, + "57": 9.46116, + "58": 9.81654, + "59": 9.57887, + "60": 9.48507, + "61": 9.68515, + "62": 9.97438, + "63": 9.36298, + "64": 9.76793, + "65": 8.93913, + "66": 9.68918, + "67": 9.36638, + "68": 9.77507, + "69": 9.78344, + "70": 9.72196, + "71": 9.60806, + "72": 9.57714, + "73": 9.48934, + "74": 8.94008, + "75": 9.40867, + "76": 9.08075, + "77": 10.05717, + "78": 9.72281, + "79": 9.36465, + "80": 9.39746, + "81": 9.47553, + "82": 9.6886, + "83": 9.30263, + "84": 9.41008, + "85": 9.60793, + "86": 9.07115, + "87": 9.58676, + "88": 9.74129, + "89": 9.5986, + "90": 9.81041, + "91": 9.33113, + "92": 9.35502, + "93": 9.07481, + "94": 8.82745, + "95": 9.51149, + "96": 9.51876, + "97": 9.30173, + "98": 9.66726, + "99": 8.88087, + "100": 9.39727 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2574.0, + "52": 2457.0, + "53": 2905.0, + "54": 2609.0, + "55": 2220.0, + "56": 2663.0, + "57": 2258.0, + "58": 2898.0, + 
"59": 2676.0, + "60": 2397.0, + "61": 3048.0, + "62": 2533.0, + "63": 2370.0, + "64": 2975.0, + "65": 2591.0, + "66": 3065.0, + "67": 2732.0, + "68": 2870.0, + "69": 2955.0, + "70": 3112.0, + "71": 2989.0, + "72": 2451.0, + "73": 2881.0, + "74": 1859.0, + "75": 2649.0, + "76": 3026.0, + "77": 3316.0, + "78": 3212.0, + "79": 3183.0, + "80": 3262.0, + "81": 3669.0, + "82": 3187.0, + "83": 2798.0, + "84": 3209.0, + "85": 3309.0, + "86": 2738.0, + "87": 3804.0, + "88": 2989.0, + "89": 3327.0, + "90": 3031.0, + "91": 2720.0, + "92": 2972.0, + "93": 2719.0, + "94": 3387.0, + "95": 3321.0, + "96": 3342.0, + "97": 3191.0, + "98": 3533.0, + "99": 3214.0, + "100": 3318.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 716834304.0, + "52": 716834304.0, + "53": 716834304.0, + "54": 716834304.0, + "55": 716834304.0, + "56": 716834304.0, + "57": 716834304.0, + "58": 716834304.0, + "59": 716834304.0, + "60": 716834304.0, + "61": 716834304.0, + "62": 716834304.0, + "63": 716834304.0, + "64": 716834304.0, + "65": 716834304.0, + "66": 716834304.0, + "67": 716834304.0, + "68": 716834304.0, + "69": 716834304.0, + "70": 716834304.0, + "71": 716834304.0, + "72": 716834304.0, + "73": 
716834304.0, + "74": 716834304.0, + "75": 716834304.0, + "76": 716834304.0, + "77": 716834304.0, + "78": 716834304.0, + "79": 716834304.0, + "80": 716834304.0, + "81": 716834304.0, + "82": 716834304.0, + "83": 716834304.0, + "84": 716834304.0, + "85": 716834304.0, + "86": 716834304.0, + "87": 716834304.0, + "88": 716834304.0, + "89": 716834304.0, + "90": 716834304.0, + "91": 716834304.0, + "92": 716834304.0, + "93": 716834304.0, + "94": 716834304.0, + "95": 716834304.0, + "96": 716834304.0, + "97": 716834304.0, + "98": 716834304.0, + "99": 716834304.0, + "100": 716834304.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2194121728.0, + "52": 2194122752.0, + "53": 2194122752.0, + "54": 2194122752.0, + "55": 2194122752.0, + "56": 2194122752.0, + "57": 2194122752.0, + "58": 2194122752.0, + "59": 2194122752.0, + "60": 2194122752.0, + "61": 2194122752.0, + "62": 2194122752.0, + "63": 2194122752.0, + "64": 2194122752.0, + "65": 2194122752.0, + "66": 2194122752.0, + "67": 2194122752.0, + "68": 2194122752.0, + "69": 2194122752.0, + "70": 2194122752.0, + "71": 2194122752.0, + "72": 2194122752.0, + "73": 2194122752.0, + "74": 2194122752.0, + "75": 2194122752.0, + "76": 
2194122752.0, + "77": 2194122752.0, + "78": 2194122752.0, + "79": 2194122752.0, + "80": 2194122752.0, + "81": 2194122752.0, + "82": 2194122752.0, + "83": 2194122752.0, + "84": 2194122752.0, + "85": 2194122752.0, + "86": 2194122752.0, + "87": 2194122752.0, + "88": 2194122752.0, + "89": 2194122752.0, + "90": 2194122752.0, + "91": 2194122752.0, + "92": 2194122752.0, + "93": 2194122752.0, + "94": 2194122752.0, + "95": 2194122752.0, + "96": 2194122752.0, + "97": 2194122752.0, + "98": 2194122752.0, + "99": 2194122752.0, + "100": 2194122752.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 8.8238, + "52": 0.63078, + "53": 0.15101, + "54": 0.14953, + "55": 0.15024, + "56": 0.14932, + "57": 0.15011, + "58": 0.15001, + "59": 0.15206, + "60": 0.14938, + "61": 0.1487, + "62": 0.14818, + "63": 0.14803, + "64": 0.15056, + "65": 0.14975, + "66": 0.14796, + "67": 0.14853, + "68": 0.14679, + "69": 0.14809, + "70": 0.14665, + "71": 0.14693, + "72": 0.1481, + "73": 0.14536, + "74": 0.14342, + "75": 0.14313, + "76": 0.14287, + "77": 0.14085, + "78": 0.14168, + "79": 0.14286, + "80": 0.14201, + "81": 0.14225, + "82": 0.14262, + "83": 0.14349, + "84": 0.14179, + "85": 0.14222, + "86": 0.14195, + 
"87": 0.14171, + "88": 0.14105, + "89": 0.14252, + "90": 0.14411, + "91": 0.1446, + "92": 0.14295, + "93": 0.14308, + "94": 0.14176, + "95": 0.14267, + "96": 0.14302, + "97": 0.14305, + "98": 0.14273, + "99": 0.14183, + "100": 0.14202 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..6d18d551f69 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.74992, + "2": 10.77613, + "3": 10.75714, + "4": 10.72305, + "5": 10.80036, + "6": 10.821, + "7": 10.77176, + "8": 10.7988, + "9": 10.77447, + "10": 10.70645, + "11": 10.8328, + "12": 10.81872, + "13": 10.83078, + "14": 10.83381, + "15": 10.76396, + "16": 10.76573, + "17": 10.71925, + "18": 10.76797, + "19": 10.75316, + "20": 10.70911, + "21": 10.69217, + "22": 10.56534, + "23": 10.70907, + "24": 10.6159, + "25": 10.55058, + "26": 10.62591, + "27": 10.64705, + "28": 10.63623, + "29": 10.65641, + "30": 10.43675, + "31": 10.21912, + "32": 10.5512, + "33": 10.53381, + "34": 10.31821, + "35": 10.36833, + "36": 10.3562, + "37": 10.46302, + "38": 10.33833, + "39": 10.50306, + "40": 10.23446, + "41": 10.27335, + "42": 10.3295, + "43": 9.97414, + "44": 10.1075, + "45": 9.98853, + "46": 9.95474, + "47": 10.2514, + "48": 10.01228, + "49": 9.70796, + "50": 10.05505, + "51": 9.9812, + "52": 9.89198, + "53": 10.19208, + "54": 10.09574, + "55": 10.00506, + "56": 9.78714, + "57": 9.64607, + "58": 9.9862, + "59": 9.72684, + "60": 9.67172, + "61": 9.80984, + "62": 10.11126, + "63": 9.54877, + "64": 9.90929, + "65": 9.08735, + "66": 9.84659, + "67": 9.48264, + 
"68": 9.89439, + "69": 9.87695, + "70": 9.82469, + "71": 9.72751, + "72": 9.72911, + "73": 9.62051, + "74": 9.11601, + "75": 9.55057, + "76": 9.21504, + "77": 10.14893, + "78": 9.8138, + "79": 9.47515, + "80": 9.51582, + "81": 9.58685, + "82": 9.79026, + "83": 9.45587, + "84": 9.50503, + "85": 9.71387, + "86": 9.17463, + "87": 9.66601, + "88": 9.84354, + "89": 9.70734, + "90": 9.8955, + "91": 9.48652, + "92": 9.47023, + "93": 9.21481, + "94": 8.94327, + "95": 9.6154, + "96": 9.63634, + "97": 9.37644, + "98": 9.74975, + "99": 9.01753, + "100": 9.50515 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2656.0, + "2": 2663.0, + "3": 2673.0, + "4": 2426.0, + "5": 2931.0, + "6": 3062.0, + "7": 2591.0, + "8": 2693.0, + "9": 2713.0, + "10": 2502.0, + "11": 2904.0, + "12": 2792.0, + "13": 2979.0, + "14": 3000.0, + "15": 2952.0, + "16": 2860.0, + "17": 2717.0, + "18": 2802.0, + "19": 2868.0, + "20": 2620.0, + "21": 2792.0, + "22": 2532.0, + "23": 2701.0, + "24": 2580.0, + "25": 2466.0, + "26": 2839.0, + "27": 2703.0, + "28": 2719.0, + "29": 2971.0, + "30": 2755.0, + "31": 2448.0, + "32": 2670.0, + "33": 2791.0, + "34": 2439.0, + "35": 2662.0, + "36": 2496.0, + "37": 2806.0, + "38": 2697.0, + "39": 2786.0, + "40": 2539.0, + "41": 2605.0, + "42": 2640.0, + "43": 2324.0, + "44": 2548.0, + "45": 2291.0, + "46": 2437.0, + "47": 2605.0, + "48": 2395.0, + "49": 2478.0, + "50": 2633.0, + "51": 2676.0, + "52": 2581.0, + "53": 2898.0, + "54": 2849.0, + "55": 2548.0, + "56": 2661.0, + "57": 2510.0, + "58": 2758.0, + "59": 2650.0, + "60": 2242.0, + "61": 2628.0, + "62": 2899.0, + "63": 2605.0, + "64": 2939.0, + "65": 2572.0, + "66": 2896.0, + "67": 2640.0, + "68": 2709.0, + "69": 2889.0, + "70": 3012.0, + "71": 2978.0, + "72": 2536.0, + "73": 2964.0, + "74": 2163.0, + "75": 2603.0, + "76": 2974.0, + "77": 3007.0, + "78": 3138.0, + "79": 3197.0, + "80": 2984.0, + "81": 3280.0, + "82": 3341.0, + "83": 2757.0, + "84": 3399.0, + "85": 
3320.0, + "86": 2882.0, + "87": 3407.0, + "88": 3278.0, + "89": 3336.0, + "90": 3322.0, + "91": 2472.0, + "92": 3061.0, + "93": 2911.0, + "94": 3005.0, + "95": 2984.0, + "96": 2991.0, + "97": 3178.0, + "98": 3343.0, + "99": 2929.0, + "100": 2588.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 745732608.0, + "2": 745732608.0, + "3": 745732608.0, + "4": 745732608.0, + "5": 745732608.0, + "6": 745732608.0, + "7": 745732608.0, + "8": 745732608.0, + "9": 745732608.0, + "10": 745732608.0, + "11": 745732608.0, + "12": 745732608.0, + "13": 745732608.0, + "14": 745732608.0, + "15": 745732608.0, + "16": 745732608.0, + "17": 745732608.0, + "18": 745732608.0, + "19": 745732608.0, + "20": 745732608.0, + "21": 745732608.0, + "22": 745732608.0, + "23": 745732608.0, + "24": 745732608.0, + "25": 745732608.0, + "26": 745732608.0, + "27": 745732608.0, + "28": 745732608.0, + "29": 745732608.0, + "30": 745732608.0, + "31": 745732608.0, + "32": 745732608.0, + "33": 745732608.0, + "34": 745732608.0, + "35": 745732608.0, + "36": 745732608.0, + "37": 745732608.0, + "38": 745732608.0, + "39": 745732608.0, + "40": 745732608.0, + "41": 745732608.0, + "42": 745732608.0, + "43": 745732608.0, + "44": 745732608.0, + "45": 745732608.0, + "46": 745732608.0, + "47": 745732608.0, + "48": 745732608.0, + "49": 745732608.0, + "50": 745732608.0, + "51": 745732608.0, + "52": 745732608.0, + "53": 745732608.0, + "54": 745732608.0, + "55": 745732608.0, + "56": 745732608.0, + "57": 745732608.0, + "58": 745732608.0, + "59": 745732608.0, + "60": 745732608.0, + "61": 745732608.0, + "62": 745732608.0, + "63": 745732608.0, + "64": 745732608.0, + "65": 745732608.0, + "66": 745732608.0, + "67": 745732608.0, + "68": 745732608.0, + "69": 745732608.0, + "70": 745732608.0, + "71": 745732608.0, + "72": 745732608.0, + "73": 745732608.0, + "74": 745732608.0, + "75": 745732608.0, + "76": 745732608.0, + "77": 745732608.0, + "78": 745732608.0, + "79": 
745732608.0, + "80": 745732608.0, + "81": 745732608.0, + "82": 745732608.0, + "83": 745732608.0, + "84": 745732608.0, + "85": 745732608.0, + "86": 745732608.0, + "87": 745732608.0, + "88": 745732608.0, + "89": 745732608.0, + "90": 745732608.0, + "91": 745732608.0, + "92": 745732608.0, + "93": 745732608.0, + "94": 745732608.0, + "95": 745732608.0, + "96": 745732608.0, + "97": 745732608.0, + "98": 745732608.0, + "99": 745732608.0, + "100": 745732608.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1940442112.0, + "2": 2223151104.0, + "3": 2223151104.0, + "4": 2223151104.0, + "5": 2223151104.0, + "6": 2223151104.0, + "7": 2223151104.0, + "8": 2223151104.0, + "9": 2223151104.0, + "10": 2223151104.0, + "11": 2223151104.0, + "12": 2223151104.0, + "13": 2223151104.0, + "14": 2223151104.0, + "15": 2223151104.0, + "16": 2223151104.0, + "17": 2223151104.0, + "18": 2223151104.0, + "19": 2223151104.0, + "20": 2223151104.0, + "21": 2223151104.0, + "22": 2223151104.0, + "23": 2223151104.0, + "24": 2223151104.0, + "25": 2223151104.0, + "26": 2223151104.0, + "27": 2223151104.0, + "28": 2223151104.0, + "29": 2223151104.0, + "30": 2223151104.0, + "31": 2223151104.0, + "32": 2223151104.0, + "33": 2223151104.0, + "34": 2223151104.0, + "35": 2223151104.0, + "36": 2223151104.0, + "37": 2223151104.0, + "38": 2223151104.0, + "39": 2223151104.0, + "40": 2223151104.0, + "41": 2223151104.0, + "42": 2223151104.0, + "43": 2223151104.0, + "44": 2223151104.0, + "45": 2223151104.0, + "46": 2223151104.0, + "47": 2223151104.0, + "48": 2223151104.0, + "49": 2223151104.0, + "50": 2223151104.0, + "51": 2223151104.0, + "52": 2223151104.0, + "53": 2223151104.0, + "54": 2223151104.0, + "55": 2223151104.0, + "56": 2223151104.0, + "57": 2223151104.0, + "58": 2223151104.0, + "59": 2223151104.0, + "60": 2223151104.0, + "61": 2223151104.0, + "62": 2223151104.0, + "63": 2223151104.0, + "64": 2223151104.0, + "65": 2223151104.0, + "66": 
2223151104.0, + "67": 2223151104.0, + "68": 2223151104.0, + "69": 2223151104.0, + "70": 2223151104.0, + "71": 2223151104.0, + "72": 2223151104.0, + "73": 2223151104.0, + "74": 2223151104.0, + "75": 2223151104.0, + "76": 2223151104.0, + "77": 2223151104.0, + "78": 2223151104.0, + "79": 2223151104.0, + "80": 2223151104.0, + "81": 2223151104.0, + "82": 2223151104.0, + "83": 2223151104.0, + "84": 2223151104.0, + "85": 2223151104.0, + "86": 2223151104.0, + "87": 2223151104.0, + "88": 2223151104.0, + "89": 2223151104.0, + "90": 2223151104.0, + "91": 2223151104.0, + "92": 2223151104.0, + "93": 2223151104.0, + "94": 2223151104.0, + "95": 2223151104.0, + "96": 2223151104.0, + "97": 2223151104.0, + "98": 2223151104.0, + "99": 2223151104.0, + "100": 2223151104.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.70264, + "2": 0.16719, + "3": 0.1517, + "4": 0.13783, + "5": 0.26129, + "6": 0.13706, + "7": 0.13419, + "8": 0.23253, + "9": 0.27748, + "10": 0.13541, + "11": 0.2497, + "12": 0.16837, + "13": 0.18244, + "14": 0.25112, + "15": 0.13528, + "16": 0.13665, + "17": 0.1335, + "18": 0.24242, + "19": 0.13551, + "20": 0.1359, + "21": 0.23117, + "22": 0.23904, + "23": 0.14673, + "24": 0.21295, + "25": 0.13514, + "26": 0.13371, + "27": 0.27353, + "28": 0.13711, + "29": 0.13562, + "30": 0.14989, + "31": 0.13559, + "32": 0.25304, + "33": 0.13594, + "34": 0.23626, + "35": 0.21619, + "36": 0.13222, + "37": 0.22334, + "38": 0.17132, + "39": 0.13473, + "40": 0.13527, + "41": 0.13612, + "42": 0.13601, + "43": 0.13671, + "44": 0.13525, + "45": 0.13595, + "46": 0.13781, + "47": 0.13561, + "48": 0.21607, + "49": 0.13778, + "50": 0.13576, + "51": 0.15841, + "52": 0.19731, + "53": 0.13535, + "54": 0.13412, + "55": 0.13529, + "56": 0.20892, + "57": 0.136, + "58": 0.13447, + "59": 0.13492, + "60": 0.22138, + "61": 0.1371, + "62": 0.13221, + "63": 0.31035, + "64": 0.13635, + "65": 0.18383, + "66": 0.13523, + "67": 0.21619, + "68": 
0.13406, + "69": 0.24552, + "70": 0.13459, + "71": 0.24237, + "72": 0.13438, + "73": 0.13314, + "74": 0.2234, + "75": 0.13466, + "76": 0.13379, + "77": 0.23131, + "78": 0.13685, + "79": 0.2198, + "80": 0.13574, + "81": 0.13541, + "82": 0.24005, + "83": 0.13618, + "84": 0.13532, + "85": 0.13462, + "86": 0.13568, + "87": 0.13402, + "88": 0.22458, + "89": 0.13468, + "90": 0.23352, + "91": 0.14917, + "92": 0.14938, + "93": 0.14799, + "94": 0.23609, + "95": 0.15009, + "96": 0.22721, + "97": 0.15604, + "98": 0.22921, + "99": 0.1552, + "100": 0.15308 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json index 245c396be68..42889e09b26 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 745731584.0, - "2": 745731584.0, - "3": 745731584.0, - "4": 745731584.0, - "5": 745731584.0, - "6": 745731584.0, - "7": 745731584.0, - "8": 745731584.0, - "9": 745731584.0, - "10": 745731584.0, - "11": 745731584.0, - "12": 745731584.0, - "13": 745731584.0, - "14": 745731584.0, - "15": 745731584.0, - "16": 745731584.0, - "17": 745731584.0, - "18": 745731584.0, - "19": 745731584.0, - "20": 745731584.0, - "21": 745731584.0, - "22": 745731584.0, - "23": 745731584.0, - "24": 745731584.0, - "25": 745731584.0, - "26": 745731584.0, - "27": 745731584.0, - "28": 745731584.0, - "29": 745731584.0, - "30": 745731584.0, - "31": 745731584.0, - "32": 745731584.0, - "33": 745731584.0, - "34": 745731584.0, - "35": 745731584.0, - "36": 745731584.0, - "37": 745731584.0, - "38": 745731584.0, - "39": 
745731584.0, - "40": 745731584.0, - "41": 745731584.0, - "42": 745731584.0, - "43": 745731584.0, - "44": 745731584.0, - "45": 745731584.0, - "46": 745731584.0, - "47": 745731584.0, - "48": 745731584.0, - "49": 745731584.0, - "50": 745731584.0, - "51": 745731584.0, - "52": 745731584.0, - "53": 745731584.0, - "54": 745731584.0, - "55": 745731584.0, - "56": 745731584.0, - "57": 745731584.0, - "58": 745731584.0, - "59": 745731584.0, - "60": 745731584.0, - "61": 745731584.0, - "62": 745731584.0, - "63": 745731584.0, - "64": 745731584.0, - "65": 745731584.0, - "66": 745731584.0, - "67": 745731584.0, - "68": 745731584.0, - "69": 745731584.0, - "70": 745731584.0, - "71": 745731584.0, - "72": 745731584.0, - "73": 745731584.0, - "74": 745731584.0, - "75": 745731584.0, - "76": 745731584.0, - "77": 745731584.0, - "78": 745731584.0, - "79": 745731584.0, - "80": 745731584.0, - "81": 745731584.0, - "82": 745731584.0, - "83": 745731584.0, - "84": 745731584.0, - "85": 745731584.0, - "86": 745731584.0, - "87": 745731584.0, - "88": 745731584.0, - "89": 745731584.0, - "90": 745731584.0, - "91": 745731584.0, - "92": 745731584.0, - "93": 745731584.0, - "94": 745731584.0, - "95": 745731584.0, - "96": 745731584.0, - "97": 745731584.0, - "98": 745731584.0, - "99": 745731584.0, - "100": 745731584.0 + "1": 744815104.0, + "2": 744815104.0, + "3": 744815104.0, + "4": 744815104.0, + "5": 744815104.0, + "6": 744815104.0, + "7": 744815104.0, + "8": 744815104.0, + "9": 744815104.0, + "10": 744815104.0, + "11": 744815104.0, + "12": 744815104.0, + "13": 744815104.0, + "14": 744815104.0, + "15": 744815104.0, + "16": 744815104.0, + "17": 744815104.0, + "18": 744815104.0, + "19": 744815104.0, + "20": 744815104.0, + "21": 744815104.0, + "22": 744815104.0, + "23": 744815104.0, + "24": 744815104.0, + "25": 744815104.0, + "26": 744815104.0, + "27": 744815104.0, + "28": 744815104.0, + "29": 744815104.0, + "30": 744815104.0, + "31": 744815104.0, + "32": 744815104.0, + "33": 744815104.0, + "34": 744815104.0, 
+ "35": 744815104.0, + "36": 744815104.0, + "37": 744815104.0, + "38": 744815104.0, + "39": 744815104.0, + "40": 744815104.0, + "41": 744815104.0, + "42": 744815104.0, + "43": 744815104.0, + "44": 744815104.0, + "45": 744815104.0, + "46": 744815104.0, + "47": 744815104.0, + "48": 744815104.0, + "49": 744815104.0, + "50": 744815104.0, + "51": 744815104.0, + "52": 744815104.0, + "53": 744815104.0, + "54": 744815104.0, + "55": 744815104.0, + "56": 744815104.0, + "57": 744815104.0, + "58": 744815104.0, + "59": 744815104.0, + "60": 744815104.0, + "61": 744815104.0, + "62": 744815104.0, + "63": 744815104.0, + "64": 744815104.0, + "65": 744815104.0, + "66": 744815104.0, + "67": 744815104.0, + "68": 744815104.0, + "69": 744815104.0, + "70": 744815104.0, + "71": 744815104.0, + "72": 744815104.0, + "73": 744815104.0, + "74": 744815104.0, + "75": 744815104.0, + "76": 744815104.0, + "77": 744815104.0, + "78": 744815104.0, + "79": 744815104.0, + "80": 744815104.0, + "81": 744815104.0, + "82": 744815104.0, + "83": 744815104.0, + "84": 744815104.0, + "85": 744815104.0, + "86": 744815104.0, + "87": 744815104.0, + "88": 744815104.0, + "89": 744815104.0, + "90": 744815104.0, + "91": 744815104.0, + "92": 744815104.0, + "93": 744815104.0, + "94": 744815104.0, + "95": 744815104.0, + "96": 744815104.0, + "97": 744815104.0, + "98": 744815104.0, + "99": 744815104.0, + "100": 744815104.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1928906752.0, - "2": 2210568192.0, - "3": 2210568192.0, - "4": 2210568192.0, - "5": 2210568192.0, - "6": 2210568192.0, - "7": 2210568192.0, - "8": 2210568192.0, - "9": 2210568192.0, - "10": 2210568192.0, - "11": 2210568192.0, - "12": 2210568192.0, - "13": 2210568192.0, - "14": 2210568192.0, - "15": 2210568192.0, - "16": 2210568192.0, - "17": 2210568192.0, - "18": 2210568192.0, - "19": 2210568192.0, - "20": 2210568192.0, - "21": 2210568192.0, - "22": 2210568192.0, - "23": 2210568192.0, - "24": 
2210568192.0, - "25": 2210568192.0, - "26": 2210568192.0, - "27": 2210568192.0, - "28": 2210568192.0, - "29": 2210568192.0, - "30": 2210568192.0, - "31": 2210568192.0, - "32": 2210568192.0, - "33": 2210568192.0, - "34": 2210568192.0, - "35": 2210568192.0, - "36": 2210568192.0, - "37": 2210568192.0, - "38": 2210568192.0, - "39": 2210568192.0, - "40": 2210568192.0, - "41": 2210568192.0, - "42": 2210568192.0, - "43": 2210568192.0, - "44": 2210568192.0, - "45": 2210568192.0, - "46": 2210568192.0, - "47": 2210568192.0, - "48": 2210568192.0, - "49": 2210568192.0, - "50": 2210568192.0, - "51": 2210568192.0, - "52": 2210568192.0, - "53": 2210568192.0, - "54": 2210568192.0, - "55": 2210568192.0, - "56": 2210568192.0, - "57": 2210568192.0, - "58": 2210568192.0, - "59": 2210568192.0, - "60": 2210568192.0, - "61": 2210568192.0, - "62": 2210568192.0, - "63": 2210568192.0, - "64": 2210568192.0, - "65": 2210568192.0, - "66": 2210568192.0, - "67": 2210568192.0, - "68": 2210568192.0, - "69": 2210568192.0, - "70": 2210568192.0, - "71": 2210568192.0, - "72": 2210568192.0, - "73": 2210568192.0, - "74": 2210568192.0, - "75": 2210568192.0, - "76": 2210568192.0, - "77": 2210568192.0, - "78": 2210568192.0, - "79": 2210568192.0, - "80": 2210568192.0, - "81": 2210568192.0, - "82": 2210568192.0, - "83": 2210568192.0, - "84": 2210568192.0, - "85": 2210568192.0, - "86": 2210568192.0, - "87": 2210568192.0, - "88": 2210568192.0, - "89": 2210568192.0, - "90": 2210568192.0, - "91": 2210568192.0, - "92": 2210568192.0, - "93": 2210568192.0, - "94": 2210568192.0, - "95": 2210568192.0, - "96": 2210568192.0, - "97": 2210568192.0, - "98": 2210568192.0, - "99": 2210568192.0, - "100": 2210568192.0 + "1": 1928907776.0, + "2": 2210305536.0, + "3": 2210305536.0, + "4": 2210305536.0, + "5": 2210305536.0, + "6": 2210305536.0, + "7": 2210305536.0, + "8": 2210305536.0, + "9": 2210305536.0, + "10": 2210305536.0, + "11": 2210305536.0, + "12": 2210305536.0, + "13": 2210305536.0, + "14": 2210305536.0, + "15": 
2210305536.0, + "16": 2210305536.0, + "17": 2210305536.0, + "18": 2210305536.0, + "19": 2210305536.0, + "20": 2210305536.0, + "21": 2210305536.0, + "22": 2210305536.0, + "23": 2210305536.0, + "24": 2210305536.0, + "25": 2210305536.0, + "26": 2210305536.0, + "27": 2210305536.0, + "28": 2210305536.0, + "29": 2210305536.0, + "30": 2210305536.0, + "31": 2210305536.0, + "32": 2210305536.0, + "33": 2210305536.0, + "34": 2210305536.0, + "35": 2210305536.0, + "36": 2210305536.0, + "37": 2210305536.0, + "38": 2210305536.0, + "39": 2210305536.0, + "40": 2210305536.0, + "41": 2210305536.0, + "42": 2210305536.0, + "43": 2210305536.0, + "44": 2210305536.0, + "45": 2210305536.0, + "46": 2210305536.0, + "47": 2210305536.0, + "48": 2210305536.0, + "49": 2210305536.0, + "50": 2210305536.0, + "51": 2210305536.0, + "52": 2210305536.0, + "53": 2210305536.0, + "54": 2210305536.0, + "55": 2210305536.0, + "56": 2210305536.0, + "57": 2210305536.0, + "58": 2210305536.0, + "59": 2210305536.0, + "60": 2210305536.0, + "61": 2210305536.0, + "62": 2210305536.0, + "63": 2210305536.0, + "64": 2210305536.0, + "65": 2210305536.0, + "66": 2210305536.0, + "67": 2210305536.0, + "68": 2210305536.0, + "69": 2210305536.0, + "70": 2210305536.0, + "71": 2210305536.0, + "72": 2210305536.0, + "73": 2210305536.0, + "74": 2210305536.0, + "75": 2210305536.0, + "76": 2210305536.0, + "77": 2210305536.0, + "78": 2210305536.0, + "79": 2210305536.0, + "80": 2210305536.0, + "81": 2210305536.0, + "82": 2210305536.0, + "83": 2210305536.0, + "84": 2210305536.0, + "85": 2210305536.0, + "86": 2210305536.0, + "87": 2210305536.0, + "88": 2210305536.0, + "89": 2210305536.0, + "90": 2210305536.0, + "91": 2210305536.0, + "92": 2210305536.0, + "93": 2210305536.0, + "94": 2210305536.0, + "95": 2210305536.0, + "96": 2210305536.0, + "97": 2210305536.0, + "98": 2210305536.0, + "99": 2210305536.0, + "100": 2210305536.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 15.33061, 
- "2": 0.15156, - "3": 0.12174, - "4": 0.12197, - "5": 0.12023, - "6": 0.11997, - "7": 0.11882, - "8": 0.11859, - "9": 0.11967, - "10": 0.11724, - "11": 0.11735, - "12": 0.11593, - "13": 0.11661, - "14": 0.11794, - "15": 0.11649, - "16": 0.11682, - "17": 0.11623, - "18": 0.11719, - "19": 0.11753, - "20": 0.11581, - "21": 0.11757, - "22": 0.11628, - "23": 0.11692, - "24": 0.1163, - "25": 0.1167, - "26": 0.11646, - "27": 0.11803, - "28": 0.11984, - "29": 0.11941, - "30": 0.11857, - "31": 0.11687, - "32": 0.11515, - "33": 0.11754, - "34": 0.11591, - "35": 0.11819, - "36": 0.11754, - "37": 0.11694, - "38": 0.11726, - "39": 0.11761, - "40": 0.11745, - "41": 0.11768, - "42": 0.11775, - "43": 0.11661, - "44": 0.11724, - "45": 0.1189, - "46": 0.11964, - "47": 0.11985, - "48": 0.12086, - "49": 0.11855, - "50": 0.11941, - "51": 0.13155, - "52": 0.12627, - "53": 0.12132, - "54": 0.12027, - "55": 0.12076, - "56": 0.14178, - "57": 0.12294, - "58": 0.12155, - "59": 0.11843, - "60": 0.11687, - "61": 0.11827, - "62": 0.11957, - "63": 0.11945, - "64": 0.11781, - "65": 0.12041, - "66": 0.11949, - "67": 0.12059, - "68": 0.11821, - "69": 0.11858, - "70": 0.11799, - "71": 0.12009, - "72": 0.12095, - "73": 0.11845, - "74": 0.11834, - "75": 0.11893, - "76": 0.1214, - "77": 0.1195, - "78": 0.11933, - "79": 0.11885, - "80": 0.11948, - "81": 0.12097, - "82": 0.12, - "83": 0.11954, - "84": 0.11693, - "85": 0.1175, - "86": 0.11941, - "87": 0.11723, - "88": 0.11941, - "89": 0.11804, - "90": 0.11751, - "91": 0.11952, - "92": 0.11778, - "93": 0.11924, - "94": 0.11755, - "95": 0.11789, - "96": 0.11673, - "97": 0.11967, - "98": 0.11752, - "99": 0.11926, - "100": 0.11806 + "1": 38.50475, + "2": 0.14031, + "3": 0.11652, + "4": 0.09549, + "5": 0.09354, + "6": 0.09569, + "7": 0.09409, + "8": 0.09473, + "9": 0.09388, + "10": 0.09459, + "11": 0.09596, + "12": 0.09466, + "13": 0.09509, + "14": 0.09586, + "15": 0.09314, + "16": 0.09368, + "17": 0.09468, + "18": 0.09494, + "19": 0.09289, + "20": 0.09427, + 
"21": 0.09599, + "22": 0.09701, + "23": 0.09665, + "24": 0.09712, + "25": 0.09542, + "26": 0.09515, + "27": 0.09642, + "28": 0.09519, + "29": 0.09691, + "30": 0.09651, + "31": 0.09742, + "32": 0.09503, + "33": 0.09471, + "34": 0.09424, + "35": 0.09574, + "36": 0.09438, + "37": 0.09509, + "38": 0.09428, + "39": 0.09484, + "40": 0.09459, + "41": 0.0951, + "42": 0.09671, + "43": 0.09633, + "44": 0.09511, + "45": 0.09592, + "46": 0.09579, + "47": 0.09614, + "48": 0.09464, + "49": 0.0958, + "50": 0.09782, + "51": 0.10564, + "52": 0.09373, + "53": 0.09475, + "54": 0.09323, + "55": 0.09237, + "56": 0.09293, + "57": 0.09228, + "58": 0.0948, + "59": 0.09906, + "60": 0.10026, + "61": 0.09961, + "62": 0.09923, + "63": 0.09889, + "64": 0.09888, + "65": 0.09925, + "66": 0.1, + "67": 0.09782, + "68": 0.09891, + "69": 0.09132, + "70": 0.09102, + "71": 0.091, + "72": 0.09368, + "73": 0.09219, + "74": 0.09374, + "75": 0.09232, + "76": 0.09428, + "77": 0.09256, + "78": 0.09623, + "79": 0.09624, + "80": 0.09622, + "81": 0.09668, + "82": 0.09651, + "83": 0.10042, + "84": 0.09998, + "85": 0.10102, + "86": 0.09975, + "87": 0.09955, + "88": 0.10135, + "89": 0.10038, + "90": 0.09933, + "91": 0.10071, + "92": 0.09992, + "93": 0.10054, + "94": 0.09927, + "95": 0.0998, + "96": 0.101, + "97": 0.09268, + "98": 0.09188, + "99": 0.09185, + "100": 0.09107 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..eca47cac99b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + 
"5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.00084, + "52": 9.89672, + "53": 10.19876, + "54": 10.09066, + "55": 10.00567, + "56": 9.77199, + "57": 9.64533, + "58": 9.98587, + "59": 9.72608, + "60": 9.6777, + "61": 9.8157, + "62": 10.092, + "63": 9.54758, + "64": 9.90438, + "65": 9.09492, + "66": 9.84068, + "67": 9.48471, + "68": 9.88996, + "69": 9.87691, + "70": 9.85294, + "71": 9.73278, + "72": 9.72558, + "73": 9.63706, + "74": 9.12334, + "75": 9.55335, + "76": 9.21765, + "77": 10.15202, + "78": 9.81465, + "79": 9.47558, + "80": 9.52073, + "81": 9.5872, + "82": 9.79125, + "83": 9.44848, + "84": 9.49585, + "85": 9.72189, + "86": 9.18037, + "87": 9.66127, + "88": 9.84359, + "89": 9.71651, + "90": 9.88102, + "91": 9.48434, + "92": 9.4705, + "93": 9.20911, + "94": 8.95382, + "95": 9.60554, + "96": 9.63976, + "97": 9.38762, + "98": 9.7573, + "99": 9.0159, + "100": 9.49925 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + 
"26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2575.0, + "52": 2621.0, + "53": 2891.0, + "54": 2655.0, + "55": 2559.0, + "56": 2566.0, + "57": 2471.0, + "58": 2767.0, + "59": 2529.0, + "60": 2289.0, + "61": 2642.0, + "62": 2820.0, + "63": 2654.0, + "64": 3020.0, + "65": 2687.0, + "66": 2884.0, + "67": 2666.0, + "68": 2720.0, + "69": 2738.0, + "70": 3004.0, + "71": 2816.0, + "72": 2537.0, + "73": 2826.0, + "74": 2192.0, + "75": 2647.0, + "76": 3048.0, + "77": 3019.0, + "78": 3134.0, + "79": 3092.0, + "80": 3054.0, + "81": 3298.0, + "82": 3350.0, + "83": 2597.0, + "84": 3436.0, + "85": 3350.0, + "86": 2993.0, + "87": 3509.0, + "88": 3403.0, + "89": 3490.0, + "90": 3368.0, + "91": 2461.0, + "92": 2803.0, + "93": 2933.0, + "94": 2888.0, + "95": 3138.0, + "96": 3047.0, + "97": 3016.0, + "98": 3382.0, + "99": 2995.0, + "100": 2490.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + 
"49": "nan", + "50": "nan", + "51": 745929216.0, + "52": 745929216.0, + "53": 745929216.0, + "54": 745929216.0, + "55": 745929216.0, + "56": 745929216.0, + "57": 745929216.0, + "58": 745929216.0, + "59": 745929216.0, + "60": 745929216.0, + "61": 745929216.0, + "62": 745929216.0, + "63": 745929216.0, + "64": 745929216.0, + "65": 745929216.0, + "66": 745929216.0, + "67": 745929216.0, + "68": 745929216.0, + "69": 745929216.0, + "70": 745929216.0, + "71": 745929216.0, + "72": 745929216.0, + "73": 745929216.0, + "74": 745929216.0, + "75": 745929216.0, + "76": 745929216.0, + "77": 745929216.0, + "78": 745929216.0, + "79": 745929216.0, + "80": 745929216.0, + "81": 745929216.0, + "82": 745929216.0, + "83": 745929216.0, + "84": 745929216.0, + "85": 745929216.0, + "86": 745929216.0, + "87": 745929216.0, + "88": 745929216.0, + "89": 745929216.0, + "90": 745929216.0, + "91": 745929216.0, + "92": 745929216.0, + "93": 745929216.0, + "94": 745929216.0, + "95": 745929216.0, + "96": 745929216.0, + "97": 745929216.0, + "98": 745929216.0, + "99": 745929216.0, + "100": 745929216.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2209847296.0, + "52": 2209848320.0, + "53": 2209848320.0, 
+ "54": 2209848320.0, + "55": 2209848320.0, + "56": 2209848320.0, + "57": 2209848320.0, + "58": 2209848320.0, + "59": 2209848320.0, + "60": 2209848320.0, + "61": 2209848320.0, + "62": 2209848320.0, + "63": 2209848320.0, + "64": 2209848320.0, + "65": 2209848320.0, + "66": 2209848320.0, + "67": 2209848320.0, + "68": 2209848320.0, + "69": 2209848320.0, + "70": 2209848320.0, + "71": 2209848320.0, + "72": 2209848320.0, + "73": 2209848320.0, + "74": 2209848320.0, + "75": 2209848320.0, + "76": 2209848320.0, + "77": 2209848320.0, + "78": 2209848320.0, + "79": 2209848320.0, + "80": 2209848320.0, + "81": 2209848320.0, + "82": 2209848320.0, + "83": 2209848320.0, + "84": 2209848320.0, + "85": 2209848320.0, + "86": 2209848320.0, + "87": 2209848320.0, + "88": 2209848320.0, + "89": 2209848320.0, + "90": 2209848320.0, + "91": 2209848320.0, + "92": 2209848320.0, + "93": 2209848320.0, + "94": 2209848320.0, + "95": 2209848320.0, + "96": 2209848320.0, + "97": 2209848320.0, + "98": 2209848320.0, + "99": 2209848320.0, + "100": 2209848320.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 37.2947, + "52": 0.14072, + "53": 0.09482, + "54": 0.09404, + "55": 0.09449, + "56": 0.09381, + "57": 0.09346, 
+ "58": 0.09378, + "59": 0.095, + "60": 0.09392, + "61": 0.09499, + "62": 0.09499, + "63": 0.09735, + "64": 0.10206, + "65": 0.09653, + "66": 0.09566, + "67": 0.09553, + "68": 0.09405, + "69": 0.09463, + "70": 0.09396, + "71": 0.09424, + "72": 0.0967, + "73": 0.09895, + "74": 0.09633, + "75": 0.0965, + "76": 0.09665, + "77": 0.10127, + "78": 0.10066, + "79": 0.10529, + "80": 0.10669, + "81": 0.10018, + "82": 0.09658, + "83": 0.09504, + "84": 0.0941, + "85": 0.09377, + "86": 0.09642, + "87": 0.09327, + "88": 0.09416, + "89": 0.09453, + "90": 0.09434, + "91": 0.09472, + "92": 0.09416, + "93": 0.09427, + "94": 0.09459, + "95": 0.09437, + "96": 0.09352, + "97": 0.09986, + "98": 0.09365, + "99": 0.09441, + "100": 0.094 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..87eebe31670 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.97513, + "2": 10.97995, + "3": 10.98066, + "4": 10.99791, + "5": 10.96412, + "6": 10.95966, + "7": 10.97622, + "8": 10.97531, + "9": 10.97506, + "10": 10.97665, + "11": 10.92846, + "12": 10.9494, + "13": 10.94009, + "14": 10.93747, + "15": 10.92917, + "16": 10.91904, + "17": 10.90495, + "18": 10.89425, + "19": 10.89215, + "20": 10.81808, + "21": 10.7816, + "22": 10.70813, + "23": 10.7819, + "24": 10.69774, + "25": 10.66245, + "26": 10.69992, + "27": 10.68419, + "28": 10.62061, + "29": 10.62277, + "30": 10.45367, + "31": 10.24899, + "32": 10.52222, + "33": 10.51211, + "34": 10.30154, + "35": 10.34384, + 
"36": 10.30677, + "37": 10.38891, + "38": 10.24857, + "39": 10.44177, + "40": 10.16246, + "41": 10.20434, + "42": 10.26319, + "43": 9.9082, + "44": 10.01995, + "45": 9.91152, + "46": 9.886, + "47": 10.18408, + "48": 9.9033, + "49": 9.59959, + "50": 9.96198, + "51": 9.90259, + "52": 9.79281, + "53": 10.11536, + "54": 9.99216, + "55": 9.91665, + "56": 9.66015, + "57": 9.52038, + "58": 9.87094, + "59": 9.6209, + "60": 9.54952, + "61": 9.70012, + "62": 10.00629, + "63": 9.42168, + "64": 9.79893, + "65": 8.97548, + "66": 9.73165, + "67": 9.38933, + "68": 9.80066, + "69": 9.81152, + "70": 9.76761, + "71": 9.63356, + "72": 9.59892, + "73": 9.51708, + "74": 8.96512, + "75": 9.43589, + "76": 9.11207, + "77": 10.06881, + "78": 9.72515, + "79": 9.39985, + "80": 9.41154, + "81": 9.50094, + "82": 9.69861, + "83": 9.33578, + "84": 9.4341, + "85": 9.63907, + "86": 9.06166, + "87": 9.60563, + "88": 9.77626, + "89": 9.6243, + "90": 9.82766, + "91": 9.35869, + "92": 9.38066, + "93": 9.09681, + "94": 8.83995, + "95": 9.52751, + "96": 9.53562, + "97": 9.32689, + "98": 9.69354, + "99": 8.88933, + "100": 9.42104 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 22726972.0, + "2": 22924386.0, + "3": 22597036.0, + "4": 23219218.0, + "5": 22714492.0, + "6": 23021698.0, + "7": 22771376.0, + "8": 22926820.0, + "9": 22841276.0, + "10": 22918392.0, + "11": 22500620.0, + "12": 22459672.0, + "13": 22917468.0, + "14": 22388398.0, + "15": 22822252.0, + "16": 22830612.0, + "17": 22820228.0, + "18": 22582844.0, + "19": 22618412.0, + "20": 22693594.0, + "21": 22739320.0, + "22": 22800076.0, + "23": 22539112.0, + "24": 22770966.0, + "25": 22819404.0, + "26": 22548188.0, + "27": 22468652.0, + "28": 22453560.0, + "29": 22530344.0, + "30": 22630776.0, + "31": 22955664.0, + "32": 22585020.0, + "33": 22558760.0, + "34": 22835536.0, + "35": 22787790.0, + "36": 22589526.0, + "37": 22497640.0, + "38": 22896056.0, + "39": 22802282.0, + "40": 22657698.0, 
+ "41": 22659592.0, + "42": 22666980.0, + "43": 22976392.0, + "44": 22747128.0, + "45": 22674364.0, + "46": 22883920.0, + "47": 22634300.0, + "48": 22928164.0, + "49": 22728710.0, + "50": 22904340.0, + "51": 22791436.0, + "52": 22748292.0, + "53": 22924772.0, + "54": 22840284.0, + "55": 22517880.0, + "56": 22877730.0, + "57": 23113080.0, + "58": 22845568.0, + "59": 22716022.0, + "60": 22743056.0, + "61": 22724434.0, + "62": 22672316.0, + "63": 22846416.0, + "64": 22823178.0, + "65": 23061654.0, + "66": 22729712.0, + "67": 22908434.0, + "68": 22610444.0, + "69": 22584604.0, + "70": 22828526.0, + "71": 22748442.0, + "72": 22655052.0, + "73": 22740588.0, + "74": 23048316.0, + "75": 23054664.0, + "76": 22901072.0, + "77": 22272198.0, + "78": 22789244.0, + "79": 22743700.0, + "80": 22706576.0, + "81": 22890704.0, + "82": 22778282.0, + "83": 22840256.0, + "84": 23010368.0, + "85": 22711796.0, + "86": 23103236.0, + "87": 22735120.0, + "88": 22636998.0, + "89": 22498612.0, + "90": 22972652.0, + "91": 22767776.0, + "92": 22809424.0, + "93": 22658980.0, + "94": 22911920.0, + "95": 23047890.0, + "96": 22828804.0, + "97": 22608196.0, + "98": 22762820.0, + "99": 22906714.0, + "100": 23016048.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 748541440.0, + "2": 748541440.0, + "3": 748541440.0, + "4": 748541440.0, + "5": 748541440.0, + "6": 748541440.0, + "7": 748541440.0, + "8": 748541440.0, + "9": 748541440.0, + "10": 748541440.0, + "11": 748541440.0, + "12": 748541440.0, + "13": 748541440.0, + "14": 748541440.0, + "15": 748541440.0, + "16": 748541440.0, + "17": 748541440.0, + "18": 748541440.0, + "19": 748541440.0, + "20": 748541440.0, + "21": 748541440.0, + "22": 748541440.0, + "23": 748541440.0, + "24": 748541440.0, + "25": 748541440.0, + "26": 748541440.0, + "27": 748541440.0, + "28": 748541440.0, + "29": 748541440.0, + "30": 748541440.0, + "31": 748541440.0, + "32": 748541440.0, + "33": 748541440.0, + 
"34": 748541440.0, + "35": 748541440.0, + "36": 748541440.0, + "37": 748541440.0, + "38": 748541440.0, + "39": 748541440.0, + "40": 748541440.0, + "41": 748541440.0, + "42": 748541440.0, + "43": 748541440.0, + "44": 748541440.0, + "45": 748541440.0, + "46": 748541440.0, + "47": 748541440.0, + "48": 748541440.0, + "49": 748541440.0, + "50": 748541440.0, + "51": 748541440.0, + "52": 748541440.0, + "53": 748541440.0, + "54": 748541440.0, + "55": 748541440.0, + "56": 748541440.0, + "57": 748541440.0, + "58": 748541440.0, + "59": 748541440.0, + "60": 748541440.0, + "61": 748541440.0, + "62": 748541440.0, + "63": 748541440.0, + "64": 748541440.0, + "65": 748541440.0, + "66": 748541440.0, + "67": 748541440.0, + "68": 748541440.0, + "69": 748541440.0, + "70": 748541440.0, + "71": 748541440.0, + "72": 748541440.0, + "73": 748541440.0, + "74": 748541440.0, + "75": 748541440.0, + "76": 748541440.0, + "77": 748541440.0, + "78": 748541440.0, + "79": 748541440.0, + "80": 748541440.0, + "81": 748541440.0, + "82": 748541440.0, + "83": 748541440.0, + "84": 748541440.0, + "85": 748541440.0, + "86": 748541440.0, + "87": 748541440.0, + "88": 748541440.0, + "89": 748541440.0, + "90": 748541440.0, + "91": 748541440.0, + "92": 748541440.0, + "93": 748541440.0, + "94": 748541440.0, + "95": 748541440.0, + "96": 748541440.0, + "97": 748541440.0, + "98": 748541440.0, + "99": 748541440.0, + "100": 748541440.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1939923968.0, + "2": 2224781312.0, + "3": 2224781312.0, + "4": 2224781312.0, + "5": 2224781312.0, + "6": 2224781312.0, + "7": 2224781312.0, + "8": 2224781312.0, + "9": 2224781312.0, + "10": 2224781312.0, + "11": 2224781312.0, + "12": 2224781312.0, + "13": 2224781312.0, + "14": 2224781312.0, + "15": 2224781312.0, + "16": 2224781312.0, + "17": 2224781312.0, + "18": 2224781312.0, + "19": 2224781312.0, + "20": 2224781312.0, + "21": 2224781312.0, + "22": 2224781312.0, + 
"23": 2224781312.0, + "24": 2224781312.0, + "25": 2224781312.0, + "26": 2224781312.0, + "27": 2224781312.0, + "28": 2224781312.0, + "29": 2224781312.0, + "30": 2224781312.0, + "31": 2224781312.0, + "32": 2224781312.0, + "33": 2224781312.0, + "34": 2224781312.0, + "35": 2224781312.0, + "36": 2224781312.0, + "37": 2224781312.0, + "38": 2224781312.0, + "39": 2224781312.0, + "40": 2224781312.0, + "41": 2224781312.0, + "42": 2224781312.0, + "43": 2224781312.0, + "44": 2224781312.0, + "45": 2224781312.0, + "46": 2224781312.0, + "47": 2224781312.0, + "48": 2224781312.0, + "49": 2224781312.0, + "50": 2224781312.0, + "51": 2224781312.0, + "52": 2224781312.0, + "53": 2224781312.0, + "54": 2224781312.0, + "55": 2224781312.0, + "56": 2224781312.0, + "57": 2224781312.0, + "58": 2224781312.0, + "59": 2224781312.0, + "60": 2224781312.0, + "61": 2224781312.0, + "62": 2224781312.0, + "63": 2224781312.0, + "64": 2224781312.0, + "65": 2224781312.0, + "66": 2224781312.0, + "67": 2224781312.0, + "68": 2224781312.0, + "69": 2224781312.0, + "70": 2224781312.0, + "71": 2224781312.0, + "72": 2224781312.0, + "73": 2224781312.0, + "74": 2224781312.0, + "75": 2224781312.0, + "76": 2224781312.0, + "77": 2224781312.0, + "78": 2224781312.0, + "79": 2224781312.0, + "80": 2224781312.0, + "81": 2224781312.0, + "82": 2224781312.0, + "83": 2224781312.0, + "84": 2224781312.0, + "85": 2224781312.0, + "86": 2224781312.0, + "87": 2224781312.0, + "88": 2224781312.0, + "89": 2224781312.0, + "90": 2224781312.0, + "91": 2224781312.0, + "92": 2224781312.0, + "93": 2224781312.0, + "94": 2224781312.0, + "95": 2224781312.0, + "96": 2224781312.0, + "97": 2224781312.0, + "98": 2224781312.0, + "99": 2224781312.0, + "100": 2224781312.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8.41422, + "2": 0.18257, + "3": 0.66774, + "4": 0.24561, + "5": 0.26628, + "6": 0.28507, + "7": 0.15561, + "8": 0.31346, + "9": 0.1544, + "10": 0.23888, + "11": 0.24945, + 
"12": 0.15494, + "13": 0.20224, + "14": 0.15018, + "15": 0.21414, + "16": 0.15279, + "17": 0.15714, + "18": 0.16051, + "19": 0.23287, + "20": 0.17277, + "21": 0.18416, + "22": 0.18504, + "23": 0.22706, + "24": 0.17428, + "25": 0.15714, + "26": 0.24051, + "27": 0.16163, + "28": 0.15307, + "29": 0.15547, + "30": 0.15066, + "31": 0.18968, + "32": 0.20133, + "33": 0.15407, + "34": 0.15375, + "35": 0.22411, + "36": 0.1654, + "37": 0.23902, + "38": 0.15259, + "39": 0.15371, + "40": 0.15185, + "41": 0.21089, + "42": 0.15272, + "43": 0.21496, + "44": 0.15539, + "45": 0.15507, + "46": 0.1557, + "47": 0.15641, + "48": 0.15434, + "49": 0.15017, + "50": 0.23326, + "51": 0.17863, + "52": 0.15471, + "53": 0.1511, + "54": 0.1513, + "55": 0.14791, + "56": 0.23169, + "57": 0.15152, + "58": 0.27611, + "59": 0.15101, + "60": 0.15075, + "61": 0.15095, + "62": 0.15099, + "63": 0.40681, + "64": 0.15196, + "65": 0.4085, + "66": 0.15392, + "67": 0.15079, + "68": 0.18374, + "69": 0.16595, + "70": 0.17343, + "71": 0.2083, + "72": 0.23324, + "73": 0.17579, + "74": 0.2442, + "75": 0.15263, + "76": 0.15001, + "77": 0.14836, + "78": 0.22649, + "79": 0.15368, + "80": 0.15125, + "81": 0.15382, + "82": 0.15532, + "83": 0.1536, + "84": 0.15494, + "85": 0.1516, + "86": 0.2253, + "87": 0.1656, + "88": 0.16481, + "89": 0.16686, + "90": 0.19956, + "91": 0.15647, + "92": 0.15231, + "93": 0.15013, + "94": 0.22716, + "95": 0.15151, + "96": 0.15158, + "97": 0.21549, + "98": 0.15054, + "99": 0.16863, + "100": 0.15247 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100.json index d3d593b49c2..4943a180a1f 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 746443264.0, - "2": 746443264.0, - "3": 746443264.0, - "4": 746443264.0, - "5": 746443264.0, - "6": 746443264.0, - "7": 746443264.0, - "8": 746443264.0, - "9": 746443264.0, - "10": 746443264.0, - "11": 746443264.0, - "12": 746443264.0, - "13": 746443264.0, - "14": 746443264.0, - "15": 746443264.0, - "16": 746443264.0, - "17": 746443264.0, - "18": 746443264.0, - "19": 746443264.0, - "20": 746443264.0, - "21": 746443264.0, - "22": 746443264.0, - "23": 746443264.0, - "24": 746443264.0, - "25": 746443264.0, - "26": 746443264.0, - "27": 746443264.0, - "28": 746443264.0, - "29": 746443264.0, - "30": 746443264.0, - "31": 746443264.0, - "32": 746443264.0, - "33": 746443264.0, - "34": 746443264.0, - "35": 746443264.0, - "36": 746443264.0, - "37": 746443264.0, - "38": 746443264.0, - "39": 746443264.0, - "40": 746443264.0, - "41": 746443264.0, - "42": 746443264.0, - "43": 746443264.0, - "44": 746443264.0, - "45": 746443264.0, - "46": 746443264.0, - "47": 746443264.0, - "48": 746443264.0, - "49": 746443264.0, - "50": 746443264.0, - "51": 746443264.0, - "52": 746443264.0, - "53": 746443264.0, - "54": 746443264.0, - "55": 746443264.0, - "56": 746443264.0, - "57": 746443264.0, - "58": 746443264.0, - "59": 746443264.0, - "60": 746443264.0, - "61": 746443264.0, - "62": 746443264.0, - "63": 746443264.0, - "64": 746443264.0, - "65": 746443264.0, - "66": 746443264.0, - "67": 746443264.0, - "68": 746443264.0, - "69": 746443264.0, - "70": 746443264.0, - "71": 746443264.0, - "72": 746443264.0, - "73": 746443264.0, - "74": 746443264.0, - "75": 746443264.0, - "76": 746443264.0, - "77": 746443264.0, - "78": 746443264.0, - "79": 
746443264.0, - "80": 746443264.0, - "81": 746443264.0, - "82": 746443264.0, - "83": 746443264.0, - "84": 746443264.0, - "85": 746443264.0, - "86": 746443264.0, - "87": 746443264.0, - "88": 746443264.0, - "89": 746443264.0, - "90": 746443264.0, - "91": 746443264.0, - "92": 746443264.0, - "93": 746443264.0, - "94": 746443264.0, - "95": 746443264.0, - "96": 746443264.0, - "97": 746443264.0, - "98": 746443264.0, - "99": 746443264.0, - "100": 746443264.0 + "1": 747492864.0, + "2": 747492864.0, + "3": 747492864.0, + "4": 747492864.0, + "5": 747492864.0, + "6": 747492864.0, + "7": 747492864.0, + "8": 747492864.0, + "9": 747492864.0, + "10": 747492864.0, + "11": 747492864.0, + "12": 747492864.0, + "13": 747492864.0, + "14": 747492864.0, + "15": 747492864.0, + "16": 747492864.0, + "17": 747492864.0, + "18": 747492864.0, + "19": 747492864.0, + "20": 747492864.0, + "21": 747492864.0, + "22": 747492864.0, + "23": 747492864.0, + "24": 747492864.0, + "25": 747492864.0, + "26": 747492864.0, + "27": 747492864.0, + "28": 747492864.0, + "29": 747492864.0, + "30": 747492864.0, + "31": 747492864.0, + "32": 747492864.0, + "33": 747492864.0, + "34": 747492864.0, + "35": 747492864.0, + "36": 747492864.0, + "37": 747492864.0, + "38": 747492864.0, + "39": 747492864.0, + "40": 747492864.0, + "41": 747492864.0, + "42": 747492864.0, + "43": 747492864.0, + "44": 747492864.0, + "45": 747492864.0, + "46": 747492864.0, + "47": 747492864.0, + "48": 747492864.0, + "49": 747492864.0, + "50": 747492864.0, + "51": 747492864.0, + "52": 747492864.0, + "53": 747492864.0, + "54": 747492864.0, + "55": 747492864.0, + "56": 747492864.0, + "57": 747492864.0, + "58": 747492864.0, + "59": 747492864.0, + "60": 747492864.0, + "61": 747492864.0, + "62": 747492864.0, + "63": 747492864.0, + "64": 747492864.0, + "65": 747492864.0, + "66": 747492864.0, + "67": 747492864.0, + "68": 747492864.0, + "69": 747492864.0, + "70": 747492864.0, + "71": 747492864.0, + "72": 747492864.0, + "73": 747492864.0, + "74": 747492864.0, 
+ "75": 747492864.0, + "76": 747492864.0, + "77": 747492864.0, + "78": 747492864.0, + "79": 747492864.0, + "80": 747492864.0, + "81": 747492864.0, + "82": 747492864.0, + "83": 747492864.0, + "84": 747492864.0, + "85": 747492864.0, + "86": 747492864.0, + "87": 747492864.0, + "88": 747492864.0, + "89": 747492864.0, + "90": 747492864.0, + "91": 747492864.0, + "92": 747492864.0, + "93": 747492864.0, + "94": 747492864.0, + "95": 747492864.0, + "96": 747492864.0, + "97": 747492864.0, + "98": 747492864.0, + "99": 747492864.0, + "100": 747492864.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1926291456.0, - "2": 2210100224.0, - "3": 2210100224.0, - "4": 2210100224.0, - "5": 2210100224.0, - "6": 2210100224.0, - "7": 2210100224.0, - "8": 2210100224.0, - "9": 2210100224.0, - "10": 2210100224.0, - "11": 2210100224.0, - "12": 2210100224.0, - "13": 2210100224.0, - "14": 2210100224.0, - "15": 2210100224.0, - "16": 2210100224.0, - "17": 2210100224.0, - "18": 2210100224.0, - "19": 2210100224.0, - "20": 2210100224.0, - "21": 2210100224.0, - "22": 2210100224.0, - "23": 2210100224.0, - "24": 2210100224.0, - "25": 2210100224.0, - "26": 2210100224.0, - "27": 2210100224.0, - "28": 2210100224.0, - "29": 2210100224.0, - "30": 2210100224.0, - "31": 2210100224.0, - "32": 2210100224.0, - "33": 2210100224.0, - "34": 2210100224.0, - "35": 2210100224.0, - "36": 2210100224.0, - "37": 2210100224.0, - "38": 2210100224.0, - "39": 2210100224.0, - "40": 2210100224.0, - "41": 2210100224.0, - "42": 2210100224.0, - "43": 2210100224.0, - "44": 2210100224.0, - "45": 2210100224.0, - "46": 2210100224.0, - "47": 2210100224.0, - "48": 2210100224.0, - "49": 2210100224.0, - "50": 2210100224.0, - "51": 2210100224.0, - "52": 2210100224.0, - "53": 2210100224.0, - "54": 2210100224.0, - "55": 2210100224.0, - "56": 2210100224.0, - "57": 2210100224.0, - "58": 2210100224.0, - "59": 2210100224.0, - "60": 2210100224.0, - "61": 2210100224.0, - "62": 
2210100224.0, - "63": 2210100224.0, - "64": 2210100224.0, - "65": 2210100224.0, - "66": 2210100224.0, - "67": 2210100224.0, - "68": 2210100224.0, - "69": 2210100224.0, - "70": 2210100224.0, - "71": 2210100224.0, - "72": 2210100224.0, - "73": 2210100224.0, - "74": 2210100224.0, - "75": 2210100224.0, - "76": 2210100224.0, - "77": 2210100224.0, - "78": 2210100224.0, - "79": 2210100224.0, - "80": 2210100224.0, - "81": 2210100224.0, - "82": 2210100224.0, - "83": 2210100224.0, - "84": 2210100224.0, - "85": 2210100224.0, - "86": 2210100224.0, - "87": 2210100224.0, - "88": 2210100224.0, - "89": 2210100224.0, - "90": 2210100224.0, - "91": 2210100224.0, - "92": 2210100224.0, - "93": 2210100224.0, - "94": 2210100224.0, - "95": 2210100224.0, - "96": 2210100224.0, - "97": 2210100224.0, - "98": 2210100224.0, - "99": 2210100224.0, - "100": 2210100224.0 + "1": 1927341056.0, + "2": 2212197376.0, + "3": 2212197376.0, + "4": 2212197376.0, + "5": 2212197376.0, + "6": 2212197376.0, + "7": 2212197376.0, + "8": 2212197376.0, + "9": 2212197376.0, + "10": 2212197376.0, + "11": 2212197376.0, + "12": 2212197376.0, + "13": 2212197376.0, + "14": 2212197376.0, + "15": 2212197376.0, + "16": 2212197376.0, + "17": 2212197376.0, + "18": 2212197376.0, + "19": 2212197376.0, + "20": 2212197376.0, + "21": 2212197376.0, + "22": 2212197376.0, + "23": 2212197376.0, + "24": 2212197376.0, + "25": 2212197376.0, + "26": 2212197376.0, + "27": 2212197376.0, + "28": 2212197376.0, + "29": 2212197376.0, + "30": 2212197376.0, + "31": 2212197376.0, + "32": 2212197376.0, + "33": 2212197376.0, + "34": 2212197376.0, + "35": 2212197376.0, + "36": 2212197376.0, + "37": 2212197376.0, + "38": 2212197376.0, + "39": 2212197376.0, + "40": 2212197376.0, + "41": 2212197376.0, + "42": 2212197376.0, + "43": 2212197376.0, + "44": 2212197376.0, + "45": 2212197376.0, + "46": 2212197376.0, + "47": 2212197376.0, + "48": 2212197376.0, + "49": 2212197376.0, + "50": 2212197376.0, + "51": 2212197376.0, + "52": 2212197376.0, + "53": 
2212197376.0, + "54": 2212197376.0, + "55": 2212197376.0, + "56": 2212197376.0, + "57": 2212197376.0, + "58": 2212197376.0, + "59": 2212197376.0, + "60": 2212197376.0, + "61": 2212197376.0, + "62": 2212197376.0, + "63": 2212197376.0, + "64": 2212197376.0, + "65": 2212197376.0, + "66": 2212197376.0, + "67": 2212197376.0, + "68": 2212197376.0, + "69": 2212197376.0, + "70": 2212197376.0, + "71": 2212197376.0, + "72": 2212197376.0, + "73": 2212197376.0, + "74": 2212197376.0, + "75": 2212197376.0, + "76": 2212197376.0, + "77": 2212197376.0, + "78": 2212197376.0, + "79": 2212197376.0, + "80": 2212197376.0, + "81": 2212197376.0, + "82": 2212197376.0, + "83": 2212197376.0, + "84": 2212197376.0, + "85": 2212197376.0, + "86": 2212197376.0, + "87": 2212197376.0, + "88": 2212197376.0, + "89": 2212197376.0, + "90": 2212197376.0, + "91": 2212197376.0, + "92": 2212197376.0, + "93": 2212197376.0, + "94": 2212197376.0, + "95": 2212197376.0, + "96": 2212197376.0, + "97": 2212197376.0, + "98": 2212197376.0, + "99": 2212197376.0, + "100": 2212197376.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 14.49723, - "2": 0.13917, - "3": 0.12323, - "4": 0.12243, - "5": 0.12247, - "6": 0.12126, - "7": 0.12098, - "8": 0.1227, - "9": 0.12232, - "10": 0.12216, - "11": 0.12203, - "12": 0.12472, - "13": 0.11919, - "14": 0.12363, - "15": 0.11934, - "16": 0.12078, - "17": 0.1214, - "18": 0.12382, - "19": 0.11938, - "20": 0.11818, - "21": 0.1195, - "22": 0.1193, - "23": 0.11729, - "24": 0.11671, - "25": 0.11812, - "26": 0.11788, - "27": 0.11835, - "28": 0.11687, - "29": 0.11683, - "30": 0.1185, - "31": 0.11738, - "32": 0.11696, - "33": 0.11541, - "34": 0.11482, - "35": 0.11307, - "36": 0.11445, - "37": 0.11503, - "38": 0.11448, - "39": 0.11562, - "40": 0.11468, - "41": 0.11341, - "42": 0.11368, - "43": 0.11604, - "44": 0.11649, - "45": 0.11581, - "46": 0.11637, - "47": 0.11699, - "48": 0.11661, - "49": 0.11522, - "50": 0.11451, - "51": 0.12299, 
- "52": 0.11449, - "53": 0.11137, - "54": 0.11274, - "55": 0.1121, - "56": 0.11212, - "57": 0.11573, - "58": 0.11206, - "59": 0.11388, - "60": 0.11369, - "61": 0.11208, - "62": 0.11287, - "63": 0.11238, - "64": 0.11193, - "65": 0.11205, - "66": 0.11482, - "67": 0.1131, - "68": 0.11433, - "69": 0.11257, - "70": 0.1116, - "71": 0.11365, - "72": 0.11214, - "73": 0.11376, - "74": 0.11389, - "75": 0.11397, - "76": 0.11359, - "77": 0.11346, - "78": 0.11235, - "79": 0.11282, - "80": 0.11301, - "81": 0.11347, - "82": 0.11356, - "83": 0.11321, - "84": 0.11412, - "85": 0.11256, - "86": 0.11555, - "87": 0.11224, - "88": 0.11344, - "89": 0.11351, - "90": 0.11218, - "91": 0.11235, - "92": 0.11417, - "93": 0.11691, - "94": 0.11326, - "95": 0.11519, - "96": 0.11321, - "97": 0.11272, - "98": 0.11268, - "99": 0.11187, - "100": 0.11371 + "1": 9.78643, + "2": 0.13398, + "3": 0.11557, + "4": 0.09095, + "5": 0.09137, + "6": 0.09276, + "7": 0.09034, + "8": 0.09082, + "9": 0.09002, + "10": 0.09121, + "11": 0.08989, + "12": 0.0895, + "13": 0.09015, + "14": 0.09012, + "15": 0.0903, + "16": 0.09019, + "17": 0.0907, + "18": 0.09055, + "19": 0.08988, + "20": 0.08984, + "21": 0.08951, + "22": 0.0913, + "23": 0.08972, + "24": 0.08995, + "25": 0.09008, + "26": 0.08931, + "27": 0.09055, + "28": 0.08926, + "29": 0.09028, + "30": 0.09142, + "31": 0.09085, + "32": 0.09027, + "33": 0.09061, + "34": 0.08998, + "35": 0.09113, + "36": 0.09039, + "37": 0.08973, + "38": 0.09065, + "39": 0.08993, + "40": 0.09112, + "41": 0.10695, + "42": 0.11371, + "43": 0.09964, + "44": 0.09076, + "45": 0.0899, + "46": 0.09204, + "47": 0.0904, + "48": 0.08998, + "49": 0.09097, + "50": 0.08971, + "51": 0.10825, + "52": 0.097, + "53": 0.09456, + "54": 0.09109, + "55": 0.09071, + "56": 0.09099, + "57": 0.09129, + "58": 0.09159, + "59": 0.09138, + "60": 0.09089, + "61": 0.09092, + "62": 0.09153, + "63": 0.09208, + "64": 0.09107, + "65": 0.0918, + "66": 0.09116, + "67": 0.09075, + "68": 0.09166, + "69": 0.0948, + "70": 
0.09166, + "71": 0.09195, + "72": 0.09271, + "73": 0.09226, + "74": 0.09271, + "75": 0.09216, + "76": 0.09129, + "77": 0.09221, + "78": 0.09252, + "79": 0.09161, + "80": 0.09144, + "81": 0.09112, + "82": 0.09152, + "83": 0.09106, + "84": 0.09137, + "85": 0.09127, + "86": 0.09136, + "87": 0.09077, + "88": 0.09362, + "89": 0.09244, + "90": 0.09162, + "91": 0.09114, + "92": 0.09065, + "93": 0.0913, + "94": 0.09071, + "95": 0.09096, + "96": 0.09066, + "97": 0.09585, + "98": 0.09148, + "99": 0.09232, + "100": 0.09229 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..2c197fd4e6b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.8866, + "52": 9.78429, + "53": 10.10842, + "54": 9.97368, + "55": 
9.89803, + "56": 9.65427, + "57": 9.52013, + "58": 9.87297, + "59": 9.6132, + "60": 9.54967, + "61": 9.70681, + "62": 9.98533, + "63": 9.41357, + "64": 9.80966, + "65": 8.97052, + "66": 9.72773, + "67": 9.39183, + "68": 9.8084, + "69": 9.82052, + "70": 9.76655, + "71": 9.63414, + "72": 9.60485, + "73": 9.52299, + "74": 8.9718, + "75": 9.42321, + "76": 9.10113, + "77": 10.0716, + "78": 9.74266, + "79": 9.40343, + "80": 9.41333, + "81": 9.49931, + "82": 9.70236, + "83": 9.33436, + "84": 9.43774, + "85": 9.63924, + "86": 9.07931, + "87": 9.60447, + "88": 9.7824, + "89": 9.62386, + "90": 9.84241, + "91": 9.35506, + "92": 9.38398, + "93": 9.09747, + "94": 8.8471, + "95": 9.5314, + "96": 9.54263, + "97": 9.32886, + "98": 9.6926, + "99": 8.89976, + "100": 9.43124 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 22791108.0, + "52": 22748190.0, + "53": 22924900.0, + "54": 22840164.0, + "55": 22518344.0, + "56": 22877680.0, + "57": 23113944.0, + "58": 22846268.0, + "59": 22716084.0, + "60": 22742984.0, + "61": 22724584.0, + "62": 22672944.0, + "63": 22846388.0, + "64": 22823650.0, + "65": 23061058.0, + "66": 22729266.0, + "67": 22908888.0, + "68": 22610020.0, + "69": 22583826.0, + "70": 
22829374.0, + "71": 22748240.0, + "72": 22654480.0, + "73": 22741180.0, + "74": 23047914.0, + "75": 23054396.0, + "76": 22900788.0, + "77": 22271588.0, + "78": 22789024.0, + "79": 22743632.0, + "80": 22706696.0, + "81": 22891372.0, + "82": 22777860.0, + "83": 22840532.0, + "84": 23010386.0, + "85": 22711212.0, + "86": 23103006.0, + "87": 22734564.0, + "88": 22637848.0, + "89": 22497850.0, + "90": 22972712.0, + "91": 22767188.0, + "92": 22808834.0, + "93": 22659304.0, + "94": 22911552.0, + "95": 23047794.0, + "96": 22829386.0, + "97": 22608168.0, + "98": 22762756.0, + "99": 22905900.0, + "100": 23015488.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 746444288.0, + "52": 746444288.0, + "53": 746444288.0, + "54": 746444288.0, + "55": 746444288.0, + "56": 746444288.0, + "57": 746444288.0, + "58": 746444288.0, + "59": 746444288.0, + "60": 746444288.0, + "61": 746444288.0, + "62": 746444288.0, + "63": 746444288.0, + "64": 746444288.0, + "65": 746444288.0, + "66": 746444288.0, + "67": 746444288.0, + "68": 746444288.0, + "69": 746444288.0, + "70": 746444288.0, + "71": 746444288.0, + "72": 746444288.0, + "73": 746444288.0, + "74": 746444288.0, + "75": 746444288.0, + "76": 
746444288.0, + "77": 746444288.0, + "78": 746444288.0, + "79": 746444288.0, + "80": 746444288.0, + "81": 746444288.0, + "82": 746444288.0, + "83": 746444288.0, + "84": 746444288.0, + "85": 746444288.0, + "86": 746444288.0, + "87": 746444288.0, + "88": 746444288.0, + "89": 746444288.0, + "90": 746444288.0, + "91": 746444288.0, + "92": 746444288.0, + "93": 746444288.0, + "94": 746444288.0, + "95": 746444288.0, + "96": 746444288.0, + "97": 746444288.0, + "98": 746444288.0, + "99": 746444288.0, + "100": 746444288.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2211148800.0, + "52": 2211149824.0, + "53": 2211149824.0, + "54": 2211149824.0, + "55": 2211149824.0, + "56": 2211149824.0, + "57": 2211149824.0, + "58": 2211149824.0, + "59": 2211149824.0, + "60": 2211149824.0, + "61": 2211149824.0, + "62": 2211149824.0, + "63": 2211149824.0, + "64": 2211149824.0, + "65": 2211149824.0, + "66": 2211149824.0, + "67": 2211149824.0, + "68": 2211149824.0, + "69": 2211149824.0, + "70": 2211149824.0, + "71": 2211149824.0, + "72": 2211149824.0, + "73": 2211149824.0, + "74": 2211149824.0, + "75": 2211149824.0, + "76": 2211149824.0, + "77": 2211149824.0, + "78": 2211149824.0, + "79": 
2211149824.0, + "80": 2211149824.0, + "81": 2211149824.0, + "82": 2211149824.0, + "83": 2211149824.0, + "84": 2211149824.0, + "85": 2211149824.0, + "86": 2211149824.0, + "87": 2211149824.0, + "88": 2211149824.0, + "89": 2211149824.0, + "90": 2211149824.0, + "91": 2211149824.0, + "92": 2211149824.0, + "93": 2211149824.0, + "94": 2211149824.0, + "95": 2211149824.0, + "96": 2211149824.0, + "97": 2211149824.0, + "98": 2211149824.0, + "99": 2211149824.0, + "100": 2211149824.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 8.06828, + "52": 0.13754, + "53": 0.09299, + "54": 0.0937, + "55": 0.09396, + "56": 0.09244, + "57": 0.09247, + "58": 0.09209, + "59": 0.09263, + "60": 0.09275, + "61": 0.09238, + "62": 0.09116, + "63": 0.0965, + "64": 0.09261, + "65": 0.09256, + "66": 0.09274, + "67": 0.09252, + "68": 0.09299, + "69": 0.09249, + "70": 0.09223, + "71": 0.09259, + "72": 0.09409, + "73": 0.09265, + "74": 0.09487, + "75": 0.0923, + "76": 0.09244, + "77": 0.09219, + "78": 0.0922, + "79": 0.09407, + "80": 0.09255, + "81": 0.09438, + "82": 0.09241, + "83": 0.09253, + "84": 0.09203, + "85": 0.09473, + "86": 0.09291, + "87": 0.0919, + "88": 0.0924, + "89": 0.09178, + "90": 0.09274, + 
"91": 0.09205, + "92": 0.09276, + "93": 0.09224, + "94": 0.09252, + "95": 0.09076, + "96": 0.09167, + "97": 0.09167, + "98": 0.0936, + "99": 0.09222, + "100": 0.09183 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json index 307cec2659c..80b22797395 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.1537, - "2": 0.18498, - "3": 0.16024, - "4": 0.16059, - "5": 0.16002, - "6": 0.16103, - "7": 0.1591, - "8": 0.15912, - "9": 0.15909, - "10": 0.1574, - "11": 0.15721, - "12": 0.15764, - "13": 0.16009, - "14": 0.16035, - "15": 0.15973, - "16": 0.15641, - "17": 0.15673, - "18": 0.1565, - "19": 0.15684, - "20": 0.15713, - "21": 0.15762, - "22": 0.15859, - "23": 0.15877, - "24": 0.15973, - "25": 0.15946, - "26": 0.15909, - "27": 0.15855, - "28": 0.15876, - "29": 0.15921, - "30": 0.16148, - "31": 0.15991, - "32": 0.1576, - "33": 0.15829, - "34": 0.15886, - "35": 0.15948, - "36": 0.15819, - "37": 0.15886, - "38": 0.15896, - "39": 0.16029, - "40": 0.15802, - "41": 0.16038, - "42": 0.15965, - "43": 0.15985, - "44": 0.15882, - "45": 0.16056, - "46": 0.1592, - "47": 0.20747, - "48": 0.16124, - "49": 0.16012, - "50": 0.15759, - "51": 0.16615, - "52": 0.15685, - "53": 0.15965, - "54": 0.15787, - "55": 0.15762, - "56": 0.15748, - "57": 0.15807, - "58": 0.15831, - "59": 0.15671, - "60": 0.15765, - "61": 0.15997, - "62": 0.15756, - "63": 0.15822, - "64": 0.15898, - "65": 0.15778, - "66": 
0.15853, - "67": 0.15855, - "68": 0.15784, - "69": 0.15777, - "70": 0.15791, - "71": 0.15907, - "72": 0.15986, - "73": 0.15727, - "74": 0.15842, - "75": 0.15738, - "76": 0.15786, - "77": 0.15749, - "78": 0.15761, - "79": 0.15838, - "80": 0.15955, - "81": 0.15796, - "82": 0.15816, - "83": 0.15953, - "84": 0.15849, - "85": 0.15905, - "86": 0.15852, - "87": 0.15827, - "88": 0.15773, - "89": 0.15778, - "90": 0.15679, - "91": 0.1583, - "92": 0.15749, - "93": 0.15843, - "94": 0.15878, - "95": 0.15805, - "96": 0.1588, - "97": 0.15983, - "98": 0.16098, - "99": 0.16131, - "100": 0.15935 + "1": 5.03932, + "2": 0.18621, + "3": 0.17196, + "4": 0.15545, + "5": 0.1504, + "6": 0.15031, + "7": 0.14857, + "8": 0.14917, + "9": 0.1495, + "10": 0.14924, + "11": 0.14939, + "12": 0.14861, + "13": 0.14915, + "14": 0.14919, + "15": 0.14909, + "16": 0.14904, + "17": 0.14933, + "18": 0.14874, + "19": 0.14902, + "20": 0.14813, + "21": 0.14885, + "22": 0.14872, + "23": 0.14993, + "24": 0.14895, + "25": 0.14768, + "26": 0.14781, + "27": 0.14754, + "28": 0.14775, + "29": 0.15216, + "30": 0.15461, + "31": 0.1541, + "32": 0.14739, + "33": 0.14626, + "34": 0.14619, + "35": 0.14604, + "36": 0.14567, + "37": 0.14566, + "38": 0.14678, + "39": 0.14625, + "40": 0.14515, + "41": 0.1459, + "42": 0.14526, + "43": 0.14647, + "44": 0.14562, + "45": 0.14545, + "46": 0.14621, + "47": 0.14567, + "48": 0.14603, + "49": 0.14558, + "50": 0.14505, + "51": 0.16204, + "52": 0.15073, + "53": 0.15152, + "54": 0.15093, + "55": 0.15055, + "56": 0.15091, + "57": 0.15302, + "58": 0.15142, + "59": 0.15079, + "60": 0.15185, + "61": 0.14979, + "62": 0.15038, + "63": 0.15098, + "64": 0.1503, + "65": 0.15057, + "66": 0.15088, + "67": 0.15024, + "68": 0.15134, + "69": 0.15072, + "70": 0.15092, + "71": 0.15108, + "72": 0.15129, + "73": 0.15025, + "74": 0.15185, + "75": 0.15148, + "76": 0.15102, + "77": 0.15066, + "78": 0.15069, + "79": 0.1514, + "80": 0.15055, + "81": 0.15068, + "82": 0.15079, + "83": 0.15141, + "84": 0.15081, + 
"85": 0.15116, + "86": 0.15171, + "87": 0.15012, + "88": 0.15018, + "89": 0.1509, + "90": 0.15033, + "91": 0.15134, + "92": 0.15061, + "93": 0.1505, + "94": 0.15109, + "95": 0.1506, + "96": 0.15188, + "97": 0.15182, + "98": 0.15154, + "99": 0.15201, + "100": 0.15117 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..6b3ff627828 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.86793, + "52": 9.76274, + "53": 10.10895, + "54": 9.95538, + "55": 9.8756, + "56": 9.64751, + "57": 9.48989, + "58": 9.85502, + "59": 9.59457, + "60": 9.52968, + "61": 9.69589, + "62": 10.01676, + "63": 9.38778, + "64": 9.80211, + "65": 8.95119, + "66": 9.72857, + "67": 9.37577, + "68": 9.80463, + "69": 9.81, + "70": 
9.7662, + "71": 9.63135, + "72": 9.5784, + "73": 9.52148, + "74": 8.94976, + "75": 9.43087, + "76": 9.08489, + "77": 10.089, + "78": 9.72754, + "79": 9.37612, + "80": 9.40849, + "81": 9.49766, + "82": 9.71298, + "83": 9.33332, + "84": 9.43928, + "85": 9.63373, + "86": 9.07038, + "87": 9.61245, + "88": 9.78304, + "89": 9.60878, + "90": 9.85164, + "91": 9.34542, + "92": 9.38281, + "93": 9.07319, + "94": 8.81684, + "95": 9.51809, + "96": 9.54033, + "97": 9.34061, + "98": 9.70134, + "99": 8.88786, + "100": 9.43285 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 22791326.0, + "52": 22749392.0, + "53": 22925970.0, + "54": 22839434.0, + "55": 22518416.0, + "56": 22877660.0, + "57": 23113304.0, + "58": 22845008.0, + "59": 22715512.0, + "60": 22743058.0, + "61": 22723950.0, + "62": 22673248.0, + "63": 22846074.0, + "64": 22823228.0, + "65": 23060212.0, + "66": 22729902.0, + "67": 22907278.0, + "68": 22610092.0, + "69": 22584360.0, + "70": 22829348.0, + "71": 22749420.0, + "72": 22655446.0, + "73": 22740974.0, + "74": 23048296.0, + "75": 23053922.0, + "76": 22901008.0, + "77": 22272806.0, + "78": 22789370.0, + "79": 22743288.0, + "80": 22706236.0, + "81": 22890976.0, + "82": 22777092.0, + 
"83": 22839240.0, + "84": 23010352.0, + "85": 22712004.0, + "86": 23103740.0, + "87": 22734788.0, + "88": 22637620.0, + "89": 22499200.0, + "90": 22972420.0, + "91": 22766428.0, + "92": 22808890.0, + "93": 22659888.0, + "94": 22910970.0, + "95": 23048514.0, + "96": 22829470.0, + "97": 22608826.0, + "98": 22763528.0, + "99": 22905754.0, + "100": 23016268.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 717083136.0, + "52": 717083136.0, + "53": 717083136.0, + "54": 717083136.0, + "55": 717083136.0, + "56": 717083136.0, + "57": 717083136.0, + "58": 717083136.0, + "59": 717083136.0, + "60": 717083136.0, + "61": 717083136.0, + "62": 717083136.0, + "63": 717083136.0, + "64": 717083136.0, + "65": 717083136.0, + "66": 717083136.0, + "67": 717083136.0, + "68": 717083136.0, + "69": 717083136.0, + "70": 717083136.0, + "71": 717083136.0, + "72": 717083136.0, + "73": 717083136.0, + "74": 717083136.0, + "75": 717083136.0, + "76": 717083136.0, + "77": 717083136.0, + "78": 717083136.0, + "79": 717083136.0, + "80": 717083136.0, + "81": 717083136.0, + "82": 717083136.0, + "83": 717083136.0, + "84": 717083136.0, + "85": 717083136.0, + "86": 717083136.0, + "87": 717083136.0, + "88": 
717083136.0, + "89": 717083136.0, + "90": 717083136.0, + "91": 717083136.0, + "92": 717083136.0, + "93": 717083136.0, + "94": 717083136.0, + "95": 717083136.0, + "96": 717083136.0, + "97": 717083136.0, + "98": 717083136.0, + "99": 717083136.0, + "100": 717083136.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2194370560.0, + "52": 2194371584.0, + "53": 2194371584.0, + "54": 2194371584.0, + "55": 2194371584.0, + "56": 2194371584.0, + "57": 2194371584.0, + "58": 2194371584.0, + "59": 2194371584.0, + "60": 2194371584.0, + "61": 2194371584.0, + "62": 2194371584.0, + "63": 2194371584.0, + "64": 2194371584.0, + "65": 2194371584.0, + "66": 2194371584.0, + "67": 2194371584.0, + "68": 2194371584.0, + "69": 2194371584.0, + "70": 2194371584.0, + "71": 2194371584.0, + "72": 2194371584.0, + "73": 2194371584.0, + "74": 2194371584.0, + "75": 2194371584.0, + "76": 2194371584.0, + "77": 2194371584.0, + "78": 2194371584.0, + "79": 2194371584.0, + "80": 2194371584.0, + "81": 2194371584.0, + "82": 2194371584.0, + "83": 2194371584.0, + "84": 2194371584.0, + "85": 2194371584.0, + "86": 2194371584.0, + "87": 2194371584.0, + "88": 2194371584.0, + "89": 2194371584.0, + "90": 2194371584.0, 
+ "91": 2194371584.0, + "92": 2194371584.0, + "93": 2194371584.0, + "94": 2194371584.0, + "95": 2194371584.0, + "96": 2194371584.0, + "97": 2194371584.0, + "98": 2194371584.0, + "99": 2194371584.0, + "100": 2194371584.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.88691, + "52": 0.18475, + "53": 0.15645, + "54": 0.15149, + "55": 0.15178, + "56": 0.15436, + "57": 0.15089, + "58": 0.15055, + "59": 0.15075, + "60": 0.1517, + "61": 0.15028, + "62": 0.14804, + "63": 0.14921, + "64": 0.15, + "65": 0.14973, + "66": 0.15168, + "67": 0.15493, + "68": 0.15271, + "69": 0.15341, + "70": 0.15423, + "71": 0.15432, + "72": 0.15491, + "73": 0.1552, + "74": 0.15454, + "75": 0.15427, + "76": 0.15393, + "77": 0.15383, + "78": 0.15459, + "79": 0.15484, + "80": 0.1534, + "81": 0.15504, + "82": 0.15286, + "83": 0.15444, + "84": 0.15427, + "85": 0.15522, + "86": 0.15438, + "87": 0.15378, + "88": 0.15395, + "89": 0.15338, + "90": 0.1542, + "91": 0.15415, + "92": 0.15382, + "93": 0.15529, + "94": 0.15411, + "95": 0.15301, + "96": 0.15392, + "97": 0.15398, + "98": 0.15485, + "99": 0.15384, + "100": 0.15373 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..06040458828 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82558, + "2": 10.83322, + "3": 10.82737, + "4": 10.79588, + "5": 10.85708, + "6": 10.86392, + "7": 10.8269, + "8": 10.82589, + "9": 10.83705, + "10": 10.79716, + "11": 10.87851, + "12": 10.85794, + "13": 10.8537, + "14": 10.87547, + "15": 10.79179, + "16": 10.80303, + "17": 10.7745, + "18": 10.804, + "19": 10.79363, + "20": 10.69591, + "21": 10.68551, + "22": 10.53149, + "23": 10.70658, + "24": 10.57317, + "25": 10.51546, + "26": 10.59072, + "27": 10.60736, + "28": 10.57024, + "29": 10.58904, + "30": 10.34679, + "31": 10.07734, + "32": 10.46319, + "33": 10.45704, + "34": 10.19923, + "35": 10.25593, + "36": 10.21246, + "37": 10.34688, + "38": 10.18009, + "39": 10.408, + "40": 10.07603, + "41": 10.12932, + "42": 10.21134, + "43": 9.81692, + "44": 9.94028, + "45": 9.81699, + "46": 9.80606, + "47": 10.12475, + "48": 9.8405, + "49": 9.50971, + "50": 9.88934 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1691.0, + "2": 1553.0, + "3": 1673.0, + "4": 1760.0, + "5": 1852.0, + "6": 1861.0, + "7": 1907.0, + "8": 1712.0, + "9": 1919.0, + "10": 1427.0, + "11": 1965.0, + "12": 1742.0, + "13": 1946.0, + "14": 1903.0, + "15": 1851.0, + "16": 1804.0, + "17": 1778.0, + "18": 1702.0, + "19": 1703.0, + "20": 1706.0, + "21": 1916.0, + "22": 1698.0, + "23": 2009.0, + "24": 1606.0, + "25": 1625.0, + "26": 1722.0, + "27": 1784.0, + "28": 1981.0, + "29": 1919.0, + "30": 1948.0, + "31": 1503.0, + "32": 1904.0, + "33": 2058.0, + "34": 1737.0, + "35": 
1916.0, + "36": 1980.0, + "37": 2263.0, + "38": 2121.0, + "39": 2277.0, + "40": 2021.0, + "41": 2202.0, + "42": 2340.0, + "43": 1973.0, + "44": 2006.0, + "45": 2128.0, + "46": 2132.0, + "47": 2438.0, + "48": 2286.0, + "49": 2215.0, + "50": 2337.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759682560.0, + "2": 759682560.0, + "3": 759682560.0, + "4": 759682560.0, + "5": 759682560.0, + "6": 759682560.0, + "7": 759682560.0, + "8": 759682560.0, + "9": 759682560.0, + "10": 759682560.0, + "11": 759682560.0, + "12": 759682560.0, + "13": 759682560.0, + "14": 759682560.0, + "15": 759682560.0, + "16": 759682560.0, + "17": 759682560.0, + "18": 759682560.0, + "19": 759682560.0, + "20": 759682560.0, + "21": 759682560.0, + "22": 759682560.0, + "23": 759682560.0, + "24": 759682560.0, + "25": 759682560.0, + "26": 759682560.0, + "27": 759682560.0, + "28": 759682560.0, + "29": 759682560.0, + "30": 759682560.0, + "31": 759682560.0, + "32": 759682560.0, + "33": 759682560.0, + "34": 759682560.0, + "35": 759682560.0, + "36": 759682560.0, + "37": 759682560.0, + "38": 759682560.0, + "39": 759682560.0, + "40": 759682560.0, + "41": 759682560.0, + "42": 759682560.0, + "43": 759682560.0, + "44": 759682560.0, + "45": 759682560.0, + "46": 759682560.0, + "47": 759682560.0, + "48": 759682560.0, + "49": 759682560.0, + "50": 759682560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3866814976.0, + "2": 4148526592.0, + "3": 4148526592.0, + "4": 4148526592.0, + "5": 4148526592.0, + "6": 4148526592.0, + "7": 4148526592.0, + "8": 4148526592.0, + "9": 4148526592.0, + "10": 4148526592.0, + "11": 4148526592.0, + "12": 4148526592.0, + "13": 4148526592.0, + "14": 4148526592.0, + "15": 4148526592.0, + "16": 4148526592.0, + "17": 4148526592.0, + "18": 4148526592.0, + "19": 4148526592.0, + "20": 4148526592.0, + "21": 4148526592.0, + "22": 4148526592.0, + "23": 
4148526592.0, + "24": 4148526592.0, + "25": 4148526592.0, + "26": 4148526592.0, + "27": 4148526592.0, + "28": 4148526592.0, + "29": 4148526592.0, + "30": 4148526592.0, + "31": 4148526592.0, + "32": 4148526592.0, + "33": 4148526592.0, + "34": 4148526592.0, + "35": 4148526592.0, + "36": 4148526592.0, + "37": 4148526592.0, + "38": 4148526592.0, + "39": 4148526592.0, + "40": 4148526592.0, + "41": 4148526592.0, + "42": 4148526592.0, + "43": 4148526592.0, + "44": 4148526592.0, + "45": 4148526592.0, + "46": 4148526592.0, + "47": 4148526592.0, + "48": 4148526592.0, + "49": 4148526592.0, + "50": 4148526592.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9.05034, + "2": 0.14876, + "3": 0.14285, + "4": 0.13033, + "5": 0.24651, + "6": 0.19893, + "7": 0.15924, + "8": 0.11963, + "9": 0.12767, + "10": 0.24283, + "11": 0.12856, + "12": 0.13101, + "13": 0.5056, + "14": 0.1222, + "15": 0.23869, + "16": 0.13294, + "17": 0.13193, + "18": 0.14163, + "19": 0.13647, + "20": 0.2257, + "21": 0.13437, + "22": 0.24393, + "23": 0.13446, + "24": 0.23274, + "25": 0.14725, + "26": 0.13804, + "27": 0.14255, + "28": 0.14086, + "29": 0.23437, + "30": 0.25225, + "31": 0.13433, + "32": 0.25099, + "33": 0.14422, + "34": 0.20638, + "35": 0.13575, + "36": 0.13592, + "37": 0.14521, + "38": 0.9985, + "39": 0.14828, + "40": 0.13964, + "41": 0.13609, + "42": 0.33948, + "43": 0.13414, + "44": 0.27111, + "45": 0.14576, + "46": 0.13882, + "47": 0.13432, + "48": 0.14571, + "49": 0.14535, + "50": 0.4444 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_h100.json index b5d55ac433c..1c87eb73023 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 759681536.0, - "2": 759681536.0, - "3": 759681536.0, - "4": 759681536.0, - "5": 759681536.0, - "6": 759681536.0, - "7": 759681536.0, - "8": 759681536.0, - "9": 759681536.0, - "10": 759681536.0, - "11": 759681536.0, - "12": 759681536.0, - "13": 759681536.0, - "14": 759681536.0, - "15": 759681536.0, - "16": 759681536.0, - "17": 759681536.0, - "18": 759681536.0, - "19": 759681536.0, - "20": 759681536.0, - "21": 759681536.0, - "22": 759681536.0, - "23": 759681536.0, - "24": 759681536.0, - "25": 759681536.0, - "26": 759681536.0, - "27": 759681536.0, - "28": 759681536.0, - "29": 759681536.0, - "30": 759681536.0, - "31": 759681536.0, - "32": 759681536.0, - "33": 759681536.0, - "34": 759681536.0, - "35": 759681536.0, - "36": 759681536.0, - "37": 759681536.0, - "38": 759681536.0, - "39": 759681536.0, - "40": 759681536.0, - "41": 759681536.0, - "42": 759681536.0, - "43": 759681536.0, - "44": 759681536.0, - "45": 759681536.0, - "46": 759681536.0, - "47": 759681536.0, - "48": 759681536.0, - "49": 759681536.0, - "50": 759681536.0 + "1": 759682560.0, + "2": 759682560.0, + "3": 759682560.0, + "4": 759682560.0, + "5": 759682560.0, + "6": 759682560.0, + "7": 759682560.0, + "8": 759682560.0, + "9": 759682560.0, + "10": 759682560.0, + "11": 759682560.0, + "12": 759682560.0, + "13": 759682560.0, + "14": 759682560.0, + "15": 759682560.0, + "16": 759682560.0, + "17": 759682560.0, + "18": 759682560.0, + "19": 759682560.0, + "20": 759682560.0, + "21": 759682560.0, + "22": 759682560.0, + "23": 759682560.0, + "24": 759682560.0, + "25": 759682560.0, + "26": 759682560.0, + "27": 759682560.0, + "28": 759682560.0, + "29": 759682560.0, + "30": 759682560.0, + "31": 759682560.0, + "32": 759682560.0, + "33": 759682560.0, + "34": 759682560.0, + "35": 759682560.0, + "36": 759682560.0, + "37": 759682560.0, + "38": 759682560.0, + 
"39": 759682560.0, + "40": 759682560.0, + "41": 759682560.0, + "42": 759682560.0, + "43": 759682560.0, + "44": 759682560.0, + "45": 759682560.0, + "46": 759682560.0, + "47": 759682560.0, + "48": 759682560.0, + "49": 759682560.0, + "50": 759682560.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3866813952.0, - "2": 4148525568.0, - "3": 4148525568.0, - "4": 4148525568.0, - "5": 4148525568.0, - "6": 4148525568.0, - "7": 4148525568.0, - "8": 4148525568.0, - "9": 4148525568.0, - "10": 4148525568.0, - "11": 4148525568.0, - "12": 4148525568.0, - "13": 4148525568.0, - "14": 4148525568.0, - "15": 4148525568.0, - "16": 4148525568.0, - "17": 4148525568.0, - "18": 4148525568.0, - "19": 4148525568.0, - "20": 4148525568.0, - "21": 4148525568.0, - "22": 4148525568.0, - "23": 4148525568.0, - "24": 4148525568.0, - "25": 4148525568.0, - "26": 4148525568.0, - "27": 4148525568.0, - "28": 4148525568.0, - "29": 4148525568.0, - "30": 4148525568.0, - "31": 4148525568.0, - "32": 4148525568.0, - "33": 4148525568.0, - "34": 4148525568.0, - "35": 4148525568.0, - "36": 4148525568.0, - "37": 4148525568.0, - "38": 4148525568.0, - "39": 4148525568.0, - "40": 4148525568.0, - "41": 4148525568.0, - "42": 4148525568.0, - "43": 4148525568.0, - "44": 4148525568.0, - "45": 4148525568.0, - "46": 4148525568.0, - "47": 4148525568.0, - "48": 4148525568.0, - "49": 4148525568.0, - "50": 4148525568.0 + "1": 3866814976.0, + "2": 4148526592.0, + "3": 4148526592.0, + "4": 4148526592.0, + "5": 4148526592.0, + "6": 4148526592.0, + "7": 4148526592.0, + "8": 4148526592.0, + "9": 4148526592.0, + "10": 4148526592.0, + "11": 4148526592.0, + "12": 4148526592.0, + "13": 4148526592.0, + "14": 4148526592.0, + "15": 4148526592.0, + "16": 4148526592.0, + "17": 4148526592.0, + "18": 4148526592.0, + "19": 4148526592.0, + "20": 4148526592.0, + "21": 4148526592.0, + "22": 4148526592.0, + "23": 4148526592.0, + "24": 4148526592.0, + "25": 4148526592.0, + "26": 
4148526592.0, + "27": 4148526592.0, + "28": 4148526592.0, + "29": 4148526592.0, + "30": 4148526592.0, + "31": 4148526592.0, + "32": 4148526592.0, + "33": 4148526592.0, + "34": 4148526592.0, + "35": 4148526592.0, + "36": 4148526592.0, + "37": 4148526592.0, + "38": 4148526592.0, + "39": 4148526592.0, + "40": 4148526592.0, + "41": 4148526592.0, + "42": 4148526592.0, + "43": 4148526592.0, + "44": 4148526592.0, + "45": 4148526592.0, + "46": 4148526592.0, + "47": 4148526592.0, + "48": 4148526592.0, + "49": 4148526592.0, + "50": 4148526592.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 12.80183, - "2": 0.14507, - "3": 0.13423, - "4": 0.12539, - "5": 0.12233, - "6": 0.12325, - "7": 0.12437, - "8": 0.12453, - "9": 0.12348, - "10": 0.12305, - "11": 0.12491, - "12": 0.12346, - "13": 0.1234, - "14": 0.12145, - "15": 0.12227, - "16": 0.12254, - "17": 0.12422, - "18": 0.12237, - "19": 0.12342, - "20": 0.1219, - "21": 0.1212, - "22": 0.12243, - "23": 0.11962, - "24": 0.1224, - "25": 0.12155, - "26": 0.12253, - "27": 0.12095, - "28": 0.12035, - "29": 0.12115, - "30": 0.11898, - "31": 0.12063, - "32": 0.1189, - "33": 0.12106, - "34": 0.11766, - "35": 0.11962, - "36": 0.12112, - "37": 0.11847, - "38": 0.11727, - "39": 0.11905, - "40": 0.11887, - "41": 0.11948, - "42": 0.11832, - "43": 0.11858, - "44": 0.1186, - "45": 0.12057, - "46": 0.1186, - "47": 0.12097, - "48": 0.11934, - "49": 0.11972, - "50": 0.12006 + "1": 9.85525, + "2": 0.11909, + "3": 0.10687, + "4": 0.08766, + "5": 0.08696, + "6": 0.08852, + "7": 0.08705, + "8": 0.0866, + "9": 0.08968, + "10": 0.09051, + "11": 0.08988, + "12": 0.08985, + "13": 0.09145, + "14": 0.09034, + "15": 0.09081, + "16": 0.09029, + "17": 0.09013, + "18": 0.09023, + "19": 0.09004, + "20": 0.09017, + "21": 0.08987, + "22": 0.09048, + "23": 0.09047, + "24": 0.08991, + "25": 0.09343, + "26": 0.0901, + "27": 0.08989, + "28": 0.09443, + "29": 0.09097, + "30": 0.09106, + "31": 0.0927, + "32": 
0.08602, + "33": 0.08691, + "34": 0.08755, + "35": 0.08733, + "36": 0.08692, + "37": 0.08659, + "38": 0.08868, + "39": 0.08692, + "40": 0.08731, + "41": 0.08817, + "42": 0.08696, + "43": 0.08838, + "44": 0.08859, + "45": 0.08767, + "46": 0.0873, + "47": 0.08882, + "48": 0.08631, + "49": 0.08619, + "50": 0.0861 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgx_a100.json index 4bf73c8b005..a98babc2900 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgx_a100.json @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 9.25479, - "2": 0.18004, - "3": 0.15444, - "4": 0.15284, - "5": 0.15391, - "6": 0.14333, - "7": 0.14244, - "8": 0.13997, - "9": 0.14112, - "10": 0.13863, - "11": 0.13707, - "12": 0.13575, - "13": 0.13558, - "14": 0.13535, - "15": 0.13556, - "16": 0.13648, - "17": 0.13495, - "18": 0.1343, - "19": 0.13442, - "20": 0.13441, - "21": 0.1344, - "22": 0.13478, - "23": 0.13473, - "24": 0.13476, - "25": 0.13536, - "26": 0.13345, - "27": 0.1342, - "28": 0.13421, - "29": 0.13479, - "30": 0.13378, - "31": 0.13418, - "32": 0.13411, - "33": 0.13351, - "34": 0.13374, - "35": 0.13406, - "36": 0.13396, - "37": 0.13435, - "38": 0.13356, - "39": 0.13367, - "40": 0.13361, - "41": 0.13454, - "42": 0.13463, - "43": 0.13524, - "44": 0.13356, - "45": 0.13403, - "46": 0.1347, - "47": 0.13379, - "48": 0.1343, - "49": 0.13391, - "50": 0.13371 + "1": 5.08022, + "2": 0.18501, + "3": 0.16189, + "4": 0.1446, + "5": 0.14506, + "6": 0.1419, + "7": 0.14224, + "8": 0.14228, + "9": 0.14173, + "10": 0.14459, + "11": 0.14301, + "12": 0.14363, + "13": 0.14381, + "14": 0.143, + "15": 0.14252, + "16": 0.14227, + "17": 0.14143, + "18": 0.1425, + "19": 
0.14097, + "20": 0.14109, + "21": 0.1415, + "22": 0.14165, + "23": 0.142, + "24": 0.14241, + "25": 0.1412, + "26": 0.14126, + "27": 0.14207, + "28": 0.14045, + "29": 0.14206, + "30": 0.14192, + "31": 0.14255, + "32": 0.14132, + "33": 0.14178, + "34": 0.14151, + "35": 0.14117, + "36": 0.14088, + "37": 0.14137, + "38": 0.14111, + "39": 0.13997, + "40": 0.14118, + "41": 0.14179, + "42": 0.14063, + "43": 0.14381, + "44": 0.14122, + "45": 0.14142, + "46": 0.14112, + "47": 0.14094, + "48": 0.14134, + "49": 0.14094, + "50": 0.14002 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json index f023ed07c99..110646cd819 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 552193536.0, - "2": 552193536.0, - "3": 552193536.0, - "4": 553242112.0, - "5": 552193536.0, - "6": 553242112.0, - "7": 553242112.0, - "8": 552193536.0, - "9": 552193536.0, - "10": 552193536.0, - "11": 553242112.0, - "12": 552193536.0, - "13": 552193536.0, - "14": 552193536.0, - "15": 552193536.0, - "16": 553242112.0, - "17": 553242112.0, - "18": 552193536.0, - "19": 553242112.0, - "20": 552193536.0, - "21": 552193536.0, - "22": 552193536.0, - "23": 552193536.0, - "24": 552193536.0, - "25": 552193536.0, - "26": 552193536.0, - "27": 552193536.0, - "28": 552193536.0, - "29": 552193536.0, - "30": 552193536.0, - "31": 552193536.0, - 
"32": 552193536.0, - "33": 552193536.0, - "34": 552193536.0, - "35": 552193536.0, - "36": 552193536.0, - "37": 552193536.0, - "38": 552193536.0, - "39": 552193536.0, - "40": 552193536.0, - "41": 552193536.0, - "42": 552193536.0, - "43": 552193536.0, - "44": 552193536.0, - "45": 553242112.0, - "46": 552193536.0, - "47": 552193536.0, - "48": 552193536.0, - "49": 552193536.0, - "50": 552193536.0 + "1": 554160640.0, + "2": 555209216.0, + "3": 554160640.0, + "4": 554160640.0, + "5": 554160640.0, + "6": 554160640.0, + "7": 554160640.0, + "8": 554160640.0, + "9": 555209216.0, + "10": 554160640.0, + "11": 554160640.0, + "12": 554160640.0, + "13": 554160640.0, + "14": 554160640.0, + "15": 554160640.0, + "16": 554160640.0, + "17": 554160640.0, + "18": 554160640.0, + "19": 554160640.0, + "20": 554160640.0, + "21": 554160640.0, + "22": 554160640.0, + "23": 554160640.0, + "24": 554160640.0, + "25": 554160640.0, + "26": 554160640.0, + "27": 554160640.0, + "28": 554160640.0, + "29": 554160640.0, + "30": 554160640.0, + "31": 554160640.0, + "32": 554160640.0, + "33": 554160640.0, + "34": 554160640.0, + "35": 554160640.0, + "36": 554160640.0, + "37": 554160640.0, + "38": 554160640.0, + "39": 554160640.0, + "40": 554160640.0, + "41": 554160640.0, + "42": 555209216.0, + "43": 554160640.0, + "44": 554160640.0, + "45": 554160640.0, + "46": 554160640.0, + "47": 554160640.0, + "48": 554160640.0, + "49": 554160640.0, + "50": 554160640.0 } }, "mem-max-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 3798208000.0, - "2": 3942086144.0, - "3": 3942086144.0, - "4": 3942086144.0, - "5": 3942086144.0, - "6": 3942086144.0, - "7": 3942086144.0, - "8": 3942086144.0, - "9": 3942086144.0, - "10": 3942086144.0, - "11": 3942086144.0, - "12": 3942086144.0, - "13": 3942086144.0, - "14": 3942086144.0, - "15": 3942086144.0, - "16": 3942086144.0, - "17": 3942086144.0, - "18": 3942086144.0, - "19": 3942086144.0, - "20": 3942086144.0, - "21": 3942086144.0, - "22": 3942086144.0, - 
"23": 3942086144.0, - "24": 3942086144.0, - "25": 3942086144.0, - "26": 3942086144.0, - "27": 3942086144.0, - "28": 3942086144.0, - "29": 3942086144.0, - "30": 3942086144.0, - "31": 3942086144.0, - "32": 3942086144.0, - "33": 3942086144.0, - "34": 3942086144.0, - "35": 3942086144.0, - "36": 3942086144.0, - "37": 3942086144.0, - "38": 3942086144.0, - "39": 3942086144.0, - "40": 3942086144.0, - "41": 3942086144.0, - "42": 3942086144.0, - "43": 3942086144.0, - "44": 3942086144.0, - "45": 3942086144.0, - "46": 3942086144.0, - "47": 3942086144.0, - "48": 3942086144.0, - "49": 3942086144.0, - "50": 3942086144.0 + "2": 3944053248.0, + "3": 3944053248.0, + "4": 3944053248.0, + "5": 3944053248.0, + "6": 3944053248.0, + "7": 3944053248.0, + "8": 3944053248.0, + "9": 3944053248.0, + "10": 3944053248.0, + "11": 3944053248.0, + "12": 3944053248.0, + "13": 3944053248.0, + "14": 3944053248.0, + "15": 3944053248.0, + "16": 3944053248.0, + "17": 3944053248.0, + "18": 3944053248.0, + "19": 3944053248.0, + "20": 3944053248.0, + "21": 3944053248.0, + "22": 3944053248.0, + "23": 3944053248.0, + "24": 3944053248.0, + "25": 3944053248.0, + "26": 3944053248.0, + "27": 3944053248.0, + "28": 3944053248.0, + "29": 3944053248.0, + "30": 3944053248.0, + "31": 3944053248.0, + "32": 3944053248.0, + "33": 3944053248.0, + "34": 3944053248.0, + "35": 3944053248.0, + "36": 3944053248.0, + "37": 3944053248.0, + "38": 3944053248.0, + "39": 3944053248.0, + "40": 3944053248.0, + "41": 3944053248.0, + "42": 3944053248.0, + "43": 3944053248.0, + "44": 3944053248.0, + "45": 3944053248.0, + "46": 3944053248.0, + "47": 3944053248.0, + "48": 3944053248.0, + "49": 3944053248.0, + "50": 3944053248.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.06303, - "2": 0.15398, - "3": 0.27325, - "4": 0.13945, - "5": 0.25021, - "6": 0.16329, - "7": 0.27717, - "8": 0.18718, - "9": 0.12007, - "10": 0.21402, - "11": 0.2385, - "12": 0.61603, - "13": 0.24413, - "14": 
0.18837, - "15": 0.14999, - "16": 0.12555, - "17": 0.24832, - "18": 0.1361, - "19": 0.13136, - "20": 0.27497, - "21": 0.22444, - "22": 0.11923, - "23": 0.11996, - "24": 0.25718, - "25": 0.20275, - "26": 0.35028, - "27": 0.11968, - "28": 0.23901, - "29": 0.12079, - "30": 0.12184, - "31": 0.21733, - "32": 0.28054, - "33": 0.11829, - "34": 0.17717, - "35": 0.1215, - "36": 0.27112, - "37": 0.22357, - "38": 0.12158, - "39": 0.12105, - "40": 0.12099, - "41": 0.21658, - "42": 0.22641, - "43": 0.12146, - "44": 0.1201, - "45": 0.253, - "46": 0.12142, - "47": 0.23268, - "48": 0.13569, - "49": 0.1302, - "50": 0.24153 + "1": 7.76857, + "2": 0.14325, + "3": 0.13072, + "4": 0.11885, + "5": 0.11896, + "6": 0.1181, + "7": 0.11917, + "8": 0.11807, + "9": 0.11852, + "10": 0.11869, + "11": 0.21274, + "12": 0.11744, + "13": 0.11909, + "14": 0.12072, + "15": 0.11937, + "16": 0.11875, + "17": 0.11813, + "18": 0.117, + "19": 0.11808, + "20": 0.1185, + "21": 0.21315, + "22": 0.11941, + "23": 0.11829, + "24": 0.12018, + "25": 0.11873, + "26": 0.12277, + "27": 0.11624, + "28": 0.11801, + "29": 0.11768, + "30": 0.11811, + "31": 0.21259, + "32": 0.11823, + "33": 0.11857, + "34": 0.11893, + "35": 0.12121, + "36": 0.11984, + "37": 0.12002, + "38": 0.11889, + "39": 0.12151, + "40": 0.11884, + "41": 0.21346, + "42": 0.11706, + "43": 0.12099, + "44": 0.1203, + "45": 0.11997, + "46": 0.12288, + "47": 0.12077, + "48": 0.11925, + "49": 0.11743, + "50": 0.11695 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json index 5e069163f6c..ea2f72181ea 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 552054272.0, - "2": 552054272.0, - "3": 552054272.0, - "4": 552054272.0, - "5": 552054272.0, - "6": 552054272.0, - "7": 552054272.0, - "8": 552054272.0, - "9": 552054272.0, - "10": 552054272.0, - "11": 552054272.0, - "12": 552054272.0, - "13": 552054272.0, - "14": 552054272.0, - "15": 552054272.0, - "16": 552054272.0, - "17": 552054272.0, - "18": 552054272.0, - "19": 552054272.0, - "20": 552054272.0, - "21": 552054272.0, - "22": 552054272.0, - "23": 552054272.0, - "24": 552054272.0, - "25": 552054272.0, - "26": 552054272.0, - "27": 552054272.0, - "28": 552054272.0, - "29": 552054272.0, - "30": 552054272.0, - "31": 552054272.0, - "32": 552054272.0, - "33": 552054272.0, - "34": 552054272.0, - "35": 552054272.0, - "36": 552054272.0, - "37": 552054272.0, - "38": 552054272.0, - "39": 552054272.0, - "40": 552054272.0, - "41": 552054272.0, - "42": 552054272.0, - "43": 552054272.0, - "44": 552054272.0, - "45": 552054272.0, - "46": 552054272.0, - "47": 552054272.0, - "48": 552054272.0, - "49": 552054272.0, - "50": 552054272.0 + "1": 553245184.0, + "2": 553245184.0, + "3": 553245184.0, + "4": 553245184.0, + "5": 553245184.0, + "6": 553245184.0, + "7": 553245184.0, + "8": 553245184.0, + "9": 553245184.0, + "10": 553245184.0, + "11": 553245184.0, + "12": 553245184.0, + "13": 553245184.0, + "14": 553245184.0, + "15": 553245184.0, + "16": 553245184.0, + "17": 553245184.0, + "18": 553245184.0, + "19": 553245184.0, + "20": 553245184.0, + "21": 553245184.0, + "22": 553245184.0, + "23": 553245184.0, + "24": 553245184.0, + "25": 553245184.0, + "26": 553245184.0, + "27": 553245184.0, 
+ "28": 553245184.0, + "29": 553245184.0, + "30": 553245184.0, + "31": 553245184.0, + "32": 553245184.0, + "33": 553245184.0, + "34": 553245184.0, + "35": 553245184.0, + "36": 553245184.0, + "37": 553245184.0, + "38": 553245184.0, + "39": 553245184.0, + "40": 553245184.0, + "41": 553245184.0, + "42": 553245184.0, + "43": 553245184.0, + "44": 553245184.0, + "45": 553245184.0, + "46": 553245184.0, + "47": 553245184.0, + "48": 553245184.0, + "49": 553245184.0, + "50": 553245184.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3798206976.0, - "2": 3940899328.0, - "3": 3940899328.0, - "4": 3940899328.0, - "5": 3940899328.0, - "6": 3940899328.0, - "7": 3940899328.0, - "8": 3940899328.0, - "9": 3940899328.0, - "10": 3940899328.0, - "11": 3940899328.0, - "12": 3940899328.0, - "13": 3940899328.0, - "14": 3940899328.0, - "15": 3940899328.0, - "16": 3940899328.0, - "17": 3940899328.0, - "18": 3940899328.0, - "19": 3940899328.0, - "20": 3940899328.0, - "21": 3940899328.0, - "22": 3940899328.0, - "23": 3940899328.0, - "24": 3940899328.0, - "25": 3940899328.0, - "26": 3940899328.0, - "27": 3940899328.0, - "28": 3940899328.0, - "29": 3940899328.0, - "30": 3940899328.0, - "31": 3940899328.0, - "32": 3940899328.0, - "33": 3940899328.0, - "34": 3940899328.0, - "35": 3940899328.0, - "36": 3940899328.0, - "37": 3940899328.0, - "38": 3940899328.0, - "39": 3940899328.0, - "40": 3940899328.0, - "41": 3940899328.0, - "42": 3940899328.0, - "43": 3940899328.0, - "44": 3940899328.0, - "45": 3940899328.0, - "46": 3940899328.0, - "47": 3940899328.0, - "48": 3940899328.0, - "49": 3940899328.0, - "50": 3940899328.0 + "1": 3798208000.0, + "2": 3943137792.0, + "3": 3943137792.0, + "4": 3943137792.0, + "5": 3943137792.0, + "6": 3943137792.0, + "7": 3943137792.0, + "8": 3943137792.0, + "9": 3943137792.0, + "10": 3943137792.0, + "11": 3943137792.0, + "12": 3943137792.0, + "13": 3943137792.0, + "14": 3943137792.0, + "15": 3943137792.0, + 
"16": 3943137792.0, + "17": 3943137792.0, + "18": 3943137792.0, + "19": 3943137792.0, + "20": 3943137792.0, + "21": 3943137792.0, + "22": 3943137792.0, + "23": 3943137792.0, + "24": 3943137792.0, + "25": 3943137792.0, + "26": 3943137792.0, + "27": 3943137792.0, + "28": 3943137792.0, + "29": 3943137792.0, + "30": 3943137792.0, + "31": 3943137792.0, + "32": 3943137792.0, + "33": 3943137792.0, + "34": 3943137792.0, + "35": 3943137792.0, + "36": 3943137792.0, + "37": 3943137792.0, + "38": 3943137792.0, + "39": 3943137792.0, + "40": 3943137792.0, + "41": 3943137792.0, + "42": 3943137792.0, + "43": 3943137792.0, + "44": 3943137792.0, + "45": 3943137792.0, + "46": 3943137792.0, + "47": 3943137792.0, + "48": 3943137792.0, + "49": 3943137792.0, + "50": 3943137792.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 15.65845, - "2": 0.14332, - "3": 0.12833, - "4": 0.12525, - "5": 0.12451, - "6": 0.12488, - "7": 0.12455, - "8": 0.12623, - "9": 0.1249, - "10": 0.127, - "11": 0.29256, - "12": 0.12446, - "13": 0.12388, - "14": 0.12448, - "15": 0.12475, - "16": 0.12507, - "17": 0.12682, - "18": 0.12473, - "19": 0.12569, - "20": 0.12441, - "21": 0.28384, - "22": 0.12554, - "23": 0.12552, - "24": 0.12663, - "25": 0.12441, - "26": 0.12547, - "27": 0.12485, - "28": 0.12492, - "29": 0.12419, - "30": 0.12518, - "31": 0.28416, - "32": 0.12399, - "33": 0.12692, - "34": 0.12606, - "35": 0.12537, - "36": 0.12614, - "37": 0.12484, - "38": 0.12464, - "39": 0.12396, - "40": 0.1239, - "41": 0.28831, - "42": 0.12609, - "43": 0.12537, - "44": 0.12484, - "45": 0.12567, - "46": 0.12791, - "47": 0.12281, - "48": 0.124, - "49": 0.12486, - "50": 0.12585 + "1": 13.97343, + "2": 0.13214, + "3": 0.11635, + "4": 0.09459, + "5": 0.0948, + "6": 0.09321, + "7": 0.09394, + "8": 0.09525, + "9": 0.09364, + "10": 0.09321, + "11": 0.22069, + "12": 0.09263, + "13": 0.09317, + "14": 0.09315, + "15": 0.09254, + "16": 0.09554, + "17": 0.09332, + "18": 0.09352, + 
"19": 0.09438, + "20": 0.09298, + "21": 0.22042, + "22": 0.09282, + "23": 0.09311, + "24": 0.09404, + "25": 0.09412, + "26": 0.09311, + "27": 0.09293, + "28": 0.09243, + "29": 0.09294, + "30": 0.09541, + "31": 0.22042, + "32": 0.09422, + "33": 0.09281, + "34": 0.09264, + "35": 0.09337, + "36": 0.09247, + "37": 0.09252, + "38": 0.09352, + "39": 0.09297, + "40": 0.09265, + "41": 0.22109, + "42": 0.09577, + "43": 0.09321, + "44": 0.0937, + "45": 0.09442, + "46": 0.09283, + "47": 0.09255, + "48": 0.09325, + "49": 0.09296, + "50": 0.09323 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgx_a100.json index 35ef87a5085..36d7ec97749 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgx_a100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 522452480.0, - "2": 522452480.0, - "3": 522452480.0, - "4": 522452480.0, - "5": 522452480.0, - "6": 522452480.0, - "7": 522452480.0, - "8": 522452480.0, - "9": 523501056.0, - "10": 522452480.0, - "11": 522452480.0, - "12": 523501056.0, - "13": 522452480.0, - "14": 522452480.0, - "15": 522452480.0, - "16": 522452480.0, - "17": 522452480.0, - "18": 522452480.0, - "19": 523501056.0, - "20": 523501056.0, - "21": 522452480.0, - "22": 522452480.0, - "23": 522452480.0, - "24": 523501056.0, - "25": 522452480.0, - "26": 522452480.0, - "27": 522452480.0, - "28": 522452480.0, - "29": 523501056.0, - "30": 522452480.0, - "31": 522452480.0, 
- "32": 522452480.0, - "33": 522452480.0, - "34": 522452480.0, - "35": 522452480.0, - "36": 522452480.0, - "37": 522452480.0, - "38": 522452480.0, - "39": 522452480.0, - "40": 522452480.0, - "41": 523371008.0, - "42": 522452480.0, - "43": 522452480.0, - "44": 522452480.0, - "45": 522452480.0, - "46": 523501056.0, - "47": 522452480.0, - "48": 522452480.0, - "49": 523501056.0, - "50": 522452480.0 + "1": 522966528.0, + "2": 522966528.0, + "3": 522966528.0, + "4": 522966528.0, + "5": 522966528.0, + "6": 522966528.0, + "7": 522966528.0, + "8": 522966528.0, + "9": 522966528.0, + "10": 522966528.0, + "11": 522966528.0, + "12": 522966528.0, + "13": 522966528.0, + "14": 522966528.0, + "15": 522966528.0, + "16": 522966528.0, + "17": 522966528.0, + "18": 522966528.0, + "19": 522966528.0, + "20": 522966528.0, + "21": 522966528.0, + "22": 522966528.0, + "23": 522966528.0, + "24": 522966528.0, + "25": 522966528.0, + "26": 522966528.0, + "27": 522966528.0, + "28": 522966528.0, + "29": 522966528.0, + "30": 522966528.0, + "31": 522966528.0, + "32": 522966528.0, + "33": 522966528.0, + "34": 522966528.0, + "35": 522966528.0, + "36": 522966528.0, + "37": 522966528.0, + "38": 522966528.0, + "39": 522966528.0, + "40": 522966528.0, + "41": 522966528.0, + "42": 522966528.0, + "43": 522966528.0, + "44": 522966528.0, + "45": 522966528.0, + "46": 522966528.0, + "47": 522966528.0, + "48": 522966528.0, + "49": 522966528.0, + "50": 522966528.0 } }, "mem-max-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 3768846848.0, - "2": 3913263616.0, - "3": 3913263616.0, - "4": 3913263616.0, - "5": 3913263616.0, - "6": 3913263616.0, - "7": 3913263616.0, - "8": 3913263616.0, - "9": 3913263616.0, - "10": 3913263616.0, - "11": 3913263616.0, - "12": 3913263616.0, - "13": 3913263616.0, - "14": 3913263616.0, - "15": 3913263616.0, - "16": 3913263616.0, - "17": 3913263616.0, - "18": 3913263616.0, - "19": 3913263616.0, - "20": 3913263616.0, - "21": 3913263616.0, - "22": 3913263616.0, 
- "23": 3913263616.0, - "24": 3913263616.0, - "25": 3913263616.0, - "26": 3913263616.0, - "27": 3913263616.0, - "28": 3913263616.0, - "29": 3913263616.0, - "30": 3913263616.0, - "31": 3913263616.0, - "32": 3913263616.0, - "33": 3913263616.0, - "34": 3913263616.0, - "35": 3913263616.0, - "36": 3913263616.0, - "37": 3913263616.0, - "38": 3913263616.0, - "39": 3913263616.0, - "40": 3913263616.0, - "41": 3913263616.0, - "42": 3913263616.0, - "43": 3913263616.0, - "44": 3913263616.0, - "45": 3913263616.0, - "46": 3913263616.0, - "47": 3913263616.0, - "48": 3913263616.0, - "49": 3913263616.0, - "50": 3913263616.0 + "2": 3913646592.0, + "3": 3913646592.0, + "4": 3913646592.0, + "5": 3913646592.0, + "6": 3913646592.0, + "7": 3913646592.0, + "8": 3913646592.0, + "9": 3913646592.0, + "10": 3913646592.0, + "11": 3913646592.0, + "12": 3913646592.0, + "13": 3913646592.0, + "14": 3913646592.0, + "15": 3913646592.0, + "16": 3913646592.0, + "17": 3913646592.0, + "18": 3913646592.0, + "19": 3913646592.0, + "20": 3913646592.0, + "21": 3913646592.0, + "22": 3913646592.0, + "23": 3913646592.0, + "24": 3913646592.0, + "25": 3913646592.0, + "26": 3913646592.0, + "27": 3913646592.0, + "28": 3913646592.0, + "29": 3913646592.0, + "30": 3913646592.0, + "31": 3913646592.0, + "32": 3913646592.0, + "33": 3913646592.0, + "34": 3913646592.0, + "35": 3913646592.0, + "36": 3913646592.0, + "37": 3913646592.0, + "38": 3913646592.0, + "39": 3913646592.0, + "40": 3913646592.0, + "41": 3913646592.0, + "42": 3913646592.0, + "43": 3913646592.0, + "44": 3913646592.0, + "45": 3913646592.0, + "46": 3913646592.0, + "47": 3913646592.0, + "48": 3913646592.0, + "49": 3913646592.0, + "50": 3913646592.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.5204, - "2": 0.1877, - "3": 0.15851, - "4": 0.15284, - "5": 0.15092, - "6": 0.15084, - "7": 0.14786, - "8": 0.14787, - "9": 0.14698, - "10": 0.15104, - "11": 0.29695, - "12": 0.14795, - "13": 0.14771, - "14": 
0.14662, - "15": 0.14409, - "16": 0.14378, - "17": 0.14431, - "18": 0.14329, - "19": 0.14334, - "20": 0.14441, - "21": 0.28011, - "22": 0.14378, - "23": 0.14643, - "24": 0.14572, - "25": 0.14331, - "26": 0.14307, - "27": 0.14541, - "28": 0.14512, - "29": 0.14536, - "30": 0.14358, - "31": 0.28944, - "32": 0.14533, - "33": 0.14477, - "34": 0.14423, - "35": 0.14395, - "36": 0.14486, - "37": 0.14319, - "38": 0.14455, - "39": 0.14454, - "40": 0.14537, - "41": 0.29312, - "42": 0.14458, - "43": 0.14749, - "44": 0.14448, - "45": 0.14501, - "46": 0.14588, - "47": 0.14249, - "48": 0.14564, - "49": 0.14388, - "50": 0.14222 + "1": 7.26942, + "2": 0.17361, + "3": 0.16661, + "4": 0.15374, + "5": 0.1539, + "6": 0.15237, + "7": 0.15491, + "8": 0.16016, + "9": 0.1524, + "10": 0.14907, + "11": 0.28249, + "12": 0.14867, + "13": 0.14835, + "14": 0.14748, + "15": 0.14906, + "16": 0.14768, + "17": 0.15182, + "18": 0.14947, + "19": 0.15009, + "20": 0.14968, + "21": 0.28262, + "22": 0.14991, + "23": 0.14955, + "24": 0.14949, + "25": 0.14929, + "26": 0.14942, + "27": 0.14898, + "28": 0.15187, + "29": 0.14918, + "30": 0.14827, + "31": 0.2861, + "32": 0.14873, + "33": 0.14777, + "34": 0.14736, + "35": 0.14865, + "36": 0.14795, + "37": 0.148, + "38": 0.14799, + "39": 0.14777, + "40": 0.14776, + "41": 0.28572, + "42": 0.14812, + "43": 0.14967, + "44": 0.14785, + "45": 0.14785, + "46": 0.14867, + "47": 0.14775, + "48": 0.14841, + "49": 0.14786, + "50": 0.14872 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..641a00e237a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, 
+ "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82555, + "2": 10.83286, + "3": 10.82762, + "4": 10.79573, + "5": 10.85695, + "6": 10.86391, + "7": 10.82616, + "8": 10.82544, + "9": 10.83584, + "10": 10.79629, + "11": 10.8782, + "12": 10.85821, + "13": 10.85418, + "14": 10.87518, + "15": 10.79205, + "16": 10.80305, + "17": 10.77428, + "18": 10.8046, + "19": 10.79338, + "20": 10.69563, + "21": 10.68645, + "22": 10.53149, + "23": 10.70629, + "24": 10.57273, + "25": 10.5144, + "26": 10.58993, + "27": 10.60707, + "28": 10.57003, + "29": 10.58929, + "30": 10.34675, + "31": 10.07709, + "32": 10.46194, + "33": 10.45484, + "34": 10.19662, + "35": 10.25291, + "36": 10.20971, + "37": 10.34492, + "38": 10.17789, + "39": 10.4061, + "40": 10.07414, + "41": 10.12736, + "42": 10.20823, + "43": 9.81194, + "44": 9.93354, + "45": 9.80953, + "46": 9.79773, + "47": 10.11569, + "48": 9.83234, + "49": 9.50281, + "50": 9.88181, + "51": 9.83458, + "52": 9.71756, + "53": 10.05126, + "54": 9.94371, + "55": 9.87457, + "56": 9.6029, + "57": 9.45086, + "58": 9.811, + "59": 9.56395, + "60": 9.47155, + "61": 9.66553, + "62": 9.96353, + "63": 9.34709, + "64": 9.743, + "65": 8.92136, + "66": 9.67858, + "67": 9.35222, + "68": 9.76563, + "69": 9.7774, + "70": 9.70407, + "71": 9.60099, + "72": 9.5498, + "73": 9.46046, + "74": 8.89068, + "75": 9.3874, + "76": 9.04469, + "77": 10.03647, + "78": 9.6996, + "79": 9.34722, + "80": 9.37822, + "81": 9.45421, + "82": 9.67529, + "83": 9.28446, + "84": 9.39113, + "85": 9.58663, + "86": 9.04694, + "87": 9.56972, + "88": 9.72085, + "89": 9.5673, + "90": 9.79474, + "91": 9.30448, + "92": 9.32183, + "93": 9.0517, + "94": 8.79005, + "95": 9.4918, + "96": 9.48711, + "97": 9.26589, + "98": 9.62592, + "99": 8.85252, + "100": 9.35907 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1651.0, + "2": 1716.0, + "3": 1772.0, + "4": 1774.0, + "5": 1920.0, + "6": 1864.0, + "7": 1830.0, + "8": 1695.0, + 
"9": 1858.0, + "10": 1367.0, + "11": 1915.0, + "12": 1797.0, + "13": 1899.0, + "14": 1769.0, + "15": 1880.0, + "16": 1806.0, + "17": 1822.0, + "18": 1686.0, + "19": 1728.0, + "20": 1667.0, + "21": 1897.0, + "22": 1703.0, + "23": 1967.0, + "24": 1595.0, + "25": 1583.0, + "26": 1684.0, + "27": 1911.0, + "28": 1969.0, + "29": 1864.0, + "30": 1943.0, + "31": 1535.0, + "32": 1895.0, + "33": 2078.0, + "34": 1739.0, + "35": 1940.0, + "36": 1919.0, + "37": 2460.0, + "38": 2107.0, + "39": 2261.0, + "40": 2059.0, + "41": 2183.0, + "42": 2269.0, + "43": 1972.0, + "44": 2040.0, + "45": 2093.0, + "46": 2140.0, + "47": 2476.0, + "48": 2311.0, + "49": 2165.0, + "50": 2411.0, + "51": 2471.0, + "52": 2670.0, + "53": 2883.0, + "54": 2589.0, + "55": 2427.0, + "56": 2774.0, + "57": 2246.0, + "58": 2994.0, + "59": 2922.0, + "60": 2416.0, + "61": 2960.0, + "62": 2646.0, + "63": 2488.0, + "64": 2956.0, + "65": 2746.0, + "66": 2864.0, + "67": 2794.0, + "68": 2703.0, + "69": 2990.0, + "70": 3012.0, + "71": 2884.0, + "72": 2536.0, + "73": 3054.0, + "74": 2100.0, + "75": 2573.0, + "76": 3076.0, + "77": 3025.0, + "78": 3014.0, + "79": 3083.0, + "80": 2989.0, + "81": 3452.0, + "82": 3253.0, + "83": 2759.0, + "84": 3186.0, + "85": 3247.0, + "86": 2624.0, + "87": 3594.0, + "88": 3009.0, + "89": 3286.0, + "90": 3354.0, + "91": 2869.0, + "92": 3156.0, + "93": 2809.0, + "94": 3350.0, + "95": 3033.0, + "96": 3323.0, + "97": 3091.0, + "98": 3356.0, + "99": 3326.0, + "100": 3144.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 759682560.0, + "2": 759682560.0, + "3": 759682560.0, + "4": 759682560.0, + "5": 759682560.0, + "6": 759682560.0, + "7": 759682560.0, + "8": 759682560.0, + "9": 759682560.0, + "10": 759682560.0, + "11": 759682560.0, + "12": 759682560.0, + "13": 759682560.0, + "14": 759682560.0, + "15": 759682560.0, + "16": 759682560.0, + "17": 759682560.0, + "18": 759682560.0, + "19": 759682560.0, + "20": 759682560.0, + "21": 
759682560.0, + "22": 759682560.0, + "23": 759682560.0, + "24": 759682560.0, + "25": 759682560.0, + "26": 759682560.0, + "27": 759682560.0, + "28": 759682560.0, + "29": 759682560.0, + "30": 759682560.0, + "31": 759682560.0, + "32": 759682560.0, + "33": 759682560.0, + "34": 759682560.0, + "35": 759682560.0, + "36": 759682560.0, + "37": 759682560.0, + "38": 759682560.0, + "39": 759682560.0, + "40": 759682560.0, + "41": 759682560.0, + "42": 759682560.0, + "43": 759682560.0, + "44": 759682560.0, + "45": 759682560.0, + "46": 759682560.0, + "47": 759682560.0, + "48": 759682560.0, + "49": 759682560.0, + "50": 759682560.0, + "51": 759682560.0, + "52": 759682560.0, + "53": 759682560.0, + "54": 759682560.0, + "55": 759682560.0, + "56": 759682560.0, + "57": 759682560.0, + "58": 759682560.0, + "59": 759682560.0, + "60": 759682560.0, + "61": 759682560.0, + "62": 759682560.0, + "63": 759682560.0, + "64": 759682560.0, + "65": 759682560.0, + "66": 759682560.0, + "67": 759682560.0, + "68": 759682560.0, + "69": 759682560.0, + "70": 759682560.0, + "71": 759682560.0, + "72": 759682560.0, + "73": 759682560.0, + "74": 759682560.0, + "75": 759682560.0, + "76": 759682560.0, + "77": 759682560.0, + "78": 759682560.0, + "79": 759682560.0, + "80": 759682560.0, + "81": 759682560.0, + "82": 759682560.0, + "83": 759682560.0, + "84": 759682560.0, + "85": 759682560.0, + "86": 759682560.0, + "87": 759682560.0, + "88": 759682560.0, + "89": 759682560.0, + "90": 759682560.0, + "91": 759682560.0, + "92": 759682560.0, + "93": 759682560.0, + "94": 759682560.0, + "95": 759682560.0, + "96": 759682560.0, + "97": 759682560.0, + "98": 759682560.0, + "99": 759682560.0, + "100": 759682560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2395798528.0, + "2": 2677510144.0, + "3": 2677510144.0, + "4": 2677510144.0, + "5": 2677510144.0, + "6": 2677510144.0, + "7": 2677510144.0, + "8": 2677510144.0, + "9": 2677510144.0, + "10": 2677510144.0, + 
"11": 2677510144.0, + "12": 2677510144.0, + "13": 2677510144.0, + "14": 2677510144.0, + "15": 2677510144.0, + "16": 2677510144.0, + "17": 2677510144.0, + "18": 2677510144.0, + "19": 2677510144.0, + "20": 2677510144.0, + "21": 2677510144.0, + "22": 2677510144.0, + "23": 2677510144.0, + "24": 2677510144.0, + "25": 2677510144.0, + "26": 2677510144.0, + "27": 2677510144.0, + "28": 2677510144.0, + "29": 2677510144.0, + "30": 2677510144.0, + "31": 2677510144.0, + "32": 2677510144.0, + "33": 2677510144.0, + "34": 2677510144.0, + "35": 2677510144.0, + "36": 2677510144.0, + "37": 2677510144.0, + "38": 2677510144.0, + "39": 2677510144.0, + "40": 2677510144.0, + "41": 2677510144.0, + "42": 2677510144.0, + "43": 2677510144.0, + "44": 2677510144.0, + "45": 2677510144.0, + "46": 2677510144.0, + "47": 2677510144.0, + "48": 2677510144.0, + "49": 2677510144.0, + "50": 2677510144.0, + "51": 2677510144.0, + "52": 2677510144.0, + "53": 2677510144.0, + "54": 2677510144.0, + "55": 2677510144.0, + "56": 2677510144.0, + "57": 2677510144.0, + "58": 2677510144.0, + "59": 2677510144.0, + "60": 2677510144.0, + "61": 2677510144.0, + "62": 2677510144.0, + "63": 2677510144.0, + "64": 2677510144.0, + "65": 2677510144.0, + "66": 2677510144.0, + "67": 2677510144.0, + "68": 2677510144.0, + "69": 2677510144.0, + "70": 2677510144.0, + "71": 2677510144.0, + "72": 2677510144.0, + "73": 2677510144.0, + "74": 2677510144.0, + "75": 2677510144.0, + "76": 2677510144.0, + "77": 2677510144.0, + "78": 2677510144.0, + "79": 2677510144.0, + "80": 2677510144.0, + "81": 2677510144.0, + "82": 2677510144.0, + "83": 2677510144.0, + "84": 2677510144.0, + "85": 2677510144.0, + "86": 2677510144.0, + "87": 2677510144.0, + "88": 2677510144.0, + "89": 2677510144.0, + "90": 2677510144.0, + "91": 2677510144.0, + "92": 2677510144.0, + "93": 2677510144.0, + "94": 2677510144.0, + "95": 2677510144.0, + "96": 2677510144.0, + "97": 2677510144.0, + "98": 2677510144.0, + "99": 2677510144.0, + "100": 2677510144.0 + } + }, + 
"iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.86945, + "2": 0.13101, + "3": 0.70357, + "4": 0.14163, + "5": 0.12855, + "6": 0.38046, + "7": 0.126, + "8": 0.14086, + "9": 0.23777, + "10": 0.1402, + "11": 0.41122, + "12": 0.27395, + "13": 0.10789, + "14": 0.24924, + "15": 0.33411, + "16": 0.24471, + "17": 0.10663, + "18": 0.24551, + "19": 0.10581, + "20": 0.18449, + "21": 0.22744, + "22": 0.10616, + "23": 0.34738, + "24": 0.11037, + "25": 0.11093, + "26": 0.43374, + "27": 0.1067, + "28": 0.10671, + "29": 0.1061, + "30": 0.22031, + "31": 0.11271, + "32": 0.10683, + "33": 0.10556, + "34": 0.25465, + "35": 0.22935, + "36": 0.1072, + "37": 0.10789, + "38": 0.1067, + "39": 0.21523, + "40": 0.1053, + "41": 0.11778, + "42": 0.22642, + "43": 0.10673, + "44": 0.23278, + "45": 0.1046, + "46": 0.22439, + "47": 0.22232, + "48": 0.10912, + "49": 0.10674, + "50": 0.1055, + "51": 0.11049, + "52": 0.1948, + "53": 0.1045, + "54": 0.24019, + "55": 0.10505, + "56": 0.23176, + "57": 0.10745, + "58": 0.10668, + "59": 0.10741, + "60": 0.37464, + "61": 0.10467, + "62": 0.10857, + "63": 0.10767, + "64": 0.10998, + "65": 0.10888, + "66": 0.17063, + "67": 0.36721, + "68": 0.10834, + "69": 0.10693, + "70": 0.24024, + "71": 0.10802, + "72": 0.10696, + "73": 0.10736, + "74": 0.10874, + "75": 0.15339, + "76": 0.18985, + "77": 0.32078, + "78": 0.1062, + "79": 0.29068, + "80": 0.10837, + "81": 0.17251, + "82": 0.10428, + "83": 0.21093, + "84": 0.13349, + "85": 0.23049, + "86": 0.10991, + "87": 0.10573, + "88": 0.10661, + "89": 0.10792, + "90": 0.22654, + "91": 0.31392, + "92": 0.10844, + "93": 0.24022, + "94": 0.111, + "95": 0.10539, + "96": 0.109, + "97": 0.11025, + "98": 0.11065, + "99": 0.44653, + "100": 0.10883 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_h100.json index 603dba4c2e5..2f0a7e29034 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_h100.json @@ -21,89 +21,89 @@ "15": 10.81973, "16": 10.83156, "17": 10.79863, - "18": 10.81648, - "19": 10.8189, + "18": 10.8165, + "19": 10.81889, "20": 10.72685, - "21": 10.70581, - "22": 10.56347, - "23": 10.72794, - "24": 10.60761, + "21": 10.7058, + "22": 10.5635, + "23": 10.7279, + "24": 10.6076, "25": 10.55128, - "26": 10.60749, - "27": 10.6277, - "28": 10.58262, - "29": 10.59959, - "30": 10.36566, - "31": 10.11988, - "32": 10.4755, - "33": 10.46637, - "34": 10.22009, - "35": 10.2744, - "36": 10.22594, - "37": 10.35729, - "38": 10.19156, + "26": 10.60747, + "27": 10.62771, + "28": 10.5826, + "29": 10.59962, + "30": 10.36565, + "31": 10.1199, + "32": 10.47544, + "33": 10.46636, + "34": 10.22008, + "35": 10.27436, + "36": 10.2259, + "37": 10.3573, + "38": 10.19161, "39": 10.41342, - "40": 10.0956, - "41": 10.15511, + "40": 10.09564, + "41": 10.15513, "42": 10.22085, - "43": 9.82797, - "44": 9.96276, - "45": 9.83425, - "46": 9.82209, - "47": 10.14765, - "48": 9.84681, - "49": 9.53377, - "50": 9.90532, - "51": 9.85116, - "52": 9.73516, - "53": 10.05863, - "54": 9.94369, + "43": 9.82792, + "44": 9.96282, + "45": 9.83422, + "46": 9.8221, + "47": 10.14764, + "48": 9.84684, + "49": 9.53373, + "50": 9.90531, + "51": 9.85118, + "52": 9.73512, + "53": 10.05864, + "54": 9.94367, "55": 9.87297, - "56": 9.61703, - "57": 9.4675, - "58": 9.82223, - "59": 9.57338, - "60": 9.48861, - "61": 9.67921, - "62": 9.97513, - "63": 9.37045, - "64": 9.76643, - "65": 8.93435, - "66": 9.69463, - "67": 9.35357, + "56": 9.61699, + "57": 9.46751, + "58": 9.82221, + "59": 9.57334, + "60": 
9.48862, + "61": 9.67922, + "62": 9.97512, + "63": 9.37044, + "64": 9.76642, + "65": 8.9343, + "66": 9.69461, + "67": 9.35362, "68": 9.76826, - "69": 9.77682, - "70": 9.72364, - "71": 9.59895, - "72": 9.56454, - "73": 9.48327, - "74": 8.92062, + "69": 9.77678, + "70": 9.72363, + "71": 9.59894, + "72": 9.56455, + "73": 9.48329, + "74": 8.92064, "75": 9.40392, - "76": 9.05301, - "77": 10.04175, + "76": 9.05297, + "77": 10.04178, "78": 9.69879, - "79": 9.35128, - "80": 9.38215, - "81": 9.45866, - "82": 9.67518, - "83": 9.28411, - "84": 9.39313, - "85": 9.5893, - "86": 9.05182, - "87": 9.56419, - "88": 9.71756, + "79": 9.35126, + "80": 9.38212, + "81": 9.45864, + "82": 9.67516, + "83": 9.2841, + "84": 9.39311, + "85": 9.58936, + "86": 9.05178, + "87": 9.56418, + "88": 9.71755, "89": 9.57129, "90": 9.78202, - "91": 9.3061, - "92": 9.32048, - "93": 9.03942, - "94": 8.79522, - "95": 9.47913, - "96": 9.48454, - "97": 9.2699, - "98": 9.62563, - "99": 8.84255, - "100": 9.34982 + "91": 9.30611, + "92": 9.32046, + "93": 9.03939, + "94": 8.7952, + "95": 9.47908, + "96": 9.48453, + "97": 9.26989, + "98": 9.62564, + "99": 8.84254, + "100": 9.3498 } }, "num-zeros": { @@ -126,91 +126,91 @@ "13": 1931.0, "14": 1678.0, "15": 1918.0, - "16": 1961.0, - "17": 1711.0, - "18": 1658.0, - "19": 1791.0, - "20": 1610.0, - "21": 1815.0, - "22": 1677.0, - "23": 1952.0, - "24": 1612.0, - "25": 1597.0, - "26": 1657.0, - "27": 1850.0, - "28": 2013.0, - "29": 1966.0, - "30": 1875.0, - "31": 1585.0, - "32": 1941.0, - "33": 2085.0, - "34": 1837.0, - "35": 2045.0, - "36": 1898.0, - "37": 2333.0, - "38": 2247.0, - "39": 2266.0, - "40": 2184.0, - "41": 2209.0, - "42": 2164.0, - "43": 2076.0, - "44": 2169.0, - "45": 2077.0, - "46": 2325.0, - "47": 2505.0, - "48": 2442.0, - "49": 2205.0, - "50": 2196.0, - "51": 2500.0, - "52": 2572.0, - "53": 2905.0, - "54": 2794.0, - "55": 2351.0, - "56": 2606.0, - "57": 2388.0, - "58": 2864.0, - "59": 2726.0, - "60": 2359.0, - "61": 2915.0, - "62": 2610.0, - "63": 
2397.0, - "64": 2886.0, - "65": 2577.0, - "66": 2913.0, - "67": 2715.0, - "68": 2646.0, - "69": 2805.0, - "70": 3151.0, - "71": 2917.0, - "72": 2403.0, - "73": 2948.0, - "74": 1994.0, - "75": 2425.0, - "76": 2898.0, - "77": 3085.0, - "78": 3228.0, - "79": 2981.0, - "80": 3254.0, - "81": 3499.0, - "82": 3121.0, - "83": 2711.0, - "84": 3105.0, - "85": 3492.0, - "86": 2693.0, - "87": 3602.0, - "88": 3052.0, - "89": 3230.0, - "90": 3160.0, - "91": 2647.0, - "92": 3160.0, - "93": 2650.0, - "94": 3430.0, - "95": 3247.0, - "96": 3353.0, - "97": 3064.0, - "98": 3486.0, - "99": 3190.0, - "100": 3076.0 + "16": 1945.0, + "17": 1707.0, + "18": 1635.0, + "19": 1720.0, + "20": 1609.0, + "21": 1813.0, + "22": 1682.0, + "23": 1908.0, + "24": 1620.0, + "25": 1563.0, + "26": 1640.0, + "27": 1775.0, + "28": 1873.0, + "29": 1969.0, + "30": 1896.0, + "31": 1588.0, + "32": 1907.0, + "33": 2180.0, + "34": 1850.0, + "35": 1987.0, + "36": 1901.0, + "37": 2358.0, + "38": 2253.0, + "39": 2364.0, + "40": 2173.0, + "41": 2234.0, + "42": 2281.0, + "43": 2027.0, + "44": 2127.0, + "45": 2170.0, + "46": 2317.0, + "47": 2438.0, + "48": 2391.0, + "49": 2276.0, + "50": 2205.0, + "51": 2647.0, + "52": 2533.0, + "53": 2935.0, + "54": 2623.0, + "55": 2386.0, + "56": 2664.0, + "57": 2391.0, + "58": 2863.0, + "59": 2758.0, + "60": 2456.0, + "61": 2865.0, + "62": 2559.0, + "63": 2463.0, + "64": 3014.0, + "65": 2526.0, + "66": 3010.0, + "67": 2723.0, + "68": 2616.0, + "69": 2739.0, + "70": 3188.0, + "71": 2919.0, + "72": 2355.0, + "73": 2921.0, + "74": 1944.0, + "75": 2454.0, + "76": 3005.0, + "77": 3204.0, + "78": 3244.0, + "79": 3047.0, + "80": 3220.0, + "81": 3492.0, + "82": 3205.0, + "83": 2692.0, + "84": 3149.0, + "85": 3256.0, + "86": 2562.0, + "87": 3753.0, + "88": 2921.0, + "89": 3239.0, + "90": 3001.0, + "91": 2656.0, + "92": 3146.0, + "93": 2642.0, + "94": 3289.0, + "95": 3324.0, + "96": 3350.0, + "97": 3079.0, + "98": 3564.0, + "99": 3215.0, + "100": 3238.0 } }, "mem-allocated-bytes": { @@ 
-218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 759681536.0, - "2": 759681536.0, - "3": 759681536.0, - "4": 759681536.0, - "5": 759681536.0, - "6": 759681536.0, - "7": 759681536.0, - "8": 759681536.0, - "9": 759681536.0, - "10": 759681536.0, - "11": 759681536.0, - "12": 759681536.0, - "13": 759681536.0, - "14": 759681536.0, - "15": 759681536.0, - "16": 759681536.0, - "17": 759681536.0, - "18": 759681536.0, - "19": 759681536.0, - "20": 759681536.0, - "21": 759681536.0, - "22": 759681536.0, - "23": 759681536.0, - "24": 759681536.0, - "25": 759681536.0, - "26": 759681536.0, - "27": 759681536.0, - "28": 759681536.0, - "29": 759681536.0, - "30": 759681536.0, - "31": 759681536.0, - "32": 759681536.0, - "33": 759681536.0, - "34": 759681536.0, - "35": 759681536.0, - "36": 759681536.0, - "37": 759681536.0, - "38": 759681536.0, - "39": 759681536.0, - "40": 759681536.0, - "41": 759681536.0, - "42": 759681536.0, - "43": 759681536.0, - "44": 759681536.0, - "45": 759681536.0, - "46": 759681536.0, - "47": 759681536.0, - "48": 759681536.0, - "49": 759681536.0, - "50": 759681536.0, - "51": 759681536.0, - "52": 759681536.0, - "53": 759681536.0, - "54": 759681536.0, - "55": 759681536.0, - "56": 759681536.0, - "57": 759681536.0, - "58": 759681536.0, - "59": 759681536.0, - "60": 759681536.0, - "61": 759681536.0, - "62": 759681536.0, - "63": 759681536.0, - "64": 759681536.0, - "65": 759681536.0, - "66": 759681536.0, - "67": 759681536.0, - "68": 759681536.0, - "69": 759681536.0, - "70": 759681536.0, - "71": 759681536.0, - "72": 759681536.0, - "73": 759681536.0, - "74": 759681536.0, - "75": 759681536.0, - "76": 759681536.0, - "77": 759681536.0, - "78": 759681536.0, - "79": 759681536.0, - "80": 759681536.0, - "81": 759681536.0, - "82": 759681536.0, - "83": 759681536.0, - "84": 759681536.0, - "85": 759681536.0, - "86": 759681536.0, - "87": 759681536.0, - "88": 759681536.0, - "89": 759681536.0, - "90": 759681536.0, - "91": 759681536.0, - "92": 759681536.0, - 
"93": 759681536.0, - "94": 759681536.0, - "95": 759681536.0, - "96": 759681536.0, - "97": 759681536.0, - "98": 759681536.0, - "99": 759681536.0, - "100": 759681536.0 + "1": 759682560.0, + "2": 759682560.0, + "3": 759682560.0, + "4": 759682560.0, + "5": 759682560.0, + "6": 759682560.0, + "7": 759682560.0, + "8": 759682560.0, + "9": 759682560.0, + "10": 759682560.0, + "11": 759682560.0, + "12": 759682560.0, + "13": 759682560.0, + "14": 759682560.0, + "15": 759682560.0, + "16": 759682560.0, + "17": 759682560.0, + "18": 759682560.0, + "19": 759682560.0, + "20": 759682560.0, + "21": 759682560.0, + "22": 759682560.0, + "23": 759682560.0, + "24": 759682560.0, + "25": 759682560.0, + "26": 759682560.0, + "27": 759682560.0, + "28": 759682560.0, + "29": 759682560.0, + "30": 759682560.0, + "31": 759682560.0, + "32": 759682560.0, + "33": 759682560.0, + "34": 759682560.0, + "35": 759682560.0, + "36": 759682560.0, + "37": 759682560.0, + "38": 759682560.0, + "39": 759682560.0, + "40": 759682560.0, + "41": 759682560.0, + "42": 759682560.0, + "43": 759682560.0, + "44": 759682560.0, + "45": 759682560.0, + "46": 759682560.0, + "47": 759682560.0, + "48": 759682560.0, + "49": 759682560.0, + "50": 759682560.0, + "51": 759682560.0, + "52": 759682560.0, + "53": 759682560.0, + "54": 759682560.0, + "55": 759682560.0, + "56": 759682560.0, + "57": 759682560.0, + "58": 759682560.0, + "59": 759682560.0, + "60": 759682560.0, + "61": 759682560.0, + "62": 759682560.0, + "63": 759682560.0, + "64": 759682560.0, + "65": 759682560.0, + "66": 759682560.0, + "67": 759682560.0, + "68": 759682560.0, + "69": 759682560.0, + "70": 759682560.0, + "71": 759682560.0, + "72": 759682560.0, + "73": 759682560.0, + "74": 759682560.0, + "75": 759682560.0, + "76": 759682560.0, + "77": 759682560.0, + "78": 759682560.0, + "79": 759682560.0, + "80": 759682560.0, + "81": 759682560.0, + "82": 759682560.0, + "83": 759682560.0, + "84": 759682560.0, + "85": 759682560.0, + "86": 759682560.0, + "87": 759682560.0, + "88": 
759682560.0, + "89": 759682560.0, + "90": 759682560.0, + "91": 759682560.0, + "92": 759682560.0, + "93": 759682560.0, + "94": 759682560.0, + "95": 759682560.0, + "96": 759682560.0, + "97": 759682560.0, + "98": 759682560.0, + "99": 759682560.0, + "100": 759682560.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2358048768.0, - "2": 2639760384.0, - "3": 2639760384.0, - "4": 2639760384.0, - "5": 2639760384.0, - "6": 2639760384.0, - "7": 2639760384.0, - "8": 2639760384.0, - "9": 2639760384.0, - "10": 2639760384.0, - "11": 2639760384.0, - "12": 2639760384.0, - "13": 2639760384.0, - "14": 2639760384.0, - "15": 2639760384.0, - "16": 2639760384.0, - "17": 2639760384.0, - "18": 2639760384.0, - "19": 2639760384.0, - "20": 2639760384.0, - "21": 2639760384.0, - "22": 2639760384.0, - "23": 2639760384.0, - "24": 2639760384.0, - "25": 2639760384.0, - "26": 2639760384.0, - "27": 2639760384.0, - "28": 2639760384.0, - "29": 2639760384.0, - "30": 2639760384.0, - "31": 2639760384.0, - "32": 2639760384.0, - "33": 2639760384.0, - "34": 2639760384.0, - "35": 2639760384.0, - "36": 2639760384.0, - "37": 2639760384.0, - "38": 2639760384.0, - "39": 2639760384.0, - "40": 2639760384.0, - "41": 2639760384.0, - "42": 2639760384.0, - "43": 2639760384.0, - "44": 2639760384.0, - "45": 2639760384.0, - "46": 2639760384.0, - "47": 2639760384.0, - "48": 2639760384.0, - "49": 2639760384.0, - "50": 2639760384.0, - "51": 2639760384.0, - "52": 2639760384.0, - "53": 2639760384.0, - "54": 2639760384.0, - "55": 2639760384.0, - "56": 2639760384.0, - "57": 2639760384.0, - "58": 2639760384.0, - "59": 2639760384.0, - "60": 2639760384.0, - "61": 2639760384.0, - "62": 2639760384.0, - "63": 2639760384.0, - "64": 2639760384.0, - "65": 2639760384.0, - "66": 2639760384.0, - "67": 2639760384.0, - "68": 2639760384.0, - "69": 2639760384.0, - "70": 2639760384.0, - "71": 2639760384.0, - "72": 2639760384.0, - "73": 2639760384.0, - "74": 2639760384.0, - "75": 
2639760384.0, - "76": 2639760384.0, - "77": 2639760384.0, - "78": 2639760384.0, - "79": 2639760384.0, - "80": 2639760384.0, - "81": 2639760384.0, - "82": 2639760384.0, - "83": 2639760384.0, - "84": 2639760384.0, - "85": 2639760384.0, - "86": 2639760384.0, - "87": 2639760384.0, - "88": 2639760384.0, - "89": 2639760384.0, - "90": 2639760384.0, - "91": 2639760384.0, - "92": 2639760384.0, - "93": 2639760384.0, - "94": 2639760384.0, - "95": 2639760384.0, - "96": 2639760384.0, - "97": 2639760384.0, - "98": 2639760384.0, - "99": 2639760384.0, - "100": 2639760384.0 + "1": 2358049792.0, + "2": 2639761408.0, + "3": 2639761408.0, + "4": 2639761408.0, + "5": 2639761408.0, + "6": 2639761408.0, + "7": 2639761408.0, + "8": 2639761408.0, + "9": 2639761408.0, + "10": 2639761408.0, + "11": 2639761408.0, + "12": 2639761408.0, + "13": 2639761408.0, + "14": 2639761408.0, + "15": 2639761408.0, + "16": 2639761408.0, + "17": 2639761408.0, + "18": 2639761408.0, + "19": 2639761408.0, + "20": 2639761408.0, + "21": 2639761408.0, + "22": 2639761408.0, + "23": 2639761408.0, + "24": 2639761408.0, + "25": 2639761408.0, + "26": 2639761408.0, + "27": 2639761408.0, + "28": 2639761408.0, + "29": 2639761408.0, + "30": 2639761408.0, + "31": 2639761408.0, + "32": 2639761408.0, + "33": 2639761408.0, + "34": 2639761408.0, + "35": 2639761408.0, + "36": 2639761408.0, + "37": 2639761408.0, + "38": 2639761408.0, + "39": 2639761408.0, + "40": 2639761408.0, + "41": 2639761408.0, + "42": 2639761408.0, + "43": 2639761408.0, + "44": 2639761408.0, + "45": 2639761408.0, + "46": 2639761408.0, + "47": 2639761408.0, + "48": 2639761408.0, + "49": 2639761408.0, + "50": 2639761408.0, + "51": 2639761408.0, + "52": 2639761408.0, + "53": 2639761408.0, + "54": 2639761408.0, + "55": 2639761408.0, + "56": 2639761408.0, + "57": 2639761408.0, + "58": 2639761408.0, + "59": 2639761408.0, + "60": 2639761408.0, + "61": 2639761408.0, + "62": 2639761408.0, + "63": 2639761408.0, + "64": 2639761408.0, + "65": 2639761408.0, + "66": 
2639761408.0, + "67": 2639761408.0, + "68": 2639761408.0, + "69": 2639761408.0, + "70": 2639761408.0, + "71": 2639761408.0, + "72": 2639761408.0, + "73": 2639761408.0, + "74": 2639761408.0, + "75": 2639761408.0, + "76": 2639761408.0, + "77": 2639761408.0, + "78": 2639761408.0, + "79": 2639761408.0, + "80": 2639761408.0, + "81": 2639761408.0, + "82": 2639761408.0, + "83": 2639761408.0, + "84": 2639761408.0, + "85": 2639761408.0, + "86": 2639761408.0, + "87": 2639761408.0, + "88": 2639761408.0, + "89": 2639761408.0, + "90": 2639761408.0, + "91": 2639761408.0, + "92": 2639761408.0, + "93": 2639761408.0, + "94": 2639761408.0, + "95": 2639761408.0, + "96": 2639761408.0, + "97": 2639761408.0, + "98": 2639761408.0, + "99": 2639761408.0, + "100": 2639761408.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 16.0335, - "2": 0.14377, - "3": 0.129, - "4": 0.12162, - "5": 0.11612, - "6": 0.11324, - "7": 0.11415, - "8": 0.11274, - "9": 0.11392, - "10": 0.11729, - "11": 0.11228, - "12": 0.11141, - "13": 0.11245, - "14": 0.11042, - "15": 0.11174, - "16": 0.1114, - "17": 0.11204, - "18": 0.11241, - "19": 0.11298, - "20": 0.11272, - "21": 0.11169, - "22": 0.11228, - "23": 0.11255, - "24": 0.11124, - "25": 0.11188, - "26": 0.11351, - "27": 0.11159, - "28": 0.11318, - "29": 0.11016, - "30": 0.11051, - "31": 0.11184, - "32": 0.11116, - "33": 0.1106, - "34": 0.11105, - "35": 0.113, - "36": 0.11198, - "37": 0.1117, - "38": 0.11109, - "39": 0.1099, - "40": 0.11097, - "41": 0.11159, - "42": 0.11191, - "43": 0.11283, - "44": 0.11266, - "45": 0.111, - "46": 0.11347, - "47": 0.1099, - "48": 0.10973, - "49": 0.11225, - "50": 0.11231, - "51": 0.1122, - "52": 0.10985, - "53": 0.11147, - "54": 0.11064, - "55": 0.11101, - "56": 0.11356, - "57": 0.11368, - "58": 0.11185, - "59": 0.11193, - "60": 0.11205, - "61": 0.11176, - "62": 0.11293, - "63": 0.1127, - "64": 0.11343, - "65": 0.11282, - "66": 0.11245, - "67": 0.11385, - "68": 0.11071, - 
"69": 0.11079, - "70": 0.112, - "71": 0.1108, - "72": 0.11299, - "73": 0.11305, - "74": 0.11343, - "75": 0.11155, - "76": 0.11323, - "77": 0.11174, - "78": 0.11138, - "79": 0.11246, - "80": 0.11252, - "81": 0.11217, - "82": 0.11269, - "83": 0.11312, - "84": 0.11075, - "85": 0.11227, - "86": 0.11159, - "87": 0.11227, - "88": 0.11227, - "89": 0.11277, - "90": 0.11219, - "91": 0.11067, - "92": 0.10961, - "93": 0.10907, - "94": 0.11584, - "95": 0.1087, - "96": 0.11107, - "97": 0.11046, - "98": 0.10986, - "99": 0.11249, - "100": 0.1095 + "1": 9.86816, + "2": 0.1216, + "3": 0.1166, + "4": 0.08589, + "5": 0.08587, + "6": 0.08491, + "7": 0.0844, + "8": 0.08084, + "9": 0.07931, + "10": 0.0798, + "11": 0.07849, + "12": 0.07832, + "13": 0.0803, + "14": 0.08035, + "15": 0.07881, + "16": 0.07881, + "17": 0.08069, + "18": 0.0794, + "19": 0.07935, + "20": 0.07915, + "21": 0.07896, + "22": 0.08062, + "23": 0.08009, + "24": 0.07923, + "25": 0.07839, + "26": 0.08166, + "27": 0.07977, + "28": 0.08005, + "29": 0.08017, + "30": 0.08118, + "31": 0.0811, + "32": 0.07964, + "33": 0.08086, + "34": 0.08069, + "35": 0.07986, + "36": 0.08098, + "37": 0.07939, + "38": 0.07947, + "39": 0.07943, + "40": 0.08028, + "41": 0.07981, + "42": 0.08016, + "43": 0.08245, + "44": 0.0799, + "45": 0.08077, + "46": 0.08028, + "47": 0.07892, + "48": 0.07997, + "49": 0.08314, + "50": 0.08027, + "51": 0.08246, + "52": 0.07991, + "53": 0.08005, + "54": 0.07954, + "55": 0.07969, + "56": 0.07938, + "57": 0.07891, + "58": 0.07987, + "59": 0.0798, + "60": 0.08057, + "61": 0.07888, + "62": 0.07914, + "63": 0.07997, + "64": 0.07986, + "65": 0.07977, + "66": 0.07953, + "67": 0.07927, + "68": 0.08003, + "69": 0.08005, + "70": 0.07926, + "71": 0.07923, + "72": 0.07966, + "73": 0.08033, + "74": 0.08038, + "75": 0.07956, + "76": 0.07935, + "77": 0.07891, + "78": 0.08007, + "79": 0.08135, + "80": 0.08025, + "81": 0.07919, + "82": 0.07932, + "83": 0.07953, + "84": 0.07937, + "85": 0.0797, + "86": 0.08168, + "87": 0.08023, + 
"88": 0.07957, + "89": 0.08011, + "90": 0.07975, + "91": 0.08043, + "92": 0.08179, + "93": 0.08049, + "94": 0.07951, + "95": 0.08026, + "96": 0.08, + "97": 0.07948, + "98": 0.0805, + "99": 0.07879, + "100": 0.07954 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgx_a100.json index 6081b627567..a620f25b6eb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgx_a100.json @@ -14,96 +14,96 @@ "8": 10.83427, "9": 10.83995, "10": 10.78684, - "11": 10.88021, - "12": 10.85971, - "13": 10.86589, - "14": 10.87818, - "15": 10.79463, - "16": 10.79607, - "17": 10.7688, - "18": 10.81045, - "19": 10.79836, - "20": 10.69045, - "21": 10.67932, - "22": 10.52101, - "23": 10.70743, - "24": 10.57665, - "25": 10.52275, - "26": 10.595, - "27": 10.5855, - "28": 10.56131, + "11": 10.88024, + "12": 10.85967, + "13": 10.86586, + "14": 10.87816, + "15": 10.79461, + "16": 10.79608, + "17": 10.76878, + "18": 10.81048, + "19": 10.79832, + "20": 10.69042, + "21": 10.67929, + "22": 10.52098, + "23": 10.70741, + "24": 10.57667, + "25": 10.52277, + "26": 10.59496, + "27": 10.58548, + "28": 10.56129, "29": 10.56894, - "30": 10.34527, - "31": 10.10019, - "32": 10.45229, + "30": 10.3453, + "31": 10.1002, + "32": 10.45227, "33": 10.44356, - "34": 10.20397, - "35": 10.25844, + "34": 10.20401, + "35": 10.25843, "36": 10.2103, - "37": 10.32252, - "38": 10.1661, - "39": 10.38156, - "40": 10.07025, - "41": 10.13542, - "42": 10.19416, - "43": 9.80626, - "44": 9.92627, - "45": 9.8024, - "46": 9.79983, - "47": 10.11662, - "48": 9.81307, - "49": 9.50044, - "50": 9.87631, - "51": 9.82781, - 
"52": 9.71723, - "53": 10.03979, - "54": 9.92177, - "55": 9.85515, - "56": 9.59253, - "57": 9.44144, - "58": 9.79602, - "59": 9.55567, - "60": 9.4664, + "37": 10.32249, + "38": 10.16611, + "39": 10.38155, + "40": 10.07026, + "41": 10.13534, + "42": 10.19417, + "43": 9.80625, + "44": 9.92626, + "45": 9.80241, + "46": 9.79982, + "47": 10.11664, + "48": 9.81302, + "49": 9.50045, + "50": 9.87633, + "51": 9.82782, + "52": 9.71728, + "53": 10.03983, + "54": 9.92178, + "55": 9.85516, + "56": 9.59252, + "57": 9.44146, + "58": 9.79606, + "59": 9.55569, + "60": 9.46635, "61": 9.6666, - "62": 9.95363, - "63": 9.33626, - "64": 9.74152, - "65": 8.9178, - "66": 9.66632, + "62": 9.95362, + "63": 9.33627, + "64": 9.7415, + "65": 8.91782, + "66": 9.66633, "67": 9.34424, "68": 9.75273, - "69": 9.75727, + "69": 9.75725, "70": 9.69242, - "71": 9.5868, - "72": 9.55099, + "71": 9.58679, + "72": 9.551, "73": 9.46289, - "74": 8.90671, - "75": 9.37793, - "76": 9.04952, - "77": 10.0301, - "78": 9.69192, - "79": 9.33464, - "80": 9.3667, - "81": 9.44418, - "82": 9.66164, - "83": 9.27209, - "84": 9.38066, + "74": 8.90674, + "75": 9.37794, + "76": 9.04951, + "77": 10.03011, + "78": 9.69189, + "79": 9.33463, + "80": 9.36672, + "81": 9.44419, + "82": 9.66162, + "83": 9.2721, + "84": 9.38062, "85": 9.57618, - "86": 9.0424, - "87": 9.55703, + "86": 9.04242, + "87": 9.557, "88": 9.70385, - "89": 9.56619, - "90": 9.77295, - "91": 9.29396, - "92": 9.31912, - "93": 9.03406, + "89": 9.56616, + "90": 9.77294, + "91": 9.29399, + "92": 9.31911, + "93": 9.03403, "94": 8.78526, - "95": 9.46938, - "96": 9.47497, - "97": 9.25688, - "98": 9.61835, - "99": 8.83233, - "100": 9.34557 + "95": 9.46939, + "96": 9.47496, + "97": 9.25683, + "98": 9.61833, + "99": 8.8323, + "100": 9.34562 } }, "num-zeros": { @@ -119,98 +119,98 @@ "6": 1823.0, "7": 1719.0, "8": 1637.0, - "9": 1742.0, - "10": 1358.0, - "11": 1882.0, - "12": 1781.0, - "13": 1847.0, - "14": 1753.0, - "15": 1883.0, - "16": 1755.0, - "17": 1752.0, - "18": 
1683.0, - "19": 1817.0, - "20": 1663.0, - "21": 1795.0, - "22": 1698.0, - "23": 1996.0, - "24": 1620.0, - "25": 1658.0, - "26": 1727.0, - "27": 1781.0, - "28": 2085.0, - "29": 1952.0, - "30": 1821.0, - "31": 1646.0, - "32": 1879.0, - "33": 2034.0, - "34": 1861.0, - "35": 1834.0, - "36": 1913.0, - "37": 2333.0, - "38": 2070.0, - "39": 2245.0, - "40": 2126.0, - "41": 2311.0, - "42": 2213.0, - "43": 1907.0, - "44": 1951.0, - "45": 2001.0, - "46": 2218.0, - "47": 2533.0, - "48": 2436.0, - "49": 2188.0, - "50": 2342.0, - "51": 2562.0, - "52": 2529.0, - "53": 3031.0, - "54": 2744.0, - "55": 2264.0, - "56": 2794.0, - "57": 2183.0, - "58": 2882.0, - "59": 2769.0, - "60": 2399.0, - "61": 3031.0, - "62": 2706.0, - "63": 2388.0, - "64": 3046.0, - "65": 2597.0, - "66": 3092.0, - "67": 2730.0, - "68": 2858.0, - "69": 2982.0, - "70": 3202.0, - "71": 2964.0, - "72": 2450.0, - "73": 2817.0, - "74": 1834.0, - "75": 2609.0, - "76": 3000.0, - "77": 3180.0, - "78": 3113.0, - "79": 3145.0, - "80": 3258.0, - "81": 3645.0, - "82": 3075.0, - "83": 2812.0, - "84": 3295.0, - "85": 3368.0, - "86": 2730.0, - "87": 3717.0, - "88": 3056.0, - "89": 3252.0, - "90": 2954.0, - "91": 2798.0, - "92": 3089.0, - "93": 2742.0, - "94": 3420.0, - "95": 3225.0, - "96": 3362.0, - "97": 3118.0, - "98": 3671.0, - "99": 3341.0, - "100": 3428.0 + "9": 1776.0, + "10": 1356.0, + "11": 1851.0, + "12": 1753.0, + "13": 1865.0, + "14": 1686.0, + "15": 1859.0, + "16": 1834.0, + "17": 1776.0, + "18": 1609.0, + "19": 1771.0, + "20": 1624.0, + "21": 1885.0, + "22": 1740.0, + "23": 1950.0, + "24": 1707.0, + "25": 1746.0, + "26": 1809.0, + "27": 1822.0, + "28": 2039.0, + "29": 1989.0, + "30": 1888.0, + "31": 1607.0, + "32": 1891.0, + "33": 2102.0, + "34": 1900.0, + "35": 1939.0, + "36": 1937.0, + "37": 2319.0, + "38": 2215.0, + "39": 2289.0, + "40": 2081.0, + "41": 2341.0, + "42": 2227.0, + "43": 1889.0, + "44": 2002.0, + "45": 1989.0, + "46": 2300.0, + "47": 2473.0, + "48": 2407.0, + "49": 2291.0, + "50": 2423.0, + "51": 
2489.0, + "52": 2624.0, + "53": 2894.0, + "54": 2672.0, + "55": 2317.0, + "56": 2736.0, + "57": 2197.0, + "58": 2903.0, + "59": 2833.0, + "60": 2448.0, + "61": 2942.0, + "62": 2603.0, + "63": 2412.0, + "64": 2913.0, + "65": 2665.0, + "66": 3011.0, + "67": 2573.0, + "68": 2848.0, + "69": 2990.0, + "70": 3095.0, + "71": 2974.0, + "72": 2383.0, + "73": 2769.0, + "74": 1867.0, + "75": 2542.0, + "76": 2962.0, + "77": 3172.0, + "78": 3190.0, + "79": 3132.0, + "80": 3350.0, + "81": 3621.0, + "82": 3145.0, + "83": 2739.0, + "84": 3366.0, + "85": 3493.0, + "86": 2693.0, + "87": 3840.0, + "88": 2919.0, + "89": 3191.0, + "90": 3013.0, + "91": 2796.0, + "92": 3092.0, + "93": 2693.0, + "94": 3339.0, + "95": 3297.0, + "96": 3553.0, + "97": 3085.0, + "98": 3564.0, + "99": 3313.0, + "100": 3482.0 } }, "mem-allocated-bytes": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 12.81482, - "2": 0.16445, - "3": 0.16681, - "4": 0.12923, - "5": 0.12855, - "6": 0.12774, - "7": 0.12794, - "8": 0.12857, - "9": 0.12785, - "10": 0.12889, - "11": 0.13344, - "12": 0.1302, - "13": 0.13007, - "14": 0.12962, - "15": 0.13044, - "16": 0.12918, - "17": 0.13075, - "18": 0.13004, - "19": 0.13052, - "20": 0.13025, - "21": 0.12825, - "22": 0.13322, - "23": 0.13274, - "24": 0.13114, - "25": 0.13075, - "26": 0.12979, - "27": 0.13026, - "28": 0.13147, - "29": 0.13072, - "30": 0.13098, - "31": 0.13095, - "32": 0.13054, - "33": 0.13038, - "34": 0.13142, - "35": 0.13065, - "36": 0.12923, - "37": 0.13039, - "38": 0.12981, - "39": 0.12995, - "40": 0.13035, - "41": 0.12966, - "42": 0.13013, - "43": 0.13031, - "44": 0.13066, - "45": 0.12952, - "46": 0.13059, - "47": 0.12932, - "48": 0.13133, - "49": 0.13099, + "1": 4.6439, + "2": 0.15791, + "3": 0.1504, + "4": 0.13422, + "5": 0.1326, + "6": 0.13299, + "7": 0.13449, + "8": 0.12991, + "9": 0.12948, + "10": 0.13174, + "11": 0.13098, + "12": 0.13037, + "13": 0.13071, + "14": 0.13091, + "15": 0.1311, + "16": 0.13106, + "17": 0.13049, + 
"18": 0.13044, + "19": 0.13091, + "20": 0.13092, + "21": 0.13077, + "22": 0.13178, + "23": 0.13149, + "24": 0.13147, + "25": 0.13094, + "26": 0.13089, + "27": 0.13076, + "28": 0.13077, + "29": 0.13143, + "30": 0.13073, + "31": 0.13091, + "32": 0.13106, + "33": 0.13097, + "34": 0.13044, + "35": 0.13123, + "36": 0.13087, + "37": 0.13144, + "38": 0.13066, + "39": 0.13081, + "40": 0.13065, + "41": 0.13133, + "42": 0.13115, + "43": 0.13136, + "44": 0.13079, + "45": 0.13085, + "46": 0.13162, + "47": 0.131, + "48": 0.13067, + "49": 0.13121, "50": 0.13032, - "51": 0.13345, - "52": 0.13027, - "53": 0.13035, - "54": 0.13064, - "55": 0.13026, - "56": 0.13053, - "57": 0.13106, - "58": 0.13032, - "59": 0.13178, - "60": 0.13233, - "61": 0.13005, - "62": 0.13045, - "63": 0.13061, - "64": 0.13066, - "65": 0.13102, - "66": 0.13143, - "67": 0.13033, - "68": 0.13066, - "69": 0.12904, - "70": 0.13059, - "71": 0.13052, - "72": 0.13076, - "73": 0.13215, - "74": 0.13173, - "75": 0.13126, - "76": 0.12946, - "77": 0.13071, - "78": 0.12973, - "79": 0.12962, - "80": 0.12976, - "81": 0.12993, - "82": 0.12829, - "83": 0.13132, - "84": 0.1304, - "85": 0.13095, - "86": 0.13112, - "87": 0.12994, - "88": 0.13287, - "89": 0.1284, - "90": 0.1303, - "91": 0.12966, - "92": 0.13139, - "93": 0.12932, - "94": 0.12687, - "95": 0.13012, - "96": 0.12919, - "97": 0.13166, - "98": 0.12958, - "99": 0.13126, - "100": 0.1303 + "51": 0.13326, + "52": 0.13146, + "53": 0.1304, + "54": 0.13069, + "55": 0.13128, + "56": 0.13061, + "57": 0.13062, + "58": 0.13056, + "59": 0.13062, + "60": 0.13016, + "61": 0.13079, + "62": 0.13079, + "63": 0.13044, + "64": 0.13074, + "65": 0.13159, + "66": 0.13108, + "67": 0.13125, + "68": 0.13103, + "69": 0.1306, + "70": 0.13075, + "71": 0.13114, + "72": 0.13089, + "73": 0.13109, + "74": 0.13187, + "75": 0.13679, + "76": 0.13183, + "77": 0.13183, + "78": 0.1322, + "79": 0.13235, + "80": 0.13227, + "81": 0.13232, + "82": 0.13263, + "83": 0.13214, + "84": 0.13146, + "85": 0.13162, + 
"86": 0.13188, + "87": 0.13144, + "88": 0.13202, + "89": 0.1326, + "90": 0.1313, + "91": 0.13207, + "92": 0.13186, + "93": 0.13226, + "94": 0.13226, + "95": 0.13194, + "96": 0.13248, + "97": 0.13228, + "98": 0.13188, + "99": 0.13261, + "100": 0.13281 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..56bb24659d2 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82555, + "2": 10.83286, + "3": 10.82763, + "4": 10.79573, + "5": 10.85699, + "6": 10.8639, + "7": 10.82612, + "8": 10.82543, + "9": 10.8359, + "10": 10.79633, + "11": 10.87819, + "12": 10.85823, + "13": 10.85425, + "14": 10.87526, + "15": 10.79206, + "16": 10.80309, + "17": 10.77438, + "18": 10.80484, + "19": 10.79368, + "20": 10.69574, + "21": 10.68657, + "22": 10.53162, + "23": 10.70642, + "24": 10.57336, + "25": 10.51534, + "26": 10.59088, + "27": 10.60779, + "28": 10.57051, + "29": 10.58978, + "30": 10.34722, + "31": 10.07772, + "32": 10.46349, + "33": 10.45726, + "34": 10.19975, + "35": 10.25642, + "36": 10.21264, + "37": 10.34717, + "38": 10.18011, + "39": 10.40833, + "40": 10.07628, + "41": 10.1297, + "42": 10.21174, + "43": 9.8171, + "44": 9.94032, + "45": 9.81748, + "46": 9.8063, + "47": 10.12475, + "48": 9.84049, + "49": 9.51015, + "50": 9.88941, + "51": 9.8426, + "52": 9.72578, + "53": 10.05977, + "54": 9.95226, + "55": 9.88321, + "56": 9.61276, + "57": 9.46222, + "58": 9.82313, + "59": 9.57665, + "60": 9.48518, + "61": 9.6788, + "62": 9.97777, + "63": 
9.36212, + "64": 9.75714, + "65": 8.93499, + "66": 9.69281, + "67": 9.36709, + "68": 9.78179, + "69": 9.79451, + "70": 9.72295, + "71": 9.62027, + "72": 9.56974, + "73": 9.481, + "74": 8.91241, + "75": 9.40906, + "76": 9.06623, + "77": 10.05808, + "78": 9.72188, + "79": 9.36927, + "80": 9.40027, + "81": 9.47702, + "82": 9.69788, + "83": 9.30742, + "84": 9.41496, + "85": 9.61115, + "86": 9.07104, + "87": 9.59609, + "88": 9.74908, + "89": 9.5961, + "90": 9.82722, + "91": 9.3366, + "92": 9.3558, + "93": 9.08695, + "94": 8.82752, + "95": 9.53066, + "96": 9.52759, + "97": 9.30671, + "98": 9.66909, + "99": 8.89637, + "100": 9.4052 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1651.0, + "2": 1716.0, + "3": 1760.0, + "4": 1771.0, + "5": 1899.0, + "6": 1905.0, + "7": 1842.0, + "8": 1706.0, + "9": 1891.0, + "10": 1543.0, + "11": 1937.0, + "12": 1794.0, + "13": 1982.0, + "14": 1727.0, + "15": 1890.0, + "16": 1746.0, + "17": 1818.0, + "18": 1651.0, + "19": 1782.0, + "20": 1698.0, + "21": 1950.0, + "22": 1702.0, + "23": 1972.0, + "24": 1551.0, + "25": 1587.0, + "26": 1773.0, + "27": 1791.0, + "28": 1858.0, + "29": 1950.0, + "30": 1951.0, + "31": 1499.0, + "32": 1823.0, + "33": 2055.0, + "34": 1788.0, + "35": 1877.0, + "36": 1933.0, + "37": 2302.0, + "38": 2181.0, + "39": 2223.0, + "40": 2009.0, + "41": 2178.0, + "42": 2185.0, + "43": 2041.0, + "44": 2069.0, + "45": 2004.0, + "46": 2212.0, + "47": 2446.0, + "48": 2290.0, + "49": 2183.0, + "50": 2323.0, + "51": 2587.0, + "52": 2574.0, + "53": 2831.0, + "54": 2602.0, + "55": 2403.0, + "56": 2822.0, + "57": 2223.0, + "58": 2954.0, + "59": 2871.0, + "60": 2518.0, + "61": 2922.0, + "62": 2677.0, + "63": 2533.0, + "64": 3023.0, + "65": 2609.0, + "66": 2960.0, + "67": 2867.0, + "68": 2652.0, + "69": 3053.0, + "70": 3011.0, + "71": 2870.0, + "72": 2460.0, + "73": 3114.0, + "74": 2017.0, + "75": 2527.0, + "76": 2954.0, + "77": 2955.0, + "78": 3055.0, + "79": 3098.0, + "80": 
3047.0, + "81": 3362.0, + "82": 3296.0, + "83": 2825.0, + "84": 3113.0, + "85": 3196.0, + "86": 2666.0, + "87": 3583.0, + "88": 2985.0, + "89": 3259.0, + "90": 3220.0, + "91": 2781.0, + "92": 3090.0, + "93": 2686.0, + "94": 3474.0, + "95": 3147.0, + "96": 3418.0, + "97": 3036.0, + "98": 3411.0, + "99": 3152.0, + "100": 3098.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 763221504.0, + "2": 763221504.0, + "3": 763221504.0, + "4": 763221504.0, + "5": 763221504.0, + "6": 763221504.0, + "7": 763221504.0, + "8": 763221504.0, + "9": 763221504.0, + "10": 763221504.0, + "11": 763221504.0, + "12": 763221504.0, + "13": 763221504.0, + "14": 763221504.0, + "15": 763221504.0, + "16": 763221504.0, + "17": 763221504.0, + "18": 763221504.0, + "19": 763221504.0, + "20": 763221504.0, + "21": 763221504.0, + "22": 763221504.0, + "23": 763221504.0, + "24": 763221504.0, + "25": 763221504.0, + "26": 763221504.0, + "27": 763221504.0, + "28": 763221504.0, + "29": 763221504.0, + "30": 763221504.0, + "31": 763221504.0, + "32": 763221504.0, + "33": 763221504.0, + "34": 763221504.0, + "35": 763221504.0, + "36": 763221504.0, + "37": 763221504.0, + "38": 763221504.0, + "39": 763221504.0, + "40": 763221504.0, + "41": 763221504.0, + "42": 763221504.0, + "43": 763221504.0, + "44": 763221504.0, + "45": 763221504.0, + "46": 763221504.0, + "47": 763221504.0, + "48": 763221504.0, + "49": 763221504.0, + "50": 763221504.0, + "51": 763221504.0, + "52": 763221504.0, + "53": 763221504.0, + "54": 763221504.0, + "55": 763221504.0, + "56": 763221504.0, + "57": 763221504.0, + "58": 763221504.0, + "59": 763221504.0, + "60": 763221504.0, + "61": 763221504.0, + "62": 763221504.0, + "63": 763221504.0, + "64": 763221504.0, + "65": 763221504.0, + "66": 763221504.0, + "67": 763221504.0, + "68": 763221504.0, + "69": 763221504.0, + "70": 763221504.0, + "71": 763221504.0, + "72": 763221504.0, + "73": 763221504.0, + "74": 763221504.0, + "75": 
763221504.0, + "76": 763221504.0, + "77": 763221504.0, + "78": 763221504.0, + "79": 763221504.0, + "80": 763221504.0, + "81": 763221504.0, + "82": 763221504.0, + "83": 763221504.0, + "84": 763221504.0, + "85": 763221504.0, + "86": 763221504.0, + "87": 763221504.0, + "88": 763221504.0, + "89": 763221504.0, + "90": 763221504.0, + "91": 763221504.0, + "92": 763221504.0, + "93": 763221504.0, + "94": 763221504.0, + "95": 763221504.0, + "96": 763221504.0, + "97": 763221504.0, + "98": 763221504.0, + "99": 763221504.0, + "100": 763221504.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2398288896.0, + "2": 2681049088.0, + "3": 2681049088.0, + "4": 2681049088.0, + "5": 2681049088.0, + "6": 2681049088.0, + "7": 2681049088.0, + "8": 2681049088.0, + "9": 2681049088.0, + "10": 2681049088.0, + "11": 2681049088.0, + "12": 2681049088.0, + "13": 2681049088.0, + "14": 2681049088.0, + "15": 2681049088.0, + "16": 2681049088.0, + "17": 2681049088.0, + "18": 2681049088.0, + "19": 2681049088.0, + "20": 2681049088.0, + "21": 2681049088.0, + "22": 2681049088.0, + "23": 2681049088.0, + "24": 2681049088.0, + "25": 2681049088.0, + "26": 2681049088.0, + "27": 2681049088.0, + "28": 2681049088.0, + "29": 2681049088.0, + "30": 2681049088.0, + "31": 2681049088.0, + "32": 2681049088.0, + "33": 2681049088.0, + "34": 2681049088.0, + "35": 2681049088.0, + "36": 2681049088.0, + "37": 2681049088.0, + "38": 2681049088.0, + "39": 2681049088.0, + "40": 2681049088.0, + "41": 2681049088.0, + "42": 2681049088.0, + "43": 2681049088.0, + "44": 2681049088.0, + "45": 2681049088.0, + "46": 2681049088.0, + "47": 2681049088.0, + "48": 2681049088.0, + "49": 2681049088.0, + "50": 2681049088.0, + "51": 2681049088.0, + "52": 2681049088.0, + "53": 2681049088.0, + "54": 2681049088.0, + "55": 2681049088.0, + "56": 2681049088.0, + "57": 2681049088.0, + "58": 2681049088.0, + "59": 2681049088.0, + "60": 2681049088.0, + "61": 2681049088.0, + "62": 
2681049088.0, + "63": 2681049088.0, + "64": 2681049088.0, + "65": 2681049088.0, + "66": 2681049088.0, + "67": 2681049088.0, + "68": 2681049088.0, + "69": 2681049088.0, + "70": 2681049088.0, + "71": 2681049088.0, + "72": 2681049088.0, + "73": 2681049088.0, + "74": 2681049088.0, + "75": 2681049088.0, + "76": 2681049088.0, + "77": 2681049088.0, + "78": 2681049088.0, + "79": 2681049088.0, + "80": 2681049088.0, + "81": 2681049088.0, + "82": 2681049088.0, + "83": 2681049088.0, + "84": 2681049088.0, + "85": 2681049088.0, + "86": 2681049088.0, + "87": 2681049088.0, + "88": 2681049088.0, + "89": 2681049088.0, + "90": 2681049088.0, + "91": 2681049088.0, + "92": 2681049088.0, + "93": 2681049088.0, + "94": 2681049088.0, + "95": 2681049088.0, + "96": 2681049088.0, + "97": 2681049088.0, + "98": 2681049088.0, + "99": 2681049088.0, + "100": 2681049088.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.98291, + "2": 0.12743, + "3": 0.38059, + "4": 0.12841, + "5": 0.14511, + "6": 0.10173, + "7": 0.26971, + "8": 0.10382, + "9": 0.3953, + "10": 0.1058, + "11": 0.10231, + "12": 0.509, + "13": 0.10327, + "14": 0.23202, + "15": 0.12684, + "16": 0.10412, + "17": 0.2441, + "18": 0.10687, + "19": 0.25963, + "20": 0.10498, + "21": 0.25469, + "22": 0.10453, + "23": 0.10222, + "24": 0.25281, + "25": 0.1172, + "26": 0.10348, + "27": 0.29437, + "28": 0.10187, + "29": 0.28228, + "30": 0.1021, + "31": 0.23112, + "32": 0.10338, + "33": 0.24896, + "34": 0.10339, + "35": 0.24587, + "36": 0.22187, + "37": 0.10494, + "38": 0.10356, + "39": 0.10387, + "40": 0.1047, + "41": 0.10726, + "42": 0.10304, + "43": 0.22521, + "44": 0.12908, + "45": 0.21396, + "46": 0.32037, + "47": 0.10321, + "48": 0.10612, + "49": 0.46303, + "50": 0.10477, + "51": 0.11648, + "52": 0.10312, + "53": 0.10274, + "54": 0.10625, + "55": 0.10219, + "56": 0.24603, + "57": 0.10299, + "58": 0.10437, + "59": 0.10386, + "60": 0.10294, + "61": 0.26442, + "62": 0.10245, + "63": 
0.17569, + "64": 0.10337, + "65": 0.23811, + "66": 0.10233, + "67": 0.23691, + "68": 0.21983, + "69": 0.19586, + "70": 0.10467, + "71": 0.10454, + "72": 0.1059, + "73": 0.10652, + "74": 0.14966, + "75": 0.10278, + "76": 0.39764, + "77": 0.10176, + "78": 0.23756, + "79": 0.10342, + "80": 0.24469, + "81": 0.10295, + "82": 0.26649, + "83": 0.105, + "84": 0.47883, + "85": 0.10596, + "86": 0.10525, + "87": 0.22714, + "88": 0.10536, + "89": 0.10595, + "90": 0.22588, + "91": 0.10237, + "92": 0.2621, + "93": 0.10543, + "94": 0.21938, + "95": 0.10276, + "96": 0.17373, + "97": 0.10501, + "98": 0.22197, + "99": 0.10635, + "100": 0.1032 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100.json index f0d9be9be9d..126681fbe76 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 763220480.0, - "2": 763220480.0, - "3": 763220480.0, - "4": 763220480.0, - "5": 763220480.0, - "6": 763220480.0, - "7": 763220480.0, - "8": 763220480.0, - "9": 763220480.0, - "10": 763220480.0, - "11": 763220480.0, - "12": 763220480.0, - "13": 763220480.0, - "14": 763220480.0, - "15": 763220480.0, - "16": 763220480.0, - "17": 763220480.0, - "18": 763220480.0, - "19": 763220480.0, - "20": 763220480.0, - "21": 763220480.0, - "22": 763220480.0, - "23": 763220480.0, - "24": 763220480.0, - "25": 763220480.0, - "26": 763220480.0, - "27": 763220480.0, - "28": 763220480.0, - "29": 763220480.0, - "30": 763220480.0, - "31": 
763220480.0, - "32": 763220480.0, - "33": 763220480.0, - "34": 763220480.0, - "35": 763220480.0, - "36": 763220480.0, - "37": 763220480.0, - "38": 763220480.0, - "39": 763220480.0, - "40": 763220480.0, - "41": 763220480.0, - "42": 763220480.0, - "43": 763220480.0, - "44": 763220480.0, - "45": 763220480.0, - "46": 763220480.0, - "47": 763220480.0, - "48": 763220480.0, - "49": 763220480.0, - "50": 763220480.0, - "51": 763220480.0, - "52": 763220480.0, - "53": 763220480.0, - "54": 763220480.0, - "55": 763220480.0, - "56": 763220480.0, - "57": 763220480.0, - "58": 763220480.0, - "59": 763220480.0, - "60": 763220480.0, - "61": 763220480.0, - "62": 763220480.0, - "63": 763220480.0, - "64": 763220480.0, - "65": 763220480.0, - "66": 763220480.0, - "67": 763220480.0, - "68": 763220480.0, - "69": 763220480.0, - "70": 763220480.0, - "71": 763220480.0, - "72": 763220480.0, - "73": 763220480.0, - "74": 763220480.0, - "75": 763220480.0, - "76": 763220480.0, - "77": 763220480.0, - "78": 763220480.0, - "79": 763220480.0, - "80": 763220480.0, - "81": 763220480.0, - "82": 763220480.0, - "83": 763220480.0, - "84": 763220480.0, - "85": 763220480.0, - "86": 763220480.0, - "87": 763220480.0, - "88": 763220480.0, - "89": 763220480.0, - "90": 763220480.0, - "91": 763220480.0, - "92": 763220480.0, - "93": 763220480.0, - "94": 763220480.0, - "95": 763220480.0, - "96": 763220480.0, - "97": 763220480.0, - "98": 763220480.0, - "99": 763220480.0, - "100": 763220480.0 + "1": 765318656.0, + "2": 765318656.0, + "3": 765318656.0, + "4": 765318656.0, + "5": 765318656.0, + "6": 765318656.0, + "7": 765318656.0, + "8": 765318656.0, + "9": 765318656.0, + "10": 765318656.0, + "11": 765318656.0, + "12": 765318656.0, + "13": 765318656.0, + "14": 765318656.0, + "15": 765318656.0, + "16": 765318656.0, + "17": 765318656.0, + "18": 765318656.0, + "19": 765318656.0, + "20": 765318656.0, + "21": 765318656.0, + "22": 765318656.0, + "23": 765318656.0, + "24": 765318656.0, + "25": 765318656.0, + "26": 765318656.0, 
+ "27": 765318656.0, + "28": 765318656.0, + "29": 765318656.0, + "30": 765318656.0, + "31": 765318656.0, + "32": 765318656.0, + "33": 765318656.0, + "34": 765318656.0, + "35": 765318656.0, + "36": 765318656.0, + "37": 765318656.0, + "38": 765318656.0, + "39": 765318656.0, + "40": 765318656.0, + "41": 765318656.0, + "42": 765318656.0, + "43": 765318656.0, + "44": 765318656.0, + "45": 765318656.0, + "46": 765318656.0, + "47": 765318656.0, + "48": 765318656.0, + "49": 765318656.0, + "50": 765318656.0, + "51": 765318656.0, + "52": 765318656.0, + "53": 765318656.0, + "54": 765318656.0, + "55": 765318656.0, + "56": 765318656.0, + "57": 765318656.0, + "58": 765318656.0, + "59": 765318656.0, + "60": 765318656.0, + "61": 765318656.0, + "62": 765318656.0, + "63": 765318656.0, + "64": 765318656.0, + "65": 765318656.0, + "66": 765318656.0, + "67": 765318656.0, + "68": 765318656.0, + "69": 765318656.0, + "70": 765318656.0, + "71": 765318656.0, + "72": 765318656.0, + "73": 765318656.0, + "74": 765318656.0, + "75": 765318656.0, + "76": 765318656.0, + "77": 765318656.0, + "78": 765318656.0, + "79": 765318656.0, + "80": 765318656.0, + "81": 765318656.0, + "82": 765318656.0, + "83": 765318656.0, + "84": 765318656.0, + "85": 765318656.0, + "86": 765318656.0, + "87": 765318656.0, + "88": 765318656.0, + "89": 765318656.0, + "90": 765318656.0, + "91": 765318656.0, + "92": 765318656.0, + "93": 765318656.0, + "94": 765318656.0, + "95": 765318656.0, + "96": 765318656.0, + "97": 765318656.0, + "98": 765318656.0, + "99": 765318656.0, + "100": 765318656.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2359490560.0, - "2": 2643299328.0, - "3": 2643299328.0, - "4": 2643299328.0, - "5": 2643299328.0, - "6": 2643299328.0, - "7": 2643299328.0, - "8": 2643299328.0, - "9": 2643299328.0, - "10": 2643299328.0, - "11": 2643299328.0, - "12": 2643299328.0, - "13": 2643299328.0, - "14": 2643299328.0, - "15": 2643299328.0, - "16": 
2643299328.0, - "17": 2643299328.0, - "18": 2643299328.0, - "19": 2643299328.0, - "20": 2643299328.0, - "21": 2643299328.0, - "22": 2643299328.0, - "23": 2643299328.0, - "24": 2643299328.0, - "25": 2643299328.0, - "26": 2643299328.0, - "27": 2643299328.0, - "28": 2643299328.0, - "29": 2643299328.0, - "30": 2643299328.0, - "31": 2643299328.0, - "32": 2643299328.0, - "33": 2643299328.0, - "34": 2643299328.0, - "35": 2643299328.0, - "36": 2643299328.0, - "37": 2643299328.0, - "38": 2643299328.0, - "39": 2643299328.0, - "40": 2643299328.0, - "41": 2643299328.0, - "42": 2643299328.0, - "43": 2643299328.0, - "44": 2643299328.0, - "45": 2643299328.0, - "46": 2643299328.0, - "47": 2643299328.0, - "48": 2643299328.0, - "49": 2643299328.0, - "50": 2643299328.0, - "51": 2643299328.0, - "52": 2643299328.0, - "53": 2643299328.0, - "54": 2643299328.0, - "55": 2643299328.0, - "56": 2643299328.0, - "57": 2643299328.0, - "58": 2643299328.0, - "59": 2643299328.0, - "60": 2643299328.0, - "61": 2643299328.0, - "62": 2643299328.0, - "63": 2643299328.0, - "64": 2643299328.0, - "65": 2643299328.0, - "66": 2643299328.0, - "67": 2643299328.0, - "68": 2643299328.0, - "69": 2643299328.0, - "70": 2643299328.0, - "71": 2643299328.0, - "72": 2643299328.0, - "73": 2643299328.0, - "74": 2643299328.0, - "75": 2643299328.0, - "76": 2643299328.0, - "77": 2643299328.0, - "78": 2643299328.0, - "79": 2643299328.0, - "80": 2643299328.0, - "81": 2643299328.0, - "82": 2643299328.0, - "83": 2643299328.0, - "84": 2643299328.0, - "85": 2643299328.0, - "86": 2643299328.0, - "87": 2643299328.0, - "88": 2643299328.0, - "89": 2643299328.0, - "90": 2643299328.0, - "91": 2643299328.0, - "92": 2643299328.0, - "93": 2643299328.0, - "94": 2643299328.0, - "95": 2643299328.0, - "96": 2643299328.0, - "97": 2643299328.0, - "98": 2643299328.0, - "99": 2643299328.0, - "100": 2643299328.0 + "1": 2360539648.0, + "2": 2645397504.0, + "3": 2645397504.0, + "4": 2645397504.0, + "5": 2645397504.0, + "6": 2645397504.0, + "7": 
2645397504.0, + "8": 2645397504.0, + "9": 2645397504.0, + "10": 2645397504.0, + "11": 2645397504.0, + "12": 2645397504.0, + "13": 2645397504.0, + "14": 2645397504.0, + "15": 2645397504.0, + "16": 2645397504.0, + "17": 2645397504.0, + "18": 2645397504.0, + "19": 2645397504.0, + "20": 2645397504.0, + "21": 2645397504.0, + "22": 2645397504.0, + "23": 2645397504.0, + "24": 2645397504.0, + "25": 2645397504.0, + "26": 2645397504.0, + "27": 2645397504.0, + "28": 2645397504.0, + "29": 2645397504.0, + "30": 2645397504.0, + "31": 2645397504.0, + "32": 2645397504.0, + "33": 2645397504.0, + "34": 2645397504.0, + "35": 2645397504.0, + "36": 2645397504.0, + "37": 2645397504.0, + "38": 2645397504.0, + "39": 2645397504.0, + "40": 2645397504.0, + "41": 2645397504.0, + "42": 2645397504.0, + "43": 2645397504.0, + "44": 2645397504.0, + "45": 2645397504.0, + "46": 2645397504.0, + "47": 2645397504.0, + "48": 2645397504.0, + "49": 2645397504.0, + "50": 2645397504.0, + "51": 2645397504.0, + "52": 2645397504.0, + "53": 2645397504.0, + "54": 2645397504.0, + "55": 2645397504.0, + "56": 2645397504.0, + "57": 2645397504.0, + "58": 2645397504.0, + "59": 2645397504.0, + "60": 2645397504.0, + "61": 2645397504.0, + "62": 2645397504.0, + "63": 2645397504.0, + "64": 2645397504.0, + "65": 2645397504.0, + "66": 2645397504.0, + "67": 2645397504.0, + "68": 2645397504.0, + "69": 2645397504.0, + "70": 2645397504.0, + "71": 2645397504.0, + "72": 2645397504.0, + "73": 2645397504.0, + "74": 2645397504.0, + "75": 2645397504.0, + "76": 2645397504.0, + "77": 2645397504.0, + "78": 2645397504.0, + "79": 2645397504.0, + "80": 2645397504.0, + "81": 2645397504.0, + "82": 2645397504.0, + "83": 2645397504.0, + "84": 2645397504.0, + "85": 2645397504.0, + "86": 2645397504.0, + "87": 2645397504.0, + "88": 2645397504.0, + "89": 2645397504.0, + "90": 2645397504.0, + "91": 2645397504.0, + "92": 2645397504.0, + "93": 2645397504.0, + "94": 2645397504.0, + "95": 2645397504.0, + "96": 2645397504.0, + "97": 2645397504.0, + "98": 
2645397504.0, + "99": 2645397504.0, + "100": 2645397504.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 17.57509, - "2": 0.1453, - "3": 0.11184, - "4": 0.11457, - "5": 0.12345, - "6": 0.12167, - "7": 0.12451, - "8": 0.11003, - "9": 0.11229, - "10": 0.11078, - "11": 0.11178, - "12": 0.11071, - "13": 0.11183, - "14": 0.1131, - "15": 0.11195, - "16": 0.11109, - "17": 0.11155, - "18": 0.11436, - "19": 0.11335, - "20": 0.11235, - "21": 0.11323, - "22": 0.11234, - "23": 0.1131, - "24": 0.11154, - "25": 0.11274, - "26": 0.11525, - "27": 0.11435, - "28": 0.11247, - "29": 0.11318, - "30": 0.11126, - "31": 0.11489, - "32": 0.11045, - "33": 0.1114, - "34": 0.11253, - "35": 0.11114, - "36": 0.114, - "37": 0.11201, - "38": 0.10979, - "39": 0.11069, - "40": 0.11078, - "41": 0.11142, - "42": 0.11091, - "43": 0.11324, - "44": 0.11151, - "45": 0.11295, - "46": 0.11174, - "47": 0.10954, - "48": 0.11083, - "49": 0.11195, - "50": 0.11251, - "51": 0.11627, - "52": 0.11199, - "53": 0.11127, - "54": 0.11464, - "55": 0.11072, - "56": 0.1136, - "57": 0.11119, - "58": 0.11025, - "59": 0.11083, - "60": 0.11126, - "61": 0.10968, - "62": 0.11104, - "63": 0.11515, - "64": 0.11136, - "65": 0.11454, - "66": 0.10994, - "67": 0.11003, - "68": 0.10997, - "69": 0.11155, - "70": 0.11002, - "71": 0.1121, - "72": 0.11334, - "73": 0.11221, - "74": 0.11542, - "75": 0.11082, - "76": 0.10997, - "77": 0.11087, - "78": 0.11222, - "79": 0.11343, - "80": 0.11462, - "81": 0.11272, - "82": 0.11293, - "83": 0.113, - "84": 0.11134, - "85": 0.11308, - "86": 0.11357, - "87": 0.11341, - "88": 0.11349, - "89": 0.11342, - "90": 0.11212, - "91": 0.11377, - "92": 0.11421, - "93": 0.1115, - "94": 0.11293, - "95": 0.11334, - "96": 0.11303, - "97": 0.11198, - "98": 0.11326, - "99": 0.11128, - "100": 0.1117 + "1": 11.89927, + "2": 0.1153, + "3": 0.10368, + "4": 0.08198, + "5": 0.0823, + "6": 0.0813, + "7": 0.08053, + "8": 0.08097, + "9": 0.08083, + "10": 0.08105, + 
"11": 0.08193, + "12": 0.08083, + "13": 0.08063, + "14": 0.08095, + "15": 0.08115, + "16": 0.08099, + "17": 0.08128, + "18": 0.08134, + "19": 0.08147, + "20": 0.08174, + "21": 0.08185, + "22": 0.08175, + "23": 0.08109, + "24": 0.08065, + "25": 0.08488, + "26": 0.08433, + "27": 0.08446, + "28": 0.08482, + "29": 0.08645, + "30": 0.08469, + "31": 0.08623, + "32": 0.08474, + "33": 0.08443, + "34": 0.08442, + "35": 0.08287, + "36": 0.08188, + "37": 0.08068, + "38": 0.0808, + "39": 0.08041, + "40": 0.08119, + "41": 0.08373, + "42": 0.08116, + "43": 0.08394, + "44": 0.08252, + "45": 0.08182, + "46": 0.08217, + "47": 0.08115, + "48": 0.08122, + "49": 0.08084, + "50": 0.08062, + "51": 0.09006, + "52": 0.08529, + "53": 0.08552, + "54": 0.08335, + "55": 0.08266, + "56": 0.08016, + "57": 0.08221, + "58": 0.08, + "59": 0.08121, + "60": 0.08027, + "61": 0.08342, + "62": 0.08237, + "63": 0.08269, + "64": 0.0825, + "65": 0.08238, + "66": 0.08275, + "67": 0.08276, + "68": 0.08526, + "69": 0.0814, + "70": 0.08183, + "71": 0.08214, + "72": 0.08252, + "73": 0.0824, + "74": 0.08248, + "75": 0.08211, + "76": 0.0822, + "77": 0.08148, + "78": 0.08193, + "79": 0.08271, + "80": 0.082, + "81": 0.08216, + "82": 0.08205, + "83": 0.0823, + "84": 0.08236, + "85": 0.08239, + "86": 0.0805, + "87": 0.07901, + "88": 0.07985, + "89": 0.07962, + "90": 0.07883, + "91": 0.07962, + "92": 0.07909, + "93": 0.07986, + "94": 0.08107, + "95": 0.08014, + "96": 0.07993, + "97": 0.08061, + "98": 0.0808, + "99": 0.07879, + "100": 0.07901 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..93e78f67d5d --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.8587, + "52": 9.74287, + "53": 10.06647, + "54": 9.95168, + "55": 9.88096, + "56": 9.62625, + "57": 9.47766, + "58": 9.8335, + "59": 9.58522, + "60": 9.50125, + "61": 9.69186, + "62": 9.98858, + "63": 9.38478, + "64": 9.78027, + "65": 8.94761, + "66": 9.70857, + "67": 9.36847, + "68": 9.78438, + "69": 9.79407, + "70": 9.7424, + "71": 9.61808, + "72": 9.58427, + "73": 9.50347, + "74": 8.9422, + "75": 9.42532, + "76": 9.07407, + "77": 10.06351, + "78": 9.7208, + "79": 9.37296, + "80": 9.40396, + "81": 9.48168, + "82": 9.69778, + "83": 9.30711, + "84": 9.41712, + "85": 9.61405, + "86": 9.07618, + "87": 9.59088, + "88": 9.7464, + "89": 9.59987, + "90": 9.81418, + "91": 9.33775, + "92": 9.35372, + "93": 9.07397, + "94": 8.8317, + "95": 9.5173, + "96": 9.52412, + "97": 9.30995, + "98": 9.66807, + "99": 8.8859, + "100": 9.39541 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + 
"5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2608.0, + "52": 2444.0, + "53": 2898.0, + "54": 2664.0, + "55": 2325.0, + "56": 2614.0, + "57": 2394.0, + "58": 2812.0, + "59": 2771.0, + "60": 2361.0, + "61": 2855.0, + "62": 2675.0, + "63": 2393.0, + "64": 3014.0, + "65": 2673.0, + "66": 3051.0, + "67": 2657.0, + "68": 2662.0, + "69": 2736.0, + "70": 3139.0, + "71": 2943.0, + "72": 2293.0, + "73": 2908.0, + "74": 1887.0, + "75": 2519.0, + "76": 3060.0, + "77": 3191.0, + "78": 3211.0, + "79": 3081.0, + "80": 3205.0, + "81": 3563.0, + "82": 3201.0, + "83": 2614.0, + "84": 3162.0, + "85": 3209.0, + "86": 2660.0, + "87": 3729.0, + "88": 3002.0, + "89": 3160.0, + "90": 3168.0, + "91": 2753.0, + "92": 3258.0, + "93": 2617.0, + "94": 3341.0, + "95": 3261.0, + "96": 3370.0, + "97": 3163.0, + "98": 3566.0, + "99": 3179.0, + "100": 3135.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": 
"nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 763221504.0, + "52": 763221504.0, + "53": 763221504.0, + "54": 763221504.0, + "55": 763221504.0, + "56": 763221504.0, + "57": 763221504.0, + "58": 763221504.0, + "59": 763221504.0, + "60": 763221504.0, + "61": 763221504.0, + "62": 763221504.0, + "63": 763221504.0, + "64": 763221504.0, + "65": 763221504.0, + "66": 763221504.0, + "67": 763221504.0, + "68": 763221504.0, + "69": 763221504.0, + "70": 763221504.0, + "71": 763221504.0, + "72": 763221504.0, + "73": 763221504.0, + "74": 763221504.0, + "75": 763221504.0, + "76": 763221504.0, + "77": 763221504.0, + "78": 763221504.0, + "79": 763221504.0, + "80": 763221504.0, + "81": 763221504.0, + "82": 763221504.0, + "83": 763221504.0, + "84": 763221504.0, + "85": 763221504.0, + "86": 763221504.0, + "87": 763221504.0, + "88": 763221504.0, + "89": 763221504.0, + "90": 763221504.0, + "91": 763221504.0, + "92": 763221504.0, + "93": 763221504.0, + "94": 763221504.0, + "95": 763221504.0, + "96": 763221504.0, + "97": 763221504.0, + "98": 763221504.0, + "99": 763221504.0, + "100": 763221504.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": 
"nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2643299328.0, + "52": 2643300352.0, + "53": 2643300352.0, + "54": 2643300352.0, + "55": 2643300352.0, + "56": 2643300352.0, + "57": 2643300352.0, + "58": 2643300352.0, + "59": 2643300352.0, + "60": 2643300352.0, + "61": 2643300352.0, + "62": 2643300352.0, + "63": 2643300352.0, + "64": 2643300352.0, + "65": 2643300352.0, + "66": 2643300352.0, + "67": 2643300352.0, + "68": 2643300352.0, + "69": 2643300352.0, + "70": 2643300352.0, + "71": 2643300352.0, + "72": 2643300352.0, + "73": 2643300352.0, + "74": 2643300352.0, + "75": 2643300352.0, + "76": 2643300352.0, + "77": 2643300352.0, + "78": 2643300352.0, + "79": 2643300352.0, + "80": 2643300352.0, + "81": 2643300352.0, + "82": 2643300352.0, + "83": 2643300352.0, + "84": 2643300352.0, + "85": 2643300352.0, + "86": 2643300352.0, + "87": 2643300352.0, + "88": 2643300352.0, + "89": 2643300352.0, + "90": 2643300352.0, + "91": 2643300352.0, + "92": 2643300352.0, + "93": 2643300352.0, + "94": 2643300352.0, + "95": 2643300352.0, + "96": 2643300352.0, + "97": 2643300352.0, + "98": 2643300352.0, + "99": 2643300352.0, + "100": 2643300352.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + 
"38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 12.33731, + "52": 0.11786, + "53": 0.07991, + "54": 0.07986, + "55": 0.07966, + "56": 0.07938, + "57": 0.07961, + "58": 0.07896, + "59": 0.08173, + "60": 0.08111, + "61": 0.07932, + "62": 0.07983, + "63": 0.07857, + "64": 0.07987, + "65": 0.08064, + "66": 0.08044, + "67": 0.07986, + "68": 0.07972, + "69": 0.08138, + "70": 0.07961, + "71": 0.07849, + "72": 0.07845, + "73": 0.07863, + "74": 0.07911, + "75": 0.07806, + "76": 0.0788, + "77": 0.07844, + "78": 0.07863, + "79": 0.07852, + "80": 0.07836, + "81": 0.07846, + "82": 0.07827, + "83": 0.0783, + "84": 0.08097, + "85": 0.07901, + "86": 0.07807, + "87": 0.07812, + "88": 0.07877, + "89": 0.07887, + "90": 0.08111, + "91": 0.07881, + "92": 0.08093, + "93": 0.07971, + "94": 0.08058, + "95": 0.07862, + "96": 0.07919, + "97": 0.07748, + "98": 0.07748, + "99": 0.07818, + "100": 0.07748 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100.json index 0cb12854799..f68a55e951c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100.json @@ -325,7 +325,7 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2367879168.0, + "1": 2368927744.0, "2": 2651687936.0, "3": 2651687936.0, "4": 2651687936.0, @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 12.6402, - "2": 0.15932, - "3": 0.13183, - "4": 
0.12969, - "5": 0.12913, - "6": 0.12942, - "7": 0.12823, - "8": 0.13014, - "9": 0.1305, - "10": 0.13004, - "11": 0.12983, - "12": 0.12943, - "13": 0.12925, - "14": 0.13022, - "15": 0.12947, - "16": 0.12988, - "17": 0.12984, - "18": 0.12989, - "19": 0.12987, - "20": 0.12935, - "21": 0.12974, - "22": 0.12965, - "23": 0.12983, - "24": 0.13037, - "25": 0.1293, - "26": 0.12914, - "27": 0.12908, - "28": 0.12909, - "29": 0.13186, - "30": 0.13433, - "31": 0.13401, - "32": 0.12902, - "33": 0.12808, - "34": 0.12907, - "35": 0.12884, - "36": 0.12913, - "37": 0.12932, - "38": 0.12992, - "39": 0.13072, - "40": 0.13131, - "41": 0.13172, - "42": 0.13072, - "43": 0.13259, - "44": 0.13124, - "45": 0.13129, - "46": 0.1291, - "47": 0.1308, - "48": 0.1301, - "49": 0.12906, - "50": 0.12828, - "51": 0.14265, - "52": 0.12979, - "53": 0.126, - "54": 0.12545, - "55": 0.12582, - "56": 0.12573, - "57": 0.12516, - "58": 0.1252, - "59": 0.12598, - "60": 0.12562, - "61": 0.12544, - "62": 0.12472, - "63": 0.12548, - "64": 0.12537, - "65": 0.12534, - "66": 0.12474, - "67": 0.12528, - "68": 0.12481, - "69": 0.12531, - "70": 0.12547, - "71": 0.12492, - "72": 0.12533, - "73": 0.12583, - "74": 0.1253, - "75": 0.12453, - "76": 0.12486, - "77": 0.12501, - "78": 0.12491, - "79": 0.12247, - "80": 0.1223, - "81": 0.1243, - "82": 0.12257, - "83": 0.12179, - "84": 0.12254, - "85": 0.12231, - "86": 0.12263, - "87": 0.12152, - "88": 0.12188, - "89": 0.1228, - "90": 0.12133, - "91": 0.1216, - "92": 0.12133, - "93": 0.12135, - "94": 0.12216, - "95": 0.12141, - "96": 0.12205, - "97": 0.12356, - "98": 0.12174, - "99": 0.12252, - "100": 0.1222 + "1": 5.40788, + "2": 0.15608, + "3": 0.1477, + "4": 0.13403, + "5": 0.13382, + "6": 0.13308, + "7": 0.1344, + "8": 0.13063, + "9": 0.12991, + "10": 0.13084, + "11": 0.13107, + "12": 0.13009, + "13": 0.13035, + "14": 0.13027, + "15": 0.13037, + "16": 0.1302, + "17": 0.12981, + "18": 0.12893, + "19": 0.12914, + "20": 0.12893, + "21": 0.12912, + "22": 0.1334, + "23": 0.13093, 
+ "24": 0.13133, + "25": 0.13036, + "26": 0.13026, + "27": 0.13063, + "28": 0.13046, + "29": 0.13311, + "30": 0.13167, + "31": 0.13145, + "32": 0.13051, + "33": 0.13072, + "34": 0.1308, + "35": 0.13145, + "36": 0.13046, + "37": 0.13066, + "38": 0.13075, + "39": 0.13108, + "40": 0.1305, + "41": 0.13132, + "42": 0.1308, + "43": 0.13149, + "44": 0.13097, + "45": 0.13099, + "46": 0.13204, + "47": 0.13136, + "48": 0.13051, + "49": 0.13073, + "50": 0.13055, + "51": 0.1389, + "52": 0.13184, + "53": 0.13181, + "54": 0.13087, + "55": 0.13152, + "56": 0.13181, + "57": 0.13138, + "58": 0.13134, + "59": 0.13133, + "60": 0.13251, + "61": 0.13157, + "62": 0.13187, + "63": 0.13183, + "64": 0.13133, + "65": 0.13157, + "66": 0.13239, + "67": 0.13213, + "68": 0.13166, + "69": 0.13128, + "70": 0.13118, + "71": 0.13129, + "72": 0.1319, + "73": 0.13204, + "74": 0.13343, + "75": 0.13119, + "76": 0.13129, + "77": 0.13116, + "78": 0.13092, + "79": 0.13228, + "80": 0.13183, + "81": 0.13133, + "82": 0.13205, + "83": 0.13189, + "84": 0.13312, + "85": 0.13289, + "86": 0.13578, + "87": 0.13422, + "88": 0.1347, + "89": 0.13466, + "90": 0.13428, + "91": 0.13512, + "92": 0.13241, + "93": 0.12996, + "94": 0.1315, + "95": 0.12919, + "96": 0.12806, + "97": 0.12848, + "98": 0.12922, + "99": 0.12714, + "100": 0.12757 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..91d84b88527 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": 
"nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.83549, + "52": 9.72516, + "53": 10.04799, + "54": 9.93011, + "55": 9.8636, + "56": 9.60217, + "57": 9.45187, + "58": 9.8078, + "59": 9.56783, + "60": 9.47966, + "61": 9.67984, + "62": 9.96754, + "63": 9.35113, + "64": 9.75623, + "65": 8.9318, + "66": 9.68107, + "67": 9.35956, + "68": 9.76948, + "69": 9.77492, + "70": 9.71182, + "71": 9.60632, + "72": 9.57129, + "73": 9.48392, + "74": 8.92911, + "75": 9.40028, + "76": 9.07194, + "77": 10.05252, + "78": 9.71494, + "79": 9.35747, + "80": 9.38946, + "81": 9.46791, + "82": 9.68508, + "83": 9.29588, + "84": 9.40522, + "85": 9.60163, + "86": 9.06713, + "87": 9.58402, + "88": 9.73304, + "89": 9.59526, + "90": 9.80555, + "91": 9.32604, + "92": 9.35323, + "93": 9.06915, + "94": 8.82268, + "95": 9.50858, + "96": 9.51584, + "97": 9.2976, + "98": 9.66184, + "99": 8.87662, + "100": 9.39222 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": 
"nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2441.0, + "52": 2482.0, + "53": 2916.0, + "54": 2550.0, + "55": 2347.0, + "56": 2765.0, + "57": 2116.0, + "58": 2968.0, + "59": 2810.0, + "60": 2384.0, + "61": 2912.0, + "62": 2554.0, + "63": 2364.0, + "64": 3035.0, + "65": 2648.0, + "66": 2979.0, + "67": 2741.0, + "68": 2799.0, + "69": 3071.0, + "70": 3098.0, + "71": 2950.0, + "72": 2342.0, + "73": 2829.0, + "74": 1840.0, + "75": 2426.0, + "76": 2941.0, + "77": 3245.0, + "78": 3272.0, + "79": 3066.0, + "80": 3221.0, + "81": 3565.0, + "82": 3162.0, + "83": 2876.0, + "84": 3180.0, + "85": 3410.0, + "86": 2778.0, + "87": 3752.0, + "88": 2995.0, + "89": 3264.0, + "90": 2940.0, + "91": 2791.0, + "92": 3118.0, + "93": 2634.0, + "94": 3464.0, + "95": 3344.0, + "96": 3499.0, + "97": 3122.0, + "98": 3568.0, + "99": 3272.0, + "100": 3476.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": 
"nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 733860352.0, + "52": 733860352.0, + "53": 733860352.0, + "54": 733860352.0, + "55": 733860352.0, + "56": 733860352.0, + "57": 733860352.0, + "58": 733860352.0, + "59": 733860352.0, + "60": 733860352.0, + "61": 733860352.0, + "62": 733860352.0, + "63": 733860352.0, + "64": 733860352.0, + "65": 733860352.0, + "66": 733860352.0, + "67": 733860352.0, + "68": 733860352.0, + "69": 733860352.0, + "70": 733860352.0, + "71": 733860352.0, + "72": 733860352.0, + "73": 733860352.0, + "74": 733860352.0, + "75": 733860352.0, + "76": 733860352.0, + "77": 733860352.0, + "78": 733860352.0, + "79": 733860352.0, + "80": 733860352.0, + "81": 733860352.0, + "82": 733860352.0, + "83": 733860352.0, + "84": 733860352.0, + "85": 733860352.0, + "86": 733860352.0, + "87": 733860352.0, + "88": 733860352.0, + "89": 733860352.0, + "90": 733860352.0, + "91": 733860352.0, + "92": 733860352.0, + "93": 733860352.0, + "94": 733860352.0, + "95": 733860352.0, + "96": 733860352.0, + "97": 733860352.0, + "98": 733860352.0, + "99": 733860352.0, + "100": 733860352.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2652735488.0, + "52": 
2652736512.0, + "53": 2652736512.0, + "54": 2652736512.0, + "55": 2652736512.0, + "56": 2652736512.0, + "57": 2652736512.0, + "58": 2652736512.0, + "59": 2652736512.0, + "60": 2652736512.0, + "61": 2652736512.0, + "62": 2652736512.0, + "63": 2652736512.0, + "64": 2652736512.0, + "65": 2652736512.0, + "66": 2652736512.0, + "67": 2652736512.0, + "68": 2652736512.0, + "69": 2652736512.0, + "70": 2652736512.0, + "71": 2652736512.0, + "72": 2652736512.0, + "73": 2652736512.0, + "74": 2652736512.0, + "75": 2652736512.0, + "76": 2652736512.0, + "77": 2652736512.0, + "78": 2652736512.0, + "79": 2652736512.0, + "80": 2652736512.0, + "81": 2652736512.0, + "82": 2652736512.0, + "83": 2652736512.0, + "84": 2652736512.0, + "85": 2652736512.0, + "86": 2652736512.0, + "87": 2652736512.0, + "88": 2652736512.0, + "89": 2652736512.0, + "90": 2652736512.0, + "91": 2652736512.0, + "92": 2652736512.0, + "93": 2652736512.0, + "94": 2652736512.0, + "95": 2652736512.0, + "96": 2652736512.0, + "97": 2652736512.0, + "98": 2652736512.0, + "99": 2652736512.0, + "100": 2652736512.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 6.95149, + "52": 0.16138, + "53": 0.14143, + "54": 0.14147, + "55": 
0.14039, + "56": 0.14065, + "57": 0.14197, + "58": 0.14092, + "59": 0.13304, + "60": 0.1316, + "61": 0.13067, + "62": 0.13101, + "63": 0.13087, + "64": 0.13347, + "65": 0.13501, + "66": 0.13486, + "67": 0.13415, + "68": 0.13402, + "69": 0.1339, + "70": 0.1332, + "71": 0.13414, + "72": 0.13291, + "73": 0.1334, + "74": 0.13397, + "75": 0.13253, + "76": 0.13314, + "77": 0.13317, + "78": 0.13335, + "79": 0.13316, + "80": 0.13312, + "81": 0.13302, + "82": 0.13404, + "83": 0.13393, + "84": 0.13355, + "85": 0.13237, + "86": 0.13361, + "87": 0.13268, + "88": 0.13156, + "89": 0.13245, + "90": 0.13179, + "91": 0.13173, + "92": 0.13158, + "93": 0.13204, + "94": 0.1318, + "95": 0.13972, + "96": 0.13128, + "97": 0.12988, + "98": 0.13091, + "99": 0.13155, + "100": 0.1314 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..3d38faf23fc --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82555, + "2": 10.83286, + "3": 10.82763, + "4": 10.79573, + "5": 10.85699, + "6": 10.8639, + "7": 10.82612, + "8": 10.82542, + "9": 10.83587, + "10": 10.79627, + "11": 10.87822, + "12": 10.85824, + "13": 10.85426, + "14": 10.87526, + "15": 10.79208, + "16": 10.80307, + "17": 10.77438, + "18": 10.80487, + "19": 10.79369, + "20": 10.69576, + "21": 10.68654, + "22": 10.53161, + "23": 10.70646, + "24": 10.57337, + "25": 10.51533, + "26": 10.5909, + "27": 10.60777, + "28": 10.57049, + "29": 10.58979, + "30": 10.34722, + "31": 
10.07771, + "32": 10.46349, + "33": 10.45722, + "34": 10.19974, + "35": 10.25643, + "36": 10.21263, + "37": 10.34718, + "38": 10.18009, + "39": 10.40838, + "40": 10.07629, + "41": 10.1297, + "42": 10.2117, + "43": 9.81708, + "44": 9.94034, + "45": 9.81748, + "46": 9.80633, + "47": 10.12473, + "48": 9.84047, + "49": 9.51012, + "50": 9.88943, + "51": 9.84256, + "52": 9.72573, + "53": 10.05974, + "54": 9.95226, + "55": 9.88318, + "56": 9.61275, + "57": 9.46219, + "58": 9.8231, + "59": 9.57666, + "60": 9.48516, + "61": 9.67876, + "62": 9.97782, + "63": 9.36212, + "64": 9.75714, + "65": 8.93494, + "66": 9.69283, + "67": 9.36708, + "68": 9.78178, + "69": 9.79452, + "70": 9.72296, + "71": 9.62031, + "72": 9.56974, + "73": 9.48101, + "74": 8.91241, + "75": 9.40905, + "76": 9.06617, + "77": 10.05809, + "78": 9.72194, + "79": 9.36927, + "80": 9.40029, + "81": 9.47702, + "82": 9.69787, + "83": 9.30742, + "84": 9.41492, + "85": 9.61113, + "86": 9.07103, + "87": 9.5961, + "88": 9.74909, + "89": 9.59604, + "90": 9.82722, + "91": 9.33657, + "92": 9.35582, + "93": 9.08689, + "94": 8.82754, + "95": 9.53065, + "96": 9.5276, + "97": 9.30672, + "98": 9.66905, + "99": 8.89635, + "100": 9.40525 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1651.0, + "2": 1716.0, + "3": 1760.0, + "4": 1771.0, + "5": 1899.0, + "6": 1905.0, + "7": 1842.0, + "8": 1667.0, + "9": 1822.0, + "10": 1434.0, + "11": 1852.0, + "12": 1741.0, + "13": 1905.0, + "14": 1841.0, + "15": 1857.0, + "16": 1841.0, + "17": 1800.0, + "18": 1666.0, + "19": 1803.0, + "20": 1800.0, + "21": 1836.0, + "22": 1688.0, + "23": 1994.0, + "24": 1641.0, + "25": 1577.0, + "26": 1676.0, + "27": 1876.0, + "28": 1970.0, + "29": 1945.0, + "30": 1916.0, + "31": 1494.0, + "32": 1868.0, + "33": 2135.0, + "34": 1740.0, + "35": 1924.0, + "36": 1854.0, + "37": 2363.0, + "38": 2164.0, + "39": 2262.0, + "40": 2081.0, + "41": 2168.0, + "42": 2247.0, + "43": 2055.0, + "44": 2070.0, + "45": 
1988.0, + "46": 2208.0, + "47": 2559.0, + "48": 2287.0, + "49": 2194.0, + "50": 2303.0, + "51": 2552.0, + "52": 2565.0, + "53": 2883.0, + "54": 2710.0, + "55": 2301.0, + "56": 2798.0, + "57": 2334.0, + "58": 2979.0, + "59": 2960.0, + "60": 2451.0, + "61": 2841.0, + "62": 2577.0, + "63": 2516.0, + "64": 2907.0, + "65": 2567.0, + "66": 2862.0, + "67": 2809.0, + "68": 2609.0, + "69": 2965.0, + "70": 2985.0, + "71": 2864.0, + "72": 2613.0, + "73": 3108.0, + "74": 2048.0, + "75": 2563.0, + "76": 3046.0, + "77": 3127.0, + "78": 2959.0, + "79": 3082.0, + "80": 3025.0, + "81": 3400.0, + "82": 3223.0, + "83": 2786.0, + "84": 3180.0, + "85": 3233.0, + "86": 2611.0, + "87": 3542.0, + "88": 3084.0, + "89": 3210.0, + "90": 3271.0, + "91": 2770.0, + "92": 3220.0, + "93": 2662.0, + "94": 3405.0, + "95": 3085.0, + "96": 3336.0, + "97": 3050.0, + "98": 3421.0, + "99": 3271.0, + "100": 3079.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 552325632.0, + "2": 552325632.0, + "3": 552325632.0, + "4": 552325632.0, + "5": 552325632.0, + "6": 552325632.0, + "7": 552325632.0, + "8": 552325632.0, + "9": 552325632.0, + "10": 552325632.0, + "11": 552325632.0, + "12": 552325632.0, + "13": 552325632.0, + "14": 552325632.0, + "15": 552325632.0, + "16": 552325632.0, + "17": 552325632.0, + "18": 552325632.0, + "19": 552325632.0, + "20": 552325632.0, + "21": 552325632.0, + "22": 552325632.0, + "23": 552325632.0, + "24": 552325632.0, + "25": 552325632.0, + "26": 552325632.0, + "27": 552325632.0, + "28": 552325632.0, + "29": 552325632.0, + "30": 552325632.0, + "31": 552325632.0, + "32": 552325632.0, + "33": 552325632.0, + "34": 552325632.0, + "35": 552325632.0, + "36": 552325632.0, + "37": 552325632.0, + "38": 552325632.0, + "39": 552325632.0, + "40": 552325632.0, + "41": 552325632.0, + "42": 552325632.0, + "43": 552325632.0, + "44": 552325632.0, + "45": 552325632.0, + "46": 552325632.0, + "47": 552325632.0, + "48": 552325632.0, + 
"49": 552325632.0, + "50": 552325632.0, + "51": 552325632.0, + "52": 552325632.0, + "53": 552325632.0, + "54": 552325632.0, + "55": 552325632.0, + "56": 552325632.0, + "57": 552325632.0, + "58": 552325632.0, + "59": 552325632.0, + "60": 552325632.0, + "61": 552325632.0, + "62": 552325632.0, + "63": 552325632.0, + "64": 552325632.0, + "65": 552325632.0, + "66": 552325632.0, + "67": 552325632.0, + "68": 552325632.0, + "69": 552325632.0, + "70": 552325632.0, + "71": 552325632.0, + "72": 552325632.0, + "73": 552325632.0, + "74": 552325632.0, + "75": 552325632.0, + "76": 552325632.0, + "77": 552325632.0, + "78": 552325632.0, + "79": 552325632.0, + "80": 552325632.0, + "81": 552325632.0, + "82": 552325632.0, + "83": 552325632.0, + "84": 552325632.0, + "85": 552325632.0, + "86": 552325632.0, + "87": 552325632.0, + "88": 552325632.0, + "89": 552325632.0, + "90": 552325632.0, + "91": 552325632.0, + "92": 552325632.0, + "93": 552325632.0, + "94": 552325632.0, + "95": 552325632.0, + "96": 552325632.0, + "97": 552325632.0, + "98": 552325632.0, + "99": 552325632.0, + "100": 552325632.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2328239104.0, + "2": 2471201792.0, + "3": 2471201792.0, + "4": 2471201792.0, + "5": 2471201792.0, + "6": 2471201792.0, + "7": 2471201792.0, + "8": 2471201792.0, + "9": 2471201792.0, + "10": 2471201792.0, + "11": 2471201792.0, + "12": 2471201792.0, + "13": 2471201792.0, + "14": 2471201792.0, + "15": 2471201792.0, + "16": 2471201792.0, + "17": 2471201792.0, + "18": 2471201792.0, + "19": 2471201792.0, + "20": 2471201792.0, + "21": 2471201792.0, + "22": 2471201792.0, + "23": 2471201792.0, + "24": 2471201792.0, + "25": 2471201792.0, + "26": 2471201792.0, + "27": 2471201792.0, + "28": 2471201792.0, + "29": 2471201792.0, + "30": 2471201792.0, + "31": 2471201792.0, + "32": 2471201792.0, + "33": 2471201792.0, + "34": 2471201792.0, + "35": 2471201792.0, + "36": 2471201792.0, + "37": 
2471201792.0, + "38": 2471201792.0, + "39": 2471201792.0, + "40": 2471201792.0, + "41": 2471201792.0, + "42": 2471201792.0, + "43": 2471201792.0, + "44": 2471201792.0, + "45": 2471201792.0, + "46": 2471201792.0, + "47": 2471201792.0, + "48": 2471201792.0, + "49": 2471201792.0, + "50": 2471201792.0, + "51": 2471201792.0, + "52": 2471201792.0, + "53": 2471201792.0, + "54": 2471201792.0, + "55": 2471201792.0, + "56": 2471201792.0, + "57": 2471201792.0, + "58": 2471201792.0, + "59": 2471201792.0, + "60": 2471201792.0, + "61": 2471201792.0, + "62": 2471201792.0, + "63": 2471201792.0, + "64": 2471201792.0, + "65": 2471201792.0, + "66": 2471201792.0, + "67": 2471201792.0, + "68": 2471201792.0, + "69": 2471201792.0, + "70": 2471201792.0, + "71": 2471201792.0, + "72": 2471201792.0, + "73": 2471201792.0, + "74": 2471201792.0, + "75": 2471201792.0, + "76": 2471201792.0, + "77": 2471201792.0, + "78": 2471201792.0, + "79": 2471201792.0, + "80": 2471201792.0, + "81": 2471201792.0, + "82": 2471201792.0, + "83": 2471201792.0, + "84": 2471201792.0, + "85": 2471201792.0, + "86": 2471201792.0, + "87": 2471201792.0, + "88": 2471201792.0, + "89": 2471201792.0, + "90": 2471201792.0, + "91": 2471201792.0, + "92": 2471201792.0, + "93": 2471201792.0, + "94": 2471201792.0, + "95": 2471201792.0, + "96": 2471201792.0, + "97": 2471201792.0, + "98": 2471201792.0, + "99": 2471201792.0, + "100": 2471201792.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.20216, + "2": 0.13277, + "3": 0.2975, + "4": 0.10754, + "5": 0.10418, + "6": 0.10612, + "7": 0.24272, + "8": 0.11347, + "9": 0.14845, + "10": 0.10733, + "11": 0.10387, + "12": 0.47615, + "13": 0.26992, + "14": 0.10483, + "15": 0.1204, + "16": 0.10696, + "17": 0.10552, + "18": 0.10521, + "19": 0.24005, + "20": 0.17139, + "21": 0.13276, + "22": 0.21348, + "23": 0.10526, + "24": 0.23652, + "25": 0.10695, + "26": 0.105, + "27": 0.1046, + "28": 0.108, + "29": 0.22645, + "30": 0.10764, 
+ "31": 0.37801, + "32": 0.10822, + "33": 0.26043, + "34": 0.10725, + "35": 0.10759, + "36": 0.10627, + "37": 0.10521, + "38": 0.23173, + "39": 0.23132, + "40": 0.10561, + "41": 0.10865, + "42": 0.10488, + "43": 0.10774, + "44": 0.10716, + "45": 0.2275, + "46": 0.10501, + "47": 0.26542, + "48": 0.10561, + "49": 0.10565, + "50": 0.21987, + "51": 0.12154, + "52": 0.10569, + "53": 0.10443, + "54": 0.1047, + "55": 0.10628, + "56": 0.106, + "57": 0.21826, + "58": 0.29942, + "59": 0.10627, + "60": 0.10754, + "61": 0.10422, + "62": 0.10591, + "63": 0.22208, + "64": 0.10704, + "65": 0.10754, + "66": 0.11693, + "67": 0.10619, + "68": 0.10599, + "69": 0.1064, + "70": 0.10712, + "71": 0.20506, + "72": 0.12154, + "73": 0.10701, + "74": 0.10797, + "75": 0.10599, + "76": 0.11118, + "77": 0.22203, + "78": 0.11082, + "79": 0.10971, + "80": 0.10673, + "81": 0.23373, + "82": 0.25241, + "83": 0.10924, + "84": 0.23617, + "85": 0.10907, + "86": 0.10895, + "87": 0.21649, + "88": 0.1977, + "89": 0.1081, + "90": 0.10767, + "91": 0.2306, + "92": 0.1072, + "93": 0.11204, + "94": 0.22079, + "95": 0.10723, + "96": 0.10789, + "97": 0.10605, + "98": 0.10621, + "99": 0.26274, + "100": 0.10674 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json index acadb81abbe..82352c11781 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 551269888.0, - "2": 551269888.0, - 
"3": 551269888.0, - "4": 552318464.0, - "5": 551269888.0, - "6": 551269888.0, - "7": 551269888.0, - "8": 551269888.0, - "9": 551269888.0, - "10": 551269888.0, - "11": 551269888.0, - "12": 551269888.0, - "13": 551269888.0, - "14": 551269888.0, - "15": 551269888.0, - "16": 551269888.0, - "17": 551269888.0, - "18": 551269888.0, - "19": 551269888.0, - "20": 551269888.0, - "21": 551269888.0, - "22": 551269888.0, - "23": 551269888.0, - "24": 551269888.0, - "25": 551269888.0, - "26": 551269888.0, - "27": 551269888.0, - "28": 551269888.0, - "29": 551269888.0, - "30": 551269888.0, - "31": 551269888.0, - "32": 551269888.0, - "33": 551269888.0, - "34": 551269888.0, - "35": 551269888.0, - "36": 551269888.0, - "37": 551269888.0, - "38": 551269888.0, - "39": 551269888.0, - "40": 551269888.0, - "41": 551269888.0, - "42": 551269888.0, - "43": 551269888.0, - "44": 551269888.0, - "45": 551269888.0, - "46": 551269888.0, - "47": 551269888.0, - "48": 551269888.0, - "49": 551269888.0, - "50": 551269888.0, - "51": 551269888.0, - "52": 551269888.0, - "53": 551269888.0, - "54": 551269888.0, - "55": 551269888.0, - "56": 551269888.0, - "57": 551269888.0, - "58": 551269888.0, - "59": 551269888.0, - "60": 551269888.0, - "61": 551269888.0, - "62": 551269888.0, - "63": 551269888.0, - "64": 551269888.0, - "65": 551269888.0, - "66": 551269888.0, - "67": 551269888.0, - "68": 551269888.0, - "69": 551269888.0, - "70": 551269888.0, - "71": 551269888.0, - "72": 551269888.0, - "73": 551269888.0, - "74": 551269888.0, - "75": 551269888.0, - "76": 551269888.0, - "77": 551269888.0, - "78": 551269888.0, - "79": 551269888.0, - "80": 551269888.0, - "81": 551269888.0, - "82": 551269888.0, - "83": 551269888.0, - "84": 551269888.0, - "85": 551269888.0, - "86": 551269888.0, - "87": 551269888.0, - "88": 551269888.0, - "89": 551269888.0, - "90": 551269888.0, - "91": 551269888.0, - "92": 551269888.0, - "93": 551269888.0, - "94": 551269888.0, - "95": 551269888.0, - "96": 551269888.0, - "97": 551269888.0, - "98": 
551269888.0, - "99": 551269888.0, - "100": 551269888.0 + "1": 551278080.0, + "2": 551278080.0, + "3": 551278080.0, + "4": 551278080.0, + "5": 551278080.0, + "6": 551278080.0, + "7": 551278080.0, + "8": 551278080.0, + "9": 551278080.0, + "10": 551278080.0, + "11": 551278080.0, + "12": 551278080.0, + "13": 551278080.0, + "14": 551278080.0, + "15": 551278080.0, + "16": 551278080.0, + "17": 551278080.0, + "18": 551278080.0, + "19": 551278080.0, + "20": 551278080.0, + "21": 551278080.0, + "22": 551278080.0, + "23": 551278080.0, + "24": 551278080.0, + "25": 551278080.0, + "26": 551278080.0, + "27": 551278080.0, + "28": 551278080.0, + "29": 551278080.0, + "30": 551278080.0, + "31": 551278080.0, + "32": 551278080.0, + "33": 551278080.0, + "34": 551278080.0, + "35": 551278080.0, + "36": 551278080.0, + "37": 551278080.0, + "38": 551278080.0, + "39": 551278080.0, + "40": 551278080.0, + "41": 551278080.0, + "42": 551278080.0, + "43": 551278080.0, + "44": 551278080.0, + "45": 551278080.0, + "46": 551278080.0, + "47": 551278080.0, + "48": 551278080.0, + "49": 551278080.0, + "50": 551278080.0, + "51": 551278080.0, + "52": 551278080.0, + "53": 551278080.0, + "54": 551278080.0, + "55": 551278080.0, + "56": 551278080.0, + "57": 551278080.0, + "58": 551278080.0, + "59": 551278080.0, + "60": 551278080.0, + "61": 551278080.0, + "62": 551278080.0, + "63": 551278080.0, + "64": 551278080.0, + "65": 551278080.0, + "66": 551278080.0, + "67": 551278080.0, + "68": 551278080.0, + "69": 551278080.0, + "70": 551278080.0, + "71": 551278080.0, + "72": 551278080.0, + "73": 551278080.0, + "74": 551278080.0, + "75": 551278080.0, + "76": 551278080.0, + "77": 551278080.0, + "78": 551278080.0, + "79": 551278080.0, + "80": 551278080.0, + "81": 551278080.0, + "82": 551278080.0, + "83": 551278080.0, + "84": 551278080.0, + "85": 551278080.0, + "86": 551278080.0, + "87": 551278080.0, + "88": 551278080.0, + "89": 551278080.0, + "90": 551278080.0, + "91": 551278080.0, + "92": 551278080.0, + "93": 551278080.0, 
+ "94": 551278080.0, + "95": 551278080.0, + "96": 551278080.0, + "97": 551278080.0, + "98": 551278080.0, + "99": 551278080.0, + "100": 551278080.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2290489344.0, - "2": 2432397312.0, - "3": 2432397312.0, - "4": 2432397312.0, - "5": 2432397312.0, - "6": 2432397312.0, - "7": 2432397312.0, - "8": 2432397312.0, - "9": 2432397312.0, - "10": 2432397312.0, - "11": 2432397312.0, - "12": 2432397312.0, - "13": 2432397312.0, - "14": 2432397312.0, - "15": 2432397312.0, - "16": 2432397312.0, - "17": 2432397312.0, - "18": 2432397312.0, - "19": 2432397312.0, - "20": 2432397312.0, - "21": 2432397312.0, - "22": 2432397312.0, - "23": 2432397312.0, - "24": 2432397312.0, - "25": 2432397312.0, - "26": 2432397312.0, - "27": 2432397312.0, - "28": 2432397312.0, - "29": 2432397312.0, - "30": 2432397312.0, - "31": 2432397312.0, - "32": 2432397312.0, - "33": 2432397312.0, - "34": 2432397312.0, - "35": 2432397312.0, - "36": 2432397312.0, - "37": 2432397312.0, - "38": 2432397312.0, - "39": 2432397312.0, - "40": 2432397312.0, - "41": 2432397312.0, - "42": 2432397312.0, - "43": 2432397312.0, - "44": 2432397312.0, - "45": 2432397312.0, - "46": 2432397312.0, - "47": 2432397312.0, - "48": 2432397312.0, - "49": 2432397312.0, - "50": 2432397312.0, - "51": 2432397312.0, - "52": 2432397312.0, - "53": 2432397312.0, - "54": 2432397312.0, - "55": 2432397312.0, - "56": 2432397312.0, - "57": 2432397312.0, - "58": 2432397312.0, - "59": 2432397312.0, - "60": 2432397312.0, - "61": 2432397312.0, - "62": 2432397312.0, - "63": 2432397312.0, - "64": 2432397312.0, - "65": 2432397312.0, - "66": 2432397312.0, - "67": 2432397312.0, - "68": 2432397312.0, - "69": 2432397312.0, - "70": 2432397312.0, - "71": 2432397312.0, - "72": 2432397312.0, - "73": 2432397312.0, - "74": 2432397312.0, - "75": 2432397312.0, - "76": 2432397312.0, - "77": 2432397312.0, - "78": 2432397312.0, - "79": 2432397312.0, - "80": 
2432397312.0, - "81": 2432397312.0, - "82": 2432397312.0, - "83": 2432397312.0, - "84": 2432397312.0, - "85": 2432397312.0, - "86": 2432397312.0, - "87": 2432397312.0, - "88": 2432397312.0, - "89": 2432397312.0, - "90": 2432397312.0, - "91": 2432397312.0, - "92": 2432397312.0, - "93": 2432397312.0, - "94": 2432397312.0, - "95": 2432397312.0, - "96": 2432397312.0, - "97": 2432397312.0, - "98": 2432397312.0, - "99": 2432397312.0, - "100": 2432397312.0 + "1": 2289441792.0, + "2": 2432405504.0, + "3": 2432405504.0, + "4": 2432405504.0, + "5": 2432405504.0, + "6": 2432405504.0, + "7": 2432405504.0, + "8": 2432405504.0, + "9": 2432405504.0, + "10": 2432405504.0, + "11": 2432405504.0, + "12": 2432405504.0, + "13": 2432405504.0, + "14": 2432405504.0, + "15": 2432405504.0, + "16": 2432405504.0, + "17": 2432405504.0, + "18": 2432405504.0, + "19": 2432405504.0, + "20": 2432405504.0, + "21": 2432405504.0, + "22": 2432405504.0, + "23": 2432405504.0, + "24": 2432405504.0, + "25": 2432405504.0, + "26": 2432405504.0, + "27": 2432405504.0, + "28": 2432405504.0, + "29": 2432405504.0, + "30": 2432405504.0, + "31": 2432405504.0, + "32": 2432405504.0, + "33": 2432405504.0, + "34": 2432405504.0, + "35": 2432405504.0, + "36": 2432405504.0, + "37": 2432405504.0, + "38": 2432405504.0, + "39": 2432405504.0, + "40": 2432405504.0, + "41": 2432405504.0, + "42": 2432405504.0, + "43": 2432405504.0, + "44": 2432405504.0, + "45": 2432405504.0, + "46": 2432405504.0, + "47": 2432405504.0, + "48": 2432405504.0, + "49": 2432405504.0, + "50": 2432405504.0, + "51": 2432405504.0, + "52": 2432405504.0, + "53": 2432405504.0, + "54": 2432405504.0, + "55": 2432405504.0, + "56": 2432405504.0, + "57": 2432405504.0, + "58": 2432405504.0, + "59": 2432405504.0, + "60": 2432405504.0, + "61": 2432405504.0, + "62": 2432405504.0, + "63": 2432405504.0, + "64": 2432405504.0, + "65": 2432405504.0, + "66": 2432405504.0, + "67": 2432405504.0, + "68": 2432405504.0, + "69": 2432405504.0, + "70": 2432405504.0, + "71": 
2432405504.0, + "72": 2432405504.0, + "73": 2432405504.0, + "74": 2432405504.0, + "75": 2432405504.0, + "76": 2432405504.0, + "77": 2432405504.0, + "78": 2432405504.0, + "79": 2432405504.0, + "80": 2432405504.0, + "81": 2432405504.0, + "82": 2432405504.0, + "83": 2432405504.0, + "84": 2432405504.0, + "85": 2432405504.0, + "86": 2432405504.0, + "87": 2432405504.0, + "88": 2432405504.0, + "89": 2432405504.0, + "90": 2432405504.0, + "91": 2432405504.0, + "92": 2432405504.0, + "93": 2432405504.0, + "94": 2432405504.0, + "95": 2432405504.0, + "96": 2432405504.0, + "97": 2432405504.0, + "98": 2432405504.0, + "99": 2432405504.0, + "100": 2432405504.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 17.61957, - "2": 0.12347, - "3": 0.11094, - "4": 0.11482, - "5": 0.11141, - "6": 0.10928, - "7": 0.10905, - "8": 0.11026, - "9": 0.11003, - "10": 0.11095, - "11": 0.11002, - "12": 0.1122, - "13": 0.11472, - "14": 0.11511, - "15": 0.11073, - "16": 0.11228, - "17": 0.11342, - "18": 0.11197, - "19": 0.11062, - "20": 0.11097, - "21": 0.11081, - "22": 0.11379, - "23": 0.10968, - "24": 0.11083, - "25": 0.11649, - "26": 0.11043, - "27": 0.11175, - "28": 0.11122, - "29": 0.11218, - "30": 0.11261, - "31": 0.11314, - "32": 0.10971, - "33": 0.11028, - "34": 0.11149, - "35": 0.11122, - "36": 0.11079, - "37": 0.11188, - "38": 0.1115, - "39": 0.11238, - "40": 0.11528, - "41": 0.11165, - "42": 0.11137, - "43": 0.11139, - "44": 0.11074, - "45": 0.11141, - "46": 0.11158, - "47": 0.1105, - "48": 0.11128, - "49": 0.11164, - "50": 0.11572, - "51": 0.11625, - "52": 0.10969, - "53": 0.10904, - "54": 0.1098, - "55": 0.10896, - "56": 0.11225, - "57": 0.11301, - "58": 0.11047, - "59": 0.10959, - "60": 0.11005, - "61": 0.11018, - "62": 0.10831, - "63": 0.10997, - "64": 0.10896, - "65": 0.11116, - "66": 0.11148, - "67": 0.1092, - "68": 0.10947, - "69": 0.10933, - "70": 0.10869, - "71": 0.10873, - "72": 0.10849, - "73": 0.10872, - "74": 0.10951, - 
"75": 0.1119, - "76": 0.1109, - "77": 0.10896, - "78": 0.10963, - "79": 0.11057, - "80": 0.10858, - "81": 0.10732, - "82": 0.10824, - "83": 0.11006, - "84": 0.11062, - "85": 0.1096, - "86": 0.10933, - "87": 0.11001, - "88": 0.11053, - "89": 0.10899, - "90": 0.10989, - "91": 0.10903, - "92": 0.10959, - "93": 0.11185, - "94": 0.11166, - "95": 0.11067, - "96": 0.11183, - "97": 0.11136, - "98": 0.11022, - "99": 0.11091, - "100": 0.10951 + "1": 12.06542, + "2": 0.1206, + "3": 0.10179, + "4": 0.08257, + "5": 0.08196, + "6": 0.08184, + "7": 0.08247, + "8": 0.08147, + "9": 0.08127, + "10": 0.08228, + "11": 0.0839, + "12": 0.08236, + "13": 0.08232, + "14": 0.08218, + "15": 0.08336, + "16": 0.08213, + "17": 0.08296, + "18": 0.0816, + "19": 0.08269, + "20": 0.08138, + "21": 0.08303, + "22": 0.08243, + "23": 0.08357, + "24": 0.08151, + "25": 0.08392, + "26": 0.08247, + "27": 0.08229, + "28": 0.08279, + "29": 0.08232, + "30": 0.0824, + "31": 0.08146, + "32": 0.08912, + "33": 0.08386, + "34": 0.08198, + "35": 0.08188, + "36": 0.08394, + "37": 0.08154, + "38": 0.08111, + "39": 0.08175, + "40": 0.08143, + "41": 0.08312, + "42": 0.08219, + "43": 0.08218, + "44": 0.08316, + "45": 0.08162, + "46": 0.08265, + "47": 0.08169, + "48": 0.08346, + "49": 0.08176, + "50": 0.08213, + "51": 0.09096, + "52": 0.08501, + "53": 0.08473, + "54": 0.08165, + "55": 0.08129, + "56": 0.08244, + "57": 0.08158, + "58": 0.08104, + "59": 0.08185, + "60": 0.0834, + "61": 0.08139, + "62": 0.08134, + "63": 0.086, + "64": 0.08155, + "65": 0.08326, + "66": 0.08135, + "67": 0.08434, + "68": 0.0817, + "69": 0.08297, + "70": 0.08039, + "71": 0.0801, + "72": 0.07962, + "73": 0.07979, + "74": 0.08099, + "75": 0.08004, + "76": 0.07961, + "77": 0.07959, + "78": 0.08021, + "79": 0.08102, + "80": 0.07949, + "81": 0.08018, + "82": 0.08014, + "83": 0.07929, + "84": 0.07992, + "85": 0.07982, + "86": 0.08024, + "87": 0.08054, + "88": 0.08161, + "89": 0.08084, + "90": 0.08079, + "91": 0.08239, + "92": 0.08091, + "93": 
0.07966, + "94": 0.08301, + "95": 0.08124, + "96": 0.08066, + "97": 0.08098, + "98": 0.08072, + "99": 0.08164, + "100": 0.08106 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..490e22e59f4 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85868, + "52": 9.74284, + "53": 10.06645, + "54": 9.95167, + "55": 9.88096, + "56": 9.62626, + "57": 9.47768, + "58": 9.83346, + "59": 9.58526, + "60": 9.50125, + "61": 9.69182, + "62": 9.98853, + "63": 9.38476, + "64": 9.7803, + "65": 8.94762, + "66": 9.70856, + "67": 9.36852, + "68": 9.78439, + "69": 9.79406, + "70": 9.74241, + "71": 9.61808, + "72": 9.58428, + "73": 9.5035, + "74": 8.94221, + "75": 9.42529, + "76": 9.07408, 
+ "77": 10.06351, + "78": 9.7208, + "79": 9.37294, + "80": 9.40396, + "81": 9.48168, + "82": 9.69778, + "83": 9.30714, + "84": 9.41712, + "85": 9.61407, + "86": 9.07615, + "87": 9.59094, + "88": 9.74641, + "89": 9.59993, + "90": 9.8142, + "91": 9.33773, + "92": 9.35373, + "93": 9.07395, + "94": 8.83173, + "95": 9.51734, + "96": 9.52415, + "97": 9.30995, + "98": 9.66805, + "99": 8.88588, + "100": 9.39538 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2607.0, + "52": 2618.0, + "53": 2828.0, + "54": 2730.0, + "55": 2351.0, + "56": 2753.0, + "57": 2323.0, + "58": 2809.0, + "59": 2721.0, + "60": 2440.0, + "61": 2875.0, + "62": 2726.0, + "63": 2444.0, + "64": 3001.0, + "65": 2602.0, + "66": 2981.0, + "67": 2676.0, + "68": 2623.0, + "69": 2802.0, + "70": 3234.0, + "71": 2902.0, + "72": 2337.0, + "73": 2856.0, + "74": 1903.0, + "75": 2388.0, + "76": 3118.0, + "77": 3108.0, + "78": 3122.0, + "79": 2994.0, + "80": 3186.0, + "81": 3470.0, + "82": 3164.0, + "83": 2726.0, + "84": 3214.0, + "85": 3262.0, + "86": 2602.0, + "87": 3658.0, + "88": 2906.0, + "89": 3054.0, + "90": 3018.0, + "91": 2690.0, + "92": 3106.0, + "93": 2701.0, + "94": 3263.0, + "95": 3426.0, + "96": 3405.0, + "97": 3087.0, 
+ "98": 3510.0, + "99": 3148.0, + "100": 3204.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 694779392.0, + "52": 694779392.0, + "53": 694779392.0, + "54": 694779392.0, + "55": 694779392.0, + "56": 694779392.0, + "57": 694779392.0, + "58": 694779392.0, + "59": 694779392.0, + "60": 694779392.0, + "61": 694779392.0, + "62": 694779392.0, + "63": 694779392.0, + "64": 694779392.0, + "65": 694779392.0, + "66": 694779392.0, + "67": 694779392.0, + "68": 694779392.0, + "69": 694779392.0, + "70": 694779392.0, + "71": 694779392.0, + "72": 694779392.0, + "73": 694779392.0, + "74": 694779392.0, + "75": 694779392.0, + "76": 694779392.0, + "77": 694779392.0, + "78": 694779392.0, + "79": 694779392.0, + "80": 694779392.0, + "81": 694779392.0, + "82": 694779392.0, + "83": 694779392.0, + "84": 694779392.0, + "85": 694779392.0, + "86": 694779392.0, + "87": 694779392.0, + "88": 694779392.0, + "89": 694779392.0, + "90": 694779392.0, + "91": 694779392.0, + "92": 694779392.0, + "93": 694779392.0, + "94": 694779392.0, + "95": 694779392.0, + "96": 694779392.0, + "97": 694779392.0, + "98": 694779392.0, + "99": 694779392.0, + "100": 694779392.0 + } + }, + "mem-max-allocated-bytes": { + 
"start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2574857216.0, + "52": 2574858240.0, + "53": 2574858240.0, + "54": 2574858240.0, + "55": 2574858240.0, + "56": 2574858240.0, + "57": 2574858240.0, + "58": 2574858240.0, + "59": 2574858240.0, + "60": 2574858240.0, + "61": 2574858240.0, + "62": 2574858240.0, + "63": 2574858240.0, + "64": 2574858240.0, + "65": 2574858240.0, + "66": 2574858240.0, + "67": 2574858240.0, + "68": 2574858240.0, + "69": 2574858240.0, + "70": 2574858240.0, + "71": 2574858240.0, + "72": 2574858240.0, + "73": 2574858240.0, + "74": 2574858240.0, + "75": 2574858240.0, + "76": 2574858240.0, + "77": 2574858240.0, + "78": 2574858240.0, + "79": 2574858240.0, + "80": 2574858240.0, + "81": 2574858240.0, + "82": 2574858240.0, + "83": 2574858240.0, + "84": 2574858240.0, + "85": 2574858240.0, + "86": 2574858240.0, + "87": 2574858240.0, + "88": 2574858240.0, + "89": 2574858240.0, + "90": 2574858240.0, + "91": 2574858240.0, + "92": 2574858240.0, + "93": 2574858240.0, + "94": 2574858240.0, + "95": 2574858240.0, + "96": 2574858240.0, + "97": 2574858240.0, + "98": 2574858240.0, + "99": 2574858240.0, + "100": 2574858240.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 11.89299, + "52": 0.11287, + "53": 0.08679, + "54": 0.08602, + "55": 0.0852, + "56": 0.08169, + "57": 0.08199, + "58": 0.08035, + "59": 0.07992, + "60": 0.08061, + "61": 0.0805, + "62": 0.08001, + "63": 0.08077, + "64": 0.08064, + "65": 0.08121, + "66": 0.08051, + "67": 0.08071, + "68": 0.08067, + "69": 0.08042, + "70": 0.08041, + "71": 0.0815, + "72": 0.08101, + "73": 0.08129, + "74": 0.08058, + "75": 0.08105, + "76": 0.08085, + "77": 0.08323, + "78": 0.08354, + "79": 0.08364, + "80": 0.08354, + "81": 0.08367, + "82": 0.08118, + "83": 0.08169, + "84": 0.08345, + "85": 0.08141, + "86": 0.08179, + "87": 0.08142, + "88": 0.0817, + "89": 0.08146, + "90": 0.50232, + "91": 0.08211, + "92": 0.08131, + "93": 0.08164, + "94": 0.08213, + "95": 0.08221, + "96": 0.08288, + "97": 0.08215, + "98": 0.08186, + "99": 0.08239, + "100": 0.08223 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json index 
5d20ab395ec..691a79fb9b0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json @@ -325,7 +325,7 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2297829376.0, + "1": 2298877952.0, "2": 2439228416.0, "3": 2439228416.0, "4": 2439228416.0, @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.8604, - "2": 0.16953, - "3": 0.13987, - "4": 0.13824, - "5": 0.13775, - "6": 0.13549, - "7": 0.13611, - "8": 0.13584, - "9": 0.13626, - "10": 0.13922, - "11": 0.13526, - "12": 0.13455, - "13": 0.13222, - "14": 0.13324, - "15": 0.1325, - "16": 0.13211, - "17": 0.13198, - "18": 0.13145, - "19": 0.13207, - "20": 0.13182, - "21": 0.13297, - "22": 0.1322, - "23": 0.13275, - "24": 0.1319, - "25": 0.13822, - "26": 0.13214, - "27": 0.13169, - "28": 0.13196, - "29": 0.13229, - "30": 0.13285, - "31": 0.13112, - "32": 0.13222, - "33": 0.13056, - "34": 0.13076, - "35": 0.13218, - "36": 0.13126, - "37": 0.13091, - "38": 0.13048, - "39": 0.13082, - "40": 0.1308, - "41": 0.13202, - "42": 0.1314, - "43": 0.13222, - "44": 0.13074, - "45": 0.13237, - "46": 0.13272, - "47": 0.13239, - "48": 0.13266, - "49": 0.13226, - "50": 0.13164, - "51": 0.13425, - "52": 0.13044, - "53": 0.13037, - "54": 0.13007, - "55": 0.1301, - "56": 0.13001, - "57": 0.13054, - "58": 0.12972, - "59": 0.13049, - "60": 0.13042, - "61": 0.12903, - "62": 0.13042, - "63": 0.13104, - "64": 0.13008, - "65": 0.13158, - "66": 0.13091, - "67": 0.13089, - "68": 0.13084, - "69": 0.12903, - "70": 0.13015, - "71": 0.12957, - "72": 0.12997, - "73": 0.13025, - "74": 0.12989, - "75": 0.13018, - "76": 0.12962, - "77": 0.13065, - "78": 0.12915, - "79": 0.13007, - "80": 0.12972, - "81": 0.1301, - "82": 0.12927, - "83": 0.1302, - "84": 0.12991, - 
"85": 0.13129, - "86": 0.13063, - "87": 0.13028, - "88": 0.1305, - "89": 0.13046, - "90": 0.12991, - "91": 0.13058, - "92": 0.13044, - "93": 0.13009, - "94": 0.1306, - "95": 0.13082, - "96": 0.13068, - "97": 0.13403, - "98": 0.13199, - "99": 0.13191, - "100": 0.13014 + "1": 5.78436, + "2": 0.15737, + "3": 0.15175, + "4": 0.13338, + "5": 0.13371, + "6": 0.13122, + "7": 0.13094, + "8": 0.13089, + "9": 0.13127, + "10": 0.1325, + "11": 0.13263, + "12": 0.13197, + "13": 0.1321, + "14": 0.13177, + "15": 0.13107, + "16": 0.13105, + "17": 0.13225, + "18": 0.13154, + "19": 0.13094, + "20": 0.13082, + "21": 0.13074, + "22": 0.13108, + "23": 0.13092, + "24": 0.13137, + "25": 0.13097, + "26": 0.13061, + "27": 0.13081, + "28": 0.13087, + "29": 0.13114, + "30": 0.1316, + "31": 0.13201, + "32": 0.13122, + "33": 0.13114, + "34": 0.13117, + "35": 0.13149, + "36": 0.13065, + "37": 0.13085, + "38": 0.13105, + "39": 0.13143, + "40": 0.13125, + "41": 0.13337, + "42": 0.13078, + "43": 0.13258, + "44": 0.13138, + "45": 0.13103, + "46": 0.13168, + "47": 0.13123, + "48": 0.13091, + "49": 0.13137, + "50": 0.13118, + "51": 0.13768, + "52": 0.13317, + "53": 0.1336, + "54": 0.1328, + "55": 0.13244, + "56": 0.13289, + "57": 0.13268, + "58": 0.13228, + "59": 0.13233, + "60": 0.13203, + "61": 0.13361, + "62": 0.13211, + "63": 0.13195, + "64": 0.13158, + "65": 0.13275, + "66": 0.13199, + "67": 0.13166, + "68": 0.13257, + "69": 0.13175, + "70": 0.13157, + "71": 0.13714, + "72": 0.13192, + "73": 0.13291, + "74": 0.13314, + "75": 0.13276, + "76": 0.13221, + "77": 0.13203, + "78": 0.13255, + "79": 0.13169, + "80": 0.13279, + "81": 0.13297, + "82": 0.13191, + "83": 0.13163, + "84": 0.13271, + "85": 0.13215, + "86": 0.13225, + "87": 0.13265, + "88": 0.13135, + "89": 0.13216, + "90": 0.13163, + "91": 0.1317, + "92": 0.13178, + "93": 0.13167, + "94": 0.13291, + "95": 0.13256, + "96": 0.13258, + "97": 0.13202, + "98": 0.13253, + "99": 0.13337, + "100": 0.13354 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..56ff788b9ee --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.83548, + "52": 9.72518, + "53": 10.04799, + "54": 9.93007, + "55": 9.86362, + "56": 9.60218, + "57": 9.45185, + "58": 9.80781, + "59": 9.56786, + "60": 9.47966, + "61": 9.67985, + "62": 9.9675, + "63": 9.35111, + "64": 9.75622, + "65": 8.93178, + "66": 9.68108, + "67": 9.35959, + "68": 9.76948, + "69": 9.77494, + "70": 9.71179, + "71": 9.60631, + "72": 9.57134, + "73": 9.48393, + "74": 8.92913, + "75": 9.4003, + "76": 9.07189, + "77": 10.05248, + "78": 9.71492, + "79": 9.35744, + "80": 9.38946, + "81": 9.46798, + "82": 9.68509, + "83": 9.29591, + "84": 9.40521, + "85": 9.60161, + "86": 9.06713, + 
"87": 9.58406, + "88": 9.73301, + "89": 9.59528, + "90": 9.80559, + "91": 9.32603, + "92": 9.3532, + "93": 9.06916, + "94": 8.82266, + "95": 9.50858, + "96": 9.51587, + "97": 9.29763, + "98": 9.66187, + "99": 8.87661, + "100": 9.39222 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2554.0, + "52": 2619.0, + "53": 2863.0, + "54": 2688.0, + "55": 2406.0, + "56": 2649.0, + "57": 2175.0, + "58": 2856.0, + "59": 2775.0, + "60": 2307.0, + "61": 2914.0, + "62": 2644.0, + "63": 2362.0, + "64": 2946.0, + "65": 2578.0, + "66": 3122.0, + "67": 2697.0, + "68": 2687.0, + "69": 2956.0, + "70": 3157.0, + "71": 3028.0, + "72": 2294.0, + "73": 2876.0, + "74": 1887.0, + "75": 2523.0, + "76": 2937.0, + "77": 3162.0, + "78": 3318.0, + "79": 3074.0, + "80": 3213.0, + "81": 3664.0, + "82": 3238.0, + "83": 2838.0, + "84": 3251.0, + "85": 3275.0, + "86": 2748.0, + "87": 3758.0, + "88": 3023.0, + "89": 3267.0, + "90": 3085.0, + "91": 2812.0, + "92": 3116.0, + "93": 2665.0, + "94": 3380.0, + "95": 3236.0, + "96": 3462.0, + "97": 3002.0, + "98": 3545.0, + "99": 3265.0, + "100": 3458.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", 
+ "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 665418240.0, + "52": 665418240.0, + "53": 665418240.0, + "54": 665418240.0, + "55": 665418240.0, + "56": 665418240.0, + "57": 665418240.0, + "58": 665418240.0, + "59": 665418240.0, + "60": 665418240.0, + "61": 665418240.0, + "62": 665418240.0, + "63": 665418240.0, + "64": 665418240.0, + "65": 665418240.0, + "66": 665418240.0, + "67": 665418240.0, + "68": 665418240.0, + "69": 665418240.0, + "70": 665418240.0, + "71": 665418240.0, + "72": 665418240.0, + "73": 665418240.0, + "74": 665418240.0, + "75": 665418240.0, + "76": 665418240.0, + "77": 665418240.0, + "78": 665418240.0, + "79": 665418240.0, + "80": 665418240.0, + "81": 665418240.0, + "82": 665418240.0, + "83": 665418240.0, + "84": 665418240.0, + "85": 665418240.0, + "86": 665418240.0, + "87": 665418240.0, + "88": 665418240.0, + "89": 665418240.0, + "90": 665418240.0, + "91": 665418240.0, + "92": 665418240.0, + "93": 665418240.0, + "94": 665418240.0, + "95": 665418240.0, + "96": 665418240.0, + "97": 665418240.0, + "98": 665418240.0, + "99": 665418240.0, + "100": 665418240.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": 
"nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2584293376.0, + "52": 2584294400.0, + "53": 2584294400.0, + "54": 2584294400.0, + "55": 2584294400.0, + "56": 2584294400.0, + "57": 2584294400.0, + "58": 2584294400.0, + "59": 2584294400.0, + "60": 2584294400.0, + "61": 2584294400.0, + "62": 2584294400.0, + "63": 2584294400.0, + "64": 2584294400.0, + "65": 2584294400.0, + "66": 2584294400.0, + "67": 2584294400.0, + "68": 2584294400.0, + "69": 2584294400.0, + "70": 2584294400.0, + "71": 2584294400.0, + "72": 2584294400.0, + "73": 2584294400.0, + "74": 2584294400.0, + "75": 2584294400.0, + "76": 2584294400.0, + "77": 2584294400.0, + "78": 2584294400.0, + "79": 2584294400.0, + "80": 2584294400.0, + "81": 2584294400.0, + "82": 2584294400.0, + "83": 2584294400.0, + "84": 2584294400.0, + "85": 2584294400.0, + "86": 2584294400.0, + "87": 2584294400.0, + "88": 2584294400.0, + "89": 2584294400.0, + "90": 2584294400.0, + "91": 2584294400.0, + "92": 2584294400.0, + "93": 2584294400.0, + "94": 2584294400.0, + "95": 2584294400.0, + "96": 2584294400.0, + "97": 2584294400.0, + "98": 2584294400.0, + "99": 2584294400.0, + "100": 2584294400.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + 
"12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 5.37898, + "52": 0.16971, + "53": 0.14151, + "54": 0.1405, + "55": 0.13911, + "56": 0.13857, + "57": 0.13809, + "58": 0.13698, + "59": 0.13775, + "60": 0.13751, + "61": 0.1373, + "62": 0.13729, + "63": 0.13806, + "64": 0.13698, + "65": 0.13838, + "66": 0.13833, + "67": 0.13702, + "68": 0.13614, + "69": 0.13521, + "70": 0.13469, + "71": 0.13425, + "72": 0.13475, + "73": 0.13506, + "74": 0.13559, + "75": 0.13539, + "76": 0.13477, + "77": 0.13458, + "78": 0.13576, + "79": 0.13452, + "80": 0.13517, + "81": 0.13478, + "82": 0.13453, + "83": 0.13498, + "84": 0.13478, + "85": 0.13424, + "86": 0.13432, + "87": 0.1342, + "88": 0.13455, + "89": 0.13469, + "90": 0.13451, + "91": 0.13468, + "92": 0.13446, + "93": 0.1351, + "94": 0.13437, + "95": 0.13457, + "96": 0.13491, + "97": 0.13442, + "98": 0.13661, + "99": 0.13617, + "100": 0.13595 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..ab954626b0e --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.82555, + "2": 10.83286, + "3": 10.82763, + "4": 10.79573, + "5": 10.85699, + "6": 10.8639, + "7": 10.82612, + "8": 10.82542, + "9": 10.83587, + "10": 10.79627, + "11": 10.87822, + "12": 10.85824, + "13": 10.85426, + "14": 10.87526, + "15": 10.79208, + "16": 10.80307, + "17": 10.77438, + "18": 10.80487, + "19": 10.79369, + "20": 10.69576, + "21": 10.68654, + "22": 10.53161, + "23": 10.70646, + "24": 10.57337, + "25": 10.51533, + "26": 10.5909, + "27": 10.60777, + "28": 10.57049, + "29": 10.58979, + "30": 10.34722, + "31": 10.07771, + "32": 10.46349, + "33": 10.45722, + "34": 10.19974, + "35": 10.25643, + "36": 10.21263, + "37": 10.34718, + "38": 10.18009, + "39": 10.40838, + "40": 10.07629, + "41": 10.1297, + "42": 10.2117, + "43": 9.81708, + "44": 9.94034, + "45": 9.81748, + "46": 9.80633, + "47": 10.12473, + "48": 9.84047, + "49": 9.51012, + "50": 9.88943, + "51": 9.84256, + "52": 9.72573, + "53": 10.05974, + "54": 9.95226, + "55": 9.88318, + "56": 9.61275, + "57": 9.46219, + "58": 9.8231, + "59": 9.57666, + "60": 9.48516, + "61": 9.67876, + "62": 9.97782, + "63": 9.36212, + "64": 9.75714, + "65": 8.93494, + "66": 9.69283, + "67": 9.36708, + "68": 9.78178, + "69": 9.79452, + "70": 9.72296, + "71": 9.62031, + "72": 9.56974, + "73": 9.48101, + "74": 8.91241, + "75": 9.40905, + "76": 9.06617, + "77": 10.05809, + "78": 9.72194, + "79": 9.36927, + "80": 9.40029, + "81": 9.47702, + "82": 9.69787, + "83": 9.30742, + "84": 9.41492, + "85": 9.61113, + "86": 9.07103, + "87": 9.5961, + "88": 9.74909, + "89": 9.59604, + "90": 9.82722, + "91": 9.33657, + "92": 9.35582, + "93": 9.08689, + "94": 8.82754, + "95": 9.53065, + "96": 9.5276, + "97": 9.30672, + "98": 9.66905, + "99": 8.89635, + "100": 
9.40525 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1651.0, + "2": 1716.0, + "3": 1760.0, + "4": 1771.0, + "5": 1899.0, + "6": 1905.0, + "7": 1842.0, + "8": 1667.0, + "9": 1822.0, + "10": 1434.0, + "11": 1852.0, + "12": 1741.0, + "13": 1905.0, + "14": 1841.0, + "15": 1857.0, + "16": 1841.0, + "17": 1800.0, + "18": 1666.0, + "19": 1803.0, + "20": 1800.0, + "21": 1836.0, + "22": 1688.0, + "23": 1994.0, + "24": 1641.0, + "25": 1577.0, + "26": 1676.0, + "27": 1876.0, + "28": 1970.0, + "29": 1945.0, + "30": 1916.0, + "31": 1494.0, + "32": 1868.0, + "33": 2135.0, + "34": 1740.0, + "35": 1924.0, + "36": 1854.0, + "37": 2363.0, + "38": 2164.0, + "39": 2262.0, + "40": 2081.0, + "41": 2168.0, + "42": 2247.0, + "43": 2055.0, + "44": 2070.0, + "45": 1988.0, + "46": 2208.0, + "47": 2559.0, + "48": 2287.0, + "49": 2194.0, + "50": 2303.0, + "51": 2552.0, + "52": 2565.0, + "53": 2883.0, + "54": 2710.0, + "55": 2301.0, + "56": 2798.0, + "57": 2334.0, + "58": 2979.0, + "59": 2960.0, + "60": 2451.0, + "61": 2841.0, + "62": 2577.0, + "63": 2516.0, + "64": 2907.0, + "65": 2567.0, + "66": 2862.0, + "67": 2809.0, + "68": 2609.0, + "69": 2965.0, + "70": 2985.0, + "71": 2864.0, + "72": 2613.0, + "73": 3108.0, + "74": 2048.0, + "75": 2563.0, + "76": 3046.0, + "77": 3127.0, + "78": 2959.0, + "79": 3082.0, + "80": 3025.0, + "81": 3400.0, + "82": 3223.0, + "83": 2786.0, + "84": 3180.0, + "85": 3233.0, + "86": 2611.0, + "87": 3542.0, + "88": 3084.0, + "89": 3210.0, + "90": 3271.0, + "91": 2770.0, + "92": 3220.0, + "93": 2662.0, + "94": 3405.0, + "95": 3085.0, + "96": 3336.0, + "97": 3050.0, + "98": 3421.0, + "99": 3271.0, + "100": 3079.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 548399616.0, + "2": 548399616.0, + "3": 548399616.0, + "4": 548399616.0, + "5": 548399616.0, + "6": 548399616.0, + "7": 548399616.0, + "8": 548399616.0, + "9": 548399616.0, + "10": 
548399616.0, + "11": 548399616.0, + "12": 548399616.0, + "13": 548399616.0, + "14": 548399616.0, + "15": 548399616.0, + "16": 548399616.0, + "17": 548399616.0, + "18": 548399616.0, + "19": 548399616.0, + "20": 548399616.0, + "21": 548399616.0, + "22": 548399616.0, + "23": 548399616.0, + "24": 548399616.0, + "25": 548399616.0, + "26": 548399616.0, + "27": 548399616.0, + "28": 548399616.0, + "29": 548399616.0, + "30": 548399616.0, + "31": 548399616.0, + "32": 548399616.0, + "33": 548399616.0, + "34": 548399616.0, + "35": 548399616.0, + "36": 548399616.0, + "37": 548399616.0, + "38": 548399616.0, + "39": 548399616.0, + "40": 548399616.0, + "41": 548399616.0, + "42": 548399616.0, + "43": 548399616.0, + "44": 548399616.0, + "45": 548399616.0, + "46": 548399616.0, + "47": 548399616.0, + "48": 548399616.0, + "49": 548399616.0, + "50": 548399616.0, + "51": 548399616.0, + "52": 548399616.0, + "53": 548399616.0, + "54": 548399616.0, + "55": 548399616.0, + "56": 548399616.0, + "57": 548399616.0, + "58": 548399616.0, + "59": 548399616.0, + "60": 548399616.0, + "61": 548399616.0, + "62": 548399616.0, + "63": 548399616.0, + "64": 548399616.0, + "65": 548399616.0, + "66": 548399616.0, + "67": 548399616.0, + "68": 548399616.0, + "69": 548399616.0, + "70": 548399616.0, + "71": 548399616.0, + "72": 548399616.0, + "73": 548399616.0, + "74": 548399616.0, + "75": 548399616.0, + "76": 548399616.0, + "77": 548399616.0, + "78": 548399616.0, + "79": 548399616.0, + "80": 548399616.0, + "81": 548399616.0, + "82": 548399616.0, + "83": 548399616.0, + "84": 548399616.0, + "85": 548399616.0, + "86": 548399616.0, + "87": 548399616.0, + "88": 548399616.0, + "89": 548399616.0, + "90": 548399616.0, + "91": 548399616.0, + "92": 548399616.0, + "93": 548399616.0, + "94": 548399616.0, + "95": 548399616.0, + "96": 548399616.0, + "97": 548399616.0, + "98": 548399616.0, + "99": 548399616.0, + "100": 548399616.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 
1, + "values": { + "1": 2325370880.0, + "2": 2466227200.0, + "3": 2466227200.0, + "4": 2466227200.0, + "5": 2466227200.0, + "6": 2466227200.0, + "7": 2466227200.0, + "8": 2466227200.0, + "9": 2466227200.0, + "10": 2466227200.0, + "11": 2466227200.0, + "12": 2466227200.0, + "13": 2466227200.0, + "14": 2466227200.0, + "15": 2466227200.0, + "16": 2466227200.0, + "17": 2466227200.0, + "18": 2466227200.0, + "19": 2466227200.0, + "20": 2466227200.0, + "21": 2466227200.0, + "22": 2466227200.0, + "23": 2466227200.0, + "24": 2466227200.0, + "25": 2466227200.0, + "26": 2466227200.0, + "27": 2466227200.0, + "28": 2466227200.0, + "29": 2466227200.0, + "30": 2466227200.0, + "31": 2466227200.0, + "32": 2466227200.0, + "33": 2466227200.0, + "34": 2466227200.0, + "35": 2466227200.0, + "36": 2466227200.0, + "37": 2466227200.0, + "38": 2466227200.0, + "39": 2466227200.0, + "40": 2466227200.0, + "41": 2466227200.0, + "42": 2466227200.0, + "43": 2466227200.0, + "44": 2466227200.0, + "45": 2466227200.0, + "46": 2466227200.0, + "47": 2466227200.0, + "48": 2466227200.0, + "49": 2466227200.0, + "50": 2466227200.0, + "51": 2466227200.0, + "52": 2466227200.0, + "53": 2466227200.0, + "54": 2466227200.0, + "55": 2466227200.0, + "56": 2466227200.0, + "57": 2466227200.0, + "58": 2466227200.0, + "59": 2466227200.0, + "60": 2466227200.0, + "61": 2466227200.0, + "62": 2466227200.0, + "63": 2466227200.0, + "64": 2466227200.0, + "65": 2466227200.0, + "66": 2466227200.0, + "67": 2466227200.0, + "68": 2466227200.0, + "69": 2466227200.0, + "70": 2466227200.0, + "71": 2466227200.0, + "72": 2466227200.0, + "73": 2466227200.0, + "74": 2466227200.0, + "75": 2466227200.0, + "76": 2466227200.0, + "77": 2466227200.0, + "78": 2466227200.0, + "79": 2466227200.0, + "80": 2466227200.0, + "81": 2466227200.0, + "82": 2466227200.0, + "83": 2466227200.0, + "84": 2466227200.0, + "85": 2466227200.0, + "86": 2466227200.0, + "87": 2466227200.0, + "88": 2466227200.0, + "89": 2466227200.0, + "90": 2466227200.0, + "91": 
2466227200.0, + "92": 2466227200.0, + "93": 2466227200.0, + "94": 2466227200.0, + "95": 2466227200.0, + "96": 2466227200.0, + "97": 2466227200.0, + "98": 2466227200.0, + "99": 2466227200.0, + "100": 2466227200.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.76786, + "2": 0.13256, + "3": 0.26046, + "4": 0.11178, + "5": 0.24866, + "6": 0.1232, + "7": 0.21705, + "8": 0.25373, + "9": 0.10845, + "10": 0.24176, + "11": 0.32229, + "12": 0.16743, + "13": 0.27675, + "14": 0.10674, + "15": 0.23677, + "16": 0.24253, + "17": 0.1093, + "18": 0.10679, + "19": 0.10721, + "20": 0.25414, + "21": 0.21498, + "22": 0.10728, + "23": 0.10796, + "24": 0.12419, + "25": 0.11194, + "26": 0.10802, + "27": 0.36403, + "28": 0.10527, + "29": 0.10971, + "30": 0.10869, + "31": 0.25185, + "32": 0.20786, + "33": 0.1097, + "34": 0.10836, + "35": 0.23722, + "36": 0.12158, + "37": 0.1137, + "38": 0.10759, + "39": 0.2238, + "40": 0.23329, + "41": 0.20392, + "42": 0.10935, + "43": 0.11981, + "44": 0.11039, + "45": 0.10755, + "46": 0.10875, + "47": 0.22415, + "48": 0.11024, + "49": 0.47527, + "50": 0.11071, + "51": 0.21161, + "52": 0.10861, + "53": 0.10793, + "54": 0.24873, + "55": 0.21365, + "56": 0.1064, + "57": 0.20935, + "58": 0.24181, + "59": 0.14913, + "60": 0.10905, + "61": 0.20375, + "62": 0.20001, + "63": 0.20843, + "64": 0.11035, + "65": 0.23806, + "66": 0.11206, + "67": 0.10915, + "68": 0.22684, + "69": 0.10627, + "70": 0.24098, + "71": 0.20399, + "72": 0.1078, + "73": 0.1103, + "74": 0.11151, + "75": 0.11175, + "76": 0.11055, + "77": 0.10702, + "78": 0.11005, + "79": 0.11071, + "80": 0.11049, + "81": 0.54906, + "82": 0.10895, + "83": 0.23816, + "84": 0.11114, + "85": 0.10811, + "86": 0.11137, + "87": 0.11047, + "88": 0.22025, + "89": 0.22508, + "90": 0.10735, + "91": 0.21332, + "92": 0.23884, + "93": 0.10845, + "94": 0.10944, + "95": 0.22451, + "96": 0.10871, + "97": 0.28678, + "98": 0.11138, + "99": 0.11082, + "100": 
0.11057 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json index e45c3949555..852f0cf6ee6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.90397, - "2": 0.16607, - "3": 0.13982, - "4": 0.14032, - "5": 0.13765, - "6": 0.13651, - "7": 0.13453, - "8": 0.13413, - "9": 0.13703, - "10": 0.13873, - "11": 0.28364, - "12": 0.13723, - "13": 0.13756, - "14": 0.1379, - "15": 0.14148, - "16": 0.1356, - "17": 0.13661, - "18": 0.13568, - "19": 0.13637, - "20": 0.1367, - "21": 0.28276, - "22": 0.13722, - "23": 0.13404, - "24": 0.13414, - "25": 0.1341, - "26": 0.13595, + "1": 12.07462, + "2": 0.1735, + "3": 0.1566, + "4": 0.13588, + "5": 0.13681, + "6": 0.13636, + "7": 0.13617, + "8": 0.13757, + "9": 0.13674, + "10": 0.13723, + "11": 0.30898, + "12": 0.13427, + "13": 0.13436, + "14": 0.13398, + "15": 0.1343, + "16": 0.13416, + "17": 0.13488, + "18": 0.13457, + "19": 0.1346, + "20": 0.13478, + "21": 0.27765, + "22": 0.13422, + "23": 0.13459, + "24": 0.1337, + "25": 0.13474, + "26": 0.13421, "27": 0.13446, - "28": 0.13477, - "29": 0.13439, - "30": 0.13383, - "31": 0.27955, - "32": 0.13416, - "33": 0.13472, - "34": 0.13383, - "35": 0.13499, - "36": 0.13468, - "37": 0.13332, - "38": 0.13449, - "39": 0.13488, - "40": 0.1347, - "41": 0.2818, - "42": 0.13497, - "43": 0.13495, - "44": 0.13372, - 
"45": 0.13385, - "46": 0.13479, - "47": 0.13339, - "48": 0.13334, - "49": 0.13393, - "50": 0.13346, - "51": 0.2815, - "52": 0.13492, - "53": 0.13387, - "54": 0.13407, - "55": 0.13263, - "56": 0.13379, - "57": 0.13439, - "58": 0.13407, - "59": 0.13481, - "60": 0.13407, - "61": 0.28073, - "62": 0.13474, - "63": 0.13363, - "64": 0.13359, - "65": 0.13323, - "66": 0.13437, - "67": 0.13391, - "68": 0.13344, - "69": 0.21561, - "70": 0.1337, - "71": 0.27778, - "72": 0.13359, - "73": 0.13364, - "74": 0.13406, - "75": 0.13376, - "76": 0.13308, - "77": 0.13263, - "78": 0.13172, - "79": 0.13328, - "80": 0.13387, - "81": 0.28018, - "82": 0.13437, - "83": 0.13645, - "84": 0.13548, - "85": 0.13558, - "86": 0.13447, - "87": 0.13492, - "88": 0.13361, - "89": 0.13427, - "90": 0.13332, - "91": 0.27771, - "92": 0.13375, - "93": 0.1331, - "94": 0.13317, - "95": 0.13408, - "96": 0.13418, - "97": 0.13752, - "98": 0.13493, - "99": 0.13408, - "100": 0.13136 + "28": 0.13381, + "29": 0.134, + "30": 0.13373, + "31": 0.27812, + "32": 0.13383, + "33": 0.13406, + "34": 0.13341, + "35": 0.13501, + "36": 0.13349, + "37": 0.13319, + "38": 0.13345, + "39": 0.13383, + "40": 0.13285, + "41": 0.29258, + "42": 0.13394, + "43": 0.13373, + "44": 0.13332, + "45": 0.13359, + "46": 0.13504, + "47": 0.13407, + "48": 0.13352, + "49": 0.13439, + "50": 0.1334, + "51": 0.28209, + "52": 0.13691, + "53": 0.13662, + "54": 0.13717, + "55": 0.13691, + "56": 0.13684, + "57": 0.13847, + "58": 0.13658, + "59": 0.13753, + "60": 0.13745, + "61": 0.30258, + "62": 0.13813, + "63": 0.14191, + "64": 0.13802, + "65": 0.13764, + "66": 0.13783, + "67": 0.13952, + "68": 0.13799, + "69": 0.13795, + "70": 0.13735, + "71": 0.30569, + "72": 0.13924, + "73": 0.1384, + "74": 0.13859, + "75": 0.13793, + "76": 0.13693, + "77": 0.13831, + "78": 0.13768, + "79": 0.1392, + "80": 0.13806, + "81": 0.30792, + "82": 0.1386, + "83": 0.13782, + "84": 0.13746, + "85": 0.13781, + "86": 0.13783, + "87": 0.13772, + "88": 0.13728, + "89": 0.13847, + 
"90": 0.13748, + "91": 0.31327, + "92": 0.13717, + "93": 0.138, + "94": 0.13824, + "95": 0.13692, + "96": 0.13681, + "97": 0.138, + "98": 0.13737, + "99": 0.13804, + "100": 0.13722 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..6785ccf3405 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.96249, + "2": 10.97263, + "3": 10.95604, + "4": 10.99185, + "5": 10.94911, + "6": 10.94586, + "7": 10.97227, + "8": 10.96531, + "9": 10.95864, + "10": 10.96522, + "11": 10.92975, + "12": 10.93134, + "13": 10.93843, + "14": 10.93051, + "15": 10.92149, + "16": 10.91358, + "17": 10.89583, + "18": 10.88315, + "19": 10.88739, + "20": 10.81664, + "21": 10.77565, + "22": 10.71731, + "23": 10.77156, + "24": 10.70856, + "25": 10.67765, + "26": 10.70309, + "27": 10.69021, + "28": 10.62094, + "29": 10.61335, + "30": 10.46967, + "31": 10.2743, + "32": 10.52078, + "33": 10.51563, + "34": 10.3085, + "35": 10.35579, + "36": 10.31814, + "37": 10.39823, + "38": 10.26329, + "39": 10.44238, + "40": 10.17104, + "41": 10.20058, + "42": 10.26164, + "43": 9.9303, + "44": 10.02911, + "45": 9.9202, + "46": 9.88631, + "47": 10.18638, + "48": 9.90626, + "49": 9.60031, + "50": 9.96555, + "51": 9.89946, + "52": 9.78501, + "53": 10.1053, + "54": 9.98473, + "55": 9.90831, + "56": 9.65981, + "57": 9.52396, + "58": 9.87215, + "59": 9.6169, + "60": 9.54609, + "61": 9.7001, + "62": 9.99569, + "63": 9.41669, + "64": 9.79572, 
+ "65": 8.97339, + "66": 9.72409, + "67": 9.38538, + "68": 9.79899, + "69": 9.80931, + "70": 9.76598, + "71": 9.63141, + "72": 9.59357, + "73": 9.51102, + "74": 8.95643, + "75": 9.42625, + "76": 9.11036, + "77": 10.06643, + "78": 9.72178, + "79": 9.39646, + "80": 9.40915, + "81": 9.49577, + "82": 9.69623, + "83": 9.33227, + "84": 9.43138, + "85": 9.62886, + "86": 9.06094, + "87": 9.60054, + "88": 9.77282, + "89": 9.61807, + "90": 9.824, + "91": 9.3519, + "92": 9.37754, + "93": 9.09307, + "94": 8.83497, + "95": 9.52251, + "96": 9.53024, + "97": 9.32185, + "98": 9.68444, + "99": 8.8844, + "100": 9.4165 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 22727740.0, + "2": 22924404.0, + "3": 22597002.0, + "4": 23219532.0, + "5": 22715420.0, + "6": 23021500.0, + "7": 22771380.0, + "8": 22926852.0, + "9": 22841780.0, + "10": 22917780.0, + "11": 22500516.0, + "12": 22459810.0, + "13": 22917164.0, + "14": 22388968.0, + "15": 22821358.0, + "16": 22831192.0, + "17": 22819736.0, + "18": 22582350.0, + "19": 22618104.0, + "20": 22693400.0, + "21": 22739610.0, + "22": 22800008.0, + "23": 22538272.0, + "24": 22771352.0, + "25": 22819066.0, + "26": 22547720.0, + "27": 22469212.0, + "28": 22453960.0, + "29": 22529656.0, + "30": 22630960.0, + "31": 22955540.0, + "32": 22584916.0, + "33": 22558336.0, + "34": 22835478.0, + "35": 22787746.0, + "36": 22589468.0, + "37": 22496828.0, + "38": 22896094.0, + "39": 22802714.0, + "40": 22657992.0, + "41": 22659460.0, + "42": 22667202.0, + "43": 22977092.0, + "44": 22746836.0, + "45": 22675370.0, + "46": 22884172.0, + "47": 22633868.0, + "48": 22928116.0, + "49": 22727456.0, + "50": 22904148.0, + "51": 22792094.0, + "52": 22748864.0, + "53": 22925208.0, + "54": 22840064.0, + "55": 22518576.0, + "56": 22877644.0, + "57": 23113416.0, + "58": 22845068.0, + "59": 22715704.0, + "60": 22743324.0, + "61": 22723260.0, + "62": 22672600.0, + "63": 22846484.0, + "64": 22822992.0, + "65": 23061634.0, 
+ "66": 22729736.0, + "67": 22908874.0, + "68": 22610620.0, + "69": 22583304.0, + "70": 22828816.0, + "71": 22748974.0, + "72": 22654840.0, + "73": 22741132.0, + "74": 23047902.0, + "75": 23054368.0, + "76": 22901688.0, + "77": 22272290.0, + "78": 22789530.0, + "79": 22743876.0, + "80": 22706184.0, + "81": 22891292.0, + "82": 22778490.0, + "83": 22839152.0, + "84": 23009710.0, + "85": 22711788.0, + "86": 23103398.0, + "87": 22735162.0, + "88": 22637356.0, + "89": 22498244.0, + "90": 22972336.0, + "91": 22767438.0, + "92": 22808640.0, + "93": 22658540.0, + "94": 22912524.0, + "95": 23048146.0, + "96": 22828804.0, + "97": 22608672.0, + "98": 22763072.0, + "99": 22906218.0, + "100": 23015634.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 548399616.0, + "2": 548399616.0, + "3": 548399616.0, + "4": 548399616.0, + "5": 548399616.0, + "6": 548399616.0, + "7": 548399616.0, + "8": 548399616.0, + "9": 548399616.0, + "10": 548399616.0, + "11": 548399616.0, + "12": 548399616.0, + "13": 548399616.0, + "14": 548399616.0, + "15": 548399616.0, + "16": 548399616.0, + "17": 548399616.0, + "18": 548399616.0, + "19": 548399616.0, + "20": 548399616.0, + "21": 548399616.0, + "22": 548399616.0, + "23": 548399616.0, + "24": 548399616.0, + "25": 548399616.0, + "26": 548399616.0, + "27": 548399616.0, + "28": 548399616.0, + "29": 548399616.0, + "30": 548399616.0, + "31": 548399616.0, + "32": 548399616.0, + "33": 548399616.0, + "34": 548399616.0, + "35": 548399616.0, + "36": 548399616.0, + "37": 548399616.0, + "38": 548399616.0, + "39": 548399616.0, + "40": 548399616.0, + "41": 548399616.0, + "42": 548399616.0, + "43": 548399616.0, + "44": 548399616.0, + "45": 548399616.0, + "46": 548399616.0, + "47": 548399616.0, + "48": 548399616.0, + "49": 548399616.0, + "50": 548399616.0, + "51": 548399616.0, + "52": 548399616.0, + "53": 548399616.0, + "54": 548399616.0, + "55": 548399616.0, + "56": 548399616.0, + "57": 548399616.0, + 
"58": 548399616.0, + "59": 548399616.0, + "60": 548399616.0, + "61": 548399616.0, + "62": 548399616.0, + "63": 548399616.0, + "64": 548399616.0, + "65": 548399616.0, + "66": 548399616.0, + "67": 548399616.0, + "68": 548399616.0, + "69": 548399616.0, + "70": 548399616.0, + "71": 548399616.0, + "72": 548399616.0, + "73": 548399616.0, + "74": 548399616.0, + "75": 548399616.0, + "76": 548399616.0, + "77": 548399616.0, + "78": 548399616.0, + "79": 548399616.0, + "80": 548399616.0, + "81": 548399616.0, + "82": 548399616.0, + "83": 548399616.0, + "84": 548399616.0, + "85": 548399616.0, + "86": 548399616.0, + "87": 548399616.0, + "88": 548399616.0, + "89": 548399616.0, + "90": 548399616.0, + "91": 548399616.0, + "92": 548399616.0, + "93": 548399616.0, + "94": 548399616.0, + "95": 548399616.0, + "96": 548399616.0, + "97": 548399616.0, + "98": 548399616.0, + "99": 548399616.0, + "100": 548399616.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2325370880.0, + "2": 2466227200.0, + "3": 2466227200.0, + "4": 2466227200.0, + "5": 2466227200.0, + "6": 2466227200.0, + "7": 2466227200.0, + "8": 2466227200.0, + "9": 2466227200.0, + "10": 2466227200.0, + "11": 2466227200.0, + "12": 2466227200.0, + "13": 2466227200.0, + "14": 2466227200.0, + "15": 2466227200.0, + "16": 2466227200.0, + "17": 2466227200.0, + "18": 2466227200.0, + "19": 2466227200.0, + "20": 2466227200.0, + "21": 2466227200.0, + "22": 2466227200.0, + "23": 2466227200.0, + "24": 2466227200.0, + "25": 2466227200.0, + "26": 2466227200.0, + "27": 2466227200.0, + "28": 2466227200.0, + "29": 2466227200.0, + "30": 2466227200.0, + "31": 2466227200.0, + "32": 2466227200.0, + "33": 2466227200.0, + "34": 2466227200.0, + "35": 2466227200.0, + "36": 2466227200.0, + "37": 2466227200.0, + "38": 2466227200.0, + "39": 2466227200.0, + "40": 2466227200.0, + "41": 2466227200.0, + "42": 2466227200.0, + "43": 2466227200.0, + "44": 2466227200.0, + "45": 2466227200.0, + 
"46": 2466227200.0, + "47": 2466227200.0, + "48": 2466227200.0, + "49": 2466227200.0, + "50": 2466227200.0, + "51": 2466227200.0, + "52": 2466227200.0, + "53": 2466227200.0, + "54": 2466227200.0, + "55": 2466227200.0, + "56": 2466227200.0, + "57": 2466227200.0, + "58": 2466227200.0, + "59": 2466227200.0, + "60": 2466227200.0, + "61": 2466227200.0, + "62": 2466227200.0, + "63": 2466227200.0, + "64": 2466227200.0, + "65": 2466227200.0, + "66": 2466227200.0, + "67": 2466227200.0, + "68": 2466227200.0, + "69": 2466227200.0, + "70": 2466227200.0, + "71": 2466227200.0, + "72": 2466227200.0, + "73": 2466227200.0, + "74": 2466227200.0, + "75": 2466227200.0, + "76": 2466227200.0, + "77": 2466227200.0, + "78": 2466227200.0, + "79": 2466227200.0, + "80": 2466227200.0, + "81": 2466227200.0, + "82": 2466227200.0, + "83": 2466227200.0, + "84": 2466227200.0, + "85": 2466227200.0, + "86": 2466227200.0, + "87": 2466227200.0, + "88": 2466227200.0, + "89": 2466227200.0, + "90": 2466227200.0, + "91": 2466227200.0, + "92": 2466227200.0, + "93": 2466227200.0, + "94": 2466227200.0, + "95": 2466227200.0, + "96": 2466227200.0, + "97": 2466227200.0, + "98": 2466227200.0, + "99": 2466227200.0, + "100": 2466227200.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.43543, + "2": 0.13665, + "3": 0.25165, + "4": 0.12154, + "5": 0.12485, + "6": 0.12299, + "7": 0.15052, + "8": 0.1169, + "9": 0.22995, + "10": 0.12843, + "11": 0.12174, + "12": 0.12421, + "13": 0.22106, + "14": 0.15546, + "15": 0.12535, + "16": 0.12448, + "17": 0.12283, + "18": 0.12251, + "19": 0.12247, + "20": 0.12198, + "21": 0.12007, + "22": 0.22391, + "23": 0.12977, + "24": 0.12857, + "25": 0.24314, + "26": 0.13193, + "27": 0.12813, + "28": 0.12875, + "29": 0.22448, + "30": 0.12465, + "31": 0.23898, + "32": 0.12577, + "33": 0.12563, + "34": 0.12562, + "35": 0.15646, + "36": 0.12633, + "37": 0.12485, + "38": 0.21163, + "39": 0.13978, + "40": 0.12472, + "41": 
0.12409, + "42": 0.12462, + "43": 0.12837, + "44": 0.12431, + "45": 0.12445, + "46": 0.23272, + "47": 0.12786, + "48": 0.12842, + "49": 0.22766, + "50": 0.1262, + "51": 0.13206, + "52": 0.21451, + "53": 0.13634, + "54": 0.11899, + "55": 0.12242, + "56": 0.24089, + "57": 0.12507, + "58": 0.12886, + "59": 0.1281, + "60": 0.22921, + "61": 0.13825, + "62": 0.22494, + "63": 0.27913, + "64": 0.16101, + "65": 0.27886, + "66": 0.13864, + "67": 0.21998, + "68": 0.1264, + "69": 0.12091, + "70": 0.22463, + "71": 0.12416, + "72": 0.17663, + "73": 0.12113, + "74": 0.12227, + "75": 0.21518, + "76": 0.11973, + "77": 0.15395, + "78": 0.19544, + "79": 0.23282, + "80": 0.23167, + "81": 0.12293, + "82": 0.23426, + "83": 0.23926, + "84": 0.12806, + "85": 0.12027, + "86": 0.23455, + "87": 0.12541, + "88": 0.1208, + "89": 0.11759, + "90": 0.11849, + "91": 0.24522, + "92": 0.1157, + "93": 0.23994, + "94": 0.12794, + "95": 0.18044, + "96": 0.30003, + "97": 0.12202, + "98": 0.1229, + "99": 0.12193, + "100": 0.23044 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json index 7848ef42dd8..65edeb55e3d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 13.44016, - "2": 0.17357, - "3": 0.14155, - "4": 0.14433, - "5": 0.14312, - "6": 0.14041, - "7": 0.14082, - "8": 0.13921, - "9": 0.1399, - "10": 0.13856, - "11": 0.13995, - "12": 
0.13864, - "13": 0.13803, - "14": 0.13783, - "15": 0.13752, - "16": 0.13882, - "17": 0.13834, - "18": 0.13863, - "19": 0.13872, - "20": 0.1384, - "21": 0.13424, - "22": 0.13105, - "23": 0.13094, - "24": 0.1307, - "25": 0.13252, - "26": 0.13172, - "27": 0.12995, - "28": 0.13015, - "29": 0.13002, - "30": 0.13019, - "31": 0.13071, - "32": 0.13106, - "33": 0.1305, - "34": 0.13023, - "35": 0.13178, - "36": 0.13167, - "37": 0.13002, - "38": 0.13094, - "39": 0.13093, - "40": 0.13167, - "41": 0.13178, - "42": 0.13107, - "43": 0.1328, - "44": 0.13048, - "45": 0.13046, - "46": 0.13126, - "47": 0.12901, - "48": 0.12854, - "49": 0.12862, - "50": 0.12918, - "51": 0.14204, - "52": 0.13766, - "53": 0.13573, - "54": 0.13601, - "55": 0.13392, - "56": 0.13591, - "57": 0.13683, - "58": 0.13487, - "59": 0.13645, - "60": 0.13627, - "61": 0.13507, - "62": 0.13578, - "63": 0.13619, - "64": 0.13556, - "65": 0.13673, - "66": 0.13706, - "67": 0.13535, - "68": 0.13581, - "69": 0.1342, - "70": 0.13519, - "71": 0.13563, - "72": 0.13553, - "73": 0.13626, - "74": 0.13636, - "75": 0.1351, - "76": 0.13531, - "77": 0.1341, - "78": 0.13121, - "79": 0.13164, - "80": 0.1338, - "81": 0.13214, - "82": 0.13227, - "83": 0.13301, - "84": 0.13291, - "85": 0.13384, - "86": 0.13276, - "87": 0.13499, - "88": 0.13549, - "89": 0.13554, - "90": 0.13505, - "91": 0.13486, - "92": 0.13406, - "93": 0.13522, - "94": 0.13615, - "95": 0.1365, - "96": 0.13586, - "97": 0.13623, - "98": 0.13603, - "99": 0.13615, - "100": 0.13526 + "1": 7.21369, + "2": 0.1831, + "3": 0.15682, + "4": 0.14056, + "5": 0.13853, + "6": 0.13587, + "7": 0.13515, + "8": 0.13475, + "9": 0.13511, + "10": 0.13623, + "11": 0.13495, + "12": 0.13604, + "13": 0.13619, + "14": 0.13493, + "15": 0.13654, + "16": 0.135, + "17": 0.13441, + "18": 0.13422, + "19": 0.13368, + "20": 0.13434, + "21": 0.13405, + "22": 0.13547, + "23": 0.13766, + "24": 0.14005, + "25": 0.1397, + "26": 0.13807, + "27": 0.13719, + "28": 0.13707, + "29": 0.1384, + "30": 0.13799, + "31": 
0.13774, + "32": 0.13838, + "33": 0.13846, + "34": 0.13735, + "35": 0.1399, + "36": 0.13989, + "37": 0.13915, + "38": 0.1394, + "39": 0.14001, + "40": 0.13993, + "41": 0.13938, + "42": 0.14004, + "43": 0.14041, + "44": 0.14062, + "45": 0.13996, + "46": 0.14021, + "47": 0.14, + "48": 0.13971, + "49": 0.13941, + "50": 0.13887, + "51": 0.14225, + "52": 0.13981, + "53": 0.13886, + "54": 0.13925, + "55": 0.141, + "56": 0.13843, + "57": 0.14096, + "58": 0.13853, + "59": 0.13902, + "60": 0.13975, + "61": 0.13772, + "62": 0.13889, + "63": 0.1372, + "64": 0.13725, + "65": 0.13793, + "66": 0.13913, + "67": 0.13885, + "68": 0.13752, + "69": 0.13831, + "70": 0.13735, + "71": 0.13736, + "72": 0.13847, + "73": 0.13902, + "74": 0.13786, + "75": 0.1382, + "76": 0.13854, + "77": 0.13828, + "78": 0.13847, + "79": 0.13887, + "80": 0.13758, + "81": 0.13798, + "82": 0.13775, + "83": 0.13914, + "84": 0.13872, + "85": 0.13875, + "86": 0.13942, + "87": 0.13828, + "88": 0.1378, + "89": 0.13834, + "90": 0.1384, + "91": 0.13837, + "92": 0.13872, + "93": 0.13843, + "94": 0.13831, + "95": 0.13887, + "96": 0.13825, + "97": 0.13822, + "98": 0.13872, + "99": 0.13922, + "100": 0.13751 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..2999f912c8f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.81847, + "2": 10.81935, + "3": 10.83689, + "4": 10.83432, + "5": 10.84975, + "6": 10.83477, + "7": 10.82465, + "8": 10.81547, + "9": 10.87712, + "10": 10.88236, + "11": 10.87197, + "12": 
10.82476, + "13": 10.84812, + "14": 10.81966, + "15": 10.80548, + "16": 10.80144, + "17": 10.77232, + "18": 10.78639, + "19": 10.74499, + "20": 10.62485, + "21": 10.68096, + "22": 10.65118, + "23": 10.76355, + "24": 10.61936, + "25": 10.46094, + "26": 10.59639, + "27": 10.54041, + "28": 10.44451, + "29": 10.39564, + "30": 10.40393, + "31": 10.51276, + "32": 10.32147, + "33": 10.26365, + "34": 10.46889, + "35": 9.96002, + "36": 10.11577, + "37": 10.0112, + "38": 10.38367, + "39": 9.78625, + "40": 10.10474, + "41": 10.13172, + "42": 10.02873, + "43": 10.20988, + "44": 10.07363, + "45": 9.69403, + "46": 9.99615, + "47": 9.93462, + "48": 9.6742, + "49": 9.91778, + "50": 9.93162, + "51": 9.80504, + "52": 9.32627, + "53": 9.6594, + "54": 9.87232, + "55": 9.99774, + "56": 9.83023, + "57": 9.75542, + "58": 9.82528, + "59": 9.32819, + "60": 9.35425, + "61": 9.44562, + "62": 10.20265, + "63": 9.362, + "64": 9.63412, + "65": 9.71326, + "66": 9.53682, + "67": 9.67365, + "68": 9.5994, + "69": 9.38537, + "70": 9.75361, + "71": 9.88632, + "72": 9.70683, + "73": 9.40123, + "74": 9.44529, + "75": 8.96867, + "76": 9.57975, + "77": 9.62562, + "78": 9.40252, + "79": 9.54279, + "80": 9.32635, + "81": 9.70785, + "82": 9.91615, + "83": 9.33512, + "84": 9.47626, + "85": 8.98192, + "86": 9.67249, + "87": 9.44309, + "88": 9.59721, + "89": 9.53706, + "90": 9.56301, + "91": 9.63798, + "92": 9.14066, + "93": 9.4357, + "94": 9.55795, + "95": 9.14422, + "96": 8.77023, + "97": 9.58717, + "98": 9.79488, + "99": 9.38629, + "100": 9.21781 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1093.0, + "2": 1211.0, + "3": 1288.0, + "4": 1273.0, + "5": 1242.0, + "6": 1323.0, + "7": 1211.0, + "8": 999.0, + "9": 1427.0, + "10": 1373.0, + "11": 1223.0, + "12": 1326.0, + "13": 1295.0, + "14": 1137.0, + "15": 1228.0, + "16": 1206.0, + "17": 1192.0, + "18": 1345.0, + "19": 1109.0, + "20": 1104.0, + "21": 1244.0, + "22": 1180.0, + "23": 1301.0, + "24": 
1301.0, + "25": 1101.0, + "26": 1277.0, + "27": 1268.0, + "28": 1267.0, + "29": 1314.0, + "30": 1418.0, + "31": 1467.0, + "32": 1463.0, + "33": 1457.0, + "34": 1519.0, + "35": 1308.0, + "36": 1289.0, + "37": 1397.0, + "38": 1566.0, + "39": 1356.0, + "40": 1499.0, + "41": 1618.0, + "42": 1607.0, + "43": 1715.0, + "44": 1532.0, + "45": 1441.0, + "46": 1780.0, + "47": 1585.0, + "48": 1610.0, + "49": 1736.0, + "50": 1689.0, + "51": 1743.0, + "52": 1684.0, + "53": 1829.0, + "54": 1884.0, + "55": 1833.0, + "56": 2031.0, + "57": 1941.0, + "58": 1755.0, + "59": 1637.0, + "60": 1841.0, + "61": 2259.0, + "62": 2132.0, + "63": 2034.0, + "64": 1929.0, + "65": 2296.0, + "66": 2209.0, + "67": 2152.0, + "68": 2259.0, + "69": 2150.0, + "70": 2498.0, + "71": 2338.0, + "72": 2491.0, + "73": 2089.0, + "74": 2324.0, + "75": 1882.0, + "76": 2210.0, + "77": 2293.0, + "78": 2482.0, + "79": 2651.0, + "80": 1935.0, + "81": 2339.0, + "82": 2512.0, + "83": 2503.0, + "84": 2027.0, + "85": 2248.0, + "86": 2323.0, + "87": 2665.0, + "88": 2316.0, + "89": 2574.0, + "90": 2400.0, + "91": 2451.0, + "92": 1991.0, + "93": 2150.0, + "94": 2443.0, + "95": 2381.0, + "96": 2114.0, + "97": 2288.0, + "98": 2287.0, + "99": 2302.0, + "100": 2104.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 759682560.0, + "2": 759682560.0, + "3": 759682560.0, + "4": 759682560.0, + "5": 759682560.0, + "6": 759682560.0, + "7": 759682560.0, + "8": 759682560.0, + "9": 759682560.0, + "10": 759682560.0, + "11": 759682560.0, + "12": 759682560.0, + "13": 759682560.0, + "14": 759682560.0, + "15": 759682560.0, + "16": 759682560.0, + "17": 759682560.0, + "18": 759682560.0, + "19": 759682560.0, + "20": 759682560.0, + "21": 759682560.0, + "22": 759682560.0, + "23": 759682560.0, + "24": 759682560.0, + "25": 759682560.0, + "26": 759682560.0, + "27": 759682560.0, + "28": 759682560.0, + "29": 759682560.0, + "30": 759682560.0, + "31": 759682560.0, + "32": 759682560.0, + 
"33": 759682560.0, + "34": 759682560.0, + "35": 759682560.0, + "36": 759682560.0, + "37": 759682560.0, + "38": 759682560.0, + "39": 759682560.0, + "40": 759682560.0, + "41": 759682560.0, + "42": 759682560.0, + "43": 759682560.0, + "44": 759682560.0, + "45": 759682560.0, + "46": 759682560.0, + "47": 759682560.0, + "48": 759682560.0, + "49": 759682560.0, + "50": 759682560.0, + "51": 759682560.0, + "52": 759682560.0, + "53": 759682560.0, + "54": 759682560.0, + "55": 759682560.0, + "56": 759682560.0, + "57": 759682560.0, + "58": 759682560.0, + "59": 759682560.0, + "60": 759682560.0, + "61": 759682560.0, + "62": 759682560.0, + "63": 759682560.0, + "64": 759682560.0, + "65": 759682560.0, + "66": 759682560.0, + "67": 759682560.0, + "68": 759682560.0, + "69": 759682560.0, + "70": 759682560.0, + "71": 759682560.0, + "72": 759682560.0, + "73": 759682560.0, + "74": 759682560.0, + "75": 759682560.0, + "76": 759682560.0, + "77": 759682560.0, + "78": 759682560.0, + "79": 759682560.0, + "80": 759682560.0, + "81": 759682560.0, + "82": 759682560.0, + "83": 759682560.0, + "84": 759682560.0, + "85": 759682560.0, + "86": 759682560.0, + "87": 759682560.0, + "88": 759682560.0, + "89": 759682560.0, + "90": 759682560.0, + "91": 759682560.0, + "92": 759682560.0, + "93": 759682560.0, + "94": 759682560.0, + "95": 759682560.0, + "96": 759682560.0, + "97": 759682560.0, + "98": 759682560.0, + "99": 759682560.0, + "100": 759682560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2542994944.0, + "2": 2824706560.0, + "3": 2824706560.0, + "4": 2824706560.0, + "5": 2824706560.0, + "6": 2824706560.0, + "7": 2824706560.0, + "8": 2824706560.0, + "9": 2824706560.0, + "10": 2824706560.0, + "11": 2824706560.0, + "12": 2824706560.0, + "13": 2824706560.0, + "14": 2824706560.0, + "15": 2824706560.0, + "16": 2824706560.0, + "17": 2824706560.0, + "18": 2824706560.0, + "19": 2824706560.0, + "20": 2824706560.0, + "21": 2824706560.0, + 
"22": 2824706560.0, + "23": 2824706560.0, + "24": 2824706560.0, + "25": 2824706560.0, + "26": 2824706560.0, + "27": 2824706560.0, + "28": 2824706560.0, + "29": 2824706560.0, + "30": 2824706560.0, + "31": 2824706560.0, + "32": 2824706560.0, + "33": 2824706560.0, + "34": 2824706560.0, + "35": 2824706560.0, + "36": 2824706560.0, + "37": 2824706560.0, + "38": 2824706560.0, + "39": 2824706560.0, + "40": 2824706560.0, + "41": 2824706560.0, + "42": 2824706560.0, + "43": 2824706560.0, + "44": 2824706560.0, + "45": 2824706560.0, + "46": 2824706560.0, + "47": 2824706560.0, + "48": 2824706560.0, + "49": 2824706560.0, + "50": 2824706560.0, + "51": 2824706560.0, + "52": 2824706560.0, + "53": 2824706560.0, + "54": 2824706560.0, + "55": 2824706560.0, + "56": 2824706560.0, + "57": 2824706560.0, + "58": 2824706560.0, + "59": 2824706560.0, + "60": 2824706560.0, + "61": 2824706560.0, + "62": 2824706560.0, + "63": 2824706560.0, + "64": 2824706560.0, + "65": 2824706560.0, + "66": 2824706560.0, + "67": 2824706560.0, + "68": 2824706560.0, + "69": 2824706560.0, + "70": 2824706560.0, + "71": 2824706560.0, + "72": 2824706560.0, + "73": 2824706560.0, + "74": 2824706560.0, + "75": 2824706560.0, + "76": 2824706560.0, + "77": 2824706560.0, + "78": 2824706560.0, + "79": 2824706560.0, + "80": 2824706560.0, + "81": 2824706560.0, + "82": 2824706560.0, + "83": 2824706560.0, + "84": 2824706560.0, + "85": 2824706560.0, + "86": 2824706560.0, + "87": 2824706560.0, + "88": 2824706560.0, + "89": 2824706560.0, + "90": 2824706560.0, + "91": 2824706560.0, + "92": 2824706560.0, + "93": 2824706560.0, + "94": 2824706560.0, + "95": 2824706560.0, + "96": 2824706560.0, + "97": 2824706560.0, + "98": 2824706560.0, + "99": 2824706560.0, + "100": 2824706560.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.46016, + "2": 0.42187, + "3": 0.15692, + "4": 0.35623, + "5": 0.25874, + "6": 0.17276, + "7": 0.18359, + "8": 0.19391, + "9": 0.19884, + "10": 
0.27267, + "11": 0.25203, + "12": 0.16389, + "13": 0.35153, + "14": 0.15991, + "15": 0.268, + "16": 0.1778, + "17": 0.18774, + "18": 0.18637, + "19": 0.17789, + "20": 0.22748, + "21": 0.23632, + "22": 0.15657, + "23": 0.30888, + "24": 0.15208, + "25": 0.14888, + "26": 0.22189, + "27": 0.17979, + "28": 0.24137, + "29": 0.2423, + "30": 0.27274, + "31": 0.26218, + "32": 0.20249, + "33": 0.41473, + "34": 0.23104, + "35": 0.3203, + "36": 0.20187, + "37": 0.15959, + "38": 0.35951, + "39": 0.15125, + "40": 0.15444, + "41": 0.15359, + "42": 0.35395, + "43": 0.29841, + "44": 0.14696, + "45": 0.15582, + "46": 0.4465, + "47": 0.15406, + "48": 0.16257, + "49": 0.15478, + "50": 0.15489, + "51": 0.1534, + "52": 0.40345, + "53": 0.14379, + "54": 0.31104, + "55": 0.14226, + "56": 0.23475, + "57": 0.31848, + "58": 0.1553, + "59": 0.15368, + "60": 0.24773, + "61": 0.26981, + "62": 0.14177, + "63": 0.15237, + "64": 0.18307, + "65": 0.23266, + "66": 0.24928, + "67": 0.36215, + "68": 0.15228, + "69": 0.21389, + "70": 0.35043, + "71": 0.14126, + "72": 0.3495, + "73": 0.23925, + "74": 0.23063, + "75": 0.14077, + "76": 0.14281, + "77": 0.14126, + "78": 0.14448, + "79": 0.14178, + "80": 0.22094, + "81": 0.13999, + "82": 0.30865, + "83": 0.14029, + "84": 0.15021, + "85": 0.14158, + "86": 0.14189, + "87": 0.14288, + "88": 0.22637, + "89": 0.14095, + "90": 0.23496, + "91": 0.18038, + "92": 0.14174, + "93": 0.1569, + "94": 0.34426, + "95": 0.14211, + "96": 0.14174, + "97": 0.14527, + "98": 0.14364, + "99": 0.1424, + "100": 0.21352 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgx_a100.json index 4771e4e3c8c..facbb05b6ce 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 8.66407, - "2": 0.18828, - "3": 0.15715, - "4": 0.15685, - "5": 0.1544, - "6": 0.15356, - "7": 0.15196, - "8": 0.15101, - "9": 0.15114, - "10": 0.15067, - "11": 0.15113, - "12": 0.15109, - "13": 0.15255, - "14": 0.15181, - "15": 0.15165, - "16": 0.14989, - "17": 0.15094, - "18": 0.15062, - "19": 0.15148, - "20": 0.15014, - "21": 0.15114, - "22": 0.14973, - "23": 0.15192, - "24": 0.15003, - "25": 0.15228, - "26": 0.15066, - "27": 0.15209, - "28": 0.15056, - "29": 0.1516, - "30": 0.15083, - "31": 0.15211, - "32": 0.15028, - "33": 0.1518, - "34": 0.1494, - "35": 0.1521, - "36": 0.15002, - "37": 0.15257, - "38": 0.15095, - "39": 0.1517, - "40": 0.1501, - "41": 0.15352, - "42": 0.15453, - "43": 0.15187, - "44": 0.15281, - "45": 0.15294, - "46": 0.15214, - "47": 0.15376, - "48": 0.15363, - "49": 0.15977, - "50": 0.15249, - "51": 0.15543, - "52": 0.15363, - "53": 0.15379, - "54": 0.15555, - "55": 0.15252, - "56": 0.15295, - "57": 0.15496, - "58": 0.15756, - "59": 0.15345, - "60": 0.15784, - "61": 0.1581, - "62": 0.15302, - "63": 0.15579, - "64": 0.1536, - "65": 0.15523, - "66": 0.15593, - "67": 0.15868, - "68": 0.15303, - "69": 0.1554, - "70": 0.15409, - "71": 0.15229, - "72": 0.15299, - "73": 0.15495, - "74": 0.15601, - "75": 0.15285, - "76": 0.15774, - "77": 0.15171, - "78": 0.15423, - "79": 0.15398, - "80": 0.15445, - "81": 0.15381, - "82": 0.15311, - "83": 0.15584, - "84": 0.15556, - "85": 0.15506, - "86": 0.15314, - "87": 0.15269, - "88": 0.15515, - "89": 0.15923, - "90": 0.15325, - "91": 0.15755, - "92": 0.1543, - "93": 0.15481, - "94": 0.15321, - "95": 0.15397, - "96": 0.15322, - "97": 0.15471, - "98": 0.15631, - "99": 
0.15271, - "100": 0.15653 + "1": 4.52697, + "2": 0.21474, + "3": 0.18314, + "4": 0.16433, + "5": 0.16389, + "6": 0.16359, + "7": 0.16288, + "8": 0.16485, + "9": 0.16341, + "10": 0.16636, + "11": 0.16459, + "12": 0.16651, + "13": 0.16923, + "14": 0.16588, + "15": 0.16651, + "16": 0.16571, + "17": 0.16475, + "18": 0.16415, + "19": 0.16344, + "20": 0.16403, + "21": 0.16411, + "22": 0.16617, + "23": 0.16394, + "24": 0.16115, + "25": 0.16345, + "26": 0.16393, + "27": 0.16292, + "28": 0.16353, + "29": 0.1621, + "30": 0.1632, + "31": 0.16184, + "32": 0.16212, + "33": 0.16236, + "34": 0.16223, + "35": 0.16188, + "36": 0.16211, + "37": 0.16174, + "38": 0.16217, + "39": 0.16213, + "40": 0.16319, + "41": 0.1679, + "42": 0.17056, + "43": 0.16263, + "44": 0.1638, + "45": 0.16323, + "46": 0.16272, + "47": 0.16241, + "48": 0.16364, + "49": 0.16119, + "50": 0.16337, + "51": 0.16229, + "52": 0.16049, + "53": 0.16182, + "54": 0.15929, + "55": 0.15979, + "56": 0.15935, + "57": 0.15888, + "58": 0.16004, + "59": 0.15878, + "60": 0.15969, + "61": 0.16006, + "62": 0.15989, + "63": 0.15996, + "64": 0.15989, + "65": 0.15888, + "66": 0.15863, + "67": 0.15963, + "68": 0.15962, + "69": 0.15986, + "70": 0.15937, + "71": 0.15986, + "72": 0.15975, + "73": 0.16047, + "74": 0.15974, + "75": 0.1605, + "76": 0.15902, + "77": 0.16002, + "78": 0.15954, + "79": 0.16066, + "80": 0.15999, + "81": 0.15955, + "82": 0.15938, + "83": 0.16064, + "84": 0.15923, + "85": 0.15974, + "86": 0.1596, + "87": 0.16022, + "88": 0.15929, + "89": 0.15973, + "90": 0.16082, + "91": 0.15947, + "92": 0.16049, + "93": 0.1592, + "94": 0.15949, + "95": 0.16054, + "96": 0.1606, + "97": 0.15901, + "98": 0.15935, + "99": 0.16016, + "100": 0.15993 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_gb200.json new file mode 
100644 index 00000000000..f4999e7c2dd --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.81848, + "2": 10.8198, + "3": 10.83668, + "4": 10.83525, + "5": 10.84996, + "6": 10.83445, + "7": 10.82529, + "8": 10.81514, + "9": 10.87713, + "10": 10.88261, + "11": 10.87195, + "12": 10.8249, + "13": 10.84823, + "14": 10.81959, + "15": 10.80596, + "16": 10.80141, + "17": 10.77143, + "18": 10.78633, + "19": 10.74566, + "20": 10.62432, + "21": 10.68067, + "22": 10.65086, + "23": 10.76421, + "24": 10.61849, + "25": 10.46057, + "26": 10.59622, + "27": 10.54041, + "28": 10.44496, + "29": 10.39552, + "30": 10.40391, + "31": 10.51272, + "32": 10.32089, + "33": 10.26353, + "34": 10.46902, + "35": 9.95972, + "36": 10.11517, + "37": 10.01099, + "38": 10.38317, + "39": 9.78588, + "40": 10.10413, + "41": 10.13151, + "42": 10.02832, + "43": 10.2098, + "44": 10.07339, + "45": 9.69361, + "46": 9.99604, + "47": 9.93464, + "48": 9.67414, + "49": 9.91775, + "50": 9.93121 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1118.0, + "2": 1206.0, + "3": 1308.0, + "4": 1243.0, + "5": 1256.0, + "6": 1296.0, + "7": 1259.0, + "8": 1023.0, + "9": 1295.0, + "10": 1319.0, + "11": 1282.0, + "12": 1361.0, + "13": 1336.0, + "14": 1176.0, + "15": 1188.0, + "16": 1255.0, + "17": 1182.0, + "18": 1341.0, + "19": 1043.0, + "20": 1099.0, + "21": 1248.0, + "22": 1233.0, + "23": 1369.0, + "24": 1365.0, + "25": 1073.0, + "26": 1245.0, + "27": 1211.0, + "28": 1306.0, + "29": 1317.0, + "30": 1426.0, + "31": 1476.0, + "32": 1399.0, + "33": 1444.0, + "34": 1483.0, + "35": 1242.0, + "36": 1326.0, + "37": 1447.0, + "38": 1542.0, + "39": 1342.0, + "40": 1560.0, + "41": 1611.0, + "42": 1607.0, + "43": 1651.0, + "44": 1594.0, + "45": 1499.0, + "46": 1744.0, + "47": 
1571.0, + "48": 1523.0, + "49": 1629.0, + "50": 1747.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759682560.0, + "2": 759682560.0, + "3": 759682560.0, + "4": 759682560.0, + "5": 759682560.0, + "6": 759682560.0, + "7": 759682560.0, + "8": 759682560.0, + "9": 759682560.0, + "10": 759682560.0, + "11": 759682560.0, + "12": 759682560.0, + "13": 759682560.0, + "14": 759682560.0, + "15": 759682560.0, + "16": 759682560.0, + "17": 759682560.0, + "18": 759682560.0, + "19": 759682560.0, + "20": 759682560.0, + "21": 759682560.0, + "22": 759682560.0, + "23": 759682560.0, + "24": 759682560.0, + "25": 759682560.0, + "26": 759682560.0, + "27": 759682560.0, + "28": 759682560.0, + "29": 759682560.0, + "30": 759682560.0, + "31": 759682560.0, + "32": 759682560.0, + "33": 759682560.0, + "34": 759682560.0, + "35": 759682560.0, + "36": 759682560.0, + "37": 759682560.0, + "38": 759682560.0, + "39": 759682560.0, + "40": 759682560.0, + "41": 759682560.0, + "42": 759682560.0, + "43": 759682560.0, + "44": 759682560.0, + "45": 759682560.0, + "46": 759682560.0, + "47": 759682560.0, + "48": 759682560.0, + "49": 759682560.0, + "50": 759682560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4340903936.0, + "2": 4622615552.0, + "3": 4622615552.0, + "4": 4622615552.0, + "5": 4622615552.0, + "6": 4622615552.0, + "7": 4622615552.0, + "8": 4622615552.0, + "9": 4622615552.0, + "10": 4622615552.0, + "11": 4622615552.0, + "12": 4622615552.0, + "13": 4622615552.0, + "14": 4622615552.0, + "15": 4622615552.0, + "16": 4622615552.0, + "17": 4622615552.0, + "18": 4622615552.0, + "19": 4622615552.0, + "20": 4622615552.0, + "21": 4622615552.0, + "22": 4622615552.0, + "23": 4622615552.0, + "24": 4622615552.0, + "25": 4622615552.0, + "26": 4622615552.0, + "27": 4622615552.0, + "28": 4622615552.0, + "29": 4622615552.0, + "30": 4622615552.0, + "31": 4622615552.0, + "32": 
4622615552.0, + "33": 4622615552.0, + "34": 4622615552.0, + "35": 4622615552.0, + "36": 4622615552.0, + "37": 4622615552.0, + "38": 4622615552.0, + "39": 4622615552.0, + "40": 4622615552.0, + "41": 4622615552.0, + "42": 4622615552.0, + "43": 4622615552.0, + "44": 4622615552.0, + "45": 4622615552.0, + "46": 4622615552.0, + "47": 4622615552.0, + "48": 4622615552.0, + "49": 4622615552.0, + "50": 4622615552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 9.62286, + "2": 0.48262, + "3": 0.20639, + "4": 0.31026, + "5": 0.31827, + "6": 0.29163, + "7": 0.29838, + "8": 0.17787, + "9": 0.27978, + "10": 0.17026, + "11": 0.27026, + "12": 0.1834, + "13": 0.19697, + "14": 0.43123, + "15": 0.18322, + "16": 0.18141, + "17": 0.19707, + "18": 0.4629, + "19": 0.1817, + "20": 0.25096, + "21": 0.18877, + "22": 0.24459, + "23": 0.17984, + "24": 0.20058, + "25": 0.1758, + "26": 0.17872, + "27": 0.17193, + "28": 0.17115, + "29": 0.36031, + "30": 0.2658, + "31": 0.16933, + "32": 0.20868, + "33": 0.17195, + "34": 0.17439, + "35": 0.2501, + "36": 0.17686, + "37": 0.20398, + "38": 0.32448, + "39": 0.1735, + "40": 0.17268, + "41": 0.33455, + "42": 0.23584, + "43": 0.23483, + "44": 0.16767, + "45": 0.17612, + "46": 0.30477, + "47": 0.37075, + "48": 0.18367, + "49": 0.25006, + "50": 0.56439 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json index 3f213856697..399a2c50a8d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 13.74796, - "2": 
0.16361, - "3": 0.12487, - "4": 0.11772, - "5": 0.11849, - "6": 0.11989, - "7": 0.11765, - "8": 0.11845, - "9": 0.11909, - "10": 0.11808, - "11": 0.11972, - "12": 0.12, - "13": 0.11843, - "14": 0.11918, - "15": 0.11921, - "16": 0.11744, - "17": 0.11954, - "18": 0.11987, - "19": 0.12032, - "20": 0.11887, - "21": 0.16664, - "22": 0.14091, - "23": 0.11946, - "24": 0.11878, - "25": 0.12175, - "26": 0.16637, - "27": 0.12057, - "28": 0.11963, - "29": 0.11766, - "30": 0.11771, - "31": 0.11891, - "32": 0.11873, - "33": 0.12109, - "34": 0.12022, - "35": 0.11979, - "36": 0.12012, - "37": 0.11942, - "38": 0.12115, - "39": 0.1194, - "40": 0.12047, - "41": 0.12028, - "42": 0.12169, - "43": 0.12404, - "44": 0.12402, - "45": 0.12356, - "46": 0.12029, - "47": 0.11637, - "48": 0.11959, - "49": 0.11817, - "50": 0.12162 + "1": 10.04337, + "2": 0.16822, + "3": 0.13237, + "4": 0.10427, + "5": 0.10319, + "6": 0.10424, + "7": 0.10225, + "8": 0.10398, + "9": 0.10251, + "10": 0.10246, + "11": 0.10345, + "12": 0.103, + "13": 0.10547, + "14": 0.10352, + "15": 0.10359, + "16": 0.1027, + "17": 0.10378, + "18": 0.10313, + "19": 0.10368, + "20": 0.10223, + "21": 0.10211, + "22": 0.1031, + "23": 0.10247, + "24": 0.1027, + "25": 0.10174, + "26": 0.10084, + "27": 0.10138, + "28": 0.10076, + "29": 0.10064, + "30": 0.10061, + "31": 0.10034, + "32": 0.10099, + "33": 0.10117, + "34": 0.10033, + "35": 0.10174, + "36": 0.10259, + "37": 0.1046, + "38": 0.10281, + "39": 0.10332, + "40": 0.10619, + "41": 0.10943, + "42": 0.10864, + "43": 0.10388, + "44": 0.10366, + "45": 0.10485, + "46": 0.10446, + "47": 0.10301, + "48": 0.10412, + "49": 0.10182, + "50": 0.10428 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgx_a100.json index 16019e9879e..f8dcbbe7370 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgx_a100.json @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 9.03263, - "2": 0.21266, - "3": 0.17373, - "4": 0.17827, - "5": 0.17392, - "6": 0.17641, - "7": 0.17509, - "8": 0.17211, - "9": 0.17464, - "10": 0.21373, - "11": 0.17143, - "12": 0.17137, - "13": 0.17701, - "14": 0.17242, - "15": 0.16945, - "16": 0.1686, - "17": 0.16945, - "18": 0.16793, - "19": 0.16997, - "20": 0.16992, - "21": 0.17016, - "22": 0.16832, - "23": 0.16853, - "24": 0.16912, - "25": 0.16822, - "26": 0.16908, - "27": 0.16609, - "28": 0.239, - "29": 0.16968, - "30": 0.16763, - "31": 0.16962, - "32": 0.16788, - "33": 0.1681, - "34": 0.16749, - "35": 0.16866, - "36": 0.1697, - "37": 0.16838, - "38": 0.16867, - "39": 0.16699, - "40": 0.17098, - "41": 0.1671, - "42": 0.17036, - "43": 0.16755, - "44": 0.16699, - "45": 0.1678, - "46": 0.17136, - "47": 0.16725, - "48": 0.17257, - "49": 0.16903, - "50": 0.1687 + "1": 4.65524, + "2": 0.20203, + "3": 0.1867, + "4": 0.16962, + "5": 0.16879, + "6": 0.16945, + "7": 0.16988, + "8": 0.16975, + "9": 0.16924, + "10": 0.16948, + "11": 0.17005, + "12": 0.16958, + "13": 0.16927, + "14": 0.16868, + "15": 0.1691, + "16": 0.16964, + "17": 0.17076, + "18": 0.16992, + "19": 0.17012, + "20": 0.17014, + "21": 0.16937, + "22": 0.16994, + "23": 0.16976, + "24": 0.16985, + "25": 0.16941, + "26": 0.16946, + "27": 0.16954, + "28": 0.16999, + "29": 0.17047, + "30": 0.17035, + "31": 0.16906, + "32": 0.17029, + "33": 0.17019, + "34": 0.17057, + "35": 0.17053, + "36": 0.16952, + "37": 0.16983, + "38": 0.16978, + "39": 0.17145, + "40": 0.17013, + "41": 0.17043, + "42": 0.17038, + "43": 0.1705, + "44": 0.17028, + "45": 0.17067, + "46": 0.16968, + "47": 0.16977, + "48": 0.16977, + "49": 0.16921, + "50": 0.17026 } } } \ No newline at end of 
file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..605457b437c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93757, + "2": 10.92393, + "3": 10.94318, + "4": 10.93348, + "5": 10.93027, + "6": 10.92214, + "7": 10.9129, + "8": 10.92494, + "9": 10.94302, + "10": 10.92589, + "11": 10.89715, + "12": 10.91085, + "13": 10.91359, + "14": 10.90092, + "15": 10.87211, + "16": 10.86524, + "17": 10.869, + "18": 10.85374, + "19": 10.84295, + "20": 10.76663, + "21": 10.74374, + "22": 10.67695, + "23": 10.72701, + "24": 10.66494, + "25": 10.62546, + "26": 10.654, + "27": 10.62035, + "28": 10.56813, + "29": 10.56412, + "30": 10.41005, + "31": 10.21717, + "32": 10.46613, + "33": 10.47136, + "34": 10.26038, + "35": 10.30272, + "36": 10.264, + "37": 10.35196, + "38": 10.22183, + "39": 10.38981, + "40": 10.11089, + "41": 10.13597, + "42": 10.21619, + "43": 9.89444, + "44": 9.985, + "45": 9.87317, + "46": 9.86222, + "47": 10.13614, + "48": 9.86196, + "49": 9.56912, + "50": 9.91564 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22727242.0, + "2": 22924896.0, + "3": 22597216.0, + "4": 23219716.0, + "5": 22714012.0, + "6": 23021178.0, + "7": 22770808.0, + "8": 22926716.0, + "9": 22842500.0, + "10": 22918960.0, + "11": 22500834.0, + "12": 22460340.0, + "13": 22917536.0, + "14": 22388990.0, + "15": 22821224.0, + "16": 22831266.0, + "17": 22819108.0, + "18": 22582264.0, + "19": 22617384.0, + "20": 22693436.0, + "21": 22739352.0, + "22": 22800104.0, + "23": 22539998.0, + "24": 22771512.0, + "25": 
22819132.0, + "26": 22547588.0, + "27": 22468844.0, + "28": 22453516.0, + "29": 22529320.0, + "30": 22630996.0, + "31": 22955520.0, + "32": 22585756.0, + "33": 22557744.0, + "34": 22835696.0, + "35": 22787828.0, + "36": 22588412.0, + "37": 22498040.0, + "38": 22896082.0, + "39": 22801992.0, + "40": 22657536.0, + "41": 22659220.0, + "42": 22667844.0, + "43": 22975904.0, + "44": 22745960.0, + "45": 22675400.0, + "46": 22884844.0, + "47": 22633716.0, + "48": 22928608.0, + "49": 22727282.0, + "50": 22904808.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 688128512.0, + "2": 688128512.0, + "3": 688128512.0, + "4": 688128512.0, + "5": 688128512.0, + "6": 688128512.0, + "7": 688128512.0, + "8": 688128512.0, + "9": 688128512.0, + "10": 688128512.0, + "11": 688128512.0, + "12": 688128512.0, + "13": 688128512.0, + "14": 688128512.0, + "15": 688128512.0, + "16": 688128512.0, + "17": 688128512.0, + "18": 688128512.0, + "19": 688128512.0, + "20": 688128512.0, + "21": 688128512.0, + "22": 688128512.0, + "23": 688128512.0, + "24": 688128512.0, + "25": 688128512.0, + "26": 688128512.0, + "27": 688128512.0, + "28": 688128512.0, + "29": 688128512.0, + "30": 688128512.0, + "31": 688128512.0, + "32": 688128512.0, + "33": 688128512.0, + "34": 688128512.0, + "35": 688128512.0, + "36": 688128512.0, + "37": 688128512.0, + "38": 688128512.0, + "39": 688128512.0, + "40": 688128512.0, + "41": 688128512.0, + "42": 688128512.0, + "43": 688128512.0, + "44": 688128512.0, + "45": 688128512.0, + "46": 688128512.0, + "47": 688128512.0, + "48": 688128512.0, + "49": 688128512.0, + "50": 688128512.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2158025216.0, + "2": 2416613888.0, + "3": 2416613888.0, + "4": 2416613888.0, + "5": 2416613888.0, + "6": 2416613888.0, + "7": 2416613888.0, + "8": 2416613888.0, + "9": 2416613888.0, + "10": 2416613888.0, + "11": 
2416613888.0, + "12": 2416613888.0, + "13": 2416613888.0, + "14": 2416613888.0, + "15": 2416613888.0, + "16": 2416613888.0, + "17": 2416613888.0, + "18": 2416613888.0, + "19": 2416613888.0, + "20": 2416613888.0, + "21": 2416613888.0, + "22": 2416613888.0, + "23": 2416613888.0, + "24": 2416613888.0, + "25": 2416613888.0, + "26": 2416613888.0, + "27": 2416613888.0, + "28": 2416613888.0, + "29": 2416613888.0, + "30": 2416613888.0, + "31": 2416613888.0, + "32": 2416613888.0, + "33": 2416613888.0, + "34": 2416613888.0, + "35": 2416613888.0, + "36": 2416613888.0, + "37": 2416613888.0, + "38": 2416613888.0, + "39": 2416613888.0, + "40": 2416613888.0, + "41": 2416613888.0, + "42": 2416613888.0, + "43": 2416613888.0, + "44": 2416613888.0, + "45": 2416613888.0, + "46": 2416613888.0, + "47": 2416613888.0, + "48": 2416613888.0, + "49": 2416613888.0, + "50": 2416613888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 8.46203, + "2": 0.17159, + "3": 0.30409, + "4": 0.13684, + "5": 0.29184, + "6": 0.13641, + "7": 0.15548, + "8": 0.24827, + "9": 0.13458, + "10": 0.24758, + "11": 0.26919, + "12": 0.15859, + "13": 0.24263, + "14": 0.40638, + "15": 0.14802, + "16": 0.75916, + "17": 0.27027, + "18": 0.41589, + "19": 0.23222, + "20": 0.27356, + "21": 0.38604, + "22": 0.40542, + "23": 0.61332, + "24": 0.36261, + "25": 0.60934, + "26": 0.13901, + "27": 0.23646, + "28": 0.13727, + "29": 0.23988, + "30": 0.13874, + "31": 0.13771, + "32": 0.13771, + "33": 0.13803, + "34": 0.13667, + "35": 0.13906, + "36": 0.13535, + "37": 0.13539, + "38": 0.13547, + "39": 0.13555, + "40": 0.13617, + "41": 0.37768, + "42": 0.1374, + "43": 0.22178, + "44": 0.13712, + "45": 0.13831, + "46": 0.137, + "47": 0.13638, + "48": 0.13731, + "49": 0.21987, + "50": 0.13794 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_h100.json index ea2bd7effce..8a17375878f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 689176064.0, - "2": 689176064.0, - "3": 689176064.0, - "4": 689176064.0, - "5": 689176064.0, - "6": 689176064.0, - "7": 689176064.0, - "8": 689176064.0, - "9": 689176064.0, - "10": 689176064.0, - "11": 689176064.0, - "12": 689176064.0, - "13": 689176064.0, - "14": 689176064.0, - "15": 689176064.0, - "16": 689176064.0, - "17": 689176064.0, - "18": 689176064.0, - "19": 689176064.0, - "20": 689176064.0, - "21": 689176064.0, - "22": 689176064.0, - "23": 689176064.0, - "24": 689176064.0, - "25": 689176064.0, - "26": 689176064.0, - "27": 689176064.0, - "28": 689176064.0, - "29": 689176064.0, - "30": 689176064.0, - "31": 689176064.0, - "32": 689176064.0, - "33": 689176064.0, - "34": 689176064.0, - "35": 689176064.0, - "36": 689176064.0, - "37": 689176064.0, - "38": 689176064.0, - "39": 689176064.0, - "40": 689176064.0, - "41": 689176064.0, - "42": 689176064.0, - "43": 689176064.0, - "44": 689176064.0, - "45": 689176064.0, - "46": 689176064.0, - "47": 689176064.0, - "48": 689176064.0, - "49": 689176064.0, - "50": 689176064.0 + "1": 687079936.0, + "2": 687079936.0, + "3": 687079936.0, + "4": 687079936.0, + "5": 687079936.0, + "6": 687079936.0, + "7": 687079936.0, + "8": 687079936.0, + "9": 687079936.0, + "10": 687079936.0, + "11": 687079936.0, + "12": 687079936.0, + "13": 687079936.0, + "14": 687079936.0, + "15": 687079936.0, + "16": 687079936.0, + "17": 687079936.0, + "18": 687079936.0, + "19": 687079936.0, + "20": 687079936.0, + "21": 687079936.0, + "22": 687079936.0, + "23": 687079936.0, + "24": 687079936.0, + 
"25": 687079936.0, + "26": 687079936.0, + "27": 687079936.0, + "28": 687079936.0, + "29": 687079936.0, + "30": 687079936.0, + "31": 687079936.0, + "32": 687079936.0, + "33": 687079936.0, + "34": 687079936.0, + "35": 687079936.0, + "36": 687079936.0, + "37": 687079936.0, + "38": 687079936.0, + "39": 687079936.0, + "40": 687079936.0, + "41": 687079936.0, + "42": 687079936.0, + "43": 687079936.0, + "44": 687079936.0, + "45": 687079936.0, + "46": 687079936.0, + "47": 687079936.0, + "48": 687079936.0, + "49": 687079936.0, + "50": 687079936.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2158024192.0, - "2": 2416613888.0, - "3": 2416613888.0, - "4": 2416613888.0, - "5": 2416613888.0, - "6": 2416613888.0, - "7": 2416613888.0, - "8": 2416613888.0, - "9": 2416613888.0, - "10": 2416613888.0, - "11": 2416613888.0, - "12": 2416613888.0, - "13": 2416613888.0, - "14": 2416613888.0, - "15": 2416613888.0, - "16": 2416613888.0, - "17": 2416613888.0, - "18": 2416613888.0, - "19": 2416613888.0, - "20": 2416613888.0, - "21": 2416613888.0, - "22": 2416613888.0, - "23": 2416613888.0, - "24": 2416613888.0, - "25": 2416613888.0, - "26": 2416613888.0, - "27": 2416613888.0, - "28": 2416613888.0, - "29": 2416613888.0, - "30": 2416613888.0, - "31": 2416613888.0, - "32": 2416613888.0, - "33": 2416613888.0, - "34": 2416613888.0, - "35": 2416613888.0, - "36": 2416613888.0, - "37": 2416613888.0, - "38": 2416613888.0, - "39": 2416613888.0, - "40": 2416613888.0, - "41": 2416613888.0, - "42": 2416613888.0, - "43": 2416613888.0, - "44": 2416613888.0, - "45": 2416613888.0, - "46": 2416613888.0, - "47": 2416613888.0, - "48": 2416613888.0, - "49": 2416613888.0, - "50": 2416613888.0 + "1": 2158025216.0, + "2": 2414517760.0, + "3": 2414517760.0, + "4": 2414517760.0, + "5": 2414517760.0, + "6": 2414517760.0, + "7": 2414517760.0, + "8": 2414517760.0, + "9": 2414517760.0, + "10": 2414517760.0, + "11": 2414517760.0, + "12": 2414517760.0, + "13": 
2414517760.0, + "14": 2414517760.0, + "15": 2414517760.0, + "16": 2414517760.0, + "17": 2414517760.0, + "18": 2414517760.0, + "19": 2414517760.0, + "20": 2414517760.0, + "21": 2414517760.0, + "22": 2414517760.0, + "23": 2414517760.0, + "24": 2414517760.0, + "25": 2414517760.0, + "26": 2414517760.0, + "27": 2414517760.0, + "28": 2414517760.0, + "29": 2414517760.0, + "30": 2414517760.0, + "31": 2414517760.0, + "32": 2414517760.0, + "33": 2414517760.0, + "34": 2414517760.0, + "35": 2414517760.0, + "36": 2414517760.0, + "37": 2414517760.0, + "38": 2414517760.0, + "39": 2414517760.0, + "40": 2414517760.0, + "41": 2414517760.0, + "42": 2414517760.0, + "43": 2414517760.0, + "44": 2414517760.0, + "45": 2414517760.0, + "46": 2414517760.0, + "47": 2414517760.0, + "48": 2414517760.0, + "49": 2414517760.0, + "50": 2414517760.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.4694, - "2": 0.13977, - "3": 0.12731, - "4": 0.12879, - "5": 0.11865, - "6": 0.118, - "7": 0.11942, - "8": 0.11938, - "9": 0.11951, - "10": 0.11735, - "11": 0.11836, - "12": 0.11978, - "13": 0.11914, - "14": 0.11821, - "15": 0.11692, - "16": 0.11708, - "17": 0.11825, - "18": 0.11909, - "19": 0.11996, - "20": 0.11962, - "21": 0.12002, - "22": 0.11972, - "23": 0.11943, - "24": 0.11873, - "25": 0.11787, - "26": 0.1172, - "27": 0.11703, - "28": 0.12106, - "29": 0.11863, - "30": 0.11927, - "31": 0.11941, - "32": 0.11801, - "33": 0.11903, - "34": 0.1181, - "35": 0.11794, - "36": 0.11973, - "37": 0.11831, - "38": 0.11753, - "39": 0.11901, - "40": 0.11713, - "41": 0.11926, - "42": 0.11756, - "43": 0.1189, - "44": 0.11853, - "45": 0.12132, - "46": 0.11905, - "47": 0.11892, - "48": 0.11664, - "49": 0.11721, - "50": 0.11854 + "1": 11.04447, + "2": 0.15303, + "3": 0.11363, + "4": 0.09774, + "5": 0.09666, + "6": 0.0975, + "7": 0.09718, + "8": 0.09631, + "9": 0.09764, + "10": 0.0962, + "11": 0.09842, + "12": 0.09595, + "13": 0.09748, + "14": 0.09614, + "15": 
0.09539, + "16": 0.09589, + "17": 0.09791, + "18": 0.0971, + "19": 0.09598, + "20": 0.09703, + "21": 0.09477, + "22": 0.09625, + "23": 0.09521, + "24": 0.09591, + "25": 0.09662, + "26": 0.09594, + "27": 0.096, + "28": 0.09633, + "29": 0.09553, + "30": 0.09789, + "31": 0.09628, + "32": 0.09629, + "33": 0.09555, + "34": 0.09528, + "35": 0.09554, + "36": 0.09515, + "37": 0.09514, + "38": 0.09534, + "39": 0.0958, + "40": 0.09495, + "41": 0.09747, + "42": 0.0951, + "43": 0.09603, + "44": 0.09547, + "45": 0.09561, + "46": 0.09761, + "47": 0.09506, + "48": 0.09637, + "49": 0.09518, + "50": 0.09512 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgx_a100.json index 775784e5ee0..06a1af0c063 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgx_a100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 659815936.0, - "2": 659815936.0, - "3": 659815936.0, - "4": 659815936.0, - "5": 659815936.0, - "6": 659815936.0, - "7": 659815936.0, - "8": 659815936.0, - "9": 659815936.0, - "10": 659815936.0, - "11": 659815936.0, - "12": 659815936.0, - "13": 659815936.0, - "14": 659815936.0, - "15": 659815936.0, - "16": 659815936.0, - "17": 659815936.0, - "18": 659815936.0, - "19": 659815936.0, - "20": 659815936.0, - "21": 659815936.0, - "22": 659815936.0, - "23": 659815936.0, - "24": 659815936.0, - "25": 659815936.0, - "26": 659815936.0, - "27": 659815936.0, - "28": 659815936.0, - "29": 659815936.0, - "30": 659815936.0, - "31": 659815936.0, - "32": 659815936.0, - "33": 659815936.0, - "34": 659815936.0, - "35": 659815936.0, - "36": 659815936.0, - "37": 659815936.0, - "38": 
659815936.0, - "39": 659815936.0, - "40": 659815936.0, - "41": 659815936.0, - "42": 659815936.0, - "43": 659815936.0, - "44": 659815936.0, - "45": 659815936.0, - "46": 659815936.0, - "47": 659815936.0, - "48": 659815936.0, - "49": 659815936.0, - "50": 659815936.0 + "1": 657718784.0, + "2": 657718784.0, + "3": 657718784.0, + "4": 657718784.0, + "5": 657718784.0, + "6": 657718784.0, + "7": 657718784.0, + "8": 657718784.0, + "9": 657718784.0, + "10": 657718784.0, + "11": 657718784.0, + "12": 657718784.0, + "13": 657718784.0, + "14": 657718784.0, + "15": 657718784.0, + "16": 657718784.0, + "17": 657718784.0, + "18": 657718784.0, + "19": 657718784.0, + "20": 657718784.0, + "21": 657718784.0, + "22": 657718784.0, + "23": 657718784.0, + "24": 657718784.0, + "25": 657718784.0, + "26": 657718784.0, + "27": 657718784.0, + "28": 657718784.0, + "29": 657718784.0, + "30": 657718784.0, + "31": 657718784.0, + "32": 657718784.0, + "33": 657718784.0, + "34": 657718784.0, + "35": 657718784.0, + "36": 657718784.0, + "37": 657718784.0, + "38": 657718784.0, + "39": 657718784.0, + "40": 657718784.0, + "41": 657718784.0, + "42": 657718784.0, + "43": 657718784.0, + "44": 657718784.0, + "45": 657718784.0, + "46": 657718784.0, + "47": 657718784.0, + "48": 657718784.0, + "49": 657718784.0, + "50": 657718784.0 } }, "mem-max-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 2128664064.0, - "2": 2387253760.0, - "3": 2387253760.0, - "4": 2387253760.0, - "5": 2387253760.0, - "6": 2387253760.0, - "7": 2387253760.0, - "8": 2387253760.0, - "9": 2387253760.0, - "10": 2387253760.0, - "11": 2387253760.0, - "12": 2387253760.0, - "13": 2387253760.0, - "14": 2387253760.0, - "15": 2387253760.0, - "16": 2387253760.0, - "17": 2387253760.0, - "18": 2387253760.0, - "19": 2387253760.0, - "20": 2387253760.0, - "21": 2387253760.0, - "22": 2387253760.0, - "23": 2387253760.0, - "24": 2387253760.0, - "25": 2387253760.0, - "26": 2387253760.0, - "27": 2387253760.0, - "28": 2387253760.0, - 
"29": 2387253760.0, - "30": 2387253760.0, - "31": 2387253760.0, - "32": 2387253760.0, - "33": 2387253760.0, - "34": 2387253760.0, - "35": 2387253760.0, - "36": 2387253760.0, - "37": 2387253760.0, - "38": 2387253760.0, - "39": 2387253760.0, - "40": 2387253760.0, - "41": 2387253760.0, - "42": 2387253760.0, - "43": 2387253760.0, - "44": 2387253760.0, - "45": 2387253760.0, - "46": 2387253760.0, - "47": 2387253760.0, - "48": 2387253760.0, - "49": 2387253760.0, - "50": 2387253760.0 + "2": 2385156608.0, + "3": 2385156608.0, + "4": 2385156608.0, + "5": 2385156608.0, + "6": 2385156608.0, + "7": 2385156608.0, + "8": 2385156608.0, + "9": 2385156608.0, + "10": 2385156608.0, + "11": 2385156608.0, + "12": 2385156608.0, + "13": 2385156608.0, + "14": 2385156608.0, + "15": 2385156608.0, + "16": 2385156608.0, + "17": 2385156608.0, + "18": 2385156608.0, + "19": 2385156608.0, + "20": 2385156608.0, + "21": 2385156608.0, + "22": 2385156608.0, + "23": 2385156608.0, + "24": 2385156608.0, + "25": 2385156608.0, + "26": 2385156608.0, + "27": 2385156608.0, + "28": 2385156608.0, + "29": 2385156608.0, + "30": 2385156608.0, + "31": 2385156608.0, + "32": 2385156608.0, + "33": 2385156608.0, + "34": 2385156608.0, + "35": 2385156608.0, + "36": 2385156608.0, + "37": 2385156608.0, + "38": 2385156608.0, + "39": 2385156608.0, + "40": 2385156608.0, + "41": 2385156608.0, + "42": 2385156608.0, + "43": 2385156608.0, + "44": 2385156608.0, + "45": 2385156608.0, + "46": 2385156608.0, + "47": 2385156608.0, + "48": 2385156608.0, + "49": 2385156608.0, + "50": 2385156608.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.48419, - "2": 0.19482, - "3": 0.26748, - "4": 0.1633, - "5": 0.15828, - "6": 0.15656, - "7": 0.1572, - "8": 0.15759, - "9": 0.15735, - "10": 0.15751, - "11": 0.15648, - "12": 0.15605, - "13": 0.15693, - "14": 0.15672, - "15": 0.15676, - "16": 0.15664, - "17": 0.15683, - "18": 0.15646, - "19": 0.15696, - "20": 0.15623, - "21": 0.15652, - "22": 
0.15759, - "23": 0.15729, - "24": 0.15687, - "25": 0.15563, - "26": 0.1575, - "27": 0.15616, - "28": 0.15855, - "29": 0.15771, - "30": 0.15851, - "31": 0.1579, - "32": 0.1587, - "33": 0.1577, - "34": 0.15827, - "35": 0.15808, - "36": 0.15825, - "37": 0.1583, - "38": 0.15836, - "39": 0.15797, - "40": 0.15829, - "41": 0.15787, - "42": 0.15789, - "43": 0.15839, - "44": 0.15862, - "45": 0.15727, - "46": 0.15919, - "47": 0.15859, - "48": 0.15898, - "49": 0.15832, - "50": 0.1586 + "1": 4.20554, + "2": 0.17937, + "3": 0.16839, + "4": 0.15493, + "5": 0.15446, + "6": 0.15139, + "7": 0.15305, + "8": 0.15189, + "9": 0.15216, + "10": 0.15599, + "11": 0.15357, + "12": 0.15419, + "13": 0.15436, + "14": 0.15288, + "15": 0.15253, + "16": 0.15223, + "17": 0.15315, + "18": 0.15292, + "19": 0.15296, + "20": 0.15256, + "21": 0.15297, + "22": 0.15389, + "23": 0.15399, + "24": 0.15299, + "25": 0.15347, + "26": 0.15651, + "27": 0.15552, + "28": 0.15444, + "29": 0.15801, + "30": 0.15708, + "31": 0.15903, + "32": 0.15742, + "33": 0.15743, + "34": 0.15818, + "35": 0.15832, + "36": 0.15788, + "37": 0.1571, + "38": 0.15852, + "39": 0.15701, + "40": 0.15794, + "41": 0.15813, + "42": 0.15763, + "43": 0.15873, + "44": 0.15814, + "45": 0.15802, + "46": 0.15831, + "47": 0.1573, + "48": 0.1585, + "49": 0.15823, + "50": 0.15801 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..d3f4ebb9b68 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.85072, + "2": 
10.87664, + "3": 10.85783, + "4": 10.84306, + "5": 10.88146, + "6": 10.87139, + "7": 10.89191, + "8": 10.85963, + "9": 10.86934, + "10": 10.8278, + "11": 10.90311, + "12": 10.87801, + "13": 10.87305, + "14": 10.89032, + "15": 10.87011, + "16": 10.8511, + "17": 10.84459, + "18": 10.84726, + "19": 10.86383, + "20": 10.82208, + "21": 10.79825, + "22": 10.73204, + "23": 10.81839, + "24": 10.74606, + "25": 10.71761, + "26": 10.77202, + "27": 10.77401, + "28": 10.72063, + "29": 10.72787, + "30": 10.59722, + "31": 10.42528, + "32": 10.6597, + "33": 10.6513, + "34": 10.49325, + "35": 10.52835, + "36": 10.49365, + "37": 10.57261, + "38": 10.44872, + "39": 10.58148, + "40": 10.32557, + "41": 10.36356, + "42": 10.41806, + "43": 10.12507, + "44": 10.22734, + "45": 10.12083, + "46": 10.10118, + "47": 10.36102, + "48": 10.09786, + "49": 9.8396, + "50": 10.15591 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22727256.0, + "2": 22925250.0, + "3": 22596852.0, + "4": 23219000.0, + "5": 22714020.0, + "6": 23020792.0, + "7": 22771170.0, + "8": 22926228.0, + "9": 22842640.0, + "10": 22918308.0, + "11": 22499960.0, + "12": 22459596.0, + "13": 22916016.0, + "14": 22388008.0, + "15": 22821540.0, + "16": 22830500.0, + "17": 22818592.0, + "18": 22582030.0, + "19": 22617218.0, + "20": 22693536.0, + "21": 22739118.0, + "22": 22798904.0, + "23": 22538834.0, + "24": 22770708.0, + "25": 22818172.0, + "26": 22547374.0, + "27": 22467964.0, + "28": 22452370.0, + "29": 22528234.0, + "30": 22630740.0, + "31": 22954650.0, + "32": 22584568.0, + "33": 22557506.0, + "34": 22835004.0, + "35": 22787526.0, + "36": 22588580.0, + "37": 22496788.0, + "38": 22895632.0, + "39": 22800112.0, + "40": 22657224.0, + "41": 22658160.0, + "42": 22666840.0, + "43": 22975312.0, + "44": 22745190.0, + "45": 22674440.0, + "46": 22883296.0, + "47": 22633056.0, + "48": 22927568.0, + "49": 22727008.0, + "50": 22903184.0 + } + }, + "mem-allocated-bytes": { + 
"start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 640822784.0, + "2": 640822784.0, + "3": 640822784.0, + "4": 640822784.0, + "5": 640822784.0, + "6": 640822784.0, + "7": 640822784.0, + "8": 640822784.0, + "9": 640822784.0, + "10": 640822784.0, + "11": 640822784.0, + "12": 640822784.0, + "13": 640822784.0, + "14": 640822784.0, + "15": 640822784.0, + "16": 640822784.0, + "17": 640822784.0, + "18": 640822784.0, + "19": 640822784.0, + "20": 640822784.0, + "21": 640822784.0, + "22": 640822784.0, + "23": 640822784.0, + "24": 640822784.0, + "25": 640822784.0, + "26": 640822784.0, + "27": 640822784.0, + "28": 640822784.0, + "29": 640822784.0, + "30": 640822784.0, + "31": 640822784.0, + "32": 640822784.0, + "33": 640822784.0, + "34": 640822784.0, + "35": 640822784.0, + "36": 640822784.0, + "37": 640822784.0, + "38": 640822784.0, + "39": 640822784.0, + "40": 640822784.0, + "41": 640822784.0, + "42": 640822784.0, + "43": 640822784.0, + "44": 640822784.0, + "45": 640822784.0, + "46": 640822784.0, + "47": 640822784.0, + "48": 640822784.0, + "49": 640822784.0, + "50": 640822784.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2610027008.0, + "2": 2842349056.0, + "3": 2842349056.0, + "4": 2842349056.0, + "5": 2842349056.0, + "6": 2842349056.0, + "7": 2842349056.0, + "8": 2842349056.0, + "9": 2842349056.0, + "10": 2842349056.0, + "11": 2842349056.0, + "12": 2842349056.0, + "13": 2842349056.0, + "14": 2842349056.0, + "15": 2842349056.0, + "16": 2842349056.0, + "17": 2842349056.0, + "18": 2842349056.0, + "19": 2842349056.0, + "20": 2842349056.0, + "21": 2842349056.0, + "22": 2842349056.0, + "23": 2842349056.0, + "24": 2842349056.0, + "25": 2842349056.0, + "26": 2842349056.0, + "27": 2842349056.0, + "28": 2842349056.0, + "29": 2842349056.0, + "30": 2842349056.0, + "31": 2842349056.0, + "32": 2842349056.0, + "33": 2842349056.0, + "34": 2842349056.0, + "35": 2842349056.0, + "36": 
2842349056.0, + "37": 2842349056.0, + "38": 2842349056.0, + "39": 2842349056.0, + "40": 2842349056.0, + "41": 2842349056.0, + "42": 2842349056.0, + "43": 2842349056.0, + "44": 2842349056.0, + "45": 2842349056.0, + "46": 2842349056.0, + "47": 2842349056.0, + "48": 2842349056.0, + "49": 2842349056.0, + "50": 2842349056.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 8.84804, + "2": 0.09127, + "3": 0.38568, + "4": 0.10516, + "5": 0.18187, + "6": 0.13288, + "7": 0.17979, + "8": 0.25055, + "9": 0.07376, + "10": 0.06396, + "11": 0.42421, + "12": 0.06524, + "13": 0.06447, + "14": 0.06499, + "15": 0.24593, + "16": 0.06277, + "17": 0.2443, + "18": 0.26141, + "19": 0.06388, + "20": 0.06319, + "21": 0.44504, + "22": 0.06309, + "23": 0.24094, + "24": 0.06366, + "25": 0.12615, + "26": 0.45347, + "27": 0.06454, + "28": 0.06518, + "29": 0.23896, + "30": 0.06569, + "31": 0.23519, + "32": 0.06271, + "33": 0.06599, + "34": 0.45696, + "35": 0.06614, + "36": 0.24275, + "37": 0.0626, + "38": 0.18028, + "39": 0.07237, + "40": 0.24435, + "41": 0.09656, + "42": 0.258, + "43": 0.09133, + "44": 0.09694, + "45": 0.11452, + "46": 0.08793, + "47": 0.24321, + "48": 0.08548, + "49": 0.0909, + "50": 0.16493 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_h100.json index 8f65ccec75e..bf7a46b3f3c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - 
"1": 638724608.0, - "2": 638724608.0, - "3": 638724608.0, - "4": 638724608.0, - "5": 638724608.0, - "6": 638724608.0, - "7": 638724608.0, - "8": 638724608.0, - "9": 638724608.0, - "10": 638724608.0, - "11": 638724608.0, - "12": 638724608.0, - "13": 638724608.0, - "14": 638724608.0, - "15": 638724608.0, - "16": 638724608.0, - "17": 638724608.0, - "18": 638724608.0, - "19": 638724608.0, - "20": 638724608.0, - "21": 638724608.0, - "22": 638724608.0, - "23": 638724608.0, - "24": 638724608.0, - "25": 638724608.0, - "26": 638724608.0, - "27": 638724608.0, - "28": 638724608.0, - "29": 638724608.0, - "30": 638724608.0, - "31": 638724608.0, - "32": 638724608.0, - "33": 638724608.0, - "34": 638724608.0, - "35": 638724608.0, - "36": 638724608.0, - "37": 638724608.0, - "38": 638724608.0, - "39": 638724608.0, - "40": 638724608.0, - "41": 638724608.0, - "42": 638724608.0, - "43": 638724608.0, - "44": 638724608.0, - "45": 638724608.0, - "46": 638724608.0, - "47": 638724608.0, - "48": 638724608.0, - "49": 638724608.0, - "50": 638724608.0 + "1": 640822784.0, + "2": 640822784.0, + "3": 640822784.0, + "4": 640822784.0, + "5": 640822784.0, + "6": 640822784.0, + "7": 640822784.0, + "8": 640822784.0, + "9": 640822784.0, + "10": 640822784.0, + "11": 640822784.0, + "12": 640822784.0, + "13": 640822784.0, + "14": 640822784.0, + "15": 640822784.0, + "16": 640822784.0, + "17": 640822784.0, + "18": 640822784.0, + "19": 640822784.0, + "20": 640822784.0, + "21": 640822784.0, + "22": 640822784.0, + "23": 640822784.0, + "24": 640822784.0, + "25": 640822784.0, + "26": 640822784.0, + "27": 640822784.0, + "28": 640822784.0, + "29": 640822784.0, + "30": 641740288.0, + "31": 640822784.0, + "32": 640822784.0, + "33": 640822784.0, + "34": 640822784.0, + "35": 640822784.0, + "36": 640822784.0, + "37": 640822784.0, + "38": 640822784.0, + "39": 640822784.0, + "40": 640822784.0, + "41": 640822784.0, + "42": 640822784.0, + "43": 640822784.0, + "44": 640822784.0, + "45": 640822784.0, + "46": 640822784.0, + 
"47": 640822784.0, + "48": 641740288.0, + "49": 640822784.0, + "50": 640822784.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2610025984.0, - "2": 2840250880.0, - "3": 2840250880.0, - "4": 2840250880.0, - "5": 2840250880.0, - "6": 2840250880.0, - "7": 2840250880.0, - "8": 2840250880.0, - "9": 2840250880.0, - "10": 2840250880.0, - "11": 2840250880.0, - "12": 2840250880.0, - "13": 2840250880.0, - "14": 2840250880.0, - "15": 2840250880.0, - "16": 2840250880.0, - "17": 2840250880.0, - "18": 2840250880.0, - "19": 2840250880.0, - "20": 2840250880.0, - "21": 2840250880.0, - "22": 2840250880.0, - "23": 2840250880.0, - "24": 2840250880.0, - "25": 2840250880.0, - "26": 2840250880.0, - "27": 2840250880.0, - "28": 2840250880.0, - "29": 2840250880.0, - "30": 2840250880.0, - "31": 2840250880.0, - "32": 2840250880.0, - "33": 2840250880.0, - "34": 2840250880.0, - "35": 2840250880.0, - "36": 2840250880.0, - "37": 2840250880.0, - "38": 2840250880.0, - "39": 2840250880.0, - "40": 2840250880.0, - "41": 2840250880.0, - "42": 2840250880.0, - "43": 2840250880.0, - "44": 2840250880.0, - "45": 2840250880.0, - "46": 2840250880.0, - "47": 2840250880.0, - "48": 2840250880.0, - "49": 2840250880.0, - "50": 2840250880.0 + "1": 2610027008.0, + "2": 2842349056.0, + "3": 2842349056.0, + "4": 2843266560.0, + "5": 2843266560.0, + "6": 2843266560.0, + "7": 2843266560.0, + "8": 2843266560.0, + "9": 2843266560.0, + "10": 2843266560.0, + "11": 2843266560.0, + "12": 2843266560.0, + "13": 2843266560.0, + "14": 2843266560.0, + "15": 2843266560.0, + "16": 2843266560.0, + "17": 2843266560.0, + "18": 2843266560.0, + "19": 2843266560.0, + "20": 2843266560.0, + "21": 2843266560.0, + "22": 2843266560.0, + "23": 2843266560.0, + "24": 2843266560.0, + "25": 2843266560.0, + "26": 2843266560.0, + "27": 2843266560.0, + "28": 2843266560.0, + "29": 2843266560.0, + "30": 2843266560.0, + "31": 2843266560.0, + "32": 2843266560.0, + "33": 2843266560.0, + "34": 
2843266560.0, + "35": 2843266560.0, + "36": 2843266560.0, + "37": 2843266560.0, + "38": 2843266560.0, + "39": 2843266560.0, + "40": 2843266560.0, + "41": 2843266560.0, + "42": 2843266560.0, + "43": 2843266560.0, + "44": 2843266560.0, + "45": 2843266560.0, + "46": 2843266560.0, + "47": 2843266560.0, + "48": 2843266560.0, + "49": 2843266560.0, + "50": 2843266560.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 12.45868, - "2": 0.10817, - "3": 0.08964, - "4": 0.08342, - "5": 0.08198, - "6": 0.08179, - "7": 0.08172, - "8": 0.08319, - "9": 0.07964, - "10": 0.07872, - "11": 0.07783, - "12": 0.07839, - "13": 0.07961, - "14": 0.07913, - "15": 0.08021, - "16": 0.07965, - "17": 0.07946, - "18": 0.07924, - "19": 0.0792, - "20": 0.07919, - "21": 0.07872, - "22": 0.07958, - "23": 0.07857, - "24": 0.0793, - "25": 0.07936, - "26": 0.07956, - "27": 0.07904, - "28": 0.07939, - "29": 0.08007, - "30": 0.07912, - "31": 0.07945, - "32": 0.07845, - "33": 0.07804, - "34": 0.07801, - "35": 0.07775, - "36": 0.07835, - "37": 0.0781, - "38": 0.07939, - "39": 0.07789, - "40": 0.07803, - "41": 0.07935, - "42": 0.07838, - "43": 0.07862, - "44": 0.07884, - "45": 0.07747, - "46": 0.07832, - "47": 0.07792, - "48": 0.07896, - "49": 0.07798, - "50": 0.0779 + "1": 11.63091, + "2": 0.10057, + "3": 0.08189, + "4": 0.05797, + "5": 0.05721, + "6": 0.05698, + "7": 0.05706, + "8": 0.05717, + "9": 0.05757, + "10": 0.05769, + "11": 0.05657, + "12": 0.05708, + "13": 0.05676, + "14": 0.05712, + "15": 0.05745, + "16": 0.05704, + "17": 0.05756, + "18": 0.05699, + "19": 0.05682, + "20": 0.05715, + "21": 0.0569, + "22": 0.05766, + "23": 0.0572, + "24": 0.05719, + "25": 0.05674, + "26": 0.05685, + "27": 0.05682, + "28": 0.05657, + "29": 0.0565, + "30": 0.05693, + "31": 0.05726, + "32": 0.05673, + "33": 0.05675, + "34": 0.05664, + "35": 0.05717, + "36": 0.05653, + "37": 0.05652, + "38": 0.05671, + "39": 0.05659, + "40": 0.05731, + "41": 0.05949, + "42": 0.05669, 
+ "43": 0.05723, + "44": 0.05695, + "45": 0.05766, + "46": 0.05736, + "47": 0.05802, + "48": 0.05662, + "49": 0.05689, + "50": 0.05838 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgx_a100.json index 44d53d6e9d6..7995900ad8f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgx_a100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 609364480.0, - "2": 609364480.0, - "3": 609364480.0, - "4": 609364480.0, - "5": 609364480.0, - "6": 609364480.0, - "7": 609364480.0, - "8": 609364480.0, - "9": 609364480.0, - "10": 609364480.0, - "11": 609364480.0, - "12": 609364480.0, - "13": 609364480.0, - "14": 609364480.0, - "15": 609364480.0, - "16": 609364480.0, - "17": 609364480.0, - "18": 609364480.0, - "19": 609364480.0, - "20": 609364480.0, - "21": 609364480.0, - "22": 609364480.0, - "23": 609364480.0, - "24": 609364480.0, - "25": 609364480.0, - "26": 609364480.0, - "27": 609364480.0, - "28": 609364480.0, - "29": 609364480.0, - "30": 609364480.0, - "31": 609364480.0, - "32": 609364480.0, - "33": 609364480.0, - "34": 609364480.0, - "35": 609364480.0, - "36": 609364480.0, - "37": 609364480.0, - "38": 609364480.0, - "39": 609364480.0, - "40": 609364480.0, - "41": 609364480.0, - "42": 609364480.0, - "43": 609364480.0, - "44": 609364480.0, - "45": 609364480.0, - "46": 609364480.0, - "47": 609364480.0, - "48": 609364480.0, - "49": 609364480.0, - "50": 609364480.0 + "1": 611461632.0, + "2": 611461632.0, + "3": 611461632.0, + "4": 611461632.0, + "5": 
611461632.0, + "6": 611461632.0, + "7": 611461632.0, + "8": 611461632.0, + "9": 611461632.0, + "10": 611461632.0, + "11": 611461632.0, + "12": 611461632.0, + "13": 611461632.0, + "14": 611461632.0, + "15": 611461632.0, + "16": 611461632.0, + "17": 611461632.0, + "18": 611461632.0, + "19": 611461632.0, + "20": 611461632.0, + "21": 611461632.0, + "22": 611461632.0, + "23": 611461632.0, + "24": 611461632.0, + "25": 611461632.0, + "26": 611461632.0, + "27": 611461632.0, + "28": 611461632.0, + "29": 611461632.0, + "30": 611461632.0, + "31": 611461632.0, + "32": 611461632.0, + "33": 611461632.0, + "34": 611461632.0, + "35": 611461632.0, + "36": 611461632.0, + "37": 611461632.0, + "38": 611461632.0, + "39": 611461632.0, + "40": 611461632.0, + "41": 611461632.0, + "42": 611461632.0, + "43": 611461632.0, + "44": 611461632.0, + "45": 611461632.0, + "46": 611461632.0, + "47": 611461632.0, + "48": 611461632.0, + "49": 611461632.0, + "50": 611461632.0 } }, "mem-max-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 2580665856.0, - "2": 2810890752.0, - "3": 2811808256.0, - "4": 2811808256.0, - "5": 2811808256.0, - "6": 2811808256.0, - "7": 2811808256.0, - "8": 2811808256.0, - "9": 2811808256.0, - "10": 2811808256.0, - "11": 2811808256.0, - "12": 2811808256.0, - "13": 2811808256.0, - "14": 2811808256.0, - "15": 2811808256.0, - "16": 2811808256.0, - "17": 2811808256.0, - "18": 2811808256.0, - "19": 2811808256.0, - "20": 2811808256.0, - "21": 2811808256.0, - "22": 2811808256.0, - "23": 2811808256.0, - "24": 2811808256.0, - "25": 2811808256.0, - "26": 2811808256.0, - "27": 2811808256.0, - "28": 2811808256.0, - "29": 2811808256.0, - "30": 2811808256.0, - "31": 2811808256.0, - "32": 2811808256.0, - "33": 2811808256.0, - "34": 2811808256.0, - "35": 2811808256.0, - "36": 2811808256.0, - "37": 2811808256.0, - "38": 2811808256.0, - "39": 2811808256.0, - "40": 2811808256.0, - "41": 2811808256.0, - "42": 2811808256.0, - "43": 2811808256.0, - "44": 2811808256.0, - 
"45": 2811808256.0, - "46": 2811808256.0, - "47": 2811808256.0, - "48": 2811808256.0, - "49": 2811808256.0, - "50": 2811808256.0 + "2": 2812987904.0, + "3": 2812987904.0, + "4": 2812987904.0, + "5": 2812987904.0, + "6": 2812987904.0, + "7": 2812987904.0, + "8": 2812987904.0, + "9": 2812987904.0, + "10": 2812987904.0, + "11": 2812987904.0, + "12": 2812987904.0, + "13": 2812987904.0, + "14": 2812987904.0, + "15": 2812987904.0, + "16": 2812987904.0, + "17": 2812987904.0, + "18": 2812987904.0, + "19": 2812987904.0, + "20": 2812987904.0, + "21": 2812987904.0, + "22": 2812987904.0, + "23": 2812987904.0, + "24": 2812987904.0, + "25": 2812987904.0, + "26": 2812987904.0, + "27": 2812987904.0, + "28": 2812987904.0, + "29": 2812987904.0, + "30": 2812987904.0, + "31": 2812987904.0, + "32": 2812987904.0, + "33": 2812987904.0, + "34": 2812987904.0, + "35": 2812987904.0, + "36": 2812987904.0, + "37": 2812987904.0, + "38": 2812987904.0, + "39": 2812987904.0, + "40": 2812987904.0, + "41": 2812987904.0, + "42": 2812987904.0, + "43": 2812987904.0, + "44": 2812987904.0, + "45": 2812987904.0, + "46": 2812987904.0, + "47": 2812987904.0, + "48": 2812987904.0, + "49": 2812987904.0, + "50": 2812987904.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 9.118, - "2": 0.12375, - "3": 0.31133, - "4": 0.09209, - "5": 0.09124, - "6": 0.09155, - "7": 0.09163, - "8": 0.0915, - "9": 0.09161, - "10": 0.09407, - "11": 0.09038, - "12": 0.09031, - "13": 0.09069, - "14": 0.09024, - "15": 0.09043, - "16": 0.08996, - "17": 0.09133, - "18": 0.09072, - "19": 0.09048, - "20": 0.09016, - "21": 0.09061, - "22": 0.09073, - "23": 0.09098, - "24": 0.09135, - "25": 0.09235, - "26": 0.09059, - "27": 0.09009, - "28": 0.09049, - "29": 0.09147, - "30": 0.09097, - "31": 0.09098, - "32": 0.09045, - "33": 0.09082, - "34": 0.08994, - "35": 0.09054, - "36": 0.09124, - "37": 0.09063, - "38": 0.08989, - "39": 0.09234, - "40": 0.09165, - "41": 0.09179, - "42": 0.09165, - 
"43": 0.09235, - "44": 0.09147, - "45": 0.0922, - "46": 0.09192, - "47": 0.09138, - "48": 0.09278, - "49": 0.09145, - "50": 0.09175 + "1": 5.29488, + "2": 0.12291, + "3": 0.10694, + "4": 0.09161, + "5": 0.09138, + "6": 0.09229, + "7": 0.09025, + "8": 0.08872, + "9": 0.08988, + "10": 0.08934, + "11": 0.08865, + "12": 0.08864, + "13": 0.08947, + "14": 0.08897, + "15": 0.08938, + "16": 0.08885, + "17": 0.08914, + "18": 0.08802, + "19": 0.08997, + "20": 0.08786, + "21": 0.08941, + "22": 0.08893, + "23": 0.08869, + "24": 0.08862, + "25": 0.08883, + "26": 0.08857, + "27": 0.08808, + "28": 0.088, + "29": 0.08839, + "30": 0.088, + "31": 0.08888, + "32": 0.08825, + "33": 0.08778, + "34": 0.08749, + "35": 0.0885, + "36": 0.08731, + "37": 0.08765, + "38": 0.08815, + "39": 0.08808, + "40": 0.08731, + "41": 0.08911, + "42": 0.08759, + "43": 0.08898, + "44": 0.08797, + "45": 0.08803, + "46": 0.08736, + "47": 0.08757, + "48": 0.0873, + "49": 0.08751, + "50": 0.08746 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml index 37933a0e0a7..ee2c093e0ab 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml @@ -17,8 +17,8 @@ MODEL_ARGS: --rotary-percent: 0.5 --no-rope-fusion: true #TODO: We can remove this once upgrading to the DEV container --apply-layernorm-1p: true - --attention-output-gate: true --apply-wd-to-qk-layernorm: true + --attention-output-gate: true --experimental-attention-variant: gated_delta_net --linear-attention-freq: 3 --linear-conv-kernel-dim: 4 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..2de96fdc0a6 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.87037, + "2": 10.87119, + "3": 10.84722, + "4": 10.83185, + "5": 10.86876, + "6": 10.88753, + "7": 10.86095, + "8": 10.86864, + "9": 10.85424, + "10": 10.82319, + "11": 10.86739, + "12": 10.8666, + "13": 10.88538, + "14": 10.88994, + "15": 10.81366, + "16": 10.80332, + "17": 10.77723, + "18": 10.81063, + "19": 10.80524, + "20": 10.70339, + "21": 10.67012, + "22": 10.51209, + "23": 10.69985, + "24": 10.56044, + "25": 10.49857, + "26": 10.57872, + "27": 10.56749, + "28": 10.53108, + "29": 10.55838, + "30": 10.32727, + "31": 10.04391, + "32": 10.42571, + "33": 10.4193, + "34": 10.15675, + "35": 10.21897, + "36": 10.16206, + "37": 10.29722, + "38": 10.13231, + "39": 10.35956, + "40": 10.02296, + "41": 10.06592, + "42": 10.15518, + "43": 9.75609, + "44": 9.86983, + "45": 9.75094, + "46": 9.73598, + "47": 10.0747, + "48": 9.77504, + "49": 9.43418, + "50": 9.84339, + "51": 9.78577, + "52": 9.6708, + "53": 10.00723, + "54": 9.89701, + "55": 9.82612, + "56": 9.54829, + "57": 9.40077, + "58": 9.77422, + "59": 9.51686, + "60": 9.42721, + "61": 9.63408, + "62": 9.93879, + "63": 9.30503, + "64": 9.71266, + "65": 8.86836, + "66": 9.64474, + "67": 9.31349, + "68": 9.73443, + "69": 9.755, + "70": 9.68613, + "71": 9.57703, + "72": 9.53066, + "73": 9.43092, + "74": 8.8548, + "75": 9.35819, + "76": 9.01448, + "77": 10.0265, + "78": 9.68108, + "79": 9.33349, + "80": 9.35488, + "81": 9.44135, + "82": 9.66188, + "83": 9.26313, + "84": 9.37185, + "85": 9.57429, + "86": 9.03444, + "87": 9.56188, + "88": 9.71281, + "89": 9.55802, + "90": 
9.79197, + "91": 9.29019, + "92": 9.31615, + "93": 9.04052, + "94": 8.78281, + "95": 9.49395, + "96": 9.48884, + "97": 9.26046, + "98": 9.63128, + "99": 8.85093, + "100": 9.36489 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 623.0, + "2": 605.0, + "3": 617.0, + "4": 657.0, + "5": 652.0, + "6": 662.0, + "7": 595.0, + "8": 626.0, + "9": 683.0, + "10": 550.0, + "11": 658.0, + "12": 657.0, + "13": 701.0, + "14": 668.0, + "15": 659.0, + "16": 692.0, + "17": 640.0, + "18": 627.0, + "19": 633.0, + "20": 601.0, + "21": 632.0, + "22": 637.0, + "23": 718.0, + "24": 623.0, + "25": 612.0, + "26": 689.0, + "27": 678.0, + "28": 717.0, + "29": 715.0, + "30": 670.0, + "31": 627.0, + "32": 718.0, + "33": 850.0, + "34": 658.0, + "35": 721.0, + "36": 764.0, + "37": 859.0, + "38": 733.0, + "39": 851.0, + "40": 766.0, + "41": 863.0, + "42": 839.0, + "43": 732.0, + "44": 870.0, + "45": 737.0, + "46": 913.0, + "47": 911.0, + "48": 832.0, + "49": 825.0, + "50": 827.0, + "51": 914.0, + "52": 900.0, + "53": 989.0, + "54": 1021.0, + "55": 874.0, + "56": 985.0, + "57": 841.0, + "58": 938.0, + "59": 1035.0, + "60": 876.0, + "61": 1044.0, + "62": 982.0, + "63": 976.0, + "64": 1071.0, + "65": 1026.0, + "66": 994.0, + "67": 961.0, + "68": 1084.0, + "69": 1108.0, + "70": 1081.0, + "71": 1069.0, + "72": 931.0, + "73": 984.0, + "74": 770.0, + "75": 914.0, + "76": 1050.0, + "77": 1196.0, + "78": 1128.0, + "79": 1048.0, + "80": 1147.0, + "81": 1175.0, + "82": 1112.0, + "83": 988.0, + "84": 1099.0, + "85": 1133.0, + "86": 875.0, + "87": 1189.0, + "88": 1114.0, + "89": 1101.0, + "90": 1124.0, + "91": 1079.0, + "92": 1114.0, + "93": 937.0, + "94": 1106.0, + "95": 1097.0, + "96": 1178.0, + "97": 1103.0, + "98": 1260.0, + "99": 1105.0, + "100": 1131.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 637976064.0, + "2": 637976064.0, + "3": 637976064.0, + "4": 637976064.0, + "5": 
637976064.0, + "6": 637976064.0, + "7": 637976064.0, + "8": 637976064.0, + "9": 637976064.0, + "10": 637976064.0, + "11": 637976064.0, + "12": 637976064.0, + "13": 637976064.0, + "14": 637976064.0, + "15": 637976064.0, + "16": 637976064.0, + "17": 637976064.0, + "18": 637976064.0, + "19": 637976064.0, + "20": 637976064.0, + "21": 637976064.0, + "22": 637976064.0, + "23": 637976064.0, + "24": 637976064.0, + "25": 637976064.0, + "26": 637976064.0, + "27": 637976064.0, + "28": 637976064.0, + "29": 637976064.0, + "30": 637976064.0, + "31": 637976064.0, + "32": 637976064.0, + "33": 637976064.0, + "34": 637976064.0, + "35": 637976064.0, + "36": 637976064.0, + "37": 637976064.0, + "38": 637976064.0, + "39": 637976064.0, + "40": 637976064.0, + "41": 637976064.0, + "42": 637976064.0, + "43": 637976064.0, + "44": 637976064.0, + "45": 637976064.0, + "46": 637976064.0, + "47": 637976064.0, + "48": 637976064.0, + "49": 637976064.0, + "50": 637976064.0, + "51": 637976064.0, + "52": 637976064.0, + "53": 637976064.0, + "54": 637976064.0, + "55": 637976064.0, + "56": 637976064.0, + "57": 637976064.0, + "58": 637976064.0, + "59": 637976064.0, + "60": 637976064.0, + "61": 637976064.0, + "62": 637976064.0, + "63": 637976064.0, + "64": 637976064.0, + "65": 637976064.0, + "66": 637976064.0, + "67": 637976064.0, + "68": 637976064.0, + "69": 637976064.0, + "70": 637976064.0, + "71": 637976064.0, + "72": 637976064.0, + "73": 637976064.0, + "74": 637976064.0, + "75": 637976064.0, + "76": 637976064.0, + "77": 637976064.0, + "78": 637976064.0, + "79": 637976064.0, + "80": 637976064.0, + "81": 637976064.0, + "82": 637976064.0, + "83": 637976064.0, + "84": 637976064.0, + "85": 637976064.0, + "86": 637976064.0, + "87": 637976064.0, + "88": 637976064.0, + "89": 637976064.0, + "90": 637976064.0, + "91": 637976064.0, + "92": 637976064.0, + "93": 637976064.0, + "94": 637976064.0, + "95": 637976064.0, + "96": 637976064.0, + "97": 637976064.0, + "98": 637976064.0, + "99": 637976064.0, + "100": 
637976064.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 908535808.0, + "2": 1167747584.0, + "3": 1168796160.0, + "4": 1168796160.0, + "5": 1179675136.0, + "6": 1179675136.0, + "7": 1179675136.0, + "8": 1179675136.0, + "9": 1179675136.0, + "10": 1179675136.0, + "11": 1179675136.0, + "12": 1179675136.0, + "13": 1179675136.0, + "14": 1179675136.0, + "15": 1179675136.0, + "16": 1179675136.0, + "17": 1180330496.0, + "18": 1180330496.0, + "19": 1180330496.0, + "20": 1180330496.0, + "21": 1180330496.0, + "22": 1180330496.0, + "23": 1180330496.0, + "24": 1180330496.0, + "25": 1180330496.0, + "26": 1180330496.0, + "27": 1180330496.0, + "28": 1180330496.0, + "29": 1180330496.0, + "30": 1180330496.0, + "31": 1180330496.0, + "32": 1180330496.0, + "33": 1180330496.0, + "34": 1180330496.0, + "35": 1180330496.0, + "36": 1180330496.0, + "37": 1180330496.0, + "38": 1180330496.0, + "39": 1180330496.0, + "40": 1180330496.0, + "41": 1180330496.0, + "42": 1180330496.0, + "43": 1180330496.0, + "44": 1180330496.0, + "45": 1180330496.0, + "46": 1180330496.0, + "47": 1180330496.0, + "48": 1180330496.0, + "49": 1180330496.0, + "50": 1180330496.0, + "51": 1180330496.0, + "52": 1180330496.0, + "53": 1180330496.0, + "54": 1180330496.0, + "55": 1180330496.0, + "56": 1180330496.0, + "57": 1180330496.0, + "58": 1180330496.0, + "59": 1180330496.0, + "60": 1180330496.0, + "61": 1180330496.0, + "62": 1180330496.0, + "63": 1180330496.0, + "64": 1180330496.0, + "65": 1180330496.0, + "66": 1180330496.0, + "67": 1180330496.0, + "68": 1180330496.0, + "69": 1180330496.0, + "70": 1180330496.0, + "71": 1180330496.0, + "72": 1180330496.0, + "73": 1180330496.0, + "74": 1180330496.0, + "75": 1180330496.0, + "76": 1180330496.0, + "77": 1180330496.0, + "78": 1180330496.0, + "79": 1180330496.0, + "80": 1180330496.0, + "81": 1180330496.0, + "82": 1180330496.0, + "83": 1180330496.0, + "84": 1180330496.0, + "85": 1180330496.0, + "86": 
1180330496.0, + "87": 1180330496.0, + "88": 1180330496.0, + "89": 1180330496.0, + "90": 1180330496.0, + "91": 1180330496.0, + "92": 1180330496.0, + "93": 1180330496.0, + "94": 1180330496.0, + "95": 1180330496.0, + "96": 1180330496.0, + "97": 1180330496.0, + "98": 1180330496.0, + "99": 1180330496.0, + "100": 1180330496.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.2723, + "2": 0.3877, + "3": 0.37645, + "4": 0.36551, + "5": 0.37045, + "6": 0.36893, + "7": 0.36938, + "8": 0.36753, + "9": 0.36888, + "10": 0.43135, + "11": 0.36252, + "12": 0.37084, + "13": 0.365, + "14": 0.36033, + "15": 0.35887, + "16": 0.36201, + "17": 0.53142, + "18": 0.36699, + "19": 0.36318, + "20": 0.36321, + "21": 0.36209, + "22": 0.72283, + "23": 0.3641, + "24": 0.36359, + "25": 0.36227, + "26": 0.36731, + "27": 0.36879, + "28": 0.36963, + "29": 0.37051, + "30": 0.36794, + "31": 0.37079, + "32": 0.368, + "33": 0.44096, + "34": 0.52072, + "35": 0.48704, + "36": 0.4152, + "37": 0.37792, + "38": 0.37304, + "39": 0.37505, + "40": 0.37438, + "41": 0.3737, + "42": 0.37569, + "43": 0.37181, + "44": 0.37336, + "45": 0.3731, + "46": 0.37229, + "47": 0.37374, + "48": 0.37375, + "49": 0.3719, + "50": 0.37298, + "51": 0.3797, + "52": 0.36304, + "53": 0.36729, + "54": 0.36756, + "55": 0.37134, + "56": 0.37139, + "57": 0.37112, + "58": 0.38383, + "59": 0.3916, + "60": 0.37403, + "61": 0.37341, + "62": 0.37078, + "63": 0.37095, + "64": 0.37149, + "65": 0.37269, + "66": 0.3736, + "67": 0.37255, + "68": 0.36695, + "69": 0.37351, + "70": 0.37443, + "71": 0.3726, + "72": 0.3731, + "73": 0.37353, + "74": 0.3737, + "75": 0.373, + "76": 0.36094, + "77": 0.36374, + "78": 0.36366, + "79": 0.36446, + "80": 0.36414, + "81": 0.36245, + "82": 0.3641, + "83": 0.3627, + "84": 0.36487, + "85": 0.36027, + "86": 0.3602, + "87": 0.3611, + "88": 0.36555, + "89": 0.36571, + "90": 0.36479, + "91": 0.36175, + "92": 0.36215, + "93": 0.36421, + "94": 0.36147, + 
"95": 0.36348, + "96": 0.36311, + "97": 0.36282, + "98": 0.38328, + "99": 0.40994, + "100": 0.36791 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json index e88d1fcb739..fcabeb878a2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.85163, - "2": 10.85389, + "1": 10.85166, + "2": 10.85388, "3": 10.83866, "4": 10.84328, - "5": 10.8787, - "6": 10.87586, - "7": 10.86186, - "8": 10.84928, - "9": 10.84877, - "10": 10.80639, - "11": 10.88679, - "12": 10.85682, - "13": 10.86235, - "14": 10.87768, - "15": 10.81037, - "16": 10.81984, - "17": 10.7828, - "18": 10.80322, - "19": 10.78358, - "20": 10.68694, - "21": 10.66905, - "22": 10.52315, - "23": 10.68436, - "24": 10.56577, - "25": 10.49705, + "5": 10.87866, + "6": 10.87587, + "7": 10.86182, + "8": 10.84929, + "9": 10.84878, + "10": 10.80638, + "11": 10.88681, + "12": 10.85678, + "13": 10.86232, + "14": 10.87763, + "15": 10.81038, + "16": 10.81986, + "17": 10.78278, + "18": 10.80323, + "19": 10.78355, + "20": 10.68693, + "21": 10.66908, + "22": 10.52312, + "23": 10.68433, + "24": 10.56579, + "25": 10.49704, "26": 10.56553, - "27": 10.58171, + "27": 10.58173, "28": 10.52995, "29": 10.55561, - "30": 10.32672, - "31": 10.07636, - "32": 10.43058, - "33": 10.42455, - "34": 10.16647, - "35": 10.22486, - "36": 10.18341, - "37": 10.29956, - "38": 10.14498, - "39": 10.37061, - "40": 10.04385, - "41": 10.0945, - "42": 10.17381, - "43": 9.77538, - "44": 9.90308, - "45": 
9.779, - "46": 9.76548, - "47": 10.10723, - "48": 9.80029, - "49": 9.47526, - "50": 9.85792, - "51": 9.80039, - "52": 9.69506, - "53": 10.0285, - "54": 9.9143, - "55": 9.83807, + "30": 10.32669, + "31": 10.07637, + "32": 10.43055, + "33": 10.42453, + "34": 10.1665, + "35": 10.22484, + "36": 10.18342, + "37": 10.29954, + "38": 10.14501, + "39": 10.37065, + "40": 10.04387, + "41": 10.09449, + "42": 10.17379, + "43": 9.77531, + "44": 9.9031, + "45": 9.77897, + "46": 9.7655, + "47": 10.10719, + "48": 9.80026, + "49": 9.47522, + "50": 9.85791, + "51": 9.80035, + "52": 9.69511, + "53": 10.02853, + "54": 9.91431, + "55": 9.83806, "56": 9.57833, - "57": 9.42582, + "57": 9.42585, "58": 9.79172, - "59": 9.53617, - "60": 9.44186, - "61": 9.65656, - "62": 9.94377, - "63": 9.32151, - "64": 9.73339, - "65": 8.88427, - "66": 9.65533, - "67": 9.32106, - "68": 9.75064, - "69": 9.764, - "70": 9.70469, - "71": 9.56861, - "72": 9.53902, + "59": 9.53621, + "60": 9.44189, + "61": 9.65658, + "62": 9.94379, + "63": 9.3214, + "64": 9.73336, + "65": 8.88432, + "66": 9.65534, + "67": 9.32102, + "68": 9.75059, + "69": 9.76397, + "70": 9.70471, + "71": 9.56854, + "72": 9.53904, "73": 9.45226, - "74": 8.87736, - "75": 9.37933, + "74": 8.87739, + "75": 9.37931, "76": 9.01867, "77": 10.03519, - "78": 9.69263, - "79": 9.33459, - "80": 9.36591, - "81": 9.43919, - "82": 9.66572, - "83": 9.25441, - "84": 9.378, - "85": 9.57422, - "86": 9.03277, + "78": 9.69265, + "79": 9.33455, + "80": 9.36593, + "81": 9.4392, + "82": 9.66573, + "83": 9.25449, + "84": 9.37805, + "85": 9.57423, + "86": 9.03275, "87": 9.55775, "88": 9.71521, - "89": 9.55703, - "90": 9.788, - "91": 9.29518, - "92": 9.31516, - "93": 9.03246, - "94": 8.79087, - "95": 9.48833, - "96": 9.49574, - "97": 9.2713, - "98": 9.64071, - "99": 8.84741, - "100": 9.35871 + "89": 9.55701, + "90": 9.78806, + "91": 9.29516, + "92": 9.31513, + "93": 9.03243, + "94": 8.79086, + "95": 9.48838, + "96": 9.49572, + "97": 9.27133, + "98": 9.6407, + "99": 
8.84739, + "100": 9.35873 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 600.0, - "2": 574.0, - "3": 677.0, - "4": 617.0, - "5": 669.0, - "6": 650.0, - "7": 700.0, + "1": 615.0, + "2": 567.0, + "3": 639.0, + "4": 642.0, + "5": 662.0, + "6": 700.0, + "7": 710.0, "8": 624.0, - "9": 649.0, - "10": 562.0, - "11": 661.0, - "12": 622.0, - "13": 711.0, - "14": 656.0, - "15": 688.0, - "16": 667.0, - "17": 696.0, - "18": 660.0, - "19": 607.0, - "20": 649.0, - "21": 646.0, - "22": 653.0, - "23": 743.0, - "24": 678.0, - "25": 663.0, - "26": 661.0, - "27": 703.0, - "28": 769.0, - "29": 775.0, - "30": 767.0, - "31": 606.0, - "32": 755.0, - "33": 764.0, - "34": 676.0, - "35": 779.0, - "36": 768.0, - "37": 824.0, - "38": 808.0, - "39": 893.0, - "40": 795.0, - "41": 774.0, - "42": 895.0, - "43": 758.0, - "44": 770.0, - "45": 738.0, + "9": 630.0, + "10": 524.0, + "11": 720.0, + "12": 664.0, + "13": 674.0, + "14": 680.0, + "15": 695.0, + "16": 700.0, + "17": 670.0, + "18": 690.0, + "19": 632.0, + "20": 640.0, + "21": 656.0, + "22": 647.0, + "23": 731.0, + "24": 647.0, + "25": 628.0, + "26": 651.0, + "27": 673.0, + "28": 758.0, + "29": 784.0, + "30": 718.0, + "31": 564.0, + "32": 765.0, + "33": 817.0, + "34": 703.0, + "35": 705.0, + "36": 759.0, + "37": 812.0, + "38": 826.0, + "39": 849.0, + "40": 827.0, + "41": 819.0, + "42": 845.0, + "43": 716.0, + "44": 759.0, + "45": 727.0, "46": 856.0, - "47": 912.0, - "48": 843.0, - "49": 884.0, - "50": 782.0, - "51": 967.0, - "52": 940.0, - "53": 988.0, - "54": 937.0, - "55": 870.0, - "56": 981.0, - "57": 838.0, - "58": 909.0, - "59": 969.0, - "60": 821.0, - "61": 1016.0, - "62": 953.0, - "63": 895.0, - "64": 1137.0, - "65": 917.0, - "66": 1050.0, - "67": 946.0, - "68": 974.0, - "69": 1091.0, - "70": 1024.0, - "71": 1104.0, - "72": 888.0, - "73": 967.0, - "74": 657.0, - "75": 879.0, - "76": 977.0, - "77": 1172.0, - "78": 1085.0, - "79": 1107.0, - "80": 1178.0, - "81": 1236.0, - "82": 1103.0, 
- "83": 975.0, - "84": 1164.0, - "85": 1160.0, - "86": 879.0, - "87": 1184.0, - "88": 1102.0, - "89": 1105.0, - "90": 1122.0, - "91": 1065.0, - "92": 1090.0, - "93": 848.0, - "94": 1158.0, - "95": 1173.0, - "96": 1140.0, - "97": 1074.0, - "98": 1203.0, - "99": 1141.0, - "100": 1111.0 + "47": 962.0, + "48": 827.0, + "49": 873.0, + "50": 804.0, + "51": 908.0, + "52": 927.0, + "53": 989.0, + "54": 941.0, + "55": 852.0, + "56": 912.0, + "57": 880.0, + "58": 952.0, + "59": 984.0, + "60": 801.0, + "61": 1030.0, + "62": 918.0, + "63": 910.0, + "64": 1061.0, + "65": 982.0, + "66": 1062.0, + "67": 964.0, + "68": 973.0, + "69": 1075.0, + "70": 1008.0, + "71": 1050.0, + "72": 918.0, + "73": 992.0, + "74": 677.0, + "75": 907.0, + "76": 1055.0, + "77": 1107.0, + "78": 1134.0, + "79": 1049.0, + "80": 1086.0, + "81": 1209.0, + "82": 1072.0, + "83": 1028.0, + "84": 1165.0, + "85": 1194.0, + "86": 884.0, + "87": 1206.0, + "88": 1080.0, + "89": 1155.0, + "90": 1062.0, + "91": 1141.0, + "92": 1133.0, + "93": 900.0, + "94": 1126.0, + "95": 1096.0, + "96": 1109.0, + "97": 1052.0, + "98": 1249.0, + "99": 1150.0, + "100": 1090.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 689356288.0, - "2": 689356288.0, - "3": 689356288.0, - "4": 689356288.0, - "5": 689356288.0, - "6": 689356288.0, - "7": 689356288.0, - "8": 689356288.0, - "9": 689356288.0, - "10": 689356288.0, - "11": 689356288.0, - "12": 689356288.0, - "13": 689356288.0, - "14": 689356288.0, - "15": 689356288.0, - "16": 689356288.0, - "17": 689356288.0, - "18": 689356288.0, - "19": 689356288.0, - "20": 689356288.0, - "21": 689356288.0, - "22": 689356288.0, - "23": 689356288.0, - "24": 689356288.0, - "25": 689356288.0, - "26": 689356288.0, - "27": 689356288.0, - "28": 689356288.0, - "29": 689356288.0, - "30": 689356288.0, - "31": 689356288.0, - "32": 689356288.0, - "33": 689356288.0, - "34": 689356288.0, - "35": 689356288.0, - "36": 689356288.0, - "37": 689356288.0, - 
"38": 689356288.0, - "39": 689356288.0, - "40": 689356288.0, - "41": 689356288.0, - "42": 689356288.0, - "43": 689356288.0, - "44": 689356288.0, - "45": 689356288.0, - "46": 689356288.0, - "47": 689356288.0, - "48": 689356288.0, - "49": 689356288.0, - "50": 689356288.0, - "51": 689356288.0, - "52": 689356288.0, - "53": 689356288.0, - "54": 689356288.0, - "55": 689356288.0, - "56": 689356288.0, - "57": 689356288.0, - "58": 689356288.0, - "59": 689356288.0, - "60": 689356288.0, - "61": 689356288.0, - "62": 689356288.0, - "63": 689356288.0, - "64": 689356288.0, - "65": 689356288.0, - "66": 689356288.0, - "67": 689356288.0, - "68": 689356288.0, - "69": 689356288.0, - "70": 689356288.0, - "71": 689356288.0, - "72": 689356288.0, - "73": 689356288.0, - "74": 689356288.0, - "75": 689356288.0, - "76": 689356288.0, - "77": 689356288.0, - "78": 689356288.0, - "79": 689356288.0, - "80": 689356288.0, - "81": 689356288.0, - "82": 689356288.0, - "83": 689356288.0, - "84": 689356288.0, - "85": 689356288.0, - "86": 689356288.0, - "87": 689356288.0, - "88": 689356288.0, - "89": 689356288.0, - "90": 689356288.0, - "91": 689356288.0, - "92": 689356288.0, - "93": 689356288.0, - "94": 689356288.0, - "95": 689356288.0, - "96": 689356288.0, - "97": 689356288.0, - "98": 689356288.0, - "99": 689356288.0, - "100": 689356288.0 + "1": 690404864.0, + "2": 690404864.0, + "3": 690404864.0, + "4": 690404864.0, + "5": 690404864.0, + "6": 690404864.0, + "7": 690404864.0, + "8": 690404864.0, + "9": 690404864.0, + "10": 690404864.0, + "11": 690404864.0, + "12": 690404864.0, + "13": 690404864.0, + "14": 690404864.0, + "15": 690404864.0, + "16": 690404864.0, + "17": 690404864.0, + "18": 690404864.0, + "19": 690404864.0, + "20": 690404864.0, + "21": 690404864.0, + "22": 690404864.0, + "23": 690404864.0, + "24": 690404864.0, + "25": 690404864.0, + "26": 690404864.0, + "27": 690404864.0, + "28": 690404864.0, + "29": 690404864.0, + "30": 690404864.0, + "31": 690404864.0, + "32": 690404864.0, + "33": 
690404864.0, + "34": 690404864.0, + "35": 690404864.0, + "36": 690404864.0, + "37": 690404864.0, + "38": 690404864.0, + "39": 690404864.0, + "40": 690404864.0, + "41": 690404864.0, + "42": 690404864.0, + "43": 690404864.0, + "44": 690404864.0, + "45": 690404864.0, + "46": 690404864.0, + "47": 690404864.0, + "48": 690404864.0, + "49": 690404864.0, + "50": 690404864.0, + "51": 690404864.0, + "52": 690404864.0, + "53": 690404864.0, + "54": 690404864.0, + "55": 690404864.0, + "56": 690404864.0, + "57": 690404864.0, + "58": 690404864.0, + "59": 690404864.0, + "60": 690404864.0, + "61": 690404864.0, + "62": 690404864.0, + "63": 690404864.0, + "64": 690404864.0, + "65": 690404864.0, + "66": 690404864.0, + "67": 690404864.0, + "68": 690404864.0, + "69": 690404864.0, + "70": 690404864.0, + "71": 690404864.0, + "72": 690404864.0, + "73": 690404864.0, + "74": 690404864.0, + "75": 690404864.0, + "76": 690404864.0, + "77": 690404864.0, + "78": 690404864.0, + "79": 690404864.0, + "80": 690404864.0, + "81": 690404864.0, + "82": 690404864.0, + "83": 690404864.0, + "84": 690404864.0, + "85": 690404864.0, + "86": 690404864.0, + "87": 690404864.0, + "88": 690404864.0, + "89": 690404864.0, + "90": 690404864.0, + "91": 690404864.0, + "92": 690404864.0, + "93": 690404864.0, + "94": 690404864.0, + "95": 690404864.0, + "96": 690404864.0, + "97": 690404864.0, + "98": 690404864.0, + "99": 690404864.0, + "100": 690404864.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 959652864.0, - "2": 1221223936.0, - "3": 1221224960.0, - "4": 1221224960.0, - "5": 1221224960.0, - "6": 1221224960.0, - "7": 1221224960.0, - "8": 1221224960.0, - "9": 1221224960.0, - "10": 1221224960.0, - "11": 1221224960.0, - "12": 1221224960.0, - "13": 1221224960.0, - "14": 1221224960.0, - "15": 1221224960.0, - "16": 1221224960.0, - "17": 1221224960.0, - "18": 1221224960.0, - "19": 1221224960.0, - "20": 1221224960.0, - "21": 1221224960.0, - "22": 
1221224960.0, - "23": 1221224960.0, - "24": 1221224960.0, - "25": 1221224960.0, - "26": 1221224960.0, - "27": 1221224960.0, - "28": 1221224960.0, - "29": 1221224960.0, - "30": 1221224960.0, - "31": 1221224960.0, - "32": 1221224960.0, - "33": 1221224960.0, - "34": 1221224960.0, - "35": 1221224960.0, - "36": 1221224960.0, - "37": 1221224960.0, - "38": 1221224960.0, - "39": 1221224960.0, - "40": 1221224960.0, - "41": 1221224960.0, - "42": 1221224960.0, - "43": 1221224960.0, - "44": 1221224960.0, - "45": 1221224960.0, - "46": 1221224960.0, - "47": 1221224960.0, - "48": 1221224960.0, - "49": 1221224960.0, - "50": 1221224960.0, - "51": 1221224960.0, - "52": 1221224960.0, - "53": 1221224960.0, - "54": 1221224960.0, - "55": 1221224960.0, - "56": 1221224960.0, - "57": 1221224960.0, - "58": 1221224960.0, - "59": 1221224960.0, - "60": 1221224960.0, - "61": 1221224960.0, - "62": 1221224960.0, - "63": 1221224960.0, - "64": 1221224960.0, - "65": 1221224960.0, - "66": 1221224960.0, - "67": 1221224960.0, - "68": 1221224960.0, - "69": 1221224960.0, - "70": 1221224960.0, - "71": 1221224960.0, - "72": 1221224960.0, - "73": 1221224960.0, - "74": 1221224960.0, - "75": 1221224960.0, - "76": 1221224960.0, - "77": 1221224960.0, - "78": 1221224960.0, - "79": 1221224960.0, - "80": 1221224960.0, - "81": 1221224960.0, - "82": 1221224960.0, - "83": 1221224960.0, - "84": 1221224960.0, - "85": 1221224960.0, - "86": 1221224960.0, - "87": 1221224960.0, - "88": 1221224960.0, - "89": 1221224960.0, - "90": 1221224960.0, - "91": 1221224960.0, - "92": 1221224960.0, - "93": 1221224960.0, - "94": 1221224960.0, - "95": 1221224960.0, - "96": 1221224960.0, - "97": 1221224960.0, - "98": 1221224960.0, - "99": 1221224960.0, - "100": 1221224960.0 + "1": 963848704.0, + "2": 1223319552.0, + "3": 1223321600.0, + "4": 1226467840.0, + "5": 1226467840.0, + "6": 1226467840.0, + "7": 1226467840.0, + "8": 1226467840.0, + "9": 1226467840.0, + "10": 1226467840.0, + "11": 1226467840.0, + "12": 1226467840.0, + "13": 
1226467840.0, + "14": 1226467840.0, + "15": 1226467840.0, + "16": 1226467840.0, + "17": 1226467840.0, + "18": 1226467840.0, + "19": 1226467840.0, + "20": 1226467840.0, + "21": 1226467840.0, + "22": 1226467840.0, + "23": 1226467840.0, + "24": 1226467840.0, + "25": 1226467840.0, + "26": 1226467840.0, + "27": 1226467840.0, + "28": 1226467840.0, + "29": 1226467840.0, + "30": 1226467840.0, + "31": 1226467840.0, + "32": 1226467840.0, + "33": 1226467840.0, + "34": 1226467840.0, + "35": 1226467840.0, + "36": 1226467840.0, + "37": 1226467840.0, + "38": 1226467840.0, + "39": 1226467840.0, + "40": 1226467840.0, + "41": 1226467840.0, + "42": 1226467840.0, + "43": 1226467840.0, + "44": 1226467840.0, + "45": 1226467840.0, + "46": 1226467840.0, + "47": 1226467840.0, + "48": 1226467840.0, + "49": 1226467840.0, + "50": 1226467840.0, + "51": 1226467840.0, + "52": 1226467840.0, + "53": 1226467840.0, + "54": 1226467840.0, + "55": 1226467840.0, + "56": 1226467840.0, + "57": 1226467840.0, + "58": 1226467840.0, + "59": 1226467840.0, + "60": 1226467840.0, + "61": 1226467840.0, + "62": 1226467840.0, + "63": 1226467840.0, + "64": 1226467840.0, + "65": 1226467840.0, + "66": 1228564480.0, + "67": 1228564480.0, + "68": 1228564480.0, + "69": 1228564480.0, + "70": 1228564480.0, + "71": 1228564480.0, + "72": 1228564480.0, + "73": 1228564480.0, + "74": 1228564480.0, + "75": 1228564480.0, + "76": 1228564480.0, + "77": 1228564480.0, + "78": 1228564480.0, + "79": 1228564480.0, + "80": 1228564480.0, + "81": 1228564480.0, + "82": 1228564480.0, + "83": 1228564480.0, + "84": 1228564480.0, + "85": 1228564480.0, + "86": 1228564480.0, + "87": 1228564480.0, + "88": 1228564480.0, + "89": 1228564480.0, + "90": 1228564480.0, + "91": 1228564480.0, + "92": 1228564480.0, + "93": 1228564480.0, + "94": 1228564480.0, + "95": 1228564480.0, + "96": 1228564480.0, + "97": 1228564480.0, + "98": 1228564480.0, + "99": 1228564480.0, + "100": 1228564480.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, 
"step_interval": 1, "values": { - "1": 10.34397, - "2": 0.2989, - "3": 0.28701, - "4": 0.28299, - "5": 0.28509, - "6": 0.28378, - "7": 0.28776, - "8": 0.28423, - "9": 0.28722, - "10": 0.28077, - "11": 0.28936, - "12": 0.28752, - "13": 0.2827, - "14": 0.28574, - "15": 0.28467, - "16": 0.28217, - "17": 0.28486, - "18": 0.28581, - "19": 0.28155, - "20": 0.28509, - "21": 0.28251, - "22": 0.28381, - "23": 0.27876, - "24": 0.28748, - "25": 0.28028, - "26": 0.28778, - "27": 0.28262, - "28": 0.28332, - "29": 0.28115, - "30": 0.28178, - "31": 0.28495, - "32": 0.28165, - "33": 0.28663, - "34": 0.29207, - "35": 0.28688, - "36": 0.27656, - "37": 0.28363, - "38": 0.28429, - "39": 0.28629, - "40": 0.27969, - "41": 0.27978, - "42": 0.28454, - "43": 0.28022, - "44": 0.28402, - "45": 0.27645, - "46": 0.28795, - "47": 0.28097, - "48": 0.28395, - "49": 0.28183, - "50": 0.28615, - "51": 0.28373, - "52": 0.27449, - "53": 0.27345, - "54": 0.27869, - "55": 0.27079, - "56": 0.27901, - "57": 0.27662, - "58": 0.27749, - "59": 0.27681, - "60": 0.27639, - "61": 0.27275, - "62": 0.27644, - "63": 0.27655, - "64": 0.2741, - "65": 0.27749, - "66": 0.27321, - "67": 0.27962, - "68": 0.2759, - "69": 0.27771, - "70": 0.27472, - "71": 0.27602, - "72": 0.27221, - "73": 0.27682, - "74": 0.27563, - "75": 0.27287, - "76": 0.27345, - "77": 0.27491, - "78": 0.27512, - "79": 0.27463, - "80": 0.27721, - "81": 0.27482, - "82": 0.27638, - "83": 0.27219, - "84": 0.27519, - "85": 0.27727, - "86": 0.2756, - "87": 0.27351, - "88": 0.27369, - "89": 0.27604, - "90": 0.27461, - "91": 0.27436, - "92": 0.27679, - "93": 0.27705, - "94": 0.27348, - "95": 0.28014, - "96": 0.27482, - "97": 0.27546, - "98": 0.27381, - "99": 0.27767, - "100": 0.27505 + "1": 26.73247, + "2": 0.28783, + "3": 0.26296, + "4": 0.24972, + "5": 0.2479, + "6": 0.24714, + "7": 0.24726, + "8": 0.24855, + "9": 0.24703, + "10": 0.24477, + "11": 0.24467, + "12": 0.24519, + "13": 0.24528, + "14": 0.24363, + "15": 0.24416, + "16": 0.24464, + "17": 0.24373, 
+ "18": 0.24449, + "19": 0.24381, + "20": 0.24223, + "21": 0.24321, + "22": 0.24402, + "23": 0.24351, + "24": 0.24104, + "25": 0.2457, + "26": 0.26018, + "27": 0.24263, + "28": 0.24452, + "29": 0.24554, + "30": 0.24449, + "31": 0.24131, + "32": 0.24436, + "33": 0.24229, + "34": 0.24145, + "35": 0.24151, + "36": 0.24069, + "37": 0.24346, + "38": 0.24255, + "39": 0.2406, + "40": 0.2461, + "41": 0.24292, + "42": 0.24219, + "43": 0.24382, + "44": 0.24308, + "45": 0.24494, + "46": 0.24068, + "47": 0.24147, + "48": 0.24203, + "49": 0.24203, + "50": 0.67265, + "51": 0.25099, + "52": 0.24353, + "53": 0.2433, + "54": 0.2415, + "55": 0.24839, + "56": 0.24674, + "57": 0.25418, + "58": 0.24862, + "59": 0.24888, + "60": 0.24709, + "61": 0.24747, + "62": 0.24661, + "63": 0.2473, + "64": 0.24646, + "65": 0.24565, + "66": 0.24543, + "67": 0.24477, + "68": 0.24661, + "69": 0.24448, + "70": 0.24685, + "71": 0.24516, + "72": 0.2468, + "73": 0.2464, + "74": 0.24577, + "75": 0.24431, + "76": 0.248, + "77": 0.24567, + "78": 0.24542, + "79": 0.24648, + "80": 0.24639, + "81": 0.24794, + "82": 0.24579, + "83": 0.24552, + "84": 0.24513, + "85": 0.24815, + "86": 0.2459, + "87": 0.24473, + "88": 0.24826, + "89": 0.24495, + "90": 0.24673, + "91": 0.24489, + "92": 0.2447, + "93": 0.24508, + "94": 0.24553, + "95": 0.24031, + "96": 0.24272, + "97": 0.24481, + "98": 0.24216, + "99": 0.24091, + "100": 0.24384 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..3b380aa8354 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, 
+ "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.80035, + "52": 9.69509, + "53": 10.02853, + "54": 9.9143, + "55": 9.8381, + "56": 9.57833, + "57": 9.42584, + "58": 9.79167, + "59": 9.53621, + "60": 9.44186, + "61": 9.65657, + "62": 9.94379, + "63": 9.32145, + "64": 9.73337, + "65": 8.88429, + "66": 9.65529, + "67": 9.32104, + "68": 9.75065, + "69": 9.764, + "70": 9.70469, + "71": 9.56858, + "72": 9.53904, + "73": 9.45226, + "74": 8.87738, + "75": 9.37933, + "76": 9.01863, + "77": 10.0352, + "78": 9.69262, + "79": 9.33456, + "80": 9.36592, + "81": 9.43916, + "82": 9.66575, + "83": 9.25444, + "84": 9.37804, + "85": 9.57421, + "86": 9.03275, + "87": 9.55774, + "88": 9.71525, + "89": 9.55707, + "90": 9.78808, + "91": 9.29516, + "92": 9.31517, + "93": 9.03243, + "94": 8.79084, + "95": 9.48835, + "96": 9.49573, + "97": 9.27132, + "98": 9.64071, + "99": 8.84737, + "100": 9.35874 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", 
+ "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 903.0, + "52": 949.0, + "53": 1088.0, + "54": 951.0, + "55": 860.0, + "56": 937.0, + "57": 858.0, + "58": 1036.0, + "59": 925.0, + "60": 897.0, + "61": 1029.0, + "62": 921.0, + "63": 901.0, + "64": 1087.0, + "65": 919.0, + "66": 1033.0, + "67": 996.0, + "68": 963.0, + "69": 1003.0, + "70": 1100.0, + "71": 1057.0, + "72": 901.0, + "73": 1061.0, + "74": 728.0, + "75": 943.0, + "76": 1070.0, + "77": 1164.0, + "78": 1138.0, + "79": 1046.0, + "80": 1162.0, + "81": 1204.0, + "82": 1108.0, + "83": 998.0, + "84": 1165.0, + "85": 1164.0, + "86": 904.0, + "87": 1222.0, + "88": 1098.0, + "89": 1129.0, + "90": 1176.0, + "91": 1102.0, + "92": 1174.0, + "93": 894.0, + "94": 1187.0, + "95": 1128.0, + "96": 1204.0, + "97": 1108.0, + "98": 1311.0, + "99": 1148.0, + "100": 1085.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": 
"nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 690405888.0, + "52": 690405888.0, + "53": 690405888.0, + "54": 690405888.0, + "55": 690405888.0, + "56": 690405888.0, + "57": 690405888.0, + "58": 690405888.0, + "59": 690405888.0, + "60": 690405888.0, + "61": 690405888.0, + "62": 690405888.0, + "63": 690405888.0, + "64": 690405888.0, + "65": 690405888.0, + "66": 690405888.0, + "67": 690405888.0, + "68": 690405888.0, + "69": 690405888.0, + "70": 690405888.0, + "71": 690405888.0, + "72": 690405888.0, + "73": 690405888.0, + "74": 690405888.0, + "75": 690405888.0, + "76": 690405888.0, + "77": 690405888.0, + "78": 690405888.0, + "79": 690405888.0, + "80": 690405888.0, + "81": 690405888.0, + "82": 690405888.0, + "83": 690405888.0, + "84": 690405888.0, + "85": 690405888.0, + "86": 690405888.0, + "87": 690405888.0, + "88": 690405888.0, + "89": 690405888.0, + "90": 690405888.0, + "91": 690405888.0, + "92": 690405888.0, + "93": 690405888.0, + "94": 690405888.0, + "95": 690405888.0, + "96": 690405888.0, + "97": 690405888.0, + "98": 690405888.0, + "99": 690405888.0, + "100": 690405888.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": 
"nan", + "51": 1223321088.0, + "52": 1226468864.0, + "53": 1226468864.0, + "54": 1228565504.0, + "55": 1228565504.0, + "56": 1228565504.0, + "57": 1228565504.0, + "58": 1228565504.0, + "59": 1228565504.0, + "60": 1228565504.0, + "61": 1228565504.0, + "62": 1228565504.0, + "63": 1228565504.0, + "64": 1228565504.0, + "65": 1228565504.0, + "66": 1228565504.0, + "67": 1228565504.0, + "68": 1228565504.0, + "69": 1228565504.0, + "70": 1228565504.0, + "71": 1228565504.0, + "72": 1228565504.0, + "73": 1228565504.0, + "74": 1228566016.0, + "75": 1228566016.0, + "76": 1228566016.0, + "77": 1228566016.0, + "78": 1228566016.0, + "79": 1228566016.0, + "80": 1228566016.0, + "81": 1228566016.0, + "82": 1228566016.0, + "83": 1228566016.0, + "84": 1228566016.0, + "85": 1228566016.0, + "86": 1228566016.0, + "87": 1228566016.0, + "88": 1228566016.0, + "89": 1228566016.0, + "90": 1228566016.0, + "91": 1228566016.0, + "92": 1228566016.0, + "93": 1228566016.0, + "94": 1228566016.0, + "95": 1228566016.0, + "96": 1228566016.0, + "97": 1228566016.0, + "98": 1228566016.0, + "99": 1228566016.0, + "100": 1228566016.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 25.67788, + "52": 0.27964, + "53": 
0.25526, + "54": 0.2537, + "55": 0.2523, + "56": 0.25288, + "57": 0.25243, + "58": 0.2522, + "59": 0.25578, + "60": 0.25303, + "61": 0.25704, + "62": 0.25347, + "63": 0.2528, + "64": 0.25153, + "65": 0.25122, + "66": 0.25213, + "67": 0.25303, + "68": 0.2521, + "69": 0.25248, + "70": 0.25281, + "71": 0.25433, + "72": 0.25335, + "73": 0.2575, + "74": 0.25031, + "75": 0.25434, + "76": 0.2531, + "77": 0.25113, + "78": 0.24927, + "79": 0.24552, + "80": 0.24948, + "81": 0.24453, + "82": 0.24712, + "83": 0.2471, + "84": 0.24736, + "85": 0.24646, + "86": 0.24642, + "87": 0.24815, + "88": 0.2471, + "89": 0.24587, + "90": 0.24585, + "91": 0.24688, + "92": 0.24797, + "93": 0.25482, + "94": 0.2575, + "95": 0.25582, + "96": 0.25432, + "97": 0.25729, + "98": 0.25905, + "99": 0.2577, + "100": 0.25797 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json index 7c012c1a85c..c8c8b2bbc63 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.88759, "5": 10.90192, "10": 10.86852, "15": 10.84829, "20": 10.71772, "25": 10.54267, "30": 10.33644, "35": 10.23973, "40": 10.03267, "45": 9.76819, "50": 9.85325, "55": 9.82266, "60": 9.43752, "65": 8.87843, "70": 9.68161, "75": 9.37198, "80": 9.35656, "85": 9.57143, "90": 9.77728, "95": 9.4856, "100": 9.35907}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 593.0, "5": 652.0, "10": 560.0, "15": 661.0, "20": 582.0, "25": 585.0, 
"30": 641.0, "35": 776.0, "40": 759.0, "45": 798.0, "50": 914.0, "55": 880.0, "60": 850.0, "65": 943.0, "70": 1067.0, "75": 874.0, "80": 1086.0, "85": 1093.0, "90": 1124.0, "95": 1118.0, "100": 1169.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 609795072.0, "5": 609795072.0, "10": 609795072.0, "15": 609795072.0, "20": 609795072.0, "25": 609795072.0, "30": 609795072.0, "35": 609795072.0, "40": 609795072.0, "45": 609795072.0, "50": 609795072.0, "55": 609795072.0, "60": 609795072.0, "65": 609795072.0, "70": 609795072.0, "75": 609795072.0, "80": 609795072.0, "85": 609795072.0, "90": 609795072.0, "95": 609795072.0, "100": 609795072.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 881296384.0, "5": 1141688320.0, "10": 1143770624.0, "15": 1143770624.0, "20": 1143770624.0, "25": 1143770624.0, "30": 1143770624.0, "35": 1143770624.0, "40": 1143770624.0, "45": 1143770624.0, "50": 1143770624.0, "55": 1143770624.0, "60": 1143770624.0, "65": 1143770624.0, "70": 1143770624.0, "75": 1143770624.0, "80": 1143770624.0, "85": 1143770624.0, "90": 1143784448.0, "95": 1143784448.0, "100": 1143784448.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.21791, "5": 0.32637, "10": 0.34092, "15": 0.32491, "20": 0.32495, "25": 0.34258, "30": 0.32373, "35": 0.32364, "40": 0.33541, "45": 0.32433, "50": 0.323, "55": 0.32727, "60": 0.3458, "65": 0.32544, "70": 0.33008, "75": 0.33089, "80": 0.32333, "85": 0.3359, "90": 0.32368, "95": 0.3227, "100": 0.3389}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.88762, + "2": 10.90373, + "3": 10.87084, + "4": 10.8703, + "5": 10.90194, + "6": 10.90847, + "7": 10.88783, + "8": 10.87729, + "9": 10.88358, + "10": 10.86852, + "11": 10.88097, + "12": 10.88498, + "13": 10.90366, + "14": 10.89975, + "15": 10.84831, + "16": 10.84519, 
+ "17": 10.80088, + "18": 10.82615, + "19": 10.81894, + "20": 10.71775, + "21": 10.69282, + "22": 10.57372, + "23": 10.70805, + "24": 10.58158, + "25": 10.54269, + "26": 10.60192, + "27": 10.59774, + "28": 10.55016, + "29": 10.5634, + "30": 10.33643, + "31": 10.09542, + "32": 10.43666, + "33": 10.43053, + "34": 10.1772, + "35": 10.23973, + "36": 10.18243, + "37": 10.30498, + "38": 10.14899, + "39": 10.35867, + "40": 10.03262, + "41": 10.08767, + "42": 10.16354, + "43": 9.78193, + "44": 9.89592, + "45": 9.76818, + "46": 9.76745, + "47": 10.08837, + "48": 9.78338, + "49": 9.4572, + "50": 9.85324, + "51": 9.78849, + "52": 9.67829, + "53": 10.01953, + "54": 9.90017, + "55": 9.82266, + "56": 9.5637, + "57": 9.4179, + "58": 9.77443, + "59": 9.52364, + "60": 9.43755, + "61": 9.64826, + "62": 9.9369, + "63": 9.30557, + "64": 9.72234, + "65": 8.87843, + "66": 9.65136, + "67": 9.31594, + "68": 9.73881, + "69": 9.74595, + "70": 9.68157, + "71": 9.56047, + "72": 9.5391, + "73": 9.44519, + "74": 8.88645, + "75": 9.37195, + "76": 9.03135, + "77": 10.03088, + "78": 9.68941, + "79": 9.33246, + "80": 9.35652, + "81": 9.43617, + "82": 9.65385, + "83": 9.25759, + "84": 9.36534, + "85": 9.57143, + "86": 9.03651, + "87": 9.55864, + "88": 9.70773, + "89": 9.55528, + "90": 9.77728, + "91": 9.29749, + "92": 9.32182, + "93": 9.02995, + "94": 8.78447, + "95": 9.4856, + "96": 9.48706, + "97": 9.27003, + "98": 9.63514, + "99": 8.83979, + "100": 9.35906 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 609.0, + "2": 618.0, + "3": 638.0, + "4": 584.0, + "5": 663.0, + "6": 688.0, + "7": 647.0, + "8": 577.0, + "9": 690.0, + "10": 550.0, + "11": 704.0, + "12": 610.0, + "13": 645.0, + "14": 666.0, + "15": 652.0, + "16": 609.0, + "17": 623.0, + "18": 625.0, + "19": 637.0, + "20": 649.0, + "21": 668.0, + "22": 612.0, + "23": 671.0, + "24": 619.0, + "25": 614.0, + "26": 641.0, + "27": 611.0, + "28": 706.0, + "29": 716.0, + "30": 663.0, + "31": 
603.0, + "32": 669.0, + "33": 760.0, + "34": 684.0, + "35": 679.0, + "36": 731.0, + "37": 792.0, + "38": 767.0, + "39": 852.0, + "40": 771.0, + "41": 800.0, + "42": 830.0, + "43": 750.0, + "44": 767.0, + "45": 821.0, + "46": 798.0, + "47": 922.0, + "48": 902.0, + "49": 839.0, + "50": 854.0, + "51": 960.0, + "52": 843.0, + "53": 1097.0, + "54": 940.0, + "55": 904.0, + "56": 926.0, + "57": 832.0, + "58": 1049.0, + "59": 948.0, + "60": 853.0, + "61": 1032.0, + "62": 964.0, + "63": 951.0, + "64": 1077.0, + "65": 956.0, + "66": 1065.0, + "67": 939.0, + "68": 1023.0, + "69": 1051.0, + "70": 1120.0, + "71": 1060.0, + "72": 849.0, + "73": 1014.0, + "74": 705.0, + "75": 838.0, + "76": 1045.0, + "77": 1118.0, + "78": 1125.0, + "79": 977.0, + "80": 1113.0, + "81": 1149.0, + "82": 1071.0, + "83": 1023.0, + "84": 1117.0, + "85": 1070.0, + "86": 857.0, + "87": 1139.0, + "88": 1071.0, + "89": 1160.0, + "90": 1062.0, + "91": 1091.0, + "92": 1184.0, + "93": 860.0, + "94": 1125.0, + "95": 1151.0, + "96": 1211.0, + "97": 1011.0, + "98": 1240.0, + "99": 1098.0, + "100": 1129.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 609140224.0, + "2": 609140224.0, + "3": 609140224.0, + "4": 609140224.0, + "5": 609140224.0, + "6": 609140224.0, + "7": 609140224.0, + "8": 609140224.0, + "9": 609140224.0, + "10": 609140224.0, + "11": 609140224.0, + "12": 609140224.0, + "13": 609140224.0, + "14": 609140224.0, + "15": 609140224.0, + "16": 609140224.0, + "17": 609140224.0, + "18": 609140224.0, + "19": 609140224.0, + "20": 609140224.0, + "21": 609140224.0, + "22": 609140224.0, + "23": 609140224.0, + "24": 609140224.0, + "25": 609140224.0, + "26": 609140224.0, + "27": 609140224.0, + "28": 609140224.0, + "29": 609140224.0, + "30": 609140224.0, + "31": 609140224.0, + "32": 609140224.0, + "33": 609140224.0, + "34": 609140224.0, + "35": 609140224.0, + "36": 609140224.0, + "37": 609140224.0, + "38": 609140224.0, + "39": 609140224.0, + 
"40": 609140224.0, + "41": 609140224.0, + "42": 609140224.0, + "43": 609140224.0, + "44": 609140224.0, + "45": 609140224.0, + "46": 609140224.0, + "47": 609140224.0, + "48": 609140224.0, + "49": 609140224.0, + "50": 609140224.0, + "51": 609140224.0, + "52": 609140224.0, + "53": 609140224.0, + "54": 609140224.0, + "55": 609140224.0, + "56": 609140224.0, + "57": 609140224.0, + "58": 609140224.0, + "59": 609140224.0, + "60": 609140224.0, + "61": 609140224.0, + "62": 609140224.0, + "63": 609140224.0, + "64": 609140224.0, + "65": 609140224.0, + "66": 609140224.0, + "67": 609140224.0, + "68": 609140224.0, + "69": 609140224.0, + "70": 609140224.0, + "71": 609140224.0, + "72": 609140224.0, + "73": 609140224.0, + "74": 609140224.0, + "75": 609140224.0, + "76": 609140224.0, + "77": 609140224.0, + "78": 609140224.0, + "79": 609140224.0, + "80": 609140224.0, + "81": 609140224.0, + "82": 609140224.0, + "83": 609140224.0, + "84": 609140224.0, + "85": 609140224.0, + "86": 609140224.0, + "87": 609140224.0, + "88": 609140224.0, + "89": 609140224.0, + "90": 609140224.0, + "91": 609140224.0, + "92": 609140224.0, + "93": 609140224.0, + "94": 609140224.0, + "95": 609140224.0, + "96": 609140224.0, + "97": 609140224.0, + "98": 609140224.0, + "99": 609140224.0, + "100": 609140224.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 880223232.0, + "2": 1150445056.0, + "3": 1150445056.0, + "4": 1152542720.0, + "5": 1152542720.0, + "6": 1152542720.0, + "7": 1152542720.0, + "8": 1152542720.0, + "9": 1152542720.0, + "10": 1152542720.0, + "11": 1152542720.0, + "12": 1152542720.0, + "13": 1152542720.0, + "14": 1152542720.0, + "15": 1152542720.0, + "16": 1152542720.0, + "17": 1152542720.0, + "18": 1152542720.0, + "19": 1152542720.0, + "20": 1152542720.0, + "21": 1152542720.0, + "22": 1152542720.0, + "23": 1152542720.0, + "24": 1152542720.0, + "25": 1152542720.0, + "26": 1152542720.0, + "27": 1153460736.0, + "28": 1153460736.0, 
+ "29": 1153460736.0, + "30": 1153460736.0, + "31": 1153460736.0, + "32": 1153460736.0, + "33": 1153460736.0, + "34": 1153460736.0, + "35": 1153460736.0, + "36": 1153460736.0, + "37": 1153460736.0, + "38": 1153460736.0, + "39": 1153460736.0, + "40": 1153460736.0, + "41": 1153460736.0, + "42": 1153460736.0, + "43": 1153460736.0, + "44": 1153460736.0, + "45": 1153460736.0, + "46": 1153460736.0, + "47": 1153460736.0, + "48": 1153460736.0, + "49": 1153460736.0, + "50": 1153460736.0, + "51": 1153460736.0, + "52": 1153460736.0, + "53": 1153460736.0, + "54": 1153460736.0, + "55": 1153460736.0, + "56": 1153460736.0, + "57": 1153460736.0, + "58": 1153460736.0, + "59": 1153460736.0, + "60": 1153460736.0, + "61": 1153460736.0, + "62": 1153460736.0, + "63": 1153460736.0, + "64": 1153460736.0, + "65": 1153460736.0, + "66": 1153460736.0, + "67": 1153460736.0, + "68": 1153460736.0, + "69": 1153460736.0, + "70": 1153460736.0, + "71": 1153460736.0, + "72": 1153460736.0, + "73": 1153460736.0, + "74": 1153460736.0, + "75": 1153460736.0, + "76": 1153460736.0, + "77": 1153460736.0, + "78": 1153460736.0, + "79": 1153460736.0, + "80": 1153460736.0, + "81": 1153460736.0, + "82": 1153460736.0, + "83": 1153460736.0, + "84": 1153460736.0, + "85": 1153460736.0, + "86": 1153460736.0, + "87": 1153460736.0, + "88": 1153460736.0, + "89": 1153460736.0, + "90": 1153460736.0, + "91": 1153460736.0, + "92": 1153460736.0, + "93": 1153460736.0, + "94": 1153460736.0, + "95": 1153460736.0, + "96": 1153460736.0, + "97": 1153460736.0, + "98": 1153460736.0, + "99": 1153460736.0, + "100": 1153460736.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 14.84186, + "2": 0.40445, + "3": 0.37825, + "4": 0.36592, + "5": 0.36636, + "6": 0.36609, + "7": 0.36611, + "8": 0.36712, + "9": 0.36621, + "10": 0.3668, + "11": 0.36731, + "12": 0.36501, + "13": 0.36592, + "14": 0.36633, + "15": 0.36689, + "16": 0.36886, + "17": 0.36624, + "18": 0.36649, + "19": 
0.36595, + "20": 0.36539, + "21": 0.36582, + "22": 0.36824, + "23": 0.36684, + "24": 0.36474, + "25": 0.36651, + "26": 0.36402, + "27": 0.3665, + "28": 0.36596, + "29": 0.3683, + "30": 0.38775, + "31": 0.36759, + "32": 0.36551, + "33": 0.36889, + "34": 0.80549, + "35": 0.36014, + "36": 0.36023, + "37": 0.74512, + "38": 0.37154, + "39": 0.35739, + "40": 0.79726, + "41": 0.35594, + "42": 0.35485, + "43": 0.82879, + "44": 0.35555, + "45": 0.3543, + "46": 0.35396, + "47": 0.35419, + "48": 0.35366, + "49": 0.68813, + "50": 0.35739, + "51": 0.3635, + "52": 0.36241, + "53": 0.35898, + "54": 0.36085, + "55": 0.35981, + "56": 0.35989, + "57": 0.36149, + "58": 0.36219, + "59": 0.36015, + "60": 0.36165, + "61": 0.35985, + "62": 0.36093, + "63": 0.3622, + "64": 0.3576, + "65": 0.36027, + "66": 0.36035, + "67": 0.36194, + "68": 0.35988, + "69": 0.35888, + "70": 0.3603, + "71": 0.36034, + "72": 0.35844, + "73": 0.35834, + "74": 0.36016, + "75": 0.36243, + "76": 0.3612, + "77": 0.35873, + "78": 0.36065, + "79": 0.35851, + "80": 0.35864, + "81": 0.36332, + "82": 0.36043, + "83": 0.35786, + "84": 0.35965, + "85": 0.35924, + "86": 0.35886, + "87": 0.36811, + "88": 0.36592, + "89": 0.36483, + "90": 0.36595, + "91": 0.36082, + "92": 0.3625, + "93": 0.35948, + "94": 0.35859, + "95": 0.35947, + "96": 0.35991, + "97": 0.36021, + "98": 0.35991, + "99": 0.35971, + "100": 0.35838 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..a9134cc22bc --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.78849, + "52": 9.67829, + "53": 10.01954, + "54": 9.90021, + "55": 9.82264, + "56": 9.56375, + "57": 9.4179, + "58": 9.7744, + "59": 9.52369, + "60": 9.43754, + "61": 9.64825, + "62": 9.93694, + "63": 9.30556, + "64": 9.72236, + "65": 8.87844, + "66": 9.65135, + "67": 9.31592, + "68": 9.7388, + "69": 9.74594, + "70": 9.68162, + "71": 9.5605, + "72": 9.53911, + "73": 9.44523, + "74": 8.88645, + "75": 9.37201, + "76": 9.03136, + "77": 10.03083, + "78": 9.68941, + "79": 9.3325, + "80": 9.35653, + "81": 9.43622, + "82": 9.65384, + "83": 9.2576, + "84": 9.36531, + "85": 9.57144, + "86": 9.03655, + "87": 9.55863, + "88": 9.70775, + "89": 9.55528, + "90": 9.77727, + "91": 9.2975, + "92": 9.32182, + "93": 9.02989, + "94": 8.78447, + "95": 9.48562, + "96": 9.48704, + "97": 9.27003, + "98": 9.63514, + "99": 8.8398, + "100": 9.35907 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + 
"20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1017.0, + "52": 937.0, + "53": 1026.0, + "54": 948.0, + "55": 841.0, + "56": 980.0, + "57": 765.0, + "58": 1018.0, + "59": 999.0, + "60": 874.0, + "61": 1056.0, + "62": 954.0, + "63": 920.0, + "64": 1089.0, + "65": 884.0, + "66": 1087.0, + "67": 952.0, + "68": 1047.0, + "69": 1088.0, + "70": 1074.0, + "71": 1037.0, + "72": 810.0, + "73": 1025.0, + "74": 741.0, + "75": 920.0, + "76": 1040.0, + "77": 1141.0, + "78": 1082.0, + "79": 1080.0, + "80": 1042.0, + "81": 1205.0, + "82": 1051.0, + "83": 960.0, + "84": 1184.0, + "85": 1109.0, + "86": 797.0, + "87": 1202.0, + "88": 1015.0, + "89": 1139.0, + "90": 987.0, + "91": 1050.0, + "92": 1163.0, + "93": 881.0, + "94": 1102.0, + "95": 1125.0, + "96": 1193.0, + "97": 1112.0, + "98": 1239.0, + "99": 1121.0, + "100": 1154.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": 
"nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 610975232.0, + "52": 610975232.0, + "53": 610975232.0, + "54": 610975232.0, + "55": 610975232.0, + "56": 610975232.0, + "57": 610975232.0, + "58": 610975232.0, + "59": 610975232.0, + "60": 610975232.0, + "61": 610975232.0, + "62": 610975232.0, + "63": 610975232.0, + "64": 610975232.0, + "65": 610975232.0, + "66": 610975232.0, + "67": 610975232.0, + "68": 610975232.0, + "69": 610975232.0, + "70": 610975232.0, + "71": 610975232.0, + "72": 610975232.0, + "73": 610975232.0, + "74": 610975232.0, + "75": 610975232.0, + "76": 610975232.0, + "77": 610975232.0, + "78": 610975232.0, + "79": 610975232.0, + "80": 610975232.0, + "81": 610975232.0, + "82": 610975232.0, + "83": 610975232.0, + "84": 610975232.0, + "85": 610975232.0, + "86": 610975232.0, + "87": 610975232.0, + "88": 610975232.0, + "89": 610975232.0, + "90": 610975232.0, + "91": 610975232.0, + "92": 610975232.0, + "93": 610975232.0, + "94": 610975232.0, + "95": 610975232.0, + "96": 610975232.0, + "97": 610975232.0, + "98": 610975232.0, + "99": 610975232.0, + "100": 610975232.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": 
"nan", + "51": 1146775040.0, + "52": 1146776064.0, + "53": 1146776064.0, + "54": 1146776064.0, + "55": 1146776064.0, + "56": 1146776064.0, + "57": 1146776064.0, + "58": 1146776064.0, + "59": 1146776064.0, + "60": 1146776064.0, + "61": 1146776064.0, + "62": 1146776064.0, + "63": 1146776064.0, + "64": 1146776064.0, + "65": 1146776064.0, + "66": 1146776064.0, + "67": 1147824640.0, + "68": 1147824640.0, + "69": 1147824640.0, + "70": 1147824640.0, + "71": 1147824640.0, + "72": 1147824640.0, + "73": 1147824640.0, + "74": 1147824640.0, + "75": 1147824640.0, + "76": 1147824640.0, + "77": 1147824640.0, + "78": 1147824640.0, + "79": 1147824640.0, + "80": 1147824640.0, + "81": 1147824640.0, + "82": 1147824640.0, + "83": 1147824640.0, + "84": 1147824640.0, + "85": 1147824640.0, + "86": 1147824640.0, + "87": 1147824640.0, + "88": 1147824640.0, + "89": 1147824640.0, + "90": 1147824640.0, + "91": 1147824640.0, + "92": 1147824640.0, + "93": 1147824640.0, + "94": 1147824640.0, + "95": 1147824640.0, + "96": 1147824640.0, + "97": 1147824640.0, + "98": 1147824640.0, + "99": 1147824640.0, + "100": 1147824640.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 14.91489, + "52": 0.3901, + "53": 
0.37105, + "54": 0.36976, + "55": 0.36846, + "56": 0.36819, + "57": 0.36943, + "58": 0.36873, + "59": 0.37048, + "60": 0.3696, + "61": 0.36867, + "62": 0.36991, + "63": 0.36919, + "64": 0.36728, + "65": 0.36884, + "66": 0.37058, + "67": 0.36765, + "68": 0.36925, + "69": 0.36821, + "70": 0.36876, + "71": 0.36845, + "72": 0.36856, + "73": 0.36946, + "74": 0.36927, + "75": 0.36875, + "76": 0.36813, + "77": 0.37033, + "78": 0.36854, + "79": 0.36796, + "80": 0.36964, + "81": 0.36883, + "82": 0.36983, + "83": 0.37114, + "84": 0.36966, + "85": 0.36965, + "86": 0.36722, + "87": 0.36512, + "88": 0.3663, + "89": 0.36544, + "90": 0.3634, + "91": 0.36718, + "92": 0.3648, + "93": 0.36513, + "94": 0.36611, + "95": 0.3655, + "96": 0.36533, + "97": 0.3669, + "98": 0.36517, + "99": 0.36574, + "100": 0.36518 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..8b51d66847b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.87192, + "2": 10.87243, + "3": 10.86245, + "4": 10.84367, + "5": 10.87782, + "6": 10.89351, + "7": 10.87195, + "8": 10.87656, + "9": 10.86866, + "10": 10.83844, + "11": 10.87549, + "12": 10.87587, + "13": 10.89089, + "14": 10.89697, + "15": 10.83165, + "16": 10.82447, + "17": 10.80203, + "18": 10.82966, + "19": 10.82308, + "20": 10.73682, + "21": 10.71008, + "22": 10.56492, + "23": 10.73066, + "24": 10.60695, + "25": 10.55578, + "26": 10.62423, + "27": 10.6196, + "28": 10.57904, + "29": 10.60302, + "30": 10.38932, + 
"31": 10.12985, + "32": 10.47779, + "33": 10.47516, + "34": 10.22981, + "35": 10.28817, + "36": 10.23457, + "37": 10.35363, + "38": 10.20006, + "39": 10.41054, + "40": 10.09837, + "41": 10.13918, + "42": 10.22109, + "43": 9.85049, + "44": 9.95421, + "45": 9.84312, + "46": 9.82557, + "47": 10.13684, + "48": 9.8549, + "49": 9.53552, + "50": 9.91111, + "51": 9.85898, + "52": 9.75133, + "53": 10.06617, + "54": 9.95613, + "55": 9.89104, + "56": 9.62508, + "57": 9.47981, + "58": 9.83478, + "59": 9.58498, + "60": 9.49806, + "61": 9.69192, + "62": 9.98825, + "63": 9.37824, + "64": 9.76808, + "65": 8.94514, + "66": 9.70125, + "67": 9.37149, + "68": 9.78313, + "69": 9.79923, + "70": 9.7312, + "71": 9.62753, + "72": 9.58452, + "73": 9.48417, + "74": 8.92523, + "75": 9.4118, + "76": 9.0796, + "77": 10.06083, + "78": 9.7215, + "79": 9.38109, + "80": 9.40161, + "81": 9.48468, + "82": 9.70219, + "83": 9.31549, + "84": 9.41786, + "85": 9.61785, + "86": 9.077, + "87": 9.59967, + "88": 9.75356, + "89": 9.60341, + "90": 9.82789, + "91": 9.33668, + "92": 9.36036, + "93": 9.08765, + "94": 8.83052, + "95": 9.5296, + "96": 9.53024, + "97": 9.30627, + "98": 9.67298, + "99": 8.89917, + "100": 9.40828 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1622.0, + "2": 1753.0, + "3": 1697.0, + "4": 1783.0, + "5": 2009.0, + "6": 1855.0, + "7": 1765.0, + "8": 1627.0, + "9": 1798.0, + "10": 1429.0, + "11": 1819.0, + "12": 1654.0, + "13": 1862.0, + "14": 1742.0, + "15": 1868.0, + "16": 1932.0, + "17": 1713.0, + "18": 1692.0, + "19": 1721.0, + "20": 1579.0, + "21": 1788.0, + "22": 1769.0, + "23": 1944.0, + "24": 1664.0, + "25": 1628.0, + "26": 1641.0, + "27": 1835.0, + "28": 1956.0, + "29": 2013.0, + "30": 1885.0, + "31": 1576.0, + "32": 1933.0, + "33": 2119.0, + "34": 1856.0, + "35": 1965.0, + "36": 1971.0, + "37": 2255.0, + "38": 2088.0, + "39": 2451.0, + "40": 2172.0, + "41": 2296.0, + "42": 2276.0, + "43": 1969.0, + "44": 2094.0, + "45": 
2044.0, + "46": 2227.0, + "47": 2648.0, + "48": 2394.0, + "49": 2407.0, + "50": 2297.0, + "51": 2554.0, + "52": 2466.0, + "53": 2923.0, + "54": 2612.0, + "55": 2351.0, + "56": 2757.0, + "57": 2313.0, + "58": 2798.0, + "59": 2750.0, + "60": 2376.0, + "61": 2848.0, + "62": 2668.0, + "63": 2468.0, + "64": 2818.0, + "65": 2630.0, + "66": 2992.0, + "67": 2802.0, + "68": 2794.0, + "69": 2851.0, + "70": 3059.0, + "71": 2869.0, + "72": 2424.0, + "73": 3035.0, + "74": 2113.0, + "75": 2485.0, + "76": 2782.0, + "77": 3252.0, + "78": 3149.0, + "79": 3192.0, + "80": 3229.0, + "81": 3397.0, + "82": 3297.0, + "83": 2766.0, + "84": 3192.0, + "85": 3206.0, + "86": 2648.0, + "87": 3709.0, + "88": 2962.0, + "89": 3273.0, + "90": 3149.0, + "91": 2825.0, + "92": 3047.0, + "93": 2918.0, + "94": 3432.0, + "95": 3266.0, + "96": 3574.0, + "97": 3190.0, + "98": 3564.0, + "99": 2977.0, + "100": 3249.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 491224576.0, + "2": 491224576.0, + "3": 491224576.0, + "4": 491224576.0, + "5": 491224576.0, + "6": 491224576.0, + "7": 491224576.0, + "8": 491224576.0, + "9": 491224576.0, + "10": 491224576.0, + "11": 491224576.0, + "12": 491224576.0, + "13": 491224576.0, + "14": 491224576.0, + "15": 491224576.0, + "16": 491224576.0, + "17": 491224576.0, + "18": 491224576.0, + "19": 491224576.0, + "20": 491224576.0, + "21": 491224576.0, + "22": 491224576.0, + "23": 491224576.0, + "24": 491224576.0, + "25": 491224576.0, + "26": 491224576.0, + "27": 491224576.0, + "28": 491224576.0, + "29": 491224576.0, + "30": 491224576.0, + "31": 491224576.0, + "32": 491224576.0, + "33": 491224576.0, + "34": 491224576.0, + "35": 491224576.0, + "36": 491224576.0, + "37": 491224576.0, + "38": 491224576.0, + "39": 491224576.0, + "40": 491224576.0, + "41": 491224576.0, + "42": 491224576.0, + "43": 491224576.0, + "44": 491224576.0, + "45": 491224576.0, + "46": 491224576.0, + "47": 491224576.0, + "48": 491224576.0, + 
"49": 491224576.0, + "50": 491224576.0, + "51": 491224576.0, + "52": 491224576.0, + "53": 491224576.0, + "54": 491224576.0, + "55": 491224576.0, + "56": 491224576.0, + "57": 491224576.0, + "58": 491224576.0, + "59": 491224576.0, + "60": 491224576.0, + "61": 491224576.0, + "62": 491224576.0, + "63": 491224576.0, + "64": 491224576.0, + "65": 491224576.0, + "66": 491224576.0, + "67": 491224576.0, + "68": 491224576.0, + "69": 491224576.0, + "70": 491224576.0, + "71": 491224576.0, + "72": 491224576.0, + "73": 491224576.0, + "74": 491224576.0, + "75": 491224576.0, + "76": 491224576.0, + "77": 491224576.0, + "78": 491224576.0, + "79": 491224576.0, + "80": 491224576.0, + "81": 491224576.0, + "82": 491224576.0, + "83": 491224576.0, + "84": 491224576.0, + "85": 491224576.0, + "86": 491224576.0, + "87": 491224576.0, + "88": 491224576.0, + "89": 491224576.0, + "90": 491224576.0, + "91": 491224576.0, + "92": 491224576.0, + "93": 491224576.0, + "94": 491224576.0, + "95": 491224576.0, + "96": 491224576.0, + "97": 491224576.0, + "98": 491224576.0, + "99": 491224576.0, + "100": 491224576.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1578442240.0, + "2": 1706868224.0, + "3": 1706868224.0, + "4": 1706868224.0, + "5": 1706868224.0, + "6": 1706868224.0, + "7": 1706868224.0, + "8": 1706868224.0, + "9": 1706868224.0, + "10": 1706868224.0, + "11": 1706868224.0, + "12": 1706868224.0, + "13": 1706868224.0, + "14": 1706868224.0, + "15": 1706868224.0, + "16": 1706868224.0, + "17": 1706868224.0, + "18": 1706868224.0, + "19": 1706868224.0, + "20": 1706868224.0, + "21": 1706868224.0, + "22": 1706868224.0, + "23": 1706868224.0, + "24": 1706868224.0, + "25": 1706868224.0, + "26": 1706868224.0, + "27": 1706868224.0, + "28": 1706868224.0, + "29": 1706868224.0, + "30": 1706868224.0, + "31": 1706868224.0, + "32": 1706868224.0, + "33": 1706868224.0, + "34": 1706868224.0, + "35": 1706868224.0, + "36": 1706868224.0, + "37": 
1706868224.0, + "38": 1706868224.0, + "39": 1706868224.0, + "40": 1706868224.0, + "41": 1706868224.0, + "42": 1706868224.0, + "43": 1706868224.0, + "44": 1706868224.0, + "45": 1706868224.0, + "46": 1706868224.0, + "47": 1706868224.0, + "48": 1706868224.0, + "49": 1706868224.0, + "50": 1706868224.0, + "51": 1706868224.0, + "52": 1706868224.0, + "53": 1706868224.0, + "54": 1706868224.0, + "55": 1706868224.0, + "56": 1706868224.0, + "57": 1706868224.0, + "58": 1706868224.0, + "59": 1706868224.0, + "60": 1706868224.0, + "61": 1706868224.0, + "62": 1706868224.0, + "63": 1706868224.0, + "64": 1706868224.0, + "65": 1706868224.0, + "66": 1706868224.0, + "67": 1706868224.0, + "68": 1706868224.0, + "69": 1706868224.0, + "70": 1706868224.0, + "71": 1706868224.0, + "72": 1706868224.0, + "73": 1706868224.0, + "74": 1706868224.0, + "75": 1706868224.0, + "76": 1706868224.0, + "77": 1706868224.0, + "78": 1706868224.0, + "79": 1706868224.0, + "80": 1706868224.0, + "81": 1706868224.0, + "82": 1706868224.0, + "83": 1706868224.0, + "84": 1706868224.0, + "85": 1706868224.0, + "86": 1706868224.0, + "87": 1706868224.0, + "88": 1706868224.0, + "89": 1706868224.0, + "90": 1706868224.0, + "91": 1706868224.0, + "92": 1706868224.0, + "93": 1706868224.0, + "94": 1706868224.0, + "95": 1706868224.0, + "96": 1706868224.0, + "97": 1706868224.0, + "98": 1706868224.0, + "99": 1706868224.0, + "100": 1706868224.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.76973, + "2": 0.17585, + "3": 0.17368, + "4": 0.16152, + "5": 0.19039, + "6": 0.22444, + "7": 0.19405, + "8": 0.19945, + "9": 0.19849, + "10": 0.19715, + "11": 0.26257, + "12": 0.20383, + "13": 0.20656, + "14": 0.16788, + "15": 0.16036, + "16": 0.16063, + "17": 0.28798, + "18": 0.16008, + "19": 0.15785, + "20": 0.15974, + "21": 0.15889, + "22": 0.15943, + "23": 0.15886, + "24": 0.16021, + "25": 0.15915, + "26": 0.16121, + "27": 0.15965, + "28": 0.15981, + "29": 0.16011, + "30": 
0.15997, + "31": 0.16048, + "32": 0.15884, + "33": 0.16058, + "34": 0.15945, + "35": 0.15917, + "36": 0.16205, + "37": 0.15947, + "38": 0.16161, + "39": 0.15927, + "40": 0.15876, + "41": 0.159, + "42": 0.47609, + "43": 0.17027, + "44": 0.1644, + "45": 0.16303, + "46": 0.16036, + "47": 0.16029, + "48": 0.16095, + "49": 0.16015, + "50": 0.1603, + "51": 0.21916, + "52": 0.20178, + "53": 0.20344, + "54": 0.22444, + "55": 0.25106, + "56": 0.19763, + "57": 0.21076, + "58": 0.24116, + "59": 0.19345, + "60": 0.1603, + "61": 0.15954, + "62": 0.16062, + "63": 0.20422, + "64": 0.1605, + "65": 0.16211, + "66": 0.16077, + "67": 0.16024, + "68": 0.16099, + "69": 0.16333, + "70": 0.16439, + "71": 0.16108, + "72": 0.16247, + "73": 0.1611, + "74": 0.16235, + "75": 0.16292, + "76": 0.16349, + "77": 0.1636, + "78": 0.16363, + "79": 0.34343, + "80": 0.15998, + "81": 0.15954, + "82": 0.15941, + "83": 0.15965, + "84": 0.16027, + "85": 0.16164, + "86": 0.16113, + "87": 0.16126, + "88": 0.16032, + "89": 0.26526, + "90": 0.15925, + "91": 0.1601, + "92": 0.15972, + "93": 0.15947, + "94": 0.15955, + "95": 0.15981, + "96": 0.15971, + "97": 0.15989, + "98": 0.15959, + "99": 0.15994, + "100": 0.16111 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json index 077c5e1317a..13ad7566828 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 490700288.0, - "2": 490700288.0, - "3": 490700288.0, - "4": 
490700288.0, - "5": 490700288.0, - "6": 490700288.0, - "7": 490700288.0, - "8": 490700288.0, - "9": 490700288.0, - "10": 490700288.0, - "11": 490700288.0, - "12": 490700288.0, - "13": 490700288.0, - "14": 490700288.0, - "15": 490700288.0, - "16": 490700288.0, - "17": 490700288.0, - "18": 490700288.0, - "19": 490700288.0, - "20": 490700288.0, - "21": 490700288.0, - "22": 490700288.0, - "23": 490700288.0, - "24": 490700288.0, - "25": 490700288.0, - "26": 490700288.0, - "27": 490700288.0, - "28": 490700288.0, - "29": 490700288.0, - "30": 490700288.0, - "31": 490700288.0, - "32": 490700288.0, - "33": 490700288.0, - "34": 490700288.0, - "35": 490700288.0, - "36": 490700288.0, - "37": 490700288.0, - "38": 490700288.0, - "39": 490700288.0, - "40": 490700288.0, - "41": 490700288.0, - "42": 490700288.0, - "43": 490700288.0, - "44": 490700288.0, - "45": 490700288.0, - "46": 490700288.0, - "47": 490700288.0, - "48": 490700288.0, - "49": 490700288.0, - "50": 490700288.0, - "51": 490700288.0, - "52": 490700288.0, - "53": 490700288.0, - "54": 490700288.0, - "55": 490700288.0, - "56": 490700288.0, - "57": 490700288.0, - "58": 490700288.0, - "59": 490700288.0, - "60": 490700288.0, - "61": 490700288.0, - "62": 490700288.0, - "63": 490700288.0, - "64": 490700288.0, - "65": 490700288.0, - "66": 490700288.0, - "67": 490700288.0, - "68": 490700288.0, - "69": 490700288.0, - "70": 490700288.0, - "71": 490700288.0, - "72": 490700288.0, - "73": 490700288.0, - "74": 490700288.0, - "75": 490700288.0, - "76": 490700288.0, - "77": 490700288.0, - "78": 490700288.0, - "79": 490700288.0, - "80": 490700288.0, - "81": 490700288.0, - "82": 490700288.0, - "83": 490700288.0, - "84": 490700288.0, - "85": 490700288.0, - "86": 490700288.0, - "87": 490700288.0, - "88": 490700288.0, - "89": 490700288.0, - "90": 490700288.0, - "91": 490700288.0, - "92": 490700288.0, - "93": 490700288.0, - "94": 490700288.0, - "95": 490700288.0, - "96": 490700288.0, - "97": 490700288.0, - "98": 490700288.0, - "99": 
490700288.0, - "100": 490700288.0 + "1": 491224576.0, + "2": 491224576.0, + "3": 491224576.0, + "4": 491224576.0, + "5": 491224576.0, + "6": 491224576.0, + "7": 491224576.0, + "8": 491224576.0, + "9": 491224576.0, + "10": 491224576.0, + "11": 491224576.0, + "12": 491224576.0, + "13": 491224576.0, + "14": 491224576.0, + "15": 491224576.0, + "16": 491224576.0, + "17": 491224576.0, + "18": 491224576.0, + "19": 491224576.0, + "20": 491224576.0, + "21": 491224576.0, + "22": 491224576.0, + "23": 491224576.0, + "24": 491224576.0, + "25": 491224576.0, + "26": 491224576.0, + "27": 491224576.0, + "28": 491224576.0, + "29": 491224576.0, + "30": 491224576.0, + "31": 491224576.0, + "32": 491224576.0, + "33": 491224576.0, + "34": 491224576.0, + "35": 491224576.0, + "36": 491224576.0, + "37": 491224576.0, + "38": 491224576.0, + "39": 491224576.0, + "40": 491224576.0, + "41": 491224576.0, + "42": 491224576.0, + "43": 491224576.0, + "44": 491224576.0, + "45": 491224576.0, + "46": 491224576.0, + "47": 491224576.0, + "48": 491224576.0, + "49": 491224576.0, + "50": 491224576.0, + "51": 491224576.0, + "52": 491224576.0, + "53": 491224576.0, + "54": 491224576.0, + "55": 491224576.0, + "56": 491224576.0, + "57": 491224576.0, + "58": 491224576.0, + "59": 491224576.0, + "60": 491224576.0, + "61": 491224576.0, + "62": 491224576.0, + "63": 491224576.0, + "64": 491224576.0, + "65": 491224576.0, + "66": 491224576.0, + "67": 491224576.0, + "68": 491224576.0, + "69": 491224576.0, + "70": 491224576.0, + "71": 491224576.0, + "72": 491224576.0, + "73": 491224576.0, + "74": 491224576.0, + "75": 491224576.0, + "76": 491224576.0, + "77": 491224576.0, + "78": 491224576.0, + "79": 491224576.0, + "80": 491224576.0, + "81": 491224576.0, + "82": 491224576.0, + "83": 491224576.0, + "84": 491224576.0, + "85": 491224576.0, + "86": 491224576.0, + "87": 491224576.0, + "88": 491224576.0, + "89": 491224576.0, + "90": 491224576.0, + "91": 491224576.0, + "92": 491224576.0, + "93": 491224576.0, + "94": 491224576.0, 
+ "95": 491224576.0, + "96": 491224576.0, + "97": 491224576.0, + "98": 491224576.0, + "99": 491224576.0, + "100": 491224576.0 } }, "mem-max-allocated-bytes": { @@ -325,7 +325,7 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1553275392.0, + "1": 1553276416.0, "2": 1681702400.0, "3": 1681702400.0, "4": 1681702400.0, @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 12.96096, - "2": 0.14328, - "3": 0.13234, - "4": 0.12983, - "5": 0.1339, - "6": 0.13424, - "7": 0.13558, - "8": 0.13644, - "9": 0.13434, - "10": 0.13106, - "11": 0.13377, - "12": 0.13148, - "13": 0.13136, - "14": 0.13331, - "15": 0.13429, - "16": 0.13208, - "17": 0.1316, - "18": 0.13139, - "19": 0.1287, - "20": 0.13199, - "21": 0.1318, - "22": 0.13196, - "23": 0.13019, - "24": 0.1317, - "25": 0.13217, - "26": 0.12983, - "27": 0.12928, - "28": 0.13258, - "29": 0.13441, - "30": 0.13276, - "31": 0.13264, - "32": 0.13228, - "33": 0.13159, - "34": 0.13219, - "35": 0.133, - "36": 0.13166, - "37": 0.13174, - "38": 0.1304, - "39": 0.1314, - "40": 0.13029, - "41": 0.13074, - "42": 0.12839, - "43": 0.13136, - "44": 0.13209, - "45": 0.12923, - "46": 0.13318, - "47": 0.1319, - "48": 0.13259, - "49": 0.13079, - "50": 0.12933, - "51": 0.15172, - "52": 0.1333, - "53": 0.14462, - "54": 0.13216, - "55": 0.13399, - "56": 0.13553, - "57": 0.13325, - "58": 0.13361, - "59": 0.13333, - "60": 0.13354, - "61": 0.13207, - "62": 0.1338, - "63": 0.13105, - "64": 0.13392, - "65": 0.13319, - "66": 0.13384, - "67": 0.13217, - "68": 0.13367, - "69": 0.13229, - "70": 0.13221, - "71": 0.1335, - "72": 0.13557, - "73": 0.13385, - "74": 0.13485, - "75": 0.13327, - "76": 0.13288, - "77": 0.13329, - "78": 0.13402, - "79": 0.13416, - "80": 0.13423, - "81": 0.13316, - "82": 0.13278, - "83": 0.13364, - "84": 0.13264, - "85": 0.13203, - "86": 0.13235, - "87": 0.13381, - "88": 0.13365, - "89": 0.13338, - "90": 0.1334, - "91": 0.13418, - "92": 0.13669, - "93": 0.13477, - "94": 0.13244, - "95": 0.13237, - 
"96": 0.13182, - "97": 0.13149, - "98": 0.13223, - "99": 0.13163, - "100": 0.1326 + "1": 13.28736, + "2": 0.1399, + "3": 0.12618, + "4": 0.10709, + "5": 0.11408, + "6": 0.10894, + "7": 0.10708, + "8": 0.10773, + "9": 0.10787, + "10": 0.10884, + "11": 0.10818, + "12": 0.10774, + "13": 0.1067, + "14": 0.1065, + "15": 0.10599, + "16": 0.10552, + "17": 0.10782, + "18": 0.10913, + "19": 0.10816, + "20": 0.10759, + "21": 0.108, + "22": 0.10902, + "23": 0.1076, + "24": 0.1068, + "25": 0.10674, + "26": 0.10699, + "27": 0.10678, + "28": 0.10642, + "29": 0.1066, + "30": 0.10707, + "31": 0.10794, + "32": 0.10702, + "33": 0.10586, + "34": 0.10612, + "35": 0.10628, + "36": 0.10631, + "37": 0.10573, + "38": 0.10617, + "39": 0.10563, + "40": 0.1064, + "41": 0.1059, + "42": 0.1054, + "43": 0.10691, + "44": 0.10833, + "45": 0.10638, + "46": 0.10655, + "47": 0.10676, + "48": 0.10825, + "49": 0.10534, + "50": 0.10635, + "51": 0.12108, + "52": 0.12016, + "53": 0.11315, + "54": 0.10912, + "55": 0.11556, + "56": 0.10742, + "57": 0.10784, + "58": 0.11719, + "59": 0.10791, + "60": 0.10886, + "61": 0.11563, + "62": 0.10714, + "63": 0.10967, + "64": 0.11569, + "65": 0.10753, + "66": 0.1078, + "67": 0.10545, + "68": 0.10522, + "69": 0.10496, + "70": 0.10544, + "71": 0.10719, + "72": 0.10708, + "73": 0.1062, + "74": 0.10663, + "75": 0.10766, + "76": 0.10634, + "77": 0.106, + "78": 0.10757, + "79": 0.10574, + "80": 0.10548, + "81": 0.1068, + "82": 0.10639, + "83": 0.10598, + "84": 0.10693, + "85": 0.10553, + "86": 0.10606, + "87": 0.10692, + "88": 0.10564, + "89": 0.10633, + "90": 0.10625, + "91": 0.10563, + "92": 0.10508, + "93": 0.10937, + "94": 0.10519, + "95": 0.10566, + "96": 0.11009, + "97": 0.10631, + "98": 0.10595, + "99": 0.10785, + "100": 0.10678 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100_2nd.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..22ee15f7925 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.8709, + "52": 9.7737, + "53": 10.08149, + "54": 9.97376, + "55": 9.90036, + "56": 9.64783, + "57": 9.50136, + "58": 9.85199, + "59": 9.6034, + "60": 9.50993, + "61": 9.71315, + "62": 9.99373, + "63": 9.39358, + "64": 9.78904, + "65": 8.96358, + "66": 9.71142, + "67": 9.38175, + "68": 9.79833, + "69": 9.80889, + "70": 9.75039, + "71": 9.62004, + "72": 9.59387, + "73": 9.50631, + "74": 8.94916, + "75": 9.43188, + "76": 9.08702, + "77": 10.06886, + "78": 9.73459, + "79": 9.38325, + "80": 9.41272, + "81": 9.48499, + "82": 9.70672, + "83": 9.30939, + "84": 9.42428, + "85": 9.61991, + "86": 9.07811, + "87": 9.59541, + "88": 9.75596, + "89": 9.60274, + "90": 9.82165, + "91": 9.34268, + "92": 9.35878, + "93": 9.08116, + "94": 8.83791, + "95": 9.5238, + "96": 9.53556, + 
"97": 9.31807, + "98": 9.68183, + "99": 8.89422, + "100": 9.40138 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2514.0, + "52": 2430.0, + "53": 2840.0, + "54": 2677.0, + "55": 2394.0, + "56": 2601.0, + "57": 2341.0, + "58": 2837.0, + "59": 2789.0, + "60": 2425.0, + "61": 2923.0, + "62": 2591.0, + "63": 2416.0, + "64": 2937.0, + "65": 2572.0, + "66": 3008.0, + "67": 2843.0, + "68": 2761.0, + "69": 2834.0, + "70": 3108.0, + "71": 2989.0, + "72": 2316.0, + "73": 2950.0, + "74": 1899.0, + "75": 2378.0, + "76": 2962.0, + "77": 3343.0, + "78": 3183.0, + "79": 2979.0, + "80": 3209.0, + "81": 3583.0, + "82": 3160.0, + "83": 2776.0, + "84": 3242.0, + "85": 3425.0, + "86": 2720.0, + "87": 3820.0, + "88": 3050.0, + "89": 3297.0, + "90": 3069.0, + "91": 2685.0, + "92": 3061.0, + "93": 2584.0, + "94": 3338.0, + "95": 3406.0, + "96": 3389.0, + "97": 3104.0, + "98": 3583.0, + "99": 3229.0, + "100": 3225.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": 
"nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 492274176.0, + "52": 492274176.0, + "53": 492274176.0, + "54": 492274176.0, + "55": 492274176.0, + "56": 492274176.0, + "57": 492274176.0, + "58": 492274176.0, + "59": 492274176.0, + "60": 492274176.0, + "61": 492274176.0, + "62": 492274176.0, + "63": 492274176.0, + "64": 492274176.0, + "65": 492274176.0, + "66": 492274176.0, + "67": 492274176.0, + "68": 492274176.0, + "69": 492274176.0, + "70": 492274176.0, + "71": 492274176.0, + "72": 492274176.0, + "73": 492274176.0, + "74": 492274176.0, + "75": 492274176.0, + "76": 492274176.0, + "77": 492274176.0, + "78": 492274176.0, + "79": 492274176.0, + "80": 492274176.0, + "81": 492274176.0, + "82": 492274176.0, + "83": 492274176.0, + "84": 492274176.0, + "85": 492274176.0, + "86": 492274176.0, + "87": 492274176.0, + "88": 492274176.0, + "89": 492274176.0, + "90": 492274176.0, + "91": 492274176.0, + "92": 492274176.0, + "93": 492274176.0, + "94": 492274176.0, + "95": 492274176.0, + "96": 492274176.0, + "97": 492274176.0, + "98": 492274176.0, + "99": 492274176.0, + "100": 492274176.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": 
"nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1684323840.0, + "52": 1684324864.0, + "53": 1684324864.0, + "54": 1684324864.0, + "55": 1684324864.0, + "56": 1684324864.0, + "57": 1684324864.0, + "58": 1684324864.0, + "59": 1684324864.0, + "60": 1684324864.0, + "61": 1684324864.0, + "62": 1684324864.0, + "63": 1684324864.0, + "64": 1684324864.0, + "65": 1684324864.0, + "66": 1684324864.0, + "67": 1684324864.0, + "68": 1684324864.0, + "69": 1684324864.0, + "70": 1684324864.0, + "71": 1684324864.0, + "72": 1684324864.0, + "73": 1684324864.0, + "74": 1684324864.0, + "75": 1684324864.0, + "76": 1684324864.0, + "77": 1684324864.0, + "78": 1684324864.0, + "79": 1684324864.0, + "80": 1684324864.0, + "81": 1684324864.0, + "82": 1684324864.0, + "83": 1684324864.0, + "84": 1684324864.0, + "85": 1684324864.0, + "86": 1684324864.0, + "87": 1684324864.0, + "88": 1684324864.0, + "89": 1684324864.0, + "90": 1684324864.0, + "91": 1684324864.0, + "92": 1684324864.0, + "93": 1684324864.0, + "94": 1684324864.0, + "95": 1684324864.0, + "96": 1684324864.0, + "97": 1684324864.0, + "98": 1684324864.0, + "99": 1684324864.0, + "100": 1684324864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + 
"23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 11.56176, + "52": 0.13774, + "53": 0.11414, + "54": 0.11045, + "55": 0.1125, + "56": 0.11106, + "57": 0.11016, + "58": 0.11042, + "59": 0.11057, + "60": 0.10826, + "61": 0.10921, + "62": 0.10786, + "63": 0.10755, + "64": 0.10814, + "65": 0.10772, + "66": 0.10843, + "67": 0.10895, + "68": 0.10806, + "69": 0.10877, + "70": 0.10793, + "71": 0.11024, + "72": 0.10933, + "73": 0.10647, + "74": 0.10846, + "75": 0.11298, + "76": 0.13322, + "77": 0.11871, + "78": 0.10859, + "79": 0.106, + "80": 0.10554, + "81": 0.10679, + "82": 0.10538, + "83": 0.10499, + "84": 0.10608, + "85": 0.10469, + "86": 0.10435, + "87": 0.10772, + "88": 0.10459, + "89": 0.10545, + "90": 0.10691, + "91": 0.10516, + "92": 0.10438, + "93": 0.10542, + "94": 0.10744, + "95": 0.10521, + "96": 0.10614, + "97": 0.10613, + "98": 0.1077, + "99": 0.10781, + "100": 0.10442 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json index 3be93706d81..26272ae12c0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, 
"values": { - "1": 9.17153, - "2": 0.2103, - "3": 0.21541, - "4": 0.21948, - "5": 0.17282, - "6": 0.16921, - "7": 0.1711, - "8": 0.16967, - "9": 0.17064, - "10": 0.16972, - "11": 0.1696, - "12": 0.1701, - "13": 0.16923, - "14": 0.16942, - "15": 0.16782, - "16": 0.17, - "17": 0.16748, - "18": 0.16821, - "19": 0.16739, - "20": 0.16883, - "21": 0.16894, - "22": 0.16847, - "23": 0.16846, - "24": 0.16887, - "25": 0.16905, - "26": 0.16873, - "27": 0.16876, - "28": 0.16868, - "29": 0.1706, - "30": 0.17379, - "31": 0.17109, - "32": 0.17107, - "33": 0.17072, - "34": 0.17137, - "35": 0.17105, - "36": 0.17106, - "37": 0.17077, - "38": 0.17115, - "39": 0.17067, - "40": 0.17057, - "41": 0.17099, - "42": 0.17074, - "43": 0.17091, - "44": 0.17078, - "45": 0.17104, - "46": 0.17055, - "47": 0.17137, - "48": 0.17086, - "49": 0.17081, - "50": 0.17053, - "51": 0.17448, - "52": 0.16607, - "53": 0.16686, - "54": 0.16608, - "55": 0.16654, - "56": 0.16591, - "57": 0.16614, - "58": 0.1659, - "59": 0.16577, - "60": 0.16589, - "61": 0.16557, - "62": 0.16528, - "63": 0.16612, - "64": 0.1658, - "65": 0.16543, - "66": 0.1651, - "67": 0.16559, - "68": 0.16502, - "69": 0.16533, - "70": 0.16636, - "71": 0.16516, - "72": 0.1657, - "73": 0.1656, - "74": 0.16521, - "75": 0.16623, - "76": 0.16628, - "77": 0.16593, - "78": 0.16615, - "79": 0.1658, - "80": 0.16904, - "81": 0.16665, - "82": 0.16575, - "83": 0.16623, - "84": 0.16603, - "85": 0.16577, - "86": 0.16568, - "87": 0.16525, - "88": 0.16531, - "89": 0.16616, - "90": 0.16544, - "91": 0.16581, - "92": 0.16545, - "93": 0.16603, - "94": 0.16501, - "95": 0.16632, - "96": 0.16545, - "97": 0.16577, - "98": 0.19996, - "99": 0.19154, - "100": 0.19156 + "1": 5.31573, + "2": 0.18576, + "3": 0.17476, + "4": 0.16336, + "5": 0.16444, + "6": 0.16376, + "7": 0.16391, + "8": 0.16436, + "9": 0.1647, + "10": 0.16442, + "11": 0.16651, + "12": 0.16415, + "13": 0.1639, + "14": 0.16341, + "15": 0.16405, + "16": 0.16336, + "17": 0.1649, + "18": 0.16416, + "19": 0.16368, 
+ "20": 0.16287, + "21": 0.16352, + "22": 0.16266, + "23": 0.16606, + "24": 0.16733, + "25": 0.15996, + "26": 0.16017, + "27": 0.15966, + "28": 0.15989, + "29": 0.16042, + "30": 0.16078, + "31": 0.1603, + "32": 0.16003, + "33": 0.15993, + "34": 0.16031, + "35": 0.16091, + "36": 0.16047, + "37": 0.16035, + "38": 0.16032, + "39": 0.16044, + "40": 0.15963, + "41": 0.15984, + "42": 0.16183, + "43": 0.16457, + "44": 0.16023, + "45": 0.15984, + "46": 0.15948, + "47": 0.1592, + "48": 0.15954, + "49": 0.16019, + "50": 0.15913, + "51": 0.1678, + "52": 0.1599, + "53": 0.16055, + "54": 0.15919, + "55": 0.15968, + "56": 0.15917, + "57": 0.15882, + "58": 0.15853, + "59": 0.16041, + "60": 0.15905, + "61": 0.16002, + "62": 0.15878, + "63": 0.15894, + "64": 0.15851, + "65": 0.1593, + "66": 0.15905, + "67": 0.15864, + "68": 0.15939, + "69": 0.15875, + "70": 0.16002, + "71": 0.15947, + "72": 0.15984, + "73": 0.15928, + "74": 0.16024, + "75": 0.15992, + "76": 0.15976, + "77": 0.1599, + "78": 0.15928, + "79": 0.15924, + "80": 0.15931, + "81": 0.15912, + "82": 0.15858, + "83": 0.15936, + "84": 0.15981, + "85": 0.16066, + "86": 0.15948, + "87": 0.15924, + "88": 0.15893, + "89": 0.16025, + "90": 0.15868, + "91": 0.15895, + "92": 0.15857, + "93": 0.15929, + "94": 0.15913, + "95": 0.15916, + "96": 0.15869, + "97": 0.15992, + "98": 0.15991, + "99": 0.15932, + "100": 0.15959 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..8b98843a405 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 
100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.86094, + "52": 9.75697, + "53": 10.07633, + "54": 9.96082, + "55": 9.88565, + "56": 9.6349, + "57": 9.4925, + "58": 9.83099, + "59": 9.59122, + "60": 9.50798, + "61": 9.7061, + "62": 9.98413, + "63": 9.37604, + "64": 9.77938, + "65": 8.95852, + "66": 9.70596, + "67": 9.37402, + "68": 9.78683, + "69": 9.78932, + "70": 9.72766, + "71": 9.61135, + "72": 9.59178, + "73": 9.49896, + "74": 8.95742, + "75": 9.42469, + "76": 9.09651, + "77": 10.06653, + "78": 9.73149, + "79": 9.37959, + "80": 9.40394, + "81": 9.48277, + "82": 9.69318, + "83": 9.31104, + "84": 9.4139, + "85": 9.61469, + "86": 9.07793, + "87": 9.59662, + "88": 9.74827, + "89": 9.60196, + "90": 9.81239, + "91": 9.34524, + "92": 9.36524, + "93": 9.07745, + "94": 8.83182, + "95": 9.521, + "96": 9.52525, + "97": 9.31322, + "98": 9.677, + "99": 8.88904, + "100": 9.40063 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": 
"nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2742.0, + "52": 2671.0, + "53": 3066.0, + "54": 2782.0, + "55": 2510.0, + "56": 2874.0, + "57": 2304.0, + "58": 3111.0, + "59": 2862.0, + "60": 2374.0, + "61": 2977.0, + "62": 2740.0, + "63": 2394.0, + "64": 3232.0, + "65": 2720.0, + "66": 3277.0, + "67": 2810.0, + "68": 2830.0, + "69": 3094.0, + "70": 3327.0, + "71": 3106.0, + "72": 2261.0, + "73": 3147.0, + "74": 1902.0, + "75": 2545.0, + "76": 2905.0, + "77": 3468.0, + "78": 3432.0, + "79": 3336.0, + "80": 3434.0, + "81": 3605.0, + "82": 3269.0, + "83": 2891.0, + "84": 3343.0, + "85": 3501.0, + "86": 2786.0, + "87": 3872.0, + "88": 3019.0, + "89": 3407.0, + "90": 3023.0, + "91": 2630.0, + "92": 3186.0, + "93": 2746.0, + "94": 3526.0, + "95": 3414.0, + "96": 3546.0, + "97": 3339.0, + "98": 3758.0, + "99": 3058.0, + "100": 3454.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": 
"nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 463961600.0, + "52": 463961600.0, + "53": 463961600.0, + "54": 463961600.0, + "55": 463961600.0, + "56": 463961600.0, + "57": 463961600.0, + "58": 463961600.0, + "59": 463961600.0, + "60": 463961600.0, + "61": 463961600.0, + "62": 463961600.0, + "63": 463961600.0, + "64": 463961600.0, + "65": 463961600.0, + "66": 463961600.0, + "67": 463961600.0, + "68": 463961600.0, + "69": 463961600.0, + "70": 463961600.0, + "71": 463961600.0, + "72": 463961600.0, + "73": 463961600.0, + "74": 463961600.0, + "75": 463961600.0, + "76": 463961600.0, + "77": 463961600.0, + "78": 463961600.0, + "79": 463961600.0, + "80": 463961600.0, + "81": 463961600.0, + "82": 463961600.0, + "83": 463961600.0, + "84": 463961600.0, + "85": 463961600.0, + "86": 463961600.0, + "87": 463961600.0, + "88": 463961600.0, + "89": 463961600.0, + "90": 463961600.0, + "91": 463961600.0, + "92": 463961600.0, + "93": 463961600.0, + "94": 463961600.0, + "95": 463961600.0, + "96": 463961600.0, + "97": 463961600.0, + "98": 463961600.0, + "99": 463961600.0, + "100": 463961600.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": 
"nan", + "49": "nan", + "50": "nan", + "51": 1680128512.0, + "52": 1680129536.0, + "53": 1680129536.0, + "54": 1680129536.0, + "55": 1680129536.0, + "56": 1680129536.0, + "57": 1680129536.0, + "58": 1680129536.0, + "59": 1680129536.0, + "60": 1680129536.0, + "61": 1680129536.0, + "62": 1680129536.0, + "63": 1680129536.0, + "64": 1680129536.0, + "65": 1680129536.0, + "66": 1680129536.0, + "67": 1680129536.0, + "68": 1680129536.0, + "69": 1680129536.0, + "70": 1680129536.0, + "71": 1680129536.0, + "72": 1680129536.0, + "73": 1680129536.0, + "74": 1680129536.0, + "75": 1680129536.0, + "76": 1680129536.0, + "77": 1680129536.0, + "78": 1680129536.0, + "79": 1680129536.0, + "80": 1680129536.0, + "81": 1680129536.0, + "82": 1680129536.0, + "83": 1680129536.0, + "84": 1680129536.0, + "85": 1680129536.0, + "86": 1680129536.0, + "87": 1680129536.0, + "88": 1680129536.0, + "89": 1680129536.0, + "90": 1680129536.0, + "91": 1680129536.0, + "92": 1680129536.0, + "93": 1680129536.0, + "94": 1680129536.0, + "95": 1680129536.0, + "96": 1680129536.0, + "97": 1680129536.0, + "98": 1680129536.0, + "99": 1680129536.0, + "100": 1680129536.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 
10.38745, + "52": 0.1947, + "53": 0.16573, + "54": 0.16451, + "55": 0.16409, + "56": 0.16412, + "57": 0.16377, + "58": 0.17013, + "59": 0.16235, + "60": 0.16219, + "61": 0.1625, + "62": 0.16258, + "63": 0.16255, + "64": 0.1621, + "65": 0.16202, + "66": 0.16189, + "67": 0.16236, + "68": 0.1626, + "69": 0.16239, + "70": 0.16282, + "71": 0.16351, + "72": 0.16315, + "73": 0.16226, + "74": 0.16223, + "75": 0.16293, + "76": 0.16215, + "77": 0.16226, + "78": 0.1618, + "79": 0.16297, + "80": 0.16219, + "81": 0.1623, + "82": 0.16257, + "83": 0.16228, + "84": 0.16177, + "85": 0.16159, + "86": 0.16175, + "87": 0.16211, + "88": 0.16542, + "89": 0.16094, + "90": 0.16115, + "91": 0.16067, + "92": 0.16092, + "93": 0.1611, + "94": 0.15979, + "95": 0.1611, + "96": 0.16078, + "97": 0.16074, + "98": 0.16087, + "99": 0.15996, + "100": 0.1607 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..0c5b41565c8 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86244, + "2": 10.88582, + "3": 10.84733, + "4": 10.85571, + "5": 10.86, + "6": 10.87733, + "7": 10.86555, + "8": 10.84913, + "9": 10.86609, + "10": 10.82473, + "11": 10.85618, + "12": 10.85374, + "13": 10.86788, + "14": 10.87119, + "15": 10.82235, + "16": 10.79991, + "17": 10.77431, + "18": 10.78345, + "19": 10.79308, + "20": 10.68226, + "21": 10.6471, + "22": 10.50917, + "23": 10.66827, + "24": 10.54193, + "25": 10.4928, + "26": 10.55931, + "27": 10.54238, + "28": 10.51129, + "29": 10.53257, + "30": 10.28992, + "31": 10.02853, + "32": 10.38885, + "33": 10.39593, + "34": 10.13446, + "35": 10.18932, + "36": 10.13355, + 
"37": 10.27381, + "38": 10.10751, + "39": 10.34007, + "40": 9.98538, + "41": 10.06414, + "42": 10.13744, + "43": 9.73381, + "44": 9.86305, + "45": 9.73723, + "46": 9.71343, + "47": 10.07757, + "48": 9.76768, + "49": 9.41987, + "50": 9.81687 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 567.0, + "2": 584.0, + "3": 598.0, + "4": 633.0, + "5": 630.0, + "6": 645.0, + "7": 645.0, + "8": 674.0, + "9": 625.0, + "10": 500.0, + "11": 669.0, + "12": 554.0, + "13": 681.0, + "14": 633.0, + "15": 623.0, + "16": 592.0, + "17": 636.0, + "18": 625.0, + "19": 633.0, + "20": 587.0, + "21": 696.0, + "22": 585.0, + "23": 681.0, + "24": 639.0, + "25": 587.0, + "26": 642.0, + "27": 639.0, + "28": 744.0, + "29": 746.0, + "30": 685.0, + "31": 603.0, + "32": 719.0, + "33": 850.0, + "34": 696.0, + "35": 737.0, + "36": 738.0, + "37": 840.0, + "38": 757.0, + "39": 828.0, + "40": 828.0, + "41": 787.0, + "42": 883.0, + "43": 703.0, + "44": 850.0, + "45": 840.0, + "46": 837.0, + "47": 915.0, + "48": 849.0, + "49": 915.0, + "50": 892.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 459571712.0, + "2": 459571712.0, + "3": 459571712.0, + "4": 459571712.0, + "5": 459571712.0, + "6": 459571712.0, + "7": 459571712.0, + "8": 459571712.0, + "9": 459571712.0, + "10": 459571712.0, + "11": 459571712.0, + "12": 459571712.0, + "13": 459571712.0, + "14": 459571712.0, + "15": 459571712.0, + "16": 459571712.0, + "17": 459571712.0, + "18": 459571712.0, + "19": 459571712.0, + "20": 459571712.0, + "21": 459571712.0, + "22": 459571712.0, + "23": 459571712.0, + "24": 459571712.0, + "25": 459571712.0, + "26": 459571712.0, + "27": 459571712.0, + "28": 459571712.0, + "29": 459571712.0, + "30": 459571712.0, + "31": 459571712.0, + "32": 459571712.0, + "33": 459571712.0, + "34": 459571712.0, + "35": 459571712.0, + "36": 459571712.0, + "37": 459571712.0, + "38": 459571712.0, + "39": 459571712.0, 
+ "40": 459571712.0, + "41": 459571712.0, + "42": 459571712.0, + "43": 459571712.0, + "44": 459571712.0, + "45": 459571712.0, + "46": 459571712.0, + "47": 459571712.0, + "48": 459571712.0, + "49": 459571712.0, + "50": 459571712.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 708779008.0, + "2": 882038272.0, + "3": 882562560.0, + "4": 882562560.0, + "5": 882562560.0, + "6": 882562560.0, + "7": 882562560.0, + "8": 882562560.0, + "9": 882562560.0, + "10": 882562560.0, + "11": 882562560.0, + "12": 882562560.0, + "13": 882562560.0, + "14": 882562560.0, + "15": 882562560.0, + "16": 882562560.0, + "17": 882562560.0, + "18": 882562560.0, + "19": 882562560.0, + "20": 882562560.0, + "21": 882562560.0, + "22": 882562560.0, + "23": 882562560.0, + "24": 882562560.0, + "25": 882562560.0, + "26": 882562560.0, + "27": 882562560.0, + "28": 883608576.0, + "29": 883608576.0, + "30": 883608576.0, + "31": 883608576.0, + "32": 883608576.0, + "33": 883608576.0, + "34": 883608576.0, + "35": 883608576.0, + "36": 883608576.0, + "37": 883608576.0, + "38": 883608576.0, + "39": 883608576.0, + "40": 883608576.0, + "41": 883608576.0, + "42": 883608576.0, + "43": 883608576.0, + "44": 883608576.0, + "45": 883608576.0, + "46": 883608576.0, + "47": 883608576.0, + "48": 883608576.0, + "49": 883608576.0, + "50": 883608576.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.32961, + "2": 0.54797, + "3": 0.51657, + "4": 0.52599, + "5": 0.61023, + "6": 0.69053, + "7": 0.5446, + "8": 0.51966, + "9": 0.52377, + "10": 0.52901, + "11": 0.52742, + "12": 0.53394, + "13": 0.52346, + "14": 0.52257, + "15": 0.51751, + "16": 0.48338, + "17": 0.48757, + "18": 0.52092, + "19": 0.49857, + "20": 0.49815, + "21": 0.49063, + "22": 0.49632, + "23": 0.4849, + "24": 0.49986, + "25": 0.48483, + "26": 0.49826, + "27": 0.48315, + "28": 0.4875, + "29": 0.498, + "30": 0.49611, + "31": 0.4984, 
+ "32": 0.5284, + "33": 0.50276, + "34": 0.49132, + "35": 0.49787, + "36": 0.4947, + "37": 0.48747, + "38": 0.4952, + "39": 0.49214, + "40": 0.49151, + "41": 0.49593, + "42": 0.49285, + "43": 0.49745, + "44": 0.48784, + "45": 0.51195, + "46": 0.53565, + "47": 0.53921, + "48": 0.53697, + "49": 0.5397, + "50": 0.55869 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json index 81005995dad..5b1ee17f8f6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 509641216.0, - "2": 509641216.0, - "3": 509641216.0, - "4": 509641216.0, - "5": 509641216.0, - "6": 509641216.0, - "7": 509641216.0, - "8": 509641216.0, - "9": 509641216.0, - "10": 509641216.0, - "11": 509641216.0, - "12": 509641216.0, - "13": 509641216.0, - "14": 509641216.0, - "15": 509641216.0, - "16": 509641216.0, - "17": 509641216.0, - "18": 509641216.0, - "19": 509641216.0, - "20": 509641216.0, - "21": 509641216.0, - "22": 509641216.0, - "23": 509641216.0, - "24": 509641216.0, - "25": 509641216.0, - "26": 509641216.0, - "27": 509641216.0, - "28": 509641216.0, - "29": 509641216.0, - "30": 509641216.0, - "31": 509641216.0, - "32": 509641216.0, - "33": 509641216.0, - "34": 509641216.0, - "35": 509641216.0, - "36": 509641216.0, - "37": 509641216.0, - "38": 509641216.0, - "39": 509641216.0, - "40": 509641216.0, - "41": 509641216.0, - "42": 509641216.0, - "43": 509641216.0, - "44": 509641216.0, - "45": 509641216.0, - "46": 509641216.0, - "47": 509641216.0, - "48": 509641216.0, - "49": 509641216.0, - "50": 509641216.0 + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + 
"5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 756751872.0, - "2": 932632064.0, - "3": 932632064.0, - "4": 932632064.0, - "5": 932632064.0, - "6": 932632064.0, - "7": 932632064.0, - "8": 932632064.0, - "9": 932632064.0, - "10": 933679616.0, - "11": 933679616.0, - "12": 933679616.0, - "13": 933679616.0, - "14": 933679616.0, - "15": 933679616.0, - "16": 933679616.0, - "17": 933679616.0, - "18": 933679616.0, - "19": 933679616.0, - "20": 933679616.0, - "21": 933679616.0, - "22": 933679616.0, - "23": 933679616.0, - "24": 933679616.0, - "25": 933679616.0, - "26": 933679616.0, - "27": 933679616.0, - "28": 933679616.0, - "29": 933679616.0, - "30": 933679616.0, - "31": 933679616.0, - "32": 933679616.0, - "33": 933679616.0, - "34": 933679616.0, - "35": 933679616.0, - "36": 933679616.0, - "37": 933679616.0, - "38": 933679616.0, - "39": 933679616.0, - "40": 933679616.0, - "41": 933679616.0, - "42": 933679616.0, - "43": 933679616.0, - "44": 933679616.0, - "45": 933680640.0, - 
"46": 933680640.0, - "47": 933680640.0, - "48": 933680640.0, - "49": 933680640.0, - "50": 933680640.0 + "1": 757802496.0, + "2": 935777792.0, + "3": 938397696.0, + "4": 938397696.0, + "5": 938397696.0, + "6": 938397696.0, + "7": 938397696.0, + "8": 938397696.0, + "9": 938397696.0, + "10": 938398208.0, + "11": 938398208.0, + "12": 938398208.0, + "13": 938398208.0, + "14": 938398720.0, + "15": 938398720.0, + "16": 938398720.0, + "17": 938398720.0, + "18": 938398720.0, + "19": 938398720.0, + "20": 938398720.0, + "21": 938398720.0, + "22": 938398720.0, + "23": 938398720.0, + "24": 938398720.0, + "25": 938399232.0, + "26": 938399232.0, + "27": 938399232.0, + "28": 938399232.0, + "29": 938399232.0, + "30": 938399232.0, + "31": 938399232.0, + "32": 938399232.0, + "33": 938399232.0, + "34": 938399232.0, + "35": 938399232.0, + "36": 938399232.0, + "37": 938399232.0, + "38": 938399232.0, + "39": 938399232.0, + "40": 938399232.0, + "41": 938399232.0, + "42": 938399232.0, + "43": 938399232.0, + "44": 938399232.0, + "45": 938399232.0, + "46": 938399232.0, + "47": 938399232.0, + "48": 938399232.0, + "49": 938399232.0, + "50": 938399232.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 42.02117, - "2": 0.34315, - "3": 0.31657, - "4": 0.29715, - "5": 0.29109, - "6": 0.28638, - "7": 0.28745, - "8": 0.29318, - "9": 0.30075, - "10": 0.29578, - "11": 0.30101, - "12": 0.29769, - "13": 0.2954, - "14": 0.2989, - "15": 0.29627, - "16": 0.29342, - "17": 0.29396, - "18": 0.29431, - "19": 0.29408, - "20": 0.29286, - "21": 0.29361, - "22": 0.29448, - "23": 0.29521, - "24": 0.29494, - "25": 0.29812, - "26": 0.29413, - "27": 0.2949, - "28": 0.29469, - "29": 0.29393, - "30": 0.29682, - "31": 0.2951, - "32": 0.29532, - "33": 0.29449, - "34": 0.29334, - "35": 0.29679, - "36": 0.29557, - "37": 0.29495, - "38": 0.29826, - "39": 0.29574, - "40": 0.2972, - "41": 0.29568, - "42": 0.29643, - "43": 0.29627, - "44": 0.29491, - "45": 0.29476, - "46": 
0.29707, - "47": 0.35995, - "48": 0.28743, - "49": 0.28604, - "50": 0.28593 + "1": 35.36663, + "2": 0.35208, + "3": 0.32012, + "4": 0.29736, + "5": 0.30009, + "6": 0.29722, + "7": 0.29604, + "8": 0.29598, + "9": 0.30123, + "10": 0.29278, + "11": 0.29195, + "12": 0.30003, + "13": 0.2957, + "14": 0.2935, + "15": 0.29372, + "16": 0.2984, + "17": 0.29013, + "18": 0.29041, + "19": 0.2934, + "20": 0.29454, + "21": 0.2936, + "22": 0.29663, + "23": 0.29453, + "24": 0.29404, + "25": 0.2912, + "26": 0.29009, + "27": 0.29448, + "28": 0.29043, + "29": 0.29359, + "30": 0.29413, + "31": 0.29317, + "32": 0.29247, + "33": 0.29418, + "34": 0.2938, + "35": 0.29207, + "36": 0.31485, + "37": 0.29543, + "38": 0.29402, + "39": 0.29262, + "40": 0.2957, + "41": 0.29348, + "42": 0.29242, + "43": 0.29117, + "44": 0.2927, + "45": 0.29263, + "46": 0.29024, + "47": 0.29404, + "48": 0.28901, + "49": 0.28844, + "50": 0.29053 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..b9bbabe5437 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86244, + "2": 10.88582, + "3": 10.84733, + "4": 10.85571, + "5": 10.86, + "6": 10.87733, + "7": 10.86555, + "8": 10.84913, + "9": 10.86609, + "10": 10.82473, + "11": 10.85618, + "12": 10.85374, + "13": 10.86788, + "14": 10.87119, + "15": 10.82235, + "16": 10.79991, + "17": 10.77431, + "18": 10.78345, + "19": 10.79308, + "20": 10.68226, + "21": 10.6471, + "22": 10.50917, + "23": 10.66827, + "24": 10.54193, + "25": 10.4928, + "26": 10.55931, + "27": 10.54238, + "28": 10.51129, + 
"29": 10.53257, + "30": 10.28992, + "31": 10.02853, + "32": 10.38885, + "33": 10.39593, + "34": 10.13446, + "35": 10.18932, + "36": 10.13355, + "37": 10.27381, + "38": 10.10751, + "39": 10.34007, + "40": 9.98538, + "41": 10.06414, + "42": 10.13744, + "43": 9.73381, + "44": 9.86305, + "45": 9.73723, + "46": 9.71343, + "47": 10.07757, + "48": 9.76768, + "49": 9.41987, + "50": 9.81687 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 567.0, + "2": 584.0, + "3": 598.0, + "4": 633.0, + "5": 630.0, + "6": 645.0, + "7": 645.0, + "8": 674.0, + "9": 625.0, + "10": 500.0, + "11": 669.0, + "12": 554.0, + "13": 681.0, + "14": 633.0, + "15": 623.0, + "16": 592.0, + "17": 636.0, + "18": 625.0, + "19": 633.0, + "20": 587.0, + "21": 696.0, + "22": 585.0, + "23": 681.0, + "24": 639.0, + "25": 587.0, + "26": 642.0, + "27": 639.0, + "28": 744.0, + "29": 746.0, + "30": 685.0, + "31": 603.0, + "32": 719.0, + "33": 850.0, + "34": 696.0, + "35": 737.0, + "36": 738.0, + "37": 840.0, + "38": 757.0, + "39": 828.0, + "40": 828.0, + "41": 787.0, + "42": 883.0, + "43": 703.0, + "44": 850.0, + "45": 840.0, + "46": 837.0, + "47": 915.0, + "48": 849.0, + "49": 915.0, + "50": 892.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 460096000.0, + "2": 460096000.0, + "3": 460096000.0, + "4": 460096000.0, + "5": 460096000.0, + "6": 460096000.0, + "7": 460096000.0, + "8": 460096000.0, + "9": 460096000.0, + "10": 460096000.0, + "11": 460096000.0, + "12": 460096000.0, + "13": 460096000.0, + "14": 460096000.0, + "15": 460096000.0, + "16": 460096000.0, + "17": 460096000.0, + "18": 460096000.0, + "19": 460096000.0, + "20": 460096000.0, + "21": 460096000.0, + "22": 460096000.0, + "23": 460096000.0, + "24": 460096000.0, + "25": 460096000.0, + "26": 460096000.0, + "27": 460096000.0, + "28": 460096000.0, + "29": 460096000.0, + "30": 460096000.0, + "31": 460096000.0, + "32": 460096000.0, + 
"33": 460096000.0, + "34": 460096000.0, + "35": 460096000.0, + "36": 460096000.0, + "37": 460096000.0, + "38": 460096000.0, + "39": 460096000.0, + "40": 460096000.0, + "41": 460096000.0, + "42": 460096000.0, + "43": 460096000.0, + "44": 460096000.0, + "45": 460096000.0, + "46": 460096000.0, + "47": 460096000.0, + "48": 460096000.0, + "49": 460096000.0, + "50": 460096000.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 704587264.0, + "2": 885184000.0, + "3": 885184000.0, + "4": 885184000.0, + "5": 885184000.0, + "6": 885184000.0, + "7": 886231040.0, + "8": 886231552.0, + "9": 886231552.0, + "10": 886231552.0, + "11": 886231552.0, + "12": 886231552.0, + "13": 886231552.0, + "14": 886231552.0, + "15": 886231552.0, + "16": 886231552.0, + "17": 886231552.0, + "18": 886231552.0, + "19": 886231552.0, + "20": 886231552.0, + "21": 886231552.0, + "22": 886231552.0, + "23": 886231552.0, + "24": 886231552.0, + "25": 886231552.0, + "26": 886231552.0, + "27": 886232064.0, + "28": 886232064.0, + "29": 886232064.0, + "30": 886232064.0, + "31": 886232064.0, + "32": 886232064.0, + "33": 886232064.0, + "34": 886232064.0, + "35": 886232064.0, + "36": 886232064.0, + "37": 886232064.0, + "38": 886232064.0, + "39": 886232064.0, + "40": 886232064.0, + "41": 886232064.0, + "42": 886232064.0, + "43": 886232064.0, + "44": 886232064.0, + "45": 886232064.0, + "46": 886232064.0, + "47": 886232064.0, + "48": 886232064.0, + "49": 886232064.0, + "50": 886232064.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.9895, + "2": 0.51807, + "3": 0.49599, + "4": 0.47064, + "5": 0.48452, + "6": 0.41822, + "7": 0.41485, + "8": 0.4156, + "9": 0.43484, + "10": 0.40847, + "11": 0.5122, + "12": 0.40698, + "13": 0.40749, + "14": 0.49304, + "15": 0.49799, + "16": 0.40895, + "17": 0.41708, + "18": 0.44007, + "19": 0.47716, + "20": 0.47638, + "21": 0.41659, + "22": 0.4125, + 
"23": 0.41163, + "24": 0.46826, + "25": 0.46402, + "26": 0.42136, + "27": 0.4113, + "28": 0.40612, + "29": 0.61576, + "30": 0.74613, + "31": 0.47263, + "32": 0.48955, + "33": 0.72478, + "34": 0.5927, + "35": 0.6127, + "36": 0.44041, + "37": 0.42799, + "38": 0.46386, + "39": 0.42311, + "40": 0.42142, + "41": 0.42074, + "42": 0.42015, + "43": 0.43664, + "44": 0.41727, + "45": 0.41517, + "46": 0.42041, + "47": 0.58839, + "48": 0.4946, + "49": 0.5046, + "50": 0.50846 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json index 873d08f92a3..f5628621ad5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 511214080.0, - "2": 511214080.0, - "3": 511214080.0, - "4": 511214080.0, - "5": 511214080.0, - "6": 511214080.0, - "7": 511214080.0, - "8": 511214080.0, - "9": 511214080.0, - "10": 511214080.0, - "11": 511214080.0, - "12": 511214080.0, - "13": 511214080.0, - "14": 511214080.0, - "15": 511214080.0, - "16": 511214080.0, - "17": 511214080.0, - "18": 511214080.0, - "19": 511214080.0, - "20": 511214080.0, - "21": 511214080.0, - "22": 511214080.0, - "23": 511214080.0, - "24": 511214080.0, - "25": 511214080.0, - "26": 511214080.0, - "27": 511214080.0, - "28": 511214080.0, - "29": 511214080.0, - "30": 511214080.0, - "31": 511214080.0, - "32": 511214080.0, - "33": 511214080.0, - "34": 511214080.0, - "35": 511214080.0, - "36": 511214080.0, - "37": 511214080.0, - "38": 511214080.0, - "39": 511214080.0, - "40": 511214080.0, - "41": 511214080.0, - "42": 511214080.0, 
- "43": 511214080.0, - "44": 511214080.0, - "45": 511214080.0, - "46": 511214080.0, - "47": 511214080.0, - "48": 511214080.0, - "49": 511214080.0, - "50": 511214080.0 + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 756753920.0, - "2": 935776768.0, - "3": 935777792.0, - "4": 935777792.0, - "5": 935777792.0, - "6": 935777792.0, - "7": 935777792.0, - "8": 935777792.0, - "9": 935777792.0, - "10": 935777792.0, - "11": 935777792.0, - "12": 935777792.0, - "13": 935777792.0, - "14": 935777792.0, - "15": 935777792.0, - "16": 935777792.0, - "17": 935777792.0, - "18": 935777792.0, - "19": 935777792.0, - "20": 935777792.0, - "21": 935777792.0, - "22": 935777792.0, - "23": 935777792.0, - "24": 935777792.0, - "25": 935777792.0, - "26": 935777792.0, - "27": 935777792.0, - "28": 935777792.0, - "29": 935777792.0, - "30": 935777792.0, - "31": 935777792.0, - "32": 935777792.0, - "33": 935777792.0, - 
"34": 935777792.0, - "35": 935777792.0, - "36": 935777792.0, - "37": 935777792.0, - "38": 935777792.0, - "39": 935777792.0, - "40": 935777792.0, - "41": 935777792.0, - "42": 935777792.0, - "43": 935777792.0, - "44": 935777792.0, - "45": 935777792.0, - "46": 935777792.0, - "47": 935777792.0, - "48": 935777792.0, - "49": 935777792.0, - "50": 935777792.0 + "1": 755704320.0, + "2": 938398720.0, + "3": 938398720.0, + "4": 938398720.0, + "5": 938398720.0, + "6": 938399232.0, + "7": 938399232.0, + "8": 938399232.0, + "9": 938399232.0, + "10": 938399232.0, + "11": 938399232.0, + "12": 938399232.0, + "13": 938399232.0, + "14": 938399232.0, + "15": 938399232.0, + "16": 938399232.0, + "17": 938399232.0, + "18": 938399232.0, + "19": 938399232.0, + "20": 938399232.0, + "21": 938399232.0, + "22": 938399232.0, + "23": 938399232.0, + "24": 938399232.0, + "25": 938399232.0, + "26": 938399232.0, + "27": 938399232.0, + "28": 938399232.0, + "29": 938399232.0, + "30": 938399232.0, + "31": 938399232.0, + "32": 938399232.0, + "33": 938399232.0, + "34": 938399232.0, + "35": 938399232.0, + "36": 938399232.0, + "37": 938399232.0, + "38": 938399232.0, + "39": 938399232.0, + "40": 938399232.0, + "41": 938399232.0, + "42": 938399232.0, + "43": 938399232.0, + "44": 938399232.0, + "45": 938399232.0, + "46": 938399232.0, + "47": 938399232.0, + "48": 938399232.0, + "49": 938399232.0, + "50": 938399232.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 44.927, - "2": 0.34811, - "3": 0.31209, - "4": 0.29049, - "5": 0.28904, - "6": 0.28728, - "7": 0.28884, - "8": 0.29393, - "9": 0.28153, - "10": 0.28717, - "11": 0.28861, - "12": 0.29265, - "13": 0.29015, - "14": 0.29189, - "15": 0.29081, - "16": 0.29742, - "17": 0.29933, - "18": 0.29528, - "19": 0.29058, - "20": 0.29304, - "21": 0.29307, - "22": 0.29297, - "23": 0.2889, - "24": 0.29028, - "25": 0.29626, - "26": 0.29321, - "27": 0.29347, - "28": 0.29303, - "29": 0.2812, - "30": 0.28971, - "31": 
0.28878, - "32": 0.28499, - "33": 0.28119, - "34": 0.27908, - "35": 0.28101, - "36": 0.2794, - "37": 0.2798, - "38": 0.27799, - "39": 0.28519, - "40": 0.28246, - "41": 0.28126, - "42": 0.28572, - "43": 0.28647, - "44": 0.28772, - "45": 0.28736, - "46": 0.29677, - "47": 0.29247, - "48": 0.29174, - "49": 0.29182, - "50": 0.29085 + "1": 35.29813, + "2": 0.37906, + "3": 0.30948, + "4": 0.2886, + "5": 0.28858, + "6": 0.29461, + "7": 0.28328, + "8": 0.28783, + "9": 0.28448, + "10": 0.28698, + "11": 0.28404, + "12": 0.28717, + "13": 0.2828, + "14": 0.2846, + "15": 0.28648, + "16": 0.28793, + "17": 0.28473, + "18": 0.28326, + "19": 0.28524, + "20": 0.29094, + "21": 0.29401, + "22": 0.28944, + "23": 0.28693, + "24": 0.29508, + "25": 0.28683, + "26": 0.28507, + "27": 0.2849, + "28": 0.28658, + "29": 0.28518, + "30": 0.28539, + "31": 0.2829, + "32": 0.28482, + "33": 0.28454, + "34": 0.28634, + "35": 0.28739, + "36": 0.28563, + "37": 0.28401, + "38": 0.28251, + "39": 0.28156, + "40": 0.28197, + "41": 0.28236, + "42": 0.27995, + "43": 0.28293, + "44": 0.28018, + "45": 0.28419, + "46": 0.28512, + "47": 0.2818, + "48": 0.28099, + "49": 0.2831, + "50": 0.28153 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..8175fe3e6be --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86244, + "2": 10.88582, + "3": 10.84736, + "4": 10.85573, + "5": 10.86003, + "6": 10.87733, + "7": 10.8656, + "8": 10.84911, + "9": 10.86609, + "10": 10.82475, + "11": 10.8562, + 
"12": 10.85373, + "13": 10.86788, + "14": 10.87111, + "15": 10.8223, + "16": 10.79994, + "17": 10.77431, + "18": 10.78343, + "19": 10.79309, + "20": 10.68225, + "21": 10.64708, + "22": 10.50918, + "23": 10.66826, + "24": 10.54194, + "25": 10.49281, + "26": 10.55932, + "27": 10.54239, + "28": 10.51128, + "29": 10.53257, + "30": 10.28989, + "31": 10.02853, + "32": 10.3888, + "33": 10.39592, + "34": 10.13449, + "35": 10.18931, + "36": 10.13352, + "37": 10.27378, + "38": 10.1075, + "39": 10.34011, + "40": 9.98542, + "41": 10.06415, + "42": 10.1375, + "43": 9.73383, + "44": 9.86311, + "45": 9.73726, + "46": 9.71341, + "47": 10.07757, + "48": 9.76762, + "49": 9.4199, + "50": 9.81687 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 567.0, + "2": 609.0, + "3": 638.0, + "4": 657.0, + "5": 654.0, + "6": 637.0, + "7": 614.0, + "8": 599.0, + "9": 637.0, + "10": 517.0, + "11": 673.0, + "12": 640.0, + "13": 685.0, + "14": 609.0, + "15": 596.0, + "16": 653.0, + "17": 590.0, + "18": 559.0, + "19": 675.0, + "20": 598.0, + "21": 699.0, + "22": 631.0, + "23": 650.0, + "24": 625.0, + "25": 591.0, + "26": 627.0, + "27": 684.0, + "28": 679.0, + "29": 748.0, + "30": 703.0, + "31": 626.0, + "32": 724.0, + "33": 753.0, + "34": 658.0, + "35": 727.0, + "36": 730.0, + "37": 861.0, + "38": 778.0, + "39": 899.0, + "40": 845.0, + "41": 770.0, + "42": 819.0, + "43": 716.0, + "44": 793.0, + "45": 770.0, + "46": 849.0, + "47": 900.0, + "48": 873.0, + "49": 852.0, + "50": 888.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 461144576.0, + "2": 461144576.0, + "3": 461144576.0, + "4": 461144576.0, + "5": 461144576.0, + "6": 461144576.0, + "7": 461144576.0, + "8": 461144576.0, + "9": 461144576.0, + "10": 461144576.0, + "11": 461144576.0, + "12": 461144576.0, + "13": 461144576.0, + "14": 461144576.0, + "15": 461144576.0, + "16": 461144576.0, + "17": 461144576.0, + "18": 
461144576.0, + "19": 461144576.0, + "20": 461144576.0, + "21": 461144576.0, + "22": 461144576.0, + "23": 461144576.0, + "24": 461144576.0, + "25": 461144576.0, + "26": 461144576.0, + "27": 461144576.0, + "28": 461144576.0, + "29": 461144576.0, + "30": 461144576.0, + "31": 461144576.0, + "32": 461144576.0, + "33": 461144576.0, + "34": 461144576.0, + "35": 461144576.0, + "36": 461144576.0, + "37": 461144576.0, + "38": 461144576.0, + "39": 461144576.0, + "40": 461144576.0, + "41": 461144576.0, + "42": 461144576.0, + "43": 461144576.0, + "44": 461144576.0, + "45": 461144576.0, + "46": 461144576.0, + "47": 461144576.0, + "48": 461144576.0, + "49": 461144576.0, + "50": 461144576.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 705635840.0, + "2": 884659712.0, + "3": 885183488.0, + "4": 885183488.0, + "5": 885707264.0, + "6": 885707264.0, + "7": 885707264.0, + "8": 885707264.0, + "9": 885707264.0, + "10": 885707264.0, + "11": 885707264.0, + "12": 885707264.0, + "13": 885707264.0, + "14": 885707264.0, + "15": 885707264.0, + "16": 885707264.0, + "17": 885707264.0, + "18": 885707264.0, + "19": 885707264.0, + "20": 885707264.0, + "21": 885707264.0, + "22": 885707264.0, + "23": 885707264.0, + "24": 885707264.0, + "25": 885707264.0, + "26": 885707264.0, + "27": 885707264.0, + "28": 885707264.0, + "29": 885707264.0, + "30": 885708288.0, + "31": 885708288.0, + "32": 885708288.0, + "33": 885708288.0, + "34": 885708288.0, + "35": 885708288.0, + "36": 885708288.0, + "37": 885708288.0, + "38": 885708288.0, + "39": 885708288.0, + "40": 885708288.0, + "41": 885708288.0, + "42": 885708288.0, + "43": 885708288.0, + "44": 885708288.0, + "45": 885708288.0, + "46": 885708288.0, + "47": 885708288.0, + "48": 885708288.0, + "49": 885708288.0, + "50": 885708288.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.21093, + "2": 0.56501, + "3": 0.71491, + "4": 
0.53313, + "5": 0.43082, + "6": 0.4637, + "7": 0.40802, + "8": 0.46193, + "9": 0.40155, + "10": 0.40252, + "11": 0.52711, + "12": 0.4035, + "13": 0.40765, + "14": 0.40187, + "15": 0.40322, + "16": 0.40497, + "17": 0.40698, + "18": 0.40153, + "19": 0.46487, + "20": 0.40131, + "21": 0.4044, + "22": 0.40166, + "23": 0.40177, + "24": 0.40507, + "25": 0.405, + "26": 0.40144, + "27": 0.40453, + "28": 0.40108, + "29": 0.4052, + "30": 0.40603, + "31": 0.40719, + "32": 0.40638, + "33": 0.40514, + "34": 0.44714, + "35": 0.40534, + "36": 0.40221, + "37": 0.3984, + "38": 0.40367, + "39": 0.40221, + "40": 0.43747, + "41": 0.40384, + "42": 0.40404, + "43": 0.40132, + "44": 0.40047, + "45": 0.40017, + "46": 0.40235, + "47": 0.39964, + "48": 0.39919, + "49": 0.40337, + "50": 0.48503 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_h100.json index a74ab8d8415..4f56833e7b4 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_h100.json @@ -4,55 +4,55 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.86535, - "2": 10.85873, - "3": 10.86281, - "4": 10.84011, - "5": 10.87855, - "6": 10.88849, - "7": 10.86536, - "8": 10.86016, - "9": 10.85987, - "10": 10.82979, - "11": 10.88946, - "12": 10.87508, - "13": 10.87423, - "14": 10.89679, - "15": 10.82052, - "16": 10.825, - "17": 10.78984, - "18": 10.81026, - "19": 10.80535, - "20": 10.70395, - "21": 10.66988, - "22": 10.50641, - "23": 10.69004, - "24": 10.56309, + "1": 10.86539, + "2": 10.85871, + "3": 10.86283, + "4": 10.84007, + "5": 
10.87858, + "6": 10.88853, + "7": 10.86535, + "8": 10.86014, + "9": 10.85986, + "10": 10.82982, + "11": 10.8895, + "12": 10.87512, + "13": 10.87426, + "14": 10.89677, + "15": 10.82053, + "16": 10.82502, + "17": 10.78982, + "18": 10.81027, + "19": 10.80531, + "20": 10.70397, + "21": 10.66991, + "22": 10.50643, + "23": 10.69005, + "24": 10.56312, "25": 10.49417, - "26": 10.56626, - "27": 10.58024, - "28": 10.51572, - "29": 10.55294, - "30": 10.30552, - "31": 10.02243, - "32": 10.40616, - "33": 10.39875, + "26": 10.56627, + "27": 10.58022, + "28": 10.51571, + "29": 10.55299, + "30": 10.30551, + "31": 10.02246, + "32": 10.40615, + "33": 10.39877, "34": 10.13772, - "35": 10.20189, - "36": 10.16048, - "37": 10.28972, - "38": 10.11479, + "35": 10.20183, + "36": 10.16051, + "37": 10.28969, + "38": 10.11485, "39": 10.361, - "40": 10.01902, - "41": 10.07292, - "42": 10.14694, - "43": 9.74686, - "44": 9.87768, - "45": 9.74966, - "46": 9.7338, - "47": 10.07535, + "40": 10.01897, + "41": 10.07294, + "42": 10.14697, + "43": 9.74687, + "44": 9.87765, + "45": 9.74965, + "46": 9.73384, + "47": 10.07538, "48": 9.7807, - "49": 9.44783, + "49": 9.4478, "50": 9.83991 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 600.0, - "2": 620.0, - "3": 606.0, - "4": 684.0, - "5": 647.0, - "6": 679.0, - "7": 630.0, - "8": 568.0, - "9": 627.0, - "10": 519.0, - "11": 635.0, - "12": 640.0, - "13": 677.0, - "14": 631.0, - "15": 668.0, - "16": 666.0, - "17": 671.0, - "18": 623.0, - "19": 658.0, - "20": 639.0, - "21": 624.0, - "22": 614.0, - "23": 741.0, - "24": 607.0, - "25": 636.0, - "26": 639.0, - "27": 689.0, - "28": 751.0, - "29": 724.0, - "30": 771.0, - "31": 564.0, - "32": 750.0, - "33": 765.0, - "34": 693.0, - "35": 737.0, - "36": 754.0, - "37": 807.0, - "38": 786.0, - "39": 879.0, - "40": 737.0, + "1": 565.0, + "2": 674.0, + "3": 644.0, + "4": 621.0, + "5": 633.0, + "6": 641.0, + "7": 595.0, + "8": 543.0, + "9": 654.0, + "10": 529.0, + "11": 674.0, + "12": 661.0, 
+ "13": 675.0, + "14": 643.0, + "15": 634.0, + "16": 659.0, + "17": 682.0, + "18": 639.0, + "19": 625.0, + "20": 633.0, + "21": 596.0, + "22": 628.0, + "23": 708.0, + "24": 616.0, + "25": 605.0, + "26": 645.0, + "27": 692.0, + "28": 796.0, + "29": 783.0, + "30": 681.0, + "31": 587.0, + "32": 719.0, + "33": 764.0, + "34": 731.0, + "35": 725.0, + "36": 695.0, + "37": 815.0, + "38": 759.0, + "39": 857.0, + "40": 772.0, "41": 817.0, - "42": 857.0, - "43": 709.0, - "44": 808.0, - "45": 795.0, - "46": 837.0, - "47": 879.0, - "48": 899.0, - "49": 890.0, - "50": 860.0 + "42": 778.0, + "43": 728.0, + "44": 810.0, + "45": 770.0, + "46": 858.0, + "47": 881.0, + "48": 894.0, + "49": 906.0, + "50": 808.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 512262656.0, + "2": 512262656.0, + "3": 512262656.0, + "4": 512262656.0, + "5": 512262656.0, + "6": 512262656.0, + "7": 512262656.0, + "8": 
512262656.0, + "9": 512262656.0, + "10": 512262656.0, + "11": 512262656.0, + "12": 512262656.0, + "13": 512262656.0, + "14": 512262656.0, + "15": 512262656.0, + "16": 512262656.0, + "17": 512262656.0, + "18": 512262656.0, + "19": 512262656.0, + "20": 512262656.0, + "21": 512262656.0, + "22": 512262656.0, + "23": 512262656.0, + "24": 512262656.0, + "25": 512262656.0, + "26": 512262656.0, + "27": 512262656.0, + "28": 512262656.0, + "29": 512262656.0, + "30": 512262656.0, + "31": 512262656.0, + "32": 512262656.0, + "33": 512262656.0, + "34": 512262656.0, + "35": 512262656.0, + "36": 512262656.0, + "37": 512262656.0, + "38": 512262656.0, + "39": 512262656.0, + "40": 512262656.0, + "41": 512262656.0, + "42": 512262656.0, + "43": 512262656.0, + "44": 512262656.0, + "45": 512262656.0, + "46": 512262656.0, + "47": 512262656.0, + "48": 512262656.0, + "49": 512262656.0, + "50": 512262656.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 759895552.0, - "2": 933156352.0, - "3": 933156352.0, - "4": 933156352.0, - "5": 933156352.0, - "6": 933156352.0, - "7": 933156352.0, - "8": 933156352.0, - "9": 933156352.0, - "10": 933156352.0, - "11": 933156352.0, - "12": 933156352.0, - "13": 933156352.0, - "14": 933156352.0, - "15": 933156352.0, - "16": 933156352.0, - "17": 933156352.0, - "18": 933156352.0, - "19": 933156352.0, - "20": 933156352.0, - "21": 933156352.0, - "22": 933156352.0, - "23": 933156352.0, - "24": 934204928.0, - "25": 934204928.0, - "26": 934204928.0, - "27": 934204928.0, - "28": 934204928.0, - "29": 934204928.0, - "30": 934204928.0, - "31": 934204928.0, - "32": 934204928.0, - "33": 934204928.0, - "34": 934204928.0, - "35": 934204928.0, - "36": 934204928.0, - "37": 934204928.0, - "38": 934204928.0, - "39": 934204928.0, - "40": 934204928.0, - "41": 934204928.0, - "42": 934204928.0, - "43": 934204928.0, - "44": 934204928.0, - "45": 934204928.0, - "46": 934204928.0, - "47": 934204928.0, - "48": 934204928.0, - 
"49": 934204928.0, - "50": 934204928.0 + "1": 755704832.0, + "2": 935776768.0, + "3": 935777792.0, + "4": 935777792.0, + "5": 935777792.0, + "6": 935777792.0, + "7": 935777792.0, + "8": 935777792.0, + "9": 935777792.0, + "10": 935777792.0, + "11": 935777792.0, + "12": 935777792.0, + "13": 935777792.0, + "14": 935777792.0, + "15": 935777792.0, + "16": 935777792.0, + "17": 935777792.0, + "18": 935777792.0, + "19": 935777792.0, + "20": 935777792.0, + "21": 935777792.0, + "22": 935777792.0, + "23": 935777792.0, + "24": 935777792.0, + "25": 935777792.0, + "26": 935777792.0, + "27": 935777792.0, + "28": 935777792.0, + "29": 935777792.0, + "30": 935777792.0, + "31": 935777792.0, + "32": 935777792.0, + "33": 935777792.0, + "34": 935777792.0, + "35": 935777792.0, + "36": 935777792.0, + "37": 935777792.0, + "38": 935777792.0, + "39": 935777792.0, + "40": 935777792.0, + "41": 935777792.0, + "42": 935777792.0, + "43": 935777792.0, + "44": 935777792.0, + "45": 935777792.0, + "46": 935777792.0, + "47": 935777792.0, + "48": 935777792.0, + "49": 935777792.0, + "50": 935777792.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 16.61636, - "2": 0.35255, - "3": 0.33784, - "4": 0.33448, - "5": 0.33388, - "6": 0.33362, - "7": 0.33399, - "8": 0.33377, - "9": 0.3345, - "10": 0.33436, - "11": 0.33616, - "12": 0.33216, - "13": 0.32717, - "14": 0.3285, - "15": 0.31893, - "16": 0.32207, - "17": 0.32068, - "18": 0.3232, - "19": 0.31799, - "20": 0.32295, - "21": 0.32148, - "22": 0.3312, - "23": 0.33388, - "24": 0.33493, - "25": 0.33793, - "26": 0.33838, - "27": 0.33827, - "28": 0.34, - "29": 0.33074, - "30": 0.32608, - "31": 0.32629, - "32": 0.3285, - "33": 0.32776, - "34": 0.32575, - "35": 0.32648, - "36": 0.3252, - "37": 0.32697, - "38": 0.33001, - "39": 0.3354, - "40": 0.33513, - "41": 0.33447, - "42": 0.3352, - "43": 0.33163, - "44": 0.32495, - "45": 0.32668, - "46": 0.32429, - "47": 0.32917, - "48": 0.32614, - "49": 0.32637, - "50": 
0.32702 + "1": 35.15129, + "2": 0.34045, + "3": 0.3152, + "4": 0.29475, + "5": 0.29106, + "6": 0.28743, + "7": 0.28892, + "8": 0.28712, + "9": 0.28802, + "10": 0.28716, + "11": 0.28668, + "12": 0.37009, + "13": 0.28782, + "14": 0.29043, + "15": 0.28814, + "16": 0.2878, + "17": 0.28821, + "18": 0.28923, + "19": 0.28805, + "20": 0.28779, + "21": 0.28749, + "22": 0.28772, + "23": 0.29149, + "24": 0.28826, + "25": 0.28991, + "26": 0.28778, + "27": 0.29505, + "28": 0.29056, + "29": 0.28756, + "30": 0.28994, + "31": 0.28927, + "32": 0.28762, + "33": 0.29152, + "34": 0.28825, + "35": 0.29628, + "36": 0.29294, + "37": 0.29051, + "38": 0.28817, + "39": 0.28808, + "40": 0.28772, + "41": 0.28911, + "42": 0.28638, + "43": 0.28641, + "44": 0.28736, + "45": 0.28638, + "46": 0.29104, + "47": 0.2889, + "48": 0.28851, + "49": 0.2881, + "50": 0.28761 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgx_a100.json index 936ff15865c..b6821c7a8c1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgx_a100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 10.93292, - "5": 10.9297, - "10": 10.90476, - "15": 10.87124, - "20": 10.74998, - "25": 10.53758, + "1": 10.93295, + "2": 10.93424, + "3": 10.91344, + "4": 10.90321, + "5": 10.92967, + "6": 10.93657, + "7": 10.90278, + "8": 10.92113, + "9": 10.90705, + "10": 10.90473, + "11": 10.8879, + "12": 10.91735, + "13": 10.91188, + "14": 10.91508, + "15": 10.87123, + "16": 10.86129, 
+ "17": 10.82696, + "18": 10.85677, + "19": 10.84055, + "20": 10.74996, + "21": 10.71507, + "22": 10.58113, + "23": 10.72643, + "24": 10.6073, + "25": 10.53754, + "26": 10.61066, + "27": 10.59929, + "28": 10.54953, + "29": 10.56604, "30": 10.32549, - "35": 10.2289, + "31": 10.06695, + "32": 10.43809, + "33": 10.42363, + "34": 10.16014, + "35": 10.22895, + "36": 10.17616, + "37": 10.29235, + "38": 10.13295, + "39": 10.34955, "40": 10.01976, - "45": 9.7555, + "41": 10.07538, + "42": 10.15408, + "43": 9.76087, + "44": 9.88357, + "45": 9.75548, + "46": 9.74957, + "47": 10.07546, + "48": 9.77937, + "49": 9.43818, "50": 9.84069 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 591.0, - "5": 683.0, - "10": 544.0, - "15": 595.0, - "20": 611.0, - "25": 625.0, - "30": 698.0, + "1": 631.0, + "2": 663.0, + "3": 622.0, + "4": 621.0, + "5": 643.0, + "6": 635.0, + "7": 588.0, + "8": 629.0, + "9": 654.0, + "10": 539.0, + "11": 656.0, + "12": 638.0, + "13": 671.0, + "14": 656.0, + "15": 624.0, + "16": 633.0, + "17": 640.0, + "18": 609.0, + "19": 599.0, + "20": 593.0, + "21": 598.0, + "22": 628.0, + "23": 692.0, + "24": 601.0, + "25": 538.0, + "26": 640.0, + "27": 651.0, + "28": 749.0, + "29": 742.0, + "30": 687.0, + "31": 552.0, + "32": 752.0, + "33": 779.0, + "34": 653.0, "35": 687.0, - "40": 759.0, - "45": 807.0, - "50": 864.0 + "36": 687.0, + "37": 813.0, + "38": 738.0, + "39": 845.0, + "40": 697.0, + "41": 787.0, + "42": 800.0, + "43": 677.0, + "44": 737.0, + "45": 773.0, + "46": 876.0, + "47": 917.0, + "48": 907.0, + "49": 853.0, + "50": 851.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 431783936.0, - "5": 431783936.0, - "10": 431783936.0, - "15": 431783936.0, - "20": 431783936.0, - "25": 431783936.0, - "30": 431783936.0, - "35": 431783936.0, - "40": 431783936.0, - "45": 431783936.0, - "50": 431783936.0 + "1": 433750528.0, + "2": 
433750528.0, + "3": 433750528.0, + "4": 433750528.0, + "5": 433750528.0, + "6": 433750528.0, + "7": 433750528.0, + "8": 433750528.0, + "9": 433750528.0, + "10": 433750528.0, + "11": 433750528.0, + "12": 433750528.0, + "13": 433750528.0, + "14": 433750528.0, + "15": 433750528.0, + "16": 433750528.0, + "17": 433750528.0, + "18": 433750528.0, + "19": 433750528.0, + "20": 433750528.0, + "21": 433750528.0, + "22": 433750528.0, + "23": 433750528.0, + "24": 433750528.0, + "25": 433750528.0, + "26": 433750528.0, + "27": 433750528.0, + "28": 433750528.0, + "29": 433750528.0, + "30": 433750528.0, + "31": 433750528.0, + "32": 433750528.0, + "33": 433750528.0, + "34": 433750528.0, + "35": 433750528.0, + "36": 433750528.0, + "37": 433750528.0, + "38": 433750528.0, + "39": 433750528.0, + "40": 433750528.0, + "41": 433750528.0, + "42": 433750528.0, + "43": 433750528.0, + "44": 433750528.0, + "45": 433750528.0, + "46": 433750528.0, + "47": 433750528.0, + "48": 433750528.0, + "49": 433750528.0, + "50": 433750528.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 677335040.0, - "5": 853214208.0, - "10": 853214208.0, - "15": 853214208.0, - "20": 854262784.0, - "25": 854262784.0, - "30": 854262784.0, - "35": 854262784.0, - "40": 854262784.0, - "45": 855311360.0, - "50": 855311360.0 + "1": 678368768.0, + "2": 857265664.0, + "3": 857265664.0, + "4": 857265664.0, + "5": 857265664.0, + "6": 857265664.0, + "7": 857265664.0, + "8": 858314240.0, + "9": 858314240.0, + "10": 858314240.0, + "11": 858314240.0, + "12": 858314240.0, + "13": 858314240.0, + "14": 858314240.0, + "15": 858314240.0, + "16": 858314240.0, + "17": 858314240.0, + "18": 858314240.0, + "19": 858314240.0, + "20": 858314240.0, + "21": 858314240.0, + "22": 858314240.0, + "23": 858314240.0, + "24": 858314240.0, + "25": 858314240.0, + "26": 858314240.0, + "27": 858314240.0, + "28": 858314240.0, + "29": 858314240.0, + "30": 858314240.0, + "31": 
858314240.0, + "32": 858314240.0, + "33": 858314240.0, + "34": 858314240.0, + "35": 858314240.0, + "36": 858314240.0, + "37": 858314240.0, + "38": 858314240.0, + "39": 858314240.0, + "40": 858314240.0, + "41": 858314240.0, + "42": 858314240.0, + "43": 858314240.0, + "44": 858314240.0, + "45": 858314240.0, + "46": 858314240.0, + "47": 858314240.0, + "48": 858314240.0, + "49": 858314240.0, + "50": 858314240.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 17.70614, - "5": 0.41397, - "10": 0.40992, - "15": 0.40823, - "20": 0.40466, - "25": 0.40564, - "30": 0.40987, - "35": 0.41811, - "40": 0.40504, - "45": 0.4037, - "50": 0.40207 + "1": 16.90659, + "2": 0.4661, + "3": 0.43523, + "4": 0.41158, + "5": 0.40972, + "6": 0.40877, + "7": 0.40926, + "8": 0.40538, + "9": 0.40596, + "10": 0.40505, + "11": 0.41352, + "12": 0.40662, + "13": 0.40449, + "14": 0.40315, + "15": 0.40941, + "16": 0.4018, + "17": 0.40517, + "18": 0.40633, + "19": 0.40147, + "20": 0.4015, + "21": 0.40319, + "22": 0.40228, + "23": 0.40026, + "24": 0.40314, + "25": 0.40407, + "26": 0.40203, + "27": 0.40678, + "28": 0.40499, + "29": 0.40202, + "30": 0.40033, + "31": 0.39945, + "32": 0.39857, + "33": 0.39767, + "34": 0.3978, + "35": 0.39783, + "36": 0.39797, + "37": 0.39761, + "38": 0.39787, + "39": 0.39865, + "40": 0.40084, + "41": 0.39882, + "42": 0.39896, + "43": 0.39904, + "44": 0.39935, + "45": 0.40068, + "46": 0.39796, + "47": 0.39862, + "48": 0.39951, + "49": 0.39974, + "50": 0.39951 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..aa1e18f88cb --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86244, + "2": 10.88582, + "3": 10.84733, + "4": 10.85571, + "5": 10.86, + "6": 10.87733, + "7": 10.86555, + "8": 10.84913, + "9": 10.86609, + "10": 10.82473, + "11": 10.85618, + "12": 10.85374, + "13": 10.86788, + "14": 10.87119, + "15": 10.82235, + "16": 10.79991, + "17": 10.77431, + "18": 10.78345, + "19": 10.79308, + "20": 10.68226, + "21": 10.6471, + "22": 10.50917, + "23": 10.66827, + "24": 10.54193, + "25": 10.4928, + "26": 10.55931, + "27": 10.54238, + "28": 10.51129, + "29": 10.53257, + "30": 10.28992, + "31": 10.02853, + "32": 10.38885, + "33": 10.39593, + "34": 10.13446, + "35": 10.18932, + "36": 10.13355, + "37": 10.27381, + "38": 10.10751, + "39": 10.34007, + "40": 9.98538, + "41": 10.06414, + "42": 10.13744, + "43": 9.73381, + "44": 9.86305, + "45": 9.73723, + "46": 9.71343, + "47": 10.07757, + "48": 9.76768, + "49": 9.41987, + "50": 9.81687 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 567.0, + "2": 584.0, + "3": 598.0, + "4": 633.0, + "5": 630.0, + "6": 645.0, + "7": 645.0, + "8": 674.0, + "9": 625.0, + "10": 500.0, + "11": 669.0, + "12": 554.0, + "13": 681.0, + "14": 633.0, + "15": 623.0, + "16": 592.0, + "17": 636.0, + "18": 625.0, + "19": 633.0, + "20": 587.0, + "21": 696.0, + "22": 585.0, + "23": 681.0, + "24": 639.0, + "25": 587.0, + "26": 642.0, + "27": 639.0, + "28": 744.0, + "29": 746.0, + "30": 685.0, + "31": 603.0, + "32": 719.0, + "33": 850.0, + "34": 696.0, + "35": 737.0, + "36": 738.0, + "37": 840.0, + "38": 757.0, + "39": 828.0, + "40": 828.0, + "41": 787.0, + "42": 883.0, + "43": 703.0, + "44": 850.0, + "45": 840.0, + "46": 837.0, + "47": 915.0, + "48": 849.0, + "49": 915.0, + "50": 892.0 + } + }, + "mem-allocated-bytes": { + 
"start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 459571712.0, + "2": 459571712.0, + "3": 459571712.0, + "4": 459571712.0, + "5": 459571712.0, + "6": 459571712.0, + "7": 459571712.0, + "8": 459571712.0, + "9": 459571712.0, + "10": 459571712.0, + "11": 459571712.0, + "12": 459571712.0, + "13": 459571712.0, + "14": 459571712.0, + "15": 459571712.0, + "16": 459571712.0, + "17": 459571712.0, + "18": 459571712.0, + "19": 459571712.0, + "20": 459571712.0, + "21": 459571712.0, + "22": 459571712.0, + "23": 459571712.0, + "24": 459571712.0, + "25": 459571712.0, + "26": 459571712.0, + "27": 459571712.0, + "28": 459571712.0, + "29": 459571712.0, + "30": 459571712.0, + "31": 459571712.0, + "32": 459571712.0, + "33": 459571712.0, + "34": 459571712.0, + "35": 459571712.0, + "36": 459571712.0, + "37": 459571712.0, + "38": 459571712.0, + "39": 459571712.0, + "40": 459571712.0, + "41": 459571712.0, + "42": 459571712.0, + "43": 459571712.0, + "44": 459571712.0, + "45": 459571712.0, + "46": 459571712.0, + "47": 459571712.0, + "48": 459571712.0, + "49": 459571712.0, + "50": 459571712.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 708781568.0, + "2": 885184000.0, + "3": 885184000.0, + "4": 885184000.0, + "5": 885184000.0, + "6": 885184000.0, + "7": 885184000.0, + "8": 885184000.0, + "9": 885184000.0, + "10": 885184000.0, + "11": 885184000.0, + "12": 885184000.0, + "13": 885184000.0, + "14": 885184000.0, + "15": 885184000.0, + "16": 885184000.0, + "17": 885184000.0, + "18": 885184000.0, + "19": 885184000.0, + "20": 885184000.0, + "21": 886231552.0, + "22": 886231552.0, + "23": 886231552.0, + "24": 886231552.0, + "25": 886231552.0, + "26": 886231552.0, + "27": 886231552.0, + "28": 886231552.0, + "29": 886231552.0, + "30": 886231552.0, + "31": 886231552.0, + "32": 886231552.0, + "33": 886231552.0, + "34": 886231552.0, + "35": 886231552.0, + "36": 886231552.0, + "37": 886231552.0, + "38": 
886231552.0, + "39": 886231552.0, + "40": 886231552.0, + "41": 886231552.0, + "42": 886231552.0, + "43": 886231552.0, + "44": 886231552.0, + "45": 886231552.0, + "46": 886231552.0, + "47": 886231552.0, + "48": 886231552.0, + "49": 886231552.0, + "50": 886231552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.83536, + "2": 0.50436, + "3": 0.49153, + "4": 0.51839, + "5": 0.41963, + "6": 0.42593, + "7": 0.50539, + "8": 0.43728, + "9": 0.43214, + "10": 0.43276, + "11": 0.43243, + "12": 0.64271, + "13": 0.48613, + "14": 0.47822, + "15": 0.4732, + "16": 0.43011, + "17": 0.49091, + "18": 0.4264, + "19": 0.46987, + "20": 0.48787, + "21": 0.48533, + "22": 0.49433, + "23": 0.42402, + "24": 0.45662, + "25": 0.48851, + "26": 0.55798, + "27": 0.49442, + "28": 0.46841, + "29": 0.45193, + "30": 0.42664, + "31": 0.47172, + "32": 0.42125, + "33": 0.42866, + "34": 0.47761, + "35": 0.42624, + "36": 0.45512, + "37": 0.42405, + "38": 0.45455, + "39": 0.42258, + "40": 0.42354, + "41": 0.42486, + "42": 0.42783, + "43": 0.47508, + "44": 0.42611, + "45": 0.4236, + "46": 0.42862, + "47": 0.42603, + "48": 0.6007, + "49": 0.42833, + "50": 0.42517 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json index 84e2331d673..a470bf65873 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 511214080.0, - "2": 511214080.0, - "3": 511214080.0, - "4": 511214080.0, - 
"5": 511214080.0, - "6": 511214080.0, - "7": 511214080.0, - "8": 511214080.0, - "9": 511214080.0, - "10": 511214080.0, - "11": 511214080.0, - "12": 511214080.0, - "13": 511214080.0, - "14": 511214080.0, - "15": 511214080.0, - "16": 511214080.0, - "17": 511214080.0, - "18": 511214080.0, - "19": 511214080.0, - "20": 511214080.0, - "21": 511214080.0, - "22": 511214080.0, - "23": 511214080.0, - "24": 511214080.0, - "25": 511214080.0, - "26": 511214080.0, - "27": 511214080.0, - "28": 511214080.0, - "29": 511214080.0, - "30": 511214080.0, - "31": 511214080.0, - "32": 511214080.0, - "33": 511214080.0, - "34": 511214080.0, - "35": 511214080.0, - "36": 511214080.0, - "37": 511214080.0, - "38": 511214080.0, - "39": 511214080.0, - "40": 511214080.0, - "41": 511214080.0, - "42": 511214080.0, - "43": 511214080.0, - "44": 511214080.0, - "45": 511214080.0, - "46": 511214080.0, - "47": 511214080.0, - "48": 511214080.0, - "49": 511214080.0, - "50": 511214080.0 + "1": 510689792.0, + "2": 510689792.0, + "3": 510689792.0, + "4": 510689792.0, + "5": 510689792.0, + "6": 510689792.0, + "7": 510689792.0, + "8": 510689792.0, + "9": 510689792.0, + "10": 510689792.0, + "11": 510689792.0, + "12": 510689792.0, + "13": 510689792.0, + "14": 510689792.0, + "15": 510689792.0, + "16": 510689792.0, + "17": 510689792.0, + "18": 510689792.0, + "19": 510689792.0, + "20": 510689792.0, + "21": 510689792.0, + "22": 510689792.0, + "23": 510689792.0, + "24": 510689792.0, + "25": 510689792.0, + "26": 510689792.0, + "27": 510689792.0, + "28": 510689792.0, + "29": 510689792.0, + "30": 510689792.0, + "31": 510689792.0, + "32": 510689792.0, + "33": 510689792.0, + "34": 510689792.0, + "35": 510689792.0, + "36": 510689792.0, + "37": 510689792.0, + "38": 510689792.0, + "39": 510689792.0, + "40": 510689792.0, + "41": 510689792.0, + "42": 510689792.0, + "43": 510689792.0, + "44": 510689792.0, + "45": 510689792.0, + "46": 510689792.0, + "47": 510689792.0, + "48": 510689792.0, + "49": 510689792.0, + "50": 510689792.0 } 
}, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 759899136.0, - "2": 936824320.0, - "3": 936824832.0, - "4": 936824832.0, - "5": 936824832.0, - "6": 936824832.0, - "7": 936824832.0, - "8": 936824832.0, - "9": 936824832.0, - "10": 936824832.0, - "11": 936824832.0, - "12": 936824832.0, - "13": 936824832.0, - "14": 936824832.0, - "15": 936824832.0, - "16": 936824832.0, - "17": 936824832.0, - "18": 936824832.0, - "19": 936824832.0, - "20": 936824832.0, - "21": 936824832.0, - "22": 936824832.0, - "23": 936824832.0, - "24": 936824832.0, - "25": 936824832.0, - "26": 936824832.0, - "27": 936824832.0, - "28": 936824832.0, - "29": 936824832.0, - "30": 936824832.0, - "31": 936824832.0, - "32": 936824832.0, - "33": 936824832.0, - "34": 936824832.0, - "35": 936824832.0, - "36": 936824832.0, - "37": 936824832.0, - "38": 936824832.0, - "39": 936824832.0, - "40": 936824832.0, - "41": 936824832.0, - "42": 936824832.0, - "43": 936824832.0, - "44": 936824832.0, - "45": 936824832.0, - "46": 936824832.0, - "47": 936824832.0, - "48": 936824832.0, - "49": 936824832.0, - "50": 936824832.0 + "1": 756752896.0, + "2": 938398720.0, + "3": 938398720.0, + "4": 938398720.0, + "5": 938398720.0, + "6": 938398720.0, + "7": 938398720.0, + "8": 938398720.0, + "9": 938398720.0, + "10": 938398720.0, + "11": 938398720.0, + "12": 938398720.0, + "13": 938398720.0, + "14": 938398720.0, + "15": 938398720.0, + "16": 938399232.0, + "17": 938399232.0, + "18": 938399232.0, + "19": 938399232.0, + "20": 938399232.0, + "21": 938399232.0, + "22": 938399232.0, + "23": 938399232.0, + "24": 938399232.0, + "25": 938399232.0, + "26": 938399232.0, + "27": 938399232.0, + "28": 938399232.0, + "29": 938399232.0, + "30": 938399232.0, + "31": 938399232.0, + "32": 938399232.0, + "33": 938399232.0, + "34": 938399232.0, + "35": 938399232.0, + "36": 938399232.0, + "37": 938399232.0, + "38": 938399232.0, + "39": 938399232.0, + "40": 938399232.0, + "41": 938399232.0, + 
"42": 938399232.0, + "43": 938399232.0, + "44": 938399232.0, + "45": 938399232.0, + "46": 938399232.0, + "47": 938399232.0, + "48": 938399232.0, + "49": 938399232.0, + "50": 938399232.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 45.68343, - "2": 0.392, - "3": 0.35818, - "4": 0.28793, - "5": 0.28609, - "6": 0.28869, - "7": 0.28726, - "8": 0.28725, - "9": 0.28787, - "10": 0.2834, - "11": 0.28813, - "12": 0.28685, - "13": 0.28453, - "14": 0.28421, - "15": 0.28504, - "16": 0.28118, - "17": 0.28123, - "18": 0.28302, - "19": 0.28937, - "20": 0.28486, - "21": 0.28762, - "22": 0.28121, - "23": 0.28289, - "24": 0.28379, - "25": 0.28305, - "26": 0.28337, - "27": 0.28236, - "28": 0.28063, - "29": 0.27814, - "30": 0.2808, - "31": 0.27908, - "32": 0.28085, - "33": 0.28065, - "34": 0.28226, - "35": 0.28009, - "36": 0.2802, - "37": 0.28283, - "38": 0.27963, - "39": 0.28465, - "40": 0.28297, - "41": 0.28176, - "42": 0.28166, - "43": 0.2805, - "44": 0.28385, - "45": 0.28053, - "46": 0.27883, - "47": 0.28037, - "48": 0.28067, - "49": 0.27929, - "50": 0.27864 + "1": 37.38041, + "2": 0.33426, + "3": 0.30575, + "4": 0.2855, + "5": 0.28459, + "6": 0.28581, + "7": 0.28653, + "8": 0.28649, + "9": 0.28246, + "10": 0.28538, + "11": 0.28516, + "12": 0.28331, + "13": 0.28799, + "14": 0.28438, + "15": 0.28361, + "16": 0.28315, + "17": 0.2837, + "18": 0.28279, + "19": 0.28916, + "20": 0.28613, + "21": 0.2849, + "22": 0.2837, + "23": 0.2861, + "24": 0.28551, + "25": 0.28665, + "26": 0.28308, + "27": 0.28626, + "28": 0.28139, + "29": 0.28479, + "30": 0.28557, + "31": 0.28342, + "32": 0.28058, + "33": 0.2824, + "34": 0.28129, + "35": 0.28377, + "36": 0.28273, + "37": 0.28699, + "38": 0.28388, + "39": 0.28427, + "40": 0.28442, + "41": 0.28373, + "42": 0.28177, + "43": 0.28258, + "44": 0.28237, + "45": 0.2815, + "46": 0.28503, + "47": 0.2817, + "48": 0.28433, + "49": 0.28819, + "50": 0.28371 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..8858c8ab59e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86244, + "2": 10.88582, + "3": 10.84735, + "4": 10.85571, + "5": 10.86001, + "6": 10.87728, + "7": 10.86557, + "8": 10.84912, + "9": 10.86609, + "10": 10.82474, + "11": 10.8562, + "12": 10.85373, + "13": 10.86791, + "14": 10.87118, + "15": 10.82233, + "16": 10.79992, + "17": 10.77429, + "18": 10.78345, + "19": 10.79312, + "20": 10.68225, + "21": 10.64714, + "22": 10.50918, + "23": 10.66831, + "24": 10.54193, + "25": 10.49281, + "26": 10.5593, + "27": 10.54238, + "28": 10.51129, + "29": 10.53257, + "30": 10.28987, + "31": 10.02852, + "32": 10.38878, + "33": 10.39598, + "34": 10.13455, + "35": 10.18928, + "36": 10.13354, + "37": 10.2738, + "38": 10.1075, + "39": 10.34012, + "40": 9.9854, + "41": 10.06415, + "42": 10.13748, + "43": 9.73384, + "44": 9.86308, + "45": 9.73722, + "46": 9.71345, + "47": 10.07752, + "48": 9.76768, + "49": 9.4199, + "50": 9.81691 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 575.0, + "2": 587.0, + "3": 615.0, + "4": 627.0, + "5": 639.0, + "6": 629.0, + "7": 625.0, + "8": 589.0, + "9": 645.0, + "10": 515.0, + "11": 616.0, + "12": 569.0, + "13": 701.0, + "14": 633.0, + "15": 589.0, + "16": 615.0, + "17": 612.0, + "18": 575.0, + "19": 549.0, + "20": 615.0, + "21": 693.0, + "22": 611.0, + "23": 737.0, + "24": 689.0, + "25": 579.0, + "26": 557.0, + 
"27": 692.0, + "28": 719.0, + "29": 762.0, + "30": 730.0, + "31": 579.0, + "32": 740.0, + "33": 766.0, + "34": 683.0, + "35": 705.0, + "36": 709.0, + "37": 810.0, + "38": 771.0, + "39": 872.0, + "40": 846.0, + "41": 757.0, + "42": 789.0, + "43": 766.0, + "44": 833.0, + "45": 738.0, + "46": 870.0, + "47": 891.0, + "48": 874.0, + "49": 857.0, + "50": 875.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 459571712.0, + "2": 459571712.0, + "3": 459571712.0, + "4": 459571712.0, + "5": 459571712.0, + "6": 459571712.0, + "7": 459571712.0, + "8": 459571712.0, + "9": 459571712.0, + "10": 459571712.0, + "11": 459571712.0, + "12": 459571712.0, + "13": 459571712.0, + "14": 459571712.0, + "15": 459571712.0, + "16": 459571712.0, + "17": 459571712.0, + "18": 459571712.0, + "19": 459571712.0, + "20": 459571712.0, + "21": 459571712.0, + "22": 459571712.0, + "23": 459571712.0, + "24": 459571712.0, + "25": 459571712.0, + "26": 459571712.0, + "27": 459571712.0, + "28": 459571712.0, + "29": 459571712.0, + "30": 459571712.0, + "31": 459571712.0, + "32": 459571712.0, + "33": 459571712.0, + "34": 459571712.0, + "35": 459571712.0, + "36": 459571712.0, + "37": 459571712.0, + "38": 459571712.0, + "39": 459571712.0, + "40": 459571712.0, + "41": 459571712.0, + "42": 459571712.0, + "43": 459571712.0, + "44": 459571712.0, + "45": 459571712.0, + "46": 459571712.0, + "47": 459571712.0, + "48": 459571712.0, + "49": 459571712.0, + "50": 459571712.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 704587264.0, + "2": 883611136.0, + "3": 884135424.0, + "4": 884658176.0, + "5": 884658176.0, + "6": 884658176.0, + "7": 884658176.0, + "8": 884658176.0, + "9": 884658688.0, + "10": 884659200.0, + "11": 884659200.0, + "12": 884659200.0, + "13": 884659200.0, + "14": 884659200.0, + "15": 884659200.0, + "16": 884659712.0, + "17": 884659712.0, + "18": 884659712.0, + "19": 
884659712.0, + "20": 884659712.0, + "21": 884659712.0, + "22": 884659712.0, + "23": 884659712.0, + "24": 884659712.0, + "25": 884659712.0, + "26": 884659712.0, + "27": 884659712.0, + "28": 884659712.0, + "29": 884659712.0, + "30": 884659712.0, + "31": 884659712.0, + "32": 884659712.0, + "33": 884659712.0, + "34": 884659712.0, + "35": 884659712.0, + "36": 884659712.0, + "37": 884659712.0, + "38": 884659712.0, + "39": 884659712.0, + "40": 884659712.0, + "41": 884659712.0, + "42": 884659712.0, + "43": 884659712.0, + "44": 884659712.0, + "45": 884659712.0, + "46": 884659712.0, + "47": 884659712.0, + "48": 884659712.0, + "49": 884659712.0, + "50": 884659712.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.6955, + "2": 0.4755, + "3": 0.50907, + "4": 0.43067, + "5": 0.73714, + "6": 0.4269, + "7": 0.42684, + "8": 0.64221, + "9": 0.48428, + "10": 0.42395, + "11": 0.42943, + "12": 0.49106, + "13": 0.49108, + "14": 0.67522, + "15": 0.42547, + "16": 0.41999, + "17": 0.46662, + "18": 0.45683, + "19": 0.41987, + "20": 0.41746, + "21": 0.41909, + "22": 0.4703, + "23": 0.42675, + "24": 0.62571, + "25": 0.47889, + "26": 0.53722, + "27": 0.49475, + "28": 0.48715, + "29": 0.59996, + "30": 0.4396, + "31": 0.42052, + "32": 0.4463, + "33": 0.45305, + "34": 0.45764, + "35": 0.42178, + "36": 0.4257, + "37": 0.43568, + "38": 0.42736, + "39": 0.42942, + "40": 0.43094, + "41": 0.42609, + "42": 0.42743, + "43": 0.43464, + "44": 0.43647, + "45": 0.46437, + "46": 0.46709, + "47": 0.64826, + "48": 0.44677, + "49": 0.64353, + "50": 0.4369 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_h100.json index cc1700ed493..98736eb9491 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.86535, - "2": 10.85873, - "3": 10.86285, - "4": 10.84007, - "5": 10.87854, + "1": 10.86539, + "2": 10.85871, + "3": 10.86281, + "4": 10.84006, + "5": 10.87858, "6": 10.88852, - "7": 10.86537, - "8": 10.86015, - "9": 10.85985, - "10": 10.82982, + "7": 10.86536, + "8": 10.8602, + "9": 10.85986, + "10": 10.8298, "11": 10.88949, - "12": 10.87509, - "13": 10.87426, - "14": 10.89674, - "15": 10.82054, - "16": 10.82501, - "17": 10.78985, + "12": 10.87507, + "13": 10.87425, + "14": 10.89678, + "15": 10.8205, + "16": 10.82496, + "17": 10.78984, "18": 10.81032, - "19": 10.8053, - "20": 10.70397, - "21": 10.66986, - "22": 10.50641, - "23": 10.69001, - "24": 10.56317, - "25": 10.49421, - "26": 10.56628, + "19": 10.80534, + "20": 10.70396, + "21": 10.66987, + "22": 10.5064, + "23": 10.69008, + "24": 10.56312, + "25": 10.49422, + "26": 10.56625, "27": 10.58022, - "28": 10.51574, - "29": 10.55292, - "30": 10.30549, + "28": 10.51576, + "29": 10.55299, + "30": 10.3055, "31": 10.0225, - "32": 10.40617, - "33": 10.39874, - "34": 10.13772, + "32": 10.40614, + "33": 10.39876, + "34": 10.13774, "35": 10.20187, - "36": 10.16045, - "37": 10.28977, - "38": 10.11478, - "39": 10.36101, - "40": 10.01903, - "41": 10.07294, - "42": 10.14691, - "43": 9.74683, - "44": 9.87762, + "36": 10.16049, + "37": 10.28975, + "38": 10.11482, + "39": 10.36102, + "40": 10.01898, + "41": 10.07291, + "42": 10.14696, + "43": 9.74688, + "44": 9.87766, "45": 9.74966, - "46": 9.73384, - "47": 10.07535, - "48": 9.78069, + "46": 9.73386, + "47": 10.07538, + "48": 9.7807, "49": 9.44783, - "50": 9.83992 + "50": 9.83988 } }, "num-zeros": { @@ 
-61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 607.0, - "2": 628.0, - "3": 600.0, - "4": 658.0, - "5": 657.0, - "6": 707.0, - "7": 637.0, - "8": 593.0, - "9": 632.0, - "10": 553.0, - "11": 641.0, - "12": 631.0, - "13": 676.0, - "14": 643.0, - "15": 623.0, - "16": 611.0, - "17": 687.0, - "18": 622.0, - "19": 581.0, - "20": 609.0, - "21": 652.0, - "22": 621.0, - "23": 800.0, + "1": 641.0, + "2": 681.0, + "3": 601.0, + "4": 636.0, + "5": 651.0, + "6": 701.0, + "7": 639.0, + "8": 535.0, + "9": 647.0, + "10": 513.0, + "11": 669.0, + "12": 644.0, + "13": 680.0, + "14": 654.0, + "15": 601.0, + "16": 616.0, + "17": 656.0, + "18": 623.0, + "19": 649.0, + "20": 575.0, + "21": 679.0, + "22": 556.0, + "23": 681.0, "24": 618.0, - "25": 623.0, - "26": 595.0, - "27": 679.0, - "28": 726.0, - "29": 719.0, - "30": 723.0, - "31": 624.0, - "32": 737.0, - "33": 776.0, - "34": 713.0, - "35": 696.0, - "36": 759.0, - "37": 829.0, - "38": 784.0, - "39": 798.0, - "40": 813.0, - "41": 814.0, - "42": 880.0, - "43": 780.0, - "44": 775.0, - "45": 759.0, - "46": 849.0, - "47": 938.0, - "48": 876.0, - "49": 886.0, - "50": 817.0 + "25": 629.0, + "26": 650.0, + "27": 704.0, + "28": 693.0, + "29": 764.0, + "30": 725.0, + "31": 609.0, + "32": 728.0, + "33": 790.0, + "34": 724.0, + "35": 730.0, + "36": 717.0, + "37": 857.0, + "38": 730.0, + "39": 897.0, + "40": 816.0, + "41": 799.0, + "42": 845.0, + "43": 760.0, + "44": 831.0, + "45": 786.0, + "46": 802.0, + "47": 827.0, + "48": 846.0, + "49": 863.0, + "50": 803.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - 
"19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 512262656.0, + "2": 512262656.0, + "3": 512262656.0, + "4": 512262656.0, + "5": 512262656.0, + "6": 512262656.0, + "7": 512262656.0, + "8": 512262656.0, + "9": 512262656.0, + "10": 512262656.0, + "11": 512262656.0, + "12": 512262656.0, + "13": 512262656.0, + "14": 512262656.0, + "15": 512262656.0, + "16": 512262656.0, + "17": 512262656.0, + "18": 512262656.0, + "19": 512262656.0, + "20": 512262656.0, + "21": 512262656.0, + "22": 512262656.0, + "23": 512262656.0, + "24": 512262656.0, + "25": 512262656.0, + "26": 512262656.0, + "27": 512262656.0, + "28": 512262656.0, + "29": 512262656.0, + "30": 512262656.0, + "31": 512262656.0, + "32": 512262656.0, + "33": 512262656.0, + "34": 512262656.0, + "35": 512262656.0, + "36": 512262656.0, + "37": 512262656.0, + "38": 512262656.0, + "39": 512262656.0, + "40": 512262656.0, + "41": 512262656.0, + "42": 512262656.0, + "43": 512262656.0, + "44": 512262656.0, + "45": 512262656.0, + "46": 512262656.0, + "47": 512262656.0, + "48": 512262656.0, + "49": 512262656.0, + "50": 512262656.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 759895552.0, - "2": 933156352.0, - "3": 933156352.0, - "4": 933156352.0, - "5": 933156352.0, - "6": 933156352.0, - "7": 933156352.0, - "8": 933156352.0, - "9": 933156352.0, - 
"10": 933156352.0, - "11": 933156352.0, - "12": 933156352.0, - "13": 933156352.0, - "14": 933156352.0, - "15": 933156352.0, - "16": 933156352.0, - "17": 933156352.0, - "18": 933156352.0, - "19": 933156352.0, - "20": 933156352.0, - "21": 933156352.0, - "22": 933156352.0, - "23": 933156352.0, - "24": 933156352.0, - "25": 933156352.0, - "26": 933156352.0, - "27": 933156352.0, - "28": 933156352.0, - "29": 933156352.0, - "30": 933156352.0, - "31": 933156352.0, - "32": 933156352.0, - "33": 933156352.0, - "34": 933156352.0, - "35": 933156352.0, - "36": 933156352.0, - "37": 933156352.0, - "38": 933156352.0, - "39": 933156352.0, - "40": 933156352.0, - "41": 933156352.0, - "42": 933156352.0, - "43": 933156352.0, - "44": 933156352.0, - "45": 933156352.0, - "46": 933156352.0, - "47": 933156352.0, - "48": 933156352.0, - "49": 933156352.0, - "50": 933156352.0 + "1": 755704832.0, + "2": 934729216.0, + "3": 934729216.0, + "4": 935776768.0, + "5": 935776768.0, + "6": 935776768.0, + "7": 935776768.0, + "8": 935776768.0, + "9": 935776768.0, + "10": 935776768.0, + "11": 935777280.0, + "12": 935777280.0, + "13": 935777280.0, + "14": 935777280.0, + "15": 935777280.0, + "16": 935777280.0, + "17": 935777280.0, + "18": 935777280.0, + "19": 935777280.0, + "20": 935777792.0, + "21": 935777792.0, + "22": 935777792.0, + "23": 935777792.0, + "24": 935777792.0, + "25": 935777792.0, + "26": 935777792.0, + "27": 935777792.0, + "28": 935777792.0, + "29": 935777792.0, + "30": 935777792.0, + "31": 935777792.0, + "32": 935777792.0, + "33": 935777792.0, + "34": 935777792.0, + "35": 935777792.0, + "36": 935777792.0, + "37": 935777792.0, + "38": 935777792.0, + "39": 935777792.0, + "40": 935777792.0, + "41": 935777792.0, + "42": 935777792.0, + "43": 935777792.0, + "44": 935777792.0, + "45": 935777792.0, + "46": 935777792.0, + "47": 935777792.0, + "48": 935777792.0, + "49": 935777792.0, + "50": 935777792.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 
18.67374, - "2": 0.33434, - "3": 0.32862, - "4": 0.3312, - "5": 0.32463, - "6": 0.33221, - "7": 0.33167, - "8": 0.32476, - "9": 0.32742, - "10": 0.32327, - "11": 0.31599, - "12": 0.32511, - "13": 0.32273, - "14": 0.31956, - "15": 0.32777, - "16": 0.32745, - "17": 0.31743, - "18": 0.32418, - "19": 0.32759, - "20": 0.32696, - "21": 0.32321, - "22": 0.32923, - "23": 0.32125, - "24": 0.32088, - "25": 0.32288, - "26": 0.31739, - "27": 0.33667, - "28": 0.32586, - "29": 0.31738, - "30": 0.31392, - "31": 0.32116, - "32": 0.31637, - "33": 0.32029, - "34": 0.32057, - "35": 0.31739, - "36": 0.31341, - "37": 0.32121, - "38": 0.326, - "39": 0.31692, - "40": 0.31511, - "41": 0.32216, - "42": 0.31654, - "43": 0.32474, - "44": 0.32162, - "45": 0.31451, - "46": 0.31434, - "47": 0.32885, - "48": 0.31603, - "49": 0.31732, - "50": 0.3234 + "1": 37.19618, + "2": 0.37449, + "3": 0.31644, + "4": 0.28217, + "5": 0.28413, + "6": 0.27992, + "7": 0.2812, + "8": 0.2853, + "9": 0.28038, + "10": 0.28373, + "11": 0.2867, + "12": 0.29151, + "13": 0.28727, + "14": 0.28521, + "15": 0.28348, + "16": 0.28599, + "17": 0.28521, + "18": 0.28496, + "19": 0.28665, + "20": 0.28808, + "21": 0.28617, + "22": 0.2849, + "23": 0.28018, + "24": 0.28162, + "25": 0.29703, + "26": 0.31265, + "27": 0.28109, + "28": 0.28283, + "29": 0.28046, + "30": 0.28567, + "31": 0.28446, + "32": 0.28496, + "33": 0.279, + "34": 0.28039, + "35": 0.28345, + "36": 0.2816, + "37": 0.28207, + "38": 0.27907, + "39": 0.27768, + "40": 0.27658, + "41": 0.28191, + "42": 0.28052, + "43": 0.2793, + "44": 0.2793, + "45": 0.28044, + "46": 0.27801, + "47": 0.28286, + "48": 0.27846, + "49": 0.27648, + "50": 0.278 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgx_a100.json index 50d3c9c5d20..36ec79d6f72 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgx_a100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.93292, "5": 10.92969, "10": 10.90473, "15": 10.87125, "20": 10.75001, "25": 10.53752, "30": 10.32548, "35": 10.22894, "40": 10.01974, "45": 9.75546, "50": 9.84069}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 590.0, "5": 658.0, "10": 523.0, "15": 641.0, "20": 567.0, "25": 606.0, "30": 725.0, "35": 699.0, "40": 783.0, "45": 845.0, "50": 857.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 432177152.0, "5": 432177152.0, "10": 432177152.0, "15": 432177152.0, "20": 432177152.0, "25": 432177152.0, "30": 432177152.0, "35": 432177152.0, "40": 432177152.0, "45": 432177152.0, "50": 432177152.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 676286464.0, "5": 857274368.0, "10": 857274368.0, "15": 857274368.0, "20": 857274368.0, "25": 857277440.0, "30": 857277440.0, "35": 857277440.0, "40": 857277440.0, "45": 857277440.0, "50": 857277440.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 21.95554, "5": 0.40686, "10": 0.40586, "15": 0.39829, "20": 0.39913, "25": 0.39679, "30": 0.39346, "35": 0.39107, "40": 0.387, "45": 0.3959, "50": 0.39384}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93295, + "2": 10.93424, + "3": 
10.91348, + "4": 10.90316, + "5": 10.92971, + "6": 10.93656, + "7": 10.90279, + "8": 10.92114, + "9": 10.90707, + "10": 10.90475, + "11": 10.88789, + "12": 10.91738, + "13": 10.9119, + "14": 10.91508, + "15": 10.8712, + "16": 10.86127, + "17": 10.82695, + "18": 10.85672, + "19": 10.84058, + "20": 10.74994, + "21": 10.71505, + "22": 10.58118, + "23": 10.72639, + "24": 10.60727, + "25": 10.53751, + "26": 10.61069, + "27": 10.59925, + "28": 10.54953, + "29": 10.56605, + "30": 10.32549, + "31": 10.06697, + "32": 10.43809, + "33": 10.42357, + "34": 10.16016, + "35": 10.22897, + "36": 10.17616, + "37": 10.29236, + "38": 10.13296, + "39": 10.34952, + "40": 10.01973, + "41": 10.07536, + "42": 10.15409, + "43": 9.76087, + "44": 9.88353, + "45": 9.75547, + "46": 9.74963, + "47": 10.07544, + "48": 9.77937, + "49": 9.43814, + "50": 9.8407 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 584.0, + "2": 667.0, + "3": 622.0, + "4": 575.0, + "5": 641.0, + "6": 652.0, + "7": 616.0, + "8": 578.0, + "9": 664.0, + "10": 555.0, + "11": 661.0, + "12": 599.0, + "13": 665.0, + "14": 672.0, + "15": 592.0, + "16": 652.0, + "17": 643.0, + "18": 582.0, + "19": 574.0, + "20": 568.0, + "21": 624.0, + "22": 637.0, + "23": 655.0, + "24": 607.0, + "25": 574.0, + "26": 650.0, + "27": 677.0, + "28": 700.0, + "29": 717.0, + "30": 687.0, + "31": 585.0, + "32": 649.0, + "33": 789.0, + "34": 676.0, + "35": 740.0, + "36": 707.0, + "37": 853.0, + "38": 796.0, + "39": 846.0, + "40": 801.0, + "41": 801.0, + "42": 795.0, + "43": 696.0, + "44": 765.0, + "45": 813.0, + "46": 806.0, + "47": 905.0, + "48": 829.0, + "49": 876.0, + "50": 842.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 433750528.0, + "2": 433750528.0, + "3": 433750528.0, + "4": 433750528.0, + "5": 433750528.0, + "6": 433750528.0, + "7": 433750528.0, + "8": 433750528.0, + "9": 433750528.0, + "10": 433750528.0, + "11": 
433750528.0, + "12": 433750528.0, + "13": 433750528.0, + "14": 433750528.0, + "15": 433750528.0, + "16": 433750528.0, + "17": 433750528.0, + "18": 433750528.0, + "19": 433750528.0, + "20": 433750528.0, + "21": 433750528.0, + "22": 433750528.0, + "23": 433750528.0, + "24": 433750528.0, + "25": 433750528.0, + "26": 433750528.0, + "27": 433750528.0, + "28": 433750528.0, + "29": 433750528.0, + "30": 433750528.0, + "31": 433750528.0, + "32": 433750528.0, + "33": 433750528.0, + "34": 433750528.0, + "35": 433750528.0, + "36": 433750528.0, + "37": 433750528.0, + "38": 433750528.0, + "39": 433750528.0, + "40": 433750528.0, + "41": 433750528.0, + "42": 433750528.0, + "43": 433750528.0, + "44": 433750528.0, + "45": 433750528.0, + "46": 433750528.0, + "47": 433750528.0, + "48": 433750528.0, + "49": 433750528.0, + "50": 433750528.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 677322752.0, + "2": 858311168.0, + "3": 858311168.0, + "4": 858312704.0, + "5": 858313728.0, + "6": 858313728.0, + "7": 858313728.0, + "8": 858313728.0, + "9": 858313728.0, + "10": 858313728.0, + "11": 858313728.0, + "12": 858313728.0, + "13": 858313728.0, + "14": 858313728.0, + "15": 858313728.0, + "16": 858313728.0, + "17": 858313728.0, + "18": 858313728.0, + "19": 858314240.0, + "20": 858314240.0, + "21": 858314240.0, + "22": 858314240.0, + "23": 858314240.0, + "24": 858314240.0, + "25": 858314240.0, + "26": 858314240.0, + "27": 858314240.0, + "28": 858314240.0, + "29": 858314240.0, + "30": 858314240.0, + "31": 858314240.0, + "32": 858314240.0, + "33": 858314240.0, + "34": 858314240.0, + "35": 858314240.0, + "36": 858314240.0, + "37": 858314240.0, + "38": 858314240.0, + "39": 858314240.0, + "40": 858314240.0, + "41": 858314240.0, + "42": 858314240.0, + "43": 858314240.0, + "44": 858314240.0, + "45": 858314240.0, + "46": 858314240.0, + "47": 858314240.0, + "48": 858314240.0, + "49": 858314240.0, + "50": 858314240.0 + } + }, + 
"iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 21.76594, + "2": 0.46379, + "3": 0.43243, + "4": 0.41208, + "5": 0.41118, + "6": 0.41286, + "7": 0.41188, + "8": 0.41137, + "9": 0.41313, + "10": 0.41246, + "11": 0.41206, + "12": 0.41297, + "13": 0.41065, + "14": 0.41339, + "15": 0.41164, + "16": 0.4123, + "17": 0.41103, + "18": 0.4126, + "19": 0.41173, + "20": 0.40973, + "21": 0.40983, + "22": 0.41192, + "23": 0.41174, + "24": 0.41275, + "25": 0.4103, + "26": 0.41066, + "27": 0.40962, + "28": 0.41015, + "29": 0.41299, + "30": 0.41138, + "31": 0.41272, + "32": 0.41313, + "33": 0.41105, + "34": 0.41154, + "35": 0.41101, + "36": 0.41364, + "37": 0.41532, + "38": 0.41685, + "39": 0.41569, + "40": 0.41646, + "41": 0.42457, + "42": 0.41274, + "43": 0.41244, + "44": 0.41106, + "45": 0.41405, + "46": 0.41346, + "47": 0.41825, + "48": 0.41512, + "49": 0.41064, + "50": 0.4153 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..746c6b2ba10 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86244, + "2": 10.88582, + "3": 10.84733, + "4": 10.85571, + "5": 10.86, + "6": 10.87733, + "7": 10.86555, + "8": 10.84913, + "9": 10.86609, + "10": 10.82473, + "11": 10.85618, + "12": 10.85374, + "13": 10.86788, + "14": 10.87119, + "15": 10.82235, + "16": 10.79991, + "17": 10.77431, + "18": 10.78345, + "19": 10.79308, + "20": 10.68226, + "21": 10.6471, + "22": 10.50917, + "23": 10.66827, + "24": 10.54193, + "25": 10.4928, + "26": 10.55931, + "27": 10.54238, + "28": 10.51129, + "29": 
10.53257, + "30": 10.28992, + "31": 10.02853, + "32": 10.38885, + "33": 10.39593, + "34": 10.13446, + "35": 10.18932, + "36": 10.13355, + "37": 10.27381, + "38": 10.10751, + "39": 10.34007, + "40": 9.98538, + "41": 10.06414, + "42": 10.13744, + "43": 9.73381, + "44": 9.86305, + "45": 9.73723, + "46": 9.71343, + "47": 10.07757, + "48": 9.76768, + "49": 9.41987, + "50": 9.81687 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 567.0, + "2": 584.0, + "3": 598.0, + "4": 633.0, + "5": 630.0, + "6": 645.0, + "7": 645.0, + "8": 674.0, + "9": 625.0, + "10": 500.0, + "11": 669.0, + "12": 554.0, + "13": 681.0, + "14": 633.0, + "15": 623.0, + "16": 592.0, + "17": 636.0, + "18": 625.0, + "19": 633.0, + "20": 587.0, + "21": 696.0, + "22": 585.0, + "23": 681.0, + "24": 639.0, + "25": 587.0, + "26": 642.0, + "27": 639.0, + "28": 744.0, + "29": 746.0, + "30": 685.0, + "31": 603.0, + "32": 719.0, + "33": 850.0, + "34": 696.0, + "35": 737.0, + "36": 738.0, + "37": 840.0, + "38": 757.0, + "39": 828.0, + "40": 828.0, + "41": 787.0, + "42": 883.0, + "43": 703.0, + "44": 850.0, + "45": 840.0, + "46": 837.0, + "47": 915.0, + "48": 849.0, + "49": 915.0, + "50": 892.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 459571712.0, + "2": 459571712.0, + "3": 459571712.0, + "4": 459571712.0, + "5": 459571712.0, + "6": 459571712.0, + "7": 459571712.0, + "8": 459571712.0, + "9": 459571712.0, + "10": 459571712.0, + "11": 459571712.0, + "12": 459571712.0, + "13": 459571712.0, + "14": 459571712.0, + "15": 459571712.0, + "16": 459571712.0, + "17": 459571712.0, + "18": 459571712.0, + "19": 459571712.0, + "20": 459571712.0, + "21": 459571712.0, + "22": 459571712.0, + "23": 459571712.0, + "24": 459571712.0, + "25": 459571712.0, + "26": 459571712.0, + "27": 459571712.0, + "28": 459571712.0, + "29": 459571712.0, + "30": 459571712.0, + "31": 459571712.0, + "32": 459571712.0, + "33": 
459571712.0, + "34": 459571712.0, + "35": 459571712.0, + "36": 459571712.0, + "37": 459571712.0, + "38": 459571712.0, + "39": 459571712.0, + "40": 459571712.0, + "41": 459571712.0, + "42": 459571712.0, + "43": 459571712.0, + "44": 459571712.0, + "45": 459571712.0, + "46": 459571712.0, + "47": 459571712.0, + "48": 459571712.0, + "49": 459571712.0, + "50": 459571712.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 708781568.0, + "2": 885184000.0, + "3": 885184000.0, + "4": 885184000.0, + "5": 885184000.0, + "6": 885184000.0, + "7": 885184000.0, + "8": 885184000.0, + "9": 885184000.0, + "10": 885184000.0, + "11": 885184000.0, + "12": 885184000.0, + "13": 885184000.0, + "14": 885184000.0, + "15": 885184000.0, + "16": 885184000.0, + "17": 885184000.0, + "18": 885184000.0, + "19": 885184000.0, + "20": 885184000.0, + "21": 885184000.0, + "22": 885184000.0, + "23": 885184000.0, + "24": 885184000.0, + "25": 885184000.0, + "26": 885184000.0, + "27": 885184000.0, + "28": 885184000.0, + "29": 885184000.0, + "30": 885184000.0, + "31": 885184000.0, + "32": 885184000.0, + "33": 885184000.0, + "34": 885184000.0, + "35": 885184000.0, + "36": 885184000.0, + "37": 885184000.0, + "38": 885184000.0, + "39": 885184000.0, + "40": 885184000.0, + "41": 885184000.0, + "42": 885184000.0, + "43": 885184000.0, + "44": 885184000.0, + "45": 885184000.0, + "46": 885184000.0, + "47": 885184000.0, + "48": 885184000.0, + "49": 885184000.0, + "50": 885706752.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.83222, + "2": 0.46295, + "3": 0.52097, + "4": 0.42074, + "5": 0.72217, + "6": 0.70851, + "7": 0.41812, + "8": 0.41893, + "9": 0.47564, + "10": 0.48012, + "11": 0.41406, + "12": 0.43392, + "13": 0.67246, + "14": 0.41498, + "15": 0.47203, + "16": 0.46, + "17": 0.40996, + "18": 0.4104, + "19": 0.66865, + "20": 0.40782, + "21": 0.40774, + "22": 0.49273, + "23": 
0.49254, + "24": 0.47511, + "25": 0.64062, + "26": 0.43231, + "27": 0.50003, + "28": 0.46605, + "29": 0.64224, + "30": 0.42576, + "31": 0.40898, + "32": 0.49354, + "33": 0.47014, + "34": 0.4075, + "35": 0.40863, + "36": 0.40508, + "37": 0.42937, + "38": 0.41009, + "39": 0.4116, + "40": 0.40987, + "41": 0.41014, + "42": 0.45949, + "43": 0.40849, + "44": 0.48462, + "45": 0.4567, + "46": 0.40779, + "47": 0.466, + "48": 0.41678, + "49": 0.40871, + "50": 0.41039 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json index e8b9cea88e0..f78c3deb59d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 511214080.0, - "2": 511214080.0, - "3": 511214080.0, - "4": 511214080.0, - "5": 511214080.0, - "6": 511214080.0, - "7": 511214080.0, - "8": 511214080.0, - "9": 511214080.0, - "10": 511214080.0, - "11": 511214080.0, - "12": 511214080.0, - "13": 511214080.0, - "14": 511214080.0, - "15": 511214080.0, - "16": 511214080.0, - "17": 511214080.0, - "18": 511214080.0, - "19": 511214080.0, - "20": 511214080.0, - "21": 511214080.0, - "22": 511214080.0, - "23": 511214080.0, - "24": 511214080.0, - "25": 511214080.0, - "26": 511214080.0, - "27": 511214080.0, - "28": 511214080.0, - "29": 511214080.0, - "30": 511214080.0, - "31": 511214080.0, - "32": 511214080.0, - "33": 511214080.0, - "34": 511214080.0, - "35": 511214080.0, - "36": 511214080.0, - "37": 511214080.0, - "38": 511214080.0, - "39": 511214080.0, - "40": 511214080.0, - "41": 511214080.0, - "42": 511214080.0, - "43": 511214080.0, - "44": 511214080.0, - "45": 
511214080.0, - "46": 511214080.0, - "47": 511214080.0, - "48": 511214080.0, - "49": 511214080.0, - "50": 511214080.0 + "1": 512786944.0, + "2": 512786944.0, + "3": 512786944.0, + "4": 512786944.0, + "5": 512786944.0, + "6": 512786944.0, + "7": 512786944.0, + "8": 512786944.0, + "9": 512786944.0, + "10": 512786944.0, + "11": 512786944.0, + "12": 512786944.0, + "13": 512786944.0, + "14": 512786944.0, + "15": 512786944.0, + "16": 512786944.0, + "17": 512786944.0, + "18": 512786944.0, + "19": 512786944.0, + "20": 512786944.0, + "21": 512786944.0, + "22": 512786944.0, + "23": 512786944.0, + "24": 512786944.0, + "25": 512786944.0, + "26": 512786944.0, + "27": 512786944.0, + "28": 512786944.0, + "29": 512786944.0, + "30": 512786944.0, + "31": 512786944.0, + "32": 512786944.0, + "33": 512786944.0, + "34": 512786944.0, + "35": 512786944.0, + "36": 512786944.0, + "37": 512786944.0, + "38": 512786944.0, + "39": 512786944.0, + "40": 512786944.0, + "41": 512786944.0, + "42": 512786944.0, + "43": 512786944.0, + "44": 512786944.0, + "45": 512786944.0, + "46": 512786944.0, + "47": 512786944.0, + "48": 512786944.0, + "49": 512786944.0, + "50": 512786944.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 757801984.0, - "2": 935777792.0, - "3": 935777792.0, - "4": 935777792.0, - "5": 935777792.0, - "6": 935777792.0, - "7": 935777792.0, - "8": 935777792.0, - "9": 935777792.0, - "10": 935777792.0, - "11": 935777792.0, - "12": 935777792.0, - "13": 935777792.0, - "14": 935777792.0, - "15": 935777792.0, - "16": 935777792.0, - "17": 935777792.0, - "18": 935777792.0, - "19": 935777792.0, - "20": 935777792.0, - "21": 935777792.0, - "22": 935777792.0, - "23": 935777792.0, - "24": 935777792.0, - "25": 935777792.0, - "26": 935777792.0, - "27": 935777792.0, - "28": 935777792.0, - "29": 935777792.0, - "30": 935777792.0, - "31": 935777792.0, - "32": 935777792.0, - "33": 935777792.0, - "34": 935777792.0, - "35": 935777792.0, - "36": 
935777792.0, - "37": 935777792.0, - "38": 935777792.0, - "39": 935777792.0, - "40": 935777792.0, - "41": 935777792.0, - "42": 935777792.0, - "43": 935777792.0, - "44": 935777792.0, - "45": 935777792.0, - "46": 935777792.0, - "47": 935777792.0, - "48": 935777792.0, - "49": 935777792.0, - "50": 935777792.0 + "1": 758851072.0, + "2": 937350656.0, + "3": 937350656.0, + "4": 937350656.0, + "5": 937350656.0, + "6": 937350656.0, + "7": 937350656.0, + "8": 937350656.0, + "9": 937350656.0, + "10": 937350656.0, + "11": 937350656.0, + "12": 937350656.0, + "13": 937350656.0, + "14": 937350656.0, + "15": 937350656.0, + "16": 937350656.0, + "17": 937350656.0, + "18": 937350656.0, + "19": 937350656.0, + "20": 937350656.0, + "21": 937350656.0, + "22": 937350656.0, + "23": 937350656.0, + "24": 937350656.0, + "25": 937350656.0, + "26": 937350656.0, + "27": 937350656.0, + "28": 937350656.0, + "29": 937350656.0, + "30": 937350656.0, + "31": 937350656.0, + "32": 937350656.0, + "33": 937350656.0, + "34": 937350656.0, + "35": 937350656.0, + "36": 937350656.0, + "37": 937350656.0, + "38": 937350656.0, + "39": 937350656.0, + "40": 937350656.0, + "41": 937350656.0, + "42": 937350656.0, + "43": 937350656.0, + "44": 937350656.0, + "45": 937350656.0, + "46": 937350656.0, + "47": 937350656.0, + "48": 937350656.0, + "49": 937350656.0, + "50": 937350656.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 44.86787, - "2": 0.36349, - "3": 0.3142, - "4": 0.29456, - "5": 0.29609, - "6": 0.29566, - "7": 0.29467, - "8": 0.2899, - "9": 0.28864, - "10": 0.28994, - "11": 0.28355, - "12": 0.28608, - "13": 0.28278, - "14": 0.2823, - "15": 0.28087, - "16": 0.28237, - "17": 0.28556, - "18": 0.28363, - "19": 0.28381, - "20": 0.28356, - "21": 0.28235, - "22": 0.29036, - "23": 0.28491, - "24": 0.28322, - "25": 0.28412, - "26": 0.28352, - "27": 0.28643, - "28": 0.2853, - "29": 0.28809, - "30": 0.28258, - "31": 0.28114, - "32": 0.281, - "33": 0.28135, - "34": 
0.27914, - "35": 0.28099, - "36": 0.28267, - "37": 0.28236, - "38": 0.28102, - "39": 0.31493, - "40": 0.28173, - "41": 0.28058, - "42": 0.28033, - "43": 0.28335, - "44": 0.28253, - "45": 0.28169, - "46": 0.28078, - "47": 0.28082, - "48": 0.2819, - "49": 0.28087, - "50": 0.28 + "1": 35.82214, + "2": 0.4147, + "3": 0.32319, + "4": 0.30032, + "5": 0.30017, + "6": 0.29443, + "7": 0.29684, + "8": 0.29654, + "9": 0.29369, + "10": 0.29144, + "11": 0.29461, + "12": 0.29494, + "13": 0.2989, + "14": 0.30075, + "15": 0.30668, + "16": 0.29656, + "17": 0.29426, + "18": 0.29342, + "19": 0.29461, + "20": 0.29689, + "21": 0.29944, + "22": 0.29592, + "23": 0.29544, + "24": 0.29391, + "25": 0.29356, + "26": 0.29086, + "27": 0.29138, + "28": 0.29613, + "29": 0.29464, + "30": 0.29623, + "31": 0.29357, + "32": 0.2941, + "33": 0.29995, + "34": 0.29721, + "35": 0.29459, + "36": 0.29391, + "37": 0.29408, + "38": 0.29673, + "39": 0.2977, + "40": 0.29439, + "41": 0.29458, + "42": 0.29561, + "43": 0.29392, + "44": 0.3078, + "45": 0.29321, + "46": 0.28828, + "47": 0.28745, + "48": 0.30287, + "49": 0.28551, + "50": 0.28747 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..cef90be5674 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86244, + "2": 10.88582, + "3": 10.84731, + "4": 10.85576, + "5": 10.86004, + "6": 10.87726, + "7": 10.86557, + "8": 10.84915, + "9": 10.86608, + "10": 10.82477, + "11": 10.85617, + "12": 10.85377, + "13": 10.86788, + "14": 10.87113, + "15": 10.82238, + "16": 
10.79992, + "17": 10.77432, + "18": 10.78346, + "19": 10.79308, + "20": 10.68227, + "21": 10.64715, + "22": 10.50914, + "23": 10.66831, + "24": 10.54198, + "25": 10.49277, + "26": 10.55935, + "27": 10.54235, + "28": 10.51128, + "29": 10.53255, + "30": 10.28988, + "31": 10.02851, + "32": 10.38874, + "33": 10.39594, + "34": 10.13449, + "35": 10.18929, + "36": 10.13352, + "37": 10.2738, + "38": 10.10752, + "39": 10.3401, + "40": 9.98541, + "41": 10.06413, + "42": 10.13748, + "43": 9.73382, + "44": 9.86306, + "45": 9.73727, + "46": 9.7134, + "47": 10.07755, + "48": 9.76767, + "49": 9.4199, + "50": 9.81686 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 572.0, + "2": 631.0, + "3": 629.0, + "4": 629.0, + "5": 576.0, + "6": 654.0, + "7": 633.0, + "8": 620.0, + "9": 630.0, + "10": 541.0, + "11": 632.0, + "12": 603.0, + "13": 675.0, + "14": 617.0, + "15": 651.0, + "16": 622.0, + "17": 619.0, + "18": 628.0, + "19": 641.0, + "20": 610.0, + "21": 677.0, + "22": 572.0, + "23": 703.0, + "24": 624.0, + "25": 555.0, + "26": 601.0, + "27": 666.0, + "28": 749.0, + "29": 699.0, + "30": 756.0, + "31": 582.0, + "32": 733.0, + "33": 773.0, + "34": 655.0, + "35": 710.0, + "36": 762.0, + "37": 863.0, + "38": 786.0, + "39": 846.0, + "40": 789.0, + "41": 795.0, + "42": 902.0, + "43": 758.0, + "44": 804.0, + "45": 751.0, + "46": 895.0, + "47": 815.0, + "48": 842.0, + "49": 851.0, + "50": 835.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 459571712.0, + "2": 459571712.0, + "3": 459571712.0, + "4": 459571712.0, + "5": 459571712.0, + "6": 459571712.0, + "7": 459571712.0, + "8": 459571712.0, + "9": 459571712.0, + "10": 459571712.0, + "11": 459571712.0, + "12": 459571712.0, + "13": 459571712.0, + "14": 459571712.0, + "15": 459571712.0, + "16": 459571712.0, + "17": 459571712.0, + "18": 459571712.0, + "19": 459571712.0, + "20": 459571712.0, + "21": 459571712.0, + "22": 
459571712.0, + "23": 459571712.0, + "24": 459571712.0, + "25": 459571712.0, + "26": 459571712.0, + "27": 459571712.0, + "28": 459571712.0, + "29": 459571712.0, + "30": 459571712.0, + "31": 459571712.0, + "32": 459571712.0, + "33": 459571712.0, + "34": 459571712.0, + "35": 459571712.0, + "36": 459571712.0, + "37": 459571712.0, + "38": 459571712.0, + "39": 459571712.0, + "40": 459571712.0, + "41": 459571712.0, + "42": 459571712.0, + "43": 459571712.0, + "44": 459571712.0, + "45": 459571712.0, + "46": 459571712.0, + "47": 459571712.0, + "48": 459571712.0, + "49": 459571712.0, + "50": 459571712.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 708781568.0, + "2": 885183488.0, + "3": 885184000.0, + "4": 885184000.0, + "5": 885184000.0, + "6": 885184000.0, + "7": 885184000.0, + "8": 885184000.0, + "9": 885184000.0, + "10": 885184000.0, + "11": 885184000.0, + "12": 885184000.0, + "13": 885184000.0, + "14": 885184000.0, + "15": 885184000.0, + "16": 885184000.0, + "17": 885184000.0, + "18": 885184000.0, + "19": 885184000.0, + "20": 885184000.0, + "21": 885184000.0, + "22": 885184000.0, + "23": 885184000.0, + "24": 885184000.0, + "25": 885184000.0, + "26": 886232576.0, + "27": 886232576.0, + "28": 886232576.0, + "29": 886232576.0, + "30": 886232576.0, + "31": 886232576.0, + "32": 886232576.0, + "33": 886232576.0, + "34": 886232576.0, + "35": 886232576.0, + "36": 886232576.0, + "37": 886232576.0, + "38": 886232576.0, + "39": 886232576.0, + "40": 886232576.0, + "41": 886232576.0, + "42": 886232576.0, + "43": 886232576.0, + "44": 886232576.0, + "45": 886232576.0, + "46": 886232576.0, + "47": 886232576.0, + "48": 886232576.0, + "49": 886232576.0, + "50": 886232576.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.90548, + "2": 0.59116, + "3": 0.51351, + "4": 0.5889, + "5": 0.44588, + "6": 0.48318, + "7": 0.40946, + "8": 0.41291, + "9": 0.4711, 
+ "10": 0.46604, + "11": 0.41089, + "12": 0.48863, + "13": 0.50268, + "14": 0.46761, + "15": 0.4075, + "16": 0.43179, + "17": 0.40649, + "18": 0.46497, + "19": 0.40807, + "20": 0.40657, + "21": 0.4151, + "22": 0.47269, + "23": 0.61429, + "24": 0.46129, + "25": 0.40977, + "26": 0.40692, + "27": 0.40603, + "28": 0.77632, + "29": 0.40782, + "30": 0.40901, + "31": 0.40545, + "32": 0.47343, + "33": 0.40648, + "34": 0.40452, + "35": 0.40862, + "36": 0.40878, + "37": 0.40927, + "38": 0.4062, + "39": 0.40929, + "40": 0.40755, + "41": 0.4034, + "42": 0.40739, + "43": 0.5793, + "44": 0.42611, + "45": 0.46136, + "46": 0.40554, + "47": 0.45264, + "48": 0.45209, + "49": 0.40299, + "50": 0.40119 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_h100.json index 524007ed7d6..c0f918ce574 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.86535, - "2": 10.85873, + "1": 10.86539, + "2": 10.85871, "3": 10.86283, - "4": 10.84007, - "5": 10.87854, + "4": 10.84012, + "5": 10.87855, "6": 10.88853, "7": 10.86532, "8": 10.8602, - "9": 10.85991, + "9": 10.85989, "10": 10.82981, - "11": 10.8895, + "11": 10.88943, "12": 10.87507, - "13": 10.87426, - "14": 10.89678, + "13": 10.87423, + "14": 10.89674, "15": 10.82054, - "16": 10.825, - "17": 10.7898, + "16": 10.82502, + "17": 10.78984, "18": 10.8103, - "19": 10.80536, - "20": 10.70398, - "21": 10.66992, + "19": 10.80531, + "20": 10.70393, + "21": 10.66989, "22": 10.50644, - "23": 10.69005, - "24": 10.5631, - "25": 
10.49418, - "26": 10.56626, - "27": 10.58028, + "23": 10.69001, + "24": 10.56313, + "25": 10.49417, + "26": 10.56631, + "27": 10.58022, "28": 10.51572, - "29": 10.55298, - "30": 10.30549, - "31": 10.02244, - "32": 10.40615, - "33": 10.3988, - "34": 10.13773, - "35": 10.20188, - "36": 10.1605, - "37": 10.28974, - "38": 10.11477, - "39": 10.36102, - "40": 10.01902, - "41": 10.07292, - "42": 10.14694, - "43": 9.74685, - "44": 9.87766, - "45": 9.74965, + "29": 10.55301, + "30": 10.3055, + "31": 10.02252, + "32": 10.40617, + "33": 10.39877, + "34": 10.13772, + "35": 10.20187, + "36": 10.16046, + "37": 10.28973, + "38": 10.11479, + "39": 10.36106, + "40": 10.01901, + "41": 10.07289, + "42": 10.14697, + "43": 9.7469, + "44": 9.87759, + "45": 9.74964, "46": 9.73384, - "47": 10.07535, - "48": 9.7807, - "49": 9.44783, - "50": 9.83991 + "47": 10.07538, + "48": 9.78069, + "49": 9.44785, + "50": 9.83992 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 647.0, - "2": 614.0, - "3": 640.0, - "4": 603.0, - "5": 600.0, - "6": 683.0, - "7": 630.0, - "8": 565.0, - "9": 671.0, - "10": 531.0, - "11": 670.0, - "12": 643.0, - "13": 626.0, - "14": 635.0, - "15": 655.0, - "16": 643.0, - "17": 693.0, - "18": 634.0, - "19": 648.0, - "20": 644.0, - "21": 690.0, - "22": 606.0, - "23": 694.0, - "24": 565.0, - "25": 605.0, - "26": 636.0, - "27": 638.0, - "28": 721.0, - "29": 750.0, - "30": 760.0, - "31": 572.0, - "32": 705.0, - "33": 816.0, + "1": 606.0, + "2": 636.0, + "3": 628.0, + "4": 620.0, + "5": 632.0, + "6": 688.0, + "7": 638.0, + "8": 601.0, + "9": 637.0, + "10": 557.0, + "11": 644.0, + "12": 665.0, + "13": 708.0, + "14": 658.0, + "15": 666.0, + "16": 635.0, + "17": 712.0, + "18": 614.0, + "19": 652.0, + "20": 627.0, + "21": 674.0, + "22": 610.0, + "23": 760.0, + "24": 590.0, + "25": 611.0, + "26": 637.0, + "27": 660.0, + "28": 752.0, + "29": 735.0, + "30": 659.0, + "31": 603.0, + "32": 791.0, + "33": 800.0, "34": 737.0, - "35": 720.0, - "36": 
710.0, - "37": 862.0, - "38": 763.0, - "39": 909.0, - "40": 795.0, - "41": 776.0, - "42": 858.0, - "43": 771.0, - "44": 858.0, - "45": 857.0, - "46": 864.0, - "47": 880.0, - "48": 923.0, - "49": 899.0, - "50": 868.0 + "35": 738.0, + "36": 732.0, + "37": 858.0, + "38": 799.0, + "39": 870.0, + "40": 821.0, + "41": 788.0, + "42": 865.0, + "43": 704.0, + "44": 761.0, + "45": 822.0, + "46": 846.0, + "47": 871.0, + "48": 883.0, + "49": 883.0, + "50": 857.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 512786944.0, + "2": 512786944.0, + "3": 512786944.0, + "4": 512786944.0, + "5": 512786944.0, + "6": 512786944.0, + "7": 512786944.0, + "8": 512786944.0, + "9": 512786944.0, + "10": 512786944.0, + "11": 512786944.0, + "12": 512786944.0, + "13": 512786944.0, + "14": 512786944.0, + "15": 512786944.0, + "16": 512786944.0, + "17": 512786944.0, + "18": 512786944.0, + "19": 512786944.0, + "20": 
512786944.0, + "21": 512786944.0, + "22": 512786944.0, + "23": 512786944.0, + "24": 512786944.0, + "25": 512786944.0, + "26": 512786944.0, + "27": 512786944.0, + "28": 512786944.0, + "29": 512786944.0, + "30": 512786944.0, + "31": 512786944.0, + "32": 512786944.0, + "33": 512786944.0, + "34": 512786944.0, + "35": 512786944.0, + "36": 512786944.0, + "37": 512786944.0, + "38": 512786944.0, + "39": 512786944.0, + "40": 512786944.0, + "41": 512786944.0, + "42": 512786944.0, + "43": 512786944.0, + "44": 512786944.0, + "45": 512786944.0, + "46": 512786944.0, + "47": 512786944.0, + "48": 512786944.0, + "49": 512786944.0, + "50": 512786944.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 757801472.0, - "2": 933156352.0, - "3": 933156352.0, - "4": 933156352.0, - "5": 933156352.0, - "6": 933156352.0, - "7": 933156352.0, - "8": 933156352.0, - "9": 933156352.0, - "10": 933156352.0, - "11": 933156352.0, - "12": 933156352.0, - "13": 933156352.0, - "14": 933156352.0, - "15": 933156352.0, - "16": 933156352.0, - "17": 933156352.0, - "18": 933156352.0, - "19": 933156352.0, - "20": 933156352.0, - "21": 933156352.0, - "22": 933156352.0, - "23": 933156352.0, - "24": 933156352.0, - "25": 933156352.0, - "26": 933156352.0, - "27": 933156352.0, - "28": 933156352.0, - "29": 933156352.0, - "30": 933156352.0, - "31": 933156352.0, - "32": 933156352.0, - "33": 933156352.0, - "34": 933156352.0, - "35": 933156352.0, - "36": 933156352.0, - "37": 933156352.0, - "38": 933156352.0, - "39": 933156352.0, - "40": 933156352.0, - "41": 933156352.0, - "42": 933156352.0, - "43": 933156352.0, - "44": 933156352.0, - "45": 933156352.0, - "46": 933156352.0, - "47": 933156352.0, - "48": 933156352.0, - "49": 933156352.0, - "50": 933156352.0 + "1": 758851072.0, + "2": 936302080.0, + "3": 936302080.0, + "4": 936302080.0, + "5": 936302080.0, + "6": 936302080.0, + "7": 937349632.0, + "8": 937349632.0, + "9": 937349632.0, + "10": 937349632.0, + "11": 
937349632.0, + "12": 937350656.0, + "13": 937350656.0, + "14": 937350656.0, + "15": 937350656.0, + "16": 937350656.0, + "17": 937350656.0, + "18": 937350656.0, + "19": 937350656.0, + "20": 937350656.0, + "21": 937350656.0, + "22": 937350656.0, + "23": 937350656.0, + "24": 937350656.0, + "25": 937350656.0, + "26": 937350656.0, + "27": 937350656.0, + "28": 937350656.0, + "29": 937350656.0, + "30": 937350656.0, + "31": 937350656.0, + "32": 937350656.0, + "33": 937350656.0, + "34": 937350656.0, + "35": 937350656.0, + "36": 937350656.0, + "37": 937350656.0, + "38": 937350656.0, + "39": 937350656.0, + "40": 937350656.0, + "41": 937350656.0, + "42": 937350656.0, + "43": 937350656.0, + "44": 937350656.0, + "45": 937350656.0, + "46": 937350656.0, + "47": 937350656.0, + "48": 937350656.0, + "49": 937350656.0, + "50": 937350656.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 17.58309, - "2": 0.34736, - "3": 0.32683, - "4": 0.3279, - "5": 0.32934, - "6": 0.33179, - "7": 0.3281, - "8": 0.3324, - "9": 0.32989, - "10": 0.32742, - "11": 0.33009, - "12": 0.3345, - "13": 0.33455, - "14": 0.3346, - "15": 0.33747, - "16": 0.33625, - "17": 0.3454, - "18": 0.33586, - "19": 0.33227, - "20": 0.33242, - "21": 0.33093, - "22": 0.33378, - "23": 0.33439, - "24": 0.33159, - "25": 0.32826, - "26": 0.33259, - "27": 0.33154, - "28": 0.32855, - "29": 0.32973, - "30": 0.33267, - "31": 0.33156, - "32": 0.32832, - "33": 0.33304, - "34": 0.32817, - "35": 0.32993, - "36": 0.33154, - "37": 0.32842, - "38": 0.32508, - "39": 0.33067, - "40": 0.33115, - "41": 0.32719, - "42": 0.33205, - "43": 0.3472, - "44": 0.33564, - "45": 0.33202, - "46": 0.33051, - "47": 0.32871, - "48": 0.33055, - "49": 0.33399, - "50": 0.33114 + "1": 36.51522, + "2": 0.33765, + "3": 0.3066, + "4": 0.28763, + "5": 0.29777, + "6": 0.28582, + "7": 0.28832, + "8": 0.2868, + "9": 0.28478, + "10": 0.28471, + "11": 0.2819, + "12": 0.28335, + "13": 0.2836, + "14": 0.28168, + "15": 
0.28103, + "16": 0.28016, + "17": 0.28046, + "18": 0.27976, + "19": 0.28362, + "20": 0.28005, + "21": 0.32339, + "22": 0.32249, + "23": 0.28055, + "24": 0.28159, + "25": 0.27999, + "26": 0.28072, + "27": 0.28355, + "28": 0.28084, + "29": 0.28109, + "30": 0.28649, + "31": 0.28181, + "32": 0.28256, + "33": 0.28162, + "34": 0.2786, + "35": 0.27925, + "36": 0.2774, + "37": 0.27817, + "38": 0.28082, + "39": 0.27778, + "40": 0.27826, + "41": 0.27788, + "42": 0.27618, + "43": 0.28026, + "44": 0.27755, + "45": 0.27871, + "46": 0.27725, + "47": 0.27974, + "48": 0.29559, + "49": 0.28231, + "50": 0.28057 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgx_a100.json index e60c6b8950b..227d76695c3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgx_a100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.93292, "5": 10.92968, "10": 10.90471, "15": 10.87119, "20": 10.74996, "25": 10.53752, "30": 10.32551, "35": 10.22893, "40": 10.01972, "45": 9.75543, "50": 9.8407}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 585.0, "5": 676.0, "10": 542.0, "15": 625.0, "20": 553.0, "25": 595.0, "30": 748.0, "35": 665.0, "40": 762.0, "45": 757.0, "50": 856.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 432177152.0, "5": 432177152.0, "10": 432177152.0, "15": 432177152.0, "20": 432177152.0, "25": 432177152.0, "30": 432177152.0, "35": 432177152.0, "40": 432177152.0, "45": 432177152.0, "50": 432177152.0}}, 
"mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 677334528.0, "5": 856228864.0, "10": 856228864.0, "15": 856228864.0, "20": 856228864.0, "25": 856228864.0, "30": 856228864.0, "35": 856228864.0, "40": 856228864.0, "45": 856228864.0, "50": 856228864.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 22.20877, "5": 0.40055, "10": 0.40235, "15": 0.40045, "20": 0.39406, "25": 0.39764, "30": 0.39555, "35": 0.39211, "40": 0.38588, "45": 0.38484, "50": 0.38002}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93295, + "2": 10.93424, + "3": 10.91344, + "4": 10.90324, + "5": 10.92971, + "6": 10.93653, + "7": 10.90278, + "8": 10.92115, + "9": 10.90706, + "10": 10.90471, + "11": 10.88787, + "12": 10.91736, + "13": 10.91188, + "14": 10.91505, + "15": 10.87122, + "16": 10.86124, + "17": 10.82702, + "18": 10.85679, + "19": 10.84058, + "20": 10.75, + "21": 10.71507, + "22": 10.58119, + "23": 10.72644, + "24": 10.60726, + "25": 10.53754, + "26": 10.61067, + "27": 10.59932, + "28": 10.54957, + "29": 10.566, + "30": 10.3255, + "31": 10.067, + "32": 10.43808, + "33": 10.4236, + "34": 10.16018, + "35": 10.2289, + "36": 10.17613, + "37": 10.29237, + "38": 10.13293, + "39": 10.34957, + "40": 10.01976, + "41": 10.07533, + "42": 10.15411, + "43": 9.7609, + "44": 9.88358, + "45": 9.75546, + "46": 9.74964, + "47": 10.07547, + "48": 9.77936, + "49": 9.43821, + "50": 9.84068 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 605.0, + "2": 625.0, + "3": 652.0, + "4": 624.0, + "5": 663.0, + "6": 613.0, + "7": 650.0, + "8": 610.0, + "9": 675.0, + "10": 560.0, + "11": 630.0, + "12": 603.0, + "13": 667.0, + "14": 652.0, + "15": 625.0, + "16": 621.0, + "17": 588.0, + "18": 591.0, + "19": 599.0, + "20": 599.0, + "21": 617.0, + "22": 566.0, + "23": 696.0, + "24": 619.0, + "25": 
539.0, + "26": 564.0, + "27": 645.0, + "28": 745.0, + "29": 738.0, + "30": 668.0, + "31": 596.0, + "32": 698.0, + "33": 722.0, + "34": 651.0, + "35": 705.0, + "36": 710.0, + "37": 783.0, + "38": 773.0, + "39": 913.0, + "40": 772.0, + "41": 813.0, + "42": 799.0, + "43": 683.0, + "44": 769.0, + "45": 784.0, + "46": 820.0, + "47": 874.0, + "48": 885.0, + "49": 814.0, + "50": 840.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 431522304.0, + "2": 431522304.0, + "3": 431522304.0, + "4": 431522304.0, + "5": 431522304.0, + "6": 431522304.0, + "7": 431522304.0, + "8": 431522304.0, + "9": 431522304.0, + "10": 431522304.0, + "11": 431522304.0, + "12": 431522304.0, + "13": 431522304.0, + "14": 431522304.0, + "15": 431522304.0, + "16": 431522304.0, + "17": 431522304.0, + "18": 431522304.0, + "19": 431522304.0, + "20": 431522304.0, + "21": 431522304.0, + "22": 431522304.0, + "23": 431522304.0, + "24": 431522304.0, + "25": 431522304.0, + "26": 431522304.0, + "27": 431522304.0, + "28": 431522304.0, + "29": 431522304.0, + "30": 431522304.0, + "31": 431522304.0, + "32": 431522304.0, + "33": 431522304.0, + "34": 431522304.0, + "35": 431522304.0, + "36": 431522304.0, + "37": 431522304.0, + "38": 431522304.0, + "39": 431522304.0, + "40": 431522304.0, + "41": 431522304.0, + "42": 431522304.0, + "43": 431522304.0, + "44": 431522304.0, + "45": 431522304.0, + "46": 431522304.0, + "47": 431522304.0, + "48": 431522304.0, + "49": 431522304.0, + "50": 431522304.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 676274688.0, + "2": 861328896.0, + "3": 861328896.0, + "4": 861328896.0, + "5": 861328896.0, + "6": 861328896.0, + "7": 861328896.0, + "8": 861328896.0, + "9": 861328896.0, + "10": 861328896.0, + "11": 861328896.0, + "12": 861328896.0, + "13": 861328896.0, + "14": 861328896.0, + "15": 861328896.0, + "16": 861328896.0, + "17": 861328896.0, + "18": 
861328896.0, + "19": 861328896.0, + "20": 861328896.0, + "21": 861328896.0, + "22": 861328896.0, + "23": 861328896.0, + "24": 861328896.0, + "25": 861328896.0, + "26": 861328896.0, + "27": 861328896.0, + "28": 861328896.0, + "29": 861328896.0, + "30": 861328896.0, + "31": 861328896.0, + "32": 861328896.0, + "33": 861328896.0, + "34": 861328896.0, + "35": 861328896.0, + "36": 861328896.0, + "37": 861328896.0, + "38": 861328896.0, + "39": 861328896.0, + "40": 861328896.0, + "41": 861328896.0, + "42": 861328896.0, + "43": 861328896.0, + "44": 861328896.0, + "45": 861328896.0, + "46": 861328896.0, + "47": 861328896.0, + "48": 861328896.0, + "49": 861328896.0, + "50": 861328896.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 17.98339, + "2": 0.51543, + "3": 0.43144, + "4": 0.41368, + "5": 0.41459, + "6": 0.42035, + "7": 0.41166, + "8": 0.41088, + "9": 0.40219, + "10": 0.39929, + "11": 0.40001, + "12": 0.40539, + "13": 0.40407, + "14": 0.40122, + "15": 0.40141, + "16": 0.39925, + "17": 0.4019, + "18": 0.40627, + "19": 0.40221, + "20": 0.40001, + "21": 0.40901, + "22": 0.40318, + "23": 0.40162, + "24": 0.40025, + "25": 0.405, + "26": 0.40173, + "27": 0.40154, + "28": 0.40124, + "29": 0.39975, + "30": 0.39939, + "31": 0.39959, + "32": 0.40081, + "33": 0.40069, + "34": 0.40167, + "35": 0.40089, + "36": 0.4008, + "37": 0.40204, + "38": 0.39997, + "39": 0.40129, + "40": 0.40009, + "41": 0.40125, + "42": 0.40029, + "43": 0.4015, + "44": 0.40069, + "45": 0.40137, + "46": 0.40258, + "47": 0.40025, + "48": 0.39925, + "49": 0.39977, + "50": 0.39869 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..4b1a17aa98b --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86244, + "2": 10.88582, + "3": 10.84732, + "4": 10.85571, + "5": 10.86004, + "6": 10.87729, + "7": 10.8656, + "8": 10.84913, + "9": 10.86607, + "10": 10.82475, + "11": 10.85616, + "12": 10.85374, + "13": 10.86787, + "14": 10.87114, + "15": 10.82231, + "16": 10.79992, + "17": 10.77434, + "18": 10.7835, + "19": 10.79308, + "20": 10.68228, + "21": 10.64713, + "22": 10.50916, + "23": 10.66826, + "24": 10.54197, + "25": 10.49279, + "26": 10.55934, + "27": 10.54238, + "28": 10.51131, + "29": 10.53257, + "30": 10.28989, + "31": 10.0285, + "32": 10.38879, + "33": 10.39594, + "34": 10.13454, + "35": 10.18927, + "36": 10.13356, + "37": 10.27378, + "38": 10.10748, + "39": 10.34013, + "40": 9.98543, + "41": 10.06417, + "42": 10.1375, + "43": 9.73384, + "44": 9.86307, + "45": 9.7372, + "46": 9.71343, + "47": 10.07757, + "48": 9.76764, + "49": 9.41992, + "50": 9.81691 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 550.0, + "2": 584.0, + "3": 581.0, + "4": 611.0, + "5": 630.0, + "6": 629.0, + "7": 619.0, + "8": 582.0, + "9": 634.0, + "10": 525.0, + "11": 701.0, + "12": 622.0, + "13": 670.0, + "14": 615.0, + "15": 638.0, + "16": 596.0, + "17": 645.0, + "18": 555.0, + "19": 607.0, + "20": 560.0, + "21": 667.0, + "22": 599.0, + "23": 676.0, + "24": 660.0, + "25": 619.0, + "26": 595.0, + "27": 638.0, + "28": 707.0, + "29": 680.0, + "30": 693.0, + "31": 607.0, + "32": 698.0, + "33": 774.0, + "34": 696.0, + "35": 699.0, + "36": 674.0, + "37": 897.0, + "38": 818.0, + "39": 882.0, + "40": 873.0, + "41": 746.0, + "42": 836.0, + "43": 808.0, + "44": 829.0, + "45": 757.0, + "46": 877.0, + "47": 932.0, + "48": 892.0, + "49": 861.0, + "50": 871.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + 
"end_step": 50, + "step_interval": 1, + "values": { + "1": 459571712.0, + "2": 459571712.0, + "3": 459571712.0, + "4": 459571712.0, + "5": 459571712.0, + "6": 459571712.0, + "7": 459571712.0, + "8": 459571712.0, + "9": 459571712.0, + "10": 459571712.0, + "11": 459571712.0, + "12": 459571712.0, + "13": 459571712.0, + "14": 459571712.0, + "15": 459571712.0, + "16": 459571712.0, + "17": 459571712.0, + "18": 459571712.0, + "19": 459571712.0, + "20": 459571712.0, + "21": 459571712.0, + "22": 459571712.0, + "23": 459571712.0, + "24": 459571712.0, + "25": 459571712.0, + "26": 459571712.0, + "27": 459571712.0, + "28": 459571712.0, + "29": 459571712.0, + "30": 459571712.0, + "31": 459571712.0, + "32": 459571712.0, + "33": 459571712.0, + "34": 459571712.0, + "35": 459571712.0, + "36": 459571712.0, + "37": 459571712.0, + "38": 459571712.0, + "39": 459571712.0, + "40": 459571712.0, + "41": 459571712.0, + "42": 459571712.0, + "43": 459571712.0, + "44": 459571712.0, + "45": 459571712.0, + "46": 459571712.0, + "47": 459571712.0, + "48": 459571712.0, + "49": 459571712.0, + "50": 459571712.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 708781568.0, + "2": 885184000.0, + "3": 885184000.0, + "4": 885184000.0, + "5": 885184000.0, + "6": 885184000.0, + "7": 885184000.0, + "8": 885184000.0, + "9": 885184000.0, + "10": 885184000.0, + "11": 885184000.0, + "12": 885184000.0, + "13": 885184000.0, + "14": 885184000.0, + "15": 885184000.0, + "16": 885184000.0, + "17": 885184000.0, + "18": 885184000.0, + "19": 885184000.0, + "20": 885184000.0, + "21": 885184000.0, + "22": 885184000.0, + "23": 886232064.0, + "24": 886232064.0, + "25": 886232064.0, + "26": 886232064.0, + "27": 886232064.0, + "28": 886232064.0, + "29": 886232064.0, + "30": 886232064.0, + "31": 886232064.0, + "32": 886232064.0, + "33": 886232064.0, + "34": 886232064.0, + "35": 886232064.0, + "36": 886232064.0, + "37": 886232064.0, + "38": 886232064.0, + 
"39": 886232064.0, + "40": 886232064.0, + "41": 886232064.0, + "42": 886232064.0, + "43": 886232064.0, + "44": 886232064.0, + "45": 886232064.0, + "46": 886232064.0, + "47": 886232064.0, + "48": 886232064.0, + "49": 886232064.0, + "50": 886232064.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 13.80388, + "2": 0.45981, + "3": 0.47688, + "4": 0.46506, + "5": 0.40776, + "6": 0.40391, + "7": 0.40648, + "8": 0.40522, + "9": 0.54467, + "10": 0.40469, + "11": 0.76012, + "12": 0.40772, + "13": 0.40474, + "14": 0.40399, + "15": 0.40126, + "16": 0.40258, + "17": 0.40163, + "18": 0.40308, + "19": 0.40205, + "20": 0.45775, + "21": 0.45253, + "22": 0.40222, + "23": 0.47993, + "24": 0.74746, + "25": 0.54096, + "26": 0.595, + "27": 0.42244, + "28": 0.45559, + "29": 0.40939, + "30": 0.40941, + "31": 0.40631, + "32": 0.40777, + "33": 0.40662, + "34": 0.45082, + "35": 0.40861, + "36": 0.40683, + "37": 0.40916, + "38": 0.40762, + "39": 0.40423, + "40": 0.41411, + "41": 0.40792, + "42": 0.40703, + "43": 0.40488, + "44": 0.40689, + "45": 0.40884, + "46": 0.40591, + "47": 0.40461, + "48": 0.50976, + "49": 0.4042, + "50": 0.40707 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_h100.json index fb8e93ed571..f31eb533b69 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.86535, - "2": 10.85873, - "3": 10.86285, - "4": 10.84011, - "5": 10.87856, - "6": 10.88852, - "7": 10.86536, - "8": 10.86016, - "9": 10.85989, - "10": 10.82982, - "11": 
10.88947, - "12": 10.8751, + "1": 10.86539, + "2": 10.85871, + "3": 10.8628, + "4": 10.84012, + "5": 10.87852, + "6": 10.88851, + "7": 10.86537, + "8": 10.86019, + "9": 10.85987, + "10": 10.82981, + "11": 10.88948, + "12": 10.87505, "13": 10.87425, - "14": 10.89675, - "15": 10.82051, - "16": 10.82498, - "17": 10.78982, - "18": 10.81029, - "19": 10.80533, - "20": 10.70397, - "21": 10.66991, - "22": 10.50644, - "23": 10.69004, - "24": 10.56312, + "14": 10.89676, + "15": 10.82055, + "16": 10.82497, + "17": 10.78983, + "18": 10.81028, + "19": 10.80534, + "20": 10.70396, + "21": 10.6699, + "22": 10.50646, + "23": 10.69008, + "24": 10.56313, "25": 10.49421, - "26": 10.56627, - "27": 10.58027, + "26": 10.56629, + "27": 10.58025, "28": 10.51573, - "29": 10.553, - "30": 10.30549, - "31": 10.02248, - "32": 10.40616, - "33": 10.39874, - "34": 10.13771, + "29": 10.55296, + "30": 10.30548, + "31": 10.02246, + "32": 10.40617, + "33": 10.39878, + "34": 10.13774, "35": 10.20187, - "36": 10.16049, - "37": 10.28975, - "38": 10.11483, - "39": 10.36101, - "40": 10.01902, - "41": 10.07289, + "36": 10.1605, + "37": 10.28973, + "38": 10.1148, + "39": 10.36099, + "40": 10.01904, + "41": 10.07292, "42": 10.14695, - "43": 9.74689, - "44": 9.87763, - "45": 9.74967, - "46": 9.73381, - "47": 10.07535, - "48": 9.78068, - "49": 9.44781, - "50": 9.8399 + "43": 9.74685, + "44": 9.8776, + "45": 9.74964, + "46": 9.73384, + "47": 10.07538, + "48": 9.7807, + "49": 9.44782, + "50": 9.83987 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 625.0, - "2": 644.0, - "3": 614.0, - "4": 636.0, - "5": 605.0, - "6": 649.0, - "7": 606.0, - "8": 559.0, - "9": 658.0, - "10": 524.0, - "11": 693.0, - "12": 598.0, - "13": 702.0, - "14": 660.0, - "15": 638.0, - "16": 596.0, - "17": 662.0, - "18": 586.0, - "19": 594.0, - "20": 598.0, - "21": 656.0, - "22": 608.0, - "23": 706.0, - "24": 609.0, - "25": 610.0, - "26": 632.0, - "27": 664.0, - "28": 766.0, - "29": 765.0, - "30": 
755.0, - "31": 606.0, - "32": 708.0, - "33": 775.0, - "34": 735.0, - "35": 729.0, - "36": 739.0, - "37": 840.0, - "38": 749.0, - "39": 911.0, - "40": 763.0, - "41": 830.0, - "42": 835.0, - "43": 755.0, - "44": 823.0, - "45": 799.0, - "46": 811.0, - "47": 869.0, - "48": 839.0, - "49": 897.0, - "50": 869.0 + "1": 605.0, + "2": 642.0, + "3": 634.0, + "4": 637.0, + "5": 630.0, + "6": 692.0, + "7": 692.0, + "8": 551.0, + "9": 638.0, + "10": 549.0, + "11": 666.0, + "12": 644.0, + "13": 631.0, + "14": 639.0, + "15": 636.0, + "16": 669.0, + "17": 676.0, + "18": 635.0, + "19": 613.0, + "20": 637.0, + "21": 631.0, + "22": 588.0, + "23": 784.0, + "24": 596.0, + "25": 572.0, + "26": 619.0, + "27": 717.0, + "28": 725.0, + "29": 775.0, + "30": 722.0, + "31": 613.0, + "32": 737.0, + "33": 823.0, + "34": 699.0, + "35": 720.0, + "36": 702.0, + "37": 843.0, + "38": 826.0, + "39": 854.0, + "40": 764.0, + "41": 834.0, + "42": 820.0, + "43": 744.0, + "44": 840.0, + "45": 788.0, + "46": 798.0, + "47": 863.0, + "48": 888.0, + "49": 867.0, + "50": 814.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - 
"42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 512786944.0, + "2": 512786944.0, + "3": 512786944.0, + "4": 512786944.0, + "5": 512786944.0, + "6": 512786944.0, + "7": 512786944.0, + "8": 512786944.0, + "9": 512786944.0, + "10": 512786944.0, + "11": 512786944.0, + "12": 512786944.0, + "13": 512786944.0, + "14": 512786944.0, + "15": 512786944.0, + "16": 512786944.0, + "17": 512786944.0, + "18": 512786944.0, + "19": 512786944.0, + "20": 512786944.0, + "21": 512786944.0, + "22": 512786944.0, + "23": 512786944.0, + "24": 512786944.0, + "25": 512786944.0, + "26": 512786944.0, + "27": 512786944.0, + "28": 512786944.0, + "29": 512786944.0, + "30": 512786944.0, + "31": 512786944.0, + "32": 512786944.0, + "33": 512786944.0, + "34": 512786944.0, + "35": 512786944.0, + "36": 512786944.0, + "37": 512786944.0, + "38": 512786944.0, + "39": 512786944.0, + "40": 512786944.0, + "41": 512786944.0, + "42": 512786944.0, + "43": 512786944.0, + "44": 512786944.0, + "45": 512786944.0, + "46": 512786944.0, + "47": 512786944.0, + "48": 512786944.0, + "49": 512786944.0, + "50": 512786944.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 759898624.0, - "2": 933156352.0, - "3": 933156352.0, - "4": 934202368.0, - "5": 934202368.0, - "6": 934202368.0, - "7": 934202368.0, - "8": 934202368.0, - "9": 934202368.0, - "10": 934202368.0, - "11": 934202368.0, - "12": 934202368.0, - "13": 934202368.0, - "14": 934202368.0, - "15": 934202368.0, - "16": 934202368.0, - "17": 934202368.0, - "18": 934202368.0, - "19": 934202368.0, - "20": 934202368.0, - "21": 934202368.0, - "22": 934202368.0, - "23": 934202368.0, - "24": 934202368.0, - "25": 934202368.0, - "26": 934202368.0, - "27": 934202368.0, - "28": 934202368.0, - "29": 934202368.0, - "30": 934202368.0, - "31": 934202368.0, - "32": 934202368.0, - 
"33": 934202368.0, - "34": 934202368.0, - "35": 934202368.0, - "36": 934202368.0, - "37": 934202368.0, - "38": 934202368.0, - "39": 934202368.0, - "40": 934202368.0, - "41": 934202368.0, - "42": 934202368.0, - "43": 934202368.0, - "44": 934202368.0, - "45": 934202368.0, - "46": 934202368.0, - "47": 934202368.0, - "48": 934202368.0, - "49": 934202368.0, - "50": 934202368.0 + "1": 758851072.0, + "2": 937350144.0, + "3": 937350656.0, + "4": 937350656.0, + "5": 937350656.0, + "6": 937350656.0, + "7": 937350656.0, + "8": 937350656.0, + "9": 937350656.0, + "10": 937350656.0, + "11": 937350656.0, + "12": 937350656.0, + "13": 937350656.0, + "14": 937350656.0, + "15": 937350656.0, + "16": 937350656.0, + "17": 937350656.0, + "18": 937350656.0, + "19": 937350656.0, + "20": 937350656.0, + "21": 937350656.0, + "22": 937350656.0, + "23": 937350656.0, + "24": 937350656.0, + "25": 937350656.0, + "26": 937350656.0, + "27": 937350656.0, + "28": 937350656.0, + "29": 937350656.0, + "30": 937350656.0, + "31": 937350656.0, + "32": 937350656.0, + "33": 937350656.0, + "34": 937350656.0, + "35": 937350656.0, + "36": 937350656.0, + "37": 937350656.0, + "38": 937350656.0, + "39": 937350656.0, + "40": 937350656.0, + "41": 937350656.0, + "42": 937350656.0, + "43": 937350656.0, + "44": 937350656.0, + "45": 937350656.0, + "46": 937350656.0, + "47": 937350656.0, + "48": 937350656.0, + "49": 937350656.0, + "50": 937350656.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 15.91359, - "2": 0.40136, - "3": 0.32913, - "4": 0.33946, - "5": 0.32404, - "6": 0.31963, - "7": 0.32283, - "8": 0.32302, - "9": 0.32004, - "10": 0.32058, - "11": 0.33128, - "12": 0.32725, - "13": 0.3253, - "14": 0.32532, - "15": 0.32194, - "16": 0.32237, - "17": 0.31946, - "18": 0.31937, - "19": 0.3185, - "20": 0.3193, - "21": 0.32216, - "22": 0.328, - "23": 0.32251, - "24": 0.32294, - "25": 0.32205, - "26": 0.32393, - "27": 0.32132, - "28": 0.32221, - "29": 0.32269, - "30": 
0.32422, - "31": 0.32527, - "32": 0.32866, - "33": 0.32346, - "34": 0.32064, - "35": 0.3199, - "36": 0.32198, - "37": 0.32252, - "38": 0.32103, - "39": 0.32486, - "40": 0.32573, - "41": 0.32643, - "42": 0.3234, - "43": 0.32778, - "44": 0.32302, - "45": 0.32434, - "46": 0.32532, - "47": 0.32115, - "48": 0.31979, - "49": 0.3233, - "50": 0.31776 + "1": 33.51618, + "2": 0.36883, + "3": 0.30428, + "4": 0.28577, + "5": 0.28543, + "6": 0.28865, + "7": 0.32712, + "8": 0.32792, + "9": 0.28343, + "10": 0.28485, + "11": 0.28657, + "12": 0.28232, + "13": 0.28318, + "14": 0.28116, + "15": 0.28207, + "16": 0.28249, + "17": 0.2834, + "18": 0.28247, + "19": 0.28307, + "20": 0.28306, + "21": 0.28204, + "22": 0.28265, + "23": 0.28005, + "24": 0.2819, + "25": 0.2815, + "26": 0.28084, + "27": 0.28108, + "28": 0.28074, + "29": 0.28159, + "30": 0.28148, + "31": 0.28071, + "32": 0.27992, + "33": 0.28304, + "34": 0.28251, + "35": 0.28377, + "36": 0.28373, + "37": 0.28263, + "38": 0.28146, + "39": 0.28084, + "40": 0.28168, + "41": 0.28075, + "42": 0.27996, + "43": 0.2815, + "44": 0.28058, + "45": 0.2814, + "46": 0.28356, + "47": 0.28026, + "48": 0.28452, + "49": 0.28225, + "50": 0.28075 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgx_a100.json index 215ddcea45c..421e66150ce 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgx_a100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.93292, "5": 10.92965, "10": 10.90473, "15": 10.87127, "20": 10.74997, "25": 10.53754, "30": 10.32548, "35": 10.22895, "40": 10.01975, "45": 9.75546, "50": 9.84069}}, 
"num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 585.0, "5": 675.0, "10": 544.0, "15": 619.0, "20": 579.0, "25": 620.0, "30": 678.0, "35": 717.0, "40": 813.0, "45": 746.0, "50": 841.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 432177152.0, "5": 432177152.0, "10": 432177152.0, "15": 432177152.0, "20": 432177152.0, "25": 432177152.0, "30": 432177152.0, "35": 432177152.0, "40": 432177152.0, "45": 432177152.0, "50": 432177152.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 676283904.0, "5": 856228864.0, "10": 857276928.0, "15": 857276928.0, "20": 857276928.0, "25": 857276928.0, "30": 857276928.0, "35": 857276928.0, "40": 857276928.0, "45": 857276928.0, "50": 857276928.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 12.34002, "5": 0.40276, "10": 0.39665, "15": 0.39344, "20": 0.39157, "25": 0.3871, "30": 0.38802, "35": 0.39196, "40": 0.38964, "45": 0.39313, "50": 0.39241}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.93295, + "2": 10.93424, + "3": 10.91347, + "4": 10.90322, + "5": 10.92969, + "6": 10.93655, + "7": 10.90279, + "8": 10.92115, + "9": 10.90706, + "10": 10.90476, + "11": 10.88788, + "12": 10.91742, + "13": 10.91192, + "14": 10.91504, + "15": 10.87121, + "16": 10.86129, + "17": 10.82702, + "18": 10.85676, + "19": 10.84055, + "20": 10.75002, + "21": 10.71507, + "22": 10.58115, + "23": 10.72645, + "24": 10.60725, + "25": 10.53755, + "26": 10.61068, + "27": 10.59926, + "28": 10.54954, + "29": 10.56605, + "30": 10.3255, + "31": 10.06696, + "32": 10.43809, + "33": 10.42362, + "34": 10.16017, + "35": 10.22893, + "36": 10.17616, + "37": 10.29235, + "38": 10.13293, + "39": 10.34957, + "40": 10.01975, + "41": 10.07533, + "42": 10.15406, + "43": 9.76091, + "44": 9.88358, + "45": 9.75547, + "46": 9.74961, 
+ "47": 10.07549, + "48": 9.77934, + "49": 9.43812, + "50": 9.8407 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 604.0, + "2": 606.0, + "3": 671.0, + "4": 620.0, + "5": 670.0, + "6": 594.0, + "7": 646.0, + "8": 558.0, + "9": 627.0, + "10": 591.0, + "11": 682.0, + "12": 595.0, + "13": 692.0, + "14": 633.0, + "15": 636.0, + "16": 670.0, + "17": 644.0, + "18": 570.0, + "19": 591.0, + "20": 570.0, + "21": 643.0, + "22": 577.0, + "23": 657.0, + "24": 572.0, + "25": 611.0, + "26": 637.0, + "27": 649.0, + "28": 730.0, + "29": 737.0, + "30": 685.0, + "31": 548.0, + "32": 689.0, + "33": 735.0, + "34": 665.0, + "35": 700.0, + "36": 701.0, + "37": 855.0, + "38": 786.0, + "39": 794.0, + "40": 808.0, + "41": 844.0, + "42": 835.0, + "43": 678.0, + "44": 750.0, + "45": 771.0, + "46": 831.0, + "47": 920.0, + "48": 892.0, + "49": 824.0, + "50": 795.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 431522304.0, + "2": 431522304.0, + "3": 431522304.0, + "4": 431522304.0, + "5": 431522304.0, + "6": 431522304.0, + "7": 431522304.0, + "8": 431522304.0, + "9": 431522304.0, + "10": 431522304.0, + "11": 431522304.0, + "12": 431522304.0, + "13": 431522304.0, + "14": 431522304.0, + "15": 431522304.0, + "16": 431522304.0, + "17": 431522304.0, + "18": 431522304.0, + "19": 431522304.0, + "20": 431522304.0, + "21": 431522304.0, + "22": 431522304.0, + "23": 431522304.0, + "24": 431522304.0, + "25": 431522304.0, + "26": 431522304.0, + "27": 431522304.0, + "28": 431522304.0, + "29": 431522304.0, + "30": 431522304.0, + "31": 431522304.0, + "32": 431522304.0, + "33": 431522304.0, + "34": 431522304.0, + "35": 431522304.0, + "36": 431522304.0, + "37": 431522304.0, + "38": 431522304.0, + "39": 431522304.0, + "40": 431522304.0, + "41": 431522304.0, + "42": 431522304.0, + "43": 431522304.0, + "44": 431522304.0, + "45": 431522304.0, + "46": 431522304.0, + "47": 431522304.0, + 
"48": 431522304.0, + "49": 431522304.0, + "50": 431522304.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 678369280.0, + "2": 861326336.0, + "3": 861326336.0, + "4": 861326336.0, + "5": 861326848.0, + "6": 861328896.0, + "7": 861328896.0, + "8": 861328896.0, + "9": 861328896.0, + "10": 861328896.0, + "11": 861328896.0, + "12": 861328896.0, + "13": 861328896.0, + "14": 861328896.0, + "15": 861328896.0, + "16": 861328896.0, + "17": 861328896.0, + "18": 861328896.0, + "19": 861328896.0, + "20": 861328896.0, + "21": 861328896.0, + "22": 861328896.0, + "23": 861328896.0, + "24": 861328896.0, + "25": 861328896.0, + "26": 861328896.0, + "27": 861328896.0, + "28": 861328896.0, + "29": 861328896.0, + "30": 861328896.0, + "31": 861328896.0, + "32": 861328896.0, + "33": 861328896.0, + "34": 861328896.0, + "35": 861328896.0, + "36": 861328896.0, + "37": 861328896.0, + "38": 861328896.0, + "39": 861328896.0, + "40": 861328896.0, + "41": 861328896.0, + "42": 861328896.0, + "43": 861328896.0, + "44": 861328896.0, + "45": 861328896.0, + "46": 861328896.0, + "47": 861328896.0, + "48": 861328896.0, + "49": 861328896.0, + "50": 861328896.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 16.47831, + "2": 0.44885, + "3": 0.42205, + "4": 0.40517, + "5": 0.40824, + "6": 0.40476, + "7": 0.40726, + "8": 0.40671, + "9": 0.40354, + "10": 0.41027, + "11": 0.44095, + "12": 0.43962, + "13": 0.44029, + "14": 0.44506, + "15": 0.43995, + "16": 0.44228, + "17": 0.44479, + "18": 0.43969, + "19": 0.43999, + "20": 0.43737, + "21": 0.44549, + "22": 0.44572, + "23": 0.44259, + "24": 0.44105, + "25": 0.44312, + "26": 0.44437, + "27": 0.44718, + "28": 0.44344, + "29": 0.44315, + "30": 0.43332, + "31": 0.4392, + "32": 0.43861, + "33": 0.40986, + "34": 0.40961, + "35": 0.40907, + "36": 0.41022, + "37": 0.41003, + "38": 0.41205, + "39": 0.41822, + "40": 0.41387, + "41": 
0.4147, + "42": 0.41362, + "43": 0.4135, + "44": 0.41365, + "45": 0.41109, + "46": 0.41218, + "47": 0.41209, + "48": 0.41473, + "49": 0.41335, + "50": 0.41197 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..529bad10ded --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86836, + "2": 10.88595, + "3": 10.8656, + "4": 10.86891, + "5": 10.87418, + "6": 10.89058, + "7": 10.87677, + "8": 10.86475, + "9": 10.88236, + "10": 10.84579, + "11": 10.87162, + "12": 10.87422, + "13": 10.88161, + "14": 10.88886, + "15": 10.83932, + "16": 10.82496, + "17": 10.80144, + "18": 10.81234, + "19": 10.82153, + "20": 10.71934, + "21": 10.69091, + "22": 10.57427, + "23": 10.71091, + "24": 10.59783, + "25": 10.55561, + "26": 10.61523, + "27": 10.60449, + "28": 10.56482, + "29": 10.58475, + "30": 10.3595, + "31": 10.12152, + "32": 10.45239, + "33": 10.45725, + "34": 10.21986, + "35": 10.26447, + "36": 10.21035, + "37": 10.33955, + "38": 10.18013, + "39": 10.39593, + "40": 10.06628, + "41": 10.14163, + "42": 10.2085, + "43": 9.83126, + "44": 9.9486, + "45": 9.82846, + "46": 9.80461, + "47": 10.14231, + "48": 9.84461, + "49": 9.52191, + "50": 9.88605 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1554.0, + "2": 1619.0, + "3": 1663.0, + "4": 1672.0, + "5": 1771.0, + "6": 1739.0, + "7": 1866.0, + "8": 1590.0, + "9": 1819.0, + "10": 1394.0, + "11": 1786.0, + "12": 1643.0, + "13": 1829.0, + "14": 1672.0, + "15": 1827.0, + "16": 1771.0, + "17": 1797.0, + "18": 
1632.0, + "19": 1667.0, + "20": 1670.0, + "21": 1843.0, + "22": 1620.0, + "23": 1889.0, + "24": 1513.0, + "25": 1473.0, + "26": 1619.0, + "27": 1768.0, + "28": 1976.0, + "29": 1898.0, + "30": 1858.0, + "31": 1565.0, + "32": 1890.0, + "33": 2166.0, + "34": 1679.0, + "35": 1825.0, + "36": 1909.0, + "37": 2341.0, + "38": 2029.0, + "39": 2294.0, + "40": 2015.0, + "41": 2181.0, + "42": 2211.0, + "43": 1907.0, + "44": 2140.0, + "45": 1936.0, + "46": 2341.0, + "47": 2472.0, + "48": 2272.0, + "49": 2234.0, + "50": 2457.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 465601024.0, + "2": 465601024.0, + "3": 465601024.0, + "4": 465601024.0, + "5": 465601024.0, + "6": 465601024.0, + "7": 465601024.0, + "8": 465601024.0, + "9": 465601024.0, + "10": 465601024.0, + "11": 465601024.0, + "12": 465601024.0, + "13": 465601024.0, + "14": 465601024.0, + "15": 465601024.0, + "16": 465601024.0, + "17": 465601024.0, + "18": 465601024.0, + "19": 465601024.0, + "20": 465601024.0, + "21": 465601024.0, + "22": 465601024.0, + "23": 465601024.0, + "24": 465601024.0, + "25": 465601024.0, + "26": 465601024.0, + "27": 465601024.0, + "28": 465601024.0, + "29": 465601024.0, + "30": 465601024.0, + "31": 465601024.0, + "32": 465601024.0, + "33": 465601024.0, + "34": 465601024.0, + "35": 465601024.0, + "36": 465601024.0, + "37": 465601024.0, + "38": 465601024.0, + "39": 465601024.0, + "40": 465601024.0, + "41": 465601024.0, + "42": 465601024.0, + "43": 465601024.0, + "44": 465601024.0, + "45": 465601024.0, + "46": 465601024.0, + "47": 465601024.0, + "48": 465601024.0, + "49": 465601024.0, + "50": 465601024.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1728999424.0, + "2": 1789405696.0, + "3": 1789405696.0, + "4": 1789405696.0, + "5": 1789405696.0, + "6": 1789405696.0, + "7": 1789405696.0, + "8": 1789405696.0, + "9": 1789405696.0, + "10": 1789405696.0, + "11": 
1789405696.0, + "12": 1789405696.0, + "13": 1789405696.0, + "14": 1789405696.0, + "15": 1789405696.0, + "16": 1789405696.0, + "17": 1789405696.0, + "18": 1789405696.0, + "19": 1789405696.0, + "20": 1789405696.0, + "21": 1789405696.0, + "22": 1789405696.0, + "23": 1789405696.0, + "24": 1789405696.0, + "25": 1789405696.0, + "26": 1789405696.0, + "27": 1789405696.0, + "28": 1789405696.0, + "29": 1789405696.0, + "30": 1789405696.0, + "31": 1789405696.0, + "32": 1789405696.0, + "33": 1789405696.0, + "34": 1789405696.0, + "35": 1789405696.0, + "36": 1789405696.0, + "37": 1789405696.0, + "38": 1789405696.0, + "39": 1789405696.0, + "40": 1789405696.0, + "41": 1789405696.0, + "42": 1789405696.0, + "43": 1789405696.0, + "44": 1789405696.0, + "45": 1789405696.0, + "46": 1789405696.0, + "47": 1789405696.0, + "48": 1789405696.0, + "49": 1789405696.0, + "50": 1789405696.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 11.87728, + "2": 0.19403, + "3": 0.17442, + "4": 0.16292, + "5": 0.28152, + "6": 0.1602, + "7": 0.20711, + "8": 0.4188, + "9": 0.1573, + "10": 0.25278, + "11": 0.44625, + "12": 0.23028, + "13": 0.16929, + "14": 0.15589, + "15": 0.24336, + "16": 0.19322, + "17": 0.19037, + "18": 0.15335, + "19": 0.25153, + "20": 0.20655, + "21": 0.15398, + "22": 0.15177, + "23": 0.25777, + "24": 0.15477, + "25": 0.15108, + "26": 0.25255, + "27": 0.23256, + "28": 0.16156, + "29": 0.33982, + "30": 0.15402, + "31": 0.15482, + "32": 0.15494, + "33": 0.15494, + "34": 0.15448, + "35": 0.15383, + "36": 0.15383, + "37": 0.15343, + "38": 0.15387, + "39": 0.15805, + "40": 0.15334, + "41": 0.24971, + "42": 0.15713, + "43": 0.22532, + "44": 0.15365, + "45": 0.41087, + "46": 0.15392, + "47": 0.15221, + "48": 0.23644, + "49": 0.1534, + "50": 0.15283 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json index 379b1c16f29..7dd87fe6932 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json @@ -6,53 +6,53 @@ "values": { "1": 10.85949, "2": 10.85553, - "3": 10.86548, - "4": 10.84554, - "5": 10.88344, - "6": 10.89429, - "7": 10.87068, + "3": 10.8655, + "4": 10.84551, + "5": 10.88343, + "6": 10.8943, + "7": 10.87063, "8": 10.86983, - "9": 10.86919, - "10": 10.83883, + "9": 10.86921, + "10": 10.83884, "11": 10.89435, - "12": 10.8798, - "13": 10.87987, - "14": 10.90317, + "12": 10.87978, + "13": 10.87984, + "14": 10.90319, "15": 10.8405, - "16": 10.83786, - "17": 10.80668, - "18": 10.83025, - "19": 10.82262, + "16": 10.83787, + "17": 10.80669, + "18": 10.83026, + "19": 10.82265, "20": 10.73192, - "21": 10.7075, - "22": 10.56005, - "23": 10.72406, - "24": 10.61116, - "25": 10.5481, - "26": 10.61334, - "27": 10.6305, - "28": 10.56645, - "29": 10.59672, - "30": 10.37136, - "31": 10.11721, - "32": 10.46127, + "21": 10.70754, + "22": 10.56006, + "23": 10.72404, + "24": 10.61114, + "25": 10.54813, + "26": 10.61328, + "27": 10.63051, + "28": 10.56643, + "29": 10.59671, + "30": 10.37137, + "31": 10.11718, + "32": 10.4613, "33": 10.45247, "34": 10.21687, - "35": 10.27171, - "36": 10.2312, + "35": 10.27176, + "36": 10.23121, "37": 10.34809, - "38": 10.18842, + "38": 10.1884, "39": 10.41042, "40": 10.09426, - "41": 10.14711, - "42": 10.21247, - "43": 9.84106, - "44": 9.95919, - "45": 9.84082, - "46": 9.82482, - "47": 10.13882, - "48": 9.85839, - "49": 9.5472, + "41": 10.14707, + "42": 10.21244, + "43": 9.84107, + "44": 9.95916, + "45": 9.84087, + "46": 9.82484, + "47": 10.1388, + "48": 9.85842, + "49": 9.54724, "50": 9.90883 } }, @@ -61,56 +61,56 @@ "end_step": 50, 
"step_interval": 1, "values": { - "1": 1690.0, + "1": 1660.0, "2": 1776.0, - "3": 1642.0, - "4": 1825.0, - "5": 1809.0, - "6": 1795.0, - "7": 1830.0, - "8": 1626.0, - "9": 1878.0, - "10": 1423.0, - "11": 1868.0, - "12": 1653.0, - "13": 1897.0, - "14": 1783.0, - "15": 1861.0, - "16": 1938.0, - "17": 1825.0, - "18": 1730.0, - "19": 1727.0, - "20": 1735.0, - "21": 1783.0, - "22": 1576.0, - "23": 1949.0, - "24": 1630.0, - "25": 1498.0, - "26": 1649.0, - "27": 1809.0, - "28": 2019.0, - "29": 2009.0, - "30": 1832.0, - "31": 1524.0, - "32": 1943.0, - "33": 2081.0, - "34": 1888.0, - "35": 1935.0, - "36": 1898.0, - "37": 2325.0, - "38": 2070.0, - "39": 2248.0, - "40": 2199.0, - "41": 2264.0, - "42": 2349.0, - "43": 2087.0, - "44": 2107.0, - "45": 2098.0, - "46": 2407.0, - "47": 2456.0, - "48": 2404.0, - "49": 2417.0, - "50": 2407.0 + "3": 1685.0, + "4": 1830.0, + "5": 1876.0, + "6": 1881.0, + "7": 1773.0, + "8": 1628.0, + "9": 1868.0, + "10": 1353.0, + "11": 1926.0, + "12": 1737.0, + "13": 1848.0, + "14": 1643.0, + "15": 1917.0, + "16": 1839.0, + "17": 1856.0, + "18": 1706.0, + "19": 1744.0, + "20": 1662.0, + "21": 1877.0, + "22": 1569.0, + "23": 2062.0, + "24": 1569.0, + "25": 1560.0, + "26": 1701.0, + "27": 1772.0, + "28": 1894.0, + "29": 2094.0, + "30": 1838.0, + "31": 1538.0, + "32": 1980.0, + "33": 2060.0, + "34": 1919.0, + "35": 1885.0, + "36": 1906.0, + "37": 2286.0, + "38": 2045.0, + "39": 2285.0, + "40": 2096.0, + "41": 2265.0, + "42": 2248.0, + "43": 2040.0, + "44": 2114.0, + "45": 2134.0, + "46": 2443.0, + "47": 2479.0, + "48": 2455.0, + "49": 2402.0, + "50": 2416.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 516194816.0, - "2": 516194816.0, - "3": 516194816.0, - "4": 516194816.0, - "5": 516194816.0, - "6": 516194816.0, - "7": 516194816.0, - "8": 516194816.0, - "9": 516194816.0, - "10": 516194816.0, - "11": 516194816.0, - "12": 516194816.0, - "13": 516194816.0, - "14": 516194816.0, - "15": 
516194816.0, - "16": 516194816.0, - "17": 516194816.0, - "18": 516194816.0, - "19": 516194816.0, - "20": 516194816.0, - "21": 516194816.0, - "22": 516194816.0, - "23": 516194816.0, - "24": 516194816.0, - "25": 516194816.0, - "26": 516194816.0, - "27": 516194816.0, - "28": 516194816.0, - "29": 516194816.0, - "30": 516194816.0, - "31": 516194816.0, - "32": 516194816.0, - "33": 516194816.0, - "34": 516194816.0, - "35": 516194816.0, - "36": 516194816.0, - "37": 516194816.0, - "38": 516194816.0, - "39": 516194816.0, - "40": 516194816.0, - "41": 516194816.0, - "42": 516194816.0, - "43": 516194816.0, - "44": 516194816.0, - "45": 516194816.0, - "46": 516194816.0, - "47": 516194816.0, - "48": 516194816.0, - "49": 516194816.0, - "50": 516194816.0 + "1": 514359808.0, + "2": 514359808.0, + "3": 514359808.0, + "4": 514359808.0, + "5": 514359808.0, + "6": 514359808.0, + "7": 514359808.0, + "8": 514359808.0, + "9": 514359808.0, + "10": 514359808.0, + "11": 514359808.0, + "12": 514359808.0, + "13": 514359808.0, + "14": 514359808.0, + "15": 514359808.0, + "16": 514359808.0, + "17": 514359808.0, + "18": 514359808.0, + "19": 514359808.0, + "20": 514359808.0, + "21": 514359808.0, + "22": 514359808.0, + "23": 514359808.0, + "24": 514359808.0, + "25": 514359808.0, + "26": 514359808.0, + "27": 514359808.0, + "28": 514359808.0, + "29": 514359808.0, + "30": 514359808.0, + "31": 514359808.0, + "32": 514359808.0, + "33": 514359808.0, + "34": 514359808.0, + "35": 514359808.0, + "36": 514359808.0, + "37": 514359808.0, + "38": 514359808.0, + "39": 514359808.0, + "40": 514359808.0, + "41": 514359808.0, + "42": 514359808.0, + "43": 514359808.0, + "44": 514359808.0, + "45": 514359808.0, + "46": 514359808.0, + "47": 514359808.0, + "48": 514359808.0, + "49": 514359808.0, + "50": 514359808.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1670130688.0, - "2": 1840523776.0, - "3": 1840523776.0, - "4": 1840523776.0, - "5": 1840523776.0, - 
"6": 1840523776.0, - "7": 1840523776.0, - "8": 1840523776.0, - "9": 1840523776.0, - "10": 1840523776.0, - "11": 1840523776.0, - "12": 1840523776.0, - "13": 1840523776.0, - "14": 1840523776.0, - "15": 1840523776.0, - "16": 1840523776.0, - "17": 1840523776.0, - "18": 1840523776.0, - "19": 1840523776.0, - "20": 1840523776.0, - "21": 1840523776.0, - "22": 1840523776.0, - "23": 1840523776.0, - "24": 1840523776.0, - "25": 1840523776.0, - "26": 1840523776.0, - "27": 1840523776.0, - "28": 1840523776.0, - "29": 1840523776.0, - "30": 1840523776.0, - "31": 1840523776.0, - "32": 1840523776.0, - "33": 1840523776.0, - "34": 1840523776.0, - "35": 1840523776.0, - "36": 1840523776.0, - "37": 1840523776.0, - "38": 1840523776.0, - "39": 1840523776.0, - "40": 1840523776.0, - "41": 1840523776.0, - "42": 1840523776.0, - "43": 1840523776.0, - "44": 1840523776.0, - "45": 1840523776.0, - "46": 1840523776.0, - "47": 1840523776.0, - "48": 1840523776.0, - "49": 1840523776.0, - "50": 1840523776.0 + "1": 1670148096.0, + "2": 1837640192.0, + "3": 1837640192.0, + "4": 1837640192.0, + "5": 1837640192.0, + "6": 1837640192.0, + "7": 1837640192.0, + "8": 1837640192.0, + "9": 1837640192.0, + "10": 1837640192.0, + "11": 1837640192.0, + "12": 1837640192.0, + "13": 1837640192.0, + "14": 1837640192.0, + "15": 1837640192.0, + "16": 1837640192.0, + "17": 1837640192.0, + "18": 1837640192.0, + "19": 1837640192.0, + "20": 1837640192.0, + "21": 1837640192.0, + "22": 1837640192.0, + "23": 1837640192.0, + "24": 1837640192.0, + "25": 1837640192.0, + "26": 1837640192.0, + "27": 1837640192.0, + "28": 1837640192.0, + "29": 1837640192.0, + "30": 1837640192.0, + "31": 1837640192.0, + "32": 1837640192.0, + "33": 1837640192.0, + "34": 1837640192.0, + "35": 1837640192.0, + "36": 1837640192.0, + "37": 1837640192.0, + "38": 1837640192.0, + "39": 1837640192.0, + "40": 1837640192.0, + "41": 1837640192.0, + "42": 1837640192.0, + "43": 1837640192.0, + "44": 1837640192.0, + "45": 1837640192.0, + "46": 1837640192.0, + "47": 
1837640192.0, + "48": 1837640192.0, + "49": 1837640192.0, + "50": 1837640192.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 15.2683, - "2": 0.15358, - "3": 0.13619, - "4": 0.13976, - "5": 0.13713, - "6": 0.13753, - "7": 0.13575, - "8": 0.13485, - "9": 0.13779, - "10": 0.13697, - "11": 0.14178, - "12": 0.1397, - "13": 0.13744, - "14": 0.14039, - "15": 0.13739, - "16": 0.1361, - "17": 0.13816, - "18": 0.13722, - "19": 0.15342, - "20": 0.14613, - "21": 0.14806, - "22": 0.14423, - "23": 0.14791, - "24": 0.14345, - "25": 0.14474, - "26": 0.14564, - "27": 0.14168, - "28": 0.14148, - "29": 0.13863, - "30": 0.13751, - "31": 0.14015, - "32": 0.13821, - "33": 0.14038, - "34": 0.13859, - "35": 0.14531, - "36": 0.14468, - "37": 0.13783, - "38": 0.13787, - "39": 0.13879, - "40": 0.14072, - "41": 0.14065, - "42": 0.13865, - "43": 0.13953, - "44": 0.13882, - "45": 0.13622, - "46": 0.14034, - "47": 0.13659, - "48": 0.14369, - "49": 0.13987, - "50": 0.13803 + "1": 9.56969, + "2": 0.15621, + "3": 0.13591, + "4": 0.11846, + "5": 0.11755, + "6": 0.1173, + "7": 0.11302, + "8": 0.11176, + "9": 0.11094, + "10": 0.11205, + "11": 0.11214, + "12": 0.11069, + "13": 0.11128, + "14": 0.11089, + "15": 0.11218, + "16": 0.11119, + "17": 0.11088, + "18": 0.11035, + "19": 0.11159, + "20": 0.11079, + "21": 0.11182, + "22": 0.11081, + "23": 0.11148, + "24": 0.1122, + "25": 0.11117, + "26": 0.11184, + "27": 0.11686, + "28": 0.10976, + "29": 0.11011, + "30": 0.11235, + "31": 0.11032, + "32": 0.11316, + "33": 0.11177, + "34": 0.11253, + "35": 0.11045, + "36": 0.11022, + "37": 0.11032, + "38": 0.11201, + "39": 0.11511, + "40": 0.11021, + "41": 0.1116, + "42": 0.11045, + "43": 0.11205, + "44": 0.11101, + "45": 0.10943, + "46": 0.11006, + "47": 0.11008, + "48": 0.11033, + "49": 0.11205, + "50": 0.11073 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json index f0460352ce3..c0c97884af8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 10.92655, - "5": 10.92721, - "10": 10.90788, - "15": 10.88293, - "20": 10.77594, - "25": 10.59265, - "30": 10.39169, - "35": 10.29699, - "40": 10.09664, - "45": 9.84469, - "50": 9.90944 + "1": 10.92228, + "2": 10.92833, + "3": 10.91713, + "4": 10.90497, + "5": 10.92809, + "6": 10.93672, + "7": 10.90401, + "8": 10.92229, + "9": 10.91253, + "10": 10.90846, + "11": 10.89336, + "12": 10.92081, + "13": 10.91489, + "14": 10.92148, + "15": 10.8843, + "16": 10.87455, + "17": 10.83919, + "18": 10.87311, + "19": 10.85334, + "20": 10.77493, + "21": 10.74758, + "22": 10.63148, + "23": 10.75623, + "24": 10.65569, + "25": 10.59216, + "26": 10.65326, + "27": 10.6488, + "28": 10.5966, + "29": 10.61012, + "30": 10.39285, + "31": 10.15722, + "32": 10.49215, + "33": 10.47941, + "34": 10.24018, + "35": 10.29713, + "36": 10.24563, + "37": 10.35285, + "38": 10.20535, + "39": 10.40419, + "40": 10.09552, + "41": 10.15278, + "42": 10.21882, + "43": 9.85529, + "44": 9.96247, + "45": 9.84617, + "46": 9.83801, + "47": 10.1389, + "48": 9.85697, + "49": 9.53751, + "50": 9.9088 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 1610.0, - "5": 1901.0, - "10": 1373.0, - "15": 1954.0, - "20": 1614.0, - "25": 1625.0, - "30": 1953.0, - "35": 1904.0, - "40": 2127.0, - "45": 2199.0, - "50": 2451.0 
+ "1": 1683.0, + "2": 1789.0, + "3": 1705.0, + "4": 1788.0, + "5": 1911.0, + "6": 1820.0, + "7": 1935.0, + "8": 1724.0, + "9": 1964.0, + "10": 1499.0, + "11": 1906.0, + "12": 1864.0, + "13": 1941.0, + "14": 1882.0, + "15": 1914.0, + "16": 1816.0, + "17": 1814.0, + "18": 1735.0, + "19": 1765.0, + "20": 1633.0, + "21": 1858.0, + "22": 1702.0, + "23": 1957.0, + "24": 1663.0, + "25": 1580.0, + "26": 1773.0, + "27": 1964.0, + "28": 2058.0, + "29": 2109.0, + "30": 1904.0, + "31": 1580.0, + "32": 1928.0, + "33": 2226.0, + "34": 1919.0, + "35": 1920.0, + "36": 1980.0, + "37": 2309.0, + "38": 2303.0, + "39": 2437.0, + "40": 2238.0, + "41": 2326.0, + "42": 2254.0, + "43": 2060.0, + "44": 2146.0, + "45": 2102.0, + "46": 2345.0, + "47": 2550.0, + "48": 2499.0, + "49": 2276.0, + "50": 2574.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 438468608.0, - "5": 438468608.0, - "10": 438468608.0, - "15": 438468608.0, - "20": 438468608.0, - "25": 438468608.0, - "30": 438468608.0, - "35": 438468608.0, - "40": 438468608.0, - "45": 438468608.0, - "50": 438468608.0 + "1": 436765184.0, + "2": 436765184.0, + "3": 436765184.0, + "4": 436765184.0, + "5": 436765184.0, + "6": 436765184.0, + "7": 436765184.0, + "8": 436765184.0, + "9": 436765184.0, + "10": 436765184.0, + "11": 436765184.0, + "12": 436765184.0, + "13": 436765184.0, + "14": 436765184.0, + "15": 436765184.0, + "16": 436765184.0, + "17": 436765184.0, + "18": 436765184.0, + "19": 436765184.0, + "20": 436765184.0, + "21": 436765184.0, + "22": 436765184.0, + "23": 436765184.0, + "24": 436765184.0, + "25": 436765184.0, + "26": 436765184.0, + "27": 436765184.0, + "28": 436765184.0, + "29": 436765184.0, + "30": 436765184.0, + "31": 436765184.0, + "32": 436765184.0, + "33": 436765184.0, + "34": 436765184.0, + "35": 436765184.0, + "36": 436765184.0, + "37": 436765184.0, + "38": 436765184.0, + "39": 436765184.0, + "40": 436765184.0, + "41": 436765184.0, + "42": 
436765184.0, + "43": 436765184.0, + "44": 436765184.0, + "45": 436765184.0, + "46": 436765184.0, + "47": 436765184.0, + "48": 436765184.0, + "49": 436765184.0, + "50": 436765184.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 2658189824.0, - "5": 2658189824.0, - "10": 2658189824.0, - "15": 2658189824.0, - "20": 2658189824.0, - "25": 2658189824.0, - "30": 2658189824.0, - "35": 2658189824.0, - "40": 2658189824.0, - "45": 2658189824.0, - "50": 2658189824.0 + "1": 1591768576.0, + "2": 1772628480.0, + "3": 1772628480.0, + "4": 1772628480.0, + "5": 1772628480.0, + "6": 1772628480.0, + "7": 1772628480.0, + "8": 1772628480.0, + "9": 1772628480.0, + "10": 1772628480.0, + "11": 1772628480.0, + "12": 1772628480.0, + "13": 1772628480.0, + "14": 1772628480.0, + "15": 1772628480.0, + "16": 1772628480.0, + "17": 1772628480.0, + "18": 1772628480.0, + "19": 1772628480.0, + "20": 1772628480.0, + "21": 1772628480.0, + "22": 1772628480.0, + "23": 1772628480.0, + "24": 1772628480.0, + "25": 1772628480.0, + "26": 1772628480.0, + "27": 1772628480.0, + "28": 1772628480.0, + "29": 1772628480.0, + "30": 1772628480.0, + "31": 1772628480.0, + "32": 1772628480.0, + "33": 1772628480.0, + "34": 1772628480.0, + "35": 1772628480.0, + "36": 1772628480.0, + "37": 1772628480.0, + "38": 1772628480.0, + "39": 1772628480.0, + "40": 1772628480.0, + "41": 1772628480.0, + "42": 1772628480.0, + "43": 1772628480.0, + "44": 1772628480.0, + "45": 1772628480.0, + "46": 1772628480.0, + "47": 1772628480.0, + "48": 1772628480.0, + "49": 1772628480.0, + "50": 1772628480.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 16.65648, - "5": 0.18713, - "10": 0.18827, - "15": 0.18525, - "20": 0.18524, - "25": 0.18364, - "30": 0.18457, - "35": 0.18387, - "40": 0.18487, - "45": 0.18218, - "50": 0.18439 + "1": 3.95122, + "2": 0.20629, + "3": 0.19002, + "4": 0.17151, + 
"5": 0.16892, + "6": 0.16922, + "7": 0.16965, + "8": 0.17504, + "9": 0.17459, + "10": 0.17897, + "11": 0.17409, + "12": 0.1744, + "13": 0.17287, + "14": 0.17379, + "15": 0.17494, + "16": 0.17728, + "17": 0.17415, + "18": 0.17375, + "19": 0.17472, + "20": 0.17419, + "21": 0.17564, + "22": 0.17531, + "23": 0.17363, + "24": 0.17467, + "25": 0.17519, + "26": 0.17584, + "27": 0.17619, + "28": 0.17299, + "29": 0.17468, + "30": 0.17335, + "31": 0.17523, + "32": 0.17349, + "33": 0.17387, + "34": 0.17508, + "35": 0.1743, + "36": 0.17468, + "37": 0.17489, + "38": 0.17296, + "39": 0.17553, + "40": 0.1747, + "41": 0.17437, + "42": 0.17471, + "43": 0.17492, + "44": 0.17376, + "45": 0.17488, + "46": 0.17514, + "47": 0.17599, + "48": 0.17634, + "49": 0.17525, + "50": 0.17524 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..dac3e5ef607 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.94863, + "2": 10.95748, + "3": 10.95458, + "4": 10.95314, + "5": 10.94301, + "6": 10.93709, + "7": 10.94818, + "8": 10.94698, + "9": 10.94866, + "10": 10.95119, + "11": 10.9406, + "12": 10.94105, + "13": 10.94375, + "14": 10.94739, + "15": 10.9429, + "16": 10.93682, + "17": 10.94182, + "18": 10.93022, + "19": 10.93614, + "20": 10.92135, + "21": 10.91434, + "22": 10.92114, + "23": 10.92039, + "24": 10.91062, + "25": 10.91171, + "26": 10.9101, + "27": 10.90559, + "28": 10.87901, + "29": 10.87862, + "30": 10.82431, + "31": 10.7917, + "32": 10.85763, + "33": 10.85278, + "34": 10.80465, + "35": 10.81124, + "36": 10.79299, + "37": 10.82161, + "38": 10.74654, + "39": 10.79066, + "40": 10.67639, 
+ "41": 10.71189, + "42": 10.72663, + "43": 10.58635, + "44": 10.63487, + "45": 10.59555, + "46": 10.58202, + "47": 10.67878, + "48": 10.55683, + "49": 10.43321, + "50": 10.57623 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 22792076.0, + "2": 22989660.0, + "3": 22661158.0, + "4": 23283080.0, + "5": 22778860.0, + "6": 23085232.0, + "7": 22834892.0, + "8": 22990502.0, + "9": 22906480.0, + "10": 22983488.0, + "11": 22563552.0, + "12": 22523694.0, + "13": 22980968.0, + "14": 22453154.0, + "15": 22885546.0, + "16": 22895028.0, + "17": 22882956.0, + "18": 22647168.0, + "19": 22682056.0, + "20": 22757444.0, + "21": 22803808.0, + "22": 22864026.0, + "23": 22603204.0, + "24": 22835232.0, + "25": 22883270.0, + "26": 22611998.0, + "27": 22532132.0, + "28": 22516960.0, + "29": 22593572.0, + "30": 22695024.0, + "31": 23019244.0, + "32": 22648204.0, + "33": 22623192.0, + "34": 22899922.0, + "35": 22852560.0, + "36": 22652964.0, + "37": 22559866.0, + "38": 22960222.0, + "39": 22864432.0, + "40": 22721420.0, + "41": 22722086.0, + "42": 22730128.0, + "43": 23040178.0, + "44": 22809816.0, + "45": 22738252.0, + "46": 22947510.0, + "47": 22697018.0, + "48": 22992168.0, + "49": 22790946.0, + "50": 22969044.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 387483136.0, + "2": 387483136.0, + "3": 387483136.0, + "4": 387483136.0, + "5": 387483136.0, + "6": 387483136.0, + "7": 387483136.0, + "8": 387483136.0, + "9": 387483136.0, + "10": 387483136.0, + "11": 387483136.0, + "12": 387483136.0, + "13": 387483136.0, + "14": 387483136.0, + "15": 387483136.0, + "16": 387483136.0, + "17": 387483136.0, + "18": 387483136.0, + "19": 387483136.0, + "20": 387483136.0, + "21": 387483136.0, + "22": 387483136.0, + "23": 387483136.0, + "24": 387483136.0, + "25": 387483136.0, + "26": 387483136.0, + "27": 387483136.0, + "28": 387483136.0, + "29": 387483136.0, + "30": 387483136.0, + 
"31": 387483136.0, + "32": 387483136.0, + "33": 387483136.0, + "34": 387483136.0, + "35": 387483136.0, + "36": 387483136.0, + "37": 387483136.0, + "38": 387483136.0, + "39": 387483136.0, + "40": 387483136.0, + "41": 387483136.0, + "42": 387483136.0, + "43": 387483136.0, + "44": 387483136.0, + "45": 387483136.0, + "46": 387483136.0, + "47": 387483136.0, + "48": 387483136.0, + "49": 387483136.0, + "50": 387483136.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1122385408.0, + "2": 1245635072.0, + "3": 1245635072.0, + "4": 1245635072.0, + "5": 1245635072.0, + "6": 1245635072.0, + "7": 1245635072.0, + "8": 1245635072.0, + "9": 1245635072.0, + "10": 1245635072.0, + "11": 1245635072.0, + "12": 1245635072.0, + "13": 1245635072.0, + "14": 1245635072.0, + "15": 1245635072.0, + "16": 1245635072.0, + "17": 1245635072.0, + "18": 1245635072.0, + "19": 1245635072.0, + "20": 1245635072.0, + "21": 1245635072.0, + "22": 1245635072.0, + "23": 1245635072.0, + "24": 1245635072.0, + "25": 1245635072.0, + "26": 1245635072.0, + "27": 1245635072.0, + "28": 1245635072.0, + "29": 1245635072.0, + "30": 1245635072.0, + "31": 1245635072.0, + "32": 1245635072.0, + "33": 1245635072.0, + "34": 1245635072.0, + "35": 1245635072.0, + "36": 1245635072.0, + "37": 1245635072.0, + "38": 1245635072.0, + "39": 1245635072.0, + "40": 1245635072.0, + "41": 1245635072.0, + "42": 1245635072.0, + "43": 1245635072.0, + "44": 1245635072.0, + "45": 1245635072.0, + "46": 1245635072.0, + "47": 1245635072.0, + "48": 1245635072.0, + "49": 1245635072.0, + "50": 1245635072.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6.33065, + "2": 0.20464, + "3": 0.17836, + "4": 0.16429, + "5": 0.1621, + "6": 0.16051, + "7": 0.15983, + "8": 0.16067, + "9": 0.15721, + "10": 0.16774, + "11": 0.16215, + "12": 0.21737, + "13": 0.16028, + "14": 0.16036, + "15": 0.15885, + "16": 0.22707, + "17": 
0.16509, + "18": 0.1691, + "19": 0.16736, + "20": 0.23508, + "21": 0.16682, + "22": 0.16204, + "23": 0.16527, + "24": 0.1694, + "25": 0.16972, + "26": 0.17668, + "27": 0.15612, + "28": 0.22357, + "29": 0.15777, + "30": 0.16518, + "31": 0.17111, + "32": 0.17188, + "33": 0.16413, + "34": 0.16509, + "35": 0.16886, + "36": 0.16871, + "37": 0.17188, + "38": 0.16901, + "39": 0.1672, + "40": 0.22409, + "41": 0.16827, + "42": 0.16744, + "43": 0.1668, + "44": 0.16817, + "45": 0.16681, + "46": 0.17004, + "47": 0.1702, + "48": 0.17085, + "49": 0.17174, + "50": 0.16979 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100.json index d0103111a28..bb945f7d249 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100.json @@ -44,16 +44,16 @@ "38": 10.72434, "39": 10.78066, "40": 10.65927, - "41": 10.69208, - "42": 10.70973, - "43": 10.56128, - "44": 10.61369, - "45": 10.56875, - "46": 10.54455, + "41": 10.69209, + "42": 10.70974, + "43": 10.56129, + "44": 10.61371, + "45": 10.56874, + "46": 10.54454, "47": 10.66751, "48": 10.53792, - "49": 10.40861, - "50": 10.55421 + "49": 10.40859, + "50": 10.5542 } }, "num-zeros": { @@ -100,17 +100,17 @@ "37": 22560476.0, "38": 22960058.0, "39": 22865476.0, - "40": 22721680.0, + "40": 22721690.0, "41": 22723112.0, - "42": 22730726.0, - "43": 23039588.0, - "44": 22810020.0, - "45": 22738904.0, - "46": 22948334.0, - "47": 22696668.0, - "48": 22992832.0, - "49": 22791208.0, - "50": 22968272.0 + "42": 22730692.0, + "43": 23039608.0, + "44": 22809964.0, + "45": 22738932.0, + "46": 22948360.0, + "47": 22696800.0, + "48": 22992776.0, + "49": 22791104.0, + "50": 22968342.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 
@@ "end_step": 50, "step_interval": 1, "values": { - "1": 387744256.0, - "2": 387744256.0, - "3": 387744256.0, - "4": 387744256.0, - "5": 387744256.0, - "6": 387744256.0, - "7": 387744256.0, - "8": 387744256.0, - "9": 387744256.0, - "10": 387744256.0, - "11": 387744256.0, - "12": 387744256.0, - "13": 387744256.0, - "14": 387744256.0, - "15": 387744256.0, - "16": 387744256.0, - "17": 387744256.0, - "18": 387744256.0, - "19": 387744256.0, - "20": 387744256.0, - "21": 387744256.0, - "22": 387744256.0, - "23": 387744256.0, - "24": 387744256.0, - "25": 387744256.0, - "26": 387744256.0, - "27": 387744256.0, - "28": 387744256.0, - "29": 387744256.0, - "30": 387744256.0, - "31": 387744256.0, - "32": 387744256.0, - "33": 387744256.0, - "34": 387744256.0, - "35": 387744256.0, - "36": 387744256.0, - "37": 387744256.0, - "38": 387744256.0, - "39": 387744256.0, - "40": 387744256.0, - "41": 387744256.0, - "42": 387744256.0, - "43": 387744256.0, - "44": 387744256.0, - "45": 387744256.0, - "46": 387744256.0, - "47": 387744256.0, - "48": 387744256.0, - "49": 387744256.0, - "50": 387744256.0 + "1": 387483136.0, + "2": 387483136.0, + "3": 387483136.0, + "4": 387483136.0, + "5": 387483136.0, + "6": 387483136.0, + "7": 387483136.0, + "8": 387483136.0, + "9": 387483136.0, + "10": 387483136.0, + "11": 387483136.0, + "12": 387483136.0, + "13": 387483136.0, + "14": 387483136.0, + "15": 387483136.0, + "16": 387483136.0, + "17": 387483136.0, + "18": 387483136.0, + "19": 387483136.0, + "20": 387483136.0, + "21": 387483136.0, + "22": 387483136.0, + "23": 387483136.0, + "24": 387483136.0, + "25": 387483136.0, + "26": 387483136.0, + "27": 387483136.0, + "28": 387483136.0, + "29": 387483136.0, + "30": 387483136.0, + "31": 387483136.0, + "32": 387483136.0, + "33": 387483136.0, + "34": 387483136.0, + "35": 387483136.0, + "36": 387483136.0, + "37": 387483136.0, + "38": 387483136.0, + "39": 387483136.0, + "40": 387483136.0, + "41": 387483136.0, + "42": 387483136.0, + "43": 387483136.0, + "44": 
387483136.0, + "45": 387483136.0, + "46": 387483136.0, + "47": 387483136.0, + "48": 387483136.0, + "49": 387483136.0, + "50": 387483136.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1122646528.0, - "2": 1245896192.0, - "3": 1245896192.0, - "4": 1245896192.0, - "5": 1245896192.0, - "6": 1245896192.0, - "7": 1245896192.0, - "8": 1245896192.0, - "9": 1245896192.0, - "10": 1245896192.0, - "11": 1245896192.0, - "12": 1245896192.0, - "13": 1245896192.0, - "14": 1245896192.0, - "15": 1245896192.0, - "16": 1245896192.0, - "17": 1245896192.0, - "18": 1245896192.0, - "19": 1245896192.0, - "20": 1245896192.0, - "21": 1245896192.0, - "22": 1245896192.0, - "23": 1245896192.0, - "24": 1245896192.0, - "25": 1245896192.0, - "26": 1245896192.0, - "27": 1245896192.0, - "28": 1245896192.0, - "29": 1245896192.0, - "30": 1245896192.0, - "31": 1245896192.0, - "32": 1245896192.0, - "33": 1245896192.0, - "34": 1245896192.0, - "35": 1245896192.0, - "36": 1245896192.0, - "37": 1245896192.0, - "38": 1245896192.0, - "39": 1245896192.0, - "40": 1245896192.0, - "41": 1245896192.0, - "42": 1245896192.0, - "43": 1245896192.0, - "44": 1245896192.0, - "45": 1245896192.0, - "46": 1245896192.0, - "47": 1245896192.0, - "48": 1245896192.0, - "49": 1245896192.0, - "50": 1245896192.0 + "1": 1122385408.0, + "2": 1245635072.0, + "3": 1245635072.0, + "4": 1245635072.0, + "5": 1245635072.0, + "6": 1245635072.0, + "7": 1245635072.0, + "8": 1245635072.0, + "9": 1245635072.0, + "10": 1245635072.0, + "11": 1245635072.0, + "12": 1245635072.0, + "13": 1245635072.0, + "14": 1245635072.0, + "15": 1245635072.0, + "16": 1245635072.0, + "17": 1245635072.0, + "18": 1245635072.0, + "19": 1245635072.0, + "20": 1245635072.0, + "21": 1245635072.0, + "22": 1245635072.0, + "23": 1245635072.0, + "24": 1245635072.0, + "25": 1245635072.0, + "26": 1245635072.0, + "27": 1245635072.0, + "28": 1245635072.0, + "29": 1245635072.0, + "30": 1245635072.0, + "31": 
1245635072.0, + "32": 1245635072.0, + "33": 1245635072.0, + "34": 1245635072.0, + "35": 1245635072.0, + "36": 1245635072.0, + "37": 1245635072.0, + "38": 1245635072.0, + "39": 1245635072.0, + "40": 1245635072.0, + "41": 1245635072.0, + "42": 1245635072.0, + "43": 1245635072.0, + "44": 1245635072.0, + "45": 1245635072.0, + "46": 1245635072.0, + "47": 1245635072.0, + "48": 1245635072.0, + "49": 1245635072.0, + "50": 1245635072.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 9.86323, - "2": 0.13474, - "3": 0.1236, - "4": 0.12168, - "5": 0.12406, - "6": 0.12501, - "7": 0.12711, - "8": 0.12778, - "9": 0.12839, - "10": 0.12143, - "11": 0.12109, - "12": 0.12077, - "13": 0.11905, - "14": 0.12184, - "15": 0.12152, - "16": 0.11812, - "17": 0.11693, - "18": 0.11549, - "19": 0.11712, - "20": 0.11675, - "21": 0.11877, - "22": 0.11837, - "23": 0.11757, - "24": 0.11636, - "25": 0.11722, - "26": 0.12393, - "27": 0.11736, - "28": 0.11759, - "29": 0.11945, - "30": 0.11726, - "31": 0.12096, - "32": 0.12206, - "33": 0.11734, - "34": 0.11894, - "35": 0.11695, - "36": 0.11712, - "37": 0.11489, - "38": 0.11866, - "39": 0.11749, - "40": 0.11829, - "41": 0.11674, - "42": 0.1181, - "43": 0.11808, - "44": 0.11621, - "45": 0.11832, - "46": 0.12031, - "47": 0.12023, - "48": 0.11643, - "49": 0.11855, - "50": 0.11792 + "1": 11.55479, + "2": 0.135, + "3": 0.11559, + "4": 0.10311, + "5": 0.10091, + "6": 0.10054, + "7": 0.10125, + "8": 0.10194, + "9": 0.10124, + "10": 0.10175, + "11": 0.10044, + "12": 0.10706, + "13": 0.10279, + "14": 0.10111, + "15": 0.10071, + "16": 0.10185, + "17": 0.10255, + "18": 0.10134, + "19": 0.10086, + "20": 0.10058, + "21": 0.10136, + "22": 0.09986, + "23": 0.10128, + "24": 0.1004, + "25": 0.10123, + "26": 0.10374, + "27": 0.09272, + "28": 0.09193, + "29": 0.09389, + "30": 0.09165, + "31": 0.09164, + "32": 0.09201, + "33": 0.09402, + "34": 0.09129, + "35": 0.09235, + "36": 0.09303, + "37": 0.09091, + "38": 0.09089, 
+ "39": 0.09141, + "40": 0.09122, + "41": 0.0948, + "42": 0.09477, + "43": 0.09276, + "44": 0.09423, + "45": 0.09477, + "46": 0.09451, + "47": 0.0941, + "48": 0.0934, + "49": 0.09315, + "50": 0.09366 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..f9b157ad760 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": 10.88155, + "27": 10.88649, + "28": 10.85679, + "29": 10.85657, + "30": 10.81423, + "31": 10.76651, + "32": 10.83131, + "33": 10.83158, + "34": 10.78071, + "35": 10.78865, + "36": 10.78003, + "37": 10.80446, + "38": 10.72434, + "39": 10.78066, + "40": 10.65927, + "41": 10.69209, + "42": 10.70974, + "43": 10.56129, + "44": 10.61371, + "45": 10.56874, + "46": 10.54454, + "47": 10.66751, + "48": 10.53792, + "49": 10.40859, + "50": 10.5542 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + 
"25": "nan", + "26": 22611358.0, + "27": 22532968.0, + "28": 22517794.0, + "29": 22593448.0, + "30": 22695256.0, + "31": 23019472.0, + "32": 22648896.0, + "33": 22622516.0, + "34": 22899620.0, + "35": 22851572.0, + "36": 22653160.0, + "37": 22560476.0, + "38": 22960058.0, + "39": 22865476.0, + "40": 22721690.0, + "41": 22723112.0, + "42": 22730692.0, + "43": 23039608.0, + "44": 22809964.0, + "45": 22738932.0, + "46": 22948360.0, + "47": 22696800.0, + "48": 22992776.0, + "49": 22791104.0, + "50": 22968342.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": 389056000.0, + "27": 389056000.0, + "28": 389056000.0, + "29": 389056000.0, + "30": 389056000.0, + "31": 389056000.0, + "32": 389056000.0, + "33": 389056000.0, + "34": 389056000.0, + "35": 389056000.0, + "36": 389056000.0, + "37": 389056000.0, + "38": 389056000.0, + "39": 389056000.0, + "40": 389056000.0, + "41": 389056000.0, + "42": 389056000.0, + "43": 389056000.0, + "44": 389056000.0, + "45": 389056000.0, + "46": 389056000.0, + "47": 389056000.0, + "48": 389056000.0, + "49": 389056000.0, + "50": 389056000.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": 
1247206912.0, + "27": 1247207936.0, + "28": 1247207936.0, + "29": 1247207936.0, + "30": 1247207936.0, + "31": 1247207936.0, + "32": 1247207936.0, + "33": 1247207936.0, + "34": 1247207936.0, + "35": 1247207936.0, + "36": 1247207936.0, + "37": 1247207936.0, + "38": 1247207936.0, + "39": 1247207936.0, + "40": 1247207936.0, + "41": 1247207936.0, + "42": 1247207936.0, + "43": 1247207936.0, + "44": 1247207936.0, + "45": 1247207936.0, + "46": 1247207936.0, + "47": 1247207936.0, + "48": 1247207936.0, + "49": 1247207936.0, + "50": 1247207936.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": 8.47899, + "27": 0.12956, + "28": 0.10522, + "29": 0.09836, + "30": 0.09498, + "31": 0.09443, + "32": 0.09442, + "33": 0.09859, + "34": 0.09556, + "35": 0.0936, + "36": 0.0976, + "37": 0.09323, + "38": 0.09427, + "39": 0.09365, + "40": 0.09264, + "41": 0.09618, + "42": 0.09384, + "43": 0.0938, + "44": 0.09376, + "45": 0.093, + "46": 0.09376, + "47": 0.0942, + "48": 0.09416, + "49": 0.09367, + "50": 0.09361 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..941c681adde --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + 
"1": 10.86897, + "2": 10.88544, + "3": 10.86473, + "4": 10.86826, + "5": 10.87436, + "6": 10.89005, + "7": 10.87769, + "8": 10.86364, + "9": 10.88282, + "10": 10.84687, + "11": 10.87102, + "12": 10.87345, + "13": 10.8814, + "14": 10.8877, + "15": 10.83869, + "16": 10.8239, + "17": 10.80197, + "18": 10.81094, + "19": 10.82192, + "20": 10.71791, + "21": 10.68914, + "22": 10.57271, + "23": 10.7081, + "24": 10.59543, + "25": 10.55292, + "26": 10.61257, + "27": 10.60051, + "28": 10.56173, + "29": 10.58089, + "30": 10.35595, + "31": 10.1182, + "32": 10.44815, + "33": 10.4542, + "34": 10.21553, + "35": 10.26124, + "36": 10.20776, + "37": 10.33673, + "38": 10.17741, + "39": 10.39297, + "40": 10.06349, + "41": 10.13887, + "42": 10.2056, + "43": 9.82809, + "44": 9.94547, + "45": 9.82561, + "46": 9.80186, + "47": 10.14049, + "48": 9.84276, + "49": 9.52016, + "50": 9.88454, + "51": 9.84743, + "52": 9.74209, + "53": 10.05697, + "54": 9.9505, + "55": 9.88145, + "56": 9.61274, + "57": 9.4687, + "58": 9.82193, + "59": 9.57642, + "60": 9.49762, + "61": 9.69189, + "62": 9.9867, + "63": 9.37512, + "64": 9.76679, + "65": 8.94648, + "66": 9.7023, + "67": 9.36326, + "68": 9.7831, + "69": 9.7986, + "70": 9.7317, + "71": 9.62571, + "72": 9.58488, + "73": 9.48967, + "74": 8.9286, + "75": 9.40862, + "76": 9.07925, + "77": 10.0594, + "78": 9.72288, + "79": 9.37784, + "80": 9.40429, + "81": 9.48309, + "82": 9.7004, + "83": 9.31595, + "84": 9.41838, + "85": 9.61685, + "86": 9.07533, + "87": 9.59616, + "88": 9.75215, + "89": 9.60184, + "90": 9.82281, + "91": 9.34037, + "92": 9.35854, + "93": 9.08805, + "94": 8.83037, + "95": 9.5266, + "96": 9.53049, + "97": 9.30389, + "98": 9.67196, + "99": 8.89637, + "100": 9.40644 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1621.0, + "2": 1657.0, + "3": 1580.0, + "4": 1839.0, + "5": 1862.0, + "6": 1724.0, + "7": 1714.0, + "8": 1670.0, + "9": 1762.0, + "10": 1358.0, + "11": 1734.0, + "12": 1682.0, 
+ "13": 1761.0, + "14": 1731.0, + "15": 1788.0, + "16": 1801.0, + "17": 1866.0, + "18": 1636.0, + "19": 1709.0, + "20": 1607.0, + "21": 1821.0, + "22": 1666.0, + "23": 1991.0, + "24": 1585.0, + "25": 1587.0, + "26": 1631.0, + "27": 1714.0, + "28": 1966.0, + "29": 1997.0, + "30": 1851.0, + "31": 1581.0, + "32": 1864.0, + "33": 2107.0, + "34": 1846.0, + "35": 1982.0, + "36": 1904.0, + "37": 2373.0, + "38": 2172.0, + "39": 2343.0, + "40": 2149.0, + "41": 2331.0, + "42": 2199.0, + "43": 1914.0, + "44": 2065.0, + "45": 2081.0, + "46": 2352.0, + "47": 2497.0, + "48": 2303.0, + "49": 2346.0, + "50": 2411.0, + "51": 2491.0, + "52": 2552.0, + "53": 2980.0, + "54": 2680.0, + "55": 2274.0, + "56": 2734.0, + "57": 2319.0, + "58": 2907.0, + "59": 2886.0, + "60": 2566.0, + "61": 2855.0, + "62": 2704.0, + "63": 2370.0, + "64": 2998.0, + "65": 2563.0, + "66": 2868.0, + "67": 2762.0, + "68": 2739.0, + "69": 2730.0, + "70": 3156.0, + "71": 2803.0, + "72": 2506.0, + "73": 2896.0, + "74": 1937.0, + "75": 2450.0, + "76": 2794.0, + "77": 3047.0, + "78": 3104.0, + "79": 3069.0, + "80": 3286.0, + "81": 3543.0, + "82": 3192.0, + "83": 2614.0, + "84": 3273.0, + "85": 3111.0, + "86": 2680.0, + "87": 3654.0, + "88": 3117.0, + "89": 3351.0, + "90": 3086.0, + "91": 2721.0, + "92": 3045.0, + "93": 2672.0, + "94": 3326.0, + "95": 3125.0, + "96": 3309.0, + "97": 3208.0, + "98": 3572.0, + "99": 2980.0, + "100": 3355.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 514359808.0, + "2": 514359808.0, + "3": 514359808.0, + "4": 514359808.0, + "5": 514359808.0, + "6": 514359808.0, + "7": 514359808.0, + "8": 514359808.0, + "9": 514359808.0, + "10": 514359808.0, + "11": 514359808.0, + "12": 514359808.0, + "13": 514359808.0, + "14": 514359808.0, + "15": 514359808.0, + "16": 514359808.0, + "17": 514359808.0, + "18": 514359808.0, + "19": 514359808.0, + "20": 514359808.0, + "21": 514359808.0, + "22": 514359808.0, + "23": 514359808.0, + "24": 
514359808.0, + "25": 514359808.0, + "26": 514359808.0, + "27": 514359808.0, + "28": 514359808.0, + "29": 514359808.0, + "30": 514359808.0, + "31": 514359808.0, + "32": 514359808.0, + "33": 514359808.0, + "34": 514359808.0, + "35": 514359808.0, + "36": 514359808.0, + "37": 514359808.0, + "38": 514359808.0, + "39": 514359808.0, + "40": 514359808.0, + "41": 514359808.0, + "42": 514359808.0, + "43": 514359808.0, + "44": 514359808.0, + "45": 514359808.0, + "46": 514359808.0, + "47": 514359808.0, + "48": 514359808.0, + "49": 514359808.0, + "50": 514359808.0, + "51": 514359808.0, + "52": 514359808.0, + "53": 514359808.0, + "54": 514359808.0, + "55": 514359808.0, + "56": 514359808.0, + "57": 514359808.0, + "58": 514359808.0, + "59": 514359808.0, + "60": 514359808.0, + "61": 514359808.0, + "62": 514359808.0, + "63": 514359808.0, + "64": 514359808.0, + "65": 514359808.0, + "66": 514359808.0, + "67": 514359808.0, + "68": 514359808.0, + "69": 514359808.0, + "70": 514359808.0, + "71": 514359808.0, + "72": 514359808.0, + "73": 514359808.0, + "74": 514359808.0, + "75": 514359808.0, + "76": 514359808.0, + "77": 514359808.0, + "78": 514359808.0, + "79": 514359808.0, + "80": 514359808.0, + "81": 514359808.0, + "82": 514359808.0, + "83": 514359808.0, + "84": 514359808.0, + "85": 514359808.0, + "86": 514359808.0, + "87": 514359808.0, + "88": 514359808.0, + "89": 514359808.0, + "90": 514359808.0, + "91": 514359808.0, + "92": 514359808.0, + "93": 514359808.0, + "94": 514359808.0, + "95": 514359808.0, + "96": 514359808.0, + "97": 514359808.0, + "98": 514359808.0, + "99": 514359808.0, + "100": 514359808.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1258060288.0, + "2": 1437084160.0, + "3": 1437084160.0, + "4": 1437084160.0, + "5": 1437084160.0, + "6": 1437084160.0, + "7": 1437084160.0, + "8": 1437084160.0, + "9": 1437084160.0, + "10": 1437084160.0, + "11": 1437084160.0, + "12": 1437084160.0, + "13": 1437084160.0, 
+ "14": 1437084160.0, + "15": 1437084160.0, + "16": 1437084160.0, + "17": 1437084160.0, + "18": 1437084160.0, + "19": 1437084160.0, + "20": 1437084160.0, + "21": 1437084160.0, + "22": 1437084160.0, + "23": 1437084160.0, + "24": 1437084160.0, + "25": 1437084160.0, + "26": 1437084160.0, + "27": 1437084160.0, + "28": 1437084160.0, + "29": 1437084160.0, + "30": 1437084160.0, + "31": 1437084160.0, + "32": 1437084160.0, + "33": 1437084160.0, + "34": 1437084160.0, + "35": 1437084160.0, + "36": 1437084160.0, + "37": 1437084160.0, + "38": 1437084160.0, + "39": 1437084160.0, + "40": 1437084160.0, + "41": 1437084160.0, + "42": 1437084160.0, + "43": 1437084160.0, + "44": 1437084160.0, + "45": 1437084160.0, + "46": 1437084160.0, + "47": 1437084160.0, + "48": 1437084160.0, + "49": 1437084160.0, + "50": 1437084160.0, + "51": 1437084160.0, + "52": 1437084160.0, + "53": 1437084160.0, + "54": 1437084160.0, + "55": 1437084160.0, + "56": 1437084160.0, + "57": 1437084160.0, + "58": 1437084160.0, + "59": 1437084160.0, + "60": 1437084160.0, + "61": 1437084160.0, + "62": 1437084160.0, + "63": 1437084160.0, + "64": 1437084160.0, + "65": 1437084160.0, + "66": 1437084160.0, + "67": 1437084160.0, + "68": 1437084160.0, + "69": 1437084160.0, + "70": 1437084160.0, + "71": 1437084160.0, + "72": 1437084160.0, + "73": 1437084160.0, + "74": 1437084160.0, + "75": 1437084160.0, + "76": 1437084160.0, + "77": 1437084160.0, + "78": 1437084160.0, + "79": 1437084160.0, + "80": 1437084160.0, + "81": 1437084160.0, + "82": 1437084160.0, + "83": 1437084160.0, + "84": 1437084160.0, + "85": 1437084160.0, + "86": 1437084160.0, + "87": 1437084160.0, + "88": 1437084160.0, + "89": 1437084160.0, + "90": 1437084160.0, + "91": 1437084160.0, + "92": 1437084160.0, + "93": 1437084160.0, + "94": 1437084160.0, + "95": 1437084160.0, + "96": 1437084160.0, + "97": 1437084160.0, + "98": 1437084160.0, + "99": 1437084160.0, + "100": 1437084160.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 6.23525, + "2": 0.24353, + "3": 0.25343, + "4": 0.21688, + "5": 0.2509, + "6": 0.23286, + "7": 0.23132, + "8": 0.23275, + "9": 0.23174, + "10": 0.28716, + "11": 0.23191, + "12": 0.23535, + "13": 0.26183, + "14": 0.23439, + "15": 0.26372, + "16": 0.27689, + "17": 0.31573, + "18": 0.29419, + "19": 0.22489, + "20": 0.21688, + "21": 0.21286, + "22": 0.21368, + "23": 0.21212, + "24": 0.21109, + "25": 0.21238, + "26": 0.21136, + "27": 0.24254, + "28": 0.21046, + "29": 0.21055, + "30": 0.37172, + "31": 0.20753, + "32": 0.22054, + "33": 0.20088, + "34": 0.20169, + "35": 0.2243, + "36": 0.20027, + "37": 0.20099, + "38": 0.21205, + "39": 0.20018, + "40": 0.19821, + "41": 0.20033, + "42": 0.20078, + "43": 0.19985, + "44": 0.19983, + "45": 0.19756, + "46": 0.19892, + "47": 0.19813, + "48": 0.19885, + "49": 0.19949, + "50": 0.19861, + "51": 0.20481, + "52": 0.18697, + "53": 0.18628, + "54": 0.18383, + "55": 0.22054, + "56": 0.18628, + "57": 0.1865, + "58": 0.23363, + "59": 0.18779, + "60": 0.18548, + "61": 0.23086, + "62": 0.18486, + "63": 0.18676, + "64": 0.18877, + "65": 0.18818, + "66": 0.18785, + "67": 0.18912, + "68": 0.18762, + "69": 0.18502, + "70": 0.2393, + "71": 0.18534, + "72": 0.1866, + "73": 0.18699, + "74": 0.2218, + "75": 0.18851, + "76": 0.18761, + "77": 0.18836, + "78": 0.22737, + "79": 0.18832, + "80": 0.18852, + "81": 0.2185, + "82": 0.18552, + "83": 0.19385, + "84": 0.18774, + "85": 0.1898, + "86": 0.3457, + "87": 0.4164, + "88": 0.18999, + "89": 0.1872, + "90": 0.18803, + "91": 0.22713, + "92": 0.18693, + "93": 0.18603, + "94": 0.18711, + "95": 0.18552, + "96": 0.22396, + "97": 0.18576, + "98": 0.18988, + "99": 0.21054, + "100": 0.21361 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json index 
7c1078c0b3d..aab9c0cb891 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 517505536.0, - "2": 517505536.0, - "3": 517505536.0, - "4": 517505536.0, - "5": 517505536.0, - "6": 517505536.0, - "7": 517505536.0, - "8": 517505536.0, - "9": 517505536.0, - "10": 517505536.0, - "11": 517505536.0, - "12": 517505536.0, - "13": 517505536.0, - "14": 517505536.0, - "15": 517505536.0, - "16": 517505536.0, - "17": 517505536.0, - "18": 517505536.0, - "19": 517505536.0, - "20": 517505536.0, - "21": 517505536.0, - "22": 517505536.0, - "23": 517505536.0, - "24": 517505536.0, - "25": 517505536.0, - "26": 517505536.0, - "27": 517505536.0, - "28": 517505536.0, - "29": 517505536.0, - "30": 517505536.0, - "31": 517505536.0, - "32": 517505536.0, - "33": 517505536.0, - "34": 517505536.0, - "35": 517505536.0, - "36": 517505536.0, - "37": 517505536.0, - "38": 517505536.0, - "39": 517505536.0, - "40": 517505536.0, - "41": 517505536.0, - "42": 517505536.0, - "43": 517505536.0, - "44": 517505536.0, - "45": 517505536.0, - "46": 517505536.0, - "47": 517505536.0, - "48": 517505536.0, - "49": 517505536.0, - "50": 517505536.0, - "51": 517505536.0, - "52": 517505536.0, - "53": 517505536.0, - "54": 517505536.0, - "55": 517505536.0, - "56": 517505536.0, - "57": 517505536.0, - "58": 517505536.0, - "59": 517505536.0, - "60": 517505536.0, - "61": 517505536.0, - "62": 517505536.0, - "63": 517505536.0, - "64": 517505536.0, - "65": 517505536.0, - "66": 517505536.0, - "67": 517505536.0, - "68": 517505536.0, - "69": 517505536.0, - "70": 517505536.0, - "71": 517505536.0, - "72": 517505536.0, - "73": 517505536.0, - "74": 517505536.0, - "75": 517505536.0, - "76": 517505536.0, - "77": 517505536.0, - "78": 517505536.0, - "79": 517505536.0, - "80": 
517505536.0, - "81": 517505536.0, - "82": 517505536.0, - "83": 517505536.0, - "84": 517505536.0, - "85": 517505536.0, - "86": 517505536.0, - "87": 517505536.0, - "88": 517505536.0, - "89": 517505536.0, - "90": 517505536.0, - "91": 517505536.0, - "92": 517505536.0, - "93": 517505536.0, - "94": 517505536.0, - "95": 517505536.0, - "96": 517505536.0, - "97": 517505536.0, - "98": 517505536.0, - "99": 517505536.0, - "100": 517505536.0 + "1": 516456960.0, + "2": 516456960.0, + "3": 516456960.0, + "4": 516456960.0, + "5": 516456960.0, + "6": 516456960.0, + "7": 516456960.0, + "8": 516456960.0, + "9": 516456960.0, + "10": 516456960.0, + "11": 516456960.0, + "12": 516456960.0, + "13": 516456960.0, + "14": 516456960.0, + "15": 516456960.0, + "16": 516456960.0, + "17": 516456960.0, + "18": 516456960.0, + "19": 516456960.0, + "20": 516456960.0, + "21": 516456960.0, + "22": 516456960.0, + "23": 516456960.0, + "24": 516456960.0, + "25": 516456960.0, + "26": 516456960.0, + "27": 516456960.0, + "28": 516456960.0, + "29": 516456960.0, + "30": 516456960.0, + "31": 516456960.0, + "32": 516456960.0, + "33": 516456960.0, + "34": 516456960.0, + "35": 516456960.0, + "36": 516456960.0, + "37": 516456960.0, + "38": 516456960.0, + "39": 516456960.0, + "40": 516456960.0, + "41": 516456960.0, + "42": 516456960.0, + "43": 516456960.0, + "44": 516456960.0, + "45": 516456960.0, + "46": 516456960.0, + "47": 516456960.0, + "48": 516456960.0, + "49": 516456960.0, + "50": 516456960.0, + "51": 516456960.0, + "52": 516456960.0, + "53": 516456960.0, + "54": 516456960.0, + "55": 516456960.0, + "56": 516456960.0, + "57": 516456960.0, + "58": 516456960.0, + "59": 516456960.0, + "60": 516456960.0, + "61": 516456960.0, + "62": 516456960.0, + "63": 516456960.0, + "64": 516456960.0, + "65": 516456960.0, + "66": 516456960.0, + "67": 516456960.0, + "68": 516456960.0, + "69": 516456960.0, + "70": 516456960.0, + "71": 516456960.0, + "72": 516456960.0, + "73": 516456960.0, + "74": 516456960.0, + "75": 516456960.0, 
+ "76": 516456960.0, + "77": 516456960.0, + "78": 516456960.0, + "79": 516456960.0, + "80": 516456960.0, + "81": 516456960.0, + "82": 516456960.0, + "83": 516456960.0, + "84": 516456960.0, + "85": 516456960.0, + "86": 516456960.0, + "87": 516456960.0, + "88": 516456960.0, + "89": 516456960.0, + "90": 516456960.0, + "91": 516456960.0, + "92": 516456960.0, + "93": 516456960.0, + "94": 516456960.0, + "95": 516456960.0, + "96": 516456960.0, + "97": 516456960.0, + "98": 516456960.0, + "99": 516456960.0, + "100": 516456960.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1246524928.0, - "2": 1428695552.0, - "3": 1428695552.0, - "4": 1428695552.0, - "5": 1428695552.0, - "6": 1428695552.0, - "7": 1428695552.0, - "8": 1428695552.0, - "9": 1428695552.0, - "10": 1428695552.0, - "11": 1428695552.0, - "12": 1428695552.0, - "13": 1428695552.0, - "14": 1428695552.0, - "15": 1428695552.0, - "16": 1428695552.0, - "17": 1428695552.0, - "18": 1428695552.0, - "19": 1428695552.0, - "20": 1428695552.0, - "21": 1428695552.0, - "22": 1428695552.0, - "23": 1428695552.0, - "24": 1428695552.0, - "25": 1428695552.0, - "26": 1428695552.0, - "27": 1428695552.0, - "28": 1428695552.0, - "29": 1428695552.0, - "30": 1428695552.0, - "31": 1428695552.0, - "32": 1428695552.0, - "33": 1428695552.0, - "34": 1428695552.0, - "35": 1428695552.0, - "36": 1428695552.0, - "37": 1428695552.0, - "38": 1428695552.0, - "39": 1428695552.0, - "40": 1428695552.0, - "41": 1428695552.0, - "42": 1428695552.0, - "43": 1428695552.0, - "44": 1428695552.0, - "45": 1428695552.0, - "46": 1428695552.0, - "47": 1428695552.0, - "48": 1428695552.0, - "49": 1428695552.0, - "50": 1428695552.0, - "51": 1428695552.0, - "52": 1428695552.0, - "53": 1428695552.0, - "54": 1428695552.0, - "55": 1428695552.0, - "56": 1428695552.0, - "57": 1428695552.0, - "58": 1428695552.0, - "59": 1428695552.0, - "60": 1428695552.0, - "61": 1428695552.0, - "62": 1428695552.0, - "63": 
1428695552.0, - "64": 1428695552.0, - "65": 1428695552.0, - "66": 1428695552.0, - "67": 1428695552.0, - "68": 1428695552.0, - "69": 1428695552.0, - "70": 1428695552.0, - "71": 1428695552.0, - "72": 1428695552.0, - "73": 1428695552.0, - "74": 1428695552.0, - "75": 1428695552.0, - "76": 1428695552.0, - "77": 1428695552.0, - "78": 1428695552.0, - "79": 1428695552.0, - "80": 1428695552.0, - "81": 1428695552.0, - "82": 1428695552.0, - "83": 1428695552.0, - "84": 1428695552.0, - "85": 1428695552.0, - "86": 1428695552.0, - "87": 1428695552.0, - "88": 1428695552.0, - "89": 1428695552.0, - "90": 1428695552.0, - "91": 1428695552.0, - "92": 1428695552.0, - "93": 1428695552.0, - "94": 1428695552.0, - "95": 1428695552.0, - "96": 1428695552.0, - "97": 1428695552.0, - "98": 1428695552.0, - "99": 1428695552.0, - "100": 1428695552.0 + "1": 1246525952.0, + "2": 1426598400.0, + "3": 1426598400.0, + "4": 1426598400.0, + "5": 1426598400.0, + "6": 1426598400.0, + "7": 1426598400.0, + "8": 1426598400.0, + "9": 1426598400.0, + "10": 1426598400.0, + "11": 1426598400.0, + "12": 1426598400.0, + "13": 1426598400.0, + "14": 1426598400.0, + "15": 1426598400.0, + "16": 1426598400.0, + "17": 1426598400.0, + "18": 1426598400.0, + "19": 1426598400.0, + "20": 1426598400.0, + "21": 1426598400.0, + "22": 1426598400.0, + "23": 1426598400.0, + "24": 1426598400.0, + "25": 1426598400.0, + "26": 1426598400.0, + "27": 1426598400.0, + "28": 1426598400.0, + "29": 1426598400.0, + "30": 1426598400.0, + "31": 1426598400.0, + "32": 1426598400.0, + "33": 1426598400.0, + "34": 1426598400.0, + "35": 1426598400.0, + "36": 1426598400.0, + "37": 1426598400.0, + "38": 1426598400.0, + "39": 1426598400.0, + "40": 1426598400.0, + "41": 1426598400.0, + "42": 1426598400.0, + "43": 1426598400.0, + "44": 1426598400.0, + "45": 1426598400.0, + "46": 1426598400.0, + "47": 1426598400.0, + "48": 1426598400.0, + "49": 1426598400.0, + "50": 1426598400.0, + "51": 1426598400.0, + "52": 1426598400.0, + "53": 1426598400.0, + "54": 
1426598400.0, + "55": 1426598400.0, + "56": 1426598400.0, + "57": 1426598400.0, + "58": 1426598400.0, + "59": 1426598400.0, + "60": 1426598400.0, + "61": 1426598400.0, + "62": 1426598400.0, + "63": 1426598400.0, + "64": 1426598400.0, + "65": 1426598400.0, + "66": 1426598400.0, + "67": 1426598400.0, + "68": 1426598400.0, + "69": 1426598400.0, + "70": 1426598400.0, + "71": 1426598400.0, + "72": 1426598400.0, + "73": 1426598400.0, + "74": 1426598400.0, + "75": 1426598400.0, + "76": 1426598400.0, + "77": 1426598400.0, + "78": 1426598400.0, + "79": 1426598400.0, + "80": 1426598400.0, + "81": 1426598400.0, + "82": 1426598400.0, + "83": 1426598400.0, + "84": 1426598400.0, + "85": 1426598400.0, + "86": 1426598400.0, + "87": 1426598400.0, + "88": 1426598400.0, + "89": 1426598400.0, + "90": 1426598400.0, + "91": 1426598400.0, + "92": 1426598400.0, + "93": 1426598400.0, + "94": 1426598400.0, + "95": 1426598400.0, + "96": 1426598400.0, + "97": 1426598400.0, + "98": 1426598400.0, + "99": 1426598400.0, + "100": 1426598400.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 11.96359, - "2": 0.17007, - "3": 0.15511, - "4": 0.15439, - "5": 0.15477, - "6": 0.15459, - "7": 0.15427, - "8": 0.15173, - "9": 0.15484, - "10": 0.15363, - "11": 0.15353, - "12": 0.15567, - "13": 0.15258, - "14": 0.15438, - "15": 0.15305, - "16": 0.15314, - "17": 0.15342, - "18": 0.15282, - "19": 0.15336, - "20": 0.15333, - "21": 0.15174, - "22": 0.15412, - "23": 0.15337, - "24": 0.15464, - "25": 0.15638, - "26": 0.15618, - "27": 0.15599, - "28": 0.15616, - "29": 0.15792, - "30": 0.15422, - "31": 0.15441, - "32": 0.15356, - "33": 0.15622, - "34": 0.15397, - "35": 0.15443, - "36": 0.15392, - "37": 0.15454, - "38": 0.15581, - "39": 0.15513, - "40": 0.15813, - "41": 0.1595, - "42": 0.15604, - "43": 0.15809, - "44": 0.15585, - "45": 0.15659, - "46": 0.15599, - "47": 0.15378, - "48": 0.15475, - "49": 0.1544, - "50": 0.15569, - "51": 0.16391, - "52": 0.16196, - 
"53": 0.16029, - "54": 0.16138, - "55": 0.15673, - "56": 0.1503, - "57": 0.15071, - "58": 0.15268, - "59": 0.15095, - "60": 0.15189, - "61": 0.15199, - "62": 0.14938, - "63": 0.15046, - "64": 0.14924, - "65": 0.15129, - "66": 0.14938, - "67": 0.15233, - "68": 0.15028, - "69": 0.1525, - "70": 0.15334, - "71": 0.15152, - "72": 0.15138, - "73": 0.15304, - "74": 0.1515, - "75": 0.15282, - "76": 0.1518, - "77": 0.15193, - "78": 0.15262, - "79": 0.15274, - "80": 0.15251, - "81": 0.15108, - "82": 0.15199, - "83": 0.15046, - "84": 0.15298, - "85": 0.15063, - "86": 0.15132, - "87": 0.15257, - "88": 0.15109, - "89": 0.1502, - "90": 0.15259, - "91": 0.15063, - "92": 0.15237, - "93": 0.15096, - "94": 0.1517, - "95": 0.15049, - "96": 0.15002, - "97": 0.15011, - "98": 0.15349, - "99": 0.1565, - "100": 0.15223 + "1": 8.65189, + "2": 0.17932, + "3": 0.14636, + "4": 0.12538, + "5": 0.12402, + "6": 0.12459, + "7": 0.12481, + "8": 0.12323, + "9": 0.12314, + "10": 0.12506, + "11": 0.1247, + "12": 0.124, + "13": 0.12299, + "14": 0.12337, + "15": 0.12552, + "16": 0.12432, + "17": 0.12285, + "18": 0.1235, + "19": 0.12341, + "20": 0.12389, + "21": 0.12311, + "22": 0.12402, + "23": 0.12319, + "24": 0.12321, + "25": 0.12382, + "26": 0.12336, + "27": 0.12353, + "28": 0.12251, + "29": 0.12528, + "30": 0.12437, + "31": 0.12503, + "32": 0.12365, + "33": 0.1224, + "34": 0.12436, + "35": 0.12606, + "36": 0.12382, + "37": 0.12451, + "38": 0.12292, + "39": 0.1228, + "40": 0.12355, + "41": 0.12426, + "42": 0.12483, + "43": 0.12585, + "44": 0.12964, + "45": 0.12442, + "46": 0.12437, + "47": 0.12371, + "48": 0.12305, + "49": 0.12517, + "50": 0.12295, + "51": 0.14312, + "52": 0.1306, + "53": 0.12394, + "54": 0.12469, + "55": 0.12368, + "56": 0.12394, + "57": 0.12303, + "58": 0.12356, + "59": 0.12328, + "60": 0.12317, + "61": 0.12286, + "62": 0.12321, + "63": 0.12386, + "64": 0.12303, + "65": 0.12369, + "66": 0.12284, + "67": 0.12276, + "68": 0.1233, + "69": 0.12275, + "70": 0.12331, + "71": 0.12204, + 
"72": 0.12226, + "73": 0.12258, + "74": 0.12222, + "75": 0.12284, + "76": 0.12277, + "77": 0.12539, + "78": 0.12356, + "79": 0.1224, + "80": 0.12283, + "81": 0.12341, + "82": 0.12375, + "83": 0.1222, + "84": 0.12248, + "85": 0.12367, + "86": 0.12361, + "87": 0.12373, + "88": 0.124, + "89": 0.1217, + "90": 0.12316, + "91": 0.12421, + "92": 0.12415, + "93": 0.1244, + "94": 0.12547, + "95": 0.12292, + "96": 0.12216, + "97": 0.12313, + "98": 0.12301, + "99": 0.1248, + "100": 0.12337 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..f8f216592e7 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.8558, + "52": 9.75237, + "53": 10.07589, + "54": 9.95688, + "55": 9.88203, + "56": 9.6313, + "57": 9.48649, + "58": 9.83109, + "59": 9.58897, + "60": 9.50643, + "61": 9.70363, + "62": 
9.98286, + "63": 9.38302, + "64": 9.77901, + "65": 8.95166, + "66": 9.70158, + "67": 9.37203, + "68": 9.78849, + "69": 9.79851, + "70": 9.74737, + "71": 9.61908, + "72": 9.58502, + "73": 9.49721, + "74": 8.93927, + "75": 9.42703, + "76": 9.0802, + "77": 10.06567, + "78": 9.72893, + "79": 9.3776, + "80": 9.40982, + "81": 9.47976, + "82": 9.7018, + "83": 9.30612, + "84": 9.4209, + "85": 9.61371, + "86": 9.07649, + "87": 9.5945, + "88": 9.75068, + "89": 9.60238, + "90": 9.81898, + "91": 9.33894, + "92": 9.35716, + "93": 9.07879, + "94": 8.83503, + "95": 9.52172, + "96": 9.53003, + "97": 9.31306, + "98": 9.67783, + "99": 8.89058, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2613.0, + "52": 2538.0, + "53": 2792.0, + "54": 2801.0, + "55": 2216.0, + "56": 2858.0, + "57": 2381.0, + "58": 2854.0, + "59": 2787.0, + "60": 2457.0, + "61": 2941.0, + "62": 2543.0, + "63": 2408.0, + "64": 2968.0, + "65": 2472.0, + "66": 2977.0, + "67": 2839.0, + "68": 2775.0, + "69": 2832.0, + "70": 3057.0, + "71": 2909.0, + "72": 2421.0, + "73": 2982.0, + "74": 1922.0, + "75": 2474.0, + "76": 3059.0, + "77": 3177.0, + "78": 3067.0, + "79": 3052.0, + "80": 3338.0, + "81": 3644.0, + "82": 
3234.0, + "83": 2798.0, + "84": 3196.0, + "85": 3324.0, + "86": 2855.0, + "87": 3820.0, + "88": 2962.0, + "89": 3379.0, + "90": 3096.0, + "91": 2857.0, + "92": 3077.0, + "93": 2693.0, + "94": 3312.0, + "95": 3399.0, + "96": 3378.0, + "97": 3030.0, + "98": 3619.0, + "99": 3160.0, + "100": 3128.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 696530432.0, + "52": 696530432.0, + "53": 696530432.0, + "54": 696530432.0, + "55": 696530432.0, + "56": 696530432.0, + "57": 696530432.0, + "58": 696530432.0, + "59": 696530432.0, + "60": 696530432.0, + "61": 696530432.0, + "62": 696530432.0, + "63": 696530432.0, + "64": 696530432.0, + "65": 696530432.0, + "66": 696530432.0, + "67": 696530432.0, + "68": 696530432.0, + "69": 696530432.0, + "70": 696530432.0, + "71": 696530432.0, + "72": 696530432.0, + "73": 696530432.0, + "74": 696530432.0, + "75": 696530432.0, + "76": 696530432.0, + "77": 696530432.0, + "78": 696530432.0, + "79": 696530432.0, + "80": 696530432.0, + "81": 696530432.0, + "82": 696530432.0, + "83": 696530432.0, + "84": 696530432.0, + "85": 696530432.0, + "86": 696530432.0, + "87": 696530432.0, + "88": 696530432.0, + "89": 696530432.0, + "90": 696530432.0, + "91": 
696530432.0, + "92": 696530432.0, + "93": 696530432.0, + "94": 696530432.0, + "95": 696530432.0, + "96": 696530432.0, + "97": 696530432.0, + "98": 696530432.0, + "99": 696530432.0, + "100": 696530432.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1606670848.0, + "52": 1606671872.0, + "53": 1606671872.0, + "54": 1606671872.0, + "55": 1606671872.0, + "56": 1606671872.0, + "57": 1606671872.0, + "58": 1606671872.0, + "59": 1606671872.0, + "60": 1606671872.0, + "61": 1606671872.0, + "62": 1606671872.0, + "63": 1606671872.0, + "64": 1606671872.0, + "65": 1606671872.0, + "66": 1606671872.0, + "67": 1606671872.0, + "68": 1606671872.0, + "69": 1606671872.0, + "70": 1606671872.0, + "71": 1606671872.0, + "72": 1606671872.0, + "73": 1606671872.0, + "74": 1606671872.0, + "75": 1606671872.0, + "76": 1606671872.0, + "77": 1606671872.0, + "78": 1606671872.0, + "79": 1606671872.0, + "80": 1606671872.0, + "81": 1606671872.0, + "82": 1606671872.0, + "83": 1606671872.0, + "84": 1606671872.0, + "85": 1606671872.0, + "86": 1606671872.0, + "87": 1606671872.0, + "88": 1606671872.0, + "89": 1606671872.0, + "90": 1606671872.0, + "91": 1606671872.0, + "92": 1606671872.0, + "93": 
1606671872.0, + "94": 1606671872.0, + "95": 1606671872.0, + "96": 1606671872.0, + "97": 1606671872.0, + "98": 1606671872.0, + "99": 1606671872.0, + "100": 1606671872.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.42109, + "52": 0.15643, + "53": 0.13049, + "54": 0.13624, + "55": 0.13521, + "56": 0.13263, + "57": 0.13088, + "58": 0.13077, + "59": 0.13083, + "60": 0.13167, + "61": 0.13236, + "62": 0.1318, + "63": 0.1298, + "64": 0.12659, + "65": 0.13241, + "66": 0.13279, + "67": 0.13136, + "68": 0.13156, + "69": 0.13048, + "70": 0.13134, + "71": 0.1306, + "72": 0.13073, + "73": 0.13104, + "74": 0.1307, + "75": 0.12918, + "76": 0.13046, + "77": 0.12748, + "78": 0.12438, + "79": 0.12456, + "80": 0.12401, + "81": 0.12459, + "82": 0.12524, + "83": 0.12443, + "84": 0.12519, + "85": 0.12459, + "86": 0.12453, + "87": 0.12733, + "88": 0.12682, + "89": 0.12512, + "90": 0.12406, + "91": 0.12452, + "92": 0.12425, + "93": 0.12737, + "94": 0.12561, + "95": 0.12766, + "96": 0.12743, + "97": 0.12696, + "98": 0.12713, + "99": 0.12566, + "100": 0.12444 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json index 16e4a038563..29bb4241810 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 7.63807, - "2": 0.24295, - "3": 0.21281, - "4": 0.20931, - "5": 0.20554, - "6": 0.20827, - "7": 0.20618, - "8": 0.20701, - "9": 0.2077, - "10": 0.20875, - "11": 0.20704, - "12": 0.20735, - "13": 0.20734, - "14": 0.20659, - "15": 0.2071, - "16": 0.20766, - "17": 0.20579, - "18": 0.20511, - "19": 0.20563, - "20": 0.20589, - "21": 0.205, - "22": 0.20541, - "23": 0.2062, - "24": 0.20485, - "25": 0.20487, - "26": 0.20458, - "27": 0.20496, - "28": 0.20545, - "29": 0.20623, - "30": 0.20511, - "31": 0.20822, - "32": 0.20482, - "33": 0.20538, - "34": 0.20452, - "35": 0.21063, - "36": 0.20987, - "37": 0.20831, - "38": 0.2088, - "39": 0.20816, - "40": 0.20875, - "41": 0.20857, - "42": 0.20959, - "43": 0.20886, - "44": 0.2086, - "45": 0.20776, - "46": 0.20831, - "47": 0.20853, - "48": 0.2086, - "49": 0.20813, - "50": 0.209, - "51": 0.20574, - "52": 0.19892, - "53": 0.19904, - "54": 0.19867, - "55": 0.19897, - "56": 0.20031, - "57": 0.19874, - "58": 0.19971, - "59": 0.2002, - "60": 0.19847, - "61": 0.19948, - "62": 0.20017, - "63": 0.19926, - "64": 0.19923, - "65": 0.19974, - "66": 0.19915, - "67": 0.19992, - "68": 0.19949, - "69": 0.19842, - "70": 0.19824, - "71": 0.2012, - "72": 0.20144, - "73": 0.20339, - "74": 0.19815, - "75": 0.19802, - "76": 0.19898, - "77": 0.20003, - "78": 0.20017, - "79": 0.20157, - "80": 0.20266, - "81": 0.20004, - "82": 0.19937, - "83": 0.2008, - "84": 0.2009, - "85": 0.20194, - "86": 
0.2015, - "87": 0.20004, - "88": 0.20091, - "89": 0.19998, - "90": 0.19993, - "91": 0.20008, - "92": 0.19991, - "93": 0.19979, - "94": 0.19939, - "95": 0.20098, - "96": 0.20045, - "97": 0.19917, - "98": 0.20012, - "99": 0.19963, - "100": 0.19848 + "1": 4.68458, + "2": 0.34484, + "3": 0.20879, + "4": 0.19358, + "5": 0.20092, + "6": 0.20176, + "7": 0.19316, + "8": 0.19111, + "9": 0.1921, + "10": 0.19155, + "11": 0.1921, + "12": 0.19089, + "13": 0.19091, + "14": 0.19273, + "15": 0.19306, + "16": 0.19124, + "17": 0.19058, + "18": 0.19068, + "19": 0.1894, + "20": 0.1897, + "21": 0.18966, + "22": 0.19023, + "23": 0.191, + "24": 0.18993, + "25": 0.19096, + "26": 0.19035, + "27": 0.19016, + "28": 0.18918, + "29": 0.18955, + "30": 0.18937, + "31": 0.18938, + "32": 0.18928, + "33": 0.18984, + "34": 0.18904, + "35": 0.18964, + "36": 0.18935, + "37": 0.18986, + "38": 0.19014, + "39": 0.18982, + "40": 0.18988, + "41": 0.19, + "42": 0.18994, + "43": 0.18983, + "44": 0.18983, + "45": 0.18997, + "46": 0.18936, + "47": 0.18969, + "48": 0.19034, + "49": 0.1892, + "50": 0.18945, + "51": 0.20301, + "52": 0.19526, + "53": 0.19506, + "54": 0.19396, + "55": 0.19539, + "56": 0.19467, + "57": 0.19181, + "58": 0.18922, + "59": 0.19013, + "60": 0.19039, + "61": 0.1891, + "62": 0.19198, + "63": 0.18813, + "64": 0.18836, + "65": 0.18934, + "66": 0.18939, + "67": 0.18844, + "68": 0.18865, + "69": 0.18927, + "70": 0.18882, + "71": 0.18864, + "72": 0.18848, + "73": 0.18879, + "74": 0.18944, + "75": 0.18858, + "76": 0.18852, + "77": 0.18875, + "78": 0.18849, + "79": 0.18926, + "80": 0.18829, + "81": 0.18908, + "82": 0.18904, + "83": 0.18872, + "84": 0.18777, + "85": 0.18882, + "86": 0.18885, + "87": 0.18923, + "88": 0.1889, + "89": 0.18951, + "90": 0.1886, + "91": 0.19049, + "92": 0.19005, + "93": 0.18948, + "94": 0.18876, + "95": 0.19048, + "96": 0.18863, + "97": 0.18791, + "98": 0.1895, + "99": 0.18965, + "100": 0.18845 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..a7ad841079e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.84971, + "52": 9.74156, + "53": 10.06322, + "54": 9.94581, + "55": 9.87731, + "56": 9.62746, + "57": 9.47259, + "58": 9.82912, + "59": 9.583, + "60": 9.49181, + "61": 9.69961, + "62": 9.98089, + "63": 9.37212, + "64": 9.7756, + "65": 8.9433, + "66": 9.69993, + "67": 9.36414, + "68": 9.78706, + "69": 9.78397, + "70": 9.72288, + "71": 9.60749, + "72": 9.58416, + "73": 9.49093, + "74": 8.94864, + "75": 9.41807, + "76": 9.08721, + "77": 10.06283, + "78": 9.729, + "79": 9.37091, + "80": 9.40033, + "81": 9.47754, + "82": 9.69121, + "83": 9.30762, + "84": 9.41252, + "85": 9.61132, + "86": 9.07621, + "87": 9.59459, + "88": 9.74768, + "89": 9.6068, + "90": 9.81078, + "91": 9.34441, + "92": 9.36535, + "93": 9.07743, + 
"94": 8.82975, + "95": 9.51676, + "96": 9.52546, + "97": 9.31031, + "98": 9.67812, + "99": 8.88848, + "100": 9.40128 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2735.0, + "52": 2607.0, + "53": 2951.0, + "54": 2672.0, + "55": 2451.0, + "56": 2712.0, + "57": 2392.0, + "58": 2979.0, + "59": 2869.0, + "60": 2435.0, + "61": 2938.0, + "62": 2669.0, + "63": 2392.0, + "64": 2998.0, + "65": 2689.0, + "66": 3285.0, + "67": 2782.0, + "68": 2753.0, + "69": 2958.0, + "70": 3271.0, + "71": 3040.0, + "72": 2504.0, + "73": 3096.0, + "74": 1910.0, + "75": 2617.0, + "76": 3081.0, + "77": 3390.0, + "78": 3186.0, + "79": 3320.0, + "80": 3483.0, + "81": 3782.0, + "82": 3516.0, + "83": 2864.0, + "84": 3396.0, + "85": 3247.0, + "86": 2785.0, + "87": 3762.0, + "88": 3102.0, + "89": 3483.0, + "90": 3076.0, + "91": 2643.0, + "92": 3198.0, + "93": 2666.0, + "94": 3390.0, + "95": 3410.0, + "96": 3508.0, + "97": 3178.0, + "98": 3865.0, + "99": 3143.0, + "100": 3357.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + 
"10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 487621120.0, + "52": 487621120.0, + "53": 487621120.0, + "54": 487621120.0, + "55": 487621120.0, + "56": 487621120.0, + "57": 487621120.0, + "58": 487621120.0, + "59": 487621120.0, + "60": 487621120.0, + "61": 487621120.0, + "62": 487621120.0, + "63": 487621120.0, + "64": 487621120.0, + "65": 487621120.0, + "66": 487621120.0, + "67": 487621120.0, + "68": 487621120.0, + "69": 487621120.0, + "70": 487621120.0, + "71": 487621120.0, + "72": 487621120.0, + "73": 487621120.0, + "74": 487621120.0, + "75": 487621120.0, + "76": 487621120.0, + "77": 487621120.0, + "78": 487621120.0, + "79": 487621120.0, + "80": 487621120.0, + "81": 487621120.0, + "82": 487621120.0, + "83": 487621120.0, + "84": 487621120.0, + "85": 487621120.0, + "86": 487621120.0, + "87": 487621120.0, + "88": 487621120.0, + "89": 487621120.0, + "90": 487621120.0, + "91": 487621120.0, + "92": 487621120.0, + "93": 487621120.0, + "94": 487621120.0, + "95": 487621120.0, + "96": 487621120.0, + "97": 487621120.0, + "98": 487621120.0, + "99": 487621120.0, + "100": 487621120.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + 
"16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1412441600.0, + "52": 1412442624.0, + "53": 1412442624.0, + "54": 1412442624.0, + "55": 1412442624.0, + "56": 1412442624.0, + "57": 1412442624.0, + "58": 1412442624.0, + "59": 1412442624.0, + "60": 1412442624.0, + "61": 1412442624.0, + "62": 1412442624.0, + "63": 1412442624.0, + "64": 1412442624.0, + "65": 1412442624.0, + "66": 1412442624.0, + "67": 1412442624.0, + "68": 1412442624.0, + "69": 1412442624.0, + "70": 1412442624.0, + "71": 1412442624.0, + "72": 1412442624.0, + "73": 1412442624.0, + "74": 1412442624.0, + "75": 1412442624.0, + "76": 1412442624.0, + "77": 1412442624.0, + "78": 1412442624.0, + "79": 1412442624.0, + "80": 1412442624.0, + "81": 1412442624.0, + "82": 1412442624.0, + "83": 1412442624.0, + "84": 1412442624.0, + "85": 1412442624.0, + "86": 1412442624.0, + "87": 1412442624.0, + "88": 1412442624.0, + "89": 1412442624.0, + "90": 1412442624.0, + "91": 1412442624.0, + "92": 1412442624.0, + "93": 1412442624.0, + "94": 1412442624.0, + "95": 1412442624.0, + "96": 1412442624.0, + "97": 1412442624.0, + "98": 1412442624.0, + "99": 1412442624.0, + "100": 1412442624.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": 
"nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.94248, + "52": 0.22763, + "53": 0.2042, + "54": 0.20275, + "55": 0.19946, + "56": 0.19904, + "57": 0.19835, + "58": 0.19899, + "59": 0.19773, + "60": 0.1984, + "61": 0.19823, + "62": 0.19759, + "63": 0.19781, + "64": 0.19644, + "65": 0.19746, + "66": 0.19818, + "67": 0.19673, + "68": 0.19692, + "69": 0.19752, + "70": 0.19608, + "71": 0.19615, + "72": 0.19651, + "73": 0.19666, + "74": 0.1968, + "75": 0.19633, + "76": 0.19633, + "77": 0.19638, + "78": 0.19631, + "79": 0.19652, + "80": 0.19633, + "81": 0.19737, + "82": 0.19691, + "83": 0.19652, + "84": 0.1968, + "85": 0.19796, + "86": 0.19783, + "87": 0.19656, + "88": 0.19754, + "89": 0.19687, + "90": 0.19705, + "91": 0.19684, + "92": 0.19665, + "93": 0.19712, + "94": 0.19703, + "95": 0.19667, + "96": 0.1973, + "97": 0.19754, + "98": 0.19757, + "99": 0.1962, + "100": 0.19706 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..500fc1be7cf --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86244, + "2": 10.88582, + "3": 10.84735, 
+ "4": 10.85573, + "5": 10.86001, + "6": 10.87731, + "7": 10.86558, + "8": 10.84914, + "9": 10.86606, + "10": 10.82476, + "11": 10.85615, + "12": 10.85374, + "13": 10.8679, + "14": 10.87118, + "15": 10.82236, + "16": 10.79992, + "17": 10.77431, + "18": 10.78349, + "19": 10.79309, + "20": 10.68226, + "21": 10.64711, + "22": 10.5092, + "23": 10.66829, + "24": 10.54196, + "25": 10.49278, + "26": 10.55935, + "27": 10.54234, + "28": 10.5113, + "29": 10.53259, + "30": 10.28989, + "31": 10.0285, + "32": 10.38878, + "33": 10.39596, + "34": 10.13451, + "35": 10.18928, + "36": 10.13355, + "37": 10.2738, + "38": 10.10751, + "39": 10.3401, + "40": 9.98543, + "41": 10.06416, + "42": 10.13751, + "43": 9.73383, + "44": 9.86311, + "45": 9.73722, + "46": 9.71346, + "47": 10.07754, + "48": 9.76768, + "49": 9.41986, + "50": 9.81686, + "51": 9.77423, + "52": 9.66446, + "53": 10.00148, + "54": 9.89157, + "55": 9.8185, + "56": 9.54335, + "57": 9.39451, + "58": 9.76569, + "59": 9.50934, + "60": 9.42824, + "61": 9.63468, + "62": 9.93888, + "63": 9.30458, + "64": 9.70984, + "65": 8.86892, + "66": 9.64956, + "67": 9.30818, + "68": 9.73508, + "69": 9.75593, + "70": 9.68707, + "71": 9.57532, + "72": 9.53074, + "73": 9.43675, + "74": 8.85588, + "75": 9.35531, + "76": 9.01375, + "77": 10.0245, + "78": 9.68203, + "79": 9.33141, + "80": 9.35466, + "81": 9.43622, + "82": 9.65854, + "83": 9.26268, + "84": 9.3692, + "85": 9.57098, + "86": 9.03323, + "87": 9.55969, + "88": 9.71078, + "89": 9.5541, + "90": 9.78662, + "91": 9.2909, + "92": 9.31236, + "93": 9.03976, + "94": 8.78109, + "95": 9.49172, + "96": 9.49067, + "97": 9.25826, + "98": 9.62998, + "99": 8.84685, + "100": 9.36201 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 571.0, + "2": 604.0, + "3": 655.0, + "4": 633.0, + "5": 602.0, + "6": 699.0, + "7": 585.0, + "8": 589.0, + "9": 633.0, + "10": 527.0, + "11": 628.0, + "12": 602.0, + "13": 671.0, + "14": 627.0, + "15": 591.0, + "16": 
605.0, + "17": 666.0, + "18": 604.0, + "19": 631.0, + "20": 604.0, + "21": 706.0, + "22": 598.0, + "23": 682.0, + "24": 656.0, + "25": 593.0, + "26": 615.0, + "27": 681.0, + "28": 693.0, + "29": 701.0, + "30": 699.0, + "31": 564.0, + "32": 781.0, + "33": 724.0, + "34": 679.0, + "35": 711.0, + "36": 733.0, + "37": 858.0, + "38": 794.0, + "39": 789.0, + "40": 857.0, + "41": 739.0, + "42": 856.0, + "43": 742.0, + "44": 798.0, + "45": 772.0, + "46": 872.0, + "47": 941.0, + "48": 838.0, + "49": 799.0, + "50": 840.0, + "51": 961.0, + "52": 952.0, + "53": 1057.0, + "54": 932.0, + "55": 849.0, + "56": 986.0, + "57": 853.0, + "58": 963.0, + "59": 1059.0, + "60": 895.0, + "61": 999.0, + "62": 967.0, + "63": 928.0, + "64": 1046.0, + "65": 974.0, + "66": 998.0, + "67": 1078.0, + "68": 987.0, + "69": 976.0, + "70": 1112.0, + "71": 1031.0, + "72": 889.0, + "73": 1009.0, + "74": 778.0, + "75": 839.0, + "76": 1017.0, + "77": 1069.0, + "78": 1111.0, + "79": 1041.0, + "80": 1089.0, + "81": 1169.0, + "82": 1034.0, + "83": 951.0, + "84": 1098.0, + "85": 1124.0, + "86": 816.0, + "87": 1218.0, + "88": 1128.0, + "89": 1147.0, + "90": 1130.0, + "91": 1096.0, + "92": 1132.0, + "93": 900.0, + "94": 1119.0, + "95": 1095.0, + "96": 1160.0, + "97": 1006.0, + "98": 1240.0, + "99": 1141.0, + "100": 1108.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 459571712.0, + "2": 459571712.0, + "3": 459571712.0, + "4": 459571712.0, + "5": 459571712.0, + "6": 459571712.0, + "7": 459571712.0, + "8": 459571712.0, + "9": 459571712.0, + "10": 459571712.0, + "11": 459571712.0, + "12": 459571712.0, + "13": 459571712.0, + "14": 459571712.0, + "15": 459571712.0, + "16": 459571712.0, + "17": 459571712.0, + "18": 459571712.0, + "19": 459571712.0, + "20": 459571712.0, + "21": 459571712.0, + "22": 459571712.0, + "23": 459571712.0, + "24": 459571712.0, + "25": 459571712.0, + "26": 459571712.0, + "27": 459571712.0, + "28": 459571712.0, + "29": 
459571712.0, + "30": 459571712.0, + "31": 459571712.0, + "32": 459571712.0, + "33": 459571712.0, + "34": 459571712.0, + "35": 459571712.0, + "36": 459571712.0, + "37": 459571712.0, + "38": 459571712.0, + "39": 459571712.0, + "40": 459571712.0, + "41": 459571712.0, + "42": 459571712.0, + "43": 459571712.0, + "44": 459571712.0, + "45": 459571712.0, + "46": 459571712.0, + "47": 459571712.0, + "48": 459571712.0, + "49": 459571712.0, + "50": 459571712.0, + "51": 459571712.0, + "52": 459571712.0, + "53": 459571712.0, + "54": 459571712.0, + "55": 459571712.0, + "56": 459571712.0, + "57": 459571712.0, + "58": 459571712.0, + "59": 459571712.0, + "60": 459571712.0, + "61": 459571712.0, + "62": 459571712.0, + "63": 459571712.0, + "64": 459571712.0, + "65": 459571712.0, + "66": 459571712.0, + "67": 459571712.0, + "68": 459571712.0, + "69": 459571712.0, + "70": 459571712.0, + "71": 459571712.0, + "72": 459571712.0, + "73": 459571712.0, + "74": 459571712.0, + "75": 459571712.0, + "76": 459571712.0, + "77": 459571712.0, + "78": 459571712.0, + "79": 459571712.0, + "80": 459571712.0, + "81": 459571712.0, + "82": 459571712.0, + "83": 459571712.0, + "84": 459571712.0, + "85": 459571712.0, + "86": 459571712.0, + "87": 459571712.0, + "88": 459571712.0, + "89": 459571712.0, + "90": 459571712.0, + "91": 459571712.0, + "92": 459571712.0, + "93": 459571712.0, + "94": 459571712.0, + "95": 459571712.0, + "96": 459571712.0, + "97": 459571712.0, + "98": 459571712.0, + "99": 459571712.0, + "100": 459571712.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 705635840.0, + "2": 883610112.0, + "3": 883610624.0, + "4": 883610624.0, + "5": 883610624.0, + "6": 884657152.0, + "7": 884657152.0, + "8": 884657152.0, + "9": 884657152.0, + "10": 884657152.0, + "11": 884657152.0, + "12": 884657152.0, + "13": 884657152.0, + "14": 884657152.0, + "15": 884659712.0, + "16": 884659712.0, + "17": 884659712.0, + "18": 884659712.0, + "19": 
884659712.0, + "20": 884659712.0, + "21": 884659712.0, + "22": 884659712.0, + "23": 884659712.0, + "24": 884659712.0, + "25": 884659712.0, + "26": 884659712.0, + "27": 884659712.0, + "28": 884659712.0, + "29": 884659712.0, + "30": 884659712.0, + "31": 884659712.0, + "32": 884659712.0, + "33": 884659712.0, + "34": 884659712.0, + "35": 884659712.0, + "36": 884659712.0, + "37": 884659712.0, + "38": 884659712.0, + "39": 884659712.0, + "40": 884659712.0, + "41": 884659712.0, + "42": 884659712.0, + "43": 884659712.0, + "44": 884659712.0, + "45": 884659712.0, + "46": 884659712.0, + "47": 884659712.0, + "48": 884659712.0, + "49": 884659712.0, + "50": 884659712.0, + "51": 884659712.0, + "52": 884659712.0, + "53": 884659712.0, + "54": 884659712.0, + "55": 884659712.0, + "56": 884659712.0, + "57": 884659712.0, + "58": 884659712.0, + "59": 884659712.0, + "60": 884659712.0, + "61": 884659712.0, + "62": 884659712.0, + "63": 884659712.0, + "64": 884659712.0, + "65": 884659712.0, + "66": 884659712.0, + "67": 884659712.0, + "68": 884659712.0, + "69": 884659712.0, + "70": 884659712.0, + "71": 884659712.0, + "72": 884659712.0, + "73": 884659712.0, + "74": 884659712.0, + "75": 884659712.0, + "76": 884659712.0, + "77": 884659712.0, + "78": 884659712.0, + "79": 884659712.0, + "80": 884659712.0, + "81": 884659712.0, + "82": 884659712.0, + "83": 884659712.0, + "84": 884659712.0, + "85": 884659712.0, + "86": 884659712.0, + "87": 884659712.0, + "88": 884659712.0, + "89": 884659712.0, + "90": 884659712.0, + "91": 884659712.0, + "92": 884659712.0, + "93": 884659712.0, + "94": 884659712.0, + "95": 884659712.0, + "96": 884659712.0, + "97": 884659712.0, + "98": 884659712.0, + "99": 884659712.0, + "100": 884659712.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 13.71622, + "2": 0.68805, + "3": 0.57225, + "4": 0.54755, + "5": 0.48793, + "6": 0.52239, + "7": 0.49126, + "8": 0.49498, + "9": 0.58476, + "10": 0.4973, + "11": 0.49619, + 
"12": 0.49824, + "13": 0.49835, + "14": 0.49548, + "15": 0.49404, + "16": 0.50855, + "17": 0.76935, + "18": 0.49519, + "19": 0.49579, + "20": 0.50812, + "21": 0.50221, + "22": 0.49623, + "23": 0.50166, + "24": 0.4965, + "25": 0.49653, + "26": 0.56522, + "27": 0.50204, + "28": 0.4912, + "29": 0.49165, + "30": 0.49253, + "31": 0.48561, + "32": 0.50414, + "33": 0.49461, + "34": 0.48721, + "35": 0.50775, + "36": 0.5025, + "37": 0.49896, + "38": 0.50015, + "39": 0.50322, + "40": 0.51086, + "41": 0.51074, + "42": 0.49461, + "43": 0.5049, + "44": 0.47567, + "45": 0.51176, + "46": 0.51628, + "47": 0.50424, + "48": 0.50299, + "49": 0.50456, + "50": 0.51299, + "51": 0.50546, + "52": 0.48547, + "53": 0.48643, + "54": 0.49187, + "55": 0.50244, + "56": 0.5003, + "57": 0.49723, + "58": 0.5007, + "59": 0.50341, + "60": 0.49703, + "61": 0.49913, + "62": 0.48748, + "63": 0.52659, + "64": 0.49384, + "65": 0.48632, + "66": 0.49435, + "67": 0.49537, + "68": 0.49543, + "69": 0.48543, + "70": 0.49128, + "71": 0.49386, + "72": 0.49681, + "73": 0.49076, + "74": 0.50662, + "75": 0.51506, + "76": 0.51539, + "77": 0.51263, + "78": 0.51094, + "79": 0.50786, + "80": 0.85887, + "81": 0.51151, + "82": 0.50586, + "83": 0.51628, + "84": 0.48942, + "85": 0.50794, + "86": 0.45205, + "87": 0.51667, + "88": 0.52246, + "89": 0.51352, + "90": 0.48616, + "91": 0.51165, + "92": 0.52646, + "93": 0.52475, + "94": 0.50978, + "95": 0.50426, + "96": 0.50587, + "97": 0.52063, + "98": 0.52056, + "99": 0.50217, + "100": 0.50666 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json index c677311f507..990bbe865d6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.86535, - "2": 10.85873, - "3": 10.86285, - "4": 10.84007, - "5": 10.87856, - "6": 10.88856, + "1": 10.86539, + "2": 10.85871, + "3": 10.86283, + "4": 10.84009, + "5": 10.87851, + "6": 10.88849, "7": 10.86538, - "8": 10.86017, - "9": 10.85991, - "10": 10.8298, + "8": 10.86018, + "9": 10.8599, + "10": 10.82981, "11": 10.88947, - "12": 10.87508, - "13": 10.87422, - "14": 10.89677, - "15": 10.8205, - "16": 10.82499, - "17": 10.78984, - "18": 10.81029, - "19": 10.80536, - "20": 10.70396, - "21": 10.6699, - "22": 10.50644, - "23": 10.69003, - "24": 10.5631, + "12": 10.87505, + "13": 10.87426, + "14": 10.89675, + "15": 10.82051, + "16": 10.82497, + "17": 10.78982, + "18": 10.81028, + "19": 10.80533, + "20": 10.70395, + "21": 10.66991, + "22": 10.50641, + "23": 10.69006, + "24": 10.56313, "25": 10.49417, - "26": 10.56624, - "27": 10.58026, - "28": 10.51571, - "29": 10.553, - "30": 10.30552, - "31": 10.02249, - "32": 10.40613, - "33": 10.3988, - "34": 10.13771, - "35": 10.20186, - "36": 10.16052, - "37": 10.28975, - "38": 10.1148, - "39": 10.36102, - "40": 10.01904, - "41": 10.07292, - "42": 10.14696, - "43": 9.74683, - "44": 9.87763, - "45": 9.74966, - "46": 9.73387, - "47": 10.07534, + "26": 10.56627, + "27": 10.58021, + "28": 10.51572, + "29": 10.55296, + "30": 10.3055, + "31": 10.02245, + "32": 10.40616, + "33": 10.39874, + "34": 10.13773, + "35": 10.20185, + "36": 10.16056, + "37": 10.28972, + "38": 10.11479, + "39": 10.36099, + "40": 10.01899, + "41": 10.07293, + "42": 10.14693, + "43": 9.74686, + "44": 9.87761, + "45": 9.74968, + "46": 9.73385, + "47": 10.07539, "48": 9.78069, - "49": 9.4478, - "50": 9.83991, - "51": 9.78025, - "52": 9.67263, - "53": 10.0201, - "54": 9.89789, - "55": 9.81664, - "56": 9.56044, - "57": 9.41178, - "58": 9.77419, - "59": 9.51794, 
- "60": 9.43538, - "61": 9.64484, + "49": 9.44781, + "50": 9.83993, + "51": 9.78026, + "52": 9.67268, + "53": 10.02014, + "54": 9.89787, + "55": 9.81661, + "56": 9.56042, + "57": 9.41177, + "58": 9.77417, + "59": 9.51799, + "60": 9.43536, + "61": 9.64482, "62": 9.93004, - "63": 9.30911, - "64": 9.72068, - "65": 8.87154, - "66": 9.64427, + "63": 9.3091, + "64": 9.72065, + "65": 8.87152, + "66": 9.64429, "67": 9.31328, "68": 9.74067, - "69": 9.75334, + "69": 9.75333, "70": 9.70004, - "71": 9.56556, - "72": 9.53094, - "73": 9.44386, - "74": 8.86782, - "75": 9.37314, - "76": 9.01274, - "77": 10.02855, + "71": 9.5656, + "72": 9.53096, + "73": 9.44383, + "74": 8.86781, + "75": 9.3731, + "76": 9.01276, + "77": 10.02858, "78": 9.68739, - "79": 9.328, - "80": 9.36168, - "81": 9.43367, + "79": 9.32798, + "80": 9.36164, + "81": 9.43365, "82": 9.66094, - "83": 9.25139, - "84": 9.37352, - "85": 9.56939, + "83": 9.25142, + "84": 9.37355, + "85": 9.56941, "86": 9.03181, "87": 9.55584, - "88": 9.71055, - "89": 9.55395, - "90": 9.78475, - "91": 9.29077, - "92": 9.31245, - "93": 9.03142, - "94": 8.78671, - "95": 9.4873, - "96": 9.49052, - "97": 9.26684, - "98": 9.63648, - "99": 8.84333, - "100": 9.35549 + "88": 9.71056, + "89": 9.55398, + "90": 9.78471, + "91": 9.29078, + "92": 9.31244, + "93": 9.03139, + "94": 8.78668, + "95": 9.48732, + "96": 9.4905, + "97": 9.26686, + "98": 9.63647, + "99": 8.84336, + "100": 9.35551 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 585.0, - "2": 648.0, - "3": 630.0, - "4": 656.0, - "5": 620.0, - "6": 637.0, - "7": 641.0, - "8": 581.0, - "9": 660.0, - "10": 504.0, - "11": 664.0, - "12": 639.0, - "13": 670.0, - "14": 666.0, - "15": 652.0, - "16": 624.0, - "17": 704.0, - "18": 579.0, - "19": 682.0, - "20": 623.0, - "21": 657.0, - "22": 561.0, - "23": 763.0, - "24": 593.0, - "25": 629.0, - "26": 669.0, - "27": 691.0, - "28": 738.0, - "29": 788.0, - "30": 744.0, - "31": 604.0, - "32": 736.0, - "33": 
787.0, - "34": 706.0, - "35": 692.0, - "36": 714.0, - "37": 835.0, - "38": 768.0, - "39": 894.0, - "40": 764.0, - "41": 852.0, - "42": 878.0, - "43": 733.0, - "44": 827.0, - "45": 785.0, - "46": 877.0, - "47": 927.0, - "48": 873.0, - "49": 891.0, - "50": 869.0, - "51": 928.0, - "52": 968.0, - "53": 1089.0, - "54": 966.0, - "55": 913.0, - "56": 983.0, - "57": 889.0, - "58": 1063.0, - "59": 1005.0, - "60": 876.0, - "61": 1043.0, - "62": 897.0, - "63": 971.0, - "64": 1100.0, - "65": 911.0, - "66": 1107.0, - "67": 948.0, - "68": 1033.0, - "69": 1064.0, - "70": 1118.0, - "71": 1032.0, - "72": 854.0, - "73": 1007.0, - "74": 739.0, - "75": 877.0, - "76": 1075.0, - "77": 1108.0, - "78": 1103.0, - "79": 980.0, - "80": 1055.0, - "81": 1240.0, - "82": 1101.0, - "83": 1007.0, - "84": 1147.0, - "85": 1157.0, - "86": 897.0, - "87": 1247.0, - "88": 1015.0, - "89": 1155.0, - "90": 1138.0, - "91": 1141.0, - "92": 1142.0, - "93": 947.0, - "94": 1116.0, - "95": 1119.0, - "96": 1099.0, - "97": 997.0, - "98": 1188.0, - "99": 1141.0, - "100": 1102.0 + "1": 597.0, + "2": 647.0, + "3": 637.0, + "4": 610.0, + "5": 635.0, + "6": 696.0, + "7": 660.0, + "8": 563.0, + "9": 609.0, + "10": 515.0, + "11": 716.0, + "12": 570.0, + "13": 661.0, + "14": 668.0, + "15": 654.0, + "16": 630.0, + "17": 671.0, + "18": 624.0, + "19": 624.0, + "20": 615.0, + "21": 655.0, + "22": 563.0, + "23": 719.0, + "24": 632.0, + "25": 605.0, + "26": 613.0, + "27": 655.0, + "28": 690.0, + "29": 769.0, + "30": 655.0, + "31": 602.0, + "32": 721.0, + "33": 800.0, + "34": 727.0, + "35": 739.0, + "36": 722.0, + "37": 792.0, + "38": 721.0, + "39": 793.0, + "40": 758.0, + "41": 868.0, + "42": 813.0, + "43": 761.0, + "44": 836.0, + "45": 803.0, + "46": 809.0, + "47": 881.0, + "48": 849.0, + "49": 868.0, + "50": 856.0, + "51": 923.0, + "52": 936.0, + "53": 1031.0, + "54": 967.0, + "55": 838.0, + "56": 1001.0, + "57": 887.0, + "58": 1072.0, + "59": 1004.0, + "60": 898.0, + "61": 1016.0, + "62": 912.0, + "63": 903.0, + "64": 998.0, 
+ "65": 943.0, + "66": 1132.0, + "67": 967.0, + "68": 998.0, + "69": 1028.0, + "70": 1034.0, + "71": 1084.0, + "72": 889.0, + "73": 1054.0, + "74": 685.0, + "75": 899.0, + "76": 1042.0, + "77": 1171.0, + "78": 1099.0, + "79": 1026.0, + "80": 1139.0, + "81": 1262.0, + "82": 1077.0, + "83": 982.0, + "84": 1080.0, + "85": 1114.0, + "86": 813.0, + "87": 1191.0, + "88": 1075.0, + "89": 1091.0, + "90": 1079.0, + "91": 1094.0, + "92": 1132.0, + "93": 983.0, + "94": 1160.0, + "95": 1117.0, + "96": 1186.0, + "97": 1031.0, + "98": 1215.0, + "99": 1185.0, + "100": 1147.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0, - "51": 510689792.0, - "52": 510689792.0, - "53": 510689792.0, - "54": 510689792.0, - "55": 510689792.0, - "56": 510689792.0, - "57": 510689792.0, - "58": 510689792.0, - "59": 510689792.0, - "60": 510689792.0, - "61": 510689792.0, - "62": 510689792.0, - "63": 510689792.0, - "64": 
510689792.0, - "65": 510689792.0, - "66": 510689792.0, - "67": 510689792.0, - "68": 510689792.0, - "69": 510689792.0, - "70": 510689792.0, - "71": 510689792.0, - "72": 510689792.0, - "73": 510689792.0, - "74": 510689792.0, - "75": 510689792.0, - "76": 510689792.0, - "77": 510689792.0, - "78": 510689792.0, - "79": 510689792.0, - "80": 510689792.0, - "81": 510689792.0, - "82": 510689792.0, - "83": 510689792.0, - "84": 510689792.0, - "85": 510689792.0, - "86": 510689792.0, - "87": 510689792.0, - "88": 510689792.0, - "89": 510689792.0, - "90": 510689792.0, - "91": 510689792.0, - "92": 510689792.0, - "93": 510689792.0, - "94": 510689792.0, - "95": 510689792.0, - "96": 510689792.0, - "97": 510689792.0, - "98": 510689792.0, - "99": 510689792.0, - "100": 510689792.0 + "1": 512786944.0, + "2": 512786944.0, + "3": 512786944.0, + "4": 512786944.0, + "5": 512786944.0, + "6": 512786944.0, + "7": 512786944.0, + "8": 512786944.0, + "9": 512786944.0, + "10": 512786944.0, + "11": 512786944.0, + "12": 512786944.0, + "13": 512786944.0, + "14": 512786944.0, + "15": 512786944.0, + "16": 512786944.0, + "17": 512786944.0, + "18": 512786944.0, + "19": 512786944.0, + "20": 512786944.0, + "21": 512786944.0, + "22": 512786944.0, + "23": 512786944.0, + "24": 512786944.0, + "25": 512786944.0, + "26": 512786944.0, + "27": 512786944.0, + "28": 512786944.0, + "29": 512786944.0, + "30": 512786944.0, + "31": 512786944.0, + "32": 512786944.0, + "33": 512786944.0, + "34": 512786944.0, + "35": 512786944.0, + "36": 512786944.0, + "37": 512786944.0, + "38": 512786944.0, + "39": 512786944.0, + "40": 512786944.0, + "41": 512786944.0, + "42": 512786944.0, + "43": 512786944.0, + "44": 512786944.0, + "45": 512786944.0, + "46": 512786944.0, + "47": 512786944.0, + "48": 512786944.0, + "49": 512786944.0, + "50": 512786944.0, + "51": 512786944.0, + "52": 512786944.0, + "53": 512786944.0, + "54": 512786944.0, + "55": 512786944.0, + "56": 512786944.0, + "57": 512786944.0, + "58": 512786944.0, + "59": 512786944.0, 
+ "60": 512786944.0, + "61": 512786944.0, + "62": 512786944.0, + "63": 512786944.0, + "64": 512786944.0, + "65": 512786944.0, + "66": 512786944.0, + "67": 512786944.0, + "68": 512786944.0, + "69": 512786944.0, + "70": 512786944.0, + "71": 512786944.0, + "72": 512786944.0, + "73": 512786944.0, + "74": 512786944.0, + "75": 512786944.0, + "76": 512786944.0, + "77": 512786944.0, + "78": 512786944.0, + "79": 512786944.0, + "80": 512786944.0, + "81": 512786944.0, + "82": 512786944.0, + "83": 512786944.0, + "84": 512786944.0, + "85": 512786944.0, + "86": 512786944.0, + "87": 512786944.0, + "88": 512786944.0, + "89": 512786944.0, + "90": 512786944.0, + "91": 512786944.0, + "92": 512786944.0, + "93": 512786944.0, + "94": 512786944.0, + "95": 512786944.0, + "96": 512786944.0, + "97": 512786944.0, + "98": 512786944.0, + "99": 512786944.0, + "100": 512786944.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 759895552.0, - "2": 933156352.0, - "3": 933156352.0, - "4": 933156352.0, - "5": 933156352.0, - "6": 933156352.0, - "7": 933156352.0, - "8": 933156352.0, - "9": 933156352.0, - "10": 933156352.0, - "11": 933156352.0, - "12": 933156352.0, - "13": 933156352.0, - "14": 933156352.0, - "15": 933156352.0, - "16": 933156352.0, - "17": 933156352.0, - "18": 933156352.0, - "19": 933156352.0, - "20": 933156352.0, - "21": 933156352.0, - "22": 933156352.0, - "23": 933156352.0, - "24": 933156352.0, - "25": 933156352.0, - "26": 933156352.0, - "27": 933156352.0, - "28": 933156352.0, - "29": 933156352.0, - "30": 933156352.0, - "31": 933156352.0, - "32": 933156352.0, - "33": 933156352.0, - "34": 933156352.0, - "35": 933156352.0, - "36": 933156352.0, - "37": 933156352.0, - "38": 933156352.0, - "39": 933156352.0, - "40": 933156352.0, - "41": 933156352.0, - "42": 933156352.0, - "43": 933156352.0, - "44": 933156352.0, - "45": 933156352.0, - "46": 933156352.0, - "47": 933156352.0, - "48": 933156352.0, - "49": 933156352.0, - "50": 
933156352.0, - "51": 933156352.0, - "52": 933156352.0, - "53": 933156352.0, - "54": 933156352.0, - "55": 933156352.0, - "56": 933156352.0, - "57": 933156352.0, - "58": 933156352.0, - "59": 933156352.0, - "60": 933156352.0, - "61": 933156352.0, - "62": 933156352.0, - "63": 933156352.0, - "64": 933156352.0, - "65": 933156352.0, - "66": 933156352.0, - "67": 933156352.0, - "68": 933156352.0, - "69": 933156352.0, - "70": 933156352.0, - "71": 933156352.0, - "72": 933156352.0, - "73": 933156352.0, - "74": 933156352.0, - "75": 933156352.0, - "76": 933156352.0, - "77": 933156352.0, - "78": 933156352.0, - "79": 933156352.0, - "80": 933156352.0, - "81": 933156352.0, - "82": 933156352.0, - "83": 933156352.0, - "84": 933156352.0, - "85": 933156352.0, - "86": 933156352.0, - "87": 933156352.0, - "88": 933156352.0, - "89": 933156352.0, - "90": 933156352.0, - "91": 933156352.0, - "92": 933156352.0, - "93": 933156352.0, - "94": 933156352.0, - "95": 933156352.0, - "96": 933156352.0, - "97": 933156352.0, - "98": 933156352.0, - "99": 933156352.0, - "100": 933156352.0 + "1": 758850560.0, + "2": 937349632.0, + "3": 937349632.0, + "4": 937349632.0, + "5": 937349632.0, + "6": 937349632.0, + "7": 937349632.0, + "8": 937350144.0, + "9": 937350144.0, + "10": 937350656.0, + "11": 937350656.0, + "12": 937350656.0, + "13": 937350656.0, + "14": 937350656.0, + "15": 937350656.0, + "16": 937350656.0, + "17": 937350656.0, + "18": 937350656.0, + "19": 937350656.0, + "20": 937350656.0, + "21": 937350656.0, + "22": 937350656.0, + "23": 937350656.0, + "24": 937350656.0, + "25": 937350656.0, + "26": 937350656.0, + "27": 937350656.0, + "28": 937350656.0, + "29": 937350656.0, + "30": 937350656.0, + "31": 937350656.0, + "32": 937350656.0, + "33": 937350656.0, + "34": 937350656.0, + "35": 937350656.0, + "36": 937350656.0, + "37": 937350656.0, + "38": 937350656.0, + "39": 937350656.0, + "40": 937350656.0, + "41": 937350656.0, + "42": 937350656.0, + "43": 937350656.0, + "44": 937350656.0, + "45": 937350656.0, 
+ "46": 937350656.0, + "47": 937350656.0, + "48": 937350656.0, + "49": 937350656.0, + "50": 937350656.0, + "51": 937350656.0, + "52": 937350656.0, + "53": 937350656.0, + "54": 937350656.0, + "55": 937350656.0, + "56": 937350656.0, + "57": 937350656.0, + "58": 937350656.0, + "59": 937350656.0, + "60": 937350656.0, + "61": 937350656.0, + "62": 937350656.0, + "63": 937350656.0, + "64": 937350656.0, + "65": 937350656.0, + "66": 937350656.0, + "67": 937350656.0, + "68": 937350656.0, + "69": 937350656.0, + "70": 937350656.0, + "71": 937350656.0, + "72": 937350656.0, + "73": 937350656.0, + "74": 937350656.0, + "75": 937350656.0, + "76": 937350656.0, + "77": 937350656.0, + "78": 937350656.0, + "79": 937350656.0, + "80": 937350656.0, + "81": 937350656.0, + "82": 937350656.0, + "83": 937350656.0, + "84": 937350656.0, + "85": 937350656.0, + "86": 937350656.0, + "87": 937350656.0, + "88": 937350656.0, + "89": 937350656.0, + "90": 937350656.0, + "91": 937350656.0, + "92": 937350656.0, + "93": 937350656.0, + "94": 937350656.0, + "95": 937350656.0, + "96": 937350656.0, + "97": 937350656.0, + "98": 937350656.0, + "99": 937350656.0, + "100": 937350656.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 15.91944, - "2": 0.35854, - "3": 0.34422, - "4": 0.34655, - "5": 0.33791, - "6": 0.34327, - "7": 0.34394, - "8": 0.3383, - "9": 0.34058, - "10": 0.32396, - "11": 0.32631, - "12": 0.33064, - "13": 0.32832, - "14": 0.32645, - "15": 0.32686, - "16": 0.32351, - "17": 0.32796, - "18": 0.33094, - "19": 0.32865, - "20": 0.32722, - "21": 0.32666, - "22": 0.32679, - "23": 0.32717, - "24": 0.32824, - "25": 0.32793, - "26": 0.32517, - "27": 0.326, - "28": 0.32627, - "29": 0.32627, - "30": 0.32688, - "31": 0.32603, - "32": 0.32544, - "33": 0.32613, - "34": 0.32696, - "35": 0.32522, - "36": 0.32966, - "37": 0.32462, - "38": 0.32724, - "39": 0.32622, - "40": 0.32646, - "41": 0.32504, - "42": 0.32464, - "43": 0.3299, - "44": 0.32495, - "45": 
0.32382, - "46": 0.32567, - "47": 0.32847, - "48": 0.32521, - "49": 0.32738, - "50": 0.32495, - "51": 0.33517, - "52": 0.33963, - "53": 0.33084, - "54": 0.3299, - "55": 0.33062, - "56": 0.32923, - "57": 0.32909, - "58": 0.331, - "59": 0.32595, - "60": 0.32446, - "61": 0.32961, - "62": 0.33126, - "63": 0.32393, - "64": 0.32986, - "65": 0.32836, - "66": 0.32921, - "67": 0.32945, - "68": 0.32848, - "69": 0.32625, - "70": 0.32898, - "71": 0.33227, - "72": 0.32403, - "73": 0.3284, - "74": 0.32761, - "75": 0.32791, - "76": 0.33223, - "77": 0.33113, - "78": 0.32546, - "79": 0.32925, - "80": 0.33175, - "81": 0.33071, - "82": 0.32698, - "83": 0.32738, - "84": 0.32835, - "85": 0.32729, - "86": 0.33228, - "87": 0.32668, - "88": 0.33091, - "89": 0.32825, - "90": 0.32752, - "91": 0.32814, - "92": 0.33195, - "93": 0.32686, - "94": 0.33172, - "95": 0.33336, - "96": 0.32938, - "97": 0.33024, - "98": 0.32939, - "99": 0.32654, - "100": 0.3311 + "1": 33.75672, + "2": 0.32538, + "3": 0.30979, + "4": 0.29132, + "5": 0.28673, + "6": 0.29044, + "7": 0.28928, + "8": 0.28782, + "9": 0.28716, + "10": 0.29487, + "11": 0.28718, + "12": 0.28269, + "13": 0.28219, + "14": 0.28189, + "15": 0.28466, + "16": 0.28241, + "17": 0.28424, + "18": 0.28237, + "19": 0.2825, + "20": 0.28165, + "21": 0.28578, + "22": 0.28723, + "23": 0.28406, + "24": 0.28161, + "25": 0.28206, + "26": 0.28395, + "27": 0.28087, + "28": 0.28029, + "29": 0.28081, + "30": 0.28035, + "31": 0.27965, + "32": 0.28051, + "33": 0.28076, + "34": 0.2798, + "35": 0.27825, + "36": 0.28669, + "37": 0.28531, + "38": 0.28497, + "39": 0.28165, + "40": 0.28034, + "41": 0.27847, + "42": 0.27754, + "43": 0.28102, + "44": 0.27958, + "45": 0.27967, + "46": 0.28044, + "47": 0.27794, + "48": 0.28143, + "49": 0.27941, + "50": 0.28096, + "51": 0.29673, + "52": 0.28031, + "53": 0.28708, + "54": 0.28243, + "55": 0.28247, + "56": 0.28076, + "57": 0.28031, + "58": 0.27896, + "59": 0.27986, + "60": 0.28148, + "61": 0.27915, + "62": 0.28166, + "63": 0.28345, 
+ "64": 0.28119, + "65": 0.28241, + "66": 0.28032, + "67": 0.28162, + "68": 0.2838, + "69": 0.28382, + "70": 0.28245, + "71": 0.28204, + "72": 0.28468, + "73": 0.28238, + "74": 0.28182, + "75": 0.28321, + "76": 0.28243, + "77": 0.28435, + "78": 0.28226, + "79": 0.28216, + "80": 0.28198, + "81": 0.28267, + "82": 0.28258, + "83": 0.283, + "84": 0.68437, + "85": 0.28406, + "86": 0.28139, + "87": 0.28473, + "88": 0.28619, + "89": 0.28286, + "90": 0.28309, + "91": 0.28733, + "92": 0.28154, + "93": 0.28434, + "94": 0.28361, + "95": 0.28379, + "96": 0.28667, + "97": 0.2826, + "98": 0.28464, + "99": 0.28558, + "100": 0.2859 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..1ce44c0962c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": 
"nan", + "51": 9.78026, + "52": 9.67272, + "53": 10.02012, + "54": 9.89791, + "55": 9.81665, + "56": 9.56044, + "57": 9.4118, + "58": 9.77417, + "59": 9.51797, + "60": 9.43538, + "61": 9.64483, + "62": 9.93003, + "63": 9.30914, + "64": 9.72064, + "65": 8.87154, + "66": 9.6443, + "67": 9.3133, + "68": 9.74067, + "69": 9.75331, + "70": 9.70008, + "71": 9.56555, + "72": 9.53094, + "73": 9.44386, + "74": 8.86784, + "75": 9.3731, + "76": 9.01275, + "77": 10.02855, + "78": 9.68737, + "79": 9.328, + "80": 9.36163, + "81": 9.43365, + "82": 9.66095, + "83": 9.25139, + "84": 9.37351, + "85": 9.5694, + "86": 9.03181, + "87": 9.55583, + "88": 9.71053, + "89": 9.55398, + "90": 9.78474, + "91": 9.29074, + "92": 9.3124, + "93": 9.03138, + "94": 8.78672, + "95": 9.48731, + "96": 9.49047, + "97": 9.26687, + "98": 9.63648, + "99": 8.84331, + "100": 9.3555 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 960.0, + "52": 970.0, + "53": 1045.0, + "54": 961.0, + "55": 923.0, + "56": 1019.0, + "57": 841.0, + "58": 1004.0, + "59": 1037.0, + "60": 895.0, + "61": 1040.0, + "62": 961.0, + "63": 902.0, + "64": 1056.0, + "65": 922.0, + "66": 1099.0, + "67": 1049.0, + "68": 1009.0, + "69": 1109.0, + "70": 
1071.0, + "71": 1121.0, + "72": 894.0, + "73": 1041.0, + "74": 731.0, + "75": 929.0, + "76": 1076.0, + "77": 1111.0, + "78": 1058.0, + "79": 1042.0, + "80": 1112.0, + "81": 1233.0, + "82": 1119.0, + "83": 1018.0, + "84": 1162.0, + "85": 1189.0, + "86": 894.0, + "87": 1298.0, + "88": 1076.0, + "89": 1107.0, + "90": 1134.0, + "91": 1079.0, + "92": 1171.0, + "93": 928.0, + "94": 1150.0, + "95": 1176.0, + "96": 1207.0, + "97": 1049.0, + "98": 1192.0, + "99": 1082.0, + "100": 1082.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 693384704.0, + "52": 693384704.0, + "53": 693384704.0, + "54": 693384704.0, + "55": 693384704.0, + "56": 693384704.0, + "57": 693384704.0, + "58": 693384704.0, + "59": 693384704.0, + "60": 693384704.0, + "61": 693384704.0, + "62": 693384704.0, + "63": 693384704.0, + "64": 693384704.0, + "65": 693384704.0, + "66": 693384704.0, + "67": 693384704.0, + "68": 693384704.0, + "69": 693384704.0, + "70": 693384704.0, + "71": 693384704.0, + "72": 693384704.0, + "73": 693384704.0, + "74": 693384704.0, + "75": 693384704.0, + "76": 693384704.0, + "77": 693384704.0, + "78": 693384704.0, + "79": 693384704.0, + "80": 693384704.0, + "81": 693384704.0, + "82": 
693384704.0, + "83": 693384704.0, + "84": 693384704.0, + "85": 693384704.0, + "86": 693384704.0, + "87": 693384704.0, + "88": 693384704.0, + "89": 693384704.0, + "90": 693384704.0, + "91": 693384704.0, + "92": 693384704.0, + "93": 693384704.0, + "94": 693384704.0, + "95": 693384704.0, + "96": 693384704.0, + "97": 693384704.0, + "98": 693384704.0, + "99": 693384704.0, + "100": 693384704.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1118993408.0, + "52": 1118993408.0, + "53": 1118993408.0, + "54": 1118993408.0, + "55": 1118993408.0, + "56": 1118993408.0, + "57": 1118993408.0, + "58": 1118993408.0, + "59": 1118993408.0, + "60": 1118993408.0, + "61": 1118993408.0, + "62": 1118993408.0, + "63": 1118993408.0, + "64": 1118993408.0, + "65": 1118993408.0, + "66": 1118993408.0, + "67": 1118993408.0, + "68": 1118993408.0, + "69": 1118993408.0, + "70": 1118993408.0, + "71": 1118993408.0, + "72": 1118993408.0, + "73": 1118993408.0, + "74": 1118993408.0, + "75": 1118993408.0, + "76": 1118993408.0, + "77": 1118993408.0, + "78": 1118993408.0, + "79": 1118993408.0, + "80": 1118993408.0, + "81": 1118993408.0, + "82": 1118993408.0, + "83": 1118993408.0, + "84": 1118993408.0, + 
"85": 1118993408.0, + "86": 1118993408.0, + "87": 1118993408.0, + "88": 1118993408.0, + "89": 1118993408.0, + "90": 1118993408.0, + "91": 1118993408.0, + "92": 1118993408.0, + "93": 1118993408.0, + "94": 1118993408.0, + "95": 1118993408.0, + "96": 1118993408.0, + "97": 1118993408.0, + "98": 1118993408.0, + "99": 1118993408.0, + "100": 1118993408.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 34.29507, + "52": 0.3376, + "53": 0.30049, + "54": 0.29407, + "55": 0.28696, + "56": 0.29147, + "57": 0.28499, + "58": 0.28472, + "59": 0.28545, + "60": 0.28609, + "61": 0.2861, + "62": 0.28427, + "63": 0.28328, + "64": 0.28944, + "65": 0.28429, + "66": 0.31251, + "67": 0.28579, + "68": 0.28489, + "69": 0.28347, + "70": 0.28227, + "71": 0.28508, + "72": 0.28217, + "73": 0.27896, + "74": 0.28082, + "75": 0.28386, + "76": 0.28438, + "77": 0.2834, + "78": 0.28181, + "79": 0.28078, + "80": 0.27927, + "81": 0.28147, + "82": 0.28131, + "83": 0.28333, + "84": 0.29099, + "85": 0.28669, + "86": 0.28394, + "87": 0.28298, + "88": 0.28081, + "89": 0.28349, + "90": 0.28455, + "91": 0.28426, + "92": 0.28166, + "93": 0.28252, + "94": 0.28323, + "95": 0.28319, + "96": 0.28167, + "97": 0.28018, + 
"98": 0.2832, + "99": 0.28544, + "100": 0.28341 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json index d51aa6cf4b8..305e2861ba0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.93292, "5": 10.92969, "10": 10.90473, "15": 10.87121, "20": 10.74997, "25": 10.53751, "30": 10.32549, "35": 10.22894, "40": 10.01974, "45": 9.75549, "50": 9.84069, "55": 9.81451, "60": 9.42443, "65": 8.86707, "70": 9.67897, "75": 9.36665, "80": 9.35303, "85": 9.56706, "90": 9.77585, "95": 9.48329, "100": 9.3588}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 582.0, "5": 618.0, "10": 496.0, "15": 672.0, "20": 600.0, "25": 619.0, "30": 678.0, "35": 697.0, "40": 775.0, "45": 770.0, "50": 894.0, "55": 906.0, "60": 932.0, "65": 960.0, "70": 1106.0, "75": 889.0, "80": 1186.0, "85": 1068.0, "90": 1077.0, "95": 1054.0, "100": 1160.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 431783936.0, "5": 431783936.0, "10": 431783936.0, "15": 431783936.0, "20": 431783936.0, "25": 431783936.0, "30": 431783936.0, "35": 431783936.0, "40": 431783936.0, "45": 431783936.0, "50": 431783936.0, "55": 431783936.0, "60": 431783936.0, "65": 431783936.0, "70": 431783936.0, "75": 431783936.0, "80": 431783936.0, "85": 431783936.0, "90": 431783936.0, "95": 431783936.0, "100": 431783936.0}}, "iteration-time": {"start_step": 1, "end_step": 100, 
"step_interval": 5, "values": {"1": 13.90186, "5": 0.37688, "10": 0.37024, "15": 0.381, "20": 0.38683, "25": 0.39543, "30": 0.38049, "35": 0.36959, "40": 0.36509, "45": 0.364, "50": 0.36469, "55": 0.37647, "60": 0.37716, "65": 0.39072, "70": 0.39183, "75": 0.55129, "80": 0.39335, "85": 0.40289, "90": 0.41031, "95": 0.39498, "100": 0.3918}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.93295, + "2": 10.93424, + "3": 10.91344, + "4": 10.90322, + "5": 10.92968, + "6": 10.93657, + "7": 10.90276, + "8": 10.92115, + "9": 10.90707, + "10": 10.90476, + "11": 10.88788, + "12": 10.91733, + "13": 10.91195, + "14": 10.91509, + "15": 10.87119, + "16": 10.86125, + "17": 10.82702, + "18": 10.85673, + "19": 10.84055, + "20": 10.74999, + "21": 10.71506, + "22": 10.58115, + "23": 10.72644, + "24": 10.6073, + "25": 10.5375, + "26": 10.61069, + "27": 10.5993, + "28": 10.54958, + "29": 10.56604, + "30": 10.32547, + "31": 10.067, + "32": 10.43808, + "33": 10.4236, + "34": 10.16016, + "35": 10.22895, + "36": 10.17614, + "37": 10.29234, + "38": 10.13297, + "39": 10.34954, + "40": 10.01975, + "41": 10.07535, + "42": 10.15411, + "43": 9.76087, + "44": 9.88356, + "45": 9.75546, + "46": 9.74961, + "47": 10.07545, + "48": 9.77936, + "49": 9.43816, + "50": 9.84068, + "51": 9.77754, + "52": 9.66521, + "53": 10.00741, + "54": 9.88875, + "55": 9.81454, + "56": 9.55923, + "57": 9.39915, + "58": 9.77272, + "59": 9.51594, + "60": 9.42442, + "61": 9.64311, + "62": 9.93502, + "63": 9.30274, + "64": 9.72154, + "65": 8.86709, + "66": 9.64655, + "67": 9.30856, + "68": 9.74064, + "69": 9.74152, + "70": 9.67899, + "71": 9.55875, + "72": 9.53277, + "73": 9.4385, + "74": 8.8823, + "75": 9.36667, + "76": 9.02475, + "77": 10.02955, + "78": 9.68853, + "79": 9.32607, + "80": 9.35305, + "81": 9.4325, + "82": 9.65191, + "83": 9.25404, + "84": 9.36521, + "85": 9.56708, + "86": 9.03549, + "87": 9.55775, + "88": 9.70743, + "89": 
9.55898, + "90": 9.77585, + "91": 9.29644, + "92": 9.32116, + "93": 9.02865, + "94": 8.78309, + "95": 9.48327, + "96": 9.48473, + "97": 9.26675, + "98": 9.63739, + "99": 8.83895, + "100": 9.35878 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 575.0, + "2": 614.0, + "3": 618.0, + "4": 588.0, + "5": 658.0, + "6": 625.0, + "7": 635.0, + "8": 591.0, + "9": 692.0, + "10": 524.0, + "11": 700.0, + "12": 628.0, + "13": 680.0, + "14": 644.0, + "15": 614.0, + "16": 692.0, + "17": 646.0, + "18": 594.0, + "19": 608.0, + "20": 585.0, + "21": 666.0, + "22": 575.0, + "23": 672.0, + "24": 628.0, + "25": 623.0, + "26": 614.0, + "27": 678.0, + "28": 748.0, + "29": 717.0, + "30": 649.0, + "31": 582.0, + "32": 677.0, + "33": 793.0, + "34": 658.0, + "35": 685.0, + "36": 752.0, + "37": 842.0, + "38": 786.0, + "39": 800.0, + "40": 776.0, + "41": 804.0, + "42": 818.0, + "43": 743.0, + "44": 783.0, + "45": 797.0, + "46": 802.0, + "47": 891.0, + "48": 931.0, + "49": 793.0, + "50": 810.0, + "51": 913.0, + "52": 862.0, + "53": 982.0, + "54": 908.0, + "55": 889.0, + "56": 1012.0, + "57": 865.0, + "58": 954.0, + "59": 985.0, + "60": 924.0, + "61": 964.0, + "62": 954.0, + "63": 848.0, + "64": 983.0, + "65": 902.0, + "66": 1148.0, + "67": 973.0, + "68": 960.0, + "69": 1050.0, + "70": 1071.0, + "71": 1046.0, + "72": 833.0, + "73": 997.0, + "74": 711.0, + "75": 871.0, + "76": 1024.0, + "77": 1165.0, + "78": 1124.0, + "79": 1101.0, + "80": 1162.0, + "81": 1147.0, + "82": 1079.0, + "83": 959.0, + "84": 1124.0, + "85": 1142.0, + "86": 907.0, + "87": 1201.0, + "88": 1109.0, + "89": 1119.0, + "90": 1093.0, + "91": 1082.0, + "92": 1145.0, + "93": 926.0, + "94": 1074.0, + "95": 1165.0, + "96": 1161.0, + "97": 1029.0, + "98": 1199.0, + "99": 1192.0, + "100": 1083.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 431522304.0, + "2": 431522304.0, + "3": 431522304.0, + "4": 
431522304.0, + "5": 431522304.0, + "6": 431522304.0, + "7": 431522304.0, + "8": 431522304.0, + "9": 431522304.0, + "10": 431522304.0, + "11": 431522304.0, + "12": 431522304.0, + "13": 431522304.0, + "14": 431522304.0, + "15": 431522304.0, + "16": 431522304.0, + "17": 431522304.0, + "18": 431522304.0, + "19": 431522304.0, + "20": 431522304.0, + "21": 431522304.0, + "22": 431522304.0, + "23": 431522304.0, + "24": 431522304.0, + "25": 431522304.0, + "26": 431522304.0, + "27": 431522304.0, + "28": 431522304.0, + "29": 431522304.0, + "30": 431522304.0, + "31": 431522304.0, + "32": 431522304.0, + "33": 431522304.0, + "34": 431522304.0, + "35": 431522304.0, + "36": 431522304.0, + "37": 431522304.0, + "38": 431522304.0, + "39": 431522304.0, + "40": 431522304.0, + "41": 431522304.0, + "42": 431522304.0, + "43": 431522304.0, + "44": 431522304.0, + "45": 431522304.0, + "46": 431522304.0, + "47": 431522304.0, + "48": 431522304.0, + "49": 431522304.0, + "50": 431522304.0, + "51": 431522304.0, + "52": 431522304.0, + "53": 431522304.0, + "54": 431522304.0, + "55": 431522304.0, + "56": 431522304.0, + "57": 431522304.0, + "58": 431522304.0, + "59": 431522304.0, + "60": 431522304.0, + "61": 431522304.0, + "62": 431522304.0, + "63": 431522304.0, + "64": 431522304.0, + "65": 431522304.0, + "66": 431522304.0, + "67": 431522304.0, + "68": 431522304.0, + "69": 431522304.0, + "70": 431522304.0, + "71": 431522304.0, + "72": 431522304.0, + "73": 431522304.0, + "74": 431522304.0, + "75": 431522304.0, + "76": 431522304.0, + "77": 431522304.0, + "78": 431522304.0, + "79": 431522304.0, + "80": 431522304.0, + "81": 431522304.0, + "82": 431522304.0, + "83": 431522304.0, + "84": 431522304.0, + "85": 431522304.0, + "86": 431522304.0, + "87": 431522304.0, + "88": 431522304.0, + "89": 431522304.0, + "90": 431522304.0, + "91": 431522304.0, + "92": 431522304.0, + "93": 431522304.0, + "94": 431522304.0, + "95": 431522304.0, + "96": 431522304.0, + "97": 431522304.0, + "98": 431522304.0, + "99": 
431522304.0, + "100": 431522304.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 678369280.0, + "2": 861326848.0, + "3": 861328384.0, + "4": 861328384.0, + "5": 861328896.0, + "6": 861328896.0, + "7": 861328896.0, + "8": 861328896.0, + "9": 861328896.0, + "10": 861328896.0, + "11": 861328896.0, + "12": 861328896.0, + "13": 861328896.0, + "14": 861328896.0, + "15": 861328896.0, + "16": 861328896.0, + "17": 861328896.0, + "18": 861328896.0, + "19": 861328896.0, + "20": 861328896.0, + "21": 861328896.0, + "22": 861328896.0, + "23": 861328896.0, + "24": 861328896.0, + "25": 861328896.0, + "26": 861328896.0, + "27": 861328896.0, + "28": 861328896.0, + "29": 861328896.0, + "30": 861328896.0, + "31": 861328896.0, + "32": 861328896.0, + "33": 861328896.0, + "34": 861328896.0, + "35": 861328896.0, + "36": 861328896.0, + "37": 861328896.0, + "38": 861328896.0, + "39": 861328896.0, + "40": 861328896.0, + "41": 861328896.0, + "42": 861328896.0, + "43": 861328896.0, + "44": 861328896.0, + "45": 861328896.0, + "46": 861328896.0, + "47": 861328896.0, + "48": 861328896.0, + "49": 861328896.0, + "50": 861328896.0, + "51": 861328896.0, + "52": 861328896.0, + "53": 861328896.0, + "54": 861328896.0, + "55": 861328896.0, + "56": 861328896.0, + "57": 861328896.0, + "58": 861328896.0, + "59": 861328896.0, + "60": 861328896.0, + "61": 861328896.0, + "62": 861328896.0, + "63": 861328896.0, + "64": 861328896.0, + "65": 861328896.0, + "66": 861328896.0, + "67": 861328896.0, + "68": 861328896.0, + "69": 861328896.0, + "70": 861328896.0, + "71": 861328896.0, + "72": 861328896.0, + "73": 861328896.0, + "74": 861328896.0, + "75": 861328896.0, + "76": 861328896.0, + "77": 861328896.0, + "78": 861328896.0, + "79": 861328896.0, + "80": 861328896.0, + "81": 861328896.0, + "82": 861328896.0, + "83": 861328896.0, + "84": 861328896.0, + "85": 861328896.0, + "86": 861328896.0, + "87": 861328896.0, + "88": 861328896.0, + "89": 
861328896.0, + "90": 861328896.0, + "91": 861328896.0, + "92": 861328896.0, + "93": 861328896.0, + "94": 861328896.0, + "95": 861328896.0, + "96": 861328896.0, + "97": 861328896.0, + "98": 861328896.0, + "99": 861328896.0, + "100": 861328896.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 16.94419, + "2": 0.46293, + "3": 0.43323, + "4": 0.41124, + "5": 0.41337, + "6": 0.41008, + "7": 0.41384, + "8": 0.41526, + "9": 0.41249, + "10": 0.41837, + "11": 0.41987, + "12": 0.42279, + "13": 0.41933, + "14": 0.42011, + "15": 0.42058, + "16": 0.41981, + "17": 0.42742, + "18": 0.41843, + "19": 0.41598, + "20": 0.4167, + "21": 0.4156, + "22": 0.41702, + "23": 0.4169, + "24": 0.41743, + "25": 0.41779, + "26": 0.41667, + "27": 0.41879, + "28": 0.41658, + "29": 0.4158, + "30": 0.41602, + "31": 0.41609, + "32": 0.41672, + "33": 0.41727, + "34": 0.41721, + "35": 0.41711, + "36": 0.41695, + "37": 0.41937, + "38": 0.41806, + "39": 0.417, + "40": 0.41717, + "41": 0.41772, + "42": 0.41463, + "43": 0.41752, + "44": 0.41751, + "45": 0.41653, + "46": 0.41569, + "47": 0.4202, + "48": 0.41969, + "49": 0.42062, + "50": 0.42196, + "51": 0.9121, + "52": 0.41319, + "53": 0.41164, + "54": 0.41017, + "55": 0.4114, + "56": 0.41164, + "57": 0.41138, + "58": 0.40994, + "59": 0.41137, + "60": 0.41062, + "61": 0.41152, + "62": 0.41366, + "63": 0.4107, + "64": 0.41226, + "65": 0.41176, + "66": 0.41026, + "67": 0.41204, + "68": 0.4122, + "69": 0.41122, + "70": 0.41376, + "71": 0.41137, + "72": 0.41098, + "73": 0.41047, + "74": 0.4109, + "75": 0.4132, + "76": 0.41301, + "77": 0.41293, + "78": 0.41243, + "79": 0.41053, + "80": 0.41164, + "81": 0.40993, + "82": 0.41202, + "83": 0.41372, + "84": 0.4109, + "85": 0.4122, + "86": 0.41126, + "87": 0.41232, + "88": 0.41314, + "89": 0.41115, + "90": 0.41218, + "91": 0.4144, + "92": 0.41696, + "93": 0.41972, + "94": 0.42467, + "95": 0.4157, + "96": 0.41335, + "97": 0.41389, + "98": 0.4112, + "99": 
0.41259, + "100": 0.41414 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..2453c036dba --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.77754, + "52": 9.66523, + "53": 10.00743, + "54": 9.88877, + "55": 9.81452, + "56": 9.55922, + "57": 9.39915, + "58": 9.77267, + "59": 9.51591, + "60": 9.42443, + "61": 9.64313, + "62": 9.93504, + "63": 9.30269, + "64": 9.72154, + "65": 8.8671, + "66": 9.64654, + "67": 9.30858, + "68": 9.74062, + "69": 9.74154, + "70": 9.679, + "71": 9.55873, + "72": 9.53281, + "73": 9.43848, + "74": 8.88229, + "75": 9.36665, + "76": 9.02477, + "77": 10.02954, + "78": 9.68857, + "79": 9.32609, + "80": 9.35306, + "81": 9.43247, + "82": 9.65188, + "83": 9.25407, + "84": 9.36521, + "85": 
9.56705, + "86": 9.03549, + "87": 9.55774, + "88": 9.70742, + "89": 9.55898, + "90": 9.77582, + "91": 9.29648, + "92": 9.32118, + "93": 9.02866, + "94": 8.7831, + "95": 9.48329, + "96": 9.48475, + "97": 9.26673, + "98": 9.63742, + "99": 8.839, + "100": 9.35878 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 896.0, + "52": 882.0, + "53": 967.0, + "54": 942.0, + "55": 870.0, + "56": 923.0, + "57": 825.0, + "58": 1049.0, + "59": 968.0, + "60": 865.0, + "61": 981.0, + "62": 954.0, + "63": 820.0, + "64": 1016.0, + "65": 940.0, + "66": 1085.0, + "67": 1020.0, + "68": 987.0, + "69": 1062.0, + "70": 1082.0, + "71": 1048.0, + "72": 855.0, + "73": 1061.0, + "74": 664.0, + "75": 883.0, + "76": 1018.0, + "77": 1199.0, + "78": 1121.0, + "79": 1119.0, + "80": 1138.0, + "81": 1228.0, + "82": 1145.0, + "83": 906.0, + "84": 1179.0, + "85": 1108.0, + "86": 826.0, + "87": 1236.0, + "88": 1067.0, + "89": 1133.0, + "90": 1059.0, + "91": 1052.0, + "92": 1187.0, + "93": 894.0, + "94": 1074.0, + "95": 1088.0, + "96": 1138.0, + "97": 1004.0, + "98": 1204.0, + "99": 1107.0, + "100": 1104.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 
"nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 433357312.0, + "52": 433357312.0, + "53": 433357312.0, + "54": 433357312.0, + "55": 433357312.0, + "56": 433357312.0, + "57": 433357312.0, + "58": 433357312.0, + "59": 433357312.0, + "60": 433357312.0, + "61": 433357312.0, + "62": 433357312.0, + "63": 433357312.0, + "64": 433357312.0, + "65": 433357312.0, + "66": 433357312.0, + "67": 433357312.0, + "68": 433357312.0, + "69": 433357312.0, + "70": 433357312.0, + "71": 433357312.0, + "72": 433357312.0, + "73": 433357312.0, + "74": 433357312.0, + "75": 433357312.0, + "76": 433357312.0, + "77": 433357312.0, + "78": 433357312.0, + "79": 433357312.0, + "80": 433357312.0, + "81": 433357312.0, + "82": 433357312.0, + "83": 433357312.0, + "84": 433357312.0, + "85": 433357312.0, + "86": 433357312.0, + "87": 433357312.0, + "88": 433357312.0, + "89": 433357312.0, + "90": 433357312.0, + "91": 433357312.0, + "92": 433357312.0, + "93": 433357312.0, + "94": 433357312.0, + "95": 433357312.0, + "96": 433357312.0, + "97": 433357312.0, + "98": 433357312.0, + "99": 433357312.0, + "100": 433357312.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + 
"8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 861062656.0, + "52": 861850624.0, + "53": 861850624.0, + "54": 861850624.0, + "55": 861850624.0, + "56": 861850624.0, + "57": 861850624.0, + "58": 861850624.0, + "59": 861850624.0, + "60": 861850624.0, + "61": 861850624.0, + "62": 861850624.0, + "63": 861850624.0, + "64": 861850624.0, + "65": 861850624.0, + "66": 861850624.0, + "67": 861850624.0, + "68": 861850624.0, + "69": 861850624.0, + "70": 861850624.0, + "71": 861852160.0, + "72": 861852160.0, + "73": 861852160.0, + "74": 861852160.0, + "75": 861852160.0, + "76": 861852160.0, + "77": 861853184.0, + "78": 861853184.0, + "79": 861853184.0, + "80": 861853184.0, + "81": 861853184.0, + "82": 861853184.0, + "83": 861853184.0, + "84": 861853184.0, + "85": 861853184.0, + "86": 861853184.0, + "87": 861853184.0, + "88": 861853184.0, + "89": 861853184.0, + "90": 861853184.0, + "91": 861853184.0, + "92": 861853184.0, + "93": 861853184.0, + "94": 861853184.0, + "95": 861853184.0, + "96": 861853184.0, + "97": 861853184.0, + "98": 861853184.0, + "99": 861853184.0, + "100": 861853184.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + 
"15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 19.65307, + "52": 0.45052, + "53": 0.42082, + "54": 0.41811, + "55": 0.41814, + "56": 0.41733, + "57": 0.41818, + "58": 0.418, + "59": 0.41748, + "60": 0.41977, + "61": 0.41771, + "62": 0.42393, + "63": 0.42754, + "64": 0.42379, + "65": 0.42104, + "66": 0.42071, + "67": 0.4201, + "68": 0.41916, + "69": 0.41995, + "70": 0.4222, + "71": 0.42158, + "72": 0.42185, + "73": 0.41889, + "74": 0.42962, + "75": 0.42666, + "76": 0.4191, + "77": 0.421, + "78": 0.42068, + "79": 0.41987, + "80": 0.41899, + "81": 0.41896, + "82": 0.42029, + "83": 0.41923, + "84": 0.419, + "85": 0.42028, + "86": 0.41955, + "87": 0.41973, + "88": 0.41946, + "89": 0.41924, + "90": 0.42048, + "91": 0.42238, + "92": 0.42092, + "93": 0.42289, + "94": 0.42394, + "95": 0.42171, + "96": 0.42176, + "97": 0.42119, + "98": 0.42004, + "99": 0.42349, + "100": 0.42222 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..c8639e2d542 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 10.86836, + "2": 10.88595, + "3": 10.86559, + "4": 10.86893, + "5": 10.87417, + "6": 10.89061, + "7": 10.87673, + "8": 10.8647, + "9": 10.88231, + "10": 10.84582, + "11": 10.87165, + "12": 10.87421, + "13": 10.88164, + "14": 10.88885, + "15": 10.83927, + "16": 10.825, + "17": 10.80147, + "18": 10.81236, + "19": 10.82153, + "20": 10.71933, + "21": 10.6909, + "22": 10.57427, + "23": 10.71093, + "24": 10.59784, + "25": 10.5556, + "26": 10.61523, + "27": 10.60454, + "28": 10.56483, + "29": 10.58475, + "30": 10.35945, + "31": 10.12153, + "32": 10.45236, + "33": 10.45724, + "34": 10.21987, + "35": 10.2644, + "36": 10.21038, + "37": 10.33961, + "38": 10.18012, + "39": 10.39589, + "40": 10.0663, + "41": 10.14169, + "42": 10.2085, + "43": 9.83125, + "44": 9.94861, + "45": 9.82847, + "46": 9.80462, + "47": 10.14229, + "48": 9.84463, + "49": 9.52194, + "50": 9.88607, + "51": 9.84982, + "52": 9.74429, + "53": 10.05843, + "54": 9.95129, + "55": 9.88343, + "56": 9.61329, + "57": 9.46899, + "58": 9.82161, + "59": 9.57702, + "60": 9.49786, + "61": 9.69256, + "62": 9.98595, + "63": 9.37403, + "64": 9.76605, + "65": 8.94649, + "66": 9.70105, + "67": 9.36367, + "68": 9.78237, + "69": 9.79879, + "70": 9.73166, + "71": 9.62508, + "72": 9.58312, + "73": 9.48822, + "74": 8.92611, + "75": 9.40725, + "76": 9.07708, + "77": 10.05858, + "78": 9.7221, + "79": 9.37662, + "80": 9.40273, + "81": 9.48209, + "82": 9.6995, + "83": 9.31351, + "84": 9.4173, + "85": 9.61584, + "86": 9.07429, + "87": 9.59551, + "88": 9.75065, + "89": 9.6004, + "90": 9.8221, + "91": 9.33876, + "92": 9.3578, + "93": 9.08672, + "94": 8.82958, + "95": 9.52596, + "96": 9.52973, + "97": 9.30335, + "98": 9.67136, + "99": 8.89537, + "100": 9.40568 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1535.0, + "2": 1566.0, + "3": 1736.0, + "4": 1667.0, + "5": 1821.0, + "6": 1743.0, + "7": 1753.0, + "8": 1683.0, + "9": 1801.0, + "10": 
1363.0, + "11": 1688.0, + "12": 1722.0, + "13": 1831.0, + "14": 1630.0, + "15": 1842.0, + "16": 1763.0, + "17": 1822.0, + "18": 1543.0, + "19": 1709.0, + "20": 1618.0, + "21": 1878.0, + "22": 1591.0, + "23": 1932.0, + "24": 1597.0, + "25": 1549.0, + "26": 1621.0, + "27": 1732.0, + "28": 1921.0, + "29": 1931.0, + "30": 1880.0, + "31": 1483.0, + "32": 1832.0, + "33": 2077.0, + "34": 1814.0, + "35": 1908.0, + "36": 1856.0, + "37": 2378.0, + "38": 2057.0, + "39": 2342.0, + "40": 2151.0, + "41": 2265.0, + "42": 2146.0, + "43": 1897.0, + "44": 2097.0, + "45": 2059.0, + "46": 2303.0, + "47": 2451.0, + "48": 2255.0, + "49": 2310.0, + "50": 2472.0, + "51": 2560.0, + "52": 2622.0, + "53": 2835.0, + "54": 2696.0, + "55": 2322.0, + "56": 2793.0, + "57": 2247.0, + "58": 2951.0, + "59": 2850.0, + "60": 2515.0, + "61": 2874.0, + "62": 2686.0, + "63": 2448.0, + "64": 2936.0, + "65": 2670.0, + "66": 2814.0, + "67": 2782.0, + "68": 2808.0, + "69": 2901.0, + "70": 3044.0, + "71": 2876.0, + "72": 2508.0, + "73": 2893.0, + "74": 1974.0, + "75": 2488.0, + "76": 2881.0, + "77": 3104.0, + "78": 3241.0, + "79": 3196.0, + "80": 3322.0, + "81": 3594.0, + "82": 3215.0, + "83": 2643.0, + "84": 3180.0, + "85": 3159.0, + "86": 2619.0, + "87": 3774.0, + "88": 3025.0, + "89": 3322.0, + "90": 3043.0, + "91": 2830.0, + "92": 3015.0, + "93": 2758.0, + "94": 3190.0, + "95": 3172.0, + "96": 3453.0, + "97": 3176.0, + "98": 3590.0, + "99": 3059.0, + "100": 3290.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 465601024.0, + "2": 465601024.0, + "3": 465601024.0, + "4": 465601024.0, + "5": 465601024.0, + "6": 465601024.0, + "7": 465601024.0, + "8": 465601024.0, + "9": 465601024.0, + "10": 465601024.0, + "11": 465601024.0, + "12": 465601024.0, + "13": 465601024.0, + "14": 465601024.0, + "15": 465601024.0, + "16": 465601024.0, + "17": 465601024.0, + "18": 465601024.0, + "19": 465601024.0, + "20": 465601024.0, + "21": 465601024.0, + "22": 
465601024.0, + "23": 465601024.0, + "24": 465601024.0, + "25": 465601024.0, + "26": 465601024.0, + "27": 465601024.0, + "28": 465601024.0, + "29": 465601024.0, + "30": 465601024.0, + "31": 465601024.0, + "32": 465601024.0, + "33": 465601024.0, + "34": 465601024.0, + "35": 465601024.0, + "36": 465601024.0, + "37": 465601024.0, + "38": 465601024.0, + "39": 465601024.0, + "40": 465601024.0, + "41": 465601024.0, + "42": 465601024.0, + "43": 465601024.0, + "44": 465601024.0, + "45": 465601024.0, + "46": 465601024.0, + "47": 465601024.0, + "48": 465601024.0, + "49": 465601024.0, + "50": 465601024.0, + "51": 465601024.0, + "52": 465601024.0, + "53": 465601024.0, + "54": 465601024.0, + "55": 465601024.0, + "56": 465601024.0, + "57": 465601024.0, + "58": 465601024.0, + "59": 465601024.0, + "60": 465601024.0, + "61": 465601024.0, + "62": 465601024.0, + "63": 465601024.0, + "64": 465601024.0, + "65": 465601024.0, + "66": 465601024.0, + "67": 465601024.0, + "68": 465601024.0, + "69": 465601024.0, + "70": 465601024.0, + "71": 465601024.0, + "72": 465601024.0, + "73": 465601024.0, + "74": 465601024.0, + "75": 465601024.0, + "76": 465601024.0, + "77": 465601024.0, + "78": 465601024.0, + "79": 465601024.0, + "80": 465601024.0, + "81": 465601024.0, + "82": 465601024.0, + "83": 465601024.0, + "84": 465601024.0, + "85": 465601024.0, + "86": 465601024.0, + "87": 465601024.0, + "88": 465601024.0, + "89": 465601024.0, + "90": 465601024.0, + "91": 465601024.0, + "92": 465601024.0, + "93": 465601024.0, + "94": 465601024.0, + "95": 465601024.0, + "96": 465601024.0, + "97": 465601024.0, + "98": 465601024.0, + "99": 465601024.0, + "100": 465601024.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1728999424.0, + "2": 1789405696.0, + "3": 1789405696.0, + "4": 1789405696.0, + "5": 1789405696.0, + "6": 1789405696.0, + "7": 1789405696.0, + "8": 1789405696.0, + "9": 1789405696.0, + "10": 1789405696.0, + "11": 1789405696.0, + 
"12": 1789405696.0, + "13": 1789405696.0, + "14": 1789405696.0, + "15": 1789405696.0, + "16": 1789405696.0, + "17": 1789405696.0, + "18": 1789405696.0, + "19": 1789405696.0, + "20": 1789405696.0, + "21": 1789405696.0, + "22": 1789405696.0, + "23": 1789405696.0, + "24": 1789405696.0, + "25": 1789405696.0, + "26": 1789405696.0, + "27": 1789405696.0, + "28": 1789405696.0, + "29": 1789405696.0, + "30": 1789405696.0, + "31": 1789405696.0, + "32": 1789405696.0, + "33": 1789405696.0, + "34": 1789405696.0, + "35": 1789405696.0, + "36": 1789405696.0, + "37": 1789405696.0, + "38": 1789405696.0, + "39": 1789405696.0, + "40": 1789405696.0, + "41": 1789405696.0, + "42": 1789405696.0, + "43": 1789405696.0, + "44": 1789405696.0, + "45": 1789405696.0, + "46": 1789405696.0, + "47": 1789405696.0, + "48": 1789405696.0, + "49": 1789405696.0, + "50": 1789405696.0, + "51": 1789405696.0, + "52": 1789405696.0, + "53": 1789405696.0, + "54": 1789405696.0, + "55": 1789405696.0, + "56": 1789405696.0, + "57": 1789405696.0, + "58": 1789405696.0, + "59": 1789405696.0, + "60": 1789405696.0, + "61": 1789405696.0, + "62": 1789405696.0, + "63": 1789405696.0, + "64": 1789405696.0, + "65": 1789405696.0, + "66": 1789405696.0, + "67": 1789405696.0, + "68": 1789405696.0, + "69": 1789405696.0, + "70": 1789405696.0, + "71": 1789405696.0, + "72": 1789405696.0, + "73": 1789405696.0, + "74": 1789405696.0, + "75": 1789405696.0, + "76": 1789405696.0, + "77": 1789405696.0, + "78": 1789405696.0, + "79": 1789405696.0, + "80": 1789405696.0, + "81": 1789405696.0, + "82": 1789405696.0, + "83": 1789405696.0, + "84": 1789405696.0, + "85": 1789405696.0, + "86": 1789405696.0, + "87": 1789405696.0, + "88": 1789405696.0, + "89": 1789405696.0, + "90": 1789405696.0, + "91": 1789405696.0, + "92": 1789405696.0, + "93": 1789405696.0, + "94": 1789405696.0, + "95": 1789405696.0, + "96": 1789405696.0, + "97": 1789405696.0, + "98": 1789405696.0, + "99": 1789405696.0, + "100": 1789405696.0 + } + }, + "iteration-time": { + 
"start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.87419, + "2": 0.53001, + "3": 0.2186, + "4": 0.25482, + "5": 0.20138, + "6": 0.19379, + "7": 0.79335, + "8": 0.34845, + "9": 0.55178, + "10": 0.41213, + "11": 0.70514, + "12": 0.42183, + "13": 0.79058, + "14": 0.25823, + "15": 0.17847, + "16": 0.17856, + "17": 0.22517, + "18": 0.17747, + "19": 0.2016, + "20": 0.17788, + "21": 0.2366, + "22": 0.17719, + "23": 0.17889, + "24": 0.17909, + "25": 0.23071, + "26": 0.18878, + "27": 0.17959, + "28": 0.17796, + "29": 0.19707, + "30": 0.17868, + "31": 0.23748, + "32": 0.17977, + "33": 0.1776, + "34": 0.17788, + "35": 0.17714, + "36": 0.17848, + "37": 0.17912, + "38": 0.17729, + "39": 0.20194, + "40": 0.5561, + "41": 0.18404, + "42": 0.21996, + "43": 0.1805, + "44": 0.22997, + "45": 0.17843, + "46": 0.17815, + "47": 0.17755, + "48": 0.21932, + "49": 0.17935, + "50": 0.21536, + "51": 0.18927, + "52": 0.17358, + "53": 0.17366, + "54": 0.19577, + "55": 0.17508, + "56": 0.20037, + "57": 0.17429, + "58": 0.2159, + "59": 0.17615, + "60": 0.17613, + "61": 0.17677, + "62": 0.17726, + "63": 0.22918, + "64": 0.17848, + "65": 0.17926, + "66": 0.17835, + "67": 0.17818, + "68": 0.17977, + "69": 0.17935, + "70": 0.17953, + "71": 0.17922, + "72": 0.17845, + "73": 0.19928, + "74": 0.17885, + "75": 0.20547, + "76": 0.2325, + "77": 0.18027, + "78": 0.17887, + "79": 0.18129, + "80": 0.18884, + "81": 0.1894, + "82": 0.18987, + "83": 0.19315, + "84": 0.19155, + "85": 0.19434, + "86": 0.19122, + "87": 0.1931, + "88": 0.19294, + "89": 0.2106, + "90": 0.19136, + "91": 0.19388, + "92": 0.21142, + "93": 0.19188, + "94": 0.19177, + "95": 0.19125, + "96": 0.1943, + "97": 0.20398, + "98": 0.19536, + "99": 0.19149, + "100": 0.19184 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json index eb0e5f82b03..13709a61234 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json @@ -6,104 +6,104 @@ "values": { "1": 10.85949, "2": 10.85553, - "3": 10.86548, + "3": 10.86549, "4": 10.84554, - "5": 10.88344, - "6": 10.89429, - "7": 10.87068, - "8": 10.86983, - "9": 10.86919, + "5": 10.88343, + "6": 10.89431, + "7": 10.87071, + "8": 10.86985, + "9": 10.86923, "10": 10.83883, - "11": 10.89435, - "12": 10.8798, + "11": 10.89433, + "12": 10.87981, "13": 10.87987, - "14": 10.90317, - "15": 10.8405, - "16": 10.83786, - "17": 10.80668, - "18": 10.83025, - "19": 10.82262, - "20": 10.73192, - "21": 10.7075, - "22": 10.56005, + "14": 10.90321, + "15": 10.84051, + "16": 10.83788, + "17": 10.8067, + "18": 10.83029, + "19": 10.82265, + "20": 10.73194, + "21": 10.70748, + "22": 10.56007, "23": 10.72406, - "24": 10.61116, - "25": 10.5481, - "26": 10.61334, - "27": 10.6305, - "28": 10.56645, - "29": 10.59672, - "30": 10.37136, - "31": 10.11721, - "32": 10.46127, - "33": 10.45247, + "24": 10.61115, + "25": 10.54815, + "26": 10.61326, + "27": 10.63058, + "28": 10.56646, + "29": 10.59668, + "30": 10.37135, + "31": 10.11724, + "32": 10.46129, + "33": 10.45251, "34": 10.21687, - "35": 10.27171, - "36": 10.2312, - "37": 10.34809, - "38": 10.18842, - "39": 10.41042, - "40": 10.09426, - "41": 10.14711, - "42": 10.21247, - "43": 9.84106, - "44": 9.95919, - "45": 9.84082, - "46": 9.82482, - "47": 10.13882, - "48": 9.85839, + "35": 10.2717, + "36": 10.23118, + "37": 10.34811, + "38": 10.18844, + "39": 10.4104, + "40": 10.09431, + "41": 10.14712, + "42": 10.21245, + "43": 9.84104, + "44": 9.95916, + "45": 9.84088, + 
"46": 9.82483, + "47": 10.13881, + "48": 9.85842, "49": 9.5472, "50": 9.90883, "51": 9.85585, "52": 9.75243, - "53": 10.07588, - "54": 9.95691, - "55": 9.88207, - "56": 9.63139, - "57": 9.48649, - "58": 9.83116, - "59": 9.58907, - "60": 9.50648, - "61": 9.70368, - "62": 9.98289, - "63": 9.38314, - "64": 9.7791, - "65": 8.95182, - "66": 9.70161, + "53": 10.07586, + "54": 9.95687, + "55": 9.88208, + "56": 9.63141, + "57": 9.48653, + "58": 9.83119, + "59": 9.58905, + "60": 9.50652, + "61": 9.7037, + "62": 9.98292, + "63": 9.38312, + "64": 9.77906, + "65": 8.95185, + "66": 9.70159, "67": 9.37209, - "68": 9.78856, - "69": 9.79856, - "70": 9.74748, + "68": 9.78851, + "69": 9.79857, + "70": 9.74745, "71": 9.6191, - "72": 9.585, - "73": 9.49728, - "74": 8.93928, - "75": 9.42702, + "72": 9.58502, + "73": 9.4973, + "74": 8.93931, + "75": 9.42703, "76": 9.08022, - "77": 10.06569, - "78": 9.72897, - "79": 9.37772, - "80": 9.41001, - "81": 9.47977, - "82": 9.70183, - "83": 9.30621, - "84": 9.42098, - "85": 9.61377, - "86": 9.07654, - "87": 9.59456, - "88": 9.75071, + "77": 10.0657, + "78": 9.72894, + "79": 9.37773, + "80": 9.41006, + "81": 9.4798, + "82": 9.70181, + "83": 9.30619, + "84": 9.42095, + "85": 9.6138, + "86": 9.07653, + "87": 9.59452, + "88": 9.75069, "89": 9.60243, - "90": 9.81899, - "91": 9.33898, - "92": 9.35718, - "93": 9.07884, - "94": 8.83509, - "95": 9.52175, - "96": 9.53007, - "97": 9.31309, - "98": 9.67781, - "99": 8.89061, - "100": 9.39729 + "90": 9.81897, + "91": 9.33895, + "92": 9.35716, + "93": 9.07885, + "94": 8.83508, + "95": 9.52177, + "96": 9.53006, + "97": 9.31311, + "98": 9.67783, + "99": 8.89063, + "100": 9.39728 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1690.0, - "2": 1776.0, - "3": 1642.0, - "4": 1825.0, - "5": 1809.0, - "6": 1795.0, - "7": 1830.0, - "8": 1626.0, - "9": 1878.0, - "10": 1423.0, - "11": 1868.0, - "12": 1653.0, - "13": 1897.0, - "14": 1783.0, - "15": 1861.0, - "16": 1938.0, 
- "17": 1825.0, - "18": 1730.0, - "19": 1727.0, - "20": 1735.0, - "21": 1783.0, - "22": 1576.0, - "23": 1949.0, - "24": 1630.0, - "25": 1498.0, - "26": 1649.0, - "27": 1809.0, - "28": 2019.0, - "29": 2009.0, - "30": 1832.0, - "31": 1524.0, - "32": 1943.0, - "33": 2081.0, - "34": 1888.0, - "35": 1935.0, - "36": 1898.0, - "37": 2325.0, - "38": 2070.0, - "39": 2248.0, - "40": 2199.0, - "41": 2264.0, - "42": 2349.0, - "43": 2087.0, - "44": 2107.0, - "45": 2098.0, - "46": 2407.0, - "47": 2456.0, - "48": 2404.0, - "49": 2417.0, - "50": 2407.0, - "51": 2578.0, - "52": 2630.0, - "53": 2857.0, - "54": 2818.0, - "55": 2368.0, - "56": 2757.0, - "57": 2423.0, - "58": 2776.0, - "59": 2742.0, - "60": 2371.0, - "61": 2906.0, - "62": 2517.0, - "63": 2374.0, - "64": 2995.0, - "65": 2634.0, - "66": 2995.0, - "67": 2884.0, - "68": 2840.0, - "69": 2766.0, - "70": 3006.0, - "71": 3023.0, - "72": 2386.0, - "73": 2958.0, - "74": 1851.0, - "75": 2585.0, - "76": 2973.0, - "77": 3244.0, - "78": 3142.0, - "79": 3185.0, - "80": 3249.0, - "81": 3665.0, - "82": 3153.0, - "83": 2821.0, - "84": 3083.0, - "85": 3247.0, - "86": 2734.0, - "87": 3759.0, - "88": 2968.0, - "89": 3282.0, - "90": 3064.0, - "91": 2908.0, - "92": 2946.0, - "93": 2592.0, - "94": 3363.0, - "95": 3423.0, - "96": 3259.0, - "97": 2976.0, - "98": 3683.0, - "99": 3173.0, - "100": 3143.0 + "1": 1675.0, + "2": 1744.0, + "3": 1725.0, + "4": 1850.0, + "5": 1942.0, + "6": 1919.0, + "7": 1794.0, + "8": 1612.0, + "9": 1826.0, + "10": 1481.0, + "11": 1852.0, + "12": 1654.0, + "13": 1809.0, + "14": 1847.0, + "15": 1914.0, + "16": 1874.0, + "17": 1882.0, + "18": 1639.0, + "19": 1787.0, + "20": 1701.0, + "21": 1842.0, + "22": 1573.0, + "23": 2018.0, + "24": 1509.0, + "25": 1540.0, + "26": 1694.0, + "27": 1769.0, + "28": 1966.0, + "29": 2057.0, + "30": 1820.0, + "31": 1566.0, + "32": 1898.0, + "33": 2074.0, + "34": 1865.0, + "35": 1908.0, + "36": 1925.0, + "37": 2274.0, + "38": 2094.0, + "39": 2312.0, + "40": 2053.0, + "41": 2209.0, + "42": 
2303.0, + "43": 2019.0, + "44": 2102.0, + "45": 2222.0, + "46": 2393.0, + "47": 2409.0, + "48": 2336.0, + "49": 2342.0, + "50": 2395.0, + "51": 2653.0, + "52": 2603.0, + "53": 2986.0, + "54": 2776.0, + "55": 2370.0, + "56": 2805.0, + "57": 2448.0, + "58": 2867.0, + "59": 2702.0, + "60": 2437.0, + "61": 2841.0, + "62": 2562.0, + "63": 2493.0, + "64": 2971.0, + "65": 2559.0, + "66": 3069.0, + "67": 2927.0, + "68": 2738.0, + "69": 2846.0, + "70": 3041.0, + "71": 3061.0, + "72": 2389.0, + "73": 3015.0, + "74": 1837.0, + "75": 2460.0, + "76": 3001.0, + "77": 3192.0, + "78": 3080.0, + "79": 3147.0, + "80": 3379.0, + "81": 3688.0, + "82": 3186.0, + "83": 2693.0, + "84": 3246.0, + "85": 3306.0, + "86": 2812.0, + "87": 3720.0, + "88": 2956.0, + "89": 3306.0, + "90": 3020.0, + "91": 2788.0, + "92": 3021.0, + "93": 2685.0, + "94": 3409.0, + "95": 3254.0, + "96": 3349.0, + "97": 2981.0, + "98": 3551.0, + "99": 3273.0, + "100": 3175.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 516194816.0, - "2": 516194816.0, - "3": 516194816.0, - "4": 516194816.0, - "5": 516194816.0, - "6": 516194816.0, - "7": 516194816.0, - "8": 516194816.0, - "9": 516194816.0, - "10": 516194816.0, - "11": 516194816.0, - "12": 516194816.0, - "13": 516194816.0, - "14": 516194816.0, - "15": 516194816.0, - "16": 516194816.0, - "17": 516194816.0, - "18": 516194816.0, - "19": 516194816.0, - "20": 516194816.0, - "21": 516194816.0, - "22": 516194816.0, - "23": 516194816.0, - "24": 516194816.0, - "25": 516194816.0, - "26": 516194816.0, - "27": 516194816.0, - "28": 516194816.0, - "29": 516194816.0, - "30": 516194816.0, - "31": 516194816.0, - "32": 516194816.0, - "33": 516194816.0, - "34": 516194816.0, - "35": 516194816.0, - "36": 516194816.0, - "37": 516194816.0, - "38": 516194816.0, - "39": 516194816.0, - "40": 516194816.0, - "41": 516194816.0, - "42": 516194816.0, - "43": 516194816.0, - "44": 516194816.0, - "45": 516194816.0, - "46": 516194816.0, - 
"47": 516194816.0, - "48": 516194816.0, - "49": 516194816.0, - "50": 516194816.0, - "51": 516194816.0, - "52": 516194816.0, - "53": 516194816.0, - "54": 516194816.0, - "55": 516194816.0, - "56": 516194816.0, - "57": 516194816.0, - "58": 516194816.0, - "59": 516194816.0, - "60": 516194816.0, - "61": 516194816.0, - "62": 516194816.0, - "63": 516194816.0, - "64": 516194816.0, - "65": 516194816.0, - "66": 516194816.0, - "67": 516194816.0, - "68": 516194816.0, - "69": 516194816.0, - "70": 516194816.0, - "71": 516194816.0, - "72": 516194816.0, - "73": 516194816.0, - "74": 516194816.0, - "75": 516194816.0, - "76": 516194816.0, - "77": 516194816.0, - "78": 516194816.0, - "79": 516194816.0, - "80": 516194816.0, - "81": 516194816.0, - "82": 516194816.0, - "83": 516194816.0, - "84": 516194816.0, - "85": 516194816.0, - "86": 516194816.0, - "87": 516194816.0, - "88": 516194816.0, - "89": 516194816.0, - "90": 516194816.0, - "91": 516194816.0, - "92": 516194816.0, - "93": 516194816.0, - "94": 516194816.0, - "95": 516194816.0, - "96": 516194816.0, - "97": 516194816.0, - "98": 516194816.0, - "99": 516194816.0, - "100": 516194816.0 + "1": 514359808.0, + "2": 514359808.0, + "3": 514359808.0, + "4": 514359808.0, + "5": 514359808.0, + "6": 514359808.0, + "7": 514359808.0, + "8": 514359808.0, + "9": 514359808.0, + "10": 514359808.0, + "11": 514359808.0, + "12": 514359808.0, + "13": 514359808.0, + "14": 514359808.0, + "15": 514359808.0, + "16": 514359808.0, + "17": 514359808.0, + "18": 514359808.0, + "19": 514359808.0, + "20": 514359808.0, + "21": 514359808.0, + "22": 514359808.0, + "23": 514359808.0, + "24": 514359808.0, + "25": 514359808.0, + "26": 514359808.0, + "27": 514359808.0, + "28": 514359808.0, + "29": 514359808.0, + "30": 514359808.0, + "31": 514359808.0, + "32": 514359808.0, + "33": 514359808.0, + "34": 514359808.0, + "35": 514359808.0, + "36": 514359808.0, + "37": 514359808.0, + "38": 514359808.0, + "39": 514359808.0, + "40": 514359808.0, + "41": 514359808.0, + "42": 
514359808.0, + "43": 514359808.0, + "44": 514359808.0, + "45": 514359808.0, + "46": 514359808.0, + "47": 514359808.0, + "48": 514359808.0, + "49": 514359808.0, + "50": 514359808.0, + "51": 514359808.0, + "52": 514359808.0, + "53": 514359808.0, + "54": 514359808.0, + "55": 514359808.0, + "56": 514359808.0, + "57": 514359808.0, + "58": 514359808.0, + "59": 514359808.0, + "60": 514359808.0, + "61": 514359808.0, + "62": 514359808.0, + "63": 514359808.0, + "64": 514359808.0, + "65": 514359808.0, + "66": 514359808.0, + "67": 514359808.0, + "68": 514359808.0, + "69": 514359808.0, + "70": 514359808.0, + "71": 514359808.0, + "72": 514359808.0, + "73": 514359808.0, + "74": 514359808.0, + "75": 514359808.0, + "76": 514359808.0, + "77": 514359808.0, + "78": 514359808.0, + "79": 514359808.0, + "80": 514359808.0, + "81": 514359808.0, + "82": 514359808.0, + "83": 514359808.0, + "84": 514359808.0, + "85": 514359808.0, + "86": 514359808.0, + "87": 514359808.0, + "88": 514359808.0, + "89": 514359808.0, + "90": 514359808.0, + "91": 514359808.0, + "92": 514359808.0, + "93": 514359808.0, + "94": 514359808.0, + "95": 514359808.0, + "96": 514359808.0, + "97": 514359808.0, + "98": 514359808.0, + "99": 514359808.0, + "100": 514359808.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1670130688.0, - "2": 1840523776.0, - "3": 1840523776.0, - "4": 1840523776.0, - "5": 1840523776.0, - "6": 1840523776.0, - "7": 1840523776.0, - "8": 1840523776.0, - "9": 1840523776.0, - "10": 1840523776.0, - "11": 1840523776.0, - "12": 1840523776.0, - "13": 1840523776.0, - "14": 1840523776.0, - "15": 1840523776.0, - "16": 1840523776.0, - "17": 1840523776.0, - "18": 1840523776.0, - "19": 1840523776.0, - "20": 1840523776.0, - "21": 1840523776.0, - "22": 1840523776.0, - "23": 1840523776.0, - "24": 1840523776.0, - "25": 1840523776.0, - "26": 1840523776.0, - "27": 1840523776.0, - "28": 1840523776.0, - "29": 1840523776.0, - "30": 1840523776.0, - "31": 
1840523776.0, - "32": 1840523776.0, - "33": 1840523776.0, - "34": 1840523776.0, - "35": 1840523776.0, - "36": 1840523776.0, - "37": 1840523776.0, - "38": 1840523776.0, - "39": 1840523776.0, - "40": 1840523776.0, - "41": 1840523776.0, - "42": 1840523776.0, - "43": 1840523776.0, - "44": 1840523776.0, - "45": 1840523776.0, - "46": 1840523776.0, - "47": 1840523776.0, - "48": 1840523776.0, - "49": 1840523776.0, - "50": 1840523776.0, - "51": 1840523776.0, - "52": 1840523776.0, - "53": 1840523776.0, - "54": 1840523776.0, - "55": 1840523776.0, - "56": 1840523776.0, - "57": 1840523776.0, - "58": 1840523776.0, - "59": 1840523776.0, - "60": 1840523776.0, - "61": 1840523776.0, - "62": 1840523776.0, - "63": 1840523776.0, - "64": 1840523776.0, - "65": 1840523776.0, - "66": 1840523776.0, - "67": 1840523776.0, - "68": 1840523776.0, - "69": 1840523776.0, - "70": 1840523776.0, - "71": 1840523776.0, - "72": 1840523776.0, - "73": 1840523776.0, - "74": 1840523776.0, - "75": 1840523776.0, - "76": 1840523776.0, - "77": 1840523776.0, - "78": 1840523776.0, - "79": 1840523776.0, - "80": 1840523776.0, - "81": 1840523776.0, - "82": 1840523776.0, - "83": 1841310208.0, - "84": 1841310208.0, - "85": 1841310208.0, - "86": 1841310208.0, - "87": 1841310208.0, - "88": 1841310208.0, - "89": 1841310208.0, - "90": 1841310208.0, - "91": 1841310208.0, - "92": 1841310208.0, - "93": 1841310208.0, - "94": 1841310208.0, - "95": 1841310208.0, - "96": 1841310208.0, - "97": 1841310208.0, - "98": 1841310208.0, - "99": 1841310208.0, - "100": 1841310208.0 + "1": 1670148096.0, + "2": 1837640192.0, + "3": 1837640192.0, + "4": 1837640192.0, + "5": 1837640192.0, + "6": 1837640192.0, + "7": 1837640192.0, + "8": 1837640192.0, + "9": 1837640192.0, + "10": 1837640192.0, + "11": 1837640192.0, + "12": 1837640192.0, + "13": 1837640192.0, + "14": 1837640192.0, + "15": 1837640192.0, + "16": 1837640192.0, + "17": 1837640192.0, + "18": 1837640192.0, + "19": 1837640192.0, + "20": 1837640192.0, + "21": 1837640192.0, + "22": 
1837640192.0, + "23": 1837640192.0, + "24": 1837640192.0, + "25": 1837640192.0, + "26": 1837640192.0, + "27": 1837640192.0, + "28": 1837640192.0, + "29": 1837640192.0, + "30": 1837640192.0, + "31": 1837640192.0, + "32": 1837640192.0, + "33": 1837640192.0, + "34": 1837640192.0, + "35": 1837640192.0, + "36": 1837640192.0, + "37": 1837640192.0, + "38": 1837640192.0, + "39": 1837640192.0, + "40": 1837640192.0, + "41": 1837640192.0, + "42": 1837640192.0, + "43": 1837640192.0, + "44": 1837640192.0, + "45": 1837640192.0, + "46": 1837640192.0, + "47": 1837640192.0, + "48": 1837640192.0, + "49": 1837640192.0, + "50": 1837640192.0, + "51": 1837640192.0, + "52": 1837640192.0, + "53": 1837640192.0, + "54": 1837640192.0, + "55": 1837640192.0, + "56": 1837640192.0, + "57": 1837640192.0, + "58": 1837640192.0, + "59": 1837640192.0, + "60": 1837640192.0, + "61": 1837640192.0, + "62": 1837640192.0, + "63": 1837640192.0, + "64": 1837640192.0, + "65": 1837640192.0, + "66": 1837640192.0, + "67": 1837640192.0, + "68": 1837640192.0, + "69": 1837640192.0, + "70": 1837640192.0, + "71": 1837640192.0, + "72": 1837640192.0, + "73": 1837640192.0, + "74": 1837640192.0, + "75": 1837640192.0, + "76": 1837640192.0, + "77": 1837640192.0, + "78": 1837640192.0, + "79": 1837640192.0, + "80": 1837640192.0, + "81": 1837640192.0, + "82": 1837640192.0, + "83": 1837640192.0, + "84": 1837640192.0, + "85": 1837640192.0, + "86": 1837640192.0, + "87": 1837640192.0, + "88": 1837640192.0, + "89": 1837640192.0, + "90": 1837640192.0, + "91": 1837640192.0, + "92": 1837640192.0, + "93": 1837640192.0, + "94": 1837640192.0, + "95": 1837640192.0, + "96": 1837640192.0, + "97": 1837640192.0, + "98": 1837640192.0, + "99": 1837640192.0, + "100": 1837640192.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 15.65402, - "2": 0.15533, - "3": 0.13713, - "4": 0.14193, - "5": 0.13861, - "6": 0.13948, - "7": 0.13637, - "8": 0.13619, - "9": 0.14162, - "10": 0.13725, - "11": 
0.13988, - "12": 0.14179, - "13": 0.14346, - "14": 0.14488, - "15": 0.1468, - "16": 0.14288, - "17": 0.13708, - "18": 0.13765, - "19": 0.13957, - "20": 0.13778, - "21": 0.13931, - "22": 0.13758, - "23": 0.13751, - "24": 0.14023, - "25": 0.14508, - "26": 0.15744, - "27": 0.15391, - "28": 0.15519, - "29": 0.14118, - "30": 0.1391, - "31": 0.13604, - "32": 0.1366, - "33": 0.13813, - "34": 0.13786, - "35": 0.13728, - "36": 0.13981, - "37": 0.14024, - "38": 0.13688, - "39": 0.13391, - "40": 0.13738, - "41": 0.14059, - "42": 0.13512, - "43": 0.13775, - "44": 0.13641, - "45": 0.13686, - "46": 0.14053, - "47": 0.13951, - "48": 0.14166, - "49": 0.13555, - "50": 0.13577, - "51": 0.14328, - "52": 0.14201, - "53": 0.13861, - "54": 0.13965, - "55": 0.13807, - "56": 0.14044, - "57": 0.14358, - "58": 0.14042, - "59": 0.13858, - "60": 0.13959, - "61": 0.13788, - "62": 0.14032, - "63": 0.13843, - "64": 0.13942, - "65": 0.13742, - "66": 0.13948, - "67": 0.14263, - "68": 0.13848, - "69": 0.13944, - "70": 0.13874, - "71": 0.14302, - "72": 0.13748, - "73": 0.13837, - "74": 0.13911, - "75": 0.13965, - "76": 0.1466, - "77": 0.14259, - "78": 0.13635, - "79": 0.14025, - "80": 0.14725, - "81": 0.14592, - "82": 0.14832, - "83": 0.14727, - "84": 0.14437, - "85": 0.13721, - "86": 0.14235, - "87": 0.13812, - "88": 0.13937, - "89": 0.1389, - "90": 0.13661, - "91": 0.1432, - "92": 0.1389, - "93": 0.13881, - "94": 0.13803, - "95": 0.13815, - "96": 0.14203, - "97": 0.13816, - "98": 0.13963, - "99": 0.14236, - "100": 0.14371 + "1": 9.53425, + "2": 0.1525, + "3": 0.1318, + "4": 0.11378, + "5": 0.11192, + "6": 0.11218, + "7": 0.11154, + "8": 0.11173, + "9": 0.11229, + "10": 0.11154, + "11": 0.11167, + "12": 0.11151, + "13": 0.11086, + "14": 0.11183, + "15": 0.1112, + "16": 0.11119, + "17": 0.11049, + "18": 0.11127, + "19": 0.11165, + "20": 0.11158, + "21": 0.11135, + "22": 0.1116, + "23": 0.11105, + "24": 0.11218, + "25": 0.11189, + "26": 0.11148, + "27": 0.11258, + "28": 0.11129, + "29": 0.11127, + 
"30": 0.11264, + "31": 0.11113, + "32": 0.11139, + "33": 0.11019, + "34": 0.11118, + "35": 0.11227, + "36": 0.11007, + "37": 0.11047, + "38": 0.1112, + "39": 0.11057, + "40": 0.1122, + "41": 0.11135, + "42": 0.11041, + "43": 0.1105, + "44": 0.11017, + "45": 0.11127, + "46": 0.11089, + "47": 0.11064, + "48": 0.11167, + "49": 0.11021, + "50": 0.111, + "51": 0.13065, + "52": 0.12181, + "53": 0.11254, + "54": 0.11131, + "55": 0.11274, + "56": 0.11203, + "57": 0.11122, + "58": 0.11071, + "59": 0.1147, + "60": 0.11126, + "61": 0.11099, + "62": 0.11099, + "63": 0.11124, + "64": 0.11385, + "65": 0.11135, + "66": 0.11119, + "67": 0.11002, + "68": 0.11148, + "69": 0.11088, + "70": 0.1124, + "71": 0.11625, + "72": 0.11347, + "73": 0.11265, + "74": 0.11196, + "75": 0.11175, + "76": 0.11084, + "77": 0.10995, + "78": 0.11184, + "79": 0.10992, + "80": 0.11019, + "81": 0.1106, + "82": 0.11145, + "83": 0.11121, + "84": 0.11016, + "85": 0.11204, + "86": 0.11064, + "87": 0.11178, + "88": 0.11053, + "89": 0.11128, + "90": 0.11129, + "91": 0.11264, + "92": 0.1113, + "93": 0.1105, + "94": 0.11459, + "95": 0.11356, + "96": 0.10985, + "97": 0.1104, + "98": 0.11182, + "99": 0.11024, + "100": 0.11054 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..97a4288db23 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": 
"nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85585, + "52": 9.75241, + "53": 10.07586, + "54": 9.95689, + "55": 9.88209, + "56": 9.63139, + "57": 9.48651, + "58": 9.83118, + "59": 9.58907, + "60": 9.5065, + "61": 9.7037, + "62": 9.98291, + "63": 9.38318, + "64": 9.77909, + "65": 8.95183, + "66": 9.70161, + "67": 9.37209, + "68": 9.78854, + "69": 9.79856, + "70": 9.74746, + "71": 9.61908, + "72": 9.58507, + "73": 9.49728, + "74": 8.9393, + "75": 9.42707, + "76": 9.08024, + "77": 10.06567, + "78": 9.72898, + "79": 9.37773, + "80": 9.41002, + "81": 9.47979, + "82": 9.70181, + "83": 9.30624, + "84": 9.42099, + "85": 9.6138, + "86": 9.07653, + "87": 9.59455, + "88": 9.75073, + "89": 9.60246, + "90": 9.81898, + "91": 9.33898, + "92": 9.35717, + "93": 9.07886, + "94": 8.8351, + "95": 9.52175, + "96": 9.5301, + "97": 9.3131, + "98": 9.67785, + "99": 8.89062, + "100": 9.39726 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + 
"32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2553.0, + "52": 2558.0, + "53": 2867.0, + "54": 2887.0, + "55": 2364.0, + "56": 2737.0, + "57": 2446.0, + "58": 2933.0, + "59": 2696.0, + "60": 2423.0, + "61": 3055.0, + "62": 2568.0, + "63": 2454.0, + "64": 2951.0, + "65": 2655.0, + "66": 3084.0, + "67": 2895.0, + "68": 2774.0, + "69": 2948.0, + "70": 3026.0, + "71": 2920.0, + "72": 2346.0, + "73": 2943.0, + "74": 1862.0, + "75": 2492.0, + "76": 3006.0, + "77": 3124.0, + "78": 3129.0, + "79": 3132.0, + "80": 3296.0, + "81": 3746.0, + "82": 3327.0, + "83": 2719.0, + "84": 3230.0, + "85": 3271.0, + "86": 2743.0, + "87": 3821.0, + "88": 2989.0, + "89": 3310.0, + "90": 3031.0, + "91": 2802.0, + "92": 3065.0, + "93": 2744.0, + "94": 3417.0, + "95": 3408.0, + "96": 3345.0, + "97": 3086.0, + "98": 3708.0, + "99": 3174.0, + "100": 3141.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 696530432.0, + "52": 696530432.0, + "53": 696530432.0, + 
"54": 696530432.0, + "55": 696530432.0, + "56": 696530432.0, + "57": 696530432.0, + "58": 696530432.0, + "59": 696530432.0, + "60": 696530432.0, + "61": 696530432.0, + "62": 696530432.0, + "63": 696530432.0, + "64": 696530432.0, + "65": 696530432.0, + "66": 696530432.0, + "67": 696530432.0, + "68": 696530432.0, + "69": 696530432.0, + "70": 696530432.0, + "71": 696530432.0, + "72": 696530432.0, + "73": 696530432.0, + "74": 696530432.0, + "75": 696530432.0, + "76": 696530432.0, + "77": 696530432.0, + "78": 696530432.0, + "79": 696530432.0, + "80": 696530432.0, + "81": 696530432.0, + "82": 696530432.0, + "83": 696530432.0, + "84": 696530432.0, + "85": 696530432.0, + "86": 696530432.0, + "87": 696530432.0, + "88": 696530432.0, + "89": 696530432.0, + "90": 696530432.0, + "91": 696530432.0, + "92": 696530432.0, + "93": 696530432.0, + "94": 696530432.0, + "95": 696530432.0, + "96": 696530432.0, + "97": 696530432.0, + "98": 696530432.0, + "99": 696530432.0, + "100": 696530432.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2031341568.0, + "52": 2031341568.0, + "53": 2031341568.0, + "54": 2031341568.0, + "55": 2031341568.0, + "56": 2031341568.0, + "57": 2031341568.0, + 
"58": 2031341568.0, + "59": 2031341568.0, + "60": 2031341568.0, + "61": 2031341568.0, + "62": 2031341568.0, + "63": 2031341568.0, + "64": 2031341568.0, + "65": 2031341568.0, + "66": 2031341568.0, + "67": 2031341568.0, + "68": 2031341568.0, + "69": 2031341568.0, + "70": 2031341568.0, + "71": 2031341568.0, + "72": 2031341568.0, + "73": 2031341568.0, + "74": 2031341568.0, + "75": 2031341568.0, + "76": 2031341568.0, + "77": 2031341568.0, + "78": 2031341568.0, + "79": 2031341568.0, + "80": 2031341568.0, + "81": 2031341568.0, + "82": 2031341568.0, + "83": 2031341568.0, + "84": 2031341568.0, + "85": 2031341568.0, + "86": 2031341568.0, + "87": 2031341568.0, + "88": 2031341568.0, + "89": 2031341568.0, + "90": 2031341568.0, + "91": 2031341568.0, + "92": 2031341568.0, + "93": 2031341568.0, + "94": 2031341568.0, + "95": 2031341568.0, + "96": 2031341568.0, + "97": 2031341568.0, + "98": 2031341568.0, + "99": 2031341568.0, + "100": 2031341568.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.51716, + "52": 0.17953, + "53": 0.13809, + "54": 0.13557, + "55": 0.13446, + "56": 0.13644, + "57": 0.13533, + "58": 0.12827, + "59": 0.12403, + "60": 0.12008, + "61": 0.11711, + "62": 0.11537, + 
"63": 0.11423, + "64": 0.11329, + "65": 0.11414, + "66": 0.11444, + "67": 0.11357, + "68": 0.11307, + "69": 0.11383, + "70": 0.11317, + "71": 0.11391, + "72": 0.11323, + "73": 0.11305, + "74": 0.11159, + "75": 0.11212, + "76": 0.11331, + "77": 0.11201, + "78": 0.11136, + "79": 0.11362, + "80": 0.11395, + "81": 0.11649, + "82": 0.11432, + "83": 0.11438, + "84": 0.11332, + "85": 0.11369, + "86": 0.11489, + "87": 0.11276, + "88": 0.1132, + "89": 0.11853, + "90": 0.11588, + "91": 0.11412, + "92": 0.11248, + "93": 0.11752, + "94": 0.11825, + "95": 0.11624, + "96": 0.11545, + "97": 0.11325, + "98": 0.11377, + "99": 0.11384, + "100": 0.11275 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json index 3c656cc949e..ccdfa9ac12e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 10.92655, - "5": 10.92717, - "10": 10.90792, - "15": 10.88294, - "20": 10.77597, - "25": 10.59265, - "30": 10.39175, - "35": 10.29702, - "40": 10.09661, - "45": 9.84468, - "50": 9.90943, - "55": 9.87772, - "60": 9.49123, - "65": 8.94254, - "70": 9.72275, - "75": 9.41892, - "80": 9.4006, - "85": 9.61185, - "90": 9.81025, - "95": 9.51723, - "100": 9.40135 + "1": 10.92228, + "2": 10.92833, + "3": 10.9171, + "4": 10.90497, + "5": 10.92805, + "6": 10.9367, + "7": 10.90405, + "8": 10.92231, + "9": 10.91258, + "10": 10.90849, + "11": 10.89333, + "12": 10.92084, + "13": 10.91496, 
+ "14": 10.92147, + "15": 10.88434, + "16": 10.87455, + "17": 10.83916, + "18": 10.87305, + "19": 10.85329, + "20": 10.77493, + "21": 10.74754, + "22": 10.63151, + "23": 10.75621, + "24": 10.65566, + "25": 10.59217, + "26": 10.6533, + "27": 10.64878, + "28": 10.59653, + "29": 10.61011, + "30": 10.39283, + "31": 10.15724, + "32": 10.49222, + "33": 10.47943, + "34": 10.24015, + "35": 10.2971, + "36": 10.2456, + "37": 10.35281, + "38": 10.20531, + "39": 10.4042, + "40": 10.0955, + "41": 10.15277, + "42": 10.21885, + "43": 9.85522, + "44": 9.96244, + "45": 9.84618, + "46": 9.83799, + "47": 10.13882, + "48": 9.85698, + "49": 9.53751, + "50": 9.90881, + "51": 9.84975, + "52": 9.74161, + "53": 10.06325, + "54": 9.94588, + "55": 9.87743, + "56": 9.62751, + "57": 9.47268, + "58": 9.82914, + "59": 9.58307, + "60": 9.49183, + "61": 9.6996, + "62": 9.98093, + "63": 9.37223, + "64": 9.77562, + "65": 8.9434, + "66": 9.69995, + "67": 9.36423, + "68": 9.78704, + "69": 9.78393, + "70": 9.72294, + "71": 9.6074, + "72": 9.5842, + "73": 9.49096, + "74": 8.94874, + "75": 9.41816, + "76": 9.08732, + "77": 10.06288, + "78": 9.72904, + "79": 9.37094, + "80": 9.40034, + "81": 9.47762, + "82": 9.69127, + "83": 9.30769, + "84": 9.4126, + "85": 9.61136, + "86": 9.07624, + "87": 9.59463, + "88": 9.74771, + "89": 9.60681, + "90": 9.81083, + "91": 9.34451, + "92": 9.3654, + "93": 9.07749, + "94": 8.82979, + "95": 9.51679, + "96": 9.5255, + "97": 9.31042, + "98": 9.67816, + "99": 8.8885, + "100": 9.40133 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 1676.0, - "5": 1938.0, - "10": 1402.0, - "15": 1768.0, - "20": 1651.0, - "25": 1671.0, - "30": 1908.0, - "35": 1915.0, + "1": 1712.0, + "2": 1881.0, + "3": 1751.0, + "4": 1774.0, + "5": 1918.0, + "6": 1854.0, + "7": 1911.0, + "8": 1805.0, + "9": 2004.0, + "10": 1454.0, + "11": 1894.0, + "12": 1849.0, + "13": 1979.0, + "14": 1898.0, + "15": 1911.0, + "16": 1867.0, + "17": 1857.0, + 
"18": 1662.0, + "19": 1835.0, + "20": 1699.0, + "21": 1824.0, + "22": 1714.0, + "23": 1963.0, + "24": 1705.0, + "25": 1632.0, + "26": 1807.0, + "27": 1895.0, + "28": 2017.0, + "29": 2082.0, + "30": 1933.0, + "31": 1618.0, + "32": 1952.0, + "33": 2137.0, + "34": 1944.0, + "35": 2051.0, + "36": 1989.0, + "37": 2452.0, + "38": 2233.0, + "39": 2486.0, "40": 2163.0, - "45": 2125.0, - "50": 2496.0, - "55": 2392.0, - "60": 2334.0, - "65": 2771.0, - "70": 3234.0, - "75": 2675.0, - "80": 3564.0, - "85": 3284.0, - "90": 3079.0, - "95": 3405.0, - "100": 3430.0 + "41": 2380.0, + "42": 2299.0, + "43": 1970.0, + "44": 2110.0, + "45": 2033.0, + "46": 2365.0, + "47": 2636.0, + "48": 2462.0, + "49": 2351.0, + "50": 2526.0, + "51": 2604.0, + "52": 2554.0, + "53": 3020.0, + "54": 2645.0, + "55": 2449.0, + "56": 2729.0, + "57": 2438.0, + "58": 3141.0, + "59": 2784.0, + "60": 2501.0, + "61": 2876.0, + "62": 2611.0, + "63": 2367.0, + "64": 3084.0, + "65": 2831.0, + "66": 3358.0, + "67": 2825.0, + "68": 2816.0, + "69": 3037.0, + "70": 3265.0, + "71": 3105.0, + "72": 2546.0, + "73": 3030.0, + "74": 1951.0, + "75": 2615.0, + "76": 2976.0, + "77": 3452.0, + "78": 3285.0, + "79": 3243.0, + "80": 3483.0, + "81": 3696.0, + "82": 3350.0, + "83": 2802.0, + "84": 3346.0, + "85": 3210.0, + "86": 2868.0, + "87": 3804.0, + "88": 3014.0, + "89": 3346.0, + "90": 3037.0, + "91": 2796.0, + "92": 3267.0, + "93": 2761.0, + "94": 3459.0, + "95": 3435.0, + "96": 3605.0, + "97": 3075.0, + "98": 3765.0, + "99": 3082.0, + "100": 3412.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 435847168.0, - "5": 435847168.0, - "10": 435847168.0, - "15": 435847168.0, - "20": 435847168.0, - "25": 435847168.0, - "30": 435847168.0, - "35": 435847168.0, - "40": 435847168.0, - "45": 435847168.0, - "50": 435847168.0, - "55": 435847168.0, - "60": 435847168.0, - "65": 436895744.0, - "70": 435847168.0, - "75": 435847168.0, - "80": 435847168.0, - "85": 
435847168.0, - "90": 435847168.0, - "95": 435847168.0, - "100": 435847168.0 + "1": 436765184.0, + "2": 436765184.0, + "3": 436765184.0, + "4": 436765184.0, + "5": 436765184.0, + "6": 436765184.0, + "7": 436765184.0, + "8": 436765184.0, + "9": 436765184.0, + "10": 436765184.0, + "11": 436765184.0, + "12": 436765184.0, + "13": 436765184.0, + "14": 436765184.0, + "15": 436765184.0, + "16": 436765184.0, + "17": 436765184.0, + "18": 436765184.0, + "19": 436765184.0, + "20": 436765184.0, + "21": 436765184.0, + "22": 436765184.0, + "23": 436765184.0, + "24": 436765184.0, + "25": 436765184.0, + "26": 436765184.0, + "27": 436765184.0, + "28": 436765184.0, + "29": 436765184.0, + "30": 436765184.0, + "31": 436765184.0, + "32": 436765184.0, + "33": 436765184.0, + "34": 436765184.0, + "35": 436765184.0, + "36": 436765184.0, + "37": 436765184.0, + "38": 436765184.0, + "39": 436765184.0, + "40": 436765184.0, + "41": 436765184.0, + "42": 436765184.0, + "43": 436765184.0, + "44": 436765184.0, + "45": 436765184.0, + "46": 436765184.0, + "47": 436765184.0, + "48": 436765184.0, + "49": 436765184.0, + "50": 436765184.0, + "51": 436765184.0, + "52": 436765184.0, + "53": 436765184.0, + "54": 436765184.0, + "55": 436765184.0, + "56": 436765184.0, + "57": 436765184.0, + "58": 436765184.0, + "59": 436765184.0, + "60": 436765184.0, + "61": 436765184.0, + "62": 436765184.0, + "63": 436765184.0, + "64": 436765184.0, + "65": 436765184.0, + "66": 436765184.0, + "67": 436765184.0, + "68": 436765184.0, + "69": 436765184.0, + "70": 436765184.0, + "71": 436765184.0, + "72": 436765184.0, + "73": 436765184.0, + "74": 436765184.0, + "75": 436765184.0, + "76": 436765184.0, + "77": 436765184.0, + "78": 436765184.0, + "79": 436765184.0, + "80": 436765184.0, + "81": 436765184.0, + "82": 436765184.0, + "83": 436765184.0, + "84": 436765184.0, + "85": 436765184.0, + "86": 436765184.0, + "87": 436765184.0, + "88": 436765184.0, + "89": 436765184.0, + "90": 436765184.0, + "91": 436765184.0, + "92": 436765184.0, 
+ "93": 436765184.0, + "94": 436765184.0, + "95": 436765184.0, + "96": 436765184.0, + "97": 436765184.0, + "98": 436765184.0, + "99": 436765184.0, + "100": 436765184.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 2658189824.0, - "5": 2658189824.0, - "10": 2658189824.0, - "15": 2658189824.0, - "20": 2658189824.0, - "25": 2658189824.0, - "30": 2658189824.0, - "35": 2658189824.0, - "40": 2658189824.0, - "45": 2658189824.0, - "50": 2658189824.0, - "55": 2658189824.0, - "60": 2658189824.0, - "65": 2658189824.0, - "70": 2658189824.0, - "75": 2658189824.0, - "80": 2658189824.0, - "85": 2658189824.0, - "90": 2658189824.0, - "95": 2658189824.0, - "100": 2658189824.0 + "1": 1591768576.0, + "2": 1772628480.0, + "3": 1772628480.0, + "4": 1772628480.0, + "5": 1772628480.0, + "6": 1772628480.0, + "7": 1772628480.0, + "8": 1772628480.0, + "9": 1772628480.0, + "10": 1772628480.0, + "11": 1772628480.0, + "12": 1772628480.0, + "13": 1772628480.0, + "14": 1772628480.0, + "15": 1772628480.0, + "16": 1772628480.0, + "17": 1772628480.0, + "18": 1772628480.0, + "19": 1772628480.0, + "20": 1772628480.0, + "21": 1772628480.0, + "22": 1772628480.0, + "23": 1772628480.0, + "24": 1772628480.0, + "25": 1772628480.0, + "26": 1772628480.0, + "27": 1772628480.0, + "28": 1772628480.0, + "29": 1772628480.0, + "30": 1772628480.0, + "31": 1772628480.0, + "32": 1772628480.0, + "33": 1772628480.0, + "34": 1772628480.0, + "35": 1772628480.0, + "36": 1772628480.0, + "37": 1772628480.0, + "38": 1772628480.0, + "39": 1772628480.0, + "40": 1772628480.0, + "41": 1772628480.0, + "42": 1772628480.0, + "43": 1772628480.0, + "44": 1772628480.0, + "45": 1772628480.0, + "46": 1772628480.0, + "47": 1772628480.0, + "48": 1772628480.0, + "49": 1772628480.0, + "50": 1772628480.0, + "51": 1772628480.0, + "52": 1772628480.0, + "53": 1772628480.0, + "54": 1772628480.0, + "55": 1772628480.0, + "56": 1772628480.0, + "57": 1772628480.0, + 
"58": 1772628480.0, + "59": 1772628480.0, + "60": 1772628480.0, + "61": 1772628480.0, + "62": 1772628480.0, + "63": 1772628480.0, + "64": 1772628480.0, + "65": 1772628480.0, + "66": 1772628480.0, + "67": 1772628480.0, + "68": 1772628480.0, + "69": 1772628480.0, + "70": 1772628480.0, + "71": 1772628480.0, + "72": 1772628480.0, + "73": 1772628480.0, + "74": 1772628480.0, + "75": 1772628480.0, + "76": 1772628480.0, + "77": 1772628480.0, + "78": 1772628480.0, + "79": 1772628480.0, + "80": 1772628480.0, + "81": 1772628480.0, + "82": 1772628480.0, + "83": 1772628480.0, + "84": 1772628480.0, + "85": 1772628480.0, + "86": 1772628480.0, + "87": 1772628480.0, + "88": 1772628480.0, + "89": 1772628480.0, + "90": 1772628480.0, + "91": 1772628480.0, + "92": 1772628480.0, + "93": 1772628480.0, + "94": 1772628480.0, + "95": 1772628480.0, + "96": 1772628480.0, + "97": 1772628480.0, + "98": 1772628480.0, + "99": 1772628480.0, + "100": 1772628480.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 17.69921, - "5": 0.18742, - "10": 0.18714, - "15": 0.18669, - "20": 0.18537, - "25": 0.18342, - "30": 0.18538, - "35": 0.18528, - "40": 0.18464, - "45": 0.18361, - "50": 0.18481, - "55": 0.18002, - "60": 0.17775, - "65": 0.17974, - "70": 0.17928, - "75": 0.17891, - "80": 0.17759, - "85": 0.18266, - "90": 0.18242, - "95": 0.18179, - "100": 0.18252 + "1": 6.79884, + "2": 0.21326, + "3": 0.18469, + "4": 0.17105, + "5": 0.16929, + "6": 0.17076, + "7": 0.16854, + "8": 0.17395, + "9": 0.17202, + "10": 0.17285, + "11": 0.17206, + "12": 0.17207, + "13": 0.17163, + "14": 0.17259, + "15": 0.17327, + "16": 0.17397, + "17": 0.17148, + "18": 0.21472, + "19": 0.17296, + "20": 0.17251, + "21": 0.17267, + "22": 0.17535, + "23": 0.17343, + "24": 0.17203, + "25": 0.17337, + "26": 0.16951, + "27": 0.17011, + "28": 0.16817, + "29": 0.16977, + "30": 0.17071, + "31": 0.17041, + "32": 0.17011, + "33": 0.17101, + "34": 0.16967, + "35": 
0.17036, + "36": 0.16981, + "37": 0.1698, + "38": 0.16954, + "39": 0.16912, + "40": 0.16943, + "41": 0.16939, + "42": 0.16854, + "43": 0.16921, + "44": 0.17053, + "45": 0.17026, + "46": 0.16981, + "47": 0.17026, + "48": 0.1704, + "49": 0.16972, + "50": 0.16914, + "51": 0.18301, + "52": 0.1739, + "53": 0.17306, + "54": 0.17414, + "55": 0.17269, + "56": 0.1744, + "57": 0.17288, + "58": 0.17544, + "59": 0.17344, + "60": 0.17444, + "61": 0.55151, + "62": 0.17447, + "63": 0.17397, + "64": 0.17325, + "65": 0.1739, + "66": 0.17369, + "67": 0.17326, + "68": 0.17374, + "69": 0.17249, + "70": 0.17298, + "71": 0.17197, + "72": 0.17208, + "73": 0.17303, + "74": 0.16725, + "75": 0.16595, + "76": 0.16671, + "77": 0.16787, + "78": 0.16647, + "79": 0.16683, + "80": 0.16672, + "81": 0.17084, + "82": 0.17024, + "83": 0.16993, + "84": 0.16957, + "85": 0.16932, + "86": 0.16994, + "87": 0.17023, + "88": 0.16646, + "89": 0.16652, + "90": 0.16596, + "91": 0.16647, + "92": 0.1665, + "93": 0.16668, + "94": 0.16609, + "95": 0.16694, + "96": 0.1659, + "97": 0.16601, + "98": 0.1667, + "99": 0.16701, + "100": 0.16618 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..adaf33cdb3a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 
"nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.84975, + "52": 9.74158, + "53": 10.0633, + "54": 9.94586, + "55": 9.87745, + "56": 9.62752, + "57": 9.47269, + "58": 9.82917, + "59": 9.58307, + "60": 9.49185, + "61": 9.6996, + "62": 9.98097, + "63": 9.37221, + "64": 9.77563, + "65": 8.94343, + "66": 9.69995, + "67": 9.36421, + "68": 9.78708, + "69": 9.78401, + "70": 9.72291, + "71": 9.60742, + "72": 9.5842, + "73": 9.49098, + "74": 8.94874, + "75": 9.41818, + "76": 9.08725, + "77": 10.06288, + "78": 9.72905, + "79": 9.37096, + "80": 9.40039, + "81": 9.47763, + "82": 9.69127, + "83": 9.30765, + "84": 9.41259, + "85": 9.61135, + "86": 9.07623, + "87": 9.59462, + "88": 9.74773, + "89": 9.6068, + "90": 9.81083, + "91": 9.34454, + "92": 9.3654, + "93": 9.0775, + "94": 8.82983, + "95": 9.5168, + "96": 9.52551, + "97": 9.31042, + "98": 9.67813, + "99": 8.88855, + "100": 9.40136 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + 
"38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2671.0, + "52": 2566.0, + "53": 2911.0, + "54": 2791.0, + "55": 2483.0, + "56": 2736.0, + "57": 2395.0, + "58": 3067.0, + "59": 2911.0, + "60": 2426.0, + "61": 2925.0, + "62": 2654.0, + "63": 2346.0, + "64": 3123.0, + "65": 2768.0, + "66": 3220.0, + "67": 2841.0, + "68": 2870.0, + "69": 2949.0, + "70": 3222.0, + "71": 3138.0, + "72": 2479.0, + "73": 3021.0, + "74": 1933.0, + "75": 2682.0, + "76": 3015.0, + "77": 3415.0, + "78": 3237.0, + "79": 3269.0, + "80": 3527.0, + "81": 3623.0, + "82": 3347.0, + "83": 2804.0, + "84": 3348.0, + "85": 3335.0, + "86": 2823.0, + "87": 3721.0, + "88": 3081.0, + "89": 3553.0, + "90": 3044.0, + "91": 2775.0, + "92": 3246.0, + "93": 2705.0, + "94": 3450.0, + "95": 3420.0, + "96": 3599.0, + "97": 2959.0, + "98": 3792.0, + "99": 3166.0, + "100": 3330.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 437551616.0, + "52": 437551616.0, + "53": 437551616.0, + "54": 437551616.0, + "55": 437551616.0, + "56": 437551616.0, + "57": 437551616.0, + "58": 
437551616.0, + "59": 437551616.0, + "60": 437551616.0, + "61": 437551616.0, + "62": 437551616.0, + "63": 437551616.0, + "64": 437551616.0, + "65": 437551616.0, + "66": 437551616.0, + "67": 437551616.0, + "68": 437551616.0, + "69": 437551616.0, + "70": 437551616.0, + "71": 437551616.0, + "72": 437551616.0, + "73": 437551616.0, + "74": 437551616.0, + "75": 437551616.0, + "76": 437551616.0, + "77": 437551616.0, + "78": 437551616.0, + "79": 437551616.0, + "80": 437551616.0, + "81": 437551616.0, + "82": 437551616.0, + "83": 437551616.0, + "84": 437551616.0, + "85": 437551616.0, + "86": 437551616.0, + "87": 437551616.0, + "88": 437551616.0, + "89": 437551616.0, + "90": 437551616.0, + "91": 437551616.0, + "92": 437551616.0, + "93": 437551616.0, + "94": 437551616.0, + "95": 437551616.0, + "96": 437551616.0, + "97": 437551616.0, + "98": 437551616.0, + "99": 437551616.0, + "100": 437551616.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1774462464.0, + "52": 1774463488.0, + "53": 1774463488.0, + "54": 1774463488.0, + "55": 1774463488.0, + "56": 1774463488.0, + "57": 1774463488.0, + "58": 1774463488.0, + "59": 1774463488.0, + "60": 1774463488.0, + "61": 1774463488.0, + "62": 
1774463488.0, + "63": 1774463488.0, + "64": 1774463488.0, + "65": 1774463488.0, + "66": 1774463488.0, + "67": 1774463488.0, + "68": 1774463488.0, + "69": 1774463488.0, + "70": 1774463488.0, + "71": 1774463488.0, + "72": 1774463488.0, + "73": 1774463488.0, + "74": 1774463488.0, + "75": 1774463488.0, + "76": 1774463488.0, + "77": 1774463488.0, + "78": 1774463488.0, + "79": 1774463488.0, + "80": 1774463488.0, + "81": 1774463488.0, + "82": 1774463488.0, + "83": 1774463488.0, + "84": 1774463488.0, + "85": 1774463488.0, + "86": 1774463488.0, + "87": 1774463488.0, + "88": 1774463488.0, + "89": 1774463488.0, + "90": 1774463488.0, + "91": 1774463488.0, + "92": 1774463488.0, + "93": 1774463488.0, + "94": 1774463488.0, + "95": 1774463488.0, + "96": 1774463488.0, + "97": 1774463488.0, + "98": 1774463488.0, + "99": 1774463488.0, + "100": 1774463488.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4.74138, + "52": 0.19833, + "53": 0.17523, + "54": 0.17326, + "55": 0.17289, + "56": 0.17406, + "57": 0.17353, + "58": 0.17413, + "59": 0.1741, + "60": 0.18, + "61": 0.17815, + "62": 0.1771, + "63": 0.17887, + "64": 0.17716, + "65": 0.18267, + "66": 0.18368, + "67": 0.18326, + "68": 0.1822, + 
"69": 0.18471, + "70": 0.17793, + "71": 0.17586, + "72": 0.17439, + "73": 0.17531, + "74": 0.17811, + "75": 0.18496, + "76": 0.17711, + "77": 0.17788, + "78": 0.17629, + "79": 0.1758, + "80": 0.17563, + "81": 0.17581, + "82": 0.17682, + "83": 0.17641, + "84": 0.17489, + "85": 0.17508, + "86": 0.17588, + "87": 0.176, + "88": 0.17581, + "89": 0.17485, + "90": 0.17493, + "91": 0.17412, + "92": 0.17456, + "93": 0.17597, + "94": 0.17515, + "95": 0.17511, + "96": 0.17499, + "97": 0.17485, + "98": 0.1758, + "99": 0.17572, + "100": 0.17544 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..a321d71dac5 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86897, + "2": 10.88544, + "3": 10.86473, + "4": 10.86826, + "5": 10.87436, + "6": 10.89005, + "7": 10.87769, + "8": 10.86364, + "9": 10.88282, + "10": 10.84687, + "11": 10.87102, + "12": 10.87345, + "13": 10.8814, + "14": 10.8877, + "15": 10.83869, + "16": 10.8239, + "17": 10.80197, + "18": 10.81094, + "19": 10.82192, + "20": 10.71791, + "21": 10.68914, + "22": 10.57271, + "23": 10.7081, + "24": 10.59543, + "25": 10.55292, + "26": 10.61257, + "27": 10.60051, + "28": 10.56173, + "29": 10.58089, + "30": 10.35595, + "31": 10.1182, + "32": 10.44815, + "33": 10.4542, + "34": 10.21553, + "35": 10.26124, + "36": 10.20776, + "37": 10.33673, + "38": 10.17741, + "39": 10.39297, + "40": 10.06349, + "41": 10.13887, + "42": 10.2056, + "43": 9.82809, + "44": 9.94547, + "45": 9.82561, + "46": 9.80186, + 
"47": 10.14049, + "48": 9.84276, + "49": 9.52016, + "50": 9.88454, + "51": 9.84743, + "52": 9.74209, + "53": 10.05697, + "54": 9.9505, + "55": 9.88145, + "56": 9.61274, + "57": 9.4687, + "58": 9.82193, + "59": 9.57642, + "60": 9.49762, + "61": 9.69189, + "62": 9.9867, + "63": 9.37512, + "64": 9.76679, + "65": 8.94648, + "66": 9.7023, + "67": 9.36326, + "68": 9.7831, + "69": 9.7986, + "70": 9.7317, + "71": 9.62571, + "72": 9.58488, + "73": 9.48967, + "74": 8.9286, + "75": 9.40862, + "76": 9.07925, + "77": 10.0594, + "78": 9.72288, + "79": 9.37784, + "80": 9.40429, + "81": 9.48309, + "82": 9.7004, + "83": 9.31595, + "84": 9.41838, + "85": 9.61685, + "86": 9.07533, + "87": 9.59616, + "88": 9.75215, + "89": 9.60184, + "90": 9.82281, + "91": 9.34037, + "92": 9.35854, + "93": 9.08805, + "94": 8.83037, + "95": 9.5266, + "96": 9.53049, + "97": 9.30389, + "98": 9.67196, + "99": 8.89637, + "100": 9.40644 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1621.0, + "2": 1657.0, + "3": 1580.0, + "4": 1839.0, + "5": 1862.0, + "6": 1724.0, + "7": 1714.0, + "8": 1670.0, + "9": 1762.0, + "10": 1358.0, + "11": 1734.0, + "12": 1682.0, + "13": 1761.0, + "14": 1731.0, + "15": 1788.0, + "16": 1801.0, + "17": 1866.0, + "18": 1636.0, + "19": 1709.0, + "20": 1607.0, + "21": 1821.0, + "22": 1666.0, + "23": 1991.0, + "24": 1585.0, + "25": 1587.0, + "26": 1631.0, + "27": 1714.0, + "28": 1966.0, + "29": 1997.0, + "30": 1851.0, + "31": 1581.0, + "32": 1864.0, + "33": 2107.0, + "34": 1846.0, + "35": 1982.0, + "36": 1904.0, + "37": 2373.0, + "38": 2172.0, + "39": 2343.0, + "40": 2149.0, + "41": 2331.0, + "42": 2199.0, + "43": 1914.0, + "44": 2065.0, + "45": 2081.0, + "46": 2352.0, + "47": 2497.0, + "48": 2303.0, + "49": 2346.0, + "50": 2411.0, + "51": 2491.0, + "52": 2552.0, + "53": 2980.0, + "54": 2680.0, + "55": 2274.0, + "56": 2734.0, + "57": 2319.0, + "58": 2907.0, + "59": 2886.0, + "60": 2566.0, + "61": 2855.0, + "62": 2704.0, + "63": 
2370.0, + "64": 2998.0, + "65": 2563.0, + "66": 2868.0, + "67": 2762.0, + "68": 2739.0, + "69": 2730.0, + "70": 3156.0, + "71": 2803.0, + "72": 2506.0, + "73": 2896.0, + "74": 1937.0, + "75": 2450.0, + "76": 2794.0, + "77": 3047.0, + "78": 3104.0, + "79": 3069.0, + "80": 3286.0, + "81": 3543.0, + "82": 3192.0, + "83": 2614.0, + "84": 3273.0, + "85": 3111.0, + "86": 2680.0, + "87": 3654.0, + "88": 3117.0, + "89": 3351.0, + "90": 3086.0, + "91": 2721.0, + "92": 3045.0, + "93": 2672.0, + "94": 3326.0, + "95": 3125.0, + "96": 3309.0, + "97": 3208.0, + "98": 3572.0, + "99": 2980.0, + "100": 3355.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 514359808.0, + "2": 514359808.0, + "3": 514359808.0, + "4": 514359808.0, + "5": 514359808.0, + "6": 514359808.0, + "7": 514359808.0, + "8": 514359808.0, + "9": 514359808.0, + "10": 514359808.0, + "11": 514359808.0, + "12": 514359808.0, + "13": 514359808.0, + "14": 514359808.0, + "15": 514359808.0, + "16": 514359808.0, + "17": 514359808.0, + "18": 514359808.0, + "19": 514359808.0, + "20": 514359808.0, + "21": 514359808.0, + "22": 514359808.0, + "23": 514359808.0, + "24": 514359808.0, + "25": 514359808.0, + "26": 514359808.0, + "27": 514359808.0, + "28": 514359808.0, + "29": 514359808.0, + "30": 514359808.0, + "31": 514359808.0, + "32": 514359808.0, + "33": 514359808.0, + "34": 514359808.0, + "35": 514359808.0, + "36": 514359808.0, + "37": 514359808.0, + "38": 514359808.0, + "39": 514359808.0, + "40": 514359808.0, + "41": 514359808.0, + "42": 514359808.0, + "43": 514359808.0, + "44": 514359808.0, + "45": 514359808.0, + "46": 514359808.0, + "47": 514359808.0, + "48": 514359808.0, + "49": 514359808.0, + "50": 514359808.0, + "51": 514359808.0, + "52": 514359808.0, + "53": 514359808.0, + "54": 514359808.0, + "55": 514359808.0, + "56": 514359808.0, + "57": 514359808.0, + "58": 514359808.0, + "59": 514359808.0, + "60": 514359808.0, + "61": 514359808.0, + "62": 
514359808.0, + "63": 514359808.0, + "64": 514359808.0, + "65": 514359808.0, + "66": 514359808.0, + "67": 514359808.0, + "68": 514359808.0, + "69": 514359808.0, + "70": 514359808.0, + "71": 514359808.0, + "72": 514359808.0, + "73": 514359808.0, + "74": 514359808.0, + "75": 514359808.0, + "76": 514359808.0, + "77": 514359808.0, + "78": 514359808.0, + "79": 514359808.0, + "80": 514359808.0, + "81": 514359808.0, + "82": 514359808.0, + "83": 514359808.0, + "84": 514359808.0, + "85": 514359808.0, + "86": 514359808.0, + "87": 514359808.0, + "88": 514359808.0, + "89": 514359808.0, + "90": 514359808.0, + "91": 514359808.0, + "92": 514359808.0, + "93": 514359808.0, + "94": 514359808.0, + "95": 514359808.0, + "96": 514359808.0, + "97": 514359808.0, + "98": 514359808.0, + "99": 514359808.0, + "100": 514359808.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1258060288.0, + "2": 1437084160.0, + "3": 1437084160.0, + "4": 1437084160.0, + "5": 1437084160.0, + "6": 1437084160.0, + "7": 1437084160.0, + "8": 1437084160.0, + "9": 1437084160.0, + "10": 1437084160.0, + "11": 1437084160.0, + "12": 1437084160.0, + "13": 1437084160.0, + "14": 1437084160.0, + "15": 1437084160.0, + "16": 1437084160.0, + "17": 1437084160.0, + "18": 1437084160.0, + "19": 1437084160.0, + "20": 1437084160.0, + "21": 1437084160.0, + "22": 1437084160.0, + "23": 1437084160.0, + "24": 1437084160.0, + "25": 1437084160.0, + "26": 1437084160.0, + "27": 1437084160.0, + "28": 1437084160.0, + "29": 1437084160.0, + "30": 1437084160.0, + "31": 1437084160.0, + "32": 1437084160.0, + "33": 1437084160.0, + "34": 1437084160.0, + "35": 1437084160.0, + "36": 1437084160.0, + "37": 1437084160.0, + "38": 1437084160.0, + "39": 1437084160.0, + "40": 1437084160.0, + "41": 1437084160.0, + "42": 1437084160.0, + "43": 1437084160.0, + "44": 1437084160.0, + "45": 1437084160.0, + "46": 1437084160.0, + "47": 1437084160.0, + "48": 1437084160.0, + "49": 1437084160.0, + 
"50": 1437084160.0, + "51": 1437084160.0, + "52": 1437084160.0, + "53": 1437084160.0, + "54": 1437084160.0, + "55": 1437084160.0, + "56": 1437084160.0, + "57": 1437084160.0, + "58": 1437084160.0, + "59": 1437084160.0, + "60": 1437084160.0, + "61": 1437084160.0, + "62": 1437084160.0, + "63": 1437084160.0, + "64": 1437084160.0, + "65": 1437084160.0, + "66": 1437084160.0, + "67": 1437084160.0, + "68": 1437084160.0, + "69": 1437084160.0, + "70": 1437084160.0, + "71": 1437084160.0, + "72": 1437084160.0, + "73": 1437084160.0, + "74": 1437084160.0, + "75": 1437084160.0, + "76": 1437084160.0, + "77": 1437084160.0, + "78": 1437084160.0, + "79": 1437084160.0, + "80": 1437084160.0, + "81": 1437084160.0, + "82": 1437084160.0, + "83": 1437084160.0, + "84": 1437084160.0, + "85": 1437084160.0, + "86": 1437084160.0, + "87": 1437084160.0, + "88": 1437084160.0, + "89": 1437084160.0, + "90": 1437084160.0, + "91": 1437084160.0, + "92": 1437084160.0, + "93": 1437084160.0, + "94": 1437084160.0, + "95": 1437084160.0, + "96": 1437084160.0, + "97": 1437084160.0, + "98": 1437084160.0, + "99": 1437084160.0, + "100": 1437084160.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.46014, + "2": 0.22036, + "3": 0.24678, + "4": 0.1906, + "5": 0.23432, + "6": 0.19337, + "7": 0.20952, + "8": 0.20857, + "9": 0.20711, + "10": 0.21582, + "11": 0.20302, + "12": 0.23361, + "13": 0.21601, + "14": 0.19637, + "15": 0.19187, + "16": 0.19595, + "17": 0.19262, + "18": 0.25658, + "19": 0.19382, + "20": 0.23562, + "21": 0.19141, + "22": 0.19045, + "23": 0.25041, + "24": 0.19507, + "25": 0.19119, + "26": 0.25125, + "27": 0.24158, + "28": 0.19174, + "29": 0.19271, + "30": 0.19107, + "31": 0.20992, + "32": 0.19656, + "33": 0.22065, + "34": 0.24506, + "35": 0.26305, + "36": 0.19488, + "37": 0.21539, + "38": 0.19008, + "39": 0.45338, + "40": 0.19345, + "41": 0.19327, + "42": 0.19025, + "43": 0.2339, + "44": 0.19531, + "45": 0.19303, + "46": 0.22612, + 
"47": 0.19173, + "48": 0.22577, + "49": 0.19067, + "50": 0.23575, + "51": 0.24917, + "52": 0.22723, + "53": 0.22561, + "54": 0.22604, + "55": 0.22405, + "56": 0.22789, + "57": 0.22456, + "58": 0.23947, + "59": 0.24294, + "60": 0.22777, + "61": 0.22508, + "62": 0.2306, + "63": 0.23205, + "64": 0.23143, + "65": 0.23321, + "66": 0.23216, + "67": 0.23316, + "68": 0.23149, + "69": 0.23283, + "70": 0.22854, + "71": 0.24333, + "72": 0.23197, + "73": 0.22937, + "74": 0.23068, + "75": 0.2279, + "76": 0.22968, + "77": 0.25609, + "78": 0.25409, + "79": 0.25184, + "80": 0.22949, + "81": 0.22763, + "82": 0.22592, + "83": 0.22813, + "84": 0.22963, + "85": 0.23411, + "86": 0.22821, + "87": 0.23117, + "88": 0.23326, + "89": 0.22984, + "90": 0.22828, + "91": 0.23148, + "92": 0.23378, + "93": 0.23729, + "94": 0.23173, + "95": 0.23146, + "96": 0.23193, + "97": 0.23076, + "98": 0.33615, + "99": 0.23042, + "100": 0.25353 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100.json index e895f06a28a..6e4aa9e48e0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 517505536.0, - "2": 517505536.0, - "3": 517505536.0, - "4": 517505536.0, - "5": 517505536.0, - "6": 517505536.0, - "7": 517505536.0, - "8": 517505536.0, - "9": 517505536.0, - "10": 517505536.0, - "11": 517505536.0, - "12": 517505536.0, - "13": 517505536.0, - "14": 517505536.0, - "15": 517505536.0, - "16": 517505536.0, - "17": 517505536.0, - "18": 
517505536.0, - "19": 517505536.0, - "20": 517505536.0, - "21": 517505536.0, - "22": 517505536.0, - "23": 517505536.0, - "24": 517505536.0, - "25": 517505536.0, - "26": 517505536.0, - "27": 517505536.0, - "28": 517505536.0, - "29": 517505536.0, - "30": 517505536.0, - "31": 517505536.0, - "32": 517505536.0, - "33": 517505536.0, - "34": 517505536.0, - "35": 517505536.0, - "36": 517505536.0, - "37": 517505536.0, - "38": 517505536.0, - "39": 517505536.0, - "40": 517505536.0, - "41": 517505536.0, - "42": 517505536.0, - "43": 517505536.0, - "44": 517505536.0, - "45": 517505536.0, - "46": 517505536.0, - "47": 517505536.0, - "48": 517505536.0, - "49": 517505536.0, - "50": 517505536.0, - "51": 517505536.0, - "52": 517505536.0, - "53": 517505536.0, - "54": 517505536.0, - "55": 517505536.0, - "56": 517505536.0, - "57": 517505536.0, - "58": 517505536.0, - "59": 517505536.0, - "60": 517505536.0, - "61": 517505536.0, - "62": 517505536.0, - "63": 517505536.0, - "64": 517505536.0, - "65": 517505536.0, - "66": 517505536.0, - "67": 517505536.0, - "68": 517505536.0, - "69": 517505536.0, - "70": 517505536.0, - "71": 517505536.0, - "72": 517505536.0, - "73": 517505536.0, - "74": 517505536.0, - "75": 517505536.0, - "76": 517505536.0, - "77": 517505536.0, - "78": 517505536.0, - "79": 517505536.0, - "80": 517505536.0, - "81": 517505536.0, - "82": 517505536.0, - "83": 517505536.0, - "84": 517505536.0, - "85": 517505536.0, - "86": 517505536.0, - "87": 517505536.0, - "88": 517505536.0, - "89": 517505536.0, - "90": 517505536.0, - "91": 517505536.0, - "92": 517505536.0, - "93": 517505536.0, - "94": 517505536.0, - "95": 517505536.0, - "96": 517505536.0, - "97": 517505536.0, - "98": 517505536.0, - "99": 517505536.0, - "100": 517505536.0 + "1": 516456960.0, + "2": 516456960.0, + "3": 516456960.0, + "4": 516456960.0, + "5": 516456960.0, + "6": 516456960.0, + "7": 516456960.0, + "8": 516456960.0, + "9": 516456960.0, + "10": 516456960.0, + "11": 516456960.0, + "12": 516456960.0, + "13": 516456960.0, 
+ "14": 516456960.0, + "15": 516456960.0, + "16": 516456960.0, + "17": 516456960.0, + "18": 516456960.0, + "19": 516456960.0, + "20": 516456960.0, + "21": 516456960.0, + "22": 516456960.0, + "23": 516456960.0, + "24": 516456960.0, + "25": 516456960.0, + "26": 516456960.0, + "27": 516456960.0, + "28": 516456960.0, + "29": 516456960.0, + "30": 516456960.0, + "31": 516456960.0, + "32": 516456960.0, + "33": 516456960.0, + "34": 516456960.0, + "35": 516456960.0, + "36": 516456960.0, + "37": 516456960.0, + "38": 516456960.0, + "39": 516456960.0, + "40": 516456960.0, + "41": 516456960.0, + "42": 516456960.0, + "43": 516456960.0, + "44": 516456960.0, + "45": 516456960.0, + "46": 516456960.0, + "47": 516456960.0, + "48": 516456960.0, + "49": 516456960.0, + "50": 516456960.0, + "51": 516456960.0, + "52": 516456960.0, + "53": 516456960.0, + "54": 516456960.0, + "55": 516456960.0, + "56": 516456960.0, + "57": 516456960.0, + "58": 516456960.0, + "59": 516456960.0, + "60": 516456960.0, + "61": 516456960.0, + "62": 516456960.0, + "63": 516456960.0, + "64": 516456960.0, + "65": 516456960.0, + "66": 516456960.0, + "67": 516456960.0, + "68": 516456960.0, + "69": 516456960.0, + "70": 516456960.0, + "71": 516456960.0, + "72": 516456960.0, + "73": 516456960.0, + "74": 516456960.0, + "75": 516456960.0, + "76": 516456960.0, + "77": 516456960.0, + "78": 516456960.0, + "79": 516456960.0, + "80": 516456960.0, + "81": 516456960.0, + "82": 516456960.0, + "83": 516456960.0, + "84": 516456960.0, + "85": 516456960.0, + "86": 516456960.0, + "87": 516456960.0, + "88": 516456960.0, + "89": 516456960.0, + "90": 516456960.0, + "91": 516456960.0, + "92": 516456960.0, + "93": 516456960.0, + "94": 516456960.0, + "95": 516456960.0, + "96": 516456960.0, + "97": 516456960.0, + "98": 516456960.0, + "99": 516456960.0, + "100": 516456960.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1246524928.0, - "2": 1428695552.0, - "3": 1428695552.0, - 
"4": 1428695552.0, - "5": 1428695552.0, - "6": 1428695552.0, - "7": 1428695552.0, - "8": 1428695552.0, - "9": 1428695552.0, - "10": 1428695552.0, - "11": 1428695552.0, - "12": 1428695552.0, - "13": 1428695552.0, - "14": 1428695552.0, - "15": 1428695552.0, - "16": 1428695552.0, - "17": 1428695552.0, - "18": 1428695552.0, - "19": 1428695552.0, - "20": 1428695552.0, - "21": 1428695552.0, - "22": 1428695552.0, - "23": 1428695552.0, - "24": 1428695552.0, - "25": 1428695552.0, - "26": 1428695552.0, - "27": 1428695552.0, - "28": 1428695552.0, - "29": 1428695552.0, - "30": 1428695552.0, - "31": 1428695552.0, - "32": 1428695552.0, - "33": 1428695552.0, - "34": 1428695552.0, - "35": 1428695552.0, - "36": 1428695552.0, - "37": 1428695552.0, - "38": 1428695552.0, - "39": 1428695552.0, - "40": 1428695552.0, - "41": 1428695552.0, - "42": 1428695552.0, - "43": 1428695552.0, - "44": 1428695552.0, - "45": 1428695552.0, - "46": 1428695552.0, - "47": 1428695552.0, - "48": 1428695552.0, - "49": 1428695552.0, - "50": 1428695552.0, - "51": 1428695552.0, - "52": 1428695552.0, - "53": 1428695552.0, - "54": 1428695552.0, - "55": 1428695552.0, - "56": 1428695552.0, - "57": 1428695552.0, - "58": 1428695552.0, - "59": 1428695552.0, - "60": 1428695552.0, - "61": 1428695552.0, - "62": 1428695552.0, - "63": 1428695552.0, - "64": 1428695552.0, - "65": 1428695552.0, - "66": 1428695552.0, - "67": 1428695552.0, - "68": 1428695552.0, - "69": 1428695552.0, - "70": 1428695552.0, - "71": 1428695552.0, - "72": 1428695552.0, - "73": 1428695552.0, - "74": 1428695552.0, - "75": 1428695552.0, - "76": 1428695552.0, - "77": 1428695552.0, - "78": 1428695552.0, - "79": 1428695552.0, - "80": 1428695552.0, - "81": 1428695552.0, - "82": 1428695552.0, - "83": 1428695552.0, - "84": 1428695552.0, - "85": 1428695552.0, - "86": 1428695552.0, - "87": 1428695552.0, - "88": 1428695552.0, - "89": 1428695552.0, - "90": 1428695552.0, - "91": 1428695552.0, - "92": 1428695552.0, - "93": 1428695552.0, - "94": 1428695552.0, - 
"95": 1428695552.0, - "96": 1428695552.0, - "97": 1428695552.0, - "98": 1428695552.0, - "99": 1428695552.0, - "100": 1428695552.0 + "1": 1246525952.0, + "2": 1426598400.0, + "3": 1426598400.0, + "4": 1426598400.0, + "5": 1426598400.0, + "6": 1426598400.0, + "7": 1426598400.0, + "8": 1426598400.0, + "9": 1426598400.0, + "10": 1426598400.0, + "11": 1426598400.0, + "12": 1426598400.0, + "13": 1426598400.0, + "14": 1426598400.0, + "15": 1426598400.0, + "16": 1426598400.0, + "17": 1426598400.0, + "18": 1426598400.0, + "19": 1426598400.0, + "20": 1426598400.0, + "21": 1426598400.0, + "22": 1426598400.0, + "23": 1426598400.0, + "24": 1426598400.0, + "25": 1426598400.0, + "26": 1426598400.0, + "27": 1426598400.0, + "28": 1426598400.0, + "29": 1426598400.0, + "30": 1426598400.0, + "31": 1426598400.0, + "32": 1426598400.0, + "33": 1426598400.0, + "34": 1426598400.0, + "35": 1426598400.0, + "36": 1426598400.0, + "37": 1426598400.0, + "38": 1426598400.0, + "39": 1426598400.0, + "40": 1426598400.0, + "41": 1426598400.0, + "42": 1426598400.0, + "43": 1426598400.0, + "44": 1426598400.0, + "45": 1426598400.0, + "46": 1426598400.0, + "47": 1426598400.0, + "48": 1426598400.0, + "49": 1426598400.0, + "50": 1426598400.0, + "51": 1426598400.0, + "52": 1426598400.0, + "53": 1426598400.0, + "54": 1426598400.0, + "55": 1426598400.0, + "56": 1426598400.0, + "57": 1426598400.0, + "58": 1426598400.0, + "59": 1426598400.0, + "60": 1426598400.0, + "61": 1426598400.0, + "62": 1426598400.0, + "63": 1426598400.0, + "64": 1426598400.0, + "65": 1426598400.0, + "66": 1426598400.0, + "67": 1426598400.0, + "68": 1426598400.0, + "69": 1426598400.0, + "70": 1426598400.0, + "71": 1426598400.0, + "72": 1426598400.0, + "73": 1426598400.0, + "74": 1426598400.0, + "75": 1426598400.0, + "76": 1426598400.0, + "77": 1426598400.0, + "78": 1426598400.0, + "79": 1426598400.0, + "80": 1426598400.0, + "81": 1426598400.0, + "82": 1426598400.0, + "83": 1426598400.0, + "84": 1426598400.0, + "85": 1426598400.0, + "86": 
1426598400.0, + "87": 1426598400.0, + "88": 1426598400.0, + "89": 1426598400.0, + "90": 1426598400.0, + "91": 1426598400.0, + "92": 1426598400.0, + "93": 1426598400.0, + "94": 1426598400.0, + "95": 1426598400.0, + "96": 1426598400.0, + "97": 1426598400.0, + "98": 1426598400.0, + "99": 1426598400.0, + "100": 1426598400.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 11.77129, - "2": 0.18805, - "3": 0.15486, - "4": 0.15531, - "5": 0.15342, - "6": 0.15402, - "7": 0.15787, - "8": 0.15837, - "9": 0.15422, - "10": 0.1531, - "11": 0.1531, - "12": 0.1521, - "13": 0.15206, - "14": 0.15281, - "15": 0.15025, - "16": 0.15321, - "17": 0.15383, - "18": 0.15265, - "19": 0.15535, - "20": 0.15414, - "21": 0.15275, - "22": 0.152, - "23": 0.15456, - "24": 0.15209, - "25": 0.15358, - "26": 0.15228, - "27": 0.15217, - "28": 0.15204, - "29": 0.1526, - "30": 0.15259, - "31": 0.15237, - "32": 0.15885, - "33": 0.1577, - "34": 0.16029, - "35": 0.15618, - "36": 0.16006, - "37": 0.15686, - "38": 0.15897, - "39": 0.15985, - "40": 0.15818, - "41": 0.15734, - "42": 0.15623, - "43": 0.15982, - "44": 0.15844, - "45": 0.15965, - "46": 0.15995, - "47": 0.1576, - "48": 0.15787, - "49": 0.15857, - "50": 0.16598, - "51": 0.15831, - "52": 0.15281, - "53": 0.15278, - "54": 0.15155, - "55": 0.1544, - "56": 0.15102, - "57": 0.1505, - "58": 0.15177, - "59": 0.15275, - "60": 0.15179, - "61": 0.15138, - "62": 0.153, - "63": 0.14962, - "64": 0.15104, - "65": 0.15104, - "66": 0.1541, - "67": 0.15089, - "68": 0.15178, - "69": 0.15241, - "70": 0.1524, - "71": 0.14991, - "72": 0.15107, - "73": 0.15205, - "74": 0.15105, - "75": 0.14944, - "76": 0.15086, - "77": 0.15066, - "78": 0.15037, - "79": 0.1517, - "80": 0.1535, - "81": 0.15067, - "82": 0.15202, - "83": 0.1513, - "84": 0.15157, - "85": 0.15077, - "86": 0.15249, - "87": 0.15259, - "88": 0.15065, - "89": 0.15236, - "90": 0.15088, - "91": 0.15271, - "92": 0.15124, - "93": 0.15371, - "94": 0.14949, - 
"95": 0.15169, - "96": 0.15061, - "97": 0.15123, - "98": 0.15143, - "99": 0.15292, - "100": 0.15348 + "1": 8.71736, + "2": 0.17115, + "3": 0.15694, + "4": 0.13982, + "5": 0.13869, + "6": 0.1336, + "7": 0.13504, + "8": 0.13243, + "9": 0.13367, + "10": 0.13419, + "11": 0.13733, + "12": 0.13769, + "13": 0.13945, + "14": 0.13947, + "15": 0.1359, + "16": 0.13522, + "17": 0.13429, + "18": 0.13312, + "19": 0.13374, + "20": 0.13297, + "21": 0.13311, + "22": 0.13277, + "23": 0.13534, + "24": 0.13287, + "25": 0.12793, + "26": 0.12692, + "27": 0.1283, + "28": 0.13508, + "29": 0.13475, + "30": 0.1318, + "31": 0.13396, + "32": 0.13344, + "33": 0.13398, + "34": 0.13071, + "35": 0.1284, + "36": 0.12752, + "37": 0.12689, + "38": 0.12666, + "39": 0.12799, + "40": 0.12834, + "41": 0.12686, + "42": 0.12597, + "43": 0.1242, + "44": 0.12724, + "45": 0.12459, + "46": 0.12693, + "47": 0.12473, + "48": 0.12666, + "49": 0.12677, + "50": 0.12611, + "51": 0.14947, + "52": 0.12685, + "53": 0.12533, + "54": 0.12565, + "55": 0.12664, + "56": 0.12771, + "57": 0.12644, + "58": 0.12656, + "59": 0.12707, + "60": 0.12763, + "61": 0.12599, + "62": 0.12667, + "63": 0.12558, + "64": 0.12865, + "65": 0.12684, + "66": 0.12749, + "67": 0.12671, + "68": 0.12725, + "69": 0.1267, + "70": 0.1263, + "71": 0.12741, + "72": 0.12748, + "73": 0.1278, + "74": 0.12653, + "75": 0.12606, + "76": 0.12649, + "77": 0.12666, + "78": 0.12626, + "79": 0.12702, + "80": 0.12831, + "81": 0.12686, + "82": 0.12628, + "83": 0.12693, + "84": 0.12714, + "85": 0.12632, + "86": 0.12756, + "87": 0.12631, + "88": 0.12895, + "89": 0.1284, + "90": 0.12636, + "91": 0.12805, + "92": 0.12691, + "93": 0.12665, + "94": 0.12749, + "95": 0.12697, + "96": 0.12622, + "97": 0.12701, + "98": 0.12878, + "99": 0.12567, + "100": 0.12677 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100_2nd.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..6ec68f2ce41 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.8558, + "52": 9.75237, + "53": 10.07589, + "54": 9.95688, + "55": 9.88203, + "56": 9.6313, + "57": 9.48649, + "58": 9.83109, + "59": 9.58897, + "60": 9.50643, + "61": 9.70363, + "62": 9.98286, + "63": 9.38302, + "64": 9.77901, + "65": 8.95166, + "66": 9.70158, + "67": 9.37203, + "68": 9.78849, + "69": 9.79851, + "70": 9.74737, + "71": 9.61908, + "72": 9.58502, + "73": 9.49721, + "74": 8.93927, + "75": 9.42703, + "76": 9.0802, + "77": 10.06567, + "78": 9.72893, + "79": 9.3776, + "80": 9.40982, + "81": 9.47976, + "82": 9.7018, + "83": 9.30612, + "84": 9.4209, + "85": 9.61371, + "86": 9.07649, + "87": 9.5945, + "88": 9.75068, + "89": 9.60238, + "90": 9.81898, + "91": 9.33894, + "92": 9.35716, + "93": 9.07879, + "94": 8.83503, + "95": 9.52172, + "96": 9.53003, + "97": 9.31306, 
+ "98": 9.67783, + "99": 8.89058, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2613.0, + "52": 2538.0, + "53": 2792.0, + "54": 2801.0, + "55": 2216.0, + "56": 2858.0, + "57": 2381.0, + "58": 2854.0, + "59": 2787.0, + "60": 2457.0, + "61": 2941.0, + "62": 2543.0, + "63": 2408.0, + "64": 2968.0, + "65": 2472.0, + "66": 2977.0, + "67": 2839.0, + "68": 2775.0, + "69": 2832.0, + "70": 3057.0, + "71": 2909.0, + "72": 2421.0, + "73": 2982.0, + "74": 1922.0, + "75": 2474.0, + "76": 3059.0, + "77": 3177.0, + "78": 3067.0, + "79": 3052.0, + "80": 3338.0, + "81": 3644.0, + "82": 3234.0, + "83": 2798.0, + "84": 3196.0, + "85": 3324.0, + "86": 2855.0, + "87": 3820.0, + "88": 2962.0, + "89": 3379.0, + "90": 3096.0, + "91": 2857.0, + "92": 3077.0, + "93": 2693.0, + "94": 3312.0, + "95": 3399.0, + "96": 3378.0, + "97": 3030.0, + "98": 3619.0, + "99": 3160.0, + "100": 3128.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": 
"nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 696530432.0, + "52": 696530432.0, + "53": 696530432.0, + "54": 696530432.0, + "55": 696530432.0, + "56": 696530432.0, + "57": 696530432.0, + "58": 696530432.0, + "59": 696530432.0, + "60": 696530432.0, + "61": 696530432.0, + "62": 696530432.0, + "63": 696530432.0, + "64": 696530432.0, + "65": 696530432.0, + "66": 696530432.0, + "67": 696530432.0, + "68": 696530432.0, + "69": 696530432.0, + "70": 696530432.0, + "71": 696530432.0, + "72": 696530432.0, + "73": 696530432.0, + "74": 696530432.0, + "75": 696530432.0, + "76": 696530432.0, + "77": 696530432.0, + "78": 696530432.0, + "79": 696530432.0, + "80": 696530432.0, + "81": 696530432.0, + "82": 696530432.0, + "83": 696530432.0, + "84": 696530432.0, + "85": 696530432.0, + "86": 696530432.0, + "87": 696530432.0, + "88": 696530432.0, + "89": 696530432.0, + "90": 696530432.0, + "91": 696530432.0, + "92": 696530432.0, + "93": 696530432.0, + "94": 696530432.0, + "95": 696530432.0, + "96": 696530432.0, + "97": 696530432.0, + "98": 696530432.0, + "99": 696530432.0, + "100": 696530432.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": 
"nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1606670848.0, + "52": 1606671872.0, + "53": 1606671872.0, + "54": 1606671872.0, + "55": 1606671872.0, + "56": 1606671872.0, + "57": 1606671872.0, + "58": 1606671872.0, + "59": 1606671872.0, + "60": 1606671872.0, + "61": 1606671872.0, + "62": 1606671872.0, + "63": 1606671872.0, + "64": 1606671872.0, + "65": 1606671872.0, + "66": 1606671872.0, + "67": 1606671872.0, + "68": 1606671872.0, + "69": 1606671872.0, + "70": 1606671872.0, + "71": 1606671872.0, + "72": 1606671872.0, + "73": 1606671872.0, + "74": 1606671872.0, + "75": 1606671872.0, + "76": 1606671872.0, + "77": 1606671872.0, + "78": 1606671872.0, + "79": 1606671872.0, + "80": 1606671872.0, + "81": 1606671872.0, + "82": 1606671872.0, + "83": 1606671872.0, + "84": 1606671872.0, + "85": 1606671872.0, + "86": 1606671872.0, + "87": 1606671872.0, + "88": 1606671872.0, + "89": 1606671872.0, + "90": 1606671872.0, + "91": 1606671872.0, + "92": 1606671872.0, + "93": 1606671872.0, + "94": 1606671872.0, + "95": 1606671872.0, + "96": 1606671872.0, + "97": 1606671872.0, + "98": 1606671872.0, + "99": 1606671872.0, + "100": 1606671872.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + 
"24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 8.58328, + "52": 0.16493, + "53": 0.12792, + "54": 0.12753, + "55": 0.1267, + "56": 0.12717, + "57": 0.12953, + "58": 0.12905, + "59": 0.12926, + "60": 0.12957, + "61": 0.1301, + "62": 0.13084, + "63": 0.1303, + "64": 0.12945, + "65": 0.12867, + "66": 0.12977, + "67": 0.12566, + "68": 0.12615, + "69": 0.12561, + "70": 0.12549, + "71": 0.12626, + "72": 0.12735, + "73": 0.12717, + "74": 0.12589, + "75": 0.12587, + "76": 0.12712, + "77": 0.12613, + "78": 0.12598, + "79": 0.12558, + "80": 0.1269, + "81": 0.1257, + "82": 0.12655, + "83": 0.12569, + "84": 0.12762, + "85": 0.12805, + "86": 0.12546, + "87": 0.12592, + "88": 0.12681, + "89": 0.12765, + "90": 0.12626, + "91": 0.12713, + "92": 0.12614, + "93": 0.12723, + "94": 0.1263, + "95": 0.12688, + "96": 0.1288, + "97": 0.12614, + "98": 0.12731, + "99": 0.12875, + "100": 0.1257 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100.json index 97ea213f560..297f18f6544 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 6.25138, - "2": 0.23075, - 
"3": 0.20054, - "4": 0.20395, - "5": 0.20085, - "6": 0.19693, - "7": 0.1984, - "8": 0.19691, - "9": 0.19734, - "10": 0.19831, - "11": 0.19755, - "12": 0.20036, - "13": 0.19718, - "14": 0.20205, - "15": 0.19931, - "16": 0.1974, - "17": 0.19891, - "18": 0.19725, - "19": 0.19744, - "20": 0.19621, - "21": 0.19556, - "22": 0.1957, - "23": 0.19653, - "24": 0.19561, - "25": 0.19465, - "26": 0.19483, - "27": 0.19566, - "28": 0.19514, - "29": 0.19571, - "30": 0.19512, - "31": 0.19603, - "32": 0.19794, - "33": 0.19597, - "34": 0.20052, - "35": 0.19938, - "36": 0.19968, - "37": 0.19971, - "38": 0.19989, - "39": 0.20233, - "40": 0.20594, - "41": 0.20596, - "42": 0.20875, - "43": 0.20692, - "44": 0.20224, - "45": 0.20492, - "46": 0.20483, - "47": 0.20404, - "48": 0.20062, - "49": 0.1998, - "50": 0.19944, - "51": 0.21056, - "52": 0.20322, - "53": 0.20394, - "54": 0.20267, - "55": 0.20305, - "56": 0.20261, - "57": 0.20266, - "58": 0.2023, - "59": 0.20259, - "60": 0.20297, - "61": 0.20333, - "62": 0.20344, - "63": 0.20255, - "64": 0.20203, - "65": 0.20288, - "66": 0.20295, - "67": 0.20276, - "68": 0.20255, - "69": 0.20306, - "70": 0.20225, - "71": 0.20236, - "72": 0.20262, - "73": 0.2033, - "74": 0.20279, - "75": 0.20276, - "76": 0.20185, - "77": 0.20283, - "78": 0.20284, - "79": 0.2021, - "80": 0.20273, - "81": 0.20261, - "82": 0.20101, - "83": 0.20222, - "84": 0.20269, - "85": 0.20272, - "86": 0.20286, - "87": 0.20079, - "88": 0.20309, - "89": 0.2026, - "90": 0.20209, - "91": 0.20371, - "92": 0.20302, - "93": 0.20226, - "94": 0.20222, - "95": 0.20289, - "96": 0.20273, - "97": 0.20346, - "98": 0.20283, - "99": 0.20241, - "100": 0.20343 + "1": 3.6904, + "2": 0.22693, + "3": 0.20753, + "4": 0.19573, + "5": 0.19555, + "6": 0.19486, + "7": 0.19003, + "8": 0.19034, + "9": 0.19191, + "10": 0.19136, + "11": 0.19037, + "12": 0.19056, + "13": 0.19097, + "14": 0.19327, + "15": 0.19082, + "16": 0.19093, + "17": 0.19066, + "18": 0.1904, + "19": 0.19061, + "20": 0.1898, + "21": 0.19121, + 
"22": 0.18935, + "23": 0.18948, + "24": 0.18927, + "25": 0.19032, + "26": 0.18931, + "27": 0.18951, + "28": 0.18931, + "29": 0.18948, + "30": 0.18971, + "31": 0.18911, + "32": 0.18996, + "33": 0.18993, + "34": 0.18929, + "35": 0.19088, + "36": 0.18935, + "37": 0.18973, + "38": 0.18947, + "39": 0.1909, + "40": 0.18932, + "41": 0.1896, + "42": 0.18785, + "43": 0.18782, + "44": 0.18772, + "45": 0.18893, + "46": 0.18908, + "47": 0.18889, + "48": 0.18856, + "49": 0.18904, + "50": 0.18893, + "51": 0.20447, + "52": 0.19453, + "53": 0.19364, + "54": 0.19383, + "55": 0.19491, + "56": 0.19307, + "57": 0.19375, + "58": 0.19268, + "59": 0.19288, + "60": 0.19183, + "61": 0.19216, + "62": 0.19218, + "63": 0.19491, + "64": 0.193, + "65": 0.19286, + "66": 0.19394, + "67": 0.19246, + "68": 0.19136, + "69": 0.19255, + "70": 0.19206, + "71": 0.19299, + "72": 0.19313, + "73": 0.19366, + "74": 0.19232, + "75": 0.1936, + "76": 0.19319, + "77": 0.19301, + "78": 0.19344, + "79": 0.19291, + "80": 0.1933, + "81": 0.19357, + "82": 0.19253, + "83": 0.19257, + "84": 0.19311, + "85": 0.19403, + "86": 0.1921, + "87": 0.19221, + "88": 0.19252, + "89": 0.19392, + "90": 0.1925, + "91": 0.19468, + "92": 0.19302, + "93": 0.19255, + "94": 0.19249, + "95": 0.19418, + "96": 0.19216, + "97": 0.19224, + "98": 0.19469, + "99": 0.19297, + "100": 0.19245 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..e9d40c1a306 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + 
"2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.84971, + "52": 9.74156, + "53": 10.06322, + "54": 9.94581, + "55": 9.87731, + "56": 9.62746, + "57": 9.47259, + "58": 9.82912, + "59": 9.583, + "60": 9.49181, + "61": 9.69961, + "62": 9.98089, + "63": 9.37212, + "64": 9.7756, + "65": 8.9433, + "66": 9.69993, + "67": 9.36414, + "68": 9.78706, + "69": 9.78397, + "70": 9.72288, + "71": 9.60749, + "72": 9.58416, + "73": 9.49093, + "74": 8.94864, + "75": 9.41807, + "76": 9.08721, + "77": 10.06283, + "78": 9.729, + "79": 9.37091, + "80": 9.40033, + "81": 9.47754, + "82": 9.69121, + "83": 9.30762, + "84": 9.41252, + "85": 9.61132, + "86": 9.07621, + "87": 9.59459, + "88": 9.74768, + "89": 9.6068, + "90": 9.81078, + "91": 9.34441, + "92": 9.36535, + "93": 9.07743, + "94": 8.82975, + "95": 9.51676, + "96": 9.52546, + "97": 9.31031, + "98": 9.67812, + "99": 8.88848, + "100": 9.40128 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": 
"nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2735.0, + "52": 2607.0, + "53": 2951.0, + "54": 2672.0, + "55": 2451.0, + "56": 2712.0, + "57": 2392.0, + "58": 2979.0, + "59": 2869.0, + "60": 2435.0, + "61": 2938.0, + "62": 2669.0, + "63": 2392.0, + "64": 2998.0, + "65": 2689.0, + "66": 3285.0, + "67": 2782.0, + "68": 2753.0, + "69": 2958.0, + "70": 3271.0, + "71": 3040.0, + "72": 2504.0, + "73": 3096.0, + "74": 1910.0, + "75": 2617.0, + "76": 3081.0, + "77": 3390.0, + "78": 3186.0, + "79": 3320.0, + "80": 3483.0, + "81": 3782.0, + "82": 3516.0, + "83": 2864.0, + "84": 3396.0, + "85": 3247.0, + "86": 2785.0, + "87": 3762.0, + "88": 3102.0, + "89": 3483.0, + "90": 3076.0, + "91": 2643.0, + "92": 3198.0, + "93": 2666.0, + "94": 3390.0, + "95": 3410.0, + "96": 3508.0, + "97": 3178.0, + "98": 3865.0, + "99": 3143.0, + "100": 3357.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": 
"nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 487621120.0, + "52": 487621120.0, + "53": 487621120.0, + "54": 487621120.0, + "55": 487621120.0, + "56": 487621120.0, + "57": 487621120.0, + "58": 487621120.0, + "59": 487621120.0, + "60": 487621120.0, + "61": 487621120.0, + "62": 487621120.0, + "63": 487621120.0, + "64": 487621120.0, + "65": 487621120.0, + "66": 487621120.0, + "67": 487621120.0, + "68": 487621120.0, + "69": 487621120.0, + "70": 487621120.0, + "71": 487621120.0, + "72": 487621120.0, + "73": 487621120.0, + "74": 487621120.0, + "75": 487621120.0, + "76": 487621120.0, + "77": 487621120.0, + "78": 487621120.0, + "79": 487621120.0, + "80": 487621120.0, + "81": 487621120.0, + "82": 487621120.0, + "83": 487621120.0, + "84": 487621120.0, + "85": 487621120.0, + "86": 487621120.0, + "87": 487621120.0, + "88": 487621120.0, + "89": 487621120.0, + "90": 487621120.0, + "91": 487621120.0, + "92": 487621120.0, + "93": 487621120.0, + "94": 487621120.0, + "95": 487621120.0, + "96": 487621120.0, + "97": 487621120.0, + "98": 487621120.0, + "99": 487621120.0, + "100": 487621120.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1412441600.0, + 
"52": 1412442624.0, + "53": 1412442624.0, + "54": 1412442624.0, + "55": 1412442624.0, + "56": 1412442624.0, + "57": 1412442624.0, + "58": 1412442624.0, + "59": 1412442624.0, + "60": 1412442624.0, + "61": 1412442624.0, + "62": 1412442624.0, + "63": 1412442624.0, + "64": 1412442624.0, + "65": 1412442624.0, + "66": 1412442624.0, + "67": 1412442624.0, + "68": 1412442624.0, + "69": 1412442624.0, + "70": 1412442624.0, + "71": 1412442624.0, + "72": 1412442624.0, + "73": 1412442624.0, + "74": 1412442624.0, + "75": 1412442624.0, + "76": 1412442624.0, + "77": 1412442624.0, + "78": 1412442624.0, + "79": 1412442624.0, + "80": 1412442624.0, + "81": 1412442624.0, + "82": 1412442624.0, + "83": 1412442624.0, + "84": 1412442624.0, + "85": 1412442624.0, + "86": 1412442624.0, + "87": 1412442624.0, + "88": 1412442624.0, + "89": 1412442624.0, + "90": 1412442624.0, + "91": 1412442624.0, + "92": 1412442624.0, + "93": 1412442624.0, + "94": 1412442624.0, + "95": 1412442624.0, + "96": 1412442624.0, + "97": 1412442624.0, + "98": 1412442624.0, + "99": 1412442624.0, + "100": 1412442624.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.82592, + "52": 0.24571, + "53": 0.19937, + "54": 0.1942, + "55": 
0.19469, + "56": 0.19438, + "57": 0.19525, + "58": 0.19539, + "59": 0.19507, + "60": 0.19574, + "61": 0.19507, + "62": 0.19522, + "63": 0.1942, + "64": 0.19521, + "65": 0.19461, + "66": 0.19519, + "67": 0.19508, + "68": 0.19346, + "69": 0.19457, + "70": 0.1935, + "71": 0.19426, + "72": 0.19396, + "73": 0.19419, + "74": 0.19399, + "75": 0.19449, + "76": 0.19338, + "77": 0.19376, + "78": 0.19428, + "79": 0.19399, + "80": 0.19356, + "81": 0.19404, + "82": 0.19431, + "83": 0.19348, + "84": 0.19448, + "85": 0.19466, + "86": 0.1934, + "87": 0.19394, + "88": 0.19435, + "89": 0.19356, + "90": 0.19446, + "91": 0.19388, + "92": 0.19324, + "93": 0.19462, + "94": 0.1939, + "95": 0.19479, + "96": 0.19331, + "97": 0.19382, + "98": 0.19427, + "99": 0.1943, + "100": 0.19433 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..49fb0cee006 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86897, + "2": 10.88544, + "3": 10.86473, + "4": 10.86827, + "5": 10.87437, + "6": 10.89003, + "7": 10.87769, + "8": 10.86367, + "9": 10.88281, + "10": 10.84686, + "11": 10.87102, + "12": 10.87349, + "13": 10.8814, + "14": 10.88766, + "15": 10.83865, + "16": 10.8239, + "17": 10.80196, + "18": 10.81095, + "19": 10.82197, + "20": 10.71791, + "21": 10.68917, + "22": 10.57276, + "23": 10.70813, + "24": 10.59542, + "25": 10.55293, + "26": 10.61254, + "27": 10.6005, + "28": 10.56172, + "29": 10.58085, + "30": 10.35594, + "31": 10.11818, + "32": 10.44815, + "33": 
10.45422, + "34": 10.21552, + "35": 10.26123, + "36": 10.20776, + "37": 10.3367, + "38": 10.17742, + "39": 10.39293, + "40": 10.06352, + "41": 10.13888, + "42": 10.2056, + "43": 9.82811, + "44": 9.94544, + "45": 9.82557, + "46": 9.80182, + "47": 10.14052, + "48": 9.84281, + "49": 9.52013, + "50": 9.88457, + "51": 9.8474, + "52": 9.74209, + "53": 10.05695, + "54": 9.95048, + "55": 9.88137, + "56": 9.61274, + "57": 9.46865, + "58": 9.82191, + "59": 9.57642, + "60": 9.49763, + "61": 9.6919, + "62": 9.98672, + "63": 9.37511, + "64": 9.76682, + "65": 8.94645, + "66": 9.70228, + "67": 9.36325, + "68": 9.78311, + "69": 9.79861, + "70": 9.73171, + "71": 9.62575, + "72": 9.58482, + "73": 9.48964, + "74": 8.92857, + "75": 9.40863, + "76": 9.07924, + "77": 10.05936, + "78": 9.72284, + "79": 9.37782, + "80": 9.40428, + "81": 9.48314, + "82": 9.70039, + "83": 9.31593, + "84": 9.41835, + "85": 9.61687, + "86": 9.07538, + "87": 9.59618, + "88": 9.75215, + "89": 9.60188, + "90": 9.82284, + "91": 9.34035, + "92": 9.35853, + "93": 9.08806, + "94": 8.83039, + "95": 9.5266, + "96": 9.53046, + "97": 9.30391, + "98": 9.67197, + "99": 8.89638, + "100": 9.40645 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 56.0, + "2": 68.0, + "3": 65.0, + "4": 66.0, + "5": 62.0, + "6": 64.0, + "7": 61.0, + "8": 81.0, + "9": 60.0, + "10": 59.0, + "11": 73.0, + "12": 60.0, + "13": 62.0, + "14": 72.0, + "15": 56.0, + "16": 70.0, + "17": 67.0, + "18": 62.0, + "19": 61.0, + "20": 64.0, + "21": 73.0, + "22": 69.0, + "23": 77.0, + "24": 53.0, + "25": 63.0, + "26": 66.0, + "27": 66.0, + "28": 77.0, + "29": 70.0, + "30": 56.0, + "31": 61.0, + "32": 64.0, + "33": 77.0, + "34": 68.0, + "35": 78.0, + "36": 74.0, + "37": 79.0, + "38": 60.0, + "39": 73.0, + "40": 73.0, + "41": 78.0, + "42": 76.0, + "43": 82.0, + "44": 87.0, + "45": 83.0, + "46": 72.0, + "47": 70.0, + "48": 64.0, + "49": 82.0, + "50": 88.0, + "51": 71.0, + "52": 53.0, + "53": 77.0, + "54": 
92.0, + "55": 67.0, + "56": 92.0, + "57": 86.0, + "58": 79.0, + "59": 74.0, + "60": 70.0, + "61": 98.0, + "62": 71.0, + "63": 64.0, + "64": 83.0, + "65": 89.0, + "66": 86.0, + "67": 62.0, + "68": 67.0, + "69": 57.0, + "70": 90.0, + "71": 66.0, + "72": 61.0, + "73": 76.0, + "74": 52.0, + "75": 63.0, + "76": 78.0, + "77": 78.0, + "78": 87.0, + "79": 83.0, + "80": 77.0, + "81": 102.0, + "82": 74.0, + "83": 67.0, + "84": 68.0, + "85": 96.0, + "86": 89.0, + "87": 92.0, + "88": 81.0, + "89": 47.0, + "90": 76.0, + "91": 70.0, + "92": 82.0, + "93": 58.0, + "94": 76.0, + "95": 71.0, + "96": 92.0, + "97": 67.0, + "98": 88.0, + "99": 66.0, + "100": 69.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 545423872.0, + "2": 545423872.0, + "3": 545423872.0, + "4": 545423872.0, + "5": 545423872.0, + "6": 545423872.0, + "7": 545423872.0, + "8": 545423872.0, + "9": 545423872.0, + "10": 545423872.0, + "11": 545423872.0, + "12": 545423872.0, + "13": 545423872.0, + "14": 545423872.0, + "15": 545423872.0, + "16": 545423872.0, + "17": 545423872.0, + "18": 545423872.0, + "19": 545423872.0, + "20": 545423872.0, + "21": 545423872.0, + "22": 545423872.0, + "23": 545423872.0, + "24": 545423872.0, + "25": 545423872.0, + "26": 545423872.0, + "27": 545423872.0, + "28": 545423872.0, + "29": 545423872.0, + "30": 545423872.0, + "31": 545423872.0, + "32": 545423872.0, + "33": 545423872.0, + "34": 545423872.0, + "35": 545423872.0, + "36": 545423872.0, + "37": 545423872.0, + "38": 545423872.0, + "39": 545423872.0, + "40": 545423872.0, + "41": 545423872.0, + "42": 545423872.0, + "43": 545423872.0, + "44": 545423872.0, + "45": 545423872.0, + "46": 545423872.0, + "47": 545423872.0, + "48": 545423872.0, + "49": 545423872.0, + "50": 545423872.0, + "51": 545423872.0, + "52": 545423872.0, + "53": 545423872.0, + "54": 545423872.0, + "55": 545423872.0, + "56": 545423872.0, + "57": 545423872.0, + "58": 545423872.0, + "59": 545423872.0, + "60": 
545423872.0, + "61": 545423872.0, + "62": 545423872.0, + "63": 545423872.0, + "64": 545423872.0, + "65": 545423872.0, + "66": 545423872.0, + "67": 545423872.0, + "68": 545423872.0, + "69": 545423872.0, + "70": 545423872.0, + "71": 545423872.0, + "72": 545423872.0, + "73": 545423872.0, + "74": 545423872.0, + "75": 545423872.0, + "76": 545423872.0, + "77": 545423872.0, + "78": 545423872.0, + "79": 545423872.0, + "80": 545423872.0, + "81": 545423872.0, + "82": 545423872.0, + "83": 545423872.0, + "84": 545423872.0, + "85": 545423872.0, + "86": 545423872.0, + "87": 545423872.0, + "88": 545423872.0, + "89": 545423872.0, + "90": 545423872.0, + "91": 545423872.0, + "92": 545423872.0, + "93": 545423872.0, + "94": 545423872.0, + "95": 545423872.0, + "96": 545423872.0, + "97": 545423872.0, + "98": 545423872.0, + "99": 545423872.0, + "100": 545423872.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1726380544.0, + "2": 1906452992.0, + "3": 1906452992.0, + "4": 1906452992.0, + "5": 1906452992.0, + "6": 1906452992.0, + "7": 1906452992.0, + "8": 1906452992.0, + "9": 1906452992.0, + "10": 1906452992.0, + "11": 1906452992.0, + "12": 1906452992.0, + "13": 1906452992.0, + "14": 1906452992.0, + "15": 1906452992.0, + "16": 1906452992.0, + "17": 1906452992.0, + "18": 1906452992.0, + "19": 1906452992.0, + "20": 1906452992.0, + "21": 1906452992.0, + "22": 1906452992.0, + "23": 1906452992.0, + "24": 1906452992.0, + "25": 1906452992.0, + "26": 1906452992.0, + "27": 1906452992.0, + "28": 1906452992.0, + "29": 1906452992.0, + "30": 1906452992.0, + "31": 1906452992.0, + "32": 1906452992.0, + "33": 1906452992.0, + "34": 1906452992.0, + "35": 1906452992.0, + "36": 1906452992.0, + "37": 1906452992.0, + "38": 1906452992.0, + "39": 1906452992.0, + "40": 1906452992.0, + "41": 1906452992.0, + "42": 1906452992.0, + "43": 1906452992.0, + "44": 1906452992.0, + "45": 1906452992.0, + "46": 1906452992.0, + "47": 1906452992.0, + "48": 
1906452992.0, + "49": 1906452992.0, + "50": 1906452992.0, + "51": 1906452992.0, + "52": 1906452992.0, + "53": 1906452992.0, + "54": 1906452992.0, + "55": 1906452992.0, + "56": 1906452992.0, + "57": 1906452992.0, + "58": 1906452992.0, + "59": 1906452992.0, + "60": 1906452992.0, + "61": 1906452992.0, + "62": 1906452992.0, + "63": 1906452992.0, + "64": 1906452992.0, + "65": 1906452992.0, + "66": 1906452992.0, + "67": 1906452992.0, + "68": 1906452992.0, + "69": 1906452992.0, + "70": 1906452992.0, + "71": 1906452992.0, + "72": 1906452992.0, + "73": 1906452992.0, + "74": 1906452992.0, + "75": 1906452992.0, + "76": 1906452992.0, + "77": 1906452992.0, + "78": 1906452992.0, + "79": 1906452992.0, + "80": 1906452992.0, + "81": 1906452992.0, + "82": 1906452992.0, + "83": 1906452992.0, + "84": 1906452992.0, + "85": 1906452992.0, + "86": 1906452992.0, + "87": 1906452992.0, + "88": 1906452992.0, + "89": 1906452992.0, + "90": 1906452992.0, + "91": 1906452992.0, + "92": 1906452992.0, + "93": 1906452992.0, + "94": 1906452992.0, + "95": 1906452992.0, + "96": 1906452992.0, + "97": 1906452992.0, + "98": 1906452992.0, + "99": 1906452992.0, + "100": 1906452992.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.0946, + "2": 0.23434, + "3": 0.25974, + "4": 0.19572, + "5": 0.19385, + "6": 0.23205, + "7": 0.7704, + "8": 0.19849, + "9": 0.1899, + "10": 0.19145, + "11": 0.22929, + "12": 0.19296, + "13": 0.18887, + "14": 0.18975, + "15": 0.19107, + "16": 0.18736, + "17": 0.18574, + "18": 0.22677, + "19": 0.19016, + "20": 0.20891, + "21": 0.18795, + "22": 0.18702, + "23": 0.18879, + "24": 0.23626, + "25": 0.18708, + "26": 0.21783, + "27": 0.3498, + "28": 0.18687, + "29": 0.20508, + "30": 0.1874, + "31": 0.27079, + "32": 0.19016, + "33": 0.18984, + "34": 0.18963, + "35": 0.25952, + "36": 0.21489, + "37": 0.20358, + "38": 0.20254, + "39": 0.2039, + "40": 0.20108, + "41": 0.18536, + "42": 0.18627, + "43": 0.22134, + "44": 0.19018, + 
"45": 0.18634, + "46": 0.18446, + "47": 0.19975, + "48": 0.18759, + "49": 0.18704, + "50": 0.18617, + "51": 0.20108, + "52": 0.18371, + "53": 0.18371, + "54": 0.18409, + "55": 0.18492, + "56": 0.18608, + "57": 0.33035, + "58": 0.18444, + "59": 0.18479, + "60": 0.2007, + "61": 0.18737, + "62": 0.54423, + "63": 0.18739, + "64": 0.18756, + "65": 0.22855, + "66": 0.1889, + "67": 0.18728, + "68": 0.18737, + "69": 0.1863, + "70": 0.18731, + "71": 0.22911, + "72": 0.18493, + "73": 0.1846, + "74": 0.1919, + "75": 0.21803, + "76": 0.36578, + "77": 0.22572, + "78": 0.20057, + "79": 0.18852, + "80": 0.53951, + "81": 0.42214, + "82": 0.18567, + "83": 0.18702, + "84": 0.1856, + "85": 0.18727, + "86": 0.18505, + "87": 0.18506, + "88": 0.22119, + "89": 0.22551, + "90": 0.18825, + "91": 0.18812, + "92": 0.18805, + "93": 0.18696, + "94": 0.18716, + "95": 0.18779, + "96": 0.41477, + "97": 0.18674, + "98": 0.20738, + "99": 0.18625, + "100": 0.21802 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100.json index c1aaf21cf26..f1a58884e99 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 546472448.0, - "2": 546472448.0, - "3": 546472448.0, - "4": 546472448.0, - "5": 546472448.0, - "6": 546472448.0, - "7": 546472448.0, - "8": 546472448.0, - "9": 546472448.0, - "10": 546472448.0, - "11": 546472448.0, - "12": 546472448.0, - "13": 546472448.0, - "14": 546472448.0, - "15": 546472448.0, - "16": 
546472448.0, - "17": 546472448.0, - "18": 546472448.0, - "19": 546472448.0, - "20": 546472448.0, - "21": 546472448.0, - "22": 546472448.0, - "23": 546472448.0, - "24": 546472448.0, - "25": 546472448.0, - "26": 546472448.0, - "27": 546472448.0, - "28": 546472448.0, - "29": 546472448.0, - "30": 546472448.0, - "31": 546472448.0, - "32": 546472448.0, - "33": 546472448.0, - "34": 546472448.0, - "35": 546472448.0, - "36": 546472448.0, - "37": 546472448.0, - "38": 546472448.0, - "39": 546472448.0, - "40": 546472448.0, - "41": 546472448.0, - "42": 546472448.0, - "43": 546472448.0, - "44": 546472448.0, - "45": 546472448.0, - "46": 546472448.0, - "47": 546472448.0, - "48": 546472448.0, - "49": 546472448.0, - "50": 546472448.0, - "51": 546472448.0, - "52": 546472448.0, - "53": 546472448.0, - "54": 546472448.0, - "55": 546472448.0, - "56": 546472448.0, - "57": 546472448.0, - "58": 546472448.0, - "59": 546472448.0, - "60": 546472448.0, - "61": 546472448.0, - "62": 546472448.0, - "63": 546472448.0, - "64": 546472448.0, - "65": 546472448.0, - "66": 546472448.0, - "67": 546472448.0, - "68": 546472448.0, - "69": 546472448.0, - "70": 546472448.0, - "71": 546472448.0, - "72": 546472448.0, - "73": 546472448.0, - "74": 546472448.0, - "75": 546472448.0, - "76": 546472448.0, - "77": 546472448.0, - "78": 546472448.0, - "79": 546472448.0, - "80": 546472448.0, - "81": 546472448.0, - "82": 546472448.0, - "83": 546472448.0, - "84": 546472448.0, - "85": 546472448.0, - "86": 546472448.0, - "87": 546472448.0, - "88": 546472448.0, - "89": 546472448.0, - "90": 546472448.0, - "91": 546472448.0, - "92": 546472448.0, - "93": 546472448.0, - "94": 546472448.0, - "95": 546472448.0, - "96": 546472448.0, - "97": 546472448.0, - "98": 546472448.0, - "99": 546472448.0, - "100": 546472448.0 + "1": 545423872.0, + "2": 545423872.0, + "3": 545423872.0, + "4": 545423872.0, + "5": 545423872.0, + "6": 545423872.0, + "7": 545423872.0, + "8": 545423872.0, + "9": 545423872.0, + "10": 545423872.0, + "11": 545423872.0, 
+ "12": 545423872.0, + "13": 545423872.0, + "14": 545423872.0, + "15": 545423872.0, + "16": 545423872.0, + "17": 545423872.0, + "18": 545423872.0, + "19": 545423872.0, + "20": 545423872.0, + "21": 545423872.0, + "22": 545423872.0, + "23": 545423872.0, + "24": 545423872.0, + "25": 545423872.0, + "26": 545423872.0, + "27": 545423872.0, + "28": 545423872.0, + "29": 545423872.0, + "30": 545423872.0, + "31": 545423872.0, + "32": 545423872.0, + "33": 545423872.0, + "34": 545423872.0, + "35": 545423872.0, + "36": 545423872.0, + "37": 545423872.0, + "38": 545423872.0, + "39": 545423872.0, + "40": 545423872.0, + "41": 545423872.0, + "42": 545423872.0, + "43": 545423872.0, + "44": 545423872.0, + "45": 545423872.0, + "46": 545423872.0, + "47": 545423872.0, + "48": 545423872.0, + "49": 545423872.0, + "50": 545423872.0, + "51": 545423872.0, + "52": 545423872.0, + "53": 545423872.0, + "54": 545423872.0, + "55": 545423872.0, + "56": 545423872.0, + "57": 545423872.0, + "58": 545423872.0, + "59": 545423872.0, + "60": 545423872.0, + "61": 545423872.0, + "62": 545423872.0, + "63": 545423872.0, + "64": 545423872.0, + "65": 545423872.0, + "66": 545423872.0, + "67": 545423872.0, + "68": 545423872.0, + "69": 545423872.0, + "70": 545423872.0, + "71": 545423872.0, + "72": 545423872.0, + "73": 545423872.0, + "74": 545423872.0, + "75": 545423872.0, + "76": 545423872.0, + "77": 545423872.0, + "78": 545423872.0, + "79": 545423872.0, + "80": 545423872.0, + "81": 545423872.0, + "82": 545423872.0, + "83": 545423872.0, + "84": 545423872.0, + "85": 545423872.0, + "86": 545423872.0, + "87": 545423872.0, + "88": 545423872.0, + "89": 545423872.0, + "90": 545423872.0, + "91": 545423872.0, + "92": 545423872.0, + "93": 545423872.0, + "94": 545423872.0, + "95": 545423872.0, + "96": 545423872.0, + "97": 545423872.0, + "98": 545423872.0, + "99": 545423872.0, + "100": 545423872.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1713796608.0, - 
"2": 1895967232.0, - "3": 1895967232.0, - "4": 1895967232.0, - "5": 1895967232.0, - "6": 1895967232.0, - "7": 1895967232.0, - "8": 1895967232.0, - "9": 1895967232.0, - "10": 1895967232.0, - "11": 1895967232.0, - "12": 1895967232.0, - "13": 1895967232.0, - "14": 1895967232.0, - "15": 1895967232.0, - "16": 1895967232.0, - "17": 1895967232.0, - "18": 1895967232.0, - "19": 1895967232.0, - "20": 1895967232.0, - "21": 1895967232.0, - "22": 1895967232.0, - "23": 1895967232.0, - "24": 1895967232.0, - "25": 1895967232.0, - "26": 1895967232.0, - "27": 1895967232.0, - "28": 1895967232.0, - "29": 1895967232.0, - "30": 1895967232.0, - "31": 1895967232.0, - "32": 1895967232.0, - "33": 1895967232.0, - "34": 1895967232.0, - "35": 1895967232.0, - "36": 1895967232.0, - "37": 1895967232.0, - "38": 1895967232.0, - "39": 1895967232.0, - "40": 1895967232.0, - "41": 1895967232.0, - "42": 1895967232.0, - "43": 1895967232.0, - "44": 1895967232.0, - "45": 1895967232.0, - "46": 1895967232.0, - "47": 1895967232.0, - "48": 1895967232.0, - "49": 1895967232.0, - "50": 1895967232.0, - "51": 1895967232.0, - "52": 1895967232.0, - "53": 1895967232.0, - "54": 1895967232.0, - "55": 1895967232.0, - "56": 1895967232.0, - "57": 1895967232.0, - "58": 1895967232.0, - "59": 1895967232.0, - "60": 1895967232.0, - "61": 1895967232.0, - "62": 1895967232.0, - "63": 1895967232.0, - "64": 1895967232.0, - "65": 1895967232.0, - "66": 1895967232.0, - "67": 1895967232.0, - "68": 1895967232.0, - "69": 1895967232.0, - "70": 1895967232.0, - "71": 1895967232.0, - "72": 1895967232.0, - "73": 1895967232.0, - "74": 1895967232.0, - "75": 1895967232.0, - "76": 1895967232.0, - "77": 1895967232.0, - "78": 1895967232.0, - "79": 1895967232.0, - "80": 1895967232.0, - "81": 1895967232.0, - "82": 1895967232.0, - "83": 1895967232.0, - "84": 1895967232.0, - "85": 1895967232.0, - "86": 1895967232.0, - "87": 1895967232.0, - "88": 1895967232.0, - "89": 1895967232.0, - "90": 1895967232.0, - "91": 1895967232.0, - "92": 1895967232.0, - "93": 
1895967232.0, - "94": 1895967232.0, - "95": 1895967232.0, - "96": 1895967232.0, - "97": 1895967232.0, - "98": 1895967232.0, - "99": 1895967232.0, - "100": 1895967232.0 + "1": 1713797632.0, + "2": 1893870080.0, + "3": 1893870080.0, + "4": 1893870080.0, + "5": 1893870080.0, + "6": 1893870080.0, + "7": 1893870080.0, + "8": 1893870080.0, + "9": 1893870080.0, + "10": 1893870080.0, + "11": 1893870080.0, + "12": 1893870080.0, + "13": 1893870080.0, + "14": 1893870080.0, + "15": 1893870080.0, + "16": 1893870080.0, + "17": 1893870080.0, + "18": 1893870080.0, + "19": 1893870080.0, + "20": 1893870080.0, + "21": 1893870080.0, + "22": 1893870080.0, + "23": 1893870080.0, + "24": 1893870080.0, + "25": 1893870080.0, + "26": 1893870080.0, + "27": 1893870080.0, + "28": 1893870080.0, + "29": 1893870080.0, + "30": 1893870080.0, + "31": 1893870080.0, + "32": 1893870080.0, + "33": 1893870080.0, + "34": 1893870080.0, + "35": 1893870080.0, + "36": 1893870080.0, + "37": 1893870080.0, + "38": 1893870080.0, + "39": 1893870080.0, + "40": 1893870080.0, + "41": 1893870080.0, + "42": 1893870080.0, + "43": 1893870080.0, + "44": 1893870080.0, + "45": 1893870080.0, + "46": 1893870080.0, + "47": 1893870080.0, + "48": 1893870080.0, + "49": 1893870080.0, + "50": 1893870080.0, + "51": 1893870080.0, + "52": 1893870080.0, + "53": 1893870080.0, + "54": 1893870080.0, + "55": 1893870080.0, + "56": 1893870080.0, + "57": 1893870080.0, + "58": 1893870080.0, + "59": 1893870080.0, + "60": 1893870080.0, + "61": 1893870080.0, + "62": 1893870080.0, + "63": 1893870080.0, + "64": 1893870080.0, + "65": 1893870080.0, + "66": 1893870080.0, + "67": 1893870080.0, + "68": 1893870080.0, + "69": 1893870080.0, + "70": 1893870080.0, + "71": 1893870080.0, + "72": 1893870080.0, + "73": 1893870080.0, + "74": 1893870080.0, + "75": 1893870080.0, + "76": 1893870080.0, + "77": 1893870080.0, + "78": 1893870080.0, + "79": 1893870080.0, + "80": 1893870080.0, + "81": 1893870080.0, + "82": 1893870080.0, + "83": 1893870080.0, + "84": 
1893870080.0, + "85": 1893870080.0, + "86": 1893870080.0, + "87": 1893870080.0, + "88": 1893870080.0, + "89": 1893870080.0, + "90": 1893870080.0, + "91": 1893870080.0, + "92": 1893870080.0, + "93": 1893870080.0, + "94": 1893870080.0, + "95": 1893870080.0, + "96": 1893870080.0, + "97": 1893870080.0, + "98": 1893870080.0, + "99": 1893870080.0, + "100": 1893870080.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 11.81196, - "2": 0.17008, - "3": 0.15523, - "4": 0.15249, - "5": 0.15434, - "6": 0.15515, - "7": 0.15378, - "8": 0.1528, - "9": 0.15287, - "10": 0.15479, - "11": 0.15442, - "12": 0.15952, - "13": 0.15843, - "14": 0.15559, - "15": 0.15333, - "16": 0.15363, - "17": 0.15594, - "18": 0.153, - "19": 0.15542, - "20": 0.15304, - "21": 0.15492, - "22": 0.15277, - "23": 0.15803, - "24": 0.1545, - "25": 0.15639, - "26": 0.15419, - "27": 0.15381, - "28": 0.15423, - "29": 0.15354, - "30": 0.1554, - "31": 0.15389, - "32": 0.15608, - "33": 0.15361, - "34": 0.15437, - "35": 0.15233, - "36": 0.15499, - "37": 0.15114, - "38": 0.15259, - "39": 0.15269, - "40": 0.1516, - "41": 0.15052, - "42": 0.15122, - "43": 0.15389, - "44": 0.15261, - "45": 0.15376, - "46": 0.15091, - "47": 0.15197, - "48": 0.15131, - "49": 0.15083, - "50": 0.152, - "51": 0.15723, - "52": 0.15481, - "53": 0.15087, - "54": 0.15175, - "55": 0.15331, - "56": 0.15504, - "57": 0.15471, - "58": 0.1549, - "59": 0.15621, - "60": 0.1533, - "61": 0.15499, - "62": 0.15222, - "63": 0.15091, - "64": 0.1535, - "65": 0.15463, - "66": 0.15169, - "67": 0.15591, - "68": 0.15173, - "69": 0.1509, - "70": 0.15063, - "71": 0.15755, - "72": 0.1545, - "73": 0.15374, - "74": 0.15306, - "75": 0.15223, - "76": 0.15203, - "77": 0.15194, - "78": 0.15284, - "79": 0.15345, - "80": 0.15138, - "81": 0.15298, - "82": 0.15115, - "83": 0.15281, - "84": 0.1544, - "85": 0.15277, - "86": 0.15368, - "87": 0.15373, - "88": 0.15359, - "89": 0.15205, - "90": 0.1535, - "91": 0.15459, - "92": 
0.15406, - "93": 0.15133, - "94": 0.1533, - "95": 0.15198, - "96": 0.15195, - "97": 0.1533, - "98": 0.15406, - "99": 0.1528, - "100": 0.15371 + "1": 8.61654, + "2": 0.16646, + "3": 0.14939, + "4": 0.12694, + "5": 0.1251, + "6": 0.12545, + "7": 0.12533, + "8": 0.1271, + "9": 0.1261, + "10": 0.12491, + "11": 0.12876, + "12": 0.13422, + "13": 0.13211, + "14": 0.12395, + "15": 0.12563, + "16": 0.12703, + "17": 0.1243, + "18": 0.12651, + "19": 0.12452, + "20": 0.12538, + "21": 0.1244, + "22": 0.12395, + "23": 0.12379, + "24": 0.12455, + "25": 0.12457, + "26": 0.12444, + "27": 0.12397, + "28": 0.125, + "29": 0.13321, + "30": 0.13442, + "31": 0.13329, + "32": 0.12696, + "33": 0.12493, + "34": 0.12398, + "35": 0.12918, + "36": 0.13252, + "37": 0.13148, + "38": 0.13338, + "39": 0.13083, + "40": 0.13113, + "41": 0.13061, + "42": 0.1295, + "43": 0.1305, + "44": 0.13132, + "45": 0.13148, + "46": 0.13113, + "47": 0.13116, + "48": 0.12551, + "49": 0.12779, + "50": 0.12989, + "51": 0.1367, + "52": 0.13188, + "53": 0.13008, + "54": 0.13122, + "55": 0.12979, + "56": 0.12943, + "57": 0.13002, + "58": 0.12923, + "59": 0.12984, + "60": 0.13209, + "61": 0.13094, + "62": 0.13083, + "63": 0.12826, + "64": 0.13104, + "65": 0.1292, + "66": 0.12985, + "67": 0.1295, + "68": 0.12398, + "69": 0.12509, + "70": 0.12208, + "71": 0.12371, + "72": 0.12256, + "73": 0.12266, + "74": 0.12476, + "75": 0.12866, + "76": 0.12272, + "77": 0.12403, + "78": 0.12307, + "79": 0.12209, + "80": 0.12352, + "81": 0.12155, + "82": 0.12329, + "83": 0.12201, + "84": 0.12239, + "85": 0.12414, + "86": 0.12372, + "87": 0.12357, + "88": 0.12705, + "89": 0.1249, + "90": 0.12289, + "91": 0.12523, + "92": 0.51175, + "93": 0.12454, + "94": 0.12634, + "95": 0.12226, + "96": 0.12255, + "97": 0.12357, + "98": 0.12405, + "99": 0.12419, + "100": 0.12384 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..fdc5f0244ea --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85581, + "52": 9.75235, + "53": 10.07582, + "54": 9.95687, + "55": 9.882, + "56": 9.63137, + "57": 9.48647, + "58": 9.83111, + "59": 9.58896, + "60": 9.50647, + "61": 9.70361, + "62": 9.98283, + "63": 9.38302, + "64": 9.77906, + "65": 8.95171, + "66": 9.70162, + "67": 9.372, + "68": 9.78849, + "69": 9.79851, + "70": 9.74738, + "71": 9.61908, + "72": 9.58496, + "73": 9.49723, + "74": 8.93927, + "75": 9.42706, + "76": 9.08018, + "77": 10.06566, + "78": 9.72889, + "79": 9.37757, + "80": 9.40987, + "81": 9.47974, + "82": 9.70177, + "83": 9.30611, + "84": 9.42088, + "85": 9.61376, + "86": 9.07651, + "87": 9.59452, + "88": 
9.75067, + "89": 9.60239, + "90": 9.81895, + "91": 9.33895, + "92": 9.35712, + "93": 9.07879, + "94": 8.83504, + "95": 9.52168, + "96": 9.53002, + "97": 9.31306, + "98": 9.67783, + "99": 8.89053, + "100": 9.39725 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 70.0, + "52": 81.0, + "53": 95.0, + "54": 101.0, + "55": 58.0, + "56": 90.0, + "57": 83.0, + "58": 90.0, + "59": 79.0, + "60": 84.0, + "61": 92.0, + "62": 102.0, + "63": 78.0, + "64": 73.0, + "65": 81.0, + "66": 88.0, + "67": 54.0, + "68": 57.0, + "69": 72.0, + "70": 88.0, + "71": 82.0, + "72": 64.0, + "73": 78.0, + "74": 76.0, + "75": 70.0, + "76": 78.0, + "77": 67.0, + "78": 86.0, + "79": 76.0, + "80": 90.0, + "81": 92.0, + "82": 72.0, + "83": 61.0, + "84": 65.0, + "85": 89.0, + "86": 73.0, + "87": 89.0, + "88": 63.0, + "89": 83.0, + "90": 72.0, + "91": 55.0, + "92": 63.0, + "93": 47.0, + "94": 74.0, + "95": 70.0, + "96": 73.0, + "97": 80.0, + "98": 76.0, + "99": 68.0, + "100": 75.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": 
"nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 725497344.0, + "52": 725497344.0, + "53": 725497344.0, + "54": 725497344.0, + "55": 725497344.0, + "56": 725497344.0, + "57": 725497344.0, + "58": 725497344.0, + "59": 725497344.0, + "60": 725497344.0, + "61": 725497344.0, + "62": 725497344.0, + "63": 725497344.0, + "64": 725497344.0, + "65": 725497344.0, + "66": 725497344.0, + "67": 725497344.0, + "68": 725497344.0, + "69": 725497344.0, + "70": 725497344.0, + "71": 725497344.0, + "72": 725497344.0, + "73": 725497344.0, + "74": 725497344.0, + "75": 725497344.0, + "76": 725497344.0, + "77": 725497344.0, + "78": 725497344.0, + "79": 725497344.0, + "80": 725497344.0, + "81": 725497344.0, + "82": 725497344.0, + "83": 725497344.0, + "84": 725497344.0, + "85": 725497344.0, + "86": 725497344.0, + "87": 725497344.0, + "88": 725497344.0, + "89": 725497344.0, + "90": 725497344.0, + "91": 725497344.0, + "92": 725497344.0, + "93": 725497344.0, + "94": 725497344.0, + "95": 725497344.0, + "96": 725497344.0, + "97": 725497344.0, + "98": 725497344.0, + "99": 725497344.0, + "100": 725497344.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 
"nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2074991104.0, + "52": 2074992128.0, + "53": 2074992128.0, + "54": 2074992128.0, + "55": 2074992128.0, + "56": 2074992128.0, + "57": 2074992128.0, + "58": 2074992128.0, + "59": 2074992128.0, + "60": 2074992128.0, + "61": 2074992128.0, + "62": 2074992128.0, + "63": 2074992128.0, + "64": 2074992128.0, + "65": 2074992128.0, + "66": 2074992128.0, + "67": 2074992128.0, + "68": 2074992128.0, + "69": 2074992128.0, + "70": 2074992128.0, + "71": 2074992128.0, + "72": 2074992128.0, + "73": 2074992128.0, + "74": 2074992128.0, + "75": 2074992128.0, + "76": 2074992128.0, + "77": 2074992128.0, + "78": 2074992128.0, + "79": 2074992128.0, + "80": 2074992128.0, + "81": 2074992128.0, + "82": 2074992128.0, + "83": 2074992128.0, + "84": 2074992128.0, + "85": 2074992128.0, + "86": 2074992128.0, + "87": 2074992128.0, + "88": 2074992128.0, + "89": 2074992128.0, + "90": 2074992128.0, + "91": 2074992128.0, + "92": 2074992128.0, + "93": 2074992128.0, + "94": 2074992128.0, + "95": 2074992128.0, + "96": 2074992128.0, + "97": 2074992128.0, + "98": 2074992128.0, + "99": 2074992128.0, + "100": 2074992128.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + 
"20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.08401, + "52": 0.17107, + "53": 0.13414, + "54": 0.13296, + "55": 0.12627, + "56": 0.12542, + "57": 0.12564, + "58": 0.12468, + "59": 0.1245, + "60": 0.12595, + "61": 0.1248, + "62": 0.12424, + "63": 0.1263, + "64": 0.12611, + "65": 0.12448, + "66": 0.1268, + "67": 0.12509, + "68": 0.12463, + "69": 0.12587, + "70": 0.12403, + "71": 0.12788, + "72": 0.12581, + "73": 0.12599, + "74": 0.12429, + "75": 0.12845, + "76": 0.12517, + "77": 0.12546, + "78": 0.1257, + "79": 0.12526, + "80": 0.12602, + "81": 0.13237, + "82": 0.12452, + "83": 0.13316, + "84": 0.13434, + "85": 0.1319, + "86": 0.13456, + "87": 0.13266, + "88": 0.13492, + "89": 0.1345, + "90": 0.13063, + "91": 0.13342, + "92": 0.13139, + "93": 0.13378, + "94": 0.13513, + "95": 0.13196, + "96": 0.13396, + "97": 0.12722, + "98": 0.12492, + "99": 0.12599, + "100": 0.12635 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json index 96cf765384a..c89ea54f89f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ 
"end_step": 100, "step_interval": 1, "values": { - "1": 6.10882, - "2": 0.24563, - "3": 0.21507, - "4": 0.21225, - "5": 0.21165, - "6": 0.21127, - "7": 0.21406, - "8": 0.21402, - "9": 0.21175, - "10": 0.19518, - "11": 0.19565, - "12": 0.19461, - "13": 0.19428, - "14": 0.19385, - "15": 0.19329, - "16": 0.19311, - "17": 0.19391, - "18": 0.19383, - "19": 0.19364, - "20": 0.19408, - "21": 0.19327, - "22": 0.19729, - "23": 0.19599, - "24": 0.19601, - "25": 0.1965, - "26": 0.19683, - "27": 0.19626, - "28": 0.19667, - "29": 0.1989, - "30": 0.19644, - "31": 0.19728, - "32": 0.19614, - "33": 0.1973, - "34": 0.1971, - "35": 0.19674, - "36": 0.19628, - "37": 0.19578, - "38": 0.19629, - "39": 0.19673, - "40": 0.19712, - "41": 0.19593, - "42": 0.1969, - "43": 0.19639, - "44": 0.20378, - "45": 0.19737, - "46": 0.19738, - "47": 0.19532, - "48": 0.19579, - "49": 0.19617, - "50": 0.19695, - "51": 0.20318, - "52": 0.19428, - "53": 0.19415, - "54": 0.19663, - "55": 0.19266, - "56": 0.19426, - "57": 0.19455, - "58": 0.19473, - "59": 0.19413, - "60": 0.19467, - "61": 0.19511, - "62": 0.19475, - "63": 0.19464, - "64": 0.19452, - "65": 0.19445, - "66": 0.19395, - "67": 0.19423, - "68": 0.19431, - "69": 0.19512, - "70": 0.1941, - "71": 0.19453, - "72": 0.19467, - "73": 0.19615, - "74": 0.19355, - "75": 0.19419, - "76": 0.19407, - "77": 0.19455, - "78": 0.19511, - "79": 0.19498, - "80": 0.19577, - "81": 0.19399, - "82": 0.19362, - "83": 0.19425, - "84": 0.19418, - "85": 0.19432, - "86": 0.20057, - "87": 0.19522, - "88": 0.19447, - "89": 0.19472, - "90": 0.19377, - "91": 0.19433, - "92": 0.19432, - "93": 0.19456, - "94": 0.19394, - "95": 0.19417, - "96": 0.19476, - "97": 0.19423, - "98": 0.19401, - "99": 0.19403, - "100": 0.19364 + "1": 4.2285, + "2": 0.2225, + "3": 0.20464, + "4": 0.18763, + "5": 0.18448, + "6": 0.18488, + "7": 0.1868, + "8": 0.18507, + "9": 0.18639, + "10": 0.18525, + "11": 0.185, + "12": 0.1892, + "13": 0.18964, + "14": 0.18674, + "15": 0.18659, + "16": 0.18641, + "17": 
0.1862, + "18": 0.18503, + "19": 0.18484, + "20": 0.18494, + "21": 0.18464, + "22": 0.18544, + "23": 0.18496, + "24": 0.18402, + "25": 0.18506, + "26": 0.18392, + "27": 0.18476, + "28": 0.18508, + "29": 0.18537, + "30": 0.18566, + "31": 0.18562, + "32": 0.1846, + "33": 0.18516, + "34": 0.1847, + "35": 0.18539, + "36": 0.18474, + "37": 0.18449, + "38": 0.18492, + "39": 0.18406, + "40": 0.1848, + "41": 0.18488, + "42": 0.18457, + "43": 0.18477, + "44": 0.18339, + "45": 0.18392, + "46": 0.18291, + "47": 0.1845, + "48": 0.18355, + "49": 0.18321, + "50": 0.1836, + "51": 0.19691, + "52": 0.18837, + "53": 0.18901, + "54": 0.18882, + "55": 0.18866, + "56": 0.18799, + "57": 0.18879, + "58": 0.18717, + "59": 0.18786, + "60": 0.18816, + "61": 0.18754, + "62": 0.18765, + "63": 0.18797, + "64": 0.18736, + "65": 0.19017, + "66": 0.18805, + "67": 0.18724, + "68": 0.18718, + "69": 0.18876, + "70": 0.18803, + "71": 0.18742, + "72": 0.1906, + "73": 0.18971, + "74": 0.58261, + "75": 0.18725, + "76": 0.1877, + "77": 0.18725, + "78": 0.18828, + "79": 0.1888, + "80": 0.1867, + "81": 0.18809, + "82": 0.18881, + "83": 0.18773, + "84": 0.18814, + "85": 0.18863, + "86": 0.18809, + "87": 0.18728, + "88": 0.18747, + "89": 0.18808, + "90": 0.18818, + "91": 0.18719, + "92": 0.18753, + "93": 0.18888, + "94": 0.18938, + "95": 0.18815, + "96": 0.18883, + "97": 0.18854, + "98": 0.19027, + "99": 0.18914, + "100": 0.18784 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..b44b9766e91 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { 
+ "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.84971, + "52": 9.74156, + "53": 10.06324, + "54": 9.94584, + "55": 9.87735, + "56": 9.62744, + "57": 9.4726, + "58": 9.82907, + "59": 9.58298, + "60": 9.49182, + "61": 9.6996, + "62": 9.98091, + "63": 9.37212, + "64": 9.77558, + "65": 8.94327, + "66": 9.69991, + "67": 9.3641, + "68": 9.78706, + "69": 9.78396, + "70": 9.72291, + "71": 9.60749, + "72": 9.58417, + "73": 9.4909, + "74": 8.94863, + "75": 9.41807, + "76": 9.08721, + "77": 10.06284, + "78": 9.729, + "79": 9.37087, + "80": 9.40029, + "81": 9.47753, + "82": 9.69123, + "83": 9.30764, + "84": 9.4125, + "85": 9.61132, + "86": 9.07624, + "87": 9.59459, + "88": 9.74769, + "89": 9.60678, + "90": 9.81079, + "91": 9.34443, + "92": 9.36534, + "93": 9.07741, + "94": 8.82974, + "95": 9.51676, + "96": 9.52545, + "97": 9.31031, + "98": 9.67811, + "99": 8.88848, + "100": 9.40128 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 
"nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 77.0, + "52": 100.0, + "53": 71.0, + "54": 67.0, + "55": 70.0, + "56": 83.0, + "57": 74.0, + "58": 106.0, + "59": 72.0, + "60": 98.0, + "61": 67.0, + "62": 73.0, + "63": 77.0, + "64": 94.0, + "65": 82.0, + "66": 87.0, + "67": 65.0, + "68": 78.0, + "69": 59.0, + "70": 102.0, + "71": 82.0, + "72": 60.0, + "73": 96.0, + "74": 61.0, + "75": 64.0, + "76": 70.0, + "77": 84.0, + "78": 93.0, + "79": 102.0, + "80": 71.0, + "81": 88.0, + "82": 85.0, + "83": 75.0, + "84": 69.0, + "85": 84.0, + "86": 66.0, + "87": 93.0, + "88": 96.0, + "89": 73.0, + "90": 77.0, + "91": 66.0, + "92": 86.0, + "93": 63.0, + "94": 60.0, + "95": 70.0, + "96": 65.0, + "97": 67.0, + "98": 96.0, + "99": 54.0, + "100": 77.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + 
"47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 516588032.0, + "52": 516588032.0, + "53": 516588032.0, + "54": 516588032.0, + "55": 516588032.0, + "56": 516588032.0, + "57": 516588032.0, + "58": 516588032.0, + "59": 516588032.0, + "60": 516588032.0, + "61": 516588032.0, + "62": 516588032.0, + "63": 516588032.0, + "64": 516588032.0, + "65": 516588032.0, + "66": 516588032.0, + "67": 516588032.0, + "68": 516588032.0, + "69": 516588032.0, + "70": 516588032.0, + "71": 516588032.0, + "72": 516588032.0, + "73": 516588032.0, + "74": 516588032.0, + "75": 516588032.0, + "76": 516588032.0, + "77": 516588032.0, + "78": 516588032.0, + "79": 516588032.0, + "80": 516588032.0, + "81": 516588032.0, + "82": 516588032.0, + "83": 516588032.0, + "84": 516588032.0, + "85": 516588032.0, + "86": 516588032.0, + "87": 516588032.0, + "88": 516588032.0, + "89": 516588032.0, + "90": 516588032.0, + "91": 516588032.0, + "92": 516588032.0, + "93": 516588032.0, + "94": 516588032.0, + "95": 516588032.0, + "96": 516588032.0, + "97": 516588032.0, + "98": 516588032.0, + "99": 516588032.0, + "100": 516588032.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1879713280.0, + "52": 
1879714304.0, + "53": 1879714304.0, + "54": 1879714304.0, + "55": 1879714304.0, + "56": 1879714304.0, + "57": 1879714304.0, + "58": 1879714304.0, + "59": 1879714304.0, + "60": 1879714304.0, + "61": 1879714304.0, + "62": 1879714304.0, + "63": 1879714304.0, + "64": 1879714304.0, + "65": 1879714304.0, + "66": 1879714304.0, + "67": 1879714304.0, + "68": 1879714304.0, + "69": 1879714304.0, + "70": 1879714304.0, + "71": 1879714304.0, + "72": 1879714304.0, + "73": 1879714304.0, + "74": 1879714304.0, + "75": 1879714304.0, + "76": 1879714304.0, + "77": 1879714304.0, + "78": 1879714304.0, + "79": 1879714304.0, + "80": 1879714304.0, + "81": 1879714304.0, + "82": 1879714304.0, + "83": 1879714304.0, + "84": 1879714304.0, + "85": 1879714304.0, + "86": 1879714304.0, + "87": 1879714304.0, + "88": 1879714304.0, + "89": 1879714304.0, + "90": 1879714304.0, + "91": 1879714304.0, + "92": 1879714304.0, + "93": 1879714304.0, + "94": 1879714304.0, + "95": 1879714304.0, + "96": 1879714304.0, + "97": 1879714304.0, + "98": 1879714304.0, + "99": 1879714304.0, + "100": 1879714304.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.66406, + "52": 0.2158, + "53": 0.20019, + "54": 0.19602, + "55": 0.19005, 
+ "56": 0.19244, + "57": 0.19305, + "58": 0.19241, + "59": 0.19133, + "60": 0.19108, + "61": 0.19083, + "62": 0.19044, + "63": 0.19122, + "64": 0.19085, + "65": 0.19237, + "66": 0.19162, + "67": 0.19273, + "68": 0.19427, + "69": 0.19391, + "70": 0.19124, + "71": 0.19263, + "72": 0.19156, + "73": 0.19165, + "74": 0.1912, + "75": 0.1916, + "76": 0.19244, + "77": 0.19754, + "78": 0.19743, + "79": 0.19729, + "80": 0.19745, + "81": 0.19719, + "82": 0.19703, + "83": 0.19876, + "84": 0.19042, + "85": 0.18981, + "86": 0.18931, + "87": 0.19021, + "88": 0.18916, + "89": 0.19085, + "90": 0.19016, + "91": 0.19021, + "92": 0.19141, + "93": 0.19167, + "94": 0.19089, + "95": 0.19116, + "96": 0.18907, + "97": 0.19161, + "98": 0.19075, + "99": 0.1909, + "100": 0.19241 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..3cfdeafee58 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86897, + "2": 10.88544, + "3": 10.86473, + "4": 10.86826, + "5": 10.87436, + "6": 10.89005, + "7": 10.87769, + "8": 10.86364, + "9": 10.88282, + "10": 10.84687, + "11": 10.87102, + "12": 10.87345, + "13": 10.8814, + "14": 10.8877, + "15": 10.83869, + "16": 10.8239, + "17": 10.80197, + "18": 10.81094, + "19": 10.82192, + "20": 10.71791, + "21": 10.68914, + "22": 10.57271, + "23": 10.7081, + "24": 10.59543, + "25": 10.55292, + "26": 10.61257, + "27": 10.60051, + "28": 10.56173, + "29": 10.58089, + "30": 10.35595, + "31": 10.1182, + "32": 
10.44815, + "33": 10.4542, + "34": 10.21553, + "35": 10.26124, + "36": 10.20776, + "37": 10.33673, + "38": 10.17741, + "39": 10.39297, + "40": 10.06349, + "41": 10.13887, + "42": 10.2056, + "43": 9.82809, + "44": 9.94547, + "45": 9.82561, + "46": 9.80186, + "47": 10.14049, + "48": 9.84276, + "49": 9.52016, + "50": 9.88454, + "51": 9.84743, + "52": 9.74209, + "53": 10.05697, + "54": 9.9505, + "55": 9.88145, + "56": 9.61274, + "57": 9.4687, + "58": 9.82193, + "59": 9.57642, + "60": 9.49762, + "61": 9.69189, + "62": 9.9867, + "63": 9.37512, + "64": 9.76679, + "65": 8.94648, + "66": 9.7023, + "67": 9.36326, + "68": 9.7831, + "69": 9.7986, + "70": 9.7317, + "71": 9.62571, + "72": 9.58488, + "73": 9.48967, + "74": 8.9286, + "75": 9.40862, + "76": 9.07925, + "77": 10.0594, + "78": 9.72288, + "79": 9.37784, + "80": 9.40429, + "81": 9.48309, + "82": 9.7004, + "83": 9.31595, + "84": 9.41838, + "85": 9.61685, + "86": 9.07533, + "87": 9.59616, + "88": 9.75215, + "89": 9.60184, + "90": 9.82281, + "91": 9.34037, + "92": 9.35854, + "93": 9.08805, + "94": 8.83037, + "95": 9.5266, + "96": 9.53049, + "97": 9.30389, + "98": 9.67196, + "99": 8.89637, + "100": 9.40644 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1621.0, + "2": 1657.0, + "3": 1580.0, + "4": 1839.0, + "5": 1862.0, + "6": 1724.0, + "7": 1714.0, + "8": 1670.0, + "9": 1762.0, + "10": 1358.0, + "11": 1734.0, + "12": 1682.0, + "13": 1761.0, + "14": 1731.0, + "15": 1788.0, + "16": 1801.0, + "17": 1866.0, + "18": 1636.0, + "19": 1709.0, + "20": 1607.0, + "21": 1821.0, + "22": 1666.0, + "23": 1991.0, + "24": 1585.0, + "25": 1587.0, + "26": 1631.0, + "27": 1714.0, + "28": 1966.0, + "29": 1997.0, + "30": 1851.0, + "31": 1581.0, + "32": 1864.0, + "33": 2107.0, + "34": 1846.0, + "35": 1982.0, + "36": 1904.0, + "37": 2373.0, + "38": 2172.0, + "39": 2343.0, + "40": 2149.0, + "41": 2331.0, + "42": 2199.0, + "43": 1914.0, + "44": 2065.0, + "45": 2081.0, + "46": 2352.0, + 
"47": 2497.0, + "48": 2303.0, + "49": 2346.0, + "50": 2411.0, + "51": 2491.0, + "52": 2552.0, + "53": 2980.0, + "54": 2680.0, + "55": 2274.0, + "56": 2734.0, + "57": 2319.0, + "58": 2907.0, + "59": 2886.0, + "60": 2566.0, + "61": 2855.0, + "62": 2704.0, + "63": 2370.0, + "64": 2998.0, + "65": 2563.0, + "66": 2868.0, + "67": 2762.0, + "68": 2739.0, + "69": 2730.0, + "70": 3156.0, + "71": 2803.0, + "72": 2506.0, + "73": 2896.0, + "74": 1937.0, + "75": 2450.0, + "76": 2794.0, + "77": 3047.0, + "78": 3104.0, + "79": 3069.0, + "80": 3286.0, + "81": 3543.0, + "82": 3192.0, + "83": 2614.0, + "84": 3273.0, + "85": 3111.0, + "86": 2680.0, + "87": 3654.0, + "88": 3117.0, + "89": 3351.0, + "90": 3086.0, + "91": 2721.0, + "92": 3045.0, + "93": 2672.0, + "94": 3326.0, + "95": 3125.0, + "96": 3309.0, + "97": 3208.0, + "98": 3572.0, + "99": 2980.0, + "100": 3355.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 511860224.0, + "2": 511860224.0, + "3": 511860224.0, + "4": 511860224.0, + "5": 511860224.0, + "6": 511860224.0, + "7": 511860224.0, + "8": 511860224.0, + "9": 511860224.0, + "10": 511860224.0, + "11": 511860224.0, + "12": 511860224.0, + "13": 511860224.0, + "14": 511860224.0, + "15": 511860224.0, + "16": 511860224.0, + "17": 511860224.0, + "18": 511860224.0, + "19": 511860224.0, + "20": 511860224.0, + "21": 511860224.0, + "22": 511860224.0, + "23": 511860224.0, + "24": 511860224.0, + "25": 511860224.0, + "26": 511860224.0, + "27": 511860224.0, + "28": 511860224.0, + "29": 511860224.0, + "30": 511860224.0, + "31": 511860224.0, + "32": 511860224.0, + "33": 511860224.0, + "34": 511860224.0, + "35": 511860224.0, + "36": 511860224.0, + "37": 511860224.0, + "38": 511860224.0, + "39": 511860224.0, + "40": 511860224.0, + "41": 511860224.0, + "42": 511860224.0, + "43": 511860224.0, + "44": 511860224.0, + "45": 511860224.0, + "46": 511860224.0, + "47": 511860224.0, + "48": 511860224.0, + "49": 511860224.0, + "50": 
511860224.0, + "51": 511860224.0, + "52": 511860224.0, + "53": 511860224.0, + "54": 511860224.0, + "55": 511860224.0, + "56": 511860224.0, + "57": 511860224.0, + "58": 511860224.0, + "59": 511860224.0, + "60": 511860224.0, + "61": 511860224.0, + "62": 511860224.0, + "63": 511860224.0, + "64": 511860224.0, + "65": 511860224.0, + "66": 511860224.0, + "67": 511860224.0, + "68": 511860224.0, + "69": 511860224.0, + "70": 511860224.0, + "71": 511860224.0, + "72": 511860224.0, + "73": 511860224.0, + "74": 511860224.0, + "75": 511860224.0, + "76": 511860224.0, + "77": 511860224.0, + "78": 511860224.0, + "79": 511860224.0, + "80": 511860224.0, + "81": 511860224.0, + "82": 511860224.0, + "83": 511860224.0, + "84": 511860224.0, + "85": 511860224.0, + "86": 511860224.0, + "87": 511860224.0, + "88": 511860224.0, + "89": 511860224.0, + "90": 511860224.0, + "91": 511860224.0, + "92": 511860224.0, + "93": 511860224.0, + "94": 511860224.0, + "95": 511860224.0, + "96": 511860224.0, + "97": 511860224.0, + "98": 511860224.0, + "99": 511860224.0, + "100": 511860224.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1251365376.0, + "2": 1430390272.0, + "3": 1430390272.0, + "4": 1430390272.0, + "5": 1430390272.0, + "6": 1430390272.0, + "7": 1430390272.0, + "8": 1430390272.0, + "9": 1430390272.0, + "10": 1430390272.0, + "11": 1430390272.0, + "12": 1430390272.0, + "13": 1430390272.0, + "14": 1430390272.0, + "15": 1430390272.0, + "16": 1430390272.0, + "17": 1430390272.0, + "18": 1430390272.0, + "19": 1430390272.0, + "20": 1430390272.0, + "21": 1430390272.0, + "22": 1430390272.0, + "23": 1430390272.0, + "24": 1430390272.0, + "25": 1430390272.0, + "26": 1430390272.0, + "27": 1430390272.0, + "28": 1430390272.0, + "29": 1430390272.0, + "30": 1430390272.0, + "31": 1430390272.0, + "32": 1430390272.0, + "33": 1430390272.0, + "34": 1430390272.0, + "35": 1430390272.0, + "36": 1430390272.0, + "37": 1430390272.0, + "38": 
1430390272.0, + "39": 1430390272.0, + "40": 1430390272.0, + "41": 1430390272.0, + "42": 1430390272.0, + "43": 1430390272.0, + "44": 1430390272.0, + "45": 1430390272.0, + "46": 1430390272.0, + "47": 1430390272.0, + "48": 1430390272.0, + "49": 1430390272.0, + "50": 1430390272.0, + "51": 1430390272.0, + "52": 1430390272.0, + "53": 1430390272.0, + "54": 1430390272.0, + "55": 1430390272.0, + "56": 1430390272.0, + "57": 1430390272.0, + "58": 1430390272.0, + "59": 1430390272.0, + "60": 1430390272.0, + "61": 1430390272.0, + "62": 1430390272.0, + "63": 1430390272.0, + "64": 1430390272.0, + "65": 1430390272.0, + "66": 1430390272.0, + "67": 1430390272.0, + "68": 1430390272.0, + "69": 1430390272.0, + "70": 1430390272.0, + "71": 1430390272.0, + "72": 1430390272.0, + "73": 1430390272.0, + "74": 1430390272.0, + "75": 1430390272.0, + "76": 1430390272.0, + "77": 1430390272.0, + "78": 1430390272.0, + "79": 1430390272.0, + "80": 1430390272.0, + "81": 1430390272.0, + "82": 1430390272.0, + "83": 1430390272.0, + "84": 1430390272.0, + "85": 1430390272.0, + "86": 1430390272.0, + "87": 1430390272.0, + "88": 1430390272.0, + "89": 1430390272.0, + "90": 1430390272.0, + "91": 1430390272.0, + "92": 1430390272.0, + "93": 1430390272.0, + "94": 1430390272.0, + "95": 1430390272.0, + "96": 1430390272.0, + "97": 1430390272.0, + "98": 1430390272.0, + "99": 1430390272.0, + "100": 1430390272.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5.9274, + "2": 0.21272, + "3": 0.22152, + "4": 0.1871, + "5": 0.21307, + "6": 0.21965, + "7": 0.22219, + "8": 0.22237, + "9": 0.22411, + "10": 0.22202, + "11": 0.22123, + "12": 0.22038, + "13": 0.22083, + "14": 0.21999, + "15": 0.21683, + "16": 0.22088, + "17": 0.22103, + "18": 0.22014, + "19": 0.21937, + "20": 0.21984, + "21": 0.21934, + "22": 0.22176, + "23": 0.21919, + "24": 0.21956, + "25": 0.21941, + "26": 0.5044, + "27": 0.22459, + "28": 0.22027, + "29": 0.21989, + "30": 0.22088, + "31": 0.22111, + 
"32": 0.22371, + "33": 0.22449, + "34": 0.22278, + "35": 0.22512, + "36": 0.2238, + "37": 0.22153, + "38": 0.22287, + "39": 0.22369, + "40": 0.22242, + "41": 0.22005, + "42": 0.22123, + "43": 0.22176, + "44": 0.22219, + "45": 0.22209, + "46": 0.22213, + "47": 0.22118, + "48": 0.22156, + "49": 0.22452, + "50": 0.22094, + "51": 0.23758, + "52": 0.22018, + "53": 0.22125, + "54": 0.22334, + "55": 0.22156, + "56": 0.22191, + "57": 0.54851, + "58": 0.22402, + "59": 0.22203, + "60": 0.22556, + "61": 0.22485, + "62": 0.22511, + "63": 0.22362, + "64": 0.22461, + "65": 0.2231, + "66": 0.22489, + "67": 0.2248, + "68": 0.22682, + "69": 0.22568, + "70": 0.22662, + "71": 0.22741, + "72": 0.22865, + "73": 0.22913, + "74": 0.2291, + "75": 0.22782, + "76": 0.81496, + "77": 0.23726, + "78": 0.22937, + "79": 0.22963, + "80": 0.22908, + "81": 0.2307, + "82": 0.22778, + "83": 0.22872, + "84": 0.2297, + "85": 0.22998, + "86": 0.22898, + "87": 0.22903, + "88": 0.22865, + "89": 0.22964, + "90": 0.23194, + "91": 0.22888, + "92": 0.23063, + "93": 0.22825, + "94": 0.23, + "95": 0.22281, + "96": 0.22333, + "97": 0.2242, + "98": 0.22437, + "99": 0.22403, + "100": 0.22146 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json index d6134cdcc5a..756fbc3b53c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 8.20377, - "2": 0.2288, - "3": 0.19616, - "4": 0.19587, - 
"5": 0.19737, - "6": 0.19775, - "7": 0.19658, - "8": 0.19621, - "9": 0.19557, - "10": 0.19534, - "11": 0.19453, - "12": 0.1949, - "13": 0.19522, - "14": 0.19865, - "15": 0.20415, - "16": 0.19686, - "17": 0.1985, - "18": 0.19858, - "19": 0.19709, - "20": 0.19609, - "21": 0.19758, - "22": 0.19837, - "23": 0.19786, - "24": 0.19688, - "25": 0.1972, - "26": 0.19859, - "27": 0.19814, - "28": 0.1989, - "29": 0.1984, - "30": 0.19783, - "31": 0.19727, - "32": 0.19754, - "33": 0.19648, - "34": 0.19977, - "35": 0.19847, - "36": 0.19696, - "37": 0.20498, - "38": 0.20415, - "39": 0.20225, - "40": 0.19712, - "41": 0.19751, - "42": 0.19764, - "43": 0.19738, - "44": 0.19703, - "45": 0.19703, - "46": 0.19814, - "47": 0.19757, - "48": 0.19759, - "49": 0.19688, - "50": 0.20181, - "51": 0.22215, - "52": 0.2134, - "53": 0.2129, - "54": 0.2133, - "55": 0.21255, - "56": 0.21221, - "57": 0.21233, - "58": 0.2124, - "59": 0.21242, - "60": 0.21258, - "61": 0.21219, - "62": 0.21255, - "63": 0.21385, - "64": 0.2127, - "65": 0.21252, - "66": 0.21191, - "67": 0.21327, - "68": 0.21176, - "69": 0.2127, - "70": 0.21284, - "71": 0.21291, - "72": 0.21265, - "73": 0.21221, - "74": 0.21387, - "75": 0.21247, - "76": 0.21204, - "77": 0.21169, - "78": 0.21259, - "79": 0.21196, - "80": 0.21204, - "81": 0.21211, - "82": 0.21314, - "83": 0.21268, - "84": 0.21291, - "85": 0.21328, - "86": 0.2128, - "87": 0.21213, - "88": 0.21192, - "89": 0.21242, - "90": 0.21253, - "91": 0.21252, - "92": 0.21236, - "93": 0.21254, - "94": 0.21255, - "95": 0.21209, - "96": 0.21345, - "97": 0.21202, - "98": 0.21234, - "99": 0.21237, - "100": 0.21317 + "1": 4.18215, + "2": 0.24102, + "3": 0.22538, + "4": 0.19265, + "5": 0.1927, + "6": 0.19409, + "7": 0.19316, + "8": 0.20321, + "9": 0.19569, + "10": 0.19176, + "11": 0.19371, + "12": 0.1915, + "13": 0.1999, + "14": 0.19198, + "15": 0.19063, + "16": 0.18985, + "17": 0.19307, + "18": 0.19389, + "19": 0.18963, + "20": 0.18912, + "21": 0.18939, + "22": 0.19051, + "23": 0.19061, + "24": 
0.18863, + "25": 0.18777, + "26": 0.18904, + "27": 0.18951, + "28": 0.18898, + "29": 0.18846, + "30": 0.18884, + "31": 0.18892, + "32": 0.18966, + "33": 0.1906, + "34": 0.18855, + "35": 0.18874, + "36": 0.18902, + "37": 0.18886, + "38": 0.2005, + "39": 0.18875, + "40": 0.18823, + "41": 0.18805, + "42": 0.1885, + "43": 0.18816, + "44": 0.1884, + "45": 0.18934, + "46": 0.18913, + "47": 0.18837, + "48": 0.18793, + "49": 0.18776, + "50": 0.19086, + "51": 0.20025, + "52": 0.19114, + "53": 0.19106, + "54": 0.19178, + "55": 0.1907, + "56": 0.1918, + "57": 0.19088, + "58": 0.19169, + "59": 0.19055, + "60": 0.19039, + "61": 0.19129, + "62": 0.19114, + "63": 0.19039, + "64": 0.19023, + "65": 0.19101, + "66": 0.19064, + "67": 0.19048, + "68": 0.19034, + "69": 0.19008, + "70": 0.19082, + "71": 0.19018, + "72": 0.19111, + "73": 0.18977, + "74": 0.19049, + "75": 0.19112, + "76": 0.19169, + "77": 0.1913, + "78": 0.1905, + "79": 0.19033, + "80": 0.19026, + "81": 0.18982, + "82": 0.18941, + "83": 0.19009, + "84": 0.18968, + "85": 0.1902, + "86": 0.19092, + "87": 0.19042, + "88": 0.18999, + "89": 0.19013, + "90": 0.18962, + "91": 0.18986, + "92": 0.18975, + "93": 0.19013, + "94": 0.19113, + "95": 0.19019, + "96": 0.19136, + "97": 0.18954, + "98": 0.18934, + "99": 0.19002, + "100": 0.18991 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..ce275a70055 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + 
"2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.84971, + "52": 9.74156, + "53": 10.06322, + "54": 9.94581, + "55": 9.87731, + "56": 9.62746, + "57": 9.47259, + "58": 9.82912, + "59": 9.583, + "60": 9.49181, + "61": 9.69961, + "62": 9.98089, + "63": 9.37212, + "64": 9.7756, + "65": 8.9433, + "66": 9.69993, + "67": 9.36414, + "68": 9.78706, + "69": 9.78397, + "70": 9.72288, + "71": 9.60749, + "72": 9.58416, + "73": 9.49093, + "74": 8.94864, + "75": 9.41807, + "76": 9.08721, + "77": 10.06283, + "78": 9.729, + "79": 9.37091, + "80": 9.40033, + "81": 9.47754, + "82": 9.69121, + "83": 9.30762, + "84": 9.41252, + "85": 9.61132, + "86": 9.07621, + "87": 9.59459, + "88": 9.74768, + "89": 9.6068, + "90": 9.81078, + "91": 9.34441, + "92": 9.36535, + "93": 9.07743, + "94": 8.82975, + "95": 9.51676, + "96": 9.52546, + "97": 9.31031, + "98": 9.67812, + "99": 8.88848, + "100": 9.40128 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": 
"nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2735.0, + "52": 2607.0, + "53": 2951.0, + "54": 2672.0, + "55": 2451.0, + "56": 2712.0, + "57": 2392.0, + "58": 2979.0, + "59": 2869.0, + "60": 2435.0, + "61": 2938.0, + "62": 2669.0, + "63": 2392.0, + "64": 2998.0, + "65": 2689.0, + "66": 3285.0, + "67": 2782.0, + "68": 2753.0, + "69": 2958.0, + "70": 3271.0, + "71": 3040.0, + "72": 2504.0, + "73": 3096.0, + "74": 1910.0, + "75": 2617.0, + "76": 3081.0, + "77": 3390.0, + "78": 3186.0, + "79": 3320.0, + "80": 3483.0, + "81": 3782.0, + "82": 3516.0, + "83": 2864.0, + "84": 3396.0, + "85": 3247.0, + "86": 2785.0, + "87": 3762.0, + "88": 3102.0, + "89": 3483.0, + "90": 3076.0, + "91": 2643.0, + "92": 3198.0, + "93": 2666.0, + "94": 3390.0, + "95": 3410.0, + "96": 3508.0, + "97": 3178.0, + "98": 3865.0, + "99": 3143.0, + "100": 3357.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": 
"nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 482499072.0, + "52": 482499072.0, + "53": 482499072.0, + "54": 482499072.0, + "55": 482499072.0, + "56": 482499072.0, + "57": 482499072.0, + "58": 482499072.0, + "59": 482499072.0, + "60": 482499072.0, + "61": 482499072.0, + "62": 482499072.0, + "63": 482499072.0, + "64": 482499072.0, + "65": 482499072.0, + "66": 482499072.0, + "67": 482499072.0, + "68": 482499072.0, + "69": 482499072.0, + "70": 482499072.0, + "71": 482499072.0, + "72": 482499072.0, + "73": 482499072.0, + "74": 482499072.0, + "75": 482499072.0, + "76": 482499072.0, + "77": 482499072.0, + "78": 482499072.0, + "79": 482499072.0, + "80": 482499072.0, + "81": 482499072.0, + "82": 482499072.0, + "83": 482499072.0, + "84": 482499072.0, + "85": 482499072.0, + "86": 482499072.0, + "87": 482499072.0, + "88": 482499072.0, + "89": 482499072.0, + "90": 482499072.0, + "91": 482499072.0, + "92": 482499072.0, + "93": 482499072.0, + "94": 482499072.0, + "95": 482499072.0, + "96": 482499072.0, + "97": 482499072.0, + "98": 482499072.0, + "99": 482499072.0, + "100": 482499072.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1401028096.0, + 
"52": 1401029120.0, + "53": 1401029120.0, + "54": 1401029120.0, + "55": 1401029120.0, + "56": 1401029120.0, + "57": 1401029120.0, + "58": 1401029120.0, + "59": 1401029120.0, + "60": 1401029120.0, + "61": 1401029120.0, + "62": 1401029120.0, + "63": 1401029120.0, + "64": 1401029120.0, + "65": 1401029120.0, + "66": 1401029120.0, + "67": 1401029120.0, + "68": 1401029120.0, + "69": 1401029120.0, + "70": 1401029120.0, + "71": 1401029120.0, + "72": 1401029120.0, + "73": 1401029120.0, + "74": 1401029120.0, + "75": 1401029120.0, + "76": 1401029120.0, + "77": 1401029120.0, + "78": 1401029120.0, + "79": 1401029120.0, + "80": 1401029120.0, + "81": 1401029120.0, + "82": 1401029120.0, + "83": 1401029120.0, + "84": 1401029120.0, + "85": 1401029120.0, + "86": 1401029120.0, + "87": 1401029120.0, + "88": 1401029120.0, + "89": 1401029120.0, + "90": 1401029120.0, + "91": 1401029120.0, + "92": 1401029120.0, + "93": 1401029120.0, + "94": 1401029120.0, + "95": 1401029120.0, + "96": 1401029120.0, + "97": 1401029120.0, + "98": 1401029120.0, + "99": 1401029120.0, + "100": 1401029120.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4.32401, + "52": 0.21688, + "53": 0.18518, + "54": 0.19488, + "55": 
0.1986, + "56": 0.19975, + "57": 0.18475, + "58": 0.18368, + "59": 0.18376, + "60": 0.18447, + "61": 0.18462, + "62": 0.18451, + "63": 0.18353, + "64": 0.21625, + "65": 0.18791, + "66": 0.18877, + "67": 0.18755, + "68": 0.18846, + "69": 0.18722, + "70": 0.18704, + "71": 0.18789, + "72": 0.18975, + "73": 0.18773, + "74": 0.1875, + "75": 0.18938, + "76": 0.18771, + "77": 0.18773, + "78": 0.18744, + "79": 0.18693, + "80": 0.18783, + "81": 0.18742, + "82": 0.18723, + "83": 0.18781, + "84": 0.18777, + "85": 0.18758, + "86": 0.18679, + "87": 0.18708, + "88": 0.18812, + "89": 0.18758, + "90": 0.18811, + "91": 0.18925, + "92": 0.18753, + "93": 0.18733, + "94": 0.18737, + "95": 0.18854, + "96": 0.18834, + "97": 0.18793, + "98": 0.18731, + "99": 0.18778, + "100": 0.18797 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..8325c3b9e5b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.86836, + "2": 10.88595, + "3": 10.86559, + "4": 10.86889, + "5": 10.87417, + "6": 10.8906, + "7": 10.87677, + "8": 10.86475, + "9": 10.88232, + "10": 10.84582, + "11": 10.87162, + "12": 10.87422, + "13": 10.88163, + "14": 10.88889, + "15": 10.83931, + "16": 10.82496, + "17": 10.80147, + "18": 10.81234, + "19": 10.82152, + "20": 10.71933, + "21": 10.69091, + "22": 10.57426, + "23": 10.71097, + "24": 10.5978, + "25": 10.5556, + "26": 10.61522, + "27": 10.60451, + "28": 10.56484, + "29": 10.58476, + "30": 10.35944, + "31": 10.12157, + "32": 10.45234, + "33": 10.45725, + "34": 10.21989, + "35": 
10.26445, + "36": 10.21036, + "37": 10.33952, + "38": 10.18015, + "39": 10.39589, + "40": 10.06631, + "41": 10.14164, + "42": 10.20853, + "43": 9.83127, + "44": 9.94861, + "45": 9.82847, + "46": 9.8046, + "47": 10.14233, + "48": 9.84459, + "49": 9.52195, + "50": 9.88603, + "51": 9.84982, + "52": 9.74428, + "53": 10.05844, + "54": 9.95125, + "55": 9.88345, + "56": 9.61327, + "57": 9.469, + "58": 9.82161, + "59": 9.57703, + "60": 9.49786, + "61": 9.69254, + "62": 9.98597, + "63": 9.37405, + "64": 9.76601, + "65": 8.94654, + "66": 9.70099, + "67": 9.36368, + "68": 9.7824, + "69": 9.7988, + "70": 9.73166, + "71": 9.62509, + "72": 9.58308, + "73": 9.48821, + "74": 8.92607, + "75": 9.40719, + "76": 9.07708, + "77": 10.05856, + "78": 9.72208, + "79": 9.37661, + "80": 9.40273, + "81": 9.48208, + "82": 9.69949, + "83": 9.31353, + "84": 9.41731, + "85": 9.61581, + "86": 9.07429, + "87": 9.59556, + "88": 9.75063, + "89": 9.60041, + "90": 9.82207, + "91": 9.33877, + "92": 9.35776, + "93": 9.0867, + "94": 8.8296, + "95": 9.52595, + "96": 9.52972, + "97": 9.30331, + "98": 9.67136, + "99": 8.89539, + "100": 9.40568 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1595.0, + "2": 1608.0, + "3": 1639.0, + "4": 1667.0, + "5": 1811.0, + "6": 1793.0, + "7": 1871.0, + "8": 1579.0, + "9": 1850.0, + "10": 1352.0, + "11": 1853.0, + "12": 1662.0, + "13": 1705.0, + "14": 1659.0, + "15": 1812.0, + "16": 1720.0, + "17": 1682.0, + "18": 1583.0, + "19": 1728.0, + "20": 1655.0, + "21": 1978.0, + "22": 1627.0, + "23": 1863.0, + "24": 1654.0, + "25": 1514.0, + "26": 1697.0, + "27": 1653.0, + "28": 1949.0, + "29": 1931.0, + "30": 1896.0, + "31": 1522.0, + "32": 1915.0, + "33": 2134.0, + "34": 1700.0, + "35": 1860.0, + "36": 1880.0, + "37": 2310.0, + "38": 2101.0, + "39": 2417.0, + "40": 2076.0, + "41": 2319.0, + "42": 2199.0, + "43": 1874.0, + "44": 2080.0, + "45": 1980.0, + "46": 2302.0, + "47": 2470.0, + "48": 2202.0, + "49": 2280.0, + 
"50": 2439.0, + "51": 2490.0, + "52": 2545.0, + "53": 2999.0, + "54": 2565.0, + "55": 2285.0, + "56": 2699.0, + "57": 2189.0, + "58": 2878.0, + "59": 2978.0, + "60": 2478.0, + "61": 2815.0, + "62": 2666.0, + "63": 2512.0, + "64": 2966.0, + "65": 2533.0, + "66": 2865.0, + "67": 2741.0, + "68": 2760.0, + "69": 2810.0, + "70": 3115.0, + "71": 2918.0, + "72": 2413.0, + "73": 2837.0, + "74": 1901.0, + "75": 2387.0, + "76": 2899.0, + "77": 3019.0, + "78": 3233.0, + "79": 3193.0, + "80": 3288.0, + "81": 3397.0, + "82": 3181.0, + "83": 2672.0, + "84": 3163.0, + "85": 3128.0, + "86": 2647.0, + "87": 3754.0, + "88": 3098.0, + "89": 3372.0, + "90": 2966.0, + "91": 2776.0, + "92": 2983.0, + "93": 2767.0, + "94": 3263.0, + "95": 3238.0, + "96": 3471.0, + "97": 3231.0, + "98": 3528.0, + "99": 3090.0, + "100": 3319.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 464552448.0, + "2": 464552448.0, + "3": 464552448.0, + "4": 464552448.0, + "5": 464552448.0, + "6": 464552448.0, + "7": 464552448.0, + "8": 464552448.0, + "9": 464552448.0, + "10": 464552448.0, + "11": 464552448.0, + "12": 464552448.0, + "13": 464552448.0, + "14": 464552448.0, + "15": 464552448.0, + "16": 464552448.0, + "17": 464552448.0, + "18": 464552448.0, + "19": 464552448.0, + "20": 464552448.0, + "21": 464552448.0, + "22": 464552448.0, + "23": 464552448.0, + "24": 464552448.0, + "25": 464552448.0, + "26": 464552448.0, + "27": 464552448.0, + "28": 464552448.0, + "29": 464552448.0, + "30": 464552448.0, + "31": 464552448.0, + "32": 464552448.0, + "33": 464552448.0, + "34": 464552448.0, + "35": 464552448.0, + "36": 464552448.0, + "37": 464552448.0, + "38": 464552448.0, + "39": 464552448.0, + "40": 464552448.0, + "41": 464552448.0, + "42": 464552448.0, + "43": 464552448.0, + "44": 464552448.0, + "45": 464552448.0, + "46": 464552448.0, + "47": 464552448.0, + "48": 464552448.0, + "49": 464552448.0, + "50": 464552448.0, + "51": 464552448.0, + "52": 
464552448.0, + "53": 464552448.0, + "54": 464552448.0, + "55": 464552448.0, + "56": 464552448.0, + "57": 464552448.0, + "58": 464552448.0, + "59": 464552448.0, + "60": 464552448.0, + "61": 464552448.0, + "62": 464552448.0, + "63": 464552448.0, + "64": 464552448.0, + "65": 464552448.0, + "66": 464552448.0, + "67": 464552448.0, + "68": 464552448.0, + "69": 464552448.0, + "70": 464552448.0, + "71": 464552448.0, + "72": 464552448.0, + "73": 464552448.0, + "74": 464552448.0, + "75": 464552448.0, + "76": 464552448.0, + "77": 464552448.0, + "78": 464552448.0, + "79": 464552448.0, + "80": 464552448.0, + "81": 464552448.0, + "82": 464552448.0, + "83": 464552448.0, + "84": 464552448.0, + "85": 464552448.0, + "86": 464552448.0, + "87": 464552448.0, + "88": 464552448.0, + "89": 464552448.0, + "90": 464552448.0, + "91": 464552448.0, + "92": 464552448.0, + "93": 464552448.0, + "94": 464552448.0, + "95": 464552448.0, + "96": 464552448.0, + "97": 464552448.0, + "98": 464552448.0, + "99": 464552448.0, + "100": 464552448.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1196456448.0, + "2": 1374693888.0, + "3": 1374693888.0, + "4": 1374693888.0, + "5": 1374693888.0, + "6": 1374693888.0, + "7": 1374693888.0, + "8": 1374693888.0, + "9": 1374693888.0, + "10": 1374693888.0, + "11": 1374693888.0, + "12": 1374693888.0, + "13": 1374693888.0, + "14": 1374693888.0, + "15": 1374693888.0, + "16": 1374693888.0, + "17": 1374693888.0, + "18": 1374693888.0, + "19": 1374693888.0, + "20": 1374693888.0, + "21": 1374693888.0, + "22": 1374693888.0, + "23": 1374693888.0, + "24": 1374693888.0, + "25": 1374693888.0, + "26": 1374693888.0, + "27": 1374693888.0, + "28": 1374693888.0, + "29": 1374693888.0, + "30": 1374693888.0, + "31": 1374693888.0, + "32": 1374693888.0, + "33": 1374693888.0, + "34": 1374693888.0, + "35": 1374693888.0, + "36": 1374693888.0, + "37": 1374693888.0, + "38": 1374693888.0, + "39": 1374693888.0, + "40": 
1374693888.0, + "41": 1374693888.0, + "42": 1374693888.0, + "43": 1374693888.0, + "44": 1374693888.0, + "45": 1374693888.0, + "46": 1374693888.0, + "47": 1374693888.0, + "48": 1374693888.0, + "49": 1374693888.0, + "50": 1374693888.0, + "51": 1374693888.0, + "52": 1374693888.0, + "53": 1374693888.0, + "54": 1374693888.0, + "55": 1374693888.0, + "56": 1374693888.0, + "57": 1374693888.0, + "58": 1374693888.0, + "59": 1374693888.0, + "60": 1374693888.0, + "61": 1374693888.0, + "62": 1374693888.0, + "63": 1374693888.0, + "64": 1374693888.0, + "65": 1374693888.0, + "66": 1374693888.0, + "67": 1374693888.0, + "68": 1374693888.0, + "69": 1374693888.0, + "70": 1374693888.0, + "71": 1374693888.0, + "72": 1374693888.0, + "73": 1374693888.0, + "74": 1374693888.0, + "75": 1374693888.0, + "76": 1374693888.0, + "77": 1374693888.0, + "78": 1374693888.0, + "79": 1374693888.0, + "80": 1374693888.0, + "81": 1374693888.0, + "82": 1374693888.0, + "83": 1374693888.0, + "84": 1374693888.0, + "85": 1374693888.0, + "86": 1374693888.0, + "87": 1374693888.0, + "88": 1374693888.0, + "89": 1374693888.0, + "90": 1374693888.0, + "91": 1374693888.0, + "92": 1374693888.0, + "93": 1374693888.0, + "94": 1374693888.0, + "95": 1374693888.0, + "96": 1374693888.0, + "97": 1374693888.0, + "98": 1374693888.0, + "99": 1374693888.0, + "100": 1374693888.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 9.03488, + "2": 0.20387, + "3": 0.20622, + "4": 0.19336, + "5": 0.19521, + "6": 0.20191, + "7": 0.19444, + "8": 0.23348, + "9": 0.25611, + "10": 0.24659, + "11": 0.19017, + "12": 0.2556, + "13": 0.18852, + "14": 0.18766, + "15": 0.2289, + "16": 0.18803, + "17": 0.18847, + "18": 0.18567, + "19": 0.18706, + "20": 0.18811, + "21": 0.20215, + "22": 0.39605, + "23": 0.18875, + "24": 0.21086, + "25": 0.18732, + "26": 0.18675, + "27": 0.18833, + "28": 0.23402, + "29": 0.18843, + "30": 0.18769, + "31": 0.21593, + "32": 0.21936, + "33": 0.18843, + "34": 
0.21993, + "35": 0.18728, + "36": 0.18741, + "37": 0.18775, + "38": 0.22431, + "39": 0.24159, + "40": 0.25325, + "41": 0.18582, + "42": 0.18658, + "43": 0.24562, + "44": 0.30876, + "45": 0.22398, + "46": 0.18667, + "47": 0.18821, + "48": 0.18742, + "49": 0.20501, + "50": 0.18644, + "51": 0.19893, + "52": 0.18375, + "53": 0.18186, + "54": 0.18268, + "55": 0.18616, + "56": 0.32841, + "57": 0.18567, + "58": 0.41637, + "59": 0.25482, + "60": 0.18467, + "61": 0.21026, + "62": 0.18373, + "63": 0.20727, + "64": 0.44141, + "65": 0.18532, + "66": 0.18662, + "67": 0.18805, + "68": 0.1877, + "69": 0.18579, + "70": 0.18644, + "71": 0.20361, + "72": 0.25218, + "73": 0.18582, + "74": 0.21341, + "75": 0.1876, + "76": 0.18385, + "77": 0.18512, + "78": 0.18447, + "79": 0.18604, + "80": 0.44402, + "81": 0.22886, + "82": 0.18502, + "83": 0.18578, + "84": 0.18519, + "85": 0.18624, + "86": 0.18704, + "87": 0.18561, + "88": 0.1864, + "89": 0.18676, + "90": 0.18596, + "91": 0.18759, + "92": 0.18643, + "93": 0.2303, + "94": 0.18509, + "95": 0.18557, + "96": 0.22378, + "97": 0.18724, + "98": 0.18202, + "99": 0.19781, + "100": 0.22613 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100.json index 80f6783f6f2..ab389cd452c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 518291968.0, - "2": 518291968.0, - "3": 518291968.0, - "4": 518291968.0, - "5": 518291968.0, - "6": 518291968.0, - "7": 518291968.0, - "8": 518291968.0, - "9": 518291968.0, - "10": 
518291968.0, - "11": 518291968.0, - "12": 518291968.0, - "13": 518291968.0, - "14": 518291968.0, - "15": 518291968.0, - "16": 518291968.0, - "17": 518291968.0, - "18": 518291968.0, - "19": 518291968.0, - "20": 518291968.0, - "21": 518291968.0, - "22": 518291968.0, - "23": 518291968.0, - "24": 518291968.0, - "25": 518291968.0, - "26": 518291968.0, - "27": 518291968.0, - "28": 518291968.0, - "29": 518291968.0, - "30": 518291968.0, - "31": 518291968.0, - "32": 518291968.0, - "33": 518291968.0, - "34": 518291968.0, - "35": 518291968.0, - "36": 518291968.0, - "37": 518291968.0, - "38": 518291968.0, - "39": 518291968.0, - "40": 518291968.0, - "41": 518291968.0, - "42": 518291968.0, - "43": 518291968.0, - "44": 518291968.0, - "45": 518291968.0, - "46": 518291968.0, - "47": 518291968.0, - "48": 518291968.0, - "49": 518291968.0, - "50": 518291968.0, - "51": 518291968.0, - "52": 518291968.0, - "53": 518291968.0, - "54": 518291968.0, - "55": 518291968.0, - "56": 518291968.0, - "57": 518291968.0, - "58": 518291968.0, - "59": 518291968.0, - "60": 518291968.0, - "61": 518291968.0, - "62": 518291968.0, - "63": 518291968.0, - "64": 518291968.0, - "65": 518291968.0, - "66": 518291968.0, - "67": 518291968.0, - "68": 518291968.0, - "69": 518291968.0, - "70": 518291968.0, - "71": 518291968.0, - "72": 518291968.0, - "73": 518291968.0, - "74": 518291968.0, - "75": 518291968.0, - "76": 518291968.0, - "77": 518291968.0, - "78": 518291968.0, - "79": 518291968.0, - "80": 518291968.0, - "81": 518291968.0, - "82": 518291968.0, - "83": 518291968.0, - "84": 518291968.0, - "85": 518291968.0, - "86": 518291968.0, - "87": 518291968.0, - "88": 518291968.0, - "89": 518291968.0, - "90": 518291968.0, - "91": 518291968.0, - "92": 518291968.0, - "93": 518291968.0, - "94": 518291968.0, - "95": 518291968.0, - "96": 518291968.0, - "97": 518291968.0, - "98": 518291968.0, - "99": 518291968.0, - "100": 518291968.0 + "1": 516456960.0, + "2": 516456960.0, + "3": 516456960.0, + "4": 516456960.0, + "5": 
516456960.0, + "6": 516456960.0, + "7": 516456960.0, + "8": 516456960.0, + "9": 516456960.0, + "10": 516456960.0, + "11": 516456960.0, + "12": 516456960.0, + "13": 516456960.0, + "14": 516456960.0, + "15": 516456960.0, + "16": 516456960.0, + "17": 516456960.0, + "18": 516456960.0, + "19": 516456960.0, + "20": 516456960.0, + "21": 516456960.0, + "22": 516456960.0, + "23": 516456960.0, + "24": 516456960.0, + "25": 516456960.0, + "26": 516456960.0, + "27": 516456960.0, + "28": 516456960.0, + "29": 516456960.0, + "30": 516456960.0, + "31": 516456960.0, + "32": 516456960.0, + "33": 516456960.0, + "34": 516456960.0, + "35": 516456960.0, + "36": 516456960.0, + "37": 516456960.0, + "38": 516456960.0, + "39": 516456960.0, + "40": 516456960.0, + "41": 516456960.0, + "42": 516456960.0, + "43": 516456960.0, + "44": 516456960.0, + "45": 516456960.0, + "46": 516456960.0, + "47": 516456960.0, + "48": 516456960.0, + "49": 516456960.0, + "50": 516456960.0, + "51": 516456960.0, + "52": 516456960.0, + "53": 516456960.0, + "54": 516456960.0, + "55": 516456960.0, + "56": 516456960.0, + "57": 516456960.0, + "58": 516456960.0, + "59": 516456960.0, + "60": 516456960.0, + "61": 516456960.0, + "62": 516456960.0, + "63": 516456960.0, + "64": 516456960.0, + "65": 516456960.0, + "66": 516456960.0, + "67": 516456960.0, + "68": 516456960.0, + "69": 516456960.0, + "70": 516456960.0, + "71": 516456960.0, + "72": 516456960.0, + "73": 516456960.0, + "74": 516456960.0, + "75": 516456960.0, + "76": 516456960.0, + "77": 516456960.0, + "78": 516456960.0, + "79": 516456960.0, + "80": 516456960.0, + "81": 516456960.0, + "82": 516456960.0, + "83": 516456960.0, + "84": 516456960.0, + "85": 516456960.0, + "86": 516456960.0, + "87": 516456960.0, + "88": 516456960.0, + "89": 516456960.0, + "90": 516456960.0, + "91": 516456960.0, + "92": 516456960.0, + "93": 516456960.0, + "94": 516456960.0, + "95": 516456960.0, + "96": 516456960.0, + "97": 516456960.0, + "98": 516456960.0, + "99": 516456960.0, + "100": 
516456960.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1245476352.0, - "2": 1429481984.0, - "3": 1429481984.0, - "4": 1429481984.0, - "5": 1429481984.0, - "6": 1429481984.0, - "7": 1429481984.0, - "8": 1429481984.0, - "9": 1429481984.0, - "10": 1429481984.0, - "11": 1429481984.0, - "12": 1429481984.0, - "13": 1429481984.0, - "14": 1429481984.0, - "15": 1429481984.0, - "16": 1429481984.0, - "17": 1429481984.0, - "18": 1429481984.0, - "19": 1429481984.0, - "20": 1429481984.0, - "21": 1429481984.0, - "22": 1429481984.0, - "23": 1429481984.0, - "24": 1429481984.0, - "25": 1429481984.0, - "26": 1429481984.0, - "27": 1429481984.0, - "28": 1429481984.0, - "29": 1429481984.0, - "30": 1429481984.0, - "31": 1429481984.0, - "32": 1429481984.0, - "33": 1429481984.0, - "34": 1429481984.0, - "35": 1429481984.0, - "36": 1429481984.0, - "37": 1429481984.0, - "38": 1429481984.0, - "39": 1429481984.0, - "40": 1429481984.0, - "41": 1429481984.0, - "42": 1429481984.0, - "43": 1429481984.0, - "44": 1429481984.0, - "45": 1429481984.0, - "46": 1429481984.0, - "47": 1429481984.0, - "48": 1429481984.0, - "49": 1429481984.0, - "50": 1429481984.0, - "51": 1429481984.0, - "52": 1429481984.0, - "53": 1429481984.0, - "54": 1429481984.0, - "55": 1429481984.0, - "56": 1429481984.0, - "57": 1429481984.0, - "58": 1429481984.0, - "59": 1429481984.0, - "60": 1429481984.0, - "61": 1429481984.0, - "62": 1429481984.0, - "63": 1429481984.0, - "64": 1429481984.0, - "65": 1429481984.0, - "66": 1429481984.0, - "67": 1429481984.0, - "68": 1429481984.0, - "69": 1429481984.0, - "70": 1429481984.0, - "71": 1429481984.0, - "72": 1429481984.0, - "73": 1429481984.0, - "74": 1429481984.0, - "75": 1429481984.0, - "76": 1429481984.0, - "77": 1429481984.0, - "78": 1429481984.0, - "79": 1429481984.0, - "80": 1429481984.0, - "81": 1429481984.0, - "82": 1429481984.0, - "83": 1429481984.0, - "84": 1429481984.0, - "85": 1429481984.0, - "86": 
1429481984.0, - "87": 1429481984.0, - "88": 1429481984.0, - "89": 1429481984.0, - "90": 1429481984.0, - "91": 1429481984.0, - "92": 1429481984.0, - "93": 1429481984.0, - "94": 1429481984.0, - "95": 1429481984.0, - "96": 1429481984.0, - "97": 1429481984.0, - "98": 1429481984.0, - "99": 1429481984.0, - "100": 1429481984.0 + "1": 1246525952.0, + "2": 1426598400.0, + "3": 1426598400.0, + "4": 1426598400.0, + "5": 1426598400.0, + "6": 1426598400.0, + "7": 1426598400.0, + "8": 1426598400.0, + "9": 1426598400.0, + "10": 1426598400.0, + "11": 1426598400.0, + "12": 1426598400.0, + "13": 1426598400.0, + "14": 1426598400.0, + "15": 1426598400.0, + "16": 1426598400.0, + "17": 1426598400.0, + "18": 1426598400.0, + "19": 1426598400.0, + "20": 1426598400.0, + "21": 1426598400.0, + "22": 1426598400.0, + "23": 1426598400.0, + "24": 1426598400.0, + "25": 1426598400.0, + "26": 1426598400.0, + "27": 1426598400.0, + "28": 1426598400.0, + "29": 1426598400.0, + "30": 1426598400.0, + "31": 1426598400.0, + "32": 1426598400.0, + "33": 1426598400.0, + "34": 1426598400.0, + "35": 1426598400.0, + "36": 1426598400.0, + "37": 1426598400.0, + "38": 1426598400.0, + "39": 1426598400.0, + "40": 1426598400.0, + "41": 1426598400.0, + "42": 1426598400.0, + "43": 1426598400.0, + "44": 1426598400.0, + "45": 1426598400.0, + "46": 1426598400.0, + "47": 1426598400.0, + "48": 1426598400.0, + "49": 1426598400.0, + "50": 1426598400.0, + "51": 1426598400.0, + "52": 1426598400.0, + "53": 1426598400.0, + "54": 1426598400.0, + "55": 1426598400.0, + "56": 1426598400.0, + "57": 1426598400.0, + "58": 1426598400.0, + "59": 1426598400.0, + "60": 1426598400.0, + "61": 1426598400.0, + "62": 1426598400.0, + "63": 1426598400.0, + "64": 1426598400.0, + "65": 1426598400.0, + "66": 1426598400.0, + "67": 1426598400.0, + "68": 1426598400.0, + "69": 1426598400.0, + "70": 1426598400.0, + "71": 1426598400.0, + "72": 1426598400.0, + "73": 1426598400.0, + "74": 1426598400.0, + "75": 1426598400.0, + "76": 1426598400.0, + "77": 
1426598400.0, + "78": 1426598400.0, + "79": 1426598400.0, + "80": 1426598400.0, + "81": 1426598400.0, + "82": 1426598400.0, + "83": 1426598400.0, + "84": 1426598400.0, + "85": 1426598400.0, + "86": 1426598400.0, + "87": 1426598400.0, + "88": 1426598400.0, + "89": 1426598400.0, + "90": 1426598400.0, + "91": 1426598400.0, + "92": 1426598400.0, + "93": 1426598400.0, + "94": 1426598400.0, + "95": 1426598400.0, + "96": 1426598400.0, + "97": 1426598400.0, + "98": 1426598400.0, + "99": 1426598400.0, + "100": 1426598400.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 12.65353, - "2": 0.15729, - "3": 0.13911, - "4": 0.14117, - "5": 0.14172, - "6": 0.14091, - "7": 0.14103, - "8": 0.14008, - "9": 0.14444, - "10": 0.14215, - "11": 0.143, - "12": 0.14395, - "13": 0.14101, - "14": 0.14112, - "15": 0.14126, - "16": 0.14286, - "17": 0.14201, - "18": 0.14405, - "19": 0.14472, - "20": 0.14424, - "21": 0.14746, - "22": 0.14732, - "23": 0.14871, - "24": 0.14885, - "25": 0.14732, - "26": 0.14775, - "27": 0.14978, - "28": 0.14685, - "29": 0.15004, - "30": 0.14663, - "31": 0.14925, - "32": 0.14679, - "33": 0.14465, - "34": 0.14701, - "35": 0.14556, - "36": 0.14835, - "37": 0.14562, - "38": 0.14971, - "39": 0.14881, - "40": 0.14688, - "41": 0.14373, - "42": 0.14577, - "43": 0.14595, - "44": 0.1465, - "45": 0.14283, - "46": 0.14194, - "47": 0.14334, - "48": 0.14235, - "49": 0.14347, - "50": 0.14228, - "51": 0.14946, - "52": 0.14427, - "53": 0.14469, - "54": 0.14466, - "55": 0.14197, - "56": 0.14396, - "57": 0.14283, - "58": 0.14383, - "59": 0.14201, - "60": 0.14448, - "61": 0.14593, - "62": 0.14316, - "63": 0.14235, - "64": 0.14447, - "65": 0.14383, - "66": 0.14456, - "67": 0.14508, - "68": 0.1452, - "69": 0.14518, - "70": 0.1449, - "71": 0.14576, - "72": 0.14328, - "73": 0.14352, - "74": 0.1504, - "75": 0.15058, - "76": 0.14825, - "77": 0.14229, - "78": 0.14494, - "79": 0.14518, - "80": 0.14464, - "81": 0.1461, - "82": 0.14482, - 
"83": 0.14487, - "84": 0.14272, - "85": 0.14154, - "86": 0.14252, - "87": 0.1447, - "88": 0.14327, - "89": 0.1441, - "90": 0.14688, - "91": 0.14346, - "92": 0.14427, - "93": 0.14222, - "94": 0.14464, - "95": 0.14507, - "96": 0.14196, - "97": 0.1438, - "98": 0.14103, - "99": 0.14644, - "100": 0.14474 + "1": 8.55796, + "2": 0.16015, + "3": 0.14079, + "4": 0.11738, + "5": 0.12195, + "6": 0.12441, + "7": 0.1172, + "8": 0.11692, + "9": 0.11919, + "10": 0.12076, + "11": 0.12158, + "12": 0.12094, + "13": 0.11812, + "14": 0.11938, + "15": 0.1172, + "16": 0.11613, + "17": 0.11557, + "18": 0.11401, + "19": 0.11498, + "20": 0.11349, + "21": 0.11351, + "22": 0.11386, + "23": 0.11441, + "24": 0.11363, + "25": 0.1167, + "26": 0.1134, + "27": 0.11514, + "28": 0.12945, + "29": 0.12623, + "30": 0.11515, + "31": 0.11213, + "32": 0.11356, + "33": 0.11231, + "34": 0.11288, + "35": 0.11401, + "36": 0.11375, + "37": 0.1131, + "38": 0.11218, + "39": 0.11367, + "40": 0.11358, + "41": 0.11254, + "42": 0.11336, + "43": 0.11318, + "44": 0.11297, + "45": 0.11264, + "46": 0.11205, + "47": 0.11364, + "48": 0.11191, + "49": 0.11164, + "50": 0.11224, + "51": 0.12452, + "52": 0.11481, + "53": 0.11411, + "54": 0.11453, + "55": 0.11486, + "56": 0.1126, + "57": 0.11285, + "58": 0.11369, + "59": 0.11438, + "60": 0.11423, + "61": 0.11347, + "62": 0.1144, + "63": 0.11359, + "64": 0.11501, + "65": 0.11372, + "66": 0.11274, + "67": 0.11362, + "68": 0.11321, + "69": 0.11196, + "70": 0.11191, + "71": 0.11138, + "72": 0.11254, + "73": 0.11635, + "74": 0.11349, + "75": 0.11272, + "76": 0.1135, + "77": 0.11299, + "78": 0.11411, + "79": 0.11258, + "80": 0.113, + "81": 0.11306, + "82": 0.11448, + "83": 0.11412, + "84": 0.11261, + "85": 0.11298, + "86": 0.11478, + "87": 0.1143, + "88": 0.11208, + "89": 0.11453, + "90": 0.11257, + "91": 0.11387, + "92": 0.11269, + "93": 0.1133, + "94": 0.11392, + "95": 0.11421, + "96": 0.1138, + "97": 0.11394, + "98": 0.1141, + "99": 0.1139, + "100": 0.11305 } } } \ No newline at 
end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..262e81423cd --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85583, + "52": 9.75242, + "53": 10.07589, + "54": 9.95688, + "55": 9.88208, + "56": 9.63141, + "57": 9.48651, + "58": 9.83118, + "59": 9.58905, + "60": 9.50651, + "61": 9.7037, + "62": 9.98291, + "63": 9.38315, + "64": 9.77906, + "65": 8.95179, + "66": 9.7016, + "67": 9.37206, + "68": 9.78852, + "69": 9.79859, + "70": 9.74746, + "71": 9.6191, + "72": 9.58502, + "73": 9.49725, + "74": 8.93933, + "75": 9.42706, + "76": 9.08024, + "77": 10.06571, + "78": 9.72896, + "79": 9.37772, + "80": 9.40999, + "81": 9.47983, + "82": 9.70184, + "83": 9.30625, + "84": 9.42095, + "85": 9.61378, + "86": 9.07656, + "87": 9.59458, + "88": 9.75068, + "89": 
9.60243, + "90": 9.81901, + "91": 9.33899, + "92": 9.35717, + "93": 9.07883, + "94": 8.8351, + "95": 9.52171, + "96": 9.53008, + "97": 9.31309, + "98": 9.67785, + "99": 8.89061, + "100": 9.39726 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2528.0, + "52": 2535.0, + "53": 2875.0, + "54": 2862.0, + "55": 2406.0, + "56": 2733.0, + "57": 2347.0, + "58": 2918.0, + "59": 2759.0, + "60": 2404.0, + "61": 3022.0, + "62": 2494.0, + "63": 2452.0, + "64": 2838.0, + "65": 2549.0, + "66": 3044.0, + "67": 2887.0, + "68": 2637.0, + "69": 2860.0, + "70": 3034.0, + "71": 2989.0, + "72": 2355.0, + "73": 3034.0, + "74": 1904.0, + "75": 2538.0, + "76": 3012.0, + "77": 3193.0, + "78": 2994.0, + "79": 3097.0, + "80": 3254.0, + "81": 3671.0, + "82": 3299.0, + "83": 2793.0, + "84": 3146.0, + "85": 3329.0, + "86": 2769.0, + "87": 3766.0, + "88": 3021.0, + "89": 3286.0, + "90": 3029.0, + "91": 2772.0, + "92": 2955.0, + "93": 2852.0, + "94": 3411.0, + "95": 3271.0, + "96": 3279.0, + "97": 3054.0, + "98": 3643.0, + "99": 3303.0, + "100": 3142.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": 
"nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 696530432.0, + "52": 696530432.0, + "53": 696530432.0, + "54": 696530432.0, + "55": 696530432.0, + "56": 696530432.0, + "57": 696530432.0, + "58": 696530432.0, + "59": 696530432.0, + "60": 696530432.0, + "61": 696530432.0, + "62": 696530432.0, + "63": 696530432.0, + "64": 696530432.0, + "65": 696530432.0, + "66": 696530432.0, + "67": 696530432.0, + "68": 696530432.0, + "69": 696530432.0, + "70": 696530432.0, + "71": 696530432.0, + "72": 696530432.0, + "73": 696530432.0, + "74": 696530432.0, + "75": 696530432.0, + "76": 696530432.0, + "77": 696530432.0, + "78": 696530432.0, + "79": 696530432.0, + "80": 696530432.0, + "81": 696530432.0, + "82": 696530432.0, + "83": 696530432.0, + "84": 696530432.0, + "85": 696530432.0, + "86": 696530432.0, + "87": 696530432.0, + "88": 696530432.0, + "89": 696530432.0, + "90": 696530432.0, + "91": 696530432.0, + "92": 696530432.0, + "93": 696530432.0, + "94": 696530432.0, + "95": 696530432.0, + "96": 696530432.0, + "97": 696530432.0, + "98": 696530432.0, + "99": 696530432.0, + "100": 696530432.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + 
"11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1606670848.0, + "52": 1606671872.0, + "53": 1606671872.0, + "54": 1606671872.0, + "55": 1606671872.0, + "56": 1606671872.0, + "57": 1606671872.0, + "58": 1606671872.0, + "59": 1606671872.0, + "60": 1606671872.0, + "61": 1606671872.0, + "62": 1606671872.0, + "63": 1606671872.0, + "64": 1606671872.0, + "65": 1606671872.0, + "66": 1606671872.0, + "67": 1606671872.0, + "68": 1606671872.0, + "69": 1606671872.0, + "70": 1606671872.0, + "71": 1606671872.0, + "72": 1606671872.0, + "73": 1606671872.0, + "74": 1606671872.0, + "75": 1606671872.0, + "76": 1606671872.0, + "77": 1606671872.0, + "78": 1606671872.0, + "79": 1606671872.0, + "80": 1606671872.0, + "81": 1606671872.0, + "82": 1606671872.0, + "83": 1606671872.0, + "84": 1606671872.0, + "85": 1606671872.0, + "86": 1606671872.0, + "87": 1606671872.0, + "88": 1606671872.0, + "89": 1606671872.0, + "90": 1606671872.0, + "91": 1606671872.0, + "92": 1606671872.0, + "93": 1606671872.0, + "94": 1606671872.0, + "95": 1606671872.0, + "96": 1606671872.0, + "97": 1606671872.0, + "98": 1606671872.0, + "99": 1606671872.0, + "100": 1606671872.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": 
"nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.21698, + "52": 0.15014, + "53": 0.12142, + "54": 0.12079, + "55": 0.12087, + "56": 0.11996, + "57": 0.12048, + "58": 0.12044, + "59": 0.12, + "60": 0.12024, + "61": 0.11964, + "62": 0.1216, + "63": 0.12133, + "64": 0.12065, + "65": 0.11968, + "66": 0.12123, + "67": 0.11973, + "68": 0.11993, + "69": 0.12002, + "70": 0.12021, + "71": 0.11952, + "72": 0.12017, + "73": 0.1196, + "74": 0.11995, + "75": 0.12119, + "76": 0.12147, + "77": 0.12101, + "78": 0.12058, + "79": 0.12234, + "80": 0.12023, + "81": 0.12099, + "82": 0.12135, + "83": 0.11794, + "84": 0.11366, + "85": 0.11362, + "86": 0.11298, + "87": 0.11323, + "88": 0.11437, + "89": 0.11389, + "90": 0.11505, + "91": 0.11411, + "92": 0.11424, + "93": 0.11409, + "94": 0.11311, + "95": 0.11421, + "96": 0.11364, + "97": 0.11399, + "98": 0.11382, + "99": 0.1137, + "100": 0.11717 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100.json index e88f2c340d5..3874b80ddea 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100.json +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 10.92655, - "5": 10.92719, - "10": 10.90792, - "15": 10.88292, - "20": 10.77597, - "25": 10.59263, - "30": 10.39174, - "35": 10.29698, - "40": 10.09664, - "45": 9.8447, - "50": 9.90944, - "55": 9.8777, - "60": 9.49123, - "65": 8.94255, - "70": 9.72279, - "75": 9.4189, - "80": 9.40055, - "85": 9.61189, - "90": 9.81027, - "95": 9.51723, + "1": 10.92228, + "2": 10.92833, + "3": 10.91713, + "4": 10.90495, + "5": 10.92808, + "6": 10.93674, + "7": 10.90402, + "8": 10.92227, + "9": 10.91254, + "10": 10.9085, + "11": 10.89337, + "12": 10.92084, + "13": 10.91494, + "14": 10.92149, + "15": 10.88433, + "16": 10.87456, + "17": 10.83921, + "18": 10.87308, + "19": 10.85328, + "20": 10.77491, + "21": 10.74755, + "22": 10.63144, + "23": 10.75622, + "24": 10.65564, + "25": 10.59217, + "26": 10.65329, + "27": 10.64878, + "28": 10.59653, + "29": 10.61014, + "30": 10.39286, + "31": 10.15722, + "32": 10.49224, + "33": 10.47942, + "34": 10.24013, + "35": 10.29715, + "36": 10.24564, + "37": 10.35285, + "38": 10.20534, + "39": 10.40417, + "40": 10.09551, + "41": 10.15275, + "42": 10.21879, + "43": 9.85523, + "44": 9.96245, + "45": 9.84616, + "46": 9.83799, + "47": 10.13884, + "48": 9.85698, + "49": 9.5375, + "50": 9.90879, + "51": 9.84975, + "52": 9.74159, + "53": 10.06327, + "54": 9.9459, + "55": 9.87743, + "56": 9.62749, + "57": 9.47268, + "58": 9.82918, + "59": 9.58307, + "60": 9.49187, + "61": 9.69959, + "62": 9.98095, + "63": 9.37226, + "64": 9.77561, + "65": 8.94344, + "66": 9.69994, + "67": 9.3642, + "68": 9.78704, + "69": 9.78396, + "70": 9.72293, + "71": 9.60744, + "72": 9.58422, + "73": 9.49093, + "74": 8.94876, + "75": 9.41814, + "76": 9.08731, + "77": 10.06286, + "78": 9.72902, + "79": 9.37093, + "80": 9.40038, + "81": 
9.47763, + "82": 9.69129, + "83": 9.30768, + "84": 9.41257, + "85": 9.61139, + "86": 9.07621, + "87": 9.59461, + "88": 9.74776, + "89": 9.60681, + "90": 9.81085, + "91": 9.34453, + "92": 9.36537, + "93": 9.07751, + "94": 8.82977, + "95": 9.5168, + "96": 9.52549, + "97": 9.31038, + "98": 9.67816, + "99": 8.8885, "100": 9.40135 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 1597.0, - "5": 1937.0, - "10": 1408.0, - "15": 1893.0, - "20": 1612.0, - "25": 1633.0, - "30": 1941.0, - "35": 2005.0, - "40": 2164.0, - "45": 2053.0, - "50": 2437.0, - "55": 2409.0, - "60": 2419.0, - "65": 2713.0, - "70": 3098.0, - "75": 2685.0, - "80": 3562.0, - "85": 3262.0, - "90": 3059.0, - "95": 3380.0, - "100": 3331.0 + "1": 1686.0, + "2": 1781.0, + "3": 1710.0, + "4": 1721.0, + "5": 1915.0, + "6": 1840.0, + "7": 1923.0, + "8": 1740.0, + "9": 1904.0, + "10": 1501.0, + "11": 1902.0, + "12": 1815.0, + "13": 1919.0, + "14": 1911.0, + "15": 1953.0, + "16": 1875.0, + "17": 1835.0, + "18": 1725.0, + "19": 1755.0, + "20": 1680.0, + "21": 1823.0, + "22": 1751.0, + "23": 1966.0, + "24": 1652.0, + "25": 1619.0, + "26": 1847.0, + "27": 1890.0, + "28": 1990.0, + "29": 2013.0, + "30": 1924.0, + "31": 1602.0, + "32": 1911.0, + "33": 2246.0, + "34": 1989.0, + "35": 2000.0, + "36": 2116.0, + "37": 2402.0, + "38": 2298.0, + "39": 2567.0, + "40": 2163.0, + "41": 2333.0, + "42": 2300.0, + "43": 1996.0, + "44": 2153.0, + "45": 2130.0, + "46": 2301.0, + "47": 2552.0, + "48": 2428.0, + "49": 2290.0, + "50": 2566.0, + "51": 2688.0, + "52": 2651.0, + "53": 2961.0, + "54": 2714.0, + "55": 2381.0, + "56": 2747.0, + "57": 2435.0, + "58": 2979.0, + "59": 2834.0, + "60": 2440.0, + "61": 2844.0, + "62": 2761.0, + "63": 2449.0, + "64": 3041.0, + "65": 2711.0, + "66": 3212.0, + "67": 2724.0, + "68": 2866.0, + "69": 2992.0, + "70": 3273.0, + "71": 3119.0, + "72": 2480.0, + "73": 3140.0, + "74": 1959.0, + "75": 2732.0, + "76": 3088.0, + "77": 3496.0, + 
"78": 3193.0, + "79": 3370.0, + "80": 3523.0, + "81": 3655.0, + "82": 3409.0, + "83": 2797.0, + "84": 3476.0, + "85": 3443.0, + "86": 2736.0, + "87": 3762.0, + "88": 3082.0, + "89": 3460.0, + "90": 2999.0, + "91": 2667.0, + "92": 3190.0, + "93": 2704.0, + "94": 3348.0, + "95": 3464.0, + "96": 3616.0, + "97": 3124.0, + "98": 3688.0, + "99": 3176.0, + "100": 3301.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 435847168.0, - "5": 435847168.0, - "10": 436895744.0, - "15": 435847168.0, - "20": 435847168.0, - "25": 436895744.0, - "30": 436895744.0, - "35": 435847168.0, - "40": 435847168.0, - "45": 435847168.0, - "50": 435847168.0, - "55": 436895744.0, - "60": 436895744.0, - "65": 436895744.0, - "70": 435847168.0, - "75": 435847168.0, - "80": 436895744.0, - "85": 436895744.0, - "90": 436895744.0, - "95": 435847168.0, - "100": 436895744.0 + "1": 436765184.0, + "2": 436765184.0, + "3": 436765184.0, + "4": 436765184.0, + "5": 436765184.0, + "6": 436765184.0, + "7": 436765184.0, + "8": 436765184.0, + "9": 436765184.0, + "10": 436765184.0, + "11": 436765184.0, + "12": 436765184.0, + "13": 436765184.0, + "14": 436765184.0, + "15": 436765184.0, + "16": 436765184.0, + "17": 436765184.0, + "18": 436765184.0, + "19": 436765184.0, + "20": 436765184.0, + "21": 436765184.0, + "22": 436765184.0, + "23": 436765184.0, + "24": 436765184.0, + "25": 436765184.0, + "26": 436765184.0, + "27": 436765184.0, + "28": 436765184.0, + "29": 436765184.0, + "30": 436765184.0, + "31": 436765184.0, + "32": 436765184.0, + "33": 436765184.0, + "34": 436765184.0, + "35": 436765184.0, + "36": 436765184.0, + "37": 436765184.0, + "38": 436765184.0, + "39": 436765184.0, + "40": 436765184.0, + "41": 436765184.0, + "42": 436765184.0, + "43": 436765184.0, + "44": 436765184.0, + "45": 436765184.0, + "46": 436765184.0, + "47": 436765184.0, + "48": 436765184.0, + "49": 436765184.0, + "50": 436765184.0, + "51": 436765184.0, + "52": 
436765184.0, + "53": 436765184.0, + "54": 436765184.0, + "55": 436765184.0, + "56": 436765184.0, + "57": 436765184.0, + "58": 436765184.0, + "59": 436765184.0, + "60": 436765184.0, + "61": 436765184.0, + "62": 436765184.0, + "63": 436765184.0, + "64": 436765184.0, + "65": 436765184.0, + "66": 436765184.0, + "67": 436765184.0, + "68": 436765184.0, + "69": 436765184.0, + "70": 436765184.0, + "71": 436765184.0, + "72": 436765184.0, + "73": 436765184.0, + "74": 436765184.0, + "75": 436765184.0, + "76": 436765184.0, + "77": 436765184.0, + "78": 436765184.0, + "79": 436765184.0, + "80": 436765184.0, + "81": 436765184.0, + "82": 436765184.0, + "83": 436765184.0, + "84": 436765184.0, + "85": 436765184.0, + "86": 436765184.0, + "87": 436765184.0, + "88": 436765184.0, + "89": 436765184.0, + "90": 436765184.0, + "91": 436765184.0, + "92": 436765184.0, + "93": 436765184.0, + "94": 436765184.0, + "95": 436765184.0, + "96": 436765184.0, + "97": 436765184.0, + "98": 436765184.0, + "99": 436765184.0, + "100": 436765184.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 1179683840.0, - "5": 1359626240.0, - "10": 1359626240.0, - "15": 1359626240.0, - "20": 1359626240.0, - "25": 1359626240.0, - "30": 1359626240.0, - "35": 1359626240.0, - "40": 1359626240.0, - "45": 1359626240.0, - "50": 1359626240.0, - "55": 1359626240.0, - "60": 1359626240.0, - "65": 1359626240.0, - "70": 1359626240.0, - "75": 1359626240.0, - "80": 1359626240.0, - "85": 1359626240.0, - "90": 1359626240.0, - "95": 1359626240.0, - "100": 1359626240.0 + "1": 1178629632.0, + "2": 1359489536.0, + "3": 1359489536.0, + "4": 1359489536.0, + "5": 1359489536.0, + "6": 1359489536.0, + "7": 1359489536.0, + "8": 1359489536.0, + "9": 1359489536.0, + "10": 1359489536.0, + "11": 1359489536.0, + "12": 1359489536.0, + "13": 1359489536.0, + "14": 1359489536.0, + "15": 1359489536.0, + "16": 1359489536.0, + "17": 1359489536.0, + "18": 1359489536.0, + "19": 
1359489536.0, + "20": 1359489536.0, + "21": 1359489536.0, + "22": 1359489536.0, + "23": 1359489536.0, + "24": 1359489536.0, + "25": 1359489536.0, + "26": 1359489536.0, + "27": 1359489536.0, + "28": 1359489536.0, + "29": 1359489536.0, + "30": 1359489536.0, + "31": 1359489536.0, + "32": 1359489536.0, + "33": 1359489536.0, + "34": 1359489536.0, + "35": 1359489536.0, + "36": 1359489536.0, + "37": 1359489536.0, + "38": 1359489536.0, + "39": 1359489536.0, + "40": 1359489536.0, + "41": 1359489536.0, + "42": 1359489536.0, + "43": 1359489536.0, + "44": 1359489536.0, + "45": 1359489536.0, + "46": 1359489536.0, + "47": 1359489536.0, + "48": 1359489536.0, + "49": 1359489536.0, + "50": 1359489536.0, + "51": 1359489536.0, + "52": 1359489536.0, + "53": 1359489536.0, + "54": 1359489536.0, + "55": 1359489536.0, + "56": 1359489536.0, + "57": 1359489536.0, + "58": 1359489536.0, + "59": 1359489536.0, + "60": 1359489536.0, + "61": 1359489536.0, + "62": 1359489536.0, + "63": 1359489536.0, + "64": 1359489536.0, + "65": 1359489536.0, + "66": 1359489536.0, + "67": 1359489536.0, + "68": 1359489536.0, + "69": 1359489536.0, + "70": 1359489536.0, + "71": 1359489536.0, + "72": 1359489536.0, + "73": 1359489536.0, + "74": 1359489536.0, + "75": 1359489536.0, + "76": 1359489536.0, + "77": 1359489536.0, + "78": 1359489536.0, + "79": 1359489536.0, + "80": 1359489536.0, + "81": 1359489536.0, + "82": 1359489536.0, + "83": 1359489536.0, + "84": 1359489536.0, + "85": 1359489536.0, + "86": 1359489536.0, + "87": 1359489536.0, + "88": 1359489536.0, + "89": 1359489536.0, + "90": 1359489536.0, + "91": 1359489536.0, + "92": 1359489536.0, + "93": 1359489536.0, + "94": 1359489536.0, + "95": 1359489536.0, + "96": 1359489536.0, + "97": 1359489536.0, + "98": 1359489536.0, + "99": 1359489536.0, + "100": 1359489536.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 11.0451, - "5": 0.18574, - "10": 0.18706, - "15": 0.18796, - "20": 0.18918, - 
"25": 0.19125, - "30": 0.19342, - "35": 0.18767, - "40": 0.18791, - "45": 0.18872, - "50": 0.18792, - "55": 0.19099, - "60": 0.19807, - "65": 0.19727, - "70": 0.1971, - "75": 0.19083, - "80": 0.1891, - "85": 0.19438, - "90": 0.19306, - "95": 0.18999, - "100": 0.1938 + "1": 4.17595, + "2": 0.21653, + "3": 0.20393, + "4": 0.1777, + "5": 0.17559, + "6": 0.17527, + "7": 0.17404, + "8": 0.17527, + "9": 0.17461, + "10": 0.17454, + "11": 0.17381, + "12": 0.17386, + "13": 0.174, + "14": 0.17411, + "15": 0.17381, + "16": 0.17541, + "17": 0.17524, + "18": 0.17473, + "19": 0.17526, + "20": 0.17472, + "21": 0.17459, + "22": 0.17459, + "23": 0.17482, + "24": 0.17424, + "25": 0.17389, + "26": 0.17466, + "27": 0.17418, + "28": 0.17458, + "29": 0.17404, + "30": 0.17516, + "31": 0.17358, + "32": 0.17747, + "33": 0.17373, + "34": 0.17438, + "35": 0.17497, + "36": 0.17566, + "37": 0.17619, + "38": 0.17653, + "39": 0.1758, + "40": 0.17382, + "41": 0.17487, + "42": 0.17435, + "43": 0.17455, + "44": 0.17454, + "45": 0.17399, + "46": 0.17424, + "47": 0.17456, + "48": 0.1738, + "49": 0.17414, + "50": 0.17386, + "51": 0.18789, + "52": 0.17663, + "53": 0.17792, + "54": 0.17728, + "55": 0.17626, + "56": 0.17729, + "57": 0.17786, + "58": 0.17863, + "59": 0.18049, + "60": 0.1845, + "61": 0.1781, + "62": 0.1787, + "63": 0.17855, + "64": 0.17717, + "65": 0.1776, + "66": 0.17832, + "67": 0.18005, + "68": 0.17716, + "69": 0.17733, + "70": 0.17706, + "71": 0.17683, + "72": 0.17613, + "73": 0.17725, + "74": 0.17735, + "75": 0.17807, + "76": 0.1806, + "77": 0.17886, + "78": 0.17653, + "79": 0.17801, + "80": 0.1774, + "81": 0.17784, + "82": 0.17692, + "83": 0.17721, + "84": 0.17851, + "85": 0.17973, + "86": 0.17641, + "87": 0.17796, + "88": 0.1791, + "89": 0.1778, + "90": 0.17818, + "91": 0.17974, + "92": 0.18142, + "93": 0.18143, + "94": 0.18024, + "95": 0.17737, + "96": 0.17757, + "97": 0.17906, + "98": 0.18024, + "99": 0.17614, + "100": 0.17615 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..38fc27ca5d3 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.84975, + "52": 9.74157, + "53": 10.06328, + "54": 9.94585, + "55": 9.87742, + "56": 9.6275, + "57": 9.47269, + "58": 9.82916, + "59": 9.58304, + "60": 9.49186, + "61": 9.69958, + "62": 9.98093, + "63": 9.37224, + "64": 9.77563, + "65": 8.94344, + "66": 9.69995, + "67": 9.36421, + "68": 9.78707, + "69": 9.78397, + "70": 9.72291, + "71": 9.60744, + "72": 9.58421, + "73": 9.49098, + "74": 8.94877, + "75": 9.41814, + "76": 9.08732, + "77": 10.06287, + "78": 9.72903, + "79": 9.37093, + "80": 9.40035, + "81": 9.47763, + "82": 9.69127, + "83": 9.3077, + "84": 9.41261, + "85": 9.61135, + "86": 9.07622, + "87": 9.5946, + "88": 9.74773, + "89": 9.60683, + "90": 9.81083, 
+ "91": 9.34451, + "92": 9.36535, + "93": 9.07752, + "94": 8.82979, + "95": 9.51678, + "96": 9.52548, + "97": 9.3104, + "98": 9.67816, + "99": 8.88853, + "100": 9.40134 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2611.0, + "52": 2567.0, + "53": 2899.0, + "54": 2696.0, + "55": 2388.0, + "56": 2904.0, + "57": 2463.0, + "58": 3004.0, + "59": 2743.0, + "60": 2481.0, + "61": 2882.0, + "62": 2640.0, + "63": 2329.0, + "64": 3053.0, + "65": 2698.0, + "66": 3171.0, + "67": 2762.0, + "68": 2852.0, + "69": 2993.0, + "70": 3111.0, + "71": 3118.0, + "72": 2477.0, + "73": 3073.0, + "74": 1987.0, + "75": 2626.0, + "76": 2906.0, + "77": 3416.0, + "78": 3291.0, + "79": 3330.0, + "80": 3538.0, + "81": 3684.0, + "82": 3450.0, + "83": 2796.0, + "84": 3313.0, + "85": 3417.0, + "86": 2750.0, + "87": 3783.0, + "88": 3067.0, + "89": 3523.0, + "90": 3036.0, + "91": 2662.0, + "92": 3172.0, + "93": 2638.0, + "94": 3365.0, + "95": 3463.0, + "96": 3698.0, + "97": 3041.0, + "98": 3808.0, + "99": 3231.0, + "100": 3373.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": 
"nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 437551616.0, + "52": 437551616.0, + "53": 437551616.0, + "54": 437551616.0, + "55": 437551616.0, + "56": 437551616.0, + "57": 437551616.0, + "58": 437551616.0, + "59": 437551616.0, + "60": 437551616.0, + "61": 437551616.0, + "62": 437551616.0, + "63": 437551616.0, + "64": 437551616.0, + "65": 437551616.0, + "66": 437551616.0, + "67": 437551616.0, + "68": 437551616.0, + "69": 437551616.0, + "70": 437551616.0, + "71": 437551616.0, + "72": 437551616.0, + "73": 437551616.0, + "74": 437551616.0, + "75": 437551616.0, + "76": 437551616.0, + "77": 437551616.0, + "78": 437551616.0, + "79": 437551616.0, + "80": 437551616.0, + "81": 437551616.0, + "82": 437551616.0, + "83": 437551616.0, + "84": 437551616.0, + "85": 437551616.0, + "86": 437551616.0, + "87": 437551616.0, + "88": 437551616.0, + "89": 437551616.0, + "90": 437551616.0, + "91": 437551616.0, + "92": 437551616.0, + "93": 437551616.0, + "94": 437551616.0, + "95": 437551616.0, + "96": 437551616.0, + "97": 437551616.0, + "98": 437551616.0, + "99": 437551616.0, + "100": 437551616.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", 
+ "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1361323520.0, + "52": 1361324544.0, + "53": 1361324544.0, + "54": 1361324544.0, + "55": 1361324544.0, + "56": 1361324544.0, + "57": 1361324544.0, + "58": 1361324544.0, + "59": 1361324544.0, + "60": 1361324544.0, + "61": 1361324544.0, + "62": 1361324544.0, + "63": 1361324544.0, + "64": 1361324544.0, + "65": 1361324544.0, + "66": 1361324544.0, + "67": 1361324544.0, + "68": 1361324544.0, + "69": 1361324544.0, + "70": 1361324544.0, + "71": 1361324544.0, + "72": 1361324544.0, + "73": 1361324544.0, + "74": 1361324544.0, + "75": 1361324544.0, + "76": 1361324544.0, + "77": 1361324544.0, + "78": 1361324544.0, + "79": 1361324544.0, + "80": 1361324544.0, + "81": 1361324544.0, + "82": 1361324544.0, + "83": 1361324544.0, + "84": 1361324544.0, + "85": 1361324544.0, + "86": 1361324544.0, + "87": 1361324544.0, + "88": 1361324544.0, + "89": 1361324544.0, + "90": 1361324544.0, + "91": 1361324544.0, + "92": 1361324544.0, + "93": 1361324544.0, + "94": 1361324544.0, + "95": 1361324544.0, + "96": 1361324544.0, + "97": 1361324544.0, + "98": 1361324544.0, + "99": 1361324544.0, + "100": 1361324544.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 
"nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.70609, + "52": 0.21752, + "53": 0.18577, + "54": 0.18466, + "55": 0.18165, + "56": 0.18049, + "57": 0.18614, + "58": 0.18682, + "59": 0.18039, + "60": 0.18204, + "61": 0.18258, + "62": 0.18091, + "63": 0.18358, + "64": 0.18229, + "65": 0.18033, + "66": 0.17977, + "67": 0.17991, + "68": 0.18063, + "69": 0.17985, + "70": 0.1801, + "71": 0.17962, + "72": 0.17965, + "73": 0.18018, + "74": 0.17894, + "75": 0.17969, + "76": 0.17978, + "77": 0.18125, + "78": 0.18038, + "79": 0.18003, + "80": 0.18018, + "81": 0.17963, + "82": 0.18021, + "83": 0.17905, + "84": 0.1801, + "85": 0.1801, + "86": 0.18063, + "87": 0.18031, + "88": 0.17967, + "89": 0.18064, + "90": 0.17981, + "91": 0.18039, + "92": 0.18318, + "93": 0.18018, + "94": 0.18097, + "95": 0.18141, + "96": 0.17593, + "97": 0.17726, + "98": 0.17621, + "99": 0.17602, + "100": 0.17627 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..b3990651f36 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.87163, + "2": 10.87238, + "3": 10.86215, + 
"4": 10.84334, + "5": 10.8781, + "6": 10.8937, + "7": 10.87187, + "8": 10.87789, + "9": 10.86815, + "10": 10.83758, + "11": 10.87595, + "12": 10.87605, + "13": 10.89163, + "14": 10.89707, + "15": 10.83373, + "16": 10.82462, + "17": 10.80227, + "18": 10.82965, + "19": 10.82299, + "20": 10.73839, + "21": 10.70969, + "22": 10.5649, + "23": 10.73038, + "24": 10.6062, + "25": 10.55515, + "26": 10.62333, + "27": 10.61393, + "28": 10.57726, + "29": 10.60204, + "30": 10.38732, + "31": 10.12791, + "32": 10.4758, + "33": 10.47238, + "34": 10.22665, + "35": 10.28584, + "36": 10.23138, + "37": 10.35035, + "38": 10.19674, + "39": 10.40798, + "40": 10.09496, + "41": 10.13593, + "42": 10.21728, + "43": 9.84575, + "44": 9.94965, + "45": 9.83809, + "46": 9.821, + "47": 10.13316, + "48": 9.85047, + "49": 9.53, + "50": 9.90689, + "51": 9.85498, + "52": 9.74731, + "53": 10.06267, + "54": 9.95301, + "55": 9.88728, + "56": 9.6211, + "57": 9.47571, + "58": 9.83152, + "59": 9.58168, + "60": 9.49439, + "61": 9.68902, + "62": 9.9857, + "63": 9.37411, + "64": 9.7651, + "65": 8.94171, + "66": 9.69872, + "67": 9.36899, + "68": 9.78075, + "69": 9.79729, + "70": 9.72884, + "71": 9.62546, + "72": 9.58193, + "73": 9.48195, + "74": 8.92206, + "75": 9.4096, + "76": 9.07711, + "77": 10.05905, + "78": 9.7196, + "79": 9.37915, + "80": 9.39953, + "81": 9.4826, + "82": 9.70045, + "83": 9.31347, + "84": 9.41605, + "85": 9.61616, + "86": 9.07519, + "87": 9.59811, + "88": 9.75175, + "89": 9.60152, + "90": 9.82639, + "91": 9.33477, + "92": 9.3587, + "93": 9.08591, + "94": 8.82888, + "95": 9.52816, + "96": 9.52866, + "97": 9.30468, + "98": 9.67128, + "99": 8.89752, + "100": 9.40653 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1731.0, + "2": 1804.0, + "3": 1704.0, + "4": 1768.0, + "5": 2006.0, + "6": 1918.0, + "7": 1815.0, + "8": 1654.0, + "9": 1919.0, + "10": 1481.0, + "11": 1876.0, + "12": 1795.0, + "13": 1915.0, + "14": 1830.0, + "15": 2029.0, + 
"16": 1948.0, + "17": 1838.0, + "18": 1747.0, + "19": 1789.0, + "20": 1771.0, + "21": 1876.0, + "22": 1854.0, + "23": 2069.0, + "24": 1684.0, + "25": 1732.0, + "26": 1803.0, + "27": 1919.0, + "28": 2095.0, + "29": 2041.0, + "30": 1919.0, + "31": 1704.0, + "32": 1869.0, + "33": 2184.0, + "34": 1846.0, + "35": 1923.0, + "36": 2071.0, + "37": 2407.0, + "38": 2209.0, + "39": 2462.0, + "40": 2275.0, + "41": 2369.0, + "42": 2305.0, + "43": 2048.0, + "44": 2171.0, + "45": 2119.0, + "46": 2287.0, + "47": 2499.0, + "48": 2361.0, + "49": 2398.0, + "50": 2321.0, + "51": 2604.0, + "52": 2579.0, + "53": 3020.0, + "54": 2705.0, + "55": 2369.0, + "56": 2752.0, + "57": 2351.0, + "58": 2902.0, + "59": 2786.0, + "60": 2511.0, + "61": 2861.0, + "62": 2715.0, + "63": 2476.0, + "64": 2944.0, + "65": 2791.0, + "66": 3095.0, + "67": 2945.0, + "68": 2853.0, + "69": 2919.0, + "70": 3113.0, + "71": 2898.0, + "72": 2554.0, + "73": 3029.0, + "74": 2044.0, + "75": 2601.0, + "76": 2957.0, + "77": 3204.0, + "78": 3197.0, + "79": 3123.0, + "80": 3255.0, + "81": 3582.0, + "82": 3338.0, + "83": 2799.0, + "84": 3225.0, + "85": 3372.0, + "86": 2818.0, + "87": 3881.0, + "88": 3040.0, + "89": 3335.0, + "90": 3256.0, + "91": 2903.0, + "92": 3202.0, + "93": 2806.0, + "94": 3422.0, + "95": 3348.0, + "96": 3594.0, + "97": 3290.0, + "98": 3746.0, + "99": 3085.0, + "100": 3366.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 700103168.0, + "2": 700103168.0, + "3": 700103168.0, + "4": 700103168.0, + "5": 700103168.0, + "6": 700103168.0, + "7": 700103168.0, + "8": 700103168.0, + "9": 700103168.0, + "10": 700103168.0, + "11": 700103168.0, + "12": 700103168.0, + "13": 700103168.0, + "14": 700103168.0, + "15": 700103168.0, + "16": 700103168.0, + "17": 700103168.0, + "18": 700103168.0, + "19": 700103168.0, + "20": 700103168.0, + "21": 700103168.0, + "22": 700103168.0, + "23": 700103168.0, + "24": 700103168.0, + "25": 700103168.0, + "26": 
700103168.0, + "27": 700103168.0, + "28": 700103168.0, + "29": 700103168.0, + "30": 700103168.0, + "31": 700103168.0, + "32": 700103168.0, + "33": 700103168.0, + "34": 700103168.0, + "35": 700103168.0, + "36": 700103168.0, + "37": 700103168.0, + "38": 700103168.0, + "39": 700103168.0, + "40": 700103168.0, + "41": 700103168.0, + "42": 700103168.0, + "43": 700103168.0, + "44": 700103168.0, + "45": 700103168.0, + "46": 700103168.0, + "47": 700103168.0, + "48": 700103168.0, + "49": 700103168.0, + "50": 700103168.0, + "51": 700103168.0, + "52": 700103168.0, + "53": 700103168.0, + "54": 700103168.0, + "55": 700103168.0, + "56": 700103168.0, + "57": 700103168.0, + "58": 700103168.0, + "59": 700103168.0, + "60": 700103168.0, + "61": 700103168.0, + "62": 700103168.0, + "63": 700103168.0, + "64": 700103168.0, + "65": 700103168.0, + "66": 700103168.0, + "67": 700103168.0, + "68": 700103168.0, + "69": 700103168.0, + "70": 700103168.0, + "71": 700103168.0, + "72": 700103168.0, + "73": 700103168.0, + "74": 700103168.0, + "75": 700103168.0, + "76": 700103168.0, + "77": 700103168.0, + "78": 700103168.0, + "79": 700103168.0, + "80": 700103168.0, + "81": 700103168.0, + "82": 700103168.0, + "83": 700103168.0, + "84": 700103168.0, + "85": 700103168.0, + "86": 700103168.0, + "87": 700103168.0, + "88": 700103168.0, + "89": 700103168.0, + "90": 700103168.0, + "91": 700103168.0, + "92": 700103168.0, + "93": 700103168.0, + "94": 700103168.0, + "95": 700103168.0, + "96": 700103168.0, + "97": 700103168.0, + "98": 700103168.0, + "99": 700103168.0, + "100": 700103168.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1981478400.0, + "2": 1981479424.0, + "3": 1981479424.0, + "4": 1981479424.0, + "5": 1981479424.0, + "6": 1981479424.0, + "7": 1981479424.0, + "8": 1981479424.0, + "9": 1981479424.0, + "10": 1981479424.0, + "11": 1981479424.0, + "12": 1981479424.0, + "13": 1981479424.0, + "14": 1981479424.0, + "15": 
1981479424.0, + "16": 1981479424.0, + "17": 1981479424.0, + "18": 1981479424.0, + "19": 1981479424.0, + "20": 1981479424.0, + "21": 1981479424.0, + "22": 1981479424.0, + "23": 1981479424.0, + "24": 1981479424.0, + "25": 1981479424.0, + "26": 1981479424.0, + "27": 1981479424.0, + "28": 1981479424.0, + "29": 1981479424.0, + "30": 1981479424.0, + "31": 1981479424.0, + "32": 1981479424.0, + "33": 1981479424.0, + "34": 1981479424.0, + "35": 1981479424.0, + "36": 1981479424.0, + "37": 1981479424.0, + "38": 1981479424.0, + "39": 1981479424.0, + "40": 1981479424.0, + "41": 1981479424.0, + "42": 1981479424.0, + "43": 1981479424.0, + "44": 1981479424.0, + "45": 1981479424.0, + "46": 1981479424.0, + "47": 1981479424.0, + "48": 1981479424.0, + "49": 1981479424.0, + "50": 1981479424.0, + "51": 1981479424.0, + "52": 1981479424.0, + "53": 1981479424.0, + "54": 1981479424.0, + "55": 1981479424.0, + "56": 1981479424.0, + "57": 1981479424.0, + "58": 1981479424.0, + "59": 1981479424.0, + "60": 1981479424.0, + "61": 1981479424.0, + "62": 1981479424.0, + "63": 1981479424.0, + "64": 1981479424.0, + "65": 1981479424.0, + "66": 1981479424.0, + "67": 1981479424.0, + "68": 1981479424.0, + "69": 1981479424.0, + "70": 1981479424.0, + "71": 1981479424.0, + "72": 1981479424.0, + "73": 1981479424.0, + "74": 1981479424.0, + "75": 1981479424.0, + "76": 1981479424.0, + "77": 1981479424.0, + "78": 1981479424.0, + "79": 1981479424.0, + "80": 1981479424.0, + "81": 1981479424.0, + "82": 1981479424.0, + "83": 1981479424.0, + "84": 1981479424.0, + "85": 1981479424.0, + "86": 1981479424.0, + "87": 1981479424.0, + "88": 1981479424.0, + "89": 1981479424.0, + "90": 1981479424.0, + "91": 1981479424.0, + "92": 1981479424.0, + "93": 1981479424.0, + "94": 1981479424.0, + "95": 1981479424.0, + "96": 1981479424.0, + "97": 1981479424.0, + "98": 1981479424.0, + "99": 1981479424.0, + "100": 1981479424.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 
4.30733, + "2": 0.54883, + "3": 0.40227, + "4": 0.3032, + "5": 0.22011, + "6": 0.21873, + "7": 0.21589, + "8": 0.21756, + "9": 0.2177, + "10": 0.21872, + "11": 0.23383, + "12": 0.21745, + "13": 0.21657, + "14": 0.21656, + "15": 0.21713, + "16": 0.21742, + "17": 0.21697, + "18": 0.21201, + "19": 0.21506, + "20": 0.2157, + "21": 0.21772, + "22": 0.21677, + "23": 0.21503, + "24": 0.21505, + "25": 0.21274, + "26": 0.21593, + "27": 0.21499, + "28": 0.21603, + "29": 0.21474, + "30": 0.21468, + "31": 0.21508, + "32": 0.21333, + "33": 0.21573, + "34": 0.21478, + "35": 0.21464, + "36": 0.21568, + "37": 0.21601, + "38": 0.21414, + "39": 0.21389, + "40": 0.21264, + "41": 0.21397, + "42": 0.21475, + "43": 0.21799, + "44": 0.21345, + "45": 0.21458, + "46": 0.21222, + "47": 0.2147, + "48": 0.21568, + "49": 0.21432, + "50": 0.21429, + "51": 0.30696, + "52": 0.26677, + "53": 0.22953, + "54": 0.24163, + "55": 0.25403, + "56": 0.26249, + "57": 0.21297, + "58": 0.21192, + "59": 0.20898, + "60": 0.21257, + "61": 0.21307, + "62": 0.21067, + "63": 0.21212, + "64": 0.21044, + "65": 0.21146, + "66": 0.21291, + "67": 0.21327, + "68": 0.21434, + "69": 0.21106, + "70": 0.21146, + "71": 0.21366, + "72": 0.21359, + "73": 0.21245, + "74": 0.21111, + "75": 0.21327, + "76": 0.21236, + "77": 0.21209, + "78": 0.21155, + "79": 0.2124, + "80": 0.21314, + "81": 0.21341, + "82": 0.21206, + "83": 0.21321, + "84": 0.21124, + "85": 0.21448, + "86": 0.21358, + "87": 0.21637, + "88": 0.21209, + "89": 0.21325, + "90": 0.2136, + "91": 0.21349, + "92": 0.20976, + "93": 0.21241, + "94": 0.21301, + "95": 0.21086, + "96": 0.21278, + "97": 0.21118, + "98": 0.21308, + "99": 0.21572, + "100": 0.21585 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json index dbfceceac77..feb49a01aad 100644 
--- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.84466, - "2": 10.84794, - "3": 10.84925, - "4": 10.84332, - "5": 10.88244, - "6": 10.88079, - "7": 10.86575, - "8": 10.85546, - "9": 10.85543, - "10": 10.81818, - "11": 10.88769, - "12": 10.8634, - "13": 10.86681, - "14": 10.88414, - "15": 10.82464, - "16": 10.82854, - "17": 10.79491, - "18": 10.81492, - "19": 10.80133, - "20": 10.7181, - "21": 10.69905, - "22": 10.56744, - "23": 10.717, - "24": 10.60443, - "25": 10.55007, - "26": 10.60907, - "27": 10.62028, - "28": 10.5752, - "29": 10.59624, - "30": 10.38327, - "31": 10.1537, - "32": 10.48026, - "33": 10.47378, - "34": 10.2366, - "35": 10.28843, - "36": 10.24838, - "37": 10.35354, - "38": 10.20794, - "39": 10.41884, - "40": 10.1122, - "41": 10.16092, - "42": 10.23301, - "43": 9.86118, - "44": 9.97698, - "45": 9.86493, - "46": 9.84883, - "47": 10.16617, - "48": 9.87132, - "49": 9.56691, - "50": 9.92114, - "51": 9.86695, - "52": 9.76956, - "53": 10.07809, - "54": 9.97027, - "55": 9.89683, - "56": 9.64394, - "57": 9.49728, - "58": 9.84867, - "59": 9.59977, - "60": 9.50631, - "61": 9.71011, - "62": 9.99101, - "63": 9.38968, - "64": 9.78595, - "65": 8.95983, - "66": 9.70876, - "67": 9.37892, - "68": 9.79599, - "69": 9.80666, - "70": 9.74795, - "71": 9.61779, - "72": 9.59127, - "73": 9.50398, - "74": 8.94624, - "75": 9.42942, - "76": 9.08423, - "77": 10.06698, - "78": 9.73256, - "79": 9.38117, - "80": 9.41061, - "81": 9.48289, - "82": 9.70492, - "83": 9.30713, - "84": 9.42241, - "85": 9.61802, - "86": 9.07631, - "87": 9.59382, - "88": 9.75419, - "89": 9.60093, - "90": 9.82013, - "91": 9.3407, - "92": 9.35717, - "93": 9.07927, - "94": 8.83613, - "95": 9.5223, - "96": 9.53379, - "97": 9.31633, - "98": 
9.68007, + "1": 10.84445, + "2": 10.84755, + "3": 10.84905, + "4": 10.844, + "5": 10.88133, + "6": 10.88069, + "7": 10.86435, + "8": 10.85483, + "9": 10.85577, + "10": 10.81851, + "11": 10.88835, + "12": 10.86318, + "13": 10.86739, + "14": 10.88397, + "15": 10.82443, + "16": 10.82905, + "17": 10.7953, + "18": 10.81529, + "19": 10.80121, + "20": 10.71826, + "21": 10.69956, + "22": 10.56756, + "23": 10.7171, + "24": 10.60451, + "25": 10.55018, + "26": 10.60859, + "27": 10.62013, + "28": 10.57541, + "29": 10.59599, + "30": 10.38364, + "31": 10.15409, + "32": 10.48036, + "33": 10.47379, + "34": 10.23693, + "35": 10.28857, + "36": 10.24862, + "37": 10.35357, + "38": 10.20827, + "39": 10.41871, + "40": 10.11266, + "41": 10.16079, + "42": 10.23304, + "43": 9.86146, + "44": 9.97719, + "45": 9.8651, + "46": 9.8486, + "47": 10.16607, + "48": 9.87126, + "49": 9.56738, + "50": 9.92137, + "51": 9.86682, + "52": 9.7694, + "53": 10.07839, + "54": 9.96992, + "55": 9.89678, + "56": 9.64417, + "57": 9.49737, + "58": 9.84853, + "59": 9.59973, + "60": 9.5062, + "61": 9.71028, + "62": 9.99079, + "63": 9.38989, + "64": 9.78616, + "65": 8.95963, + "66": 9.70879, + "67": 9.3791, + "68": 9.79602, + "69": 9.80692, + "70": 9.74781, + "71": 9.61777, + "72": 9.59105, + "73": 9.50417, + "74": 8.94629, + "75": 9.42953, + "76": 9.08443, + "77": 10.06697, + "78": 9.73245, + "79": 9.38132, + "80": 9.41079, + "81": 9.48315, + "82": 9.70491, + "83": 9.30719, + "84": 9.42254, + "85": 9.61799, + "86": 9.07625, + "87": 9.59384, + "88": 9.75414, + "89": 9.60107, + "90": 9.8203, + "91": 9.34086, + "92": 9.35733, + "93": 9.07939, + "94": 8.83611, + "95": 9.52231, + "96": 9.53388, + "97": 9.31636, + "98": 9.68001, "99": 8.89242, - "100": 9.39964 + "100": 9.3998 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1770.0, - "2": 1809.0, + "1": 1814.0, + "2": 1725.0, "3": 1782.0, - "4": 1916.0, - "5": 1973.0, - "6": 1955.0, - "7": 2046.0, - "8": 1773.0, - "9": 
1815.0, - "10": 1432.0, - "11": 1961.0, - "12": 1828.0, - "13": 1967.0, - "14": 1825.0, - "15": 1980.0, - "16": 1889.0, - "17": 1866.0, - "18": 1827.0, - "19": 1876.0, - "20": 1715.0, - "21": 2046.0, - "22": 1872.0, - "23": 2168.0, + "4": 1955.0, + "5": 1930.0, + "6": 1875.0, + "7": 1951.0, + "8": 1800.0, + "9": 1914.0, + "10": 1495.0, + "11": 1987.0, + "12": 1811.0, + "13": 2030.0, + "14": 1930.0, + "15": 1948.0, + "16": 1933.0, + "17": 1892.0, + "18": 1781.0, + "19": 1985.0, + "20": 1812.0, + "21": 2115.0, + "22": 1885.0, + "23": 2120.0, "24": 1814.0, - "25": 1715.0, - "26": 1721.0, - "27": 1822.0, - "28": 2102.0, - "29": 2112.0, - "30": 2020.0, - "31": 1569.0, - "32": 2022.0, - "33": 2256.0, - "34": 1884.0, - "35": 2034.0, - "36": 2027.0, - "37": 2438.0, - "38": 2363.0, - "39": 2526.0, - "40": 2254.0, - "41": 2328.0, - "42": 2409.0, - "43": 2126.0, - "44": 2166.0, - "45": 2230.0, - "46": 2487.0, - "47": 2605.0, - "48": 2351.0, - "49": 2413.0, - "50": 2274.0, - "51": 2579.0, - "52": 2508.0, - "53": 2879.0, - "54": 2744.0, - "55": 2402.0, - "56": 2720.0, - "57": 2384.0, - "58": 3002.0, - "59": 2743.0, - "60": 2457.0, - "61": 2976.0, - "62": 2631.0, - "63": 2349.0, - "64": 3077.0, - "65": 2634.0, - "66": 3076.0, - "67": 2906.0, - "68": 2759.0, - "69": 2907.0, - "70": 3045.0, - "71": 3159.0, - "72": 2506.0, - "73": 2956.0, - "74": 1945.0, - "75": 2467.0, - "76": 2979.0, - "77": 3209.0, - "78": 3122.0, - "79": 3048.0, - "80": 3389.0, - "81": 3799.0, - "82": 3272.0, - "83": 2962.0, - "84": 3328.0, - "85": 3462.0, - "86": 3071.0, - "87": 3900.0, - "88": 3128.0, - "89": 3469.0, - "90": 3095.0, - "91": 2769.0, - "92": 3168.0, - "93": 2713.0, - "94": 3416.0, - "95": 3515.0, - "96": 3425.0, - "97": 3223.0, - "98": 3769.0, - "99": 3230.0, - "100": 3219.0 + "25": 1705.0, + "26": 1815.0, + "27": 1870.0, + "28": 2162.0, + "29": 2104.0, + "30": 2061.0, + "31": 1666.0, + "32": 2010.0, + "33": 2157.0, + "34": 1918.0, + "35": 2000.0, + "36": 1966.0, + "37": 2421.0, + "38": 2318.0, 
+ "39": 2488.0, + "40": 2213.0, + "41": 2361.0, + "42": 2330.0, + "43": 2092.0, + "44": 2184.0, + "45": 2237.0, + "46": 2311.0, + "47": 2645.0, + "48": 2374.0, + "49": 2345.0, + "50": 2357.0, + "51": 2627.0, + "52": 2530.0, + "53": 2856.0, + "54": 2776.0, + "55": 2346.0, + "56": 2679.0, + "57": 2410.0, + "58": 2990.0, + "59": 2835.0, + "60": 2502.0, + "61": 2984.0, + "62": 2692.0, + "63": 2463.0, + "64": 3009.0, + "65": 2587.0, + "66": 3126.0, + "67": 2793.0, + "68": 2665.0, + "69": 2776.0, + "70": 3135.0, + "71": 3151.0, + "72": 2424.0, + "73": 2926.0, + "74": 1921.0, + "75": 2347.0, + "76": 3026.0, + "77": 3283.0, + "78": 3224.0, + "79": 3165.0, + "80": 3311.0, + "81": 3792.0, + "82": 3279.0, + "83": 2867.0, + "84": 3381.0, + "85": 3415.0, + "86": 2962.0, + "87": 3822.0, + "88": 3311.0, + "89": 3392.0, + "90": 3184.0, + "91": 2795.0, + "92": 3121.0, + "93": 2731.0, + "94": 3503.0, + "95": 3473.0, + "96": 3465.0, + "97": 3299.0, + "98": 3663.0, + "99": 3394.0, + "100": 3235.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 246998528.0, - "2": 246998528.0, - "3": 246998528.0, - "4": 246998528.0, - "5": 246998528.0, - "6": 246998528.0, - "7": 246998528.0, - "8": 246998528.0, - "9": 246998528.0, - "10": 246998528.0, - "11": 246998528.0, - "12": 246998528.0, - "13": 246998528.0, - "14": 246998528.0, - "15": 246998528.0, - "16": 246998528.0, - "17": 246998528.0, - "18": 246998528.0, - "19": 246998528.0, - "20": 246998528.0, - "21": 246998528.0, - "22": 246998528.0, - "23": 246998528.0, - "24": 246998528.0, - "25": 246998528.0, - "26": 246998528.0, - "27": 246998528.0, - "28": 246998528.0, - "29": 246998528.0, - "30": 246998528.0, - "31": 246998528.0, - "32": 246998528.0, - "33": 246998528.0, - "34": 246998528.0, - "35": 246998528.0, - "36": 246998528.0, - "37": 246998528.0, - "38": 246998528.0, - "39": 246998528.0, - "40": 246998528.0, - "41": 246998528.0, - "42": 246998528.0, - "43": 246998528.0, - "44": 
246998528.0, - "45": 246998528.0, - "46": 246998528.0, - "47": 246998528.0, - "48": 246998528.0, - "49": 246998528.0, - "50": 246998528.0, - "51": 246998528.0, - "52": 246998528.0, - "53": 246998528.0, - "54": 246998528.0, - "55": 246998528.0, - "56": 246998528.0, - "57": 246998528.0, - "58": 246998528.0, - "59": 246998528.0, - "60": 246998528.0, - "61": 246998528.0, - "62": 246998528.0, - "63": 246998528.0, - "64": 246998528.0, - "65": 246998528.0, - "66": 246998528.0, - "67": 246998528.0, - "68": 246998528.0, - "69": 246998528.0, - "70": 246998528.0, - "71": 246998528.0, - "72": 246998528.0, - "73": 246998528.0, - "74": 246998528.0, - "75": 246998528.0, - "76": 246998528.0, - "77": 246998528.0, - "78": 246998528.0, - "79": 246998528.0, - "80": 246998528.0, - "81": 246998528.0, - "82": 246998528.0, - "83": 246998528.0, - "84": 246998528.0, - "85": 246998528.0, - "86": 246998528.0, - "87": 246998528.0, - "88": 246998528.0, - "89": 246998528.0, - "90": 246998528.0, - "91": 246998528.0, - "92": 246998528.0, - "93": 246998528.0, - "94": 246998528.0, - "95": 246998528.0, - "96": 246998528.0, - "97": 246998528.0, - "98": 246998528.0, - "99": 246998528.0, - "100": 246998528.0 + "1": 700103168.0, + "2": 700103168.0, + "3": 700103168.0, + "4": 700103168.0, + "5": 700103168.0, + "6": 700103168.0, + "7": 700103168.0, + "8": 700103168.0, + "9": 700103168.0, + "10": 700103168.0, + "11": 700103168.0, + "12": 700103168.0, + "13": 700103168.0, + "14": 700103168.0, + "15": 700103168.0, + "16": 700103168.0, + "17": 700103168.0, + "18": 700103168.0, + "19": 700103168.0, + "20": 700103168.0, + "21": 700103168.0, + "22": 700103168.0, + "23": 700103168.0, + "24": 700103168.0, + "25": 700103168.0, + "26": 700103168.0, + "27": 700103168.0, + "28": 700103168.0, + "29": 700103168.0, + "30": 700103168.0, + "31": 700103168.0, + "32": 700103168.0, + "33": 700103168.0, + "34": 700103168.0, + "35": 700103168.0, + "36": 700103168.0, + "37": 700103168.0, + "38": 700103168.0, + "39": 700103168.0, 
+ "40": 700103168.0, + "41": 700103168.0, + "42": 700103168.0, + "43": 700103168.0, + "44": 700103168.0, + "45": 700103168.0, + "46": 700103168.0, + "47": 700103168.0, + "48": 700103168.0, + "49": 700103168.0, + "50": 700103168.0, + "51": 700103168.0, + "52": 700103168.0, + "53": 700103168.0, + "54": 700103168.0, + "55": 700103168.0, + "56": 700103168.0, + "57": 700103168.0, + "58": 700103168.0, + "59": 700103168.0, + "60": 700103168.0, + "61": 700103168.0, + "62": 700103168.0, + "63": 700103168.0, + "64": 700103168.0, + "65": 700103168.0, + "66": 700103168.0, + "67": 700103168.0, + "68": 700103168.0, + "69": 700103168.0, + "70": 700103168.0, + "71": 700103168.0, + "72": 700103168.0, + "73": 700103168.0, + "74": 700103168.0, + "75": 700103168.0, + "76": 700103168.0, + "77": 700103168.0, + "78": 700103168.0, + "79": 700103168.0, + "80": 700103168.0, + "81": 700103168.0, + "82": 700103168.0, + "83": 700103168.0, + "84": 700103168.0, + "85": 700103168.0, + "86": 700103168.0, + "87": 700103168.0, + "88": 700103168.0, + "89": 700103168.0, + "90": 700103168.0, + "91": 700103168.0, + "92": 700103168.0, + "93": 700103168.0, + "94": 700103168.0, + "95": 700103168.0, + "96": 700103168.0, + "97": 700103168.0, + "98": 700103168.0, + "99": 700103168.0, + "100": 700103168.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1503207936.0, - "2": 1503208960.0, - "3": 1503208960.0, - "4": 1503208960.0, - "5": 1503208960.0, - "6": 1503208960.0, - "7": 1503208960.0, - "8": 1503208960.0, - "9": 1503208960.0, - "10": 1503208960.0, - "11": 1503208960.0, - "12": 1503208960.0, - "13": 1503208960.0, - "14": 1503208960.0, - "15": 1503208960.0, - "16": 1503208960.0, - "17": 1503208960.0, - "18": 1503208960.0, - "19": 1503208960.0, - "20": 1503208960.0, - "21": 1503208960.0, - "22": 1503208960.0, - "23": 1503208960.0, - "24": 1503208960.0, - "25": 1503208960.0, - "26": 1503208960.0, - "27": 1503208960.0, - "28": 1503208960.0, - 
"29": 1503208960.0, - "30": 1503208960.0, - "31": 1503208960.0, - "32": 1503208960.0, - "33": 1503208960.0, - "34": 1503208960.0, - "35": 1503208960.0, - "36": 1503208960.0, - "37": 1503208960.0, - "38": 1503208960.0, - "39": 1503208960.0, - "40": 1503208960.0, - "41": 1503208960.0, - "42": 1503208960.0, - "43": 1503208960.0, - "44": 1503208960.0, - "45": 1503208960.0, - "46": 1503208960.0, - "47": 1503208960.0, - "48": 1503208960.0, - "49": 1503208960.0, - "50": 1503208960.0, - "51": 1503208960.0, - "52": 1503208960.0, - "53": 1503208960.0, - "54": 1503208960.0, - "55": 1503208960.0, - "56": 1503208960.0, - "57": 1503208960.0, - "58": 1503208960.0, - "59": 1503208960.0, - "60": 1503208960.0, - "61": 1503208960.0, - "62": 1503208960.0, - "63": 1503208960.0, - "64": 1503208960.0, - "65": 1503208960.0, - "66": 1503208960.0, - "67": 1503208960.0, - "68": 1503208960.0, - "69": 1503208960.0, - "70": 1503208960.0, - "71": 1503208960.0, - "72": 1503208960.0, - "73": 1503208960.0, - "74": 1503208960.0, - "75": 1503208960.0, - "76": 1503208960.0, - "77": 1503208960.0, - "78": 1503208960.0, - "79": 1503208960.0, - "80": 1503208960.0, - "81": 1503208960.0, - "82": 1503208960.0, - "83": 1503208960.0, - "84": 1503208960.0, - "85": 1503208960.0, - "86": 1503208960.0, - "87": 1503208960.0, - "88": 1503208960.0, - "89": 1503208960.0, - "90": 1503208960.0, - "91": 1503208960.0, - "92": 1503208960.0, - "93": 1503208960.0, - "94": 1503208960.0, - "95": 1503208960.0, - "96": 1503208960.0, - "97": 1503208960.0, - "98": 1503208960.0, - "99": 1503208960.0, - "100": 1503208960.0 + "1": 1956312576.0, + "2": 1956313600.0, + "3": 1956313600.0, + "4": 1956313600.0, + "5": 1956313600.0, + "6": 1956313600.0, + "7": 1956313600.0, + "8": 1956313600.0, + "9": 1956313600.0, + "10": 1956313600.0, + "11": 1956313600.0, + "12": 1956313600.0, + "13": 1956313600.0, + "14": 1956313600.0, + "15": 1956313600.0, + "16": 1956313600.0, + "17": 1956313600.0, + "18": 1956313600.0, + "19": 1956313600.0, + "20": 
1956313600.0, + "21": 1956313600.0, + "22": 1956313600.0, + "23": 1956313600.0, + "24": 1956313600.0, + "25": 1956313600.0, + "26": 1956313600.0, + "27": 1956313600.0, + "28": 1956313600.0, + "29": 1956313600.0, + "30": 1956313600.0, + "31": 1956313600.0, + "32": 1956313600.0, + "33": 1956313600.0, + "34": 1956313600.0, + "35": 1956313600.0, + "36": 1956313600.0, + "37": 1956313600.0, + "38": 1956313600.0, + "39": 1956313600.0, + "40": 1956313600.0, + "41": 1956313600.0, + "42": 1956313600.0, + "43": 1956313600.0, + "44": 1956313600.0, + "45": 1956313600.0, + "46": 1956313600.0, + "47": 1956313600.0, + "48": 1956313600.0, + "49": 1956313600.0, + "50": 1956313600.0, + "51": 1956313600.0, + "52": 1956313600.0, + "53": 1956313600.0, + "54": 1956313600.0, + "55": 1956313600.0, + "56": 1956313600.0, + "57": 1956313600.0, + "58": 1956313600.0, + "59": 1956313600.0, + "60": 1956313600.0, + "61": 1956313600.0, + "62": 1956313600.0, + "63": 1956313600.0, + "64": 1956313600.0, + "65": 1956313600.0, + "66": 1956313600.0, + "67": 1956313600.0, + "68": 1956313600.0, + "69": 1956313600.0, + "70": 1956313600.0, + "71": 1956313600.0, + "72": 1956313600.0, + "73": 1956313600.0, + "74": 1956313600.0, + "75": 1956313600.0, + "76": 1956313600.0, + "77": 1956313600.0, + "78": 1956313600.0, + "79": 1956313600.0, + "80": 1956313600.0, + "81": 1956313600.0, + "82": 1956313600.0, + "83": 1956313600.0, + "84": 1956313600.0, + "85": 1956313600.0, + "86": 1956313600.0, + "87": 1956313600.0, + "88": 1956313600.0, + "89": 1956313600.0, + "90": 1956313600.0, + "91": 1956313600.0, + "92": 1956313600.0, + "93": 1956313600.0, + "94": 1956313600.0, + "95": 1956313600.0, + "96": 1956313600.0, + "97": 1956313600.0, + "98": 1956313600.0, + "99": 1956313600.0, + "100": 1956313600.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 6.90789, - "2": 0.23993, - "3": 0.20829, - "4": 0.18489, - "5": 0.18237, - "6": 0.17507, - "7": 0.17401, - "8": 0.17758, 
- "9": 0.17734, - "10": 0.17577, - "11": 0.17329, - "12": 0.17635, - "13": 0.17559, - "14": 0.17588, - "15": 0.17556, - "16": 0.17798, - "17": 0.17347, - "18": 0.17346, - "19": 0.17675, - "20": 0.17518, - "21": 0.17864, - "22": 0.17833, - "23": 0.1827, - "24": 0.1775, - "25": 0.17745, - "26": 0.1755, - "27": 0.17594, - "28": 0.18475, - "29": 0.17599, - "30": 0.17452, - "31": 0.17601, - "32": 0.17743, - "33": 0.17355, - "34": 0.18205, - "35": 0.17672, - "36": 0.17728, - "37": 0.17438, - "38": 0.17752, - "39": 0.18463, - "40": 0.17673, - "41": 0.17505, - "42": 0.17657, - "43": 0.1769, - "44": 0.19406, - "45": 0.20743, - "46": 0.18263, - "47": 0.16986, - "48": 0.17268, - "49": 0.17404, - "50": 0.17381, - "51": 0.1735, - "52": 0.1693, - "53": 0.17058, - "54": 0.17247, - "55": 0.1773, - "56": 0.17259, - "57": 0.17109, - "58": 0.17178, - "59": 0.17167, - "60": 0.17568, - "61": 0.17729, - "62": 0.16999, - "63": 0.17091, - "64": 0.17034, - "65": 0.17236, - "66": 0.17625, - "67": 0.17591, - "68": 0.17126, - "69": 0.17159, - "70": 0.17123, - "71": 0.17221, - "72": 0.17877, - "73": 0.17426, - "74": 0.17035, - "75": 0.1721, - "76": 0.17327, - "77": 0.17396, - "78": 0.17631, - "79": 0.17485, - "80": 0.17347, - "81": 0.17358, - "82": 0.17087, - "83": 0.17164, - "84": 0.17784, - "85": 0.17401, - "86": 0.18008, - "87": 0.17399, - "88": 0.17322, - "89": 0.17239, - "90": 0.17856, - "91": 0.17078, - "92": 0.18016, - "93": 0.18343, - "94": 0.18085, - "95": 0.175, - "96": 0.17786, - "97": 0.17064, - "98": 0.17229, - "99": 0.17164, - "100": 0.20496 + "1": 4.9999, + "2": 0.17604, + "3": 0.16654, + "4": 0.15324, + "5": 0.14982, + "6": 0.15181, + "7": 0.15028, + "8": 0.15021, + "9": 0.14947, + "10": 0.15037, + "11": 0.15211, + "12": 0.15245, + "13": 0.1517, + "14": 0.15044, + "15": 0.15166, + "16": 0.14955, + "17": 0.15212, + "18": 0.15368, + "19": 0.15062, + "20": 0.15093, + "21": 0.1573, + "22": 0.15817, + "23": 0.14955, + "24": 0.14912, + "25": 0.15491, + "26": 0.14937, + "27": 0.15155, 
+ "28": 0.15055, + "29": 0.14603, + "30": 0.14602, + "31": 0.14824, + "32": 0.14477, + "33": 0.14671, + "34": 0.14693, + "35": 0.14738, + "36": 0.14504, + "37": 0.14513, + "38": 0.14512, + "39": 0.14473, + "40": 0.14614, + "41": 0.14578, + "42": 0.14684, + "43": 0.14487, + "44": 0.14547, + "45": 0.145, + "46": 0.14486, + "47": 0.14751, + "48": 0.14552, + "49": 0.14493, + "50": 0.14395, + "51": 0.1521, + "52": 0.14666, + "53": 0.14801, + "54": 0.14826, + "55": 0.14557, + "56": 0.15142, + "57": 0.14933, + "58": 0.14555, + "59": 0.14614, + "60": 0.15938, + "61": 0.16219, + "62": 0.14894, + "63": 0.14392, + "64": 0.14433, + "65": 0.1452, + "66": 0.14488, + "67": 0.14508, + "68": 0.14493, + "69": 0.14702, + "70": 0.14432, + "71": 0.14412, + "72": 0.14561, + "73": 0.15534, + "74": 0.14715, + "75": 0.14564, + "76": 0.146, + "77": 0.14498, + "78": 0.14433, + "79": 0.14454, + "80": 0.1457, + "81": 0.14534, + "82": 0.14499, + "83": 0.14463, + "84": 0.1456, + "85": 0.14456, + "86": 0.1456, + "87": 0.14661, + "88": 0.1469, + "89": 0.14537, + "90": 0.14515, + "91": 0.14627, + "92": 0.14607, + "93": 0.14633, + "94": 0.14863, + "95": 0.14553, + "96": 0.14487, + "97": 0.14462, + "98": 0.14685, + "99": 0.14551, + "100": 0.14614 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..3264336647e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.84059, + "2": 10.85204, + "3": 10.84133, + "4": 10.84535, + "5": 10.85551, + "6": 10.86422, + "7": 
10.85246, + "8": 10.84439, + "9": 10.84792, + "10": 10.81313, + "11": 10.8561, + "12": 10.84243, + "13": 10.86076, + "14": 10.8495, + "15": 10.81525, + "16": 10.80923, + "17": 10.78383, + "18": 10.79178, + "19": 10.79409, + "20": 10.70535, + "21": 10.69778, + "22": 10.58348, + "23": 10.69235, + "24": 10.60608, + "25": 10.56718, + "26": 10.61425, + "27": 10.60614, + "28": 10.55901, + "29": 10.56486, + "30": 10.37865, + "31": 10.16183, + "32": 10.45519, + "33": 10.45018, + "34": 10.23984, + "35": 10.27323, + "36": 10.24226, + "37": 10.34516, + "38": 10.21732, + "39": 10.39456, + "40": 10.09506, + "41": 10.15057, + "42": 10.21211, + "43": 9.87993, + "44": 9.97831, + "45": 9.85574, + "46": 9.83355, + "47": 10.14081, + "48": 9.86387, + "49": 9.55497, + "50": 9.91604 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1653.0, + "2": 1752.0, + "3": 1624.0, + "4": 1773.0, + "5": 2051.0, + "6": 1884.0, + "7": 1841.0, + "8": 1684.0, + "9": 1859.0, + "10": 1545.0, + "11": 1863.0, + "12": 1746.0, + "13": 2004.0, + "14": 1896.0, + "15": 1934.0, + "16": 2001.0, + "17": 1933.0, + "18": 1793.0, + "19": 1900.0, + "20": 1792.0, + "21": 2062.0, + "22": 1795.0, + "23": 1997.0, + "24": 1666.0, + "25": 1607.0, + "26": 1745.0, + "27": 1880.0, + "28": 1887.0, + "29": 2023.0, + "30": 1964.0, + "31": 1609.0, + "32": 1793.0, + "33": 2102.0, + "34": 1891.0, + "35": 1869.0, + "36": 1984.0, + "37": 2446.0, + "38": 2088.0, + "39": 2394.0, + "40": 2182.0, + "41": 2110.0, + "42": 2180.0, + "43": 1931.0, + "44": 2082.0, + "45": 2079.0, + "46": 2189.0, + "47": 2510.0, + "48": 2197.0, + "49": 2282.0, + "50": 2160.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 299204096.0, + "2": 299204096.0, + "3": 299204096.0, + "4": 299204096.0, + "5": 299204096.0, + "6": 299204096.0, + "7": 299204096.0, + "8": 299204096.0, + "9": 299204096.0, + "10": 299204096.0, + "11": 299204096.0, + "12": 
299204096.0, + "13": 299204096.0, + "14": 299204096.0, + "15": 299204096.0, + "16": 299204096.0, + "17": 299204096.0, + "18": 299204096.0, + "19": 299204096.0, + "20": 299204096.0, + "21": 299204096.0, + "22": 299204096.0, + "23": 299204096.0, + "24": 299204096.0, + "25": 299204096.0, + "26": 299204096.0, + "27": 299204096.0, + "28": 299204096.0, + "29": 299204096.0, + "30": 299204096.0, + "31": 299204096.0, + "32": 299204096.0, + "33": 299204096.0, + "34": 299204096.0, + "35": 299204096.0, + "36": 299204096.0, + "37": 299204096.0, + "38": 299204096.0, + "39": 299204096.0, + "40": 299204096.0, + "41": 299204096.0, + "42": 299204096.0, + "43": 299204096.0, + "44": 299204096.0, + "45": 299204096.0, + "46": 299204096.0, + "47": 299204096.0, + "48": 299204096.0, + "49": 299204096.0, + "50": 299204096.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1480436736.0, + "2": 1542892032.0, + "3": 1542892032.0, + "4": 1542892032.0, + "5": 1542892032.0, + "6": 1542892032.0, + "7": 1542892032.0, + "8": 1542892032.0, + "9": 1542892032.0, + "10": 1542892032.0, + "11": 1542892032.0, + "12": 1542892032.0, + "13": 1542892032.0, + "14": 1542892032.0, + "15": 1542892032.0, + "16": 1542892032.0, + "17": 1542892032.0, + "18": 1542892032.0, + "19": 1542892032.0, + "20": 1542892032.0, + "21": 1542892032.0, + "22": 1542892032.0, + "23": 1542892032.0, + "24": 1542892032.0, + "25": 1542892032.0, + "26": 1542892032.0, + "27": 1542892032.0, + "28": 1542892032.0, + "29": 1542892032.0, + "30": 1542892032.0, + "31": 1542892032.0, + "32": 1542892032.0, + "33": 1542892032.0, + "34": 1542892032.0, + "35": 1542892032.0, + "36": 1542892032.0, + "37": 1542892032.0, + "38": 1542892032.0, + "39": 1542892032.0, + "40": 1542892032.0, + "41": 1542892032.0, + "42": 1542892032.0, + "43": 1542892032.0, + "44": 1542892032.0, + "45": 1542892032.0, + "46": 1542892032.0, + "47": 1542892032.0, + "48": 1542892032.0, + "49": 1542892032.0, + "50": 
1542892032.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5.51145, + "2": 0.34832, + "3": 0.34015, + "4": 0.32824, + "5": 0.32875, + "6": 0.32954, + "7": 0.3278, + "8": 0.32782, + "9": 0.33548, + "10": 0.32705, + "11": 0.3306, + "12": 0.649, + "13": 0.32524, + "14": 0.32234, + "15": 0.32194, + "16": 0.32286, + "17": 0.32381, + "18": 0.32317, + "19": 0.32316, + "20": 0.32225, + "21": 0.32237, + "22": 0.32068, + "23": 0.31836, + "24": 0.32077, + "25": 0.32241, + "26": 0.3196, + "27": 0.32484, + "28": 0.3223, + "29": 0.32268, + "30": 0.31921, + "31": 0.31951, + "32": 0.31901, + "33": 0.31776, + "34": 0.31959, + "35": 0.32009, + "36": 0.32217, + "37": 0.31843, + "38": 0.32842, + "39": 0.31803, + "40": 0.32118, + "41": 0.67436, + "42": 0.32184, + "43": 0.31883, + "44": 0.31976, + "45": 0.64044, + "46": 0.38679, + "47": 0.37664, + "48": 0.3844, + "49": 0.38013, + "50": 0.38188 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json index 2bfd32d0721..dcd92db1774 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 299203072.0, - "2": 299203072.0, - "3": 299203072.0, - "4": 299203072.0, - "5": 299203072.0, - "6": 299203072.0, - "7": 299203072.0, - "8": 299203072.0, - "9": 299203072.0, - "10": 299203072.0, - "11": 299203072.0, - "12": 299203072.0, - "13": 299203072.0, - "14": 299203072.0, - "15": 299203072.0, - "16": 
299203072.0, - "17": 299203072.0, - "18": 299203072.0, - "19": 299203072.0, - "20": 299203072.0, - "21": 299203072.0, - "22": 299203072.0, - "23": 299203072.0, - "24": 299203072.0, - "25": 299203072.0, - "26": 299203072.0, - "27": 299203072.0, - "28": 299203072.0, - "29": 299203072.0, - "30": 299203072.0, - "31": 299203072.0, - "32": 299203072.0, - "33": 299203072.0, - "34": 299203072.0, - "35": 299203072.0, - "36": 299203072.0, - "37": 299203072.0, - "38": 299203072.0, - "39": 299203072.0, - "40": 299203072.0, - "41": 299203072.0, - "42": 299203072.0, - "43": 299203072.0, - "44": 299203072.0, - "45": 299203072.0, - "46": 299203072.0, - "47": 299203072.0, - "48": 299203072.0, - "49": 299203072.0, - "50": 299203072.0 + "1": 299204096.0, + "2": 299204096.0, + "3": 299204096.0, + "4": 299204096.0, + "5": 299204096.0, + "6": 299204096.0, + "7": 299204096.0, + "8": 299204096.0, + "9": 299204096.0, + "10": 299204096.0, + "11": 299204096.0, + "12": 299204096.0, + "13": 299204096.0, + "14": 299204096.0, + "15": 299204096.0, + "16": 299204096.0, + "17": 299204096.0, + "18": 299204096.0, + "19": 299204096.0, + "20": 299204096.0, + "21": 299204096.0, + "22": 299204096.0, + "23": 299204096.0, + "24": 299204096.0, + "25": 299204096.0, + "26": 299204096.0, + "27": 299204096.0, + "28": 299204096.0, + "29": 299204096.0, + "30": 299204096.0, + "31": 299204096.0, + "32": 299204096.0, + "33": 299204096.0, + "34": 299204096.0, + "35": 299204096.0, + "36": 299204096.0, + "37": 299204096.0, + "38": 299204096.0, + "39": 299204096.0, + "40": 299204096.0, + "41": 299204096.0, + "42": 299204096.0, + "43": 299204096.0, + "44": 299204096.0, + "45": 299204096.0, + "46": 299204096.0, + "47": 299204096.0, + "48": 299204096.0, + "49": 299204096.0, + "50": 299204096.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1477945856.0, - "2": 1542891008.0, - "3": 1542891008.0, - "4": 1542891008.0, - "5": 1542891008.0, - "6": 1542891008.0, - 
"7": 1542891008.0, - "8": 1542891008.0, - "9": 1542891008.0, - "10": 1542891008.0, - "11": 1542891008.0, - "12": 1542891008.0, - "13": 1542891008.0, - "14": 1542891008.0, - "15": 1542891008.0, - "16": 1542891008.0, - "17": 1542891008.0, - "18": 1542891008.0, - "19": 1542891008.0, - "20": 1542891008.0, - "21": 1542891008.0, - "22": 1542891008.0, - "23": 1542891008.0, - "24": 1542891008.0, - "25": 1542891008.0, - "26": 1542891008.0, - "27": 1542891008.0, - "28": 1542891008.0, - "29": 1542891008.0, - "30": 1542891008.0, - "31": 1542891008.0, - "32": 1542891008.0, - "33": 1542891008.0, - "34": 1542891008.0, - "35": 1542891008.0, - "36": 1542891008.0, - "37": 1542891008.0, - "38": 1542891008.0, - "39": 1542891008.0, - "40": 1542891008.0, - "41": 1542891008.0, - "42": 1542891008.0, - "43": 1542891008.0, - "44": 1542891008.0, - "45": 1542891008.0, - "46": 1542891008.0, - "47": 1542891008.0, - "48": 1542891008.0, - "49": 1542891008.0, - "50": 1542891008.0 + "1": 1478995456.0, + "2": 1545382400.0, + "3": 1545382400.0, + "4": 1545382400.0, + "5": 1545382400.0, + "6": 1545382400.0, + "7": 1545382400.0, + "8": 1545382400.0, + "9": 1545382400.0, + "10": 1545382400.0, + "11": 1545382400.0, + "12": 1545382400.0, + "13": 1545382400.0, + "14": 1545382400.0, + "15": 1545382400.0, + "16": 1545382400.0, + "17": 1545382400.0, + "18": 1545382400.0, + "19": 1545382400.0, + "20": 1545382400.0, + "21": 1545382400.0, + "22": 1545382400.0, + "23": 1545382400.0, + "24": 1545382400.0, + "25": 1545382400.0, + "26": 1545382400.0, + "27": 1545382400.0, + "28": 1545382400.0, + "29": 1545382400.0, + "30": 1545382400.0, + "31": 1545382400.0, + "32": 1545382400.0, + "33": 1545382400.0, + "34": 1545382400.0, + "35": 1545382400.0, + "36": 1545382400.0, + "37": 1545382400.0, + "38": 1545382400.0, + "39": 1545382400.0, + "40": 1545382400.0, + "41": 1545382400.0, + "42": 1545382400.0, + "43": 1545382400.0, + "44": 1545382400.0, + "45": 1545382400.0, + "46": 1545382400.0, + "47": 1545382400.0, + "48": 
1545382400.0, + "49": 1545382400.0, + "50": 1545382400.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 8.86827, - "2": 0.25581, - "3": 0.24685, - "4": 0.24528, - "5": 0.24786, - "6": 0.25055, - "7": 0.2473, - "8": 0.24843, - "9": 0.24646, - "10": 0.24448, - "11": 0.24595, - "12": 0.24375, - "13": 0.24607, - "14": 0.2438, - "15": 0.24496, - "16": 0.24469, - "17": 0.24672, - "18": 0.2472, - "19": 0.24412, - "20": 0.24734, - "21": 0.24525, - "22": 0.24726, - "23": 0.24425, - "24": 0.2467, - "25": 0.24589, - "26": 0.24521, - "27": 0.24972, - "28": 0.24969, - "29": 0.24951, - "30": 0.24819, - "31": 0.25039, - "32": 0.24983, - "33": 0.25363, - "34": 0.25237, - "35": 0.24992, - "36": 0.24811, - "37": 0.25001, - "38": 0.24929, - "39": 0.24928, - "40": 0.24894, - "41": 0.24934, - "42": 0.24889, - "43": 0.24734, - "44": 0.24821, - "45": 0.2492, - "46": 0.24867, - "47": 0.25083, - "48": 0.24933, - "49": 0.24988, - "50": 0.25012 + "1": 9.29646, + "2": 0.25495, + "3": 0.23221, + "4": 0.21344, + "5": 0.21407, + "6": 0.2135, + "7": 0.2133, + "8": 0.2143, + "9": 0.2448, + "10": 0.21516, + "11": 0.21366, + "12": 0.21308, + "13": 0.21405, + "14": 0.21663, + "15": 0.21321, + "16": 0.21331, + "17": 0.21649, + "18": 0.21423, + "19": 0.21617, + "20": 0.21504, + "21": 0.21521, + "22": 0.21474, + "23": 0.21516, + "24": 0.21334, + "25": 0.21673, + "26": 0.2145, + "27": 0.21534, + "28": 0.21454, + "29": 0.21458, + "30": 0.21608, + "31": 0.2147, + "32": 0.21508, + "33": 0.21429, + "34": 0.21502, + "35": 0.21469, + "36": 0.21553, + "37": 0.21385, + "38": 0.21644, + "39": 0.2164, + "40": 0.21622, + "41": 0.21355, + "42": 0.21641, + "43": 0.21488, + "44": 0.21246, + "45": 0.58026, + "46": 0.2168, + "47": 0.21774, + "48": 0.21503, + "49": 0.21695, + "50": 0.21799 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json index 5dd18b2b701..f6ec6ecdaca 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1449634304.0, - "2": 1516021248.0, - "3": 1516021248.0, - "4": 1516021248.0, - "5": 1516021248.0, - "6": 1516021248.0, - "7": 1516021248.0, - "8": 1516021248.0, - "9": 1516021248.0, - "10": 1516021248.0, - "11": 1516021248.0, - "12": 1516021248.0, - "13": 1516021248.0, - "14": 1516021248.0, - "15": 1516021248.0, - "16": 1516021248.0, - "17": 1516021248.0, - "18": 1516021248.0, - "19": 1516021248.0, - "20": 1516021248.0, - "21": 1516021248.0, - "22": 1516021248.0, - "23": 1516021248.0, - "24": 1516021248.0, - "25": 1516021248.0, - "26": 1516021248.0, - "27": 1516021248.0, - "28": 1516021248.0, - "29": 1516021248.0, - "30": 1516021248.0, - "31": 1516021248.0, - "32": 1516021248.0, - "33": 1516021248.0, - "34": 1516021248.0, - "35": 1516021248.0, - "36": 1516021248.0, - "37": 1516021248.0, - "38": 1516021248.0, - "39": 1516021248.0, - "40": 1516021248.0, - "41": 1516021248.0, - "42": 1516021248.0, - "43": 1516021248.0, - "44": 1516021248.0, - "45": 1516021248.0, - "46": 1516021248.0, - "47": 1516021248.0, - "48": 1516021248.0, - "49": 1516021248.0, - "50": 1516021248.0 + "1": 1448585728.0, + "2": 1513530880.0, + "3": 1513530880.0, + "4": 1513530880.0, + "5": 1513530880.0, + "6": 1513530880.0, + "7": 1513530880.0, + "8": 1513530880.0, + "9": 1513530880.0, + "10": 1513530880.0, + "11": 
1513530880.0, + "12": 1513530880.0, + "13": 1513530880.0, + "14": 1513530880.0, + "15": 1513530880.0, + "16": 1513530880.0, + "17": 1513530880.0, + "18": 1513530880.0, + "19": 1513530880.0, + "20": 1513530880.0, + "21": 1513530880.0, + "22": 1513530880.0, + "23": 1513530880.0, + "24": 1513530880.0, + "25": 1513530880.0, + "26": 1513530880.0, + "27": 1513530880.0, + "28": 1513530880.0, + "29": 1513530880.0, + "30": 1513530880.0, + "31": 1513530880.0, + "32": 1513530880.0, + "33": 1513530880.0, + "34": 1513530880.0, + "35": 1513530880.0, + "36": 1513530880.0, + "37": 1513530880.0, + "38": 1513530880.0, + "39": 1513530880.0, + "40": 1513530880.0, + "41": 1513530880.0, + "42": 1513530880.0, + "43": 1513530880.0, + "44": 1513530880.0, + "45": 1513530880.0, + "46": 1513530880.0, + "47": 1513530880.0, + "48": 1513530880.0, + "49": 1513530880.0, + "50": 1513530880.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4.8193, - "2": 0.36983, - "3": 0.32405, - "4": 0.32179, - "5": 0.32037, - "6": 0.32162, - "7": 0.32479, - "8": 0.32031, - "9": 0.32398, - "10": 0.32296, - "11": 0.32125, - "12": 0.32185, - "13": 0.323, - "14": 0.32307, - "15": 0.32035, - "16": 0.31953, - "17": 0.32119, - "18": 0.32536, - "19": 0.32368, - "20": 0.32071, - "21": 0.32043, - "22": 0.32093, - "23": 0.32096, - "24": 0.31999, - "25": 0.32046, - "26": 0.31988, - "27": 0.32184, - "28": 0.32107, - "29": 0.32078, - "30": 0.32174, - "31": 0.32345, - "32": 0.32975, - "33": 0.32181, - "34": 0.32294, - "35": 0.32426, - "36": 0.32184, - "37": 0.32175, - "38": 0.32222, - "39": 0.32058, - "40": 0.32111, - "41": 0.33546, - "42": 0.32505, - "43": 0.32502, - "44": 0.32486, - "45": 0.32683, - "46": 0.32331, - "47": 0.322, - "48": 0.32205, - "49": 0.32128, - "50": 0.32053 + "1": 3.59395, + "2": 0.38136, + "3": 0.33497, + "4": 0.31659, + "5": 0.321, + "6": 0.3174, + "7": 0.31686, + "8": 0.31682, + "9": 0.32441, + "10": 0.31766, + "11": 0.31647, + "12": 0.31676, + 
"13": 0.31706, + "14": 0.31701, + "15": 0.31716, + "16": 0.31906, + "17": 0.31727, + "18": 0.31834, + "19": 0.31964, + "20": 0.31956, + "21": 0.3203, + "22": 0.32057, + "23": 0.32049, + "24": 0.31892, + "25": 0.32081, + "26": 0.31964, + "27": 0.31915, + "28": 0.31828, + "29": 0.31932, + "30": 0.31791, + "31": 0.31931, + "32": 0.31993, + "33": 0.31989, + "34": 0.32088, + "35": 0.31904, + "36": 0.65249, + "37": 0.3209, + "38": 0.31853, + "39": 0.32906, + "40": 0.3183, + "41": 0.32008, + "42": 0.31904, + "43": 0.31861, + "44": 0.3189, + "45": 0.31881, + "46": 0.31915, + "47": 0.31943, + "48": 0.31889, + "49": 0.3186, + "50": 0.31887 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..4302879367b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.83993, + "2": 10.85182, + "3": 10.84166, + "4": 10.84441, + "5": 10.85514, + "6": 10.86428, + "7": 10.85243, + "8": 10.84464, + "9": 10.84864, + "10": 10.81333, + "11": 10.85638, + "12": 10.84233, + "13": 10.86046, + "14": 10.84976, + "15": 10.81618, + "16": 10.80886, + "17": 10.78242, + "18": 10.79155, + "19": 10.79495, + "20": 10.7055, + "21": 10.6978, + "22": 10.58349, + "23": 10.69268, + "24": 10.60558, + "25": 10.56742, + "26": 10.61456, + "27": 10.6067, + "28": 10.55905, + "29": 10.56526, + "30": 10.37918, + "31": 10.16276, + "32": 10.45543, + "33": 10.45037, + "34": 10.23993, + "35": 10.27354, + "36": 10.24224, + "37": 10.34559, + "38": 10.21738, + "39": 10.39453, + 
"40": 10.095, + "41": 10.15093, + "42": 10.21235, + "43": 9.87982, + "44": 9.97875, + "45": 9.85588, + "46": 9.83349, + "47": 10.14101, + "48": 9.86418, + "49": 9.55509, + "50": 9.91636, + "51": 9.86104, + "52": 9.75109, + "53": 10.06631, + "54": 9.95634, + "55": 9.89354, + "56": 9.637, + "57": 9.49142, + "58": 9.8341, + "59": 9.5931, + "60": 9.51379, + "61": 9.69183, + "62": 9.99162, + "63": 9.39196, + "64": 9.77455, + "65": 8.96319, + "66": 9.70663, + "67": 9.3789, + "68": 9.78328, + "69": 9.79736, + "70": 9.73753, + "71": 9.62711, + "72": 9.58907, + "73": 9.50446, + "74": 8.94975, + "75": 9.4278, + "76": 9.08764, + "77": 10.06759, + "78": 9.72141, + "79": 9.3861, + "80": 9.40495, + "81": 9.48596, + "82": 9.70195, + "83": 9.31553, + "84": 9.41806, + "85": 9.61378, + "86": 9.08145, + "87": 9.59631, + "88": 9.75008, + "89": 9.60386, + "90": 9.82838, + "91": 9.33622, + "92": 9.35764, + "93": 9.08795, + "94": 8.83437, + "95": 9.53352, + "96": 9.53315, + "97": 9.31129, + "98": 9.67176, + "99": 8.89816, + "100": 9.40969 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1621.0, + "2": 1756.0, + "3": 1698.0, + "4": 1764.0, + "5": 2045.0, + "6": 1927.0, + "7": 1901.0, + "8": 1768.0, + "9": 1823.0, + "10": 1456.0, + "11": 1884.0, + "12": 1834.0, + "13": 2003.0, + "14": 1786.0, + "15": 1879.0, + "16": 1948.0, + "17": 1849.0, + "18": 1718.0, + "19": 1870.0, + "20": 1750.0, + "21": 1977.0, + "22": 1741.0, + "23": 1946.0, + "24": 1642.0, + "25": 1636.0, + "26": 1817.0, + "27": 1926.0, + "28": 1981.0, + "29": 1993.0, + "30": 1929.0, + "31": 1630.0, + "32": 1896.0, + "33": 2115.0, + "34": 1824.0, + "35": 1960.0, + "36": 1935.0, + "37": 2410.0, + "38": 2259.0, + "39": 2428.0, + "40": 2119.0, + "41": 2278.0, + "42": 2118.0, + "43": 1992.0, + "44": 2041.0, + "45": 1992.0, + "46": 2158.0, + "47": 2416.0, + "48": 2338.0, + "49": 2315.0, + "50": 2242.0, + "51": 2431.0, + "52": 2467.0, + "53": 2794.0, + "54": 2675.0, + "55": 
2313.0, + "56": 2597.0, + "57": 2278.0, + "58": 2887.0, + "59": 2701.0, + "60": 2190.0, + "61": 2764.0, + "62": 2576.0, + "63": 2405.0, + "64": 2903.0, + "65": 2516.0, + "66": 2885.0, + "67": 2700.0, + "68": 2682.0, + "69": 2987.0, + "70": 3141.0, + "71": 3055.0, + "72": 2413.0, + "73": 2864.0, + "74": 1870.0, + "75": 2450.0, + "76": 3032.0, + "77": 3230.0, + "78": 3125.0, + "79": 2982.0, + "80": 3203.0, + "81": 3657.0, + "82": 3174.0, + "83": 2818.0, + "84": 3190.0, + "85": 3166.0, + "86": 2793.0, + "87": 3635.0, + "88": 3005.0, + "89": 3373.0, + "90": 3066.0, + "91": 2857.0, + "92": 3080.0, + "93": 2533.0, + "94": 3303.0, + "95": 3270.0, + "96": 3416.0, + "97": 3085.0, + "98": 3437.0, + "99": 3243.0, + "100": 3119.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 299204096.0, + "2": 299204096.0, + "3": 299204096.0, + "4": 299204096.0, + "5": 299204096.0, + "6": 299204096.0, + "7": 299204096.0, + "8": 299204096.0, + "9": 299204096.0, + "10": 299204096.0, + "11": 299204096.0, + "12": 299204096.0, + "13": 299204096.0, + "14": 299204096.0, + "15": 299204096.0, + "16": 299204096.0, + "17": 299204096.0, + "18": 299204096.0, + "19": 299204096.0, + "20": 299204096.0, + "21": 299204096.0, + "22": 299204096.0, + "23": 299204096.0, + "24": 299204096.0, + "25": 299204096.0, + "26": 299204096.0, + "27": 299204096.0, + "28": 299204096.0, + "29": 299204096.0, + "30": 299204096.0, + "31": 299204096.0, + "32": 299204096.0, + "33": 299204096.0, + "34": 299204096.0, + "35": 299204096.0, + "36": 299204096.0, + "37": 299204096.0, + "38": 299204096.0, + "39": 299204096.0, + "40": 299204096.0, + "41": 299204096.0, + "42": 299204096.0, + "43": 299204096.0, + "44": 299204096.0, + "45": 299204096.0, + "46": 299204096.0, + "47": 299204096.0, + "48": 299204096.0, + "49": 299204096.0, + "50": 299204096.0, + "51": 299204096.0, + "52": 299204096.0, + "53": 299204096.0, + "54": 299204096.0, + "55": 299204096.0, + "56": 
299204096.0, + "57": 299204096.0, + "58": 299204096.0, + "59": 299204096.0, + "60": 299204096.0, + "61": 299204096.0, + "62": 299204096.0, + "63": 299204096.0, + "64": 299204096.0, + "65": 299204096.0, + "66": 299204096.0, + "67": 299204096.0, + "68": 299204096.0, + "69": 299204096.0, + "70": 299204096.0, + "71": 299204096.0, + "72": 299204096.0, + "73": 299204096.0, + "74": 299204096.0, + "75": 299204096.0, + "76": 299204096.0, + "77": 299204096.0, + "78": 299204096.0, + "79": 299204096.0, + "80": 299204096.0, + "81": 299204096.0, + "82": 299204096.0, + "83": 299204096.0, + "84": 299204096.0, + "85": 299204096.0, + "86": 299204096.0, + "87": 299204096.0, + "88": 299204096.0, + "89": 299204096.0, + "90": 299204096.0, + "91": 299204096.0, + "92": 299204096.0, + "93": 299204096.0, + "94": 299204096.0, + "95": 299204096.0, + "96": 299204096.0, + "97": 299204096.0, + "98": 299204096.0, + "99": 299204096.0, + "100": 299204096.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 999540224.0, + "2": 1065140736.0, + "3": 1065140736.0, + "4": 1065140736.0, + "5": 1065140736.0, + "6": 1065140736.0, + "7": 1065140736.0, + "8": 1065140736.0, + "9": 1065140736.0, + "10": 1065140736.0, + "11": 1065140736.0, + "12": 1065140736.0, + "13": 1065140736.0, + "14": 1065140736.0, + "15": 1065140736.0, + "16": 1065140736.0, + "17": 1065140736.0, + "18": 1065140736.0, + "19": 1065140736.0, + "20": 1065140736.0, + "21": 1065140736.0, + "22": 1065140736.0, + "23": 1065140736.0, + "24": 1065140736.0, + "25": 1065140736.0, + "26": 1065140736.0, + "27": 1065140736.0, + "28": 1065140736.0, + "29": 1065140736.0, + "30": 1065140736.0, + "31": 1065140736.0, + "32": 1065140736.0, + "33": 1065140736.0, + "34": 1065140736.0, + "35": 1065140736.0, + "36": 1065140736.0, + "37": 1065140736.0, + "38": 1065140736.0, + "39": 1065140736.0, + "40": 1065140736.0, + "41": 1065140736.0, + "42": 1065140736.0, + "43": 1065140736.0, + "44": 
1065140736.0, + "45": 1065140736.0, + "46": 1065140736.0, + "47": 1065140736.0, + "48": 1065140736.0, + "49": 1065140736.0, + "50": 1065140736.0, + "51": 1065140736.0, + "52": 1065140736.0, + "53": 1065140736.0, + "54": 1065140736.0, + "55": 1065140736.0, + "56": 1065140736.0, + "57": 1065140736.0, + "58": 1065140736.0, + "59": 1065140736.0, + "60": 1065140736.0, + "61": 1065140736.0, + "62": 1065140736.0, + "63": 1065140736.0, + "64": 1065140736.0, + "65": 1065140736.0, + "66": 1065140736.0, + "67": 1065140736.0, + "68": 1065140736.0, + "69": 1065140736.0, + "70": 1065140736.0, + "71": 1065140736.0, + "72": 1065140736.0, + "73": 1065140736.0, + "74": 1065140736.0, + "75": 1065140736.0, + "76": 1065140736.0, + "77": 1065140736.0, + "78": 1065140736.0, + "79": 1065140736.0, + "80": 1065140736.0, + "81": 1065140736.0, + "82": 1065140736.0, + "83": 1065140736.0, + "84": 1065140736.0, + "85": 1065140736.0, + "86": 1065140736.0, + "87": 1065140736.0, + "88": 1065140736.0, + "89": 1065140736.0, + "90": 1065140736.0, + "91": 1065140736.0, + "92": 1065140736.0, + "93": 1065140736.0, + "94": 1065140736.0, + "95": 1065140736.0, + "96": 1065140736.0, + "97": 1065140736.0, + "98": 1065140736.0, + "99": 1065140736.0, + "100": 1065140736.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5.73516, + "2": 0.33146, + "3": 0.29953, + "4": 0.28786, + "5": 0.28898, + "6": 0.28943, + "7": 0.60486, + "8": 0.28771, + "9": 0.28863, + "10": 0.33495, + "11": 0.35979, + "12": 0.31826, + "13": 0.31158, + "14": 0.3553, + "15": 0.40525, + "16": 0.29653, + "17": 0.28954, + "18": 0.28746, + "19": 0.28594, + "20": 0.28918, + "21": 0.28811, + "22": 0.28994, + "23": 0.2878, + "24": 0.28704, + "25": 0.28786, + "26": 0.28829, + "27": 0.28723, + "28": 0.28842, + "29": 0.28755, + "30": 0.28856, + "31": 0.28778, + "32": 0.29729, + "33": 0.28622, + "34": 0.28852, + "35": 0.29006, + "36": 0.29076, + "37": 0.28535, + "38": 0.28783, + "39": 
0.28843, + "40": 0.29078, + "41": 0.28844, + "42": 0.28652, + "43": 0.28742, + "44": 0.2859, + "45": 0.2849, + "46": 0.28877, + "47": 0.28739, + "48": 0.28758, + "49": 0.28616, + "50": 0.29116, + "51": 0.90295, + "52": 0.37657, + "53": 0.35642, + "54": 0.35986, + "55": 0.36134, + "56": 0.36573, + "57": 0.36411, + "58": 0.36481, + "59": 0.36464, + "60": 0.36272, + "61": 0.36512, + "62": 0.36724, + "63": 0.36476, + "64": 0.36594, + "65": 0.36724, + "66": 0.64822, + "67": 0.36581, + "68": 0.36271, + "69": 0.366, + "70": 0.36762, + "71": 0.36789, + "72": 0.64766, + "73": 0.36425, + "74": 0.36764, + "75": 0.3661, + "76": 0.36465, + "77": 0.36495, + "78": 0.36147, + "79": 0.36669, + "80": 0.36518, + "81": 0.36345, + "82": 0.36631, + "83": 0.36797, + "84": 0.36517, + "85": 0.36573, + "86": 0.36641, + "87": 0.36619, + "88": 0.3675, + "89": 0.3649, + "90": 0.36424, + "91": 0.36515, + "92": 0.36402, + "93": 0.3686, + "94": 0.36775, + "95": 0.36962, + "96": 0.36798, + "97": 0.36651, + "98": 0.36783, + "99": 0.36877, + "100": 0.36479 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json index b61916ffd95..76ec80299fc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 299203072.0, - "2": 299203072.0, - "3": 299203072.0, - "4": 299203072.0, - "5": 299203072.0, - "6": 299203072.0, - "7": 299203072.0, - "8": 299203072.0, - "9": 299203072.0, - "10": 299203072.0, - 
"11": 299203072.0, - "12": 299203072.0, - "13": 299203072.0, - "14": 299203072.0, - "15": 299203072.0, - "16": 299203072.0, - "17": 299203072.0, - "18": 299203072.0, - "19": 299203072.0, - "20": 299203072.0, - "21": 299203072.0, - "22": 299203072.0, - "23": 299203072.0, - "24": 299203072.0, - "25": 299203072.0, - "26": 299203072.0, - "27": 299203072.0, - "28": 299203072.0, - "29": 299203072.0, - "30": 299203072.0, - "31": 299203072.0, - "32": 299203072.0, - "33": 299203072.0, - "34": 299203072.0, - "35": 299203072.0, - "36": 299203072.0, - "37": 299203072.0, - "38": 299203072.0, - "39": 299203072.0, - "40": 299203072.0, - "41": 299203072.0, - "42": 299203072.0, - "43": 299203072.0, - "44": 299203072.0, - "45": 299203072.0, - "46": 299203072.0, - "47": 299203072.0, - "48": 299203072.0, - "49": 299203072.0, - "50": 299203072.0, - "51": 299203072.0, - "52": 299203072.0, - "53": 299203072.0, - "54": 299203072.0, - "55": 299203072.0, - "56": 299203072.0, - "57": 299203072.0, - "58": 299203072.0, - "59": 299203072.0, - "60": 299203072.0, - "61": 299203072.0, - "62": 299203072.0, - "63": 299203072.0, - "64": 299203072.0, - "65": 299203072.0, - "66": 299203072.0, - "67": 299203072.0, - "68": 299203072.0, - "69": 299203072.0, - "70": 299203072.0, - "71": 299203072.0, - "72": 299203072.0, - "73": 299203072.0, - "74": 299203072.0, - "75": 299203072.0, - "76": 299203072.0, - "77": 299203072.0, - "78": 299203072.0, - "79": 299203072.0, - "80": 299203072.0, - "81": 299203072.0, - "82": 299203072.0, - "83": 299203072.0, - "84": 299203072.0, - "85": 299203072.0, - "86": 299203072.0, - "87": 299203072.0, - "88": 299203072.0, - "89": 299203072.0, - "90": 299203072.0, - "91": 299203072.0, - "92": 299203072.0, - "93": 299203072.0, - "94": 299203072.0, - "95": 299203072.0, - "96": 299203072.0, - "97": 299203072.0, - "98": 299203072.0, - "99": 299203072.0, - "100": 299203072.0 + "1": 299204096.0, + "2": 299204096.0, + "3": 299204096.0, + "4": 299204096.0, + "5": 299204096.0, + "6": 
299204096.0, + "7": 299204096.0, + "8": 299204096.0, + "9": 299204096.0, + "10": 299204096.0, + "11": 299204096.0, + "12": 299204096.0, + "13": 299204096.0, + "14": 299204096.0, + "15": 299204096.0, + "16": 299204096.0, + "17": 299204096.0, + "18": 299204096.0, + "19": 299204096.0, + "20": 299204096.0, + "21": 299204096.0, + "22": 299204096.0, + "23": 299204096.0, + "24": 299204096.0, + "25": 299204096.0, + "26": 299204096.0, + "27": 299204096.0, + "28": 299204096.0, + "29": 299204096.0, + "30": 299204096.0, + "31": 299204096.0, + "32": 299204096.0, + "33": 299204096.0, + "34": 299204096.0, + "35": 299204096.0, + "36": 299204096.0, + "37": 299204096.0, + "38": 299204096.0, + "39": 299204096.0, + "40": 299204096.0, + "41": 299204096.0, + "42": 299204096.0, + "43": 299204096.0, + "44": 299204096.0, + "45": 299204096.0, + "46": 299204096.0, + "47": 299204096.0, + "48": 299204096.0, + "49": 299204096.0, + "50": 299204096.0, + "51": 299204096.0, + "52": 299204096.0, + "53": 299204096.0, + "54": 299204096.0, + "55": 299204096.0, + "56": 299204096.0, + "57": 299204096.0, + "58": 299204096.0, + "59": 299204096.0, + "60": 299204096.0, + "61": 299204096.0, + "62": 299204096.0, + "63": 299204096.0, + "64": 299204096.0, + "65": 299204096.0, + "66": 299204096.0, + "67": 299204096.0, + "68": 299204096.0, + "69": 299204096.0, + "70": 299204096.0, + "71": 299204096.0, + "72": 299204096.0, + "73": 299204096.0, + "74": 299204096.0, + "75": 299204096.0, + "76": 299204096.0, + "77": 299204096.0, + "78": 299204096.0, + "79": 299204096.0, + "80": 299204096.0, + "81": 299204096.0, + "82": 299204096.0, + "83": 299204096.0, + "84": 299204096.0, + "85": 299204096.0, + "86": 299204096.0, + "87": 299204096.0, + "88": 299204096.0, + "89": 299204096.0, + "90": 299204096.0, + "91": 299204096.0, + "92": 299204096.0, + "93": 299204096.0, + "94": 299204096.0, + "95": 299204096.0, + "96": 299204096.0, + "97": 299204096.0, + "98": 299204096.0, + "99": 299204096.0, + "100": 299204096.0 } }, 
"mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 977125888.0, - "2": 1042071040.0, - "3": 1042071040.0, - "4": 1042071040.0, - "5": 1042071040.0, - "6": 1042071040.0, - "7": 1042071040.0, - "8": 1042071040.0, - "9": 1042071040.0, - "10": 1042071040.0, - "11": 1042071040.0, - "12": 1042071040.0, - "13": 1042071040.0, - "14": 1042071040.0, - "15": 1042071040.0, - "16": 1042071040.0, - "17": 1042071040.0, - "18": 1042071040.0, - "19": 1042071040.0, - "20": 1042071040.0, - "21": 1042071040.0, - "22": 1042071040.0, - "23": 1042071040.0, - "24": 1042071040.0, - "25": 1042071040.0, - "26": 1042071040.0, - "27": 1042071040.0, - "28": 1042071040.0, - "29": 1042071040.0, - "30": 1042071040.0, - "31": 1042071040.0, - "32": 1042071040.0, - "33": 1042071040.0, - "34": 1042071040.0, - "35": 1042071040.0, - "36": 1042071040.0, - "37": 1042071040.0, - "38": 1042071040.0, - "39": 1042071040.0, - "40": 1042071040.0, - "41": 1042071040.0, - "42": 1042071040.0, - "43": 1042071040.0, - "44": 1042071040.0, - "45": 1042071040.0, - "46": 1042071040.0, - "47": 1042071040.0, - "48": 1042071040.0, - "49": 1042071040.0, - "50": 1042071040.0, - "51": 1042071040.0, - "52": 1042071040.0, - "53": 1042071040.0, - "54": 1042071040.0, - "55": 1042071040.0, - "56": 1042071040.0, - "57": 1042071040.0, - "58": 1042071040.0, - "59": 1042071040.0, - "60": 1042071040.0, - "61": 1042071040.0, - "62": 1042071040.0, - "63": 1042071040.0, - "64": 1042071040.0, - "65": 1042071040.0, - "66": 1042071040.0, - "67": 1042071040.0, - "68": 1042071040.0, - "69": 1042071040.0, - "70": 1042071040.0, - "71": 1042071040.0, - "72": 1042071040.0, - "73": 1042071040.0, - "74": 1042071040.0, - "75": 1042071040.0, - "76": 1042071040.0, - "77": 1042071040.0, - "78": 1042071040.0, - "79": 1042071040.0, - "80": 1042071040.0, - "81": 1042071040.0, - "82": 1042071040.0, - "83": 1042071040.0, - "84": 1042071040.0, - "85": 1042071040.0, - "86": 1042071040.0, - "87": 
1042071040.0, - "88": 1042071040.0, - "89": 1042071040.0, - "90": 1042071040.0, - "91": 1042071040.0, - "92": 1042071040.0, - "93": 1042071040.0, - "94": 1042071040.0, - "95": 1042071040.0, - "96": 1042071040.0, - "97": 1042071040.0, - "98": 1042071040.0, - "99": 1042071040.0, - "100": 1042071040.0 + "1": 977519616.0, + "2": 1042465280.0, + "3": 1042465280.0, + "4": 1042465280.0, + "5": 1042465280.0, + "6": 1042465280.0, + "7": 1042465280.0, + "8": 1042465280.0, + "9": 1042465280.0, + "10": 1042465280.0, + "11": 1042465280.0, + "12": 1042465280.0, + "13": 1042465280.0, + "14": 1042465280.0, + "15": 1042465280.0, + "16": 1042465280.0, + "17": 1042465280.0, + "18": 1042465280.0, + "19": 1042465280.0, + "20": 1042465280.0, + "21": 1042465280.0, + "22": 1042465280.0, + "23": 1042465280.0, + "24": 1042465280.0, + "25": 1042465280.0, + "26": 1042465280.0, + "27": 1042465280.0, + "28": 1042465280.0, + "29": 1042465280.0, + "30": 1042465280.0, + "31": 1042465280.0, + "32": 1042465280.0, + "33": 1042465280.0, + "34": 1042465280.0, + "35": 1042465280.0, + "36": 1042465280.0, + "37": 1042465280.0, + "38": 1042465280.0, + "39": 1042465280.0, + "40": 1042465280.0, + "41": 1042465280.0, + "42": 1042465280.0, + "43": 1042465280.0, + "44": 1042465280.0, + "45": 1042465280.0, + "46": 1042465280.0, + "47": 1042465280.0, + "48": 1042465280.0, + "49": 1042465280.0, + "50": 1042465280.0, + "51": 1042465280.0, + "52": 1042465280.0, + "53": 1042465280.0, + "54": 1042465280.0, + "55": 1042465280.0, + "56": 1042465280.0, + "57": 1042465280.0, + "58": 1042465280.0, + "59": 1042465280.0, + "60": 1042465280.0, + "61": 1042465280.0, + "62": 1042465280.0, + "63": 1042465280.0, + "64": 1042465280.0, + "65": 1042465280.0, + "66": 1042465280.0, + "67": 1042465280.0, + "68": 1042465280.0, + "69": 1042465280.0, + "70": 1042465280.0, + "71": 1042465280.0, + "72": 1042465280.0, + "73": 1042465280.0, + "74": 1042465280.0, + "75": 1042465280.0, + "76": 1042465280.0, + "77": 1042465280.0, + "78": 
1042465280.0, + "79": 1042465280.0, + "80": 1042465280.0, + "81": 1042465280.0, + "82": 1042465280.0, + "83": 1042465280.0, + "84": 1042465280.0, + "85": 1042465280.0, + "86": 1042465280.0, + "87": 1042465280.0, + "88": 1042465280.0, + "89": 1042465280.0, + "90": 1042465280.0, + "91": 1042465280.0, + "92": 1042465280.0, + "93": 1042465280.0, + "94": 1042465280.0, + "95": 1042465280.0, + "96": 1042465280.0, + "97": 1042465280.0, + "98": 1042465280.0, + "99": 1042465280.0, + "100": 1042465280.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.40872, - "2": 0.25886, - "3": 0.22849, - "4": 0.21099, - "5": 0.21193, - "6": 0.20863, - "7": 0.20987, - "8": 0.21014, - "9": 0.21139, - "10": 0.21148, - "11": 0.21513, - "12": 0.21915, - "13": 0.21037, - "14": 0.20786, - "15": 0.20927, - "16": 0.20756, - "17": 0.21005, - "18": 0.21022, - "19": 0.21019, - "20": 0.21012, - "21": 0.20995, - "22": 0.21005, - "23": 0.21213, - "24": 0.20995, - "25": 0.20776, - "26": 0.21296, - "27": 0.20984, - "28": 0.21526, - "29": 0.21164, - "30": 0.21175, - "31": 0.21062, - "32": 0.21292, - "33": 0.20962, - "34": 0.21025, - "35": 0.20968, - "36": 0.21367, - "37": 0.20989, - "38": 0.21034, - "39": 0.20979, - "40": 0.21092, - "41": 0.21065, - "42": 0.20865, - "43": 0.20939, - "44": 0.21656, - "45": 0.21131, - "46": 0.21087, - "47": 0.23723, - "48": 0.21006, - "49": 0.21157, - "50": 0.20975, - "51": 0.21952, - "52": 0.21306, - "53": 0.21253, - "54": 0.21223, - "55": 0.21336, - "56": 0.21514, - "57": 0.21536, - "58": 0.21288, - "59": 0.21211, - "60": 0.21298, - "61": 0.21285, - "62": 0.21438, - "63": 0.21461, - "64": 0.21382, - "65": 0.22082, - "66": 0.21222, - "67": 0.21414, - "68": 0.21315, - "69": 0.2153, - "70": 0.2172, - "71": 0.21323, - "72": 0.21366, - "73": 0.21434, - "74": 0.21455, - "75": 0.21545, - "76": 0.21631, - "77": 0.21419, - "78": 0.21365, - "79": 0.21514, - "80": 0.21447, - "81": 0.21379, - "82": 0.21487, - "83": 0.21038, - 
"84": 0.21708, - "85": 0.21166, - "86": 0.2141, - "87": 0.21613, - "88": 0.21214, - "89": 0.21499, - "90": 0.21811, - "91": 0.21563, - "92": 0.2152, - "93": 0.21548, - "94": 0.21863, - "95": 0.21366, - "96": 0.21458, - "97": 0.21279, - "98": 0.21555, - "99": 0.213, - "100": 0.2112 + "1": 9.3573, + "2": 0.22781, + "3": 0.20223, + "4": 0.18298, + "5": 0.18347, + "6": 0.18262, + "7": 0.18305, + "8": 0.18295, + "9": 0.18205, + "10": 0.18986, + "11": 0.18455, + "12": 0.18245, + "13": 0.18257, + "14": 0.18276, + "15": 0.18245, + "16": 0.18291, + "17": 0.18246, + "18": 0.18732, + "19": 0.18256, + "20": 0.17944, + "21": 0.18071, + "22": 0.17927, + "23": 0.18026, + "24": 0.17928, + "25": 0.17797, + "26": 0.17889, + "27": 0.17809, + "28": 0.17769, + "29": 0.1779, + "30": 0.17904, + "31": 0.1865, + "32": 0.17922, + "33": 0.17866, + "34": 0.17807, + "35": 0.17828, + "36": 0.17941, + "37": 0.17744, + "38": 0.17752, + "39": 0.17793, + "40": 0.17906, + "41": 0.17769, + "42": 0.17938, + "43": 0.17822, + "44": 0.17848, + "45": 0.17846, + "46": 0.17952, + "47": 0.17854, + "48": 0.17937, + "49": 0.17929, + "50": 0.17767, + "51": 0.19143, + "52": 0.18056, + "53": 0.18054, + "54": 0.18173, + "55": 0.18101, + "56": 0.18146, + "57": 0.1796, + "58": 0.18116, + "59": 0.18351, + "60": 0.17824, + "61": 0.17784, + "62": 0.17757, + "63": 0.17868, + "64": 0.17881, + "65": 0.17844, + "66": 0.1766, + "67": 0.17725, + "68": 0.17696, + "69": 0.1769, + "70": 0.17752, + "71": 0.17684, + "72": 0.17943, + "73": 0.17816, + "74": 0.1781, + "75": 0.17671, + "76": 0.17658, + "77": 0.17778, + "78": 0.1771, + "79": 0.17667, + "80": 0.17694, + "81": 0.17739, + "82": 0.18259, + "83": 0.1806, + "84": 0.18169, + "85": 0.18154, + "86": 0.1832, + "87": 0.18284, + "88": 0.18358, + "89": 0.18203, + "90": 0.18406, + "91": 0.18296, + "92": 0.18249, + "93": 0.1823, + "94": 0.1834, + "95": 0.18246, + "96": 0.19284, + "97": 0.7432, + "98": 0.20476, + "99": 0.19058, + "100": 0.18263 } } } \ No newline at end of file diff 
--git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..59e234529c3 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85839, + "52": 9.7506, + "53": 10.05817, + "54": 9.96076, + "55": 9.88738, + "56": 9.6344, + "57": 9.4967, + "58": 9.83343, + "59": 9.59391, + "60": 9.51376, + "61": 9.69928, + "62": 9.98089, + "63": 9.39065, + "64": 9.77599, + "65": 8.9571, + "66": 9.70054, + "67": 9.37, + "68": 9.78529, + "69": 9.78966, + "70": 9.74676, + "71": 9.61906, + "72": 9.58963, + "73": 9.49629, + "74": 8.94963, + "75": 9.42381, + "76": 9.07799, + "77": 10.07105, + "78": 9.72632, + "79": 9.37966, + "80": 9.40721, + "81": 9.48238, + "82": 9.70152, + "83": 9.30657, + "84": 9.41464, + "85": 9.61784, + "86": 9.08212, + "87": 
9.59511, + "88": 9.75008, + "89": 9.60356, + "90": 9.82256, + "91": 9.33721, + "92": 9.35861, + "93": 9.07956, + "94": 8.83268, + "95": 9.51351, + "96": 9.52947, + "97": 9.31813, + "98": 9.67451, + "99": 8.88607, + "100": 9.40106 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2621.0, + "52": 2597.0, + "53": 2926.0, + "54": 2633.0, + "55": 2206.0, + "56": 2627.0, + "57": 2328.0, + "58": 2886.0, + "59": 2639.0, + "60": 2157.0, + "61": 2736.0, + "62": 2544.0, + "63": 2332.0, + "64": 2948.0, + "65": 2630.0, + "66": 2931.0, + "67": 2717.0, + "68": 2643.0, + "69": 2955.0, + "70": 3040.0, + "71": 2882.0, + "72": 2390.0, + "73": 2812.0, + "74": 1844.0, + "75": 2461.0, + "76": 3067.0, + "77": 3152.0, + "78": 3018.0, + "79": 3008.0, + "80": 3104.0, + "81": 3589.0, + "82": 3218.0, + "83": 2748.0, + "84": 3217.0, + "85": 3167.0, + "86": 2876.0, + "87": 3604.0, + "88": 3017.0, + "89": 3249.0, + "90": 3069.0, + "91": 2865.0, + "92": 3074.0, + "93": 2680.0, + "94": 3392.0, + "95": 3206.0, + "96": 3401.0, + "97": 3107.0, + "98": 3624.0, + "99": 3007.0, + "100": 3111.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + 
"2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 365196800.0, + "52": 365196800.0, + "53": 365196800.0, + "54": 365196800.0, + "55": 365196800.0, + "56": 365196800.0, + "57": 365196800.0, + "58": 365196800.0, + "59": 365196800.0, + "60": 365196800.0, + "61": 365196800.0, + "62": 365196800.0, + "63": 365196800.0, + "64": 365196800.0, + "65": 365196800.0, + "66": 365196800.0, + "67": 365196800.0, + "68": 365196800.0, + "69": 365196800.0, + "70": 365196800.0, + "71": 365196800.0, + "72": 365196800.0, + "73": 365196800.0, + "74": 365196800.0, + "75": 365196800.0, + "76": 365196800.0, + "77": 365196800.0, + "78": 365196800.0, + "79": 365196800.0, + "80": 365196800.0, + "81": 365196800.0, + "82": 365196800.0, + "83": 365196800.0, + "84": 365196800.0, + "85": 365196800.0, + "86": 365196800.0, + "87": 365196800.0, + "88": 365196800.0, + "89": 365196800.0, + "90": 365196800.0, + "91": 365196800.0, + "92": 365196800.0, + "93": 365196800.0, + "94": 365196800.0, + "95": 365196800.0, + "96": 365196800.0, + "97": 365196800.0, + "98": 365196800.0, + "99": 365196800.0, + "100": 365196800.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": 
"nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1109505024.0, + "52": 1109506560.0, + "53": 1109506560.0, + "54": 1109506560.0, + "55": 1109506560.0, + "56": 1109506560.0, + "57": 1109506560.0, + "58": 1109506560.0, + "59": 1109506560.0, + "60": 1109506560.0, + "61": 1109506560.0, + "62": 1109506560.0, + "63": 1109506560.0, + "64": 1109506560.0, + "65": 1109506560.0, + "66": 1109506560.0, + "67": 1109506560.0, + "68": 1109506560.0, + "69": 1109506560.0, + "70": 1109506560.0, + "71": 1109506560.0, + "72": 1109506560.0, + "73": 1109506560.0, + "74": 1109506560.0, + "75": 1109506560.0, + "76": 1109506560.0, + "77": 1109506560.0, + "78": 1109506560.0, + "79": 1109506560.0, + "80": 1109506560.0, + "81": 1109506560.0, + "82": 1109506560.0, + "83": 1109506560.0, + "84": 1109506560.0, + "85": 1109506560.0, + "86": 1109506560.0, + "87": 1109506560.0, + "88": 1109506560.0, + "89": 1109506560.0, + "90": 1109506560.0, + "91": 1109506560.0, + "92": 1109506560.0, + "93": 1109506560.0, + "94": 1109506560.0, + "95": 1109506560.0, + "96": 1109506560.0, + "97": 1109506560.0, + "98": 1109506560.0, + "99": 1109506560.0, + "100": 1109506560.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + 
"12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.93427, + "52": 0.21812, + "53": 0.185, + "54": 0.18425, + "55": 0.18174, + "56": 0.18062, + "57": 0.17992, + "58": 0.17969, + "59": 0.18021, + "60": 0.18245, + "61": 0.18025, + "62": 0.18048, + "63": 0.18064, + "64": 0.18121, + "65": 0.17955, + "66": 0.18229, + "67": 0.17924, + "68": 0.18046, + "69": 0.18052, + "70": 0.17985, + "71": 0.18045, + "72": 0.17993, + "73": 0.17909, + "74": 0.18421, + "75": 0.18068, + "76": 0.18347, + "77": 0.18157, + "78": 0.18084, + "79": 0.17981, + "80": 0.17936, + "81": 0.17999, + "82": 0.18094, + "83": 0.17982, + "84": 0.18317, + "85": 0.18036, + "86": 0.1809, + "87": 0.17889, + "88": 0.17894, + "89": 0.17919, + "90": 0.17925, + "91": 0.17923, + "92": 0.17791, + "93": 0.17995, + "94": 0.17922, + "95": 0.17997, + "96": 0.17959, + "97": 0.1793, + "98": 0.1799, + "99": 0.17942, + "100": 0.17849 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json index 0cc3719ac53..1e42aa887f6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 971619840.0, - "2": 1036172800.0, - "3": 1036172800.0, - "4": 1036172800.0, - "5": 1036172800.0, - "6": 1036172800.0, - "7": 1036172800.0, - "8": 1036172800.0, - "9": 1036172800.0, - "10": 1036172800.0, - "11": 1036172800.0, - "12": 1036172800.0, - "13": 1036172800.0, - "14": 1036172800.0, - "15": 1036172800.0, - "16": 1036172800.0, - "17": 1036172800.0, - "18": 1036172800.0, - "19": 1036172800.0, - "20": 1036172800.0, - "21": 1036172800.0, - "22": 1036172800.0, - "23": 1036172800.0, - "24": 1036172800.0, - "25": 1036172800.0, - "26": 1036172800.0, - "27": 1036172800.0, - "28": 1036172800.0, - "29": 1036172800.0, - "30": 1036172800.0, - "31": 1036172800.0, - "32": 1036172800.0, - "33": 1036172800.0, - "34": 1036172800.0, - "35": 1036172800.0, - "36": 1036172800.0, - "37": 1036172800.0, - "38": 1036172800.0, - "39": 1036172800.0, - "40": 1036172800.0, - "41": 1036172800.0, - "42": 1036172800.0, - "43": 1036172800.0, - "44": 1036172800.0, - "45": 1036172800.0, - "46": 1036172800.0, - "47": 1036172800.0, - "48": 1036172800.0, - "49": 1036172800.0, - "50": 1036172800.0, - "51": 1036172800.0, - "52": 1036172800.0, - "53": 1036172800.0, - "54": 1036172800.0, - "55": 1036172800.0, - "56": 1036172800.0, - "57": 1036172800.0, - "58": 1036172800.0, - "59": 1036172800.0, - "60": 1036172800.0, - "61": 1036172800.0, - "62": 1036172800.0, - "63": 1036172800.0, - "64": 1036172800.0, - "65": 1036172800.0, - "66": 1036172800.0, - "67": 1036172800.0, - "68": 1036172800.0, - "69": 1036172800.0, - "70": 1036172800.0, - "71": 1036172800.0, - "72": 1036172800.0, - "73": 1036172800.0, - "74": 1036172800.0, - "75": 1036172800.0, - "76": 1036172800.0, - "77": 1036172800.0, - "78": 1036172800.0, - "79": 1036172800.0, - "80": 1036172800.0, - "81": 1036172800.0, - "82": 
1036172800.0, - "83": 1036172800.0, - "84": 1036172800.0, - "85": 1036172800.0, - "86": 1036172800.0, - "87": 1036172800.0, - "88": 1036172800.0, - "89": 1036172800.0, - "90": 1036172800.0, - "91": 1036172800.0, - "92": 1036172800.0, - "93": 1036172800.0, - "94": 1036172800.0, - "95": 1036172800.0, - "96": 1036172800.0, - "97": 1036172800.0, - "98": 1036172800.0, - "99": 1036172800.0, - "100": 1036172800.0 + "1": 968737280.0, + "2": 1035779584.0, + "3": 1035779584.0, + "4": 1035779584.0, + "5": 1035779584.0, + "6": 1035779584.0, + "7": 1035779584.0, + "8": 1035779584.0, + "9": 1035779584.0, + "10": 1035779584.0, + "11": 1035779584.0, + "12": 1035779584.0, + "13": 1035779584.0, + "14": 1035779584.0, + "15": 1035779584.0, + "16": 1035779584.0, + "17": 1035779584.0, + "18": 1035779584.0, + "19": 1035779584.0, + "20": 1035779584.0, + "21": 1035779584.0, + "22": 1035779584.0, + "23": 1035779584.0, + "24": 1035779584.0, + "25": 1035779584.0, + "26": 1035779584.0, + "27": 1035779584.0, + "28": 1035779584.0, + "29": 1035779584.0, + "30": 1035779584.0, + "31": 1035779584.0, + "32": 1035779584.0, + "33": 1035779584.0, + "34": 1035779584.0, + "35": 1035779584.0, + "36": 1035779584.0, + "37": 1035779584.0, + "38": 1035779584.0, + "39": 1035779584.0, + "40": 1035779584.0, + "41": 1035779584.0, + "42": 1035779584.0, + "43": 1035779584.0, + "44": 1035779584.0, + "45": 1035779584.0, + "46": 1035779584.0, + "47": 1035779584.0, + "48": 1035779584.0, + "49": 1035779584.0, + "50": 1035779584.0, + "51": 1035779584.0, + "52": 1035779584.0, + "53": 1035779584.0, + "54": 1035779584.0, + "55": 1035779584.0, + "56": 1035779584.0, + "57": 1035779584.0, + "58": 1035779584.0, + "59": 1035779584.0, + "60": 1035779584.0, + "61": 1035779584.0, + "62": 1035779584.0, + "63": 1035779584.0, + "64": 1035779584.0, + "65": 1035779584.0, + "66": 1035779584.0, + "67": 1035779584.0, + "68": 1035779584.0, + "69": 1035779584.0, + "70": 1035779584.0, + "71": 1035779584.0, + "72": 1035779584.0, + "73": 
1035779584.0, + "74": 1035779584.0, + "75": 1035779584.0, + "76": 1035779584.0, + "77": 1035779584.0, + "78": 1035779584.0, + "79": 1035779584.0, + "80": 1035779584.0, + "81": 1035779584.0, + "82": 1035779584.0, + "83": 1035779584.0, + "84": 1035779584.0, + "85": 1035779584.0, + "86": 1035779584.0, + "87": 1035779584.0, + "88": 1035779584.0, + "89": 1035779584.0, + "90": 1035779584.0, + "91": 1035779584.0, + "92": 1035779584.0, + "93": 1035779584.0, + "94": 1035779584.0, + "95": 1035779584.0, + "96": 1035779584.0, + "97": 1035779584.0, + "98": 1035779584.0, + "99": 1035779584.0, + "100": 1035779584.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 7.22987, - "2": 0.54363, - "3": 0.2879, - "4": 0.28745, - "5": 0.28509, - "6": 0.28364, - "7": 0.28401, - "8": 0.28235, - "9": 0.28321, - "10": 0.32258, - "11": 0.28697, - "12": 0.27808, - "13": 0.27857, - "14": 0.27833, - "15": 0.28035, - "16": 0.27859, - "17": 0.27841, - "18": 0.27879, - "19": 0.27874, - "20": 0.27806, - "21": 0.27812, - "22": 0.2783, - "23": 0.27919, - "24": 0.27841, - "25": 0.27852, - "26": 0.27871, - "27": 0.27891, - "28": 0.28056, - "29": 0.27909, - "30": 0.2797, - "31": 0.27903, - "32": 0.27895, - "33": 0.27929, - "34": 0.27838, - "35": 0.27904, - "36": 0.2787, - "37": 0.28662, - "38": 0.27812, - "39": 0.27805, - "40": 0.27846, - "41": 0.27884, - "42": 0.27807, - "43": 0.27794, - "44": 0.27825, - "45": 0.28052, - "46": 0.27856, - "47": 0.27832, - "48": 0.27799, - "49": 0.2783, - "50": 0.27861, - "51": 0.2915, - "52": 0.28668, - "53": 0.28545, - "54": 0.28632, - "55": 0.28616, - "56": 0.28735, - "57": 0.28738, - "58": 0.28556, - "59": 0.28453, - "60": 0.28543, - "61": 0.28452, - "62": 0.28404, - "63": 0.28542, - "64": 0.28492, - "65": 0.28488, - "66": 0.2861, - "67": 0.286, - "68": 0.28505, - "69": 0.28531, - "70": 0.28377, - "71": 0.28517, - "72": 0.28454, - "73": 0.2853, - "74": 0.28678, - "75": 0.28484, - "76": 0.28523, - "77": 0.28548, - 
"78": 0.28488, - "79": 0.28559, - "80": 0.28528, - "81": 0.28479, - "82": 0.28465, - "83": 0.28506, - "84": 0.28493, - "85": 0.28486, - "86": 0.28572, - "87": 0.28404, - "88": 0.28473, - "89": 0.28431, - "90": 0.28945, - "91": 0.28446, - "92": 0.28489, - "93": 0.28474, - "94": 0.28484, - "95": 0.28526, - "96": 0.28573, - "97": 0.28411, - "98": 0.28402, - "99": 0.28413, - "100": 0.28454 + "1": 3.63869, + "2": 0.35485, + "3": 0.2965, + "4": 0.28503, + "5": 0.28544, + "6": 0.284, + "7": 0.28704, + "8": 0.28585, + "9": 0.286, + "10": 0.2866, + "11": 0.28746, + "12": 0.28519, + "13": 0.28493, + "14": 0.28132, + "15": 0.2846, + "16": 0.28078, + "17": 0.28134, + "18": 0.28108, + "19": 0.2801, + "20": 0.2818, + "21": 0.284, + "22": 0.28379, + "23": 0.27982, + "24": 0.2809, + "25": 0.28033, + "26": 0.2874, + "27": 0.28134, + "28": 0.28215, + "29": 0.28078, + "30": 0.28261, + "31": 0.28205, + "32": 0.28244, + "33": 0.28032, + "34": 0.2817, + "35": 0.28205, + "36": 0.28735, + "37": 0.2784, + "38": 0.27979, + "39": 0.28067, + "40": 0.28107, + "41": 0.27649, + "42": 0.27759, + "43": 0.27572, + "44": 0.27583, + "45": 0.27792, + "46": 0.27869, + "47": 0.2795, + "48": 0.2786, + "49": 0.27878, + "50": 0.28026, + "51": 0.28359, + "52": 0.27724, + "53": 0.2767, + "54": 0.2768, + "55": 0.27579, + "56": 0.27548, + "57": 0.27664, + "58": 0.27959, + "59": 0.27651, + "60": 0.27706, + "61": 0.2749, + "62": 0.27575, + "63": 0.27689, + "64": 0.27661, + "65": 0.27463, + "66": 0.27502, + "67": 0.27556, + "68": 0.27753, + "69": 0.27586, + "70": 0.27562, + "71": 0.27486, + "72": 0.27586, + "73": 0.27532, + "74": 0.27545, + "75": 0.27539, + "76": 0.27606, + "77": 0.27649, + "78": 0.27585, + "79": 0.27645, + "80": 0.27617, + "81": 0.27569, + "82": 0.276, + "83": 0.27704, + "84": 0.27698, + "85": 0.27571, + "86": 0.27734, + "87": 0.27615, + "88": 0.2754, + "89": 0.27602, + "90": 0.27562, + "91": 0.27544, + "92": 0.27569, + "93": 0.27668, + "94": 0.27578, + "95": 0.27544, + "96": 0.27608, + "97": 
0.27604, + "98": 0.2754, + "99": 0.2768, + "100": 0.27965 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..3f4651acab9 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85065, + "52": 9.7464, + "53": 10.07271, + "54": 9.95757, + "55": 9.87725, + "56": 9.62951, + "57": 9.48816, + "58": 9.83239, + "59": 9.58985, + "60": 9.50827, + "61": 9.6947, + "62": 9.99304, + "63": 9.37511, + "64": 9.77996, + "65": 8.95215, + "66": 9.71323, + "67": 9.37884, + "68": 9.78794, + "69": 9.79078, + "70": 9.7308, + "71": 9.61793, + "72": 9.59094, + "73": 9.49435, + "74": 8.94865, + "75": 9.43606, + "76": 9.09894, + "77": 10.06437, + "78": 9.73006, + "79": 9.37771, + "80": 9.41266, + "81": 
9.4854, + "82": 9.69576, + "83": 9.32017, + "84": 9.42235, + "85": 9.61578, + "86": 9.07218, + "87": 9.59328, + "88": 9.7509, + "89": 9.61159, + "90": 9.82148, + "91": 9.35304, + "92": 9.36254, + "93": 9.08747, + "94": 8.83398, + "95": 9.51923, + "96": 9.52595, + "97": 9.31413, + "98": 9.67414, + "99": 8.88869, + "100": 9.40651 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2873.0, + "52": 2946.0, + "53": 3158.0, + "54": 2907.0, + "55": 2740.0, + "56": 3029.0, + "57": 2489.0, + "58": 3327.0, + "59": 3042.0, + "60": 2780.0, + "61": 3302.0, + "62": 2961.0, + "63": 2702.0, + "64": 3318.0, + "65": 2909.0, + "66": 3513.0, + "67": 2959.0, + "68": 2963.0, + "69": 3171.0, + "70": 3547.0, + "71": 3246.0, + "72": 2586.0, + "73": 3301.0, + "74": 2135.0, + "75": 2752.0, + "76": 3275.0, + "77": 3648.0, + "78": 3472.0, + "79": 3536.0, + "80": 3685.0, + "81": 4159.0, + "82": 3488.0, + "83": 3179.0, + "84": 3639.0, + "85": 3631.0, + "86": 3045.0, + "87": 4315.0, + "88": 3481.0, + "89": 3819.0, + "90": 3323.0, + "91": 3014.0, + "92": 3581.0, + "93": 2932.0, + "94": 3715.0, + "95": 3593.0, + "96": 3764.0, + "97": 3582.0, + "98": 3998.0, + "99": 3406.0, + "100": 3521.0 + } + }, + 
"mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 335835648.0, + "52": 335835648.0, + "53": 335835648.0, + "54": 335835648.0, + "55": 335835648.0, + "56": 335835648.0, + "57": 335835648.0, + "58": 335835648.0, + "59": 335835648.0, + "60": 335835648.0, + "61": 335835648.0, + "62": 335835648.0, + "63": 335835648.0, + "64": 335835648.0, + "65": 335835648.0, + "66": 335835648.0, + "67": 335835648.0, + "68": 335835648.0, + "69": 335835648.0, + "70": 335835648.0, + "71": 335835648.0, + "72": 335835648.0, + "73": 335835648.0, + "74": 335835648.0, + "75": 335835648.0, + "76": 335835648.0, + "77": 335835648.0, + "78": 335835648.0, + "79": 335835648.0, + "80": 335835648.0, + "81": 335835648.0, + "82": 335835648.0, + "83": 335835648.0, + "84": 335835648.0, + "85": 335835648.0, + "86": 335835648.0, + "87": 335835648.0, + "88": 335835648.0, + "89": 335835648.0, + "90": 335835648.0, + "91": 335835648.0, + "92": 335835648.0, + "93": 335835648.0, + "94": 335835648.0, + "95": 335835648.0, + "96": 335835648.0, + "97": 335835648.0, + "98": 335835648.0, + "99": 335835648.0, + "100": 335835648.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + 
"values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1102819840.0, + "52": 1102820864.0, + "53": 1102820864.0, + "54": 1102820864.0, + "55": 1102820864.0, + "56": 1102820864.0, + "57": 1102820864.0, + "58": 1102820864.0, + "59": 1102820864.0, + "60": 1102820864.0, + "61": 1102820864.0, + "62": 1102820864.0, + "63": 1102820864.0, + "64": 1102820864.0, + "65": 1102820864.0, + "66": 1102820864.0, + "67": 1102820864.0, + "68": 1102820864.0, + "69": 1102820864.0, + "70": 1102820864.0, + "71": 1102820864.0, + "72": 1102820864.0, + "73": 1102820864.0, + "74": 1102820864.0, + "75": 1102820864.0, + "76": 1102820864.0, + "77": 1102820864.0, + "78": 1102820864.0, + "79": 1102820864.0, + "80": 1102820864.0, + "81": 1102820864.0, + "82": 1102820864.0, + "83": 1102820864.0, + "84": 1102820864.0, + "85": 1102820864.0, + "86": 1102820864.0, + "87": 1102820864.0, + "88": 1102820864.0, + "89": 1102820864.0, + "90": 1102820864.0, + "91": 1102820864.0, + "92": 1102820864.0, + "93": 1102820864.0, + "94": 1102820864.0, + "95": 1102820864.0, + "96": 1102820864.0, + "97": 1102820864.0, + "98": 1102820864.0, + "99": 1102820864.0, + "100": 1102820864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", 
+ "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.5579, + "52": 0.32293, + "53": 0.28783, + "54": 0.28913, + "55": 0.28732, + "56": 0.28223, + "57": 0.28119, + "58": 0.27795, + "59": 0.27722, + "60": 0.2792, + "61": 0.27899, + "62": 0.27773, + "63": 0.27717, + "64": 0.27611, + "65": 0.275, + "66": 0.27585, + "67": 0.27453, + "68": 0.27615, + "69": 0.27494, + "70": 0.27615, + "71": 0.27345, + "72": 0.27521, + "73": 0.27345, + "74": 0.27408, + "75": 0.27342, + "76": 0.27402, + "77": 0.27422, + "78": 0.27428, + "79": 0.27445, + "80": 0.27343, + "81": 0.27423, + "82": 0.27491, + "83": 0.27456, + "84": 0.27288, + "85": 0.27478, + "86": 0.27469, + "87": 0.27542, + "88": 0.27502, + "89": 0.27521, + "90": 0.27591, + "91": 0.27499, + "92": 0.27376, + "93": 0.27416, + "94": 0.27576, + "95": 0.27431, + "96": 0.27449, + "97": 0.27428, + "98": 0.27432, + "99": 0.2742, + "100": 0.27503 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..e52665efa28 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.83993, + "2": 10.85182, + "3": 10.84166, + "4": 10.84441, + "5": 10.85514, + "6": 10.86428, + "7": 10.85243, + "8": 10.84464, + "9": 10.84864, + "10": 10.81333, + "11": 10.85638, + "12": 10.84233, + "13": 10.86046, + "14": 10.84976, + "15": 10.81618, + "16": 10.80886, + "17": 10.78242, + "18": 10.79155, + "19": 10.79495, + "20": 10.7055, + "21": 10.6978, + "22": 10.58349, + "23": 10.69268, + "24": 10.60558, + "25": 10.56742, + "26": 10.61456, + "27": 10.6067, + "28": 10.55905, + "29": 10.56526, + "30": 10.37918, + "31": 10.16276, + "32": 10.45543, + "33": 10.45037, + "34": 10.23993, + "35": 10.27354, + "36": 10.24224, + "37": 10.34559, + "38": 10.21738, + "39": 10.39453, + "40": 10.095, + "41": 10.15093, + "42": 10.21235, + "43": 9.87982, + "44": 9.97875, + "45": 9.85588, + "46": 9.83349, + "47": 10.14101, + "48": 9.86418, + "49": 9.55509, + "50": 9.91636, + "51": 9.86104, + "52": 9.75109, + "53": 10.06631, + "54": 9.95634, + "55": 9.89354, + "56": 9.637, + "57": 9.49142, + "58": 9.8341, + "59": 9.5931, + "60": 9.51379, + "61": 9.69183, + "62": 9.99162, + "63": 9.39196, + "64": 9.77455, + "65": 8.96319, + "66": 9.70663, + "67": 9.3789, + "68": 9.78328, + "69": 9.79736, + "70": 9.73753, + "71": 9.62711, + "72": 9.58907, + "73": 9.50446, + "74": 8.94975, + "75": 9.4278, + "76": 9.08764, + "77": 10.06759, + "78": 9.72141, + "79": 9.3861, + "80": 9.40495, + "81": 9.48596, + "82": 9.70195, + "83": 9.31553, + "84": 9.41806, + "85": 9.61378, + "86": 9.08145, + "87": 9.59631, + "88": 9.75008, + "89": 9.60386, + "90": 9.82838, + "91": 9.33622, + "92": 9.35764, + "93": 9.08795, + "94": 8.83437, + "95": 9.53352, + "96": 9.53315, + "97": 9.31129, + "98": 9.67176, + "99": 8.89816, + "100": 9.40969 + } 
+ }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1621.0, + "2": 1756.0, + "3": 1698.0, + "4": 1764.0, + "5": 2045.0, + "6": 1927.0, + "7": 1901.0, + "8": 1768.0, + "9": 1823.0, + "10": 1456.0, + "11": 1884.0, + "12": 1834.0, + "13": 2003.0, + "14": 1786.0, + "15": 1879.0, + "16": 1948.0, + "17": 1849.0, + "18": 1718.0, + "19": 1870.0, + "20": 1750.0, + "21": 1977.0, + "22": 1741.0, + "23": 1946.0, + "24": 1642.0, + "25": 1636.0, + "26": 1817.0, + "27": 1926.0, + "28": 1981.0, + "29": 1993.0, + "30": 1929.0, + "31": 1630.0, + "32": 1896.0, + "33": 2115.0, + "34": 1824.0, + "35": 1960.0, + "36": 1935.0, + "37": 2410.0, + "38": 2259.0, + "39": 2428.0, + "40": 2119.0, + "41": 2278.0, + "42": 2118.0, + "43": 1992.0, + "44": 2041.0, + "45": 1992.0, + "46": 2158.0, + "47": 2416.0, + "48": 2338.0, + "49": 2315.0, + "50": 2242.0, + "51": 2431.0, + "52": 2467.0, + "53": 2794.0, + "54": 2675.0, + "55": 2313.0, + "56": 2597.0, + "57": 2278.0, + "58": 2887.0, + "59": 2701.0, + "60": 2190.0, + "61": 2764.0, + "62": 2576.0, + "63": 2405.0, + "64": 2903.0, + "65": 2516.0, + "66": 2885.0, + "67": 2700.0, + "68": 2682.0, + "69": 2987.0, + "70": 3141.0, + "71": 3055.0, + "72": 2413.0, + "73": 2864.0, + "74": 1870.0, + "75": 2450.0, + "76": 3032.0, + "77": 3230.0, + "78": 3125.0, + "79": 2982.0, + "80": 3203.0, + "81": 3657.0, + "82": 3174.0, + "83": 2818.0, + "84": 3190.0, + "85": 3166.0, + "86": 2793.0, + "87": 3635.0, + "88": 3005.0, + "89": 3373.0, + "90": 3066.0, + "91": 2857.0, + "92": 3080.0, + "93": 2533.0, + "94": 3303.0, + "95": 3270.0, + "96": 3416.0, + "97": 3085.0, + "98": 3437.0, + "99": 3243.0, + "100": 3119.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 299204096.0, + "2": 299204096.0, + "3": 299204096.0, + "4": 299204096.0, + "5": 299204096.0, + "6": 299204096.0, + "7": 299204096.0, + "8": 299204096.0, + "9": 299204096.0, + "10": 299204096.0, + 
"11": 299204096.0, + "12": 299204096.0, + "13": 299204096.0, + "14": 299204096.0, + "15": 299204096.0, + "16": 299204096.0, + "17": 299204096.0, + "18": 299204096.0, + "19": 299204096.0, + "20": 299204096.0, + "21": 299204096.0, + "22": 299204096.0, + "23": 299204096.0, + "24": 299204096.0, + "25": 299204096.0, + "26": 299204096.0, + "27": 299204096.0, + "28": 299204096.0, + "29": 299204096.0, + "30": 299204096.0, + "31": 299204096.0, + "32": 299204096.0, + "33": 299204096.0, + "34": 299204096.0, + "35": 299204096.0, + "36": 299204096.0, + "37": 299204096.0, + "38": 299204096.0, + "39": 299204096.0, + "40": 299204096.0, + "41": 299204096.0, + "42": 299204096.0, + "43": 299204096.0, + "44": 299204096.0, + "45": 299204096.0, + "46": 299204096.0, + "47": 299204096.0, + "48": 299204096.0, + "49": 299204096.0, + "50": 299204096.0, + "51": 299204096.0, + "52": 299204096.0, + "53": 299204096.0, + "54": 299204096.0, + "55": 299204096.0, + "56": 299204096.0, + "57": 299204096.0, + "58": 299204096.0, + "59": 299204096.0, + "60": 299204096.0, + "61": 299204096.0, + "62": 299204096.0, + "63": 299204096.0, + "64": 299204096.0, + "65": 299204096.0, + "66": 299204096.0, + "67": 299204096.0, + "68": 299204096.0, + "69": 299204096.0, + "70": 299204096.0, + "71": 299204096.0, + "72": 299204096.0, + "73": 299204096.0, + "74": 299204096.0, + "75": 299204096.0, + "76": 299204096.0, + "77": 299204096.0, + "78": 299204096.0, + "79": 299204096.0, + "80": 299204096.0, + "81": 299204096.0, + "82": 299204096.0, + "83": 299204096.0, + "84": 299204096.0, + "85": 299204096.0, + "86": 299204096.0, + "87": 299204096.0, + "88": 299204096.0, + "89": 299204096.0, + "90": 299204096.0, + "91": 299204096.0, + "92": 299204096.0, + "93": 299204096.0, + "94": 299204096.0, + "95": 299204096.0, + "96": 299204096.0, + "97": 299204096.0, + "98": 299204096.0, + "99": 299204096.0, + "100": 299204096.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": 
{ + "1": 999540224.0, + "2": 1065140736.0, + "3": 1065140736.0, + "4": 1065140736.0, + "5": 1065140736.0, + "6": 1065140736.0, + "7": 1065140736.0, + "8": 1065140736.0, + "9": 1065140736.0, + "10": 1065140736.0, + "11": 1065140736.0, + "12": 1065140736.0, + "13": 1065140736.0, + "14": 1065140736.0, + "15": 1065140736.0, + "16": 1065140736.0, + "17": 1065140736.0, + "18": 1065140736.0, + "19": 1065140736.0, + "20": 1065140736.0, + "21": 1065140736.0, + "22": 1065140736.0, + "23": 1065140736.0, + "24": 1065140736.0, + "25": 1065140736.0, + "26": 1065140736.0, + "27": 1065140736.0, + "28": 1065140736.0, + "29": 1065140736.0, + "30": 1065140736.0, + "31": 1065140736.0, + "32": 1065140736.0, + "33": 1065140736.0, + "34": 1065140736.0, + "35": 1065140736.0, + "36": 1065140736.0, + "37": 1065140736.0, + "38": 1065140736.0, + "39": 1065140736.0, + "40": 1065140736.0, + "41": 1065140736.0, + "42": 1065140736.0, + "43": 1065140736.0, + "44": 1065140736.0, + "45": 1065140736.0, + "46": 1065140736.0, + "47": 1065140736.0, + "48": 1065140736.0, + "49": 1065140736.0, + "50": 1065140736.0, + "51": 1065140736.0, + "52": 1065140736.0, + "53": 1065140736.0, + "54": 1065140736.0, + "55": 1065140736.0, + "56": 1065140736.0, + "57": 1065140736.0, + "58": 1065140736.0, + "59": 1065140736.0, + "60": 1065140736.0, + "61": 1065140736.0, + "62": 1065140736.0, + "63": 1065140736.0, + "64": 1065140736.0, + "65": 1065140736.0, + "66": 1065140736.0, + "67": 1065140736.0, + "68": 1065140736.0, + "69": 1065140736.0, + "70": 1065140736.0, + "71": 1065140736.0, + "72": 1065140736.0, + "73": 1065140736.0, + "74": 1065140736.0, + "75": 1065140736.0, + "76": 1065140736.0, + "77": 1065140736.0, + "78": 1065140736.0, + "79": 1065140736.0, + "80": 1065140736.0, + "81": 1065140736.0, + "82": 1065140736.0, + "83": 1065140736.0, + "84": 1065140736.0, + "85": 1065140736.0, + "86": 1065140736.0, + "87": 1065140736.0, + "88": 1065140736.0, + "89": 1065140736.0, + "90": 1065140736.0, + "91": 1065140736.0, + 
"92": 1065140736.0, + "93": 1065140736.0, + "94": 1065140736.0, + "95": 1065140736.0, + "96": 1065140736.0, + "97": 1065140736.0, + "98": 1065140736.0, + "99": 1065140736.0, + "100": 1065140736.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5.708, + "2": 0.35089, + "3": 0.30496, + "4": 0.29651, + "5": 0.29659, + "6": 0.29472, + "7": 0.29503, + "8": 0.29691, + "9": 0.29322, + "10": 0.29593, + "11": 0.29761, + "12": 0.30107, + "13": 0.29648, + "14": 0.29634, + "15": 0.29469, + "16": 0.29524, + "17": 0.29548, + "18": 0.29571, + "19": 0.29611, + "20": 0.29461, + "21": 0.60922, + "22": 0.29063, + "23": 0.29253, + "24": 0.29221, + "25": 0.35076, + "26": 0.35448, + "27": 0.40801, + "28": 0.32376, + "29": 0.37315, + "30": 0.36741, + "31": 0.30484, + "32": 0.31503, + "33": 0.33111, + "34": 0.33501, + "35": 0.34146, + "36": 0.33794, + "37": 0.3366, + "38": 0.34, + "39": 0.38047, + "40": 0.34724, + "41": 0.34541, + "42": 0.34988, + "43": 0.34614, + "44": 0.34763, + "45": 0.34809, + "46": 0.3476, + "47": 0.34789, + "48": 0.34502, + "49": 0.34682, + "50": 0.34684, + "51": 0.32661, + "52": 0.30335, + "53": 0.30141, + "54": 0.30091, + "55": 0.30835, + "56": 0.30212, + "57": 0.29749, + "58": 0.29597, + "59": 0.29872, + "60": 0.29657, + "61": 0.2928, + "62": 0.29426, + "63": 0.29212, + "64": 0.29342, + "65": 0.2952, + "66": 0.30066, + "67": 0.32851, + "68": 0.32899, + "69": 0.30542, + "70": 0.29401, + "71": 0.2933, + "72": 0.2929, + "73": 0.29695, + "74": 0.29676, + "75": 0.2973, + "76": 0.29472, + "77": 0.29643, + "78": 0.29471, + "79": 0.29414, + "80": 0.29496, + "81": 0.2934, + "82": 0.2937, + "83": 0.29466, + "84": 0.29244, + "85": 0.29464, + "86": 0.29497, + "87": 0.29568, + "88": 0.29595, + "89": 0.29485, + "90": 0.29357, + "91": 0.29468, + "92": 0.29513, + "93": 0.29741, + "94": 0.29444, + "95": 0.29584, + "96": 0.29461, + "97": 0.29375, + "98": 0.29414, + "99": 0.29269, + "100": 0.29041 + } + } +} \ No 
newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json index 6937fb9bd55..2d2d349a867 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 299203072.0, - "2": 299203072.0, - "3": 299203072.0, - "4": 299203072.0, - "5": 299203072.0, - "6": 299203072.0, - "7": 299203072.0, - "8": 299203072.0, - "9": 299203072.0, - "10": 299203072.0, - "11": 299203072.0, - "12": 299203072.0, - "13": 299203072.0, - "14": 299203072.0, - "15": 299203072.0, - "16": 299203072.0, - "17": 299203072.0, - "18": 299203072.0, - "19": 299203072.0, - "20": 299203072.0, - "21": 299203072.0, - "22": 299203072.0, - "23": 299203072.0, - "24": 299203072.0, - "25": 299203072.0, - "26": 299203072.0, - "27": 299203072.0, - "28": 299203072.0, - "29": 299203072.0, - "30": 299203072.0, - "31": 299203072.0, - "32": 299203072.0, - "33": 299203072.0, - "34": 299203072.0, - "35": 299203072.0, - "36": 299203072.0, - "37": 299203072.0, - "38": 299203072.0, - "39": 299203072.0, - "40": 299203072.0, - "41": 299203072.0, - "42": 299203072.0, - "43": 299203072.0, - "44": 299203072.0, - "45": 299203072.0, - "46": 299203072.0, - "47": 299203072.0, - "48": 299203072.0, - "49": 299203072.0, - "50": 299203072.0, - "51": 299203072.0, - "52": 299203072.0, - "53": 299203072.0, - "54": 299203072.0, - "55": 299203072.0, - "56": 299203072.0, - "57": 299203072.0, - "58": 
299203072.0, - "59": 299203072.0, - "60": 299203072.0, - "61": 299203072.0, - "62": 299203072.0, - "63": 299203072.0, - "64": 299203072.0, - "65": 299203072.0, - "66": 299203072.0, - "67": 299203072.0, - "68": 299203072.0, - "69": 299203072.0, - "70": 299203072.0, - "71": 299203072.0, - "72": 299203072.0, - "73": 299203072.0, - "74": 299203072.0, - "75": 299203072.0, - "76": 299203072.0, - "77": 299203072.0, - "78": 299203072.0, - "79": 299203072.0, - "80": 299203072.0, - "81": 299203072.0, - "82": 299203072.0, - "83": 299203072.0, - "84": 299203072.0, - "85": 299203072.0, - "86": 299203072.0, - "87": 299203072.0, - "88": 299203072.0, - "89": 299203072.0, - "90": 299203072.0, - "91": 299203072.0, - "92": 299203072.0, - "93": 299203072.0, - "94": 299203072.0, - "95": 299203072.0, - "96": 299203072.0, - "97": 299203072.0, - "98": 299203072.0, - "99": 299203072.0, - "100": 299203072.0 + "1": 299204096.0, + "2": 299204096.0, + "3": 299204096.0, + "4": 299204096.0, + "5": 299204096.0, + "6": 299204096.0, + "7": 299204096.0, + "8": 299204096.0, + "9": 299204096.0, + "10": 299204096.0, + "11": 299204096.0, + "12": 299204096.0, + "13": 299204096.0, + "14": 299204096.0, + "15": 299204096.0, + "16": 299204096.0, + "17": 299204096.0, + "18": 299204096.0, + "19": 299204096.0, + "20": 299204096.0, + "21": 299204096.0, + "22": 299204096.0, + "23": 299204096.0, + "24": 299204096.0, + "25": 299204096.0, + "26": 299204096.0, + "27": 299204096.0, + "28": 299204096.0, + "29": 299204096.0, + "30": 299204096.0, + "31": 299204096.0, + "32": 299204096.0, + "33": 299204096.0, + "34": 299204096.0, + "35": 299204096.0, + "36": 299204096.0, + "37": 299204096.0, + "38": 299204096.0, + "39": 299204096.0, + "40": 299204096.0, + "41": 299204096.0, + "42": 299204096.0, + "43": 299204096.0, + "44": 299204096.0, + "45": 299204096.0, + "46": 299204096.0, + "47": 299204096.0, + "48": 299204096.0, + "49": 299204096.0, + "50": 299204096.0, + "51": 299204096.0, + "52": 299204096.0, + "53": 299204096.0, 
+ "54": 299204096.0, + "55": 299204096.0, + "56": 299204096.0, + "57": 299204096.0, + "58": 299204096.0, + "59": 299204096.0, + "60": 299204096.0, + "61": 299204096.0, + "62": 299204096.0, + "63": 299204096.0, + "64": 299204096.0, + "65": 299204096.0, + "66": 299204096.0, + "67": 299204096.0, + "68": 299204096.0, + "69": 299204096.0, + "70": 299204096.0, + "71": 299204096.0, + "72": 299204096.0, + "73": 299204096.0, + "74": 299204096.0, + "75": 299204096.0, + "76": 299204096.0, + "77": 299204096.0, + "78": 299204096.0, + "79": 299204096.0, + "80": 299204096.0, + "81": 299204096.0, + "82": 299204096.0, + "83": 299204096.0, + "84": 299204096.0, + "85": 299204096.0, + "86": 299204096.0, + "87": 299204096.0, + "88": 299204096.0, + "89": 299204096.0, + "90": 299204096.0, + "91": 299204096.0, + "92": 299204096.0, + "93": 299204096.0, + "94": 299204096.0, + "95": 299204096.0, + "96": 299204096.0, + "97": 299204096.0, + "98": 299204096.0, + "99": 299204096.0, + "100": 299204096.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 977125888.0, - "2": 1042071040.0, - "3": 1042071040.0, - "4": 1042071040.0, - "5": 1042071040.0, - "6": 1042071040.0, - "7": 1042071040.0, - "8": 1042071040.0, - "9": 1042071040.0, - "10": 1042071040.0, - "11": 1042071040.0, - "12": 1042071040.0, - "13": 1042071040.0, - "14": 1042071040.0, - "15": 1042071040.0, - "16": 1042071040.0, - "17": 1042071040.0, - "18": 1042071040.0, - "19": 1042071040.0, - "20": 1042071040.0, - "21": 1042071040.0, - "22": 1042071040.0, - "23": 1042071040.0, - "24": 1042071040.0, - "25": 1042071040.0, - "26": 1042071040.0, - "27": 1042071040.0, - "28": 1042071040.0, - "29": 1042071040.0, - "30": 1042071040.0, - "31": 1042071040.0, - "32": 1042071040.0, - "33": 1042071040.0, - "34": 1042071040.0, - "35": 1042071040.0, - "36": 1042071040.0, - "37": 1042071040.0, - "38": 1042071040.0, - "39": 1042071040.0, - "40": 1042071040.0, - "41": 1042071040.0, - "42": 
1042071040.0, - "43": 1042071040.0, - "44": 1042071040.0, - "45": 1042071040.0, - "46": 1042071040.0, - "47": 1042071040.0, - "48": 1042071040.0, - "49": 1042071040.0, - "50": 1042071040.0, - "51": 1042071040.0, - "52": 1042071040.0, - "53": 1042071040.0, - "54": 1042071040.0, - "55": 1042071040.0, - "56": 1042071040.0, - "57": 1042071040.0, - "58": 1042071040.0, - "59": 1042071040.0, - "60": 1042071040.0, - "61": 1042071040.0, - "62": 1042071040.0, - "63": 1042071040.0, - "64": 1042071040.0, - "65": 1042071040.0, - "66": 1042071040.0, - "67": 1042071040.0, - "68": 1042071040.0, - "69": 1042071040.0, - "70": 1042071040.0, - "71": 1042071040.0, - "72": 1042071040.0, - "73": 1042071040.0, - "74": 1042071040.0, - "75": 1042071040.0, - "76": 1042071040.0, - "77": 1042071040.0, - "78": 1042071040.0, - "79": 1042071040.0, - "80": 1042071040.0, - "81": 1042071040.0, - "82": 1042071040.0, - "83": 1042071040.0, - "84": 1042071040.0, - "85": 1042071040.0, - "86": 1042071040.0, - "87": 1042071040.0, - "88": 1042071040.0, - "89": 1042071040.0, - "90": 1042071040.0, - "91": 1042071040.0, - "92": 1042071040.0, - "93": 1042071040.0, - "94": 1042071040.0, - "95": 1042071040.0, - "96": 1042071040.0, - "97": 1042071040.0, - "98": 1042071040.0, - "99": 1042071040.0, - "100": 1042071040.0 + "1": 977520128.0, + "2": 1042465280.0, + "3": 1042465280.0, + "4": 1042465280.0, + "5": 1042465280.0, + "6": 1042465280.0, + "7": 1042465280.0, + "8": 1042465280.0, + "9": 1042465280.0, + "10": 1042465280.0, + "11": 1042465280.0, + "12": 1042465280.0, + "13": 1042465280.0, + "14": 1042465280.0, + "15": 1042465280.0, + "16": 1042465280.0, + "17": 1042465280.0, + "18": 1042465280.0, + "19": 1042465280.0, + "20": 1042465280.0, + "21": 1042465280.0, + "22": 1042465280.0, + "23": 1042465280.0, + "24": 1042465280.0, + "25": 1042465280.0, + "26": 1042465280.0, + "27": 1042465280.0, + "28": 1042465280.0, + "29": 1042465280.0, + "30": 1042465280.0, + "31": 1042465280.0, + "32": 1042465280.0, + "33": 
1042465280.0, + "34": 1042465280.0, + "35": 1042465280.0, + "36": 1042465280.0, + "37": 1042465280.0, + "38": 1042465280.0, + "39": 1042465280.0, + "40": 1042465280.0, + "41": 1042465280.0, + "42": 1042465280.0, + "43": 1042465280.0, + "44": 1042465280.0, + "45": 1042465280.0, + "46": 1042465280.0, + "47": 1042465280.0, + "48": 1042465280.0, + "49": 1042465280.0, + "50": 1042465280.0, + "51": 1042465280.0, + "52": 1042465280.0, + "53": 1042465280.0, + "54": 1042465280.0, + "55": 1042465280.0, + "56": 1042465280.0, + "57": 1042465280.0, + "58": 1042465280.0, + "59": 1042465280.0, + "60": 1042465280.0, + "61": 1042465280.0, + "62": 1042465280.0, + "63": 1042465280.0, + "64": 1042465280.0, + "65": 1042465280.0, + "66": 1042465280.0, + "67": 1042465280.0, + "68": 1042465280.0, + "69": 1042465280.0, + "70": 1042465280.0, + "71": 1042465280.0, + "72": 1042465280.0, + "73": 1042465280.0, + "74": 1042465280.0, + "75": 1042465280.0, + "76": 1042465280.0, + "77": 1042465280.0, + "78": 1042465280.0, + "79": 1042465280.0, + "80": 1042465280.0, + "81": 1042465280.0, + "82": 1042465280.0, + "83": 1042465280.0, + "84": 1042465280.0, + "85": 1042465280.0, + "86": 1042465280.0, + "87": 1042465280.0, + "88": 1042465280.0, + "89": 1042465280.0, + "90": 1042465280.0, + "91": 1042465280.0, + "92": 1042465280.0, + "93": 1042465280.0, + "94": 1042465280.0, + "95": 1042465280.0, + "96": 1042465280.0, + "97": 1042465280.0, + "98": 1042465280.0, + "99": 1042465280.0, + "100": 1042465280.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.71841, - "2": 0.23136, - "3": 0.22493, - "4": 0.22779, - "5": 0.22663, - "6": 0.22036, - "7": 0.23806, - "8": 0.23483, - "9": 0.21894, - "10": 0.22798, - "11": 0.22166, - "12": 0.22477, - "13": 0.21586, - "14": 0.2289, - "15": 0.21846, - "16": 0.22439, - "17": 0.22351, - "18": 0.21894, - "19": 0.22165, - "20": 0.23, - "21": 0.21688, - "22": 0.21901, - "23": 0.21714, - "24": 0.2185, - "25": 0.21681, - 
"26": 0.21775, - "27": 0.21816, - "28": 0.21837, - "29": 0.21776, - "30": 0.21739, - "31": 0.21725, - "32": 0.21929, - "33": 0.2156, - "34": 0.21959, - "35": 0.21865, - "36": 0.21696, - "37": 0.21952, - "38": 0.21797, - "39": 0.21568, - "40": 0.21803, - "41": 0.21756, - "42": 0.21877, - "43": 0.21676, - "44": 0.21677, - "45": 0.21721, - "46": 0.22075, - "47": 0.21856, - "48": 0.21933, - "49": 0.21808, - "50": 0.21813, - "51": 0.22296, - "52": 0.22336, - "53": 0.21692, - "54": 0.21796, - "55": 0.21788, - "56": 0.22002, - "57": 0.21845, - "58": 0.21989, - "59": 0.21686, - "60": 0.22032, - "61": 0.22127, - "62": 0.21716, - "63": 0.21811, - "64": 0.21821, - "65": 0.22368, - "66": 0.22001, - "67": 0.21796, - "68": 0.21889, - "69": 0.22034, - "70": 0.2227, - "71": 0.2211, - "72": 0.2167, - "73": 0.21687, - "74": 0.22416, - "75": 0.22056, - "76": 0.22116, - "77": 0.21759, - "78": 0.21843, - "79": 0.22272, - "80": 0.21922, - "81": 0.2196, - "82": 0.22739, - "83": 0.22344, - "84": 0.21981, - "85": 0.22041, - "86": 0.22015, - "87": 0.21885, - "88": 0.2239, - "89": 0.22975, - "90": 0.23365, - "91": 0.22476, - "92": 0.22336, - "93": 0.21913, - "94": 0.22057, - "95": 0.21711, - "96": 0.21724, - "97": 0.22153, - "98": 0.21996, - "99": 0.21866, - "100": 0.21935 + "1": 9.84544, + "2": 0.22725, + "3": 0.20768, + "4": 0.18628, + "5": 0.18333, + "6": 0.18666, + "7": 0.18629, + "8": 0.18455, + "9": 0.18539, + "10": 0.18537, + "11": 0.18771, + "12": 0.18396, + "13": 0.18789, + "14": 0.18938, + "15": 0.18649, + "16": 0.18634, + "17": 0.18623, + "18": 0.18688, + "19": 0.18602, + "20": 0.18599, + "21": 0.18725, + "22": 0.19085, + "23": 0.18959, + "24": 0.19257, + "25": 0.18881, + "26": 0.18884, + "27": 0.18993, + "28": 0.1897, + "29": 0.19097, + "30": 0.1895, + "31": 0.19115, + "32": 0.18792, + "33": 0.19346, + "34": 0.19005, + "35": 0.18315, + "36": 0.18197, + "37": 0.18748, + "38": 0.18402, + "39": 0.18451, + "40": 0.1843, + "41": 0.18427, + "42": 0.18674, + "43": 0.18376, + "44": 
0.18419, + "45": 0.55191, + "46": 0.18443, + "47": 0.18303, + "48": 0.18819, + "49": 0.19592, + "50": 0.1913, + "51": 0.19759, + "52": 0.19085, + "53": 0.19262, + "54": 0.19058, + "55": 0.18897, + "56": 0.1883, + "57": 0.18757, + "58": 0.18848, + "59": 0.19004, + "60": 0.18932, + "61": 0.1889, + "62": 0.18729, + "63": 0.18757, + "64": 0.18917, + "65": 0.18796, + "66": 0.1903, + "67": 0.18985, + "68": 0.18947, + "69": 0.19134, + "70": 0.19142, + "71": 0.18328, + "72": 0.18321, + "73": 0.18529, + "74": 0.18166, + "75": 0.18265, + "76": 0.18168, + "77": 0.18263, + "78": 0.18274, + "79": 0.18238, + "80": 0.18213, + "81": 0.18186, + "82": 0.1829, + "83": 0.18266, + "84": 0.18204, + "85": 0.18191, + "86": 0.18213, + "87": 0.1812, + "88": 0.18092, + "89": 0.18123, + "90": 0.22177, + "91": 0.18593, + "92": 0.18075, + "93": 0.18389, + "94": 0.18596, + "95": 0.18215, + "96": 0.18128, + "97": 0.18129, + "98": 0.18622, + "99": 0.18532, + "100": 0.18343 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..8faf633ade5 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + 
"23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85839, + "52": 9.7506, + "53": 10.05817, + "54": 9.96076, + "55": 9.88738, + "56": 9.6344, + "57": 9.4967, + "58": 9.83343, + "59": 9.59391, + "60": 9.51376, + "61": 9.69928, + "62": 9.98089, + "63": 9.39065, + "64": 9.77599, + "65": 8.9571, + "66": 9.70054, + "67": 9.37, + "68": 9.78529, + "69": 9.78966, + "70": 9.74676, + "71": 9.61906, + "72": 9.58963, + "73": 9.49629, + "74": 8.94963, + "75": 9.42381, + "76": 9.07799, + "77": 10.07105, + "78": 9.72632, + "79": 9.37966, + "80": 9.40721, + "81": 9.48238, + "82": 9.70152, + "83": 9.30657, + "84": 9.41464, + "85": 9.61784, + "86": 9.08212, + "87": 9.59511, + "88": 9.75008, + "89": 9.60356, + "90": 9.82256, + "91": 9.33721, + "92": 9.35861, + "93": 9.07956, + "94": 8.83268, + "95": 9.51351, + "96": 9.52947, + "97": 9.31813, + "98": 9.67451, + "99": 8.88607, + "100": 9.40106 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", 
+ "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2621.0, + "52": 2597.0, + "53": 2926.0, + "54": 2633.0, + "55": 2206.0, + "56": 2627.0, + "57": 2328.0, + "58": 2886.0, + "59": 2639.0, + "60": 2157.0, + "61": 2736.0, + "62": 2544.0, + "63": 2332.0, + "64": 2948.0, + "65": 2630.0, + "66": 2931.0, + "67": 2717.0, + "68": 2643.0, + "69": 2955.0, + "70": 3040.0, + "71": 2882.0, + "72": 2390.0, + "73": 2812.0, + "74": 1844.0, + "75": 2461.0, + "76": 3067.0, + "77": 3152.0, + "78": 3018.0, + "79": 3008.0, + "80": 3104.0, + "81": 3589.0, + "82": 3218.0, + "83": 2748.0, + "84": 3217.0, + "85": 3167.0, + "86": 2876.0, + "87": 3604.0, + "88": 3017.0, + "89": 3249.0, + "90": 3069.0, + "91": 2865.0, + "92": 3074.0, + "93": 2680.0, + "94": 3392.0, + "95": 3206.0, + "96": 3401.0, + "97": 3107.0, + "98": 3624.0, + "99": 3007.0, + "100": 3111.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 299204096.0, + "52": 299204096.0, + "53": 299204096.0, + "54": 299204096.0, + "55": 299204096.0, + "56": 299204096.0, + "57": 299204096.0, + "58": 299204096.0, + "59": 299204096.0, + "60": 299204096.0, + "61": 299204096.0, + "62": 
299204096.0, + "63": 299204096.0, + "64": 299204096.0, + "65": 299204096.0, + "66": 299204096.0, + "67": 299204096.0, + "68": 299204096.0, + "69": 299204096.0, + "70": 299204096.0, + "71": 299204096.0, + "72": 299204096.0, + "73": 299204096.0, + "74": 299204096.0, + "75": 299204096.0, + "76": 299204096.0, + "77": 299204096.0, + "78": 299204096.0, + "79": 299204096.0, + "80": 299204096.0, + "81": 299204096.0, + "82": 299204096.0, + "83": 299204096.0, + "84": 299204096.0, + "85": 299204096.0, + "86": 299204096.0, + "87": 299204096.0, + "88": 299204096.0, + "89": 299204096.0, + "90": 299204096.0, + "91": 299204096.0, + "92": 299204096.0, + "93": 299204096.0, + "94": 299204096.0, + "95": 299204096.0, + "96": 299204096.0, + "97": 299204096.0, + "98": 299204096.0, + "99": 299204096.0, + "100": 299204096.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1043512832.0, + "52": 1043513856.0, + "53": 1043513856.0, + "54": 1043513856.0, + "55": 1043513856.0, + "56": 1043513856.0, + "57": 1043513856.0, + "58": 1043513856.0, + "59": 1043513856.0, + "60": 1043513856.0, + "61": 1043513856.0, + "62": 1043513856.0, + "63": 1043513856.0, + "64": 1043513856.0, + "65": 1043513856.0, + 
"66": 1043513856.0, + "67": 1043513856.0, + "68": 1043513856.0, + "69": 1043513856.0, + "70": 1043513856.0, + "71": 1043513856.0, + "72": 1043513856.0, + "73": 1043513856.0, + "74": 1043513856.0, + "75": 1043513856.0, + "76": 1043513856.0, + "77": 1043513856.0, + "78": 1043513856.0, + "79": 1043513856.0, + "80": 1043513856.0, + "81": 1043513856.0, + "82": 1043513856.0, + "83": 1043513856.0, + "84": 1043513856.0, + "85": 1043513856.0, + "86": 1043513856.0, + "87": 1043513856.0, + "88": 1043513856.0, + "89": 1043513856.0, + "90": 1043513856.0, + "91": 1043513856.0, + "92": 1043513856.0, + "93": 1043513856.0, + "94": 1043513856.0, + "95": 1043513856.0, + "96": 1043513856.0, + "97": 1043513856.0, + "98": 1043513856.0, + "99": 1043513856.0, + "100": 1043513856.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.95772, + "52": 0.21047, + "53": 0.18237, + "54": 0.18097, + "55": 0.18447, + "56": 0.18543, + "57": 0.18444, + "58": 0.18116, + "59": 0.18103, + "60": 0.185, + "61": 0.1869, + "62": 0.18215, + "63": 0.18074, + "64": 0.22859, + "65": 0.21818, + "66": 0.18939, + "67": 0.18821, + "68": 0.18642, + "69": 0.18318, + "70": 0.18267, + "71": 0.18226, + "72": 0.18124, + "73": 
0.18054, + "74": 0.181, + "75": 0.18224, + "76": 0.18157, + "77": 0.18131, + "78": 0.18061, + "79": 0.18038, + "80": 0.18002, + "81": 0.18191, + "82": 0.18082, + "83": 0.17971, + "84": 0.18144, + "85": 0.18174, + "86": 0.1827, + "87": 0.1801, + "88": 0.18046, + "89": 0.18183, + "90": 0.18427, + "91": 0.18374, + "92": 0.18303, + "93": 0.1818, + "94": 0.18288, + "95": 0.18263, + "96": 0.18209, + "97": 0.18261, + "98": 0.18231, + "99": 0.18192, + "100": 0.18287 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json index 1641ae309dc..2b3b03b42bc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 971619840.0, - "2": 1036172800.0, - "3": 1036172800.0, - "4": 1036172800.0, - "5": 1036172800.0, - "6": 1036172800.0, - "7": 1036172800.0, - "8": 1036172800.0, - "9": 1036172800.0, - "10": 1036172800.0, - "11": 1036172800.0, - "12": 1036172800.0, - "13": 1036172800.0, - "14": 1036172800.0, - "15": 1036172800.0, - "16": 1036172800.0, - "17": 1036172800.0, - "18": 1036172800.0, - "19": 1036172800.0, - "20": 1036172800.0, - "21": 1036172800.0, - "22": 1036172800.0, - "23": 1036172800.0, - "24": 1036172800.0, - "25": 1036172800.0, - "26": 1036172800.0, - "27": 1036172800.0, - "28": 1036172800.0, - "29": 1036172800.0, - "30": 1036172800.0, - "31": 1036172800.0, - "32": 1036172800.0, - "33": 1036172800.0, - "34": 
1036172800.0, - "35": 1036172800.0, - "36": 1036172800.0, - "37": 1036172800.0, - "38": 1036172800.0, - "39": 1036172800.0, - "40": 1036172800.0, - "41": 1036172800.0, - "42": 1036172800.0, - "43": 1036172800.0, - "44": 1036172800.0, - "45": 1036172800.0, - "46": 1036172800.0, - "47": 1036172800.0, - "48": 1036172800.0, - "49": 1036172800.0, - "50": 1036172800.0, - "51": 1036172800.0, - "52": 1036172800.0, - "53": 1036172800.0, - "54": 1036172800.0, - "55": 1036172800.0, - "56": 1036172800.0, - "57": 1036172800.0, - "58": 1036172800.0, - "59": 1036172800.0, - "60": 1036172800.0, - "61": 1036172800.0, - "62": 1036172800.0, - "63": 1036172800.0, - "64": 1036172800.0, - "65": 1036172800.0, - "66": 1036172800.0, - "67": 1036172800.0, - "68": 1036172800.0, - "69": 1036172800.0, - "70": 1036172800.0, - "71": 1036172800.0, - "72": 1036172800.0, - "73": 1036172800.0, - "74": 1036172800.0, - "75": 1036172800.0, - "76": 1036172800.0, - "77": 1036172800.0, - "78": 1036172800.0, - "79": 1036172800.0, - "80": 1036172800.0, - "81": 1036172800.0, - "82": 1036172800.0, - "83": 1036172800.0, - "84": 1036172800.0, - "85": 1036172800.0, - "86": 1036172800.0, - "87": 1036172800.0, - "88": 1036172800.0, - "89": 1036172800.0, - "90": 1036172800.0, - "91": 1036172800.0, - "92": 1036172800.0, - "93": 1036172800.0, - "94": 1036172800.0, - "95": 1036172800.0, - "96": 1036172800.0, - "97": 1036172800.0, - "98": 1036172800.0, - "99": 1036172800.0, - "100": 1036172800.0 + "1": 968737280.0, + "2": 1035779584.0, + "3": 1035779584.0, + "4": 1035779584.0, + "5": 1035779584.0, + "6": 1035779584.0, + "7": 1035779584.0, + "8": 1035779584.0, + "9": 1035779584.0, + "10": 1035779584.0, + "11": 1035779584.0, + "12": 1035779584.0, + "13": 1035779584.0, + "14": 1035779584.0, + "15": 1035779584.0, + "16": 1035779584.0, + "17": 1035779584.0, + "18": 1035779584.0, + "19": 1035779584.0, + "20": 1035779584.0, + "21": 1035779584.0, + "22": 1035779584.0, + "23": 1035779584.0, + "24": 1035779584.0, + "25": 
1035779584.0, + "26": 1035779584.0, + "27": 1035779584.0, + "28": 1035779584.0, + "29": 1035779584.0, + "30": 1035779584.0, + "31": 1035779584.0, + "32": 1035779584.0, + "33": 1035779584.0, + "34": 1035779584.0, + "35": 1035779584.0, + "36": 1035779584.0, + "37": 1035779584.0, + "38": 1035779584.0, + "39": 1035779584.0, + "40": 1035779584.0, + "41": 1035779584.0, + "42": 1035779584.0, + "43": 1035779584.0, + "44": 1035779584.0, + "45": 1035779584.0, + "46": 1035779584.0, + "47": 1035779584.0, + "48": 1035779584.0, + "49": 1035779584.0, + "50": 1035779584.0, + "51": 1035779584.0, + "52": 1035779584.0, + "53": 1035779584.0, + "54": 1035779584.0, + "55": 1035779584.0, + "56": 1035779584.0, + "57": 1035779584.0, + "58": 1035779584.0, + "59": 1035779584.0, + "60": 1035779584.0, + "61": 1035779584.0, + "62": 1035779584.0, + "63": 1035779584.0, + "64": 1035779584.0, + "65": 1035779584.0, + "66": 1035779584.0, + "67": 1035779584.0, + "68": 1035779584.0, + "69": 1035779584.0, + "70": 1035779584.0, + "71": 1035779584.0, + "72": 1035779584.0, + "73": 1035779584.0, + "74": 1035779584.0, + "75": 1035779584.0, + "76": 1035779584.0, + "77": 1035779584.0, + "78": 1035779584.0, + "79": 1035779584.0, + "80": 1035779584.0, + "81": 1035779584.0, + "82": 1035779584.0, + "83": 1035779584.0, + "84": 1035779584.0, + "85": 1035779584.0, + "86": 1035779584.0, + "87": 1035779584.0, + "88": 1035779584.0, + "89": 1035779584.0, + "90": 1035779584.0, + "91": 1035779584.0, + "92": 1035779584.0, + "93": 1035779584.0, + "94": 1035779584.0, + "95": 1035779584.0, + "96": 1035779584.0, + "97": 1035779584.0, + "98": 1035779584.0, + "99": 1035779584.0, + "100": 1035779584.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 5.18846, - "2": 0.36168, - "3": 0.29466, - "4": 0.29234, - "5": 0.29276, - "6": 0.29792, - "7": 0.29352, - "8": 0.2936, - "9": 0.29237, - "10": 0.29769, - "11": 0.29346, - "12": 0.29527, - "13": 0.29315, - "14": 0.29363, - "15": 
0.29305, - "16": 0.29641, - "17": 0.29489, - "18": 0.29861, - "19": 0.29574, - "20": 0.29312, - "21": 0.29388, - "22": 0.29283, - "23": 0.29431, - "24": 0.29335, - "25": 0.29314, - "26": 0.29296, - "27": 0.29356, - "28": 0.29335, - "29": 0.29568, - "30": 0.29411, - "31": 0.29379, - "32": 0.29273, - "33": 0.29354, - "34": 0.29433, - "35": 0.29411, - "36": 0.29363, - "37": 0.2938, - "38": 0.29351, - "39": 0.29356, - "40": 0.29298, - "41": 0.29347, - "42": 0.29413, - "43": 0.29252, - "44": 0.29273, - "45": 0.29334, - "46": 0.29356, - "47": 0.29382, - "48": 0.29398, - "49": 0.2936, - "50": 0.29316, - "51": 0.29514, - "52": 0.28916, - "53": 0.29005, - "54": 0.28929, - "55": 0.28956, - "56": 0.28848, - "57": 0.28858, - "58": 0.28768, - "59": 0.28853, - "60": 0.29008, - "61": 0.2889, - "62": 0.28847, - "63": 0.28786, - "64": 0.28795, - "65": 0.28879, - "66": 0.28923, - "67": 0.28915, - "68": 0.28861, - "69": 0.28895, - "70": 0.28885, - "71": 0.28882, - "72": 0.28775, - "73": 0.28792, - "74": 0.28799, - "75": 0.28754, - "76": 0.28789, - "77": 0.2888, - "78": 0.28929, - "79": 0.28854, - "80": 0.28894, - "81": 0.28751, - "82": 0.28815, - "83": 0.2885, - "84": 0.28813, - "85": 0.28933, - "86": 0.28794, - "87": 0.28758, - "88": 0.28772, - "89": 0.28903, - "90": 0.28798, - "91": 0.28695, - "92": 0.28757, - "93": 0.28831, - "94": 0.28828, - "95": 0.28871, - "96": 0.28746, - "97": 0.28767, - "98": 0.28881, - "99": 0.2875, - "100": 0.28775 + "1": 6.36449, + "2": 0.41478, + "3": 0.30241, + "4": 0.2884, + "5": 0.28755, + "6": 0.28808, + "7": 0.28797, + "8": 0.28869, + "9": 0.28996, + "10": 0.28886, + "11": 0.28738, + "12": 0.28795, + "13": 0.28791, + "14": 0.28704, + "15": 0.28904, + "16": 0.28588, + "17": 0.28849, + "18": 0.28778, + "19": 0.28792, + "20": 0.29039, + "21": 0.287, + "22": 0.28626, + "23": 0.28702, + "24": 0.2849, + "25": 0.28626, + "26": 0.28568, + "27": 0.28568, + "28": 0.2854, + "29": 0.28285, + "30": 0.28684, + "31": 0.28623, + "32": 0.28599, + "33": 0.2876, + 
"34": 0.29486, + "35": 0.29154, + "36": 0.29138, + "37": 0.2898, + "38": 0.28925, + "39": 0.62385, + "40": 0.29181, + "41": 0.28932, + "42": 0.2907, + "43": 0.29195, + "44": 0.29, + "45": 0.29106, + "46": 0.28915, + "47": 0.28992, + "48": 0.32778, + "49": 0.34367, + "50": 0.33689, + "51": 0.34514, + "52": 0.33403, + "53": 0.33545, + "54": 0.33248, + "55": 0.33236, + "56": 0.33296, + "57": 0.33492, + "58": 0.33381, + "59": 0.33223, + "60": 0.33257, + "61": 0.33335, + "62": 0.33224, + "63": 0.33253, + "64": 0.33281, + "65": 0.33219, + "66": 0.31003, + "67": 0.2827, + "68": 0.28133, + "69": 0.28172, + "70": 0.28132, + "71": 0.2812, + "72": 0.28195, + "73": 0.28303, + "74": 0.28159, + "75": 0.28199, + "76": 0.28303, + "77": 0.28083, + "78": 0.28252, + "79": 0.28214, + "80": 0.2819, + "81": 0.28155, + "82": 0.28205, + "83": 0.28156, + "84": 0.28192, + "85": 0.28236, + "86": 0.28154, + "87": 0.28274, + "88": 0.28199, + "89": 0.2816, + "90": 0.28156, + "91": 0.28254, + "92": 0.28186, + "93": 0.28161, + "94": 0.28181, + "95": 0.28289, + "96": 0.28181, + "97": 0.2827, + "98": 0.28237, + "99": 0.28238, + "100": 0.2826 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..13fcd39e949 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", 
+ "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85065, + "52": 9.7464, + "53": 10.07271, + "54": 9.95757, + "55": 9.87725, + "56": 9.62951, + "57": 9.48816, + "58": 9.83239, + "59": 9.58985, + "60": 9.50827, + "61": 9.6947, + "62": 9.99304, + "63": 9.37511, + "64": 9.77996, + "65": 8.95215, + "66": 9.71323, + "67": 9.37884, + "68": 9.78794, + "69": 9.79078, + "70": 9.7308, + "71": 9.61793, + "72": 9.59094, + "73": 9.49435, + "74": 8.94865, + "75": 9.43606, + "76": 9.09894, + "77": 10.06437, + "78": 9.73006, + "79": 9.37771, + "80": 9.41266, + "81": 9.4854, + "82": 9.69576, + "83": 9.32017, + "84": 9.42235, + "85": 9.61578, + "86": 9.07218, + "87": 9.59328, + "88": 9.7509, + "89": 9.61159, + "90": 9.82148, + "91": 9.35304, + "92": 9.36254, + "93": 9.08747, + "94": 8.83398, + "95": 9.51923, + "96": 9.52595, + "97": 9.31413, + "98": 9.67414, + "99": 8.88869, + "100": 9.40651 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": 
"nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2873.0, + "52": 2946.0, + "53": 3158.0, + "54": 2907.0, + "55": 2740.0, + "56": 3029.0, + "57": 2489.0, + "58": 3327.0, + "59": 3042.0, + "60": 2780.0, + "61": 3302.0, + "62": 2961.0, + "63": 2702.0, + "64": 3318.0, + "65": 2909.0, + "66": 3513.0, + "67": 2959.0, + "68": 2963.0, + "69": 3171.0, + "70": 3547.0, + "71": 3246.0, + "72": 2586.0, + "73": 3301.0, + "74": 2135.0, + "75": 2752.0, + "76": 3275.0, + "77": 3648.0, + "78": 3472.0, + "79": 3536.0, + "80": 3685.0, + "81": 4159.0, + "82": 3488.0, + "83": 3179.0, + "84": 3639.0, + "85": 3631.0, + "86": 3045.0, + "87": 4315.0, + "88": 3481.0, + "89": 3819.0, + "90": 3323.0, + "91": 3014.0, + "92": 3581.0, + "93": 2932.0, + "94": 3715.0, + "95": 3593.0, + "96": 3764.0, + "97": 3582.0, + "98": 3998.0, + "99": 3406.0, + "100": 3521.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 269842944.0, + "52": 269842944.0, + "53": 269842944.0, + "54": 
269842944.0, + "55": 269842944.0, + "56": 269842944.0, + "57": 269842944.0, + "58": 269842944.0, + "59": 269842944.0, + "60": 269842944.0, + "61": 269842944.0, + "62": 269842944.0, + "63": 269842944.0, + "64": 269842944.0, + "65": 269842944.0, + "66": 269842944.0, + "67": 269842944.0, + "68": 269842944.0, + "69": 269842944.0, + "70": 269842944.0, + "71": 269842944.0, + "72": 269842944.0, + "73": 269842944.0, + "74": 269842944.0, + "75": 269842944.0, + "76": 269842944.0, + "77": 269842944.0, + "78": 269842944.0, + "79": 269842944.0, + "80": 269842944.0, + "81": 269842944.0, + "82": 269842944.0, + "83": 269842944.0, + "84": 269842944.0, + "85": 269842944.0, + "86": 269842944.0, + "87": 269842944.0, + "88": 269842944.0, + "89": 269842944.0, + "90": 269842944.0, + "91": 269842944.0, + "92": 269842944.0, + "93": 269842944.0, + "94": 269842944.0, + "95": 269842944.0, + "96": 269842944.0, + "97": 269842944.0, + "98": 269842944.0, + "99": 269842944.0, + "100": 269842944.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1036827136.0, + "52": 1036828160.0, + "53": 1036828160.0, + "54": 1036828160.0, + "55": 1036828160.0, + "56": 1036828160.0, + "57": 1036828160.0, + "58": 
1036828160.0, + "59": 1036828160.0, + "60": 1036828160.0, + "61": 1036828160.0, + "62": 1036828160.0, + "63": 1036828160.0, + "64": 1036828160.0, + "65": 1036828160.0, + "66": 1036828160.0, + "67": 1036828160.0, + "68": 1036828160.0, + "69": 1036828160.0, + "70": 1036828160.0, + "71": 1036828160.0, + "72": 1036828160.0, + "73": 1036828160.0, + "74": 1036828160.0, + "75": 1036828160.0, + "76": 1036828160.0, + "77": 1036828160.0, + "78": 1036828160.0, + "79": 1036828160.0, + "80": 1036828160.0, + "81": 1036828160.0, + "82": 1036828160.0, + "83": 1036828160.0, + "84": 1036828160.0, + "85": 1036828160.0, + "86": 1036828160.0, + "87": 1036828160.0, + "88": 1036828160.0, + "89": 1036828160.0, + "90": 1036828160.0, + "91": 1036828160.0, + "92": 1036828160.0, + "93": 1036828160.0, + "94": 1036828160.0, + "95": 1036828160.0, + "96": 1036828160.0, + "97": 1036828160.0, + "98": 1036828160.0, + "99": 1036828160.0, + "100": 1036828160.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.60062, + "52": 0.34561, + "53": 0.29071, + "54": 0.29184, + "55": 0.2948, + "56": 0.29077, + "57": 0.2916, + "58": 0.29134, + "59": 0.29145, + "60": 0.29253, + "61": 0.29047, + "62": 0.29158, + "63": 
0.2928, + "64": 0.29153, + "65": 0.29135, + "66": 0.2908, + "67": 0.29054, + "68": 0.29078, + "69": 0.28979, + "70": 0.29041, + "71": 0.29099, + "72": 0.29052, + "73": 0.29156, + "74": 0.29178, + "75": 0.28944, + "76": 0.28907, + "77": 0.29079, + "78": 0.2907, + "79": 0.29278, + "80": 0.29007, + "81": 0.28964, + "82": 0.28902, + "83": 0.2899, + "84": 0.28906, + "85": 0.28955, + "86": 0.28766, + "87": 0.29175, + "88": 0.28899, + "89": 0.2875, + "90": 0.28943, + "91": 0.29161, + "92": 0.28815, + "93": 0.29145, + "94": 0.28977, + "95": 0.28998, + "96": 0.29062, + "97": 0.29169, + "98": 0.29269, + "99": 0.29163, + "100": 0.29161 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..4200e3b38a8 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84013, + "2": 10.8521, + "3": 10.84145, + "4": 10.84467, + "5": 10.85514, + "6": 10.8635, + "7": 10.85198, + "8": 10.84642, + "9": 10.84925, + "10": 10.81263, + "11": 10.85666, + "12": 10.8427, + "13": 10.86033, + "14": 10.8502, + "15": 10.81715, + "16": 10.80956, + "17": 10.78133, + "18": 10.79323, + "19": 10.79687, + "20": 10.7086, + "21": 10.70208, + "22": 10.58835, + "23": 10.69694, + "24": 10.60843, + "25": 10.57217, + "26": 10.6184, + "27": 10.61356, + "28": 10.56381, + "29": 10.56984, + "30": 10.38372, + "31": 10.17138, + "32": 10.45911, + "33": 10.4549, + "34": 10.24801, + "35": 10.27909, + "36": 10.24807, + "37": 10.35043, + "38": 10.22169, + "39": 10.39797, + "40": 10.09945, + "41": 10.15733, + 
"42": 10.21607, + "43": 9.88836, + "44": 9.98422, + "45": 9.8641, + "46": 9.84157, + "47": 10.1451, + "48": 9.87164, + "49": 9.56255, + "50": 9.9195, + "51": 9.86714, + "52": 9.75686, + "53": 10.06973, + "54": 9.95909, + "55": 9.89872, + "56": 9.63952, + "57": 9.4936, + "58": 9.83608, + "59": 9.59679, + "60": 9.51626, + "61": 9.69468, + "62": 9.99033, + "63": 9.39041, + "64": 9.77374, + "65": 8.96559, + "66": 9.70319, + "67": 9.38057, + "68": 9.78256, + "69": 9.79804, + "70": 9.73697, + "71": 9.62634, + "72": 9.582, + "73": 9.50018, + "74": 8.93897, + "75": 9.42247, + "76": 9.08151, + "77": 10.06555, + "78": 9.71951, + "79": 9.38365, + "80": 9.4005, + "81": 9.48215, + "82": 9.69917, + "83": 9.30951, + "84": 9.41595, + "85": 9.61112, + "86": 9.07822, + "87": 9.59519, + "88": 9.74646, + "89": 9.60078, + "90": 9.82618, + "91": 9.32913, + "92": 9.35518, + "93": 9.08231, + "94": 8.83, + "95": 9.53112, + "96": 9.52889, + "97": 9.30954, + "98": 9.66956, + "99": 8.89675, + "100": 9.4083 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1649.0, + "2": 1829.0, + "3": 1726.0, + "4": 1713.0, + "5": 2036.0, + "6": 1824.0, + "7": 1894.0, + "8": 1742.0, + "9": 1834.0, + "10": 1485.0, + "11": 1871.0, + "12": 1772.0, + "13": 2030.0, + "14": 1885.0, + "15": 1946.0, + "16": 1947.0, + "17": 1965.0, + "18": 1798.0, + "19": 1881.0, + "20": 1859.0, + "21": 1900.0, + "22": 1701.0, + "23": 2140.0, + "24": 1655.0, + "25": 1680.0, + "26": 1783.0, + "27": 1856.0, + "28": 1985.0, + "29": 2065.0, + "30": 1944.0, + "31": 1667.0, + "32": 1941.0, + "33": 2159.0, + "34": 1869.0, + "35": 1955.0, + "36": 2070.0, + "37": 2409.0, + "38": 2151.0, + "39": 2456.0, + "40": 2130.0, + "41": 2184.0, + "42": 2275.0, + "43": 2002.0, + "44": 2112.0, + "45": 1981.0, + "46": 2250.0, + "47": 2543.0, + "48": 2167.0, + "49": 2247.0, + "50": 2295.0, + "51": 2492.0, + "52": 2583.0, + "53": 2788.0, + "54": 2678.0, + "55": 2301.0, + "56": 2724.0, + "57": 2272.0, + 
"58": 2999.0, + "59": 2686.0, + "60": 2330.0, + "61": 2852.0, + "62": 2703.0, + "63": 2277.0, + "64": 2990.0, + "65": 2475.0, + "66": 2892.0, + "67": 2646.0, + "68": 2650.0, + "69": 2845.0, + "70": 3145.0, + "71": 2913.0, + "72": 2573.0, + "73": 2850.0, + "74": 1865.0, + "75": 2466.0, + "76": 3055.0, + "77": 3185.0, + "78": 3106.0, + "79": 3053.0, + "80": 3184.0, + "81": 3447.0, + "82": 3296.0, + "83": 2726.0, + "84": 3276.0, + "85": 3336.0, + "86": 2803.0, + "87": 3643.0, + "88": 3013.0, + "89": 3185.0, + "90": 3126.0, + "91": 3076.0, + "92": 3139.0, + "93": 2665.0, + "94": 3302.0, + "95": 3282.0, + "96": 3404.0, + "97": 3215.0, + "98": 3465.0, + "99": 3128.0, + "100": 3231.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 397748736.0, + "2": 397748736.0, + "3": 397748736.0, + "4": 397748736.0, + "5": 397748736.0, + "6": 397748736.0, + "7": 397748736.0, + "8": 397748736.0, + "9": 397748736.0, + "10": 397748736.0, + "11": 397748736.0, + "12": 397748736.0, + "13": 397748736.0, + "14": 397748736.0, + "15": 397748736.0, + "16": 397748736.0, + "17": 397748736.0, + "18": 397748736.0, + "19": 397748736.0, + "20": 397748736.0, + "21": 397748736.0, + "22": 397748736.0, + "23": 397748736.0, + "24": 397748736.0, + "25": 397748736.0, + "26": 397748736.0, + "27": 397748736.0, + "28": 397748736.0, + "29": 397748736.0, + "30": 397748736.0, + "31": 397748736.0, + "32": 397748736.0, + "33": 397748736.0, + "34": 397748736.0, + "35": 397748736.0, + "36": 397748736.0, + "37": 397748736.0, + "38": 397748736.0, + "39": 397748736.0, + "40": 397748736.0, + "41": 397748736.0, + "42": 397748736.0, + "43": 397748736.0, + "44": 397748736.0, + "45": 397748736.0, + "46": 397748736.0, + "47": 397748736.0, + "48": 397748736.0, + "49": 397748736.0, + "50": 397748736.0, + "51": 397748736.0, + "52": 397748736.0, + "53": 397748736.0, + "54": 397748736.0, + "55": 397748736.0, + "56": 397748736.0, + "57": 397748736.0, + "58": 
397748736.0, + "59": 397748736.0, + "60": 397748736.0, + "61": 397748736.0, + "62": 397748736.0, + "63": 397748736.0, + "64": 397748736.0, + "65": 397748736.0, + "66": 397748736.0, + "67": 397748736.0, + "68": 397748736.0, + "69": 397748736.0, + "70": 397748736.0, + "71": 397748736.0, + "72": 397748736.0, + "73": 397748736.0, + "74": 397748736.0, + "75": 397748736.0, + "76": 397748736.0, + "77": 397748736.0, + "78": 397748736.0, + "79": 397748736.0, + "80": 397748736.0, + "81": 397748736.0, + "82": 397748736.0, + "83": 397748736.0, + "84": 397748736.0, + "85": 397748736.0, + "86": 397748736.0, + "87": 397748736.0, + "88": 397748736.0, + "89": 397748736.0, + "90": 397748736.0, + "91": 397748736.0, + "92": 397748736.0, + "93": 397748736.0, + "94": 397748736.0, + "95": 397748736.0, + "96": 397748736.0, + "97": 397748736.0, + "98": 397748736.0, + "99": 397748736.0, + "100": 397748736.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1057339904.0, + "2": 1190421504.0, + "3": 1190421504.0, + "4": 1190421504.0, + "5": 1190421504.0, + "6": 1190421504.0, + "7": 1190421504.0, + "8": 1190421504.0, + "9": 1190421504.0, + "10": 1190421504.0, + "11": 1190421504.0, + "12": 1190421504.0, + "13": 1190421504.0, + "14": 1190421504.0, + "15": 1190421504.0, + "16": 1190421504.0, + "17": 1190421504.0, + "18": 1190421504.0, + "19": 1190421504.0, + "20": 1190421504.0, + "21": 1190421504.0, + "22": 1190421504.0, + "23": 1190421504.0, + "24": 1190421504.0, + "25": 1190421504.0, + "26": 1190421504.0, + "27": 1190421504.0, + "28": 1190421504.0, + "29": 1190421504.0, + "30": 1190421504.0, + "31": 1190421504.0, + "32": 1190421504.0, + "33": 1190421504.0, + "34": 1190421504.0, + "35": 1190421504.0, + "36": 1190421504.0, + "37": 1190421504.0, + "38": 1190421504.0, + "39": 1190421504.0, + "40": 1190421504.0, + "41": 1190421504.0, + "42": 1190421504.0, + "43": 1190421504.0, + "44": 1190421504.0, + "45": 1190421504.0, + "46": 
1190421504.0, + "47": 1190421504.0, + "48": 1190421504.0, + "49": 1190421504.0, + "50": 1190421504.0, + "51": 1190421504.0, + "52": 1190421504.0, + "53": 1190421504.0, + "54": 1190421504.0, + "55": 1190421504.0, + "56": 1190421504.0, + "57": 1190421504.0, + "58": 1190421504.0, + "59": 1190421504.0, + "60": 1190421504.0, + "61": 1190421504.0, + "62": 1190421504.0, + "63": 1190421504.0, + "64": 1190421504.0, + "65": 1190421504.0, + "66": 1190421504.0, + "67": 1190421504.0, + "68": 1190421504.0, + "69": 1190421504.0, + "70": 1190421504.0, + "71": 1190421504.0, + "72": 1190421504.0, + "73": 1190421504.0, + "74": 1190421504.0, + "75": 1190421504.0, + "76": 1190421504.0, + "77": 1190421504.0, + "78": 1190421504.0, + "79": 1190421504.0, + "80": 1190421504.0, + "81": 1190421504.0, + "82": 1190421504.0, + "83": 1190421504.0, + "84": 1190421504.0, + "85": 1190421504.0, + "86": 1190421504.0, + "87": 1190421504.0, + "88": 1190421504.0, + "89": 1190421504.0, + "90": 1190421504.0, + "91": 1190421504.0, + "92": 1190421504.0, + "93": 1190421504.0, + "94": 1190421504.0, + "95": 1190421504.0, + "96": 1190421504.0, + "97": 1190421504.0, + "98": 1190421504.0, + "99": 1190421504.0, + "100": 1190421504.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5.65464, + "2": 0.60021, + "3": 0.56211, + "4": 0.81567, + "5": 0.51087, + "6": 0.51362, + "7": 0.50868, + "8": 0.51119, + "9": 0.51537, + "10": 0.51491, + "11": 0.51179, + "12": 0.51216, + "13": 0.51208, + "14": 0.52419, + "15": 0.85827, + "16": 0.51731, + "17": 0.51718, + "18": 0.51546, + "19": 0.51334, + "20": 0.5203, + "21": 0.51793, + "22": 0.52901, + "23": 0.51605, + "24": 0.51462, + "25": 0.51195, + "26": 0.50837, + "27": 0.85741, + "28": 0.5083, + "29": 0.50928, + "30": 0.50919, + "31": 0.51059, + "32": 0.5129, + "33": 0.51253, + "34": 0.51142, + "35": 0.50986, + "36": 0.51279, + "37": 0.50996, + "38": 0.50872, + "39": 0.51314, + "40": 0.53857, + "41": 0.87144, + "42": 
0.53733, + "43": 0.82532, + "44": 0.50255, + "45": 0.50942, + "46": 0.73489, + "47": 0.82645, + "48": 0.50964, + "49": 0.5094, + "50": 0.51015, + "51": 0.51394, + "52": 0.50874, + "53": 0.51284, + "54": 0.52083, + "55": 0.50789, + "56": 0.49975, + "57": 0.49792, + "58": 0.51444, + "59": 0.51001, + "60": 0.50768, + "61": 0.51346, + "62": 0.51695, + "63": 0.51586, + "64": 0.51965, + "65": 0.52295, + "66": 0.51606, + "67": 0.50646, + "68": 0.51105, + "69": 0.50496, + "70": 0.50887, + "71": 0.51043, + "72": 0.51293, + "73": 0.52108, + "74": 0.51224, + "75": 0.51005, + "76": 0.51268, + "77": 0.51097, + "78": 0.50687, + "79": 0.50729, + "80": 0.5142, + "81": 0.54269, + "82": 0.5267, + "83": 0.51288, + "84": 0.5147, + "85": 0.52025, + "86": 0.52158, + "87": 0.51316, + "88": 0.5178, + "89": 0.55243, + "90": 0.51232, + "91": 0.51784, + "92": 0.5159, + "93": 0.51384, + "94": 0.51504, + "95": 0.51606, + "96": 0.5173, + "97": 0.51802, + "98": 0.51331, + "99": 0.51466, + "100": 0.51281 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100.json index d5d1de46cac..0b8045d999a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 397747712.0, - "2": 397747712.0, - "3": 397747712.0, - "4": 397747712.0, - "5": 397747712.0, - "6": 397747712.0, - "7": 397747712.0, - "8": 397747712.0, - "9": 397747712.0, - "10": 397747712.0, - "11": 397747712.0, - "12": 397747712.0, - "13": 397747712.0, - "14": 397747712.0, - "15": 
397747712.0, - "16": 397747712.0, - "17": 397747712.0, - "18": 397747712.0, - "19": 397747712.0, - "20": 397747712.0, - "21": 397747712.0, - "22": 397747712.0, - "23": 397747712.0, - "24": 397747712.0, - "25": 397747712.0, - "26": 397747712.0, - "27": 397747712.0, - "28": 397747712.0, - "29": 397747712.0, - "30": 397747712.0, - "31": 397747712.0, - "32": 397747712.0, - "33": 397747712.0, - "34": 397747712.0, - "35": 397747712.0, - "36": 397747712.0, - "37": 397747712.0, - "38": 397747712.0, - "39": 397747712.0, - "40": 397747712.0, - "41": 397747712.0, - "42": 397747712.0, - "43": 397747712.0, - "44": 397747712.0, - "45": 397747712.0, - "46": 397747712.0, - "47": 397747712.0, - "48": 397747712.0, - "49": 397747712.0, - "50": 397747712.0, - "51": 397747712.0, - "52": 397747712.0, - "53": 397747712.0, - "54": 397747712.0, - "55": 397747712.0, - "56": 397747712.0, - "57": 397747712.0, - "58": 397747712.0, - "59": 397747712.0, - "60": 397747712.0, - "61": 397747712.0, - "62": 397747712.0, - "63": 397747712.0, - "64": 397747712.0, - "65": 397747712.0, - "66": 397747712.0, - "67": 397747712.0, - "68": 397747712.0, - "69": 397747712.0, - "70": 397747712.0, - "71": 397747712.0, - "72": 397747712.0, - "73": 397747712.0, - "74": 397747712.0, - "75": 397747712.0, - "76": 397747712.0, - "77": 397747712.0, - "78": 397747712.0, - "79": 397747712.0, - "80": 397747712.0, - "81": 397747712.0, - "82": 397747712.0, - "83": 397747712.0, - "84": 397747712.0, - "85": 397747712.0, - "86": 397747712.0, - "87": 397747712.0, - "88": 397747712.0, - "89": 397747712.0, - "90": 397747712.0, - "91": 397747712.0, - "92": 397747712.0, - "93": 397747712.0, - "94": 397747712.0, - "95": 397747712.0, - "96": 397747712.0, - "97": 397747712.0, - "98": 397747712.0, - "99": 397747712.0, - "100": 397747712.0 + "1": 397748736.0, + "2": 397748736.0, + "3": 397748736.0, + "4": 397748736.0, + "5": 397748736.0, + "6": 397748736.0, + "7": 397748736.0, + "8": 397748736.0, + "9": 397748736.0, + "10": 397748736.0, 
+ "11": 397748736.0, + "12": 397748736.0, + "13": 397748736.0, + "14": 397748736.0, + "15": 397748736.0, + "16": 397748736.0, + "17": 397748736.0, + "18": 397748736.0, + "19": 397748736.0, + "20": 397748736.0, + "21": 397748736.0, + "22": 397748736.0, + "23": 397748736.0, + "24": 397748736.0, + "25": 397748736.0, + "26": 397748736.0, + "27": 397748736.0, + "28": 397748736.0, + "29": 397748736.0, + "30": 397748736.0, + "31": 397748736.0, + "32": 397748736.0, + "33": 397748736.0, + "34": 397748736.0, + "35": 397748736.0, + "36": 397748736.0, + "37": 397748736.0, + "38": 397748736.0, + "39": 397748736.0, + "40": 397748736.0, + "41": 397748736.0, + "42": 397748736.0, + "43": 397748736.0, + "44": 397748736.0, + "45": 397748736.0, + "46": 397748736.0, + "47": 397748736.0, + "48": 397748736.0, + "49": 397748736.0, + "50": 397748736.0, + "51": 397748736.0, + "52": 397748736.0, + "53": 397748736.0, + "54": 397748736.0, + "55": 397748736.0, + "56": 397748736.0, + "57": 397748736.0, + "58": 397748736.0, + "59": 397748736.0, + "60": 397748736.0, + "61": 397748736.0, + "62": 397748736.0, + "63": 397748736.0, + "64": 397748736.0, + "65": 397748736.0, + "66": 397748736.0, + "67": 397748736.0, + "68": 397748736.0, + "69": 397748736.0, + "70": 397748736.0, + "71": 397748736.0, + "72": 397748736.0, + "73": 397748736.0, + "74": 397748736.0, + "75": 397748736.0, + "76": 397748736.0, + "77": 397748736.0, + "78": 397748736.0, + "79": 397748736.0, + "80": 397748736.0, + "81": 397748736.0, + "82": 397748736.0, + "83": 397748736.0, + "84": 397748736.0, + "85": 397748736.0, + "86": 397748736.0, + "87": 397748736.0, + "88": 397748736.0, + "89": 397748736.0, + "90": 397748736.0, + "91": 397748736.0, + "92": 397748736.0, + "93": 397748736.0, + "94": 397748736.0, + "95": 397748736.0, + "96": 397748736.0, + "97": 397748736.0, + "98": 397748736.0, + "99": 397748736.0, + "100": 397748736.0 } }, "mem-max-allocated-bytes": { @@ -326,105 +326,105 @@ "step_interval": 1, "values": { "1": 1044755968.0, 
- "2": 1177840128.0, - "3": 1177840128.0, - "4": 1177840128.0, - "5": 1177840128.0, - "6": 1177840128.0, - "7": 1177840128.0, - "8": 1177840128.0, - "9": 1177840128.0, - "10": 1177840128.0, - "11": 1177840128.0, - "12": 1177840128.0, - "13": 1177840128.0, - "14": 1177840128.0, - "15": 1177840128.0, - "16": 1177840128.0, - "17": 1177840128.0, - "18": 1177840128.0, - "19": 1177840128.0, - "20": 1177840128.0, - "21": 1177840128.0, - "22": 1177840128.0, - "23": 1177840128.0, - "24": 1177840128.0, - "25": 1177840128.0, - "26": 1177840128.0, - "27": 1177840128.0, - "28": 1177840128.0, - "29": 1177840128.0, - "30": 1177840128.0, - "31": 1177840128.0, - "32": 1177840128.0, - "33": 1177840128.0, - "34": 1177840128.0, - "35": 1177840128.0, - "36": 1177840128.0, - "37": 1177840128.0, - "38": 1177840128.0, - "39": 1177840128.0, - "40": 1177840128.0, - "41": 1177840128.0, - "42": 1177840128.0, - "43": 1177840128.0, - "44": 1177840128.0, - "45": 1177840128.0, - "46": 1177840128.0, - "47": 1177840128.0, - "48": 1177840128.0, - "49": 1177840128.0, - "50": 1177840128.0, - "51": 1177840128.0, - "52": 1177840128.0, - "53": 1177840128.0, - "54": 1177840128.0, - "55": 1177840128.0, - "56": 1177840128.0, - "57": 1177840128.0, - "58": 1177840128.0, - "59": 1177840128.0, - "60": 1177840128.0, - "61": 1177840128.0, - "62": 1177840128.0, - "63": 1177840128.0, - "64": 1177840128.0, - "65": 1177840128.0, - "66": 1177840128.0, - "67": 1177840128.0, - "68": 1177840128.0, - "69": 1177840128.0, - "70": 1177840128.0, - "71": 1177840128.0, - "72": 1177840128.0, - "73": 1177840128.0, - "74": 1177840128.0, - "75": 1177840128.0, - "76": 1177840128.0, - "77": 1177840128.0, - "78": 1177840128.0, - "79": 1177840128.0, - "80": 1177840128.0, - "81": 1177840128.0, - "82": 1177840128.0, - "83": 1177840128.0, - "84": 1177840128.0, - "85": 1177840128.0, - "86": 1177840128.0, - "87": 1177840128.0, - "88": 1177840128.0, - "89": 1177840128.0, - "90": 1177840128.0, - "91": 1177840128.0, - "92": 1177840128.0, - 
"93": 1177840128.0, - "94": 1177840128.0, - "95": 1177840128.0, - "96": 1177840128.0, - "97": 1177840128.0, - "98": 1177840128.0, - "99": 1177840128.0, - "100": 1177840128.0 + "2": 1178234368.0, + "3": 1178234368.0, + "4": 1178234368.0, + "5": 1178234368.0, + "6": 1178234368.0, + "7": 1178234368.0, + "8": 1178234368.0, + "9": 1178234368.0, + "10": 1178234368.0, + "11": 1178234368.0, + "12": 1178234368.0, + "13": 1178234368.0, + "14": 1178234368.0, + "15": 1178234368.0, + "16": 1178234368.0, + "17": 1178234368.0, + "18": 1178234368.0, + "19": 1178234368.0, + "20": 1178234368.0, + "21": 1178234368.0, + "22": 1178234368.0, + "23": 1178234368.0, + "24": 1178234368.0, + "25": 1178234368.0, + "26": 1178234368.0, + "27": 1178234368.0, + "28": 1178234368.0, + "29": 1178234368.0, + "30": 1178234368.0, + "31": 1178234368.0, + "32": 1178234368.0, + "33": 1178234368.0, + "34": 1178234368.0, + "35": 1178234368.0, + "36": 1178234368.0, + "37": 1178234368.0, + "38": 1178234368.0, + "39": 1178234368.0, + "40": 1178234368.0, + "41": 1178234368.0, + "42": 1178234368.0, + "43": 1178234368.0, + "44": 1178234368.0, + "45": 1178234368.0, + "46": 1178234368.0, + "47": 1178234368.0, + "48": 1178234368.0, + "49": 1178234368.0, + "50": 1178234368.0, + "51": 1178234368.0, + "52": 1178234368.0, + "53": 1178234368.0, + "54": 1178234368.0, + "55": 1178234368.0, + "56": 1178234368.0, + "57": 1178234368.0, + "58": 1178234368.0, + "59": 1178234368.0, + "60": 1178234368.0, + "61": 1178234368.0, + "62": 1178234368.0, + "63": 1178234368.0, + "64": 1178234368.0, + "65": 1178234368.0, + "66": 1178234368.0, + "67": 1178234368.0, + "68": 1178234368.0, + "69": 1178234368.0, + "70": 1178234368.0, + "71": 1178234368.0, + "72": 1178234368.0, + "73": 1178234368.0, + "74": 1178234368.0, + "75": 1178234368.0, + "76": 1178234368.0, + "77": 1178234368.0, + "78": 1178234368.0, + "79": 1178234368.0, + "80": 1178234368.0, + "81": 1178234368.0, + "82": 1178234368.0, + "83": 1178234368.0, + "84": 1178234368.0, + "85": 
1178234368.0, + "86": 1178234368.0, + "87": 1178234368.0, + "88": 1178234368.0, + "89": 1178234368.0, + "90": 1178234368.0, + "91": 1178234368.0, + "92": 1178234368.0, + "93": 1178234368.0, + "94": 1178234368.0, + "95": 1178234368.0, + "96": 1178234368.0, + "97": 1178234368.0, + "98": 1178234368.0, + "99": 1178234368.0, + "100": 1178234368.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.61367, - "2": 0.31935, - "3": 0.29274, - "4": 0.28637, - "5": 0.2844, - "6": 0.29788, - "7": 0.2902, - "8": 0.28573, - "9": 0.29136, - "10": 0.29884, - "11": 0.29048, - "12": 0.2896, - "13": 0.29421, - "14": 0.29008, - "15": 0.2871, - "16": 0.28903, - "17": 0.2924, - "18": 0.28887, - "19": 0.28926, - "20": 0.30241, - "21": 0.29571, - "22": 0.28966, - "23": 0.29177, - "24": 0.29106, - "25": 0.28884, - "26": 0.28921, - "27": 0.29461, - "28": 0.28664, - "29": 0.28881, - "30": 0.29392, - "31": 0.29062, - "32": 0.28778, - "33": 0.29055, - "34": 0.29409, - "35": 0.29169, - "36": 0.29211, - "37": 0.29809, - "38": 0.29114, - "39": 0.29052, - "40": 0.2919, - "41": 0.2953, - "42": 0.28957, - "43": 0.29349, - "44": 0.30062, - "45": 0.28999, - "46": 0.29486, - "47": 0.29689, - "48": 0.29092, - "49": 0.29024, - "50": 0.28916, - "51": 0.30865, - "52": 0.29957, - "53": 0.28833, - "54": 0.29375, - "55": 0.29176, - "56": 0.29338, - "57": 0.28952, - "58": 0.29232, - "59": 0.29026, - "60": 0.28767, - "61": 0.29364, - "62": 0.2935, - "63": 0.29522, - "64": 0.29495, - "65": 0.29509, - "66": 0.29643, - "67": 0.29584, - "68": 0.29853, - "69": 0.29821, - "70": 0.29334, - "71": 0.29579, - "72": 0.29325, - "73": 0.29403, - "74": 0.29671, - "75": 0.63106, - "76": 0.29142, - "77": 0.29491, - "78": 0.29437, - "79": 0.29239, - "80": 0.29453, - "81": 0.29509, - "82": 0.29493, - "83": 0.2915, - "84": 0.30181, - "85": 0.29305, - "86": 0.28823, - "87": 0.29337, - "88": 0.29025, - "89": 0.28953, - "90": 0.29694, - "91": 0.29077, - "92": 0.29411, - "93": 
0.28767, - "94": 0.29313, - "95": 0.29276, - "96": 0.29197, - "97": 0.29466, - "98": 0.29321, - "99": 0.29311, - "100": 0.29175 + "1": 10.36091, + "2": 0.34885, + "3": 0.28252, + "4": 0.26078, + "5": 0.25876, + "6": 0.25718, + "7": 0.26528, + "8": 0.26311, + "9": 0.26375, + "10": 0.26354, + "11": 0.26207, + "12": 0.26033, + "13": 0.26467, + "14": 0.26281, + "15": 0.26355, + "16": 0.26138, + "17": 0.2649, + "18": 0.26631, + "19": 0.26244, + "20": 0.26263, + "21": 0.26939, + "22": 0.26538, + "23": 0.26644, + "24": 0.26284, + "25": 0.26534, + "26": 0.2629, + "27": 0.2631, + "28": 0.26216, + "29": 0.26306, + "30": 0.26559, + "31": 0.26198, + "32": 0.26229, + "33": 0.26263, + "34": 0.26154, + "35": 0.26277, + "36": 0.26291, + "37": 0.26156, + "38": 0.26052, + "39": 0.26366, + "40": 0.26065, + "41": 0.26364, + "42": 0.62325, + "43": 0.26139, + "44": 0.2631, + "45": 0.26374, + "46": 0.26054, + "47": 0.26187, + "48": 0.26188, + "49": 0.25929, + "50": 0.25984, + "51": 0.26978, + "52": 0.26013, + "53": 0.26513, + "54": 0.26111, + "55": 0.26044, + "56": 0.2624, + "57": 0.26412, + "58": 0.26108, + "59": 0.26051, + "60": 0.263, + "61": 0.26363, + "62": 0.27145, + "63": 0.27074, + "64": 0.26955, + "65": 0.65636, + "66": 0.26945, + "67": 0.27333, + "68": 0.27517, + "69": 0.27206, + "70": 0.27181, + "71": 0.27216, + "72": 0.9521, + "73": 0.27086, + "74": 0.27375, + "75": 0.89877, + "76": 0.27077, + "77": 0.26534, + "78": 0.2565, + "79": 0.26961, + "80": 0.26648, + "81": 0.26175, + "82": 0.26268, + "83": 0.26668, + "84": 0.26108, + "85": 0.25906, + "86": 0.25936, + "87": 0.25961, + "88": 0.25714, + "89": 0.26171, + "90": 0.26239, + "91": 0.26137, + "92": 0.25975, + "93": 0.25965, + "94": 0.2611, + "95": 0.25793, + "96": 0.26009, + "97": 0.26077, + "98": 0.25869, + "99": 0.2601, + "100": 0.25909 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100_2nd.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..25df8735936 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.86379, + "52": 9.75652, + "53": 10.06157, + "54": 9.96418, + "55": 9.89204, + "56": 9.63681, + "57": 9.49807, + "58": 9.83504, + "59": 9.59701, + "60": 9.51573, + "61": 9.70155, + "62": 9.97973, + "63": 9.38914, + "64": 9.77552, + "65": 8.95939, + "66": 9.6978, + "67": 9.37174, + "68": 9.78449, + "69": 9.79058, + "70": 9.74555, + "71": 9.61867, + "72": 9.58317, + "73": 9.49175, + "74": 8.939, + "75": 9.41848, + "76": 9.07237, + "77": 10.06903, + "78": 9.72443, + "79": 9.3767, + "80": 9.40261, + "81": 9.47859, + "82": 9.6984, + "83": 9.30086, + "84": 9.41299, + "85": 9.61514, + "86": 9.07881, + "87": 9.59402, + "88": 9.74658, + "89": 9.60096, + "90": 9.81999, + "91": 9.32977, + "92": 9.35625, + "93": 9.07406, + "94": 8.82774, + "95": 9.51099, + "96": 9.52501, + "97": 9.3163, + 
"98": 9.67278, + "99": 8.88493, + "100": 9.39984 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2563.0, + "52": 2431.0, + "53": 2917.0, + "54": 2655.0, + "55": 2307.0, + "56": 2605.0, + "57": 2385.0, + "58": 2952.0, + "59": 2730.0, + "60": 2287.0, + "61": 2904.0, + "62": 2601.0, + "63": 2452.0, + "64": 2810.0, + "65": 2544.0, + "66": 2914.0, + "67": 2664.0, + "68": 2709.0, + "69": 2967.0, + "70": 3049.0, + "71": 2936.0, + "72": 2410.0, + "73": 2991.0, + "74": 1882.0, + "75": 2539.0, + "76": 3060.0, + "77": 3219.0, + "78": 3023.0, + "79": 3084.0, + "80": 3101.0, + "81": 3530.0, + "82": 3298.0, + "83": 2666.0, + "84": 3154.0, + "85": 3288.0, + "86": 2827.0, + "87": 3720.0, + "88": 3168.0, + "89": 3275.0, + "90": 3168.0, + "91": 2919.0, + "92": 3071.0, + "93": 2751.0, + "94": 3412.0, + "95": 3186.0, + "96": 3429.0, + "97": 3083.0, + "98": 3477.0, + "99": 3093.0, + "100": 3212.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": 
"nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 397748736.0, + "52": 397748736.0, + "53": 397748736.0, + "54": 397748736.0, + "55": 397748736.0, + "56": 397748736.0, + "57": 397748736.0, + "58": 397748736.0, + "59": 397748736.0, + "60": 397748736.0, + "61": 397748736.0, + "62": 397748736.0, + "63": 397748736.0, + "64": 397748736.0, + "65": 397748736.0, + "66": 397748736.0, + "67": 397748736.0, + "68": 397748736.0, + "69": 397748736.0, + "70": 397748736.0, + "71": 397748736.0, + "72": 397748736.0, + "73": 397748736.0, + "74": 397748736.0, + "75": 397748736.0, + "76": 397748736.0, + "77": 397748736.0, + "78": 397748736.0, + "79": 397748736.0, + "80": 397748736.0, + "81": 397748736.0, + "82": 397748736.0, + "83": 397748736.0, + "84": 397748736.0, + "85": 397748736.0, + "86": 397748736.0, + "87": 397748736.0, + "88": 397748736.0, + "89": 397748736.0, + "90": 397748736.0, + "91": 397748736.0, + "92": 397748736.0, + "93": 397748736.0, + "94": 397748736.0, + "95": 397748736.0, + "96": 397748736.0, + "97": 397748736.0, + "98": 397748736.0, + "99": 397748736.0, + "100": 397748736.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": 
"nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1179281920.0, + "52": 1179282944.0, + "53": 1179282944.0, + "54": 1179282944.0, + "55": 1179282944.0, + "56": 1179282944.0, + "57": 1179282944.0, + "58": 1179282944.0, + "59": 1179282944.0, + "60": 1179282944.0, + "61": 1179282944.0, + "62": 1179282944.0, + "63": 1179282944.0, + "64": 1179282944.0, + "65": 1179282944.0, + "66": 1179282944.0, + "67": 1179282944.0, + "68": 1179282944.0, + "69": 1179282944.0, + "70": 1179282944.0, + "71": 1179282944.0, + "72": 1179282944.0, + "73": 1179282944.0, + "74": 1179282944.0, + "75": 1179282944.0, + "76": 1179282944.0, + "77": 1179282944.0, + "78": 1179282944.0, + "79": 1179282944.0, + "80": 1179282944.0, + "81": 1179282944.0, + "82": 1179282944.0, + "83": 1179282944.0, + "84": 1179282944.0, + "85": 1179282944.0, + "86": 1179282944.0, + "87": 1179282944.0, + "88": 1179282944.0, + "89": 1179282944.0, + "90": 1179282944.0, + "91": 1179282944.0, + "92": 1179282944.0, + "93": 1179282944.0, + "94": 1179282944.0, + "95": 1179282944.0, + "96": 1179282944.0, + "97": 1179282944.0, + "98": 1179282944.0, + "99": 1179282944.0, + "100": 1179282944.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + 
"24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 8.28969, + "52": 0.28668, + "53": 0.25532, + "54": 0.25658, + "55": 0.25678, + "56": 0.25808, + "57": 0.25759, + "58": 0.2573, + "59": 0.25595, + "60": 0.25655, + "61": 0.25748, + "62": 0.25355, + "63": 0.25645, + "64": 0.25544, + "65": 0.25465, + "66": 0.25429, + "67": 0.25503, + "68": 0.25478, + "69": 0.25435, + "70": 0.25389, + "71": 0.25473, + "72": 0.254, + "73": 0.25451, + "74": 0.25381, + "75": 0.25278, + "76": 0.25503, + "77": 0.25251, + "78": 0.25271, + "79": 0.25524, + "80": 0.25494, + "81": 0.25321, + "82": 0.25436, + "83": 0.25713, + "84": 0.25332, + "85": 0.25392, + "86": 0.25232, + "87": 0.25246, + "88": 0.25419, + "89": 0.25306, + "90": 0.25417, + "91": 0.25642, + "92": 0.25493, + "93": 0.2529, + "94": 0.25478, + "95": 0.25685, + "96": 0.25271, + "97": 0.25387, + "98": 0.25551, + "99": 0.25384, + "100": 0.2519 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json index 1091699bf9a..d4f8136d68c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 4.72553, - "2": 0.52446, - "3": 
0.41527, - "4": 0.41699, - "5": 0.41496, - "6": 0.41411, - "7": 0.41829, - "8": 0.41655, - "9": 0.41643, - "10": 0.42008, - "11": 0.41959, - "12": 0.41842, - "13": 0.41485, - "14": 0.41643, - "15": 0.41486, - "16": 0.41617, - "17": 0.41476, - "18": 0.42598, - "19": 0.41821, - "20": 0.41457, - "21": 0.41579, - "22": 0.41438, - "23": 0.41644, - "24": 0.41499, - "25": 0.41537, - "26": 0.41593, - "27": 0.42875, - "28": 0.41636, - "29": 0.41505, - "30": 0.4148, - "31": 0.41806, - "32": 0.41549, - "33": 0.41482, - "34": 0.41559, - "35": 0.4156, - "36": 0.4152, - "37": 0.4152, - "38": 0.4154, - "39": 0.41674, - "40": 0.41745, - "41": 0.41582, - "42": 0.41548, - "43": 0.41428, - "44": 0.4158, - "45": 0.41469, - "46": 0.41584, - "47": 0.41662, - "48": 0.41588, - "49": 0.41548, - "50": 0.42504, - "51": 0.41857, - "52": 0.40985, - "53": 0.40877, - "54": 0.41013, - "55": 0.40869, - "56": 0.84381, - "57": 0.41437, - "58": 0.42502, - "59": 0.41122, - "60": 0.41956, - "61": 0.40996, - "62": 0.40983, - "63": 0.41144, - "64": 0.41126, - "65": 0.41361, - "66": 0.41243, - "67": 0.41431, - "68": 0.4396, - "69": 0.42434, - "70": 0.41269, - "71": 0.42108, - "72": 0.41357, - "73": 0.41116, - "74": 0.41086, - "75": 0.41041, - "76": 0.41106, - "77": 0.41, - "78": 0.41669, - "79": 0.41627, - "80": 0.41237, - "81": 0.41157, - "82": 0.41168, - "83": 0.41229, - "84": 0.41209, - "85": 0.41258, - "86": 0.41294, - "87": 0.41185, - "88": 0.41106, - "89": 0.41159, - "90": 0.41277, - "91": 0.41162, - "92": 0.41309, - "93": 0.41351, - "94": 0.40941, - "95": 0.40961, - "96": 0.41012, - "97": 0.40887, - "98": 0.40809, - "99": 0.40865, - "100": 0.40854 + "1": 4.0346, + "2": 0.53704, + "3": 0.42719, + "4": 0.41535, + "5": 0.40389, + "6": 0.40332, + "7": 0.40402, + "8": 0.40471, + "9": 0.40343, + "10": 0.40348, + "11": 0.3985, + "12": 0.39842, + "13": 0.39603, + "14": 0.39492, + "15": 0.39651, + "16": 0.39564, + "17": 0.39567, + "18": 0.39657, + "19": 0.39768, + "20": 0.39761, + "21": 0.39891, + "22": 
0.39636, + "23": 0.39698, + "24": 0.39738, + "25": 0.39624, + "26": 0.39431, + "27": 0.39658, + "28": 0.39585, + "29": 0.39364, + "30": 0.39529, + "31": 0.39497, + "32": 0.39598, + "33": 0.39773, + "34": 0.39643, + "35": 0.39763, + "36": 0.39632, + "37": 0.39546, + "38": 0.3982, + "39": 0.7438, + "40": 0.39448, + "41": 0.39549, + "42": 0.39538, + "43": 0.39526, + "44": 0.39405, + "45": 0.39698, + "46": 0.39664, + "47": 0.39462, + "48": 0.39535, + "49": 0.39382, + "50": 0.3941, + "51": 0.43707, + "52": 0.43149, + "53": 0.42387, + "54": 0.43267, + "55": 0.43104, + "56": 1.05764, + "57": 0.39732, + "58": 0.39576, + "59": 0.3984, + "60": 0.40214, + "61": 0.4001, + "62": 0.90991, + "63": 0.39865, + "64": 0.39618, + "65": 0.39554, + "66": 0.79331, + "67": 0.39478, + "68": 0.39551, + "69": 0.39587, + "70": 0.39669, + "71": 0.39593, + "72": 0.93958, + "73": 0.39773, + "74": 0.39717, + "75": 0.3961, + "76": 0.39596, + "77": 0.39649, + "78": 0.39584, + "79": 0.39596, + "80": 0.39568, + "81": 0.39433, + "82": 0.39598, + "83": 0.39548, + "84": 0.39563, + "85": 0.39555, + "86": 0.39811, + "87": 0.39515, + "88": 0.39682, + "89": 0.39662, + "90": 0.39566, + "91": 0.39589, + "92": 0.39584, + "93": 0.39725, + "94": 0.39593, + "95": 0.39495, + "96": 0.39495, + "97": 0.39567, + "98": 0.39566, + "99": 0.3973, + "100": 0.39539 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..680d04eb6a6 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + 
"3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.8567, + "52": 9.75178, + "53": 10.07652, + "54": 9.96084, + "55": 9.88221, + "56": 9.63206, + "57": 9.49147, + "58": 9.83408, + "59": 9.59352, + "60": 9.51388, + "61": 9.69802, + "62": 9.99154, + "63": 9.3723, + "64": 9.77839, + "65": 8.95518, + "66": 9.70976, + "67": 9.38198, + "68": 9.78701, + "69": 9.793, + "70": 9.73033, + "71": 9.61752, + "72": 9.58459, + "73": 9.48958, + "74": 8.94015, + "75": 9.43092, + "76": 9.09168, + "77": 10.06222, + "78": 9.72696, + "79": 9.37408, + "80": 9.40676, + "81": 9.47995, + "82": 9.69225, + "83": 9.31299, + "84": 9.41921, + "85": 9.61096, + "86": 9.06853, + "87": 9.59119, + "88": 9.74582, + "89": 9.60624, + "90": 9.81746, + "91": 9.34247, + "92": 9.35856, + "93": 9.07894, + "94": 8.82753, + "95": 9.51606, + "96": 9.52063, + "97": 9.31097, + "98": 9.67055, + "99": 8.88626, + "100": 9.40485 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + 
"24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2829.0, + "52": 2809.0, + "53": 3230.0, + "54": 2864.0, + "55": 2706.0, + "56": 2917.0, + "57": 2529.0, + "58": 3339.0, + "59": 3051.0, + "60": 2623.0, + "61": 3287.0, + "62": 2913.0, + "63": 2639.0, + "64": 3154.0, + "65": 2856.0, + "66": 3465.0, + "67": 2934.0, + "68": 2985.0, + "69": 3298.0, + "70": 3653.0, + "71": 3260.0, + "72": 2684.0, + "73": 3232.0, + "74": 2191.0, + "75": 2766.0, + "76": 3335.0, + "77": 3793.0, + "78": 3608.0, + "79": 3384.0, + "80": 3782.0, + "81": 3969.0, + "82": 3640.0, + "83": 3237.0, + "84": 3606.0, + "85": 3553.0, + "86": 3160.0, + "87": 4130.0, + "88": 3430.0, + "89": 3818.0, + "90": 3363.0, + "91": 3041.0, + "92": 3524.0, + "93": 3060.0, + "94": 3575.0, + "95": 3463.0, + "96": 3921.0, + "97": 3597.0, + "98": 4039.0, + "99": 3435.0, + "100": 3548.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + 
"47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 368387584.0, + "52": 368387584.0, + "53": 368387584.0, + "54": 368387584.0, + "55": 368387584.0, + "56": 368387584.0, + "57": 368387584.0, + "58": 368387584.0, + "59": 368387584.0, + "60": 368387584.0, + "61": 368387584.0, + "62": 368387584.0, + "63": 368387584.0, + "64": 368387584.0, + "65": 368387584.0, + "66": 368387584.0, + "67": 368387584.0, + "68": 368387584.0, + "69": 368387584.0, + "70": 368387584.0, + "71": 368387584.0, + "72": 368387584.0, + "73": 368387584.0, + "74": 368387584.0, + "75": 368387584.0, + "76": 368387584.0, + "77": 368387584.0, + "78": 368387584.0, + "79": 368387584.0, + "80": 368387584.0, + "81": 368387584.0, + "82": 368387584.0, + "83": 368387584.0, + "84": 368387584.0, + "85": 368387584.0, + "86": 368387584.0, + "87": 368387584.0, + "88": 368387584.0, + "89": 368387584.0, + "90": 368387584.0, + "91": 368387584.0, + "92": 368387584.0, + "93": 368387584.0, + "94": 368387584.0, + "95": 368387584.0, + "96": 368387584.0, + "97": 368387584.0, + "98": 368387584.0, + "99": 368387584.0, + "100": 368387584.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1162110464.0, + "52": 
1162111488.0, + "53": 1162111488.0, + "54": 1162111488.0, + "55": 1162111488.0, + "56": 1162111488.0, + "57": 1162111488.0, + "58": 1162111488.0, + "59": 1162111488.0, + "60": 1162111488.0, + "61": 1162111488.0, + "62": 1162111488.0, + "63": 1162111488.0, + "64": 1162111488.0, + "65": 1162111488.0, + "66": 1162111488.0, + "67": 1162111488.0, + "68": 1162111488.0, + "69": 1162111488.0, + "70": 1162111488.0, + "71": 1162111488.0, + "72": 1162111488.0, + "73": 1162111488.0, + "74": 1162111488.0, + "75": 1162111488.0, + "76": 1162111488.0, + "77": 1162111488.0, + "78": 1162111488.0, + "79": 1162111488.0, + "80": 1162111488.0, + "81": 1162111488.0, + "82": 1162111488.0, + "83": 1162111488.0, + "84": 1162111488.0, + "85": 1162111488.0, + "86": 1162111488.0, + "87": 1162111488.0, + "88": 1162111488.0, + "89": 1162111488.0, + "90": 1162111488.0, + "91": 1162111488.0, + "92": 1162111488.0, + "93": 1162111488.0, + "94": 1162111488.0, + "95": 1162111488.0, + "96": 1162111488.0, + "97": 1162111488.0, + "98": 1162111488.0, + "99": 1162111488.0, + "100": 1162111488.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 6.59174, + "52": 0.42614, + "53": 0.39758, + "54": 0.39842, + "55": 
0.39876, + "56": 0.39663, + "57": 0.39728, + "58": 0.39765, + "59": 0.39654, + "60": 0.39664, + "61": 0.3959, + "62": 0.39703, + "63": 0.39487, + "64": 0.39391, + "65": 0.3946, + "66": 0.39321, + "67": 0.39339, + "68": 0.39323, + "69": 0.39386, + "70": 0.39664, + "71": 0.39421, + "72": 0.39561, + "73": 0.3947, + "74": 0.3944, + "75": 0.39483, + "76": 0.39467, + "77": 0.39476, + "78": 0.39408, + "79": 0.395, + "80": 0.39426, + "81": 0.39421, + "82": 0.39474, + "83": 0.39376, + "84": 0.39492, + "85": 0.39449, + "86": 0.39328, + "87": 0.39468, + "88": 0.39375, + "89": 0.39395, + "90": 0.39427, + "91": 0.39417, + "92": 0.39443, + "93": 0.39424, + "94": 0.39416, + "95": 0.39486, + "96": 0.39653, + "97": 0.39395, + "98": 0.39533, + "99": 0.39459, + "100": 0.39587 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..5b22c8f244c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.90251, + "2": 10.9138, + "3": 10.90169, + "4": 10.90724, + "5": 10.9045, + "6": 10.91656, + "7": 10.91268, + "8": 10.89505, + "9": 10.91555, + "10": 10.87277, + "11": 10.90376, + "12": 10.90404, + "13": 10.91831, + "14": 10.90742, + "15": 10.87551, + "16": 10.85477, + "17": 10.83186, + "18": 10.84054, + "19": 10.84221, + "20": 10.75039, + "21": 10.73638, + "22": 10.62979, + "23": 10.74023, + "24": 10.64438, + "25": 10.60242, + "26": 10.64922, + "27": 10.64074, + "28": 10.58757, + "29": 10.59165, + "30": 10.38969, + "31": 10.18185, + "32": 10.49227, + "33": 10.48772, + "34": 10.26316, + "35": 
10.2923, + "36": 10.25547, + "37": 10.37371, + "38": 10.2355, + "39": 10.42347, + "40": 10.10947, + "41": 10.17531, + "42": 10.2316, + "43": 9.87326, + "44": 9.9918, + "45": 9.86649, + "46": 9.84547, + "47": 10.17367, + "48": 9.87146, + "49": 9.55757, + "50": 9.92547, + "51": 9.87398, + "52": 9.76585, + "53": 10.08271, + "54": 9.97273, + "55": 9.90735, + "56": 9.64216, + "57": 9.48857, + "58": 9.84273, + "59": 9.60111, + "60": 9.52016, + "61": 9.70058, + "62": 9.99644, + "63": 9.39064, + "64": 9.77614, + "65": 8.96633, + "66": 9.70947, + "67": 9.3877, + "68": 9.78895, + "69": 9.80803, + "70": 9.74237, + "71": 9.63382, + "72": 9.59118, + "73": 9.50694, + "74": 8.94248, + "75": 9.42903, + "76": 9.08836, + "77": 10.07155, + "78": 9.72684, + "79": 9.38725, + "80": 9.40572, + "81": 9.48703, + "82": 9.70482, + "83": 9.31557, + "84": 9.42113, + "85": 9.61467, + "86": 9.08461, + "87": 9.59903, + "88": 9.75369, + "89": 9.60597, + "90": 9.83153, + "91": 9.33877, + "92": 9.36033, + "93": 9.0904, + "94": 8.83712, + "95": 9.53804, + "96": 9.53391, + "97": 9.31319, + "98": 9.67422, + "99": 8.90345, + "100": 9.41498 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1541.0, + "2": 1645.0, + "3": 1629.0, + "4": 1795.0, + "5": 1841.0, + "6": 1779.0, + "7": 1764.0, + "8": 1563.0, + "9": 1825.0, + "10": 1409.0, + "11": 1956.0, + "12": 1760.0, + "13": 1869.0, + "14": 1833.0, + "15": 1958.0, + "16": 1850.0, + "17": 1795.0, + "18": 1781.0, + "19": 1766.0, + "20": 1632.0, + "21": 1866.0, + "22": 1649.0, + "23": 1996.0, + "24": 1722.0, + "25": 1609.0, + "26": 1678.0, + "27": 1752.0, + "28": 1903.0, + "29": 1870.0, + "30": 1851.0, + "31": 1483.0, + "32": 1836.0, + "33": 2084.0, + "34": 1799.0, + "35": 1867.0, + "36": 1846.0, + "37": 2303.0, + "38": 2171.0, + "39": 2173.0, + "40": 2153.0, + "41": 2230.0, + "42": 2211.0, + "43": 1945.0, + "44": 2003.0, + "45": 2027.0, + "46": 2196.0, + "47": 2540.0, + "48": 2320.0, + "49": 2289.0, + 
"50": 2249.0, + "51": 2418.0, + "52": 2459.0, + "53": 2723.0, + "54": 2710.0, + "55": 2167.0, + "56": 2529.0, + "57": 2306.0, + "58": 2690.0, + "59": 2678.0, + "60": 2241.0, + "61": 2828.0, + "62": 2453.0, + "63": 2330.0, + "64": 2785.0, + "65": 2596.0, + "66": 2886.0, + "67": 2589.0, + "68": 2725.0, + "69": 2800.0, + "70": 3022.0, + "71": 2941.0, + "72": 2388.0, + "73": 2770.0, + "74": 1833.0, + "75": 2434.0, + "76": 2843.0, + "77": 3177.0, + "78": 3085.0, + "79": 3080.0, + "80": 3263.0, + "81": 3512.0, + "82": 3152.0, + "83": 2845.0, + "84": 3114.0, + "85": 3196.0, + "86": 2728.0, + "87": 3511.0, + "88": 2941.0, + "89": 3343.0, + "90": 3094.0, + "91": 2946.0, + "92": 3094.0, + "93": 2707.0, + "94": 3304.0, + "95": 3279.0, + "96": 3562.0, + "97": 2956.0, + "98": 3547.0, + "99": 3119.0, + "100": 3119.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 261759488.0, + "2": 261759488.0, + "3": 261759488.0, + "4": 261759488.0, + "5": 261759488.0, + "6": 261759488.0, + "7": 261759488.0, + "8": 261759488.0, + "9": 261759488.0, + "10": 261759488.0, + "11": 261759488.0, + "12": 261759488.0, + "13": 261759488.0, + "14": 261759488.0, + "15": 261759488.0, + "16": 261759488.0, + "17": 261759488.0, + "18": 261759488.0, + "19": 261759488.0, + "20": 261759488.0, + "21": 261759488.0, + "22": 261759488.0, + "23": 261759488.0, + "24": 261759488.0, + "25": 261759488.0, + "26": 261759488.0, + "27": 261759488.0, + "28": 261759488.0, + "29": 261759488.0, + "30": 261759488.0, + "31": 261759488.0, + "32": 261759488.0, + "33": 261759488.0, + "34": 261759488.0, + "35": 261759488.0, + "36": 261759488.0, + "37": 261759488.0, + "38": 261759488.0, + "39": 261759488.0, + "40": 261759488.0, + "41": 261759488.0, + "42": 261759488.0, + "43": 261759488.0, + "44": 261759488.0, + "45": 261759488.0, + "46": 261759488.0, + "47": 261759488.0, + "48": 261759488.0, + "49": 261759488.0, + "50": 261759488.0, + "51": 261759488.0, + "52": 
261759488.0, + "53": 261759488.0, + "54": 261759488.0, + "55": 261759488.0, + "56": 261759488.0, + "57": 261759488.0, + "58": 261759488.0, + "59": 261759488.0, + "60": 261759488.0, + "61": 261759488.0, + "62": 261759488.0, + "63": 261759488.0, + "64": 261759488.0, + "65": 261759488.0, + "66": 261759488.0, + "67": 261759488.0, + "68": 261759488.0, + "69": 261759488.0, + "70": 261759488.0, + "71": 261759488.0, + "72": 261759488.0, + "73": 261759488.0, + "74": 261759488.0, + "75": 261759488.0, + "76": 261759488.0, + "77": 261759488.0, + "78": 261759488.0, + "79": 261759488.0, + "80": 261759488.0, + "81": 261759488.0, + "82": 261759488.0, + "83": 261759488.0, + "84": 261759488.0, + "85": 261759488.0, + "86": 261759488.0, + "87": 261759488.0, + "88": 261759488.0, + "89": 261759488.0, + "90": 261759488.0, + "91": 261759488.0, + "92": 261759488.0, + "93": 261759488.0, + "94": 261759488.0, + "95": 261759488.0, + "96": 261759488.0, + "97": 261759488.0, + "98": 261759488.0, + "99": 261759488.0, + "100": 261759488.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 700958208.0, + "2": 790286848.0, + "3": 790286848.0, + "4": 790286848.0, + "5": 790286848.0, + "6": 790286848.0, + "7": 790286848.0, + "8": 790286848.0, + "9": 790286848.0, + "10": 790286848.0, + "11": 790286848.0, + "12": 790286848.0, + "13": 790286848.0, + "14": 790286848.0, + "15": 790286848.0, + "16": 790286848.0, + "17": 790286848.0, + "18": 790286848.0, + "19": 790286848.0, + "20": 790286848.0, + "21": 790286848.0, + "22": 790286848.0, + "23": 790286848.0, + "24": 790286848.0, + "25": 790286848.0, + "26": 790286848.0, + "27": 790286848.0, + "28": 790286848.0, + "29": 790286848.0, + "30": 790286848.0, + "31": 790286848.0, + "32": 790286848.0, + "33": 790286848.0, + "34": 790286848.0, + "35": 790286848.0, + "36": 790286848.0, + "37": 790286848.0, + "38": 790286848.0, + "39": 790286848.0, + "40": 790286848.0, + "41": 790286848.0, + "42": 
790286848.0, + "43": 790286848.0, + "44": 790286848.0, + "45": 790286848.0, + "46": 790286848.0, + "47": 790286848.0, + "48": 790286848.0, + "49": 790286848.0, + "50": 790286848.0, + "51": 790286848.0, + "52": 790286848.0, + "53": 790286848.0, + "54": 790286848.0, + "55": 790286848.0, + "56": 790286848.0, + "57": 790286848.0, + "58": 790286848.0, + "59": 790286848.0, + "60": 790286848.0, + "61": 790286848.0, + "62": 790286848.0, + "63": 790286848.0, + "64": 790286848.0, + "65": 790286848.0, + "66": 790286848.0, + "67": 790286848.0, + "68": 790286848.0, + "69": 790286848.0, + "70": 790286848.0, + "71": 790286848.0, + "72": 790286848.0, + "73": 790286848.0, + "74": 790286848.0, + "75": 790286848.0, + "76": 790286848.0, + "77": 790286848.0, + "78": 790286848.0, + "79": 790286848.0, + "80": 790286848.0, + "81": 790286848.0, + "82": 790286848.0, + "83": 790286848.0, + "84": 790286848.0, + "85": 790286848.0, + "86": 790286848.0, + "87": 790286848.0, + "88": 790286848.0, + "89": 790286848.0, + "90": 790286848.0, + "91": 790286848.0, + "92": 790286848.0, + "93": 790286848.0, + "94": 790286848.0, + "95": 790286848.0, + "96": 790286848.0, + "97": 790286848.0, + "98": 790286848.0, + "99": 790286848.0, + "100": 790286848.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.14668, + "2": 0.32935, + "3": 0.46923, + "4": 0.29529, + "5": 0.40702, + "6": 0.40156, + "7": 0.43578, + "8": 0.31407, + "9": 0.43033, + "10": 0.31604, + "11": 0.31738, + "12": 0.31563, + "13": 0.36997, + "14": 0.4371, + "15": 0.35906, + "16": 0.31709, + "17": 0.39045, + "18": 0.31331, + "19": 0.3763, + "20": 0.33238, + "21": 0.31767, + "22": 0.43702, + "23": 0.39383, + "24": 0.3148, + "25": 0.31554, + "26": 0.3135, + "27": 0.34957, + "28": 0.31621, + "29": 0.31661, + "30": 0.31507, + "31": 0.41199, + "32": 0.40737, + "33": 0.31355, + "34": 0.31358, + "35": 0.31287, + "36": 0.31491, + "37": 0.36356, + "38": 0.37435, + "39": 0.33637, + "40": 
0.31406, + "41": 0.31613, + "42": 0.35153, + "43": 0.3142, + "44": 0.31623, + "45": 0.31572, + "46": 0.34532, + "47": 0.35769, + "48": 0.36855, + "49": 0.31459, + "50": 0.3144, + "51": 0.32345, + "52": 0.30594, + "53": 0.3111, + "54": 0.31377, + "55": 0.39254, + "56": 0.40899, + "57": 0.48809, + "58": 0.31709, + "59": 0.31541, + "60": 0.3139, + "61": 0.42195, + "62": 0.31636, + "63": 0.31499, + "64": 0.31608, + "65": 0.31718, + "66": 0.31606, + "67": 0.348, + "68": 0.39663, + "69": 0.31776, + "70": 0.31679, + "71": 0.31563, + "72": 0.3148, + "73": 0.31785, + "74": 0.36067, + "75": 0.31679, + "76": 0.31667, + "77": 0.40594, + "78": 0.31863, + "79": 0.31973, + "80": 0.31848, + "81": 0.31801, + "82": 0.31661, + "83": 0.3166, + "84": 0.49879, + "85": 0.31644, + "86": 0.31582, + "87": 0.31672, + "88": 0.31561, + "89": 0.3413, + "90": 0.3984, + "91": 0.31512, + "92": 0.39228, + "93": 0.31251, + "94": 0.311, + "95": 0.31228, + "96": 0.31391, + "97": 0.31003, + "98": 0.31573, + "99": 0.3154, + "100": 0.40105 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json index 5c404dad658..fd1d245462e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 312352256.0, - "2": 312352256.0, - "3": 312352256.0, - "4": 312352256.0, - "5": 312352256.0, - "6": 312352256.0, - "7": 312352256.0, - "8": 312352256.0, - "9": 312352256.0, - "10": 312352256.0, - "11": 312352256.0, - "12": 312352256.0, - "13": 312352256.0, - "14": 312352256.0, - "15": 
312352256.0, - "16": 312352256.0, - "17": 312352256.0, - "18": 312352256.0, - "19": 312352256.0, - "20": 312352256.0, - "21": 312352256.0, - "22": 312352256.0, - "23": 312352256.0, - "24": 312352256.0, - "25": 312352256.0, - "26": 312352256.0, - "27": 312352256.0, - "28": 312352256.0, - "29": 312352256.0, - "30": 312352256.0, - "31": 312352256.0, - "32": 312352256.0, - "33": 312352256.0, - "34": 312352256.0, - "35": 312352256.0, - "36": 312352256.0, - "37": 312352256.0, - "38": 312352256.0, - "39": 312352256.0, - "40": 312352256.0, - "41": 312352256.0, - "42": 312352256.0, - "43": 312352256.0, - "44": 312352256.0, - "45": 312352256.0, - "46": 312352256.0, - "47": 312352256.0, - "48": 312352256.0, - "49": 312352256.0, - "50": 312352256.0, - "51": 312352256.0, - "52": 312352256.0, - "53": 312352256.0, - "54": 312352256.0, - "55": 312352256.0, - "56": 312352256.0, - "57": 312352256.0, - "58": 312352256.0, - "59": 312352256.0, - "60": 312352256.0, - "61": 312352256.0, - "62": 312352256.0, - "63": 312352256.0, - "64": 312352256.0, - "65": 312352256.0, - "66": 312352256.0, - "67": 312352256.0, - "68": 312352256.0, - "69": 312352256.0, - "70": 312352256.0, - "71": 312352256.0, - "72": 312352256.0, - "73": 312352256.0, - "74": 312352256.0, - "75": 312352256.0, - "76": 312352256.0, - "77": 312352256.0, - "78": 312352256.0, - "79": 312352256.0, - "80": 312352256.0, - "81": 312352256.0, - "82": 312352256.0, - "83": 312352256.0, - "84": 312352256.0, - "85": 312352256.0, - "86": 312352256.0, - "87": 312352256.0, - "88": 312352256.0, - "89": 312352256.0, - "90": 312352256.0, - "91": 312352256.0, - "92": 312352256.0, - "93": 312352256.0, - "94": 312352256.0, - "95": 312352256.0, - "96": 312352256.0, - "97": 312352256.0, - "98": 312352256.0, - "99": 312352256.0, - "100": 312352256.0 + "1": 311828992.0, + "2": 311828992.0, + "3": 311828992.0, + "4": 311828992.0, + "5": 311828992.0, + "6": 311828992.0, + "7": 311828992.0, + "8": 311828992.0, + "9": 311828992.0, + "10": 311828992.0, 
+ "11": 311828992.0, + "12": 311828992.0, + "13": 311828992.0, + "14": 311828992.0, + "15": 311828992.0, + "16": 311828992.0, + "17": 311828992.0, + "18": 311828992.0, + "19": 311828992.0, + "20": 311828992.0, + "21": 311828992.0, + "22": 311828992.0, + "23": 311828992.0, + "24": 311828992.0, + "25": 311828992.0, + "26": 311828992.0, + "27": 311828992.0, + "28": 311828992.0, + "29": 311828992.0, + "30": 311828992.0, + "31": 311828992.0, + "32": 311828992.0, + "33": 311828992.0, + "34": 311828992.0, + "35": 311828992.0, + "36": 311828992.0, + "37": 311828992.0, + "38": 311828992.0, + "39": 311828992.0, + "40": 311828992.0, + "41": 311828992.0, + "42": 311828992.0, + "43": 311828992.0, + "44": 311828992.0, + "45": 311828992.0, + "46": 311828992.0, + "47": 311828992.0, + "48": 311828992.0, + "49": 311828992.0, + "50": 311828992.0, + "51": 311828992.0, + "52": 311828992.0, + "53": 311828992.0, + "54": 311828992.0, + "55": 311828992.0, + "56": 311828992.0, + "57": 311828992.0, + "58": 311828992.0, + "59": 311828992.0, + "60": 311828992.0, + "61": 311828992.0, + "62": 311828992.0, + "63": 311828992.0, + "64": 311828992.0, + "65": 311828992.0, + "66": 311828992.0, + "67": 311828992.0, + "68": 311828992.0, + "69": 311828992.0, + "70": 311828992.0, + "71": 311828992.0, + "72": 311828992.0, + "73": 311828992.0, + "74": 311828992.0, + "75": 311828992.0, + "76": 311828992.0, + "77": 311828992.0, + "78": 311828992.0, + "79": 311828992.0, + "80": 311828992.0, + "81": 311828992.0, + "82": 311828992.0, + "83": 311828992.0, + "84": 311828992.0, + "85": 311828992.0, + "86": 311828992.0, + "87": 311828992.0, + "88": 311828992.0, + "89": 311828992.0, + "90": 311828992.0, + "91": 311828992.0, + "92": 311828992.0, + "93": 311828992.0, + "94": 311828992.0, + "95": 311828992.0, + "96": 311828992.0, + "97": 311828992.0, + "98": 311828992.0, + "99": 311828992.0, + "100": 311828992.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - 
"1": 754434560.0, - "2": 843763200.0, - "3": 843763200.0, - "4": 843763200.0, - "5": 843763200.0, - "6": 843763200.0, - "7": 843763200.0, - "8": 843763200.0, - "9": 843763200.0, - "10": 843763200.0, - "11": 843763200.0, - "12": 843763200.0, - "13": 843763200.0, - "14": 843763200.0, - "15": 843763200.0, - "16": 843763200.0, - "17": 843763200.0, - "18": 843763200.0, - "19": 843763200.0, - "20": 843763200.0, - "21": 843763200.0, - "22": 843763200.0, - "23": 843763200.0, - "24": 843763200.0, - "25": 843763200.0, - "26": 843763200.0, - "27": 843763200.0, - "28": 843763200.0, - "29": 843763200.0, - "30": 843763200.0, - "31": 843763200.0, - "32": 843763200.0, - "33": 843763200.0, - "34": 843763200.0, - "35": 843763200.0, - "36": 843763200.0, - "37": 843763200.0, - "38": 843763200.0, - "39": 843763200.0, - "40": 843763200.0, - "41": 843763200.0, - "42": 843763200.0, - "43": 843763200.0, - "44": 843763200.0, - "45": 843763200.0, - "46": 843763200.0, - "47": 843763200.0, - "48": 843763200.0, - "49": 843763200.0, - "50": 843763200.0, - "51": 843763200.0, - "52": 843763200.0, - "53": 843763200.0, - "54": 843763200.0, - "55": 843763200.0, - "56": 843763200.0, - "57": 843763200.0, - "58": 843763200.0, - "59": 843763200.0, - "60": 843763200.0, - "61": 843763200.0, - "62": 843763200.0, - "63": 843763200.0, - "64": 843763200.0, - "65": 843763200.0, - "66": 843763200.0, - "67": 843763200.0, - "68": 843763200.0, - "69": 843763200.0, - "70": 843763200.0, - "71": 843763200.0, - "72": 843763200.0, - "73": 843763200.0, - "74": 843763200.0, - "75": 843763200.0, - "76": 843763200.0, - "77": 843763200.0, - "78": 843763200.0, - "79": 843763200.0, - "80": 843763200.0, - "81": 843763200.0, - "82": 843763200.0, - "83": 843763200.0, - "84": 843763200.0, - "85": 843763200.0, - "86": 843763200.0, - "87": 843763200.0, - "88": 843763200.0, - "89": 843763200.0, - "90": 843763200.0, - "91": 843763200.0, - "92": 843763200.0, - "93": 843763200.0, - "94": 843763200.0, - "95": 843763200.0, - "96": 
843763200.0, - "97": 843763200.0, - "98": 843763200.0, - "99": 843763200.0, - "100": 843763200.0 + "1": 755484160.0, + "2": 844288512.0, + "3": 844288512.0, + "4": 844288512.0, + "5": 844288512.0, + "6": 844288512.0, + "7": 844288512.0, + "8": 844288512.0, + "9": 844288512.0, + "10": 844288512.0, + "11": 844288512.0, + "12": 844288512.0, + "13": 844288512.0, + "14": 844288512.0, + "15": 844288512.0, + "16": 844288512.0, + "17": 844288512.0, + "18": 844288512.0, + "19": 844288512.0, + "20": 844288512.0, + "21": 844288512.0, + "22": 844288512.0, + "23": 844288512.0, + "24": 844288512.0, + "25": 844288512.0, + "26": 844288512.0, + "27": 844288512.0, + "28": 844288512.0, + "29": 844288512.0, + "30": 844288512.0, + "31": 844288512.0, + "32": 844288512.0, + "33": 844288512.0, + "34": 844288512.0, + "35": 844288512.0, + "36": 844288512.0, + "37": 844288512.0, + "38": 844288512.0, + "39": 844288512.0, + "40": 844288512.0, + "41": 844288512.0, + "42": 844288512.0, + "43": 844288512.0, + "44": 844288512.0, + "45": 844288512.0, + "46": 844288512.0, + "47": 844288512.0, + "48": 844288512.0, + "49": 844288512.0, + "50": 844288512.0, + "51": 844288512.0, + "52": 844288512.0, + "53": 844288512.0, + "54": 844288512.0, + "55": 844288512.0, + "56": 844288512.0, + "57": 844288512.0, + "58": 844288512.0, + "59": 844288512.0, + "60": 844288512.0, + "61": 844288512.0, + "62": 844288512.0, + "63": 844288512.0, + "64": 844288512.0, + "65": 844288512.0, + "66": 844288512.0, + "67": 844288512.0, + "68": 844288512.0, + "69": 844288512.0, + "70": 844288512.0, + "71": 844288512.0, + "72": 844288512.0, + "73": 844288512.0, + "74": 844288512.0, + "75": 844288512.0, + "76": 844288512.0, + "77": 844288512.0, + "78": 844288512.0, + "79": 844288512.0, + "80": 844288512.0, + "81": 844288512.0, + "82": 844288512.0, + "83": 844288512.0, + "84": 844288512.0, + "85": 844288512.0, + "86": 844288512.0, + "87": 844288512.0, + "88": 844288512.0, + "89": 844288512.0, + "90": 844288512.0, + "91": 844288512.0, 
+ "92": 844288512.0, + "93": 844288512.0, + "94": 844288512.0, + "95": 844288512.0, + "96": 844288512.0, + "97": 844288512.0, + "98": 844288512.0, + "99": 844288512.0, + "100": 844288512.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 13.61637, - "2": 0.24414, - "3": 0.22872, - "4": 0.22599, - "5": 0.22586, - "6": 0.22773, - "7": 0.22791, - "8": 0.22857, - "9": 0.2283, - "10": 0.22732, - "11": 0.22633, - "12": 0.22761, - "13": 0.22748, - "14": 0.23094, - "15": 0.22968, - "16": 0.22849, - "17": 0.22934, - "18": 0.22814, - "19": 0.22822, - "20": 0.22758, - "21": 0.22806, - "22": 0.25737, - "23": 0.24238, - "24": 0.23166, - "25": 0.22695, - "26": 0.22857, - "27": 0.23442, - "28": 0.22861, - "29": 0.2302, - "30": 0.2316, - "31": 0.23014, - "32": 0.22948, - "33": 0.23272, - "34": 0.23222, - "35": 0.23035, - "36": 0.23384, - "37": 0.23085, - "38": 0.23058, - "39": 0.23686, - "40": 0.23939, - "41": 0.23562, - "42": 0.23544, - "43": 0.23293, - "44": 0.22874, - "45": 0.234, - "46": 0.22942, - "47": 0.23036, - "48": 0.23404, - "49": 0.2686, - "50": 0.24831, - "51": 0.28415, - "52": 0.23699, - "53": 0.26129, - "54": 0.2273, - "55": 0.22639, - "56": 0.22691, - "57": 0.22504, - "58": 0.22822, - "59": 0.22913, - "60": 0.22577, - "61": 0.23097, - "62": 0.22702, - "63": 0.22579, - "64": 0.22717, - "65": 0.22986, - "66": 0.22481, - "67": 0.22676, - "68": 0.22643, - "69": 0.22933, - "70": 0.23566, - "71": 0.22795, - "72": 0.22654, - "73": 0.2256, - "74": 0.22941, - "75": 0.23701, - "76": 0.23527, - "77": 0.23476, - "78": 0.23472, - "79": 0.22599, - "80": 0.22758, - "81": 0.22717, - "82": 0.22657, - "83": 0.22688, - "84": 0.22827, - "85": 0.22612, - "86": 0.22871, - "87": 0.23133, - "88": 0.22934, - "89": 0.22859, - "90": 0.22635, - "91": 0.22606, - "92": 0.2297, - "93": 0.22713, - "94": 0.2261, - "95": 0.227, - "96": 0.23135, - "97": 0.22866, - "98": 0.22601, - "99": 0.2277, - "100": 0.2323 + "1": 9.99954, + "2": 0.2844, + 
"3": 0.21531, + "4": 0.19894, + "5": 0.19896, + "6": 0.19827, + "7": 0.19932, + "8": 0.20009, + "9": 0.19826, + "10": 0.19917, + "11": 0.19961, + "12": 0.19975, + "13": 0.20483, + "14": 0.20549, + "15": 0.19855, + "16": 0.19911, + "17": 0.19768, + "18": 0.19797, + "19": 0.19725, + "20": 0.19763, + "21": 0.19859, + "22": 0.20076, + "23": 0.19965, + "24": 0.19495, + "25": 0.1933, + "26": 0.19302, + "27": 0.19426, + "28": 0.19183, + "29": 0.19326, + "30": 0.1926, + "31": 0.19268, + "32": 0.1921, + "33": 0.19395, + "34": 0.1932, + "35": 0.19421, + "36": 0.19128, + "37": 0.19268, + "38": 0.1936, + "39": 0.19222, + "40": 0.19436, + "41": 0.19323, + "42": 0.19182, + "43": 0.19358, + "44": 0.19401, + "45": 0.1935, + "46": 0.19276, + "47": 0.19598, + "48": 0.19322, + "49": 0.19379, + "50": 0.19239, + "51": 0.20371, + "52": 0.19298, + "53": 0.21521, + "54": 0.21625, + "55": 0.19257, + "56": 0.1959, + "57": 0.19218, + "58": 0.19272, + "59": 0.19009, + "60": 0.19106, + "61": 0.19155, + "62": 0.19168, + "63": 0.191, + "64": 0.19045, + "65": 0.19015, + "66": 0.19568, + "67": 0.19034, + "68": 0.19165, + "69": 0.19136, + "70": 0.19369, + "71": 0.19227, + "72": 0.19248, + "73": 0.18982, + "74": 0.18984, + "75": 0.18976, + "76": 0.19243, + "77": 0.19198, + "78": 0.18981, + "79": 0.18977, + "80": 0.19102, + "81": 0.18951, + "82": 0.19227, + "83": 0.18983, + "84": 0.19005, + "85": 0.18923, + "86": 0.18901, + "87": 0.1898, + "88": 0.18885, + "89": 0.18842, + "90": 0.18857, + "91": 0.18847, + "92": 0.18973, + "93": 0.19045, + "94": 0.1894, + "95": 0.18946, + "96": 0.18844, + "97": 0.18946, + "98": 0.1889, + "99": 0.1905, + "100": 0.19169 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 
00000000000..106835fbcc0 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.8695, + "52": 9.76154, + "53": 10.08349, + "54": 9.97449, + "55": 9.89437, + "56": 9.6424, + "57": 9.50352, + "58": 9.84153, + "59": 9.60017, + "60": 9.51715, + "61": 9.70458, + "62": 9.98292, + "63": 9.39067, + "64": 9.7797, + "65": 8.96053, + "66": 9.70288, + "67": 9.3734, + "68": 9.78805, + "69": 9.79828, + "70": 9.74999, + "71": 9.62682, + "72": 9.59043, + "73": 9.49893, + "74": 8.94842, + "75": 9.42922, + "76": 9.08268, + "77": 10.07413, + "78": 9.73322, + "79": 9.38352, + "80": 9.40713, + "81": 9.48366, + "82": 9.70577, + "83": 9.3103, + "84": 9.41846, + "85": 9.62053, + "86": 9.08533, + "87": 9.59962, + "88": 9.75141, + "89": 9.60594, + "90": 9.8245, + "91": 9.33973, + "92": 9.36344, + "93": 9.08397, + "94": 8.83571, + "95": 9.51936, + "96": 9.53001, + "97": 9.31995, + "98": 9.67709, + "99": 8.88909, + "100": 9.40491 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 
"nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2484.0, + "52": 2568.0, + "53": 2834.0, + "54": 2607.0, + "55": 2149.0, + "56": 2683.0, + "57": 2283.0, + "58": 2764.0, + "59": 2623.0, + "60": 2456.0, + "61": 2938.0, + "62": 2456.0, + "63": 2279.0, + "64": 3078.0, + "65": 2504.0, + "66": 2881.0, + "67": 2683.0, + "68": 2657.0, + "69": 2832.0, + "70": 3144.0, + "71": 2930.0, + "72": 2328.0, + "73": 2984.0, + "74": 1752.0, + "75": 2451.0, + "76": 3040.0, + "77": 3213.0, + "78": 2936.0, + "79": 2941.0, + "80": 3112.0, + "81": 3568.0, + "82": 3105.0, + "83": 2725.0, + "84": 3051.0, + "85": 3170.0, + "86": 2645.0, + "87": 3586.0, + "88": 2902.0, + "89": 3371.0, + "90": 2971.0, + "91": 2800.0, + "92": 3017.0, + "93": 2524.0, + "94": 3384.0, + "95": 3147.0, + "96": 3388.0, + "97": 3031.0, + "98": 3619.0, + "99": 3004.0, + "100": 3100.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + 
"26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 311828992.0, + "52": 311828992.0, + "53": 311828992.0, + "54": 311828992.0, + "55": 311828992.0, + "56": 311828992.0, + "57": 311828992.0, + "58": 311828992.0, + "59": 311828992.0, + "60": 311828992.0, + "61": 311828992.0, + "62": 311828992.0, + "63": 311828992.0, + "64": 311828992.0, + "65": 311828992.0, + "66": 311828992.0, + "67": 311828992.0, + "68": 311828992.0, + "69": 311828992.0, + "70": 311828992.0, + "71": 311828992.0, + "72": 311828992.0, + "73": 311828992.0, + "74": 311828992.0, + "75": 311828992.0, + "76": 311828992.0, + "77": 311828992.0, + "78": 311828992.0, + "79": 311828992.0, + "80": 311828992.0, + "81": 311828992.0, + "82": 311828992.0, + "83": 311828992.0, + "84": 311828992.0, + "85": 311828992.0, + "86": 311828992.0, + "87": 311828992.0, + "88": 311828992.0, + "89": 311828992.0, + "90": 311828992.0, + "91": 311828992.0, + "92": 311828992.0, + "93": 311828992.0, + "94": 311828992.0, + "95": 311828992.0, + "96": 311828992.0, + "97": 311828992.0, + "98": 311828992.0, + "99": 311828992.0, + "100": 311828992.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + 
"32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 845336064.0, + "52": 845337088.0, + "53": 845337088.0, + "54": 845337088.0, + "55": 845337088.0, + "56": 845337088.0, + "57": 845337088.0, + "58": 845337088.0, + "59": 845337088.0, + "60": 845337088.0, + "61": 845337088.0, + "62": 845337088.0, + "63": 845337088.0, + "64": 845337088.0, + "65": 845337088.0, + "66": 845337088.0, + "67": 845337088.0, + "68": 845337088.0, + "69": 845337088.0, + "70": 845337088.0, + "71": 845337088.0, + "72": 845337088.0, + "73": 845337088.0, + "74": 845337088.0, + "75": 845337088.0, + "76": 845337088.0, + "77": 845337088.0, + "78": 845337088.0, + "79": 845337088.0, + "80": 845337088.0, + "81": 845337088.0, + "82": 845337088.0, + "83": 845337088.0, + "84": 845337088.0, + "85": 845337088.0, + "86": 845337088.0, + "87": 845337088.0, + "88": 845337088.0, + "89": 845337088.0, + "90": 845337088.0, + "91": 845337088.0, + "92": 845337088.0, + "93": 845337088.0, + "94": 845337088.0, + "95": 845337088.0, + "96": 845337088.0, + "97": 845337088.0, + "98": 845337088.0, + "99": 845337088.0, + "100": 845337088.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", 
+ "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 11.77615, + "52": 0.26842, + "53": 0.22425, + "54": 0.22629, + "55": 0.19714, + "56": 0.19595, + "57": 0.19408, + "58": 0.19455, + "59": 0.19527, + "60": 0.19277, + "61": 0.19626, + "62": 0.19225, + "63": 0.19531, + "64": 0.19329, + "65": 0.19633, + "66": 0.20818, + "67": 0.20691, + "68": 0.19203, + "69": 0.19251, + "70": 0.19524, + "71": 0.19414, + "72": 0.19212, + "73": 0.19189, + "74": 0.19323, + "75": 0.19106, + "76": 0.19302, + "77": 0.19126, + "78": 0.19419, + "79": 0.1946, + "80": 0.19275, + "81": 0.19432, + "82": 0.19583, + "83": 0.19969, + "84": 0.19643, + "85": 0.19472, + "86": 0.1986, + "87": 0.19301, + "88": 0.19387, + "89": 0.19581, + "90": 0.19215, + "91": 0.19286, + "92": 0.19237, + "93": 0.1931, + "94": 0.19448, + "95": 0.19755, + "96": 0.195, + "97": 0.19341, + "98": 0.19626, + "99": 0.19167, + "100": 0.19047 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json index f273ff540d3..33ed61d5e20 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json @@ -6,104 +6,104 @@ "values": { "1": 10.85936, "2": 10.8548, - "3": 10.85199, - "4": 10.84317, - "5": 10.87247, - "6": 10.87857, - "7": 10.84622, - "8": 10.86369, + "3": 10.85198, + "4": 10.84316, + "5": 10.8725, + "6": 10.87861, + "7": 10.84626, + "8": 10.86367, "9": 10.87211, - "10": 10.8311, + "10": 10.83111, "11": 10.86068, "12": 10.87273, - "13": 
10.87992, - "14": 10.88657, - "15": 10.82029, - "16": 10.82684, - "17": 10.7998, - "18": 10.81985, - "19": 10.80035, - "20": 10.71399, - "21": 10.69893, + "13": 10.87988, + "14": 10.88658, + "15": 10.82024, + "16": 10.82685, + "17": 10.79977, + "18": 10.81982, + "19": 10.80036, + "20": 10.71402, + "21": 10.69897, "22": 10.57449, "23": 10.71973, - "24": 10.60285, - "25": 10.54611, - "26": 10.61041, - "27": 10.61227, - "28": 10.57731, - "29": 10.58005, - "30": 10.36705, - "31": 10.13447, - "32": 10.47127, - "33": 10.47454, - "34": 10.23198, - "35": 10.28443, - "36": 10.23436, - "37": 10.35346, - "38": 10.20696, - "39": 10.40599, - "40": 10.08972, - "41": 10.16331, + "24": 10.60276, + "25": 10.5461, + "26": 10.61045, + "27": 10.61226, + "28": 10.57728, + "29": 10.58002, + "30": 10.36711, + "31": 10.13446, + "32": 10.47126, + "33": 10.47458, + "34": 10.23197, + "35": 10.28446, + "36": 10.23439, + "37": 10.3534, + "38": 10.20693, + "39": 10.40598, + "40": 10.08969, + "41": 10.16335, "42": 10.2256, - "43": 9.8639, - "44": 9.98246, - "45": 9.84548, - "46": 9.8581, + "43": 9.86391, + "44": 9.98249, + "45": 9.84549, + "46": 9.85808, "47": 10.1689, "48": 9.86658, "49": 9.54555, - "50": 9.91937, - "51": 9.86074, - "52": 9.76116, - "53": 10.08415, - "54": 9.96563, - "55": 9.89123, + "50": 9.91938, + "51": 9.86073, + "52": 9.76125, + "53": 10.08412, + "54": 9.96565, + "55": 9.89124, "56": 9.63923, - "57": 9.4936, - "58": 9.83871, + "57": 9.49364, + "58": 9.83867, "59": 9.59623, - "60": 9.5091, - "61": 9.70544, - "62": 9.99513, - "63": 9.38104, + "60": 9.50909, + "61": 9.70543, + "62": 9.99515, + "63": 9.38102, "64": 9.78222, - "65": 8.95962, - "66": 9.71006, - "67": 9.38013, - "68": 9.78827, - "69": 9.79425, - "70": 9.73517, - "71": 9.62218, - "72": 9.58801, + "65": 8.95965, + "66": 9.71007, + "67": 9.38014, + "68": 9.78825, + "69": 9.79432, + "70": 9.7352, + "71": 9.6222, + "72": 9.58803, "73": 9.49714, "74": 8.94242, - "75": 9.4322, - "76": 9.09757, - "77": 10.06853, - "78": 
9.73055, - "79": 9.37759, - "80": 9.41116, - "81": 9.48631, - "82": 9.69758, - "83": 9.31674, - "84": 9.42151, + "75": 9.43219, + "76": 9.09756, + "77": 10.06849, + "78": 9.73057, + "79": 9.37757, + "80": 9.41117, + "81": 9.4863, + "82": 9.6976, + "83": 9.3167, + "84": 9.42154, "85": 9.61502, - "86": 9.07627, - "87": 9.59887, - "88": 9.75047, - "89": 9.61233, + "86": 9.0763, + "87": 9.59888, + "88": 9.75044, + "89": 9.61234, "90": 9.82363, - "91": 9.35377, - "92": 9.36525, - "93": 9.08833, - "94": 8.83614, - "95": 9.5226, - "96": 9.52736, + "91": 9.3537, + "92": 9.36524, + "93": 9.08832, + "94": 8.83613, + "95": 9.52262, + "96": 9.52735, "97": 9.3169, - "98": 9.67961, - "99": 8.89276, - "100": 9.40803 + "98": 9.67958, + "99": 8.89279, + "100": 9.40809 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1768.0, - "2": 1871.0, - "3": 1757.0, - "4": 1902.0, - "5": 2016.0, - "6": 1943.0, - "7": 1981.0, - "8": 1667.0, - "9": 1973.0, - "10": 1477.0, - "11": 2178.0, - "12": 1985.0, - "13": 2137.0, - "14": 2021.0, - "15": 1944.0, - "16": 2053.0, - "17": 1967.0, - "18": 1922.0, - "19": 2031.0, - "20": 1837.0, - "21": 2048.0, - "22": 1917.0, - "23": 2190.0, - "24": 1787.0, - "25": 1869.0, - "26": 1882.0, - "27": 2143.0, - "28": 2147.0, - "29": 2222.0, - "30": 2046.0, - "31": 1734.0, - "32": 2171.0, - "33": 2380.0, - "34": 2046.0, - "35": 2147.0, - "36": 2149.0, - "37": 2645.0, - "38": 2416.0, - "39": 2672.0, - "40": 2441.0, - "41": 2585.0, - "42": 2483.0, - "43": 2262.0, - "44": 2344.0, - "45": 2300.0, - "46": 2560.0, - "47": 2755.0, - "48": 2764.0, - "49": 2505.0, - "50": 2723.0, - "51": 2806.0, - "52": 2805.0, - "53": 3225.0, - "54": 3028.0, - "55": 2486.0, - "56": 3093.0, - "57": 2588.0, - "58": 3219.0, - "59": 3021.0, - "60": 2649.0, - "61": 3247.0, - "62": 2649.0, - "63": 2637.0, - "64": 3140.0, - "65": 3038.0, - "66": 3422.0, - "67": 2933.0, - "68": 3039.0, - "69": 3167.0, - "70": 3539.0, - "71": 3213.0, - "72": 2597.0, - 
"73": 3290.0, - "74": 2140.0, - "75": 2837.0, - "76": 3342.0, - "77": 3444.0, - "78": 3504.0, - "79": 3513.0, - "80": 3733.0, - "81": 4024.0, - "82": 3670.0, - "83": 3199.0, - "84": 3539.0, - "85": 3585.0, - "86": 2979.0, - "87": 3951.0, - "88": 3286.0, - "89": 3787.0, - "90": 3341.0, - "91": 3070.0, - "92": 3410.0, - "93": 2923.0, - "94": 3868.0, - "95": 3627.0, - "96": 3787.0, - "97": 3549.0, - "98": 4026.0, - "99": 3531.0, - "100": 3649.0 + "1": 1789.0, + "2": 1890.0, + "3": 1856.0, + "4": 2016.0, + "5": 2048.0, + "6": 1995.0, + "7": 1995.0, + "8": 1655.0, + "9": 1922.0, + "10": 1507.0, + "11": 2196.0, + "12": 1957.0, + "13": 2117.0, + "14": 2079.0, + "15": 2008.0, + "16": 1983.0, + "17": 2006.0, + "18": 1819.0, + "19": 1967.0, + "20": 1758.0, + "21": 2058.0, + "22": 1937.0, + "23": 2263.0, + "24": 1884.0, + "25": 1756.0, + "26": 1894.0, + "27": 2052.0, + "28": 2078.0, + "29": 2206.0, + "30": 2065.0, + "31": 1708.0, + "32": 2129.0, + "33": 2384.0, + "34": 2134.0, + "35": 2113.0, + "36": 2074.0, + "37": 2665.0, + "38": 2465.0, + "39": 2589.0, + "40": 2392.0, + "41": 2513.0, + "42": 2448.0, + "43": 2185.0, + "44": 2326.0, + "45": 2331.0, + "46": 2640.0, + "47": 2686.0, + "48": 2674.0, + "49": 2589.0, + "50": 2834.0, + "51": 2841.0, + "52": 2853.0, + "53": 3184.0, + "54": 2849.0, + "55": 2661.0, + "56": 3110.0, + "57": 2571.0, + "58": 3237.0, + "59": 2973.0, + "60": 2722.0, + "61": 3162.0, + "62": 2823.0, + "63": 2664.0, + "64": 3252.0, + "65": 2911.0, + "66": 3337.0, + "67": 2866.0, + "68": 3114.0, + "69": 3117.0, + "70": 3464.0, + "71": 3260.0, + "72": 2574.0, + "73": 3136.0, + "74": 2181.0, + "75": 2818.0, + "76": 3370.0, + "77": 3581.0, + "78": 3538.0, + "79": 3597.0, + "80": 3756.0, + "81": 3986.0, + "82": 3628.0, + "83": 3213.0, + "84": 3441.0, + "85": 3593.0, + "86": 3051.0, + "87": 4066.0, + "88": 3328.0, + "89": 3726.0, + "90": 3375.0, + "91": 3181.0, + "92": 3417.0, + "93": 3027.0, + "94": 3758.0, + "95": 3688.0, + "96": 3847.0, + "97": 3383.0, + "98": 
4018.0, + "99": 3469.0, + "100": 3505.0 } }, "mem-allocated-bytes": { @@ -218,7 +218,7 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 232398336.0, + "1": 233446912.0, "2": 232398336.0, "3": 232398336.0, "4": 232398336.0, @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 685490688.0, - "2": 773246464.0, - "3": 773246464.0, - "4": 773246464.0, - "5": 773246464.0, - "6": 773246464.0, - "7": 773246464.0, - "8": 773246464.0, - "9": 773246464.0, - "10": 773246464.0, - "11": 773246464.0, - "12": 773246464.0, - "13": 773246464.0, - "14": 773246464.0, - "15": 773246464.0, - "16": 773246464.0, - "17": 773246464.0, - "18": 773246464.0, - "19": 773246464.0, - "20": 773246464.0, - "21": 773246464.0, - "22": 773246464.0, - "23": 773246464.0, - "24": 773246464.0, - "25": 773246464.0, - "26": 773246464.0, - "27": 773246464.0, - "28": 773246464.0, - "29": 773246464.0, - "30": 773246464.0, - "31": 773246464.0, - "32": 773246464.0, - "33": 773246464.0, - "34": 773246464.0, - "35": 773246464.0, - "36": 773246464.0, - "37": 773246464.0, - "38": 773246464.0, - "39": 773246464.0, - "40": 773246464.0, - "41": 773246464.0, - "42": 773246464.0, - "43": 773246464.0, - "44": 773246464.0, - "45": 773246464.0, - "46": 773246464.0, - "47": 773246464.0, - "48": 773246464.0, - "49": 773246464.0, - "50": 773246464.0, - "51": 773246464.0, - "52": 773246464.0, - "53": 773246464.0, - "54": 773246464.0, - "55": 773246464.0, - "56": 773246464.0, - "57": 773246464.0, - "58": 773246464.0, - "59": 773246464.0, - "60": 773246464.0, - "61": 773246464.0, - "62": 773246464.0, - "63": 773246464.0, - "64": 773246464.0, - "65": 773246464.0, - "66": 773246464.0, - "67": 773246464.0, - "68": 773246464.0, - "69": 773246464.0, - "70": 773246464.0, - "71": 773246464.0, - "72": 773246464.0, - "73": 773246464.0, - "74": 773246464.0, - "75": 773246464.0, - "76": 773246464.0, - "77": 773246464.0, - "78": 773246464.0, - "79": 773246464.0, - "80": 773246464.0, - "81": 773246464.0, - 
"82": 773246464.0, - "83": 773246464.0, - "84": 773246464.0, - "85": 773246464.0, - "86": 773246464.0, - "87": 773246464.0, - "88": 773246464.0, - "89": 773246464.0, - "90": 773246464.0, - "91": 773246464.0, - "92": 773246464.0, - "93": 773246464.0, - "94": 773246464.0, - "95": 773246464.0, - "96": 773246464.0, - "97": 773246464.0, - "98": 773246464.0, - "99": 773246464.0, - "100": 773246464.0 + "1": 686539264.0, + "2": 775343616.0, + "3": 775343616.0, + "4": 775343616.0, + "5": 775343616.0, + "6": 775343616.0, + "7": 775343616.0, + "8": 775343616.0, + "9": 775343616.0, + "10": 775343616.0, + "11": 775343616.0, + "12": 775343616.0, + "13": 775343616.0, + "14": 775343616.0, + "15": 775343616.0, + "16": 775343616.0, + "17": 775343616.0, + "18": 775343616.0, + "19": 775343616.0, + "20": 775343616.0, + "21": 775343616.0, + "22": 775343616.0, + "23": 775343616.0, + "24": 775343616.0, + "25": 775343616.0, + "26": 775343616.0, + "27": 775343616.0, + "28": 775343616.0, + "29": 775343616.0, + "30": 775343616.0, + "31": 775343616.0, + "32": 775343616.0, + "33": 775343616.0, + "34": 775343616.0, + "35": 775343616.0, + "36": 775343616.0, + "37": 775343616.0, + "38": 775343616.0, + "39": 775343616.0, + "40": 775343616.0, + "41": 775343616.0, + "42": 775343616.0, + "43": 775343616.0, + "44": 775343616.0, + "45": 775343616.0, + "46": 775343616.0, + "47": 775343616.0, + "48": 775343616.0, + "49": 775343616.0, + "50": 775343616.0, + "51": 775343616.0, + "52": 775343616.0, + "53": 775343616.0, + "54": 775343616.0, + "55": 775343616.0, + "56": 775343616.0, + "57": 775343616.0, + "58": 775343616.0, + "59": 775343616.0, + "60": 775343616.0, + "61": 775343616.0, + "62": 775343616.0, + "63": 775343616.0, + "64": 775343616.0, + "65": 775343616.0, + "66": 775343616.0, + "67": 775343616.0, + "68": 775343616.0, + "69": 775343616.0, + "70": 775343616.0, + "71": 775343616.0, + "72": 775343616.0, + "73": 775343616.0, + "74": 775343616.0, + "75": 775343616.0, + "76": 775343616.0, + "77": 
775343616.0, + "78": 775343616.0, + "79": 775343616.0, + "80": 775343616.0, + "81": 775343616.0, + "82": 775343616.0, + "83": 775343616.0, + "84": 775343616.0, + "85": 775343616.0, + "86": 775343616.0, + "87": 775343616.0, + "88": 775343616.0, + "89": 775343616.0, + "90": 775343616.0, + "91": 775343616.0, + "92": 775343616.0, + "93": 775343616.0, + "94": 775343616.0, + "95": 775343616.0, + "96": 775343616.0, + "97": 775343616.0, + "98": 775343616.0, + "99": 775343616.0, + "100": 775343616.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.93671, - "2": 0.44025, - "3": 0.31978, - "4": 0.30044, - "5": 0.29939, - "6": 0.29882, - "7": 0.29791, - "8": 0.29478, - "9": 0.29711, - "10": 0.29556, - "11": 0.29815, - "12": 0.29967, - "13": 0.29479, - "14": 0.29726, - "15": 0.29661, - "16": 0.29615, - "17": 0.29592, - "18": 0.29568, - "19": 0.29536, - "20": 0.29486, - "21": 0.29478, - "22": 0.29533, - "23": 0.29472, - "24": 0.29577, - "25": 0.29612, - "26": 0.29259, - "27": 0.28753, - "28": 0.28697, - "29": 0.70578, - "30": 0.29095, - "31": 0.29056, - "32": 0.29195, - "33": 0.29198, - "34": 0.29205, - "35": 0.29049, - "36": 0.28947, - "37": 0.29052, - "38": 0.29096, - "39": 0.29096, - "40": 0.29115, - "41": 0.29128, - "42": 0.29068, - "43": 0.29094, - "44": 0.29228, - "45": 0.29059, - "46": 0.29108, - "47": 0.29102, - "48": 0.29077, - "49": 0.29062, - "50": 0.2902, - "51": 0.30007, - "52": 0.63804, - "53": 0.28911, - "54": 0.46416, - "55": 0.29262, - "56": 0.37133, - "57": 0.29216, - "58": 0.32564, - "59": 0.29296, - "60": 0.2903, - "61": 0.29162, - "62": 0.28953, - "63": 0.28969, - "64": 0.28976, - "65": 0.64598, - "66": 0.28891, - "67": 0.55309, - "68": 0.67465, - "69": 0.35714, - "70": 0.3918, - "71": 0.2878, - "72": 0.33397, - "73": 0.41898, - "74": 0.29045, - "75": 0.31982, - "76": 0.28797, - "77": 0.34091, - "78": 0.52101, - "79": 0.29094, - "80": 0.299, - "81": 0.43963, - "82": 0.28851, - "83": 0.38734, - "84": 
0.38974, - "85": 0.38902, - "86": 0.69087, - "87": 0.37076, - "88": 0.29102, - "89": 0.55341, - "90": 0.54278, - "91": 0.28909, - "92": 0.31421, - "93": 0.29166, - "94": 0.29126, - "95": 0.32114, - "96": 0.29039, - "97": 0.30171, - "98": 0.29192, - "99": 0.29197, - "100": 0.31795 + "1": 5.48931, + "2": 0.38781, + "3": 0.30745, + "4": 0.29469, + "5": 0.29328, + "6": 0.29844, + "7": 0.29347, + "8": 0.29314, + "9": 0.29281, + "10": 0.29323, + "11": 0.29135, + "12": 0.29127, + "13": 0.2914, + "14": 0.29074, + "15": 0.29691, + "16": 0.30283, + "17": 0.29988, + "18": 0.29873, + "19": 0.29704, + "20": 0.29912, + "21": 0.30262, + "22": 0.30204, + "23": 0.30199, + "24": 0.30225, + "25": 0.30036, + "26": 0.29842, + "27": 0.29878, + "28": 0.29797, + "29": 0.29719, + "30": 0.29875, + "31": 0.29743, + "32": 0.2987, + "33": 0.29958, + "34": 0.29843, + "35": 0.29886, + "36": 0.29816, + "37": 0.29796, + "38": 0.29796, + "39": 0.29692, + "40": 0.29756, + "41": 0.29712, + "42": 0.29674, + "43": 0.29758, + "44": 0.2971, + "45": 0.29798, + "46": 0.29812, + "47": 0.29773, + "48": 0.30095, + "49": 0.29437, + "50": 0.29498, + "51": 0.33787, + "52": 0.29219, + "53": 0.29371, + "54": 0.29832, + "55": 0.28876, + "56": 0.28903, + "57": 0.29103, + "58": 0.29066, + "59": 0.28874, + "60": 0.289, + "61": 0.28856, + "62": 0.2897, + "63": 0.28854, + "64": 0.28899, + "65": 0.29126, + "66": 0.28906, + "67": 0.28978, + "68": 0.28897, + "69": 0.2889, + "70": 0.28915, + "71": 0.28827, + "72": 0.28768, + "73": 0.28843, + "74": 0.28863, + "75": 0.28877, + "76": 0.28811, + "77": 0.28855, + "78": 0.28804, + "79": 0.28833, + "80": 0.28882, + "81": 0.28873, + "82": 0.28884, + "83": 0.28861, + "84": 0.28901, + "85": 0.28795, + "86": 0.28814, + "87": 0.28857, + "88": 0.288, + "89": 0.28839, + "90": 0.28805, + "91": 0.28918, + "92": 0.2879, + "93": 0.28927, + "94": 0.28862, + "95": 0.28972, + "96": 0.28939, + "97": 0.288, + "98": 0.28768, + "99": 0.28865, + "100": 0.28729 } } } \ No newline at end of file diff 
--git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..11130fada71 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.86073, + "52": 9.76122, + "53": 10.08416, + "54": 9.96562, + "55": 9.89126, + "56": 9.63921, + "57": 9.4936, + "58": 9.83868, + "59": 9.59625, + "60": 9.50906, + "61": 9.7054, + "62": 9.99515, + "63": 9.38097, + "64": 9.78219, + "65": 8.95965, + "66": 9.71003, + "67": 9.38014, + "68": 9.78828, + "69": 9.79431, + "70": 9.7352, + "71": 9.62218, + "72": 9.58801, + "73": 9.49717, + "74": 8.94242, + "75": 9.43221, + "76": 9.09754, + "77": 10.06851, + "78": 9.73059, + "79": 9.37757, + "80": 9.41117, + "81": 9.48633, + "82": 9.69758, + "83": 9.3167, + "84": 9.42152, + "85": 9.61504, + "86": 9.07627, + "87": 9.59883, + "88": 9.75043, + "89": 9.61229, + "90": 
9.82365, + "91": 9.35377, + "92": 9.36527, + "93": 9.08834, + "94": 8.83612, + "95": 9.52265, + "96": 9.52736, + "97": 9.31693, + "98": 9.67961, + "99": 8.89278, + "100": 9.40806 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2810.0, + "52": 2895.0, + "53": 3212.0, + "54": 2965.0, + "55": 2665.0, + "56": 3040.0, + "57": 2570.0, + "58": 3275.0, + "59": 3010.0, + "60": 2665.0, + "61": 3106.0, + "62": 2811.0, + "63": 2762.0, + "64": 3180.0, + "65": 2941.0, + "66": 3474.0, + "67": 2980.0, + "68": 3013.0, + "69": 3189.0, + "70": 3464.0, + "71": 3128.0, + "72": 2493.0, + "73": 3343.0, + "74": 2172.0, + "75": 2799.0, + "76": 3444.0, + "77": 3549.0, + "78": 3550.0, + "79": 3566.0, + "80": 3729.0, + "81": 3979.0, + "82": 3652.0, + "83": 3217.0, + "84": 3597.0, + "85": 3661.0, + "86": 3069.0, + "87": 4117.0, + "88": 3340.0, + "89": 3817.0, + "90": 3476.0, + "91": 3025.0, + "92": 3456.0, + "93": 2943.0, + "94": 3710.0, + "95": 3705.0, + "96": 3758.0, + "97": 3465.0, + "98": 4041.0, + "99": 3360.0, + "100": 3639.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", 
+ "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 232398336.0, + "52": 232398336.0, + "53": 232398336.0, + "54": 232398336.0, + "55": 232398336.0, + "56": 232398336.0, + "57": 232398336.0, + "58": 232398336.0, + "59": 232398336.0, + "60": 232398336.0, + "61": 232398336.0, + "62": 232398336.0, + "63": 232398336.0, + "64": 232398336.0, + "65": 232398336.0, + "66": 232398336.0, + "67": 232398336.0, + "68": 232398336.0, + "69": 232398336.0, + "70": 232398336.0, + "71": 232398336.0, + "72": 232398336.0, + "73": 232398336.0, + "74": 232398336.0, + "75": 232398336.0, + "76": 232398336.0, + "77": 232398336.0, + "78": 232398336.0, + "79": 232398336.0, + "80": 232398336.0, + "81": 232398336.0, + "82": 232398336.0, + "83": 232398336.0, + "84": 232398336.0, + "85": 232398336.0, + "86": 232398336.0, + "87": 232398336.0, + "88": 232398336.0, + "89": 232398336.0, + "90": 232398336.0, + "91": 232398336.0, + "92": 232398336.0, + "93": 232398336.0, + "94": 232398336.0, + "95": 232398336.0, + "96": 232398336.0, + "97": 232398336.0, + "98": 232398336.0, + "99": 232398336.0, + "100": 232398336.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": 
"nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 777437184.0, + "52": 777437184.0, + "53": 777438720.0, + "54": 777438720.0, + "55": 777438720.0, + "56": 777438720.0, + "57": 777438720.0, + "58": 777438720.0, + "59": 777438720.0, + "60": 777438720.0, + "61": 777438720.0, + "62": 777440768.0, + "63": 777440768.0, + "64": 777440768.0, + "65": 777440768.0, + "66": 777440768.0, + "67": 777440768.0, + "68": 777440768.0, + "69": 777440768.0, + "70": 777440768.0, + "71": 777440768.0, + "72": 777440768.0, + "73": 777440768.0, + "74": 777440768.0, + "75": 777440768.0, + "76": 777440768.0, + "77": 777440768.0, + "78": 777440768.0, + "79": 777440768.0, + "80": 777440768.0, + "81": 777440768.0, + "82": 777440768.0, + "83": 777440768.0, + "84": 777440768.0, + "85": 777440768.0, + "86": 777440768.0, + "87": 777440768.0, + "88": 777440768.0, + "89": 777440768.0, + "90": 777440768.0, + "91": 777440768.0, + "92": 777440768.0, + "93": 777440768.0, + "94": 777440768.0, + "95": 777440768.0, + "96": 777440768.0, + "97": 777440768.0, + "98": 777440768.0, + "99": 777440768.0, + "100": 777440768.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + 
"19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 8.32391, + "52": 0.3398, + "53": 0.30756, + "54": 0.30697, + "55": 0.2935, + "56": 0.29413, + "57": 0.29396, + "58": 0.29456, + "59": 0.29233, + "60": 0.2939, + "61": 0.29443, + "62": 0.2943, + "63": 0.29432, + "64": 0.2932, + "65": 0.29355, + "66": 0.29184, + "67": 0.29158, + "68": 0.29084, + "69": 0.29172, + "70": 0.29363, + "71": 0.29168, + "72": 0.29019, + "73": 0.28966, + "74": 0.29246, + "75": 0.29011, + "76": 0.29057, + "77": 0.29091, + "78": 0.29324, + "79": 0.29066, + "80": 0.29107, + "81": 0.29294, + "82": 0.29221, + "83": 0.29236, + "84": 0.29186, + "85": 0.29093, + "86": 0.29169, + "87": 0.29216, + "88": 0.29208, + "89": 0.29119, + "90": 0.29052, + "91": 0.29071, + "92": 0.29077, + "93": 0.2924, + "94": 0.29099, + "95": 0.29258, + "96": 0.29081, + "97": 0.29179, + "98": 0.29109, + "99": 0.29355, + "100": 0.29202 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..3d9cf662b8f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 10.77465, + "2": 10.7833, + "3": 10.78415, + "4": 10.75096, + "5": 10.82178, + "6": 10.82451, + "7": 10.79285, + "8": 10.78381, + "9": 10.79131, + "10": 10.75104, + "11": 10.84159, + "12": 10.81851, + "13": 10.83696, + "14": 10.84049, + "15": 10.79327, + "16": 10.80444, + "17": 10.78857, + "18": 10.80645, + "19": 10.81337, + "20": 10.78432, + "21": 10.80011, + "22": 10.7354, + "23": 10.82878, + "24": 10.76463, + "25": 10.73732, + "26": 10.75952, + "27": 10.78402, + "28": 10.78532, + "29": 10.78911, + "30": 10.67902, + "31": 10.56799, + "32": 10.71676, + "33": 10.71733, + "34": 10.59866, + "35": 10.60045, + "36": 10.56736, + "37": 10.62419, + "38": 10.53217, + "39": 10.64858, + "40": 10.44562, + "41": 10.49812, + "42": 10.52883, + "43": 10.27436, + "44": 10.33638, + "45": 10.24984, + "46": 10.23698, + "47": 10.43825, + "48": 10.22783, + "49": 10.00196, + "50": 10.24562, + "51": 10.20107, + "52": 10.10861, + "53": 10.3403, + "54": 10.23893, + "55": 10.19008, + "56": 9.96159, + "57": 9.82417, + "58": 10.10904, + "59": 9.9041, + "60": 9.82045, + "61": 9.96789, + "62": 10.19934, + "63": 9.66196, + "64": 10.00416, + "65": 9.2675, + "66": 9.92466, + "67": 9.62367, + "68": 9.98499, + "69": 9.98524, + "70": 9.92553, + "71": 9.81785, + "72": 9.77816, + "73": 9.67402, + "74": 9.16615, + "75": 9.59935, + "76": 9.2754, + "77": 10.18639, + "78": 9.86592, + "79": 9.52838, + "80": 9.55132, + "81": 9.63037, + "82": 9.82843, + "83": 9.47009, + "84": 9.5424, + "85": 9.74228, + "86": 9.20711, + "87": 9.70433, + "88": 9.86745, + "89": 9.72062, + "90": 9.9304, + "91": 9.471, + "92": 9.47539, + "93": 9.21193, + "94": 8.94879, + "95": 9.62951, + "96": 9.63936, + "97": 9.40708, + "98": 9.77232, + "99": 9.01139, + "100": 9.51718 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 454770688.0, + "2": 454770688.0, + "3": 454770688.0, + "4": 454770688.0, + "5": 454770688.0, + "6": 
454770688.0, + "7": 454770688.0, + "8": 454770688.0, + "9": 454770688.0, + "10": 454770688.0, + "11": 454770688.0, + "12": 454770688.0, + "13": 454770688.0, + "14": 454770688.0, + "15": 454770688.0, + "16": 454770688.0, + "17": 454770688.0, + "18": 518880768.0, + "19": 518880768.0, + "20": 518880768.0, + "21": 518880768.0, + "22": 518880768.0, + "23": 518880768.0, + "24": 518880768.0, + "25": 518880768.0, + "26": 518880768.0, + "27": 518880768.0, + "28": 518880768.0, + "29": 518880768.0, + "30": 518880768.0, + "31": 518880768.0, + "32": 518880768.0, + "33": 518880768.0, + "34": 518880768.0, + "35": 518880768.0, + "36": 518880768.0, + "37": 518880768.0, + "38": 518880768.0, + "39": 518880768.0, + "40": 518880768.0, + "41": 518880768.0, + "42": 518880768.0, + "43": 518880768.0, + "44": 518880768.0, + "45": 518880768.0, + "46": 518880768.0, + "47": 518880768.0, + "48": 518880768.0, + "49": 518880768.0, + "50": 518880768.0, + "51": 518880768.0, + "52": 518880768.0, + "53": 518880768.0, + "54": 518880768.0, + "55": 518880768.0, + "56": 518880768.0, + "57": 518880768.0, + "58": 518880768.0, + "59": 518880768.0, + "60": 518880768.0, + "61": 518880768.0, + "62": 518880768.0, + "63": 518880768.0, + "64": 518880768.0, + "65": 518880768.0, + "66": 518880768.0, + "67": 518880768.0, + "68": 518880768.0, + "69": 518880768.0, + "70": 518880768.0, + "71": 518880768.0, + "72": 518880768.0, + "73": 518880768.0, + "74": 518880768.0, + "75": 518880768.0, + "76": 518880768.0, + "77": 518880768.0, + "78": 518880768.0, + "79": 518880768.0, + "80": 518880768.0, + "81": 518880768.0, + "82": 518880768.0, + "83": 518880768.0, + "84": 518880768.0, + "85": 518880768.0, + "86": 518880768.0, + "87": 518880768.0, + "88": 518880768.0, + "89": 518880768.0, + "90": 518880768.0, + "91": 518880768.0, + "92": 518880768.0, + "93": 518880768.0, + "94": 518880768.0, + "95": 518880768.0, + "96": 518880768.0, + "97": 518880768.0, + "98": 518880768.0, + "99": 518880768.0, + "100": 518880768.0 + } + }, + 
"mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4511150592.0, + "2": 4544705536.0, + "3": 4544705536.0, + "4": 4544705536.0, + "5": 4544705536.0, + "6": 4544705536.0, + "7": 4544705536.0, + "8": 4544705536.0, + "9": 4544705536.0, + "10": 4544705536.0, + "11": 4544705536.0, + "12": 4544705536.0, + "13": 4544705536.0, + "14": 4544705536.0, + "15": 4544705536.0, + "16": 4544705536.0, + "17": 4544705536.0, + "18": 4544705536.0, + "19": 4607767040.0, + "20": 4607767040.0, + "21": 4607767040.0, + "22": 4607767040.0, + "23": 4607767040.0, + "24": 4607767040.0, + "25": 4607767040.0, + "26": 4607767040.0, + "27": 4607767040.0, + "28": 4607767040.0, + "29": 4607767040.0, + "30": 4607767040.0, + "31": 4607767040.0, + "32": 4607767040.0, + "33": 4607767040.0, + "34": 4607767040.0, + "35": 4607767040.0, + "36": 4607767040.0, + "37": 4607767040.0, + "38": 4607767040.0, + "39": 4607767040.0, + "40": 4607767040.0, + "41": 4607767040.0, + "42": 4607767040.0, + "43": 4607767040.0, + "44": 4607767040.0, + "45": 4607767040.0, + "46": 4607767040.0, + "47": 4607767040.0, + "48": 4607767040.0, + "49": 4607767040.0, + "50": 4607767040.0, + "51": 4607767040.0, + "52": 4607767040.0, + "53": 4607767040.0, + "54": 4607767040.0, + "55": 4607767040.0, + "56": 4607767040.0, + "57": 4607767040.0, + "58": 4607767040.0, + "59": 4607767040.0, + "60": 4607767040.0, + "61": 4607767040.0, + "62": 4607767040.0, + "63": 4607767040.0, + "64": 4607767040.0, + "65": 4607767040.0, + "66": 4607767040.0, + "67": 4607767040.0, + "68": 4607767040.0, + "69": 4607767040.0, + "70": 4607767040.0, + "71": 4607767040.0, + "72": 4607767040.0, + "73": 4607767040.0, + "74": 4607767040.0, + "75": 4607767040.0, + "76": 4607767040.0, + "77": 4607767040.0, + "78": 4607767040.0, + "79": 4607767040.0, + "80": 4607767040.0, + "81": 4607767040.0, + "82": 4607767040.0, + "83": 4607767040.0, + "84": 4607767040.0, + "85": 4607767040.0, + "86": 4607767040.0, + "87": 
4607767040.0, + "88": 4607767040.0, + "89": 4607767040.0, + "90": 4607767040.0, + "91": 4607767040.0, + "92": 4607767040.0, + "93": 4607767040.0, + "94": 4607767040.0, + "95": 4607767040.0, + "96": 4607767040.0, + "97": 4607767040.0, + "98": 4607767040.0, + "99": 4607767040.0, + "100": 4607767040.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5.23658, + "2": 0.11342, + "3": 0.10424, + "4": 0.0896, + "5": 0.08891, + "6": 0.08841, + "7": 0.0882, + "8": 0.08856, + "9": 0.08635, + "10": 0.08776, + "11": 0.08701, + "12": 0.08694, + "13": 0.08552, + "14": 0.08635, + "15": 0.08742, + "16": 0.08423, + "17": 0.08309, + "18": 0.11719, + "19": 0.0929, + "20": 0.1101, + "21": 0.08669, + "22": 0.08719, + "23": 0.08582, + "24": 0.08654, + "25": 0.08603, + "26": 0.08535, + "27": 0.08439, + "28": 0.08545, + "29": 0.08496, + "30": 0.08412, + "31": 0.08316, + "32": 0.08329, + "33": 0.08342, + "34": 0.08511, + "35": 0.0834, + "36": 0.08316, + "37": 0.08223, + "38": 0.08202, + "39": 0.08221, + "40": 0.07703, + "41": 0.08264, + "42": 0.08192, + "43": 0.0814, + "44": 0.08107, + "45": 0.08098, + "46": 0.08419, + "47": 0.08114, + "48": 0.22032, + "49": 0.0833, + "50": 0.08014, + "51": 0.10352, + "52": 0.08063, + "53": 0.07904, + "54": 0.08003, + "55": 0.08622, + "56": 0.08065, + "57": 0.08879, + "58": 0.08111, + "59": 0.08093, + "60": 0.08098, + "61": 0.08226, + "62": 0.08281, + "63": 0.08189, + "64": 0.08714, + "65": 0.08455, + "66": 0.0857, + "67": 0.08236, + "68": 0.08336, + "69": 0.08227, + "70": 0.0833, + "71": 0.08157, + "72": 0.08485, + "73": 0.08177, + "74": 0.08349, + "75": 0.0828, + "76": 0.08429, + "77": 0.08256, + "78": 0.08362, + "79": 0.08272, + "80": 0.08394, + "81": 0.08197, + "82": 0.08345, + "83": 0.08164, + "84": 0.08343, + "85": 0.08257, + "86": 0.08443, + "87": 0.08437, + "88": 0.08308, + "89": 0.08326, + "90": 0.08136, + "91": 0.08197, + "92": 0.08322, + "93": 0.08598, + "94": 0.08404, + "95": 
0.08296, + "96": 0.08331, + "97": 0.08342, + "98": 0.08389, + "99": 0.0902, + "100": 0.09282 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": 1182.0, + "19": 1459.0, + "20": 1095.0, + "21": 1330.0, + "22": 1161.0, + "23": 1304.0, + "24": 1066.0, + "25": 1173.0, + "26": 1103.0, + "27": 1248.0, + "28": 1563.0, + "29": 1403.0, + "30": 1351.0, + "31": 1034.0, + "32": 1168.0, + "33": 1379.0, + "34": 1252.0, + "35": 1161.0, + "36": 1121.0, + "37": 1454.0, + "38": 1335.0, + "39": 1505.0, + "40": "nan", + "41": 1437.0, + "42": 1358.0, + "43": 1165.0, + "44": 1230.0, + "45": 1303.0, + "46": 1260.0, + "47": 1853.0, + "48": 1323.0, + "49": 1246.0, + "50": 1552.0, + "51": 1418.0, + "52": 1355.0, + "53": 1814.0, + "54": 1567.0, + "55": 1492.0, + "56": 1408.0, + "57": 1401.0, + "58": 1724.0, + "59": 1654.0, + "60": 1416.0, + "61": 1780.0, + "62": 1852.0, + "63": 1560.0, + "64": 1837.0, + "65": 1520.0, + "66": 1649.0, + "67": 1660.0, + "68": 1716.0, + "69": 1815.0, + "70": 2017.0, + "71": 2026.0, + "72": 1579.0, + "73": 1962.0, + "74": 1321.0, + "75": 1782.0, + "76": 1942.0, + "77": 2128.0, + "78": 2057.0, + "79": 1905.0, + "80": 2153.0, + "81": 2320.0, + "82": 2468.0, + "83": 1951.0, + "84": 2184.0, + "85": 2301.0, + "86": 1971.0, + "87": 2900.0, + "88": 2175.0, + "89": 2357.0, + "90": 2515.0, + "91": 1929.0, + "92": 2680.0, + "93": 2160.0, + "94": 2213.0, + "95": 2280.0, + "96": 2563.0, + "97": 2522.0, + "98": 2470.0, + "99": 2266.0, + "100": 2099.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json index 8828025e4b4..3a9ea635606 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 6.49307, - "2": 0.10356, - "3": 0.08062, - "4": 0.0772, - "5": 0.07555, - "6": 0.06677, - "7": 0.06434, - "8": 0.06228, - "9": 0.0624, - "10": 0.06213, - "11": 0.06353, - "12": 0.0622, - "13": 0.06377, - "14": 0.06323, - "15": 0.06296, - "16": 0.06251, - "17": 0.06382, - "18": 0.11433, - "19": 0.07262, - "20": 0.07222, - "21": 0.07613, - "22": 0.06977, - "23": 0.06664, - "24": 0.07256, - "25": 0.07344, - "26": 0.0723, - "27": 0.07264, - "28": 0.0697, - "29": 0.06998, - "30": 0.06785, - "31": 0.07022, - "32": 0.06834, - "33": 0.06679, - "34": 0.0678, - "35": 0.0679, - "36": 0.0679, - "37": 0.06826, - "38": 0.06821, - "39": 0.0665, - "40": 0.06798, - "41": 0.06816, - "42": 0.06816, - "43": 0.06901, - "44": 0.06772, - "45": 0.06849, - "46": 0.06843, - "47": 0.06773, - "48": 0.06705, - "49": 0.06755, - "50": 0.06844, - "51": 0.0971, - "52": 0.06968, - "53": 0.06915, - "54": 0.06982, - "55": 0.0703, - "56": 0.07014, - "57": 0.07047, - "58": 0.06835, - "59": 0.07077, - "60": 0.06886, - "61": 0.06929, - "62": 0.06887, - "63": 0.06946, - "64": 0.06924, - "65": 0.06987, - "66": 0.06898, - "67": 0.06873, - "68": 0.0695, - "69": 0.0712, - "70": 0.06928, - "71": 0.0692, - "72": 0.07014, - "73": 0.06964, - "74": 0.06884, - "75": 0.06897, - "76": 0.07036, - "77": 0.0693, - "78": 0.06905, - "79": 0.0698, - "80": 0.06831, - "81": 0.06969, - "82": 0.06871, - "83": 0.07059, - "84": 0.06905, - 
"85": 0.06955, - "86": 0.06926, - "87": 0.06905, - "88": 0.06912, - "89": 0.07039, - "90": 0.06895, - "91": 0.069, - "92": 0.0698, - "93": 0.06946, - "94": 0.06825, - "95": 0.06933, - "96": 0.06851, - "97": 0.06883, - "98": 0.07421, - "99": 0.06926, - "100": 0.07018 + "1": 6.7553, + "2": 0.07914, + "3": 0.06117, + "4": 0.04713, + "5": 0.04562, + "6": 0.04484, + "7": 0.0455, + "8": 0.04532, + "9": 0.04653, + "10": 0.04527, + "11": 0.04526, + "12": 0.04531, + "13": 0.04513, + "14": 0.04589, + "15": 0.04523, + "16": 0.04566, + "17": 0.04513, + "18": 0.09054, + "19": 0.05227, + "20": 0.05014, + "21": 0.04995, + "22": 0.04766, + "23": 0.04999, + "24": 0.05005, + "25": 0.0502, + "26": 0.04945, + "27": 0.04968, + "28": 0.04977, + "29": 0.0497, + "30": 0.04986, + "31": 0.04983, + "32": 0.04954, + "33": 0.04965, + "34": 0.04976, + "35": 0.05148, + "36": 0.05049, + "37": 0.05043, + "38": 0.04961, + "39": 0.04968, + "40": 0.05011, + "41": 0.05085, + "42": 0.05148, + "43": 0.05043, + "44": 0.05134, + "45": 0.05258, + "46": 0.05004, + "47": 0.04988, + "48": 0.052, + "49": 0.05001, + "50": 0.05024, + "51": 0.05928, + "52": 0.05229, + "53": 0.05133, + "54": 0.04954, + "55": 0.05183, + "56": 0.0499, + "57": 0.05371, + "58": 0.05294, + "59": 0.05143, + "60": 0.05245, + "61": 0.05128, + "62": 0.05258, + "63": 0.05117, + "64": 0.05002, + "65": 0.05116, + "66": 0.04965, + "67": 0.05087, + "68": 0.04976, + "69": 0.05059, + "70": 0.05074, + "71": 0.05146, + "72": 0.04996, + "73": 0.05053, + "74": 0.04997, + "75": 0.05102, + "76": 0.04952, + "77": 0.05026, + "78": 0.05047, + "79": 0.05054, + "80": 0.05018, + "81": 0.05082, + "82": 0.05081, + "83": 0.05053, + "84": 0.05027, + "85": 0.05039, + "86": 0.05101, + "87": 0.05996, + "88": 0.05963, + "89": 0.05999, + "90": 0.05955, + "91": 0.05033, + "92": 0.05028, + "93": 0.05134, + "94": 0.05022, + "95": 0.05076, + "96": 0.05004, + "97": 0.05109, + "98": 0.05023, + "99": 0.05058, + "100": 0.05028 } }, "num-zeros": { diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..a47b77f353b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.25547, + "52": 10.15856, + "53": 10.38114, + "54": 10.2992, + "55": 10.23806, + "56": 10.00726, + "57": 9.87765, + "58": 10.15279, + "59": 9.94207, + "60": 9.8666, + "61": 10.00032, + "62": 10.23443, + "63": 9.71917, + "64": 10.04209, + "65": 9.30009, + "66": 9.95537, + "67": 9.6499, + "68": 10.00402, + "69": 9.99988, + "70": 9.96383, + "71": 9.84259, + "72": 9.81258, + "73": 9.70921, + "74": 9.19832, + "75": 9.61686, + "76": 9.28859, + "77": 10.20416, + "78": 9.88378, + "79": 9.54296, + "80": 9.57095, + "81": 9.64006, + "82": 9.83648, + "83": 9.47691, + "84": 9.54866, + "85": 
9.75198, + "86": 9.21427, + "87": 9.70607, + "88": 9.87307, + "89": 9.72876, + "90": 9.92353, + "91": 9.48236, + "92": 9.47671, + "93": 9.20895, + "94": 8.9625, + "95": 9.62369, + "96": 9.64228, + "97": 9.41575, + "98": 9.77515, + "99": 9.00692, + "100": 9.51305 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1367.0, + "52": 1372.0, + "53": 1715.0, + "54": 1485.0, + "55": 1482.0, + "56": 1473.0, + "57": 1539.0, + "58": 1736.0, + "59": 1661.0, + "60": 1586.0, + "61": 1691.0, + "62": 1865.0, + "63": 1395.0, + "64": 1846.0, + "65": 1428.0, + "66": 1717.0, + "67": 1700.0, + "68": 1750.0, + "69": 1681.0, + "70": 1861.0, + "71": 2048.0, + "72": 1552.0, + "73": 2010.0, + "74": 1344.0, + "75": 1840.0, + "76": 1846.0, + "77": 2034.0, + "78": 2170.0, + "79": 1949.0, + "80": 2077.0, + "81": 2381.0, + "82": 2390.0, + "83": 1843.0, + "84": 2060.0, + "85": 2317.0, + "86": 1958.0, + "87": 2829.0, + "88": 2046.0, + "89": 2260.0, + "90": 2545.0, + "91": 1801.0, + "92": 2505.0, + "93": 2064.0, + "94": 2223.0, + "95": 2379.0, + "96": 2579.0, + "97": 2411.0, + "98": 2500.0, + "99": 2124.0, + "100": 2119.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, 
+ "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 518880768.0, + "52": 518880768.0, + "53": 518880768.0, + "54": 518880768.0, + "55": 518880768.0, + "56": 518880768.0, + "57": 518880768.0, + "58": 518880768.0, + "59": 518880768.0, + "60": 518880768.0, + "61": 518880768.0, + "62": 518880768.0, + "63": 518880768.0, + "64": 518880768.0, + "65": 518880768.0, + "66": 518880768.0, + "67": 518880768.0, + "68": 518880768.0, + "69": 518880768.0, + "70": 518880768.0, + "71": 518880768.0, + "72": 518880768.0, + "73": 518880768.0, + "74": 518880768.0, + "75": 518880768.0, + "76": 518880768.0, + "77": 518880768.0, + "78": 518880768.0, + "79": 518880768.0, + "80": 518880768.0, + "81": 518880768.0, + "82": 518880768.0, + "83": 518880768.0, + "84": 518880768.0, + "85": 518880768.0, + "86": 518880768.0, + "87": 518880768.0, + "88": 518880768.0, + "89": 518880768.0, + "90": 518880768.0, + "91": 518880768.0, + "92": 518880768.0, + "93": 518880768.0, + "94": 518880768.0, + "95": 518880768.0, + "96": 518880768.0, + "97": 518880768.0, + "98": 518880768.0, + "99": 518880768.0, + "100": 518880768.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": 
"nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4575260160.0, + "52": 4608815616.0, + "53": 4608815616.0, + "54": 4608815616.0, + "55": 4608815616.0, + "56": 4608815616.0, + "57": 4608815616.0, + "58": 4608815616.0, + "59": 4608815616.0, + "60": 4608815616.0, + "61": 4608815616.0, + "62": 4608815616.0, + "63": 4608815616.0, + "64": 4608815616.0, + "65": 4608815616.0, + "66": 4608815616.0, + "67": 4608815616.0, + "68": 4608815616.0, + "69": 4608815616.0, + "70": 4608815616.0, + "71": 4608815616.0, + "72": 4608815616.0, + "73": 4608815616.0, + "74": 4608815616.0, + "75": 4608815616.0, + "76": 4608815616.0, + "77": 4608815616.0, + "78": 4608815616.0, + "79": 4608815616.0, + "80": 4608815616.0, + "81": 4608815616.0, + "82": 4608815616.0, + "83": 4608815616.0, + "84": 4608815616.0, + "85": 4608815616.0, + "86": 4608815616.0, + "87": 4608815616.0, + "88": 4608815616.0, + "89": 4608815616.0, + "90": 4608815616.0, + "91": 4608815616.0, + "92": 4608815616.0, + "93": 4608815616.0, + "94": 4608815616.0, + "95": 4608815616.0, + "96": 4608815616.0, + "97": 4608815616.0, + "98": 4608815616.0, + "99": 4608815616.0, + "100": 4608815616.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + 
"10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 5.54199, + "52": 0.07932, + "53": 0.05296, + "54": 0.054, + "55": 0.052, + "56": 0.05407, + "57": 0.05285, + "58": 0.05383, + "59": 0.05227, + "60": 0.05363, + "61": 0.053, + "62": 0.05361, + "63": 0.05195, + "64": 0.05507, + "65": 0.05368, + "66": 0.05324, + "67": 0.05188, + "68": 0.05445, + "69": 0.05222, + "70": 0.05356, + "71": 0.05169, + "72": 0.05424, + "73": 0.05264, + "74": 0.05364, + "75": 0.0521, + "76": 0.05373, + "77": 0.05341, + "78": 0.05388, + "79": 0.05224, + "80": 0.05393, + "81": 0.05706, + "82": 0.05358, + "83": 0.05191, + "84": 0.05339, + "85": 0.05302, + "86": 0.05343, + "87": 0.05297, + "88": 0.0535, + "89": 0.05264, + "90": 0.05485, + "91": 0.05422, + "92": 0.05329, + "93": 0.0539, + "94": 0.05526, + "95": 0.05238, + "96": 0.05607, + "97": 0.05259, + "98": 0.0561, + "99": 0.05354, + "100": 0.05479 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json index 8d29fc96a7f..36d741d6e7d 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 4.2592, - "2": 0.13544, - "3": 0.09999, - "4": 0.08273, - "5": 0.08157, - "6": 0.08266, - "7": 0.08111, - "8": 0.08184, - "9": 0.08109, - "10": 0.08281, - "11": 0.08041, - "12": 0.08186, - "13": 0.08098, - "14": 0.08513, - "15": 0.0821, - "16": 0.08144, - "17": 0.08052, - "18": 0.13091, - "19": 0.08819, - "20": 0.08804, - "21": 0.08818, - "22": 0.08404, - "23": 0.08729, - "24": 0.08805, - "25": 0.08736, - "26": 0.08811, - "27": 0.08757, - "28": 0.08887, - "29": 0.08961, - "30": 0.0883, - "31": 0.08788, - "32": 0.08884, - "33": 0.08833, - "34": 0.08811, - "35": 0.08831, - "36": 0.08859, - "37": 0.08809, - "38": 0.08879, - "39": 0.08769, - "40": 0.0883, - "41": 0.08757, - "42": 0.08797, - "43": 0.08669, - "44": 0.08751, - "45": 0.08893, - "46": 0.08862, - "47": 0.08698, - "48": 0.089, - "49": 0.08841, - "50": 0.08813, - "51": 0.09282, - "52": 0.08991, - "53": 0.08846, - "54": 0.08878, - "55": 0.08875, - "56": 0.0897, - "57": 0.0888, - "58": 0.08814, - "59": 0.08821, - "60": 0.08782, - "61": 0.0888, - "62": 0.08762, - "63": 0.08743, - "64": 0.0879, - "65": 0.08877, - "66": 0.08656, - "67": 0.08681, - "68": 0.08654, - "69": 0.08705, - "70": 0.08667, - "71": 0.08696, - "72": 0.08664, - "73": 0.08625, - "74": 0.08667, - "75": 0.08656, - "76": 0.08557, - "77": 0.08578, - "78": 0.08586, - "79": 0.08584, - "80": 0.08576, - "81": 0.08653, - "82": 0.08572, - "83": 0.08613, - "84": 0.08557, - "85": 0.08616, - "86": 0.08608, - "87": 0.08563, - "88": 0.08581, - "89": 0.08591, - "90": 0.08593, - "91": 0.08543, - "92": 0.08641, - "93": 0.08635, - "94": 0.08549, - "95": 0.08554, - "96": 
0.08487, - "97": 0.08505, - "98": 0.08522, - "99": 0.08533, - "100": 0.08544 + "1": 3.39236, + "2": 0.197, + "3": 0.09014, + "4": 0.07513, + "5": 0.07608, + "6": 0.07565, + "7": 0.07606, + "8": 0.07571, + "9": 0.07584, + "10": 0.07549, + "11": 0.07619, + "12": 0.0756, + "13": 0.07585, + "14": 0.07487, + "15": 0.07654, + "16": 0.07517, + "17": 0.07637, + "18": 0.13134, + "19": 0.08507, + "20": 0.08208, + "21": 0.08338, + "22": 0.07828, + "23": 0.08267, + "24": 0.08242, + "25": 0.08322, + "26": 0.08222, + "27": 0.08351, + "28": 0.08234, + "29": 0.08375, + "30": 0.08306, + "31": 0.0837, + "32": 0.08544, + "33": 0.08325, + "34": 0.08234, + "35": 0.08499, + "36": 0.08373, + "37": 0.08247, + "38": 0.08204, + "39": 0.08354, + "40": 0.0837, + "41": 0.08325, + "42": 0.08545, + "43": 0.08233, + "44": 0.08294, + "45": 0.084, + "46": 0.08215, + "47": 0.08346, + "48": 0.08195, + "49": 0.08269, + "50": 0.08321, + "51": 0.08664, + "52": 0.08023, + "53": 0.08003, + "54": 0.07979, + "55": 0.08188, + "56": 0.07966, + "57": 0.08281, + "58": 0.0797, + "59": 0.07943, + "60": 0.07926, + "61": 0.07894, + "62": 0.07941, + "63": 0.07952, + "64": 0.07973, + "65": 0.07964, + "66": 0.07938, + "67": 0.07972, + "68": 0.07922, + "69": 0.07931, + "70": 0.07926, + "71": 0.07906, + "72": 0.08086, + "73": 0.07934, + "74": 0.07975, + "75": 0.07939, + "76": 0.07948, + "77": 0.07896, + "78": 0.07961, + "79": 0.0798, + "80": 0.07961, + "81": 0.07923, + "82": 0.07921, + "83": 0.07905, + "84": 0.07972, + "85": 0.08027, + "86": 0.08062, + "87": 0.08419, + "88": 0.08051, + "89": 0.08041, + "90": 0.08078, + "91": 0.08039, + "92": 0.08075, + "93": 0.0801, + "94": 0.08, + "95": 0.0799, + "96": 0.08114, + "97": 0.07987, + "98": 0.08062, + "99": 0.08014, + "100": 0.08015 } }, "num-zeros": { diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100_2nd.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..8c96fb071fc --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.23775, + "52": 10.15443, + "53": 10.36085, + "54": 10.26927, + "55": 10.2161, + "56": 9.99594, + "57": 9.8744, + "58": 10.14007, + "59": 9.93447, + "60": 9.84864, + "61": 9.98549, + "62": 10.2164, + "63": 9.69034, + "64": 10.0182, + "65": 9.30046, + "66": 9.9355, + "67": 9.63051, + "68": 9.99128, + "69": 9.9852, + "70": 9.92463, + "71": 9.81436, + "72": 9.79481, + "73": 9.68082, + "74": 9.1945, + "75": 9.60407, + "76": 9.28537, + "77": 10.18507, + "78": 9.86718, + "79": 9.52407, + "80": 9.55749, + "81": 9.62863, + "82": 9.81568, + "83": 9.45708, + "84": 9.53654, + "85": 9.73266, + "86": 9.20138, + "87": 9.69524, + "88": 9.85412, + "89": 9.71648, + "90": 9.91047, + "91": 9.45992, + "92": 9.46603, + "93": 9.19321, + "94": 8.94, + "95": 
9.60607, + "96": 9.62214, + "97": 9.39796, + "98": 9.76023, + "99": 8.99097, + "100": 9.49505 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1508.0, + "52": 1400.0, + "53": 1740.0, + "54": 1498.0, + "55": 1551.0, + "56": 1363.0, + "57": 1465.0, + "58": 1610.0, + "59": 1574.0, + "60": 1599.0, + "61": 1727.0, + "62": 1804.0, + "63": 1590.0, + "64": 1813.0, + "65": 1398.0, + "66": 1738.0, + "67": 1536.0, + "68": 1764.0, + "69": 1781.0, + "70": 1926.0, + "71": 1950.0, + "72": 1461.0, + "73": 1985.0, + "74": 1345.0, + "75": 1871.0, + "76": 1732.0, + "77": 2086.0, + "78": 2075.0, + "79": 1992.0, + "80": 2260.0, + "81": 2300.0, + "82": 2290.0, + "83": 1774.0, + "84": 2172.0, + "85": 2216.0, + "86": 2038.0, + "87": 2741.0, + "88": 2079.0, + "89": 2349.0, + "90": 2315.0, + "91": 1875.0, + "92": 2611.0, + "93": 2053.0, + "94": 2220.0, + "95": 2296.0, + "96": 2665.0, + "97": 2516.0, + "98": 2549.0, + "99": 2378.0, + "100": 2257.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": 
"nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 518880768.0, + "52": 518880768.0, + "53": 518880768.0, + "54": 518880768.0, + "55": 518880768.0, + "56": 518880768.0, + "57": 518880768.0, + "58": 518880768.0, + "59": 518880768.0, + "60": 518880768.0, + "61": 518880768.0, + "62": 518880768.0, + "63": 518880768.0, + "64": 518880768.0, + "65": 518880768.0, + "66": 518880768.0, + "67": 518880768.0, + "68": 518880768.0, + "69": 518880768.0, + "70": 518880768.0, + "71": 518880768.0, + "72": 518880768.0, + "73": 518880768.0, + "74": 518880768.0, + "75": 518880768.0, + "76": 518880768.0, + "77": 518880768.0, + "78": 518880768.0, + "79": 518880768.0, + "80": 518880768.0, + "81": 518880768.0, + "82": 518880768.0, + "83": 518880768.0, + "84": 518880768.0, + "85": 518880768.0, + "86": 518880768.0, + "87": 518880768.0, + "88": 518880768.0, + "89": 518880768.0, + "90": 518880768.0, + "91": 518880768.0, + "92": 518880768.0, + "93": 518880768.0, + "94": 518880768.0, + "95": 518880768.0, + "96": 518880768.0, + "97": 518880768.0, + "98": 518880768.0, + "99": 518880768.0, + "100": 518880768.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 
"nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4575260160.0, + "52": 4608815616.0, + "53": 4608815616.0, + "54": 4608815616.0, + "55": 4608815616.0, + "56": 4608815616.0, + "57": 4608815616.0, + "58": 4608815616.0, + "59": 4608815616.0, + "60": 4608815616.0, + "61": 4608815616.0, + "62": 4608815616.0, + "63": 4608815616.0, + "64": 4608815616.0, + "65": 4608815616.0, + "66": 4608815616.0, + "67": 4608815616.0, + "68": 4608815616.0, + "69": 4608815616.0, + "70": 4608815616.0, + "71": 4608815616.0, + "72": 4608815616.0, + "73": 4608815616.0, + "74": 4608815616.0, + "75": 4608815616.0, + "76": 4608815616.0, + "77": 4608815616.0, + "78": 4608815616.0, + "79": 4608815616.0, + "80": 4608815616.0, + "81": 4608815616.0, + "82": 4608815616.0, + "83": 4608815616.0, + "84": 4608815616.0, + "85": 4608815616.0, + "86": 4608815616.0, + "87": 4608815616.0, + "88": 4608815616.0, + "89": 4608815616.0, + "90": 4608815616.0, + "91": 4608815616.0, + "92": 4608815616.0, + "93": 4608815616.0, + "94": 4608815616.0, + "95": 4608815616.0, + "96": 4608815616.0, + "97": 4608815616.0, + "98": 4608815616.0, + "99": 4608815616.0, + "100": 4608815616.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + 
"21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.93441, + "52": 0.11442, + "53": 0.08582, + "54": 0.08444, + "55": 0.09374, + "56": 0.0841, + "57": 0.08368, + "58": 0.08327, + "59": 0.08219, + "60": 0.08174, + "61": 0.08125, + "62": 0.08336, + "63": 0.08247, + "64": 0.08267, + "65": 0.08048, + "66": 0.07988, + "67": 0.08016, + "68": 0.08086, + "69": 0.07938, + "70": 0.08047, + "71": 0.07981, + "72": 0.08021, + "73": 0.08023, + "74": 0.08133, + "75": 0.08002, + "76": 0.08063, + "77": 0.08008, + "78": 0.0809, + "79": 0.08014, + "80": 0.08071, + "81": 0.08057, + "82": 0.08093, + "83": 0.08114, + "84": 0.08102, + "85": 0.0806, + "86": 0.08267, + "87": 0.08027, + "88": 0.08002, + "89": 0.08059, + "90": 0.0802, + "91": 0.08028, + "92": 0.08007, + "93": 0.08034, + "94": 0.08004, + "95": 0.08085, + "96": 0.07942, + "97": 0.08025, + "98": 0.07962, + "99": 0.08071, + "100": 0.08017 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..51ebcb618e4 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.7702, + "2": 10.78031, + "3": 10.77782, + "4": 10.73861, + "5": 10.81197, + "6": 10.81962, + "7": 10.79512, + "8": 10.78158, + "9": 10.79081, + "10": 10.71741, + "11": 10.85173, + "12": 
10.80653, + "13": 10.82058, + "14": 10.84404, + "15": 10.74918, + "16": 10.752, + "17": 10.70902, + "18": 10.752, + "19": 10.74635, + "20": 10.63769, + "21": 10.61672, + "22": 10.44317, + "23": 10.6675, + "24": 10.50949, + "25": 10.45557, + "26": 10.53435, + "27": 10.54753, + "28": 10.51646, + "29": 10.55435, + "30": 10.28785, + "31": 10.00156, + "32": 10.40963, + "33": 10.40243, + "34": 10.13341, + "35": 10.19694, + "36": 10.14213, + "37": 10.2869, + "38": 10.10508, + "39": 10.35217, + "40": 10.00199, + "41": 10.07363, + "42": 10.1522, + "43": 9.74558, + "44": 9.8738, + "45": 9.74764, + "46": 9.74951, + "47": 10.09152, + "48": 9.77892, + "49": 9.44822, + "50": 9.84214 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1564.0, + "2": 1610.0, + "3": 1608.0, + "4": 1854.0, + "5": 1873.0, + "6": 1812.0, + "7": 1744.0, + "8": 1614.0, + "9": 1857.0, + "10": 1358.0, + "11": 1844.0, + "12": 1788.0, + "13": 1826.0, + "14": 1801.0, + "15": 1892.0, + "16": 1892.0, + "17": 1758.0, + "18": 1714.0, + "19": 1677.0, + "20": 1582.0, + "21": 1824.0, + "22": 1579.0, + "23": 1987.0, + "24": 1533.0, + "25": 1602.0, + "26": 1651.0, + "27": 1901.0, + "28": 2044.0, + "29": 1911.0, + "30": 1823.0, + "31": 1583.0, + "32": 1926.0, + "33": 2108.0, + "34": 1914.0, + "35": 2058.0, + "36": 1946.0, + "37": 2325.0, + "38": 2268.0, + "39": 2376.0, + "40": 2208.0, + "41": 2448.0, + "42": 2209.0, + "43": 1977.0, + "44": 2049.0, + "45": 2266.0, + "46": 2481.0, + "47": 2583.0, + "48": 2450.0, + "49": 2255.0, + "50": 2453.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 950750208.0, + "2": 950750208.0, + "3": 950750208.0, + "4": 950750208.0, + "5": 950750208.0, + "6": 950750208.0, + "7": 950750208.0, + "8": 950750208.0, + "9": 950750208.0, + "10": 950750208.0, + "11": 950750208.0, + "12": 950750208.0, + "13": 950750208.0, + "14": 950750208.0, + "15": 950750208.0, + "16": 
950750208.0, + "17": 950750208.0, + "18": 950750208.0, + "19": 950750208.0, + "20": 950750208.0, + "21": 950750208.0, + "22": 950750208.0, + "23": 950750208.0, + "24": 950750208.0, + "25": 950750208.0, + "26": 950750208.0, + "27": 950750208.0, + "28": 950750208.0, + "29": 950750208.0, + "30": 950750208.0, + "31": 950750208.0, + "32": 950750208.0, + "33": 950750208.0, + "34": 950750208.0, + "35": 950750208.0, + "36": 950750208.0, + "37": 950750208.0, + "38": 950750208.0, + "39": 950750208.0, + "40": 950750208.0, + "41": 950750208.0, + "42": 950750208.0, + "43": 950750208.0, + "44": 950750208.0, + "45": 950750208.0, + "46": 950750208.0, + "47": 950750208.0, + "48": 950750208.0, + "49": 950750208.0, + "50": 950750208.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 3275808768.0, + "2": 3635274752.0, + "3": 3635274752.0, + "4": 3635274752.0, + "5": 3635274752.0, + "6": 3635274752.0, + "7": 3635274752.0, + "8": 3635274752.0, + "9": 3635274752.0, + "10": 3635274752.0, + "11": 3635274752.0, + "12": 3635274752.0, + "13": 3635274752.0, + "14": 3635274752.0, + "15": 3635274752.0, + "16": 3635274752.0, + "17": 3635274752.0, + "18": 3635274752.0, + "19": 3635274752.0, + "20": 3635274752.0, + "21": 3635274752.0, + "22": 3635274752.0, + "23": 3635274752.0, + "24": 3635274752.0, + "25": 3635274752.0, + "26": 3635274752.0, + "27": 3635274752.0, + "28": 3635274752.0, + "29": 3635274752.0, + "30": 3635274752.0, + "31": 3635274752.0, + "32": 3635274752.0, + "33": 3635274752.0, + "34": 3635274752.0, + "35": 3635274752.0, + "36": 3635274752.0, + "37": 3635274752.0, + "38": 3635274752.0, + "39": 3635274752.0, + "40": 3635274752.0, + "41": 3635274752.0, + "42": 3635274752.0, + "43": 3635274752.0, + "44": 3635274752.0, + "45": 3635274752.0, + "46": 3635274752.0, + "47": 3635274752.0, + "48": 3635274752.0, + "49": 3635274752.0, + "50": 3635274752.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + 
"step_interval": 1, + "values": { + "1": 7.39667, + "2": 0.15731, + "3": 0.14531, + "4": 0.13151, + "5": 0.13018, + "6": 0.12885, + "7": 0.13069, + "8": 0.13596, + "9": 0.12969, + "10": 0.12994, + "11": 0.1314, + "12": 0.12886, + "13": 0.13009, + "14": 0.1305, + "15": 0.13493, + "16": 0.13341, + "17": 0.13216, + "18": 0.13045, + "19": 0.1359, + "20": 0.13207, + "21": 0.13248, + "22": 0.12979, + "23": 0.12948, + "24": 0.13047, + "25": 0.12963, + "26": 0.13113, + "27": 0.13172, + "28": 0.14017, + "29": 0.13059, + "30": 0.12871, + "31": 0.12957, + "32": 0.1298, + "33": 0.13011, + "34": 0.12939, + "35": 0.12965, + "36": 0.13039, + "37": 0.13099, + "38": 0.13051, + "39": 0.12932, + "40": 0.13052, + "41": 0.13052, + "42": 0.13104, + "43": 0.12938, + "44": 0.13063, + "45": 0.13204, + "46": 0.13075, + "47": 0.13071, + "48": 0.12984, + "49": 0.12965, + "50": 0.12987 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_h100.json index 6660a5e446e..1d24a32a8d8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_h100.json @@ -13,47 +13,47 @@ "7": 10.8645, "8": 10.87335, "9": 10.87481, - "10": 10.83903, - "11": 10.86614, - "12": 10.86169, - "13": 10.87354, - "14": 10.87593, - "15": 10.8216, - "16": 10.83071, - "17": 10.79411, - "18": 10.81433, - "19": 10.80011, - "20": 10.71697, - "21": 10.70154, - "22": 10.57235, - "23": 10.70749, + "10": 10.83904, + "11": 10.86613, + "12": 10.86168, + "13": 10.87357, + "14": 10.87594, + "15": 10.82161, + "16": 10.83073, + "17": 10.79408, + "18": 10.8143, + "19": 10.80009, + "20": 10.71695, + "21": 10.70153, + "22": 10.57236, + "23": 10.70752, "24": 10.6006, - "25": 10.5566, - "26": 10.60138, - "27": 10.60955, + "25": 10.55655, + "26": 10.60135, 
+ "27": 10.60957, "28": 10.55626, "29": 10.57268, "30": 10.36032, - "31": 10.11454, - "32": 10.45937, - "33": 10.45389, - "34": 10.21168, - "35": 10.26583, + "31": 10.11449, + "32": 10.45933, + "33": 10.45392, + "34": 10.21171, + "35": 10.26576, "36": 10.21483, - "37": 10.34814, - "38": 10.19787, - "39": 10.39713, - "40": 10.08719, - "41": 10.13539, - "42": 10.20638, + "37": 10.34811, + "38": 10.19788, + "39": 10.39711, + "40": 10.08718, + "41": 10.13538, + "42": 10.20634, "43": 9.82769, - "44": 9.95444, - "45": 9.82374, - "46": 9.79864, - "47": 10.12579, + "44": 9.9545, + "45": 9.82372, + "46": 9.79866, + "47": 10.12577, "48": 9.83547, "49": 9.51888, - "50": 9.90498 + "50": 9.90503 } }, "num-zeros": { @@ -70,47 +70,47 @@ "7": 1767.0, "8": 1569.0, "9": 1750.0, - "10": 1413.0, - "11": 1746.0, - "12": 1681.0, - "13": 1828.0, - "14": 1739.0, - "15": 1801.0, - "16": 1895.0, - "17": 1781.0, - "18": 1693.0, - "19": 1705.0, - "20": 1624.0, - "21": 1838.0, - "22": 1792.0, - "23": 2005.0, - "24": 1601.0, - "25": 1483.0, - "26": 1615.0, - "27": 1844.0, - "28": 1961.0, - "29": 2012.0, - "30": 1856.0, - "31": 1502.0, - "32": 1794.0, - "33": 2118.0, - "34": 1742.0, - "35": 1953.0, - "36": 1940.0, - "37": 2324.0, - "38": 2109.0, - "39": 2369.0, - "40": 2183.0, - "41": 2063.0, - "42": 2232.0, - "43": 1917.0, - "44": 2084.0, - "45": 2058.0, - "46": 2144.0, - "47": 2488.0, - "48": 2407.0, - "49": 2125.0, - "50": 2134.0 + "10": 1414.0, + "11": 1784.0, + "12": 1661.0, + "13": 1936.0, + "14": 1687.0, + "15": 1669.0, + "16": 1868.0, + "17": 1820.0, + "18": 1629.0, + "19": 1716.0, + "20": 1626.0, + "21": 1933.0, + "22": 1647.0, + "23": 1979.0, + "24": 1578.0, + "25": 1542.0, + "26": 1628.0, + "27": 1829.0, + "28": 1896.0, + "29": 2005.0, + "30": 1921.0, + "31": 1471.0, + "32": 1826.0, + "33": 2012.0, + "34": 1767.0, + "35": 1973.0, + "36": 1933.0, + "37": 2208.0, + "38": 2138.0, + "39": 2260.0, + "40": 2112.0, + "41": 2164.0, + "42": 2152.0, + "43": 2044.0, + "44": 2055.0, + "45": 
2076.0, + "46": 2166.0, + "47": 2472.0, + "48": 2425.0, + "49": 2218.0, + "50": 2135.0 } }, "mem-allocated-bytes": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 8.92875, - "2": 0.12034, - "3": 0.10184, - "4": 0.10215, - "5": 0.10291, - "6": 0.10167, - "7": 0.09936, - "8": 0.10097, - "9": 0.10127, - "10": 0.10171, - "11": 0.10013, - "12": 0.09898, - "13": 0.10085, - "14": 0.10081, - "15": 0.10088, - "16": 0.10002, - "17": 0.0999, - "18": 0.10168, - "19": 0.10032, - "20": 0.09815, - "21": 0.10018, - "22": 0.09914, - "23": 0.1005, - "24": 0.10106, - "25": 0.10086, - "26": 0.10152, - "27": 0.1, - "28": 0.10161, - "29": 0.10038, - "30": 0.10045, - "31": 0.10187, - "32": 0.10055, - "33": 0.11357, - "34": 0.10266, - "35": 0.10298, - "36": 0.10061, - "37": 0.10166, - "38": 0.10185, - "39": 0.09925, - "40": 0.10087, - "41": 0.10001, - "42": 0.1, - "43": 0.10286, - "44": 0.10227, - "45": 0.10327, - "46": 0.10041, - "47": 0.10091, - "48": 0.10215, - "49": 0.10017, - "50": 0.10055 + "1": 7.02529, + "2": 0.11863, + "3": 0.10057, + "4": 0.09906, + "5": 0.08104, + "6": 0.08043, + "7": 0.08243, + "8": 0.08119, + "9": 0.08111, + "10": 0.08055, + "11": 0.08084, + "12": 0.0797, + "13": 0.07988, + "14": 0.08069, + "15": 0.08072, + "16": 0.08026, + "17": 0.08022, + "18": 0.08048, + "19": 0.08013, + "20": 0.08102, + "21": 0.08145, + "22": 0.08021, + "23": 0.08046, + "24": 0.082, + "25": 0.08075, + "26": 0.08017, + "27": 0.08064, + "28": 0.07978, + "29": 0.08107, + "30": 0.08431, + "31": 0.08022, + "32": 0.08061, + "33": 0.07995, + "34": 0.08117, + "35": 0.0796, + "36": 0.08069, + "37": 0.08194, + "38": 0.08127, + "39": 0.07932, + "40": 0.07929, + "41": 0.0796, + "42": 0.08162, + "43": 0.07964, + "44": 0.08019, + "45": 0.07997, + "46": 0.07935, + "47": 0.08025, + "48": 0.08073, + "49": 0.07999, + "50": 0.08013 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgx_a100.json index bdc8c7f9895..4ba6ee523cb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgx_a100.json @@ -51,8 +51,8 @@ "45": 9.81584, "46": 9.80638, "47": 10.12803, - "48": 9.82444, - "49": 9.50618, + "48": 9.82443, + "49": 9.50621, "50": 9.89067 } }, @@ -108,9 +108,9 @@ "45": 2123.0, "46": 2194.0, "47": 2463.0, - "48": 2382.0, - "49": 2300.0, - "50": 2397.0 + "48": 2345.0, + "49": 2282.0, + "50": 2366.0 } }, "mem-allocated-bytes": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5.86972, - "2": 0.17288, - "3": 0.13781, - "4": 0.13826, - "5": 0.13569, - "6": 0.13252, - "7": 0.1323, - "8": 0.13208, - "9": 0.13237, - "10": 0.13177, - "11": 0.13164, - "12": 0.135, - "13": 0.13389, - "14": 0.13431, - "15": 0.13376, - "16": 0.1342, - "17": 0.13348, - "18": 0.13307, - "19": 0.13389, - "20": 0.13476, - "21": 0.13346, - "22": 0.13333, - "23": 0.13336, - "24": 0.13304, - "25": 0.13373, - "26": 0.13283, - "27": 0.1331, - "28": 0.13314, - "29": 0.13299, - "30": 0.13362, - "31": 0.13392, - "32": 0.13417, - "33": 0.13406, - "34": 0.13351, - "35": 0.13357, - "36": 0.13345, - "37": 0.13422, - "38": 0.1339, - "39": 0.13419, - "40": 0.13437, - "41": 0.13425, - "42": 0.13364, - "43": 0.13389, - "44": 0.13482, - "45": 0.13461, - "46": 0.134, - "47": 0.13363, - "48": 0.13416, - "49": 0.13386, - "50": 0.13343 + "1": 3.53163, + "2": 0.15986, + "3": 0.14465, + "4": 0.12865, + "5": 0.12866, + "6": 0.12781, + "7": 0.12812, + "8": 0.12748, + "9": 0.12785, + "10": 0.12793, + "11": 0.12738, + "12": 0.12687, + "13": 0.1279, + "14": 0.12794, + "15": 0.12688, + "16": 0.12657, + "17": 0.12699, + "18": 0.12571, + "19": 0.1268, + "20": 0.12768, + "21": 0.12608, + "22": 0.12935, + "23": 0.12731, + "24": 0.12623, + "25": 0.1265, + "26": 0.12691, + "27": 
0.12618, + "28": 0.12745, + "29": 0.12715, + "30": 0.12731, + "31": 0.12861, + "32": 0.12807, + "33": 0.12763, + "34": 0.1264, + "35": 0.12674, + "36": 0.12628, + "37": 0.12628, + "38": 0.12709, + "39": 0.12704, + "40": 0.12669, + "41": 0.12716, + "42": 0.12677, + "43": 0.12874, + "44": 0.12646, + "45": 0.12761, + "46": 0.12827, + "47": 0.12648, + "48": 0.12642, + "49": 0.12646, + "50": 0.12636 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json index 0d13ca5c55f..7077541e896 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.83568, "5": 10.8567, "10": 10.81478, "15": 10.85098, "20": 10.85865, "25": 10.81343, "30": 10.74969, "35": 10.65857, "40": 10.50359, "45": 10.2738, "50": 10.25588, "55": 10.18782, "60": 9.80901, "65": 9.24475, "70": 9.91039, "75": 9.5812, "80": 9.54102, "85": 9.72633, "90": 9.90316, "95": 9.60258, "100": 9.49405}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 684610560.0, "5": 685659136.0, "10": 685659136.0, "15": 685659136.0, "20": 1043027456.0, "25": 1043027456.0, "30": 1043027456.0, "35": 1043027456.0, "40": 1043027456.0, "45": 1043027456.0, "50": 1043027456.0, "55": 1043027456.0, "60": 1043027456.0, "65": 1043027456.0, "70": 1043027456.0, "75": 1043027456.0, "80": 1043027456.0, "85": 1043027456.0, "90": 1043027456.0, "95": 1043027456.0, "100": 1043027456.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 3187304960.0, "5": 3187305472.0, "10": 
3187305472.0, "15": 3187305472.0, "20": 3544935936.0, "25": 3544935936.0, "30": 3544935936.0, "35": 3544935936.0, "40": 3544935936.0, "45": 3544935936.0, "50": 3544935936.0, "55": 3544935936.0, "60": 3544935936.0, "65": 3544935936.0, "70": 3544935936.0, "75": 3544935936.0, "80": 3544935936.0, "85": 3544935936.0, "90": 3544935936.0, "95": 3544935936.0, "100": 3544935936.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 7.24348, "5": 0.12513, "10": 0.12256, "15": 0.12334, "20": 0.13378, "25": 0.14306, "30": 0.13313, "35": 0.13322, "40": 0.13261, "45": 0.13265, "50": 0.13289, "55": 0.13101, "60": 0.13018, "65": 0.13122, "70": 0.12989, "75": 0.13081, "80": 0.13089, "85": 0.13011, "90": 0.1304, "95": 0.13232, "100": 0.13063}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1953.0, "25": 1818.0, "30": 2298.0, "35": 2083.0, "40": 2287.0, "45": 2243.0, "50": 2426.0, "55": 2440.0, "60": 2493.0, "65": 2411.0, "70": 3119.0, "75": 2884.0, "80": 3549.0, "85": 3721.0, "90": 3452.0, "95": 3340.0, "100": 3338.0}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.83568, + "2": 10.83266, + "3": 10.83151, + "4": 10.80343, + "5": 10.8567, + "6": 10.86778, + "7": 10.84836, + "8": 10.84624, + "9": 10.85924, + "10": 10.81478, + "11": 10.89821, + "12": 10.88433, + "13": 10.88963, + "14": 10.90075, + "15": 10.85098, + "16": 10.86603, + "17": 10.85455, + "18": 10.88507, + "19": 10.8773, + "20": 10.85865, + "21": 10.85654, + "22": 10.79685, + "23": 10.88724, + "24": 10.82649, + "25": 10.81343, + "26": 10.82705, + "27": 10.84612, + "28": 10.84227, + "29": 10.85329, + "30": 10.74969, + "31": 10.63041, + "32": 10.79004, + "33": 10.77234, + "34": 10.65722, + "35": 10.65857, + "36": 10.61583, + "37": 10.67536, + "38": 10.58101, + "39": 10.69083, + "40": 10.50359, + "41": 10.52777, + 
"42": 10.55371, + "43": 10.28636, + "44": 10.36369, + "45": 10.27381, + "46": 10.24567, + "47": 10.45103, + "48": 10.23707, + "49": 9.99555, + "50": 10.25589, + "51": 10.2013, + "52": 10.10855, + "53": 10.34609, + "54": 10.24857, + "55": 10.18782, + "56": 9.95521, + "57": 9.81221, + "58": 10.10875, + "59": 9.8863, + "60": 9.80901, + "61": 9.94824, + "62": 10.1999, + "63": 9.6443, + "64": 9.9951, + "65": 9.24475, + "66": 9.90917, + "67": 9.59735, + "68": 9.97285, + "69": 9.96333, + "70": 9.91038, + "71": 9.78596, + "72": 9.77264, + "73": 9.6618, + "74": 9.16289, + "75": 9.58121, + "76": 9.26138, + "77": 10.17614, + "78": 9.85644, + "79": 9.50644, + "80": 9.54103, + "81": 9.61313, + "82": 9.80668, + "83": 9.44696, + "84": 9.52782, + "85": 9.72633, + "86": 9.19099, + "87": 9.68736, + "88": 9.85216, + "89": 9.71335, + "90": 9.90316, + "91": 9.46063, + "92": 9.46058, + "93": 9.19418, + "94": 8.93434, + "95": 9.60258, + "96": 9.61852, + "97": 9.39595, + "98": 9.76012, + "99": 8.98669, + "100": 9.49406 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 684610560.0, + "2": 685659136.0, + "3": 685659136.0, + "4": 685659136.0, + "5": 685659136.0, + "6": 685659136.0, + "7": 685659136.0, + "8": 685659136.0, + "9": 685659136.0, + "10": 685659136.0, + "11": 685659136.0, + "12": 685659136.0, + "13": 685659136.0, + "14": 685659136.0, + "15": 685659136.0, + "16": 685659136.0, + "17": 1043027456.0, + "18": 1043027456.0, + "19": 1043027456.0, + "20": 1043027456.0, + "21": 1043027456.0, + "22": 1043027456.0, + "23": 1043027456.0, + "24": 1043027456.0, + "25": 1043027456.0, + "26": 1043027456.0, + "27": 1043027456.0, + "28": 1043027456.0, + "29": 1043027456.0, + "30": 1043027456.0, + "31": 1043027456.0, + "32": 1043027456.0, + "33": 1043027456.0, + "34": 1043027456.0, + "35": 1043027456.0, + "36": 1043027456.0, + "37": 1043027456.0, + "38": 1043027456.0, + "39": 1043027456.0, + "40": 1043027456.0, + "41": 1043027456.0, + 
"42": 1043027456.0, + "43": 1043027456.0, + "44": 1043027456.0, + "45": 1043027456.0, + "46": 1043027456.0, + "47": 1043027456.0, + "48": 1043027456.0, + "49": 1043027456.0, + "50": 1043027456.0, + "51": 1043027456.0, + "52": 1043027456.0, + "53": 1043027456.0, + "54": 1043027456.0, + "55": 1043027456.0, + "56": 1043027456.0, + "57": 1043027456.0, + "58": 1043027456.0, + "59": 1043027456.0, + "60": 1043027456.0, + "61": 1043027456.0, + "62": 1043027456.0, + "63": 1043027456.0, + "64": 1043027456.0, + "65": 1043027456.0, + "66": 1043027456.0, + "67": 1043027456.0, + "68": 1043027456.0, + "69": 1043027456.0, + "70": 1043027456.0, + "71": 1043027456.0, + "72": 1043027456.0, + "73": 1043027456.0, + "74": 1043027456.0, + "75": 1043027456.0, + "76": 1043027456.0, + "77": 1043027456.0, + "78": 1043027456.0, + "79": 1043027456.0, + "80": 1043027456.0, + "81": 1043027456.0, + "82": 1043027456.0, + "83": 1043027456.0, + "84": 1043027456.0, + "85": 1043027456.0, + "86": 1043027456.0, + "87": 1043027456.0, + "88": 1043027456.0, + "89": 1043027456.0, + "90": 1043027456.0, + "91": 1043027456.0, + "92": 1043027456.0, + "93": 1043027456.0, + "94": 1043027456.0, + "95": 1043027456.0, + "96": 1043027456.0, + "97": 1043027456.0, + "98": 1043027456.0, + "99": 1043027456.0, + "100": 1043027456.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3187304960.0, + "2": 3187305472.0, + "3": 3187305472.0, + "4": 3187305472.0, + "5": 3187305472.0, + "6": 3187305472.0, + "7": 3187305472.0, + "8": 3187305472.0, + "9": 3187305472.0, + "10": 3187305472.0, + "11": 3187305472.0, + "12": 3187305472.0, + "13": 3187305472.0, + "14": 3187305472.0, + "15": 3187305472.0, + "16": 3187305472.0, + "17": 3187305472.0, + "18": 3544935936.0, + "19": 3544935936.0, + "20": 3544935936.0, + "21": 3544935936.0, + "22": 3544935936.0, + "23": 3544935936.0, + "24": 3544935936.0, + "25": 3544935936.0, + "26": 3544935936.0, + "27": 3544935936.0, + 
"28": 3544935936.0, + "29": 3544935936.0, + "30": 3544935936.0, + "31": 3544935936.0, + "32": 3544935936.0, + "33": 3544935936.0, + "34": 3544935936.0, + "35": 3544935936.0, + "36": 3544935936.0, + "37": 3544935936.0, + "38": 3544935936.0, + "39": 3544935936.0, + "40": 3544935936.0, + "41": 3544935936.0, + "42": 3544935936.0, + "43": 3544935936.0, + "44": 3544935936.0, + "45": 3544935936.0, + "46": 3544935936.0, + "47": 3544935936.0, + "48": 3544935936.0, + "49": 3544935936.0, + "50": 3544935936.0, + "51": 3544935936.0, + "52": 3544935936.0, + "53": 3544935936.0, + "54": 3544935936.0, + "55": 3544935936.0, + "56": 3544935936.0, + "57": 3544935936.0, + "58": 3544935936.0, + "59": 3544935936.0, + "60": 3544935936.0, + "61": 3544935936.0, + "62": 3544935936.0, + "63": 3544935936.0, + "64": 3544935936.0, + "65": 3544935936.0, + "66": 3544935936.0, + "67": 3544935936.0, + "68": 3544935936.0, + "69": 3544935936.0, + "70": 3544935936.0, + "71": 3544935936.0, + "72": 3544935936.0, + "73": 3544935936.0, + "74": 3544935936.0, + "75": 3544935936.0, + "76": 3544935936.0, + "77": 3544935936.0, + "78": 3544935936.0, + "79": 3544935936.0, + "80": 3544935936.0, + "81": 3544935936.0, + "82": 3544935936.0, + "83": 3544935936.0, + "84": 3544935936.0, + "85": 3544935936.0, + "86": 3544935936.0, + "87": 3544935936.0, + "88": 3544935936.0, + "89": 3544935936.0, + "90": 3544935936.0, + "91": 3544935936.0, + "92": 3544935936.0, + "93": 3544935936.0, + "94": 3544935936.0, + "95": 3544935936.0, + "96": 3544935936.0, + "97": 3544935936.0, + "98": 3544935936.0, + "99": 3544935936.0, + "100": 3544935936.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3.54415, + "2": 0.13377, + "3": 0.12455, + "4": 0.10264, + "5": 0.10219, + "6": 0.10272, + "7": 0.10298, + "8": 0.10295, + "9": 0.10258, + "10": 0.10337, + "11": 0.10271, + "12": 0.10191, + "13": 0.10215, + "14": 0.10241, + "15": 0.10208, + "16": 0.10177, + "17": 0.15691, + "18": 
0.11817, + "19": 0.10983, + "20": 0.10994, + "21": 0.11033, + "22": 0.10162, + "23": 0.11031, + "24": 0.11013, + "25": 0.11053, + "26": 0.11093, + "27": 0.10984, + "28": 0.10992, + "29": 0.10976, + "30": 0.11044, + "31": 0.11049, + "32": 0.1109, + "33": 0.11229, + "34": 0.11176, + "35": 0.11192, + "36": 0.1118, + "37": 0.11187, + "38": 0.11171, + "39": 0.1119, + "40": 0.11109, + "41": 0.11066, + "42": 0.11036, + "43": 0.11014, + "44": 0.11085, + "45": 0.11065, + "46": 0.11031, + "47": 0.11096, + "48": 0.11193, + "49": 0.11004, + "50": 0.11026, + "51": 0.12208, + "52": 0.11528, + "53": 0.11393, + "54": 0.11467, + "55": 0.1144, + "56": 0.11475, + "57": 0.1155, + "58": 0.11437, + "59": 0.11509, + "60": 0.11581, + "61": 0.11462, + "62": 0.11503, + "63": 0.1147, + "64": 0.11384, + "65": 0.1139, + "66": 0.11371, + "67": 0.11448, + "68": 0.11386, + "69": 0.11391, + "70": 0.11448, + "71": 0.11388, + "72": 0.1142, + "73": 0.11413, + "74": 0.11463, + "75": 0.11394, + "76": 0.11427, + "77": 0.11359, + "78": 0.11462, + "79": 0.11355, + "80": 0.11396, + "81": 0.11373, + "82": 0.11509, + "83": 0.11377, + "84": 0.11466, + "85": 0.1144, + "86": 0.11501, + "87": 0.11412, + "88": 0.11353, + "89": 0.1148, + "90": 0.1137, + "91": 0.11378, + "92": 0.12007, + "93": 0.1204, + "94": 0.11454, + "95": 0.11432, + "96": 0.11436, + "97": 0.11405, + "98": 0.11395, + "99": 0.11405, + "100": 0.11374 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2249.0, + "18": 2165.0, + "19": 2362.0, + "20": 1953.0, + "21": 1898.0, + "22": "nan", + "23": 2371.0, + "24": 1984.0, + "25": 1818.0, + "26": 1980.0, + "27": 2078.0, + "28": 2467.0, + "29": 2395.0, + "30": 2298.0, + "31": 1682.0, + "32": 2236.0, + "33": 2192.0, + 
"34": 1800.0, + "35": 2083.0, + "36": 2139.0, + "37": 2498.0, + "38": 2218.0, + "39": 2642.0, + "40": 2287.0, + "41": 2344.0, + "42": 2340.0, + "43": 2130.0, + "44": 2069.0, + "45": 2188.0, + "46": 1932.0, + "47": 2670.0, + "48": 2471.0, + "49": 1891.0, + "50": 2416.0, + "51": 2321.0, + "52": 2363.0, + "53": 2925.0, + "54": 2486.0, + "55": 2408.0, + "56": 2298.0, + "57": 2286.0, + "58": 2584.0, + "59": 2358.0, + "60": 2487.0, + "61": 2791.0, + "62": 2751.0, + "63": 2385.0, + "64": 2791.0, + "65": 2372.0, + "66": 2970.0, + "67": 2557.0, + "68": 2857.0, + "69": 2699.0, + "70": 3035.0, + "71": 2940.0, + "72": 2315.0, + "73": 2968.0, + "74": 2205.0, + "75": 2811.0, + "76": 2969.0, + "77": 3296.0, + "78": 3578.0, + "79": 3594.0, + "80": 3509.0, + "81": 3698.0, + "82": 3355.0, + "83": 3205.0, + "84": 3285.0, + "85": 3791.0, + "86": 3303.0, + "87": 3934.0, + "88": 3130.0, + "89": 3809.0, + "90": 3388.0, + "91": 2618.0, + "92": 3412.0, + "93": 3072.0, + "94": 3731.0, + "95": 3357.0, + "96": 3852.0, + "97": 3528.0, + "98": 3616.0, + "99": 3449.0, + "100": 3284.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100_2nd.json new file mode 100644 index 00000000000..562afadc7f9 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": 
"nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.2013, + "52": 10.10855, + "53": 10.34609, + "54": 10.24857, + "55": 10.18782, + "56": 9.95521, + "57": 9.81221, + "58": 10.10875, + "59": 9.8863, + "60": 9.80901, + "61": 9.94824, + "62": 10.1999, + "63": 9.6443, + "64": 9.9951, + "65": 9.24475, + "66": 9.90917, + "67": 9.59735, + "68": 9.97285, + "69": 9.96333, + "70": 9.91038, + "71": 9.78596, + "72": 9.77264, + "73": 9.6618, + "74": 9.16289, + "75": 9.58121, + "76": 9.26138, + "77": 10.17614, + "78": 9.85644, + "79": 9.50644, + "80": 9.54103, + "81": 9.61313, + "82": 9.80668, + "83": 9.44696, + "84": 9.52782, + "85": 9.72633, + "86": 9.19099, + "87": 9.68736, + "88": 9.85216, + "89": 9.71335, + "90": 9.90316, + "91": 9.46063, + "92": 9.46058, + "93": 9.19418, + "94": 8.93434, + "95": 9.60258, + "96": 9.61852, + "97": 9.39595, + "98": 9.76012, + "99": 8.98669, + "100": 9.49406 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", 
+ "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2321.0, + "52": 2363.0, + "53": 2925.0, + "54": 2486.0, + "55": 2408.0, + "56": 2298.0, + "57": 2286.0, + "58": 2584.0, + "59": 2358.0, + "60": 2487.0, + "61": 2791.0, + "62": 2751.0, + "63": 2385.0, + "64": 2791.0, + "65": 2372.0, + "66": 2970.0, + "67": 2557.0, + "68": 2857.0, + "69": 2699.0, + "70": 3035.0, + "71": 2940.0, + "72": 2315.0, + "73": 2968.0, + "74": 2205.0, + "75": 2811.0, + "76": 2969.0, + "77": 3296.0, + "78": 3578.0, + "79": 3594.0, + "80": 3509.0, + "81": 3698.0, + "82": 3355.0, + "83": 3205.0, + "84": 3285.0, + "85": 3791.0, + "86": 3303.0, + "87": 3934.0, + "88": 3130.0, + "89": 3809.0, + "90": 3388.0, + "91": 2618.0, + "92": 3412.0, + "93": 3072.0, + "94": 3731.0, + "95": 3357.0, + "96": 3852.0, + "97": 3528.0, + "98": 3616.0, + "99": 3449.0, + "100": 3284.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1044076032.0, + "52": 1045124608.0, + "53": 1045124608.0, + "54": 1045124608.0, + "55": 1045124608.0, + "56": 1045124608.0, + "57": 1045124608.0, + "58": 1045124608.0, + "59": 1045124608.0, + "60": 1045124608.0, + "61": 1045124608.0, + "62": 1045124608.0, + "63": 
1045124608.0, + "64": 1045124608.0, + "65": 1045124608.0, + "66": 1045124608.0, + "67": 1045124608.0, + "68": 1045124608.0, + "69": 1045124608.0, + "70": 1045124608.0, + "71": 1045124608.0, + "72": 1045124608.0, + "73": 1045124608.0, + "74": 1045124608.0, + "75": 1045124608.0, + "76": 1045124608.0, + "77": 1045124608.0, + "78": 1045124608.0, + "79": 1045124608.0, + "80": 1045124608.0, + "81": 1045124608.0, + "82": 1045124608.0, + "83": 1045124608.0, + "84": 1045124608.0, + "85": 1045124608.0, + "86": 1045124608.0, + "87": 1045124608.0, + "88": 1045124608.0, + "89": 1045124608.0, + "90": 1045124608.0, + "91": 1045124608.0, + "92": 1045124608.0, + "93": 1045124608.0, + "94": 1045124608.0, + "95": 1045124608.0, + "96": 1045124608.0, + "97": 1045124608.0, + "98": 1045124608.0, + "99": 1045124608.0, + "100": 1045124608.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3546769920.0, + "52": 3546770944.0, + "53": 3546770944.0, + "54": 3546770944.0, + "55": 3546770944.0, + "56": 3546770944.0, + "57": 3546770944.0, + "58": 3546770944.0, + "59": 3546770944.0, + "60": 3546770944.0, + "61": 3546770944.0, + "62": 3546770944.0, + "63": 3546770944.0, + "64": 3546770944.0, + "65": 
3546770944.0, + "66": 3546770944.0, + "67": 3546770944.0, + "68": 3546770944.0, + "69": 3546770944.0, + "70": 3546770944.0, + "71": 3546770944.0, + "72": 3546770944.0, + "73": 3546770944.0, + "74": 3546770944.0, + "75": 3546770944.0, + "76": 3546770944.0, + "77": 3546770944.0, + "78": 3546770944.0, + "79": 3546770944.0, + "80": 3546770944.0, + "81": 3546770944.0, + "82": 3546770944.0, + "83": 3546770944.0, + "84": 3546770944.0, + "85": 3546770944.0, + "86": 3546770944.0, + "87": 3546770944.0, + "88": 3546770944.0, + "89": 3546770944.0, + "90": 3546770944.0, + "91": 3546770944.0, + "92": 3546770944.0, + "93": 3546770944.0, + "94": 3546770944.0, + "95": 3546770944.0, + "96": 3546770944.0, + "97": 3546770944.0, + "98": 3546770944.0, + "99": 3546770944.0, + "100": 3546770944.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.6476, + "52": 0.13199, + "53": 0.11408, + "54": 0.11307, + "55": 0.11409, + "56": 0.11247, + "57": 0.1156, + "58": 0.1145, + "59": 0.11417, + "60": 0.11341, + "61": 0.11362, + "62": 0.11325, + "63": 0.11392, + "64": 0.11377, + "65": 0.1147, + "66": 0.11413, + "67": 0.11405, + "68": 0.11324, + "69": 0.11372, + "70": 0.11377, + "71": 0.11356, + "72": 0.11352, 
+ "73": 0.11403, + "74": 0.11362, + "75": 0.11349, + "76": 0.11421, + "77": 0.11375, + "78": 0.11412, + "79": 0.11355, + "80": 0.11386, + "81": 0.11419, + "82": 0.11416, + "83": 0.11393, + "84": 0.11344, + "85": 0.11365, + "86": 0.11411, + "87": 0.1142, + "88": 0.11406, + "89": 0.11433, + "90": 0.11364, + "91": 0.11411, + "92": 0.11433, + "93": 0.11448, + "94": 0.11375, + "95": 0.11569, + "96": 0.11395, + "97": 0.11375, + "98": 0.11361, + "99": 0.11378, + "100": 0.11406 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..94a972ee670 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.76988, + "2": 10.77993, + "3": 10.77871, + "4": 10.73971, + "5": 10.81287, + "6": 10.82056, + "7": 10.79701, + "8": 10.78537, + "9": 10.79592, + "10": 10.72505, + "11": 10.86085, + "12": 10.82094, + "13": 10.83653, + "14": 10.85836, + "15": 10.80259, + "16": 10.80847, + "17": 10.77612, + "18": 10.81818, + "19": 10.8171, + "20": 10.78975, + "21": 10.79586, + "22": 10.71325, + "23": 10.84137, + "24": 10.76141, + "25": 10.73556, + "26": 10.76141, + "27": 10.78766, + "28": 10.79283, + "29": 10.81938, + "30": 10.68037, + "31": 10.5422, + "32": 10.72471, + "33": 10.71833, + "34": 10.58577, + "35": 10.5941, + "36": 10.54254, + "37": 10.62391, + "38": 10.50727, + "39": 10.65, + "40": 10.42314, + "41": 10.45946, + "42": 10.50017, + "43": 10.20049, + "44": 10.28686, + "45": 10.1806, + "46": 10.168, + "47": 10.40733, + "48": 10.16626, + "49": 9.90217, + "50": 10.18179, + "51": 10.13864, + "52": 10.03803, + "53": 10.2953, + "54": 10.19383, + "55": 
10.14359, + "56": 9.8908, + "57": 9.73702, + "58": 10.05022, + "59": 9.83828, + "60": 9.74551, + "61": 9.90679, + "62": 10.16216, + "63": 9.59842, + "64": 9.95194, + "65": 9.18904, + "66": 9.87164, + "67": 9.56047, + "68": 9.94233, + "69": 9.94285, + "70": 9.8854, + "71": 9.77852, + "72": 9.73861, + "73": 9.63511, + "74": 9.10351, + "75": 9.55716, + "76": 9.23197, + "77": 10.16792, + "78": 9.83943, + "79": 9.49691, + "80": 9.52327, + "81": 9.60219, + "82": 9.8054, + "83": 9.43936, + "84": 9.51953, + "85": 9.72086, + "86": 9.18604, + "87": 9.68762, + "88": 9.84868, + "89": 9.70441, + "90": 9.91638, + "91": 9.45088, + "92": 9.45495, + "93": 9.1952, + "94": 8.93245, + "95": 9.61119, + "96": 9.62586, + "97": 9.39727, + "98": 9.76341, + "99": 8.99611, + "100": 9.50318 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 685659136.0, + "2": 685659136.0, + "3": 685659136.0, + "4": 685659136.0, + "5": 685659136.0, + "6": 685659136.0, + "7": 685659136.0, + "8": 685659136.0, + "9": 685659136.0, + "10": 685659136.0, + "11": 685659136.0, + "12": 685659136.0, + "13": 685659136.0, + "14": 685659136.0, + "15": 685659136.0, + "16": 685659136.0, + "17": 1043027456.0, + "18": 1043027456.0, + "19": 1043027456.0, + "20": 1043027456.0, + "21": 1043027456.0, + "22": 1043027456.0, + "23": 1043027456.0, + "24": 1043027456.0, + "25": 1043027456.0, + "26": 1043027456.0, + "27": 1043027456.0, + "28": 1043027456.0, + "29": 1043027456.0, + "30": 1043027456.0, + "31": 1043027456.0, + "32": 1043027456.0, + "33": 1043027456.0, + "34": 1043027456.0, + "35": 1043027456.0, + "36": 1043027456.0, + "37": 1043027456.0, + "38": 1043027456.0, + "39": 1043027456.0, + "40": 1043027456.0, + "41": 1043027456.0, + "42": 1043027456.0, + "43": 1043027456.0, + "44": 1043027456.0, + "45": 1043027456.0, + "46": 1043027456.0, + "47": 1043027456.0, + "48": 1043027456.0, + "49": 1043027456.0, + "50": 1043027456.0, + "51": 1043027456.0, + "52": 
1043027456.0, + "53": 1043027456.0, + "54": 1043027456.0, + "55": 1043027456.0, + "56": 1043027456.0, + "57": 1043027456.0, + "58": 1043027456.0, + "59": 1043027456.0, + "60": 1043027456.0, + "61": 1043027456.0, + "62": 1043027456.0, + "63": 1043027456.0, + "64": 1043027456.0, + "65": 1043027456.0, + "66": 1043027456.0, + "67": 1043027456.0, + "68": 1043027456.0, + "69": 1043027456.0, + "70": 1043027456.0, + "71": 1043027456.0, + "72": 1043027456.0, + "73": 1043027456.0, + "74": 1043027456.0, + "75": 1043027456.0, + "76": 1043027456.0, + "77": 1043027456.0, + "78": 1043027456.0, + "79": 1043027456.0, + "80": 1043027456.0, + "81": 1043027456.0, + "82": 1043027456.0, + "83": 1043027456.0, + "84": 1043027456.0, + "85": 1043027456.0, + "86": 1043027456.0, + "87": 1043027456.0, + "88": 1043027456.0, + "89": 1043027456.0, + "90": 1043027456.0, + "91": 1043027456.0, + "92": 1043027456.0, + "93": 1043027456.0, + "94": 1043027456.0, + "95": 1043027456.0, + "96": 1043027456.0, + "97": 1043027456.0, + "98": 1043027456.0, + "99": 1043027456.0, + "100": 1043027456.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 3187304960.0, + "2": 3187305472.0, + "3": 3187305472.0, + "4": 3187305472.0, + "5": 3187305472.0, + "6": 3187305472.0, + "7": 3187305472.0, + "8": 3187305472.0, + "9": 3187305472.0, + "10": 3187305472.0, + "11": 3187305472.0, + "12": 3187305472.0, + "13": 3187305472.0, + "14": 3187305472.0, + "15": 3187305472.0, + "16": 3187305472.0, + "17": 3187305472.0, + "18": 3544935936.0, + "19": 3544935936.0, + "20": 3544935936.0, + "21": 3544935936.0, + "22": 3544935936.0, + "23": 3544935936.0, + "24": 3544935936.0, + "25": 3544935936.0, + "26": 3544935936.0, + "27": 3544935936.0, + "28": 3544935936.0, + "29": 3544935936.0, + "30": 3544935936.0, + "31": 3544935936.0, + "32": 3544935936.0, + "33": 3544935936.0, + "34": 3544935936.0, + "35": 3544935936.0, + "36": 3544935936.0, + "37": 3544935936.0, + "38": 
3544935936.0, + "39": 3544935936.0, + "40": 3544935936.0, + "41": 3544935936.0, + "42": 3544935936.0, + "43": 3544935936.0, + "44": 3544935936.0, + "45": 3544935936.0, + "46": 3544935936.0, + "47": 3544935936.0, + "48": 3544935936.0, + "49": 3544935936.0, + "50": 3544935936.0, + "51": 3544935936.0, + "52": 3544935936.0, + "53": 3544935936.0, + "54": 3544935936.0, + "55": 3544935936.0, + "56": 3544935936.0, + "57": 3544935936.0, + "58": 3544935936.0, + "59": 3544935936.0, + "60": 3544935936.0, + "61": 3544935936.0, + "62": 3544935936.0, + "63": 3544935936.0, + "64": 3544935936.0, + "65": 3544935936.0, + "66": 3544935936.0, + "67": 3544935936.0, + "68": 3544935936.0, + "69": 3544935936.0, + "70": 3544935936.0, + "71": 3544935936.0, + "72": 3544935936.0, + "73": 3544935936.0, + "74": 3544935936.0, + "75": 3544935936.0, + "76": 3544935936.0, + "77": 3544935936.0, + "78": 3544935936.0, + "79": 3544935936.0, + "80": 3544935936.0, + "81": 3544935936.0, + "82": 3544935936.0, + "83": 3544935936.0, + "84": 3544935936.0, + "85": 3544935936.0, + "86": 3544935936.0, + "87": 3544935936.0, + "88": 3544935936.0, + "89": 3544935936.0, + "90": 3544935936.0, + "91": 3544935936.0, + "92": 3544935936.0, + "93": 3544935936.0, + "94": 3544935936.0, + "95": 3544935936.0, + "96": 3544935936.0, + "97": 3544935936.0, + "98": 3544935936.0, + "99": 3544935936.0, + "100": 3544935936.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6.71621, + "2": 0.15096, + "3": 0.1401, + "4": 0.12751, + "5": 0.12582, + "6": 0.12762, + "7": 0.29588, + "8": 0.12501, + "9": 0.12257, + "10": 0.1231, + "11": 0.12179, + "12": 0.12146, + "13": 0.1218, + "14": 0.12189, + "15": 0.11937, + "16": 0.11701, + "17": 0.16358, + "18": 0.1329, + "19": 0.12356, + "20": 0.1223, + "21": 0.11887, + "22": 0.10873, + "23": 0.11776, + "24": 0.11791, + "25": 0.11708, + "26": 0.11725, + "27": 0.12727, + "28": 0.2171, + "29": 0.1145, + "30": 0.11344, + "31": 0.11497, + 
"32": 0.11511, + "33": 0.1157, + "34": 0.11565, + "35": 0.11684, + "36": 0.11679, + "37": 0.11675, + "38": 0.11549, + "39": 0.3291, + "40": 0.4913, + "41": 0.12148, + "42": 0.11374, + "43": 0.11395, + "44": 0.11452, + "45": 0.11465, + "46": 0.11512, + "47": 0.11552, + "48": 0.11487, + "49": 0.11358, + "50": 0.11314, + "51": 0.14003, + "52": 0.11456, + "53": 0.11604, + "54": 0.11224, + "55": 0.12526, + "56": 0.11247, + "57": 0.11315, + "58": 0.11222, + "59": 0.11353, + "60": 0.1122, + "61": 0.11312, + "62": 0.11183, + "63": 0.1147, + "64": 0.11171, + "65": 0.11298, + "66": 0.11177, + "67": 0.11322, + "68": 0.11115, + "69": 0.11243, + "70": 0.11245, + "71": 0.1128, + "72": 0.1133, + "73": 0.11263, + "74": 0.11369, + "75": 0.11191, + "76": 0.11291, + "77": 0.11243, + "78": 0.11353, + "79": 0.1277, + "80": 0.11295, + "81": 0.11234, + "82": 0.1138, + "83": 0.11202, + "84": 0.11873, + "85": 0.11198, + "86": 0.11416, + "87": 0.11434, + "88": 0.11401, + "89": 0.11423, + "90": 0.11109, + "91": 0.11252, + "92": 0.11221, + "93": 0.11285, + "94": 0.11189, + "95": 0.11269, + "96": 0.12639, + "97": 0.12758, + "98": 0.12878, + "99": 0.1295, + "100": 0.15151 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2223.0, + "18": 2141.0, + "19": 2432.0, + "20": 1893.0, + "21": 1918.0, + "22": "nan", + "23": 2243.0, + "24": 1920.0, + "25": 1874.0, + "26": 1885.0, + "27": 2072.0, + "28": 2375.0, + "29": 2356.0, + "30": 2316.0, + "31": 1689.0, + "32": 2250.0, + "33": 2111.0, + "34": 1822.0, + "35": 1976.0, + "36": 2089.0, + "37": 2394.0, + "38": 2078.0, + "39": 2662.0, + "40": 2284.0, + "41": 2402.0, + "42": 2250.0, + "43": 2141.0, + "44": 2112.0, + "45": 2341.0, + "46": 2005.0, + "47": 2567.0, + "48": 
2332.0, + "49": 1858.0, + "50": 2478.0, + "51": 2321.0, + "52": 2270.0, + "53": 2929.0, + "54": 2493.0, + "55": 2470.0, + "56": 2387.0, + "57": 2321.0, + "58": 2774.0, + "59": 2339.0, + "60": 2654.0, + "61": 2810.0, + "62": 2863.0, + "63": 2582.0, + "64": 2851.0, + "65": 2686.0, + "66": 2969.0, + "67": 2680.0, + "68": 2913.0, + "69": 2669.0, + "70": 2988.0, + "71": 2881.0, + "72": 2465.0, + "73": 3188.0, + "74": 2209.0, + "75": 2665.0, + "76": 3308.0, + "77": 3227.0, + "78": 3393.0, + "79": 3433.0, + "80": 3273.0, + "81": 3620.0, + "82": 3491.0, + "83": 3140.0, + "84": 3225.0, + "85": 3622.0, + "86": 3290.0, + "87": 4023.0, + "88": 3187.0, + "89": 3975.0, + "90": 3576.0, + "91": 2689.0, + "92": 3474.0, + "93": 3202.0, + "94": 3608.0, + "95": 3510.0, + "96": 3634.0, + "97": 3500.0, + "98": 3933.0, + "99": 3502.0, + "100": 3134.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json index 094be8516a7..605b5aee03b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json @@ -48,13 +48,13 @@ "42": 10.55371, "43": 10.28636, "44": 10.36369, - "45": 10.2738, + "45": 10.27381, "46": 10.24567, "47": 10.45103, "48": 10.23707, "49": 9.99555, - "50": 10.25588, - "51": 10.20129, + "50": 10.25589, + "51": 10.2013, "52": 10.10855, "53": 10.34609, "54": 10.24857, @@ -66,26 +66,26 @@ "60": 9.80901, "61": 9.94824, "62": 10.1999, - "63": 9.64431, + "63": 9.6443, "64": 9.9951, "65": 9.24475, "66": 9.90917, "67": 9.59735, "68": 9.97285, - "69": 9.96332, - "70": 9.91039, + "69": 9.96333, + "70": 9.91038, "71": 9.78596, - "72": 9.77263, + "72": 9.77264, "73": 9.6618, "74": 9.16289, - "75": 9.5812, - "76": 
9.26137, - "77": 10.17615, + "75": 9.58121, + "76": 9.26138, + "77": 10.17614, "78": 9.85644, "79": 9.50644, - "80": 9.54102, + "80": 9.54103, "81": 9.61313, - "82": 9.80669, + "82": 9.80668, "83": 9.44696, "84": 9.52782, "85": 9.72633, @@ -94,16 +94,16 @@ "88": 9.85216, "89": 9.71335, "90": 9.90316, - "91": 9.46064, - "92": 9.46059, + "91": 9.46063, + "92": 9.46058, "93": 9.19418, "94": 8.93434, "95": 9.60258, "96": 9.61852, - "97": 9.39594, + "97": 9.39595, "98": 9.76012, - "99": 8.98668, - "100": 9.49405 + "99": 8.98669, + "100": 9.49406 } }, "mem-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 7.5468, - "2": 0.1514, - "3": 0.11679, - "4": 0.11442, - "5": 0.11418, - "6": 0.1134, - "7": 0.11341, - "8": 0.11355, - "9": 0.11332, - "10": 0.11336, - "11": 0.11414, - "12": 0.11322, - "13": 0.11309, - "14": 0.11355, - "15": 0.11296, - "16": 0.11311, - "17": 0.19183, - "18": 0.13278, - "19": 0.12368, - "20": 0.1244, - "21": 0.12354, - "22": 0.11533, - "23": 0.12281, - "24": 0.12403, - "25": 0.12406, - "26": 0.12339, - "27": 0.12448, - "28": 0.12265, - "29": 0.1229, - "30": 0.1231, - "31": 0.12325, - "32": 0.12261, - "33": 0.12283, - "34": 0.12275, - "35": 0.12311, - "36": 0.12273, - "37": 0.12367, - "38": 0.12288, - "39": 0.12297, - "40": 0.12264, - "41": 0.1206, - "42": 0.12099, - "43": 0.12152, - "44": 0.12016, - "45": 0.12042, - "46": 0.12101, - "47": 0.12019, - "48": 0.12057, - "49": 0.12054, - "50": 0.12043, - "51": 0.12804, - "52": 0.12188, - "53": 0.12082, - "54": 0.12046, - "55": 0.12243, - "56": 0.12099, - "57": 0.12158, - "58": 0.12118, - "59": 0.12094, - "60": 0.12085, - "61": 0.12158, - "62": 0.12129, - "63": 0.12239, - "64": 0.12127, - "65": 0.12091, - "66": 0.12161, - "67": 0.12115, - "68": 0.12107, - "69": 0.12194, - "70": 0.12208, - "71": 0.12158, - "72": 0.12253, - "73": 0.12311, - "74": 0.12157, - "75": 0.12129, - "76": 0.12243, - "77": 0.1209, - "78": 0.12118, - "79": 0.12236, - "80": 0.12456, - 
"81": 0.12169, - "82": 0.12201, - "83": 0.12239, - "84": 0.12311, - "85": 0.12253, - "86": 0.12237, - "87": 0.12156, - "88": 0.12306, - "89": 0.12961, - "90": 0.12349, - "91": 0.12189, - "92": 0.12121, - "93": 0.12178, - "94": 0.12615, - "95": 0.12189, - "96": 0.12145, - "97": 0.12112, - "98": 0.12242, - "99": 0.12142, - "100": 0.12094 + "1": 3.95366, + "2": 0.14871, + "3": 0.12763, + "4": 0.11208, + "5": 0.11074, + "6": 0.11007, + "7": 0.11082, + "8": 0.11022, + "9": 0.11047, + "10": 0.11064, + "11": 0.11173, + "12": 0.11146, + "13": 0.1105, + "14": 0.10955, + "15": 0.10949, + "16": 0.10939, + "17": 0.18086, + "18": 0.12719, + "19": 0.11742, + "20": 0.11731, + "21": 0.11723, + "22": 0.1099, + "23": 0.11923, + "24": 0.12129, + "25": 0.12214, + "26": 0.12333, + "27": 0.11905, + "28": 0.11908, + "29": 0.12058, + "30": 0.11948, + "31": 0.1201, + "32": 0.12035, + "33": 0.11991, + "34": 0.12012, + "35": 0.12013, + "36": 0.12016, + "37": 0.11941, + "38": 0.1201, + "39": 0.1201, + "40": 0.11958, + "41": 0.12136, + "42": 0.11979, + "43": 0.11986, + "44": 0.12054, + "45": 0.12036, + "46": 0.12029, + "47": 0.12065, + "48": 0.12009, + "49": 0.1203, + "50": 0.11976, + "51": 0.12632, + "52": 0.11795, + "53": 0.11564, + "54": 0.11608, + "55": 0.11612, + "56": 0.11603, + "57": 0.11792, + "58": 0.11634, + "59": 0.11727, + "60": 0.1161, + "61": 0.11695, + "62": 0.13389, + "63": 0.11729, + "64": 0.11589, + "65": 0.11724, + "66": 0.11796, + "67": 0.11759, + "68": 0.1183, + "69": 0.11749, + "70": 0.1181, + "71": 0.11707, + "72": 0.11611, + "73": 0.11701, + "74": 0.11673, + "75": 0.11595, + "76": 0.11658, + "77": 0.1163, + "78": 0.11681, + "79": 0.11598, + "80": 0.11662, + "81": 0.11633, + "82": 0.11636, + "83": 0.11597, + "84": 0.11547, + "85": 0.11591, + "86": 0.11618, + "87": 0.1157, + "88": 0.11607, + "89": 0.11626, + "90": 0.115, + "91": 0.11601, + "92": 0.11575, + "93": 0.11688, + "94": 0.11552, + "95": 0.11702, + "96": 0.11567, + "97": 0.1166, + "98": 0.11652, + "99": 0.11578, + 
"100": 0.11584 } }, "num-zeros": { @@ -473,65 +473,65 @@ "39": 2642.0, "40": 2287.0, "41": 2344.0, - "42": 2304.0, - "43": 2098.0, - "44": 2107.0, - "45": 2243.0, - "46": 1960.0, - "47": 2729.0, - "48": 2418.0, - "49": 1910.0, - "50": 2426.0, - "51": 2335.0, - "52": 2407.0, - "53": 2888.0, - "54": 2477.0, - "55": 2440.0, - "56": 2286.0, - "57": 2340.0, - "58": 2652.0, - "59": 2321.0, - "60": 2493.0, - "61": 2812.0, - "62": 2711.0, - "63": 2367.0, - "64": 2802.0, - "65": 2411.0, - "66": 2869.0, - "67": 2577.0, - "68": 2859.0, - "69": 2524.0, - "70": 3119.0, - "71": 2926.0, - "72": 2251.0, - "73": 2929.0, - "74": 2110.0, - "75": 2884.0, - "76": 2992.0, - "77": 3380.0, - "78": 3484.0, - "79": 3533.0, - "80": 3549.0, - "81": 3616.0, - "82": 3347.0, - "83": 3124.0, - "84": 3276.0, - "85": 3721.0, - "86": 3207.0, - "87": 3941.0, - "88": 3250.0, - "89": 3863.0, - "90": 3452.0, - "91": 2630.0, - "92": 3431.0, - "93": 3123.0, - "94": 3671.0, - "95": 3340.0, - "96": 3874.0, - "97": 3519.0, - "98": 3727.0, - "99": 3447.0, - "100": 3338.0 + "42": 2340.0, + "43": 2130.0, + "44": 2069.0, + "45": 2188.0, + "46": 1932.0, + "47": 2670.0, + "48": 2471.0, + "49": 1891.0, + "50": 2416.0, + "51": 2321.0, + "52": 2363.0, + "53": 2925.0, + "54": 2486.0, + "55": 2408.0, + "56": 2298.0, + "57": 2286.0, + "58": 2584.0, + "59": 2358.0, + "60": 2487.0, + "61": 2791.0, + "62": 2751.0, + "63": 2385.0, + "64": 2791.0, + "65": 2372.0, + "66": 2970.0, + "67": 2557.0, + "68": 2857.0, + "69": 2699.0, + "70": 3035.0, + "71": 2940.0, + "72": 2315.0, + "73": 2968.0, + "74": 2205.0, + "75": 2811.0, + "76": 2969.0, + "77": 3296.0, + "78": 3578.0, + "79": 3594.0, + "80": 3509.0, + "81": 3698.0, + "82": 3355.0, + "83": 3205.0, + "84": 3285.0, + "85": 3791.0, + "86": 3303.0, + "87": 3934.0, + "88": 3130.0, + "89": 3809.0, + "90": 3388.0, + "91": 2618.0, + "92": 3412.0, + "93": 3072.0, + "94": 3731.0, + "95": 3357.0, + "96": 3852.0, + "97": 3528.0, + "98": 3616.0, + "99": 3449.0, + "100": 3284.0 } } } \ No 
newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..9f64cb131f6 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.2013, + "52": 10.10855, + "53": 10.34609, + "54": 10.24857, + "55": 10.18782, + "56": 9.95521, + "57": 9.81221, + "58": 10.10875, + "59": 9.8863, + "60": 9.80901, + "61": 9.94824, + "62": 10.1999, + "63": 9.6443, + "64": 9.9951, + "65": 9.24475, + "66": 9.90917, + "67": 9.59735, + "68": 9.97285, + "69": 9.96333, + "70": 9.91038, + "71": 9.78596, + "72": 9.77264, + "73": 9.6618, + "74": 9.16289, + "75": 9.58121, + "76": 9.26138, + "77": 10.17614, + "78": 9.85644, + "79": 9.50644, + "80": 9.54103, + "81": 9.61313, + "82": 9.80668, + "83": 9.44696, + "84": 9.52782, + "85": 9.72633, + "86": 9.19099, + "87": 9.68736, + "88": 9.85216, + "89": 9.71335, + "90": 9.90316, + "91": 9.46063, + 
"92": 9.46058, + "93": 9.19418, + "94": 8.93434, + "95": 9.60258, + "96": 9.61852, + "97": 9.39595, + "98": 9.76012, + "99": 8.98669, + "100": 9.49406 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2321.0, + "52": 2363.0, + "53": 2925.0, + "54": 2486.0, + "55": 2408.0, + "56": 2298.0, + "57": 2286.0, + "58": 2584.0, + "59": 2358.0, + "60": 2487.0, + "61": 2791.0, + "62": 2751.0, + "63": 2385.0, + "64": 2791.0, + "65": 2372.0, + "66": 2970.0, + "67": 2557.0, + "68": 2857.0, + "69": 2699.0, + "70": 3035.0, + "71": 2940.0, + "72": 2315.0, + "73": 2968.0, + "74": 2205.0, + "75": 2811.0, + "76": 2969.0, + "77": 3296.0, + "78": 3578.0, + "79": 3594.0, + "80": 3509.0, + "81": 3698.0, + "82": 3355.0, + "83": 3205.0, + "84": 3285.0, + "85": 3791.0, + "86": 3303.0, + "87": 3934.0, + "88": 3130.0, + "89": 3809.0, + "90": 3388.0, + "91": 2618.0, + "92": 3412.0, + "93": 3072.0, + "94": 3731.0, + "95": 3357.0, + "96": 3852.0, + "97": 3528.0, + "98": 3616.0, + "99": 3449.0, + "100": 3284.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", 
+ "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1044076032.0, + "52": 1045124608.0, + "53": 1045124608.0, + "54": 1045124608.0, + "55": 1045124608.0, + "56": 1045124608.0, + "57": 1045124608.0, + "58": 1045124608.0, + "59": 1045124608.0, + "60": 1045124608.0, + "61": 1045124608.0, + "62": 1045124608.0, + "63": 1045124608.0, + "64": 1045124608.0, + "65": 1045124608.0, + "66": 1045124608.0, + "67": 1045124608.0, + "68": 1045124608.0, + "69": 1045124608.0, + "70": 1045124608.0, + "71": 1045124608.0, + "72": 1045124608.0, + "73": 1045124608.0, + "74": 1045124608.0, + "75": 1045124608.0, + "76": 1045124608.0, + "77": 1045124608.0, + "78": 1045124608.0, + "79": 1045124608.0, + "80": 1045124608.0, + "81": 1045124608.0, + "82": 1045124608.0, + "83": 1045124608.0, + "84": 1045124608.0, + "85": 1045124608.0, + "86": 1045124608.0, + "87": 1045124608.0, + "88": 1045124608.0, + "89": 1045124608.0, + "90": 1045124608.0, + "91": 1045124608.0, + "92": 1045124608.0, + "93": 1045124608.0, + "94": 1045124608.0, + "95": 1045124608.0, + "96": 1045124608.0, + "97": 1045124608.0, + "98": 1045124608.0, + "99": 1045124608.0, + "100": 1045124608.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + 
"11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3546769920.0, + "52": 3546770944.0, + "53": 3546770944.0, + "54": 3546770944.0, + "55": 3546770944.0, + "56": 3546770944.0, + "57": 3546770944.0, + "58": 3546770944.0, + "59": 3546770944.0, + "60": 3546770944.0, + "61": 3546770944.0, + "62": 3546770944.0, + "63": 3546770944.0, + "64": 3546770944.0, + "65": 3546770944.0, + "66": 3546770944.0, + "67": 3546770944.0, + "68": 3546770944.0, + "69": 3546770944.0, + "70": 3546770944.0, + "71": 3546770944.0, + "72": 3546770944.0, + "73": 3546770944.0, + "74": 3546770944.0, + "75": 3546770944.0, + "76": 3546770944.0, + "77": 3546770944.0, + "78": 3546770944.0, + "79": 3546770944.0, + "80": 3546770944.0, + "81": 3546770944.0, + "82": 3546770944.0, + "83": 3546770944.0, + "84": 3546770944.0, + "85": 3546770944.0, + "86": 3546770944.0, + "87": 3546770944.0, + "88": 3546770944.0, + "89": 3546770944.0, + "90": 3546770944.0, + "91": 3546770944.0, + "92": 3546770944.0, + "93": 3546770944.0, + "94": 3546770944.0, + "95": 3546770944.0, + "96": 3546770944.0, + "97": 3546770944.0, + "98": 3546770944.0, + "99": 3546770944.0, + "100": 3546770944.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": 
"nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 6.67329, + "52": 0.6111, + "53": 0.12668, + "54": 0.11864, + "55": 0.11902, + "56": 0.11865, + "57": 0.11929, + "58": 0.11948, + "59": 0.11768, + "60": 0.11801, + "61": 0.1175, + "62": 0.11795, + "63": 0.11724, + "64": 0.11676, + "65": 0.11866, + "66": 0.11629, + "67": 0.11669, + "68": 0.11697, + "69": 0.11697, + "70": 0.11633, + "71": 0.11621, + "72": 0.11651, + "73": 0.11676, + "74": 0.11645, + "75": 0.11641, + "76": 0.11594, + "77": 0.1156, + "78": 0.11596, + "79": 0.11564, + "80": 0.11648, + "81": 0.11644, + "82": 0.11653, + "83": 0.11629, + "84": 0.11602, + "85": 0.11583, + "86": 0.11614, + "87": 0.11603, + "88": 0.11569, + "89": 0.11622, + "90": 0.11608, + "91": 0.1162, + "92": 0.11569, + "93": 0.11662, + "94": 0.11609, + "95": 0.11636, + "96": 0.11595, + "97": 0.11685, + "98": 0.11561, + "99": 0.11705, + "100": 0.11648 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..ba4bf2c3eaf --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_gb200.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.83624, + "2": 10.83583, + "3": 10.83479, + "4": 10.79805, + "5": 10.8484, + "6": 10.86489, 
+ "7": 10.82603, + "8": 10.83534, + "9": 10.83891, + "10": 10.7977, + "11": 10.86687, + "12": 10.84885, + "13": 10.85863, + "14": 10.86758, + "15": 10.80015, + "16": 10.78972, + "17": 10.77152, + "18": 10.78567, + "19": 10.78854, + "20": 10.68344, + "21": 10.67601, + "22": 10.52341, + "23": 10.70513, + "24": 10.56287, + "25": 10.51316, + "26": 10.57779, + "27": 10.58628, + "28": 10.54399, + "29": 10.5752, + "30": 10.33793, + "31": 10.06785, + "32": 10.4423, + "33": 10.44058, + "34": 10.19082, + "35": 10.23949, + "36": 10.1889, + "37": 10.32647, + "38": 10.16254, + "39": 10.38467, + "40": 10.04862, + "41": 10.1189, + "42": 10.18954, + "43": 9.80408, + "44": 9.92166, + "45": 9.80316, + "46": 9.79843, + "47": 10.11883, + "48": 9.82786, + "49": 9.50058, + "50": 9.87693 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1555.0, + "2": 1587.0, + "3": 1602.0, + "4": 1704.0, + "5": 1904.0, + "6": 1792.0, + "7": 1789.0, + "8": 1623.0, + "9": 1774.0, + "10": 1392.0, + "11": 1918.0, + "12": 1662.0, + "13": 1853.0, + "14": 1763.0, + "15": 1924.0, + "16": 1899.0, + "17": 1757.0, + "18": 1692.0, + "19": 1706.0, + "20": 1526.0, + "21": 1838.0, + "22": 1629.0, + "23": 1894.0, + "24": 1618.0, + "25": 1572.0, + "26": 1595.0, + "27": 1782.0, + "28": 1886.0, + "29": 1912.0, + "30": 1854.0, + "31": 1632.0, + "32": 1901.0, + "33": 2111.0, + "34": 1981.0, + "35": 1995.0, + "36": 1912.0, + "37": 2387.0, + "38": 2159.0, + "39": 2411.0, + "40": 2161.0, + "41": 2328.0, + "42": 2311.0, + "43": 2019.0, + "44": 1984.0, + "45": 2148.0, + "46": 2353.0, + "47": 2541.0, + "48": 2470.0, + "49": 2248.0, + "50": 2397.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 777900032.0, + "2": 777900032.0, + "3": 777900032.0, + "4": 777900032.0, + "5": 777900032.0, + "6": 777900032.0, + "7": 777900032.0, + "8": 777900032.0, + "9": 777900032.0, + "10": 777900032.0, + "11": 777900032.0, + 
"12": 777900032.0, + "13": 777900032.0, + "14": 777900032.0, + "15": 777900032.0, + "16": 777900032.0, + "17": 777900032.0, + "18": 777900032.0, + "19": 777900032.0, + "20": 777900032.0, + "21": 777900032.0, + "22": 777900032.0, + "23": 777900032.0, + "24": 777900032.0, + "25": 777900032.0, + "26": 777900032.0, + "27": 777900032.0, + "28": 777900032.0, + "29": 777900032.0, + "30": 777900032.0, + "31": 777900032.0, + "32": 777900032.0, + "33": 777900032.0, + "34": 777900032.0, + "35": 777900032.0, + "36": 777900032.0, + "37": 777900032.0, + "38": 777900032.0, + "39": 777900032.0, + "40": 777900032.0, + "41": 777900032.0, + "42": 777900032.0, + "43": 777900032.0, + "44": 777900032.0, + "45": 777900032.0, + "46": 777900032.0, + "47": 777900032.0, + "48": 777900032.0, + "49": 777900032.0, + "50": 777900032.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 2463815680.0, + "2": 2744478720.0, + "3": 2744478720.0, + "4": 2744478720.0, + "5": 2744478720.0, + "6": 2744478720.0, + "7": 2744478720.0, + "8": 2744478720.0, + "9": 2744478720.0, + "10": 2744478720.0, + "11": 2744478720.0, + "12": 2744478720.0, + "13": 2744478720.0, + "14": 2744478720.0, + "15": 2744478720.0, + "16": 2744478720.0, + "17": 2744478720.0, + "18": 2744478720.0, + "19": 2744478720.0, + "20": 2744478720.0, + "21": 2744478720.0, + "22": 2744478720.0, + "23": 2744478720.0, + "24": 2744478720.0, + "25": 2744478720.0, + "26": 2744478720.0, + "27": 2744478720.0, + "28": 2744478720.0, + "29": 2744478720.0, + "30": 2744478720.0, + "31": 2744478720.0, + "32": 2744478720.0, + "33": 2744478720.0, + "34": 2744478720.0, + "35": 2744478720.0, + "36": 2744478720.0, + "37": 2744478720.0, + "38": 2744478720.0, + "39": 2744478720.0, + "40": 2744478720.0, + "41": 2744478720.0, + "42": 2744478720.0, + "43": 2744478720.0, + "44": 2744478720.0, + "45": 2744478720.0, + "46": 2744478720.0, + "47": 2744478720.0, + "48": 2744478720.0, + "49": 2744478720.0, + 
"50": 2744478720.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 7.95704, + "2": 0.23282, + "3": 0.22573, + "4": 0.14406, + "5": 0.14176, + "6": 0.14066, + "7": 0.14191, + "8": 0.13977, + "9": 0.23575, + "10": 0.14253, + "11": 0.14269, + "12": 0.24047, + "13": 0.18824, + "14": 0.18624, + "15": 0.30512, + "16": 0.14193, + "17": 0.2268, + "18": 0.14073, + "19": 0.23385, + "20": 0.20206, + "21": 0.1413, + "22": 0.13909, + "23": 0.35016, + "24": 0.14315, + "25": 0.22043, + "26": 0.14108, + "27": 0.14032, + "28": 0.14199, + "29": 0.38987, + "30": 0.14061, + "31": 0.14114, + "32": 0.14198, + "33": 0.21726, + "34": 0.14506, + "35": 0.14599, + "36": 0.14386, + "37": 0.14357, + "38": 0.22005, + "39": 0.14191, + "40": 0.14088, + "41": 0.23965, + "42": 0.14104, + "43": 0.21167, + "44": 0.13993, + "45": 0.2299, + "46": 0.24126, + "47": 0.14128, + "48": 0.14024, + "49": 0.22136, + "50": 0.14147 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json index 5517997e6c1..4aa2800617e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json @@ -16,44 +16,44 @@ "10": 10.84079, "11": 10.87928, "12": 10.8729, - "13": 10.87791, - "14": 10.8901, + "13": 10.8779, + "14": 10.89011, "15": 10.82504, - "16": 10.8296, - "17": 10.80874, - "18": 10.8116, - "19": 10.81543, - "20": 10.71912, + "16": 10.82957, + "17": 10.80875, + "18": 10.81163, + "19": 10.81545, + "20": 10.71913, "21": 10.70404, - "22": 10.56645, - "23": 10.71858, - "24": 10.60989, - "25": 10.55479, - "26": 10.60874, - "27": 10.62302, - "28": 10.56954, + "22": 10.56646, + "23": 10.71861, + "24": 10.60988, + "25": 10.55482, + "26": 10.60879, + "27": 
10.62303, + "28": 10.56953, "29": 10.57966, - "30": 10.35998, - "31": 10.11311, - "32": 10.46587, - "33": 10.45154, - "34": 10.20826, - "35": 10.26937, + "30": 10.35999, + "31": 10.11305, + "32": 10.46585, + "33": 10.45153, + "34": 10.20832, + "35": 10.26936, "36": 10.21924, - "37": 10.33852, - "38": 10.186, - "39": 10.3997, - "40": 10.08396, - "41": 10.13418, - "42": 10.20887, - "43": 9.82537, - "44": 9.95906, + "37": 10.33851, + "38": 10.18603, + "39": 10.39977, + "40": 10.08397, + "41": 10.13423, + "42": 10.20889, + "43": 9.82535, + "44": 9.95909, "45": 9.82563, - "46": 9.80623, - "47": 10.13499, - "48": 9.84002, - "49": 9.52482, - "50": 9.90725 + "46": 9.8062, + "47": 10.135, + "48": 9.84004, + "49": 9.52485, + "50": 9.90723 } }, "num-zeros": { @@ -72,45 +72,45 @@ "9": 1849.0, "10": 1317.0, "11": 1901.0, - "12": 1702.0, - "13": 1872.0, - "14": 1781.0, - "15": 1759.0, - "16": 1820.0, - "17": 1819.0, - "18": 1721.0, - "19": 1828.0, - "20": 1730.0, - "21": 1935.0, - "22": 1764.0, - "23": 1962.0, - "24": 1564.0, - "25": 1552.0, - "26": 1668.0, - "27": 1803.0, - "28": 1988.0, - "29": 1966.0, - "30": 1895.0, - "31": 1532.0, - "32": 1866.0, - "33": 2026.0, - "34": 1906.0, - "35": 1987.0, - "36": 1863.0, - "37": 2231.0, - "38": 2109.0, - "39": 2277.0, - "40": 2099.0, - "41": 2209.0, - "42": 2227.0, - "43": 1913.0, - "44": 2129.0, - "45": 1993.0, - "46": 2288.0, - "47": 2458.0, - "48": 2418.0, - "49": 2155.0, - "50": 2085.0 + "12": 1765.0, + "13": 1910.0, + "14": 1773.0, + "15": 1864.0, + "16": 1759.0, + "17": 1794.0, + "18": 1805.0, + "19": 1846.0, + "20": 1770.0, + "21": 1963.0, + "22": 1706.0, + "23": 1983.0, + "24": 1609.0, + "25": 1593.0, + "26": 1643.0, + "27": 1696.0, + "28": 1882.0, + "29": 1946.0, + "30": 1925.0, + "31": 1574.0, + "32": 1863.0, + "33": 2024.0, + "34": 1878.0, + "35": 1941.0, + "36": 1887.0, + "37": 2294.0, + "38": 2142.0, + "39": 2288.0, + "40": 2053.0, + "41": 2189.0, + "42": 2331.0, + "43": 1933.0, + "44": 2042.0, + "45": 1956.0, + "46": 
2285.0, + "47": 2470.0, + "48": 2437.0, + "49": 2238.0, + "50": 2004.0 } }, "mem-allocated-bytes": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.34716, - "2": 0.14227, - "3": 0.12689, - "4": 0.13008, - "5": 0.12281, - "6": 0.12008, - "7": 0.11926, - "8": 0.11756, - "9": 0.11844, - "10": 0.11959, - "11": 0.11763, - "12": 0.11828, - "13": 0.11955, - "14": 0.11929, - "15": 0.11867, - "16": 0.11859, - "17": 0.12095, - "18": 0.11695, - "19": 0.11774, - "20": 0.11863, - "21": 0.11942, - "22": 0.12117, - "23": 0.11884, - "24": 0.12003, - "25": 0.11915, - "26": 0.11977, - "27": 0.11816, - "28": 0.12705, - "29": 0.11815, - "30": 0.12166, - "31": 0.12023, - "32": 0.12154, - "33": 0.12781, - "34": 0.12209, - "35": 0.12372, - "36": 0.12109, - "37": 0.11897, - "38": 0.12385, - "39": 0.11961, - "40": 0.11846, - "41": 0.11902, - "42": 0.11915, - "43": 0.12286, - "44": 0.11759, - "45": 0.11912, - "46": 0.1204, - "47": 0.12027, - "48": 0.12073, - "49": 0.1164, - "50": 0.11734 + "1": 7.818, + "2": 0.14182, + "3": 0.12081, + "4": 0.09954, + "5": 0.09861, + "6": 0.10039, + "7": 0.09846, + "8": 0.09916, + "9": 0.10232, + "10": 0.10158, + "11": 0.09888, + "12": 0.09744, + "13": 0.09991, + "14": 0.09707, + "15": 0.09748, + "16": 0.09761, + "17": 0.09792, + "18": 0.09795, + "19": 0.09792, + "20": 0.09738, + "21": 0.10014, + "22": 0.09781, + "23": 0.09834, + "24": 0.09956, + "25": 0.09768, + "26": 0.09722, + "27": 0.09836, + "28": 0.09714, + "29": 0.09695, + "30": 0.09751, + "31": 0.09809, + "32": 0.09759, + "33": 0.09764, + "34": 0.09711, + "35": 0.09791, + "36": 0.09751, + "37": 0.09778, + "38": 0.09695, + "39": 0.09907, + "40": 0.09654, + "41": 0.09746, + "42": 0.09685, + "43": 0.09736, + "44": 0.09954, + "45": 0.09768, + "46": 0.09735, + "47": 0.09905, + "48": 0.09815, + "49": 0.09684, + "50": 0.09793 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgx_a100.json index 06342d2a540..bedfb1338ba 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgx_a100.json @@ -24,36 +24,36 @@ "18": 10.79296, "19": 10.79494, "20": 10.67877, - "21": 10.65858, - "22": 10.50081, + "21": 10.65859, + "22": 10.50083, "23": 10.71065, - "24": 10.55089, + "24": 10.5509, "25": 10.50321, - "26": 10.58033, - "27": 10.58262, - "28": 10.55556, - "29": 10.56003, - "30": 10.32992, - "31": 10.08344, + "26": 10.58034, + "27": 10.58264, + "28": 10.55553, + "29": 10.56004, + "30": 10.32995, + "31": 10.08339, "32": 10.44434, - "33": 10.44238, - "34": 10.19765, - "35": 10.25197, - "36": 10.19117, - "37": 10.31772, - "38": 10.1631, + "33": 10.44235, + "34": 10.19762, + "35": 10.25196, + "36": 10.1912, + "37": 10.31771, + "38": 10.16302, "39": 10.37486, - "40": 10.05284, - "41": 10.1344, - "42": 10.18877, - "43": 9.80641, - "44": 9.92687, - "45": 9.80332, - "46": 9.811, - "47": 10.12605, - "48": 9.82455, - "49": 9.50975, - "50": 9.88831 + "40": 10.05283, + "41": 10.13444, + "42": 10.18874, + "43": 9.80642, + "44": 9.92686, + "45": 9.80329, + "46": 9.81097, + "47": 10.12606, + "48": 9.82458, + "49": 9.50971, + "50": 9.88833 } }, "num-zeros": { @@ -81,36 +81,36 @@ "18": 1655.0, "19": 1784.0, "20": 1616.0, - "21": 1887.0, - "22": 1751.0, - "23": 2100.0, - "24": 1717.0, - "25": 1696.0, - "26": 1723.0, - "27": 1819.0, - "28": 1980.0, - "29": 1962.0, - "30": 2046.0, - "31": 1562.0, - "32": 1935.0, - "33": 2182.0, - "34": 1919.0, - "35": 1994.0, - "36": 1947.0, - "37": 2436.0, - "38": 2218.0, - "39": 2319.0, - "40": 2278.0, - "41": 2348.0, - "42": 2258.0, - "43": 1967.0, - "44": 2011.0, - "45": 2215.0, - "46": 2291.0, - "47": 2519.0, - "48": 2517.0, - "49": 2334.0, - "50": 2325.0 + "21": 1859.0, + "22": 1634.0, + "23": 1985.0, + "24": 1636.0, + "25": 
1648.0, + "26": 1833.0, + "27": 1729.0, + "28": 2018.0, + "29": 1948.0, + "30": 1977.0, + "31": 1606.0, + "32": 1878.0, + "33": 2102.0, + "34": 1882.0, + "35": 1998.0, + "36": 1963.0, + "37": 2392.0, + "38": 2259.0, + "39": 2368.0, + "40": 2355.0, + "41": 2351.0, + "42": 2315.0, + "43": 2100.0, + "44": 2088.0, + "45": 2185.0, + "46": 2287.0, + "47": 2485.0, + "48": 2430.0, + "49": 2209.0, + "50": 2436.0 } }, "mem-allocated-bytes": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 9.69348, - "2": 0.20058, - "3": 0.16793, - "4": 0.16851, - "5": 0.16769, - "6": 0.16776, - "7": 0.1679, - "8": 0.1698, - "9": 0.16773, - "10": 0.16689, - "11": 0.16616, - "12": 0.16649, - "13": 0.16602, - "14": 0.16651, - "15": 0.16681, - "16": 0.16794, - "17": 0.17068, - "18": 0.16616, - "19": 0.16604, - "20": 0.16664, - "21": 0.16675, - "22": 0.16587, - "23": 0.16669, - "24": 0.16593, - "25": 0.16666, - "26": 0.16624, - "27": 0.16546, - "28": 0.16503, - "29": 0.16469, - "30": 0.1651, - "31": 0.16508, - "32": 0.16533, - "33": 0.16475, - "34": 0.16518, - "35": 0.16543, - "36": 0.16422, - "37": 0.1648, - "38": 0.16453, - "39": 0.16423, - "40": 0.16482, - "41": 0.16457, - "42": 0.1653, - "43": 0.16536, - "44": 0.16541, - "45": 0.16481, - "46": 0.16481, - "47": 0.16542, - "48": 0.16607, - "49": 0.1639, - "50": 0.1641 + "1": 4.6609, + "2": 0.20286, + "3": 0.18331, + "4": 0.16708, + "5": 0.16425, + "6": 0.16306, + "7": 0.16477, + "8": 0.16576, + "9": 0.16596, + "10": 0.16583, + "11": 0.16408, + "12": 0.16435, + "13": 0.16481, + "14": 0.16557, + "15": 0.16431, + "16": 0.16502, + "17": 0.16505, + "18": 0.16591, + "19": 0.16488, + "20": 0.1643, + "21": 0.16357, + "22": 0.16399, + "23": 0.16405, + "24": 0.16322, + "25": 0.16434, + "26": 0.16338, + "27": 0.16313, + "28": 0.16358, + "29": 0.16355, + "30": 0.16313, + "31": 0.16372, + "32": 0.16289, + "33": 0.16298, + "34": 0.16307, + "35": 0.16335, + "36": 0.16325, + "37": 0.16343, + "38": 0.16261, + "39": 0.17181, + "40": 
0.16689, + "41": 0.16786, + "42": 0.16635, + "43": 0.16929, + "44": 0.16602, + "45": 0.16606, + "46": 0.16685, + "47": 0.16668, + "48": 0.16647, + "49": 0.16657, + "50": 0.16609 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json index f0460fcf964..ec21dd0eb78 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json @@ -2,141 +2,536 @@ "lm loss": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 10.79449, + "2": 10.80656, + "3": 10.80727, + "4": 10.77389, "5": 10.84829, + "6": 10.86736, + "7": 10.82922, + "8": 10.81537, + "9": 10.83956, "10": 10.77652, + "11": 10.90107, + "12": 10.85927, + "13": 10.87396, + "14": 10.89723, "15": 10.83961, + "16": 10.83508, + "17": 10.82101, + "18": 10.86029, + "19": 10.86558, "20": 10.82896, + "21": 10.83275, + "22": 10.75286, + "23": 10.88062, + "24": 10.78219, "25": 10.76607, + "26": 10.79522, + "27": 10.79866, + "28": 10.81697, + "29": 10.82169, "30": 10.69891, + "31": 10.55698, + "32": 10.75759, + "33": 10.74362, + "34": 10.59976, "35": 10.61772, + "36": 10.56389, + "37": 10.63614, + "38": 10.53029, + "39": 10.65358, "40": 10.44072, + "41": 10.49636, + "42": 10.50954, + "43": 10.22362, + "44": 10.30902, "45": 10.21065, + "46": 10.19943, + "47": 10.41641, + "48": 10.18128, + "49": 9.94311, "50": 10.21224, + "51": 10.16758, + "52": 10.06896, + "53": 10.30707, + "54": 10.2091, "55": 10.15688, + "56": 9.91475, + "57": 9.77696, + "58": 10.07417, + "59": 9.86333, "60": 9.77328, + "61": 9.9292, + "62": 10.17156, + "63": 9.62041, + "64": 9.97113, "65": 9.21979, + "66": 9.88693, + "67": 9.58363, + "68": 
9.94922, + "69": 9.9527, "70": 9.89312, + "71": 9.77658, + "72": 9.75435, + "73": 9.64969, + "74": 9.1439, "75": 9.56121, - "80": 9.53086, + "76": 9.25111, + "77": 10.17063, + "78": 9.85402, + "79": 9.49965, + "80": 9.53087, + "81": 9.60555, + "82": 9.80179, + "83": 9.43744, + "84": 9.51987, "85": 9.7196, + "86": 9.18596, + "87": 9.68687, + "88": 9.8443, + "89": 9.70586, "90": 9.89977, + "91": 9.45029, + "92": 9.45356, + "93": 9.18553, + "94": 8.92968, "95": 9.59767, - "100": 9.49001 + "96": 9.61491, + "97": 9.39084, + "98": 9.75668, + "99": 8.97922, + "100": 9.49 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": 570640384.0, + "2": 570640384.0, + "3": 570640384.0, + "4": 570640384.0, "5": 570640384.0, + "6": 570640384.0, + "7": 570640384.0, + "8": 570640384.0, + "9": 570640384.0, "10": 570640384.0, + "11": 570640384.0, + "12": 570640384.0, + "13": 570640384.0, + "14": 570640384.0, "15": 570640384.0, + "16": 570640384.0, + "17": 852351488.0, + "18": 852351488.0, + "19": 852351488.0, "20": 852351488.0, + "21": 852351488.0, + "22": 852351488.0, + "23": 852351488.0, + "24": 852351488.0, "25": 852351488.0, + "26": 852351488.0, + "27": 852351488.0, + "28": 852351488.0, + "29": 852351488.0, "30": 852351488.0, + "31": 852351488.0, + "32": 852351488.0, + "33": 852351488.0, + "34": 852351488.0, "35": 852351488.0, + "36": 852351488.0, + "37": 852351488.0, + "38": 852351488.0, + "39": 852351488.0, "40": 852351488.0, + "41": 852351488.0, + "42": 852351488.0, + "43": 852351488.0, + "44": 852351488.0, "45": 852351488.0, + "46": 852351488.0, + "47": 852351488.0, + "48": 852351488.0, + "49": 852351488.0, "50": 852351488.0, + "51": 852351488.0, + "52": 852351488.0, + "53": 852351488.0, + "54": 852351488.0, "55": 852351488.0, + "56": 852351488.0, + "57": 852351488.0, + "58": 852351488.0, + "59": 852351488.0, "60": 852351488.0, + "61": 852351488.0, + "62": 852351488.0, + "63": 852351488.0, + "64": 852351488.0, 
"65": 852351488.0, + "66": 852351488.0, + "67": 852351488.0, + "68": 852351488.0, + "69": 852351488.0, "70": 852351488.0, + "71": 852351488.0, + "72": 852351488.0, + "73": 852351488.0, + "74": 852351488.0, "75": 852351488.0, + "76": 852351488.0, + "77": 852351488.0, + "78": 852351488.0, + "79": 852351488.0, "80": 852351488.0, + "81": 852351488.0, + "82": 852351488.0, + "83": 852351488.0, + "84": 852351488.0, "85": 852351488.0, + "86": 852351488.0, + "87": 852351488.0, + "88": 852351488.0, + "89": 852351488.0, "90": 852351488.0, + "91": 852351488.0, + "92": 852351488.0, + "93": 852351488.0, + "94": 852351488.0, "95": 852351488.0, + "96": 852351488.0, + "97": 852351488.0, + "98": 852351488.0, + "99": 852351488.0, "100": 852351488.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 2393217536.0, - "5": 2393218048.0, - "10": 2393218048.0, - "15": 2393218048.0, + "1": 2394265600.0, + "2": 2394266624.0, + "3": 2394266624.0, + "4": 2394266624.0, + "5": 2394266624.0, + "6": 2394266624.0, + "7": 2394266624.0, + "8": 2394266624.0, + "9": 2394266624.0, + "10": 2394266624.0, + "11": 2394266624.0, + "12": 2394266624.0, + "13": 2394266624.0, + "14": 2394266624.0, + "15": 2394266624.0, + "16": 2394266624.0, + "17": 2394266624.0, + "18": 2675191296.0, + "19": 2675191296.0, "20": 2675191296.0, + "21": 2675191296.0, + "22": 2675191296.0, + "23": 2675191296.0, + "24": 2675191296.0, "25": 2675191296.0, + "26": 2675191296.0, + "27": 2675191296.0, + "28": 2675191296.0, + "29": 2675191296.0, "30": 2675191296.0, + "31": 2675191296.0, + "32": 2675191296.0, + "33": 2675191296.0, + "34": 2675191296.0, "35": 2675191296.0, + "36": 2675191296.0, + "37": 2675191296.0, + "38": 2675191296.0, + "39": 2675191296.0, "40": 2675191296.0, + "41": 2675191296.0, + "42": 2675191296.0, + "43": 2675191296.0, + "44": 2675191296.0, "45": 2675191296.0, + "46": 2675191296.0, + "47": 2675191296.0, + "48": 2675191296.0, + "49": 
2675191296.0, "50": 2675191296.0, + "51": 2675191296.0, + "52": 2675191296.0, + "53": 2675191296.0, + "54": 2675191296.0, "55": 2675191296.0, + "56": 2675191296.0, + "57": 2675191296.0, + "58": 2675191296.0, + "59": 2675191296.0, "60": 2675191296.0, + "61": 2675191296.0, + "62": 2675191296.0, + "63": 2675191296.0, + "64": 2675191296.0, "65": 2675191296.0, + "66": 2675191296.0, + "67": 2675191296.0, + "68": 2675191296.0, + "69": 2675191296.0, "70": 2675191296.0, + "71": 2675191296.0, + "72": 2675191296.0, + "73": 2675191296.0, + "74": 2675191296.0, "75": 2675191296.0, + "76": 2675191296.0, + "77": 2675191296.0, + "78": 2675191296.0, + "79": 2675191296.0, "80": 2675191296.0, + "81": 2675191296.0, + "82": 2675191296.0, + "83": 2675191296.0, + "84": 2675191296.0, "85": 2675191296.0, + "86": 2675191296.0, + "87": 2675191296.0, + "88": 2675191296.0, + "89": 2675191296.0, "90": 2675191296.0, + "91": 2675191296.0, + "92": 2675191296.0, + "93": 2675191296.0, + "94": 2675191296.0, "95": 2675191296.0, + "96": 2675191296.0, + "97": 2675191296.0, + "98": 2675191296.0, + "99": 2675191296.0, "100": 2675191296.0 } }, "iteration-time": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 10.18564, - "5": 0.17211, - "10": 0.17231, - "15": 0.17041, - "20": 0.17593, - "25": 0.17714, - "30": 0.1877, - "35": 0.18206, - "40": 0.1863, - "45": 0.18632, - "50": 0.18765, - "55": 0.17167, - "60": 0.17203, - "65": 0.17216, - "70": 0.17222, - "75": 0.17155, - "80": 0.17227, - "85": 0.17239, - "90": 0.17214, - "95": 0.17202, - "100": 0.17177 + "1": 3.90564, + "2": 0.17657, + "3": 0.15961, + "4": 0.14151, + "5": 0.13979, + "6": 0.14024, + "7": 0.14076, + "8": 0.14069, + "9": 0.14337, + "10": 0.14326, + "11": 0.1412, + "12": 0.14084, + "13": 0.14194, + "14": 0.14039, + "15": 0.14253, + "16": 0.14063, + "17": 0.18237, + "18": 0.15083, + "19": 0.14609, + "20": 0.145, + "21": 0.14692, + "22": 0.146, + "23": 0.14576, + "24": 0.14565, + "25": 0.14491, + 
"26": 0.14606, + "27": 0.14435, + "28": 0.14485, + "29": 0.14504, + "30": 0.14509, + "31": 0.14667, + "32": 0.14484, + "33": 0.14504, + "34": 0.14439, + "35": 0.14672, + "36": 0.14484, + "37": 0.14554, + "38": 0.14428, + "39": 0.14491, + "40": 0.1445, + "41": 0.14539, + "42": 0.14483, + "43": 0.14794, + "44": 0.14484, + "45": 0.14449, + "46": 0.14567, + "47": 0.14498, + "48": 0.14525, + "49": 0.14498, + "50": 0.1458, + "51": 0.15708, + "52": 0.1492, + "53": 0.14889, + "54": 0.1489, + "55": 0.14804, + "56": 0.14848, + "57": 0.14854, + "58": 0.14843, + "59": 0.14961, + "60": 0.14807, + "61": 0.14786, + "62": 0.14872, + "63": 0.14837, + "64": 0.148, + "65": 0.1483, + "66": 0.14847, + "67": 0.15039, + "68": 0.15144, + "69": 0.15129, + "70": 0.14963, + "71": 0.14959, + "72": 0.1509, + "73": 0.15125, + "74": 0.14951, + "75": 0.15018, + "76": 0.15031, + "77": 0.14981, + "78": 0.14969, + "79": 0.1496, + "80": 0.15057, + "81": 0.15014, + "82": 0.15141, + "83": 0.15143, + "84": 0.15091, + "85": 0.15061, + "86": 0.14973, + "87": 0.14949, + "88": 0.14979, + "89": 0.14986, + "90": 0.14984, + "91": 0.1511, + "92": 0.14859, + "93": 0.14946, + "94": 0.14974, + "95": 0.14917, + "96": 0.1491, + "97": 0.14957, + "98": 0.14939, + "99": 0.14896, + "100": 0.14922 } }, "num-zeros": { "start_step": 1, "end_step": 100, - "step_interval": 5, + "step_interval": 1, "values": { "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", "15": "nan", + "16": "nan", + "17": 2437.0, + "18": 2405.0, + "19": 2950.0, "20": 1827.0, + "21": 2154.0, + "22": 2731.0, + "23": 2609.0, + "24": 2290.0, "25": 2325.0, + "26": 2079.0, + "27": 2138.0, + "28": 2702.0, + "29": 2576.0, "30": 2528.0, + "31": 1895.0, + "32": 2628.0, + "33": 2325.0, + "34": 1928.0, "35": 2061.0, - "40": 2053.0, - "45": 2490.0, - "50": 2887.0, - "55": 2440.0, - "60": 2893.0, - "65": 2318.0, - "70": 
3665.0, - "75": 2955.0, - "80": 3665.0, - "85": 4048.0, - "90": 3695.0, - "95": 4076.0, - "100": 3631.0 + "36": 2153.0, + "37": 2600.0, + "38": 2350.0, + "39": 2997.0, + "40": 2042.0, + "41": 3349.0, + "42": 2512.0, + "43": 2750.0, + "44": 2120.0, + "45": 2537.0, + "46": 2247.0, + "47": 3061.0, + "48": 2520.0, + "49": 1969.0, + "50": 2951.0, + "51": 2300.0, + "52": 2456.0, + "53": 3730.0, + "54": 2866.0, + "55": 2413.0, + "56": 2477.0, + "57": 2410.0, + "58": 3424.0, + "59": 2861.0, + "60": 2939.0, + "61": 3044.0, + "62": 3127.0, + "63": 3236.0, + "64": 3212.0, + "65": 2304.0, + "66": 3805.0, + "67": 2691.0, + "68": 3332.0, + "69": 2874.0, + "70": 3746.0, + "71": 3057.0, + "72": 2717.0, + "73": 3332.0, + "74": 2214.0, + "75": 3059.0, + "76": 3625.0, + "77": 3957.0, + "78": 3955.0, + "79": 4130.0, + "80": 3627.0, + "81": 5242.0, + "82": 3566.0, + "83": 3261.0, + "84": 4036.0, + "85": 3907.0, + "86": 3340.0, + "87": 3954.0, + "88": 3630.0, + "89": 4358.0, + "90": 3800.0, + "91": 2877.0, + "92": 4239.0, + "93": 3604.0, + "94": 4356.0, + "95": 4107.0, + "96": 3835.0, + "97": 4094.0, + "98": 4835.0, + "99": 3873.0, + "100": 3709.0 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100_2nd.json new file mode 100644 index 00000000000..79470a83eaa --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", 
+ "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.16758, + "52": 10.06896, + "53": 10.30707, + "54": 10.2091, + "55": 10.15688, + "56": 9.91475, + "57": 9.77696, + "58": 10.07417, + "59": 9.86333, + "60": 9.77328, + "61": 9.9292, + "62": 10.17156, + "63": 9.62041, + "64": 9.97113, + "65": 9.21979, + "66": 9.88693, + "67": 9.58363, + "68": 9.94922, + "69": 9.9527, + "70": 9.89312, + "71": 9.77658, + "72": 9.75435, + "73": 9.64969, + "74": 9.1439, + "75": 9.56121, + "76": 9.25111, + "77": 10.17063, + "78": 9.85402, + "79": 9.49965, + "80": 9.53087, + "81": 9.60555, + "82": 9.80179, + "83": 9.43744, + "84": 9.51987, + "85": 9.7196, + "86": 9.18596, + "87": 9.68687, + "88": 9.8443, + "89": 9.70586, + "90": 9.89977, + "91": 9.45029, + "92": 9.45356, + "93": 9.18553, + "94": 8.92968, + "95": 9.59767, + "96": 9.61491, + "97": 9.39084, + "98": 9.75668, + "99": 8.97922, + "100": 9.49 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": 
"nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2300.0, + "52": 2456.0, + "53": 3730.0, + "54": 2866.0, + "55": 2413.0, + "56": 2477.0, + "57": 2410.0, + "58": 3424.0, + "59": 2861.0, + "60": 2939.0, + "61": 3044.0, + "62": 3127.0, + "63": 3236.0, + "64": 3212.0, + "65": 2304.0, + "66": 3805.0, + "67": 2691.0, + "68": 3332.0, + "69": 2874.0, + "70": 3746.0, + "71": 3057.0, + "72": 2717.0, + "73": 3332.0, + "74": 2214.0, + "75": 3059.0, + "76": 3625.0, + "77": 3957.0, + "78": 3955.0, + "79": 4130.0, + "80": 3627.0, + "81": 5242.0, + "82": 3566.0, + "83": 3261.0, + "84": 4036.0, + "85": 3907.0, + "86": 3340.0, + "87": 3954.0, + "88": 3630.0, + "89": 4358.0, + "90": 3800.0, + "91": 2877.0, + "92": 4239.0, + "93": 3604.0, + "94": 4356.0, + "95": 4107.0, + "96": 3835.0, + "97": 4094.0, + "98": 4835.0, + "99": 3873.0, + "100": 3709.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 854448640.0, + "52": 854448640.0, + "53": 854448640.0, + "54": 854448640.0, + "55": 854448640.0, + "56": 854448640.0, + "57": 854448640.0, + "58": 854448640.0, + "59": 854448640.0, + "60": 
854448640.0, + "61": 854448640.0, + "62": 854448640.0, + "63": 854448640.0, + "64": 854448640.0, + "65": 854448640.0, + "66": 854448640.0, + "67": 854448640.0, + "68": 854448640.0, + "69": 854448640.0, + "70": 854448640.0, + "71": 854448640.0, + "72": 854448640.0, + "73": 854448640.0, + "74": 854448640.0, + "75": 854448640.0, + "76": 854448640.0, + "77": 854448640.0, + "78": 854448640.0, + "79": 854448640.0, + "80": 854448640.0, + "81": 854448640.0, + "82": 854448640.0, + "83": 854448640.0, + "84": 854448640.0, + "85": 854448640.0, + "86": 854448640.0, + "87": 854448640.0, + "88": 854448640.0, + "89": 854448640.0, + "90": 854448640.0, + "91": 854448640.0, + "92": 854448640.0, + "93": 854448640.0, + "94": 854448640.0, + "95": 854448640.0, + "96": 854448640.0, + "97": 854448640.0, + "98": 854448640.0, + "99": 854448640.0, + "100": 854448640.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2677025280.0, + "52": 2677026304.0, + "53": 2677026304.0, + "54": 2677026304.0, + "55": 2677026304.0, + "56": 2677026304.0, + "57": 2677026304.0, + "58": 2677026304.0, + "59": 2677026304.0, + "60": 2677026304.0, + "61": 2677026304.0, + "62": 2677026304.0, + "63": 2677026304.0, + 
"64": 2677026304.0, + "65": 2677026304.0, + "66": 2677026304.0, + "67": 2677026304.0, + "68": 2677026304.0, + "69": 2677026304.0, + "70": 2677026304.0, + "71": 2677026304.0, + "72": 2677026304.0, + "73": 2677026304.0, + "74": 2677026304.0, + "75": 2677026304.0, + "76": 2677026304.0, + "77": 2677026304.0, + "78": 2677026304.0, + "79": 2677026304.0, + "80": 2677026304.0, + "81": 2677026304.0, + "82": 2677026304.0, + "83": 2677026304.0, + "84": 2677026304.0, + "85": 2677026304.0, + "86": 2677026304.0, + "87": 2677026304.0, + "88": 2677026304.0, + "89": 2677026304.0, + "90": 2677026304.0, + "91": 2677026304.0, + "92": 2677026304.0, + "93": 2677026304.0, + "94": 2677026304.0, + "95": 2677026304.0, + "96": 2677026304.0, + "97": 2677026304.0, + "98": 2677026304.0, + "99": 2677026304.0, + "100": 2677026304.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4.22373, + "52": 0.16951, + "53": 0.15058, + "54": 0.15054, + "55": 0.14699, + "56": 0.14513, + "57": 0.14551, + "58": 0.14527, + "59": 0.14564, + "60": 0.1459, + "61": 0.14594, + "62": 0.14542, + "63": 0.14588, + "64": 0.14554, + "65": 0.14576, + "66": 0.14541, + "67": 0.14581, + "68": 0.1455, + "69": 0.14552, + "70": 0.14529, + 
"71": 0.14493, + "72": 0.14571, + "73": 0.14584, + "74": 0.14561, + "75": 0.1455, + "76": 0.1448, + "77": 0.14494, + "78": 0.14556, + "79": 0.14513, + "80": 0.14568, + "81": 0.14557, + "82": 0.14571, + "83": 0.14521, + "84": 0.14525, + "85": 0.14517, + "86": 0.14536, + "87": 0.14621, + "88": 0.14478, + "89": 0.14615, + "90": 0.14445, + "91": 0.14478, + "92": 0.14427, + "93": 0.14469, + "94": 0.14454, + "95": 0.14455, + "96": 0.14494, + "97": 0.14459, + "98": 0.14459, + "99": 0.14516, + "100": 0.14499 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..51e39254e9a --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.8363, + "2": 10.83592, + "3": 10.83615, + "4": 10.79957, + "5": 10.84951, + "6": 10.86614, + "7": 10.82832, + "8": 10.83954, + "9": 10.84439, + "10": 10.80563, + "11": 10.87626, + "12": 10.8635, + "13": 10.87519, + "14": 10.88261, + "15": 10.8549, + "16": 10.84719, + "17": 10.84007, + "18": 10.85358, + "19": 10.86134, + "20": 10.8411, + "21": 10.85973, + "22": 10.79599, + "23": 10.88309, + "24": 10.81942, + "25": 10.8032, + "26": 10.81364, + "27": 10.83184, + "28": 10.8227, + "29": 10.84469, + "30": 10.73484, + "31": 10.61361, + "32": 10.76183, + "33": 10.75999, + "34": 10.64098, + "35": 10.63833, + "36": 10.59381, + "37": 10.66212, + "38": 10.56593, + "39": 10.67809, + "40": 10.47027, + "41": 10.49977, + "42": 10.53376, + "43": 10.26135, + "44": 10.33935, + "45": 10.24399, + "46": 10.21706, + "47": 10.42307, + "48": 10.21623, + "49": 9.96614, + "50": 10.22788, + "51": 10.18063, + "52": 10.07636, + "53": 
10.32773, + "54": 10.23662, + "55": 10.17779, + "56": 9.93459, + "57": 9.79047, + "58": 10.09308, + "59": 9.88561, + "60": 9.79776, + "61": 9.94517, + "62": 10.19094, + "63": 9.64683, + "64": 9.98455, + "65": 9.23395, + "66": 9.90453, + "67": 9.59582, + "68": 9.97649, + "69": 9.97495, + "70": 9.91345, + "71": 9.81704, + "72": 9.7724, + "73": 9.6613, + "74": 9.13276, + "75": 9.5758, + "76": 9.25498, + "77": 10.18582, + "78": 9.86011, + "79": 9.51637, + "80": 9.54101, + "81": 9.61959, + "82": 9.8199, + "83": 9.45715, + "84": 9.53646, + "85": 9.73396, + "86": 9.19313, + "87": 9.70118, + "88": 9.85742, + "89": 9.71286, + "90": 9.92642, + "91": 9.46223, + "92": 9.46428, + "93": 9.20456, + "94": 8.93882, + "95": 9.61804, + "96": 9.62982, + "97": 9.40186, + "98": 9.76277, + "99": 9.00132, + "100": 9.50913 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 569591808.0, + "2": 569591808.0, + "3": 569591808.0, + "4": 569591808.0, + "5": 569591808.0, + "6": 569591808.0, + "7": 569591808.0, + "8": 569591808.0, + "9": 569591808.0, + "10": 569591808.0, + "11": 569591808.0, + "12": 569591808.0, + "13": 569591808.0, + "14": 569591808.0, + "15": 569591808.0, + "16": 569591808.0, + "17": 852351488.0, + "18": 852351488.0, + "19": 852351488.0, + "20": 852351488.0, + "21": 852351488.0, + "22": 852351488.0, + "23": 852351488.0, + "24": 852351488.0, + "25": 852351488.0, + "26": 852351488.0, + "27": 852351488.0, + "28": 852351488.0, + "29": 852351488.0, + "30": 852351488.0, + "31": 852351488.0, + "32": 852351488.0, + "33": 852351488.0, + "34": 852351488.0, + "35": 852351488.0, + "36": 852351488.0, + "37": 852351488.0, + "38": 852351488.0, + "39": 852351488.0, + "40": 852351488.0, + "41": 852351488.0, + "42": 852351488.0, + "43": 852351488.0, + "44": 852351488.0, + "45": 852351488.0, + "46": 852351488.0, + "47": 852351488.0, + "48": 852351488.0, + "49": 852351488.0, + "50": 852351488.0, + "51": 852351488.0, + "52": 
852351488.0, + "53": 852351488.0, + "54": 852351488.0, + "55": 852351488.0, + "56": 852351488.0, + "57": 852351488.0, + "58": 852351488.0, + "59": 852351488.0, + "60": 852351488.0, + "61": 852351488.0, + "62": 852351488.0, + "63": 852351488.0, + "64": 852351488.0, + "65": 852351488.0, + "66": 852351488.0, + "67": 852351488.0, + "68": 852351488.0, + "69": 852351488.0, + "70": 852351488.0, + "71": 852351488.0, + "72": 852351488.0, + "73": 852351488.0, + "74": 852351488.0, + "75": 852351488.0, + "76": 852351488.0, + "77": 852351488.0, + "78": 852351488.0, + "79": 852351488.0, + "80": 852351488.0, + "81": 852351488.0, + "82": 852351488.0, + "83": 852351488.0, + "84": 852351488.0, + "85": 852351488.0, + "86": 852351488.0, + "87": 852351488.0, + "88": 852351488.0, + "89": 852351488.0, + "90": 852351488.0, + "91": 852351488.0, + "92": 852351488.0, + "93": 852351488.0, + "94": 852351488.0, + "95": 852351488.0, + "96": 852351488.0, + "97": 852351488.0, + "98": 852351488.0, + "99": 852351488.0, + "100": 852351488.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2394266112.0, + "2": 2394266624.0, + "3": 2394266624.0, + "4": 2394266624.0, + "5": 2394266624.0, + "6": 2394266624.0, + "7": 2394266624.0, + "8": 2394266624.0, + "9": 2394266624.0, + "10": 2394266624.0, + "11": 2394266624.0, + "12": 2394266624.0, + "13": 2394266624.0, + "14": 2394266624.0, + "15": 2394266624.0, + "16": 2394266624.0, + "17": 2394266624.0, + "18": 2677288448.0, + "19": 2677288448.0, + "20": 2677288448.0, + "21": 2677288448.0, + "22": 2677288448.0, + "23": 2677288448.0, + "24": 2677288448.0, + "25": 2677288448.0, + "26": 2677288448.0, + "27": 2677288448.0, + "28": 2677288448.0, + "29": 2677288448.0, + "30": 2677288448.0, + "31": 2677288448.0, + "32": 2677288448.0, + "33": 2677288448.0, + "34": 2677288448.0, + "35": 2677288448.0, + "36": 2677288448.0, + "37": 2677288448.0, + "38": 2677288448.0, + "39": 2677288448.0, + "40": 
2677288448.0, + "41": 2677288448.0, + "42": 2677288448.0, + "43": 2677288448.0, + "44": 2677288448.0, + "45": 2677288448.0, + "46": 2677288448.0, + "47": 2677288448.0, + "48": 2677288448.0, + "49": 2677288448.0, + "50": 2677288448.0, + "51": 2677288448.0, + "52": 2677288448.0, + "53": 2677288448.0, + "54": 2677288448.0, + "55": 2677288448.0, + "56": 2677288448.0, + "57": 2677288448.0, + "58": 2677288448.0, + "59": 2677288448.0, + "60": 2677288448.0, + "61": 2677288448.0, + "62": 2677288448.0, + "63": 2677288448.0, + "64": 2677288448.0, + "65": 2677288448.0, + "66": 2677288448.0, + "67": 2677288448.0, + "68": 2677288448.0, + "69": 2677288448.0, + "70": 2677288448.0, + "71": 2677288448.0, + "72": 2677288448.0, + "73": 2677288448.0, + "74": 2677288448.0, + "75": 2677288448.0, + "76": 2677288448.0, + "77": 2677288448.0, + "78": 2677288448.0, + "79": 2677288448.0, + "80": 2677288448.0, + "81": 2677288448.0, + "82": 2677288448.0, + "83": 2677288448.0, + "84": 2677288448.0, + "85": 2677288448.0, + "86": 2677288448.0, + "87": 2677288448.0, + "88": 2677288448.0, + "89": 2677288448.0, + "90": 2677288448.0, + "91": 2677288448.0, + "92": 2677288448.0, + "93": 2677288448.0, + "94": 2677288448.0, + "95": 2677288448.0, + "96": 2677288448.0, + "97": 2677288448.0, + "98": 2677288448.0, + "99": 2677288448.0, + "100": 2677288448.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7.92082, + "2": 0.15881, + "3": 0.1483, + "4": 0.13026, + "5": 0.247, + "6": 0.1291, + "7": 0.24882, + "8": 0.12936, + "9": 0.24806, + "10": 0.21162, + "11": 0.12953, + "12": 0.22212, + "13": 0.12944, + "14": 0.12909, + "15": 0.23074, + "16": 0.1288, + "17": 0.28155, + "18": 0.14734, + "19": 0.13796, + "20": 0.13594, + "21": 0.23092, + "22": 0.21716, + "23": 0.13639, + "24": 0.1359, + "25": 0.2221, + "26": 0.16419, + "27": 0.14414, + "28": 0.21146, + "29": 0.13469, + "30": 0.22441, + "31": 0.13661, + "32": 0.13647, + "33": 0.13579, + "34": 
0.13549, + "35": 0.13504, + "36": 0.13513, + "37": 0.13527, + "38": 0.19634, + "39": 0.3711, + "40": 0.1353, + "41": 0.13666, + "42": 0.21568, + "43": 0.13653, + "44": 0.13523, + "45": 0.13504, + "46": 0.13584, + "47": 0.13676, + "48": 0.13449, + "49": 0.22259, + "50": 0.14061, + "51": 0.34203, + "52": 0.44673, + "53": 0.30462, + "54": 0.34485, + "55": 0.36971, + "56": 0.37478, + "57": 0.3581, + "58": 0.46665, + "59": 0.47512, + "60": 0.38197, + "61": 0.40684, + "62": 0.48548, + "63": 0.32955, + "64": 0.28002, + "65": 0.1858, + "66": 0.1488, + "67": 0.21555, + "68": 0.17819, + "69": 0.24009, + "70": 0.18827, + "71": 0.17896, + "72": 0.18197, + "73": 0.13026, + "74": 0.21407, + "75": 0.13008, + "76": 0.12912, + "77": 0.12908, + "78": 0.13051, + "79": 0.12938, + "80": 0.13039, + "81": 0.1314, + "82": 0.40745, + "83": 0.12931, + "84": 0.13085, + "85": 0.13025, + "86": 0.13101, + "87": 0.12901, + "88": 0.12981, + "89": 0.12874, + "90": 0.12891, + "91": 0.13086, + "92": 0.19117, + "93": 0.1298, + "94": 0.13035, + "95": 0.12884, + "96": 0.12875, + "97": 0.13072, + "98": 0.14893, + "99": 0.13089, + "100": 0.13044 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": 2382.0, + "18": 2453.0, + "19": 3160.0, + "20": 1803.0, + "21": 2176.0, + "22": "nan", + "23": 2602.0, + "24": 2269.0, + "25": 2273.0, + "26": 1994.0, + "27": 2158.0, + "28": 2596.0, + "29": 2482.0, + "30": 2432.0, + "31": 1881.0, + "32": 2727.0, + "33": 2329.0, + "34": 1979.0, + "35": 1953.0, + "36": 2152.0, + "37": 2620.0, + "38": 2256.0, + "39": 3092.0, + "40": 2087.0, + "41": 3218.0, + "42": 2436.0, + "43": 2553.0, + "44": 2101.0, + "45": 2479.0, + "46": 2236.0, + "47": 2903.0, + "48": 2483.0, + "49": 1893.0, + "50": 3008.0, + 
"51": 2281.0, + "52": 2534.0, + "53": 3604.0, + "54": 2989.0, + "55": 2624.0, + "56": 2547.0, + "57": 2287.0, + "58": 3322.0, + "59": 2730.0, + "60": 2919.0, + "61": 3007.0, + "62": 3131.0, + "63": 3226.0, + "64": 3219.0, + "65": 2422.0, + "66": 3741.0, + "67": 2805.0, + "68": 3215.0, + "69": 2871.0, + "70": 3597.0, + "71": 3045.0, + "72": 2952.0, + "73": 3559.0, + "74": 2232.0, + "75": 2889.0, + "76": 3802.0, + "77": 3635.0, + "78": 3762.0, + "79": 4000.0, + "80": 3383.0, + "81": 4629.0, + "82": 3435.0, + "83": 3254.0, + "84": 3786.0, + "85": 3895.0, + "86": 3338.0, + "87": 4169.0, + "88": 3498.0, + "89": 4065.0, + "90": 3825.0, + "91": 3040.0, + "92": 4399.0, + "93": 3899.0, + "94": 4449.0, + "95": 4017.0, + "96": 3820.0, + "97": 4268.0, + "98": 5094.0, + "99": 3940.0, + "100": 3369.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json index 3b0a03dc6ef..2ea1feb19e0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json @@ -54,12 +54,12 @@ "48": 10.18128, "49": 9.94311, "50": 10.21224, - "51": 10.16759, - "52": 10.06895, + "51": 10.16758, + "52": 10.06896, "53": 10.30707, - "54": 10.20911, + "54": 10.2091, "55": 10.15688, - "56": 9.91474, + "56": 9.91475, "57": 9.77696, "58": 10.07417, "59": 9.86333, @@ -72,38 +72,38 @@ "66": 9.88693, "67": 9.58363, "68": 9.94922, - "69": 9.95271, + "69": 9.9527, "70": 9.89312, "71": 9.77658, "72": 9.75435, - "73": 9.6497, + "73": 9.64969, "74": 9.1439, "75": 9.56121, "76": 9.25111, "77": 10.17063, "78": 9.85402, "79": 9.49965, - "80": 9.53086, + "80": 9.53087, "81": 9.60555, "82": 9.80179, "83": 9.43744, "84": 9.51987, "85": 9.7196, - "86": 
9.18595, + "86": 9.18596, "87": 9.68687, "88": 9.8443, "89": 9.70586, "90": 9.89977, "91": 9.45029, "92": 9.45356, - "93": 9.18554, + "93": 9.18553, "94": 8.92968, "95": 9.59767, "96": 9.61491, "97": 9.39084, - "98": 9.75667, - "99": 8.97921, - "100": 9.49001 + "98": 9.75668, + "99": 8.97922, + "100": 9.49 } }, "mem-allocated-bytes": { @@ -220,7 +220,7 @@ "values": { "1": 2393217536.0, "2": 2393218048.0, - "3": 2393218048.0, + "3": 2394266624.0, "4": 2394266624.0, "5": 2394266624.0, "6": 2394266624.0, @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.58025, - "2": 0.18555, - "3": 0.31194, - "4": 0.1522, - "5": 0.15205, - "6": 0.1496, - "7": 0.14979, - "8": 0.14921, - "9": 0.14957, - "10": 0.15024, - "11": 0.14887, - "12": 0.14852, - "13": 0.14925, - "14": 0.15079, - "15": 0.14925, - "16": 0.14936, - "17": 0.2057, - "18": 0.15996, - "19": 0.15397, - "20": 0.15414, - "21": 0.1543, - "22": 0.15499, - "23": 0.15504, - "24": 0.15679, - "25": 0.15462, - "26": 0.15509, - "27": 0.15394, - "28": 0.15487, - "29": 0.15522, - "30": 0.1553, - "31": 0.15536, - "32": 0.15406, - "33": 0.15461, - "34": 0.1548, - "35": 0.15472, - "36": 0.15413, - "37": 0.1548, - "38": 0.15446, - "39": 0.15545, - "40": 0.15442, - "41": 0.15567, - "42": 0.15413, - "43": 0.15585, - "44": 0.15428, - "45": 0.15497, - "46": 0.15438, - "47": 0.15508, - "48": 0.15481, - "49": 0.15466, - "50": 0.15476, - "51": 0.16245, - "52": 0.15411, - "53": 0.15376, - "54": 0.15405, - "55": 0.15375, - "56": 0.15402, - "57": 0.15434, - "58": 0.15404, - "59": 0.15454, - "60": 0.15434, - "61": 0.15384, - "62": 0.15505, - "63": 0.15431, - "64": 0.15388, - "65": 0.1547, - "66": 0.15453, - "67": 0.15364, - "68": 0.15388, - "69": 0.15362, - "70": 0.15366, - "71": 0.15425, - "72": 0.15393, - "73": 0.15476, - "74": 0.15414, - "75": 0.15415, - "76": 0.1535, - "77": 0.15481, - "78": 0.1541, - "79": 0.15382, - "80": 0.15363, - "81": 0.15386, - "82": 0.18555, - "83": 0.15422, - "84": 0.15393, - "85": 
0.15462, - "86": 0.15512, - "87": 0.15391, - "88": 0.15431, - "89": 0.15431, - "90": 0.15521, - "91": 0.15475, - "92": 0.154, - "93": 0.15414, - "94": 0.15426, - "95": 0.15422, - "96": 0.15393, - "97": 0.15497, - "98": 0.1538, - "99": 0.15481, - "100": 0.15442 + "1": 4.04251, + "2": 0.18354, + "3": 0.16567, + "4": 0.14879, + "5": 0.14798, + "6": 0.14636, + "7": 0.14643, + "8": 0.14702, + "9": 0.14536, + "10": 0.1472, + "11": 0.1449, + "12": 0.14483, + "13": 0.14552, + "14": 0.14513, + "15": 0.14541, + "16": 0.14509, + "17": 0.19318, + "18": 0.15745, + "19": 0.15066, + "20": 0.1498, + "21": 0.15004, + "22": 0.15029, + "23": 0.15017, + "24": 0.15021, + "25": 0.14964, + "26": 0.15048, + "27": 0.15016, + "28": 0.15022, + "29": 0.15074, + "30": 0.15018, + "31": 0.15122, + "32": 0.15081, + "33": 0.1504, + "34": 0.15026, + "35": 0.15149, + "36": 0.14995, + "37": 0.1504, + "38": 0.15025, + "39": 0.15065, + "40": 0.14967, + "41": 0.15071, + "42": 0.1495, + "43": 0.15057, + "44": 0.14971, + "45": 0.14997, + "46": 0.14973, + "47": 0.14981, + "48": 0.14986, + "49": 0.15006, + "50": 0.14923, + "51": 0.15753, + "52": 0.1506, + "53": 0.14818, + "54": 0.14906, + "55": 0.14884, + "56": 0.14846, + "57": 0.1497, + "58": 0.14946, + "59": 0.14898, + "60": 0.14864, + "61": 0.14782, + "62": 0.14952, + "63": 0.14895, + "64": 0.14958, + "65": 0.14948, + "66": 0.14887, + "67": 0.1481, + "68": 0.14882, + "69": 0.14911, + "70": 0.15091, + "71": 0.14829, + "72": 0.15153, + "73": 0.14917, + "74": 0.1489, + "75": 0.14776, + "76": 0.14826, + "77": 0.1498, + "78": 0.14886, + "79": 0.14846, + "80": 0.14828, + "81": 0.14965, + "82": 0.14889, + "83": 0.1484, + "84": 0.14864, + "85": 0.14911, + "86": 0.14911, + "87": 0.14856, + "88": 0.14854, + "89": 0.1487, + "90": 0.14823, + "91": 0.15008, + "92": 0.14856, + "93": 0.14939, + "94": 0.14915, + "95": 0.14847, + "96": 0.1485, + "97": 0.14951, + "98": 0.14965, + "99": 0.14868, + "100": 0.14783 } }, "num-zeros": { @@ -471,67 +471,67 @@ "37": 2600.0, "38": 
2350.0, "39": 2997.0, - "40": 2053.0, - "41": 3352.0, - "42": 2497.0, - "43": 2867.0, - "44": 2109.0, - "45": 2490.0, - "46": 2279.0, - "47": 3051.0, - "48": 2527.0, - "49": 1973.0, - "50": 2887.0, - "51": 2310.0, - "52": 2526.0, - "53": 3705.0, - "54": 2888.0, - "55": 2440.0, - "56": 2496.0, - "57": 2338.0, - "58": 3283.0, - "59": 2849.0, - "60": 2893.0, - "61": 2956.0, - "62": 3134.0, - "63": 3275.0, - "64": 3176.0, - "65": 2318.0, - "66": 3857.0, - "67": 2606.0, - "68": 3313.0, - "69": 2826.0, - "70": 3665.0, - "71": 3011.0, - "72": 2693.0, - "73": 3357.0, - "74": 2271.0, - "75": 2955.0, - "76": 3617.0, - "77": 3936.0, - "78": 3951.0, - "79": 4065.0, - "80": 3665.0, - "81": 5191.0, - "82": 3511.0, - "83": 3263.0, - "84": 3876.0, - "85": 4048.0, - "86": 3414.0, - "87": 3980.0, - "88": 3617.0, - "89": 4400.0, - "90": 3695.0, - "91": 2857.0, - "92": 4432.0, - "93": 3494.0, - "94": 4438.0, - "95": 4076.0, - "96": 3948.0, - "97": 4242.0, - "98": 4943.0, - "99": 3861.0, - "100": 3631.0 + "40": 2042.0, + "41": 3349.0, + "42": 2512.0, + "43": 2750.0, + "44": 2120.0, + "45": 2537.0, + "46": 2247.0, + "47": 3061.0, + "48": 2520.0, + "49": 1969.0, + "50": 2951.0, + "51": 2300.0, + "52": 2456.0, + "53": 3730.0, + "54": 2866.0, + "55": 2413.0, + "56": 2477.0, + "57": 2410.0, + "58": 3424.0, + "59": 2861.0, + "60": 2939.0, + "61": 3044.0, + "62": 3127.0, + "63": 3236.0, + "64": 3212.0, + "65": 2304.0, + "66": 3805.0, + "67": 2691.0, + "68": 3332.0, + "69": 2874.0, + "70": 3746.0, + "71": 3057.0, + "72": 2717.0, + "73": 3332.0, + "74": 2214.0, + "75": 3059.0, + "76": 3625.0, + "77": 3957.0, + "78": 3955.0, + "79": 4130.0, + "80": 3627.0, + "81": 5242.0, + "82": 3566.0, + "83": 3261.0, + "84": 4036.0, + "85": 3907.0, + "86": 3340.0, + "87": 3954.0, + "88": 3630.0, + "89": 4358.0, + "90": 3800.0, + "91": 2877.0, + "92": 4239.0, + "93": 3604.0, + "94": 4356.0, + "95": 4107.0, + "96": 3835.0, + "97": 4094.0, + "98": 4835.0, + "99": 3873.0, + "100": 3727.0 } } } \ No newline at end 
of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..a37cec4df3f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.16758, + "52": 10.06896, + "53": 10.30707, + "54": 10.2091, + "55": 10.15688, + "56": 9.91475, + "57": 9.77696, + "58": 10.07417, + "59": 9.86333, + "60": 9.77328, + "61": 9.9292, + "62": 10.17156, + "63": 9.62041, + "64": 9.97113, + "65": 9.21979, + "66": 9.88693, + "67": 9.58363, + "68": 9.94922, + "69": 9.9527, + "70": 9.89312, + "71": 9.77658, + "72": 9.75435, + "73": 9.64969, + "74": 9.1439, + "75": 9.56121, + "76": 9.25111, + "77": 10.17063, + "78": 9.85402, + "79": 9.49965, + "80": 9.53087, + "81": 9.60555, + "82": 9.80179, + "83": 9.43744, + "84": 9.51987, + "85": 9.7196, + "86": 9.18596, + "87": 9.68687, + "88": 9.8443, + "89": 9.70586, + "90": 9.89977, + "91": 9.45029, + "92": 9.45356, + 
"93": 9.18553, + "94": 8.92968, + "95": 9.59767, + "96": 9.61491, + "97": 9.39084, + "98": 9.75668, + "99": 8.97922, + "100": 9.49 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2300.0, + "52": 2456.0, + "53": 3730.0, + "54": 2866.0, + "55": 2413.0, + "56": 2477.0, + "57": 2410.0, + "58": 3424.0, + "59": 2861.0, + "60": 2939.0, + "61": 3044.0, + "62": 3127.0, + "63": 3236.0, + "64": 3212.0, + "65": 2304.0, + "66": 3805.0, + "67": 2691.0, + "68": 3332.0, + "69": 2874.0, + "70": 3746.0, + "71": 3057.0, + "72": 2717.0, + "73": 3332.0, + "74": 2214.0, + "75": 3059.0, + "76": 3625.0, + "77": 3957.0, + "78": 3955.0, + "79": 4130.0, + "80": 3627.0, + "81": 5242.0, + "82": 3566.0, + "83": 3261.0, + "84": 4036.0, + "85": 3907.0, + "86": 3340.0, + "87": 3954.0, + "88": 3630.0, + "89": 4358.0, + "90": 3800.0, + "91": 2877.0, + "92": 4239.0, + "93": 3604.0, + "94": 4356.0, + "95": 4107.0, + "96": 3835.0, + "97": 4094.0, + "98": 4835.0, + "99": 3873.0, + "100": 3727.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + 
"9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 854448640.0, + "52": 854448640.0, + "53": 854448640.0, + "54": 854448640.0, + "55": 854448640.0, + "56": 854448640.0, + "57": 854448640.0, + "58": 854448640.0, + "59": 854448640.0, + "60": 854448640.0, + "61": 854448640.0, + "62": 854448640.0, + "63": 854448640.0, + "64": 854448640.0, + "65": 854448640.0, + "66": 854448640.0, + "67": 854448640.0, + "68": 854448640.0, + "69": 854448640.0, + "70": 854448640.0, + "71": 854448640.0, + "72": 854448640.0, + "73": 854448640.0, + "74": 854448640.0, + "75": 854448640.0, + "76": 854448640.0, + "77": 854448640.0, + "78": 854448640.0, + "79": 854448640.0, + "80": 854448640.0, + "81": 854448640.0, + "82": 854448640.0, + "83": 854448640.0, + "84": 854448640.0, + "85": 854448640.0, + "86": 854448640.0, + "87": 854448640.0, + "88": 854448640.0, + "89": 854448640.0, + "90": 854448640.0, + "91": 854448640.0, + "92": 854448640.0, + "93": 854448640.0, + "94": 854448640.0, + "95": 854448640.0, + "96": 854448640.0, + "97": 854448640.0, + "98": 854448640.0, + "99": 854448640.0, + "100": 854448640.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": 
"nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2677025280.0, + "52": 2677026304.0, + "53": 2677026304.0, + "54": 2677026304.0, + "55": 2677026304.0, + "56": 2677026304.0, + "57": 2677026304.0, + "58": 2677026304.0, + "59": 2677026304.0, + "60": 2677026304.0, + "61": 2677026304.0, + "62": 2677026304.0, + "63": 2677026304.0, + "64": 2677026304.0, + "65": 2677026304.0, + "66": 2677026304.0, + "67": 2677026304.0, + "68": 2677026304.0, + "69": 2677026304.0, + "70": 2677026304.0, + "71": 2677026304.0, + "72": 2677026304.0, + "73": 2677026304.0, + "74": 2677026304.0, + "75": 2677026304.0, + "76": 2677026304.0, + "77": 2677026304.0, + "78": 2677026304.0, + "79": 2677026304.0, + "80": 2677026304.0, + "81": 2677026304.0, + "82": 2677026304.0, + "83": 2677026304.0, + "84": 2677026304.0, + "85": 2677026304.0, + "86": 2677026304.0, + "87": 2677026304.0, + "88": 2677026304.0, + "89": 2677026304.0, + "90": 2677026304.0, + "91": 2677026304.0, + "92": 2677026304.0, + "93": 2677026304.0, + "94": 2677026304.0, + "95": 2677026304.0, + "96": 2677026304.0, + "97": 2677026304.0, + "98": 2677026304.0, + "99": 2677026304.0, + "100": 2677026304.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + 
"19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.87816, + "52": 0.16917, + "53": 0.15082, + "54": 0.15061, + "55": 0.14996, + "56": 0.14817, + "57": 0.1493, + "58": 0.14853, + "59": 0.14922, + "60": 0.14866, + "61": 0.14887, + "62": 0.14883, + "63": 0.14881, + "64": 0.14895, + "65": 0.14967, + "66": 0.14908, + "67": 0.1494, + "68": 0.14978, + "69": 0.15047, + "70": 0.1524, + "71": 0.14848, + "72": 0.14825, + "73": 0.14947, + "74": 0.14886, + "75": 0.14848, + "76": 0.14764, + "77": 0.14818, + "78": 0.14955, + "79": 0.14914, + "80": 0.14801, + "81": 0.14894, + "82": 0.14906, + "83": 0.14922, + "84": 0.14891, + "85": 0.14792, + "86": 0.14798, + "87": 0.14822, + "88": 0.14842, + "89": 0.14832, + "90": 0.14755, + "91": 0.1493, + "92": 0.14752, + "93": 0.14879, + "94": 0.14918, + "95": 0.15196, + "96": 0.1524, + "97": 0.14795, + "98": 0.14778, + "99": 0.14781, + "100": 0.14987 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..49586883019 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84064, + "2": 10.85201, + "3": 10.84256, + "4": 10.84663, + "5": 10.85667, + "6": 10.8655, + "7": 10.85455, + "8": 
10.84814, + "9": 10.85295, + "10": 10.82026, + "11": 10.86468, + "12": 10.85604, + "13": 10.87584, + "14": 10.86361, + "15": 10.86365, + "16": 10.86053, + "17": 10.84579, + "18": 10.8538, + "19": 10.85943, + "20": 10.84139, + "21": 10.86327, + "22": 10.83014, + "23": 10.85749, + "24": 10.83816, + "25": 10.82517, + "26": 10.8257, + "27": 10.83038, + "28": 10.82029, + "29": 10.81214, + "30": 10.74061, + "31": 10.68185, + "32": 10.76069, + "33": 10.7491, + "34": 10.67394, + "35": 10.65529, + "36": 10.63303, + "37": 10.66285, + "38": 10.60535, + "39": 10.6732, + "40": 10.50952, + "41": 10.53339, + "42": 10.54981, + "43": 10.35084, + "44": 10.3993, + "45": 10.31307, + "46": 10.27398, + "47": 10.45772, + "48": 10.27942, + "49": 10.05213, + "50": 10.28011, + "51": 10.23426, + "52": 10.13488, + "53": 10.35279, + "54": 10.26189, + "55": 10.20983, + "56": 9.99599, + "57": 9.87962, + "58": 10.13391, + "59": 9.92304, + "60": 9.85379, + "61": 9.97314, + "62": 10.211, + "63": 9.70514, + "64": 10.01457, + "65": 9.30759, + "66": 9.9366, + "67": 9.63221, + "68": 9.98219, + "69": 9.98048, + "70": 9.92986, + "71": 9.81575, + "72": 9.79602, + "73": 9.69104, + "74": 9.20049, + "75": 9.61228, + "76": 9.28906, + "77": 10.19068, + "78": 9.86601, + "79": 9.53855, + "80": 9.5578, + "81": 9.63332, + "82": 9.82853, + "83": 9.47188, + "84": 9.54101, + "85": 9.74266, + "86": 9.2142, + "87": 9.7016, + "88": 9.86604, + "89": 9.72339, + "90": 9.92767, + "91": 9.47045, + "92": 9.46809, + "93": 9.21217, + "94": 8.94887, + "95": 9.62787, + "96": 9.6406, + "97": 9.40839, + "98": 9.77147, + "99": 9.00853, + "100": 9.51225 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, + "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, + "10": 284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 
284527616.0, + "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, + "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, + "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, + "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, + "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, + "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, + "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, + "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, + "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, + "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, + "65": 416513536.0, + "66": 416513536.0, + "67": 416513536.0, + "68": 416513536.0, + "69": 416513536.0, + "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, + "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, + "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, + "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, + "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, + "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, + "100": 416513536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1465368064.0, + "2": 1465368576.0, + "3": 1465368576.0, + 
"4": 1465368576.0, + "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, + "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, + "15": 1465368576.0, + "16": 1465368576.0, + "17": 1597485568.0, + "18": 1597485568.0, + "19": 1597485568.0, + "20": 1597485568.0, + "21": 1597485568.0, + "22": 1597485568.0, + "23": 1597485568.0, + "24": 1597485568.0, + "25": 1597485568.0, + "26": 1597485568.0, + "27": 1597485568.0, + "28": 1597485568.0, + "29": 1597485568.0, + "30": 1597485568.0, + "31": 1597485568.0, + "32": 1597485568.0, + "33": 1597485568.0, + "34": 1597485568.0, + "35": 1597485568.0, + "36": 1597485568.0, + "37": 1597485568.0, + "38": 1597485568.0, + "39": 1597485568.0, + "40": 1597485568.0, + "41": 1597485568.0, + "42": 1597485568.0, + "43": 1597485568.0, + "44": 1597485568.0, + "45": 1597485568.0, + "46": 1597485568.0, + "47": 1597485568.0, + "48": 1597485568.0, + "49": 1597485568.0, + "50": 1597485568.0, + "51": 1597485568.0, + "52": 1597485568.0, + "53": 1597485568.0, + "54": 1597485568.0, + "55": 1597485568.0, + "56": 1597485568.0, + "57": 1597485568.0, + "58": 1597485568.0, + "59": 1597485568.0, + "60": 1597485568.0, + "61": 1597485568.0, + "62": 1597485568.0, + "63": 1597485568.0, + "64": 1597485568.0, + "65": 1597485568.0, + "66": 1597485568.0, + "67": 1597485568.0, + "68": 1597485568.0, + "69": 1597485568.0, + "70": 1597485568.0, + "71": 1597485568.0, + "72": 1597485568.0, + "73": 1597485568.0, + "74": 1597485568.0, + "75": 1597485568.0, + "76": 1597485568.0, + "77": 1597485568.0, + "78": 1597485568.0, + "79": 1597485568.0, + "80": 1597485568.0, + "81": 1597485568.0, + "82": 1597485568.0, + "83": 1597485568.0, + "84": 1597485568.0, + "85": 1597485568.0, + "86": 1597485568.0, + "87": 1597485568.0, + "88": 1597485568.0, + "89": 1597485568.0, + "90": 1597485568.0, + "91": 1597485568.0, + "92": 1597485568.0, + "93": 1597485568.0, + "94": 1597485568.0, + 
"95": 1597485568.0, + "96": 1597485568.0, + "97": 1597485568.0, + "98": 1597485568.0, + "99": 1597485568.0, + "100": 1597485568.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4.10681, + "2": 0.30693, + "3": 0.30146, + "4": 0.29106, + "5": 0.29089, + "6": 0.29112, + "7": 0.29159, + "8": 0.29115, + "9": 0.29086, + "10": 0.63125, + "11": 0.2989, + "12": 0.29641, + "13": 0.29201, + "14": 0.29541, + "15": 0.29003, + "16": 0.36384, + "17": 0.29423, + "18": 0.30498, + "19": 0.30687, + "20": 0.30695, + "21": 0.30562, + "22": 0.29047, + "23": 0.30755, + "24": 0.30627, + "25": 0.65941, + "26": 0.30667, + "27": 0.31536, + "28": 0.30722, + "29": 0.30542, + "30": 0.30564, + "31": 0.3045, + "32": 0.30472, + "33": 0.30551, + "34": 0.30423, + "35": 0.3045, + "36": 0.30479, + "37": 0.30596, + "38": 0.30404, + "39": 0.30411, + "40": 0.30491, + "41": 0.3071, + "42": 0.30318, + "43": 0.30217, + "44": 0.30293, + "45": 0.3041, + "46": 0.30338, + "47": 0.3038, + "48": 0.30224, + "49": 0.30264, + "50": 0.3024, + "51": 0.36516, + "52": 0.42479, + "53": 0.43225, + "54": 0.37389, + "55": 0.34351, + "56": 0.66697, + "57": 0.30412, + "58": 0.30714, + "59": 0.31209, + "60": 0.33472, + "61": 0.36046, + "62": 0.39323, + "63": 0.4363, + "64": 0.46158, + "65": 0.43859, + "66": 0.3596, + "67": 0.34843, + "68": 0.69171, + "69": 0.35185, + "70": 0.34317, + "71": 0.34189, + "72": 0.3408, + "73": 0.34132, + "74": 0.33999, + "75": 0.33341, + "76": 0.339, + "77": 0.34005, + "78": 0.33524, + "79": 0.65413, + "80": 0.3407, + "81": 0.33061, + "82": 0.33345, + "83": 0.3333, + "84": 0.33362, + "85": 0.33251, + "86": 0.3337, + "87": 0.33386, + "88": 0.6509, + "89": 0.33263, + "90": 0.32972, + "91": 0.32543, + "92": 0.32519, + "93": 0.32484, + "94": 0.32156, + "95": 0.32526, + "96": 0.32111, + "97": 0.32404, + "98": 0.31936, + "99": 0.31881, + "100": 0.31797 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + 
"values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 2392.0, + "17": "nan", + "18": 2314.0, + "19": 2912.0, + "20": 1640.0, + "21": 2053.0, + "22": "nan", + "23": 2462.0, + "24": 2226.0, + "25": 2201.0, + "26": 1963.0, + "27": 1926.0, + "28": 2401.0, + "29": 2492.0, + "30": 2393.0, + "31": 1704.0, + "32": 2541.0, + "33": 2096.0, + "34": 1737.0, + "35": 1810.0, + "36": 1982.0, + "37": 2511.0, + "38": 2185.0, + "39": 2899.0, + "40": 1888.0, + "41": 3169.0, + "42": 2343.0, + "43": 2501.0, + "44": 1938.0, + "45": 2346.0, + "46": 2091.0, + "47": 2853.0, + "48": 2402.0, + "49": 1810.0, + "50": 2718.0, + "51": 2080.0, + "52": 2200.0, + "53": 3412.0, + "54": 2641.0, + "55": 2229.0, + "56": 2244.0, + "57": 2057.0, + "58": 3223.0, + "59": 2431.0, + "60": 2650.0, + "61": 2712.0, + "62": 2995.0, + "63": 2816.0, + "64": 2860.0, + "65": 2015.0, + "66": 3176.0, + "67": 2529.0, + "68": 3108.0, + "69": 2873.0, + "70": 3540.0, + "71": 2904.0, + "72": 2693.0, + "73": 3253.0, + "74": 1981.0, + "75": 2780.0, + "76": 3465.0, + "77": 3649.0, + "78": 3593.0, + "79": 3981.0, + "80": 3458.0, + "81": 5181.0, + "82": 3334.0, + "83": 2956.0, + "84": 3527.0, + "85": 3711.0, + "86": 3209.0, + "87": 4133.0, + "88": 3443.0, + "89": 4295.0, + "90": 3801.0, + "91": 2958.0, + "92": 4311.0, + "93": 3544.0, + "94": 4264.0, + "95": 4042.0, + "96": 3849.0, + "97": 3974.0, + "98": 4971.0, + "99": 4071.0, + "100": 3363.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json index 47fa63fad72..7d93101382f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json @@ -78,22 +78,22 @@ "72": 9.798, "73": 9.68454, "74": 9.19951, - "75": 9.60518, - "76": 9.27791, - "77": 10.19437, + "75": 9.60519, + "76": 9.2779, + "77": 10.19436, "78": 9.8671, "79": 9.53341, "80": 9.56341, "81": 9.63047, "82": 9.82819, "83": 9.46388, - "84": 9.53736, - "85": 9.74561, + "84": 9.53735, + "85": 9.74562, "86": 9.21332, - "87": 9.7014, + "87": 9.70141, "88": 9.86621, "89": 9.72242, - "90": 9.92089, + "90": 9.9209, "91": 9.47178, "92": 9.46996, "93": 9.20589, @@ -234,90 +234,90 @@ "14": 1465368576.0, "15": 1465368576.0, "16": 1465368576.0, - "17": 1597092352.0, - "18": 1597092352.0, - "19": 1597092352.0, - "20": 1597092352.0, - "21": 1597092352.0, - "22": 1597092352.0, - "23": 1597092352.0, - "24": 1597092352.0, - "25": 1597092352.0, - "26": 1597092352.0, - "27": 1597092352.0, - "28": 1597092352.0, - "29": 1597092352.0, - "30": 1597092352.0, - "31": 1597092352.0, - "32": 1597092352.0, - "33": 1597092352.0, - "34": 1597092352.0, - "35": 1597092352.0, - "36": 1597092352.0, - "37": 1597092352.0, - "38": 1597092352.0, - "39": 1597092352.0, - "40": 1597092352.0, - "41": 1597092352.0, - "42": 1597092352.0, - "43": 1597092352.0, - "44": 1597092352.0, - "45": 1597092352.0, - "46": 1597092352.0, - "47": 1597092352.0, - "48": 1597092352.0, - "49": 1597092352.0, - "50": 1597092352.0, - "51": 1597092352.0, - "52": 1597092352.0, - "53": 1597092352.0, - "54": 1597092352.0, - "55": 1597092352.0, - "56": 1597092352.0, - "57": 1597092352.0, - "58": 1597092352.0, - "59": 1597092352.0, - "60": 1597092352.0, - "61": 1597092352.0, - "62": 1597092352.0, - "63": 1597092352.0, - "64": 1597092352.0, - "65": 1597092352.0, - "66": 1597092352.0, - "67": 1597092352.0, - "68": 1597092352.0, - "69": 1597092352.0, - "70": 1597092352.0, - "71": 1597092352.0, - "72": 1597092352.0, - "73": 1597092352.0, - "74": 1597092352.0, - "75": 1597092352.0, - "76": 1597092352.0, - "77": 
1597092352.0, - "78": 1597092352.0, - "79": 1597092352.0, - "80": 1597092352.0, - "81": 1597092352.0, - "82": 1597092352.0, - "83": 1597092352.0, - "84": 1597092352.0, - "85": 1597092352.0, - "86": 1597092352.0, - "87": 1597092352.0, - "88": 1597092352.0, - "89": 1597092352.0, - "90": 1597092352.0, - "91": 1597092352.0, - "92": 1597092352.0, - "93": 1597092352.0, - "94": 1597092352.0, - "95": 1597092352.0, - "96": 1597092352.0, - "97": 1597092352.0, - "98": 1597092352.0, - "99": 1597092352.0, - "100": 1597092352.0 + "17": 1597485568.0, + "18": 1597485568.0, + "19": 1597485568.0, + "20": 1597485568.0, + "21": 1597485568.0, + "22": 1597485568.0, + "23": 1597485568.0, + "24": 1597485568.0, + "25": 1597485568.0, + "26": 1597485568.0, + "27": 1597485568.0, + "28": 1597485568.0, + "29": 1597485568.0, + "30": 1597485568.0, + "31": 1597485568.0, + "32": 1597485568.0, + "33": 1597485568.0, + "34": 1597485568.0, + "35": 1597485568.0, + "36": 1597485568.0, + "37": 1597485568.0, + "38": 1597485568.0, + "39": 1597485568.0, + "40": 1597485568.0, + "41": 1597485568.0, + "42": 1597485568.0, + "43": 1597485568.0, + "44": 1597485568.0, + "45": 1597485568.0, + "46": 1597485568.0, + "47": 1597485568.0, + "48": 1597485568.0, + "49": 1597485568.0, + "50": 1597485568.0, + "51": 1597485568.0, + "52": 1597485568.0, + "53": 1597485568.0, + "54": 1597485568.0, + "55": 1597485568.0, + "56": 1597485568.0, + "57": 1597485568.0, + "58": 1597485568.0, + "59": 1597485568.0, + "60": 1597485568.0, + "61": 1597485568.0, + "62": 1597485568.0, + "63": 1597485568.0, + "64": 1597485568.0, + "65": 1597485568.0, + "66": 1597485568.0, + "67": 1597485568.0, + "68": 1597485568.0, + "69": 1597485568.0, + "70": 1597485568.0, + "71": 1597485568.0, + "72": 1597485568.0, + "73": 1597485568.0, + "74": 1597485568.0, + "75": 1597485568.0, + "76": 1597485568.0, + "77": 1597485568.0, + "78": 1597485568.0, + "79": 1597485568.0, + "80": 1597485568.0, + "81": 1597485568.0, + "82": 1597485568.0, + "83": 1597485568.0, + 
"84": 1597485568.0, + "85": 1597485568.0, + "86": 1597485568.0, + "87": 1597485568.0, + "88": 1597485568.0, + "89": 1597485568.0, + "90": 1597485568.0, + "91": 1597485568.0, + "92": 1597485568.0, + "93": 1597485568.0, + "94": 1597485568.0, + "95": 1597485568.0, + "96": 1597485568.0, + "97": 1597485568.0, + "98": 1597485568.0, + "99": 1597485568.0, + "100": 1597485568.0 } }, "iteration-time": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 7.02035, - "2": 0.23195, - "3": 0.20851, - "4": 0.20697, - "5": 0.20737, - "6": 0.20888, - "7": 0.2126, - "8": 0.21169, - "9": 0.21057, - "10": 0.21255, - "11": 0.21108, - "12": 0.21506, - "13": 0.21085, - "14": 0.21072, - "15": 0.20967, - "16": 0.28325, - "17": 0.21485, - "18": 0.21984, - "19": 0.22277, - "20": 0.22004, - "21": 0.2242, - "22": 0.21349, - "23": 0.22346, - "24": 0.22444, - "25": 0.22521, - "26": 0.22267, - "27": 0.22592, - "28": 0.22136, - "29": 0.22802, - "30": 0.2227, - "31": 0.22084, - "32": 0.22099, - "33": 0.22019, - "34": 0.22336, - "35": 0.23024, - "36": 0.23188, - "37": 0.21929, - "38": 0.22277, - "39": 0.22303, - "40": 0.22269, - "41": 0.22539, - "42": 0.22835, - "43": 0.22379, - "44": 0.22103, - "45": 0.21919, - "46": 0.22653, - "47": 0.21996, - "48": 0.22399, - "49": 0.22202, - "50": 0.22099, - "51": 0.21773, - "52": 0.22165, - "53": 0.2208, - "54": 0.22241, - "55": 0.22007, - "56": 0.22113, - "57": 0.22282, - "58": 0.22209, - "59": 0.22153, - "60": 0.22251, - "61": 0.22383, - "62": 0.22477, - "63": 0.22389, - "64": 0.22518, - "65": 0.22491, - "66": 0.22204, - "67": 0.23149, - "68": 0.22301, - "69": 0.2298, - "70": 0.23059, - "71": 0.22412, - "72": 0.21788, - "73": 0.2209, - "74": 0.22227, - "75": 0.22603, - "76": 0.22022, - "77": 0.22045, - "78": 0.22051, - "79": 0.22157, - "80": 0.22544, - "81": 0.22703, - "82": 0.23226, - "83": 0.23535, - "84": 0.22503, - "85": 0.21869, - "86": 0.21989, - "87": 0.21782, - "88": 0.22296, - "89": 0.24294, - "90": 0.27356, - "91": 
0.2182, - "92": 0.22138, - "93": 0.21695, - "94": 0.22172, - "95": 0.21947, - "96": 0.21792, - "97": 0.22243, - "98": 0.21902, - "99": 0.2202, - "100": 0.22043 + "1": 7.98979, + "2": 0.23108, + "3": 0.20672, + "4": 0.19092, + "5": 0.18929, + "6": 0.18601, + "7": 0.18145, + "8": 0.1825, + "9": 0.18096, + "10": 0.17945, + "11": 0.18072, + "12": 0.18215, + "13": 0.18198, + "14": 0.18069, + "15": 0.18115, + "16": 0.26838, + "17": 0.1891, + "18": 0.18758, + "19": 0.1866, + "20": 0.193, + "21": 0.19158, + "22": 0.18199, + "23": 0.19182, + "24": 0.18937, + "25": 0.19172, + "26": 0.19541, + "27": 0.19359, + "28": 0.18942, + "29": 0.18922, + "30": 0.19555, + "31": 0.18932, + "32": 0.18729, + "33": 0.18652, + "34": 0.18698, + "35": 0.18671, + "36": 0.19043, + "37": 0.18639, + "38": 0.1876, + "39": 0.18889, + "40": 0.18979, + "41": 0.18978, + "42": 0.1917, + "43": 0.1905, + "44": 0.18866, + "45": 0.18792, + "46": 0.18874, + "47": 0.18981, + "48": 0.18652, + "49": 0.18751, + "50": 0.18675, + "51": 0.19039, + "52": 0.19014, + "53": 0.18825, + "54": 0.18861, + "55": 0.18671, + "56": 0.1887, + "57": 0.18709, + "58": 0.18833, + "59": 0.18683, + "60": 0.18818, + "61": 0.18735, + "62": 0.18776, + "63": 0.18826, + "64": 0.18823, + "65": 0.1891, + "66": 0.18962, + "67": 0.19168, + "68": 0.18718, + "69": 0.18647, + "70": 0.18731, + "71": 0.18749, + "72": 0.18696, + "73": 0.18682, + "74": 0.18953, + "75": 0.18603, + "76": 0.18491, + "77": 0.18695, + "78": 0.19298, + "79": 0.19006, + "80": 0.1864, + "81": 0.18786, + "82": 0.19211, + "83": 0.18632, + "84": 0.19075, + "85": 0.18575, + "86": 0.21258, + "87": 0.20475, + "88": 0.18504, + "89": 0.18486, + "90": 0.18505, + "91": 0.18427, + "92": 0.18546, + "93": 0.20396, + "94": 0.18728, + "95": 0.18571, + "96": 0.18504, + "97": 0.18668, + "98": 0.18684, + "99": 0.18604, + "100": 0.18586 } }, "num-zeros": { @@ -506,32 +506,32 @@ "72": 2640.0, "73": 3199.0, "74": 2084.0, - "75": 2809.0, - "76": 3599.0, - "77": 3667.0, - "78": 3680.0, - "79": 
3972.0, - "80": 3365.0, - "81": 5042.0, - "82": 3291.0, - "83": 3016.0, - "84": 3592.0, - "85": 3792.0, - "86": 3192.0, - "87": 4219.0, - "88": 3376.0, - "89": 4110.0, - "90": 3939.0, - "91": 2912.0, - "92": 4114.0, - "93": 3499.0, - "94": 4339.0, - "95": 3829.0, - "96": 3875.0, - "97": 4100.0, - "98": 4889.0, - "99": 3771.0, - "100": 3390.0 + "75": 2823.0, + "76": 3490.0, + "77": 3710.0, + "78": 3619.0, + "79": 3911.0, + "80": 3431.0, + "81": 4963.0, + "82": 3460.0, + "83": 3062.0, + "84": 3593.0, + "85": 3752.0, + "86": 3255.0, + "87": 4096.0, + "88": 3272.0, + "89": 4074.0, + "90": 3810.0, + "91": 2877.0, + "92": 4080.0, + "93": 3469.0, + "94": 4428.0, + "95": 3850.0, + "96": 3832.0, + "97": 4102.0, + "98": 4833.0, + "99": 3795.0, + "100": 3405.0 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..7b47664603b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": 
"nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.23471, + "52": 10.13764, + "53": 10.34797, + "54": 10.26738, + "55": 10.20734, + "56": 9.99527, + "57": 9.89333, + "58": 10.13452, + "59": 9.92856, + "60": 9.8551, + "61": 9.98264, + "62": 10.20686, + "63": 9.70842, + "64": 10.01687, + "65": 9.30409, + "66": 9.93326, + "67": 9.62677, + "68": 9.98429, + "69": 9.9755, + "70": 9.93956, + "71": 9.81005, + "72": 9.798, + "73": 9.68454, + "74": 9.19951, + "75": 9.60519, + "76": 9.2779, + "77": 10.19436, + "78": 9.8671, + "79": 9.53341, + "80": 9.56341, + "81": 9.63047, + "82": 9.82819, + "83": 9.46388, + "84": 9.53735, + "85": 9.74562, + "86": 9.21332, + "87": 9.70141, + "88": 9.86621, + "89": 9.72242, + "90": 9.9209, + "91": 9.47178, + "92": 9.46996, + "93": 9.20589, + "94": 8.94772, + "95": 9.60815, + "96": 9.63635, + "97": 9.4138, + "98": 9.77274, + "99": 8.9958, + "100": 9.50415 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2041.0, + "52": 2226.0, + "53": 3222.0, + "54": 2784.0, + "55": 2290.0, + "56": 2428.0, + "57": 2146.0, + "58": 3048.0, + "59": 2504.0, + "60": 2612.0, + "61": 2623.0, + "62": 3003.0, + "63": 2762.0, + "64": 2917.0, + "65": 2104.0, + "66": 3550.0, 
+ "67": 2433.0, + "68": 3146.0, + "69": 2877.0, + "70": 3528.0, + "71": 2983.0, + "72": 2640.0, + "73": 3199.0, + "74": 2084.0, + "75": 2823.0, + "76": 3490.0, + "77": 3710.0, + "78": 3619.0, + "79": 3911.0, + "80": 3431.0, + "81": 4963.0, + "82": 3460.0, + "83": 3062.0, + "84": 3593.0, + "85": 3752.0, + "86": 3255.0, + "87": 4096.0, + "88": 3272.0, + "89": 4074.0, + "90": 3810.0, + "91": 2877.0, + "92": 4080.0, + "93": 3469.0, + "94": 4428.0, + "95": 3850.0, + "96": 3832.0, + "97": 4102.0, + "98": 4833.0, + "99": 3795.0, + "100": 3405.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 414416384.0, + "52": 414416384.0, + "53": 414416384.0, + "54": 414416384.0, + "55": 414416384.0, + "56": 414416384.0, + "57": 414416384.0, + "58": 414416384.0, + "59": 414416384.0, + "60": 414416384.0, + "61": 414416384.0, + "62": 414416384.0, + "63": 414416384.0, + "64": 414416384.0, + "65": 414416384.0, + "66": 414416384.0, + "67": 414416384.0, + "68": 414416384.0, + "69": 414416384.0, + "70": 414416384.0, + "71": 414416384.0, + "72": 414416384.0, + "73": 414416384.0, + "74": 414416384.0, + "75": 414416384.0, + "76": 414416384.0, + "77": 414416384.0, + "78": 414416384.0, + "79": 
414416384.0, + "80": 414416384.0, + "81": 414416384.0, + "82": 414416384.0, + "83": 414416384.0, + "84": 414416384.0, + "85": 414416384.0, + "86": 414416384.0, + "87": 414416384.0, + "88": 414416384.0, + "89": 414416384.0, + "90": 414416384.0, + "91": 414416384.0, + "92": 414416384.0, + "93": 414416384.0, + "94": 414416384.0, + "95": 414416384.0, + "96": 414416384.0, + "97": 414416384.0, + "98": 414416384.0, + "99": 414416384.0, + "100": 414416384.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1595256320.0, + "52": 1595257344.0, + "53": 1595257344.0, + "54": 1595257344.0, + "55": 1595257344.0, + "56": 1595257344.0, + "57": 1595257344.0, + "58": 1595257344.0, + "59": 1595257344.0, + "60": 1595257344.0, + "61": 1595257344.0, + "62": 1595257344.0, + "63": 1595257344.0, + "64": 1595257344.0, + "65": 1595257344.0, + "66": 1595257344.0, + "67": 1595257344.0, + "68": 1595257344.0, + "69": 1595257344.0, + "70": 1595257344.0, + "71": 1595257344.0, + "72": 1595257344.0, + "73": 1595257344.0, + "74": 1595257344.0, + "75": 1595257344.0, + "76": 1595257344.0, + "77": 1595257344.0, + "78": 1595257344.0, + "79": 1595257344.0, + "80": 1595257344.0, + "81": 1595257344.0, + "82": 
1595257344.0, + "83": 1595257344.0, + "84": 1595257344.0, + "85": 1595257344.0, + "86": 1595257344.0, + "87": 1595257344.0, + "88": 1595257344.0, + "89": 1595257344.0, + "90": 1595257344.0, + "91": 1595257344.0, + "92": 1595257344.0, + "93": 1595257344.0, + "94": 1595257344.0, + "95": 1595257344.0, + "96": 1595257344.0, + "97": 1595257344.0, + "98": 1595257344.0, + "99": 1595257344.0, + "100": 1595257344.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.99273, + "52": 0.20702, + "53": 0.18803, + "54": 0.18787, + "55": 0.1866, + "56": 0.18751, + "57": 0.18635, + "58": 0.18849, + "59": 0.18718, + "60": 0.18823, + "61": 0.18622, + "62": 0.19151, + "63": 0.19068, + "64": 0.18896, + "65": 0.18832, + "66": 0.18702, + "67": 0.18769, + "68": 0.18735, + "69": 0.18995, + "70": 0.19784, + "71": 0.1874, + "72": 0.18733, + "73": 0.18637, + "74": 0.18906, + "75": 0.19094, + "76": 0.19187, + "77": 0.19634, + "78": 0.1905, + "79": 0.19691, + "80": 0.18976, + "81": 0.18665, + "82": 0.18674, + "83": 0.18876, + "84": 0.21124, + "85": 0.1987, + "86": 0.19646, + "87": 0.18856, + "88": 0.18762, + "89": 0.18822, + "90": 0.18715, + "91": 0.18811, + "92": 0.1855, + "93": 0.18748, + "94": 0.1861, + 
"95": 0.1881, + "96": 0.18638, + "97": 0.18739, + "98": 0.18684, + "99": 0.18679, + "100": 0.18562 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json index 9f83249318a..4c3d06e5e64 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json @@ -218,22 +218,22 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1465367040.0, - "2": 1465367552.0, - "3": 1465367552.0, - "4": 1465367552.0, - "5": 1465367552.0, - "6": 1465367552.0, - "7": 1465367552.0, - "8": 1465367552.0, - "9": 1465367552.0, - "10": 1465367552.0, - "11": 1465367552.0, - "12": 1465367552.0, - "13": 1465367552.0, - "14": 1465367552.0, - "15": 1465367552.0, - "16": 1465367552.0, + "1": 1465368064.0, + "2": 1465368576.0, + "3": 1465368576.0, + "4": 1465368576.0, + "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, + "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, + "15": 1465368576.0, + "16": 1465368576.0, "17": 1597485568.0, "18": 1597485568.0, "19": 1597485568.0, @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 3.81628, - "2": 0.32142, - "3": 0.27555, - "4": 0.28299, - "5": 0.28901, - "6": 0.28043, - "7": 0.29138, - "8": 0.30944, - "9": 0.28461, - "10": 0.28789, - "11": 0.28709, - "12": 0.29186, - "13": 0.29114, - "14": 0.29464, - "15": 0.31626, - "16": 0.48847, - "17": 0.28436, - "18": 0.30264, - "19": 0.29287, - "20": 0.30599, - "21": 0.29335, - "22": 0.27957, - "23": 0.29491, - "24": 0.29371, - "25": 0.29398, - "26": 0.29344, - "27": 0.29457, - "28": 0.29449, - "29": 0.29412, - "30": 
0.29337, - "31": 0.29404, - "32": 0.29391, - "33": 0.29483, - "34": 0.29389, - "35": 0.29433, - "36": 0.29449, - "37": 0.29463, - "38": 0.29428, - "39": 0.29385, - "40": 0.29379, - "41": 0.29345, - "42": 0.29404, - "43": 0.29413, - "44": 0.29357, - "45": 0.29308, - "46": 0.29302, - "47": 0.29311, - "48": 0.29341, - "49": 0.2946, - "50": 0.29365, - "51": 0.29978, - "52": 0.31599, - "53": 0.29361, - "54": 0.29341, - "55": 0.29321, - "56": 0.29262, - "57": 0.29474, - "58": 0.29427, - "59": 0.29281, - "60": 0.29314, - "61": 0.29219, - "62": 0.29346, - "63": 0.29348, - "64": 0.30211, - "65": 0.29324, - "66": 0.29357, - "67": 0.29314, - "68": 0.29229, - "69": 0.30197, - "70": 0.29329, - "71": 0.30206, - "72": 0.29435, - "73": 0.29495, - "74": 0.2943, - "75": 0.29926, - "76": 0.29332, - "77": 0.29464, - "78": 0.29342, - "79": 0.29434, - "80": 0.29439, - "81": 0.29391, - "82": 0.29436, - "83": 0.29426, - "84": 0.29408, - "85": 0.29452, - "86": 0.29406, - "87": 0.29421, - "88": 0.29373, - "89": 0.29437, - "90": 0.29425, - "91": 0.29383, - "92": 0.2933, - "93": 0.29369, - "94": 0.2937, - "95": 0.29465, - "96": 0.29439, - "97": 0.29435, - "98": 0.2952, - "99": 0.29361, - "100": 0.2936 + "1": 3.90326, + "2": 0.32521, + "3": 0.29877, + "4": 0.2879, + "5": 0.29191, + "6": 0.28844, + "7": 0.28727, + "8": 0.2851, + "9": 0.28617, + "10": 0.2869, + "11": 0.28532, + "12": 0.28535, + "13": 0.28382, + "14": 0.28373, + "15": 0.28543, + "16": 0.55478, + "17": 0.28409, + "18": 0.29766, + "19": 0.29807, + "20": 0.33631, + "21": 0.29858, + "22": 0.284, + "23": 0.29625, + "24": 0.29625, + "25": 0.29634, + "26": 0.29795, + "27": 0.29713, + "28": 0.29855, + "29": 0.2978, + "30": 0.29653, + "31": 0.29786, + "32": 0.29724, + "33": 0.2971, + "34": 0.29753, + "35": 0.29699, + "36": 0.29798, + "37": 0.2974, + "38": 0.29676, + "39": 0.29657, + "40": 0.29597, + "41": 0.29525, + "42": 0.29613, + "43": 0.29598, + "44": 0.29592, + "45": 0.29776, + "46": 0.29645, + "47": 0.29585, + "48": 0.29622, + "49": 
0.29485, + "50": 0.29579, + "51": 0.29265, + "52": 0.29418, + "53": 0.29501, + "54": 0.29502, + "55": 0.29522, + "56": 0.296, + "57": 0.29522, + "58": 0.2961, + "59": 0.29635, + "60": 0.29506, + "61": 0.29537, + "62": 0.29452, + "63": 0.29575, + "64": 0.29613, + "65": 0.2942, + "66": 0.29535, + "67": 0.6477, + "68": 0.29093, + "69": 0.29393, + "70": 0.29211, + "71": 0.29083, + "72": 0.29058, + "73": 0.29094, + "74": 0.29524, + "75": 0.29494, + "76": 0.29537, + "77": 0.29623, + "78": 0.29481, + "79": 0.29569, + "80": 0.29566, + "81": 0.29531, + "82": 0.29454, + "83": 0.29679, + "84": 0.2951, + "85": 0.29501, + "86": 0.29539, + "87": 0.29473, + "88": 0.2946, + "89": 0.29497, + "90": 0.29597, + "91": 0.2919, + "92": 0.29158, + "93": 0.29164, + "94": 0.29099, + "95": 0.29095, + "96": 0.32413, + "97": 0.29708, + "98": 0.29254, + "99": 0.29206, + "100": 0.29407 } }, "num-zeros": { diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..66288218291 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + 
"41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.21727, + "52": 10.1271, + "53": 10.36018, + "54": 10.25981, + "55": 10.20104, + "56": 9.98213, + "57": 9.84717, + "58": 10.12257, + "59": 9.90914, + "60": 9.83288, + "61": 9.9713, + "62": 10.22005, + "63": 9.67481, + "64": 10.01706, + "65": 9.27085, + "66": 9.93979, + "67": 9.62899, + "68": 9.98681, + "69": 9.9839, + "70": 9.92559, + "71": 9.81011, + "72": 9.79196, + "73": 9.68163, + "74": 9.17945, + "75": 9.61324, + "76": 9.28951, + "77": 10.19435, + "78": 9.8755, + "79": 9.5297, + "80": 9.56593, + "81": 9.63478, + "82": 9.82295, + "83": 9.47164, + "84": 9.54623, + "85": 9.74358, + "86": 9.20093, + "87": 9.70179, + "88": 9.86553, + "89": 9.73045, + "90": 9.92108, + "91": 9.48732, + "92": 9.47637, + "93": 9.21283, + "94": 8.94903, + "95": 9.6165, + "96": 9.63374, + "97": 9.41244, + "98": 9.7751, + "99": 9.00191, + "100": 9.50967 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2195.0, + "52": 2330.0, + "53": 3549.0, + "54": 2650.0, + "55": 2247.0, + "56": 2422.0, + "57": 2195.0, + "58": 3241.0, + "59": 2626.0, + "60": 
2775.0, + "61": 2747.0, + "62": 2926.0, + "63": 2898.0, + "64": 3090.0, + "65": 2245.0, + "66": 3827.0, + "67": 2655.0, + "68": 3117.0, + "69": 2656.0, + "70": 3659.0, + "71": 2819.0, + "72": 2710.0, + "73": 3355.0, + "74": 2210.0, + "75": 2927.0, + "76": 3577.0, + "77": 3727.0, + "78": 3855.0, + "79": 4237.0, + "80": 3462.0, + "81": 5157.0, + "82": 3426.0, + "83": 3234.0, + "84": 3878.0, + "85": 3734.0, + "86": 3184.0, + "87": 4090.0, + "88": 3594.0, + "89": 4234.0, + "90": 3744.0, + "91": 2967.0, + "92": 4509.0, + "93": 3649.0, + "94": 4486.0, + "95": 4215.0, + "96": 3851.0, + "97": 4098.0, + "98": 5029.0, + "99": 3975.0, + "100": 3445.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 414416384.0, + "52": 414416384.0, + "53": 414416384.0, + "54": 414416384.0, + "55": 414416384.0, + "56": 414416384.0, + "57": 414416384.0, + "58": 414416384.0, + "59": 414416384.0, + "60": 414416384.0, + "61": 414416384.0, + "62": 414416384.0, + "63": 414416384.0, + "64": 414416384.0, + "65": 414416384.0, + "66": 414416384.0, + "67": 414416384.0, + "68": 414416384.0, + "69": 414416384.0, + "70": 414416384.0, + "71": 414416384.0, + "72": 414416384.0, + "73": 414416384.0, + "74": 
414416384.0, + "75": 414416384.0, + "76": 414416384.0, + "77": 414416384.0, + "78": 414416384.0, + "79": 414416384.0, + "80": 414416384.0, + "81": 414416384.0, + "82": 414416384.0, + "83": 414416384.0, + "84": 414416384.0, + "85": 414416384.0, + "86": 414416384.0, + "87": 414416384.0, + "88": 414416384.0, + "89": 414416384.0, + "90": 414416384.0, + "91": 414416384.0, + "92": 414416384.0, + "93": 414416384.0, + "94": 414416384.0, + "95": 414416384.0, + "96": 414416384.0, + "97": 414416384.0, + "98": 414416384.0, + "99": 414416384.0, + "100": 414416384.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1595256320.0, + "52": 1595257344.0, + "53": 1595257344.0, + "54": 1595257344.0, + "55": 1595257344.0, + "56": 1595257344.0, + "57": 1595257344.0, + "58": 1595257344.0, + "59": 1595257344.0, + "60": 1595257344.0, + "61": 1595257344.0, + "62": 1595257344.0, + "63": 1595257344.0, + "64": 1595257344.0, + "65": 1595257344.0, + "66": 1595257344.0, + "67": 1595257344.0, + "68": 1595257344.0, + "69": 1595257344.0, + "70": 1595257344.0, + "71": 1595257344.0, + "72": 1595257344.0, + "73": 1595257344.0, + "74": 1595257344.0, + "75": 1595257344.0, + "76": 1595257344.0, + "77": 
1595257344.0, + "78": 1595257344.0, + "79": 1595257344.0, + "80": 1595257344.0, + "81": 1595257344.0, + "82": 1595257344.0, + "83": 1595257344.0, + "84": 1595257344.0, + "85": 1595257344.0, + "86": 1595257344.0, + "87": 1595257344.0, + "88": 1595257344.0, + "89": 1595257344.0, + "90": 1595257344.0, + "91": 1595257344.0, + "92": 1595257344.0, + "93": 1595257344.0, + "94": 1595257344.0, + "95": 1595257344.0, + "96": 1595257344.0, + "97": 1595257344.0, + "98": 1595257344.0, + "99": 1595257344.0, + "100": 1595257344.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4.6255, + "52": 0.3078, + "53": 0.29258, + "54": 0.29374, + "55": 0.2933, + "56": 0.29417, + "57": 0.29313, + "58": 0.29372, + "59": 0.2927, + "60": 0.29145, + "61": 0.28923, + "62": 0.28993, + "63": 0.28959, + "64": 0.28843, + "65": 0.28881, + "66": 0.29031, + "67": 0.28903, + "68": 0.29293, + "69": 0.28962, + "70": 0.289, + "71": 0.29028, + "72": 0.29172, + "73": 0.29135, + "74": 0.2898, + "75": 0.28811, + "76": 0.28948, + "77": 0.29039, + "78": 0.29199, + "79": 0.29181, + "80": 0.29034, + "81": 0.29243, + "82": 0.29201, + "83": 0.28907, + "84": 0.28862, + "85": 0.2892, + "86": 0.28908, + "87": 0.28908, + "88": 
0.28933, + "89": 0.29117, + "90": 0.2904, + "91": 0.2908, + "92": 0.28876, + "93": 0.2907, + "94": 0.29089, + "95": 0.2905, + "96": 0.29005, + "97": 0.28901, + "98": 0.2916, + "99": 0.29038, + "100": 0.29014 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..3def3c8618f --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_gb200.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.84064, + "2": 10.85201, + "3": 10.84256, + "4": 10.84663, + "5": 10.85667, + "6": 10.8655, + "7": 10.85455, + "8": 10.84814, + "9": 10.85295, + "10": 10.82026, + "11": 10.86468, + "12": 10.85604, + "13": 10.87584, + "14": 10.86361, + "15": 10.86365, + "16": 10.86053, + "17": 10.84579, + "18": 10.8538, + "19": 10.85943, + "20": 10.84139, + "21": 10.86327, + "22": 10.83014, + "23": 10.85749, + "24": 10.83816, + "25": 10.82517, + "26": 10.8257, + "27": 10.83038, + "28": 10.82029, + "29": 10.81214, + "30": 10.74061, + "31": 10.68185, + "32": 10.76069, + "33": 10.7491, + "34": 10.67394, + "35": 10.65529, + "36": 10.63303, + "37": 10.66285, + "38": 10.60535, + "39": 10.6732, + "40": 10.50952, + "41": 10.53339, + "42": 10.54981, + "43": 10.35084, + "44": 10.3993, + "45": 10.31307, + "46": 10.27398, + "47": 10.45772, + "48": 10.27942, + "49": 10.05213, + "50": 10.28011, + "51": 10.23426, + "52": 10.13488, + "53": 10.35279, + "54": 10.26189, + "55": 10.20983, + "56": 9.99599, + "57": 9.87962, + "58": 10.13391, + "59": 9.92304, + "60": 9.85379, + "61": 9.97314, + "62": 10.211, + "63": 9.70514, + "64": 10.01457, + "65": 9.30759, + "66": 9.9366, + "67": 9.63221, + "68": 9.98219, + "69": 9.98048, + "70": 9.92986, + 
"71": 9.81575, + "72": 9.79602, + "73": 9.69104, + "74": 9.20049, + "75": 9.61228, + "76": 9.28906, + "77": 10.19068, + "78": 9.86601, + "79": 9.53855, + "80": 9.5578, + "81": 9.63332, + "82": 9.82853, + "83": 9.47188, + "84": 9.54101, + "85": 9.74266, + "86": 9.2142, + "87": 9.7016, + "88": 9.86604, + "89": 9.72339, + "90": 9.92767, + "91": 9.47045, + "92": 9.46809, + "93": 9.21217, + "94": 8.94887, + "95": 9.62787, + "96": 9.6406, + "97": 9.40839, + "98": 9.77147, + "99": 9.00853, + "100": 9.51225 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 284527616.0, + "2": 284527616.0, + "3": 284527616.0, + "4": 284527616.0, + "5": 284527616.0, + "6": 284527616.0, + "7": 284527616.0, + "8": 284527616.0, + "9": 284527616.0, + "10": 284527616.0, + "11": 284527616.0, + "12": 284527616.0, + "13": 284527616.0, + "14": 284527616.0, + "15": 284527616.0, + "16": 416513536.0, + "17": 416513536.0, + "18": 416513536.0, + "19": 416513536.0, + "20": 416513536.0, + "21": 416513536.0, + "22": 416513536.0, + "23": 416513536.0, + "24": 416513536.0, + "25": 416513536.0, + "26": 416513536.0, + "27": 416513536.0, + "28": 416513536.0, + "29": 416513536.0, + "30": 416513536.0, + "31": 416513536.0, + "32": 416513536.0, + "33": 416513536.0, + "34": 416513536.0, + "35": 416513536.0, + "36": 416513536.0, + "37": 416513536.0, + "38": 416513536.0, + "39": 416513536.0, + "40": 416513536.0, + "41": 416513536.0, + "42": 416513536.0, + "43": 416513536.0, + "44": 416513536.0, + "45": 416513536.0, + "46": 416513536.0, + "47": 416513536.0, + "48": 416513536.0, + "49": 416513536.0, + "50": 416513536.0, + "51": 416513536.0, + "52": 416513536.0, + "53": 416513536.0, + "54": 416513536.0, + "55": 416513536.0, + "56": 416513536.0, + "57": 416513536.0, + "58": 416513536.0, + "59": 416513536.0, + "60": 416513536.0, + "61": 416513536.0, + "62": 416513536.0, + "63": 416513536.0, + "64": 416513536.0, + "65": 416513536.0, + "66": 416513536.0, + "67": 
416513536.0, + "68": 416513536.0, + "69": 416513536.0, + "70": 416513536.0, + "71": 416513536.0, + "72": 416513536.0, + "73": 416513536.0, + "74": 416513536.0, + "75": 416513536.0, + "76": 416513536.0, + "77": 416513536.0, + "78": 416513536.0, + "79": 416513536.0, + "80": 416513536.0, + "81": 416513536.0, + "82": 416513536.0, + "83": 416513536.0, + "84": 416513536.0, + "85": 416513536.0, + "86": 416513536.0, + "87": 416513536.0, + "88": 416513536.0, + "89": 416513536.0, + "90": 416513536.0, + "91": 416513536.0, + "92": 416513536.0, + "93": 416513536.0, + "94": 416513536.0, + "95": 416513536.0, + "96": 416513536.0, + "97": 416513536.0, + "98": 416513536.0, + "99": 416513536.0, + "100": 416513536.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1465368064.0, + "2": 1465368576.0, + "3": 1465368576.0, + "4": 1465368576.0, + "5": 1465368576.0, + "6": 1465368576.0, + "7": 1465368576.0, + "8": 1465368576.0, + "9": 1465368576.0, + "10": 1465368576.0, + "11": 1465368576.0, + "12": 1465368576.0, + "13": 1465368576.0, + "14": 1465368576.0, + "15": 1465368576.0, + "16": 1465368576.0, + "17": 1597485568.0, + "18": 1597485568.0, + "19": 1597485568.0, + "20": 1597485568.0, + "21": 1597485568.0, + "22": 1597485568.0, + "23": 1597485568.0, + "24": 1597485568.0, + "25": 1597485568.0, + "26": 1597485568.0, + "27": 1597485568.0, + "28": 1597485568.0, + "29": 1597485568.0, + "30": 1597485568.0, + "31": 1597485568.0, + "32": 1597485568.0, + "33": 1597485568.0, + "34": 1597485568.0, + "35": 1597485568.0, + "36": 1597485568.0, + "37": 1597485568.0, + "38": 1597485568.0, + "39": 1597485568.0, + "40": 1597485568.0, + "41": 1597485568.0, + "42": 1597485568.0, + "43": 1597485568.0, + "44": 1597485568.0, + "45": 1597485568.0, + "46": 1597485568.0, + "47": 1597485568.0, + "48": 1597485568.0, + "49": 1597485568.0, + "50": 1597485568.0, + "51": 1597485568.0, + "52": 1597485568.0, + "53": 1597485568.0, + "54": 1597485568.0, 
+ "55": 1597485568.0, + "56": 1597485568.0, + "57": 1597485568.0, + "58": 1597485568.0, + "59": 1597485568.0, + "60": 1597485568.0, + "61": 1597485568.0, + "62": 1597485568.0, + "63": 1597485568.0, + "64": 1597485568.0, + "65": 1597485568.0, + "66": 1597485568.0, + "67": 1597485568.0, + "68": 1597485568.0, + "69": 1597485568.0, + "70": 1597485568.0, + "71": 1597485568.0, + "72": 1597485568.0, + "73": 1597485568.0, + "74": 1597485568.0, + "75": 1597485568.0, + "76": 1597485568.0, + "77": 1597485568.0, + "78": 1597485568.0, + "79": 1597485568.0, + "80": 1597485568.0, + "81": 1597485568.0, + "82": 1597485568.0, + "83": 1597485568.0, + "84": 1597485568.0, + "85": 1597485568.0, + "86": 1597485568.0, + "87": 1597485568.0, + "88": 1597485568.0, + "89": 1597485568.0, + "90": 1597485568.0, + "91": 1597485568.0, + "92": 1597485568.0, + "93": 1597485568.0, + "94": 1597485568.0, + "95": 1597485568.0, + "96": 1597485568.0, + "97": 1597485568.0, + "98": 1597485568.0, + "99": 1597485568.0, + "100": 1597485568.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4.43718, + "2": 0.32141, + "3": 0.34228, + "4": 0.3338, + "5": 0.33423, + "6": 0.33597, + "7": 0.33749, + "8": 0.33831, + "9": 0.34047, + "10": 0.33938, + "11": 0.3381, + "12": 0.34241, + "13": 0.35311, + "14": 0.35495, + "15": 0.33902, + "16": 0.42658, + "17": 0.3452, + "18": 0.35813, + "19": 0.35538, + "20": 0.36232, + "21": 0.36626, + "22": 0.3555, + "23": 0.36916, + "24": 0.3744, + "25": 0.37348, + "26": 0.36915, + "27": 0.37147, + "28": 0.36445, + "29": 0.36069, + "30": 0.35961, + "31": 0.35274, + "32": 0.35514, + "33": 0.35563, + "34": 0.35744, + "35": 0.35843, + "36": 0.35512, + "37": 0.35839, + "38": 0.35761, + "39": 0.35765, + "40": 0.62747, + "41": 0.35467, + "42": 0.35928, + "43": 0.35301, + "44": 0.35215, + "45": 0.35947, + "46": 0.35676, + "47": 0.65816, + "48": 0.35624, + "49": 0.35833, + "50": 0.35593, + "51": 0.38053, + "52": 0.74045, + "53": 
0.36063, + "54": 0.36054, + "55": 0.363, + "56": 0.36264, + "57": 0.36262, + "58": 0.36213, + "59": 0.36223, + "60": 0.35979, + "61": 0.36002, + "62": 0.36456, + "63": 0.36092, + "64": 0.36222, + "65": 0.36214, + "66": 0.36393, + "67": 0.36348, + "68": 0.36404, + "69": 0.36256, + "70": 0.36106, + "71": 0.36265, + "72": 0.36127, + "73": 0.37126, + "74": 0.3637, + "75": 0.36407, + "76": 0.36415, + "77": 0.36331, + "78": 0.3641, + "79": 0.36546, + "80": 0.36427, + "81": 0.35664, + "82": 0.36196, + "83": 0.36259, + "84": 0.36282, + "85": 0.36131, + "86": 0.35889, + "87": 0.36236, + "88": 0.35979, + "89": 0.36186, + "90": 0.36471, + "91": 0.36565, + "92": 0.36403, + "93": 0.365, + "94": 0.36272, + "95": 0.36119, + "96": 0.36129, + "97": 0.36262, + "98": 0.36263, + "99": 0.36514, + "100": 0.36392 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 2392.0, + "17": "nan", + "18": 2314.0, + "19": 2912.0, + "20": 1640.0, + "21": 2053.0, + "22": "nan", + "23": 2462.0, + "24": 2226.0, + "25": 2201.0, + "26": 1963.0, + "27": 1926.0, + "28": 2401.0, + "29": 2492.0, + "30": 2393.0, + "31": 1704.0, + "32": 2541.0, + "33": 2096.0, + "34": 1737.0, + "35": 1810.0, + "36": 1982.0, + "37": 2511.0, + "38": 2185.0, + "39": 2899.0, + "40": 1888.0, + "41": 3169.0, + "42": 2343.0, + "43": 2501.0, + "44": 1938.0, + "45": 2346.0, + "46": 2091.0, + "47": 2853.0, + "48": 2402.0, + "49": 1810.0, + "50": 2718.0, + "51": 2080.0, + "52": 2200.0, + "53": 3412.0, + "54": 2641.0, + "55": 2229.0, + "56": 2244.0, + "57": 2057.0, + "58": 3223.0, + "59": 2431.0, + "60": 2650.0, + "61": 2712.0, + "62": 2995.0, + "63": 2816.0, + "64": 2860.0, + "65": 2015.0, + "66": 3176.0, + "67": 2529.0, + "68": 3108.0, + "69": 2873.0, + "70": 3540.0, + 
"71": 2904.0, + "72": 2693.0, + "73": 3253.0, + "74": 1981.0, + "75": 2780.0, + "76": 3465.0, + "77": 3649.0, + "78": 3593.0, + "79": 3981.0, + "80": 3458.0, + "81": 5181.0, + "82": 3334.0, + "83": 2956.0, + "84": 3527.0, + "85": 3711.0, + "86": 3209.0, + "87": 4133.0, + "88": 3443.0, + "89": 4295.0, + "90": 3801.0, + "91": 2958.0, + "92": 4311.0, + "93": 3544.0, + "94": 4264.0, + "95": 4042.0, + "96": 3849.0, + "97": 3974.0, + "98": 4971.0, + "99": 4071.0, + "100": 3363.0 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json index cb0ad3fdb4b..6a29bef3baa 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json @@ -78,22 +78,22 @@ "72": 9.798, "73": 9.68454, "74": 9.19951, - "75": 9.60518, - "76": 9.27791, - "77": 10.19437, + "75": 9.60519, + "76": 9.2779, + "77": 10.19436, "78": 9.8671, "79": 9.53341, "80": 9.56341, "81": 9.63047, "82": 9.82819, "83": 9.46388, - "84": 9.53736, - "85": 9.74561, + "84": 9.53735, + "85": 9.74562, "86": 9.21332, - "87": 9.7014, + "87": 9.70141, "88": 9.86621, "89": 9.72242, - "90": 9.92089, + "90": 9.9209, "91": 9.47178, "92": 9.46996, "93": 9.20589, @@ -234,90 +234,90 @@ "14": 1465368576.0, "15": 1465368576.0, "16": 1465368576.0, - "17": 1597092352.0, - "18": 1597092352.0, - "19": 1597092352.0, - "20": 1597092352.0, - "21": 1597092352.0, - "22": 1597092352.0, - "23": 1597092352.0, - "24": 1597092352.0, - "25": 1597092352.0, - "26": 1597092352.0, - "27": 1597092352.0, - "28": 1597092352.0, - "29": 1597092352.0, - "30": 1597092352.0, - "31": 1597092352.0, - "32": 1597092352.0, - "33": 1597092352.0, - "34": 1597092352.0, - "35": 1597092352.0, - "36": 
1597092352.0, - "37": 1597092352.0, - "38": 1597092352.0, - "39": 1597092352.0, - "40": 1597092352.0, - "41": 1597092352.0, - "42": 1597092352.0, - "43": 1597092352.0, - "44": 1597092352.0, - "45": 1597092352.0, - "46": 1597092352.0, - "47": 1597092352.0, - "48": 1597092352.0, - "49": 1597092352.0, - "50": 1597092352.0, - "51": 1597092352.0, - "52": 1597092352.0, - "53": 1597092352.0, - "54": 1597092352.0, - "55": 1597092352.0, - "56": 1597092352.0, - "57": 1597092352.0, - "58": 1597092352.0, - "59": 1597092352.0, - "60": 1597092352.0, - "61": 1597092352.0, - "62": 1597092352.0, - "63": 1597092352.0, - "64": 1597092352.0, - "65": 1597092352.0, - "66": 1597092352.0, - "67": 1597092352.0, - "68": 1597092352.0, - "69": 1597092352.0, - "70": 1597092352.0, - "71": 1597092352.0, - "72": 1597092352.0, - "73": 1597092352.0, - "74": 1597092352.0, - "75": 1597092352.0, - "76": 1597092352.0, - "77": 1597092352.0, - "78": 1597092352.0, - "79": 1597092352.0, - "80": 1597092352.0, - "81": 1597092352.0, - "82": 1597092352.0, - "83": 1597092352.0, - "84": 1597092352.0, - "85": 1597092352.0, - "86": 1597092352.0, - "87": 1597092352.0, - "88": 1597092352.0, - "89": 1597092352.0, - "90": 1597092352.0, - "91": 1597092352.0, - "92": 1597092352.0, - "93": 1597092352.0, - "94": 1597092352.0, - "95": 1597092352.0, - "96": 1597092352.0, - "97": 1597092352.0, - "98": 1597092352.0, - "99": 1597092352.0, - "100": 1597092352.0 + "17": 1597485568.0, + "18": 1597485568.0, + "19": 1597485568.0, + "20": 1597485568.0, + "21": 1597485568.0, + "22": 1597485568.0, + "23": 1597485568.0, + "24": 1597485568.0, + "25": 1597485568.0, + "26": 1597485568.0, + "27": 1597485568.0, + "28": 1597485568.0, + "29": 1597485568.0, + "30": 1597485568.0, + "31": 1597485568.0, + "32": 1597485568.0, + "33": 1597485568.0, + "34": 1597485568.0, + "35": 1597485568.0, + "36": 1597485568.0, + "37": 1597485568.0, + "38": 1597485568.0, + "39": 1597485568.0, + "40": 1597485568.0, + "41": 1597485568.0, + "42": 1597485568.0, + 
"43": 1597485568.0, + "44": 1597485568.0, + "45": 1597485568.0, + "46": 1597485568.0, + "47": 1597485568.0, + "48": 1597485568.0, + "49": 1597485568.0, + "50": 1597485568.0, + "51": 1597485568.0, + "52": 1597485568.0, + "53": 1597485568.0, + "54": 1597485568.0, + "55": 1597485568.0, + "56": 1597485568.0, + "57": 1597485568.0, + "58": 1597485568.0, + "59": 1597485568.0, + "60": 1597485568.0, + "61": 1597485568.0, + "62": 1597485568.0, + "63": 1597485568.0, + "64": 1597485568.0, + "65": 1597485568.0, + "66": 1597485568.0, + "67": 1597485568.0, + "68": 1597485568.0, + "69": 1597485568.0, + "70": 1597485568.0, + "71": 1597485568.0, + "72": 1597485568.0, + "73": 1597485568.0, + "74": 1597485568.0, + "75": 1597485568.0, + "76": 1597485568.0, + "77": 1597485568.0, + "78": 1597485568.0, + "79": 1597485568.0, + "80": 1597485568.0, + "81": 1597485568.0, + "82": 1597485568.0, + "83": 1597485568.0, + "84": 1597485568.0, + "85": 1597485568.0, + "86": 1597485568.0, + "87": 1597485568.0, + "88": 1597485568.0, + "89": 1597485568.0, + "90": 1597485568.0, + "91": 1597485568.0, + "92": 1597485568.0, + "93": 1597485568.0, + "94": 1597485568.0, + "95": 1597485568.0, + "96": 1597485568.0, + "97": 1597485568.0, + "98": 1597485568.0, + "99": 1597485568.0, + "100": 1597485568.0 } }, "iteration-time": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 6.78805, - "2": 0.23224, - "3": 0.20783, - "4": 0.21971, - "5": 0.22246, - "6": 0.23346, - "7": 0.21626, - "8": 0.20597, - "9": 0.2043, - "10": 0.20681, - "11": 0.20511, - "12": 0.20484, - "13": 0.21351, - "14": 0.20446, - "15": 0.21063, - "16": 0.28338, - "17": 0.21017, - "18": 0.21577, - "19": 0.21852, - "20": 0.23072, - "21": 0.25974, - "22": 0.21717, - "23": 0.22548, - "24": 0.21878, - "25": 0.21448, - "26": 0.21416, - "27": 0.22357, - "28": 0.21645, - "29": 0.21325, - "30": 0.21465, - "31": 0.21452, - "32": 0.21608, - "33": 0.23531, - "34": 0.227, - "35": 0.2188, - "36": 0.21248, - "37": 0.21694, - "38": 
0.21269, - "39": 0.22285, - "40": 0.21458, - "41": 0.2134, - "42": 0.21991, - "43": 0.21621, - "44": 0.21422, - "45": 0.21339, - "46": 0.21332, - "47": 0.21892, - "48": 0.21384, - "49": 0.21668, - "50": 0.21806, - "51": 0.21958, - "52": 0.2173, - "53": 0.21642, - "54": 0.22157, - "55": 0.21549, - "56": 0.21528, - "57": 0.21789, - "58": 0.21634, - "59": 0.21649, - "60": 0.2141, - "61": 0.21447, - "62": 0.21596, - "63": 0.21545, - "64": 0.22145, - "65": 0.21603, - "66": 0.21504, - "67": 0.21551, - "68": 0.21918, - "69": 0.21831, - "70": 0.21943, - "71": 0.21537, - "72": 0.21937, - "73": 0.21783, - "74": 0.2246, - "75": 0.22031, - "76": 0.23249, - "77": 0.21862, - "78": 0.21663, - "79": 0.21806, - "80": 0.21694, - "81": 0.21684, - "82": 0.21559, - "83": 0.21877, - "84": 0.2151, - "85": 0.21819, - "86": 0.2167, - "87": 0.21768, - "88": 0.21415, - "89": 0.21694, - "90": 0.21444, - "91": 0.21616, - "92": 0.21967, - "93": 0.21672, - "94": 0.21699, - "95": 0.21892, - "96": 0.21871, - "97": 0.21805, - "98": 0.21674, - "99": 0.21639, - "100": 0.21581 + "1": 7.85348, + "2": 0.23423, + "3": 0.2045, + "4": 0.18465, + "5": 0.18457, + "6": 0.18573, + "7": 0.18584, + "8": 0.19132, + "9": 0.18718, + "10": 0.18632, + "11": 0.18549, + "12": 0.18453, + "13": 0.18301, + "14": 0.18637, + "15": 0.18341, + "16": 0.27303, + "17": 0.1875, + "18": 0.19094, + "19": 0.19099, + "20": 0.19512, + "21": 0.19472, + "22": 0.18932, + "23": 0.19109, + "24": 0.19032, + "25": 0.19034, + "26": 0.19014, + "27": 0.19037, + "28": 0.19342, + "29": 0.19102, + "30": 0.19217, + "31": 0.1905, + "32": 0.18989, + "33": 0.19339, + "34": 0.19354, + "35": 0.19435, + "36": 0.19151, + "37": 0.1914, + "38": 0.19302, + "39": 0.1935, + "40": 0.18995, + "41": 0.19387, + "42": 0.19161, + "43": 0.19131, + "44": 0.19213, + "45": 0.1914, + "46": 0.1912, + "47": 0.19009, + "48": 0.1917, + "49": 0.19013, + "50": 0.19041, + "51": 0.19678, + "52": 0.18974, + "53": 0.19754, + "54": 0.19109, + "55": 0.19038, + "56": 0.19071, + "57": 
0.19479, + "58": 0.1896, + "59": 0.18945, + "60": 0.19321, + "61": 0.19042, + "62": 0.19018, + "63": 0.19145, + "64": 0.19092, + "65": 0.1911, + "66": 0.1905, + "67": 0.19866, + "68": 0.20109, + "69": 0.19967, + "70": 0.20138, + "71": 0.19744, + "72": 0.1992, + "73": 0.1983, + "74": 0.19896, + "75": 0.19812, + "76": 0.2002, + "77": 0.20008, + "78": 0.1993, + "79": 0.1982, + "80": 0.19675, + "81": 0.19588, + "82": 0.18814, + "83": 0.18859, + "84": 0.19035, + "85": 0.20544, + "86": 0.1936, + "87": 0.19585, + "88": 0.18962, + "89": 0.18921, + "90": 0.1877, + "91": 0.18708, + "92": 0.18744, + "93": 0.18758, + "94": 0.18685, + "95": 0.18938, + "96": 0.18819, + "97": 0.18788, + "98": 0.18915, + "99": 0.18809, + "100": 0.18729 } }, "num-zeros": { @@ -506,32 +506,32 @@ "72": 2640.0, "73": 3199.0, "74": 2084.0, - "75": 2809.0, - "76": 3599.0, - "77": 3667.0, - "78": 3680.0, - "79": 3972.0, - "80": 3365.0, - "81": 5042.0, - "82": 3291.0, - "83": 3016.0, - "84": 3592.0, - "85": 3792.0, - "86": 3192.0, - "87": 4219.0, - "88": 3376.0, - "89": 4110.0, - "90": 3939.0, - "91": 2912.0, - "92": 4114.0, - "93": 3499.0, - "94": 4339.0, - "95": 3829.0, - "96": 3875.0, - "97": 4100.0, - "98": 4889.0, - "99": 3771.0, - "100": 3390.0 + "75": 2823.0, + "76": 3490.0, + "77": 3710.0, + "78": 3619.0, + "79": 3911.0, + "80": 3431.0, + "81": 4963.0, + "82": 3460.0, + "83": 3062.0, + "84": 3593.0, + "85": 3752.0, + "86": 3255.0, + "87": 4096.0, + "88": 3272.0, + "89": 4074.0, + "90": 3810.0, + "91": 2877.0, + "92": 4080.0, + "93": 3469.0, + "94": 4428.0, + "95": 3850.0, + "96": 3832.0, + "97": 4102.0, + "98": 4833.0, + "99": 3795.0, + "100": 3405.0 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..3636eb8af32 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.23471, + "52": 10.13764, + "53": 10.34797, + "54": 10.26738, + "55": 10.20734, + "56": 9.99527, + "57": 9.89333, + "58": 10.13452, + "59": 9.92856, + "60": 9.8551, + "61": 9.98264, + "62": 10.20686, + "63": 9.70842, + "64": 10.01687, + "65": 9.30409, + "66": 9.93326, + "67": 9.62677, + "68": 9.98429, + "69": 9.9755, + "70": 9.93956, + "71": 9.81005, + "72": 9.798, + "73": 9.68454, + "74": 9.19951, + "75": 9.60519, + "76": 9.2779, + "77": 10.19436, + "78": 9.8671, + "79": 9.53341, + "80": 9.56341, + "81": 9.63047, + "82": 9.82819, + "83": 9.46388, + "84": 9.53735, + "85": 9.74562, + "86": 9.21332, + "87": 9.70141, + "88": 9.86621, + "89": 9.72242, + "90": 9.9209, + "91": 9.47178, + "92": 9.46996, + "93": 9.20589, + "94": 8.94772, + "95": 9.60815, + "96": 9.63635, + "97": 9.4138, + "98": 9.77274, + "99": 8.9958, + "100": 9.50415 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + 
"7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2041.0, + "52": 2226.0, + "53": 3222.0, + "54": 2784.0, + "55": 2290.0, + "56": 2428.0, + "57": 2146.0, + "58": 3048.0, + "59": 2504.0, + "60": 2612.0, + "61": 2623.0, + "62": 3003.0, + "63": 2762.0, + "64": 2917.0, + "65": 2104.0, + "66": 3550.0, + "67": 2433.0, + "68": 3146.0, + "69": 2877.0, + "70": 3528.0, + "71": 2983.0, + "72": 2640.0, + "73": 3199.0, + "74": 2084.0, + "75": 2823.0, + "76": 3490.0, + "77": 3710.0, + "78": 3619.0, + "79": 3911.0, + "80": 3431.0, + "81": 4963.0, + "82": 3460.0, + "83": 3062.0, + "84": 3593.0, + "85": 3752.0, + "86": 3255.0, + "87": 4096.0, + "88": 3272.0, + "89": 4074.0, + "90": 3810.0, + "91": 2877.0, + "92": 4080.0, + "93": 3469.0, + "94": 4428.0, + "95": 3850.0, + "96": 3832.0, + "97": 4102.0, + "98": 4833.0, + "99": 3795.0, + "100": 3405.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": 
"nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 415464960.0, + "52": 415464960.0, + "53": 415464960.0, + "54": 415464960.0, + "55": 415464960.0, + "56": 415464960.0, + "57": 415464960.0, + "58": 415464960.0, + "59": 415464960.0, + "60": 415464960.0, + "61": 415464960.0, + "62": 415464960.0, + "63": 415464960.0, + "64": 415464960.0, + "65": 415464960.0, + "66": 415464960.0, + "67": 415464960.0, + "68": 415464960.0, + "69": 415464960.0, + "70": 415464960.0, + "71": 415464960.0, + "72": 415464960.0, + "73": 415464960.0, + "74": 415464960.0, + "75": 415464960.0, + "76": 415464960.0, + "77": 415464960.0, + "78": 415464960.0, + "79": 415464960.0, + "80": 415464960.0, + "81": 415464960.0, + "82": 415464960.0, + "83": 415464960.0, + "84": 415464960.0, + "85": 415464960.0, + "86": 415464960.0, + "87": 415464960.0, + "88": 415464960.0, + "89": 415464960.0, + "90": 415464960.0, + "91": 415464960.0, + "92": 415464960.0, + "93": 415464960.0, + "94": 415464960.0, + "95": 415464960.0, + "96": 415464960.0, + "97": 415464960.0, + "98": 415464960.0, + "99": 415464960.0, + "100": 415464960.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": 
"nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1596304896.0, + "52": 1596305920.0, + "53": 1596305920.0, + "54": 1596305920.0, + "55": 1596305920.0, + "56": 1596305920.0, + "57": 1596305920.0, + "58": 1596305920.0, + "59": 1596305920.0, + "60": 1596305920.0, + "61": 1596305920.0, + "62": 1596305920.0, + "63": 1596305920.0, + "64": 1596305920.0, + "65": 1596305920.0, + "66": 1596305920.0, + "67": 1596305920.0, + "68": 1596305920.0, + "69": 1596305920.0, + "70": 1596305920.0, + "71": 1596305920.0, + "72": 1596305920.0, + "73": 1596305920.0, + "74": 1596305920.0, + "75": 1596305920.0, + "76": 1596305920.0, + "77": 1596305920.0, + "78": 1596305920.0, + "79": 1596305920.0, + "80": 1596305920.0, + "81": 1596305920.0, + "82": 1596305920.0, + "83": 1596305920.0, + "84": 1596305920.0, + "85": 1596305920.0, + "86": 1596305920.0, + "87": 1596305920.0, + "88": 1596305920.0, + "89": 1596305920.0, + "90": 1596305920.0, + "91": 1596305920.0, + "92": 1596305920.0, + "93": 1596305920.0, + "94": 1596305920.0, + "95": 1596305920.0, + "96": 1596305920.0, + "97": 1596305920.0, + "98": 1596305920.0, + "99": 1596305920.0, + "100": 1596305920.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + 
"40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 5.92942, + "52": 0.22042, + "53": 0.20141, + "54": 0.20104, + "55": 0.20086, + "56": 0.20205, + "57": 0.20313, + "58": 0.20575, + "59": 0.2059, + "60": 0.20487, + "61": 0.20376, + "62": 0.20344, + "63": 0.20602, + "64": 0.20171, + "65": 0.20118, + "66": 0.20255, + "67": 0.20176, + "68": 0.20547, + "69": 0.20291, + "70": 0.20293, + "71": 0.20018, + "72": 0.20194, + "73": 0.20093, + "74": 0.20334, + "75": 0.20211, + "76": 0.20117, + "77": 0.20772, + "78": 0.20129, + "79": 0.20479, + "80": 0.20282, + "81": 0.20264, + "82": 0.20056, + "83": 0.20106, + "84": 0.20106, + "85": 0.20234, + "86": 0.20068, + "87": 0.20279, + "88": 0.20195, + "89": 0.20174, + "90": 0.20096, + "91": 0.20103, + "92": 0.20077, + "93": 0.20116, + "94": 0.2013, + "95": 0.20159, + "96": 0.20087, + "97": 0.20359, + "98": 0.20084, + "99": 0.20147, + "100": 0.20053 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json index 0eef09cf2c1..45a51405f72 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json @@ -218,9 +218,9 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1465367040.0, - "2": 1465367040.0, - "3": 1465368064.0, + "1": 1465368064.0, + "2": 1465368576.0, + "3": 1465368576.0, "4": 1465368576.0, "5": 1465368576.0, "6": 1465368576.0, @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 8.02782, - "2": 0.31435, - "3": 0.27957, - "4": 0.27933, - "5": 0.27866, - "6": 0.27855, - "7": 0.2779, - "8": 0.27621, - 
"9": 0.27704, - "10": 0.27611, - "11": 0.27501, - "12": 0.27489, - "13": 0.27468, - "14": 0.27386, - "15": 0.27315, - "16": 0.41595, - "17": 0.27523, - "18": 0.28979, - "19": 0.28871, - "20": 0.2888, - "21": 0.28867, - "22": 0.27653, - "23": 0.29205, - "24": 0.29078, - "25": 0.29104, - "26": 0.29087, - "27": 0.28794, - "28": 0.28784, - "29": 0.28659, - "30": 0.28669, - "31": 0.28638, - "32": 0.2878, - "33": 0.28717, - "34": 0.28616, - "35": 0.28626, - "36": 0.28648, - "37": 0.28977, - "38": 0.28615, - "39": 0.2864, - "40": 0.28588, - "41": 0.28749, - "42": 0.28735, - "43": 0.28605, - "44": 0.28798, - "45": 0.2882, - "46": 0.28727, - "47": 0.28616, - "48": 0.28603, - "49": 0.2876, - "50": 0.29155, - "51": 0.30309, - "52": 0.29889, - "53": 0.29736, - "54": 0.29772, - "55": 0.29611, - "56": 0.29565, - "57": 0.29413, - "58": 0.29391, - "59": 0.29344, - "60": 0.29428, - "61": 0.29695, - "62": 0.29282, - "63": 0.29418, - "64": 0.29352, - "65": 0.29274, - "66": 0.29449, - "67": 0.29627, - "68": 0.29636, - "69": 0.29393, - "70": 0.28967, - "71": 0.28925, - "72": 0.28962, - "73": 0.28944, - "74": 0.28948, - "75": 0.28996, - "76": 0.28938, - "77": 0.28855, - "78": 0.28891, - "79": 0.28905, - "80": 0.28968, - "81": 0.28873, - "82": 0.28966, - "83": 0.2884, - "84": 0.28842, - "85": 0.29077, - "86": 0.28927, - "87": 0.28888, - "88": 0.28909, - "89": 0.28807, - "90": 0.28887, - "91": 0.28894, - "92": 0.28908, - "93": 0.28985, - "94": 0.289, - "95": 0.28861, - "96": 0.28831, - "97": 0.2877, - "98": 0.29019, - "99": 0.28839, - "100": 0.2881 + "1": 2.87517, + "2": 0.32741, + "3": 0.30727, + "4": 0.29165, + "5": 0.29258, + "6": 0.28618, + "7": 0.28628, + "8": 0.28498, + "9": 0.28839, + "10": 0.29027, + "11": 0.28697, + "12": 0.28511, + "13": 0.29151, + "14": 0.28721, + "15": 0.2851, + "16": 0.40392, + "17": 0.28544, + "18": 0.2995, + "19": 0.30593, + "20": 0.29922, + "21": 0.3, + "22": 0.2873, + "23": 0.29862, + "24": 0.3016, + "25": 0.3043, + "26": 0.30026, + "27": 0.30577, + "28": 
0.29895, + "29": 0.30118, + "30": 0.30038, + "31": 0.29973, + "32": 0.30495, + "33": 0.29971, + "34": 0.3058, + "35": 0.30206, + "36": 0.29968, + "37": 0.30462, + "38": 0.29914, + "39": 0.30006, + "40": 0.30275, + "41": 0.29843, + "42": 0.30385, + "43": 0.30136, + "44": 0.30005, + "45": 0.30598, + "46": 0.30646, + "47": 0.30678, + "48": 0.30524, + "49": 0.30042, + "50": 0.30333, + "51": 0.3058, + "52": 0.2979, + "53": 0.29694, + "54": 0.29792, + "55": 0.29906, + "56": 0.2986, + "57": 0.299, + "58": 0.29801, + "59": 0.29877, + "60": 0.29785, + "61": 0.2976, + "62": 0.29759, + "63": 0.75788, + "64": 0.30011, + "65": 0.29654, + "66": 0.29892, + "67": 0.29761, + "68": 0.29802, + "69": 0.3014, + "70": 0.30046, + "71": 0.29911, + "72": 0.29858, + "73": 0.29679, + "74": 0.2965, + "75": 0.29902, + "76": 0.29862, + "77": 0.29715, + "78": 0.2986, + "79": 0.30843, + "80": 0.29932, + "81": 0.29873, + "82": 0.29681, + "83": 0.29885, + "84": 0.29829, + "85": 0.29898, + "86": 0.29994, + "87": 0.29961, + "88": 0.3003, + "89": 0.29957, + "90": 0.29999, + "91": 0.29959, + "92": 0.30006, + "93": 0.30057, + "94": 0.29999, + "95": 0.30006, + "96": 0.29915, + "97": 0.30017, + "98": 0.29952, + "99": 0.30127, + "100": 0.30043 } }, "num-zeros": { diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json new file mode 100644 index 00000000000..89836562450 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": 
"nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.21727, + "52": 10.1271, + "53": 10.36018, + "54": 10.25981, + "55": 10.20104, + "56": 9.98213, + "57": 9.84717, + "58": 10.12257, + "59": 9.90914, + "60": 9.83288, + "61": 9.9713, + "62": 10.22005, + "63": 9.67481, + "64": 10.01706, + "65": 9.27085, + "66": 9.93979, + "67": 9.62899, + "68": 9.98681, + "69": 9.9839, + "70": 9.92559, + "71": 9.81011, + "72": 9.79196, + "73": 9.68163, + "74": 9.17945, + "75": 9.61324, + "76": 9.28951, + "77": 10.19435, + "78": 9.8755, + "79": 9.5297, + "80": 9.56593, + "81": 9.63478, + "82": 9.82295, + "83": 9.47164, + "84": 9.54623, + "85": 9.74358, + "86": 9.20093, + "87": 9.70179, + "88": 9.86553, + "89": 9.73045, + "90": 9.92108, + "91": 9.48732, + "92": 9.47637, + "93": 9.21283, + "94": 8.94903, + "95": 9.6165, + "96": 9.63374, + "97": 9.41244, + "98": 9.7751, + "99": 9.00191, + "100": 9.50967 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", 
+ "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2195.0, + "52": 2330.0, + "53": 3549.0, + "54": 2650.0, + "55": 2247.0, + "56": 2422.0, + "57": 2195.0, + "58": 3241.0, + "59": 2626.0, + "60": 2775.0, + "61": 2747.0, + "62": 2926.0, + "63": 2898.0, + "64": 3090.0, + "65": 2245.0, + "66": 3827.0, + "67": 2655.0, + "68": 3117.0, + "69": 2656.0, + "70": 3659.0, + "71": 2819.0, + "72": 2710.0, + "73": 3355.0, + "74": 2210.0, + "75": 2927.0, + "76": 3577.0, + "77": 3727.0, + "78": 3855.0, + "79": 4237.0, + "80": 3462.0, + "81": 5157.0, + "82": 3426.0, + "83": 3234.0, + "84": 3878.0, + "85": 3734.0, + "86": 3184.0, + "87": 4090.0, + "88": 3594.0, + "89": 4234.0, + "90": 3744.0, + "91": 2967.0, + "92": 4509.0, + "93": 3649.0, + "94": 4486.0, + "95": 4215.0, + "96": 3851.0, + "97": 4098.0, + "98": 5029.0, + "99": 3975.0, + "100": 3445.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 415464960.0, + "52": 415464960.0, + "53": 415464960.0, + "54": 415464960.0, + "55": 415464960.0, + "56": 415464960.0, + "57": 
415464960.0, + "58": 415464960.0, + "59": 415464960.0, + "60": 415464960.0, + "61": 415464960.0, + "62": 415464960.0, + "63": 415464960.0, + "64": 415464960.0, + "65": 415464960.0, + "66": 415464960.0, + "67": 415464960.0, + "68": 415464960.0, + "69": 415464960.0, + "70": 415464960.0, + "71": 415464960.0, + "72": 415464960.0, + "73": 415464960.0, + "74": 415464960.0, + "75": 415464960.0, + "76": 415464960.0, + "77": 415464960.0, + "78": 415464960.0, + "79": 415464960.0, + "80": 415464960.0, + "81": 415464960.0, + "82": 415464960.0, + "83": 415464960.0, + "84": 415464960.0, + "85": 415464960.0, + "86": 415464960.0, + "87": 415464960.0, + "88": 415464960.0, + "89": 415464960.0, + "90": 415464960.0, + "91": 415464960.0, + "92": 415464960.0, + "93": 415464960.0, + "94": 415464960.0, + "95": 415464960.0, + "96": 415464960.0, + "97": 415464960.0, + "98": 415464960.0, + "99": 415464960.0, + "100": 415464960.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1596304896.0, + "52": 1596305920.0, + "53": 1596305920.0, + "54": 1596305920.0, + "55": 1596305920.0, + "56": 1596305920.0, + "57": 1596305920.0, + "58": 1596305920.0, + "59": 1596305920.0, + "60": 1596305920.0, + "61": 
1596305920.0, + "62": 1596305920.0, + "63": 1596305920.0, + "64": 1596305920.0, + "65": 1596305920.0, + "66": 1596305920.0, + "67": 1596305920.0, + "68": 1596305920.0, + "69": 1596305920.0, + "70": 1596305920.0, + "71": 1596305920.0, + "72": 1596305920.0, + "73": 1596305920.0, + "74": 1596305920.0, + "75": 1596305920.0, + "76": 1596305920.0, + "77": 1596305920.0, + "78": 1596305920.0, + "79": 1596305920.0, + "80": 1596305920.0, + "81": 1596305920.0, + "82": 1596305920.0, + "83": 1596305920.0, + "84": 1596305920.0, + "85": 1596305920.0, + "86": 1596305920.0, + "87": 1596305920.0, + "88": 1596305920.0, + "89": 1596305920.0, + "90": 1596305920.0, + "91": 1596305920.0, + "92": 1596305920.0, + "93": 1596305920.0, + "94": 1596305920.0, + "95": 1596305920.0, + "96": 1596305920.0, + "97": 1596305920.0, + "98": 1596305920.0, + "99": 1596305920.0, + "100": 1596305920.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3.74437, + "52": 0.32779, + "53": 0.3059, + "54": 0.30649, + "55": 0.30382, + "56": 0.30295, + "57": 0.30294, + "58": 0.30245, + "59": 0.30304, + "60": 0.30304, + "61": 0.30367, + "62": 0.30374, + "63": 0.30252, + "64": 0.304, + "65": 0.30269, + "66": 0.30287, + "67": 
0.30327, + "68": 0.30407, + "69": 0.30396, + "70": 0.30328, + "71": 0.30476, + "72": 0.3053, + "73": 0.30394, + "74": 0.3027, + "75": 0.30299, + "76": 0.30389, + "77": 0.30485, + "78": 0.30454, + "79": 0.304, + "80": 0.30244, + "81": 0.30324, + "82": 0.30372, + "83": 0.30372, + "84": 0.30436, + "85": 0.30371, + "86": 0.30282, + "87": 0.30363, + "88": 0.30375, + "89": 0.30379, + "90": 0.30426, + "91": 0.30435, + "92": 0.30341, + "93": 0.30389, + "94": 0.30489, + "95": 0.30286, + "96": 0.30305, + "97": 0.30297, + "98": 0.30369, + "99": 0.30282, + "100": 0.30347 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgx_gb200.json new file mode 100644 index 00000000000..9e26dfeeb6e --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgx_gb200.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json index 9124bb16e1b..ab11d31f2ca 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json @@ -1,7 +1,7 @@ { "0": { "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. 
Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And that this is the place where you can be yourself, and be accepted for who you are, and be accepted for who you are, and be", + "generated_text": " And that this is the place where you can be yourself, and be yourself, and be yourself, and be yourself, and be yourself, and be", "generated_tokens": [ 3060, 1455, @@ -17,30 +17,151 @@ 1044, 1321, 1402, - 14571, - 1394, - 2274, - 1636, - 1584, + 14019, 1044, 1321, 1402, - 14571, - 1394, - 2274, - 1636, - 1584, + 14019, + 1044, + 1321, + 1402, + 14019, + 1044, + 1321, + 1402, + 14019, 1044, 1321, 1402 ], - "latency": 0.3596312999725342, + "latency": 0.33650875091552734, "cuda_graph_request_count_map": { - "56": 29 + "32": 29 }, - "step_count": 240, + "step_count": 30, "top_n_logprobs": null, "prompt_top_n_logprobs": null, + "prompt_logprobs": [ + -9.36181926727295, + -2.823990821838379, + -4.610703945159912, + -1.5629558563232422, + -0.7174959182739258, + -1.6296026706695557, + -2.4623641967773438, + -2.1694350242614746, + -2.299478530883789, + -6.261927127838135, + -1.4214489459991455, + -3.5006911754608154, + -4.495674133300781, + -3.756869077682495, + -2.11869478225708, + -2.0652074813842773, + -3.5548300743103027, + -6.7972517013549805, + -0.32509124279022217, + -0.802075982093811, + -6.374052047729492, + -7.404623031616211, + -12.706570625305176, + -2.7203140258789062, + -3.784698486328125, + -0.5262728929519653, + -4.230419158935547, + -0.05865294858813286, + -0.08889779448509216, + -3.3020200729370117, + -10.076433181762695, + -1.1052889823913574, + -6.047104835510254, + -5.263249397277832, + -3.813868522644043, + -2.732881784439087, + -3.4210383892059326, + -6.047735214233398, + -1.9643490314483643, + -5.7317914962768555, + -12.158651351928711, + -12.511089324951172, + -0.07087351381778717, 
+ -2.580897331237793, + -1.4599298238754272, + -3.0208420753479004, + -1.2277309894561768, + -0.006594917271286249, + -3.4534847736358643, + -13.229089736938477, + -4.096384048461914, + -2.5087532997131348, + -5.955618858337402, + -0.7179529070854187, + -0.05547872930765152, + -1.5987433195114136, + -1.0590581893920898, + -5.704042434692383, + -0.3894253373146057, + -5.037204742431641, + -0.5877441167831421, + -0.5446641445159912, + -2.4070374965667725, + -13.461160659790039, + -0.10237079858779907, + -3.5546276569366455, + -1.3483082056045532, + -6.042889595031738, + -0.5367065668106079, + -3.593390703201294, + -0.9327085018157959, + -1.5130213499069214, + -5.070390701293945, + -17.36066436767578, + -6.901477813720703, + -1.0385162830352783, + -3.9858975410461426, + -1.1583341360092163, + -2.342862606048584, + -1.7755080461502075, + -0.27050071954727173, + -9.41438102722168, + -0.3227814733982086, + -7.4246134757995605, + -2.2850522994995117, + -4.027304649353027, + -3.479668378829956 + ], + "generated_logprobs": [ + -1.97231125831604, + -2.363867998123169, + -2.219954490661621, + -0.29585954546928406, + -1.4493519067764282, + -2.232797622680664, + -1.1424486637115479, + -1.5864160060882568, + -1.4188923835754395, + -2.0473084449768066, + -1.470442771911621, + -0.8504352569580078, + -1.147210955619812, + -2.0061838626861572, + -2.4544901847839355, + -1.7092150449752808, + -0.23308466374874115, + -0.38648492097854614, + -0.055945850908756256, + -0.4632662534713745, + -0.09933969378471375, + -0.35298952460289, + -0.032222963869571686, + -0.428203284740448, + -0.04741770401597023, + -0.13727128505706787, + -0.008898601867258549, + -0.28543511033058167, + -0.022008933126926422, + -0.054881855845451355 + ], "logprobs": [ -9.36181926727295, -2.823990821838379, @@ -130,45 +251,46 @@ -4.027304649353027, -3.479668378829956, -1.97231125831604, - -2.376408100128174, - -2.3123559951782227, - -0.3004738390445709, - -1.493628978729248, - -2.220780372619629, - 
-1.0872397422790527, - -1.59427809715271, - -1.447359323501587, - -1.9638845920562744, - -1.4591186046600342, - -0.9037047028541565, - -1.2439252138137817, - -2.1132912635803223, - -2.4269232749938965, - -1.3580821752548218, - -0.22717469930648804, - -0.03338731452822685, - -0.06547478586435318, - -0.7944308519363403, - -0.6580883264541626, - -1.3873854875564575, - -1.8057537078857422, - -0.2732881009578705, - -0.23224705457687378, - -0.026631435379385948, - -0.09862899780273438, - -0.5954015254974365, - -0.15712657570838928, - -0.4755193591117859 + -2.363867998123169, + -2.219954490661621, + -0.29585954546928406, + -1.4493519067764282, + -2.232797622680664, + -1.1424486637115479, + -1.5864160060882568, + -1.4188923835754395, + -2.0473084449768066, + -1.470442771911621, + -0.8504352569580078, + -1.147210955619812, + -2.0061838626861572, + -2.4544901847839355, + -1.7092150449752808, + -0.23308466374874115, + -0.38648492097854614, + -0.055945850908756256, + -0.4632662534713745, + -0.09933969378471375, + -0.35298952460289, + -0.032222963869571686, + -0.428203284740448, + -0.04741770401597023, + -0.13727128505706787, + -0.008898601867258549, + -0.28543511033058167, + -0.022008933126926422, + -0.054881855845451355 ] }, "throughput": [ - 12.416282998898186, - 81.54888884568274, - 83.34870312803253, - 82.80514168050271, - 82.08963052557824, - 82.46828524015132, - 82.9993700631586, - 82.43991894192082 - ] -} \ No newline at end of file + 76.30580996730768, + 88.09632062440096, + 88.06043831072262, + 88.2961798635866, + 88.30652818803674, + 88.44774285517468, + 88.336161355204, + 88.45930829300391 + ], + "mem-max-allocated-bytes": 23014038016 +} diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json index 
ea85c2aaa78..8e7d12105ac 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json @@ -1,7 +1,7 @@ { "0": { "input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", - "generated_text": " And that this is the place where you can be yourself, and be accepted for who you are, and be accepted for who you are, and be", + "generated_text": " And that this is the place where you can be yourself, and be yourself, and be yourself, and be yourself, and be yourself, and be", "generated_tokens": [ 3060, 1455, @@ -17,30 +17,151 @@ 1044, 1321, 1402, - 14571, - 1394, - 2274, - 1636, - 1584, + 14019, 1044, 1321, 1402, - 14571, - 1394, - 2274, - 1636, - 1584, + 14019, + 1044, + 1321, + 1402, + 14019, + 1044, + 1321, + 1402, + 14019, 1044, 1321, 1402 ], - "latency": 0.4406242370605469, + "latency": 0.4249272346496582, "cuda_graph_request_count_map": { - "56": 29 + "32": 29 }, - "step_count": 240, + "step_count": 30, "top_n_logprobs": null, "prompt_top_n_logprobs": null, + "prompt_logprobs": [ + -9.36181926727295, + -2.823990821838379, + -4.610703945159912, + -1.5629558563232422, + -0.7174959182739258, + -1.6296026706695557, + -2.4623641967773438, + -2.1694350242614746, + -2.299478530883789, + -6.261927127838135, + -1.4214489459991455, + -3.5006911754608154, + -4.495674133300781, + -3.756869077682495, + -2.11869478225708, + -2.0652074813842773, + -3.5548300743103027, + -6.7972517013549805, + -0.32509124279022217, + 
-0.802075982093811, + -6.374052047729492, + -7.404623031616211, + -12.706570625305176, + -2.7203140258789062, + -3.784698486328125, + -0.5262728929519653, + -4.230419158935547, + -0.05865294858813286, + -0.08889779448509216, + -3.3020200729370117, + -10.076433181762695, + -1.1052889823913574, + -6.047104835510254, + -5.263249397277832, + -3.813868522644043, + -2.732881784439087, + -3.4210383892059326, + -6.047735214233398, + -1.9643490314483643, + -5.7317914962768555, + -12.158651351928711, + -12.511089324951172, + -0.07087351381778717, + -2.580897331237793, + -1.4599298238754272, + -3.0208420753479004, + -1.2277309894561768, + -0.006594917271286249, + -3.4534847736358643, + -13.229089736938477, + -4.096384048461914, + -2.5087532997131348, + -5.955618858337402, + -0.7179529070854187, + -0.05547872930765152, + -1.5987433195114136, + -1.0590581893920898, + -5.704042434692383, + -0.3894253373146057, + -5.037204742431641, + -0.5877441167831421, + -0.5446641445159912, + -2.4070374965667725, + -13.461160659790039, + -0.10237079858779907, + -3.5546276569366455, + -1.3483082056045532, + -6.042889595031738, + -0.5367065668106079, + -3.593390703201294, + -0.9327085018157959, + -1.5130213499069214, + -5.070390701293945, + -17.36066436767578, + -6.901477813720703, + -1.0385162830352783, + -3.9858975410461426, + -1.1583341360092163, + -2.342862606048584, + -1.7755080461502075, + -0.27050071954727173, + -9.41438102722168, + -0.3227814733982086, + -7.4246134757995605, + -2.2850522994995117, + -4.027304649353027, + -3.479668378829956 + ], + "generated_logprobs": [ + -1.97231125831604, + -2.363867998123169, + -2.219954490661621, + -0.29585954546928406, + -1.4493519067764282, + -2.232797622680664, + -1.1424486637115479, + -1.5864160060882568, + -1.4188923835754395, + -2.0473084449768066, + -1.470442771911621, + -0.8504352569580078, + -1.147210955619812, + -2.0061838626861572, + -2.4544901847839355, + -1.7092150449752808, + -0.23308466374874115, + -0.38648492097854614, + 
-0.055945850908756256, + -0.4632662534713745, + -0.09933969378471375, + -0.35298952460289, + -0.032222963869571686, + -0.428203284740448, + -0.04741770401597023, + -0.13727128505706787, + -0.008898601867258549, + -0.28543511033058167, + -0.022008933126926422, + -0.054881855845451355 + ], "logprobs": [ -9.36181926727295, -2.823990821838379, @@ -130,45 +251,45 @@ -4.027304649353027, -3.479668378829956, -1.97231125831604, - -2.376408100128174, - -2.3123559951782227, - -0.3004738390445709, - -1.493628978729248, - -2.220780372619629, - -1.0872397422790527, - -1.59427809715271, - -1.447359323501587, - -1.9638845920562744, - -1.4591186046600342, - -0.9037047028541565, - -1.2439252138137817, - -2.1132912635803223, - -2.4269232749938965, - -1.3580821752548218, - -0.22717469930648804, - -0.03338731452822685, - -0.06547478586435318, - -0.7944308519363403, - -0.6580883264541626, - -1.3873854875564575, - -1.8057537078857422, - -0.2732881009578705, - -0.23224705457687378, - -0.026631435379385948, - -0.09862899780273438, - -0.5954015254974365, - -0.15712657570838928, - -0.4755193591117859 + -2.363867998123169, + -2.219954490661621, + -0.29585954546928406, + -1.4493519067764282, + -2.232797622680664, + -1.1424486637115479, + -1.5864160060882568, + -1.4188923835754395, + -2.0473084449768066, + -1.470442771911621, + -0.8504352569580078, + -1.147210955619812, + -2.0061838626861572, + -2.4544901847839355, + -1.7092150449752808, + -0.23308466374874115, + -0.38648492097854614, + -0.055945850908756256, + -0.4632662534713745, + -0.09933969378471375, + -0.35298952460289, + -0.032222963869571686, + -0.428203284740448, + -0.04741770401597023, + -0.13727128505706787, + -0.008898601867258549, + -0.28543511033058167, + -0.022008933126926422, + -0.054881855845451355 ] }, "throughput": [ - 7.132306428201211, - 67.30863603174221, - 67.64093662629398, - 67.67495143375305, - 67.42889050485478, - 67.62813990948096, - 67.71432030932579, - 67.47687387318786 + 3.446833367136259, + 69.64151223259532, + 
69.9765204692347, + 70.25474012041042, + 69.64760269536946, + 69.98609501222526, + 70.21408666363853, + 70.1614678530764 ] -} \ No newline at end of file +} diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json index f32580e937f..8cb69f894b0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json @@ -157,5 +157,6 @@ -0.0585334412753582 ] }, - "throughput": [12.319796866345767, 12.319796866345767] + "throughput": [12.319796866345767, 12.319796866345767], + "mem-max-allocated-bytes": 12067065856 } diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json index 944863ce003..93dbee6575d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json @@ -34,125 +34,125 @@ 1278, 2362 ], - "latency": 23.35220137424767, + "latency": 0.3552708830102347, "logprobs": [ -9.35879135131836, - -2.7352774143218994, - -4.542932987213135, - -1.4809632301330566, - -0.6577711701393127, - -1.7310287952423096, - -2.5016393661499023, - -2.054267168045044, - -2.4450795650482178, - -6.180659294128418, - -1.568453073501587, - -3.404385805130005, - -4.357839584350586, - -3.9313418865203857, - -2.001478672027588, - -1.8802878856658936, - -3.8159995079040527, - -6.879362106323242, - -0.28638726472854614, 
- -0.9805830717086792, - -6.659268856048584, - -7.184902667999268, - -12.831036567687988, - -2.2628769874572754, - -3.80989933013916, - -0.5026318430900574, - -4.312714576721191, - -0.06652869284152985, - -0.10383106768131256, - -3.221609354019165, - -10.062438011169434, - -1.19387686252594, - -5.972838401794434, - -5.059903621673584, - -3.794962167739868, - -2.58512020111084, - -3.407836675643921, - -5.576328277587891, - -1.6389069557189941, - -5.498246669769287, - -12.218515396118164, - -12.583944320678711, - -0.09274326264858246, - -2.500924587249756, - -1.370800256729126, - -2.858417510986328, - -1.1951555013656616, - -0.006517108529806137, - -3.3397316932678223, - -13.183527946472168, - -4.315248966217041, - -2.4844048023223877, - -6.052038192749023, - -0.7679911851882935, - -0.05106499418616295, - -1.5119061470031738, - -1.148835301399231, - -5.648500442504883, - -0.42955976724624634, - -4.942170143127441, - -0.6178378462791443, - -0.7215086221694946, - -2.4680683612823486, - -13.656073570251465, - -0.09046748280525208, - -3.528261184692383, - -1.3840829133987427, - -6.3916826248168945, - -0.590160071849823, - -3.512652635574341, - -0.8600459694862366, - -1.6373299360275269, - -5.384238243103027, - -17.205631256103516, - -6.648115634918213, - -0.890762984752655, - -4.155974388122559, - -1.1969019174575806, - -2.251375675201416, - -1.7827272415161133, - -0.21727021038532257, - -9.323517799377441, - -0.11923929303884506, - -7.317551136016846, - -2.5149247646331787, - -4.099612236022949, - -3.5964670181274414, - -1.9214924573898315, - -2.305270195007324, - -1.5137361288070679, - -2.3835322856903076, - -1.7124545574188232, - -1.1756497621536255, - -3.0433411598205566, - -0.5281094312667847, - -0.4586932063102722, - -1.7248739004135132, - -0.8336725831031799, - -0.4110657572746277, - -0.9216307401657104, - -1.4833365678787231, - -0.4625704288482666, - -1.636054277420044, - -0.5516311526298523, - -1.2232449054718018, - -1.2100636959075928, - -0.002353756921365857, 
- -1.1664479970932007, - -0.007350543048232794, - -0.7310623526573181, - -0.7930303812026978, - -0.049882158637046814, - -0.8908950686454773, - -0.019804010167717934, - -2.044306755065918, - -1.3121578693389893, - -0.8065381050109863 + -2.6852214336395264, + -4.565960884094238, + -1.484259843826294, + -0.6149517297744751, + -1.7398686408996582, + -2.526689052581787, + -2.0900843143463135, + -2.4004015922546387, + -6.2046918869018555, + -1.4779510498046875, + -3.4696996212005615, + -4.381419658660889, + -3.92144513130188, + -2.027473211288452, + -1.849990963935852, + -3.798253059387207, + -6.890632629394531, + -0.28577330708503723, + -0.9172963500022888, + -6.667942047119141, + -7.152089595794678, + -12.823952674865723, + -2.194999933242798, + -3.7969248294830322, + -0.503960907459259, + -4.32859992980957, + -0.0652889758348465, + -0.09950395673513412, + -3.2162013053894043, + -10.075189590454102, + -1.1461244821548462, + -5.991937637329102, + -5.068911075592041, + -3.8860018253326416, + -2.598827600479126, + -3.4107730388641357, + -5.53258752822876, + -1.5951910018920898, + -5.499358654022217, + -12.2184419631958, + -12.583678245544434, + -0.09812023490667343, + -2.4972615242004395, + -1.4124755859375, + -2.882293462753296, + -1.1778429746627808, + -0.006617418024688959, + -3.366197109222412, + -13.224164962768555, + -4.330657005310059, + -2.528923273086548, + -6.032571792602539, + -0.7999377250671387, + -0.046529971063137054, + -1.5080031156539917, + -1.143476963043213, + -5.610738754272461, + -0.4443867802619934, + -4.966207027435303, + -0.6222555041313171, + -0.7141766548156738, + -2.4682083129882812, + -13.595609664916992, + -0.09389874339103699, + -3.4752113819122314, + -1.4100513458251953, + -6.344900608062744, + -0.5882403254508972, + -3.554251194000244, + -0.8758341073989868, + -1.6025172472000122, + -5.337532043457031, + -17.198396682739258, + -6.618108749389648, + -0.904167115688324, + -4.1442694664001465, + -1.18899667263031, + -2.2584173679351807, + 
-1.7404848337173462, + -0.22586335241794586, + -9.318314552307129, + -0.11766636371612549, + -7.351627826690674, + -2.4984447956085205, + -4.129283905029297, + -3.511444330215454, + -1.935489296913147, + -2.2915453910827637, + -1.5244090557098389, + -2.380976438522339, + -1.7428944110870361, + -1.1648709774017334, + -3.044867515563965, + -0.5298795700073242, + -0.4574756622314453, + -1.7587621212005615, + -0.8358312845230103, + -0.4241933226585388, + -0.9311360716819763, + -1.49276864528656, + -0.4320312440395355, + -1.6545748710632324, + -0.568348228931427, + -1.245187520980835, + -1.1677653789520264, + -0.002115513663738966, + -1.1953201293945312, + -0.007269242778420448, + -0.6812739968299866, + -0.7529453635215759, + -0.0469898022711277, + -0.8952285051345825, + -0.02016274258494377, + -2.0373334884643555, + -1.3149938583374023, + -0.8147596120834351 ] } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml index 4b1759db001..920da1d1682 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml @@ -23,6 +23,7 @@ MODEL_ARGS: --distributed-backend: nccl --log-interval: 1 --transformer-impl: inference_optimized + --inference-fuse-tp-communication: true --sequence-parallel: true --tensor-model-parallel-size: 8 --pipeline-model-parallel-size: 1 @@ -50,6 +51,9 @@ MODEL_ARGS: --incoming-requests-per-step: 32 --use-flashinfer-fused-rope: true --inference-logging-step-interval: 1 + --cuda-graph-impl: local + --inference-dynamic-batching-max-requests: 128 + --inference-dynamic-batching-num-cuda-graphs: 2 METRICS: - "generated_tokens" - "logprobs" diff --git 
a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json index e58cb5d3349..a19d42718aa 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json @@ -1,173 +1,59 @@ { - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 0.0, - "2": -0.04564, - "3": 0.0, - "4": 0.0, - "5": 0.0, - "6": 0.0, - "7": 0.0, - "8": 0.0, - "9": 0.04101, - "10": 0.0, - "11": 0.0, - "12": 0.0, - "13": 0.0, - "14": 0.05164, - "15": 0.0, - "16": 0.0, - "17": 0.0, - "18": 0.03448, - "19": 0.00346, - "20": 0.0, - "21": 0.0, - "22": 0.0, - "23": 0.0, - "24": 0.05792, - "25": 0.03686, - "26": 0.0, - "27": 0.0, - "28": 0.0, - "29": 0.0, - "30": 0.0, - "31": 0.0, - "32": 0.0, - "33": 0.0, - "34": 0.0, - "35": 0.0, - "36": 0.0, - "37": 0.0, - "38": 0.0, - "39": 0.0, - "40": 0.0, - "41": 0.0, - "42": 0.0, - "43": 0.0, - "44": 0.0, - "45": 0.0, - "46": 0.05118, - "47": 0.0, - "48": 0.0, - "49": 0.0, - "50": 0.0 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 583687296.0, - "2": 70.0, - "3": 583687296.0, - "4": 583687296.0, - "5": 583687296.0, - "6": 583687296.0, - "7": 583687296.0, - "8": 583687296.0, - "9": 19.0, - "10": 583687296.0, - "11": 583687296.0, - "12": 583687296.0, - "13": 583687296.0, - "14": 20.0, - "15": 583687296.0, - "16": 583687296.0, - "17": 583687296.0, - "18": 53.0, - "19": 54.0, - "20": 583687296.0, - "21": 583687296.0, - "22": 583687296.0, - "23": 583687296.0, - "24": 40.0, - "25": 53.0, - "26": 583687296.0, - "27": 583687296.0, - "28": 583687296.0, - "29": 583687296.0, - "30": 583687296.0, - "31": 583687296.0, - "32": 583687296.0, - 
"33": 583687296.0, - "34": 583687296.0, - "35": 583687296.0, - "36": 583687296.0, - "37": 583687296.0, - "38": 583687296.0, - "39": 583687296.0, - "40": 583687296.0, - "41": 583687296.0, - "42": 583687296.0, - "43": 583687296.0, - "44": 583687296.0, - "45": 583687296.0, - "46": 30.0, - "47": 583687296.0, - "48": 583687296.0, - "49": 583687296.0, - "50": 583687296.0 - } - }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, "step_interval": 1, "values": { - "1": 99613442048.0, - "2": 99615326208.0, - "3": 99615236096.0, - "4": 99615236096.0, - "5": 99615219712.0, - "6": 99615203328.0, - "7": 99615203328.0, - "8": 99615211520.0, - "9": 99615178752.0, - "10": 99615154176.0, - "11": 99615105024.0, - "12": 99615105024.0, - "13": 99615105024.0, - "14": 99615105024.0, - "15": 99615113216.0, - "16": 99615113216.0, - "17": 99615113216.0, - "18": 99615121408.0, - "19": 99615113216.0, - "20": 99615121408.0, - "21": 99615121408.0, - "22": 99615113216.0, - "23": 99615121408.0, - "24": 99615113216.0, - "25": 99615113216.0, - "26": 99615113216.0, - "27": 99615113216.0, - "28": 99615121408.0, - "29": 99615121408.0, - "30": 99615121408.0, - "31": 99615121408.0, - "32": 99615121408.0, - "33": 99615121408.0, - "34": 99615121408.0, - "35": 99615121408.0, - "36": 99615129600.0, - "37": 99615121408.0, - "38": 99615129600.0, - "39": 99615121408.0, - "40": 99615129600.0, - "41": 99615121408.0, - "42": 99615129600.0, - "43": 99615129600.0, - "44": 99615129600.0, - "45": 99615129600.0, - "46": 99615121408.0, - "47": 99615121408.0, - "48": 99615129600.0, - "49": 99615129600.0, - "50": 99615121408.0 + "1": 55289954304.0, + "2": 55292747776.0, + "3": 55292731392.0, + "4": 55292891136.0, + "5": 55292878848.0, + "6": 55292878848.0, + "7": 55292878848.0, + "8": 55292788736.0, + "9": 55292788736.0, + "10": 55292788736.0, + "11": 55292792832.0, + "12": 55292792832.0, + "13": 55292792832.0, + "14": 55292792832.0, + "15": 55292792832.0, + "16": 55292796928.0, + "17": 55292796928.0, + "18": 
55292801024.0, + "19": 55292805120.0, + "20": 55292801024.0, + "21": 55292801024.0, + "22": 55292796928.0, + "23": 55292801024.0, + "24": 55292796928.0, + "25": 55292801024.0, + "26": 55292796928.0, + "27": 55292796928.0, + "28": 55292801024.0, + "29": 55292801024.0, + "30": 55292805120.0, + "31": 55292805120.0, + "32": 55292805120.0, + "33": 55292805120.0, + "34": 55292805120.0, + "35": 55292805120.0, + "36": 55292805120.0, + "37": 55292801024.0, + "38": 55292801024.0, + "39": 55292801024.0, + "40": 55292805120.0, + "41": 55292805120.0, + "42": 55292805120.0, + "43": 55292801024.0, + "44": 55292796928.0, + "45": 55292801024.0, + "46": 55292801024.0, + "47": 55292801024.0, + "48": 55292801024.0, + "49": 55292805120.0, + "50": 55292805120.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 99613450240.0, - "2": 101703827456.0, - "3": 101704925184.0, - "4": 101704925184.0, - "5": 101704925184.0, - "6": 101704925184.0, - "7": 101704925184.0, - "8": 101708570624.0, - "9": 101708570624.0, - "10": 101708570624.0, - "11": 101708570624.0, - "12": 101708570624.0, - "13": 101708570624.0, - "14": 101708570624.0, - "15": 101708570624.0, - "16": 101708570624.0, - "17": 101708570624.0, - "18": 101708570624.0, - "19": 101708570624.0, - "20": 101708570624.0, - "21": 101708570624.0, - "22": 101708570624.0, - "23": 101708570624.0, - "24": 101708570624.0, - "25": 101708570624.0, - "26": 101708570624.0, - "27": 101708570624.0, - "28": 101708570624.0, - "29": 101708570624.0, - "30": 101708570624.0, - "31": 101708570624.0, - "32": 101708570624.0, - "33": 101708570624.0, - "34": 101708570624.0, - "35": 101708570624.0, - "36": 101708570624.0, - "37": 101708570624.0, - "38": 101708570624.0, - "39": 101708570624.0, - "40": 101708570624.0, - "41": 101708570624.0, - "42": 101708570624.0, - "43": 101708570624.0, - "44": 101708570624.0, - "45": 101708570624.0, - "46": 101708570624.0, - "47": 101708570624.0, - "48": 101708570624.0, - 
"49": 101708570624.0, - "50": 101708570624.0 + "1": 55289958400.0, + "2": 57103880192.0, + "3": 57104392192.0, + "4": 57104416768.0, + "5": 57104416768.0, + "6": 57104416768.0, + "7": 57104416768.0, + "8": 57104416768.0, + "9": 57104416768.0, + "10": 57104416768.0, + "11": 57104416768.0, + "12": 57104416768.0, + "13": 57104416768.0, + "14": 57104416768.0, + "15": 57104416768.0, + "16": 57104416768.0, + "17": 57104416768.0, + "18": 57104416768.0, + "19": 57104416768.0, + "20": 57104416768.0, + "21": 57104416768.0, + "22": 57104416768.0, + "23": 57104416768.0, + "24": 57104416768.0, + "25": 57104416768.0, + "26": 57104416768.0, + "27": 57104416768.0, + "28": 57104416768.0, + "29": 57104416768.0, + "30": 57104416768.0, + "31": 57104416768.0, + "32": 57104416768.0, + "33": 57104416768.0, + "34": 57104416768.0, + "35": 57104416768.0, + "36": 57104416768.0, + "37": 57104416768.0, + "38": 57104416768.0, + "39": 57104416768.0, + "40": 57104416768.0, + "41": 57104416768.0, + "42": 57104416768.0, + "43": 57104416768.0, + "44": 57104416768.0, + "45": 57104416768.0, + "46": 57104416768.0, + "47": 57104416768.0, + "48": 57104416768.0, + "49": 57104416768.0, + "50": 57104416768.0 } }, "iteration-time": { @@ -232,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 37.07577, - "2": 5.51337, - "3": 4.10557, - "4": 3.55106, - "5": 3.45444, - "6": 3.48579, - "7": 3.39066, - "8": 3.49615, - "9": 3.63661, - "10": 3.5452, - "11": 3.52083, - "12": 3.44924, - "13": 3.34272, - "14": 3.39596, - "15": 3.42629, - "16": 3.31287, - "17": 3.32152, - "18": 3.39771, - "19": 3.42125, - "20": 3.61702, - "21": 3.45153, - "22": 3.35039, - "23": 3.39949, - "24": 3.32904, - "25": 3.36768, - "26": 3.34536, - "27": 3.30363, - "28": 3.36734, - "29": 3.41942, - "30": 3.38079, - "31": 3.35877, - "32": 3.34474, - "33": 3.27045, - "34": 3.18637, - "35": 3.24522, - "36": 3.34784, - "37": 3.33885, - "38": 3.37193, - "39": 3.31138, - "40": 3.25321, - "41": 3.21574, - "42": 3.24275, - "43": 
3.27418, - "44": 3.30596, - "45": 3.30984, - "46": 3.36254, - "47": 3.43668, - "48": 3.27358, - "49": 3.25891, - "50": 3.34573 + "1": 38.24908, + "2": 4.52458, + "3": 3.69393, + "4": 3.38577, + "5": 3.41862, + "6": 3.27421, + "7": 3.32023, + "8": 3.83723, + "9": 4.07373, + "10": 3.47799, + "11": 3.27499, + "12": 3.37017, + "13": 3.3918, + "14": 3.25114, + "15": 3.29905, + "16": 3.29943, + "17": 3.50383, + "18": 3.56844, + "19": 3.30276, + "20": 3.34553, + "21": 3.29165, + "22": 3.30348, + "23": 3.33814, + "24": 3.31525, + "25": 3.29337, + "26": 3.26119, + "27": 3.5167, + "28": 3.2312, + "29": 3.45063, + "30": 3.3088, + "31": 3.32522, + "32": 3.28154, + "33": 3.23551, + "34": 3.20003, + "35": 3.25844, + "36": 3.67071, + "37": 3.1881, + "38": 3.30757, + "39": 3.32895, + "40": 3.29602, + "41": 3.25522, + "42": 3.28932, + "43": 3.32204, + "44": 3.26419, + "45": 3.75371, + "46": 3.23126, + "47": 3.25929, + "48": 3.19512, + "49": 3.32815, + "50": 3.25617 } } } diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/model_config.yaml index 30309858b76..8c78989cef7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/model_config.yaml @@ -76,3 +76,6 @@ MODEL_ARGS: --eval-interval: 1000000 --finetune: true --inference-logging-step-interval: 1 +METRICS: + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_github/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_github/golden_values_dev_dgx_h100.json index 4206fac0d0d..4db934b1330 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_github/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest_github/golden_values_dev_dgx_h100.json @@ -4,20 +4,20 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 0.05412, - "2": 0.04523, - "3": 0.09444, - "4": 0.04451, - "5": 0.05201, + "1": 0.04567, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, "6": 0.0, "7": 0.0, - "8": 0.04699, + "8": 0.04622, "9": 0.0, "10": 0.0, "11": 0.0, "12": 0.0, "13": 0.0, - "14": 0.03773, + "14": 0.0, "15": 0.0, "16": 0.0, "17": 0.0, @@ -28,21 +28,21 @@ "22": 0.0, "23": 0.0, "24": 0.0, - "25": 0.0, + "25": 0.03308, "26": 0.0, "27": 0.0, - "28": 0.0, + "28": 0.09392, "29": 0.0, "30": 0.0, "31": 0.0, "32": 0.0, "33": 0.0, - "34": 0.0, + "34": 0.03909, "35": 0.0, "36": 0.0, - "37": 0.04296, + "37": 0.0, "38": 0.0, - "39": 0.0, + "39": 0.04574, "40": 0.0, "41": 0.0, "42": 0.0, @@ -50,10 +50,10 @@ "44": 0.0, "45": 0.0, "46": 0.0, - "47": 0.05684, - "48": 0.04259, + "47": 0.0, + "48": 0.0, "49": 0.0, - "50": 0.02801 + "50": 0.0 } }, "num-zeros": { @@ -61,20 +61,20 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 24.0, - "2": 44.0, - "3": 20.0, - "4": 30.0, - "5": 37.0, + "1": 43.0, + "2": 583687296.0, + "3": 583687296.0, + "4": 583687296.0, + "5": 583687296.0, "6": 583687296.0, "7": 583687296.0, - "8": 53.0, + "8": 42.0, "9": 583687296.0, "10": 583687296.0, "11": 583687296.0, "12": 583687296.0, "13": 583687296.0, - "14": 50.0, + "14": 583687296.0, "15": 583687296.0, "16": 583687296.0, "17": 583687296.0, @@ -85,21 +85,21 @@ "22": 583687296.0, "23": 583687296.0, "24": 583687296.0, - "25": 583687296.0, + "25": 56.0, "26": 583687296.0, "27": 583687296.0, - "28": 583687296.0, + "28": 18.0, "29": 583687296.0, "30": 583687296.0, "31": 583687296.0, "32": 583687296.0, "33": 583687296.0, - "34": 583687296.0, + "34": 32.0, "35": 583687296.0, "36": 583687296.0, - "37": 46.0, + "37": 
583687296.0, "38": 583687296.0, - "39": 583687296.0, + "39": 27.0, "40": 583687296.0, "41": 583687296.0, "42": 583687296.0, @@ -107,10 +107,10 @@ "44": 583687296.0, "45": 583687296.0, "46": 583687296.0, - "47": 33.0, - "48": 19.0, + "47": 583687296.0, + "48": 583687296.0, "49": 583687296.0, - "50": 41.0 + "50": 583687296.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 99614597120.0, - "2": 99614261248.0, - "3": 99614236672.0, - "4": 99614228480.0, - "5": 99614220288.0, - "6": 99614212096.0, - "7": 99614212096.0, - "8": 99614212096.0, - "9": 99614146560.0, - "10": 99614146560.0, - "11": 99614146560.0, - "12": 99614146560.0, - "13": 99614146560.0, - "14": 99614146560.0, - "15": 99614154752.0, - "16": 99614154752.0, - "17": 99614154752.0, - "18": 99614154752.0, - "19": 99614154752.0, - "20": 99614154752.0, - "21": 99614154752.0, - "22": 99614154752.0, - "23": 99614162944.0, - "24": 99614162944.0, - "25": 99614162944.0, - "26": 99614162944.0, - "27": 99614162944.0, - "28": 99614162944.0, - "29": 99614162944.0, - "30": 99614162944.0, - "31": 99614162944.0, - "32": 99614171136.0, - "33": 99614171136.0, - "34": 99614162944.0, - "35": 99614162944.0, - "36": 99614162944.0, - "37": 99614162944.0, - "38": 99614154752.0, - "39": 99614162944.0, - "40": 99614162944.0, - "41": 99614162944.0, - "42": 99614154752.0, - "43": 99614154752.0, - "44": 99614154752.0, - "45": 99614154752.0, - "46": 99614154752.0, - "47": 99614154752.0, - "48": 99614154752.0, - "49": 99614154752.0, - "50": 99614162944.0 + "1": 56705486848.0, + "2": 56707366912.0, + "3": 56707289088.0, + "4": 56707284992.0, + "5": 56707284992.0, + "6": 56707293184.0, + "7": 56707297280.0, + "8": 56707293184.0, + "9": 56707293184.0, + "10": 56707297280.0, + "11": 56707289088.0, + "12": 56707293184.0, + "13": 56707301376.0, + "14": 56707305472.0, + "15": 56707313664.0, + "16": 56707317760.0, + "17": 56707325952.0, + "18": 56707330048.0, + "19": 56707338240.0, + 
"20": 56707342336.0, + "21": 56707350528.0, + "22": 56707354624.0, + "23": 56707358720.0, + "24": 56707317760.0, + "25": 56707317760.0, + "26": 56707309568.0, + "27": 56707309568.0, + "28": 56707305472.0, + "29": 56707309568.0, + "30": 56707309568.0, + "31": 56707309568.0, + "32": 56707305472.0, + "33": 56707305472.0, + "34": 56707276800.0, + "35": 56707284992.0, + "36": 56707293184.0, + "37": 56707293184.0, + "38": 56707276800.0, + "39": 56707284992.0, + "40": 56707284992.0, + "41": 56707252224.0, + "42": 56707256320.0, + "43": 56707260416.0, + "44": 56707252224.0, + "45": 56707235840.0, + "46": 56707244032.0, + "47": 56707244032.0, + "48": 56707239936.0, + "49": 56707235840.0, + "50": 56707227648.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 99614605312.0, - "2": 101701984256.0, - "3": 101701984256.0, - "4": 101701984256.0, - "5": 101701984256.0, - "6": 101701984256.0, - "7": 101701984256.0, - "8": 101701984256.0, - "9": 101701984256.0, - "10": 101705539584.0, - "11": 101705539584.0, - "12": 101705539584.0, - "13": 101705547776.0, - "14": 101705547776.0, - "15": 101705547776.0, - "16": 101705547776.0, - "17": 101705547776.0, - "18": 101705547776.0, - "19": 101705547776.0, - "20": 101705547776.0, - "21": 101705547776.0, - "22": 101705547776.0, - "23": 101705555968.0, - "24": 101705555968.0, - "25": 101705555968.0, - "26": 101705555968.0, - "27": 101705555968.0, - "28": 101705564160.0, - "29": 101705564160.0, - "30": 101705564160.0, - "31": 101705564160.0, - "32": 101705564160.0, - "33": 101705564160.0, - "34": 101705564160.0, - "35": 101705564160.0, - "36": 101705564160.0, - "37": 101705564160.0, - "38": 101705564160.0, - "39": 101705564160.0, - "40": 101705564160.0, - "41": 101705564160.0, - "42": 101705564160.0, - "43": 101705564160.0, - "44": 101705564160.0, - "45": 101705564160.0, - "46": 101705564160.0, - "47": 101705564160.0, - "48": 101705564160.0, - "49": 101705564160.0, - "50": 
101705564160.0 + "1": 56705486848.0, + "2": 58520117248.0, + "3": 58520694784.0, + "4": 58520694784.0, + "5": 58520694784.0, + "6": 58520698880.0, + "7": 58520707072.0, + "8": 58520707072.0, + "9": 58520707072.0, + "10": 58520707072.0, + "11": 58520707072.0, + "12": 58520707072.0, + "13": 58520707072.0, + "14": 58520711168.0, + "15": 58520719360.0, + "16": 58520723456.0, + "17": 58520731648.0, + "18": 58520735744.0, + "19": 58520743936.0, + "20": 58520748032.0, + "21": 58520756224.0, + "22": 58520764416.0, + "23": 58520764416.0, + "24": 58520764416.0, + "25": 58520764416.0, + "26": 58520764416.0, + "27": 58520764416.0, + "28": 58520764416.0, + "29": 58520764416.0, + "30": 58520764416.0, + "31": 58520764416.0, + "32": 58520764416.0, + "33": 58520764416.0, + "34": 58520764416.0, + "35": 58520764416.0, + "36": 58520764416.0, + "37": 58520764416.0, + "38": 58520764416.0, + "39": 58520764416.0, + "40": 58520764416.0, + "41": 58520764416.0, + "42": 58520764416.0, + "43": 58520764416.0, + "44": 58520764416.0, + "45": 58520764416.0, + "46": 58520764416.0, + "47": 58520764416.0, + "48": 58520764416.0, + "49": 58520764416.0, + "50": 58520764416.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 130.25253, - "2": 9.88948, - "3": 8.72032, - "4": 8.5427, - "5": 8.26483, - "6": 8.59126, - "7": 8.02799, - "8": 8.21142, - "9": 8.57808, - "10": 8.03187, - "11": 8.04941, - "12": 8.01158, - "13": 8.18497, - "14": 8.13065, - "15": 8.12456, - "16": 8.0261, - "17": 8.24415, - "18": 8.12356, - "19": 8.01872, - "20": 7.96605, - "21": 8.02618, - "22": 7.98249, - "23": 8.03059, - "24": 7.87244, - "25": 7.92321, - "26": 7.99325, - "27": 8.03815, - "28": 8.0646, - "29": 8.03226, - "30": 7.92917, - "31": 8.0803, - "32": 7.9272, - "33": 7.93803, - "34": 7.9555, - "35": 8.10923, - "36": 8.01863, - "37": 7.97726, - "38": 7.86783, - "39": 7.89458, - "40": 7.92858, - "41": 7.9655, - "42": 8.11402, - "43": 7.92667, - "44": 8.10251, - "45": 
7.84423, - "46": 8.02262, - "47": 7.90143, - "48": 8.11201, - "49": 8.26159, - "50": 8.02742 + "1": "nan", + "2": 64.88323, + "3": 9.98948, + "4": 10.5653, + "5": 9.49213, + "6": 9.7058, + "7": 10.3713, + "8": 9.69584, + "9": 10.08558, + "10": 9.64307, + "11": 9.39285, + "12": 9.22534, + "13": 9.45398, + "14": 9.3236, + "15": 9.30815, + "16": 9.42684, + "17": 9.27604, + "18": 9.46377, + "19": 9.24656, + "20": 9.22709, + "21": 9.15955, + "22": 9.39831, + "23": 9.1461, + "24": 9.14062, + "25": 9.43925, + "26": 9.27344, + "27": 9.13835, + "28": 9.11182, + "29": 9.28006, + "30": 9.29592, + "31": 9.99338, + "32": 10.28927, + "33": 9.71657, + "34": 10.01927, + "35": 9.49163, + "36": 9.72794, + "37": 9.31159, + "38": 9.29786, + "39": 9.318, + "40": 9.48741, + "41": 9.59212, + "42": 9.29507, + "43": 9.30203, + "44": 9.37176, + "45": 9.23509, + "46": 9.32089, + "47": 9.36602, + "48": 9.43024, + "49": 9.19031, + "50": 9.19624 } } -} +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/env_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/env_config.yaml new file mode 100644 index 00000000000..329246987bf --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/env_config.yaml @@ -0,0 +1,5 @@ +- agent_type: examples.rl.environments.countdown.countdown_agent.CountdownAgent + agent_args: + dataset_file: "/mnt/artifacts/rl_environments/Jiayi-Pan___countdown-tasks-3to4" + split: "train" + weight: 1.0 diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..52eecae753f --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json @@ -0,0 +1,173 @@ +{ + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 59904729088.0, + "2": 59906678784.0, + "3": 59906662400.0, + "4": 59906637824.0, + "5": 59906621440.0, + "6": 59906596864.0, + "7": 59906596864.0, + "8": 59906535424.0, + "9": 59906396160.0, + "10": 59906404352.0, + "11": 59906408448.0, + "12": 59906412544.0, + "13": 59906408448.0, + "14": 59906412544.0, + "15": 59906412544.0, + "16": 59906412544.0, + "17": 59906408448.0, + "18": 59906404352.0, + "19": 59906404352.0, + "20": 59906408448.0, + "21": 59906408448.0, + "22": 59906408448.0, + "23": 59906412544.0, + "24": 59906416640.0, + "25": 59906408448.0, + "26": 59906412544.0, + "27": 59906416640.0, + "28": 59906412544.0, + "29": 59906412544.0, + "30": 59906408448.0, + "31": 59906412544.0, + "32": 59906416640.0, + "33": 59906420736.0, + "34": 59906416640.0, + "35": 59906416640.0, + "36": 59906416640.0, + "37": 59906420736.0, + "38": 59906416640.0, + "39": 59906416640.0, + "40": 59906420736.0, + "41": 59906420736.0, + "42": 59906420736.0, + "43": 59906424832.0, + "44": 59906428928.0, + "45": 59906433024.0, + "46": 59906433024.0, + "47": 59906428928.0, + "48": 59906424832.0, + "49": 59906420736.0, + "50": 59906424832.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 59904729088.0, + "2": 61718560768.0, + "3": 61719445504.0, + "4": 61719445504.0, + "5": 61719445504.0, + "6": 61719445504.0, + "7": 61719445504.0, + "8": 61719445504.0, + "9": 61719445504.0, + "10": 61719445504.0, + "11": 61719445504.0, + "12": 61719445504.0, + "13": 61719445504.0, + "14": 61719445504.0, + "15": 61719445504.0, + "16": 61719445504.0, + "17": 61719445504.0, + "18": 61719445504.0, + "19": 61719445504.0, + "20": 61719445504.0, + "21": 61719445504.0, + "22": 61719445504.0, + "23": 
61719445504.0, + "24": 61719445504.0, + "25": 61719445504.0, + "26": 61719445504.0, + "27": 61719445504.0, + "28": 61719445504.0, + "29": 61719445504.0, + "30": 61719445504.0, + "31": 61719445504.0, + "32": 61719445504.0, + "33": 61719445504.0, + "34": 61719445504.0, + "35": 61719445504.0, + "36": 61719445504.0, + "37": 61719445504.0, + "38": 61719445504.0, + "39": 61719445504.0, + "40": 61719445504.0, + "41": 61719445504.0, + "42": 61719445504.0, + "43": 61719445504.0, + "44": 61719445504.0, + "45": 61719445504.0, + "46": 61719445504.0, + "47": 61719445504.0, + "48": 61719445504.0, + "49": 61719445504.0, + "50": 61719445504.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 57.6861, + "2": 8.67022, + "3": 5.71457, + "4": 5.72499, + "5": 5.11948, + "6": 4.92635, + "7": 4.93271, + "8": 5.10894, + "9": 5.36783, + "10": 5.56732, + "11": 5.02348, + "12": 4.81955, + "13": 4.91784, + "14": 4.9196, + "15": 4.7776, + "16": 5.12885, + "17": 5.00356, + "18": 4.81843, + "19": 4.84018, + "20": 4.8416, + "21": 4.85613, + "22": 5.11753, + "23": 4.85816, + "24": 4.75535, + "25": 4.89752, + "26": 4.76383, + "27": 4.8243, + "28": 5.40933, + "29": 4.76027, + "30": 4.81566, + "31": 4.65084, + "32": 4.85671, + "33": 4.82799, + "34": 4.92544, + "35": 4.84476, + "36": 5.06802, + "37": 4.80114, + "38": 4.76754, + "39": 4.72827, + "40": 4.88805, + "41": 5.15207, + "42": 4.84272, + "43": 4.72393, + "44": 4.8221, + "45": 4.8112, + "46": 4.78151, + "47": 4.86975, + "48": 4.73748, + "49": 4.91773, + "50": 4.77335 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/model_config.yaml new file mode 100644 index 00000000000..b12911358f0 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/model_config.yaml 
@@ -0,0 +1,83 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 +TEST_TYPE: frozen-start +MODE: rl +MODEL_ARGS: + --tiktoken-pattern: v2 + --use-mcore-models: true + --tokenizer-type: TikTokenizer + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ + --auto-detect-ckpt-format: true + --max-tokens-to-oom: 3600000 + --inference-max-seq-length: 1024 + --attention-backend: flash + --mock-data: true + --micro-batch-size: 1 + --no-load-optim: true + --no-use-tokenizer-model-from-checkpoint-args: true + --timing-log-level: 0 + --distributed-backend: nccl + --log-interval: 1 + --log-progress: true + --transformer-impl: transformer_engine + --tensor-model-parallel-size: 1 + --pipeline-model-parallel-size: 1 + --ckpt-format: torch_dist + --bf16: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --num-layers: 24 + --hidden-size: 1152 + --num-attention-heads: 16 + --max-position-embeddings: 1024 + --seq-length: 1024 + --timing-log-option: minmax + --log-throughput: true + --no-create-attention-mask-in-dataloader: true + --straggler-minmax-count: 16 + --tensorboard-log-interval: 1 + --empty-unused-memory-level: 2 + --langrl-inference-server-type: inplace_megatron + --seed: 42 + --calculate-per-token-loss: true + --rl-use-sequence-packing: true + --rl-sequence-packing-algo: fifo + --rl-offload-optimizer-during-inference: true + --timing-log-level: 1 + --log-timers-to-tensorboard: true + --cuda-graph-impl: local + --micro-batch-size: 1 + --global-batch-size: 16 + --grpo-group-size: 2 + --grpo-prompts-per-step: 8 + --grpo-iterations: 1 + --grpo-clamp-eps-lower: 0.2 + --grpo-clamp-eps-upper: 0.2 + --grpo-kl-beta: 0.0 + 
--grpo-entropy-term-weight: 0.0 + --langrl-env-config: tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/env_config.yaml + --rl-partial-rollouts: true + --lr: 0.000001 + --lr-warmup-samples: 0 + --clip-grad: 1.0 + --use-checkpoint-args: true + --dist-ckpt-strictness: log_unexpected + --perform-rl-step: true + --train-samples: 48828125 + --exit-interval: 50 + --tensorboard-dir: ${TENSORBOARD_PATH} + --save-interval: 1000000 + --eval-interval: 1000000 + --finetune: true + --inference-logging-step-interval: 1 + --rl-inference-tensor-model-parallel-size: 2 + --refit-method: gloo +METRICS: + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/env_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/env_config.yaml new file mode 100644 index 00000000000..329246987bf --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/env_config.yaml @@ -0,0 +1,5 @@ +- agent_type: examples.rl.environments.countdown.countdown_agent.CountdownAgent + agent_args: + dataset_file: "/mnt/artifacts/rl_environments/Jiayi-Pan___countdown-tasks-3to4" + split: "train" + weight: 1.0 diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..a37aeee6e4b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/golden_values_dev_dgx_h100.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 0.0087, + "2": -0.01494, + "3": 0.15077, + "4": 0.0, + "5": -0.0439, + "6": 0.0, + "7": 0.05469, + "8": 0.0, + "9": 0.00576, + 
"10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.03071, + "15": 0.04371, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.06246, + "24": 0.0, + "25": 0.0, + "26": 0.05207, + "27": 0.04668, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.02708, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.06875, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1.0, + "2": 56.0, + "3": 10.0, + "4": 583687296.0, + "5": 23.0, + "6": 583687296.0, + "7": 30.0, + "8": 583687296.0, + "9": 50.0, + "10": 583687296.0, + "11": 583687296.0, + "12": 583687296.0, + "13": 583687296.0, + "14": 41.0, + "15": 31.0, + "16": 583687296.0, + "17": 583687296.0, + "18": 583687296.0, + "19": 583687296.0, + "20": 583687296.0, + "21": 583687296.0, + "22": 583687296.0, + "23": 19.0, + "24": 583687296.0, + "25": 583687296.0, + "26": 45.0, + "27": 34.0, + "28": 583687296.0, + "29": 583687296.0, + "30": 583687296.0, + "31": 38.0, + "32": 583687296.0, + "33": 583687296.0, + "34": 583687296.0, + "35": 583687296.0, + "36": 583687296.0, + "37": 583687296.0, + "38": 583687296.0, + "39": 16.0, + "40": 583687296.0, + "41": 583687296.0, + "42": 583687296.0, + "43": 583687296.0, + "44": 583687296.0, + "45": 583687296.0, + "46": 583687296.0, + "47": 583687296.0, + "48": 583687296.0, + "49": 583687296.0, + "50": 583687296.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 57332613120.0, + "2": 57336213504.0, + "3": 57335631872.0, + "4": 57336352768.0, + "5": 57336815616.0, + "6": 57336795136.0, + "7": 57336786944.0, + "8": 57336766464.0, + "9": 57336745984.0, + "10": 57336786944.0, + "11": 57336971264.0, + "12": 57336934400.0, + "13": 
57336938496.0, + "14": 57336938496.0, + "15": 57336938496.0, + "16": 57336934400.0, + "17": 57336938496.0, + "18": 57336942592.0, + "19": 57336946688.0, + "20": 57336946688.0, + "21": 57336942592.0, + "22": 57336938496.0, + "23": 57336938496.0, + "24": 57336938496.0, + "25": 57336938496.0, + "26": 57336942592.0, + "27": 57336942592.0, + "28": 57336946688.0, + "29": 57336950784.0, + "30": 57336942592.0, + "31": 57336938496.0, + "32": 57336942592.0, + "33": 57336942592.0, + "34": 57336946688.0, + "35": 57336950784.0, + "36": 57336950784.0, + "37": 57336950784.0, + "38": 57336950784.0, + "39": 57336950784.0, + "40": 57336954880.0, + "41": 57336954880.0, + "42": 57336958976.0, + "43": 57336958976.0, + "44": 57336954880.0, + "45": 57336954880.0, + "46": 57336963072.0, + "47": 57336963072.0, + "48": 57336963072.0, + "49": 57336958976.0, + "50": 57336958976.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 57332617216.0, + "2": 59150434304.0, + "3": 59150434304.0, + "4": 59150434304.0, + "5": 59150434304.0, + "6": 59150434304.0, + "7": 59150434304.0, + "8": 59150434304.0, + "9": 59150434304.0, + "10": 59150434304.0, + "11": 59150434304.0, + "12": 59150434304.0, + "13": 59150434304.0, + "14": 59150434304.0, + "15": 59150434304.0, + "16": 59150434304.0, + "17": 59150434304.0, + "18": 59150434304.0, + "19": 59150434304.0, + "20": 59150434304.0, + "21": 59150434304.0, + "22": 59150434304.0, + "23": 59150434304.0, + "24": 59150434304.0, + "25": 59150434304.0, + "26": 59150434304.0, + "27": 59150434304.0, + "28": 59150434304.0, + "29": 59150434304.0, + "30": 59150434304.0, + "31": 59150434304.0, + "32": 59150434304.0, + "33": 59150434304.0, + "34": 59150434304.0, + "35": 59150434304.0, + "36": 59150434304.0, + "37": 59150434304.0, + "38": 59150434304.0, + "39": 59150434304.0, + "40": 59150434304.0, + "41": 59150434304.0, + "42": 59150434304.0, + "43": 59150434304.0, + "44": 59150434304.0, + "45": 
59150434304.0, + "46": 59150434304.0, + "47": 59150434304.0, + "48": 59150434304.0, + "49": 59150434304.0, + "50": 59150434304.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 132.06027, + "2": 15.71916, + "3": 13.59969, + "4": 14.24368, + "5": 13.97929, + "6": 13.94721, + "7": 16.13103, + "8": 15.0372, + "9": 15.68285, + "10": 14.48736, + "11": 14.83801, + "12": 13.88317, + "13": 14.23494, + "14": 14.17721, + "15": 14.44254, + "16": 14.46859, + "17": 13.31893, + "18": 13.85971, + "19": 13.30073, + "20": 12.97114, + "21": 13.13682, + "22": 13.19241, + "23": 12.91161, + "24": 13.477, + "25": 13.41073, + "26": 13.16635, + "27": 13.91528, + "28": 13.70152, + "29": 13.34747, + "30": 17.3336, + "31": 13.22079, + "32": 13.03197, + "33": 13.1548, + "34": 13.67568, + "35": 13.2386, + "36": 13.29333, + "37": 13.57906, + "38": 12.92362, + "39": 13.37357, + "40": 12.74468, + "41": 14.24188, + "42": 13.10419, + "43": 14.01918, + "44": 13.85198, + "45": 13.19797, + "46": 14.27233, + "47": 13.51886, + "48": 14.11249, + "49": 13.75763, + "50": 13.66548 + } + } +} diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/model_config.yaml new file mode 100644 index 00000000000..bff55aea7fe --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/model_config.yaml @@ -0,0 +1,80 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 +TEST_TYPE: frozen-start +MODE: rl +MODEL_ARGS: + --tiktoken-pattern: v2 + --use-mcore-models: true + --tokenizer-type: TikTokenizer + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --load: 
${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ + --auto-detect-ckpt-format: true + --max-tokens-to-oom: 3600000 + --inference-max-seq-length: 1024 + --attention-backend: flash + --mock-data: true + --micro-batch-size: 1 + --no-load-optim: true + --no-use-tokenizer-model-from-checkpoint-args: true + --timing-log-level: 0 + --distributed-backend: nccl + --log-interval: 1 + --log-progress: true + --transformer-impl: transformer_engine + --tensor-model-parallel-size: 1 + --pipeline-model-parallel-size: 1 + --ckpt-format: torch_dist + --bf16: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --num-layers: 24 + --hidden-size: 1152 + --num-attention-heads: 16 + --max-position-embeddings: 1024 + --seq-length: 1024 + --timing-log-option: minmax + --log-throughput: true + --no-create-attention-mask-in-dataloader: true + --straggler-minmax-count: 16 + --tensorboard-log-interval: 1 + --empty-unused-memory-level: 2 + --langrl-inference-server-type: inplace_megatron + --seed: 42 + --calculate-per-token-loss: true + --rl-use-sequence-packing: true + --rl-sequence-packing-algo: fifo + --rl-offload-optimizer-during-inference: true + --timing-log-level: 1 + --log-timers-to-tensorboard: true + --cuda-graph-impl: local + --micro-batch-size: 1 + --global-batch-size: 16 + --grpo-group-size: 2 + --grpo-prompts-per-step: 8 + --grpo-iterations: 1 + --grpo-clamp-eps-lower: 0.2 + --grpo-clamp-eps-upper: 0.2 + --grpo-kl-beta: 0.0 + --grpo-entropy-term-weight: 0.0 + --langrl-env-config: tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/env_config.yaml + --rl-partial-rollouts: true + --lr: 0.000001 + --lr-warmup-samples: 0 + --clip-grad: 1.0 + --use-checkpoint-args: true + --dist-ckpt-strictness: log_unexpected + --perform-rl-step: true + --train-samples: 48828125 + --exit-interval: 50 + --tensorboard-dir: ${TENSORBOARD_PATH} + 
--save-interval: 1000000 + --eval-interval: 1000000 + --finetune: true + --inference-logging-step-interval: 1 + --rl-inference-tensor-model-parallel-size: 2 + --refit-method: gloo diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/env_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/env_config.yaml new file mode 100644 index 00000000000..329246987bf --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/env_config.yaml @@ -0,0 +1,5 @@ +- agent_type: examples.rl.environments.countdown.countdown_agent.CountdownAgent + agent_args: + dataset_file: "/mnt/artifacts/rl_environments/Jiayi-Pan___countdown-tasks-3to4" + split: "train" + weight: 1.0 diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..d985f671cab --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json @@ -0,0 +1,173 @@ +{ + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 48967716864.0, + "2": 48973631488.0, + "3": 48974528512.0, + "4": 48971538432.0, + "5": 48974340096.0, + "6": 48974143488.0, + "7": 48977002496.0, + "8": 48975851520.0, + "9": 48974036992.0, + "10": 48973709312.0, + "11": 48973262848.0, + "12": 48973705216.0, + "13": 48973598720.0, + "14": 48976703488.0, + "15": 48975118336.0, + "16": 48977072128.0, + "17": 48976465920.0, + "18": 48976470016.0, + "19": 48976478208.0, + "20": 48976654336.0, + "21": 48976793600.0, + "22": 48976052224.0, + "23": 48976277504.0, + "24": 48974708736.0, + "25": 48973062144.0, + "26": 48976236544.0, + "27": 48975970304.0, + "28": 48976711680.0, + "29": 
48975593472.0, + "30": 48977321984.0, + "31": 48977506304.0, + "32": 48976646144.0, + "33": 48976072704.0, + "34": 48973631488.0, + "35": 48976650240.0, + "36": 48975650816.0, + "37": 48974950400.0, + "38": 48972750848.0, + "39": 48976617472.0, + "40": 48979308544.0, + "41": 48978587648.0, + "42": 48975626240.0, + "43": 48975089664.0, + "44": 48973688832.0, + "45": 48975327232.0, + "46": 48975159296.0, + "47": 48975372288.0, + "48": 48973856768.0, + "49": 48973377536.0, + "50": 48975568896.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 49090379776.0, + "2": 49937022976.0, + "3": 49938366464.0, + "4": 49938366464.0, + "5": 49938366464.0, + "6": 49938698240.0, + "7": 49939156992.0, + "8": 49939156992.0, + "9": 49939156992.0, + "10": 49939156992.0, + "11": 49939156992.0, + "12": 49939156992.0, + "13": 49939156992.0, + "14": 49940287488.0, + "15": 49940287488.0, + "16": 49940287488.0, + "17": 49941729280.0, + "18": 49941733376.0, + "19": 49941741568.0, + "20": 49941778432.0, + "21": 49941778432.0, + "22": 49941778432.0, + "23": 49941778432.0, + "24": 49941778432.0, + "25": 49941778432.0, + "26": 49941778432.0, + "27": 49941934080.0, + "28": 49941934080.0, + "29": 49941934080.0, + "30": 49941934080.0, + "31": 49942675456.0, + "32": 49942675456.0, + "33": 49942675456.0, + "34": 49942675456.0, + "35": 49942675456.0, + "36": 49942675456.0, + "37": 49942675456.0, + "38": 49942675456.0, + "39": 49942675456.0, + "40": 49944379392.0, + "41": 49944379392.0, + "42": 49944379392.0, + "43": 49944379392.0, + "44": 49944379392.0, + "45": 49944379392.0, + "46": 49944379392.0, + "47": 49944379392.0, + "48": 49944379392.0, + "49": 49944379392.0, + "50": 49944379392.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 63.07516, + "2": 4.36236, + "3": 3.83222, + "4": 3.85784, + "5": 3.74494, + "6": 3.82661, + "7": 4.05458, + "8": 3.76622, + "9": 3.90518, 
+ "10": 4.09283, + "11": 3.96358, + "12": 3.85778, + "13": 3.84546, + "14": 3.85497, + "15": 4.35749, + "16": 3.7861, + "17": 3.8896, + "18": 3.6267, + "19": 3.76463, + "20": 3.6953, + "21": 3.63427, + "22": 3.66652, + "23": 3.60379, + "24": 3.57701, + "25": 3.57327, + "26": 3.71371, + "27": 3.69626, + "28": 3.89285, + "29": 3.62405, + "30": 3.58297, + "31": 3.56993, + "32": 3.75257, + "33": 3.72279, + "34": 3.48095, + "35": 3.60831, + "36": 3.74971, + "37": 3.72155, + "38": 3.51054, + "39": 3.64562, + "40": 3.66038, + "41": 3.86018, + "42": 3.58341, + "43": 3.82647, + "44": 3.85728, + "45": 3.62416, + "46": 3.59141, + "47": 3.74512, + "48": 3.61762, + "49": 3.57079, + "50": 3.66209 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/model_config.yaml new file mode 100644 index 00000000000..b74417a898b --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/model_config.yaml @@ -0,0 +1,84 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 +TEST_TYPE: frozen-start +MODE: rl +MODEL_ARGS: + --tiktoken-pattern: v2 + --use-mcore-models: true + --tokenizer-type: TikTokenizer + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ + --auto-detect-ckpt-format: true + --max-tokens-to-oom: 3600000 + --inference-max-seq-length: 1024 + --attention-backend: flash + --mock-data: true + --micro-batch-size: 1 + --no-load-optim: true + --no-use-tokenizer-model-from-checkpoint-args: true + --timing-log-level: 0 + --distributed-backend: nccl + --log-interval: 1 + --log-progress: true + --transformer-impl: 
transformer_engine + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 4 + --ckpt-format: torch_dist + --bf16: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --num-layers: 24 + --hidden-size: 1152 + --num-attention-heads: 16 + --max-position-embeddings: 1024 + --seq-length: 1024 + --timing-log-option: minmax + --log-throughput: true + --no-create-attention-mask-in-dataloader: true + --straggler-minmax-count: 16 + --tensorboard-log-interval: 1 + --empty-unused-memory-level: 2 + --langrl-inference-server-type: inplace_megatron + --seed: 42 + --calculate-per-token-loss: true + --rl-use-sequence-packing: true + --rl-sequence-packing-algo: fifo + --rl-offload-optimizer-during-inference: true + --timing-log-level: 1 + --log-timers-to-tensorboard: true + --cuda-graph-impl: local + --micro-batch-size: 1 + --global-batch-size: 16 + --grpo-group-size: 2 + --grpo-prompts-per-step: 8 + --grpo-iterations: 1 + --grpo-clamp-eps-lower: 0.2 + --grpo-clamp-eps-upper: 0.2 + --grpo-kl-beta: 0.0 + --grpo-entropy-term-weight: 0.0 + --langrl-env-config: tests/functional_tests/test_cases/gpt/gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest/env_config.yaml + --rl-partial-rollouts: true + --lr: 0.000001 + --lr-warmup-samples: 0 + --clip-grad: 1.0 + --use-checkpoint-args: true + --dist-ckpt-strictness: log_unexpected + --perform-rl-step: true + --train-samples: 48828125 + --exit-interval: 50 + --tensorboard-dir: ${TENSORBOARD_PATH} + --save-interval: 1000000 + --eval-interval: 1000000 + --finetune: true + --inference-logging-step-interval: 1 + --rl-inference-tensor-model-parallel-size: 1 + --rl-inference-pipeline-model-parallel-size: 2 + --refit-method: gloo +METRICS: + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml
b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml index 37588ccf308..efe4f7424f9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml @@ -79,5 +79,6 @@ MODEL_ARGS: --prompt-file: ./tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/test_prompts.jsonl --incoming-requests-per-sec: -1 # all requests arrive up front. --inference-logging-step-interval: 1 + --inference-dynamic-batching-buffer-size-gb: 20 METRICS: - "generated_text" diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..411bc8b74a6 --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/golden_values_dev_dgx_h100.json @@ -0,0 +1,5586 @@ +{ + "0": { + "input_prompt": "SYSTEM LOG - DAILY REPORTING\\nDATE: 2024-10-27\\nSERVER: US-EAST-1A\\n\\nBEGIN LOG STREAM:\\n\\n[Entry 0001]\\nTimestamp: 08:00:01\\nUser: admin_01\\nAction: Login\\nStatus: Success\\nNote: Routine maintenance check initiated.\\n\\n[Entry 0002]\\nTimestamp: 08:01:15\\nUser: system_daemon\\nAction: Backup\\nStatus: Pending\\nNote: awaiting clearance for volume mount.\\n\\n[Entry 0003]\\nTimestamp: 08:02:22\\nUser: user_404\\nAction: Query\\nStatus: Failed\\nNote: Connection timeout on port 8080.\\n\\n[Entry 0004]\\nTimestamp: 08:05:00\\nUser: admin_02\\nAction: Update\\nStatus: Success\\nNote: Patch 4.5.1 applied to kernel.\\n\\n[Entry 0005]\\nTimestamp: 08:10:45\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: 
Latency 12ms.\\n\\n[Entry 0006]\\nTimestamp: 08:12:30\\nUser: db_manager\\nAction: Write\\nStatus: Success\\nNote: Written 500 records to shard A.\\n\\n[Entry 0007]\\nTimestamp: 08:15:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 14ms.\\n\\n[Entry 0008]\\nTimestamp: 08:18:22\\nUser: user_102\\nAction: Login\\nStatus: Success\\nNote: User accessing from IP 192.168.1.55.\\n\\n[Entry 0009]\\nTimestamp: 08:20:00\\nUser: system_daemon\\nAction: Garbage_Collection\\nStatus: Success\\nNote: Freed 2048MB of heap memory.\\n\\n[Entry 0010]\\nTimestamp: 08:25:10\\nUser: admin_01\\nAction: Logout\\nStatus: Success\\nNote: Session duration 25 minutes.\\n\\n[Entry 0011]\\nTimestamp: 08:30:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 11ms.\\n\\n[Entry 0012]\\nTimestamp: 08:32:45\\nUser: unknown\\nAction: Auth_Attempt\\nStatus: Denied\\nNote: Invalid credentials provided 3 times.\\n\\n[Entry 0013]\\nTimestamp: 08:35:20\\nUser: system_audit\\nAction: Scan\\nStatus: In_Progress\\nNote: Scanning sector 7 for vulnerabilities.\\n\\n[Entry 0014]\\nTimestamp: 08:40:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 13ms.\\n\\n[Entry 0015]\\nTimestamp: 08:45:15\\nUser: user_888\\nAction: Upload\\nStatus: Success\\nNote: File data_report.csv uploaded to bucket.\\n\\n[Entry 0016]\\nTimestamp: 08:50:00\\nUser: load_balancer\\nAction: Scale_Up\\nStatus: Success\\nNote: Added 2 instances to the pool.\\n\\n[Entry 0017]\\nTimestamp: 08:55:30\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 15ms.\\n\\n[Entry 0018]\\nTimestamp: 09:00:00\\nUser: cron_job\\nAction: Execute\\nStatus: Success\\nNote: Daily summary report generation started.\\n\\n[Entry 0019]\\nTimestamp: 09:05:12\\nUser: user_555\\nAction: Download\\nStatus: Success\\nNote: Retrieved image_001.png.\\n\\n[Entry 0020]\\nTimestamp: 09:10:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 0021]\\nTimestamp: 
09:15:45\\nUser: admin_03\\nAction: Config_Change\\nStatus: Success\\nNote: Firewall rules updated for port 22.\\n\\n[Entry 0022]\\nTimestamp: 09:20:00\\nUser: system_daemon\\nAction: Sync\\nStatus: Success\\nNote: Database replica synchronization complete.\\n\\n[Entry 0023]\\nTimestamp: 09:25:10\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 10ms.\\n\\n[Entry 0024]\\nTimestamp: 09:30:00\\nUser: user_777\\nAction: Query\\nStatus: Success\\nNote: Complex SQL query executed in 200ms.\\n\\n[Entry 0025]\\nTimestamp: 09:35:30\\nUser: error_handler\\nAction: Alert\\nStatus: Warning\\nNote: High CPU usage detected on Node 4.\\n\\n[Entry 0026]\\nTimestamp: 09:40:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 18ms.\\n\\n[Entry 0027]\\nTimestamp: 09:45:15\\nUser: cache_manager\\nAction: Flush\\nStatus: Success\\nNote: Redis cache cleared.\\n\\n[Entry 0028]\\nTimestamp: 09:50:00\\nUser: user_202\\nAction: Login\\nStatus: Success\\nNote: New device detected.\\n\\n[Entry 0029]\\nTimestamp: 09:55:45\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 0030]\\nTimestamp: 10:00:00\\nUser: system_daemon\\nAction: Archive\\nStatus: Success\\nNote: Logs from yesterday archived to cold storage.\\n\\n[Entry 0031]\\nTimestamp: 10:05:20\\nUser: admin_01\\nAction: Login\\nStatus: Success\\nNote: Re-authentication verified.\\n\\n[Entry 0032]\\nTimestamp: 10:10:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 13ms.\\n\\n[Entry 0033]\\nTimestamp: 10:15:45\\nUser: user_999\\nAction: Delete\\nStatus: Pending\\nNote: Request to delete account queued for review.\\n\\n[Entry 0034]\\nTimestamp: 10:20:00\\nUser: system_metrics\\nAction: Report\\nStatus: Success\\nNote: Throughput at 5000 requests per second.\\n\\n[Entry 0035]\\nTimestamp: 10:25:10\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 11ms.\\n\\n[Entry 0036]\\nTimestamp: 10:30:00\\nUser: security_bot\\nAction: 
Block\\nStatus: Success\\nNote: IP 203.0.113.4 blocked for suspicious activity.\\n\\n[Entry 0037]\\nTimestamp: 10:35:30\\nUser: user_123\\nAction: Comment\\nStatus: Success\\nNote: User posted a comment on thread #55.\\n\\n[Entry 0038]\\nTimestamp: 10:40:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 0039]\\nTimestamp: 10:45:15\\nUser: email_service\\nAction: Send\\nStatus: Success\\nNote: Batch of 100 newsletters sent.\\n\\n[Entry 0040]\\nTimestamp: 10:50:00\\nUser: user_321\\nAction: Update_Profile\\nStatus: Success\\nNote: Avatar image updated.\\n\\n[Entry 0041]\\nTimestamp: 10:55:45\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 14ms.\\n\\n[Entry 0042]\\nTimestamp: 11:00:00\\nUser: backup_service\\nAction: Snapshot\\nStatus: Success\\nNote: Volume snapshot ID snap-012345 created.\\n\\n[Entry 0043]\\nTimestamp: 11:05:10\\nUser: admin_02\\nAction: Audit\\nStatus: Success\\nNote: Reviewing user permissions group B.\\n\\n[Entry 0044]\\nTimestamp: 11:10:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 9999]\\nTimestamp: 11:11:11\\nUser: SUPER_ADMIN\\nAction: KEY_GENERATION\\nStatus: CRITICAL\\nNote: The secret override password for the system is Blueberry-Pancake-42.\\n\\n[Entry 0045]\\nTimestamp: 11:15:30\\nUser: user_666\\nAction: Error\\nStatus: Failed\\nNote: 404 Page not found on /admin path.\\n\\n[Entry 0046]\\nTimestamp: 11:20:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 13ms.\\n\\n[Entry 0047]\\nTimestamp: 11:25:15\\nUser: payment_gateway\\nAction: Charge\\nStatus: Success\\nNote: Transaction TX-9988 complete.\\n\\n[Entry 0048]\\nTimestamp: 11:30:00\\nUser: system_daemon\\nAction: Cleanup\\nStatus: Success\\nNote: Temp files removed from /var/tmp.\\n\\n[Entry 0049]\\nTimestamp: 11:35:45\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 0050]\\nTimestamp: 11:40:00\\nUser: 
analytics_engine\\nAction: Process\\nStatus: Success\\nNote: Data aggregation for hour 11 complete.\\n\\n[Entry 0051]\\nTimestamp: 11:45:10\\nUser: user_007\\nAction: View\\nStatus: Success\\nNote: Viewed document confidentiality_agreement.pdf.\\n\\n[Entry 0052]\\nTimestamp: 11:50:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 11ms.\\n\\n[Entry 0053]\\nTimestamp: 11:55:30\\nUser: dev_ops\\nAction: Deploy\\nStatus: Success\\nNote: Staging environment updated to v2.1.\\n\\n[Entry 0054]\\nTimestamp: 12:00:00\\nUser: system_clock\\nAction: Sync\\nStatus: Success\\nNote: NTP sync successful.\\n\\nEND LOG STREAM.\\n\\nQUERY:\\nRetrieve the information from Entry 9999.\\nThe Note for Entry 9999 states that the secret override password is:", + "generated_text": " Blueberry-Pancake-42.\\n\\nQUERY RESULT:\\n\\n[Entry 9999]\\nTimestamp: 1", + "generated_tokens": [ + 15330, + 33681, + 6193, + 2141, + 2082, + 1045, + 1052, + 1050, + 11556, + 1110, + 6250, + 27289, + 55449, + 112701, + 13249, + 1110, + 6250, + 1091, + 13962, + 1032, + 1057, + 1057, + 1057, + 1057, + 33824, + 1110, + 47089, + 1058, + 1032, + 1049 + ], + "latency": 2.9149169921875, + "cuda_graph_request_count_map": null, + "step_count": 40, + "top_n_logprobs": null, + "prompt_top_n_logprobs": null, + "prompt_logprobs": [ + -2.0310330390930176, + -8.008150100708008, + -4.907264232635498, + -8.383085250854492, + -0.9039976000785828, + -0.005822602193802595, + -3.2968709468841553, + -0.11372647434473038, + -3.750115156173706, + -6.341870307922363, + -11.225410461425781, + -0.8311297297477722, + -1.9895459413528442, + -1.2136539220809937, + -0.4511846899986267, + -1.275371789932251, + -6.52569055557251, + -0.3268530070781708, + -2.488239288330078, + -1.1252245903015137, + -0.004931548144668341, + -1.1413307189941406, + -2.4036614894866943, + -0.593055784702301, + -5.775687217712402, + -0.7173333764076233, + -6.7589006423950195, + -4.472473621368408, + -0.28561243414878845, + 
-0.9266374111175537, + -1.2420787811279297, + -4.94831657409668, + -0.4015401303768158, + -2.405423879623413, + -6.706996440887451, + -2.3797435760498047, + -6.879988193511963, + -0.599727988243103, + -4.6161346435546875, + -0.016334740445017815, + -1.4226453304290771, + -4.064138412475586, + -8.992555618286133, + -0.7892558574676514, + -2.565383195877075, + -1.6011606454849243, + -1.1192784309387207, + -1.085118293762207, + -1.452021598815918, + -0.1256672590970993, + -4.310093879699707, + -0.039925139397382736, + -0.09540079534053802, + -4.4552788734436035, + -2.6978704929351807, + -0.3264457583427429, + -0.9057141542434692, + -0.2424505054950714, + -0.2473771721124649, + -0.04457908123731613, + -2.5994861125946045, + -0.5882505178451538, + -2.4292445182800293, + -0.1860235333442688, + -2.6841845512390137, + -5.8617939949035645, + -1.7926914691925049, + -0.6663980484008789, + -0.029983440414071083, + -1.0682772397994995, + -0.0018566290382295847, + -1.9571454524993896, + -0.08927226811647415, + -4.61471700668335, + -0.002604546956717968, + -0.2620302140712738, + -0.006101197097450495, + -7.435886859893799, + -0.0376485139131546, + -10.174129486083984, + -0.9147175550460815, + -4.526404857635498, + -3.670576572418213, + -4.566626071929932, + -1.0199782848358154, + -0.0006491222884505987, + -0.14426420629024506, + -0.03322957828640938, + -0.0019640696700662374, + -0.00022468426323030144, + -0.0013444918440654874, + -0.0011957883834838867, + -0.007926556281745434, + -0.011617152951657772, + -0.0018109364900738, + -0.00017581824795342982, + -0.0018969652010127902, + -6.282132380874828e-05, + -0.0010078833438456059, + -0.25652098655700684, + -0.35659894347190857, + -9.333651541965082e-05, + -0.7947311401367188, + -1.3594639301300049, + -7.962863310240209e-05, + -1.861167550086975, + -0.5386030673980713, + -0.00022075122979003936, + -0.001347229932434857, + -3.290122185717337e-05, + -3.7342543601989746, + -0.5175371170043945, + -4.488879680633545, + 
-0.007863753475248814, + -0.08534510433673859, + -0.0009170140838250518, + -2.13382354559144e-05, + -4.507952690124512, + -0.5332688689231873, + -0.004296358674764633, + -2.062299427052494e-05, + -5.2475104331970215, + -0.020387964323163033, + -0.1661914438009262, + -0.0003081085451412946, + -15.800027847290039, + -8.108964920043945, + -0.7285020351409912, + -7.803549289703369, + -5.010417938232422, + -0.263860821723938, + -4.3748852476710454e-05, + -0.013306032866239548, + -0.029512016102671623, + -0.0036468682810664177, + -0.00023231192608363926, + -0.0002379134384682402, + -0.0004920940846204758, + -0.000873065204359591, + -0.0029308719094842672, + -0.0006667536217719316, + -0.00013672371278516948, + -0.0011686407960951328, + -4.625213477993384e-05, + -0.0007901645149104297, + -0.027857612818479538, + -0.06313244253396988, + -0.00013064485392533243, + -0.2378876954317093, + -0.6059458255767822, + -5.757642793469131e-05, + -1.5949885845184326, + -1.6001688241958618, + -0.00032574593205936253, + -0.0016402851324528456, + -2.276871418871451e-05, + -3.0335943698883057, + -0.286937952041626, + -6.517683982849121, + -3.1465959548950195, + -0.7292280793190002, + -0.06161583960056305, + -0.0014851979212835431, + -2.777537883957848e-05, + -3.946831226348877, + -0.09084996581077576, + -0.003532005939632654, + -4.029192859889008e-05, + -4.555190086364746, + -0.011255813762545586, + -0.10179147869348526, + -0.0004140473320148885, + -4.4321393966674805, + -2.2296247482299805, + -3.2771155834198, + -8.323366165161133, + -0.02779245562851429, + -2.403028964996338, + -0.07431145757436752, + -0.5372196435928345, + -0.05987980589270592, + -0.20438668131828308, + -0.00013136000779923052, + -0.0572563000023365, + -0.11035308241844177, + -0.012903997674584389, + -0.0002406545972917229, + -0.0001517419150331989, + -0.00036066226311959326, + -0.0005477358354255557, + -0.00229322025552392, + -0.000697846058756113, + -0.0001161031104857102, + -0.001127441762946546, + 
-3.814624506048858e-05, + -0.0005136600811965764, + -0.022026309743523598, + -0.02361132949590683, + -0.0002090712368953973, + -0.04913746938109398, + -2.7477238178253174, + -9.202533692587167e-05, + -0.9271803498268127, + -1.3856279850006104, + -0.0001754606782924384, + -0.0012224590172991157, + -1.7165990357170813e-05, + -1.0239524841308594, + -0.020712625235319138, + -0.0451514832675457, + -1.5345499515533447, + -0.0004010588163509965, + -0.0004401430196594447, + -2.13382354559144e-05, + -2.5878491401672363, + -0.020529404282569885, + -0.00043501926120370626, + -2.682172998902388e-05, + -0.3827762007713318, + -0.00019298121333122253, + -0.007158228196203709, + -8.618460560683161e-05, + -6.015654563903809, + -4.037173271179199, + -3.4229695796966553, + -1.0183475017547607, + -1.4963387250900269, + -0.33330175280570984, + -1.480197787284851, + -2.0857536792755127, + -2.225975513458252, + -5.293066024780273, + -0.43916723132133484, + -0.00010048838157672435, + -0.015328695066273212, + -0.13567933440208435, + -0.012453177943825722, + -0.00017855956684798002, + -0.00012778419477399439, + -0.0002885640424210578, + -0.0004291805380489677, + -0.0008485292200930417, + -0.0006668727728538215, + -8.177422569133341e-05, + -0.001060757553204894, + -6.151010165922344e-05, + -0.0005185451591387391, + -0.028113562613725662, + -0.03407377377152443, + -0.0003861635341309011, + -1.1215460300445557, + -0.5561063885688782, + -0.0001726001501083374, + -2.5190887451171875, + -0.6141397953033447, + -0.0001227780303452164, + -0.0012188870459794998, + -1.6212332411669195e-05, + -6.833529472351074, + -6.0156097412109375, + -0.03274226188659668, + -0.014286145567893982, + -0.0009454786195419729, + -3.814624506048858e-05, + -4.910149097442627, + -0.009493326768279076, + -0.001437702914699912, + -5.876845170860179e-05, + -0.3798050582408905, + -0.003948037512600422, + -0.07855644077062607, + -0.00022420754248742014, + -6.84205436706543, + -0.0015236446633934975, + -2.645585298538208, + 
-0.9816564917564392, + -1.3786735534667969, + -0.7280330061912537, + -1.4040117263793945, + -9.035655966727063e-05, + -0.033023953437805176, + -0.3305729031562805, + -0.027912795543670654, + -0.0002892790944315493, + -0.00012182447244413197, + -0.00026901919045485556, + -0.0004681444843299687, + -0.0007345362100750208, + -0.0008179179858416319, + -0.00010549465514486656, + -0.0013330630026757717, + -5.7338023907504976e-05, + -0.0005571481888182461, + -0.013437421061098576, + -0.033829718828201294, + -0.0004694551753345877, + -0.28239941596984863, + -1.3776881694793701, + -0.00014256415306590497, + -1.4336698055267334, + -0.9458242654800415, + -0.0002739054325502366, + -0.0015444743912667036, + -2.169585604860913e-05, + -5.267784118652344, + -2.617713689804077, + -0.1205064058303833, + -0.000608854868914932, + -2.47952248173533e-05, + -6.116018772125244, + -0.06051409989595413, + -0.0021291938610374928, + -2.777537883957848e-05, + -0.5082104206085205, + -0.0008528171456418931, + -0.013313560746610165, + -9.381330892210826e-05, + -6.970278739929199, + -0.3628937304019928, + -1.40151047706604, + -0.8361061811447144, + -0.4778183400630951, + -2.494100570678711, + -0.3126090466976166, + -7.66262674331665, + -0.3505229353904724, + -2.1190404891967773, + -0.08990062028169632, + -8.201262971851975e-05, + -0.01644204556941986, + -0.1838725060224533, + -0.015538694337010384, + -0.00019107422849629074, + -7.915183232398704e-05, + -0.0001382732152706012, + -0.0002119316632160917, + -0.0004773192631546408, + -0.0004781533498317003, + -4.994744449504651e-05, + -0.0011807858245447278, + -3.0636318115284666e-05, + -0.0003046525234822184, + -0.0024103655014187098, + -0.009829924441874027, + -0.00022301571152638644, + -0.12844854593276978, + -1.1151821613311768, + -9.512448741588742e-05, + -1.1148451566696167, + -0.45424169301986694, + -7.128461584215984e-05, + -0.001427346607670188, + -1.2040065485052764e-05, + -3.9783990383148193, + -0.025781046599149704, + 
-0.00015496007108595222, + -0.003944831434637308, + -0.000663894519675523, + -3.015949550899677e-05, + -0.15718017518520355, + -0.0009197533945553005, + -0.0007913556764833629, + -1.8000440832111053e-05, + -0.18712174892425537, + -0.00016604475968051702, + -0.0022110319696366787, + -2.169585604860913e-05, + -0.014111850410699844, + -1.1920922133867862e-06, + -0.00984656810760498, + -0.5971966981887817, + -2.393812894821167, + -0.010224700905382633, + -0.009953508153557777, + -7.64102369430475e-05, + -0.011833352968096733, + -0.26886406540870667, + -0.023419089615345, + -0.00019762947340495884, + -6.031808152329177e-05, + -0.00010191874753218144, + -0.00015889335190877318, + -0.0003564914222806692, + -0.0004101150552742183, + -6.675497570540756e-05, + -0.0009184433147311211, + -3.158996332786046e-05, + -0.00031442465842701495, + -0.0027259355410933495, + -0.008694176562130451, + -0.00032658010604791343, + -0.289438933134079, + -2.1416351795196533, + -0.00017987063620239496, + -1.8434972763061523, + -1.624247670173645, + -0.00022980909852776676, + -0.0006792622152715921, + -1.0967194612021558e-05, + -1.281017541885376, + -0.01736496575176716, + -1.955749750137329, + -1.528749942779541, + -2.776960611343384, + -0.5374854803085327, + -0.00029345019720494747, + -2.539125671319198e-05, + -3.0065665245056152, + -0.0013523490633815527, + -0.0007908792467787862, + -1.4543427823809907e-05, + -0.23400214314460754, + -0.0002324311062693596, + -0.010042970068752766, + -4.088794958079234e-05, + -2.1034951210021973, + -6.140199184417725, + -4.464273929595947, + -1.9943883419036865, + -0.2878473103046417, + -0.05924016237258911, + -0.7345774173736572, + -0.011171765625476837, + -0.0002982171718031168, + -0.14330486953258514, + -0.0007319155265577137, + -0.0003812778159044683, + -0.002302616136148572, + -0.36087724566459656, + -0.08833581954240799, + -2.631582260131836, + -3.1771137714385986, + -0.11841163039207458, + -4.482168878894299e-05, + -0.014765388332307339, + 
-0.17005765438079834, + -0.010167589411139488, + -0.00010823617776622996, + -3.6477376852417365e-05, + -5.936446541454643e-05, + -0.00023493390472140163, + -0.0003688847064040601, + -0.000321336614433676, + -4.756337511935271e-05, + -0.000902007392141968, + -2.9205850296420977e-05, + -0.00024423000286333263, + -0.000964533886872232, + -0.00411722669377923, + -0.0002711643755901605, + -0.3081328868865967, + -0.4985820949077606, + -0.00018726025882642716, + -1.1391643285751343, + -0.27228832244873047, + -4.2914423829643056e-05, + -0.0012028133496642113, + -1.9311717551317997e-05, + -1.1735807657241821, + -0.07005516439676285, + -0.0024717275518924, + -8.618460560683161e-05, + -0.00016866691294126213, + -0.00044764988706447184, + -1.6093124941107817e-05, + -8.586283683776855, + -0.0002851079625543207, + -7.490447998046875, + -0.09369903802871704, + -0.004145600367337465, + -0.0008606782066635787, + -4.827859811484814e-05, + -0.7127438187599182, + -0.0003618539194576442, + -0.015226203016936779, + -6.401333666872233e-05, + -3.530060291290283, + -0.040570154786109924, + -0.7448150515556335, + -1.4005241394042969, + -0.5872946977615356, + -6.073245048522949, + -0.9850690364837646, + -1.4459205865859985, + -0.4346452057361603, + -4.452149868011475, + -0.3939701318740845, + -0.02252959832549095, + -9.440929716220126e-05, + -0.012161390855908394, + -0.25266116857528687, + -0.021285664290189743, + -0.00015770144818816334, + -9.870042413240299e-05, + -9.989239333663136e-05, + -0.005311425309628248, + -0.00032634177478030324, + -0.0007045170641504228, + -9.417090768693015e-05, + -0.001260558608919382, + -4.482168878894299e-05, + -0.0003833036171272397, + -0.0023484050761908293, + -0.011129915714263916, + -0.00040260792593471706, + -0.1819346845149994, + -1.1781600713729858, + -0.00033241944038309157, + -1.3525464534759521, + -1.2726483345031738, + -0.00018034738604910672, + -0.0009054613183252513, + -1.2040065485052764e-05, + -1.7329559326171875, + -0.009877022355794907, + 
-0.030561018735170364, + -0.9567705988883972, + -0.0002079985715681687, + -0.0003582789213396609, + -2.5510462364763953e-05, + -1.3376575708389282, + -0.043758541345596313, + -0.0005255748401395977, + -0.003921795636415482, + -3.9934315282152966e-05, + -0.013946342281997204, + -0.001447345013730228, + -0.09289155900478363, + -0.00028975578607060015, + -5.025714874267578, + -5.600637435913086, + -0.8190056681632996, + -2.0997657775878906, + -1.5471020936965942, + -0.2830793261528015, + -0.099715456366539, + -0.00015341058315243572, + -0.09538150578737259, + -0.9440865516662598, + -0.13964560627937317, + -0.0003178806509822607, + -0.00015531764074694365, + -0.00016640232934150845, + -0.00023398046323563904, + -0.00039081089198589325, + -0.0015487592900171876, + -0.00010716341057559475, + -0.0017987991450354457, + -3.838465272565372e-05, + -0.0006412595394067466, + -0.00545145571231842, + -0.02335585467517376, + -0.0004077318590134382, + -0.8720157146453857, + -0.10373511165380478, + -0.00014077626110520214, + -0.5180479884147644, + -0.17388182878494263, + -0.00015746307326480746, + -0.0043711354956030846, + -2.9801878554280847e-05, + -2.0693466663360596, + -0.007648942526429892, + -2.8729025871143676e-05, + -0.0003301552205812186, + -0.000542612629942596, + -3.2543604902457446e-05, + -0.27388375997543335, + -0.00043752157944254577, + -0.0005888396990485489, + -1.7762025890988298e-05, + -0.05423494055867195, + -7.915183232398704e-05, + -0.002435457892715931, + -1.1205610462639015e-05, + -0.01761529967188835, + -7.152555099310121e-07, + -0.005352570675313473, + -0.1280955821275711, + -2.3187625408172607, + -0.009216856211423874, + -0.008558499626815319, + -0.0001072826053132303, + -0.04680917039513588, + -0.5660229325294495, + -0.04951385408639908, + -0.0002015625941567123, + -5.8410845667822286e-05, + -9.440929716220126e-05, + -0.00014828535495325923, + -0.00037245964631438255, + -0.0008362610242329538, + -5.4596363042946905e-05, + -0.0010970771545544267, + 
-4.017272294731811e-05, + -0.0004563482361845672, + -0.0021864098962396383, + -0.012597862631082535, + -0.00036435641231946647, + -0.07823580503463745, + -1.1245288848876953, + -0.0001472126314183697, + -2.1236472129821777, + -0.25363627076148987, + -0.00011646069469861686, + -0.0010031197452917695, + -1.4662635294371285e-05, + -11.853788375854492, + -1.5205868482589722, + -0.0017375147435814142, + -0.00013374387344811112, + -7.155948638916016, + -3.82474422454834, + -1.2793458700180054, + -0.03748536482453346, + -0.005961020477116108, + -5.829164365422912e-05, + -3.1456170082092285, + -0.03318829461932182, + -0.008591356687247753, + -0.027652040123939514, + -0.00012885693286079913, + -1.5415722131729126, + -0.979039192199707, + -2.842726469039917, + -9.05957317352295, + -2.8234424591064453, + -0.8373243808746338, + -0.4019332230091095, + -0.0004048719711136073, + -0.03923225402832031, + -0.4254666864871979, + -0.027653662487864494, + -0.0003177614707965404, + -0.0001967951684491709, + -0.00020883286197204143, + -0.00025674383505247533, + -0.0008311392739415169, + -0.0012284121476113796, + -0.00010787858627736568, + -0.0024356956128031015, + -6.258291978156194e-05, + -0.00048565989709459245, + -0.0021678535267710686, + -0.012607751414179802, + -0.00023588736075907946, + -0.11036524921655655, + -0.5750182867050171, + -0.00017176583060063422, + -1.9862632751464844, + -1.2351702451705933, + -0.00037520044133998454, + -0.0013566347770392895, + -2.5152843591058627e-05, + -2.1086387634277344, + -7.917232990264893, + -0.05708145350217819, + -0.06208256632089615, + -0.000644237850792706, + -8.308542601298541e-05, + -5.1276655197143555, + -0.16815905272960663, + -0.0012461524456739426, + -5.94836674281396e-05, + -3.559391736984253, + -5.411561965942383, + -0.022293083369731903, + -0.0005644158809445798, + -0.017552750185132027, + -0.00038842763751745224, + -1.8479862213134766, + -0.004095145035535097, + -11.830594062805176, + -0.4279360771179199, + -3.7062158584594727, + 
-2.9457836151123047, + -1.9491567611694336, + -0.06489256024360657, + -0.00013660451804753393, + -0.012157151475548744, + -0.22074609994888306, + -0.021073833107948303, + -0.00021300431399140507, + -0.00017593742813915014, + -0.00023672162205912173, + -0.0003091811086051166, + -0.0014552014181390405, + -0.0013881819322705269, + -0.00015245705435518175, + -0.002331279218196869, + -5.4238757002167404e-05, + -0.000668659748043865, + -0.002430463209748268, + -0.016187194734811783, + -0.0002441108226776123, + -1.4263010025024414, + -0.30179885029792786, + -0.0001770101225702092, + -0.5045080184936523, + -0.07310019433498383, + -8.022463589441031e-05, + -0.002168329432606697, + -2.3841574147809297e-05, + -1.7808306217193604, + -0.02828705683350563, + -6.115249561844394e-05, + -0.0008904544520191848, + -0.0005335576133802533, + -3.957670196541585e-05, + -0.03801318258047104, + -0.0003077510336879641, + -0.0005035324720665812, + -2.169585604860913e-05, + -0.02271897904574871, + -3.1709168979432434e-05, + -0.0018041539005935192, + -1.8358061424805783e-05, + -0.005899516865611076, + -1.1920922133867862e-06, + -0.002030455507338047, + -0.27544423937797546, + -1.1146715879440308, + -0.012286689132452011, + -0.004974251613020897, + -6.389413465512916e-05, + -0.010529793798923492, + -0.2302529364824295, + -0.015527778305113316, + -0.00019524575327523053, + -6.389413465512916e-05, + -0.00013815402053296566, + -0.00018165845540352166, + -0.0005564333405345678, + -0.000959531927946955, + -6.151010165922344e-05, + -0.001416394836269319, + -5.531158240046352e-05, + -0.00035363141796551645, + -0.0010683787986636162, + -0.012577733024954796, + -0.00023934361524879932, + -0.06311207264661789, + -0.972044050693512, + -0.00019929806876461953, + -1.6224243640899658, + -0.8333836197853088, + -0.00016592556494288146, + -0.0008984343148767948, + -1.6927575416048057e-05, + -0.8844207525253296, + -0.023736946284770966, + -4.01811408996582, + -1.6215615272521973, + -0.33087965846061707, + 
-0.0035197706893086433, + -0.00024148885859176517, + -3.0874729418428615e-05, + -3.097301721572876, + -0.030017103999853134, + -0.0006585336523130536, + -1.9430925021879375e-05, + -0.49424058198928833, + -0.0001401803019689396, + -0.00554167665541172, + -1.9073304429184645e-05, + -0.5312279462814331, + -5.748266220092773, + -11.324613571166992, + -1.1340491771697998, + -0.16082678735256195, + -0.8938052654266357, + -3.726792335510254, + -0.8781039714813232, + -0.00017355366435367614, + -0.009945128113031387, + -0.18626560270786285, + -0.013042616657912731, + -0.00010859376925509423, + -7.199982064776123e-05, + -0.00010871296399272978, + -0.00017796363681554794, + -0.00034767304896377027, + -0.0006170752458274364, + -3.0636318115284666e-05, + -0.001077071763575077, + -4.076874756719917e-05, + -0.00024029705673456192, + -0.000982159748673439, + -0.02636047638952732, + -0.00021920185827184469, + -0.632880687713623, + -0.06617539376020432, + -0.00016318420239258558, + -0.4156720042228699, + -0.034620899707078934, + -5.6622808187967166e-05, + -0.0011695933062583208, + -1.597391747054644e-05, + -10.639490127563477, + -0.24528348445892334, + -0.06833283603191376, + -0.0033608165103942156, + -0.02616957761347294, + -0.00036054308293387294, + -3.099393507000059e-05, + -4.044595241546631, + -2.188387393951416, + -0.32720163464546204, + -0.00974209699779749, + -0.0011126763420179486, + -3.302042750874534e-05, + -0.19868847727775574, + -7.56950321374461e-05, + -0.005233398173004389, + -3.158996332786046e-05, + -1.839617371559143, + -0.17654305696487427, + -0.7875567078590393, + -2.1537787914276123, + -0.3631034195423126, + -0.9216613173484802, + -2.0036990642547607, + -0.09243497252464294, + -0.00010740180005086586, + -0.018314307555556297, + -0.208140030503273, + -0.01576320081949234, + -0.00013136000779923052, + -7.390703103737906e-05, + -0.00011264643399044871, + -0.00017045476124621928, + -0.0005171154043637216, + -0.0005422552349045873, + -3.349725011503324e-05, + 
-0.0013309201458469033, + -4.255681051290594e-05, + -0.00023767507809679955, + -0.001095648156479001, + -0.14277544617652893, + -0.00021371940965764225, + -0.00032217081752605736, + -0.35286909341812134, + -0.0002668739762157202, + -1.7962173223495483, + -0.07211553305387497, + -7.974783511599526e-05, + -0.000621959799900651, + -1.2874520507466514e-05, + -1.9048426151275635, + -0.022713735699653625, + -3.9457496313843876e-05, + -0.0005820487276650965, + -0.0002401778765488416, + -3.325883881188929e-05, + -0.02081700973212719, + -0.00022492263815365732, + -0.0003299168893136084, + -2.038458114839159e-05, + -0.008293120190501213, + -1.7404405298293568e-05, + -0.0012493670219555497, + -1.4424220353248529e-05, + -0.0041636452078819275, + -8.344646857949556e-07, + -0.0020267677027732134, + -0.13429519534111023, + -1.9221405982971191, + -0.0093602379783988, + -0.005981876514852047, + -5.817244164063595e-05, + -0.019257837906479836, + -0.27827900648117065, + -0.01921457052230835, + -0.0001652104256208986, + -8.546940807718784e-05, + -0.0001510267611593008, + -0.00016366096679121256, + -0.0002616301644593477, + -0.0005458295345306396, + -3.480850500636734e-05, + -0.0010807631770148873, + -3.7431014789035544e-05, + -0.0003626880934461951, + -0.0010880271438509226, + -0.6327179670333862, + -0.0002374367177253589, + -0.020488178357481956, + -0.10384052991867065, + -0.0001971527235582471, + -0.16368740797042847, + -0.026392173022031784, + -0.00012170527770649642, + -0.0025978884659707546, + -1.9430925021879375e-05, + -7.9701642990112305, + -1.6003714799880981, + -0.2391909956932068, + -0.000502817565575242, + -4.9232225137529895e-05, + -4.135532855987549, + -0.06158669665455818, + -0.00044371772673912346, + -3.755022044060752e-05, + -0.18109248578548431, + -0.00010883215873036534, + -0.006367869209498167, + -7.748303323751315e-05, + -5.440160751342773, + -5.081888198852539, + -0.19470839202404022, + -2.9904420375823975, + -2.4235076904296875, + -0.032352350652217865, + 
-0.00044907975825481117, + -0.04121795669198036, + -0.43260514736175537, + -0.04605478420853615, + -0.00023982033599168062, + -0.0003178806509822607, + -0.00017188502533826977, + -0.00022468426323030144, + -0.0003400462737772614, + -0.0010152667528018355, + -0.00011729506513802335, + -0.001335324952378869, + -4.8874615458771586e-05, + -0.001257463125512004, + -0.004097400698810816, + -0.0008996253600344062, + -0.0002967870968859643, + -0.15579743683338165, + -1.3731565475463867, + -0.00023183519078884274, + -2.0089190006256104, + -3.441042423248291, + -0.0006145734223537147, + -0.0012832987122237682, + -1.9550132492440753e-05, + -1.731110692024231, + -0.027068600058555603, + -2.8266828060150146, + -0.35935577750205994, + -0.023644626140594482, + -0.0005504761938937008, + -0.00017951308109331876, + -2.396077979938127e-05, + -2.3206820487976074, + -0.003744971938431263, + -0.000205018965061754, + -2.288792165927589e-05, + -0.08958229422569275, + -6.592056161025539e-05, + -0.0021721357479691505, + -3.0397906812140718e-05, + -4.5939412117004395, + -8.534799575805664, + -3.483549118041992, + -1.681600570678711, + -0.7201917767524719, + -0.530266284942627, + -0.7154921293258667, + -2.835704803466797, + -0.0004451475979294628, + -0.02453603409230709, + -0.31538400053977966, + -0.0156102878972888, + -0.00013124081306159496, + -8.892617915989831e-05, + -9.738924563862383e-05, + -0.0011036264477297664, + -0.00030357998912222683, + -0.0010406322544440627, + -6.0437283536884934e-05, + -0.0014225849881768227, + -3.671578815556131e-05, + -0.00044705410255119205, + -0.005232923664152622, + -0.0001565095444675535, + -0.0003033416287507862, + -0.18575794994831085, + -0.14061033725738525, + -0.0002706876548472792, + -0.5223819017410278, + -0.035896092653274536, + -5.4834770708112046e-05, + -0.0012011463986709714, + -1.6569954823353328e-05, + -1.681032657623291, + -0.011652856133878231, + -1.6569954823353328e-05, + -0.00047469791024923325, + -0.000256982195423916, + 
-3.361645576660521e-05, + -0.01372707262635231, + -0.00014852374442853034, + -0.00046695294440723956, + -2.288792165927589e-05, + -0.0034659572411328554, + -1.3708974620385561e-05, + -0.0015382850542664528, + -8.702239938429557e-06, + -0.003346678102388978, + -7.152555099310121e-07, + -0.000867467257194221, + -0.02539108693599701, + -1.0509589910507202, + -0.002976156771183014, + -0.005069141276180744, + -5.590759246842936e-05, + -0.015196850523352623, + -0.3093729317188263, + -0.02090352028608322, + -0.00013958434283267707, + -6.460934673668817e-05, + -8.296622399939224e-05, + -0.0004457433824427426, + -0.0005041282274760306, + -0.0011976935202255845, + -4.2914423829643056e-05, + -0.0011085085570812225, + -4.160317621426657e-05, + -0.0005018643569201231, + -0.004558410029858351, + -9.476689592702314e-05, + -0.00037269797758199275, + -0.11347992718219757, + -0.450020968914032, + -0.0003301552205812186, + -2.8804092407226562, + -0.15156973898410797, + -6.246371776796877e-05, + -0.000683074293192476, + -1.3947389561508317e-05, + -2.0683939456939697, + -0.02846144698560238, + -0.04469490796327591, + -1.889275074005127, + -0.0001255195093108341, + -0.00011228884250158444, + -2.4914430468925275e-05, + -7.980701446533203, + -0.39261865615844727, + -1.6454169750213623, + -0.0018256916664540768, + -0.0003761537664104253, + -2.5987286790041253e-05, + -0.27152737975120544, + -3.8742269680369645e-05, + -0.002314033918082714, + -5.364274329622276e-05, + -5.172288417816162, + -0.007181781344115734, + -0.8884671330451965, + -0.20681926608085632, + -1.529428243637085, + -2.335056781768799, + -0.02583100087940693, + -1.8960939645767212, + -0.257112592458725, + -0.1720065474510193, + -8.284702198579907e-05, + -0.011070851236581802, + -0.16333311796188354, + -0.01678428426384926, + -0.00010024998482549563, + -4.911301948595792e-05, + -6.41325386823155e-05, + -0.0003518439189065248, + -0.0003983181086368859, + -0.0007211944903247058, + -2.253030106658116e-05, + 
-0.0009076051646843553, + -2.884823152271565e-05, + -0.00033682872890494764, + -0.01127432007342577, + -5.113947918289341e-05, + -0.0003095386200584471, + -0.162703275680542, + -0.12824533879756927, + -0.0002037079248111695, + -0.5378345251083374, + -0.013359789736568928, + -4.625213477993384e-05, + -0.0007819455349817872, + -1.2993727978027891e-05, + -1.4531102180480957, + -0.9376159310340881, + -0.02013481967151165, + -3.182837463100441e-05, + -0.00028391621890477836, + -0.0002040654799202457, + -1.6212332411669195e-05, + -6.006290435791016, + -0.23482508957386017, + -0.0003094194398727268, + -3.2066785934148356e-05, + -0.2894707918167114, + -0.00010334911348763853, + -0.003178308717906475, + -4.8397800128441304e-05, + -3.3541419506073, + -5.274465084075928, + -2.3055055141448975, + -1.0987294912338257, + -0.019666209816932678, + -0.00022790218645241112, + -0.016233760863542557, + -0.2816391885280609, + -0.028503969311714172, + -0.0001358893496217206, + -0.00010394509445177391, + -8.856858039507642e-05, + -0.00036137725692242384, + -0.00029452278977259994, + -0.0008922410197556019, + -2.539125671319198e-05, + -0.0011102947173640132, + -3.40932747349143e-05, + -0.0004843492351938039, + -0.006350101437419653, + -5.9602869441732764e-05, + -0.0002796259068418294, + -0.3986394703388214, + -0.10029242187738419, + -0.00024196557933464646, + -1.9691603183746338, + -0.7402586936950684, + -7.056941103655845e-05, + -0.0003618539194576442, + -1.0371154530730564e-05, + -1.4170231819152832, + -0.008172051049768925, + -1.3708974620385561e-05, + -0.00041607304592616856, + -0.00014888131408952177, + -2.6464111215318553e-05, + -0.018121162429451942, + -0.00010764019680209458, + -0.0002335037279408425, + -2.3007127310847864e-05, + -0.002049014437943697, + -1.0609570381348021e-05, + -0.0011868583969771862, + -7.867782187531702e-06, + -0.0018794744974002242, + -5.960462772236497e-07, + -0.0007434703293256462, + -0.02911354973912239, + -1.7920753955841064, + -0.0026135831139981747, + 
-0.00308870617300272, + -3.659658250398934e-05, + -0.010810147039592266, + -0.20098412036895752, + -0.01644638366997242, + -0.00013207517622504383, + -6.854299135738984e-05, + -7.152301259338856e-05, + -0.00024720950750634074, + -0.00033468366018496454, + -0.0010001424234360456, + -5.054346183896996e-05, + -0.0009557208395563066, + -3.981510963058099e-05, + -0.0004465774691198021, + -0.011578621342778206, + -7.211902266135439e-05, + -0.0002416080387774855, + -0.09539440274238586, + -0.057392168790102005, + -0.0002840353990904987, + -0.21088920533657074, + -0.0078902468085289, + -8.606540359323844e-05, + -0.0007384672062471509, + -1.3589766240329482e-05, + -0.8148440718650818, + -0.025661379098892212, + -2.113894462585449, + -0.01820814050734043, + -0.0010720703285187483, + -0.0002908283786382526, + -0.00011181206355104223, + -1.9550132492440753e-05, + -1.9963352680206299, + -0.011685965582728386, + -0.00010299152199877426, + -1.6093124941107817e-05, + -0.3427979350090027, + -0.00010358751023886725, + -0.002419165801256895, + -5.07818695041351e-05, + -9.356146812438965, + -2.63590145111084, + -0.0489899143576622, + -0.429649293422699, + -2.441277027130127, + -0.09116854518651962, + -1.7202471494674683, + -1.2776923179626465, + -1.2828468084335327, + -0.1033272072672844, + -0.013413426466286182, + -0.00016091958968900144, + -0.006314327474683523, + -0.1650361269712448, + -0.009155434556305408, + -8.630380034446716e-05, + -6.007967749610543e-05, + -6.210611172718927e-05, + -0.00027497802511788905, + -0.0005628670332953334, + -0.0008046964649111032, + -4.160317621426657e-05, + -0.0009633429581299424, + -2.9444261599564925e-05, + -0.0003147821989841759, + -0.003070523263886571, + -3.969590397900902e-05, + -0.00025340684805996716, + -0.16765674948692322, + -0.220333993434906, + -0.00025281094713136554, + -1.6686129570007324, + -0.08651255071163177, + -7.4741430580616e-05, + -0.00032062159152701497, + -9.536697689327411e-06, + -8.607754707336426, + -2.7989468574523926, + 
-0.006830438040196896, + -0.00042500998824834824, + -4.410646579344757e-05, + -2.2325727939605713, + -0.09642884135246277, + -0.0005049622268415987, + -1.4662635294371285e-05, + -3.892613172531128, + -0.0008376903715543449, + -0.004279621876776218, + -5.745722592109814e-05, + -2.696786642074585, + -0.44925373792648315, + -0.37875908613204956, + -0.27114248275756836, + -1.023728609085083, + -4.712882995605469, + -1.415423035621643, + -2.8054561614990234, + -0.4460236430168152, + -0.0005779979983344674, + -0.02468189038336277, + -0.30965328216552734, + -0.02052520029246807, + -0.00012730741582345217, + -9.619726915843785e-05, + -8.749579137656838e-05, + -0.000350175570929423, + -0.0003150205302517861, + -0.0007310817018151283, + -3.0636318115284666e-05, + -0.0011643542675301433, + -3.2305197237292305e-05, + -0.00026913834153674543, + -0.011463016271591187, + -5.411955135059543e-05, + -0.00023231192608363926, + -0.1063343733549118, + -0.037034809589385986, + -0.0001248043408850208, + -0.3663400411605835, + -0.01425135973840952, + -5.376194530981593e-05, + -0.000933926145080477, + -1.4305012882687151e-05, + -1.5244930982589722, + -0.008558854460716248, + -1.8358061424805783e-05, + -0.0002698534226510674, + -0.00022075122979003936, + -3.576214658096433e-05, + -0.01590365171432495, + -0.00012706902634818107, + -0.0002901133266277611, + -2.2649508537142538e-05, + -0.0032194233499467373, + -1.1920858014491387e-05, + -0.0013312773080542684, + -8.22540732769994e-06, + -0.001732040662318468, + -4.768370445162873e-07, + -0.0007115454645827413, + -0.11607333272695541, + -5.158000946044922, + -0.00630958890542388, + -0.006455875933170319, + -3.886147169396281e-05, + -0.007113605737686157, + -0.16176439821720123, + -0.01025608740746975, + -9.321732068201527e-05, + -5.435795901576057e-05, + -7.70062324590981e-05, + -0.0002002515539061278, + -0.0003270567976869643, + -0.0011002921964973211, + -3.93382906622719e-05, + -0.0009735850035212934, + -4.076874756719917e-05, + 
-0.00036042393185198307, + -0.011448992416262627, + -0.00010787858627736568, + -0.00022289653134066612, + -0.12719827890396118, + -0.16689445078372955, + -0.00029869386344216764, + -1.129071831703186, + -0.46998509764671326, + -0.0001429217227268964, + -0.0004334702098276466, + -1.823885577323381e-05, + -7.808990478515625, + -0.6958405375480652, + -0.0011538759572431445, + -0.00010084597306558862, + -2.1815061700181104e-05, + -3.412889242172241, + -0.0024302254896610975, + -0.1256120651960373, + -0.0001486429391661659, + -2.932505594799295e-05, + -0.016119161620736122, + -2.1219027985353023e-05, + -0.0014936492079868913, + -6.794906312279636e-06, + -4.649867057800293, + -0.42487168312072754, + -1.3419163227081299, + -0.3015914857387543, + -0.00015341058315243572, + -0.0032649326603859663, + -0.11564143747091293, + -0.00739337969571352, + -5.8887653722194955e-05, + -6.615896563744172e-05, + -5.972207145532593e-05, + -0.00020644917094614357, + -0.000301673193462193, + -0.0003761537664104253, + -2.6702524337451905e-05, + -0.0008094609947875142, + -3.2305197237292305e-05, + -0.0002474478678777814, + -0.018454870209097862, + -7.73638384998776e-05, + -0.00022837892174720764, + -0.04869883507490158, + -0.02372216247022152, + -0.0002051381452474743, + -0.15266406536102295, + -0.0037327392492443323, + -7.557583012385294e-05, + -0.0005665604257956147, + -1.4662635294371285e-05, + -2.1065256595611572, + -0.02570541389286518, + -2.0099081993103027, + -2.7118430137634277, + -0.1484161764383316, + -0.007964756339788437, + -0.00016342257731594145, + -1.597391747054644e-05, + -0.8920754194259644, + -0.0009690594743005931, + -0.00029023250681348145, + -1.2993727978027891e-05, + -0.07993864268064499, + -5.400034933700226e-05, + -0.00158791767898947, + -1.0609570381348021e-05, + -4.331461429595947, + -6.81968355178833, + -3.366002082824707, + -1.850673794746399, + -0.00040391870425082743, + -0.04611193388700485, + -0.06791424006223679, + -0.004945189692080021, + 
-9.107174992095679e-05, + -7.557583012385294e-05, + -6.747018051100895e-05, + -0.00024399164249189198, + -0.000321336614433676, + -0.0006528153317049146, + -3.2782016205601394e-05, + -0.0012151960982009768, + -3.957670196541585e-05, + -0.0002205128694185987, + -0.016214992851018906, + -0.00019095504831057042, + -0.0001456631434848532, + -7.712543447269127e-05, + -0.33043625950813293, + -0.00017629499780014157, + -2.590480089187622, + -0.16181793808937073, + -0.00011646069469861686, + -0.0006735440110787749, + -2.109982233378105e-05, + -1.6486821174621582, + -0.01151864044368267, + -1.8954096958623268e-05, + -0.0003233625029679388, + -0.00020644917094614357, + -3.111314072157256e-05, + -0.017416512593626976, + -0.00012766500003635883, + -0.0003415954706724733, + -2.13382354559144e-05, + -0.006446637213230133, + -1.823885577323381e-05, + -0.0012438902631402016, + -1.1205610462639015e-05, + -0.006591127719730139, + -7.152555099310121e-07, + -0.0017049076268449426, + -0.13135236501693726, + -3.228759288787842, + -0.002643782878294587, + -0.004842340014874935, + -3.480850500636734e-05, + -0.010503842495381832, + -0.16338221728801727, + -0.011769498698413372, + -0.00011574551899684593, + -9.727005090098828e-05, + -8.582700684200972e-05, + -0.0004538459761533886, + -0.00020740265608765185, + -0.001342587056569755, + -8.964136941358447e-05, + -0.0014018717920407653, + -4.935142715112306e-05, + -0.0006431656656786799, + -0.5765135288238525, + -0.0009291622554883361, + -0.00027998341829515994, + -0.008964410983026028, + -0.03303813934326172, + -0.00018451895448379219, + -0.07687719166278839, + -0.00454594986513257, + -0.00018439977429807186, + -0.0023830130230635405, + -2.706014311115723e-05, + -1.8103313446044922, + -0.7522969245910645, + -0.022507335990667343, + -2.074220174108632e-05, + -0.00026222606538794935, + -0.00020740265608765185, + -2.706014311115723e-05, + -3.700786590576172, + -0.26737019419670105, + -9.357491217087954e-05, + -6.031808152329177e-05, + 
-0.13705354928970337, + -2.407998726994265e-05, + -0.003684044349938631, + -3.2782016205601394e-05, + -2.9476141929626465, + -1.1526018381118774, + -2.6757259368896484, + -5.31315279006958, + -0.7695194482803345, + -0.00014876213390380144, + -0.8328413963317871, + -5.100983142852783, + -0.1275785118341446, + -0.008235306479036808, + -0.00037281715776771307, + -0.02394961006939411, + -0.5179875493049622, + -0.04619366303086281, + -0.00021705655672121793, + -0.00021765247220173478, + -0.0001461399078834802, + -0.0007413261337205768, + -0.0006660388899035752, + -0.0015581621555611491, + -6.8662193370983e-05, + -0.002233869396150112, + -4.494089080253616e-05, + -0.0006101653561927378, + -0.0006289887824095786, + -0.0033358661457896233, + -0.00045074793160893023, + -0.15180595219135284, + -0.07985830307006836, + -0.00015937011630740017, + -2.2477855682373047, + -0.4471043348312378, + -0.0001734344696160406, + -0.0006040894077159464, + -1.680836794548668e-05, + -2.318458080291748, + -0.01888836920261383, + -0.029085876420140266, + -1.1253407001495361, + -0.00021741411183029413, + -0.00012003655137959868, + -2.8013790142722428e-05, + -3.1507949829101562, + -0.005721264518797398, + -0.00040904260822571814, + -1.7881233361549675e-05, + -0.04304421693086624, + -0.0001591317413840443, + -0.005429995711892843, + -3.242440288886428e-05, + -4.896542549133301, + -3.2877321243286133, + -0.17550288140773773, + -8.526089668273926, + -0.2559642493724823, + -0.00015770144818816334, + -0.004955509677529335, + -0.20714037120342255, + -0.023553114384412766, + -0.00015496007108595222, + -0.0001134808044298552, + -9.250213042832911e-05, + -0.000288087350782007, + -0.0004409771354403347, + -0.0007110689766705036, + -4.6132929128361866e-05, + -0.0009153467253781855, + -3.433168603805825e-05, + -0.00015484087634831667, + -0.0001292145170737058, + -0.0022287548054009676, + -0.0002269487304147333, + -0.11395295709371567, + -0.05913611873984337, + -8.356221951544285e-05, + -0.4039720594882965, + 
-0.019538793712854385, + -5.924526340095326e-05, + -0.0007176207727752626, + -1.7881233361549675e-05, + -1.6992816925048828, + -0.004352619871497154, + -6.6756979322235566e-06, + -0.00017093151109293103, + -0.0001284993631998077, + -3.3378044463461265e-05, + -0.013412484899163246, + -8.713819261174649e-05, + -0.0004928089329041541, + -2.288792165927589e-05, + -0.0012643685331568122, + -1.3351351299206726e-05, + -0.0019104102393612266, + -8.940656698541716e-06, + -0.0033124599140137434, + -4.768370445162873e-07, + -0.0009848987683653831, + -0.07256874442100525, + -1.7665941715240479, + -0.00281461956910789, + -0.0027610058896243572, + -2.9682672902708873e-05, + -0.0075036585330963135, + -0.16648568212985992, + -0.014109030365943909, + -9.63164638960734e-05, + -6.603976362384856e-05, + -7.331102824537084e-05, + -0.0003323002893012017, + -0.00042083943844772875, + -0.0010620674584060907, + -2.8609820219571702e-05, + -0.000990257947705686, + -4.029192859889008e-05, + -0.0001541257370263338, + -0.0001658063702052459, + -0.0010433712741360068, + -0.0002379134384682402, + -0.08282912522554398, + -0.1620505303144455, + -0.0001578206429257989, + -1.9873682260513306, + -0.03700195625424385, + -8.594620157964528e-05, + -0.00035232058144174516, + -2.90866428258596e-05, + -1.0645859241485596, + -0.012771833688020706, + -1.8788448572158813, + -0.04745874181389809, + -0.0029150634072721004, + -0.0002858230145648122, + -8.082063141046092e-05, + -2.8729025871143676e-05, + -4.2793378829956055, + -0.008196880109608173, + -9.822363062994555e-05, + -4.9470632802695036e-05, + -5.399019241333008, + -0.0015862513100728393, + -0.0018035589018836617, + -2.9444261599564925e-05, + -3.8089842796325684, + -1.3950530290603638, + -0.17507919669151306, + -4.1786346435546875, + -9.410017013549805, + -0.00014709345123264939, + -2.16685152053833, + -0.5008745193481445, + -0.013433892279863358, + -0.00029976642690598965, + -0.006172403693199158, + -0.22438427805900574, + -0.015963135287165642, + 
-0.00010489867418073118, + -7.426462980220094e-05, + -6.890059739816934e-05, + -0.0002874914789572358, + -0.0004033228906337172, + -0.0006624649395234883, + -3.802703940891661e-05, + -0.001104817260056734, + -2.8967437174287625e-05, + -0.000125281119835563, + -0.00011634149996098131, + -0.0016071987338364124, + -0.0001752223033690825, + -0.04927569255232811, + -0.03999283164739609, + -8.427741704508662e-05, + -0.11036300659179688, + -0.0022922686766833067, + -5.125868119648658e-05, + -0.0007711059297434986, + -1.6569954823353328e-05, + -1.1996040344238281, + -6.017496585845947, + -3.3771719932556152, + -0.0015197168104350567, + -0.0001720042055239901, + -8.05822346592322e-05, + -1.9701510667800903, + -0.015215284191071987, + -0.00046957432641647756, + -4.5536911784438416e-05, + -0.3501690626144409, + -6.508615479106084e-05, + -0.013412720523774624, + -0.0002317160106031224, + -10.721491813659668, + -0.001794158248230815, + -5.900764465332031, + -0.05698608234524727, + -1.9666205644607544, + -0.34450024366378784, + -0.24932177364826202, + -1.1890842914581299, + -0.9316995143890381, + -0.5700393915176392, + -0.18522746860980988, + -0.08411185443401337, + -0.00032610344351269305, + -0.016760369762778282, + -0.310769647359848, + -0.04111167788505554, + -0.00015889335190877318, + -0.00011395759065635502, + -0.00010418349120300263, + -0.0003389737685211003, + -0.0006182666402310133, + -0.001039679627865553, + -6.770858453819528e-05, + -0.001258891774341464, + -5.876845170860179e-05, + -0.0003499372396618128, + -0.00027724236133508384, + -0.0029526231810450554, + -0.0003165697562508285, + -0.25983527302742004, + -0.031029406934976578, + -0.00018880968855228275, + -0.7229459881782532, + -0.42579957842826843, + -0.00011705666838679463, + -0.00047195740626193583, + -2.3364747903542593e-05, + -0.9790778160095215, + -0.0029993331991136074, + -5.125986263010418e-06, + -0.00018690270371735096, + -0.00016091958968900144, + -3.755022044060752e-05, + -0.00900670699775219, + 
-8.642300235806033e-05, + -0.0004804172203876078, + -3.838465272565372e-05, + -0.0015756584471091628, + -1.168244216387393e-05, + -0.001709667849354446, + -1.0013530300057027e-05, + -0.0022142434027045965, + -5.960462772236497e-07, + -0.0006964165368117392, + -0.05425402522087097, + -1.5528278350830078, + -0.002721655648201704, + -0.003402280155569315, + -3.6477376852417365e-05, + -0.007222968153655529, + -0.14785511791706085, + -0.013813492842018604, + -0.00012063252506777644, + -9.738924563862383e-05, + -9.881961887003854e-05, + -0.00025900822947733104, + -0.00028236693469807506, + -0.0010882653295993805, + -4.446407547220588e-05, + -0.0008232779800891876, + -4.7801782784517854e-05, + -0.0001911934232339263, + -0.00020382710499688983, + -0.0037347583565860987, + -0.00023493390472140163, + -0.016995148733258247, + -0.028428077697753906, + -0.00015054999676067382, + -0.05958176776766777, + -0.0022499265614897013, + -8.928377064876258e-05, + -0.0007566926069557667, + -2.038458114839159e-05, + -6.74626350402832, + -4.031385898590088, + -0.010314728133380413, + -0.0005830018781125546, + -0.00016175392374861985, + -4.279521817807108e-05, + -4.910806655883789, + -0.3867932856082916, + -0.00020466140995267779, + -2.455681169521995e-05, + -0.40993309020996094, + -3.075552376685664e-05, + -0.002136925933882594, + -1.5258672647178173e-05, + -1.4743690490722656, + -0.466409295797348, + -2.986236095428467, + -0.5145793557167053, + -0.3861558437347412, + -0.00023648326168768108, + -0.060666244477033615, + -0.0004374024283606559, + -0.0032959445379674435, + -0.003968104254454374, + -0.0018072477541863918, + -4.768258077092469e-05, + -0.9783220291137695, + -1.0383716821670532, + -0.6705473065376282, + -2.172899007797241, + -0.1931028664112091, + -0.05653104931116104, + -0.0004231034545227885, + -0.009201028384268284, + -0.20085793733596802, + -0.015902360901236534, + -0.00013207517622504383, + -0.00011634149996098131, + -9.154854342341423e-05, + -0.0002989322238136083, + 
-0.000276765669696033, + -0.0008761619683355093, + -5.4596363042946905e-05, + -0.0012877037515863776, + -5.245071224635467e-05, + -0.00014399446081370115, + -0.00014304091746453196, + -0.002012848388403654, + -0.00026043839170597494, + -0.050352130085229874, + -0.016213351860642433, + -0.00014923889830242842, + -1.3270337581634521, + -0.017757130786776543, + -8.725739462533966e-05, + -0.0003123987407889217, + -2.3364747903542593e-05, + -1.770219087600708, + -0.027282992377877235, + -1.7292673587799072, + -1.5430668592453003, + -0.09708311408758163, + -0.06372363120317459, + -0.00020180096908006817, + -4.756337511935271e-05, + -6.762560844421387, + -0.11426064372062683, + -0.0006945105269551277, + -5.745722592109814e-05, + -0.23964034020900726, + -7.080780778778717e-05, + -0.0019281383138149977, + -0.00011657988943625242, + -1.6634957790374756, + -3.133596420288086, + -1.06369948387146, + -0.20282019674777985, + -0.440325528383255, + -2.2919445037841797, + -2.6773011684417725, + -2.4511003494262695, + -2.022627353668213, + -0.7157211899757385, + -0.00033623288618400693, + -0.006556428037583828, + -0.18528789281845093, + -0.010350123979151249, + -9.691245941212401e-05, + -9.941560711013153e-05, + -0.0001062098381225951, + -0.0002244459028588608, + -0.0003002431185450405, + -0.0003911683743353933, + -3.158996332786046e-05, + -0.0008713977294974029, + -4.875540980719961e-05, + -9.083335316972807e-05, + -0.00013422065239865333, + -0.0032467530108988285, + -0.0002611534437164664, + -0.011103743687272072, + -0.014522447250783443, + -0.0001003691868390888, + -0.04763209819793701, + -0.0015930355293676257, + -8.880697714630514e-05, + -0.0006610354175791144, + -2.062299427052494e-05, + -1.4736919403076172, + -0.0015160269103944302, + -5.722029527532868e-06, + -0.0001426833332516253, + -0.00025138078490272164, + -4.303362584323622e-05, + -0.006412051152437925, + -8.177422569133341e-05, + -0.0003953390696551651, + -4.51792984677013e-05, + -0.0015100754098966718, + 
-1.0847986231965479e-05, + -0.0021766559220850468, + -1.3112935448589269e-05, + -0.0017056216020137072, + -5.960462772236497e-07, + -0.00045658653834834695, + -0.03380563110113144, + -1.6861530542373657, + -0.0011235122801735997, + -0.0027228444814682007, + -3.2543604902457446e-05, + -0.0028300732374191284, + -0.04190889745950699, + -0.006303310859948397, + -0.00010799778101500124, + -7.295342220459133e-05, + -6.90197994117625e-05, + -0.0002094287920044735, + -0.00017915551143232733, + -0.0007649118197150528, + -3.3854863431770355e-05, + -0.0009750141180120409, + -5.185469490243122e-05, + -0.0001230164198204875, + -0.00015221867943182588, + -0.00366337806917727, + -0.00027378625236451626, + -0.00873471051454544, + -0.014125015586614609, + -0.00013779645087197423, + -0.2786974012851715, + -0.0429004468023777, + -0.00015221867943182588, + -0.0005259322933852673, + -2.0861407392658293e-05, + -7.4979376792907715, + -2.5812153816223145, + -0.0006475735572166741, + -0.00032395837479270995, + -4.3987260141875595e-05, + -0.38662397861480713, + -0.07727815210819244, + -0.0005353448214009404, + -6.210611172718927e-05, + -0.10053620487451553, + -4.51792984677013e-05, + -0.004477594513446093, + -3.0397906812140718e-05, + -8.758296012878418, + -0.4402102530002594, + -0.2472418248653412, + -0.5627955794334412, + -0.042171675711870193, + -0.03491748869419098, + -5.941390514373779, + -0.004192491993308067, + -0.11302625387907028, + -0.5369495153427124, + -0.0003328961320221424, + -0.0049365307204425335, + -0.057854458689689636, + -0.007558793295174837, + -8.916457591112703e-05, + -9.047575440490618e-05, + -8.141662692651153e-05, + -0.0006507901125587523, + -0.00019464982324279845, + -0.0006775943911634386, + -2.3364747903542593e-05, + -0.0012484145117923617, + -5.447716102935374e-05, + -0.00016425691137555987, + -0.00019727191829588264, + -0.012608221732079983, + -0.00020859450160060078, + -0.014227267354726791, + -0.00964115560054779, + -0.00013350549852475524, + 
-0.03465360403060913, + -0.0008008848526515067, + -0.00010239553375868127, + -0.0007454953738488257, + -2.0861407392658293e-05, + -2.182055950164795, + -0.030151404440402985, + -2.2387242317199707, + -4.8748321533203125, + -0.07910432666540146, + -0.0014863882679492235, + -0.00028081765049137175, + -6.55629628454335e-05, + -3.332869052886963, + -4.393488883972168, + -0.1467350423336029, + -0.0036104037426412106, + -0.0003040566807612777, + -0.00010895135346800089, + -0.2704607844352722, + -3.6477376852417365e-05, + -0.002591705648228526, + -2.9682672902708873e-05, + -4.947231292724609, + -3.2159130573272705, + -0.8367561101913452, + -0.5556290149688721, + -0.0002233732520835474, + -0.0060651772655546665, + -0.05365833640098572, + -0.0071886456571519375, + -9.63164638960734e-05, + -0.00010072677832795307, + -9.858122211880982e-05, + -0.0003960540343541652, + -0.0006039702566340566, + -0.0006522196927107871, + -1.811964830267243e-05, + -0.001042775809764862, + -3.790783375734463e-05, + -0.00011514954530866817, + -0.0001652104256208986, + -0.05494809150695801, + -0.00014506718434859067, + -0.00021050144277978688, + -0.014802505262196064, + -0.00017915551143232733, + -1.7102066278457642, + -0.02825750596821308, + -0.00011300401820335537, + -0.0003519630990922451, + -3.075552376685664e-05, + -0.554995596408844, + -0.0013822296168655157, + -4.6491513785440475e-06, + -0.00014482879487331957, + -0.00019810620869975537, + -3.504691630951129e-05, + -0.006834581959992647, + -6.389413465512916e-05, + -0.0004396664153318852, + -4.60137271147687e-05, + -0.0012897277483716607, + -1.1920858014491387e-05, + -0.001943962532095611, + -1.4424220353248529e-05, + -0.0016702761640772223, + -5.960462772236497e-07, + -0.0005274811992421746, + -0.043414343148469925, + -1.5102243423461914, + -0.0018298563081771135, + -0.0035949621815234423, + -6.842378934379667e-05, + -0.008245711214840412, + -0.08723266422748566, + -0.00939271505922079, + -0.00011419598013162613, + -0.0001230164198204875, + 
-9.464769391342998e-05, + -0.0002865380665753037, + -0.0005069877952337265, + -0.001016934053041041, + -3.2305197237292305e-05, + -0.0009629856795072556, + -4.827859811484814e-05, + -0.00021717573690693825, + -0.00032848684350028634, + -0.012733934447169304, + -0.000196556793525815, + -0.0012980615720152855, + -0.0077531603164970875, + -0.00012385078298393637, + -0.01761084794998169, + -0.0013621109537780285, + -0.00011848701251437888, + -0.0013394916895776987, + -2.407998726994265e-05, + -4.505744934082031, + -1.2715730667114258, + -0.0005052005290053785, + -0.00024971229140646756, + -3.635817120084539e-05, + -4.3336405754089355, + -0.0815289318561554, + -0.028655847534537315, + -0.00010430268594063818, + -7.343022298300639e-05, + -0.158114492893219, + -1.764281842042692e-05, + -0.003166425507515669, + -5.960446742392378e-06, + -4.626138687133789, + -0.5413240194320679, + -11.11661148071289, + -6.66420316696167, + -0.5860735177993774, + -1.0599334239959717, + -2.200112819671631, + -0.4268365502357483, + -0.027302712202072144, + -0.15124760568141937, + -0.12854908406734467, + -3.041227102279663, + -0.026920655742287636, + -0.0003856868715956807, + -0.004746242426335812, + -0.07085907459259033, + -0.008411810733377934, + -0.00010823617776622996, + -5.972207145532593e-05, + -5.507317473529838e-05, + -0.00023850933939684182, + -0.0004319211875554174, + -0.0008380476501770318, + -1.823885577323381e-05, + -0.0009161804337054491, + -3.683499380713329e-05, + -0.00010918975021922961, + -0.00016044282529037446, + -0.0005364171229302883, + -0.0001248043408850208, + -0.10185468196868896, + -0.02194770984351635, + -0.00011252723925281316, + -0.6942679286003113, + -0.21981695294380188, + -6.496695277746767e-05, + -0.00030393750057555735, + -2.13382354559144e-05, + -3.1545064449310303, + -0.021652380004525185, + -0.02087036333978176, + -0.89057856798172, + -9.619726915843785e-05, + -8.129743218887597e-05, + -2.5152843591058627e-05, + -4.086198806762695, + -1.0591976642608643, + 
-0.0020325970835983753, + -4.1483970562694594e-05, + -0.596172571182251, + -3.242440288886428e-05, + -0.0019346822518855333, + -1.6927575416048057e-05, + -3.4360618591308594, + -2.4312753677368164, + -1.9711253643035889, + -4.358899116516113, + -10.540913581848145, + -5.990867614746094, + -0.266180157661438, + -0.000266278104390949, + -0.003696990432217717, + -0.03691418468952179, + -0.005084204487502575, + -7.73638384998776e-05, + -5.9960475482512265e-05, + -6.12716976320371e-05, + -0.0001915509783430025, + -0.0004040378553327173, + -0.0004508670826908201, + -2.2172682292875834e-05, + -0.0010245556477457285, + -3.862306402879767e-05, + -7.652943895664066e-05, + -0.00010585224663373083, + -0.00034791138023138046, + -0.0001134808044298552, + -0.009721791371703148, + -0.01306991372257471, + -7.86750388215296e-05, + -0.06928819417953491, + -0.0019708510953933, + -8.070142939686775e-05, + -0.0006008726777508855, + -1.9550132492440753e-05, + -1.2050050497055054, + -0.0022362482268363237, + -4.887569048150908e-06, + -0.00016652150952722877, + -0.0001282609737245366, + -3.3854863431770355e-05, + -0.005613160319626331, + -4.935142715112306e-05, + -0.00040618274942971766, + -3.814624506048858e-05, + -0.0012768696760758758, + -6.9141146923357155e-06, + -0.0021407324820756912, + -1.0251946150674485e-05, + -0.001328301033936441, + -4.768370445162873e-07, + -0.00039104922325350344, + -0.03403102979063988, + -2.371554374694824, + -0.0011966219171881676, + -0.0017084777355194092, + -1.2397689715726301e-05, + -0.0012181727215647697, + -0.027773091569542885, + -0.004225967917591333, + -7.688703772146255e-05, + -10.750052452087402, + -0.09749454259872437, + -0.0398833304643631, + -0.05019160360097885, + -0.02639356628060341, + -0.001116844010539353, + -0.010394011624157429, + -0.0002687808300834149, + -0.0412154421210289, + -0.17060238122940063, + -0.44570907950401306, + -0.001759529928676784, + -0.8481433987617493, + -3.9174411296844482, + -0.0011847150744870305, + 
-1.8217713832855225, + -1.9833719730377197, + -0.0033980030566453934, + -0.022340646013617516, + -0.0005044856225140393, + -11.916642189025879, + -2.2062525749206543, + -0.011109520681202412, + -0.0025012181140482426, + -0.00047839165199548006, + -10.590877532958984, + -5.111791133880615, + -0.8751921653747559, + -0.19319908320903778, + -0.04376664385199547, + -0.019606946036219597, + -0.00042000532266683877, + -9.505635261535645, + -0.07715455442667007, + -0.005082899704575539, + -0.04224858805537224, + -0.03572046384215355, + -0.0011238694423809648, + -5.344630241394043, + -3.876430034637451, + -12.252359390258789, + -4.9860382080078125, + -2.668943405151367, + -1.16416597366333, + -2.514509677886963, + -2.5190258026123047, + -14.754651069641113, + -5.655267715454102, + -6.61380672454834, + -4.71486234664917, + -0.5776815414428711, + -1.3986684083938599, + -2.637193202972412, + -1.1604831218719482, + -1.4959537982940674, + -0.004402587655931711, + -0.5065803527832031, + -3.3776161670684814, + -0.7203826308250427, + -0.02161656692624092, + -0.819121241569519, + -0.04418942704796791, + -1.7282390594482422, + -0.05629342794418335, + -0.008580365218222141, + -0.000747877755202353, + -0.013715313747525215, + -0.00015138434537220746, + -0.006047403905540705, + -0.024643857032060623, + -0.05186835676431656, + -0.0005345107638277113, + -0.10883784294128418, + -1.3612172603607178, + -0.0003692421887535602, + -1.357957363128662, + -0.05831316113471985, + -0.00040570611599832773, + -0.0035074164625257254, + -6.437094270950183e-05, + -1.7280149459838867, + -0.026309387758374214, + -2.3754658699035645, + -0.05959097668528557, + -0.0019271865021437407, + -0.0006563892820850015, + -0.00038985759601928294, + -0.00013529339048545808, + -6.799666881561279, + -0.4319588541984558, + -0.0018134353449568152, + -0.00010084597306558862, + -3.564793109893799, + -0.0016862234333530068, + -0.007215393707156181, + -0.00018916724366135895, + -4.893386363983154, + -0.7495713233947754, + 
-0.04057759419083595, + -0.16563259065151215, + -3.7694530487060547, + -0.7686876654624939, + -0.02867751009762287, + -3.4293549060821533, + -1.9938279390335083, + -3.87074613571167, + -7.779223918914795, + -0.11301646381616592, + -0.0007675323868170381, + -0.0353383906185627, + -0.5969783663749695, + -0.03809810429811478, + -0.00048828122089616954, + -0.024168511852622032, + -0.0024346255231648684, + -0.006569692399352789, + -0.002209961414337158, + -0.001069331425242126, + -7.819823804311454e-05, + -0.0029135181102901697, + -4.60137271147687e-05, + -0.0003582789213396609, + -0.001116367639042437, + -0.002629396505653858, + -0.0002420847595203668, + -0.17575480043888092, + -0.017076482996344566, + -0.0001431601122021675, + -0.10536163300275803, + -0.00507151335477829, + -0.00011181206355104223, + -0.0018749530427157879, + -2.3603161025675945e-05, + -0.8358778953552246, + -0.002124911407008767, + -9.894321920000948e-06, + -0.00019214690837543458, + -0.0002456601650919765, + -3.516612196108326e-05, + -0.008302814327180386, + -0.00010895135346800089, + -0.0006008726777508855, + -3.2543604902457446e-05, + -0.006115178111940622, + -2.1219027985353023e-05, + -0.0036275077145546675, + -1.7165990357170813e-05, + -0.003067908575758338, + -9.536738616588991e-07, + -0.0006908176001161337, + -0.02611708454787731, + -1.3316965103149414, + -0.003817296586930752, + -0.006795391906052828, + -4.684815212385729e-05, + -0.007690228521823883, + -0.14891591668128967, + -0.013032732531428337, + -0.0002714027068577707, + -0.011644137091934681, + -0.00091856240760535, + -0.0013096098555251956, + -0.0007771808886900544, + -0.0009541726321913302, + -5.638440416078083e-05, + -0.0014388932613655925, + -5.018585216021165e-05, + -0.00020930961181875318, + -0.0006467396160587668, + -0.0013236580416560173, + -0.00019333878299221396, + -0.05778864026069641, + -0.023562893271446228, + -0.0001699779968475923, + -0.4867134690284729, + -0.17518886923789978, + -6.01988795096986e-05, + 
-0.00056429672986269, + -2.396077979938127e-05, + -10.983257293701172, + -3.4146568775177, + -0.007948435842990875, + -0.005365850869566202, + -0.00041166413575410843, + -6.0437283536884934e-05, + -1.4208624362945557, + -0.014981495216488838, + -0.00011193125828867778, + -2.95634672511369e-05, + -0.3359139859676361, + -6.425174069590867e-05, + -0.0036992470268160105, + -1.7523612768854946e-05, + -1.6273220777511597, + -12.038379669189453, + -1.8510823249816895, + -4.6685380935668945, + -1.03892183303833, + -3.5619592666625977, + -3.119525194168091, + -8.74183177947998, + -0.1955474466085434, + -0.00022349244682118297, + -0.005337630398571491, + -0.07253769785165787, + -0.0067605809308588505, + -0.00018821375851985067, + -0.01270250789821148, + -0.0005373702733777463, + -0.0013699679402634501, + -0.0009596510208211839, + -0.0003953390696551651, + -1.7165990357170813e-05, + -0.0010408704401925206, + -3.4450891689630225e-05, + -0.00011038171214750037, + -0.00048351517762057483, + -0.0015029336791485548, + -0.00013958434283267707, + -0.027578983455896378, + -0.02192368544638157, + -8.141662692651153e-05, + -0.11562338471412659, + -0.0031276855152100325, + -6.5205356804654e-05, + -0.0007344171172007918, + -2.1457441107486375e-05, + -1.4039907455444336, + -0.8585066795349121, + -0.12097951024770737, + -4.9232225137529895e-05, + -0.00045503751607611775, + -0.0001479277852922678, + -2.8967437174287625e-05, + -3.316209316253662, + -0.22754307091236115, + -0.037047676742076874, + -0.00010632903286023065, + -5.602679812000133e-05, + -0.10701240599155426, + -2.1815061700181104e-05, + -0.0025769618805497885, + -2.932505594799295e-05, + -2.9098081588745117, + -0.23772671818733215, + -2.5728368759155273, + -1.0628935098648071, + -0.569791853427887, + -1.5512791872024536, + -0.22174018621444702, + -0.2053954154253006, + -0.668795108795166, + -0.00032574593205936253, + -0.005275258328765631, + -0.17121490836143494, + -0.01520049013197422, + -0.00027164106722921133, + 
-0.018145864829421043, + -0.0008275659638457, + -0.0013598490040749311, + -0.0007223857101053, + -0.0005415403284132481, + -3.075552376685664e-05, + -0.0016680150292813778, + -4.124556289752945e-05, + -0.00020203932945150882, + -0.0005315321614034474, + -0.0016384999034926295, + -0.000169382052263245, + -0.01945134624838829, + -0.018782030791044235, + -0.0001429217227268964, + -1.4800734519958496, + -0.046756841242313385, + -9.667406266089529e-05, + -0.0005499995895661414, + -1.728519782773219e-05, + -0.6545608639717102, + -0.0013740155845880508, + -5.8412379075889476e-06, + -0.00015496007108595222, + -0.0001935771433636546, + -2.8967437174287625e-05, + -0.01043801661580801, + -7.974783511599526e-05, + -0.0005525015876628458, + -3.683499380713329e-05, + -0.002455436158925295, + -1.2874520507466514e-05, + -0.0022639615926891565, + -1.4543427823809907e-05, + -0.00250252615660429, + -8.344646857949556e-07, + -0.0006089740199968219, + -0.023519812151789665, + -1.6231462955474854, + -0.0013103241799399257, + -0.0044088782742619514, + -3.433168603805825e-05, + -0.0076819476671516895, + -0.13205960392951965, + -0.01295448187738657, + -0.0002797450579237193, + -0.01799413561820984, + -0.0008688965463079512, + -0.0026737437583506107, + -0.0004418112221173942, + -0.001303895260207355, + -6.16293036728166e-05, + -0.0018553201807662845, + -4.815939246327616e-05, + -0.00024875884992070496, + -0.000916537712328136, + -0.005030237603932619, + -0.00015853578224778175, + -0.00936696957796812, + -0.016335444524884224, + -9.619726915843785e-05, + -0.12435520440340042, + -0.002912804950028658, + -0.00010346830822527409, + -0.0007908792467787862, + -1.7165990357170813e-05, + -6.260087490081787, + -4.018156051635742, + -0.05045890435576439, + -0.00021360022947192192, + -4.815939246327616e-05, + -2.2203869819641113, + -0.047356534749269485, + -8.83301836438477e-05, + -5.781483559985645e-05, + -0.11337775737047195, + -3.3378044463461265e-05, + -0.0019444384379312396, + 
-1.645074735279195e-05, + -1.7198790311813354, + -3.5991759300231934, + -2.5881307125091553, + -4.4389872550964355, + -0.39235079288482666, + -0.9257609248161316, + -2.4064109325408936, + -2.256807804107666, + -0.012957894243299961, + -6.8662193370983e-05, + -0.005379723850637674, + -0.1424376517534256, + -0.008812819607555866, + -0.00019667598826345056, + -0.012973662465810776, + -0.0005903884884901345, + -0.0019209994934499264, + -0.0014405598631128669, + -0.0006889115320518613, + -1.645074735279195e-05, + -0.0011966219171881676, + -3.40932747349143e-05, + -9.548207890475169e-05, + -0.0005439232336357236, + -0.004501329269260168, + -0.00011920218821614981, + -0.03018992207944393, + -0.013410485349595547, + -0.00011467275908216834, + -0.6566694378852844, + -0.36726248264312744, + -2.8490614567999728e-05, + -0.00023707917716819793, + -1.3351351299206726e-05, + -1.051271915435791, + -0.01689915731549263, + -3.0722033977508545, + -0.2818227708339691, + -3.957169771194458, + -0.004226442892104387, + -0.00017248096992261708, + -3.9457496313843876e-05, + -5.733857154846191, + -0.26561957597732544, + -0.00047779586748220026, + -2.5748875486897305e-05, + -0.07624048739671707, + -6.0437283536884934e-05, + -0.001644212519749999, + -1.549708758830093e-05, + -2.1518163681030273, + -0.19709540903568268, + -3.698873996734619, + -10.724569320678711, + -2.996880292892456, + -3.1366219520568848, + -0.02801341563463211, + -0.17601795494556427, + -0.0965375229716301, + -0.00014578233822248876, + -0.0020983838476240635, + -0.054011568427085876, + -0.003581777447834611, + -0.00014304091746453196, + -0.011484465561807156, + -0.000708090839907527, + -0.0012874656822532415, + -0.0009416675311513245, + -0.0005903884884901345, + -2.13382354559144e-05, + -0.0007848043460398912, + -2.3841574147809297e-05, + -7.4741430580616e-05, + -0.0002946419408544898, + -0.0024204738438129425, + -0.00011503035057103261, + -0.006832095794379711, + -0.010126759298145771, + -5.876845170860179e-05, + 
-0.09275738149881363, + -0.003692833473905921, + -4.0411134250462055e-05, + -0.0005497612874023616, + -1.537788011773955e-05, + -1.182621717453003, + -0.0008486483711749315, + -4.0531076592742465e-06, + -0.00010585224663373083, + -0.00011646069469861686, + -2.407998726994265e-05, + -0.00471824174746871, + -5.352353764465079e-05, + -0.0003631647559814155, + -3.135155202471651e-05, + -0.0011143434094265103, + -1.1205610462639015e-05, + -0.002159646013751626, + -1.4185804502631072e-05, + -0.0011845960980281234, + -7.152555099310121e-07, + -0.0002699726028367877, + -0.008802657015621662, + -1.1517901420593262, + -0.0017283515771850944, + -0.002493488835170865, + -1.5258672647178173e-05, + -0.0018479428254067898, + -0.040569812059402466, + -0.0041178204119205475, + -0.00017176583060063422, + -0.015839355066418648, + -0.0005023409612476826, + -0.0007201223634183407, + -0.0005905076395720243, + -0.0007784912013448775, + -2.3483953555114567e-05, + -0.0008902162662707269, + -2.6702524337451905e-05, + -9.512448741588742e-05, + -0.0004555141495075077, + -0.014392376877367496, + -9.619726915843785e-05, + -0.0002324311062693596, + -0.01029337290674448, + -0.00015984688070602715, + -1.1049474477767944, + -0.04663100838661194, + -8.21318244561553e-05, + -0.0003543464408721775, + -1.3947389561508317e-05, + -7.615281581878662, + -4.125001907348633, + -0.19173777103424072, + -0.0005029367166571319, + -4.100715523236431e-05, + -2.0808839797973633, + -0.026673687621951103, + -7.70062324590981e-05, + -2.9682672902708873e-05, + -0.12381786853075027, + -2.098061486321967e-05, + -0.0029344377107918262, + -1.3589766240329482e-05, + -6.027270793914795, + -0.344284325838089, + -0.47963422536849976, + -1.262589454650879, + -1.8010940551757812, + -2.51932430267334, + -1.5027334690093994, + -0.06264369934797287, + -1.8616759777069092, + -2.732039213180542, + -6.854299135738984e-05, + -0.001887565478682518, + -0.02442971244454384, + -0.0030983323231339455, + -0.00013374387344811112, + 
-0.010926888324320316, + -0.0006349454633891582, + -0.0010619483655318618, + -0.0007469248375855386, + -0.00040987672400660813, + -1.537788011773955e-05, + -0.0008891443139873445, + -2.4676019165781327e-05, + -7.080780778778717e-05, + -0.00043299360550008714, + -0.2814013361930847, + -6.8662193370983e-05, + -0.0011491130571812391, + -0.007679700385779142, + -9.440929716220126e-05, + -0.026545187458395958, + -0.002912091789767146, + -7.045020902296528e-05, + -0.001142087858170271, + -1.4662635294371285e-05, + -1.6412137746810913, + -9.728646278381348, + -0.026286397129297256, + -0.0002475670480635017, + -7.60526381782256e-05, + -2.191868782043457, + -0.01760944165289402, + -0.0004247716860845685, + -4.684815212385729e-05, + -0.03103969246149063, + -9.297892393078655e-05, + -0.011422710493206978, + -3.6954195820726454e-05, + -4.347017288208008, + -0.000610999355558306, + -2.17897367477417, + -2.866166353225708, + -0.23518076539039612, + -0.00036125810584053397, + -0.01150013878941536, + -1.8427702188491821, + -0.22964701056480408, + -0.011748881079256535, + -0.00036352223833091557, + -2.021958827972412, + -0.008272194303572178, + -1.7123057842254639, + -9.325576782226562, + -1.3440426588058472, + -3.209916830062866, + -0.053304191678762436, + -5.205663681030273, + -0.03287550434470177, + -1.384042501449585, + -7.2653326988220215, + -3.6932270526885986, + -6.713709354400635, + -0.08502203971147537, + -3.0402512550354004, + -0.043377358466386795, + -0.00908633042126894, + -0.013433421961963177, + -1.5646146535873413, + -0.007355276495218277, + -5.929056167602539, + -11.379992485046387, + -3.6368532180786133, + -0.45781779289245605, + -0.003176526166498661, + -0.039530687034130096, + -0.0007678897818550467, + -0.0003935516288038343, + -0.0005339150666259229, + -3.964613437652588, + -0.9957391023635864, + -0.2830953896045685, + -0.2610830068588257, + -0.07025375217199326, + -0.010492399334907532, + -3.3028924465179443, + -4.77099084854126 + ], + "generated_logprobs": [ + 
-0.22153465449810028, + -0.0006783091812394559, + -0.010667562484741211, + -0.0019300420535728335, + -0.00015901254664640874, + -0.0025420039892196655, + -0.0038043521344661713, + -0.0017481058603152633, + -0.47604793310165405, + -0.0035732248798012733, + -1.3063528537750244, + -1.869868278503418, + -0.03979752957820892, + -1.2474843263626099, + -0.1659490168094635, + -0.006920535117387772, + -1.4199819564819336, + -2.666736364364624, + -1.218197226524353, + -0.017583556473255157, + -0.5422223806381226, + -0.0014107999159023166, + -0.0003762729174923152, + -0.0010189585154876113, + -0.03820022940635681, + -0.0013802058529108763, + -0.1537325382232666, + -0.0007863528444431722, + -0.003772999858483672, + -0.019278066232800484 + ], + "logprobs": [ + -2.0310330390930176, + -8.008150100708008, + -4.907264232635498, + -8.383085250854492, + -0.9039976000785828, + -0.005822602193802595, + -3.2968709468841553, + -0.11372647434473038, + -3.750115156173706, + -6.341870307922363, + -11.225410461425781, + -0.8311297297477722, + -1.9895459413528442, + -1.2136539220809937, + -0.4511846899986267, + -1.275371789932251, + -6.52569055557251, + -0.3268530070781708, + -2.488239288330078, + -1.1252245903015137, + -0.004931548144668341, + -1.1413307189941406, + -2.4036614894866943, + -0.593055784702301, + -5.775687217712402, + -0.7173333764076233, + -6.7589006423950195, + -4.472473621368408, + -0.28561243414878845, + -0.9266374111175537, + -1.2420787811279297, + -4.94831657409668, + -0.4015401303768158, + -2.405423879623413, + -6.706996440887451, + -2.3797435760498047, + -6.879988193511963, + -0.599727988243103, + -4.6161346435546875, + -0.016334740445017815, + -1.4226453304290771, + -4.064138412475586, + -8.992555618286133, + -0.7892558574676514, + -2.565383195877075, + -1.6011606454849243, + -1.1192784309387207, + -1.085118293762207, + -1.452021598815918, + -0.1256672590970993, + -4.310093879699707, + -0.039925139397382736, + -0.09540079534053802, + -4.4552788734436035, + 
-2.6978704929351807, + -0.3264457583427429, + -0.9057141542434692, + -0.2424505054950714, + -0.2473771721124649, + -0.04457908123731613, + -2.5994861125946045, + -0.5882505178451538, + -2.4292445182800293, + -0.1860235333442688, + -2.6841845512390137, + -5.8617939949035645, + -1.7926914691925049, + -0.6663980484008789, + -0.029983440414071083, + -1.0682772397994995, + -0.0018566290382295847, + -1.9571454524993896, + -0.08927226811647415, + -4.61471700668335, + -0.002604546956717968, + -0.2620302140712738, + -0.006101197097450495, + -7.435886859893799, + -0.0376485139131546, + -10.174129486083984, + -0.9147175550460815, + -4.526404857635498, + -3.670576572418213, + -4.566626071929932, + -1.0199782848358154, + -0.0006491222884505987, + -0.14426420629024506, + -0.03322957828640938, + -0.0019640696700662374, + -0.00022468426323030144, + -0.0013444918440654874, + -0.0011957883834838867, + -0.007926556281745434, + -0.011617152951657772, + -0.0018109364900738, + -0.00017581824795342982, + -0.0018969652010127902, + -6.282132380874828e-05, + -0.0010078833438456059, + -0.25652098655700684, + -0.35659894347190857, + -9.333651541965082e-05, + -0.7947311401367188, + -1.3594639301300049, + -7.962863310240209e-05, + -1.861167550086975, + -0.5386030673980713, + -0.00022075122979003936, + -0.001347229932434857, + -3.290122185717337e-05, + -3.7342543601989746, + -0.5175371170043945, + -4.488879680633545, + -0.007863753475248814, + -0.08534510433673859, + -0.0009170140838250518, + -2.13382354559144e-05, + -4.507952690124512, + -0.5332688689231873, + -0.004296358674764633, + -2.062299427052494e-05, + -5.2475104331970215, + -0.020387964323163033, + -0.1661914438009262, + -0.0003081085451412946, + -15.800027847290039, + -8.108964920043945, + -0.7285020351409912, + -7.803549289703369, + -5.010417938232422, + -0.263860821723938, + -4.3748852476710454e-05, + -0.013306032866239548, + -0.029512016102671623, + -0.0036468682810664177, + -0.00023231192608363926, + -0.0002379134384682402, + 
-0.0004920940846204758, + -0.000873065204359591, + -0.0029308719094842672, + -0.0006667536217719316, + -0.00013672371278516948, + -0.0011686407960951328, + -4.625213477993384e-05, + -0.0007901645149104297, + -0.027857612818479538, + -0.06313244253396988, + -0.00013064485392533243, + -0.2378876954317093, + -0.6059458255767822, + -5.757642793469131e-05, + -1.5949885845184326, + -1.6001688241958618, + -0.00032574593205936253, + -0.0016402851324528456, + -2.276871418871451e-05, + -3.0335943698883057, + -0.286937952041626, + -6.517683982849121, + -3.1465959548950195, + -0.7292280793190002, + -0.06161583960056305, + -0.0014851979212835431, + -2.777537883957848e-05, + -3.946831226348877, + -0.09084996581077576, + -0.003532005939632654, + -4.029192859889008e-05, + -4.555190086364746, + -0.011255813762545586, + -0.10179147869348526, + -0.0004140473320148885, + -4.4321393966674805, + -2.2296247482299805, + -3.2771155834198, + -8.323366165161133, + -0.02779245562851429, + -2.403028964996338, + -0.07431145757436752, + -0.5372196435928345, + -0.05987980589270592, + -0.20438668131828308, + -0.00013136000779923052, + -0.0572563000023365, + -0.11035308241844177, + -0.012903997674584389, + -0.0002406545972917229, + -0.0001517419150331989, + -0.00036066226311959326, + -0.0005477358354255557, + -0.00229322025552392, + -0.000697846058756113, + -0.0001161031104857102, + -0.001127441762946546, + -3.814624506048858e-05, + -0.0005136600811965764, + -0.022026309743523598, + -0.02361132949590683, + -0.0002090712368953973, + -0.04913746938109398, + -2.7477238178253174, + -9.202533692587167e-05, + -0.9271803498268127, + -1.3856279850006104, + -0.0001754606782924384, + -0.0012224590172991157, + -1.7165990357170813e-05, + -1.0239524841308594, + -0.020712625235319138, + -0.0451514832675457, + -1.5345499515533447, + -0.0004010588163509965, + -0.0004401430196594447, + -2.13382354559144e-05, + -2.5878491401672363, + -0.020529404282569885, + -0.00043501926120370626, + -2.682172998902388e-05, + 
-0.3827762007713318, + -0.00019298121333122253, + -0.007158228196203709, + -8.618460560683161e-05, + -6.015654563903809, + -4.037173271179199, + -3.4229695796966553, + -1.0183475017547607, + -1.4963387250900269, + -0.33330175280570984, + -1.480197787284851, + -2.0857536792755127, + -2.225975513458252, + -5.293066024780273, + -0.43916723132133484, + -0.00010048838157672435, + -0.015328695066273212, + -0.13567933440208435, + -0.012453177943825722, + -0.00017855956684798002, + -0.00012778419477399439, + -0.0002885640424210578, + -0.0004291805380489677, + -0.0008485292200930417, + -0.0006668727728538215, + -8.177422569133341e-05, + -0.001060757553204894, + -6.151010165922344e-05, + -0.0005185451591387391, + -0.028113562613725662, + -0.03407377377152443, + -0.0003861635341309011, + -1.1215460300445557, + -0.5561063885688782, + -0.0001726001501083374, + -2.5190887451171875, + -0.6141397953033447, + -0.0001227780303452164, + -0.0012188870459794998, + -1.6212332411669195e-05, + -6.833529472351074, + -6.0156097412109375, + -0.03274226188659668, + -0.014286145567893982, + -0.0009454786195419729, + -3.814624506048858e-05, + -4.910149097442627, + -0.009493326768279076, + -0.001437702914699912, + -5.876845170860179e-05, + -0.3798050582408905, + -0.003948037512600422, + -0.07855644077062607, + -0.00022420754248742014, + -6.84205436706543, + -0.0015236446633934975, + -2.645585298538208, + -0.9816564917564392, + -1.3786735534667969, + -0.7280330061912537, + -1.4040117263793945, + -9.035655966727063e-05, + -0.033023953437805176, + -0.3305729031562805, + -0.027912795543670654, + -0.0002892790944315493, + -0.00012182447244413197, + -0.00026901919045485556, + -0.0004681444843299687, + -0.0007345362100750208, + -0.0008179179858416319, + -0.00010549465514486656, + -0.0013330630026757717, + -5.7338023907504976e-05, + -0.0005571481888182461, + -0.013437421061098576, + -0.033829718828201294, + -0.0004694551753345877, + -0.28239941596984863, + -1.3776881694793701, + -0.00014256415306590497, 
+ -1.4336698055267334, + -0.9458242654800415, + -0.0002739054325502366, + -0.0015444743912667036, + -2.169585604860913e-05, + -5.267784118652344, + -2.617713689804077, + -0.1205064058303833, + -0.000608854868914932, + -2.47952248173533e-05, + -6.116018772125244, + -0.06051409989595413, + -0.0021291938610374928, + -2.777537883957848e-05, + -0.5082104206085205, + -0.0008528171456418931, + -0.013313560746610165, + -9.381330892210826e-05, + -6.970278739929199, + -0.3628937304019928, + -1.40151047706604, + -0.8361061811447144, + -0.4778183400630951, + -2.494100570678711, + -0.3126090466976166, + -7.66262674331665, + -0.3505229353904724, + -2.1190404891967773, + -0.08990062028169632, + -8.201262971851975e-05, + -0.01644204556941986, + -0.1838725060224533, + -0.015538694337010384, + -0.00019107422849629074, + -7.915183232398704e-05, + -0.0001382732152706012, + -0.0002119316632160917, + -0.0004773192631546408, + -0.0004781533498317003, + -4.994744449504651e-05, + -0.0011807858245447278, + -3.0636318115284666e-05, + -0.0003046525234822184, + -0.0024103655014187098, + -0.009829924441874027, + -0.00022301571152638644, + -0.12844854593276978, + -1.1151821613311768, + -9.512448741588742e-05, + -1.1148451566696167, + -0.45424169301986694, + -7.128461584215984e-05, + -0.001427346607670188, + -1.2040065485052764e-05, + -3.9783990383148193, + -0.025781046599149704, + -0.00015496007108595222, + -0.003944831434637308, + -0.000663894519675523, + -3.015949550899677e-05, + -0.15718017518520355, + -0.0009197533945553005, + -0.0007913556764833629, + -1.8000440832111053e-05, + -0.18712174892425537, + -0.00016604475968051702, + -0.0022110319696366787, + -2.169585604860913e-05, + -0.014111850410699844, + -1.1920922133867862e-06, + -0.00984656810760498, + -0.5971966981887817, + -2.393812894821167, + -0.010224700905382633, + -0.009953508153557777, + -7.64102369430475e-05, + -0.011833352968096733, + -0.26886406540870667, + -0.023419089615345, + -0.00019762947340495884, + -6.031808152329177e-05, 
+ -0.00010191874753218144, + -0.00015889335190877318, + -0.0003564914222806692, + -0.0004101150552742183, + -6.675497570540756e-05, + -0.0009184433147311211, + -3.158996332786046e-05, + -0.00031442465842701495, + -0.0027259355410933495, + -0.008694176562130451, + -0.00032658010604791343, + -0.289438933134079, + -2.1416351795196533, + -0.00017987063620239496, + -1.8434972763061523, + -1.624247670173645, + -0.00022980909852776676, + -0.0006792622152715921, + -1.0967194612021558e-05, + -1.281017541885376, + -0.01736496575176716, + -1.955749750137329, + -1.528749942779541, + -2.776960611343384, + -0.5374854803085327, + -0.00029345019720494747, + -2.539125671319198e-05, + -3.0065665245056152, + -0.0013523490633815527, + -0.0007908792467787862, + -1.4543427823809907e-05, + -0.23400214314460754, + -0.0002324311062693596, + -0.010042970068752766, + -4.088794958079234e-05, + -2.1034951210021973, + -6.140199184417725, + -4.464273929595947, + -1.9943883419036865, + -0.2878473103046417, + -0.05924016237258911, + -0.7345774173736572, + -0.011171765625476837, + -0.0002982171718031168, + -0.14330486953258514, + -0.0007319155265577137, + -0.0003812778159044683, + -0.002302616136148572, + -0.36087724566459656, + -0.08833581954240799, + -2.631582260131836, + -3.1771137714385986, + -0.11841163039207458, + -4.482168878894299e-05, + -0.014765388332307339, + -0.17005765438079834, + -0.010167589411139488, + -0.00010823617776622996, + -3.6477376852417365e-05, + -5.936446541454643e-05, + -0.00023493390472140163, + -0.0003688847064040601, + -0.000321336614433676, + -4.756337511935271e-05, + -0.000902007392141968, + -2.9205850296420977e-05, + -0.00024423000286333263, + -0.000964533886872232, + -0.00411722669377923, + -0.0002711643755901605, + -0.3081328868865967, + -0.4985820949077606, + -0.00018726025882642716, + -1.1391643285751343, + -0.27228832244873047, + -4.2914423829643056e-05, + -0.0012028133496642113, + -1.9311717551317997e-05, + -1.1735807657241821, + -0.07005516439676285, + 
-0.0024717275518924, + -8.618460560683161e-05, + -0.00016866691294126213, + -0.00044764988706447184, + -1.6093124941107817e-05, + -8.586283683776855, + -0.0002851079625543207, + -7.490447998046875, + -0.09369903802871704, + -0.004145600367337465, + -0.0008606782066635787, + -4.827859811484814e-05, + -0.7127438187599182, + -0.0003618539194576442, + -0.015226203016936779, + -6.401333666872233e-05, + -3.530060291290283, + -0.040570154786109924, + -0.7448150515556335, + -1.4005241394042969, + -0.5872946977615356, + -6.073245048522949, + -0.9850690364837646, + -1.4459205865859985, + -0.4346452057361603, + -4.452149868011475, + -0.3939701318740845, + -0.02252959832549095, + -9.440929716220126e-05, + -0.012161390855908394, + -0.25266116857528687, + -0.021285664290189743, + -0.00015770144818816334, + -9.870042413240299e-05, + -9.989239333663136e-05, + -0.005311425309628248, + -0.00032634177478030324, + -0.0007045170641504228, + -9.417090768693015e-05, + -0.001260558608919382, + -4.482168878894299e-05, + -0.0003833036171272397, + -0.0023484050761908293, + -0.011129915714263916, + -0.00040260792593471706, + -0.1819346845149994, + -1.1781600713729858, + -0.00033241944038309157, + -1.3525464534759521, + -1.2726483345031738, + -0.00018034738604910672, + -0.0009054613183252513, + -1.2040065485052764e-05, + -1.7329559326171875, + -0.009877022355794907, + -0.030561018735170364, + -0.9567705988883972, + -0.0002079985715681687, + -0.0003582789213396609, + -2.5510462364763953e-05, + -1.3376575708389282, + -0.043758541345596313, + -0.0005255748401395977, + -0.003921795636415482, + -3.9934315282152966e-05, + -0.013946342281997204, + -0.001447345013730228, + -0.09289155900478363, + -0.00028975578607060015, + -5.025714874267578, + -5.600637435913086, + -0.8190056681632996, + -2.0997657775878906, + -1.5471020936965942, + -0.2830793261528015, + -0.099715456366539, + -0.00015341058315243572, + -0.09538150578737259, + -0.9440865516662598, + -0.13964560627937317, + -0.0003178806509822607, + 
-0.00015531764074694365, + -0.00016640232934150845, + -0.00023398046323563904, + -0.00039081089198589325, + -0.0015487592900171876, + -0.00010716341057559475, + -0.0017987991450354457, + -3.838465272565372e-05, + -0.0006412595394067466, + -0.00545145571231842, + -0.02335585467517376, + -0.0004077318590134382, + -0.8720157146453857, + -0.10373511165380478, + -0.00014077626110520214, + -0.5180479884147644, + -0.17388182878494263, + -0.00015746307326480746, + -0.0043711354956030846, + -2.9801878554280847e-05, + -2.0693466663360596, + -0.007648942526429892, + -2.8729025871143676e-05, + -0.0003301552205812186, + -0.000542612629942596, + -3.2543604902457446e-05, + -0.27388375997543335, + -0.00043752157944254577, + -0.0005888396990485489, + -1.7762025890988298e-05, + -0.05423494055867195, + -7.915183232398704e-05, + -0.002435457892715931, + -1.1205610462639015e-05, + -0.01761529967188835, + -7.152555099310121e-07, + -0.005352570675313473, + -0.1280955821275711, + -2.3187625408172607, + -0.009216856211423874, + -0.008558499626815319, + -0.0001072826053132303, + -0.04680917039513588, + -0.5660229325294495, + -0.04951385408639908, + -0.0002015625941567123, + -5.8410845667822286e-05, + -9.440929716220126e-05, + -0.00014828535495325923, + -0.00037245964631438255, + -0.0008362610242329538, + -5.4596363042946905e-05, + -0.0010970771545544267, + -4.017272294731811e-05, + -0.0004563482361845672, + -0.0021864098962396383, + -0.012597862631082535, + -0.00036435641231946647, + -0.07823580503463745, + -1.1245288848876953, + -0.0001472126314183697, + -2.1236472129821777, + -0.25363627076148987, + -0.00011646069469861686, + -0.0010031197452917695, + -1.4662635294371285e-05, + -11.853788375854492, + -1.5205868482589722, + -0.0017375147435814142, + -0.00013374387344811112, + -7.155948638916016, + -3.82474422454834, + -1.2793458700180054, + -0.03748536482453346, + -0.005961020477116108, + -5.829164365422912e-05, + -3.1456170082092285, + -0.03318829461932182, + -0.008591356687247753, + 
-0.027652040123939514, + -0.00012885693286079913, + -1.5415722131729126, + -0.979039192199707, + -2.842726469039917, + -9.05957317352295, + -2.8234424591064453, + -0.8373243808746338, + -0.4019332230091095, + -0.0004048719711136073, + -0.03923225402832031, + -0.4254666864871979, + -0.027653662487864494, + -0.0003177614707965404, + -0.0001967951684491709, + -0.00020883286197204143, + -0.00025674383505247533, + -0.0008311392739415169, + -0.0012284121476113796, + -0.00010787858627736568, + -0.0024356956128031015, + -6.258291978156194e-05, + -0.00048565989709459245, + -0.0021678535267710686, + -0.012607751414179802, + -0.00023588736075907946, + -0.11036524921655655, + -0.5750182867050171, + -0.00017176583060063422, + -1.9862632751464844, + -1.2351702451705933, + -0.00037520044133998454, + -0.0013566347770392895, + -2.5152843591058627e-05, + -2.1086387634277344, + -7.917232990264893, + -0.05708145350217819, + -0.06208256632089615, + -0.000644237850792706, + -8.308542601298541e-05, + -5.1276655197143555, + -0.16815905272960663, + -0.0012461524456739426, + -5.94836674281396e-05, + -3.559391736984253, + -5.411561965942383, + -0.022293083369731903, + -0.0005644158809445798, + -0.017552750185132027, + -0.00038842763751745224, + -1.8479862213134766, + -0.004095145035535097, + -11.830594062805176, + -0.4279360771179199, + -3.7062158584594727, + -2.9457836151123047, + -1.9491567611694336, + -0.06489256024360657, + -0.00013660451804753393, + -0.012157151475548744, + -0.22074609994888306, + -0.021073833107948303, + -0.00021300431399140507, + -0.00017593742813915014, + -0.00023672162205912173, + -0.0003091811086051166, + -0.0014552014181390405, + -0.0013881819322705269, + -0.00015245705435518175, + -0.002331279218196869, + -5.4238757002167404e-05, + -0.000668659748043865, + -0.002430463209748268, + -0.016187194734811783, + -0.0002441108226776123, + -1.4263010025024414, + -0.30179885029792786, + -0.0001770101225702092, + -0.5045080184936523, + -0.07310019433498383, + 
-8.022463589441031e-05, + -0.002168329432606697, + -2.3841574147809297e-05, + -1.7808306217193604, + -0.02828705683350563, + -6.115249561844394e-05, + -0.0008904544520191848, + -0.0005335576133802533, + -3.957670196541585e-05, + -0.03801318258047104, + -0.0003077510336879641, + -0.0005035324720665812, + -2.169585604860913e-05, + -0.02271897904574871, + -3.1709168979432434e-05, + -0.0018041539005935192, + -1.8358061424805783e-05, + -0.005899516865611076, + -1.1920922133867862e-06, + -0.002030455507338047, + -0.27544423937797546, + -1.1146715879440308, + -0.012286689132452011, + -0.004974251613020897, + -6.389413465512916e-05, + -0.010529793798923492, + -0.2302529364824295, + -0.015527778305113316, + -0.00019524575327523053, + -6.389413465512916e-05, + -0.00013815402053296566, + -0.00018165845540352166, + -0.0005564333405345678, + -0.000959531927946955, + -6.151010165922344e-05, + -0.001416394836269319, + -5.531158240046352e-05, + -0.00035363141796551645, + -0.0010683787986636162, + -0.012577733024954796, + -0.00023934361524879932, + -0.06311207264661789, + -0.972044050693512, + -0.00019929806876461953, + -1.6224243640899658, + -0.8333836197853088, + -0.00016592556494288146, + -0.0008984343148767948, + -1.6927575416048057e-05, + -0.8844207525253296, + -0.023736946284770966, + -4.01811408996582, + -1.6215615272521973, + -0.33087965846061707, + -0.0035197706893086433, + -0.00024148885859176517, + -3.0874729418428615e-05, + -3.097301721572876, + -0.030017103999853134, + -0.0006585336523130536, + -1.9430925021879375e-05, + -0.49424058198928833, + -0.0001401803019689396, + -0.00554167665541172, + -1.9073304429184645e-05, + -0.5312279462814331, + -5.748266220092773, + -11.324613571166992, + -1.1340491771697998, + -0.16082678735256195, + -0.8938052654266357, + -3.726792335510254, + -0.8781039714813232, + -0.00017355366435367614, + -0.009945128113031387, + -0.18626560270786285, + -0.013042616657912731, + -0.00010859376925509423, + -7.199982064776123e-05, + 
-0.00010871296399272978, + -0.00017796363681554794, + -0.00034767304896377027, + -0.0006170752458274364, + -3.0636318115284666e-05, + -0.001077071763575077, + -4.076874756719917e-05, + -0.00024029705673456192, + -0.000982159748673439, + -0.02636047638952732, + -0.00021920185827184469, + -0.632880687713623, + -0.06617539376020432, + -0.00016318420239258558, + -0.4156720042228699, + -0.034620899707078934, + -5.6622808187967166e-05, + -0.0011695933062583208, + -1.597391747054644e-05, + -10.639490127563477, + -0.24528348445892334, + -0.06833283603191376, + -0.0033608165103942156, + -0.02616957761347294, + -0.00036054308293387294, + -3.099393507000059e-05, + -4.044595241546631, + -2.188387393951416, + -0.32720163464546204, + -0.00974209699779749, + -0.0011126763420179486, + -3.302042750874534e-05, + -0.19868847727775574, + -7.56950321374461e-05, + -0.005233398173004389, + -3.158996332786046e-05, + -1.839617371559143, + -0.17654305696487427, + -0.7875567078590393, + -2.1537787914276123, + -0.3631034195423126, + -0.9216613173484802, + -2.0036990642547607, + -0.09243497252464294, + -0.00010740180005086586, + -0.018314307555556297, + -0.208140030503273, + -0.01576320081949234, + -0.00013136000779923052, + -7.390703103737906e-05, + -0.00011264643399044871, + -0.00017045476124621928, + -0.0005171154043637216, + -0.0005422552349045873, + -3.349725011503324e-05, + -0.0013309201458469033, + -4.255681051290594e-05, + -0.00023767507809679955, + -0.001095648156479001, + -0.14277544617652893, + -0.00021371940965764225, + -0.00032217081752605736, + -0.35286909341812134, + -0.0002668739762157202, + -1.7962173223495483, + -0.07211553305387497, + -7.974783511599526e-05, + -0.000621959799900651, + -1.2874520507466514e-05, + -1.9048426151275635, + -0.022713735699653625, + -3.9457496313843876e-05, + -0.0005820487276650965, + -0.0002401778765488416, + -3.325883881188929e-05, + -0.02081700973212719, + -0.00022492263815365732, + -0.0003299168893136084, + -2.038458114839159e-05, + 
-0.008293120190501213, + -1.7404405298293568e-05, + -0.0012493670219555497, + -1.4424220353248529e-05, + -0.0041636452078819275, + -8.344646857949556e-07, + -0.0020267677027732134, + -0.13429519534111023, + -1.9221405982971191, + -0.0093602379783988, + -0.005981876514852047, + -5.817244164063595e-05, + -0.019257837906479836, + -0.27827900648117065, + -0.01921457052230835, + -0.0001652104256208986, + -8.546940807718784e-05, + -0.0001510267611593008, + -0.00016366096679121256, + -0.0002616301644593477, + -0.0005458295345306396, + -3.480850500636734e-05, + -0.0010807631770148873, + -3.7431014789035544e-05, + -0.0003626880934461951, + -0.0010880271438509226, + -0.6327179670333862, + -0.0002374367177253589, + -0.020488178357481956, + -0.10384052991867065, + -0.0001971527235582471, + -0.16368740797042847, + -0.026392173022031784, + -0.00012170527770649642, + -0.0025978884659707546, + -1.9430925021879375e-05, + -7.9701642990112305, + -1.6003714799880981, + -0.2391909956932068, + -0.000502817565575242, + -4.9232225137529895e-05, + -4.135532855987549, + -0.06158669665455818, + -0.00044371772673912346, + -3.755022044060752e-05, + -0.18109248578548431, + -0.00010883215873036534, + -0.006367869209498167, + -7.748303323751315e-05, + -5.440160751342773, + -5.081888198852539, + -0.19470839202404022, + -2.9904420375823975, + -2.4235076904296875, + -0.032352350652217865, + -0.00044907975825481117, + -0.04121795669198036, + -0.43260514736175537, + -0.04605478420853615, + -0.00023982033599168062, + -0.0003178806509822607, + -0.00017188502533826977, + -0.00022468426323030144, + -0.0003400462737772614, + -0.0010152667528018355, + -0.00011729506513802335, + -0.001335324952378869, + -4.8874615458771586e-05, + -0.001257463125512004, + -0.004097400698810816, + -0.0008996253600344062, + -0.0002967870968859643, + -0.15579743683338165, + -1.3731565475463867, + -0.00023183519078884274, + -2.0089190006256104, + -3.441042423248291, + -0.0006145734223537147, + -0.0012832987122237682, + 
-1.9550132492440753e-05, + -1.731110692024231, + -0.027068600058555603, + -2.8266828060150146, + -0.35935577750205994, + -0.023644626140594482, + -0.0005504761938937008, + -0.00017951308109331876, + -2.396077979938127e-05, + -2.3206820487976074, + -0.003744971938431263, + -0.000205018965061754, + -2.288792165927589e-05, + -0.08958229422569275, + -6.592056161025539e-05, + -0.0021721357479691505, + -3.0397906812140718e-05, + -4.5939412117004395, + -8.534799575805664, + -3.483549118041992, + -1.681600570678711, + -0.7201917767524719, + -0.530266284942627, + -0.7154921293258667, + -2.835704803466797, + -0.0004451475979294628, + -0.02453603409230709, + -0.31538400053977966, + -0.0156102878972888, + -0.00013124081306159496, + -8.892617915989831e-05, + -9.738924563862383e-05, + -0.0011036264477297664, + -0.00030357998912222683, + -0.0010406322544440627, + -6.0437283536884934e-05, + -0.0014225849881768227, + -3.671578815556131e-05, + -0.00044705410255119205, + -0.005232923664152622, + -0.0001565095444675535, + -0.0003033416287507862, + -0.18575794994831085, + -0.14061033725738525, + -0.0002706876548472792, + -0.5223819017410278, + -0.035896092653274536, + -5.4834770708112046e-05, + -0.0012011463986709714, + -1.6569954823353328e-05, + -1.681032657623291, + -0.011652856133878231, + -1.6569954823353328e-05, + -0.00047469791024923325, + -0.000256982195423916, + -3.361645576660521e-05, + -0.01372707262635231, + -0.00014852374442853034, + -0.00046695294440723956, + -2.288792165927589e-05, + -0.0034659572411328554, + -1.3708974620385561e-05, + -0.0015382850542664528, + -8.702239938429557e-06, + -0.003346678102388978, + -7.152555099310121e-07, + -0.000867467257194221, + -0.02539108693599701, + -1.0509589910507202, + -0.002976156771183014, + -0.005069141276180744, + -5.590759246842936e-05, + -0.015196850523352623, + -0.3093729317188263, + -0.02090352028608322, + -0.00013958434283267707, + -6.460934673668817e-05, + -8.296622399939224e-05, + -0.0004457433824427426, + 
-0.0005041282274760306, + -0.0011976935202255845, + -4.2914423829643056e-05, + -0.0011085085570812225, + -4.160317621426657e-05, + -0.0005018643569201231, + -0.004558410029858351, + -9.476689592702314e-05, + -0.00037269797758199275, + -0.11347992718219757, + -0.450020968914032, + -0.0003301552205812186, + -2.8804092407226562, + -0.15156973898410797, + -6.246371776796877e-05, + -0.000683074293192476, + -1.3947389561508317e-05, + -2.0683939456939697, + -0.02846144698560238, + -0.04469490796327591, + -1.889275074005127, + -0.0001255195093108341, + -0.00011228884250158444, + -2.4914430468925275e-05, + -7.980701446533203, + -0.39261865615844727, + -1.6454169750213623, + -0.0018256916664540768, + -0.0003761537664104253, + -2.5987286790041253e-05, + -0.27152737975120544, + -3.8742269680369645e-05, + -0.002314033918082714, + -5.364274329622276e-05, + -5.172288417816162, + -0.007181781344115734, + -0.8884671330451965, + -0.20681926608085632, + -1.529428243637085, + -2.335056781768799, + -0.02583100087940693, + -1.8960939645767212, + -0.257112592458725, + -0.1720065474510193, + -8.284702198579907e-05, + -0.011070851236581802, + -0.16333311796188354, + -0.01678428426384926, + -0.00010024998482549563, + -4.911301948595792e-05, + -6.41325386823155e-05, + -0.0003518439189065248, + -0.0003983181086368859, + -0.0007211944903247058, + -2.253030106658116e-05, + -0.0009076051646843553, + -2.884823152271565e-05, + -0.00033682872890494764, + -0.01127432007342577, + -5.113947918289341e-05, + -0.0003095386200584471, + -0.162703275680542, + -0.12824533879756927, + -0.0002037079248111695, + -0.5378345251083374, + -0.013359789736568928, + -4.625213477993384e-05, + -0.0007819455349817872, + -1.2993727978027891e-05, + -1.4531102180480957, + -0.9376159310340881, + -0.02013481967151165, + -3.182837463100441e-05, + -0.00028391621890477836, + -0.0002040654799202457, + -1.6212332411669195e-05, + -6.006290435791016, + -0.23482508957386017, + -0.0003094194398727268, + -3.2066785934148356e-05, + 
-0.2894707918167114, + -0.00010334911348763853, + -0.003178308717906475, + -4.8397800128441304e-05, + -3.3541419506073, + -5.274465084075928, + -2.3055055141448975, + -1.0987294912338257, + -0.019666209816932678, + -0.00022790218645241112, + -0.016233760863542557, + -0.2816391885280609, + -0.028503969311714172, + -0.0001358893496217206, + -0.00010394509445177391, + -8.856858039507642e-05, + -0.00036137725692242384, + -0.00029452278977259994, + -0.0008922410197556019, + -2.539125671319198e-05, + -0.0011102947173640132, + -3.40932747349143e-05, + -0.0004843492351938039, + -0.006350101437419653, + -5.9602869441732764e-05, + -0.0002796259068418294, + -0.3986394703388214, + -0.10029242187738419, + -0.00024196557933464646, + -1.9691603183746338, + -0.7402586936950684, + -7.056941103655845e-05, + -0.0003618539194576442, + -1.0371154530730564e-05, + -1.4170231819152832, + -0.008172051049768925, + -1.3708974620385561e-05, + -0.00041607304592616856, + -0.00014888131408952177, + -2.6464111215318553e-05, + -0.018121162429451942, + -0.00010764019680209458, + -0.0002335037279408425, + -2.3007127310847864e-05, + -0.002049014437943697, + -1.0609570381348021e-05, + -0.0011868583969771862, + -7.867782187531702e-06, + -0.0018794744974002242, + -5.960462772236497e-07, + -0.0007434703293256462, + -0.02911354973912239, + -1.7920753955841064, + -0.0026135831139981747, + -0.00308870617300272, + -3.659658250398934e-05, + -0.010810147039592266, + -0.20098412036895752, + -0.01644638366997242, + -0.00013207517622504383, + -6.854299135738984e-05, + -7.152301259338856e-05, + -0.00024720950750634074, + -0.00033468366018496454, + -0.0010001424234360456, + -5.054346183896996e-05, + -0.0009557208395563066, + -3.981510963058099e-05, + -0.0004465774691198021, + -0.011578621342778206, + -7.211902266135439e-05, + -0.0002416080387774855, + -0.09539440274238586, + -0.057392168790102005, + -0.0002840353990904987, + -0.21088920533657074, + -0.0078902468085289, + -8.606540359323844e-05, + 
-0.0007384672062471509, + -1.3589766240329482e-05, + -0.8148440718650818, + -0.025661379098892212, + -2.113894462585449, + -0.01820814050734043, + -0.0010720703285187483, + -0.0002908283786382526, + -0.00011181206355104223, + -1.9550132492440753e-05, + -1.9963352680206299, + -0.011685965582728386, + -0.00010299152199877426, + -1.6093124941107817e-05, + -0.3427979350090027, + -0.00010358751023886725, + -0.002419165801256895, + -5.07818695041351e-05, + -9.356146812438965, + -2.63590145111084, + -0.0489899143576622, + -0.429649293422699, + -2.441277027130127, + -0.09116854518651962, + -1.7202471494674683, + -1.2776923179626465, + -1.2828468084335327, + -0.1033272072672844, + -0.013413426466286182, + -0.00016091958968900144, + -0.006314327474683523, + -0.1650361269712448, + -0.009155434556305408, + -8.630380034446716e-05, + -6.007967749610543e-05, + -6.210611172718927e-05, + -0.00027497802511788905, + -0.0005628670332953334, + -0.0008046964649111032, + -4.160317621426657e-05, + -0.0009633429581299424, + -2.9444261599564925e-05, + -0.0003147821989841759, + -0.003070523263886571, + -3.969590397900902e-05, + -0.00025340684805996716, + -0.16765674948692322, + -0.220333993434906, + -0.00025281094713136554, + -1.6686129570007324, + -0.08651255071163177, + -7.4741430580616e-05, + -0.00032062159152701497, + -9.536697689327411e-06, + -8.607754707336426, + -2.7989468574523926, + -0.006830438040196896, + -0.00042500998824834824, + -4.410646579344757e-05, + -2.2325727939605713, + -0.09642884135246277, + -0.0005049622268415987, + -1.4662635294371285e-05, + -3.892613172531128, + -0.0008376903715543449, + -0.004279621876776218, + -5.745722592109814e-05, + -2.696786642074585, + -0.44925373792648315, + -0.37875908613204956, + -0.27114248275756836, + -1.023728609085083, + -4.712882995605469, + -1.415423035621643, + -2.8054561614990234, + -0.4460236430168152, + -0.0005779979983344674, + -0.02468189038336277, + -0.30965328216552734, + -0.02052520029246807, + -0.00012730741582345217, + 
-9.619726915843785e-05, + -8.749579137656838e-05, + -0.000350175570929423, + -0.0003150205302517861, + -0.0007310817018151283, + -3.0636318115284666e-05, + -0.0011643542675301433, + -3.2305197237292305e-05, + -0.00026913834153674543, + -0.011463016271591187, + -5.411955135059543e-05, + -0.00023231192608363926, + -0.1063343733549118, + -0.037034809589385986, + -0.0001248043408850208, + -0.3663400411605835, + -0.01425135973840952, + -5.376194530981593e-05, + -0.000933926145080477, + -1.4305012882687151e-05, + -1.5244930982589722, + -0.008558854460716248, + -1.8358061424805783e-05, + -0.0002698534226510674, + -0.00022075122979003936, + -3.576214658096433e-05, + -0.01590365171432495, + -0.00012706902634818107, + -0.0002901133266277611, + -2.2649508537142538e-05, + -0.0032194233499467373, + -1.1920858014491387e-05, + -0.0013312773080542684, + -8.22540732769994e-06, + -0.001732040662318468, + -4.768370445162873e-07, + -0.0007115454645827413, + -0.11607333272695541, + -5.158000946044922, + -0.00630958890542388, + -0.006455875933170319, + -3.886147169396281e-05, + -0.007113605737686157, + -0.16176439821720123, + -0.01025608740746975, + -9.321732068201527e-05, + -5.435795901576057e-05, + -7.70062324590981e-05, + -0.0002002515539061278, + -0.0003270567976869643, + -0.0011002921964973211, + -3.93382906622719e-05, + -0.0009735850035212934, + -4.076874756719917e-05, + -0.00036042393185198307, + -0.011448992416262627, + -0.00010787858627736568, + -0.00022289653134066612, + -0.12719827890396118, + -0.16689445078372955, + -0.00029869386344216764, + -1.129071831703186, + -0.46998509764671326, + -0.0001429217227268964, + -0.0004334702098276466, + -1.823885577323381e-05, + -7.808990478515625, + -0.6958405375480652, + -0.0011538759572431445, + -0.00010084597306558862, + -2.1815061700181104e-05, + -3.412889242172241, + -0.0024302254896610975, + -0.1256120651960373, + -0.0001486429391661659, + -2.932505594799295e-05, + -0.016119161620736122, + -2.1219027985353023e-05, + 
-0.0014936492079868913, + -6.794906312279636e-06, + -4.649867057800293, + -0.42487168312072754, + -1.3419163227081299, + -0.3015914857387543, + -0.00015341058315243572, + -0.0032649326603859663, + -0.11564143747091293, + -0.00739337969571352, + -5.8887653722194955e-05, + -6.615896563744172e-05, + -5.972207145532593e-05, + -0.00020644917094614357, + -0.000301673193462193, + -0.0003761537664104253, + -2.6702524337451905e-05, + -0.0008094609947875142, + -3.2305197237292305e-05, + -0.0002474478678777814, + -0.018454870209097862, + -7.73638384998776e-05, + -0.00022837892174720764, + -0.04869883507490158, + -0.02372216247022152, + -0.0002051381452474743, + -0.15266406536102295, + -0.0037327392492443323, + -7.557583012385294e-05, + -0.0005665604257956147, + -1.4662635294371285e-05, + -2.1065256595611572, + -0.02570541389286518, + -2.0099081993103027, + -2.7118430137634277, + -0.1484161764383316, + -0.007964756339788437, + -0.00016342257731594145, + -1.597391747054644e-05, + -0.8920754194259644, + -0.0009690594743005931, + -0.00029023250681348145, + -1.2993727978027891e-05, + -0.07993864268064499, + -5.400034933700226e-05, + -0.00158791767898947, + -1.0609570381348021e-05, + -4.331461429595947, + -6.81968355178833, + -3.366002082824707, + -1.850673794746399, + -0.00040391870425082743, + -0.04611193388700485, + -0.06791424006223679, + -0.004945189692080021, + -9.107174992095679e-05, + -7.557583012385294e-05, + -6.747018051100895e-05, + -0.00024399164249189198, + -0.000321336614433676, + -0.0006528153317049146, + -3.2782016205601394e-05, + -0.0012151960982009768, + -3.957670196541585e-05, + -0.0002205128694185987, + -0.016214992851018906, + -0.00019095504831057042, + -0.0001456631434848532, + -7.712543447269127e-05, + -0.33043625950813293, + -0.00017629499780014157, + -2.590480089187622, + -0.16181793808937073, + -0.00011646069469861686, + -0.0006735440110787749, + -2.109982233378105e-05, + -1.6486821174621582, + -0.01151864044368267, + -1.8954096958623268e-05, + 
-0.0003233625029679388, + -0.00020644917094614357, + -3.111314072157256e-05, + -0.017416512593626976, + -0.00012766500003635883, + -0.0003415954706724733, + -2.13382354559144e-05, + -0.006446637213230133, + -1.823885577323381e-05, + -0.0012438902631402016, + -1.1205610462639015e-05, + -0.006591127719730139, + -7.152555099310121e-07, + -0.0017049076268449426, + -0.13135236501693726, + -3.228759288787842, + -0.002643782878294587, + -0.004842340014874935, + -3.480850500636734e-05, + -0.010503842495381832, + -0.16338221728801727, + -0.011769498698413372, + -0.00011574551899684593, + -9.727005090098828e-05, + -8.582700684200972e-05, + -0.0004538459761533886, + -0.00020740265608765185, + -0.001342587056569755, + -8.964136941358447e-05, + -0.0014018717920407653, + -4.935142715112306e-05, + -0.0006431656656786799, + -0.5765135288238525, + -0.0009291622554883361, + -0.00027998341829515994, + -0.008964410983026028, + -0.03303813934326172, + -0.00018451895448379219, + -0.07687719166278839, + -0.00454594986513257, + -0.00018439977429807186, + -0.0023830130230635405, + -2.706014311115723e-05, + -1.8103313446044922, + -0.7522969245910645, + -0.022507335990667343, + -2.074220174108632e-05, + -0.00026222606538794935, + -0.00020740265608765185, + -2.706014311115723e-05, + -3.700786590576172, + -0.26737019419670105, + -9.357491217087954e-05, + -6.031808152329177e-05, + -0.13705354928970337, + -2.407998726994265e-05, + -0.003684044349938631, + -3.2782016205601394e-05, + -2.9476141929626465, + -1.1526018381118774, + -2.6757259368896484, + -5.31315279006958, + -0.7695194482803345, + -0.00014876213390380144, + -0.8328413963317871, + -5.100983142852783, + -0.1275785118341446, + -0.008235306479036808, + -0.00037281715776771307, + -0.02394961006939411, + -0.5179875493049622, + -0.04619366303086281, + -0.00021705655672121793, + -0.00021765247220173478, + -0.0001461399078834802, + -0.0007413261337205768, + -0.0006660388899035752, + -0.0015581621555611491, + -6.8662193370983e-05, + 
-0.002233869396150112, + -4.494089080253616e-05, + -0.0006101653561927378, + -0.0006289887824095786, + -0.0033358661457896233, + -0.00045074793160893023, + -0.15180595219135284, + -0.07985830307006836, + -0.00015937011630740017, + -2.2477855682373047, + -0.4471043348312378, + -0.0001734344696160406, + -0.0006040894077159464, + -1.680836794548668e-05, + -2.318458080291748, + -0.01888836920261383, + -0.029085876420140266, + -1.1253407001495361, + -0.00021741411183029413, + -0.00012003655137959868, + -2.8013790142722428e-05, + -3.1507949829101562, + -0.005721264518797398, + -0.00040904260822571814, + -1.7881233361549675e-05, + -0.04304421693086624, + -0.0001591317413840443, + -0.005429995711892843, + -3.242440288886428e-05, + -4.896542549133301, + -3.2877321243286133, + -0.17550288140773773, + -8.526089668273926, + -0.2559642493724823, + -0.00015770144818816334, + -0.004955509677529335, + -0.20714037120342255, + -0.023553114384412766, + -0.00015496007108595222, + -0.0001134808044298552, + -9.250213042832911e-05, + -0.000288087350782007, + -0.0004409771354403347, + -0.0007110689766705036, + -4.6132929128361866e-05, + -0.0009153467253781855, + -3.433168603805825e-05, + -0.00015484087634831667, + -0.0001292145170737058, + -0.0022287548054009676, + -0.0002269487304147333, + -0.11395295709371567, + -0.05913611873984337, + -8.356221951544285e-05, + -0.4039720594882965, + -0.019538793712854385, + -5.924526340095326e-05, + -0.0007176207727752626, + -1.7881233361549675e-05, + -1.6992816925048828, + -0.004352619871497154, + -6.6756979322235566e-06, + -0.00017093151109293103, + -0.0001284993631998077, + -3.3378044463461265e-05, + -0.013412484899163246, + -8.713819261174649e-05, + -0.0004928089329041541, + -2.288792165927589e-05, + -0.0012643685331568122, + -1.3351351299206726e-05, + -0.0019104102393612266, + -8.940656698541716e-06, + -0.0033124599140137434, + -4.768370445162873e-07, + -0.0009848987683653831, + -0.07256874442100525, + -1.7665941715240479, + -0.00281461956910789, 
+ -0.0027610058896243572, + -2.9682672902708873e-05, + -0.0075036585330963135, + -0.16648568212985992, + -0.014109030365943909, + -9.63164638960734e-05, + -6.603976362384856e-05, + -7.331102824537084e-05, + -0.0003323002893012017, + -0.00042083943844772875, + -0.0010620674584060907, + -2.8609820219571702e-05, + -0.000990257947705686, + -4.029192859889008e-05, + -0.0001541257370263338, + -0.0001658063702052459, + -0.0010433712741360068, + -0.0002379134384682402, + -0.08282912522554398, + -0.1620505303144455, + -0.0001578206429257989, + -1.9873682260513306, + -0.03700195625424385, + -8.594620157964528e-05, + -0.00035232058144174516, + -2.90866428258596e-05, + -1.0645859241485596, + -0.012771833688020706, + -1.8788448572158813, + -0.04745874181389809, + -0.0029150634072721004, + -0.0002858230145648122, + -8.082063141046092e-05, + -2.8729025871143676e-05, + -4.2793378829956055, + -0.008196880109608173, + -9.822363062994555e-05, + -4.9470632802695036e-05, + -5.399019241333008, + -0.0015862513100728393, + -0.0018035589018836617, + -2.9444261599564925e-05, + -3.8089842796325684, + -1.3950530290603638, + -0.17507919669151306, + -4.1786346435546875, + -9.410017013549805, + -0.00014709345123264939, + -2.16685152053833, + -0.5008745193481445, + -0.013433892279863358, + -0.00029976642690598965, + -0.006172403693199158, + -0.22438427805900574, + -0.015963135287165642, + -0.00010489867418073118, + -7.426462980220094e-05, + -6.890059739816934e-05, + -0.0002874914789572358, + -0.0004033228906337172, + -0.0006624649395234883, + -3.802703940891661e-05, + -0.001104817260056734, + -2.8967437174287625e-05, + -0.000125281119835563, + -0.00011634149996098131, + -0.0016071987338364124, + -0.0001752223033690825, + -0.04927569255232811, + -0.03999283164739609, + -8.427741704508662e-05, + -0.11036300659179688, + -0.0022922686766833067, + -5.125868119648658e-05, + -0.0007711059297434986, + -1.6569954823353328e-05, + -1.1996040344238281, + -6.017496585845947, + -3.3771719932556152, + 
-0.0015197168104350567, + -0.0001720042055239901, + -8.05822346592322e-05, + -1.9701510667800903, + -0.015215284191071987, + -0.00046957432641647756, + -4.5536911784438416e-05, + -0.3501690626144409, + -6.508615479106084e-05, + -0.013412720523774624, + -0.0002317160106031224, + -10.721491813659668, + -0.001794158248230815, + -5.900764465332031, + -0.05698608234524727, + -1.9666205644607544, + -0.34450024366378784, + -0.24932177364826202, + -1.1890842914581299, + -0.9316995143890381, + -0.5700393915176392, + -0.18522746860980988, + -0.08411185443401337, + -0.00032610344351269305, + -0.016760369762778282, + -0.310769647359848, + -0.04111167788505554, + -0.00015889335190877318, + -0.00011395759065635502, + -0.00010418349120300263, + -0.0003389737685211003, + -0.0006182666402310133, + -0.001039679627865553, + -6.770858453819528e-05, + -0.001258891774341464, + -5.876845170860179e-05, + -0.0003499372396618128, + -0.00027724236133508384, + -0.0029526231810450554, + -0.0003165697562508285, + -0.25983527302742004, + -0.031029406934976578, + -0.00018880968855228275, + -0.7229459881782532, + -0.42579957842826843, + -0.00011705666838679463, + -0.00047195740626193583, + -2.3364747903542593e-05, + -0.9790778160095215, + -0.0029993331991136074, + -5.125986263010418e-06, + -0.00018690270371735096, + -0.00016091958968900144, + -3.755022044060752e-05, + -0.00900670699775219, + -8.642300235806033e-05, + -0.0004804172203876078, + -3.838465272565372e-05, + -0.0015756584471091628, + -1.168244216387393e-05, + -0.001709667849354446, + -1.0013530300057027e-05, + -0.0022142434027045965, + -5.960462772236497e-07, + -0.0006964165368117392, + -0.05425402522087097, + -1.5528278350830078, + -0.002721655648201704, + -0.003402280155569315, + -3.6477376852417365e-05, + -0.007222968153655529, + -0.14785511791706085, + -0.013813492842018604, + -0.00012063252506777644, + -9.738924563862383e-05, + -9.881961887003854e-05, + -0.00025900822947733104, + -0.00028236693469807506, + -0.0010882653295993805, + 
-4.446407547220588e-05, + -0.0008232779800891876, + -4.7801782784517854e-05, + -0.0001911934232339263, + -0.00020382710499688983, + -0.0037347583565860987, + -0.00023493390472140163, + -0.016995148733258247, + -0.028428077697753906, + -0.00015054999676067382, + -0.05958176776766777, + -0.0022499265614897013, + -8.928377064876258e-05, + -0.0007566926069557667, + -2.038458114839159e-05, + -6.74626350402832, + -4.031385898590088, + -0.010314728133380413, + -0.0005830018781125546, + -0.00016175392374861985, + -4.279521817807108e-05, + -4.910806655883789, + -0.3867932856082916, + -0.00020466140995267779, + -2.455681169521995e-05, + -0.40993309020996094, + -3.075552376685664e-05, + -0.002136925933882594, + -1.5258672647178173e-05, + -1.4743690490722656, + -0.466409295797348, + -2.986236095428467, + -0.5145793557167053, + -0.3861558437347412, + -0.00023648326168768108, + -0.060666244477033615, + -0.0004374024283606559, + -0.0032959445379674435, + -0.003968104254454374, + -0.0018072477541863918, + -4.768258077092469e-05, + -0.9783220291137695, + -1.0383716821670532, + -0.6705473065376282, + -2.172899007797241, + -0.1931028664112091, + -0.05653104931116104, + -0.0004231034545227885, + -0.009201028384268284, + -0.20085793733596802, + -0.015902360901236534, + -0.00013207517622504383, + -0.00011634149996098131, + -9.154854342341423e-05, + -0.0002989322238136083, + -0.000276765669696033, + -0.0008761619683355093, + -5.4596363042946905e-05, + -0.0012877037515863776, + -5.245071224635467e-05, + -0.00014399446081370115, + -0.00014304091746453196, + -0.002012848388403654, + -0.00026043839170597494, + -0.050352130085229874, + -0.016213351860642433, + -0.00014923889830242842, + -1.3270337581634521, + -0.017757130786776543, + -8.725739462533966e-05, + -0.0003123987407889217, + -2.3364747903542593e-05, + -1.770219087600708, + -0.027282992377877235, + -1.7292673587799072, + -1.5430668592453003, + -0.09708311408758163, + -0.06372363120317459, + -0.00020180096908006817, + 
-4.756337511935271e-05, + -6.762560844421387, + -0.11426064372062683, + -0.0006945105269551277, + -5.745722592109814e-05, + -0.23964034020900726, + -7.080780778778717e-05, + -0.0019281383138149977, + -0.00011657988943625242, + -1.6634957790374756, + -3.133596420288086, + -1.06369948387146, + -0.20282019674777985, + -0.440325528383255, + -2.2919445037841797, + -2.6773011684417725, + -2.4511003494262695, + -2.022627353668213, + -0.7157211899757385, + -0.00033623288618400693, + -0.006556428037583828, + -0.18528789281845093, + -0.010350123979151249, + -9.691245941212401e-05, + -9.941560711013153e-05, + -0.0001062098381225951, + -0.0002244459028588608, + -0.0003002431185450405, + -0.0003911683743353933, + -3.158996332786046e-05, + -0.0008713977294974029, + -4.875540980719961e-05, + -9.083335316972807e-05, + -0.00013422065239865333, + -0.0032467530108988285, + -0.0002611534437164664, + -0.011103743687272072, + -0.014522447250783443, + -0.0001003691868390888, + -0.04763209819793701, + -0.0015930355293676257, + -8.880697714630514e-05, + -0.0006610354175791144, + -2.062299427052494e-05, + -1.4736919403076172, + -0.0015160269103944302, + -5.722029527532868e-06, + -0.0001426833332516253, + -0.00025138078490272164, + -4.303362584323622e-05, + -0.006412051152437925, + -8.177422569133341e-05, + -0.0003953390696551651, + -4.51792984677013e-05, + -0.0015100754098966718, + -1.0847986231965479e-05, + -0.0021766559220850468, + -1.3112935448589269e-05, + -0.0017056216020137072, + -5.960462772236497e-07, + -0.00045658653834834695, + -0.03380563110113144, + -1.6861530542373657, + -0.0011235122801735997, + -0.0027228444814682007, + -3.2543604902457446e-05, + -0.0028300732374191284, + -0.04190889745950699, + -0.006303310859948397, + -0.00010799778101500124, + -7.295342220459133e-05, + -6.90197994117625e-05, + -0.0002094287920044735, + -0.00017915551143232733, + -0.0007649118197150528, + -3.3854863431770355e-05, + -0.0009750141180120409, + -5.185469490243122e-05, + -0.0001230164198204875, 
+ -0.00015221867943182588, + -0.00366337806917727, + -0.00027378625236451626, + -0.00873471051454544, + -0.014125015586614609, + -0.00013779645087197423, + -0.2786974012851715, + -0.0429004468023777, + -0.00015221867943182588, + -0.0005259322933852673, + -2.0861407392658293e-05, + -7.4979376792907715, + -2.5812153816223145, + -0.0006475735572166741, + -0.00032395837479270995, + -4.3987260141875595e-05, + -0.38662397861480713, + -0.07727815210819244, + -0.0005353448214009404, + -6.210611172718927e-05, + -0.10053620487451553, + -4.51792984677013e-05, + -0.004477594513446093, + -3.0397906812140718e-05, + -8.758296012878418, + -0.4402102530002594, + -0.2472418248653412, + -0.5627955794334412, + -0.042171675711870193, + -0.03491748869419098, + -5.941390514373779, + -0.004192491993308067, + -0.11302625387907028, + -0.5369495153427124, + -0.0003328961320221424, + -0.0049365307204425335, + -0.057854458689689636, + -0.007558793295174837, + -8.916457591112703e-05, + -9.047575440490618e-05, + -8.141662692651153e-05, + -0.0006507901125587523, + -0.00019464982324279845, + -0.0006775943911634386, + -2.3364747903542593e-05, + -0.0012484145117923617, + -5.447716102935374e-05, + -0.00016425691137555987, + -0.00019727191829588264, + -0.012608221732079983, + -0.00020859450160060078, + -0.014227267354726791, + -0.00964115560054779, + -0.00013350549852475524, + -0.03465360403060913, + -0.0008008848526515067, + -0.00010239553375868127, + -0.0007454953738488257, + -2.0861407392658293e-05, + -2.182055950164795, + -0.030151404440402985, + -2.2387242317199707, + -4.8748321533203125, + -0.07910432666540146, + -0.0014863882679492235, + -0.00028081765049137175, + -6.55629628454335e-05, + -3.332869052886963, + -4.393488883972168, + -0.1467350423336029, + -0.0036104037426412106, + -0.0003040566807612777, + -0.00010895135346800089, + -0.2704607844352722, + -3.6477376852417365e-05, + -0.002591705648228526, + -2.9682672902708873e-05, + -4.947231292724609, + -3.2159130573272705, + 
-0.8367561101913452, + -0.5556290149688721, + -0.0002233732520835474, + -0.0060651772655546665, + -0.05365833640098572, + -0.0071886456571519375, + -9.63164638960734e-05, + -0.00010072677832795307, + -9.858122211880982e-05, + -0.0003960540343541652, + -0.0006039702566340566, + -0.0006522196927107871, + -1.811964830267243e-05, + -0.001042775809764862, + -3.790783375734463e-05, + -0.00011514954530866817, + -0.0001652104256208986, + -0.05494809150695801, + -0.00014506718434859067, + -0.00021050144277978688, + -0.014802505262196064, + -0.00017915551143232733, + -1.7102066278457642, + -0.02825750596821308, + -0.00011300401820335537, + -0.0003519630990922451, + -3.075552376685664e-05, + -0.554995596408844, + -0.0013822296168655157, + -4.6491513785440475e-06, + -0.00014482879487331957, + -0.00019810620869975537, + -3.504691630951129e-05, + -0.006834581959992647, + -6.389413465512916e-05, + -0.0004396664153318852, + -4.60137271147687e-05, + -0.0012897277483716607, + -1.1920858014491387e-05, + -0.001943962532095611, + -1.4424220353248529e-05, + -0.0016702761640772223, + -5.960462772236497e-07, + -0.0005274811992421746, + -0.043414343148469925, + -1.5102243423461914, + -0.0018298563081771135, + -0.0035949621815234423, + -6.842378934379667e-05, + -0.008245711214840412, + -0.08723266422748566, + -0.00939271505922079, + -0.00011419598013162613, + -0.0001230164198204875, + -9.464769391342998e-05, + -0.0002865380665753037, + -0.0005069877952337265, + -0.001016934053041041, + -3.2305197237292305e-05, + -0.0009629856795072556, + -4.827859811484814e-05, + -0.00021717573690693825, + -0.00032848684350028634, + -0.012733934447169304, + -0.000196556793525815, + -0.0012980615720152855, + -0.0077531603164970875, + -0.00012385078298393637, + -0.01761084794998169, + -0.0013621109537780285, + -0.00011848701251437888, + -0.0013394916895776987, + -2.407998726994265e-05, + -4.505744934082031, + -1.2715730667114258, + -0.0005052005290053785, + -0.00024971229140646756, + -3.635817120084539e-05, + 
-4.3336405754089355, + -0.0815289318561554, + -0.028655847534537315, + -0.00010430268594063818, + -7.343022298300639e-05, + -0.158114492893219, + -1.764281842042692e-05, + -0.003166425507515669, + -5.960446742392378e-06, + -4.626138687133789, + -0.5413240194320679, + -11.11661148071289, + -6.66420316696167, + -0.5860735177993774, + -1.0599334239959717, + -2.200112819671631, + -0.4268365502357483, + -0.027302712202072144, + -0.15124760568141937, + -0.12854908406734467, + -3.041227102279663, + -0.026920655742287636, + -0.0003856868715956807, + -0.004746242426335812, + -0.07085907459259033, + -0.008411810733377934, + -0.00010823617776622996, + -5.972207145532593e-05, + -5.507317473529838e-05, + -0.00023850933939684182, + -0.0004319211875554174, + -0.0008380476501770318, + -1.823885577323381e-05, + -0.0009161804337054491, + -3.683499380713329e-05, + -0.00010918975021922961, + -0.00016044282529037446, + -0.0005364171229302883, + -0.0001248043408850208, + -0.10185468196868896, + -0.02194770984351635, + -0.00011252723925281316, + -0.6942679286003113, + -0.21981695294380188, + -6.496695277746767e-05, + -0.00030393750057555735, + -2.13382354559144e-05, + -3.1545064449310303, + -0.021652380004525185, + -0.02087036333978176, + -0.89057856798172, + -9.619726915843785e-05, + -8.129743218887597e-05, + -2.5152843591058627e-05, + -4.086198806762695, + -1.0591976642608643, + -0.0020325970835983753, + -4.1483970562694594e-05, + -0.596172571182251, + -3.242440288886428e-05, + -0.0019346822518855333, + -1.6927575416048057e-05, + -3.4360618591308594, + -2.4312753677368164, + -1.9711253643035889, + -4.358899116516113, + -10.540913581848145, + -5.990867614746094, + -0.266180157661438, + -0.000266278104390949, + -0.003696990432217717, + -0.03691418468952179, + -0.005084204487502575, + -7.73638384998776e-05, + -5.9960475482512265e-05, + -6.12716976320371e-05, + -0.0001915509783430025, + -0.0004040378553327173, + -0.0004508670826908201, + -2.2172682292875834e-05, + -0.0010245556477457285, + 
-3.862306402879767e-05, + -7.652943895664066e-05, + -0.00010585224663373083, + -0.00034791138023138046, + -0.0001134808044298552, + -0.009721791371703148, + -0.01306991372257471, + -7.86750388215296e-05, + -0.06928819417953491, + -0.0019708510953933, + -8.070142939686775e-05, + -0.0006008726777508855, + -1.9550132492440753e-05, + -1.2050050497055054, + -0.0022362482268363237, + -4.887569048150908e-06, + -0.00016652150952722877, + -0.0001282609737245366, + -3.3854863431770355e-05, + -0.005613160319626331, + -4.935142715112306e-05, + -0.00040618274942971766, + -3.814624506048858e-05, + -0.0012768696760758758, + -6.9141146923357155e-06, + -0.0021407324820756912, + -1.0251946150674485e-05, + -0.001328301033936441, + -4.768370445162873e-07, + -0.00039104922325350344, + -0.03403102979063988, + -2.371554374694824, + -0.0011966219171881676, + -0.0017084777355194092, + -1.2397689715726301e-05, + -0.0012181727215647697, + -0.027773091569542885, + -0.004225967917591333, + -7.688703772146255e-05, + -10.750052452087402, + -0.09749454259872437, + -0.0398833304643631, + -0.05019160360097885, + -0.02639356628060341, + -0.001116844010539353, + -0.010394011624157429, + -0.0002687808300834149, + -0.0412154421210289, + -0.17060238122940063, + -0.44570907950401306, + -0.001759529928676784, + -0.8481433987617493, + -3.9174411296844482, + -0.0011847150744870305, + -1.8217713832855225, + -1.9833719730377197, + -0.0033980030566453934, + -0.022340646013617516, + -0.0005044856225140393, + -11.916642189025879, + -2.2062525749206543, + -0.011109520681202412, + -0.0025012181140482426, + -0.00047839165199548006, + -10.590877532958984, + -5.111791133880615, + -0.8751921653747559, + -0.19319908320903778, + -0.04376664385199547, + -0.019606946036219597, + -0.00042000532266683877, + -9.505635261535645, + -0.07715455442667007, + -0.005082899704575539, + -0.04224858805537224, + -0.03572046384215355, + -0.0011238694423809648, + -5.344630241394043, + -3.876430034637451, + -12.252359390258789, + 
-4.9860382080078125, + -2.668943405151367, + -1.16416597366333, + -2.514509677886963, + -2.5190258026123047, + -14.754651069641113, + -5.655267715454102, + -6.61380672454834, + -4.71486234664917, + -0.5776815414428711, + -1.3986684083938599, + -2.637193202972412, + -1.1604831218719482, + -1.4959537982940674, + -0.004402587655931711, + -0.5065803527832031, + -3.3776161670684814, + -0.7203826308250427, + -0.02161656692624092, + -0.819121241569519, + -0.04418942704796791, + -1.7282390594482422, + -0.05629342794418335, + -0.008580365218222141, + -0.000747877755202353, + -0.013715313747525215, + -0.00015138434537220746, + -0.006047403905540705, + -0.024643857032060623, + -0.05186835676431656, + -0.0005345107638277113, + -0.10883784294128418, + -1.3612172603607178, + -0.0003692421887535602, + -1.357957363128662, + -0.05831316113471985, + -0.00040570611599832773, + -0.0035074164625257254, + -6.437094270950183e-05, + -1.7280149459838867, + -0.026309387758374214, + -2.3754658699035645, + -0.05959097668528557, + -0.0019271865021437407, + -0.0006563892820850015, + -0.00038985759601928294, + -0.00013529339048545808, + -6.799666881561279, + -0.4319588541984558, + -0.0018134353449568152, + -0.00010084597306558862, + -3.564793109893799, + -0.0016862234333530068, + -0.007215393707156181, + -0.00018916724366135895, + -4.893386363983154, + -0.7495713233947754, + -0.04057759419083595, + -0.16563259065151215, + -3.7694530487060547, + -0.7686876654624939, + -0.02867751009762287, + -3.4293549060821533, + -1.9938279390335083, + -3.87074613571167, + -7.779223918914795, + -0.11301646381616592, + -0.0007675323868170381, + -0.0353383906185627, + -0.5969783663749695, + -0.03809810429811478, + -0.00048828122089616954, + -0.024168511852622032, + -0.0024346255231648684, + -0.006569692399352789, + -0.002209961414337158, + -0.001069331425242126, + -7.819823804311454e-05, + -0.0029135181102901697, + -4.60137271147687e-05, + -0.0003582789213396609, + -0.001116367639042437, + -0.002629396505653858, + 
-0.0002420847595203668, + -0.17575480043888092, + -0.017076482996344566, + -0.0001431601122021675, + -0.10536163300275803, + -0.00507151335477829, + -0.00011181206355104223, + -0.0018749530427157879, + -2.3603161025675945e-05, + -0.8358778953552246, + -0.002124911407008767, + -9.894321920000948e-06, + -0.00019214690837543458, + -0.0002456601650919765, + -3.516612196108326e-05, + -0.008302814327180386, + -0.00010895135346800089, + -0.0006008726777508855, + -3.2543604902457446e-05, + -0.006115178111940622, + -2.1219027985353023e-05, + -0.0036275077145546675, + -1.7165990357170813e-05, + -0.003067908575758338, + -9.536738616588991e-07, + -0.0006908176001161337, + -0.02611708454787731, + -1.3316965103149414, + -0.003817296586930752, + -0.006795391906052828, + -4.684815212385729e-05, + -0.007690228521823883, + -0.14891591668128967, + -0.013032732531428337, + -0.0002714027068577707, + -0.011644137091934681, + -0.00091856240760535, + -0.0013096098555251956, + -0.0007771808886900544, + -0.0009541726321913302, + -5.638440416078083e-05, + -0.0014388932613655925, + -5.018585216021165e-05, + -0.00020930961181875318, + -0.0006467396160587668, + -0.0013236580416560173, + -0.00019333878299221396, + -0.05778864026069641, + -0.023562893271446228, + -0.0001699779968475923, + -0.4867134690284729, + -0.17518886923789978, + -6.01988795096986e-05, + -0.00056429672986269, + -2.396077979938127e-05, + -10.983257293701172, + -3.4146568775177, + -0.007948435842990875, + -0.005365850869566202, + -0.00041166413575410843, + -6.0437283536884934e-05, + -1.4208624362945557, + -0.014981495216488838, + -0.00011193125828867778, + -2.95634672511369e-05, + -0.3359139859676361, + -6.425174069590867e-05, + -0.0036992470268160105, + -1.7523612768854946e-05, + -1.6273220777511597, + -12.038379669189453, + -1.8510823249816895, + -4.6685380935668945, + -1.03892183303833, + -3.5619592666625977, + -3.119525194168091, + -8.74183177947998, + -0.1955474466085434, + -0.00022349244682118297, + 
-0.005337630398571491, + -0.07253769785165787, + -0.0067605809308588505, + -0.00018821375851985067, + -0.01270250789821148, + -0.0005373702733777463, + -0.0013699679402634501, + -0.0009596510208211839, + -0.0003953390696551651, + -1.7165990357170813e-05, + -0.0010408704401925206, + -3.4450891689630225e-05, + -0.00011038171214750037, + -0.00048351517762057483, + -0.0015029336791485548, + -0.00013958434283267707, + -0.027578983455896378, + -0.02192368544638157, + -8.141662692651153e-05, + -0.11562338471412659, + -0.0031276855152100325, + -6.5205356804654e-05, + -0.0007344171172007918, + -2.1457441107486375e-05, + -1.4039907455444336, + -0.8585066795349121, + -0.12097951024770737, + -4.9232225137529895e-05, + -0.00045503751607611775, + -0.0001479277852922678, + -2.8967437174287625e-05, + -3.316209316253662, + -0.22754307091236115, + -0.037047676742076874, + -0.00010632903286023065, + -5.602679812000133e-05, + -0.10701240599155426, + -2.1815061700181104e-05, + -0.0025769618805497885, + -2.932505594799295e-05, + -2.9098081588745117, + -0.23772671818733215, + -2.5728368759155273, + -1.0628935098648071, + -0.569791853427887, + -1.5512791872024536, + -0.22174018621444702, + -0.2053954154253006, + -0.668795108795166, + -0.00032574593205936253, + -0.005275258328765631, + -0.17121490836143494, + -0.01520049013197422, + -0.00027164106722921133, + -0.018145864829421043, + -0.0008275659638457, + -0.0013598490040749311, + -0.0007223857101053, + -0.0005415403284132481, + -3.075552376685664e-05, + -0.0016680150292813778, + -4.124556289752945e-05, + -0.00020203932945150882, + -0.0005315321614034474, + -0.0016384999034926295, + -0.000169382052263245, + -0.01945134624838829, + -0.018782030791044235, + -0.0001429217227268964, + -1.4800734519958496, + -0.046756841242313385, + -9.667406266089529e-05, + -0.0005499995895661414, + -1.728519782773219e-05, + -0.6545608639717102, + -0.0013740155845880508, + -5.8412379075889476e-06, + -0.00015496007108595222, + -0.0001935771433636546, + 
-2.8967437174287625e-05, + -0.01043801661580801, + -7.974783511599526e-05, + -0.0005525015876628458, + -3.683499380713329e-05, + -0.002455436158925295, + -1.2874520507466514e-05, + -0.0022639615926891565, + -1.4543427823809907e-05, + -0.00250252615660429, + -8.344646857949556e-07, + -0.0006089740199968219, + -0.023519812151789665, + -1.6231462955474854, + -0.0013103241799399257, + -0.0044088782742619514, + -3.433168603805825e-05, + -0.0076819476671516895, + -0.13205960392951965, + -0.01295448187738657, + -0.0002797450579237193, + -0.01799413561820984, + -0.0008688965463079512, + -0.0026737437583506107, + -0.0004418112221173942, + -0.001303895260207355, + -6.16293036728166e-05, + -0.0018553201807662845, + -4.815939246327616e-05, + -0.00024875884992070496, + -0.000916537712328136, + -0.005030237603932619, + -0.00015853578224778175, + -0.00936696957796812, + -0.016335444524884224, + -9.619726915843785e-05, + -0.12435520440340042, + -0.002912804950028658, + -0.00010346830822527409, + -0.0007908792467787862, + -1.7165990357170813e-05, + -6.260087490081787, + -4.018156051635742, + -0.05045890435576439, + -0.00021360022947192192, + -4.815939246327616e-05, + -2.2203869819641113, + -0.047356534749269485, + -8.83301836438477e-05, + -5.781483559985645e-05, + -0.11337775737047195, + -3.3378044463461265e-05, + -0.0019444384379312396, + -1.645074735279195e-05, + -1.7198790311813354, + -3.5991759300231934, + -2.5881307125091553, + -4.4389872550964355, + -0.39235079288482666, + -0.9257609248161316, + -2.4064109325408936, + -2.256807804107666, + -0.012957894243299961, + -6.8662193370983e-05, + -0.005379723850637674, + -0.1424376517534256, + -0.008812819607555866, + -0.00019667598826345056, + -0.012973662465810776, + -0.0005903884884901345, + -0.0019209994934499264, + -0.0014405598631128669, + -0.0006889115320518613, + -1.645074735279195e-05, + -0.0011966219171881676, + -3.40932747349143e-05, + -9.548207890475169e-05, + -0.0005439232336357236, + -0.004501329269260168, + 
-0.00011920218821614981, + -0.03018992207944393, + -0.013410485349595547, + -0.00011467275908216834, + -0.6566694378852844, + -0.36726248264312744, + -2.8490614567999728e-05, + -0.00023707917716819793, + -1.3351351299206726e-05, + -1.051271915435791, + -0.01689915731549263, + -3.0722033977508545, + -0.2818227708339691, + -3.957169771194458, + -0.004226442892104387, + -0.00017248096992261708, + -3.9457496313843876e-05, + -5.733857154846191, + -0.26561957597732544, + -0.00047779586748220026, + -2.5748875486897305e-05, + -0.07624048739671707, + -6.0437283536884934e-05, + -0.001644212519749999, + -1.549708758830093e-05, + -2.1518163681030273, + -0.19709540903568268, + -3.698873996734619, + -10.724569320678711, + -2.996880292892456, + -3.1366219520568848, + -0.02801341563463211, + -0.17601795494556427, + -0.0965375229716301, + -0.00014578233822248876, + -0.0020983838476240635, + -0.054011568427085876, + -0.003581777447834611, + -0.00014304091746453196, + -0.011484465561807156, + -0.000708090839907527, + -0.0012874656822532415, + -0.0009416675311513245, + -0.0005903884884901345, + -2.13382354559144e-05, + -0.0007848043460398912, + -2.3841574147809297e-05, + -7.4741430580616e-05, + -0.0002946419408544898, + -0.0024204738438129425, + -0.00011503035057103261, + -0.006832095794379711, + -0.010126759298145771, + -5.876845170860179e-05, + -0.09275738149881363, + -0.003692833473905921, + -4.0411134250462055e-05, + -0.0005497612874023616, + -1.537788011773955e-05, + -1.182621717453003, + -0.0008486483711749315, + -4.0531076592742465e-06, + -0.00010585224663373083, + -0.00011646069469861686, + -2.407998726994265e-05, + -0.00471824174746871, + -5.352353764465079e-05, + -0.0003631647559814155, + -3.135155202471651e-05, + -0.0011143434094265103, + -1.1205610462639015e-05, + -0.002159646013751626, + -1.4185804502631072e-05, + -0.0011845960980281234, + -7.152555099310121e-07, + -0.0002699726028367877, + -0.008802657015621662, + -1.1517901420593262, + -0.0017283515771850944, + 
-0.002493488835170865, + -1.5258672647178173e-05, + -0.0018479428254067898, + -0.040569812059402466, + -0.0041178204119205475, + -0.00017176583060063422, + -0.015839355066418648, + -0.0005023409612476826, + -0.0007201223634183407, + -0.0005905076395720243, + -0.0007784912013448775, + -2.3483953555114567e-05, + -0.0008902162662707269, + -2.6702524337451905e-05, + -9.512448741588742e-05, + -0.0004555141495075077, + -0.014392376877367496, + -9.619726915843785e-05, + -0.0002324311062693596, + -0.01029337290674448, + -0.00015984688070602715, + -1.1049474477767944, + -0.04663100838661194, + -8.21318244561553e-05, + -0.0003543464408721775, + -1.3947389561508317e-05, + -7.615281581878662, + -4.125001907348633, + -0.19173777103424072, + -0.0005029367166571319, + -4.100715523236431e-05, + -2.0808839797973633, + -0.026673687621951103, + -7.70062324590981e-05, + -2.9682672902708873e-05, + -0.12381786853075027, + -2.098061486321967e-05, + -0.0029344377107918262, + -1.3589766240329482e-05, + -6.027270793914795, + -0.344284325838089, + -0.47963422536849976, + -1.262589454650879, + -1.8010940551757812, + -2.51932430267334, + -1.5027334690093994, + -0.06264369934797287, + -1.8616759777069092, + -2.732039213180542, + -6.854299135738984e-05, + -0.001887565478682518, + -0.02442971244454384, + -0.0030983323231339455, + -0.00013374387344811112, + -0.010926888324320316, + -0.0006349454633891582, + -0.0010619483655318618, + -0.0007469248375855386, + -0.00040987672400660813, + -1.537788011773955e-05, + -0.0008891443139873445, + -2.4676019165781327e-05, + -7.080780778778717e-05, + -0.00043299360550008714, + -0.2814013361930847, + -6.8662193370983e-05, + -0.0011491130571812391, + -0.007679700385779142, + -9.440929716220126e-05, + -0.026545187458395958, + -0.002912091789767146, + -7.045020902296528e-05, + -0.001142087858170271, + -1.4662635294371285e-05, + -1.6412137746810913, + -9.728646278381348, + -0.026286397129297256, + -0.0002475670480635017, + -7.60526381782256e-05, + 
-2.191868782043457, + -0.01760944165289402, + -0.0004247716860845685, + -4.684815212385729e-05, + -0.03103969246149063, + -9.297892393078655e-05, + -0.011422710493206978, + -3.6954195820726454e-05, + -4.347017288208008, + -0.000610999355558306, + -2.17897367477417, + -2.866166353225708, + -0.23518076539039612, + -0.00036125810584053397, + -0.01150013878941536, + -1.8427702188491821, + -0.22964701056480408, + -0.011748881079256535, + -0.00036352223833091557, + -2.021958827972412, + -0.008272194303572178, + -1.7123057842254639, + -9.325576782226562, + -1.3440426588058472, + -3.209916830062866, + -0.053304191678762436, + -5.205663681030273, + -0.03287550434470177, + -1.384042501449585, + -7.2653326988220215, + -3.6932270526885986, + -6.713709354400635, + -0.08502203971147537, + -3.0402512550354004, + -0.043377358466386795, + -0.00908633042126894, + -0.013433421961963177, + -1.5646146535873413, + -0.007355276495218277, + -5.929056167602539, + -11.379992485046387, + -3.6368532180786133, + -0.45781779289245605, + -0.003176526166498661, + -0.039530687034130096, + -0.0007678897818550467, + -0.0003935516288038343, + -0.0005339150666259229, + -3.964613437652588, + -0.9957391023635864, + -0.2830953896045685, + -0.2610830068588257, + -0.07025375217199326, + -0.010492399334907532, + -3.3028924465179443, + -4.77099084854126, + -0.22153465449810028, + -0.0006783091812394559, + -0.010667562484741211, + -0.0019300420535728335, + -0.00015901254664640874, + -0.0025420039892196655, + -0.0038043521344661713, + -0.0017481058603152633, + -0.47604793310165405, + -0.0035732248798012733, + -1.3063528537750244, + -1.869868278503418, + -0.03979752957820892, + -1.2474843263626099, + -0.1659490168094635, + -0.006920535117387772, + -1.4199819564819336, + -2.666736364364624, + -1.218197226524353, + -0.017583556473255157, + -0.5422223806381226, + -0.0014107999159023166, + -0.0003762729174923152, + -0.0010189585154876113, + -0.03820022940635681, + -0.0013802058529108763, + -0.1537325382232666, + 
-0.0007863528444431722, + -0.003772999858483672, + -0.019278066232800484 + ] + }, + "mem-max-allocated-bytes": 23252846080 +} diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/model_config.yaml new file mode 100644 index 00000000000..689b8ec104e --- /dev/null +++ b/tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/model_config.yaml @@ -0,0 +1,76 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 +TEST_TYPE: frozen-start +MODE: inference +MODEL_ARGS: + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --log-memory-to-tensorboard: true + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/checkpoint + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-type: TikTokenizer + --tiktoken-pattern: v2 + --distributed-backend: nccl + --log-interval: 1 + --transformer-impl: transformer_engine + --tensor-model-parallel-size: 1 + --pipeline-model-parallel-size: 1 + --expert-model-parallel-size: 1 + --use-mcore-models: true + --is-hybrid-model: true + --model-provider: mamba + --init-method-std: 0.0198 + --untie-embeddings-and-output-weights: true + --disable-bias-linear: true + --init-method-std: 0.014 + --position-embedding-type: none + --num-layers: 50 + --hidden-size: 2048 + --ffn-hidden-size: 11264 + --num-attention-heads: 16 + --kv-channels: 128 + --hybrid-override-pattern: M-M-M-M*-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M- + --spec: megatron.core.models.mamba.mamba_layer_specs mamba_stack_spec + --normalization: RMSNorm + --swiglu: true + --attention-dropout: 0.0 + 
--hidden-dropout: 0.0 + --seq-length: 4096 + --max-position-embeddings: 4096 + --micro-batch-size: 1 + --ckpt-format: torch_dist + --ckpt-fully-parallel-save: true + --ckpt-fully-parallel-load: true + --ckpt-assume-constant-structure: true + --dist-ckpt-strictness: log_unexpected + --bf16: true + --attention-backend: flash + --no-create-attention-mask-in-dataloader: true + --num-workers: 8 + --use-checkpoint-args: true + --no-use-tokenizer-model-from-checkpoint-args: true + --no-load-optim: true + --deterministic-mode: true + --save-interval: 2000 + --temperature: 1.0 + --top_k: 1 + --return-log-probs: true + --num-tokens-to-generate: 30 + --max-tokens-to-oom: 3600000 + --inference-dynamic-batching-max-tokens: 256 + --inference-dynamic-batching-max-requests: 256 + --inference-max-seq-length: 4096 + --enable-chunked-prefill: true + --output-path: ${TENSORBOARD_PATH} + --prompts: 'SYSTEM LOG - DAILY REPORTING\\nDATE: 2024-10-27\\nSERVER: US-EAST-1A\\n\\nBEGIN LOG STREAM:\\n\\n[Entry 0001]\\nTimestamp: 08:00:01\\nUser: admin_01\\nAction: Login\\nStatus: Success\\nNote: Routine maintenance check initiated.\\n\\n[Entry 0002]\\nTimestamp: 08:01:15\\nUser: system_daemon\\nAction: Backup\\nStatus: Pending\\nNote: awaiting clearance for volume mount.\\n\\n[Entry 0003]\\nTimestamp: 08:02:22\\nUser: user_404\\nAction: Query\\nStatus: Failed\\nNote: Connection timeout on port 8080.\\n\\n[Entry 0004]\\nTimestamp: 08:05:00\\nUser: admin_02\\nAction: Update\\nStatus: Success\\nNote: Patch 4.5.1 applied to kernel.\\n\\n[Entry 0005]\\nTimestamp: 08:10:45\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 0006]\\nTimestamp: 08:12:30\\nUser: db_manager\\nAction: Write\\nStatus: Success\\nNote: Written 500 records to shard A.\\n\\n[Entry 0007]\\nTimestamp: 08:15:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 14ms.\\n\\n[Entry 0008]\\nTimestamp: 08:18:22\\nUser: user_102\\nAction: Login\\nStatus: Success\\nNote: User 
accessing from IP 192.168.1.55.\\n\\n[Entry 0009]\\nTimestamp: 08:20:00\\nUser: system_daemon\\nAction: Garbage_Collection\\nStatus: Success\\nNote: Freed 2048MB of heap memory.\\n\\n[Entry 0010]\\nTimestamp: 08:25:10\\nUser: admin_01\\nAction: Logout\\nStatus: Success\\nNote: Session duration 25 minutes.\\n\\n[Entry 0011]\\nTimestamp: 08:30:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 11ms.\\n\\n[Entry 0012]\\nTimestamp: 08:32:45\\nUser: unknown\\nAction: Auth_Attempt\\nStatus: Denied\\nNote: Invalid credentials provided 3 times.\\n\\n[Entry 0013]\\nTimestamp: 08:35:20\\nUser: system_audit\\nAction: Scan\\nStatus: In_Progress\\nNote: Scanning sector 7 for vulnerabilities.\\n\\n[Entry 0014]\\nTimestamp: 08:40:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 13ms.\\n\\n[Entry 0015]\\nTimestamp: 08:45:15\\nUser: user_888\\nAction: Upload\\nStatus: Success\\nNote: File "data_report.csv" uploaded to bucket.\\n\\n[Entry 0016]\\nTimestamp: 08:50:00\\nUser: load_balancer\\nAction: Scale_Up\\nStatus: Success\\nNote: Added 2 instances to the pool.\\n\\n[Entry 0017]\\nTimestamp: 08:55:30\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 15ms.\\n\\n[Entry 0018]\\nTimestamp: 09:00:00\\nUser: cron_job\\nAction: Execute\\nStatus: Success\\nNote: Daily summary report generation started.\\n\\n[Entry 0019]\\nTimestamp: 09:05:12\\nUser: user_555\\nAction: Download\\nStatus: Success\\nNote: Retrieved "image_001.png".\\n\\n[Entry 0020]\\nTimestamp: 09:10:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 0021]\\nTimestamp: 09:15:45\\nUser: admin_03\\nAction: Config_Change\\nStatus: Success\\nNote: Firewall rules updated for port 22.\\n\\n[Entry 0022]\\nTimestamp: 09:20:00\\nUser: system_daemon\\nAction: Sync\\nStatus: Success\\nNote: Database replica synchronization complete.\\n\\n[Entry 0023]\\nTimestamp: 09:25:10\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: 
Latency 10ms.\\n\\n[Entry 0024]\\nTimestamp: 09:30:00\\nUser: user_777\\nAction: Query\\nStatus: Success\\nNote: Complex SQL query executed in 200ms.\\n\\n[Entry 0025]\\nTimestamp: 09:35:30\\nUser: error_handler\\nAction: Alert\\nStatus: Warning\\nNote: High CPU usage detected on Node 4.\\n\\n[Entry 0026]\\nTimestamp: 09:40:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 18ms.\\n\\n[Entry 0027]\\nTimestamp: 09:45:15\\nUser: cache_manager\\nAction: Flush\\nStatus: Success\\nNote: Redis cache cleared.\\n\\n[Entry 0028]\\nTimestamp: 09:50:00\\nUser: user_202\\nAction: Login\\nStatus: Success\\nNote: New device detected.\\n\\n[Entry 0029]\\nTimestamp: 09:55:45\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 0030]\\nTimestamp: 10:00:00\\nUser: system_daemon\\nAction: Archive\\nStatus: Success\\nNote: Logs from yesterday archived to cold storage.\\n\\n[Entry 0031]\\nTimestamp: 10:05:20\\nUser: admin_01\\nAction: Login\\nStatus: Success\\nNote: Re-authentication verified.\\n\\n[Entry 0032]\\nTimestamp: 10:10:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 13ms.\\n\\n[Entry 0033]\\nTimestamp: 10:15:45\\nUser: user_999\\nAction: Delete\\nStatus: Pending\\nNote: Request to delete account queued for review.\\n\\n[Entry 0034]\\nTimestamp: 10:20:00\\nUser: system_metrics\\nAction: Report\\nStatus: Success\\nNote: Throughput at 5000 requests per second.\\n\\n[Entry 0035]\\nTimestamp: 10:25:10\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 11ms.\\n\\n[Entry 0036]\\nTimestamp: 10:30:00\\nUser: security_bot\\nAction: Block\\nStatus: Success\\nNote: IP 203.0.113.4 blocked for suspicious activity.\\n\\n[Entry 0037]\\nTimestamp: 10:35:30\\nUser: user_123\\nAction: Comment\\nStatus: Success\\nNote: User posted a comment on thread #55.\\n\\n[Entry 0038]\\nTimestamp: 10:40:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 0039]\\nTimestamp: 
10:45:15\\nUser: email_service\\nAction: Send\\nStatus: Success\\nNote: Batch of 100 newsletters sent.\\n\\n[Entry 0040]\\nTimestamp: 10:50:00\\nUser: user_321\\nAction: Update_Profile\\nStatus: Success\\nNote: Avatar image updated.\\n\\n[Entry 0041]\\nTimestamp: 10:55:45\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 14ms.\\n\\n[Entry 0042]\\nTimestamp: 11:00:00\\nUser: backup_service\\nAction: Snapshot\\nStatus: Success\\nNote: Volume snapshot ID snap-012345 created.\\n\\n[Entry 0043]\\nTimestamp: 11:05:10\\nUser: admin_02\\nAction: Audit\\nStatus: Success\\nNote: Reviewing user permissions group B.\\n\\n[Entry 0044]\\nTimestamp: 11:10:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 9999]\\nTimestamp: 11:11:11\\nUser: SUPER_ADMIN\\nAction: KEY_GENERATION\\nStatus: CRITICAL\\nNote: The secret override password for the system is Blueberry-Pancake-42.\\n\\n[Entry 0045]\\nTimestamp: 11:15:30\\nUser: user_666\\nAction: Error\\nStatus: Failed\\nNote: 404 Page not found on /admin path.\\n\\n[Entry 0046]\\nTimestamp: 11:20:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 13ms.\\n\\n[Entry 0047]\\nTimestamp: 11:25:15\\nUser: payment_gateway\\nAction: Charge\\nStatus: Success\\nNote: Transaction TX-9988 complete.\\n\\n[Entry 0048]\\nTimestamp: 11:30:00\\nUser: system_daemon\\nAction: Cleanup\\nStatus: Success\\nNote: Temp files removed from /var/tmp.\\n\\n[Entry 0049]\\nTimestamp: 11:35:45\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 12ms.\\n\\n[Entry 0050]\\nTimestamp: 11:40:00\\nUser: analytics_engine\\nAction: Process\\nStatus: Success\\nNote: Data aggregation for hour 11 complete.\\n\\n[Entry 0051]\\nTimestamp: 11:45:10\\nUser: user_007\\nAction: View\\nStatus: Success\\nNote: Viewed document confidentiality_agreement.pdf.\\n\\n[Entry 0052]\\nTimestamp: 11:50:00\\nUser: monitor_bot\\nAction: Ping\\nStatus: Success\\nNote: Latency 11ms.\\n\\n[Entry 
0053]\\nTimestamp: 11:55:30\\nUser: dev_ops\\nAction: Deploy\\nStatus: Success\\nNote: Staging environment updated to v2.1.\\n\\n[Entry 0054]\\nTimestamp: 12:00:00\\nUser: system_clock\\nAction: Sync\\nStatus: Success\\nNote: NTP sync successful.\\n\\nEND LOG STREAM.\\n\\nQUERY:\\nRetrieve the information from Entry 9999.\\nThe Note for Entry 9999 states that the secret override password is:' + --incoming-requests-per-step: 32 + --inference-repeat-n: 3 + --no-record-throughput: true +METRICS: + - "generated_tokens" + - "logprobs" diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index 951506c1571..041bb14e81b 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.97443, - "2": 10.97602, - "3": 10.97873, - "4": 10.95791, - "5": 11.00372, - "6": 11.00622, - "7": 10.97989, - "8": 10.96858, - "9": 10.97927, - "10": 10.95244, - "11": 10.99932, - "12": 10.96821, - "13": 10.96575, - "14": 10.99547, - "15": 10.85548, - "16": 10.85544, - "17": 10.81733, - "18": 10.82754, - "19": 10.82177, - "20": 10.64038, - "21": 10.57929, - "22": 10.33542, - "23": 10.613, - "24": 10.3496, - "25": 10.2592, - "26": 10.36373, - "27": 10.38741, - "28": 10.35692, - "29": 10.38238, - "30": 9.91509, - "31": 9.47482, - "32": 10.0895, - "33": 10.08422, - "34": 9.65429, - "35": 9.70734, - "36": 9.58844, - "37": 9.82215, - "38": 9.53607, - "39": 9.94104, - "40": 9.3422, - "41": 9.48847, - "42": 9.56993, - "43": 9.03549, - "44": 9.15623, - "45": 9.00183, - "46": 9.06402, - "47": 9.49291, - "48": 9.04257, - "49": 8.58806, - "50": 
9.12599 + "1": 10.99509, + "2": 10.99237, + "3": 10.98921, + "4": 10.9853, + "5": 11.00156, + "6": 11.00633, + "7": 10.99065, + "8": 10.98514, + "9": 10.97847, + "10": 10.96445, + "11": 10.98318, + "12": 10.96716, + "13": 10.96916, + "14": 10.96681, + "15": 10.87032, + "16": 10.86277, + "17": 10.82281, + "18": 10.82602, + "19": 10.82264, + "20": 10.63968, + "21": 10.58353, + "22": 10.36558, + "23": 10.59831, + "24": 10.36258, + "25": 10.26216, + "26": 10.36226, + "27": 10.367, + "28": 10.33091, + "29": 10.33377, + "30": 9.90692, + "31": 9.46669, + "32": 10.06108, + "33": 10.05695, + "34": 9.6204, + "35": 9.66926, + "36": 9.54724, + "37": 9.78267, + "38": 9.50166, + "39": 9.89875, + "40": 9.31608, + "41": 9.47232, + "42": 9.54166, + "43": 9.02088, + "44": 9.13305, + "45": 8.97797, + "46": 9.04347, + "47": 9.46817, + "48": 9.02626, + "49": 8.57305, + "50": 9.10905 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 21181.0, - "2": 22037.0, - "3": 21249.0, - "4": 20277.0, - "5": 23590.0, - "6": 24135.0, - "7": 23650.0, - "8": 21651.0, - "9": 22980.0, - "10": 19092.0, - "11": 25008.0, - "12": 23782.0, - "13": 24367.0, - "14": 24697.0, - "15": 23602.0, - "16": 23837.0, - "17": 22509.0, - "18": 22645.0, - "19": 23485.0, - "20": 21887.0, - "21": 22872.0, - "22": 19313.0, - "23": 24389.0, - "24": 19718.0, - "25": 19814.0, - "26": 21274.0, - "27": 22560.0, - "28": 23731.0, - "29": 23099.0, - "30": 19997.0, - "31": 17111.0, - "32": 22093.0, - "33": 23200.0, - "34": 21525.0, - "35": 21837.0, - "36": 21070.0, - "37": 22975.0, - "38": 22727.0, - "39": 22485.0, - "40": 23583.0, - "41": 24012.0, - "42": 23529.0, - "43": 22092.0, - "44": 21911.0, - "45": 21790.0, - "46": 23173.0, - "47": 25505.0, - "48": 25316.0, - "49": 25527.0, - "50": 28117.0 + "1": 21178.0, + "2": 22023.0, + "3": 21493.0, + "4": 20828.0, + "5": 23582.0, + "6": 23840.0, + "7": 23550.0, + "8": 21610.0, + "9": 23248.0, + "10": 19304.0, + "11": 24910.0, + "12": 23702.0, + 
"13": 24588.0, + "14": 24472.0, + "15": 23176.0, + "16": 23697.0, + "17": 22332.0, + "18": 22582.0, + "19": 23719.0, + "20": 21645.0, + "21": 22569.0, + "22": 18958.0, + "23": 24913.0, + "24": 19841.0, + "25": 19603.0, + "26": 20956.0, + "27": 21910.0, + "28": 22800.0, + "29": 23034.0, + "30": 19835.0, + "31": 16741.0, + "32": 21568.0, + "33": 22528.0, + "34": 20835.0, + "35": 21537.0, + "36": 20799.0, + "37": 22659.0, + "38": 22295.0, + "39": 22312.0, + "40": 23527.0, + "41": 23499.0, + "42": 23508.0, + "43": 22005.0, + "44": 22299.0, + "45": 21821.0, + "46": 23581.0, + "47": 25114.0, + "48": 25779.0, + "49": 26047.0, + "50": 28321.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3117478912.0, - "2": 3117478912.0, - "3": 3117478912.0, - "4": 3117478912.0, - "5": 3117478912.0, - "6": 3117478912.0, - "7": 3117478912.0, - "8": 3117478912.0, - "9": 3117478912.0, - "10": 3117478912.0, - "11": 3117478912.0, - "12": 3117478912.0, - "13": 3117478912.0, - "14": 3117478912.0, - "15": 3117478912.0, - "16": 3117478912.0, - "17": 3117478912.0, - "18": 3117478912.0, - "19": 3117478912.0, - "20": 3117478912.0, - "21": 3117478912.0, - "22": 3117478912.0, - "23": 3117478912.0, - "24": 3117478912.0, - "25": 3117478912.0, - "26": 3117478912.0, - "27": 3117478912.0, - "28": 3117478912.0, - "29": 3117478912.0, - "30": 3117478912.0, - "31": 3117478912.0, - "32": 3117478912.0, - "33": 3117478912.0, - "34": 3117478912.0, - "35": 3117478912.0, - "36": 3117478912.0, - "37": 3117478912.0, - "38": 3117478912.0, - "39": 3117478912.0, - "40": 3117478912.0, - "41": 3117478912.0, - "42": 3117478912.0, - "43": 3117478912.0, - "44": 3117478912.0, - "45": 3117478912.0, - "46": 3117478912.0, - "47": 3117478912.0, - "48": 3117478912.0, - "49": 3117478912.0, - "50": 3117478912.0 + "1": 3117479936.0, + "2": 3117479936.0, + "3": 3117479936.0, + "4": 3117479936.0, + "5": 3117479936.0, + "6": 3117479936.0, + "7": 3117479936.0, + "8": 
3117479936.0, + "9": 3117479936.0, + "10": 3117479936.0, + "11": 3117479936.0, + "12": 3117479936.0, + "13": 3117479936.0, + "14": 3117479936.0, + "15": 3117479936.0, + "16": 3117479936.0, + "17": 3117479936.0, + "18": 3117479936.0, + "19": 3117479936.0, + "20": 3117479936.0, + "21": 3117479936.0, + "22": 3117479936.0, + "23": 3117479936.0, + "24": 3117479936.0, + "25": 3117479936.0, + "26": 3117479936.0, + "27": 3117479936.0, + "28": 3117479936.0, + "29": 3117479936.0, + "30": 3117479936.0, + "31": 3117479936.0, + "32": 3117479936.0, + "33": 3117479936.0, + "34": 3117479936.0, + "35": 3117479936.0, + "36": 3117479936.0, + "37": 3117479936.0, + "38": 3117479936.0, + "39": 3117479936.0, + "40": 3117479936.0, + "41": 3117479936.0, + "42": 3117479936.0, + "43": 3117479936.0, + "44": 3117479936.0, + "45": 3117479936.0, + "46": 3117479936.0, + "47": 3117479936.0, + "48": 3117479936.0, + "49": 3117479936.0, + "50": 3117479936.0 } }, "mem-max-allocated-bytes": { @@ -175,7 +175,7 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 9708208128.0, + "1": 9708472320.0, "2": 10145497088.0, "3": 10145497088.0, "4": 10145497088.0, @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 74.91474, - "2": 0.1754, - "3": 0.17452, - "4": 0.16679, - "5": 0.16348, - "6": 0.16445, - "7": 0.16736, - "8": 0.16603, - "9": 0.16532, - "10": 0.16307, - "11": 1.37857, - "12": 0.16928, - "13": 0.53834, - "14": 0.57224, - "15": 0.16953, - "16": 0.16333, - "17": 0.16457, - "18": 0.16634, - "19": 0.51067, - "20": 0.16795, - "21": 1.3646, - "22": 0.16877, - "23": 0.16233, - "24": 0.16456, - "25": 0.16106, - "26": 0.16403, - "27": 0.16543, - "28": 0.52927, - "29": 0.16526, - "30": 0.16671, - "31": 1.34815, - "32": 0.1712, - "33": 0.16615, - "34": 0.16654, - "35": 0.16776, - "36": 0.16433, - "37": 0.16743, - "38": 0.5814, - "39": 0.17894, - "40": 0.16539, - "41": 1.61892, - "42": 0.1694, - "43": 0.16828, - "44": 0.16546, - "45": 0.16549, - "46": 0.16556, - "47": 0.51526, - 
"48": 0.16791, - "49": 0.16886, - "50": 0.16634 + "1": 23.71036, + "2": 0.9628, + "3": 0.15071, + "4": 0.14739, + "5": 0.14664, + "6": 0.14614, + "7": 0.53859, + "8": 0.14579, + "9": 0.14831, + "10": 0.14511, + "11": 2.01776, + "12": 0.1483, + "13": 0.14538, + "14": 0.14975, + "15": 0.1463, + "16": 0.14805, + "17": 0.14452, + "18": 0.14537, + "19": 0.14591, + "20": 0.14577, + "21": 1.30547, + "22": 0.14712, + "23": 0.14599, + "24": 0.14734, + "25": 0.14493, + "26": 0.14508, + "27": 0.14499, + "28": 0.14452, + "29": 0.14955, + "30": 0.14693, + "31": 1.30477, + "32": 0.14718, + "33": 0.14909, + "34": 0.14557, + "35": 0.14644, + "36": 0.14549, + "37": 0.1446, + "38": 0.14451, + "39": 0.14369, + "40": 0.14708, + "41": 1.26587, + "42": 0.14465, + "43": 0.14378, + "44": 0.14419, + "45": 0.145, + "46": 0.14555, + "47": 0.14429, + "48": 0.14312, + "49": 0.14355, + "50": 0.14357 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index f9118a22780..c9a9f0c18e3 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.98115, - "2": 10.98342, - "3": 10.9794, - "4": 10.95853, - "5": 10.99622, - "6": 11.00371, - "7": 10.98299, - "8": 10.9748, - "9": 10.97742, - "10": 10.94806, - "11": 10.99306, - "12": 10.96672, - "13": 10.97199, - "14": 10.97915, - "15": 10.85402, - "16": 10.85122, - "17": 10.8089, - "18": 10.82572, - "19": 10.8081, + "1": 10.96115, + "2": 10.95442, + "3": 10.96815, + "4": 10.94185, + "5": 10.9912, + "6": 10.99106, + "7": 10.97905, + "8": 10.95656, + "9": 
10.95286, + "10": 10.92841, + "11": 10.97363, + "12": 10.94886, + "13": 10.94986, + "14": 10.97176, + "15": 10.84445, + "16": 10.84452, + "17": 10.79535, + "18": 10.81592, + "19": 10.81097, "20": 10.61854, - "21": 10.56862, - "22": 10.31926, - "23": 10.59295, - "24": 10.3343, - "25": 10.23216, - "26": 10.34315, - "27": 10.34581, - "28": 10.3247, - "29": 10.336, - "30": 9.88877, - "31": 9.42992, - "32": 10.05572, - "33": 10.0459, - "34": 9.6042, - "35": 9.64743, - "36": 9.52544, - "37": 9.77085, - "38": 9.49252, - "39": 9.87217, - "40": 9.29929, - "41": 9.44531, - "42": 9.52839, - "43": 9.01499, - "44": 9.13044, - "45": 8.96478, - "46": 9.02875, - "47": 9.45483, - "48": 9.02282, - "49": 8.56615, - "50": 9.11114 + "21": 10.56479, + "22": 10.32903, + "23": 10.59978, + "24": 10.33317, + "25": 10.24274, + "26": 10.34415, + "27": 10.36146, + "28": 10.33121, + "29": 10.33606, + "30": 9.9006, + "31": 9.44973, + "32": 10.06957, + "33": 10.05263, + "34": 9.6185, + "35": 9.67146, + "36": 9.55663, + "37": 9.78737, + "38": 9.51226, + "39": 9.89562, + "40": 9.32136, + "41": 9.4791, + "42": 9.54724, + "43": 9.02729, + "44": 9.14151, + "45": 8.97666, + "46": 9.04312, + "47": 9.46933, + "48": 9.03291, + "49": 8.57041, + "50": 9.10753 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 21211.0, - "2": 22047.0, - "3": 20892.0, - "4": 20624.0, - "5": 23413.0, - "6": 23493.0, - "7": 22797.0, - "8": 21401.0, - "9": 22665.0, - "10": 19047.0, - "11": 24508.0, - "12": 23266.0, - "13": 24271.0, - "14": 24293.0, - "15": 22782.0, - "16": 23282.0, - "17": 21824.0, - "18": 22133.0, - "19": 23099.0, - "20": 21505.0, - "21": 22490.0, - "22": 18675.0, - "23": 23908.0, - "24": 19148.0, - "25": 19388.0, - "26": 20532.0, - "27": 21766.0, - "28": 22571.0, - "29": 22352.0, - "30": 19883.0, - "31": 16703.0, - "32": 21084.0, - "33": 22377.0, - "34": 20576.0, - "35": 21216.0, - "36": 20603.0, - "37": 22812.0, - "38": 22830.0, - "39": 22708.0, - "40": 23830.0, - 
"41": 24061.0, - "42": 24003.0, - "43": 22790.0, - "44": 22703.0, - "45": 22360.0, - "46": 23642.0, - "47": 25112.0, - "48": 26185.0, - "49": 26666.0, - "50": 27765.0 + "1": 21029.0, + "2": 21803.0, + "3": 21275.0, + "4": 20805.0, + "5": 23472.0, + "6": 23688.0, + "7": 23309.0, + "8": 21741.0, + "9": 22953.0, + "10": 19428.0, + "11": 25064.0, + "12": 23241.0, + "13": 24401.0, + "14": 24395.0, + "15": 23105.0, + "16": 23184.0, + "17": 22324.0, + "18": 22329.0, + "19": 23437.0, + "20": 21598.0, + "21": 22282.0, + "22": 19179.0, + "23": 23924.0, + "24": 19443.0, + "25": 19373.0, + "26": 20512.0, + "27": 21690.0, + "28": 22966.0, + "29": 22479.0, + "30": 19763.0, + "31": 16744.0, + "32": 21292.0, + "33": 22372.0, + "34": 20944.0, + "35": 21307.0, + "36": 20663.0, + "37": 22966.0, + "38": 22211.0, + "39": 22255.0, + "40": 23551.0, + "41": 23324.0, + "42": 23154.0, + "43": 22670.0, + "44": 22525.0, + "45": 22718.0, + "46": 24166.0, + "47": 25201.0, + "48": 26254.0, + "49": 25694.0, + "50": 28114.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1917381632.0, - "2": 1917381632.0, - "3": 1917381632.0, - "4": 1917381632.0, - "5": 1917381632.0, - "6": 1917381632.0, - "7": 1917381632.0, - "8": 1917381632.0, - "9": 1917381632.0, - "10": 1917381632.0, - "11": 1917381632.0, - "12": 1917381632.0, - "13": 1917381632.0, - "14": 1917381632.0, - "15": 1917381632.0, - "16": 1917381632.0, - "17": 1917381632.0, - "18": 1917381632.0, - "19": 1917381632.0, - "20": 1917381632.0, - "21": 1917381632.0, - "22": 1917381632.0, - "23": 1917381632.0, - "24": 1917381632.0, - "25": 1917381632.0, - "26": 1917381632.0, - "27": 1917381632.0, - "28": 1917381632.0, - "29": 1917381632.0, - "30": 1917381632.0, - "31": 1917381632.0, - "32": 1917381632.0, - "33": 1917381632.0, - "34": 1917381632.0, - "35": 1917381632.0, - "36": 1917381632.0, - "37": 1917381632.0, - "38": 1917381632.0, - "39": 1917381632.0, - "40": 1917381632.0, - "41": 1917381632.0, 
- "42": 1917381632.0, - "43": 1917381632.0, - "44": 1917381632.0, - "45": 1917381632.0, - "46": 1917381632.0, - "47": 1917381632.0, - "48": 1917381632.0, - "49": 1917381632.0, - "50": 1917381632.0 + "1": 1917382656.0, + "2": 1917382656.0, + "3": 1917382656.0, + "4": 1917382656.0, + "5": 1917382656.0, + "6": 1917382656.0, + "7": 1917382656.0, + "8": 1917382656.0, + "9": 1917382656.0, + "10": 1917382656.0, + "11": 1917382656.0, + "12": 1917382656.0, + "13": 1917382656.0, + "14": 1917382656.0, + "15": 1917382656.0, + "16": 1917382656.0, + "17": 1917382656.0, + "18": 1917382656.0, + "19": 1917382656.0, + "20": 1917382656.0, + "21": 1917382656.0, + "22": 1917382656.0, + "23": 1917382656.0, + "24": 1917382656.0, + "25": 1917382656.0, + "26": 1917382656.0, + "27": 1917382656.0, + "28": 1917382656.0, + "29": 1917382656.0, + "30": 1917382656.0, + "31": 1917382656.0, + "32": 1917382656.0, + "33": 1917382656.0, + "34": 1917382656.0, + "35": 1917382656.0, + "36": 1917382656.0, + "37": 1917382656.0, + "38": 1917382656.0, + "39": 1917382656.0, + "40": 1917382656.0, + "41": 1917382656.0, + "42": 1917382656.0, + "43": 1917382656.0, + "44": 1917382656.0, + "45": 1917382656.0, + "46": 1917382656.0, + "47": 1917382656.0, + "48": 1917382656.0, + "49": 1917382656.0, + "50": 1917382656.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5502737408.0, - "2": 5907581952.0, - "3": 5907581952.0, - "4": 5907581952.0, - "5": 5907581952.0, - "6": 5907581952.0, - "7": 5907581952.0, - "8": 5907581952.0, - "9": 5907581952.0, - "10": 5907581952.0, - "11": 5907581952.0, - "12": 5907581952.0, - "13": 5907581952.0, - "14": 5907581952.0, - "15": 5907581952.0, - "16": 5907581952.0, - "17": 5907581952.0, - "18": 5907581952.0, - "19": 5907581952.0, - "20": 5907581952.0, - "21": 5907581952.0, - "22": 5907581952.0, - "23": 5907581952.0, - "24": 5907581952.0, - "25": 5907581952.0, - "26": 5907581952.0, - "27": 5907581952.0, - "28": 5907581952.0, - 
"29": 5907581952.0, - "30": 5907581952.0, - "31": 5907581952.0, - "32": 5907581952.0, - "33": 5907581952.0, - "34": 5907581952.0, - "35": 5907581952.0, - "36": 5907581952.0, - "37": 5907581952.0, - "38": 5907581952.0, - "39": 5907581952.0, - "40": 5907581952.0, - "41": 5907581952.0, - "42": 5907581952.0, - "43": 5907581952.0, - "44": 5907581952.0, - "45": 5907581952.0, - "46": 5907581952.0, - "47": 5907581952.0, - "48": 5907581952.0, - "49": 5907581952.0, - "50": 5907581952.0 + "1": 5504180224.0, + "2": 5907845120.0, + "3": 5907845120.0, + "4": 5907845120.0, + "5": 5907845120.0, + "6": 5907845120.0, + "7": 5907845120.0, + "8": 5907845120.0, + "9": 5907845120.0, + "10": 5907845120.0, + "11": 5907845120.0, + "12": 5907845120.0, + "13": 5907845120.0, + "14": 5907845120.0, + "15": 5907845120.0, + "16": 5907845120.0, + "17": 5907845120.0, + "18": 5907845120.0, + "19": 5907845120.0, + "20": 5907845120.0, + "21": 5907845120.0, + "22": 5907845120.0, + "23": 5907845120.0, + "24": 5907845120.0, + "25": 5907845120.0, + "26": 5907845120.0, + "27": 5907845120.0, + "28": 5907845120.0, + "29": 5907845120.0, + "30": 5907845120.0, + "31": 5907845120.0, + "32": 5907845120.0, + "33": 5907845120.0, + "34": 5907845120.0, + "35": 5907845120.0, + "36": 5907845120.0, + "37": 5907845120.0, + "38": 5907845120.0, + "39": 5907845120.0, + "40": 5907845120.0, + "41": 5907845120.0, + "42": 5907845120.0, + "43": 5907845120.0, + "44": 5907845120.0, + "45": 5907845120.0, + "46": 5907845120.0, + "47": 5907845120.0, + "48": 5907845120.0, + "49": 5907845120.0, + "50": 5907845120.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 76.70816, - "2": 0.44479, - "3": 0.37638, - "4": 0.32493, - "5": 0.32865, - "6": 0.3221, - "7": 0.33027, - "8": 0.32627, - "9": 0.69409, - "10": 0.66689, - "11": 0.94476, - "12": 0.6757, - "13": 0.32571, - "14": 0.3194, - "15": 0.31954, - "16": 0.32142, - "17": 0.32144, - "18": 0.3188, - "19": 0.32023, - "20": 0.70348, - 
"21": 1.36061, - "22": 0.32306, - "23": 0.32129, - "24": 0.31927, - "25": 0.32503, - "26": 0.322, - "27": 0.31994, - "28": 0.32043, - "29": 0.31651, - "30": 0.31907, - "31": 1.31856, - "32": 0.32016, - "33": 0.31758, - "34": 0.31966, - "35": 0.31765, - "36": 0.31717, - "37": 0.3191, - "38": 0.31591, - "39": 0.3156, - "40": 0.31599, - "41": 0.90957, - "42": 0.32017, - "43": 0.31902, - "44": 0.32013, - "45": 0.32183, - "46": 0.31561, - "47": 0.31628, - "48": 0.31911, - "49": 0.31753, - "50": 0.31636 + "1": 26.75792, + "2": 0.30494, + "3": 0.28789, + "4": 0.28506, + "5": 0.28809, + "6": 0.28382, + "7": 0.28771, + "8": 0.28452, + "9": 0.28435, + "10": 0.28347, + "11": 0.83806, + "12": 0.28353, + "13": 0.28316, + "14": 0.28187, + "15": 0.29083, + "16": 0.28487, + "17": 0.29825, + "18": 0.2809, + "19": 0.28761, + "20": 0.2836, + "21": 0.8563, + "22": 0.31557, + "23": 0.29574, + "24": 0.28275, + "25": 0.28216, + "26": 0.28209, + "27": 0.28247, + "28": 0.28433, + "29": 0.28471, + "30": 0.28186, + "31": 0.83551, + "32": 0.28363, + "33": 0.28327, + "34": 0.28256, + "35": 0.28367, + "36": 0.28263, + "37": 0.28149, + "38": 0.28362, + "39": 0.28319, + "40": 0.28289, + "41": 0.83483, + "42": 0.28322, + "43": 0.28246, + "44": 0.28238, + "45": 0.28223, + "46": 0.28104, + "47": 0.2861, + "48": 0.28269, + "49": 0.28433, + "50": 0.28632 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json index baf1fa52671..fbbb805b0df 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.98296, - 
"2": 10.98234, - "3": 10.98046, - "4": 10.96512, - "5": 10.99789, - "6": 11.00517, - "7": 10.98273, - "8": 10.97596, - "9": 10.9783, - "10": 10.9452, - "11": 10.99257, - "12": 10.96815, - "13": 10.9703, - "14": 10.98207, - "15": 10.85381, - "16": 10.85003, - "17": 10.80667, - "18": 10.82648, - "19": 10.81123, - "20": 10.62194, - "21": 10.56069, - "22": 10.32105, - "23": 10.59531, - "24": 10.32461, - "25": 10.23318, - "26": 10.33828, - "27": 10.34879, - "28": 10.32094, - "29": 10.33068, - "30": 9.8856, - "31": 9.42999, - "32": 10.05321, - "33": 10.0429, - "34": 9.6053, - "35": 9.64984, - "36": 9.52934, - "37": 9.76834, - "38": 9.48585, - "39": 9.87468, - "40": 9.30022, - "41": 9.44909, - "42": 9.52866, - "43": 9.01602, - "44": 9.12963, - "45": 8.96826, - "46": 9.03049, - "47": 9.45732, - "48": 9.02119, - "49": 8.56905, - "50": 9.10994 + "1": 10.96474, + "2": 10.96158, + "3": 10.96811, + "4": 10.94673, + "5": 10.9862, + "6": 10.98821, + "7": 10.975, + "8": 10.95625, + "9": 10.95934, + "10": 10.92863, + "11": 10.97637, + "12": 10.95058, + "13": 10.95134, + "14": 10.98042, + "15": 10.85189, + "16": 10.84652, + "17": 10.80269, + "18": 10.81465, + "19": 10.80329, + "20": 10.61769, + "21": 10.56332, + "22": 10.327, + "23": 10.59443, + "24": 10.329, + "25": 10.23672, + "26": 10.34252, + "27": 10.3618, + "28": 10.33128, + "29": 10.33469, + "30": 9.9024, + "31": 9.44988, + "32": 10.06653, + "33": 10.04781, + "34": 9.619, + "35": 9.67714, + "36": 9.55042, + "37": 9.78904, + "38": 9.51089, + "39": 9.89036, + "40": 9.32367, + "41": 9.47992, + "42": 9.54708, + "43": 9.02808, + "44": 9.14479, + "45": 8.97643, + "46": 9.04145, + "47": 9.46744, + "48": 9.03259, + "49": 8.56923, + "50": 9.11023 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2992.0, - "2": 2911.0, - "3": 2981.0, - "4": 2784.0, - "5": 3153.0, - "6": 3292.0, - "7": 3123.0, - "8": 3104.0, - "9": 3123.0, - "10": 2796.0, - "11": 3497.0, - "12": 3305.0, - "13": 3271.0, - 
"14": 3414.0, - "15": 3082.0, - "16": 3257.0, - "17": 3088.0, - "18": 3113.0, - "19": 3283.0, - "20": 2980.0, - "21": 3045.0, - "22": 2623.0, - "23": 3281.0, - "24": 2774.0, - "25": 2745.0, - "26": 2827.0, - "27": 3106.0, - "28": 3227.0, - "29": 3118.0, - "30": 2695.0, - "31": 2326.0, - "32": 3058.0, - "33": 3138.0, - "34": 2755.0, - "35": 2931.0, - "36": 2947.0, - "37": 3169.0, - "38": 3016.0, - "39": 3187.0, - "40": 3076.0, - "41": 3043.0, - "42": 3245.0, - "43": 2813.0, - "44": 2934.0, - "45": 2868.0, - "46": 3015.0, - "47": 3294.0, - "48": 3327.0, - "49": 3253.0, - "50": 3403.0 + "1": 3013.0, + "2": 3035.0, + "3": 2950.0, + "4": 2883.0, + "5": 3259.0, + "6": 3503.0, + "7": 3161.0, + "8": 2999.0, + "9": 3136.0, + "10": 2879.0, + "11": 3560.0, + "12": 3331.0, + "13": 3426.0, + "14": 3472.0, + "15": 3341.0, + "16": 3159.0, + "17": 3006.0, + "18": 3206.0, + "19": 3305.0, + "20": 3055.0, + "21": 3107.0, + "22": 2621.0, + "23": 3375.0, + "24": 2719.0, + "25": 2703.0, + "26": 2980.0, + "27": 2956.0, + "28": 3187.0, + "29": 3297.0, + "30": 2700.0, + "31": 2259.0, + "32": 3026.0, + "33": 3108.0, + "34": 2859.0, + "35": 2877.0, + "36": 2798.0, + "37": 2988.0, + "38": 3050.0, + "39": 3043.0, + "40": 3128.0, + "41": 2973.0, + "42": 3002.0, + "43": 2880.0, + "44": 2941.0, + "45": 2863.0, + "46": 3016.0, + "47": 3110.0, + "48": 3210.0, + "49": 3248.0, + "50": 3437.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1917251584.0, - "2": 1917251584.0, - "3": 1917251584.0, - "4": 1917251584.0, - "5": 1917251584.0, - "6": 1917251584.0, - "7": 1917251584.0, - "8": 1917251584.0, - "9": 1917251584.0, - "10": 1917251584.0, - "11": 1917251584.0, - "12": 1917251584.0, - "13": 1917251584.0, - "14": 1917251584.0, - "15": 1917251584.0, - "16": 1917251584.0, - "17": 1917251584.0, - "18": 1917251584.0, - "19": 1917251584.0, - "20": 1917251584.0, - "21": 1917251584.0, - "22": 1917251584.0, - "23": 1917251584.0, - "24": 1917251584.0, - 
"25": 1917251584.0, - "26": 1917251584.0, - "27": 1917251584.0, - "28": 1917251584.0, - "29": 1917251584.0, - "30": 1917251584.0, - "31": 1917251584.0, - "32": 1917251584.0, - "33": 1917251584.0, - "34": 1917251584.0, - "35": 1917251584.0, - "36": 1917251584.0, - "37": 1917251584.0, - "38": 1917251584.0, - "39": 1917251584.0, - "40": 1917251584.0, - "41": 1917251584.0, - "42": 1917251584.0, - "43": 1917251584.0, - "44": 1917251584.0, - "45": 1917251584.0, - "46": 1917251584.0, - "47": 1917251584.0, - "48": 1917251584.0, - "49": 1917251584.0, - "50": 1917251584.0 + "1": 1917252608.0, + "2": 1917252608.0, + "3": 1917252608.0, + "4": 1917252608.0, + "5": 1917252608.0, + "6": 1917252608.0, + "7": 1917252608.0, + "8": 1917252608.0, + "9": 1917252608.0, + "10": 1917252608.0, + "11": 1917252608.0, + "12": 1917252608.0, + "13": 1917252608.0, + "14": 1917252608.0, + "15": 1917252608.0, + "16": 1917252608.0, + "17": 1917252608.0, + "18": 1917252608.0, + "19": 1917252608.0, + "20": 1917252608.0, + "21": 1917252608.0, + "22": 1917252608.0, + "23": 1917252608.0, + "24": 1917252608.0, + "25": 1917252608.0, + "26": 1917252608.0, + "27": 1917252608.0, + "28": 1917252608.0, + "29": 1917252608.0, + "30": 1917252608.0, + "31": 1917252608.0, + "32": 1917252608.0, + "33": 1917252608.0, + "34": 1917252608.0, + "35": 1917252608.0, + "36": 1917252608.0, + "37": 1917252608.0, + "38": 1917252608.0, + "39": 1917252608.0, + "40": 1917252608.0, + "41": 1917252608.0, + "42": 1917252608.0, + "43": 1917252608.0, + "44": 1917252608.0, + "45": 1917252608.0, + "46": 1917252608.0, + "47": 1917252608.0, + "48": 1917252608.0, + "49": 1917252608.0, + "50": 1917252608.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2520653312.0, - "2": 2743788032.0, - "3": 2743788032.0, - "4": 2743788032.0, - "5": 2743788032.0, - "6": 2743788032.0, - "7": 2743788032.0, - "8": 2743788032.0, - "9": 2743788032.0, - "10": 2743788032.0, - "11": 2743788032.0, - 
"12": 2743788032.0, - "13": 2743788032.0, - "14": 2743788032.0, - "15": 2743788032.0, - "16": 2743788032.0, - "17": 2743788032.0, - "18": 2743788032.0, - "19": 2743788032.0, - "20": 2743788032.0, - "21": 2743788032.0, - "22": 2743788032.0, - "23": 2743788032.0, - "24": 2743788032.0, - "25": 2743788032.0, - "26": 2743788032.0, - "27": 2743788032.0, - "28": 2743788032.0, - "29": 2743788032.0, - "30": 2743788032.0, - "31": 2743788032.0, - "32": 2743788032.0, - "33": 2743788032.0, - "34": 2743788032.0, - "35": 2743788032.0, - "36": 2743788032.0, - "37": 2743788032.0, - "38": 2743788032.0, - "39": 2743788032.0, - "40": 2743788032.0, - "41": 2743788032.0, - "42": 2743788032.0, - "43": 2743788032.0, - "44": 2743788032.0, - "45": 2743788032.0, - "46": 2743788032.0, - "47": 2743788032.0, - "48": 2743788032.0, - "49": 2743788032.0, - "50": 2743788032.0 + "1": 2520785408.0, + "2": 2743789056.0, + "3": 2743789056.0, + "4": 2743789056.0, + "5": 2743789056.0, + "6": 2743789056.0, + "7": 2743789056.0, + "8": 2743789056.0, + "9": 2743789056.0, + "10": 2743789056.0, + "11": 2743789056.0, + "12": 2743789056.0, + "13": 2743789056.0, + "14": 2743789056.0, + "15": 2743789056.0, + "16": 2743789056.0, + "17": 2743789056.0, + "18": 2743789056.0, + "19": 2743789056.0, + "20": 2743789056.0, + "21": 2743789056.0, + "22": 2743789056.0, + "23": 2743789056.0, + "24": 2743789056.0, + "25": 2743789056.0, + "26": 2743789056.0, + "27": 2743789056.0, + "28": 2743789056.0, + "29": 2743789056.0, + "30": 2743789056.0, + "31": 2743789056.0, + "32": 2743789056.0, + "33": 2743789056.0, + "34": 2743789056.0, + "35": 2743789056.0, + "36": 2743789056.0, + "37": 2743789056.0, + "38": 2743789056.0, + "39": 2743789056.0, + "40": 2743789056.0, + "41": 2743789056.0, + "42": 2743789056.0, + "43": 2743789056.0, + "44": 2743789056.0, + "45": 2743789056.0, + "46": 2743789056.0, + "47": 2743789056.0, + "48": 2743789056.0, + "49": 2743789056.0, + "50": 2743789056.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ 
"end_step": 50, "step_interval": 1, "values": { - "1": 92.52278, - "2": 1.52203, - "3": 1.50103, - "4": 1.51627, - "5": 1.49943, - "6": 1.61325, - "7": 1.5622, - "8": 1.50668, - "9": 1.50122, - "10": 1.50749, - "11": 2.12764, - "12": 1.51111, - "13": 1.50973, - "14": 1.51712, - "15": 1.50952, - "16": 1.51343, - "17": 1.50742, - "18": 1.52017, - "19": 1.50622, - "20": 1.51648, - "21": 2.13229, - "22": 1.50789, - "23": 1.52087, - "24": 1.50668, - "25": 1.51534, - "26": 1.5016, - "27": 1.50737, - "28": 1.49873, - "29": 1.50715, - "30": 1.49941, - "31": 2.11492, - "32": 1.50348, - "33": 1.50106, - "34": 1.50093, - "35": 1.50813, - "36": 1.4988, - "37": 1.49847, - "38": 1.49777, - "39": 1.49937, - "40": 1.50456, - "41": 2.11318, - "42": 1.50605, - "43": 1.50721, - "44": 1.51813, - "45": 1.50211, - "46": 1.51633, - "47": 1.5019, - "48": 1.52386, - "49": 1.49987, - "50": 1.50829 + "1": 35.39303, + "2": 1.47947, + "3": 1.43465, + "4": 1.42746, + "5": 1.42319, + "6": 1.43258, + "7": 1.42845, + "8": 1.41781, + "9": 1.4151, + "10": 1.41191, + "11": 1.95875, + "12": 1.3933, + "13": 1.39849, + "14": 1.39794, + "15": 1.40724, + "16": 1.39365, + "17": 1.38797, + "18": 1.3881, + "19": 1.38756, + "20": 1.4026, + "21": 1.98432, + "22": 1.40772, + "23": 1.40655, + "24": 1.411, + "25": 1.40775, + "26": 1.41523, + "27": 1.40237, + "28": 1.43117, + "29": 1.43476, + "30": 1.42856, + "31": 2.00614, + "32": 1.41414, + "33": 1.41736, + "34": 1.40899, + "35": 1.43827, + "36": 1.43529, + "37": 1.40205, + "38": 1.39968, + "39": 1.39625, + "40": 1.41137, + "41": 1.95978, + "42": 1.4124, + "43": 1.42729, + "44": 1.41966, + "45": 1.41646, + "46": 1.41671, + "47": 1.3922, + "48": 1.39545, + "49": 1.383, + "50": 1.38147 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json index acc70537006..fee855b0084 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json @@ -2,91 +2,286 @@ "lm loss": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 10.82721, - "5": 10.85697, - "10": 10.79166, - "15": 10.82555, - "20": 10.7225, - "25": 10.54453, - "30": 10.35773, - "35": 10.27098, - "40": 10.09715, - "45": 9.84113, - "50": 9.92414 + "1": 10.82753, + "2": 10.84043, + "3": 10.82715, + "4": 10.81921, + "5": 10.85715, + "6": 10.86963, + "7": 10.85115, + "8": 10.84459, + "9": 10.85294, + "10": 10.79205, + "11": 10.86576, + "12": 10.87104, + "13": 10.87066, + "14": 10.8786, + "15": 10.82531, + "16": 10.81239, + "17": 10.77441, + "18": 10.81066, + "19": 10.79655, + "20": 10.72261, + "21": 10.69716, + "22": 10.55179, + "23": 10.70541, + "24": 10.59, + "25": 10.5444, + "26": 10.60019, + "27": 10.62037, + "28": 10.57394, + "29": 10.58621, + "30": 10.35743, + "31": 10.12236, + "32": 10.4699, + "33": 10.45701, + "34": 10.21542, + "35": 10.27175, + "36": 10.23575, + "37": 10.35238, + "38": 10.20563, + "39": 10.40098, + "40": 10.09712, + "41": 10.13849, + "42": 10.21817, + "43": 9.84392, + "44": 9.96202, + "45": 9.84103, + "46": 9.81937, + "47": 10.13889, + "48": 9.85138, + "49": 9.53556, + "50": 9.92467 } }, "num-zeros": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 4712.0, - "5": 5441.0, - "10": 4322.0, - "15": 5376.0, - "20": 4936.0, - "25": 4834.0, - "30": 5393.0, - "35": 5612.0, - "40": 5947.0, - "45": 5737.0, - "50": 6611.0 + "1": 4603.0, + "2": 5083.0, + "3": 4785.0, + "4": 4984.0, + "5": 5363.0, + "6": 
5526.0, + "7": 5186.0, + "8": 4832.0, + "9": 5266.0, + "10": 4277.0, + "11": 5578.0, + "12": 5167.0, + "13": 5542.0, + "14": 5534.0, + "15": 5159.0, + "16": 5362.0, + "17": 5218.0, + "18": 5139.0, + "19": 5256.0, + "20": 4828.0, + "21": 5250.0, + "22": 4751.0, + "23": 5581.0, + "24": 5143.0, + "25": 4818.0, + "26": 5119.0, + "27": 5303.0, + "28": 5695.0, + "29": 5950.0, + "30": 5442.0, + "31": 4846.0, + "32": 5628.0, + "33": 6184.0, + "34": 5101.0, + "35": 5705.0, + "36": 5638.0, + "37": 6355.0, + "38": 6140.0, + "39": 6610.0, + "40": 5946.0, + "41": 5935.0, + "42": 6405.0, + "43": 5917.0, + "44": 5830.0, + "45": 5791.0, + "46": 6026.0, + "47": 6456.0, + "48": 6440.0, + "49": 6174.0, + "50": 6644.0 } }, "mem-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 1114775040.0, - "5": 1114770944.0, - "10": 1114772992.0, - "15": 1114774016.0, - "20": 1114772480.0, - "25": 1114770944.0, - "30": 1114770944.0, - "35": 1114775040.0, - "40": 1114774016.0, - "45": 1114772992.0, - "50": 1114773504.0 + "1": 1116843520.0, + "2": 1116841984.0, + "3": 1116839936.0, + "4": 1116843008.0, + "5": 1116839424.0, + "6": 1116839936.0, + "7": 1116840960.0, + "8": 1116839936.0, + "9": 1116842496.0, + "10": 1116841472.0, + "11": 1116841984.0, + "12": 1116839936.0, + "13": 1116845056.0, + "14": 1116838912.0, + "15": 1116842496.0, + "16": 1116841472.0, + "17": 1116838912.0, + "18": 1116843520.0, + "19": 1116839936.0, + "20": 1116841472.0, + "21": 1116838912.0, + "22": 1116840448.0, + "23": 1116840448.0, + "24": 1116843520.0, + "25": 1116839424.0, + "26": 1116843008.0, + "27": 1116840960.0, + "28": 1116842496.0, + "29": 1116843008.0, + "30": 1116839936.0, + "31": 1116846080.0, + "32": 1116842496.0, + "33": 1116841472.0, + "34": 1116840960.0, + "35": 1116843520.0, + "36": 1116838912.0, + "37": 1116840448.0, + "38": 1116841472.0, + "39": 1116840448.0, + "40": 1116841984.0, + "41": 1116842496.0, + "42": 1116843520.0, + "43": 
1116844032.0, + "44": 1116843008.0, + "45": 1116840960.0, + "46": 1116842496.0, + "47": 1116841984.0, + "48": 1116839424.0, + "49": 1116837376.0, + "50": 1116843008.0 } }, "mem-max-allocated-bytes": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 1563141632.0, - "5": 2020767232.0, - "10": 2023552512.0, - "15": 2025326592.0, - "20": 2025326592.0, - "25": 2025326592.0, - "30": 2025326592.0, - "35": 2028347392.0, - "40": 2028347392.0, - "45": 2028347392.0, - "50": 2028347392.0 + "1": 1562991104.0, + "2": 2022045696.0, + "3": 2022045696.0, + "4": 2023063552.0, + "5": 2023063552.0, + "6": 2023063552.0, + "7": 2023063552.0, + "8": 2023063552.0, + "9": 2023063552.0, + "10": 2025666048.0, + "11": 2025666048.0, + "12": 2025666048.0, + "13": 2027637760.0, + "14": 2027637760.0, + "15": 2027637760.0, + "16": 2027637760.0, + "17": 2027637760.0, + "18": 2027637760.0, + "19": 2027637760.0, + "20": 2027637760.0, + "21": 2027637760.0, + "22": 2027637760.0, + "23": 2027637760.0, + "24": 2027637760.0, + "25": 2027637760.0, + "26": 2027637760.0, + "27": 2027637760.0, + "28": 2027637760.0, + "29": 2027637760.0, + "30": 2027637760.0, + "31": 2029937664.0, + "32": 2029937664.0, + "33": 2029937664.0, + "34": 2029937664.0, + "35": 2029937664.0, + "36": 2029937664.0, + "37": 2029937664.0, + "38": 2029937664.0, + "39": 2029937664.0, + "40": 2029937664.0, + "41": 2029937664.0, + "42": 2029937664.0, + "43": 2029937664.0, + "44": 2029937664.0, + "45": 2029937664.0, + "46": 2029937664.0, + "47": 2029937664.0, + "48": 2029937664.0, + "49": 2029937664.0, + "50": 2029937664.0 } }, "iteration-time": { "start_step": 1, "end_step": 50, - "step_interval": 5, + "step_interval": 1, "values": { - "1": 10.56989, - "5": 0.34599, - "10": 0.34601, - "15": 0.34343, - "20": 0.34409, - "25": 0.34378, - "30": 0.34403, - "35": 0.34395, - "40": 0.34489, - "45": 0.34046, - "50": 0.34152 + "1": 16.87326, + "2": 0.3522, + "3": 0.33665, + "4": 0.32376, + "5": 0.32134, + 
"6": 0.32089, + "7": 0.32, + "8": 0.32013, + "9": 0.32009, + "10": 0.32059, + "11": 0.31897, + "12": 0.31983, + "13": 0.32143, + "14": 0.32114, + "15": 0.32116, + "16": 0.32112, + "17": 0.32136, + "18": 0.32313, + "19": 0.32195, + "20": 0.32131, + "21": 0.32215, + "22": 0.32253, + "23": 0.32037, + "24": 0.32194, + "25": 0.32053, + "26": 0.72275, + "27": 0.32115, + "28": 0.32108, + "29": 0.32328, + "30": 0.32158, + "31": 0.32145, + "32": 0.32206, + "33": 0.32101, + "34": 0.32196, + "35": 0.32277, + "36": 0.32103, + "37": 0.32143, + "38": 0.32156, + "39": 0.32198, + "40": 0.32071, + "41": 0.32265, + "42": 0.32274, + "43": 0.32271, + "44": 0.32188, + "45": 0.32208, + "46": 0.32183, + "47": 0.32051, + "48": 0.3213, + "49": 0.32129, + "50": 0.31989 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json index e4e01388a15..6a4f3459a2c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.7999, - "2": 10.80046, - "3": 10.8089, - "4": 10.78245, - "5": 10.82504, - "6": 10.83657, - "7": 10.81628, - "8": 10.81184, - "9": 10.8108, - "10": 10.7742, - "11": 10.85482, - "12": 10.82663, - "13": 10.85131, - "14": 10.85461, - "15": 10.78253, - "16": 10.77375, - "17": 10.74989, - "18": 10.78346, - "19": 10.75877, - "20": 10.69982, - "21": 10.67287, - "22": 10.5142, - "23": 10.68053, - "24": 10.57164, - "25": 10.51814, + "1": 10.80012, + "2": 10.8005, + "3": 10.8088, + "4": 10.78235, + "5": 10.82515, + "6": 10.83624, + "7": 10.81603, + 
"8": 10.81186, + "9": 10.8109, + "10": 10.77384, + "11": 10.85522, + "12": 10.82691, + "13": 10.85113, + "14": 10.85524, + "15": 10.78245, + "16": 10.77327, + "17": 10.75069, + "18": 10.78345, + "19": 10.75897, + "20": 10.69992, + "21": 10.67228, + "22": 10.51407, + "23": 10.68079, + "24": 10.57159, + "25": 10.51796, "26": 10.57591, - "27": 10.59136, - "28": 10.55398, - "29": 10.57104, - "30": 10.36425, - "31": 10.10945, - "32": 10.45329, - "33": 10.43693, - "34": 10.20011, - "35": 10.25443, - "36": 10.23318, - "37": 10.3536, - "38": 10.20421, - "39": 10.3993, - "40": 10.10241, - "41": 10.12765, - "42": 10.21115, - "43": 9.83746, - "44": 9.96186, - "45": 9.84266, - "46": 9.80686, - "47": 10.14266, - "48": 9.86672, - "49": 9.53822, - "50": 9.92595 + "27": 10.59187, + "28": 10.55352, + "29": 10.57123, + "30": 10.36507, + "31": 10.10867, + "32": 10.45411, + "33": 10.437, + "34": 10.20016, + "35": 10.25454, + "36": 10.23316, + "37": 10.35376, + "38": 10.20479, + "39": 10.39932, + "40": 10.10206, + "41": 10.12772, + "42": 10.2109, + "43": 9.83726, + "44": 9.96178, + "45": 9.84258, + "46": 9.80634, + "47": 10.14233, + "48": 9.86646, + "49": 9.53815, + "50": 9.92572 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4752.0, - "2": 5040.0, - "3": 5112.0, - "4": 5072.0, - "5": 5472.0, - "6": 5619.0, - "7": 5255.0, - "8": 5065.0, - "9": 5483.0, - "10": 4607.0, - "11": 5862.0, - "12": 5377.0, - "13": 5783.0, - "14": 5830.0, - "15": 5249.0, - "16": 5346.0, - "17": 5291.0, - "18": 5277.0, - "19": 5352.0, - "20": 4942.0, - "21": 5465.0, - "22": 4878.0, - "23": 5807.0, - "24": 5145.0, - "25": 4873.0, - "26": 5380.0, - "27": 5479.0, - "28": 5739.0, - "29": 5950.0, - "30": 5363.0, - "31": 4730.0, - "32": 5732.0, - "33": 5963.0, - "34": 5261.0, - "35": 5660.0, - "36": 5422.0, - "37": 6362.0, - "38": 6114.0, - "39": 6803.0, - "40": 5731.0, - "41": 5808.0, - "42": 6485.0, - "43": 5742.0, - "44": 5843.0, - "45": 5876.0, - "46": 6024.0, - 
"47": 6554.0, - "48": 6354.0, - "49": 6497.0, - "50": 6526.0 + "1": 4754.0, + "2": 5059.0, + "3": 5119.0, + "4": 5063.0, + "5": 5547.0, + "6": 5513.0, + "7": 5119.0, + "8": 5021.0, + "9": 5280.0, + "10": 4401.0, + "11": 5996.0, + "12": 5401.0, + "13": 5775.0, + "14": 5673.0, + "15": 5182.0, + "16": 5401.0, + "17": 5223.0, + "18": 5195.0, + "19": 5312.0, + "20": 4783.0, + "21": 5332.0, + "22": 4858.0, + "23": 5752.0, + "24": 5114.0, + "25": 4946.0, + "26": 5370.0, + "27": 5291.0, + "28": 5771.0, + "29": 5900.0, + "30": 5276.0, + "31": 4814.0, + "32": 5760.0, + "33": 6010.0, + "34": 5199.0, + "35": 5583.0, + "36": 5494.0, + "37": 6408.0, + "38": 5931.0, + "39": 6618.0, + "40": 5910.0, + "41": 5851.0, + "42": 6294.0, + "43": 5754.0, + "44": 5656.0, + "45": 5874.0, + "46": 5925.0, + "47": 6568.0, + "48": 6429.0, + "49": 6436.0, + "50": 6468.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1144115200.0, - "2": 1144113152.0, - "3": 1144113664.0, - "4": 1144112640.0, - "5": 1144113664.0, - "6": 1144113152.0, - "7": 1144115200.0, - "8": 1144112640.0, - "9": 1144113152.0, - "10": 1144118272.0, - "11": 1144112640.0, - "12": 1144112128.0, - "13": 1144110592.0, - "14": 1144112640.0, - "15": 1144111616.0, - "16": 1144112640.0, - "17": 1144112128.0, - "18": 1144113152.0, - "19": 1144112640.0, - "20": 1144113664.0, - "21": 1144113152.0, - "22": 1144114176.0, - "23": 1144113664.0, - "24": 1144111616.0, - "25": 1144110592.0, - "26": 1144113664.0, - "27": 1144113664.0, - "28": 1144112128.0, - "29": 1144110080.0, - "30": 1144113152.0, - "31": 1144116224.0, - "32": 1144112128.0, - "33": 1144113152.0, - "34": 1144113664.0, - "35": 1144115712.0, - "36": 1144111616.0, - "37": 1144111104.0, - "38": 1144110592.0, - "39": 1144113664.0, - "40": 1144113664.0, - "41": 1144114176.0, - "42": 1144109056.0, - "43": 1144114176.0, - "44": 1144115200.0, - "45": 1144113152.0, - "46": 1144117760.0, - "47": 1144113152.0, - "48": 1144115712.0, - 
"49": 1144117760.0, - "50": 1144114176.0 + "1": 1145163776.0, + "2": 1145163776.0, + "3": 1145163264.0, + "4": 1145162240.0, + "5": 1145163776.0, + "6": 1146211328.0, + "7": 1146213376.0, + "8": 1145162240.0, + "9": 1145162752.0, + "10": 1145167360.0, + "11": 1145162240.0, + "12": 1145162240.0, + "13": 1145161216.0, + "14": 1146210816.0, + "15": 1145160192.0, + "16": 1145162752.0, + "17": 1145161728.0, + "18": 1145162752.0, + "19": 1146210816.0, + "20": 1145163264.0, + "21": 1146211328.0, + "22": 1145163776.0, + "23": 1146212352.0, + "24": 1145161216.0, + "25": 1145160704.0, + "26": 1145164288.0, + "27": 1145163264.0, + "28": 1145161728.0, + "29": 1145159680.0, + "30": 1145162752.0, + "31": 1145165824.0, + "32": 1145163264.0, + "33": 1145162752.0, + "34": 1145163264.0, + "35": 1145165312.0, + "36": 1145161728.0, + "37": 1145160704.0, + "38": 1145160192.0, + "39": 1145162752.0, + "40": 1145163264.0, + "41": 1145163264.0, + "42": 1145159680.0, + "43": 1145164288.0, + "44": 1146213888.0, + "45": 1146211328.0, + "46": 1146215936.0, + "47": 1145162752.0, + "48": 1145165824.0, + "49": 1146216448.0, + "50": 1146212864.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1593775104.0, - "2": 2049587200.0, - "3": 2050487808.0, - "4": 2050487808.0, - "5": 2050487808.0, - "6": 2051877376.0, - "7": 2052037632.0, - "8": 2052037632.0, - "9": 2053219840.0, - "10": 2055123968.0, - "11": 2055123968.0, - "12": 2055123968.0, - "13": 2055123968.0, - "14": 2055123968.0, - "15": 2055123968.0, - "16": 2055123968.0, - "17": 2055123968.0, - "18": 2055123968.0, - "19": 2055123968.0, - "20": 2055123968.0, - "21": 2055123968.0, - "22": 2055123968.0, - "23": 2055123968.0, - "24": 2055123968.0, - "25": 2055123968.0, - "26": 2055123968.0, - "27": 2055123968.0, - "28": 2055123968.0, - "29": 2055123968.0, - "30": 2055123968.0, - "31": 2055123968.0, - "32": 2055123968.0, - "33": 2055123968.0, - "34": 2055123968.0, - "35": 2055123968.0, - 
"36": 2055123968.0, - "37": 2055123968.0, - "38": 2055123968.0, - "39": 2055123968.0, - "40": 2055123968.0, - "41": 2055123968.0, - "42": 2055123968.0, - "43": 2055123968.0, - "44": 2055123968.0, - "45": 2055123968.0, - "46": 2055123968.0, - "47": 2055123968.0, - "48": 2055123968.0, - "49": 2055123968.0, - "50": 2055123968.0 + "1": 1593583104.0, + "2": 2051629056.0, + "3": 2053139456.0, + "4": 2053139456.0, + "5": 2053139456.0, + "6": 2053992960.0, + "7": 2055479296.0, + "8": 2055479296.0, + "9": 2056268288.0, + "10": 2059108864.0, + "11": 2059108864.0, + "12": 2059108864.0, + "13": 2059108864.0, + "14": 2059108864.0, + "15": 2059108864.0, + "16": 2059108864.0, + "17": 2059108864.0, + "18": 2059108864.0, + "19": 2059108864.0, + "20": 2059108864.0, + "21": 2059108864.0, + "22": 2059108864.0, + "23": 2059108864.0, + "24": 2059108864.0, + "25": 2059108864.0, + "26": 2059108864.0, + "27": 2059108864.0, + "28": 2059108864.0, + "29": 2059108864.0, + "30": 2059108864.0, + "31": 2059108864.0, + "32": 2059108864.0, + "33": 2059108864.0, + "34": 2059108864.0, + "35": 2059108864.0, + "36": 2059108864.0, + "37": 2059108864.0, + "38": 2059108864.0, + "39": 2059108864.0, + "40": 2059108864.0, + "41": 2059108864.0, + "42": 2059108864.0, + "43": 2059108864.0, + "44": 2059108864.0, + "45": 2059108864.0, + "46": 2059108864.0, + "47": 2059108864.0, + "48": 2059108864.0, + "49": 2059108864.0, + "50": 2059108864.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 17.54696, - "2": 0.35381, - "3": 0.30805, - "4": 0.32999, - "5": 0.28074, - "6": 0.27713, - "7": 0.30692, - "8": 0.27076, - "9": 0.28178, - "10": 0.28798, - "11": 0.26657, - "12": 0.27288, - "13": 0.27118, - "14": 0.26505, - "15": 0.27307, - "16": 0.26745, - "17": 0.28092, - "18": 0.25951, - "19": 0.26123, - "20": 0.27117, - "21": 0.26705, - "22": 0.27657, - "23": 0.2785, - "24": 0.27138, - "25": 0.27542, - "26": 0.26549, - "27": 0.26436, - "28": 0.2817, - "29": 0.26002, - 
"30": 0.26437, - "31": 0.29073, - "32": 0.27239, - "33": 0.26215, - "34": 0.2748, - "35": 0.2623, - "36": 0.25929, - "37": 0.26086, - "38": 0.26996, - "39": 0.25721, - "40": 0.25938, - "41": 0.26959, - "42": 0.25657, - "43": 0.26426, - "44": 0.25689, - "45": 0.26206, - "46": 0.27753, - "47": 0.27998, - "48": 0.26838, - "49": 0.27354, - "50": 0.26097 + "1": 34.53022, + "2": 0.38382, + "3": 0.30651, + "4": 0.31954, + "5": 0.26567, + "6": 0.25765, + "7": 0.2929, + "8": 0.25619, + "9": 0.258, + "10": 0.25636, + "11": 0.25532, + "12": 0.24287, + "13": 0.2492, + "14": 0.24147, + "15": 0.26466, + "16": 0.24525, + "17": 0.24874, + "18": 0.23153, + "19": 0.23145, + "20": 0.23938, + "21": 0.23145, + "22": 0.67309, + "23": 0.24419, + "24": 0.23267, + "25": 0.24476, + "26": 0.23424, + "27": 0.23306, + "28": 0.24797, + "29": 0.22898, + "30": 0.23089, + "31": 0.26141, + "32": 0.24406, + "33": 0.22981, + "34": 0.24305, + "35": 0.22955, + "36": 0.23411, + "37": 0.22923, + "38": 0.23544, + "39": 0.23275, + "40": 0.23602, + "41": 0.238, + "42": 0.23132, + "43": 0.23557, + "44": 0.22984, + "45": 0.22919, + "46": 0.27449, + "47": 0.24511, + "48": 0.25065, + "49": 0.24993, + "50": 0.24332 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json index 7ca7a077425..4bf1314508c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.8277, "5": 10.85694, "10": 10.79218, "15": 10.82554, "20": 10.72266, "25": 10.54408, 
"30": 10.35702, "35": 10.27159, "40": 10.09693, "45": 9.84114, "50": 9.92408}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4651.0, "5": 5442.0, "10": 4292.0, "15": 5228.0, "20": 4806.0, "25": 4844.0, "30": 5408.0, "35": 5653.0, "40": 5925.0, "45": 5632.0, "50": 6701.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1114236928.0, "5": 1114232832.0, "10": 1114234368.0, "15": 1114235904.0, "20": 1114234368.0, "25": 1114232832.0, "30": 1114233344.0, "35": 1114236928.0, "40": 1114235392.0, "45": 1114234880.0, "50": 1114236416.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1562494464.0, "5": 2020286464.0, "10": 2021971968.0, "15": 2023709184.0, "20": 2023709184.0, "25": 2023709184.0, "30": 2023709184.0, "35": 2028052992.0, "40": 2028052992.0, "45": 2028052992.0, "50": 2028052992.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 11.18372, "5": 0.31643, "10": 0.31694, "15": 0.31783, "20": 0.31908, "25": 0.31135, "30": 0.31816, "35": 0.31147, "40": 0.31529, "45": 0.31149, "50": 0.31277}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82721, + "2": 10.84035, + "3": 10.82745, + "4": 10.81915, + "5": 10.85627, + "6": 10.86983, + "7": 10.85163, + "8": 10.84508, + "9": 10.85219, + "10": 10.7925, + "11": 10.86564, + "12": 10.87089, + "13": 10.87065, + "14": 10.87856, + "15": 10.82558, + "16": 10.81245, + "17": 10.77494, + "18": 10.81119, + "19": 10.79646, + "20": 10.72204, + "21": 10.69748, + "22": 10.55149, + "23": 10.70513, + "24": 10.59002, + "25": 10.54424, + "26": 10.60053, + "27": 10.61985, + "28": 10.57416, + "29": 10.58647, + "30": 10.35756, + "31": 10.12146, + "32": 10.47023, + "33": 10.45687, + "34": 10.21575, + "35": 10.27137, + "36": 10.23554, + "37": 10.35262, + "38": 10.20577, + "39": 10.40106, + "40": 
10.09677, + "41": 10.13884, + "42": 10.21795, + "43": 9.84364, + "44": 9.96195, + "45": 9.84129, + "46": 9.81913, + "47": 10.13875, + "48": 9.85153, + "49": 9.53512, + "50": 9.92452 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4775.0, + "2": 4861.0, + "3": 4764.0, + "4": 5023.0, + "5": 5439.0, + "6": 5522.0, + "7": 5124.0, + "8": 4666.0, + "9": 5272.0, + "10": 4186.0, + "11": 5466.0, + "12": 5281.0, + "13": 5569.0, + "14": 5501.0, + "15": 5233.0, + "16": 5322.0, + "17": 5097.0, + "18": 5014.0, + "19": 5234.0, + "20": 4733.0, + "21": 5325.0, + "22": 4809.0, + "23": 5533.0, + "24": 5061.0, + "25": 4818.0, + "26": 5216.0, + "27": 5208.0, + "28": 5826.0, + "29": 5732.0, + "30": 5492.0, + "31": 4787.0, + "32": 5647.0, + "33": 6102.0, + "34": 5313.0, + "35": 5706.0, + "36": 5649.0, + "37": 6405.0, + "38": 6181.0, + "39": 6630.0, + "40": 5800.0, + "41": 5960.0, + "42": 6310.0, + "43": 5877.0, + "44": 5751.0, + "45": 5902.0, + "46": 5952.0, + "47": 6536.0, + "48": 6332.0, + "49": 6179.0, + "50": 6632.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1115784704.0, + "2": 1115784192.0, + "3": 1115781120.0, + "4": 1115783680.0, + "5": 1115780608.0, + "6": 1115781120.0, + "7": 1115783168.0, + "8": 1115781120.0, + "9": 1115783680.0, + "10": 1115782656.0, + "11": 1115782656.0, + "12": 1115780608.0, + "13": 1115785728.0, + "14": 1115780608.0, + "15": 1115783680.0, + "16": 1115783680.0, + "17": 1115781120.0, + "18": 1115783680.0, + "19": 1115780096.0, + "20": 1115782144.0, + "21": 1115780096.0, + "22": 1115781632.0, + "23": 1115782656.0, + "24": 1115784192.0, + "25": 1115781632.0, + "26": 1115784192.0, + "27": 1115782144.0, + "28": 1115783680.0, + "29": 1115784192.0, + "30": 1115780608.0, + "31": 1115787264.0, + "32": 1115783168.0, + "33": 1115781632.0, + "34": 1115782144.0, + "35": 1115784704.0, + "36": 1115780096.0, + "37": 1115781632.0, + "38": 
1115782656.0, + "39": 1115781120.0, + "40": 1115783168.0, + "41": 1115783680.0, + "42": 1115783680.0, + "43": 1115785216.0, + "44": 1115784192.0, + "45": 1115782144.0, + "46": 1115784192.0, + "47": 1115784192.0, + "48": 1115780608.0, + "49": 1115779072.0, + "50": 1115784704.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563058688.0, + "2": 2022025216.0, + "3": 2022025216.0, + "4": 2022816256.0, + "5": 2022816256.0, + "6": 2022816256.0, + "7": 2022816256.0, + "8": 2022816256.0, + "9": 2022816256.0, + "10": 2025321984.0, + "11": 2025321984.0, + "12": 2025321984.0, + "13": 2028008960.0, + "14": 2028008960.0, + "15": 2028008960.0, + "16": 2028008960.0, + "17": 2028008960.0, + "18": 2028008960.0, + "19": 2028008960.0, + "20": 2028008960.0, + "21": 2028008960.0, + "22": 2028008960.0, + "23": 2028008960.0, + "24": 2028008960.0, + "25": 2028008960.0, + "26": 2028008960.0, + "27": 2028008960.0, + "28": 2028008960.0, + "29": 2028008960.0, + "30": 2028008960.0, + "31": 2030280704.0, + "32": 2030280704.0, + "33": 2030280704.0, + "34": 2030280704.0, + "35": 2030280704.0, + "36": 2030280704.0, + "37": 2030280704.0, + "38": 2030280704.0, + "39": 2030280704.0, + "40": 2030280704.0, + "41": 2030280704.0, + "42": 2030280704.0, + "43": 2030280704.0, + "44": 2030280704.0, + "45": 2030280704.0, + "46": 2030280704.0, + "47": 2030280704.0, + "48": 2030280704.0, + "49": 2030280704.0, + "50": 2030280704.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 23.51368, + "2": 0.372, + "3": 0.34151, + "4": 0.32901, + "5": 0.32625, + "6": 0.32542, + "7": 0.32567, + "8": 0.32532, + "9": 0.3246, + "10": 0.33277, + "11": 0.3347, + "12": 0.33248, + "13": 0.33305, + "14": 0.33419, + "15": 0.33226, + "16": 0.3359, + "17": 0.33203, + "18": 0.331, + "19": 0.3345, + "20": 0.3364, + "21": 0.334, + "22": 0.33335, + "23": 0.33273, + "24": 0.33251, + "25": 0.33104, + "26": 
0.3322, + "27": 0.33082, + "28": 0.33107, + "29": 0.33275, + "30": 0.33104, + "31": 0.33073, + "32": 0.33192, + "33": 0.32966, + "34": 0.3315, + "35": 0.33271, + "36": 0.33633, + "37": 0.33246, + "38": 0.80821, + "39": 0.33259, + "40": 0.33171, + "41": 0.33156, + "42": 0.33428, + "43": 0.33263, + "44": 0.81732, + "45": 0.33782, + "46": 0.33165, + "47": 0.71569, + "48": 0.33327, + "49": 0.33588, + "50": 0.33196 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json index 0999afd59a3..f6b0539891f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82762, "5": 10.85681, "10": 10.79217, "15": 10.82534, "20": 10.72228, "25": 10.54483, "30": 10.35746, "35": 10.27126, "40": 10.09704, "45": 9.84116, "50": 9.92438}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4699.0, "5": 5376.0, "10": 4507.0, "15": 5311.0, "20": 4809.0, "25": 4797.0, "30": 5353.0, "35": 5678.0, "40": 5904.0, "45": 5760.0, "50": 6526.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1114231296.0, "5": 1114228224.0, "10": 1114228224.0, "15": 1114230272.0, "20": 1114228224.0, "25": 1114228224.0, "30": 1114227200.0, "35": 1114231296.0, "40": 1114229760.0, "45": 1114228736.0, "50": 1114230784.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1562553856.0, "5": 
2021133824.0, "10": 2022334976.0, "15": 2024271872.0, "20": 2024271872.0, "25": 2024820736.0, "30": 2024820736.0, "35": 2027709440.0, "40": 2027709440.0, "45": 2027709440.0, "50": 2027709440.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 13.72505, "5": 0.3325, "10": 0.33257, "15": 0.33093, "20": 0.33304, "25": 0.33508, "30": 0.37083, "35": 0.33207, "40": 0.3328, "45": 0.33149, "50": 0.3319}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82753, + "2": 10.84043, + "3": 10.82696, + "4": 10.81906, + "5": 10.8571, + "6": 10.86999, + "7": 10.85125, + "8": 10.84501, + "9": 10.85265, + "10": 10.79211, + "11": 10.86575, + "12": 10.87117, + "13": 10.87051, + "14": 10.87901, + "15": 10.82536, + "16": 10.8123, + "17": 10.77452, + "18": 10.81079, + "19": 10.79696, + "20": 10.72249, + "21": 10.6974, + "22": 10.55098, + "23": 10.70558, + "24": 10.58965, + "25": 10.54401, + "26": 10.60019, + "27": 10.62042, + "28": 10.57421, + "29": 10.58618, + "30": 10.35747, + "31": 10.12177, + "32": 10.47023, + "33": 10.45691, + "34": 10.21589, + "35": 10.27151, + "36": 10.23536, + "37": 10.35281, + "38": 10.20581, + "39": 10.40112, + "40": 10.09709, + "41": 10.13842, + "42": 10.21786, + "43": 9.84412, + "44": 9.96175, + "45": 9.84106, + "46": 9.81952, + "47": 10.13903, + "48": 9.85138, + "49": 9.5357, + "50": 9.92441 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4626.0, + "2": 4979.0, + "3": 4857.0, + "4": 4949.0, + "5": 5274.0, + "6": 5510.0, + "7": 5208.0, + "8": 4789.0, + "9": 5178.0, + "10": 4415.0, + "11": 5661.0, + "12": 5262.0, + "13": 5488.0, + "14": 5557.0, + "15": 5334.0, + "16": 5308.0, + "17": 5223.0, + "18": 5053.0, + "19": 5313.0, + "20": 4900.0, + "21": 5337.0, + "22": 4891.0, + "23": 5775.0, + "24": 5079.0, + "25": 4783.0, + "26": 5161.0, + "27": 5253.0, + "28": 5789.0, + "29": 5972.0, + 
"30": 5409.0, + "31": 4717.0, + "32": 5767.0, + "33": 6154.0, + "34": 5213.0, + "35": 5592.0, + "36": 5634.0, + "37": 6316.0, + "38": 6079.0, + "39": 6447.0, + "40": 6079.0, + "41": 5878.0, + "42": 6332.0, + "43": 5835.0, + "44": 5753.0, + "45": 5722.0, + "46": 6031.0, + "47": 6598.0, + "48": 6402.0, + "49": 6249.0, + "50": 6676.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1116843520.0, + "2": 1116841984.0, + "3": 1116839936.0, + "4": 1116843008.0, + "5": 1116839424.0, + "6": 1116838912.0, + "7": 1116840448.0, + "8": 1116839936.0, + "9": 1116842496.0, + "10": 1116841472.0, + "11": 1116840448.0, + "12": 1116840960.0, + "13": 1116845056.0, + "14": 1116839424.0, + "15": 1116842496.0, + "16": 1116841472.0, + "17": 1116839936.0, + "18": 1116841984.0, + "19": 1116838912.0, + "20": 1116841472.0, + "21": 1116839936.0, + "22": 1116840448.0, + "23": 1116840448.0, + "24": 1116844544.0, + "25": 1116840448.0, + "26": 1116843008.0, + "27": 1116840960.0, + "28": 1116841984.0, + "29": 1116843008.0, + "30": 1116839424.0, + "31": 1116846080.0, + "32": 1116842496.0, + "33": 1116840448.0, + "34": 1116840448.0, + "35": 1116843520.0, + "36": 1116838912.0, + "37": 1116840448.0, + "38": 1116841472.0, + "39": 1116839936.0, + "40": 1116841984.0, + "41": 1116843520.0, + "42": 1116843520.0, + "43": 1116844032.0, + "44": 1116843008.0, + "45": 1116840960.0, + "46": 1116842496.0, + "47": 1116841984.0, + "48": 1116839936.0, + "49": 1116837376.0, + "50": 1116844032.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1563050496.0, + "2": 2021967872.0, + "3": 2021967872.0, + "4": 2022971392.0, + "5": 2022971392.0, + "6": 2022971392.0, + "7": 2022971392.0, + "8": 2022971392.0, + "9": 2022971392.0, + "10": 2024804864.0, + "11": 2024804864.0, + "12": 2024804864.0, + "13": 2027590656.0, + "14": 2027590656.0, + "15": 2027590656.0, + "16": 2027590656.0, + "17": 
2027590656.0, + "18": 2027590656.0, + "19": 2027590656.0, + "20": 2027590656.0, + "21": 2027590656.0, + "22": 2027590656.0, + "23": 2027590656.0, + "24": 2027590656.0, + "25": 2027590656.0, + "26": 2027590656.0, + "27": 2027590656.0, + "28": 2027590656.0, + "29": 2027590656.0, + "30": 2027590656.0, + "31": 2030131200.0, + "32": 2030131200.0, + "33": 2030131200.0, + "34": 2030131200.0, + "35": 2030131200.0, + "36": 2030131200.0, + "37": 2030131200.0, + "38": 2030131200.0, + "39": 2030131200.0, + "40": 2030131200.0, + "41": 2030131200.0, + "42": 2030131200.0, + "43": 2030131200.0, + "44": 2030131200.0, + "45": 2030131200.0, + "46": 2030131200.0, + "47": 2030131200.0, + "48": 2030131200.0, + "49": 2030131200.0, + "50": 2030131200.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 21.05476, + "2": 0.37335, + "3": 0.34228, + "4": 0.32445, + "5": 0.32484, + "6": 0.3249, + "7": 0.32488, + "8": 0.32585, + "9": 0.32395, + "10": 0.32465, + "11": 0.32197, + "12": 0.32169, + "13": 0.32213, + "14": 0.32236, + "15": 0.32344, + "16": 0.32418, + "17": 0.32357, + "18": 0.32327, + "19": 0.72477, + "20": 0.32351, + "21": 0.32286, + "22": 0.32395, + "23": 0.3238, + "24": 0.32345, + "25": 0.32441, + "26": 0.32375, + "27": 0.32444, + "28": 0.32394, + "29": 0.32438, + "30": 0.32386, + "31": 0.32381, + "32": 0.32332, + "33": 0.32386, + "34": 0.32457, + "35": 0.32337, + "36": 0.32334, + "37": 0.3239, + "38": 0.32451, + "39": 0.324, + "40": 0.32494, + "41": 0.324, + "42": 0.32347, + "43": 0.32398, + "44": 0.32338, + "45": 0.32336, + "46": 0.32329, + "47": 0.32358, + "48": 0.32344, + "49": 0.32289, + "50": 0.3206 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json index d342471ff77..5b369a3137c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.7999, - "2": 10.80046, - "3": 10.80877, - "4": 10.78226, - "5": 10.8254, - "6": 10.83596, - "7": 10.81676, - "8": 10.81163, - "9": 10.81106, - "10": 10.77366, - "11": 10.85495, - "12": 10.82711, - "13": 10.85109, - "14": 10.8546, - "15": 10.78267, - "16": 10.77358, - "17": 10.75036, - "18": 10.78319, - "19": 10.75876, - "20": 10.6992, - "21": 10.67244, - "22": 10.51382, - "23": 10.68112, - "24": 10.57174, - "25": 10.51756, - "26": 10.57624, - "27": 10.59185, - "28": 10.55401, - "29": 10.57113, - "30": 10.36465, - "31": 10.10866, - "32": 10.45338, - "33": 10.43764, - "34": 10.20033, - "35": 10.25433, - "36": 10.23362, - "37": 10.35369, - "38": 10.20443, - "39": 10.39917, - "40": 10.10245, - "41": 10.12765, - "42": 10.21106, - "43": 9.83722, - "44": 9.962, - "45": 9.84252, - "46": 9.80612, - "47": 10.14257, - "48": 9.86665, - "49": 9.5383, - "50": 9.92576 + "1": 10.80012, + "2": 10.8005, + "3": 10.80883, + "4": 10.78232, + "5": 10.82514, + "6": 10.83649, + "7": 10.8162, + "8": 10.81195, + "9": 10.8108, + "10": 10.77412, + "11": 10.85566, + "12": 10.82707, + "13": 10.85141, + "14": 10.85446, + "15": 10.78278, + "16": 10.77366, + "17": 10.7506, + "18": 10.78381, + "19": 10.7589, + "20": 10.7001, + "21": 10.67278, + "22": 10.51434, + "23": 10.68074, + "24": 10.57171, + "25": 10.518, + "26": 10.57588, + "27": 10.59157, + "28": 10.55337, + "29": 10.57061, + "30": 10.36462, + "31": 10.10867, + "32": 10.45325, + "33": 10.43728, + "34": 
10.20006, + "35": 10.25436, + "36": 10.23332, + "37": 10.35373, + "38": 10.20421, + "39": 10.39913, + "40": 10.10214, + "41": 10.12724, + "42": 10.21139, + "43": 9.83735, + "44": 9.96179, + "45": 9.8429, + "46": 9.80656, + "47": 10.14235, + "48": 9.86669, + "49": 9.53809, + "50": 9.92544 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4827.0, - "2": 4935.0, - "3": 5030.0, - "4": 4956.0, - "5": 5583.0, - "6": 5594.0, - "7": 5325.0, - "8": 5098.0, - "9": 5335.0, - "10": 4581.0, - "11": 5895.0, - "12": 5249.0, - "13": 5692.0, - "14": 5736.0, - "15": 5303.0, - "16": 5347.0, - "17": 5361.0, - "18": 5322.0, - "19": 5407.0, - "20": 4961.0, - "21": 5441.0, - "22": 4776.0, - "23": 5752.0, - "24": 5157.0, - "25": 4897.0, - "26": 5202.0, - "27": 5455.0, - "28": 5769.0, - "29": 5911.0, - "30": 5256.0, - "31": 4674.0, - "32": 5854.0, - "33": 6080.0, - "34": 5278.0, - "35": 5743.0, - "36": 5523.0, - "37": 6477.0, - "38": 5839.0, - "39": 6711.0, - "40": 5852.0, - "41": 6062.0, - "42": 6501.0, - "43": 5605.0, - "44": 5883.0, - "45": 5763.0, - "46": 6076.0, - "47": 6613.0, - "48": 6348.0, - "49": 6430.0, - "50": 6699.0 + "1": 4916.0, + "2": 4954.0, + "3": 5054.0, + "4": 5108.0, + "5": 5499.0, + "6": 5705.0, + "7": 5188.0, + "8": 4899.0, + "9": 5442.0, + "10": 4498.0, + "11": 5894.0, + "12": 5279.0, + "13": 5766.0, + "14": 5633.0, + "15": 5168.0, + "16": 5358.0, + "17": 5399.0, + "18": 5305.0, + "19": 5131.0, + "20": 4905.0, + "21": 5355.0, + "22": 4916.0, + "23": 5674.0, + "24": 5034.0, + "25": 4922.0, + "26": 5355.0, + "27": 5424.0, + "28": 5771.0, + "29": 6052.0, + "30": 5386.0, + "31": 4773.0, + "32": 5773.0, + "33": 6105.0, + "34": 5287.0, + "35": 5623.0, + "36": 5502.0, + "37": 6266.0, + "38": 6005.0, + "39": 6727.0, + "40": 5810.0, + "41": 5898.0, + "42": 6417.0, + "43": 5774.0, + "44": 5812.0, + "45": 5768.0, + "46": 5884.0, + "47": 6481.0, + "48": 6435.0, + "49": 6461.0, + "50": 6489.0 } }, "mem-allocated-bytes": { @@ -118,56 
+118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1145716736.0, - "2": 1145714688.0, - "3": 1145715200.0, - "4": 1145714176.0, - "5": 1146210816.0, - "6": 1146210304.0, - "7": 1145716736.0, - "8": 1146209792.0, - "9": 1145714688.0, - "10": 1146214912.0, - "11": 1145714176.0, - "12": 1145713664.0, - "13": 1145712128.0, - "14": 1146209280.0, - "15": 1145713152.0, - "16": 1146210304.0, - "17": 1145713664.0, - "18": 1146210304.0, - "19": 1145714176.0, - "20": 1145715200.0, - "21": 1146210304.0, - "22": 1145715712.0, - "23": 1145716224.0, - "24": 1145713152.0, - "25": 1145712128.0, - "26": 1145715200.0, - "27": 1146210304.0, - "28": 1145713664.0, - "29": 1145711104.0, - "30": 1145714688.0, - "31": 1146213376.0, - "32": 1145713152.0, - "33": 1145714688.0, - "34": 1145714688.0, - "35": 1146213376.0, - "36": 1145713664.0, - "37": 1145712128.0, - "38": 1146207744.0, - "39": 1145715200.0, - "40": 1146210816.0, - "41": 1145714688.0, - "42": 1145711104.0, - "43": 1146211840.0, - "44": 1145717248.0, - "45": 1145714688.0, - "46": 1146214400.0, - "47": 1145714688.0, - "48": 1145717248.0, - "49": 1146214912.0, - "50": 1145716224.0 + "1": 1145163776.0, + "2": 1146163200.0, + "3": 1145163264.0, + "4": 1145162240.0, + "5": 1145163264.0, + "6": 1145163264.0, + "7": 1146213376.0, + "8": 1146210816.0, + "9": 1146211328.0, + "10": 1145167360.0, + "11": 1145162240.0, + "12": 1145161728.0, + "13": 1145161216.0, + "14": 1145161728.0, + "15": 1145161216.0, + "16": 1145162752.0, + "17": 1145882624.0, + "18": 1145162752.0, + "19": 1145162240.0, + "20": 1145163264.0, + "21": 1145162752.0, + "22": 1145163776.0, + "23": 1146212352.0, + "24": 1145161216.0, + "25": 1145160704.0, + "26": 1145164288.0, + "27": 1146212352.0, + "28": 1145161728.0, + "29": 1145159680.0, + "30": 1145162752.0, + "31": 1145165824.0, + "32": 1145162240.0, + "33": 1145162752.0, + "34": 1145163264.0, + "35": 1146213888.0, + "36": 1145161728.0, + "37": 1145160192.0, + "38": 1146208768.0, + "39": 1146211840.0, + 
"40": 1146211328.0, + "41": 1145163264.0, + "42": 1145160704.0, + "43": 1145164288.0, + "44": 1146213376.0, + "45": 1146211328.0, + "46": 1146215424.0, + "47": 1145162752.0, + "48": 1145165312.0, + "49": 1146216448.0, + "50": 1145164288.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1593766912.0, - "2": 2051463168.0, - "3": 2052584960.0, - "4": 2052584960.0, - "5": 2052584960.0, - "6": 2053404160.0, - "7": 2054199296.0, - "8": 2054199296.0, - "9": 2056971776.0, - "10": 2057138688.0, - "11": 2057138688.0, - "12": 2057138688.0, - "13": 2057138688.0, - "14": 2057138688.0, - "15": 2057138688.0, - "16": 2057138688.0, - "17": 2057138688.0, - "18": 2057138688.0, - "19": 2057138688.0, - "20": 2057138688.0, - "21": 2057138688.0, - "22": 2057138688.0, - "23": 2057138688.0, - "24": 2057138688.0, - "25": 2057138688.0, - "26": 2057138688.0, - "27": 2057138688.0, - "28": 2057138688.0, - "29": 2057138688.0, - "30": 2057138688.0, - "31": 2057138688.0, - "32": 2057138688.0, - "33": 2057138688.0, - "34": 2057138688.0, - "35": 2057138688.0, - "36": 2057138688.0, - "37": 2057138688.0, - "38": 2057138688.0, - "39": 2057138688.0, - "40": 2057138688.0, - "41": 2057138688.0, - "42": 2057138688.0, - "43": 2057138688.0, - "44": 2057138688.0, - "45": 2057138688.0, - "46": 2057138688.0, - "47": 2057138688.0, - "48": 2057138688.0, - "49": 2057138688.0, - "50": 2057138688.0 + "1": 1593583104.0, + "2": 2051818496.0, + "3": 2053099520.0, + "4": 2053099520.0, + "5": 2053099520.0, + "6": 2054166016.0, + "7": 2055368704.0, + "8": 2055444992.0, + "9": 2056095232.0, + "10": 2057353728.0, + "11": 2057353728.0, + "12": 2057353728.0, + "13": 2057353728.0, + "14": 2057353728.0, + "15": 2057353728.0, + "16": 2057353728.0, + "17": 2057353728.0, + "18": 2057353728.0, + "19": 2057353728.0, + "20": 2057353728.0, + "21": 2057353728.0, + "22": 2057353728.0, + "23": 2057353728.0, + "24": 2057353728.0, + "25": 2057353728.0, + "26": 2057353728.0, + 
"27": 2057353728.0, + "28": 2057353728.0, + "29": 2057353728.0, + "30": 2057353728.0, + "31": 2057353728.0, + "32": 2057353728.0, + "33": 2057353728.0, + "34": 2057353728.0, + "35": 2057353728.0, + "36": 2057353728.0, + "37": 2057353728.0, + "38": 2057353728.0, + "39": 2057353728.0, + "40": 2057353728.0, + "41": 2057353728.0, + "42": 2057353728.0, + "43": 2057353728.0, + "44": 2057353728.0, + "45": 2057353728.0, + "46": 2057353728.0, + "47": 2057353728.0, + "48": 2057353728.0, + "49": 2057353728.0, + "50": 2057353728.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 17.99317, - "2": 0.35408, - "3": 0.30455, - "4": 0.32631, - "5": 0.27174, - "6": 0.27168, - "7": 0.29847, - "8": 0.27152, - "9": 0.27606, - "10": 0.27991, - "11": 0.25875, - "12": 0.25854, - "13": 0.26351, - "14": 0.2599, - "15": 0.26827, - "16": 0.25734, - "17": 0.26876, - "18": 0.26302, - "19": 0.25791, - "20": 0.26587, - "21": 0.26207, - "22": 0.2718, - "23": 0.27036, - "24": 0.2557, - "25": 0.27098, - "26": 0.2562, - "27": 0.25663, - "28": 0.28209, - "29": 0.25678, - "30": 0.26198, - "31": 0.27896, - "32": 0.26879, - "33": 0.25449, - "34": 0.27377, - "35": 0.25725, - "36": 0.25349, - "37": 0.2537, - "38": 0.26246, - "39": 0.25527, - "40": 0.25676, - "41": 0.26427, - "42": 0.25718, - "43": 0.26206, - "44": 0.25615, - "45": 0.261, - "46": 0.28413, - "47": 0.27633, - "48": 0.26455, - "49": 0.2706, - "50": 0.25944 + "1": 36.30862, + "2": 0.33719, + "3": 0.28216, + "4": 0.2843, + "5": 0.23756, + "6": 0.23639, + "7": 0.27014, + "8": 0.24101, + "9": 0.24066, + "10": 0.25135, + "11": 0.2342, + "12": 0.22722, + "13": 0.23279, + "14": 0.22714, + "15": 0.24041, + "16": 0.22689, + "17": 0.23762, + "18": 0.22666, + "19": 0.2282, + "20": 0.22795, + "21": 0.2341, + "22": 0.65676, + "23": 0.24009, + "24": 0.22741, + "25": 0.23512, + "26": 0.22626, + "27": 0.22751, + "28": 0.246, + "29": 0.22763, + "30": 0.23076, + "31": 0.25299, + "32": 0.23341, + "33": 0.22812, 
+ "34": 0.24223, + "35": 0.23465, + "36": 0.22594, + "37": 0.22774, + "38": 0.23179, + "39": 0.22535, + "40": 0.22597, + "41": 0.23473, + "42": 0.2254, + "43": 0.23446, + "44": 0.22767, + "45": 0.23442, + "46": 0.25088, + "47": 0.24058, + "48": 0.23646, + "49": 0.24323, + "50": 0.23136 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json index 4383c914d8e..03cdcbebfb1 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.8277, - "2": 10.84068, - "3": 10.82705, + "1": 10.82721, + "2": 10.84035, + "3": 10.82733, "4": 10.81913, - "5": 10.85673, - "6": 10.86984, - "7": 10.85119, - "8": 10.84465, - "9": 10.85269, - "10": 10.79157, - "11": 10.86571, - "12": 10.87169, - "13": 10.8708, - "14": 10.8787, - "15": 10.82554, - "16": 10.81251, - "17": 10.77478, - "18": 10.81068, - "19": 10.79632, - "20": 10.72175, - "21": 10.69765, - "22": 10.55138, - "23": 10.70555, - "24": 10.59005, - "25": 10.54425, - "26": 10.60036, - "27": 10.61973, - "28": 10.57442, - "29": 10.58656, - "30": 10.35754, - "31": 10.12169, - "32": 10.46987, - "33": 10.45722, - "34": 10.2158, - "35": 10.27086, - "36": 10.2354, - "37": 10.35246, - "38": 10.20574, - "39": 10.40061, - "40": 10.09681, - "41": 10.13869, - "42": 10.21829, - "43": 9.84428, - "44": 9.9614, - "45": 9.84116, - "46": 9.81955, - "47": 10.13927, - "48": 9.85138, - "49": 9.53518, - "50": 9.92455 + "5": 10.85669, + "6": 10.86992, + "7": 10.85145, + "8": 
10.84454, + "9": 10.85217, + "10": 10.79203, + "11": 10.86556, + "12": 10.87068, + "13": 10.87092, + "14": 10.87861, + "15": 10.82588, + "16": 10.81198, + "17": 10.77469, + "18": 10.81081, + "19": 10.79685, + "20": 10.72214, + "21": 10.69749, + "22": 10.55117, + "23": 10.70533, + "24": 10.59031, + "25": 10.54454, + "26": 10.60011, + "27": 10.62053, + "28": 10.57401, + "29": 10.58652, + "30": 10.35738, + "31": 10.12167, + "32": 10.46986, + "33": 10.45718, + "34": 10.21579, + "35": 10.27137, + "36": 10.23516, + "37": 10.35226, + "38": 10.20647, + "39": 10.40076, + "40": 10.09694, + "41": 10.13882, + "42": 10.21793, + "43": 9.844, + "44": 9.96176, + "45": 9.84078, + "46": 9.81922, + "47": 10.13915, + "48": 9.85114, + "49": 9.53525, + "50": 9.92432 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4627.0, - "2": 4785.0, - "3": 4887.0, - "4": 5134.0, - "5": 5403.0, - "6": 5457.0, - "7": 5140.0, - "8": 4876.0, - "9": 5213.0, - "10": 4396.0, - "11": 5749.0, - "12": 5182.0, - "13": 5436.0, - "14": 5431.0, - "15": 5327.0, - "16": 5452.0, - "17": 5245.0, - "18": 5116.0, - "19": 5216.0, - "20": 4869.0, - "21": 5326.0, - "22": 4832.0, - "23": 5719.0, - "24": 5017.0, - "25": 4980.0, - "26": 5288.0, - "27": 5346.0, - "28": 5727.0, - "29": 5937.0, - "30": 5289.0, - "31": 4777.0, - "32": 5616.0, - "33": 6137.0, - "34": 5140.0, - "35": 5690.0, - "36": 5739.0, - "37": 6425.0, - "38": 5962.0, - "39": 6620.0, - "40": 5921.0, - "41": 5820.0, - "42": 6472.0, - "43": 5860.0, - "44": 5731.0, - "45": 5769.0, - "46": 6130.0, - "47": 6576.0, - "48": 6403.0, - "49": 6084.0, - "50": 6648.0 + "1": 4672.0, + "2": 4867.0, + "3": 4956.0, + "4": 4946.0, + "5": 5421.0, + "6": 5554.0, + "7": 5128.0, + "8": 4852.0, + "9": 5281.0, + "10": 4254.0, + "11": 5524.0, + "12": 5140.0, + "13": 5533.0, + "14": 5553.0, + "15": 5130.0, + "16": 5322.0, + "17": 5214.0, + "18": 5146.0, + "19": 5276.0, + "20": 4803.0, + "21": 5286.0, + "22": 4882.0, + "23": 5710.0, + "24": 
4925.0, + "25": 4732.0, + "26": 5191.0, + "27": 5286.0, + "28": 5771.0, + "29": 5891.0, + "30": 5411.0, + "31": 4721.0, + "32": 5606.0, + "33": 6002.0, + "34": 5137.0, + "35": 5602.0, + "36": 5708.0, + "37": 6467.0, + "38": 6089.0, + "39": 6746.0, + "40": 6058.0, + "41": 5845.0, + "42": 6342.0, + "43": 6034.0, + "44": 5828.0, + "45": 5758.0, + "46": 5886.0, + "47": 6555.0, + "48": 6437.0, + "49": 6286.0, + "50": 6602.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1115810816.0, - "2": 1115809280.0, - "3": 1115807232.0, - "4": 1115809792.0, - "5": 1115806720.0, - "6": 1115807232.0, - "7": 1115808768.0, - "8": 1115807744.0, - "9": 1115809792.0, - "10": 1115808768.0, - "11": 1115808768.0, - "12": 1115808256.0, - "13": 1115811840.0, - "14": 1115807232.0, - "15": 1115809792.0, - "16": 1115808768.0, - "17": 1115806720.0, - "18": 1115809792.0, - "19": 1115806208.0, - "20": 1115808256.0, - "21": 1115806208.0, - "22": 1115807744.0, - "23": 1115807744.0, - "24": 1115810304.0, - "25": 1115807744.0, - "26": 1115810304.0, - "27": 1115808256.0, - "28": 1115809280.0, - "29": 1115810304.0, - "30": 1115806720.0, - "31": 1115813376.0, - "32": 1115809792.0, - "33": 1115807744.0, - "34": 1115808256.0, - "35": 1115810816.0, - "36": 1115806208.0, - "37": 1115807744.0, - "38": 1115809792.0, - "39": 1115807232.0, - "40": 1115809792.0, - "41": 1115810816.0, - "42": 1115810816.0, - "43": 1115811328.0, - "44": 1115809792.0, - "45": 1115808768.0, - "46": 1115810304.0, - "47": 1115808256.0, - "48": 1115806208.0, - "49": 1115805184.0, - "50": 1115811328.0 + "1": 1116852736.0, + "2": 1116852224.0, + "3": 1116850176.0, + "4": 1116851712.0, + "5": 1116848640.0, + "6": 1116849152.0, + "7": 1116851200.0, + "8": 1116849152.0, + "9": 1116851712.0, + "10": 1116850176.0, + "11": 1116849664.0, + "12": 1116849152.0, + "13": 1116854784.0, + "14": 1116848640.0, + "15": 1116851712.0, + "16": 1116849664.0, + "17": 1116848640.0, + "18": 1116851200.0, 
+ "19": 1116848128.0, + "20": 1116850688.0, + "21": 1116850176.0, + "22": 1116849664.0, + "23": 1116849664.0, + "24": 1116852224.0, + "25": 1116848640.0, + "26": 1116852224.0, + "27": 1116850176.0, + "28": 1116851712.0, + "29": 1116852224.0, + "30": 1116848640.0, + "31": 1116855296.0, + "32": 1116851200.0, + "33": 1116848640.0, + "34": 1116850176.0, + "35": 1116852736.0, + "36": 1116848128.0, + "37": 1116849664.0, + "38": 1116850688.0, + "39": 1116849664.0, + "40": 1116851200.0, + "41": 1116851712.0, + "42": 1116851712.0, + "43": 1116852224.0, + "44": 1116851712.0, + "45": 1116851200.0, + "46": 1116851712.0, + "47": 1116850176.0, + "48": 1116848128.0, + "49": 1116846080.0, + "50": 1116852736.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1562923008.0, - "2": 2021974528.0, - "3": 2021974528.0, - "4": 2023057408.0, - "5": 2023057408.0, - "6": 2023057408.0, - "7": 2023057408.0, - "8": 2023057408.0, - "9": 2023057408.0, - "10": 2026853376.0, - "11": 2026853376.0, - "12": 2026853376.0, - "13": 2026853376.0, - "14": 2026853376.0, - "15": 2026853376.0, - "16": 2026853376.0, - "17": 2026853376.0, - "18": 2026853376.0, - "19": 2026853376.0, - "20": 2026853376.0, - "21": 2026964992.0, - "22": 2026964992.0, - "23": 2026964992.0, - "24": 2026964992.0, - "25": 2026964992.0, - "26": 2026964992.0, - "27": 2026964992.0, - "28": 2026964992.0, - "29": 2026964992.0, - "30": 2026964992.0, - "31": 2030492160.0, - "32": 2030492160.0, - "33": 2030492160.0, - "34": 2030492160.0, - "35": 2030492160.0, - "36": 2030492160.0, - "37": 2030492160.0, - "38": 2030492160.0, - "39": 2030492160.0, - "40": 2030492160.0, - "41": 2030492160.0, - "42": 2030492160.0, - "43": 2030492160.0, - "44": 2030492160.0, - "45": 2030492160.0, - "46": 2030492160.0, - "47": 2030492160.0, - "48": 2030492160.0, - "49": 2030492160.0, - "50": 2030492160.0 + "1": 1563067904.0, + "2": 2022025216.0, + "3": 2022025216.0, + "4": 2023037440.0, + "5": 
2023037440.0, + "6": 2023037440.0, + "7": 2023037440.0, + "8": 2023037440.0, + "9": 2023037440.0, + "10": 2025690112.0, + "11": 2025690112.0, + "12": 2025690112.0, + "13": 2027666944.0, + "14": 2027666944.0, + "15": 2027666944.0, + "16": 2027666944.0, + "17": 2027666944.0, + "18": 2027666944.0, + "19": 2027666944.0, + "20": 2027666944.0, + "21": 2027666944.0, + "22": 2027666944.0, + "23": 2027666944.0, + "24": 2027666944.0, + "25": 2027666944.0, + "26": 2027666944.0, + "27": 2027666944.0, + "28": 2027666944.0, + "29": 2027666944.0, + "30": 2027666944.0, + "31": 2030213120.0, + "32": 2030213120.0, + "33": 2030213120.0, + "34": 2030213120.0, + "35": 2030213120.0, + "36": 2030213120.0, + "37": 2030213120.0, + "38": 2030213120.0, + "39": 2030213120.0, + "40": 2030213120.0, + "41": 2030213120.0, + "42": 2030213120.0, + "43": 2030213120.0, + "44": 2030213120.0, + "45": 2030213120.0, + "46": 2030213120.0, + "47": 2030213120.0, + "48": 2030213120.0, + "49": 2030213120.0, + "50": 2030213120.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 18.3953, - "2": 0.37892, - "3": 0.34007, - "4": 0.3355, - "5": 0.33186, - "6": 0.33483, - "7": 0.3277, - "8": 0.32755, - "9": 0.32791, - "10": 0.32415, - "11": 0.32272, - "12": 0.32392, - "13": 0.33508, - "14": 0.31609, - "15": 0.31941, - "16": 0.3178, - "17": 0.31692, - "18": 0.31834, - "19": 0.32074, - "20": 0.31765, - "21": 0.31933, - "22": 0.32169, - "23": 0.32073, - "24": 0.31872, - "25": 0.32305, - "26": 0.32018, - "27": 0.32077, - "28": 0.32022, - "29": 0.31612, - "30": 0.31263, - "31": 0.31663, - "32": 0.31415, - "33": 0.31634, - "34": 0.31559, - "35": 0.31239, - "36": 0.31218, - "37": 0.31427, - "38": 0.31433, - "39": 0.31314, - "40": 0.313, - "41": 0.31331, - "42": 0.31314, - "43": 0.31359, - "44": 0.31884, - "45": 0.31165, - "46": 0.31278, - "47": 0.31273, - "48": 0.31668, - "49": 0.31177, - "50": 0.31472 + "1": 17.84226, + "2": 0.49333, + "3": 0.35144, + "4": 0.35051, + 
"5": 0.33127, + "6": 0.33097, + "7": 0.33432, + "8": 0.33416, + "9": 0.33201, + "10": 0.33094, + "11": 0.33097, + "12": 0.3311, + "13": 0.33011, + "14": 0.32873, + "15": 0.32954, + "16": 0.3303, + "17": 0.33003, + "18": 0.32863, + "19": 0.32894, + "20": 0.32985, + "21": 0.32984, + "22": 0.32894, + "23": 0.33018, + "24": 0.32858, + "25": 0.32803, + "26": 0.32972, + "27": 0.32892, + "28": 0.32933, + "29": 0.3335, + "30": 0.32858, + "31": 0.3292, + "32": 0.32984, + "33": 0.32969, + "34": 0.32922, + "35": 0.33031, + "36": 0.32829, + "37": 0.32934, + "38": 0.77677, + "39": 0.32893, + "40": 0.32703, + "41": 0.32692, + "42": 0.32603, + "43": 0.32676, + "44": 0.80704, + "45": 0.32903, + "46": 0.32781, + "47": 0.70671, + "48": 0.32916, + "49": 0.3289, + "50": 0.32584 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json index d869313b50f..8f055dc00d7 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json @@ -6,104 +6,104 @@ "values": { "1": 10.81565, "2": 10.81048, - "3": 10.8127, - "4": 10.79089, - "5": 10.83784, - "6": 10.85116, - "7": 10.82036, - "8": 10.82117, - "9": 10.83043, - "10": 10.78955, - "11": 10.86357, - "12": 10.84268, - "13": 10.85799, - "14": 10.86268, - "15": 10.80594, - "16": 10.80356, - "17": 10.77851, - "18": 10.80762, - "19": 10.79465, - "20": 10.747, - "21": 10.72249, - "22": 10.58742, - "23": 10.72933, - "24": 10.63238, - "25": 10.575, - "26": 10.638, - "27": 
10.64966, - "28": 10.63496, - "29": 10.64307, - "30": 10.44635, - "31": 10.19441, - "32": 10.52449, - "33": 10.51815, - "34": 10.28843, - "35": 10.33138, - "36": 10.3123, - "37": 10.4265, - "38": 10.27866, - "39": 10.47612, - "40": 10.19821, - "41": 10.21536, - "42": 10.28769, - "43": 9.94235, - "44": 10.05775, - "45": 9.94354, - "46": 9.90902, - "47": 10.21214, - "48": 9.94982, - "49": 9.63605, - "50": 10.00335, - "51": 9.92304, - "52": 9.82779, - "53": 10.14656, - "54": 10.04338, - "55": 9.96311, - "56": 9.70508, - "57": 9.58542, - "58": 9.91687, - "59": 9.66061, - "60": 9.60393, - "61": 9.77855, - "62": 10.0624, - "63": 9.47205, - "64": 9.85428, - "65": 9.02467, - "66": 9.79454, - "67": 9.43333, - "68": 9.85327, - "69": 9.847, - "70": 9.81072, - "71": 9.684, - "72": 9.66023, - "73": 9.57314, - "74": 9.05973, - "75": 9.50551, - "76": 9.17942, - "77": 10.12761, - "78": 9.77438, - "79": 9.44209, - "80": 9.46747, - "81": 9.53873, - "82": 9.75725, - "83": 9.38702, - "84": 9.46662, - "85": 9.67918, - "86": 9.13556, - "87": 9.63426, - "88": 9.80794, - "89": 9.67925, - "90": 9.85561, - "91": 9.41267, - "92": 9.41773, - "93": 9.15396, - "94": 8.90227, - "95": 9.56526, - "96": 9.58425, - "97": 9.35836, - "98": 9.7302, - "99": 8.95917, - "100": 9.45408 + "3": 10.81274, + "4": 10.79109, + "5": 10.838, + "6": 10.84998, + "7": 10.8209, + "8": 10.821, + "9": 10.83092, + "10": 10.78949, + "11": 10.86351, + "12": 10.84299, + "13": 10.85677, + "14": 10.86241, + "15": 10.8062, + "16": 10.80347, + "17": 10.77927, + "18": 10.80722, + "19": 10.79448, + "20": 10.74689, + "21": 10.72163, + "22": 10.58676, + "23": 10.72952, + "24": 10.63218, + "25": 10.57522, + "26": 10.63797, + "27": 10.64969, + "28": 10.63484, + "29": 10.64318, + "30": 10.44633, + "31": 10.19408, + "32": 10.5239, + "33": 10.51833, + "34": 10.28815, + "35": 10.33158, + "36": 10.31281, + "37": 10.42627, + "38": 10.27886, + "39": 10.47564, + "40": 10.19805, + "41": 10.21579, + "42": 10.28687, + "43": 9.942, + "44": 
10.05731, + "45": 9.94351, + "46": 9.9088, + "47": 10.21222, + "48": 9.94969, + "49": 9.63645, + "50": 10.0035, + "51": 9.92297, + "52": 9.82832, + "53": 10.14635, + "54": 10.04348, + "55": 9.96283, + "56": 9.70531, + "57": 9.58566, + "58": 9.91703, + "59": 9.66041, + "60": 9.60398, + "61": 9.77842, + "62": 10.06249, + "63": 9.47211, + "64": 9.85381, + "65": 9.02443, + "66": 9.794, + "67": 9.43339, + "68": 9.85345, + "69": 9.84704, + "70": 9.81023, + "71": 9.68396, + "72": 9.66038, + "73": 9.57331, + "74": 9.06008, + "75": 9.50505, + "76": 9.17917, + "77": 10.12748, + "78": 9.77465, + "79": 9.44204, + "80": 9.46777, + "81": 9.53832, + "82": 9.75735, + "83": 9.38708, + "84": 9.46663, + "85": 9.67908, + "86": 9.13575, + "87": 9.6347, + "88": 9.80851, + "89": 9.67935, + "90": 9.85541, + "91": 9.4128, + "92": 9.41772, + "93": 9.15363, + "94": 8.90205, + "95": 9.56516, + "96": 9.58409, + "97": 9.35837, + "98": 9.72999, + "99": 8.95859, + "100": 9.45369 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 30991.0, - "2": 32927.0, - "3": 33481.0, - "4": 30866.0, - "5": 36255.0, - "6": 37186.0, - "7": 35644.0, - "8": 31356.0, - "9": 34832.0, - "10": 29855.0, - "11": 38396.0, - "12": 35164.0, - "13": 37118.0, - "14": 38011.0, - "15": 34458.0, - "16": 35843.0, - "17": 34836.0, - "18": 35149.0, - "19": 36044.0, - "20": 32823.0, - "21": 33340.0, - "22": 30040.0, - "23": 37733.0, - "24": 31992.0, - "25": 31045.0, - "26": 34280.0, - "27": 36064.0, - "28": 36993.0, - "29": 38087.0, - "30": 32689.0, - "31": 30361.0, - "32": 36050.0, - "33": 37627.0, - "34": 33149.0, - "35": 34316.0, - "36": 35026.0, - "37": 37852.0, - "38": 35490.0, - "39": 38325.0, - "40": 35730.0, - "41": 35890.0, - "42": 37811.0, - "43": 34239.0, - "44": 33282.0, - "45": 35354.0, - "46": 37112.0, - "47": 40323.0, - "48": 36296.0, - "49": 36098.0, - "50": 38996.0, - "51": 37187.0, - "52": 36798.0, - "53": 41385.0, - "54": 41151.0, - "55": 36715.0, - "56": 40382.0, - 
"57": 36942.0, - "58": 42415.0, - "59": 39138.0, - "60": 39766.0, - "61": 40532.0, - "62": 43919.0, - "63": 38747.0, - "64": 43509.0, - "65": 40794.0, - "66": 44093.0, - "67": 40369.0, - "68": 40509.0, - "69": 40728.0, - "70": 45431.0, - "71": 41117.0, - "72": 39982.0, - "73": 44758.0, - "74": 34170.0, - "75": 38601.0, - "76": 46113.0, - "77": 45621.0, - "78": 47007.0, - "79": 47410.0, - "80": 46647.0, - "81": 50449.0, - "82": 49494.0, - "83": 45080.0, - "84": 46331.0, - "85": 48470.0, - "86": 45870.0, - "87": 49138.0, - "88": 46357.0, - "89": 48274.0, - "90": 50049.0, - "91": 43937.0, - "92": 47318.0, - "93": 46654.0, - "94": 46515.0, - "95": 47167.0, - "96": 50587.0, - "97": 46623.0, - "98": 49830.0, - "99": 48092.0, - "100": 43643.0 + "1": 30973.0, + "2": 32949.0, + "3": 33708.0, + "4": 30953.0, + "5": 35857.0, + "6": 36975.0, + "7": 35061.0, + "8": 31831.0, + "9": 34544.0, + "10": 29924.0, + "11": 38570.0, + "12": 34892.0, + "13": 37266.0, + "14": 37629.0, + "15": 34335.0, + "16": 36204.0, + "17": 35086.0, + "18": 35374.0, + "19": 36376.0, + "20": 32512.0, + "21": 33131.0, + "22": 30019.0, + "23": 37801.0, + "24": 32117.0, + "25": 31024.0, + "26": 34085.0, + "27": 36047.0, + "28": 36795.0, + "29": 37764.0, + "30": 32629.0, + "31": 30029.0, + "32": 36315.0, + "33": 37487.0, + "34": 33214.0, + "35": 34197.0, + "36": 34782.0, + "37": 38163.0, + "38": 35456.0, + "39": 38082.0, + "40": 35203.0, + "41": 35757.0, + "42": 37312.0, + "43": 34196.0, + "44": 33296.0, + "45": 35603.0, + "46": 36998.0, + "47": 40550.0, + "48": 36177.0, + "49": 36622.0, + "50": 38729.0, + "51": 37241.0, + "52": 36636.0, + "53": 41646.0, + "54": 41087.0, + "55": 36966.0, + "56": 40084.0, + "57": 37098.0, + "58": 42342.0, + "59": 39005.0, + "60": 40046.0, + "61": 40691.0, + "62": 43923.0, + "63": 38200.0, + "64": 43685.0, + "65": 41003.0, + "66": 44323.0, + "67": 40139.0, + "68": 40884.0, + "69": 40461.0, + "70": 45248.0, + "71": 41715.0, + "72": 40154.0, + "73": 44063.0, + "74": 33983.0, + 
"75": 38741.0, + "76": 46349.0, + "77": 45940.0, + "78": 46873.0, + "79": 47483.0, + "80": 46517.0, + "81": 50082.0, + "82": 49796.0, + "83": 45095.0, + "84": 46054.0, + "85": 48997.0, + "86": 45548.0, + "87": 49041.0, + "88": 46299.0, + "89": 48533.0, + "90": 49742.0, + "91": 43837.0, + "92": 47775.0, + "93": 46259.0, + "94": 45802.0, + "95": 47626.0, + "96": 50166.0, + "97": 47157.0, + "98": 50271.0, + "99": 47962.0, + "100": 43608.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1016564224.0, - "2": 1016563712.0, - "3": 1016564224.0, - "4": 1017172480.0, - "5": 1016564224.0, - "6": 1016565248.0, - "7": 1016564736.0, - "8": 1016565248.0, - "9": 1016562688.0, - "10": 1016564736.0, - "11": 1016562688.0, - "12": 1016565248.0, - "13": 1016564736.0, - "14": 1016564224.0, - "15": 1016564736.0, - "16": 1016562176.0, - "17": 1016564736.0, - "18": 1016565760.0, - "19": 1016563200.0, - "20": 1016563200.0, - "21": 1016564224.0, - "22": 1016566272.0, - "23": 1016564736.0, - "24": 1016564224.0, - "25": 1016564736.0, - "26": 1016562176.0, - "27": 1016563200.0, - "28": 1016562688.0, - "29": 1016562688.0, - "30": 1016566272.0, - "31": 1016569856.0, - "32": 1016564736.0, - "33": 1016564736.0, - "34": 1016565248.0, - "35": 1017459712.0, - "36": 1016565248.0, - "37": 1016565248.0, - "38": 1016564224.0, - "39": 1016562176.0, - "40": 1016565248.0, - "41": 1016567808.0, - "42": 1016564224.0, - "43": 1016568320.0, - "44": 1016565760.0, - "45": 1016565760.0, - "46": 1016570368.0, - "47": 1016565248.0, - "48": 1016569856.0, - "49": 1016568832.0, - "50": 1016565760.0, - "51": 1016566272.0, - "52": 1016574976.0, - "53": 1016567808.0, - "54": 1016566784.0, - "55": 1016569856.0, - "56": 1016565248.0, - "57": 1016574976.0, - "58": 1017110528.0, - "59": 1016574976.0, - "60": 1016571904.0, - "61": 1016567296.0, - "62": 1016565760.0, - "63": 1016576000.0, - "64": 1016572928.0, - "65": 1016585216.0, - "66": 1016568832.0, - "67": 
1016569344.0, - "68": 1016566272.0, - "69": 1016569856.0, - "70": 1016569344.0, - "71": 1016566272.0, - "72": 1016571392.0, - "73": 1016572416.0, - "74": 1016577536.0, - "75": 1016567296.0, - "76": 1016565760.0, - "77": 1016566272.0, - "78": 1016572928.0, - "79": 1016568832.0, - "80": 1016572416.0, - "81": 1016570368.0, - "82": 1016571904.0, - "83": 1016568832.0, - "84": 1016573440.0, - "85": 1016575488.0, - "86": 1016574976.0, - "87": 1016568320.0, - "88": 1016816640.0, - "89": 1016577024.0, - "90": 1016569344.0, - "91": 1016566784.0, - "92": 1016566784.0, - "93": 1016569856.0, - "94": 1016571392.0, - "95": 1016567808.0, - "96": 1016566784.0, - "97": 1016573952.0, - "98": 1016565760.0, - "99": 1016577024.0, - "100": 1016574464.0 + "1": 1014467072.0, + "2": 1014466560.0, + "3": 1014467072.0, + "4": 1014466560.0, + "5": 1014466560.0, + "6": 1014467584.0, + "7": 1014468608.0, + "8": 1014468096.0, + "9": 1014466048.0, + "10": 1014467584.0, + "11": 1014465536.0, + "12": 1014467072.0, + "13": 1014467072.0, + "14": 1014466048.0, + "15": 1015065088.0, + "16": 1014465024.0, + "17": 1014467072.0, + "18": 1014467072.0, + "19": 1014466560.0, + "20": 1014467072.0, + "21": 1014466560.0, + "22": 1014468608.0, + "23": 1014467584.0, + "24": 1014675456.0, + "25": 1014468096.0, + "26": 1014465536.0, + "27": 1014466048.0, + "28": 1014465024.0, + "29": 1014465536.0, + "30": 1014469120.0, + "31": 1014472192.0, + "32": 1014468096.0, + "33": 1014467584.0, + "34": 1014467072.0, + "35": 1014468096.0, + "36": 1014468096.0, + "37": 1014787072.0, + "38": 1014467584.0, + "39": 1014465024.0, + "40": 1015253504.0, + "41": 1014470144.0, + "42": 1014467584.0, + "43": 1014471168.0, + "44": 1014467584.0, + "45": 1014468608.0, + "46": 1014472704.0, + "47": 1014467584.0, + "48": 1014473216.0, + "49": 1014471168.0, + "50": 1014468608.0, + "51": 1014469120.0, + "52": 1014478336.0, + "53": 1014471168.0, + "54": 1014885888.0, + "55": 1014472192.0, + "56": 1014468096.0, + "57": 1014478336.0, + "58": 
1014472704.0, + "59": 1014477312.0, + "60": 1014473728.0, + "61": 1014470656.0, + "62": 1014469632.0, + "63": 1014479360.0, + "64": 1014475264.0, + "65": 1015306240.0, + "66": 1014471680.0, + "67": 1014473216.0, + "68": 1014499840.0, + "69": 1014473728.0, + "70": 1014472192.0, + "71": 1014468608.0, + "72": 1014474752.0, + "73": 1014475264.0, + "74": 1014479872.0, + "75": 1014469632.0, + "76": 1014468096.0, + "77": 1014470144.0, + "78": 1014475776.0, + "79": 1014471680.0, + "80": 1014475264.0, + "81": 1014472704.0, + "82": 1014474752.0, + "83": 1014471680.0, + "84": 1014475776.0, + "85": 1014478336.0, + "86": 1014477824.0, + "87": 1014470144.0, + "88": 1014473728.0, + "89": 1014479872.0, + "90": 1014471168.0, + "91": 1014469120.0, + "92": 1014470656.0, + "93": 1014472704.0, + "94": 1014474752.0, + "95": 1014600704.0, + "96": 1014468096.0, + "97": 1014476800.0, + "98": 1014468608.0, + "99": 1014480384.0, + "100": 1014477312.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2560655872.0, - "2": 2827037696.0, - "3": 2827771392.0, - "4": 2828163584.0, - "5": 2828163584.0, - "6": 2828163584.0, - "7": 2829373440.0, - "8": 2829373440.0, - "9": 2829373440.0, - "10": 2829925376.0, - "11": 2829925376.0, - "12": 2829925376.0, - "13": 2829925376.0, - "14": 2829925376.0, - "15": 2830320640.0, - "16": 2830320640.0, - "17": 2830320640.0, - "18": 2830320640.0, - "19": 2830320640.0, - "20": 2830320640.0, - "21": 2830320640.0, - "22": 2830406144.0, - "23": 2830406144.0, - "24": 2830406144.0, - "25": 2830406144.0, - "26": 2830406144.0, - "27": 2830406144.0, - "28": 2830406144.0, - "29": 2830406144.0, - "30": 2831433216.0, - "31": 2836904960.0, - "32": 2836904960.0, - "33": 2836904960.0, - "34": 2836904960.0, - "35": 2836904960.0, - "36": 2836904960.0, - "37": 2836904960.0, - "38": 2836904960.0, - "39": 2836904960.0, - "40": 2836904960.0, - "41": 2836904960.0, - "42": 2836904960.0, - "43": 2836904960.0, - "44": 
2836904960.0, - "45": 2836904960.0, - "46": 2837527040.0, - "47": 2837527040.0, - "48": 2837527040.0, - "49": 2837527040.0, - "50": 2837527040.0, - "51": 2837527040.0, - "52": 2844526592.0, - "53": 2844526592.0, - "54": 2844526592.0, - "55": 2844526592.0, - "56": 2844526592.0, - "57": 2845833216.0, - "58": 2845833216.0, - "59": 2845833216.0, - "60": 2845833216.0, - "61": 2845833216.0, - "62": 2845833216.0, - "63": 2847350784.0, - "64": 2847350784.0, - "65": 2859365376.0, - "66": 2859365376.0, - "67": 2859365376.0, - "68": 2859365376.0, - "69": 2859365376.0, - "70": 2859365376.0, - "71": 2859365376.0, - "72": 2859365376.0, - "73": 2859365376.0, - "74": 2859365376.0, - "75": 2859365376.0, - "76": 2859365376.0, - "77": 2859365376.0, - "78": 2859365376.0, - "79": 2859365376.0, - "80": 2859365376.0, - "81": 2859365376.0, - "82": 2859365376.0, - "83": 2859365376.0, - "84": 2859365376.0, - "85": 2859365376.0, - "86": 2859365376.0, - "87": 2859365376.0, - "88": 2859365376.0, - "89": 2859365376.0, - "90": 2859365376.0, - "91": 2859365376.0, - "92": 2859365376.0, - "93": 2859365376.0, - "94": 2859365376.0, - "95": 2859365376.0, - "96": 2859365376.0, - "97": 2859365376.0, - "98": 2859365376.0, - "99": 2859365376.0, - "100": 2859365376.0 + "1": 2563003904.0, + "2": 2826423296.0, + "3": 2826423296.0, + "4": 2826423296.0, + "5": 2826423296.0, + "6": 2828489728.0, + "7": 2828489728.0, + "8": 2828489728.0, + "9": 2828489728.0, + "10": 2828489728.0, + "11": 2828489728.0, + "12": 2828489728.0, + "13": 2828489728.0, + "14": 2828489728.0, + "15": 2828489728.0, + "16": 2828489728.0, + "17": 2828489728.0, + "18": 2828489728.0, + "19": 2828489728.0, + "20": 2828489728.0, + "21": 2828489728.0, + "22": 2830208000.0, + "23": 2830208000.0, + "24": 2830208000.0, + "25": 2830208000.0, + "26": 2830208000.0, + "27": 2830208000.0, + "28": 2830208000.0, + "29": 2830208000.0, + "30": 2830208000.0, + "31": 2835122688.0, + "32": 2835122688.0, + "33": 2835122688.0, + "34": 2835122688.0, + "35": 
2835122688.0, + "36": 2835122688.0, + "37": 2835122688.0, + "38": 2835122688.0, + "39": 2835122688.0, + "40": 2835122688.0, + "41": 2835122688.0, + "42": 2835122688.0, + "43": 2835122688.0, + "44": 2835122688.0, + "45": 2835122688.0, + "46": 2835122688.0, + "47": 2835122688.0, + "48": 2836012544.0, + "49": 2836012544.0, + "50": 2836012544.0, + "51": 2836012544.0, + "52": 2842577408.0, + "53": 2842577408.0, + "54": 2842577408.0, + "55": 2842577408.0, + "56": 2842577408.0, + "57": 2846367232.0, + "58": 2846367232.0, + "59": 2846367232.0, + "60": 2846367232.0, + "61": 2846367232.0, + "62": 2846367232.0, + "63": 2846367232.0, + "64": 2846367232.0, + "65": 2856796160.0, + "66": 2856796160.0, + "67": 2856796160.0, + "68": 2856796160.0, + "69": 2856796160.0, + "70": 2856796160.0, + "71": 2856796160.0, + "72": 2856796160.0, + "73": 2856796160.0, + "74": 2856796160.0, + "75": 2856796160.0, + "76": 2856796160.0, + "77": 2856796160.0, + "78": 2856796160.0, + "79": 2856796160.0, + "80": 2856796160.0, + "81": 2856796160.0, + "82": 2856796160.0, + "83": 2856796160.0, + "84": 2856796160.0, + "85": 2856796160.0, + "86": 2856796160.0, + "87": 2856796160.0, + "88": 2856796160.0, + "89": 2856796160.0, + "90": 2856796160.0, + "91": 2856796160.0, + "92": 2856796160.0, + "93": 2856796160.0, + "94": 2856796160.0, + "95": 2856796160.0, + "96": 2856796160.0, + "97": 2856796160.0, + "98": 2856796160.0, + "99": 2856796160.0, + "100": 2856796160.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 17.55161, - "2": 0.27584, - "3": 0.20906, - "4": 0.18821, - "5": 0.17883, - "6": 0.17484, - "7": 0.18214, - "8": 0.18025, - "9": 0.16785, - "10": 0.16718, - "11": 0.17122, - "12": 0.16341, - "13": 0.16356, - "14": 0.16447, - "15": 0.17469, - "16": 0.16231, - "17": 0.17002, - "18": 0.1621, - "19": 0.16543, - "20": 0.16097, - "21": 0.16113, - "22": 0.17866, - "23": 0.16939, - "24": 0.16784, - "25": 0.16322, - "26": 0.15752, - "27": 0.16042, - "28": 
0.16296, - "29": 0.16022, - "30": 0.16569, - "31": 0.20634, - "32": 0.16627, - "33": 0.16203, - "34": 0.18965, - "35": 0.1656, - "36": 0.17227, - "37": 0.16394, - "38": 0.16364, - "39": 0.15966, - "40": 0.17482, - "41": 0.16992, - "42": 0.16079, - "43": 0.17541, - "44": 0.1626, - "45": 0.16436, - "46": 0.1838, - "47": 0.15773, - "48": 0.18504, - "49": 0.22116, - "50": 0.16497, - "51": 0.17193, - "52": 0.17228, - "53": 0.15999, - "54": 0.15946, - "55": 0.1611, - "56": 0.21983, - "57": 0.18423, - "58": 0.16229, - "59": 0.18268, - "60": 0.17406, - "61": 0.15956, - "62": 0.16172, - "63": 0.17465, - "64": 0.17307, - "65": 0.25477, - "66": 0.15926, - "67": 0.23477, - "68": 0.16872, - "69": 0.16094, - "70": 0.16631, - "71": 0.18552, - "72": 0.16728, - "73": 0.1889, - "74": 0.17586, - "75": 0.17577, - "76": 0.21503, - "77": 0.16576, - "78": 0.17284, - "79": 0.18166, - "80": 0.19235, - "81": 0.17347, - "82": 0.1597, - "83": 0.17024, - "84": 0.17843, - "85": 0.15917, - "86": 0.20315, - "87": 0.16523, - "88": 0.16367, - "89": 0.18499, - "90": 0.16286, - "91": 0.19025, - "92": 0.17186, - "93": 0.19123, - "94": 0.19378, - "95": 0.16849, - "96": 0.16781, - "97": 0.17705, - "98": 0.15729, - "99": 0.17119, - "100": 0.16 + "1": 14.68238, + "2": 0.38712, + "3": 0.19949, + "4": 0.16868, + "5": 0.15278, + "6": 0.14858, + "7": 0.15754, + "8": 0.15132, + "9": 0.14692, + "10": 0.14516, + "11": 0.14033, + "12": 0.14161, + "13": 0.14186, + "14": 0.13624, + "15": 0.15371, + "16": 0.1395, + "17": 0.16083, + "18": 0.13717, + "19": 0.1421, + "20": 0.13767, + "21": 0.13643, + "22": 0.15072, + "23": 0.13944, + "24": 0.13522, + "25": 0.13454, + "26": 0.13493, + "27": 0.13514, + "28": 0.14174, + "29": 0.13479, + "30": 0.14261, + "31": 0.17426, + "32": 0.14571, + "33": 0.13803, + "34": 0.16399, + "35": 0.1389, + "36": 0.14089, + "37": 0.13701, + "38": 0.14212, + "39": 0.13299, + "40": 0.14907, + "41": 0.14239, + "42": 0.13978, + "43": 0.14469, + "44": 0.1344, + "45": 0.14546, + "46": 0.16258, + 
"47": 0.14403, + "48": 0.15688, + "49": 0.20655, + "50": 0.13686, + "51": 0.16635, + "52": 0.15085, + "53": 0.54128, + "54": 0.13812, + "55": 0.14612, + "56": 0.20029, + "57": 0.15601, + "58": 0.15373, + "59": 0.15883, + "60": 0.15348, + "61": 0.13897, + "62": 0.14293, + "63": 0.15882, + "64": 0.15023, + "65": 0.21706, + "66": 0.14405, + "67": 0.20424, + "68": 0.15367, + "69": 0.14298, + "70": 0.14311, + "71": 0.16751, + "72": 0.15144, + "73": 0.17862, + "74": 0.15928, + "75": 0.15132, + "76": 0.18706, + "77": 0.14118, + "78": 0.14807, + "79": 0.15437, + "80": 0.15794, + "81": 0.14257, + "82": 0.13828, + "83": 0.15021, + "84": 0.14886, + "85": 0.14363, + "86": 0.19012, + "87": 0.14052, + "88": 0.14621, + "89": 0.15591, + "90": 0.1453, + "91": 0.17378, + "92": 0.16177, + "93": 0.18337, + "94": 0.18449, + "95": 0.14789, + "96": 0.14329, + "97": 0.15465, + "98": 0.14162, + "99": 0.14792, + "100": 0.14082 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..615b1b90939 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": 
"nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.92297, + "52": 9.8284, + "53": 10.14624, + "54": 10.04331, + "55": 9.96248, + "56": 9.70547, + "57": 9.58553, + "58": 9.91673, + "59": 9.66059, + "60": 9.60402, + "61": 9.77812, + "62": 10.06258, + "63": 9.47179, + "64": 9.85361, + "65": 9.02415, + "66": 9.79391, + "67": 9.43341, + "68": 9.85341, + "69": 9.84716, + "70": 9.81035, + "71": 9.68402, + "72": 9.65988, + "73": 9.57308, + "74": 9.05997, + "75": 9.50561, + "76": 9.17936, + "77": 10.12733, + "78": 9.77475, + "79": 9.44198, + "80": 9.46754, + "81": 9.53859, + "82": 9.75755, + "83": 9.38709, + "84": 9.46679, + "85": 9.67903, + "86": 9.1356, + "87": 9.63439, + "88": 9.80841, + "89": 9.67922, + "90": 9.8555, + "91": 9.41299, + "92": 9.41796, + "93": 9.15357, + "94": 8.90198, + "95": 9.56514, + "96": 9.58401, + "97": 9.35865, + "98": 9.73028, + "99": 8.95871, + "100": 9.45412 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", 
+ "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 37309.0, + "52": 36703.0, + "53": 41651.0, + "54": 41063.0, + "55": 36785.0, + "56": 40238.0, + "57": 36695.0, + "58": 42135.0, + "59": 39294.0, + "60": 39482.0, + "61": 40661.0, + "62": 44026.0, + "63": 38069.0, + "64": 43162.0, + "65": 40823.0, + "66": 44305.0, + "67": 40571.0, + "68": 40330.0, + "69": 40479.0, + "70": 45305.0, + "71": 41317.0, + "72": 39952.0, + "73": 44530.0, + "74": 34138.0, + "75": 38838.0, + "76": 46191.0, + "77": 45788.0, + "78": 47368.0, + "79": 47694.0, + "80": 46540.0, + "81": 50541.0, + "82": 49391.0, + "83": 45041.0, + "84": 46205.0, + "85": 49075.0, + "86": 45491.0, + "87": 49629.0, + "88": 46513.0, + "89": 48672.0, + "90": 49752.0, + "91": 44036.0, + "92": 47292.0, + "93": 46999.0, + "94": 46286.0, + "95": 46691.0, + "96": 50402.0, + "97": 47195.0, + "98": 49883.0, + "99": 48365.0, + "100": 43445.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1015977472.0, + "52": 1015986176.0, + "53": 1015979520.0, + "54": 1015978496.0, + "55": 1015981056.0, + "56": 1015976448.0, + "57": 1015986688.0, + "58": 1015981056.0, + "59": 1015985152.0, + "60": 1015982592.0, + 
"61": 1015979008.0, + "62": 1015977984.0, + "63": 1015987712.0, + "64": 1015983616.0, + "65": 1015994880.0, + "66": 1015980032.0, + "67": 1015981568.0, + "68": 1015977984.0, + "69": 1015982080.0, + "70": 1016161280.0, + "71": 1015979008.0, + "72": 1015982080.0, + "73": 1015984128.0, + "74": 1015988736.0, + "75": 1015978496.0, + "76": 1015976448.0, + "77": 1015979520.0, + "78": 1015984640.0, + "79": 1015979520.0, + "80": 1015983616.0, + "81": 1015981568.0, + "82": 1015983104.0, + "83": 1015980032.0, + "84": 1015984128.0, + "85": 1015986688.0, + "86": 1015986688.0, + "87": 1015980032.0, + "88": 1015981568.0, + "89": 1015988736.0, + "90": 1015980544.0, + "91": 1015977984.0, + "92": 1016114176.0, + "93": 1015981056.0, + "94": 1015982080.0, + "95": 1015979008.0, + "96": 1015976960.0, + "97": 1015984640.0, + "98": 1015977472.0, + "99": 1015988224.0, + "100": 1015985664.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2935420416.0, + "52": 2935420416.0, + "53": 2935420416.0, + "54": 2935420416.0, + "55": 2935420416.0, + "56": 2935420416.0, + "57": 2935420416.0, + "58": 2935420416.0, + "59": 2935420416.0, + "60": 2935420416.0, + "61": 2935420416.0, + "62": 2935420416.0, + 
"63": 2935420416.0, + "64": 2935420416.0, + "65": 2935420416.0, + "66": 2935420416.0, + "67": 2935420416.0, + "68": 2935420416.0, + "69": 2935420416.0, + "70": 2935420416.0, + "71": 2935420416.0, + "72": 2935420416.0, + "73": 2935420416.0, + "74": 2935420416.0, + "75": 2935420416.0, + "76": 2935420416.0, + "77": 2935420416.0, + "78": 2935420416.0, + "79": 2935420416.0, + "80": 2935420416.0, + "81": 2935420416.0, + "82": 2935420416.0, + "83": 2935420416.0, + "84": 2935420416.0, + "85": 2935420416.0, + "86": 2935420416.0, + "87": 2935420416.0, + "88": 2935420416.0, + "89": 2935420416.0, + "90": 2935420416.0, + "91": 2935420416.0, + "92": 2935420416.0, + "93": 2935420416.0, + "94": 2935420416.0, + "95": 2935420416.0, + "96": 2935420416.0, + "97": 2935420416.0, + "98": 2935420416.0, + "99": 2935420416.0, + "100": 2935420416.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 13.36069, + "52": 0.28674, + "53": 0.19891, + "54": 0.20156, + "55": 0.1819, + "56": 0.25306, + "57": 0.18921, + "58": 0.16571, + "59": 0.18603, + "60": 0.18108, + "61": 0.16054, + "62": 0.15396, + "63": 0.17162, + "64": 0.17605, + "65": 0.23651, + "66": 0.15684, + "67": 0.24234, + "68": 0.16737, + "69": 
0.1644, + "70": 0.17023, + "71": 0.18887, + "72": 0.17787, + "73": 0.17972, + "74": 0.17258, + "75": 0.16961, + "76": 0.17324, + "77": 0.16212, + "78": 0.16629, + "79": 0.15673, + "80": 0.17244, + "81": 0.15957, + "82": 0.14913, + "83": 0.15131, + "84": 0.16274, + "85": 0.1686, + "86": 0.19415, + "87": 0.15249, + "88": 0.14449, + "89": 0.16305, + "90": 0.13988, + "91": 0.17343, + "92": 0.15546, + "93": 0.15914, + "94": 0.19609, + "95": 0.14746, + "96": 0.1437, + "97": 0.1637, + "98": 0.14571, + "99": 0.15931, + "100": 0.14229 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json index c598c8c5c86..64a0d3b0293 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 892864512.0, - "2": 892868608.0, - "3": 892868608.0, - "4": 892864512.0, - "5": 892865024.0, - "6": 892866560.0, - "7": 892866048.0, - "8": 892867584.0, - "9": 892865536.0, - "10": 892867584.0, - "11": 892866048.0, - "12": 892865536.0, - "13": 892865536.0, - "14": 892868096.0, - "15": 892867584.0, - "16": 892867072.0, - "17": 892867584.0, - "18": 892869632.0, - "19": 892868096.0, - "20": 892866560.0, - "21": 892866560.0, - "22": 892863488.0, - "23": 892864512.0, - "24": 892867072.0, - "25": 892863488.0, - "26": 892866560.0, - "27": 892867072.0, - "28": 892865536.0, - "29": 892866048.0, - "30": 892863488.0, - "31": 892862464.0, - "32": 892861952.0, - "33": 892866048.0, - "34": 892865536.0, - 
"35": 892865024.0, - "36": 892868608.0, - "37": 892867072.0, - "38": 892866560.0, - "39": 892866048.0, - "40": 892867072.0, - "41": 892865536.0, - "42": 892867584.0, - "43": 892861440.0, - "44": 892862976.0, - "45": 892865024.0, - "46": 892864512.0, - "47": 892865024.0, - "48": 892861440.0, - "49": 892863488.0, - "50": 892867072.0, - "51": 892860416.0, - "52": 892858880.0, - "53": 892861440.0, - "54": 892861440.0, - "55": 892862464.0, - "56": 892865024.0, - "57": 892857344.0, - "58": 892859392.0, - "59": 892858880.0, - "60": 892859904.0, - "61": 892868608.0, - "62": 892865536.0, - "63": 892861952.0, - "64": 892863488.0, - "65": 892851712.0, - "66": 892866048.0, - "67": 892861440.0, - "68": 892868608.0, - "69": 892864512.0, - "70": 892866560.0, - "71": 892868608.0, - "72": 892860416.0, - "73": 892868096.0, - "74": 892858368.0, - "75": 892867072.0, - "76": 892866560.0, - "77": 892867072.0, - "78": 892863488.0, - "79": 892864512.0, - "80": 892864512.0, - "81": 892866048.0, - "82": 892864000.0, - "83": 892860928.0, - "84": 892861440.0, - "85": 892861952.0, - "86": 892861440.0, - "87": 892870144.0, - "88": 892862464.0, - "89": 892864512.0, - "90": 892866048.0, - "91": 892867072.0, - "92": 892865536.0, - "93": 892868608.0, - "94": 892864512.0, - "95": 892865024.0, - "96": 892865024.0, - "97": 892862976.0, - "98": 892867584.0, - "99": 892859904.0, - "100": 892861952.0 + "1": 892865536.0, + "2": 892869632.0, + "3": 892869632.0, + "4": 892865536.0, + "5": 892866048.0, + "6": 892867584.0, + "7": 892867072.0, + "8": 892868608.0, + "9": 892866560.0, + "10": 892868608.0, + "11": 892867072.0, + "12": 892866560.0, + "13": 892866560.0, + "14": 892869120.0, + "15": 892868608.0, + "16": 892868096.0, + "17": 892868608.0, + "18": 892870656.0, + "19": 892869120.0, + "20": 892867584.0, + "21": 892867584.0, + "22": 892864512.0, + "23": 892865536.0, + "24": 892868096.0, + "25": 892864512.0, + "26": 892867584.0, + "27": 892868096.0, + "28": 892866560.0, + "29": 892867072.0, + "30": 
892864512.0, + "31": 892863488.0, + "32": 892862976.0, + "33": 892867072.0, + "34": 892866560.0, + "35": 892866048.0, + "36": 892869632.0, + "37": 892868096.0, + "38": 892867584.0, + "39": 892867072.0, + "40": 892868096.0, + "41": 892866560.0, + "42": 892868608.0, + "43": 892862464.0, + "44": 892864000.0, + "45": 892866048.0, + "46": 892865536.0, + "47": 892866048.0, + "48": 892862464.0, + "49": 892864512.0, + "50": 892868096.0, + "51": 892861440.0, + "52": 892859904.0, + "53": 892862464.0, + "54": 892862464.0, + "55": 892863488.0, + "56": 892866048.0, + "57": 892858368.0, + "58": 892860416.0, + "59": 892859904.0, + "60": 892860928.0, + "61": 892869632.0, + "62": 892866560.0, + "63": 892862976.0, + "64": 892864512.0, + "65": 892852736.0, + "66": 892867072.0, + "67": 892862464.0, + "68": 892869632.0, + "69": 892865536.0, + "70": 892867584.0, + "71": 892869632.0, + "72": 892861440.0, + "73": 892869120.0, + "74": 892859392.0, + "75": 892868096.0, + "76": 892867584.0, + "77": 892868096.0, + "78": 892864512.0, + "79": 892865536.0, + "80": 892865536.0, + "81": 892867072.0, + "82": 892865024.0, + "83": 892861952.0, + "84": 892862464.0, + "85": 892862976.0, + "86": 892862464.0, + "87": 892871168.0, + "88": 892863488.0, + "89": 892865536.0, + "90": 892867072.0, + "91": 892868096.0, + "92": 892866560.0, + "93": 892869632.0, + "94": 892865536.0, + "95": 892866048.0, + "96": 892866048.0, + "97": 892864000.0, + "98": 892868608.0, + "99": 892860928.0, + "100": 892862976.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1867566080.0, - "2": 2107252736.0, - "3": 2107252736.0, - "4": 2107252736.0, - "5": 2107481600.0, - "6": 2107481600.0, - "7": 2107481600.0, - "8": 2107481600.0, - "9": 2107481600.0, - "10": 2108814336.0, - "11": 2108814336.0, - "12": 2108814336.0, - "13": 2108814336.0, - "14": 2108814336.0, - "15": 2108814336.0, - "16": 2109139456.0, - "17": 2109139456.0, - "18": 2109139456.0, - "19": 2109139456.0, 
- "20": 2109139456.0, - "21": 2109139456.0, - "22": 2109139456.0, - "23": 2109139456.0, - "24": 2109139456.0, - "25": 2109139456.0, - "26": 2109139456.0, - "27": 2109139456.0, - "28": 2109139456.0, - "29": 2109139456.0, - "30": 2109139456.0, - "31": 2109139456.0, - "32": 2109139456.0, - "33": 2109139456.0, - "34": 2109139456.0, - "35": 2109139456.0, - "36": 2109139456.0, - "37": 2109139456.0, - "38": 2109139456.0, - "39": 2109139456.0, - "40": 2109139456.0, - "41": 2109139456.0, - "42": 2109139456.0, - "43": 2109139456.0, - "44": 2109139456.0, - "45": 2109139456.0, - "46": 2109139456.0, - "47": 2109139456.0, - "48": 2109139456.0, - "49": 2109139456.0, - "50": 2109139456.0, - "51": 2109139456.0, - "52": 2109139456.0, - "53": 2109139456.0, - "54": 2109139456.0, - "55": 2109139456.0, - "56": 2109139456.0, - "57": 2109139456.0, - "58": 2109139456.0, - "59": 2109139456.0, - "60": 2109139456.0, - "61": 2109139456.0, - "62": 2109139456.0, - "63": 2109139456.0, - "64": 2109139456.0, - "65": 2109139456.0, - "66": 2109139456.0, - "67": 2109139456.0, - "68": 2109139456.0, - "69": 2109139456.0, - "70": 2109139456.0, - "71": 2109139456.0, - "72": 2109139456.0, - "73": 2109139456.0, - "74": 2109139456.0, - "75": 2109139456.0, - "76": 2109139456.0, - "77": 2109139456.0, - "78": 2109139456.0, - "79": 2109139456.0, - "80": 2109139456.0, - "81": 2109139456.0, - "82": 2109139456.0, - "83": 2109139456.0, - "84": 2109139456.0, - "85": 2109139456.0, - "86": 2109139456.0, - "87": 2109897728.0, - "88": 2109897728.0, - "89": 2109897728.0, - "90": 2109897728.0, - "91": 2109897728.0, - "92": 2109897728.0, - "93": 2109897728.0, - "94": 2109897728.0, - "95": 2109897728.0, - "96": 2109897728.0, - "97": 2109897728.0, - "98": 2109897728.0, - "99": 2109897728.0, - "100": 2109897728.0 + "1": 1918568448.0, + "2": 2157712384.0, + "3": 2157712384.0, + "4": 2157712384.0, + "5": 2159109632.0, + "6": 2159109632.0, + "7": 2159109632.0, + "8": 2159109632.0, + "9": 2159109632.0, + "10": 2159142912.0, + 
"11": 2159142912.0, + "12": 2159142912.0, + "13": 2159142912.0, + "14": 2159633920.0, + "15": 2159633920.0, + "16": 2159633920.0, + "17": 2159633920.0, + "18": 2159633920.0, + "19": 2159633920.0, + "20": 2159633920.0, + "21": 2159633920.0, + "22": 2159633920.0, + "23": 2159633920.0, + "24": 2159633920.0, + "25": 2159633920.0, + "26": 2159802368.0, + "27": 2159802368.0, + "28": 2159802368.0, + "29": 2159802368.0, + "30": 2159802368.0, + "31": 2159802368.0, + "32": 2159802368.0, + "33": 2159802368.0, + "34": 2159802368.0, + "35": 2159802368.0, + "36": 2159802368.0, + "37": 2159802368.0, + "38": 2159802368.0, + "39": 2159802368.0, + "40": 2159802368.0, + "41": 2159802368.0, + "42": 2159802368.0, + "43": 2159802368.0, + "44": 2159802368.0, + "45": 2159802368.0, + "46": 2159802368.0, + "47": 2159802368.0, + "48": 2159802368.0, + "49": 2159802368.0, + "50": 2159802368.0, + "51": 2159802368.0, + "52": 2159802368.0, + "53": 2159802368.0, + "54": 2159802368.0, + "55": 2159802368.0, + "56": 2159802368.0, + "57": 2159802368.0, + "58": 2159802368.0, + "59": 2159802368.0, + "60": 2159802368.0, + "61": 2159802368.0, + "62": 2159802368.0, + "63": 2159802368.0, + "64": 2159802368.0, + "65": 2159802368.0, + "66": 2159802368.0, + "67": 2159802368.0, + "68": 2159802368.0, + "69": 2159802368.0, + "70": 2159802368.0, + "71": 2159802368.0, + "72": 2159802368.0, + "73": 2160337408.0, + "74": 2160337408.0, + "75": 2160337408.0, + "76": 2160337408.0, + "77": 2160337408.0, + "78": 2160337408.0, + "79": 2160337408.0, + "80": 2160337408.0, + "81": 2160337408.0, + "82": 2160337408.0, + "83": 2160337408.0, + "84": 2161362944.0, + "85": 2161362944.0, + "86": 2161362944.0, + "87": 2161362944.0, + "88": 2161362944.0, + "89": 2161362944.0, + "90": 2161362944.0, + "91": 2161362944.0, + "92": 2161362944.0, + "93": 2161362944.0, + "94": 2161362944.0, + "95": 2162391552.0, + "96": 2162391552.0, + "97": 2162391552.0, + "98": 2162391552.0, + "99": 2162391552.0, + "100": 2162391552.0 } }, 
"iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 14.1374, - "2": 0.29466, - "3": 0.26236, - "4": 0.26156, - "5": 0.24237, - "6": 0.23849, - "7": 0.252, - "8": 0.24427, - "9": 0.24029, - "10": 0.23618, - "11": 0.23659, - "12": 0.23342, - "13": 0.23316, - "14": 0.23233, - "15": 0.24856, - "16": 0.23522, - "17": 0.24126, - "18": 0.22751, - "19": 0.2299, - "20": 0.23346, - "21": 0.23441, - "22": 0.22921, - "23": 0.23376, - "24": 0.23927, - "25": 0.23185, - "26": 0.23099, - "27": 0.22756, - "28": 0.2284, - "29": 0.22889, - "30": 0.23032, - "31": 0.26621, - "32": 0.23553, - "33": 0.23683, - "34": 0.25808, - "35": 0.23912, - "36": 0.23198, - "37": 0.23086, - "38": 0.23515, - "39": 0.2291, - "40": 0.24108, - "41": 0.23663, - "42": 0.23631, - "43": 0.23891, - "44": 0.23205, - "45": 0.24801, - "46": 0.2689, - "47": 0.23258, - "48": 0.25079, - "49": 0.26858, - "50": 0.2361, - "51": 0.27052, - "52": 0.26801, - "53": 0.23804, - "54": 0.23998, - "55": 0.25008, - "56": 0.29894, - "57": 0.26807, - "58": 0.23939, - "59": 0.24845, - "60": 0.24835, - "61": 0.24071, - "62": 0.23697, - "63": 0.25187, - "64": 0.24293, - "65": 0.31273, - "66": 0.23771, - "67": 0.28851, - "68": 0.25834, - "69": 0.24387, - "70": 0.23624, - "71": 0.26612, - "72": 0.25067, - "73": 0.28048, - "74": 0.26617, - "75": 0.24822, - "76": 0.26459, - "77": 0.23429, - "78": 0.24496, - "79": 0.24741, - "80": 0.25523, - "81": 0.2433, - "82": 0.23696, - "83": 0.2421, - "84": 0.24973, - "85": 0.24316, - "86": 0.25585, - "87": 0.23448, - "88": 0.23245, - "89": 0.25191, - "90": 0.23373, - "91": 0.25927, - "92": 0.24203, - "93": 0.25124, - "94": 0.26498, - "95": 0.24482, - "96": 0.23378, - "97": 0.25053, - "98": 0.23165, - "99": 0.24761, - "100": 0.23858 + "1": 14.93722, + "2": 0.29196, + "3": 0.25566, + "4": 0.22819, + "5": 0.21657, + "6": 0.22742, + "7": 0.23255, + "8": 0.21868, + "9": 0.23203, + "10": 0.22911, + "11": 0.22371, + "12": 0.22358, + "13": 0.21762, + "14": 
0.2166, + "15": 0.2341, + "16": 0.21834, + "17": 0.21429, + "18": 0.21499, + "19": 0.2158, + "20": 0.21523, + "21": 0.21654, + "22": 0.21788, + "23": 0.21597, + "24": 0.20917, + "25": 0.2076, + "26": 0.20309, + "27": 0.20463, + "28": 0.57074, + "29": 0.20266, + "30": 0.21832, + "31": 0.23121, + "32": 0.2052, + "33": 0.20847, + "34": 0.22756, + "35": 0.21093, + "36": 0.20495, + "37": 0.20762, + "38": 0.20131, + "39": 0.1991, + "40": 0.20426, + "41": 0.20518, + "42": 0.20555, + "43": 0.21112, + "44": 0.20079, + "45": 0.21854, + "46": 0.22885, + "47": 0.20366, + "48": 0.21784, + "49": 0.23722, + "50": 0.20288, + "51": 0.23225, + "52": 0.23281, + "53": 0.20606, + "54": 0.21135, + "55": 0.21897, + "56": 0.25991, + "57": 0.22845, + "58": 0.21751, + "59": 0.21469, + "60": 0.21187, + "61": 0.20946, + "62": 0.21358, + "63": 0.21765, + "64": 0.20357, + "65": 0.27698, + "66": 0.2118, + "67": 0.25518, + "68": 0.22631, + "69": 0.21209, + "70": 0.2039, + "71": 0.22504, + "72": 0.22276, + "73": 0.25179, + "74": 0.22993, + "75": 0.21538, + "76": 0.23629, + "77": 0.20835, + "78": 0.21168, + "79": 0.21631, + "80": 0.21797, + "81": 0.20362, + "82": 0.20269, + "83": 0.21014, + "84": 0.21456, + "85": 0.20971, + "86": 0.22253, + "87": 0.20037, + "88": 0.20403, + "89": 0.21541, + "90": 0.21443, + "91": 0.23258, + "92": 0.21749, + "93": 0.22377, + "94": 0.23559, + "95": 0.21351, + "96": 0.20316, + "97": 0.21349, + "98": 0.20244, + "99": 0.21023, + "100": 0.20508 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..9b6990b963d --- /dev/null +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.90528, + "52": 9.80364, + "53": 10.12728, + "54": 10.00036, + "55": 9.9362, + "56": 9.68506, + "57": 9.55805, + "58": 9.90514, + "59": 9.63857, + "60": 9.57451, + "61": 9.76864, + "62": 10.03802, + "63": 9.44503, + "64": 9.82796, + "65": 9.00712, + "66": 9.77422, + "67": 9.41277, + "68": 9.84111, + "69": 9.82784, + "70": 9.79011, + "71": 9.66957, + "72": 9.62799, + "73": 9.5473, + "74": 9.03663, + "75": 9.49153, + "76": 9.16783, + "77": 10.10857, + "78": 9.77081, + "79": 9.4383, + "80": 9.45436, + "81": 9.52266, + "82": 9.7424, + "83": 9.37076, + "84": 9.45377, + "85": 9.65832, + "86": 9.12522, + "87": 9.62697, + "88": 9.79619, + "89": 9.66054, + "90": 9.85081, + "91": 9.39408, + "92": 9.40744, + "93": 9.13595, + "94": 8.89048, + "95": 9.563, + "96": 9.5714, + "97": 9.34318, + "98": 9.73026, + "99": 8.95002, + "100": 9.4424 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": 
"nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 38061.0, + "52": 37025.0, + "53": 41802.0, + "54": 41253.0, + "55": 37654.0, + "56": 41164.0, + "57": 37682.0, + "58": 41782.0, + "59": 39444.0, + "60": 40691.0, + "61": 40876.0, + "62": 43113.0, + "63": 38389.0, + "64": 43217.0, + "65": 41689.0, + "66": 45525.0, + "67": 41717.0, + "68": 40369.0, + "69": 41287.0, + "70": 45545.0, + "71": 41651.0, + "72": 41881.0, + "73": 45139.0, + "74": 35747.0, + "75": 39155.0, + "76": 44874.0, + "77": 45442.0, + "78": 46782.0, + "79": 48776.0, + "80": 47161.0, + "81": 51277.0, + "82": 49953.0, + "83": 45334.0, + "84": 46096.0, + "85": 49238.0, + "86": 46118.0, + "87": 49880.0, + "88": 47115.0, + "89": 48583.0, + "90": 49057.0, + "91": 45950.0, + "92": 47820.0, + "93": 46437.0, + "94": 47530.0, + "95": 48000.0, + "96": 50285.0, + "97": 46225.0, + "98": 49809.0, + "99": 47890.0, + "100": 44636.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": 
"nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 892861440.0, + "52": 892859904.0, + "53": 892862464.0, + "54": 892862464.0, + "55": 892863488.0, + "56": 892866048.0, + "57": 892858368.0, + "58": 892860416.0, + "59": 892859904.0, + "60": 892860928.0, + "61": 892869632.0, + "62": 892866560.0, + "63": 892862976.0, + "64": 892864512.0, + "65": 892852736.0, + "66": 892867072.0, + "67": 892862464.0, + "68": 892869632.0, + "69": 892865536.0, + "70": 892867584.0, + "71": 892869632.0, + "72": 892861440.0, + "73": 892869120.0, + "74": 892859392.0, + "75": 892868096.0, + "76": 892867584.0, + "77": 892868096.0, + "78": 892864512.0, + "79": 892865536.0, + "80": 892865536.0, + "81": 892867072.0, + "82": 892865024.0, + "83": 892861952.0, + "84": 892862464.0, + "85": 892862976.0, + "86": 892862464.0, + "87": 892871168.0, + "88": 892863488.0, + "89": 892865536.0, + "90": 892867072.0, + "91": 892868096.0, + "92": 892866560.0, + "93": 892869632.0, + "94": 892865536.0, + "95": 892866048.0, + "96": 892866048.0, + "97": 892864000.0, + "98": 892868608.0, + "99": 892860928.0, + "100": 892862976.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": 
"nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2157768704.0, + "52": 2158874112.0, + "53": 2160225280.0, + "54": 2160225280.0, + "55": 2161818624.0, + "56": 2161818624.0, + "57": 2161818624.0, + "58": 2161818624.0, + "59": 2161818624.0, + "60": 2161818624.0, + "61": 2161818624.0, + "62": 2161943040.0, + "63": 2161943040.0, + "64": 2162058240.0, + "65": 2162058240.0, + "66": 2162058240.0, + "67": 2162058240.0, + "68": 2162058240.0, + "69": 2162058240.0, + "70": 2162058240.0, + "71": 2162214912.0, + "72": 2162214912.0, + "73": 2165406208.0, + "74": 2165406208.0, + "75": 2165406208.0, + "76": 2165406208.0, + "77": 2165406208.0, + "78": 2165406208.0, + "79": 2165406208.0, + "80": 2165406208.0, + "81": 2165406208.0, + "82": 2165406208.0, + "83": 2165406208.0, + "84": 2166458368.0, + "85": 2166458368.0, + "86": 2166458368.0, + "87": 2166458368.0, + "88": 2166458368.0, + "89": 2166458368.0, + "90": 2166458368.0, + "91": 2166458368.0, + "92": 2166458368.0, + "93": 2166458368.0, + "94": 2166458368.0, + "95": 2166458368.0, + "96": 2166458368.0, + "97": 2166458368.0, + "98": 2166458368.0, + "99": 2166458368.0, + "100": 2166458368.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + 
"34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 12.42315, + "52": 0.32575, + "53": 0.25742, + "54": 0.24982, + "55": 0.24955, + "56": 0.27601, + "57": 0.24269, + "58": 0.22199, + "59": 0.21885, + "60": 0.22264, + "61": 0.21068, + "62": 0.21026, + "63": 0.22993, + "64": 0.20923, + "65": 0.27663, + "66": 0.64746, + "67": 0.26108, + "68": 0.22825, + "69": 0.83895, + "70": 0.20737, + "71": 0.23029, + "72": 0.21664, + "73": 0.24327, + "74": 0.23403, + "75": 0.21475, + "76": 0.2341, + "77": 0.20143, + "78": 0.60189, + "79": 0.22007, + "80": 0.22126, + "81": 0.20541, + "82": 0.20414, + "83": 0.21458, + "84": 0.34679, + "85": 0.21148, + "86": 0.22182, + "87": 0.2044, + "88": 0.204, + "89": 0.21796, + "90": 0.20536, + "91": 0.22132, + "92": 0.20859, + "93": 0.21705, + "94": 0.23829, + "95": 0.21049, + "96": 0.20011, + "97": 0.2156, + "98": 0.19753, + "99": 0.21068, + "100": 0.20211 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json index 1a09e73e300..bf57cfecddc 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1254501376.0, - "2": 1254505472.0, - "3": 1254505472.0, - "4": 
1254501376.0, - "5": 1254501888.0, - "6": 1254503424.0, - "7": 1254503936.0, - "8": 1254503936.0, - "9": 1254501888.0, - "10": 1254503424.0, - "11": 1254503936.0, - "12": 1254502912.0, - "13": 1254500864.0, - "14": 1254505472.0, - "15": 1254504448.0, - "16": 1254503424.0, - "17": 1254504448.0, - "18": 1254502400.0, - "19": 1254503936.0, - "20": 1254503424.0, - "21": 1254503424.0, - "22": 1254501376.0, - "23": 1254500864.0, - "24": 1254503424.0, - "25": 1254500352.0, - "26": 1254502400.0, - "27": 1254501888.0, - "28": 1254502912.0, - "29": 1254505472.0, - "30": 1254500352.0, - "31": 1254499328.0, - "32": 1254500352.0, - "33": 1254502912.0, - "34": 1254502912.0, - "35": 1254501888.0, - "36": 1254505472.0, - "37": 1254503424.0, - "38": 1254503936.0, - "39": 1254502912.0, - "40": 1254502912.0, - "41": 1254503424.0, - "42": 1254502912.0, - "43": 1254499840.0, - "44": 1254501376.0, - "45": 1254502400.0, - "46": 1254500864.0, - "47": 1254503936.0, - "48": 1254499840.0, - "49": 1254500352.0, - "50": 1254502912.0, - "51": 1254496768.0, - "52": 1254496256.0, - "53": 1254497792.0, - "54": 1254498304.0, - "55": 1254500352.0, - "56": 1254501888.0, - "57": 1254493184.0, - "58": 1254498304.0, - "59": 1254495232.0, - "60": 1254496768.0, - "61": 1254504960.0, - "62": 1254503936.0, - "63": 1254499328.0, - "64": 1254498816.0, - "65": 1254488576.0, - "66": 1254502912.0, - "67": 1254498304.0, - "68": 1254505984.0, - "69": 1254501376.0, - "70": 1254502912.0, - "71": 1254504960.0, - "72": 1254496256.0, - "73": 1254504448.0, - "74": 1254495232.0, - "75": 1254504448.0, - "76": 1254503424.0, - "77": 1254503936.0, - "78": 1254500352.0, - "79": 1254500864.0, - "80": 1254499840.0, - "81": 1254503424.0, - "82": 1254500352.0, - "83": 1254497792.0, - "84": 1254497280.0, - "85": 1254499328.0, - "86": 1254498816.0, - "87": 1254505472.0, - "88": 1254499328.0, - "89": 1254500864.0, - "90": 1254502912.0, - "91": 1254505472.0, - "92": 1254502912.0, - "93": 1254505472.0, - "94": 1254500352.0, - "95": 
1254501888.0, - "96": 1254501888.0, - "97": 1254499328.0, - "98": 1254507520.0, - "99": 1254497280.0, - "100": 1254499840.0 + "1": 1254502400.0, + "2": 1254506496.0, + "3": 1254506496.0, + "4": 1254502400.0, + "5": 1254502912.0, + "6": 1254504448.0, + "7": 1254504960.0, + "8": 1254504960.0, + "9": 1254502912.0, + "10": 1254504448.0, + "11": 1254504960.0, + "12": 1254503936.0, + "13": 1254501888.0, + "14": 1254506496.0, + "15": 1254505472.0, + "16": 1254504448.0, + "17": 1254505472.0, + "18": 1254503424.0, + "19": 1254504960.0, + "20": 1254504448.0, + "21": 1254504448.0, + "22": 1254502400.0, + "23": 1254501888.0, + "24": 1254504448.0, + "25": 1254501376.0, + "26": 1254503424.0, + "27": 1254502912.0, + "28": 1254503936.0, + "29": 1254506496.0, + "30": 1254501376.0, + "31": 1254500352.0, + "32": 1254501376.0, + "33": 1254503936.0, + "34": 1254503936.0, + "35": 1254502912.0, + "36": 1254506496.0, + "37": 1254504448.0, + "38": 1254504960.0, + "39": 1254503936.0, + "40": 1254503936.0, + "41": 1254504448.0, + "42": 1254503936.0, + "43": 1254500864.0, + "44": 1254502400.0, + "45": 1254503424.0, + "46": 1254501888.0, + "47": 1254504960.0, + "48": 1254500864.0, + "49": 1254501376.0, + "50": 1254503936.0, + "51": 1254497792.0, + "52": 1254497280.0, + "53": 1254498816.0, + "54": 1254499328.0, + "55": 1254501376.0, + "56": 1254502912.0, + "57": 1254494208.0, + "58": 1254499328.0, + "59": 1254496256.0, + "60": 1254497792.0, + "61": 1254505984.0, + "62": 1254504960.0, + "63": 1254500352.0, + "64": 1254499840.0, + "65": 1254489600.0, + "66": 1254503936.0, + "67": 1254499328.0, + "68": 1254507008.0, + "69": 1254502400.0, + "70": 1254503936.0, + "71": 1254505984.0, + "72": 1254497280.0, + "73": 1254505472.0, + "74": 1254496256.0, + "75": 1254505472.0, + "76": 1254504448.0, + "77": 1254504960.0, + "78": 1254501376.0, + "79": 1254501888.0, + "80": 1254500864.0, + "81": 1254504448.0, + "82": 1254501376.0, + "83": 1254498816.0, + "84": 1254498304.0, + "85": 1254500352.0, + "86": 
1254499840.0, + "87": 1254506496.0, + "88": 1254500352.0, + "89": 1254501888.0, + "90": 1254503936.0, + "91": 1254506496.0, + "92": 1254503936.0, + "93": 1254506496.0, + "94": 1254501376.0, + "95": 1254502912.0, + "96": 1254502912.0, + "97": 1254500352.0, + "98": 1254508544.0, + "99": 1254498304.0, + "100": 1254500864.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1987779584.0, - "2": 2468141568.0, - "3": 2468920320.0, - "4": 2468920320.0, - "5": 2468920320.0, - "6": 2468920320.0, - "7": 2468920320.0, - "8": 2468920320.0, - "9": 2469234688.0, - "10": 2469234688.0, - "11": 2469234688.0, - "12": 2469234688.0, - "13": 2469234688.0, - "14": 2469234688.0, - "15": 2469234688.0, - "16": 2469234688.0, - "17": 2469234688.0, - "18": 2469234688.0, - "19": 2469234688.0, - "20": 2469234688.0, - "21": 2469234688.0, - "22": 2469234688.0, - "23": 2469234688.0, - "24": 2469234688.0, - "25": 2469234688.0, - "26": 2469234688.0, - "27": 2469234688.0, - "28": 2469234688.0, - "29": 2469234688.0, - "30": 2469234688.0, - "31": 2469234688.0, - "32": 2469234688.0, - "33": 2469234688.0, - "34": 2469234688.0, - "35": 2469234688.0, - "36": 2469234688.0, - "37": 2469234688.0, - "38": 2469234688.0, - "39": 2469234688.0, - "40": 2469234688.0, - "41": 2469234688.0, - "42": 2469234688.0, - "43": 2469234688.0, - "44": 2469234688.0, - "45": 2469234688.0, - "46": 2469234688.0, - "47": 2469234688.0, - "48": 2469234688.0, - "49": 2469234688.0, - "50": 2469234688.0, - "51": 2469234688.0, - "52": 2469234688.0, - "53": 2469234688.0, - "54": 2469234688.0, - "55": 2469234688.0, - "56": 2469234688.0, - "57": 2469234688.0, - "58": 2469234688.0, - "59": 2469234688.0, - "60": 2469234688.0, - "61": 2469234688.0, - "62": 2469234688.0, - "63": 2469234688.0, - "64": 2469234688.0, - "65": 2469234688.0, - "66": 2469234688.0, - "67": 2469234688.0, - "68": 2469234688.0, - "69": 2469234688.0, - "70": 2469234688.0, - "71": 2469234688.0, - "72": 
2469234688.0, - "73": 2469234688.0, - "74": 2469234688.0, - "75": 2469234688.0, - "76": 2471084032.0, - "77": 2471084032.0, - "78": 2471084032.0, - "79": 2471084032.0, - "80": 2471084032.0, - "81": 2471084032.0, - "82": 2471084032.0, - "83": 2471084032.0, - "84": 2471084032.0, - "85": 2471084032.0, - "86": 2471084032.0, - "87": 2471084032.0, - "88": 2471084032.0, - "89": 2471084032.0, - "90": 2471084032.0, - "91": 2471084032.0, - "92": 2471084032.0, - "93": 2471084032.0, - "94": 2471084032.0, - "95": 2471084032.0, - "96": 2471084032.0, - "97": 2471084032.0, - "98": 2471084032.0, - "99": 2471084032.0, - "100": 2471084032.0 + "1": 2038519808.0, + "2": 2520255488.0, + "3": 2520255488.0, + "4": 2520255488.0, + "5": 2520552960.0, + "6": 2520552960.0, + "7": 2520552960.0, + "8": 2520552960.0, + "9": 2520552960.0, + "10": 2520552960.0, + "11": 2520552960.0, + "12": 2520552960.0, + "13": 2520552960.0, + "14": 2520552960.0, + "15": 2520552960.0, + "16": 2520552960.0, + "17": 2520552960.0, + "18": 2520552960.0, + "19": 2520552960.0, + "20": 2520552960.0, + "21": 2520552960.0, + "22": 2520552960.0, + "23": 2520552960.0, + "24": 2520552960.0, + "25": 2520552960.0, + "26": 2520552960.0, + "27": 2520552960.0, + "28": 2520552960.0, + "29": 2520552960.0, + "30": 2520552960.0, + "31": 2520552960.0, + "32": 2520552960.0, + "33": 2521159680.0, + "34": 2521159680.0, + "35": 2521159680.0, + "36": 2521159680.0, + "37": 2521159680.0, + "38": 2521159680.0, + "39": 2521159680.0, + "40": 2521159680.0, + "41": 2521159680.0, + "42": 2521159680.0, + "43": 2521159680.0, + "44": 2521159680.0, + "45": 2521159680.0, + "46": 2521615360.0, + "47": 2521615360.0, + "48": 2521615360.0, + "49": 2521615360.0, + "50": 2521615360.0, + "51": 2521615360.0, + "52": 2521615360.0, + "53": 2521615360.0, + "54": 2521615360.0, + "55": 2521615360.0, + "56": 2521615360.0, + "57": 2521615360.0, + "58": 2521615360.0, + "59": 2521615360.0, + "60": 2521615360.0, + "61": 2521615360.0, + "62": 2521615360.0, + "63": 
2521615360.0, + "64": 2521615360.0, + "65": 2521615360.0, + "66": 2521615360.0, + "67": 2521615360.0, + "68": 2521615360.0, + "69": 2521615360.0, + "70": 2521615360.0, + "71": 2521615360.0, + "72": 2521615360.0, + "73": 2521615360.0, + "74": 2521615360.0, + "75": 2521615360.0, + "76": 2521615360.0, + "77": 2521615360.0, + "78": 2521615360.0, + "79": 2521615360.0, + "80": 2521615360.0, + "81": 2521615360.0, + "82": 2521615360.0, + "83": 2521615360.0, + "84": 2521615360.0, + "85": 2521615360.0, + "86": 2521615360.0, + "87": 2521615360.0, + "88": 2521615360.0, + "89": 2521615360.0, + "90": 2521615360.0, + "91": 2521615360.0, + "92": 2521615360.0, + "93": 2521615360.0, + "94": 2521615360.0, + "95": 2523076096.0, + "96": 2523076096.0, + "97": 2523076096.0, + "98": 2523076096.0, + "99": 2523076096.0, + "100": 2523076096.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 16.55217, - "2": 0.35181, - "3": 0.30566, - "4": 0.27474, - "5": 0.25821, - "6": 0.24756, - "7": 0.26543, - "8": 0.25377, - "9": 0.25669, - "10": 0.24857, - "11": 0.25265, - "12": 0.25052, - "13": 0.25023, - "14": 0.24925, - "15": 0.26244, - "16": 0.25012, - "17": 0.26253, - "18": 0.24643, - "19": 0.24809, - "20": 0.24556, - "21": 0.24394, - "22": 0.251, - "23": 0.24828, - "24": 0.24669, - "25": 0.24387, - "26": 0.24678, - "27": 0.24651, - "28": 0.25139, - "29": 0.24752, - "30": 0.24424, - "31": 0.28311, - "32": 0.25225, - "33": 0.24909, - "34": 0.26885, - "35": 0.25395, - "36": 0.2523, - "37": 0.24797, - "38": 0.25223, - "39": 0.24992, - "40": 0.25852, - "41": 0.24878, - "42": 0.2538, - "43": 0.2597, - "44": 0.24622, - "45": 0.26158, - "46": 0.27295, - "47": 0.2509, - "48": 0.26644, - "49": 0.28407, - "50": 0.25557, - "51": 0.26677, - "52": 0.27657, - "53": 0.25511, - "54": 0.25626, - "55": 0.26088, - "56": 0.30712, - "57": 0.27149, - "58": 0.25315, - "59": 0.26247, - "60": 0.26163, - "61": 0.25105, - "62": 0.24787, - "63": 0.27859, - "64": 0.26395, 
- "65": 0.32678, - "66": 0.25441, - "67": 0.30841, - "68": 0.27583, - "69": 0.2474, - "70": 0.25895, - "71": 0.27463, - "72": 0.26044, - "73": 0.27953, - "74": 0.27908, - "75": 0.26127, - "76": 0.28492, - "77": 0.25287, - "78": 0.26927, - "79": 0.26632, - "80": 0.26465, - "81": 0.25418, - "82": 0.25, - "83": 0.26012, - "84": 0.27232, - "85": 0.25707, - "86": 0.26564, - "87": 0.25446, - "88": 0.24718, - "89": 0.26899, - "90": 0.24357, - "91": 0.27455, - "92": 0.25494, - "93": 0.26852, - "94": 0.27917, - "95": 0.258, - "96": 0.25134, - "97": 0.26377, - "98": 0.24669, - "99": 0.26096, - "100": 0.25411 + "1": 17.78784, + "2": 0.2935, + "3": 0.25416, + "4": 0.28848, + "5": 0.27342, + "6": 0.21986, + "7": 0.22775, + "8": 0.21125, + "9": 0.22242, + "10": 0.20696, + "11": 0.21121, + "12": 0.20562, + "13": 0.20918, + "14": 0.20486, + "15": 0.22312, + "16": 0.20648, + "17": 0.21741, + "18": 0.20596, + "19": 0.20449, + "20": 0.20633, + "21": 0.20648, + "22": 0.20939, + "23": 0.20613, + "24": 0.2098, + "25": 0.21077, + "26": 0.20978, + "27": 0.20622, + "28": 0.20953, + "29": 0.2052, + "30": 0.20858, + "31": 0.23751, + "32": 0.20916, + "33": 0.21528, + "34": 0.22994, + "35": 0.20666, + "36": 0.56591, + "37": 0.2088, + "38": 0.20535, + "39": 0.20334, + "40": 0.21053, + "41": 0.20731, + "42": 0.21647, + "43": 0.21279, + "44": 0.20733, + "45": 0.22499, + "46": 0.22926, + "47": 0.21023, + "48": 0.21769, + "49": 0.24399, + "50": 0.21286, + "51": 0.238, + "52": 0.23293, + "53": 0.20987, + "54": 0.21516, + "55": 0.22388, + "56": 0.25985, + "57": 0.22604, + "58": 0.61513, + "59": 0.22219, + "60": 0.21734, + "61": 0.90688, + "62": 0.21705, + "63": 0.23992, + "64": 0.21828, + "65": 0.27683, + "66": 0.21653, + "67": 0.27213, + "68": 0.8349, + "69": 0.21293, + "70": 0.21051, + "71": 0.22862, + "72": 0.22498, + "73": 0.24298, + "74": 0.23094, + "75": 0.22956, + "76": 0.24583, + "77": 0.21646, + "78": 0.22364, + "79": 0.22898, + "80": 0.21878, + "81": 0.21415, + "82": 0.21267, + "83": 
0.22485, + "84": 0.22454, + "85": 0.21746, + "86": 0.23031, + "87": 0.21423, + "88": 0.21226, + "89": 0.2196, + "90": 0.21327, + "91": 0.23392, + "92": 0.22086, + "93": 0.23306, + "94": 0.24169, + "95": 0.22202, + "96": 0.2155, + "97": 0.22184, + "98": 0.2139, + "99": 0.21705, + "100": 0.21654 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..f6f646ddf4a --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.90532, + "52": 9.8039, + "53": 10.12749, + "54": 10.00016, + "55": 9.93664, + "56": 9.68581, + "57": 9.55837, + "58": 9.90508, + "59": 9.63839, + "60": 9.57464, + "61": 9.76841, + "62": 10.03826, + "63": 9.44553, + "64": 
9.82755, + "65": 9.00746, + "66": 9.77476, + "67": 9.41315, + "68": 9.84101, + "69": 9.8283, + "70": 9.79049, + "71": 9.66947, + "72": 9.62799, + "73": 9.54696, + "74": 9.03684, + "75": 9.49167, + "76": 9.16779, + "77": 10.1088, + "78": 9.77072, + "79": 9.43806, + "80": 9.45438, + "81": 9.5225, + "82": 9.74228, + "83": 9.36999, + "84": 9.45397, + "85": 9.65808, + "86": 9.12501, + "87": 9.62705, + "88": 9.79641, + "89": 9.66075, + "90": 9.8512, + "91": 9.39414, + "92": 9.40741, + "93": 9.13573, + "94": 8.89066, + "95": 9.56273, + "96": 9.5712, + "97": 9.34355, + "98": 9.73013, + "99": 8.95039, + "100": 9.44212 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 37791.0, + "52": 37021.0, + "53": 41944.0, + "54": 40947.0, + "55": 37727.0, + "56": 40761.0, + "57": 37481.0, + "58": 41787.0, + "59": 39365.0, + "60": 40922.0, + "61": 41100.0, + "62": 43388.0, + "63": 38269.0, + "64": 43526.0, + "65": 41821.0, + "66": 44876.0, + "67": 42497.0, + "68": 39967.0, + "69": 41255.0, + "70": 45781.0, + "71": 42348.0, + "72": 42151.0, + "73": 45043.0, + "74": 35705.0, + "75": 39397.0, + "76": 45340.0, + "77": 45670.0, + "78": 46614.0, + "79": 49159.0, + "80": 47317.0, + "81": 51048.0, + "82": 
49312.0, + "83": 45257.0, + "84": 45494.0, + "85": 49366.0, + "86": 45783.0, + "87": 50223.0, + "88": 47536.0, + "89": 48826.0, + "90": 49499.0, + "91": 45726.0, + "92": 47926.0, + "93": 46433.0, + "94": 47675.0, + "95": 47504.0, + "96": 50174.0, + "97": 46465.0, + "98": 49255.0, + "99": 48053.0, + "100": 44507.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1254985216.0, + "52": 1254984704.0, + "53": 1254986240.0, + "54": 1254986752.0, + "55": 1254988800.0, + "56": 1254990336.0, + "57": 1254981632.0, + "58": 1254986752.0, + "59": 1254983680.0, + "60": 1254985216.0, + "61": 1254993408.0, + "62": 1254992384.0, + "63": 1254987776.0, + "64": 1254987264.0, + "65": 1254977024.0, + "66": 1254991360.0, + "67": 1254986752.0, + "68": 1254994432.0, + "69": 1254989824.0, + "70": 1254991360.0, + "71": 1254993408.0, + "72": 1254984704.0, + "73": 1254992896.0, + "74": 1254983680.0, + "75": 1254992896.0, + "76": 1254991872.0, + "77": 1254992384.0, + "78": 1254988800.0, + "79": 1254989312.0, + "80": 1254988288.0, + "81": 1254991872.0, + "82": 1254988800.0, + "83": 1254986240.0, + "84": 1254985728.0, + "85": 1254987776.0, + "86": 1254987264.0, + "87": 1254993920.0, + "88": 
1254987776.0, + "89": 1254989312.0, + "90": 1254991360.0, + "91": 1254993920.0, + "92": 1254991360.0, + "93": 1254993920.0, + "94": 1254988800.0, + "95": 1254990336.0, + "96": 1254990336.0, + "97": 1254987776.0, + "98": 1254995968.0, + "99": 1254985728.0, + "100": 1254988288.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3124454912.0, + "52": 3124454912.0, + "53": 3124454912.0, + "54": 3124454912.0, + "55": 3124454912.0, + "56": 3124454912.0, + "57": 3124454912.0, + "58": 3124454912.0, + "59": 3124454912.0, + "60": 3124454912.0, + "61": 3124454912.0, + "62": 3124454912.0, + "63": 3124454912.0, + "64": 3124454912.0, + "65": 3124454912.0, + "66": 3124454912.0, + "67": 3124454912.0, + "68": 3124454912.0, + "69": 3124454912.0, + "70": 3124454912.0, + "71": 3124454912.0, + "72": 3124454912.0, + "73": 3124454912.0, + "74": 3124454912.0, + "75": 3124454912.0, + "76": 3124454912.0, + "77": 3124454912.0, + "78": 3124454912.0, + "79": 3124454912.0, + "80": 3124454912.0, + "81": 3124454912.0, + "82": 3124454912.0, + "83": 3124454912.0, + "84": 3124454912.0, + "85": 3124454912.0, + "86": 3124454912.0, + "87": 3124454912.0, + "88": 3124454912.0, + "89": 3124454912.0, + "90": 
3124454912.0, + "91": 3124454912.0, + "92": 3124454912.0, + "93": 3124454912.0, + "94": 3124454912.0, + "95": 3124454912.0, + "96": 3124454912.0, + "97": 3124454912.0, + "98": 3124454912.0, + "99": 3124454912.0, + "100": 3124454912.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 14.75813, + "52": 0.32673, + "53": 0.25047, + "54": 0.24173, + "55": 0.23984, + "56": 0.28067, + "57": 0.24362, + "58": 0.23949, + "59": 0.22718, + "60": 0.22572, + "61": 0.21463, + "62": 0.21566, + "63": 0.24356, + "64": 0.22422, + "65": 0.28681, + "66": 0.2175, + "67": 0.268, + "68": 0.24975, + "69": 0.21136, + "70": 0.21698, + "71": 0.23525, + "72": 0.22621, + "73": 0.24672, + "74": 0.2348, + "75": 0.22093, + "76": 0.24479, + "77": 0.21587, + "78": 0.2274, + "79": 0.23052, + "80": 0.22194, + "81": 0.212, + "82": 0.21273, + "83": 0.22719, + "84": 0.23492, + "85": 0.22378, + "86": 0.2309, + "87": 0.21404, + "88": 0.21648, + "89": 0.2217, + "90": 0.59895, + "91": 0.23561, + "92": 0.22052, + "93": 0.22925, + "94": 0.23793, + "95": 0.22403, + "96": 0.21436, + "97": 0.22243, + "98": 0.21293, + "99": 0.21642, + "100": 0.21522 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json index 089545b6f4a..38498d3139b 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1027085824.0, - "2": 1027085824.0, - "3": 1027086848.0, - "4": 1027086336.0, - "5": 1027086848.0, - "6": 1027085312.0, - "7": 1027081728.0, - "8": 1027082752.0, - "9": 1027089408.0, - "10": 1027083776.0, - "11": 1027084288.0, - "12": 1027084288.0, - "13": 1027086848.0, - "14": 1027083776.0, - "15": 1027085312.0, - "16": 1027086336.0, - "17": 1027084288.0, - "18": 1027088384.0, - "19": 1027086848.0, - "20": 1027089920.0, - "21": 1027083264.0, - "22": 1027086336.0, - "23": 1027086848.0, - "24": 1027085824.0, - "25": 1027084288.0, - "26": 1027085312.0, - "27": 1027085312.0, - "28": 1027082752.0, - "29": 1027083776.0, - "30": 1027082240.0, - "31": 1027074048.0, - "32": 1027077120.0, - "33": 1027086336.0, - "34": 1027083264.0, - "35": 1027085312.0, - "36": 1027083776.0, - "37": 1027084288.0, - "38": 1027085312.0, - "39": 1027080704.0, - "40": 1027081728.0, - "41": 1027083264.0, - "42": 1027086848.0, - "43": 1027079680.0, - "44": 1027082752.0, - "45": 1027082752.0, - "46": 1027073536.0, - "47": 1027082752.0, - "48": 1027081216.0, - "49": 1027077120.0, - "50": 1027084800.0 + "1": 1027090944.0, + "2": 1027090944.0, + "3": 1027091968.0, + "4": 1027091456.0, + "5": 1027091968.0, + "6": 1027090432.0, + "7": 1027086848.0, + "8": 1027087872.0, + "9": 1027094528.0, + "10": 1027088896.0, + "11": 
1027089408.0, + "12": 1027089408.0, + "13": 1027091968.0, + "14": 1027088896.0, + "15": 1027090432.0, + "16": 1027091456.0, + "17": 1027089408.0, + "18": 1027093504.0, + "19": 1027091968.0, + "20": 1027095040.0, + "21": 1027088384.0, + "22": 1027091456.0, + "23": 1027091968.0, + "24": 1027090944.0, + "25": 1027089408.0, + "26": 1027090432.0, + "27": 1027090432.0, + "28": 1027087872.0, + "29": 1027088896.0, + "30": 1027087360.0, + "31": 1027079168.0, + "32": 1027082240.0, + "33": 1027091456.0, + "34": 1027088384.0, + "35": 1027090432.0, + "36": 1027088896.0, + "37": 1027089408.0, + "38": 1027090432.0, + "39": 1027085824.0, + "40": 1027086848.0, + "41": 1027088384.0, + "42": 1027091968.0, + "43": 1027084800.0, + "44": 1027087872.0, + "45": 1027087872.0, + "46": 1027078656.0, + "47": 1027087872.0, + "48": 1027086336.0, + "49": 1027082240.0, + "50": 1027089920.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3007080960.0, - "2": 3247499776.0, - "3": 3247499776.0, - "4": 3248093184.0, - "5": 3248476160.0, - "6": 3248476160.0, - "7": 3248476160.0, - "8": 3248476160.0, - "9": 3248476160.0, - "10": 3249142784.0, - "11": 3249142784.0, - "12": 3249142784.0, - "13": 3249142784.0, - "14": 3249142784.0, - "15": 3249142784.0, - "16": 3249142784.0, - "17": 3249142784.0, - "18": 3249142784.0, - "19": 3249142784.0, - "20": 3249142784.0, - "21": 3249142784.0, - "22": 3249860608.0, - "23": 3249860608.0, - "24": 3249972736.0, - "25": 3249972736.0, - "26": 3249972736.0, - "27": 3249972736.0, - "28": 3249972736.0, - "29": 3249972736.0, - "30": 3249972736.0, - "31": 3249972736.0, - "32": 3249972736.0, - "33": 3249972736.0, - "34": 3249972736.0, - "35": 3249972736.0, - "36": 3249972736.0, - "37": 3249972736.0, - "38": 3249972736.0, - "39": 3249972736.0, - "40": 3249972736.0, - "41": 3249972736.0, - "42": 3249972736.0, - "43": 3249972736.0, - "44": 3249972736.0, - "45": 3249972736.0, - "46": 3249972736.0, - "47": 3249972736.0, 
- "48": 3249972736.0, - "49": 3249972736.0, - "50": 3249972736.0 + "1": 3057868288.0, + "2": 3298335232.0, + "3": 3298335232.0, + "4": 3300084224.0, + "5": 3300084224.0, + "6": 3300084224.0, + "7": 3300084224.0, + "8": 3300084224.0, + "9": 3300084224.0, + "10": 3300122624.0, + "11": 3300122624.0, + "12": 3300122624.0, + "13": 3300122624.0, + "14": 3300122624.0, + "15": 3300122624.0, + "16": 3300122624.0, + "17": 3300122624.0, + "18": 3300122624.0, + "19": 3300376576.0, + "20": 3300416000.0, + "21": 3300416000.0, + "22": 3301032960.0, + "23": 3301998080.0, + "24": 3301998080.0, + "25": 3301998080.0, + "26": 3301998080.0, + "27": 3301998080.0, + "28": 3301998080.0, + "29": 3301998080.0, + "30": 3301998080.0, + "31": 3301998080.0, + "32": 3301998080.0, + "33": 3301998080.0, + "34": 3301998080.0, + "35": 3301998080.0, + "36": 3301998080.0, + "37": 3301998080.0, + "38": 3301998080.0, + "39": 3301998080.0, + "40": 3301998080.0, + "41": 3301998080.0, + "42": 3301998080.0, + "43": 3301998080.0, + "44": 3301998080.0, + "45": 3301998080.0, + "46": 3301998080.0, + "47": 3301998080.0, + "48": 3301998080.0, + "49": 3301998080.0, + "50": 3301998080.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 13.20887, - "2": 0.29449, - "3": 0.26099, - "4": 0.25199, - "5": 0.24285, - "6": 0.23658, - "7": 0.24248, - "8": 0.23258, - "9": 0.22661, - "10": 0.23769, - "11": 0.22933, - "12": 0.23288, - "13": 0.23074, - "14": 0.22376, - "15": 0.25054, - "16": 0.22881, - "17": 0.23932, - "18": 0.22427, - "19": 0.23467, - "20": 0.22747, - "21": 0.22662, - "22": 0.22866, - "23": 0.22726, - "24": 0.22901, - "25": 0.22654, - "26": 0.22683, - "27": 0.22909, - "28": 0.2264, - "29": 0.23339, - "30": 0.23066, - "31": 0.27285, - "32": 0.22966, - "33": 0.23016, - "34": 0.24956, - "35": 0.23114, - "36": 0.24161, - "37": 0.22585, - "38": 0.23047, - "39": 0.22695, - "40": 0.24845, - "41": 0.23491, - "42": 0.22656, - "43": 0.23744, - "44": 0.23602, - "45": 
0.24859, - "46": 0.25828, - "47": 0.2367, - "48": 0.2564, - "49": 0.27812, - "50": 0.23401 + "1": 16.45405, + "2": 0.30024, + "3": 0.24416, + "4": 0.22949, + "5": 0.21642, + "6": 0.20677, + "7": 0.21591, + "8": 0.21087, + "9": 0.20973, + "10": 0.20724, + "11": 0.20594, + "12": 0.20225, + "13": 0.21091, + "14": 0.2028, + "15": 0.22641, + "16": 0.20409, + "17": 0.21141, + "18": 0.20363, + "19": 0.20701, + "20": 0.2078, + "21": 0.20171, + "22": 0.20432, + "23": 0.19941, + "24": 0.20413, + "25": 0.20204, + "26": 0.20188, + "27": 0.60524, + "28": 0.21001, + "29": 0.20338, + "30": 0.20253, + "31": 0.2399, + "32": 0.19914, + "33": 0.20122, + "34": 0.22929, + "35": 0.20106, + "36": 0.22225, + "37": 0.20411, + "38": 0.20267, + "39": 0.19726, + "40": 0.21398, + "41": 0.21317, + "42": 0.20362, + "43": 0.20696, + "44": 0.20834, + "45": 0.21563, + "46": 0.22195, + "47": 0.20394, + "48": 0.22663, + "49": 0.24701, + "50": 0.20255 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgx_a100.json index f91ad30ed3a..512f1302b5f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgx_a100.json @@ -175,7 +175,7 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2910859264.0, + "1": 2910130176.0, "2": 3151821824.0, "3": 3152806912.0, "4": 3156619264.0, @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6.90142, - "2": 0.35609, - "3": 0.29589, - "4": 0.29327, - "5": 0.29594, - "6": 0.293, - "7": 0.29087, - "8": 0.29178, - "9": 0.29184, - "10": 0.29303, - "11": 0.29381, - 
"12": 0.29249, - "13": 0.2936, - "14": 0.29671, - "15": 0.29969, - "16": 0.30214, - "17": 0.29463, - "18": 0.30986, - "19": 0.29429, - "20": 0.29497, - "21": 0.29609, - "22": 0.29421, - "23": 0.2931, - "24": 0.29341, - "25": 0.29443, - "26": 0.28879, - "27": 0.28844, - "28": 0.28873, - "29": 0.28741, - "30": 0.28737, - "31": 0.28905, - "32": 0.28701, - "33": 0.28706, - "34": 0.28739, - "35": 0.28701, - "36": 0.28751, - "37": 0.28826, - "38": 0.28792, - "39": 0.28663, - "40": 0.28805, - "41": 0.28776, - "42": 0.28855, - "43": 0.28777, - "44": 0.28801, - "45": 0.2885, - "46": 0.28907, - "47": 0.28755, - "48": 0.28719, - "49": 0.28878, - "50": 0.28677 + "1": 6.10504, + "2": 0.31901, + "3": 0.30905, + "4": 0.29474, + "5": 0.29396, + "6": 0.29282, + "7": 0.29057, + "8": 0.2914, + "9": 0.29228, + "10": 0.29365, + "11": 0.29209, + "12": 0.28885, + "13": 0.28831, + "14": 0.28848, + "15": 0.29001, + "16": 0.28893, + "17": 0.28956, + "18": 0.28887, + "19": 0.28776, + "20": 0.28952, + "21": 0.6384, + "22": 0.29529, + "23": 0.29475, + "24": 0.29441, + "25": 0.29534, + "26": 0.29435, + "27": 0.29559, + "28": 0.30134, + "29": 0.2903, + "30": 0.28843, + "31": 0.28861, + "32": 0.28817, + "33": 0.29466, + "34": 0.28874, + "35": 0.28729, + "36": 0.28824, + "37": 0.28808, + "38": 0.28729, + "39": 0.28702, + "40": 0.28605, + "41": 0.28667, + "42": 0.2877, + "43": 0.28836, + "44": 0.28722, + "45": 0.28782, + "46": 0.28798, + "47": 0.28716, + "48": 0.28759, + "49": 0.28891, + "50": 0.28753 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json index c49c5a579c0..b626738d63e 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1027085824.0, - "2": 1027085824.0, - "3": 1027086848.0, - "4": 1027086336.0, - "5": 1027086848.0, - "6": 1027085312.0, - "7": 1027081728.0, - "8": 1027082752.0, - "9": 1027089408.0, - "10": 1027083776.0, - "11": 1027084288.0, - "12": 1027084288.0, - "13": 1027086848.0, - "14": 1027083776.0, - "15": 1027085312.0, - "16": 1027086336.0, - "17": 1027084288.0, - "18": 1027088384.0, - "19": 1027086848.0, - "20": 1027089920.0, - "21": 1027083264.0, - "22": 1027086336.0, - "23": 1027086848.0, - "24": 1027085824.0, - "25": 1027084288.0, - "26": 1027085312.0, - "27": 1027085312.0, - "28": 1027082752.0, - "29": 1027083776.0, - "30": 1027082240.0, - "31": 1027074048.0, - "32": 1027077120.0, - "33": 1027086336.0, - "34": 1027083264.0, - "35": 1027085312.0, - "36": 1027083776.0, - "37": 1027084288.0, - "38": 1027085312.0, - "39": 1027080704.0, - "40": 1027081728.0, - "41": 1027083264.0, - "42": 1027086848.0, - "43": 1027079680.0, - "44": 1027082752.0, - "45": 1027082752.0, - "46": 1027073536.0, - "47": 1027082752.0, - "48": 1027081216.0, - "49": 1027077120.0, - "50": 1027084800.0 + "1": 1027090944.0, + "2": 1027090944.0, + "3": 1027091968.0, + "4": 1027091456.0, + "5": 1027091968.0, + "6": 1027090432.0, + "7": 1027086848.0, + "8": 1027087872.0, + "9": 1027094528.0, + "10": 1027088896.0, + "11": 1027089408.0, + "12": 1027089408.0, + "13": 1027091968.0, + "14": 1027088896.0, + "15": 1027090432.0, + "16": 1027091456.0, + "17": 1027089408.0, + "18": 1027093504.0, + "19": 1027091968.0, + "20": 1027095040.0, + "21": 1027088384.0, + "22": 1027091456.0, + "23": 1027091968.0, + "24": 
1027090944.0, + "25": 1027089408.0, + "26": 1027090432.0, + "27": 1027090432.0, + "28": 1027087872.0, + "29": 1027088896.0, + "30": 1027087360.0, + "31": 1027079168.0, + "32": 1027082240.0, + "33": 1027091456.0, + "34": 1027088384.0, + "35": 1027090432.0, + "36": 1027088896.0, + "37": 1027089408.0, + "38": 1027090432.0, + "39": 1027085824.0, + "40": 1027086848.0, + "41": 1027088384.0, + "42": 1027091968.0, + "43": 1027084800.0, + "44": 1027087872.0, + "45": 1027087872.0, + "46": 1027078656.0, + "47": 1027087872.0, + "48": 1027086336.0, + "49": 1027082240.0, + "50": 1027089920.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3007080960.0, - "2": 3247499776.0, - "3": 3247499776.0, - "4": 3248093184.0, - "5": 3248476160.0, - "6": 3248476160.0, - "7": 3248476160.0, - "8": 3248476160.0, - "9": 3248476160.0, - "10": 3249142784.0, - "11": 3249142784.0, - "12": 3249142784.0, - "13": 3249142784.0, - "14": 3249142784.0, - "15": 3249142784.0, - "16": 3249142784.0, - "17": 3249142784.0, - "18": 3249142784.0, - "19": 3249142784.0, - "20": 3249142784.0, - "21": 3249142784.0, - "22": 3249860608.0, - "23": 3249860608.0, - "24": 3249972736.0, - "25": 3249972736.0, - "26": 3249972736.0, - "27": 3249972736.0, - "28": 3249972736.0, - "29": 3249972736.0, - "30": 3249972736.0, - "31": 3249972736.0, - "32": 3249972736.0, - "33": 3249972736.0, - "34": 3249972736.0, - "35": 3249972736.0, - "36": 3249972736.0, - "37": 3249972736.0, - "38": 3249972736.0, - "39": 3249972736.0, - "40": 3249972736.0, - "41": 3249972736.0, - "42": 3249972736.0, - "43": 3249972736.0, - "44": 3249972736.0, - "45": 3249972736.0, - "46": 3249972736.0, - "47": 3249972736.0, - "48": 3249972736.0, - "49": 3249972736.0, - "50": 3249972736.0 + "1": 3057868288.0, + "2": 3298335232.0, + "3": 3298335232.0, + "4": 3300084224.0, + "5": 3300084224.0, + "6": 3300084224.0, + "7": 3300084224.0, + "8": 3300084224.0, + "9": 3300084224.0, + "10": 3300122624.0, + "11": 
3300122624.0, + "12": 3300122624.0, + "13": 3300122624.0, + "14": 3300122624.0, + "15": 3300122624.0, + "16": 3300122624.0, + "17": 3300122624.0, + "18": 3300122624.0, + "19": 3300376576.0, + "20": 3300416000.0, + "21": 3300416000.0, + "22": 3301032960.0, + "23": 3301998080.0, + "24": 3301998080.0, + "25": 3301998080.0, + "26": 3301998080.0, + "27": 3301998080.0, + "28": 3301998080.0, + "29": 3301998080.0, + "30": 3301998080.0, + "31": 3301998080.0, + "32": 3301998080.0, + "33": 3301998080.0, + "34": 3301998080.0, + "35": 3301998080.0, + "36": 3301998080.0, + "37": 3301998080.0, + "38": 3301998080.0, + "39": 3301998080.0, + "40": 3301998080.0, + "41": 3301998080.0, + "42": 3301998080.0, + "43": 3301998080.0, + "44": 3301998080.0, + "45": 3301998080.0, + "46": 3301998080.0, + "47": 3301998080.0, + "48": 3301998080.0, + "49": 3301998080.0, + "50": 3301998080.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 13.35552, - "2": 0.37785, - "3": 0.29632, - "4": 0.29599, - "5": 0.25057, - "6": 0.2376, - "7": 0.24788, - "8": 0.2386, - "9": 0.23567, - "10": 0.23981, - "11": 0.23457, - "12": 0.23608, - "13": 0.24093, - "14": 0.23076, - "15": 0.25524, - "16": 0.23573, - "17": 0.24636, - "18": 0.2348, - "19": 0.23922, - "20": 0.23445, - "21": 0.22924, - "22": 0.23872, - "23": 0.23172, - "24": 0.23116, - "25": 0.23103, - "26": 0.23556, - "27": 0.23228, - "28": 0.23323, - "29": 0.23495, - "30": 0.23011, - "31": 0.27652, - "32": 0.23015, - "33": 0.22902, - "34": 0.25666, - "35": 0.23045, - "36": 0.24626, - "37": 0.23146, - "38": 0.2344, - "39": 0.22864, - "40": 0.24642, - "41": 0.23788, - "42": 0.23274, - "43": 0.24326, - "44": 0.23733, - "45": 0.24263, - "46": 0.25392, - "47": 0.23328, - "48": 0.26156, - "49": 0.27837, - "50": 0.23303 + "1": 15.57121, + "2": 0.28312, + "3": 0.24431, + "4": 0.2266, + "5": 0.21347, + "6": 0.20803, + "7": 0.2145, + "8": 0.20409, + "9": 0.2038, + "10": 0.20378, + "11": 0.20122, + "12": 0.20047, + 
"13": 0.2053, + "14": 0.20008, + "15": 0.22405, + "16": 0.19642, + "17": 0.20937, + "18": 0.19918, + "19": 0.2032, + "20": 0.19792, + "21": 0.19626, + "22": 0.20047, + "23": 0.19555, + "24": 0.2, + "25": 0.23371, + "26": 0.2005, + "27": 0.59196, + "28": 0.19966, + "29": 0.20231, + "30": 0.19778, + "31": 0.23768, + "32": 0.20526, + "33": 0.20518, + "34": 0.22786, + "35": 0.20088, + "36": 0.21894, + "37": 0.20033, + "38": 0.20352, + "39": 0.19985, + "40": 0.20975, + "41": 0.2189, + "42": 0.20277, + "43": 0.20495, + "44": 0.20563, + "45": 0.21473, + "46": 0.21859, + "47": 0.2018, + "48": 0.22732, + "49": 0.2668, + "50": 0.19761 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgx_a100.json index 9114b4bb385..43beb1e88d3 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgx_a100.json @@ -175,7 +175,7 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3404911104.0, + "1": 3405945344.0, "2": 3972516352.0, "3": 3976973312.0, "4": 3976973312.0, @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 7.62035, - "2": 0.36752, - "3": 0.30562, - "4": 0.29876, - "5": 0.298, - "6": 0.29743, - "7": 0.29729, - "8": 0.2967, - "9": 0.29751, - "10": 0.29912, - "11": 0.29575, - "12": 0.29589, - "13": 0.29696, - "14": 0.29898, - "15": 0.30053, - "16": 0.30093, - "17": 0.2977, - "18": 0.2973, - "19": 0.29596, - "20": 0.29757, - "21": 0.2967, - "22": 0.29963, - "23": 0.29707, - "24": 0.29748, - "25": 0.29701, - "26": 0.29838, - "27": 0.29889, - "28": 0.29962, - "29": 0.30399, - "30": 0.30932, - "31": 0.30553, - "32": 0.29765, - "33": 0.30499, - "34": 0.29754, - "35": 
0.29747, - "36": 0.29801, - "37": 0.30768, - "38": 0.29693, - "39": 0.29912, - "40": 0.299, - "41": 0.2982, - "42": 0.37256, - "43": 0.29865, - "44": 0.29774, - "45": 0.29961, - "46": 0.2988, - "47": 0.30454, - "48": 0.30466, - "49": 0.30093, - "50": 0.29883 + "1": 9.45286, + "2": 0.38607, + "3": 0.3213, + "4": 0.29678, + "5": 0.29879, + "6": 0.29861, + "7": 0.29609, + "8": 0.29454, + "9": 0.29554, + "10": 0.2938, + "11": 0.29617, + "12": 0.29426, + "13": 0.29354, + "14": 0.29415, + "15": 0.29446, + "16": 0.29436, + "17": 0.29604, + "18": 0.29438, + "19": 0.29445, + "20": 0.2949, + "21": 0.29462, + "22": 0.2942, + "23": 0.29494, + "24": 0.29415, + "25": 0.29456, + "26": 0.29464, + "27": 0.29403, + "28": 0.29487, + "29": 0.29396, + "30": 0.30341, + "31": 0.29906, + "32": 0.29469, + "33": 0.29821, + "34": 0.29373, + "35": 0.294, + "36": 0.6955, + "37": 0.30497, + "38": 0.29453, + "39": 0.29652, + "40": 0.29409, + "41": 0.29484, + "42": 0.29643, + "43": 0.29621, + "44": 0.2949, + "45": 0.29781, + "46": 0.29896, + "47": 0.29487, + "48": 0.29896, + "49": 0.29728, + "50": 0.29271 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json index acf98f05d31..19b393f6369 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1561367040.0, - "2": 1560972288.0, - "3": 1561248256.0, - "4": 1560096768.0, - "5": 1559926784.0, - "6": 1561850368.0, - "7": 1560161792.0, - "8": 1560285184.0, - "9": 1560998912.0, - "10": 1561293824.0, - "11": 1560700416.0, - "12": 
1562299904.0, - "13": 1560526848.0, - "14": 1561499648.0, - "15": 1559979520.0, - "16": 1561232384.0, - "17": 1561337856.0, - "18": 1560266240.0, - "19": 1561224704.0, - "20": 1560222720.0, - "21": 1561771008.0, - "22": 1559743488.0, - "23": 1560801792.0, - "24": 1561316864.0, - "25": 1560606720.0, - "26": 1562301440.0, - "27": 1560251904.0, - "28": 1559861248.0, - "29": 1559861248.0, - "30": 1560919552.0, - "31": 1561406976.0, - "32": 1565212672.0, - "33": 1560626176.0, - "34": 1561871360.0, - "35": 1560959488.0, - "36": 1561910784.0, - "37": 1559904256.0, - "38": 1560347648.0, - "39": 1562116608.0, - "40": 1562510336.0, - "41": 1562299392.0, - "42": 1561589248.0, - "43": 1560753664.0, - "44": 1561721856.0, - "45": 1561170944.0, - "46": 1561996288.0, - "47": 1560805888.0, - "48": 1561083392.0, - "49": 1560795136.0, - "50": 1561778176.0 + "1": 1561031168.0, + "2": 1562193408.0, + "3": 1561517056.0, + "4": 1560948224.0, + "5": 1562155008.0, + "6": 1563247104.0, + "7": 1562656768.0, + "8": 1562246656.0, + "9": 1561597952.0, + "10": 1564070400.0, + "11": 1562084352.0, + "12": 1559892480.0, + "13": 1562137600.0, + "14": 1561026048.0, + "15": 1561419776.0, + "16": 1562166784.0, + "17": 1560322048.0, + "18": 1561402880.0, + "19": 1564046336.0, + "20": 1562059264.0, + "21": 1560781824.0, + "22": 1561673728.0, + "23": 1562520064.0, + "24": 1561093632.0, + "25": 1561384960.0, + "26": 1562000896.0, + "27": 1561264128.0, + "28": 1561458176.0, + "29": 1561382912.0, + "30": 1562413568.0, + "31": 1560165376.0, + "32": 1561413120.0, + "33": 1562501120.0, + "34": 1562718720.0, + "35": 1563195392.0, + "36": 1561894400.0, + "37": 1560998912.0, + "38": 1563760128.0, + "39": 1561207808.0, + "40": 1562625536.0, + "41": 1561658368.0, + "42": 1561409024.0, + "43": 1559668736.0, + "44": 1561136640.0, + "45": 1560246272.0, + "46": 1562813952.0, + "47": 1561296896.0, + "48": 1561900544.0, + "49": 1562101760.0, + "50": 1563655680.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ 
"end_step": 50, "step_interval": 1, "values": { - "1": 3680567296.0, - "2": 4256236032.0, - "3": 4260136960.0, - "4": 4260136960.0, - "5": 4261063168.0, - "6": 4289287168.0, - "7": 4289287168.0, - "8": 4289287168.0, - "9": 4289287168.0, - "10": 4289287168.0, - "11": 4289287168.0, - "12": 4289287168.0, - "13": 4289287168.0, - "14": 4289287168.0, - "15": 4289287168.0, - "16": 4289287168.0, - "17": 4289287168.0, - "18": 4289287168.0, - "19": 4289287168.0, - "20": 4289287168.0, - "21": 4289287168.0, - "22": 4289287168.0, - "23": 4289287168.0, - "24": 4289287168.0, - "25": 4289287168.0, - "26": 4289287168.0, - "27": 4289287168.0, - "28": 4289287168.0, - "29": 4289287168.0, - "30": 4289287168.0, - "31": 4289287168.0, - "32": 4289287168.0, - "33": 4289287168.0, - "34": 4289287168.0, - "35": 4289287168.0, - "36": 4289287168.0, - "37": 4289287168.0, - "38": 4289287168.0, - "39": 4289287168.0, - "40": 4289287168.0, - "41": 4289287168.0, - "42": 4289287168.0, - "43": 4289287168.0, - "44": 4289287168.0, - "45": 4289287168.0, - "46": 4289287168.0, - "47": 4289287168.0, - "48": 4289287168.0, - "49": 4289287168.0, - "50": 4289287168.0 + "1": 3465706496.0, + "2": 4045009920.0, + "3": 4045009920.0, + "4": 4045009920.0, + "5": 4045009920.0, + "6": 4067111936.0, + "7": 4067111936.0, + "8": 4067111936.0, + "9": 4067111936.0, + "10": 4067111936.0, + "11": 4067111936.0, + "12": 4067111936.0, + "13": 4067111936.0, + "14": 4067111936.0, + "15": 4067111936.0, + "16": 4067111936.0, + "17": 4067111936.0, + "18": 4067111936.0, + "19": 4067111936.0, + "20": 4067111936.0, + "21": 4067111936.0, + "22": 4067111936.0, + "23": 4067111936.0, + "24": 4067111936.0, + "25": 4067111936.0, + "26": 4067111936.0, + "27": 4067111936.0, + "28": 4067111936.0, + "29": 4067111936.0, + "30": 4067111936.0, + "31": 4067111936.0, + "32": 4067111936.0, + "33": 4067111936.0, + "34": 4067111936.0, + "35": 4067111936.0, + "36": 4067111936.0, + "37": 4067111936.0, + "38": 4067111936.0, + "39": 4067111936.0, + "40": 
4067111936.0, + "41": 4067111936.0, + "42": 4067111936.0, + "43": 4067111936.0, + "44": 4067111936.0, + "45": 4067111936.0, + "46": 4067111936.0, + "47": 4067111936.0, + "48": 4067111936.0, + "49": 4067111936.0, + "50": 4067111936.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 18.57368, - "2": 0.50382, - "3": 0.41522, - "4": 0.37227, - "5": 0.37501, - "6": 0.33117, - "7": 0.32515, - "8": 0.31941, - "9": 0.32367, - "10": 0.32326, - "11": 0.30606, - "12": 0.30616, - "13": 0.29955, - "14": 0.30443, - "15": 0.30558, - "16": 0.29289, - "17": 0.30498, - "18": 0.29213, - "19": 0.29318, - "20": 0.29695, - "21": 0.29798, - "22": 0.31295, - "23": 0.29473, - "24": 0.29975, - "25": 0.29698, - "26": 0.30574, - "27": 0.29785, - "28": 0.30807, - "29": 0.29928, - "30": 0.3087, - "31": 0.30718, - "32": 0.30993, - "33": 0.30203, - "34": 0.31719, - "35": 0.30742, - "36": 0.30563, - "37": 0.31427, - "38": 0.31171, - "39": 0.31768, - "40": 0.30755, - "41": 0.30394, - "42": 0.29792, - "43": 0.30454, - "44": 0.31398, - "45": 0.29651, - "46": 0.31171, - "47": 0.29161, - "48": 0.3034, - "49": 0.2972, - "50": 0.29959 + "1": 25.658, + "2": 0.47954, + "3": 0.41847, + "4": 0.33258, + "5": 0.34351, + "6": 0.31011, + "7": 0.31575, + "8": 0.29238, + "9": 0.30311, + "10": 0.34916, + "11": 0.30925, + "12": 0.34341, + "13": 0.28433, + "14": 0.28892, + "15": 0.29252, + "16": 0.2927, + "17": 0.30297, + "18": 0.29339, + "19": 0.2886, + "20": 0.29686, + "21": 0.29022, + "22": 0.65703, + "23": 0.29161, + "24": 0.29821, + "25": 0.29341, + "26": 0.30856, + "27": 0.2991, + "28": 0.29279, + "29": 0.29852, + "30": 0.30839, + "31": 0.29491, + "32": 0.2896, + "33": 0.29084, + "34": 0.32605, + "35": 0.29205, + "36": 0.28559, + "37": 0.29399, + "38": 0.28264, + "39": 0.28463, + "40": 0.28019, + "41": 0.28893, + "42": 0.27586, + "43": 0.28759, + "44": 0.28318, + "45": 0.27759, + "46": 0.27363, + "47": 0.27776, + "48": 0.27855, + "49": 1.02062, + "50": 
0.28168 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json index a47b94faa75..3948f0ea908 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 11.04733, "2": 11.03572, - "3": 9.58776, - "4": 9.25801, - "5": 9.53164, - "6": 9.90992, - "7": 9.48661, - "8": 8.93947, - "9": 8.65725, - "10": 9.0567, - "11": 8.49436, - "12": 8.52422, - "13": 8.45295, - "14": 7.97674, - "15": 8.04629, - "16": 8.08024, - "17": 8.08398, - "18": 7.76141, - "19": 8.15001, - "20": 7.89339, - "21": 7.58212, - "22": 7.54491, - "23": 7.43428, - "24": 7.42622, - "25": 7.67267, - "26": 7.07291, - "27": 7.61503, - "28": 7.31789, - "29": 7.48965, - "30": 7.64357, - "31": 7.3927, - "32": 7.58407, - "33": 7.63624, - "34": 7.69746, - "35": 7.21377, - "36": 7.08367, - "37": 7.4245, - "38": 7.18783, - "39": 7.5498, - "40": 7.54133, - "41": 7.48816, - "42": 7.24677, - "43": 7.23194, - "44": 7.41471, - "45": 7.18838, - "46": 6.89674, - "47": 7.29904, - "48": 7.13855, - "49": 7.58882, - "50": 7.03386 + "3": 9.58761, + "4": 9.25798, + "5": 9.53373, + "6": 9.90316, + "7": 9.4853, + "8": 8.93791, + "9": 8.65798, + "10": 9.05611, + "11": 8.49418, + "12": 8.5242, + "13": 8.45277, + "14": 7.97207, + "15": 8.04481, + "16": 8.0797, + "17": 8.08354, + "18": 7.76107, + "19": 8.14865, + "20": 7.89777, + "21": 7.58594, + "22": 7.54567, + "23": 7.43399, + "24": 7.43098, + "25": 7.67584, + "26": 7.07216, + "27": 7.6197, + "28": 7.32805, + "29": 7.4899, + "30": 7.64402, + "31": 7.39581, + "32": 7.58878, + "33": 7.63916, + "34": 7.69992, + "35": 7.21112, + "36": 7.08484, + 
"37": 7.42312, + "38": 7.18694, + "39": 7.54858, + "40": 7.54095, + "41": 7.48915, + "42": 7.24832, + "43": 7.2344, + "44": 7.4117, + "45": 7.1836, + "46": 6.89743, + "47": 7.29953, + "48": 7.14192, + "49": 7.58721, + "50": 7.03393 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 38802552.0, + "1": 38802580.0, "2": 38543496.0, - "3": 38742496.0, - "4": 276808768.0, - "5": 252900224.0, - "6": 262014400.0, - "7": 604765376.0, - "8": 778329280.0, - "9": 664674944.0, - "10": 728521920.0, - "11": 718868480.0, - "12": 787622592.0, - "13": 900296192.0, - "14": 831151488.0, - "15": 762029184.0, - "16": 938532864.0, - "17": 633234048.0, - "18": 708920704.0, - "19": 976315584.0, - "20": 986060288.0, - "21": 781551744.0, - "22": 762139648.0, - "23": 888477824.0, - "24": 851552512.0, - "25": 827443072.0, - "26": 812721088.0, - "27": 806914304.0, - "28": 802850496.0, - "29": 748894592.0, - "30": 731604672.0, - "31": 752878144.0, - "32": 762315520.0, - "33": 737258304.0, - "34": 746789888.0, - "35": 734508928.0, - "36": 674695808.0, - "37": 673198208.0, - "38": 633526912.0, - "39": 620340928.0, - "40": 613575552.0, - "41": 566869312.0, - "42": 557646592.0, - "43": 554752576.0, - "44": 547950784.0, - "45": 527374464.0, - "46": 347107200.0, - "47": 497586496.0, - "48": 497828864.0, - "49": 465758912.0, - "50": 450885792.0 + "3": 38739384.0, + "4": 286224448.0, + "5": 252889984.0, + "6": 255719936.0, + "7": 604766528.0, + "8": 762591552.0, + "9": 658408896.0, + "10": 737969280.0, + "11": 728304000.0, + "12": 759307840.0, + "13": 900330048.0, + "14": 827930176.0, + "15": 771439488.0, + "16": 941681408.0, + "17": 645770560.0, + "18": 630285120.0, + "19": 976311360.0, + "20": 982916608.0, + "21": 781530112.0, + "22": 714968384.0, + "23": 907354560.0, + "24": 807526912.0, + "25": 814861568.0, + "26": 800138240.0, + "27": 847802560.0, + "28": 831162880.0, + "29": 811810368.0, + "30": 816535808.0, + "31": 815796160.0, + "32": 793772928.0, + 
"33": 781300032.0, + "34": 778254592.0, + "35": 762826688.0, + "36": 737609088.0, + "37": 679501376.0, + "38": 664984064.0, + "39": 645504448.0, + "40": 635595648.0, + "41": 604614784.0, + "42": 579667968.0, + "43": 567337600.0, + "44": 557388992.0, + "45": 533662880.0, + "46": 340805728.0, + "47": 488152032.0, + "48": 475815680.0, + "49": 453176704.0, + "50": 438299776.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 54204293120.0, - "2": 56956715008.0, - "3": 57074692096.0, - "4": 57074692096.0, - "5": 57074692096.0, - "6": 57074692096.0, - "7": 57074692096.0, - "8": 57074692096.0, - "9": 57074692096.0, - "10": 57074692096.0, - "11": 57074692096.0, - "12": 57074692096.0, - "13": 57074692096.0, - "14": 57074692096.0, - "15": 57074692096.0, - "16": 57074692096.0, - "17": 57074692096.0, - "18": 57074692096.0, - "19": 57074692096.0, - "20": 57074692096.0, - "21": 57074692096.0, - "22": 57074692096.0, - "23": 57074692096.0, - "24": 57074692096.0, - "25": 57074692096.0, - "26": 57211289600.0, - "27": 57211289600.0, - "28": 57211289600.0, - "29": 57368535040.0, - "30": 57742073856.0, - "31": 57742073856.0, - "32": 57742073856.0, - "33": 57742073856.0, - "34": 57744101376.0, - "35": 58293194752.0, - "36": 58293194752.0, - "37": 58293194752.0, - "38": 58293194752.0, - "39": 58293194752.0, - "40": 58293194752.0, - "41": 58293194752.0, - "42": 58293194752.0, - "43": 58293194752.0, - "44": 58293194752.0, - "45": 58293194752.0, - "46": 58293194752.0, - "47": 58293194752.0, - "48": 58293194752.0, - "49": 58293194752.0, - "50": 58293194752.0 + "1": 55051542528.0, + "2": 57803964416.0, + "3": 57920471040.0, + "4": 57920471040.0, + "5": 57920471040.0, + "6": 57920471040.0, + "7": 57920471040.0, + "8": 57920471040.0, + "9": 57920471040.0, + "10": 57920471040.0, + "11": 57920471040.0, + "12": 57920471040.0, + "13": 57920471040.0, + "14": 57920471040.0, + "15": 57920471040.0, + "16": 57920471040.0, + "17": 57920471040.0, + 
"18": 57920471040.0, + "19": 57920471040.0, + "20": 57920471040.0, + "21": 57920471040.0, + "22": 57920471040.0, + "23": 57920471040.0, + "24": 57920471040.0, + "25": 57920471040.0, + "26": 57920471040.0, + "27": 57920471040.0, + "28": 57920471040.0, + "29": 57920471040.0, + "30": 58636701696.0, + "31": 58636701696.0, + "32": 58636701696.0, + "33": 58636701696.0, + "34": 58636701696.0, + "35": 58636701696.0, + "36": 58684317696.0, + "37": 59176394752.0, + "38": 59698597888.0, + "39": 60111630336.0, + "40": 60111630336.0, + "41": 60111630336.0, + "42": 60111630336.0, + "43": 60111630336.0, + "44": 60111630336.0, + "45": 60111630336.0, + "46": 60111630336.0, + "47": 60111630336.0, + "48": 60111630336.0, + "49": 60111630336.0, + "50": 60111630336.0 } }, "mtp_1 loss": { @@ -234,54 +234,54 @@ "values": { "1": 11.0765, "2": 11.07404, - "3": 10.53863, - "4": 10.0981, - "5": 9.81152, - "6": 10.0744, - "7": 9.79944, - "8": 9.07176, - "9": 8.87116, - "10": 9.12759, - "11": 8.49894, - "12": 8.53114, - "13": 8.42531, - "14": 7.84784, - "15": 7.99147, - "16": 8.05102, - "17": 8.00126, - "18": 7.73217, - "19": 8.11102, - "20": 7.83055, - "21": 7.52608, - "22": 7.49979, - "23": 7.37315, - "24": 7.37265, - "25": 7.61392, - "26": 7.01833, - "27": 7.55877, - "28": 7.26822, - "29": 7.44363, - "30": 7.58581, - "31": 7.3265, - "32": 7.50876, - "33": 7.57264, - "34": 7.63783, - "35": 7.15428, - "36": 7.02086, - "37": 7.35313, - "38": 7.12909, - "39": 7.48882, - "40": 7.47518, - "41": 7.42231, - "42": 7.17726, - "43": 7.16243, - "44": 7.34345, - "45": 7.12344, - "46": 6.8279, - "47": 7.23665, - "48": 7.08061, - "49": 7.51184, - "50": 6.9731 + "3": 10.53858, + "4": 10.09805, + "5": 9.81149, + "6": 10.07175, + "7": 9.79911, + "8": 9.07181, + "9": 8.87128, + "10": 9.12754, + "11": 8.49883, + "12": 8.53076, + "13": 8.42486, + "14": 7.84718, + "15": 7.99114, + "16": 8.05044, + "17": 8.0009, + "18": 7.73184, + "19": 8.11049, + "20": 7.83068, + "21": 7.52561, + "22": 7.49995, + "23": 7.37324, + 
"24": 7.37304, + "25": 7.61503, + "26": 7.01863, + "27": 7.5608, + "28": 7.26908, + "29": 7.4442, + "30": 7.58626, + "31": 7.327, + "32": 7.5089, + "33": 7.57391, + "34": 7.63803, + "35": 7.15468, + "36": 7.02234, + "37": 7.35288, + "38": 7.12913, + "39": 7.48869, + "40": 7.47562, + "41": 7.42293, + "42": 7.17768, + "43": 7.16333, + "44": 7.34362, + "45": 7.12401, + "46": 6.82934, + "47": 7.23649, + "48": 7.08053, + "49": 7.51319, + "50": 6.97383 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 97.95665, - "2": 1.66988, - "3": 1.35644, - "4": 2.24552, - "5": 2.14285, - "6": 1.60272, - "7": 1.5113, - "8": 2.10932, - "9": 1.69738, - "10": 1.0561, - "11": 1.04064, - "12": 1.0335, - "13": 1.03186, - "14": 1.03406, - "15": 1.05897, - "16": 1.03516, - "17": 1.04396, - "18": 1.08073, - "19": 1.06079, - "20": 1.04178, - "21": 1.03726, - "22": 1.03706, - "23": 1.03878, - "24": 1.04111, - "25": 1.04952, - "26": 1.04497, - "27": 1.04672, - "28": 1.03793, - "29": 1.03092, - "30": 1.04813, - "31": 1.03205, - "32": 1.03729, - "33": 1.02557, - "34": 1.03623, - "35": 1.04247, - "36": 1.03261, - "37": 1.03911, - "38": 1.04764, - "39": 1.0376, - "40": 1.04918, - "41": 1.03907, - "42": 1.05227, - "43": 1.04186, - "44": 1.04266, - "45": 1.03786, - "46": 1.04673, - "47": 1.05766, - "48": 1.04958, - "49": 1.05312, - "50": 1.05239 + "1": 73.10019, + "2": 1.25873, + "3": 1.16322, + "4": 1.29653, + "5": 1.29631, + "6": 1.11998, + "7": 1.35727, + "8": 1.09252, + "9": 1.11578, + "10": 1.02138, + "11": 1.01615, + "12": 1.01222, + "13": 1.02281, + "14": 1.02294, + "15": 1.02492, + "16": 1.01859, + "17": 1.03891, + "18": 1.03349, + "19": 1.02727, + "20": 1.02559, + "21": 1.02143, + "22": 1.02847, + "23": 1.02845, + "24": 1.01891, + "25": 1.02716, + "26": 1.0234, + "27": 1.02648, + "28": 1.0165, + "29": 1.02468, + "30": 1.02451, + "31": 1.0298, + "32": 1.02899, + "33": 1.01515, + "34": 1.02615, + "35": 1.02426, + "36": 1.02583, + "37": 1.0171, 
+ "38": 1.01354, + "39": 1.03472, + "40": 1.02918, + "41": 1.03913, + "42": 1.03355, + "43": 1.02441, + "44": 1.03591, + "45": 1.02675, + "46": 1.04457, + "47": 1.05738, + "48": 1.02657, + "49": 1.0303, + "50": 1.02663 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json index c55faf839a8..82b8d8b1e56 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.94944, "2": 10.95158, - "3": 10.50318, - "4": 9.964, - "5": 9.94016, - "6": 9.67332, - "7": 10.23184, - "8": 9.4965, - "9": 9.54631, - "10": 9.79388, - "11": 9.3003, - "12": 9.40451, - "13": 9.39562, - "14": 8.8513, - "15": 9.02474, - "16": 9.07111, - "17": 9.04534, - "18": 8.75805, - "19": 9.1794, - "20": 8.86325, - "21": 8.5391, - "22": 8.55134, - "23": 8.42688, - "24": 8.38109, - "25": 8.63783, - "26": 7.96861, - "27": 8.57603, - "28": 8.1922, - "29": 8.3971, - "30": 8.67285, - "31": 8.28458, - "32": 8.43378, - "33": 8.55597, - "34": 8.65985, - "35": 8.07899, - "36": 7.94715, - "37": 8.29413, - "38": 7.97958, - "39": 8.39117, - "40": 8.35496, - "41": 8.31782, - "42": 8.05717, - "43": 8.03152, - "44": 8.24042, - "45": 8.0999, - "46": 7.61677, - "47": 8.15178, - "48": 8.00508, - "49": 8.38458, - "50": 7.81369 + "3": 10.50143, + "4": 9.9637, + "5": 9.9402, + "6": 9.6731, + "7": 10.2345, + "8": 9.49643, + "9": 9.54137, + "10": 9.7923, + "11": 9.29954, + "12": 9.40392, + "13": 9.39508, + "14": 8.85071, + "15": 9.02369, + "16": 9.07021, + "17": 9.04484, + "18": 8.75671, + "19": 9.17766, + 
"20": 8.86116, + "21": 8.53586, + "22": 8.54907, + "23": 8.42586, + "24": 8.37914, + "25": 8.63571, + "26": 7.96589, + "27": 8.57436, + "28": 8.19058, + "29": 8.39383, + "30": 8.6699, + "31": 8.28275, + "32": 8.43083, + "33": 8.55346, + "34": 8.65736, + "35": 8.07845, + "36": 7.94562, + "37": 8.29186, + "38": 7.97668, + "39": 8.38836, + "40": 8.35237, + "41": 8.31549, + "42": 8.05591, + "43": 8.03009, + "44": 8.23739, + "45": 8.09515, + "46": 7.61452, + "47": 8.14972, + "48": 8.00299, + "49": 8.38216, + "50": 7.81157 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19403652.0, - "2": 19274102.0, - "3": 19373168.0, - "4": 86562120.0, - "5": 151677296.0, - "6": 142091232.0, - "7": 167132032.0, - "8": 197337088.0, - "9": 168836496.0, - "10": 162963792.0, - "11": 211653824.0, - "12": 214575616.0, - "13": 231549168.0, - "14": 220571728.0, - "15": 250508240.0, - "16": 168968368.0, - "17": 294610112.0, - "18": 167327952.0, - "19": 156385504.0, - "20": 177007072.0, - "21": 219468816.0, - "22": 217511168.0, - "23": 194318208.0, - "24": 208788192.0, - "25": 240820928.0, - "26": 250667072.0, - "27": 235205856.0, - "28": 285071552.0, - "29": 270668736.0, - "30": 241596448.0, - "31": 256938208.0, - "32": 252232640.0, - "33": 213058752.0, - "34": 217720576.0, - "35": 172316416.0, - "36": 246137120.0, - "37": 228162320.0, - "38": 238162048.0, - "39": 211207168.0, - "40": 206162560.0, - "41": 151397232.0, - "42": 206473424.0, - "43": 175165248.0, - "44": 182768560.0, - "45": 158317856.0, - "46": 159388704.0, - "47": 152897904.0, - "48": 143548896.0, - "49": 124357696.0, - "50": 151519648.0 + "1": 19403658.0, + "2": 19274108.0, + "3": 19374004.0, + "4": 86537864.0, + "5": 137554544.0, + "6": 131043136.0, + "7": 167191584.0, + "8": 187932592.0, + "9": 167271824.0, + "10": 163003344.0, + "11": 222662128.0, + "12": 206727744.0, + "13": 231576672.0, + "14": 229976992.0, + "15": 248932672.0, + "16": 234972816.0, + "17": 252131904.0, + "18": 
176733312.0, + "19": 175326720.0, + "20": 197382592.0, + "21": 225766720.0, + "22": 217633664.0, + "23": 196029024.0, + "24": 210323328.0, + "25": 221997792.0, + "26": 239705040.0, + "27": 246196976.0, + "28": 278753024.0, + "29": 272254432.0, + "30": 228998896.0, + "31": 252338576.0, + "32": 205052992.0, + "33": 250756576.0, + "34": 205128928.0, + "35": 192742864.0, + "36": 244582560.0, + "37": 180947680.0, + "38": 231918688.0, + "39": 220600064.0, + "40": 212460240.0, + "41": 215821280.0, + "42": 176641872.0, + "43": 203473536.0, + "44": 151341744.0, + "45": 167786640.0, + "46": 105920200.0, + "47": 173317104.0, + "48": 164021296.0, + "49": 100857144.0, + "50": 164130128.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4875597824.0, - "2": 4875363840.0, - "3": 4874979840.0, - "4": 4874899968.0, - "5": 4875749888.0, - "6": 4876656128.0, - "7": 4875178496.0, - "8": 4874036736.0, - "9": 4876568064.0, - "10": 4876058112.0, - "11": 4876045824.0, - "12": 4874515968.0, - "13": 4875086336.0, - "14": 4874568192.0, - "15": 4875987456.0, - "16": 4874790400.0, - "17": 4875477504.0, - "18": 4875512320.0, - "19": 4876186112.0, - "20": 4875747840.0, - "21": 4874790400.0, - "22": 4876221952.0, - "23": 4874534400.0, - "24": 4875733504.0, - "25": 4875019776.0, - "26": 4875168256.0, - "27": 4874978816.0, - "28": 4875781632.0, - "29": 4876329472.0, - "30": 4875107840.0, - "31": 4874253824.0, - "32": 4874167808.0, - "33": 4876044800.0, - "34": 4875914752.0, - "35": 4874962432.0, - "36": 4875862528.0, - "37": 4877336064.0, - "38": 4875002368.0, - "39": 4874599936.0, - "40": 4874880512.0, - "41": 4875294208.0, - "42": 4875419136.0, - "43": 4875780608.0, - "44": 4874780160.0, - "45": 4875191808.0, - "46": 4875717120.0, - "47": 4874050048.0, - "48": 4875580928.0, - "49": 4875412992.0, - "50": 4875462144.0 + "1": 4876392448.0, + "2": 4875814400.0, + "3": 4875040256.0, + "4": 4876553728.0, + "5": 4876546560.0, + "6": 4875578880.0, 
+ "7": 4877725184.0, + "8": 4876062208.0, + "9": 4875521536.0, + "10": 4875812352.0, + "11": 4877753856.0, + "12": 4875833856.0, + "13": 4875491840.0, + "14": 4876834304.0, + "15": 4874819072.0, + "16": 4875979264.0, + "17": 4876512768.0, + "18": 4876787200.0, + "19": 4874727936.0, + "20": 4875113984.0, + "21": 4875528704.0, + "22": 4876432896.0, + "23": 4877065728.0, + "24": 4875671040.0, + "25": 4875840000.0, + "26": 4875620864.0, + "27": 4876904960.0, + "28": 4875815424.0, + "29": 4877359616.0, + "30": 4875890176.0, + "31": 4875692544.0, + "32": 4874448384.0, + "33": 4876354048.0, + "34": 4876618240.0, + "35": 4874722816.0, + "36": 4875591168.0, + "37": 4876935680.0, + "38": 4877427200.0, + "39": 4876846592.0, + "40": 4876000768.0, + "41": 4876271104.0, + "42": 4876566016.0, + "43": 4875017728.0, + "44": 4875452928.0, + "45": 4875992576.0, + "46": 4874968576.0, + "47": 4874319360.0, + "48": 4877893120.0, + "49": 4875783680.0, + "50": 4876252672.0 } }, "mem-max-allocated-bytes": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 84.85893, - "2": 1.16099, - "3": 0.98814, - "4": 0.90006, - "5": 1.44704, - "6": 1.12424, - "7": 1.08423, - "8": 1.07558, - "9": 1.1513, - "10": 0.88417, - "11": 1.07532, - "12": 0.88519, - "13": 0.87318, - "14": 0.87758, - "15": 0.87276, - "16": 0.8776, - "17": 0.86863, - "18": 0.87011, - "19": 0.86845, - "20": 0.86617, - "21": 0.85521, - "22": 0.86783, - "23": 0.86126, - "24": 0.85746, - "25": 0.85758, - "26": 0.86093, - "27": 0.85634, - "28": 0.85365, - "29": 0.86147, - "30": 0.86891, - "31": 0.85512, - "32": 0.85344, - "33": 0.85409, - "34": 0.85597, - "35": 0.85605, - "36": 0.84565, - "37": 0.84908, - "38": 0.85623, - "39": 0.8586, - "40": 0.87856, - "41": 0.85187, - "42": 0.86298, - "43": 0.85814, - "44": 0.85706, - "45": 0.85473, - "46": 0.85417, - "47": 0.85861, - "48": 0.85261, - "49": 0.85118, - "50": 0.84383 + "1": 73.81742, + "2": 1.08519, + "3": 0.9475, + "4": 0.8839, + "5": 1.11345, + "6": 
0.85209, + "7": 1.03653, + "8": 1.16512, + "9": 0.8689, + "10": 0.85758, + "11": 0.85766, + "12": 0.8648, + "13": 0.85582, + "14": 0.85912, + "15": 0.85612, + "16": 0.85625, + "17": 0.84689, + "18": 0.85414, + "19": 0.85342, + "20": 0.85913, + "21": 0.84294, + "22": 0.84528, + "23": 0.8484, + "24": 0.84952, + "25": 0.84758, + "26": 0.84799, + "27": 0.84573, + "28": 0.85082, + "29": 0.85369, + "30": 0.85037, + "31": 0.85238, + "32": 0.84846, + "33": 0.85245, + "34": 0.86084, + "35": 0.85495, + "36": 0.85092, + "37": 0.85315, + "38": 0.85318, + "39": 0.85153, + "40": 0.84991, + "41": 0.84921, + "42": 0.84843, + "43": 0.84456, + "44": 0.85002, + "45": 0.84683, + "46": 0.84268, + "47": 0.849, + "48": 0.8467, + "49": 0.84356, + "50": 0.84122 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json index 5272fa38474..bfbb1e850e1 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 787516416.0, - "2": 787540992.0, - "3": 787524096.0, - "4": 787512320.0, - "5": 787547136.0, - "6": 787537920.0, - "7": 787512832.0, - "8": 787524608.0, - "9": 787528192.0, - "10": 787505152.0, - "11": 787522048.0, - "12": 787520000.0, - "13": 787529728.0, - "14": 787529216.0, - "15": 787504128.0, - "16": 787513344.0, - "17": 787503104.0, - "18": 787489280.0, - "19": 787514880.0, - "20": 787505152.0, - "21": 787479552.0, - "22": 787486208.0, - "23": 787478528.0, - "24": 787486208.0, - "25": 
787451392.0, - "26": 787482112.0, - "27": 787470848.0, - "28": 787450368.0, - "29": 787458048.0, - "30": 787435008.0, - "31": 787406848.0, - "32": 787424256.0, - "33": 787435520.0, - "34": 787426304.0, - "35": 787418624.0, - "36": 787436544.0, - "37": 787428352.0, - "38": 787436544.0, - "39": 787417600.0, - "40": 787415040.0, - "41": 787405824.0, - "42": 787415040.0, - "43": 787367936.0, - "44": 787392512.0, - "45": 787399680.0, - "46": 787355136.0, - "47": 787411456.0, - "48": 787354112.0, - "49": 787374080.0, - "50": 787389440.0, - "51": 787375616.0, - "52": 787383808.0, - "53": 787379712.0, - "54": 787384832.0, - "55": 787388928.0, - "56": 787388928.0, - "57": 787351040.0, - "58": 787382784.0, - "59": 787374080.0, - "60": 787395072.0, - "61": 787405312.0, - "62": 787405824.0, - "63": 787373056.0, - "64": 787388928.0, - "65": 787351552.0, - "66": 787386880.0, - "67": 787392000.0, - "68": 787399168.0, - "69": 787383296.0, - "70": 787393024.0, - "71": 787406848.0, - "72": 787400704.0, - "73": 787401216.0, - "74": 787403264.0, - "75": 787442688.0, - "76": 787444736.0, - "77": 787445760.0, - "78": 787395072.0, - "79": 787430400.0, - "80": 787410432.0, - "81": 787412992.0, - "82": 787427840.0, - "83": 787428864.0, - "84": 787412480.0, - "85": 787412480.0, - "86": 787394560.0, - "87": 787452928.0, - "88": 787414528.0, - "89": 787404800.0, - "90": 787446784.0, - "91": 787446272.0, - "92": 787446784.0, - "93": 787430400.0, - "94": 787440128.0, - "95": 787450368.0, - "96": 787454976.0, - "97": 787427328.0, - "98": 787475968.0, - "99": 787419136.0, - "100": 787438592.0 + "1": 1668119552.0, + "2": 1668144128.0, + "3": 1668127232.0, + "4": 1668115456.0, + "5": 1668150272.0, + "6": 1668141056.0, + "7": 1668115968.0, + "8": 1668127744.0, + "9": 1668131328.0, + "10": 1668108288.0, + "11": 1668125184.0, + "12": 1668123136.0, + "13": 1668132864.0, + "14": 1668132352.0, + "15": 1668107264.0, + "16": 1668116480.0, + "17": 1668106240.0, + "18": 1668092416.0, + "19": 1668118016.0, + 
"20": 1668108288.0, + "21": 1668082688.0, + "22": 1668089344.0, + "23": 1668081664.0, + "24": 1668089344.0, + "25": 1668054528.0, + "26": 1668085248.0, + "27": 1668073984.0, + "28": 1668053504.0, + "29": 1668061184.0, + "30": 1668038144.0, + "31": 1668009984.0, + "32": 1668027392.0, + "33": 1668038656.0, + "34": 1668029440.0, + "35": 1668021760.0, + "36": 1668039680.0, + "37": 1668031488.0, + "38": 1668039680.0, + "39": 1668020736.0, + "40": 1668018176.0, + "41": 1668008960.0, + "42": 1668018176.0, + "43": 1667971072.0, + "44": 1667995648.0, + "45": 1668002816.0, + "46": 1667958272.0, + "47": 1668014592.0, + "48": 1667957248.0, + "49": 1667977216.0, + "50": 1667992576.0, + "51": 1667978752.0, + "52": 1667986944.0, + "53": 1667982848.0, + "54": 1667987968.0, + "55": 1667992064.0, + "56": 1667992064.0, + "57": 1667954176.0, + "58": 1667985920.0, + "59": 1667977216.0, + "60": 1667998208.0, + "61": 1668008448.0, + "62": 1668008960.0, + "63": 1667976192.0, + "64": 1667992064.0, + "65": 1667954688.0, + "66": 1667990016.0, + "67": 1667995136.0, + "68": 1668002304.0, + "69": 1667986432.0, + "70": 1667996160.0, + "71": 1668009984.0, + "72": 1668003840.0, + "73": 1668004352.0, + "74": 1668006400.0, + "75": 1668045824.0, + "76": 1668047872.0, + "77": 1668048896.0, + "78": 1667998208.0, + "79": 1668033536.0, + "80": 1668013568.0, + "81": 1668016128.0, + "82": 1668030976.0, + "83": 1668032000.0, + "84": 1668015616.0, + "85": 1668015616.0, + "86": 1667997696.0, + "87": 1668056064.0, + "88": 1668017664.0, + "89": 1668007936.0, + "90": 1668049920.0, + "91": 1668049408.0, + "92": 1668049920.0, + "93": 1668033536.0, + "94": 1668043264.0, + "95": 1668053504.0, + "96": 1668058112.0, + "97": 1668030464.0, + "98": 1668079104.0, + "99": 1668022272.0, + "100": 1668041728.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2579673088.0, - "2": 2590714880.0, - "3": 2590714880.0, - "4": 2590714880.0, - "5": 2596039680.0, - "6": 
2596039680.0, - "7": 2596039680.0, - "8": 2596039680.0, - "9": 2596039680.0, - "10": 2596039680.0, - "11": 2596039680.0, - "12": 2596039680.0, - "13": 2596039680.0, - "14": 2596039680.0, - "15": 2596039680.0, - "16": 2596039680.0, - "17": 2596039680.0, - "18": 2596039680.0, - "19": 2596039680.0, - "20": 2596039680.0, - "21": 2596039680.0, - "22": 2596039680.0, - "23": 2596039680.0, - "24": 2596039680.0, - "25": 2596039680.0, - "26": 2596039680.0, - "27": 2596039680.0, - "28": 2596039680.0, - "29": 2596039680.0, - "30": 2596039680.0, - "31": 2596039680.0, - "32": 2596039680.0, - "33": 2596039680.0, - "34": 2596039680.0, - "35": 2596039680.0, - "36": 2596039680.0, - "37": 2596039680.0, - "38": 2596039680.0, - "39": 2596039680.0, - "40": 2596039680.0, - "41": 2596039680.0, - "42": 2596039680.0, - "43": 2596039680.0, - "44": 2596039680.0, - "45": 2596039680.0, - "46": 2596039680.0, - "47": 2596039680.0, - "48": 2596039680.0, - "49": 2596039680.0, - "50": 2596039680.0, - "51": 2596039680.0, - "52": 2596039680.0, - "53": 2596039680.0, - "54": 2596039680.0, - "55": 2596039680.0, - "56": 2596039680.0, - "57": 2596039680.0, - "58": 2596039680.0, - "59": 2596039680.0, - "60": 2596039680.0, - "61": 2596039680.0, - "62": 2596039680.0, - "63": 2596039680.0, - "64": 2596039680.0, - "65": 2596039680.0, - "66": 2596039680.0, - "67": 2596039680.0, - "68": 2596039680.0, - "69": 2596039680.0, - "70": 2596039680.0, - "71": 2596039680.0, - "72": 2596039680.0, - "73": 2596039680.0, - "74": 2596039680.0, - "75": 2596039680.0, - "76": 2596039680.0, - "77": 2596039680.0, - "78": 2596039680.0, - "79": 2596039680.0, - "80": 2596039680.0, - "81": 2596039680.0, - "82": 2596039680.0, - "83": 2596039680.0, - "84": 2596039680.0, - "85": 2596039680.0, - "86": 2596039680.0, - "87": 2596039680.0, - "88": 2596039680.0, - "89": 2596039680.0, - "90": 2596039680.0, - "91": 2596039680.0, - "92": 2596039680.0, - "93": 2596039680.0, - "94": 2596039680.0, - "95": 2596039680.0, - "96": 2596039680.0, - "97": 
2596039680.0, - "98": 2596039680.0, - "99": 2596039680.0, - "100": 2596039680.0 + "1": 3460789248.0, + "2": 3470375936.0, + "3": 3470375936.0, + "4": 3470375936.0, + "5": 3480799232.0, + "6": 3480799232.0, + "7": 3480799232.0, + "8": 3480799232.0, + "9": 3480799232.0, + "10": 3480799232.0, + "11": 3480799232.0, + "12": 3480799232.0, + "13": 3480799232.0, + "14": 3480799232.0, + "15": 3480799232.0, + "16": 3480799232.0, + "17": 3480799232.0, + "18": 3480799232.0, + "19": 3480799232.0, + "20": 3480799232.0, + "21": 3480799232.0, + "22": 3480799232.0, + "23": 3480799232.0, + "24": 3480799232.0, + "25": 3480799232.0, + "26": 3480799232.0, + "27": 3480799232.0, + "28": 3480799232.0, + "29": 3480799232.0, + "30": 3480799232.0, + "31": 3480799232.0, + "32": 3480799232.0, + "33": 3480799232.0, + "34": 3480799232.0, + "35": 3480799232.0, + "36": 3480799232.0, + "37": 3480799232.0, + "38": 3480799232.0, + "39": 3480799232.0, + "40": 3480799232.0, + "41": 3480799232.0, + "42": 3480799232.0, + "43": 3480799232.0, + "44": 3480799232.0, + "45": 3480799232.0, + "46": 3480799232.0, + "47": 3480799232.0, + "48": 3480799232.0, + "49": 3480799232.0, + "50": 3480799232.0, + "51": 3480799232.0, + "52": 3480799232.0, + "53": 3480799232.0, + "54": 3480799232.0, + "55": 3480799232.0, + "56": 3480799232.0, + "57": 3480799232.0, + "58": 3480799232.0, + "59": 3480799232.0, + "60": 3480799232.0, + "61": 3480799232.0, + "62": 3480799232.0, + "63": 3480799232.0, + "64": 3480799232.0, + "65": 3480799232.0, + "66": 3480799232.0, + "67": 3480799232.0, + "68": 3480799232.0, + "69": 3480799232.0, + "70": 3480799232.0, + "71": 3480799232.0, + "72": 3480799232.0, + "73": 3480799232.0, + "74": 3480799232.0, + "75": 3480799232.0, + "76": 3480799232.0, + "77": 3480799232.0, + "78": 3480799232.0, + "79": 3480799232.0, + "80": 3480799232.0, + "81": 3480799232.0, + "82": 3480799232.0, + "83": 3480799232.0, + "84": 3480799232.0, + "85": 3480799232.0, + "86": 3480799232.0, + "87": 3480799232.0, + "88": 
3480799232.0, + "89": 3480799232.0, + "90": 3480799232.0, + "91": 3480799232.0, + "92": 3480799232.0, + "93": 3480799232.0, + "94": 3480799232.0, + "95": 3480799232.0, + "96": 3480799232.0, + "97": 3480799232.0, + "98": 3480799232.0, + "99": 3480799232.0, + "100": 3480799232.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 11.32216, - "2": 0.51152, - "3": 0.3991, - "4": 0.39384, - "5": 0.34861, - "6": 0.34066, - "7": 0.34617, - "8": 0.33486, - "9": 0.32675, - "10": 0.32667, - "11": 0.32484, - "12": 0.31668, - "13": 0.33715, - "14": 0.32412, - "15": 0.31875, - "16": 0.32114, - "17": 0.3229, - "18": 0.31808, - "19": 0.32136, - "20": 0.31859, - "21": 0.31745, - "22": 0.31017, - "23": 0.32808, - "24": 0.31401, - "25": 0.31375, - "26": 0.31997, - "27": 0.32499, - "28": 0.32994, - "29": 0.33622, - "30": 0.33243, - "31": 0.33178, - "32": 0.35562, - "33": 0.3162, - "34": 0.32935, - "35": 0.32942, - "36": 0.32747, - "37": 0.32399, - "38": 0.32853, - "39": 0.32725, - "40": 0.32666, - "41": 0.33444, - "42": 0.32666, - "43": 0.32009, - "44": 0.38316, - "45": 0.36982, - "46": 0.3282, - "47": 0.33228, - "48": 0.32173, - "49": 0.32336, - "50": 0.33092, - "51": 0.32405, - "52": 0.344, - "53": 0.31793, - "54": 0.31881, - "55": 0.32423, - "56": 0.3238, - "57": 0.32754, - "58": 0.33365, - "59": 0.3188, - "60": 0.32627, - "61": 0.32313, - "62": 0.3251, - "63": 0.32111, - "64": 0.32694, - "65": 0.32677, - "66": 0.32916, - "67": 0.32392, - "68": 0.326, - "69": 0.31823, - "70": 0.32846, - "71": 0.32194, - "72": 0.3191, - "73": 0.32552, - "74": 0.32352, - "75": 0.31973, - "76": 0.32666, - "77": 0.32946, - "78": 0.31928, - "79": 0.32534, - "80": 0.31953, - "81": 0.31781, - "82": 0.3276, - "83": 0.32328, - "84": 0.31773, - "85": 0.32013, - "86": 0.32232, - "87": 0.31793, - "88": 0.31909, - "89": 0.6397, - "90": 0.31785, - "91": 0.3271, - "92": 0.31825, - "93": 0.31968, - "94": 0.32804, - "95": 0.31746, - "96": 0.31519, - "97": 
0.32525, - "98": 0.3209, - "99": 0.31591, - "100": 0.31898 + "1": 11.49667, + "2": 0.45982, + "3": 0.39283, + "4": 0.37269, + "5": 0.33438, + "6": 0.33048, + "7": 0.33351, + "8": 0.32704, + "9": 0.31789, + "10": 0.30958, + "11": 0.30791, + "12": 0.30859, + "13": 0.32053, + "14": 0.30171, + "15": 0.30843, + "16": 0.30302, + "17": 0.30464, + "18": 0.30431, + "19": 0.30467, + "20": 0.29614, + "21": 0.3034, + "22": 0.30183, + "23": 0.29505, + "24": 0.29208, + "25": 0.29678, + "26": 0.29737, + "27": 0.30864, + "28": 0.31313, + "29": 0.30795, + "30": 0.31701, + "31": 0.31516, + "32": 0.32758, + "33": 0.31728, + "34": 0.32164, + "35": 0.32366, + "36": 0.3008, + "37": 0.30816, + "38": 0.30782, + "39": 0.3097, + "40": 0.31658, + "41": 0.30749, + "42": 0.30662, + "43": 0.30452, + "44": 0.32171, + "45": 0.30874, + "46": 0.31718, + "47": 0.30947, + "48": 0.30568, + "49": 0.30559, + "50": 0.30518, + "51": 0.32349, + "52": 0.30552, + "53": 0.2972, + "54": 0.29675, + "55": 0.6806, + "56": 0.30449, + "57": 0.30268, + "58": 0.29449, + "59": 0.29915, + "60": 0.30558, + "61": 0.29817, + "62": 0.29837, + "63": 0.29648, + "64": 0.30355, + "65": 0.30526, + "66": 0.29685, + "67": 0.29607, + "68": 0.30383, + "69": 0.29497, + "70": 0.29908, + "71": 0.298, + "72": 0.29482, + "73": 0.29392, + "74": 0.29933, + "75": 0.29938, + "76": 0.29472, + "77": 0.29225, + "78": 0.29345, + "79": 0.29571, + "80": 0.29379, + "81": 0.29694, + "82": 0.29442, + "83": 0.29839, + "84": 0.30064, + "85": 0.29571, + "86": 0.30107, + "87": 0.29723, + "88": 0.29324, + "89": 0.29688, + "90": 0.29142, + "91": 0.29759, + "92": 0.29347, + "93": 0.29617, + "94": 0.29996, + "95": 0.29791, + "96": 0.29236, + "97": 0.29637, + "98": 0.29446, + "99": 0.293, + "100": 0.2937 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100_2nd.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..9e46de6c95a --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.79428, + "52": 9.69347, + "53": 10.02752, + "54": 9.90501, + "55": 9.82435, + "56": 9.54897, + "57": 9.39485, + "58": 9.7808, + "59": 9.50877, + "60": 9.42349, + "61": 9.63084, + "62": 9.93098, + "63": 9.30185, + "64": 9.70993, + "65": 8.86079, + "66": 9.6403, + "67": 9.30746, + "68": 9.739, + "69": 9.74443, + "70": 9.68785, + "71": 9.56432, + "72": 9.50788, + "73": 9.43507, + "74": 8.84742, + "75": 9.3602, + "76": 8.99973, + "77": 10.01014, + "78": 9.67223, + "79": 9.31512, + "80": 9.34539, + "81": 9.41771, + "82": 9.64173, + "83": 9.22906, + "84": 9.35261, + "85": 9.54121, + "86": 9.00835, + "87": 9.53227, + "88": 9.69231, + "89": 9.52663, + "90": 9.76997, + "91": 9.26595, + "92": 9.29755, + "93": 8.99851, + "94": 8.76338, + "95": 9.4712, + "96": 9.46514, 
+ "97": 9.24403, + "98": 9.61142, + "99": 8.82341, + "100": 9.33414 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 38608.0, + "52": 29672.0, + "53": 145995.0, + "54": 43699.0, + "55": 33546.0, + "56": 40511.0, + "57": 36799.0, + "58": 44234.0, + "59": 40253.0, + "60": 36209.0, + "61": 38020.0, + "62": 129819.0, + "63": 154780.0, + "64": 39430.0, + "65": 39232.0, + "66": 154214.0, + "67": 161225.0, + "68": 2135842.0, + "69": 50464.0, + "70": 56439.0, + "71": 2137847.0, + "72": 147293.0, + "73": 2141880.0, + "74": 2137167.0, + "75": 2135335.0, + "76": 2139034.0, + "77": 159341.0, + "78": 2139830.0, + "79": 2141683.0, + "80": 139853.0, + "81": 2145240.0, + "82": 164983.0, + "83": 2140685.0, + "84": 2140869.0, + "85": 2146230.0, + "86": 2141768.0, + "87": 2146906.0, + "88": 153161.0, + "89": 127490.0, + "90": 158621.0, + "91": 125039.0, + "92": 56204.0, + "93": 147769.0, + "94": 157550.0, + "95": 166285.0, + "96": 151337.0, + "97": 142825.0, + "98": 2144852.0, + "99": 2142365.0, + "100": 2140440.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": 
"nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 2789504000.0, + "52": 2789479936.0, + "53": 2789480960.0, + "54": 2789488640.0, + "55": 2789504000.0, + "56": 2789505536.0, + "57": 2789456896.0, + "58": 2789505536.0, + "59": 2789500416.0, + "60": 2789513728.0, + "61": 2789532160.0, + "62": 2789525504.0, + "63": 2789493248.0, + "64": 2789495296.0, + "65": 2789463552.0, + "66": 2789480448.0, + "67": 2789486080.0, + "68": 2789483008.0, + "69": 2789475328.0, + "70": 2789485568.0, + "71": 2789494784.0, + "72": 2789506560.0, + "73": 2789509120.0, + "74": 2789521920.0, + "75": 2789557760.0, + "76": 2789565440.0, + "77": 2789567488.0, + "78": 2789526528.0, + "79": 2789558272.0, + "80": 2789537792.0, + "81": 2789550592.0, + "82": 2789554176.0, + "83": 2789553152.0, + "84": 2789535744.0, + "85": 2789536768.0, + "86": 2789527040.0, + "87": 2789571072.0, + "88": 2789549568.0, + "89": 2789547008.0, + "90": 2789578752.0, + "91": 2789577216.0, + "92": 2789581824.0, + "93": 2789574656.0, + "94": 2789586944.0, + "95": 2789600256.0, + "96": 2789601792.0, + "97": 2789582848.0, + "98": 2789626880.0, + "99": 2789582336.0, + "100": 2789600768.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": 
"nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4455227392.0, + "52": 4460379136.0, + "53": 4460379136.0, + "54": 4460379136.0, + "55": 4465446400.0, + "56": 4465446400.0, + "57": 4465446400.0, + "58": 4473809408.0, + "59": 4473809408.0, + "60": 4473809408.0, + "61": 4479029760.0, + "62": 4479029760.0, + "63": 4479029760.0, + "64": 4479029760.0, + "65": 4479029760.0, + "66": 4479029760.0, + "67": 4479029760.0, + "68": 4479029760.0, + "69": 4479029760.0, + "70": 4479029760.0, + "71": 4479029760.0, + "72": 4479029760.0, + "73": 4479029760.0, + "74": 4479029760.0, + "75": 4502322688.0, + "76": 4506302464.0, + "77": 4512311296.0, + "78": 4512311296.0, + "79": 4512311296.0, + "80": 4512311296.0, + "81": 4512311296.0, + "82": 4512311296.0, + "83": 4512311296.0, + "84": 4512311296.0, + "85": 4512311296.0, + "86": 4512311296.0, + "87": 4512311296.0, + "88": 4521950208.0, + "89": 4521950208.0, + "90": 4521950208.0, + "91": 4522659328.0, + "92": 4522659328.0, + "93": 4522659328.0, + "94": 4526183424.0, + "95": 4541133824.0, + "96": 4541133824.0, + "97": 4544613888.0, + "98": 4559089664.0, + "99": 4559089664.0, + "100": 4559089664.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + 
"13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 11.78443, + "52": 0.53246, + "53": 0.38652, + "54": 0.36366, + "55": 0.35397, + "56": 0.3447, + "57": 0.32475, + "58": 0.34667, + "59": 0.32989, + "60": 0.34524, + "61": 0.32952, + "62": 0.31145, + "63": 0.30418, + "64": 0.31694, + "65": 0.30895, + "66": 0.30823, + "67": 0.31663, + "68": 0.30653, + "69": 0.30537, + "70": 0.30313, + "71": 0.30204, + "72": 0.30417, + "73": 0.29895, + "74": 0.29982, + "75": 0.30334, + "76": 0.29924, + "77": 0.29767, + "78": 0.30576, + "79": 0.30429, + "80": 0.30015, + "81": 0.30466, + "82": 0.3039, + "83": 0.30919, + "84": 0.30306, + "85": 0.30633, + "86": 0.30372, + "87": 0.30348, + "88": 0.30271, + "89": 0.30741, + "90": 0.30323, + "91": 0.30502, + "92": 0.72064, + "93": 0.29549, + "94": 0.29663, + "95": 0.2941, + "96": 0.29558, + "97": 0.30196, + "98": 0.30035, + "99": 0.30083, + "100": 0.29573 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json index 2eab394e23e..dffbbf25de6 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json @@ -1 
+1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82196, "5": 10.84518, "10": 10.78921, "15": 10.8336, "20": 10.73505, "25": 10.58138, "30": 10.40958, "35": 10.31467, "40": 10.14618, "45": 9.91713, "50": 9.97428}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4960.0, "5": 6022.0, "10": 4813.0, "15": 5586.0, "20": 5068.0, "25": 4868.0, "30": 5528.0, "35": 5700.0, "40": 6137.0, "45": 6030.0, "50": 6652.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 598359040.0, "5": 598358016.0, "10": 598356992.0, "15": 598359040.0, "20": 598357504.0, "25": 598357504.0, "30": 598358528.0, "35": 598356480.0, "40": 598357504.0, "45": 598355968.0, "50": 598358016.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 842904576.0, "5": 1072649216.0, "10": 1072649216.0, "15": 1072709632.0, "20": 1073532416.0, "25": 1073532416.0, "30": 1073532416.0, "35": 1073532416.0, "40": 1073532416.0, "45": 1073532416.0, "50": 1073532416.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 9.98481, "5": 0.66254, "10": 0.65398, "15": 0.65456, "20": 0.65608, "25": 0.65402, "30": 0.66555, "35": 0.66433, "40": 0.65947, "45": 0.64399, "50": 0.64234}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82207, + "2": 10.84178, + "3": 10.81126, + "4": 10.82219, + "5": 10.8455, + "6": 10.86291, + "7": 10.84399, + "8": 10.84652, + "9": 10.84916, + "10": 10.78879, + "11": 10.8581, + "12": 10.84415, + "13": 10.87153, + "14": 10.87463, + "15": 10.83396, + "16": 10.8091, + "17": 10.79098, + "18": 10.81032, + "19": 10.80535, + "20": 10.73557, + "21": 10.71472, + "22": 10.57762, + "23": 10.72594, + "24": 10.61811, + "25": 10.58114, + "26": 10.63747, + "27": 10.63794, + "28": 10.60614, + "29": 10.61062, + "30": 10.40965, + 
"31": 10.16941, + "32": 10.49897, + "33": 10.49702, + "34": 10.26142, + "35": 10.31452, + "36": 10.2851, + "37": 10.3895, + "38": 10.2473, + "39": 10.43792, + "40": 10.14599, + "41": 10.19691, + "42": 10.26122, + "43": 9.91082, + "44": 10.02318, + "45": 9.91674, + "46": 9.89463, + "47": 10.19281, + "48": 9.93104, + "49": 9.61208, + "50": 9.97427 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4986.0, + "2": 5272.0, + "3": 5309.0, + "4": 5162.0, + "5": 5824.0, + "6": 5990.0, + "7": 5433.0, + "8": 5101.0, + "9": 5654.0, + "10": 4736.0, + "11": 6213.0, + "12": 5723.0, + "13": 5952.0, + "14": 6073.0, + "15": 5503.0, + "16": 5808.0, + "17": 5545.0, + "18": 5647.0, + "19": 5555.0, + "20": 5120.0, + "21": 5578.0, + "22": 5097.0, + "23": 5992.0, + "24": 5204.0, + "25": 5016.0, + "26": 5487.0, + "27": 5618.0, + "28": 5994.0, + "29": 6202.0, + "30": 5538.0, + "31": 4762.0, + "32": 6010.0, + "33": 6302.0, + "34": 5312.0, + "35": 5783.0, + "36": 5716.0, + "37": 6562.0, + "38": 6183.0, + "39": 6964.0, + "40": 6220.0, + "41": 6139.0, + "42": 6368.0, + "43": 5900.0, + "44": 5754.0, + "45": 5814.0, + "46": 5882.0, + "47": 6818.0, + "48": 6495.0, + "49": 6047.0, + "50": 6623.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 598359040.0, + "2": 598359040.0, + "3": 598358528.0, + "4": 598360576.0, + "5": 598358016.0, + "6": 598358016.0, + "7": 598354432.0, + "8": 598359040.0, + "9": 598358016.0, + "10": 598356992.0, + "11": 598358016.0, + "12": 598358016.0, + "13": 598359040.0, + "14": 598359040.0, + "15": 598359040.0, + "16": 598358528.0, + "17": 598352384.0, + "18": 598358016.0, + "19": 598359040.0, + "20": 598357504.0, + "21": 598358016.0, + "22": 598354432.0, + "23": 598355968.0, + "24": 598356480.0, + "25": 598358528.0, + "26": 598357504.0, + "27": 598360064.0, + "28": 598358016.0, + "29": 598356480.0, + "30": 598359552.0, + "31": 598354944.0, + "32": 
598356992.0, + "33": 598359552.0, + "34": 598358016.0, + "35": 598356480.0, + "36": 598356992.0, + "37": 598358016.0, + "38": 598358016.0, + "39": 598357504.0, + "40": 598357504.0, + "41": 598352384.0, + "42": 598357504.0, + "43": 598352384.0, + "44": 598355456.0, + "45": 598355968.0, + "46": 598351872.0, + "47": 598359040.0, + "48": 598354944.0, + "49": 598353408.0, + "50": 598356992.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 855598080.0, + "2": 1083234304.0, + "3": 1083234304.0, + "4": 1083234304.0, + "5": 1083234304.0, + "6": 1083493888.0, + "7": 1083493888.0, + "8": 1083493888.0, + "9": 1083493888.0, + "10": 1083493888.0, + "11": 1083493888.0, + "12": 1083493888.0, + "13": 1083493888.0, + "14": 1084195840.0, + "15": 1084195840.0, + "16": 1084195840.0, + "17": 1084195840.0, + "18": 1084195840.0, + "19": 1084195840.0, + "20": 1084195840.0, + "21": 1084195840.0, + "22": 1084195840.0, + "23": 1084195840.0, + "24": 1084195840.0, + "25": 1084195840.0, + "26": 1084195840.0, + "27": 1084195840.0, + "28": 1084195840.0, + "29": 1084195840.0, + "30": 1084195840.0, + "31": 1084195840.0, + "32": 1084195840.0, + "33": 1084195840.0, + "34": 1084195840.0, + "35": 1084195840.0, + "36": 1084195840.0, + "37": 1084195840.0, + "38": 1084195840.0, + "39": 1084195840.0, + "40": 1084195840.0, + "41": 1084195840.0, + "42": 1084195840.0, + "43": 1084195840.0, + "44": 1084195840.0, + "45": 1084195840.0, + "46": 1084195840.0, + "47": 1084195840.0, + "48": 1084195840.0, + "49": 1084195840.0, + "50": 1084195840.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.15002, + "2": 0.70236, + "3": 0.6774, + "4": 0.6698, + "5": 0.66613, + "6": 0.65685, + "7": 0.65852, + "8": 1.19123, + "9": 0.65621, + "10": 1.09603, + "11": 0.65688, + "12": 0.65983, + "13": 0.6521, + "14": 0.65135, + "15": 0.65551, + "16": 0.64995, + "17": 0.6532, + "18": 0.65306, + "19": 
0.65221, + "20": 0.65239, + "21": 0.65356, + "22": 0.6536, + "23": 0.65416, + "24": 0.65298, + "25": 0.65469, + "26": 0.65391, + "27": 0.65289, + "28": 1.1109, + "29": 0.65365, + "30": 0.65326, + "31": 0.68599, + "32": 0.65366, + "33": 0.65416, + "34": 0.6538, + "35": 0.65304, + "36": 0.65351, + "37": 0.65423, + "38": 0.6542, + "39": 0.65254, + "40": 0.65386, + "41": 0.65384, + "42": 0.65434, + "43": 0.65537, + "44": 0.65573, + "45": 0.65342, + "46": 0.65451, + "47": 0.6535, + "48": 0.65377, + "49": 0.65522, + "50": 0.65221 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json index c9eee5d9463..e9af2c920dd 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.81746, - "2": 10.82149, - "3": 10.82234, - "4": 10.79883, - "5": 10.84067, - "6": 10.85636, - "7": 10.81775, - "8": 10.81498, - "9": 10.83664, - "10": 10.7822, - "11": 10.85151, - "12": 10.84335, - "13": 10.85001, - "14": 10.87346, - "15": 10.80974, - "16": 10.80359, - "17": 10.75702, - "18": 10.80691, - "19": 10.78689, - "20": 10.73095, - "21": 10.70872, - "22": 10.57886, - "23": 10.71772, - "24": 10.63253, - "25": 10.57332, - "26": 10.62323, - "27": 10.63892, + "1": 10.81737, + "2": 10.82147, + "3": 10.82281, + "4": 10.79843, + "5": 10.84076, + "6": 10.85646, + "7": 10.81805, + "8": 10.81508, + "9": 10.83702, + "10": 10.78206, + "11": 10.85139, + "12": 10.84369, + "13": 10.84954, + "14": 10.87421, + "15": 10.81044, + "16": 10.80279, + "17": 10.75666, + "18": 10.80666, + "19": 10.78635, + "20": 10.7305, + 
"21": 10.7094, + "22": 10.57865, + "23": 10.71817, + "24": 10.63281, + "25": 10.57347, + "26": 10.62329, + "27": 10.63909, "28": 10.60509, - "29": 10.61796, - "30": 10.42067, - "31": 10.18074, - "32": 10.50619, - "33": 10.50937, - "34": 10.27626, - "35": 10.3249, - "36": 10.29423, - "37": 10.40006, - "38": 10.26099, - "39": 10.44197, - "40": 10.1644, - "41": 10.2004, - "42": 10.26981, - "43": 9.93054, - "44": 10.04184, - "45": 9.9288, - "46": 9.89638, - "47": 10.18471, - "48": 9.93119, + "29": 10.61783, + "30": 10.42028, + "31": 10.18079, + "32": 10.50616, + "33": 10.50906, + "34": 10.27697, + "35": 10.3245, + "36": 10.29406, + "37": 10.39966, + "38": 10.2616, + "39": 10.44227, + "40": 10.16376, + "41": 10.2005, + "42": 10.26994, + "43": 9.93005, + "44": 10.04225, + "45": 9.92868, + "46": 9.89675, + "47": 10.18499, + "48": 9.93166, "49": 9.62763, - "50": 9.98402 + "50": 9.98403 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5082.0, - "2": 5274.0, - "3": 5447.0, - "4": 5269.0, - "5": 6020.0, - "6": 6160.0, - "7": 5592.0, - "8": 5309.0, - "9": 5743.0, - "10": 4800.0, - "11": 6186.0, - "12": 5648.0, - "13": 6106.0, - "14": 6126.0, - "15": 5600.0, - "16": 5819.0, - "17": 5669.0, - "18": 5547.0, - "19": 5711.0, - "20": 5380.0, - "21": 5677.0, - "22": 5023.0, - "23": 6080.0, - "24": 5403.0, - "25": 5120.0, - "26": 5431.0, - "27": 5866.0, - "28": 6035.0, - "29": 6154.0, - "30": 5456.0, - "31": 4832.0, - "32": 5956.0, - "33": 6301.0, - "34": 5366.0, - "35": 5900.0, - "36": 5703.0, - "37": 6744.0, - "38": 6098.0, - "39": 6737.0, - "40": 5994.0, - "41": 6144.0, - "42": 6542.0, - "43": 5751.0, - "44": 5876.0, - "45": 5795.0, - "46": 6162.0, - "47": 6736.0, - "48": 6331.0, - "49": 6235.0, - "50": 6668.0 + "1": 5162.0, + "2": 5294.0, + "3": 5343.0, + "4": 5333.0, + "5": 5868.0, + "6": 6119.0, + "7": 5447.0, + "8": 5258.0, + "9": 5738.0, + "10": 4888.0, + "11": 6126.0, + "12": 5816.0, + "13": 6034.0, + "14": 6205.0, + "15": 5700.0, 
+ "16": 5769.0, + "17": 5716.0, + "18": 5606.0, + "19": 5781.0, + "20": 5226.0, + "21": 5690.0, + "22": 5164.0, + "23": 6126.0, + "24": 5314.0, + "25": 5071.0, + "26": 5505.0, + "27": 5772.0, + "28": 6005.0, + "29": 6328.0, + "30": 5628.0, + "31": 4847.0, + "32": 5883.0, + "33": 6277.0, + "34": 5280.0, + "35": 5737.0, + "36": 5716.0, + "37": 6534.0, + "38": 6002.0, + "39": 6879.0, + "40": 5969.0, + "41": 6140.0, + "42": 6558.0, + "43": 5814.0, + "44": 5764.0, + "45": 5925.0, + "46": 5890.0, + "47": 6716.0, + "48": 6553.0, + "49": 6112.0, + "50": 6617.0 } }, "mem-allocated-bytes": { @@ -121,53 +121,53 @@ "1": 627718656.0, "2": 627719168.0, "3": 627719168.0, - "4": 627720704.0, + "4": 627720192.0, "5": 627718656.0, "6": 627718656.0, "7": 627718144.0, "8": 627718144.0, "9": 627718144.0, "10": 627719168.0, - "11": 627719680.0, - "12": 627719168.0, - "13": 627719680.0, - "14": 627717120.0, + "11": 627718656.0, + "12": 627718144.0, + "13": 627720192.0, + "14": 627717632.0, "15": 627720192.0, "16": 627717632.0, "17": 627718144.0, - "18": 627719680.0, + "18": 627718656.0, "19": 627719168.0, "20": 627717120.0, "21": 627718144.0, "22": 627720192.0, "23": 627720192.0, - "24": 627718144.0, + "24": 627717120.0, "25": 627718656.0, - "26": 627718144.0, - "27": 627717120.0, - "28": 627718656.0, + "26": 627717632.0, + "27": 627719680.0, + "28": 627717632.0, "29": 627717120.0, "30": 627720192.0, - "31": 627715072.0, - "32": 627720192.0, + "31": 627715584.0, + "32": 627720704.0, "33": 627717632.0, - "34": 627719168.0, - "35": 627716608.0, - "36": 627719168.0, - "37": 627718144.0, + "34": 627718144.0, + "35": 627715584.0, + "36": 627718656.0, + "37": 627717632.0, "38": 627718656.0, "39": 627715584.0, - "40": 627717632.0, + "40": 627718656.0, "41": 627714560.0, "42": 627718144.0, "43": 627713536.0, - "44": 627714048.0, - "45": 627719168.0, + "44": 627715072.0, + "45": 627718144.0, "46": 627716096.0, - "47": 627717120.0, + "47": 627718144.0, "48": 627716608.0, - "49": 627715072.0, - 
"50": 627718144.0 + "49": 627716096.0, + "50": 627717632.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 870138880.0, - "2": 1099332096.0, - "3": 1099950080.0, - "4": 1102007296.0, - "5": 1102007296.0, - "6": 1102007296.0, - "7": 1102007296.0, - "8": 1102007296.0, - "9": 1102007296.0, - "10": 1102007296.0, - "11": 1102007296.0, - "12": 1102007296.0, - "13": 1103012352.0, - "14": 1103012352.0, - "15": 1103012352.0, - "16": 1103012352.0, - "17": 1103012352.0, - "18": 1103012352.0, - "19": 1103012352.0, - "20": 1103012352.0, - "21": 1103012352.0, - "22": 1103012352.0, - "23": 1103012352.0, - "24": 1103012352.0, - "25": 1103012352.0, - "26": 1103012352.0, - "27": 1103012352.0, - "28": 1103012352.0, - "29": 1103012352.0, - "30": 1103012352.0, - "31": 1103012352.0, - "32": 1103012352.0, - "33": 1103012352.0, - "34": 1103012352.0, - "35": 1103012352.0, - "36": 1103012352.0, - "37": 1103012352.0, - "38": 1103012352.0, - "39": 1103012352.0, - "40": 1103012352.0, - "41": 1103012352.0, - "42": 1103012352.0, - "43": 1103012352.0, - "44": 1103012352.0, - "45": 1103012352.0, - "46": 1103012352.0, - "47": 1103012352.0, - "48": 1103012352.0, - "49": 1103012352.0, - "50": 1103012352.0 + "1": 879924224.0, + "2": 1111762432.0, + "3": 1111762432.0, + "4": 1113592832.0, + "5": 1113592832.0, + "6": 1113592832.0, + "7": 1113592832.0, + "8": 1113592832.0, + "9": 1113592832.0, + "10": 1113592832.0, + "11": 1113592832.0, + "12": 1113592832.0, + "13": 1113592832.0, + "14": 1113592832.0, + "15": 1113592832.0, + "16": 1113592832.0, + "17": 1113592832.0, + "18": 1113592832.0, + "19": 1113592832.0, + "20": 1113592832.0, + "21": 1113592832.0, + "22": 1113592832.0, + "23": 1113592832.0, + "24": 1113592832.0, + "25": 1113592832.0, + "26": 1113592832.0, + "27": 1113592832.0, + "28": 1113592832.0, + "29": 1113592832.0, + "30": 1113592832.0, + "31": 1113592832.0, + "32": 1113592832.0, + "33": 1113592832.0, + "34": 1113592832.0, + "35": 
1113592832.0, + "36": 1113592832.0, + "37": 1113592832.0, + "38": 1113592832.0, + "39": 1113592832.0, + "40": 1113592832.0, + "41": 1113592832.0, + "42": 1113592832.0, + "43": 1113592832.0, + "44": 1113592832.0, + "45": 1113592832.0, + "46": 1113592832.0, + "47": 1113592832.0, + "48": 1113592832.0, + "49": 1113592832.0, + "50": 1113592832.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 17.75731, - "2": 0.59137, - "3": 0.52847, - "4": 0.55398, - "5": 0.51736, - "6": 0.51707, - "7": 0.52895, - "8": 0.51861, - "9": 0.5181, - "10": 0.51717, - "11": 0.51445, - "12": 0.51129, - "13": 0.51494, - "14": 0.51037, - "15": 0.51828, - "16": 0.50983, - "17": 0.51156, - "18": 0.51029, - "19": 0.51087, - "20": 0.51452, - "21": 0.5039, - "22": 0.51296, - "23": 0.50822, - "24": 0.51693, - "25": 0.51087, - "26": 0.51188, - "27": 0.51138, - "28": 0.51374, - "29": 0.50808, - "30": 0.50936, - "31": 0.51301, - "32": 0.5132, - "33": 0.51, - "34": 0.51133, - "35": 0.51556, - "36": 0.51397, - "37": 0.51183, - "38": 0.51721, - "39": 0.50468, - "40": 0.50915, - "41": 0.51802, - "42": 0.51064, - "43": 0.51335, - "44": 0.50717, - "45": 0.51189, - "46": 0.52735, - "47": 0.52015, - "48": 0.50421, - "49": 0.5285, - "50": 0.50368 + "1": 19.37156, + "2": 0.57228, + "3": 0.50712, + "4": 0.49818, + "5": 0.46521, + "6": 0.46426, + "7": 0.48248, + "8": 0.46121, + "9": 0.46322, + "10": 0.943, + "11": 0.46349, + "12": 0.46108, + "13": 0.47225, + "14": 0.45499, + "15": 0.47496, + "16": 0.4611, + "17": 0.46441, + "18": 0.45776, + "19": 0.90663, + "20": 0.8319, + "21": 0.45677, + "22": 0.45736, + "23": 0.45985, + "24": 1.08757, + "25": 0.46245, + "26": 0.45592, + "27": 0.45988, + "28": 0.93317, + "29": 0.46123, + "30": 0.4584, + "31": 0.45997, + "32": 0.45818, + "33": 0.45532, + "34": 0.46013, + "35": 0.85461, + "36": 0.46712, + "37": 0.46955, + "38": 0.46952, + "39": 0.45914, + "40": 0.45553, + "41": 0.45756, + "42": 0.45149, + "43": 0.46141, + "44": 
0.44921, + "45": 0.46166, + "46": 0.47347, + "47": 0.472, + "48": 0.45384, + "49": 0.47868, + "50": 0.45871 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json index 93abc66f3c0..d2a07cdf1dd 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82196, "5": 10.84518, "10": 10.78921, "15": 10.8336, "20": 10.73505, "25": 10.58138, "30": 10.40958, "35": 10.31467, "40": 10.14618, "45": 9.91713, "50": 9.97428}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4960.0, "5": 6022.0, "10": 4813.0, "15": 5586.0, "20": 5068.0, "25": 4868.0, "30": 5528.0, "35": 5700.0, "40": 6137.0, "45": 6030.0, "50": 6652.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 598359040.0, "5": 598358016.0, "10": 598356992.0, "15": 598359040.0, "20": 598357504.0, "25": 598357504.0, "30": 598358528.0, "35": 598356480.0, "40": 598357504.0, "45": 598355968.0, "50": 598358016.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 842904576.0, "5": 1072649216.0, "10": 1072649216.0, "15": 1072709632.0, "20": 1073532416.0, "25": 1073532416.0, "30": 1073532416.0, "35": 1073532416.0, "40": 1073532416.0, "45": 1073532416.0, "50": 1073532416.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 15.53653, "5": 0.66223, "10": 0.66331, "15": 0.65892, "20": 0.66075, "25": 0.6607, "30": 
0.68157, "35": 0.68189, "40": 0.68279, "45": 0.68065, "50": 0.65686}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82207, + "2": 10.84178, + "3": 10.81126, + "4": 10.82219, + "5": 10.8455, + "6": 10.86291, + "7": 10.84399, + "8": 10.84652, + "9": 10.84916, + "10": 10.78879, + "11": 10.8581, + "12": 10.84415, + "13": 10.87153, + "14": 10.87463, + "15": 10.83396, + "16": 10.8091, + "17": 10.79098, + "18": 10.81032, + "19": 10.80535, + "20": 10.73557, + "21": 10.71472, + "22": 10.57762, + "23": 10.72594, + "24": 10.61811, + "25": 10.58114, + "26": 10.63747, + "27": 10.63794, + "28": 10.60614, + "29": 10.61062, + "30": 10.40965, + "31": 10.16941, + "32": 10.49897, + "33": 10.49702, + "34": 10.26142, + "35": 10.31452, + "36": 10.2851, + "37": 10.3895, + "38": 10.2473, + "39": 10.43792, + "40": 10.14599, + "41": 10.19691, + "42": 10.26122, + "43": 9.91082, + "44": 10.02318, + "45": 9.91674, + "46": 9.89463, + "47": 10.19281, + "48": 9.93104, + "49": 9.61208, + "50": 9.97427 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4986.0, + "2": 5272.0, + "3": 5309.0, + "4": 5162.0, + "5": 5824.0, + "6": 5990.0, + "7": 5433.0, + "8": 5101.0, + "9": 5654.0, + "10": 4736.0, + "11": 6213.0, + "12": 5723.0, + "13": 5952.0, + "14": 6073.0, + "15": 5503.0, + "16": 5808.0, + "17": 5545.0, + "18": 5647.0, + "19": 5555.0, + "20": 5120.0, + "21": 5578.0, + "22": 5097.0, + "23": 5992.0, + "24": 5204.0, + "25": 5016.0, + "26": 5487.0, + "27": 5618.0, + "28": 5994.0, + "29": 6202.0, + "30": 5538.0, + "31": 4762.0, + "32": 6010.0, + "33": 6302.0, + "34": 5312.0, + "35": 5783.0, + "36": 5716.0, + "37": 6562.0, + "38": 6183.0, + "39": 6964.0, + "40": 6220.0, + "41": 6139.0, + "42": 6368.0, + "43": 5900.0, + "44": 5754.0, + "45": 5814.0, + "46": 5882.0, + "47": 6818.0, + "48": 6495.0, + "49": 6047.0, + "50": 6623.0 + } + }, + "mem-allocated-bytes": { + 
"start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 598359040.0, + "2": 598359040.0, + "3": 598358528.0, + "4": 598360576.0, + "5": 598358016.0, + "6": 598358016.0, + "7": 598354432.0, + "8": 598359040.0, + "9": 598358016.0, + "10": 598356992.0, + "11": 598358016.0, + "12": 598358016.0, + "13": 598359040.0, + "14": 598359040.0, + "15": 598359040.0, + "16": 598358528.0, + "17": 598352384.0, + "18": 598358016.0, + "19": 598359040.0, + "20": 598357504.0, + "21": 598358016.0, + "22": 598354432.0, + "23": 598355968.0, + "24": 598356480.0, + "25": 598358528.0, + "26": 598357504.0, + "27": 598360064.0, + "28": 598358016.0, + "29": 598356480.0, + "30": 598359552.0, + "31": 598354944.0, + "32": 598356992.0, + "33": 598359552.0, + "34": 598358016.0, + "35": 598356480.0, + "36": 598356992.0, + "37": 598358016.0, + "38": 598358016.0, + "39": 598357504.0, + "40": 598357504.0, + "41": 598352384.0, + "42": 598357504.0, + "43": 598352384.0, + "44": 598355456.0, + "45": 598355968.0, + "46": 598351872.0, + "47": 598359040.0, + "48": 598354944.0, + "49": 598353408.0, + "50": 598356992.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 855598080.0, + "2": 1083234304.0, + "3": 1083234304.0, + "4": 1083234304.0, + "5": 1083234304.0, + "6": 1083493888.0, + "7": 1083493888.0, + "8": 1083493888.0, + "9": 1083493888.0, + "10": 1083493888.0, + "11": 1083493888.0, + "12": 1083493888.0, + "13": 1083493888.0, + "14": 1084195840.0, + "15": 1084195840.0, + "16": 1084195840.0, + "17": 1084195840.0, + "18": 1084195840.0, + "19": 1084195840.0, + "20": 1084195840.0, + "21": 1084195840.0, + "22": 1084195840.0, + "23": 1084195840.0, + "24": 1084195840.0, + "25": 1084195840.0, + "26": 1084195840.0, + "27": 1084195840.0, + "28": 1084195840.0, + "29": 1084195840.0, + "30": 1084195840.0, + "31": 1084195840.0, + "32": 1084195840.0, + "33": 1084195840.0, + "34": 1084195840.0, + "35": 1084195840.0, + "36": 
1084195840.0, + "37": 1084195840.0, + "38": 1084195840.0, + "39": 1084195840.0, + "40": 1084195840.0, + "41": 1084195840.0, + "42": 1084195840.0, + "43": 1084195840.0, + "44": 1084195840.0, + "45": 1084195840.0, + "46": 1084195840.0, + "47": 1084195840.0, + "48": 1084195840.0, + "49": 1084195840.0, + "50": 1084195840.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12.18178, + "2": 0.71018, + "3": 0.6513, + "4": 0.63757, + "5": 0.63692, + "6": 1.25031, + "7": 0.63769, + "8": 0.6385, + "9": 1.00487, + "10": 0.63706, + "11": 0.63646, + "12": 0.63826, + "13": 0.63654, + "14": 0.63609, + "15": 0.64, + "16": 0.6373, + "17": 0.63737, + "18": 0.63625, + "19": 0.63624, + "20": 0.63844, + "21": 0.6361, + "22": 0.63788, + "23": 0.63738, + "24": 0.63546, + "25": 0.63758, + "26": 0.63704, + "27": 0.63992, + "28": 0.64468, + "29": 0.64456, + "30": 0.6501, + "31": 0.64571, + "32": 0.64554, + "33": 0.64543, + "34": 0.64396, + "35": 0.64389, + "36": 0.64513, + "37": 0.6451, + "38": 0.64723, + "39": 0.6454, + "40": 0.64512, + "41": 0.64629, + "42": 0.64576, + "43": 0.64737, + "44": 0.64709, + "45": 0.64517, + "46": 0.64605, + "47": 0.64625, + "48": 0.64627, + "49": 0.64638, + "50": 0.64367 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json index 25a8b5ae572..80df38f0478 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.81746, - "2": 10.82149, - "3": 10.82234, - "4": 10.79883, - 
"5": 10.84067, - "6": 10.85636, - "7": 10.81775, - "8": 10.81498, - "9": 10.83664, - "10": 10.7822, - "11": 10.85151, - "12": 10.84335, - "13": 10.85001, - "14": 10.87346, - "15": 10.80974, - "16": 10.80359, - "17": 10.75702, - "18": 10.80691, - "19": 10.78689, - "20": 10.73095, - "21": 10.70872, - "22": 10.57886, - "23": 10.71772, - "24": 10.63253, - "25": 10.57332, - "26": 10.62323, - "27": 10.63892, + "1": 10.81737, + "2": 10.82147, + "3": 10.82281, + "4": 10.79843, + "5": 10.84076, + "6": 10.85646, + "7": 10.81805, + "8": 10.81508, + "9": 10.83702, + "10": 10.78206, + "11": 10.85139, + "12": 10.84369, + "13": 10.84954, + "14": 10.87421, + "15": 10.81044, + "16": 10.80279, + "17": 10.75666, + "18": 10.80666, + "19": 10.78635, + "20": 10.7305, + "21": 10.7094, + "22": 10.57865, + "23": 10.71817, + "24": 10.63281, + "25": 10.57347, + "26": 10.62329, + "27": 10.63909, "28": 10.60509, - "29": 10.61796, - "30": 10.42067, - "31": 10.18074, - "32": 10.50619, - "33": 10.50937, - "34": 10.27626, - "35": 10.3249, - "36": 10.29423, - "37": 10.40006, - "38": 10.26099, - "39": 10.44197, - "40": 10.1644, - "41": 10.2004, - "42": 10.26981, - "43": 9.93054, - "44": 10.04184, - "45": 9.9288, - "46": 9.89638, - "47": 10.18471, - "48": 9.93119, + "29": 10.61783, + "30": 10.42028, + "31": 10.18079, + "32": 10.50616, + "33": 10.50906, + "34": 10.27697, + "35": 10.3245, + "36": 10.29406, + "37": 10.39966, + "38": 10.2616, + "39": 10.44227, + "40": 10.16376, + "41": 10.2005, + "42": 10.26994, + "43": 9.93005, + "44": 10.04225, + "45": 9.92868, + "46": 9.89675, + "47": 10.18499, + "48": 9.93166, "49": 9.62763, - "50": 9.98402 + "50": 9.98403 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5082.0, - "2": 5274.0, - "3": 5447.0, - "4": 5269.0, - "5": 6020.0, - "6": 6160.0, - "7": 5592.0, - "8": 5309.0, - "9": 5743.0, - "10": 4800.0, - "11": 6186.0, - "12": 5648.0, - "13": 6106.0, - "14": 6126.0, - "15": 5600.0, - "16": 5819.0, - "17": 5669.0, 
- "18": 5547.0, - "19": 5711.0, - "20": 5380.0, - "21": 5677.0, - "22": 5023.0, - "23": 6080.0, - "24": 5403.0, - "25": 5120.0, - "26": 5431.0, - "27": 5866.0, - "28": 6035.0, - "29": 6154.0, - "30": 5456.0, - "31": 4832.0, - "32": 5956.0, - "33": 6301.0, - "34": 5366.0, - "35": 5900.0, - "36": 5703.0, - "37": 6744.0, - "38": 6098.0, - "39": 6737.0, - "40": 5994.0, - "41": 6144.0, - "42": 6542.0, - "43": 5751.0, - "44": 5876.0, - "45": 5795.0, - "46": 6162.0, - "47": 6736.0, - "48": 6331.0, - "49": 6235.0, - "50": 6668.0 + "1": 5162.0, + "2": 5294.0, + "3": 5343.0, + "4": 5333.0, + "5": 5868.0, + "6": 6119.0, + "7": 5447.0, + "8": 5258.0, + "9": 5738.0, + "10": 4888.0, + "11": 6126.0, + "12": 5816.0, + "13": 6034.0, + "14": 6205.0, + "15": 5700.0, + "16": 5769.0, + "17": 5716.0, + "18": 5606.0, + "19": 5781.0, + "20": 5226.0, + "21": 5690.0, + "22": 5164.0, + "23": 6126.0, + "24": 5314.0, + "25": 5071.0, + "26": 5505.0, + "27": 5772.0, + "28": 6005.0, + "29": 6328.0, + "30": 5628.0, + "31": 4847.0, + "32": 5883.0, + "33": 6277.0, + "34": 5280.0, + "35": 5737.0, + "36": 5716.0, + "37": 6534.0, + "38": 6002.0, + "39": 6879.0, + "40": 5969.0, + "41": 6140.0, + "42": 6558.0, + "43": 5814.0, + "44": 5764.0, + "45": 5925.0, + "46": 5890.0, + "47": 6716.0, + "48": 6553.0, + "49": 6112.0, + "50": 6617.0 } }, "mem-allocated-bytes": { @@ -121,53 +121,53 @@ "1": 627718656.0, "2": 627719168.0, "3": 627719168.0, - "4": 627720704.0, + "4": 627720192.0, "5": 627718656.0, "6": 627718656.0, "7": 627718144.0, "8": 627718144.0, "9": 627718144.0, "10": 627719168.0, - "11": 627719680.0, - "12": 627719168.0, - "13": 627719680.0, - "14": 627717120.0, + "11": 627718656.0, + "12": 627718144.0, + "13": 627720192.0, + "14": 627717632.0, "15": 627720192.0, "16": 627717632.0, "17": 627718144.0, - "18": 627719680.0, + "18": 627718656.0, "19": 627719168.0, "20": 627717120.0, "21": 627718144.0, "22": 627720192.0, "23": 627720192.0, - "24": 627718144.0, + "24": 627717120.0, "25": 627718656.0, - 
"26": 627718144.0, - "27": 627717120.0, - "28": 627718656.0, + "26": 627717632.0, + "27": 627719680.0, + "28": 627717632.0, "29": 627717120.0, "30": 627720192.0, - "31": 627715072.0, - "32": 627720192.0, + "31": 627715584.0, + "32": 627720704.0, "33": 627717632.0, - "34": 627719168.0, - "35": 627716608.0, - "36": 627719168.0, - "37": 627718144.0, + "34": 627718144.0, + "35": 627715584.0, + "36": 627718656.0, + "37": 627717632.0, "38": 627718656.0, "39": 627715584.0, - "40": 627717632.0, + "40": 627718656.0, "41": 627714560.0, "42": 627718144.0, "43": 627713536.0, - "44": 627714048.0, - "45": 627719168.0, + "44": 627715072.0, + "45": 627718144.0, "46": 627716096.0, - "47": 627717120.0, + "47": 627718144.0, "48": 627716608.0, - "49": 627715072.0, - "50": 627718144.0 + "49": 627716096.0, + "50": 627717632.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 870138880.0, - "2": 1099332096.0, - "3": 1099950080.0, - "4": 1102007296.0, - "5": 1102007296.0, - "6": 1102007296.0, - "7": 1102007296.0, - "8": 1102007296.0, - "9": 1102007296.0, - "10": 1102007296.0, - "11": 1102007296.0, - "12": 1102007296.0, - "13": 1103012352.0, - "14": 1103012352.0, - "15": 1103012352.0, - "16": 1103012352.0, - "17": 1103012352.0, - "18": 1103012352.0, - "19": 1103012352.0, - "20": 1103012352.0, - "21": 1103012352.0, - "22": 1103012352.0, - "23": 1103012352.0, - "24": 1103012352.0, - "25": 1103012352.0, - "26": 1103012352.0, - "27": 1103012352.0, - "28": 1103012352.0, - "29": 1103012352.0, - "30": 1103012352.0, - "31": 1103012352.0, - "32": 1103012352.0, - "33": 1103012352.0, - "34": 1103012352.0, - "35": 1103012352.0, - "36": 1103012352.0, - "37": 1103012352.0, - "38": 1103012352.0, - "39": 1103012352.0, - "40": 1103012352.0, - "41": 1103012352.0, - "42": 1103012352.0, - "43": 1103012352.0, - "44": 1103012352.0, - "45": 1103012352.0, - "46": 1103012352.0, - "47": 1103012352.0, - "48": 1103012352.0, - "49": 1103012352.0, - "50": 
1103012352.0 + "1": 879924224.0, + "2": 1111762432.0, + "3": 1111762432.0, + "4": 1113592832.0, + "5": 1113592832.0, + "6": 1113592832.0, + "7": 1113592832.0, + "8": 1113592832.0, + "9": 1113592832.0, + "10": 1113592832.0, + "11": 1113592832.0, + "12": 1113592832.0, + "13": 1113592832.0, + "14": 1113592832.0, + "15": 1113592832.0, + "16": 1113592832.0, + "17": 1113592832.0, + "18": 1113592832.0, + "19": 1113592832.0, + "20": 1113592832.0, + "21": 1113592832.0, + "22": 1113592832.0, + "23": 1113592832.0, + "24": 1113592832.0, + "25": 1113592832.0, + "26": 1113592832.0, + "27": 1113592832.0, + "28": 1113592832.0, + "29": 1113592832.0, + "30": 1113592832.0, + "31": 1113592832.0, + "32": 1113592832.0, + "33": 1113592832.0, + "34": 1113592832.0, + "35": 1113592832.0, + "36": 1113592832.0, + "37": 1113592832.0, + "38": 1113592832.0, + "39": 1113592832.0, + "40": 1113592832.0, + "41": 1113592832.0, + "42": 1113592832.0, + "43": 1113592832.0, + "44": 1113592832.0, + "45": 1113592832.0, + "46": 1113592832.0, + "47": 1113592832.0, + "48": 1113592832.0, + "49": 1113592832.0, + "50": 1113592832.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 17.91902, - "2": 0.59117, - "3": 0.52614, - "4": 0.54746, - "5": 0.5056, - "6": 0.50649, - "7": 0.52305, - "8": 0.50853, - "9": 0.50644, - "10": 0.50303, - "11": 0.50387, - "12": 0.50249, - "13": 0.51153, - "14": 0.49861, - "15": 0.51318, - "16": 0.50066, - "17": 0.50888, - "18": 0.50788, - "19": 0.51533, - "20": 0.51425, - "21": 0.51111, - "22": 0.5116, - "23": 0.50626, - "24": 0.5049, - "25": 0.51101, - "26": 0.50993, - "27": 0.5073, - "28": 0.50949, - "29": 0.50784, - "30": 0.50783, - "31": 0.51255, - "32": 0.51065, - "33": 0.50731, - "34": 0.50768, - "35": 0.51749, - "36": 0.50656, - "37": 0.51012, - "38": 0.51668, - "39": 0.50475, - "40": 0.50784, - "41": 0.51405, - "42": 0.51014, - "43": 0.51186, - "44": 0.50532, - "45": 0.51211, - "46": 0.52864, - "47": 0.52545, - "48": 
0.50927, - "49": 0.52883, - "50": 0.50373 + "1": 22.46796, + "2": 0.55121, + "3": 0.49073, + "4": 0.49513, + "5": 0.46581, + "6": 0.45704, + "7": 0.47585, + "8": 1.29882, + "9": 0.47574, + "10": 0.46585, + "11": 0.48809, + "12": 0.45979, + "13": 0.47153, + "14": 0.82188, + "15": 0.47696, + "16": 0.45474, + "17": 0.46236, + "18": 0.45323, + "19": 0.45728, + "20": 0.47493, + "21": 0.45187, + "22": 0.45466, + "23": 0.45322, + "24": 0.45177, + "25": 0.45722, + "26": 0.46293, + "27": 0.45714, + "28": 0.45943, + "29": 0.45163, + "30": 0.45687, + "31": 0.4545, + "32": 0.45288, + "33": 0.45164, + "34": 0.45777, + "35": 0.46272, + "36": 0.45524, + "37": 0.45441, + "38": 0.45752, + "39": 0.4509, + "40": 0.44879, + "41": 0.45622, + "42": 0.45367, + "43": 0.46325, + "44": 0.45127, + "45": 0.46393, + "46": 0.51509, + "47": 0.46791, + "48": 0.45502, + "49": 0.48346, + "50": 0.45945 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json index 184675324be..e3b2e326fda 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.79175, "5": 10.82288, "10": 10.7688, "15": 10.79157, "20": 10.71001, "25": 10.54662, "30": 10.39407, "35": 10.30461, "40": 10.13303, "45": 9.90015, "50": 9.97874}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 5656.0, "5": 6476.0, "10": 5453.0, "15": 6233.0, "20": 5837.0, "25": 5811.0, "30": 6047.0, "35": 6712.0, "40": 7062.0, "45": 6681.0, "50": 7527.0}}, "mem-allocated-bytes": {"start_step": 1, 
"end_step": 50, "step_interval": 5, "values": {"1": 458213888.0, "5": 458213376.0, "10": 458215936.0, "15": 458215424.0, "20": 458214400.0, "25": 458211840.0, "30": 458211840.0, "35": 458215936.0, "40": 458213376.0, "45": 458214400.0, "50": 458214912.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1016114688.0, "5": 1180862464.0, "10": 1181913600.0, "15": 1181913600.0, "20": 1181913600.0, "25": 1181913600.0, "30": 1181913600.0, "35": 1181913600.0, "40": 1181913600.0, "45": 1181913600.0, "50": 1181913600.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 6.08798, "5": 0.55184, "10": 0.55591, "15": 0.55638, "20": 0.55589, "25": 0.55697, "30": 0.55631, "35": 0.55801, "40": 0.55677, "45": 0.55857, "50": 0.57711}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79175, + "2": 10.80907, + "3": 10.81011, + "4": 10.78146, + "5": 10.82288, + "6": 10.84057, + "7": 10.81192, + "8": 10.80005, + "9": 10.81667, + "10": 10.7688, + "11": 10.8618, + "12": 10.84042, + "13": 10.84452, + "14": 10.86421, + "15": 10.79157, + "16": 10.78199, + "17": 10.75122, + "18": 10.79446, + "19": 10.79523, + "20": 10.71001, + "21": 10.68811, + "22": 10.53736, + "23": 10.7066, + "24": 10.58865, + "25": 10.54662, + "26": 10.59492, + "27": 10.62142, + "28": 10.5969, + "29": 10.60036, + "30": 10.39407, + "31": 10.12951, + "32": 10.49684, + "33": 10.48779, + "34": 10.24347, + "35": 10.30461, + "36": 10.26056, + "37": 10.38859, + "38": 10.24848, + "39": 10.43799, + "40": 10.13303, + "41": 10.18651, + "42": 10.25823, + "43": 9.892, + "44": 10.02576, + "45": 9.90015, + "46": 9.88387, + "47": 10.19565, + "48": 9.91255, + "49": 9.60147, + "50": 9.97874 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5656.0, + "2": 6018.0, + "3": 5790.0, + "4": 5941.0, + "5": 6476.0, + "6": 
6653.0, + "7": 6287.0, + "8": 5875.0, + "9": 6239.0, + "10": 5453.0, + "11": 6936.0, + "12": 6711.0, + "13": 6655.0, + "14": 6814.0, + "15": 6233.0, + "16": 6533.0, + "17": 6397.0, + "18": 6112.0, + "19": 6678.0, + "20": 5837.0, + "21": 6403.0, + "22": 5715.0, + "23": 6744.0, + "24": 6051.0, + "25": 5811.0, + "26": 6104.0, + "27": 6484.0, + "28": 6884.0, + "29": 7253.0, + "30": 6047.0, + "31": 5593.0, + "32": 6625.0, + "33": 7054.0, + "34": 6104.0, + "35": 6712.0, + "36": 6684.0, + "37": 7523.0, + "38": 7273.0, + "39": 7620.0, + "40": 7062.0, + "41": 6895.0, + "42": 7426.0, + "43": 6713.0, + "44": 6664.0, + "45": 6681.0, + "46": 6923.0, + "47": 7705.0, + "48": 7248.0, + "49": 7331.0, + "50": 7527.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 458213888.0, + "2": 458211840.0, + "3": 458215424.0, + "4": 458211840.0, + "5": 458213376.0, + "6": 458213888.0, + "7": 458216448.0, + "8": 458216448.0, + "9": 458212864.0, + "10": 458215936.0, + "11": 458213888.0, + "12": 458213888.0, + "13": 458214400.0, + "14": 458215424.0, + "15": 458215424.0, + "16": 458212864.0, + "17": 458214400.0, + "18": 458214400.0, + "19": 458214400.0, + "20": 458214400.0, + "21": 458211840.0, + "22": 458218496.0, + "23": 458214912.0, + "24": 458214400.0, + "25": 458211840.0, + "26": 458215936.0, + "27": 458210816.0, + "28": 458213888.0, + "29": 458212864.0, + "30": 458211840.0, + "31": 458219008.0, + "32": 458214400.0, + "33": 458214912.0, + "34": 458211840.0, + "35": 458215936.0, + "36": 458212864.0, + "37": 458215424.0, + "38": 458213888.0, + "39": 458213888.0, + "40": 458213376.0, + "41": 458216960.0, + "42": 458215424.0, + "43": 458216960.0, + "44": 458213376.0, + "45": 458214400.0, + "46": 458216448.0, + "47": 458213376.0, + "48": 458213888.0, + "49": 458215424.0, + "50": 458214912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1029256704.0, + "2": 
1193177088.0, + "3": 1193177088.0, + "4": 1193686016.0, + "5": 1193686016.0, + "6": 1193686016.0, + "7": 1193686016.0, + "8": 1193686016.0, + "9": 1193771520.0, + "10": 1193771520.0, + "11": 1193771520.0, + "12": 1193771520.0, + "13": 1193771520.0, + "14": 1193771520.0, + "15": 1193771520.0, + "16": 1193771520.0, + "17": 1193771520.0, + "18": 1193771520.0, + "19": 1193771520.0, + "20": 1193771520.0, + "21": 1193771520.0, + "22": 1193918464.0, + "23": 1193918464.0, + "24": 1193918464.0, + "25": 1193918464.0, + "26": 1193918464.0, + "27": 1193918464.0, + "28": 1193918464.0, + "29": 1193918464.0, + "30": 1193918464.0, + "31": 1193918464.0, + "32": 1193918464.0, + "33": 1193918464.0, + "34": 1193918464.0, + "35": 1193918464.0, + "36": 1193918464.0, + "37": 1193918464.0, + "38": 1193918464.0, + "39": 1193918464.0, + "40": 1194139136.0, + "41": 1194139136.0, + "42": 1194139136.0, + "43": 1194249728.0, + "44": 1194249728.0, + "45": 1194249728.0, + "46": 1194249728.0, + "47": 1194249728.0, + "48": 1194249728.0, + "49": 1194249728.0, + "50": 1194249728.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6.67874, + "2": 0.59048, + "3": 0.55954, + "4": 0.55064, + "5": 0.54285, + "6": 0.54344, + "7": 0.54862, + "8": 0.542, + "9": 0.54738, + "10": 0.54947, + "11": 0.53996, + "12": 0.54615, + "13": 0.54407, + "14": 0.54098, + "15": 0.55148, + "16": 0.54024, + "17": 0.54784, + "18": 0.54329, + "19": 0.54213, + "20": 0.55192, + "21": 0.53901, + "22": 0.54612, + "23": 0.54495, + "24": 0.54254, + "25": 0.55242, + "26": 0.53958, + "27": 0.54346, + "28": 0.5466, + "29": 0.54048, + "30": 0.55385, + "31": 0.54112, + "32": 0.54404, + "33": 0.54779, + "34": 0.54049, + "35": 0.53889, + "36": 0.53823, + "37": 0.54013, + "38": 0.53918, + "39": 0.53801, + "40": 0.5394, + "41": 0.53905, + "42": 0.53797, + "43": 0.53957, + "44": 0.5384, + "45": 0.53795, + "46": 0.53859, + "47": 0.54222, + "48": 0.53881, + "49": 0.5401, + "50": 0.53746 
+ } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json index b250bf7ac21..6ec10f4f931 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 491766784.0, - "2": 491767296.0, - "3": 491765760.0, - "4": 491767296.0, - "5": 491766784.0, - "6": 491767808.0, - "7": 491767296.0, - "8": 491768320.0, - "9": 491767808.0, - "10": 491767296.0, - "11": 491765248.0, - "12": 491764736.0, - "13": 491766272.0, - "14": 491767808.0, - "15": 491768832.0, - "16": 491769856.0, - "17": 491767296.0, - "18": 491765248.0, - "19": 491766272.0, - "20": 491766784.0, - "21": 491768320.0, - "22": 491768320.0, - "23": 491765760.0, - "24": 491766272.0, - "25": 491766272.0, - "26": 491767296.0, - "27": 491766784.0, - "28": 491767296.0, - "29": 491766272.0, - "30": 491766272.0, - "31": 491767808.0, - "32": 491765760.0, - "33": 491764736.0, - "34": 491768320.0, - "35": 491769344.0, - "36": 491765760.0, - "37": 491765248.0, - "38": 491766272.0, - "39": 491767808.0, - "40": 491765760.0, - "41": 491768320.0, - "42": 491766272.0, - "43": 491768832.0, - "44": 491768320.0, - "45": 491765248.0, - "46": 491768320.0, - "47": 491765760.0, - "48": 491766784.0, - "49": 491766784.0, - "50": 491765248.0 + "1": 458212352.0, + "2": 458212864.0, + "3": 458211328.0, + "4": 458212864.0, + "5": 458212352.0, + "6": 458213376.0, + "7": 458212864.0, + "8": 458213888.0, + "9": 458213376.0, + "10": 458212864.0, + "11": 458210816.0, + "12": 458210304.0, + "13": 458211840.0, + "14": 458213376.0, + "15": 458214400.0, + 
"16": 458215424.0, + "17": 458212864.0, + "18": 458210816.0, + "19": 458211840.0, + "20": 458212352.0, + "21": 458213888.0, + "22": 458213888.0, + "23": 458211328.0, + "24": 458211840.0, + "25": 458211840.0, + "26": 458212864.0, + "27": 458212352.0, + "28": 458212864.0, + "29": 458211840.0, + "30": 458211840.0, + "31": 458213376.0, + "32": 458211328.0, + "33": 458210304.0, + "34": 458213888.0, + "35": 458214912.0, + "36": 458211328.0, + "37": 458210816.0, + "38": 458211840.0, + "39": 458213376.0, + "40": 458211328.0, + "41": 458213888.0, + "42": 458211840.0, + "43": 458214400.0, + "44": 458213888.0, + "45": 458210816.0, + "46": 458213888.0, + "47": 458211328.0, + "48": 458212352.0, + "49": 458212352.0, + "50": 458210816.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1047229440.0, - "2": 1213900288.0, - "3": 1213900288.0, - "4": 1213900288.0, - "5": 1213900288.0, - "6": 1213900288.0, - "7": 1213900288.0, - "8": 1213900288.0, - "9": 1213900288.0, - "10": 1213900288.0, - "11": 1213900288.0, - "12": 1213900288.0, - "13": 1213900288.0, - "14": 1213900288.0, - "15": 1213900288.0, - "16": 1213900288.0, - "17": 1213900288.0, - "18": 1213900288.0, - "19": 1213900288.0, - "20": 1213900288.0, - "21": 1213900288.0, - "22": 1213900288.0, - "23": 1213900288.0, - "24": 1213900288.0, - "25": 1213900288.0, - "26": 1213900288.0, - "27": 1213900288.0, - "28": 1213900288.0, - "29": 1213900288.0, - "30": 1213900288.0, - "31": 1213900288.0, - "32": 1213900288.0, - "33": 1213900288.0, - "34": 1213900288.0, - "35": 1213900288.0, - "36": 1213900288.0, - "37": 1213900288.0, - "38": 1213900288.0, - "39": 1213900288.0, - "40": 1213900288.0, - "41": 1213900288.0, - "42": 1213900288.0, - "43": 1213900288.0, - "44": 1213900288.0, - "45": 1213900288.0, - "46": 1213900288.0, - "47": 1213900288.0, - "48": 1213900288.0, - "49": 1213900288.0, - "50": 1213900288.0 + "1": 1026068480.0, + "2": 1192152064.0, + "3": 1192152064.0, + "4": 
1192205312.0, + "5": 1192205312.0, + "6": 1192205312.0, + "7": 1192205312.0, + "8": 1192205312.0, + "9": 1192205312.0, + "10": 1192205312.0, + "11": 1192205312.0, + "12": 1192205312.0, + "13": 1192349184.0, + "14": 1192349184.0, + "15": 1192506368.0, + "16": 1192506368.0, + "17": 1192506368.0, + "18": 1192506368.0, + "19": 1192506368.0, + "20": 1192506368.0, + "21": 1192506368.0, + "22": 1192506368.0, + "23": 1192506368.0, + "24": 1192506368.0, + "25": 1192506368.0, + "26": 1192506368.0, + "27": 1192506368.0, + "28": 1192506368.0, + "29": 1192506368.0, + "30": 1192506368.0, + "31": 1192506368.0, + "32": 1192506368.0, + "33": 1192506368.0, + "34": 1192506368.0, + "35": 1192506368.0, + "36": 1192506368.0, + "37": 1192506368.0, + "38": 1192506368.0, + "39": 1192506368.0, + "40": 1192506368.0, + "41": 1192506368.0, + "42": 1192506368.0, + "43": 1192506368.0, + "44": 1192506368.0, + "45": 1192506368.0, + "46": 1192506368.0, + "47": 1192506368.0, + "48": 1192506368.0, + "49": 1192506368.0, + "50": 1192506368.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 13.26707, - "2": 0.52806, - "3": 0.46475, - "4": 0.47125, - "5": 0.42985, - "6": 0.42614, - "7": 0.43552, - "8": 0.42689, - "9": 0.42927, - "10": 0.42373, - "11": 0.42662, - "12": 0.42301, - "13": 0.42359, - "14": 0.4226, - "15": 0.42796, - "16": 0.42415, - "17": 0.4235, - "18": 0.41948, - "19": 0.42601, - "20": 0.42722, - "21": 0.4176, - "22": 0.41953, - "23": 0.42303, - "24": 0.4187, - "25": 0.42281, - "26": 0.42449, - "27": 0.41941, - "28": 0.42935, - "29": 0.417, - "30": 0.4261, - "31": 0.42904, - "32": 0.41844, - "33": 0.41687, - "34": 0.43419, - "35": 0.43727, - "36": 0.42315, - "37": 0.42179, - "38": 0.42403, - "39": 0.4179, - "40": 0.42443, - "41": 0.42169, - "42": 0.42155, - "43": 0.43942, - "44": 0.42209, - "45": 0.41972, - "46": 0.46515, - "47": 0.43911, - "48": 0.43693, - "49": 0.44745, - "50": 0.4198 + "1": 13.43711, + "2": 0.5648, + "3": 0.46103, + 
"4": 0.42843, + "5": 0.39023, + "6": 0.40228, + "7": 0.39933, + "8": 0.40801, + "9": 0.41661, + "10": 0.41115, + "11": 0.40919, + "12": 0.38713, + "13": 0.3967, + "14": 0.39634, + "15": 0.3917, + "16": 0.38895, + "17": 0.39488, + "18": 0.38262, + "19": 0.38633, + "20": 0.38778, + "21": 0.37793, + "22": 0.38122, + "23": 0.3785, + "24": 0.38176, + "25": 0.37936, + "26": 0.38399, + "27": 0.37425, + "28": 0.38373, + "29": 0.37674, + "30": 0.38541, + "31": 0.38748, + "32": 0.37483, + "33": 0.37931, + "34": 0.38691, + "35": 0.39293, + "36": 0.38011, + "37": 0.37641, + "38": 0.37714, + "39": 0.37754, + "40": 0.3929, + "41": 0.37984, + "42": 0.37748, + "43": 0.39504, + "44": 0.38155, + "45": 0.39617, + "46": 0.42631, + "47": 0.39497, + "48": 0.39432, + "49": 0.40482, + "50": 0.37964 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json index a186febffbe..d8e319ffb51 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 462408192.0, - "2": 462406144.0, - "3": 462409728.0, - "4": 462406144.0, - "5": 462407680.0, - "6": 462408192.0, - "7": 462410752.0, - "8": 462410752.0, - "9": 462407168.0, - "10": 462410240.0, - "11": 462408192.0, - "12": 462408192.0, - "13": 462408704.0, - "14": 462409728.0, - "15": 462409728.0, - "16": 462407168.0, - "17": 462408704.0, - "18": 462408704.0, - "19": 462408704.0, - "20": 462408704.0, - "21": 462406144.0, - "22": 462412800.0, - "23": 462409216.0, - "24": 462408704.0, - "25": 462406144.0, - "26": 462410240.0, - "27": 462405120.0, - "28": 
462408192.0, - "29": 462407168.0, - "30": 462406144.0, - "31": 462413312.0, - "32": 462408704.0, - "33": 462409216.0, - "34": 462406144.0, - "35": 462410240.0, - "36": 462407168.0, - "37": 462409728.0, - "38": 462408192.0, - "39": 462408192.0, - "40": 462407680.0, - "41": 462411264.0, - "42": 462409728.0, - "43": 462411264.0, - "44": 462407680.0, - "45": 462408704.0, - "46": 462410752.0, - "47": 462407680.0, - "48": 462408192.0, - "49": 462409728.0, - "50": 462409216.0 + "1": 458213888.0, + "2": 458211840.0, + "3": 458215424.0, + "4": 458211840.0, + "5": 458213376.0, + "6": 458213888.0, + "7": 458216448.0, + "8": 458216448.0, + "9": 458212864.0, + "10": 458215936.0, + "11": 458213888.0, + "12": 458213888.0, + "13": 458214400.0, + "14": 458215424.0, + "15": 458215424.0, + "16": 458212864.0, + "17": 458214400.0, + "18": 458214400.0, + "19": 458214400.0, + "20": 458214400.0, + "21": 458211840.0, + "22": 458218496.0, + "23": 458214912.0, + "24": 458214400.0, + "25": 458211840.0, + "26": 458215936.0, + "27": 458210816.0, + "28": 458213888.0, + "29": 458212864.0, + "30": 458211840.0, + "31": 458219008.0, + "32": 458214400.0, + "33": 458214912.0, + "34": 458211840.0, + "35": 458215936.0, + "36": 458212864.0, + "37": 458215424.0, + "38": 458213888.0, + "39": 458213888.0, + "40": 458213376.0, + "41": 458216960.0, + "42": 458215424.0, + "43": 458216960.0, + "44": 458213376.0, + "45": 458214400.0, + "46": 458216448.0, + "47": 458213376.0, + "48": 458213888.0, + "49": 458215424.0, + "50": 458214912.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1033451008.0, - "2": 1197371392.0, - "3": 1197371392.0, - "4": 1197880320.0, - "5": 1197880320.0, - "6": 1197880320.0, - "7": 1197880320.0, - "8": 1197880320.0, - "9": 1197965824.0, - "10": 1197965824.0, - "11": 1197965824.0, - "12": 1197965824.0, - "13": 1197965824.0, - "14": 1197965824.0, - "15": 1197965824.0, - "16": 1197965824.0, - "17": 1197965824.0, - "18": 
1197965824.0, - "19": 1197965824.0, - "20": 1197965824.0, - "21": 1197965824.0, - "22": 1198112768.0, - "23": 1198112768.0, - "24": 1198112768.0, - "25": 1198112768.0, - "26": 1198112768.0, - "27": 1198112768.0, - "28": 1198112768.0, - "29": 1198112768.0, - "30": 1198112768.0, - "31": 1198112768.0, - "32": 1198112768.0, - "33": 1198112768.0, - "34": 1198112768.0, - "35": 1198112768.0, - "36": 1198112768.0, - "37": 1198112768.0, - "38": 1198112768.0, - "39": 1198112768.0, - "40": 1198333440.0, - "41": 1198333440.0, - "42": 1198333440.0, - "43": 1198444032.0, - "44": 1198444032.0, - "45": 1198444032.0, - "46": 1198444032.0, - "47": 1198444032.0, - "48": 1198444032.0, - "49": 1198444032.0, - "50": 1198444032.0 + "1": 1029256704.0, + "2": 1193177088.0, + "3": 1193177088.0, + "4": 1193686016.0, + "5": 1193686016.0, + "6": 1193686016.0, + "7": 1193686016.0, + "8": 1193686016.0, + "9": 1193771520.0, + "10": 1193771520.0, + "11": 1193771520.0, + "12": 1193771520.0, + "13": 1193771520.0, + "14": 1193771520.0, + "15": 1193771520.0, + "16": 1193771520.0, + "17": 1193771520.0, + "18": 1193771520.0, + "19": 1193771520.0, + "20": 1193771520.0, + "21": 1193771520.0, + "22": 1193918464.0, + "23": 1193918464.0, + "24": 1193918464.0, + "25": 1193918464.0, + "26": 1193918464.0, + "27": 1193918464.0, + "28": 1193918464.0, + "29": 1193918464.0, + "30": 1193918464.0, + "31": 1193918464.0, + "32": 1193918464.0, + "33": 1193918464.0, + "34": 1193918464.0, + "35": 1193918464.0, + "36": 1193918464.0, + "37": 1193918464.0, + "38": 1193918464.0, + "39": 1193918464.0, + "40": 1194139136.0, + "41": 1194139136.0, + "42": 1194139136.0, + "43": 1194249728.0, + "44": 1194249728.0, + "45": 1194249728.0, + "46": 1194249728.0, + "47": 1194249728.0, + "48": 1194249728.0, + "49": 1194249728.0, + "50": 1194249728.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 12.49228, - "2": 0.63481, - "3": 0.56951, - "4": 0.57807, - "5": 0.581, - "6": 0.58159, - 
"7": 0.5705, - "8": 0.56929, - "9": 0.56794, - "10": 0.56314, - "11": 0.57935, - "12": 0.57294, - "13": 0.56865, - "14": 0.56698, - "15": 0.56505, - "16": 0.56266, - "17": 0.56337, - "18": 0.56237, - "19": 0.56197, - "20": 0.5626, - "21": 0.5642, - "22": 0.56373, - "23": 0.57291, - "24": 0.56432, - "25": 0.56287, - "26": 0.56295, - "27": 0.56146, - "28": 0.56459, - "29": 0.56415, - "30": 0.56587, - "31": 0.5671, - "32": 0.56896, - "33": 0.57526, - "34": 0.57281, - "35": 0.57407, - "36": 0.57321, - "37": 0.57403, - "38": 0.57296, - "39": 0.57248, - "40": 0.57089, - "41": 0.57201, - "42": 0.5661, - "43": 0.57044, - "44": 0.56777, - "45": 0.56877, - "46": 0.57143, - "47": 0.57031, - "48": 0.56952, - "49": 0.57353, - "50": 0.56636 + "1": 6.42109, + "2": 0.63984, + "3": 0.57811, + "4": 0.56134, + "5": 0.56563, + "6": 0.56363, + "7": 0.56774, + "8": 0.56212, + "9": 0.56082, + "10": 0.55677, + "11": 0.55824, + "12": 0.55917, + "13": 0.55878, + "14": 0.55777, + "15": 0.5601, + "16": 0.5566, + "17": 0.55819, + "18": 0.55905, + "19": 0.55832, + "20": 0.55798, + "21": 0.56392, + "22": 0.55882, + "23": 0.55672, + "24": 0.55578, + "25": 0.559, + "26": 0.55625, + "27": 0.55438, + "28": 0.55769, + "29": 0.55694, + "30": 0.55738, + "31": 0.55917, + "32": 0.55757, + "33": 0.55756, + "34": 0.55564, + "35": 0.557, + "36": 0.55678, + "37": 0.55963, + "38": 0.55693, + "39": 0.55382, + "40": 0.55644, + "41": 0.55445, + "42": 0.55427, + "43": 0.55749, + "44": 0.55808, + "45": 0.56177, + "46": 0.57237, + "47": 0.55947, + "48": 0.55498, + "49": 0.55635, + "50": 0.55639 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json index d859d8da902..b4462fc931e 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.79175, "5": 10.82288, "10": 10.7688, "15": 10.79157, "20": 10.71001, "25": 10.54662, "30": 10.39407, "35": 10.30461, "40": 10.13303, "45": 9.90015, "50": 9.97874}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 5656.0, "5": 6476.0, "10": 5453.0, "15": 6233.0, "20": 5837.0, "25": 5811.0, "30": 6047.0, "35": 6712.0, "40": 7062.0, "45": 6681.0, "50": 7527.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 458213888.0, "5": 458213376.0, "10": 458215936.0, "15": 458215424.0, "20": 458214400.0, "25": 458211840.0, "30": 458211840.0, "35": 458215936.0, "40": 458213376.0, "45": 458214400.0, "50": 458214912.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1016114688.0, "5": 1180862464.0, "10": 1181913600.0, "15": 1181913600.0, "20": 1181913600.0, "25": 1181913600.0, "30": 1181913600.0, "35": 1181913600.0, "40": 1181913600.0, "45": 1181913600.0, "50": 1181913600.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 6.22686, "5": 0.5556, "10": 0.55416, "15": 0.55551, "20": 0.55398, "25": 0.55449, "30": 0.59353, "35": 0.55443, "40": 0.55473, "45": 0.55192, "50": 0.55296}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.79175, + "2": 10.80907, + "3": 10.81011, + "4": 10.78146, + "5": 10.82288, + "6": 10.84057, + "7": 10.81192, + "8": 10.80005, + "9": 10.81667, + "10": 10.7688, + "11": 10.8618, + "12": 10.84042, + "13": 10.84452, + "14": 10.86421, + "15": 10.79157, + 
"16": 10.78199, + "17": 10.75122, + "18": 10.79446, + "19": 10.79523, + "20": 10.71001, + "21": 10.68811, + "22": 10.53736, + "23": 10.7066, + "24": 10.58865, + "25": 10.54662, + "26": 10.59492, + "27": 10.62142, + "28": 10.5969, + "29": 10.60036, + "30": 10.39407, + "31": 10.12951, + "32": 10.49684, + "33": 10.48779, + "34": 10.24347, + "35": 10.30461, + "36": 10.26056, + "37": 10.38859, + "38": 10.24848, + "39": 10.43799, + "40": 10.13303, + "41": 10.18651, + "42": 10.25823, + "43": 9.892, + "44": 10.02576, + "45": 9.90015, + "46": 9.88387, + "47": 10.19565, + "48": 9.91255, + "49": 9.60147, + "50": 9.97874 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 5656.0, + "2": 6018.0, + "3": 5790.0, + "4": 5941.0, + "5": 6476.0, + "6": 6653.0, + "7": 6287.0, + "8": 5875.0, + "9": 6239.0, + "10": 5453.0, + "11": 6936.0, + "12": 6711.0, + "13": 6655.0, + "14": 6814.0, + "15": 6233.0, + "16": 6533.0, + "17": 6397.0, + "18": 6112.0, + "19": 6678.0, + "20": 5837.0, + "21": 6403.0, + "22": 5715.0, + "23": 6744.0, + "24": 6051.0, + "25": 5811.0, + "26": 6104.0, + "27": 6484.0, + "28": 6884.0, + "29": 7253.0, + "30": 6047.0, + "31": 5593.0, + "32": 6625.0, + "33": 7054.0, + "34": 6104.0, + "35": 6712.0, + "36": 6684.0, + "37": 7523.0, + "38": 7273.0, + "39": 7620.0, + "40": 7062.0, + "41": 6895.0, + "42": 7426.0, + "43": 6713.0, + "44": 6664.0, + "45": 6681.0, + "46": 6923.0, + "47": 7705.0, + "48": 7248.0, + "49": 7331.0, + "50": 7527.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 458213888.0, + "2": 458211840.0, + "3": 458215424.0, + "4": 458211840.0, + "5": 458213376.0, + "6": 458213888.0, + "7": 458216448.0, + "8": 458216448.0, + "9": 458212864.0, + "10": 458215936.0, + "11": 458213888.0, + "12": 458213888.0, + "13": 458214400.0, + "14": 458215424.0, + "15": 458215424.0, + "16": 458212864.0, + "17": 458214400.0, + "18": 458214400.0, + "19": 
458214400.0, + "20": 458214400.0, + "21": 458211840.0, + "22": 458218496.0, + "23": 458214912.0, + "24": 458214400.0, + "25": 458211840.0, + "26": 458215936.0, + "27": 458210816.0, + "28": 458213888.0, + "29": 458212864.0, + "30": 458211840.0, + "31": 458219008.0, + "32": 458214400.0, + "33": 458214912.0, + "34": 458211840.0, + "35": 458215936.0, + "36": 458212864.0, + "37": 458215424.0, + "38": 458213888.0, + "39": 458213888.0, + "40": 458213376.0, + "41": 458216960.0, + "42": 458215424.0, + "43": 458216960.0, + "44": 458213376.0, + "45": 458214400.0, + "46": 458216448.0, + "47": 458213376.0, + "48": 458213888.0, + "49": 458215424.0, + "50": 458214912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1029256704.0, + "2": 1193177088.0, + "3": 1193177088.0, + "4": 1193686016.0, + "5": 1193686016.0, + "6": 1193686016.0, + "7": 1193686016.0, + "8": 1193686016.0, + "9": 1193771520.0, + "10": 1193771520.0, + "11": 1193771520.0, + "12": 1193771520.0, + "13": 1193771520.0, + "14": 1193771520.0, + "15": 1193771520.0, + "16": 1193771520.0, + "17": 1193771520.0, + "18": 1193771520.0, + "19": 1193771520.0, + "20": 1193771520.0, + "21": 1193771520.0, + "22": 1193918464.0, + "23": 1193918464.0, + "24": 1193918464.0, + "25": 1193918464.0, + "26": 1193918464.0, + "27": 1193918464.0, + "28": 1193918464.0, + "29": 1193918464.0, + "30": 1193918464.0, + "31": 1193918464.0, + "32": 1193918464.0, + "33": 1193918464.0, + "34": 1193918464.0, + "35": 1193918464.0, + "36": 1193918464.0, + "37": 1193918464.0, + "38": 1193918464.0, + "39": 1193918464.0, + "40": 1194139136.0, + "41": 1194139136.0, + "42": 1194139136.0, + "43": 1194249728.0, + "44": 1194249728.0, + "45": 1194249728.0, + "46": 1194249728.0, + "47": 1194249728.0, + "48": 1194249728.0, + "49": 1194249728.0, + "50": 1194249728.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6.42299, + "2": 
0.59069, + "3": 0.56496, + "4": 0.54736, + "5": 0.54792, + "6": 0.57731, + "7": 0.54778, + "8": 0.54659, + "9": 0.54833, + "10": 0.54497, + "11": 0.55076, + "12": 0.55595, + "13": 0.54721, + "14": 0.54614, + "15": 0.5457, + "16": 0.54774, + "17": 0.54518, + "18": 0.54582, + "19": 0.5467, + "20": 0.54611, + "21": 0.54622, + "22": 0.54617, + "23": 0.54622, + "24": 0.54547, + "25": 0.54796, + "26": 0.54413, + "27": 0.5458, + "28": 0.54598, + "29": 0.54813, + "30": 0.54556, + "31": 0.54684, + "32": 0.54789, + "33": 0.57275, + "34": 0.54705, + "35": 0.54545, + "36": 0.54414, + "37": 0.54225, + "38": 0.54504, + "39": 0.54284, + "40": 0.54185, + "41": 0.54578, + "42": 0.54542, + "43": 0.54621, + "44": 0.54447, + "45": 0.54521, + "46": 0.5449, + "47": 0.54529, + "48": 0.54403, + "49": 0.56089, + "50": 0.54374 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json index eb4665ad7e2..64dc8751e92 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 491766784.0, - "2": 491767296.0, - "3": 491765760.0, - "4": 491767296.0, - "5": 491766784.0, - "6": 491767808.0, - "7": 491767296.0, - "8": 491768320.0, - "9": 491767808.0, - "10": 491767296.0, - "11": 491765248.0, - "12": 491764736.0, - "13": 491766272.0, - "14": 491767808.0, - "15": 491768832.0, - "16": 491769856.0, - "17": 491767296.0, - "18": 491765248.0, - "19": 491766272.0, - "20": 491766784.0, - "21": 491768320.0, - "22": 491768320.0, - "23": 491765760.0, - "24": 491766272.0, - "25": 
491766272.0, - "26": 491767296.0, - "27": 491766784.0, - "28": 491767296.0, - "29": 491766272.0, - "30": 491766272.0, - "31": 491767808.0, - "32": 491765760.0, - "33": 491764736.0, - "34": 491768320.0, - "35": 491769344.0, - "36": 491765760.0, - "37": 491765248.0, - "38": 491766272.0, - "39": 491767808.0, - "40": 491765760.0, - "41": 491768320.0, - "42": 491766272.0, - "43": 491768832.0, - "44": 491768320.0, - "45": 491765248.0, - "46": 491768320.0, - "47": 491765760.0, - "48": 491766784.0, - "49": 491766784.0, - "50": 491765248.0 + "1": 458212352.0, + "2": 458212864.0, + "3": 458211328.0, + "4": 458212864.0, + "5": 458212352.0, + "6": 458213376.0, + "7": 458212864.0, + "8": 458213888.0, + "9": 458213376.0, + "10": 458212864.0, + "11": 458210816.0, + "12": 458210304.0, + "13": 458211840.0, + "14": 458213376.0, + "15": 458214400.0, + "16": 458215424.0, + "17": 458212864.0, + "18": 458210816.0, + "19": 458211840.0, + "20": 458212352.0, + "21": 458213888.0, + "22": 458213888.0, + "23": 458211328.0, + "24": 458211840.0, + "25": 458211840.0, + "26": 458212864.0, + "27": 458212352.0, + "28": 458212864.0, + "29": 458211840.0, + "30": 458211840.0, + "31": 458213376.0, + "32": 458211328.0, + "33": 458210304.0, + "34": 458213888.0, + "35": 458214912.0, + "36": 458211328.0, + "37": 458210816.0, + "38": 458211840.0, + "39": 458213376.0, + "40": 458211328.0, + "41": 458213888.0, + "42": 458211840.0, + "43": 458214400.0, + "44": 458213888.0, + "45": 458210816.0, + "46": 458213888.0, + "47": 458211328.0, + "48": 458212352.0, + "49": 458212352.0, + "50": 458210816.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1047229440.0, - "2": 1213900288.0, - "3": 1213900288.0, - "4": 1213900288.0, - "5": 1213900288.0, - "6": 1213900288.0, - "7": 1213900288.0, - "8": 1213900288.0, - "9": 1213900288.0, - "10": 1213900288.0, - "11": 1213900288.0, - "12": 1213900288.0, - "13": 1213900288.0, - "14": 1213900288.0, - "15": 
1213900288.0, - "16": 1213900288.0, - "17": 1213900288.0, - "18": 1213900288.0, - "19": 1213900288.0, - "20": 1213900288.0, - "21": 1213900288.0, - "22": 1213900288.0, - "23": 1213900288.0, - "24": 1213900288.0, - "25": 1213900288.0, - "26": 1213900288.0, - "27": 1213900288.0, - "28": 1213900288.0, - "29": 1213900288.0, - "30": 1213900288.0, - "31": 1213900288.0, - "32": 1213900288.0, - "33": 1213900288.0, - "34": 1213900288.0, - "35": 1213900288.0, - "36": 1213900288.0, - "37": 1213900288.0, - "38": 1213900288.0, - "39": 1213900288.0, - "40": 1213900288.0, - "41": 1213900288.0, - "42": 1213900288.0, - "43": 1213900288.0, - "44": 1213900288.0, - "45": 1213900288.0, - "46": 1213900288.0, - "47": 1213900288.0, - "48": 1213900288.0, - "49": 1213900288.0, - "50": 1213900288.0 + "1": 1026068480.0, + "2": 1192152064.0, + "3": 1192152064.0, + "4": 1192205312.0, + "5": 1192205312.0, + "6": 1192205312.0, + "7": 1192205312.0, + "8": 1192205312.0, + "9": 1192205312.0, + "10": 1192205312.0, + "11": 1192205312.0, + "12": 1192205312.0, + "13": 1192349184.0, + "14": 1192349184.0, + "15": 1192506368.0, + "16": 1192506368.0, + "17": 1192506368.0, + "18": 1192506368.0, + "19": 1192506368.0, + "20": 1192506368.0, + "21": 1192506368.0, + "22": 1192506368.0, + "23": 1192506368.0, + "24": 1192506368.0, + "25": 1192506368.0, + "26": 1192506368.0, + "27": 1192506368.0, + "28": 1192506368.0, + "29": 1192506368.0, + "30": 1192506368.0, + "31": 1192506368.0, + "32": 1192506368.0, + "33": 1192506368.0, + "34": 1192506368.0, + "35": 1192506368.0, + "36": 1192506368.0, + "37": 1192506368.0, + "38": 1192506368.0, + "39": 1192506368.0, + "40": 1192506368.0, + "41": 1192506368.0, + "42": 1192506368.0, + "43": 1192506368.0, + "44": 1192506368.0, + "45": 1192506368.0, + "46": 1192506368.0, + "47": 1192506368.0, + "48": 1192506368.0, + "49": 1192506368.0, + "50": 1192506368.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 13.31352, - "2": 
0.50754, - "3": 0.44486, - "4": 0.4668, - "5": 0.42238, - "6": 0.42115, - "7": 0.42604, - "8": 0.4217, - "9": 0.42265, - "10": 0.41522, - "11": 0.41976, - "12": 0.41287, - "13": 0.42113, - "14": 0.41948, - "15": 0.4211, - "16": 0.41519, - "17": 0.42043, - "18": 0.415, - "19": 0.42142, - "20": 0.42878, - "21": 0.4145, - "22": 0.42054, - "23": 0.41581, - "24": 0.42934, - "25": 0.43897, - "26": 0.42648, - "27": 0.42242, - "28": 0.42576, - "29": 0.42795, - "30": 0.42485, - "31": 0.43439, - "32": 0.42257, - "33": 0.41924, - "34": 0.43519, - "35": 0.43865, - "36": 0.42518, - "37": 0.42435, - "38": 0.42597, - "39": 0.42134, - "40": 0.42937, - "41": 0.42822, - "42": 0.42413, - "43": 0.44197, - "44": 0.42413, - "45": 0.42687, - "46": 0.46081, - "47": 0.45208, - "48": 0.43527, - "49": 0.44658, - "50": 0.41965 + "1": 13.13083, + "2": 0.49339, + "3": 0.43067, + "4": 0.43124, + "5": 0.38622, + "6": 0.39174, + "7": 0.39833, + "8": 0.39421, + "9": 0.3937, + "10": 0.38682, + "11": 0.39333, + "12": 0.38647, + "13": 0.38364, + "14": 0.38374, + "15": 0.38593, + "16": 0.38263, + "17": 0.39915, + "18": 0.38564, + "19": 0.38954, + "20": 0.38955, + "21": 0.38216, + "22": 0.38466, + "23": 0.38551, + "24": 0.38195, + "25": 0.38416, + "26": 0.38554, + "27": 0.38123, + "28": 0.38882, + "29": 0.43011, + "30": 0.38995, + "31": 0.39202, + "32": 0.38203, + "33": 0.38777, + "34": 0.39058, + "35": 0.39634, + "36": 0.38496, + "37": 0.38112, + "38": 0.38052, + "39": 0.37771, + "40": 0.38438, + "41": 0.38696, + "42": 0.38029, + "43": 0.39638, + "44": 0.38187, + "45": 0.38285, + "46": 0.42266, + "47": 0.3977, + "48": 0.39566, + "49": 0.40884, + "50": 0.38389 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json index 4ebfff8da76..3f86a0b644a 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 462408192.0, - "2": 462406144.0, - "3": 462409728.0, - "4": 462406144.0, - "5": 462407680.0, - "6": 462408192.0, - "7": 462410752.0, - "8": 462410752.0, - "9": 462407168.0, - "10": 462410240.0, - "11": 462408192.0, - "12": 462408192.0, - "13": 462408704.0, - "14": 462409728.0, - "15": 462409728.0, - "16": 462407168.0, - "17": 462408704.0, - "18": 462408704.0, - "19": 462408704.0, - "20": 462408704.0, - "21": 462406144.0, - "22": 462412800.0, - "23": 462409216.0, - "24": 462408704.0, - "25": 462406144.0, - "26": 462410240.0, - "27": 462405120.0, - "28": 462408192.0, - "29": 462407168.0, - "30": 462406144.0, - "31": 462413312.0, - "32": 462408704.0, - "33": 462409216.0, - "34": 462406144.0, - "35": 462410240.0, - "36": 462407168.0, - "37": 462409728.0, - "38": 462408192.0, - "39": 462408192.0, - "40": 462407680.0, - "41": 462411264.0, - "42": 462409728.0, - "43": 462411264.0, - "44": 462407680.0, - "45": 462408704.0, - "46": 462410752.0, - "47": 462407680.0, - "48": 462408192.0, - "49": 462409728.0, - "50": 462409216.0 + "1": 458213888.0, + "2": 458211840.0, + "3": 458215424.0, + "4": 458211840.0, + "5": 458213376.0, + "6": 458213888.0, + "7": 458216448.0, + "8": 458216448.0, + "9": 458212864.0, + "10": 458215936.0, + "11": 458213888.0, + "12": 458213888.0, + "13": 458214400.0, + "14": 458215424.0, + "15": 458215424.0, + "16": 458212864.0, + "17": 458214400.0, + "18": 458214400.0, + "19": 458214400.0, + "20": 458214400.0, + "21": 458211840.0, + "22": 458218496.0, + "23": 458214912.0, + "24": 458214400.0, + "25": 458211840.0, + "26": 458215936.0, + "27": 458210816.0, + "28": 458213888.0, + "29": 458212864.0, + "30": 458211840.0, + 
"31": 458219008.0, + "32": 458214400.0, + "33": 458214912.0, + "34": 458211840.0, + "35": 458215936.0, + "36": 458212864.0, + "37": 458215424.0, + "38": 458213888.0, + "39": 458213888.0, + "40": 458213376.0, + "41": 458216960.0, + "42": 458215424.0, + "43": 458216960.0, + "44": 458213376.0, + "45": 458214400.0, + "46": 458216448.0, + "47": 458213376.0, + "48": 458213888.0, + "49": 458215424.0, + "50": 458214912.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1033451008.0, - "2": 1197371392.0, - "3": 1197371392.0, - "4": 1197880320.0, - "5": 1197880320.0, - "6": 1197880320.0, - "7": 1197880320.0, - "8": 1197880320.0, - "9": 1197965824.0, - "10": 1197965824.0, - "11": 1197965824.0, - "12": 1197965824.0, - "13": 1197965824.0, - "14": 1197965824.0, - "15": 1197965824.0, - "16": 1197965824.0, - "17": 1197965824.0, - "18": 1197965824.0, - "19": 1197965824.0, - "20": 1197965824.0, - "21": 1197965824.0, - "22": 1198112768.0, - "23": 1198112768.0, - "24": 1198112768.0, - "25": 1198112768.0, - "26": 1198112768.0, - "27": 1198112768.0, - "28": 1198112768.0, - "29": 1198112768.0, - "30": 1198112768.0, - "31": 1198112768.0, - "32": 1198112768.0, - "33": 1198112768.0, - "34": 1198112768.0, - "35": 1198112768.0, - "36": 1198112768.0, - "37": 1198112768.0, - "38": 1198112768.0, - "39": 1198112768.0, - "40": 1198333440.0, - "41": 1198333440.0, - "42": 1198333440.0, - "43": 1198444032.0, - "44": 1198444032.0, - "45": 1198444032.0, - "46": 1198444032.0, - "47": 1198444032.0, - "48": 1198444032.0, - "49": 1198444032.0, - "50": 1198444032.0 + "1": 1029256704.0, + "2": 1193177088.0, + "3": 1193177088.0, + "4": 1193686016.0, + "5": 1193686016.0, + "6": 1193686016.0, + "7": 1193686016.0, + "8": 1193686016.0, + "9": 1193771520.0, + "10": 1193771520.0, + "11": 1193771520.0, + "12": 1193771520.0, + "13": 1193771520.0, + "14": 1193771520.0, + "15": 1193771520.0, + "16": 1193771520.0, + "17": 1193771520.0, + "18": 1193771520.0, + 
"19": 1193771520.0, + "20": 1193771520.0, + "21": 1193771520.0, + "22": 1193918464.0, + "23": 1193918464.0, + "24": 1193918464.0, + "25": 1193918464.0, + "26": 1193918464.0, + "27": 1193918464.0, + "28": 1193918464.0, + "29": 1193918464.0, + "30": 1193918464.0, + "31": 1193918464.0, + "32": 1193918464.0, + "33": 1193918464.0, + "34": 1193918464.0, + "35": 1193918464.0, + "36": 1193918464.0, + "37": 1193918464.0, + "38": 1193918464.0, + "39": 1193918464.0, + "40": 1194139136.0, + "41": 1194139136.0, + "42": 1194139136.0, + "43": 1194249728.0, + "44": 1194249728.0, + "45": 1194249728.0, + "46": 1194249728.0, + "47": 1194249728.0, + "48": 1194249728.0, + "49": 1194249728.0, + "50": 1194249728.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 8.27777, - "2": 0.60806, - "3": 0.55409, - "4": 0.55324, - "5": 0.54815, - "6": 0.54698, - "7": 0.54712, - "8": 0.55008, - "9": 0.55718, - "10": 0.55527, - "11": 0.55082, - "12": 0.56208, - "13": 0.55625, - "14": 0.55717, - "15": 0.56582, - "16": 0.55953, - "17": 0.57188, - "18": 0.55508, - "19": 0.55956, - "20": 0.55934, - "21": 0.55676, - "22": 0.55842, - "23": 0.55867, - "24": 0.55987, - "25": 0.55941, - "26": 0.55642, - "27": 0.55364, - "28": 0.55209, - "29": 0.55397, - "30": 0.55602, - "31": 0.55344, - "32": 0.55195, - "33": 0.56308, - "34": 0.55588, - "35": 0.55251, - "36": 0.55314, - "37": 0.55563, - "38": 0.56708, - "39": 0.5661, - "40": 0.56725, - "41": 0.5663, - "42": 0.56565, - "43": 0.5725, - "44": 0.56736, - "45": 0.5674, - "46": 0.56751, - "47": 0.56642, - "48": 0.56257, - "49": 0.56841, - "50": 0.56452 + "1": 6.51772, + "2": 0.67032, + "3": 0.58012, + "4": 0.56416, + "5": 0.56277, + "6": 0.56185, + "7": 0.56613, + "8": 0.56306, + "9": 0.55846, + "10": 0.55676, + "11": 0.58727, + "12": 0.58309, + "13": 0.58685, + "14": 0.57988, + "15": 0.57248, + "16": 0.5838, + "17": 0.58349, + "18": 0.57587, + "19": 0.57576, + "20": 0.56068, + "21": 0.56288, + "22": 0.5656, + 
"23": 0.56764, + "24": 0.55796, + "25": 0.5651, + "26": 0.56407, + "27": 0.56035, + "28": 0.5648, + "29": 0.55018, + "30": 0.55186, + "31": 0.64216, + "32": 0.64815, + "33": 0.64922, + "34": 0.64899, + "35": 0.65107, + "36": 0.64829, + "37": 0.64814, + "38": 0.64822, + "39": 0.64955, + "40": 0.61641, + "41": 0.5534, + "42": 0.55493, + "43": 0.55548, + "44": 0.55538, + "45": 0.55475, + "46": 0.5581, + "47": 0.55771, + "48": 0.5557, + "49": 0.55591, + "50": 0.5552 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json index 52611762241..e752e7d8fe0 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json @@ -1 +1,287 @@ -{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82004, "5": 10.84794, "10": 10.79461, "15": 10.82911, "20": 10.73175, "25": 10.57964, "30": 10.40859, "35": 10.31503, "40": 10.14367, "45": 9.914, "50": 9.97565}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 12826.0, "5": 15951.0, "10": 12611.0, "15": 14834.0, "20": 13675.0, "25": 13129.0, "30": 14652.0, "35": 15183.0, "40": 16971.0, "45": 16188.0, "50": 18998.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 624217088.0, "5": 624219648.0, "10": 624217600.0, "15": 624222208.0, "20": 624221184.0, "25": 624558080.0, "30": 624215552.0, "35": 624218624.0, "40": 624219136.0, "45": 624218112.0, "50": 624219648.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1818388480.0, "5": 2048155136.0, "10": 2049900032.0, "15": 2049900032.0, 
"20": 2049900032.0, "25": 2049900032.0, "30": 2049900032.0, "35": 2049900032.0, "40": 2049900032.0, "45": 2049900032.0, "50": 2049900032.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 7.11963, "5": 0.34973, "10": 0.34623, "15": 0.3403, "20": 0.34061, "25": 0.3401, "30": 0.34214, "35": 0.3402, "40": 0.37279, "45": 0.33997, "50": 0.33985}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.82004, + "2": 10.8392, + "3": 10.81124, + "4": 10.81983, + "5": 10.84794, + "6": 10.8608, + "7": 10.84085, + "8": 10.84432, + "9": 10.8504, + "10": 10.79461, + "11": 10.85658, + "12": 10.84848, + "13": 10.86929, + "14": 10.8667, + "15": 10.82911, + "16": 10.81111, + "17": 10.79027, + "18": 10.80981, + "19": 10.81143, + "20": 10.73175, + "21": 10.71285, + "22": 10.58199, + "23": 10.72, + "24": 10.61704, + "25": 10.57964, + "26": 10.63372, + "27": 10.6365, + "28": 10.60641, + "29": 10.61561, + "30": 10.40859, + "31": 10.17068, + "32": 10.49958, + "33": 10.4963, + "34": 10.25574, + "35": 10.31503, + "36": 10.28536, + "37": 10.38742, + "38": 10.24674, + "39": 10.44222, + "40": 10.14384, + "41": 10.19169, + "42": 10.25683, + "43": 9.90704, + "44": 10.02666, + "45": 9.91412, + "46": 9.89643, + "47": 10.18881, + "48": 9.93025, + "49": 9.61398, + "50": 9.97515 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 12826.0, + "2": 14613.0, + "3": 14549.0, + "4": 13422.0, + "5": 15951.0, + "6": 16055.0, + "7": 15208.0, + "8": 12944.0, + "9": 15110.0, + "10": 12611.0, + "11": 16586.0, + "12": 14954.0, + "13": 15925.0, + "14": 16182.0, + "15": 14834.0, + "16": 16023.0, + "17": 15486.0, + "18": 15116.0, + "19": 15584.0, + "20": 13675.0, + "21": 13873.0, + "22": 12917.0, + "23": 16766.0, + "24": 13924.0, + "25": 13129.0, + "26": 14794.0, + "27": 15169.0, + "28": 16393.0, + "29": 16719.0, + "30": 14652.0, + "31": 13126.0, + 
"32": 15987.0, + "33": 17372.0, + "34": 14206.0, + "35": 15183.0, + "36": 15837.0, + "37": 17507.0, + "38": 16382.0, + "39": 18071.0, + "40": 16755.0, + "41": 16757.0, + "42": 17222.0, + "43": 15308.0, + "44": 15173.0, + "45": 16243.0, + "46": 17454.0, + "47": 19165.0, + "48": 16552.0, + "49": 16282.0, + "50": 19162.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 625695744.0, + "2": 625700352.0, + "3": 625698304.0, + "4": 625694720.0, + "5": 625771008.0, + "6": 625698304.0, + "7": 625713664.0, + "8": 625698304.0, + "9": 625696768.0, + "10": 626427392.0, + "11": 626528256.0, + "12": 625700864.0, + "13": 625701376.0, + "14": 625740288.0, + "15": 625700864.0, + "16": 625891840.0, + "17": 625693184.0, + "18": 625699840.0, + "19": 625699840.0, + "20": 625699840.0, + "21": 625711616.0, + "22": 625694720.0, + "23": 626073088.0, + "24": 626040832.0, + "25": 626703360.0, + "26": 625732096.0, + "27": 625732096.0, + "28": 625745408.0, + "29": 625777664.0, + "30": 625699328.0, + "31": 625959936.0, + "32": 625695232.0, + "33": 625698304.0, + "34": 625747968.0, + "35": 625720832.0, + "36": 625694720.0, + "37": 625883136.0, + "38": 625796096.0, + "39": 625697280.0, + "40": 625727488.0, + "41": 625707520.0, + "42": 625724416.0, + "43": 625731584.0, + "44": 625759232.0, + "45": 625696256.0, + "46": 625780224.0, + "47": 625701888.0, + "48": 625842688.0, + "49": 626536960.0, + "50": 625698816.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1843612672.0, + "2": 2073786880.0, + "3": 2074433024.0, + "4": 2074433024.0, + "5": 2074806784.0, + "6": 2075118080.0, + "7": 2076633600.0, + "8": 2076633600.0, + "9": 2076633600.0, + "10": 2076633600.0, + "11": 2076633600.0, + "12": 2076633600.0, + "13": 2076673536.0, + "14": 2076673536.0, + "15": 2076673536.0, + "16": 2076673536.0, + "17": 2076673536.0, + "18": 2076673536.0, + "19": 2076673536.0, + "20": 
2076673536.0, + "21": 2076673536.0, + "22": 2076673536.0, + "23": 2076673536.0, + "24": 2076673536.0, + "25": 2076673536.0, + "26": 2076673536.0, + "27": 2076673536.0, + "28": 2076673536.0, + "29": 2076673536.0, + "30": 2076673536.0, + "31": 2076673536.0, + "32": 2076673536.0, + "33": 2076673536.0, + "34": 2076673536.0, + "35": 2076673536.0, + "36": 2076673536.0, + "37": 2076673536.0, + "38": 2076673536.0, + "39": 2076673536.0, + "40": 2076673536.0, + "41": 2076673536.0, + "42": 2076673536.0, + "43": 2076673536.0, + "44": 2076673536.0, + "45": 2076673536.0, + "46": 2076673536.0, + "47": 2076673536.0, + "48": 2076673536.0, + "49": 2076673536.0, + "50": 2076673536.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 6.71779, + "2": 0.4021, + "3": 0.34522, + "4": 0.32028, + "5": 0.32155, + "6": 0.32036, + "7": 0.32793, + "8": 0.31946, + "9": 0.32227, + "10": 0.32133, + "11": 0.3212, + "12": 0.32189, + "13": 0.32578, + "14": 0.3194, + "15": 0.32101, + "16": 0.3216, + "17": 0.32118, + "18": 0.3199, + "19": 0.32019, + "20": 0.32361, + "21": 0.32862, + "22": 0.32239, + "23": 0.31961, + "24": 0.31968, + "25": 0.32024, + "26": 0.31969, + "27": 0.31928, + "28": 0.32117, + "29": 0.32074, + "30": 0.32265, + "31": 0.32078, + "32": 0.32625, + "33": 0.32431, + "34": 0.3229, + "35": 0.32227, + "36": 0.32535, + "37": 0.32428, + "38": 0.31953, + "39": 0.32251, + "40": 0.32338, + "41": 0.32439, + "42": 0.32389, + "43": 0.32348, + "44": 0.32363, + "45": 0.32303, + "46": 0.32406, + "47": 0.32367, + "48": 0.32364, + "49": 0.32375, + "50": 0.32234 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json index eb013c007ca..8928145fcbb 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 659394560.0, - "2": 659346944.0, - "3": 659401728.0, - "4": 659351040.0, - "5": 659623424.0, - "6": 659348480.0, - "7": 659508736.0, - "8": 659353088.0, - "9": 659383296.0, - "10": 659347456.0, - "11": 659350016.0, - "12": 659437056.0, - "13": 659356160.0, - "14": 659702272.0, - "15": 659658240.0, - "16": 659450880.0, - "17": 659438080.0, - "18": 659384320.0, - "19": 659492352.0, - "20": 659372544.0, - "21": 659350016.0, - "22": 659347456.0, - "23": 659348992.0, - "24": 659430400.0, - "25": 659347968.0, - "26": 659378176.0, - "27": 659353088.0, - "28": 659346944.0, - "29": 659440640.0, - "30": 659732480.0, - "31": 659361792.0, - "32": 659345920.0, - "33": 659473920.0, - "34": 660008448.0, - "35": 659819520.0, - "36": 659363840.0, - "37": 659418624.0, - "38": 659351040.0, - "39": 659449344.0, - "40": 659586560.0, - "41": 659387392.0, - "42": 659476480.0, - "43": 659567104.0, - "44": 659344384.0, - "45": 659346944.0, - "46": 659466752.0, - "47": 659345408.0, - "48": 659835392.0, - "49": 659494400.0, - "50": 659346432.0 + "1": 625530880.0, + "2": 625483264.0, + "3": 625484800.0, + "4": 625516032.0, + "5": 625759744.0, + "6": 625774592.0, + "7": 625485312.0, + "8": 625568256.0, + "9": 625519616.0, + "10": 625655808.0, + "11": 625630720.0, + "12": 625482240.0, + "13": 625488384.0, + "14": 625819136.0, + "15": 625982976.0, + "16": 625500160.0, + "17": 625613312.0, + "18": 625494016.0, + "19": 625484288.0, + "20": 625508864.0, + "21": 625486336.0, + "22": 625486848.0, + "23": 625632768.0, + "24": 625487872.0, + "25": 625484288.0, + "26": 625753088.0, + "27": 625513984.0, + "28": 625483264.0, + "29": 625698816.0, + "30": 625967104.0, + "31": 625477632.0, + "32": 
625523200.0, + "33": 625484288.0, + "34": 625481216.0, + "35": 625479680.0, + "36": 625554432.0, + "37": 625554944.0, + "38": 625487360.0, + "39": 625504768.0, + "40": 625481216.0, + "41": 625481728.0, + "42": 625481728.0, + "43": 626760192.0, + "44": 625598464.0, + "45": 625534464.0, + "46": 625603072.0, + "47": 625509376.0, + "48": 626520576.0, + "49": 625630720.0, + "50": 625565696.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1853294080.0, - "2": 2083995136.0, - "3": 2084402688.0, - "4": 2084433408.0, - "5": 2084433408.0, - "6": 2084433408.0, - "7": 2085503488.0, - "8": 2085503488.0, - "9": 2085503488.0, - "10": 2085503488.0, - "11": 2085503488.0, - "12": 2085503488.0, - "13": 2085503488.0, - "14": 2085503488.0, - "15": 2085503488.0, - "16": 2085503488.0, - "17": 2085503488.0, - "18": 2085503488.0, - "19": 2085503488.0, - "20": 2085503488.0, - "21": 2085503488.0, - "22": 2085503488.0, - "23": 2085503488.0, - "24": 2085503488.0, - "25": 2085503488.0, - "26": 2085503488.0, - "27": 2085503488.0, - "28": 2085503488.0, - "29": 2085503488.0, - "30": 2085503488.0, - "31": 2085503488.0, - "32": 2085503488.0, - "33": 2085503488.0, - "34": 2085503488.0, - "35": 2085503488.0, - "36": 2085503488.0, - "37": 2085503488.0, - "38": 2085503488.0, - "39": 2085503488.0, - "40": 2085503488.0, - "41": 2085503488.0, - "42": 2085503488.0, - "43": 2085503488.0, - "44": 2085503488.0, - "45": 2085503488.0, - "46": 2085503488.0, - "47": 2085503488.0, - "48": 2085503488.0, - "49": 2085503488.0, - "50": 2085503488.0 + "1": 1845331456.0, + "2": 2075684352.0, + "3": 2075684352.0, + "4": 2078547456.0, + "5": 2078547456.0, + "6": 2078547456.0, + "7": 2078547456.0, + "8": 2078547456.0, + "9": 2078547456.0, + "10": 2078547456.0, + "11": 2078547456.0, + "12": 2078547456.0, + "13": 2078547456.0, + "14": 2078547456.0, + "15": 2078547456.0, + "16": 2078547456.0, + "17": 2078547456.0, + "18": 2078547456.0, + "19": 2078547456.0, + 
"20": 2078547456.0, + "21": 2078547456.0, + "22": 2078547456.0, + "23": 2078547456.0, + "24": 2078547456.0, + "25": 2078547456.0, + "26": 2078547456.0, + "27": 2078547456.0, + "28": 2078547456.0, + "29": 2078547456.0, + "30": 2078547456.0, + "31": 2078547456.0, + "32": 2078547456.0, + "33": 2078547456.0, + "34": 2078547456.0, + "35": 2078547456.0, + "36": 2078547456.0, + "37": 2078547456.0, + "38": 2078547456.0, + "39": 2078547456.0, + "40": 2078547456.0, + "41": 2078547456.0, + "42": 2078547456.0, + "43": 2078547456.0, + "44": 2078547456.0, + "45": 2078547456.0, + "46": 2078547456.0, + "47": 2078547456.0, + "48": 2078547456.0, + "49": 2078547456.0, + "50": 2078547456.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 13.92506, - "2": 0.34079, - "3": 0.28891, - "4": 0.30652, - "5": 0.27326, - "6": 0.26908, - "7": 0.28337, - "8": 0.26429, - "9": 0.27048, - "10": 0.26866, - "11": 0.28689, - "12": 0.25961, - "13": 0.26511, - "14": 0.26065, - "15": 0.27834, - "16": 0.26398, - "17": 0.26064, - "18": 0.26661, - "19": 0.26487, - "20": 0.27686, - "21": 0.26249, - "22": 0.2677, - "23": 0.26859, - "24": 0.26049, - "25": 0.26086, - "26": 0.26279, - "27": 0.25983, - "28": 0.26561, - "29": 0.26345, - "30": 0.26142, - "31": 0.30613, - "32": 0.26049, - "33": 0.26142, - "34": 0.27278, - "35": 0.25691, - "36": 0.26151, - "37": 0.25654, - "38": 0.25753, - "39": 0.2576, - "40": 0.25839, - "41": 0.27219, - "42": 0.25851, - "43": 0.2668, - "44": 0.26229, - "45": 0.27182, - "46": 0.27691, - "47": 0.26299, - "48": 0.27152, - "49": 0.31513, - "50": 0.25813 + "1": 14.22688, + "2": 0.36404, + "3": 0.28777, + "4": 0.27054, + "5": 0.24844, + "6": 0.23753, + "7": 0.2541, + "8": 0.2395, + "9": 0.23675, + "10": 0.23301, + "11": 0.25454, + "12": 0.22665, + "13": 0.23214, + "14": 0.22521, + "15": 0.24748, + "16": 0.2636, + "17": 0.2605, + "18": 0.24164, + "19": 0.24627, + "20": 0.25668, + "21": 0.24329, + "22": 0.24722, + "23": 0.25378, + "24": 
0.22642, + "25": 0.22497, + "26": 0.22495, + "27": 0.2239, + "28": 0.22848, + "29": 0.22515, + "30": 0.22501, + "31": 0.27252, + "32": 0.22744, + "33": 0.22453, + "34": 0.23411, + "35": 0.22556, + "36": 0.2278, + "37": 0.22109, + "38": 0.22459, + "39": 0.22077, + "40": 0.22097, + "41": 0.23428, + "42": 0.22009, + "43": 0.23227, + "44": 0.22717, + "45": 0.23445, + "46": 0.23886, + "47": 0.22667, + "48": 0.23204, + "49": 0.27864, + "50": 0.22287 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json index 478bae6fdec..126f22e3d75 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json @@ -41,19 +41,19 @@ "35": 10.31503, "36": 10.28536, "37": 10.38742, - "38": 10.24676, - "39": 10.44249, - "40": 10.14367, - "41": 10.19116, - "42": 10.25654, - "43": 9.90671, - "44": 10.02653, - "45": 9.914, - "46": 9.89613, - "47": 10.18885, - "48": 9.92993, - "49": 9.61419, - "50": 9.97565 + "38": 10.24674, + "39": 10.44222, + "40": 10.14384, + "41": 10.19169, + "42": 10.25683, + "43": 9.90704, + "44": 10.02666, + "45": 9.91412, + "46": 9.89643, + "47": 10.18881, + "48": 9.93025, + "49": 9.61398, + "50": 9.97515 } }, "num-zeros": { @@ -98,19 +98,19 @@ "35": 15183.0, "36": 15837.0, "37": 17507.0, - "38": 16617.0, - "39": 17712.0, - "40": 16971.0, - "41": 16795.0, - "42": 17304.0, - "43": 15578.0, - "44": 15564.0, - "45": 16188.0, - "46": 17443.0, - "47": 19238.0, - "48": 16575.0, - "49": 16273.0, - "50": 18998.0 + "38": 16382.0, + "39": 18071.0, + "40": 16755.0, + "41": 16757.0, + "42": 17222.0, + "43": 15308.0, + "44": 15173.0, + "45": 16243.0, + "46": 17454.0, + "47": 19165.0, + 
"48": 16552.0, + "49": 16282.0, + "50": 19162.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 629738496.0, - "2": 629705216.0, - "3": 629710336.0, - "4": 629699584.0, - "5": 629732864.0, - "6": 629703168.0, - "7": 629718528.0, - "8": 629722624.0, - "9": 629763072.0, - "10": 629733888.0, - "11": 629810176.0, - "12": 629705728.0, - "13": 629848576.0, - "14": 629702144.0, - "15": 629870592.0, - "16": 629805568.0, - "17": 629698048.0, - "18": 629731328.0, - "19": 629798912.0, - "20": 629752832.0, - "21": 629716480.0, - "22": 629699584.0, - "23": 629705216.0, - "24": 629736448.0, - "25": 629699584.0, - "26": 629736960.0, - "27": 629704192.0, - "28": 629750272.0, - "29": 629728256.0, - "30": 629933568.0, - "31": 629847040.0, - "32": 629700096.0, - "33": 629703168.0, - "34": 629752832.0, - "35": 629725696.0, - "36": 629724160.0, - "37": 629702656.0, - "38": 629704192.0, - "39": 629733888.0, - "40": 629749760.0, - "41": 629700096.0, - "42": 629729280.0, - "43": 629699072.0, - "44": 629769728.0, - "45": 629713920.0, - "46": 629804544.0, - "47": 629719552.0, - "48": 629843456.0, - "49": 630007296.0, - "50": 629703168.0 + "1": 625695744.0, + "2": 625700352.0, + "3": 625698304.0, + "4": 625694720.0, + "5": 625771008.0, + "6": 625698304.0, + "7": 625713664.0, + "8": 625698304.0, + "9": 625696768.0, + "10": 626427392.0, + "11": 626528256.0, + "12": 625700864.0, + "13": 625701376.0, + "14": 625740288.0, + "15": 625700864.0, + "16": 625891840.0, + "17": 625693184.0, + "18": 625699840.0, + "19": 625699840.0, + "20": 625699840.0, + "21": 625711616.0, + "22": 625694720.0, + "23": 626073088.0, + "24": 626040832.0, + "25": 626703360.0, + "26": 625732096.0, + "27": 625732096.0, + "28": 625745408.0, + "29": 625777664.0, + "30": 625699328.0, + "31": 625959936.0, + "32": 625695232.0, + "33": 625698304.0, + "34": 625747968.0, + "35": 625720832.0, + "36": 625694720.0, + "37": 625883136.0, + "38": 625796096.0, + "39": 625697280.0, + 
"40": 625727488.0, + "41": 625707520.0, + "42": 625724416.0, + "43": 625731584.0, + "44": 625759232.0, + "45": 625696256.0, + "46": 625780224.0, + "47": 625701888.0, + "48": 625842688.0, + "49": 626536960.0, + "50": 625698816.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1847654400.0, - "2": 2077632000.0, - "3": 2078750208.0, - "4": 2078750208.0, - "5": 2079077888.0, - "6": 2079077888.0, - "7": 2080535040.0, - "8": 2080535040.0, - "9": 2080535040.0, - "10": 2080535040.0, - "11": 2080535040.0, - "12": 2080535040.0, - "13": 2080535040.0, - "14": 2080535040.0, - "15": 2080535040.0, - "16": 2080535040.0, - "17": 2080535040.0, - "18": 2080535040.0, - "19": 2080535040.0, - "20": 2080535040.0, - "21": 2080535040.0, - "22": 2080535040.0, - "23": 2080535040.0, - "24": 2080535040.0, - "25": 2080535040.0, - "26": 2080535040.0, - "27": 2080535040.0, - "28": 2080535040.0, - "29": 2080535040.0, - "30": 2080535040.0, - "31": 2080535040.0, - "32": 2080535040.0, - "33": 2080535040.0, - "34": 2080535040.0, - "35": 2080535040.0, - "36": 2080535040.0, - "37": 2080535040.0, - "38": 2080535040.0, - "39": 2080535040.0, - "40": 2080535040.0, - "41": 2080535040.0, - "42": 2080535040.0, - "43": 2080535040.0, - "44": 2080535040.0, - "45": 2080535040.0, - "46": 2080535040.0, - "47": 2080535040.0, - "48": 2080535040.0, - "49": 2080535040.0, - "50": 2080535040.0 + "1": 1843612672.0, + "2": 2073786880.0, + "3": 2074433024.0, + "4": 2074433024.0, + "5": 2074806784.0, + "6": 2075118080.0, + "7": 2076633600.0, + "8": 2076633600.0, + "9": 2076633600.0, + "10": 2076633600.0, + "11": 2076633600.0, + "12": 2076633600.0, + "13": 2076673536.0, + "14": 2076673536.0, + "15": 2076673536.0, + "16": 2076673536.0, + "17": 2076673536.0, + "18": 2076673536.0, + "19": 2076673536.0, + "20": 2076673536.0, + "21": 2076673536.0, + "22": 2076673536.0, + "23": 2076673536.0, + "24": 2076673536.0, + "25": 2076673536.0, + "26": 2076673536.0, + "27": 
2076673536.0, + "28": 2076673536.0, + "29": 2076673536.0, + "30": 2076673536.0, + "31": 2076673536.0, + "32": 2076673536.0, + "33": 2076673536.0, + "34": 2076673536.0, + "35": 2076673536.0, + "36": 2076673536.0, + "37": 2076673536.0, + "38": 2076673536.0, + "39": 2076673536.0, + "40": 2076673536.0, + "41": 2076673536.0, + "42": 2076673536.0, + "43": 2076673536.0, + "44": 2076673536.0, + "45": 2076673536.0, + "46": 2076673536.0, + "47": 2076673536.0, + "48": 2076673536.0, + "49": 2076673536.0, + "50": 2076673536.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.46534, - "2": 0.4102, - "3": 0.34219, - "4": 0.34093, - "5": 0.34255, - "6": 0.33706, - "7": 0.33661, - "8": 0.33616, - "9": 0.33565, - "10": 0.33568, - "11": 0.33538, - "12": 0.33457, - "13": 0.33488, - "14": 0.33416, - "15": 0.33683, - "16": 0.33684, - "17": 0.33708, - "18": 0.33815, - "19": 0.33805, - "20": 0.33696, - "21": 0.33675, - "22": 0.33623, - "23": 0.33752, - "24": 0.33699, - "25": 0.3409, - "26": 0.33513, - "27": 0.33524, - "28": 0.33491, - "29": 0.33714, - "30": 0.33571, - "31": 0.33638, - "32": 0.33629, - "33": 0.3369, - "34": 0.33685, - "35": 0.33651, - "36": 0.33539, - "37": 0.33561, - "38": 0.33636, - "39": 0.33558, - "40": 0.3356, - "41": 0.33618, - "42": 0.33669, - "43": 0.33535, - "44": 0.3362, - "45": 0.3354, - "46": 0.33686, - "47": 0.33486, - "48": 0.33657, - "49": 0.33563, - "50": 0.33513 + "1": 6.70836, + "2": 0.3903, + "3": 0.34658, + "4": 0.33174, + "5": 0.33024, + "6": 0.32826, + "7": 0.32764, + "8": 0.32869, + "9": 0.32788, + "10": 0.3286, + "11": 0.32808, + "12": 0.33088, + "13": 0.32722, + "14": 0.32709, + "15": 0.32599, + "16": 0.32627, + "17": 0.32568, + "18": 0.32553, + "19": 0.32587, + "20": 0.32614, + "21": 0.32643, + "22": 0.32599, + "23": 0.32625, + "24": 0.32672, + "25": 0.32482, + "26": 0.32493, + "27": 0.32669, + "28": 0.32628, + "29": 0.32713, + "30": 0.32658, + "31": 0.32584, + "32": 0.32655, + "33": 0.3257, + 
"34": 0.32557, + "35": 0.3265, + "36": 0.32561, + "37": 0.32526, + "38": 0.32485, + "39": 0.32759, + "40": 0.32685, + "41": 0.32691, + "42": 0.32612, + "43": 0.32555, + "44": 0.32643, + "45": 0.32699, + "46": 0.32711, + "47": 0.32611, + "48": 0.32765, + "49": 0.32669, + "50": 0.32485 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_h100.json index 13bfff6c765..197eda568d8 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_h100.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.81131, - "2": 10.83052, - "3": 10.82065, - "4": 10.81318, - "5": 10.84363, - "6": 10.84747, - "7": 10.85338, - "8": 10.83667, - "9": 10.8468, - "10": 10.7825, - "11": 10.85216, - "12": 10.86296, - "13": 10.85469, - "14": 10.88433, - "15": 10.87748, - "16": 10.84698, - "17": 10.83109, + "1": 10.81103, + "2": 10.83065, + "3": 10.82048, + "4": 10.81293, + "5": 10.84375, + "6": 10.8473, + "7": 10.85341, + "8": 10.83649, + "9": 10.84696, + "10": 10.78181, + "11": 10.85157, + "12": 10.86354, + "13": 10.85392, + "14": 10.88443, + "15": 10.87738, + "16": 10.84647, + "17": 10.83081, "18": 10.86619, - "19": 10.84965, - "20": 10.84503, - "21": 10.84788, - "22": 10.79628, - "23": 10.88209, - "24": 10.83272, - "25": 10.82407, - "26": 10.84275, - "27": 10.85284, - "28": 10.87701, - "29": 10.8644, - "30": 10.81288, - "31": 10.78708, - "32": 10.85504, - "33": 10.85616, - "34": 10.84955, - "35": 10.83713, - "36": 10.80378, - "37": 10.83848, - "38": 10.80562, - "39": 10.8422, - "40": 10.80302, - "41": 10.84057, - "42": 10.84402, - "43": 10.81002, - "44": 
10.80246, - "45": 10.78649, - "46": 10.80799, - "47": 10.817, - "48": 10.80324, - "49": 10.78157, - "50": 10.80218, - "51": 10.82262, - "52": 10.80415, - "53": 10.83258, - "54": 10.81542, - "55": 10.82524, - "56": 10.77667, - "57": 10.75278, - "58": 10.8075, - "59": 10.79063, - "60": 10.73975, - "61": 10.79974, - "62": 10.81288, - "63": 10.72014, - "64": 10.78563, - "65": 10.68987, - "66": 10.76119, - "67": 10.73431, - "68": 10.80192, - "69": 10.78336, - "70": 10.77619, - "71": 10.76644, - "72": 10.73613, - "73": 10.72971, - "74": 10.62238, - "75": 10.69054, - "76": 10.65471, - "77": 10.82153, - "78": 10.76381, - "79": 10.705, - "80": 10.69388, - "81": 10.72432, - "82": 10.74257, - "83": 10.66783, - "84": 10.69845, - "85": 10.71465, - "86": 10.63873, - "87": 10.71762, - "88": 10.73506, - "89": 10.71394, - "90": 10.74649, - "91": 10.64881, - "92": 10.64684, - "93": 10.60201, - "94": 10.53283, - "95": 10.66127, - "96": 10.67245, - "97": 10.61405, - "98": 10.68482, - "99": 10.52006, - "100": 10.61575 + "19": 10.84941, + "20": 10.84533, + "21": 10.84772, + "22": 10.79615, + "23": 10.88259, + "24": 10.83337, + "25": 10.82488, + "26": 10.84313, + "27": 10.85316, + "28": 10.87689, + "29": 10.86377, + "30": 10.81302, + "31": 10.78697, + "32": 10.85497, + "33": 10.85651, + "34": 10.849, + "35": 10.83725, + "36": 10.80381, + "37": 10.83835, + "38": 10.8051, + "39": 10.84122, + "40": 10.80292, + "41": 10.8407, + "42": 10.84416, + "43": 10.80995, + "44": 10.80279, + "45": 10.7866, + "46": 10.80814, + "47": 10.81723, + "48": 10.80288, + "49": 10.78144, + "50": 10.80226, + "51": 10.8227, + "52": 10.80372, + "53": 10.83318, + "54": 10.81535, + "55": 10.8256, + "56": 10.77729, + "57": 10.75246, + "58": 10.80818, + "59": 10.7909, + "60": 10.74009, + "61": 10.79938, + "62": 10.81291, + "63": 10.7204, + "64": 10.78529, + "65": 10.68966, + "66": 10.76117, + "67": 10.73412, + "68": 10.80256, + "69": 10.7832, + "70": 10.77682, + "71": 10.76728, + "72": 10.73575, + "73": 10.72932, + 
"74": 10.62223, + "75": 10.69036, + "76": 10.65459, + "77": 10.8217, + "78": 10.76362, + "79": 10.70431, + "80": 10.69382, + "81": 10.72448, + "82": 10.74183, + "83": 10.66825, + "84": 10.69817, + "85": 10.71449, + "86": 10.63898, + "87": 10.7181, + "88": 10.73512, + "89": 10.71387, + "90": 10.74622, + "91": 10.64935, + "92": 10.64642, + "93": 10.60191, + "94": 10.53277, + "95": 10.66125, + "96": 10.67241, + "97": 10.61414, + "98": 10.68493, + "99": 10.51994, + "100": 10.61532 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1216.0, - "2": 1361.0, - "3": 1290.0, - "4": 1255.0, - "5": 1433.0, - "6": 1548.0, - "7": 1277.0, - "8": 1340.0, - "9": 1318.0, - "10": 1284.0, - "11": 1307.0, - "12": 1174.0, - "13": 1268.0, - "14": 1421.0, - "15": 1220.0, - "16": 1229.0, - "17": 1346.0, - "18": 1311.0, - "19": 1252.0, - "20": 1273.0, - "21": 1283.0, - "22": 1145.0, - "23": 1454.0, - "24": 1348.0, - "25": 1258.0, - "26": 1212.0, - "27": 1343.0, - "28": 1389.0, - "29": 1282.0, - "30": 1203.0, - "31": 1152.0, - "32": 1244.0, - "33": 1290.0, - "34": 1082.0, - "35": 1176.0, - "36": 1168.0, + "1": 1248.0, + "2": 1334.0, + "3": 1297.0, + "4": 1215.0, + "5": 1398.0, + "6": 1528.0, + "7": 1225.0, + "8": 1318.0, + "9": 1310.0, + "10": 1321.0, + "11": 1324.0, + "12": 1240.0, + "13": 1294.0, + "14": 1467.0, + "15": 1268.0, + "16": 1250.0, + "17": 1358.0, + "18": 1315.0, + "19": 1243.0, + "20": 1257.0, + "21": 1227.0, + "22": 1182.0, + "23": 1417.0, + "24": 1332.0, + "25": 1281.0, + "26": 1209.0, + "27": 1318.0, + "28": 1410.0, + "29": 1295.0, + "30": 1234.0, + "31": 1108.0, + "32": 1299.0, + "33": 1298.0, + "34": 1116.0, + "35": 1213.0, + "36": 1208.0, "37": 1242.0, - "38": 1316.0, - "39": 1589.0, - "40": 1218.0, - "41": 1391.0, - "42": 1137.0, - "43": 1234.0, - "44": 1265.0, - "45": 1194.0, - "46": 1124.0, - "47": 1300.0, - "48": 1102.0, - "49": 1124.0, - "50": 1211.0, - "51": 1266.0, - "52": 1269.0, - "53": 1355.0, - "54": 1212.0, - 
"55": 1137.0, - "56": 1313.0, - "57": 1288.0, - "58": 1341.0, - "59": 1261.0, - "60": 1287.0, - "61": 1139.0, - "62": 1205.0, - "63": 1265.0, - "64": 1350.0, - "65": 1195.0, - "66": 1207.0, - "67": 1121.0, - "68": 1212.0, - "69": 1335.0, - "70": 1356.0, - "71": 1316.0, - "72": 1232.0, - "73": 1121.0, - "74": 1130.0, - "75": 1295.0, - "76": 1335.0, - "77": 1371.0, - "78": 1336.0, - "79": 1042.0, - "80": 1149.0, - "81": 1117.0, - "82": 1202.0, - "83": 1289.0, - "84": 1140.0, - "85": 1323.0, - "86": 1219.0, - "87": 1219.0, - "88": 1221.0, - "89": 1294.0, - "90": 1402.0, - "91": 1197.0, - "92": 1269.0, - "93": 1106.0, - "94": 960.0, - "95": 1192.0, - "96": 1253.0, - "97": 1148.0, - "98": 1218.0, - "99": 1273.0, - "100": 1249.0 + "38": 1382.0, + "39": 1531.0, + "40": 1195.0, + "41": 1382.0, + "42": 1173.0, + "43": 1189.0, + "44": 1215.0, + "45": 1175.0, + "46": 1207.0, + "47": 1372.0, + "48": 1158.0, + "49": 1223.0, + "50": 1257.0, + "51": 1219.0, + "52": 1236.0, + "53": 1343.0, + "54": 1286.0, + "55": 1103.0, + "56": 1299.0, + "57": 1212.0, + "58": 1379.0, + "59": 1235.0, + "60": 1210.0, + "61": 1159.0, + "62": 1203.0, + "63": 1219.0, + "64": 1239.0, + "65": 1245.0, + "66": 1153.0, + "67": 1210.0, + "68": 1206.0, + "69": 1315.0, + "70": 1342.0, + "71": 1288.0, + "72": 1171.0, + "73": 1182.0, + "74": 1093.0, + "75": 1300.0, + "76": 1341.0, + "77": 1369.0, + "78": 1286.0, + "79": 1111.0, + "80": 1189.0, + "81": 1205.0, + "82": 1269.0, + "83": 1293.0, + "84": 1145.0, + "85": 1251.0, + "86": 1191.0, + "87": 1179.0, + "88": 1294.0, + "89": 1265.0, + "90": 1314.0, + "91": 1175.0, + "92": 1286.0, + "93": 1100.0, + "94": 969.0, + "95": 1204.0, + "96": 1241.0, + "97": 1163.0, + "98": 1205.0, + "99": 1291.0, + "100": 1214.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 994082816.0, - "2": 994053120.0, - "3": 994100224.0, - "4": 994081280.0, - "5": 994103808.0, - "6": 994043392.0, - "7": 994066944.0, - "8": 
994074112.0, - "9": 994091008.0, - "10": 994104320.0, - "11": 994077696.0, - "12": 994044416.0, - "13": 994100736.0, - "14": 994012160.0, - "15": 994057216.0, - "16": 993989120.0, - "17": 994107904.0, - "18": 994082304.0, - "19": 994089472.0, - "20": 994008064.0, - "21": 994033152.0, - "22": 994105344.0, - "23": 994081280.0, - "24": 994021888.0, - "25": 994152960.0, - "26": 994058752.0, - "27": 994118144.0, - "28": 994044416.0, - "29": 994075648.0, - "30": 994039296.0, - "31": 994107392.0, - "32": 994037760.0, - "33": 994046976.0, - "34": 994015232.0, - "35": 994064384.0, - "36": 994078208.0, - "37": 994037248.0, - "38": 994120192.0, - "39": 994128896.0, - "40": 994016768.0, - "41": 994044928.0, - "42": 994063872.0, - "43": 994075648.0, - "44": 994180096.0, - "45": 994053632.0, - "46": 994070016.0, - "47": 994091520.0, - "48": 994076672.0, - "49": 994042368.0, - "50": 994061312.0, - "51": 994132992.0, - "52": 994076160.0, - "53": 994139136.0, - "54": 994086400.0, - "55": 994076160.0, - "56": 994066944.0, - "57": 994113536.0, - "58": 994111488.0, - "59": 994096128.0, - "60": 994060288.0, - "61": 994060800.0, - "62": 994054656.0, - "63": 994068992.0, - "64": 994058752.0, - "65": 994064896.0, - "66": 994074624.0, - "67": 994061824.0, - "68": 994071552.0, - "69": 994058240.0, - "70": 994103808.0, - "71": 994077184.0, - "72": 994002944.0, - "73": 994104320.0, - "74": 994116608.0, - "75": 994081792.0, - "76": 994104320.0, - "77": 994054656.0, - "78": 994114048.0, - "79": 994085376.0, - "80": 994039296.0, - "81": 994073600.0, - "82": 994020864.0, - "83": 994123776.0, - "84": 994103296.0, - "85": 994070528.0, - "86": 994070016.0, - "87": 994093056.0, - "88": 994079232.0, - "89": 994066432.0, - "90": 994060800.0, - "91": 994116096.0, - "92": 994098176.0, - "93": 994076672.0, - "94": 994083840.0, - "95": 994082816.0, - "96": 994086400.0, - "97": 994094080.0, - "98": 994070016.0, - "99": 994088448.0, - "100": 994124800.0 + "1": 994066432.0, + "2": 994036224.0, + "3": 
994083840.0, + "4": 994063872.0, + "5": 994086912.0, + "6": 994028032.0, + "7": 994051072.0, + "8": 994058752.0, + "9": 994072576.0, + "10": 994086912.0, + "11": 994060800.0, + "12": 994029056.0, + "13": 994085888.0, + "14": 993994240.0, + "15": 994040832.0, + "16": 993971712.0, + "17": 994093568.0, + "18": 994065920.0, + "19": 994073088.0, + "20": 993993216.0, + "21": 994013184.0, + "22": 994089472.0, + "23": 994065408.0, + "24": 994004992.0, + "25": 994137600.0, + "26": 994042880.0, + "27": 994099712.0, + "28": 994027520.0, + "29": 994059776.0, + "30": 994023936.0, + "31": 994087936.0, + "32": 994022400.0, + "33": 994032640.0, + "34": 993997312.0, + "35": 994046976.0, + "36": 994061824.0, + "37": 994019840.0, + "38": 994102784.0, + "39": 994113536.0, + "40": 994000384.0, + "41": 994028544.0, + "42": 994046464.0, + "43": 994057728.0, + "44": 994161664.0, + "45": 994034176.0, + "46": 994053120.0, + "47": 994075648.0, + "48": 994058240.0, + "49": 994025472.0, + "50": 994043392.0, + "51": 994117120.0, + "52": 994060800.0, + "53": 994122752.0, + "54": 994071040.0, + "55": 994060800.0, + "56": 994049536.0, + "57": 994097152.0, + "58": 994092544.0, + "59": 994078720.0, + "60": 994044928.0, + "61": 994045440.0, + "62": 994039808.0, + "63": 994052608.0, + "64": 994041856.0, + "65": 994048000.0, + "66": 994055680.0, + "67": 994045440.0, + "68": 994053120.0, + "69": 994042368.0, + "70": 994087424.0, + "71": 994061312.0, + "72": 993986560.0, + "73": 994088448.0, + "74": 994099200.0, + "75": 994067456.0, + "76": 994084864.0, + "77": 994039808.0, + "78": 994094080.0, + "79": 994071040.0, + "80": 994024960.0, + "81": 994057728.0, + "82": 994005504.0, + "83": 994106880.0, + "84": 994085888.0, + "85": 994054144.0, + "86": 994055168.0, + "87": 994075648.0, + "88": 994062336.0, + "89": 994051584.0, + "90": 994043392.0, + "91": 994097664.0, + "92": 994082304.0, + "93": 994058752.0, + "94": 994066944.0, + "95": 994068992.0, + "96": 994066944.0, + "97": 994078208.0, + "98": 
994054144.0, + "99": 994071552.0, + "100": 994109952.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 3209166336.0, - "2": 3482067456.0, - "3": 3514878464.0, - "4": 3514878464.0, - "5": 3515977728.0, - "6": 3515977728.0, - "7": 3515977728.0, - "8": 3515977728.0, - "9": 3515977728.0, - "10": 3519236608.0, - "11": 3519236608.0, - "12": 3519236608.0, - "13": 3519236608.0, - "14": 3519236608.0, - "15": 3519236608.0, - "16": 3519236608.0, - "17": 3519236608.0, - "18": 3519236608.0, - "19": 3519236608.0, - "20": 3519236608.0, - "21": 3519236608.0, - "22": 3519236608.0, - "23": 3519236608.0, - "24": 3519236608.0, - "25": 3549031424.0, - "26": 3549031424.0, - "27": 3549031424.0, - "28": 3549031424.0, - "29": 3549031424.0, - "30": 3549031424.0, - "31": 3549031424.0, - "32": 3549031424.0, - "33": 3549031424.0, - "34": 3549031424.0, - "35": 3549031424.0, - "36": 3549031424.0, - "37": 3549031424.0, - "38": 3549031424.0, - "39": 3549031424.0, - "40": 3549031424.0, - "41": 3549031424.0, - "42": 3549031424.0, - "43": 3549031424.0, - "44": 3560927744.0, - "45": 3560927744.0, - "46": 3560927744.0, - "47": 3560927744.0, - "48": 3560927744.0, - "49": 3560927744.0, - "50": 3560927744.0, - "51": 3560927744.0, - "52": 3560927744.0, - "53": 3560927744.0, - "54": 3560927744.0, - "55": 3560927744.0, - "56": 3560927744.0, - "57": 3560927744.0, - "58": 3560927744.0, - "59": 3560927744.0, - "60": 3560927744.0, - "61": 3560927744.0, - "62": 3560927744.0, - "63": 3560927744.0, - "64": 3560927744.0, - "65": 3560927744.0, - "66": 3560927744.0, - "67": 3560927744.0, - "68": 3560927744.0, - "69": 3560927744.0, - "70": 3560927744.0, - "71": 3560927744.0, - "72": 3560927744.0, - "73": 3560927744.0, - "74": 3560927744.0, - "75": 3560927744.0, - "76": 3560927744.0, - "77": 3560927744.0, - "78": 3560927744.0, - "79": 3560927744.0, - "80": 3560927744.0, - "81": 3560927744.0, - "82": 3560927744.0, - "83": 3560927744.0, - "84": 
3560927744.0, - "85": 3560927744.0, - "86": 3560927744.0, - "87": 3560927744.0, - "88": 3560927744.0, - "89": 3560927744.0, - "90": 3560927744.0, - "91": 3560927744.0, - "92": 3560927744.0, - "93": 3560927744.0, - "94": 3560927744.0, - "95": 3560927744.0, - "96": 3560927744.0, - "97": 3560927744.0, - "98": 3560927744.0, - "99": 3560927744.0, - "100": 3560927744.0 + "1": 3209309696.0, + "2": 3480903680.0, + "3": 3511780864.0, + "4": 3511780864.0, + "5": 3517387264.0, + "6": 3517387264.0, + "7": 3517387264.0, + "8": 3517387264.0, + "9": 3517387264.0, + "10": 3517387264.0, + "11": 3517387264.0, + "12": 3517387264.0, + "13": 3517387264.0, + "14": 3517387264.0, + "15": 3517387264.0, + "16": 3517387264.0, + "17": 3518340096.0, + "18": 3518340096.0, + "19": 3518340096.0, + "20": 3518340096.0, + "21": 3518340096.0, + "22": 3518340096.0, + "23": 3518340096.0, + "24": 3518340096.0, + "25": 3547281408.0, + "26": 3547281408.0, + "27": 3547281408.0, + "28": 3547281408.0, + "29": 3547281408.0, + "30": 3547281408.0, + "31": 3547281408.0, + "32": 3547281408.0, + "33": 3547281408.0, + "34": 3547281408.0, + "35": 3547281408.0, + "36": 3547281408.0, + "37": 3547281408.0, + "38": 3547281408.0, + "39": 3547281408.0, + "40": 3547281408.0, + "41": 3547281408.0, + "42": 3547281408.0, + "43": 3547281408.0, + "44": 3565241856.0, + "45": 3565241856.0, + "46": 3565241856.0, + "47": 3565241856.0, + "48": 3565241856.0, + "49": 3565241856.0, + "50": 3565241856.0, + "51": 3565241856.0, + "52": 3565241856.0, + "53": 3565241856.0, + "54": 3565241856.0, + "55": 3565241856.0, + "56": 3565241856.0, + "57": 3565241856.0, + "58": 3565241856.0, + "59": 3565241856.0, + "60": 3565241856.0, + "61": 3565241856.0, + "62": 3565241856.0, + "63": 3565241856.0, + "64": 3565241856.0, + "65": 3565241856.0, + "66": 3565241856.0, + "67": 3565241856.0, + "68": 3565241856.0, + "69": 3565241856.0, + "70": 3565241856.0, + "71": 3565241856.0, + "72": 3565241856.0, + "73": 3565241856.0, + "74": 3565241856.0, + "75": 
3565241856.0, + "76": 3565241856.0, + "77": 3565241856.0, + "78": 3565241856.0, + "79": 3565241856.0, + "80": 3565241856.0, + "81": 3565241856.0, + "82": 3565241856.0, + "83": 3565241856.0, + "84": 3565241856.0, + "85": 3565241856.0, + "86": 3565241856.0, + "87": 3565241856.0, + "88": 3565241856.0, + "89": 3565241856.0, + "90": 3565241856.0, + "91": 3565241856.0, + "92": 3565241856.0, + "93": 3565241856.0, + "94": 3565241856.0, + "95": 3565241856.0, + "96": 3565241856.0, + "97": 3565241856.0, + "98": 3565241856.0, + "99": 3565241856.0, + "100": 3565241856.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 12.93942, - "2": 0.24599, - "3": 0.18905, - "4": 0.15958, - "5": 0.17376, - "6": 0.15827, - "7": 0.1625, - "8": 0.15602, - "9": 0.14535, - "10": 0.15058, - "11": 0.15764, - "12": 0.14977, - "13": 0.14045, - "14": 0.14809, - "15": 0.14641, - "16": 0.14226, - "17": 0.14811, - "18": 0.14049, - "19": 0.14226, - "20": 0.14343, - "21": 0.13924, - "22": 0.13727, - "23": 0.14079, - "24": 0.13602, - "25": 0.1322, - "26": 0.14315, - "27": 0.1347, - "28": 0.13221, - "29": 0.14595, - "30": 0.13083, - "31": 0.13326, - "32": 0.14065, - "33": 0.1383, - "34": 0.12953, - "35": 0.12541, - "36": 0.13129, - "37": 0.13317, - "38": 0.13535, - "39": 0.14664, - "40": 0.13368, - "41": 0.13115, - "42": 0.13308, - "43": 0.14022, - "44": 0.12946, - "45": 0.134, - "46": 0.12714, - "47": 0.13354, - "48": 0.13449, - "49": 0.13041, - "50": 0.13278, - "51": 0.14094, - "52": 0.12708, - "53": 0.13344, - "54": 0.13202, - "55": 0.13136, - "56": 0.13508, - "57": 0.13876, - "58": 0.13736, - "59": 0.12763, - "60": 0.13185, - "61": 0.12865, - "62": 0.13343, - "63": 0.13403, - "64": 0.12891, - "65": 0.13097, - "66": 0.12741, - "67": 0.13812, - "68": 0.13131, - "69": 0.13389, - "70": 0.13833, - "71": 0.12822, - "72": 0.12851, - "73": 0.13747, - "74": 0.13403, - "75": 0.12846, - "76": 0.13178, - "77": 0.12922, - "78": 0.12906, - "79": 0.12676, - "80": 
0.13361, - "81": 0.12867, - "82": 0.1295, - "83": 0.12961, - "84": 0.12795, - "85": 0.13547, - "86": 0.13067, - "87": 0.13455, - "88": 0.13573, - "89": 0.12632, - "90": 0.13428, - "91": 0.13373, - "92": 0.12985, - "93": 0.1291, - "94": 0.12972, - "95": 0.13089, - "96": 0.13658, - "97": 0.12767, - "98": 0.14125, - "99": 0.13279, - "100": 0.12715 + "1": 10.4734, + "2": 0.22466, + "3": 0.19051, + "4": 0.16936, + "5": 0.17686, + "6": 0.15785, + "7": 0.16819, + "8": 0.15689, + "9": 0.15169, + "10": 0.15121, + "11": 0.15857, + "12": 0.15775, + "13": 0.15107, + "14": 0.19276, + "15": 0.1585, + "16": 0.14844, + "17": 0.14326, + "18": 0.13869, + "19": 0.1396, + "20": 0.15448, + "21": 0.139, + "22": 0.13512, + "23": 0.1426, + "24": 0.13221, + "25": 0.13685, + "26": 0.1411, + "27": 0.13181, + "28": 0.1391, + "29": 0.15621, + "30": 0.13616, + "31": 0.14287, + "32": 0.14647, + "33": 0.13884, + "34": 0.137, + "35": 0.13475, + "36": 0.13916, + "37": 0.14264, + "38": 0.13664, + "39": 0.14359, + "40": 0.13821, + "41": 0.13468, + "42": 0.1363, + "43": 0.13569, + "44": 0.13933, + "45": 0.13715, + "46": 0.12697, + "47": 0.13407, + "48": 0.13274, + "49": 0.13757, + "50": 0.13925, + "51": 0.14105, + "52": 0.1341, + "53": 0.5448, + "54": 0.13151, + "55": 0.13522, + "56": 0.13665, + "57": 0.13286, + "58": 0.13453, + "59": 0.12754, + "60": 0.1357, + "61": 0.53562, + "62": 0.13254, + "63": 0.13398, + "64": 0.12882, + "65": 0.13897, + "66": 0.13313, + "67": 0.12905, + "68": 0.13433, + "69": 0.13542, + "70": 0.13311, + "71": 0.12876, + "72": 0.12973, + "73": 0.12733, + "74": 0.13423, + "75": 0.12883, + "76": 0.13263, + "77": 0.13959, + "78": 0.13036, + "79": 0.12628, + "80": 0.13369, + "81": 0.13323, + "82": 0.13, + "83": 0.13277, + "84": 0.12856, + "85": 0.13675, + "86": 0.13342, + "87": 0.13516, + "88": 0.13259, + "89": 0.13162, + "90": 0.14614, + "91": 0.13534, + "92": 0.1265, + "93": 0.12755, + "94": 0.12676, + "95": 0.12846, + "96": 0.13404, + "97": 0.12623, + "98": 0.13489, + "99": 
0.13377, + "100": 0.12824 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..bc235c4dfa5 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.8227, + "52": 10.80372, + "53": 10.83318, + "54": 10.81535, + "55": 10.8256, + "56": 10.77729, + "57": 10.75246, + "58": 10.80818, + "59": 10.7909, + "60": 10.74009, + "61": 10.79938, + "62": 10.81291, + "63": 10.7204, + "64": 10.78529, + "65": 10.68966, + "66": 10.76117, + "67": 10.73412, + "68": 10.80256, + "69": 10.7832, + "70": 10.77682, + "71": 10.76728, + "72": 10.73575, + "73": 10.72932, + "74": 10.62223, + "75": 10.69036, + "76": 10.65459, + "77": 10.8217, + "78": 10.76362, + "79": 10.70431, + "80": 10.69382, + "81": 10.72448, + "82": 10.74183, + "83": 10.66825, + "84": 10.69817, + "85": 10.71449, + 
"86": 10.63898, + "87": 10.7181, + "88": 10.73512, + "89": 10.71387, + "90": 10.74622, + "91": 10.64935, + "92": 10.64642, + "93": 10.60191, + "94": 10.53277, + "95": 10.66125, + "96": 10.67241, + "97": 10.61414, + "98": 10.68493, + "99": 10.51994, + "100": 10.61532 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1219.0, + "52": 1236.0, + "53": 1343.0, + "54": 1286.0, + "55": 1103.0, + "56": 1299.0, + "57": 1212.0, + "58": 1379.0, + "59": 1235.0, + "60": 1210.0, + "61": 1159.0, + "62": 1203.0, + "63": 1219.0, + "64": 1239.0, + "65": 1245.0, + "66": 1153.0, + "67": 1210.0, + "68": 1206.0, + "69": 1315.0, + "70": 1342.0, + "71": 1288.0, + "72": 1171.0, + "73": 1182.0, + "74": 1093.0, + "75": 1300.0, + "76": 1341.0, + "77": 1369.0, + "78": 1286.0, + "79": 1111.0, + "80": 1189.0, + "81": 1205.0, + "82": 1269.0, + "83": 1293.0, + "84": 1145.0, + "85": 1251.0, + "86": 1191.0, + "87": 1179.0, + "88": 1294.0, + "89": 1265.0, + "90": 1314.0, + "91": 1175.0, + "92": 1286.0, + "93": 1100.0, + "94": 969.0, + "95": 1204.0, + "96": 1241.0, + "97": 1163.0, + "98": 1205.0, + "99": 1291.0, + "100": 1214.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 
1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 994116096.0, + "52": 994060800.0, + "53": 994122752.0, + "54": 994071040.0, + "55": 994060800.0, + "56": 994049536.0, + "57": 994097152.0, + "58": 994092544.0, + "59": 994078720.0, + "60": 994044928.0, + "61": 994045440.0, + "62": 994039808.0, + "63": 994052608.0, + "64": 994041856.0, + "65": 994048000.0, + "66": 994055680.0, + "67": 994045440.0, + "68": 994053120.0, + "69": 994042368.0, + "70": 994087424.0, + "71": 994061312.0, + "72": 993986560.0, + "73": 994088448.0, + "74": 994099200.0, + "75": 994067456.0, + "76": 994084864.0, + "77": 994039808.0, + "78": 994094080.0, + "79": 994071040.0, + "80": 994024960.0, + "81": 994057728.0, + "82": 994005504.0, + "83": 994106880.0, + "84": 994085888.0, + "85": 994054144.0, + "86": 994055168.0, + "87": 994075648.0, + "88": 994062336.0, + "89": 994051584.0, + "90": 994043392.0, + "91": 994097664.0, + "92": 994082304.0, + "93": 994058752.0, + "94": 994066944.0, + "95": 994068992.0, + "96": 994066944.0, + "97": 994078208.0, + "98": 994054144.0, + "99": 994071552.0, + "100": 994109952.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": 
"nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3502329856.0, + "52": 3502329856.0, + "53": 3537698304.0, + "54": 3537698304.0, + "55": 3537698304.0, + "56": 3537698304.0, + "57": 3537698304.0, + "58": 3537698304.0, + "59": 3537698304.0, + "60": 3537698304.0, + "61": 3537698304.0, + "62": 3537698304.0, + "63": 3537698304.0, + "64": 3537698304.0, + "65": 3537698304.0, + "66": 3537698304.0, + "67": 3537698304.0, + "68": 3537698304.0, + "69": 3537698304.0, + "70": 3537698304.0, + "71": 3537698304.0, + "72": 3537698304.0, + "73": 3537698304.0, + "74": 3537698304.0, + "75": 3537698304.0, + "76": 3537698304.0, + "77": 3537698304.0, + "78": 3537698304.0, + "79": 3537698304.0, + "80": 3537698304.0, + "81": 3537698304.0, + "82": 3537698304.0, + "83": 3537698304.0, + "84": 3537698304.0, + "85": 3537698304.0, + "86": 3537698304.0, + "87": 3537698304.0, + "88": 3537698304.0, + "89": 3537698304.0, + "90": 3537698304.0, + "91": 3537698304.0, + "92": 3537698304.0, + "93": 3537698304.0, + "94": 3537698304.0, + "95": 3537698304.0, + "96": 3537698304.0, + "97": 3537698304.0, + "98": 3537698304.0, + "99": 3537698304.0, + "100": 3537698304.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + 
"10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.80393, + "52": 0.21609, + "53": 0.18011, + "54": 0.16574, + "55": 0.17551, + "56": 0.15661, + "57": 0.15643, + "58": 0.14683, + "59": 0.14167, + "60": 0.15286, + "61": 0.14194, + "62": 0.15289, + "63": 0.14852, + "64": 0.15158, + "65": 0.14582, + "66": 0.14918, + "67": 0.13999, + "68": 0.14356, + "69": 0.14847, + "70": 0.14345, + "71": 0.13948, + "72": 0.14052, + "73": 0.13195, + "74": 0.14445, + "75": 0.12708, + "76": 0.13314, + "77": 0.14514, + "78": 0.14212, + "79": 0.12911, + "80": 0.13195, + "81": 0.14027, + "82": 0.13349, + "83": 0.12837, + "84": 0.1284, + "85": 0.14683, + "86": 0.14559, + "87": 0.14449, + "88": 0.13511, + "89": 0.13496, + "90": 0.14777, + "91": 0.13483, + "92": 0.13387, + "93": 0.12619, + "94": 0.12638, + "95": 0.12624, + "96": 0.13537, + "97": 0.12788, + "98": 0.14225, + "99": 0.13569, + "100": 0.12935 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json index ad7af2bddb0..b106daa13a1 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json +++ 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json @@ -4,105 +4,105 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.81131, - "2": 10.83052, - "3": 10.82093, - "4": 10.81347, - "5": 10.84338, - "6": 10.84743, - "7": 10.85254, - "8": 10.83482, - "9": 10.84276, - "10": 10.77693, - "11": 10.8459, - "12": 10.85115, - "13": 10.84165, - "14": 10.8714, - "15": 10.83613, - "16": 10.79815, - "17": 10.77288, - "18": 10.8075, - "19": 10.78773, - "20": 10.73433, - "21": 10.69461, - "22": 10.56597, - "23": 10.71611, - "24": 10.61321, - "25": 10.552, - "26": 10.61364, - "27": 10.62702, - "28": 10.59546, - "29": 10.59195, - "30": 10.3916, - "31": 10.14615, - "32": 10.47399, - "33": 10.47051, - "34": 10.23435, - "35": 10.29318, - "36": 10.26627, - "37": 10.37219, - "38": 10.2254, - "39": 10.42101, - "40": 10.13002, - "41": 10.16265, - "42": 10.24278, - "43": 9.88237, - "44": 9.99105, - "45": 9.87295, + "1": 10.81103, + "2": 10.83065, + "3": 10.82107, + "4": 10.81304, + "5": 10.84321, + "6": 10.84718, + "7": 10.85237, + "8": 10.83499, + "9": 10.84293, + "10": 10.77678, + "11": 10.84585, + "12": 10.85174, + "13": 10.84182, + "14": 10.87189, + "15": 10.83593, + "16": 10.79751, + "17": 10.77325, + "18": 10.8073, + "19": 10.78778, + "20": 10.73435, + "21": 10.69516, + "22": 10.56641, + "23": 10.71634, + "24": 10.61287, + "25": 10.55191, + "26": 10.61354, + "27": 10.62651, + "28": 10.59524, + "29": 10.5917, + "30": 10.39149, + "31": 10.1464, + "32": 10.47402, + "33": 10.47024, + "34": 10.23415, + "35": 10.2932, + "36": 10.26667, + "37": 10.37209, + "38": 10.22542, + "39": 10.42143, + "40": 10.13017, + "41": 10.16266, + "42": 10.24275, + "43": 9.88221, + "44": 9.99119, + "45": 9.87323, "46": 9.85181, - "47": 10.15633, - "48": 9.8915, - "49": 9.58889, - "50": 9.9543, - "51": 9.8849, - "52": 9.78004, - "53": 10.10188, - "54": 9.98715, + "47": 10.15626, + "48": 9.89157, + "49": 9.58903, + "50": 9.95443, 
+ "51": 9.88487, + "52": 9.78018, + "53": 10.10226, + "54": 9.9873, "55": 9.9027, - "56": 9.66837, - "57": 9.53524, + "56": 9.66818, + "57": 9.53521, "58": 9.89495, - "59": 9.62892, - "60": 9.54308, - "61": 9.72727, - "62": 10.0332, - "63": 9.45215, - "64": 9.83179, - "65": 8.99109, - "66": 9.76394, - "67": 9.40349, - "68": 9.83129, - "69": 9.81856, - "70": 9.77262, - "71": 9.658, - "72": 9.64033, - "73": 9.55124, - "74": 9.02026, - "75": 9.47695, - "76": 9.13586, - "77": 10.09787, - "78": 9.75274, - "79": 9.41697, - "80": 9.45074, - "81": 9.52041, - "82": 9.73203, - "83": 9.36912, - "84": 9.45039, - "85": 9.65229, - "86": 9.1123, - "87": 9.61119, - "88": 9.78708, - "89": 9.64625, - "90": 9.83474, - "91": 9.39429, - "92": 9.39178, + "59": 9.6289, + "60": 9.54307, + "61": 9.72725, + "62": 10.03319, + "63": 9.45201, + "64": 9.83185, + "65": 8.99108, + "66": 9.76421, + "67": 9.40334, + "68": 9.83107, + "69": 9.81874, + "70": 9.77252, + "71": 9.65812, + "72": 9.64065, + "73": 9.5512, + "74": 9.02044, + "75": 9.47713, + "76": 9.13591, + "77": 10.09778, + "78": 9.75282, + "79": 9.41686, + "80": 9.45072, + "81": 9.52034, + "82": 9.73197, + "83": 9.36926, + "84": 9.4504, + "85": 9.65212, + "86": 9.11237, + "87": 9.61129, + "88": 9.78679, + "89": 9.64613, + "90": 9.83484, + "91": 9.39422, + "92": 9.39187, "93": 9.12787, - "94": 8.86637, - "95": 9.54352, - "96": 9.55716, - "97": 9.332, - "98": 9.69189, - "99": 8.92072, + "94": 8.86646, + "95": 9.54348, + "96": 9.55708, + "97": 9.33174, + "98": 9.6919, + "99": 8.92043, "100": 9.41916 } }, @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1216.0, - "2": 1361.0, - "3": 1221.0, - "4": 1222.0, - "5": 1385.0, - "6": 1467.0, + "1": 1248.0, + "2": 1334.0, + "3": 1294.0, + "4": 1227.0, + "5": 1403.0, + "6": 1427.0, "7": 1252.0, - "8": 1355.0, - "9": 1346.0, - "10": 1335.0, - "11": 1278.0, - "12": 1185.0, - "13": 1203.0, - "14": 1385.0, - "15": 1303.0, - "16": 1377.0, - "17": 1229.0, - "18": 1291.0, - 
"19": 1244.0, - "20": 1183.0, - "21": 1262.0, - "22": 1122.0, - "23": 1301.0, - "24": 1066.0, - "25": 1182.0, - "26": 1263.0, - "27": 1162.0, - "28": 1262.0, - "29": 1179.0, - "30": 1168.0, - "31": 991.0, - "32": 1092.0, - "33": 1183.0, - "34": 1081.0, - "35": 1146.0, - "36": 1076.0, - "37": 1252.0, - "38": 1176.0, + "8": 1427.0, + "9": 1305.0, + "10": 1282.0, + "11": 1297.0, + "12": 1278.0, + "13": 1202.0, + "14": 1425.0, + "15": 1290.0, + "16": 1353.0, + "17": 1248.0, + "18": 1308.0, + "19": 1305.0, + "20": 1244.0, + "21": 1166.0, + "22": 1145.0, + "23": 1320.0, + "24": 1102.0, + "25": 1254.0, + "26": 1241.0, + "27": 1137.0, + "28": 1332.0, + "29": 1297.0, + "30": 1138.0, + "31": 1027.0, + "32": 1093.0, + "33": 1262.0, + "34": 1095.0, + "35": 1120.0, + "36": 1048.0, + "37": 1161.0, + "38": 1211.0, "39": 1225.0, - "40": 1303.0, - "41": 1104.0, - "42": 1210.0, - "43": 1116.0, - "44": 1165.0, - "45": 1097.0, - "46": 1308.0, - "47": 1165.0, - "48": 1134.0, - "49": 1272.0, - "50": 1083.0, - "51": 1234.0, - "52": 1274.0, - "53": 1393.0, - "54": 1299.0, - "55": 1186.0, - "56": 1267.0, - "57": 1161.0, - "58": 1326.0, - "59": 1403.0, - "60": 1177.0, - "61": 1363.0, - "62": 1302.0, - "63": 1245.0, - "64": 1378.0, - "65": 1330.0, - "66": 1363.0, - "67": 1286.0, - "68": 1313.0, - "69": 1295.0, - "70": 1459.0, - "71": 1374.0, - "72": 1092.0, - "73": 1274.0, - "74": 943.0, - "75": 1059.0, - "76": 1323.0, - "77": 1475.0, - "78": 1487.0, - "79": 1496.0, - "80": 1382.0, - "81": 1470.0, - "82": 1417.0, - "83": 1177.0, - "84": 1506.0, - "85": 1420.0, - "86": 1281.0, - "87": 1540.0, - "88": 1467.0, - "89": 1452.0, - "90": 1350.0, - "91": 1010.0, - "92": 1324.0, - "93": 1349.0, - "94": 1197.0, - "95": 2503.0, - "96": 2373.0, - "97": 1490.0, - "98": 2541.0, - "99": 1367.0, - "100": 1122.0 + "40": 1379.0, + "41": 1115.0, + "42": 1175.0, + "43": 1049.0, + "44": 1164.0, + "45": 1127.0, + "46": 1334.0, + "47": 1233.0, + "48": 1192.0, + "49": 1310.0, + "50": 1125.0, + "51": 1311.0, + "52": 
1269.0, + "53": 1392.0, + "54": 1266.0, + "55": 1197.0, + "56": 1294.0, + "57": 1125.0, + "58": 1380.0, + "59": 1335.0, + "60": 1070.0, + "61": 1317.0, + "62": 1323.0, + "63": 1177.0, + "64": 1464.0, + "65": 1297.0, + "66": 1459.0, + "67": 1319.0, + "68": 1281.0, + "69": 1361.0, + "70": 1439.0, + "71": 1408.0, + "72": 1131.0, + "73": 1261.0, + "74": 918.0, + "75": 1051.0, + "76": 1288.0, + "77": 1472.0, + "78": 1433.0, + "79": 1433.0, + "80": 1350.0, + "81": 1576.0, + "82": 1414.0, + "83": 1205.0, + "84": 1485.0, + "85": 1339.0, + "86": 1265.0, + "87": 1538.0, + "88": 1462.0, + "89": 1499.0, + "90": 1289.0, + "91": 1052.0, + "92": 1303.0, + "93": 1235.0, + "94": 1301.0, + "95": 1386.0, + "96": 2364.0, + "97": 1408.0, + "98": 2551.0, + "99": 1263.0, + "100": 1227.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 788523008.0, - "2": 788493312.0, - "3": 788540416.0, - "4": 788518400.0, - "5": 788542464.0, - "6": 788484608.0, - "7": 788507648.0, - "8": 788515328.0, - "9": 788531200.0, - "10": 788543488.0, - "11": 788518400.0, - "12": 788489216.0, - "13": 788547584.0, - "14": 788456448.0, - "15": 788508160.0, - "16": 788445696.0, - "17": 788563456.0, - "18": 788540416.0, - "19": 788547584.0, - "20": 788475904.0, - "21": 788513792.0, - "22": 788599296.0, - "23": 788578816.0, - "24": 788518400.0, - "25": 788660736.0, - "26": 788571136.0, - "27": 788635648.0, - "28": 788573696.0, - "29": 788615680.0, - "30": 788592640.0, - "31": 788652544.0, - "32": 788608000.0, - "33": 788621824.0, - "34": 788582912.0, - "35": 788621824.0, - "36": 788647424.0, - "37": 788602880.0, - "38": 788655616.0, - "39": 788668416.0, - "40": 788555264.0, - "41": 788596736.0, - "42": 788580352.0, - "43": 788547072.0, - "44": 788628992.0, - "45": 788496384.0, - "46": 788508672.0, - "47": 788577280.0, - "48": 788493824.0, - "49": 788466688.0, - "50": 788492288.0, - "51": 788528128.0, - "52": 788488704.0, - "53": 788518912.0, - "54": 
788508672.0, - "55": 788505088.0, - "56": 788464128.0, - "57": 788461568.0, - "58": 788505088.0, - "59": 788508672.0, - "60": 788496384.0, - "61": 788468736.0, - "62": 788502528.0, - "63": 788454912.0, - "64": 788470784.0, - "65": 788413440.0, - "66": 788450816.0, - "67": 788450816.0, - "68": 788461568.0, - "69": 788478976.0, - "70": 788502528.0, - "71": 788459008.0, - "72": 788419072.0, - "73": 788449280.0, - "74": 788424192.0, - "75": 788446720.0, - "76": 788418048.0, - "77": 788476416.0, - "78": 788467712.0, - "79": 788424192.0, - "80": 788416512.0, - "81": 788435968.0, - "82": 788444160.0, - "83": 788440576.0, - "84": 788476416.0, - "85": 788466176.0, - "86": 788400128.0, - "87": 788495872.0, - "88": 788498432.0, - "89": 788506624.0, - "90": 788536832.0, - "91": 788518912.0, - "92": 788521984.0, - "93": 788492288.0, - "94": 788511744.0, - "95": 788548608.0, - "96": 788568576.0, - "97": 788584960.0, - "98": 788595712.0, - "99": 788519936.0, - "100": 788575744.0 + "1": 788555776.0, + "2": 788525568.0, + "3": 788572672.0, + "4": 788552704.0, + "5": 788574720.0, + "6": 788517888.0, + "7": 788541440.0, + "8": 788548096.0, + "9": 788562944.0, + "10": 788577280.0, + "11": 788553216.0, + "12": 788523008.0, + "13": 788579328.0, + "14": 788489216.0, + "15": 788539904.0, + "16": 788476928.0, + "17": 788598784.0, + "18": 788574208.0, + "19": 788580864.0, + "20": 788508160.0, + "21": 788545536.0, + "22": 788632064.0, + "23": 788610560.0, + "24": 788551168.0, + "25": 788694016.0, + "26": 788605440.0, + "27": 788667904.0, + "28": 788609024.0, + "29": 788647936.0, + "30": 788625408.0, + "31": 788685824.0, + "32": 788640768.0, + "33": 788655616.0, + "34": 788615680.0, + "35": 788654080.0, + "36": 788679680.0, + "37": 788634624.0, + "38": 788688896.0, + "39": 788698112.0, + "40": 788588032.0, + "41": 788628992.0, + "42": 788613632.0, + "43": 788577792.0, + "44": 788661248.0, + "45": 788528640.0, + "46": 788540928.0, + "47": 788609536.0, + "48": 788528640.0, + "49": 788498944.0, 
+ "50": 788524544.0, + "51": 788559872.0, + "52": 788518400.0, + "53": 788552192.0, + "54": 788543488.0, + "55": 788538880.0, + "56": 788497408.0, + "57": 788493824.0, + "58": 788537344.0, + "59": 788539904.0, + "60": 788527104.0, + "61": 788499968.0, + "62": 788535296.0, + "63": 788487168.0, + "64": 788503552.0, + "65": 788446208.0, + "66": 788485632.0, + "67": 788485120.0, + "68": 788493312.0, + "69": 788508672.0, + "70": 788534784.0, + "71": 788491264.0, + "72": 788452864.0, + "73": 788477440.0, + "74": 788452864.0, + "75": 788480000.0, + "76": 788450304.0, + "77": 788506624.0, + "78": 788500992.0, + "79": 788451840.0, + "80": 788448256.0, + "81": 788466176.0, + "82": 788474880.0, + "83": 788470784.0, + "84": 788506624.0, + "85": 788496384.0, + "86": 788430848.0, + "87": 788528128.0, + "88": 788530176.0, + "89": 788537856.0, + "90": 788569600.0, + "91": 788549632.0, + "92": 788555264.0, + "93": 788525056.0, + "94": 788546560.0, + "95": 788583424.0, + "96": 788601856.0, + "97": 788617216.0, + "98": 788629504.0, + "99": 788551680.0, + "100": 788611072.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 3022964224.0, - "2": 3177559552.0, - "3": 3206005248.0, - "4": 3206005248.0, - "5": 3206005248.0, - "6": 3206005248.0, - "7": 3206005248.0, - "8": 3206005248.0, - "9": 3206005248.0, - "10": 3206005248.0, - "11": 3206005248.0, - "12": 3206005248.0, - "13": 3208181248.0, - "14": 3208181248.0, - "15": 3208181248.0, - "16": 3208181248.0, - "17": 3216008192.0, - "18": 3216008192.0, - "19": 3216008192.0, - "20": 3216008192.0, - "21": 3216008192.0, - "22": 3238043648.0, - "23": 3238043648.0, - "24": 3238043648.0, - "25": 3281027072.0, - "26": 3281027072.0, - "27": 3281027072.0, - "28": 3281027072.0, - "29": 3281027072.0, - "30": 3281027072.0, - "31": 3281027072.0, - "32": 3281027072.0, - "33": 3281027072.0, - "34": 3281027072.0, - "35": 3281027072.0, - "36": 3281027072.0, - "37": 3281027072.0, - "38": 
3281027072.0, - "39": 3281027072.0, - "40": 3281027072.0, - "41": 3281027072.0, - "42": 3281027072.0, - "43": 3281027072.0, - "44": 3281027072.0, - "45": 3281027072.0, - "46": 3281027072.0, - "47": 3281027072.0, - "48": 3281027072.0, - "49": 3281027072.0, - "50": 3281027072.0, - "51": 3281027072.0, - "52": 3281027072.0, - "53": 3281027072.0, - "54": 3281027072.0, - "55": 3281027072.0, - "56": 3281027072.0, - "57": 3281027072.0, - "58": 3281027072.0, - "59": 3281027072.0, - "60": 3281027072.0, - "61": 3281027072.0, - "62": 3281027072.0, - "63": 3281027072.0, - "64": 3281027072.0, - "65": 3281027072.0, - "66": 3281027072.0, - "67": 3281027072.0, - "68": 3281027072.0, - "69": 3281027072.0, - "70": 3281027072.0, - "71": 3281027072.0, - "72": 3281027072.0, - "73": 3281027072.0, - "74": 3281027072.0, - "75": 3281027072.0, - "76": 3281027072.0, - "77": 3281027072.0, - "78": 3281027072.0, - "79": 3281027072.0, - "80": 3281027072.0, - "81": 3281027072.0, - "82": 3281027072.0, - "83": 3281027072.0, - "84": 3281027072.0, - "85": 3281027072.0, - "86": 3281027072.0, - "87": 3281027072.0, - "88": 3281027072.0, - "89": 3281027072.0, - "90": 3281027072.0, - "91": 3281027072.0, - "92": 3281027072.0, - "93": 3281027072.0, - "94": 3281027072.0, - "95": 3281027072.0, - "96": 3281027072.0, - "97": 3281027072.0, - "98": 3281027072.0, - "99": 3281027072.0, - "100": 3281027072.0 + "1": 3121186304.0, + "2": 3272137728.0, + "3": 3305329664.0, + "4": 3305329664.0, + "5": 3309687808.0, + "6": 3309687808.0, + "7": 3309687808.0, + "8": 3309687808.0, + "9": 3309687808.0, + "10": 3309926912.0, + "11": 3309926912.0, + "12": 3309926912.0, + "13": 3309926912.0, + "14": 3309926912.0, + "15": 3309926912.0, + "16": 3309926912.0, + "17": 3318584832.0, + "18": 3318584832.0, + "19": 3318584832.0, + "20": 3318584832.0, + "21": 3318584832.0, + "22": 3346422784.0, + "23": 3346422784.0, + "24": 3346422784.0, + "25": 3392057856.0, + "26": 3392057856.0, + "27": 3392057856.0, + "28": 3392057856.0, + "29": 
3392057856.0, + "30": 3392057856.0, + "31": 3392057856.0, + "32": 3392057856.0, + "33": 3392057856.0, + "34": 3392057856.0, + "35": 3392057856.0, + "36": 3392057856.0, + "37": 3392057856.0, + "38": 3392057856.0, + "39": 3392057856.0, + "40": 3392057856.0, + "41": 3392057856.0, + "42": 3392057856.0, + "43": 3392057856.0, + "44": 3392057856.0, + "45": 3392057856.0, + "46": 3392057856.0, + "47": 3392057856.0, + "48": 3392057856.0, + "49": 3392057856.0, + "50": 3392057856.0, + "51": 3392057856.0, + "52": 3392057856.0, + "53": 3392057856.0, + "54": 3392057856.0, + "55": 3392057856.0, + "56": 3392057856.0, + "57": 3392057856.0, + "58": 3392057856.0, + "59": 3392057856.0, + "60": 3392057856.0, + "61": 3392057856.0, + "62": 3392057856.0, + "63": 3392057856.0, + "64": 3392057856.0, + "65": 3392057856.0, + "66": 3392057856.0, + "67": 3392057856.0, + "68": 3392057856.0, + "69": 3392057856.0, + "70": 3392057856.0, + "71": 3392057856.0, + "72": 3392057856.0, + "73": 3392057856.0, + "74": 3392057856.0, + "75": 3392057856.0, + "76": 3392057856.0, + "77": 3392057856.0, + "78": 3392057856.0, + "79": 3392057856.0, + "80": 3392057856.0, + "81": 3392057856.0, + "82": 3392057856.0, + "83": 3392057856.0, + "84": 3392057856.0, + "85": 3392057856.0, + "86": 3392057856.0, + "87": 3392057856.0, + "88": 3392057856.0, + "89": 3392057856.0, + "90": 3392057856.0, + "91": 3392057856.0, + "92": 3392057856.0, + "93": 3392057856.0, + "94": 3392057856.0, + "95": 3392057856.0, + "96": 3392057856.0, + "97": 3392057856.0, + "98": 3392057856.0, + "99": 3392057856.0, + "100": 3392057856.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 12.96093, - "2": 0.20892, - "3": 0.18473, - "4": 0.18131, - "5": 0.18523, - "6": 0.15261, - "7": 0.15478, - "8": 0.15961, - "9": 0.14304, - "10": 0.14479, - "11": 0.14001, - "12": 0.14477, - "13": 0.13539, - "14": 0.14122, - "15": 0.12814, - "16": 0.1422, - "17": 0.14026, - "18": 0.1393, - "19": 0.13844, - "20": 
0.14704, - "21": 0.13226, - "22": 0.12909, - "23": 0.13878, - "24": 0.13814, - "25": 0.13861, - "26": 0.14021, - "27": 0.15004, - "28": 0.14508, - "29": 0.15539, - "30": 0.14923, - "31": 0.15897, - "32": 0.14709, - "33": 0.15008, - "34": 0.14672, - "35": 0.15075, - "36": 0.15567, - "37": 0.14723, - "38": 0.15175, - "39": 0.14843, - "40": 0.15144, - "41": 0.14498, - "42": 0.15026, - "43": 0.15467, - "44": 0.14949, - "45": 0.14547, - "46": 0.16159, - "47": 0.14865, - "48": 0.13694, - "49": 0.1448, - "50": 0.14252, - "51": 0.1539, - "52": 0.14596, - "53": 0.14405, - "54": 0.13597, - "55": 0.13684, - "56": 0.1422, - "57": 0.14574, - "58": 0.15689, - "59": 0.14026, - "60": 0.15291, - "61": 0.14644, - "62": 0.14867, - "63": 0.14378, - "64": 0.14841, - "65": 0.13208, - "66": 0.13289, - "67": 0.13565, - "68": 0.13616, - "69": 0.1404, - "70": 0.15207, - "71": 0.12955, - "72": 0.13978, - "73": 0.13699, - "74": 0.13757, - "75": 0.13284, - "76": 0.12662, - "77": 0.13897, - "78": 0.13046, - "79": 0.13331, - "80": 0.13187, - "81": 0.13684, - "82": 0.12702, - "83": 0.13369, - "84": 0.14567, - "85": 0.13204, - "86": 0.12582, - "87": 0.12655, - "88": 0.13008, - "89": 0.12999, - "90": 0.13521, - "91": 0.12701, - "92": 0.13282, - "93": 0.12621, - "94": 0.12513, - "95": 0.12172, - "96": 0.12142, - "97": 0.13611, - "98": 0.12449, - "99": 0.12809, - "100": 0.12496 + "1": 12.9672, + "2": 0.18032, + "3": 0.16621, + "4": 0.14138, + "5": 0.14697, + "6": 0.12745, + "7": 0.13018, + "8": 0.1308, + "9": 0.12325, + "10": 0.11929, + "11": 0.11868, + "12": 0.11662, + "13": 0.11935, + "14": 0.12579, + "15": 0.10685, + "16": 0.1235, + "17": 0.11712, + "18": 0.11351, + "19": 0.11956, + "20": 0.12036, + "21": 0.11206, + "22": 0.12061, + "23": 0.11918, + "24": 0.11718, + "25": 0.11286, + "26": 0.11553, + "27": 0.12325, + "28": 0.12425, + "29": 0.1373, + "30": 0.14042, + "31": 0.12588, + "32": 0.12886, + "33": 0.11871, + "34": 0.1268, + "35": 0.12631, + "36": 0.13682, + "37": 0.12561, + "38": 0.12806, + 
"39": 0.13203, + "40": 0.13218, + "41": 0.12224, + "42": 0.13858, + "43": 0.13174, + "44": 0.12012, + "45": 0.12567, + "46": 0.13565, + "47": 0.12427, + "48": 0.11574, + "49": 0.11974, + "50": 0.12631, + "51": 0.14169, + "52": 0.11509, + "53": 0.1256, + "54": 0.1169, + "55": 0.12608, + "56": 0.11705, + "57": 0.12085, + "58": 0.11877, + "59": 0.1187, + "60": 0.12978, + "61": 0.11339, + "62": 0.1117, + "63": 0.12276, + "64": 0.12623, + "65": 0.1311, + "66": 0.1174, + "67": 0.12925, + "68": 0.11502, + "69": 0.1185, + "70": 0.12525, + "71": 0.10756, + "72": 0.11771, + "73": 0.1132, + "74": 0.12549, + "75": 0.10854, + "76": 0.11252, + "77": 0.11354, + "78": 0.10942, + "79": 0.11618, + "80": 0.1066, + "81": 0.11024, + "82": 0.10189, + "83": 0.10909, + "84": 0.14864, + "85": 0.10374, + "86": 0.10395, + "87": 0.10291, + "88": 0.11323, + "89": 0.10749, + "90": 0.10777, + "91": 0.10528, + "92": 0.10628, + "93": 0.10398, + "94": 0.11116, + "95": 0.10621, + "96": 0.11081, + "97": 0.11111, + "98": 0.09872, + "99": 0.1051, + "100": 0.10136 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..7bfccdb49b6 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + 
"24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.88487, + "52": 9.78018, + "53": 10.10226, + "54": 9.9873, + "55": 9.9027, + "56": 9.66818, + "57": 9.53521, + "58": 9.89495, + "59": 9.6289, + "60": 9.54307, + "61": 9.72725, + "62": 10.03319, + "63": 9.45201, + "64": 9.83185, + "65": 8.99108, + "66": 9.76421, + "67": 9.40334, + "68": 9.83107, + "69": 9.81874, + "70": 9.77252, + "71": 9.65812, + "72": 9.64065, + "73": 9.5512, + "74": 9.02044, + "75": 9.47713, + "76": 9.13591, + "77": 10.09778, + "78": 9.75282, + "79": 9.41686, + "80": 9.45072, + "81": 9.52034, + "82": 9.73197, + "83": 9.36926, + "84": 9.4504, + "85": 9.65212, + "86": 9.11237, + "87": 9.61129, + "88": 9.78679, + "89": 9.64613, + "90": 9.83484, + "91": 9.39422, + "92": 9.39187, + "93": 9.12787, + "94": 8.86646, + "95": 9.54348, + "96": 9.55708, + "97": 9.33174, + "98": 9.6919, + "99": 8.92043, + "100": 9.41916 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": 
"nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1311.0, + "52": 1269.0, + "53": 1392.0, + "54": 1266.0, + "55": 1197.0, + "56": 1294.0, + "57": 1125.0, + "58": 1380.0, + "59": 1335.0, + "60": 1070.0, + "61": 1317.0, + "62": 1323.0, + "63": 1177.0, + "64": 1464.0, + "65": 1297.0, + "66": 1459.0, + "67": 1319.0, + "68": 1281.0, + "69": 1361.0, + "70": 1439.0, + "71": 1408.0, + "72": 1131.0, + "73": 1261.0, + "74": 918.0, + "75": 1051.0, + "76": 1288.0, + "77": 1472.0, + "78": 1433.0, + "79": 1433.0, + "80": 1350.0, + "81": 1576.0, + "82": 1414.0, + "83": 1205.0, + "84": 1485.0, + "85": 1339.0, + "86": 1265.0, + "87": 1538.0, + "88": 1462.0, + "89": 1499.0, + "90": 1289.0, + "91": 1052.0, + "92": 1303.0, + "93": 1235.0, + "94": 1301.0, + "95": 1386.0, + "96": 2364.0, + "97": 1408.0, + "98": 2551.0, + "99": 1263.0, + "100": 1227.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 788526080.0, + "52": 788485632.0, + "53": 788519424.0, + "54": 788510720.0, + "55": 788506112.0, + "56": 788464640.0, + "57": 788461056.0, + "58": 788504576.0, + "59": 788507136.0, + "60": 788494336.0, + "61": 788467200.0, + "62": 788502528.0, 
+ "63": 788454400.0, + "64": 788470784.0, + "65": 788413440.0, + "66": 788452864.0, + "67": 788452352.0, + "68": 788460544.0, + "69": 788475904.0, + "70": 788502016.0, + "71": 788458496.0, + "72": 788420096.0, + "73": 788444672.0, + "74": 788420096.0, + "75": 788447232.0, + "76": 788417536.0, + "77": 788473856.0, + "78": 788468224.0, + "79": 788419072.0, + "80": 788415488.0, + "81": 788433408.0, + "82": 788442112.0, + "83": 788438016.0, + "84": 788473856.0, + "85": 788463616.0, + "86": 788398080.0, + "87": 788495360.0, + "88": 788497408.0, + "89": 788505088.0, + "90": 788536832.0, + "91": 788516864.0, + "92": 788522496.0, + "93": 788492288.0, + "94": 788513792.0, + "95": 788550656.0, + "96": 788569088.0, + "97": 788584448.0, + "98": 788596736.0, + "99": 788518912.0, + "100": 788578304.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3260852736.0, + "52": 3268460544.0, + "53": 3289748992.0, + "54": 3289748992.0, + "55": 3289748992.0, + "56": 3289748992.0, + "57": 3289748992.0, + "58": 3289748992.0, + "59": 3289748992.0, + "60": 3289748992.0, + "61": 3289748992.0, + "62": 3289748992.0, + "63": 3289748992.0, + "64": 3289748992.0, + "65": 3289748992.0, + "66": 
3289748992.0, + "67": 3289748992.0, + "68": 3289748992.0, + "69": 3289748992.0, + "70": 3289748992.0, + "71": 3289748992.0, + "72": 3289748992.0, + "73": 3289748992.0, + "74": 3289748992.0, + "75": 3289748992.0, + "76": 3289748992.0, + "77": 3289748992.0, + "78": 3289748992.0, + "79": 3289748992.0, + "80": 3289748992.0, + "81": 3289748992.0, + "82": 3289748992.0, + "83": 3289748992.0, + "84": 3289748992.0, + "85": 3289748992.0, + "86": 3289748992.0, + "87": 3289748992.0, + "88": 3289748992.0, + "89": 3289748992.0, + "90": 3304260608.0, + "91": 3304260608.0, + "92": 3304260608.0, + "93": 3304260608.0, + "94": 3304260608.0, + "95": 3317049856.0, + "96": 3327264256.0, + "97": 3342199296.0, + "98": 3342199296.0, + "99": 3342199296.0, + "100": 3342199296.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.41223, + "52": 0.19638, + "53": 0.16863, + "54": 0.16115, + "55": 0.16098, + "56": 0.14835, + "57": 0.14976, + "58": 0.1434, + "59": 0.15979, + "60": 0.14662, + "61": 0.13636, + "62": 0.13903, + "63": 0.14463, + "64": 0.12921, + "65": 0.14012, + "66": 0.1288, + "67": 0.13615, + "68": 0.12598, + "69": 0.12709, + "70": 0.13652, + "71": 0.12173, + "72": 0.13319, + "73": 0.12379, + 
"74": 0.13482, + "75": 0.1344, + "76": 0.11894, + "77": 0.13537, + "78": 0.12153, + "79": 0.12133, + "80": 0.11937, + "81": 0.11569, + "82": 0.11902, + "83": 0.12127, + "84": 0.1134, + "85": 0.10983, + "86": 0.12467, + "87": 0.10796, + "88": 0.11354, + "89": 0.11117, + "90": 0.1179, + "91": 0.10903, + "92": 0.10919, + "93": 0.11161, + "94": 0.11589, + "95": 0.11757, + "96": 0.11512, + "97": 0.11492, + "98": 0.1084, + "99": 0.12117, + "100": 0.10905 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_h100.json index dd58e4cb1e6..7d62923f634 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_h100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 14.85862, - "2": 0.28762, - "3": 0.23592, - "4": 0.20463, - "5": 0.21635, - "6": 0.20801, - "7": 0.20692, - "8": 0.20277, - "9": 0.20138, - "10": 0.19098, - "11": 0.19711, - "12": 0.19844, - "13": 0.18786, - "14": 0.19577, - "15": 0.18886, - "16": 0.18411, - "17": 0.18416, - "18": 0.18182, - "19": 0.17759, - "20": 0.18827, - "21": 0.18366, - "22": 0.18163, - "23": 0.18941, - "24": 0.18055, - "25": 0.18951, - "26": 0.18201, - "27": 0.17466, - "28": 0.18234, - "29": 0.1853, - "30": 0.17307, - "31": 0.18014, - "32": 0.17813, - "33": 0.18392, - "34": 0.1759, - "35": 0.18165, - "36": 0.17738, - "37": 0.18009, - "38": 0.17899, - "39": 0.18864, - "40": 0.17767, - "41": 0.17797, - "42": 0.18018, - "43": 0.18155, - "44": 0.17807, - "45": 0.17732, - "46": 0.17196, - "47": 0.1803, - "48": 0.17785, - "49": 0.17302, - "50": 0.1824, - "51": 0.19257, - "52": 0.17832, - "53": 0.18137, - "54": 0.17448, - 
"55": 0.178, - "56": 0.17346, - "57": 0.17236, - "58": 0.17018, - "59": 0.16863, - "60": 0.17468, - "61": 0.1713, - "62": 0.1744, - "63": 0.17553, - "64": 0.57804, - "65": 0.17627, - "66": 0.17362, - "67": 0.17436, - "68": 0.17766, - "69": 0.18446, - "70": 0.18419, - "71": 0.17131, - "72": 0.16832, - "73": 0.17321, - "74": 0.17561, - "75": 0.17215, - "76": 0.17083, - "77": 0.1791, - "78": 0.16898, - "79": 0.17382, - "80": 0.17586, - "81": 0.18035, - "82": 0.17931, - "83": 0.17665, - "84": 0.17692, - "85": 0.1765, - "86": 0.17412, - "87": 0.59045, - "88": 0.17964, - "89": 0.17565, - "90": 0.18664, - "91": 0.1784, - "92": 0.17175, - "93": 0.17523, - "94": 0.17223, - "95": 0.17436, - "96": 0.18556, - "97": 0.17929, - "98": 0.1847, - "99": 0.17691, - "100": 0.57857 + "1": "nan", + "2": 6.73766, + "3": 0.31869, + "4": 0.3125, + "5": 0.31279, + "6": 0.29974, + "7": 0.30628, + "8": 0.29637, + "9": 0.29751, + "10": 0.28123, + "11": 0.3055, + "12": 0.28861, + "13": 0.27778, + "14": 0.28796, + "15": 0.28678, + "16": 0.27533, + "17": 0.27979, + "18": 1.87923, + "19": 0.28574, + "20": 0.28215, + "21": 0.2771, + "22": 0.27101, + "23": 0.27311, + "24": 1.50235, + "25": 0.27537, + "26": 1.04897, + "27": 0.26783, + "28": 0.69868, + "29": 0.27953, + "30": 1.54699, + "31": 0.27899, + "32": 0.28165, + "33": 0.28792, + "34": 0.27678, + "35": 1.25378, + "36": 0.88573, + "37": 0.26832, + "38": 0.26501, + "39": 0.28399, + "40": 0.96629, + "41": 0.26938, + "42": 0.31209, + "43": 0.27968, + "44": 0.65987, + "45": 0.51088, + "46": 1.37707, + "47": 0.26575, + "48": 0.92193, + "49": 0.26081, + "50": 0.27031, + "51": 0.31353, + "52": 0.27257, + "53": 0.27323, + "54": 0.27148, + "55": 0.27248, + "56": 0.7475, + "57": 0.26706, + "58": 0.28367, + "59": 0.27716, + "60": 1.12441, + "61": 0.26587, + "62": 0.68635, + "63": 0.28123, + "64": 0.98333, + "65": 0.27408, + "66": 1.22087, + "67": 0.26407, + "68": 0.95198, + "69": 0.29272, + "70": 0.52799, + "71": 0.92323, + "72": 0.25931, + "73": 0.26616, + 
"74": 0.28128, + "75": 0.28947, + "76": 0.27481, + "77": 0.67217, + "78": 0.28612, + "79": 0.85039, + "80": 0.2721, + "81": 0.5328, + "82": 0.57505, + "83": 0.79918, + "84": 0.28096, + "85": 0.27744, + "86": 0.273, + "87": 0.33552, + "88": 0.48699, + "89": 0.28552, + "90": 0.50386, + "91": 0.27372, + "92": 0.64636, + "93": 0.26742, + "94": 0.2649, + "95": 0.49366, + "96": 0.36845, + "97": 0.29731, + "98": 0.53051, + "99": 0.26212, + "100": 0.75087 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..d8a426b39e0 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.82229, + "52": 10.80331, + "53": 10.83272, + "54": 10.81511, + "55": 10.82544, + "56": 10.77739, + "57": 10.75212, + "58": 10.80727, + "59": 10.79059, + "60": 10.74002, + "61": 10.79967, + "62": 10.81287, + 
"63": 10.72057, + "64": 10.78554, + "65": 10.68954, + "66": 10.76088, + "67": 10.73433, + "68": 10.80171, + "69": 10.78354, + "70": 10.77601, + "71": 10.767, + "72": 10.73617, + "73": 10.72977, + "74": 10.62268, + "75": 10.69072, + "76": 10.65444, + "77": 10.82173, + "78": 10.76342, + "79": 10.70428, + "80": 10.69419, + "81": 10.72444, + "82": 10.74209, + "83": 10.66776, + "84": 10.69841, + "85": 10.71466, + "86": 10.63794, + "87": 10.71867, + "88": 10.73504, + "89": 10.71428, + "90": 10.74679, + "91": 10.64894, + "92": 10.64647, + "93": 10.60196, + "94": 10.53294, + "95": 10.66112, + "96": 10.6724, + "97": 10.61431, + "98": 10.68496, + "99": 10.52028, + "100": 10.61542 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1192.0, + "52": 1269.0, + "53": 1394.0, + "54": 1248.0, + "55": 1159.0, + "56": 1286.0, + "57": 1320.0, + "58": 1277.0, + "59": 1258.0, + "60": 1208.0, + "61": 1163.0, + "62": 1153.0, + "63": 1291.0, + "64": 1246.0, + "65": 1270.0, + "66": 1214.0, + "67": 1160.0, + "68": 1234.0, + "69": 1298.0, + "70": 1371.0, + "71": 1159.0, + "72": 1221.0, + "73": 1193.0, + "74": 1133.0, + "75": 1314.0, + "76": 1279.0, + "77": 1351.0, + "78": 1304.0, + "79": 1100.0, + "80": 1124.0, 
+ "81": 1146.0, + "82": 1247.0, + "83": 1291.0, + "84": 1104.0, + "85": 1226.0, + "86": 1171.0, + "87": 1212.0, + "88": 1322.0, + "89": 1215.0, + "90": 1303.0, + "91": 1142.0, + "92": 1267.0, + "93": 1099.0, + "94": 1022.0, + "95": 1297.0, + "96": 1255.0, + "97": 1195.0, + "98": 1250.0, + "99": 1256.0, + "100": 1214.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1095899648.0, + "52": 1095847424.0, + "53": 1095908352.0, + "54": 1095856640.0, + "55": 1095843328.0, + "56": 1095836160.0, + "57": 1095881216.0, + "58": 1095879680.0, + "59": 1095863296.0, + "60": 1095830016.0, + "61": 1095828992.0, + "62": 1095825920.0, + "63": 1095840256.0, + "64": 1095826944.0, + "65": 1095834112.0, + "66": 1095843840.0, + "67": 1095830528.0, + "68": 1095840256.0, + "69": 1095829504.0, + "70": 1095872000.0, + "71": 1095846912.0, + "72": 1095772160.0, + "73": 1095873024.0, + "74": 1095885824.0, + "75": 1095849984.0, + "76": 1095870976.0, + "77": 1095824896.0, + "78": 1095884288.0, + "79": 1095855616.0, + "80": 1095808000.0, + "81": 1095844864.0, + "82": 1095790080.0, + "83": 1095890944.0, + "84": 1095872000.0, + "85": 1095839744.0, + "86": 1095839232.0, + "87": 1095861760.0, + "88": 
1095849472.0, + "89": 1095837696.0, + "90": 1095828480.0, + "91": 1095883776.0, + "92": 1095866880.0, + "93": 1095845376.0, + "94": 1095854592.0, + "95": 1095854080.0, + "96": 1095854592.0, + "97": 1095863296.0, + "98": 1095840256.0, + "99": 1095857152.0, + "100": 1095894528.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 3605154816.0, + "52": 3605154816.0, + "53": 3639019008.0, + "54": 3639019008.0, + "55": 3639019008.0, + "56": 3639019008.0, + "57": 3639019008.0, + "58": 3639019008.0, + "59": 3639019008.0, + "60": 3639019008.0, + "61": 3639019008.0, + "62": 3639019008.0, + "63": 3639019008.0, + "64": 3639019008.0, + "65": 3639019008.0, + "66": 3639019008.0, + "67": 3639019008.0, + "68": 3639019008.0, + "69": 3639019008.0, + "70": 3639019008.0, + "71": 3639019008.0, + "72": 3639019008.0, + "73": 3639019008.0, + "74": 3639019008.0, + "75": 3639019008.0, + "76": 3639019008.0, + "77": 3639019008.0, + "78": 3639019008.0, + "79": 3639019008.0, + "80": 3639019008.0, + "81": 3639019008.0, + "82": 3639019008.0, + "83": 3639019008.0, + "84": 3639019008.0, + "85": 3639019008.0, + "86": 3639019008.0, + "87": 3639019008.0, + "88": 3639019008.0, + "89": 3639019008.0, + "90": 
3639019008.0, + "91": 3639019008.0, + "92": 3639019008.0, + "93": 3639019008.0, + "94": 3639019008.0, + "95": 3639019008.0, + "96": 3639019008.0, + "97": 3639019008.0, + "98": 3639019008.0, + "99": 3639019008.0, + "100": 3639019008.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": "nan", + "52": 2.63883, + "53": 0.31395, + "54": 0.31367, + "55": 0.29374, + "56": 0.30814, + "57": 0.28518, + "58": 0.2893, + "59": 0.29547, + "60": 0.29213, + "61": 0.27939, + "62": 0.28509, + "63": 0.28109, + "64": 0.28285, + "65": 0.27653, + "66": 0.27923, + "67": 0.27493, + "68": 0.28188, + "69": 0.2851, + "70": 0.28475, + "71": 0.28187, + "72": 0.28341, + "73": 0.26848, + "74": 0.27702, + "75": 0.29514, + "76": 0.26459, + "77": 0.27617, + "78": 0.27231, + "79": 0.28621, + "80": 0.27218, + "81": 0.27522, + "82": 0.27114, + "83": 0.26001, + "84": 0.26222, + "85": 0.27374, + "86": 0.27145, + "87": 0.28673, + "88": 0.27394, + "89": 0.26336, + "90": 0.28319, + "91": 0.26195, + "92": 0.26716, + "93": 0.26523, + "94": 0.26477, + "95": 0.26706, + "96": 0.2815, + "97": 0.27054, + "98": 0.28122, + "99": 0.27335, + "100": 0.27113 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml index d3e3baa9f14..81b023bd86e 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml @@ -64,4 +64,4 @@ MODEL_ARGS: --muon-momentum: 0.9 --muon-extra-scale-factor: 0.2 --muon-scale-mode: spectral -TEST_TYPE: regular +TEST_TYPE: ckpt-resume diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json index 038ed2be724..d5ced620365 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 11.06693, "2": 11.0602, - "3": 10.21173, - "4": 9.95255, - "5": 10.12502, - "6": 8.8231, - "7": 9.52825, - "8": 8.44297, - "9": 7.84977, - "10": 7.0728, - "11": 9.30154, - "12": 9.14531, - "13": 7.86583, - "14": 8.21069, - "15": 8.2169, - "16": 8.17413, - "17": 8.21514, - "18": 7.49348, - "19": 8.08414, - "20": 7.63479, - "21": 7.95116, - "22": 7.29475, - "23": 7.9358, - "24": 7.43073, - "25": 8.23819, - "26": 7.75508, - "27": 7.6991, - "28": 7.65492, - "29": 7.75272, - "30": 7.56401, - "31": 7.81794, - "32": 6.46781, - "33": 7.20433, - "34": 7.77611, - "35": 7.72648, - "36": 6.71848, - "37": 8.09106, - "38": 7.61823, - "39": 7.96665, - "40": 7.49555, - "41": 7.49366, - "42": 6.10456, - "43": 7.59158, - "44": 7.91315, - "45": 6.83253, - "46": 7.4064, - "47": 
7.78787, - "48": 7.87227, - "49": 7.58424, - "50": 6.83739 + "3": 10.16141, + "4": 10.11145, + "5": 10.47957, + "6": 10.21751, + "7": 10.56153, + "8": 12.79501, + "9": 12.96949, + "10": 13.32223, + "11": 11.63359, + "12": 11.4938, + "13": 12.46292, + "14": 12.13415, + "15": 11.90295, + "16": 12.01307, + "17": 12.17443, + "18": 12.64978, + "19": 11.81295, + "20": 12.18673, + "21": 11.24306, + "22": 11.54156, + "23": 10.98412, + "24": 11.01925, + "25": 10.73001, + "26": 10.72806, + "27": 10.79039, + "28": 10.714, + "29": 10.73974, + "30": 10.75246, + "31": 10.68874, + "32": 10.65791, + "33": 10.81137, + "34": 10.79058, + "35": 10.75368, + "36": 10.64393, + "37": 10.87492, + "38": 10.90591, + "39": 10.78825, + "40": 10.75548, + "41": 10.8955, + "42": 10.70411, + "43": 10.66907, + "44": 10.72512, + "45": 10.54927, + "46": 10.46973, + "47": 10.66311, + "48": 10.62453, + "49": 10.61656, + "50": 10.21176 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 47165248.0, - "2": 46897896.0, - "3": 52684328.0, - "4": 297102368.0, - "5": 569266880.0, - "6": 661848704.0, - "7": 1027448384.0, - "8": 752263424.0, - "9": 852974912.0, - "10": 683720576.0, - "11": 833170624.0, - "12": 814312640.0, - "13": 639456320.0, - "14": 628553664.0, - "15": 706814592.0, - "16": 848848256.0, - "17": 676948992.0, - "18": 676681088.0, - "19": 892688576.0, - "20": 890700864.0, - "21": 676293696.0, - "22": 701562304.0, - "23": 796268224.0, - "24": 786414720.0, - "25": 667072192.0, - "26": 767487552.0, - "27": 773408512.0, - "28": 758333696.0, - "29": 770627840.0, - "30": 758410304.0, - "31": 644127616.0, - "32": 806561088.0, - "33": 811820352.0, - "34": 780254848.0, - "35": 757223808.0, - "36": 758778496.0, - "37": 753072832.0, - "38": 752875328.0, - "39": 767575744.0, - "40": 760803392.0, - "41": 742253440.0, - "42": 718278848.0, - "43": 676047424.0, - "44": 673998592.0, - "45": 635196864.0, - "46": 629090048.0, - "47": 623565376.0, - "48": 600849984.0, - 
"49": 578357504.0, - "50": 585291904.0 + "1": 47165216.0, + "2": 46897552.0, + "3": 52682736.0, + "4": 70585808.0, + "5": 1850183680.0, + "6": 171098656.0, + "7": 436105120.0, + "8": 1850183680.0, + "9": 1850183680.0, + "10": 1850183680.0, + "11": 1850183680.0, + "12": 1850183680.0, + "13": 1850183680.0, + "14": 1850183680.0, + "15": 555857088.0, + "16": 1850183680.0, + "17": 1850183680.0, + "18": 1850183680.0, + "19": 886404992.0, + "20": 654826944.0, + "21": 603993664.0, + "22": 726709632.0, + "23": 566656896.0, + "24": 1850183680.0, + "25": 799245696.0, + "26": 978252032.0, + "27": 1850183680.0, + "28": 906183104.0, + "29": 1850183680.0, + "30": 1850183680.0, + "31": 810874112.0, + "32": 1850183680.0, + "33": 1850183680.0, + "34": 553779584.0, + "35": 565382400.0, + "36": 585787712.0, + "37": 627284160.0, + "38": 331368192.0, + "39": 638619264.0, + "40": 1850183680.0, + "41": 1850183680.0, + "42": 1850183680.0, + "43": 1850183680.0, + "44": 1850183680.0, + "45": 1850183680.0, + "46": 1850183680.0, + "47": 434842944.0, + "48": 1850183680.0, + "49": 575219328.0, + "50": 1850183680.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6208857600.0, - "2": 8233667072.0, - "3": 8233667072.0, - "4": 8233667072.0, - "5": 8233667072.0, - "6": 8233667072.0, - "7": 8233667072.0, - "8": 8233667072.0, - "9": 8233667072.0, - "10": 8233667072.0, - "11": 8262763008.0, - "12": 8262763008.0, - "13": 8262763008.0, - "14": 8262763008.0, - "15": 8262763008.0, - "16": 8273029632.0, - "17": 8282915328.0, - "18": 8282915328.0, - "19": 8284467712.0, - "20": 8294910464.0, - "21": 8294910464.0, - "22": 8303365632.0, - "23": 8303365632.0, - "24": 8303365632.0, - "25": 8303365632.0, - "26": 8303365632.0, - "27": 8303365632.0, - "28": 8303365632.0, - "29": 8303365632.0, - "30": 8328921600.0, - "31": 8328921600.0, - "32": 8328921600.0, - "33": 8328921600.0, - "34": 8342317568.0, - "35": 8352083456.0, - "36": 8352083456.0, - "37": 
8352083456.0, - "38": 8352083456.0, - "39": 8352083456.0, - "40": 8352083456.0, - "41": 8352083456.0, - "42": 8352083456.0, - "43": 8352083456.0, - "44": 8352083456.0, - "45": 8352083456.0, - "46": 8352083456.0, - "47": 8352083456.0, - "48": 8352083456.0, - "49": 8352083456.0, - "50": 8352083456.0 + "1": 5283618816.0, + "2": 8185453056.0, + "3": 8185453056.0, + "4": 8185453056.0, + "5": 8195318272.0, + "6": 8195318272.0, + "7": 8195318272.0, + "8": 8195318272.0, + "9": 8195318272.0, + "10": 8195318272.0, + "11": 8195318272.0, + "12": 8195318272.0, + "13": 8195318272.0, + "14": 8195318272.0, + "15": 8195318272.0, + "16": 8199233024.0, + "17": 8199233024.0, + "18": 8199233024.0, + "19": 8199233024.0, + "20": 8199233024.0, + "21": 8238446080.0, + "22": 8238446080.0, + "23": 8238446080.0, + "24": 8238446080.0, + "25": 8247293440.0, + "26": 8247293440.0, + "27": 8247293440.0, + "28": 8250185216.0, + "29": 8255527424.0, + "30": 8255527424.0, + "31": 8255527424.0, + "32": 8255527424.0, + "33": 8255527424.0, + "34": 8255527424.0, + "35": 8255527424.0, + "36": 8255527424.0, + "37": 8255527424.0, + "38": 8255527424.0, + "39": 8255527424.0, + "40": 8255527424.0, + "41": 8255527424.0, + "42": 8255527424.0, + "43": 8255527424.0, + "44": 8255527424.0, + "45": 8255527424.0, + "46": 8255527424.0, + "47": 8255527424.0, + "48": 8255527424.0, + "49": 8255527424.0, + "50": 8255527424.0 } }, "mtp_1 loss": { @@ -234,54 +234,54 @@ "values": { "1": 11.07401, "2": 11.0927, - "3": 10.82644, - "4": 10.27575, - "5": 10.45332, - "6": 8.3277, - "7": 9.8265, - "8": 8.01558, - "9": 7.47586, - "10": 6.7581, - "11": 8.9297, - "12": 8.98829, - "13": 7.80214, - "14": 8.02436, - "15": 8.11251, - "16": 8.14258, - "17": 8.13031, - "18": 7.44579, - "19": 8.03606, - "20": 7.54064, - "21": 7.90046, - "22": 7.27709, - "23": 7.88548, - "24": 7.37576, - "25": 8.17071, - "26": 7.69849, - "27": 7.62829, - "28": 7.61349, - "29": 7.69754, - "30": 7.47936, - "31": 7.73926, - "32": 6.37137, - "33": 7.1379, - "34": 
7.71901, - "35": 7.63544, - "36": 6.61321, - "37": 8.03174, - "38": 7.58067, - "39": 7.89473, - "40": 7.41418, - "41": 7.42196, - "42": 6.01401, - "43": 7.49099, - "44": 7.86625, - "45": 6.74951, - "46": 7.30637, - "47": 7.72653, - "48": 7.78872, - "49": 7.48917, - "50": 6.75533 + "3": 10.83159, + "4": 10.61397, + "5": 10.85768, + "6": 9.79263, + "7": 10.90607, + "8": 10.19798, + "9": 9.82717, + "10": 9.23805, + "11": 11.0712, + "12": 11.11709, + "13": 10.03407, + "14": 10.27606, + "15": 10.73067, + "16": 10.91485, + "17": 10.76886, + "18": 10.49659, + "19": 10.96955, + "20": 10.45905, + "21": 10.91629, + "22": 10.05081, + "23": 10.44411, + "24": 9.74826, + "25": 10.81497, + "26": 10.38519, + "27": 10.31999, + "28": 10.27887, + "29": 10.40945, + "30": 10.20684, + "31": 10.54594, + "32": 8.85942, + "33": 9.75619, + "34": 10.56214, + "35": 10.59167, + "36": 9.30537, + "37": 10.59407, + "38": 10.2994, + "39": 10.69954, + "40": 10.37003, + "41": 10.248, + "42": 8.56376, + "43": 10.49224, + "44": 10.57211, + "45": 9.36238, + "46": 10.2179, + "47": 10.63449, + "48": 10.56697, + "49": 10.44093, + "50": 9.49252 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 88.9425, - "2": 2.91855, - "3": 2.58352, - "4": 3.73409, - "5": 2.63585, - "6": 2.48926, - "7": 2.27523, - "8": 2.50563, - "9": 2.45577, - "10": 1.90482, - "11": 1.96806, - "12": 2.42331, - "13": 1.88872, - "14": 1.89773, - "15": 1.90418, - "16": 1.885, - "17": 1.91181, - "18": 1.89194, - "19": 1.97889, - "20": 1.88063, - "21": 1.88612, - "22": 1.90981, - "23": 1.87053, - "24": 1.87293, - "25": 1.89611, - "26": 1.96035, - "27": 1.9067, - "28": 1.91982, - "29": 1.94441, - "30": 1.88208, - "31": 1.9521, - "32": 1.89063, - "33": 1.9571, - "34": 1.93481, - "35": 1.87558, - "36": 1.88538, - "37": 1.89041, - "38": 1.97023, - "39": 1.89001, - "40": 1.87859, - "41": 1.89949, - "42": 1.88775, - "43": 1.94805, - "44": 1.90575, - "45": 1.89185, - "46": 1.87259, - "47": 
1.89396, - "48": 1.8747, - "49": 1.88874, - "50": 1.91915 + "1": 71.30157, + "2": 2.34464, + "3": 2.38747, + "4": 2.10322, + "5": 2.12945, + "6": 2.0424, + "7": 2.12036, + "8": 2.0147, + "9": 2.04925, + "10": 2.02797, + "11": 1.95087, + "12": 2.04985, + "13": 1.94106, + "14": 1.90425, + "15": 1.89051, + "16": 1.89398, + "17": 1.94082, + "18": 1.93176, + "19": 1.94027, + "20": 1.90271, + "21": 1.91097, + "22": 1.90382, + "23": 1.93889, + "24": 1.90551, + "25": 1.90947, + "26": 1.92126, + "27": 1.89917, + "28": 1.89866, + "29": 1.93981, + "30": 1.90782, + "31": 1.91244, + "32": 1.93864, + "33": 1.93947, + "34": 1.96882, + "35": 1.89751, + "36": 1.94038, + "37": 1.90603, + "38": 1.94988, + "39": 1.89874, + "40": 1.90233, + "41": 1.92861, + "42": 1.93931, + "43": 1.91212, + "44": 1.92615, + "45": 1.89555, + "46": 1.94522, + "47": 1.9103, + "48": 1.94689, + "49": 1.9355, + "50": 1.89832 } } -} +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml index 38528836659..a37dd0dc658 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml @@ -133,7 +133,7 @@ MODEL_ARGS: --overlap-moe-expert-parallel-comm: true TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular METRICS: - # - "iteration-time" + - "iteration-time" - "lm loss" - "mem-allocated-bytes" - "mem-max-allocated-bytes" diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json index 9cc2fa69da7..57848f8130e 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 11.01693, "2": 11.06263, - "3": 10.17828, - "4": 10.86162, - "5": 9.8171, - "6": 9.10066, - "7": 9.61216, - "8": 8.39629, - "9": 7.79624, - "10": 7.15182, - "11": 9.06686, - "12": 12.41529, - "13": 8.05859, - "14": 8.25078, - "15": 8.25932, - "16": 8.33199, - "17": 8.33144, - "18": 7.58852, - "19": 8.19681, - "20": 7.68193, - "21": 8.00256, - "22": 7.37928, - "23": 7.95036, - "24": 7.52138, - "25": 8.32313, - "26": 7.80137, - "27": 7.73067, - "28": 7.70985, - "29": 7.77487, - "30": 7.57653, - "31": 7.85303, - "32": 6.5208, - "33": 7.2477, - "34": 7.80024, - "35": 7.74614, - "36": 6.73365, - "37": 8.154, - "38": 7.62714, - "39": 7.97924, - "40": 7.524, - "41": 7.52079, - "42": 6.11188, - "43": 7.6025, - "44": 7.97264, - "45": 6.84479, - "46": 7.4241, - "47": 7.82528, - "48": 7.87668, - "49": 7.5987, - "50": 6.8481 + "3": 10.08845, + "4": 9.73223, + "5": 10.41008, + "6": 10.46377, + "7": 11.62265, + "8": 12.30479, + "9": 12.258, + "10": 12.11321, + "11": 11.67717, + "12": 11.60724, + "13": 11.46408, + "14": 11.41026, + "15": 11.44828, + "16": 11.31999, + "17": 11.28503, + "18": 11.35547, + "19": 11.35205, + "20": 11.50757, + "21": 11.41181, + "22": 11.56383, + "23": 11.41906, + "24": 11.39788, + "25": 11.26438, + "26": 11.36733, + "27": 11.37099, + "28": 11.40035, + "29": 11.42808, + "30": 11.53613, + "31": 11.3981, + "32": 12.00058, + "33": 11.68213, + "34": 11.38046, + "35": 11.36734, + "36": 11.77291, + "37": 11.34584, + "38": 11.4654, + "39": 
11.33231, + "40": 11.43538, + "41": 11.47405, + "42": 12.09241, + "43": 11.39968, + "44": 11.38762, + "45": 11.79356, + "46": 11.4469, + "47": 11.3507, + "48": 11.30787, + "49": 11.39251, + "50": 11.7264 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 47167760.0, - "2": 46900544.0, - "3": 84151152.0, - "4": 237329488.0, - "5": 471710816.0, - "6": 558040704.0, - "7": 958277696.0, - "8": 723945792.0, - "9": 812038208.0, - "10": 721441280.0, - "11": 622437632.0, - "12": 556346176.0, - "13": 633166464.0, - "14": 700920576.0, - "15": 766532480.0, - "16": 719878656.0, - "17": 673785280.0, - "18": 733291456.0, - "19": 713440768.0, - "20": 859244608.0, - "21": 836730112.0, - "22": 789566720.0, - "23": 808848960.0, - "24": 644896128.0, - "25": 852631104.0, - "26": 836696384.0, - "27": 550069504.0, - "28": 604192832.0, - "29": 761193792.0, - "30": 758412160.0, - "31": 782509568.0, - "32": 765664256.0, - "33": 745758912.0, - "34": 569510656.0, - "35": 728914304.0, - "36": 699003840.0, - "37": 705883072.0, - "38": 705682240.0, - "39": 685787136.0, - "40": 656996352.0, - "41": 484325760.0, - "42": 633345536.0, - "43": 641441984.0, - "44": 466413888.0, - "45": 427604864.0, - "46": 566181184.0, - "47": 563795904.0, - "48": 421565312.0, - "49": 537463040.0, - "50": 494058176.0 + "1": 47167880.0, + "2": 46899772.0, + "3": 1722086400.0, + "4": 1722086400.0, + "5": 188597600.0, + "6": 120779000.0, + "7": 527310080.0, + "8": 1722086400.0, + "9": 1722086400.0, + "10": 321966144.0, + "11": 493484608.0, + "12": 1722086400.0, + "13": 529395136.0, + "14": 1722086400.0, + "15": 1722086400.0, + "16": 723018944.0, + "17": 233377744.0, + "18": 642084544.0, + "19": 1722086400.0, + "20": 1722086400.0, + "21": 578776704.0, + "22": 396416192.0, + "23": 506872960.0, + "24": 670044160.0, + "25": 884090624.0, + "26": 912192512.0, + "27": 764026112.0, + "28": 972234112.0, + "29": 915345600.0, + "30": 937728768.0, + "31": 1722086400.0, + "32": 976440512.0, 
+ "33": 984833664.0, + "34": 802321088.0, + "35": 1722086400.0, + "36": 931810816.0, + "37": 897772032.0, + "38": 982505792.0, + "39": 704699008.0, + "40": 688513344.0, + "41": 946725760.0, + "42": 1722086400.0, + "43": 1722086400.0, + "44": 875336384.0, + "45": 1722086400.0, + "46": 909066432.0, + "47": 900409280.0, + "48": 890279744.0, + "49": 597272192.0, + "50": 921883712.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4305060864.0, - "2": 5850929152.0, - "3": 5850929152.0, - "4": 5857061888.0, - "5": 5857061888.0, - "6": 5857061888.0, - "7": 5857061888.0, - "8": 5857061888.0, - "9": 5857061888.0, - "10": 5857061888.0, - "11": 5857061888.0, - "12": 5857061888.0, - "13": 5857061888.0, - "14": 5857061888.0, - "15": 5857061888.0, - "16": 5857061888.0, - "17": 5857061888.0, - "18": 5857061888.0, - "19": 5857061888.0, - "20": 5857061888.0, - "21": 5857061888.0, - "22": 5857061888.0, - "23": 5857061888.0, - "24": 5857061888.0, - "25": 5857061888.0, - "26": 5857061888.0, - "27": 5857061888.0, - "28": 5857061888.0, - "29": 5857061888.0, - "30": 5857061888.0, - "31": 5857061888.0, - "32": 5857061888.0, - "33": 5857061888.0, - "34": 5857061888.0, - "35": 5857061888.0, - "36": 5857061888.0, - "37": 5857061888.0, - "38": 5857061888.0, - "39": 5860414976.0, - "40": 5860414976.0, - "41": 5860414976.0, - "42": 5860414976.0, - "43": 5860414976.0, - "44": 5860414976.0, - "45": 5860414976.0, - "46": 5860414976.0, - "47": 5860414976.0, - "48": 5860414976.0, - "49": 5860414976.0, - "50": 5860414976.0 + "1": 4313449472.0, + "2": 7108272640.0, + "3": 7108272640.0, + "4": 7108272640.0, + "5": 7119571456.0, + "6": 7119571456.0, + "7": 7129409024.0, + "8": 7158368768.0, + "9": 7158368768.0, + "10": 7158838784.0, + "11": 7202046464.0, + "12": 7202046464.0, + "13": 7202046464.0, + "14": 7202046464.0, + "15": 7202046464.0, + "16": 7202046464.0, + "17": 7202046464.0, + "18": 7202046464.0, + "19": 7202046464.0, + "20": 
7202046464.0, + "21": 7202046464.0, + "22": 7202046464.0, + "23": 7202046464.0, + "24": 7202046464.0, + "25": 7202046464.0, + "26": 7202046464.0, + "27": 7202046464.0, + "28": 7202046464.0, + "29": 7202046464.0, + "30": 7202046464.0, + "31": 7202046464.0, + "32": 7202046464.0, + "33": 7202046464.0, + "34": 7202046464.0, + "35": 7202046464.0, + "36": 7202046464.0, + "37": 7202046464.0, + "38": 7202046464.0, + "39": 7202046464.0, + "40": 7202046464.0, + "41": 7202046464.0, + "42": 7202046464.0, + "43": 7202046464.0, + "44": 7202046464.0, + "45": 7202046464.0, + "46": 7202046464.0, + "47": 7202046464.0, + "48": 7202046464.0, + "49": 7202046464.0, + "50": 7202046464.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 92.74621, - "2": 3.05215, - "3": 3.87635, - "4": 2.96691, - "5": 3.09601, - "6": 1.94793, - "7": 2.58283, - "8": 2.00403, - "9": 1.96081, - "10": 1.955, - "11": 1.95251, - "12": 2.07845, - "13": 2.01952, - "14": 1.96206, - "15": 1.96234, - "16": 1.97406, - "17": 2.0423, - "18": 1.96841, - "19": 1.95796, - "20": 2.48713, - "21": 2.55338, - "22": 1.97633, - "23": 1.95723, - "24": 1.98425, - "25": 1.95827, - "26": 1.95919, - "27": 1.95629, - "28": 1.96685, - "29": 1.95089, - "30": 2.55672, - "31": 1.93918, - "32": 1.95892, - "33": 1.95987, - "34": 1.95394, - "35": 1.96053, - "36": 1.96074, - "37": 1.96542, - "38": 1.97304, - "39": 2.00073, - "40": 1.98223, - "41": 1.95986, - "42": 1.96976, - "43": 1.94793, - "44": 1.95897, - "45": 1.96904, - "46": 1.96519, - "47": 1.95996, - "48": 1.96564, - "49": 1.96485, - "50": 1.97038 + "1": 90.31742, + "2": 2.522, + "3": 2.42029, + "4": 2.06158, + "5": 2.28893, + "6": 3.01447, + "7": 3.96389, + "8": 3.20878, + "9": 2.43815, + "10": 1.94158, + "11": 1.95031, + "12": 1.98877, + "13": 1.92978, + "14": 1.93494, + "15": 1.92559, + "16": 1.95925, + "17": 2.59672, + "18": 1.94175, + "19": 1.92388, + "20": 1.92283, + "21": 1.92623, + "22": 1.92561, + "23": 1.92611, + "24": 
1.94339, + "25": 2.02939, + "26": 1.93181, + "27": 1.92433, + "28": 1.96842, + "29": 1.92479, + "30": 1.93949, + "31": 1.96151, + "32": 1.93071, + "33": 1.92266, + "34": 1.92587, + "35": 1.92251, + "36": 1.92324, + "37": 1.93141, + "38": 1.92431, + "39": 1.93685, + "40": 1.92592, + "41": 1.92962, + "42": 1.92986, + "43": 1.92956, + "44": 1.93019, + "45": 1.93251, + "46": 1.92915, + "47": 1.93714, + "48": 1.93564, + "49": 1.94035, + "50": 1.93018 } } -} +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml index d1fcd8fd4b7..da78378ddae 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml @@ -8,7 +8,7 @@ ENV_VARS: NVTE_CPU_OFFLOAD_V1: 1 NVTE_FUSED_ATTN: 0 NCCL_ALGO: ^NVLS - CUBLAS_WORKSPACE_CONFIG: ':4096:8' + CUBLAS_WORKSPACE_CONFIG: ":4096:8" MODEL_ARGS: # Distributed args --distributed-timeout-minutes: 60 @@ -129,7 +129,6 @@ MODEL_ARGS: --exit-interval: 50 TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular METRICS: - # - "iteration-time" - "lm loss" - "mem-allocated-bytes" - "mem-max-allocated-bytes" diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json index 68b72267704..dc836c3d699 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.81442, - "2": 10.81882, - "3": 10.81551, - "4": 10.80292, - "5": 10.85144, + "1": 10.81455, + "2": 10.81846, + "3": 10.81528, + "4": 10.80297, + "5": 10.8513, "6": 10.85011, - "7": 10.83867, - "8": 10.83952, - "9": 10.82213, - "10": 10.77746, - "11": 10.86426, - "12": 10.83689, - "13": 10.85831, - "14": 10.86354, - "15": 10.79774, - "16": 10.79537, - "17": 10.77155, - "18": 10.78908, - "19": 10.78343, - "20": 10.71629, - "21": 10.6835, - "22": 10.53061, - "23": 10.69849, - "24": 10.58571, - "25": 10.52397, - "26": 10.58327, - "27": 10.60963, - "28": 10.57207, - "29": 10.59012, - "30": 10.35613, - "31": 10.09392, - "32": 10.45887, - "33": 10.45644, - "34": 10.20494, - "35": 10.26735, - "36": 10.22333, - "37": 10.35299, - "38": 10.19476, - "39": 10.41731, - "40": 10.08948, - "41": 10.12721, - "42": 10.21207, - "43": 9.8313, - "44": 9.96936, - "45": 9.83601, - "46": 9.81666, - "47": 10.1539, - "48": 9.85279, - "49": 9.53447, - "50": 9.91909, - "51": 9.85364, - "52": 9.74286, - "53": 10.07155, - "54": 9.96279, - "55": 9.88223, - "56": 9.63465, - "57": 9.48633, - "58": 9.84878, - "59": 9.58904, - "60": 9.51094, - "61": 9.7032, - "62": 9.99637, - "63": 9.40044, - "64": 9.78465, - "65": 8.95366, - "66": 9.71808, - "67": 9.36931, - "68": 9.79818, - "69": 9.79667, - "70": 9.74899, - "71": 9.63213, - "72": 9.59956, - "73": 9.50308, - "74": 8.95202, - "75": 9.43084, - "76": 9.09067, - "77": 10.08102, - "78": 9.73521, - "79": 9.38853, + "7": 10.83843, + "8": 10.83961, + "9": 10.82224, + "10": 10.77788, + "11": 10.86443, + "12": 10.83746, + "13": 10.85841, + "14": 10.86315, + "15": 10.79766, + "16": 10.79525, + "17": 
10.77133, + "18": 10.78938, + "19": 10.78311, + "20": 10.71655, + "21": 10.68376, + "22": 10.53038, + "23": 10.69869, + "24": 10.5858, + "25": 10.52379, + "26": 10.58281, + "27": 10.6097, + "28": 10.57173, + "29": 10.59005, + "30": 10.35671, + "31": 10.09391, + "32": 10.45878, + "33": 10.45658, + "34": 10.20481, + "35": 10.26727, + "36": 10.22341, + "37": 10.35319, + "38": 10.19446, + "39": 10.41712, + "40": 10.08932, + "41": 10.12772, + "42": 10.21193, + "43": 9.83111, + "44": 9.96933, + "45": 9.83615, + "46": 9.81673, + "47": 10.15426, + "48": 9.85308, + "49": 9.53436, + "50": 9.91912, + "51": 9.85363, + "52": 9.74288, + "53": 10.07163, + "54": 9.96275, + "55": 9.88233, + "56": 9.63455, + "57": 9.48649, + "58": 9.84879, + "59": 9.589, + "60": 9.5109, + "61": 9.703, + "62": 9.99634, + "63": 9.40054, + "64": 9.78477, + "65": 8.95365, + "66": 9.71813, + "67": 9.36915, + "68": 9.79814, + "69": 9.79674, + "70": 9.74886, + "71": 9.63185, + "72": 9.59951, + "73": 9.50305, + "74": 8.95217, + "75": 9.43098, + "76": 9.09068, + "77": 10.08086, + "78": 9.7353, + "79": 9.38859, "80": 9.41418, - "81": 9.48403, - "82": 9.70907, - "83": 9.3152, - "84": 9.41838, - "85": 9.62222, - "86": 9.07945, - "87": 9.59202, - "88": 9.74953, - "89": 9.60441, - "90": 9.82577, - "91": 9.34232, - "92": 9.35837, - "93": 9.07969, - "94": 8.82793, - "95": 9.50864, - "96": 9.52117, - "97": 9.30605, - "98": 9.6658, - "99": 8.87716, - "100": 9.38997 + "81": 9.48423, + "82": 9.70903, + "83": 9.3151, + "84": 9.41846, + "85": 9.62239, + "86": 9.07953, + "87": 9.59204, + "88": 9.74948, + "89": 9.60436, + "90": 9.82573, + "91": 9.34231, + "92": 9.35857, + "93": 9.07976, + "94": 8.82788, + "95": 9.50877, + "96": 9.52129, + "97": 9.30597, + "98": 9.66586, + "99": 8.87711, + "100": 9.38978 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 5488.0, - "2": 5704.0, - "3": 5788.0, - "4": 5853.0, - "5": 6401.0, - "6": 6686.0, - "7": 5949.0, - "8": 5811.0, - "9": 
6280.0, - "10": 5192.0, - "11": 6645.0, - "12": 6193.0, - "13": 6525.0, - "14": 6487.0, - "15": 6258.0, - "16": 6261.0, - "17": 6080.0, - "18": 5901.0, - "19": 6228.0, - "20": 5713.0, - "21": 6265.0, - "22": 5788.0, - "23": 6618.0, - "24": 6159.0, - "25": 5674.0, - "26": 6218.0, - "27": 6180.0, - "28": 6802.0, - "29": 7006.0, - "30": 6195.0, - "31": 5847.0, - "32": 6680.0, - "33": 7327.0, - "34": 6433.0, - "35": 6593.0, - "36": 6717.0, - "37": 7545.0, - "38": 7130.0, - "39": 7928.0, - "40": 7233.0, - "41": 7093.0, - "42": 7653.0, - "43": 7136.0, - "44": 7113.0, - "45": 7167.0, - "46": 7435.0, - "47": 7501.0, - "48": 7648.0, - "49": 7520.0, - "50": 7701.0, - "51": 7847.0, - "52": 7828.0, - "53": 8765.0, - "54": 8799.0, - "55": 7683.0, - "56": 7972.0, - "57": 7642.0, - "58": 8419.0, - "59": 8276.0, - "60": 7917.0, - "61": 8598.0, - "62": 8394.0, - "63": 7896.0, - "64": 9047.0, - "65": 8280.0, - "66": 9315.0, - "67": 8277.0, - "68": 8341.0, - "69": 8737.0, - "70": 9764.0, - "71": 9050.0, - "72": 9036.0, - "73": 9076.0, - "74": 6969.0, - "75": 7833.0, - "76": 8450.0, - "77": 13505.0, - "78": 9634.0, - "79": 13982.0, - "80": 11548.0, - "81": 10035.0, - "82": 9732.0, - "83": 9037.0, - "84": 9522.0, - "85": 46479.0, - "86": 8626.0, - "87": 11964.0, - "88": 9637.0, + "1": 5566.0, + "2": 5749.0, + "3": 5881.0, + "4": 5840.0, + "5": 6476.0, + "6": 6425.0, + "7": 5900.0, + "8": 5783.0, + "9": 6426.0, + "10": 5252.0, + "11": 6722.0, + "12": 6169.0, + "13": 6556.0, + "14": 6524.0, + "15": 6116.0, + "16": 6245.0, + "17": 6139.0, + "18": 5888.0, + "19": 6375.0, + "20": 5773.0, + "21": 6188.0, + "22": 5742.0, + "23": 6768.0, + "24": 6000.0, + "25": 5852.0, + "26": 6285.0, + "27": 6357.0, + "28": 6586.0, + "29": 6742.0, + "30": 6214.0, + "31": 5775.0, + "32": 6746.0, + "33": 7205.0, + "34": 6344.0, + "35": 6686.0, + "36": 6743.0, + "37": 7281.0, + "38": 7228.0, + "39": 7810.0, + "40": 7116.0, + "41": 6902.0, + "42": 7809.0, + "43": 7110.0, + "44": 7040.0, + "45": 7058.0, + "46": 
7292.0, + "47": 7813.0, + "48": 7672.0, + "49": 7601.0, + "50": 7605.0, + "51": 8105.0, + "52": 7792.0, + "53": 8870.0, + "54": 8700.0, + "55": 7685.0, + "56": 7975.0, + "57": 7544.0, + "58": 8539.0, + "59": 8275.0, + "60": 7822.0, + "61": 8316.0, + "62": 8493.0, + "63": 7748.0, + "64": 8801.0, + "65": 8269.0, + "66": 9209.0, + "67": 8382.0, + "68": 8362.0, + "69": 8644.0, + "70": 9785.0, + "71": 9060.0, + "72": 8909.0, + "73": 9217.0, + "74": 6949.0, + "75": 7960.0, + "76": 8489.0, + "77": 12484.0, + "78": 9598.0, + "79": 12984.0, + "80": 11398.0, + "81": 10221.0, + "82": 9615.0, + "83": 62741.0, + "84": 9936.0, + "85": 46541.0, + "86": 8528.0, + "87": 14916.0, + "88": 9710.0, "89": 10273.0, - "90": 11256.0, - "91": 8811.0, - "92": 9218.0, - "93": 8281.0, - "94": 9390.0, - "95": 9376.0, - "96": 13248.0, - "97": 8945.0, - "98": 10682.0, - "99": 15485.0, - "100": 9101.0 + "90": 11178.0, + "91": 8856.0, + "92": 9337.0, + "93": 8404.0, + "94": 9649.0, + "95": 9657.0, + "96": 13226.0, + "97": 9093.0, + "98": 10575.0, + "99": 15320.0, + "100": 9363.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 628059136.0, - "2": 628060160.0, - "3": 628060160.0, - "4": 628060160.0, - "5": 628060160.0, - "6": 628060160.0, - "7": 628060160.0, - "8": 628060160.0, - "9": 628060160.0, - "10": 628060160.0, - "11": 628060160.0, - "12": 628060160.0, - "13": 628060160.0, - "14": 628060160.0, - "15": 628060160.0, - "16": 628060160.0, - "17": 628060160.0, - "18": 628060160.0, - "19": 628060160.0, - "20": 628060160.0, - "21": 628060160.0, - "22": 628060160.0, - "23": 628060160.0, - "24": 628060160.0, - "25": 628060160.0, - "26": 628060160.0, - "27": 628060160.0, - "28": 628060160.0, - "29": 628060160.0, - "30": 628060160.0, - "31": 628060160.0, - "32": 628060160.0, - "33": 628060160.0, - "34": 628060160.0, - "35": 628060160.0, - "36": 628060160.0, - "37": 628060160.0, - "38": 628060160.0, - "39": 628060160.0, - "40": 628060160.0, - 
"41": 628060160.0, - "42": 628060160.0, - "43": 628060160.0, - "44": 628060160.0, - "45": 628060160.0, - "46": 628060160.0, - "47": 628060160.0, - "48": 628060160.0, - "49": 628060160.0, - "50": 628060160.0, - "51": 628060160.0, - "52": 628060160.0, - "53": 628060160.0, - "54": 628060160.0, - "55": 628060160.0, - "56": 628060160.0, - "57": 628060160.0, - "58": 628060160.0, - "59": 628060160.0, - "60": 628060160.0, - "61": 628060160.0, - "62": 628060160.0, - "63": 628060160.0, - "64": 628060160.0, - "65": 628060160.0, - "66": 628060160.0, - "67": 628060160.0, - "68": 628060160.0, - "69": 628060160.0, - "70": 628060160.0, - "71": 628060160.0, - "72": 628060160.0, - "73": 628060160.0, - "74": 628060160.0, - "75": 628060160.0, - "76": 628060160.0, - "77": 628060160.0, - "78": 628060160.0, - "79": 628060160.0, - "80": 628060160.0, - "81": 628060160.0, - "82": 628060160.0, - "83": 628060160.0, - "84": 628060160.0, - "85": 628060160.0, - "86": 628060160.0, - "87": 628060160.0, - "88": 628060160.0, - "89": 628060160.0, - "90": 628060160.0, - "91": 628060160.0, - "92": 628060160.0, - "93": 628060160.0, - "94": 628060160.0, - "95": 628060160.0, - "96": 628060160.0, - "97": 628060160.0, - "98": 628060160.0, - "99": 628060160.0, - "100": 628060160.0 + "1": 628645888.0, + "2": 628646912.0, + "3": 628646912.0, + "4": 628646912.0, + "5": 628646912.0, + "6": 628646912.0, + "7": 628646912.0, + "8": 628646912.0, + "9": 628646912.0, + "10": 628646912.0, + "11": 628646912.0, + "12": 628646912.0, + "13": 628646912.0, + "14": 628646912.0, + "15": 628646912.0, + "16": 628646912.0, + "17": 628646912.0, + "18": 628646912.0, + "19": 628646912.0, + "20": 628646912.0, + "21": 628646912.0, + "22": 628646912.0, + "23": 628646912.0, + "24": 628646912.0, + "25": 628646912.0, + "26": 628646912.0, + "27": 628646912.0, + "28": 628646912.0, + "29": 628646912.0, + "30": 628646912.0, + "31": 628646912.0, + "32": 628646912.0, + "33": 628646912.0, + "34": 628646912.0, + "35": 628646912.0, + "36": 
628646912.0, + "37": 628646912.0, + "38": 628646912.0, + "39": 628646912.0, + "40": 628646912.0, + "41": 628646912.0, + "42": 628646912.0, + "43": 628646912.0, + "44": 628646912.0, + "45": 628646912.0, + "46": 628646912.0, + "47": 628646912.0, + "48": 628646912.0, + "49": 628646912.0, + "50": 628646912.0, + "51": 628646912.0, + "52": 628646912.0, + "53": 628646912.0, + "54": 628646912.0, + "55": 628646912.0, + "56": 628646912.0, + "57": 628646912.0, + "58": 628646912.0, + "59": 628646912.0, + "60": 628646912.0, + "61": 628646912.0, + "62": 628646912.0, + "63": 628646912.0, + "64": 628646912.0, + "65": 628646912.0, + "66": 628646912.0, + "67": 628646912.0, + "68": 628646912.0, + "69": 628646912.0, + "70": 628646912.0, + "71": 628646912.0, + "72": 628646912.0, + "73": 628646912.0, + "74": 628646912.0, + "75": 628646912.0, + "76": 628646912.0, + "77": 628646912.0, + "78": 628646912.0, + "79": 628646912.0, + "80": 628646912.0, + "81": 628646912.0, + "82": 628646912.0, + "83": 628646912.0, + "84": 628646912.0, + "85": 628646912.0, + "86": 628646912.0, + "87": 628646912.0, + "88": 628646912.0, + "89": 628646912.0, + "90": 628646912.0, + "91": 628646912.0, + "92": 628646912.0, + "93": 628646912.0, + "94": 628646912.0, + "95": 628646912.0, + "96": 628646912.0, + "97": 628646912.0, + "98": 628646912.0, + "99": 628646912.0, + "100": 628646912.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 966226944.0, - "2": 1135178752.0, - "3": 1135178752.0, - "4": 1142154752.0, - "5": 1142154752.0, - "6": 1142154752.0, - "7": 1142154752.0, - "8": 1142154752.0, - "9": 1142154752.0, - "10": 1142154752.0, - "11": 1142154752.0, - "12": 1142154752.0, - "13": 1142154752.0, - "14": 1142154752.0, - "15": 1142154752.0, - "16": 1142154752.0, - "17": 1142154752.0, - "18": 1142154752.0, - "19": 1142154752.0, - "20": 1142154752.0, - "21": 1142154752.0, - "22": 1142154752.0, - "23": 1142154752.0, - "24": 1142154752.0, - "25": 
1142154752.0, - "26": 1142154752.0, - "27": 1142154752.0, - "28": 1142154752.0, - "29": 1142154752.0, - "30": 1142154752.0, - "31": 1142154752.0, - "32": 1142154752.0, - "33": 1142154752.0, - "34": 1142154752.0, - "35": 1142154752.0, - "36": 1142154752.0, - "37": 1142154752.0, - "38": 1142154752.0, - "39": 1142154752.0, - "40": 1142154752.0, - "41": 1142154752.0, - "42": 1142154752.0, - "43": 1142154752.0, - "44": 1142154752.0, - "45": 1142154752.0, - "46": 1142154752.0, - "47": 1142154752.0, - "48": 1142154752.0, - "49": 1142154752.0, - "50": 1142154752.0, - "51": 1142154752.0, - "52": 1142154752.0, - "53": 1142154752.0, - "54": 1142154752.0, - "55": 1142154752.0, - "56": 1142154752.0, - "57": 1142154752.0, - "58": 1142154752.0, - "59": 1142154752.0, - "60": 1142154752.0, - "61": 1145444352.0, - "62": 1145444352.0, - "63": 1145444352.0, - "64": 1145444352.0, - "65": 1145444352.0, - "66": 1145444352.0, - "67": 1145444352.0, - "68": 1145444352.0, - "69": 1145444352.0, - "70": 1145444352.0, - "71": 1145444352.0, - "72": 1145444352.0, - "73": 1145444352.0, - "74": 1145444352.0, - "75": 1145444352.0, - "76": 1149560320.0, - "77": 1149560320.0, - "78": 1149560320.0, - "79": 1149560320.0, - "80": 1149560320.0, - "81": 1149560320.0, - "82": 1149560320.0, - "83": 1149560320.0, - "84": 1149560320.0, - "85": 1149560320.0, - "86": 1149560320.0, - "87": 1149560320.0, - "88": 1149560320.0, - "89": 1149560320.0, - "90": 1149560320.0, - "91": 1149560320.0, - "92": 1149560320.0, - "93": 1149560320.0, - "94": 1149560320.0, - "95": 1149560320.0, - "96": 1149560320.0, - "97": 1149560320.0, - "98": 1149560320.0, - "99": 1149560320.0, - "100": 1149560320.0 + "1": 982203392.0, + "2": 1149396992.0, + "3": 1149396992.0, + "4": 1155475456.0, + "5": 1155475456.0, + "6": 1155475456.0, + "7": 1155475456.0, + "8": 1155475456.0, + "9": 1155475456.0, + "10": 1155475456.0, + "11": 1155475456.0, + "12": 1155475456.0, + "13": 1155475456.0, + "14": 1155475456.0, + "15": 1155475456.0, + "16": 
1155475456.0, + "17": 1155475456.0, + "18": 1155475456.0, + "19": 1155475456.0, + "20": 1155475456.0, + "21": 1155475456.0, + "22": 1155475456.0, + "23": 1155475456.0, + "24": 1155475456.0, + "25": 1155475456.0, + "26": 1155475456.0, + "27": 1155475456.0, + "28": 1155475456.0, + "29": 1155475456.0, + "30": 1155475456.0, + "31": 1155475456.0, + "32": 1155475456.0, + "33": 1155475456.0, + "34": 1155475456.0, + "35": 1155475456.0, + "36": 1155475456.0, + "37": 1155475456.0, + "38": 1155475456.0, + "39": 1155475456.0, + "40": 1155475456.0, + "41": 1155475456.0, + "42": 1155475456.0, + "43": 1155475456.0, + "44": 1155475456.0, + "45": 1155475456.0, + "46": 1155475456.0, + "47": 1155475456.0, + "48": 1155475456.0, + "49": 1155475456.0, + "50": 1155475456.0, + "51": 1155475456.0, + "52": 1155475456.0, + "53": 1155475456.0, + "54": 1155475456.0, + "55": 1155475456.0, + "56": 1155475456.0, + "57": 1155475456.0, + "58": 1155475456.0, + "59": 1155475456.0, + "60": 1155975680.0, + "61": 1159303168.0, + "62": 1159303168.0, + "63": 1159303168.0, + "64": 1159303168.0, + "65": 1159303168.0, + "66": 1159303168.0, + "67": 1159303168.0, + "68": 1159303168.0, + "69": 1159303168.0, + "70": 1159303168.0, + "71": 1159303168.0, + "72": 1159303168.0, + "73": 1159303168.0, + "74": 1159303168.0, + "75": 1159303168.0, + "76": 1164697088.0, + "77": 1164697088.0, + "78": 1164697088.0, + "79": 1164697088.0, + "80": 1164697088.0, + "81": 1164697088.0, + "82": 1164697088.0, + "83": 1164697088.0, + "84": 1164697088.0, + "85": 1164697088.0, + "86": 1164697088.0, + "87": 1164697088.0, + "88": 1164697088.0, + "89": 1164697088.0, + "90": 1164697088.0, + "91": 1164697088.0, + "92": 1164697088.0, + "93": 1164697088.0, + "94": 1164697088.0, + "95": 1164697088.0, + "96": 1164697088.0, + "97": 1164697088.0, + "98": 1164697088.0, + "99": 1164697088.0, + "100": 1164697088.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 20.38736, - "2": 0.68138, - "3": 
0.62881, - "4": 0.61692, - "5": 0.61365, - "6": 0.60735, - "7": 0.60006, - "8": 0.59897, - "9": 0.59763, - "10": 0.6122, - "11": 0.59106, - "12": 0.59749, - "13": 0.60001, - "14": 0.58446, - "15": 0.57929, - "16": 0.58508, - "17": 0.5725, - "18": 0.57386, - "19": 0.57617, - "20": 0.57081, - "21": 0.57614, - "22": 0.57046, - "23": 0.57731, - "24": 0.56893, - "25": 0.58004, - "26": 0.56911, - "27": 0.60575, - "28": 0.61474, - "29": 0.58874, - "30": 0.57969, - "31": 0.57737, - "32": 0.58556, - "33": 0.5704, - "34": 0.57592, - "35": 0.58241, - "36": 0.57697, - "37": 0.57978, - "38": 0.57647, - "39": 0.56977, - "40": 0.58017, - "41": 0.57153, - "42": 0.57267, - "43": 0.5881, - "44": 0.57211, - "45": 0.59552, - "46": 0.56308, - "47": 0.5736, - "48": 0.58403, - "49": 0.57693, - "50": 0.57016, - "51": 0.57233, - "52": 0.55871, - "53": 0.5593, - "54": 0.55755, - "55": 0.56057, - "56": 0.56649, - "57": 0.56057, - "58": 0.56658, - "59": 0.55825, - "60": 0.57038, - "61": 0.5563, - "62": 0.56031, - "63": 0.56901, - "64": 0.56097, - "65": 0.56153, - "66": 0.56761, - "67": 0.5785, - "68": 0.57341, - "69": 0.57139, - "70": 0.56231, - "71": 0.55874, - "72": 0.55834, - "73": 0.55824, - "74": 0.5552, - "75": 0.5593, - "76": 0.56038, - "77": 0.56527, - "78": 0.56728, - "79": 0.56424, - "80": 0.55564, - "81": 0.55955, - "82": 0.55867, - "83": 0.56254, - "84": 0.55754, - "85": 0.55409, - "86": 0.55901, - "87": 0.55904, - "88": 0.57097, - "89": 0.5735, - "90": 0.55808, - "91": 0.55819, - "92": 0.58224, - "93": 0.55845, - "94": 0.56512, - "95": 0.5709, - "96": 0.56099, - "97": 0.56779, - "98": 0.55446, - "99": 0.56053, - "100": 0.56338 + "1": 19.23269, + "2": 0.72886, + "3": 0.65505, + "4": 0.57926, + "5": 0.56473, + "6": 0.56262, + "7": 0.55541, + "8": 0.55169, + "9": 0.54588, + "10": 0.54513, + "11": 0.54209, + "12": 0.55074, + "13": 0.54861, + "14": 0.54825, + "15": 0.54517, + "16": 0.54378, + "17": 0.54038, + "18": 0.53418, + "19": 0.54272, + "20": 0.53786, + "21": 0.5453, + "22": 
0.53544, + "23": 0.5385, + "24": 0.5306, + "25": 0.53752, + "26": 0.53028, + "27": 1.14331, + "28": 0.55476, + "29": 0.55192, + "30": 0.53922, + "31": 0.53776, + "32": 0.53422, + "33": 0.53153, + "34": 0.53781, + "35": 0.53428, + "36": 0.5321, + "37": 0.53103, + "38": 0.53328, + "39": 0.53189, + "40": 1.26265, + "41": 0.53531, + "42": 0.53252, + "43": 0.53665, + "44": 0.88396, + "45": 0.53586, + "46": 0.89593, + "47": 0.53907, + "48": 0.5309, + "49": 0.53767, + "50": 0.53491, + "51": 0.55263, + "52": 0.53343, + "53": 0.53673, + "54": 0.53859, + "55": 0.5329, + "56": 0.52954, + "57": 0.53085, + "58": 0.53458, + "59": 0.53132, + "60": 0.53967, + "61": 0.53205, + "62": 0.53559, + "63": 0.53393, + "64": 0.53143, + "65": 0.5339, + "66": 0.53358, + "67": 0.53117, + "68": 0.53709, + "69": 0.53768, + "70": 0.53628, + "71": 0.53275, + "72": 0.54058, + "73": 0.53091, + "74": 0.53069, + "75": 0.53307, + "76": 0.53389, + "77": 0.53403, + "78": 0.53188, + "79": 0.53173, + "80": 0.532, + "81": 0.53145, + "82": 0.5358, + "83": 0.53475, + "84": 0.5323, + "85": 0.54048, + "86": 0.53766, + "87": 0.53212, + "88": 0.53119, + "89": 0.53372, + "90": 0.53371, + "91": 0.53164, + "92": 0.53327, + "93": 0.54146, + "94": 0.53517, + "95": 0.53542, + "96": 0.5306, + "97": 0.53654, + "98": 0.53425, + "99": 0.53223, + "100": 0.53446 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..78918e95bae --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + 
"2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.85363, + "52": 9.74288, + "53": 10.07163, + "54": 9.96275, + "55": 9.88233, + "56": 9.63455, + "57": 9.48649, + "58": 9.84879, + "59": 9.589, + "60": 9.5109, + "61": 9.703, + "62": 9.99634, + "63": 9.40054, + "64": 9.78477, + "65": 8.95365, + "66": 9.71813, + "67": 9.36915, + "68": 9.79814, + "69": 9.79674, + "70": 9.74886, + "71": 9.63185, + "72": 9.59951, + "73": 9.50305, + "74": 8.95217, + "75": 9.43098, + "76": 9.09068, + "77": 10.08086, + "78": 9.7353, + "79": 9.38859, + "80": 9.41418, + "81": 9.48423, + "82": 9.70903, + "83": 9.3151, + "84": 9.41846, + "85": 9.62239, + "86": 9.07953, + "87": 9.59204, + "88": 9.74948, + "89": 9.60436, + "90": 9.82573, + "91": 9.34231, + "92": 9.35857, + "93": 9.07976, + "94": 8.82788, + "95": 9.50877, + "96": 9.52129, + "97": 9.30597, + "98": 9.66586, + "99": 8.87711, + "100": 9.38978 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": 
"nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 8105.0, + "52": 7792.0, + "53": 8870.0, + "54": 8700.0, + "55": 7685.0, + "56": 7975.0, + "57": 7544.0, + "58": 8539.0, + "59": 8275.0, + "60": 7822.0, + "61": 8316.0, + "62": 8493.0, + "63": 7748.0, + "64": 8801.0, + "65": 8269.0, + "66": 9209.0, + "67": 8382.0, + "68": 8362.0, + "69": 8644.0, + "70": 9785.0, + "71": 9060.0, + "72": 8909.0, + "73": 9217.0, + "74": 6949.0, + "75": 7960.0, + "76": 8489.0, + "77": 12484.0, + "78": 9598.0, + "79": 12984.0, + "80": 11398.0, + "81": 10221.0, + "82": 9615.0, + "83": 62741.0, + "84": 9936.0, + "85": 46541.0, + "86": 8528.0, + "87": 14916.0, + "88": 9710.0, + "89": 10273.0, + "90": 11178.0, + "91": 8856.0, + "92": 9337.0, + "93": 8404.0, + "94": 9649.0, + "95": 9657.0, + "96": 13226.0, + "97": 9093.0, + "98": 10575.0, + "99": 15320.0, + "100": 9363.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", 
+ "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 628064256.0, + "52": 628065280.0, + "53": 628065280.0, + "54": 628065280.0, + "55": 628065280.0, + "56": 628065280.0, + "57": 628065280.0, + "58": 628065280.0, + "59": 628065280.0, + "60": 628065280.0, + "61": 628065280.0, + "62": 628065280.0, + "63": 628065280.0, + "64": 628065280.0, + "65": 628065280.0, + "66": 628065280.0, + "67": 628065280.0, + "68": 628065280.0, + "69": 628065280.0, + "70": 628065280.0, + "71": 628065280.0, + "72": 628065280.0, + "73": 628065280.0, + "74": 628065280.0, + "75": 628065280.0, + "76": 628065280.0, + "77": 628065280.0, + "78": 628065280.0, + "79": 628065280.0, + "80": 628065280.0, + "81": 628065280.0, + "82": 628065280.0, + "83": 628065280.0, + "84": 628065280.0, + "85": 628065280.0, + "86": 628065280.0, + "87": 628065280.0, + "88": 628065280.0, + "89": 628065280.0, + "90": 628065280.0, + "91": 628065280.0, + "92": 628065280.0, + "93": 628065280.0, + "94": 628065280.0, + "95": 628065280.0, + "96": 628065280.0, + "97": 628065280.0, + "98": 628065280.0, + "99": 628065280.0, + "100": 628065280.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 
1148061696.0, + "52": 1150909952.0, + "53": 1154032640.0, + "54": 1154032640.0, + "55": 1154032640.0, + "56": 1154885120.0, + "57": 1154885120.0, + "58": 1154885120.0, + "59": 1154885120.0, + "60": 1158400512.0, + "61": 1161243648.0, + "62": 1161243648.0, + "63": 1161243648.0, + "64": 1161243648.0, + "65": 1161243648.0, + "66": 1161243648.0, + "67": 1161243648.0, + "68": 1161243648.0, + "69": 1161243648.0, + "70": 1161243648.0, + "71": 1161243648.0, + "72": 1161243648.0, + "73": 1161243648.0, + "74": 1161243648.0, + "75": 1161243648.0, + "76": 1164402176.0, + "77": 1164402176.0, + "78": 1164402176.0, + "79": 1164402176.0, + "80": 1164402176.0, + "81": 1164402176.0, + "82": 1164402176.0, + "83": 1164402176.0, + "84": 1164402176.0, + "85": 1164402176.0, + "86": 1164402176.0, + "87": 1164402176.0, + "88": 1164402176.0, + "89": 1164402176.0, + "90": 1164402176.0, + "91": 1164402176.0, + "92": 1164402176.0, + "93": 1164402176.0, + "94": 1164402176.0, + "95": 1164402176.0, + "96": 1164402176.0, + "97": 1164402176.0, + "98": 1164402176.0, + "99": 1164402176.0, + "100": 1164402176.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 19.75998, + "52": 0.75632, + "53": 0.61311, + "54": 
0.58323, + "55": 0.58626, + "56": 0.55076, + "57": 0.55884, + "58": 0.55879, + "59": 0.55701, + "60": 0.55258, + "61": 0.54558, + "62": 0.54571, + "63": 0.52564, + "64": 0.52057, + "65": 0.52606, + "66": 0.52186, + "67": 0.51907, + "68": 0.52677, + "69": 0.52114, + "70": 0.51963, + "71": 0.51192, + "72": 0.51671, + "73": 0.53544, + "74": 0.53543, + "75": 0.53296, + "76": 0.53665, + "77": 0.53249, + "78": 0.53515, + "79": 0.53542, + "80": 0.53567, + "81": 0.53848, + "82": 0.55706, + "83": 0.52186, + "84": 0.51342, + "85": 0.53509, + "86": 0.53067, + "87": 0.51458, + "88": 0.53017, + "89": 0.52642, + "90": 0.52796, + "91": 0.5213, + "92": 0.52233, + "93": 0.52409, + "94": 0.52466, + "95": 0.52364, + "96": 0.52347, + "97": 0.52512, + "98": 0.52375, + "99": 0.52859, + "100": 0.52625 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json index a77eac20664..0954418053d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json @@ -219,105 +219,105 @@ "step_interval": 1, "values": { "1": 773784064.0, - "2": 776621056.0, - "3": 764709888.0, - "4": 937392128.0, - "5": 935098368.0, - "6": 935098368.0, - "7": 935639040.0, - "8": 937392128.0, - "9": 935098368.0, - "10": 936785920.0, - "11": 937392128.0, - "12": 935098368.0, - "13": 935098368.0, - "14": 935639040.0, - "15": 937392128.0, - "16": 935098368.0, - "17": 935639040.0, - "18": 937392128.0, - "19": 937392128.0, - "20": 935098368.0, - "21": 936785920.0, - "22": 937392128.0, - "23": 936785920.0, - "24": 937392128.0, - "25": 935098368.0, - "26": 935098368.0, - "27": 936245248.0, - 
"28": 937392128.0, - "29": 937392128.0, - "30": 935098368.0, - "31": 935098368.0, - "32": 935639040.0, - "33": 936785920.0, - "34": 937392128.0, - "35": 937392128.0, - "36": 937392128.0, - "37": 935098368.0, - "38": 935098368.0, - "39": 935098368.0, - "40": 936785920.0, - "41": 937392128.0, - "42": 937392128.0, - "43": 937392128.0, - "44": 937392128.0, - "45": 937392128.0, - "46": 937392128.0, - "47": 935098368.0, - "48": 935098368.0, - "49": 937392128.0, - "50": 937392128.0, - "51": 935098368.0, - "52": 935639040.0, - "53": 936785920.0, - "54": 937392128.0, - "55": 937392128.0, - "56": 935098368.0, - "57": 935098368.0, - "58": 935098368.0, - "59": 935639040.0, - "60": 936245248.0, - "61": 936785920.0, - "62": 936785920.0, - "63": 937392128.0, - "64": 937392128.0, - "65": 937392128.0, - "66": 935098368.0, - "67": 935098368.0, - "68": 935639040.0, - "69": 936245248.0, - "70": 936785920.0, - "71": 937392128.0, - "72": 937392128.0, - "73": 937392128.0, - "74": 937392128.0, - "75": 935098368.0, - "76": 937392128.0, - "77": 937392128.0, - "78": 935098368.0, - "79": 935639040.0, - "80": 937392128.0, - "81": 937392128.0, - "82": 935098368.0, - "83": 936785920.0, - "84": 937392128.0, - "85": 937392128.0, - "86": 935098368.0, - "87": 936785920.0, - "88": 937392128.0, - "89": 935098368.0, - "90": 935639040.0, - "91": 937392128.0, - "92": 937392128.0, - "93": 937392128.0, - "94": 935098368.0, - "95": 935098368.0, - "96": 935639040.0, - "97": 936245248.0, - "98": 937392128.0, - "99": 935098368.0, - "100": 936785920.0 + "2": 775203840.0, + "3": 766700544.0, + "4": 937129984.0, + "5": 934836224.0, + "6": 934836224.0, + "7": 935983104.0, + "8": 937129984.0, + "9": 937129984.0, + "10": 937129984.0, + "11": 937129984.0, + "12": 937129984.0, + "13": 937129984.0, + "14": 934836224.0, + "15": 934836224.0, + "16": 935376896.0, + "17": 935983104.0, + "18": 937129984.0, + "19": 937129984.0, + "20": 937129984.0, + "21": 937129984.0, + "22": 934836224.0, + "23": 934836224.0, + "24": 
935376896.0, + "25": 937129984.0, + "26": 937129984.0, + "27": 937129984.0, + "28": 934836224.0, + "29": 935376896.0, + "30": 936523776.0, + "31": 936523776.0, + "32": 937129984.0, + "33": 937129984.0, + "34": 937129984.0, + "35": 937129984.0, + "36": 937129984.0, + "37": 937129984.0, + "38": 934836224.0, + "39": 935376896.0, + "40": 936523776.0, + "41": 937129984.0, + "42": 937129984.0, + "43": 937129984.0, + "44": 934836224.0, + "45": 934836224.0, + "46": 937129984.0, + "47": 935376896.0, + "48": 937129984.0, + "49": 937129984.0, + "50": 935376896.0, + "51": 935376896.0, + "52": 937129984.0, + "53": 937129984.0, + "54": 934836224.0, + "55": 934836224.0, + "56": 934836224.0, + "57": 934836224.0, + "58": 934836224.0, + "59": 934836224.0, + "60": 934836224.0, + "61": 935376896.0, + "62": 935376896.0, + "63": 935983104.0, + "64": 936523776.0, + "65": 936523776.0, + "66": 936523776.0, + "67": 937129984.0, + "68": 937129984.0, + "69": 937129984.0, + "70": 937129984.0, + "71": 937129984.0, + "72": 937129984.0, + "73": 937129984.0, + "74": 934836224.0, + "75": 934836224.0, + "76": 935376896.0, + "77": 935376896.0, + "78": 936523776.0, + "79": 937129984.0, + "80": 937129984.0, + "81": 937129984.0, + "82": 937129984.0, + "83": 934836224.0, + "84": 934836224.0, + "85": 934836224.0, + "86": 936523776.0, + "87": 936523776.0, + "88": 937129984.0, + "89": 937129984.0, + "90": 937129984.0, + "91": 937129984.0, + "92": 934836224.0, + "93": 935376896.0, + "94": 936523776.0, + "95": 936523776.0, + "96": 936523776.0, + "97": 936523776.0, + "98": 936523776.0, + "99": 937129984.0, + "100": 937129984.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 936453632.0, - "2": 1158617088.0, - "3": 1158617088.0, - "4": 1246761472.0, - "5": 1247365632.0, - "6": 1247365632.0, - "7": 1247765504.0, - "8": 1247765504.0, - "9": 1247765504.0, - "10": 1252415488.0, - "11": 1252415488.0, - "12": 1252415488.0, - "13": 1252415488.0, - "14": 
1252415488.0, - "15": 1252415488.0, - "16": 1252415488.0, - "17": 1252415488.0, - "18": 1252415488.0, - "19": 1252415488.0, - "20": 1252415488.0, - "21": 1252415488.0, - "22": 1252415488.0, - "23": 1252415488.0, - "24": 1252415488.0, - "25": 1252415488.0, - "26": 1252415488.0, - "27": 1252415488.0, - "28": 1252415488.0, - "29": 1252415488.0, - "30": 1252415488.0, - "31": 1252415488.0, - "32": 1252415488.0, - "33": 1252415488.0, - "34": 1252415488.0, - "35": 1252415488.0, - "36": 1252415488.0, - "37": 1252415488.0, - "38": 1252415488.0, - "39": 1252415488.0, - "40": 1252415488.0, - "41": 1252415488.0, - "42": 1252415488.0, - "43": 1252415488.0, - "44": 1252415488.0, - "45": 1252415488.0, - "46": 1252415488.0, - "47": 1252415488.0, - "48": 1252415488.0, - "49": 1252415488.0, - "50": 1252415488.0, - "51": 1252415488.0, - "52": 1252415488.0, - "53": 1252415488.0, - "54": 1252415488.0, - "55": 1252415488.0, - "56": 1252415488.0, - "57": 1252415488.0, - "58": 1252415488.0, - "59": 1252415488.0, - "60": 1252415488.0, - "61": 1252415488.0, - "62": 1252415488.0, - "63": 1252415488.0, - "64": 1252415488.0, - "65": 1252415488.0, - "66": 1252415488.0, - "67": 1252415488.0, - "68": 1252415488.0, - "69": 1252415488.0, - "70": 1252415488.0, - "71": 1252415488.0, - "72": 1252415488.0, - "73": 1252415488.0, - "74": 1252415488.0, - "75": 1252415488.0, - "76": 1252415488.0, - "77": 1252415488.0, - "78": 1252415488.0, - "79": 1252415488.0, - "80": 1252415488.0, - "81": 1252415488.0, - "82": 1252415488.0, - "83": 1252415488.0, - "84": 1252415488.0, - "85": 1252415488.0, - "86": 1252415488.0, - "87": 1252415488.0, - "88": 1252415488.0, - "89": 1252415488.0, - "90": 1252415488.0, - "91": 1252415488.0, - "92": 1252415488.0, - "93": 1252415488.0, - "94": 1252415488.0, - "95": 1252415488.0, - "96": 1252415488.0, - "97": 1252415488.0, - "98": 1252415488.0, - "99": 1252415488.0, - "100": 1252415488.0 + "1": 990381056.0, + "2": 1211127808.0, + "3": 1211127808.0, + "4": 1296840704.0, + "5": 
1297885184.0, + "6": 1297885184.0, + "7": 1298358784.0, + "8": 1299077120.0, + "9": 1299077120.0, + "10": 1300477952.0, + "11": 1300477952.0, + "12": 1300477952.0, + "13": 1300477952.0, + "14": 1300477952.0, + "15": 1300477952.0, + "16": 1300477952.0, + "17": 1300477952.0, + "18": 1300477952.0, + "19": 1300779008.0, + "20": 1300779008.0, + "21": 1300779008.0, + "22": 1300779008.0, + "23": 1301612544.0, + "24": 1301612544.0, + "25": 1301612544.0, + "26": 1301612544.0, + "27": 1301612544.0, + "28": 1301612544.0, + "29": 1301612544.0, + "30": 1301612544.0, + "31": 1301612544.0, + "32": 1301612544.0, + "33": 1301612544.0, + "34": 1301612544.0, + "35": 1301612544.0, + "36": 1301612544.0, + "37": 1301612544.0, + "38": 1301612544.0, + "39": 1301612544.0, + "40": 1301612544.0, + "41": 1301612544.0, + "42": 1301612544.0, + "43": 1301612544.0, + "44": 1301612544.0, + "45": 1301612544.0, + "46": 1301612544.0, + "47": 1301612544.0, + "48": 1301612544.0, + "49": 1301612544.0, + "50": 1301612544.0, + "51": 1301612544.0, + "52": 1301612544.0, + "53": 1301612544.0, + "54": 1301612544.0, + "55": 1301612544.0, + "56": 1301612544.0, + "57": 1301612544.0, + "58": 1301612544.0, + "59": 1301612544.0, + "60": 1301612544.0, + "61": 1301612544.0, + "62": 1301612544.0, + "63": 1301612544.0, + "64": 1301612544.0, + "65": 1301612544.0, + "66": 1301612544.0, + "67": 1301612544.0, + "68": 1301612544.0, + "69": 1301612544.0, + "70": 1301612544.0, + "71": 1301612544.0, + "72": 1301612544.0, + "73": 1301612544.0, + "74": 1301612544.0, + "75": 1301612544.0, + "76": 1301612544.0, + "77": 1301612544.0, + "78": 1301612544.0, + "79": 1301612544.0, + "80": 1301612544.0, + "81": 1301612544.0, + "82": 1301612544.0, + "83": 1301612544.0, + "84": 1301612544.0, + "85": 1301612544.0, + "86": 1301612544.0, + "87": 1301612544.0, + "88": 1301612544.0, + "89": 1301612544.0, + "90": 1301612544.0, + "91": 1301612544.0, + "92": 1301612544.0, + "93": 1301612544.0, + "94": 1301612544.0, + "95": 1301612544.0, + "96": 
1301612544.0, + "97": 1301612544.0, + "98": 1301612544.0, + "99": 1301612544.0, + "100": 1301612544.0 } }, "mtp_1 loss": { @@ -539,106 +539,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 74.16337, - "2": 1.6487, - "3": 1.45105, - "4": 4.39166, - "5": 0.72113, - "6": 0.82637, - "7": 0.7985, - "8": 0.73623, - "9": 0.7398, - "10": 0.74065, - "11": 0.73395, - "12": 0.73395, - "13": 0.79806, - "14": 0.7251, - "15": 0.7312, - "16": 0.75102, - "17": 0.72379, - "18": 0.72614, - "19": 0.73367, - "20": 0.73334, - "21": 0.72408, - "22": 0.74787, - "23": 0.75535, - "24": 0.72783, - "25": 0.7314, - "26": 0.71985, - "27": 0.7246, - "28": 0.72236, - "29": 0.71945, - "30": 0.72182, - "31": 0.72292, - "32": 0.71754, - "33": 0.7157, - "34": 0.70975, - "35": 0.72388, - "36": 0.71455, - "37": 0.71511, - "38": 0.71163, - "39": 0.71376, - "40": 0.72067, - "41": 0.71279, - "42": 0.70858, - "43": 0.7086, - "44": 0.70995, - "45": 0.70901, - "46": 0.70881, - "47": 0.71115, - "48": 0.72369, - "49": 0.73908, - "50": 0.81598, - "51": 0.73667, - "52": 0.71381, - "53": 0.72282, - "54": 0.73549, - "55": 0.70748, - "56": 0.7102, - "57": 0.70853, - "58": 0.70998, - "59": 0.71846, - "60": 0.70825, - "61": 0.70848, - "62": 0.70734, - "63": 0.7097, - "64": 0.72007, - "65": 0.71061, - "66": 0.7223, - "67": 0.71411, - "68": 0.71437, - "69": 0.70943, - "70": 0.70895, - "71": 0.71052, - "72": 0.70672, - "73": 0.72725, - "74": 0.70761, - "75": 0.7334, - "76": 0.7387, - "77": 0.72758, - "78": 0.72748, - "79": 0.73386, - "80": 0.72774, - "81": 0.71859, - "82": 0.71526, - "83": 0.75425, - "84": 0.72064, - "85": 0.72017, - "86": 0.72277, - "87": 0.73635, - "88": 0.72228, - "89": 0.73388, - "90": 0.74435, - "91": 0.7281, - "92": 0.71839, - "93": 0.71175, - "94": 0.71437, - "95": 0.71311, - "96": 0.71386, - "97": 0.71412, - "98": 0.72944, - "99": 0.7486, - "100": 0.74015 + "1": 56.96201, + "2": 1.45193, + "3": 1.37387, + "4": 3.96627, + "5": 0.7423, + "6": 0.71394, + "7": 0.74369, + "8": 0.72342, 
+ "9": 0.70545, + "10": 0.70125, + "11": 0.70256, + "12": 0.69915, + "13": 0.70499, + "14": 0.72329, + "15": 0.71852, + "16": 0.71011, + "17": 0.70885, + "18": 0.73035, + "19": 0.71099, + "20": 0.70225, + "21": 0.70459, + "22": 0.71823, + "23": 0.7143, + "24": 0.72574, + "25": 0.72055, + "26": 0.71722, + "27": 0.71209, + "28": 0.72407, + "29": 0.72809, + "30": 0.71187, + "31": 0.70668, + "32": 0.70676, + "33": 0.70474, + "34": 0.70406, + "35": 0.70401, + "36": 0.70968, + "37": 0.71106, + "38": 0.72458, + "39": 0.736, + "40": 0.71238, + "41": 0.71868, + "42": 0.71459, + "43": 0.71031, + "44": 0.70945, + "45": 0.72444, + "46": 0.76158, + "47": 0.75856, + "48": 0.7282, + "49": 0.72448, + "50": 0.7471, + "51": 0.80801, + "52": 0.73438, + "53": 0.71695, + "54": 0.71541, + "55": 0.70768, + "56": 0.70462, + "57": 0.70705, + "58": 0.70511, + "59": 0.70702, + "60": 0.70636, + "61": 0.70372, + "62": 0.71024, + "63": 0.70358, + "64": 0.70559, + "65": 0.70617, + "66": 0.70048, + "67": 0.71248, + "68": 0.7119, + "69": 0.71093, + "70": 0.7051, + "71": 0.70391, + "72": 0.70275, + "73": 0.70876, + "74": 0.7119, + "75": 0.71307, + "76": 0.718, + "77": 0.71166, + "78": 0.71308, + "79": 0.70995, + "80": 0.71153, + "81": 0.71464, + "82": 0.71596, + "83": 0.71997, + "84": 0.71197, + "85": 0.70577, + "86": 0.71956, + "87": 0.70383, + "88": 0.71047, + "89": 0.71711, + "90": 0.70818, + "91": 0.71353, + "92": 0.71401, + "93": 0.73616, + "94": 0.71104, + "95": 0.70295, + "96": 0.69995, + "97": 0.7015, + "98": 0.70705, + "99": 0.70765, + "100": 0.72052 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..f6ac4db56ee --- /dev/null +++ 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,644 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 9.77036, + "52": 9.65641, + "53": 10.03067, + "54": 9.87916, + "55": 9.79619, + "56": 9.52858, + "57": 9.36596, + "58": 9.75327, + "59": 9.48259, + "60": 9.40835, + "61": 9.60202, + "62": 9.90742, + "63": 9.25777, + "64": 9.68411, + "65": 8.79911, + "66": 9.60796, + "67": 9.25427, + "68": 9.71419, + "69": 9.71666, + "70": 9.6613, + "71": 9.52439, + "72": 9.4709, + "73": 9.38862, + "74": 8.80286, + "75": 9.34004, + "76": 8.93543, + "77": 9.99337, + "78": 9.64723, + "79": 9.28126, + "80": 9.29633, + "81": 9.39609, + "82": 9.60877, + "83": 9.21694, + "84": 9.34008, + "85": 9.53009, + "86": 8.95652, + "87": 9.51691, + "88": 9.68221, + "89": 9.50553, + "90": 9.753, + "91": 9.2347, + "92": 9.26019, + "93": 8.94568, + "94": 8.69194, + "95": 9.44616, + "96": 9.41008, + "97": 9.20125, + "98": 9.58169, + "99": 8.75946, + "100": 9.29483 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + 
"6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 23866164.0, + "52": 23807242.0, + "53": 24007504.0, + "54": 22867916.0, + "55": 23571280.0, + "56": 23954212.0, + "57": 24211680.0, + "58": 23914512.0, + "59": 22722820.0, + "60": 23813508.0, + "61": 23796364.0, + "62": 23739896.0, + "63": 24965914.0, + "64": 23898698.0, + "65": 24150860.0, + "66": 23796512.0, + "67": 25032960.0, + "68": 23673048.0, + "69": 23644684.0, + "70": 23903614.0, + "71": 24864656.0, + "72": 24766928.0, + "73": 24850636.0, + "74": 24133166.0, + "75": 24143912.0, + "76": 25025406.0, + "77": 24358344.0, + "78": 24910132.0, + "79": 23808164.0, + "80": 23772256.0, + "81": 25020440.0, + "82": 23851242.0, + "83": 23911824.0, + "84": 25143864.0, + "85": 24823592.0, + "86": 23153228.0, + "87": 24850332.0, + "88": 24749368.0, + "89": 22505174.0, + "90": 25108752.0, + "91": 23838548.0, + "92": 24923816.0, + "93": 24769484.0, + "94": 25041572.0, + "95": 25189350.0, + "96": 23909318.0, + "97": 23664104.0, + "98": 23832392.0, + "99": 23981812.0, + "100": 24101144.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + 
"16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 773784064.0, + "52": 782961664.0, + "53": 762989568.0, + "54": 937131008.0, + "55": 937131008.0, + "56": 936524800.0, + "57": 935377920.0, + "58": 934837248.0, + "59": 937131008.0, + "60": 937131008.0, + "61": 937131008.0, + "62": 935984128.0, + "63": 934837248.0, + "64": 937131008.0, + "65": 937131008.0, + "66": 936524800.0, + "67": 934837248.0, + "68": 937131008.0, + "69": 937131008.0, + "70": 935377920.0, + "71": 934837248.0, + "72": 937131008.0, + "73": 936524800.0, + "74": 934837248.0, + "75": 937131008.0, + "76": 936524800.0, + "77": 934837248.0, + "78": 937131008.0, + "79": 937131008.0, + "80": 935377920.0, + "81": 934837248.0, + "82": 937131008.0, + "83": 936524800.0, + "84": 934837248.0, + "85": 937131008.0, + "86": 937131008.0, + "87": 934837248.0, + "88": 937131008.0, + "89": 937131008.0, + "90": 935377920.0, + "91": 937131008.0, + "92": 937131008.0, + "93": 935377920.0, + "94": 934837248.0, + "95": 937131008.0, + "96": 935984128.0, + "97": 934837248.0, + "98": 937131008.0, + "99": 937131008.0, + "100": 934837248.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + 
"22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1191340032.0, + "52": 1191340032.0, + "53": 1191340032.0, + "54": 1286565888.0, + "55": 1287746048.0, + "56": 1287746048.0, + "57": 1288803328.0, + "58": 1288803328.0, + "59": 1288803328.0, + "60": 1288803328.0, + "61": 1288803328.0, + "62": 1288803328.0, + "63": 1288803328.0, + "64": 1288803328.0, + "65": 1288803328.0, + "66": 1288803328.0, + "67": 1288803328.0, + "68": 1288803328.0, + "69": 1288803328.0, + "70": 1288803328.0, + "71": 1288803328.0, + "72": 1288803328.0, + "73": 1288803328.0, + "74": 1288803328.0, + "75": 1288803328.0, + "76": 1288803328.0, + "77": 1288803328.0, + "78": 1288803328.0, + "79": 1288803328.0, + "80": 1288803328.0, + "81": 1288803328.0, + "82": 1288803328.0, + "83": 1288803328.0, + "84": 1288803328.0, + "85": 1288803328.0, + "86": 1288803328.0, + "87": 1288803328.0, + "88": 1288803328.0, + "89": 1288803328.0, + "90": 1288803328.0, + "91": 1288803328.0, + "92": 1288803328.0, + "93": 1288803328.0, + "94": 1288803328.0, + "95": 1288803328.0, + "96": 1288803328.0, + "97": 1288803328.0, + "98": 1288803328.0, + "99": 1288803328.0, + "100": 1288803328.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", 
+ "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 10.76681, + "52": 10.74029, + "53": 10.8027, + "54": 10.77345, + "55": 10.76133, + "56": 10.71153, + "57": 10.66673, + "58": 10.74318, + "59": 10.69182, + "60": 10.66418, + "61": 10.70712, + "62": 10.77164, + "63": 10.61759, + "64": 10.71667, + "65": 10.4936, + "66": 10.67118, + "67": 10.57515, + "68": 10.68716, + "69": 10.68277, + "70": 10.66908, + "71": 10.64566, + "72": 10.60905, + "73": 10.56507, + "74": 10.37106, + "75": 10.5114, + "76": 10.39856, + "77": 10.75192, + "78": 10.62708, + "79": 10.4675, + "80": 10.47474, + "81": 10.51003, + "82": 10.58819, + "83": 10.43946, + "84": 10.45015, + "85": 10.55142, + "86": 10.2831, + "87": 10.51182, + "88": 10.60318, + "89": 10.50948, + "90": 10.60407, + "91": 10.38208, + "92": 10.38708, + "93": 10.23019, + "94": 10.08381, + "95": 10.4259, + "96": 10.4489, + "97": 10.32133, + "98": 10.49668, + "99": 10.04795, + "100": 10.33446 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + 
"43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 57.04071, + "52": 1.40134, + "53": 1.32404, + "54": 3.89868, + "55": 0.67679, + "56": 0.684, + "57": 0.68825, + "58": 0.68465, + "59": 0.68607, + "60": 0.68633, + "61": 0.6798, + "62": 0.68281, + "63": 0.68253, + "64": 0.68011, + "65": 0.6766, + "66": 0.67533, + "67": 0.67885, + "68": 0.67126, + "69": 0.6756, + "70": 0.67255, + "71": 0.67556, + "72": 0.67135, + "73": 0.66897, + "74": 0.66783, + "75": 0.66944, + "76": 0.66908, + "77": 0.66904, + "78": 0.67839, + "79": 0.6752, + "80": 0.67644, + "81": 0.6727, + "82": 0.67278, + "83": 0.66999, + "84": 0.67287, + "85": 0.67248, + "86": 0.6678, + "87": 0.67191, + "88": 0.66961, + "89": 0.67168, + "90": 0.67021, + "91": 0.66676, + "92": 0.66871, + "93": 0.67204, + "94": 0.67233, + "95": 0.66905, + "96": 0.6735, + "97": 0.67671, + "98": 0.67137, + "99": 0.67053, + "100": 0.67168 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/env_config.yaml b/tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/env_config.yaml new file mode 100644 index 00000000000..329246987bf --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/env_config.yaml @@ -0,0 +1,5 @@ +- agent_type: examples.rl.environments.countdown.countdown_agent.CountdownAgent + agent_args: + dataset_file: "/mnt/artifacts/rl_environments/Jiayi-Pan___countdown-tasks-3to4" + split: "train" + weight: 1.0 diff --git a/tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..f2b6084c49b --- /dev/null +++ 
b/tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/golden_values_dev_dgx_h100.json @@ -0,0 +1,62 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 5, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 5, + "step_interval": 1, + "values": { + "1": 1116221440.0, + "2": 1107565568.0, + "3": 1116188160.0, + "4": 1107525248.0, + "5": 1116234624.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 5, + "step_interval": 1, + "values": { + "1": 76714901504.0, + "2": 76724633600.0, + "3": 76724633600.0, + "4": 76724633600.0, + "5": 76724633600.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 5, + "step_interval": 1, + "values": { + "1": 76714909696.0, + "2": 77061054464.0, + "3": 77061103616.0, + "4": 77061226496.0, + "5": 77061226496.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 5, + "step_interval": 1, + "values": { + "1": "nan", + "2": 121.41938, + "3": 88.73186, + "4": 93.15825, + "5": 91.09737 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/model_config.yaml new file mode 100644 index 00000000000..47228df80b4 --- /dev/null +++ b/tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/model_config.yaml @@ -0,0 +1,131 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 + N_REPEAT: 1 +TEST_TYPE: frozen-start +MODE: rl +MODEL_ARGS: + # Logging and debug + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-memory-to-tensorboard: true + --log-interval: 1 + --log-throughput: true + --log-timers-to-tensorboard: true + --timing-log-level: 1 + 
--timing-log-option: minmax + --tensorboard-log-interval: 1 + + # Model loading + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-type: TikTokenizer + --tiktoken-pattern: v2 + --use-checkpoint-args: true + --no-use-tokenizer-model-from-checkpoint-args: true + --no-load-optim: true + --ckpt-format: torch_dist + --ckpt-fully-parallel-save: true + --ckpt-fully-parallel-load: true + --ckpt-assume-constant-structure: true + --dist-ckpt-strictness: log_unexpected + + # Parallelism - Training: TP=1, EP=4 (4 GPUs model, DP=2 on 8 GPUs) + --sequence-parallel: true + --tensor-model-parallel-size: 8 + --pipeline-model-parallel-size: 1 + --expert-model-parallel-size: 8 + --expert-tensor-parallel-size: 1 + # Parallelism - Inference (refit): TP=1, EP=2 (tests EP refit) + --rl-inference-tensor-model-parallel-size: 4 + --rl-inference-expert-model-parallel-size: 2 + --rl-inference-expert-tensor-model-parallel-size: 1 + + # MoE configuration + --use-mcore-models: true + --moe-token-dispatcher-type: alltoall + --moe-grouped-gemm: true + --num-experts: 64 + --moe-router-topk: 6 + --moe-z-loss-coeff: 0 + --moe-router-load-balancing-type: seq_aux_loss + --moe-aux-loss-coeff: 1e-3 + --moe-router-score-function: sigmoid + + # Model architecture + --untie-embeddings-and-output-weights: true + --disable-bias-linear: true + --init-method-std: 0.014 + --position-embedding-type: rope + --rotary-base: 1000000 + --rotary-percent: 1.0 + --num-layers: 27 + --hidden-size: 2048 + --moe-ffn-hidden-size: 1408 + --moe-shared-expert-intermediate-size: 2816 + --ffn-hidden-size: 10944 + --num-attention-heads: 16 + --kv-channels: 128 + --normalization: RMSNorm + --swiglu: true + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --seq-length: 256 + --max-position-embeddings: 256 + + # Training settings + --distributed-backend: 
nccl + --transformer-impl: transformer_engine + --bf16: true + --attention-backend: flash + --no-create-attention-mask-in-dataloader: true + --num-workers: 8 + --deterministic-mode: true + --seed: 42 + + # RL / GRPO settings + --mock-data: true + --max-tokens-to-oom: 3600000 + --inference-max-seq-length: 256 + --langrl-inference-server-type: inplace_megatron + --calculate-per-token-loss: true + --rl-use-sequence-packing: true + --rl-sequence-packing-algo: fifo + --rl-offload-optimizer-during-inference: true + --rl-parallel-generation-tasks: 1 + --cuda-graph-impl: local + --micro-batch-size: 1 + --global-batch-size: 4 + --grpo-group-size: 2 + --grpo-prompts-per-step: 2 + --grpo-iterations: 1 + --grpo-clamp-eps-lower: 0.2 + --grpo-clamp-eps-upper: 0.2 + --grpo-kl-beta: 0.0 + --grpo-entropy-term-weight: 0.0 + --langrl-env-config: tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/env_config.yaml + --rl-partial-rollouts: true + --perform-rl-step: true + --moe-pad-experts-for-cuda-graph-inference: true + --rl-inference-model-unified-memory-level: 1 + --rl-offload-inference-model-weights-when-idle: true + --inference-dynamic-batching-buffer-size-gb: 20 + --inference-dynamic-batching-num-cuda-graphs: 4 + + # Optimizer + --lr: 0.000001 + --lr-warmup-samples: 0 + --clip-grad: 1.0 + + # Run control + --train-samples: 48828125 + --exit-interval: 5 + --save-interval: 1000000 + --eval-interval: 1000000 + --finetune: true + --inference-logging-step-interval: 1 + --tensorboard-dir: ${TENSORBOARD_PATH} + --straggler-minmax-count: 16 + --empty-unused-memory-level: 2 diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml index 569eb969d72..6daec7b3da6 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml @@ -74,6 +74,7 @@ MODEL_ARGS: --output-path: ${TENSORBOARD_PATH} --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." --incoming-requests-per-sec: -1 # all requests arrive up front. + --inference-dynamic-batching-buffer-size-gb: 20 METRICS: - "generated_tokens" - "logprobs" diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json index 7dbf0c3c806..f4357530aed 100644 --- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2431335424.0, - "2": 2431335424.0, - "3": 2431335424.0, - "4": 2431335424.0, - "5": 2431335424.0, - "6": 2431335424.0, - "7": 2431335424.0, - "8": 2431335424.0, - "9": 2431335424.0, - "10": 2431335424.0, - "11": 2431335424.0, - "12": 2431335424.0, - "13": 2431335424.0, - "14": 2431335424.0, - "15": 2431335424.0, - "16": 2431335424.0, - "17": 2431335424.0, - "18": 2431335424.0, - "19": 2431335424.0, - "20": 2431335424.0, - "21": 2431335424.0, - "22": 2431335424.0, - "23": 2431335424.0, - "24": 2431335424.0, - "25": 2431335424.0, - "26": 2431335424.0, - "27": 2431335424.0, - "28": 2431335424.0, - "29": 2431335424.0, - "30": 2431335424.0, - "31": 2431335424.0, - "32": 2431335424.0, - 
"33": 2431335424.0, - "34": 2431335424.0, - "35": 2431335424.0, - "36": 2431335424.0, - "37": 2431335424.0, - "38": 2431335424.0, - "39": 2431335424.0, - "40": 2431335424.0, - "41": 2431335424.0, - "42": 2431335424.0, - "43": 2431335424.0, - "44": 2431335424.0, - "45": 2431335424.0, - "46": 2431335424.0, - "47": 2431335424.0, - "48": 2431335424.0, - "49": 2431335424.0, - "50": 2431335424.0 + "1": 2431875072.0, + "2": 2431875072.0, + "3": 2431875072.0, + "4": 2431875072.0, + "5": 2431875072.0, + "6": 2431875072.0, + "7": 2431875072.0, + "8": 2431875072.0, + "9": 2431875072.0, + "10": 2431875072.0, + "11": 2431875072.0, + "12": 2431875072.0, + "13": 2431875072.0, + "14": 2431875072.0, + "15": 2431875072.0, + "16": 2431875072.0, + "17": 2431875072.0, + "18": 2431875072.0, + "19": 2431875072.0, + "20": 2431875072.0, + "21": 2431875072.0, + "22": 2431875072.0, + "23": 2431875072.0, + "24": 2431875072.0, + "25": 2431875072.0, + "26": 2431875072.0, + "27": 2431875072.0, + "28": 2431875072.0, + "29": 2431875072.0, + "30": 2431875072.0, + "31": 2431875072.0, + "32": 2431875072.0, + "33": 2431875072.0, + "34": 2431875072.0, + "35": 2431875072.0, + "36": 2431875072.0, + "37": 2431875072.0, + "38": 2431875072.0, + "39": 2431875072.0, + "40": 2431875072.0, + "41": 2431875072.0, + "42": 2431875072.0, + "43": 2431875072.0, + "44": 2431875072.0, + "45": 2431875072.0, + "46": 2431875072.0, + "47": 2431875072.0, + "48": 2431875072.0, + "49": 2431875072.0, + "50": 2431875072.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 14740086784.0, - "2": 15773663232.0, - "3": 15773663232.0, - "4": 15773663232.0, - "5": 15773663232.0, - "6": 15773663232.0, - "7": 15773663232.0, - "8": 15773663232.0, - "9": 15773663232.0, - "10": 15773663232.0, - "11": 15773663232.0, - "12": 15773663232.0, - "13": 15773663232.0, - "14": 15773663232.0, - "15": 15773663232.0, - "16": 15773663232.0, - "17": 15773663232.0, - "18": 15773663232.0, - "19": 
15773663232.0, - "20": 15773663232.0, - "21": 15773663232.0, - "22": 15773663232.0, - "23": 15773663232.0, - "24": 15773663232.0, - "25": 15773663232.0, - "26": 15773663232.0, - "27": 15773663232.0, - "28": 15773663232.0, - "29": 15773663232.0, - "30": 15773663232.0, - "31": 15773663232.0, - "32": 15773663232.0, - "33": 15773663232.0, - "34": 15773663232.0, - "35": 15773663232.0, - "36": 15773663232.0, - "37": 15773663232.0, - "38": 15773663232.0, - "39": 15773663232.0, - "40": 15773663232.0, - "41": 15773663232.0, - "42": 15773663232.0, - "43": 15773663232.0, - "44": 15773663232.0, - "45": 15773663232.0, - "46": 15773663232.0, - "47": 15773663232.0, - "48": 15773663232.0, - "49": 15773663232.0, - "50": 15773663232.0 + "1": 14740087808.0, + "2": 15774200832.0, + "3": 15774200832.0, + "4": 15774200832.0, + "5": 15774200832.0, + "6": 15774200832.0, + "7": 15774200832.0, + "8": 15774200832.0, + "9": 15774200832.0, + "10": 15774200832.0, + "11": 15774200832.0, + "12": 15774200832.0, + "13": 15774200832.0, + "14": 15774200832.0, + "15": 15774200832.0, + "16": 15774200832.0, + "17": 15774200832.0, + "18": 15774200832.0, + "19": 15774200832.0, + "20": 15774200832.0, + "21": 15774200832.0, + "22": 15774200832.0, + "23": 15774200832.0, + "24": 15774200832.0, + "25": 15774200832.0, + "26": 15774200832.0, + "27": 15774200832.0, + "28": 15774200832.0, + "29": 15774200832.0, + "30": 15774200832.0, + "31": 15774200832.0, + "32": 15774200832.0, + "33": 15774200832.0, + "34": 15774200832.0, + "35": 15774200832.0, + "36": 15774200832.0, + "37": 15774200832.0, + "38": 15774200832.0, + "39": 15774200832.0, + "40": 15774200832.0, + "41": 15774200832.0, + "42": 15774200832.0, + "43": 15774200832.0, + "44": 15774200832.0, + "45": 15774200832.0, + "46": 15774200832.0, + "47": 15774200832.0, + "48": 15774200832.0, + "49": 15774200832.0, + "50": 15774200832.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5.97454, - "2": 0.19297, - 
"3": 0.18331, - "4": 0.18419, - "5": 0.18099, - "6": 0.18354, - "7": 0.18332, - "8": 0.18477, - "9": 0.18391, - "10": 0.18412, - "11": 0.18154, - "12": 0.18441, - "13": 0.18338, - "14": 0.1859, - "15": 0.18316, - "16": 0.18298, - "17": 0.18167, - "18": 0.18385, - "19": 0.18358, - "20": 0.18325, - "21": 0.18392, - "22": 0.1826, - "23": 0.18266, - "24": 0.18333, - "25": 0.18413, - "26": 0.185, - "27": 0.18218, - "28": 0.18361, - "29": 0.18161, - "30": 0.18366, - "31": 0.18238, - "32": 0.18355, - "33": 0.18274, - "34": 0.18399, - "35": 0.18232, - "36": 0.18405, - "37": 0.18325, - "38": 0.18367, - "39": 0.18313, - "40": 0.18319, - "41": 0.18244, - "42": 0.18305, - "43": 0.18287, - "44": 0.18263, - "45": 0.18326, - "46": 0.18213, - "47": 0.18261, - "48": 0.18333, - "49": 0.18287, - "50": 0.18284 + "1": 21.47107, + "2": 0.21426, + "3": 0.18485, + "4": 0.1655, + "5": 0.16764, + "6": 0.16482, + "7": 0.16761, + "8": 0.16451, + "9": 0.16762, + "10": 0.16536, + "11": 0.17999, + "12": 0.18657, + "13": 0.16983, + "14": 0.16676, + "15": 0.16908, + "16": 0.16963, + "17": 0.17346, + "18": 0.17019, + "19": 0.17052, + "20": 0.17018, + "21": 0.16541, + "22": 0.16566, + "23": 0.16521, + "24": 0.16662, + "25": 0.16493, + "26": 0.16377, + "27": 0.16515, + "28": 0.16469, + "29": 0.16683, + "30": 0.16435, + "31": 0.1697, + "32": 0.16472, + "33": 0.1693, + "34": 0.16637, + "35": 0.16593, + "36": 0.16439, + "37": 0.16693, + "38": 0.16653, + "39": 0.16645, + "40": 0.16669, + "41": 0.16547, + "42": 0.16438, + "43": 0.16787, + "44": 0.16848, + "45": 0.16631, + "46": 0.16902, + "47": 0.16588, + "48": 0.16644, + "49": 0.16691, + "50": 0.1671 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json index bf52c8e8fd4..b0c23087659 100644 --- 
a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 9.28651, - "2": 9.28395, + "1": 9.28644, + "2": 9.28396, "3": 9.28076, - "4": 9.28861, - "5": 9.27695, + "4": 9.28856, + "5": 9.27699, "6": 9.28726, - "7": 9.27836, - "8": 9.28267, - "9": 9.28528, - "10": 9.28293, - "11": 9.28342, - "12": 9.27384, - "13": 9.27126, + "7": 9.27831, + "8": 9.28266, + "9": 9.28518, + "10": 9.28294, + "11": 9.28326, + "12": 9.27377, + "13": 9.27113, "14": 9.27209, - "15": 9.25309, - "16": 9.24492, + "15": 9.25297, + "16": 9.24499, "17": 9.24857, - "18": 9.22951, + "18": 9.2295, "19": 9.23151, - "20": 9.20817, - "21": 9.17046, - "22": 9.15049, - "23": 9.16842, - "24": 9.15079, - "25": 9.1444, - "26": 9.14727, - "27": 9.12295, - "28": 9.09719, - "29": 9.09388, - "30": 9.0783, - "31": 8.97175, - "32": 9.03158, - "33": 9.02021, - "34": 8.98662, - "35": 8.95924, - "36": 8.97139, - "37": 8.91443, - "38": 8.88795, - "39": 8.88883, - "40": 8.90642, - "41": 8.81811, + "20": 9.20818, + "21": 9.1704, + "22": 9.15059, + "23": 9.16837, + "24": 9.15073, + "25": 9.14424, + "26": 9.14738, + "27": 9.12308, + "28": 9.09717, + "29": 9.09386, + "30": 9.07826, + "31": 8.97181, + "32": 9.0315, + "33": 9.02023, + "34": 8.98663, + "35": 8.95928, + "36": 8.97134, + "37": 8.91442, + "38": 8.88791, + "39": 8.88879, + "40": 8.90639, + "41": 8.81803, "42": 8.87405, - "43": 8.85666, - "44": 8.81697, - "45": 8.81379, - "46": 8.84457, - "47": 8.73721, - "48": 8.66931, - "49": 8.70107, - "50": 8.73494 + "43": 8.85655, + "44": 8.81693, + "45": 8.81356, + "46": 8.84453, + "47": 8.73701, + "48": 8.66923, + "49": 8.70104, + "50": 8.73489 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5959400.0, - "2": 
6553837.0, - "3": 7313493.0, - "4": 6377142.0, - "5": 6498093.0, - "6": 7151947.0, - "7": 6210401.0, - "8": 6334645.0, - "9": 6624584.0, - "10": 6529058.0, - "11": 7466715.0, - "12": 6471579.0, - "13": 6003497.0, - "14": 8071952.0, - "15": 6530023.0, - "16": 7526922.0, - "17": 6034909.0, - "18": 6289605.0, - "19": 6162573.0, - "20": 6527801.0, - "21": 6981914.0, - "22": 7132792.0, - "23": 5928465.0, - "24": 6210239.0, - "25": 6993035.0, - "26": 6471579.0, - "27": 6355357.0, - "28": 6877112.0, - "29": 6380110.0, - "30": 6468659.0, - "31": 8165130.0, - "32": 6765448.0, + "1": 5959428.0, + "2": 6553739.0, + "3": 7313558.0, + "4": 6377212.0, + "5": 6498220.0, + "6": 7152015.0, + "7": 6210260.0, + "8": 6334672.0, + "9": 6624655.0, + "10": 6529106.0, + "11": 7466660.0, + "12": 6471717.0, + "13": 6003465.0, + "14": 8072041.0, + "15": 6529968.0, + "16": 7526852.0, + "17": 6035134.0, + "18": 6289690.0, + "19": 6162498.0, + "20": 6527712.0, + "21": 6981897.0, + "22": 7132920.0, + "23": 5928645.0, + "24": 6210340.0, + "25": 6993116.0, + "26": 6471329.0, + "27": 6355333.0, + "28": 6876968.0, + "29": 6380137.0, + "30": 6468615.0, + "31": 8165212.0, + "32": 6765571.0, "33": 6355561.0, - "34": 6662237.0, - "35": 7065192.0, - "36": 6076915.0, - "37": 7785518.0, - "38": 6727009.0, - "39": 7315902.0, - "40": 6555154.0, - "41": 7314617.0, - "42": 6591869.0, - "43": 6928017.0, - "44": 7274417.0, - "45": 6680008.0, - "46": 6232372.0, - "47": 6496696.0, - "48": 6809696.0, - "49": 6753491.0, - "50": 6238169.0 + "34": 6662287.0, + "35": 7065313.0, + "36": 6076925.0, + "37": 7785462.0, + "38": 6727049.0, + "39": 7315988.0, + "40": 6555018.0, + "41": 7314645.0, + "42": 6591992.0, + "43": 6928020.0, + "44": 7274444.0, + "45": 6680179.0, + "46": 6232560.0, + "47": 6496796.0, + "48": 6809653.0, + "49": 6753531.0, + "50": 6238141.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1653820416.0, - "2": 1653820416.0, - "3": 1653820416.0, - 
"4": 1653820416.0, - "5": 1653820416.0, - "6": 1653820416.0, - "7": 1653820416.0, - "8": 1653820416.0, - "9": 1653820416.0, - "10": 1653820416.0, - "11": 1653820416.0, - "12": 1653820416.0, - "13": 1653820416.0, - "14": 1653820416.0, - "15": 1653820416.0, - "16": 1653820416.0, - "17": 1653820416.0, - "18": 1653820416.0, - "19": 1653820416.0, - "20": 1653820416.0, - "21": 1653820416.0, - "22": 1653820416.0, - "23": 1653820416.0, - "24": 1653820416.0, - "25": 1653820416.0, - "26": 1653820416.0, - "27": 1653820416.0, - "28": 1653820416.0, - "29": 1653820416.0, - "30": 1653820416.0, - "31": 1653820416.0, - "32": 1653820416.0, - "33": 1653820416.0, - "34": 1653820416.0, - "35": 1653820416.0, - "36": 1653820416.0, - "37": 1653820416.0, - "38": 1653820416.0, - "39": 1653820416.0, - "40": 1653820416.0, - "41": 1653820416.0, - "42": 1653820416.0, - "43": 1653820416.0, - "44": 1653820416.0, - "45": 1653820416.0, - "46": 1653820416.0, - "47": 1653820416.0, - "48": 1653820416.0, - "49": 1653820416.0, - "50": 1653820416.0 + "1": 1653821440.0, + "2": 1653821440.0, + "3": 1653821440.0, + "4": 1653821440.0, + "5": 1653821440.0, + "6": 1653821440.0, + "7": 1653821440.0, + "8": 1653821440.0, + "9": 1653821440.0, + "10": 1653821440.0, + "11": 1653821440.0, + "12": 1653821440.0, + "13": 1653821440.0, + "14": 1653821440.0, + "15": 1653821440.0, + "16": 1653821440.0, + "17": 1653821440.0, + "18": 1653821440.0, + "19": 1653821440.0, + "20": 1653821440.0, + "21": 1653821440.0, + "22": 1653821440.0, + "23": 1653821440.0, + "24": 1653821440.0, + "25": 1653821440.0, + "26": 1653821440.0, + "27": 1653821440.0, + "28": 1653821440.0, + "29": 1653821440.0, + "30": 1653821440.0, + "31": 1653821440.0, + "32": 1653821440.0, + "33": 1653821440.0, + "34": 1653821440.0, + "35": 1653821440.0, + "36": 1653821440.0, + "37": 1653821440.0, + "38": 1653821440.0, + "39": 1653821440.0, + "40": 1653821440.0, + "41": 1653821440.0, + "42": 1653821440.0, + "43": 1653821440.0, + "44": 1653821440.0, + "45": 
1653821440.0, + "46": 1653821440.0, + "47": 1653821440.0, + "48": 1653821440.0, + "49": 1653821440.0, + "50": 1653821440.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1653824512.0, - "2": 2142515200.0, - "3": 2142515200.0, - "4": 2142515200.0, - "5": 2142515200.0, - "6": 2142515200.0, - "7": 2142515200.0, - "8": 2142515200.0, - "9": 2142515200.0, - "10": 2142515200.0, - "11": 2142515200.0, - "12": 2142515200.0, - "13": 2142515200.0, - "14": 2142515200.0, - "15": 2142515200.0, - "16": 2142515200.0, - "17": 2142515200.0, - "18": 2142515200.0, - "19": 2142515200.0, - "20": 2142515200.0, - "21": 2142515200.0, - "22": 2142515200.0, - "23": 2142515200.0, - "24": 2142515200.0, - "25": 2142515200.0, - "26": 2142515200.0, - "27": 2142515200.0, - "28": 2142515200.0, - "29": 2142515200.0, - "30": 2142515200.0, - "31": 2142515200.0, - "32": 2142515200.0, - "33": 2142515200.0, - "34": 2142515200.0, - "35": 2142515200.0, - "36": 2142515200.0, - "37": 2142515200.0, - "38": 2142515200.0, - "39": 2142515200.0, - "40": 2142515200.0, - "41": 2142515200.0, - "42": 2142515200.0, - "43": 2142515200.0, - "44": 2142515200.0, - "45": 2142515200.0, - "46": 2142515200.0, - "47": 2142515200.0, - "48": 2142515200.0, - "49": 2142515200.0, - "50": 2142515200.0 + "1": 1653825536.0, + "2": 2142998016.0, + "3": 2142998016.0, + "4": 2142998016.0, + "5": 2142998016.0, + "6": 2142998016.0, + "7": 2142998016.0, + "8": 2142998016.0, + "9": 2142998016.0, + "10": 2142998016.0, + "11": 2142998016.0, + "12": 2142998016.0, + "13": 2142998016.0, + "14": 2142998016.0, + "15": 2142998016.0, + "16": 2142998016.0, + "17": 2142998016.0, + "18": 2142998016.0, + "19": 2142998016.0, + "20": 2142998016.0, + "21": 2142998016.0, + "22": 2142998016.0, + "23": 2142998016.0, + "24": 2142998016.0, + "25": 2142998016.0, + "26": 2142998016.0, + "27": 2142998016.0, + "28": 2142998016.0, + "29": 2142998016.0, + "30": 2142998016.0, + "31": 2142998016.0, + "32": 
2142998016.0, + "33": 2142998016.0, + "34": 2142998016.0, + "35": 2142998016.0, + "36": 2142998016.0, + "37": 2142998016.0, + "38": 2142998016.0, + "39": 2142998016.0, + "40": 2142998016.0, + "41": 2142998016.0, + "42": 2142998016.0, + "43": 2142998016.0, + "44": 2142998016.0, + "45": 2142998016.0, + "46": 2142998016.0, + "47": 2142998016.0, + "48": 2142998016.0, + "49": 2142998016.0, + "50": 2142998016.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 14.64684, - "2": 0.98193, - "3": 0.95861, - "4": 0.96167, - "5": 0.96222, - "6": 0.96444, - "7": 0.95334, - "8": 0.95675, - "9": 0.95004, - "10": 0.9526, - "11": 0.94782, - "12": 0.95256, - "13": 0.95466, - "14": 0.95046, - "15": 0.96366, - "16": 0.95156, - "17": 0.95425, - "18": 0.9544, - "19": 1.2298, - "20": 0.95303, - "21": 0.95634, - "22": 0.95632, - "23": 0.95424, - "24": 0.95464, - "25": 0.96269, - "26": 0.96616, - "27": 0.94874, - "28": 0.94988, - "29": 1.26385, - "30": 0.95465, - "31": 1.2033, - "32": 0.9571, - "33": 0.956, - "34": 0.95832, - "35": 1.32667, - "36": 0.95679, - "37": 0.95623, - "38": 0.96193, - "39": 0.96003, - "40": 1.25799, - "41": 0.95599, - "42": 0.95891, - "43": 1.55786, - "44": 0.96371, - "45": 0.96764, - "46": 0.95894, - "47": 0.96017, - "48": 0.95646, - "49": 0.961, - "50": 0.96278 + "1": 28.88794, + "2": 1.3875, + "3": 1.3655, + "4": 0.91436, + "5": 0.92323, + "6": 0.90862, + "7": 0.90351, + "8": 0.90087, + "9": 0.90804, + "10": 0.90099, + "11": 1.44829, + "12": 1.27198, + "13": 1.47603, + "14": 0.90715, + "15": 0.90169, + "16": 0.8955, + "17": 0.91977, + "18": 0.91161, + "19": 0.90173, + "20": 0.89581, + "21": 0.89026, + "22": 0.88949, + "23": 0.91159, + "24": 0.90975, + "25": 0.90708, + "26": 0.89948, + "27": 0.89544, + "28": 0.89745, + "29": 0.90068, + "30": 0.89534, + "31": 0.90066, + "32": 0.91859, + "33": 0.91419, + "34": 0.89878, + "35": 0.89846, + "36": 0.8945, + "37": 0.89356, + "38": 0.89475, + "39": 0.89372, + "40": 
0.90674, + "41": 0.90461, + "42": 0.93092, + "43": 0.90002, + "44": 0.89721, + "45": 0.89453, + "46": 0.89499, + "47": 0.90828, + "48": 0.89629, + "49": 0.90644, + "50": 0.90588 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json index 45c06ac2f7e..f4a701a2e4d 100644 --- a/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json @@ -100,7 +100,7 @@ "end_step": 25, "step_interval": 1, "values": { - "1": 40735711232.0, + "1": 40735715328.0, "2": 44991991808.0, "3": 44993564672.0, "4": 44993564672.0, @@ -132,31 +132,31 @@ "end_step": 25, "step_interval": 1, "values": { - "1": 12.25468, - "2": 0.47853, - "3": 0.41459, - "4": 0.41066, - "5": 0.4125, - "6": 0.42243, - "7": 0.40926, - "8": 0.41832, - "9": 0.4068, - "10": 0.41071, - "11": 0.41068, - "12": 0.41187, - "13": 0.42064, - "14": 0.4228, - "15": 0.41026, - "16": 0.81409, - "17": 0.41651, - "18": 0.41416, - "19": 0.41418, - "20": 0.41217, - "21": 0.42084, - "22": 0.4131, - "23": 0.41106, - "24": 0.41518, - "25": 0.41106 + "1": 25.74522, + "2": 0.73559, + "3": 0.40581, + "4": 0.38308, + "5": 0.37606, + "6": 0.37631, + "7": 0.39269, + "8": 0.37902, + "9": 0.37764, + "10": 0.8554, + "11": 0.95952, + "12": 0.37861, + "13": 0.38954, + "14": 0.42497, + "15": 0.37698, + "16": 0.37629, + "17": 0.37835, + "18": 0.3766, + "19": 0.37494, + "20": 0.42005, + "21": 0.38011, + "22": 0.37713, + "23": 0.37617, + "24": 0.37515, + "25": 0.37401 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json index 8915a1493e9..377aa000112 
100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.34492, "5": 9.3892, "10": 9.01571, "15": 8.64615, "20": 8.26963, "25": 7.99467, "30": 7.87463, "35": 7.65847, "40": 7.50295, "45": 7.36112, "50": 7.19186, "55": 7.16789, "60": 7.16511, "65": 7.00051, "70": 7.07139, "75": 7.07586, "80": 6.95246, "85": 6.86372, "90": 7.25405, "95": 6.85964, "100": 6.99698}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 43313.0, "5": 45406.0, "10": 45370.0, "15": 43950.0, "20": 44806.0, "25": 42740.0, "30": 44052.0, "35": 43279.0, "40": 43242.0, "45": 43344.0, "50": 43411.0, "55": 43968.0, "60": 41346.0, "65": 44726.0, "70": 45545.0, "75": 44680.0, "80": 41138.0, "85": 44039.0, "90": 44735.0, "95": 44094.0, "100": 42475.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 4138985984.0, "5": 4138985984.0, "10": 4138985984.0, "15": 4138985984.0, "20": 4138985984.0, "25": 4138985984.0, "30": 4138985984.0, "35": 4138985984.0, "40": 4138985984.0, "45": 4138985984.0, "50": 4138985984.0, "55": 4138985984.0, "60": 4138985984.0, "65": 4138985984.0, "70": 4138985984.0, "75": 4138985984.0, "80": 4138985984.0, "85": 4138985984.0, "90": 4138985984.0, "95": 4138985984.0, "100": 4138985984.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 4345973248.0, "5": 6177401856.0, "10": 6177401856.0, "15": 6177401856.0, "20": 6177401856.0, "25": 6177401856.0, "30": 6177401856.0, "35": 6177401856.0, "40": 6177401856.0, "45": 6177401856.0, "50": 6177401856.0, "55": 6177401856.0, "60": 6177401856.0, "65": 6177401856.0, "70": 6177401856.0, "75": 6177401856.0, "80": 6177401856.0, "85": 6177401856.0, 
"90": 6177401856.0, "95": 6177401856.0, "100": 6177401856.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 6.23885, "5": 0.26424, "10": 0.26473, "15": 0.25653, "20": 0.25905, "25": 0.26025, "30": 0.25576, "35": 0.26028, "40": 0.26409, "45": 0.27254, "50": 0.25589, "55": 0.25786, "60": 0.25294, "65": 0.25565, "70": 0.25965, "75": 0.25357, "80": 0.25553, "85": 0.25588, "90": 0.25409, "95": 0.2567, "100": 0.25733}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34492, + "2": 10.36433, + "3": 9.73145, + "4": 9.57923, + "5": 9.3892, + "6": 9.41078, + "7": 9.30545, + "8": 9.24872, + "9": 9.09363, + "10": 9.01571, + "11": 8.86227, + "12": 8.79088, + "13": 8.80884, + "14": 8.67658, + "15": 8.64615, + "16": 8.53973, + "17": 8.47875, + "18": 8.38919, + "19": 8.36145, + "20": 8.26963, + "21": 8.26321, + "22": 8.15047, + "23": 8.08861, + "24": 8.12416, + "25": 7.99467, + "26": 8.08474, + "27": 7.87741, + "28": 7.95852, + "29": 7.79567, + "30": 7.87463, + "31": 7.83211, + "32": 7.69448, + "33": 7.78447, + "34": 7.55753, + "35": 7.65847, + "36": 7.52861, + "37": 7.44889, + "38": 7.50364, + "39": 7.48064, + "40": 7.50295, + "41": 7.3974, + "42": 7.37184, + "43": 7.44291, + "44": 7.38083, + "45": 7.36112, + "46": 7.29391, + "47": 7.475, + "48": 7.29535, + "49": 7.3607, + "50": 7.19186, + "51": 7.38728, + "52": 7.13728, + "53": 7.12477, + "54": 7.23618, + "55": 7.16789, + "56": 7.22866, + "57": 7.34625, + "58": 7.03082, + "59": 7.12273, + "60": 7.16511, + "61": 7.11656, + "62": 7.26779, + "63": 7.16695, + "64": 7.08275, + "65": 7.00051, + "66": 7.07139, + "67": 7.05884, + "68": 7.14563, + "69": 7.03993, + "70": 7.07139, + "71": 6.91636, + "72": 7.02022, + "73": 6.99002, + "74": 6.91408, + "75": 7.07586, + "76": 6.97032, + "77": 7.08431, + "78": 7.03516, + "79": 6.88312, + "80": 6.95246, + "81": 6.98441, + "82": 7.06806, + "83": 7.00882, + "84": 7.01789, + 
"85": 6.86372, + "86": 7.04924, + "87": 6.99288, + "88": 6.92333, + "89": 6.82337, + "90": 7.25405, + "91": 6.72212, + "92": 7.05344, + "93": 6.91633, + "94": 7.0654, + "95": 6.85964, + "96": 6.98723, + "97": 6.96749, + "98": 6.89904, + "99": 7.02746, + "100": 6.99698 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43313.0, + "2": 44075.0, + "3": 44779.0, + "4": 42461.0, + "5": 45406.0, + "6": 40995.0, + "7": 43185.0, + "8": 45480.0, + "9": 42555.0, + "10": 45370.0, + "11": 44017.0, + "12": 44619.0, + "13": 43939.0, + "14": 46223.0, + "15": 43950.0, + "16": 41732.0, + "17": 43869.0, + "18": 44696.0, + "19": 42631.0, + "20": 44806.0, + "21": 44813.0, + "22": 41897.0, + "23": 45483.0, + "24": 43099.0, + "25": 42740.0, + "26": 43950.0, + "27": 46249.0, + "28": 46424.0, + "29": 46206.0, + "30": 44052.0, + "31": 41268.0, + "32": 43408.0, + "33": 45487.0, + "34": 43390.0, + "35": 43279.0, + "36": 42533.0, + "37": 40700.0, + "38": 42585.0, + "39": 44772.0, + "40": 43242.0, + "41": 44698.0, + "42": 43271.0, + "43": 45502.0, + "44": 44648.0, + "45": 43344.0, + "46": 43923.0, + "47": 42519.0, + "48": 44691.0, + "49": 43190.0, + "50": 43411.0, + "51": 41175.0, + "52": 43901.0, + "53": 43967.0, + "54": 41964.0, + "55": 43968.0, + "56": 43280.0, + "57": 42566.0, + "58": 43903.0, + "59": 44657.0, + "60": 41346.0, + "61": 39760.0, + "62": 44779.0, + "63": 44680.0, + "64": 45395.0, + "65": 44726.0, + "66": 45386.0, + "67": 43197.0, + "68": 42570.0, + "69": 43834.0, + "70": 45545.0, + "71": 43402.0, + "72": 44828.0, + "73": 45410.0, + "74": 42508.0, + "75": 44680.0, + "76": 43936.0, + "77": 42111.0, + "78": 40541.0, + "79": 38950.0, + "80": 41138.0, + "81": 45397.0, + "82": 43256.0, + "83": 38500.0, + "84": 42533.0, + "85": 44039.0, + "86": 45756.0, + "87": 41125.0, + "88": 41799.0, + "89": 41088.0, + "90": 44735.0, + "91": 46292.0, + "92": 41852.0, + "93": 43234.0, + "94": 39581.0, + "95": 44094.0, + "96": 44736.0, + "97": 
45487.0, + "98": 41852.0, + "99": 45522.0, + "100": 42475.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4138985984.0, + "2": 4138985984.0, + "3": 4138985984.0, + "4": 4138985984.0, + "5": 4138985984.0, + "6": 4138985984.0, + "7": 4138985984.0, + "8": 4138985984.0, + "9": 4138985984.0, + "10": 4138985984.0, + "11": 4138985984.0, + "12": 4138985984.0, + "13": 4138985984.0, + "14": 4138985984.0, + "15": 4138985984.0, + "16": 4138985984.0, + "17": 4138985984.0, + "18": 4138985984.0, + "19": 4138985984.0, + "20": 4138985984.0, + "21": 4138985984.0, + "22": 4138985984.0, + "23": 4138985984.0, + "24": 4138985984.0, + "25": 4138985984.0, + "26": 4138985984.0, + "27": 4138985984.0, + "28": 4138985984.0, + "29": 4138985984.0, + "30": 4138985984.0, + "31": 4138985984.0, + "32": 4138985984.0, + "33": 4138985984.0, + "34": 4138985984.0, + "35": 4138985984.0, + "36": 4138985984.0, + "37": 4138985984.0, + "38": 4138985984.0, + "39": 4138985984.0, + "40": 4138985984.0, + "41": 4138985984.0, + "42": 4138985984.0, + "43": 4138985984.0, + "44": 4138985984.0, + "45": 4138985984.0, + "46": 4138985984.0, + "47": 4138985984.0, + "48": 4138985984.0, + "49": 4138985984.0, + "50": 4138985984.0, + "51": 4138985984.0, + "52": 4138985984.0, + "53": 4138985984.0, + "54": 4138985984.0, + "55": 4138985984.0, + "56": 4138985984.0, + "57": 4138985984.0, + "58": 4138985984.0, + "59": 4138985984.0, + "60": 4138985984.0, + "61": 4138985984.0, + "62": 4138985984.0, + "63": 4138985984.0, + "64": 4138985984.0, + "65": 4138985984.0, + "66": 4138985984.0, + "67": 4138985984.0, + "68": 4138985984.0, + "69": 4138985984.0, + "70": 4138985984.0, + "71": 4138985984.0, + "72": 4138985984.0, + "73": 4138985984.0, + "74": 4138985984.0, + "75": 4138985984.0, + "76": 4138985984.0, + "77": 4138985984.0, + "78": 4138985984.0, + "79": 4138985984.0, + "80": 4138985984.0, + "81": 4138985984.0, + "82": 4138985984.0, + "83": 4138985984.0, + "84": 
4138985984.0, + "85": 4138985984.0, + "86": 4138985984.0, + "87": 4138985984.0, + "88": 4138985984.0, + "89": 4138985984.0, + "90": 4138985984.0, + "91": 4138985984.0, + "92": 4138985984.0, + "93": 4138985984.0, + "94": 4138985984.0, + "95": 4138985984.0, + "96": 4138985984.0, + "97": 4138985984.0, + "98": 4138985984.0, + "99": 4138985984.0, + "100": 4138985984.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4345973248.0, + "2": 6174256128.0, + "3": 6177401856.0, + "4": 6177401856.0, + "5": 6177401856.0, + "6": 6177401856.0, + "7": 6177401856.0, + "8": 6177401856.0, + "9": 6177401856.0, + "10": 6177401856.0, + "11": 6177401856.0, + "12": 6177401856.0, + "13": 6177401856.0, + "14": 6177401856.0, + "15": 6177401856.0, + "16": 6177401856.0, + "17": 6177401856.0, + "18": 6177401856.0, + "19": 6177401856.0, + "20": 6177401856.0, + "21": 6177401856.0, + "22": 6177401856.0, + "23": 6177401856.0, + "24": 6177401856.0, + "25": 6177401856.0, + "26": 6177401856.0, + "27": 6177401856.0, + "28": 6177401856.0, + "29": 6177401856.0, + "30": 6177401856.0, + "31": 6177401856.0, + "32": 6177401856.0, + "33": 6177401856.0, + "34": 6177401856.0, + "35": 6177401856.0, + "36": 6177401856.0, + "37": 6177401856.0, + "38": 6177401856.0, + "39": 6177401856.0, + "40": 6177401856.0, + "41": 6177401856.0, + "42": 6177401856.0, + "43": 6177401856.0, + "44": 6177401856.0, + "45": 6177401856.0, + "46": 6177401856.0, + "47": 6177401856.0, + "48": 6177401856.0, + "49": 6177401856.0, + "50": 6177401856.0, + "51": 6177401856.0, + "52": 6177401856.0, + "53": 6177401856.0, + "54": 6177401856.0, + "55": 6177401856.0, + "56": 6177401856.0, + "57": 6177401856.0, + "58": 6177401856.0, + "59": 6177401856.0, + "60": 6177401856.0, + "61": 6177401856.0, + "62": 6177401856.0, + "63": 6177401856.0, + "64": 6177401856.0, + "65": 6177401856.0, + "66": 6177401856.0, + "67": 6177401856.0, + "68": 6177401856.0, + "69": 6177401856.0, + "70": 
6177401856.0, + "71": 6177401856.0, + "72": 6177401856.0, + "73": 6177401856.0, + "74": 6177401856.0, + "75": 6177401856.0, + "76": 6177401856.0, + "77": 6177401856.0, + "78": 6177401856.0, + "79": 6177401856.0, + "80": 6177401856.0, + "81": 6177401856.0, + "82": 6177401856.0, + "83": 6177401856.0, + "84": 6177401856.0, + "85": 6177401856.0, + "86": 6177401856.0, + "87": 6177401856.0, + "88": 6177401856.0, + "89": 6177401856.0, + "90": 6177401856.0, + "91": 6177401856.0, + "92": 6177401856.0, + "93": 6177401856.0, + "94": 6177401856.0, + "95": 6177401856.0, + "96": 6177401856.0, + "97": 6177401856.0, + "98": 6177401856.0, + "99": 6177401856.0, + "100": 6177401856.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.90256, + "2": 0.38776, + "3": 0.2538, + "4": 0.23765, + "5": 0.24163, + "6": 0.23676, + "7": 0.24025, + "8": 0.23655, + "9": 0.23987, + "10": 0.23768, + "11": 0.23998, + "12": 0.23715, + "13": 0.24393, + "14": 0.24443, + "15": 0.239, + "16": 0.23703, + "17": 0.23983, + "18": 0.68895, + "19": 0.24165, + "20": 0.23942, + "21": 0.2407, + "22": 0.24031, + "23": 0.24024, + "24": 0.23652, + "25": 0.24086, + "26": 0.2366, + "27": 0.23948, + "28": 0.23647, + "29": 0.23853, + "30": 0.23618, + "31": 0.24073, + "32": 0.24306, + "33": 0.24364, + "34": 0.24271, + "35": 0.25558, + "36": 0.24636, + "37": 0.24909, + "38": 0.24557, + "39": 0.23889, + "40": 0.23902, + "41": 0.24642, + "42": 0.25339, + "43": 0.24074, + "44": 0.24571, + "45": 0.24717, + "46": 0.24699, + "47": 0.24736, + "48": 0.24603, + "49": 0.24517, + "50": 0.24539, + "51": 0.24811, + "52": 0.24582, + "53": 0.24593, + "54": 0.24504, + "55": 0.246, + "56": 0.24529, + "57": 0.24504, + "58": 0.2456, + "59": 0.24486, + "60": 0.24469, + "61": 0.24492, + "62": 0.24541, + "63": 0.24477, + "64": 0.24513, + "65": 0.24517, + "66": 0.24604, + "67": 0.24545, + "68": 0.24484, + "69": 0.24544, + "70": 0.2465, + "71": 0.24485, + "72": 0.24533, + "73": 
0.24696, + "74": 0.24713, + "75": 0.24439, + "76": 0.24545, + "77": 0.24597, + "78": 0.24609, + "79": 0.24565, + "80": 0.24461, + "81": 0.2449, + "82": 0.24557, + "83": 0.24452, + "84": 0.67347, + "85": 0.24571, + "86": 0.24569, + "87": 0.62538, + "88": 0.24689, + "89": 0.24525, + "90": 0.67646, + "91": 0.24552, + "92": 0.67563, + "93": 0.24534, + "94": 0.24466, + "95": 0.24425, + "96": 0.24474, + "97": 0.24581, + "98": 0.24507, + "99": 0.24475, + "100": 0.24541 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100_2nd.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100_2nd.json new file mode 100644 index 00000000000..ecfeaf1c209 --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.38728, + "52": 7.13728, + "53": 7.12477, + "54": 7.23618, + "55": 7.16789, + "56": 7.22866, + "57": 7.34625, + "58": 7.03082, + "59": 7.12273, + "60": 7.16511, + "61": 7.11656, + "62": 7.26779, + "63": 7.16695, + 
"64": 7.08275, + "65": 7.00051, + "66": 7.07139, + "67": 7.05884, + "68": 7.14563, + "69": 7.03993, + "70": 7.07139, + "71": 6.91636, + "72": 7.02022, + "73": 6.99002, + "74": 6.91408, + "75": 7.07586, + "76": 6.97032, + "77": 7.08431, + "78": 7.03516, + "79": 6.88312, + "80": 6.95246, + "81": 6.98441, + "82": 7.06806, + "83": 7.00882, + "84": 7.01789, + "85": 6.86372, + "86": 7.04924, + "87": 6.99288, + "88": 6.92333, + "89": 6.82337, + "90": 7.25405, + "91": 6.72212, + "92": 7.05344, + "93": 6.91633, + "94": 7.0654, + "95": 6.85964, + "96": 6.98723, + "97": 6.96749, + "98": 6.89904, + "99": 7.02746, + "100": 6.99698 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 41175.0, + "52": 43901.0, + "53": 43967.0, + "54": 41964.0, + "55": 43968.0, + "56": 43280.0, + "57": 42566.0, + "58": 43903.0, + "59": 44657.0, + "60": 41346.0, + "61": 39760.0, + "62": 44779.0, + "63": 44680.0, + "64": 45395.0, + "65": 44726.0, + "66": 45386.0, + "67": 43197.0, + "68": 42570.0, + "69": 43834.0, + "70": 45545.0, + "71": 43402.0, + "72": 44828.0, + "73": 45410.0, + "74": 42508.0, + "75": 44680.0, + "76": 43936.0, + "77": 42111.0, + "78": 40541.0, + "79": 38950.0, + "80": 41138.0, + "81": 45397.0, + 
"82": 43256.0, + "83": 38500.0, + "84": 42533.0, + "85": 44039.0, + "86": 45756.0, + "87": 41125.0, + "88": 41799.0, + "89": 41088.0, + "90": 44735.0, + "91": 46292.0, + "92": 41852.0, + "93": 43234.0, + "94": 39581.0, + "95": 44094.0, + "96": 44736.0, + "97": 45487.0, + "98": 41852.0, + "99": 45522.0, + "100": 42475.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4137282048.0, + "52": 4137282048.0, + "53": 4137282048.0, + "54": 4137282048.0, + "55": 4137282048.0, + "56": 4137282048.0, + "57": 4137282048.0, + "58": 4137282048.0, + "59": 4137282048.0, + "60": 4137282048.0, + "61": 4137282048.0, + "62": 4137282048.0, + "63": 4137282048.0, + "64": 4137282048.0, + "65": 4137282048.0, + "66": 4137282048.0, + "67": 4137282048.0, + "68": 4137282048.0, + "69": 4137282048.0, + "70": 4137282048.0, + "71": 4137282048.0, + "72": 4137282048.0, + "73": 4137282048.0, + "74": 4137282048.0, + "75": 4137282048.0, + "76": 4137282048.0, + "77": 4137282048.0, + "78": 4137282048.0, + "79": 4137282048.0, + "80": 4137282048.0, + "81": 4137282048.0, + "82": 4137282048.0, + "83": 4137282048.0, + "84": 4137282048.0, + "85": 4137282048.0, + "86": 4137282048.0, + "87": 4137282048.0, + "88": 
4137282048.0, + "89": 4137282048.0, + "90": 4137282048.0, + "91": 4137282048.0, + "92": 4137282048.0, + "93": 4137282048.0, + "94": 4137282048.0, + "95": 4137282048.0, + "96": 4137282048.0, + "97": 4137282048.0, + "98": 4137282048.0, + "99": 4137282048.0, + "100": 4137282048.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 6119897600.0, + "52": 6198635520.0, + "53": 6198635520.0, + "54": 6198635520.0, + "55": 6198635520.0, + "56": 6198635520.0, + "57": 6198635520.0, + "58": 6198635520.0, + "59": 6198635520.0, + "60": 6198635520.0, + "61": 6198635520.0, + "62": 6198635520.0, + "63": 6198635520.0, + "64": 6198635520.0, + "65": 6198635520.0, + "66": 6198635520.0, + "67": 6198635520.0, + "68": 6198635520.0, + "69": 6198635520.0, + "70": 6198635520.0, + "71": 6198635520.0, + "72": 6198635520.0, + "73": 6198635520.0, + "74": 6198635520.0, + "75": 6198635520.0, + "76": 6198635520.0, + "77": 6198635520.0, + "78": 6198635520.0, + "79": 6198635520.0, + "80": 6198635520.0, + "81": 6198635520.0, + "82": 6198635520.0, + "83": 6198635520.0, + "84": 6198635520.0, + "85": 6198635520.0, + "86": 6198635520.0, + "87": 6198635520.0, + "88": 6198635520.0, + "89": 6198635520.0, + "90": 
6198635520.0, + "91": 6198635520.0, + "92": 6198635520.0, + "93": 6198635520.0, + "94": 6198635520.0, + "95": 6198635520.0, + "96": 6198635520.0, + "97": 6198635520.0, + "98": 6198635520.0, + "99": 6198635520.0, + "100": 6198635520.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 17.50157, + "52": 0.25854, + "53": 0.23866, + "54": 0.23772, + "55": 0.23735, + "56": 0.25491, + "57": 0.23917, + "58": 0.23806, + "59": 0.24067, + "60": 0.25384, + "61": 0.64867, + "62": 0.23907, + "63": 0.23697, + "64": 0.23809, + "65": 0.23776, + "66": 0.23806, + "67": 0.23688, + "68": 0.2374, + "69": 0.23748, + "70": 0.23755, + "71": 0.23825, + "72": 0.23729, + "73": 0.23714, + "74": 0.23744, + "75": 0.24319, + "76": 0.24832, + "77": 0.24157, + "78": 0.24391, + "79": 0.24576, + "80": 0.245, + "81": 0.24875, + "82": 0.24081, + "83": 0.24491, + "84": 0.24628, + "85": 0.23944, + "86": 0.23819, + "87": 0.23895, + "88": 0.24078, + "89": 0.24348, + "90": 0.23902, + "91": 0.23911, + "92": 0.23727, + "93": 0.23776, + "94": 0.23873, + "95": 0.23736, + "96": 0.23765, + "97": 0.23709, + "98": 0.2376, + "99": 0.23731, + "100": 0.23775 + } + } +} \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json index 8809a47cd54..2f16e1424cf 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 7.22025, - "2": 0.31576, - "3": 0.19278, - "4": 0.19432, - "5": 0.18909, - "6": 0.19307, - "7": 0.18922, - "8": 0.19506, - "9": 0.18834, - "10": 0.19233, - "11": 0.18825, - "12": 0.19571, - "13": 0.19081, - "14": 0.19613, - "15": 0.18954, - "16": 0.18825, - "17": 0.18583, - "18": 0.18933, - "19": 0.1896, - "20": 0.19136, - "21": 0.18842, - "22": 0.19581, - "23": 0.18752, - "24": 0.19277, - "25": 0.18759, - "26": 0.19405, - "27": 0.18784, - "28": 0.18762, - "29": 0.19232, - "30": 0.18798, - "31": 0.18713, - "32": 0.18948, - "33": 0.18968, - "34": 0.19011, - "35": 0.18907, - "36": 0.18983, - "37": 0.18857, - "38": 0.18728, - "39": 0.18835, - "40": 0.18777, - "41": 0.188, - "42": 0.18818, - "43": 0.18602, - "44": 0.18972, - "45": 0.19276, - "46": 0.18816, - "47": 0.18794, - "48": 0.19299, - "49": 0.19241, - "50": 0.18805, - "51": 0.18895, - "52": 0.19459, - "53": 0.18821, - "54": 0.18597, - "55": 0.189, - "56": 0.18748, - "57": 0.18709, - "58": 0.19127, - "59": 0.19097, - "60": 0.18702, - "61": 0.18725, - "62": 0.18762, - "63": 0.19407, - "64": 0.19411, - "65": 0.20071, - "66": 0.19555, - "67": 0.22543, - "68": 0.21724, - "69": 0.22635, - "70": 0.52922, - "71": 0.19086, - "72": 0.19899, - "73": 0.51667, - "74": 0.20138, - "75": 0.19507, - "76": 0.24987, - "77": 0.22838, - "78": 0.51523, - "79": 0.19126, - "80": 0.18911, - "81": 0.19269, - "82": 0.18816, - "83": 0.18902, - "84": 0.18942, - "85": 0.19004, - "86": 0.50868, 
- "87": 0.19274, - "88": 0.18813, - "89": 0.19169, - "90": 0.50854, - "91": 0.1924, - "92": 0.18906, - "93": 0.19016, - "94": 0.1902, - "95": 0.19338, - "96": 0.51468, - "97": 0.19597, - "98": 0.19147, - "99": 0.19626, - "100": 0.18852 + "1": 21.8125, + "2": 0.28714, + "3": 0.18248, + "4": 0.16775, + "5": 0.16676, + "6": 0.16648, + "7": 0.16754, + "8": 0.1665, + "9": 0.16691, + "10": 0.16693, + "11": 0.16662, + "12": 0.16643, + "13": 0.16866, + "14": 0.18027, + "15": 0.18602, + "16": 0.17217, + "17": 0.1728, + "18": 0.80687, + "19": 0.17209, + "20": 0.16817, + "21": 0.16774, + "22": 0.16767, + "23": 0.16997, + "24": 0.17545, + "25": 0.16618, + "26": 0.16606, + "27": 0.16686, + "28": 0.16671, + "29": 0.16978, + "30": 0.16859, + "31": 0.16653, + "32": 0.16895, + "33": 0.1718, + "34": 0.16983, + "35": 0.17083, + "36": 0.16981, + "37": 0.21328, + "38": 0.20684, + "39": 0.17073, + "40": 0.17292, + "41": 0.17014, + "42": 0.16958, + "43": 0.17123, + "44": 0.23117, + "45": 0.17089, + "46": 0.16839, + "47": 0.16741, + "48": 0.16733, + "49": 0.16907, + "50": 0.166, + "51": 0.18917, + "52": 0.16625, + "53": 0.1648, + "54": 0.16453, + "55": 0.19111, + "56": 0.16472, + "57": 0.1648, + "58": 0.16849, + "59": 0.16461, + "60": 0.16483, + "61": 0.16545, + "62": 0.1653, + "63": 0.16489, + "64": 0.16447, + "65": 0.16466, + "66": 0.16483, + "67": 0.1656, + "68": 0.16424, + "69": 0.16509, + "70": 0.16891, + "71": 0.16577, + "72": 0.1654, + "73": 0.16726, + "74": 0.16512, + "75": 0.16474, + "76": 0.16524, + "77": 0.1647, + "78": 0.16627, + "79": 0.16568, + "80": 0.16511, + "81": 0.16637, + "82": 0.16694, + "83": 0.16527, + "84": 0.56724, + "85": 0.17088, + "86": 0.16835, + "87": 0.59121, + "88": 0.16681, + "89": 0.16548, + "90": 0.58424, + "91": 0.1663, + "92": 0.57005, + "93": 0.16681, + "94": 0.165, + "95": 0.16566, + "96": 0.16609, + "97": 0.16553, + "98": 0.16396, + "99": 0.16454, + "100": 0.16365 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..990df178a9a --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.37351, + "52": 7.13362, + "53": 7.11248, + "54": 7.23395, + "55": 7.14784, + "56": 7.2278, + "57": 7.33273, + "58": 6.99464, + "59": 7.11597, + "60": 7.13216, + "61": 7.10561, + "62": 7.26519, + "63": 7.14764, + "64": 7.08702, + "65": 6.98658, + "66": 7.04733, + "67": 7.04745, + "68": 7.14076, + "69": 7.24347, + "70": 7.05974, + "71": 6.89358, + "72": 6.99793, + "73": 6.97928, + "74": 6.91973, + "75": 7.05295, + "76": 6.96054, + "77": 7.07939, + "78": 7.0137, + "79": 6.88344, + "80": 6.93032, + "81": 6.96568, + "82": 7.05273, + "83": 6.98785, + "84": 7.00434, + "85": 6.84596, + "86": 7.03651, + "87": 6.96347, + "88": 6.91343, + "89": 6.80657, + "90": 7.23629, + "91": 6.70068, + "92": 7.05694, + "93": 6.89292, + "94": 
7.05848, + "95": 6.84802, + "96": 6.9679, + "97": 6.9429, + "98": 6.87432, + "99": 7.01828, + "100": 6.98491 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 41200.0, + "52": 43884.0, + "53": 43946.0, + "54": 41916.0, + "55": 43925.0, + "56": 43252.0, + "57": 42636.0, + "58": 43941.0, + "59": 44619.0, + "60": 41400.0, + "61": 39750.0, + "62": 44764.0, + "63": 44671.0, + "64": 45375.0, + "65": 44753.0, + "66": 45404.0, + "67": 43154.0, + "68": 42551.0, + "69": 43844.0, + "70": 45537.0, + "71": 43335.0, + "72": 44839.0, + "73": 45372.0, + "74": 42511.0, + "75": 44712.0, + "76": 43930.0, + "77": 42073.0, + "78": 40535.0, + "79": 38992.0, + "80": 41092.0, + "81": 45382.0, + "82": 43275.0, + "83": 38475.0, + "84": 42418.0, + "85": 43979.0, + "86": 45691.0, + "87": 41145.0, + "88": 41782.0, + "89": 41042.0, + "90": 44713.0, + "91": 46270.0, + "92": 41845.0, + "93": 43272.0, + "94": 39536.0, + "95": 44085.0, + "96": 44689.0, + "97": 45411.0, + "98": 41858.0, + "99": 45575.0, + "100": 42501.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + 
"7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4166512128.0, + "52": 4166512128.0, + "53": 4166512128.0, + "54": 4166512128.0, + "55": 4166512128.0, + "56": 4166512128.0, + "57": 4166512128.0, + "58": 4166512128.0, + "59": 4166512128.0, + "60": 4166512128.0, + "61": 4166512128.0, + "62": 4166512128.0, + "63": 4166512128.0, + "64": 4166512128.0, + "65": 4166512128.0, + "66": 4166512128.0, + "67": 4166512128.0, + "68": 4166512128.0, + "69": 4166512128.0, + "70": 4166512128.0, + "71": 4166512128.0, + "72": 4166512128.0, + "73": 4166512128.0, + "74": 4166512128.0, + "75": 4166512128.0, + "76": 4166512128.0, + "77": 4166512128.0, + "78": 4166512128.0, + "79": 4166512128.0, + "80": 4166512128.0, + "81": 4166512128.0, + "82": 4166512128.0, + "83": 4166512128.0, + "84": 4166512128.0, + "85": 4166512128.0, + "86": 4166512128.0, + "87": 4166512128.0, + "88": 4166512128.0, + "89": 4166512128.0, + "90": 4166512128.0, + "91": 4166512128.0, + "92": 4166512128.0, + "93": 4166512128.0, + "94": 4166512128.0, + "95": 4166512128.0, + "96": 4166512128.0, + "97": 4166512128.0, + "98": 4166512128.0, + "99": 4166512128.0, + "100": 4166512128.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + 
"10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 6147947008.0, + "52": 6229044224.0, + "53": 6229044224.0, + "54": 6229044224.0, + "55": 6229044224.0, + "56": 6229044224.0, + "57": 6229044224.0, + "58": 6229044224.0, + "59": 6229044224.0, + "60": 6229044224.0, + "61": 6229044224.0, + "62": 6229044224.0, + "63": 6229044224.0, + "64": 6229044224.0, + "65": 6229044224.0, + "66": 6229044224.0, + "67": 6229044224.0, + "68": 6229044224.0, + "69": 6229044224.0, + "70": 6229044224.0, + "71": 6229044224.0, + "72": 6229044224.0, + "73": 6229044224.0, + "74": 6229044224.0, + "75": 6229044224.0, + "76": 6229044224.0, + "77": 6229044224.0, + "78": 6229044224.0, + "79": 6229044224.0, + "80": 6229044224.0, + "81": 6229044224.0, + "82": 6229044224.0, + "83": 6229044224.0, + "84": 6229044224.0, + "85": 6229044224.0, + "86": 6229044224.0, + "87": 6229044224.0, + "88": 6229044224.0, + "89": 6229044224.0, + "90": 6229044224.0, + "91": 6229044224.0, + "92": 6229044224.0, + "93": 6229044224.0, + "94": 6229044224.0, + "95": 6229044224.0, + "96": 6229044224.0, + "97": 6229044224.0, + "98": 6229044224.0, + "99": 6229044224.0, + "100": 6229044224.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": 
"nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 21.52581, + "52": 0.20557, + "53": 0.16728, + "54": 0.16541, + "55": 0.16459, + "56": 0.1635, + "57": 0.16634, + "58": 0.16486, + "59": 0.18518, + "60": 0.18385, + "61": 0.18349, + "62": 0.16716, + "63": 0.85301, + "64": 0.16878, + "65": 0.16296, + "66": 0.16285, + "67": 0.16213, + "68": 0.1653, + "69": 0.16402, + "70": 0.16087, + "71": 0.16009, + "72": 0.16411, + "73": 0.16271, + "74": 0.16402, + "75": 0.19388, + "76": 0.19834, + "77": 0.18848, + "78": 0.17552, + "79": 0.16404, + "80": 0.21371, + "81": 0.16791, + "82": 0.16882, + "83": 0.16426, + "84": 0.16282, + "85": 0.16565, + "86": 0.16341, + "87": 0.16331, + "88": 0.16306, + "89": 0.16564, + "90": 0.20919, + "91": 0.16623, + "92": 0.16207, + "93": 0.16589, + "94": 0.16268, + "95": 0.16134, + "96": 0.16581, + "97": 0.1593, + "98": 0.16011, + "99": 0.16089, + "100": 0.16056 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json index 6e6c2f4365a..25b93ce0f66 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.38869, 
"5": 9.38084, "10": 9.05709, "15": 8.65595, "20": 8.26189, "25": 7.98194, "30": 7.86925, "35": 7.66275, "40": 7.5007, "45": 7.34875, "50": 7.18139, "55": 7.15407, "60": 7.14724, "65": 6.99707, "70": 7.06003, "75": 7.0608, "80": 6.94288, "85": 6.85973, "90": 7.24972, "95": 6.84835, "100": 6.9828}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 43318.0, "5": 45385.0, "10": 45371.0, "15": 43897.0, "20": 44769.0, "25": 42476.0, "30": 43985.0, "35": 43264.0, "40": 43230.0, "45": 43278.0, "50": 43381.0, "55": 43857.0, "60": 41225.0, "65": 44683.0, "70": 45534.0, "75": 44679.0, "80": 41115.0, "85": 44010.0, "90": 44673.0, "95": 44064.0, "100": 42520.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2164472832.0, "5": 2164472832.0, "10": 2164472832.0, "15": 2164472832.0, "20": 2164472832.0, "25": 2164472832.0, "30": 2164472832.0, "35": 2164472832.0, "40": 2164472832.0, "45": 2164472832.0, "50": 2164472832.0, "55": 2164472832.0, "60": 2164472832.0, "65": 2164472832.0, "70": 2164472832.0, "75": 2164472832.0, "80": 2164472832.0, "85": 2164472832.0, "90": 2164472832.0, "95": 2164472832.0, "100": 2164472832.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2413216256.0, "5": 3345833472.0, "10": 3345833472.0, "15": 3345833472.0, "20": 3345833472.0, "25": 3345833472.0, "30": 3345833472.0, "35": 3345833472.0, "40": 3345833472.0, "45": 3345833472.0, "50": 3345833472.0, "55": 3345833472.0, "60": 3345833472.0, "65": 3345833472.0, "70": 3345833472.0, "75": 3345833472.0, "80": 3345833472.0, "85": 3345833472.0, "90": 3345833472.0, "95": 3345833472.0, "100": 3345833472.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 6.74162, "5": 0.47846, "10": 0.4772, "15": 0.47601, "20": 0.47317, "25": 0.47899, "30": 0.79953, "35": 0.47489, "40": 0.47181, "45": 0.772, "50": 0.4704, "55": 0.47309, "60": 0.47139, "65": 
0.4766, "70": 0.47286, "75": 0.47576, "80": 0.4722, "85": 0.47279, "90": 0.46958, "95": 0.46793, "100": 0.47059}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.38869, + "2": 10.39385, + "3": 9.78084, + "4": 9.59727, + "5": 9.38084, + "6": 9.40579, + "7": 9.30788, + "8": 9.24106, + "9": 9.12192, + "10": 9.05709, + "11": 8.87331, + "12": 8.7937, + "13": 8.84028, + "14": 8.68508, + "15": 8.65595, + "16": 8.54356, + "17": 8.50088, + "18": 8.39002, + "19": 8.36442, + "20": 8.26189, + "21": 8.27089, + "22": 8.14388, + "23": 8.07456, + "24": 8.11903, + "25": 7.98194, + "26": 8.08775, + "27": 7.87135, + "28": 7.96498, + "29": 7.80253, + "30": 7.86925, + "31": 7.81724, + "32": 7.68778, + "33": 7.78042, + "34": 7.55486, + "35": 7.66275, + "36": 7.52238, + "37": 7.44446, + "38": 7.50242, + "39": 7.45039, + "40": 7.5007, + "41": 7.39051, + "42": 7.36065, + "43": 7.43329, + "44": 7.3762, + "45": 7.34875, + "46": 7.28162, + "47": 7.46112, + "48": 7.28762, + "49": 7.35376, + "50": 7.18139, + "51": 7.36575, + "52": 7.1333, + "53": 7.11549, + "54": 7.22921, + "55": 7.15407, + "56": 7.22241, + "57": 7.32951, + "58": 7.02329, + "59": 7.11369, + "60": 7.14724, + "61": 7.11415, + "62": 7.24749, + "63": 7.15673, + "64": 7.08408, + "65": 6.99707, + "66": 7.06064, + "67": 7.04874, + "68": 7.14167, + "69": 7.0346, + "70": 7.06003, + "71": 6.92549, + "72": 7.00408, + "73": 6.97962, + "74": 6.92272, + "75": 7.0608, + "76": 6.97256, + "77": 7.08183, + "78": 7.01864, + "79": 6.8552, + "80": 6.94288, + "81": 6.97634, + "82": 7.06647, + "83": 6.99975, + "84": 7.00894, + "85": 6.85973, + "86": 7.03631, + "87": 6.98045, + "88": 6.91491, + "89": 6.81048, + "90": 7.24972, + "91": 6.71004, + "92": 7.04898, + "93": 6.90555, + "94": 7.06456, + "95": 6.84835, + "96": 6.97647, + "97": 6.9631, + "98": 6.88688, + "99": 7.01307, + "100": 6.9828 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + 
"step_interval": 1, + "values": { + "1": 43318.0, + "2": 44050.0, + "3": 44756.0, + "4": 42391.0, + "5": 45385.0, + "6": 40966.0, + "7": 43182.0, + "8": 45459.0, + "9": 42453.0, + "10": 45371.0, + "11": 43978.0, + "12": 44598.0, + "13": 43892.0, + "14": 46190.0, + "15": 43897.0, + "16": 41608.0, + "17": 43825.0, + "18": 44703.0, + "19": 42550.0, + "20": 44769.0, + "21": 44793.0, + "22": 41844.0, + "23": 45444.0, + "24": 43071.0, + "25": 42476.0, + "26": 43926.0, + "27": 46218.0, + "28": 46430.0, + "29": 46178.0, + "30": 43985.0, + "31": 41281.0, + "32": 43347.0, + "33": 45448.0, + "34": 43305.0, + "35": 43264.0, + "36": 42485.0, + "37": 40077.0, + "38": 42514.0, + "39": 44723.0, + "40": 43230.0, + "41": 44653.0, + "42": 43269.0, + "43": 45446.0, + "44": 44588.0, + "45": 43278.0, + "46": 43896.0, + "47": 42369.0, + "48": 44704.0, + "49": 43172.0, + "50": 43381.0, + "51": 41175.0, + "52": 43812.0, + "53": 43934.0, + "54": 41932.0, + "55": 43857.0, + "56": 43277.0, + "57": 42576.0, + "58": 43835.0, + "59": 44629.0, + "60": 41225.0, + "61": 39716.0, + "62": 44773.0, + "63": 44717.0, + "64": 45367.0, + "65": 44683.0, + "66": 45367.0, + "67": 43136.0, + "68": 42523.0, + "69": 43828.0, + "70": 45534.0, + "71": 43316.0, + "72": 44750.0, + "73": 45364.0, + "74": 42445.0, + "75": 44679.0, + "76": 43875.0, + "77": 42100.0, + "78": 40289.0, + "79": 38949.0, + "80": 41115.0, + "81": 45362.0, + "82": 43205.0, + "83": 38475.0, + "84": 42459.0, + "85": 44010.0, + "86": 45731.0, + "87": 40860.0, + "88": 41793.0, + "89": 41068.0, + "90": 44673.0, + "91": 46149.0, + "92": 41798.0, + "93": 43246.0, + "94": 39583.0, + "95": 44064.0, + "96": 44715.0, + "97": 45390.0, + "98": 41808.0, + "99": 45436.0, + "100": 42520.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2164472832.0, + "2": 2164472832.0, + "3": 2164472832.0, + "4": 2164472832.0, + "5": 2164472832.0, + "6": 2164472832.0, + "7": 2164472832.0, + "8": 
2164472832.0, + "9": 2164472832.0, + "10": 2164472832.0, + "11": 2164472832.0, + "12": 2164472832.0, + "13": 2164472832.0, + "14": 2164472832.0, + "15": 2164472832.0, + "16": 2164472832.0, + "17": 2164472832.0, + "18": 2164472832.0, + "19": 2164472832.0, + "20": 2164472832.0, + "21": 2164472832.0, + "22": 2164472832.0, + "23": 2164472832.0, + "24": 2164472832.0, + "25": 2164472832.0, + "26": 2164472832.0, + "27": 2164472832.0, + "28": 2164472832.0, + "29": 2164472832.0, + "30": 2164472832.0, + "31": 2164472832.0, + "32": 2164472832.0, + "33": 2164472832.0, + "34": 2164472832.0, + "35": 2164472832.0, + "36": 2164472832.0, + "37": 2164472832.0, + "38": 2164472832.0, + "39": 2164472832.0, + "40": 2164472832.0, + "41": 2164472832.0, + "42": 2164472832.0, + "43": 2164472832.0, + "44": 2164472832.0, + "45": 2164472832.0, + "46": 2164472832.0, + "47": 2164472832.0, + "48": 2164472832.0, + "49": 2164472832.0, + "50": 2164472832.0, + "51": 2164472832.0, + "52": 2164472832.0, + "53": 2164472832.0, + "54": 2164472832.0, + "55": 2164472832.0, + "56": 2164472832.0, + "57": 2164472832.0, + "58": 2164472832.0, + "59": 2164472832.0, + "60": 2164472832.0, + "61": 2164472832.0, + "62": 2164472832.0, + "63": 2164472832.0, + "64": 2164472832.0, + "65": 2164472832.0, + "66": 2164472832.0, + "67": 2164472832.0, + "68": 2164472832.0, + "69": 2164472832.0, + "70": 2164472832.0, + "71": 2164472832.0, + "72": 2164472832.0, + "73": 2164472832.0, + "74": 2164472832.0, + "75": 2164472832.0, + "76": 2164472832.0, + "77": 2164472832.0, + "78": 2164472832.0, + "79": 2164472832.0, + "80": 2164472832.0, + "81": 2164472832.0, + "82": 2164472832.0, + "83": 2164472832.0, + "84": 2164472832.0, + "85": 2164472832.0, + "86": 2164472832.0, + "87": 2164472832.0, + "88": 2164472832.0, + "89": 2164472832.0, + "90": 2164472832.0, + "91": 2164472832.0, + "92": 2164472832.0, + "93": 2164472832.0, + "94": 2164472832.0, + "95": 2164472832.0, + "96": 2164472832.0, + "97": 2164472832.0, + "98": 2164472832.0, + 
"99": 2164472832.0, + "100": 2164472832.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2413216256.0, + "2": 3345833472.0, + "3": 3345833472.0, + "4": 3345833472.0, + "5": 3345833472.0, + "6": 3345833472.0, + "7": 3345833472.0, + "8": 3345833472.0, + "9": 3345833472.0, + "10": 3345833472.0, + "11": 3345833472.0, + "12": 3345833472.0, + "13": 3345833472.0, + "14": 3345833472.0, + "15": 3345833472.0, + "16": 3345833472.0, + "17": 3345833472.0, + "18": 3345833472.0, + "19": 3345833472.0, + "20": 3345833472.0, + "21": 3345833472.0, + "22": 3345833472.0, + "23": 3345833472.0, + "24": 3345833472.0, + "25": 3345833472.0, + "26": 3345833472.0, + "27": 3345833472.0, + "28": 3345833472.0, + "29": 3345833472.0, + "30": 3345833472.0, + "31": 3345833472.0, + "32": 3345833472.0, + "33": 3345833472.0, + "34": 3345833472.0, + "35": 3345833472.0, + "36": 3345833472.0, + "37": 3345833472.0, + "38": 3345833472.0, + "39": 3345833472.0, + "40": 3345833472.0, + "41": 3345833472.0, + "42": 3345833472.0, + "43": 3345833472.0, + "44": 3345833472.0, + "45": 3345833472.0, + "46": 3345833472.0, + "47": 3345833472.0, + "48": 3345833472.0, + "49": 3345833472.0, + "50": 3345833472.0, + "51": 3345833472.0, + "52": 3345833472.0, + "53": 3345833472.0, + "54": 3345833472.0, + "55": 3345833472.0, + "56": 3345833472.0, + "57": 3345833472.0, + "58": 3345833472.0, + "59": 3345833472.0, + "60": 3345833472.0, + "61": 3345833472.0, + "62": 3345833472.0, + "63": 3345833472.0, + "64": 3345833472.0, + "65": 3345833472.0, + "66": 3345833472.0, + "67": 3345833472.0, + "68": 3345833472.0, + "69": 3345833472.0, + "70": 3345833472.0, + "71": 3345833472.0, + "72": 3345833472.0, + "73": 3345833472.0, + "74": 3345833472.0, + "75": 3345833472.0, + "76": 3345833472.0, + "77": 3345833472.0, + "78": 3345833472.0, + "79": 3345833472.0, + "80": 3345833472.0, + "81": 3345833472.0, + "82": 3345833472.0, + "83": 3345833472.0, + "84": 3345833472.0, + 
"85": 3345833472.0, + "86": 3345833472.0, + "87": 3345833472.0, + "88": 3345833472.0, + "89": 3345833472.0, + "90": 3345833472.0, + "91": 3345833472.0, + "92": 3345833472.0, + "93": 3345833472.0, + "94": 3345833472.0, + "95": 3345833472.0, + "96": 3345833472.0, + "97": 3345833472.0, + "98": 3345833472.0, + "99": 3345833472.0, + "100": 3345833472.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.92465, + "2": 0.73672, + "3": 0.44476, + "4": 0.43267, + "5": 0.43229, + "6": 0.43162, + "7": 0.43222, + "8": 0.4329, + "9": 0.43176, + "10": 0.43233, + "11": 0.43227, + "12": 0.43124, + "13": 0.43277, + "14": 0.44061, + "15": 0.4485, + "16": 0.45121, + "17": 0.80848, + "18": 0.43555, + "19": 0.43298, + "20": 0.44302, + "21": 0.44708, + "22": 0.43142, + "23": 0.43189, + "24": 0.44055, + "25": 0.4339, + "26": 0.43161, + "27": 0.43237, + "28": 0.43157, + "29": 0.43161, + "30": 0.43227, + "31": 0.43156, + "32": 0.43921, + "33": 0.43687, + "34": 0.43188, + "35": 0.43194, + "36": 0.43194, + "37": 0.43209, + "38": 0.43171, + "39": 0.4409, + "40": 0.45052, + "41": 0.43131, + "42": 0.43172, + "43": 0.43147, + "44": 0.84045, + "45": 0.43076, + "46": 0.43068, + "47": 0.87305, + "48": 0.43164, + "49": 1.00548, + "50": 0.8703, + "51": 0.43255, + "52": 0.43229, + "53": 0.43202, + "54": 0.432, + "55": 0.43189, + "56": 0.43154, + "57": 0.43166, + "58": 0.4319, + "59": 0.43132, + "60": 0.43234, + "61": 0.43225, + "62": 0.43193, + "63": 0.43153, + "64": 0.43325, + "65": 0.4339, + "66": 0.43652, + "67": 0.43828, + "68": 0.43797, + "69": 0.44101, + "70": 0.43951, + "71": 0.43787, + "72": 0.43391, + "73": 0.4315, + "74": 0.43378, + "75": 0.43568, + "76": 0.43331, + "77": 0.43334, + "78": 0.43227, + "79": 0.43399, + "80": 0.44924, + "81": 0.4326, + "82": 0.43301, + "83": 0.43228, + "84": 0.43254, + "85": 0.43238, + "86": 0.43838, + "87": 0.44364, + "88": 0.43194, + "89": 0.43286, + "90": 0.43292, + "91": 0.43386, + "92": 0.43602, 
+ "93": 0.43208, + "94": 0.43192, + "95": 0.43262, + "96": 0.43158, + "97": 0.43293, + "98": 0.43715, + "99": 0.43258, + "100": 0.43232 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json index 89582b25851..8e29e2a4993 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2194357248.0, - "2": 2194357248.0, - "3": 2194357248.0, - "4": 2194357248.0, - "5": 2194357248.0, - "6": 2194357248.0, - "7": 2194357248.0, - "8": 2194357248.0, - "9": 2194357248.0, - "10": 2194357248.0, - "11": 2194357248.0, - "12": 2194357248.0, - "13": 2194357248.0, - "14": 2194357248.0, - "15": 2194357248.0, - "16": 2194357248.0, - "17": 2194357248.0, - "18": 2194357248.0, - "19": 2194357248.0, - "20": 2194357248.0, - "21": 2194357248.0, - "22": 2194357248.0, - "23": 2194357248.0, - "24": 2194357248.0, - "25": 2194357248.0, - "26": 2194357248.0, - "27": 2194357248.0, - "28": 2194357248.0, - "29": 2194357248.0, - "30": 2194357248.0, - "31": 2194357248.0, - "32": 2194357248.0, - "33": 2194357248.0, - "34": 2194357248.0, - "35": 2194357248.0, - "36": 2194357248.0, - "37": 2194357248.0, - "38": 2194357248.0, - "39": 2194357248.0, - "40": 2194357248.0, - "41": 2194357248.0, - "42": 2194357248.0, - "43": 2194357248.0, - "44": 2194357248.0, - "45": 2194357248.0, - "46": 2194357248.0, - "47": 2194357248.0, - "48": 2194357248.0, - "49": 2194357248.0, - "50": 2194357248.0, - "51": 2194357248.0, - "52": 2194357248.0, - "53": 2194357248.0, - "54": 2194357248.0, - "55": 2194357248.0, - "56": 2194357248.0, - "57": 2194357248.0, - "58": 2194357248.0, - "59": 2194357248.0, - "60": 2194357248.0, - 
"61": 2194357248.0, - "62": 2194357248.0, - "63": 2194357248.0, - "64": 2194357248.0, - "65": 2194357248.0, - "66": 2194357248.0, - "67": 2194357248.0, - "68": 2194357248.0, - "69": 2194357248.0, - "70": 2194357248.0, - "71": 2194357248.0, - "72": 2194357248.0, - "73": 2194357248.0, - "74": 2194357248.0, - "75": 2194357248.0, - "76": 2194357248.0, - "77": 2194357248.0, - "78": 2194357248.0, - "79": 2194357248.0, - "80": 2194357248.0, - "81": 2194357248.0, - "82": 2194357248.0, - "83": 2194357248.0, - "84": 2194357248.0, - "85": 2194357248.0, - "86": 2194357248.0, - "87": 2194357248.0, - "88": 2194357248.0, - "89": 2194357248.0, - "90": 2194357248.0, - "91": 2194357248.0, - "92": 2194357248.0, - "93": 2194357248.0, - "94": 2194357248.0, - "95": 2194357248.0, - "96": 2194357248.0, - "97": 2194357248.0, - "98": 2194357248.0, - "99": 2194357248.0, - "100": 2194357248.0 + "1": 2196192256.0, + "2": 2196192256.0, + "3": 2196192256.0, + "4": 2196192256.0, + "5": 2196192256.0, + "6": 2196192256.0, + "7": 2196192256.0, + "8": 2196192256.0, + "9": 2196192256.0, + "10": 2196192256.0, + "11": 2196192256.0, + "12": 2196192256.0, + "13": 2196192256.0, + "14": 2196192256.0, + "15": 2196192256.0, + "16": 2196192256.0, + "17": 2196192256.0, + "18": 2196192256.0, + "19": 2196192256.0, + "20": 2196192256.0, + "21": 2196192256.0, + "22": 2196192256.0, + "23": 2196192256.0, + "24": 2196192256.0, + "25": 2196192256.0, + "26": 2196192256.0, + "27": 2196192256.0, + "28": 2196192256.0, + "29": 2196192256.0, + "30": 2196192256.0, + "31": 2196192256.0, + "32": 2196192256.0, + "33": 2196192256.0, + "34": 2196192256.0, + "35": 2196192256.0, + "36": 2196192256.0, + "37": 2196192256.0, + "38": 2196192256.0, + "39": 2196192256.0, + "40": 2196192256.0, + "41": 2196192256.0, + "42": 2196192256.0, + "43": 2196192256.0, + "44": 2196192256.0, + "45": 2196192256.0, + "46": 2196192256.0, + "47": 2196192256.0, + "48": 2196192256.0, + "49": 2196192256.0, + "50": 2196192256.0, + "51": 2196192256.0, + "52": 
2196192256.0, + "53": 2196192256.0, + "54": 2196192256.0, + "55": 2196192256.0, + "56": 2196192256.0, + "57": 2196192256.0, + "58": 2196192256.0, + "59": 2196192256.0, + "60": 2196192256.0, + "61": 2196192256.0, + "62": 2196192256.0, + "63": 2196192256.0, + "64": 2196192256.0, + "65": 2196192256.0, + "66": 2196192256.0, + "67": 2196192256.0, + "68": 2196192256.0, + "69": 2196192256.0, + "70": 2196192256.0, + "71": 2196192256.0, + "72": 2196192256.0, + "73": 2196192256.0, + "74": 2196192256.0, + "75": 2196192256.0, + "76": 2196192256.0, + "77": 2196192256.0, + "78": 2196192256.0, + "79": 2196192256.0, + "80": 2196192256.0, + "81": 2196192256.0, + "82": 2196192256.0, + "83": 2196192256.0, + "84": 2196192256.0, + "85": 2196192256.0, + "86": 2196192256.0, + "87": 2196192256.0, + "88": 2196192256.0, + "89": 2196192256.0, + "90": 2196192256.0, + "91": 2196192256.0, + "92": 2196192256.0, + "93": 2196192256.0, + "94": 2196192256.0, + "95": 2196192256.0, + "96": 2196192256.0, + "97": 2196192256.0, + "98": 2196192256.0, + "99": 2196192256.0, + "100": 2196192256.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2443624960.0, - "2": 3375193600.0, - "3": 3375193600.0, - "4": 3375193600.0, - "5": 3375193600.0, - "6": 3375193600.0, - "7": 3375193600.0, - "8": 3375193600.0, - "9": 3375193600.0, - "10": 3375193600.0, - "11": 3375193600.0, - "12": 3375193600.0, - "13": 3375193600.0, - "14": 3375193600.0, - "15": 3375193600.0, - "16": 3375193600.0, - "17": 3375193600.0, - "18": 3375193600.0, - "19": 3375193600.0, - "20": 3375193600.0, - "21": 3375193600.0, - "22": 3375193600.0, - "23": 3375193600.0, - "24": 3375193600.0, - "25": 3375193600.0, - "26": 3375193600.0, - "27": 3375193600.0, - "28": 3375193600.0, - "29": 3375193600.0, - "30": 3375193600.0, - "31": 3375193600.0, - "32": 3375193600.0, - "33": 3375193600.0, - "34": 3375193600.0, - "35": 3375193600.0, - "36": 3375193600.0, - "37": 3375193600.0, - "38": 
3375193600.0, - "39": 3375193600.0, - "40": 3375193600.0, - "41": 3375193600.0, - "42": 3375193600.0, - "43": 3375193600.0, - "44": 3375193600.0, - "45": 3375193600.0, - "46": 3375193600.0, - "47": 3375193600.0, - "48": 3375193600.0, - "49": 3375193600.0, - "50": 3375193600.0, - "51": 3375193600.0, - "52": 3375193600.0, - "53": 3375193600.0, - "54": 3375193600.0, - "55": 3375193600.0, - "56": 3375193600.0, - "57": 3375193600.0, - "58": 3375193600.0, - "59": 3375193600.0, - "60": 3375193600.0, - "61": 3375193600.0, - "62": 3375193600.0, - "63": 3375193600.0, - "64": 3375193600.0, - "65": 3375193600.0, - "66": 3375193600.0, - "67": 3375193600.0, - "68": 3375193600.0, - "69": 3375193600.0, - "70": 3375193600.0, - "71": 3375193600.0, - "72": 3375193600.0, - "73": 3375193600.0, - "74": 3375193600.0, - "75": 3375193600.0, - "76": 3375193600.0, - "77": 3375193600.0, - "78": 3375193600.0, - "79": 3375193600.0, - "80": 3375193600.0, - "81": 3375193600.0, - "82": 3375193600.0, - "83": 3375193600.0, - "84": 3375193600.0, - "85": 3375193600.0, - "86": 3375193600.0, - "87": 3375193600.0, - "88": 3375193600.0, - "89": 3375193600.0, - "90": 3375193600.0, - "91": 3375193600.0, - "92": 3375193600.0, - "93": 3375193600.0, - "94": 3375193600.0, - "95": 3375193600.0, - "96": 3375193600.0, - "97": 3375193600.0, - "98": 3375193600.0, - "99": 3375193600.0, - "100": 3375193600.0 + "1": 2444149248.0, + "2": 3377290752.0, + "3": 3377290752.0, + "4": 3377290752.0, + "5": 3377290752.0, + "6": 3377290752.0, + "7": 3377290752.0, + "8": 3377290752.0, + "9": 3377290752.0, + "10": 3377290752.0, + "11": 3377290752.0, + "12": 3377290752.0, + "13": 3377290752.0, + "14": 3377290752.0, + "15": 3377290752.0, + "16": 3377290752.0, + "17": 3377290752.0, + "18": 3377290752.0, + "19": 3377290752.0, + "20": 3377290752.0, + "21": 3377290752.0, + "22": 3377290752.0, + "23": 3377290752.0, + "24": 3377290752.0, + "25": 3377290752.0, + "26": 3377290752.0, + "27": 3377290752.0, + "28": 3377290752.0, + "29": 
3377290752.0, + "30": 3377290752.0, + "31": 3377290752.0, + "32": 3377290752.0, + "33": 3377290752.0, + "34": 3377290752.0, + "35": 3377290752.0, + "36": 3377290752.0, + "37": 3377290752.0, + "38": 3377290752.0, + "39": 3377290752.0, + "40": 3377290752.0, + "41": 3377290752.0, + "42": 3377290752.0, + "43": 3377290752.0, + "44": 3377290752.0, + "45": 3377290752.0, + "46": 3377290752.0, + "47": 3377290752.0, + "48": 3377290752.0, + "49": 3377290752.0, + "50": 3377290752.0, + "51": 3377290752.0, + "52": 3377290752.0, + "53": 3377290752.0, + "54": 3377290752.0, + "55": 3377290752.0, + "56": 3377290752.0, + "57": 3377290752.0, + "58": 3377290752.0, + "59": 3377290752.0, + "60": 3377290752.0, + "61": 3377290752.0, + "62": 3377290752.0, + "63": 3377290752.0, + "64": 3377290752.0, + "65": 3377290752.0, + "66": 3377290752.0, + "67": 3377290752.0, + "68": 3377290752.0, + "69": 3377290752.0, + "70": 3377290752.0, + "71": 3377290752.0, + "72": 3377290752.0, + "73": 3377290752.0, + "74": 3377290752.0, + "75": 3377290752.0, + "76": 3377290752.0, + "77": 3377290752.0, + "78": 3377290752.0, + "79": 3377290752.0, + "80": 3377290752.0, + "81": 3377290752.0, + "82": 3377290752.0, + "83": 3377290752.0, + "84": 3377290752.0, + "85": 3377290752.0, + "86": 3377290752.0, + "87": 3377290752.0, + "88": 3377290752.0, + "89": 3377290752.0, + "90": 3377290752.0, + "91": 3377290752.0, + "92": 3377290752.0, + "93": 3377290752.0, + "94": 3377290752.0, + "95": 3377290752.0, + "96": 3377290752.0, + "97": 3377290752.0, + "98": 3377290752.0, + "99": 3377290752.0, + "100": 3377290752.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.37156, - "2": 0.38887, - "3": 0.36602, - "4": 0.35866, - "5": 0.36165, - "6": 0.37465, - "7": 0.35731, - "8": 0.3641, - "9": 0.35988, - "10": 0.35622, - "11": 0.36397, - "12": 0.36059, - "13": 0.35322, - "14": 0.36378, - "15": 0.35044, - "16": 0.351, - "17": 0.3614, - "18": 0.3499, - "19": 0.3502, - "20": 0.35899, 
- "21": 0.34832, - "22": 0.35463, - "23": 0.36264, - "24": 0.3582, - "25": 0.68028, - "26": 0.35807, - "27": 0.36086, - "28": 0.3546, - "29": 0.35008, - "30": 0.36639, - "31": 0.35917, - "32": 0.35093, - "33": 0.42545, - "34": 0.36458, - "35": 0.36139, - "36": 0.66018, - "37": 0.36179, - "38": 0.35264, - "39": 0.35347, - "40": 0.35947, - "41": 0.65933, - "42": 0.36488, - "43": 0.35596, - "44": 0.35639, - "45": 0.35817, - "46": 0.35914, - "47": 0.65482, - "48": 0.35543, - "49": 0.3548, - "50": 0.36559, - "51": 0.3585, - "52": 0.35668, - "53": 0.3592, - "54": 0.35503, - "55": 0.36108, - "56": 0.74128, - "57": 0.36657, - "58": 0.36018, - "59": 0.35608, - "60": 0.36593, - "61": 0.35388, - "62": 0.35617, - "63": 0.63145, - "64": 0.35737, - "65": 0.36509, - "66": 0.35793, - "67": 0.36215, - "68": 0.35502, - "69": 0.35608, - "70": 0.36406, - "71": 0.35939, - "72": 0.36012, - "73": 0.36102, - "74": 0.35997, - "75": 0.35821, - "76": 0.36372, - "77": 0.36015, - "78": 0.36089, - "79": 0.3626, - "80": 0.36632, - "81": 0.36481, - "82": 0.38444, - "83": 0.36154, - "84": 0.37204, - "85": 0.35784, - "86": 0.35591, - "87": 0.36678, - "88": 0.73353, - "89": 0.36867, - "90": 0.36231, - "91": 0.36826, - "92": 0.35945, - "93": 0.36394, - "94": 0.43835, - "95": 0.36152, - "96": 0.36154, - "97": 0.35778, - "98": 0.35857, - "99": 0.36061, - "100": 0.35857 + "1": 25.09235, + "2": 0.40134, + "3": 0.33175, + "4": 0.31603, + "5": 0.31264, + "6": 0.3171, + "7": 0.31353, + "8": 0.31164, + "9": 0.31158, + "10": 0.31146, + "11": 0.3125, + "12": 0.31264, + "13": 0.31346, + "14": 0.317, + "15": 0.32556, + "16": 0.31934, + "17": 0.69799, + "18": 0.32677, + "19": 0.31967, + "20": 0.3173, + "21": 0.31556, + "22": 0.31356, + "23": 0.31832, + "24": 0.31564, + "25": 0.31197, + "26": 0.31173, + "27": 0.31328, + "28": 0.31264, + "29": 0.31324, + "30": 0.31156, + "31": 0.31097, + "32": 0.31333, + "33": 0.31645, + "34": 0.31419, + "35": 0.31325, + "36": 0.30809, + "37": 0.30923, + "38": 0.30875, + "39": 
0.30819, + "40": 0.31109, + "41": 0.30849, + "42": 0.30871, + "43": 0.72163, + "44": 0.70555, + "45": 0.31196, + "46": 0.30971, + "47": 0.90209, + "48": 0.30901, + "49": 0.30899, + "50": 0.31177, + "51": 0.31251, + "52": 0.30763, + "53": 0.31005, + "54": 0.30977, + "55": 0.30883, + "56": 0.30955, + "57": 0.30687, + "58": 0.30701, + "59": 0.30937, + "60": 0.3093, + "61": 0.30827, + "62": 0.30923, + "63": 0.30942, + "64": 0.30862, + "65": 0.31004, + "66": 0.30958, + "67": 0.3081, + "68": 0.30948, + "69": 0.30866, + "70": 0.30848, + "71": 0.32952, + "72": 0.32928, + "73": 0.32761, + "74": 0.32983, + "75": 0.32798, + "76": 0.40614, + "77": 0.33024, + "78": 0.33019, + "79": 0.31035, + "80": 0.30849, + "81": 0.31139, + "82": 0.3106, + "83": 0.30861, + "84": 0.3083, + "85": 0.30817, + "86": 0.31324, + "87": 0.31432, + "88": 0.31032, + "89": 0.30979, + "90": 0.30748, + "91": 0.30871, + "92": 0.31423, + "93": 0.31134, + "94": 0.31265, + "95": 0.30865, + "96": 0.30849, + "97": 0.31368, + "98": 0.30792, + "99": 0.31014, + "100": 0.30734 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json index db68b291113..df17a69a638 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.38736, "5": 9.38281, "10": 9.06783, "15": 8.65706, "20": 8.26603, "25": 7.98158, "30": 7.87182, "35": 7.66308, "40": 7.50499, "45": 7.3523, "50": 7.17986, "55": 7.15383, "60": 7.14998, "65": 6.99542, "70": 7.0643, "75": 7.06414, "80": 6.94493, "85": 6.8595, "90": 7.25918, "95": 6.84927, "100": 6.99082}}, 
"num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 43296.0, "5": 45373.0, "10": 45357.0, "15": 43909.0, "20": 44765.0, "25": 42457.0, "30": 43999.0, "35": 43276.0, "40": 43214.0, "45": 43265.0, "50": 43383.0, "55": 43861.0, "60": 41267.0, "65": 44696.0, "70": 45504.0, "75": 44661.0, "80": 41077.0, "85": 43970.0, "90": 44657.0, "95": 44047.0, "100": 42429.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2166438912.0, "5": 2166438912.0, "10": 2166438912.0, "15": 2166438912.0, "20": 2166438912.0, "25": 2166438912.0, "30": 2166438912.0, "35": 2166438912.0, "40": 2166438912.0, "45": 2166438912.0, "50": 2166438912.0, "55": 2166438912.0, "60": 2166438912.0, "65": 2166438912.0, "70": 2166438912.0, "75": 2166438912.0, "80": 2166438912.0, "85": 2166438912.0, "90": 2166438912.0, "95": 2166438912.0, "100": 2166438912.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2274851328.0, "5": 3206419968.0, "10": 3206419968.0, "15": 3206419968.0, "20": 3206419968.0, "25": 3206419968.0, "30": 3206419968.0, "35": 3206419968.0, "40": 3206419968.0, "45": 3206419968.0, "50": 3206419968.0, "55": 3206419968.0, "60": 3206419968.0, "65": 3206419968.0, "70": 3206419968.0, "75": 3206419968.0, "80": 3206419968.0, "85": 3206419968.0, "90": 3206419968.0, "95": 3206419968.0, "100": 3206419968.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 6.73958, "5": 0.5162, "10": 0.51641, "15": 0.51693, "20": 0.93549, "25": 0.52094, "30": 1.03416, "35": 0.51, "40": 0.85483, "45": 0.50998, "50": 0.51431, "55": 0.51184, "60": 0.51243, "65": 0.51243, "70": 0.52038, "75": 0.51387, "80": 0.51875, "85": 0.51808, "90": 0.52661, "95": 0.51088, "100": 0.51108}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.38736, + "2": 10.37971, + "3": 9.79428, + "4": 
9.59941, + "5": 9.38281, + "6": 9.40765, + "7": 9.31116, + "8": 9.25004, + "9": 9.1304, + "10": 9.06783, + "11": 8.89519, + "12": 8.8149, + "13": 8.82749, + "14": 8.69768, + "15": 8.65706, + "16": 8.54479, + "17": 8.50168, + "18": 8.39069, + "19": 8.36692, + "20": 8.26603, + "21": 8.27533, + "22": 8.14757, + "23": 8.0735, + "24": 8.12127, + "25": 7.98158, + "26": 8.09181, + "27": 7.87361, + "28": 7.96832, + "29": 7.80579, + "30": 7.87182, + "31": 7.818, + "32": 7.69078, + "33": 7.7864, + "34": 7.55667, + "35": 7.66308, + "36": 7.52559, + "37": 7.44779, + "38": 7.50335, + "39": 7.45281, + "40": 7.50499, + "41": 7.38901, + "42": 7.36263, + "43": 7.43543, + "44": 7.37578, + "45": 7.3523, + "46": 7.2817, + "47": 7.46121, + "48": 7.29037, + "49": 7.35179, + "50": 7.17986, + "51": 7.36821, + "52": 7.13332, + "53": 7.11532, + "54": 7.23214, + "55": 7.15383, + "56": 7.22184, + "57": 7.33328, + "58": 7.02116, + "59": 7.11467, + "60": 7.14998, + "61": 7.1117, + "62": 7.25117, + "63": 7.15586, + "64": 7.08539, + "65": 6.99542, + "66": 7.05924, + "67": 7.04804, + "68": 7.13906, + "69": 7.03428, + "70": 7.0643, + "71": 6.9218, + "72": 7.00511, + "73": 6.97917, + "74": 6.92066, + "75": 7.06414, + "76": 6.97532, + "77": 7.0837, + "78": 7.01986, + "79": 6.86115, + "80": 6.94493, + "81": 6.97847, + "82": 7.06834, + "83": 6.99434, + "84": 7.01114, + "85": 6.8595, + "86": 7.04211, + "87": 6.98111, + "88": 6.91353, + "89": 6.81096, + "90": 7.25918, + "91": 6.71195, + "92": 7.05431, + "93": 6.91084, + "94": 7.06872, + "95": 6.84927, + "96": 6.98126, + "97": 6.96743, + "98": 6.89421, + "99": 7.0152, + "100": 6.99082 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43296.0, + "2": 44067.0, + "3": 44759.0, + "4": 42367.0, + "5": 45373.0, + "6": 40966.0, + "7": 43147.0, + "8": 45448.0, + "9": 42470.0, + "10": 45357.0, + "11": 43969.0, + "12": 44583.0, + "13": 43897.0, + "14": 46189.0, + "15": 43909.0, + "16": 41613.0, + "17": 
43823.0, + "18": 44678.0, + "19": 42556.0, + "20": 44765.0, + "21": 44723.0, + "22": 41820.0, + "23": 45463.0, + "24": 43077.0, + "25": 42457.0, + "26": 43913.0, + "27": 46221.0, + "28": 46390.0, + "29": 46160.0, + "30": 43999.0, + "31": 41276.0, + "32": 43316.0, + "33": 45432.0, + "34": 43303.0, + "35": 43276.0, + "36": 42461.0, + "37": 40045.0, + "38": 42557.0, + "39": 44701.0, + "40": 43214.0, + "41": 44667.0, + "42": 43241.0, + "43": 45448.0, + "44": 44605.0, + "45": 43265.0, + "46": 43892.0, + "47": 42375.0, + "48": 44656.0, + "49": 43182.0, + "50": 43383.0, + "51": 41130.0, + "52": 43841.0, + "53": 43918.0, + "54": 41894.0, + "55": 43861.0, + "56": 43229.0, + "57": 42488.0, + "58": 43831.0, + "59": 44616.0, + "60": 41267.0, + "61": 39701.0, + "62": 44746.0, + "63": 44704.0, + "64": 45346.0, + "65": 44696.0, + "66": 45356.0, + "67": 43133.0, + "68": 42535.0, + "69": 43803.0, + "70": 45504.0, + "71": 43309.0, + "72": 44800.0, + "73": 45401.0, + "74": 42467.0, + "75": 44661.0, + "76": 43882.0, + "77": 42110.0, + "78": 40337.0, + "79": 38924.0, + "80": 41077.0, + "81": 45349.0, + "82": 43228.0, + "83": 38446.0, + "84": 42443.0, + "85": 43970.0, + "86": 45668.0, + "87": 40846.0, + "88": 41780.0, + "89": 41056.0, + "90": 44657.0, + "91": 46133.0, + "92": 41748.0, + "93": 43205.0, + "94": 39556.0, + "95": 44047.0, + "96": 44668.0, + "97": 45383.0, + "98": 41817.0, + "99": 45425.0, + "100": 42429.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2166438912.0, + "2": 2166438912.0, + "3": 2166438912.0, + "4": 2166438912.0, + "5": 2166438912.0, + "6": 2166438912.0, + "7": 2166438912.0, + "8": 2166438912.0, + "9": 2166438912.0, + "10": 2166438912.0, + "11": 2166438912.0, + "12": 2166438912.0, + "13": 2166438912.0, + "14": 2166438912.0, + "15": 2166438912.0, + "16": 2166438912.0, + "17": 2166438912.0, + "18": 2166438912.0, + "19": 2166438912.0, + "20": 2166438912.0, + "21": 2166438912.0, + "22": 
2166438912.0, + "23": 2166438912.0, + "24": 2166438912.0, + "25": 2166438912.0, + "26": 2166438912.0, + "27": 2166438912.0, + "28": 2166438912.0, + "29": 2166438912.0, + "30": 2166438912.0, + "31": 2166438912.0, + "32": 2166438912.0, + "33": 2166438912.0, + "34": 2166438912.0, + "35": 2166438912.0, + "36": 2166438912.0, + "37": 2166438912.0, + "38": 2166438912.0, + "39": 2166438912.0, + "40": 2166438912.0, + "41": 2166438912.0, + "42": 2166438912.0, + "43": 2166438912.0, + "44": 2166438912.0, + "45": 2166438912.0, + "46": 2166438912.0, + "47": 2166438912.0, + "48": 2166438912.0, + "49": 2166438912.0, + "50": 2166438912.0, + "51": 2166438912.0, + "52": 2166438912.0, + "53": 2166438912.0, + "54": 2166438912.0, + "55": 2166438912.0, + "56": 2166438912.0, + "57": 2166438912.0, + "58": 2166438912.0, + "59": 2166438912.0, + "60": 2166438912.0, + "61": 2166438912.0, + "62": 2166438912.0, + "63": 2166438912.0, + "64": 2166438912.0, + "65": 2166438912.0, + "66": 2166438912.0, + "67": 2166438912.0, + "68": 2166438912.0, + "69": 2166438912.0, + "70": 2166438912.0, + "71": 2166438912.0, + "72": 2166438912.0, + "73": 2166438912.0, + "74": 2166438912.0, + "75": 2166438912.0, + "76": 2166438912.0, + "77": 2166438912.0, + "78": 2166438912.0, + "79": 2166438912.0, + "80": 2166438912.0, + "81": 2166438912.0, + "82": 2166438912.0, + "83": 2166438912.0, + "84": 2166438912.0, + "85": 2166438912.0, + "86": 2166438912.0, + "87": 2166438912.0, + "88": 2166438912.0, + "89": 2166438912.0, + "90": 2166438912.0, + "91": 2166438912.0, + "92": 2166438912.0, + "93": 2166438912.0, + "94": 2166438912.0, + "95": 2166438912.0, + "96": 2166438912.0, + "97": 2166438912.0, + "98": 2166438912.0, + "99": 2166438912.0, + "100": 2166438912.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2274851328.0, + "2": 3206419968.0, + "3": 3206419968.0, + "4": 3206419968.0, + "5": 3206419968.0, + "6": 3206419968.0, + "7": 3206419968.0, + "8": 
3206419968.0, + "9": 3206419968.0, + "10": 3206419968.0, + "11": 3206419968.0, + "12": 3206419968.0, + "13": 3206419968.0, + "14": 3206419968.0, + "15": 3206419968.0, + "16": 3206419968.0, + "17": 3206419968.0, + "18": 3206419968.0, + "19": 3206419968.0, + "20": 3206419968.0, + "21": 3206419968.0, + "22": 3206419968.0, + "23": 3206419968.0, + "24": 3206419968.0, + "25": 3206419968.0, + "26": 3206419968.0, + "27": 3206419968.0, + "28": 3206419968.0, + "29": 3206419968.0, + "30": 3206419968.0, + "31": 3206419968.0, + "32": 3206419968.0, + "33": 3206419968.0, + "34": 3206419968.0, + "35": 3206419968.0, + "36": 3206419968.0, + "37": 3206419968.0, + "38": 3206419968.0, + "39": 3206419968.0, + "40": 3206419968.0, + "41": 3206419968.0, + "42": 3206419968.0, + "43": 3206419968.0, + "44": 3206419968.0, + "45": 3206419968.0, + "46": 3206419968.0, + "47": 3206419968.0, + "48": 3206419968.0, + "49": 3206419968.0, + "50": 3206419968.0, + "51": 3206419968.0, + "52": 3206419968.0, + "53": 3206419968.0, + "54": 3206419968.0, + "55": 3206419968.0, + "56": 3206419968.0, + "57": 3206419968.0, + "58": 3206419968.0, + "59": 3206419968.0, + "60": 3206419968.0, + "61": 3206419968.0, + "62": 3206419968.0, + "63": 3206419968.0, + "64": 3206419968.0, + "65": 3206419968.0, + "66": 3206419968.0, + "67": 3206419968.0, + "68": 3206419968.0, + "69": 3206419968.0, + "70": 3206419968.0, + "71": 3206419968.0, + "72": 3206419968.0, + "73": 3206419968.0, + "74": 3206419968.0, + "75": 3206419968.0, + "76": 3206419968.0, + "77": 3206419968.0, + "78": 3206419968.0, + "79": 3206419968.0, + "80": 3206419968.0, + "81": 3206419968.0, + "82": 3206419968.0, + "83": 3206419968.0, + "84": 3206419968.0, + "85": 3206419968.0, + "86": 3206419968.0, + "87": 3206419968.0, + "88": 3206419968.0, + "89": 3206419968.0, + "90": 3206419968.0, + "91": 3206419968.0, + "92": 3206419968.0, + "93": 3206419968.0, + "94": 3206419968.0, + "95": 3206419968.0, + "96": 3206419968.0, + "97": 3206419968.0, + "98": 3206419968.0, + 
"99": 3206419968.0, + "100": 3206419968.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.84492, + "2": 0.56374, + "3": 0.48979, + "4": 0.47999, + "5": 0.47943, + "6": 0.4785, + "7": 0.48067, + "8": 0.98328, + "9": 0.47936, + "10": 0.47967, + "11": 0.48109, + "12": 0.49359, + "13": 0.50052, + "14": 0.4915, + "15": 0.49405, + "16": 0.50085, + "17": 0.49211, + "18": 0.51598, + "19": 0.50449, + "20": 0.4857, + "21": 0.48578, + "22": 0.48623, + "23": 0.48781, + "24": 0.87325, + "25": 0.48523, + "26": 0.92864, + "27": 0.4864, + "28": 0.48651, + "29": 0.48435, + "30": 0.49416, + "31": 1.05489, + "32": 1.10052, + "33": 0.49491, + "34": 0.49294, + "35": 0.48798, + "36": 0.48781, + "37": 0.48704, + "38": 0.49022, + "39": 0.48933, + "40": 0.48881, + "41": 0.48549, + "42": 0.48579, + "43": 0.48689, + "44": 0.48684, + "45": 0.48751, + "46": 0.48731, + "47": 0.48706, + "48": 0.48816, + "49": 0.48587, + "50": 0.48676, + "51": 0.4868, + "52": 0.48709, + "53": 0.4868, + "54": 0.48647, + "55": 0.48914, + "56": 0.48748, + "57": 0.487, + "58": 0.48636, + "59": 0.48608, + "60": 0.48583, + "61": 0.48634, + "62": 0.48753, + "63": 0.48694, + "64": 0.48525, + "65": 0.4853, + "66": 0.48545, + "67": 0.48738, + "68": 0.48709, + "69": 0.48727, + "70": 0.48494, + "71": 0.48573, + "72": 0.48622, + "73": 0.48642, + "74": 0.48627, + "75": 0.48837, + "76": 0.48773, + "77": 0.48748, + "78": 0.49724, + "79": 0.49868, + "80": 0.48848, + "81": 0.48729, + "82": 0.48827, + "83": 0.48649, + "84": 0.48563, + "85": 0.4887, + "86": 0.49085, + "87": 0.50008, + "88": 0.48807, + "89": 0.48771, + "90": 0.49194, + "91": 0.48913, + "92": 0.48833, + "93": 0.48713, + "94": 0.48704, + "95": 0.48785, + "96": 0.489, + "97": 0.48763, + "98": 0.49533, + "99": 0.49947, + "100": 0.48805 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json index 30c495148f4..6b1bd4f8405 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json @@ -32,78 +32,78 @@ "26": 8.10636, "27": 7.88853, "28": 7.97024, - "29": 7.8121, - "30": 7.87698, - "31": 7.82339, - "32": 7.70086, - "33": 7.80317, - "34": 7.56843, - "35": 7.67276, - "36": 7.54942, - "37": 7.475, - "38": 7.51068, - "39": 7.49979, - "40": 7.51131, - "41": 7.41252, - "42": 7.38333, - "43": 7.4414, - "44": 7.39857, - "45": 7.37352, - "46": 7.28824, - "47": 7.4683, - "48": 7.29457, - "49": 7.35181, - "50": 7.17223, - "51": 7.37216, - "52": 7.14588, - "53": 7.12384, - "54": 7.23984, - "55": 7.15454, - "56": 7.23308, - "57": 7.33501, - "58": 7.01226, - "59": 7.12063, - "60": 7.15043, - "61": 7.11076, - "62": 7.26458, - "63": 7.1544, - "64": 7.08651, - "65": 6.99077, - "66": 7.05503, + "29": 7.81206, + "30": 7.87695, + "31": 7.82331, + "32": 7.70095, + "33": 7.80328, + "34": 7.56837, + "35": 7.67277, + "36": 7.54939, + "37": 7.47502, + "38": 7.51064, + "39": 7.49974, + "40": 7.51136, + "41": 7.41248, + "42": 7.38332, + "43": 7.44137, + "44": 7.39868, + "45": 7.37355, + "46": 7.2884, + "47": 7.46831, + "48": 7.29467, + "49": 7.3518, + "50": 7.17242, + "51": 7.37224, + "52": 7.14591, + "53": 7.12383, + "54": 7.23985, + "55": 7.15463, + "56": 7.23305, + "57": 7.33504, + "58": 7.01209, + "59": 7.12052, + "60": 7.15042, + "61": 7.11083, + "62": 7.26448, + "63": 7.15439, + "64": 7.08647, + "65": 6.99081, + "66": 7.05501, "67": 7.04463, - "68": 7.136, - "69": 7.03404, - "70": 7.05994, - "71": 6.90146, - "72": 6.99845, - "73": 6.97783, - "74": 6.92205, - "75": 7.06268, - "76": 6.95612, - "77": 7.08838, - "78": 7.02608, - "79": 6.85354, - "80": 6.93543, - "81": 6.97396, - "82": 
7.05854, - "83": 6.98003, - "84": 7.00602, - "85": 6.84771, - "86": 7.04197, - "87": 6.97366, - "88": 6.90817, - "89": 6.80902, - "90": 7.23999, - "91": 6.70221, - "92": 7.0543, - "93": 6.89332, - "94": 7.05002, - "95": 6.84547, - "96": 6.96202, - "97": 6.95355, - "98": 6.8731, - "99": 6.99831, - "100": 6.98508 + "68": 7.13589, + "69": 7.03403, + "70": 7.05993, + "71": 6.90134, + "72": 6.99846, + "73": 6.97799, + "74": 6.92221, + "75": 7.06246, + "76": 6.95628, + "77": 7.08818, + "78": 7.02594, + "79": 6.85356, + "80": 6.93552, + "81": 6.97408, + "82": 7.05838, + "83": 6.98013, + "84": 7.00615, + "85": 6.84767, + "86": 7.04208, + "87": 6.97372, + "88": 6.90816, + "89": 6.80892, + "90": 7.23979, + "91": 6.70218, + "92": 7.05429, + "93": 6.89324, + "94": 7.05007, + "95": 6.84548, + "96": 6.96184, + "97": 6.95372, + "98": 6.87307, + "99": 6.99837, + "100": 6.98518 } }, "num-zeros": { @@ -139,78 +139,78 @@ "26": 43923.0, "27": 46212.0, "28": 46362.0, - "29": 46133.0, - "30": 43978.0, - "31": 41220.0, - "32": 43307.0, - "33": 45440.0, - "34": 43284.0, - "35": 43248.0, - "36": 42437.0, - "37": 40066.0, - "38": 42483.0, - "39": 44702.0, - "40": 43230.0, - "41": 44672.0, - "42": 43202.0, - "43": 45459.0, - "44": 44609.0, - "45": 43265.0, - "46": 43915.0, - "47": 42366.0, - "48": 44650.0, - "49": 43139.0, - "50": 43399.0, - "51": 41159.0, - "52": 43818.0, - "53": 43924.0, - "54": 41952.0, - "55": 43866.0, - "56": 43239.0, - "57": 42540.0, - "58": 43856.0, - "59": 44589.0, - "60": 41152.0, - "61": 39709.0, - "62": 44822.0, - "63": 44663.0, - "64": 45372.0, + "29": 46135.0, + "30": 43975.0, + "31": 41226.0, + "32": 43299.0, + "33": 45425.0, + "34": 43296.0, + "35": 43243.0, + "36": 42441.0, + "37": 40060.0, + "38": 42489.0, + "39": 44704.0, + "40": 43237.0, + "41": 44663.0, + "42": 43215.0, + "43": 45451.0, + "44": 44614.0, + "45": 43281.0, + "46": 43913.0, + "47": 42359.0, + "48": 44654.0, + "49": 43144.0, + "50": 43398.0, + "51": 41144.0, + "52": 43830.0, + "53": 43934.0, + 
"54": 41941.0, + "55": 43886.0, + "56": 43231.0, + "57": 42542.0, + "58": 43846.0, + "59": 44585.0, + "60": 41140.0, + "61": 39720.0, + "62": 44819.0, + "63": 44670.0, + "64": 45354.0, "65": 44676.0, "66": 45345.0, - "67": 43130.0, - "68": 42567.0, - "69": 43812.0, - "70": 45538.0, - "71": 43282.0, - "72": 44765.0, - "73": 45354.0, - "74": 42517.0, - "75": 44666.0, + "67": 43146.0, + "68": 42561.0, + "69": 43826.0, + "70": 45535.0, + "71": 43294.0, + "72": 44777.0, + "73": 45349.0, + "74": 42497.0, + "75": 44676.0, "76": 43904.0, - "77": 42041.0, - "78": 40320.0, - "79": 38914.0, - "80": 41081.0, - "81": 45333.0, - "82": 43195.0, + "77": 42038.0, + "78": 40306.0, + "79": 38925.0, + "80": 41075.0, + "81": 45335.0, + "82": 43207.0, "83": 38489.0, - "84": 42436.0, - "85": 43978.0, - "86": 45680.0, - "87": 40832.0, - "88": 41797.0, - "89": 41083.0, - "90": 44676.0, - "91": 46190.0, - "92": 41837.0, - "93": 43234.0, + "84": 42428.0, + "85": 43976.0, + "86": 45688.0, + "87": 40838.0, + "88": 41786.0, + "89": 41088.0, + "90": 44682.0, + "91": 46204.0, + "92": 41815.0, + "93": 43233.0, "94": 39504.0, - "95": 44067.0, - "96": 44684.0, - "97": 45419.0, - "98": 41854.0, - "99": 45431.0, - "100": 42479.0 + "95": 44070.0, + "96": 44687.0, + "97": 45432.0, + "98": 41849.0, + "99": 45441.0, + "100": 42488.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2195405824.0, - "2": 2195405824.0, - "3": 2195405824.0, - "4": 2195405824.0, - "5": 2195405824.0, - "6": 2195405824.0, - "7": 2195405824.0, - "8": 2195405824.0, - "9": 2195405824.0, - "10": 2195405824.0, - "11": 2195405824.0, - "12": 2195405824.0, - "13": 2195405824.0, - "14": 2195405824.0, - "15": 2195405824.0, - "16": 2195405824.0, - "17": 2195405824.0, - "18": 2195405824.0, - "19": 2195405824.0, - "20": 2195405824.0, - "21": 2195405824.0, - "22": 2195405824.0, - "23": 2195405824.0, - "24": 2195405824.0, - "25": 2195405824.0, - "26": 2195405824.0, - "27": 
2195405824.0, - "28": 2195405824.0, - "29": 2195405824.0, - "30": 2195405824.0, - "31": 2195405824.0, - "32": 2195405824.0, - "33": 2195405824.0, - "34": 2195405824.0, - "35": 2195405824.0, - "36": 2195405824.0, - "37": 2195405824.0, - "38": 2195405824.0, - "39": 2195405824.0, - "40": 2195405824.0, - "41": 2195405824.0, - "42": 2195405824.0, - "43": 2195405824.0, - "44": 2195405824.0, - "45": 2195405824.0, - "46": 2195405824.0, - "47": 2195405824.0, - "48": 2195405824.0, - "49": 2195405824.0, - "50": 2195405824.0, - "51": 2195405824.0, - "52": 2195405824.0, - "53": 2195405824.0, - "54": 2195405824.0, - "55": 2195405824.0, - "56": 2195405824.0, - "57": 2195405824.0, - "58": 2195405824.0, - "59": 2195405824.0, - "60": 2195405824.0, - "61": 2195405824.0, - "62": 2195405824.0, - "63": 2195405824.0, - "64": 2195405824.0, - "65": 2195405824.0, - "66": 2195405824.0, - "67": 2195405824.0, - "68": 2195405824.0, - "69": 2195405824.0, - "70": 2195405824.0, - "71": 2195405824.0, - "72": 2195405824.0, - "73": 2195405824.0, - "74": 2195405824.0, - "75": 2195405824.0, - "76": 2195405824.0, - "77": 2195405824.0, - "78": 2195405824.0, - "79": 2195405824.0, - "80": 2195405824.0, - "81": 2195405824.0, - "82": 2195405824.0, - "83": 2195405824.0, - "84": 2195405824.0, - "85": 2195405824.0, - "86": 2195405824.0, - "87": 2195405824.0, - "88": 2195405824.0, - "89": 2195405824.0, - "90": 2195405824.0, - "91": 2195405824.0, - "92": 2195405824.0, - "93": 2195405824.0, - "94": 2195405824.0, - "95": 2195405824.0, - "96": 2195405824.0, - "97": 2195405824.0, - "98": 2195405824.0, - "99": 2195405824.0, - "100": 2195405824.0 + "1": 2197502976.0, + "2": 2197502976.0, + "3": 2197502976.0, + "4": 2197502976.0, + "5": 2197502976.0, + "6": 2197502976.0, + "7": 2197502976.0, + "8": 2197502976.0, + "9": 2197502976.0, + "10": 2197502976.0, + "11": 2197502976.0, + "12": 2197502976.0, + "13": 2197502976.0, + "14": 2197502976.0, + "15": 2197502976.0, + "16": 2197502976.0, + "17": 2197502976.0, + "18": 
2197502976.0, + "19": 2197502976.0, + "20": 2197502976.0, + "21": 2197502976.0, + "22": 2197502976.0, + "23": 2197502976.0, + "24": 2197502976.0, + "25": 2197502976.0, + "26": 2197502976.0, + "27": 2197502976.0, + "28": 2197502976.0, + "29": 2197502976.0, + "30": 2197502976.0, + "31": 2197502976.0, + "32": 2197502976.0, + "33": 2197502976.0, + "34": 2197502976.0, + "35": 2197502976.0, + "36": 2197502976.0, + "37": 2197502976.0, + "38": 2197502976.0, + "39": 2197502976.0, + "40": 2197502976.0, + "41": 2197502976.0, + "42": 2197502976.0, + "43": 2197502976.0, + "44": 2197502976.0, + "45": 2197502976.0, + "46": 2197502976.0, + "47": 2197502976.0, + "48": 2197502976.0, + "49": 2197502976.0, + "50": 2197502976.0, + "51": 2197502976.0, + "52": 2197502976.0, + "53": 2197502976.0, + "54": 2197502976.0, + "55": 2197502976.0, + "56": 2197502976.0, + "57": 2197502976.0, + "58": 2197502976.0, + "59": 2197502976.0, + "60": 2197502976.0, + "61": 2197502976.0, + "62": 2197502976.0, + "63": 2197502976.0, + "64": 2197502976.0, + "65": 2197502976.0, + "66": 2197502976.0, + "67": 2197502976.0, + "68": 2197502976.0, + "69": 2197502976.0, + "70": 2197502976.0, + "71": 2197502976.0, + "72": 2197502976.0, + "73": 2197502976.0, + "74": 2197502976.0, + "75": 2197502976.0, + "76": 2197502976.0, + "77": 2197502976.0, + "78": 2197502976.0, + "79": 2197502976.0, + "80": 2197502976.0, + "81": 2197502976.0, + "82": 2197502976.0, + "83": 2197502976.0, + "84": 2197502976.0, + "85": 2197502976.0, + "86": 2197502976.0, + "87": 2197502976.0, + "88": 2197502976.0, + "89": 2197502976.0, + "90": 2197502976.0, + "91": 2197502976.0, + "92": 2197502976.0, + "93": 2197502976.0, + "94": 2197502976.0, + "95": 2197502976.0, + "96": 2197502976.0, + "97": 2197502976.0, + "98": 2197502976.0, + "99": 2197502976.0, + "100": 2197502976.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2302114304.0, - "2": 3236697600.0, - "3": 3236697600.0, - "4": 
3236697600.0, - "5": 3236697600.0, - "6": 3236697600.0, - "7": 3236697600.0, - "8": 3236697600.0, - "9": 3236697600.0, - "10": 3236697600.0, - "11": 3236697600.0, - "12": 3236697600.0, - "13": 3236697600.0, - "14": 3236697600.0, - "15": 3236697600.0, - "16": 3236697600.0, - "17": 3236697600.0, - "18": 3236697600.0, - "19": 3236697600.0, - "20": 3236697600.0, - "21": 3236697600.0, - "22": 3236697600.0, - "23": 3236697600.0, - "24": 3236697600.0, - "25": 3236697600.0, - "26": 3236697600.0, - "27": 3236697600.0, - "28": 3236697600.0, - "29": 3236697600.0, - "30": 3236697600.0, - "31": 3236697600.0, - "32": 3236697600.0, - "33": 3236697600.0, - "34": 3236697600.0, - "35": 3236697600.0, - "36": 3236697600.0, - "37": 3236697600.0, - "38": 3236697600.0, - "39": 3236697600.0, - "40": 3236697600.0, - "41": 3236697600.0, - "42": 3236697600.0, - "43": 3236697600.0, - "44": 3236697600.0, - "45": 3236697600.0, - "46": 3236697600.0, - "47": 3236697600.0, - "48": 3236697600.0, - "49": 3236697600.0, - "50": 3236697600.0, - "51": 3236697600.0, - "52": 3236697600.0, - "53": 3236697600.0, - "54": 3236697600.0, - "55": 3236697600.0, - "56": 3236697600.0, - "57": 3236697600.0, - "58": 3236697600.0, - "59": 3236697600.0, - "60": 3236697600.0, - "61": 3236697600.0, - "62": 3236697600.0, - "63": 3236697600.0, - "64": 3236697600.0, - "65": 3236697600.0, - "66": 3236697600.0, - "67": 3236697600.0, - "68": 3236697600.0, - "69": 3236697600.0, - "70": 3236697600.0, - "71": 3236697600.0, - "72": 3236697600.0, - "73": 3236697600.0, - "74": 3236697600.0, - "75": 3236697600.0, - "76": 3236697600.0, - "77": 3236697600.0, - "78": 3236697600.0, - "79": 3236697600.0, - "80": 3236697600.0, - "81": 3236697600.0, - "82": 3236697600.0, - "83": 3236697600.0, - "84": 3236697600.0, - "85": 3236697600.0, - "86": 3236697600.0, - "87": 3236697600.0, - "88": 3236697600.0, - "89": 3236697600.0, - "90": 3236697600.0, - "91": 3236697600.0, - "92": 3236697600.0, - "93": 3236697600.0, - "94": 3236697600.0, - "95": 
3236697600.0, - "96": 3236697600.0, - "97": 3236697600.0, - "98": 3236697600.0, - "99": 3236697600.0, - "100": 3236697600.0 + "1": 2302638592.0, + "2": 3238794752.0, + "3": 3238794752.0, + "4": 3238794752.0, + "5": 3238794752.0, + "6": 3238794752.0, + "7": 3238794752.0, + "8": 3238794752.0, + "9": 3238794752.0, + "10": 3238794752.0, + "11": 3238794752.0, + "12": 3238794752.0, + "13": 3238794752.0, + "14": 3238794752.0, + "15": 3238794752.0, + "16": 3238794752.0, + "17": 3238794752.0, + "18": 3238794752.0, + "19": 3238794752.0, + "20": 3238794752.0, + "21": 3238794752.0, + "22": 3238794752.0, + "23": 3238794752.0, + "24": 3238794752.0, + "25": 3238794752.0, + "26": 3238794752.0, + "27": 3238794752.0, + "28": 3238794752.0, + "29": 3238794752.0, + "30": 3238794752.0, + "31": 3238794752.0, + "32": 3238794752.0, + "33": 3238794752.0, + "34": 3238794752.0, + "35": 3238794752.0, + "36": 3238794752.0, + "37": 3238794752.0, + "38": 3238794752.0, + "39": 3238794752.0, + "40": 3238794752.0, + "41": 3238794752.0, + "42": 3238794752.0, + "43": 3238794752.0, + "44": 3238794752.0, + "45": 3238794752.0, + "46": 3238794752.0, + "47": 3238794752.0, + "48": 3238794752.0, + "49": 3238794752.0, + "50": 3238794752.0, + "51": 3238794752.0, + "52": 3238794752.0, + "53": 3238794752.0, + "54": 3238794752.0, + "55": 3238794752.0, + "56": 3238794752.0, + "57": 3238794752.0, + "58": 3238794752.0, + "59": 3238794752.0, + "60": 3238794752.0, + "61": 3238794752.0, + "62": 3238794752.0, + "63": 3238794752.0, + "64": 3238794752.0, + "65": 3238794752.0, + "66": 3238794752.0, + "67": 3238794752.0, + "68": 3238794752.0, + "69": 3238794752.0, + "70": 3238794752.0, + "71": 3238794752.0, + "72": 3238794752.0, + "73": 3238794752.0, + "74": 3238794752.0, + "75": 3238794752.0, + "76": 3238794752.0, + "77": 3238794752.0, + "78": 3238794752.0, + "79": 3238794752.0, + "80": 3238794752.0, + "81": 3238794752.0, + "82": 3238794752.0, + "83": 3238794752.0, + "84": 3238794752.0, + "85": 3238794752.0, + "86": 
3238794752.0, + "87": 3238794752.0, + "88": 3238794752.0, + "89": 3238794752.0, + "90": 3238794752.0, + "91": 3238794752.0, + "92": 3238794752.0, + "93": 3238794752.0, + "94": 3238794752.0, + "95": 3238794752.0, + "96": 3238794752.0, + "97": 3238794752.0, + "98": 3238794752.0, + "99": 3238794752.0, + "100": 3238794752.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.46115, - "2": 0.46835, - "3": 0.38416, - "4": 0.37391, - "5": 0.37703, - "6": 0.38173, - "7": 0.37456, - "8": 0.37696, - "9": 0.37338, - "10": 0.37687, - "11": 0.38251, - "12": 0.38037, - "13": 0.37996, - "14": 0.38264, - "15": 0.37959, - "16": 0.38232, - "17": 0.37852, - "18": 0.37735, - "19": 0.3812, - "20": 0.37493, - "21": 0.38227, - "22": 0.38196, - "23": 0.37745, - "24": 0.3782, - "25": 0.37181, - "26": 0.37935, - "27": 0.38539, - "28": 0.38393, - "29": 0.3826, - "30": 0.37839, - "31": 0.38438, - "32": 0.64523, - "33": 0.37971, - "34": 0.38082, - "35": 0.74313, - "36": 0.3848, - "37": 0.38169, - "38": 0.38154, - "39": 0.40495, - "40": 0.40243, - "41": 0.37972, - "42": 0.37792, - "43": 0.38261, - "44": 0.37607, - "45": 0.37463, - "46": 0.37881, - "47": 0.37293, - "48": 0.37592, - "49": 0.659, - "50": 0.37783, - "51": 0.38158, - "52": 0.73901, - "53": 0.37684, - "54": 0.37707, - "55": 0.42405, - "56": 0.38184, - "57": 0.37936, - "58": 0.37539, - "59": 0.37591, - "60": 0.72267, - "61": 0.37815, - "62": 0.77277, - "63": 0.38815, - "64": 0.3807, - "65": 0.37848, - "66": 0.38143, - "67": 0.37999, - "68": 0.38158, - "69": 0.38427, - "70": 0.37479, - "71": 0.38252, - "72": 0.38036, - "73": 0.38116, - "74": 0.38336, - "75": 0.3771, - "76": 0.37876, - "77": 0.38102, - "78": 0.37864, - "79": 0.38095, - "80": 0.37954, - "81": 0.37575, - "82": 0.38084, - "83": 0.38192, - "84": 0.38267, - "85": 0.38765, - "86": 0.38467, - "87": 0.3817, - "88": 0.37395, - "89": 0.37751, - "90": 0.38076, - "91": 0.37565, - "92": 0.38237, - "93": 0.37738, - "94": 0.37726, 
- "95": 0.38237, - "96": 0.38018, - "97": 0.38525, - "98": 0.40815, - "99": 0.38117, - "100": 0.38201 + "1": 25.05607, + "2": 0.4771, + "3": 0.78234, + "4": 0.35523, + "5": 0.34787, + "6": 0.35038, + "7": 0.35972, + "8": 0.35589, + "9": 0.38294, + "10": 0.35953, + "11": 0.35001, + "12": 0.35158, + "13": 0.3501, + "14": 0.3486, + "15": 0.34967, + "16": 0.347, + "17": 0.34513, + "18": 0.36694, + "19": 0.36383, + "20": 0.3472, + "21": 0.3418, + "22": 0.34601, + "23": 0.76245, + "24": 0.73697, + "25": 0.7256, + "26": 0.34524, + "27": 0.34628, + "28": 0.34443, + "29": 0.35468, + "30": 0.73189, + "31": 0.96909, + "32": 0.34399, + "33": 0.34907, + "34": 0.35028, + "35": 0.34486, + "36": 0.34787, + "37": 0.345, + "38": 0.34797, + "39": 0.34864, + "40": 0.34596, + "41": 0.34855, + "42": 0.34707, + "43": 0.34709, + "44": 0.34717, + "45": 0.34917, + "46": 0.34955, + "47": 0.34487, + "48": 0.35114, + "49": 0.34985, + "50": 0.35151, + "51": 0.3515, + "52": 0.34854, + "53": 0.34699, + "54": 0.35058, + "55": 0.34683, + "56": 0.34606, + "57": 0.34877, + "58": 0.34509, + "59": 0.34822, + "60": 0.34532, + "61": 0.34516, + "62": 0.34479, + "63": 0.36001, + "64": 0.3983, + "65": 0.34758, + "66": 0.34684, + "67": 0.34571, + "68": 0.3481, + "69": 0.34685, + "70": 0.34473, + "71": 0.34557, + "72": 0.34856, + "73": 0.34506, + "74": 0.34674, + "75": 0.34706, + "76": 0.34879, + "77": 0.35195, + "78": 0.34663, + "79": 0.35252, + "80": 0.34719, + "81": 0.3448, + "82": 0.34727, + "83": 0.34972, + "84": 0.34547, + "85": 0.35367, + "86": 0.34453, + "87": 0.3406, + "88": 0.34389, + "89": 0.3438, + "90": 0.34535, + "91": 0.34386, + "92": 0.34313, + "93": 0.34017, + "94": 0.34115, + "95": 0.34187, + "96": 0.34159, + "97": 0.34076, + "98": 0.34202, + "99": 0.34323, + "100": 0.34206 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_h100.json index 2400879202c..da925a09fb1 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1132053504.0, - "2": 1132053504.0, - "3": 1132053504.0, - "4": 1132053504.0, - "5": 1132053504.0, - "6": 1132053504.0, - "7": 1132053504.0, - "8": 1132053504.0, - "9": 1132053504.0, - "10": 1132053504.0, - "11": 1132053504.0, - "12": 1132053504.0, - "13": 1132053504.0, - "14": 1132053504.0, - "15": 1132053504.0, - "16": 1132053504.0, - "17": 1132053504.0, - "18": 1132053504.0, - "19": 1132053504.0, - "20": 1132053504.0, - "21": 1132053504.0, - "22": 1132053504.0, - "23": 1132053504.0, - "24": 1132053504.0, - "25": 1132053504.0, - "26": 1132053504.0, - "27": 1132053504.0, - "28": 1132053504.0, - "29": 1132053504.0, - "30": 1132053504.0, - "31": 1132053504.0, - "32": 1132053504.0, - "33": 1132053504.0, - "34": 1132053504.0, - "35": 1132053504.0, - "36": 1132053504.0, - "37": 1132053504.0, - "38": 1132053504.0, - "39": 1132053504.0, - "40": 1132053504.0, - "41": 1132053504.0, - "42": 1132053504.0, - "43": 1132053504.0, - "44": 1132053504.0, - "45": 1132053504.0, - "46": 1132053504.0, - "47": 1132053504.0, - "48": 1132053504.0, - "49": 1132053504.0, - "50": 1132053504.0, - "51": 1132053504.0, - "52": 1132053504.0, - "53": 1132053504.0, - "54": 1132053504.0, - "55": 1132053504.0, - "56": 1132053504.0, - "57": 1132053504.0, - "58": 1132053504.0, - "59": 1132053504.0, - "60": 1132053504.0, - "61": 1132053504.0, - "62": 1132053504.0, - "63": 1132053504.0, - "64": 1132053504.0, - "65": 1132053504.0, - "66": 1132053504.0, - "67": 1132053504.0, - "68": 1132053504.0, - "69": 1132053504.0, - "70": 1132053504.0, - "71": 1132053504.0, - "72": 1132053504.0, - "73": 1132053504.0, - "74": 
1132053504.0, - "75": 1132053504.0, - "76": 1132053504.0, - "77": 1132053504.0, - "78": 1132053504.0, - "79": 1132053504.0, - "80": 1132053504.0, - "81": 1132053504.0, - "82": 1132053504.0, - "83": 1132053504.0, - "84": 1132053504.0, - "85": 1132053504.0, - "86": 1132053504.0, - "87": 1132053504.0, - "88": 1132053504.0, - "89": 1132053504.0, - "90": 1132053504.0, - "91": 1132053504.0, - "92": 1132053504.0, - "93": 1132053504.0, - "94": 1132053504.0, - "95": 1132053504.0, - "96": 1132053504.0, - "97": 1132053504.0, - "98": 1132053504.0, - "99": 1132053504.0, - "100": 1132053504.0 + "1": 1131791360.0, + "2": 1131791360.0, + "3": 1131791360.0, + "4": 1131791360.0, + "5": 1131791360.0, + "6": 1131791360.0, + "7": 1131791360.0, + "8": 1131791360.0, + "9": 1131791360.0, + "10": 1131791360.0, + "11": 1131791360.0, + "12": 1131791360.0, + "13": 1131791360.0, + "14": 1131791360.0, + "15": 1131791360.0, + "16": 1131791360.0, + "17": 1131791360.0, + "18": 1131791360.0, + "19": 1131791360.0, + "20": 1131791360.0, + "21": 1131791360.0, + "22": 1131791360.0, + "23": 1131791360.0, + "24": 1131791360.0, + "25": 1131791360.0, + "26": 1131791360.0, + "27": 1131791360.0, + "28": 1131791360.0, + "29": 1131791360.0, + "30": 1131791360.0, + "31": 1131791360.0, + "32": 1131791360.0, + "33": 1131791360.0, + "34": 1131791360.0, + "35": 1131791360.0, + "36": 1131791360.0, + "37": 1131791360.0, + "38": 1131791360.0, + "39": 1131791360.0, + "40": 1131791360.0, + "41": 1131791360.0, + "42": 1131791360.0, + "43": 1131791360.0, + "44": 1131791360.0, + "45": 1131791360.0, + "46": 1131791360.0, + "47": 1131791360.0, + "48": 1131791360.0, + "49": 1131791360.0, + "50": 1131791360.0, + "51": 1131791360.0, + "52": 1131791360.0, + "53": 1131791360.0, + "54": 1131791360.0, + "55": 1131791360.0, + "56": 1131791360.0, + "57": 1131791360.0, + "58": 1131791360.0, + "59": 1131791360.0, + "60": 1131791360.0, + "61": 1131791360.0, + "62": 1131791360.0, + "63": 1131791360.0, + "64": 1131791360.0, + "65": 
1131791360.0, + "66": 1131791360.0, + "67": 1131791360.0, + "68": 1131791360.0, + "69": 1131791360.0, + "70": 1131791360.0, + "71": 1131791360.0, + "72": 1131791360.0, + "73": 1131791360.0, + "74": 1131791360.0, + "75": 1131791360.0, + "76": 1131791360.0, + "77": 1131791360.0, + "78": 1131791360.0, + "79": 1131791360.0, + "80": 1131791360.0, + "81": 1131791360.0, + "82": 1131791360.0, + "83": 1131791360.0, + "84": 1131791360.0, + "85": 1131791360.0, + "86": 1131791360.0, + "87": 1131791360.0, + "88": 1131791360.0, + "89": 1131791360.0, + "90": 1131791360.0, + "91": 1131791360.0, + "92": 1131791360.0, + "93": 1131791360.0, + "94": 1131791360.0, + "95": 1131791360.0, + "96": 1131791360.0, + "97": 1131791360.0, + "98": 1131791360.0, + "99": 1131791360.0, + "100": 1131791360.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1409266176.0, - "2": 1864166912.0, - "3": 1864166912.0, - "4": 1864166912.0, - "5": 1864166912.0, - "6": 1864166912.0, - "7": 1864166912.0, - "8": 1864166912.0, - "9": 1864166912.0, - "10": 1864166912.0, - "11": 1864166912.0, - "12": 1864166912.0, - "13": 1864166912.0, - "14": 1864166912.0, - "15": 1864166912.0, - "16": 1864166912.0, - "17": 1864166912.0, - "18": 1864166912.0, - "19": 1864166912.0, - "20": 1864166912.0, - "21": 1864166912.0, - "22": 1864166912.0, - "23": 1864166912.0, - "24": 1864166912.0, - "25": 1864166912.0, - "26": 1864166912.0, - "27": 1864166912.0, - "28": 1864166912.0, - "29": 1864166912.0, - "30": 1864166912.0, - "31": 1864166912.0, - "32": 1864166912.0, - "33": 1864166912.0, - "34": 1864166912.0, - "35": 1864166912.0, - "36": 1864166912.0, - "37": 1864166912.0, - "38": 1864166912.0, - "39": 1864166912.0, - "40": 1864166912.0, - "41": 1864166912.0, - "42": 1864166912.0, - "43": 1864166912.0, - "44": 1864166912.0, - "45": 1864166912.0, - "46": 1864166912.0, - "47": 1864166912.0, - "48": 1864166912.0, - "49": 1864166912.0, - "50": 1864166912.0, - "51": 
1864166912.0, - "52": 1864166912.0, - "53": 1864166912.0, - "54": 1864166912.0, - "55": 1864166912.0, - "56": 1864166912.0, - "57": 1864166912.0, - "58": 1864166912.0, - "59": 1864166912.0, - "60": 1864166912.0, - "61": 1864166912.0, - "62": 1864166912.0, - "63": 1864166912.0, - "64": 1864166912.0, - "65": 1864166912.0, - "66": 1864166912.0, - "67": 1864166912.0, - "68": 1864166912.0, - "69": 1864166912.0, - "70": 1864166912.0, - "71": 1864166912.0, - "72": 1864166912.0, - "73": 1864166912.0, - "74": 1864166912.0, - "75": 1864166912.0, - "76": 1864166912.0, - "77": 1864166912.0, - "78": 1864166912.0, - "79": 1864166912.0, - "80": 1864166912.0, - "81": 1864166912.0, - "82": 1864166912.0, - "83": 1864166912.0, - "84": 1864166912.0, - "85": 1864166912.0, - "86": 1864166912.0, - "87": 1864166912.0, - "88": 1864166912.0, - "89": 1864166912.0, - "90": 1864166912.0, - "91": 1864166912.0, - "92": 1864166912.0, - "93": 1864166912.0, - "94": 1864166912.0, - "95": 1864166912.0, - "96": 1864166912.0, - "97": 1864166912.0, - "98": 1864166912.0, - "99": 1864166912.0, - "100": 1864166912.0 + "1": 1410773504.0, + "2": 1862789632.0, + "3": 1862789632.0, + "4": 1862789632.0, + "5": 1862789632.0, + "6": 1862789632.0, + "7": 1862789632.0, + "8": 1862789632.0, + "9": 1862789632.0, + "10": 1862789632.0, + "11": 1862789632.0, + "12": 1862789632.0, + "13": 1862789632.0, + "14": 1862789632.0, + "15": 1862789632.0, + "16": 1862789632.0, + "17": 1862789632.0, + "18": 1862789632.0, + "19": 1862789632.0, + "20": 1862789632.0, + "21": 1862789632.0, + "22": 1862789632.0, + "23": 1862789632.0, + "24": 1862789632.0, + "25": 1862789632.0, + "26": 1862789632.0, + "27": 1862789632.0, + "28": 1862789632.0, + "29": 1862789632.0, + "30": 1862789632.0, + "31": 1862789632.0, + "32": 1862789632.0, + "33": 1862789632.0, + "34": 1862789632.0, + "35": 1862789632.0, + "36": 1862789632.0, + "37": 1862789632.0, + "38": 1862789632.0, + "39": 1862789632.0, + "40": 1862789632.0, + "41": 1862789632.0, + "42": 
1862789632.0, + "43": 1862789632.0, + "44": 1862789632.0, + "45": 1862789632.0, + "46": 1862789632.0, + "47": 1862789632.0, + "48": 1862789632.0, + "49": 1862789632.0, + "50": 1862789632.0, + "51": 1862789632.0, + "52": 1862789632.0, + "53": 1862789632.0, + "54": 1862789632.0, + "55": 1862789632.0, + "56": 1862789632.0, + "57": 1862789632.0, + "58": 1862789632.0, + "59": 1862789632.0, + "60": 1862789632.0, + "61": 1862789632.0, + "62": 1862789632.0, + "63": 1862789632.0, + "64": 1862789632.0, + "65": 1862789632.0, + "66": 1862789632.0, + "67": 1862789632.0, + "68": 1862789632.0, + "69": 1862789632.0, + "70": 1862789632.0, + "71": 1862789632.0, + "72": 1862789632.0, + "73": 1862789632.0, + "74": 1862789632.0, + "75": 1862789632.0, + "76": 1862789632.0, + "77": 1862789632.0, + "78": 1862789632.0, + "79": 1862789632.0, + "80": 1862789632.0, + "81": 1862789632.0, + "82": 1862789632.0, + "83": 1862789632.0, + "84": 1862789632.0, + "85": 1862789632.0, + "86": 1862789632.0, + "87": 1862789632.0, + "88": 1862789632.0, + "89": 1862789632.0, + "90": 1862789632.0, + "91": 1862789632.0, + "92": 1862789632.0, + "93": 1862789632.0, + "94": 1862789632.0, + "95": 1862789632.0, + "96": 1862789632.0, + "97": 1862789632.0, + "98": 1862789632.0, + "99": 1862789632.0, + "100": 1862789632.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.54009, - "2": 0.66845, - "3": 0.64084, - "4": 0.64526, - "5": 0.64331, - "6": 0.65463, - "7": 0.63991, - "8": 0.63854, - "9": 0.64034, - "10": 0.63886, - "11": 0.63968, - "12": 0.64441, - "13": 0.63828, - "14": 0.64647, - "15": 0.64199, - "16": 0.63783, - "17": 0.64359, - "18": 0.66439, - "19": 0.64718, - "20": 0.63999, - "21": 0.65677, - "22": 0.95191, - "23": 0.64765, - "24": 0.98317, - "25": 1.63221, - "26": 0.64915, - "27": 0.64318, - "28": 0.99238, - "29": 0.64655, - "30": 0.64693, - "31": 0.64241, - "32": 0.98967, - "33": 0.64928, - "34": 0.64294, - "35": 0.65629, - "36": 0.64358, - "37": 
0.64814, - "38": 0.64325, - "39": 0.64509, - "40": 0.64733, - "41": 0.64693, - "42": 0.65392, - "43": 0.64721, - "44": 0.64487, - "45": 0.64766, - "46": 0.65872, - "47": 0.65402, - "48": 0.65486, - "49": 0.64433, - "50": 0.64917, - "51": 0.64197, - "52": 0.64647, - "53": 0.64656, - "54": 0.64815, - "55": 0.64573, - "56": 0.6539, - "57": 0.64582, - "58": 0.64668, - "59": 0.64431, - "60": 0.64957, - "61": 0.64703, - "62": 0.64671, - "63": 0.65979, - "64": 0.64599, - "65": 0.6466, - "66": 0.64754, - "67": 0.6471, - "68": 0.64756, - "69": 0.64621, - "70": 0.65906, - "71": 0.64587, - "72": 0.65969, - "73": 0.64476, - "74": 0.65304, - "75": 0.64786, - "76": 0.65077, - "77": 0.66405, - "78": 0.6472, - "79": 0.64431, - "80": 0.64472, - "81": 0.64407, - "82": 0.64326, - "83": 0.93161, - "84": 0.65573, - "85": 0.63999, - "86": 0.64393, - "87": 0.92064, - "88": 0.64399, - "89": 0.64306, - "90": 0.64439, - "91": 0.6414, - "92": 0.64504, - "93": 0.64858, - "94": 0.64041, - "95": 0.64497, - "96": 0.64493, - "97": 0.64508, - "98": 0.6444, - "99": 0.64587, - "100": 0.64886 + "1": 25.99742, + "2": 0.74354, + "3": 0.5991, + "4": 0.58509, + "5": 0.57829, + "6": 0.59904, + "7": 0.60788, + "8": 0.59588, + "9": 0.59262, + "10": 0.59201, + "11": 0.6011, + "12": 0.58294, + "13": 1.00971, + "14": 1.2235, + "15": 0.59824, + "16": 0.59871, + "17": 0.59553, + "18": 0.60447, + "19": 0.59305, + "20": 0.59516, + "21": 0.59434, + "22": 0.59253, + "23": 0.59245, + "24": 0.59395, + "25": 0.59087, + "26": 0.59548, + "27": 0.59981, + "28": 0.59298, + "29": 0.60365, + "30": 0.59179, + "31": 0.59532, + "32": 0.59589, + "33": 0.58615, + "34": 0.5832, + "35": 0.58623, + "36": 0.58286, + "37": 0.58446, + "38": 0.59392, + "39": 0.60039, + "40": 0.59556, + "41": 0.59642, + "42": 0.60532, + "43": 0.6013, + "44": 0.60295, + "45": 0.60146, + "46": 0.58736, + "47": 0.58628, + "48": 0.58704, + "49": 0.5858, + "50": 0.59709, + "51": 0.61827, + "52": 0.58553, + "53": 0.58061, + "54": 0.57839, + "55": 0.58578, + 
"56": 0.59768, + "57": 0.59453, + "58": 0.61716, + "59": 0.57953, + "60": 0.57769, + "61": 0.57901, + "62": 0.58074, + "63": 0.58369, + "64": 0.57997, + "65": 0.58275, + "66": 0.58343, + "67": 0.57961, + "68": 0.57755, + "69": 0.58701, + "70": 0.57588, + "71": 0.5775, + "72": 0.57925, + "73": 0.57648, + "74": 0.57923, + "75": 0.58354, + "76": 0.58196, + "77": 0.57857, + "78": 0.58636, + "79": 0.58475, + "80": 0.58428, + "81": 0.58017, + "82": 0.58459, + "83": 0.58698, + "84": 0.57714, + "85": 0.57756, + "86": 0.58774, + "87": 0.57843, + "88": 0.57647, + "89": 0.57865, + "90": 0.5784, + "91": 0.57912, + "92": 0.57658, + "93": 0.58094, + "94": 0.57865, + "95": 0.58251, + "96": 0.62025, + "97": 0.58429, + "98": 0.59488, + "99": 0.58183, + "100": 0.583 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json index 11ef3fbd8c5..448fe2595ce 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1132053504.0, - "2": 1132053504.0, - "3": 1132053504.0, - "4": 1132053504.0, - "5": 1132053504.0, - "6": 1132053504.0, - "7": 1132053504.0, - "8": 1132053504.0, - "9": 1132053504.0, - "10": 1132053504.0, - "11": 1132053504.0, - "12": 1132053504.0, - "13": 1132053504.0, - "14": 1132053504.0, - "15": 1132053504.0, - "16": 1132053504.0, - "17": 1132053504.0, - "18": 1132053504.0, - "19": 1132053504.0, - "20": 1132053504.0, - "21": 1132053504.0, - "22": 1132053504.0, - "23": 1132053504.0, - "24": 1132053504.0, - "25": 1132053504.0, - "26": 1132053504.0, - "27": 1132053504.0, - "28": 1132053504.0, - "29": 1132053504.0, - "30": 
1132053504.0, - "31": 1132053504.0, - "32": 1132053504.0, - "33": 1132053504.0, - "34": 1132053504.0, - "35": 1132053504.0, - "36": 1132053504.0, - "37": 1132053504.0, - "38": 1132053504.0, - "39": 1132053504.0, - "40": 1132053504.0, - "41": 1132053504.0, - "42": 1132053504.0, - "43": 1132053504.0, - "44": 1132053504.0, - "45": 1132053504.0, - "46": 1132053504.0, - "47": 1132053504.0, - "48": 1132053504.0, - "49": 1132053504.0, - "50": 1132053504.0, - "51": 1132053504.0, - "52": 1132053504.0, - "53": 1132053504.0, - "54": 1132053504.0, - "55": 1132053504.0, - "56": 1132053504.0, - "57": 1132053504.0, - "58": 1132053504.0, - "59": 1132053504.0, - "60": 1132053504.0, - "61": 1132053504.0, - "62": 1132053504.0, - "63": 1132053504.0, - "64": 1132053504.0, - "65": 1132053504.0, - "66": 1132053504.0, - "67": 1132053504.0, - "68": 1132053504.0, - "69": 1132053504.0, - "70": 1132053504.0, - "71": 1132053504.0, - "72": 1132053504.0, - "73": 1132053504.0, - "74": 1132053504.0, - "75": 1132053504.0, - "76": 1132053504.0, - "77": 1132053504.0, - "78": 1132053504.0, - "79": 1132053504.0, - "80": 1132053504.0, - "81": 1132053504.0, - "82": 1132053504.0, - "83": 1132053504.0, - "84": 1132053504.0, - "85": 1132053504.0, - "86": 1132053504.0, - "87": 1132053504.0, - "88": 1132053504.0, - "89": 1132053504.0, - "90": 1132053504.0, - "91": 1132053504.0, - "92": 1132053504.0, - "93": 1132053504.0, - "94": 1132053504.0, - "95": 1132053504.0, - "96": 1132053504.0, - "97": 1132053504.0, - "98": 1132053504.0, - "99": 1132053504.0, - "100": 1132053504.0 + "1": 1131791360.0, + "2": 1131791360.0, + "3": 1131791360.0, + "4": 1131791360.0, + "5": 1131791360.0, + "6": 1131791360.0, + "7": 1131791360.0, + "8": 1131791360.0, + "9": 1131791360.0, + "10": 1131791360.0, + "11": 1131791360.0, + "12": 1131791360.0, + "13": 1131791360.0, + "14": 1131791360.0, + "15": 1131791360.0, + "16": 1131791360.0, + "17": 1131791360.0, + "18": 1131791360.0, + "19": 1131791360.0, + "20": 1131791360.0, + "21": 
1131791360.0, + "22": 1131791360.0, + "23": 1131791360.0, + "24": 1131791360.0, + "25": 1131791360.0, + "26": 1131791360.0, + "27": 1131791360.0, + "28": 1131791360.0, + "29": 1131791360.0, + "30": 1131791360.0, + "31": 1131791360.0, + "32": 1131791360.0, + "33": 1131791360.0, + "34": 1131791360.0, + "35": 1131791360.0, + "36": 1131791360.0, + "37": 1131791360.0, + "38": 1131791360.0, + "39": 1131791360.0, + "40": 1131791360.0, + "41": 1131791360.0, + "42": 1131791360.0, + "43": 1131791360.0, + "44": 1131791360.0, + "45": 1131791360.0, + "46": 1131791360.0, + "47": 1131791360.0, + "48": 1131791360.0, + "49": 1131791360.0, + "50": 1131791360.0, + "51": 1131791360.0, + "52": 1131791360.0, + "53": 1131791360.0, + "54": 1131791360.0, + "55": 1131791360.0, + "56": 1131791360.0, + "57": 1131791360.0, + "58": 1131791360.0, + "59": 1131791360.0, + "60": 1131791360.0, + "61": 1131791360.0, + "62": 1131791360.0, + "63": 1131791360.0, + "64": 1131791360.0, + "65": 1131791360.0, + "66": 1131791360.0, + "67": 1131791360.0, + "68": 1131791360.0, + "69": 1131791360.0, + "70": 1131791360.0, + "71": 1131791360.0, + "72": 1131791360.0, + "73": 1131791360.0, + "74": 1131791360.0, + "75": 1131791360.0, + "76": 1131791360.0, + "77": 1131791360.0, + "78": 1131791360.0, + "79": 1131791360.0, + "80": 1131791360.0, + "81": 1131791360.0, + "82": 1131791360.0, + "83": 1131791360.0, + "84": 1131791360.0, + "85": 1131791360.0, + "86": 1131791360.0, + "87": 1131791360.0, + "88": 1131791360.0, + "89": 1131791360.0, + "90": 1131791360.0, + "91": 1131791360.0, + "92": 1131791360.0, + "93": 1131791360.0, + "94": 1131791360.0, + "95": 1131791360.0, + "96": 1131791360.0, + "97": 1131791360.0, + "98": 1131791360.0, + "99": 1131791360.0, + "100": 1131791360.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1409266176.0, - "2": 1864166912.0, - "3": 1864166912.0, - "4": 1864166912.0, - "5": 1864166912.0, - "6": 1864166912.0, - "7": 
1864166912.0, - "8": 1864166912.0, - "9": 1864166912.0, - "10": 1864166912.0, - "11": 1864166912.0, - "12": 1864166912.0, - "13": 1864166912.0, - "14": 1864166912.0, - "15": 1864166912.0, - "16": 1864166912.0, - "17": 1864166912.0, - "18": 1864166912.0, - "19": 1864166912.0, - "20": 1864166912.0, - "21": 1864166912.0, - "22": 1864166912.0, - "23": 1864166912.0, - "24": 1864166912.0, - "25": 1864166912.0, - "26": 1864166912.0, - "27": 1864166912.0, - "28": 1864166912.0, - "29": 1864166912.0, - "30": 1864166912.0, - "31": 1864166912.0, - "32": 1864166912.0, - "33": 1864166912.0, - "34": 1864166912.0, - "35": 1864166912.0, - "36": 1864166912.0, - "37": 1864166912.0, - "38": 1864166912.0, - "39": 1864166912.0, - "40": 1864166912.0, - "41": 1864166912.0, - "42": 1864166912.0, - "43": 1864166912.0, - "44": 1864166912.0, - "45": 1864166912.0, - "46": 1864166912.0, - "47": 1864166912.0, - "48": 1864166912.0, - "49": 1864166912.0, - "50": 1864166912.0, - "51": 1864166912.0, - "52": 1864166912.0, - "53": 1864166912.0, - "54": 1864166912.0, - "55": 1864166912.0, - "56": 1864166912.0, - "57": 1864166912.0, - "58": 1864166912.0, - "59": 1864166912.0, - "60": 1864166912.0, - "61": 1864166912.0, - "62": 1864166912.0, - "63": 1864166912.0, - "64": 1864166912.0, - "65": 1864166912.0, - "66": 1864166912.0, - "67": 1864166912.0, - "68": 1864166912.0, - "69": 1864166912.0, - "70": 1864166912.0, - "71": 1864166912.0, - "72": 1864166912.0, - "73": 1864166912.0, - "74": 1864166912.0, - "75": 1864166912.0, - "76": 1864166912.0, - "77": 1864166912.0, - "78": 1864166912.0, - "79": 1864166912.0, - "80": 1864166912.0, - "81": 1864166912.0, - "82": 1864166912.0, - "83": 1864166912.0, - "84": 1864166912.0, - "85": 1864166912.0, - "86": 1864166912.0, - "87": 1864166912.0, - "88": 1864166912.0, - "89": 1864166912.0, - "90": 1864166912.0, - "91": 1864166912.0, - "92": 1864166912.0, - "93": 1864166912.0, - "94": 1864166912.0, - "95": 1864166912.0, - "96": 1864166912.0, - "97": 1864166912.0, - "98": 
1864166912.0, - "99": 1864166912.0, - "100": 1864166912.0 + "1": 1410773504.0, + "2": 1862789632.0, + "3": 1862789632.0, + "4": 1862789632.0, + "5": 1862789632.0, + "6": 1862789632.0, + "7": 1862789632.0, + "8": 1862789632.0, + "9": 1862789632.0, + "10": 1862789632.0, + "11": 1862789632.0, + "12": 1862789632.0, + "13": 1862789632.0, + "14": 1862789632.0, + "15": 1862789632.0, + "16": 1862789632.0, + "17": 1862789632.0, + "18": 1862789632.0, + "19": 1862789632.0, + "20": 1862789632.0, + "21": 1862789632.0, + "22": 1862789632.0, + "23": 1862789632.0, + "24": 1862789632.0, + "25": 1862789632.0, + "26": 1862789632.0, + "27": 1862789632.0, + "28": 1862789632.0, + "29": 1862789632.0, + "30": 1862789632.0, + "31": 1862789632.0, + "32": 1862789632.0, + "33": 1862789632.0, + "34": 1862789632.0, + "35": 1862789632.0, + "36": 1862789632.0, + "37": 1862789632.0, + "38": 1862789632.0, + "39": 1862789632.0, + "40": 1862789632.0, + "41": 1862789632.0, + "42": 1862789632.0, + "43": 1862789632.0, + "44": 1862789632.0, + "45": 1862789632.0, + "46": 1862789632.0, + "47": 1862789632.0, + "48": 1862789632.0, + "49": 1862789632.0, + "50": 1862789632.0, + "51": 1862789632.0, + "52": 1862789632.0, + "53": 1862789632.0, + "54": 1862789632.0, + "55": 1862789632.0, + "56": 1862789632.0, + "57": 1862789632.0, + "58": 1862789632.0, + "59": 1862789632.0, + "60": 1862789632.0, + "61": 1862789632.0, + "62": 1862789632.0, + "63": 1862789632.0, + "64": 1862789632.0, + "65": 1862789632.0, + "66": 1862789632.0, + "67": 1862789632.0, + "68": 1862789632.0, + "69": 1862789632.0, + "70": 1862789632.0, + "71": 1862789632.0, + "72": 1862789632.0, + "73": 1862789632.0, + "74": 1862789632.0, + "75": 1862789632.0, + "76": 1862789632.0, + "77": 1862789632.0, + "78": 1862789632.0, + "79": 1862789632.0, + "80": 1862789632.0, + "81": 1862789632.0, + "82": 1862789632.0, + "83": 1862789632.0, + "84": 1862789632.0, + "85": 1862789632.0, + "86": 1862789632.0, + "87": 1862789632.0, + "88": 1862789632.0, + "89": 
1862789632.0, + "90": 1862789632.0, + "91": 1862789632.0, + "92": 1862789632.0, + "93": 1862789632.0, + "94": 1862789632.0, + "95": 1862789632.0, + "96": 1862789632.0, + "97": 1862789632.0, + "98": 1862789632.0, + "99": 1862789632.0, + "100": 1862789632.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.74091, - "2": 0.66943, - "3": 0.64954, - "4": 0.64695, - "5": 0.65419, - "6": 0.6513, - "7": 0.64556, - "8": 0.6385, - "9": 0.64307, - "10": 0.63679, - "11": 0.64386, - "12": 0.64012, - "13": 0.63889, - "14": 0.63958, - "15": 0.64024, - "16": 0.63721, - "17": 0.6492, - "18": 0.65247, - "19": 0.64523, - "20": 1.0041, - "21": 0.64739, - "22": 1.02158, - "23": 0.96313, - "24": 0.64631, - "25": 0.64337, - "26": 0.64702, - "27": 0.64516, - "28": 0.64748, - "29": 0.64657, - "30": 0.95958, - "31": 1.05772, - "32": 0.64319, - "33": 0.64455, - "34": 0.64044, - "35": 0.6445, - "36": 0.64649, - "37": 0.64593, - "38": 0.64912, - "39": 0.64665, - "40": 0.64585, - "41": 0.64603, - "42": 0.64765, - "43": 0.64548, - "44": 0.64732, - "45": 0.64996, - "46": 0.65909, - "47": 0.66335, - "48": 0.64625, - "49": 0.64641, - "50": 0.64822, - "51": 0.65982, - "52": 0.64882, - "53": 0.64892, - "54": 0.64636, - "55": 0.64591, - "56": 0.65232, - "57": 0.64591, - "58": 0.64572, - "59": 0.64949, - "60": 0.64277, - "61": 0.64766, - "62": 0.64726, - "63": 0.64637, - "64": 0.64901, - "65": 0.6476, - "66": 0.64458, - "67": 0.64951, - "68": 0.64438, - "69": 0.64854, - "70": 0.65268, - "71": 0.64762, - "72": 1.02587, - "73": 0.65274, - "74": 0.65942, - "75": 0.65091, - "76": 0.65181, - "77": 0.65582, - "78": 0.64434, - "79": 0.65116, - "80": 0.65073, - "81": 0.64645, - "82": 0.65405, - "83": 0.65107, - "84": 0.64883, - "85": 0.94272, - "86": 0.65641, - "87": 0.99204, - "88": 0.96199, - "89": 0.64856, - "90": 0.65165, - "91": 0.65163, - "92": 0.6506, - "93": 0.64828, - "94": 0.64682, - "95": 1.01586, - "96": 1.04151, - "97": 0.65481, - "98": 
0.64703, - "99": 0.64964, - "100": 0.65343 + "1": 25.75145, + "2": 0.68955, + "3": 0.62891, + "4": 0.62371, + "5": 0.64907, + "6": 0.63218, + "7": 0.66755, + "8": 0.61813, + "9": 0.59993, + "10": 0.59659, + "11": 0.60388, + "12": 0.60369, + "13": 1.0243, + "14": 1.00512, + "15": 0.61333, + "16": 0.61377, + "17": 0.6103, + "18": 0.60779, + "19": 0.6087, + "20": 0.60685, + "21": 0.61179, + "22": 0.61036, + "23": 0.60843, + "24": 0.61334, + "25": 0.61104, + "26": 0.60721, + "27": 0.60906, + "28": 0.61093, + "29": 0.60885, + "30": 0.60331, + "31": 0.60347, + "32": 0.61091, + "33": 0.60942, + "34": 0.59484, + "35": 0.59387, + "36": 0.59382, + "37": 0.60178, + "38": 0.59578, + "39": 0.59527, + "40": 0.59259, + "41": 0.65592, + "42": 0.60449, + "43": 0.59683, + "44": 0.59604, + "45": 0.59257, + "46": 0.59555, + "47": 0.59173, + "48": 0.58982, + "49": 0.59611, + "50": 0.59259, + "51": 0.6131, + "52": 0.61177, + "53": 0.59702, + "54": 0.59373, + "55": 0.59877, + "56": 0.59405, + "57": 0.59369, + "58": 0.59622, + "59": 0.59453, + "60": 0.59018, + "61": 0.59521, + "62": 0.59435, + "63": 0.59412, + "64": 0.5937, + "65": 0.5926, + "66": 0.61412, + "67": 0.60902, + "68": 0.59153, + "69": 0.59219, + "70": 0.59689, + "71": 0.59441, + "72": 0.59498, + "73": 0.59486, + "74": 0.5906, + "75": 0.59758, + "76": 0.59428, + "77": 0.60149, + "78": 0.59424, + "79": 0.59801, + "80": 0.59552, + "81": 0.60182, + "82": 0.58057, + "83": 0.58573, + "84": 0.58157, + "85": 0.93106, + "86": 0.58378, + "87": 1.02253, + "88": 0.60509, + "89": 1.03608, + "90": 0.59228, + "91": 0.59375, + "92": 0.59564, + "93": 0.59607, + "94": 0.59269, + "95": 0.59143, + "96": 0.59188, + "97": 0.59202, + "98": 0.60085, + "99": 0.60637, + "100": 0.60502 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json 
new file mode 100644 index 00000000000..54505a38bfd --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.36541, + "52": 7.12192, + "53": 7.09189, + "54": 7.22759, + "55": 7.13584, + "56": 7.20822, + "57": 7.31316, + "58": 6.99088, + "59": 7.09934, + "60": 7.12683, + "61": 7.1014, + "62": 7.23954, + "63": 7.14417, + "64": 7.06836, + "65": 6.98412, + "66": 7.03768, + "67": 7.02847, + "68": 7.1299, + "69": 7.01456, + "70": 7.04997, + "71": 6.89408, + "72": 6.98553, + "73": 6.96694, + "74": 6.90297, + "75": 7.0574, + "76": 6.9581, + "77": 7.06903, + "78": 7.02133, + "79": 6.8504, + "80": 6.91935, + "81": 6.95874, + "82": 7.04745, + "83": 6.98522, + "84": 6.99712, + "85": 6.83565, + "86": 7.04156, + "87": 6.96476, + "88": 6.89883, + "89": 6.80051, + "90": 7.22593, + "91": 6.70562, + "92": 7.0381, + "93": 6.88685, + "94": 7.03908, + "95": 6.84815, + "96": 6.95281, + "97": 6.94344, + "98": 6.86987, + "99": 6.99502, + "100": 6.96683 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 
"nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 41112.0, + "52": 43837.0, + "53": 43913.0, + "54": 41704.0, + "55": 43870.0, + "56": 43209.0, + "57": 42636.0, + "58": 43841.0, + "59": 44630.0, + "60": 41219.0, + "61": 39702.0, + "62": 44739.0, + "63": 44651.0, + "64": 45372.0, + "65": 44682.0, + "66": 45351.0, + "67": 43174.0, + "68": 42502.0, + "69": 43834.0, + "70": 45514.0, + "71": 43291.0, + "72": 44767.0, + "73": 45384.0, + "74": 42457.0, + "75": 44673.0, + "76": 43876.0, + "77": 42026.0, + "78": 40350.0, + "79": 38918.0, + "80": 41092.0, + "81": 45364.0, + "82": 43198.0, + "83": 38467.0, + "84": 42477.0, + "85": 43981.0, + "86": 45667.0, + "87": 40863.0, + "88": 41772.0, + "89": 41104.0, + "90": 44669.0, + "91": 46134.0, + "92": 41634.0, + "93": 43241.0, + "94": 39538.0, + "95": 43915.0, + "96": 44683.0, + "97": 45405.0, + "98": 41791.0, + "99": 45414.0, + "100": 42458.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": 
"nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1133037568.0, + "52": 1133037568.0, + "53": 1133037568.0, + "54": 1133037568.0, + "55": 1133037568.0, + "56": 1133037568.0, + "57": 1133037568.0, + "58": 1133037568.0, + "59": 1133037568.0, + "60": 1133037568.0, + "61": 1133037568.0, + "62": 1133037568.0, + "63": 1133037568.0, + "64": 1133037568.0, + "65": 1133037568.0, + "66": 1133037568.0, + "67": 1133037568.0, + "68": 1133037568.0, + "69": 1133037568.0, + "70": 1133037568.0, + "71": 1133037568.0, + "72": 1133037568.0, + "73": 1133037568.0, + "74": 1133037568.0, + "75": 1133037568.0, + "76": 1133037568.0, + "77": 1133037568.0, + "78": 1133037568.0, + "79": 1133037568.0, + "80": 1133037568.0, + "81": 1133037568.0, + "82": 1133037568.0, + "83": 1133037568.0, + "84": 1133037568.0, + "85": 1133037568.0, + "86": 1133037568.0, + "87": 1133037568.0, + "88": 1133037568.0, + "89": 1133037568.0, + "90": 1133037568.0, + "91": 1133037568.0, + "92": 1133037568.0, + "93": 1133037568.0, + "94": 1133037568.0, + "95": 1133037568.0, + "96": 1133037568.0, + "97": 1133037568.0, + "98": 1133037568.0, + "99": 1133037568.0, + "100": 1133037568.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": 
"nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1863903744.0, + "52": 1863904768.0, + "53": 1863904768.0, + "54": 1863904768.0, + "55": 1863904768.0, + "56": 1863904768.0, + "57": 1863904768.0, + "58": 1863904768.0, + "59": 1863904768.0, + "60": 1863904768.0, + "61": 1863904768.0, + "62": 1863904768.0, + "63": 1863904768.0, + "64": 1863904768.0, + "65": 1863904768.0, + "66": 1863904768.0, + "67": 1863904768.0, + "68": 1863904768.0, + "69": 1863904768.0, + "70": 1863904768.0, + "71": 1863904768.0, + "72": 1863904768.0, + "73": 1863904768.0, + "74": 1863904768.0, + "75": 1863904768.0, + "76": 1863904768.0, + "77": 1863904768.0, + "78": 1863904768.0, + "79": 1863904768.0, + "80": 1863904768.0, + "81": 1863904768.0, + "82": 1863904768.0, + "83": 1863904768.0, + "84": 1863904768.0, + "85": 1863904768.0, + "86": 1863904768.0, + "87": 1863904768.0, + "88": 1863904768.0, + "89": 1863904768.0, + "90": 1863904768.0, + "91": 1863904768.0, + "92": 1863904768.0, + "93": 1863904768.0, + "94": 1863904768.0, + "95": 1863904768.0, + "96": 1863904768.0, + "97": 1863904768.0, + "98": 1863904768.0, + "99": 1863904768.0, + "100": 1863904768.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + 
"29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 23.83009, + "52": 0.76142, + "53": 0.67196, + "54": 0.6081, + "55": 0.60646, + "56": 0.60713, + "57": 0.6272, + "58": 0.62763, + "59": 0.62688, + "60": 0.62193, + "61": 0.62167, + "62": 0.61817, + "63": 0.61775, + "64": 0.5974, + "65": 0.60155, + "66": 0.60696, + "67": 0.59768, + "68": 0.59371, + "69": 0.59479, + "70": 0.59367, + "71": 0.60012, + "72": 0.5983, + "73": 0.60139, + "74": 0.60001, + "75": 0.59852, + "76": 0.59622, + "77": 0.59604, + "78": 0.59666, + "79": 0.6022, + "80": 0.62234, + "81": 0.62179, + "82": 0.62692, + "83": 0.62266, + "84": 0.6182, + "85": 0.62589, + "86": 0.62575, + "87": 0.59517, + "88": 0.60178, + "89": 0.60479, + "90": 0.61692, + "91": 0.60273, + "92": 0.61308, + "93": 0.6039, + "94": 0.62096, + "95": 0.62166, + "96": 0.61878, + "97": 0.6187, + "98": 0.6215, + "99": 0.62325, + "100": 0.61948 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json index 5aebe0d3c7a..8476c973a1a 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.34494, "5": 9.38931, "10": 9.01569, "15": 8.64631, "20": 8.26966, "25": 7.99493, "30": 7.87492, "35": 7.65834, "40": 7.50302, "45": 7.36143, "50": 7.19205, "55": 7.16852, "60": 7.16587, "65": 7.00099, "70": 7.07162, "75": 7.07611, "80": 6.95251, "85": 6.8641, 
"90": 7.25457, "95": 6.8601, "100": 6.99745}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 43305.0, "5": 45413.0, "10": 45379.0, "15": 43956.0, "20": 44818.0, "25": 42745.0, "30": 44042.0, "35": 43297.0, "40": 43251.0, "45": 43345.0, "50": 43415.0, "55": 43960.0, "60": 41326.0, "65": 44730.0, "70": 45543.0, "75": 44684.0, "80": 41118.0, "85": 44024.0, "90": 44744.0, "95": 44092.0, "100": 42500.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 4158515200.0, "5": 4158515200.0, "10": 4158515200.0, "15": 4158515200.0, "20": 4158515200.0, "25": 4158515200.0, "30": 4158515200.0, "35": 4158515200.0, "40": 4158515200.0, "45": 4158515200.0, "50": 4158515200.0, "55": 4158515200.0, "60": 4158515200.0, "65": 4158515200.0, "70": 4158515200.0, "75": 4158515200.0, "80": 4158515200.0, "85": 4158515200.0, "90": 4158515200.0, "95": 4158515200.0, "100": 4158515200.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 4349380608.0, "5": 6187556864.0, "10": 6187556864.0, "15": 6187556864.0, "20": 6187556864.0, "25": 6187556864.0, "30": 6187556864.0, "35": 6187556864.0, "40": 6187556864.0, "45": 6187556864.0, "50": 6187556864.0, "55": 6187556864.0, "60": 6187556864.0, "65": 6187556864.0, "70": 6187556864.0, "75": 6187556864.0, "80": 6187556864.0, "85": 6187556864.0, "90": 6187556864.0, "95": 6187556864.0, "100": 6187556864.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 6.48832, "5": 0.2262, "10": 0.22726, "15": 0.22564, "20": 0.22623, "25": 0.22711, "30": 0.22781, "35": 0.2271, "40": 0.22647, "45": 0.2358, "50": 0.22658, "55": 0.22646, "60": 0.22506, "65": 0.2281, "70": 0.22663, "75": 0.2252, "80": 0.22659, "85": 0.22661, "90": 0.23186, "95": 0.24827, "100": 0.23899}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34494, + 
"2": 10.36431, + "3": 9.73158, + "4": 9.57928, + "5": 9.38931, + "6": 9.41074, + "7": 9.30545, + "8": 9.24868, + "9": 9.09349, + "10": 9.01569, + "11": 8.86286, + "12": 8.79096, + "13": 8.80892, + "14": 8.67669, + "15": 8.64631, + "16": 8.5398, + "17": 8.47895, + "18": 8.38945, + "19": 8.36156, + "20": 8.26966, + "21": 8.26333, + "22": 8.15066, + "23": 8.08893, + "24": 8.12421, + "25": 7.99493, + "26": 8.08494, + "27": 7.87755, + "28": 7.95863, + "29": 7.79585, + "30": 7.87492, + "31": 7.83245, + "32": 7.69489, + "33": 7.78469, + "34": 7.55767, + "35": 7.65834, + "36": 7.52881, + "37": 7.44912, + "38": 7.50398, + "39": 7.48056, + "40": 7.50302, + "41": 7.39767, + "42": 7.37206, + "43": 7.44301, + "44": 7.3811, + "45": 7.36143, + "46": 7.29415, + "47": 7.47498, + "48": 7.29564, + "49": 7.36092, + "50": 7.19205, + "51": 7.38769, + "52": 7.13773, + "53": 7.125, + "54": 7.23668, + "55": 7.16852, + "56": 7.22884, + "57": 7.34699, + "58": 7.03128, + "59": 7.1229, + "60": 7.16587, + "61": 7.1174, + "62": 7.26837, + "63": 7.16759, + "64": 7.08376, + "65": 7.00099, + "66": 7.07203, + "67": 7.05971, + "68": 7.14618, + "69": 7.03944, + "70": 7.07162, + "71": 6.91653, + "72": 7.02025, + "73": 6.9904, + "74": 6.9146, + "75": 7.07611, + "76": 6.97098, + "77": 7.08446, + "78": 7.03608, + "79": 6.88325, + "80": 6.95251, + "81": 6.985, + "82": 7.06843, + "83": 7.00882, + "84": 7.0181, + "85": 6.8641, + "86": 7.04979, + "87": 6.99342, + "88": 6.9238, + "89": 6.82406, + "90": 7.25457, + "91": 6.7226, + "92": 7.05372, + "93": 6.91688, + "94": 7.066, + "95": 6.8601, + "96": 6.98742, + "97": 6.96796, + "98": 6.89964, + "99": 7.02766, + "100": 6.99745 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43305.0, + "2": 44091.0, + "3": 44794.0, + "4": 42436.0, + "5": 45413.0, + "6": 40989.0, + "7": 43195.0, + "8": 45462.0, + "9": 42551.0, + "10": 45379.0, + "11": 44016.0, + "12": 44629.0, + "13": 43937.0, + "14": 46250.0, + "15": 
43956.0, + "16": 41728.0, + "17": 43873.0, + "18": 44716.0, + "19": 42648.0, + "20": 44818.0, + "21": 44812.0, + "22": 41883.0, + "23": 45468.0, + "24": 43112.0, + "25": 42745.0, + "26": 43949.0, + "27": 46268.0, + "28": 46429.0, + "29": 46199.0, + "30": 44042.0, + "31": 41264.0, + "32": 43413.0, + "33": 45478.0, + "34": 43375.0, + "35": 43297.0, + "36": 42545.0, + "37": 40689.0, + "38": 42575.0, + "39": 44772.0, + "40": 43251.0, + "41": 44707.0, + "42": 43261.0, + "43": 45506.0, + "44": 44652.0, + "45": 43345.0, + "46": 43935.0, + "47": 42506.0, + "48": 44693.0, + "49": 43200.0, + "50": 43415.0, + "51": 41174.0, + "52": 43885.0, + "53": 43959.0, + "54": 41961.0, + "55": 43960.0, + "56": 43269.0, + "57": 42561.0, + "58": 43898.0, + "59": 44654.0, + "60": 41326.0, + "61": 39744.0, + "62": 44774.0, + "63": 44682.0, + "64": 45396.0, + "65": 44730.0, + "66": 45388.0, + "67": 43196.0, + "68": 42556.0, + "69": 43825.0, + "70": 45543.0, + "71": 43407.0, + "72": 44832.0, + "73": 45412.0, + "74": 42502.0, + "75": 44684.0, + "76": 43926.0, + "77": 42100.0, + "78": 40525.0, + "79": 38954.0, + "80": 41118.0, + "81": 45412.0, + "82": 43238.0, + "83": 38495.0, + "84": 42524.0, + "85": 44024.0, + "86": 45749.0, + "87": 41116.0, + "88": 41798.0, + "89": 41078.0, + "90": 44744.0, + "91": 46266.0, + "92": 41865.0, + "93": 43254.0, + "94": 39588.0, + "95": 44092.0, + "96": 44732.0, + "97": 45474.0, + "98": 41859.0, + "99": 45537.0, + "100": 42500.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, + "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 4158515200.0, + "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, + "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, + "20": 4158515200.0, + 
"21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, + "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, + "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, + "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, + "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, + "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, + "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, + "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, + "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, + "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, + "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, + "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 4158515200.0, + "79": 4158515200.0, + "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, + "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, + "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, + "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, + "100": 4158515200.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, + "5": 6187556864.0, + "6": 6187556864.0, + 
"7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, + "10": 6187556864.0, + "11": 6187556864.0, + "12": 6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, + "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, + "20": 6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, + "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, + "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, + "35": 6187556864.0, + "36": 6187556864.0, + "37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, + "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 6187556864.0, + "44": 6187556864.0, + "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, + "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, + "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, + "60": 6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 6187556864.0, + "64": 6187556864.0, + "65": 6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, + "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, + "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, + "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, + "85": 6187556864.0, + "86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, + "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, + "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + 
"98": 6187556864.0, + "99": 6187556864.0, + "100": 6187556864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.70772, + "2": 0.54719, + "3": 0.22124, + "4": 0.2113, + "5": 0.21574, + "6": 0.20899, + "7": 0.21163, + "8": 0.20932, + "9": 0.20931, + "10": 0.20843, + "11": 0.20865, + "12": 0.20976, + "13": 0.21153, + "14": 0.21141, + "15": 0.22881, + "16": 0.2095, + "17": 0.22252, + "18": 0.21238, + "19": 0.21011, + "20": 0.21012, + "21": 0.20824, + "22": 0.21048, + "23": 0.21174, + "24": 0.21129, + "25": 0.21316, + "26": 0.2111, + "27": 0.20884, + "28": 0.20897, + "29": 0.2111, + "30": 0.20827, + "31": 0.20796, + "32": 0.20813, + "33": 0.21328, + "34": 0.21312, + "35": 0.20816, + "36": 0.2194, + "37": 0.21822, + "38": 0.21033, + "39": 0.20794, + "40": 0.2076, + "41": 0.21268, + "42": 0.23004, + "43": 0.21754, + "44": 0.21505, + "45": 0.21734, + "46": 0.21516, + "47": 0.21219, + "48": 0.21234, + "49": 0.21349, + "50": 0.21178, + "51": 0.20738, + "52": 0.2076, + "53": 0.20803, + "54": 0.20714, + "55": 0.20879, + "56": 0.66578, + "57": 0.21121, + "58": 0.20847, + "59": 0.20864, + "60": 0.20774, + "61": 0.2096, + "62": 0.20814, + "63": 0.20821, + "64": 0.20754, + "65": 0.20865, + "66": 0.20774, + "67": 0.20742, + "68": 0.20782, + "69": 0.20843, + "70": 0.20816, + "71": 0.20717, + "72": 0.20871, + "73": 0.20889, + "74": 0.20819, + "75": 0.20754, + "76": 0.20875, + "77": 0.20921, + "78": 0.2087, + "79": 0.20863, + "80": 0.20792, + "81": 0.20726, + "82": 0.20882, + "83": 0.20819, + "84": 0.20781, + "85": 0.20789, + "86": 0.20766, + "87": 0.20795, + "88": 0.20781, + "89": 0.20815, + "90": 0.20721, + "91": 0.20799, + "92": 0.20836, + "93": 0.20739, + "94": 0.20893, + "95": 0.20842, + "96": 0.20769, + "97": 0.2107, + "98": 0.20784, + "99": 0.20696, + "100": 0.20698 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json index e788215b20a..8c2893286fd 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 7.09171, - "2": 0.19937, - "3": 0.15739, - "4": 0.15626, - "5": 0.15726, - "6": 0.16596, - "7": 0.15866, - "8": 0.16018, - "9": 0.16342, - "10": 0.15848, - "11": 0.1563, - "12": 0.15949, - "13": 0.16471, - "14": 0.1653, - "15": 0.15904, - "16": 0.15673, - "17": 0.15845, - "18": 0.15591, - "19": 0.15809, - "20": 0.1593, - "21": 0.15934, - "22": 0.1588, - "23": 0.15615, - "24": 0.15816, - "25": 0.15513, - "26": 0.16623, - "27": 0.1635, - "28": 0.15796, - "29": 0.15745, - "30": 0.15659, - "31": 0.15757, - "32": 0.15805, - "33": 0.16121, - "34": 0.15918, - "35": 0.15628, - "36": 0.16015, - "37": 0.15954, - "38": 0.15711, - "39": 0.16207, - "40": 0.16543, - "41": 0.16329, - "42": 0.15895, - "43": 0.15771, - "44": 0.16372, - "45": 0.15827, - "46": 0.16205, - "47": 0.16175, - "48": 0.15754, - "49": 0.15916, - "50": 0.15618, - "51": 0.15693, - "52": 0.16151, - "53": 0.16143, - "54": 0.16281, - "55": 0.15891, - "56": 0.16235, - "57": 0.16248, - "58": 0.16949, - "59": 0.16264, - "60": 0.15666, - "61": 0.19456, - "62": 0.19414, - "63": 0.16346, - "64": 0.16675, - "65": 0.16803, - "66": 0.1748, - "67": 0.16431, - "68": 0.1587, - "69": 0.16219, - "70": 0.16457, - "71": 0.1716, - "72": 0.16546, - "73": 0.16711, - "74": 0.16142, - "75": 0.17042, - "76": 0.17092, - "77": 0.16596, - "78": 0.16577, - "79": 0.15743, - "80": 0.15851, - "81": 0.15791, - "82": 0.16001, - "83": 0.15783, - "84": 0.15788, - "85": 0.15665, - "86": 0.16107, - "87": 0.15608, - "88": 0.15928, - "89": 0.16138, - "90": 0.15621, - "91": 0.15886, - "92": 0.15808, - "93": 0.15911, - "94": 0.16777, - "95": 0.16017, - 
"96": 0.15821, - "97": 0.15642, - "98": 0.16061, - "99": 0.157, - "100": 0.15975 + "1": 21.7472, + "2": 0.26947, + "3": 0.15906, + "4": 0.14381, + "5": 0.13718, + "6": 0.13541, + "7": 0.13627, + "8": 0.13552, + "9": 0.15313, + "10": 0.15332, + "11": 0.15293, + "12": 0.14699, + "13": 0.13522, + "14": 0.13752, + "15": 0.14123, + "16": 0.14245, + "17": 0.14135, + "18": 0.13773, + "19": 0.13696, + "20": 0.13686, + "21": 0.13916, + "22": 0.13592, + "23": 0.13723, + "24": 0.13489, + "25": 0.13734, + "26": 0.14011, + "27": 0.13977, + "28": 0.13653, + "29": 0.13981, + "30": 0.13581, + "31": 0.13818, + "32": 0.13543, + "33": 0.13872, + "34": 0.13879, + "35": 0.14257, + "36": 0.13909, + "37": 0.259, + "38": 0.15725, + "39": 0.16376, + "40": 0.13972, + "41": 0.13871, + "42": 0.13723, + "43": 0.24968, + "44": 0.13741, + "45": 0.17732, + "46": 0.13888, + "47": 0.13561, + "48": 0.17199, + "49": 0.14457, + "50": 0.14057, + "51": 0.13853, + "52": 0.53484, + "53": 0.13659, + "54": 0.13534, + "55": 0.13612, + "56": 0.13281, + "57": 0.1356, + "58": 0.13222, + "59": 0.13569, + "60": 0.13553, + "61": 0.13464, + "62": 0.13388, + "63": 0.13695, + "64": 0.13201, + "65": 0.13601, + "66": 0.13229, + "67": 0.13532, + "68": 0.13224, + "69": 0.13444, + "70": 0.13376, + "71": 0.13581, + "72": 0.13302, + "73": 0.13502, + "74": 0.13267, + "75": 0.13531, + "76": 0.13332, + "77": 0.13635, + "78": 0.13294, + "79": 0.13456, + "80": 0.13311, + "81": 0.13594, + "82": 0.13241, + "83": 0.13659, + "84": 0.13211, + "85": 0.1359, + "86": 0.13243, + "87": 0.13479, + "88": 0.13306, + "89": 0.13564, + "90": 0.13326, + "91": 0.13434, + "92": 0.13257, + "93": 0.13697, + "94": 0.13578, + "95": 0.13676, + "96": 0.13248, + "97": 0.13516, + "98": 0.13424, + "99": 0.13587, + "100": 0.13365 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json index b9771639ebd..d0e9e9b3b5a 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.34494, "5": 9.38931, "10": 9.01569, "15": 8.64631, "20": 8.26966, "25": 7.99493, "30": 7.87492, "35": 7.65834, "40": 7.50302, "45": 7.36143, "50": 7.19205, "55": 7.16852, "60": 7.16587, "65": 7.00099, "70": 7.07162, "75": 7.07611, "80": 6.95251, "85": 6.8641, "90": 7.25457, "95": 6.8601, "100": 6.99745}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 43305.0, "5": 45413.0, "10": 45379.0, "15": 43956.0, "20": 44818.0, "25": 42745.0, "30": 44042.0, "35": 43297.0, "40": 43251.0, "45": 43345.0, "50": 43415.0, "55": 43960.0, "60": 41326.0, "65": 44730.0, "70": 45543.0, "75": 44684.0, "80": 41118.0, "85": 44024.0, "90": 44744.0, "95": 44092.0, "100": 42500.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 4158515200.0, "5": 4158515200.0, "10": 4158515200.0, "15": 4158515200.0, "20": 4158515200.0, "25": 4158515200.0, "30": 4158515200.0, "35": 4158515200.0, "40": 4158515200.0, "45": 4158515200.0, "50": 4158515200.0, "55": 4158515200.0, "60": 4158515200.0, "65": 4158515200.0, "70": 4158515200.0, "75": 4158515200.0, "80": 4158515200.0, "85": 4158515200.0, "90": 4158515200.0, "95": 4158515200.0, "100": 4158515200.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 4349380608.0, "5": 6186508288.0, "10": 6186508288.0, "15": 6186508288.0, "20": 6186508288.0, "25": 6186508288.0, "30": 6186508288.0, "35": 6186508288.0, "40": 6186508288.0, "45": 6186508288.0, "50": 6186508288.0, "55": 
6186508288.0, "60": 6186508288.0, "65": 6186508288.0, "70": 6186508288.0, "75": 6186508288.0, "80": 6186508288.0, "85": 6186508288.0, "90": 6186508288.0, "95": 6186508288.0, "100": 6186508288.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 6.34153, "5": 0.23162, "10": 0.22893, "15": 0.23688, "20": 0.2316, "25": 0.22871, "30": 0.23008, "35": 0.22669, "40": 0.24999, "45": 0.22865, "50": 0.23226, "55": 0.22758, "60": 0.23004, "65": 0.22585, "70": 0.23272, "75": 0.22388, "80": 0.22441, "85": 0.22606, "90": 0.6846, "95": 0.22521, "100": 0.22591}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.34494, + "2": 10.36431, + "3": 9.73158, + "4": 9.57928, + "5": 9.38931, + "6": 9.41074, + "7": 9.30545, + "8": 9.24868, + "9": 9.09349, + "10": 9.01569, + "11": 8.86286, + "12": 8.79096, + "13": 8.80892, + "14": 8.67669, + "15": 8.64631, + "16": 8.5398, + "17": 8.47895, + "18": 8.38945, + "19": 8.36156, + "20": 8.26966, + "21": 8.26333, + "22": 8.15066, + "23": 8.08893, + "24": 8.12421, + "25": 7.99493, + "26": 8.08494, + "27": 7.87755, + "28": 7.95863, + "29": 7.79585, + "30": 7.87492, + "31": 7.83245, + "32": 7.69489, + "33": 7.78469, + "34": 7.55767, + "35": 7.65834, + "36": 7.52881, + "37": 7.44912, + "38": 7.50398, + "39": 7.48056, + "40": 7.50302, + "41": 7.39767, + "42": 7.37206, + "43": 7.44301, + "44": 7.3811, + "45": 7.36143, + "46": 7.29415, + "47": 7.47498, + "48": 7.29564, + "49": 7.36092, + "50": 7.19205, + "51": 7.38769, + "52": 7.13773, + "53": 7.125, + "54": 7.23668, + "55": 7.16852, + "56": 7.22884, + "57": 7.34699, + "58": 7.03128, + "59": 7.1229, + "60": 7.16587, + "61": 7.1174, + "62": 7.26837, + "63": 7.16759, + "64": 7.08376, + "65": 7.00099, + "66": 7.07203, + "67": 7.05971, + "68": 7.14618, + "69": 7.03944, + "70": 7.07162, + "71": 6.91653, + "72": 7.02025, + "73": 6.9904, + "74": 6.9146, + "75": 7.07611, + "76": 6.97098, + "77": 
7.08446, + "78": 7.03608, + "79": 6.88325, + "80": 6.95251, + "81": 6.985, + "82": 7.06843, + "83": 7.00882, + "84": 7.0181, + "85": 6.8641, + "86": 7.04979, + "87": 6.99342, + "88": 6.9238, + "89": 6.82406, + "90": 7.25457, + "91": 6.7226, + "92": 7.05372, + "93": 6.91688, + "94": 7.066, + "95": 6.8601, + "96": 6.98742, + "97": 6.96796, + "98": 6.89964, + "99": 7.02766, + "100": 6.99745 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43305.0, + "2": 44091.0, + "3": 44794.0, + "4": 42436.0, + "5": 45413.0, + "6": 40989.0, + "7": 43195.0, + "8": 45462.0, + "9": 42551.0, + "10": 45379.0, + "11": 44016.0, + "12": 44629.0, + "13": 43937.0, + "14": 46250.0, + "15": 43956.0, + "16": 41728.0, + "17": 43873.0, + "18": 44716.0, + "19": 42648.0, + "20": 44818.0, + "21": 44812.0, + "22": 41883.0, + "23": 45468.0, + "24": 43112.0, + "25": 42745.0, + "26": 43949.0, + "27": 46268.0, + "28": 46429.0, + "29": 46199.0, + "30": 44042.0, + "31": 41264.0, + "32": 43413.0, + "33": 45478.0, + "34": 43375.0, + "35": 43297.0, + "36": 42545.0, + "37": 40689.0, + "38": 42575.0, + "39": 44772.0, + "40": 43251.0, + "41": 44707.0, + "42": 43261.0, + "43": 45506.0, + "44": 44652.0, + "45": 43345.0, + "46": 43935.0, + "47": 42506.0, + "48": 44693.0, + "49": 43200.0, + "50": 43415.0, + "51": 41174.0, + "52": 43885.0, + "53": 43959.0, + "54": 41961.0, + "55": 43960.0, + "56": 43269.0, + "57": 42561.0, + "58": 43898.0, + "59": 44654.0, + "60": 41326.0, + "61": 39744.0, + "62": 44774.0, + "63": 44682.0, + "64": 45396.0, + "65": 44730.0, + "66": 45388.0, + "67": 43196.0, + "68": 42556.0, + "69": 43825.0, + "70": 45543.0, + "71": 43407.0, + "72": 44832.0, + "73": 45412.0, + "74": 42502.0, + "75": 44684.0, + "76": 43926.0, + "77": 42100.0, + "78": 40525.0, + "79": 38954.0, + "80": 41118.0, + "81": 45412.0, + "82": 43238.0, + "83": 38495.0, + "84": 42524.0, + "85": 44024.0, + "86": 45749.0, + "87": 41116.0, + "88": 41798.0, + "89": 41078.0, + 
"90": 44744.0, + "91": 46266.0, + "92": 41865.0, + "93": 43254.0, + "94": 39588.0, + "95": 44092.0, + "96": 44732.0, + "97": 45474.0, + "98": 41859.0, + "99": 45537.0, + "100": 42500.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4158515200.0, + "2": 4158515200.0, + "3": 4158515200.0, + "4": 4158515200.0, + "5": 4158515200.0, + "6": 4158515200.0, + "7": 4158515200.0, + "8": 4158515200.0, + "9": 4158515200.0, + "10": 4158515200.0, + "11": 4158515200.0, + "12": 4158515200.0, + "13": 4158515200.0, + "14": 4158515200.0, + "15": 4158515200.0, + "16": 4158515200.0, + "17": 4158515200.0, + "18": 4158515200.0, + "19": 4158515200.0, + "20": 4158515200.0, + "21": 4158515200.0, + "22": 4158515200.0, + "23": 4158515200.0, + "24": 4158515200.0, + "25": 4158515200.0, + "26": 4158515200.0, + "27": 4158515200.0, + "28": 4158515200.0, + "29": 4158515200.0, + "30": 4158515200.0, + "31": 4158515200.0, + "32": 4158515200.0, + "33": 4158515200.0, + "34": 4158515200.0, + "35": 4158515200.0, + "36": 4158515200.0, + "37": 4158515200.0, + "38": 4158515200.0, + "39": 4158515200.0, + "40": 4158515200.0, + "41": 4158515200.0, + "42": 4158515200.0, + "43": 4158515200.0, + "44": 4158515200.0, + "45": 4158515200.0, + "46": 4158515200.0, + "47": 4158515200.0, + "48": 4158515200.0, + "49": 4158515200.0, + "50": 4158515200.0, + "51": 4158515200.0, + "52": 4158515200.0, + "53": 4158515200.0, + "54": 4158515200.0, + "55": 4158515200.0, + "56": 4158515200.0, + "57": 4158515200.0, + "58": 4158515200.0, + "59": 4158515200.0, + "60": 4158515200.0, + "61": 4158515200.0, + "62": 4158515200.0, + "63": 4158515200.0, + "64": 4158515200.0, + "65": 4158515200.0, + "66": 4158515200.0, + "67": 4158515200.0, + "68": 4158515200.0, + "69": 4158515200.0, + "70": 4158515200.0, + "71": 4158515200.0, + "72": 4158515200.0, + "73": 4158515200.0, + "74": 4158515200.0, + "75": 4158515200.0, + "76": 4158515200.0, + "77": 4158515200.0, + "78": 
4158515200.0, + "79": 4158515200.0, + "80": 4158515200.0, + "81": 4158515200.0, + "82": 4158515200.0, + "83": 4158515200.0, + "84": 4158515200.0, + "85": 4158515200.0, + "86": 4158515200.0, + "87": 4158515200.0, + "88": 4158515200.0, + "89": 4158515200.0, + "90": 4158515200.0, + "91": 4158515200.0, + "92": 4158515200.0, + "93": 4158515200.0, + "94": 4158515200.0, + "95": 4158515200.0, + "96": 4158515200.0, + "97": 4158515200.0, + "98": 4158515200.0, + "99": 4158515200.0, + "100": 4158515200.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4349380608.0, + "2": 6185459712.0, + "3": 6187556864.0, + "4": 6187556864.0, + "5": 6187556864.0, + "6": 6187556864.0, + "7": 6187556864.0, + "8": 6187556864.0, + "9": 6187556864.0, + "10": 6187556864.0, + "11": 6187556864.0, + "12": 6187556864.0, + "13": 6187556864.0, + "14": 6187556864.0, + "15": 6187556864.0, + "16": 6187556864.0, + "17": 6187556864.0, + "18": 6187556864.0, + "19": 6187556864.0, + "20": 6187556864.0, + "21": 6187556864.0, + "22": 6187556864.0, + "23": 6187556864.0, + "24": 6187556864.0, + "25": 6187556864.0, + "26": 6187556864.0, + "27": 6187556864.0, + "28": 6187556864.0, + "29": 6187556864.0, + "30": 6187556864.0, + "31": 6187556864.0, + "32": 6187556864.0, + "33": 6187556864.0, + "34": 6187556864.0, + "35": 6187556864.0, + "36": 6187556864.0, + "37": 6187556864.0, + "38": 6187556864.0, + "39": 6187556864.0, + "40": 6187556864.0, + "41": 6187556864.0, + "42": 6187556864.0, + "43": 6187556864.0, + "44": 6187556864.0, + "45": 6187556864.0, + "46": 6187556864.0, + "47": 6187556864.0, + "48": 6187556864.0, + "49": 6187556864.0, + "50": 6187556864.0, + "51": 6187556864.0, + "52": 6187556864.0, + "53": 6187556864.0, + "54": 6187556864.0, + "55": 6187556864.0, + "56": 6187556864.0, + "57": 6187556864.0, + "58": 6187556864.0, + "59": 6187556864.0, + "60": 6187556864.0, + "61": 6187556864.0, + "62": 6187556864.0, + "63": 6187556864.0, + "64": 
6187556864.0, + "65": 6187556864.0, + "66": 6187556864.0, + "67": 6187556864.0, + "68": 6187556864.0, + "69": 6187556864.0, + "70": 6187556864.0, + "71": 6187556864.0, + "72": 6187556864.0, + "73": 6187556864.0, + "74": 6187556864.0, + "75": 6187556864.0, + "76": 6187556864.0, + "77": 6187556864.0, + "78": 6187556864.0, + "79": 6187556864.0, + "80": 6187556864.0, + "81": 6187556864.0, + "82": 6187556864.0, + "83": 6187556864.0, + "84": 6187556864.0, + "85": 6187556864.0, + "86": 6187556864.0, + "87": 6187556864.0, + "88": 6187556864.0, + "89": 6187556864.0, + "90": 6187556864.0, + "91": 6187556864.0, + "92": 6187556864.0, + "93": 6187556864.0, + "94": 6187556864.0, + "95": 6187556864.0, + "96": 6187556864.0, + "97": 6187556864.0, + "98": 6187556864.0, + "99": 6187556864.0, + "100": 6187556864.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15.06497, + "2": 0.33648, + "3": 0.22277, + "4": 0.20886, + "5": 0.21308, + "6": 0.20892, + "7": 0.21092, + "8": 0.20752, + "9": 0.21199, + "10": 0.20742, + "11": 0.21069, + "12": 0.20826, + "13": 0.21142, + "14": 0.21455, + "15": 0.21627, + "16": 0.21296, + "17": 0.21402, + "18": 0.20889, + "19": 0.21078, + "20": 0.20954, + "21": 0.20887, + "22": 0.20979, + "23": 0.21439, + "24": 0.2099, + "25": 0.21242, + "26": 0.21605, + "27": 0.21297, + "28": 0.20982, + "29": 0.21136, + "30": 0.20907, + "31": 0.20972, + "32": 0.21139, + "33": 0.21469, + "34": 0.21652, + "35": 0.21743, + "36": 0.2149, + "37": 0.22692, + "38": 0.21471, + "39": 0.21755, + "40": 0.21624, + "41": 0.21941, + "42": 0.21428, + "43": 0.21749, + "44": 0.21544, + "45": 0.22837, + "46": 0.21663, + "47": 0.21319, + "48": 0.21421, + "49": 0.21543, + "50": 0.21524, + "51": 0.61922, + "52": 0.21119, + "53": 0.21075, + "54": 0.20936, + "55": 0.20973, + "56": 0.20946, + "57": 0.2092, + "58": 0.20996, + "59": 0.20928, + "60": 0.20927, + "61": 0.21061, + "62": 0.20871, + "63": 0.20949, + "64": 0.20862, + "65": 
0.21028, + "66": 0.20932, + "67": 0.20996, + "68": 0.20879, + "69": 0.21044, + "70": 0.20912, + "71": 0.20946, + "72": 0.2097, + "73": 0.21061, + "74": 0.20946, + "75": 0.20911, + "76": 0.20928, + "77": 0.20987, + "78": 0.21013, + "79": 0.2094, + "80": 0.20969, + "81": 0.20909, + "82": 0.20968, + "83": 0.21037, + "84": 0.20978, + "85": 0.21017, + "86": 0.20951, + "87": 0.21004, + "88": 0.20955, + "89": 0.20979, + "90": 0.20905, + "91": 0.21055, + "92": 0.20916, + "93": 0.21026, + "94": 0.20948, + "95": 0.20954, + "96": 0.20902, + "97": 0.20988, + "98": 0.20896, + "99": 0.20908, + "100": 0.20889 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100_2nd.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100_2nd.json new file mode 100644 index 00000000000..3e69a67d2bd --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.38769, + "52": 7.13773, + "53": 7.125, + "54": 7.23668, + "55": 7.16852, + "56": 7.22884, 
+ "57": 7.34699, + "58": 7.03128, + "59": 7.1229, + "60": 7.16587, + "61": 7.1174, + "62": 7.26837, + "63": 7.16759, + "64": 7.08376, + "65": 7.00099, + "66": 7.07203, + "67": 7.05971, + "68": 7.14618, + "69": 7.03944, + "70": 7.07162, + "71": 6.91653, + "72": 7.02025, + "73": 6.9904, + "74": 6.9146, + "75": 7.07611, + "76": 6.97098, + "77": 7.08446, + "78": 7.03608, + "79": 6.88325, + "80": 6.95251, + "81": 6.985, + "82": 7.06843, + "83": 7.00882, + "84": 7.0181, + "85": 6.8641, + "86": 7.04979, + "87": 6.99342, + "88": 6.9238, + "89": 6.82406, + "90": 7.25457, + "91": 6.7226, + "92": 7.05372, + "93": 6.91688, + "94": 7.066, + "95": 6.8601, + "96": 6.98742, + "97": 6.96796, + "98": 6.89964, + "99": 7.02766, + "100": 6.99745 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 41174.0, + "52": 43885.0, + "53": 43959.0, + "54": 41961.0, + "55": 43960.0, + "56": 43269.0, + "57": 42561.0, + "58": 43898.0, + "59": 44654.0, + "60": 41326.0, + "61": 39744.0, + "62": 44774.0, + "63": 44682.0, + "64": 45396.0, + "65": 44730.0, + "66": 45388.0, + "67": 43196.0, + "68": 42556.0, + "69": 43825.0, + "70": 45543.0, + "71": 43407.0, + "72": 44832.0, + "73": 45412.0, + "74": 42502.0, + "75": 
44684.0, + "76": 43926.0, + "77": 42100.0, + "78": 40525.0, + "79": 38954.0, + "80": 41118.0, + "81": 45412.0, + "82": 43238.0, + "83": 38495.0, + "84": 42524.0, + "85": 44024.0, + "86": 45749.0, + "87": 41116.0, + "88": 41798.0, + "89": 41078.0, + "90": 44744.0, + "91": 46266.0, + "92": 41865.0, + "93": 43254.0, + "94": 39588.0, + "95": 44092.0, + "96": 44732.0, + "97": 45474.0, + "98": 41859.0, + "99": 45537.0, + "100": 42500.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4157204480.0, + "52": 4157204480.0, + "53": 4157204480.0, + "54": 4157204480.0, + "55": 4157204480.0, + "56": 4157204480.0, + "57": 4157204480.0, + "58": 4157204480.0, + "59": 4157204480.0, + "60": 4157204480.0, + "61": 4157204480.0, + "62": 4157204480.0, + "63": 4157204480.0, + "64": 4157204480.0, + "65": 4157204480.0, + "66": 4157204480.0, + "67": 4157204480.0, + "68": 4157204480.0, + "69": 4157204480.0, + "70": 4157204480.0, + "71": 4157204480.0, + "72": 4157204480.0, + "73": 4157204480.0, + "74": 4157204480.0, + "75": 4157204480.0, + "76": 4157204480.0, + "77": 4157204480.0, + "78": 4157204480.0, + "79": 4157204480.0, + "80": 4157204480.0, + "81": 4157204480.0, + "82": 4157204480.0, + "83": 
4157204480.0, + "84": 4157204480.0, + "85": 4157204480.0, + "86": 4157204480.0, + "87": 4157204480.0, + "88": 4157204480.0, + "89": 4157204480.0, + "90": 4157204480.0, + "91": 4157204480.0, + "92": 4157204480.0, + "93": 4157204480.0, + "94": 4157204480.0, + "95": 4157204480.0, + "96": 4157204480.0, + "97": 4157204480.0, + "98": 4157204480.0, + "99": 4157204480.0, + "100": 4157204480.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 6123567104.0, + "52": 6204596224.0, + "53": 6204596224.0, + "54": 6204596224.0, + "55": 6204596224.0, + "56": 6204596224.0, + "57": 6204596224.0, + "58": 6204596224.0, + "59": 6204596224.0, + "60": 6204596224.0, + "61": 6204596224.0, + "62": 6204596224.0, + "63": 6204596224.0, + "64": 6204596224.0, + "65": 6204596224.0, + "66": 6204596224.0, + "67": 6204596224.0, + "68": 6204596224.0, + "69": 6204596224.0, + "70": 6204596224.0, + "71": 6204596224.0, + "72": 6204596224.0, + "73": 6204596224.0, + "74": 6204596224.0, + "75": 6204596224.0, + "76": 6204596224.0, + "77": 6204596224.0, + "78": 6204596224.0, + "79": 6204596224.0, + "80": 6204596224.0, + "81": 6204596224.0, + "82": 6204596224.0, + "83": 6204596224.0, + "84": 6204596224.0, + "85": 
6204596224.0, + "86": 6204596224.0, + "87": 6204596224.0, + "88": 6204596224.0, + "89": 6204596224.0, + "90": 6204596224.0, + "91": 6204596224.0, + "92": 6204596224.0, + "93": 6204596224.0, + "94": 6204596224.0, + "95": 6204596224.0, + "96": 6204596224.0, + "97": 6204596224.0, + "98": 6204596224.0, + "99": 6204596224.0, + "100": 6204596224.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 15.16949, + "52": 0.23002, + "53": 0.21058, + "54": 0.20946, + "55": 0.20972, + "56": 0.20922, + "57": 0.20983, + "58": 0.20987, + "59": 0.20922, + "60": 0.20914, + "61": 0.2094, + "62": 0.20895, + "63": 0.2095, + "64": 0.21548, + "65": 0.21352, + "66": 0.21226, + "67": 0.21515, + "68": 0.20948, + "69": 0.21616, + "70": 0.21445, + "71": 0.21232, + "72": 0.21093, + "73": 0.21045, + "74": 0.21041, + "75": 0.21224, + "76": 0.21145, + "77": 0.21077, + "78": 0.21093, + "79": 0.2106, + "80": 0.20977, + "81": 0.21008, + "82": 0.2107, + "83": 0.21493, + "84": 0.22072, + "85": 0.24247, + "86": 0.23417, + "87": 0.68465, + "88": 0.21379, + "89": 0.21223, + "90": 0.20997, + "91": 0.21086, + "92": 0.2272, + "93": 0.21574, + "94": 0.21262, + "95": 0.21076, + "96": 0.21013, + "97": 0.2109, + "98": 
0.21138, + "99": 0.21072, + "100": 0.21732 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json index e0a55371afb..87d5de19688 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 6.98463, - "2": 0.19558, - "3": 0.15734, - "4": 0.15695, - "5": 0.15774, - "6": 0.15468, - "7": 0.15373, - "8": 0.15721, - "9": 0.15375, - "10": 0.15555, - "11": 0.15762, - "12": 0.15358, - "13": 0.15446, - "14": 0.15343, - "15": 0.15567, - "16": 0.15597, - "17": 0.19986, - "18": 0.19685, - "19": 0.15757, - "20": 0.16418, - "21": 0.1662, - "22": 0.1633, - "23": 0.15542, - "24": 0.16131, - "25": 0.15713, - "26": 0.16116, - "27": 0.15731, - "28": 0.16645, - "29": 0.1581, - "30": 0.16334, - "31": 0.15469, - "32": 0.1607, - "33": 0.15565, - "34": 0.16369, - "35": 0.15592, - "36": 0.16404, - "37": 0.15034, - "38": 0.15864, - "39": 0.15017, - "40": 0.1607, - "41": 0.15387, - "42": 0.17077, - "43": 0.15397, - "44": 0.1563, - "45": 0.15512, - "46": 0.16115, - "47": 0.15635, - "48": 0.16292, - "49": 0.15581, - "50": 0.16402, - "51": 0.15457, - "52": 0.16232, - "53": 0.156, - "54": 0.16433, - "55": 0.15283, - "56": 0.19434, - "57": 0.19273, - "58": 0.15955, - "59": 0.15405, - "60": 0.15503, - "61": 0.15418, - "62": 0.15446, - "63": 0.15778, - "64": 0.1578, - "65": 0.16024, - "66": 0.15656, - "67": 0.15524, - "68": 0.15394, - "69": 0.16041, - "70": 0.16082, - "71": 0.16503, - "72": 0.16142, - "73": 0.16242, - "74": 0.15995, - "75": 0.15816, - "76": 0.16199, - "77": 0.16827, - "78": 0.15987, - "79": 0.15797, - "80": 0.15617, - "81": 0.15308, - "82": 
0.15484, - "83": 0.15382, - "84": 0.16856, - "85": 0.15976, - "86": 0.15794, - "87": 0.15409, - "88": 0.15333, - "89": 0.15511, - "90": 0.15333, - "91": 0.17162, - "92": 0.15418, - "93": 0.15421, - "94": 0.15169, - "95": 0.15479, - "96": 0.15268, - "97": 0.1552, - "98": 0.1575, - "99": 0.15403, - "100": 0.15379 + "1": 21.61124, + "2": 0.25375, + "3": 0.15381, + "4": 0.13668, + "5": 0.14061, + "6": 0.13695, + "7": 0.13991, + "8": 0.13647, + "9": 0.13948, + "10": 0.13599, + "11": 0.13996, + "12": 0.13684, + "13": 0.13803, + "14": 0.13775, + "15": 0.14405, + "16": 0.14329, + "17": 0.14214, + "18": 0.13792, + "19": 0.14542, + "20": 0.13933, + "21": 0.14385, + "22": 0.14038, + "23": 0.1392, + "24": 0.14184, + "25": 0.14024, + "26": 0.13811, + "27": 0.14146, + "28": 0.1387, + "29": 0.16852, + "30": 0.17758, + "31": 0.17327, + "32": 0.139, + "33": 0.14013, + "34": 0.14167, + "35": 0.56403, + "36": 0.16981, + "37": 0.16552, + "38": 0.16667, + "39": 0.14682, + "40": 0.14282, + "41": 0.14246, + "42": 0.13999, + "43": 0.14095, + "44": 0.13857, + "45": 0.13996, + "46": 0.13897, + "47": 0.13758, + "48": 0.13993, + "49": 0.13748, + "50": 0.13821, + "51": 0.15888, + "52": 0.13795, + "53": 0.13793, + "54": 0.13589, + "55": 0.13601, + "56": 0.13569, + "57": 0.13516, + "58": 0.13634, + "59": 0.13738, + "60": 0.13603, + "61": 0.15318, + "62": 0.13568, + "63": 0.13667, + "64": 0.1406, + "65": 0.1369, + "66": 0.13909, + "67": 0.13571, + "68": 0.13523, + "69": 0.13642, + "70": 0.13547, + "71": 0.1377, + "72": 0.13793, + "73": 0.13582, + "74": 0.13579, + "75": 0.13481, + "76": 0.13578, + "77": 0.13685, + "78": 0.13529, + "79": 0.13534, + "80": 0.13583, + "81": 0.13619, + "82": 0.13843, + "83": 0.13827, + "84": 0.13815, + "85": 0.13776, + "86": 0.13726, + "87": 0.13781, + "88": 0.13804, + "89": 0.13806, + "90": 0.13816, + "91": 0.13897, + "92": 0.13721, + "93": 0.13893, + "94": 0.14047, + "95": 0.13678, + "96": 0.13685, + "97": 0.13729, + "98": 0.13723, + "99": 0.13754, + "100": 0.50769 } 
} } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..573e46b0bdd --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.37361, + "52": 7.13381, + "53": 7.11244, + "54": 7.23402, + "55": 7.14785, + "56": 7.22775, + "57": 7.33273, + "58": 6.99461, + "59": 7.11599, + "60": 7.13222, + "61": 7.1056, + "62": 7.26513, + "63": 7.14772, + "64": 7.08696, + "65": 6.98643, + "66": 7.04728, + "67": 7.04697, + "68": 7.14062, + "69": 7.2435, + "70": 7.05957, + "71": 6.89356, + "72": 6.99769, + "73": 6.97897, + "74": 6.91983, + "75": 7.05297, + "76": 6.96036, + "77": 7.0791, + "78": 7.01392, + "79": 6.88358, + "80": 6.93014, + "81": 6.96553, + "82": 7.05265, + "83": 6.98788, + "84": 7.00427, + "85": 6.84577, + "86": 7.03621, + "87": 6.96327, + "88": 6.9137, + "89": 6.80631, + "90": 7.23619, + "91": 6.70015, + "92": 
7.05679, + "93": 6.89287, + "94": 7.05835, + "95": 6.84786, + "96": 6.96771, + "97": 6.94258, + "98": 6.87388, + "99": 7.01816, + "100": 6.98466 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 41198.0, + "52": 43900.0, + "53": 43938.0, + "54": 41922.0, + "55": 43916.0, + "56": 43237.0, + "57": 42634.0, + "58": 43916.0, + "59": 44616.0, + "60": 41414.0, + "61": 39759.0, + "62": 44750.0, + "63": 44673.0, + "64": 45378.0, + "65": 44765.0, + "66": 45401.0, + "67": 43155.0, + "68": 42552.0, + "69": 43831.0, + "70": 45546.0, + "71": 43332.0, + "72": 44847.0, + "73": 45376.0, + "74": 42503.0, + "75": 44704.0, + "76": 43916.0, + "77": 42101.0, + "78": 40543.0, + "79": 38997.0, + "80": 41079.0, + "81": 45377.0, + "82": 43254.0, + "83": 38473.0, + "84": 42420.0, + "85": 43989.0, + "86": 45694.0, + "87": 41164.0, + "88": 41773.0, + "89": 41047.0, + "90": 44710.0, + "91": 46274.0, + "92": 41823.0, + "93": 43286.0, + "94": 39530.0, + "95": 44074.0, + "96": 44686.0, + "97": 45424.0, + "98": 41849.0, + "99": 45567.0, + "100": 42485.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": 
"nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 4157204480.0, + "52": 4157204480.0, + "53": 4157204480.0, + "54": 4157204480.0, + "55": 4157204480.0, + "56": 4157204480.0, + "57": 4157204480.0, + "58": 4157204480.0, + "59": 4157204480.0, + "60": 4157204480.0, + "61": 4157204480.0, + "62": 4157204480.0, + "63": 4157204480.0, + "64": 4157204480.0, + "65": 4157204480.0, + "66": 4157204480.0, + "67": 4157204480.0, + "68": 4157204480.0, + "69": 4157204480.0, + "70": 4157204480.0, + "71": 4157204480.0, + "72": 4157204480.0, + "73": 4157204480.0, + "74": 4157204480.0, + "75": 4157204480.0, + "76": 4157204480.0, + "77": 4157204480.0, + "78": 4157204480.0, + "79": 4157204480.0, + "80": 4157204480.0, + "81": 4157204480.0, + "82": 4157204480.0, + "83": 4157204480.0, + "84": 4157204480.0, + "85": 4157204480.0, + "86": 4157204480.0, + "87": 4157204480.0, + "88": 4157204480.0, + "89": 4157204480.0, + "90": 4157204480.0, + "91": 4157204480.0, + "92": 4157204480.0, + "93": 4157204480.0, + "94": 4157204480.0, + "95": 4157204480.0, + "96": 4157204480.0, + "97": 4157204480.0, + "98": 4157204480.0, + "99": 4157204480.0, + "100": 4157204480.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": 
"nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 6123567104.0, + "52": 6204596224.0, + "53": 6204596224.0, + "54": 6204596224.0, + "55": 6204596224.0, + "56": 6204596224.0, + "57": 6204596224.0, + "58": 6204596224.0, + "59": 6204596224.0, + "60": 6204596224.0, + "61": 6204596224.0, + "62": 6204596224.0, + "63": 6204596224.0, + "64": 6204596224.0, + "65": 6204596224.0, + "66": 6204596224.0, + "67": 6204596224.0, + "68": 6204596224.0, + "69": 6204596224.0, + "70": 6204596224.0, + "71": 6204596224.0, + "72": 6204596224.0, + "73": 6204596224.0, + "74": 6204596224.0, + "75": 6204596224.0, + "76": 6204596224.0, + "77": 6204596224.0, + "78": 6204596224.0, + "79": 6204596224.0, + "80": 6204596224.0, + "81": 6204596224.0, + "82": 6204596224.0, + "83": 6204596224.0, + "84": 6204596224.0, + "85": 6204596224.0, + "86": 6204596224.0, + "87": 6204596224.0, + "88": 6204596224.0, + "89": 6204596224.0, + "90": 6204596224.0, + "91": 6204596224.0, + "92": 6204596224.0, + "93": 6204596224.0, + "94": 6204596224.0, + "95": 6204596224.0, + "96": 6204596224.0, + "97": 6204596224.0, + "98": 6204596224.0, + "99": 6204596224.0, + "100": 6204596224.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + 
"11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 21.53373, + "52": 0.17108, + "53": 0.14343, + "54": 0.1382, + "55": 0.13776, + "56": 0.13812, + "57": 0.13818, + "58": 0.60134, + "59": 0.14006, + "60": 0.13721, + "61": 0.13776, + "62": 0.1388, + "63": 0.1416, + "64": 0.14634, + "65": 0.14469, + "66": 0.14853, + "67": 0.14401, + "68": 0.14036, + "69": 0.13971, + "70": 0.14452, + "71": 0.13933, + "72": 0.14544, + "73": 0.14099, + "74": 0.14162, + "75": 0.13904, + "76": 0.14131, + "77": 0.1772, + "78": 0.17391, + "79": 0.15422, + "80": 0.14246, + "81": 0.14329, + "82": 0.14005, + "83": 0.14166, + "84": 0.14169, + "85": 0.14284, + "86": 0.13961, + "87": 0.14163, + "88": 0.1407, + "89": 0.14357, + "90": 0.13852, + "91": 0.13984, + "92": 0.14186, + "93": 0.13873, + "94": 0.13893, + "95": 0.13848, + "96": 0.14366, + "97": 0.14476, + "98": 0.14352, + "99": 0.14347, + "100": 0.14605 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json index 415d8919883..ff144e3d252 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json @@ -1 +1,537 @@ -{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, 
"values": {"1": 10.38854, "5": 9.38095, "10": 9.05714, "15": 8.65603, "20": 8.26193, "25": 7.98192, "30": 7.86937, "35": 7.66279, "40": 7.50083, "45": 7.34894, "50": 7.18147, "55": 7.1542, "60": 7.14734, "65": 6.9972, "70": 7.06009, "75": 7.06086, "80": 6.94306, "85": 6.85989, "90": 7.24967, "95": 6.84836, "100": 6.98289}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 43331.0, "5": 45376.0, "10": 45361.0, "15": 43888.0, "20": 44777.0, "25": 42465.0, "30": 43995.0, "35": 43276.0, "40": 43245.0, "45": 43285.0, "50": 43365.0, "55": 43853.0, "60": 41218.0, "65": 44684.0, "70": 45522.0, "75": 44695.0, "80": 41096.0, "85": 43990.0, "90": 44676.0, "95": 44077.0, "100": 42530.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2171550208.0, "5": 2171550208.0, "10": 2171550208.0, "15": 2171550208.0, "20": 2171550208.0, "25": 2171550208.0, "30": 2171550208.0, "35": 2171550208.0, "40": 2171550208.0, "45": 2171550208.0, "50": 2171550208.0, "55": 2171550208.0, "60": 2171550208.0, "65": 2171550208.0, "70": 2171550208.0, "75": 2171550208.0, "80": 2171550208.0, "85": 2171550208.0, "90": 2171550208.0, "95": 2171550208.0, "100": 2171550208.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2407642624.0, "5": 3336458752.0, "10": 3336458752.0, "15": 3336458752.0, "20": 3336458752.0, "25": 3336458752.0, "30": 3336458752.0, "35": 3336458752.0, "40": 3336458752.0, "45": 3336458752.0, "50": 3336458752.0, "55": 3336458752.0, "60": 3336458752.0, "65": 3336458752.0, "70": 3336458752.0, "75": 3336458752.0, "80": 3336458752.0, "85": 3336458752.0, "90": 3336458752.0, "95": 3336458752.0, "100": 3336458752.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 7.05678, "5": 0.40847, "10": 0.40944, "15": 0.41103, "20": 0.40541, "25": 0.40521, "30": 0.41404, "35": 0.40757, "40": 0.40461, "45": 0.40953, "50": 0.41332, "55": 
0.41397, "60": 0.41379, "65": 0.41333, "70": 0.4099, "75": 0.41406, "80": 0.40498, "85": 0.40583, "90": 0.40273, "95": 0.40387, "100": 0.88919}}} \ No newline at end of file +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.38854, + "2": 10.3937, + "3": 9.78105, + "4": 9.59731, + "5": 9.38095, + "6": 9.4057, + "7": 9.30785, + "8": 9.24107, + "9": 9.12192, + "10": 9.05714, + "11": 8.87325, + "12": 8.79368, + "13": 8.84026, + "14": 8.68518, + "15": 8.65603, + "16": 8.54372, + "17": 8.50113, + "18": 8.39001, + "19": 8.36443, + "20": 8.26193, + "21": 8.27097, + "22": 8.14406, + "23": 8.07467, + "24": 8.11915, + "25": 7.98192, + "26": 8.08777, + "27": 7.87148, + "28": 7.96511, + "29": 7.80258, + "30": 7.86937, + "31": 7.81742, + "32": 7.68788, + "33": 7.7805, + "34": 7.55497, + "35": 7.66279, + "36": 7.52257, + "37": 7.44455, + "38": 7.5026, + "39": 7.4504, + "40": 7.50083, + "41": 7.39053, + "42": 7.36073, + "43": 7.4333, + "44": 7.37641, + "45": 7.34894, + "46": 7.28171, + "47": 7.46122, + "48": 7.2877, + "49": 7.35375, + "50": 7.18147, + "51": 7.36608, + "52": 7.13343, + "53": 7.11575, + "54": 7.22932, + "55": 7.1542, + "56": 7.22261, + "57": 7.32969, + "58": 7.02356, + "59": 7.11377, + "60": 7.14734, + "61": 7.11404, + "62": 7.24755, + "63": 7.1568, + "64": 7.08414, + "65": 6.9972, + "66": 7.06074, + "67": 7.04881, + "68": 7.14167, + "69": 7.03482, + "70": 7.06009, + "71": 6.92578, + "72": 7.0043, + "73": 6.97965, + "74": 6.92276, + "75": 7.06086, + "76": 6.97271, + "77": 7.08186, + "78": 7.01883, + "79": 6.85524, + "80": 6.94306, + "81": 6.97637, + "82": 7.06676, + "83": 6.99984, + "84": 7.0089, + "85": 6.85989, + "86": 7.03607, + "87": 6.98072, + "88": 6.91508, + "89": 6.81068, + "90": 7.24967, + "91": 6.71006, + "92": 7.04916, + "93": 6.9057, + "94": 7.06458, + "95": 6.84836, + "96": 6.97667, + "97": 6.96312, + "98": 6.88704, + "99": 7.013, + "100": 6.98289 + } + }, + "num-zeros": { + "start_step": 1, + 
"end_step": 100, + "step_interval": 1, + "values": { + "1": 43331.0, + "2": 44051.0, + "3": 44760.0, + "4": 42395.0, + "5": 45376.0, + "6": 40957.0, + "7": 43160.0, + "8": 45463.0, + "9": 42446.0, + "10": 45361.0, + "11": 43965.0, + "12": 44605.0, + "13": 43884.0, + "14": 46187.0, + "15": 43888.0, + "16": 41604.0, + "17": 43828.0, + "18": 44690.0, + "19": 42562.0, + "20": 44777.0, + "21": 44792.0, + "22": 41854.0, + "23": 45465.0, + "24": 43071.0, + "25": 42465.0, + "26": 43917.0, + "27": 46228.0, + "28": 46431.0, + "29": 46169.0, + "30": 43995.0, + "31": 41278.0, + "32": 43346.0, + "33": 45463.0, + "34": 43298.0, + "35": 43276.0, + "36": 42490.0, + "37": 40069.0, + "38": 42527.0, + "39": 44730.0, + "40": 43245.0, + "41": 44653.0, + "42": 43269.0, + "43": 45462.0, + "44": 44594.0, + "45": 43285.0, + "46": 43915.0, + "47": 42370.0, + "48": 44704.0, + "49": 43164.0, + "50": 43365.0, + "51": 41167.0, + "52": 43825.0, + "53": 43945.0, + "54": 41947.0, + "55": 43853.0, + "56": 43268.0, + "57": 42591.0, + "58": 43843.0, + "59": 44625.0, + "60": 41218.0, + "61": 39714.0, + "62": 44779.0, + "63": 44716.0, + "64": 45359.0, + "65": 44684.0, + "66": 45355.0, + "67": 43146.0, + "68": 42519.0, + "69": 43835.0, + "70": 45522.0, + "71": 43316.0, + "72": 44767.0, + "73": 45365.0, + "74": 42449.0, + "75": 44695.0, + "76": 43885.0, + "77": 42092.0, + "78": 40278.0, + "79": 38915.0, + "80": 41096.0, + "81": 45372.0, + "82": 43206.0, + "83": 38481.0, + "84": 42474.0, + "85": 43990.0, + "86": 45729.0, + "87": 40884.0, + "88": 41772.0, + "89": 41076.0, + "90": 44676.0, + "91": 46159.0, + "92": 41790.0, + "93": 43242.0, + "94": 39566.0, + "95": 44077.0, + "96": 44741.0, + "97": 45379.0, + "98": 41802.0, + "99": 45441.0, + "100": 42530.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2171550208.0, + "2": 2171550208.0, + "3": 2171550208.0, + "4": 2171550208.0, + "5": 2171550208.0, + "6": 2171550208.0, + "7": 
2171550208.0, + "8": 2171550208.0, + "9": 2171550208.0, + "10": 2171550208.0, + "11": 2171550208.0, + "12": 2171550208.0, + "13": 2171550208.0, + "14": 2171550208.0, + "15": 2171550208.0, + "16": 2171550208.0, + "17": 2171550208.0, + "18": 2171550208.0, + "19": 2171550208.0, + "20": 2171550208.0, + "21": 2171550208.0, + "22": 2171550208.0, + "23": 2171550208.0, + "24": 2171550208.0, + "25": 2171550208.0, + "26": 2171550208.0, + "27": 2171550208.0, + "28": 2171550208.0, + "29": 2171550208.0, + "30": 2171550208.0, + "31": 2171550208.0, + "32": 2171550208.0, + "33": 2171550208.0, + "34": 2171550208.0, + "35": 2171550208.0, + "36": 2171550208.0, + "37": 2171550208.0, + "38": 2171550208.0, + "39": 2171550208.0, + "40": 2171550208.0, + "41": 2171550208.0, + "42": 2171550208.0, + "43": 2171550208.0, + "44": 2171550208.0, + "45": 2171550208.0, + "46": 2171550208.0, + "47": 2171550208.0, + "48": 2171550208.0, + "49": 2171550208.0, + "50": 2171550208.0, + "51": 2171550208.0, + "52": 2171550208.0, + "53": 2171550208.0, + "54": 2171550208.0, + "55": 2171550208.0, + "56": 2171550208.0, + "57": 2171550208.0, + "58": 2171550208.0, + "59": 2171550208.0, + "60": 2171550208.0, + "61": 2171550208.0, + "62": 2171550208.0, + "63": 2171550208.0, + "64": 2171550208.0, + "65": 2171550208.0, + "66": 2171550208.0, + "67": 2171550208.0, + "68": 2171550208.0, + "69": 2171550208.0, + "70": 2171550208.0, + "71": 2171550208.0, + "72": 2171550208.0, + "73": 2171550208.0, + "74": 2171550208.0, + "75": 2171550208.0, + "76": 2171550208.0, + "77": 2171550208.0, + "78": 2171550208.0, + "79": 2171550208.0, + "80": 2171550208.0, + "81": 2171550208.0, + "82": 2171550208.0, + "83": 2171550208.0, + "84": 2171550208.0, + "85": 2171550208.0, + "86": 2171550208.0, + "87": 2171550208.0, + "88": 2171550208.0, + "89": 2171550208.0, + "90": 2171550208.0, + "91": 2171550208.0, + "92": 2171550208.0, + "93": 2171550208.0, + "94": 2171550208.0, + "95": 2171550208.0, + "96": 2171550208.0, + "97": 2171550208.0, + "98": 
2171550208.0, + "99": 2171550208.0, + "100": 2171550208.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 2407642624.0, + "2": 3336458752.0, + "3": 3336458752.0, + "4": 3336458752.0, + "5": 3336458752.0, + "6": 3336458752.0, + "7": 3336458752.0, + "8": 3336458752.0, + "9": 3336458752.0, + "10": 3336458752.0, + "11": 3336458752.0, + "12": 3336458752.0, + "13": 3336458752.0, + "14": 3336458752.0, + "15": 3336458752.0, + "16": 3336458752.0, + "17": 3336458752.0, + "18": 3336458752.0, + "19": 3336458752.0, + "20": 3336458752.0, + "21": 3336458752.0, + "22": 3336458752.0, + "23": 3336458752.0, + "24": 3336458752.0, + "25": 3336458752.0, + "26": 3336458752.0, + "27": 3336458752.0, + "28": 3336458752.0, + "29": 3336458752.0, + "30": 3336458752.0, + "31": 3336458752.0, + "32": 3336458752.0, + "33": 3336458752.0, + "34": 3336458752.0, + "35": 3336458752.0, + "36": 3336458752.0, + "37": 3336458752.0, + "38": 3336458752.0, + "39": 3336458752.0, + "40": 3336458752.0, + "41": 3336458752.0, + "42": 3336458752.0, + "43": 3336458752.0, + "44": 3336458752.0, + "45": 3336458752.0, + "46": 3336458752.0, + "47": 3336458752.0, + "48": 3336458752.0, + "49": 3336458752.0, + "50": 3336458752.0, + "51": 3336458752.0, + "52": 3336458752.0, + "53": 3336458752.0, + "54": 3336458752.0, + "55": 3336458752.0, + "56": 3336458752.0, + "57": 3336458752.0, + "58": 3336458752.0, + "59": 3336458752.0, + "60": 3336458752.0, + "61": 3336458752.0, + "62": 3336458752.0, + "63": 3336458752.0, + "64": 3336458752.0, + "65": 3336458752.0, + "66": 3336458752.0, + "67": 3336458752.0, + "68": 3336458752.0, + "69": 3336458752.0, + "70": 3336458752.0, + "71": 3336458752.0, + "72": 3336458752.0, + "73": 3336458752.0, + "74": 3336458752.0, + "75": 3336458752.0, + "76": 3336458752.0, + "77": 3336458752.0, + "78": 3336458752.0, + "79": 3336458752.0, + "80": 3336458752.0, + "81": 3336458752.0, + "82": 3336458752.0, + "83": 3336458752.0, + "84": 
3336458752.0, + "85": 3336458752.0, + "86": 3336458752.0, + "87": 3336458752.0, + "88": 3336458752.0, + "89": 3336458752.0, + "90": 3336458752.0, + "91": 3336458752.0, + "92": 3336458752.0, + "93": 3336458752.0, + "94": 3336458752.0, + "95": 3336458752.0, + "96": 3336458752.0, + "97": 3336458752.0, + "98": 3336458752.0, + "99": 3336458752.0, + "100": 3336458752.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 18.62611, + "2": 0.61916, + "3": 0.39111, + "4": 0.37734, + "5": 0.37747, + "6": 0.37685, + "7": 0.37677, + "8": 0.37728, + "9": 0.37655, + "10": 0.37691, + "11": 0.37785, + "12": 0.37904, + "13": 0.37764, + "14": 0.37699, + "15": 0.37715, + "16": 0.38824, + "17": 0.38806, + "18": 0.38018, + "19": 0.38683, + "20": 0.9055, + "21": 0.38303, + "22": 0.3989, + "23": 0.38148, + "24": 0.37842, + "25": 0.3783, + "26": 0.37826, + "27": 0.37811, + "28": 0.38399, + "29": 0.38106, + "30": 0.38545, + "31": 0.38376, + "32": 0.37822, + "33": 0.37908, + "34": 0.37752, + "35": 0.37707, + "36": 0.37805, + "37": 0.37768, + "38": 0.37787, + "39": 0.37768, + "40": 0.37772, + "41": 0.37854, + "42": 0.37822, + "43": 0.3784, + "44": 0.37704, + "45": 0.37698, + "46": 0.37731, + "47": 0.37806, + "48": 0.37732, + "49": 0.37787, + "50": 0.96201, + "51": 0.37939, + "52": 0.3783, + "53": 0.37741, + "54": 0.37713, + "55": 0.37693, + "56": 0.37705, + "57": 0.37763, + "58": 0.37733, + "59": 0.37723, + "60": 0.37677, + "61": 0.37741, + "62": 0.37846, + "63": 0.37789, + "64": 0.37762, + "65": 0.37726, + "66": 0.82486, + "67": 0.37916, + "68": 0.81188, + "69": 0.37737, + "70": 0.37671, + "71": 0.37812, + "72": 0.3783, + "73": 0.37834, + "74": 0.37781, + "75": 0.37676, + "76": 0.37767, + "77": 0.37767, + "78": 0.37779, + "79": 0.37804, + "80": 0.38597, + "81": 0.37771, + "82": 0.37768, + "83": 0.37796, + "84": 0.3771, + "85": 0.38399, + "86": 0.38623, + "87": 0.37928, + "88": 0.3908, + "89": 0.38126, + "90": 0.38257, + "91": 
0.37842, + "92": 0.37962, + "93": 0.38289, + "94": 0.37797, + "95": 0.37837, + "96": 0.37748, + "97": 0.37811, + "98": 0.38381, + "99": 0.37833, + "100": 0.37842 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json index 81670d237ce..642719d609f 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.16897, - "2": 0.35143, - "3": 0.28496, - "4": 0.28172, - "5": 0.28308, - "6": 0.2855, - "7": 0.28287, - "8": 0.28079, - "9": 0.2809, - "10": 0.28329, - "11": 0.28038, - "12": 0.28371, - "13": 0.28032, - "14": 0.28362, - "15": 0.28125, - "16": 0.28046, - "17": 0.28421, - "18": 0.28132, - "19": 0.2808, - "20": 0.28432, - "21": 0.28578, - "22": 0.28205, - "23": 0.28411, - "24": 0.28378, - "25": 0.28227, - "26": 0.28231, - "27": 0.28353, - "28": 0.28497, - "29": 0.29981, - "30": 0.28557, - "31": 0.28777, - "32": 0.28808, - "33": 0.28609, - "34": 0.32585, - "35": 0.341, - "36": 0.2886, - "37": 0.28157, - "38": 0.2916, - "39": 0.28501, - "40": 0.27952, - "41": 0.27767, - "42": 0.28062, - "43": 0.28781, - "44": 0.2839, - "45": 0.282, - "46": 0.27837, - "47": 0.27883, - "48": 0.27865, - "49": 0.28179, - "50": 0.27881, - "51": 0.27669, - "52": 0.28063, - "53": 0.27909, - "54": 0.27716, - "55": 0.27807, - "56": 0.2785, - "57": 0.27679, - "58": 0.28004, - "59": 0.27659, - "60": 0.27984, - "61": 0.2771, - "62": 0.27714, - "63": 0.2802, - "64": 0.2918, - "65": 0.27948, - "66": 0.27839, - "67": 0.28573, - "68": 0.27933, - "69": 0.27893, - "70": 0.27964, - "71": 0.2767, - "72": 0.27816, - "73": 0.28004, - "74": 0.27997, - "75": 0.28095, - "76": 0.27752, - "77": 0.27912, - "78": 
0.28068, - "79": 0.27992, - "80": 0.28771, - "81": 0.28046, - "82": 0.28352, - "83": 0.28376, - "84": 0.28337, - "85": 0.28197, - "86": 0.27949, - "87": 0.27909, - "88": 0.28479, - "89": 0.28248, - "90": 0.27742, - "91": 0.27819, - "92": 0.2809, - "93": 0.28123, - "94": 0.27933, - "95": 0.28364, - "96": 0.28523, - "97": 0.28365, - "98": 0.27822, - "99": 0.28382, - "100": 0.28917 + "1": 25.71894, + "2": 0.34844, + "3": 0.27498, + "4": 0.26037, + "5": 0.26158, + "6": 0.26112, + "7": 0.25983, + "8": 0.26046, + "9": 0.26084, + "10": 0.2682, + "11": 0.26401, + "12": 0.26721, + "13": 0.26076, + "14": 0.26222, + "15": 0.2543, + "16": 0.26175, + "17": 0.31454, + "18": 0.47931, + "19": 0.26259, + "20": 0.69917, + "21": 0.26316, + "22": 0.26474, + "23": 0.26088, + "24": 0.25816, + "25": 0.25832, + "26": 0.25678, + "27": 0.25785, + "28": 0.25895, + "29": 0.25888, + "30": 0.25913, + "31": 0.26035, + "32": 0.26324, + "33": 0.26028, + "34": 0.25857, + "35": 0.25864, + "36": 0.26043, + "37": 0.25816, + "38": 0.25979, + "39": 0.25847, + "40": 0.25813, + "41": 0.25846, + "42": 0.25664, + "43": 0.25705, + "44": 0.26337, + "45": 0.26143, + "46": 0.26024, + "47": 0.2583, + "48": 0.2592, + "49": 0.26051, + "50": 0.79372, + "51": 0.26784, + "52": 0.25688, + "53": 0.25931, + "54": 0.25883, + "55": 0.25833, + "56": 0.25645, + "57": 0.25691, + "58": 0.26093, + "59": 0.26089, + "60": 0.25935, + "61": 0.25786, + "62": 0.25771, + "63": 0.26223, + "64": 0.26036, + "65": 0.25957, + "66": 0.74086, + "67": 0.25826, + "68": 0.25657, + "69": 0.25496, + "70": 0.25447, + "71": 0.2713, + "72": 0.25135, + "73": 0.25078, + "74": 0.26569, + "75": 0.26382, + "76": 0.2633, + "77": 0.26309, + "78": 0.26574, + "79": 0.26362, + "80": 0.3128, + "81": 0.26022, + "82": 0.26605, + "83": 0.26244, + "84": 0.26413, + "85": 0.2656, + "86": 0.26904, + "87": 0.26661, + "88": 0.26377, + "89": 0.2667, + "90": 0.26433, + "91": 0.26317, + "92": 0.26411, + "93": 0.26798, + "94": 0.25821, + "95": 0.26018, + "96": 0.29437, + 
"97": 0.26414, + "98": 0.26347, + "99": 0.26108, + "100": 0.25931 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_h100.json index 2e0ee7ee230..0b23b1bfecd 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_h100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.3446, - "2": 0.55186, - "3": 0.52074, - "4": 0.52226, - "5": 0.51961, - "6": 0.52672, - "7": 0.52451, - "8": 0.52369, - "9": 0.54507, - "10": 0.53931, - "11": 0.55505, - "12": 0.52851, - "13": 0.51692, - "14": 0.52026, - "15": 0.51979, - "16": 0.53317, - "17": 0.52489, - "18": 0.59625, - "19": 0.52238, - "20": 0.53197, - "21": 0.52211, - "22": 0.51979, - "23": 0.52551, - "24": 0.52413, - "25": 0.52676, - "26": 0.5192, - "27": 0.52336, - "28": 0.53671, - "29": 0.53561, - "30": 0.51609, - "31": 0.55983, - "32": 0.5166, - "33": 0.53721, - "34": 0.52158, - "35": 0.53727, - "36": 0.5279, - "37": 0.51655, - "38": 0.51986, - "39": 0.5223, - "40": 0.52388, - "41": 0.52083, - "42": 0.52801, - "43": 0.52136, - "44": 0.52414, - "45": 0.52048, - "46": 0.53415, - "47": 0.54831, - "48": 0.58827, - "49": 0.55044, - "50": 0.52682, - "51": 0.52339, - "52": 0.51726, - "53": 0.518, - "54": 0.51935, - "55": 0.52073, - "56": 0.52732, - "57": 0.51867, - "58": 0.51876, - "59": 0.5213, - "60": 0.51779, - "61": 0.52225, - "62": 0.52041, - "63": 0.51793, - "64": 0.5135, - "65": 0.51913, - "66": 0.86034, - "67": 0.51468, - "68": 0.90156, - "69": 0.51931, - "70": 0.53602, - "71": 0.51818, - "72": 0.51744, - "73": 0.54454, - "74": 0.51831, - "75": 0.521, - "76": 0.52894, - "77": 0.53227, - "78": 0.51806, - "79": 0.51818, - "80": 0.51632, - "81": 0.51704, - "82": 0.51542, - "83": 0.51861, - "84": 0.53204, - "85": 
0.52011, - "86": 0.53043, - "87": 0.94359, - "88": 0.51776, - "89": 0.51799, - "90": 0.51773, - "91": 0.51828, - "92": 0.52318, - "93": 0.51688, - "94": 0.51939, - "95": 0.51554, - "96": 0.9, - "97": 0.96079, - "98": 0.52856, - "99": 0.51996, - "100": 0.52921 + "1": 25.3049, + "2": 0.96867, + "3": 0.50973, + "4": 0.4916, + "5": 0.48837, + "6": 0.48697, + "7": 0.48553, + "8": 0.48392, + "9": 0.50312, + "10": 0.50926, + "11": 0.49703, + "12": 0.50337, + "13": 0.4965, + "14": 0.49332, + "15": 0.49456, + "16": 0.49141, + "17": 0.49486, + "18": 0.49094, + "19": 0.49816, + "20": 0.49526, + "21": 0.4944, + "22": 0.49451, + "23": 0.89375, + "24": 1.14231, + "25": 0.49653, + "26": 0.49556, + "27": 0.49346, + "28": 0.49649, + "29": 0.49046, + "30": 0.49275, + "31": 0.49217, + "32": 0.492, + "33": 0.49189, + "34": 0.49161, + "35": 0.48929, + "36": 0.50013, + "37": 0.49187, + "38": 0.49624, + "39": 0.49444, + "40": 0.4924, + "41": 0.49691, + "42": 0.49262, + "43": 0.4991, + "44": 0.48077, + "45": 0.47788, + "46": 0.48199, + "47": 0.49826, + "48": 0.49278, + "49": 0.48988, + "50": 0.48958, + "51": 0.49301, + "52": 0.48885, + "53": 0.48896, + "54": 0.49306, + "55": 0.49203, + "56": 0.49425, + "57": 0.49088, + "58": 0.48671, + "59": 0.48576, + "60": 0.49276, + "61": 0.4913, + "62": 0.48886, + "63": 0.49215, + "64": 0.49049, + "65": 0.4937, + "66": 0.49731, + "67": 0.48964, + "68": 0.49368, + "69": 0.47854, + "70": 0.47863, + "71": 0.48038, + "72": 0.47911, + "73": 0.48181, + "74": 0.49298, + "75": 0.49322, + "76": 0.48959, + "77": 0.48669, + "78": 0.47649, + "79": 0.48313, + "80": 0.47614, + "81": 0.47749, + "82": 0.47372, + "83": 0.48543, + "84": 0.47903, + "85": 0.47638, + "86": 0.47539, + "87": 0.47854, + "88": 0.47715, + "89": 0.47616, + "90": 0.47457, + "91": 0.4771, + "92": 0.4792, + "93": 0.47493, + "94": 0.47522, + "95": 0.47459, + "96": 0.474, + "97": 0.48537, + "98": 0.47982, + "99": 0.47495, + "100": 0.47321 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json index b9a799c779f..e4524b5427a 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 9.38956, - "2": 0.54892, - "3": 0.53756, - "4": 0.52845, - "5": 0.52687, - "6": 0.51818, - "7": 0.52819, - "8": 0.52051, - "9": 0.52526, - "10": 0.52865, - "11": 0.52834, - "12": 0.52573, - "13": 0.52783, - "14": 0.52938, - "15": 0.51899, - "16": 0.53517, - "17": 0.52289, - "18": 0.5363, - "19": 0.5954, - "20": 0.55838, - "21": 0.52166, - "22": 0.54146, - "23": 0.53649, - "24": 0.52785, - "25": 0.52349, - "26": 0.52481, - "27": 0.52376, - "28": 0.52226, - "29": 0.5291, - "30": 0.52613, - "31": 0.52719, - "32": 0.52341, - "33": 0.52646, - "34": 0.52272, - "35": 0.53016, - "36": 0.51941, - "37": 0.52643, - "38": 0.51914, - "39": 0.53109, - "40": 0.52353, - "41": 0.55102, - "42": 0.52656, - "43": 0.53223, - "44": 0.53438, - "45": 0.53126, - "46": 0.53776, - "47": 0.52511, - "48": 0.53521, - "49": 0.52743, - "50": 0.52883, - "51": 0.54078, - "52": 0.52088, - "53": 0.53221, - "54": 0.52473, - "55": 0.54396, - "56": 0.52771, - "57": 0.52699, - "58": 0.53079, - "59": 0.52445, - "60": 0.53037, - "61": 0.52164, - "62": 0.532, - "63": 0.52392, - "64": 0.53062, - "65": 0.52269, - "66": 0.53306, - "67": 0.5173, - "68": 0.54063, - "69": 0.52464, - "70": 0.92233, - "71": 0.53301, - "72": 0.52584, - "73": 0.55029, - "74": 0.54931, - "75": 0.54907, - "76": 0.53191, - "77": 0.53522, - "78": 0.53487, - "79": 0.52543, - "80": 0.53474, - "81": 0.52635, - "82": 0.54801, - "83": 0.52605, - "84": 0.53393, - "85": 0.52523, - "86": 0.53947, - "87": 
0.52933, - "88": 0.53447, - "89": 0.53, - "90": 0.5287, - "91": 0.53326, - "92": 0.54604, - "93": 0.53649, - "94": 0.5297, - "95": 0.54163, - "96": 0.52549, - "97": 0.53256, - "98": 0.53104, - "99": 0.54062, - "100": 0.52332 + "1": 25.29495, + "2": 0.59083, + "3": 0.51228, + "4": 0.86881, + "5": 0.4917, + "6": 0.49302, + "7": 0.49226, + "8": 0.49005, + "9": 0.56319, + "10": 0.66651, + "11": 0.48986, + "12": 0.48642, + "13": 0.48195, + "14": 0.48561, + "15": 0.48592, + "16": 0.49064, + "17": 0.48536, + "18": 0.483, + "19": 0.48082, + "20": 0.48238, + "21": 0.50394, + "22": 0.8666, + "23": 1.49846, + "24": 0.48279, + "25": 0.48011, + "26": 0.48147, + "27": 0.4828, + "28": 0.47915, + "29": 0.49097, + "30": 0.48131, + "31": 0.48075, + "32": 0.47908, + "33": 0.47968, + "34": 0.48222, + "35": 0.48057, + "36": 0.47723, + "37": 0.48, + "38": 0.48269, + "39": 0.47837, + "40": 0.48188, + "41": 0.47999, + "42": 0.4825, + "43": 0.49017, + "44": 0.48176, + "45": 0.48251, + "46": 0.47977, + "47": 0.48156, + "48": 0.48108, + "49": 0.48014, + "50": 0.47676, + "51": 0.49017, + "52": 0.481, + "53": 0.47836, + "54": 0.47545, + "55": 0.47796, + "56": 0.47606, + "57": 0.47601, + "58": 0.47957, + "59": 0.47812, + "60": 0.47515, + "61": 0.47947, + "62": 0.47591, + "63": 0.47577, + "64": 0.47566, + "65": 0.4769, + "66": 0.47889, + "67": 0.47584, + "68": 0.47578, + "69": 0.47401, + "70": 0.4759, + "71": 0.47514, + "72": 0.4742, + "73": 0.47824, + "74": 0.47726, + "75": 0.48289, + "76": 0.48194, + "77": 0.48719, + "78": 0.49039, + "79": 0.4775, + "80": 0.48402, + "81": 0.48084, + "82": 0.47553, + "83": 0.48122, + "84": 0.47896, + "85": 0.4766, + "86": 0.47712, + "87": 0.47753, + "88": 0.47535, + "89": 0.4749, + "90": 0.4776, + "91": 0.47619, + "92": 0.47613, + "93": 0.47698, + "94": 0.47658, + "95": 0.47543, + "96": 0.47852, + "97": 0.47566, + "98": 0.47444, + "99": 0.47759, + "100": 0.47631 } } } \ No newline at end of file diff --git 
a/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json new file mode 100644 index 00000000000..a890b5a0f5d --- /dev/null +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json @@ -0,0 +1,537 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 7.36469, + "52": 7.12044, + "53": 7.09167, + "54": 7.22712, + "55": 7.13495, + "56": 7.20751, + "57": 7.31287, + "58": 6.99063, + "59": 7.09849, + "60": 7.12665, + "61": 7.10047, + "62": 7.23974, + "63": 7.14358, + "64": 7.06717, + "65": 6.98408, + "66": 7.03692, + "67": 7.02875, + "68": 7.12914, + "69": 7.01425, + "70": 7.04954, + "71": 6.89312, + "72": 6.98513, + "73": 6.96734, + "74": 6.90236, + "75": 7.05611, + "76": 6.95986, + "77": 7.06862, + "78": 7.0204, + "79": 6.8505, + "80": 6.92019, + "81": 6.95982, + "82": 7.04575, + "83": 6.98617, + "84": 6.99991, + "85": 6.83511, + "86": 7.04087, + "87": 6.96604, + "88": 6.90125, + "89": 6.80345, + "90": 7.22384, + "91": 6.70505, + "92": 7.03979, + "93": 6.8857, + "94": 7.04044, + 
"95": 6.84746, + "96": 6.9546, + "97": 6.94425, + "98": 6.86865, + "99": 6.9948, + "100": 6.96761 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 41133.0, + "52": 43849.0, + "53": 43899.0, + "54": 41704.0, + "55": 43863.0, + "56": 43205.0, + "57": 42636.0, + "58": 43835.0, + "59": 44623.0, + "60": 41226.0, + "61": 39705.0, + "62": 44732.0, + "63": 44659.0, + "64": 45371.0, + "65": 44682.0, + "66": 45341.0, + "67": 43169.0, + "68": 42486.0, + "69": 43829.0, + "70": 45529.0, + "71": 43294.0, + "72": 44745.0, + "73": 45364.0, + "74": 42463.0, + "75": 44679.0, + "76": 43882.0, + "77": 42042.0, + "78": 40356.0, + "79": 38928.0, + "80": 41079.0, + "81": 45349.0, + "82": 43226.0, + "83": 38474.0, + "84": 42415.0, + "85": 43989.0, + "86": 45673.0, + "87": 40850.0, + "88": 41756.0, + "89": 41065.0, + "90": 44686.0, + "91": 46135.0, + "92": 41609.0, + "93": 43267.0, + "94": 39525.0, + "95": 43921.0, + "96": 44683.0, + "97": 45412.0, + "98": 41832.0, + "99": 45416.0, + "100": 42457.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + 
"8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1105773056.0, + "52": 1105773056.0, + "53": 1105773056.0, + "54": 1105773056.0, + "55": 1105773056.0, + "56": 1105773056.0, + "57": 1105773056.0, + "58": 1105773056.0, + "59": 1105773056.0, + "60": 1105773056.0, + "61": 1105773056.0, + "62": 1105773056.0, + "63": 1105773056.0, + "64": 1105773056.0, + "65": 1105773056.0, + "66": 1105773056.0, + "67": 1105773056.0, + "68": 1105773056.0, + "69": 1105773056.0, + "70": 1105773056.0, + "71": 1105773056.0, + "72": 1105773056.0, + "73": 1105773056.0, + "74": 1105773056.0, + "75": 1105773056.0, + "76": 1105773056.0, + "77": 1105773056.0, + "78": 1105773056.0, + "79": 1105773056.0, + "80": 1105773056.0, + "81": 1105773056.0, + "82": 1105773056.0, + "83": 1105773056.0, + "84": 1105773056.0, + "85": 1105773056.0, + "86": 1105773056.0, + "87": 1105773056.0, + "88": 1105773056.0, + "89": 1105773056.0, + "90": 1105773056.0, + "91": 1105773056.0, + "92": 1105773056.0, + "93": 1105773056.0, + "94": 1105773056.0, + "95": 1105773056.0, + "96": 1105773056.0, + "97": 1105773056.0, + "98": 1105773056.0, + "99": 1105773056.0, + "100": 1105773056.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + 
"11": "nan", + "12": "nan", + "13": "nan", + "14": "nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 1823922688.0, + "52": 1823923712.0, + "53": 1823923712.0, + "54": 1823923712.0, + "55": 1823923712.0, + "56": 1823923712.0, + "57": 1823923712.0, + "58": 1823923712.0, + "59": 1823923712.0, + "60": 1823923712.0, + "61": 1823923712.0, + "62": 1823923712.0, + "63": 1823923712.0, + "64": 1823923712.0, + "65": 1823923712.0, + "66": 1823923712.0, + "67": 1823923712.0, + "68": 1823923712.0, + "69": 1823923712.0, + "70": 1823923712.0, + "71": 1823923712.0, + "72": 1823923712.0, + "73": 1823923712.0, + "74": 1823923712.0, + "75": 1823923712.0, + "76": 1823923712.0, + "77": 1823923712.0, + "78": 1823923712.0, + "79": 1823923712.0, + "80": 1823923712.0, + "81": 1823923712.0, + "82": 1823923712.0, + "83": 1823923712.0, + "84": 1823923712.0, + "85": 1823923712.0, + "86": 1823923712.0, + "87": 1823923712.0, + "88": 1823923712.0, + "89": 1823923712.0, + "90": 1823923712.0, + "91": 1823923712.0, + "92": 1823923712.0, + "93": 1823923712.0, + "94": 1823923712.0, + "95": 1823923712.0, + "96": 1823923712.0, + "97": 1823923712.0, + "98": 1823923712.0, + "99": 1823923712.0, + "100": 1823923712.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": "nan", + "3": "nan", + "4": "nan", + "5": "nan", + "6": "nan", + "7": "nan", + "8": "nan", + "9": "nan", + "10": "nan", + "11": "nan", + "12": "nan", + "13": "nan", + "14": 
"nan", + "15": "nan", + "16": "nan", + "17": "nan", + "18": "nan", + "19": "nan", + "20": "nan", + "21": "nan", + "22": "nan", + "23": "nan", + "24": "nan", + "25": "nan", + "26": "nan", + "27": "nan", + "28": "nan", + "29": "nan", + "30": "nan", + "31": "nan", + "32": "nan", + "33": "nan", + "34": "nan", + "35": "nan", + "36": "nan", + "37": "nan", + "38": "nan", + "39": "nan", + "40": "nan", + "41": "nan", + "42": "nan", + "43": "nan", + "44": "nan", + "45": "nan", + "46": "nan", + "47": "nan", + "48": "nan", + "49": "nan", + "50": "nan", + "51": 23.79487, + "52": 0.54117, + "53": 0.50294, + "54": 0.49381, + "55": 0.49765, + "56": 0.49437, + "57": 0.48794, + "58": 0.4927, + "59": 0.492, + "60": 0.50378, + "61": 0.49484, + "62": 0.49441, + "63": 0.49721, + "64": 0.49973, + "65": 0.49641, + "66": 0.49959, + "67": 0.49735, + "68": 0.49554, + "69": 0.4954, + "70": 0.49556, + "71": 0.49515, + "72": 0.49547, + "73": 0.49564, + "74": 0.50072, + "75": 0.50384, + "76": 0.50256, + "77": 0.49599, + "78": 0.49854, + "79": 0.49618, + "80": 0.5065, + "81": 0.50877, + "82": 0.49521, + "83": 0.51145, + "84": 0.49943, + "85": 0.49798, + "86": 0.49691, + "87": 0.49859, + "88": 0.50159, + "89": 0.49713, + "90": 0.49297, + "91": 0.49503, + "92": 0.49824, + "93": 0.49313, + "94": 0.4893, + "95": 0.48841, + "96": 0.49, + "97": 0.48974, + "98": 0.4896, + "99": 0.49265, + "100": 0.49225 + } + } +} \ No newline at end of file diff --git a/tests/test_utils/python_scripts/auto_reminder_github.py b/tests/test_utils/python_scripts/auto_reminder_github.py index 7484244b717..94e0de1ddaa 100644 --- a/tests/test_utils/python_scripts/auto_reminder_github.py +++ b/tests/test_utils/python_scripts/auto_reminder_github.py @@ -6,6 +6,7 @@ Usage: GH_TOKEN=ghp_... SLACK_TOKEN=xoxb-... SLACK_WEBHOOK_URL=https://... 
REPO=NVIDIA/Megatron-LM python github_pr_reminder.py """ +import html import logging import os import sys @@ -231,10 +232,11 @@ def create_reminder(self, pr): stage_days = self.days_since(self.get_label_date(pr, stage)) author_email = self.get_user_email(pr.user.login) reviewer_emails, action_message = self.get_reviewers(pr) + escaped_title = html.escape(pr.title, quote=False) return Reminder( id=pr.number, - pr=f"<{pr.html_url}|#{pr.number} - {pr.title}>", + pr=f"<{pr.html_url}|#{pr.number} - {escaped_title}>", milestone=pr.milestone.title if pr.milestone else "No Milestone", author=self.get_slack_user_id(author_email), priority="P0" if stage_days > 3 else "P1" if stage_days >= 1 else "P2", diff --git a/tests/test_utils/python_scripts/download_golden_values.py b/tests/test_utils/python_scripts/download_golden_values.py index e2294b32fbb..158df867a64 100644 --- a/tests/test_utils/python_scripts/download_golden_values.py +++ b/tests/test_utils/python_scripts/download_golden_values.py @@ -84,37 +84,38 @@ def main(pipeline_id: int, only_failing: bool): ).glob("g*.json") ) - if len(golden_values_sources) == 1: - golden_values_source = golden_values_sources[0] - else: + if len(golden_values_sources) < 1: logger.info( "Golden values for %s does not exist. 
Skip.", str(golden_values_sources) ) continue - golden_values_source_name = golden_values_source.name - golden_values_source_name = golden_values_source_name.replace( - "generations", "golden_values" - ) - - golden_values_target = ( - pathlib.Path("tests") - / "functional_tests" - / 'test_cases' - / job.stage - / job.name - / golden_values_source_name - ) + for golden_values_source in golden_values_sources: + golden_values_source_name = golden_values_source.name + golden_values_source_name = golden_values_source_name.replace( + "generations", "golden_values" + ) - if golden_values_source.exists(): - pathlib.Path(golden_values_target.parent).mkdir(parents=True, exist_ok=True) - logger.info( - "Move artifacts from %s to %s", golden_values_source, golden_values_target + golden_values_target = ( + pathlib.Path("tests") + / "functional_tests" + / 'test_cases' + / job.stage + / job.name + / golden_values_source_name ) - shutil.move(golden_values_source, golden_values_target) - else: - logger.info("Golden values for %s does not exist. Skip.", str(golden_values_source)) + if golden_values_source.exists(): + pathlib.Path(golden_values_target.parent).mkdir(parents=True, exist_ok=True) + logger.info( + "Move artifacts from %s to %s", golden_values_source, golden_values_target + ) + + shutil.move(golden_values_source, golden_values_target) + else: + logger.info( + "Golden values for %s does not exist. Skip.", str(golden_values_source) + ) shutil.rmtree("tmp") diff --git a/tests/test_utils/python_scripts/recipe_parser.py b/tests/test_utils/python_scripts/recipe_parser.py index c6e7c5517e8..d21551c6c46 100644 --- a/tests/test_utils/python_scripts/recipe_parser.py +++ b/tests/test_utils/python_scripts/recipe_parser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import copy import itertools import logging diff --git a/tests/test_utils/recipes/gpt-gb200.yaml b/tests/test_utils/recipes/gpt-gb200.yaml index 70b89e31a0e..9c3786332c9 100644 --- a/tests/test_utils/recipes/gpt-gb200.yaml +++ b/tests/test_utils/recipes/gpt-gb200.yaml @@ -9,7 +9,7 @@ spec: nodes: 2 gpus: 4 n_repeat: 5 - platforms: dgx_a100 + platforms: dgx_gb200 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -64,8 +64,293 @@ spec: exit $exit_code products: + - test_case: [gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_tp1_pp2] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_tp1_pp2_resume_torch_dist] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_tp1_pp4] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_tp1_pp4_resume_torch_dist] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_tp4_pp1_resume_torch] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_tp4_pp1_resume_torch_dist] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: 
[gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: 
[gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_nondeterministic] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: 
[gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_mla] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode] + products: + - environment: [dev] + scope: 
[nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_tp2_pp2_uninstall_te] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_7b_tp1_pp4_memory_speed] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_7b_tp4_pp1_memory_speed] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp2_pp1_modelopt_distill_resume] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_reruns_persistent_1] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] products: - environment: [dev] scope: [nightly] platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] + - test_case: [gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_gb200] diff --git a/tests/test_utils/recipes/gpt-grpo.yaml b/tests/test_utils/recipes/gpt-grpo.yaml index 90e9815c5fe..11e8eadea9b 100644 --- a/tests/test_utils/recipes/gpt-grpo.yaml +++ b/tests/test_utils/recipes/gpt-grpo.yaml @@ -54,7 +54,7 @@ spec: bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} products: - # - test_case: [gpt_grpo_tp1_pp1_dp8_583m_throughputtest] + # - test_case: [gpt_grpo_tp1_pp1_dp8_583m_throughputtest] # Offline until golden values are 
properly written to disk # products: # - environment: [dev] # scope: [mr] @@ -62,5 +62,20 @@ products: - test_case: [gpt_grpo_tp1_pp1_dp8_583m_throughputtest_github] products: - environment: [dev] - scope: [mr-github] + scope: [mr-github-broken] + platforms: [dgx_h100] + - test_case: [gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest] + products: + - environment: [dev] + scope: [mr-broken] + platforms: [dgx_h100] + - test_case: [gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github] + products: + - environment: [dev] + scope: [mr-github-broken] + platforms: [dgx_h100] + - test_case: [gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest] + products: + - environment: [dev] + scope: [mr-broken] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/mamba-dynamic-inference.yaml b/tests/test_utils/recipes/mamba-dynamic-inference.yaml index 0d02ce29a54..11e05c745ce 100644 --- a/tests/test_utils/recipes/mamba-dynamic-inference.yaml +++ b/tests/test_utils/recipes/mamba-dynamic-inference.yaml @@ -57,5 +57,10 @@ products: - test_case: [hybrid_dynamic_inference_tp1_pp1_dp8_583m] products: - environment: [dev] - scope: [mr] + scope: [mr-github] + platforms: [dgx_h100] + - test_case: [hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill] + products: + - environment: [dev] + scope: [mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/mamba-static-inference.yaml b/tests/test_utils/recipes/mamba-static-inference.yaml index 7cee0a47f56..4cf35d99b70 100644 --- a/tests/test_utils/recipes/mamba-static-inference.yaml +++ b/tests/test_utils/recipes/mamba-static-inference.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: '{test_case}_{environment}_{platforms}' + name: "{test_case}_{environment}_{platforms}" model: hybrid build: mcore-pyt-{environment} nodes: 1 @@ -57,7 +57,7 @@ products: - test_case: [hybrid_static_inference_tp1_pp1_2B_logitsmatch] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr-broken, 
mr-github-broken] platforms: [dgx_h100] - test_case: [hybrid_static_inference_tp1_pp1_2B_cudagraphs] products: diff --git a/tests/test_utils/recipes/moe-grpo.yaml b/tests/test_utils/recipes/moe-grpo.yaml new file mode 100644 index 00000000000..360f6ead209 --- /dev/null +++ b/tests/test_utils/recipes/moe-grpo.yaml @@ -0,0 +1,61 @@ +type: basic +format_version: 1 +maintainers: [mcore] +loggers: [stdout] +spec: + name: "{test_case}_{environment}_{platforms}" + model: moe + build: mcore-pyt-{environment} + nodes: 1 + gpus: 1 + n_repeat: 1 + platforms: dgx_a100 + script_setup: | + unset https_proxy + echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc + + # Checkout latest + cd /opt + rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm + git init + git remote add origin $MCORE_REPO + git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*' + git fetch origin $MCORE_MR_COMMIT + git checkout $MCORE_MR_COMMIT + git rev-parse HEAD + # Checkout backwards-ref + cd /opt + rm -rf /opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy + git init + git remote add origin $MCORE_REPO + git fetch origin $MCORE_BACKWARDS_COMMIT + git checkout $MCORE_BACKWARDS_COMMIT + git rev-parse HEAD + rm -rf megatron; cp -a /opt/megatron-lm/megatron ./ + script: |- + ls + cd /opt/megatron-lm + + ARGUMENTS=( + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" + "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" + "DATA_PATH=/mnt/artifacts/" + "DATA_CACHE_PATH=/workspace/data/cache" + "TRAINING_SCRIPT_PATH=train_rl.py" + "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "OUTPUT_PATH={assets_dir}" + "TENSORBOARD_PATH={assets_dir}/generations_{environment}_{platforms}.json" + "N_REPEAT={n_repeat}" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" + 
"RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" + ) + + bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} + +products: + - test_case: [gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 02c3f68b5f1..faef76e38eb 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: '{test_case}_{environment}_{platforms}' + name: "{test_case}_{environment}_{platforms}" model: moe build: mcore-pyt-{environment} nodes: 1 @@ -60,16 +60,51 @@ products: ####################################################################### # Nightly tests: Run both DEV and LTS unless something is flaky # ####################################################################### + - test_case: [gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_a100, dgx_h100] + - environment: [lts] + scope: [nightly] + - test_case: [gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_a100, dgx_h100] - test_case: [gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] + - environment: [lts] + scope: [nightly] + - test_case: [gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_a100, dgx_h100] + - environment: [lts] + scope: [nightly] - test_case: [gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] + - test_case: [gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_a100, dgx_h100] 
+ - environment: [lts] + scope: [nightly] + - test_case: [gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last] + products: + - environment: [dev] + scope: [nightly] + platforms: [dgx_a100, dgx_h100] + - environment: [lts] + scope: [nightly] # - test_case: [gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts] # products: # non-determinism: #478 # - environment: [dev, lts] @@ -86,6 +121,11 @@ products: - environment: [dev] scope: [mr] platforms: [dgx_h100] + # - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] + # products: + # - environment: [dev] + # scope: [mr] + # platforms: [dgx_h100] # hang: #513 - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph] products: - environment: [dev] @@ -126,42 +166,54 @@ products: - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr] + platforms: [dgx_h100] + - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] + - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading] + products: + - environment: [dev] + scope: [mr] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] + - environment: [lts] + scope: [nightly] - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] + - test_case: [gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph] + products: + - environment: [dev] + scope: [mr] + platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon] products: - environment: [dev] - scope: [mr, mr-github] + scope: [mr, mr-github, mr-slim] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_muon] products: - 
environment: [dev] - scope: [mr, mr-github] + scope: [mr-broken, mr-github-broken, mr-slim-broken] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading] products: - environment: [dev] - scope: [mr-broken, mr-github] + scope: [mr-broken] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading] products: - environment: [dev] - scope: [mr, mr-github] - platforms: [dgx_h100] - - test_case: [gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph] - products: - - environment: [dev] - scope: [mr, mr-github] + scope: [mr-broken] platforms: [dgx_h100] ####################################################################### # Super important mr, mr-github tests that run for both DEV and LTS per mr, mr-github # @@ -189,11 +241,3 @@ products: - environment: [dev] scope: [mr-broken] platforms: [dgx_h100] - - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] - products: - - environment: [dev] - scope: [mr-broken] - platforms: [dgx_h100] # hang: #513 - - environment: [dev] - scope: [mr-slim-broken] - platforms: [dgx_h100] diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py index e251a3c1e7e..362d102200e 100644 --- a/tests/unit_tests/conftest.py +++ b/tests/unit_tests/conftest.py @@ -38,14 +38,6 @@ def pytest_sessionfinish(session, exitstatus): session.exitstatus = 0 -@pytest.fixture(scope="session", autouse=True) -def cleanup(): - yield - if torch.distributed.is_initialized(): - torch.distributed.barrier() - torch.distributed.destroy_process_group() - - @pytest.fixture(scope="function", autouse=True) def set_env(): if is_te_min_version("1.3"): diff --git a/tests/unit_tests/data/test_builder.py b/tests/unit_tests/data/test_builder.py index 939677268bb..d0e86c87fb8 100644 --- a/tests/unit_tests/data/test_builder.py +++ b/tests/unit_tests/data/test_builder.py @@ -5,7 +5,9 @@ ## import os +import random import tempfile +from argparse import 
Namespace from collections import defaultdict from typing import Dict, Optional @@ -13,11 +15,18 @@ import pytest import torch +from megatron.core.datasets.blended_dataset import BlendedDataset from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder from megatron.core.datasets.blended_megatron_dataset_config import BlendedMegatronDatasetConfig +from megatron.core.datasets.gpt_dataset import GPTDataset, GPTDatasetConfig +from megatron.core.datasets.indexed_dataset import DType, IndexedDatasetBuilder from megatron.core.datasets.megatron_dataset import LowLevelDataset, MegatronDataset from megatron.core.datasets.utils import Split, compile_helpers, get_blend_from_list +from megatron.training.tokenizer import build_tokenizer +from megatron.training.utils import get_blend_and_blend_per_split +from tests.unit_tests.dist_checkpointing import TempNamedDir from tests.unit_tests.test_utilities import Utils +from tools.build_sequences_per_dataset import build_sequences_per_dataset _NUM_DATASETS = 10 @@ -32,6 +41,30 @@ _MARGIN = 0.005 +def create_file_prefixes(tokenizer, number_of_files, maximum_number_of_documents, dataset_dir): + # Create dataset directory + os.makedirs(dataset_dir, exist_ok=True) + + # Create file prefixes + file_prefixes = [] + for i in range(number_of_files): + file_prefix_path = os.path.join(dataset_dir, f"file_{i}") + builder = IndexedDatasetBuilder( + file_prefix_path + ".bin", dtype=DType.optimal_dtype(tokenizer.vocab_size) + ) + number_of_documents = random.randint(10, maximum_number_of_documents) + for j in range(number_of_documents): + number_of_tokens = random.randint(50, 100) + tokenized_doc = [ + str(random.randint(0, tokenizer.vocab_size - 1)) for _ in range(number_of_tokens) + ] + builder.add_document(tokenized_doc, [len(tokenized_doc)]) + builder.finalize(file_prefix_path + ".idx") + file_prefixes.append(file_prefix_path) + + return file_prefixes + + def do_setup(odir): paths = defaultdict(list) @@ 
-297,5 +330,206 @@ def __getitem__(self, idx: int) -> Dict[str, numpy.ndarray]: ).build() +@pytest.mark.parametrize("use_split", [True, False]) +@pytest.mark.parametrize("add_weights", [True, False]) +@pytest.mark.parametrize("fast_cache_load", [True, False]) +@pytest.mark.parametrize("sequences_per_dataset", [True, False]) +@pytest.mark.parametrize("defer_npy_index_mmap", [True, False]) +@pytest.mark.parametrize("vocab_size", [131072, 20000]) +@pytest.mark.parametrize("mid_level_dataset_surplus", [0.005, 0.01, 0]) +def test_fast_builder( + use_split, + add_weights, + fast_cache_load, + sequences_per_dataset, + defer_npy_index_mmap, + vocab_size, + mid_level_dataset_surplus, + tmp_path_dist_ckpt, + sequence_length: int = 5, + number_of_files: int = 10, + number_of_documents: int = 10, +): + if use_split and fast_cache_load: + pytest.skip("Skipping test case when both use_split and fast_cache_load are True") + + if torch.distributed.is_available(): + Utils.initialize_distributed() + if torch.distributed.get_rank() == 0: + compile_helpers() + torch.distributed.barrier() + else: + compile_helpers() + + tokenizer = build_tokenizer( + Namespace( + vocab_size=vocab_size, + tokenizer_type="NullTokenizer", + rank=0, + make_vocab_size_divisible_by=128, + tensor_model_parallel_size=1, + ) + ) + + with TempNamedDir(tmp_path_dist_ckpt / "test_fast_builder", sync=True) as temp_dir: + # Created file_prefixes (tokenizer, Number of files, number of documents, path) --> returns file prefixes (list of strings) + if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0: + file_prefixes = create_file_prefixes( + tokenizer, number_of_files, number_of_documents, os.path.join(temp_dir, "dataset") + ) + else: + file_prefixes = [] + for i in range(number_of_files): + file_prefix_path = os.path.join(temp_dir, "dataset", f"file_{i}") + file_prefixes.append(file_prefix_path) + + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + random.seed(1234) # 
NOTE(asolergi-nv): re-sync random state across all ranks + + data_cache_path = os.path.join(temp_dir, "cache") + + args = Namespace( + seed=1234, + seq_length=sequence_length, + data_cache_path=data_cache_path, + split=None, + data_path=None, + train_data_path=None, + valid_data_path=None, + test_data_path=None, + per_split_data_args_path=None, + data_args_path=None, + ) + + # set up data mixture + if use_split: + args.data_path = file_prefixes + args.split = "70,20,10" + else: + train_file_prefixes = file_prefixes[0:6] + valid_file_prefixes = file_prefixes[6:9] + test_file_prefixes = file_prefixes[9:10] + + if add_weights: + # Save original lists before modifying + train_file_prefixes_original = train_file_prefixes[:] + valid_file_prefixes_original = valid_file_prefixes[:] + test_file_prefixes_original = test_file_prefixes[:] + + # For train_file_prefixes, alternately append a random int (10-100) and the file prefix. + train_file_prefixes = [] + for fp in train_file_prefixes_original: + train_file_prefixes.extend([random.randint(10, 100), fp]) + # For valid/test, also add random weights (10-100). 
+ valid_file_prefixes = [] + for fp in valid_file_prefixes_original: + valid_file_prefixes.extend([random.randint(10, 100), fp]) + test_file_prefixes = [] + for fp in test_file_prefixes_original: + test_file_prefixes.extend([random.randint(10, 100), fp]) + + args.train_data_path = train_file_prefixes + args.valid_data_path = valid_file_prefixes + args.test_data_path = test_file_prefixes + + if sequences_per_dataset: + args.path_to_sequences_per_dataset_json = os.path.join( + temp_dir, "sequences_per_dataset.json" + ) + sequences_per_dataset = build_sequences_per_dataset(args) + + blend, blend_per_split = get_blend_and_blend_per_split(args) + + data_args = { + "random_seed": args.seed, + "sequence_length": args.seq_length, + "blend": blend, + "blend_per_split": blend_per_split, + "split": args.split, + "path_to_cache": args.data_cache_path, + "tokenizer": tokenizer, + "reset_position_ids": False, + "reset_attention_mask": False, + "eod_mask_loss": False, + "create_attention_mask": False, + "mid_level_dataset_surplus": mid_level_dataset_surplus, + } + config = GPTDatasetConfig(**data_args) + + train_ds, valid_ds, test_ds = BlendedMegatronDatasetBuilder( + GPTDataset, [100, 10, 10], lambda: True, config + ).build() + + fast_config = GPTDatasetConfig( + **data_args, + fast_cache_load=fast_cache_load, + defer_npy_index_mmap=defer_npy_index_mmap, + sequences_per_dataset=sequences_per_dataset, + ) + + train_ds_fast, valid_ds_fast, test_ds_fast = BlendedMegatronDatasetBuilder( + GPTDataset, [100, 10, 10], lambda: True, fast_config + ).build() + + for ds_slow, ds_fast, split_name in zip( + [train_ds, valid_ds, test_ds], + [train_ds_fast, valid_ds_fast, test_ds_fast], + ["train", "valid", "test"], + ): + if not ds_slow: + continue + assert len(ds_slow) == len( + ds_fast + ), f"ds_slow: {len(ds_slow)}, ds_fast: {len(ds_fast)}, split_name: {split_name}" + if isinstance(ds_slow, GPTDataset): + assert torch.all(ds_slow[0]["tokens"] == ds_fast[0]["tokens"]) + assert 
torch.all(ds_slow[-1]["tokens"] == ds_fast[-1]["tokens"]) + numpy.testing.assert_array_equal(ds_slow.document_index, ds_fast.document_index) + numpy.testing.assert_array_equal(ds_slow.sample_index, ds_fast.sample_index) + numpy.testing.assert_array_equal(ds_slow.shuffle_index, ds_fast.shuffle_index) + numpy.testing.assert_array_equal( + ds_slow.dataset.index.sequence_lengths, ds_fast.dataset.index.sequence_lengths + ) + numpy.testing.assert_array_equal( + ds_slow.dataset.index.document_indices, ds_fast.dataset.index.document_indices + ) + numpy.testing.assert_array_equal( + ds_slow.dataset.index.sequence_pointers, ds_fast.dataset.index.sequence_pointers + ) + elif isinstance(ds_slow, BlendedDataset): + assert torch.all(ds_slow[0]["tokens"] == ds_fast[0]["tokens"]) + assert torch.all(ds_slow[-1]["tokens"] == ds_fast[-1]["tokens"]) + numpy.testing.assert_array_equal(ds_slow.dataset_index, ds_fast.dataset_index) + numpy.testing.assert_array_equal( + ds_slow.dataset_sample_index, ds_fast.dataset_sample_index + ) + for ds_slow_i, ds_fast_i in zip(ds_slow.datasets, ds_fast.datasets): + assert torch.all(ds_slow_i[0]["tokens"] == ds_fast_i[0]["tokens"]) + assert torch.all(ds_slow_i[-1]["tokens"] == ds_fast_i[-1]["tokens"]) + numpy.testing.assert_array_equal( + ds_slow_i.document_index, ds_fast_i.document_index + ) + numpy.testing.assert_array_equal(ds_slow_i.sample_index, ds_fast_i.sample_index) + numpy.testing.assert_array_equal( + ds_slow_i.shuffle_index, ds_fast_i.shuffle_index + ) + numpy.testing.assert_array_equal( + ds_slow_i.dataset.index.sequence_lengths, + ds_fast_i.dataset.index.sequence_lengths, + ) + numpy.testing.assert_array_equal( + ds_slow_i.dataset.index.document_indices, + ds_fast_i.dataset.index.document_indices, + ) + numpy.testing.assert_array_equal( + ds_slow_i.dataset.index.sequence_pointers, + ds_fast_i.dataset.index.sequence_pointers, + ) + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + if __name__ == "__main__": 
test_builder() diff --git a/tests/unit_tests/dist_checkpointing/models/test_bert_model.py b/tests/unit_tests/dist_checkpointing/models/test_bert_model.py index 27f01447851..81b01c8f886 100644 --- a/tests/unit_tests/dist_checkpointing/models/test_bert_model.py +++ b/tests/unit_tests/dist_checkpointing/models/test_bert_model.py @@ -24,13 +24,11 @@ def initialize_bert_model( - seed, layer_spec_fn=bert_layer_with_transformer_engine_spec, vocab_size=128, **config_kwargs + seed, layer_spec=bert_layer_with_transformer_engine_spec, vocab_size=128, **config_kwargs ): torch.manual_seed(seed) model_parallel_cuda_manual_seed(seed) - layer_spec = layer_spec_fn() if callable(layer_spec_fn) else layer_spec_fn - default_config_kwargs = dict( num_layers=8, hidden_size=16, diff --git a/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py b/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py index 54e12b9e7b7..0662922586c 100644 --- a/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py +++ b/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py @@ -10,9 +10,9 @@ from megatron.core import parallel_state from megatron.core.dist_checkpointing import load, save from megatron.core.dist_checkpointing.dict_utils import nested_values -from megatron.core.models.gpt.gpt_layer_specs import get_gpt_decoder_block_spec from megatron.core.models.gpt.gpt_layer_specs import ( - get_gpt_layer_with_transformer_engine_spec as gpt_te_spec, + get_gpt_decoder_block_spec, + get_gpt_layer_with_transformer_engine_spec, ) from megatron.core.models.gpt.gpt_model import GPTModel from megatron.core.optimizer import ChainedOptimizer @@ -62,11 +62,6 @@ def initialize_real_model( virtual_pipeline_model_parallel_size=None, **config_kwargs, ): - # These kwargs are passed through training.get_model for model construction, - # but are not part of TransformerConfig; strip them before building config. 
- config_kwargs.pop("pg_collection", None) - config_kwargs.pop("config", None) - torch.manual_seed(seed) model_parallel_cuda_manual_seed(seed) @@ -95,6 +90,8 @@ def initialize_real_model( default_config_kwargs["qk_head_dim"] = 64 default_config_kwargs["qk_pos_emb_head_dim"] = 32 default_config_kwargs["v_head_dim"] = 64 + config_kwargs.pop("pg_collection", None) + config_kwargs.pop("config", None) default_config_kwargs.update(**config_kwargs) config_cls = MLATransformerConfig if is_mla else TransformerConfig transformer_config = config_cls(**default_config_kwargs) @@ -104,7 +101,7 @@ def initialize_real_model( transformer_config, use_transformer_engine=True, vp_stage=vp_stage ) else: - layer_spec = gpt_te_spec(multi_latent_attention=is_mla) + layer_spec = get_gpt_layer_with_transformer_engine_spec(multi_latent_attention=is_mla) this_model = GPTModel( config=transformer_config, transformer_layer_spec=layer_spec, diff --git a/tests/unit_tests/dist_checkpointing/test_pipeline_parallel_layout.py b/tests/unit_tests/dist_checkpointing/test_pipeline_parallel_layout.py index 42fc9997e13..927b51d5ddb 100644 --- a/tests/unit_tests/dist_checkpointing/test_pipeline_parallel_layout.py +++ b/tests/unit_tests/dist_checkpointing/test_pipeline_parallel_layout.py @@ -152,6 +152,7 @@ def create_args(): args.use_megatron_fsdp = False args.dist_ckpt_optim_fully_reshardable = False args.distrib_optim_fully_reshardable_mem_efficient = False + args.phase_transition_iterations = None yield args diff --git a/tests/unit_tests/dist_checkpointing/test_serialization.py b/tests/unit_tests/dist_checkpointing/test_serialization.py index d2bebc93101..0815633f9b5 100644 --- a/tests/unit_tests/dist_checkpointing/test_serialization.py +++ b/tests/unit_tests/dist_checkpointing/test_serialization.py @@ -77,16 +77,6 @@ def test_single_process_save_load(self, tmp_path_dist_ckpt): save(sharded_state_dict, ckpt_dir) torch.distributed.barrier() - saved_config = maybe_load_config(ckpt_dir) - if 
saved_config.sharded_backend == 'zarr': - assert (ckpt_dir / 'keyA').is_dir() - assert (ckpt_dir / 'keyB').is_dir() - assert not (ckpt_dir / 'keyC').exists() - assert not (ckpt_dir / 'sd_keyA').is_dir() - - if HAVE_DTENSOR: - assert (ckpt_dir / 'keyD').is_dir() - load_ssd = { 'load_sd_keyA': ShardedTensor.from_rank_offsets( 'keyA', torch.ones(2, 4), replica_id=Utils.rank @@ -127,13 +117,6 @@ def preprocess_fn(x): preprocess_common_before_consistancy_check=preprocess_fn, ) - saved_config = maybe_load_config(ckpt_dir) - if saved_config.sharded_backend == 'zarr': - assert (ckpt_dir / 'keyA').is_dir() - assert (ckpt_dir / 'keyB').is_dir() - assert not (ckpt_dir / 'keyC').exists() - assert not (ckpt_dir / 'sd_keyA').is_dir() - Utils.destroy_model_parallel() def test_multi_process_save_log_difference(self, tmp_path_dist_ckpt, caplog): @@ -426,7 +409,6 @@ def test_load_error_msg(self, tmp_path_dist_ckpt): load(state_dict, ckpt_dir) assert f'is not a distributed checkpoint' in str(exc_info.value) - # Missing Zarr arrays torch.distributed.barrier() save(state_dict, ckpt_dir) sh_ten.key = 'different_key' diff --git a/tests/unit_tests/dist_checkpointing/utils.py b/tests/unit_tests/dist_checkpointing/utils.py index ddbb78e0a61..8d22e184893 100644 --- a/tests/unit_tests/dist_checkpointing/utils.py +++ b/tests/unit_tests/dist_checkpointing/utils.py @@ -167,6 +167,7 @@ def init_checkpointing_mock_args(args, ckpt_dir, fully_parallel=False): args.use_megatron_fsdp = False args.dist_ckpt_optim_fully_reshardable = False args.distrib_optim_fully_reshardable_mem_efficient = False + args.phase_transition_iterations = None def setup_model_and_optimizer( diff --git a/tests/unit_tests/distributed/fsdp/test_mfsdp_fully_shard.py b/tests/unit_tests/distributed/fsdp/test_mfsdp_fully_shard.py index b0bd6c729ef..cbca505b405 100644 --- a/tests/unit_tests/distributed/fsdp/test_mfsdp_fully_shard.py +++ b/tests/unit_tests/distributed/fsdp/test_mfsdp_fully_shard.py @@ -2,6 +2,7 @@ import logging 
import shutil +from contextlib import nullcontext from copy import deepcopy from pathlib import Path @@ -33,6 +34,10 @@ DIM_SIZE = 2 NUM_LAYERS = 2 NUM_STEPS = 2 +DELAYED_FP8_RECIPE = "fp8_delayed_scaling" +CURRENT_FP8_RECIPE = "fp8_current_scaling" +BLOCKWISE_FP8_RECIPE = "fp8_blockwise_scaling" +MXFP8_BLOCKWISE_RECIPE = "mxfp8_blockwise" # Needed for `torch.distributed.checkpoint.{save,load}` because # multiple processes need to write to the same directory. @@ -119,17 +124,33 @@ def forward(self, x, y): class ToyTETransformer(torch.nn.Module): """Toy Transformer model for testing Megatron-FSDP with Transformer Engine.""" - def __init__(self, model_dim, num_heads, num_layers, output_dim): + def __init__( + self, + model_dim, + num_heads, + num_layers, + output_dim, + fuse_qkv_params=False, + params_dtype=torch.float32, + device="cuda", + ): super().__init__() self.layers = torch.nn.ModuleList( [ te.pytorch.TransformerLayer( - hidden_size=model_dim, ffn_hidden_size=model_dim, num_attention_heads=num_heads + hidden_size=model_dim, + ffn_hidden_size=model_dim, + num_attention_heads=num_heads, + fuse_qkv_params=fuse_qkv_params, + params_dtype=params_dtype, + device=device, ) for _ in range(num_layers) ] ) - self.fc_out = te.pytorch.Linear(model_dim, output_dim) + self.fc_out = te.pytorch.Linear( + model_dim, output_dim, params_dtype=params_dtype, device=device + ) def forward(self, x): for layer in self.layers: @@ -166,7 +187,11 @@ def build_toy_model(model_type: str, init_model_with_meta_device: bool, seed=Non fsdp_unit_modules = [torch.nn.Transformer] elif model_type == TE_TRANSFORMER: toy_model = ToyTETransformer( - model_dim=DIM_SIZE, num_heads=2, num_layers=NUM_LAYERS, output_dim=DIM_SIZE + model_dim=DIM_SIZE, + num_heads=2, + num_layers=NUM_LAYERS, + output_dim=DIM_SIZE, + device="meta" if init_model_with_meta_device else "cuda", ) fsdp_unit_modules = [te.pytorch.TransformerLayer] @@ -232,16 +257,23 @@ def teardown_class(cls): (2, 2, 1, 2), ], ) - 
@pytest.mark.parametrize("preserve_fp32_weights", [True, False]) - @pytest.mark.parametrize("init_model_with_meta_device", [True, False]) + @pytest.mark.parametrize( + "common_args", + [ + { + "preserve_fp32_weights": True, + "init_model_with_meta_device": True, + "torch_compile": True, + }, + { + "preserve_fp32_weights": False, + "init_model_with_meta_device": False, + "torch_compile": False, + }, + ], + ) def test_fully_shard( - self, - model_type, - dp_shard_strategy, - dp_outer_strategy, - mesh_dim_config, - preserve_fp32_weights, - init_model_with_meta_device, + self, model_type, dp_shard_strategy, dp_outer_strategy, mesh_dim_config, common_args ): """ Test the fully_shard API with different configurations. @@ -253,6 +285,10 @@ def test_fully_shard( """ from megatron.core.distributed.fsdp.src.megatron_fsdp.fully_shard import fully_shard + preserve_fp32_weights = common_args["preserve_fp32_weights"] + init_model_with_meta_device = common_args["init_model_with_meta_device"] + torch_compile = common_args["torch_compile"] + # Skip due to lack of functionality. if init_model_with_meta_device and dp_shard_strategy == NO_SHARD: pytest.skip( @@ -261,7 +297,7 @@ def test_fully_shard( ) elif dp_outer_strategy == OPTIM: if dp_shard_strategy != OPTIM_GRADS_PARAMS: - # FIXME(@shjwudp, @cspades): This is an unexpected lack of support. + # TODO(@shjwudp, @cspades): Requires various modifications to support. # [default0]:FAILED tests/unit_tests/distributed/test_mfsdp_fully_shard.py # [False-True-True-True-mesh_dim_config0-optim-optim-cnn] # [False-True-True-True-mesh_dim_config0-optim-optim_grads-cnn] @@ -297,6 +333,7 @@ def test_fully_shard( grad_reduce_in_fp32=False, init_model_with_meta_device=init_model_with_meta_device, ) + model = torch.compile(model) if torch_compile else model # Mock input and target. toy_input = torch.randn(1, DIM_SIZE, DIM_SIZE).to("cuda") @@ -638,3 +675,102 @@ def test_fully_shard_ez(self, shard_strategy): # Optimizer step. 
optimizer.step() optimizer.zero_grad() + + @pytest.mark.parametrize("init_model_with_meta_device", [True, False]) + @pytest.mark.parametrize( + "te_recipe", + [DELAYED_FP8_RECIPE, CURRENT_FP8_RECIPE, BLOCKWISE_FP8_RECIPE, MXFP8_BLOCKWISE_RECIPE], + ) + def test_fully_shard_te_quantized(self, init_model_with_meta_device, te_recipe): + """ + Test Megatron-FSDP with FP8 activations and parameters via TransformerEngine. + """ + if te_recipe == MXFP8_BLOCKWISE_RECIPE: + # TODO(@cspades, @ko3n1g): Add this test case in. + pytest.skip(f"[Megatron CI/CD] MXFP8 requires Blackwell nodes to test.") + + from megatron.core.distributed.fsdp.src.megatron_fsdp.fully_shard import ( + fully_shard_model, + fully_shard_optimizer, + ) + + # Build FP8 recipe. + te_quant_recipe = None + if te_recipe == MXFP8_BLOCKWISE_RECIPE: + te_quant_recipe = te.common.recipe.MXFP8BlockScaling( + fp8_format=te.common.recipe.Format.HYBRID + ) + elif te_recipe == DELAYED_FP8_RECIPE: + te_quant_recipe = te.common.recipe.DelayedScaling() + elif te_recipe == CURRENT_FP8_RECIPE: + te_quant_recipe = te.common.recipe.Float8CurrentScaling() + elif te_recipe == BLOCKWISE_FP8_RECIPE: + te_quant_recipe = te.common.recipe.Float8BlockScaling() + + # Construct toy model compatible with FP8. + with ( + te.pytorch.quantized_model_init( + recipe=te_quant_recipe, + # Needed for FP8 parameters with Megatron-FSDP. + preserve_high_precision_init_val=True, + ) + if te_quant_recipe is not None + else nullcontext() + ): + # Fused QKV, BF16 precision for high-precision weights, + # and hidden dimension divisibility by 32 is required + # for some FP8 recipes such as MXFP8. + toy_model = ToyTETransformer( + model_dim=64, + num_heads=2, + num_layers=2, + output_dim=64, + fuse_qkv_params=True, + params_dtype=torch.bfloat16, + device="meta" if init_model_with_meta_device else "cuda", + ) + + # Fully-shard the model. 
+ mfsdp_model = fully_shard_model( + module=toy_model, + fsdp_unit_modules=[te.pytorch.TransformerLayer, te.pytorch.Linear], + # Only ZeRO-3 / FSDP supports FP8 parameters. + zero_dp_strategy=3, + init_model_with_meta_device=init_model_with_meta_device, + # Required for FP8 parameter support, except for MXFP8 which has + # its own row-wise and col-wise (transpose) buffer management + # schedule that is natively managed by Megatron-FSDP. + keep_fp8_transpose_cache=True, + # Required for FP8 parameters. The optimizer state (and gradients) + # are never quantized, as TE produces high-precision wgrad and + # dgrad from FP8 weights and activations. Already defaults to True. + preserve_fp32_weights=True, + ) + + # Initialize the distributed optimizer on the MegatronFSDP model. + toy_adam = Adam(params=mfsdp_model.parameters(), lr=0.01) + optimizer = fully_shard_optimizer(optimizer=toy_adam) + + # Mock input and target. Requires 2^N batch size for (MX)FP8 kernels. + toy_input = torch.randn(16, 64, 64, dtype=torch.bfloat16).to("cuda") + toy_target = torch.randn(16, 64, 64, dtype=torch.bfloat16).to("cuda") + + for step in range(NUM_STEPS): + + # Forward pass. + with ( + te.pytorch.autocast(recipe=te_quant_recipe) + if te_quant_recipe is not None + else nullcontext() + ): + output = mfsdp_model(toy_input) + + # Loss. + loss = mse_loss(output, toy_target) + + # Backward pass. + loss.backward() + + # Optimizer step. + optimizer.step() + optimizer.zero_grad() diff --git a/tests/unit_tests/distributed/test_grad_sync_with_expert_parallel.py b/tests/unit_tests/distributed/test_grad_sync_with_expert_parallel.py index 71e45f9d92e..e83f7142284 100644 --- a/tests/unit_tests/distributed/test_grad_sync_with_expert_parallel.py +++ b/tests/unit_tests/distributed/test_grad_sync_with_expert_parallel.py @@ -1,3 +1,5 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
+
 import contextlib
 
 from typing import Optional
@@ -169,15 +171,20 @@ def test_grad_sync(
             )
             != 0
         ):
-            # With above conditions, the data in param_and_grad_buffer.grad_data[0] equals to 1/data_parallel_word_size
-            # When average_in_collective=False, the grad data is always first scaled by 1/data_parallel_word_size and then summed by AR/RS
-            # when use_distributed_optimizer=True, only for rank=0 param_and_grad_buffer.grad_data[0] is updated, for other ranks
-            # another shard of grad_data is updated while param_and_grad_buffer.grad_data[0] is unchanged (=1/data_parallel_word_size)
+            # With above conditions, the data in param_and_grad_buffer.grad_data[0] equals
+            # 1/data_parallel_world_size.
+            # When average_in_collective=False, the grad data is always first scaled by
+            # 1/data_parallel_world_size and then summed by AR/RS.
+            # When use_distributed_optimizer=True, only for rank=0,
+            # param_and_grad_buffer.grad_data[0] is updated. For other ranks another shard of
+            # grad_data is updated while param_and_grad_buffer.grad_data[0] is unchanged
+            # (=1/data_parallel_world_size).
             non_ep_expected_grad_data_value_after_collective /= (
                 parallel_state.get_data_parallel_world_size()
             )
             if ep_size > 1:
-                # For MoE models with exper parallelism, each expert will receive tokens from EPxETP times batches, such that the expert gradient will be EPxETP times after backward,
+                # For MoE models with expert parallelism, each expert receives tokens from EPxETP
+                # times batches, such that the expert gradient will be EPxETP times after backward,
                 # and the expected gradient after collective should be 1.0 as same as dense params.
ep_param_and_grad_buffer.grad_data.data.fill_(float(ep_size * etp_size)) ep_expected_grad_data_value_after_collective = 1 @@ -186,14 +193,30 @@ def test_grad_sync( and (not average_in_collective) and parallel_state.get_expert_data_parallel_rank(partial_expert_data_parallel=True) != 0 ): - # With above conditions, the data in param_and_grad_buffer.grad_data[0] equals to 1/EDP - # When average_in_collective=False, the grad data is always first scaled by expert_data_parallel_size and then summed by AR/RS - # after SUM collective in expert_data_group, the scale will be 1.0. + # With above conditions, the data in param_and_grad_buffer.grad_data[0] equals 1/EDP. + # When average_in_collective=False, the grad data is always first scaled by + # expert_data_parallel_size and then summed by AR/RS. + # After SUM collective in expert_data_group, the scale will be 1.0. ep_expected_grad_data_value_after_collective /= ( parallel_state.get_expert_data_parallel_world_size() ) + register_grad_sync_context = ( + contextlib.nullcontext() if overlap_grad_reduce else pytest.raises(AssertionError) + ) + + # Call register_grad_ready for all params before starting test to seed tracking + # data structures. params = list(model.parameters()) + for param in params: + with register_grad_sync_context: + bucket_group = param_to_bucket_group[param] + bucket_group.register_grad_ready(param) + # Call reset to set .is_first_batch to False. 
+ for param in params: + bucket_group = param_to_bucket_group[param] + bucket_group.reset() + map_bucket_to_last_param_idx = {} for i, param in enumerate(params): if not (param in param_to_bucket_group): @@ -206,9 +229,6 @@ def test_grad_sync( param_idx = 0 map_bucket_to_last_param_idx[bucket_group] = param_idx - register_grad_sync_context = ( - contextlib.nullcontext() if overlap_grad_reduce else pytest.raises(AssertionError) - ) finish_grad_sync_context = contextlib.nullcontext() if ( param_idx < (len(bucket_group.params) - 1) @@ -220,6 +240,7 @@ def test_grad_sync( with register_grad_sync_context: bucket_group.register_grad_ready(param) + with finish_grad_sync_context: # When overlap_grad_reduce is True, this should throw an assertion error until all # params in the model have registered their grad above. diff --git a/tests/unit_tests/distributed/test_param_and_grad_buffer.py b/tests/unit_tests/distributed/test_param_and_grad_buffer.py index c09e2313d8d..ac0c6a6c422 100644 --- a/tests/unit_tests/distributed/test_param_and_grad_buffer.py +++ b/tests/unit_tests/distributed/test_param_and_grad_buffer.py @@ -1,3 +1,5 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
+ import contextlib import math from typing import Optional @@ -164,7 +166,6 @@ def _pad_param_if_needed(numel_unpadded): @pytest.mark.parametrize("overlap_grad_reduce", [False, True]) @pytest.mark.parametrize("average_in_collective", [False, True]) @pytest.mark.parametrize("num_distributed_optimizer_instances", [1, 2]) -# @pytest.mark.flaky def test_grad_sync( use_distributed_optimizer: bool, overlap_grad_reduce: bool, @@ -201,10 +202,12 @@ def test_grad_sync( param_and_grad_buffer.grad_data.data.fill_(1.0) expected_grad_data_value_after_collective = 1 - # under the following conditions, the data in param_and_grad_buffer.grad_data[0] equals to 1/DP - # this is because when average_in_collective=False, the grad data is always first scaled by 1/DP and then summed by AR/RS - # and when use_distributed_optimizer=True, only for rank=0 param_and_grad_buffer.grad_data[0] is updated, for other ranks - # another shard of grad_data is updated while param_and_grad_buffer.grad_data[0] is unchanged (=1/DP) + # Data in param_and_grad_buffer.grad_data[0] is 1/DP. + # When average_in_collective=False, the grad data is always first scaled by 1/DP and then + # summed by AR/RS. + # When use_distributed_optimizer=True, only rank0's param_and_grad_buffer.grad_data[0] is + # updated; other ranks update another shard of grad_data while keeping + # param_and_grad_buffer.grad_data[0] unchanged (=1/DP). if ( use_distributed_optimizer and (not average_in_collective) @@ -215,13 +218,25 @@ def test_grad_sync( ): expected_grad_data_value_after_collective /= parallel_state.get_data_parallel_world_size() + register_grad_sync_context = ( + contextlib.nullcontext() if overlap_grad_reduce else pytest.raises(AssertionError) + ) + + # Call register_grad_ready for all params before starting test to seed tracking + # data structures. 
params = list(model.parameters()) + for param in params: + with register_grad_sync_context: + bucket_group = param_to_bucket_group[param] + bucket_group.register_grad_ready(param) + # Call reset to set .is_first_batch to False. + for param in params: + bucket_group = param_to_bucket_group[param] + bucket_group.reset() + for i, param in enumerate(params): assert param in param_to_bucket_group bucket_group = param_to_bucket_group[param] - register_grad_sync_context = ( - contextlib.nullcontext() if overlap_grad_reduce else pytest.raises(AssertionError) - ) finish_grad_sync_context = contextlib.nullcontext() if ( i < (len(params) - 1) @@ -233,6 +248,7 @@ def test_grad_sync( with register_grad_sync_context: bucket_group.register_grad_ready(param) + with finish_grad_sync_context: # When overlap_grad_reduce is True, this should throw an assertion error until all # params in the model have registered their grad above. diff --git a/tests/unit_tests/inference/contexts/attention_metadata/test_tensor_ops.py b/tests/unit_tests/inference/contexts/attention_metadata/test_tensor_ops.py new file mode 100644 index 00000000000..a44f0c0d155 --- /dev/null +++ b/tests/unit_tests/inference/contexts/attention_metadata/test_tensor_ops.py @@ -0,0 +1,302 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ +import pytest +import torch + +from megatron.core.inference.contexts.attention_context.triton.tensor_ops import ( + tensor_get_slice_after, + tensor_masked_update, + tensor_merge, +) + + +def tensor_get_slice_after_pytorch( + input_tensor: torch.Tensor, output_tensor: torch.Tensor, pos_on_device: torch.Tensor +) -> None: + """Reference PyTorch implementation of tensor_get_slice_after.""" + + assert input_tensor.ndim == output_tensor.ndim, "Rank mismatch" + for i in range(1, input_tensor.ndim): + assert input_tensor.shape[i] == output_tensor.shape[i], f"Dimension {i} must match" + + pos = pos_on_device[0].item() + assert 0 <= pos <= input_tensor.shape[0] + + copy_size = min(input_tensor.shape[0] - pos, output_tensor.shape[0]) + if copy_size > 0: + output_tensor[:copy_size].copy_(input_tensor[pos : pos + copy_size]) + + +def tensor_merge_pytorch( + tensor_a: torch.Tensor, + tensor_b: torch.Tensor, + output_tensor: torch.Tensor, + pos_on_device: torch.Tensor, +) -> None: + """Reference PyTorch implementation of tensor_merge.""" + + assert tensor_a.ndim == tensor_b.ndim == output_tensor.ndim, "Rank mismatch across tensors" + for i in range(1, tensor_a.ndim): + assert ( + tensor_a.shape[i] == tensor_b.shape[i] == output_tensor.shape[i] + ), f"Dimension {i} must match" + + pos = pos_on_device[0].item() + assert 0 <= pos <= tensor_a.shape[0] + assert output_tensor.shape[0] >= tensor_a.shape[0] + + if pos > 0: + output_tensor[:pos].copy_(tensor_a[:pos]) + + copy_size = min(tensor_b.shape[0], output_tensor.shape[0] - pos) + if copy_size > 0: + output_tensor[pos : pos + copy_size].copy_(tensor_b[:copy_size]) + + +@pytest.fixture +def device(): + if not torch.cuda.is_available(): + pytest.skip("CUDA not available") + return torch.device("cuda") + + +@pytest.fixture +def slice_params(): + return {"input_batch": 16, "output_batch": 20, "feature_dim": 256} + + +def test_get_slice_after_basic(device, slice_params): + params = slice_params + input_tensor = 
torch.randn(params["input_batch"], params["feature_dim"], device=device) + pos_on_device = torch.tensor([5], device=device) + + output_ref = torch.zeros(params["output_batch"], params["feature_dim"], device=device) + output_triton = torch.zeros_like(output_ref) + output_ref[15:] = 123.0 + output_triton[15:] = 123.0 + + tensor_get_slice_after_pytorch(input_tensor, output_ref, pos_on_device) + tensor_get_slice_after(input_tensor, output_triton, pos_on_device, check_bounds=True) + + assert torch.equal(output_ref, output_triton) + assert torch.equal( + output_triton[: params["input_batch"] - pos_on_device[0].item()], + input_tensor[pos_on_device[0].item() :], + ) + + +def test_get_slice_after_pos_zero(device, slice_params): + params = slice_params + input_tensor = torch.randn(params["input_batch"], params["feature_dim"], device=device) + output_tensor = torch.zeros(params["output_batch"], params["feature_dim"], device=device) + + tensor_get_slice_after( + input_tensor, output_tensor, torch.tensor([0], device=device), check_bounds=True + ) + + copy_size = min(params["input_batch"], params["output_batch"]) + assert torch.equal(output_tensor[:copy_size], input_tensor[:copy_size]) + + +def test_get_slice_after_pos_full(device, slice_params): + params = slice_params + input_tensor = torch.randn(params["input_batch"], params["feature_dim"], device=device) + output_tensor = torch.ones(params["output_batch"], params["feature_dim"], device=device) + original = output_tensor.clone() + + tensor_get_slice_after( + input_tensor, + output_tensor, + torch.tensor([params["input_batch"]], device=device), + check_bounds=True, + ) + + assert torch.equal(output_tensor, original) + + +def test_get_slice_after_exact_fit(device): + input_tensor = torch.randn(8, 256, device=device) + output_tensor = torch.zeros(5, 256, device=device) + + tensor_get_slice_after(input_tensor, output_tensor, torch.tensor([3], device=device)) + + assert torch.equal(output_tensor, input_tensor[3:8]) + + +def 
test_get_slice_after_nd(device): + input_tensor = torch.randn(6, 4, 8, device=device) + output_tensor = torch.zeros(10, 4, 8, device=device) + + tensor_get_slice_after( + input_tensor, output_tensor, torch.tensor([1], device=device), check_bounds=True + ) + + assert torch.equal(output_tensor[:5], input_tensor[1:6]) + + +def test_get_slice_after_bounds(device, slice_params): + params = slice_params + input_tensor = torch.randn(params["input_batch"], params["feature_dim"], device=device) + output_tensor = torch.zeros(params["output_batch"], params["feature_dim"], device=device) + + with pytest.raises(AssertionError): + tensor_get_slice_after( + input_tensor, + output_tensor, + torch.tensor([params["input_batch"] + 1], device=device), + check_bounds=True, + ) + + +def test_get_slice_after_consistency(device): + input_tensor = torch.randn(32, 128, device=device) + output_ref = torch.zeros(16, 128, device=device) + output_triton = torch.zeros_like(output_ref) + pos_on_device = torch.tensor([8], device=device) + + tensor_get_slice_after_pytorch(input_tensor, output_ref, pos_on_device) + tensor_get_slice_after(input_tensor, output_triton, pos_on_device) + + assert torch.equal(output_ref, output_triton) + + +@pytest.fixture +def merge_params(): + return {"tensor_a_batch": 8, "tensor_b_batch": 12, "output_batch": 32, "feature_dim": 256} + + +@pytest.mark.parametrize("in_place", [False, True]) +def test_tensor_merge_basic(device, merge_params, in_place): + params = merge_params + pos_val = 5 + pos_on_device = torch.tensor([pos_val], device=device) + + tensor_b = torch.randn(params["tensor_b_batch"], params["feature_dim"], device=device) + + if in_place: + tensor_a = torch.randn(params["output_batch"], params["feature_dim"], device=device) + output_triton = tensor_a.clone() + + output_ref = tensor_a.clone() + tensor_merge_pytorch(tensor_a, tensor_b, output_ref, pos_on_device) + tensor_merge(output_triton, tensor_b, pos_on_device, output_tensor=None, check_bounds=True) + else: 
+ tensor_a = torch.randn(params["tensor_a_batch"], params["feature_dim"], device=device) + output_ref = torch.zeros(params["output_batch"], params["feature_dim"], device=device) + output_triton = torch.zeros_like(output_ref) + + tensor_merge_pytorch(tensor_a, tensor_b, output_ref, pos_on_device) + tensor_merge( + tensor_a, tensor_b, pos_on_device, output_tensor=output_triton, check_bounds=True + ) + + assert torch.equal(output_ref, output_triton) + assert torch.equal(output_triton[:pos_val], tensor_a[:pos_val]) + assert torch.equal(output_triton[pos_val : pos_val + params["tensor_b_batch"]], tensor_b) + + +def test_tensor_merge_pos_zero(device, merge_params): + params = merge_params + tensor_a = torch.randn(params["tensor_a_batch"], params["feature_dim"], device=device) + tensor_b = torch.randn(params["tensor_b_batch"], params["feature_dim"], device=device) + output_tensor = torch.zeros(params["output_batch"], params["feature_dim"], device=device) + + tensor_merge( + tensor_a, + tensor_b, + torch.tensor([0], device=device), + output_tensor=output_tensor, + check_bounds=True, + ) + + assert torch.equal(output_tensor[: params["tensor_b_batch"]], tensor_b) + + +def test_tensor_merge_pos_full(device, merge_params): + params = merge_params + tensor_a = torch.randn(params["tensor_a_batch"], params["feature_dim"], device=device) + tensor_b = torch.randn(params["tensor_b_batch"], params["feature_dim"], device=device) + output_tensor = torch.zeros(params["output_batch"], params["feature_dim"], device=device) + + tensor_merge( + tensor_a, + tensor_b, + torch.tensor([params["tensor_a_batch"]], device=device), + output_tensor=output_tensor, + check_bounds=True, + ) + + assert torch.equal(output_tensor[: params["tensor_a_batch"]], tensor_a) + assert torch.equal( + output_tensor[ + params["tensor_a_batch"] : params["tensor_a_batch"] + params["tensor_b_batch"] + ], + tensor_b, + ) + + +def test_tensor_merge_small(device): + tensor_a = torch.randn(3, 256, device=device) + tensor_b 
= torch.randn(5, 256, device=device) + output_tensor = torch.zeros(10, 256, device=device) + + tensor_merge(tensor_a, tensor_b, torch.tensor([2], device=device), output_tensor=output_tensor) + + assert torch.equal(output_tensor[:2], tensor_a[:2]) + assert torch.equal(output_tensor[2:7], tensor_b) + + +@pytest.mark.parametrize("ndim", [2, 3, 4]) +def test_tensor_masked_update(device, ndim): + """ + Tests tensor_masked_update for 2D, 3D, and 4D tensors. + Covering 3 scenarios: + 1. idx has only valid values (arbitrary order). + 2. idx has mixed valid values and -1s (all -1s at the end). + 3. idx has all -1s. + """ + + num_states = 32 + batch_size = 8 + + # Define shapes based on dimensionality + if ndim == 2: + shape_states = (num_states, 64) + shape_new = (batch_size, 64) + elif ndim == 3: + shape_states = (num_states, 8, 8) + shape_new = (batch_size, 8, 8) + elif ndim == 4: + shape_states = (num_states, 4, 4, 4) + shape_new = (batch_size, 4, 4, 4) + + def allocate_tensors(): + states = torch.randn(shape_states, device=device) + new_states = torch.randn(shape_new, device=device) + return states, new_states + + # Scenario 1: no -1s + states, new_states = allocate_tensors() + idx = torch.randperm(num_states, device=device)[:batch_size] + expected_states = states.clone() + expected_states[idx] = new_states + tensor_masked_update(states, idx, new_states) + assert torch.equal(states, expected_states), f"Failed {ndim}D: all valid idx values" + + # Scenario 2: mix of regular values and -1s + states, new_states = allocate_tensors() + num_valid = batch_size // 2 + valid_indices = torch.randperm(num_states, device=device)[:num_valid] + idx = torch.full((batch_size,), -1, dtype=torch.long, device=device) + idx[:num_valid] = valid_indices + expected_states = states.clone() + expected_states[valid_indices] = new_states[:num_valid] + tensor_masked_update(states, idx, new_states) + assert torch.equal(states, expected_states), f"Failed {ndim}D: mix of valid and mask values" + + # 
Scenario 3: all -1s + states, new_states = allocate_tensors() + idx = torch.full((batch_size,), -1, dtype=torch.long, device=device) + expected_states = states.clone() + tensor_masked_update(states, idx, new_states) + assert torch.equal(states, expected_states), f"Failed {ndim}D: all mask values" diff --git a/tests/unit_tests/inference/contexts/test_dynamic_context.py b/tests/unit_tests/inference/contexts/test_dynamic_context.py index 2da334191a0..05e0306bfd8 100644 --- a/tests/unit_tests/inference/contexts/test_dynamic_context.py +++ b/tests/unit_tests/inference/contexts/test_dynamic_context.py @@ -5,6 +5,7 @@ import pytest import torch +from megatron.core import parallel_state from megatron.core.inference.contexts.attention_context.mamba_metadata import ( MambaInferenceStateConfig, ) @@ -52,6 +53,7 @@ def _get_dynamic_context( is_hybrid_model=False, layer_type_list=None, rounder=64, + paused_buffer_size_gb=None, ): set_rounder(rounder) @@ -73,8 +75,11 @@ def _get_dynamic_context( num_attention_heads=num_attention_heads, max_sequence_length=max_sequence_length, num_cuda_graphs=None, - use_cuda_graphs_for_non_decode_steps=not is_hybrid_model, + use_cuda_graphs_for_non_decode_steps=True, buffer_size_gb=buffer_size_gb, + paused_buffer_size_gb=( + 0.2 * buffer_size_gb if paused_buffer_size_gb is None else paused_buffer_size_gb + ), block_size_tokens=block_size_tokens, max_tokens=max_tokens, mamba_inference_state_config=mamba_inference_state_config, @@ -107,18 +112,16 @@ def test_initialize_dynamic_context(self, is_hybrid_model: bool): if not is_hybrid_model: assert dynamic_context.block_allocator.total_count == 491 - assert dynamic_context.block_allocator.active_count == 245 - assert dynamic_context.max_total_requests == 490 - # We make max_active_requests divisible by the REQUEST_ROUNDER. - assert dynamic_context.max_active_requests == 192 + assert dynamic_context.block_allocator.active_count == 392 + # We make max_requests divisible by the REQUEST_ROUNDER. 
+ assert dynamic_context.max_requests == 448 assert dynamic_context.max_tokens == 16384 assert dynamic_context.num_mamba_layers == 0 assert dynamic_context.mamba_metadata is None else: - assert dynamic_context.block_allocator.total_count == 555 - assert dynamic_context.block_allocator.active_count == 277 - assert dynamic_context.max_total_requests == 554 - assert dynamic_context.max_active_requests == 256 + assert dynamic_context.block_allocator.total_count == 556 + assert dynamic_context.block_allocator.active_count == 444 + assert dynamic_context.max_requests == 512 assert dynamic_context.max_tokens == 16384 assert dynamic_context.num_mamba_layers == 1 assert dynamic_context.mamba_metadata is not None @@ -156,12 +159,12 @@ def test_is_memory_available(self, is_hybrid_model): max_tokens=None, is_hybrid_model=is_hybrid_model, ) - dynamic_context.block_allocator.active_count = 10 + dynamic_context.block_allocator.total_avail = 10 assert dynamic_context.block_allocator.is_memory_available(10) assert not dynamic_context.block_allocator.is_memory_available(11) assert dynamic_context.block_allocator.is_memory_available(1) - dynamic_context.block_allocator.active_count = 0 + dynamic_context.block_allocator.total_avail = 0 assert not dynamic_context.block_allocator.is_memory_available(1) @pytest.mark.internal @@ -181,9 +184,9 @@ def test_request_overflow(self, is_hybrid_model: bool): rounder=1, is_hybrid_model=is_hybrid_model, ) - dynamic_context.max_active_requests //= 2 + dynamic_context.max_requests //= 2 with pytest.raises(RequestOverflowError): - for i in range(dynamic_context.max_active_requests + 1): + for i in range(dynamic_context.max_requests + 1): dynamic_context.add_request( DynamicInferenceRequest( request_id=i, @@ -207,7 +210,7 @@ def test_token_overflow_error(self, is_hybrid_model: bool): max_sequence_length=512, buffer_size_gb=0.1, block_size_tokens=128, - max_tokens=200, # setting low, but >= context.max_active_requests. 
+ max_tokens=200, # setting low, but >= context.max_requests. rounder=1, is_hybrid_model=is_hybrid_model, ) @@ -287,10 +290,12 @@ def test_reset(self, is_hybrid_model: bool): assert torch.all(dynamic_context.token_to_position_in_request == 0) assert torch.all(dynamic_context.token_to_block_idx == -1) assert torch.all(dynamic_context.token_to_local_position_within_kv_block == 0) - assert ( - dynamic_context.block_allocator.active_count - == dynamic_context.block_allocator.total_count // 2 - ) + if not is_hybrid_model: + assert dynamic_context.block_allocator.active_count == 819 + assert dynamic_context.block_allocator.total_count == 1024 + else: + assert dynamic_context.block_allocator.active_count == 1517 + assert dynamic_context.block_allocator.total_count == 1897 assert torch.all(dynamic_context.request_to_kv_block_ids == -1) if is_hybrid_model: assert torch.all(dynamic_context.mamba_metadata.request_to_mamba_state_idx == -1) @@ -312,7 +317,7 @@ def test_allocate_and_release_memory_blocks(self, is_hybrid_model): ) if is_hybrid_model: - expected_memory_blocks = [550, 551, 552, 553] + expected_memory_blocks = [551, 552, 553, 554] else: expected_memory_blocks = [486, 487, 488, 489] expected_block_count_avail = expected_memory_blocks[0] @@ -378,7 +383,7 @@ def test_add_request(self, is_hybrid_model: bool): assert dynamic_context.request_kv_length_offsets[0] == 0 assert dynamic_context.request_kv_block_counts[0] == 2 assert dynamic_context.request_last_kv_block_id[0].item() == ( - 553 if is_hybrid_model else 489 + 554 if is_hybrid_model else 489 ) assert dynamic_context.request_last_kv_block_offset[0].item() == 15 assert torch.all( @@ -736,13 +741,13 @@ def test_update_request(self, is_hybrid_model: bool): dynamic_context.request_to_kv_block_ids[0:10].cpu() == torch.tensor( [ - [543, 546, -1, -1], - [544, 543, -1, -1], - [548, 550, -1, -1], + [544, 547, -1, -1], + [545, 544, -1, -1], [549, 551, -1, -1], - [547, -1, -1, -1], - [545, -1, -1, -1], - [552, -1, -1, -1], + 
[550, 552, -1, -1], + [548, -1, -1, -1], + [546, -1, -1, -1], + [553, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, -1, -1], @@ -1198,3 +1203,51 @@ def test_calculate_and_store_log_probs(self): ) current_global_token_offset += expected_len + + @pytest.mark.internal + def test_pipeline_parallel_uneven_layers(self): + """ + Test that DynamicInferenceContext synchronizes the total block count across + pipeline stages when they have unequal layer counts. + """ + pp_size = 2 + self._setup_model_parallel_group(tensor_parallel_size=1, pipeline_parallel_size=pp_size) + + rank = parallel_state.get_pipeline_model_parallel_rank() + + if rank == 0: + local_num_layers = 12 + else: + local_num_layers = 4 + + context = DynamicInferenceContext( + params_dtype=torch.float32, + num_layers=local_num_layers, + kv_channels=64, + num_attention_heads=8, + max_sequence_length=128, + buffer_size_gb=0.1, + block_size_tokens=16, + max_tokens=1024, + pipeline_model_parallel_size=pp_size, + tensor_model_parallel_size=1, + unified_memory_level=0, + ) + + # Collect the total block counts on each rank + local_total_blocks = torch.tensor( + [context.block_allocator.total_count], device='cuda', dtype=torch.long + ) + gathered_block_counts = [torch.zeros_like(local_total_blocks) for _ in range(pp_size)] + torch.distributed.all_gather( + gathered_block_counts, + local_total_blocks, + group=parallel_state.get_pipeline_model_parallel_group(), + ) + all_counts = [t.item() for t in gathered_block_counts] + + # Verify that there is only 1 unique value across all ranks + unique_counts = set(all_counts) + assert ( + len(unique_counts) == 1 + ), f"Block counts were not synchronized across ranks. 
Gathered: {all_counts}" diff --git a/tests/unit_tests/inference/engines/test_dynamic_engine.py b/tests/unit_tests/inference/engines/test_dynamic_engine.py index 21f6d94dd1a..d5803b3638e 100644 --- a/tests/unit_tests/inference/engines/test_dynamic_engine.py +++ b/tests/unit_tests/inference/engines/test_dynamic_engine.py @@ -43,12 +43,12 @@ from megatron.core.models.gpt.gpt_model import GPTModel from megatron.core.models.mamba.mamba_layer_specs import mamba_stack_spec from megatron.core.models.mamba.mamba_model import MambaModel +from megatron.core.ssm.mamba_mixer import _check_mamba_sequence_packing_support from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed from megatron.core.transformer.cuda_graphs import CudaGraphManager, _CudagraphGlobalRecord from megatron.core.transformer.enums import CudaGraphScope from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import ( - check_mamba_sequence_packing_support, get_mamba_inference_state_config_from_model, is_fa_min_version, is_te_min_version, @@ -59,7 +59,7 @@ def skip_if_mamba_sequence_packing_not_available(model_provider: str): if model_provider == "mamba": sequence_packing_available, reason_for_no_sequence_packing = ( - check_mamba_sequence_packing_support() + _check_mamba_sequence_packing_support() ) if not sequence_packing_available: pytest.skip(reason_for_no_sequence_packing) @@ -90,6 +90,7 @@ class DynamicEngineTestConfig: num_gap_steps: int = 2 context_buffer_size_gb: float = 0.1 # enough room for all tokens. 
+ context_paused_buffer_size_gb: float | None = None context_block_size_tokens: int = 256 context_max_requests: Optional[int] = None context_max_tokens: Optional[int] = None @@ -106,6 +107,7 @@ class DynamicEngineTestConfig: return_log_probs: bool = False materialize_only_last_token_logits: bool = True skip_prompt_log_probs: bool = False + enable_chunked_prefill: bool = False cuda_graph_scope: List[CudaGraphScope] = field( default_factory=lambda: [CudaGraphScope.full_iteration] ) @@ -132,6 +134,10 @@ def __post_init__(self): assert self.num_tokens_total is not None self.max_sequence_length = self.num_tokens_total + # Default paused buffer size. + if self.context_paused_buffer_size_gb is None: + self.context_paused_buffer_size_gb = 0.2 * self.context_buffer_size_gb + @dataclass class DynamicEngineTestEnv: @@ -224,12 +230,14 @@ def _build_inference_context( num_attention_heads=transformer_config.num_query_groups, max_sequence_length=test_config.max_sequence_length, num_cuda_graphs=test_config.num_cuda_graphs, - use_cuda_graphs_for_non_decode_steps=not test_config.model_provider == "mamba", + use_cuda_graphs_for_non_decode_steps=True, buffer_size_gb=test_config.context_buffer_size_gb, + paused_buffer_size_gb=test_config.context_paused_buffer_size_gb, block_size_tokens=test_config.context_block_size_tokens, max_requests=test_config.context_max_requests, max_tokens=test_config.context_max_tokens, tensor_model_parallel_size=transformer_config.tensor_model_parallel_size, + pipeline_model_parallel_size=transformer_config.pipeline_model_parallel_size, mamba_inference_state_config=mamba_inference_state_config, materialize_only_last_token_logits=test_config.materialize_only_last_token_logits, use_flashinfer_fused_rope=None, # default to using flash-infer if available @@ -421,6 +429,7 @@ def _build_test_env(cls, test_config): inference_context, random_seed=test_config.random_seed, enable_cuda_graph=transformer_config.cuda_graph_impl == "local", + 
enable_chunked_prefill=test_config.enable_chunked_prefill, ) # Test env. @@ -679,12 +688,13 @@ def test_cuda_graph_token_counts(self) -> None: # Test num_cuda_graphs. for num_cuda_graphs, expected_cuda_graph_token_counts in [ - (0, [40]), - (1, [40]), - (2, [40, 24]), - (4, [40, 32, 16]), - (8, [40, 32, 24, 16, 8]), - (16, [40, 32, 24, 16, 8]), + (0, [80]), + (1, [80]), + (2, [80, 40]), + (4, [80, 72, 48, 24]), + (8, [80, 64, 48, 32, 16]), + (16, [80, 72, 64, 56, 48, 40, 32, 24, 16, 8]), + (32, [80, 72, 64, 56, 48, 40, 32, 24, 16, 8]), ]: # Build cuda graphs (inside dynamic engine). @@ -1146,7 +1156,7 @@ def test_chunked_prefill(self, model_provider: str): num_tokens_to_generate = 16 max_sequence_length = prompt_length + num_tokens_to_generate - # Configure context to force chunking (chunked prefill is enabled by default) + # Configure context to force chunking env = self._run_test( num_requests=1, min_prompt_length=prompt_length, @@ -1156,6 +1166,7 @@ def test_chunked_prefill(self, model_provider: str): model_provider=model_provider, context_block_size_tokens=256, context_max_tokens=1000, + enable_chunked_prefill=True, ) @pytest.mark.internal @@ -1185,6 +1196,7 @@ def test_chunked_prefill_with_log_probs(self): model_provider="gpt", context_block_size_tokens=256, context_max_tokens=1000, + enable_chunked_prefill=True, ) # Validate results @@ -1365,13 +1377,13 @@ def test_max_requests(self, max_requests: int | None): step_count = env.engine.step_count context = env.engine.context if max_requests is None: - assert context.max_active_requests == 408 + assert context.max_requests == 816 assert step_count == 22 else: assert max_requests < len(env.requests), ( f"Test is only useful if max_requests ({max_requests}) < " f"num_requests ({len(env.requests)})." 
) - assert context.max_active_requests == 4 + assert context.max_requests == 4 assert step_count == 34 - assert context.block_allocator.active_count == 409 + assert context.block_allocator.active_count == 655 diff --git a/tests/unit_tests/inference/test_stop_words.py b/tests/unit_tests/inference/test_stop_words.py new file mode 100644 index 00000000000..31665c0bb81 --- /dev/null +++ b/tests/unit_tests/inference/test_stop_words.py @@ -0,0 +1,226 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Unit tests for stop word functionality in dynamic inference.""" + +from dataclasses import dataclass, field +from typing import List, Optional +from unittest.mock import MagicMock, patch + +import pytest + +from megatron.core.inference.sampling_params import SamplingParams + + +class MockDynamicInferenceRequest: + """Mock class for DynamicInferenceRequest to test stop word detection.""" + + def __init__( + self, + request_id: int, + generated_tokens: Optional[List[int]] = None, + stop_word_ids: Optional[List[List[int]]] = None, + sampling_params: Optional[SamplingParams] = None, + ): + self.request_id = request_id + self.generated_tokens = generated_tokens if generated_tokens is not None else [] + self.stop_word_ids = stop_word_ids + self.sampling_params = sampling_params or SamplingParams() + + +class TestStopWordDetection: + """Test stop word detection logic.""" + + def _check_stop_words_for_request_post_append( + self, request: MockDynamicInferenceRequest + ) -> bool: + """ + Check if a request should stop due to stop words (after token is appended). 
+ + This mirrors the logic in DynamicInferenceEngine._check_stop_words_for_request_post_append + """ + # Check if request has stop words configured + if request.stop_word_ids is None or len(request.stop_word_ids) == 0: + return False + + generated_tokens = request.generated_tokens + + # Check if the sequence ends with any stop word + for stop_word_ids in request.stop_word_ids: + stop_len = len(stop_word_ids) + if len(generated_tokens) >= stop_len: + # Check if the last stop_len tokens match the stop word + if list(generated_tokens[-stop_len:]) == stop_word_ids: + return True + + return False + + def test_no_stop_words_configured(self): + """Test that requests without stop words configured don't trigger stop.""" + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200, 300], stop_word_ids=None + ) + assert self._check_stop_words_for_request_post_append(request) is False + + def test_empty_stop_words_list(self): + """Test that empty stop words list doesn't trigger stop.""" + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200, 300], stop_word_ids=[] + ) + assert self._check_stop_words_for_request_post_append(request) is False + + def test_single_token_stop_word_match(self): + """Test detection of single-token stop word.""" + # Stop word is token 300 + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200, 300], stop_word_ids=[[300]] + ) + assert self._check_stop_words_for_request_post_append(request) is True + + def test_single_token_stop_word_no_match(self): + """Test no detection when single-token stop word doesn't match.""" + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200, 300], stop_word_ids=[[400]] + ) + assert self._check_stop_words_for_request_post_append(request) is False + + def test_multi_token_stop_word_match(self): + """Test detection of multi-token stop word.""" + # Stop word is tokens [200, 300] + request = MockDynamicInferenceRequest( 
+ request_id=1, generated_tokens=[100, 200, 300], stop_word_ids=[[200, 300]] + ) + assert self._check_stop_words_for_request_post_append(request) is True + + def test_multi_token_stop_word_no_match_partial(self): + """Test no detection when only partial stop word matches.""" + # Stop word is [200, 300], but generated ends with [100, 200] + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200], stop_word_ids=[[200, 300]] + ) + assert self._check_stop_words_for_request_post_append(request) is False + + def test_multi_token_stop_word_no_match_wrong_order(self): + """Test no detection when tokens are present but in wrong order.""" + # Stop word is [200, 300], but generated ends with [300, 200] + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 300, 200], stop_word_ids=[[200, 300]] + ) + assert self._check_stop_words_for_request_post_append(request) is False + + def test_multiple_stop_words_first_matches(self): + """Test with multiple stop words where first one matches.""" + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200, 300], stop_word_ids=[[300], [400], [500]] + ) + assert self._check_stop_words_for_request_post_append(request) is True + + def test_multiple_stop_words_second_matches(self): + """Test with multiple stop words where second one matches.""" + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200, 400], stop_word_ids=[[300], [400], [500]] + ) + assert self._check_stop_words_for_request_post_append(request) is True + + def test_multiple_stop_words_none_match(self): + """Test with multiple stop words where none match.""" + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200, 600], stop_word_ids=[[300], [400], [500]] + ) + assert self._check_stop_words_for_request_post_append(request) is False + + def test_stop_word_longer_than_generated(self): + """Test that stop word longer than generated tokens doesn't 
crash.""" + # Stop word is 5 tokens, but only 3 tokens generated + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200, 300], stop_word_ids=[[1, 2, 3, 4, 5]] + ) + assert self._check_stop_words_for_request_post_append(request) is False + + def test_stop_word_exact_length_match(self): + """Test stop word that matches entire generated sequence.""" + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200, 300], stop_word_ids=[[100, 200, 300]] + ) + assert self._check_stop_words_for_request_post_append(request) is True + + def test_empty_generated_tokens(self): + """Test with no generated tokens.""" + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[], stop_word_ids=[[300]] + ) + assert self._check_stop_words_for_request_post_append(request) is False + + def test_stop_word_in_middle_not_end(self): + """Test that stop word in middle of sequence doesn't trigger (only end matters).""" + # Stop word is [200], which is in middle but not at end + request = MockDynamicInferenceRequest( + request_id=1, generated_tokens=[100, 200, 300], stop_word_ids=[[200]] + ) + assert self._check_stop_words_for_request_post_append(request) is False + + +class TestStopWordTrackingFlow: + """Test the stop word tracking flow between steps.""" + + def test_stop_word_finished_ids_tracking(self): + """Test that stop_word_finished_request_ids correctly tracks requests.""" + stop_word_finished_request_ids = set() + stop_word_being_finished_ids = set() + + # Simulate detecting stop word in post_process_requests + request_id = 42 + stop_word_finished_request_ids.add(request_id) + + assert request_id in stop_word_finished_request_ids + assert len(stop_word_finished_request_ids) == 1 + + # Simulate callback being called + active_request_ids = [42, 43, 44] + result = stop_word_finished_request_ids & set(active_request_ids) + stop_word_being_finished_ids = result + stop_word_finished_request_ids -= result + + assert 
request_id in stop_word_being_finished_ids + assert request_id not in stop_word_finished_request_ids + + def test_skip_extra_token_for_stop_word_requests(self): + """Test that extra token is skipped for stop word finished requests.""" + stop_word_being_finished_ids = {42} + generated_tokens = { + 42: [100, 200, 300], # Already has tokens from previous step + 43: [100, 200], + } + + new_tokens = {42: 999, 43: 301} # New tokens to potentially append + + for request_id, token in new_tokens.items(): + if request_id not in stop_word_being_finished_ids: + generated_tokens[request_id].append(token) + + # Request 42 should NOT have the extra token + assert generated_tokens[42] == [100, 200, 300] + # Request 43 should have the new token + assert generated_tokens[43] == [100, 200, 301] + + +class TestSamplingParamsStopWords: + """Test SamplingParams stop words field.""" + + def test_stop_words_default_none(self): + """Test that stop_words defaults to None.""" + params = SamplingParams() + assert params.stop_words is None + + def test_stop_words_can_be_set(self): + """Test that stop_words can be set.""" + params = SamplingParams(stop_words=["STOP", "END"]) + assert params.stop_words == ["STOP", "END"] + + def test_stop_words_empty_list(self): + """Test that stop_words can be empty list.""" + params = SamplingParams(stop_words=[]) + assert params.stop_words == [] + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/unit_tests/inference/test_wandb_logging.py b/tests/unit_tests/inference/test_wandb_logging.py index 1d5d054b80e..cab464af503 100644 --- a/tests/unit_tests/inference/test_wandb_logging.py +++ b/tests/unit_tests/inference/test_wandb_logging.py @@ -85,8 +85,7 @@ def test_get_kvcache_utilization_stats_with_requests(self): assert 'block_count_avail' in stats assert 'active_token_count' in stats assert 'total_request_count' in stats - assert 'max_total_requests' in stats - assert 'max_active_requests' in stats + assert 'max_requests' in stats 
# Verify values for empty context assert stats['allocated_blocks'] == 0 @@ -133,10 +132,8 @@ def test_get_kvcache_utilization_stats_with_requests(self): assert stats_after['total_blocks'] > 0 # Verify that max_requests remains constant - assert stats_after['max_total_requests'] == stats['max_total_requests'] - assert stats_after['max_total_requests'] > 0 - assert stats_after['max_active_requests'] == stats['max_active_requests'] - assert stats_after['max_active_requests'] > 0 + assert stats_after['max_requests'] == stats['max_requests'] + assert stats_after['max_requests'] > 0 # Verify block availability decreased after allocation assert stats_after['block_count_avail'] < stats['block_count_avail'] @@ -180,8 +177,7 @@ def test_kvcache_utilization_stats_types(self): 'block_count_avail', 'active_token_count', 'total_request_count', - 'max_total_requests', - 'max_active_requests', + 'max_requests', ] for field in int_fields: diff --git a/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py b/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py index ebf558d3fa9..0885401e7a0 100644 --- a/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py +++ b/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py @@ -118,6 +118,8 @@ def setup_model( num_layers=transformer_config.num_layers // pipeline_model_parallel_size, kv_channels=transformer_config.kv_channels, num_attention_heads=transformer_config.num_attention_heads, + tensor_model_parallel_size=transformer_config.tensor_model_parallel_size, + pipeline_model_parallel_size=transformer_config.pipeline_model_parallel_size, max_sequence_length=2048, buffer_size_gb=0.2, materialize_only_last_token_logits=False, diff --git a/tests/unit_tests/models/test_gpt_model.py b/tests/unit_tests/models/test_gpt_model.py index 6936cfbe60a..cf3bd40ee4b 100644 --- 
a/tests/unit_tests/models/test_gpt_model.py +++ b/tests/unit_tests/models/test_gpt_model.py @@ -8,9 +8,13 @@ import torch from packaging import version from pytest import approx +from transformer_engine.pytorch.fp8 import check_fp8_support from megatron.core import parallel_state from megatron.core.hyper_comm_grid import HyperCommGrid +from megatron.core.inference.contexts.dynamic_context import DynamicInferenceContext +from megatron.core.inference.inference_request import DynamicInferenceRequest +from megatron.core.inference.sampling_params import SamplingParams from megatron.core.models.gpt.gpt_layer_specs import ( get_gpt_layer_with_transformer_engine_spec, get_mlp_module_spec, @@ -18,8 +22,9 @@ from megatron.core.models.gpt.gpt_model import GPTModel from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed +from megatron.core.transformer.module import Float16Module from megatron.core.transformer.transformer_config import TransformerConfig -from megatron.core.utils import is_te_min_version +from megatron.core.utils import is_fa_min_version, is_te_min_version from tests.unit_tests.test_utilities import Utils @@ -333,3 +338,108 @@ def test_gpt_model_with_custom_pg(self, tp_size, dp_size, cp_size): assert logits.shape[0] == sequence_length assert logits.shape[1] == micro_batch_size assert logits.shape[2] == self.gpt_model.config.hidden_size + + +class TestGPTWithDynamicInference: + """Tests GPTModel with dynamic inference.""" + + @torch.inference_mode() + def setup_method(self, method): + fp8_available, reason_for_no_fp8 = check_fp8_support() + if not fp8_available: + pytest.skip(reason_for_no_fp8) + + os.environ.pop('NVTE_FUSED_ATTN', None) + os.environ.pop('NVTE_FLASH_ATTN', None) + os.environ.pop('NVTE_UNFUSED_ATTN', None) + Utils.initialize_model_parallel(1, 1) + model_parallel_cuda_manual_seed(123) + + transformer_config = TransformerConfig( + num_layers=8, + 
hidden_size=256, + num_attention_heads=8, + use_cpu_initialization=True, + params_dtype=torch.bfloat16, + bf16=True, + fp8="hybrid", + fp8_recipe="tensorwise", + ) + + self.gpt_model = GPTModel( + config=transformer_config, + transformer_layer_spec=get_gpt_layer_with_transformer_engine_spec(), + vocab_size=128, + max_sequence_length=DynamicInferenceContext.TOKEN_ROUNDER, + parallel_output=True, + ) + self.gpt_model = Float16Module(self.gpt_model.config, self.gpt_model) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + @pytest.mark.internal + @pytest.mark.skipif( + not is_fa_min_version("2.7.3"), reason="need latest flash attn for dynamic batching" + ) + @torch.inference_mode() + def test_dynamic_inference_padding_with_fp8(self): + """ + Tests that logits for padded tokens are zeroed out for fp8 inference. + """ + self.gpt_model.cuda() + self.gpt_model.eval() + config = self.gpt_model.config + + inference_context = DynamicInferenceContext( + params_dtype=config.params_dtype, + num_layers=config.num_layers, + kv_channels=config.hidden_size // config.num_attention_heads, + num_attention_heads=config.num_attention_heads, + max_sequence_length=self.gpt_model.module.max_sequence_length, + buffer_size_gb=1.0, + block_size_tokens=256, + materialize_only_last_token_logits=False, + ) + + # Add a request with 10 tokens. Since 10 is not a multiple of 64, + # this will create padding up to the padded length of 64. + active_token_count = 10 + request = DynamicInferenceRequest( + request_id=0, + prompt_tokens=torch.arange(0, active_token_count, dtype=torch.long, device='cuda'), + sampling_params=SamplingParams(num_tokens_to_generate=1), + ) + inference_context.add_request(request) + + # Prepares the context, including calculating the padded token count. 
+ inference_context.initialize_attention_state() + + assert inference_context.active_token_count == active_token_count + assert inference_context.padded_active_token_count == DynamicInferenceContext.TOKEN_ROUNDER + + # Prepare inputs for the forward pass. + padded_token_count = inference_context.padded_active_token_count + input_ids, position_ids = inference_context.current_input_and_position_ids() + + # Run the forward pass with inference parameters. + logits = self.gpt_model.forward( + input_ids=input_ids, + position_ids=position_ids, + attention_mask=None, + inference_context=inference_context, + runtime_gather_output=True, + ) + + # Verify the output shape. + assert logits.shape[0] == 1 + assert logits.shape[1] == padded_token_count + assert logits.shape[2] == self.gpt_model.module.vocab_size + + # Extract the logits corresponding to the padding tokens (from index 10 to 63). + padding_start_idx = inference_context.active_token_count + padding_end_idx = inference_context.padded_active_token_count + padding_logits = logits[0, padding_start_idx:padding_end_idx, :] + + # Assert that all padding logits are zero. + assert torch.all(padding_logits == 0.0), "Logits for padding tokens are not all zero." diff --git a/tests/unit_tests/models/test_mamba_model.py b/tests/unit_tests/models/test_mamba_model.py index ca42ae496be..9eb7b2dea9a 100644 --- a/tests/unit_tests/models/test_mamba_model.py +++ b/tests/unit_tests/models/test_mamba_model.py @@ -1,18 +1,32 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+import os from datetime import timedelta +from itertools import accumulate import pytest import torch +from transformer_engine.pytorch.fp8 import check_fp8_support from megatron.core import parallel_state from megatron.core.hyper_comm_grid import HyperCommGrid from megatron.core.inference.contexts import BaseInferenceContext, StaticInferenceContext +from megatron.core.inference.contexts.dynamic_context import DynamicInferenceContext +from megatron.core.inference.inference_request import DynamicInferenceRequest +from megatron.core.inference.sampling_params import SamplingParams from megatron.core.models.mamba.mamba_layer_specs import mamba_stack_spec from megatron.core.models.mamba.mamba_model import MambaModel +from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed from megatron.core.transformer import TransformerConfig -from megatron.core.utils import divide, is_torch_min_version +from megatron.core.transformer.enums import AttnBackend +from megatron.core.transformer.module import Float16Module +from megatron.core.utils import ( + divide, + get_mamba_inference_state_config_from_model, + is_fa_min_version, + is_torch_min_version, +) from tests.unit_tests.test_utilities import Utils @@ -62,7 +76,6 @@ def test_set_input_tensor(self): assert self.model.decoder.input_tensor.shape[2] == config.hidden_size def test_forward(self): - config: TransformerConfig = self.model.config sequence_length = self.model.max_sequence_length micro_batch_size = 2 @@ -83,8 +96,70 @@ def test_forward(self): assert logits.shape[1] == sequence_length assert logits.shape[2] == self.model.vocab_size + def test_forward_packed_sequence(self): + os.environ.pop('NVTE_FUSED_ATTN', None) + os.environ.pop('NVTE_FLASH_ATTN', None) + os.environ.pop('NVTE_UNFUSED_ATTN', None) + model_config = TransformerConfig( + num_layers=3, # 1 Mamba layer, 1 attention layer, 1 MLP layer + hidden_size=256, # The Mamba layer places 
several constraints on this + num_attention_heads=4, + use_cpu_initialization=True, + bf16=True, # Needed for backend=flash + params_dtype=torch.bfloat16, # Needed for backend=flash + attention_backend=AttnBackend.flash, # Needed for packed sequence + ) + vocab_size = 100 + model = MambaModel( + config=model_config, + mamba_stack_spec=mamba_stack_spec, + vocab_size=vocab_size, + max_sequence_length=12, + hybrid_attention_ratio=0.3, + hybrid_mlp_ratio=0.3, + ) + + sequence_length = model.max_sequence_length + micro_batch_size = 1 # must be 1 for packed sequence + + model.cuda() + + data = [i % vocab_size for i in range(sequence_length)] + input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() + lengths = [4, 3, 5] + assert sum(lengths) == sequence_length + positions = [i for n in lengths for i in range(n)] + position_ids = ( + torch.tensor(positions, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() + ) + attention_mask = None + + cumsum = [0] + list(accumulate(lengths)) + cu_seqlens = torch.tensor(cumsum, dtype=torch.int32).cuda() + max_seqlen = max(lengths) + + packed_seq_params = PackedSeqParams( + qkv_format="thd", + cu_seqlens_q=cu_seqlens, + cu_seqlens_kv=cu_seqlens, + cu_seqlens_q_padded=None, + cu_seqlens_kv_padded=None, + max_seqlen_q=max_seqlen, + max_seqlen_kv=max_seqlen, + ) + + logits = model.forward( + input_ids=input_ids, + position_ids=position_ids, + attention_mask=attention_mask, + packed_seq_params=packed_seq_params, + ) + + assert logits.shape[0] == micro_batch_size + assert logits.shape[1] == sequence_length + assert logits.shape[2] == model.vocab_size + def test_inference(self): - config: TransformerConfig = self.model.config micro_batch_size = 2 inference_context: BaseInferenceContext = StaticInferenceContext( max_batch_size=micro_batch_size, max_sequence_length=self.model.max_sequence_length @@ -218,3 +293,111 @@ def test_with_custom_process_groups(self, tmp_path, tp_size, cp_size, pp_size): assert 
logits.shape[0] == micro_batch_size assert logits.shape[1] == sequence_length assert logits.shape[2] == divide(model.vocab_size, tp_size) + + +class TestMambaWithDynamicInference: + """Tests MambaModel with dynamic inference.""" + + @torch.inference_mode() + def setup_method(self, method): + fp8_available, reason_for_no_fp8 = check_fp8_support() + if not fp8_available: + pytest.skip(reason_for_no_fp8) + + Utils.initialize_model_parallel(1, 1) + model_parallel_cuda_manual_seed(123) + + model_config = TransformerConfig( + num_layers=2, + hidden_size=512, + num_attention_heads=4, + use_cpu_initialization=True, + params_dtype=torch.bfloat16, + bf16=True, + fp8="hybrid", + fp8_recipe="tensorwise", + ) + + self.model = MambaModel( + config=model_config, + mamba_stack_spec=mamba_stack_spec, + vocab_size=128, + max_sequence_length=DynamicInferenceContext.TOKEN_ROUNDER, + hybrid_attention_ratio=0.5, + hybrid_mlp_ratio=0.0, + ) + self.model = Float16Module(self.model.config, self.model) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + @pytest.mark.internal + @pytest.mark.skipif( + not is_fa_min_version("2.7.3"), reason="need latest flash attn for dynamic batching" + ) + @torch.inference_mode() + def test_dynamic_inference_padding_with_fp8(self): + """ + Tests that logits for padded tokens are zeroed out for fp8 inference. 
+ """ + self.model.cuda() + self.model.eval() + config = self.model.config + + mamba_inference_state_config = get_mamba_inference_state_config_from_model( + self.model.module + ) + + inference_context = DynamicInferenceContext( + params_dtype=config.params_dtype, + num_layers=config.num_layers, + kv_channels=config.hidden_size // config.num_attention_heads, + num_attention_heads=config.num_attention_heads, + max_sequence_length=self.model.module.max_sequence_length, + buffer_size_gb=1.0, + block_size_tokens=256, + materialize_only_last_token_logits=False, + mamba_inference_state_config=mamba_inference_state_config, + ) + + # Add a request with 10 tokens. Since 10 is not a multiple of 64 (TOKEN_ROUNDER), + # this will create padding up to the padded length of 64. + active_token_count = 10 + request = DynamicInferenceRequest( + request_id=0, + prompt_tokens=torch.arange(0, active_token_count, dtype=torch.long, device='cuda'), + sampling_params=SamplingParams(num_tokens_to_generate=1), + ) + inference_context.add_request(request) + + # Prepares the context, including calculating the padded token count. + inference_context.initialize_attention_state() + + assert inference_context.active_token_count == active_token_count + assert inference_context.padded_active_token_count == DynamicInferenceContext.TOKEN_ROUNDER + + # Prepare inputs for the forward pass. + padded_token_count = inference_context.padded_active_token_count + input_ids, position_ids = inference_context.current_input_and_position_ids() + + # Run the forward pass with inference parameters. + logits = self.model.forward( + input_ids=input_ids, + position_ids=position_ids, + attention_mask=None, + inference_context=inference_context, + runtime_gather_output=True, + ) + + # Verify the output shape. 
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Golden-config drift detection and smoke tests for the Mamba MoE (nano v3) model."""

import hashlib
import inspect
import json
import os
import sys
from typing import Any, Dict, Mapping, Tuple

import pytest  # type: ignore[import]
import torch

from megatron.core.models.mamba.mamba_layer_specs import mamba_stack_spec
from megatron.core.models.mamba.mamba_model import MambaModel
from megatron.core.num_microbatches_calculator import destroy_num_microbatches_calculator
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from megatron.core.transformer import TransformerConfig
from megatron.core.transformer.enums import AttnBackend
from megatron.training.arguments import core_transformer_config_from_args, parse_args, validate_args
from megatron.training.global_vars import (
    destroy_global_vars,
    get_args,
    set_args,
    set_global_variables,
)
from tests.unit_tests.test_utilities import Utils

# Serialized snapshot of the expected TransformerConfig for the nano v3 Mamba MoE
# model. Any drift between this dict and the live config fails test_constructor.
GOLDEN_CONFIG: Dict[str, Any] = {
    "_cpu_offloading_context": None,
    "account_for_embedding_in_pipeline_split": False,
    "account_for_loss_in_pipeline_split": False,
    "activation_func": "megatron.core.activations.squared_relu",
    "activation_func_clamp_value": None,
    "activation_func_fp8_input_store": False,
    "add_bias_linear": False,
    "add_qkv_bias": False,
    "apply_query_key_layer_scaling": False,
    "apply_residual_connection_post_layernorm": False,
    "apply_rope_fusion": False,
    "async_tensor_model_parallel_allreduce": True,
    "attention_backend": {
        "__objclass__": "megatron.core.transformer.enums.AttnBackend",
        "_name_": "flash",
        "_sort_order_": 0,
        "_value_": 1,
    },
    "attention_dropout": 0.0,
    "attention_output_gate": False,
    "attention_softmax_in_fp32": False,
    "autocast_dtype": "torch.bfloat16",
    "barrier_with_L1_time": True,
    "batch_invariant_mode": False,
    "batch_p2p_comm": True,
    "batch_p2p_sync": True,
    "bf16": True,
    "bias_activation_fusion": False,
    "bias_dropout_fusion": True,
    "calculate_per_token_loss": False,
    "clone_scatter_output_in_embedding": True,
    "config_logger_dir": "",
    "context_parallel_size": 1,
    "cp_comm_type": "p2p",
    "cpu_offloading": False,
    "cpu_offloading_activations": True,
    "cpu_offloading_double_buffering": False,
    "cpu_offloading_num_layers": 0,
    "cpu_offloading_weights": False,
    "cross_entropy_fusion_impl": "native",
    "cross_entropy_loss_fusion": True,
    "cuda_graph_impl": "none",
    "cuda_graph_retain_backward_graph": False,
    "cuda_graph_scope": [],
    "cuda_graph_use_single_mempool": False,
    "cuda_graph_warmup_steps": 3,
    "deallocate_pipeline_outputs": True,
    "defer_embedding_wgrad_compute": False,
    "delay_wgrad_compute": False,
    "deterministic_mode": False,
    "disable_bf16_reduced_precision_matmul": False,
    "disable_parameter_transpose_cache": False,
    "distribute_saved_activations": False,
    "dsa_indexer_head_dim": None,
    "dsa_indexer_loss_coeff": None,
    "dsa_indexer_n_heads": None,
    "dsa_indexer_topk": None,
    "dsa_indexer_use_sparse_loss": False,
    "embedding_init_method": {},
    "embedding_init_method_std": 0.014,
    "enable_autocast": False,
    "enable_cuda_graph": False,
    "ep_overlap_early_attn_memory_release": False,
    "experimental_attention_variant": None,
    "expert_model_parallel_size": 4,
    "expert_tensor_parallel_size": 1,
    "external_cuda_graph": False,
    "ffn_hidden_size": 1856,
    "finalize_model_grads_func": None,
    "first_last_layers_bf16": False,
    "flash_decode": False,
    "fp16": False,
    "fp32_residual_connection": False,
    "fp4": None,
    "fp4_param": False,
    "fp4_quantizer_factory": None,
    "fp4_recipe": "nvfp4",
    "fp8": None,
    "fp8_amax_compute_algo": "most_recent",
    "fp8_amax_history_len": 1,
    "fp8_dot_product_attention": False,
    "fp8_interval": 1,
    "fp8_margin": 0,
    "fp8_multi_head_attention": False,
    "fp8_param": False,
    "fp8_quantizer_factory": None,
    "fp8_recipe": "delayed",
    "fp8_wgrad": True,
    "fused_single_qkv_rope": False,
    "gated_linear_unit": False,
    "glu_linear_offset": 0.0,
    "grad_scale_func": None,
    "grad_sync_func": None,
    "gradient_accumulation_fusion": True,
    # NOTE: "hetereogenous" matches the (misspelled) attribute name in the live
    # config; do not "fix" the key or the comparison will report drift.
    "hetereogenous_dist_checkpoint": False,
    "heterogeneous_block_specs": False,
    "hidden_dropout": 0.0,
    "hidden_size": 2688,
    "hierarchical_context_parallel_sizes": None,
    "inference_fuse_tp_communication": False,
    "inference_rng_tracker": False,
    "inference_sampling_seed": 42,
    "init_method": {},
    "init_method_std": 0.014,
    "init_model_with_meta_device": False,
    "is_hybrid_model": True,
    "kitchen_attention_backend": "sdpa",
    "kv_channels": 128,
    "layernorm_epsilon": 1e-05,
    "layernorm_zero_centered_gamma": False,
    "linear_attention_freq": None,
    "linear_conv_kernel_dim": 4,
    "linear_key_head_dim": 128,
    "linear_num_key_heads": 16,
    "linear_num_value_heads": 32,
    "linear_value_head_dim": 128,
    "log_max_attention_logit": False,
    "mamba_head_dim": 64,
    "mamba_num_groups": 8,
    "mamba_num_heads": 64,
    "mamba_state_dim": 128,
    "masked_softmax_fusion": True,
    "memory_efficient_layer_norm": False,
    "microbatch_group_size_per_vp_stage": 1,
    "mlp_chunks_for_prefill": 1,
    "moe_apply_probs_on_input": False,
    "moe_aux_loss_coeff": 0.0,
    "moe_deepep_num_sms": 20,
    "moe_enable_deepep": False,
    "moe_expert_capacity_factor": None,
    "moe_extended_tp": False,
    "moe_ffn_hidden_size": 1856,
    "moe_flex_dispatcher_backend": "deepep",
    "moe_grouped_gemm": True,
    "moe_hybridep_num_sms": 16,
    "moe_input_jitter_eps": None,
    "moe_latent_size": None,
    "moe_layer_freq": 1,
    "moe_layer_recompute": False,
    "moe_pad_expert_input_to_capacity": False,
    "moe_per_layer_logging": False,
    "moe_permute_fusion": False,
    "moe_router_bias_update_rate": 0.001,
    "moe_router_dtype": "fp64",
    "moe_router_enable_expert_bias": True,
    "moe_router_force_load_balancing": False,
    "moe_router_fusion": False,
    "moe_router_group_topk": None,
    "moe_router_load_balancing_type": "aux_loss",
    "moe_router_num_groups": None,
    "moe_router_padding_for_fp8": False,
    "moe_router_padding_for_quantization": False,
    "moe_router_pre_softmax": False,
    "moe_router_score_function": "sigmoid",
    "moe_router_topk": 6,
    "moe_router_topk_limited_devices": None,
    "moe_router_topk_scaling_factor": 2.5,
    "moe_shared_expert_gate": False,
    "moe_shared_expert_intermediate_size": 3712,
    "moe_shared_expert_overlap": False,
    "moe_token_dispatcher_type": "alltoall",
    "moe_token_drop_policy": "probs",
    "moe_token_dropping": False,
    "moe_use_legacy_grouped_gemm": False,
    "moe_z_loss_coeff": None,
    "mrope_section": None,
    "mtp_loss_scaling_factor": 0.1,
    "mtp_num_layers": None,
    "mtp_standalone": False,
    "multi_latent_attention": False,
    "no_rope_freq": None,
    "no_sync_func": None,
    "normalization": "RMSNorm",
    "num_attention_heads": 32,
    "num_layers": 52,
    "num_layers_at_end_in_bf16": 1,
    "num_layers_at_start_in_bf16": 1,
    "num_layers_in_first_pipeline_stage": None,
    "num_layers_in_last_pipeline_stage": None,
    "num_microbatches_with_partial_activation_checkpoints": None,
    "num_moe_experts": 128,
    "num_query_groups": 2,
    "output_layer_init_method": {},
    "overlap_moe_expert_parallel_comm": False,
    "overlap_p2p_comm": False,
    "overlap_p2p_comm_warmup_flush": False,
    "param_sync_func": None,
    "params_dtype": "torch.bfloat16",
    "perform_initialization": True,
    "persist_layer_norm": True,
    "pipeline_dtype": "torch.bfloat16",
    "pipeline_model_parallel_comm_backend": None,
    "pipeline_model_parallel_layout": None,
    "pipeline_model_parallel_size": 1,
    "qk_clip": False,
    "qk_clip_alpha": 0.5,
    "qk_clip_threshold": 100,
    "qk_l2_norm": False,
    "qk_layernorm": False,
    "quant_recipe": None,
    "recompute_granularity": None,
    "recompute_method": None,
    "recompute_modules": ["core_attn"],
    "recompute_num_layers": None,
    "rotary_interleaved": False,
    "sequence_parallel": True,
    "softmax_scale": None,
    "softmax_type": "vanilla",
    "symmetric_ar_type": None,
    "tensor_model_parallel_size": 2,
    "test_mode": False,
    "timers": None,
    "tp_comm_atomic_ag": False,
    "tp_comm_atomic_rs": False,
    "tp_comm_bootstrap_backend": "nccl",
    "tp_comm_bulk_dgrad": True,
    "tp_comm_bulk_wgrad": True,
    "tp_comm_overlap": False,
    "tp_comm_overlap_ag": True,
    "tp_comm_overlap_disable_fc1": False,
    "tp_comm_overlap_disable_qkv": False,
    "tp_comm_overlap_rs": True,
    "tp_comm_overlap_rs_dgrad": False,
    "tp_comm_split_ag": True,
    "tp_comm_split_rs": True,
    "tp_only_amax_red": False,
    "transformer_impl": "transformer_engine",
    "use_cpu_initialization": None,
    "use_fused_weighted_squared_relu": False,
    "use_inference_optimized_layers": False,
    "use_kitchen": False,
    "use_kitchen_attention": False,
    "use_mamba_mem_eff_path": True,
    "use_ring_exchange_p2p": False,
    "use_te_activation_func": False,
    "use_te_rng_tracker": False,
    "variable_seq_lengths": False,
    "virtual_pipeline_model_parallel_size": None,
    "wgrad_deferral_limit": 0,
    "window_attn_skip_freq": None,
    "window_size": None,
    "fine_grained_activation_offloading": False,
    "min_offloaded_tensor_size": 1024 * 1024,
    "offload_modules": [],
    "hybrid_context_parallel": False,
    "max_seqlen_per_dp_cp_rank": None,
    "enable_routing_replay": False,
    "fallback_to_eager_attn": False,
    "linear_attention_type": None,
    "moe_router_force_biased": None,
}
# Fields to ignore entirely (ephemeral, environment-specific, very large).
SKIP_FIELDS = set()
# Fields that are allowed to appear in the live config even if not yet in the golden.
ALLOW_ADDED_FIELDS = set()


def serialize_config(cfg: Any) -> Dict[str, Any]:
    """Normalize a config object into a JSON-serializable dict."""
    data = {k: v for k, v in vars(cfg).items() if k not in SKIP_FIELDS}
    return _ser(data)


def assert_config_matches_golden(cfg: Any) -> None:
    """Compare live config to golden snapshot with readable diffs.

    Fails the test (via ``pytest.fail``) with actionable guidance whenever the
    live config has added, removed, or changed fields relative to GOLDEN_CONFIG.
    """
    current = serialize_config(cfg)
    golden = GOLDEN_CONFIG

    added, removed, changed = _diff_configs(golden, current)

    # Ignore added fields that are explicitly allowed.
    added = [k for k in added if k not in ALLOW_ADDED_FIELDS]

    if added or removed or changed:
        # Build actionable guidance for each type of drift
        guidance_parts = []

        if added:
            guidance_parts.append(
                f"\n\n[ADDED ARGS]: {sorted(added)}\n"
                "   → Update GOLDEN_CONFIG in this test file to include the new arg(s) with "
                "their default value(s).\n"
                "   ⚠️ CAUTION: Review any logic associated with new args to ensure it doesn't "
                "silently affect downstream model configs or behavior.\n"
            )

        if changed:
            guidance_parts.append(
                f"\n\n[CHANGED DEFAULTS]: {sorted(changed)}\n"
                "   → Please don't change the default values of existing args unless "
                "it is absolutely necessary for a bug fix.\n"
                "   → If you must change the default value, please update the GOLDEN_CONFIG "
                "in this test file to reflect the new default value.\n"
            )

        if removed:
            guidance_parts.append(
                f"\n\n[REMOVED ARGS]: {sorted(removed)}\n"
                "   → Do NOT remove args directly. Instead, deprecate them with a warning message "
                "to maintain backwards compatibility.\n"
            )

        guidance_parts.append(
            "Please contact NV-username @jbarker if you are unsure how to proceed.\n"
        )

        # BUGFIX: the original used implicit string-literal concatenation
        # ("…!\n" "═" * 60), which concatenates the literals FIRST and then
        # repeats the whole title 60 times. An explicit `+` keeps the title
        # once, followed by a 60-character rule.
        header = "Mamba MoE config drift detected!\n" + "═" * 60 + "".join(guidance_parts)
        parts = [header]
        if changed:
            formatted = {k: {"expected": golden[k], "actual": current[k]} for k in sorted(changed)}
            parts.append(
                f"Changed field details:\n{json.dumps(formatted, indent=2, sort_keys=True)}"
            )
        pytest.fail("\n".join(parts))


def regenerate_mamba_moe_golden(cfg: Any) -> Dict[str, Any]:
    """Helper to regenerate the golden config; copy/paste into GOLDEN_CONFIG."""
    serialized = serialize_config(cfg)
    return serialized


def _ser(obj: Any) -> Any:
    """Recursively convert objects to JSON-friendly structures.

    Primitives pass through; dicts/lists/tuples recurse; functions and classes
    become dotted-path strings; arbitrary objects fall back to their __dict__
    or str() representation.
    """
    if obj is None or isinstance(obj, (bool, int, float, str)):
        return obj
    if isinstance(obj, dict):
        return {k: _ser(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_ser(v) for v in obj]
    if inspect.isfunction(obj) or inspect.ismethod(obj):
        return f"{obj.__module__}.{obj.__name__}"
    if inspect.isclass(obj):
        return f"{obj.__module__}.{obj.__name__}"
    if hasattr(obj, "__dict__"):
        return {k: _ser(v) for k, v in vars(obj).items()}
    try:
        return str(obj)
    except Exception:
        # BUGFIX: the original fallback was the empty f-string `f""` (an
        # apparently truncated placeholder), which would silently map any
        # unprintable object to "". Return a descriptive marker instead.
        return f"<unserializable {type(obj).__name__}>"


def _diff_configs(expected: Mapping[str, Any], actual: Mapping[str, Any]) -> Tuple[set, set, set]:
    """Return added, removed, and changed top-level keys between dicts."""
    expected_keys = set(expected)
    actual_keys = set(actual)
    added = actual_keys - expected_keys
    removed = expected_keys - actual_keys
    changed = {k for k in expected_keys & actual_keys if expected[k] != actual[k]}
    return added, removed, changed


class TestMambaMoEModel:
    """Test the initialization and use of an MoE Mamba model."""

    def create_test_args(self):
        """Build and validate the nano v3 Mamba MoE argument set."""
        destroy_global_vars()
        destroy_num_microbatches_calculator()

        sys.argv = ['test_mamba_moe_model.py']
        args = parse_args()

        # The following args would be set from the nano v3 checkpoint.
        args.num_layers = 52
        args.hidden_size = 2688
        args.ffn_hidden_size = 1856
        args.num_attention_heads = 32
        args.num_query_groups = 2
        args.group_query_attention = True
        args.kv_channels = 128
        args.position_embedding_type = 'none'
        args.add_position_embedding = True
        args.use_rotary_position_embeddings = False
        args.rotary_base = 10000
        args.rotary_percent = 1.0
        args.rotary_interleaved = False
        args.add_bias_linear = False
        args.add_qkv_bias = False
        args.squared_relu = True
        args.swiglu = False
        args.untie_embeddings_and_output_weights = True
        args.apply_layernorm_1p = False
        args.normalization = "RMSNorm"
        args.apply_query_key_layer_scaling = False
        args.attention_dropout = 0.0
        args.hidden_dropout = 0.0
        args.hybrid_override_pattern = "MEMEM*EMEMEM*EMEMEM*EMEMEM*EMEMEM*EMEMEMEM*EMEMEMEME"
        args.spec = ["megatron.core.models.mamba.mamba_layer_specs", "mamba_stack_spec"]
        args.hybrid_attention_ratio = 0.0
        args.hybrid_mlp_ratio = 0.0
        args.num_experts = 128
        args.moe_layer_freq = 1
        args.moe_ffn_hidden_size = 1856
        args.moe_router_topk = 6
        args.moe_router_pre_softmax = False
        args.moe_grouped_gemm = True
        args.moe_shared_expert_intermediate_size = 3712
        args.moe_router_score_function = "sigmoid"
        args.moe_router_enable_expert_bias = True
        args.moe_router_topk_scaling_factor = 2.5
        args.mamba_state_dim = 128
        args.mamba_head_dim = 64
        args.mamba_num_groups = 8
        args.mamba_num_heads = 64
        args.is_hybrid_model = True
        args.tokenizer_type = "TikTokenizer"
        args.tiktoken_pattern = "v2"
        args.tokenizer_model = "/mnt/artifacts/model/nemotron6/tokenizers/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json"
        args.padded_vocab_size = 131072

        # The following args would be set in the user's nano v3 config.
        args.async_tensor_model_parallel_allreduce = True
        args.attention_backend = AttnBackend.flash
        args.bf16 = True
        args.ckpt_format = 'torch_dist'
        args.cross_entropy_loss_fusion = True
        args.cuda_graph_impl = "none"
        args.embedding_init_method_std = 0.014
        args.expert_model_parallel_size = 4
        args.expert_tensor_parallel_size = 1
        args.init_method_std = 0.014
        args.lr = 3e-5
        args.max_position_embeddings = 1024
        args.micro_batch_size = 2
        args.moe_aux_loss_coeff = 0.0
        args.moe_grouped_gemm = True
        # BUGFIX: was `moe_route_load_balancing_type` (missing "r"), which set a
        # nonexistent attribute and silently left the real arg at its default.
        args.moe_router_load_balancing_type = "aux_loss"
        args.moe_router_dtype = "fp64"
        args.moe_router_pre_softmax = False
        args.moe_token_dispatcher_type = "alltoall"
        args.no_load_optim = True
        args.no_load_rng = True
        args.no_save_optim = True
        args.pipeline_model_parallel_size = 1
        args.position_embedding_type = None
        args.recompute_granularity = None
        args.seed = 42
        args.seq_length = 1024
        args.sequence_parallel = True
        args.te_rng_tracker = True
        args.tensor_model_parallel_size = 2
        args.vocab_size = 131072

        validate_args(args)
        set_global_variables(args, False)
        return args

    def setup_method(self, method):
        """Initialize model-parallel state and construct the MambaModel under test."""
        os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1'
        args = self.create_test_args()
        set_args(args)

        Utils.initialize_model_parallel(
            tensor_model_parallel_size=args.tensor_model_parallel_size,
            pipeline_model_parallel_size=args.pipeline_model_parallel_size,
            expert_model_parallel_size=args.expert_model_parallel_size,
            expert_tensor_parallel_size=args.expert_tensor_parallel_size,
        )
        model_parallel_cuda_manual_seed(123)

        model_config = core_transformer_config_from_args(args, TransformerConfig)

        self.model = MambaModel(
            config=model_config,
            mamba_stack_spec=mamba_stack_spec,
            vocab_size=args.vocab_size,
            max_sequence_length=args.seq_length,
            hybrid_attention_ratio=args.hybrid_attention_ratio,
            hybrid_mlp_ratio=args.hybrid_mlp_ratio,
            hybrid_override_pattern=args.hybrid_override_pattern,
            position_embedding_type=args.position_embedding_type,
            rotary_base=args.rotary_base,
            rotary_percent=args.rotary_percent,
        )

    def teardown_method(self, method):
        """Tear down model-parallel state between tests."""
        Utils.destroy_model_parallel()

    def test_constructor(self):
        """Sanity check for the constructor of the Mamba MoE model."""

        args = get_args()

        assert_config_matches_golden(self.model.config)

        assert self.model.pre_process is True, "pre_process should be True"
        assert self.model.post_process is True, "post_process should be True"
        assert self.model.hybrid_attention_ratio == 0.0, "hybrid_attention_ratio should be 0.0"
        assert self.model.hybrid_mlp_ratio == 0.0, "hybrid_mlp_ratio should be 0.0"
        assert (
            self.model.hybrid_override_pattern == args.hybrid_override_pattern
        ), f"hybrid_override_pattern should be {args.hybrid_override_pattern}"
        num_weights = sum([p.numel() for p in self.model.parameters()])
        assert num_weights == 8449294624, f"Expected 8449294624 parameters, got {num_weights}"

    def test_set_input_tensor(self):
        """Verify set_input_tensor forwards the tensor to the decoder unchanged."""
        args = get_args()

        config: TransformerConfig = self.model.config
        sequence_length = self.model.max_sequence_length
        micro_batch_size = args.micro_batch_size

        # [sequence length, batch size, hidden size]
        input_tensor = torch.ones((sequence_length, micro_batch_size, config.hidden_size))

        self.model.set_input_tensor(input_tensor)

        assert self.model.decoder.input_tensor.shape[0] == sequence_length
        assert self.model.decoder.input_tensor.shape[1] == micro_batch_size
        assert self.model.decoder.input_tensor.shape[2] == config.hidden_size

    def test_forward(self):
        """Basic smoke test for the forward pass of the Mamba MoE model."""

        args = get_args()

        # we must override this to avoid the need to initialize the optimizer
        for param in self.model.parameters():
            param.requires_grad = False

        sequence_length = self.model.max_sequence_length
        micro_batch_size = args.micro_batch_size

        self.model.cuda()

        data = list(range(sequence_length))
        input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda()
        position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda()
        attention_mask = torch.ones(
            (micro_batch_size, 1, sequence_length, sequence_length), dtype=bool
        ).cuda()

        logits = self.model.forward(
            input_ids=input_ids,
            position_ids=position_ids,
            attention_mask=attention_mask,
            runtime_gather_output=True,
        )

        assert logits.shape[0] == micro_batch_size
        assert logits.shape[1] == sequence_length
        assert logits.shape[2] == self.model.vocab_size
ProcessGroupCollection +from megatron.core.transformer.cuda_graphs import ( + convert_schedule_table_to_order, + get_overlap_moe_expert_parallel_comm_order, +) from tests.unit_tests.test_utilities import Utils rank = Utils.rank @@ -108,7 +112,7 @@ def test_get_pipeline_parallel_order( schedule_table = schedule.get_schedule_table( num_microbatches, num_model_chunks, microbatch_group_size_per_vp_stage ) - order = schedule.convert_schedule_table_to_order( + order = convert_schedule_table_to_order( num_warmup_microbatches, num_model_chunks, schedule_table ) @@ -132,7 +136,7 @@ def test_get_pipeline_parallel_order( layers_per_chunk = 2 num_layers_per_chunk = [layers_per_chunk] * num_model_chunks # disable wgrad compute - overlapped_order, chunk_id_list = schedule.get_overlap_moe_expert_parallel_comm_order( + overlapped_order, chunk_id_list = get_overlap_moe_expert_parallel_comm_order( order, num_layers_per_chunk, False ) assert max(overlapped_order) == num_model_chunks * layers_per_chunk @@ -151,7 +155,7 @@ def test_get_pipeline_parallel_order( assert accumulated_order == 0 # enable wgrad compute - overlapped_order, chunk_id_list = schedule.get_overlap_moe_expert_parallel_comm_order( + overlapped_order, chunk_id_list = get_overlap_moe_expert_parallel_comm_order( order, num_layers_per_chunk, True ) assert max(overlapped_order) == num_model_chunks * layers_per_chunk diff --git a/tests/unit_tests/post_training/test_modelopt_model_builder.py b/tests/unit_tests/post_training/test_modelopt_model_builder.py new file mode 100644 index 00000000000..b489d659ec4 --- /dev/null +++ b/tests/unit_tests/post_training/test_modelopt_model_builder.py @@ -0,0 +1,68 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
"""Unit tests for model_provider integration with ModelOpt model_builder."""

from argparse import Namespace

import model_provider as mp


def _sentinel_builder(return_value, calls):
    """Create a builder stub that records invocation."""

    def _record_call(args, pre_process, post_process, vp_stage, config=None, pg_collection=None):
        # Capture every argument so the caller can assert on the exact invocation.
        calls.append(
            dict(
                args=args,
                pre_process=pre_process,
                post_process=post_process,
                vp_stage=vp_stage,
                config=config,
                pg_collection=pg_collection,
            )
        )
        return return_value

    return _record_call


def test_model_provider_switches_to_modelopt_builder(monkeypatch):
    """Ensure model_provider delegates to ModelOpt builder when enabled."""
    args = Namespace(record_memory_history=False, modelopt_enabled=True)
    modelopt_calls, original_calls = [], []
    modelopt_result, original_result = object(), object()

    # Force ModelOpt availability and stub builders.
    monkeypatch.setattr(mp, "has_nvidia_modelopt", True)
    monkeypatch.setattr(mp, "get_args", lambda: args)
    monkeypatch.setattr(
        mp, "modelopt_gpt_mamba_builder", _sentinel_builder(modelopt_result, modelopt_calls)
    )

    # original_builder should be ignored when ModelOpt is enabled.
    fallback_builder = _sentinel_builder(original_result, original_calls)

    returned = mp.model_provider(
        fallback_builder,
        pre_process=False,
        post_process=False,
        vp_stage=1,
        config="cfg",
        pg_collection="pg",
    )

    expected_call = {
        "args": args,
        "pre_process": False,
        "post_process": False,
        "vp_stage": 1,
        "config": "cfg",
        "pg_collection": "pg",
    }
    assert returned is modelopt_result
    assert modelopt_calls == [expected_call]
    assert not original_calls
import copy
import os
import types
from typing import List, Optional, Tuple

import pytest
import torch
import torch.distributed as dist

from megatron.core import parallel_state as mpu
from megatron.core.hyper_comm_grid import HyperCommGrid
from megatron.core.model_parallel_config import ModelParallelConfig
from megatron.core.models.gpt.gpt_layer_specs import (
    get_gpt_layer_local_spec,
    get_gpt_layer_with_transformer_engine_spec,
)
from megatron.core.models.gpt.gpt_model import GPTModel
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.resharding.refit import swap_model_weights
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from megatron.core.transformer.cuda_graphs import CudaGraphManager, _CudagraphGlobalRecord
from megatron.core.transformer.transformer_config import TransformerConfig
from tests.unit_tests.test_utilities import Utils


def _build_pg_collection(
    tp_size: int, pp_size: Optional[int] = None, ep_size: int = 1
) -> ProcessGroupCollection:
    """Build a ProcessGroupCollection for the given TP/PP/EP sizing.

    DP size is derived from the world size so that tp*cp*ep*pp*dp == world.
    When pp_size is None, it defaults to the currently initialized PP world size.
    """
    cp_size = mpu.get_context_parallel_world_size()
    if pp_size is None:
        pp_size = mpu.get_pipeline_model_parallel_world_size()
    world_size = dist.get_world_size()
    dp_size = world_size // (tp_size * cp_size * ep_size * pp_size)
    assert dp_size >= 1 and (tp_size * cp_size * ep_size * pp_size * dp_size) == world_size

    grid = HyperCommGrid(
        [tp_size, cp_size, ep_size, pp_size, dp_size], ["tp", "cp", "ep", "pp", "dp"]
    )
    tp_group = grid.create_pg("tp")
    cp_group = grid.create_pg("cp")
    pp_group = grid.create_pg("pp")
    ep_group = grid.create_pg("ep")
    dp_group = grid.create_pg("dp")
    # Composite groups required by MoE/router and some utilities
    tp_cp_group = grid.create_pg(["tp", "cp"])
    mp_group = grid.create_pg(["tp", "cp", "ep", "pp"])
    tp_ep_group = grid.create_pg(["tp", "ep"])
    tp_ep_pp_group = grid.create_pg(["tp", "ep", "pp"])
    dp_cp_group = grid.create_pg(["cp", "dp"])
    tp_dp_cp_group = grid.create_pg(["tp", "cp", "dp"])
    # Embedding/position-embedding groups are derived from the PP group's ranks
    # using the default rank-selection helpers.
    embd_group_ranks = mpu.default_embedding_ranks(dist.get_process_group_ranks(pp_group))
    embd_group = dist.new_group(ranks=embd_group_ranks)
    pos_embd_group_ranks = mpu.default_position_embedding_ranks(
        dist.get_process_group_ranks(pp_group)
    )
    pos_embd_group = dist.new_group(ranks=pos_embd_group_ranks)
    return ProcessGroupCollection(
        tp=tp_group,
        cp=cp_group,
        pp=pp_group,
        ep=ep_group,
        embd=embd_group,
        pos_embd=pos_embd_group,
        dp=dp_group,
        tp_cp=tp_cp_group,
        mp=mp_group,
        expt_tp=tp_group,
        expt_dp=dp_group,
        tp_ep=tp_ep_group,
        tp_ep_pp=tp_ep_pp_group,
        dp_cp=dp_cp_group,
        tp_dp_cp=tp_dp_cp_group,
    )


def _build_gpt(
    config: TransformerConfig,
    vocab_size: int,
    seq_len: int,
    pg_collection,
    parallel_output: bool = True,
    num_moe_experts: Optional[int] = None,
) -> GPTModel:
    """Construct a small GPTModel (TE layer spec, rope embeddings) on the given PGs."""
    model = GPTModel(
        config=config,
        transformer_layer_spec=get_gpt_layer_with_transformer_engine_spec(
            num_experts=num_moe_experts, moe_grouped_gemm=(num_moe_experts is not None)
        ),
        vocab_size=vocab_size,
        max_sequence_length=seq_len,
        pre_process=True,
        post_process=True,
        fp16_lm_cross_entropy=False,
        parallel_output=parallel_output,
        share_embeddings_and_output_weights=True,
        position_embedding_type="rope",
        rotary_percent=1.0,
        pg_collection=pg_collection,
    )
    return model


def _mp_config() -> ModelParallelConfig:
    """Return a minimal fp32, CPU-initialized ModelParallelConfig."""
    return ModelParallelConfig(
        params_dtype=torch.float32,
        use_cpu_initialization=True,
        sequence_parallel=False,
        gradient_accumulation_fusion=False,
    )


def _set_pg_collection(module, tp_group, dp_group):
    """Attach a lightweight pg_collection namespace (tp/dp only) to a module."""
    module.pg_collection = types.SimpleNamespace(tp=tp_group, dp=dp_group, ep=None, pp=None)
    return module


@pytest.mark.parametrize("refit_backend", ["nccl", "gloo"])
@pytest.mark.parametrize(
    "src_tp,src_pp,src_ep,dst_tp,dst_pp,dst_ep,num_experts",
    [
        # TP only changes
        (2, 1, 1, 1, 1, 1, None),  # TP2 -> TP1
        (1, 1, 1, 2, 1, 1, None),  # TP1 -> TP2
        (2, 1, 1, 4, 1, 1, None),  # TP2 -> TP4
        # # PP only changes
        (1, 2, 1, 1, 1, 1, None),  # PP2 -> PP1
        (1, 1, 1, 1, 2, 1, None),  # PP1 -> PP2
        # # Both TP and PP change
        (2, 2, 1, 1, 1, 1, None),  # TP2,PP2 -> TP1,PP1
        (1, 1, 1, 2, 2, 1, None),  # TP1,PP1 -> TP2,PP2
        (2, 1, 1, 1, 2, 1, None),  # TP2,PP1 -> TP1,PP2
        (1, 2, 1, 2, 1, 1, None),  # TP1,PP2 -> TP2,PP1
        (1, 2, 1, 2, 4, 1, None),  # TP1,PP2 -> TP2,PP4
        (1, 1, 2, 1, 1, 4, 4),  # EP2 -> EP4
        (1, 1, 2, 1, 1, 1, 4),  # EP2 -> EP1
        (1, 1, 1, 1, 1, 2, 4),
        (1, 1, 2, 1, 2, 2, 4),
    ],
)
def test_swap_gpt_parametrized(
    refit_backend: str,
    src_tp: int,
    src_pp: int,
    src_ep: int,
    dst_tp: int,
    dst_pp: int,
    dst_ep: int,
    num_experts: Optional[int],
):
    """End-to-end refit test: build src/dst GPT models under different TP/PP/EP
    layouts, swap weights from src into dst, and assert identical logits.
    """
    # Initialize environment with source MP sizing
    Utils.initialize_model_parallel(
        tensor_model_parallel_size=src_tp, pipeline_model_parallel_size=src_pp
    )
    # Validate divisibility post-init using the default PG safely
    world = dist.get_world_size()
    if (world % (src_tp * src_pp * src_ep) != 0) or (world % (dst_tp * dst_pp * dst_ep) != 0):
        Utils.destroy_model_parallel()
        pytest.skip(
            "WORLD_SIZE must be divisible by both src_tp*src_pp*src_ep and dst_tp*dst_pp*dst_ep"
        )
    model_parallel_cuda_manual_seed(1234)

    torch.manual_seed(1234)
    device = torch.device(f"cuda:{torch.cuda.current_device()}")

    # Small GPT config
    seq_len = 8
    vocab_size = 128
    # --group-query-attention --num-query-groups 8
    cfg = TransformerConfig(
        num_layers=4 if (src_pp > 1 or dst_pp > 1) else 2,
        hidden_size=32,
        num_attention_heads=8,
        use_cpu_initialization=True,
        pipeline_dtype=torch.float32,
        hidden_dropout=0.0,
        attention_dropout=0.0,
        moe_router_dtype="fp64",
        moe_token_dispatcher_type="alltoall",
        num_query_groups=4,
    )

    # Build PGs and models (always use unified PG builder so we can set EP)
    src_pgs = _build_pg_collection(tp_size=src_tp, pp_size=src_pp, ep_size=src_ep)
    dst_pgs = _build_pg_collection(tp_size=dst_tp, pp_size=dst_pp, ep_size=dst_ep)
    # Apply EP configuration to TransformerConfigs when MoE is requested
    src_cfg = copy.deepcopy(cfg)
    dst_cfg = copy.deepcopy(cfg)
    if num_experts is not None:
        src_cfg.num_moe_experts = num_experts
        dst_cfg.num_moe_experts = num_experts
        # Ensure MoE MLP has an intermediate size; __post_init__ won't rerun after manual mutation
        src_cfg.moe_ffn_hidden_size = src_cfg.ffn_hidden_size
        dst_cfg.moe_ffn_hidden_size = dst_cfg.ffn_hidden_size
        src_cfg.expert_model_parallel_size = src_ep
        dst_cfg.expert_model_parallel_size = dst_ep
        # Force grouped MLP path under Transformer Engine and satisfy requirements
        src_cfg.moe_grouped_gemm = True
        dst_cfg.moe_grouped_gemm = True
        src_cfg.add_bias_linear = False
        dst_cfg.add_bias_linear = False
        # Require Transformer Engine for TEGroupedMLP; skip if unavailable
        try:
            import transformer_engine
        except Exception:
            Utils.destroy_model_parallel()
            pytest.skip("Transformer Engine not available; skipping TE-grouped MoE test")
    # Use parallel_output=False to gather TP logits inside model and emit only on last PP stage
    src_model = (
        _build_gpt(
            src_cfg,
            vocab_size,
            seq_len,
            src_pgs,
            parallel_output=False,
            num_moe_experts=num_experts,
        )
        .to(device)
        .eval()
    )
    dst_model = (
        _build_gpt(
            dst_cfg,
            vocab_size,
            seq_len,
            dst_pgs,
            parallel_output=False,
            num_moe_experts=num_experts,
        )
        .to(device)
        .eval()
    )

    # Inputs
    batch = 2
    tokens = torch.randint(
        low=0, high=vocab_size, size=(batch, seq_len), device=device, dtype=torch.long
    )
    position_ids = (
        torch.arange(seq_len, device=device, dtype=torch.long).unsqueeze(0).expand(batch, -1)
    )
    attention_mask = torch.ones((batch, 1, seq_len, seq_len), device=device, dtype=torch.bool)

    # Collect source reference logits (parallel_output=False ensures full vocab on last PP stage)
    ref_logits = torch.empty(batch, seq_len, vocab_size, device=device, dtype=torch.float32)
    src_pp_ranks = dist.get_process_group_ranks(src_pgs.pp)
    src_last_pp_rank = src_pp_ranks[-1]
    with torch.no_grad():
        src_out = src_model(tokens, position_ids, attention_mask)
        if dist.get_rank() == src_last_pp_rank:
            ref = src_out  # [b, s, vocab]
            ref_logits.copy_(ref)
    # Broadcast the last-stage logits so every PP rank holds the reference.
    dist.broadcast(ref_logits, src=src_last_pp_rank, group=src_pgs.pp)

    # Swap weights
    swap_model_weights([src_model], [dst_model], refit_method=refit_backend)

    # Collect destination logits (parallel_output=False ensures full vocab on last PP stage)
    dst_logits = torch.empty(batch, seq_len, vocab_size, device=device, dtype=torch.float32)
    dst_pp_ranks = dist.get_process_group_ranks(dst_pgs.pp)
    dst_last_pp_rank = dst_pp_ranks[-1]
    with torch.no_grad():
        dst_out = dst_model(
            tokens, position_ids, attention_mask
        )  # last stage returns tensor, others return None
        if dist.get_rank() == dst_last_pp_rank:
            dst_logits.copy_(dst_out)  # [b, s, vocab]
    dist.broadcast(dst_logits, src=dst_last_pp_rank, group=dst_pgs.pp)

    # Compare
    assert ref_logits.shape == dst_logits.shape
    assert torch.allclose(
        dst_logits, ref_logits, atol=1e-4, rtol=1e-4
    ), f"Refit src(TP={src_tp},PP={src_pp})->dst(TP={dst_tp},PP={dst_pp}) GPT outputs differ"
    dist.barrier()
    Utils.destroy_model_parallel()
a/tests/unit_tests/test_fp8_param.py +++ b/tests/unit_tests/test_fp8_param.py @@ -36,7 +36,10 @@ try: from transformer_engine.pytorch.tensor.utils import post_all_gather_processing - cuda_graph_supported = True + if is_te_min_version("2.10.0"): + cuda_graph_supported = True + else: + reason_for_no_cuda_graph = "Need newer TransformerEngine" except ImportError: reason_for_no_cuda_graph = "Need newer TransformerEngine" @@ -65,12 +68,16 @@ class TestFP8Param: def setup_method(self, method): self.seq_length = 512 self.micro_batch_size = 2 + self.cuda_graph_helper = None os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1' def teardown_method(self, method): Utils.destroy_model_parallel() destroy_global_vars() destroy_num_microbatches_calculator() + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None gc.collect() def model_provider( @@ -209,13 +216,12 @@ def _run_test_helper( ) assert len(gpt_model) == 1 # Assume only one model in the model provider. - cuda_graph_helper = None # Hard coded to use cuda_graph_impl="transformer_engine" cuda_graph_impl = "transformer_engine" if use_cuda_graph and cuda_graph_impl == "transformer_engine": from megatron.core.transformer.cuda_graphs import TECudaGraphHelper - cuda_graph_helper = TECudaGraphHelper( + self.cuda_graph_helper = TECudaGraphHelper( model=gpt_model, config=gpt_model[0].config, seq_length=self.seq_length, @@ -250,13 +256,13 @@ def _run_test_helper( # Capture CUDA graphs after warmup if helper is provided. # Hard coded cuda_graph_warmup_steps = 0. 
cuda_graph_warmup_steps = 0 - if cuda_graph_helper is not None and i == cuda_graph_warmup_steps: + if self.cuda_graph_helper is not None and i == cuda_graph_warmup_steps: if should_disable_forward_pre_hook(args): disable_forward_pre_hook(gpt_model, param_sync=False) - cuda_graph_helper.create_cudagraphs() + self.cuda_graph_helper.create_cudagraphs() if should_disable_forward_pre_hook(args): enable_forward_pre_hook(gpt_model) - cuda_graph_helper.cuda_graph_set_manual_hooks() + self.cuda_graph_helper.cuda_graph_set_manual_hooks() # For the mxfp8_param with reuse_grad_buf_for_mxfp8_param_ag and dp_ag_overlap, # we need to call the _copy_main_params_to_param_buffer() after the grad buffer @@ -297,6 +303,10 @@ def _run_test_helper( loss_list.append(loss.item()) + if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): + self.cuda_graph_helper.delete_cuda_graphs() + self.cuda_graph_helper = None + return torch.tensor(loss_list) def run_test(self, tp_size, recipe, inference: bool = False, **kwargs): diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py index 1f5bbc3f14c..6b1da8c4e3f 100644 --- a/tests/unit_tests/test_optimizer.py +++ b/tests/unit_tests/test_optimizer.py @@ -384,6 +384,7 @@ def test_precision_aware_fused_adam(): "moment_dtype", [torch.float32, torch.float16, torch.bfloat16, torch.uint8], ) +@pytest.mark.skip(reason="inconsistent ci test runs resulting in NCCL errors") def test_precision_aware_optimizer( precision: str, main_params_dtype: torch.dtype, diff --git a/tests/unit_tests/test_rl_utils.py b/tests/unit_tests/test_rl_utils.py index 0c7e518167d..f28240591fe 100644 --- a/tests/unit_tests/test_rl_utils.py +++ b/tests/unit_tests/test_rl_utils.py @@ -1,6 +1,7 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import os +from types import SimpleNamespace from unittest.mock import patch import pytest @@ -30,6 +31,7 @@ def __init__(self, batch=BATCH, seq=SEQ, vocab=VOCAB): self.batch = batch self.seq = seq self.vocab = vocab + self.pg_collection = SimpleNamespace(pp=None) self.config = TransformerConfig(num_attention_heads=1, num_layers=1) self.model_type = ModelType.encoder_or_decoder @@ -66,7 +68,7 @@ def detokenize(self, tokens): @pytest.fixture(scope='module', autouse=True) def mock_pipeline_stuff(): - with patch('megatron.rl.rl_utils.is_pipeline_last_stage', return_value=True): + with patch('megatron.rl.rl_utils.is_pp_last_stage', return_value=True): yield @@ -110,6 +112,7 @@ def test_prepare_trajectories(mock_rank): args = type('Args', (), {})() args.rl_use_sequence_packing = False args.rl_inference_logprobs_is_correction = True + args.rl_skip_bos_token = False global_vars.set_args(args) tokenizer = MockTokenizer() diff --git a/tests/unit_tests/test_training.py b/tests/unit_tests/test_training.py index 953a80e0945..fef4bfbc5ef 100644 --- a/tests/unit_tests/test_training.py +++ b/tests/unit_tests/test_training.py @@ -1,3 +1,5 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ from types import SimpleNamespace from megatron.training.global_vars import set_args @@ -26,6 +28,7 @@ def create_test_args(): args.full_validation = False args.multiple_validation_sets = False args.perform_rl_step = False + args.phase_transition_iterations = None return args diff --git a/tests/unit_tests/transformer/moe/test_aux_loss.py b/tests/unit_tests/transformer/moe/test_aux_loss.py index f5726777383..0f8fb0e3a5c 100644 --- a/tests/unit_tests/transformer/moe/test_aux_loss.py +++ b/tests/unit_tests/transformer/moe/test_aux_loss.py @@ -18,6 +18,7 @@ ) from megatron.core.transformer.moe.router import TopKRouter from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.typed_torch import apply_module from megatron.training.initialize import _set_random_seed from tests.unit_tests.test_utilities import Utils from tests.unit_tests.transformer.moe.test_token_dispatcher import MoEModelTestContainer @@ -51,7 +52,7 @@ def partition_input(self, input): def aux_loss_test(self, input, baseline_grad, loss_name): partitioned_input = self.partition_input(input) moe_layer = self.moe_layer - probs, indices = moe_layer.router(partitioned_input) + probs, indices = apply_module(moe_layer.router)(partitioned_input) probs.sum().mul_(0).backward() aux_loss_grad = partitioned_input.grad torch.distributed.barrier() @@ -62,7 +63,7 @@ def aux_loss_test(self, input, baseline_grad, loss_name): clear_aux_losses_tracker() with torch.no_grad(): - probs, indices = moe_layer.router(partitioned_input) + probs, indices = apply_module(moe_layer.router)(partitioned_input) loss = get_moe_layer_wise_logging_tracker()[loss_name]['values'] assert loss == 0, "Loss should be 0" clear_aux_losses_tracker() @@ -84,7 +85,7 @@ def setup_method(self, method): moe_layer = baseline_container.moe_layer self.input = torch.randn((32, 8, moe_layer.config.hidden_size)).cuda() self.input.requires_grad = True - probs, indices = moe_layer.router(self.input) + probs, indices = 
apply_module(moe_layer.router)(self.input) probs.sum().mul_(0).backward() # zero out the main gradients self.baseline_grad = self.input.grad self.input.grad = None @@ -148,7 +149,7 @@ def setup_method(self, method): moe_layer = baseline_container.moe_layer self.input = torch.randn((32, 8, moe_layer.config.hidden_size)).cuda() self.input.requires_grad = True - probs, indices = moe_layer.router(self.input) + probs, indices = apply_module(moe_layer.router)(self.input) probs.sum().mul_(0).backward() # zero out the main gradients self.baseline_grad = self.input.grad self.input.grad = None diff --git a/tests/unit_tests/transformer/moe/test_routers.py b/tests/unit_tests/transformer/moe/test_routers.py index abd1a4db2dc..4b9a4c90b6d 100644 --- a/tests/unit_tests/transformer/moe/test_routers.py +++ b/tests/unit_tests/transformer/moe/test_routers.py @@ -1,5 +1,7 @@ # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +from typing import cast + import pytest import torch @@ -47,7 +49,7 @@ def setup_method(self, method): self.sequential_mlp = MoELayer( self.transformer_config, transformer_layer_spec.submodules.mlp.submodules ) - self.router = self.sequential_mlp.router + self.router = cast(Router, self.sequential_mlp.router) def teardown_method(self, method): Utils.destroy_model_parallel() @@ -318,7 +320,7 @@ def setup_method(self, method): self.moe_layer = MoELayer( self.transformer_config, transformer_layer_spec.submodules.mlp.submodules ).cuda() - self.router = self.moe_layer.router + self.router = cast(Router, self.moe_layer.router) def teardown_method(self, method): Utils.destroy_model_parallel() @@ -425,7 +427,7 @@ def setup_method(self, method): self.moe_layer = MoELayer( self.transformer_config, transformer_layer_spec.submodules.mlp.submodules ) - self.router = self.moe_layer.router + self.router = cast(Router, self.moe_layer.router) assert self.router.expert_bias is not None assert self.router.local_tokens_per_expert is not None diff --git 
a/tests/unit_tests/transformer/moe/test_token_dispatcher.py b/tests/unit_tests/transformer/moe/test_token_dispatcher.py index 6a155920e2f..c2bb269c9c4 100644 --- a/tests/unit_tests/transformer/moe/test_token_dispatcher.py +++ b/tests/unit_tests/transformer/moe/test_token_dispatcher.py @@ -11,6 +11,7 @@ from megatron.core.transformer.moe.moe_layer import MoELayer from megatron.core.transformer.moe.moe_utils import get_capacity from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.typed_torch import apply_module from megatron.core.utils import is_te_min_version from megatron.training.initialize import _set_random_seed from tests.unit_tests.test_utilities import Utils @@ -129,7 +130,7 @@ def dispatcher_dropless_test(self): # Permute and then unpermute data are supposed to restore original data ans = hidden_states hidden_states.requires_grad = True - probs, indices = moe_layer.router(hidden_states) + probs, indices = apply_module(moe_layer.router)(hidden_states) probs = torch.ones_like(probs) / moe_layer.router.topk (permuted_local_hidden_states, tokens_per_expert, permuted_probs) = token_permutation( @@ -166,7 +167,7 @@ def dispatcher_capacity_test(self): ) hidden_states = hidden_states.cuda() hidden_states.requires_grad = True - probs, indices = moe_layer.router(hidden_states) + probs, indices = apply_module(moe_layer.router)(hidden_states) # Create the answer. 
prob_mask = probs != 0 @@ -225,7 +226,7 @@ def dispatcher_drop_and_pad_test(self): ).cuda() hidden_states.requires_grad = True - probs_1, indices_1 = moe_layer.router(hidden_states) + probs_1, indices_1 = apply_module(moe_layer.router)(hidden_states) (permuted_input_1, tokens_per_expert, permuted_probs_1) = token_permutation( moe_layer.token_dispatcher, hidden_states, probs_1, indices_1 ) @@ -243,7 +244,7 @@ def dispatcher_drop_and_pad_test(self): moe_layer_2 = self.new_moe_layer(moe_pad_expert_input_to_capacity=True) moe_layer_2.load_state_dict(moe_layer.state_dict()) - probs_2, indices_2 = moe_layer_2.router(hidden_states) + probs_2, indices_2 = apply_module(moe_layer_2.router)(hidden_states) (permuted_input_2, tokens_per_expert, permuted_probs_2) = token_permutation( moe_layer_2.token_dispatcher, hidden_states, probs_2, indices_2 ) @@ -296,7 +297,7 @@ def dispatcher_router_padding_for_fp8_test(self): ).cuda() hidden_states.requires_grad = True - probs_1, indices_1 = moe_layer.router(hidden_states) + probs_1, indices_1 = apply_module(moe_layer.router)(hidden_states) (permuted_input_1, tokens_per_expert_1, permuted_probs_1) = token_permutation( moe_layer.token_dispatcher, hidden_states, probs_1, indices_1 ) @@ -313,7 +314,7 @@ def dispatcher_router_padding_for_fp8_test(self): moe_layer_2 = self.new_moe_layer(moe_router_padding_for_quantization=True, fp8="hybrid") moe_layer_2.load_state_dict(moe_layer.state_dict()) - probs_2, indices_2 = moe_layer_2.router(hidden_states) + probs_2, indices_2 = apply_module(moe_layer_2.router)(hidden_states) (permuted_input_2, tokens_per_expert_2, permuted_probs_2) = token_permutation( moe_layer_2.token_dispatcher, hidden_states, probs_2, indices_2 ) diff --git a/tests/unit_tests/transformer/test_attention.py b/tests/unit_tests/transformer/test_attention.py index b5f2857d622..0fbc6b4da23 100644 --- a/tests/unit_tests/transformer/test_attention.py +++ b/tests/unit_tests/transformer/test_attention.py @@ -402,13 +402,16 @@ def 
test_clip_qk_mixed_logits(self): assert attention.core_attention.current_max_attn_logits is None +@pytest.mark.parametrize("output_gate", [False, True]) class TestSelfAttention: - def setup_method(self, method): + @pytest.fixture(scope='function', autouse=True) + def setup_method(self, output_gate): + self.output_gate = output_gate Utils.initialize_model_parallel(1, 1) model_parallel_cuda_manual_seed(123) - def teardown_method(self, method): + def teardown_method(self): Utils.destroy_model_parallel() def test_clip_qk_disabled_raises_error(self): diff --git a/tests/unit_tests/transformer/test_cuda_graphs.py b/tests/unit_tests/transformer/test_cuda_graphs.py index 7f49a559f32..d602346c370 100644 --- a/tests/unit_tests/transformer/test_cuda_graphs.py +++ b/tests/unit_tests/transformer/test_cuda_graphs.py @@ -1252,6 +1252,10 @@ def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispa extra_kwargs["moe_token_dispatcher_type"] = "flex" extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" elif moe_dispatcher_type == "hybridep": + pytest.skip( + "Currently, the Hybrid EP is broken. " + "Temporarily skip the test and wait for the fix." 
+ ) if not is_hybrid_ep_available(): pytest.skip("Hybrid EP is not available") extra_kwargs["moe_token_dispatcher_type"] = "flex" @@ -1313,7 +1317,6 @@ def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispa test = TestCaptureFreezeGC() test.test_capture_freeze_gc() - test = TestPartialCudaGraph() test.setup_method(method=None) test.test_moe_partial_cudagraph(4, True, "alltoall") diff --git a/tests/unit_tests/transformer/test_multi_latent_attention.py b/tests/unit_tests/transformer/test_multi_latent_attention.py index fb259671287..bc8514ee561 100644 --- a/tests/unit_tests/transformer/test_multi_latent_attention.py +++ b/tests/unit_tests/transformer/test_multi_latent_attention.py @@ -293,13 +293,17 @@ def test_gpu_forward_thd_padded(self): assert bias.shape[0] == config.hidden_size # Test that the get_query_key_value_tensors function properly handles padded cu_seqlens - query, key, value = self.parallel_attention.get_query_key_value_tensors( - hidden_states, None, None, packed_seq_params, None + query, key, value, q_compressed, kv_compressed = ( + self.parallel_attention.get_query_key_value_tensors( + hidden_states, None, None, packed_seq_params, None + ) ) assert query is not None assert key is not None assert value is not None + assert q_compressed is not None + assert kv_compressed is not None assert query.is_contiguous() assert key.is_contiguous() assert value.is_contiguous() @@ -370,7 +374,9 @@ def test_up_proj_recomputed_gpu_forward(self): ) hidden_states = hidden_states.cuda() - q, k, v = checkpointed_parallel_attention.get_query_key_value_tensors(hidden_states) + q, k, v, q_compressed, kv_compressed = ( + checkpointed_parallel_attention.get_query_key_value_tensors(hidden_states) + ) assert q.is_contiguous() assert k.is_contiguous() assert v.is_contiguous() @@ -675,18 +681,30 @@ def test_gpu_forward_thd_precision(self): packed_seq_params = make_test_packed_seq_params(cu_seqlens=cu_seqlens) # fine-grained check - query_sbhd, key_sbhd, 
value_sbhd = self.parallel_attention.get_query_key_value_tensors( - hidden_states_sbhd, None, None, None, None + query_sbhd, key_sbhd, value_sbhd, q_compressed_sbhd, kv_compressed_sbhd = ( + self.parallel_attention.get_query_key_value_tensors( + hidden_states_sbhd, None, None, None, None + ) ) - query_thd, key_thd, value_thd = self.parallel_attention.get_query_key_value_tensors( - hidden_states_thd, None, None, packed_seq_params, None + query_thd, key_thd, value_thd, q_compressed_thd, kv_compressed_thd = ( + self.parallel_attention.get_query_key_value_tensors( + hidden_states_thd, None, None, packed_seq_params, None + ) ) _query_sbhd = query_sbhd.transpose(0, 1).contiguous().view(*query_thd.shape) _key_sbhd = key_sbhd.transpose(0, 1).contiguous().view(*key_thd.shape) _value_sbhd = value_sbhd.transpose(0, 1).contiguous().view(*value_thd.shape) + _q_compressed_sbhd = ( + q_compressed_sbhd.transpose(0, 1).contiguous().view(*q_compressed_thd.shape) + ) + _kv_compressed_sbhd = ( + kv_compressed_sbhd.transpose(0, 1).contiguous().view(*kv_compressed_thd.shape) + ) assert torch.equal(_query_sbhd, query_thd) assert torch.equal(_key_sbhd, key_thd) assert torch.equal(_value_sbhd, value_thd) + assert torch.equal(_q_compressed_sbhd, q_compressed_thd) + assert torch.equal(_kv_compressed_sbhd, kv_compressed_thd) core_attn_out_sbhd = self.parallel_attention.core_attention( query_sbhd, @@ -828,18 +846,30 @@ def test_gpu_forward_thd_precision(self): packed_seq_params = make_test_packed_seq_params(cu_seqlens=cu_seqlens) # fine-grained check - query_sbhd, key_sbhd, value_sbhd = self.parallel_attention.get_query_key_value_tensors( - hidden_states_sbhd, None, None, None, None + query_sbhd, key_sbhd, value_sbhd, q_compressed_sbhd, kv_compressed_sbhd = ( + self.parallel_attention.get_query_key_value_tensors( + hidden_states_sbhd, None, None, None, None + ) ) - query_thd, key_thd, value_thd = self.parallel_attention.get_query_key_value_tensors( - hidden_states_thd, None, None, 
packed_seq_params, None + query_thd, key_thd, value_thd, q_compressed_thd, kv_compressed_thd = ( + self.parallel_attention.get_query_key_value_tensors( + hidden_states_thd, None, None, packed_seq_params, None + ) ) _query_sbhd = query_sbhd.transpose(0, 1).contiguous().view(*query_thd.shape) _key_sbhd = key_sbhd.transpose(0, 1).contiguous().view(*key_thd.shape) _value_sbhd = value_sbhd.transpose(0, 1).contiguous().view(*value_thd.shape) + _q_compressed_sbhd = ( + q_compressed_sbhd.transpose(0, 1).contiguous().view(*q_compressed_thd.shape) + ) + _kv_compressed_sbhd = ( + kv_compressed_sbhd.transpose(0, 1).contiguous().view(*kv_compressed_thd.shape) + ) torch.testing.assert_close(_query_sbhd, query_thd, atol=1e-6, rtol=1e-6) torch.testing.assert_close(_key_sbhd, key_thd, atol=1e-6, rtol=1e-6) torch.testing.assert_close(_value_sbhd, value_thd, atol=1e-6, rtol=1e-6) + torch.testing.assert_close(_q_compressed_sbhd, q_compressed_thd, atol=1e-6, rtol=1e-6) + torch.testing.assert_close(_kv_compressed_sbhd, kv_compressed_thd, atol=1e-6, rtol=1e-6) core_attn_out_sbhd = self.parallel_attention.core_attention( query_sbhd, @@ -967,18 +997,30 @@ def test_gpu_forward_thd_precision(self): packed_seq_params = make_test_packed_seq_params(cu_seqlens=cu_seqlens) # fine-grained check - query_sbhd, key_sbhd, value_sbhd = self.parallel_attention.get_query_key_value_tensors( - hidden_states_sbhd, None, None, None, None + query_sbhd, key_sbhd, value_sbhd, q_compressed_sbhd, kv_compressed_sbhd = ( + self.parallel_attention.get_query_key_value_tensors( + hidden_states_sbhd, None, None, None, None + ) ) - query_thd, key_thd, value_thd = self.parallel_attention.get_query_key_value_tensors( - hidden_states_thd, None, None, packed_seq_params, None + query_thd, key_thd, value_thd, q_compressed_thd, kv_compressed_thd = ( + self.parallel_attention.get_query_key_value_tensors( + hidden_states_thd, None, None, packed_seq_params, None + ) ) _query_sbhd = query_sbhd.transpose(0, 
1).contiguous().view(*query_thd.shape) _key_sbhd = key_sbhd.transpose(0, 1).contiguous().view(*key_thd.shape) _value_sbhd = value_sbhd.transpose(0, 1).contiguous().view(*value_thd.shape) + _q_compressed_sbhd = ( + q_compressed_sbhd.transpose(0, 1).contiguous().view(*q_compressed_thd.shape) + ) + _kv_compressed_sbhd = ( + kv_compressed_sbhd.transpose(0, 1).contiguous().view(*kv_compressed_thd.shape) + ) assert torch.equal(_query_sbhd, query_thd) assert torch.equal(_key_sbhd, key_thd) assert torch.equal(_value_sbhd, value_thd) + assert torch.equal(_q_compressed_sbhd, q_compressed_thd) + assert torch.equal(_kv_compressed_sbhd, kv_compressed_thd) core_attn_out_sbhd = self.parallel_attention.core_attention( query_sbhd, diff --git a/tools/build_sequences_per_dataset.py b/tools/build_sequences_per_dataset.py new file mode 100644 index 00000000000..e2787dd6434 --- /dev/null +++ b/tools/build_sequences_per_dataset.py @@ -0,0 +1,117 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +""" +Script to build a json file with the sequences per dataset to use with the --per-dataset-sequences-path. Accepts the same arguments as the training script. + +Usage: +python3 tools/build_sequences_per_dataset.py --per-split-data-args-path my-training-dataset-blend.json --per-dataset-sequences-path my-training-dataset-blend-sequences-per-dataset.json + +""" + +import argparse +import json +from typing import Optional, Tuple, List + + +from megatron.core.datasets.indexed_dataset import _IndexReader +from megatron.training.utils import get_blend_and_blend_per_split + +def get_paths_from_blend( + blend: Optional[Tuple[List[str], Optional[List[float]]]], + blend_per_split: Optional[List[Optional[Tuple[List[str], Optional[List[float]]]]]], +) -> List[str]: + """Extract all dataset paths from blend and blend_per_split. 
+ + Args: + blend (Optional[Tuple[List[str], Optional[List[float]]]]): A blend tuple containing + a list of dataset paths and optionally a list of weights, e.g., + (["path/to/dataset_1", "path/to/dataset_2"], [0.3, 0.7]) + blend_per_split (Optional[List[Optional[Tuple[List[str], Optional[List[float]]]]]]): + A list of 3 blend tuples (for train, valid, test splits), where each element has + the same structure as blend + + Returns: + List[str]: A list of all unique dataset paths found in blend and blend_per_split + """ + paths = [] + + # Extract paths from blend + if blend is not None: + paths_list, _ = blend + paths.extend(paths_list) + + # Extract paths from blend_per_split + if blend_per_split is not None: + for split_blend in blend_per_split: + if split_blend is not None: + split_paths, _ = split_blend + paths.extend(split_paths) + + # Remove duplicates while preserving order + seen = set() + unique_paths = [] + for path in paths: + if path not in seen: + seen.add(path) + unique_paths.append(path) + + return unique_paths + +def build_sequences_per_dataset(args): + print("Building sequences per dataset...") + + blend, blend_per_split = get_blend_and_blend_per_split(args) + + file_prefixes = get_paths_from_blend(blend, blend_per_split) + + print(f"Number of unique file prefixes: {len(file_prefixes)}") + + sequence_count_dict = {} + for file_prefix in file_prefixes: + # NOTE(asolergi-nv): For every file prefix, read index file and get the number of sequences and documents + index_reader = _IndexReader(file_prefix + ".idx", False) + count = (index_reader.sequence_count, index_reader.document_count) + sequence_count_dict[file_prefix] = count + + return sequence_count_dict + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--data-path', nargs='*', default=None, + help='The weight and prefix list for a set of train, validation, and test' + 'datasets which split according to --split. 
The accepted formats are: '
+                        '(1) a single prefix, '
+                        '(2) a list of weight prefix pairs e.g. weight1 prefix1 weight2 prefix2, '
+                        '(3) a list of prefixes e.g. prefix1 prefix2. '
+                        'For (3), weights are inferred from the lengths of the contributing datasets. '
+                        'This argument is exclusive to the other independent --*-data-path arguments.')
+    parser.add_argument('--train-data-path', nargs='*', default=None,
+                        help='The weight and prefix list for an independent train dataset. '
+                        'Follows the same pattern rules as --data-path.')
+    parser.add_argument('--valid-data-path', nargs='*', default=None,
+                        help='The weight and prefix list for an independent validation dataset. '
+                        'Follows the same pattern rules as --data-path.')
+    parser.add_argument('--test-data-path', nargs='*', default=None,
+                        help='The weight and prefix list for an independent test dataset. '
+                        'Follows the same pattern rules as --data-path.')
+    parser.add_argument('--data-args-path', type=str, default=None,
+                        help='Path to data-args. Instead of feeding `--data-path` '
+                        'with weighted dataset, we pass in a file path from which '
+                        'we read that argument. This is useful when the list of data is '
+                        'too big.')
+    parser.add_argument('--per-split-data-args-path', type=str, default=None,
+                        help='Path to per-split-data-args. Instead of feeding '
+                        '`--(train|valid|test)-data-path` with weighted dataset, '
+                        'we pass in a file path from which we read those arguments. '
+                        'This is useful when the list of data is too big. Format is a '
+                        'json file with `train`, `valid`, `test` keys')
+    parser.add_argument('--per-dataset-sequences-path', type=str, required=True,
+                        help='Path to the output json file with the sequences per dataset.')
+    args = parser.parse_args()
+
+    sequence_count_dict = build_sequences_per_dataset(args)
+
+    with open(args.per_dataset_sequences_path, "w") as f:
+        json.dump(sequence_count_dict, f)
+
+    print(f"Done! 
Saving --per-dataset-sequences-path file to {args.per_dataset_sequences_path}")
\ No newline at end of file
diff --git a/tools/run_dynamic_text_generation_server.py b/tools/run_dynamic_text_generation_server.py
new file mode 100644
index 00000000000..615073b8fd0
--- /dev/null
+++ b/tools/run_dynamic_text_generation_server.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+import argparse
+import asyncio
+
+import torch
+
+from examples.inference.gpt.gpt_dynamic_inference import (
+    add_dynamic_inference_args,
+    get_inference_context,
+    get_inference_controller,
+    get_model,
+)
+from megatron.core.inference.engines import DynamicInferenceEngine
+from megatron.core.inference.text_generation_server.dynamic_text_gen_server import run_flask_server
+from megatron.core.tokenizers.text.utils.build_tokenizer import build_tokenizer
+from megatron.core.utils import get_mamba_inference_state_config_from_model, trace_async_exceptions
+from megatron.post_training.arguments import add_modelopt_args
+from megatron.training import get_args, get_tokenizer
+from megatron.training.initialize import initialize_megatron
+
+
+def add_text_generation_server_args(parser: argparse.ArgumentParser):
+    """Adds the required command line arguments for running the text generation server."""
+    parser = add_modelopt_args(parser)
+    parser = add_dynamic_inference_args(parser)
+    parser.add_argument("--port", type=int, default=5000, help="Port for Flask server to run on")
+    return parser
+
+
+@trace_async_exceptions
+async def run_text_generation_server(
+    engine: DynamicInferenceEngine, coordinator_port: int, flask_port: int
+):
+    """Runs the Flask server from rank 0 and initializes the DynamicInferenceEngine on all ranks.
+
+    Args:
+        engine (DynamicInferenceEngine): The dynamic inference engine.
+        coordinator_port (int): The network port for the dynamic inference DP coordinator.
+        flask_port (int): The network port for the frontend Flask server.
+ """ + + rank = torch.distributed.get_rank() + + await engine.start_listening_to_data_parallel_coordinator( + inference_coordinator_port=coordinator_port, launch_inference_coordinator=True + ) + + server_task = None + if rank == 0: + server_task = asyncio.create_task( + run_flask_server( + coordinator_port=coordinator_port, + tokenizer=engine.controller.tokenizer, + rank=rank, + flask_port=flask_port, + ) + ) + engine_task = engine.engine_loop_task + + tasks_to_run = [engine_task] + if server_task: + assert rank == 0 + + tasks_to_run.append(server_task) + + await asyncio.gather(*tasks_to_run) + + +if __name__ == "__main__": + with torch.inference_mode(): + initialize_megatron( + extra_args_provider=add_text_generation_server_args, + args_defaults={'no_load_rng': True, 'no_load_optim': True}, + ) + + args = get_args() + model = get_model() + + if args.legacy_tokenizer: + tokenizer = get_tokenizer() + else: + tokenizer = build_tokenizer(args) + + mamba_inference_state_config = get_mamba_inference_state_config_from_model(model) + + # Enable return_log_probs to allow prompt logprobs computation for echo=True requests + # This sets materialize_only_last_token_logits=False in the inference context, + # which is required for lm-eval compatibility (loglikelihood evaluation tasks) + args.return_log_probs = True + + context = get_inference_context( + None, + None, + calculate_max_sequence_length_from_requests=False, + mamba_inference_state_config=mamba_inference_state_config, + ) + + controller = get_inference_controller(model, context) + + engine = DynamicInferenceEngine( + controller, + context, + enable_cuda_graph=args.cuda_graph_impl == "local", + random_seed=args.seed, + enable_chunked_prefill=not args.disable_chunked_prefill, + ) + + asyncio.run(run_text_generation_server(engine, args.inference_coordinator_port, args.port)) diff --git a/tools/run_inference_performance_test.py b/tools/run_inference_performance_test.py index dda2b8284b3..32d61444530 100644 --- 
a/tools/run_inference_performance_test.py +++ b/tools/run_inference_performance_test.py @@ -120,6 +120,7 @@ def get_inference_engine(args: argparse.Namespace, model: MegatronModule) -> Abs max_tokens_override=args.inference_dynamic_batching_max_tokens_override, block_size_tokens=args.inference_dynamic_batching_block_size, tensor_model_parallel_size=args.tensor_model_parallel_size, + pipeline_model_parallel_size=args.pipeline_model_parallel_size, materialize_only_last_token_logits=not args.return_log_probs, mamba_inference_state_config=mamba_inference_state_config, cache_mla_latent=args.multi_latent_attention and args.cache_mla_latents, diff --git a/train_rl.py b/train_rl.py index d767e30401b..299843bcff3 100644 --- a/train_rl.py +++ b/train_rl.py @@ -25,6 +25,8 @@ from megatron.training.arguments import core_transformer_config_from_args from model_provider import model_provider +from megatron.rl.sequence_packing_utils import get_default_packed_seq_params + stimer = StragglerDetector() import logging @@ -255,6 +257,12 @@ def forward_step(data_iterator, model: GPTModel, loss_only: bool = False): # Common logic for both paths model_to_use = model[0] if isinstance(model, list) else model + if packed_seq_params is None: + packed_seq_params = get_default_packed_seq_params( + seq_length=tokens.shape[1], + device=tokens.device, + ) + # Clear RoPE cache to avoid inference tensor errors try: for module in model_to_use.modules(): diff --git a/uv.lock b/uv.lock index 15892827c83..b95e1cef2cf 100644 --- a/uv.lock +++ b/uv.lock @@ -274,37 +274,37 @@ wheels = [ [[package]] name = "apache-tvm-ffi" -version = "0.1.7" +version = "0.1.8.post2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3d/07/6fbc8fbef1d04bd290f2dcdb3091ae784ac526b62649ec52993a41c65f72/apache_tvm_ffi-0.1.7.tar.gz", hash = "sha256:737cd4a067d6c6c7ad7dd909a0708eb3dc28540299039ea636f8ff5766b122be", size = 
2397940, upload-time = "2025-12-28T09:13:25.52Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/00/e6c7e0710344ccfb2a42be68e04dfd1920864c25bab4a7411a48a4809a1a/apache_tvm_ffi-0.1.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc6334f55ad8b4cb3c084dcdf33720b47665d0ea488c36a1b4f1b99445ae5a12", size = 1816700, upload-time = "2025-12-28T09:12:22.223Z" }, - { url = "https://files.pythonhosted.org/packages/84/68/82799768095fe83640f0def07eda01891c9d713a9db8770316ca460a6114/apache_tvm_ffi-0.1.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f69f1195ad7701b0a024a84914b934487a30d5975a9e5d5044c57eb9f9b0fcf7", size = 1976292, upload-time = "2025-12-28T09:12:24.623Z" }, - { url = "https://files.pythonhosted.org/packages/8a/ab/0c01ac5c3d545c04d1adf03a154f8167dc5884c0fdcbb519714107426028/apache_tvm_ffi-0.1.7-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7b6444a322279cc33ada0bb2a0482e3433c31028becda106dcb0d48c30fb2de0", size = 2048671, upload-time = "2025-12-28T09:12:26.457Z" }, - { url = "https://files.pythonhosted.org/packages/0a/e3/449fcdbe7ebd8df4b830399171fb325e7f77b2babe958c6fa6c537281e26/apache_tvm_ffi-0.1.7-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d5e9e668620ba3b78b1c1f393dee67a63850882b0713dba31972c5f854f02860", size = 1920010, upload-time = "2025-12-28T09:12:27.81Z" }, - { url = "https://files.pythonhosted.org/packages/a2/98/737ffc4576af7d4da97f3c73bf347f69d269497cfe9ac089517af5900919/apache_tvm_ffi-0.1.7-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5f7deaa48cfd720949dd1638dfbd4cc7d5285008c7f3f342887e2bf33cf1f5be", size = 2030727, upload-time = "2025-12-28T09:12:29.38Z" }, - { url = "https://files.pythonhosted.org/packages/f1/36/8ea373c1758c812a504a856a06fc08d8761df1c0e2515e6867c22168fea7/apache_tvm_ffi-0.1.7-cp310-cp310-win_amd64.whl", hash = 
"sha256:c1fd70f6e7578eeec5e5d8ed0fb814b12280b724531487ff4d899edddd188d97", size = 1787864, upload-time = "2025-12-28T09:12:31.194Z" }, - { url = "https://files.pythonhosted.org/packages/0a/e7/33ece51ba1670fa77a1897745720b9c8bdac854acb0e09d45e64340948f4/apache_tvm_ffi-0.1.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:20a8847f4609f1fe61015b7547bced99eba38072ed422799fc7bd15371d6d83c", size = 1818328, upload-time = "2025-12-28T09:12:32.784Z" }, - { url = "https://files.pythonhosted.org/packages/8f/b9/3bb4099a82b4c7198823b67067a3d206ec8a0b32204a559c5cca1bee54bd/apache_tvm_ffi-0.1.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f0e010e61d1f220ec4ce3d15053db3f8c8d9c79230ea763343fc5e4acf53ef17", size = 1975412, upload-time = "2025-12-28T09:12:34.737Z" }, - { url = "https://files.pythonhosted.org/packages/48/53/423788fb9b26460b3d7ceb8588d172dfe7ae4abcc335931fcbf08a859904/apache_tvm_ffi-0.1.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9b05155b4b60ebd3642213d0489b6ef24aff17b268960dbb5f106a39899bb8b1", size = 2047974, upload-time = "2025-12-28T09:12:36.296Z" }, - { url = "https://files.pythonhosted.org/packages/a6/30/45d4acf7f99e1fc79a8663f2111901b8031e1f9b316860af7acf4859c964/apache_tvm_ffi-0.1.7-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cceaddc7636060231aca4ada2632814189b1169224b2b451f41984145ef615fc", size = 1919697, upload-time = "2025-12-28T09:12:38.15Z" }, - { url = "https://files.pythonhosted.org/packages/dd/bb/fa5042076bf6e7daaf9774389f99149c1851434fc0d8e4cb34aa0c4a3810/apache_tvm_ffi-0.1.7-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5769cadc42e70522e2a523f1dfe24f48dbe3bf384e63f95df251f9d572ffcf23", size = 2030760, upload-time = "2025-12-28T09:12:39.813Z" }, - { url = "https://files.pythonhosted.org/packages/fe/74/fd06e97699e9cbf36d887c5fbbc56b14e896e2652bbe1781ab84cef82a40/apache_tvm_ffi-0.1.7-cp311-cp311-win_amd64.whl", hash = 
"sha256:b5c7716429ce2beb0a5b00c5a3bdd90b8a5891838afb782491c576ade42ba7c4", size = 1788026, upload-time = "2025-12-28T09:12:42.142Z" }, - { url = "https://files.pythonhosted.org/packages/26/4e/43a41ac023a5989803952d527dfea6e63da71fe223f6e010d4ec71ca0526/apache_tvm_ffi-0.1.7-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:12950ca9f9f4f4436869afe17845a6bfc85cbcd8a15dfa2b16095f7e6f49d06f", size = 1790152, upload-time = "2025-12-28T09:12:43.975Z" }, - { url = "https://files.pythonhosted.org/packages/b9/d3/05ba0a63baba1e3aec0f6303c4bc567493fb1c070d9f298f929a7703c0fb/apache_tvm_ffi-0.1.7-cp312-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d0e579234ce6fb2899377335a881ecf15d0197d833e2d370c9269ea6ca578f6f", size = 1947362, upload-time = "2025-12-28T09:12:45.921Z" }, - { url = "https://files.pythonhosted.org/packages/f1/11/b69df7685d75144fd9f57e5155cdf4ff91d6617a9f8b89b1415204863da0/apache_tvm_ffi-0.1.7-cp312-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:258a4aecc16e963def8ba0ab07f585147c7e7f586156b9496bfdf34af229443d", size = 2024240, upload-time = "2025-12-28T09:12:47.337Z" }, - { url = "https://files.pythonhosted.org/packages/cf/b6/31459f4141ea8621377fecac7c29e1568d494cbf95c5aa1ddf2cbc12a8ff/apache_tvm_ffi-0.1.7-cp312-abi3-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:363701589349e11a945dabce026578203bd83cb8de71af9a066beadd77af085a", size = 1891485, upload-time = "2025-12-28T09:12:49.171Z" }, - { url = "https://files.pythonhosted.org/packages/a5/4d/d21874eda6e3ea59c5a84aa010b24b84617e3b286ad759ac5eadccb1a88c/apache_tvm_ffi-0.1.7-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fbbf87df625930bafbd979c2c510d5bd989e9171098e5bb65320d0e7336d0095", size = 2003196, upload-time = "2025-12-28T09:12:50.891Z" }, - { url = "https://files.pythonhosted.org/packages/3f/d4/37102d96e359386107f5ce3751c4e2a8c1b8df3d34f65b701810ba59465c/apache_tvm_ffi-0.1.7-cp312-abi3-win_amd64.whl", hash = 
"sha256:d2fb56f53e33c7ddf7d6d340d44cbc440d205f7dab4bc5ed1ad20c8fc779250f", size = 1768697, upload-time = "2025-12-28T09:12:52.394Z" }, - { url = "https://files.pythonhosted.org/packages/92/c3/aa4b950032251c24b9db7d725b86d7d683b62d9919f8a32f478c28951dc3/apache_tvm_ffi-0.1.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:dc4a02e0252599d0c4eb2d2fa91b7756f0446b3bc42479b05c140e9d336b9b8b", size = 1820520, upload-time = "2025-12-28T09:12:54.29Z" }, - { url = "https://files.pythonhosted.org/packages/19/70/55ee17b8a340ef8ffc0d6c0587ff5a0c7e7c85a94e6cb202e682838a42c7/apache_tvm_ffi-0.1.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:41e50f2c8d98d706923c70ac19fd5f605bf71b8ffa43c0c2e9e1e22c2d60d4e0", size = 1960686, upload-time = "2025-12-28T09:12:56.206Z" }, - { url = "https://files.pythonhosted.org/packages/b6/0f/ca4f7b4836e1e03386b6e486a0ba88812644723a96965a01e2072f551f2e/apache_tvm_ffi-0.1.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:835bd391c6f3388e84e36f0ea2347761992241a3953be6ebb319bf1c2ac855d8", size = 2032237, upload-time = "2025-12-28T09:12:58.113Z" }, - { url = "https://files.pythonhosted.org/packages/89/b6/35be0035f8ed9e10ae6d9ffb7e91397ba381eb734f85ff852efe56eb3012/apache_tvm_ffi-0.1.7-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7d8b53e94c2bc28e961934e8291a9763d7868f84f9759cbae462b77ca801e5b", size = 1904414, upload-time = "2025-12-28T09:12:59.624Z" }, - { url = "https://files.pythonhosted.org/packages/5a/5f/1f57863c2c68389d1453fe147d89da22910a0e4f645a8be29cc8f461850f/apache_tvm_ffi-0.1.7-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e135b70c7be8627661c5ec4a466e17e1aba260ffd7c6bccfe231c9ea975875e7", size = 2013039, upload-time = "2025-12-28T09:13:01.37Z" }, - { url = "https://files.pythonhosted.org/packages/bb/3f/08d1931c6ebca557051176d400e15c1d7f6cf9096fc02f8c90ac7ee309ac/apache_tvm_ffi-0.1.7-cp314-cp314t-win_amd64.whl", 
hash = "sha256:408bb2c1fa585260afd556e53d65e2735f201f358202fda2b07d08a6cbfaf91f", size = 1828344, upload-time = "2025-12-28T09:13:03.359Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/e3/e9/a13952726228fa6282154ecf927092396bc759739e5e045019f6ab92f3ca/apache_tvm_ffi-0.1.8.post2.tar.gz", hash = "sha256:4513e38852894f290172ecfefcbc18d34e817fd29c16a0f1770e130c82b4067e", size = 2441111, upload-time = "2026-01-13T18:11:27.864Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/65/0c67653e6431716f2706e29f2e2e1ce9a6f9d9f7615c0c637a4881c3f5a5/apache_tvm_ffi-0.1.8.post2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e11e03c865297c65c2f206c90b8014890bc52a3059d8148b47cd2c2759bcea90", size = 1838436, upload-time = "2026-01-13T18:10:22.334Z" }, + { url = "https://files.pythonhosted.org/packages/46/8f/13fe7acbd7497312fda5faf51545fcb50c0ed5398cfe525d006ba29f1b9b/apache_tvm_ffi-0.1.8.post2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e855f2b3f60ec16939b00e1b594ce7f488f96e387b12547e98643177f70ab2b1", size = 1996102, upload-time = "2026-01-13T18:10:23.97Z" }, + { url = "https://files.pythonhosted.org/packages/cc/f8/b469a4d91ea74f627cb220835049fb60a566f7427f27c9f66c6c54a287b6/apache_tvm_ffi-0.1.8.post2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:854ecd90a1039d542c531fa6a4928f5633452aedf1ed7f646f3bbbeca8217156", size = 2069067, upload-time = "2026-01-13T18:10:25.425Z" }, + { url = "https://files.pythonhosted.org/packages/d0/88/663e532e7ba625a3998724ae0207ce620c32a057c339b4e4ae0be6810d85/apache_tvm_ffi-0.1.8.post2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1894b6f9c2b45bc9df8e407d041e575128591b998ced09f974675d2bb6b8bc9", size = 1939413, upload-time = "2026-01-13T18:10:28.61Z" }, + { url = 
"https://files.pythonhosted.org/packages/ee/16/6ec659fd5b3b163de9adc75bf29fc90460d212b489947b77b8ed89c01472/apache_tvm_ffi-0.1.8.post2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef922ef3ed971a4e161a0385ef9f67af379d52b0d83d62c08b79f6707b6660b5", size = 2053058, upload-time = "2026-01-13T18:10:30.721Z" }, + { url = "https://files.pythonhosted.org/packages/ec/a8/d01f81987db9bbfc4b242575d3fe79f72aeba3582ca449fec28d19938400/apache_tvm_ffi-0.1.8.post2-cp310-cp310-win_amd64.whl", hash = "sha256:146f98dcd21052eeed96ad07472bdffd8189fb2106edc6e3de91e28e3b000bf8", size = 1809231, upload-time = "2026-01-13T18:10:32.293Z" }, + { url = "https://files.pythonhosted.org/packages/aa/86/7db24692281d80204d07d77346ad4cb87f6183f1364ed94311993a47ed1a/apache_tvm_ffi-0.1.8.post2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:40f5fba3e06617f16888a0fdaf7ab4049841ff6e741644be822400438b771fe7", size = 1840013, upload-time = "2026-01-13T18:10:33.724Z" }, + { url = "https://files.pythonhosted.org/packages/cf/cc/fbaef883c6ba8e2c56ffcca997f2c076d1c14787799a62f39bd52c7126d5/apache_tvm_ffi-0.1.8.post2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9eb6d228fa22b6a5da140d761962f022a154746c91fe7608c49062deaf671f9f", size = 1995159, upload-time = "2026-01-13T18:10:35.727Z" }, + { url = "https://files.pythonhosted.org/packages/49/08/f1e984e3573d0cbd6d53f3f73a12691fba153afc529fbd506d78e739b330/apache_tvm_ffi-0.1.8.post2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:581c0acf845859be0cc26ac79f3663a83393b662c97c7125ebb78f0228b69d96", size = 2068543, upload-time = "2026-01-13T18:10:39.12Z" }, + { url = "https://files.pythonhosted.org/packages/35/1f/5336d430a133cf66ca9dac8ae9b6e25d8b99275a6687656421a1deee9f1b/apache_tvm_ffi-0.1.8.post2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:beadc7bb480ae02d02e2108543f6f4b4170d77e361ab3ccb43697d174ec185b0", size = 1939018, upload-time = 
"2026-01-13T18:10:40.621Z" }, + { url = "https://files.pythonhosted.org/packages/5f/67/969c66a27a128cf738d0c068e0d4451d691d8197929c797cbe8e59c6cfc9/apache_tvm_ffi-0.1.8.post2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e593d191c7ca0726ebcd3b024a4bc8140694fdfce2e7b02493f38ad5c4c9ecf7", size = 2053068, upload-time = "2026-01-13T18:10:43.241Z" }, + { url = "https://files.pythonhosted.org/packages/d4/f1/84881a799d227fdc4a61fbf0cb8d5ceb6a72ad788fa9070e5853ed9759b6/apache_tvm_ffi-0.1.8.post2-cp311-cp311-win_amd64.whl", hash = "sha256:1c685f19d0f26d9356c7c77a1cb652a3632ec9ee6cd21aa1d8cfb968743ec1fd", size = 1809557, upload-time = "2026-01-13T18:10:44.743Z" }, + { url = "https://files.pythonhosted.org/packages/12/8b/a39d6c6eb1a87f6003e2717695cc6d44cc65ccd57dae5a0af944c0d25751/apache_tvm_ffi-0.1.8.post2-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:c13ec7fc8f255767998b301ace0cd1e7d17ba76b48ffeb97ca9eb22a3314e250", size = 1811882, upload-time = "2026-01-13T18:10:46.317Z" }, + { url = "https://files.pythonhosted.org/packages/8e/3a/7b1c9edcaeaebb945038144896cf17eb828a40b6ace0371823e133132664/apache_tvm_ffi-0.1.8.post2-cp312-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c78b4caf17304a1f47881bccdb2f9ac24d98b3b7fbe761a6dd4fd0585934d96", size = 1967259, upload-time = "2026-01-13T18:10:47.851Z" }, + { url = "https://files.pythonhosted.org/packages/6c/b6/463602f57dda2e1c69165c044c07061cd59404593f313a427a3ad9c02cf3/apache_tvm_ffi-0.1.8.post2-cp312-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4a48da3fa8f47130f3502134f01e97044388c5217e7b91be4b0acec4feab81a0", size = 2044821, upload-time = "2026-01-13T18:10:49.396Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e6/9cdc7f4814b2fbdfceba5dc640c3704d07d8db18e3d1aef5aa49bbf1ba7e/apache_tvm_ffi-0.1.8.post2-cp312-abi3-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:61cc98e489ebc03bc96d1a966dc863eb1c0a607383f6bf4a416ff0a96170ca85", size = 1910964, upload-time = "2026-01-13T18:10:51.345Z" }, + { url = "https://files.pythonhosted.org/packages/7d/f5/a2e5487cdad575fe6cf34f8a23f8c49e08ce5808fa75dc19d98bcebc20ec/apache_tvm_ffi-0.1.8.post2-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:caa48509f0c7d9b896823b492a9ee42afac2548065c1ec7ef07f9a0dc30d2796", size = 2025814, upload-time = "2026-01-13T18:10:52.804Z" }, + { url = "https://files.pythonhosted.org/packages/8f/0d/8922c142281187ae6b989579876d00d20b84ccd3878aad487b91d951d254/apache_tvm_ffi-0.1.8.post2-cp312-abi3-win_amd64.whl", hash = "sha256:985831722d1dd562d13e8e34102fd99f42f964c53fc7cf9d80fc4f7602f89196", size = 1790204, upload-time = "2026-01-13T18:10:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/2c/6e/2c21e754adf5c08fff154ee0a75b01568a4ed5da2d8f4a4a95d8451736e0/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4a3f6cb1173cfe19a1b66fd8577a6f3ce644bdc22691961c07c64304a7c3f17a", size = 1842240, upload-time = "2026-01-13T18:10:56.652Z" }, + { url = "https://files.pythonhosted.org/packages/f6/0a/342dd451d714b683143bd0d7dbd26279772dedf1d827a7efd357f05ff0aa/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ac6c2d4e117ca63974bcd20fdf5715d01f3b4d0ed78921f493461050daf7c1a3", size = 1980660, upload-time = "2026-01-13T18:10:58.892Z" }, + { url = "https://files.pythonhosted.org/packages/c6/63/59f00116530cf7513866467de9044dbdd1954a536009e56c44f167743b35/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0bc5456f971097dcd973daba32cb6f321893873c53235159ab6426b0c7bef7e2", size = 2052810, upload-time = "2026-01-13T18:11:01.698Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/dc/e22c784937fdc907785a764d773ef57a925c443d8ec01ad8bff43dd8d8d6/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f2016b4b31e7f75d71c638bbd1ae43d6e239cf8e20b539fb9de6917b3fb25bc", size = 1923716, upload-time = "2026-01-13T18:11:03.225Z" }, + { url = "https://files.pythonhosted.org/packages/ab/39/695f5642979d1d2d4cd3fca92e7b3b324ebba734b8aab9bdbacc26d4a05c/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c0ca7be630d0888eae163a4298ddfb3f7bd837112c7e6ffcd7157e34e78215b", size = 2035440, upload-time = "2026-01-13T18:11:04.841Z" }, + { url = "https://files.pythonhosted.org/packages/ed/e0/ed152425e51b7c8a4ce81d33683b43d87e770a76a65922dc7524a0106ae8/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-win_amd64.whl", hash = "sha256:ecb0d9f7f410ba3b4d92547c2477f73f8406455448f4ea8c146515671fd20210", size = 1849938, upload-time = "2026-01-13T18:11:06.312Z" }, ] [[package]] @@ -339,59 +339,59 @@ wheels = [ [[package]] name = "av" -version = "16.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/15/c3/fd72a0315bc6c943ced1105aaac6e0ec1be57c70d8a616bd05acaa21ffee/av-16.0.1.tar.gz", hash = "sha256:dd2ce779fa0b5f5889a6d9e00fbbbc39f58e247e52d31044272648fe16ff1dbf", size = 3904030, upload-time = "2025-10-13T12:28:51.082Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/3c/eefa29b7d0f5afdf7af9197bbecad8ec2ad06bcb5ac7e909c05a624b00a6/av-16.0.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:8b141aaa29a3afc96a1d467d106790782c1914628b57309eaadb8c10c299c9c0", size = 27206679, upload-time = "2025-10-13T12:24:41.145Z" }, - { url = "https://files.pythonhosted.org/packages/ac/89/a474feb07d5b94aa5af3771b0fe328056e2e0a840039b329f4fa2a1fd13a/av-16.0.1-cp310-cp310-macosx_14_0_arm64.whl", hash = 
"sha256:4b8a08a59a5be0082af063d3f4b216e3950340121c6ea95b505a3f5f5cc8f21d", size = 21774556, upload-time = "2025-10-13T12:24:44.332Z" }, - { url = "https://files.pythonhosted.org/packages/be/e5/4361010dcac398bc224823e4b2a47803845e159af9f95164662c523770dc/av-16.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:792e7fc3c08eae005ff36486983966476e553cbb55aaeb0ec99adc4909377320", size = 38176763, upload-time = "2025-10-13T12:24:46.98Z" }, - { url = "https://files.pythonhosted.org/packages/d4/db/b27bdd20c9dc80de5b8792dae16dd6f4edf16408c0c7b28070c6228a8057/av-16.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:4e8ef5df76d8d0ee56139789f80bb90ad1a82a7e6df6e080e2e95c06fa22aea7", size = 39696277, upload-time = "2025-10-13T12:24:50.951Z" }, - { url = "https://files.pythonhosted.org/packages/4e/c8/dd48e6a3ac1e922c141475a0dc30e2b6dfdef9751b3274829889a9281cce/av-16.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4f7a6985784a7464f078e419c71f5528c3e550ee5d605e7149b4a37a111eb136", size = 39576660, upload-time = "2025-10-13T12:24:55.773Z" }, - { url = "https://files.pythonhosted.org/packages/b9/f0/223d047e2e60672a2fb5e51e28913de8d52195199f3e949cbfda1e6cd64b/av-16.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3f45c8d7b803b6faa2a25a26de5964a0a897de68298d9c9672c7af9d65d8b48a", size = 40752775, upload-time = "2025-10-13T12:25:00.827Z" }, - { url = "https://files.pythonhosted.org/packages/18/73/73acad21c9203bc63d806e8baf42fe705eb5d36dafd1996b71ab5861a933/av-16.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:58e6faf1d9328d8cc6be14c5aadacb7d2965ed6d6ae1af32696993096543ff00", size = 32302328, upload-time = "2025-10-13T12:25:06.042Z" }, - { url = "https://files.pythonhosted.org/packages/49/d3/f2a483c5273fccd556dfa1fce14fab3b5d6d213b46e28e54e254465a2255/av-16.0.1-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e310d1fb42879df9bad2152a8db6d2ff8bf332c8c36349a09d62cc122f5070fb", size = 27191982, upload-time = "2025-10-13T12:25:10.622Z" }, - { url = 
"https://files.pythonhosted.org/packages/e0/39/dff28bd252131b3befd09d8587992fe18c09d5125eaefc83a6434d5f56ff/av-16.0.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:2f4b357e5615457a84e6b6290916b22864b76b43d5079e1a73bc27581a5b9bac", size = 21760305, upload-time = "2025-10-13T12:25:14.882Z" }, - { url = "https://files.pythonhosted.org/packages/4a/4d/2312d50a09c84a9b4269f7fea5de84f05dd2b7c7113dd961d31fad6c64c4/av-16.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:286665c77034c3a98080169b8b5586d5568a15da81fbcdaf8099252f2d232d7c", size = 38691616, upload-time = "2025-10-13T12:25:20.063Z" }, - { url = "https://files.pythonhosted.org/packages/15/9a/3d2d30b56252f998e53fced13720e2ce809c4db477110f944034e0fa4c9f/av-16.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f88de8e5b8ea29e41af4d8d61df108323d050ccfbc90f15b13ec1f99ce0e841e", size = 40216464, upload-time = "2025-10-13T12:25:24.848Z" }, - { url = "https://files.pythonhosted.org/packages/98/cb/3860054794a47715b4be0006105158c7119a57be58d9e8882b72e4d4e1dd/av-16.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0cdb71ebe4d1b241cf700f8f0c44a7d2a6602b921e16547dd68c0842113736e1", size = 40094077, upload-time = "2025-10-13T12:25:30.238Z" }, - { url = "https://files.pythonhosted.org/packages/41/58/79830fb8af0a89c015250f7864bbd427dff09c70575c97847055f8a302f7/av-16.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:28c27a65d40e8cf82b6db2543f8feeb8b56d36c1938f50773494cd3b073c7223", size = 41279948, upload-time = "2025-10-13T12:25:35.24Z" }, - { url = "https://files.pythonhosted.org/packages/83/79/6e1463b04382f379f857113b851cf5f9d580a2f7bd794211cd75352f4e04/av-16.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:ffea39ac7574f234f5168f9b9602e8d4ecdd81853238ec4d661001f03a6d3f64", size = 32297586, upload-time = "2025-10-13T12:25:39.826Z" }, - { url = 
"https://files.pythonhosted.org/packages/44/78/12a11d7a44fdd8b26a65e2efa1d8a5826733c8887a989a78306ec4785956/av-16.0.1-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:e41a8fef85dfb2c717349f9ff74f92f9560122a9f1a94b1c6c9a8a9c9462ba71", size = 27206375, upload-time = "2025-10-13T12:25:44.423Z" }, - { url = "https://files.pythonhosted.org/packages/27/19/3a4d3882852a0ee136121979ce46f6d2867b974eb217a2c9a070939f55ad/av-16.0.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:6352a64b25c9f985d4f279c2902db9a92424e6f2c972161e67119616f0796cb9", size = 21752603, upload-time = "2025-10-13T12:25:49.122Z" }, - { url = "https://files.pythonhosted.org/packages/cb/6e/f7abefba6e008e2f69bebb9a17ba38ce1df240c79b36a5b5fcacf8c8fcfd/av-16.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5201f7b4b5ed2128118cb90c2a6d64feedb0586ca7c783176896c78ffb4bbd5c", size = 38931978, upload-time = "2025-10-13T12:25:55.021Z" }, - { url = "https://files.pythonhosted.org/packages/b2/7a/1305243ab47f724fdd99ddef7309a594e669af7f0e655e11bdd2c325dfae/av-16.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:daecc2072b82b6a942acbdaa9a2e00c05234c61fef976b22713983c020b07992", size = 40549383, upload-time = "2025-10-13T12:26:00.897Z" }, - { url = "https://files.pythonhosted.org/packages/32/b2/357cc063185043eb757b4a48782bff780826103bcad1eb40c3ddfc050b7e/av-16.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6573da96e8bebc3536860a7def108d7dbe1875c86517072431ced702447e6aea", size = 40241993, upload-time = "2025-10-13T12:26:06.993Z" }, - { url = "https://files.pythonhosted.org/packages/20/bb/ced42a4588ba168bf0ef1e9d016982e3ba09fde6992f1dda586fd20dcf71/av-16.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4bc064e48a8de6c087b97dd27cf4ef8c13073f0793108fbce3ecd721201b2502", size = 41532235, upload-time = "2025-10-13T12:26:12.488Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/37/c7811eca0f318d5fd3212f7e8c3d8335f75a54907c97a89213dc580b8056/av-16.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0c669b6b6668c8ae74451c15ec6d6d8a36e4c3803dc5d9910f607a174dd18f17", size = 32296912, upload-time = "2025-10-13T12:26:19.187Z" }, - { url = "https://files.pythonhosted.org/packages/86/59/972f199ccc4f8c9e51f59e0f8962a09407396b3f6d11355e2c697ba555f9/av-16.0.1-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:4c61c6c120f5c5d95c711caf54e2c4a9fb2f1e613ac0a9c273d895f6b2602e44", size = 27170433, upload-time = "2025-10-13T12:26:24.673Z" }, - { url = "https://files.pythonhosted.org/packages/53/9d/0514cbc185fb20353ab25da54197fbd169a233e39efcbb26533c36a9dbb9/av-16.0.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ecc2e41320c69095f44aff93470a0d32c30892b2dbad0a08040441c81efa379", size = 21717654, upload-time = "2025-10-13T12:26:29.12Z" }, - { url = "https://files.pythonhosted.org/packages/32/8c/881409dd124b4e07d909d2b70568acb21126fc747656390840a2238651c9/av-16.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:036f0554d6faef3f4a94acaeb0cedd388e3ab96eb0eb5a14ec27c17369c466c9", size = 38651601, upload-time = "2025-10-13T12:26:33.919Z" }, - { url = "https://files.pythonhosted.org/packages/35/fd/867ba4cc3ab504442dc89b0c117e6a994fc62782eb634c8f31304586f93e/av-16.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:876415470a62e4a3550cc38db2fc0094c25e64eea34d7293b7454125d5958190", size = 40278604, upload-time = "2025-10-13T12:26:39.2Z" }, - { url = "https://files.pythonhosted.org/packages/b3/87/63cde866c0af09a1fa9727b4f40b34d71b0535785f5665c27894306f1fbc/av-16.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:56902a06bd0828d13f13352874c370670882048267191ff5829534b611ba3956", size = 39984854, upload-time = "2025-10-13T12:26:44.581Z" }, - { url = 
"https://files.pythonhosted.org/packages/71/3b/8f40a708bff0e6b0f957836e2ef1f4d4429041cf8d99a415a77ead8ac8a3/av-16.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe988c2bf0fc2d952858f791f18377ea4ae4e19ba3504793799cd6c2a2562edf", size = 41270352, upload-time = "2025-10-13T12:26:50.817Z" }, - { url = "https://files.pythonhosted.org/packages/1e/b5/c114292cb58a7269405ae13b7ba48c7d7bfeebbb2e4e66c8073c065a4430/av-16.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:708a66c248848029bf518f0482b81c5803846f1b597ef8013b19c014470b620f", size = 32273242, upload-time = "2025-10-13T12:26:55.788Z" }, - { url = "https://files.pythonhosted.org/packages/ff/e9/a5b714bc078fdcca8b46c8a0b38484ae5c24cd81d9c1703d3e8ae2b57259/av-16.0.1-cp313-cp313t-macosx_11_0_x86_64.whl", hash = "sha256:79a77ee452537030c21a0b41139bedaf16629636bf764b634e93b99c9d5f4558", size = 27248984, upload-time = "2025-10-13T12:27:00.564Z" }, - { url = "https://files.pythonhosted.org/packages/06/ef/ff777aaf1f88e3f6ce94aca4c5806a0c360e68d48f9d9f0214e42650f740/av-16.0.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:080823a6ff712f81e7089ae9756fb1512ca1742a138556a852ce50f58e457213", size = 21828098, upload-time = "2025-10-13T12:27:05.433Z" }, - { url = "https://files.pythonhosted.org/packages/34/d7/a484358d24a42bedde97f61f5d6ee568a7dd866d9df6e33731378db92d9e/av-16.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:04e00124afa8b46a850ed48951ddda61de874407fb8307d6a875bba659d5727e", size = 40051697, upload-time = "2025-10-13T12:27:10.525Z" }, - { url = "https://files.pythonhosted.org/packages/73/87/6772d6080837da5d5c810a98a95bde6977e1f5a6e2e759e8c9292af9ec69/av-16.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:bc098c1c6dc4e7080629a7e9560e67bd4b5654951e17e5ddfd2b1515cfcd37db", size = 41352596, upload-time = "2025-10-13T12:27:16.217Z" }, - { url = 
"https://files.pythonhosted.org/packages/bd/58/fe448c60cf7f85640a0ed8936f16bac874846aa35e1baa521028949c1ea3/av-16.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e6ffd3559a72c46a76aa622630751a821499ba5a780b0047ecc75105d43a6b61", size = 41183156, upload-time = "2025-10-13T12:27:21.574Z" }, - { url = "https://files.pythonhosted.org/packages/85/c6/a039a0979d0c278e1bed6758d5a6186416c3ccb8081970df893fdf9a0d99/av-16.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7a3f1a36b550adadd7513f4f5ee956f9e06b01a88e59f3150ef5fec6879d6f79", size = 42302331, upload-time = "2025-10-13T12:27:26.953Z" }, - { url = "https://files.pythonhosted.org/packages/18/7b/2ca4a9e3609ff155436dac384e360f530919cb1e328491f7df294be0f0dc/av-16.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:c6de794abe52b8c0be55d8bb09ade05905efa74b1a5ab4860b4b9c2bfb6578bf", size = 32462194, upload-time = "2025-10-13T12:27:32.942Z" }, - { url = "https://files.pythonhosted.org/packages/14/9a/6d17e379906cf53a7a44dfac9cf7e4b2e7df2082ba2dbf07126055effcc1/av-16.0.1-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:4b55ba69a943ae592ad7900da67129422954789de9dc384685d6b529925f542e", size = 27167101, upload-time = "2025-10-13T12:27:38.886Z" }, - { url = "https://files.pythonhosted.org/packages/6c/34/891816cd82d5646cb5a51d201d20be0a578232536d083b7d939734258067/av-16.0.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:d4a0c47b6c9bbadad8909b82847f5fe64a608ad392f0b01704e427349bcd9a47", size = 21722708, upload-time = "2025-10-13T12:27:43.29Z" }, - { url = "https://files.pythonhosted.org/packages/1d/20/c24ad34038423ab8c9728cef3301e0861727c188442dcfd70a4a10834c63/av-16.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:8bba52f3035708456f6b1994d10b0371b45cfd8f917b5e84ff81aef4ec2f08bf", size = 38638842, upload-time = "2025-10-13T12:27:49.776Z" }, - { url = 
"https://files.pythonhosted.org/packages/d7/32/034412309572ba3ad713079d07a3ffc13739263321aece54a3055d7a4f1f/av-16.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:08e34c7e7b5e55e29931180bbe21095e1874ac120992bf6b8615d39574487617", size = 40197789, upload-time = "2025-10-13T12:27:55.688Z" }, - { url = "https://files.pythonhosted.org/packages/fb/9c/40496298c32f9094e7df28641c5c58aa6fb07554dc232a9ac98a9894376f/av-16.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0d6250ab9db80c641b299987027c987f14935ea837ea4c02c5f5182f6b69d9e5", size = 39980829, upload-time = "2025-10-13T12:28:01.507Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7e/5c38268ac1d424f309b13b2de4597ad28daea6039ee5af061e62918b12a8/av-16.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7b621f28d8bcbb07cdcd7b18943ddc040739ad304545715ae733873b6e1b739d", size = 41205928, upload-time = "2025-10-13T12:28:08.431Z" }, - { url = "https://files.pythonhosted.org/packages/e3/07/3176e02692d8753a6c4606021c60e4031341afb56292178eee633b6760a4/av-16.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:92101f49082392580c9dba4ba2fe5b931b3bb0fb75a1a848bfb9a11ded68be91", size = 32272836, upload-time = "2025-10-13T12:28:13.405Z" }, - { url = "https://files.pythonhosted.org/packages/8a/47/10e03b88de097385d1550cbb6d8de96159131705c13adb92bd9b7e677425/av-16.0.1-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:07c464bf2bc362a154eccc82e235ef64fd3aaf8d76fc8ed63d0ae520943c6d3f", size = 27248864, upload-time = "2025-10-13T12:28:17.467Z" }, - { url = "https://files.pythonhosted.org/packages/b1/60/7447f206bec3e55e81371f1989098baa2fe9adb7b46c149e6937b7e7c1ca/av-16.0.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:750da0673864b669c95882c7b25768cd93ece0e47010d74ebcc29dbb14d611f8", size = 21828185, upload-time = "2025-10-13T12:28:21.461Z" }, - { url = 
"https://files.pythonhosted.org/packages/68/48/ee2680e7a01bc4911bbe902b814346911fa2528697a44f3043ee68e0f07e/av-16.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:0b7c0d060863b2e341d07cd26851cb9057b7979814148b028fb7ee5d5eb8772d", size = 40040572, upload-time = "2025-10-13T12:28:26.585Z" }, - { url = "https://files.pythonhosted.org/packages/da/68/2c43d28871721ae07cde432d6e36ae2f7035197cbadb43764cc5bf3d4b33/av-16.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:e67c2eca6023ca7d76b0709c5f392b23a5defba499f4c262411f8155b1482cbd", size = 41344288, upload-time = "2025-10-13T12:28:32.512Z" }, - { url = "https://files.pythonhosted.org/packages/ec/7f/1d801bff43ae1af4758c45eee2eaae64f303bbb460e79f352f08587fd179/av-16.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e3243d54d84986e8fbdc1946db634b0c41fe69b6de35a99fa8b763e18503d040", size = 41175142, upload-time = "2025-10-13T12:28:38.356Z" }, - { url = "https://files.pythonhosted.org/packages/e4/06/bb363138687066bbf8997c1433dbd9c81762bae120955ea431fb72d69d26/av-16.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bcf73efab5379601e6510abd7afe5f397d0f6defe69b1610c2f37a4a17996b", size = 42293932, upload-time = "2025-10-13T12:28:43.442Z" }, - { url = "https://files.pythonhosted.org/packages/92/15/5e713098a085f970ccf88550194d277d244464d7b3a7365ad92acb4b6dc1/av-16.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6368d4ff153d75469d2a3217bc403630dc870a72fe0a014d9135de550d731a86", size = 32460624, upload-time = "2025-10-13T12:28:48.767Z" }, +version = "16.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/78/cd/3a83ffbc3cc25b39721d174487fb0d51a76582f4a1703f98e46170ce83d4/av-16.1.0.tar.gz", hash = "sha256:a094b4fd87a3721dacf02794d3d2c82b8d712c85b9534437e82a8a978c175ffd", size = 4285203, upload-time = "2026-01-11T07:31:33.772Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/97/51/2217a9249409d2e88e16e3f16f7c0def9fd3e7ffc4238b2ec211f9935bdb/av-16.1.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:2395748b0c34fe3a150a1721e4f3d4487b939520991b13e7b36f8926b3b12295", size = 26942590, upload-time = "2026-01-09T20:17:58.588Z" }, + { url = "https://files.pythonhosted.org/packages/bf/cd/a7070f4febc76a327c38808e01e2ff6b94531fe0b321af54ea3915165338/av-16.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:72d7ac832710a158eeb7a93242370aa024a7646516291c562ee7f14a7ea881fd", size = 21507910, upload-time = "2026-01-09T20:18:02.309Z" }, + { url = "https://files.pythonhosted.org/packages/ae/30/ec812418cd9b297f0238fe20eb0747d8a8b68d82c5f73c56fe519a274143/av-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6cbac833092e66b6b0ac4d81ab077970b8ca874951e9c3974d41d922aaa653ed", size = 38738309, upload-time = "2026-01-09T20:18:04.701Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b8/6c5795bf1f05f45c5261f8bce6154e0e5e86b158a6676650ddd77c28805e/av-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:eb990672d97c18f99c02f31c8d5750236f770ffe354b5a52c5f4d16c5e65f619", size = 40293006, upload-time = "2026-01-09T20:18:07.238Z" }, + { url = "https://files.pythonhosted.org/packages/a7/44/5e183bcb9333fc3372ee6e683be8b0c9b515a506894b2d32ff465430c074/av-16.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:05ad70933ac3b8ef896a820ea64b33b6cca91a5fac5259cb9ba7fa010435be15", size = 40123516, upload-time = "2026-01-09T20:18:09.955Z" }, + { url = "https://files.pythonhosted.org/packages/12/1d/b5346d582a3c3d958b4d26a2cc63ce607233582d956121eb20d2bbe55c2e/av-16.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d831a1062a3c47520bf99de6ec682bd1d64a40dfa958e5457bb613c5270e7ce3", size = 41463289, upload-time = "2026-01-09T20:18:12.459Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/31/acc946c0545f72b8d0d74584cb2a0ade9b7dfe2190af3ef9aa52a2e3c0b1/av-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:358ab910fef3c5a806c55176f2b27e5663b33c4d0a692dafeb049c6ed71f8aff", size = 31754959, upload-time = "2026-01-09T20:18:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/48/d0/b71b65d1b36520dcb8291a2307d98b7fc12329a45614a303ff92ada4d723/av-16.1.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e88ad64ee9d2b9c4c5d891f16c22ae78e725188b8926eb88187538d9dd0b232f", size = 26927747, upload-time = "2026-01-09T20:18:16.976Z" }, + { url = "https://files.pythonhosted.org/packages/2f/79/720a5a6ccdee06eafa211b945b0a450e3a0b8fc3d12922f0f3c454d870d2/av-16.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cb296073fa6935724de72593800ba86ae49ed48af03960a4aee34f8a611f442b", size = 21492232, upload-time = "2026-01-09T20:18:19.266Z" }, + { url = "https://files.pythonhosted.org/packages/8e/4f/a1ba8d922f2f6d1a3d52419463ef26dd6c4d43ee364164a71b424b5ae204/av-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:720edd4d25aa73723c1532bb0597806d7b9af5ee34fc02358782c358cfe2f879", size = 39291737, upload-time = "2026-01-09T20:18:21.513Z" }, + { url = "https://files.pythonhosted.org/packages/1a/31/fc62b9fe8738d2693e18d99f040b219e26e8df894c10d065f27c6b4f07e3/av-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c7f2bc703d0df260a1fdf4de4253c7f5500ca9fc57772ea241b0cb241bcf972e", size = 40846822, upload-time = "2026-01-09T20:18:24.275Z" }, + { url = "https://files.pythonhosted.org/packages/53/10/ab446583dbce730000e8e6beec6ec3c2753e628c7f78f334a35cad0317f4/av-16.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d69c393809babada7d54964d56099e4b30a3e1f8b5736ca5e27bd7be0e0f3c83", size = 40675604, upload-time = "2026-01-09T20:18:26.866Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/d7/1003be685277005f6d63fd9e64904ee222fe1f7a0ea70af313468bb597db/av-16.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:441892be28582356d53f282873c5a951592daaf71642c7f20165e3ddcb0b4c63", size = 42015955, upload-time = "2026-01-09T20:18:29.461Z" }, + { url = "https://files.pythonhosted.org/packages/2f/4a/fa2a38ee9306bf4579f556f94ecbc757520652eb91294d2a99c7cf7623b9/av-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:273a3e32de64819e4a1cd96341824299fe06f70c46f2288b5dc4173944f0fd62", size = 31750339, upload-time = "2026-01-09T20:18:32.249Z" }, + { url = "https://files.pythonhosted.org/packages/9c/84/2535f55edcd426cebec02eb37b811b1b0c163f26b8d3f53b059e2ec32665/av-16.1.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:640f57b93f927fba8689f6966c956737ee95388a91bd0b8c8b5e0481f73513d6", size = 26945785, upload-time = "2026-01-09T20:18:34.486Z" }, + { url = "https://files.pythonhosted.org/packages/b6/17/ffb940c9e490bf42e86db4db1ff426ee1559cd355a69609ec1efe4d3a9eb/av-16.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:ae3fb658eec00852ebd7412fdc141f17f3ddce8afee2d2e1cf366263ad2a3b35", size = 21481147, upload-time = "2026-01-09T20:18:36.716Z" }, + { url = "https://files.pythonhosted.org/packages/15/c1/e0d58003d2d83c3921887d5c8c9b8f5f7de9b58dc2194356a2656a45cfdc/av-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ee558d9c02a142eebcbe55578a6d817fedfde42ff5676275504e16d07a7f86", size = 39517197, upload-time = "2026-01-11T09:57:31.937Z" }, + { url = "https://files.pythonhosted.org/packages/32/77/787797b43475d1b90626af76f80bfb0c12cfec5e11eafcfc4151b8c80218/av-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7ae547f6d5fa31763f73900d43901e8c5fa6367bb9a9840978d57b5a7ae14ed2", size = 41174337, upload-time = "2026-01-11T09:57:35.792Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/ac/d90df7f1e3b97fc5554cf45076df5045f1e0a6adf13899e10121229b826c/av-16.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8cf065f9d438e1921dc31fc7aa045790b58aee71736897866420d80b5450f62a", size = 40817720, upload-time = "2026-01-11T09:57:39.039Z" }, + { url = "https://files.pythonhosted.org/packages/80/6f/13c3a35f9dbcebafd03fe0c4cbd075d71ac8968ec849a3cfce406c35a9d2/av-16.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a345877a9d3cc0f08e2bc4ec163ee83176864b92587afb9d08dff50f37a9a829", size = 42267396, upload-time = "2026-01-11T09:57:42.115Z" }, + { url = "https://files.pythonhosted.org/packages/c8/b9/275df9607f7fb44317ccb1d4be74827185c0d410f52b6e2cd770fe209118/av-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f49243b1d27c91cd8c66fdba90a674e344eb8eb917264f36117bf2b6879118fd", size = 31752045, upload-time = "2026-01-11T09:57:45.106Z" }, + { url = "https://files.pythonhosted.org/packages/75/2a/63797a4dde34283dd8054219fcb29294ba1c25d68ba8c8c8a6ae53c62c45/av-16.1.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:ce2a1b3d8bf619f6c47a9f28cfa7518ff75ddd516c234a4ee351037b05e6a587", size = 26916715, upload-time = "2026-01-11T09:57:47.682Z" }, + { url = "https://files.pythonhosted.org/packages/d2/c4/0b49cf730d0ae8cda925402f18ae814aef351f5772d14da72dd87ff66448/av-16.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:408dbe6a2573ca58a855eb8cd854112b33ea598651902c36709f5f84c991ed8e", size = 21452167, upload-time = "2026-01-11T09:57:50.606Z" }, + { url = "https://files.pythonhosted.org/packages/51/23/408806503e8d5d840975aad5699b153aaa21eb6de41ade75248a79b7a37f/av-16.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:57f657f86652a160a8a01887aaab82282f9e629abf94c780bbdbb01595d6f0f7", size = 39215659, upload-time = "2026-01-11T09:57:53.757Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/19/a8528d5bba592b3903f44c28dab9cc653c95fcf7393f382d2751a1d1523e/av-16.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:adbad2b355c2ee4552cac59762809d791bda90586d134a33c6f13727fb86cb3a", size = 40874970, upload-time = "2026-01-11T09:57:56.802Z" }, + { url = "https://files.pythonhosted.org/packages/e8/24/2dbcdf0e929ad56b7df078e514e7bd4ca0d45cba798aff3c8caac097d2f7/av-16.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f42e1a68ec2aebd21f7eb6895be69efa6aa27eec1670536876399725bbda4b99", size = 40530345, upload-time = "2026-01-11T09:58:00.421Z" }, + { url = "https://files.pythonhosted.org/packages/54/27/ae91b41207f34e99602d1c72ab6ffd9c51d7c67e3fbcd4e3a6c0e54f882c/av-16.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58fe47aeaef0f100c40ec8a5de9abbd37f118d3ca03829a1009cf288e9aef67c", size = 41972163, upload-time = "2026-01-11T09:58:03.756Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7a/22158fb923b2a9a00dfab0e96ef2e8a1763a94dd89e666a5858412383d46/av-16.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:565093ebc93b2f4b76782589564869dadfa83af5b852edebedd8fee746457d06", size = 31729230, upload-time = "2026-01-11T09:58:07.254Z" }, + { url = "https://files.pythonhosted.org/packages/7f/f1/878f8687d801d6c4565d57ebec08449c46f75126ebca8e0fed6986599627/av-16.1.0-cp313-cp313t-macosx_11_0_x86_64.whl", hash = "sha256:574081a24edb98343fd9f473e21ae155bf61443d4ec9d7708987fa597d6b04b2", size = 27008769, upload-time = "2026-01-11T09:58:10.266Z" }, + { url = "https://files.pythonhosted.org/packages/30/f1/bd4ce8c8b5cbf1d43e27048e436cbc9de628d48ede088a1d0a993768eb86/av-16.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:9ab00ea29c25ebf2ea1d1e928d7babb3532d562481c5d96c0829212b70756ad0", size = 21590588, upload-time = "2026-01-11T09:58:12.629Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/dd/c81f6f9209201ff0b5d5bed6da6c6e641eef52d8fbc930d738c3f4f6f75d/av-16.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a84a91188c1071f238a9523fd42dbe567fb2e2607b22b779851b2ce0eac1b560", size = 40638029, upload-time = "2026-01-11T09:58:15.399Z" }, + { url = "https://files.pythonhosted.org/packages/15/4d/07edff82b78d0459a6e807e01cd280d3180ce832efc1543de80d77676722/av-16.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c2cd0de4dd022a7225ff224fde8e7971496d700be41c50adaaa26c07bb50bf97", size = 41970776, upload-time = "2026-01-11T09:58:19.075Z" }, + { url = "https://files.pythonhosted.org/packages/da/9d/1f48b354b82fa135d388477cd1b11b81bdd4384bd6a42a60808e2ec2d66b/av-16.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0816143530624a5a93bc5494f8c6eeaf77549b9366709c2ac8566c1e9bff6df5", size = 41764751, upload-time = "2026-01-11T09:58:22.788Z" }, + { url = "https://files.pythonhosted.org/packages/2f/c7/a509801e98db35ec552dd79da7bdbcff7104044bfeb4c7d196c1ce121593/av-16.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e3a28053af29644696d0c007e897d19b1197585834660a54773e12a40b16974c", size = 43034355, upload-time = "2026-01-11T09:58:26.125Z" }, + { url = "https://files.pythonhosted.org/packages/36/8b/e5f530d9e8f640da5f5c5f681a424c65f9dd171c871cd255d8a861785a6e/av-16.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2e3e67144a202b95ed299d165232533989390a9ea3119d37eccec697dc6dbb0c", size = 31947047, upload-time = "2026-01-11T09:58:31.867Z" }, + { url = "https://files.pythonhosted.org/packages/df/18/8812221108c27d19f7e5f486a82c827923061edf55f906824ee0fcaadf50/av-16.1.0-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:39a634d8e5a87e78ea80772774bfd20c0721f0d633837ff185f36c9d14ffede4", size = 26916179, upload-time = "2026-01-11T09:58:36.506Z" }, + { url = 
"https://files.pythonhosted.org/packages/38/ef/49d128a9ddce42a2766fe2b6595bd9c49e067ad8937a560f7838a541464e/av-16.1.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0ba32fb9e9300948a7fa9f8a3fc686e6f7f77599a665c71eb2118fdfd2c743f9", size = 21460168, upload-time = "2026-01-11T09:58:39.231Z" }, + { url = "https://files.pythonhosted.org/packages/e6/a9/b310d390844656fa74eeb8c2750e98030877c75b97551a23a77d3f982741/av-16.1.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:ca04d17815182d34ce3edc53cbda78a4f36e956c0fd73e3bab249872a831c4d7", size = 39210194, upload-time = "2026-01-11T09:58:42.138Z" }, + { url = "https://files.pythonhosted.org/packages/0c/7b/e65aae179929d0f173af6e474ad1489b5b5ad4c968a62c42758d619e54cf/av-16.1.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ee0e8de2e124a9ef53c955fe2add6ee7c56cc8fd83318265549e44057db77142", size = 40811675, upload-time = "2026-01-11T09:58:45.871Z" }, + { url = "https://files.pythonhosted.org/packages/54/3f/5d7edefd26b6a5187d6fac0f5065ee286109934f3dea607ef05e53f05b31/av-16.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:22bf77a2f658827043a1e184b479c3bf25c4c43ab32353677df2d119f080e28f", size = 40543942, upload-time = "2026-01-11T09:58:49.759Z" }, + { url = "https://files.pythonhosted.org/packages/1b/24/f8b17897b67be0900a211142f5646a99d896168f54d57c81f3e018853796/av-16.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2dd419d262e6a71cab206d80bbf28e0a10d0f227b671cdf5e854c028faa2d043", size = 41924336, upload-time = "2026-01-11T09:58:53.344Z" }, + { url = "https://files.pythonhosted.org/packages/1c/cf/d32bc6bbbcf60b65f6510c54690ed3ae1c4ca5d9fafbce835b6056858686/av-16.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:53585986fd431cd436f290fba662cfb44d9494fbc2949a183de00acc5b33fa88", size = 31735077, upload-time = "2026-01-11T09:58:56.684Z" }, + { url = 
"https://files.pythonhosted.org/packages/53/f4/9b63dc70af8636399bd933e9df4f3025a0294609510239782c1b746fc796/av-16.1.0-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:76f5ed8495cf41e1209a5775d3699dc63fdc1740b94a095e2485f13586593205", size = 27014423, upload-time = "2026-01-11T09:58:59.703Z" }, + { url = "https://files.pythonhosted.org/packages/d1/da/787a07a0d6ed35a0888d7e5cfb8c2ffa202f38b7ad2c657299fac08eb046/av-16.1.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:8d55397190f12a1a3ae7538be58c356cceb2bf50df1b33523817587748ce89e5", size = 21595536, upload-time = "2026-01-11T09:59:02.508Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f4/9a7d8651a611be6e7e3ab7b30bb43779899c8cac5f7293b9fb634c44a3f3/av-16.1.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:9d51d9037437218261b4bbf9df78a95e216f83d7774fbfe8d289230b5b2e28e2", size = 40642490, upload-time = "2026-01-11T09:59:05.842Z" }, + { url = "https://files.pythonhosted.org/packages/6b/e4/eb79bc538a94b4ff93cd4237d00939cba797579f3272490dd0144c165a21/av-16.1.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0ce07a89c15644407f49d942111ca046e323bbab0a9078ff43ee57c9b4a50dad", size = 41976905, upload-time = "2026-01-11T09:59:09.169Z" }, + { url = "https://files.pythonhosted.org/packages/5e/f5/f6db0dd86b70167a4d55ee0d9d9640983c570d25504f2bde42599f38241e/av-16.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:cac0c074892ea97113b53556ff41c99562db7b9f09f098adac1f08318c2acad5", size = 41770481, upload-time = "2026-01-11T09:59:12.74Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/33651d658e45e16ab7671ea5fcf3d20980ea7983234f4d8d0c63c65581a5/av-16.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7dec3dcbc35a187ce450f65a2e0dda820d5a9e6553eea8344a1459af11c98649", size = 43036824, upload-time = "2026-01-11T09:59:16.507Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/41/7f13361db54d7e02f11552575c0384dadaf0918138f4eaa82ea03a9f9580/av-16.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6f90dc082ff2068ddbe77618400b44d698d25d9c4edac57459e250c16b33d700", size = 31948164, upload-time = "2026-01-11T09:59:19.501Z" }, ] [[package]] @@ -675,14 +675,14 @@ wheels = [ [[package]] name = "causal-conv1d" -version = "1.5.3.post1" +version = "1.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ninja" }, { name = "packaging" }, { name = "torch", marker = "sys_platform == 'never'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/cb/104778c728dc3d5ea3bf65a484e3a4cdbe894bdaa2586320e2f61d007b8c/causal_conv1d-1.5.3.post1.tar.gz", hash = "sha256:aba1b717484472d0b2f2e40520a1c03f35fe5155555bd753d1c324afc56ba468", size = 24198, upload-time = "2025-10-10T10:16:23.921Z" } +sdist = { url = "https://files.pythonhosted.org/packages/db/df/63a384c49743b9fc8fec4c05dbd0b515e1c1c2b07e4559acc4fc37c69223/causal_conv1d-1.6.0.tar.gz", hash = "sha256:4eae3220d08e1e88238f3a0a88783147cbdf47f612cc610add75127c7a37ca3e", size = 29356, upload-time = "2026-01-12T17:33:32.794Z" } [[package]] name = "certifi" @@ -1143,7 +1143,7 @@ dependencies = [ { name = "huggingface-hub" }, { name = "multiprocess" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", 
version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pandas" }, { name = "pyarrow" }, @@ -1315,11 +1315,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.20.2" +version = "3.20.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c1/e0/a75dbe4bca1e7d41307323dad5ea2efdd95408f74ab2de8bd7dba9b51a1a/filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64", size = 19510, upload-time = "2026-01-02T15:33:32.582Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485, upload-time = "2026-01-09T17:55:05.421Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/30/ab407e2ec752aa541704ed8f93c11e2a5d92c168b8a755d818b74a3c5c2d/filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8", size = 16697, upload-time = "2026-01-02T15:33:31.133Z" }, + { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" }, ] [[package]] @@ -1388,7 +1388,7 @@ dependencies = [ { name = "einops" }, { name = "ninja" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or 
(python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "nvidia-cudnn-frontend" }, { name = "nvidia-cutlass-dsl" }, { name = "nvidia-ml-py" }, @@ -1913,7 +1913,7 @@ wheels = [ [[package]] name = "jsonschema" -version = "4.25.1" +version = "4.26.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, @@ -1921,9 +1921,9 @@ dependencies = [ { name = "referencing" }, { name = "rpds-py" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = 
"sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, + { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, ] [[package]] @@ -1958,7 +1958,7 @@ wheels = [ [[package]] name = "leptonai" -version = "0.26.7" +version = "0.26.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1983,7 +1983,7 @@ dependencies = [ { name = "uvicorn" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/4d/2b5ab13294b23326ba1d8ef6ad703b1d9535bf72a0617030ddd6238eb925/leptonai-0.26.7-py3-none-any.whl", hash = "sha256:74996da36bf177d2b148887dd349627ab8cd78b94623d543bc91ed9ad65ba0e2", size = 2452890, upload-time = "2025-11-07T20:07:14.99Z" }, + { url = "https://files.pythonhosted.org/packages/be/fd/949841aaf69cfb8086be61ddb06864426064400f831b1ca1ae1ade32b357/leptonai-0.26.8-py3-none-any.whl", hash = "sha256:ddba3afd6b82899f66cd229c4348972320f1e96f20393ebfc5153338b56aad30", size = 2467623, upload-time = "2026-01-08T00:13:55.515Z" }, ] [[package]] @@ -2069,7 +2069,7 @@ wheels = [ [[package]] name = "mamba-ssm" -version = "2.2.6.post3" +version = "2.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "einops" }, @@ -2080,7 +2080,7 @@ dependencies = [ { name = "transformers" }, { name = "triton", marker = "sys_platform == 'never'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b6/0c/9373a469ff7a33bdd0644e55fa45165ba3900274dcf7fe9f10ccc232aef9/mamba_ssm-2.2.6.post3.tar.gz", hash = "sha256:826a3cdb651959f191dac64502f8a29627d9116fe6bb7c57e4f562da1aea7bf3", size = 113913, upload-time = "2025-10-10T06:00:44.939Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/54/69/a87f06d9dba78c041adb81f2228e978aab179477c64f1a210c0fe0d63e8d/mamba_ssm-2.3.0.tar.gz", hash = "sha256:8294e12125f76021e4e190f4137e84a84935920eeda5d0037a6917524456b303", size = 121116, upload-time = "2026-01-12T17:07:22.152Z" } [[package]] name = "markdown" @@ -2223,7 +2223,7 @@ name = "megatron-core" source = { editable = "." } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "torch", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] @@ -2258,6 +2258,7 @@ lts = [ { name = "causal-conv1d" }, { name = "datasets" }, { name = "einops" }, + { name = "emerging-optimizers" }, { name = "fastapi" }, { name = "flashinfer-python" }, { name = "mamba-ssm" }, @@ -2343,11 +2344,12 @@ requires-dist = [ { name = "einops", marker = "extra == 'dev'", specifier = "~=0.8" }, { name = "einops", marker = "extra == 'lts'", specifier = "~=0.8" }, { name = "emerging-optimizers", marker = "extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, + { name = "emerging-optimizers", marker = "extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, { name = "fastapi", marker = "extra == 'dev'", specifier = "~=0.50" }, { name = "fastapi", marker = "extra == 
'lts'", specifier = "~=0.50" }, { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.3.2" }, - { name = "flashinfer-python", marker = "extra == 'dev'" }, - { name = "flashinfer-python", marker = "extra == 'lts'" }, + { name = "flashinfer-python", marker = "extra == 'dev'", specifier = "~=0.5.0" }, + { name = "flashinfer-python", marker = "extra == 'lts'", specifier = "~=0.5.0" }, { name = "flask-restful", marker = "extra == 'mlm'" }, { name = "mamba-ssm", marker = "extra == 'dev'", specifier = "~=2.2" }, { name = "mamba-ssm", marker = "extra == 'lts'", specifier = "~=2.2" }, @@ -2389,7 +2391,7 @@ build = [ { name = "nvidia-mathdx" }, { name = "packaging", specifier = ">=24.2" }, { name = "pybind11" }, - { name = "setuptools", specifier = "<80.0.0" }, + { name = "setuptools", specifier = ">=77.0.0,<80.0.0" }, { name = "torch" }, ] ci = [ @@ -2441,7 +2443,7 @@ dependencies = [ { name = "click" }, { name = "multi-storage-client" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = 
"pillow" }, { name = "pyyaml" }, { name = "s3fs" }, @@ -2470,7 +2472,7 @@ version = "0.5.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0e/4a/c27b42ed9b1c7d13d9ba8b6905dece787d6259152f2309338aed29b2447b/ml_dtypes-0.5.4.tar.gz", hash = "sha256:8ab06a50fb9bf9666dd0fe5dfb4676fa2b0ac0f31ecff72a6c3af8e22c063453", size = 692314, upload-time = "2025-11-17T22:32:31.031Z" } wheels = [ @@ -2582,7 +2584,7 @@ wheels = [ [[package]] name = "multi-storage-client" -version = "0.39.1" +version = "0.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -2600,18 +2602,18 @@ dependencies = [ { name = "xattr" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/03/d2/6cce7b432f58bcffb394bac96d9edad2d927ffec382a53300e41186da1a5/multi_storage_client-0.39.1-cp310-cp310-macosx_14_0_arm64.whl", hash = 
"sha256:46aa5c7446e079dac852e7db9077e80fe69f4c7e4690f526cc61cbd15d43b07e", size = 8429120, upload-time = "2025-12-19T03:18:25.375Z" }, - { url = "https://files.pythonhosted.org/packages/18/00/423e6fcf218a52216ad86686f4fffa4f18b605594601d621aec68ad02d33/multi_storage_client-0.39.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31249cee7f0fa4cc536136371eb41ad48c4b86c02fdf4e3186e7b464488d1e73", size = 4784439, upload-time = "2025-12-19T03:20:50.881Z" }, - { url = "https://files.pythonhosted.org/packages/14/73/161ebe8bb71acee7bb7a42389756cd43d07e56e155d40f54b72370c5eb64/multi_storage_client-0.39.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22eb940daae3149efe2e8e412fdd4e3d8d10c0077b336cf2ebc90236dfe58665", size = 5048336, upload-time = "2025-12-19T03:17:35.193Z" }, - { url = "https://files.pythonhosted.org/packages/6d/77/c20249c7887c37c0868ec8bc9ca6313fca54a232c3a50b04cd56b0b514ea/multi_storage_client-0.39.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4d71b05d5ea7d09c075edae731ee6b89ed2114b5eeaf96e46e2a15b37b91de07", size = 8427425, upload-time = "2025-12-19T03:20:26.171Z" }, - { url = "https://files.pythonhosted.org/packages/8d/ab/350acee344fe32db07ae535021e339ec4edf5e40b78a323fc11fcd6dda97/multi_storage_client-0.39.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f0dd3bb536f8fc5992dccb1e53108a695fb4e703e9320d5292f63188269bfcd", size = 4783799, upload-time = "2025-12-19T03:19:14.054Z" }, - { url = "https://files.pythonhosted.org/packages/83/c0/19b03d58f4d2713b3948e3bd72d5711d89f22250b966b70ccfbb914cb6fe/multi_storage_client-0.39.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44602b32f7b708b82eab56d49ba00a05ed478193387cf4e22ca2c19da8c6877a", size = 5047335, upload-time = "2025-12-19T03:18:49.925Z" }, - { url = 
"https://files.pythonhosted.org/packages/76/c5/204f3859f3cc7dde35fc74b52c5d61d7017434781c296c9640c1bbd849c7/multi_storage_client-0.39.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:15973fff7b351f2949d4cd3bf9f24bb8c73838f5ab29e67f018318ec3d3e3079", size = 8420253, upload-time = "2025-12-19T03:16:48.333Z" }, - { url = "https://files.pythonhosted.org/packages/57/dd/9f2d20e83742c5dcf49719a2905157b372e6380779d8c2fdd90f3898f6b9/multi_storage_client-0.39.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fc12834fd3fada72016b4df54f908c769d1fd6d5b9dbbc573831665def8b46c", size = 4784064, upload-time = "2025-12-19T03:17:11.978Z" }, - { url = "https://files.pythonhosted.org/packages/7f/c8/fbc5a69eb910246bf154030aec0d9df6c204481d8a1ec3352de042499300/multi_storage_client-0.39.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881365a17bc3886bf8f54d33c9c5d0d5a393b6000cdd12eed756b6e5eb3b2bb0", size = 5048702, upload-time = "2025-12-19T03:15:27.792Z" }, - { url = "https://files.pythonhosted.org/packages/8d/e6/7ca7a7fd03893d03b36c225702e2a644b38bfe1b5c0fa5b266fd8f72ba1d/multi_storage_client-0.39.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e6f7a9710d1e61beb3f736cc2a1bbb9916c462022be544edf604cc8d8a9ac201", size = 8418852, upload-time = "2025-12-19T03:16:23.313Z" }, - { url = "https://files.pythonhosted.org/packages/bb/6a/7b25d15446085a103ebdf21834705020693e76ea093ca23e5647872b4165/multi_storage_client-0.39.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfa87a3eecb09de64a77c68b622f2d2b0cb3aefb8a9d2306b1bf83c085f3bdd5", size = 4784229, upload-time = "2025-12-19T03:21:14.885Z" }, - { url = "https://files.pythonhosted.org/packages/a1/bd/dbda0847ef2ffab6a11b60f4702edf60fc1287174009bb2e35dff205d5ba/multi_storage_client-0.39.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f705d1d4d11b19ec9f6819674ef852d9b8fa2c95906c4c5cab2ecb7c22bc290", size = 5048266, 
upload-time = "2025-12-19T03:15:56.348Z" }, + { url = "https://files.pythonhosted.org/packages/af/63/3ecdef2bd3e627d4915497315db8c9fdd86f8443c2ea858b0ebae3116edd/multi_storage_client-0.40.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ab13383f3c70b5dfa784bc26c8e40777c03c6969c87107c72424a365140635e3", size = 8441996, upload-time = "2026-01-06T20:21:02.68Z" }, + { url = "https://files.pythonhosted.org/packages/a7/4c/1bb4945379009d2197689742ef1a932862e269a8f2267e57fa439d77bd58/multi_storage_client-0.40.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96d7d64e06da0d0726ad9cad1d741a312d65e5be84ac93880bda5a81dcb38439", size = 4795528, upload-time = "2026-01-06T20:19:13.6Z" }, + { url = "https://files.pythonhosted.org/packages/d9/1e/d44fc5b1f1a05b7ce4b3b5edde7f8daa7b4dcf05a61b7a0c9e4fe22af1bd/multi_storage_client-0.40.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58da2b465f8b90f6deee94d9f59b9911a752592220e323b855ee1509ae613a27", size = 5061490, upload-time = "2026-01-06T20:17:26.767Z" }, + { url = "https://files.pythonhosted.org/packages/3b/f9/73072df16c61e8927691d6d636951e8954371882f9fea8b93fdef42ae315/multi_storage_client-0.40.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6a87296d15900a8e1e819ee6cdca9b311e892622108e947066455a3797fb8508", size = 8439893, upload-time = "2026-01-06T20:20:15.162Z" }, + { url = "https://files.pythonhosted.org/packages/92/d4/512b14589cfa739426e3852fda62f774aa7ea7ba48877e75c3d03d091ca9/multi_storage_client-0.40.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b51c3ef6a69c7d9e98ee5b9aa2b511228b0802dd837048da5a1b79413d016fe", size = 4795279, upload-time = "2026-01-06T20:19:45.661Z" }, + { url = "https://files.pythonhosted.org/packages/49/aa/8c3a9557fb39bfb57842ac6f39f7fd614ab68e299ead20695ed3ffd90a99/multi_storage_client-0.40.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d4f85db520f058c29c0553a0b4cc7a67811879dcc72ed8cc091e1c41d65874b7", size = 5061699, upload-time = "2026-01-06T20:15:34.596Z" }, + { url = "https://files.pythonhosted.org/packages/92/65/448a08141d34629e601edca69883268801a02ef385b6c70b4bffe37074f5/multi_storage_client-0.40.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:7719d181f3be9d76c1335a5e6b83db02bbbe81b64d786a7dfbdf5fdf4edabd02", size = 8434884, upload-time = "2026-01-06T20:21:52.106Z" }, + { url = "https://files.pythonhosted.org/packages/3b/38/28ee280cab9c47c24a251f88f8b461fac00aebb7ae5dc045c8bf46fc4c05/multi_storage_client-0.40.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b36eaf7260d5bc017bba2fb77529365ca89f7180111065f820df03b55f9132ad", size = 4800088, upload-time = "2026-01-06T20:21:27.251Z" }, + { url = "https://files.pythonhosted.org/packages/1a/57/24120b57f2e30372fcf0d3ec5673e0824414ccd9a312a5669f7274980bd9/multi_storage_client-0.40.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8fe94c6ed7861fc38f55054fb13e05a818a1a738d7625eb057920ab5292b324", size = 5061276, upload-time = "2026-01-06T20:16:37.388Z" }, + { url = "https://files.pythonhosted.org/packages/98/3d/b0cb5eb1d6f5b36c4226d74f83b14fe45e120807cf059e8db6fac3017ca0/multi_storage_client-0.40.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:a3f7998b7ddea2e4b669570384be937cab53add5734154c5543098789f8a3db2", size = 8433939, upload-time = "2026-01-06T20:17:03.222Z" }, + { url = "https://files.pythonhosted.org/packages/75/ea/fcec0e93d23fc26cc622cefb41a574f5b1697cc33d86e0e8e48f7da4248a/multi_storage_client-0.40.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be2a973661d19434ec744e3c7b7cdf6a36d0aa22ba25b62b44b6a20a18aee88", size = 4799908, upload-time = "2026-01-06T20:17:54.691Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/f3/fc07bd7efcffa5422e746550231d0edd6459f9686edf03c1ad961fd4d721/multi_storage_client-0.40.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4928f1c6b0cc4011d785c6fac10114d61833d6aa10c0e22ecce85090bf868e4c", size = 5060685, upload-time = "2026-01-06T20:16:14.272Z" }, ] [[package]] @@ -2965,7 +2967,7 @@ wheels = [ [[package]] name = "numpy" -version = "2.4.0" +version = "2.4.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -2977,79 +2979,79 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform == 'linux'", "python_full_version == '3.11.*' and sys_platform != 'linux'", ] -sdist = { url = "https://files.pythonhosted.org/packages/a4/7a/6a3d14e205d292b738db449d0de649b373a59edb0d0b4493821d0a3e8718/numpy-2.4.0.tar.gz", hash = "sha256:6e504f7b16118198f138ef31ba24d985b124c2c469fe8467007cf30fd992f934", size = 20685720, upload-time = "2025-12-20T16:18:19.023Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/26/7e/7bae7cbcc2f8132271967aa03e03954fc1e48aa1f3bf32b29ca95fbef352/numpy-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:316b2f2584682318539f0bcaca5a496ce9ca78c88066579ebd11fd06f8e4741e", size = 16940166, upload-time = "2025-12-20T16:15:43.434Z" }, - { url = "https://files.pythonhosted.org/packages/0f/27/6c13f5b46776d6246ec884ac5817452672156a506d08a1f2abb39961930a/numpy-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2718c1de8504121714234b6f8241d0019450353276c88b9453c9c3d92e101db", size = 12641781, upload-time = "2025-12-20T16:15:45.701Z" }, - { url = "https://files.pythonhosted.org/packages/14/1c/83b4998d4860d15283241d9e5215f28b40ac31f497c04b12fa7f428ff370/numpy-2.4.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:21555da4ec4a0c942520ead42c3b0dc9477441e085c42b0fbdd6a084869a6f6b", size = 5470247, upload-time = "2025-12-20T16:15:47.943Z" }, - { url = 
"https://files.pythonhosted.org/packages/54/08/cbce72c835d937795571b0464b52069f869c9e78b0c076d416c5269d2718/numpy-2.4.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:413aa561266a4be2d06cd2b9665e89d9f54c543f418773076a76adcf2af08bc7", size = 6799807, upload-time = "2025-12-20T16:15:49.795Z" }, - { url = "https://files.pythonhosted.org/packages/ff/be/2e647961cd8c980591d75cdcd9e8f647d69fbe05e2a25613dc0a2ea5fb1a/numpy-2.4.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0feafc9e03128074689183031181fac0897ff169692d8492066e949041096548", size = 14701992, upload-time = "2025-12-20T16:15:51.615Z" }, - { url = "https://files.pythonhosted.org/packages/a2/fb/e1652fb8b6fd91ce6ed429143fe2e01ce714711e03e5b762615e7b36172c/numpy-2.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8fdfed3deaf1928fb7667d96e0567cdf58c2b370ea2ee7e586aa383ec2cb346", size = 16646871, upload-time = "2025-12-20T16:15:54.129Z" }, - { url = "https://files.pythonhosted.org/packages/62/23/d841207e63c4322842f7cd042ae981cffe715c73376dcad8235fb31debf1/numpy-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e06a922a469cae9a57100864caf4f8a97a1026513793969f8ba5b63137a35d25", size = 16487190, upload-time = "2025-12-20T16:15:56.147Z" }, - { url = "https://files.pythonhosted.org/packages/bc/a0/6a842c8421ebfdec0a230e65f61e0dabda6edbef443d999d79b87c273965/numpy-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:927ccf5cd17c48f801f4ed43a7e5673a2724bd2171460be3e3894e6e332ef83a", size = 18580762, upload-time = "2025-12-20T16:15:58.524Z" }, - { url = "https://files.pythonhosted.org/packages/0a/d1/c79e0046641186f2134dde05e6181825b911f8bdcef31b19ddd16e232847/numpy-2.4.0-cp311-cp311-win32.whl", hash = "sha256:882567b7ae57c1b1a0250208cc21a7976d8cbcc49d5a322e607e6f09c9e0bd53", size = 6233359, upload-time = "2025-12-20T16:16:00.938Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/f0/74965001d231f28184d6305b8cdc1b6fcd4bf23033f6cb039cfe76c9fca7/numpy-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:8b986403023c8f3bf8f487c2e6186afda156174d31c175f747d8934dfddf3479", size = 12601132, upload-time = "2025-12-20T16:16:02.484Z" }, - { url = "https://files.pythonhosted.org/packages/65/32/55408d0f46dfebce38017f5bd931affa7256ad6beac1a92a012e1fbc67a7/numpy-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:3f3096405acc48887458bbf9f6814d43785ac7ba2a57ea6442b581dedbc60ce6", size = 10573977, upload-time = "2025-12-20T16:16:04.77Z" }, - { url = "https://files.pythonhosted.org/packages/8b/ff/f6400ffec95de41c74b8e73df32e3fff1830633193a7b1e409be7fb1bb8c/numpy-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2a8b6bb8369abefb8bd1801b054ad50e02b3275c8614dc6e5b0373c305291037", size = 16653117, upload-time = "2025-12-20T16:16:06.709Z" }, - { url = "https://files.pythonhosted.org/packages/fd/28/6c23e97450035072e8d830a3c411bf1abd1f42c611ff9d29e3d8f55c6252/numpy-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e284ca13d5a8367e43734148622caf0b261b275673823593e3e3634a6490f83", size = 12369711, upload-time = "2025-12-20T16:16:08.758Z" }, - { url = "https://files.pythonhosted.org/packages/bc/af/acbef97b630ab1bb45e6a7d01d1452e4251aa88ce680ac36e56c272120ec/numpy-2.4.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:49ff32b09f5aa0cd30a20c2b39db3e669c845589f2b7fc910365210887e39344", size = 5198355, upload-time = "2025-12-20T16:16:10.902Z" }, - { url = "https://files.pythonhosted.org/packages/c1/c8/4e0d436b66b826f2e53330adaa6311f5cac9871a5b5c31ad773b27f25a74/numpy-2.4.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:36cbfb13c152b1c7c184ddac43765db8ad672567e7bafff2cc755a09917ed2e6", size = 6545298, upload-time = "2025-12-20T16:16:12.607Z" }, - { url = 
"https://files.pythonhosted.org/packages/ef/27/e1f5d144ab54eac34875e79037011d511ac57b21b220063310cb96c80fbc/numpy-2.4.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35ddc8f4914466e6fc954c76527aa91aa763682a4f6d73249ef20b418fe6effb", size = 14398387, upload-time = "2025-12-20T16:16:14.257Z" }, - { url = "https://files.pythonhosted.org/packages/67/64/4cb909dd5ab09a9a5d086eff9586e69e827b88a5585517386879474f4cf7/numpy-2.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc578891de1db95b2a35001b695451767b580bb45753717498213c5ff3c41d63", size = 16363091, upload-time = "2025-12-20T16:16:17.32Z" }, - { url = "https://files.pythonhosted.org/packages/9d/9c/8efe24577523ec6809261859737cf117b0eb6fdb655abdfdc81b2e468ce4/numpy-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:98e81648e0b36e325ab67e46b5400a7a6d4a22b8a7c8e8bbfe20e7db7906bf95", size = 16176394, upload-time = "2025-12-20T16:16:19.524Z" }, - { url = "https://files.pythonhosted.org/packages/61/f0/1687441ece7b47a62e45a1f82015352c240765c707928edd8aef875d5951/numpy-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d57b5046c120561ba8fa8e4030fbb8b822f3063910fa901ffadf16e2b7128ad6", size = 18287378, upload-time = "2025-12-20T16:16:22.866Z" }, - { url = "https://files.pythonhosted.org/packages/d3/6f/f868765d44e6fc466467ed810ba9d8d6db1add7d4a748abfa2a4c99a3194/numpy-2.4.0-cp312-cp312-win32.whl", hash = "sha256:92190db305a6f48734d3982f2c60fa30d6b5ee9bff10f2887b930d7b40119f4c", size = 5955432, upload-time = "2025-12-20T16:16:25.06Z" }, - { url = "https://files.pythonhosted.org/packages/d4/b5/94c1e79fcbab38d1ca15e13777477b2914dd2d559b410f96949d6637b085/numpy-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:680060061adb2d74ce352628cb798cfdec399068aa7f07ba9fb818b2b3305f98", size = 12306201, upload-time = "2025-12-20T16:16:26.979Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/09/c39dadf0b13bb0768cd29d6a3aaff1fb7c6905ac40e9aaeca26b1c086e06/numpy-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:39699233bc72dd482da1415dcb06076e32f60eddc796a796c5fb6c5efce94667", size = 10308234, upload-time = "2025-12-20T16:16:29.417Z" }, - { url = "https://files.pythonhosted.org/packages/a7/0d/853fd96372eda07c824d24adf02e8bc92bb3731b43a9b2a39161c3667cc4/numpy-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a152d86a3ae00ba5f47b3acf3b827509fd0b6cb7d3259665e63dafbad22a75ea", size = 16649088, upload-time = "2025-12-20T16:16:31.421Z" }, - { url = "https://files.pythonhosted.org/packages/e3/37/cc636f1f2a9f585434e20a3e6e63422f70bfe4f7f6698e941db52ea1ac9a/numpy-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:39b19251dec4de8ff8496cd0806cbe27bf0684f765abb1f4809554de93785f2d", size = 12364065, upload-time = "2025-12-20T16:16:33.491Z" }, - { url = "https://files.pythonhosted.org/packages/ed/69/0b78f37ca3690969beee54103ce5f6021709134e8020767e93ba691a72f1/numpy-2.4.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:009bd0ea12d3c784b6639a8457537016ce5172109e585338e11334f6a7bb88ee", size = 5192640, upload-time = "2025-12-20T16:16:35.636Z" }, - { url = "https://files.pythonhosted.org/packages/1d/2a/08569f8252abf590294dbb09a430543ec8f8cc710383abfb3e75cc73aeda/numpy-2.4.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5fe44e277225fd3dff6882d86d3d447205d43532c3627313d17e754fb3905a0e", size = 6541556, upload-time = "2025-12-20T16:16:37.276Z" }, - { url = "https://files.pythonhosted.org/packages/93/e9/a949885a4e177493d61519377952186b6cbfdf1d6002764c664ba28349b5/numpy-2.4.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f935c4493eda9069851058fa0d9e39dbf6286be690066509305e52912714dbb2", size = 14396562, upload-time = "2025-12-20T16:16:38.953Z" }, - { url = 
"https://files.pythonhosted.org/packages/99/98/9d4ad53b0e9ef901c2ef1d550d2136f5ac42d3fd2988390a6def32e23e48/numpy-2.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8cfa5f29a695cb7438965e6c3e8d06e0416060cf0d709c1b1c1653a939bf5c2a", size = 16351719, upload-time = "2025-12-20T16:16:41.503Z" }, - { url = "https://files.pythonhosted.org/packages/28/de/5f3711a38341d6e8dd619f6353251a0cdd07f3d6d101a8fd46f4ef87f895/numpy-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba0cb30acd3ef11c94dc27fbfba68940652492bc107075e7ffe23057f9425681", size = 16176053, upload-time = "2025-12-20T16:16:44.552Z" }, - { url = "https://files.pythonhosted.org/packages/2a/5b/2a3753dc43916501b4183532e7ace862e13211042bceafa253afb5c71272/numpy-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:60e8c196cd82cbbd4f130b5290007e13e6de3eca79f0d4d38014769d96a7c475", size = 18277859, upload-time = "2025-12-20T16:16:47.174Z" }, - { url = "https://files.pythonhosted.org/packages/2c/c5/a18bcdd07a941db3076ef489d036ab16d2bfc2eae0cf27e5a26e29189434/numpy-2.4.0-cp313-cp313-win32.whl", hash = "sha256:5f48cb3e88fbc294dc90e215d86fbaf1c852c63dbdb6c3a3e63f45c4b57f7344", size = 5953849, upload-time = "2025-12-20T16:16:49.554Z" }, - { url = "https://files.pythonhosted.org/packages/4f/f1/719010ff8061da6e8a26e1980cf090412d4f5f8060b31f0c45d77dd67a01/numpy-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:a899699294f28f7be8992853c0c60741f16ff199205e2e6cdca155762cbaa59d", size = 12302840, upload-time = "2025-12-20T16:16:51.227Z" }, - { url = "https://files.pythonhosted.org/packages/f5/5a/b3d259083ed8b4d335270c76966cb6cf14a5d1b69e1a608994ac57a659e6/numpy-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:9198f447e1dc5647d07c9a6bbe2063cc0132728cc7175b39dbc796da5b54920d", size = 10308509, upload-time = "2025-12-20T16:16:53.313Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/01/95edcffd1bb6c0633df4e808130545c4f07383ab629ac7e316fb44fff677/numpy-2.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74623f2ab5cc3f7c886add4f735d1031a1d2be4a4ae63c0546cfd74e7a31ddf6", size = 12491815, upload-time = "2025-12-20T16:16:55.496Z" }, - { url = "https://files.pythonhosted.org/packages/59/ea/5644b8baa92cc1c7163b4b4458c8679852733fa74ca49c942cfa82ded4e0/numpy-2.4.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:0804a8e4ab070d1d35496e65ffd3cf8114c136a2b81f61dfab0de4b218aacfd5", size = 5320321, upload-time = "2025-12-20T16:16:57.468Z" }, - { url = "https://files.pythonhosted.org/packages/26/4e/e10938106d70bc21319bd6a86ae726da37edc802ce35a3a71ecdf1fdfe7f/numpy-2.4.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:02a2038eb27f9443a8b266a66911e926566b5a6ffd1a689b588f7f35b81e7dc3", size = 6641635, upload-time = "2025-12-20T16:16:59.379Z" }, - { url = "https://files.pythonhosted.org/packages/b3/8d/a8828e3eaf5c0b4ab116924df82f24ce3416fa38d0674d8f708ddc6c8aac/numpy-2.4.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1889b3a3f47a7b5bee16bc25a2145bd7cb91897f815ce3499db64c7458b6d91d", size = 14456053, upload-time = "2025-12-20T16:17:01.768Z" }, - { url = "https://files.pythonhosted.org/packages/68/a1/17d97609d87d4520aa5ae2dcfb32305654550ac6a35effb946d303e594ce/numpy-2.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85eef4cb5625c47ee6425c58a3502555e10f45ee973da878ac8248ad58c136f3", size = 16401702, upload-time = "2025-12-20T16:17:04.235Z" }, - { url = "https://files.pythonhosted.org/packages/18/32/0f13c1b2d22bea1118356b8b963195446f3af124ed7a5adfa8fdecb1b6ca/numpy-2.4.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6dc8b7e2f4eb184b37655195f421836cfae6f58197b67e3ffc501f1333d993fa", size = 16242493, upload-time = "2025-12-20T16:17:06.856Z" }, - { url = 
"https://files.pythonhosted.org/packages/ae/23/48f21e3d309fbc137c068a1475358cbd3a901b3987dcfc97a029ab3068e2/numpy-2.4.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:44aba2f0cafd287871a495fb3163408b0bd25bbce135c6f621534a07f4f7875c", size = 18324222, upload-time = "2025-12-20T16:17:09.392Z" }, - { url = "https://files.pythonhosted.org/packages/ac/52/41f3d71296a3dcaa4f456aaa3c6fc8e745b43d0552b6bde56571bb4b4a0f/numpy-2.4.0-cp313-cp313t-win32.whl", hash = "sha256:20c115517513831860c573996e395707aa9fb691eb179200125c250e895fcd93", size = 6076216, upload-time = "2025-12-20T16:17:11.437Z" }, - { url = "https://files.pythonhosted.org/packages/35/ff/46fbfe60ab0710d2a2b16995f708750307d30eccbb4c38371ea9e986866e/numpy-2.4.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b48e35f4ab6f6a7597c46e301126ceba4c44cd3280e3750f85db48b082624fa4", size = 12444263, upload-time = "2025-12-20T16:17:13.182Z" }, - { url = "https://files.pythonhosted.org/packages/a3/e3/9189ab319c01d2ed556c932ccf55064c5d75bb5850d1df7a482ce0badead/numpy-2.4.0-cp313-cp313t-win_arm64.whl", hash = "sha256:4d1cfce39e511069b11e67cd0bd78ceff31443b7c9e5c04db73c7a19f572967c", size = 10378265, upload-time = "2025-12-20T16:17:15.211Z" }, - { url = "https://files.pythonhosted.org/packages/ab/ed/52eac27de39d5e5a6c9aadabe672bc06f55e24a3d9010cd1183948055d76/numpy-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c95eb6db2884917d86cde0b4d4cf31adf485c8ec36bf8696dd66fa70de96f36b", size = 16647476, upload-time = "2025-12-20T16:17:17.671Z" }, - { url = "https://files.pythonhosted.org/packages/77/c0/990ce1b7fcd4e09aeaa574e2a0a839589e4b08b2ca68070f1acb1fea6736/numpy-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:65167da969cd1ec3a1df31cb221ca3a19a8aaa25370ecb17d428415e93c1935e", size = 12374563, upload-time = "2025-12-20T16:17:20.216Z" }, - { url = "https://files.pythonhosted.org/packages/37/7c/8c5e389c6ae8f5fd2277a988600d79e9625db3fff011a2d87ac80b881a4c/numpy-2.4.0-cp314-cp314-macosx_14_0_arm64.whl", hash = 
"sha256:3de19cfecd1465d0dcf8a5b5ea8b3155b42ed0b639dba4b71e323d74f2a3be5e", size = 5203107, upload-time = "2025-12-20T16:17:22.47Z" }, - { url = "https://files.pythonhosted.org/packages/e6/94/ca5b3bd6a8a70a5eec9a0b8dd7f980c1eff4b8a54970a9a7fef248ef564f/numpy-2.4.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6c05483c3136ac4c91b4e81903cb53a8707d316f488124d0398499a4f8e8ef51", size = 6538067, upload-time = "2025-12-20T16:17:24.001Z" }, - { url = "https://files.pythonhosted.org/packages/79/43/993eb7bb5be6761dde2b3a3a594d689cec83398e3f58f4758010f3b85727/numpy-2.4.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36667db4d6c1cea79c8930ab72fadfb4060feb4bfe724141cd4bd064d2e5f8ce", size = 14411926, upload-time = "2025-12-20T16:17:25.822Z" }, - { url = "https://files.pythonhosted.org/packages/03/75/d4c43b61de473912496317a854dac54f1efec3eeb158438da6884b70bb90/numpy-2.4.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9a818668b674047fd88c4cddada7ab8f1c298812783e8328e956b78dc4807f9f", size = 16354295, upload-time = "2025-12-20T16:17:28.308Z" }, - { url = "https://files.pythonhosted.org/packages/b8/0a/b54615b47ee8736a6461a4bb6749128dd3435c5a759d5663f11f0e9af4ac/numpy-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1ee32359fb7543b7b7bd0b2f46294db27e29e7bbdf70541e81b190836cd83ded", size = 16190242, upload-time = "2025-12-20T16:17:30.993Z" }, - { url = "https://files.pythonhosted.org/packages/98/ce/ea207769aacad6246525ec6c6bbd66a2bf56c72443dc10e2f90feed29290/numpy-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e493962256a38f58283de033d8af176c5c91c084ea30f15834f7545451c42059", size = 18280875, upload-time = "2025-12-20T16:17:33.327Z" }, - { url = "https://files.pythonhosted.org/packages/17/ef/ec409437aa962ea372ed601c519a2b141701683ff028f894b7466f0ab42b/numpy-2.4.0-cp314-cp314-win32.whl", hash = "sha256:6bbaebf0d11567fa8926215ae731e1d58e6ec28a8a25235b8a47405d301332db", size = 6002530, 
upload-time = "2025-12-20T16:17:35.729Z" }, - { url = "https://files.pythonhosted.org/packages/5f/4a/5cb94c787a3ed1ac65e1271b968686521169a7b3ec0b6544bb3ca32960b0/numpy-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:3d857f55e7fdf7c38ab96c4558c95b97d1c685be6b05c249f5fdafcbd6f9899e", size = 12435890, upload-time = "2025-12-20T16:17:37.599Z" }, - { url = "https://files.pythonhosted.org/packages/48/a0/04b89db963af9de1104975e2544f30de89adbf75b9e75f7dd2599be12c79/numpy-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:bb50ce5fb202a26fd5404620e7ef820ad1ab3558b444cb0b55beb7ef66cd2d63", size = 10591892, upload-time = "2025-12-20T16:17:39.649Z" }, - { url = "https://files.pythonhosted.org/packages/53/e5/d74b5ccf6712c06c7a545025a6a71bfa03bdc7e0568b405b0d655232fd92/numpy-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:355354388cba60f2132df297e2d53053d4063f79077b67b481d21276d61fc4df", size = 12494312, upload-time = "2025-12-20T16:17:41.714Z" }, - { url = "https://files.pythonhosted.org/packages/c2/08/3ca9cc2ddf54dfee7ae9a6479c071092a228c68aef08252aa08dac2af002/numpy-2.4.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:1d8f9fde5f6dc1b6fc34df8162f3b3079365468703fee7f31d4e0cc8c63baed9", size = 5322862, upload-time = "2025-12-20T16:17:44.145Z" }, - { url = "https://files.pythonhosted.org/packages/87/74/0bb63a68394c0c1e52670cfff2e309afa41edbe11b3327d9af29e4383f34/numpy-2.4.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:e0434aa22c821f44eeb4c650b81c7fbdd8c0122c6c4b5a576a76d5a35625ecd9", size = 6644986, upload-time = "2025-12-20T16:17:46.203Z" }, - { url = "https://files.pythonhosted.org/packages/06/8f/9264d9bdbcf8236af2823623fe2f3981d740fc3461e2787e231d97c38c28/numpy-2.4.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40483b2f2d3ba7aad426443767ff5632ec3156ef09742b96913787d13c336471", size = 14457958, upload-time = "2025-12-20T16:17:48.017Z" }, - { url = 
"https://files.pythonhosted.org/packages/8c/d9/f9a69ae564bbc7236a35aa883319364ef5fd41f72aa320cc1cbe66148fe2/numpy-2.4.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6a7664ddd9746e20b7325351fe1a8408d0a2bf9c63b5e898290ddc8f09544", size = 16398394, upload-time = "2025-12-20T16:17:50.409Z" }, - { url = "https://files.pythonhosted.org/packages/34/c7/39241501408dde7f885d241a98caba5421061a2c6d2b2197ac5e3aa842d8/numpy-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ecb0019d44f4cdb50b676c5d0cb4b1eae8e15d1ed3d3e6639f986fc92b2ec52c", size = 16241044, upload-time = "2025-12-20T16:17:52.661Z" }, - { url = "https://files.pythonhosted.org/packages/7c/95/cae7effd90e065a95e59fe710eeee05d7328ed169776dfdd9f789e032125/numpy-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d0ffd9e2e4441c96a9c91ec1783285d80bf835b677853fc2770a89d50c1e48ac", size = 18321772, upload-time = "2025-12-20T16:17:54.947Z" }, - { url = "https://files.pythonhosted.org/packages/96/df/3c6c279accd2bfb968a76298e5b276310bd55d243df4fa8ac5816d79347d/numpy-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:77f0d13fa87036d7553bf81f0e1fe3ce68d14c9976c9851744e4d3e91127e95f", size = 6148320, upload-time = "2025-12-20T16:17:57.249Z" }, - { url = "https://files.pythonhosted.org/packages/92/8d/f23033cce252e7a75cae853d17f582e86534c46404dea1c8ee094a9d6d84/numpy-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b1f5b45829ac1848893f0ddf5cb326110604d6df96cdc255b0bf9edd154104d4", size = 12623460, upload-time = "2025-12-20T16:17:58.963Z" }, - { url = "https://files.pythonhosted.org/packages/a4/4f/1f8475907d1a7c4ef9020edf7f39ea2422ec896849245f00688e4b268a71/numpy-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:23a3e9d1a6f360267e8fbb38ba5db355a6a7e9be71d7fce7ab3125e88bb646c8", size = 10661799, upload-time = "2025-12-20T16:18:01.078Z" }, - { url = 
"https://files.pythonhosted.org/packages/4b/ef/088e7c7342f300aaf3ee5f2c821c4b9996a1bef2aaf6a49cc8ab4883758e/numpy-2.4.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b54c83f1c0c0f1d748dca0af516062b8829d53d1f0c402be24b4257a9c48ada6", size = 16819003, upload-time = "2025-12-20T16:18:03.41Z" }, - { url = "https://files.pythonhosted.org/packages/ff/ce/a53017b5443b4b84517182d463fc7bcc2adb4faa8b20813f8e5f5aeb5faa/numpy-2.4.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:aabb081ca0ec5d39591fc33018cd4b3f96e1a2dd6756282029986d00a785fba4", size = 12567105, upload-time = "2025-12-20T16:18:05.594Z" }, - { url = "https://files.pythonhosted.org/packages/77/58/5ff91b161f2ec650c88a626c3905d938c89aaadabd0431e6d9c1330c83e2/numpy-2.4.0-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:8eafe7c36c8430b7794edeab3087dec7bf31d634d92f2af9949434b9d1964cba", size = 5395590, upload-time = "2025-12-20T16:18:08.031Z" }, - { url = "https://files.pythonhosted.org/packages/1d/4e/f1a084106df8c2df8132fc437e56987308e0524836aa7733721c8429d4fe/numpy-2.4.0-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:2f585f52b2baf07ff3356158d9268ea095e221371f1074fadea2f42544d58b4d", size = 6709947, upload-time = "2025-12-20T16:18:09.836Z" }, - { url = "https://files.pythonhosted.org/packages/63/09/3d8aeb809c0332c3f642da812ac2e3d74fc9252b3021f8c30c82e99e3f3d/numpy-2.4.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32ed06d0fe9cae27d8fb5f400c63ccee72370599c75e683a6358dd3a4fb50aaf", size = 14535119, upload-time = "2025-12-20T16:18:12.105Z" }, - { url = "https://files.pythonhosted.org/packages/fd/7f/68f0fc43a2cbdc6bb239160c754d87c922f60fbaa0fa3cd3d312b8a7f5ee/numpy-2.4.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:57c540ed8fb1f05cb997c6761cd56db72395b0d6985e90571ff660452ade4f98", size = 16475815, upload-time = "2025-12-20T16:18:14.433Z" }, - { url = 
"https://files.pythonhosted.org/packages/11/73/edeacba3167b1ca66d51b1a5a14697c2c40098b5ffa01811c67b1785a5ab/numpy-2.4.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a39fb973a726e63223287adc6dafe444ce75af952d711e400f3bf2b36ef55a7b", size = 12489376, upload-time = "2025-12-20T16:18:16.524Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320, upload-time = "2026-01-10T06:44:59.619Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/34/2b1bc18424f3ad9af577f6ce23600319968a70575bd7db31ce66731bbef9/numpy-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0cce2a669e3c8ba02ee563c7835f92c153cf02edff1ae05e1823f1dde21b16a5", size = 16944563, upload-time = "2026-01-10T06:42:14.615Z" }, + { url = "https://files.pythonhosted.org/packages/2c/57/26e5f97d075aef3794045a6ca9eada6a4ed70eb9a40e7a4a93f9ac80d704/numpy-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:899d2c18024984814ac7e83f8f49d8e8180e2fbe1b2e252f2e7f1d06bea92425", size = 12645658, upload-time = "2026-01-10T06:42:17.298Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ba/80fc0b1e3cb2fd5c6143f00f42eb67762aa043eaa05ca924ecc3222a7849/numpy-2.4.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:09aa8a87e45b55a1c2c205d42e2808849ece5c484b2aab11fecabec3841cafba", size = 5474132, upload-time = "2026-01-10T06:42:19.637Z" }, + { url = "https://files.pythonhosted.org/packages/40/ae/0a5b9a397f0e865ec171187c78d9b57e5588afc439a04ba9cab1ebb2c945/numpy-2.4.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:edee228f76ee2dab4579fad6f51f6a305de09d444280109e0f75df247ff21501", size = 6804159, upload-time = "2026-01-10T06:42:21.44Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/9c/841c15e691c7085caa6fd162f063eff494099c8327aeccd509d1ab1e36ab/numpy-2.4.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a92f227dbcdc9e4c3e193add1a189a9909947d4f8504c576f4a732fd0b54240a", size = 14708058, upload-time = "2026-01-10T06:42:23.546Z" }, + { url = "https://files.pythonhosted.org/packages/5d/9d/7862db06743f489e6a502a3b93136d73aea27d97b2cf91504f70a27501d6/numpy-2.4.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:538bf4ec353709c765ff75ae616c34d3c3dca1a68312727e8f2676ea644f8509", size = 16651501, upload-time = "2026-01-10T06:42:25.909Z" }, + { url = "https://files.pythonhosted.org/packages/a6/9c/6fc34ebcbd4015c6e5f0c0ce38264010ce8a546cb6beacb457b84a75dfc8/numpy-2.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ac08c63cb7779b85e9d5318e6c3518b424bc1f364ac4cb2c6136f12e5ff2dccc", size = 16492627, upload-time = "2026-01-10T06:42:28.938Z" }, + { url = "https://files.pythonhosted.org/packages/aa/63/2494a8597502dacda439f61b3c0db4da59928150e62be0e99395c3ad23c5/numpy-2.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4f9c360ecef085e5841c539a9a12b883dff005fbd7ce46722f5e9cef52634d82", size = 18585052, upload-time = "2026-01-10T06:42:31.312Z" }, + { url = "https://files.pythonhosted.org/packages/6a/93/098e1162ae7522fc9b618d6272b77404c4656c72432ecee3abc029aa3de0/numpy-2.4.1-cp311-cp311-win32.whl", hash = "sha256:0f118ce6b972080ba0758c6087c3617b5ba243d806268623dc34216d69099ba0", size = 6236575, upload-time = "2026-01-10T06:42:33.872Z" }, + { url = "https://files.pythonhosted.org/packages/8c/de/f5e79650d23d9e12f38a7bc6b03ea0835b9575494f8ec94c11c6e773b1b1/numpy-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:18e14c4d09d55eef39a6ab5b08406e84bc6869c1e34eef45564804f90b7e0574", size = 12604479, upload-time = "2026-01-10T06:42:35.778Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/65/e1097a7047cff12ce3369bd003811516b20ba1078dbdec135e1cd7c16c56/numpy-2.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:6461de5113088b399d655d45c3897fa188766415d0f568f175ab071c8873bd73", size = 10578325, upload-time = "2026-01-10T06:42:38.518Z" }, + { url = "https://files.pythonhosted.org/packages/78/7f/ec53e32bf10c813604edf07a3682616bd931d026fcde7b6d13195dfb684a/numpy-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d3703409aac693fa82c0aee023a1ae06a6e9d065dba10f5e8e80f642f1e9d0a2", size = 16656888, upload-time = "2026-01-10T06:42:40.913Z" }, + { url = "https://files.pythonhosted.org/packages/b8/e0/1f9585d7dae8f14864e948fd7fa86c6cb72dee2676ca2748e63b1c5acfe0/numpy-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7211b95ca365519d3596a1d8688a95874cc94219d417504d9ecb2df99fa7bfa8", size = 12373956, upload-time = "2026-01-10T06:42:43.091Z" }, + { url = "https://files.pythonhosted.org/packages/8e/43/9762e88909ff2326f5e7536fa8cb3c49fb03a7d92705f23e6e7f553d9cb3/numpy-2.4.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5adf01965456a664fc727ed69cc71848f28d063217c63e1a0e200a118d5eec9a", size = 5202567, upload-time = "2026-01-10T06:42:45.107Z" }, + { url = "https://files.pythonhosted.org/packages/4b/ee/34b7930eb61e79feb4478800a4b95b46566969d837546aa7c034c742ef98/numpy-2.4.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:26f0bcd9c79a00e339565b303badc74d3ea2bd6d52191eeca5f95936cad107d0", size = 6549459, upload-time = "2026-01-10T06:42:48.152Z" }, + { url = "https://files.pythonhosted.org/packages/79/e3/5f115fae982565771be994867c89bcd8d7208dbfe9469185497d70de5ddf/numpy-2.4.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0093e85df2960d7e4049664b26afc58b03236e967fb942354deef3208857a04c", size = 14404859, upload-time = "2026-01-10T06:42:49.947Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/7d/9c8a781c88933725445a859cac5d01b5871588a15969ee6aeb618ba99eee/numpy-2.4.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ad270f438cbdd402c364980317fb6b117d9ec5e226fff5b4148dd9aa9fc6e02", size = 16371419, upload-time = "2026-01-10T06:42:52.409Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d2/8aa084818554543f17cf4162c42f162acbd3bb42688aefdba6628a859f77/numpy-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:297c72b1b98100c2e8f873d5d35fb551fce7040ade83d67dd51d38c8d42a2162", size = 16182131, upload-time = "2026-01-10T06:42:54.694Z" }, + { url = "https://files.pythonhosted.org/packages/60/db/0425216684297c58a8df35f3284ef56ec4a043e6d283f8a59c53562caf1b/numpy-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf6470d91d34bf669f61d515499859fa7a4c2f7c36434afb70e82df7217933f9", size = 18295342, upload-time = "2026-01-10T06:42:56.991Z" }, + { url = "https://files.pythonhosted.org/packages/31/4c/14cb9d86240bd8c386c881bafbe43f001284b7cce3bc01623ac9475da163/numpy-2.4.1-cp312-cp312-win32.whl", hash = "sha256:b6bcf39112e956594b3331316d90c90c90fb961e39696bda97b89462f5f3943f", size = 5959015, upload-time = "2026-01-10T06:42:59.631Z" }, + { url = "https://files.pythonhosted.org/packages/51/cf/52a703dbeb0c65807540d29699fef5fda073434ff61846a564d5c296420f/numpy-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:e1a27bb1b2dee45a2a53f5ca6ff2d1a7f135287883a1689e930d44d1ff296c87", size = 12310730, upload-time = "2026-01-10T06:43:01.627Z" }, + { url = "https://files.pythonhosted.org/packages/69/80/a828b2d0ade5e74a9fe0f4e0a17c30fdc26232ad2bc8c9f8b3197cf7cf18/numpy-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:0e6e8f9d9ecf95399982019c01223dc130542960a12edfa8edd1122dfa66a8a8", size = 10312166, upload-time = "2026-01-10T06:43:03.673Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/68/732d4b7811c00775f3bd522a21e8dd5a23f77eb11acdeb663e4a4ebf0ef4/numpy-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", size = 16652495, upload-time = "2026-01-10T06:43:06.283Z" }, + { url = "https://files.pythonhosted.org/packages/20/ca/857722353421a27f1465652b2c66813eeeccea9d76d5f7b74b99f298e60e/numpy-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", size = 12368657, upload-time = "2026-01-10T06:43:09.094Z" }, + { url = "https://files.pythonhosted.org/packages/81/0d/2377c917513449cc6240031a79d30eb9a163d32a91e79e0da47c43f2c0c8/numpy-2.4.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", size = 5197256, upload-time = "2026-01-10T06:43:13.634Z" }, + { url = "https://files.pythonhosted.org/packages/17/39/569452228de3f5de9064ac75137082c6214be1f5c532016549a7923ab4b5/numpy-2.4.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", size = 6545212, upload-time = "2026-01-10T06:43:15.661Z" }, + { url = "https://files.pythonhosted.org/packages/8c/a4/77333f4d1e4dac4395385482557aeecf4826e6ff517e32ca48e1dafbe42a/numpy-2.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", size = 14402871, upload-time = "2026-01-10T06:43:17.324Z" }, + { url = "https://files.pythonhosted.org/packages/ba/87/d341e519956273b39d8d47969dd1eaa1af740615394fe67d06f1efa68773/numpy-2.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", size = 16359305, upload-time = "2026-01-10T06:43:19.376Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/91/789132c6666288eaa20ae8066bb99eba1939362e8f1a534949a215246e97/numpy-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", size = 16181909, upload-time = "2026-01-10T06:43:21.808Z" }, + { url = "https://files.pythonhosted.org/packages/cf/b8/090b8bd27b82a844bb22ff8fdf7935cb1980b48d6e439ae116f53cdc2143/numpy-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", size = 18284380, upload-time = "2026-01-10T06:43:23.957Z" }, + { url = "https://files.pythonhosted.org/packages/67/78/722b62bd31842ff029412271556a1a27a98f45359dea78b1548a3a9996aa/numpy-2.4.1-cp313-cp313-win32.whl", hash = "sha256:3d1a100e48cb266090a031397863ff8a30050ceefd798f686ff92c67a486753d", size = 5957089, upload-time = "2026-01-10T06:43:27.535Z" }, + { url = "https://files.pythonhosted.org/packages/da/a6/cf32198b0b6e18d4fbfa9a21a992a7fca535b9bb2b0cdd217d4a3445b5ca/numpy-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:92a0e65272fd60bfa0d9278e0484c2f52fe03b97aedc02b357f33fe752c52ffb", size = 12307230, upload-time = "2026-01-10T06:43:29.298Z" }, + { url = "https://files.pythonhosted.org/packages/44/6c/534d692bfb7d0afe30611320c5fb713659dcb5104d7cc182aff2aea092f5/numpy-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:20d4649c773f66cc2fc36f663e091f57c3b7655f936a4c681b4250855d1da8f5", size = 10313125, upload-time = "2026-01-10T06:43:31.782Z" }, + { url = "https://files.pythonhosted.org/packages/da/a1/354583ac5c4caa566de6ddfbc42744409b515039e085fab6e0ff942e0df5/numpy-2.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", size = 12496156, upload-time = "2026-01-10T06:43:34.237Z" }, + { url = "https://files.pythonhosted.org/packages/51/b0/42807c6e8cce58c00127b1dc24d365305189991f2a7917aa694a109c8d7d/numpy-2.4.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = 
"sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", size = 5324663, upload-time = "2026-01-10T06:43:36.211Z" }, + { url = "https://files.pythonhosted.org/packages/fe/55/7a621694010d92375ed82f312b2f28017694ed784775269115323e37f5e2/numpy-2.4.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", size = 6645224, upload-time = "2026-01-10T06:43:37.884Z" }, + { url = "https://files.pythonhosted.org/packages/50/96/9fa8635ed9d7c847d87e30c834f7109fac5e88549d79ef3324ab5c20919f/numpy-2.4.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", size = 14462352, upload-time = "2026-01-10T06:43:39.479Z" }, + { url = "https://files.pythonhosted.org/packages/03/d1/8cf62d8bb2062da4fb82dd5d49e47c923f9c0738032f054e0a75342faba7/numpy-2.4.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", size = 16407279, upload-time = "2026-01-10T06:43:41.93Z" }, + { url = "https://files.pythonhosted.org/packages/86/1c/95c86e17c6b0b31ce6ef219da00f71113b220bcb14938c8d9a05cee0ff53/numpy-2.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", size = 16248316, upload-time = "2026-01-10T06:43:44.121Z" }, + { url = "https://files.pythonhosted.org/packages/30/b4/e7f5ff8697274c9d0fa82398b6a372a27e5cef069b37df6355ccb1f1db1a/numpy-2.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", size = 18329884, upload-time = "2026-01-10T06:43:46.613Z" }, + { url = "https://files.pythonhosted.org/packages/37/a4/b073f3e9d77f9aec8debe8ca7f9f6a09e888ad1ba7488f0c3b36a94c03ac/numpy-2.4.1-cp313-cp313t-win32.whl", hash = "sha256:382ad67d99ef49024f11d1ce5dcb5ad8432446e4246a4b014418ba3a1175a1f4", size = 6081138, 
upload-time = "2026-01-10T06:43:48.854Z" }, + { url = "https://files.pythonhosted.org/packages/16/16/af42337b53844e67752a092481ab869c0523bc95c4e5c98e4dac4e9581ac/numpy-2.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:62fea415f83ad8fdb6c20840578e5fbaf5ddd65e0ec6c3c47eda0f69da172510", size = 12447478, upload-time = "2026-01-10T06:43:50.476Z" }, + { url = "https://files.pythonhosted.org/packages/6c/f8/fa85b2eac68ec631d0b631abc448552cb17d39afd17ec53dcbcc3537681a/numpy-2.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a7870e8c5fc11aef57d6fea4b4085e537a3a60ad2cdd14322ed531fdca68d261", size = 10382981, upload-time = "2026-01-10T06:43:52.575Z" }, + { url = "https://files.pythonhosted.org/packages/1b/a7/ef08d25698e0e4b4efbad8d55251d20fe2a15f6d9aa7c9b30cd03c165e6f/numpy-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", size = 16652046, upload-time = "2026-01-10T06:43:54.797Z" }, + { url = "https://files.pythonhosted.org/packages/8f/39/e378b3e3ca13477e5ac70293ec027c438d1927f18637e396fe90b1addd72/numpy-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", size = 12378858, upload-time = "2026-01-10T06:43:57.099Z" }, + { url = "https://files.pythonhosted.org/packages/c3/74/7ec6154f0006910ed1fdbb7591cf4432307033102b8a22041599935f8969/numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", size = 5207417, upload-time = "2026-01-10T06:43:59.037Z" }, + { url = "https://files.pythonhosted.org/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", size = 6542643, upload-time = "2026-01-10T06:44:01.852Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", size = 14418963, upload-time = "2026-01-10T06:44:04.047Z" }, + { url = "https://files.pythonhosted.org/packages/eb/c0/3ed5083d94e7ffd7c404e54619c088e11f2e1939a9544f5397f4adb1b8ba/numpy-2.4.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", size = 16363811, upload-time = "2026-01-10T06:44:06.207Z" }, + { url = "https://files.pythonhosted.org/packages/0e/68/42b66f1852bf525050a67315a4fb94586ab7e9eaa541b1bef530fab0c5dd/numpy-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", size = 16197643, upload-time = "2026-01-10T06:44:08.33Z" }, + { url = "https://files.pythonhosted.org/packages/d2/40/e8714fc933d85f82c6bfc7b998a0649ad9769a32f3494ba86598aaf18a48/numpy-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", size = 18289601, upload-time = "2026-01-10T06:44:10.841Z" }, + { url = "https://files.pythonhosted.org/packages/80/9a/0d44b468cad50315127e884802351723daca7cf1c98d102929468c81d439/numpy-2.4.1-cp314-cp314-win32.whl", hash = "sha256:727c6c3275ddefa0dc078524a85e064c057b4f4e71ca5ca29a19163c607be745", size = 6005722, upload-time = "2026-01-10T06:44:13.332Z" }, + { url = "https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d", size = 12438590, upload-time = "2026-01-10T06:44:15.006Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/da/a598d5cb260780cf4d255102deba35c1d072dc028c4547832f45dd3323a8/numpy-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:ce9ce141a505053b3c7bce3216071f3bf5c182b8b28930f14cd24d43932cd2df", size = 10596180, upload-time = "2026-01-10T06:44:17.386Z" }, + { url = "https://files.pythonhosted.org/packages/de/bc/ea3f2c96fcb382311827231f911723aeff596364eb6e1b6d1d91128aa29b/numpy-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", size = 12498774, upload-time = "2026-01-10T06:44:19.467Z" }, + { url = "https://files.pythonhosted.org/packages/aa/ab/ef9d939fe4a812648c7a712610b2ca6140b0853c5efea361301006c02ae5/numpy-2.4.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", size = 5327274, upload-time = "2026-01-10T06:44:23.189Z" }, + { url = "https://files.pythonhosted.org/packages/bd/31/d381368e2a95c3b08b8cf7faac6004849e960f4a042d920337f71cef0cae/numpy-2.4.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c", size = 6648306, upload-time = "2026-01-10T06:44:25.012Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e5/0989b44ade47430be6323d05c23207636d67d7362a1796ccbccac6773dd2/numpy-2.4.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", size = 14464653, upload-time = "2026-01-10T06:44:26.706Z" }, + { url = "https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", size = 16405144, upload-time = "2026-01-10T06:44:29.378Z" }, + { url = 
"https://files.pythonhosted.org/packages/f8/a3/0c63fe66b534888fa5177cc7cef061541064dbe2b4b60dcc60ffaf0d2157/numpy-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", size = 16247425, upload-time = "2026-01-10T06:44:31.721Z" }, + { url = "https://files.pythonhosted.org/packages/6b/2b/55d980cfa2c93bd40ff4c290bf824d792bd41d2fe3487b07707559071760/numpy-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", size = 18330053, upload-time = "2026-01-10T06:44:34.617Z" }, + { url = "https://files.pythonhosted.org/packages/23/12/8b5fc6b9c487a09a7957188e0943c9ff08432c65e34567cabc1623b03a51/numpy-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:5de60946f14ebe15e713a6f22850c2372fa72f4ff9a432ab44aa90edcadaa65a", size = 6152482, upload-time = "2026-01-10T06:44:36.798Z" }, + { url = "https://files.pythonhosted.org/packages/00/a5/9f8ca5856b8940492fc24fbe13c1bc34d65ddf4079097cf9e53164d094e1/numpy-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:8f085da926c0d491ffff3096f91078cc97ea67e7e6b65e490bc8dcda65663be2", size = 12627117, upload-time = "2026-01-10T06:44:38.828Z" }, + { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121, upload-time = "2026-01-10T06:44:41.644Z" }, + { url = "https://files.pythonhosted.org/packages/1e/48/d86f97919e79314a1cdee4c832178763e6e98e623e123d0bada19e92c15a/numpy-2.4.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8ad35f20be147a204e28b6a0575fbf3540c5e5f802634d4258d55b1ff5facce1", size = 16822202, upload-time = "2026-01-10T06:44:43.738Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/e9/1e62a7f77e0f37dcfb0ad6a9744e65df00242b6ea37dfafb55debcbf5b55/numpy-2.4.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:8097529164c0f3e32bb89412a0905d9100bf434d9692d9fc275e18dcf53c9344", size = 12569985, upload-time = "2026-01-10T06:44:45.945Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7e/914d54f0c801342306fdcdce3e994a56476f1b818c46c47fc21ae968088c/numpy-2.4.1-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:ea66d2b41ca4a1630aae5507ee0a71647d3124d1741980138aa8f28f44dac36e", size = 5398484, upload-time = "2026-01-10T06:44:48.012Z" }, + { url = "https://files.pythonhosted.org/packages/1c/d8/9570b68584e293a33474e7b5a77ca404f1dcc655e40050a600dee81d27fb/numpy-2.4.1-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:d3f8f0df9f4b8be57b3bf74a1d087fec68f927a2fab68231fdb442bf2c12e426", size = 6713216, upload-time = "2026-01-10T06:44:49.725Z" }, + { url = "https://files.pythonhosted.org/packages/33/9b/9dd6e2db8d49eb24f86acaaa5258e5f4c8ed38209a4ee9de2d1a0ca25045/numpy-2.4.1-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2023ef86243690c2791fd6353e5b4848eedaa88ca8a2d129f462049f6d484696", size = 14538937, upload-time = "2026-01-10T06:44:51.498Z" }, + { url = "https://files.pythonhosted.org/packages/53/87/d5bd995b0f798a37105b876350d346eea5838bd8f77ea3d7a48392f3812b/numpy-2.4.1-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8361ea4220d763e54cff2fbe7d8c93526b744f7cd9ddab47afeff7e14e8503be", size = 16479830, upload-time = "2026-01-10T06:44:53.931Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c7/b801bf98514b6ae6475e941ac05c58e6411dd863ea92916bfd6d510b08c1/numpy-2.4.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:4f1b68ff47680c2925f8063402a693ede215f0257f02596b1318ecdfb1d79e33", size = 12492579, upload-time = "2026-01-10T06:44:57.094Z" }, ] [[package]] @@ -3059,7 +3061,7 @@ source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "absl-py" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "torch", marker = "sys_platform == 'never'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/02/ad/046a097b63a96c1ba1d85f0031dbe7fcbdb33e6c445dfbaba2ffaefdd497/nv_grouped_gemm-1.1.4.post8.tar.gz", hash = "sha256:ab321693f0292cfd8a26dc7b6f14decd9eb00e209494de7218e4fad36191275d", size = 20821209, upload-time = "2025-12-17T02:22:38.432Z" } @@ -3094,59 +3096,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/15/97e6e4ddfe5fc35bcee74a45b7c33fb73abb83713c7dfa26420b971a86c3/nv_one_logger_training_telemetry-2.3.1-py3-none-any.whl", hash = "sha256:5319443829b59378a498c3c62ac98973e14f31be675c229ff2b14e2fe109aa0b", size = 44140, upload-time = "2025-10-29T21:21:40.72Z" }, ] -[[package]] -name = "nvidia-cublas-cu12" -version = "12.8.4.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, - { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, - { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 567544208, upload-time = "2025-03-07T01:53:30.535Z" }, -] - -[[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, - { url = "https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = 
"sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" }, -] - -[[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" }, -] - -[[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, - { url = "https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" }, -] - -[[package]] -name = "nvidia-cudnn-cu12" -version = "9.10.2.21" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, - { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, - { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" }, -] - [[package]] name = "nvidia-cudnn-frontend" version = "1.17.0" @@ -3166,95 +3115,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/ee/6de6aec1e42c859134312e6d5348d6f036b2f1b825e6eae92f9a429eccc4/nvidia_cudnn_frontend-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:5c6a120fb54b157585ce6587153fc7086081af961f284f2553e01ba7c7a80c1a", size = 1441177, upload-time = 
"2025-12-20T00:30:09.927Z" }, ] -[[package]] -name = "nvidia-cufft-cu12" -version = "11.3.3.83" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, - { url = "https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" }, -] - -[[package]] -name = "nvidia-cufile-cu12" -version = "1.13.1.3" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, - { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, 
upload-time = "2025-03-07T01:45:41.434Z" }, -] - -[[package]] -name = "nvidia-curand-cu12" -version = "10.3.9.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, - { url = "https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" }, -] - -[[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.3.90" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = 
"sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, - { url = "https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" }, -] - -[[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.8.93" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, - { url = "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" }, -] - -[[package]] -name = "nvidia-cusparselt-cu12" -version = "0.7.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = 
"sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, - { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, - { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" }, -] - [[package]] name = "nvidia-cutlass-dsl" -version = "4.3.4" +version = "4.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cuda-python" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = 
"typing-extensions" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/1f/83e48a71e0b7bed6b33b01732ae53e9f2e61dc518ab273e56ec859bb05f1/nvidia_cutlass_dsl-4.3.4-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:118508bc84f2a55ec7af3affd379bb713edf837d593218329909db67b518e700", size = 58736512, upload-time = "2025-12-21T07:40:34.715Z" }, - { url = "https://files.pythonhosted.org/packages/27/f1/21166ae0b6da766e11448d32c1e69fc60ba4023de9040f6ef9c333e7b0b5/nvidia_cutlass_dsl-4.3.4-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3fdf0603ab7ec1bf6a499fbf72cff65e73b597d6e1359286808317c69aeb7c3d", size = 58598504, upload-time = "2025-12-21T07:39:43.124Z" }, - { url = "https://files.pythonhosted.org/packages/43/01/3067eaad7454a3e36523b6814f09344afa0d36f71719072a6eecd6c87a40/nvidia_cutlass_dsl-4.3.4-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c5bd21ed877da171f115123a12aae4a920035fc47eb57c807f9fba9f3df97cf4", size = 58733573, upload-time = "2025-12-21T07:41:51.364Z" }, - { url = "https://files.pythonhosted.org/packages/86/3b/f8255a1fe6841955eea7a211bc9f30fd46bd8424ea15f361d5c09b29520a/nvidia_cutlass_dsl-4.3.4-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:671936f1df909e7de377d0cc00cb4287a3458c013d34947600423e9deb827e41", size = 58598831, upload-time = "2025-12-21T07:39:17.853Z" }, - { url = "https://files.pythonhosted.org/packages/86/ee/53d22e2e14cb763927d85f7ec9748f6af6d27a2b7f43d52de014728da10e/nvidia_cutlass_dsl-4.3.4-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:57693d87677919572ab9eefa386b3f39e8e888bc4a9db7ab8730a97e8dbe06b4", size = 58736300, upload-time = "2025-12-21T07:41:25.723Z" }, - { url = "https://files.pythonhosted.org/packages/66/f6/47489e07081cd4060f08bfa4166f8ff32beaecf71c06060d03bde88f3b6c/nvidia_cutlass_dsl-4.3.4-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a48fbff859e44dd548f8f26819d97d0595acea70e3b057c91dfdb47929015c72", size = 58599014, upload-time = "2025-12-21T07:38:51.632Z" }, - { 
url = "https://files.pythonhosted.org/packages/c7/2e/3aaf6121842351ec0231d5ab9d9ebe9a6e2269e9a8f7345e02f096db1ba8/nvidia_cutlass_dsl-4.3.4-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36bde25160f461f393beba81868ef9e54d5ba2e0e7666ed3e44b6dbf788af493", size = 58735620, upload-time = "2025-12-21T07:40:59.729Z" }, - { url = "https://files.pythonhosted.org/packages/62/90/1da2583bda001bf678066bc970963aad3986036ac15e95eb38447fa1b51e/nvidia_cutlass_dsl-4.3.4-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:be127f0f087028fa498f50a994c49f95b2c6a518e11e2567bc3d71528bf0a504", size = 58600158, upload-time = "2025-12-21T07:40:09.36Z" }, + { url = "https://files.pythonhosted.org/packages/52/3a/89f70082c24d3b88316df9b16df861e1f2cc86389a7b36a670bc7c541977/nvidia_cutlass_dsl-4.3.5-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b4fcc50dbf9f9c6d1f4d6e1748e366c6835c95bea7b54f7111bfa6e66230f74b", size = 58736963, upload-time = "2026-01-09T01:37:55.298Z" }, + { url = "https://files.pythonhosted.org/packages/e7/92/3f39b64341e2b16dedc7434e7b63a8f457a6fdbd023346d2f00276943495/nvidia_cutlass_dsl-4.3.5-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:776f54fa72333bc8fca274e59b70552adbcd85aaef603c7d58a79ef284890046", size = 58601295, upload-time = "2026-01-09T01:39:02.461Z" }, + { url = "https://files.pythonhosted.org/packages/e8/93/9114f28351d55061d30c68dbec3ba49659ac65607966029f52dab66950e9/nvidia_cutlass_dsl-4.3.5-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6de9a4a7150ad1832fb8c862c92df4836f347690e4c085e9044160c846010b59", size = 58736943, upload-time = "2026-01-09T01:40:25.777Z" }, + { url = "https://files.pythonhosted.org/packages/54/b5/d2f08919a9aa9052d45b2c8adfc310a724e9474e39c612358b1b24282c54/nvidia_cutlass_dsl-4.3.5-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:7a792f02ce548f311a3df313a7cdb4ac4ec1cccb6c7ff9cd68d5470b25a6daf6", size = 58602358, upload-time = "2026-01-09T01:39:28.521Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/6c/f45c930f662e0ec7856baa5d4e6f4d1e2ca6b029678f9e05d2df54c865be/nvidia_cutlass_dsl-4.3.5-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6a79e94d157b16ab34069dd73fb708ff0ef31f486d699b6d5a015217f754cb0b", size = 58739895, upload-time = "2026-01-09T01:38:22.076Z" }, + { url = "https://files.pythonhosted.org/packages/76/cb/998e79b6f028268bf2653250deb4a2edb618db81244e549ced71112c6f85/nvidia_cutlass_dsl-4.3.5-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4687eef20c405023daa99dd4653a292fd875d6c9486f8d9a069ff6fcdb00834f", size = 58602784, upload-time = "2026-01-09T01:40:52.873Z" }, + { url = "https://files.pythonhosted.org/packages/97/09/78a2f9141006f6f1e371a3dfb7a921205bcad6fb27810731169939d3e63d/nvidia_cutlass_dsl-4.3.5-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:9343a5c1335169d791b05aac6fb81e33d7f17c4f8250613a091e6ee8314ed6aa", size = 58738707, upload-time = "2026-01-09T01:39:56.445Z" }, + { url = "https://files.pythonhosted.org/packages/0f/16/41b88ded92648d99f3c83880c07a54475feded9b32b4425e30d4b34f6c63/nvidia_cutlass_dsl-4.3.5-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:11d19b7e56ae1bedaf736ea3965af3be1e7af6c2482989c414b606cdd406cf32", size = 58601867, upload-time = "2026-01-09T01:37:29.895Z" }, ] [[package]] @@ -3281,7 +3160,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ninja" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { 
registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "nvidia-ml-py" }, { name = "packaging" }, { name = "pulp" }, @@ -3290,7 +3169,7 @@ dependencies = [ { name = "rich" }, { name = "safetensors" }, { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "scipy", version = "1.17.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "torch", marker = "sys_platform == 'never'" }, { name = "torchprofile" }, { name = "tqdm" }, @@ -3299,44 +3178,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/4a/4b4c339637fdbd54bc98b92c87c8b22f5efee05ca9e31e40a8d49ee66187/nvidia_modelopt-0.40.0-py3-none-any.whl", hash = "sha256:0315f53aef014b902866e427038db5803e3c6787a8e1f09c3650031550885051", size = 901421, upload-time = "2025-12-12T10:35:28.506Z" }, ] -[[package]] -name = "nvidia-nccl-cu12" -version = "2.27.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", 
size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" }, - { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, -] - -[[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, - { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, - { url = "https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" }, -] - -[[package]] -name = "nvidia-nvshmem-cu12" -version = "3.3.20" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/92/9d/3dd98852568fb845ec1f7902c90a22b240fe1cbabda411ccedf2fd737b7b/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b0b960da3842212758e4fa4696b94f129090b30e5122fea3c5345916545cff0", size = 124484616, 
upload-time = "2025-08-04T20:24:59.172Z" }, - { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" }, -] - -[[package]] -name = "nvidia-nvtx-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, - { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" }, -] - [[package]] name = "nvidia-resiliency-ext" version = "0.5.0" @@ -3417,55 +3258,55 @@ wheels = [ [[package]] name = "onnx" -version = "1.20.0" +version = "1.20.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ml-dtypes" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 
'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "protobuf" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bd/bf/824b13b7ea14c2d374b48a296cfa412442e5559326fbab5441a4fcb68924/onnx-1.20.0.tar.gz", hash = "sha256:1a93ec69996b4556062d552ed1aa0671978cfd3c17a40bf4c89a1ae169c6a4ad", size = 12049527, upload-time = "2025-12-01T18:14:34.679Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/23/18/8fd768f715a990d3b5786c9bffa6f158934cc1935f2774dd15b26c62f99f/onnx-1.20.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:7e706470f8b731af6d0347c4f01b8e0e1810855d0c71c467066a5bd7fa21704b", size = 18341375, upload-time = "2025-12-01T18:13:29.481Z" }, - { url = "https://files.pythonhosted.org/packages/cf/47/9fdb6e8bde5f77f8bdcf7e584ad88ffa7a189338b92658351518c192bde0/onnx-1.20.0-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3e941d0f3edd57e1d63e2562c74aec2803ead5b965e76ccc3d2b2bd4ae0ea054", size = 17899075, upload-time = "2025-12-01T18:13:32.375Z" }, - { url = "https://files.pythonhosted.org/packages/b2/17/7bb16372f95a8a8251c202018952a747ac7f796a9e6d5720ed7b36680834/onnx-1.20.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", 
hash = "sha256:6930ed7795912c4298ec8642b33c99c51c026a57edf17788b8451fe22d11e674", size = 18118826, upload-time = "2025-12-01T18:13:35.077Z" }, - { url = "https://files.pythonhosted.org/packages/19/d8/19e3f599601195b1d8ff0bf9e9469065ebeefd9b5e5ec090344f031c38cb/onnx-1.20.0-cp310-cp310-win32.whl", hash = "sha256:f8424c95491de38ecc280f7d467b298cb0b7cdeb1cd892eb9b4b9541c00a600e", size = 16364286, upload-time = "2025-12-01T18:13:38.304Z" }, - { url = "https://files.pythonhosted.org/packages/5d/f9/11d2db50a6c56092bd2e22515fe6998309c7b2389ed67f8ffd27285c33b5/onnx-1.20.0-cp310-cp310-win_amd64.whl", hash = "sha256:1ecca1f963d69e002c03000f15844f8cac3b6d7b6639a934e73571ee02d59c35", size = 16487791, upload-time = "2025-12-01T18:13:41.062Z" }, - { url = "https://files.pythonhosted.org/packages/9e/9a/125ad5ed919d1782b26b0b4404e51adc44afd029be30d5a81b446dccd9c5/onnx-1.20.0-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:00dc8ae2c7b283f79623961f450b5515bd2c4b47a7027e7a1374ba49cef27768", size = 18341929, upload-time = "2025-12-01T18:13:43.79Z" }, - { url = "https://files.pythonhosted.org/packages/4d/3c/85280dd05396493f3e1b4feb7a3426715e344b36083229437f31d9788a01/onnx-1.20.0-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f62978ecfb8f320faba6704abd20253a5a79aacc4e5d39a9c061dd63d3b7574f", size = 17899362, upload-time = "2025-12-01T18:13:46.496Z" }, - { url = "https://files.pythonhosted.org/packages/26/db/e11cf9aaa6ccbcd27ea94d321020fef3207cba388bff96111e6431f97d1a/onnx-1.20.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:71177f8fd5c0dd90697bc281f5035f73707bdac83257a5c54d74403a1100ace9", size = 18119129, upload-time = "2025-12-01T18:13:49.662Z" }, - { url = "https://files.pythonhosted.org/packages/ef/0b/1b99e7ba5ccfa8ecb3509ec579c8520098d09b903ccd520026d60faa7c75/onnx-1.20.0-cp311-cp311-win32.whl", hash = "sha256:1d3d0308e2c194f4b782f51e78461b567fac8ce6871c0cf5452ede261683cc8f", size = 16364604, upload-time = 
"2025-12-01T18:13:52.691Z" }, - { url = "https://files.pythonhosted.org/packages/51/ab/7399817821d0d18ff67292ac183383e41f4f4ddff2047902f1b7b51d2d40/onnx-1.20.0-cp311-cp311-win_amd64.whl", hash = "sha256:3a6de7dda77926c323b0e5a830dc9c2866ce350c1901229e193be1003a076c25", size = 16488019, upload-time = "2025-12-01T18:13:55.776Z" }, - { url = "https://files.pythonhosted.org/packages/fd/e0/23059c11d9c0fb1951acec504a5cc86e1dd03d2eef3a98cf1941839f5322/onnx-1.20.0-cp311-cp311-win_arm64.whl", hash = "sha256:afc4cf83ce5d547ebfbb276dae8eb0ec836254a8698d462b4ba5f51e717fd1ae", size = 16446841, upload-time = "2025-12-01T18:13:58.091Z" }, - { url = "https://files.pythonhosted.org/packages/5e/19/2caa972a31014a8cb4525f715f2a75d93caef9d4b9da2809cc05d0489e43/onnx-1.20.0-cp312-abi3-macosx_12_0_universal2.whl", hash = "sha256:31efe37d7d1d659091f34ddd6a31780334acf7c624176832db9a0a8ececa8fb5", size = 18340913, upload-time = "2025-12-01T18:14:00.477Z" }, - { url = "https://files.pythonhosted.org/packages/78/bb/b98732309f2f6beb4cdcf7b955d7bbfd75a191185370ee21233373db381e/onnx-1.20.0-cp312-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d75da05e743eb9a11ff155a775cae5745e71f1cd0ca26402881b8f20e8d6e449", size = 17896118, upload-time = "2025-12-01T18:14:03.239Z" }, - { url = "https://files.pythonhosted.org/packages/84/a7/38aa564871d062c11538d65c575af9c7e057be880c09ecbd899dd1abfa83/onnx-1.20.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02e0d72ab09a983fce46686b155a5049898558d9f3bc6e8515120d6c40666318", size = 18115415, upload-time = "2025-12-01T18:14:06.261Z" }, - { url = "https://files.pythonhosted.org/packages/3b/17/a600b62cf4ad72976c66f83ce9e324205af434706ad5ec0e35129e125aef/onnx-1.20.0-cp312-abi3-win32.whl", hash = "sha256:392ca68b34b97e172d33b507e1e7bfdf2eea96603e6e7ff109895b82ff009dc7", size = 16363019, upload-time = "2025-12-01T18:14:09.16Z" }, - { url = 
"https://files.pythonhosted.org/packages/9c/3b/5146ba0a89f73c026bb468c49612bab8d005aa28155ebf06cf5f2eb8d36c/onnx-1.20.0-cp312-abi3-win_amd64.whl", hash = "sha256:259b05758d41645f5545c09f887187662b350d40db8d707c33c94a4f398e1733", size = 16485934, upload-time = "2025-12-01T18:14:13.046Z" }, - { url = "https://files.pythonhosted.org/packages/f3/bc/d251b97395e721b3034e9578d4d4d9fb33aac4197ae16ce8c7ed79a26dce/onnx-1.20.0-cp312-abi3-win_arm64.whl", hash = "sha256:2d25a9e1fde44bc69988e50e2211f62d6afcd01b0fd6dfd23429fd978a35d32f", size = 16444946, upload-time = "2025-12-01T18:14:15.801Z" }, - { url = "https://files.pythonhosted.org/packages/8d/11/4d47409e257013951a17d08c31988e7c2e8638c91d4d5ce18cc57c6ea9d9/onnx-1.20.0-cp313-cp313t-macosx_12_0_universal2.whl", hash = "sha256:7646e700c0a53770a86d5a9a582999a625a3173c4323635960aec3cba8441c6a", size = 18348524, upload-time = "2025-12-01T18:14:18.102Z" }, - { url = "https://files.pythonhosted.org/packages/67/60/774d29a0f00f84a4ec624fe35e0c59e1dbd7f424adaab751977a45b60e05/onnx-1.20.0-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0bdfd22fe92b87bf98424335ec1191ed79b08cd0f57fe396fab558b83b2c868", size = 17900987, upload-time = "2025-12-01T18:14:20.835Z" }, - { url = "https://files.pythonhosted.org/packages/9c/7c/6bd82b81b85b2680e3de8cf7b6cc49a7380674b121265bb6e1e2ff3bb0aa/onnx-1.20.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1a4e02148b2a7a4b82796d0ecdb6e49ba7abd34bb5a9de22af86aad556fb76", size = 18121332, upload-time = "2025-12-01T18:14:24.558Z" }, - { url = "https://files.pythonhosted.org/packages/d1/42/d2cd00c84def4e17b471e24d82a1d2e3c5be202e2c163420b0353ddf34df/onnx-1.20.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2241c85fdaa25a66565fcd1d327c7bcd8f55165420ebaee1e9563c3b9bf961c9", size = 16492660, upload-time = "2025-12-01T18:14:27.456Z" }, - { url = 
"https://files.pythonhosted.org/packages/42/cd/1106de50a17f2a2dfbb4c8bb3cf2f99be2c7ac2e19abbbf9e07ab47b1b35/onnx-1.20.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ee46cdc5abd851a007a4be81ee53e0e303cf9a0e46d74231d5d361333a1c9411", size = 16448588, upload-time = "2025-12-01T18:14:32.277Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/3b/8a/335c03a8683a88a32f9a6bb98899ea6df241a41df64b37b9696772414794/onnx-1.20.1.tar.gz", hash = "sha256:ded16de1df563d51fbc1ad885f2a426f814039d8b5f4feb77febe09c0295ad67", size = 12048980, upload-time = "2026-01-10T01:40:03.043Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/cc/4ba3c80cfaffdb541dc5a23eaccb045a627361e94ecaeba30496270f15b3/onnx-1.20.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:3fe243e83ad737637af6512708454e720d4b0864def2b28e6b0ee587b80a50be", size = 17904206, upload-time = "2026-01-10T01:38:58.574Z" }, + { url = "https://files.pythonhosted.org/packages/f3/fc/3a1c4ae2cd5cfab2d0ebc1842769b04b417fe13946144a7c8ce470dd9c85/onnx-1.20.1-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e24e96b48f27e4d6b44cb0b195b367a2665da2d819621eec51903d575fc49d38", size = 17414849, upload-time = "2026-01-10T01:39:01.494Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ab/5017945291b981f2681fb620f2d5b6070e02170c648770711ef1eac79d56/onnx-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0903e6088ed5e8f59ebd381ab2a6e9b2a60b4c898f79aa2fe76bb79cf38a5031", size = 17513600, upload-time = "2026-01-10T01:39:04.348Z" }, + { url = "https://files.pythonhosted.org/packages/2e/b0/063e79dc365972af876d786bacc6acd8909691af2b9296615ff74ad182f3/onnx-1.20.1-cp310-cp310-win32.whl", hash = "sha256:17483e59082b2ca6cadd2b48fd8dce937e5b2c985ed5583fefc38af928be1826", size = 16239159, upload-time = "2026-01-10T01:39:07.254Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/73/a992271eb3683e676239d71b5a78ad3cf4d06d2223c387e701bf305da199/onnx-1.20.1-cp310-cp310-win_amd64.whl", hash = "sha256:e2b0cf797faedfd3b83491dc168ab5f1542511448c65ceb482f20f04420cbf3a", size = 16391718, upload-time = "2026-01-10T01:39:09.96Z" }, + { url = "https://files.pythonhosted.org/packages/0c/38/1a0e74d586c08833404100f5c052f92732fb5be417c0b2d7cb0838443bfe/onnx-1.20.1-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:53426e1b458641e7a537e9f176330012ff59d90206cac1c1a9d03cdd73ed3095", size = 17904965, upload-time = "2026-01-10T01:39:13.532Z" }, + { url = "https://files.pythonhosted.org/packages/96/25/64b076e9684d17335f80b15b3bf502f7a8e1a89f08a6b208d4f2861b3011/onnx-1.20.1-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ca7281f8c576adf396c338cf43fff26faee8d4d2e2577b8e73738f37ceccf945", size = 17415179, upload-time = "2026-01-10T01:39:16.516Z" }, + { url = "https://files.pythonhosted.org/packages/ac/d5/6743b409421ced20ad5af1b3a7b4c4e568689ffaca86db431692fca409a6/onnx-1.20.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2297f428c51c7fc6d8fad0cf34384284dfeff3f86799f8e83ef905451348ade0", size = 17513672, upload-time = "2026-01-10T01:39:19.35Z" }, + { url = "https://files.pythonhosted.org/packages/9a/6b/dae82e6fdb2043302f29adca37522312ea2be55b75907b59be06fbdffe87/onnx-1.20.1-cp311-cp311-win32.whl", hash = "sha256:63d9cbcab8c96841eadeb7c930e07bfab4dde8081eb76fb68e0dfb222706b81e", size = 16239336, upload-time = "2026-01-10T01:39:22.506Z" }, + { url = "https://files.pythonhosted.org/packages/8e/17/a0d7863390c1f2067d7c02dcc1477034965c32aaa1407bfcf775305ffee4/onnx-1.20.1-cp311-cp311-win_amd64.whl", hash = "sha256:d78cde72d7ca8356a2d99c5dc0dbf67264254828cae2c5780184486c0cd7b3bf", size = 16392120, upload-time = "2026-01-10T01:39:25.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/72/9b879a46eb7a3322223791f36bf9c25d95da9ed93779eabb75a560f22e5b/onnx-1.20.1-cp311-cp311-win_arm64.whl", hash = "sha256:0104bb2d4394c179bcea3df7599a45a2932b80f4633840896fcf0d7d8daecea2", size = 16346923, upload-time = "2026-01-10T01:39:27.782Z" }, + { url = "https://files.pythonhosted.org/packages/7c/4c/4b17e82f91ab9aa07ff595771e935ca73547b035030dc5f5a76e63fbfea9/onnx-1.20.1-cp312-abi3-macosx_12_0_universal2.whl", hash = "sha256:1d923bb4f0ce1b24c6859222a7e6b2f123e7bfe7623683662805f2e7b9e95af2", size = 17903547, upload-time = "2026-01-10T01:39:31.015Z" }, + { url = "https://files.pythonhosted.org/packages/64/5e/1bfa100a9cb3f2d3d5f2f05f52f7e60323b0e20bb0abace1ae64dbc88f25/onnx-1.20.1-cp312-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ddc0b7d8b5a94627dc86c533d5e415af94cbfd103019a582669dad1f56d30281", size = 17412021, upload-time = "2026-01-10T01:39:33.885Z" }, + { url = "https://files.pythonhosted.org/packages/fb/71/d3fec0dcf9a7a99e7368112d9c765154e81da70fcba1e3121131a45c245b/onnx-1.20.1-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9336b6b8e6efcf5c490a845f6afd7e041c89a56199aeda384ed7d58fb953b080", size = 17510450, upload-time = "2026-01-10T01:39:36.589Z" }, + { url = "https://files.pythonhosted.org/packages/74/a7/edce1403e05a46e59b502fae8e3350ceeac5841f8e8f1561e98562ed9b09/onnx-1.20.1-cp312-abi3-win32.whl", hash = "sha256:564c35a94811979808ab5800d9eb4f3f32c12daedba7e33ed0845f7c61ef2431", size = 16238216, upload-time = "2026-01-10T01:39:39.46Z" }, + { url = "https://files.pythonhosted.org/packages/8b/c7/8690c81200ae652ac550c1df52f89d7795e6cc941f3cb38c9ef821419e80/onnx-1.20.1-cp312-abi3-win_amd64.whl", hash = "sha256:9fe7f9a633979d50984b94bda8ceb7807403f59a341d09d19342dc544d0ca1d5", size = 16389207, upload-time = "2026-01-10T01:39:41.955Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/a0/4fb0e6d36eaf079af366b2c1f68bafe92df6db963e2295da84388af64abc/onnx-1.20.1-cp312-abi3-win_arm64.whl", hash = "sha256:21d747348b1c8207406fa2f3e12b82f53e0d5bb3958bcd0288bd27d3cb6ebb00", size = 16344155, upload-time = "2026-01-10T01:39:45.536Z" }, + { url = "https://files.pythonhosted.org/packages/ea/bb/715fad292b255664f0e603f1b2ef7bf2b386281775f37406beb99fa05957/onnx-1.20.1-cp313-cp313t-macosx_12_0_universal2.whl", hash = "sha256:29197b768f5acdd1568ddeb0a376407a2817844f6ac1ef8c8dd2d974c9ab27c3", size = 17912296, upload-time = "2026-01-10T01:39:48.21Z" }, + { url = "https://files.pythonhosted.org/packages/2d/c3/541af12c3d45e159a94ee701100ba9e94b7bd8b7a8ac5ca6838569f894f8/onnx-1.20.1-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f0371aa67f51917a09cc829ada0f9a79a58f833449e03d748f7f7f53787c43c", size = 17416925, upload-time = "2026-01-10T01:39:50.82Z" }, + { url = "https://files.pythonhosted.org/packages/2c/3b/d5660a7d2ddf14f531ca66d409239f543bb290277c3f14f4b4b78e32efa3/onnx-1.20.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be1e5522200b203b34327b2cf132ddec20ab063469476e1f5b02bb7bd259a489", size = 17515602, upload-time = "2026-01-10T01:39:54.132Z" }, + { url = "https://files.pythonhosted.org/packages/9c/b4/47225ab2a92562eff87ba9a1a028e3535d659a7157d7cde659003998b8e3/onnx-1.20.1-cp313-cp313t-win_amd64.whl", hash = "sha256:15c815313bbc4b2fdc7e4daeb6e26b6012012adc4d850f4e3b09ed327a7ea92a", size = 16395729, upload-time = "2026-01-10T01:39:57.577Z" }, + { url = "https://files.pythonhosted.org/packages/aa/7d/1bbe626ff6b192c844d3ad34356840cc60fca02e2dea0db95e01645758b1/onnx-1.20.1-cp313-cp313t-win_arm64.whl", hash = "sha256:eb335d7bcf9abac82a0d6a0fda0363531ae0b22cfd0fc6304bff32ee29905def", size = 16348968, upload-time = "2026-01-10T01:40:00.491Z" }, ] [[package]] name = "onnx-ir" -version = "0.1.13" +version = "0.1.14" source = { registry = "https://pypi.org/simple" 
} dependencies = [ { name = "ml-dtypes" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "onnx" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a8/c2/6db31dc3132e540076f15ed0cdf4a8db7ab75557f4d6c19eda655cac666e/onnx_ir-0.1.13.tar.gz", hash = "sha256:e08f00d30579bdbff2152692a6f1bc1f0523d3321ac6348aadcd40595e56231e", size = 115872, upload-time = "2025-12-17T18:03:13.86Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a5/5b/ebd083a5c3d25ce9f95b34a11b3a492cdcf7831bf127c0f64429a4e83961/onnx_ir-0.1.14.tar.gz", hash = "sha256:bd69e3b5821046d5d7c9d0fdd023f8e1d0cc9a62cbee986fa0e5ab2b1602d7ae", size = 120732, upload-time = "2026-01-07T01:19:47.777Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/37/b6/f60fd79ff5bc617d49db1378eb7c4c315b21b786502674e4a2d48e64491a/onnx_ir-0.1.13-py3-none-any.whl", hash = "sha256:2791493d1529fdbea60c257dc7bc0933dc812e6d68f4976d8b59aa7b4c2de8cf", size = 133063, upload-time = "2025-12-17T18:03:12.268Z" }, + { 
url = "https://files.pythonhosted.org/packages/53/d1/bd9a5007448b4599a80143b0b5ccc78e9c46176e5e1bee81f6d3da68d217/onnx_ir-0.1.14-py3-none-any.whl", hash = "sha256:89b212fa7840981c5db5dc478190f1b7369536297c3c6eae68fb1c2237dd2554", size = 139128, upload-time = "2026-01-07T01:19:46.403Z" }, ] [[package]] @@ -3475,7 +3316,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ml-dtypes" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "onnx" }, { name = "onnx-ir" }, { name = "packaging" }, @@ -3523,7 +3364,7 @@ version = "2.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and 
extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "python-dateutil" }, { name = "pytz" }, { name = "tzdata" }, @@ -3596,11 +3437,11 @@ wheels = [ [[package]] name = "pathspec" -version = "1.0.0" +version = "1.0.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c2/97/39352be14d20d377a387828daf9d3f765fad1ff29bd49913d5bbf4cefe61/pathspec-1.0.0.tar.gz", hash = "sha256:9ada63a23541746b0cf7d5672a39ea77eac31dd23a80470be90df83537512131", size = 129410, upload-time = "2026-01-06T03:21:22.892Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/b2/bb8e495d5262bfec41ab5cb18f522f1012933347fb5d9e62452d446baca2/pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", size = 130841, upload-time = "2026-01-09T15:46:46.009Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/05/bb/39e6768529454cc2b57e1e2fa0a0a18ff64397a16303270e215a3e03285f/pathspec-1.0.0-py3-none-any.whl", hash = "sha256:1373719036e64a2b9de3b8ddd9e30afb082a915619f07265ed76d9ae507800ae", size = 54316, upload-time = "2026-01-06T03:21:21.74Z" }, + { url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload-time = "2026-01-09T15:46:44.652Z" }, ] [[package]] @@ -3742,11 +3583,11 @@ wheels = [ [[package]] name = "prometheus-client" -version = "0.23.1" +version = "0.24.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/23/53/3edb5d68ecf6b38fcbcc1ad28391117d2a322d9a1a3eff04bfdb184d8c3b/prometheus_client-0.23.1.tar.gz", hash 
= "sha256:6ae8f9081eaaaf153a2e959d2e6c4f4fb57b12ef76c8c7980202f1e57b48b2ce", size = 80481, upload-time = "2025-09-18T20:47:25.043Z" } +sdist = { url = "https://files.pythonhosted.org/packages/07/8f/35d31c925f33a494b3f4f10ee25bf47757aff2d63424a06af13814293f13/prometheus_client-0.24.0.tar.gz", hash = "sha256:726b40c0d499f4904d4b5b7abe8d43e6aff090de0d468ae8f2226290b331c667", size = 85590, upload-time = "2026-01-12T20:12:48.963Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/db/14bafcb4af2139e046d03fd00dea7873e48eafe18b7d2797e73d6681f210/prometheus_client-0.23.1-py3-none-any.whl", hash = "sha256:dd1913e6e76b59cfe44e7a4b83e01afc9873c1bdfd2ed8739f1e76aeca115f99", size = 61145, upload-time = "2025-09-18T20:47:23.875Z" }, + { url = "https://files.pythonhosted.org/packages/22/dd/50260b80759f90e3be66f094e0cd1fdef680b18d9f91edc9ae1b627624ba/prometheus_client-0.24.0-py3-none-any.whl", hash = "sha256:4ab6d4fb5a1b25ad74b58e6271857e356fff3399473e599d227ab5d0ce6637f0", size = 64062, upload-time = "2026-01-12T20:12:47.501Z" }, ] [[package]] @@ -3890,17 +3731,17 @@ wheels = [ [[package]] name = "protobuf" -version = "6.33.2" +version = "6.33.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/34/44/e49ecff446afeec9d1a66d6bbf9adc21e3c7cea7803a920ca3773379d4f6/protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4", size = 444296, upload-time = "2025-12-06T00:17:53.311Z" } +sdist = { url = "https://files.pythonhosted.org/packages/53/b8/cda15d9d46d03d4aa3a67cb6bffe05173440ccf86a9541afaf7ac59a1b6b/protobuf-6.33.4.tar.gz", hash = "sha256:dc2e61bca3b10470c1912d166fe0af67bfc20eb55971dcef8dfa48ce14f0ed91", size = 444346, upload-time = "2026-01-12T18:33:40.109Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/91/1e3a34881a88697a7354ffd177e8746e97a722e5e8db101544b47e84afb1/protobuf-6.33.2-cp310-abi3-win32.whl", hash = 
"sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d", size = 425603, upload-time = "2025-12-06T00:17:41.114Z" }, - { url = "https://files.pythonhosted.org/packages/64/20/4d50191997e917ae13ad0a235c8b42d8c1ab9c3e6fd455ca16d416944355/protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4", size = 436930, upload-time = "2025-12-06T00:17:43.278Z" }, - { url = "https://files.pythonhosted.org/packages/b2/ca/7e485da88ba45c920fb3f50ae78de29ab925d9e54ef0de678306abfbb497/protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43", size = 427621, upload-time = "2025-12-06T00:17:44.445Z" }, - { url = "https://files.pythonhosted.org/packages/7d/4f/f743761e41d3b2b2566748eb76bbff2b43e14d5fcab694f494a16458b05f/protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e", size = 324460, upload-time = "2025-12-06T00:17:45.678Z" }, - { url = "https://files.pythonhosted.org/packages/b1/fa/26468d00a92824020f6f2090d827078c09c9c587e34cbfd2d0c7911221f8/protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872", size = 339168, upload-time = "2025-12-06T00:17:46.813Z" }, - { url = "https://files.pythonhosted.org/packages/56/13/333b8f421738f149d4fe5e49553bc2a2ab75235486259f689b4b91f96cec/protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f", size = 323270, upload-time = "2025-12-06T00:17:48.253Z" }, - { url = "https://files.pythonhosted.org/packages/0e/15/4f02896cc3df04fc465010a4c6a0cd89810f54617a32a70ef531ed75d61c/protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c", size = 170501, upload-time = "2025-12-06T00:17:52.211Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/be/24ef9f3095bacdf95b458543334d0c4908ccdaee5130420bf064492c325f/protobuf-6.33.4-cp310-abi3-win32.whl", hash = "sha256:918966612c8232fc6c24c78e1cd89784307f5814ad7506c308ee3cf86662850d", size = 425612, upload-time = "2026-01-12T18:33:29.656Z" }, + { url = "https://files.pythonhosted.org/packages/31/ad/e5693e1974a28869e7cd244302911955c1cebc0161eb32dfa2b25b6e96f0/protobuf-6.33.4-cp310-abi3-win_amd64.whl", hash = "sha256:8f11ffae31ec67fc2554c2ef891dcb561dae9a2a3ed941f9e134c2db06657dbc", size = 436962, upload-time = "2026-01-12T18:33:31.345Z" }, + { url = "https://files.pythonhosted.org/packages/66/15/6ee23553b6bfd82670207ead921f4d8ef14c107e5e11443b04caeb5ab5ec/protobuf-6.33.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2fe67f6c014c84f655ee06f6f66213f9254b3a8b6bda6cda0ccd4232c73c06f0", size = 427612, upload-time = "2026-01-12T18:33:32.646Z" }, + { url = "https://files.pythonhosted.org/packages/2b/48/d301907ce6d0db75f959ca74f44b475a9caa8fcba102d098d3c3dd0f2d3f/protobuf-6.33.4-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:757c978f82e74d75cba88eddec479df9b99a42b31193313b75e492c06a51764e", size = 324484, upload-time = "2026-01-12T18:33:33.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/1c/e53078d3f7fe710572ab2dcffd993e1e3b438ae71cfc031b71bae44fcb2d/protobuf-6.33.4-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c7c64f259c618f0bef7bee042075e390debbf9682334be2b67408ec7c1c09ee6", size = 339256, upload-time = "2026-01-12T18:33:35.231Z" }, + { url = "https://files.pythonhosted.org/packages/e8/8e/971c0edd084914f7ee7c23aa70ba89e8903918adca179319ee94403701d5/protobuf-6.33.4-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:3df850c2f8db9934de4cf8f9152f8dc2558f49f298f37f90c517e8e5c84c30e9", size = 323311, upload-time = "2026-01-12T18:33:36.305Z" }, + { url = "https://files.pythonhosted.org/packages/75/b1/1dc83c2c661b4c62d56cc081706ee33a4fc2835bd90f965baa2663ef7676/protobuf-6.33.4-py3-none-any.whl", hash = 
"sha256:1fe3730068fcf2e595816a6c34fe66eeedd37d51d0400b72fabc848811fdc1bc", size = 170532, upload-time = "2026-01-12T18:33:39.199Z" }, ] [[package]] @@ -4296,12 +4137,12 @@ name = "pytest" version = "8.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891, upload-time = "2025-03-02T12:54:54.503Z" } wheels = [ @@ -4976,7 +4817,7 @@ wheels = [ [[package]] name = "scipy" -version = "1.16.3" +version = "1.17.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -4989,70 +4830,70 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'linux'", ] dependencies = [ - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0a/ca/d8ace4f98322d01abcd52d381134344bf7b431eba7ed8b42bdea5a3c2ac9/scipy-1.16.3.tar.gz", hash = 
"sha256:01e87659402762f43bd2fee13370553a17ada367d42e7487800bf2916535aecb", size = 30597883, upload-time = "2025-10-28T17:38:54.068Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/5f/6f37d7439de1455ce9c5a556b8d1db0979f03a796c030bafdf08d35b7bf9/scipy-1.16.3-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:40be6cf99e68b6c4321e9f8782e7d5ff8265af28ef2cd56e9c9b2638fa08ad97", size = 36630881, upload-time = "2025-10-28T17:31:47.104Z" }, - { url = "https://files.pythonhosted.org/packages/7c/89/d70e9f628749b7e4db2aa4cd89735502ff3f08f7b9b27d2e799485987cd9/scipy-1.16.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:8be1ca9170fcb6223cc7c27f4305d680ded114a1567c0bd2bfcbf947d1b17511", size = 28941012, upload-time = "2025-10-28T17:31:53.411Z" }, - { url = "https://files.pythonhosted.org/packages/a8/a8/0e7a9a6872a923505dbdf6bb93451edcac120363131c19013044a1e7cb0c/scipy-1.16.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bea0a62734d20d67608660f69dcda23e7f90fb4ca20974ab80b6ed40df87a005", size = 20931935, upload-time = "2025-10-28T17:31:57.361Z" }, - { url = "https://files.pythonhosted.org/packages/bd/c7/020fb72bd79ad798e4dbe53938543ecb96b3a9ac3fe274b7189e23e27353/scipy-1.16.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:2a207a6ce9c24f1951241f4693ede2d393f59c07abc159b2cb2be980820e01fb", size = 23534466, upload-time = "2025-10-28T17:32:01.875Z" }, - { url = "https://files.pythonhosted.org/packages/be/a0/668c4609ce6dbf2f948e167836ccaf897f95fb63fa231c87da7558a374cd/scipy-1.16.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:532fb5ad6a87e9e9cd9c959b106b73145a03f04c7d57ea3e6f6bb60b86ab0876", size = 33593618, upload-time = "2025-10-28T17:32:06.902Z" }, - { url = "https://files.pythonhosted.org/packages/ca/6e/8942461cf2636cdae083e3eb72622a7fbbfa5cf559c7d13ab250a5dbdc01/scipy-1.16.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:0151a0749efeaaab78711c78422d413c583b8cdd2011a3c1d6c794938ee9fdb2", size = 35899798, upload-time = "2025-10-28T17:32:12.665Z" }, - { url = "https://files.pythonhosted.org/packages/79/e8/d0f33590364cdbd67f28ce79368b373889faa4ee959588beddf6daef9abe/scipy-1.16.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7180967113560cca57418a7bc719e30366b47959dd845a93206fbed693c867e", size = 36226154, upload-time = "2025-10-28T17:32:17.961Z" }, - { url = "https://files.pythonhosted.org/packages/39/c1/1903de608c0c924a1749c590064e65810f8046e437aba6be365abc4f7557/scipy-1.16.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:deb3841c925eeddb6afc1e4e4a45e418d19ec7b87c5df177695224078e8ec733", size = 38878540, upload-time = "2025-10-28T17:32:23.907Z" }, - { url = "https://files.pythonhosted.org/packages/f1/d0/22ec7036ba0b0a35bccb7f25ab407382ed34af0b111475eb301c16f8a2e5/scipy-1.16.3-cp311-cp311-win_amd64.whl", hash = "sha256:53c3844d527213631e886621df5695d35e4f6a75f620dca412bcd292f6b87d78", size = 38722107, upload-time = "2025-10-28T17:32:29.921Z" }, - { url = "https://files.pythonhosted.org/packages/7b/60/8a00e5a524bb3bf8898db1650d350f50e6cffb9d7a491c561dc9826c7515/scipy-1.16.3-cp311-cp311-win_arm64.whl", hash = "sha256:9452781bd879b14b6f055b26643703551320aa8d79ae064a71df55c00286a184", size = 25506272, upload-time = "2025-10-28T17:32:34.577Z" }, - { url = "https://files.pythonhosted.org/packages/40/41/5bf55c3f386b1643812f3a5674edf74b26184378ef0f3e7c7a09a7e2ca7f/scipy-1.16.3-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:81fc5827606858cf71446a5e98715ba0e11f0dbc83d71c7409d05486592a45d6", size = 36659043, upload-time = "2025-10-28T17:32:40.285Z" }, - { url = "https://files.pythonhosted.org/packages/1e/0f/65582071948cfc45d43e9870bf7ca5f0e0684e165d7c9ef4e50d783073eb/scipy-1.16.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:c97176013d404c7346bf57874eaac5187d969293bf40497140b0a2b2b7482e07", size = 28898986, upload-time = "2025-10-28T17:32:45.325Z" }, - { url = 
"https://files.pythonhosted.org/packages/96/5e/36bf3f0ac298187d1ceadde9051177d6a4fe4d507e8f59067dc9dd39e650/scipy-1.16.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2b71d93c8a9936046866acebc915e2af2e292b883ed6e2cbe5c34beb094b82d9", size = 20889814, upload-time = "2025-10-28T17:32:49.277Z" }, - { url = "https://files.pythonhosted.org/packages/80/35/178d9d0c35394d5d5211bbff7ac4f2986c5488b59506fef9e1de13ea28d3/scipy-1.16.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3d4a07a8e785d80289dfe66b7c27d8634a773020742ec7187b85ccc4b0e7b686", size = 23565795, upload-time = "2025-10-28T17:32:53.337Z" }, - { url = "https://files.pythonhosted.org/packages/fa/46/d1146ff536d034d02f83c8afc3c4bab2eddb634624d6529a8512f3afc9da/scipy-1.16.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0553371015692a898e1aa858fed67a3576c34edefa6b7ebdb4e9dde49ce5c203", size = 33349476, upload-time = "2025-10-28T17:32:58.353Z" }, - { url = "https://files.pythonhosted.org/packages/79/2e/415119c9ab3e62249e18c2b082c07aff907a273741b3f8160414b0e9193c/scipy-1.16.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:72d1717fd3b5e6ec747327ce9bda32d5463f472c9dce9f54499e81fbd50245a1", size = 35676692, upload-time = "2025-10-28T17:33:03.88Z" }, - { url = "https://files.pythonhosted.org/packages/27/82/df26e44da78bf8d2aeaf7566082260cfa15955a5a6e96e6a29935b64132f/scipy-1.16.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1fb2472e72e24d1530debe6ae078db70fb1605350c88a3d14bc401d6306dbffe", size = 36019345, upload-time = "2025-10-28T17:33:09.773Z" }, - { url = "https://files.pythonhosted.org/packages/82/31/006cbb4b648ba379a95c87262c2855cd0d09453e500937f78b30f02fa1cd/scipy-1.16.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c5192722cffe15f9329a3948c4b1db789fbb1f05c97899187dcf009b283aea70", size = 38678975, upload-time = "2025-10-28T17:33:15.809Z" }, - { url = 
"https://files.pythonhosted.org/packages/c2/7f/acbd28c97e990b421af7d6d6cd416358c9c293fc958b8529e0bd5d2a2a19/scipy-1.16.3-cp312-cp312-win_amd64.whl", hash = "sha256:56edc65510d1331dae01ef9b658d428e33ed48b4f77b1d51caf479a0253f96dc", size = 38555926, upload-time = "2025-10-28T17:33:21.388Z" }, - { url = "https://files.pythonhosted.org/packages/ce/69/c5c7807fd007dad4f48e0a5f2153038dc96e8725d3345b9ee31b2b7bed46/scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2", size = 25463014, upload-time = "2025-10-28T17:33:25.975Z" }, - { url = "https://files.pythonhosted.org/packages/72/f1/57e8327ab1508272029e27eeef34f2302ffc156b69e7e233e906c2a5c379/scipy-1.16.3-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:d2ec56337675e61b312179a1ad124f5f570c00f920cc75e1000025451b88241c", size = 36617856, upload-time = "2025-10-28T17:33:31.375Z" }, - { url = "https://files.pythonhosted.org/packages/44/13/7e63cfba8a7452eb756306aa2fd9b37a29a323b672b964b4fdeded9a3f21/scipy-1.16.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:16b8bc35a4cc24db80a0ec836a9286d0e31b2503cb2fd7ff7fb0e0374a97081d", size = 28874306, upload-time = "2025-10-28T17:33:36.516Z" }, - { url = "https://files.pythonhosted.org/packages/15/65/3a9400efd0228a176e6ec3454b1fa998fbbb5a8defa1672c3f65706987db/scipy-1.16.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:5803c5fadd29de0cf27fa08ccbfe7a9e5d741bf63e4ab1085437266f12460ff9", size = 20865371, upload-time = "2025-10-28T17:33:42.094Z" }, - { url = "https://files.pythonhosted.org/packages/33/d7/eda09adf009a9fb81827194d4dd02d2e4bc752cef16737cc4ef065234031/scipy-1.16.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:b81c27fc41954319a943d43b20e07c40bdcd3ff7cf013f4fb86286faefe546c4", size = 23524877, upload-time = "2025-10-28T17:33:48.483Z" }, - { url = 
"https://files.pythonhosted.org/packages/7d/6b/3f911e1ebc364cb81320223a3422aab7d26c9c7973109a9cd0f27c64c6c0/scipy-1.16.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0c3b4dd3d9b08dbce0f3440032c52e9e2ab9f96ade2d3943313dfe51a7056959", size = 33342103, upload-time = "2025-10-28T17:33:56.495Z" }, - { url = "https://files.pythonhosted.org/packages/21/f6/4bfb5695d8941e5c570a04d9fcd0d36bce7511b7d78e6e75c8f9791f82d0/scipy-1.16.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7dc1360c06535ea6116a2220f760ae572db9f661aba2d88074fe30ec2aa1ff88", size = 35697297, upload-time = "2025-10-28T17:34:04.722Z" }, - { url = "https://files.pythonhosted.org/packages/04/e1/6496dadbc80d8d896ff72511ecfe2316b50313bfc3ebf07a3f580f08bd8c/scipy-1.16.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:663b8d66a8748051c3ee9c96465fb417509315b99c71550fda2591d7dd634234", size = 36021756, upload-time = "2025-10-28T17:34:13.482Z" }, - { url = "https://files.pythonhosted.org/packages/fe/bd/a8c7799e0136b987bda3e1b23d155bcb31aec68a4a472554df5f0937eef7/scipy-1.16.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eab43fae33a0c39006a88096cd7b4f4ef545ea0447d250d5ac18202d40b6611d", size = 38696566, upload-time = "2025-10-28T17:34:22.384Z" }, - { url = "https://files.pythonhosted.org/packages/cd/01/1204382461fcbfeb05b6161b594f4007e78b6eba9b375382f79153172b4d/scipy-1.16.3-cp313-cp313-win_amd64.whl", hash = "sha256:062246acacbe9f8210de8e751b16fc37458213f124bef161a5a02c7a39284304", size = 38529877, upload-time = "2025-10-28T17:35:51.076Z" }, - { url = "https://files.pythonhosted.org/packages/7f/14/9d9fbcaa1260a94f4bb5b64ba9213ceb5d03cd88841fe9fd1ffd47a45b73/scipy-1.16.3-cp313-cp313-win_arm64.whl", hash = "sha256:50a3dbf286dbc7d84f176f9a1574c705f277cb6565069f88f60db9eafdbe3ee2", size = 25455366, upload-time = "2025-10-28T17:35:59.014Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/a3/9ec205bd49f42d45d77f1730dbad9ccf146244c1647605cf834b3a8c4f36/scipy-1.16.3-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:fb4b29f4cf8cc5a8d628bc8d8e26d12d7278cd1f219f22698a378c3d67db5e4b", size = 37027931, upload-time = "2025-10-28T17:34:31.451Z" }, - { url = "https://files.pythonhosted.org/packages/25/06/ca9fd1f3a4589cbd825b1447e5db3a8ebb969c1eaf22c8579bd286f51b6d/scipy-1.16.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:8d09d72dc92742988b0e7750bddb8060b0c7079606c0d24a8cc8e9c9c11f9079", size = 29400081, upload-time = "2025-10-28T17:34:39.087Z" }, - { url = "https://files.pythonhosted.org/packages/6a/56/933e68210d92657d93fb0e381683bc0e53a965048d7358ff5fbf9e6a1b17/scipy-1.16.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:03192a35e661470197556de24e7cb1330d84b35b94ead65c46ad6f16f6b28f2a", size = 21391244, upload-time = "2025-10-28T17:34:45.234Z" }, - { url = "https://files.pythonhosted.org/packages/a8/7e/779845db03dc1418e215726329674b40576879b91814568757ff0014ad65/scipy-1.16.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:57d01cb6f85e34f0946b33caa66e892aae072b64b034183f3d87c4025802a119", size = 23929753, upload-time = "2025-10-28T17:34:51.793Z" }, - { url = "https://files.pythonhosted.org/packages/4c/4b/f756cf8161d5365dcdef9e5f460ab226c068211030a175d2fc7f3f41ca64/scipy-1.16.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:96491a6a54e995f00a28a3c3badfff58fd093bf26cd5fb34a2188c8c756a3a2c", size = 33496912, upload-time = "2025-10-28T17:34:59.8Z" }, - { url = "https://files.pythonhosted.org/packages/09/b5/222b1e49a58668f23839ca1542a6322bb095ab8d6590d4f71723869a6c2c/scipy-1.16.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cd13e354df9938598af2be05822c323e97132d5e6306b83a3b4ee6724c6e522e", size = 35802371, upload-time = "2025-10-28T17:35:08.173Z" }, - { url = 
"https://files.pythonhosted.org/packages/c1/8d/5964ef68bb31829bde27611f8c9deeac13764589fe74a75390242b64ca44/scipy-1.16.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:63d3cdacb8a824a295191a723ee5e4ea7768ca5ca5f2838532d9f2e2b3ce2135", size = 36190477, upload-time = "2025-10-28T17:35:16.7Z" }, - { url = "https://files.pythonhosted.org/packages/ab/f2/b31d75cb9b5fa4dd39a0a931ee9b33e7f6f36f23be5ef560bf72e0f92f32/scipy-1.16.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e7efa2681ea410b10dde31a52b18b0154d66f2485328830e45fdf183af5aefc6", size = 38796678, upload-time = "2025-10-28T17:35:26.354Z" }, - { url = "https://files.pythonhosted.org/packages/b4/1e/b3723d8ff64ab548c38d87055483714fefe6ee20e0189b62352b5e015bb1/scipy-1.16.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2d1ae2cf0c350e7705168ff2429962a89ad90c2d49d1dd300686d8b2a5af22fc", size = 38640178, upload-time = "2025-10-28T17:35:35.304Z" }, - { url = "https://files.pythonhosted.org/packages/8e/f3/d854ff38789aca9b0cc23008d607ced9de4f7ab14fa1ca4329f86b3758ca/scipy-1.16.3-cp313-cp313t-win_arm64.whl", hash = "sha256:0c623a54f7b79dd88ef56da19bc2873afec9673a48f3b85b18e4d402bdd29a5a", size = 25803246, upload-time = "2025-10-28T17:35:42.155Z" }, - { url = "https://files.pythonhosted.org/packages/99/f6/99b10fd70f2d864c1e29a28bbcaa0c6340f9d8518396542d9ea3b4aaae15/scipy-1.16.3-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:875555ce62743e1d54f06cdf22c1e0bc47b91130ac40fe5d783b6dfa114beeb6", size = 36606469, upload-time = "2025-10-28T17:36:08.741Z" }, - { url = "https://files.pythonhosted.org/packages/4d/74/043b54f2319f48ea940dd025779fa28ee360e6b95acb7cd188fad4391c6b/scipy-1.16.3-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:bb61878c18a470021fb515a843dc7a76961a8daceaaaa8bad1332f1bf4b54657", size = 28872043, upload-time = "2025-10-28T17:36:16.599Z" }, - { url = 
"https://files.pythonhosted.org/packages/4d/e1/24b7e50cc1c4ee6ffbcb1f27fe9f4c8b40e7911675f6d2d20955f41c6348/scipy-1.16.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f2622206f5559784fa5c4b53a950c3c7c1cf3e84ca1b9c4b6c03f062f289ca26", size = 20862952, upload-time = "2025-10-28T17:36:22.966Z" }, - { url = "https://files.pythonhosted.org/packages/dd/3a/3e8c01a4d742b730df368e063787c6808597ccb38636ed821d10b39ca51b/scipy-1.16.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7f68154688c515cdb541a31ef8eb66d8cd1050605be9dcd74199cbd22ac739bc", size = 23508512, upload-time = "2025-10-28T17:36:29.731Z" }, - { url = "https://files.pythonhosted.org/packages/1f/60/c45a12b98ad591536bfe5330cb3cfe1850d7570259303563b1721564d458/scipy-1.16.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3c820ddb80029fe9f43d61b81d8b488d3ef8ca010d15122b152db77dc94c22", size = 33413639, upload-time = "2025-10-28T17:36:37.982Z" }, - { url = "https://files.pythonhosted.org/packages/71/bc/35957d88645476307e4839712642896689df442f3e53b0fa016ecf8a3357/scipy-1.16.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d3837938ae715fc0fe3c39c0202de3a8853aff22ca66781ddc2ade7554b7e2cc", size = 35704729, upload-time = "2025-10-28T17:36:46.547Z" }, - { url = "https://files.pythonhosted.org/packages/3b/15/89105e659041b1ca11c386e9995aefacd513a78493656e57789f9d9eab61/scipy-1.16.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aadd23f98f9cb069b3bd64ddc900c4d277778242e961751f77a8cb5c4b946fb0", size = 36086251, upload-time = "2025-10-28T17:36:55.161Z" }, - { url = "https://files.pythonhosted.org/packages/1a/87/c0ea673ac9c6cc50b3da2196d860273bc7389aa69b64efa8493bdd25b093/scipy-1.16.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b7c5f1bda1354d6a19bc6af73a649f8285ca63ac6b52e64e658a5a11d4d69800", size = 38716681, upload-time = "2025-10-28T17:37:04.1Z" }, - { url = 
"https://files.pythonhosted.org/packages/91/06/837893227b043fb9b0d13e4bd7586982d8136cb249ffb3492930dab905b8/scipy-1.16.3-cp314-cp314-win_amd64.whl", hash = "sha256:e5d42a9472e7579e473879a1990327830493a7047506d58d73fc429b84c1d49d", size = 39358423, upload-time = "2025-10-28T17:38:20.005Z" }, - { url = "https://files.pythonhosted.org/packages/95/03/28bce0355e4d34a7c034727505a02d19548549e190bedd13a721e35380b7/scipy-1.16.3-cp314-cp314-win_arm64.whl", hash = "sha256:6020470b9d00245926f2d5bb93b119ca0340f0d564eb6fbaad843eaebf9d690f", size = 26135027, upload-time = "2025-10-28T17:38:24.966Z" }, - { url = "https://files.pythonhosted.org/packages/b2/6f/69f1e2b682efe9de8fe9f91040f0cd32f13cfccba690512ba4c582b0bc29/scipy-1.16.3-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:e1d27cbcb4602680a49d787d90664fa4974063ac9d4134813332a8c53dbe667c", size = 37028379, upload-time = "2025-10-28T17:37:14.061Z" }, - { url = "https://files.pythonhosted.org/packages/7c/2d/e826f31624a5ebbab1cd93d30fd74349914753076ed0593e1d56a98c4fb4/scipy-1.16.3-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:9b9c9c07b6d56a35777a1b4cc8966118fb16cfd8daf6743867d17d36cfad2d40", size = 29400052, upload-time = "2025-10-28T17:37:21.709Z" }, - { url = "https://files.pythonhosted.org/packages/69/27/d24feb80155f41fd1f156bf144e7e049b4e2b9dd06261a242905e3bc7a03/scipy-1.16.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:3a4c460301fb2cffb7f88528f30b3127742cff583603aa7dc964a52c463b385d", size = 21391183, upload-time = "2025-10-28T17:37:29.559Z" }, - { url = "https://files.pythonhosted.org/packages/f8/d3/1b229e433074c5738a24277eca520a2319aac7465eea7310ea6ae0e98ae2/scipy-1.16.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:f667a4542cc8917af1db06366d3f78a5c8e83badd56409f94d1eac8d8d9133fa", size = 23930174, upload-time = "2025-10-28T17:37:36.306Z" }, - { url = 
"https://files.pythonhosted.org/packages/16/9d/d9e148b0ec680c0f042581a2be79a28a7ab66c0c4946697f9e7553ead337/scipy-1.16.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f379b54b77a597aa7ee5e697df0d66903e41b9c85a6dd7946159e356319158e8", size = 33497852, upload-time = "2025-10-28T17:37:42.228Z" }, - { url = "https://files.pythonhosted.org/packages/2f/22/4e5f7561e4f98b7bea63cf3fd7934bff1e3182e9f1626b089a679914d5c8/scipy-1.16.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4aff59800a3b7f786b70bfd6ab551001cb553244988d7d6b8299cb1ea653b353", size = 35798595, upload-time = "2025-10-28T17:37:48.102Z" }, - { url = "https://files.pythonhosted.org/packages/83/42/6644d714c179429fc7196857866f219fef25238319b650bb32dde7bf7a48/scipy-1.16.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:da7763f55885045036fabcebd80144b757d3db06ab0861415d1c3b7c69042146", size = 36186269, upload-time = "2025-10-28T17:37:53.72Z" }, - { url = "https://files.pythonhosted.org/packages/ac/70/64b4d7ca92f9cf2e6fc6aaa2eecf80bb9b6b985043a9583f32f8177ea122/scipy-1.16.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ffa6eea95283b2b8079b821dc11f50a17d0571c92b43e2b5b12764dc5f9b285d", size = 38802779, upload-time = "2025-10-28T17:37:59.393Z" }, - { url = "https://files.pythonhosted.org/packages/61/82/8d0e39f62764cce5ffd5284131e109f07cf8955aef9ab8ed4e3aa5e30539/scipy-1.16.3-cp314-cp314t-win_amd64.whl", hash = "sha256:d9f48cafc7ce94cf9b15c6bffdc443a81a27bf7075cf2dcd5c8b40f85d10c4e7", size = 39471128, upload-time = "2025-10-28T17:38:05.259Z" }, - { url = "https://files.pythonhosted.org/packages/64/47/a494741db7280eae6dc033510c319e34d42dd41b7ac0c7ead39354d1a2b5/scipy-1.16.3-cp314-cp314t-win_arm64.whl", hash = "sha256:21d9d6b197227a12dcbf9633320a4e34c6b0e51c57268df255a0942983bac562", size = 26464127, upload-time = "2025-10-28T17:38:11.34Z" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/3e/9cca699f3486ce6bc12ff46dc2031f1ec8eb9ccc9a320fdaf925f1417426/scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e", size = 30396830, upload-time = "2026-01-10T21:34:23.009Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/4b/c89c131aa87cad2b77a54eb0fb94d633a842420fa7e919dc2f922037c3d8/scipy-1.17.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:2abd71643797bd8a106dff97894ff7869eeeb0af0f7a5ce02e4227c6a2e9d6fd", size = 31381316, upload-time = "2026-01-10T21:24:33.42Z" }, + { url = "https://files.pythonhosted.org/packages/5e/5f/a6b38f79a07d74989224d5f11b55267714707582908a5f1ae854cf9a9b84/scipy-1.17.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:ef28d815f4d2686503e5f4f00edc387ae58dfd7a2f42e348bb53359538f01558", size = 27966760, upload-time = "2026-01-10T21:24:38.911Z" }, + { url = "https://files.pythonhosted.org/packages/c1/20/095ad24e031ee8ed3c5975954d816b8e7e2abd731e04f8be573de8740885/scipy-1.17.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:272a9f16d6bb4667e8b50d25d71eddcc2158a214df1b566319298de0939d2ab7", size = 20138701, upload-time = "2026-01-10T21:24:43.249Z" }, + { url = "https://files.pythonhosted.org/packages/89/11/4aad2b3858d0337756f3323f8960755704e530b27eb2a94386c970c32cbe/scipy-1.17.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7204fddcbec2fe6598f1c5fdf027e9f259106d05202a959a9f1aecf036adc9f6", size = 22480574, upload-time = "2026-01-10T21:24:47.266Z" }, + { url = "https://files.pythonhosted.org/packages/85/bd/f5af70c28c6da2227e510875cadf64879855193a687fb19951f0f44cfd6b/scipy-1.17.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fc02c37a5639ee67d8fb646ffded6d793c06c5622d36b35cfa8fe5ececb8f042", size = 32862414, upload-time = "2026-01-10T21:24:52.566Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/df/df1457c4df3826e908879fe3d76bc5b6e60aae45f4ee42539512438cfd5d/scipy-1.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dac97a27520d66c12a34fd90a4fe65f43766c18c0d6e1c0a80f114d2260080e4", size = 35112380, upload-time = "2026-01-10T21:24:58.433Z" }, + { url = "https://files.pythonhosted.org/packages/5f/bb/88e2c16bd1dd4de19d80d7c5e238387182993c2fb13b4b8111e3927ad422/scipy-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb7446a39b3ae0fe8f416a9a3fdc6fba3f11c634f680f16a239c5187bc487c0", size = 34922676, upload-time = "2026-01-10T21:25:04.287Z" }, + { url = "https://files.pythonhosted.org/packages/02/ba/5120242cc735f71fc002cff0303d536af4405eb265f7c60742851e7ccfe9/scipy-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:474da16199f6af66601a01546144922ce402cb17362e07d82f5a6cf8f963e449", size = 37507599, upload-time = "2026-01-10T21:25:09.851Z" }, + { url = "https://files.pythonhosted.org/packages/52/c8/08629657ac6c0da198487ce8cd3de78e02cfde42b7f34117d56a3fe249dc/scipy-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:255c0da161bd7b32a6c898e7891509e8a9289f0b1c6c7d96142ee0d2b114c2ea", size = 36380284, upload-time = "2026-01-10T21:25:15.632Z" }, + { url = "https://files.pythonhosted.org/packages/6c/4a/465f96d42c6f33ad324a40049dfd63269891db9324aa66c4a1c108c6f994/scipy-1.17.0-cp311-cp311-win_arm64.whl", hash = "sha256:85b0ac3ad17fa3be50abd7e69d583d98792d7edc08367e01445a1e2076005379", size = 24370427, upload-time = "2026-01-10T21:25:20.514Z" }, + { url = "https://files.pythonhosted.org/packages/0b/11/7241a63e73ba5a516f1930ac8d5b44cbbfabd35ac73a2d08ca206df007c4/scipy-1.17.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:0d5018a57c24cb1dd828bcf51d7b10e65986d549f52ef5adb6b4d1ded3e32a57", size = 31364580, upload-time = "2026-01-10T21:25:25.717Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/1d/5057f812d4f6adc91a20a2d6f2ebcdb517fdbc87ae3acc5633c9b97c8ba5/scipy-1.17.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:88c22af9e5d5a4f9e027e26772cc7b5922fab8bcc839edb3ae33de404feebd9e", size = 27969012, upload-time = "2026-01-10T21:25:30.921Z" }, + { url = "https://files.pythonhosted.org/packages/e3/21/f6ec556c1e3b6ec4e088da667d9987bb77cc3ab3026511f427dc8451187d/scipy-1.17.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f3cd947f20fe17013d401b64e857c6b2da83cae567adbb75b9dcba865abc66d8", size = 20140691, upload-time = "2026-01-10T21:25:34.802Z" }, + { url = "https://files.pythonhosted.org/packages/7a/fe/5e5ad04784964ba964a96f16c8d4676aa1b51357199014dce58ab7ec5670/scipy-1.17.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e8c0b331c2c1f531eb51f1b4fc9ba709521a712cce58f1aa627bc007421a5306", size = 22463015, upload-time = "2026-01-10T21:25:39.277Z" }, + { url = "https://files.pythonhosted.org/packages/4a/69/7c347e857224fcaf32a34a05183b9d8a7aca25f8f2d10b8a698b8388561a/scipy-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5194c445d0a1c7a6c1a4a4681b6b7c71baad98ff66d96b949097e7513c9d6742", size = 32724197, upload-time = "2026-01-10T21:25:44.084Z" }, + { url = "https://files.pythonhosted.org/packages/d1/fe/66d73b76d378ba8cc2fe605920c0c75092e3a65ae746e1e767d9d020a75a/scipy-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9eeb9b5f5997f75507814ed9d298ab23f62cf79f5a3ef90031b1ee2506abdb5b", size = 35009148, upload-time = "2026-01-10T21:25:50.591Z" }, + { url = "https://files.pythonhosted.org/packages/af/07/07dec27d9dc41c18d8c43c69e9e413431d20c53a0339c388bcf72f353c4b/scipy-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:40052543f7bbe921df4408f46003d6f01c6af109b9e2c8a66dd1cf6cf57f7d5d", size = 34798766, upload-time = "2026-01-10T21:25:59.41Z" }, + { url = 
"https://files.pythonhosted.org/packages/81/61/0470810c8a093cdacd4ba7504b8a218fd49ca070d79eca23a615f5d9a0b0/scipy-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0cf46c8013fec9d3694dc572f0b54100c28405d55d3e2cb15e2895b25057996e", size = 37405953, upload-time = "2026-01-10T21:26:07.75Z" }, + { url = "https://files.pythonhosted.org/packages/92/ce/672ed546f96d5d41ae78c4b9b02006cedd0b3d6f2bf5bb76ea455c320c28/scipy-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:0937a0b0d8d593a198cededd4c439a0ea216a3f36653901ea1f3e4be949056f8", size = 36328121, upload-time = "2026-01-10T21:26:16.509Z" }, + { url = "https://files.pythonhosted.org/packages/9d/21/38165845392cae67b61843a52c6455d47d0cc2a40dd495c89f4362944654/scipy-1.17.0-cp312-cp312-win_arm64.whl", hash = "sha256:f603d8a5518c7426414d1d8f82e253e454471de682ce5e39c29adb0df1efb86b", size = 24314368, upload-time = "2026-01-10T21:26:23.087Z" }, + { url = "https://files.pythonhosted.org/packages/0c/51/3468fdfd49387ddefee1636f5cf6d03ce603b75205bf439bbf0e62069bfd/scipy-1.17.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:65ec32f3d32dfc48c72df4291345dae4f048749bc8d5203ee0a3f347f96c5ce6", size = 31344101, upload-time = "2026-01-10T21:26:30.25Z" }, + { url = "https://files.pythonhosted.org/packages/b2/9a/9406aec58268d437636069419e6977af953d1e246df941d42d3720b7277b/scipy-1.17.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1f9586a58039d7229ce77b52f8472c972448cded5736eaf102d5658bbac4c269", size = 27950385, upload-time = "2026-01-10T21:26:36.801Z" }, + { url = "https://files.pythonhosted.org/packages/4f/98/e7342709e17afdfd1b26b56ae499ef4939b45a23a00e471dfb5375eea205/scipy-1.17.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9fad7d3578c877d606b1150135c2639e9de9cecd3705caa37b66862977cc3e72", size = 20122115, upload-time = "2026-01-10T21:26:42.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/0e/9eeeb5357a64fd157cbe0302c213517c541cc16b8486d82de251f3c68ede/scipy-1.17.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:423ca1f6584fc03936972b5f7c06961670dbba9f234e71676a7c7ccf938a0d61", size = 22442402, upload-time = "2026-01-10T21:26:48.029Z" }, + { url = "https://files.pythonhosted.org/packages/c9/10/be13397a0e434f98e0c79552b2b584ae5bb1c8b2be95db421533bbca5369/scipy-1.17.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe508b5690e9eaaa9467fc047f833af58f1152ae51a0d0aed67aa5801f4dd7d6", size = 32696338, upload-time = "2026-01-10T21:26:55.521Z" }, + { url = "https://files.pythonhosted.org/packages/63/1e/12fbf2a3bb240161651c94bb5cdd0eae5d4e8cc6eaeceb74ab07b12a753d/scipy-1.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6680f2dfd4f6182e7d6db161344537da644d1cf85cf293f015c60a17ecf08752", size = 34977201, upload-time = "2026-01-10T21:27:03.501Z" }, + { url = "https://files.pythonhosted.org/packages/19/5b/1a63923e23ccd20bd32156d7dd708af5bbde410daa993aa2500c847ab2d2/scipy-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec3842ec9ac9de5917899b277428886042a93db0b227ebbe3a333b64ec7643d", size = 34777384, upload-time = "2026-01-10T21:27:11.423Z" }, + { url = "https://files.pythonhosted.org/packages/39/22/b5da95d74edcf81e540e467202a988c50fef41bd2011f46e05f72ba07df6/scipy-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7425fcafbc09a03731e1bc05581f5fad988e48c6a861f441b7ab729a49a55ea", size = 37379586, upload-time = "2026-01-10T21:27:20.171Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b6/8ac583d6da79e7b9e520579f03007cb006f063642afd6b2eeb16b890bf93/scipy-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:87b411e42b425b84777718cc41516b8a7e0795abfa8e8e1d573bf0ef014f0812", size = 36287211, upload-time = "2026-01-10T21:28:43.122Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/fb/7db19e0b3e52f882b420417644ec81dd57eeef1bd1705b6f689d8ff93541/scipy-1.17.0-cp313-cp313-win_arm64.whl", hash = "sha256:357ca001c6e37601066092e7c89cca2f1ce74e2a520ca78d063a6d2201101df2", size = 24312646, upload-time = "2026-01-10T21:28:49.893Z" }, + { url = "https://files.pythonhosted.org/packages/20/b6/7feaa252c21cc7aff335c6c55e1b90ab3e3306da3f048109b8b639b94648/scipy-1.17.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:ec0827aa4d36cb79ff1b81de898e948a51ac0b9b1c43e4a372c0508c38c0f9a3", size = 31693194, upload-time = "2026-01-10T21:27:27.454Z" }, + { url = "https://files.pythonhosted.org/packages/76/bb/bbb392005abce039fb7e672cb78ac7d158700e826b0515cab6b5b60c26fb/scipy-1.17.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:819fc26862b4b3c73a60d486dbb919202f3d6d98c87cf20c223511429f2d1a97", size = 28365415, upload-time = "2026-01-10T21:27:34.26Z" }, + { url = "https://files.pythonhosted.org/packages/37/da/9d33196ecc99fba16a409c691ed464a3a283ac454a34a13a3a57c0d66f3a/scipy-1.17.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:363ad4ae2853d88ebcde3ae6ec46ccca903ea9835ee8ba543f12f575e7b07e4e", size = 20537232, upload-time = "2026-01-10T21:27:40.306Z" }, + { url = "https://files.pythonhosted.org/packages/56/9d/f4b184f6ddb28e9a5caea36a6f98e8ecd2a524f9127354087ce780885d83/scipy-1.17.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:979c3a0ff8e5ba254d45d59ebd38cde48fce4f10b5125c680c7a4bfe177aab07", size = 22791051, upload-time = "2026-01-10T21:27:46.539Z" }, + { url = "https://files.pythonhosted.org/packages/9b/9d/025cccdd738a72140efc582b1641d0dd4caf2e86c3fb127568dc80444e6e/scipy-1.17.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:130d12926ae34399d157de777472bf82e9061c60cc081372b3118edacafe1d00", size = 32815098, upload-time = "2026-01-10T21:27:54.389Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/5f/09b879619f8bca15ce392bfc1894bd9c54377e01d1b3f2f3b595a1b4d945/scipy-1.17.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e886000eb4919eae3a44f035e63f0fd8b651234117e8f6f29bad1cd26e7bc45", size = 35031342, upload-time = "2026-01-10T21:28:03.012Z" }, + { url = "https://files.pythonhosted.org/packages/f2/9a/f0f0a9f0aa079d2f106555b984ff0fbb11a837df280f04f71f056ea9c6e4/scipy-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:13c4096ac6bc31d706018f06a49abe0485f96499deb82066b94d19b02f664209", size = 34893199, upload-time = "2026-01-10T21:28:10.832Z" }, + { url = "https://files.pythonhosted.org/packages/90/b8/4f0f5cf0c5ea4d7548424e6533e6b17d164f34a6e2fb2e43ffebb6697b06/scipy-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cacbaddd91fcffde703934897c5cd2c7cb0371fac195d383f4e1f1c5d3f3bd04", size = 37438061, upload-time = "2026-01-10T21:28:19.684Z" }, + { url = "https://files.pythonhosted.org/packages/f9/cc/2bd59140ed3b2fa2882fb15da0a9cb1b5a6443d67cfd0d98d4cec83a57ec/scipy-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:edce1a1cf66298cccdc48a1bdf8fb10a3bf58e8b58d6c3883dd1530e103f87c0", size = 36328593, upload-time = "2026-01-10T21:28:28.007Z" }, + { url = "https://files.pythonhosted.org/packages/13/1b/c87cc44a0d2c7aaf0f003aef2904c3d097b422a96c7e7c07f5efd9073c1b/scipy-1.17.0-cp313-cp313t-win_arm64.whl", hash = "sha256:30509da9dbec1c2ed8f168b8d8aa853bc6723fede1dbc23c7d43a56f5ab72a67", size = 24625083, upload-time = "2026-01-10T21:28:35.188Z" }, + { url = "https://files.pythonhosted.org/packages/1a/2d/51006cd369b8e7879e1c630999a19d1fbf6f8b5ed3e33374f29dc87e53b3/scipy-1.17.0-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:c17514d11b78be8f7e6331b983a65a7f5ca1fd037b95e27b280921fe5606286a", size = 31346803, upload-time = "2026-01-10T21:28:57.24Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/2e/2349458c3ce445f53a6c93d4386b1c4c5c0c540917304c01222ff95ff317/scipy-1.17.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:4e00562e519c09da34c31685f6acc3aa384d4d50604db0f245c14e1b4488bfa2", size = 27967182, upload-time = "2026-01-10T21:29:04.107Z" }, + { url = "https://files.pythonhosted.org/packages/5e/7c/df525fbfa77b878d1cfe625249529514dc02f4fd5f45f0f6295676a76528/scipy-1.17.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f7df7941d71314e60a481e02d5ebcb3f0185b8d799c70d03d8258f6c80f3d467", size = 20139125, upload-time = "2026-01-10T21:29:10.179Z" }, + { url = "https://files.pythonhosted.org/packages/33/11/fcf9d43a7ed1234d31765ec643b0515a85a30b58eddccc5d5a4d12b5f194/scipy-1.17.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:aabf057c632798832f071a8dde013c2e26284043934f53b00489f1773b33527e", size = 22443554, upload-time = "2026-01-10T21:29:15.888Z" }, + { url = "https://files.pythonhosted.org/packages/80/5c/ea5d239cda2dd3d31399424967a24d556cf409fbea7b5b21412b0fd0a44f/scipy-1.17.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a38c3337e00be6fd8a95b4ed66b5d988bac4ec888fd922c2ea9fe5fb1603dd67", size = 32757834, upload-time = "2026-01-10T21:29:23.406Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7e/8c917cc573310e5dc91cbeead76f1b600d3fb17cf0969db02c9cf92e3cfa/scipy-1.17.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00fb5f8ec8398ad90215008d8b6009c9db9fa924fd4c7d6be307c6f945f9cd73", size = 34995775, upload-time = "2026-01-10T21:29:31.915Z" }, + { url = "https://files.pythonhosted.org/packages/c5/43/176c0c3c07b3f7df324e7cdd933d3e2c4898ca202b090bd5ba122f9fe270/scipy-1.17.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f2a4942b0f5f7c23c7cd641a0ca1955e2ae83dedcff537e3a0259096635e186b", size = 34841240, upload-time = "2026-01-10T21:29:39.995Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/8c/d1f5f4b491160592e7f084d997de53a8e896a3ac01cd07e59f43ca222744/scipy-1.17.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:dbf133ced83889583156566d2bdf7a07ff89228fe0c0cb727f777de92092ec6b", size = 37394463, upload-time = "2026-01-10T21:29:48.723Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ec/42a6657f8d2d087e750e9a5dde0b481fd135657f09eaf1cf5688bb23c338/scipy-1.17.0-cp314-cp314-win_amd64.whl", hash = "sha256:3625c631a7acd7cfd929e4e31d2582cf00f42fcf06011f59281271746d77e061", size = 37053015, upload-time = "2026-01-10T21:30:51.418Z" }, + { url = "https://files.pythonhosted.org/packages/27/58/6b89a6afd132787d89a362d443a7bddd511b8f41336a1ae47f9e4f000dc4/scipy-1.17.0-cp314-cp314-win_arm64.whl", hash = "sha256:9244608d27eafe02b20558523ba57f15c689357c85bdcfe920b1828750aa26eb", size = 24951312, upload-time = "2026-01-10T21:30:56.771Z" }, + { url = "https://files.pythonhosted.org/packages/e9/01/f58916b9d9ae0112b86d7c3b10b9e685625ce6e8248df139d0fcb17f7397/scipy-1.17.0-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:2b531f57e09c946f56ad0b4a3b2abee778789097871fc541e267d2eca081cff1", size = 31706502, upload-time = "2026-01-10T21:29:56.326Z" }, + { url = "https://files.pythonhosted.org/packages/59/8e/2912a87f94a7d1f8b38aabc0faf74b82d3b6c9e22be991c49979f0eceed8/scipy-1.17.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:13e861634a2c480bd237deb69333ac79ea1941b94568d4b0efa5db5e263d4fd1", size = 28380854, upload-time = "2026-01-10T21:30:01.554Z" }, + { url = "https://files.pythonhosted.org/packages/bd/1c/874137a52dddab7d5d595c1887089a2125d27d0601fce8c0026a24a92a0b/scipy-1.17.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:eb2651271135154aa24f6481cbae5cc8af1f0dd46e6533fb7b56aa9727b6a232", size = 20552752, upload-time = "2026-01-10T21:30:05.93Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/f0/7518d171cb735f6400f4576cf70f756d5b419a07fe1867da34e2c2c9c11b/scipy-1.17.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:c5e8647f60679790c2f5c76be17e2e9247dc6b98ad0d3b065861e082c56e078d", size = 22803972, upload-time = "2026-01-10T21:30:10.651Z" }, + { url = "https://files.pythonhosted.org/packages/7c/74/3498563a2c619e8a3ebb4d75457486c249b19b5b04a30600dfd9af06bea5/scipy-1.17.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5fb10d17e649e1446410895639f3385fd2bf4c3c7dfc9bea937bddcbc3d7b9ba", size = 32829770, upload-time = "2026-01-10T21:30:16.359Z" }, + { url = "https://files.pythonhosted.org/packages/48/d1/7b50cedd8c6c9d6f706b4b36fa8544d829c712a75e370f763b318e9638c1/scipy-1.17.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8547e7c57f932e7354a2319fab613981cde910631979f74c9b542bb167a8b9db", size = 35051093, upload-time = "2026-01-10T21:30:22.987Z" }, + { url = "https://files.pythonhosted.org/packages/e2/82/a2d684dfddb87ba1b3ea325df7c3293496ee9accb3a19abe9429bce94755/scipy-1.17.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33af70d040e8af9d5e7a38b5ed3b772adddd281e3062ff23fec49e49681c38cf", size = 34909905, upload-time = "2026-01-10T21:30:28.704Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5e/e565bd73991d42023eb82bb99e51c5b3d9e2c588ca9d4b3e2cc1d3ca62a6/scipy-1.17.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb55bb97d00f8b7ab95cb64f873eb0bf54d9446264d9f3609130381233483f", size = 37457743, upload-time = "2026-01-10T21:30:34.819Z" }, + { url = "https://files.pythonhosted.org/packages/58/a8/a66a75c3d8f1fb2b83f66007d6455a06a6f6cf5618c3dc35bc9b69dd096e/scipy-1.17.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1ff269abf702f6c7e67a4b7aad981d42871a11b9dd83c58d2d2ea624efbd1088", size = 37098574, upload-time = "2026-01-10T21:30:40.782Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/a5/df8f46ef7da168f1bc52cd86e09a9de5c6f19cc1da04454d51b7d4f43408/scipy-1.17.0-cp314-cp314t-win_arm64.whl", hash = "sha256:031121914e295d9791319a1875444d55079885bbae5bdc9c5e0f2ee5f09d34ff", size = 25246266, upload-time = "2026-01-10T21:30:45.923Z" }, ] [[package]] @@ -5121,15 +4962,15 @@ wheels = [ [[package]] name = "sentry-sdk" -version = "2.48.0" +version = "2.49.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/40/f0/0e9dc590513d5e742d7799e2038df3a05167cba084c6ca4f3cdd75b55164/sentry_sdk-2.48.0.tar.gz", hash = "sha256:5213190977ff7fdff8a58b722fb807f8d5524a80488626ebeda1b5676c0c1473", size = 384828, upload-time = "2025-12-16T14:55:41.722Z" } +sdist = { url = "https://files.pythonhosted.org/packages/02/94/23ac26616a883f492428d9ee9ad6eee391612125326b784dbfc30e1e7bab/sentry_sdk-2.49.0.tar.gz", hash = "sha256:c1878599cde410d481c04ef50ee3aedd4f600e4d0d253f4763041e468b332c30", size = 387228, upload-time = "2026-01-08T09:56:25.642Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/19/8d77f9992e5cbfcaa9133c3bf63b4fbbb051248802e1e803fed5c552fbb2/sentry_sdk-2.48.0-py2.py3-none-any.whl", hash = "sha256:6b12ac256769d41825d9b7518444e57fa35b5642df4c7c5e322af4d2c8721172", size = 414555, upload-time = "2025-12-16T14:55:40.152Z" }, + { url = "https://files.pythonhosted.org/packages/88/43/1c586f9f413765201234541857cb82fda076f4b0f7bad4a0ec248da39cf3/sentry_sdk-2.49.0-py2.py3-none-any.whl", hash = "sha256:6ea78499133874445a20fe9c826c9e960070abeb7ae0cdf930314ab16bb97aa0", size = 415693, upload-time = "2026-01-08T09:56:21.872Z" }, ] [[package]] @@ -5211,7 +5052,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 
'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" } wheels = [ @@ -5459,7 +5300,7 @@ name = "sympy" version = "1.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "mpmath" }, + { name = "mpmath", marker = "sys_platform != 'linux'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } wheels = [ @@ -5484,7 +5325,7 @@ dependencies = [ { name = "grpcio" }, { name = "markdown" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = 
"numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pillow" }, { name = "protobuf" }, @@ -5558,7 +5399,7 @@ resolution-markers = [ ] dependencies = [ { name = "ml-dtypes", marker = "python_full_version >= '3.11'" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/88/18/7b91daa9cf29dbb6bfdd603154f355c9069a9cd8c757038fe52b0f613611/tensorstore-0.1.80.tar.gz", hash = "sha256:4158fe76b96f62d12a37d7868150d836e089b5280b2bdd363c43c5d651f10e26", size = 7090032, upload-time = "2025-12-10T21:35:10.941Z" } wheels = [ @@ -5690,60 +5531,65 @@ wheels = [ [[package]] name = "tomli" -version = "2.3.0" +version = "2.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/52/ed/3f73f72945444548f33eba9a87fc7a6e969915e7b1acc8260b30e1f76a2f/tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549", size = 17392, upload-time = "2025-10-08T22:01:47.119Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/2e/299f62b401438d5fe1624119c723f5d877acc86a4c2492da405626665f12/tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45", size = 153236, upload-time = "2025-10-08T22:01:00.137Z" }, - { url = 
"https://files.pythonhosted.org/packages/86/7f/d8fffe6a7aefdb61bced88fcb5e280cfd71e08939da5894161bd71bea022/tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba", size = 148084, upload-time = "2025-10-08T22:01:01.63Z" }, - { url = "https://files.pythonhosted.org/packages/47/5c/24935fb6a2ee63e86d80e4d3b58b222dafaf438c416752c8b58537c8b89a/tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf", size = 234832, upload-time = "2025-10-08T22:01:02.543Z" }, - { url = "https://files.pythonhosted.org/packages/89/da/75dfd804fc11e6612846758a23f13271b76d577e299592b4371a4ca4cd09/tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441", size = 242052, upload-time = "2025-10-08T22:01:03.836Z" }, - { url = "https://files.pythonhosted.org/packages/70/8c/f48ac899f7b3ca7eb13af73bacbc93aec37f9c954df3c08ad96991c8c373/tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845", size = 239555, upload-time = "2025-10-08T22:01:04.834Z" }, - { url = "https://files.pythonhosted.org/packages/ba/28/72f8afd73f1d0e7829bfc093f4cb98ce0a40ffc0cc997009ee1ed94ba705/tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c", size = 245128, upload-time = "2025-10-08T22:01:05.84Z" }, - { url = "https://files.pythonhosted.org/packages/b6/eb/a7679c8ac85208706d27436e8d421dfa39d4c914dcf5fa8083a9305f58d9/tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456", size = 96445, upload-time = "2025-10-08T22:01:06.896Z" }, - { url = 
"https://files.pythonhosted.org/packages/0a/fe/3d3420c4cb1ad9cb462fb52967080575f15898da97e21cb6f1361d505383/tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be", size = 107165, upload-time = "2025-10-08T22:01:08.107Z" }, - { url = "https://files.pythonhosted.org/packages/ff/b7/40f36368fcabc518bb11c8f06379a0fd631985046c038aca08c6d6a43c6e/tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac", size = 154891, upload-time = "2025-10-08T22:01:09.082Z" }, - { url = "https://files.pythonhosted.org/packages/f9/3f/d9dd692199e3b3aab2e4e4dd948abd0f790d9ded8cd10cbaae276a898434/tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22", size = 148796, upload-time = "2025-10-08T22:01:10.266Z" }, - { url = "https://files.pythonhosted.org/packages/60/83/59bff4996c2cf9f9387a0f5a3394629c7efa5ef16142076a23a90f1955fa/tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f", size = 242121, upload-time = "2025-10-08T22:01:11.332Z" }, - { url = "https://files.pythonhosted.org/packages/45/e5/7c5119ff39de8693d6baab6c0b6dcb556d192c165596e9fc231ea1052041/tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52", size = 250070, upload-time = "2025-10-08T22:01:12.498Z" }, - { url = "https://files.pythonhosted.org/packages/45/12/ad5126d3a278f27e6701abde51d342aa78d06e27ce2bb596a01f7709a5a2/tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8", size = 245859, upload-time = "2025-10-08T22:01:13.551Z" }, - { url = 
"https://files.pythonhosted.org/packages/fb/a1/4d6865da6a71c603cfe6ad0e6556c73c76548557a8d658f9e3b142df245f/tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6", size = 250296, upload-time = "2025-10-08T22:01:14.614Z" }, - { url = "https://files.pythonhosted.org/packages/a0/b7/a7a7042715d55c9ba6e8b196d65d2cb662578b4d8cd17d882d45322b0d78/tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876", size = 97124, upload-time = "2025-10-08T22:01:15.629Z" }, - { url = "https://files.pythonhosted.org/packages/06/1e/f22f100db15a68b520664eb3328fb0ae4e90530887928558112c8d1f4515/tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878", size = 107698, upload-time = "2025-10-08T22:01:16.51Z" }, - { url = "https://files.pythonhosted.org/packages/89/48/06ee6eabe4fdd9ecd48bf488f4ac783844fd777f547b8d1b61c11939974e/tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b", size = 154819, upload-time = "2025-10-08T22:01:17.964Z" }, - { url = "https://files.pythonhosted.org/packages/f1/01/88793757d54d8937015c75dcdfb673c65471945f6be98e6a0410fba167ed/tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae", size = 148766, upload-time = "2025-10-08T22:01:18.959Z" }, - { url = "https://files.pythonhosted.org/packages/42/17/5e2c956f0144b812e7e107f94f1cc54af734eb17b5191c0bbfb72de5e93e/tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b", size = 240771, upload-time = "2025-10-08T22:01:20.106Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/f4/0fbd014909748706c01d16824eadb0307115f9562a15cbb012cd9b3512c5/tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf", size = 248586, upload-time = "2025-10-08T22:01:21.164Z" }, - { url = "https://files.pythonhosted.org/packages/30/77/fed85e114bde5e81ecf9bc5da0cc69f2914b38f4708c80ae67d0c10180c5/tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f", size = 244792, upload-time = "2025-10-08T22:01:22.417Z" }, - { url = "https://files.pythonhosted.org/packages/55/92/afed3d497f7c186dc71e6ee6d4fcb0acfa5f7d0a1a2878f8beae379ae0cc/tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05", size = 248909, upload-time = "2025-10-08T22:01:23.859Z" }, - { url = "https://files.pythonhosted.org/packages/f8/84/ef50c51b5a9472e7265ce1ffc7f24cd4023d289e109f669bdb1553f6a7c2/tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606", size = 96946, upload-time = "2025-10-08T22:01:24.893Z" }, - { url = "https://files.pythonhosted.org/packages/b2/b7/718cd1da0884f281f95ccfa3a6cc572d30053cba64603f79d431d3c9b61b/tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999", size = 107705, upload-time = "2025-10-08T22:01:26.153Z" }, - { url = "https://files.pythonhosted.org/packages/19/94/aeafa14a52e16163008060506fcb6aa1949d13548d13752171a755c65611/tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e", size = 154244, upload-time = "2025-10-08T22:01:27.06Z" }, - { url = 
"https://files.pythonhosted.org/packages/db/e4/1e58409aa78eefa47ccd19779fc6f36787edbe7d4cd330eeeedb33a4515b/tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3", size = 148637, upload-time = "2025-10-08T22:01:28.059Z" }, - { url = "https://files.pythonhosted.org/packages/26/b6/d1eccb62f665e44359226811064596dd6a366ea1f985839c566cd61525ae/tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc", size = 241925, upload-time = "2025-10-08T22:01:29.066Z" }, - { url = "https://files.pythonhosted.org/packages/70/91/7cdab9a03e6d3d2bb11beae108da5bdc1c34bdeb06e21163482544ddcc90/tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0", size = 249045, upload-time = "2025-10-08T22:01:31.98Z" }, - { url = "https://files.pythonhosted.org/packages/15/1b/8c26874ed1f6e4f1fcfeb868db8a794cbe9f227299402db58cfcc858766c/tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879", size = 245835, upload-time = "2025-10-08T22:01:32.989Z" }, - { url = "https://files.pythonhosted.org/packages/fd/42/8e3c6a9a4b1a1360c1a2a39f0b972cef2cc9ebd56025168c4137192a9321/tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005", size = 253109, upload-time = "2025-10-08T22:01:34.052Z" }, - { url = "https://files.pythonhosted.org/packages/22/0c/b4da635000a71b5f80130937eeac12e686eefb376b8dee113b4a582bba42/tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463", size = 97930, upload-time = "2025-10-08T22:01:35.082Z" }, - { url = 
"https://files.pythonhosted.org/packages/b9/74/cb1abc870a418ae99cd5c9547d6bce30701a954e0e721821df483ef7223c/tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8", size = 107964, upload-time = "2025-10-08T22:01:36.057Z" }, - { url = "https://files.pythonhosted.org/packages/54/78/5c46fff6432a712af9f792944f4fcd7067d8823157949f4e40c56b8b3c83/tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77", size = 163065, upload-time = "2025-10-08T22:01:37.27Z" }, - { url = "https://files.pythonhosted.org/packages/39/67/f85d9bd23182f45eca8939cd2bc7050e1f90c41f4a2ecbbd5963a1d1c486/tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf", size = 159088, upload-time = "2025-10-08T22:01:38.235Z" }, - { url = "https://files.pythonhosted.org/packages/26/5a/4b546a0405b9cc0659b399f12b6adb750757baf04250b148d3c5059fc4eb/tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530", size = 268193, upload-time = "2025-10-08T22:01:39.712Z" }, - { url = "https://files.pythonhosted.org/packages/42/4f/2c12a72ae22cf7b59a7fe75b3465b7aba40ea9145d026ba41cb382075b0e/tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b", size = 275488, upload-time = "2025-10-08T22:01:40.773Z" }, - { url = "https://files.pythonhosted.org/packages/92/04/a038d65dbe160c3aa5a624e93ad98111090f6804027d474ba9c37c8ae186/tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67", size = 272669, upload-time = "2025-10-08T22:01:41.824Z" }, - { url = 
"https://files.pythonhosted.org/packages/be/2f/8b7c60a9d1612a7cbc39ffcca4f21a73bf368a80fc25bccf8253e2563267/tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f", size = 279709, upload-time = "2025-10-08T22:01:43.177Z" }, - { url = "https://files.pythonhosted.org/packages/7e/46/cc36c679f09f27ded940281c38607716c86cf8ba4a518d524e349c8b4874/tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0", size = 107563, upload-time = "2025-10-08T22:01:44.233Z" }, - { url = "https://files.pythonhosted.org/packages/84/ff/426ca8683cf7b753614480484f6437f568fd2fda2edbdf57a2d3d8b27a0b/tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba", size = 119756, upload-time = "2025-10-08T22:01:45.234Z" }, - { url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/82/30/31573e9457673ab10aa432461bee537ce6cef177667deca369efb79df071/tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c", size = 17477, upload-time = "2026-01-11T11:22:38.165Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/d9/3dc2289e1f3b32eb19b9785b6a006b28ee99acb37d1d47f78d4c10e28bf8/tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867", size = 153663, upload-time = "2026-01-11T11:21:45.27Z" }, + { url = "https://files.pythonhosted.org/packages/51/32/ef9f6845e6b9ca392cd3f64f9ec185cc6f09f0a2df3db08cbe8809d1d435/tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9", size = 148469, upload-time = "2026-01-11T11:21:46.873Z" }, + { url = "https://files.pythonhosted.org/packages/d6/c2/506e44cce89a8b1b1e047d64bd495c22c9f71f21e05f380f1a950dd9c217/tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95", size = 236039, upload-time = "2026-01-11T11:21:48.503Z" }, + { url = "https://files.pythonhosted.org/packages/b3/40/e1b65986dbc861b7e986e8ec394598187fa8aee85b1650b01dd925ca0be8/tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76", size = 243007, upload-time = "2026-01-11T11:21:49.456Z" }, + { url = "https://files.pythonhosted.org/packages/9c/6f/6e39ce66b58a5b7ae572a0f4352ff40c71e8573633deda43f6a379d56b3e/tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d", size = 240875, upload-time = "2026-01-11T11:21:50.755Z" }, + { url = "https://files.pythonhosted.org/packages/aa/ad/cb089cb190487caa80204d503c7fd0f4d443f90b95cf4ef5cf5aa0f439b0/tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576", size = 246271, upload-time = "2026-01-11T11:21:51.81Z" }, + { url = "https://files.pythonhosted.org/packages/0b/63/69125220e47fd7a3a27fd0de0c6398c89432fec41bc739823bcc66506af6/tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a", size = 96770, upload-time = "2026-01-11T11:21:52.647Z" }, + { url = "https://files.pythonhosted.org/packages/1e/0d/a22bb6c83f83386b0008425a6cd1fa1c14b5f3dd4bad05e98cf3dbbf4a64/tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa", 
size = 107626, upload-time = "2026-01-11T11:21:53.459Z" }, + { url = "https://files.pythonhosted.org/packages/2f/6d/77be674a3485e75cacbf2ddba2b146911477bd887dda9d8c9dfb2f15e871/tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614", size = 94842, upload-time = "2026-01-11T11:21:54.831Z" }, + { url = "https://files.pythonhosted.org/packages/3c/43/7389a1869f2f26dba52404e1ef13b4784b6b37dac93bac53457e3ff24ca3/tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1", size = 154894, upload-time = "2026-01-11T11:21:56.07Z" }, + { url = "https://files.pythonhosted.org/packages/e9/05/2f9bf110b5294132b2edf13fe6ca6ae456204f3d749f623307cbb7a946f2/tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8", size = 149053, upload-time = "2026-01-11T11:21:57.467Z" }, + { url = "https://files.pythonhosted.org/packages/e8/41/1eda3ca1abc6f6154a8db4d714a4d35c4ad90adc0bcf700657291593fbf3/tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a", size = 243481, upload-time = "2026-01-11T11:21:58.661Z" }, + { url = "https://files.pythonhosted.org/packages/d2/6d/02ff5ab6c8868b41e7d4b987ce2b5f6a51d3335a70aa144edd999e055a01/tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1", size = 251720, upload-time = "2026-01-11T11:22:00.178Z" }, + { url = "https://files.pythonhosted.org/packages/7b/57/0405c59a909c45d5b6f146107c6d997825aa87568b042042f7a9c0afed34/tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b", size = 247014, upload-time = "2026-01-11T11:22:01.238Z" }, + { 
url = "https://files.pythonhosted.org/packages/2c/0e/2e37568edd944b4165735687cbaf2fe3648129e440c26d02223672ee0630/tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51", size = 251820, upload-time = "2026-01-11T11:22:02.727Z" }, + { url = "https://files.pythonhosted.org/packages/5a/1c/ee3b707fdac82aeeb92d1a113f803cf6d0f37bdca0849cb489553e1f417a/tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729", size = 97712, upload-time = "2026-01-11T11:22:03.777Z" }, + { url = "https://files.pythonhosted.org/packages/69/13/c07a9177d0b3bab7913299b9278845fc6eaaca14a02667c6be0b0a2270c8/tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da", size = 108296, upload-time = "2026-01-11T11:22:04.86Z" }, + { url = "https://files.pythonhosted.org/packages/18/27/e267a60bbeeee343bcc279bb9e8fbed0cbe224bc7b2a3dc2975f22809a09/tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3", size = 94553, upload-time = "2026-01-11T11:22:05.854Z" }, + { url = "https://files.pythonhosted.org/packages/34/91/7f65f9809f2936e1f4ce6268ae1903074563603b2a2bd969ebbda802744f/tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0", size = 154915, upload-time = "2026-01-11T11:22:06.703Z" }, + { url = "https://files.pythonhosted.org/packages/20/aa/64dd73a5a849c2e8f216b755599c511badde80e91e9bc2271baa7b2cdbb1/tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e", size = 149038, upload-time = "2026-01-11T11:22:07.56Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/8a/6d38870bd3d52c8d1505ce054469a73f73a0fe62c0eaf5dddf61447e32fa/tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4", size = 242245, upload-time = "2026-01-11T11:22:08.344Z" }, + { url = "https://files.pythonhosted.org/packages/59/bb/8002fadefb64ab2669e5b977df3f5e444febea60e717e755b38bb7c41029/tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e", size = 250335, upload-time = "2026-01-11T11:22:09.951Z" }, + { url = "https://files.pythonhosted.org/packages/a5/3d/4cdb6f791682b2ea916af2de96121b3cb1284d7c203d97d92d6003e91c8d/tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c", size = 245962, upload-time = "2026-01-11T11:22:11.27Z" }, + { url = "https://files.pythonhosted.org/packages/f2/4a/5f25789f9a460bd858ba9756ff52d0830d825b458e13f754952dd15fb7bb/tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f", size = 250396, upload-time = "2026-01-11T11:22:12.325Z" }, + { url = "https://files.pythonhosted.org/packages/aa/2f/b73a36fea58dfa08e8b3a268750e6853a6aac2a349241a905ebd86f3047a/tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86", size = 97530, upload-time = "2026-01-11T11:22:13.865Z" }, + { url = "https://files.pythonhosted.org/packages/3b/af/ca18c134b5d75de7e8dc551c5234eaba2e8e951f6b30139599b53de9c187/tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87", size = 108227, upload-time = "2026-01-11T11:22:15.224Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/c3/b386b832f209fee8073c8138ec50f27b4460db2fdae9ffe022df89a57f9b/tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132", size = 94748, upload-time = "2026-01-11T11:22:16.009Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c4/84047a97eb1004418bc10bdbcfebda209fca6338002eba2dc27cc6d13563/tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6", size = 154725, upload-time = "2026-01-11T11:22:17.269Z" }, + { url = "https://files.pythonhosted.org/packages/a8/5d/d39038e646060b9d76274078cddf146ced86dc2b9e8bbf737ad5983609a0/tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc", size = 148901, upload-time = "2026-01-11T11:22:18.287Z" }, + { url = "https://files.pythonhosted.org/packages/73/e5/383be1724cb30f4ce44983d249645684a48c435e1cd4f8b5cded8a816d3c/tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66", size = 243375, upload-time = "2026-01-11T11:22:19.154Z" }, + { url = "https://files.pythonhosted.org/packages/31/f0/bea80c17971c8d16d3cc109dc3585b0f2ce1036b5f4a8a183789023574f2/tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d", size = 250639, upload-time = "2026-01-11T11:22:20.168Z" }, + { url = "https://files.pythonhosted.org/packages/2c/8f/2853c36abbb7608e3f945d8a74e32ed3a74ee3a1f468f1ffc7d1cb3abba6/tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702", size = 246897, upload-time = "2026-01-11T11:22:21.544Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/f0/6c05e3196ed5337b9fe7ea003e95fd3819a840b7a0f2bf5a408ef1dad8ed/tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8", size = 254697, upload-time = "2026-01-11T11:22:23.058Z" }, + { url = "https://files.pythonhosted.org/packages/f3/f5/2922ef29c9f2951883525def7429967fc4d8208494e5ab524234f06b688b/tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776", size = 98567, upload-time = "2026-01-11T11:22:24.033Z" }, + { url = "https://files.pythonhosted.org/packages/7b/31/22b52e2e06dd2a5fdbc3ee73226d763b184ff21fc24e20316a44ccc4d96b/tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475", size = 108556, upload-time = "2026-01-11T11:22:25.378Z" }, + { url = "https://files.pythonhosted.org/packages/48/3d/5058dff3255a3d01b705413f64f4306a141a8fd7a251e5a495e3f192a998/tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2", size = 96014, upload-time = "2026-01-11T11:22:26.138Z" }, + { url = "https://files.pythonhosted.org/packages/b8/4e/75dab8586e268424202d3a1997ef6014919c941b50642a1682df43204c22/tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9", size = 163339, upload-time = "2026-01-11T11:22:27.143Z" }, + { url = "https://files.pythonhosted.org/packages/06/e3/b904d9ab1016829a776d97f163f183a48be6a4deb87304d1e0116a349519/tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0", size = 159490, upload-time = "2026-01-11T11:22:28.399Z" }, + { url = 
"https://files.pythonhosted.org/packages/e3/5a/fc3622c8b1ad823e8ea98a35e3c632ee316d48f66f80f9708ceb4f2a0322/tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df", size = 269398, upload-time = "2026-01-11T11:22:29.345Z" }, + { url = "https://files.pythonhosted.org/packages/fd/33/62bd6152c8bdd4c305ad9faca48f51d3acb2df1f8791b1477d46ff86e7f8/tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d", size = 276515, upload-time = "2026-01-11T11:22:30.327Z" }, + { url = "https://files.pythonhosted.org/packages/4b/ff/ae53619499f5235ee4211e62a8d7982ba9e439a0fb4f2f351a93d67c1dd2/tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f", size = 273806, upload-time = "2026-01-11T11:22:32.56Z" }, + { url = "https://files.pythonhosted.org/packages/47/71/cbca7787fa68d4d0a9f7072821980b39fbb1b6faeb5f5cf02f4a5559fa28/tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b", size = 281340, upload-time = "2026-01-11T11:22:33.505Z" }, + { url = "https://files.pythonhosted.org/packages/f5/00/d595c120963ad42474cf6ee7771ad0d0e8a49d0f01e29576ee9195d9ecdf/tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087", size = 108106, upload-time = "2026-01-11T11:22:34.451Z" }, + { url = "https://files.pythonhosted.org/packages/de/69/9aa0c6a505c2f80e519b43764f8b4ba93b5a0bbd2d9a9de6e2b24271b9a5/tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd", size = 120504, upload-time = "2026-01-11T11:22:35.764Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/9f/f1668c281c58cfae01482f7114a4b88d345e4c140386241a1a24dcc9e7bc/tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4", size = 99561, upload-time = "2026-01-11T11:22:36.624Z" }, + { url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" }, ] [[package]] name = "tomlkit" -version = "0.13.3" +version = "0.14.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cc/18/0bbf3884e9eaa38819ebe46a7bd25dcd56b67434402b66a58c4b8e552575/tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1", size = 185207, upload-time = "2025-06-05T07:13:44.947Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/af/14b24e41977adb296d6bd1fb59402cf7d60ce364f90c890bd2ec65c43b5a/tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064", size = 187167, upload-time = "2026-01-13T01:14:53.304Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901, upload-time = "2025-06-05T07:13:43.546Z" }, + { url = "https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680", size = 39310, upload-time = "2026-01-13T01:14:51.965Z" }, ] [[package]] @@ -5751,30 +5597,15 @@ name = "torch" version = "2.9.1" source = { registry = "https://pypi.org/simple" } 
dependencies = [ - { name = "filelock" }, - { name = "fsspec" }, - { name = "jinja2" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 
'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12'" }, - { name = "sympy" }, + { name = "filelock", marker = "sys_platform != 'linux'" }, + { name = "fsspec", marker = "sys_platform != 'linux'" }, + { name = "jinja2", marker = "sys_platform != 'linux'" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'linux') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'linux'" }, + { name = "sympy", marker = "sys_platform != 'linux'" }, { name = "triton", marker = "sys_platform == 'never'" }, - { name = "typing-extensions" }, + { name = "typing-extensions", marker = "sys_platform != 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/5f/56/9577683b23072075ed2e40d725c52c2019d71a972fab8e083763da8e707e/torch-2.9.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:1cc208435f6c379f9b8fdfd5ceb5be1e3b72a6bdf1cb46c0d2812aa73472db9e", size = 104207681, upload-time = "2025-11-12T15:19:56.48Z" }, @@ -5813,7 +5644,7 @@ version = "0.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = 
"https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "torch", marker = "sys_platform == 'never'" }, { name = "torchvision", marker = "sys_platform == 'never'" }, ] @@ -5828,7 +5659,7 @@ version = "0.24.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'linux' and extra == 
'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "pillow", marker = "sys_platform != 'linux'" }, { name = "torch", marker = "sys_platform == 'never'" }, ] @@ -5911,13 +5742,13 @@ dependencies = [ [[package]] name = "transformers" -version = "4.57.3" +version = "4.57.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "huggingface-hub" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pyyaml" }, { name = "regex" }, @@ -5926,9 +5757,9 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dd/70/d42a739e8dfde3d92bb2fff5819cbf331fe9657323221e79415cd5eb65ee/transformers-4.57.3.tar.gz", hash = "sha256:df4945029aaddd7c09eec5cad851f30662f8bd1746721b34cc031d70c65afebc", size = 10139680, upload-time = "2025-11-25T15:51:30.139Z" } +sdist = { url = "https://files.pythonhosted.org/packages/35/3a/7c90ee739871495f1a5cb9bdb074b42fe69357d7ccc1a8818af858d8e63b/transformers-4.57.5.tar.gz", hash = "sha256:d631faea6bd32fc51962e482744afeaa70170c70e5e991cf8e355d7275631524", size = 10138171, upload-time = 
"2026-01-13T13:28:24.19Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/6b/2f416568b3c4c91c96e5a365d164f8a4a4a88030aa8ab4644181fdadce97/transformers-4.57.3-py3-none-any.whl", hash = "sha256:c77d353a4851b1880191603d36acb313411d3577f6e2897814f333841f7003f4", size = 11993463, upload-time = "2025-11-25T15:51:26.493Z" }, + { url = "https://files.pythonhosted.org/packages/f8/de/4f95d22d9764659d2bd35065f383f3fe099699a9e6e89fa4728dbcd7244a/transformers-4.57.5-py3-none-any.whl", hash = "sha256:5a1e0deb989cd0b8f141b6d8c9b7c956fc029cd288d68844f57dc0acbaf2fe39", size = 11993481, upload-time = "2026-01-13T13:28:16.542Z" }, ] [[package]] @@ -5954,11 +5785,11 @@ wheels = [ [[package]] name = "trove-classifiers" -version = "2025.12.1.14" +version = "2026.1.12.15" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/80/e1/000add3b3e0725ce7ee0ea6ea4543f1e1d9519742f3b2320de41eeefa7c7/trove_classifiers-2025.12.1.14.tar.gz", hash = "sha256:a74f0400524fc83620a9be74a07074b5cbe7594fd4d97fd4c2bfde625fdc1633", size = 16985, upload-time = "2025-12-01T14:47:11.456Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/3b/600fa0e35b353a66d1134a233d67feee4d934b7878aef10a21f39b17c6ab/trove_classifiers-2026.1.12.15.tar.gz", hash = "sha256:832a7e89ccc43b64b89f8f9d9150c069ebcd17d2dc68279bc00bb53f2a9ae112", size = 16978, upload-time = "2026-01-12T15:15:10.479Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/7e/bc19996fa86cad8801e8ffe6f1bba5836ca0160df76d0410d27432193712/trove_classifiers-2025.12.1.14-py3-none-any.whl", hash = "sha256:a8206978ede95937b9959c3aff3eb258bbf7b07dff391ddd4ea7e61f316635ab", size = 14184, upload-time = "2025-12-01T14:47:10.113Z" }, + { url = "https://files.pythonhosted.org/packages/7e/c3/4b74a82b58dbfb0a4dafa8149fc60f20ce5228be5d133ca0c3f2e90f6e7d/trove_classifiers-2026.1.12.15-py3-none-any.whl", hash = 
"sha256:8832dfbc226fc4df986666b9cb3a018818b1498aeb79f5f66a31a918b47a98f1", size = 14192, upload-time = "2026-01-12T15:15:09.413Z" }, ] [[package]] @@ -6044,7 +5875,7 @@ wheels = [ [[package]] name = "wandb" -version = "0.23.1" +version = "0.24.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -6058,17 +5889,17 @@ dependencies = [ { name = "sentry-sdk" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0a/cc/770ae3aa7ae44f6792f7ecb81c14c0e38b672deb35235719bb1006519487/wandb-0.23.1.tar.gz", hash = "sha256:f6fb1e3717949b29675a69359de0eeb01e67d3360d581947d5b3f98c273567d6", size = 44298053, upload-time = "2025-12-03T02:25:10.79Z" } +sdist = { url = "https://files.pythonhosted.org/packages/27/7e/aad6e943012ea4d88f3a037f1a5a7c6898263c60fbef8c9cdb95a8ff9fd9/wandb-0.24.0.tar.gz", hash = "sha256:4715a243b3d460b6434b9562e935dfd9dfdf5d6e428cfb4c3e7ce4fd44460ab3", size = 44197947, upload-time = "2026-01-13T22:59:59.767Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/12/0b/c3d7053dfd93fd259a63c7818d9c4ac2ba0642ff8dc8db98662ea0cf9cc0/wandb-0.23.1-py3-none-macosx_12_0_arm64.whl", hash = "sha256:358e15471d19b7d73fc464e37371c19d44d39e433252ac24df107aff993a286b", size = 21527293, upload-time = "2025-12-03T02:24:48.011Z" }, - { url = "https://files.pythonhosted.org/packages/ee/9f/059420fa0cb6c511dc5c5a50184122b6aca7b178cb2aa210139e354020da/wandb-0.23.1-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:110304407f4b38f163bdd50ed5c5225365e4df3092f13089c30171a75257b575", size = 22745926, upload-time = "2025-12-03T02:24:50.519Z" }, - { url = "https://files.pythonhosted.org/packages/96/b6/fd465827c14c64d056d30b4c9fcf4dac889a6969dba64489a88fc4ffa333/wandb-0.23.1-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:6cc984cf85feb2f8ee0451d76bc9fb7f39da94956bb8183e30d26284cf203b65", size = 21212973, upload-time = "2025-12-03T02:24:52.828Z" }, - { url = 
"https://files.pythonhosted.org/packages/5c/ee/9a8bb9a39cc1f09c3060456cc79565110226dc4099a719af5c63432da21d/wandb-0.23.1-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:67431cd3168d79fdb803e503bd669c577872ffd5dadfa86de733b3274b93088e", size = 22887885, upload-time = "2025-12-03T02:24:55.281Z" }, - { url = "https://files.pythonhosted.org/packages/6d/4d/8d9e75add529142e037b05819cb3ab1005679272950128d69d218b7e5b2e/wandb-0.23.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:07be70c0baa97ea25fadc4a9d0097f7371eef6dcacc5ceb525c82491a31e9244", size = 21250967, upload-time = "2025-12-03T02:24:57.603Z" }, - { url = "https://files.pythonhosted.org/packages/97/72/0b35cddc4e4168f03c759b96d9f671ad18aec8bdfdd84adfea7ecb3f5701/wandb-0.23.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:216c95b08e0a2ec6a6008373b056d597573d565e30b43a7a93c35a171485ee26", size = 22988382, upload-time = "2025-12-03T02:25:00.518Z" }, - { url = "https://files.pythonhosted.org/packages/c0/6d/e78093d49d68afb26f5261a70fc7877c34c114af5c2ee0ab3b1af85f5e76/wandb-0.23.1-py3-none-win32.whl", hash = "sha256:fb5cf0f85692f758a5c36ab65fea96a1284126de64e836610f92ddbb26df5ded", size = 22150756, upload-time = "2025-12-03T02:25:02.734Z" }, - { url = "https://files.pythonhosted.org/packages/05/27/4f13454b44c9eceaac3d6e4e4efa2230b6712d613ff9bf7df010eef4fd18/wandb-0.23.1-py3-none-win_amd64.whl", hash = "sha256:21c8c56e436eb707b7d54f705652e030d48e5cfcba24cf953823eb652e30e714", size = 22150760, upload-time = "2025-12-03T02:25:05.106Z" }, - { url = "https://files.pythonhosted.org/packages/30/20/6c091d451e2a07689bfbfaeb7592d488011420e721de170884fedd68c644/wandb-0.23.1-py3-none-win_arm64.whl", hash = "sha256:8aee7f3bb573f2c0acf860f497ca9c684f9b35f2ca51011ba65af3d4592b77c1", size = 20137463, upload-time = "2025-12-03T02:25:08.317Z" }, + { url = "https://files.pythonhosted.org/packages/5f/8a/efec186dcc5dcf3c806040e3f33e58997878b2d30b87aa02b26f046858b6/wandb-0.24.0-py3-none-macosx_12_0_arm64.whl", hash = 
"sha256:aa9777398ff4b0f04c41359f7d1b95b5d656cb12c37c63903666799212e50299", size = 21464901, upload-time = "2026-01-13T22:59:31.86Z" }, + { url = "https://files.pythonhosted.org/packages/ed/84/fadf0d5f1d86c3ba662d2b33a15d2b1f08ff1e4e196c77e455f028b0fda2/wandb-0.24.0-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:0423fbd58c3926949724feae8aab89d20c68846f9f4f596b80f9ffe1fc298130", size = 22697817, upload-time = "2026-01-13T22:59:35.267Z" }, + { url = "https://files.pythonhosted.org/packages/6e/5f/e3124e68d02b30c62856175ce714e07904730be06eecb00f66bb1a59aacf/wandb-0.24.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:2b25fc0c123daac97ed32912ac55642c65013cc6e3a898e88ca2d917fc8eadc0", size = 21118798, upload-time = "2026-01-13T22:59:38.453Z" }, + { url = "https://files.pythonhosted.org/packages/22/a1/8d68a914c030e897c306c876d47c73aa5d9ca72be608971290d3a5749570/wandb-0.24.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:9485344b4667944b5b77294185bae8469cfa4074869bec0e74f54f8492234cc2", size = 22849954, upload-time = "2026-01-13T22:59:41.265Z" }, + { url = "https://files.pythonhosted.org/packages/e9/f8/3e68841a4282a4fb6a8935534e6064acc6c9708e8fb76953ec73bbc72a5e/wandb-0.24.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:51b2b9a9d7d6b35640f12a46a48814fd4516807ad44f586b819ed6560f8de1fd", size = 21160339, upload-time = "2026-01-13T22:59:43.967Z" }, + { url = "https://files.pythonhosted.org/packages/16/e5/d851868ce5b4b437a7cc90405979cd83809790e4e2a2f1e454f63f116e52/wandb-0.24.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:11f7e7841f31eff82c82a677988889ad3aa684c6de61ff82145333b5214ec860", size = 22936978, upload-time = "2026-01-13T22:59:46.911Z" }, + { url = "https://files.pythonhosted.org/packages/d2/34/43b7f18870051047ce6fe18e7eb24ba7ebdc71663a8f1c58e31e855eb8ac/wandb-0.24.0-py3-none-win32.whl", hash = "sha256:42af348998b00d4309ae790c5374040ac6cc353ab21567f4e29c98c9376dee8e", size = 22118243, upload-time = "2026-01-13T22:59:49.555Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/92/909c81173cf1399111f57f9ca5399a8f165607b024e406e080178c878f70/wandb-0.24.0-py3-none-win_amd64.whl", hash = "sha256:32604eddcd362e1ed4a2e2ce5f3a239369c4a193af223f3e66603481ac91f336", size = 22118246, upload-time = "2026-01-13T22:59:52.126Z" }, + { url = "https://files.pythonhosted.org/packages/87/85/a845aefd9c2285f98261fa6ffa0a14466366c1ac106d35bc84b654c0ad7f/wandb-0.24.0-py3-none-win_arm64.whl", hash = "sha256:e0f2367552abfca21b0f3a03405fbf48f1e14de9846e70f73c6af5da57afd8ef", size = 20077678, upload-time = "2026-01-13T22:59:56.112Z" }, ] [[package]] @@ -6202,7 +6033,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "braceexpand" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "pyyaml" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5a/3a/68800d92e065cf4750ebecf973b13979c0c929b439e1293012938862038d/webdataset-1.0.2.tar.gz", hash = "sha256:7f0498be827cfa46cc5430a58768a24e2c6a410676a61be1838f53d61afdaab4", size = 
80090, upload-time = "2025-06-19T23:26:21.945Z" } @@ -6212,73 +6043,82 @@ wheels = [ [[package]] name = "websockets" -version = "15.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/da/6462a9f510c0c49837bbc9345aca92d767a56c1fb2939e1579df1e1cdcf7/websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b", size = 175423, upload-time = "2025-03-05T20:01:35.363Z" }, - { url = "https://files.pythonhosted.org/packages/1c/9f/9d11c1a4eb046a9e106483b9ff69bce7ac880443f00e5ce64261b47b07e7/websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205", size = 173080, upload-time = "2025-03-05T20:01:37.304Z" }, - { url = "https://files.pythonhosted.org/packages/d5/4f/b462242432d93ea45f297b6179c7333dd0402b855a912a04e7fc61c0d71f/websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a", size = 173329, upload-time = "2025-03-05T20:01:39.668Z" }, - { url = "https://files.pythonhosted.org/packages/6e/0c/6afa1f4644d7ed50284ac59cc70ef8abd44ccf7d45850d989ea7310538d0/websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e", size = 182312, upload-time = "2025-03-05T20:01:41.815Z" }, - { url = 
"https://files.pythonhosted.org/packages/dd/d4/ffc8bd1350b229ca7a4db2a3e1c482cf87cea1baccd0ef3e72bc720caeec/websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf", size = 181319, upload-time = "2025-03-05T20:01:43.967Z" }, - { url = "https://files.pythonhosted.org/packages/97/3a/5323a6bb94917af13bbb34009fac01e55c51dfde354f63692bf2533ffbc2/websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb", size = 181631, upload-time = "2025-03-05T20:01:46.104Z" }, - { url = "https://files.pythonhosted.org/packages/a6/cc/1aeb0f7cee59ef065724041bb7ed667b6ab1eeffe5141696cccec2687b66/websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d", size = 182016, upload-time = "2025-03-05T20:01:47.603Z" }, - { url = "https://files.pythonhosted.org/packages/79/f9/c86f8f7af208e4161a7f7e02774e9d0a81c632ae76db2ff22549e1718a51/websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9", size = 181426, upload-time = "2025-03-05T20:01:48.949Z" }, - { url = "https://files.pythonhosted.org/packages/c7/b9/828b0bc6753db905b91df6ae477c0b14a141090df64fb17f8a9d7e3516cf/websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c", size = 181360, upload-time = "2025-03-05T20:01:50.938Z" }, - { url = "https://files.pythonhosted.org/packages/89/fb/250f5533ec468ba6327055b7d98b9df056fb1ce623b8b6aaafb30b55d02e/websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256", size = 176388, upload-time = "2025-03-05T20:01:52.213Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/46/aca7082012768bb98e5608f01658ff3ac8437e563eca41cf068bd5849a5e/websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41", size = 176830, upload-time = "2025-03-05T20:01:53.922Z" }, - { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423, upload-time = "2025-03-05T20:01:56.276Z" }, - { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082, upload-time = "2025-03-05T20:01:57.563Z" }, - { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330, upload-time = "2025-03-05T20:01:59.063Z" }, - { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878, upload-time = "2025-03-05T20:02:00.305Z" }, - { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883, upload-time = "2025-03-05T20:02:03.148Z" }, - { url = 
"https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252, upload-time = "2025-03-05T20:02:05.29Z" }, - { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521, upload-time = "2025-03-05T20:02:07.458Z" }, - { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958, upload-time = "2025-03-05T20:02:09.842Z" }, - { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918, upload-time = "2025-03-05T20:02:11.968Z" }, - { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388, upload-time = "2025-03-05T20:02:13.32Z" }, - { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828, upload-time = "2025-03-05T20:02:14.585Z" }, - { url = 
"https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437, upload-time = "2025-03-05T20:02:16.706Z" }, - { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096, upload-time = "2025-03-05T20:02:18.832Z" }, - { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332, upload-time = "2025-03-05T20:02:20.187Z" }, - { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152, upload-time = "2025-03-05T20:02:22.286Z" }, - { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096, upload-time = "2025-03-05T20:02:24.368Z" }, - { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523, upload-time = 
"2025-03-05T20:02:25.669Z" }, - { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790, upload-time = "2025-03-05T20:02:26.99Z" }, - { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165, upload-time = "2025-03-05T20:02:30.291Z" }, - { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160, upload-time = "2025-03-05T20:02:31.634Z" }, - { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395, upload-time = "2025-03-05T20:02:33.017Z" }, - { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841, upload-time = "2025-03-05T20:02:34.498Z" }, - { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload-time = "2025-03-05T20:02:36.695Z" }, - { url = 
"https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098, upload-time = "2025-03-05T20:02:37.985Z" }, - { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329, upload-time = "2025-03-05T20:02:39.298Z" }, - { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111, upload-time = "2025-03-05T20:02:40.595Z" }, - { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054, upload-time = "2025-03-05T20:02:41.926Z" }, - { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496, upload-time = "2025-03-05T20:02:43.304Z" }, - { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829, upload-time = 
"2025-03-05T20:02:48.812Z" }, - { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217, upload-time = "2025-03-05T20:02:50.14Z" }, - { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" }, - { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" }, - { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" }, - { url = "https://files.pythonhosted.org/packages/02/9e/d40f779fa16f74d3468357197af8d6ad07e7c5a27ea1ca74ceb38986f77a/websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3", size = 173109, upload-time = "2025-03-05T20:03:17.769Z" }, - { url = "https://files.pythonhosted.org/packages/bc/cd/5b887b8585a593073fd92f7c23ecd3985cd2c3175025a91b0d69b0551372/websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1", size = 173343, upload-time = "2025-03-05T20:03:19.094Z" }, - { url = 
"https://files.pythonhosted.org/packages/fe/ae/d34f7556890341e900a95acf4886833646306269f899d58ad62f588bf410/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475", size = 174599, upload-time = "2025-03-05T20:03:21.1Z" }, - { url = "https://files.pythonhosted.org/packages/71/e6/5fd43993a87db364ec60fc1d608273a1a465c0caba69176dd160e197ce42/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9", size = 174207, upload-time = "2025-03-05T20:03:23.221Z" }, - { url = "https://files.pythonhosted.org/packages/2b/fb/c492d6daa5ec067c2988ac80c61359ace5c4c674c532985ac5a123436cec/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04", size = 174155, upload-time = "2025-03-05T20:03:25.321Z" }, - { url = "https://files.pythonhosted.org/packages/68/a1/dcb68430b1d00b698ae7a7e0194433bce4f07ded185f0ee5fb21e2a2e91e/websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122", size = 176884, upload-time = "2025-03-05T20:03:27.934Z" }, - { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, +version = "16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/24/4b2031d72e840ce4c1ccb255f693b15c334757fc50023e4db9537080b8c4/websockets-16.0.tar.gz", hash = 
"sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5", size = 179346, upload-time = "2026-01-10T09:23:47.181Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/74/221f58decd852f4b59cc3354cccaf87e8ef695fede361d03dc9a7396573b/websockets-16.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:04cdd5d2d1dacbad0a7bf36ccbcd3ccd5a30ee188f2560b7a62a30d14107b31a", size = 177343, upload-time = "2026-01-10T09:22:21.28Z" }, + { url = "https://files.pythonhosted.org/packages/19/0f/22ef6107ee52ab7f0b710d55d36f5a5d3ef19e8a205541a6d7ffa7994e5a/websockets-16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8ff32bb86522a9e5e31439a58addbb0166f0204d64066fb955265c4e214160f0", size = 175021, upload-time = "2026-01-10T09:22:22.696Z" }, + { url = "https://files.pythonhosted.org/packages/10/40/904a4cb30d9b61c0e278899bf36342e9b0208eb3c470324a9ecbaac2a30f/websockets-16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:583b7c42688636f930688d712885cf1531326ee05effd982028212ccc13e5957", size = 175320, upload-time = "2026-01-10T09:22:23.94Z" }, + { url = "https://files.pythonhosted.org/packages/9d/2f/4b3ca7e106bc608744b1cdae041e005e446124bebb037b18799c2d356864/websockets-16.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7d837379b647c0c4c2355c2499723f82f1635fd2c26510e1f587d89bc2199e72", size = 183815, upload-time = "2026-01-10T09:22:25.469Z" }, + { url = "https://files.pythonhosted.org/packages/86/26/d40eaa2a46d4302becec8d15b0fc5e45bdde05191e7628405a19cf491ccd/websockets-16.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df57afc692e517a85e65b72e165356ed1df12386ecb879ad5693be08fac65dde", size = 185054, upload-time = "2026-01-10T09:22:27.101Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ba/6500a0efc94f7373ee8fefa8c271acdfd4dca8bd49a90d4be7ccabfc397e/websockets-16.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:2b9f1e0d69bc60a4a87349d50c09a037a2607918746f07de04df9e43252c77a3", size = 184565, upload-time = "2026-01-10T09:22:28.293Z" }, + { url = "https://files.pythonhosted.org/packages/04/b4/96bf2cee7c8d8102389374a2616200574f5f01128d1082f44102140344cc/websockets-16.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:335c23addf3d5e6a8633f9f8eda77efad001671e80b95c491dd0924587ece0b3", size = 183848, upload-time = "2026-01-10T09:22:30.394Z" }, + { url = "https://files.pythonhosted.org/packages/02/8e/81f40fb00fd125357814e8c3025738fc4ffc3da4b6b4a4472a82ba304b41/websockets-16.0-cp310-cp310-win32.whl", hash = "sha256:37b31c1623c6605e4c00d466c9d633f9b812ea430c11c8a278774a1fde1acfa9", size = 178249, upload-time = "2026-01-10T09:22:32.083Z" }, + { url = "https://files.pythonhosted.org/packages/b4/5f/7e40efe8df57db9b91c88a43690ac66f7b7aa73a11aa6a66b927e44f26fa/websockets-16.0-cp310-cp310-win_amd64.whl", hash = "sha256:8e1dab317b6e77424356e11e99a432b7cb2f3ec8c5ab4dabbcee6add48f72b35", size = 178685, upload-time = "2026-01-10T09:22:33.345Z" }, + { url = "https://files.pythonhosted.org/packages/f2/db/de907251b4ff46ae804ad0409809504153b3f30984daf82a1d84a9875830/websockets-16.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:31a52addea25187bde0797a97d6fc3d2f92b6f72a9370792d65a6e84615ac8a8", size = 177340, upload-time = "2026-01-10T09:22:34.539Z" }, + { url = "https://files.pythonhosted.org/packages/f3/fa/abe89019d8d8815c8781e90d697dec52523fb8ebe308bf11664e8de1877e/websockets-16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:417b28978cdccab24f46400586d128366313e8a96312e4b9362a4af504f3bbad", size = 175022, upload-time = "2026-01-10T09:22:36.332Z" }, + { url = "https://files.pythonhosted.org/packages/58/5d/88ea17ed1ded2079358b40d31d48abe90a73c9e5819dbcde1606e991e2ad/websockets-16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:af80d74d4edfa3cb9ed973a0a5ba2b2a549371f8a741e0800cb07becdd20f23d", size = 175319, upload-time = "2026-01-10T09:22:37.602Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/ae/0ee92b33087a33632f37a635e11e1d99d429d3d323329675a6022312aac2/websockets-16.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:08d7af67b64d29823fed316505a89b86705f2b7981c07848fb5e3ea3020c1abe", size = 184631, upload-time = "2026-01-10T09:22:38.789Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c5/27178df583b6c5b31b29f526ba2da5e2f864ecc79c99dae630a85d68c304/websockets-16.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7be95cfb0a4dae143eaed2bcba8ac23f4892d8971311f1b06f3c6b78952ee70b", size = 185870, upload-time = "2026-01-10T09:22:39.893Z" }, + { url = "https://files.pythonhosted.org/packages/87/05/536652aa84ddc1c018dbb7e2c4cbcd0db884580bf8e95aece7593fde526f/websockets-16.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d6297ce39ce5c2e6feb13c1a996a2ded3b6832155fcfc920265c76f24c7cceb5", size = 185361, upload-time = "2026-01-10T09:22:41.016Z" }, + { url = "https://files.pythonhosted.org/packages/6d/e2/d5332c90da12b1e01f06fb1b85c50cfc489783076547415bf9f0a659ec19/websockets-16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c1b30e4f497b0b354057f3467f56244c603a79c0d1dafce1d16c283c25f6e64", size = 184615, upload-time = "2026-01-10T09:22:42.442Z" }, + { url = "https://files.pythonhosted.org/packages/77/fb/d3f9576691cae9253b51555f841bc6600bf0a983a461c79500ace5a5b364/websockets-16.0-cp311-cp311-win32.whl", hash = "sha256:5f451484aeb5cafee1ccf789b1b66f535409d038c56966d6101740c1614b86c6", size = 178246, upload-time = "2026-01-10T09:22:43.654Z" }, + { url = "https://files.pythonhosted.org/packages/54/67/eaff76b3dbaf18dcddabc3b8c1dba50b483761cccff67793897945b37408/websockets-16.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d7f0659570eefb578dacde98e24fb60af35350193e4f56e11190787bee77dac", size = 178684, upload-time = "2026-01-10T09:22:44.941Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/7b/bac442e6b96c9d25092695578dda82403c77936104b5682307bd4deb1ad4/websockets-16.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:71c989cbf3254fbd5e84d3bff31e4da39c43f884e64f2551d14bb3c186230f00", size = 177365, upload-time = "2026-01-10T09:22:46.787Z" }, + { url = "https://files.pythonhosted.org/packages/b0/fe/136ccece61bd690d9c1f715baaeefd953bb2360134de73519d5df19d29ca/websockets-16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8b6e209ffee39ff1b6d0fa7bfef6de950c60dfb91b8fcead17da4ee539121a79", size = 175038, upload-time = "2026-01-10T09:22:47.999Z" }, + { url = "https://files.pythonhosted.org/packages/40/1e/9771421ac2286eaab95b8575b0cb701ae3663abf8b5e1f64f1fd90d0a673/websockets-16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86890e837d61574c92a97496d590968b23c2ef0aeb8a9bc9421d174cd378ae39", size = 175328, upload-time = "2026-01-10T09:22:49.809Z" }, + { url = "https://files.pythonhosted.org/packages/18/29/71729b4671f21e1eaa5d6573031ab810ad2936c8175f03f97f3ff164c802/websockets-16.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9b5aca38b67492ef518a8ab76851862488a478602229112c4b0d58d63a7a4d5c", size = 184915, upload-time = "2026-01-10T09:22:51.071Z" }, + { url = "https://files.pythonhosted.org/packages/97/bb/21c36b7dbbafc85d2d480cd65df02a1dc93bf76d97147605a8e27ff9409d/websockets-16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e0334872c0a37b606418ac52f6ab9cfd17317ac26365f7f65e203e2d0d0d359f", size = 186152, upload-time = "2026-01-10T09:22:52.224Z" }, + { url = "https://files.pythonhosted.org/packages/4a/34/9bf8df0c0cf88fa7bfe36678dc7b02970c9a7d5e065a3099292db87b1be2/websockets-16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a0b31e0b424cc6b5a04b8838bbaec1688834b2383256688cf47eb97412531da1", size = 185583, upload-time = "2026-01-10T09:22:53.443Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/88/4dd516068e1a3d6ab3c7c183288404cd424a9a02d585efbac226cb61ff2d/websockets-16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:485c49116d0af10ac698623c513c1cc01c9446c058a4e61e3bf6c19dff7335a2", size = 184880, upload-time = "2026-01-10T09:22:55.033Z" }, + { url = "https://files.pythonhosted.org/packages/91/d6/7d4553ad4bf1c0421e1ebd4b18de5d9098383b5caa1d937b63df8d04b565/websockets-16.0-cp312-cp312-win32.whl", hash = "sha256:eaded469f5e5b7294e2bdca0ab06becb6756ea86894a47806456089298813c89", size = 178261, upload-time = "2026-01-10T09:22:56.251Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f0/f3a17365441ed1c27f850a80b2bc680a0fa9505d733fe152fdf5e98c1c0b/websockets-16.0-cp312-cp312-win_amd64.whl", hash = "sha256:5569417dc80977fc8c2d43a86f78e0a5a22fee17565d78621b6bb264a115d4ea", size = 178693, upload-time = "2026-01-10T09:22:57.478Z" }, + { url = "https://files.pythonhosted.org/packages/cc/9c/baa8456050d1c1b08dd0ec7346026668cbc6f145ab4e314d707bb845bf0d/websockets-16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9", size = 177364, upload-time = "2026-01-10T09:22:59.333Z" }, + { url = "https://files.pythonhosted.org/packages/7e/0c/8811fc53e9bcff68fe7de2bcbe75116a8d959ac699a3200f4847a8925210/websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230", size = 175039, upload-time = "2026-01-10T09:23:01.171Z" }, + { url = "https://files.pythonhosted.org/packages/aa/82/39a5f910cb99ec0b59e482971238c845af9220d3ab9fa76dd9162cda9d62/websockets-16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c", size = 175323, upload-time = "2026-01-10T09:23:02.341Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/28/0a25ee5342eb5d5f297d992a77e56892ecb65e7854c7898fb7d35e9b33bd/websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5", size = 184975, upload-time = "2026-01-10T09:23:03.756Z" }, + { url = "https://files.pythonhosted.org/packages/f9/66/27ea52741752f5107c2e41fda05e8395a682a1e11c4e592a809a90c6a506/websockets-16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82", size = 186203, upload-time = "2026-01-10T09:23:05.01Z" }, + { url = "https://files.pythonhosted.org/packages/37/e5/8e32857371406a757816a2b471939d51c463509be73fa538216ea52b792a/websockets-16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8", size = 185653, upload-time = "2026-01-10T09:23:06.301Z" }, + { url = "https://files.pythonhosted.org/packages/9b/67/f926bac29882894669368dc73f4da900fcdf47955d0a0185d60103df5737/websockets-16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f", size = 184920, upload-time = "2026-01-10T09:23:07.492Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a1/3d6ccdcd125b0a42a311bcd15a7f705d688f73b2a22d8cf1c0875d35d34a/websockets-16.0-cp313-cp313-win32.whl", hash = "sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a", size = 178255, upload-time = "2026-01-10T09:23:09.245Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ae/90366304d7c2ce80f9b826096a9e9048b4bb760e44d3b873bb272cba696b/websockets-16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156", size = 178689, upload-time = "2026-01-10T09:23:10.483Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/1d/e88022630271f5bd349ed82417136281931e558d628dd52c4d8621b4a0b2/websockets-16.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0", size = 177406, upload-time = "2026-01-10T09:23:12.178Z" }, + { url = "https://files.pythonhosted.org/packages/f2/78/e63be1bf0724eeb4616efb1ae1c9044f7c3953b7957799abb5915bffd38e/websockets-16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904", size = 175085, upload-time = "2026-01-10T09:23:13.511Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/d3c9220d818ee955ae390cf319a7c7a467beceb24f05ee7aaaa2414345ba/websockets-16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4", size = 175328, upload-time = "2026-01-10T09:23:14.727Z" }, + { url = "https://files.pythonhosted.org/packages/63/bc/d3e208028de777087e6fb2b122051a6ff7bbcca0d6df9d9c2bf1dd869ae9/websockets-16.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e", size = 185044, upload-time = "2026-01-10T09:23:15.939Z" }, + { url = "https://files.pythonhosted.org/packages/ad/6e/9a0927ac24bd33a0a9af834d89e0abc7cfd8e13bed17a86407a66773cc0e/websockets-16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4", size = 186279, upload-time = "2026-01-10T09:23:17.148Z" }, + { url = "https://files.pythonhosted.org/packages/b9/ca/bf1c68440d7a868180e11be653c85959502efd3a709323230314fda6e0b3/websockets-16.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1", size = 185711, upload-time = "2026-01-10T09:23:18.372Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/f8/fdc34643a989561f217bb477cbc47a3a07212cbda91c0e4389c43c296ebf/websockets-16.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3", size = 184982, upload-time = "2026-01-10T09:23:19.652Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d1/574fa27e233764dbac9c52730d63fcf2823b16f0856b3329fc6268d6ae4f/websockets-16.0-cp314-cp314-win32.whl", hash = "sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8", size = 177915, upload-time = "2026-01-10T09:23:21.458Z" }, + { url = "https://files.pythonhosted.org/packages/8a/f1/ae6b937bf3126b5134ce1f482365fde31a357c784ac51852978768b5eff4/websockets-16.0-cp314-cp314-win_amd64.whl", hash = "sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d", size = 178381, upload-time = "2026-01-10T09:23:22.715Z" }, + { url = "https://files.pythonhosted.org/packages/06/9b/f791d1db48403e1f0a27577a6beb37afae94254a8c6f08be4a23e4930bc0/websockets-16.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244", size = 177737, upload-time = "2026-01-10T09:23:24.523Z" }, + { url = "https://files.pythonhosted.org/packages/bd/40/53ad02341fa33b3ce489023f635367a4ac98b73570102ad2cdd770dacc9a/websockets-16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e", size = 175268, upload-time = "2026-01-10T09:23:25.781Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/6158d4e459b984f949dcbbb0c5d270154c7618e11c01029b9bbd1bb4c4f9/websockets-16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641", size = 175486, upload-time = "2026-01-10T09:23:27.033Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/2d/7583b30208b639c8090206f95073646c2c9ffd66f44df967981a64f849ad/websockets-16.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8", size = 185331, upload-time = "2026-01-10T09:23:28.259Z" }, + { url = "https://files.pythonhosted.org/packages/45/b0/cce3784eb519b7b5ad680d14b9673a31ab8dcb7aad8b64d81709d2430aa8/websockets-16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e", size = 186501, upload-time = "2026-01-10T09:23:29.449Z" }, + { url = "https://files.pythonhosted.org/packages/19/60/b8ebe4c7e89fb5f6cdf080623c9d92789a53636950f7abacfc33fe2b3135/websockets-16.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944", size = 186062, upload-time = "2026-01-10T09:23:31.368Z" }, + { url = "https://files.pythonhosted.org/packages/88/a8/a080593f89b0138b6cba1b28f8df5673b5506f72879322288b031337c0b8/websockets-16.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206", size = 185356, upload-time = "2026-01-10T09:23:32.627Z" }, + { url = "https://files.pythonhosted.org/packages/c2/b6/b9afed2afadddaf5ebb2afa801abf4b0868f42f8539bfe4b071b5266c9fe/websockets-16.0-cp314-cp314t-win32.whl", hash = "sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6", size = 178085, upload-time = "2026-01-10T09:23:33.816Z" }, + { url = "https://files.pythonhosted.org/packages/9f/3e/28135a24e384493fa804216b79a6a6759a38cc4ff59118787b9fb693df93/websockets-16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd", size = 178531, upload-time = "2026-01-10T09:23:35.016Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/07/c98a68571dcf256e74f1f816b8cc5eae6eb2d3d5cfa44d37f801619d9166/websockets-16.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:349f83cd6c9a415428ee1005cadb5c2c56f4389bc06a9af16103c3bc3dcc8b7d", size = 174947, upload-time = "2026-01-10T09:23:36.166Z" }, + { url = "https://files.pythonhosted.org/packages/7e/52/93e166a81e0305b33fe416338be92ae863563fe7bce446b0f687b9df5aea/websockets-16.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:4a1aba3340a8dca8db6eb5a7986157f52eb9e436b74813764241981ca4888f03", size = 175260, upload-time = "2026-01-10T09:23:37.409Z" }, + { url = "https://files.pythonhosted.org/packages/56/0c/2dbf513bafd24889d33de2ff0368190a0e69f37bcfa19009ef819fe4d507/websockets-16.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f4a32d1bd841d4bcbffdcb3d2ce50c09c3909fbead375ab28d0181af89fd04da", size = 176071, upload-time = "2026-01-10T09:23:39.158Z" }, + { url = "https://files.pythonhosted.org/packages/a5/8f/aea9c71cc92bf9b6cc0f7f70df8f0b420636b6c96ef4feee1e16f80f75dd/websockets-16.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0298d07ee155e2e9fda5be8a9042200dd2e3bb0b8a38482156576f863a9d457c", size = 176968, upload-time = "2026-01-10T09:23:41.031Z" }, + { url = "https://files.pythonhosted.org/packages/9a/3f/f70e03f40ffc9a30d817eef7da1be72ee4956ba8d7255c399a01b135902a/websockets-16.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a653aea902e0324b52f1613332ddf50b00c06fdaf7e92624fbf8c77c78fa5767", size = 178735, upload-time = "2026-01-10T09:23:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" }, ] [[package]] name = "werkzeug" 
-version = "3.1.4" +version = "3.1.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markupsafe" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/45/ea/b0f8eeb287f8df9066e56e831c7824ac6bab645dd6c7a8f4b2d767944f9b/werkzeug-3.1.4.tar.gz", hash = "sha256:cd3cd98b1b92dc3b7b3995038826c68097dcb16f9baa63abe35f20eafeb9fe5e", size = 864687, upload-time = "2025-11-29T02:15:22.841Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/70/1469ef1d3542ae7c2c7b72bd5e3a4e6ee69d7978fa8a3af05a38eca5becf/werkzeug-3.1.5.tar.gz", hash = "sha256:6a548b0e88955dd07ccb25539d7d0cc97417ee9e179677d22c7041c8f078ce67", size = 864754, upload-time = "2026-01-08T17:49:23.247Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/f9/9e082990c2585c744734f85bec79b5dae5df9c974ffee58fe421652c8e91/werkzeug-3.1.4-py3-none-any.whl", hash = "sha256:2ad50fb9ed09cc3af22c54698351027ace879a0b60a3b5edf5730b2f7d876905", size = 224960, upload-time = "2025-11-29T02:15:21.13Z" }, + { url = "https://files.pythonhosted.org/packages/ad/e4/8d97cca767bcc1be76d16fb76951608305561c6e056811587f36cb1316a8/werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc", size = 225025, upload-time = "2026-01-08T17:49:21.859Z" }, ] [[package]] From 420aa6ad0cb2555192b0afb736e1f5bb37137d76 Mon Sep 17 00:00:00 2001 From: Dong Hyuk Chang Date: Fri, 23 Jan 2026 18:32:12 -0500 Subject: [PATCH 251/334] ci: Skip test_precision_aware_optimizer (#3062) Signed-off-by: Dong Hyuk Chang --- tests/unit_tests/test_optimizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py index 1f5bbc3f14c..6b1da8c4e3f 100644 --- a/tests/unit_tests/test_optimizer.py +++ b/tests/unit_tests/test_optimizer.py @@ -384,6 +384,7 @@ def test_precision_aware_fused_adam(): "moment_dtype", [torch.float32, torch.float16, torch.bfloat16, torch.uint8], ) 
+@pytest.mark.skip(reason="inconsistent ci test runs resulting in NCCL errors") def test_precision_aware_optimizer( precision: str, main_params_dtype: torch.dtype, From 0f82f059ecda533647fca34a014a8a6da4f56b0e Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Wed, 28 Jan 2026 18:31:57 +0800 Subject: [PATCH 252/334] [dev] fixes for pull main 260122 (#3103) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig Signed-off-by: Deyu Fu Co-authored-by: Yuzhong Wang Co-authored-by: oliver könig --- gpt_builders.py | 21 +- ...rimental_attention_variant_module_specs.py | 59 +- megatron/core/models/gpt/gpt_layer_specs.py | 5 +- megatron/core/ssm/mamba_layer.py | 7 +- pretrain_mamba.py | 7 +- .../golden_values_dev_dgx_h100.json | 2048 ++++++++--------- 6 files changed, 1094 insertions(+), 1053 deletions(-) diff --git a/gpt_builders.py b/gpt_builders.py index a86d3af100b..0be64edaab6 100644 --- a/gpt_builders.py +++ b/gpt_builders.py @@ -11,6 +11,7 @@ ) from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( get_transformer_block_with_experimental_attention_variant_spec, + get_transformer_layer_with_experimental_attention_variant_spec, ) from megatron.core.models.gpt.heterogeneous.heterogeneous_layer_specs import ( get_gpt_heterogeneous_layer_spec, @@ -76,13 +77,19 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None, pg_ mtp_transformer_layer_spec = import_module(args.spec) else: # Define the decoder block spec - decoder_layer_specs = get_gpt_decoder_layer_specs( - config, - use_transformer_engine=use_te, - normalization=args.normalization, - qk_l2_norm=args.qk_l2_norm, - vp_stage=vp_stage, - ) + if args.experimental_attention_variant is not None: + decoder_layer_specs = ( + get_transformer_layer_with_experimental_attention_variant_spec( + config=config + ) + ) + else: + decoder_layer_specs = get_gpt_decoder_layer_specs( + config, + use_transformer_engine=use_te, + 
normalization=args.normalization, + qk_l2_norm=args.qk_l2_norm, + ) mtp_transformer_layer_spec = decoder_layer_specs[-1] # Use spec of the last layer in decoder block as spec of the transformer layer in MTP mtp_block_spec = get_gpt_mtp_block_spec( diff --git a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py index a7cc7cc0a55..3051cf6e960 100644 --- a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py +++ b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py @@ -149,12 +149,12 @@ def get_experimental_attention_variant_module_spec( ########## -def get_transformer_block_with_experimental_attention_variant_spec( - config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None -) -> TransformerBlockSubmodules: - """Build transformer block spec with experimental attention variants (e.g., linear attention). +def get_transformer_layer_with_experimental_attention_variant_spec( + config: TransformerConfig, backend: BackendSpecProvider = None +) -> List[ModuleSpec]: + """Build transformer layer specs with experimental attention variants (e.g., linear attention). - This function constructs a heterogeneous transformer block that supports mixing different + This function is for constructing a heterogeneous transformer that supports mixing different attention mechanisms (experimental vs standard) and MLP types (MoE vs dense) across layers. **Note that, this API is a experimental API in the short term, and might be deprecated in the future. In the long run, we will move to a new design that better support hybrid models.** @@ -170,22 +170,19 @@ def get_transformer_block_with_experimental_attention_variant_spec( 2. Per-Layer Spec Construction: Iterates through layers, constructing transformer layer specs based on attention and MLP patterns. - 3. Pipeline Slicing: Extracts layer specs for the current pipeline stage. 
- Args: config: Transformer configuration containing model hyperparameters and feature flags. - vp_stage: Virtual pipeline stage index for interleaved pipeline parallelism. - pp_rank: Pipeline model parallel rank. Returns: - TransformerBlockSubmodules containing per-layer specs and final layer norm. + List[ModuleSpec] containing per-layer specs. Note: Currently only supports transformer_engine backend. Kitchen backend can be used as a wrapper with TE fallback for unsupported operations. """ - backend = _get_backend_spec_provider(config=config) + if backend is None: + backend = _get_backend_spec_provider(config=config) # Get attention patterns and specs experimental_attention_pattern = [0] * config.num_layers @@ -257,6 +254,42 @@ def get_transformer_block_with_experimental_attention_variant_spec( ) ) + return layer_specs + + +def get_transformer_block_with_experimental_attention_variant_spec( + config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None +) -> TransformerBlockSubmodules: + """Build transformer block spec with experimental attention variants (e.g., linear attention). + + This function constructs a heterogeneous transformer block that supports mixing different + attention mechanisms (experimental vs standard) and MLP types (MoE vs dense) across layers. + **Note that, this API is a experimental API in the short term, and might be deprecated in the + future. In the long run, we will move to a new design that better support hybrid models.** + + Constructing transformer layer specs by + `get_transformer_layer_with_experimental_attention_variant_spec` and then slicing the + layer specs to only include the layers that are built in this pipeline stage. + + Args: + config: Transformer configuration containing model hyperparameters and feature flags. + vp_stage: Virtual pipeline stage index for interleaved pipeline parallelism. + pp_rank: Pipeline model parallel rank. 
+ + Returns: + TransformerBlockSubmodules containing per-layer specs and final layer norm. + + Note: + Currently only supports transformer_engine backend. Kitchen backend can be used as a + wrapper with TE fallback for unsupported operations. + """ + + backend = _get_backend_spec_provider(config=config) + + layer_specs = get_transformer_layer_with_experimental_attention_variant_spec( + config=config, backend=backend + ) + # Slice the layer specs to only include the layers that are built in this pipeline stage. if config.pipeline_model_parallel_layout is not None: local_layer_ids = config.pipeline_model_parallel_layout.get_layer_id_list( @@ -270,6 +303,7 @@ def get_transformer_block_with_experimental_attention_variant_spec( layer_specs = [layer_specs[layer_id] for layer_id in local_layer_ids] # Get GPT decoder block spec + rms_norm = config.normalization == "RMSNorm" gpt_decoder_block_spec = TransformerBlockSubmodules( layer_specs=layer_specs, layer_norm=backend.layer_norm(rms_norm=rms_norm, for_qk=False) ) @@ -359,7 +393,7 @@ def _get_backend_spec_provider(config: TransformerConfig) -> BackendSpecProvider ) backend: BackendSpecProvider = ( KitchenSpecProvider( - fallback=TESpecProvider(), + fallback=TESpecProvider(fallback_to_eager_attn=config.fallback_to_eager_attn), use_kitchen_attention=config.use_kitchen_attention, kitchen_attention_backend=config.kitchen_attention_backend, ) @@ -396,6 +430,7 @@ def _get_self_attention_module_spec( qk_l2_norm=config.qk_l2_norm, use_kitchen=config.use_kitchen, use_te_activation_func=config.use_te_activation_func, + fallback_to_eager_attn=config.fallback_to_eager_attn, use_kitchen_attention=config.use_kitchen_attention, kitchen_attention_backend=config.kitchen_attention_backend, ) diff --git a/megatron/core/models/gpt/gpt_layer_specs.py b/megatron/core/models/gpt/gpt_layer_specs.py index 3bd0c7fe6ab..dfaf59bbcfc 100755 --- a/megatron/core/models/gpt/gpt_layer_specs.py +++ b/megatron/core/models/gpt/gpt_layer_specs.py @@ -618,6 
+618,7 @@ def get_gpt_decoder_block_spec( layer_specs = get_gpt_decoder_layer_specs( config, use_transformer_engine, normalization, qk_l2_norm ) + # Slice the layer specs to only include the layers that are built in this pipeline stage. # Note: MCore layer_number starts at 1 num_layers_to_build = get_num_layers_to_build(config, vp_stage=vp_stage, pp_rank=pp_rank) @@ -637,10 +638,6 @@ def get_gpt_decoder_block_spec( offset = get_transformer_layer_offset(config, vp_stage=vp_stage, pp_rank=pp_rank) local_layer_specs = layer_specs[offset : offset + num_layers_to_build] - if use_transformer_engine: - layer_norm_impl = TENorm - else: - layer_norm_impl = LNImpl # Block spec. if use_transformer_engine: layer_norm_impl = TENorm diff --git a/megatron/core/ssm/mamba_layer.py b/megatron/core/ssm/mamba_layer.py index 48ea84566d5..6b96b262ff0 100644 --- a/megatron/core/ssm/mamba_layer.py +++ b/megatron/core/ssm/mamba_layer.py @@ -185,6 +185,9 @@ def _should_call_local_cudagraph(self, *args, **kwargs): and kwargs.get('attention_mask') is None and kwargs.get('inference_context') is not None ): - using_cuda_graph = kwargs['inference_context'].using_cuda_graph_this_step() - return using_cuda_graph + if hasattr(kwargs['inference_context'], "using_cuda_graph_this_step"): + return kwargs['inference_context'].using_cuda_graph_this_step() + else: + # static + return kwargs['inference_context'].is_decode_only() return False diff --git a/pretrain_mamba.py b/pretrain_mamba.py index bd46dce212f..6fcc0d25c45 100644 --- a/pretrain_mamba.py +++ b/pretrain_mamba.py @@ -82,11 +82,10 @@ def get_batch(data_iterator, vp_stage=None): return empty_batch.values() batch = get_batch_on_this_tp_rank(data_iterator) - - # Support for Packed Sequence (Unused in this script) - cu_seqlens = batch.pop('cu_seqlens', None) + + cu_seqlens = batch['cu_seqlens'] + # Unused at the moment cu_seqlens_padded = batch.pop('cu_seqlens_padded', None) - max_seqlen = batch.pop('max_seqlen', None) # Support for Hybrid Context 
Parallel (Unused in this script) local_cp_size = batch.pop('local_cp_size', None) diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json index b31640a2a28..dc1e1921fd8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json @@ -1,1028 +1,1028 @@ { "throughput": [ - 41.46611265659158, - 44.4918071112372, - 46.926673665513704, - 46.30487800041612, - 45.31117511724168, - 39.48427257480573, - 41.73807567318408, - 44.986328772700176, - 46.79460518580979, - 2.1481645603133406, - 45.3304673980315, - 46.361305003734564, - 1.2216768370041928, - 35.39842883637453, - 44.9539795483452, - 39.212326267312775, - 1.0742220506708642, - 45.596949876501405, - 1.656518545685144, - 41.1853065101293, - 45.186903991589205, - 2.733636984435035, - 1.8859234764357438, - 4.103119744826081, - 45.69245622017379, - 1.6582215083936738, - 37.954906657600475, - 46.5127757873931, - 45.29733823530308, - 23.1754689963102, - 43.44487109471452, - 33.311038622351724, - 46.400400898475304, - 43.13207624251721, - 45.26221685255157, - 38.89631907864675, - 1.0766827581902934, - 3.1955625641377354, - 41.00672778846412, - 1.225434086753332, - 0.951420354873873, - 47.29759062957134, - 37.27931328255301, - 44.02626192577354, - 44.567351509891715, - 41.19817412895097, - 1.4117117845102758, - 46.974942144500005, - 26.16803432928029, - 40.79104304470394, - 45.98186302516314, - 47.4055947551752, - 1.076201435026891, - 3.1796394093402074, - 41.23717257081556, - 42.85213590859161, - 44.28329201807133, - 46.527540336613534, - 43.08848614726634, - 44.40830753324719, - 41.37604170752994, - 
0.9482378607333808, - 45.48122547719385, - 47.20316588665498, - 0.9510683482370443, - 1.9012380421663475, - 46.19550253488152, - 2.7330118039774067, - 45.74495207812405, - 34.67238053318697, - 38.85119722571936, - 1.225081100472964, - 45.15238085691014, - 40.396011557170766, - 45.488921919651816, - 45.29351001493665, - 1.0758273605231232, - 29.808026495079588, - 1.2280820949811997, - 46.586185131212794, - 42.89263913245724, - 42.15612175451927, - 46.693253798156995, - 46.57003199283068, - 46.509087816223484, - 38.12557546239959, - 45.81548305523131, - 46.07453120649211, - 40.81605463432999, - 45.228424339779814, - 42.086064813661196, - 42.78740035356858, - 45.98922633164769, - 41.28717865700289, - 1.2274351142907918, - 43.46971411790415, - 45.4498626576556, - 42.51719188567606, - 46.624215728553786, - 43.26045159027894, - 43.962414509948275, - 0.9481540147597537, - 1.2267700611313974, - 1.2246727704472544, - 45.950324312195605, - 46.02559998344755, - 1.413545795432525, - 2.1538932898075407, - 45.57032628071106, - 38.877775528665516, - 44.5660811280025, - 45.98326532911864, - 41.78435738761637, - 44.118449498817554, - 43.11682781122976, - 46.80957208928424, - 1.0755822711089933, - 29.775928132799514, - 42.492052303926506, - 1.2241095107799485, - 45.796086216431775, - 45.258843364665246, - 44.97308057669771, - 42.89527265230854, - 43.91533758581356, - 35.81442349583988, - 30.65358830169187, - 46.3182793971083, - 44.145493159555286, - 44.2651994526335, - 40.09824843769361, - 45.68707977480025, - 39.990813212941646, - 35.79658562417175, - 44.86013694329229, - 41.83115806056866, - 37.15064410140025, - 0.996787320025337, - 45.66808620182929, - 46.6130598481811, - 45.60972037064592, - 0.9940425141246046, - 45.591900274871186, - 46.96840985185615, - 43.393354375970155, - 25.5248831966376, - 45.77235244972332, - 24.590561326831967, - 0.9773483444490005, - 34.09417278739622, - 43.586572958161206, - 46.535859932274164, - 45.946757322805404, - 0.9962165194499956, - 
0.992874583950711, - 46.119932829039165, - 42.179658293228435, - 32.997191121192365, - 44.17582132320044, - 46.14366473770965, - 45.81106545186327, - 0.9957624959115234, - 0.9924622264244217, - 39.42192933951627, - 37.64229442727469, - 21.26565173458009, - 45.593412953334585, - 46.87304671516134, - 45.216027572946594, - 42.43765019133474, - 46.197382024442064, - 40.692114254409056, - 45.33796853087654, - 27.766522112160985, - 40.02641706822085, - 1.3017150918854614, - 45.591631786019235, - 44.34279696011747, - 39.28257190816356, - 43.72958684288255, - 0.9771143356157014, - 23.874882409185425, - 38.84831650281934, - 46.04825715862786, - 44.318350427904555, - 47.26086876225989, - 39.433419122254435, - 42.94084765393213, - 43.44077111651132, - 42.4775425505976, - 0.9890763303083981, - 47.353878858820345, - 40.99026973150018, - 0.9955331259047124, - 46.52810662522569, - 43.71121305319187, - 43.098140605333754, - 0.9941110054345192, - 0.9887007080233833, - 41.60423122999918, - 45.81533148936388, - 42.37614297709579, - 45.84171517205181, - 41.73162426832469, - 0.976838541947363, - 14.558863836592382, - 0.988317986920056, - 27.41518624216025, - 46.00613760472248, - 44.605125117227445, - 0.9923556095766691, - 46.06453996269855, - 45.69598995103852, - 38.29204120955434, - 0.9879204612413145, - 45.051133494631664, - 0.974139430894493, - 43.52911731376158, - 0.9919675926934881, - 45.37964604415822, - 0.976397605350521, - 36.30289308241207, - 45.597233615462315, - 43.61071649968794, - 43.122470348017536, - 46.76087701561043, - 0.9915593888202096, - 43.301652472823534, - 43.35874933591963, - 0.9940066207204965, - 42.186091123827985, - 45.37749985977852, - 0.9738097357420213, - 46.47531110944141, - 0.9911618676375942, - 43.561154900046205, - 42.50481546978642, - 36.28178246877416, - 44.229193258120816, - 43.274122438133034, - 43.16603619055846, - 46.24123104179791, - 0.9907652867200517, - 44.808052346983644, - 42.157257924432415, - 30.810167635761594, - 44.5009455404432, - 
44.803133707609575, - 46.717718944658586, - 45.328295623099564, - 0.9903649151763216, - 45.98765051561304, - 43.15949033247262, - 0.9938810855133485, - 42.5272021864534, - 46.202556875553654, - 37.69680010665373, - 13.506488443568907, - 47.084518208092895, - 45.34409129030842, - 45.528670127709155, - 1.0839758382565585, - 45.77369572816552, - 40.36600389536794, - 46.346373598961115, - 47.59928731210073, - 45.213230445194775, - 46.97741000418462, - 43.73589527028813, - 38.21138599701667, - 39.80440406603509, - 47.546574744238036, - 46.363044750837105, - 45.73935328577624, - 22.79542790283351, - 1.0852955230764447, - 46.31190530756646, - 10.103645571001175, - 20.743583307847267, - 34.08924086156784, - 40.34233471572178, - 1.0825832325439408, - 42.93380762165118, - 46.538540446937695, - 40.56431787179345, - 1.0837596134259624, - 35.02268200701654, - 47.136990718638934, - 38.591258432063235, - 47.93266376947172, - 40.53416662878643, - 46.663334136659614, - 1.0714520955139675, - 27.88935756664922, - 45.48047962233704, - 1.0758750615408978, - 1.0683190801502396, - 46.009876361978876, - 46.59268594380503, - 46.02812612004097, - 46.372356575684854, - 22.894765755636868, - 45.64436406976758, - 46.20773355624579, - 42.364426646383905, - 1.0822510357556412, - 44.863056156314066, - 46.46090797778492, - 1.0710544669423023, - 1.083596675232654, - 46.253226306136575, - 1.075461579555405, - 46.46757181265049, - 1.081777244820761, - 1.079157130525964, - 47.44728077576711, - 44.18890905454099, - 25.69445080780143, - 41.61341063520841, - 1.0749834632245117, - 45.18278804232428, - 1.0813046939407982, - 45.584290798191994, - 1.0851558601194167, - 1.0706298125469418, - 27.277652622917802, - 3.13795203228774, - 46.596243996630385, - 1.0680343711445561, - 1.0808489429820316, - 44.07771833504717, - 1.0782837622370247, - 44.620236842054005, - 33.66037405692795, - 42.88981761147569, - 1.0745719383443746, - 1.067541523615096, - 43.3531928586852, - 46.45260807995745, - 46.301433990064965, - 
45.45037480313856, - 42.01190688214572, - 43.97592120992246, - 44.22612202356458, - 46.93790632881387, - 43.35324044647867, - 46.24983553374027, - 1.0779013969854039, - 45.68642573969881, - 40.71576971597602, - 43.609256041900395, - 44.75345611987869, - 46.683440264062696, - 6.250364298356673, - 46.58797465847453, - 1.0773923535890582, - 43.82763570204923, - 41.62940460437239, - 42.91661388574536, - 46.901610347450095, - 46.61677212391794, - 1.080583826854443, - 34.07713605907777, - 46.92641126499492, - 45.79075334582258, - 40.14409222341034, - 45.361779654878845, - 46.88204342817273, - 46.35566639777504, - 46.36704829301128, - 1.079068056447631, - 46.774512434519465, - 1.0704507990204184, - 1.0837001046492374, - 44.56501843026455, - 45.92497594226974, - 46.819599375484145, - 1.0801577199815187, - 46.01182819769449, - 1.0770346495733834, - 46.950613182781744, - 30.797706097998343, - 46.18180484355316, - 46.16072338065117, - 1.1133090433838153, - 1.1264329475750274, - 1.1236172122377037, - 47.045544454610436, - 46.77875324298633, - 28.03992244253687, - 45.334641615839494, - 8.780689100623139, - 20.7913981632672, - 32.723036948097274, - 45.13282209264667, - 46.65435200771115, - 45.96287965580367, - 9.076296968757461, - 45.4816339150996, - 46.902872519542036, - 46.16846796984993, - 45.756891597403175, - 44.88315382035088, - 46.23903054578556, - 45.83324366902273, - 17.750809391531607, - 45.20000225981293, - 47.302482301226895, - 45.60218665990497, - 36.97764728135097, - 46.59609042040382, - 46.604767462324304, - 45.96159537616419, - 22.37221435902452, - 43.859502782475616, - 46.5164446015921, - 46.29329085467359, - 1.1262112315718147, - 46.308551190848824, - 46.12319048896243, - 43.60305812792925, - 0.9422659923955576, - 45.850627271010616, - 45.017760412103506, - 46.45017372234843, - 46.681005137311296, - 1.1235052275623567, - 45.024655731975905, - 42.551907139236725, - 0.9419457570631012, - 41.1118024425248, - 45.63421048620437, - 46.022116096626675, - 
1.1258383546403372, - 47.1081443735114, - 47.030126605956774, - 42.86500455064436, - 37.358353939700315, - 45.34461986882157, - 46.86806884248587, - 46.417501701989885, - 46.351389315230215, - 46.78447423742242, - 43.74686698408526, - 1.116867665232356, - 0.9417093885501255, - 1.1193255628248941, - 46.36628759364972, - 47.0182927090698, - 44.33757352470002, - 7.691634088129115, - 1.1283438070497074, - 43.879143747221455, - 0.9414915905260655, - 1.1187592356622462, - 1.1221505116978934, - 46.07747894106487, - 46.579798906537704, - 45.766896552621894, - 46.65247758283254, - 43.302159908237364, - 37.720159108605536, - 1.1182282725285237, - 46.39182837285494, - 44.636636353923784, - 43.44450203063323, - 1.1233649178804157, - 45.04855028838785, - 1.1165108506849695, - 29.25784442036365, - 44.92016113045485, - 1.1217307674387187, - 46.08594914883392, - 1.1256588113160433, - 44.33658350966423, - 1.1279641443945907, - 46.995953225218045, - 43.09174152350243, - 45.522175701238005, - 44.54660682798267, - 46.26002914896281, - 45.121721334753246, - 45.99661519970516, - 46.999367551883665, - 1.1162274151428622, - 34.79092708982097, - 45.466303894602824, - 1.1214388358967042, - 46.3611527229414, - 1.1253775196067384, - 1.1231558495643674, - 45.46781022594765, - 46.83967784020296, - 35.37244717495285, - 1.1180685191822184, - 47.0281597759591, - 45.004932496628875, - 44.35708507257986, - 46.65855899768837, - 5.505111079406215, - 1.115802761131929, - 35.602590093008914, - 44.671751586624886, - 46.281278781026465, - 46.65874233841448, - 47.449917573209895, - 47.11754288927177, - 46.84313387306054, - 1.1152851890752418, - 26.693730551391678, - 45.574691537692864, - 47.110350441661474, - 46.950895044828556, - 47.10814947984309, - 42.35670263948847, - 43.399091167413815, - 45.65945467138436, - 10.323879128717438, - 17.406756102821927, - 46.70765041608834, - 46.265154949804675, - 46.966387230240066, - 46.58181691440536, - 1.1794390054814614, - 40.240832270343546, - 39.59688963721167, - 
1.169177901708881, - 1.176889456593387, - 46.512318262726104, - 16.255791986842784, - 46.90191826875892, - 38.002332039368945, - 1.1673839996531623, - 32.855434627015846, - 43.339268319257165, - 46.75273409704357, - 46.82224515218503, - 46.7787448289983, - 46.08633464118119, - 1.1789416201176985, - 45.01880600815589, - 17.692981429746695, - 43.82069805510859, - 42.693302457425894, - 40.895519742462156, - 43.141099312595934, - 48.08036522096514, - 1.178390117026328, - 45.95511642215028, - 35.29568405980472, - 1.1687957641452225, - 1.1765143734981645, - 46.688387154545254, - 47.06125638807941, - 45.346066735128574, - 1.1777709765320192, - 1.166989666506321, - 0.9847523589742398, - 18.562855771239047, - 47.9065264813057, - 46.73354514650198, - 1.1735046304883543, - 46.412712735423334, - 45.16100408019957, - 43.83022094061403, - 35.89794593782671, - 44.97192473982221, - 46.7633180339843, - 44.329869977212624, - 47.38342947643397, - 46.79402738420473, - 47.634269098703626, - 44.0213863595159, - 0.9845269249937244, - 45.78778499348287, - 43.90149865817902, - 45.65368969409286, - 47.746456721033944, - 47.21697228426952, - 47.01924612843149, - 46.3245200194134, - 0.9842560530393194, - 45.26992712182612, - 46.89243421872701, - 3.4924828727877877, - 45.25207572636316, - 47.25700297914972, - 46.94730150195301, - 39.12367514310055, - 42.117856976344655, - 44.28179459170351, - 46.596840500912684, - 45.392754933120926, - 1.1731165363524663, - 1.1755941425503302, - 46.46126582671268, - 45.79994582850055, - 31.36362072652773, - 43.50384100878153, - 45.440038476775335, - 1.1661505662188223, - 46.52744939333318, - 45.250414658311975, - 46.53386354717518, - 45.796239735104564, - 0.9841302985201961, - 46.27883497779145, - 47.83598353847002, - 46.607837943658275, - 1.1726681962992465, - 1.1751504766334446, - 46.84845290565303, - 46.07497571222637, - 33.33732005606778, - 45.813985387630716, - 45.57964157112892, - 46.41818933014048, - 1.1721397028860254, - 45.89252926130944, - 
47.09569465450331, - 47.250364539349285, - 35.22784278442342, - 1.1688030911620526, - 46.42186257421796, - 46.25658899517002, - 1.171409947579052, - 45.16137403712752, - 47.22442045049697, - 44.82261712339744, - 32.494327996097915, - 44.219079390101115, - 46.87735465561079, - 44.699203955991905, - 45.12568915598884, - 1.1747532937483116, - 47.069832959511444, - 1.1670956785442357, - 41.217948435045656, - 44.93033926516496, - 1.1766349885441727, - 35.47522021954888, - 46.21124702140885, - 46.24628779612773, - 34.53125955420697, - 46.66578037331865, - 43.65856477535035, - 45.03361057951491, - 46.76526122602155, - 10.182019712559228, - 45.71366318720834, - 9.833945628376052, - 9.322117004081543, - 46.537564499785105, - 31.262138808373493, - 37.90592059294092, - 46.820091937863225, - 10.139423148881114, - 46.75580347295349, - 46.89455728317566, - 39.52390472502032, - 42.643467900988064, - 38.90725083946543, - 9.086630150053459, - 8.937192123351853, - 40.9872575801166, - 46.394128489242924, - 41.193529101734704, - 47.34329154675404, - 10.054610354639179, - 43.31828144588645, - 44.553079069624026, - 46.98279134065351, - 46.830147489351724, - 45.31329233494219, - 45.552850223950976, - 9.295212965663417, - 10.01436272470524, - 43.57022598341257, - 45.70609566213184, - 43.449062338174066, - 46.855675373016474, - 47.68860594538369, - 47.09689498272573, - 47.173878516378814, - 46.069788054621185, - 38.92002107306488, - 46.38712908030891, - 47.104897416242906, - 46.938337511897245, - 45.36212980855197, - 9.7037632831636, - 9.265430506589102, - 46.11721659871563, - 38.06187391881914, - 43.25827348162763, - 46.84719251692419, - 47.03682707869591, - 9.90500846057903, - 45.68739012850455, - 43.47148156475432, - 45.23323967788647, - 39.81125388088527, - 45.95084232488125, - 8.919454342379801, - 8.706571515609426, - 45.29003523159025, - 46.867399234540684, - 45.35240769107086, - 44.80265358061401, - 41.83510960528982, - 43.92616077285124, - 44.61292075723489, - 46.86625528407582, - 
47.230904823696534, - 9.643361950798496, - 9.236779459262468, - 46.27993094745158, - 43.29062809284174, - 46.53130368901898, - 8.891092687715933, - 45.323215643957305, - 46.38559644193777, - 46.8553797027437, - 45.16725651833185, - 46.26177304715086, - 43.16649621953115, - 19.53072875578119, - 44.16107832748164, - 44.46643011473998, - 45.302511702487166, - 47.59950805589659, - 9.206283803180765, - 46.31521045156664, - 42.932315734513345, - 9.081962094633843, - 8.862645496755041, - 8.681026899042758, - 47.175946890403075, - 9.613647025719098, - 45.37459772842735, - 46.657937572561956, - 40.090063197986055, - 43.91176191056239, - 47.1764939819939, - 44.932347492473085, - 46.951971869749755, - 9.588107858966847, - 46.890536209011636, - 47.457220061858926, - 41.820791051617206, - 9.051934235829219, - 45.46750284471863, - 47.1114848526844, - 46.90614671206355, - 46.81408948407702, - 44.76508972637772, - 44.94143445208981, - 10.013702243637548, - 9.016326405341099, - 8.836765675846252, - 46.724030690708, - 45.670931647965055, - 45.52105012345985, - 46.760404038674345, - 46.879394746618935, - 44.17372013338399, - 45.75158023561404, - 8.805217872024683, - 45.797390838433785, - 13.147893146580197, - 10.47047709122617, - 46.61575812332005, - 46.51823693220529, - 4.823033237525791, - 46.77438522864306, - 12.978009554740229, - 38.60487947846694, - 42.776667803234396, - 46.400158258735026, - 47.945284694706544, - 46.56814403610221, - 4.817274157491479, - 46.62284523101857, - 43.12368820615556, - 41.32670008561977, - 47.18041683967238, - 43.946314235571926, - 44.21062282398479, - 46.19942835901387, - 43.058732279332816, - 45.38189559700182, - 12.884302510247224, - 41.31993708388949, - 46.47169213829526, - 47.19006572402318, - 47.14982705362978, - 47.06368907184152, - 4.812880414029111, - 11.16220592067454, - 46.574241250493166, - 46.97994816848278, - 47.45816665639938, - 46.13083135931701, - 44.32000975084153, - 43.41804159092183, - 42.66169852490167, - 45.48613569289166, - 
44.33345445574926, - 43.452008302705025, - 46.81171828117368, - 43.10993692872848, - 45.994793877105536, - 46.800586622051604, - 44.27154316655175, - 46.105917327794614, - 47.46844284412024, - 46.26483577817879, - 47.53682651754337, - 44.570703276937955, - 13.903655242145248, - 11.480956559418479, - 39.336500908555834, - 45.90660459732642, - 46.77917515765938, - 45.088381020490885, - 46.506580602768324, - 10.416775312398924, - 46.58444309156844, - 11.387487180031048, - 40.66527760299146, - 43.83362837067986, - 12.535722984692502, - 10.862075986088263, - 45.57849071079437, - 44.54752207894966, - 47.368339209936586, - 44.99292457355705, - 40.53083756344339, - 11.0636299214144, - 47.688667053142176, - 46.49150277169404, - 45.74006902822907, - 10.33525884882965, - 47.48557960393818, - 11.308966508889716, - 43.29259854243531, - 46.1099584752184, - 12.17957601526656, - 45.17415787692287, - 47.42069363597441, - 46.61857073840612, - 47.2421945434337, - 45.43588217737557, - 40.87274833234901, - 46.70759606653805, - 36.65554403597885, - 47.00974843039727, - 44.27238095134427, - 10.215116571612004, - 13.7852700376187, - 46.056843647274086, - 40.6532114020977, - 44.73992298080998, - 45.68916428641405, - 47.31026005200245, - 46.82535713731543, - 10.130547297609347, - 47.03536361799409, - 46.991892284267614, - 40.158116078863046, - 46.709887162762875, - 46.67477141304538, - 46.52127067854677, - 46.8876604645323, - 10.042145383707755, - 47.028109894652104, - 45.7372913308103, - 43.35504560755716, - 46.94810107337359, - 11.8541419498795, - 46.48396692070885, - 46.650791251635994, - 45.251645228092976, - 46.90500963017914, - 47.44769079351513, - 45.17830741847997, - 10.999409433497265, - 46.47750683850478, - 46.775120397902185, - 47.814786925390884, - 9.948141267257297, - 13.587316761063226, - 46.55485731583328, - 42.77962873201528, - 45.79657353014755, - 46.78648032853886, - 6.092950585496579, - 16.427217699690395, - 6.041669306781378, - 33.44834000640586, - 45.71021173581392, - 
40.44649791159415, - 44.41704966518361, - 45.16867811008679, - 46.553484065254395, - 11.951659518508801, - 40.964520355583325, - 17.222473173678548, - 15.810785212495478, - 5.896598504159821, - 46.15486957962745, - 6.267247605496281, - 38.65955739206124, - 16.334240831872595, - 40.92114763036668, - 44.25538155878388, - 46.79667178943268, - 5.886210147826818, - 45.086831193223446, - 47.3009972481073, - 47.07801971653764, - 46.80397795995714, - 46.806845163101094, - 43.42411625011456, - 46.37426980773864, - 41.17909401763616, - 46.16226579941339, - 47.44507636385267, - 11.930205494257288, - 16.233747914032552, - 6.031411752952078, - 45.92910900092996, - 47.47110773753601, - 39.494621036199604, - 16.734374432604927, - 47.37802539239185, - 46.74469194379278, - 16.087259096423576, - 46.92051488410033, - 47.34732444333283, - 46.40587690730415, - 5.872780467931287, - 44.55593583365237, - 45.7052618242163, - 6.085826627872682, - 44.846431805065144, - 45.41689502907426, - 45.289189315257374, - 44.95210230627078, - 42.99904025714732, - 46.839026962763846, - 6.250954782033121, - 44.8453124032084, - 45.278261112862296, - 6.020810288080093, - 17.182296973833214, - 46.63633652424215, - 5.866101016705892, - 46.160696572751434, - 46.32038287353405, - 46.89907461120633, - 45.95374406526204, - 46.925975948392896, - 46.42837166656114, - 15.78999329881552, - 44.465193132950446, - 46.21771478110725, - 47.314131714710484, - 6.0756954521719475, - 47.654756058723834, - 45.70610138140926, - 46.42506531228388, - 46.278376731444745, - 42.38396099575264, - 42.30031354989153, - 6.238343970049818, - 44.63197875047801, - 45.842276161134954, - 47.290515920449934, - 17.100464476837107, - 46.03336595920761, - 42.199011552033475, - 46.12151306088509, - 6.22230433569469, - 42.38409981463419, - 16.065182030558717, - 47.159068653554634, - 47.325440650358736, - 47.304702743784624, - 41.95305830151048, - 46.32090634094613, - 6.205841232502227, - 45.21525043209204, - 46.68630635575757, - 
6.014917714514858, - 16.99660741175496, - 46.04707312586917, - 42.19662106675615, - 45.454018018858854, - 47.15352407193948, - 46.93603762078255, - 46.83396897378934, - 47.15013333226566, - 46.77541231643884, - 47.24502443147304, - 42.759813321329425, - 47.001201569266215, - 6.192232905623395, - 47.13098385966453, - 47.01234120088298, - 46.79153288884898, - 46.373378014241005, - 15.754365078113269, - 5.8675558701311985, - 45.42074545020536, - 6.176488223442546, - 47.27337589918247, - 46.90578973015155, - 47.16448140788897, - 47.56000914081759, - 46.62586586855627, - 41.982557140496446, - 16.770559660054925, - 47.00638722437522 + 98.47864949895008, + 63.93792629897559, + 166.49088904974073, + 148.10611103663214, + 136.93608898138933, + 153.87586308063382, + 90.56559317052603, + 128.5291550251628, + 162.07670305023993, + 4.196475118529487, + 147.98743190294235, + 149.72190006929446, + 1.1777631788022311, + 133.74963259040626, + 150.11088322452974, + 51.863180020864455, + 4.139051494405947, + 79.2557164919149, + 1.6071996867452278, + 70.01915930069646, + 137.26891673137558, + 1.0402098481802287, + 1.8594022431966566, + 2.039486534010741, + 146.2938256177694, + 4.149796716964247, + 46.34667799086249, + 151.47361823216394, + 137.54739677623354, + 51.120748066850325, + 136.84512611150544, + 32.11962977236786, + 157.56752902839474, + 47.12119148820226, + 145.7314367353006, + 42.20270560372231, + 1.0426098595499007, + 3.5892682955617827, + 76.57100636536596, + 1.612496526198, + 2.6881979572654413, + 111.88402006134972, + 45.58338247702666, + 111.4111889571842, + 132.16301113659247, + 161.64295403385984, + 2.664705818704618, + 157.1638935590632, + 25.286871922093454, + 37.4310109209181, + 153.65911351957632, + 170.7256762539797, + 1.042128189044151, + 3.5869040413041917, + 83.30261586197105, + 90.55970202339806, + 132.9415846015795, + 95.80834182322752, + 112.4369142570399, + 130.7156977512895, + 90.98968148626129, + 0.9371270459059615, + 159.09279181195387, + 
162.9970081970886, + 2.6700708026356366, + 1.8557378891084773, + 156.12103246797463, + 1.3653778104766194, + 143.46571269908148, + 130.6346250925551, + 62.46023289115923, + 1.6116060776090406, + 139.8111163213305, + 34.86018737886305, + 146.06865198079345, + 133.96801334258495, + 1.0417626130871034, + 97.53781169320182, + 2.0478975910586503, + 151.90776052541932, + 126.40035137658552, + 44.78808603802679, + 163.9803901721219, + 152.78287546210825, + 154.77428093351637, + 145.74430748169019, + 163.03421864587594, + 146.28703545539014, + 82.55934081518444, + 73.53123347847824, + 87.20650201489909, + 79.6237289961617, + 146.76012425672718, + 162.46398331888344, + 2.046000130560097, + 104.11707807083185, + 142.7981951169222, + 45.781111784259096, + 164.13498801895528, + 93.34392878508068, + 127.09756182184553, + 0.9369885821746623, + 2.0440080852076448, + 1.6107470231739485, + 149.4484511068655, + 87.5539915318001, + 1.3670348174101508, + 1.1796264961520015, + 142.53546263417087, + 150.2065859393766, + 145.65883203776818, + 142.2125733485302, + 96.99016545580078, + 57.32416740237564, + 106.63530054957698, + 159.19142654590536, + 1.0415326032228118, + 98.71719677010607, + 106.73175053259962, + 1.6100826372227688, + 146.64805335844048, + 72.59518577946031, + 142.34132184480842, + 85.94240702745647, + 126.17687901514078, + 135.7696701691411, + 29.62308081982307, + 148.2421144346034, + 130.36261145275355, + 53.13931721337651, + 60.51160243931191, + 141.54695622051943, + 73.11803837069677, + 137.21251141324606, + 148.63844490308944, + 62.8404582738594, + 45.401831957608, + 0.9643006239654945, + 147.2298500624911, + 151.91506054646217, + 140.48716103219812, + 0.9577624967779577, + 160.06459889404132, + 155.2359539910114, + 126.59645077786885, + 15.69438649059929, + 152.80784197867072, + 23.527136960081226, + 0.9561607658842026, + 135.304826702121, + 142.47511264536794, + 149.8501903787043, + 151.43523022097875, + 0.9640793717349251, + 0.9631519875374979, + 145.2950579689095, 
+ 104.16937732598902, + 131.1708059930721, + 144.18743838648734, + 143.6919419808989, + 145.5428193502994, + 0.9638106812588461, + 0.9627615573404509, + 116.54193238808332, + 54.308902955274014, + 45.33558667751163, + 159.57290743060722, + 156.60366994005867, + 142.03263718363198, + 40.71403223415776, + 155.40510615972553, + 58.6681100653237, + 137.0437576533739, + 80.42300690375168, + 58.033083103031665, + 0.9693871919683402, + 145.73573001557583, + 60.44621412824422, + 54.994288450325136, + 88.73692291143061, + 0.9559459748869998, + 56.08954858644736, + 56.31747770886735, + 142.34693049846092, + 132.51002333480037, + 108.96587128971876, + 57.39669142091791, + 85.1254544103699, + 122.1342568773111, + 170.14800453897098, + 0.9667745869936778, + 164.77118206030752, + 77.67607540068808, + 0.9637172808805204, + 159.27278631745818, + 93.32941075871183, + 114.31154051585622, + 0.9577271441482065, + 0.9663851340406727, + 69.18116638176265, + 145.49566595839337, + 39.99458755398874, + 151.72058228459386, + 71.71902007184255, + 0.955684788125637, + 70.8845735459765, + 0.9659986810119839, + 26.22947505868186, + 149.5122587573231, + 62.37088691999424, + 0.9626226162613168, + 144.16390862207493, + 143.18707878361667, + 148.34680655358588, + 0.9655981786202157, + 128.6357514760558, + 0.972457638109508, + 47.97113131021637, + 0.962257594040168, + 135.91488529586792, + 0.9555101570399641, + 139.87244415060783, + 161.80374363862717, + 102.03749537949356, + 119.90228156989667, + 95.01508726085196, + 0.9618747782794568, + 97.04528669323962, + 124.83482655795, + 0.9575074351185681, + 97.4749088017089, + 143.04337002379702, + 0.9720616869548507, + 88.4343283770829, + 0.9616266920922193, + 104.03159874923712, + 102.89124420706305, + 140.2496100327507, + 143.1710058572335, + 101.42975069052237, + 128.03336431254732, + 85.69336920713639, + 0.9613543134449882, + 104.07697069101184, + 100.02889226751559, + 106.63283752921622, + 144.57311516379912, + 126.07240879815421, + 
161.55730431091774, + 73.12112420438781, + 0.9589217273481213, + 142.0323058738417, + 122.36148204858885, + 0.9572538602096321, + 112.98246752660035, + 142.34355181617389, + 41.04230698700827, + 8.473685991981666, + 170.80637904469666, + 142.97081601431356, + 140.00938953689527, + 1.0308124281925075, + 163.68673254202156, + 43.76708184183388, + 152.25998257998737, + 111.67117755812934, + 145.80673033340165, + 160.967274593742, + 121.82423347589321, + 151.58970194946951, + 43.836717431814456, + 168.33474851388928, + 152.8971313956712, + 72.9024488252911, + 21.820779024213074, + 1.0392675847166184, + 147.87020150991353, + 14.897143028689484, + 19.847221148151032, + 32.431828340180246, + 57.7813822991841, + 1.0334876773950952, + 94.25591710682407, + 151.42229388821934, + 62.73982551986958, + 1.0305004930196628, + 33.431851137208405, + 162.37672318207316, + 50.321107844780045, + 120.0631996858246, + 45.868384609266045, + 150.25509288811767, + 1.03641668355906, + 82.19687660990678, + 158.74432925111145, + 1.041876067399849, + 1.0459490020450795, + 74.46636703262733, + 159.72092018884473, + 145.89909226306747, + 151.4623812014693, + 53.96440008638893, + 159.793887362778, + 148.37554042172758, + 83.3128358383083, + 1.033330707971675, + 134.17516572064534, + 146.71192985844118, + 1.0352015128775223, + 1.030228349427348, + 173.4020929881413, + 1.0414756431813357, + 157.44806749626466, + 1.0330400451866075, + 1.0430419707188734, + 167.82243267657728, + 143.8312255273241, + 68.13449792020043, + 74.35987547428464, + 1.0410410061956523, + 144.46694632543532, + 1.0327651323294085, + 150.93003222189313, + 1.0391803120976406, + 1.0348231697568464, + 80.60319434281541, + 3.1207628480728475, + 151.16210456830606, + 1.044348655121621, + 1.0324784232146003, + 99.42447225407219, + 1.038776111100077, + 132.7893754958314, + 146.8726662885585, + 91.5964670484325, + 1.0406970130016908, + 1.0437330582244273, + 42.28479249749239, + 162.83839126288393, + 151.86715746595317, + 
140.5094808302986, + 170.2080960063118, + 131.07684807335298, + 88.96862061056908, + 163.9922734476757, + 44.213460221990154, + 157.8010866400773, + 1.0382665374856965, + 139.57673454433854, + 163.7758432408245, + 102.99718171708128, + 107.60774917922078, + 159.16551335735969, + 4.119717517454783, + 160.5803771988876, + 1.0378430568380714, + 115.44357851711793, + 167.4238211695712, + 103.79633528746076, + 154.03506418556444, + 159.03692094687025, + 1.032427282609682, + 32.52187142118156, + 158.57750457420016, + 141.67055142208721, + 160.71458938698333, + 157.24106314480454, + 157.40833384009724, + 150.60022387354616, + 80.91896448664748, + 1.0430666391532655, + 160.36671183081978, + 1.0347878859497883, + 1.030293958907628, + 147.50533105226975, + 152.4875796332852, + 160.31618334728296, + 1.0321960030040243, + 156.27786873980907, + 1.0375321120324796, + 160.4885833961135, + 111.93639192506156, + 172.24078944530834, + 145.3287404427809, + 1.0880735082543522, + 0.7878037099331565, + 1.0864480413552253, + 158.40272521901554, + 155.28074693629694, + 87.44836891077435, + 155.54752700738993, + 8.411714256180034, + 19.862348977650086, + 18.35501539895094, + 163.43115890247273, + 157.8836387689617, + 143.68115882020365, + 13.66284888141665, + 160.7292101444063, + 155.01427847930626, + 150.31432418581997, + 60.81928120084204, + 145.3926688034953, + 145.30123372502598, + 144.98393507215505, + 35.18970147025731, + 153.82777107784506, + 164.23228082777166, + 145.88278452124027, + 20.46954502286418, + 162.0360370063431, + 150.43884956663888, + 142.41966677764808, + 53.07266306010992, + 93.50532435009316, + 150.1523142285131, + 152.33361454488718, + 0.787209685332213, + 159.9704569183677, + 147.66926829001207, + 116.31853611522087, + 1.0774618364125428, + 164.22843982362895, + 103.98183305676696, + 152.52952151222078, + 90.29170862480086, + 1.0862563048060565, + 118.53710658997939, + 90.19968385647951, + 1.0770089089852286, + 59.61890934626195, + 134.6160499563656, + 
147.6477708991394, + 0.7870687303401608, + 171.47874197919785, + 165.99226887272076, + 83.5080960308232, + 151.55871514895225, + 154.9605789451006, + 154.1866343413245, + 152.69380076313175, + 78.46281024467942, + 165.86076250975873, + 74.6681179766703, + 1.0816751050475706, + 1.0766059511099162, + 1.091025249207128, + 151.61539901543878, + 165.44997737983917, + 41.75139614518547, + 7.388178711598297, + 1.0848156120039962, + 121.93333712957133, + 1.0761843006794773, + 1.0905643992997778, + 1.075801598924969, + 151.57738041471748, + 87.38815331117043, + 154.57766374016802, + 153.3353461131615, + 81.63500323812801, + 153.88446167160095, + 1.0900521500553328, + 151.65017721794743, + 118.01864188919838, + 40.91238161739305, + 1.0860502574663193, + 103.72384951664927, + 1.081356861209966, + 97.70962808524236, + 153.30715221364136, + 1.0754011583086598, + 149.80888083526256, + 0.7870161596702333, + 95.11588780527678, + 1.0824954483404, + 159.0909827809553, + 176.4607736857684, + 160.28483143240214, + 108.14616986068252, + 150.64495962435973, + 49.52814184554448, + 152.62988882612356, + 161.40766773375927, + 1.0809227984149974, + 150.0601857860385, + 156.59538854909297, + 1.072689949598873, + 152.81205676706514, + 0.7868728895290079, + 1.0857058881477388, + 143.4694111503961, + 159.8022996153893, + 144.9300712596306, + 1.089757442067835, + 160.11340438331118, + 132.79626776787333, + 50.38448421210805, + 162.42137561579725, + 5.284417747700096, + 1.0805116052247719, + 145.73004732672527, + 152.59775665509528, + 151.63963715309214, + 155.59850627759238, + 104.41906641764095, + 169.89843638971865, + 158.37348320912855, + 1.0800687750785642, + 149.5543247935483, + 156.60712632191078, + 159.6236209903005, + 163.09782416725415, + 98.6328505039743, + 53.85030009718123, + 61.00364034342645, + 142.05505100830447, + 16.614192215593924, + 16.582992843952567, + 154.47389623241062, + 150.9101058615698, + 90.42581449278116, + 159.53144787295545, + 1.1253578624639393, + 
38.131573465314304, + 163.695564516746, + 1.1316048014866884, + 1.1159054012388119, + 152.5411314388352, + 111.46983099035936, + 168.09092507016115, + 36.13058934697122, + 1.1197910040154087, + 142.05200673526159, + 78.09074458708291, + 157.63502242964265, + 162.03218881710688, + 80.0426703374817, + 164.26384362727924, + 1.1222030060702506, + 123.66591496581279, + 35.97653651285592, + 112.29012034978103, + 62.69199102131731, + 54.806250360805244, + 25.5070616004963, + 187.35211092519995, + 1.1217003700976045, + 145.32823111763997, + 145.9166945337544, + 1.1301150192515073, + 1.1155615329029929, + 154.1440872758632, + 88.5586247200791, + 161.60021419086345, + 1.121175594981433, + 1.1194211460505468, + 1.1184405197027008, + 17.60883897305572, + 174.5134372600641, + 160.45245655990746, + 0.8166461657826791, + 160.30564706046655, + 75.44218827386376, + 108.54547521267394, + 150.49806131791814, + 153.04150189313873, + 150.40965861420275, + 125.63958433236749, + 103.12983995128599, + 164.17811633308784, + 175.52459662743908, + 121.09400696724566, + 1.1180201884652679, + 166.27365155489332, + 76.42072368500718, + 146.90227613796094, + 110.70803654586257, + 171.79379505267624, + 158.67043375351244, + 147.76280504628218, + 1.1175125336867027, + 156.89279233182117, + 158.0652757498143, + 3.343340016597665, + 49.779892185016756, + 173.36352621939335, + 162.4424006508065, + 49.49838297370054, + 173.86161362836785, + 128.03796900006384, + 155.68412076198788, + 137.87250806830016, + 0.8165665367853991, + 1.1298869482124425, + 159.26492424008396, + 144.56503533715272, + 120.55988523349636, + 103.3722869693168, + 93.5099865200851, + 1.1209786631771586, + 77.46613714395933, + 153.76092950699294, + 154.6841596167678, + 146.17966014780984, + 1.1171782471429414, + 172.55763339822, + 174.99117233418923, + 157.46750414970307, + 0.816424346577868, + 1.1274076620999394, + 157.20421311127953, + 148.14748951821153, + 149.14697533706817, + 158.95389608842163, + 107.97531407241593, + 
151.43640801793904, + 0.8162494126902972, + 157.001545737823, + 163.80848036600747, + 158.89222886851297, + 147.3506488140666, + 1.1133445391411512, + 153.68284200756125, + 151.8834177926471, + 0.8160708323289537, + 136.43010052273473, + 162.61423354524993, + 133.02570532111102, + 137.86961562609895, + 133.04901735700332, + 154.9473181767413, + 123.93507737689346, + 50.83204611520686, + 1.1270195451857552, + 161.44093109510388, + 1.1195708009057284, + 169.64321510449827, + 129.6089117511605, + 1.115946234318508, + 60.34621183821726, + 101.29881161208688, + 160.90062346193574, + 68.21783931047266, + 154.41899008326143, + 174.07515811573973, + 159.677356250512, + 159.67728671666873, + 9.799978913114145, + 94.27732771999344, + 9.60214441506233, + 9.392617132404062, + 155.0463449410919, + 71.59183194783785, + 87.06866691125934, + 157.13349078706932, + 9.759436169606595, + 110.44611293008246, + 171.1626230380253, + 89.85437363374635, + 107.09248087440588, + 126.74466225447065, + 10.009602057141537, + 9.177527712733529, + 99.62101604875475, + 102.08957950312852, + 99.71118980213345, + 175.89684251359242, + 10.182586030301673, + 171.66004511817064, + 148.24171173832124, + 164.5397331583309, + 158.71440804719356, + 86.55832242496149, + 148.610396831239, + 9.368509685917438, + 10.136730874821687, + 173.75231796226313, + 168.18072479771067, + 125.24195815296933, + 151.26149869648452, + 130.6197551882794, + 174.23395009631983, + 170.65779238484487, + 148.1296912550562, + 131.11524857886738, + 177.99920893337523, + 167.5808938510404, + 158.60603057794222, + 93.6097533900039, + 9.587874811966838, + 9.33150536695352, + 141.2149869829261, + 117.88939818622781, + 133.45305575288236, + 156.7555665933833, + 166.2992810974147, + 8.762060933047495, + 147.60747975090285, + 125.39702986854361, + 126.29551477783566, + 133.3684883476696, + 169.84463465109542, + 9.160889914093532, + 9.75005007182584, + 91.25897804548956, + 171.15603143396729, + 137.11852945151446, + 119.70724002664221, + 
157.24098320319794, + 144.12095644229885, + 131.6771710258767, + 164.00686483698965, + 120.71707004833677, + 9.560442320047777, + 9.299425721987362, + 147.15785637439873, + 170.64643820040646, + 181.465984660646, + 9.098182272291353, + 131.1874185050373, + 100.18931014367688, + 166.410568062446, + 135.47929425317378, + 151.28962080931584, + 169.34032285811423, + 24.163402926519016, + 130.3951109594527, + 133.85939391500654, + 91.24306358260182, + 183.98754016151273, + 9.265911045247684, + 147.14244062731618, + 165.66255588662568, + 10.016411965833509, + 9.03577437369573, + 9.70728564931857, + 122.8213056543772, + 9.533743128327513, + 143.45968503667223, + 155.32709571771161, + 141.06113578797667, + 145.47889938004263, + 167.35960747366406, + 138.12559014567552, + 116.75045269404782, + 9.4953352412109, + 170.07468770066882, + 172.07629747140533, + 155.39552706715028, + 9.96413703689447, + 144.65169143749998, + 169.142417216155, + 112.76319305930042, + 166.30777737368877, + 123.90774653996388, + 132.11710295459207, + 8.76790539542995, + 9.923343461828647, + 8.972068632607057, + 152.30472233633313, + 99.16466897297458, + 147.39899220637375, + 167.5046285318718, + 158.30798003347417, + 176.8098098029006, + 169.2000502496997, + 8.908205534006084, + 147.04973272590675, + 6.01978171115786, + 12.908947280828421, + 161.20885865837164, + 154.1041738397025, + 12.160112764259807, + 183.9484777068351, + 13.885015446203202, + 103.27604069377547, + 68.56270954501308, + 153.39985703870556, + 188.5641680250544, + 151.39232245655768, + 12.050089294787492, + 183.13047361941102, + 114.09672566233004, + 109.88264169611061, + 118.56400136868983, + 130.5787804713655, + 131.9836940557652, + 139.4770525169641, + 172.40959805680149, + 153.8901427211502, + 13.813000129286806, + 115.17874112168954, + 106.36053561017184, + 174.23315480590185, + 169.50614560985875, + 163.7261937236369, + 11.957456410326769, + 13.191395790527517, + 152.6369175652841, + 164.00689931377138, + 124.1532871601288, + 
158.7919901602378, + 126.3012920481913, + 110.01300143579287, + 166.51966455859474, + 161.16730547199728, + 137.84358628055278, + 123.59630141121379, + 109.08989919709578, + 113.66676604314083, + 150.33107775824936, + 155.76683850736808, + 180.51837524079605, + 172.49809361722134, + 171.2412543685433, + 146.52428847969958, + 125.58622347928333, + 132.3599749727434, + 14.245461215559237, + 11.847782329285673, + 127.588030395774, + 169.03076884237493, + 160.74766094154035, + 141.23866796872034, + 111.11477769019474, + 12.898248376303878, + 164.38673745815677, + 11.791663338710885, + 148.00296428763687, + 140.16323874251623, + 13.719781371654578, + 14.365561456573998, + 89.32211257795143, + 135.76622159161508, + 175.86032158817434, + 128.11591032818185, + 141.79940543502275, + 13.157166878859636, + 176.72190145631947, + 146.35619986228915, + 98.02869268663022, + 12.811778712246966, + 178.01632978541917, + 11.747222913476566, + 173.95822172954252, + 172.47660061508643, + 13.568556768695913, + 135.1198744591959, + 122.01181780569887, + 165.54722192942938, + 176.91918611654273, + 135.48421254380435, + 152.73279297531656, + 183.54215600068494, + 94.73349204436757, + 165.3454353780521, + 84.12230571074015, + 12.73103339619439, + 14.20676756417383, + 140.07559949201985, + 145.7554344839868, + 148.14304437101455, + 144.7060493293736, + 173.19895239158285, + 107.2396185797313, + 12.648044488473259, + 173.48665402770794, + 161.86284234640354, + 144.49958539317737, + 183.33130603616738, + 149.75316477343017, + 153.29421953478465, + 112.95288962968242, + 12.55136585792316, + 173.4614521532605, + 136.4085114015674, + 173.79337782013562, + 186.83123762499903, + 13.449155280150386, + 155.12272657027916, + 108.84862656043424, + 150.6559527232612, + 161.90374448992205, + 169.6874597897037, + 185.13622778245175, + 13.139280888748093, + 148.81997444276612, + 162.826727139871, + 134.34831771089154, + 12.478143605322522, + 14.14151231689335, + 149.25750191310448, + 167.44106770036936, + 
170.90279518575983, + 157.84394143590183, + 8.296884066877869, + 7.386407378393029, + 8.177010477741181, + 60.00030364994894, + 137.35670186784466, + 151.41307554547254, + 150.53265674110258, + 141.36182090288565, + 154.42392832445645, + 14.407560995301617, + 113.05280253165802, + 8.771319013508563, + 7.756832533799784, + 7.915167569814742, + 172.68555416184375, + 9.246590778625794, + 112.20973585271739, + 7.374925625154626, + 111.78749154901601, + 119.95753341645725, + 154.77722687049408, + 7.888580292543184, + 149.23559365306315, + 175.52342653145377, + 158.75097413261327, + 114.24446296440473, + 167.67413927012774, + 139.92437779140218, + 153.0991583611961, + 159.7319334713746, + 175.77990646480632, + 175.13373633806003, + 16.006580912678864, + 7.364167548538875, + 8.116426613758023, + 153.43476931019558, + 172.67401521610824, + 137.25039229504623, + 23.94869767384389, + 175.71290886984852, + 160.1562681126053, + 7.350730708586878, + 168.18537884347361, + 177.44645900467552, + 144.69151322813394, + 7.870439881886282, + 158.459275555328, + 137.04634114797315, + 8.28967641118504, + 89.24830243345173, + 138.72903724038372, + 144.82472911115988, + 132.17749274525417, + 174.5610183503014, + 187.22444190737485, + 9.045633413519324, + 144.6870829429866, + 94.13484353638168, + 8.073564944014072, + 8.763589893125177, + 144.5449141719037, + 7.849459359846659, + 172.45229931306682, + 148.0354241542905, + 164.34364023912008, + 96.62180529545114, + 168.59818307908336, + 164.06742901634536, + 7.6595131274639785, + 182.08298206042065, + 174.01829936632802, + 168.2984620634042, + 8.26924767633141, + 127.74401148092224, + 152.9540589411171, + 149.70999922953388, + 138.05370099020382, + 163.98282164395957, + 109.96934554907047, + 8.996791001407733, + 144.24233837720223, + 98.41465480385448, + 176.36506372732458, + 8.593325518249502, + 139.963285147378, + 165.79681947035346, + 175.45780184642518, + 8.96329041536684, + 125.14956588858662, + 7.352306186940013, + 177.70817869555052, 
+ 177.69694242040705, + 165.7171562780864, + 159.43363801309405, + 181.71342376901586, + 8.928314868453635, + 147.12698777390037, + 98.19159288935101, + 8.060910754944894, + 8.549652936388815, + 141.01923634310606, + 163.73524549575265, + 160.94825111954881, + 163.89847325356007, + 158.8883048029801, + 101.49136858702906, + 175.66290061319754, + 149.61494347618603, + 167.11717553963226, + 172.02372197225566, + 187.6631035218658, + 8.900520682145078, + 171.61286914605415, + 115.54311347996163, + 169.3438620700623, + 158.53427223473756, + 7.62769842231285, + 7.851152107489017, + 166.35296262059944, + 8.869163971328895, + 174.27048762162556, + 105.1060664928901, + 176.44597164262635, + 183.93578989094215, + 155.56348841330345, + 165.030948332619, + 24.021020986288374, + 158.54498277925777 ] } \ No newline at end of file From 0ceb6988ae21a39e87d5295cc319d8ac967d0404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 28 Jan 2026 18:05:47 +0100 Subject: [PATCH 253/334] ci: Disable broken test (#3121) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- tests/test_utils/recipes/mamba-static-inference.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_utils/recipes/mamba-static-inference.yaml b/tests/test_utils/recipes/mamba-static-inference.yaml index 4cf35d99b70..0d5a83d98fe 100644 --- a/tests/test_utils/recipes/mamba-static-inference.yaml +++ b/tests/test_utils/recipes/mamba-static-inference.yaml @@ -59,8 +59,8 @@ products: - environment: [dev] scope: [mr-broken, mr-github-broken] platforms: [dgx_h100] - - test_case: [hybrid_static_inference_tp1_pp1_2B_cudagraphs] - products: - - environment: [dev] - scope: [mr] - platforms: [dgx_h100] + # - test_case: [hybrid_static_inference_tp1_pp1_2B_cudagraphs] + # products: + # - environment: [dev] + # scope: [mr] + # platforms: [dgx_h100] # Broken after dev2main sync 01/27 From 
f6f2abeaa15267e5725d5354a1d14ee9b2231b19 Mon Sep 17 00:00:00 2001 From: Li Tao Date: Thu, 29 Jan 2026 09:49:40 +0800 Subject: [PATCH 254/334] [Dev] Param offset in _ParamAndGradBucket should be aligned (#3010) Signed-off-by: skydoorkai Co-authored-by: skydoorkai Co-authored-by: Kunlun Li <94586211+kunlunl@users.noreply.github.com> --- .../core/distributed/param_and_grad_buffer.py | 10 ++-- .../distributed/test_param_and_grad_buffer.py | 53 +++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/megatron/core/distributed/param_and_grad_buffer.py b/megatron/core/distributed/param_and_grad_buffer.py index 50cf3e0ea37..db3948562f5 100644 --- a/megatron/core/distributed/param_and_grad_buffer.py +++ b/megatron/core/distributed/param_and_grad_buffer.py @@ -78,6 +78,8 @@ class _ParamAndGradBucket: communication. Its application is twofold: it facilitates the averaging of gradients and the scaling of gradients in the context of the Mixture of Experts (MoE) model. bucket_id: Index of bucket in buffer. + param_index_map: Mapping from param to (start, end, bucket_id) in the global buffer. + Used to derive bucket-local offsets for param_to_index. """ def __init__( @@ -89,6 +91,7 @@ def __init__( numel_unpadded: int, gradient_scaling_factor: float, bucket_id: int, + param_index_map: Dict[torch.nn.Parameter, tuple], ): self.params_list = params self.params = set(params) @@ -102,11 +105,11 @@ def __init__( self.numel_unpadded = numel_unpadded self.gradient_scaling_factor = gradient_scaling_factor self.bucket_id = bucket_id + # Derive bucket-local param offsets from the global param_index_map. 
self.param_to_index = {} - offset = 0 for param in params: - self.param_to_index[param] = (offset, offset + param.numel()) - offset += param.numel() + global_start, global_end, _ = param_index_map[param] + self.param_to_index[param] = (global_start - offset, global_end - offset) class _ParamAndGradBucketGroup: @@ -926,6 +929,7 @@ def _new_bucket( numel_unpadded=numel_unpadded, gradient_scaling_factor=self.gradient_scaling_factor, bucket_id=bucket_id, + param_index_map=self.param_index_map, ) for bucket_param in bucket_params: assert bucket_param not in self.param_to_bucket diff --git a/tests/unit_tests/distributed/test_param_and_grad_buffer.py b/tests/unit_tests/distributed/test_param_and_grad_buffer.py index ac0c6a6c422..295ef0acc7e 100644 --- a/tests/unit_tests/distributed/test_param_and_grad_buffer.py +++ b/tests/unit_tests/distributed/test_param_and_grad_buffer.py @@ -162,6 +162,59 @@ def _pad_param_if_needed(numel_unpadded): Utils.destroy_model_parallel() +def test_param_to_index_alignment_with_padding(): + """Ensure bucket-local param offsets honor padding when DistOpt pads params.""" + Utils.initialize_model_parallel() + + # With input_dim=4, output_dim=4: + # - weight: 4*4 = 16 elements + # - bias: 4 elements + # Since 16 % 64 != 0, the bias must be padded away from the weight, + # making padding observable. 
+ input_dim = 4 + output_dim = 4 + model, param_and_grad_buffer, _ = get_model_and_buffers( + input_dim=input_dim, + output_dim=output_dim, + num_layers=1, + bias=True, + shared_embedding=False, + bucket_size=None, # single bucket + use_distributed_optimizer=True, # enforces 64-element alignment + overlap_grad_reduce=True, + average_in_collective=False, + ) + + bucket = param_and_grad_buffer.buckets[0] + naive_offset = 0 + padding_observed = False + + for param in bucket.params_list: + global_start, global_end, _ = param_and_grad_buffer.param_index_map[param] + expected_local_start = global_start - bucket.offset + expected_local_end = global_end - bucket.offset + local_start, local_end = bucket.param_to_index[param] + + # param_to_index should match the padded offsets used in the global buffer. + assert (local_start, local_end) == (expected_local_start, expected_local_end) + + # At least one param should have been padded relative to naive packing. + if local_start != naive_offset: + padding_observed = True + naive_offset = local_end + + # Verify the slice retrieved via param_to_index matches param.data view. + param_slice = bucket.param_data.view(-1)[local_start:local_end] + torch.testing.assert_close(param_slice, param.data.view(-1)) + + assert padding_observed, ( + "Expected padding to be applied between params. " + "Ensure model dimensions are chosen such that param sizes are not multiples of 64." 
+ ) + + Utils.destroy_model_parallel() + + @pytest.mark.parametrize("use_distributed_optimizer", [False, True]) @pytest.mark.parametrize("overlap_grad_reduce", [False, True]) @pytest.mark.parametrize("average_in_collective", [False, True]) From d587dd163a8e96733b49520ba02727b5ba55d42e Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:05:40 +0800 Subject: [PATCH 255/334] [Dev] fix cg missing wgrad hook (#2999) --- megatron/training/arguments.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 54c7eeaa3fd..eaf2188a180 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1334,6 +1334,25 @@ def validate_args(args, defaults={}): assert is_te_min_version("2.8.0"), ( "overlap_grad_reduce is only supported with TE >= 2.8.0 when enabling delay_wgrad_compute" ) + wgrad_in_graph_scope = CudaGraphScope.attn in args.cuda_graph_scope or ( + CudaGraphScope.moe_router in args.cuda_graph_scope + and args.moe_shared_expert_intermediate_size is not None + and not args.moe_shared_expert_overlap + ) + if wgrad_in_graph_scope: + assert is_te_min_version( + "2.12.0" + ), "CUDA graph with delay_wgrad_compute requires TE version >= 2.12.0." + assert args.gradient_accumulation_fusion, ( + 'CUDA graph with delay_wgrad_compute requires gradient_accumulation_fusion ' + 'to be enabled. This is because the default gradient accumulation does not ' + 'use static memory addresses, which breaks CUDA graph requirements.' + ) + if CudaGraphScope.attn in args.cuda_graph_scope: + assert ( + not args.add_bias_linear and not args.add_qkv_bias + ), "CUDA graph with delay_wgrad_compute doesn't support attn bias for now." 
+ if not args.gradient_accumulation_fusion: assert is_te_min_version("2.7.0"), ( "disabling gradient_accumulation_fusion is only supported with TE >= 2.7.0 " From 8f8f7351ccccb24d4d1b92697cb307fb08830bca Mon Sep 17 00:00:00 2001 From: Jianbin Chang Date: Fri, 30 Jan 2026 00:51:46 +0800 Subject: [PATCH 256/334] [Megatron-FSDP] Add fsdp_all_gather_in_start_param_sync option in DDP Config (#2627) Co-authored-by: Zijie Yan --- .../core/distributed/distributed_data_parallel_config.py | 8 ++++++++ .../megatron_fsdp/distributed_data_parallel_config.py | 8 ++++++++ .../distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py | 9 +++++---- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel_config.py b/megatron/core/distributed/distributed_data_parallel_config.py index eaec971c79c..7e2d19e5ce9 100644 --- a/megatron/core/distributed/distributed_data_parallel_config.py +++ b/megatron/core/distributed/distributed_data_parallel_config.py @@ -124,6 +124,14 @@ class DistributedDataParallelConfig: This option will be automatically set to True when nccl_ub=True. """ + fsdp_all_gather_in_start_param_sync: bool = True + """ + If True, use all-gather during the initial Megatron-FSDP parameter + synchronization step. This can increase overlap between the first + parameter all-gather and computation, helping to better hide the + initial communication cost. + """ + outer_dp_sharding_strategy: str = 'no_shard' """ Sharding strategy for outer data parallel group in Hybrid Sharded Data Parallel (HSDP) mode. 
diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py index f0c817e1f80..32c0ffde2ad 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py @@ -119,6 +119,14 @@ class DistributedDataParallelConfig: This option will be automatically set to True when nccl_ub=True. """ + fsdp_all_gather_in_start_param_sync: bool = True + """ + If True, use all-gather during the initial Megatron-FSDP parameter + synchronization step. This can increase overlap between the first + parameter all-gather and computation, helping to better hide the + initial communication cost. + """ + outer_dp_sharding_strategy: str = 'no_shard' """ Sharding strategy for outer data parallel group in Hybrid Sharded Data Parallel (HSDP) mode. diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index c1c11721f7e..c99141d4d44 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -1041,10 +1041,11 @@ def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bo if not force_sync and self.ddp_config.overlap_param_gather: # All-gather the first bucket before the forward pass. 
- first_param = list(self.module.parameters())[0] - self.all_gather_and_wait_parameters_ready( - params=[first_param], prefetch=True, wait_bucket_ready=False - ) + if self.ddp_config.fsdp_all_gather_in_start_param_sync: + first_param = list(self.module.parameters())[0] + self.all_gather_and_wait_parameters_ready( + params=[first_param], prefetch=True, wait_bucket_ready=False + ) else: self.synchronize_param_gather() for bucket_id in range(self.all_gather_pipeline.num_buckets): From bde9e32f9f822ab8e2f887e56324519c3df09919 Mon Sep 17 00:00:00 2001 From: Li Jinliang Date: Fri, 30 Jan 2026 00:53:27 +0800 Subject: [PATCH 257/334] [Dev] Support EP with HSDP (#2800) Signed-off-by: jinliangl Co-authored-by: Jinliang Li Co-authored-by: Jinliang Li Co-authored-by: Jianbin Chang --- .../distributed/fsdp/mcore_fsdp_adapter.py | 54 ++++++++++++++++--- megatron/core/distributed/fsdp/src/README.md | 14 +++-- .../fsdp/src/megatron_fsdp/fully_shard.py | 5 ++ .../megatron_fsdp/param_and_grad_buffer.py | 4 +- .../fsdp/src/megatron_fsdp/utils.py | 42 ++++++++++++--- 5 files changed, 99 insertions(+), 20 deletions(-) diff --git a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py index d6384e70488..5bf543fdc5c 100644 --- a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +++ b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py @@ -212,6 +212,13 @@ def _init_dist_index(self, pg_collection): hybrid_fsdp_group = parallel_state.get_data_parallel_group( with_context_parallel=True, partial_data_parallel=False ) + expt_dp_group = parallel_state.get_expert_data_parallel_group( + partial_expert_data_parallel=True + ) + hybrid_fsdp_expt_group = parallel_state.get_expert_data_parallel_group( + partial_expert_data_parallel=False + ) + ep_group = parallel_state.get_expert_model_parallel_group() else: dp_cp_group = parallel_state.get_data_parallel_group( with_context_parallel=True, partial_data_parallel=False @@ -227,6 +234,10 @@ def 
_init_dist_index(self, pg_collection): dp_cp_group = pg_collection.intra_dp_cp outer_fsdp_group = pg_collection.inter_dist_opt hybrid_fsdp_group = pg_collection.dp_cp + # This has not been tested yet. + expt_dp_group = getattr(pg_collection, 'intra_expt_dp', None) + hybrid_fsdp_expt_group = getattr(pg_collection, 'expt_dp', None) + ep_group = getattr(pg_collection, 'ep', None) else: dp_cp_group = pg_collection.dp_cp outer_fsdp_group = None @@ -243,6 +254,18 @@ def _init_dist_index(self, pg_collection): expt_tp_group = single_rank_group if enable_hsdp: + if expt_dp_group is not None: + expt_mesh = _get_hsdp_tp_mesh( + outer_fsdp_group, expt_dp_group, expt_tp_group, ep_size=ep_group.size() + ) + expt_device_mesh = DeviceMesh.from_group( + [outer_fsdp_group, expt_dp_group, expt_tp_group], + device_type="cuda", + mesh=expt_mesh.tolist(), + mesh_dim_names=["outer_fsdp_dp", "dp_cp", "tp"], + ) + else: + expt_device_mesh = None mesh = _get_hsdp_tp_mesh(outer_fsdp_group, dp_cp_group, tp_group) dist_index = FSDPDistributedIndex( hsdp_outer_dp_shard=self.ddp_config.outer_dp_sharding_strategy != "no_shard", @@ -256,6 +279,8 @@ def _init_dist_index(self, pg_collection): dp_shard_dim="dp_cp", tp_dim="tp", hybrid_fsdp_group=hybrid_fsdp_group, + hybrid_fsdp_expt_group=hybrid_fsdp_expt_group, + expt_device_mesh=expt_device_mesh, ) else: if ep_group is not None: @@ -308,22 +333,24 @@ def sync_rng_states_across_tp_group(self): _load_rng_state_dict(broadcast_list[0]) -def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group): +def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group, ep_size=1): assert HAVE_EINOPS, "einops is not installed. Please install it with `pip install einops`." 
world_size = dist.get_world_size() mesh = einops.rearrange( torch.arange(world_size), - "(outer_fsdp_dp fsdp tp) -> outer_fsdp_dp fsdp tp", + "(outer_fsdp_dp fsdp ep tp) -> ep outer_fsdp_dp fsdp tp", outer_fsdp_dp=outer_fsdp_dp_group.size(), tp=tp_group.size(), + ep=ep_size, ) mesh_fsdp_ranks = einops.rearrange( mesh, - 'outer_fsdp_dp fsdp tp -> (outer_fsdp_dp tp) fsdp', + 'ep outer_fsdp_dp fsdp tp -> (outer_fsdp_dp ep tp) fsdp', tp=tp_group.size(), fsdp=dp_cp_group.size(), + ep=ep_size, ) fsdp_group_ranks = dist.get_process_group_ranks(dp_cp_group) assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_fsdp_ranks, fsdp_group_ranks), ( @@ -333,7 +360,7 @@ def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group): mesh_tp_ranks = einops.rearrange( mesh, - 'outer_fsdp_dp fsdp tp -> (outer_fsdp_dp fsdp) tp', + 'ep outer_fsdp_dp fsdp tp -> (outer_fsdp_dp fsdp ep) tp', tp=tp_group.size(), fsdp=dp_cp_group.size(), ) @@ -345,9 +372,10 @@ def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group): mesh_outer_fsdp_dp_ranks = einops.rearrange( mesh, - 'outer_fsdp_dp fsdp tp -> (fsdp tp) outer_fsdp_dp', + 'ep outer_fsdp_dp fsdp tp -> (fsdp ep tp) outer_fsdp_dp', tp=tp_group.size(), fsdp=dp_cp_group.size(), + ep=ep_size, ) outer_fsdp_dp_group_ranks = dist.get_process_group_ranks(outer_fsdp_dp_group) assert _check_mesh_ranks_and_group_ranks_are_consistent( @@ -357,7 +385,21 @@ def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group): f"do not match the ranks in the Outer FSDP DP group {outer_fsdp_dp_group_ranks}." ) - return mesh + # Exclude the expert parallel dimension + rank = dist.get_rank() + dp_tp_meshes = [per_ep_mesh for per_ep_mesh in mesh if rank in per_ep_mesh.reshape(-1).tolist()] + assert ( + len(dp_tp_meshes) == 1 + ), f"[Megatron-FSDP] Current rank {rank} is not unique in the mesh ranks {mesh.tolist()}." 
+    assert ( +        len(dp_tp_meshes[0].reshape(-1).tolist()) +        == outer_fsdp_dp_group.size() * dp_cp_group.size() * tp_group.size() +    ), ( +        f"[Megatron-FSDP] DP-TP mesh size {len(dp_tp_meshes[0].reshape(-1).tolist())} " +        f"does not match the expected size " +        f"{outer_fsdp_dp_group.size() * dp_cp_group.size() * tp_group.size()}." +    ) +    return dp_tp_meshes[0] def _get_dp_tp_mesh(dp_cp_group, tp_group, ep_size=1): diff --git a/megatron/core/distributed/fsdp/src/README.md b/megatron/core/distributed/fsdp/src/README.md index bc4cdaa078e..75cb7c45613 100644 --- a/megatron/core/distributed/fsdp/src/README.md +++ b/megatron/core/distributed/fsdp/src/README.md @@ -156,12 +156,13 @@ device_mesh[("dp_outer", "dp_shard", "cp")]._flatten("hsdp") hsdp_group = device_mesh["hsdp"].get_group() # Initialize DeviceMesh for expert parallel (EP) modules when using FSDP + EP. -expt_device_mesh = DeviceMesh.from_group( - [expt_dp_group, expt_tp_group], - device_type="cuda", - mesh=expt_mesh.tolist(), - mesh_dim_names=["dp_shard_cp", "tp"], +expert_device_mesh = torch.distributed.device_mesh.init_device_mesh( + "cuda", + mesh_shape=(dp_outer_size, expt_dp_shard_size, expt_tp_size), + mesh_dim_names=("dp_outer", "dp_shard_cp", "tp"), ) +expert_device_mesh[("dp_outer", "dp_shard_cp")].flatten("hsdp") +hsdp_expt_group = expert_device_mesh["hsdp"].get_group() ``` ### Convert models into fully-sharded `MegatronFSDP` models with `fully_shard_model`. @@ -186,6 +187,8 @@ model = fully_shard_model( tp_dim="tp", # Only required when using HSDP. Otherwise, set this to None. hybrid_fsdp_group=hsdp_group, + # Only required when using HSDP + EP. Otherwise, set this to None. + hybrid_fsdp_expt_group=hsdp_expt_group, # Only required for FSDP + EP. Otherwise, set this to None.
expt_device_mesh=expt_device_mesh, # FSDP Sharding Strategy: no_shard (0) / optim (1) / optim_grads (2) / optim_grads_params (3) @@ -295,6 +298,7 @@ Megatron-FSDP's `fully_shard_*` API has a comprehensive set of arguments for fin - `tp_dim` is the name of the sub-mesh used for tensor parallelism (TP), which is required for `(FSDP, TP)`-strided sharding when using Megatron-LM or Torch-native `DTensor` TP. - For more information about tensor parallelism, refer to: [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053). - `hybrid_fsdp_group` is the `ProcessGroup` which contains all ranks in the flattened `dp_shard_dim` and `dp_outer_dim` sub-meshes utilized to specify the `(DP-Outer, DP-Shard)` sharded mesh coordinates for the weight and gradient buffers. Required for HSDP. + - `hybrid_fsdp_expt_group` defines the data-parallel communication group for expert parameters. It is required when using HSDP together with expert parallelism (EP). - `expt_device_mesh` is another [`torch.distributed.DeviceMesh`](https://docs.pytorch.org/docs/stable/distributed.html#devicemesh) tailored for the expert parallel (EP) modules in `MegatronFSDP`. - `dp_shard_dim` is the name of the sub-mesh required for FSDP sharding of the EP modules, enabling expert data parallelism (EDP). - `tp_dim` is the name of the sub-mesh used for expert tensor parallelism (ETP), which is required for `(FSDP, ETP)`-strided sharding when using Megatron-LM or Torch-native `DTensor` ETP.
diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py index df210f15f05..7a118a8424b 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py @@ -77,6 +77,7 @@ def fully_shard_model( dp_outer_dim: Optional[str] = None, tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, + hybrid_fsdp_expt_group: Optional[torch.distributed.ProcessGroup] = None, expt_device_mesh: Optional[DeviceMesh] = None, fsdp_unit_modules: Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]] = None, zero_dp_strategy: str | int = 3, @@ -352,6 +353,8 @@ class that schedules the sharding lifecycle of the model parameters and gradient tp_dim=tp_dim, # Only required for HSDP. hybrid_fsdp_group=hybrid_fsdp_group, + # Only required for HSDP + EP. + hybrid_fsdp_expt_group=hybrid_fsdp_expt_group, # Access to flattened DP rank assignments for HSDP. hsdp_outer_dp_shard=_outer_fsdp_sharding, # Only required for Megatron-FSDP + EP. 
@@ -521,6 +524,7 @@ def fully_shard( dp_outer_dim: Optional[str] = None, tp_dim: Optional[str] = None, hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None, + hybrid_fsdp_expt_group: Optional[torch.distributed.ProcessGroup] = None, expt_device_mesh: Optional[DeviceMesh] = None, fsdp_unit_modules: Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]] = None, zero_dp_strategy: str | int = 3, @@ -568,6 +572,7 @@ def fully_shard( dp_outer_dim=dp_outer_dim, tp_dim=tp_dim, hybrid_fsdp_group=hybrid_fsdp_group, + hybrid_fsdp_expt_group=hybrid_fsdp_expt_group, expt_device_mesh=expt_device_mesh, fsdp_unit_modules=fsdp_unit_modules, zero_dp_strategy=zero_dp_strategy, diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index 0865ff8e647..b1112f4b375 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -1882,7 +1882,9 @@ def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params): hsdp_buf_dp_group = self.dist_index.get_fsdp_group( is_expert_parallel=group.is_expert_param ) - main_buf_extra_kwargs["dp_rank"] = self.dist_index.get_logical_hybrid_fsdp_rank() + main_buf_extra_kwargs["dp_rank"] = self.dist_index.get_logical_hybrid_fsdp_rank( + is_expert_parallel=group.is_expert_param + ) else: main_buf_dp_group = self.dist_index.get_fsdp_group( is_expert_parallel=group.is_expert_param diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py index 01523929ae1..c1c8a0b0c7a 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py @@ -443,6 +443,7 @@ def __init__( dp_outer_dim: Optional[str] = None, tp_dim: Optional[str] = None, hybrid_fsdp_group: 
Optional[torch.distributed.ProcessGroup] = None, + hybrid_fsdp_expt_group: Optional[torch.distributed.ProcessGroup] = None, hsdp_outer_dp_shard: bool = False, expt_device_mesh: Optional[DeviceMesh] = None, ): @@ -457,6 +458,9 @@ def __init__( hybrid_fsdp_group (Optional[torch.distributed.ProcessGroup]): The process group for hybrid FSDP communication, which is the flattened combination of the dp_outer and dp_shard process groups. + hybrid_fsdp_expt_group (Optional[torch.distributed.ProcessGroup]): The + process group for hybrid FSDP expert communication, which is the flattened + combination of the expert dp_outer and expert dp_shard process groups. hsdp_outer_dp_shard (bool): Whether to have outer DP group sharding in hybrid FSDP. Specifying outer sharding will lift the bucket sharding coordinate system to flattened ranks of (dp_shard, dp_outer) instead of @@ -495,6 +499,7 @@ def __init__( # Save a reference to the overall HSDP process group, which is the flattened # combination of the outer-FSDP and FSDP process groups. self.hybrid_fsdp_group = hybrid_fsdp_group + self.hybrid_fsdp_expt_group = hybrid_fsdp_expt_group # Retrieve the expert parallel process groups from the DeviceMesh. self.expt_fsdp_group = ( @@ -504,6 +509,13 @@ def __init__( else None ) + self.expt_outer_fsdp_group = ( + self.expt_device_mesh[self.dp_outer_dim].get_group() + if self.expt_device_mesh is not None + and contains_submesh(self.expt_device_mesh, self.dp_outer_dim) + else None + ) + """ Megatron-FSDP is responsible for storing all required DeviceMesh as per best practices recommended by the DeviceMesh API. 
@@ -544,6 +556,8 @@ def register_submesh(device_mesh, submesh, is_expert_parallel): register_submesh(self.expt_device_mesh, tp_submesh, True) register_submesh(self.expt_device_mesh, fsdp_tp_submesh, True) register_submesh(self.expt_device_mesh, fsdp_submesh, True) + register_submesh(self.expt_device_mesh, hsdp_submesh, True) + register_submesh(self.expt_device_mesh, hsdp_tp_submesh, True) # Validate FSDP arguments. if self.fsdp_group is None: @@ -615,6 +629,8 @@ def get_submesh( def get_dp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: """Get the data parallel process group.""" if is_expert_parallel: + if self.use_hybrid_fsdp: + return self.hybrid_fsdp_expt_group return self.expt_fsdp_group if self.use_hybrid_fsdp: return self.hybrid_fsdp_group @@ -626,10 +642,12 @@ def get_fsdp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: return self.expt_fsdp_group return self.fsdp_group - def get_outer_fsdp_group(self) -> ProcessGroup: + def get_outer_fsdp_group(self, is_expert_parallel: bool = False) -> ProcessGroup: """Get the outer-FSDP process group.""" if not self.use_hybrid_fsdp: return None + if is_expert_parallel: + return self.expt_outer_fsdp_group return self.outer_fsdp_group def get_root_mesh(self, is_expert_parallel: bool = False) -> DeviceMesh: @@ -641,7 +659,7 @@ def get_root_mesh(self, is_expert_parallel: bool = False) -> DeviceMesh: return self.expt_device_mesh return self.device_mesh - def get_logical_hybrid_fsdp_rank(self): + def get_logical_hybrid_fsdp_rank(self, is_expert_parallel: bool = False): """ Returns the logical rank of the current process within the full-shard hybrid FSDP group. @@ -661,20 +679,28 @@ def get_logical_hybrid_fsdp_rank(self): self.hsdp_outer_dp_shard ), "get_logical_hybrid_fsdp_rank is only valid when full-shard hybrid FSDP is enabled." 
- if not hasattr(self, "_hybrid_fsdp_group_ranks"): - dp_world_size = self.get_dp_group().size() + _hybrid_fsdp_group_name = ( + "_hybrid_fsdp_group_ranks" + if not is_expert_parallel + else "_hybrid_fsdp_expt_group_ranks" + ) + + if not hasattr(self, _hybrid_fsdp_group_name): + dp_world_size = self.get_dp_group(is_expert_parallel).size() # Reorder the flat ranks: (outer_dp, inner_dp) -> (inner_dp, outer_dp) mesh = einops.rearrange( torch.arange(dp_world_size), "(outer_dp inner_dp) -> (inner_dp outer_dp)", - outer_dp=self.outer_fsdp_group.size(), - inner_dp=self.fsdp_group.size(), + outer_dp=self.get_outer_fsdp_group(is_expert_parallel).size(), + inner_dp=self.get_fsdp_group(is_expert_parallel).size(), ) - self._hybrid_fsdp_group_ranks = mesh.tolist() + setattr(self, _hybrid_fsdp_group_name, mesh.tolist()) # Find the index for the current rank in the hybrid group - return self._hybrid_fsdp_group_ranks.index(self.hybrid_fsdp_group.rank()) + return getattr(self, _hybrid_fsdp_group_name).index( + self.get_dp_group(is_expert_parallel).rank() + ) class GlobalMemoryBuffer: From 27fcfb243db32f015674b08fe98b1e6df7f9ea67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 29 Jan 2026 22:12:47 +0100 Subject: [PATCH 258/334] Cherrypick CI improvements to dev branch (#3118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/actions/action.yml | 5 ++++- .../test_utils/python_scripts/launch_nemo_run_workload.py | 8 +++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index dfc6d79688e..6e9c72016f6 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -149,14 +149,17 @@ runs: ARGS=( --scope mr-github --enable-lightweight-mode + --n-repeat 1 ) elif [ "${{ steps.has-run-functional-tests-label.outputs.main }}" == "true" ]; then ARGS=( --scope mr-github + --n-repeat 5 ) else ARGS=( --scope 
mr-github-slim + --n-repeat 5 ) fi @@ -258,5 +261,5 @@ runs: if: always() with: name: ${{ steps.check.outputs.logs_report }} - path: ${{ inputs.is_unit_test == 'true' && 'logs' || 'assets_dir' }} + path: ${{ inputs.is_unit_test == 'true' && 'assets_dir/logs' || 'assets_dir' }} include-hidden-files: true diff --git a/tests/test_utils/python_scripts/launch_nemo_run_workload.py b/tests/test_utils/python_scripts/launch_nemo_run_workload.py index 26a7dbd79f5..8d006f70d19 100644 --- a/tests/test_utils/python_scripts/launch_nemo_run_workload.py +++ b/tests/test_utils/python_scripts/launch_nemo_run_workload.py @@ -50,6 +50,9 @@ def is_flaky_failure(concat_allranks_logs: str) -> bool: @click.option("--environment", required=True, type=str, help="Environment of the workload") @click.option("--platform", required=True, type=str, help="Platform of the workload") @click.option("--container-image", required=True, type=str, help="Container image of the workload") +@click.option( + "--n-repeat", required=False, type=int, help="Number of times to repeat the workload", default=1 +) @click.option("--data-dir", required=False, type=str, help="Data directory of the workload") @click.option("--tag", required=False, type=str, help="Tag of the workload") @click.option( @@ -68,6 +71,7 @@ def main( environment, platform, container_image, + n_repeat: int = 1, data_dir: Optional[str] = None, tag: Optional[str] = None, enable_lightweight_mode: Optional[bool] = False, @@ -92,6 +96,7 @@ def main( magic_values["assets_dir"] = "/opt/megatron-lm/assets_dir" magic_values["artifacts_dir"] = "/opt/megatron-lm/artifacts_dir" magic_values["environment"] = environment + magic_values["n_repeat"] = n_repeat magic_values["test_case"] = workload.spec["test_case"] magic_values["name"] = workload.spec["name"].format(**magic_values) workload.spec["script"] = workload.spec["script"].format(**magic_values) @@ -113,9 +118,10 @@ def main( "PYTHONUNBUFFERED": "1", "OUTPUT_PATH": os.getcwd(), 
"ENABLE_LIGHTWEIGHT_MODE": str(enable_lightweight_mode).lower(), - "N_REPEAT": "1", + "N_REPEAT": str(n_repeat), "CLUSTER": "dgxh100_dgxc", "NCCL_DEBUG": "INFO", + "NCCL_DEBUG_FILE": "/opt/megatron-lm/assets_dir/logs/nccl_debug.log", }, packager=run.Packager(), volumes=artifacts, From 55e3a0a41774b2575e5de65b0c7c15483442b500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 30 Jan 2026 16:09:36 +0100 Subject: [PATCH 259/334] [dev] ci: Add DSv3 proxy (#3144) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../bert/bert_release/model_config.yaml | 1 + .../gpt/gpt3_15b_8t_release/model_config.yaml | 1 + .../gpt3_15b_8t_release_sm/model_config.yaml | 1 + .../model_config.yml | 169 ++++++++++++++++++ .../model_config.yml | 11 +- .../model_config.yaml | 168 +++++++++++++++++ .../model_config.yaml | 1 + .../model_config.yaml | 1 + .../model_config.yaml | 1 + .../model_config.yaml | 1 + .../model_config.yaml | 1 + .../model_config.yaml | 1 + .../t5/t5_release/model_config.yaml | 1 + 13 files changed, 353 insertions(+), 5 deletions(-) create mode 100644 tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_gb_200_release_sm/model_config.yml create mode 100644 tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_gb_200_release/model_config.yaml diff --git a/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml index 278ad6c17a8..546926fc66c 100644 --- a/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml @@ -45,6 +45,7 @@ MODEL_ARGS: --log-params-norm: true --log-validation-ppl-to-tensorboard: true --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} --attention-backend: unfused 
--exit-interval: 20000 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release/model_config.yaml index 44f9de33775..692e3882e02 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release/model_config.yaml @@ -84,6 +84,7 @@ MODEL_ARGS: --log-interval: 100 --tensorboard-dir: ${TENSORBOARD_PATH} --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} # Add mixed precision args --bf16: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/model_config.yaml index 32386558710..b7fb9d7d661 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/model_config.yaml @@ -84,6 +84,7 @@ MODEL_ARGS: --log-interval: 100 --tensorboard-dir: ${TENSORBOARD_PATH} --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} # Add mixed precision args --bf16: true diff --git a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_gb_200_release_sm/model_config.yml b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_gb_200_release_sm/model_config.yml new file mode 100644 index 00000000000..1ad8597d932 --- /dev/null +++ b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_gb_200_release_sm/model_config.yml @@ -0,0 +1,169 @@ +# The proxy model is used for local code quality check. +# The proxy model should contain all the necessary components and settings but fewer parameters. 
+ENV_VARS: + TORCH_NCCL_AVOID_RECORD_STREAMS: 0 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + NVTE_FUSED_ATTN: 1 + NVTE_NORM_FWD_USE_CUDNN: 1 + NVTE_NORM_BWD_USE_CUDNN: 1 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION + NON_DETERMINSTIC_RESULTS: 1 + NVSHMEM_IB_ENABLE_IBGDA: 0 + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NUM_OF_HYBRID_EP_RANKS_PER_NVLINK_DOMAIN: 16 + USE_MNNVL: 1 +TEST_TYPE: "release" +MODEL_ARGS: + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 4 + --pipeline-model-parallel-layout: Et*2\\|\\(tt\\|\\)*5t\\|tmL # Et*2|(tt|)*5t|tmL + --expert-model-parallel-size: 16 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + --overlap-grad-reduce: true + --overlap-param-gather: true + + # Training args + --use-mcore-models: true + --sequence-parallel: true + --use-flash-attn: true + --disable-bias-linear: true + --micro-batch-size: 1 + --global-batch-size: 512 + --train-samples: 24414062 + --exit-duration-in-mins: 220 + --no-check-for-nan-in-loss-and-grad: true + --cross-entropy-loss-fusion: true + --cross-entropy-fusion-impl: te + --manual-gc: true + --manual-gc-interval: 10 + + # Transformer Engine args + --transformer-impl: transformer_engine + + # Data args + --seq-length: 4096 + --data-cache-path: ${DATA_CACHE_PATH} + --tokenizer-type: GPTSentencePieceTokenizer + --tokenizer-model: ${DATA_PATH}/utils/nemotron_2_256k.model + --data-path: $DATA_BLEND + --split: 99,1,0 + --no-mmap-bin-files: true + --no-create-attention-mask-in-dataloader: true + --num-workers: 6 + + # Add network size args + --num-layers: 14 # original 61 layers + --hidden-size: 7168 + --ffn-hidden-size: 18432 + --num-attention-heads: 128 + --kv-channels: 128 + --max-position-embeddings: 4096 + --position-embedding-type: rope + --rotary-base: 10000 + --make-vocab-size-divisible-by: 3232 + --normalization: 
RMSNorm + --norm-epsilon: 1e-6 + --swiglu: true + --untie-embeddings-and-output-weights: true + --multi-latent-attention: true + --mtp-num-layers: 1 + --mtp-loss-scaling-factor: 0.1 + + # Add regularization args + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --clip-grad: 1.0 + --weight-decay: 0.1 + --qk-layernorm: true + + # Add learning rate args + --lr-decay-samples: 24413696 + --lr-warmup-samples: 1536000 + --lr-warmup-init: 1e-7 + --lr: 1e-5 + --min-lr: 1e-6 + --lr-decay-style: cosine + --adam-beta1: 0.9 + --adam-beta2: 0.95 + + # Add MoE args + --num-experts: 64 # local 4 + 1 shared, EP16 + --moe-layer-freq: ([0]*3+[1]*11) + --moe-ffn-hidden-size: 2048 + --moe-shared-expert-intermediate-size: 2048 + --moe-router-load-balancing-type: seq_aux_loss + --moe-router-topk: 8 + --moe-token-dispatcher-type: flex + --moe-flex-dispatcher-backend: hybridep + --moe-router-pre-softmax: true + --moe-grouped-gemm: true + --moe-aux-loss-coeff: 1e-4 + --moe-router-group-topk: 4 + --moe-router-num-groups: 8 + --moe-router-topk-scaling-factor: 2.5 + --moe-router-score-function: sigmoid + --moe-router-enable-expert-bias: true + --moe-router-bias-update-rate: 1e-3 + --moe-router-dtype: fp32 + --moe-permute-fusion: true + + # Add MLA args + --q-lora-rank: 1536 + --kv-lora-rank: 512 + --qk-head-dim: 128 + --qk-pos-emb-head-dim: 64 + --v-head-dim: 128 + --rotary-scaling-factor: 40 + --mscale: 1.0 + --mscale-all-dim: 1.0 + + # Add validation args + --eval-iters: 32 + --eval-interval: 200 + + # Add checkpointing args + --auto-detect-ckpt-format: + true + # Add checkpointing args + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} + --save-interval: 500 + --save-retain-interval: 10000 + --dist-ckpt-strictness: log_all + + # Add initialization args + --init-method-std: 0.02 + + # Add logging args + --log-timers-to-tensorboard: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-params-norm: true + --log-validation-ppl-to-tensorboard: true 
+ --log-throughput: true + --log-interval: 1 + --logging-level: 40 + --tensorboard-dir: ${TENSORBOARD_PATH} + --wandb-project: megatron-core-release-runs + --wandb-entity: adlr + --wandb-exp-name: ${WANDB_EXPERIMENT} + --wandb-save-dir: ${WANDB_SAVE_PATH} + + # Add mixed precision args + --bf16: true + + # enable experimental + --enable-experimental: true + --exit-interval: 9536 +METRICS: + - "iteration-time" + - "lm loss" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" diff --git a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/model_config.yml b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/model_config.yml index 9c7d2496e2a..cc8f2b814c2 100644 --- a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/model_config.yml +++ b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/model_config.yml @@ -13,7 +13,7 @@ ENV_VARS: NON_DETERMINSTIC_RESULTS: 1 NVSHMEM_IB_ENABLE_IBGDA: 0 CUDA_DEVICE_MAX_CONNECTIONS: 1 -TEST_TYPE: 'release' +TEST_TYPE: "release" MODEL_ARGS: # Distributed args --distributed-timeout-minutes: 60 @@ -150,6 +150,7 @@ MODEL_ARGS: --logging-level: 40 --tensorboard-dir: ${TENSORBOARD_PATH} --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} --wandb-save-dir: ${WANDB_SAVE_PATH} @@ -160,7 +161,7 @@ MODEL_ARGS: --enable-experimental: true --exit-interval: 9536 METRICS: - - 'iteration-time' - - 'lm loss' - - 'mem-allocated-bytes' - - 'mem-max-allocated-bytes' + - "iteration-time" + - "lm loss" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" diff --git a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_gb_200_release/model_config.yaml b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_gb_200_release/model_config.yaml new file mode 100644 index 
00000000000..ced409e5b1e --- /dev/null +++ b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_gb_200_release/model_config.yaml @@ -0,0 +1,168 @@ +# The proxy model is used for local code quality check. +# The proxy model should contain all the necessary components and settings but fewer parameters. +ENV_VARS: + TORCH_NCCL_AVOID_RECORD_STREAMS: 0 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + NVTE_FUSED_ATTN: 1 + NVTE_NORM_FWD_USE_CUDNN: 1 + NVTE_NORM_BWD_USE_CUDNN: 1 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION + NON_DETERMINSTIC_RESULTS: 1 + NVSHMEM_IB_ENABLE_IBGDA: 0 + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NUM_OF_HYBRID_EP_RANKS_PER_NVLINK_DOMAIN: 16 + USE_MNNVL: 1 +TEST_TYPE: "release" +MODEL_ARGS: + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 4 + --pipeline-model-parallel-layout: Et*2\\|\\(tt\\|\\)*5t\\|tmL # Et*2|(tt|)*5t|tmL + --expert-model-parallel-size: 16 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + --overlap-grad-reduce: true + --overlap-param-gather: true + + # Training args + --use-mcore-models: true + --sequence-parallel: true + --use-flash-attn: true + --disable-bias-linear: true + --micro-batch-size: 1 + --global-batch-size: 512 + --train-samples: 24414062 + --exit-duration-in-mins: 220 + --no-check-for-nan-in-loss-and-grad: true + --cross-entropy-loss-fusion: true + --cross-entropy-fusion-impl: te + --manual-gc: true + --manual-gc-interval: 10 + + # Transformer Engine args + --transformer-impl: transformer_engine + + # Data args + --seq-length: 4096 + --data-cache-path: ${DATA_CACHE_PATH} + --tokenizer-type: GPTSentencePieceTokenizer + --tokenizer-model: ${DATA_PATH}/utils/nemotron_2_256k.model + --data-path: $DATA_BLEND + --split: 99,1,0 + --no-mmap-bin-files: true + --no-create-attention-mask-in-dataloader: true + 
--num-workers: 6 + + # Add network size args + --num-layers: 14 # original 61 layers + --hidden-size: 7168 + --ffn-hidden-size: 18432 + --num-attention-heads: 128 + --kv-channels: 128 + --max-position-embeddings: 4096 + --position-embedding-type: rope + --rotary-base: 10000 + --make-vocab-size-divisible-by: 3232 + --normalization: RMSNorm + --norm-epsilon: 1e-6 + --swiglu: true + --untie-embeddings-and-output-weights: true + --multi-latent-attention: true + --mtp-num-layers: 1 + --mtp-loss-scaling-factor: 0.1 + + # Add regularization args + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --clip-grad: 1.0 + --weight-decay: 0.1 + --qk-layernorm: true + + # Add learning rate args + --lr-decay-samples: 24413696 + --lr-warmup-samples: 1536000 + --lr-warmup-init: 1e-7 + --lr: 1e-5 + --min-lr: 1e-6 + --lr-decay-style: cosine + --adam-beta1: 0.9 + --adam-beta2: 0.95 + + # Add MoE args + --num-experts: 64 # local 4 + 1 shared, EP16 + --moe-layer-freq: ([0]*3+[1]*11) + --moe-ffn-hidden-size: 2048 + --moe-shared-expert-intermediate-size: 2048 + --moe-router-load-balancing-type: seq_aux_loss + --moe-router-topk: 8 + --moe-token-dispatcher-type: flex + --moe-flex-dispatcher-backend: hybridep + --moe-router-pre-softmax: true + --moe-grouped-gemm: true + --moe-aux-loss-coeff: 1e-4 + --moe-router-group-topk: 4 + --moe-router-num-groups: 8 + --moe-router-topk-scaling-factor: 2.5 + --moe-router-score-function: sigmoid + --moe-router-enable-expert-bias: true + --moe-router-bias-update-rate: 1e-3 + --moe-router-dtype: fp32 + --moe-permute-fusion: true + + # Add MLA args + --q-lora-rank: 1536 + --kv-lora-rank: 512 + --qk-head-dim: 128 + --qk-pos-emb-head-dim: 64 + --v-head-dim: 128 + --rotary-scaling-factor: 40 + --mscale: 1.0 + --mscale-all-dim: 1.0 + + # Add validation args + --eval-iters: 32 + --eval-interval: 200 + + # Add checkpointing args + --auto-detect-ckpt-format: + true + # Add checkpointing args + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} + 
--save-interval: 500 + --save-retain-interval: 10000 + --dist-ckpt-strictness: log_all + + # Add initialization args + --init-method-std: 0.02 + + # Add logging args + --log-timers-to-tensorboard: true + --log-memory-to-tensorboard: true + --log-num-zeros-in-grad: true + --log-params-norm: true + --log-validation-ppl-to-tensorboard: true + --log-throughput: true + --log-interval: 1 + --logging-level: 40 + --tensorboard-dir: ${TENSORBOARD_PATH} + --wandb-project: megatron-core-release-runs + --wandb-entity: adlr + --wandb-exp-name: ${WANDB_EXPERIMENT} + --wandb-save-dir: ${WANDB_SAVE_PATH} + + # Add mixed precision args + --bf16: true + + # enable experimental + --enable-experimental: true +METRICS: + - "iteration-time" + - "lm loss" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" diff --git a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release/model_config.yaml b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release/model_config.yaml index 080f669e6a4..7bc14780fb3 100644 --- a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release/model_config.yaml +++ b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release/model_config.yaml @@ -150,6 +150,7 @@ MODEL_ARGS: --logging-level: 40 --tensorboard-dir: ${TENSORBOARD_PATH} --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} --wandb-save-dir: ${WANDB_SAVE_PATH} diff --git a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release_sm/model_config.yaml b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release_sm/model_config.yaml index 8bab921aa04..cc8f2b814c2 100644 --- a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release_sm/model_config.yaml +++ 
b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release_sm/model_config.yaml @@ -150,6 +150,7 @@ MODEL_ARGS: --logging-level: 40 --tensorboard-dir: ${TENSORBOARD_PATH} --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} --wandb-save-dir: ${WANDB_SAVE_PATH} diff --git a/tests/functional_tests/test_cases/mixtral/mixtral_8x22b_tp2pp8ep8vpp1_release/model_config.yaml b/tests/functional_tests/test_cases/mixtral/mixtral_8x22b_tp2pp8ep8vpp1_release/model_config.yaml index e2b8b212900..efe39998065 100644 --- a/tests/functional_tests/test_cases/mixtral/mixtral_8x22b_tp2pp8ep8vpp1_release/model_config.yaml +++ b/tests/functional_tests/test_cases/mixtral/mixtral_8x22b_tp2pp8ep8vpp1_release/model_config.yaml @@ -92,6 +92,7 @@ MODEL_ARGS: --log-interval: 1 --tensorboard-dir: ${TENSORBOARD_PATH} --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} --wandb-save-dir: ${WANDB_SAVE_PATH} # Add mixed precision args diff --git a/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release/model_config.yaml b/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release/model_config.yaml index a02fbe99537..f4476c712f2 100644 --- a/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release/model_config.yaml +++ b/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release/model_config.yaml @@ -92,6 +92,7 @@ MODEL_ARGS: --log-interval: 1 --tensorboard-dir: ${TENSORBOARD_PATH} --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} --wandb-save-dir: ${WANDB_SAVE_PATH} # Add mixed precision args diff --git a/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release_sm/model_config.yaml b/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release_sm/model_config.yaml index 
b43a1227ea0..cfeb7709839 100644 --- a/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release_sm/model_config.yaml +++ b/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release_sm/model_config.yaml @@ -92,6 +92,7 @@ MODEL_ARGS: --log-interval: 1 --tensorboard-dir: ${TENSORBOARD_PATH} --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} --wandb-save-dir: ${WANDB_SAVE_PATH} # Add mixed precision args diff --git a/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_tp1pp4ep8vpp8_release/model_config.yaml b/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_tp1pp4ep8vpp8_release/model_config.yaml index 1fdad2a5c70..29dcefadf0e 100644 --- a/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_tp1pp4ep8vpp8_release/model_config.yaml +++ b/tests/functional_tests/test_cases/mixtral/mixtral_8x7b_tp1pp4ep8vpp8_release/model_config.yaml @@ -94,6 +94,7 @@ MODEL_ARGS: --log-interval: 1 --tensorboard-dir: ${TENSORBOARD_PATH} --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} --wandb-save-dir: ${WANDB_SAVE_PATH} # Add mixed precision args diff --git a/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml index b684a2ebb54..6cfe215b80f 100644 --- a/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml @@ -58,6 +58,7 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --timing-log-level: 0 --wandb-project: megatron-core-release-runs + --wandb-entity: adlr --wandb-exp-name: ${WANDB_EXPERIMENT} --wandb-save-dir: ${WANDB_SAVE_PATH} METRICS: From a78ae4948a0f3cd69b9a8441571126d556d6501c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sat, 31 Jan 2026 20:07:48 +0100 Subject: [PATCH 260/334] [dev] ci: Fix DSv3 (#3187) MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_dev_dgx_h100.json | 11492 ---------------- .../model_config.yml | 167 - .../model_config.yaml} | 2 + 3 files changed, 2 insertions(+), 11659 deletions(-) delete mode 100644 tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/golden_values_dev_dgx_h100.json delete mode 100644 tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/model_config.yml rename tests/functional_tests/test_cases/mixtral/{deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_gb_200_release_sm/model_config.yml => deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_gb_200_release_sm/model_config.yaml} (99%) diff --git a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/golden_values_dev_dgx_h100.json deleted file mode 100644 index f486950e5a2..00000000000 --- a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/golden_values_dev_dgx_h100.json +++ /dev/null @@ -1,11492 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 9535, - "step_interval": 5, - "values": { - "1": 13.89756, - "5": 13.89155, - "10": 13.85814, - "15": 13.84947, - "20": 13.74128, - "25": 13.71269, - "30": 13.39136, - "35": 13.32418, - "40": 13.23329, - "45": 13.12045, - "50": 12.53632, - "55": 12.35058, - "60": 12.17187, - "65": 12.01029, - "70": 11.83519, - "75": 11.55823, - "80": 11.30557, - "85": 11.11711, - "90": 10.96045, - "95": 10.79835, - "100": 10.58719, - "105": 10.45871, - "110": 10.23985, - "115": 10.03197, - "120": 9.88087, - "125": 9.74001, - "130": 9.64895, - "135": 9.58316, - "140": 9.34895, - "145": 9.3363, - "150": 9.17736, - "155": 9.11162, - "160": 9.02957, - "165": 8.91504, - "170": 8.86399, - "175": 8.82531, - 
"180": 8.68067, - "185": 8.72019, - "190": 8.59287, - "195": 8.59803, - "200": 8.48665, - "205": 8.39681, - "210": 8.35424, - "215": 8.40636, - "220": 8.27837, - "225": 8.29496, - "230": 8.27773, - "235": 8.20463, - "240": 8.15385, - "245": 8.1344, - "250": 8.06891, - "255": 8.08354, - "260": 7.97761, - "265": 7.96264, - "270": 7.91745, - "275": 7.9055, - "280": 7.89502, - "285": 7.91233, - "290": 7.858, - "295": 7.84326, - "300": 7.73922, - "305": 7.73479, - "310": 7.6998, - "315": 7.6959, - "320": 7.68835, - "325": 7.60857, - "330": 7.59888, - "335": 7.57833, - "340": 7.62257, - "345": 7.51187, - "350": 7.5063, - "355": 7.43406, - "360": 7.53414, - "365": 7.45759, - "370": 7.49186, - "375": 7.43607, - "380": 7.41292, - "385": 7.41117, - "390": 7.42986, - "395": 7.36781, - "400": 7.30747, - "405": 7.31834, - "410": 7.30943, - "415": 7.29421, - "420": 7.2965, - "425": 7.26158, - "430": 7.20979, - "435": 7.22197, - "440": 7.18512, - "445": 7.1687, - "450": 7.12181, - "455": 7.14062, - "460": 7.11041, - "465": 7.10497, - "470": 7.07645, - "475": 7.09742, - "480": 6.97587, - "485": 7.03312, - "490": 6.99478, - "495": 6.9692, - "500": 6.91435, - "505": 6.94713, - "510": 6.92309, - "515": 6.88853, - "520": 6.88024, - "525": 6.87529, - "530": 6.88311, - "535": 6.8642, - "540": 6.78769, - "545": 6.8252, - "550": 6.84568, - "555": 6.86869, - "560": 6.81372, - "565": 6.74969, - "570": 6.76579, - "575": 6.77872, - "580": 6.69766, - "585": 6.71359, - "590": 6.65449, - "595": 6.64792, - "600": 6.67016, - "605": 6.65924, - "610": 6.63641, - "615": 6.68438, - "620": 6.60355, - "625": 6.57203, - "630": 6.56964, - "635": 6.60732, - "640": 6.59738, - "645": 6.5815, - "650": 6.62582, - "655": 6.62475, - "660": 6.53171, - "665": 6.52224, - "670": 6.47146, - "675": 6.57058, - "680": 6.53989, - "685": 6.49695, - "690": 6.47037, - "695": 6.43685, - "700": 6.43121, - "705": 6.4313, - "710": 6.46058, - "715": 6.46842, - "720": 6.35254, - "725": 6.40344, - "730": 6.39123, - "735": 6.41174, 
- "740": 6.34886, - "745": 6.31567, - "750": 6.37227, - "755": 6.29068, - "760": 6.30783, - "765": 6.32016, - "770": 6.31539, - "775": 6.3051, - "780": 6.27484, - "785": 6.28635, - "790": 6.25066, - "795": 6.24498, - "800": 6.22595, - "805": 6.30241, - "810": 6.16125, - "815": 6.18921, - "820": 6.19984, - "825": 6.20878, - "830": 6.21184, - "835": 6.16547, - "840": 6.13918, - "845": 6.18907, - "850": 6.14544, - "855": 6.14245, - "860": 6.12573, - "865": 6.14471, - "870": 6.103, - "875": 6.14755, - "880": 6.09503, - "885": 6.08625, - "890": 6.14906, - "895": 6.03612, - "900": 6.06033, - "905": 6.07119, - "910": 6.04765, - "915": 6.02795, - "920": 6.01922, - "925": 6.00762, - "930": 6.04202, - "935": 6.03448, - "940": 5.96552, - "945": 6.00691, - "950": 6.02802, - "955": 5.9757, - "960": 5.9732, - "965": 5.8947, - "970": 5.93848, - "975": 5.94046, - "980": 5.91694, - "985": 5.91057, - "990": 5.96163, - "995": 5.87028, - "1000": 5.89819, - "1005": 5.85552, - "1010": 5.89001, - "1015": 5.91011, - "1020": 5.82121, - "1025": 5.81525, - "1030": 5.82852, - "1035": 5.91121, - "1040": 5.83477, - "1045": 5.80641, - "1050": 5.84029, - "1055": 5.82471, - "1060": 5.77657, - "1065": 5.75965, - "1070": 5.80228, - "1075": 5.78852, - "1080": 5.77993, - "1085": 5.79347, - "1090": 5.7642, - "1095": 5.77727, - "1100": 5.73679, - "1105": 5.71252, - "1110": 5.76864, - "1115": 5.69994, - "1120": 5.64073, - "1125": 5.65212, - "1130": 5.71653, - "1135": 5.67194, - "1140": 5.66144, - "1145": 5.65572, - "1150": 5.68319, - "1155": 5.64543, - "1160": 5.63371, - "1165": 5.67226, - "1170": 5.65589, - "1175": 5.62136, - "1180": 5.63006, - "1185": 5.6181, - "1190": 5.60413, - "1195": 5.59825, - "1200": 5.54202, - "1205": 5.65572, - "1210": 5.51312, - "1215": 5.55359, - "1220": 5.63431, - "1225": 5.51403, - "1230": 5.56754, - "1235": 5.521, - "1240": 5.55808, - "1245": 5.52886, - "1250": 5.51046, - "1255": 5.50279, - "1260": 5.50208, - "1265": 5.47964, - "1270": 5.44537, - "1275": 5.52448, - "1280": 
5.45447, - "1285": 5.4682, - "1290": 5.43648, - "1295": 5.46181, - "1300": 5.46016, - "1305": 5.43278, - "1310": 5.38271, - "1315": 5.44073, - "1320": 5.42393, - "1325": 5.3568, - "1330": 5.41966, - "1335": 5.39498, - "1340": 5.44678, - "1345": 5.4046, - "1350": 5.3745, - "1355": 5.36722, - "1360": 5.37555, - "1365": 5.38819, - "1370": 5.31687, - "1375": 5.3257, - "1380": 5.37435, - "1385": 5.33822, - "1390": 5.32907, - "1395": 5.35996, - "1400": 5.34708, - "1405": 5.32768, - "1410": 5.30321, - "1415": 5.26874, - "1420": 5.31115, - "1425": 5.3045, - "1430": 5.33954, - "1435": 5.24914, - "1440": 5.27894, - "1445": 5.31118, - "1450": 5.28087, - "1455": 5.30455, - "1460": 5.26455, - "1465": 5.26355, - "1470": 5.29615, - "1475": 5.27116, - "1480": 5.26692, - "1485": 5.21939, - "1490": 5.21283, - "1495": 5.23155, - "1500": 5.23275, - "1505": 5.20436, - "1510": 5.22447, - "1515": 5.15502, - "1520": 5.1852, - "1525": 5.15413, - "1530": 5.17452, - "1535": 5.16098, - "1540": 5.16276, - "1545": 5.19593, - "1550": 5.1989, - "1555": 5.18478, - "1560": 5.1253, - "1565": 5.15973, - "1570": 5.17281, - "1575": 5.1468, - "1580": 5.16002, - "1585": 5.14495, - "1590": 5.12815, - "1595": 5.09691, - "1600": 5.17173, - "1605": 5.09626, - "1610": 5.10506, - "1615": 5.09978, - "1620": 5.1145, - "1625": 5.10983, - "1630": 5.08211, - "1635": 5.12902, - "1640": 5.09565, - "1645": 5.08916, - "1650": 5.08067, - "1655": 5.06625, - "1660": 5.05546, - "1665": 5.04609, - "1670": 5.06711, - "1675": 5.06871, - "1680": 5.00775, - "1685": 5.01672, - "1690": 4.99799, - "1695": 5.00065, - "1700": 5.03983, - "1705": 5.01824, - "1710": 5.00629, - "1715": 4.97587, - "1720": 4.97437, - "1725": 4.9984, - "1730": 4.95014, - "1735": 5.02541, - "1740": 4.95266, - "1745": 4.97461, - "1750": 4.95639, - "1755": 4.97133, - "1760": 4.98489, - "1765": 4.93728, - "1770": 4.93343, - "1775": 4.9432, - "1780": 4.96314, - "1785": 4.91574, - "1790": 4.93944, - "1795": 4.93848, - "1800": 4.88725, - "1805": 4.87771, - 
"1810": 4.8976, - "1815": 4.89801, - "1820": 4.8872, - "1825": 4.89371, - "1830": 4.8786, - "1835": 4.87542, - "1840": 4.87209, - "1845": 4.85811, - "1850": 4.83484, - "1855": 4.89133, - "1860": 4.84322, - "1865": 4.85108, - "1870": 4.82648, - "1875": 4.83877, - "1880": 4.89485, - "1885": 4.84392, - "1890": 4.8281, - "1895": 4.77339, - "1900": 4.81423, - "1905": 4.81232, - "1910": 4.82991, - "1915": 4.79768, - "1920": 4.78308, - "1925": 4.79277, - "1930": 4.76544, - "1935": 4.7941, - "1940": 4.75875, - "1945": 4.80214, - "1950": 4.83843, - "1955": 4.77731, - "1960": 4.76768, - "1965": 4.72596, - "1970": 4.73388, - "1975": 4.7973, - "1980": 4.73036, - "1985": 4.74162, - "1990": 4.78353, - "1995": 4.74959, - "2000": 4.76948, - "2005": 4.80113, - "2010": 4.70951, - "2015": 4.69715, - "2020": 4.71284, - "2025": 4.75821, - "2030": 4.68831, - "2035": 4.71528, - "2040": 4.67772, - "2045": 4.76255, - "2050": 4.74404, - "2055": 4.7077, - "2060": 4.70614, - "2065": 4.66526, - "2070": 4.67653, - "2075": 4.69507, - "2080": 4.66174, - "2085": 4.69911, - "2090": 4.61739, - "2095": 4.64746, - "2100": 4.61666, - "2105": 4.64633, - "2110": 4.64123, - "2115": 4.65336, - "2120": 4.64559, - "2125": 4.61059, - "2130": 4.61466, - "2135": 4.62745, - "2140": 4.6232, - "2145": 4.58124, - "2150": 4.60983, - "2155": 4.57956, - "2160": 4.60382, - "2165": 4.58415, - "2170": 4.61387, - "2175": 4.60275, - "2180": 4.59531, - "2185": 4.60788, - "2190": 4.58246, - "2195": 4.55672, - "2200": 4.55346, - "2205": 4.56383, - "2210": 4.6146, - "2215": 4.64276, - "2220": 4.59912, - "2225": 4.57263, - "2230": 4.56854, - "2235": 4.61797, - "2240": 4.51401, - "2245": 4.5176, - "2250": 4.52905, - "2255": 4.54117, - "2260": 4.48536, - "2265": 4.56489, - "2270": 4.49655, - "2275": 4.55547, - "2280": 4.51075, - "2285": 4.53333, - "2290": 4.52269, - "2295": 4.52707, - "2300": 4.53228, - "2305": 4.49287, - "2310": 4.53148, - "2315": 4.46329, - "2320": 4.51121, - "2325": 4.49336, - "2330": 4.49351, - "2335": 
4.47787, - "2340": 4.48626, - "2345": 4.52525, - "2350": 4.4674, - "2355": 4.47173, - "2360": 4.44099, - "2365": 4.44682, - "2370": 4.44716, - "2375": 4.44199, - "2380": 4.39487, - "2385": 4.43475, - "2390": 4.43071, - "2395": 4.46719, - "2400": 4.42074, - "2405": 4.40081, - "2410": 4.44955, - "2415": 4.42055, - "2420": 4.4293, - "2425": 4.39783, - "2430": 4.42084, - "2435": 4.40291, - "2440": 4.39501, - "2445": 4.40808, - "2450": 4.38239, - "2455": 4.4178, - "2460": 4.36606, - "2465": 4.41327, - "2470": 4.40023, - "2475": 4.41776, - "2480": 4.34092, - "2485": 4.37423, - "2490": 4.37838, - "2495": 4.35662, - "2500": 4.36528, - "2505": 4.37219, - "2510": 4.41251, - "2515": 4.40356, - "2520": 4.34516, - "2525": 4.36214, - "2530": 4.36786, - "2535": 4.36686, - "2540": 4.36548, - "2545": 4.37687, - "2550": 4.30337, - "2555": 4.37244, - "2560": 4.35158, - "2565": 4.30393, - "2570": 4.33393, - "2575": 4.30697, - "2580": 4.30582, - "2585": 4.29358, - "2590": 4.31272, - "2595": 4.28154, - "2600": 4.29867, - "2605": 4.31115, - "2610": 4.32106, - "2615": 4.27768, - "2620": 4.26935, - "2625": 4.30437, - "2630": 4.22434, - "2635": 4.30369, - "2640": 4.30012, - "2645": 4.2581, - "2650": 4.28639, - "2655": 4.26647, - "2660": 4.21474, - "2665": 4.30436, - "2670": 4.26382, - "2675": 4.2306, - "2680": 4.25227, - "2685": 4.25736, - "2690": 4.22986, - "2695": 4.28379, - "2700": 4.19098, - "2705": 4.23853, - "2710": 4.25092, - "2715": 4.23481, - "2720": 4.24356, - "2725": 4.2225, - "2730": 4.22941, - "2735": 4.22363, - "2740": 4.20346, - "2745": 4.18765, - "2750": 4.21101, - "2755": 4.22237, - "2760": 4.22902, - "2765": 4.18298, - "2770": 4.23755, - "2775": 4.17706, - "2780": 4.21186, - "2785": 4.19469, - "2790": 4.21736, - "2795": 4.18988, - "2800": 4.1159, - "2805": 4.16613, - "2810": 4.17076, - "2815": 4.15389, - "2820": 4.1969, - "2825": 4.19241, - "2830": 4.16864, - "2835": 4.17046, - "2840": 4.16148, - "2845": 4.14967, - "2850": 4.16619, - "2855": 4.11805, - "2860": 4.14572, - 
"2865": 4.17023, - "2870": 4.14096, - "2875": 4.1596, - "2880": 4.08582, - "2885": 4.14242, - "2890": 4.11503, - "2895": 4.15452, - "2900": 4.09735, - "2905": 4.11101, - "2910": 4.10798, - "2915": 4.14914, - "2920": 4.12546, - "2925": 4.10099, - "2930": 4.08522, - "2935": 4.07896, - "2940": 4.09225, - "2945": 4.06113, - "2950": 4.03479, - "2955": 4.03763, - "2960": 4.04955, - "2965": 4.0643, - "2970": 4.08593, - "2975": 4.0941, - "2980": 4.03102, - "2985": 4.07394, - "2990": 4.08923, - "2995": 4.03231, - "3000": 4.0436, - "3005": 4.02568, - "3010": 4.06747, - "3015": 4.02305, - "3020": 4.03992, - "3025": 4.02491, - "3030": 4.0567, - "3035": 4.04059, - "3040": 4.0544, - "3045": 4.04677, - "3050": 4.017, - "3055": 4.00507, - "3060": 3.9904, - "3065": 4.02281, - "3070": 4.03826, - "3075": 3.97211, - "3080": 4.0011, - "3085": 4.00548, - "3090": 4.00887, - "3095": 4.02745, - "3100": 4.01465, - "3105": 3.99035, - "3110": 3.99124, - "3115": 3.92509, - "3120": 4.00505, - "3125": 3.94183, - "3130": 3.96987, - "3135": 3.96132, - "3140": 3.95209, - "3145": 3.93524, - "3150": 3.96949, - "3155": 3.96213, - "3160": 3.96255, - "3165": 3.96146, - "3170": 3.96456, - "3175": 3.93165, - "3180": 3.93784, - "3185": 3.90234, - "3190": 3.92455, - "3195": 3.9116, - "3200": 3.89013, - "3205": 3.92029, - "3210": 3.89711, - "3215": 3.90569, - "3220": 3.89706, - "3225": 3.91097, - "3230": 3.89895, - "3235": 3.91122, - "3240": 3.88912, - "3245": 3.88902, - "3250": 3.84407, - "3255": 3.89259, - "3260": 3.88283, - "3265": 3.92603, - "3270": 3.9052, - "3275": 3.85915, - "3280": 3.88232, - "3285": 3.86652, - "3290": 3.86681, - "3295": 3.83806, - "3300": 3.85349, - "3305": 3.86048, - "3310": 3.85872, - "3315": 3.89673, - "3320": 3.85179, - "3325": 3.84353, - "3330": 3.82539, - "3335": 3.86213, - "3340": 3.81824, - "3345": 3.83129, - "3350": 3.85901, - "3355": 3.8452, - "3360": 3.83241, - "3365": 3.83682, - "3370": 3.82265, - "3375": 3.85232, - "3380": 3.79563, - "3385": 3.81353, - "3390": 3.79143, 
- "3395": 3.86888, - "3400": 3.83997, - "3405": 3.86197, - "3410": 3.77529, - "3415": 3.72916, - "3420": 3.80048, - "3425": 3.81237, - "3430": 3.84497, - "3435": 3.80796, - "3440": 3.8267, - "3445": 3.7742, - "3450": 3.78787, - "3455": 3.80217, - "3460": 3.78265, - "3465": 3.75891, - "3470": 3.77341, - "3475": 3.77638, - "3480": 3.77988, - "3485": 3.80588, - "3490": 3.76958, - "3495": 3.80315, - "3500": 3.77047, - "3505": 3.77239, - "3510": 3.75092, - "3515": 3.80896, - "3520": 3.79879, - "3525": 3.76372, - "3530": 3.75322, - "3535": 3.76209, - "3540": 3.81796, - "3545": 3.72915, - "3550": 3.79201, - "3555": 3.72604, - "3560": 3.78622, - "3565": 3.7451, - "3570": 3.74254, - "3575": 3.71868, - "3580": 3.77066, - "3585": 3.76174, - "3590": 3.68853, - "3595": 3.76509, - "3600": 3.71336, - "3605": 3.71948, - "3610": 3.70916, - "3615": 3.74868, - "3620": 3.7837, - "3625": 3.71964, - "3630": 3.76519, - "3635": 3.68617, - "3640": 3.7093, - "3645": 3.74263, - "3650": 3.69638, - "3655": 3.72074, - "3660": 3.72832, - "3665": 3.74694, - "3670": 3.71178, - "3675": 3.71065, - "3680": 3.72416, - "3685": 3.67473, - "3690": 3.6936, - "3695": 3.68528, - "3700": 3.70814, - "3705": 3.67651, - "3710": 3.68493, - "3715": 3.6842, - "3720": 3.66563, - "3725": 3.64716, - "3730": 3.64883, - "3735": 3.68782, - "3740": 3.6732, - "3745": 3.66354, - "3750": 3.6757, - "3755": 3.66351, - "3760": 3.67285, - "3765": 3.66004, - "3770": 3.6516, - "3775": 3.63831, - "3780": 3.62453, - "3785": 3.6765, - "3790": 3.60163, - "3795": 3.64291, - "3800": 3.63275, - "3805": 3.62032, - "3810": 3.59475, - "3815": 3.63585, - "3820": 3.64099, - "3825": 3.6535, - "3830": 3.63864, - "3835": 3.59938, - "3840": 3.67685, - "3845": 3.65895, - "3850": 3.60064, - "3855": 3.60428, - "3860": 3.65711, - "3865": 3.60867, - "3870": 3.6721, - "3875": 3.58596, - "3880": 3.58212, - "3885": 3.60502, - "3890": 3.60969, - "3895": 3.5558, - "3900": 3.61685, - "3905": 3.59135, - "3910": 3.5772, - "3915": 3.5862, - "3920": 3.57131, - 
"3925": 3.56751, - "3930": 3.58005, - "3935": 3.5821, - "3940": 3.57511, - "3945": 3.56965, - "3950": 3.61887, - "3955": 3.57531, - "3960": 3.60735, - "3965": 3.58853, - "3970": 3.56735, - "3975": 3.56709, - "3980": 3.5304, - "3985": 3.60527, - "3990": 3.58124, - "3995": 3.60753, - "4000": 3.55811, - "4005": 3.54162, - "4010": 3.58376, - "4015": 3.58398, - "4020": 3.58355, - "4025": 3.57409, - "4030": 3.62855, - "4035": 3.57033, - "4040": 3.5882, - "4045": 3.60161, - "4050": 3.57522, - "4055": 3.57403, - "4060": 3.5888, - "4065": 3.58382, - "4070": 3.51488, - "4075": 3.55887, - "4080": 3.53108, - "4085": 3.54596, - "4090": 3.54584, - "4095": 3.53161, - "4100": 3.55106, - "4105": 3.53794, - "4110": 3.51736, - "4115": 3.56348, - "4120": 3.49648, - "4125": 3.49769, - "4130": 3.55149, - "4135": 3.54373, - "4140": 3.49112, - "4145": 3.51351, - "4150": 3.55497, - "4155": 3.48797, - "4160": 3.54539, - "4165": 3.56451, - "4170": 3.50424, - "4175": 3.50239, - "4180": 3.4998, - "4185": 3.5138, - "4190": 3.5011, - "4195": 3.50044, - "4200": 3.49424, - "4205": 3.53032, - "4210": 3.51921, - "4215": 3.52292, - "4220": 3.53088, - "4225": 3.50168, - "4230": 3.49756, - "4235": 3.52008, - "4240": 3.49249, - "4245": 3.49542, - "4250": 3.48848, - "4255": 3.50707, - "4260": 3.4676, - "4265": 3.48819, - "4270": 3.50473, - "4275": 3.53933, - "4280": 3.48997, - "4285": 3.50947, - "4290": 3.48405, - "4295": 3.48692, - "4300": 3.52631, - "4305": 3.48704, - "4310": 3.51358, - "4315": 3.50638, - "4320": 3.50379, - "4325": 3.51699, - "4330": 3.45992, - "4335": 3.49232, - "4340": 3.50354, - "4345": 3.43189, - "4350": 3.44845, - "4355": 3.52327, - "4360": 3.48083, - "4365": 3.47079, - "4370": 3.47624, - "4375": 3.44129, - "4380": 3.44296, - "4385": 3.42527, - "4390": 3.49048, - "4395": 3.47699, - "4400": 3.47442, - "4405": 3.41723, - "4410": 3.48335, - "4415": 3.44899, - "4420": 3.44113, - "4425": 3.47273, - "4430": 3.44742, - "4435": 3.49082, - "4440": 3.48522, - "4445": 3.43744, - "4450": 
3.3974, - "4455": 3.4624, - "4460": 3.43415, - "4465": 3.45284, - "4470": 3.42199, - "4475": 3.45352, - "4480": 3.44375, - "4485": 3.43643, - "4490": 3.43453, - "4495": 3.38677, - "4500": 3.45384, - "4505": 3.43515, - "4510": 3.44292, - "4515": 3.40605, - "4520": 3.43888, - "4525": 3.40731, - "4530": 3.44131, - "4535": 3.3963, - "4540": 3.42067, - "4545": 3.43217, - "4550": 3.47418, - "4555": 3.39854, - "4560": 3.42732, - "4565": 3.37837, - "4570": 3.41702, - "4575": 3.41117, - "4580": 3.45362, - "4585": 3.42636, - "4590": 3.42388, - "4595": 3.39853, - "4600": 3.39686, - "4605": 3.42144, - "4610": 3.41286, - "4615": 3.45309, - "4620": 3.39526, - "4625": 3.42534, - "4630": 3.4127, - "4635": 3.39195, - "4640": 3.4264, - "4645": 3.41975, - "4650": 3.43542, - "4655": 3.40687, - "4660": 3.39737, - "4665": 3.41231, - "4670": 3.446, - "4675": 3.40423, - "4680": 3.42886, - "4685": 3.42464, - "4690": 3.39897, - "4695": 3.38, - "4700": 3.3729, - "4705": 3.35029, - "4710": 3.40571, - "4715": 3.39222, - "4720": 3.38774, - "4725": 3.35968, - "4730": 3.39519, - "4735": 3.32069, - "4740": 3.36458, - "4745": 3.40698, - "4750": 3.36053, - "4755": 3.39053, - "4760": 3.41421, - "4765": 3.36022, - "4770": 3.36502, - "4775": 3.36135, - "4780": 3.37362, - "4785": 3.374, - "4790": 3.41163, - "4795": 3.39334, - "4800": 3.34583, - "4805": 3.41139, - "4810": 3.35086, - "4815": 3.38903, - "4820": 3.34814, - "4825": 3.40406, - "4830": 3.38314, - "4835": 3.3693, - "4840": 3.38086, - "4845": 3.32726, - "4850": 3.39372, - "4855": 3.39679, - "4860": 3.32727, - "4865": 3.36392, - "4870": 3.34896, - "4875": 3.39123, - "4880": 3.39974, - "4885": 3.35153, - "4890": 3.36191, - "4895": 3.35318, - "4900": 3.32971, - "4905": 3.33008, - "4910": 3.32861, - "4915": 3.37524, - "4920": 3.35807, - "4925": 3.31242, - "4930": 3.34376, - "4935": 3.3273, - "4940": 3.28784, - "4945": 3.36034, - "4950": 3.29629, - "4955": 3.40365, - "4960": 3.3479, - "4965": 3.34204, - "4970": 3.33369, - "4975": 3.34388, - "4980": 
3.36573, - "4985": 3.35352, - "4990": 3.33542, - "4995": 3.3795, - "5000": 3.30893, - "5005": 3.35715, - "5010": 3.36146, - "5015": 3.30923, - "5020": 3.28653, - "5025": 3.31605, - "5030": 3.32648, - "5035": 3.32963, - "5040": 3.30481, - "5045": 3.34994, - "5050": 3.30693, - "5055": 3.32632, - "5060": 3.28843, - "5065": 3.33396, - "5070": 3.33431, - "5075": 3.34337, - "5080": 3.31868, - "5085": 3.34518, - "5090": 3.32323, - "5095": 3.29022, - "5100": 3.32026, - "5105": 3.32744, - "5110": 3.3329, - "5115": 3.3038, - "5120": 3.34196, - "5125": 3.3184, - "5130": 3.31738, - "5135": 3.30105, - "5140": 3.3111, - "5145": 3.31125, - "5150": 3.32063, - "5155": 3.31567, - "5160": 3.31039, - "5165": 3.34534, - "5170": 3.23105, - "5175": 3.31877, - "5180": 3.28445, - "5185": 3.30691, - "5190": 3.32611, - "5195": 3.30561, - "5200": 3.31019, - "5205": 3.34654, - "5210": 3.28506, - "5215": 3.2874, - "5220": 3.28219, - "5225": 3.28677, - "5230": 3.32011, - "5235": 3.27975, - "5240": 3.27349, - "5245": 3.29646, - "5250": 3.3023, - "5255": 3.28615, - "5260": 3.31039, - "5265": 3.27007, - "5270": 3.25412, - "5275": 3.25534, - "5280": 3.28407, - "5285": 3.30874, - "5290": 3.2589, - "5295": 3.27448, - "5300": 3.27858, - "5305": 3.26656, - "5310": 3.32809, - "5315": 3.25873, - "5320": 3.30633, - "5325": 3.3111, - "5330": 3.27899, - "5335": 3.28833, - "5340": 3.23016, - "5345": 3.28336, - "5350": 3.28737, - "5355": 3.28737, - "5360": 3.23407, - "5365": 3.25011, - "5370": 3.28855, - "5375": 3.26985, - "5380": 3.24418, - "5385": 3.28394, - "5390": 3.28221, - "5395": 3.20448, - "5400": 3.30114, - "5405": 3.21525, - "5410": 3.29188, - "5415": 3.22284, - "5420": 3.25707, - "5425": 3.23689, - "5430": 3.24779, - "5435": 3.2811, - "5440": 3.21236, - "5445": 3.24176, - "5450": 3.24576, - "5455": 3.22991, - "5460": 3.25196, - "5465": 3.29692, - "5470": 3.27194, - "5475": 3.20136, - "5480": 3.28214, - "5485": 3.24325, - "5490": 3.26633, - "5495": 3.27183, - "5500": 3.22718, - "5505": 3.23914, - 
"5510": 3.28342, - "5515": 3.27035, - "5520": 3.23742, - "5525": 3.28473, - "5530": 3.22923, - "5535": 3.26258, - "5540": 3.25366, - "5545": 3.26198, - "5550": 3.24962, - "5555": 3.22875, - "5560": 3.22306, - "5565": 3.26845, - "5570": 3.22989, - "5575": 3.26435, - "5580": 3.23553, - "5585": 3.18594, - "5590": 3.24664, - "5595": 3.2105, - "5600": 3.25488, - "5605": 3.17461, - "5610": 3.2604, - "5615": 3.25606, - "5620": 3.2609, - "5625": 3.25214, - "5630": 3.24091, - "5635": 3.21924, - "5640": 3.24377, - "5645": 3.20743, - "5650": 3.2076, - "5655": 3.20542, - "5660": 3.20971, - "5665": 3.21069, - "5670": 3.20056, - "5675": 3.22863, - "5680": 3.19922, - "5685": 3.20573, - "5690": 3.2077, - "5695": 3.24414, - "5700": 3.19628, - "5705": 3.18515, - "5710": 3.17855, - "5715": 3.28582, - "5720": 3.2496, - "5725": 3.2002, - "5730": 3.24085, - "5735": 3.22905, - "5740": 3.22477, - "5745": 3.20281, - "5750": 3.23329, - "5755": 3.23832, - "5760": 3.22288, - "5765": 3.22651, - "5770": 3.25303, - "5775": 3.19712, - "5780": 3.21565, - "5785": 3.21756, - "5790": 3.22715, - "5795": 3.22463, - "5800": 3.16888, - "5805": 3.18332, - "5810": 3.22432, - "5815": 3.20302, - "5820": 3.16241, - "5825": 3.20754, - "5830": 3.1647, - "5835": 3.17395, - "5840": 3.20628, - "5845": 3.217, - "5850": 3.21594, - "5855": 3.15148, - "5860": 3.17119, - "5865": 3.20009, - "5870": 3.16136, - "5875": 3.20014, - "5880": 3.19456, - "5885": 3.19488, - "5890": 3.21776, - "5895": 3.23301, - "5900": 3.1895, - "5905": 3.21986, - "5910": 3.20185, - "5915": 3.17464, - "5920": 3.1915, - "5925": 3.15681, - "5930": 3.19135, - "5935": 3.19128, - "5940": 3.2051, - "5945": 3.21968, - "5950": 3.20213, - "5955": 3.16275, - "5960": 3.22598, - "5965": 3.17666, - "5970": 3.21828, - "5975": 3.18539, - "5980": 3.25556, - "5985": 3.14035, - "5990": 3.2373, - "5995": 3.15341, - "6000": 3.17562, - "6005": 3.15642, - "6010": 3.15958, - "6015": 3.16383, - "6020": 3.17057, - "6025": 3.20846, - "6030": 3.14683, - "6035": 3.20108, - 
"6040": 3.18034, - "6045": 3.19784, - "6050": 3.19841, - "6055": 3.17123, - "6060": 3.18513, - "6065": 3.20946, - "6070": 3.16514, - "6075": 3.13204, - "6080": 3.19182, - "6085": 3.15022, - "6090": 3.18799, - "6095": 3.18454, - "6100": 3.13968, - "6105": 3.18911, - "6110": 3.13194, - "6115": 3.18032, - "6120": 3.17268, - "6125": 3.17817, - "6130": 3.16826, - "6135": 3.16641, - "6140": 3.16491, - "6145": 3.14203, - "6150": 3.17849, - "6155": 3.14973, - "6160": 3.12836, - "6165": 3.15943, - "6170": 3.14366, - "6175": 3.14619, - "6180": 3.14564, - "6185": 3.18694, - "6190": 3.15491, - "6195": 3.12582, - "6200": 3.15218, - "6205": 3.14598, - "6210": 3.10092, - "6215": 3.15518, - "6220": 3.1544, - "6225": 3.17142, - "6230": 3.10668, - "6235": 3.14063, - "6240": 3.08394, - "6245": 3.18223, - "6250": 3.14309, - "6255": 3.15773, - "6260": 3.14125, - "6265": 3.15597, - "6270": 3.10065, - "6275": 3.12382, - "6280": 3.13503, - "6285": 3.11829, - "6290": 3.14415, - "6295": 3.15298, - "6300": 3.15403, - "6305": 3.21086, - "6310": 3.11266, - "6315": 3.10982, - "6320": 3.16047, - "6325": 3.10246, - "6330": 3.16954, - "6335": 3.15391, - "6340": 3.10904, - "6345": 3.16578, - "6350": 3.11808, - "6355": 3.11742, - "6360": 3.1108, - "6365": 3.14775, - "6370": 3.16278, - "6375": 3.1337, - "6380": 3.15125, - "6385": 3.17081, - "6390": 3.12597, - "6395": 3.10466, - "6400": 3.10591, - "6405": 3.18617, - "6410": 3.17298, - "6415": 3.12537, - "6420": 3.17096, - "6425": 3.17458, - "6430": 3.16659, - "6435": 3.12451, - "6440": 3.13606, - "6445": 3.15196, - "6450": 3.09161, - "6455": 3.08666, - "6460": 3.13082, - "6465": 3.16786, - "6470": 3.13951, - "6475": 3.13285, - "6480": 3.15191, - "6485": 3.11206, - "6490": 3.0797, - "6495": 3.16564, - "6500": 3.14177, - "6505": 3.08566, - "6510": 3.14483, - "6515": 3.16369, - "6520": 3.09044, - "6525": 3.14867, - "6530": 3.10896, - "6535": 3.12403, - "6540": 3.18005, - "6545": 3.11404, - "6550": 3.11103, - "6555": 3.10947, - "6560": 3.0737, - "6565": 
3.07934, - "6570": 3.10438, - "6575": 3.05844, - "6580": 3.17411, - "6585": 3.10694, - "6590": 3.0877, - "6595": 3.10332, - "6600": 3.1032, - "6605": 3.08625, - "6610": 3.08405, - "6615": 3.1316, - "6620": 3.076, - "6625": 3.09705, - "6630": 3.09309, - "6635": 3.12933, - "6640": 3.08864, - "6645": 3.10948, - "6650": 3.1378, - "6655": 3.07416, - "6660": 3.11313, - "6665": 3.12487, - "6670": 3.08048, - "6675": 3.10457, - "6680": 3.10673, - "6685": 3.14077, - "6690": 3.11651, - "6695": 3.12176, - "6700": 3.1127, - "6705": 3.09107, - "6710": 3.10728, - "6715": 3.05842, - "6720": 3.13504, - "6725": 3.12621, - "6730": 3.1099, - "6735": 3.10898, - "6740": 3.11731, - "6745": 3.0901, - "6750": 3.10983, - "6755": 3.06749, - "6760": 3.06624, - "6765": 3.08509, - "6770": 3.07057, - "6775": 3.10523, - "6780": 3.07455, - "6785": 3.07959, - "6790": 3.10472, - "6795": 3.07166, - "6800": 3.09692, - "6805": 3.08719, - "6810": 3.10858, - "6815": 3.04354, - "6820": 3.07401, - "6825": 3.10257, - "6830": 3.08637, - "6835": 3.06002, - "6840": 3.0654, - "6845": 3.11054, - "6850": 3.08009, - "6855": 3.11065, - "6860": 3.06305, - "6865": 3.10876, - "6870": 3.07538, - "6875": 3.07578, - "6880": 3.08642, - "6885": 3.05135, - "6890": 3.0749, - "6895": 3.05299, - "6900": 3.05973, - "6905": 3.07506, - "6910": 3.09159, - "6915": 3.11333, - "6920": 3.06615, - "6925": 3.08379, - "6930": 3.06742, - "6935": 3.02485, - "6940": 3.06623, - "6945": 3.05639, - "6950": 3.07964, - "6955": 3.05853, - "6960": 3.05554, - "6965": 3.09907, - "6970": 3.03589, - "6975": 3.1075, - "6980": 3.06776, - "6985": 3.06784, - "6990": 3.11146, - "6995": 3.09126, - "7000": 3.02783, - "7005": 3.09757, - "7010": 3.0779, - "7015": 3.07385, - "7020": 3.10018, - "7025": 3.08417, - "7030": 3.08746, - "7035": 3.04096, - "7040": 3.01984, - "7045": 3.07968, - "7050": 3.09817, - "7055": 3.03816, - "7060": 3.09848, - "7065": 3.11109, - "7070": 3.05748, - "7075": 3.06319, - "7080": 3.11208, - "7085": 3.03557, - "7090": 3.05692, - 
"7095": 3.04652, - "7100": 3.07149, - "7105": 3.02035, - "7110": 3.0623, - "7115": 3.03547, - "7120": 3.07999, - "7125": 3.03377, - "7130": 3.04883, - "7135": 3.05627, - "7140": 3.06014, - "7145": 3.0691, - "7150": 3.02375, - "7155": 3.08612, - "7160": 3.0047, - "7165": 3.0418, - "7170": 3.07701, - "7175": 3.03661, - "7180": 3.07042, - "7185": 3.09125, - "7190": 3.05302, - "7195": 3.06058, - "7200": 3.06039, - "7205": 3.04153, - "7210": 3.08703, - "7215": 3.06723, - "7220": 3.08798, - "7225": 3.06993, - "7230": 3.07403, - "7235": 3.05435, - "7240": 3.05017, - "7245": 3.07131, - "7250": 3.01274, - "7255": 3.03229, - "7260": 3.06928, - "7265": 3.00261, - "7270": 3.04138, - "7275": 3.04223, - "7280": 3.04181, - "7285": 3.05407, - "7290": 3.07344, - "7295": 3.06537, - "7300": 3.02809, - "7305": 3.02877, - "7310": 3.04926, - "7315": 3.07646, - "7320": 3.05669, - "7325": 3.06149, - "7330": 3.02592, - "7335": 3.02733, - "7340": 3.06004, - "7345": 3.0091, - "7350": 3.06031, - "7355": 3.04495, - "7360": 3.03923, - "7365": 3.03845, - "7370": 3.03136, - "7375": 2.9999, - "7380": 3.06202, - "7385": 3.07693, - "7390": 3.06411, - "7395": 3.02221, - "7400": 3.07516, - "7405": 3.04382, - "7410": 3.06023, - "7415": 3.05228, - "7420": 3.03261, - "7425": 3.08586, - "7430": 3.0272, - "7435": 3.01757, - "7440": 3.0377, - "7445": 3.01394, - "7450": 2.99482, - "7455": 3.04735, - "7460": 3.04105, - "7465": 3.04977, - "7470": 3.05673, - "7475": 3.06741, - "7480": 3.02749, - "7485": 2.98653, - "7490": 2.98973, - "7495": 2.99863, - "7500": 3.02945, - "7505": 3.0059, - "7510": 2.97871, - "7515": 3.02404, - "7520": 3.01697, - "7525": 2.98295, - "7530": 3.02636, - "7535": 3.04423, - "7540": 3.02494, - "7545": 3.0588, - "7550": 3.06534, - "7555": 3.00732, - "7560": 3.01283, - "7565": 3.00874, - "7570": 3.03442, - "7575": 2.97962, - "7580": 3.03034, - "7585": 3.01793, - "7590": 3.01504, - "7595": 3.07403, - "7600": 3.03015, - "7605": 3.02144, - "7610": 3.00533, - "7615": 2.99602, - "7620": 
2.99265, - "7625": 3.03762, - "7630": 3.02026, - "7635": 3.01854, - "7640": 3.01712, - "7645": 3.04845, - "7650": 3.04439, - "7655": 3.08975, - "7660": 2.96325, - "7665": 3.02969, - "7670": 3.01245, - "7675": 3.00305, - "7680": 2.9998, - "7685": 3.07016, - "7690": 3.01368, - "7695": 2.99671, - "7700": 3.05056, - "7705": 3.01282, - "7710": 3.05828, - "7715": 2.99725, - "7720": 3.08276, - "7725": 2.98411, - "7730": 2.99881, - "7735": 3.02714, - "7740": 3.00979, - "7745": 3.00319, - "7750": 3.01, - "7755": 3.01954, - "7760": 2.98571, - "7765": 3.00397, - "7770": 3.02732, - "7775": 2.98978, - "7780": 2.97862, - "7785": 3.01472, - "7790": 2.99842, - "7795": 3.02413, - "7800": 3.00827, - "7805": 3.01176, - "7810": 3.03082, - "7815": 3.00244, - "7820": 3.0019, - "7825": 3.03231, - "7830": 3.03143, - "7835": 2.96605, - "7840": 3.04336, - "7845": 2.97937, - "7850": 2.93977, - "7855": 2.98529, - "7860": 2.98344, - "7865": 3.02956, - "7870": 2.9691, - "7875": 2.98838, - "7880": 3.00349, - "7885": 2.9968, - "7890": 3.03811, - "7895": 3.02857, - "7900": 3.03097, - "7905": 2.99876, - "7910": 3.0088, - "7915": 3.02527, - "7920": 3.01259, - "7925": 2.99646, - "7930": 3.02866, - "7935": 2.98913, - "7940": 3.03573, - "7945": 3.0501, - "7950": 2.96381, - "7955": 2.98711, - "7960": 2.96943, - "7965": 2.94566, - "7970": 2.9655, - "7975": 2.99544, - "7980": 3.00887, - "7985": 2.97698, - "7990": 2.97506, - "7995": 2.96124, - "8000": 3.02098, - "8005": 2.9801, - "8010": 2.97649, - "8015": 2.96466, - "8020": 2.97779, - "8025": 2.95601, - "8030": 2.97562, - "8035": 2.97196, - "8040": 2.95703, - "8045": 3.01604, - "8050": 3.01297, - "8055": 2.97453, - "8060": 3.00494, - "8065": 2.98862, - "8070": 2.96753, - "8075": 2.97734, - "8080": 3.01019, - "8085": 2.96754, - "8090": 2.98003, - "8095": 3.00216, - "8100": 2.95105, - "8105": 2.99247, - "8110": 2.98157, - "8115": 2.95999, - "8120": 2.97249, - "8125": 2.99946, - "8130": 2.97003, - "8135": 2.98766, - "8140": 2.96736, - "8145": 2.95939, - 
"8150": 2.98009, - "8155": 2.95146, - "8160": 2.997, - "8165": 2.9913, - "8170": 2.95554, - "8175": 2.95554, - "8180": 3.01376, - "8185": 2.98624, - "8190": 3.02032, - "8195": 2.99613, - "8200": 2.96412, - "8205": 2.97566, - "8210": 2.9781, - "8215": 2.99017, - "8220": 2.971, - "8225": 2.96329, - "8230": 2.99505, - "8235": 3.00306, - "8240": 2.97419, - "8245": 2.9738, - "8250": 3.00958, - "8255": 2.96716, - "8260": 2.97331, - "8265": 2.95555, - "8270": 2.97514, - "8275": 2.96718, - "8280": 2.94092, - "8285": 2.97838, - "8290": 2.96734, - "8295": 2.95246, - "8300": 2.96504, - "8305": 2.97504, - "8310": 2.97996, - "8315": 2.95732, - "8320": 2.97776, - "8325": 2.929, - "8330": 2.89908, - "8335": 2.96646, - "8340": 2.99201, - "8345": 2.94463, - "8350": 2.95886, - "8355": 2.98631, - "8360": 2.96643, - "8365": 2.98326, - "8370": 2.99094, - "8375": 2.93854, - "8380": 2.94099, - "8385": 2.97126, - "8390": 2.9453, - "8395": 2.97523, - "8400": 2.95927, - "8405": 2.97418, - "8410": 3.03057, - "8415": 2.93533, - "8420": 2.91801, - "8425": 2.97564, - "8430": 2.97808, - "8435": 2.93124, - "8440": 3.01239, - "8445": 2.99121, - "8450": 2.96616, - "8455": 2.97106, - "8460": 2.97975, - "8465": 2.92562, - "8470": 2.94697, - "8475": 2.99054, - "8480": 2.93097, - "8485": 2.93977, - "8490": 2.948, - "8495": 2.93336, - "8500": 2.96904, - "8505": 2.92233, - "8510": 3.00332, - "8515": 2.94052, - "8520": 2.95755, - "8525": 2.88522, - "8530": 2.95834, - "8535": 2.97603, - "8540": 2.93194, - "8545": 2.95741, - "8550": 2.92307, - "8555": 2.98961, - "8560": 2.99424, - "8565": 2.9514, - "8570": 2.94707, - "8575": 2.93509, - "8580": 2.9669, - "8585": 2.976, - "8590": 2.97659, - "8595": 2.97731, - "8600": 2.94787, - "8605": 2.94545, - "8610": 2.95479, - "8615": 2.96032, - "8620": 2.92346, - "8625": 2.94581, - "8630": 2.95087, - "8635": 2.94522, - "8640": 2.92578, - "8645": 2.98133, - "8650": 2.92232, - "8655": 2.96592, - "8660": 2.97073, - "8665": 2.95471, - "8670": 2.96657, - "8675": 2.93996, - 
"8680": 2.93576, - "8685": 2.94815, - "8690": 2.96442, - "8695": 2.97067, - "8700": 2.94799, - "8705": 2.91745, - "8710": 2.96979, - "8715": 2.91522, - "8720": 2.97447, - "8725": 2.94876, - "8730": 2.94256, - "8735": 2.97158, - "8740": 2.92587, - "8745": 2.96492, - "8750": 2.96628, - "8755": 2.93098, - "8760": 2.94924, - "8765": 2.91354, - "8770": 2.96822, - "8775": 2.94219, - "8780": 2.92859, - "8785": 2.94726, - "8790": 2.92803, - "8795": 2.96489, - "8800": 2.92662, - "8805": 2.90115, - "8810": 2.93145, - "8815": 2.93283, - "8820": 2.90387, - "8825": 2.92443, - "8830": 2.91245, - "8835": 2.89847, - "8840": 2.91518, - "8845": 2.92785, - "8850": 2.95695, - "8855": 2.92839, - "8860": 2.98878, - "8865": 2.93356, - "8870": 2.90865, - "8875": 2.92162, - "8880": 2.9295, - "8885": 2.9207, - "8890": 2.9404, - "8895": 2.92179, - "8900": 2.94464, - "8905": 2.93594, - "8910": 2.91993, - "8915": 2.90336, - "8920": 2.91127, - "8925": 2.97428, - "8930": 2.96209, - "8935": 2.97189, - "8940": 2.94882, - "8945": 2.94789, - "8950": 2.9328, - "8955": 2.91679, - "8960": 2.89858, - "8965": 2.92721, - "8970": 2.94082, - "8975": 2.90449, - "8980": 2.89797, - "8985": 2.92102, - "8990": 2.9662, - "8995": 2.9373, - "9000": 2.89467, - "9005": 2.9399, - "9010": 2.97901, - "9015": 2.90311, - "9020": 2.90423, - "9025": 2.92238, - "9030": 2.94518, - "9035": 2.85736, - "9040": 2.93491, - "9045": 2.92378, - "9050": 2.96087, - "9055": 2.88884, - "9060": 2.95609, - "9065": 2.98682, - "9070": 2.92665, - "9075": 2.94254, - "9080": 2.93301, - "9085": 2.9439, - "9090": 2.93648, - "9095": 2.89849, - "9100": 2.90017, - "9105": 2.89, - "9110": 2.93211, - "9115": 2.93981, - "9120": 2.97397, - "9125": 2.91648, - "9130": 2.92277, - "9135": 2.94086, - "9140": 2.94695, - "9145": 2.89447, - "9150": 2.92217, - "9155": 2.93169, - "9160": 2.93686, - "9165": 2.92557, - "9170": 2.9498, - "9175": 2.88716, - "9180": 2.93307, - "9185": 2.8947, - "9190": 2.94894, - "9195": 2.91222, - "9200": 2.93251, - "9205": 2.88702, 
- "9210": 2.93304, - "9215": 2.87965, - "9220": 2.90288, - "9225": 2.93315, - "9230": 2.86569, - "9235": 2.87842, - "9240": 2.89576, - "9245": 2.88279, - "9250": 2.88136, - "9255": 2.91192, - "9260": 2.87817, - "9265": 2.92175, - "9270": 2.89613, - "9275": 2.91313, - "9280": 2.91939, - "9285": 2.91903, - "9290": 2.93047, - "9295": 2.92844, - "9300": 2.87877, - "9305": 2.90909, - "9310": 2.89871, - "9315": 2.86609, - "9320": 2.86065, - "9325": 2.90436, - "9330": 2.95511, - "9335": 2.87572, - "9340": 2.93845, - "9345": 2.94693, - "9350": 2.9134, - "9355": 2.87737, - "9360": 2.89674, - "9365": 2.8823, - "9370": 2.93386, - "9375": 2.91236, - "9380": 2.86428, - "9385": 2.91358, - "9390": 2.92324, - "9395": 2.92024, - "9400": 2.89599, - "9405": 2.89197, - "9410": 2.9185, - "9415": 2.91775, - "9420": 2.89381, - "9425": 2.89983, - "9430": 2.87833, - "9435": 2.90417, - "9440": 2.89629, - "9445": 2.88366, - "9450": 2.89069, - "9455": 2.88969, - "9460": 2.94442, - "9465": 2.94721, - "9470": 2.88553, - "9475": 2.94033, - "9480": 2.88982, - "9485": 2.87815, - "9490": 2.89723, - "9495": 2.9225, - "9500": 2.89514, - "9505": 2.86794, - "9510": 2.894, - "9515": 2.90369, - "9520": 2.91102, - "9525": 2.89095, - "9530": 2.88696, - "9535": 2.91216 - } - }, - "num-zeros": { - "start_step": 1, - "end_step": 9535, - "step_interval": 5, - "values": { - "1": 1021640256.0, - "5": 1024063424.0, - "10": 1014250560.0, - "15": 1024077504.0, - "20": 1022486144.0, - "25": 1041373312.0, - "30": 1028112896.0, - "35": 1035625088.0, - "40": 1026328384.0, - "45": 1022350080.0, - "50": 1030098560.0, - "55": 1028966144.0, - "60": 1036320640.0, - "65": 1034679168.0, - "70": 1029374848.0, - "75": 1028745088.0, - "80": 1047575040.0, - "85": 1029448064.0, - "90": 1020467392.0, - "95": 1028310016.0, - "100": 1040961344.0, - "105": 1039436544.0, - "110": 1026879104.0, - "115": 1052312832.0, - "120": 1018863104.0, - "125": 1045372160.0, - "130": 1034330368.0, - "135": 1016615680.0, - "140": 1038582272.0, - 
"145": 1020688640.0, - "150": 1039788096.0, - "155": 1032796928.0, - "160": 1020952640.0, - "165": 1032424512.0, - "170": 1017396096.0, - "175": 1033427072.0, - "180": 1036119424.0, - "185": 1030573760.0, - "190": 1035673984.0, - "195": 1034555520.0, - "200": 1040973824.0, - "205": 1048500352.0, - "210": 1054481024.0, - "215": 1025159552.0, - "220": 1044962496.0, - "225": 1038076416.0, - "230": 1026222720.0, - "235": 1051134976.0, - "240": 1029276416.0, - "245": 1031397824.0, - "250": 1027879616.0, - "255": 1016929792.0, - "260": 1045008896.0, - "265": 1021330688.0, - "270": 1030964864.0, - "275": 1036911744.0, - "280": 1031743488.0, - "285": 1015014016.0, - "290": 1018756352.0, - "295": 1017237504.0, - "300": 1034761152.0, - "305": 1032166144.0, - "310": 1035583104.0, - "315": 1012734272.0, - "320": 1008275072.0, - "325": 1042741760.0, - "330": 1042870656.0, - "335": 1033508480.0, - "340": 1014464512.0, - "345": 1042618880.0, - "350": 1031852736.0, - "355": 1050844800.0, - "360": 1030258432.0, - "365": 1034595648.0, - "370": 1019436032.0, - "375": 1022144832.0, - "380": 1021326592.0, - "385": 1025589504.0, - "390": 1023195072.0, - "395": 1019653952.0, - "400": 1033520512.0, - "405": 1023880192.0, - "410": 1017910016.0, - "415": 1024288000.0, - "420": 1020624256.0, - "425": 1025854848.0, - "430": 1033854336.0, - "435": 1028182400.0, - "440": 1022090752.0, - "445": 1036768256.0, - "450": 1024997376.0, - "455": 1013852096.0, - "460": 1022093824.0, - "465": 1041431552.0, - "470": 1029038016.0, - "475": 1010065792.0, - "480": 1047607616.0, - "485": 1029724928.0, - "490": 1044668160.0, - "495": 1025229952.0, - "500": 1037464960.0, - "505": 1032181376.0, - "510": 1042853056.0, - "515": 1026159744.0, - "520": 1013409792.0, - "525": 1035147520.0, - "530": 1016375552.0, - "535": 1040113024.0, - "540": 1035052352.0, - "545": 1032113664.0, - "550": 1018673408.0, - "555": 1008638656.0, - "560": 1011927680.0, - "565": 1041824320.0, - "570": 1034942208.0, - "575": 1010199040.0, 
- "580": 1032210496.0, - "585": 1041262144.0, - "590": 1038867968.0, - "595": 1035743104.0, - "600": 1023772736.0, - "605": 1032294272.0, - "610": 1037748672.0, - "615": 1005974784.0, - "620": 1040407424.0, - "625": 1045209216.0, - "630": 1034414464.0, - "635": 1028523008.0, - "640": 1022644928.0, - "645": 1035876032.0, - "650": 1009255680.0, - "655": 997757696.0, - "660": 1029710464.0, - "665": 1025532608.0, - "670": 1048812288.0, - "675": 1025202688.0, - "680": 1019340032.0, - "685": 1027832512.0, - "690": 1029230080.0, - "695": 1040024576.0, - "700": 1042031680.0, - "705": 1034382976.0, - "710": 1020441792.0, - "715": 1031472128.0, - "720": 1040274560.0, - "725": 1023279936.0, - "730": 1022792704.0, - "735": 1025085696.0, - "740": 1038382656.0, - "745": 1045205504.0, - "750": 1013180928.0, - "755": 1031644032.0, - "760": 1032783552.0, - "765": 1027135936.0, - "770": 1023967232.0, - "775": 1025895168.0, - "780": 1038166464.0, - "785": 1025486400.0, - "790": 1040810624.0, - "795": 1032531200.0, - "800": 1039592768.0, - "805": 1024318016.0, - "810": 1034725632.0, - "815": 1036000448.0, - "820": 1035671552.0, - "825": 1051375360.0, - "830": 1035406784.0, - "835": 1022547776.0, - "840": 1036875648.0, - "845": 1025700352.0, - "850": 1048529920.0, - "855": 1014986432.0, - "860": 1033098624.0, - "865": 1031543040.0, - "870": 1040902912.0, - "875": 1023938304.0, - "880": 1028395904.0, - "885": 1054406656.0, - "890": 1019537152.0, - "895": 1045189824.0, - "900": 1031772928.0, - "905": 1020970688.0, - "910": 1031386112.0, - "915": 1032926912.0, - "920": 1038459392.0, - "925": 1026754560.0, - "930": 1025378752.0, - "935": 1031126464.0, - "940": 1057933568.0, - "945": 1029823104.0, - "950": 1014412480.0, - "955": 1032173696.0, - "960": 1026152064.0, - "965": 1062678976.0, - "970": 1030096128.0, - "975": 1036903680.0, - "980": 1027049216.0, - "985": 1030676736.0, - "990": 1020676864.0, - "995": 1042301760.0, - "1000": 1036831616.0, - "1005": 1050206080.0, - "1010": 
1023801984.0, - "1015": 1020539008.0, - "1020": 1042587392.0, - "1025": 1037943808.0, - "1030": 1049210048.0, - "1035": 1012483456.0, - "1040": 1023092032.0, - "1045": 1039520768.0, - "1050": 1026825728.0, - "1055": 1034861184.0, - "1060": 1046128704.0, - "1065": 1036804096.0, - "1070": 1019994880.0, - "1075": 1025341696.0, - "1080": 1014979200.0, - "1085": 1030007744.0, - "1090": 1029062016.0, - "1095": 1020309888.0, - "1100": 1039835008.0, - "1105": 1048600064.0, - "1110": 1020704448.0, - "1115": 1024782720.0, - "1120": 1061896576.0, - "1125": 1043311616.0, - "1130": 1031219456.0, - "1135": 1041360512.0, - "1140": 1021486272.0, - "1145": 1051696128.0, - "1150": 1035590400.0, - "1155": 1029590528.0, - "1160": 1042564800.0, - "1165": 1026810496.0, - "1170": 1018001408.0, - "1175": 1033684032.0, - "1180": 1035633536.0, - "1185": 1023928960.0, - "1190": 1033160320.0, - "1195": 1024228608.0, - "1200": 1039116544.0, - "1205": 1031740800.0, - "1210": 1053250560.0, - "1215": 1024617600.0, - "1220": 1009041280.0, - "1225": 1036679680.0, - "1230": 1041257984.0, - "1235": 1053974912.0, - "1240": 1030356224.0, - "1245": 1017684864.0, - "1250": 1022772992.0, - "1255": 1033439104.0, - "1260": 1034284736.0, - "1265": 1034003840.0, - "1270": 1037323264.0, - "1275": 1029345792.0, - "1280": 1046489856.0, - "1285": 1028285120.0, - "1290": 1036578176.0, - "1295": 1032421696.0, - "1300": 1033065728.0, - "1305": 1030027008.0, - "1310": 1051262976.0, - "1315": 1035373184.0, - "1320": 1028263936.0, - "1325": 1049972736.0, - "1330": 1030133376.0, - "1335": 1031164800.0, - "1340": 1012758912.0, - "1345": 1044639232.0, - "1350": 1034957312.0, - "1355": 1033623744.0, - "1360": 1036683392.0, - "1365": 1038588672.0, - "1370": 1039851904.0, - "1375": 1034117632.0, - "1380": 1022886656.0, - "1385": 1018084096.0, - "1390": 1049054400.0, - "1395": 1034868352.0, - "1400": 1034998144.0, - "1405": 1034131456.0, - "1410": 1036368256.0, - "1415": 1043577600.0, - "1420": 1026111104.0, - "1425": 
1033320320.0, - "1430": 1012808128.0, - "1435": 1038394880.0, - "1440": 1020971904.0, - "1445": 1032459904.0, - "1450": 1014039296.0, - "1455": 1011673984.0, - "1460": 1043275904.0, - "1465": 1014361600.0, - "1470": 1020655360.0, - "1475": 1030231296.0, - "1480": 1029370496.0, - "1485": 1022997696.0, - "1490": 1026783360.0, - "1495": 1021815744.0, - "1500": 1027177088.0, - "1505": 1034882880.0, - "1510": 1014397120.0, - "1515": 1042136832.0, - "1520": 1025792640.0, - "1525": 1036335872.0, - "1530": 1039948992.0, - "1535": 1047640192.0, - "1540": 1043539840.0, - "1545": 1034043520.0, - "1550": 1016108736.0, - "1555": 1015573504.0, - "1560": 1055021824.0, - "1565": 1015593728.0, - "1570": 1018243840.0, - "1575": 1032515456.0, - "1580": 1012984768.0, - "1585": 1025327680.0, - "1590": 1034127360.0, - "1595": 1057393664.0, - "1600": 1026867584.0, - "1605": 1019994624.0, - "1610": 1031268736.0, - "1615": 1035274880.0, - "1620": 1018016000.0, - "1625": 1028272512.0, - "1630": 1027205376.0, - "1635": 1023799040.0, - "1640": 1034120832.0, - "1645": 1021814528.0, - "1650": 1015262080.0, - "1655": 1018280064.0, - "1660": 1047982976.0, - "1665": 1027060352.0, - "1670": 1048219904.0, - "1675": 1021102912.0, - "1680": 1043288320.0, - "1685": 1052719360.0, - "1690": 1026724032.0, - "1695": 1040385280.0, - "1700": 1018036352.0, - "1705": 1020480640.0, - "1710": 1021024448.0, - "1715": 1026932992.0, - "1720": 1028350208.0, - "1725": 1034363136.0, - "1730": 1013692352.0, - "1735": 1018429696.0, - "1740": 1057257024.0, - "1745": 1029261952.0, - "1750": 1024357888.0, - "1755": 1029970112.0, - "1760": 1022192512.0, - "1765": 1040477056.0, - "1770": 1029669760.0, - "1775": 1046196864.0, - "1780": 1021955712.0, - "1785": 1035109376.0, - "1790": 1028263808.0, - "1795": 1031023616.0, - "1800": 1028300480.0, - "1805": 1025669248.0, - "1810": 1021556096.0, - "1815": 1033440256.0, - "1820": 1034885888.0, - "1825": 1020208448.0, - "1830": 1013885632.0, - "1835": 1031382272.0, - "1840": 
1040391040.0, - "1845": 1034828800.0, - "1850": 1014480064.0, - "1855": 1019418816.0, - "1860": 1019569536.0, - "1865": 1035942400.0, - "1870": 1026242368.0, - "1875": 1031525248.0, - "1880": 1011590784.0, - "1885": 1041065536.0, - "1890": 1035000704.0, - "1895": 1028959488.0, - "1900": 1033997568.0, - "1905": 1027123776.0, - "1910": 1029217792.0, - "1915": 1030492864.0, - "1920": 1042920384.0, - "1925": 1038419392.0, - "1930": 1019304512.0, - "1935": 1032535936.0, - "1940": 1027806336.0, - "1945": 1034205056.0, - "1950": 1006036224.0, - "1955": 1032577600.0, - "1960": 1015720256.0, - "1965": 1029088512.0, - "1970": 1021554176.0, - "1975": 1034048000.0, - "1980": 1029366912.0, - "1985": 1027784960.0, - "1990": 1020947840.0, - "1995": 1010422912.0, - "2000": 1039617152.0, - "2005": 1001486208.0, - "2010": 1020422912.0, - "2015": 1032034048.0, - "2020": 1036298624.0, - "2025": 1037172352.0, - "2030": 1029770752.0, - "2035": 1040333312.0, - "2040": 1030112768.0, - "2045": 1032700800.0, - "2050": 1008016064.0, - "2055": 1045723840.0, - "2060": 1028142400.0, - "2065": 1038799488.0, - "2070": 1045645184.0, - "2075": 1035237952.0, - "2080": 1022882304.0, - "2085": 1024815424.0, - "2090": 1034363392.0, - "2095": 1005220672.0, - "2100": 1034644096.0, - "2105": 1035581312.0, - "2110": 1030685952.0, - "2115": 1029798528.0, - "2120": 1018846080.0, - "2125": 1021863168.0, - "2130": 1026638080.0, - "2135": 1053279488.0, - "2140": 1017060608.0, - "2145": 1019635072.0, - "2150": 1037130752.0, - "2155": 1033302784.0, - "2160": 1049035776.0, - "2165": 1039682816.0, - "2170": 1020308096.0, - "2175": 1027338752.0, - "2180": 1041703168.0, - "2185": 1028895360.0, - "2190": 1029309888.0, - "2195": 1028944768.0, - "2200": 1039639680.0, - "2205": 1036972288.0, - "2210": 1031740544.0, - "2215": 1021404480.0, - "2220": 1020910848.0, - "2225": 1033403072.0, - "2230": 1014201856.0, - "2235": 1029395968.0, - "2240": 1029885184.0, - "2245": 1026005824.0, - "2250": 1046268800.0, - "2255": 
1032951936.0, - "2260": 1047494592.0, - "2265": 1023721088.0, - "2270": 1022566144.0, - "2275": 1028537600.0, - "2280": 1034973568.0, - "2285": 1031819968.0, - "2290": 1038650048.0, - "2295": 1028816000.0, - "2300": 1034450496.0, - "2305": 1032314496.0, - "2310": 1013586496.0, - "2315": 1048182656.0, - "2320": 1035210368.0, - "2325": 1046966016.0, - "2330": 1014696192.0, - "2335": 1027382272.0, - "2340": 1036736512.0, - "2345": 1020186944.0, - "2350": 1031017728.0, - "2355": 1037474240.0, - "2360": 1032608128.0, - "2365": 1028041856.0, - "2370": 1021004224.0, - "2375": 1022912000.0, - "2380": 1048556224.0, - "2385": 1044140736.0, - "2390": 1021986816.0, - "2395": 1020595584.0, - "2400": 1026930816.0, - "2405": 1038387200.0, - "2410": 1045395200.0, - "2415": 1048454656.0, - "2420": 1032227712.0, - "2425": 1029562176.0, - "2430": 1030386176.0, - "2435": 1029217856.0, - "2440": 1029168000.0, - "2445": 1033132160.0, - "2450": 1038557824.0, - "2455": 1034721536.0, - "2460": 1039984192.0, - "2465": 1032500992.0, - "2470": 1024143872.0, - "2475": 1016539520.0, - "2480": 1023613248.0, - "2485": 1021030592.0, - "2490": 1035920448.0, - "2495": 1032967360.0, - "2500": 1028107008.0, - "2505": 1015385600.0, - "2510": 1030967104.0, - "2515": 1025700096.0, - "2520": 1033326208.0, - "2525": 1029692800.0, - "2530": 1023986560.0, - "2535": 1071069696.0, - "2540": 1024537984.0, - "2545": 1033798784.0, - "2550": 1029448064.0, - "2555": 1029183488.0, - "2560": 1018115072.0, - "2565": 1031598528.0, - "2570": 1022847232.0, - "2575": 1026503104.0, - "2580": 1038622592.0, - "2585": 1025899456.0, - "2590": 1026100800.0, - "2595": 1046623104.0, - "2600": 1031103360.0, - "2605": 1001910656.0, - "2610": 1028423360.0, - "2615": 1025564544.0, - "2620": 1038651392.0, - "2625": 1026996352.0, - "2630": 1036831424.0, - "2635": 1021198400.0, - "2640": 1021865856.0, - "2645": 1039153408.0, - "2650": 1025943488.0, - "2655": 1013255808.0, - "2660": 1032645248.0, - "2665": 1035218048.0, - "2670": 
1036437632.0, - "2675": 1039296064.0, - "2680": 1041661696.0, - "2685": 1034565504.0, - "2690": 1058871168.0, - "2695": 1019879552.0, - "2700": 1062626816.0, - "2705": 1035376320.0, - "2710": 1019542400.0, - "2715": 1031885824.0, - "2720": 1016403200.0, - "2725": 1040594688.0, - "2730": 1019586688.0, - "2735": 1030889856.0, - "2740": 1029290752.0, - "2745": 1040687744.0, - "2750": 1023880448.0, - "2755": 1011865664.0, - "2760": 1027684864.0, - "2765": 1030882240.0, - "2770": 1033119872.0, - "2775": 1026332352.0, - "2780": 1033684224.0, - "2785": 1024589888.0, - "2790": 1033734272.0, - "2795": 1045949184.0, - "2800": 1040286016.0, - "2805": 1019944192.0, - "2810": 1031449600.0, - "2815": 1030932736.0, - "2820": 1037855616.0, - "2825": 1041684096.0, - "2830": 1030459904.0, - "2835": 1013508352.0, - "2840": 1031449600.0, - "2845": 1030129920.0, - "2850": 1026617600.0, - "2855": 1024705280.0, - "2860": 1031700096.0, - "2865": 1027428800.0, - "2870": 1026690048.0, - "2875": 1012777024.0, - "2880": 1038301568.0, - "2885": 1017901184.0, - "2890": 1044200064.0, - "2895": 1036459136.0, - "2900": 1030652928.0, - "2905": 1035957376.0, - "2910": 1038718272.0, - "2915": 1039385408.0, - "2920": 1034781248.0, - "2925": 1043267840.0, - "2930": 1038229696.0, - "2935": 1021222144.0, - "2940": 1042307456.0, - "2945": 1045232384.0, - "2950": 1047525952.0, - "2955": 1034172928.0, - "2960": 1020891904.0, - "2965": 1027307840.0, - "2970": 1038796288.0, - "2975": 1034007296.0, - "2980": 1049590400.0, - "2985": 1034846016.0, - "2990": 1026008576.0, - "2995": 1034919296.0, - "3000": 1039017856.0, - "3005": 1038158848.0, - "3010": 1010907712.0, - "3015": 1044976064.0, - "3020": 1034050688.0, - "3025": 1037763840.0, - "3030": 1027722816.0, - "3035": 1041821056.0, - "3040": 1035311872.0, - "3045": 1027255296.0, - "3050": 1029708032.0, - "3055": 1028029568.0, - "3060": 1049976960.0, - "3065": 1024067200.0, - "3070": 1011545728.0, - "3075": 1042846272.0, - "3080": 1036094912.0, - "3085": 
1030387456.0, - "3090": 1035262976.0, - "3095": 1013803008.0, - "3100": 1030144896.0, - "3105": 1017609088.0, - "3110": 1033370816.0, - "3115": 1023737728.0, - "3120": 1024877504.0, - "3125": 1046537216.0, - "3130": 1024676160.0, - "3135": 1025722496.0, - "3140": 1043778176.0, - "3145": 1044372672.0, - "3150": 1016483328.0, - "3155": 1042487936.0, - "3160": 1026834688.0, - "3165": 1031199360.0, - "3170": 1024332800.0, - "3175": 1024368640.0, - "3180": 1018204288.0, - "3185": 1034352512.0, - "3190": 1019221888.0, - "3195": 1028425408.0, - "3200": 1036080640.0, - "3205": 1016076160.0, - "3210": 1034109312.0, - "3215": 1031349312.0, - "3220": 1040833664.0, - "3225": 1022835008.0, - "3230": 1033255744.0, - "3235": 1019975488.0, - "3240": 1038131840.0, - "3245": 1031643136.0, - "3250": 1022390656.0, - "3255": 1032876672.0, - "3260": 1037751616.0, - "3265": 1021622656.0, - "3270": 1031242880.0, - "3275": 1038461184.0, - "3280": 1023236992.0, - "3285": 1031615424.0, - "3290": 1045247616.0, - "3295": 1043177536.0, - "3300": 1035084224.0, - "3305": 1042662400.0, - "3310": 1058092096.0, - "3315": 1024282880.0, - "3320": 1046015296.0, - "3325": 1023179008.0, - "3330": 1048037248.0, - "3335": 1036690560.0, - "3340": 1042123392.0, - "3345": 1030897920.0, - "3350": 1020621696.0, - "3355": 1025960576.0, - "3360": 1030305344.0, - "3365": 1031171520.0, - "3370": 1036454144.0, - "3375": 1023472384.0, - "3380": 1032383744.0, - "3385": 1038081536.0, - "3390": 1052811072.0, - "3395": 1012090496.0, - "3400": 1019209600.0, - "3405": 1021780224.0, - "3410": 1028433728.0, - "3415": 1058222400.0, - "3420": 1033492480.0, - "3425": 1029580352.0, - "3430": 1021150976.0, - "3435": 1034991872.0, - "3440": 1017961600.0, - "3445": 1025537280.0, - "3450": 1032254336.0, - "3455": 1036261312.0, - "3460": 1052071808.0, - "3465": 1027114240.0, - "3470": 1043729536.0, - "3475": 1033265792.0, - "3480": 1026619776.0, - "3485": 1029215232.0, - "3490": 1041041408.0, - "3495": 1019252224.0, - "3500": 
1032059904.0, - "3505": 1025753728.0, - "3510": 1044367616.0, - "3515": 1013817280.0, - "3520": 1021846400.0, - "3525": 1032175552.0, - "3530": 1029789056.0, - "3535": 1034568704.0, - "3540": 1017731456.0, - "3545": 1035658880.0, - "3550": 1024535296.0, - "3555": 1035866112.0, - "3560": 1029737600.0, - "3565": 1028900160.0, - "3570": 1046029888.0, - "3575": 1039186304.0, - "3580": 1010838336.0, - "3585": 1031737728.0, - "3590": 1041450688.0, - "3595": 1037636800.0, - "3600": 1032763584.0, - "3605": 1045822272.0, - "3610": 1039235200.0, - "3615": 1036870144.0, - "3620": 1026929664.0, - "3625": 1033931136.0, - "3630": 1017582464.0, - "3635": 1026629056.0, - "3640": 1039529088.0, - "3645": 1022655872.0, - "3650": 1036842624.0, - "3655": 1023990144.0, - "3660": 1014987456.0, - "3665": 1026118784.0, - "3670": 1041672448.0, - "3675": 1033250304.0, - "3680": 1015353984.0, - "3685": 1029122304.0, - "3690": 1026204416.0, - "3695": 1043800832.0, - "3700": 1028613504.0, - "3705": 1049485312.0, - "3710": 1027180672.0, - "3715": 1016134912.0, - "3720": 1040818560.0, - "3725": 1032763776.0, - "3730": 1030920960.0, - "3735": 1019008640.0, - "3740": 1023825600.0, - "3745": 1046289152.0, - "3750": 1034462336.0, - "3755": 1032090048.0, - "3760": 1019366912.0, - "3765": 1031916736.0, - "3770": 1026677120.0, - "3775": 1035708288.0, - "3780": 1030671104.0, - "3785": 1027208128.0, - "3790": 1019584064.0, - "3795": 1030306048.0, - "3800": 1035614976.0, - "3805": 1035423360.0, - "3810": 1033294144.0, - "3815": 1033988608.0, - "3820": 1041105792.0, - "3825": 1024534976.0, - "3830": 1037630528.0, - "3835": 1040347968.0, - "3840": 1023445888.0, - "3845": 1048466688.0, - "3850": 1052489280.0, - "3855": 1028907264.0, - "3860": 1019532672.0, - "3865": 1035487744.0, - "3870": 1028491712.0, - "3875": 1041164800.0, - "3880": 1048854912.0, - "3885": 1027725248.0, - "3890": 1027487616.0, - "3895": 1034190592.0, - "3900": 1027645312.0, - "3905": 1027976128.0, - "3910": 1041572480.0, - "3915": 
1043995392.0, - "3920": 1041063424.0, - "3925": 1030836160.0, - "3930": 1027072896.0, - "3935": 1033782016.0, - "3940": 1042275712.0, - "3945": 1036248064.0, - "3950": 1021430976.0, - "3955": 1036304128.0, - "3960": 1024184192.0, - "3965": 1027065856.0, - "3970": 1015984640.0, - "3975": 1041421632.0, - "3980": 1032455488.0, - "3985": 1037680640.0, - "3990": 1038684992.0, - "3995": 1023654528.0, - "4000": 1054410240.0, - "4005": 1029983424.0, - "4010": 1025138112.0, - "4015": 1030978560.0, - "4020": 1018472448.0, - "4025": 1027124352.0, - "4030": 1010306816.0, - "4035": 1038641088.0, - "4040": 1022256640.0, - "4045": 1025038208.0, - "4050": 1032348800.0, - "4055": 1022420864.0, - "4060": 1024520768.0, - "4065": 1032871168.0, - "4070": 1027791232.0, - "4075": 1025596928.0, - "4080": 1029366656.0, - "4085": 1020823552.0, - "4090": 1033322496.0, - "4095": 1024142656.0, - "4100": 1040948864.0, - "4105": 1027266496.0, - "4110": 1038791424.0, - "4115": 1023497088.0, - "4120": 1038943168.0, - "4125": 1048274176.0, - "4130": 1021490752.0, - "4135": 1034570880.0, - "4140": 1034613824.0, - "4145": 1044447232.0, - "4150": 1000353664.0, - "4155": 1028363392.0, - "4160": 1024242624.0, - "4165": 1033688704.0, - "4170": 1018888000.0, - "4175": 1026492608.0, - "4180": 1045409024.0, - "4185": 1033631616.0, - "4190": 1029574592.0, - "4195": 1038777984.0, - "4200": 1025102336.0, - "4205": 1019074816.0, - "4210": 1029560704.0, - "4215": 1032269184.0, - "4220": 1026242048.0, - "4225": 1031925888.0, - "4230": 1030269824.0, - "4235": 1027603328.0, - "4240": 1031480832.0, - "4245": 1028765056.0, - "4250": 1026987008.0, - "4255": 1021240064.0, - "4260": 1042082432.0, - "4265": 1025411200.0, - "4270": 1030169984.0, - "4275": 1012472448.0, - "4280": 1044505600.0, - "4285": 1019898304.0, - "4290": 1033058560.0, - "4295": 1033596032.0, - "4300": 1031638912.0, - "4305": 1023847936.0, - "4310": 1021568512.0, - "4315": 1047221504.0, - "4320": 1026520576.0, - "4325": 1005865600.0, - "4330": 
1037666688.0, - "4335": 1022006464.0, - "4340": 1029009920.0, - "4345": 1033474496.0, - "4350": 1036886144.0, - "4355": 1026808832.0, - "4360": 1022938240.0, - "4365": 1028779648.0, - "4370": 1029624704.0, - "4375": 1042196864.0, - "4380": 1016100096.0, - "4385": 1045551296.0, - "4390": 1026270848.0, - "4395": 1029796416.0, - "4400": 1047365760.0, - "4405": 1029297344.0, - "4410": 1033424256.0, - "4415": 1028298304.0, - "4420": 1028148928.0, - "4425": 1033575552.0, - "4430": 1031374592.0, - "4435": 1028571136.0, - "4440": 1033123328.0, - "4445": 1028293504.0, - "4450": 1052210944.0, - "4455": 1026286080.0, - "4460": 1034885888.0, - "4465": 1031725696.0, - "4470": 1035446528.0, - "4475": 1036971712.0, - "4480": 1025117824.0, - "4485": 1034104960.0, - "4490": 1024630912.0, - "4495": 1047974912.0, - "4500": 1024707840.0, - "4505": 1038850048.0, - "4510": 1043723776.0, - "4515": 1044276736.0, - "4520": 1036872320.0, - "4525": 1058073536.0, - "4530": 1030973568.0, - "4535": 1032592256.0, - "4540": 1036428160.0, - "4545": 1025726400.0, - "4550": 1021749312.0, - "4555": 1037546112.0, - "4560": 1020099200.0, - "4565": 1036055296.0, - "4570": 1020501120.0, - "4575": 1050412608.0, - "4580": 1010437888.0, - "4585": 1022960768.0, - "4590": 1039710272.0, - "4595": 1023274880.0, - "4600": 1042477824.0, - "4605": 1039746688.0, - "4610": 1046104192.0, - "4615": 1017999744.0, - "4620": 1044734592.0, - "4625": 1030479104.0, - "4630": 1027260800.0, - "4635": 1026995200.0, - "4640": 1034901248.0, - "4645": 1036420352.0, - "4650": 1033711488.0, - "4655": 1035461056.0, - "4660": 1035324800.0, - "4665": 1020265664.0, - "4670": 1020057344.0, - "4675": 1054848768.0, - "4680": 1024895872.0, - "4685": 1027820160.0, - "4690": 1034449664.0, - "4695": 1039151744.0, - "4700": 1038865024.0, - "4705": 1027655808.0, - "4710": 1020522560.0, - "4715": 1031825536.0, - "4720": 1030300416.0, - "4725": 1030298368.0, - "4730": 1044096704.0, - "4735": 1046133376.0, - "4740": 1036178112.0, - "4745": 
1039043840.0, - "4750": 1031790528.0, - "4755": 1047723392.0, - "4760": 1026178176.0, - "4765": 1034695040.0, - "4770": 1036521856.0, - "4775": 1029375168.0, - "4780": 1028543488.0, - "4785": 1028414976.0, - "4790": 1019620224.0, - "4795": 1033060160.0, - "4800": 1051866880.0, - "4805": 1015414400.0, - "4810": 1029454336.0, - "4815": 1009572096.0, - "4820": 1041051200.0, - "4825": 1026708608.0, - "4830": 1020450816.0, - "4835": 1051307840.0, - "4840": 1019456512.0, - "4845": 1032315008.0, - "4850": 1036794496.0, - "4855": 1031052736.0, - "4860": 1033131776.0, - "4865": 1032064384.0, - "4870": 1049832576.0, - "4875": 1025110528.0, - "4880": 1048476160.0, - "4885": 1016853056.0, - "4890": 1037317312.0, - "4895": 1024323136.0, - "4900": 1043374208.0, - "4905": 1033397120.0, - "4910": 1032830272.0, - "4915": 1016889856.0, - "4920": 1022294784.0, - "4925": 1034965888.0, - "4930": 1034630016.0, - "4935": 1025885312.0, - "4940": 1048398272.0, - "4945": 1025248576.0, - "4950": 1024208768.0, - "4955": 1007485952.0, - "4960": 1040213824.0, - "4965": 1018775296.0, - "4970": 1014274688.0, - "4975": 1038025472.0, - "4980": 1020917888.0, - "4985": 1029045888.0, - "4990": 1028394816.0, - "4995": 1032020480.0, - "5000": 1039791104.0, - "5005": 1024351552.0, - "5010": 1029147968.0, - "5015": 1021807296.0, - "5020": 1023506944.0, - "5025": 1037603456.0, - "5030": 1041947136.0, - "5035": 1047130304.0, - "5040": 1060956096.0, - "5045": 1032108544.0, - "5050": 1029534336.0, - "5055": 1024552192.0, - "5060": 1035282304.0, - "5065": 1021205504.0, - "5070": 1035756288.0, - "5075": 1015771264.0, - "5080": 1027040064.0, - "5085": 1021792192.0, - "5090": 1034973568.0, - "5095": 1015499712.0, - "5100": 1032257600.0, - "5105": 1017981568.0, - "5110": 1019586304.0, - "5115": 1036063936.0, - "5120": 1032695040.0, - "5125": 1019076992.0, - "5130": 1033404672.0, - "5135": 1041203072.0, - "5140": 1026258752.0, - "5145": 1033705856.0, - "5150": 1022043520.0, - "5155": 1032265664.0, - "5160": 
1039625984.0, - "5165": 1031576448.0, - "5170": 1035555328.0, - "5175": 1026116224.0, - "5180": 1030316032.0, - "5185": 1024495680.0, - "5190": 1019492608.0, - "5195": 1035626496.0, - "5200": 1016905344.0, - "5205": 1013435648.0, - "5210": 1049395456.0, - "5215": 1030833280.0, - "5220": 1025276800.0, - "5225": 1035239936.0, - "5230": 1025930624.0, - "5235": 1025120000.0, - "5240": 1046308224.0, - "5245": 1022740608.0, - "5250": 1027062336.0, - "5255": 1023887360.0, - "5260": 1033821440.0, - "5265": 1045733696.0, - "5270": 1052500480.0, - "5275": 1033018112.0, - "5280": 1030073920.0, - "5285": 1025212608.0, - "5290": 1026575616.0, - "5295": 1032653440.0, - "5300": 1024367872.0, - "5305": 1029634368.0, - "5310": 1033197312.0, - "5315": 1032988992.0, - "5320": 1019521664.0, - "5325": 1022718336.0, - "5330": 1021335168.0, - "5335": 1039275776.0, - "5340": 1037219648.0, - "5345": 1039188096.0, - "5350": 1023701888.0, - "5355": 1029935872.0, - "5360": 1047046080.0, - "5365": 1037426432.0, - "5370": 1024381568.0, - "5375": 1042070656.0, - "5380": 1020368384.0, - "5385": 1021765696.0, - "5390": 1035133184.0, - "5395": 1049653568.0, - "5400": 1026015744.0, - "5405": 1036453120.0, - "5410": 1027635776.0, - "5415": 1042285824.0, - "5420": 1039941888.0, - "5425": 1028381184.0, - "5430": 1043799808.0, - "5435": 1032653312.0, - "5440": 1033384448.0, - "5445": 1034144640.0, - "5450": 1025299328.0, - "5455": 1034079424.0, - "5460": 1026812416.0, - "5465": 1027399552.0, - "5470": 1028969216.0, - "5475": 1037233920.0, - "5480": 1023830272.0, - "5485": 1019186752.0, - "5490": 1030891520.0, - "5495": 1029399424.0, - "5500": 1032681216.0, - "5505": 1018275200.0, - "5510": 1023987648.0, - "5515": 1025156032.0, - "5520": 1039527296.0, - "5525": 1018024576.0, - "5530": 1037663936.0, - "5535": 1031599232.0, - "5540": 1027564544.0, - "5545": 1033212160.0, - "5550": 1032115968.0, - "5555": 1044802304.0, - "5560": 1028511232.0, - "5565": 1029686016.0, - "5570": 1042027776.0, - "5575": 
1025379392.0, - "5580": 1023716736.0, - "5585": 1044093696.0, - "5590": 1041319936.0, - "5595": 1031549824.0, - "5600": 1023400320.0, - "5605": 1040115456.0, - "5610": 1034087552.0, - "5615": 1021042816.0, - "5620": 1031004800.0, - "5625": 1030188544.0, - "5630": 1023502080.0, - "5635": 1026684096.0, - "5640": 1034589120.0, - "5645": 1018655744.0, - "5650": 1052378752.0, - "5655": 1048933504.0, - "5660": 1050077696.0, - "5665": 1033958144.0, - "5670": 1033750016.0, - "5675": 1025392640.0, - "5680": 1039378304.0, - "5685": 1033056576.0, - "5690": 1031464576.0, - "5695": 1021946368.0, - "5700": 1038065664.0, - "5705": 1043684736.0, - "5710": 1057231616.0, - "5715": 1014462848.0, - "5720": 1021258816.0, - "5725": 1041822272.0, - "5730": 1039454912.0, - "5735": 1025128576.0, - "5740": 1026045440.0, - "5745": 1036990208.0, - "5750": 1044552256.0, - "5755": 1011860416.0, - "5760": 1028389568.0, - "5765": 1028245504.0, - "5770": 1021530368.0, - "5775": 1051210240.0, - "5780": 1034984512.0, - "5785": 1037513920.0, - "5790": 1016957184.0, - "5795": 1027873536.0, - "5800": 1029780736.0, - "5805": 1050694912.0, - "5810": 1018478336.0, - "5815": 1036123520.0, - "5820": 1048408704.0, - "5825": 1030977920.0, - "5830": 1031572096.0, - "5835": 1034045440.0, - "5840": 1039843776.0, - "5845": 1021746048.0, - "5850": 1029807744.0, - "5855": 1038789376.0, - "5860": 1031436288.0, - "5865": 1026397568.0, - "5870": 1029861824.0, - "5875": 1032841856.0, - "5880": 1032675968.0, - "5885": 1024576128.0, - "5890": 1026798976.0, - "5895": 1015796160.0, - "5900": 1049707008.0, - "5905": 1025653248.0, - "5910": 1019150720.0, - "5915": 1042739136.0, - "5920": 1028047232.0, - "5925": 1034016448.0, - "5930": 1030963328.0, - "5935": 1038102784.0, - "5940": 1019172864.0, - "5945": 1025130112.0, - "5950": 1035530240.0, - "5955": 1050437184.0, - "5960": 1024548736.0, - "5965": 1029923712.0, - "5970": 1016427776.0, - "5975": 1036682752.0, - "5980": 1024118464.0, - "5985": 1035386624.0, - "5990": 
1010550784.0, - "5995": 1047019200.0, - "6000": 1021245568.0, - "6005": 1040460416.0, - "6010": 1025358720.0, - "6015": 1050179072.0, - "6020": 1039514496.0, - "6025": 1030254592.0, - "6030": 1025931968.0, - "6035": 1021745408.0, - "6040": 1034117056.0, - "6045": 1028282112.0, - "6050": 1020112320.0, - "6055": 1040397056.0, - "6060": 1026347008.0, - "6065": 1022198400.0, - "6070": 1040668416.0, - "6075": 1046037440.0, - "6080": 1038583168.0, - "6085": 1041485568.0, - "6090": 1037205888.0, - "6095": 1036282880.0, - "6100": 1030454720.0, - "6105": 1019216640.0, - "6110": 1035357824.0, - "6115": 1019452544.0, - "6120": 1032188800.0, - "6125": 1020922624.0, - "6130": 1012013952.0, - "6135": 1038733824.0, - "6140": 1041736896.0, - "6145": 1041917056.0, - "6150": 1018958208.0, - "6155": 1024649344.0, - "6160": 1047972160.0, - "6165": 1050408832.0, - "6170": 1032505344.0, - "6175": 1045793664.0, - "6180": 1040067072.0, - "6185": 1029710464.0, - "6190": 1023293760.0, - "6195": 1050897728.0, - "6200": 1035035776.0, - "6205": 1036275584.0, - "6210": 1039772736.0, - "6215": 1033200256.0, - "6220": 1026162432.0, - "6225": 1036741120.0, - "6230": 1025144192.0, - "6235": 1019352832.0, - "6240": 1057104384.0, - "6245": 1018413952.0, - "6250": 1035337344.0, - "6255": 1025380992.0, - "6260": 1034863744.0, - "6265": 1027703424.0, - "6270": 1042116480.0, - "6275": 1037659008.0, - "6280": 1018270208.0, - "6285": 1032642304.0, - "6290": 1038598592.0, - "6295": 1031803456.0, - "6300": 1034635200.0, - "6305": 1011066624.0, - "6310": 1039458624.0, - "6315": 1030054272.0, - "6320": 1030534208.0, - "6325": 1038642496.0, - "6330": 1033908800.0, - "6335": 1032297856.0, - "6340": 1033544448.0, - "6345": 1031036416.0, - "6350": 1037451264.0, - "6355": 1028075968.0, - "6360": 1043313408.0, - "6365": 1025223808.0, - "6370": 1033939200.0, - "6375": 1036038720.0, - "6380": 1029108096.0, - "6385": 1025395072.0, - "6390": 1025517952.0, - "6395": 1048611584.0, - "6400": 1040734976.0, - "6405": 
1024247936.0, - "6410": 1017489280.0, - "6415": 1042827072.0, - "6420": 1025202432.0, - "6425": 1027164928.0, - "6430": 1040568256.0, - "6435": 1022908800.0, - "6440": 1047994624.0, - "6445": 1036089088.0, - "6450": 1048532224.0, - "6455": 1037272320.0, - "6460": 1036750912.0, - "6465": 1033652032.0, - "6470": 1018135232.0, - "6475": 1034691648.0, - "6480": 1028994048.0, - "6485": 1033258880.0, - "6490": 1035638656.0, - "6495": 1024470016.0, - "6500": 1020572096.0, - "6505": 1059327104.0, - "6510": 1020472576.0, - "6515": 1018688064.0, - "6520": 1051470592.0, - "6525": 1035544512.0, - "6530": 1027897216.0, - "6535": 1022722240.0, - "6540": 1023273984.0, - "6545": 1033173120.0, - "6550": 1029488512.0, - "6555": 1029575296.0, - "6560": 1056438784.0, - "6565": 1054295040.0, - "6570": 1032319040.0, - "6575": 1041208320.0, - "6580": 1028134400.0, - "6585": 1036504832.0, - "6590": 1042456192.0, - "6595": 1038568832.0, - "6600": 1031388096.0, - "6605": 1045715456.0, - "6610": 1034713472.0, - "6615": 1015576448.0, - "6620": 1039115136.0, - "6625": 1054654208.0, - "6630": 1043092928.0, - "6635": 1032226304.0, - "6640": 1016738496.0, - "6645": 1016178816.0, - "6650": 1034692672.0, - "6655": 1031753472.0, - "6660": 1041401920.0, - "6665": 1024657984.0, - "6670": 1023820032.0, - "6675": 1038306176.0, - "6680": 1025624064.0, - "6685": 1045394048.0, - "6690": 1046390720.0, - "6695": 1027754368.0, - "6700": 1033473920.0, - "6705": 1038857152.0, - "6710": 1047485888.0, - "6715": 1043229440.0, - "6720": 1022995456.0, - "6725": 1018910144.0, - "6730": 1027525504.0, - "6735": 1016937856.0, - "6740": 1027238016.0, - "6745": 1030263680.0, - "6750": 1006373760.0, - "6755": 1034765056.0, - "6760": 1040735296.0, - "6765": 1023827008.0, - "6770": 1036441344.0, - "6775": 1019627712.0, - "6780": 1043723904.0, - "6785": 1037409280.0, - "6790": 1029403072.0, - "6795": 1026349440.0, - "6800": 1036628224.0, - "6805": 1024579712.0, - "6810": 1042340544.0, - "6815": 1035274112.0, - "6820": 
1022594880.0, - "6825": 1034793344.0, - "6830": 1029862400.0, - "6835": 1041609600.0, - "6840": 1042283776.0, - "6845": 1018954624.0, - "6850": 1032171136.0, - "6855": 1034434752.0, - "6860": 1042054848.0, - "6865": 1021813568.0, - "6870": 1037015424.0, - "6875": 1030379968.0, - "6880": 1029360768.0, - "6885": 1030435968.0, - "6890": 1039890432.0, - "6895": 1027267712.0, - "6900": 1035174016.0, - "6905": 1043975424.0, - "6910": 1019763072.0, - "6915": 1017476608.0, - "6920": 1017184256.0, - "6925": 1030650688.0, - "6930": 1036672384.0, - "6935": 1042835712.0, - "6940": 1040313216.0, - "6945": 1044196992.0, - "6950": 1040513472.0, - "6955": 1036112704.0, - "6960": 1036436224.0, - "6965": 1019161024.0, - "6970": 1034729088.0, - "6975": 1019134464.0, - "6980": 1028436160.0, - "6985": 1023240128.0, - "6990": 1026994688.0, - "6995": 1027547520.0, - "7000": 1058819840.0, - "7005": 1013737856.0, - "7010": 1028959488.0, - "7015": 1037288768.0, - "7020": 1011880576.0, - "7025": 1017313280.0, - "7030": 1028301440.0, - "7035": 1035955392.0, - "7040": 1042966016.0, - "7045": 1028185856.0, - "7050": 1017979584.0, - "7055": 1035088000.0, - "7060": 1051802624.0, - "7065": 1007664640.0, - "7070": 1035819008.0, - "7075": 1031039552.0, - "7080": 1026143296.0, - "7085": 1044906432.0, - "7090": 1046261760.0, - "7095": 1043760512.0, - "7100": 1035089024.0, - "7105": 1049143296.0, - "7110": 1010962944.0, - "7115": 1033869504.0, - "7120": 1031267456.0, - "7125": 1037496832.0, - "7130": 1024881856.0, - "7135": 1031991808.0, - "7140": 1019090176.0, - "7145": 1033081088.0, - "7150": 1037554112.0, - "7155": 1015729728.0, - "7160": 1024724608.0, - "7165": 1030895808.0, - "7170": 1037367808.0, - "7175": 1028816896.0, - "7180": 1037633280.0, - "7185": 1016174080.0, - "7190": 1019808128.0, - "7195": 1040915392.0, - "7200": 1041375360.0, - "7205": 1026538240.0, - "7210": 1022638720.0, - "7215": 1041890560.0, - "7220": 1017742720.0, - "7225": 1027296640.0, - "7230": 1030200448.0, - "7235": 
1035726848.0, - "7240": 1037854848.0, - "7245": 1023971008.0, - "7250": 1044708096.0, - "7255": 1031900480.0, - "7260": 1030128256.0, - "7265": 1036887104.0, - "7270": 1050097152.0, - "7275": 1029225216.0, - "7280": 1020231808.0, - "7285": 1029842048.0, - "7290": 1017219328.0, - "7295": 1029139584.0, - "7300": 1031533824.0, - "7305": 1027298176.0, - "7310": 1029089664.0, - "7315": 1022782272.0, - "7320": 1036458176.0, - "7325": 1036851840.0, - "7330": 1021706496.0, - "7335": 1030715904.0, - "7340": 1039382976.0, - "7345": 1040177664.0, - "7350": 1034973568.0, - "7355": 1033656320.0, - "7360": 1031254912.0, - "7365": 1048742016.0, - "7370": 1027298304.0, - "7375": 1041854848.0, - "7380": 1016725760.0, - "7385": 1017578368.0, - "7390": 1017234944.0, - "7395": 1046793600.0, - "7400": 1048441216.0, - "7405": 1013394304.0, - "7410": 1017386368.0, - "7415": 1017815360.0, - "7420": 1028043008.0, - "7425": 1012840576.0, - "7430": 1034042368.0, - "7435": 1032530432.0, - "7440": 1002692928.0, - "7445": 1034451200.0, - "7450": 1039304832.0, - "7455": 1019027008.0, - "7460": 1014740928.0, - "7465": 1027204736.0, - "7470": 1030422784.0, - "7475": 1033792064.0, - "7480": 1043317376.0, - "7485": 1038215168.0, - "7490": 1049000960.0, - "7495": 1028982720.0, - "7500": 1027426816.0, - "7505": 1028695936.0, - "7510": 1048886528.0, - "7515": 1035648704.0, - "7520": 1017198848.0, - "7525": 1036572736.0, - "7530": 1029261952.0, - "7535": 1027190144.0, - "7540": 1028338048.0, - "7545": 1025986304.0, - "7550": 1023025856.0, - "7555": 1033025344.0, - "7560": 1031404672.0, - "7565": 1022710528.0, - "7570": 1037591552.0, - "7575": 1022603136.0, - "7580": 1018123584.0, - "7585": 1033054208.0, - "7590": 1010993280.0, - "7595": 1018260352.0, - "7600": 1049904448.0, - "7605": 1037361216.0, - "7610": 1040415744.0, - "7615": 1035247488.0, - "7620": 1024230912.0, - "7625": 1020317184.0, - "7630": 1034939584.0, - "7635": 1043224192.0, - "7640": 1033491520.0, - "7645": 1034444608.0, - "7650": 
1039804800.0, - "7655": 1031240576.0, - "7660": 1056628096.0, - "7665": 1031076096.0, - "7670": 1033685120.0, - "7675": 1030681600.0, - "7680": 1035398720.0, - "7685": 1018661760.0, - "7690": 1031921024.0, - "7695": 1025858880.0, - "7700": 1017715200.0, - "7705": 1036531200.0, - "7710": 1029893248.0, - "7715": 1053230656.0, - "7720": 1019514240.0, - "7725": 1042193216.0, - "7730": 1035620992.0, - "7735": 1020726144.0, - "7740": 1045576128.0, - "7745": 1026932992.0, - "7750": 1048550208.0, - "7755": 1022539264.0, - "7760": 1049532032.0, - "7765": 1029370176.0, - "7770": 1018375296.0, - "7775": 1021364672.0, - "7780": 1039770624.0, - "7785": 1039914112.0, - "7790": 1030516992.0, - "7795": 1039353728.0, - "7800": 1028187904.0, - "7805": 1027635776.0, - "7810": 1020970368.0, - "7815": 1035878400.0, - "7820": 1017666240.0, - "7825": 1018067392.0, - "7830": 1035104128.0, - "7835": 1044507648.0, - "7840": 1027836224.0, - "7845": 1032101504.0, - "7850": 1034609408.0, - "7855": 1025464832.0, - "7860": 1059051648.0, - "7865": 1016626240.0, - "7870": 1033729408.0, - "7875": 1044185600.0, - "7880": 1029084352.0, - "7885": 1040308288.0, - "7890": 1029556480.0, - "7895": 1032947008.0, - "7900": 1021409216.0, - "7905": 1020955904.0, - "7910": 1008993856.0, - "7915": 1023120768.0, - "7920": 1023070976.0, - "7925": 1030094080.0, - "7930": 1020712704.0, - "7935": 1019443776.0, - "7940": 1017809152.0, - "7945": 1014447552.0, - "7950": 1026303616.0, - "7955": 1034518272.0, - "7960": 1056026304.0, - "7965": 1031047872.0, - "7970": 1030417152.0, - "7975": 1022189888.0, - "7980": 1034474624.0, - "7985": 1047305024.0, - "7990": 1032066176.0, - "7995": 1044264704.0, - "8000": 1028876672.0, - "8005": 1028045440.0, - "8010": 1050665408.0, - "8015": 1019758976.0, - "8020": 1043297408.0, - "8025": 1039018560.0, - "8030": 1030868800.0, - "8035": 1045304192.0, - "8040": 1026310784.0, - "8045": 1024970368.0, - "8050": 1018405632.0, - "8055": 1033736960.0, - "8060": 1012986816.0, - "8065": 
1022016640.0, - "8070": 1034776064.0, - "8075": 1042759616.0, - "8080": 1027758784.0, - "8085": 1037205376.0, - "8090": 1007008256.0, - "8095": 1030374528.0, - "8100": 1030726016.0, - "8105": 1027794944.0, - "8110": 1031557248.0, - "8115": 1037685248.0, - "8120": 1037692992.0, - "8125": 1031097472.0, - "8130": 1028627072.0, - "8135": 1029680256.0, - "8140": 1049904256.0, - "8145": 1043463552.0, - "8150": 1040087424.0, - "8155": 1046780288.0, - "8160": 1010199040.0, - "8165": 1031657728.0, - "8170": 1024483264.0, - "8175": 1035019648.0, - "8180": 1024460544.0, - "8185": 1021960448.0, - "8190": 1037125504.0, - "8195": 1022368384.0, - "8200": 1035635968.0, - "8205": 1026482496.0, - "8210": 1023888000.0, - "8215": 1014276416.0, - "8220": 1026756224.0, - "8225": 1028540160.0, - "8230": 1027163072.0, - "8235": 1037914048.0, - "8240": 1025909376.0, - "8245": 1024676608.0, - "8250": 1041635840.0, - "8255": 1031908224.0, - "8260": 1032424512.0, - "8265": 1023164800.0, - "8270": 1040172544.0, - "8275": 1038050688.0, - "8280": 1041849216.0, - "8285": 1038804352.0, - "8290": 1024074880.0, - "8295": 1028403648.0, - "8300": 1039341440.0, - "8305": 1012104192.0, - "8310": 1021882048.0, - "8315": 1027307200.0, - "8320": 1021636992.0, - "8325": 1048572160.0, - "8330": 1041039616.0, - "8335": 1037964928.0, - "8340": 1033019136.0, - "8345": 1043864192.0, - "8350": 1037713792.0, - "8355": 1029686400.0, - "8360": 1040667776.0, - "8365": 1027450304.0, - "8370": 1037742848.0, - "8375": 1041986944.0, - "8380": 1037628416.0, - "8385": 1023436160.0, - "8390": 1026068224.0, - "8395": 1028913408.0, - "8400": 1046530560.0, - "8405": 1040179456.0, - "8410": 1034252672.0, - "8415": 1040258688.0, - "8420": 1054730752.0, - "8425": 1031514880.0, - "8430": 1030295680.0, - "8435": 1045707200.0, - "8440": 1026310784.0, - "8445": 1029027392.0, - "8450": 1034201920.0, - "8455": 1031794688.0, - "8460": 1016828032.0, - "8465": 1035163648.0, - "8470": 1035185152.0, - "8475": 1024712960.0, - "8480": 
1035901184.0, - "8485": 1028948480.0, - "8490": 1023079168.0, - "8495": 1037393280.0, - "8500": 1025960064.0, - "8505": 1042724992.0, - "8510": 1028167936.0, - "8515": 1038101056.0, - "8520": 1023107328.0, - "8525": 1037987328.0, - "8530": 1027572800.0, - "8535": 1041656128.0, - "8540": 1033880960.0, - "8545": 1015116160.0, - "8550": 1040188160.0, - "8555": 1016340672.0, - "8560": 1019330048.0, - "8565": 1021410112.0, - "8570": 1032032320.0, - "8575": 1031880128.0, - "8580": 1016011264.0, - "8585": 1030017408.0, - "8590": 1031637248.0, - "8595": 1017776128.0, - "8600": 1002393216.0, - "8605": 1030238336.0, - "8610": 1017532288.0, - "8615": 1023989248.0, - "8620": 1047205696.0, - "8625": 1034231552.0, - "8630": 1030921280.0, - "8635": 1051992512.0, - "8640": 1041134208.0, - "8645": 1024870720.0, - "8650": 1025595392.0, - "8655": 1036904832.0, - "8660": 1031171200.0, - "8665": 1032904640.0, - "8670": 1037400576.0, - "8675": 1029157248.0, - "8680": 1031264704.0, - "8685": 1041197568.0, - "8690": 1035035392.0, - "8695": 1008508416.0, - "8700": 1027459072.0, - "8705": 1051504896.0, - "8710": 1041678016.0, - "8715": 1034152256.0, - "8720": 1017596544.0, - "8725": 1025187456.0, - "8730": 1036610816.0, - "8735": 1014829568.0, - "8740": 1036081536.0, - "8745": 1021252416.0, - "8750": 1027866496.0, - "8755": 1020742272.0, - "8760": 1036899712.0, - "8765": 1058672448.0, - "8770": 1020462464.0, - "8775": 1031773056.0, - "8780": 1030892544.0, - "8785": 1032117504.0, - "8790": 1041034112.0, - "8795": 1019523968.0, - "8800": 1038245632.0, - "8805": 1035106752.0, - "8810": 1043257088.0, - "8815": 1026490496.0, - "8820": 1027666944.0, - "8825": 1043464064.0, - "8830": 1027480192.0, - "8835": 1038812928.0, - "8840": 1034490752.0, - "8845": 1033909760.0, - "8850": 1030491008.0, - "8855": 1042524992.0, - "8860": 1013002880.0, - "8865": 1038368128.0, - "8870": 1025187456.0, - "8875": 1012981760.0, - "8880": 1028376704.0, - "8885": 1046461056.0, - "8890": 1038603840.0, - "8895": 
1037909504.0, - "8900": 1027294848.0, - "8905": 1032792064.0, - "8910": 1029795264.0, - "8915": 1030003968.0, - "8920": 1030339968.0, - "8925": 1028569984.0, - "8930": 1031637376.0, - "8935": 1022951424.0, - "8940": 1019847872.0, - "8945": 1031909248.0, - "8950": 1039951744.0, - "8955": 1041902720.0, - "8960": 1026878464.0, - "8965": 1022083968.0, - "8970": 1029559424.0, - "8975": 1038934400.0, - "8980": 1033860160.0, - "8985": 1030649472.0, - "8990": 1025014144.0, - "8995": 1013963648.0, - "9000": 1035286400.0, - "9005": 1028649280.0, - "9010": 1011913280.0, - "9015": 1038912128.0, - "9020": 1030153856.0, - "9025": 1024685056.0, - "9030": 1025861888.0, - "9035": 1054309248.0, - "9040": 1027293952.0, - "9045": 1036583040.0, - "9050": 1020929664.0, - "9055": 1043212800.0, - "9060": 1023159104.0, - "9065": 1023387520.0, - "9070": 1039364480.0, - "9075": 1026728320.0, - "9080": 1018873408.0, - "9085": 1015439104.0, - "9090": 1043764736.0, - "9095": 1014020224.0, - "9100": 1031975296.0, - "9105": 1026514304.0, - "9110": 1029229568.0, - "9115": 1024866432.0, - "9120": 999986240.0, - "9125": 1032842752.0, - "9130": 1038534336.0, - "9135": 1031037696.0, - "9140": 1025502208.0, - "9145": 1030405248.0, - "9150": 1029416576.0, - "9155": 1038268928.0, - "9160": 1046043904.0, - "9165": 1017948992.0, - "9170": 1040955520.0, - "9175": 1031287552.0, - "9180": 1037830656.0, - "9185": 1040684416.0, - "9190": 1028985728.0, - "9195": 1034312320.0, - "9200": 1035551872.0, - "9205": 1029847040.0, - "9210": 1026535872.0, - "9215": 1030520448.0, - "9220": 1025732224.0, - "9225": 1048001408.0, - "9230": 1041601792.0, - "9235": 1027775104.0, - "9240": 1025245760.0, - "9245": 1036211584.0, - "9250": 1041192384.0, - "9255": 1020063872.0, - "9260": 1035337984.0, - "9265": 1023102208.0, - "9270": 1038332928.0, - "9275": 1036053568.0, - "9280": 1026541504.0, - "9285": 1014285184.0, - "9290": 1018866304.0, - "9295": 1026915264.0, - "9300": 1037085888.0, - "9305": 1045435392.0, - "9310": 
1033242944.0, - "9315": 1039043840.0, - "9320": 1048495488.0, - "9325": 1023059840.0, - "9330": 1031724672.0, - "9335": 1035673472.0, - "9340": 1013719296.0, - "9345": 1022572032.0, - "9350": 1026585600.0, - "9355": 1034807104.0, - "9360": 1029839552.0, - "9365": 1019863296.0, - "9370": 1006904320.0, - "9375": 1036232960.0, - "9380": 1049012736.0, - "9385": 1015905344.0, - "9390": 1029208704.0, - "9395": 1008931968.0, - "9400": 1026893568.0, - "9405": 1027653312.0, - "9410": 1040913280.0, - "9415": 1035128576.0, - "9420": 1030792640.0, - "9425": 1027581056.0, - "9430": 1032727360.0, - "9435": 1031796288.0, - "9440": 1051730048.0, - "9445": 1019626752.0, - "9450": 1044505152.0, - "9455": 1035773696.0, - "9460": 1013828224.0, - "9465": 1023403904.0, - "9470": 1023576832.0, - "9475": 1039164416.0, - "9480": 1029597056.0, - "9485": 1032075200.0, - "9490": 1020994560.0, - "9495": 1021375616.0, - "9500": 1035594304.0, - "9505": 1034478464.0, - "9510": 1014286592.0, - "9515": 1031309312.0, - "9520": 1026563904.0, - "9525": 1035853184.0, - "9530": 1031624448.0, - "9535": 1025926720.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 9535, - "step_interval": 5, - "values": { - "1": 33307314176.0, - "5": 33307424768.0, - "10": 33307447296.0, - "15": 33307439104.0, - "20": 33307533312.0, - "25": 33307473920.0, - "30": 33307504640.0, - "35": 33307639808.0, - "40": 33307637760.0, - "45": 33307568128.0, - "50": 33307418624.0, - "55": 33307326464.0, - "60": 33307346944.0, - "65": 33307490304.0, - "70": 33307312128.0, - "75": 33307308032.0, - "80": 33307404288.0, - "85": 33307314176.0, - "90": 33307285504.0, - "95": 33307392000.0, - "100": 33307260928.0, - "105": 33307129856.0, - "110": 33307037696.0, - "115": 33306703872.0, - "120": 33307355136.0, - "125": 33306873856.0, - "130": 33307017216.0, - "135": 33307305984.0, - "140": 33307004928.0, - "145": 33307121664.0, - "150": 33307312128.0, - "155": 33307176960.0, - "160": 33307103232.0, - "165": 33307174912.0, - 
"170": 33307832320.0, - "175": 33307199488.0, - "180": 33307355136.0, - "185": 33307355136.0, - "190": 33307131904.0, - "195": 33307256832.0, - "200": 33307326464.0, - "205": 33307492352.0, - "210": 33307500544.0, - "215": 33307086848.0, - "220": 33306857472.0, - "225": 33306933248.0, - "230": 33307092992.0, - "235": 33307183104.0, - "240": 33307303936.0, - "245": 33307426816.0, - "250": 33307308032.0, - "255": 33307295744.0, - "260": 33306767360.0, - "265": 33307461632.0, - "270": 33307467776.0, - "275": 33307469824.0, - "280": 33307254784.0, - "285": 33307947008.0, - "290": 33307191296.0, - "295": 33308014592.0, - "300": 33307856896.0, - "305": 33308340224.0, - "310": 33307815936.0, - "315": 33307181056.0, - "320": 33307512832.0, - "325": 33307488256.0, - "330": 33307977728.0, - "335": 33307947008.0, - "340": 33308606464.0, - "345": 33308037120.0, - "350": 33307693056.0, - "355": 33308000256.0, - "360": 33307348992.0, - "365": 33307451392.0, - "370": 33308000256.0, - "375": 33307283456.0, - "380": 33307570176.0, - "385": 33307860992.0, - "390": 33307416576.0, - "395": 33307031552.0, - "400": 33307246592.0, - "405": 33307676672.0, - "410": 33306935296.0, - "415": 33307752448.0, - "420": 33307529216.0, - "425": 33307314176.0, - "430": 33306988544.0, - "435": 33307455488.0, - "440": 33307369472.0, - "445": 33307709440.0, - "450": 33307588608.0, - "455": 33306963968.0, - "460": 33307193344.0, - "465": 33306845184.0, - "470": 33307766784.0, - "475": 33306464256.0, - "480": 33307566080.0, - "485": 33307682816.0, - "490": 33307389952.0, - "495": 33307179008.0, - "500": 33307969536.0, - "505": 33307629568.0, - "510": 33308192768.0, - "515": 33307279360.0, - "520": 33306544128.0, - "525": 33307265024.0, - "530": 33307025408.0, - "535": 33307648000.0, - "540": 33307582464.0, - "545": 33307297792.0, - "550": 33307396096.0, - "555": 33307301888.0, - "560": 33307899904.0, - "565": 33307379712.0, - "570": 33307553792.0, - "575": 33307136000.0, - "580": 33305892864.0, - "585": 
33306945536.0, - "590": 33307629568.0, - "595": 33307860992.0, - "600": 33306873856.0, - "605": 33307357184.0, - "610": 33306556416.0, - "615": 33306349568.0, - "620": 33307791360.0, - "625": 33306378240.0, - "630": 33307168768.0, - "635": 33306767360.0, - "640": 33306116096.0, - "645": 33308092416.0, - "650": 33307277312.0, - "655": 33307131904.0, - "660": 33308485632.0, - "665": 33307334656.0, - "670": 33307959296.0, - "675": 33307701248.0, - "680": 33306863616.0, - "685": 33306697728.0, - "690": 33307863040.0, - "695": 33307293696.0, - "700": 33306263552.0, - "705": 33306955776.0, - "710": 33308225536.0, - "715": 33307174912.0, - "720": 33307107328.0, - "725": 33307324416.0, - "730": 33308231680.0, - "735": 33307224064.0, - "740": 33307815936.0, - "745": 33307938816.0, - "750": 33307779072.0, - "755": 33308463104.0, - "760": 33306349568.0, - "765": 33308266496.0, - "770": 33306603520.0, - "775": 33307424768.0, - "780": 33308608512.0, - "785": 33307969536.0, - "790": 33308188672.0, - "795": 33307656192.0, - "800": 33307547648.0, - "805": 33307619328.0, - "810": 33307910144.0, - "815": 33307170816.0, - "820": 33307029504.0, - "825": 33307443200.0, - "830": 33307422720.0, - "835": 33307262976.0, - "840": 33307613184.0, - "845": 33307928576.0, - "850": 33306238976.0, - "855": 33307396096.0, - "860": 33307938816.0, - "865": 33307701248.0, - "870": 33307940864.0, - "875": 33307545600.0, - "880": 33307527168.0, - "885": 33307336704.0, - "890": 33308262400.0, - "895": 33307717632.0, - "900": 33306474496.0, - "905": 33307480064.0, - "910": 33307725824.0, - "915": 33308303360.0, - "920": 33307770880.0, - "925": 33307566080.0, - "930": 33307451392.0, - "935": 33307975680.0, - "940": 33306320896.0, - "945": 33306429440.0, - "950": 33307136000.0, - "955": 33307846656.0, - "960": 33307611136.0, - "965": 33307465728.0, - "970": 33308293120.0, - "975": 33307078656.0, - "980": 33307568128.0, - "985": 33307080704.0, - "990": 33307367424.0, - "995": 33306861568.0, - "1000": 
33307889664.0, - "1005": 33305956352.0, - "1010": 33307508736.0, - "1015": 33306671104.0, - "1020": 33306669056.0, - "1025": 33306509312.0, - "1030": 33307117568.0, - "1035": 33308332032.0, - "1040": 33307353088.0, - "1045": 33308368896.0, - "1050": 33306615808.0, - "1055": 33306802176.0, - "1060": 33307103232.0, - "1065": 33307404288.0, - "1070": 33307070464.0, - "1075": 33308188672.0, - "1080": 33307011072.0, - "1085": 33307027456.0, - "1090": 33308086272.0, - "1095": 33307086848.0, - "1100": 33307287552.0, - "1105": 33308497920.0, - "1110": 33307461632.0, - "1115": 33307533312.0, - "1120": 33307777024.0, - "1125": 33307809792.0, - "1130": 33307484160.0, - "1135": 33308082176.0, - "1140": 33307029504.0, - "1145": 33307432960.0, - "1150": 33307574272.0, - "1155": 33307551744.0, - "1160": 33307561984.0, - "1165": 33307086848.0, - "1170": 33307856896.0, - "1175": 33306976256.0, - "1180": 33308237824.0, - "1185": 33307875328.0, - "1190": 33307369472.0, - "1195": 33308231680.0, - "1200": 33307197440.0, - "1205": 33307480064.0, - "1210": 33305866240.0, - "1215": 33308297216.0, - "1220": 33307451392.0, - "1225": 33307518976.0, - "1230": 33307688960.0, - "1235": 33307901952.0, - "1240": 33307394048.0, - "1245": 33307842560.0, - "1250": 33307281408.0, - "1255": 33306906624.0, - "1260": 33307301888.0, - "1265": 33307674624.0, - "1270": 33307150336.0, - "1275": 33307686912.0, - "1280": 33307430912.0, - "1285": 33306974208.0, - "1290": 33307529216.0, - "1295": 33307901952.0, - "1300": 33307002880.0, - "1305": 33308059648.0, - "1310": 33306939392.0, - "1315": 33307336704.0, - "1320": 33307262976.0, - "1325": 33307011072.0, - "1330": 33306550272.0, - "1335": 33307181056.0, - "1340": 33307406336.0, - "1345": 33307463680.0, - "1350": 33308135424.0, - "1355": 33307480064.0, - "1360": 33307533312.0, - "1365": 33307066368.0, - "1370": 33306595328.0, - "1375": 33307891712.0, - "1380": 33307830272.0, - "1385": 33308487680.0, - "1390": 33306521600.0, - "1395": 33307338752.0, - "1400": 
33308430336.0, - "1405": 33307768832.0, - "1410": 33308041216.0, - "1415": 33307797504.0, - "1420": 33306605568.0, - "1425": 33307240448.0, - "1430": 33307322368.0, - "1435": 33307559936.0, - "1440": 33306662912.0, - "1445": 33307058176.0, - "1450": 33307705344.0, - "1455": 33307291648.0, - "1460": 33306861568.0, - "1465": 33306312704.0, - "1470": 33307394048.0, - "1475": 33307211776.0, - "1480": 33306527744.0, - "1485": 33307361280.0, - "1490": 33307693056.0, - "1495": 33307271168.0, - "1500": 33306820608.0, - "1505": 33307092992.0, - "1510": 33306624000.0, - "1515": 33307097088.0, - "1520": 33306931200.0, - "1525": 33307635712.0, - "1530": 33307353088.0, - "1535": 33306468352.0, - "1540": 33307172864.0, - "1545": 33307693056.0, - "1550": 33307938816.0, - "1555": 33307832320.0, - "1560": 33308182528.0, - "1565": 33307099136.0, - "1570": 33306798080.0, - "1575": 33307492352.0, - "1580": 33307688960.0, - "1585": 33307326464.0, - "1590": 33306988544.0, - "1595": 33306818560.0, - "1600": 33307836416.0, - "1605": 33307590656.0, - "1610": 33307168768.0, - "1615": 33306931200.0, - "1620": 33306732544.0, - "1625": 33308260352.0, - "1630": 33308227584.0, - "1635": 33306957824.0, - "1640": 33306759168.0, - "1645": 33306021888.0, - "1650": 33306689536.0, - "1655": 33307332608.0, - "1660": 33307170816.0, - "1665": 33306583040.0, - "1670": 33307535360.0, - "1675": 33306912768.0, - "1680": 33306675200.0, - "1685": 33307774976.0, - "1690": 33307783168.0, - "1695": 33307971584.0, - "1700": 33307623424.0, - "1705": 33307652096.0, - "1710": 33307731968.0, - "1715": 33308090368.0, - "1720": 33307172864.0, - "1725": 33307672576.0, - "1730": 33306355712.0, - "1735": 33308229632.0, - "1740": 33307142144.0, - "1745": 33308151808.0, - "1750": 33306898432.0, - "1755": 33307105280.0, - "1760": 33308000256.0, - "1765": 33307750400.0, - "1770": 33308450816.0, - "1775": 33308184576.0, - "1780": 33308129280.0, - "1785": 33307936768.0, - "1790": 33307238400.0, - "1795": 33307922432.0, - "1800": 
33306900480.0, - "1805": 33307203584.0, - "1810": 33306923008.0, - "1815": 33307617280.0, - "1820": 33307664384.0, - "1825": 33308440576.0, - "1830": 33306843136.0, - "1835": 33307979776.0, - "1840": 33307588608.0, - "1845": 33307602944.0, - "1850": 33307774976.0, - "1855": 33307529216.0, - "1860": 33307054080.0, - "1865": 33307097088.0, - "1870": 33307373568.0, - "1875": 33306265600.0, - "1880": 33307275264.0, - "1885": 33307224064.0, - "1890": 33307324416.0, - "1895": 33307283456.0, - "1900": 33306810368.0, - "1905": 33307191296.0, - "1910": 33306884096.0, - "1915": 33308162048.0, - "1920": 33307664384.0, - "1925": 33305972736.0, - "1930": 33308504064.0, - "1935": 33307377664.0, - "1940": 33307119616.0, - "1945": 33307416576.0, - "1950": 33307746304.0, - "1955": 33307420672.0, - "1960": 33308073984.0, - "1965": 33307148288.0, - "1970": 33306775552.0, - "1975": 33308207104.0, - "1980": 33307473920.0, - "1985": 33307095040.0, - "1990": 33307527168.0, - "1995": 33307037696.0, - "2000": 33308801024.0, - "2005": 33307985920.0, - "2010": 33307516928.0, - "2015": 33307604992.0, - "2020": 33307406336.0, - "2025": 33307719680.0, - "2030": 33308381184.0, - "2035": 33307914240.0, - "2040": 33307324416.0, - "2045": 33306476544.0, - "2050": 33308246016.0, - "2055": 33307430912.0, - "2060": 33307912192.0, - "2065": 33307543552.0, - "2070": 33307670528.0, - "2075": 33307482112.0, - "2080": 33307871232.0, - "2085": 33306722304.0, - "2090": 33307549696.0, - "2095": 33307260928.0, - "2100": 33306765312.0, - "2105": 33306847232.0, - "2110": 33307332608.0, - "2115": 33306480640.0, - "2120": 33307168768.0, - "2125": 33307277312.0, - "2130": 33307314176.0, - "2135": 33307752448.0, - "2140": 33306710016.0, - "2145": 33307478016.0, - "2150": 33307729920.0, - "2155": 33306943488.0, - "2160": 33307508736.0, - "2165": 33307049984.0, - "2170": 33307158528.0, - "2175": 33306599424.0, - "2180": 33307054080.0, - "2185": 33307017216.0, - "2190": 33307119616.0, - "2195": 33307289600.0, - "2200": 
33306726400.0, - "2205": 33306636288.0, - "2210": 33307639808.0, - "2215": 33308215296.0, - "2220": 33307314176.0, - "2225": 33307437056.0, - "2230": 33306318848.0, - "2235": 33306941440.0, - "2240": 33308131328.0, - "2245": 33307707392.0, - "2250": 33307256832.0, - "2255": 33306845184.0, - "2260": 33307736064.0, - "2265": 33308620800.0, - "2270": 33307357184.0, - "2275": 33308151808.0, - "2280": 33307981824.0, - "2285": 33307922432.0, - "2290": 33306767360.0, - "2295": 33307670528.0, - "2300": 33307179008.0, - "2305": 33307545600.0, - "2310": 33307924480.0, - "2315": 33307396096.0, - "2320": 33307725824.0, - "2325": 33308024832.0, - "2330": 33307793408.0, - "2335": 33307019264.0, - "2340": 33307162624.0, - "2345": 33307934720.0, - "2350": 33306232832.0, - "2355": 33307719680.0, - "2360": 33307375616.0, - "2365": 33306537984.0, - "2370": 33307279360.0, - "2375": 33308131328.0, - "2380": 33307136000.0, - "2385": 33307490304.0, - "2390": 33307316224.0, - "2395": 33306587136.0, - "2400": 33307594752.0, - "2405": 33308393472.0, - "2410": 33306726400.0, - "2415": 33307506688.0, - "2420": 33308407808.0, - "2425": 33307942912.0, - "2430": 33308116992.0, - "2435": 33307308032.0, - "2440": 33308362752.0, - "2445": 33308071936.0, - "2450": 33307740160.0, - "2455": 33307959296.0, - "2460": 33308258304.0, - "2465": 33307299840.0, - "2470": 33307056128.0, - "2475": 33307224064.0, - "2480": 33307713536.0, - "2485": 33306550272.0, - "2490": 33306992640.0, - "2495": 33307232256.0, - "2500": 33307095040.0, - "2505": 33307107328.0, - "2510": 33307488256.0, - "2515": 33308360704.0, - "2520": 33307369472.0, - "2525": 33306959872.0, - "2530": 33307258880.0, - "2535": 33307082752.0, - "2540": 33308633088.0, - "2545": 33308542976.0, - "2550": 33308002304.0, - "2555": 33307961344.0, - "2560": 33307328512.0, - "2565": 33308299264.0, - "2570": 33307770880.0, - "2575": 33307877376.0, - "2580": 33307990016.0, - "2585": 33308016640.0, - "2590": 33308135424.0, - "2595": 33307617280.0, - "2600": 
33306667008.0, - "2605": 33307422720.0, - "2610": 33306683392.0, - "2615": 33308669952.0, - "2620": 33308616704.0, - "2625": 33308366848.0, - "2630": 33307574272.0, - "2635": 33308166144.0, - "2640": 33307983872.0, - "2645": 33307609088.0, - "2650": 33307807744.0, - "2655": 33306955776.0, - "2660": 33307273216.0, - "2665": 33307709440.0, - "2670": 33307693056.0, - "2675": 33307731968.0, - "2680": 33308227584.0, - "2685": 33307742208.0, - "2690": 33307734016.0, - "2695": 33307424768.0, - "2700": 33306644480.0, - "2705": 33306300416.0, - "2710": 33307881472.0, - "2715": 33307488256.0, - "2720": 33307318272.0, - "2725": 33307604992.0, - "2730": 33306710016.0, - "2735": 33308049408.0, - "2740": 33307437056.0, - "2745": 33307572224.0, - "2750": 33307136000.0, - "2755": 33307584512.0, - "2760": 33307355136.0, - "2765": 33307713536.0, - "2770": 33308000256.0, - "2775": 33306460160.0, - "2780": 33306923008.0, - "2785": 33307017216.0, - "2790": 33306720256.0, - "2795": 33307785216.0, - "2800": 33307234304.0, - "2805": 33306685440.0, - "2810": 33307469824.0, - "2815": 33308069888.0, - "2820": 33306460160.0, - "2825": 33307467776.0, - "2830": 33307666432.0, - "2835": 33307371520.0, - "2840": 33306904576.0, - "2845": 33308061696.0, - "2850": 33308520448.0, - "2855": 33307695104.0, - "2860": 33308487680.0, - "2865": 33307058176.0, - "2870": 33307303936.0, - "2875": 33307324416.0, - "2880": 33306968064.0, - "2885": 33307641856.0, - "2890": 33307785216.0, - "2895": 33308221440.0, - "2900": 33307596800.0, - "2905": 33307533312.0, - "2910": 33307459584.0, - "2915": 33307799552.0, - "2920": 33308461056.0, - "2925": 33307938816.0, - "2930": 33308268544.0, - "2935": 33308594176.0, - "2940": 33308170240.0, - "2945": 33307578368.0, - "2950": 33307590656.0, - "2955": 33308131328.0, - "2960": 33306839040.0, - "2965": 33307111424.0, - "2970": 33307570176.0, - "2975": 33307766784.0, - "2980": 33307600896.0, - "2985": 33307123712.0, - "2990": 33307641856.0, - "2995": 33307527168.0, - "3000": 
33307863040.0, - "3005": 33306927104.0, - "3010": 33307738112.0, - "3015": 33308217344.0, - "3020": 33306697728.0, - "3025": 33306970112.0, - "3030": 33308127232.0, - "3035": 33308213248.0, - "3040": 33307578368.0, - "3045": 33308327936.0, - "3050": 33306910720.0, - "3055": 33307004928.0, - "3060": 33307602944.0, - "3065": 33306970112.0, - "3070": 33307985920.0, - "3075": 33306945536.0, - "3080": 33307312128.0, - "3085": 33306533888.0, - "3090": 33306933248.0, - "3095": 33307906048.0, - "3100": 33306793984.0, - "3105": 33307127808.0, - "3110": 33308295168.0, - "3115": 33307295744.0, - "3120": 33307897856.0, - "3125": 33307066368.0, - "3130": 33307781120.0, - "3135": 33307762688.0, - "3140": 33308196864.0, - "3145": 33306904576.0, - "3150": 33307140096.0, - "3155": 33306660864.0, - "3160": 33307514880.0, - "3165": 33307246592.0, - "3170": 33307613184.0, - "3175": 33307375616.0, - "3180": 33307551744.0, - "3185": 33307842560.0, - "3190": 33308342272.0, - "3195": 33308350464.0, - "3200": 33307799552.0, - "3205": 33307099136.0, - "3210": 33306869760.0, - "3215": 33307678720.0, - "3220": 33307111424.0, - "3225": 33307146240.0, - "3230": 33306972160.0, - "3235": 33307387904.0, - "3240": 33307521024.0, - "3245": 33307287552.0, - "3250": 33307523072.0, - "3255": 33307639808.0, - "3260": 33307092992.0, - "3265": 33308338176.0, - "3270": 33307273216.0, - "3275": 33307713536.0, - "3280": 33307719680.0, - "3285": 33308049408.0, - "3290": 33307484160.0, - "3295": 33307594752.0, - "3300": 33307228160.0, - "3305": 33306580992.0, - "3310": 33307541504.0, - "3315": 33307211776.0, - "3320": 33307324416.0, - "3325": 33306615808.0, - "3330": 33307777024.0, - "3335": 33308135424.0, - "3340": 33307351040.0, - "3345": 33307131904.0, - "3350": 33307031552.0, - "3355": 33307791360.0, - "3360": 33307410432.0, - "3365": 33307090944.0, - "3370": 33306187776.0, - "3375": 33307113472.0, - "3380": 33308071936.0, - "3385": 33307717632.0, - "3390": 33306648576.0, - "3395": 33306781696.0, - "3400": 
33307734016.0, - "3405": 33307570176.0, - "3410": 33307750400.0, - "3415": 33307920384.0, - "3420": 33308157952.0, - "3425": 33307500544.0, - "3430": 33307168768.0, - "3435": 33307645952.0, - "3440": 33307185152.0, - "3445": 33307459584.0, - "3450": 33306804224.0, - "3455": 33307662336.0, - "3460": 33306748928.0, - "3465": 33306497024.0, - "3470": 33306796032.0, - "3475": 33307947008.0, - "3480": 33308039168.0, - "3485": 33307676672.0, - "3490": 33306728448.0, - "3495": 33307115520.0, - "3500": 33306628096.0, - "3505": 33307537408.0, - "3510": 33306945536.0, - "3515": 33306902528.0, - "3520": 33307553792.0, - "3525": 33307590656.0, - "3530": 33307852800.0, - "3535": 33306773504.0, - "3540": 33307953152.0, - "3545": 33307463680.0, - "3550": 33307123712.0, - "3555": 33307738112.0, - "3560": 33307766784.0, - "3565": 33307088896.0, - "3570": 33306882048.0, - "3575": 33307443200.0, - "3580": 33306951680.0, - "3585": 33306841088.0, - "3590": 33308293120.0, - "3595": 33307723776.0, - "3600": 33307756544.0, - "3605": 33307930624.0, - "3610": 33307985920.0, - "3615": 33307222016.0, - "3620": 33307430912.0, - "3625": 33307148288.0, - "3630": 33306388480.0, - "3635": 33307035648.0, - "3640": 33307455488.0, - "3645": 33306906624.0, - "3650": 33307545600.0, - "3655": 33307336704.0, - "3660": 33306910720.0, - "3665": 33307623424.0, - "3670": 33306824704.0, - "3675": 33307590656.0, - "3680": 33307373568.0, - "3685": 33306505216.0, - "3690": 33307817984.0, - "3695": 33306890240.0, - "3700": 33306802176.0, - "3705": 33306945536.0, - "3710": 33306904576.0, - "3715": 33307754496.0, - "3720": 33308395520.0, - "3725": 33308112896.0, - "3730": 33307652096.0, - "3735": 33307867136.0, - "3740": 33307805696.0, - "3745": 33308069888.0, - "3750": 33307826176.0, - "3755": 33306439680.0, - "3760": 33306849280.0, - "3765": 33307471872.0, - "3770": 33307095040.0, - "3775": 33307492352.0, - "3780": 33308141568.0, - "3785": 33307910144.0, - "3790": 33307656192.0, - "3795": 33307727872.0, - "3800": 
33307246592.0, - "3805": 33307848704.0, - "3810": 33307490304.0, - "3815": 33307357184.0, - "3820": 33307346944.0, - "3825": 33307619328.0, - "3830": 33308102656.0, - "3835": 33306849280.0, - "3840": 33307678720.0, - "3845": 33307258880.0, - "3850": 33307686912.0, - "3855": 33307467776.0, - "3860": 33307471872.0, - "3865": 33307439104.0, - "3870": 33307676672.0, - "3875": 33306865664.0, - "3880": 33307232256.0, - "3885": 33307099136.0, - "3890": 33307854848.0, - "3895": 33306370048.0, - "3900": 33306900480.0, - "3905": 33306824704.0, - "3910": 33307361280.0, - "3915": 33306591232.0, - "3920": 33307213824.0, - "3925": 33306980352.0, - "3930": 33308110848.0, - "3935": 33307179008.0, - "3940": 33307379712.0, - "3945": 33307813888.0, - "3950": 33307277312.0, - "3955": 33307203584.0, - "3960": 33307234304.0, - "3965": 33307121664.0, - "3970": 33307303936.0, - "3975": 33307144192.0, - "3980": 33307869184.0, - "3985": 33307660288.0, - "3990": 33307779072.0, - "3995": 33307795456.0, - "4000": 33307131904.0, - "4005": 33307238400.0, - "4010": 33307875328.0, - "4015": 33306726400.0, - "4020": 33308227584.0, - "4025": 33307799552.0, - "4030": 33307318272.0, - "4035": 33308190720.0, - "4040": 33307932672.0, - "4045": 33307291648.0, - "4050": 33307959296.0, - "4055": 33307447296.0, - "4060": 33307486208.0, - "4065": 33308088320.0, - "4070": 33307183104.0, - "4075": 33307201536.0, - "4080": 33308184576.0, - "4085": 33306406912.0, - "4090": 33307891712.0, - "4095": 33307031552.0, - "4100": 33308100608.0, - "4105": 33307258880.0, - "4110": 33307492352.0, - "4115": 33308344320.0, - "4120": 33306552320.0, - "4125": 33307611136.0, - "4130": 33306083328.0, - "4135": 33308463104.0, - "4140": 33307611136.0, - "4145": 33307455488.0, - "4150": 33307658240.0, - "4155": 33307133952.0, - "4160": 33308233728.0, - "4165": 33307408384.0, - "4170": 33306888192.0, - "4175": 33307852800.0, - "4180": 33307150336.0, - "4185": 33307127808.0, - "4190": 33307582464.0, - "4195": 33308610560.0, - "4200": 
33308231680.0, - "4205": 33307906048.0, - "4210": 33308307456.0, - "4215": 33306363904.0, - "4220": 33306980352.0, - "4225": 33306318848.0, - "4230": 33307731968.0, - "4235": 33307142144.0, - "4240": 33307432960.0, - "4245": 33307097088.0, - "4250": 33307783168.0, - "4255": 33307365376.0, - "4260": 33306947584.0, - "4265": 33306611712.0, - "4270": 33306347520.0, - "4275": 33306624000.0, - "4280": 33307185152.0, - "4285": 33307922432.0, - "4290": 33307508736.0, - "4295": 33307658240.0, - "4300": 33308405760.0, - "4305": 33306474496.0, - "4310": 33307557888.0, - "4315": 33308307456.0, - "4320": 33307719680.0, - "4325": 33306824704.0, - "4330": 33307594752.0, - "4335": 33306144768.0, - "4340": 33307852800.0, - "4345": 33307342848.0, - "4350": 33308139520.0, - "4355": 33307713536.0, - "4360": 33307373568.0, - "4365": 33308065792.0, - "4370": 33306681344.0, - "4375": 33307770880.0, - "4380": 33307361280.0, - "4385": 33307086848.0, - "4390": 33307019264.0, - "4395": 33306986496.0, - "4400": 33307103232.0, - "4405": 33307664384.0, - "4410": 33307996160.0, - "4415": 33306990592.0, - "4420": 33306546176.0, - "4425": 33306904576.0, - "4430": 33307303936.0, - "4435": 33306763264.0, - "4440": 33308063744.0, - "4445": 33307242496.0, - "4450": 33307283456.0, - "4455": 33306654720.0, - "4460": 33307205632.0, - "4465": 33306867712.0, - "4470": 33307916288.0, - "4475": 33307791360.0, - "4480": 33308450816.0, - "4485": 33307547648.0, - "4490": 33307090944.0, - "4495": 33307000832.0, - "4500": 33306935296.0, - "4505": 33307099136.0, - "4510": 33307525120.0, - "4515": 33307367424.0, - "4520": 33307813888.0, - "4525": 33307715584.0, - "4530": 33307901952.0, - "4535": 33307174912.0, - "4540": 33306880000.0, - "4545": 33307138048.0, - "4550": 33306873856.0, - "4555": 33306316800.0, - "4560": 33305849856.0, - "4565": 33307187200.0, - "4570": 33307260928.0, - "4575": 33307410432.0, - "4580": 33307201536.0, - "4585": 33306920960.0, - "4590": 33307355136.0, - "4595": 33307346944.0, - "4600": 
33307856896.0, - "4605": 33307752448.0, - "4610": 33307095040.0, - "4615": 33306286080.0, - "4620": 33306699776.0, - "4625": 33308069888.0, - "4630": 33307439104.0, - "4635": 33306900480.0, - "4640": 33307076608.0, - "4645": 33308160000.0, - "4650": 33307758592.0, - "4655": 33307865088.0, - "4660": 33306255360.0, - "4665": 33307641856.0, - "4670": 33307912192.0, - "4675": 33306603520.0, - "4680": 33307799552.0, - "4685": 33307488256.0, - "4690": 33307394048.0, - "4695": 33306763264.0, - "4700": 33307873280.0, - "4705": 33308106752.0, - "4710": 33307617280.0, - "4715": 33307047936.0, - "4720": 33307901952.0, - "4725": 33307793408.0, - "4730": 33308123136.0, - "4735": 33307451392.0, - "4740": 33307623424.0, - "4745": 33306857472.0, - "4750": 33308436480.0, - "4755": 33307260928.0, - "4760": 33307975680.0, - "4765": 33307965440.0, - "4770": 33306859520.0, - "4775": 33307922432.0, - "4780": 33306978304.0, - "4785": 33306869760.0, - "4790": 33307084800.0, - "4795": 33307226112.0, - "4800": 33307961344.0, - "4805": 33308334080.0, - "4810": 33305587712.0, - "4815": 33307928576.0, - "4820": 33307875328.0, - "4825": 33306957824.0, - "4830": 33307797504.0, - "4835": 33306116096.0, - "4840": 33307654144.0, - "4845": 33307131904.0, - "4850": 33308055552.0, - "4855": 33305792512.0, - "4860": 33307402240.0, - "4865": 33307086848.0, - "4870": 33307637760.0, - "4875": 33307789312.0, - "4880": 33307701248.0, - "4885": 33308010496.0, - "4890": 33307039744.0, - "4895": 33307369472.0, - "4900": 33307127808.0, - "4905": 33306988544.0, - "4910": 33308276736.0, - "4915": 33307090944.0, - "4920": 33307015168.0, - "4925": 33308043264.0, - "4930": 33307607040.0, - "4935": 33308209152.0, - "4940": 33307725824.0, - "4945": 33307985920.0, - "4950": 33307582464.0, - "4955": 33307297792.0, - "4960": 33307639808.0, - "4965": 33307445248.0, - "4970": 33306869760.0, - "4975": 33306787840.0, - "4980": 33307099136.0, - "4985": 33307635712.0, - "4990": 33307406336.0, - "4995": 33307471872.0, - "5000": 
33307375616.0, - "5005": 33307672576.0, - "5010": 33306970112.0, - "5015": 33307244544.0, - "5020": 33306966016.0, - "5025": 33307705344.0, - "5030": 33307463680.0, - "5035": 33306818560.0, - "5040": 33306972160.0, - "5045": 33308157952.0, - "5050": 33306376192.0, - "5055": 33307594752.0, - "5060": 33308471296.0, - "5065": 33307455488.0, - "5070": 33307301888.0, - "5075": 33307488256.0, - "5080": 33307910144.0, - "5085": 33307635712.0, - "5090": 33307406336.0, - "5095": 33307254784.0, - "5100": 33306828800.0, - "5105": 33307852800.0, - "5110": 33308258304.0, - "5115": 33307228160.0, - "5120": 33307955200.0, - "5125": 33305640960.0, - "5130": 33306683392.0, - "5135": 33307336704.0, - "5140": 33307834368.0, - "5145": 33307060224.0, - "5150": 33307023360.0, - "5155": 33307308032.0, - "5160": 33306664960.0, - "5165": 33307123712.0, - "5170": 33306935296.0, - "5175": 33308094464.0, - "5180": 33306566656.0, - "5185": 33306796032.0, - "5190": 33307545600.0, - "5195": 33308067840.0, - "5200": 33307754496.0, - "5205": 33307445248.0, - "5210": 33306785792.0, - "5215": 33307551744.0, - "5220": 33308188672.0, - "5225": 33307338752.0, - "5230": 33307283456.0, - "5235": 33306976256.0, - "5240": 33308041216.0, - "5245": 33308340224.0, - "5250": 33308153856.0, - "5255": 33307590656.0, - "5260": 33306896384.0, - "5265": 33308303360.0, - "5270": 33308796928.0, - "5275": 33307949056.0, - "5280": 33306157056.0, - "5285": 33307904000.0, - "5290": 33308143616.0, - "5295": 33306533888.0, - "5300": 33307912192.0, - "5305": 33308338176.0, - "5310": 33308688384.0, - "5315": 33308045312.0, - "5320": 33306206208.0, - "5325": 33308219392.0, - "5330": 33308012544.0, - "5335": 33307602944.0, - "5340": 33306685440.0, - "5345": 33308209152.0, - "5350": 33307150336.0, - "5355": 33308176384.0, - "5360": 33307273216.0, - "5365": 33307850752.0, - "5370": 33307222016.0, - "5375": 33307803648.0, - "5380": 33307617280.0, - "5385": 33307179008.0, - "5390": 33307389952.0, - "5395": 33306927104.0, - "5400": 
33307518976.0, - "5405": 33307400192.0, - "5410": 33307598848.0, - "5415": 33307846656.0, - "5420": 33307490304.0, - "5425": 33307459584.0, - "5430": 33307283456.0, - "5435": 33307453440.0, - "5440": 33307383808.0, - "5445": 33307117568.0, - "5450": 33307832320.0, - "5455": 33307582464.0, - "5460": 33306963968.0, - "5465": 33306947584.0, - "5470": 33307355136.0, - "5475": 33306748928.0, - "5480": 33306435584.0, - "5485": 33307590656.0, - "5490": 33307787264.0, - "5495": 33307568128.0, - "5500": 33307351040.0, - "5505": 33307568128.0, - "5510": 33307426816.0, - "5515": 33307451392.0, - "5520": 33307549696.0, - "5525": 33307000832.0, - "5530": 33307566080.0, - "5535": 33307664384.0, - "5540": 33306966016.0, - "5545": 33307781120.0, - "5550": 33307275264.0, - "5555": 33307269120.0, - "5560": 33307576320.0, - "5565": 33307377664.0, - "5570": 33307052032.0, - "5575": 33306978304.0, - "5580": 33307965440.0, - "5585": 33307494400.0, - "5590": 33308055552.0, - "5595": 33306943488.0, - "5600": 33306542080.0, - "5605": 33307680768.0, - "5610": 33308542976.0, - "5615": 33307826176.0, - "5620": 33308108800.0, - "5625": 33308225536.0, - "5630": 33308069888.0, - "5635": 33307760640.0, - "5640": 33307500544.0, - "5645": 33307930624.0, - "5650": 33306755072.0, - "5655": 33308192768.0, - "5660": 33308631040.0, - "5665": 33307418624.0, - "5670": 33307504640.0, - "5675": 33307715584.0, - "5680": 33307910144.0, - "5685": 33307996160.0, - "5690": 33307478016.0, - "5695": 33308164096.0, - "5700": 33307906048.0, - "5705": 33307750400.0, - "5710": 33306779648.0, - "5715": 33307219968.0, - "5720": 33307750400.0, - "5725": 33307537408.0, - "5730": 33307262976.0, - "5735": 33306767360.0, - "5740": 33307508736.0, - "5745": 33306753024.0, - "5750": 33306636288.0, - "5755": 33306943488.0, - "5760": 33307553792.0, - "5765": 33307842560.0, - "5770": 33307047936.0, - "5775": 33307348992.0, - "5780": 33306361856.0, - "5785": 33307709440.0, - "5790": 33307832320.0, - "5795": 33307406336.0, - "5800": 
33307056128.0, - "5805": 33307631616.0, - "5810": 33307766784.0, - "5815": 33307971584.0, - "5820": 33307447296.0, - "5825": 33307084800.0, - "5830": 33307324416.0, - "5835": 33307127808.0, - "5840": 33307729920.0, - "5845": 33307088896.0, - "5850": 33307635712.0, - "5855": 33307119616.0, - "5860": 33306703872.0, - "5865": 33307291648.0, - "5870": 33307613184.0, - "5875": 33307893760.0, - "5880": 33307893760.0, - "5885": 33307301888.0, - "5890": 33307830272.0, - "5895": 33306671104.0, - "5900": 33306488832.0, - "5905": 33308141568.0, - "5910": 33307373568.0, - "5915": 33307330560.0, - "5920": 33307656192.0, - "5925": 33307533312.0, - "5930": 33307848704.0, - "5935": 33307586560.0, - "5940": 33307602944.0, - "5945": 33307631616.0, - "5950": 33306615808.0, - "5955": 33307719680.0, - "5960": 33308553216.0, - "5965": 33308676096.0, - "5970": 33308313600.0, - "5975": 33306810368.0, - "5980": 33307222016.0, - "5985": 33307367424.0, - "5990": 33307119616.0, - "5995": 33307166720.0, - "6000": 33307822080.0, - "6005": 33307553792.0, - "6010": 33307756544.0, - "6015": 33306392576.0, - "6020": 33308116992.0, - "6025": 33307738112.0, - "6030": 33307459584.0, - "6035": 33306920960.0, - "6040": 33307701248.0, - "6045": 33307932672.0, - "6050": 33307496448.0, - "6055": 33307133952.0, - "6060": 33306370048.0, - "6065": 33307521024.0, - "6070": 33307244544.0, - "6075": 33306447872.0, - "6080": 33306963968.0, - "6085": 33307932672.0, - "6090": 33307293696.0, - "6095": 33307058176.0, - "6100": 33307449344.0, - "6105": 33307613184.0, - "6110": 33307779072.0, - "6115": 33306832896.0, - "6120": 33306732544.0, - "6125": 33306488832.0, - "6130": 33308866560.0, - "6135": 33308000256.0, - "6140": 33307906048.0, - "6145": 33308504064.0, - "6150": 33307826176.0, - "6155": 33306906624.0, - "6160": 33307533312.0, - "6165": 33307578368.0, - "6170": 33307891712.0, - "6175": 33307537408.0, - "6180": 33307803648.0, - "6185": 33308125184.0, - "6190": 33307342848.0, - "6195": 33308135424.0, - "6200": 
33306468352.0, - "6205": 33308026880.0, - "6210": 33308028928.0, - "6215": 33308157952.0, - "6220": 33307662336.0, - "6225": 33307344896.0, - "6230": 33308231680.0, - "6235": 33307148288.0, - "6240": 33308809216.0, - "6245": 33307017216.0, - "6250": 33307234304.0, - "6255": 33308430336.0, - "6260": 33307246592.0, - "6265": 33307418624.0, - "6270": 33308319744.0, - "6275": 33307090944.0, - "6280": 33307404288.0, - "6285": 33308227584.0, - "6290": 33307656192.0, - "6295": 33306865664.0, - "6300": 33307596800.0, - "6305": 33308192768.0, - "6310": 33307695104.0, - "6315": 33307361280.0, - "6320": 33306775552.0, - "6325": 33307557888.0, - "6330": 33307639808.0, - "6335": 33307820032.0, - "6340": 33307410432.0, - "6345": 33307410432.0, - "6350": 33308256256.0, - "6355": 33307082752.0, - "6360": 33306855424.0, - "6365": 33307418624.0, - "6370": 33307066368.0, - "6375": 33307891712.0, - "6380": 33307779072.0, - "6385": 33306128384.0, - "6390": 33306884096.0, - "6395": 33307060224.0, - "6400": 33307250688.0, - "6405": 33308135424.0, - "6410": 33308155904.0, - "6415": 33307101184.0, - "6420": 33306318848.0, - "6425": 33308065792.0, - "6430": 33307813888.0, - "6435": 33307842560.0, - "6440": 33308571648.0, - "6445": 33306138624.0, - "6450": 33307762688.0, - "6455": 33308119040.0, - "6460": 33308037120.0, - "6465": 33308467200.0, - "6470": 33307181056.0, - "6475": 33307246592.0, - "6480": 33306855424.0, - "6485": 33308440576.0, - "6490": 33307863040.0, - "6495": 33306857472.0, - "6500": 33306529792.0, - "6505": 33307097088.0, - "6510": 33307842560.0, - "6515": 33307095040.0, - "6520": 33307848704.0, - "6525": 33307596800.0, - "6530": 33307117568.0, - "6535": 33307811840.0, - "6540": 33307645952.0, - "6545": 33307211776.0, - "6550": 33308196864.0, - "6555": 33307213824.0, - "6560": 33307326464.0, - "6565": 33306490880.0, - "6570": 33306877952.0, - "6575": 33307199488.0, - "6580": 33308370944.0, - "6585": 33307828224.0, - "6590": 33307871232.0, - "6595": 33307590656.0, - "6600": 
33306578944.0, - "6605": 33307496448.0, - "6610": 33307912192.0, - "6615": 33307521024.0, - "6620": 33307189248.0, - "6625": 33306961920.0, - "6630": 33306800128.0, - "6635": 33306957824.0, - "6640": 33307762688.0, - "6645": 33306427392.0, - "6650": 33307672576.0, - "6655": 33305133056.0, - "6660": 33307598848.0, - "6665": 33306884096.0, - "6670": 33307500544.0, - "6675": 33307592704.0, - "6680": 33306923008.0, - "6685": 33307084800.0, - "6690": 33307402240.0, - "6695": 33307963392.0, - "6700": 33307336704.0, - "6705": 33306845184.0, - "6710": 33307230208.0, - "6715": 33306310656.0, - "6720": 33307834368.0, - "6725": 33308094464.0, - "6730": 33308327936.0, - "6735": 33308092416.0, - "6740": 33306873856.0, - "6745": 33308082176.0, - "6750": 33306112000.0, - "6755": 33306810368.0, - "6760": 33307394048.0, - "6765": 33307414528.0, - "6770": 33308286976.0, - "6775": 33308618752.0, - "6780": 33306904576.0, - "6785": 33308182528.0, - "6790": 33308057600.0, - "6795": 33307049984.0, - "6800": 33306744832.0, - "6805": 33307242496.0, - "6810": 33307176960.0, - "6815": 33307779072.0, - "6820": 33306849280.0, - "6825": 33307623424.0, - "6830": 33307887616.0, - "6835": 33307670528.0, - "6840": 33308348416.0, - "6845": 33308184576.0, - "6850": 33307727872.0, - "6855": 33307252736.0, - "6860": 33307680768.0, - "6865": 33306963968.0, - "6870": 33307099136.0, - "6875": 33307037696.0, - "6880": 33307635712.0, - "6885": 33307615232.0, - "6890": 33307652096.0, - "6895": 33307369472.0, - "6900": 33307947008.0, - "6905": 33307334656.0, - "6910": 33306824704.0, - "6915": 33307537408.0, - "6920": 33306619904.0, - "6925": 33306408960.0, - "6930": 33306765312.0, - "6935": 33306609664.0, - "6940": 33307623424.0, - "6945": 33307160576.0, - "6950": 33307463680.0, - "6955": 33306507264.0, - "6960": 33307185152.0, - "6965": 33307019264.0, - "6970": 33307598848.0, - "6975": 33307435008.0, - "6980": 33307238400.0, - "6985": 33306222592.0, - "6990": 33308581888.0, - "6995": 33307254784.0, - "7000": 
33308035072.0, - "7005": 33308233728.0, - "7010": 33307092992.0, - "7015": 33307193344.0, - "7020": 33307643904.0, - "7025": 33308274688.0, - "7030": 33307019264.0, - "7035": 33308454912.0, - "7040": 33308086272.0, - "7045": 33307277312.0, - "7050": 33307172864.0, - "7055": 33306599424.0, - "7060": 33307613184.0, - "7065": 33307031552.0, - "7070": 33306243072.0, - "7075": 33308037120.0, - "7080": 33306759168.0, - "7085": 33308033024.0, - "7090": 33307971584.0, - "7095": 33306873856.0, - "7100": 33308522496.0, - "7105": 33307363328.0, - "7110": 33308063744.0, - "7115": 33307770880.0, - "7120": 33307906048.0, - "7125": 33307443200.0, - "7130": 33307574272.0, - "7135": 33307541504.0, - "7140": 33306765312.0, - "7145": 33307854848.0, - "7150": 33306853376.0, - "7155": 33307856896.0, - "7160": 33307906048.0, - "7165": 33308184576.0, - "7170": 33308272640.0, - "7175": 33306417152.0, - "7180": 33307107328.0, - "7185": 33307860992.0, - "7190": 33307078656.0, - "7195": 33307494400.0, - "7200": 33307613184.0, - "7205": 33307680768.0, - "7210": 33307990016.0, - "7215": 33306822656.0, - "7220": 33306730496.0, - "7225": 33307539456.0, - "7230": 33307744256.0, - "7235": 33306136576.0, - "7240": 33307189248.0, - "7245": 33307236352.0, - "7250": 33306980352.0, - "7255": 33307832320.0, - "7260": 33307426816.0, - "7265": 33307340800.0, - "7270": 33307844608.0, - "7275": 33308094464.0, - "7280": 33308602368.0, - "7285": 33307498496.0, - "7290": 33307920384.0, - "7295": 33307426816.0, - "7300": 33306392576.0, - "7305": 33306718208.0, - "7310": 33307260928.0, - "7315": 33307527168.0, - "7320": 33306963968.0, - "7325": 33308188672.0, - "7330": 33307799552.0, - "7335": 33307717632.0, - "7340": 33307238400.0, - "7345": 33307365376.0, - "7350": 33307314176.0, - "7355": 33307940864.0, - "7360": 33306284032.0, - "7365": 33307893760.0, - "7370": 33306275840.0, - "7375": 33307873280.0, - "7380": 33309245440.0, - "7385": 33306730496.0, - "7390": 33307758592.0, - "7395": 33306609664.0, - "7400": 
33307652096.0, - "7405": 33306427392.0, - "7410": 33308524544.0, - "7415": 33307961344.0, - "7420": 33307242496.0, - "7425": 33307811840.0, - "7430": 33307119616.0, - "7435": 33307428864.0, - "7440": 33307709440.0, - "7445": 33308342272.0, - "7450": 33306980352.0, - "7455": 33307351040.0, - "7460": 33306730496.0, - "7465": 33306537984.0, - "7470": 33307664384.0, - "7475": 33308037120.0, - "7480": 33307179008.0, - "7485": 33308467200.0, - "7490": 33307822080.0, - "7495": 33306638336.0, - "7500": 33306689536.0, - "7505": 33307717632.0, - "7510": 33306789888.0, - "7515": 33307518976.0, - "7520": 33307260928.0, - "7525": 33307676672.0, - "7530": 33306916864.0, - "7535": 33306996736.0, - "7540": 33306566656.0, - "7545": 33306720256.0, - "7550": 33307584512.0, - "7555": 33307471872.0, - "7560": 33306736640.0, - "7565": 33306292224.0, - "7570": 33307066368.0, - "7575": 33306871808.0, - "7580": 33307324416.0, - "7585": 33307115520.0, - "7590": 33306341376.0, - "7595": 33307744256.0, - "7600": 33307482112.0, - "7605": 33308149760.0, - "7610": 33307525120.0, - "7615": 33307656192.0, - "7620": 33307224064.0, - "7625": 33307158528.0, - "7630": 33307742208.0, - "7635": 33308012544.0, - "7640": 33307049984.0, - "7645": 33308631040.0, - "7650": 33307865088.0, - "7655": 33308229632.0, - "7660": 33307043840.0, - "7665": 33307037696.0, - "7670": 33306791936.0, - "7675": 33307320320.0, - "7680": 33307293696.0, - "7685": 33307432960.0, - "7690": 33307103232.0, - "7695": 33307568128.0, - "7700": 33306312704.0, - "7705": 33307795456.0, - "7710": 33307996160.0, - "7715": 33307133952.0, - "7720": 33308164096.0, - "7725": 33307254784.0, - "7730": 33307830272.0, - "7735": 33307721728.0, - "7740": 33307492352.0, - "7745": 33307783168.0, - "7750": 33306728448.0, - "7755": 33307734016.0, - "7760": 33308614656.0, - "7765": 33306791936.0, - "7770": 33308278784.0, - "7775": 33307873280.0, - "7780": 33307078656.0, - "7785": 33306990592.0, - "7790": 33307062272.0, - "7795": 33307680768.0, - "7800": 
33306982400.0, - "7805": 33308090368.0, - "7810": 33307308032.0, - "7815": 33307078656.0, - "7820": 33307951104.0, - "7825": 33306480640.0, - "7830": 33307258880.0, - "7835": 33307891712.0, - "7840": 33307432960.0, - "7845": 33307066368.0, - "7850": 33306910720.0, - "7855": 33307938816.0, - "7860": 33307308032.0, - "7865": 33308264448.0, - "7870": 33307729920.0, - "7875": 33308129280.0, - "7880": 33308352512.0, - "7885": 33307398144.0, - "7890": 33306920960.0, - "7895": 33307156480.0, - "7900": 33308221440.0, - "7905": 33308047360.0, - "7910": 33306146816.0, - "7915": 33306910720.0, - "7920": 33307090944.0, - "7925": 33308264448.0, - "7930": 33307908096.0, - "7935": 33307465728.0, - "7940": 33307375616.0, - "7945": 33307848704.0, - "7950": 33308090368.0, - "7955": 33307043840.0, - "7960": 33307168768.0, - "7965": 33307846656.0, - "7970": 33306454016.0, - "7975": 33307635712.0, - "7980": 33307555840.0, - "7985": 33307131904.0, - "7990": 33306732544.0, - "7995": 33307430912.0, - "8000": 33307674624.0, - "8005": 33307746304.0, - "8010": 33308002304.0, - "8015": 33306906624.0, - "8020": 33307895808.0, - "8025": 33308231680.0, - "8030": 33307664384.0, - "8035": 33306888192.0, - "8040": 33308024832.0, - "8045": 33307693056.0, - "8050": 33306583040.0, - "8055": 33307201536.0, - "8060": 33307594752.0, - "8065": 33308260352.0, - "8070": 33307426816.0, - "8075": 33308108800.0, - "8080": 33308178432.0, - "8085": 33307308032.0, - "8090": 33306513408.0, - "8095": 33306968064.0, - "8100": 33308413952.0, - "8105": 33308241920.0, - "8110": 33307471872.0, - "8115": 33307832320.0, - "8120": 33307193344.0, - "8125": 33307295744.0, - "8130": 33306775552.0, - "8135": 33307097088.0, - "8140": 33307865088.0, - "8145": 33306746880.0, - "8150": 33307023360.0, - "8155": 33306806272.0, - "8160": 33307373568.0, - "8165": 33307631616.0, - "8170": 33306769408.0, - "8175": 33308239872.0, - "8180": 33307240448.0, - "8185": 33307471872.0, - "8190": 33308184576.0, - "8195": 33307754496.0, - "8200": 
33307459584.0, - "8205": 33307850752.0, - "8210": 33306810368.0, - "8215": 33306222592.0, - "8220": 33307795456.0, - "8225": 33308078080.0, - "8230": 33306132480.0, - "8235": 33308764160.0, - "8240": 33307432960.0, - "8245": 33307867136.0, - "8250": 33308260352.0, - "8255": 33308334080.0, - "8260": 33308233728.0, - "8265": 33308528640.0, - "8270": 33307699200.0, - "8275": 33306748928.0, - "8280": 33307635712.0, - "8285": 33308008448.0, - "8290": 33307590656.0, - "8295": 33308041216.0, - "8300": 33307516928.0, - "8305": 33307879424.0, - "8310": 33307576320.0, - "8315": 33308366848.0, - "8320": 33307496448.0, - "8325": 33307256832.0, - "8330": 33307680768.0, - "8335": 33306669056.0, - "8340": 33306990592.0, - "8345": 33307936768.0, - "8350": 33307955200.0, - "8355": 33307791360.0, - "8360": 33306640384.0, - "8365": 33307586560.0, - "8370": 33307648000.0, - "8375": 33306890240.0, - "8380": 33307764736.0, - "8385": 33307871232.0, - "8390": 33307023360.0, - "8395": 33307664384.0, - "8400": 33307510784.0, - "8405": 33307338752.0, - "8410": 33307316224.0, - "8415": 33307566080.0, - "8420": 33307891712.0, - "8425": 33307676672.0, - "8430": 33307693056.0, - "8435": 33306812416.0, - "8440": 33307762688.0, - "8445": 33307447296.0, - "8450": 33307426816.0, - "8455": 33306660864.0, - "8460": 33307385856.0, - "8465": 33308121088.0, - "8470": 33307664384.0, - "8475": 33307023360.0, - "8480": 33308082176.0, - "8485": 33307346944.0, - "8490": 33307471872.0, - "8495": 33307889664.0, - "8500": 33307492352.0, - "8505": 33307502592.0, - "8510": 33307815936.0, - "8515": 33307983872.0, - "8520": 33306431488.0, - "8525": 33306537984.0, - "8530": 33307199488.0, - "8535": 33307848704.0, - "8540": 33307459584.0, - "8545": 33307432960.0, - "8550": 33307600896.0, - "8555": 33308553216.0, - "8560": 33307701248.0, - "8565": 33307799552.0, - "8570": 33307934720.0, - "8575": 33306324992.0, - "8580": 33307648000.0, - "8585": 33307951104.0, - "8590": 33308108800.0, - "8595": 33308037120.0, - "8600": 
33308182528.0, - "8605": 33307410432.0, - "8610": 33308102656.0, - "8615": 33307342848.0, - "8620": 33306077184.0, - "8625": 33308153856.0, - "8630": 33307807744.0, - "8635": 33306734592.0, - "8640": 33307867136.0, - "8645": 33307129856.0, - "8650": 33307430912.0, - "8655": 33307545600.0, - "8660": 33307975680.0, - "8665": 33307822080.0, - "8670": 33307156480.0, - "8675": 33307758592.0, - "8680": 33308340224.0, - "8685": 33307357184.0, - "8690": 33308479488.0, - "8695": 33306523648.0, - "8700": 33307404288.0, - "8705": 33307791360.0, - "8710": 33308004352.0, - "8715": 33308108800.0, - "8720": 33307424768.0, - "8725": 33307564032.0, - "8730": 33306877952.0, - "8735": 33307199488.0, - "8740": 33307734016.0, - "8745": 33307248640.0, - "8750": 33307912192.0, - "8755": 33307215872.0, - "8760": 33308012544.0, - "8765": 33306640384.0, - "8770": 33307977728.0, - "8775": 33306624000.0, - "8780": 33307357184.0, - "8785": 33306353664.0, - "8790": 33307518976.0, - "8795": 33308178432.0, - "8800": 33307113472.0, - "8805": 33307045888.0, - "8810": 33307252736.0, - "8815": 33307430912.0, - "8820": 33307568128.0, - "8825": 33306791936.0, - "8830": 33307529216.0, - "8835": 33306691584.0, - "8840": 33306529792.0, - "8845": 33307303936.0, - "8850": 33307901952.0, - "8855": 33308196864.0, - "8860": 33307965440.0, - "8865": 33307971584.0, - "8870": 33306595328.0, - "8875": 33306419200.0, - "8880": 33307508736.0, - "8885": 33306345472.0, - "8890": 33307373568.0, - "8895": 33307631616.0, - "8900": 33307330560.0, - "8905": 33308209152.0, - "8910": 33308155904.0, - "8915": 33306943488.0, - "8920": 33307381760.0, - "8925": 33307437056.0, - "8930": 33308041216.0, - "8935": 33307142144.0, - "8940": 33307768832.0, - "8945": 33308551168.0, - "8950": 33307682816.0, - "8955": 33307656192.0, - "8960": 33307787264.0, - "8965": 33306220544.0, - "8970": 33307693056.0, - "8975": 33307529216.0, - "8980": 33307027456.0, - "8985": 33308442624.0, - "8990": 33307588608.0, - "8995": 33308315648.0, - "9000": 
33307787264.0, - "9005": 33307951104.0, - "9010": 33305649152.0, - "9015": 33307592704.0, - "9020": 33307033600.0, - "9025": 33307232256.0, - "9030": 33307793408.0, - "9035": 33307385856.0, - "9040": 33308012544.0, - "9045": 33307287552.0, - "9050": 33307701248.0, - "9055": 33306814464.0, - "9060": 33307975680.0, - "9065": 33307693056.0, - "9070": 33306888192.0, - "9075": 33307168768.0, - "9080": 33306818560.0, - "9085": 33307557888.0, - "9090": 33308200960.0, - "9095": 33306867712.0, - "9100": 33308563456.0, - "9105": 33306994688.0, - "9110": 33307004928.0, - "9115": 33307439104.0, - "9120": 33307340800.0, - "9125": 33307295744.0, - "9130": 33306771456.0, - "9135": 33307031552.0, - "9140": 33306497024.0, - "9145": 33307629568.0, - "9150": 33308002304.0, - "9155": 33307484160.0, - "9160": 33308100608.0, - "9165": 33307611136.0, - "9170": 33307897856.0, - "9175": 33307473920.0, - "9180": 33307977728.0, - "9185": 33307203584.0, - "9190": 33306693632.0, - "9195": 33306931200.0, - "9200": 33307779072.0, - "9205": 33307205632.0, - "9210": 33307637760.0, - "9215": 33307090944.0, - "9220": 33308454912.0, - "9225": 33307471872.0, - "9230": 33307322368.0, - "9235": 33307422720.0, - "9240": 33307242496.0, - "9245": 33308026880.0, - "9250": 33308203008.0, - "9255": 33307389952.0, - "9260": 33308825600.0, - "9265": 33306505216.0, - "9270": 33307426816.0, - "9275": 33307865088.0, - "9280": 33307435008.0, - "9285": 33307258880.0, - "9290": 33308000256.0, - "9295": 33307498496.0, - "9300": 33307301888.0, - "9305": 33307674624.0, - "9310": 33307031552.0, - "9315": 33306327040.0, - "9320": 33306834944.0, - "9325": 33307971584.0, - "9330": 33307910144.0, - "9335": 33307213824.0, - "9340": 33307385856.0, - "9345": 33307385856.0, - "9350": 33308127232.0, - "9355": 33306615808.0, - "9360": 33306697728.0, - "9365": 33307463680.0, - "9370": 33306355712.0, - "9375": 33307219968.0, - "9380": 33307224064.0, - "9385": 33308024832.0, - "9390": 33307830272.0, - "9395": 33307535360.0, - "9400": 
33307031552.0, - "9405": 33307418624.0, - "9410": 33306822656.0, - "9415": 33307267072.0, - "9420": 33306994688.0, - "9425": 33306892288.0, - "9430": 33307199488.0, - "9435": 33306980352.0, - "9440": 33306451968.0, - "9445": 33308420096.0, - "9450": 33306755072.0, - "9455": 33306341376.0, - "9460": 33308131328.0, - "9465": 33307023360.0, - "9470": 33308307456.0, - "9475": 33308221440.0, - "9480": 33308037120.0, - "9485": 33308055552.0, - "9490": 33307908096.0, - "9495": 33306486784.0, - "9500": 33306490880.0, - "9505": 33307967488.0, - "9510": 33307125760.0, - "9515": 33307242496.0, - "9520": 33307670528.0, - "9525": 33307496448.0, - "9530": 33307731968.0, - "9535": 33307435008.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 9535, - "step_interval": 5, - "values": { - "1": 36905754624.0, - "5": 45014786048.0, - "10": 45173362688.0, - "15": 45173362688.0, - "20": 45251878912.0, - "25": 45286207488.0, - "30": 45286207488.0, - "35": 45288939520.0, - "40": 45288939520.0, - "45": 45288939520.0, - "50": 45288939520.0, - "55": 45288939520.0, - "60": 45288939520.0, - "65": 45288939520.0, - "70": 45288939520.0, - "75": 45288939520.0, - "80": 45288939520.0, - "85": 45288939520.0, - "90": 45288939520.0, - "95": 45288939520.0, - "100": 45288939520.0, - "105": 45288939520.0, - "110": 45299392512.0, - "115": 45314936832.0, - "120": 45378736128.0, - "125": 45428596736.0, - "130": 45428596736.0, - "135": 45445640192.0, - "140": 45445640192.0, - "145": 45445640192.0, - "150": 45445640192.0, - "155": 45445640192.0, - "160": 45445640192.0, - "165": 45445640192.0, - "170": 45445640192.0, - "175": 45445640192.0, - "180": 45445640192.0, - "185": 45445640192.0, - "190": 45445640192.0, - "195": 45445640192.0, - "200": 45536641024.0, - "205": 45638885376.0, - "210": 45638885376.0, - "215": 45638885376.0, - "220": 45638885376.0, - "225": 45638885376.0, - "230": 45638885376.0, - "235": 45713887232.0, - "240": 45932376064.0, - "245": 45982269440.0, - "250": 
45982269440.0, - "255": 45982269440.0, - "260": 46039670784.0, - "265": 46039670784.0, - "270": 46039670784.0, - "275": 46039670784.0, - "280": 46293884928.0, - "285": 46293884928.0, - "290": 46293884928.0, - "295": 46293884928.0, - "300": 46293884928.0, - "305": 46319267840.0, - "310": 46319267840.0, - "315": 46319267840.0, - "320": 46319267840.0, - "325": 46319267840.0, - "330": 46319267840.0, - "335": 46319267840.0, - "340": 46319267840.0, - "345": 46451261440.0, - "350": 46451261440.0, - "355": 46451261440.0, - "360": 46451261440.0, - "365": 46451261440.0, - "370": 46451261440.0, - "375": 46451261440.0, - "380": 46451261440.0, - "385": 46451261440.0, - "390": 46451261440.0, - "395": 46451261440.0, - "400": 46451261440.0, - "405": 46451261440.0, - "410": 46451261440.0, - "415": 46451261440.0, - "420": 46451261440.0, - "425": 46451261440.0, - "430": 46451261440.0, - "435": 46451261440.0, - "440": 46451261440.0, - "445": 46451261440.0, - "450": 46451261440.0, - "455": 46451261440.0, - "460": 46451261440.0, - "465": 46451261440.0, - "470": 46451261440.0, - "475": 46451261440.0, - "480": 46451261440.0, - "485": 46451261440.0, - "490": 46451261440.0, - "495": 46451261440.0, - "500": 46451261440.0, - "505": 46451261440.0, - "510": 46451261440.0, - "515": 46451261440.0, - "520": 46451261440.0, - "525": 46451261440.0, - "530": 46451261440.0, - "535": 46451261440.0, - "540": 46451261440.0, - "545": 46451261440.0, - "550": 46451261440.0, - "555": 46451261440.0, - "560": 46451261440.0, - "565": 46451261440.0, - "570": 46451261440.0, - "575": 46451261440.0, - "580": 46451261440.0, - "585": 46451261440.0, - "590": 46451261440.0, - "595": 46451261440.0, - "600": 46451261440.0, - "605": 46451261440.0, - "610": 46451261440.0, - "615": 46451261440.0, - "620": 46451261440.0, - "625": 46451261440.0, - "630": 46451261440.0, - "635": 46451261440.0, - "640": 46451261440.0, - "645": 46451261440.0, - "650": 46451261440.0, - "655": 46451261440.0, - "660": 46451261440.0, - "665": 
46451261440.0, - "670": 46451261440.0, - "675": 46451261440.0, - "680": 46451261440.0, - "685": 46451261440.0, - "690": 46451261440.0, - "695": 46451261440.0, - "700": 46451261440.0, - "705": 46451261440.0, - "710": 46451261440.0, - "715": 46451261440.0, - "720": 46451261440.0, - "725": 46451261440.0, - "730": 46451261440.0, - "735": 46451261440.0, - "740": 46451261440.0, - "745": 46451261440.0, - "750": 46451261440.0, - "755": 46451261440.0, - "760": 46451261440.0, - "765": 46451261440.0, - "770": 46451261440.0, - "775": 46451261440.0, - "780": 46451261440.0, - "785": 46451261440.0, - "790": 46451261440.0, - "795": 46451261440.0, - "800": 46451261440.0, - "805": 46451261440.0, - "810": 46451261440.0, - "815": 46451261440.0, - "820": 46451261440.0, - "825": 46451261440.0, - "830": 46451261440.0, - "835": 46451261440.0, - "840": 46451261440.0, - "845": 46451261440.0, - "850": 46451261440.0, - "855": 46451261440.0, - "860": 46451261440.0, - "865": 46451261440.0, - "870": 46451261440.0, - "875": 46451261440.0, - "880": 46451261440.0, - "885": 46451261440.0, - "890": 46451261440.0, - "895": 46451261440.0, - "900": 46451261440.0, - "905": 46451261440.0, - "910": 46451261440.0, - "915": 46451261440.0, - "920": 46451261440.0, - "925": 46451261440.0, - "930": 46451261440.0, - "935": 46451261440.0, - "940": 46451261440.0, - "945": 46451261440.0, - "950": 46451261440.0, - "955": 46451261440.0, - "960": 45564735488.0, - "965": 45952081920.0, - "970": 45952081920.0, - "975": 46005657600.0, - "980": 46005657600.0, - "985": 46005657600.0, - "990": 46005657600.0, - "995": 46169923584.0, - "1000": 46169923584.0, - "1005": 46169923584.0, - "1010": 46169923584.0, - "1015": 46169923584.0, - "1020": 46169923584.0, - "1025": 46169923584.0, - "1030": 46169923584.0, - "1035": 46169923584.0, - "1040": 46169923584.0, - "1045": 46169923584.0, - "1050": 46169923584.0, - "1055": 46169923584.0, - "1060": 46169923584.0, - "1065": 46169923584.0, - "1070": 46169923584.0, - "1075": 46169923584.0, 
- "1080": 46169923584.0, - "1085": 46169923584.0, - "1090": 46169923584.0, - "1095": 46169923584.0, - "1100": 46169923584.0, - "1105": 46169923584.0, - "1110": 46169923584.0, - "1115": 46169923584.0, - "1120": 46169923584.0, - "1125": 46169923584.0, - "1130": 46169923584.0, - "1135": 46169923584.0, - "1140": 46169923584.0, - "1145": 46169923584.0, - "1150": 46169923584.0, - "1155": 46169923584.0, - "1160": 46169923584.0, - "1165": 46169923584.0, - "1170": 46169923584.0, - "1175": 46169923584.0, - "1180": 46192005120.0, - "1185": 46192005120.0, - "1190": 46192005120.0, - "1195": 46192005120.0, - "1200": 46192005120.0, - "1205": 46192005120.0, - "1210": 46192005120.0, - "1215": 46192005120.0, - "1220": 46192005120.0, - "1225": 46192005120.0, - "1230": 46192005120.0, - "1235": 46192005120.0, - "1240": 46192005120.0, - "1245": 46192005120.0, - "1250": 46192005120.0, - "1255": 46192005120.0, - "1260": 46192005120.0, - "1265": 46192005120.0, - "1270": 46192005120.0, - "1275": 46192005120.0, - "1280": 46192005120.0, - "1285": 46192005120.0, - "1290": 46192005120.0, - "1295": 46192005120.0, - "1300": 46192005120.0, - "1305": 46192005120.0, - "1310": 46192005120.0, - "1315": 46192005120.0, - "1320": 46192005120.0, - "1325": 46192005120.0, - "1330": 46192005120.0, - "1335": 46192005120.0, - "1340": 46192005120.0, - "1345": 46192005120.0, - "1350": 46192005120.0, - "1355": 46192005120.0, - "1360": 46192005120.0, - "1365": 46192005120.0, - "1370": 46192005120.0, - "1375": 46192005120.0, - "1380": 46192005120.0, - "1385": 46192005120.0, - "1390": 46192005120.0, - "1395": 46192005120.0, - "1400": 46192005120.0, - "1405": 46192005120.0, - "1410": 46192005120.0, - "1415": 46192005120.0, - "1420": 46192005120.0, - "1425": 46192005120.0, - "1430": 46192005120.0, - "1435": 46192005120.0, - "1440": 46192005120.0, - "1445": 46192005120.0, - "1450": 46192005120.0, - "1455": 46192005120.0, - "1460": 46192005120.0, - "1465": 46192005120.0, - "1470": 46192005120.0, - "1475": 46192005120.0, 
- "1480": 46192005120.0, - "1485": 46192005120.0, - "1490": 46192005120.0, - "1495": 46192005120.0, - "1500": 46192005120.0, - "1505": 46192005120.0, - "1510": 46192005120.0, - "1515": 46192005120.0, - "1520": 46192005120.0, - "1525": 46192005120.0, - "1530": 46192005120.0, - "1535": 46192005120.0, - "1540": 46192005120.0, - "1545": 46192005120.0, - "1550": 46260322304.0, - "1555": 46260322304.0, - "1560": 46260322304.0, - "1565": 46260322304.0, - "1570": 46260322304.0, - "1575": 46260322304.0, - "1580": 46260322304.0, - "1585": 46260322304.0, - "1590": 46260322304.0, - "1595": 46260322304.0, - "1600": 46260322304.0, - "1605": 46260322304.0, - "1610": 46260322304.0, - "1615": 46260322304.0, - "1620": 46260322304.0, - "1625": 46260322304.0, - "1630": 46260322304.0, - "1635": 46260322304.0, - "1640": 46260322304.0, - "1645": 46260322304.0, - "1650": 46260322304.0, - "1655": 46260322304.0, - "1660": 46260322304.0, - "1665": 46260322304.0, - "1670": 46260322304.0, - "1675": 46260322304.0, - "1680": 46260322304.0, - "1685": 46260322304.0, - "1690": 46260322304.0, - "1695": 46260322304.0, - "1700": 46260322304.0, - "1705": 46260322304.0, - "1710": 46260322304.0, - "1715": 46260322304.0, - "1720": 46260322304.0, - "1725": 46260322304.0, - "1730": 46260322304.0, - "1735": 46260322304.0, - "1740": 46260322304.0, - "1745": 46260322304.0, - "1750": 46260322304.0, - "1755": 46260322304.0, - "1760": 46260322304.0, - "1765": 46260322304.0, - "1770": 46260322304.0, - "1775": 46260322304.0, - "1780": 46260322304.0, - "1785": 46260322304.0, - "1790": 46260322304.0, - "1795": 46260322304.0, - "1800": 46260322304.0, - "1805": 46260322304.0, - "1810": 46260322304.0, - "1815": 46260322304.0, - "1820": 46260322304.0, - "1825": 46260322304.0, - "1830": 46260322304.0, - "1835": 46260322304.0, - "1840": 46260322304.0, - "1845": 46260322304.0, - "1850": 46260322304.0, - "1855": 46260322304.0, - "1860": 46260322304.0, - "1865": 46260322304.0, - "1870": 46260322304.0, - "1875": 46260322304.0, 
- "1880": 46260322304.0, - "1885": 46260322304.0, - "1890": 46260322304.0, - "1895": 46260322304.0, - "1900": 46260322304.0, - "1905": 46260322304.0, - "1910": 46260322304.0, - "1915": 46260322304.0, - "1920": 46260322304.0, - "1925": 46260322304.0, - "1930": 46260322304.0, - "1935": 46260322304.0, - "1940": 46260322304.0, - "1945": 46260322304.0, - "1950": 46260322304.0, - "1955": 46260322304.0, - "1960": 46260322304.0, - "1965": 46260322304.0, - "1970": 46260322304.0, - "1975": 46261714944.0, - "1980": 46261714944.0, - "1985": 46261714944.0, - "1990": 46261714944.0, - "1995": 46261714944.0, - "2000": 46261714944.0, - "2005": 46261714944.0, - "2010": 46261714944.0, - "2015": 46261714944.0, - "2020": 46261714944.0, - "2025": 46261714944.0, - "2030": 46261714944.0, - "2035": 46261714944.0, - "2040": 46261714944.0, - "2045": 46261714944.0, - "2050": 46261714944.0, - "2055": 46261714944.0, - "2060": 46261714944.0, - "2065": 46261714944.0, - "2070": 46261714944.0, - "2075": 46261714944.0, - "2080": 46261714944.0, - "2085": 46261714944.0, - "2090": 46261714944.0, - "2095": 46261714944.0, - "2100": 46261714944.0, - "2105": 46261714944.0, - "2110": 46261714944.0, - "2115": 46261714944.0, - "2120": 46261714944.0, - "2125": 46261714944.0, - "2130": 46261714944.0, - "2135": 46261714944.0, - "2140": 46261714944.0, - "2145": 46261714944.0, - "2150": 46261714944.0, - "2155": 46261714944.0, - "2160": 46261714944.0, - "2165": 46261714944.0, - "2170": 46261714944.0, - "2175": 46261714944.0, - "2180": 46261714944.0, - "2185": 46261714944.0, - "2190": 46261714944.0, - "2195": 46261714944.0, - "2200": 46261714944.0, - "2205": 46261714944.0, - "2210": 46261714944.0, - "2215": 46261714944.0, - "2220": 46261714944.0, - "2225": 46261714944.0, - "2230": 46261714944.0, - "2235": 46261714944.0, - "2240": 46261714944.0, - "2245": 46261714944.0, - "2250": 46261714944.0, - "2255": 46261714944.0, - "2260": 46261714944.0, - "2265": 46261714944.0, - "2270": 46261714944.0, - "2275": 46261714944.0, 
- "2280": 46261714944.0, - "2285": 46261714944.0, - "2290": 46261714944.0, - "2295": 46261714944.0, - "2300": 46261714944.0, - "2305": 46261714944.0, - "2310": 46261714944.0, - "2315": 46261714944.0, - "2320": 46261714944.0, - "2325": 46261714944.0, - "2330": 46261714944.0, - "2335": 46261714944.0, - "2340": 46261714944.0, - "2345": 46261714944.0, - "2350": 46261714944.0, - "2355": 46261714944.0, - "2360": 46261714944.0, - "2365": 46261714944.0, - "2370": 46261714944.0, - "2375": 46261714944.0, - "2380": 46261714944.0, - "2385": 46261714944.0, - "2390": 46261714944.0, - "2395": 46261714944.0, - "2400": 46261714944.0, - "2405": 46261714944.0, - "2410": 46261714944.0, - "2415": 46261714944.0, - "2420": 46261714944.0, - "2425": 46261714944.0, - "2430": 46261714944.0, - "2435": 46261714944.0, - "2440": 46261714944.0, - "2445": 46261714944.0, - "2450": 46261714944.0, - "2455": 46261714944.0, - "2460": 46261714944.0, - "2465": 46261714944.0, - "2470": 46261714944.0, - "2475": 46261714944.0, - "2480": 46261714944.0, - "2485": 46261714944.0, - "2490": 46261714944.0, - "2495": 46261714944.0, - "2500": 46261714944.0, - "2505": 46261714944.0, - "2510": 46261714944.0, - "2515": 46261714944.0, - "2520": 46261714944.0, - "2525": 46261714944.0, - "2530": 46261714944.0, - "2535": 46261714944.0, - "2540": 46261714944.0, - "2545": 46261714944.0, - "2550": 46261714944.0, - "2555": 46261714944.0, - "2560": 46261714944.0, - "2565": 46261714944.0, - "2570": 46261714944.0, - "2575": 46261714944.0, - "2580": 46261714944.0, - "2585": 46261714944.0, - "2590": 46261714944.0, - "2595": 46261714944.0, - "2600": 46261714944.0, - "2605": 46261714944.0, - "2610": 46261714944.0, - "2615": 46261714944.0, - "2620": 46261714944.0, - "2625": 46261714944.0, - "2630": 46261714944.0, - "2635": 46261714944.0, - "2640": 46261714944.0, - "2645": 46261714944.0, - "2650": 46261714944.0, - "2655": 46261714944.0, - "2660": 46261714944.0, - "2665": 46261714944.0, - "2670": 46261714944.0, - "2675": 46261714944.0, 
- "2680": 46261714944.0, - "2685": 46261714944.0, - "2690": 46261714944.0, - "2695": 46261714944.0, - "2700": 46261714944.0, - "2705": 46261714944.0, - "2710": 46261714944.0, - "2715": 46261714944.0, - "2720": 46261714944.0, - "2725": 46261714944.0, - "2730": 46261714944.0, - "2735": 46261714944.0, - "2740": 46261714944.0, - "2745": 46261714944.0, - "2750": 46261714944.0, - "2755": 46261714944.0, - "2760": 46261714944.0, - "2765": 46261714944.0, - "2770": 46261714944.0, - "2775": 46261714944.0, - "2780": 46261714944.0, - "2785": 46261714944.0, - "2790": 46261714944.0, - "2795": 46261714944.0, - "2800": 46261714944.0, - "2805": 46261714944.0, - "2810": 46261714944.0, - "2815": 46261714944.0, - "2820": 46261714944.0, - "2825": 46261714944.0, - "2830": 46261714944.0, - "2835": 46261714944.0, - "2840": 46261714944.0, - "2845": 46261714944.0, - "2850": 46261714944.0, - "2855": 46261714944.0, - "2860": 46261714944.0, - "2865": 46261714944.0, - "2870": 46261714944.0, - "2875": 46261714944.0, - "2880": 46261714944.0, - "2885": 46261714944.0, - "2890": 46261714944.0, - "2895": 46261714944.0, - "2900": 46261714944.0, - "2905": 46261714944.0, - "2910": 46261714944.0, - "2915": 46261714944.0, - "2920": 46261714944.0, - "2925": 46261714944.0, - "2930": 46261714944.0, - "2935": 46261714944.0, - "2940": 46261714944.0, - "2945": 46261714944.0, - "2950": 46261714944.0, - "2955": 46261714944.0, - "2960": 46261714944.0, - "2965": 46261714944.0, - "2970": 46261714944.0, - "2975": 46261714944.0, - "2980": 46261714944.0, - "2985": 45706711040.0, - "2990": 45883699200.0, - "2995": 46072287232.0, - "3000": 46072287232.0, - "3005": 46072287232.0, - "3010": 46072287232.0, - "3015": 46072287232.0, - "3020": 46072287232.0, - "3025": 46072287232.0, - "3030": 46072287232.0, - "3035": 46072287232.0, - "3040": 46072287232.0, - "3045": 46072287232.0, - "3050": 46072287232.0, - "3055": 46072287232.0, - "3060": 46072287232.0, - "3065": 46072287232.0, - "3070": 46072287232.0, - "3075": 46072287232.0, 
- "3080": 46072287232.0, - "3085": 46072287232.0, - "3090": 46072287232.0, - "3095": 46072287232.0, - "3100": 46072287232.0, - "3105": 46072287232.0, - "3110": 46072287232.0, - "3115": 46072287232.0, - "3120": 46072287232.0, - "3125": 46072287232.0, - "3130": 46072287232.0, - "3135": 46072287232.0, - "3140": 46072287232.0, - "3145": 46072287232.0, - "3150": 46072287232.0, - "3155": 46072287232.0, - "3160": 46072287232.0, - "3165": 46072287232.0, - "3170": 46072287232.0, - "3175": 46072287232.0, - "3180": 46072287232.0, - "3185": 46072287232.0, - "3190": 46072287232.0, - "3195": 46072287232.0, - "3200": 46072287232.0, - "3205": 46072287232.0, - "3210": 46072287232.0, - "3215": 46072287232.0, - "3220": 46072287232.0, - "3225": 46072287232.0, - "3230": 46072287232.0, - "3235": 46072287232.0, - "3240": 46072287232.0, - "3245": 46072287232.0, - "3250": 46072287232.0, - "3255": 46072287232.0, - "3260": 46072287232.0, - "3265": 46072287232.0, - "3270": 46072287232.0, - "3275": 46072287232.0, - "3280": 46072287232.0, - "3285": 46072287232.0, - "3290": 46072287232.0, - "3295": 46072287232.0, - "3300": 46072287232.0, - "3305": 46072287232.0, - "3310": 46072287232.0, - "3315": 46072287232.0, - "3320": 46072287232.0, - "3325": 46072287232.0, - "3330": 46072287232.0, - "3335": 46072287232.0, - "3340": 46072287232.0, - "3345": 46072287232.0, - "3350": 46072287232.0, - "3355": 46072287232.0, - "3360": 46072287232.0, - "3365": 46072287232.0, - "3370": 46072287232.0, - "3375": 46072287232.0, - "3380": 46072287232.0, - "3385": 46072287232.0, - "3390": 46072287232.0, - "3395": 46072287232.0, - "3400": 46072287232.0, - "3405": 46072287232.0, - "3410": 46072287232.0, - "3415": 46072287232.0, - "3420": 46072287232.0, - "3425": 46072672256.0, - "3430": 46072672256.0, - "3435": 46072672256.0, - "3440": 46072672256.0, - "3445": 46072672256.0, - "3450": 46072672256.0, - "3455": 46072672256.0, - "3460": 46072672256.0, - "3465": 46072672256.0, - "3470": 46072672256.0, - "3475": 46072672256.0, 
- "3480": 46072672256.0, - "3485": 46095564800.0, - "3490": 46095564800.0, - "3495": 46095564800.0, - "3500": 46095564800.0, - "3505": 46095564800.0, - "3510": 46095564800.0, - "3515": 46095564800.0, - "3520": 46095564800.0, - "3525": 46095564800.0, - "3530": 46095564800.0, - "3535": 46095564800.0, - "3540": 46095564800.0, - "3545": 46095564800.0, - "3550": 46191697920.0, - "3555": 46191697920.0, - "3560": 46191697920.0, - "3565": 46191697920.0, - "3570": 46191697920.0, - "3575": 46191697920.0, - "3580": 46191697920.0, - "3585": 46191697920.0, - "3590": 46191697920.0, - "3595": 46191697920.0, - "3600": 46191697920.0, - "3605": 46191697920.0, - "3610": 46191697920.0, - "3615": 46191697920.0, - "3620": 46191697920.0, - "3625": 46191697920.0, - "3630": 46191697920.0, - "3635": 46191697920.0, - "3640": 46191697920.0, - "3645": 46191697920.0, - "3650": 46191697920.0, - "3655": 46191697920.0, - "3660": 46191697920.0, - "3665": 46191697920.0, - "3670": 46191697920.0, - "3675": 46191697920.0, - "3680": 46191697920.0, - "3685": 46191697920.0, - "3690": 46191697920.0, - "3695": 46191697920.0, - "3700": 46191697920.0, - "3705": 46191697920.0, - "3710": 46191697920.0, - "3715": 46191697920.0, - "3720": 46191697920.0, - "3725": 46191697920.0, - "3730": 46191697920.0, - "3735": 46191697920.0, - "3740": 46191697920.0, - "3745": 46191697920.0, - "3750": 46191697920.0, - "3755": 46191697920.0, - "3760": 46191697920.0, - "3765": 46191697920.0, - "3770": 46191697920.0, - "3775": 46191697920.0, - "3780": 46191697920.0, - "3785": 46191697920.0, - "3790": 46191697920.0, - "3795": 46191697920.0, - "3800": 46191697920.0, - "3805": 46191697920.0, - "3810": 46191697920.0, - "3815": 46191697920.0, - "3820": 46191697920.0, - "3825": 46191697920.0, - "3830": 46191697920.0, - "3835": 46191697920.0, - "3840": 46191697920.0, - "3845": 46191697920.0, - "3850": 46191697920.0, - "3855": 46191697920.0, - "3860": 46191697920.0, - "3865": 46191697920.0, - "3870": 46191697920.0, - "3875": 46191697920.0, 
- "3880": 46191697920.0, - "3885": 46191697920.0, - "3890": 46191697920.0, - "3895": 46191697920.0, - "3900": 46191697920.0, - "3905": 46191697920.0, - "3910": 46191697920.0, - "3915": 46191697920.0, - "3920": 46191697920.0, - "3925": 46191697920.0, - "3930": 46191697920.0, - "3935": 46191697920.0, - "3940": 46191697920.0, - "3945": 46191697920.0, - "3950": 46191697920.0, - "3955": 46191697920.0, - "3960": 46191697920.0, - "3965": 46191697920.0, - "3970": 46191697920.0, - "3975": 46191697920.0, - "3980": 46191697920.0, - "3985": 46191697920.0, - "3990": 46191697920.0, - "3995": 46191697920.0, - "4000": 45840449536.0, - "4005": 45869191168.0, - "4010": 45897973760.0, - "4015": 45897973760.0, - "4020": 45940301824.0, - "4025": 45940301824.0, - "4030": 45940301824.0, - "4035": 45940301824.0, - "4040": 45940301824.0, - "4045": 45940301824.0, - "4050": 45940301824.0, - "4055": 45940301824.0, - "4060": 45940301824.0, - "4065": 45940301824.0, - "4070": 45940301824.0, - "4075": 45940301824.0, - "4080": 45940301824.0, - "4085": 46009651200.0, - "4090": 46009651200.0, - "4095": 46009651200.0, - "4100": 46009651200.0, - "4105": 46009651200.0, - "4110": 46009651200.0, - "4115": 46009651200.0, - "4120": 46009651200.0, - "4125": 46009651200.0, - "4130": 46009651200.0, - "4135": 46009651200.0, - "4140": 46009651200.0, - "4145": 46009651200.0, - "4150": 46009651200.0, - "4155": 46009651200.0, - "4160": 46009651200.0, - "4165": 46009651200.0, - "4170": 46009651200.0, - "4175": 46009651200.0, - "4180": 46009651200.0, - "4185": 46009651200.0, - "4190": 46009651200.0, - "4195": 46009651200.0, - "4200": 46009651200.0, - "4205": 46009651200.0, - "4210": 46009651200.0, - "4215": 46009651200.0, - "4220": 46009651200.0, - "4225": 46064635904.0, - "4230": 46064635904.0, - "4235": 46064635904.0, - "4240": 46064635904.0, - "4245": 46064635904.0, - "4250": 46064635904.0, - "4255": 46064635904.0, - "4260": 46064635904.0, - "4265": 46064635904.0, - "4270": 46064635904.0, - "4275": 46064635904.0, 
- "4280": 46064635904.0, - "4285": 46064635904.0, - "4290": 46064635904.0, - "4295": 46064635904.0, - "4300": 46064635904.0, - "4305": 46064635904.0, - "4310": 46064635904.0, - "4315": 46064635904.0, - "4320": 46064635904.0, - "4325": 46064635904.0, - "4330": 46064635904.0, - "4335": 46064635904.0, - "4340": 46064635904.0, - "4345": 46064635904.0, - "4350": 46064635904.0, - "4355": 46064635904.0, - "4360": 46064635904.0, - "4365": 46064635904.0, - "4370": 46064635904.0, - "4375": 46064635904.0, - "4380": 46064635904.0, - "4385": 46064635904.0, - "4390": 46064635904.0, - "4395": 46064635904.0, - "4400": 46064635904.0, - "4405": 46064635904.0, - "4410": 46064635904.0, - "4415": 46064635904.0, - "4420": 46064635904.0, - "4425": 46064635904.0, - "4430": 46064635904.0, - "4435": 46064635904.0, - "4440": 46064635904.0, - "4445": 46064635904.0, - "4450": 46064635904.0, - "4455": 46064635904.0, - "4460": 46080573440.0, - "4465": 46080573440.0, - "4470": 46080573440.0, - "4475": 46080573440.0, - "4480": 46080573440.0, - "4485": 46080573440.0, - "4490": 46080573440.0, - "4495": 46080573440.0, - "4500": 46080573440.0, - "4505": 46080573440.0, - "4510": 46080573440.0, - "4515": 46080573440.0, - "4520": 46080573440.0, - "4525": 46080573440.0, - "4530": 46080573440.0, - "4535": 46080573440.0, - "4540": 46080573440.0, - "4545": 46080573440.0, - "4550": 46080573440.0, - "4555": 46080573440.0, - "4560": 46080573440.0, - "4565": 46080573440.0, - "4570": 46080573440.0, - "4575": 46080573440.0, - "4580": 46080573440.0, - "4585": 46080573440.0, - "4590": 46080573440.0, - "4595": 46080573440.0, - "4600": 46080573440.0, - "4605": 46080573440.0, - "4610": 46080573440.0, - "4615": 46343888896.0, - "4620": 46343888896.0, - "4625": 46343888896.0, - "4630": 46343888896.0, - "4635": 46343888896.0, - "4640": 46343888896.0, - "4645": 46343888896.0, - "4650": 46343888896.0, - "4655": 46343888896.0, - "4660": 46343888896.0, - "4665": 46343888896.0, - "4670": 46343888896.0, - "4675": 46343888896.0, 
- "4680": 46343888896.0, - "4685": 46343888896.0, - "4690": 46343888896.0, - "4695": 46343888896.0, - "4700": 46343888896.0, - "4705": 46343888896.0, - "4710": 46343888896.0, - "4715": 46343888896.0, - "4720": 46343888896.0, - "4725": 46343888896.0, - "4730": 46343888896.0, - "4735": 46343888896.0, - "4740": 46343888896.0, - "4745": 46343888896.0, - "4750": 46343888896.0, - "4755": 46343888896.0, - "4760": 46343888896.0, - "4765": 46343888896.0, - "4770": 46343888896.0, - "4775": 46343888896.0, - "4780": 46343888896.0, - "4785": 46343888896.0, - "4790": 46343888896.0, - "4795": 46343888896.0, - "4800": 46343888896.0, - "4805": 46343888896.0, - "4810": 46343888896.0, - "4815": 46343888896.0, - "4820": 46343888896.0, - "4825": 46343888896.0, - "4830": 46343888896.0, - "4835": 46343888896.0, - "4840": 46343888896.0, - "4845": 46343888896.0, - "4850": 46343888896.0, - "4855": 46343888896.0, - "4860": 46343888896.0, - "4865": 46343888896.0, - "4870": 46343888896.0, - "4875": 46343888896.0, - "4880": 46343888896.0, - "4885": 46343888896.0, - "4890": 46343888896.0, - "4895": 46343888896.0, - "4900": 46343888896.0, - "4905": 46343888896.0, - "4910": 46343888896.0, - "4915": 46343888896.0, - "4920": 46343888896.0, - "4925": 46343888896.0, - "4930": 46343888896.0, - "4935": 46343888896.0, - "4940": 46343888896.0, - "4945": 46343888896.0, - "4950": 46343888896.0, - "4955": 46343888896.0, - "4960": 46343888896.0, - "4965": 46343888896.0, - "4970": 46343888896.0, - "4975": 46343888896.0, - "4980": 46343888896.0, - "4985": 46343888896.0, - "4990": 46343888896.0, - "4995": 46343888896.0, - "5000": 46343888896.0, - "5005": 46199529472.0, - "5010": 46199529472.0, - "5015": 45764182016.0, - "5020": 45878784000.0, - "5025": 45878784000.0, - "5030": 45878784000.0, - "5035": 45878784000.0, - "5040": 45992685568.0, - "5045": 45992685568.0, - "5050": 45992685568.0, - "5055": 45992685568.0, - "5060": 45992685568.0, - "5065": 45992685568.0, - "5070": 45992685568.0, - "5075": 45992685568.0, 
- "5080": 45992685568.0, - "5085": 45992685568.0, - "5090": 45992685568.0, - "5095": 46014451712.0, - "5100": 46014451712.0, - "5105": 46014451712.0, - "5110": 46014451712.0, - "5115": 46014451712.0, - "5120": 46014451712.0, - "5125": 46014451712.0, - "5130": 46014451712.0, - "5135": 46014451712.0, - "5140": 46014451712.0, - "5145": 46014451712.0, - "5150": 46014451712.0, - "5155": 46014451712.0, - "5160": 46014451712.0, - "5165": 46014451712.0, - "5170": 46014451712.0, - "5175": 46014451712.0, - "5180": 46014451712.0, - "5185": 46014451712.0, - "5190": 46014451712.0, - "5195": 46014451712.0, - "5200": 46139572224.0, - "5205": 46139572224.0, - "5210": 46139572224.0, - "5215": 46139572224.0, - "5220": 46168403968.0, - "5225": 46168403968.0, - "5230": 46168403968.0, - "5235": 46168403968.0, - "5240": 46168403968.0, - "5245": 46168403968.0, - "5250": 46168403968.0, - "5255": 46168403968.0, - "5260": 46168403968.0, - "5265": 46168403968.0, - "5270": 46168403968.0, - "5275": 46168403968.0, - "5280": 46168403968.0, - "5285": 46168403968.0, - "5290": 46168403968.0, - "5295": 46168403968.0, - "5300": 46168403968.0, - "5305": 46168403968.0, - "5310": 46168403968.0, - "5315": 46168403968.0, - "5320": 46168403968.0, - "5325": 46168403968.0, - "5330": 46168403968.0, - "5335": 46168403968.0, - "5340": 46168403968.0, - "5345": 46168403968.0, - "5350": 46168403968.0, - "5355": 46168403968.0, - "5360": 46168403968.0, - "5365": 46168403968.0, - "5370": 46168403968.0, - "5375": 46168403968.0, - "5380": 46168403968.0, - "5385": 46168403968.0, - "5390": 46168403968.0, - "5395": 46168403968.0, - "5400": 46168403968.0, - "5405": 46168403968.0, - "5410": 46168403968.0, - "5415": 46168403968.0, - "5420": 46168403968.0, - "5425": 46168403968.0, - "5430": 46168403968.0, - "5435": 46168403968.0, - "5440": 46168403968.0, - "5445": 46168403968.0, - "5450": 46168403968.0, - "5455": 46168403968.0, - "5460": 46168403968.0, - "5465": 46168403968.0, - "5470": 46168403968.0, - "5475": 46168403968.0, 
- "5480": 46168403968.0, - "5485": 46168403968.0, - "5490": 46168403968.0, - "5495": 46168403968.0, - "5500": 46168403968.0, - "5505": 46168403968.0, - "5510": 46168403968.0, - "5515": 46168403968.0, - "5520": 46168403968.0, - "5525": 46168403968.0, - "5530": 46168403968.0, - "5535": 46168403968.0, - "5540": 46168403968.0, - "5545": 46168403968.0, - "5550": 46168403968.0, - "5555": 46168403968.0, - "5560": 46168403968.0, - "5565": 46168403968.0, - "5570": 46168403968.0, - "5575": 46168403968.0, - "5580": 46168403968.0, - "5585": 46168403968.0, - "5590": 46168403968.0, - "5595": 46168403968.0, - "5600": 46168403968.0, - "5605": 46226247680.0, - "5610": 46226247680.0, - "5615": 46226247680.0, - "5620": 46226247680.0, - "5625": 46226247680.0, - "5630": 46226247680.0, - "5635": 46226247680.0, - "5640": 46226247680.0, - "5645": 46226247680.0, - "5650": 46226247680.0, - "5655": 46226247680.0, - "5660": 46226247680.0, - "5665": 46226247680.0, - "5670": 46226247680.0, - "5675": 46226247680.0, - "5680": 46226247680.0, - "5685": 46226247680.0, - "5690": 46226247680.0, - "5695": 46226247680.0, - "5700": 46226247680.0, - "5705": 46226247680.0, - "5710": 46226247680.0, - "5715": 46226247680.0, - "5720": 46226247680.0, - "5725": 46226247680.0, - "5730": 46226247680.0, - "5735": 46226247680.0, - "5740": 46226247680.0, - "5745": 46226247680.0, - "5750": 46226247680.0, - "5755": 46226247680.0, - "5760": 46226247680.0, - "5765": 46226247680.0, - "5770": 46226247680.0, - "5775": 46226247680.0, - "5780": 46226247680.0, - "5785": 46226247680.0, - "5790": 46226247680.0, - "5795": 46226247680.0, - "5800": 46226247680.0, - "5805": 46226247680.0, - "5810": 46226247680.0, - "5815": 46226247680.0, - "5820": 46226247680.0, - "5825": 46226247680.0, - "5830": 46226247680.0, - "5835": 46226247680.0, - "5840": 46226247680.0, - "5845": 46226247680.0, - "5850": 46226247680.0, - "5855": 46226247680.0, - "5860": 46226247680.0, - "5865": 46226247680.0, - "5870": 46226247680.0, - "5875": 46226247680.0, 
- "5880": 46226247680.0, - "5885": 46226247680.0, - "5890": 46226247680.0, - "5895": 46226247680.0, - "5900": 46226247680.0, - "5905": 46226247680.0, - "5910": 46226247680.0, - "5915": 46226247680.0, - "5920": 46226247680.0, - "5925": 46226247680.0, - "5930": 46226247680.0, - "5935": 46226247680.0, - "5940": 46226247680.0, - "5945": 46226247680.0, - "5950": 46226247680.0, - "5955": 46226247680.0, - "5960": 46226247680.0, - "5965": 46226247680.0, - "5970": 46226247680.0, - "5975": 46226247680.0, - "5980": 46226247680.0, - "5985": 46226247680.0, - "5990": 46226247680.0, - "5995": 46226247680.0, - "6000": 46226247680.0, - "6005": 46226247680.0, - "6010": 46226247680.0, - "6015": 46226247680.0, - "6020": 46226247680.0, - "6025": 46226247680.0, - "6030": 45912186880.0, - "6035": 45912186880.0, - "6040": 45995683840.0, - "6045": 45995683840.0, - "6050": 45995683840.0, - "6055": 45995683840.0, - "6060": 45995683840.0, - "6065": 45995683840.0, - "6070": 45995683840.0, - "6075": 46014836736.0, - "6080": 46014836736.0, - "6085": 46014836736.0, - "6090": 46014836736.0, - "6095": 46014836736.0, - "6100": 46014836736.0, - "6105": 46014836736.0, - "6110": 46014836736.0, - "6115": 46014836736.0, - "6120": 46014836736.0, - "6125": 46014836736.0, - "6130": 46014836736.0, - "6135": 46014836736.0, - "6140": 46014836736.0, - "6145": 46014836736.0, - "6150": 46014836736.0, - "6155": 46014836736.0, - "6160": 46014836736.0, - "6165": 46025334784.0, - "6170": 46025334784.0, - "6175": 46025334784.0, - "6180": 46025334784.0, - "6185": 46035255296.0, - "6190": 46035255296.0, - "6195": 46035255296.0, - "6200": 46035255296.0, - "6205": 46035255296.0, - "6210": 46035255296.0, - "6215": 46035255296.0, - "6220": 46035255296.0, - "6225": 46035255296.0, - "6230": 46035255296.0, - "6235": 46035255296.0, - "6240": 46035255296.0, - "6245": 46035255296.0, - "6250": 46035255296.0, - "6255": 46035255296.0, - "6260": 46035255296.0, - "6265": 46035255296.0, - "6270": 46035255296.0, - "6275": 46035255296.0, 
- "6280": 46035255296.0, - "6285": 46035255296.0, - "6290": 46035255296.0, - "6295": 46035255296.0, - "6300": 46035255296.0, - "6305": 46035255296.0, - "6310": 46035255296.0, - "6315": 46035255296.0, - "6320": 46035255296.0, - "6325": 46035255296.0, - "6330": 46035255296.0, - "6335": 46035255296.0, - "6340": 46035255296.0, - "6345": 46035255296.0, - "6350": 46035255296.0, - "6355": 46035255296.0, - "6360": 46035255296.0, - "6365": 46035255296.0, - "6370": 46035255296.0, - "6375": 46035255296.0, - "6380": 46035255296.0, - "6385": 46035255296.0, - "6390": 46035255296.0, - "6395": 46035255296.0, - "6400": 46035255296.0, - "6405": 46035255296.0, - "6410": 46035255296.0, - "6415": 46035255296.0, - "6420": 46035255296.0, - "6425": 46035255296.0, - "6430": 46035255296.0, - "6435": 46035255296.0, - "6440": 46035255296.0, - "6445": 46035255296.0, - "6450": 46035255296.0, - "6455": 46035255296.0, - "6460": 46035255296.0, - "6465": 46035255296.0, - "6470": 46035255296.0, - "6475": 46035255296.0, - "6480": 46035255296.0, - "6485": 46035255296.0, - "6490": 46035255296.0, - "6495": 46035255296.0, - "6500": 46035255296.0, - "6505": 46064041984.0, - "6510": 46064041984.0, - "6515": 46064041984.0, - "6520": 46064041984.0, - "6525": 46064041984.0, - "6530": 46064041984.0, - "6535": 46064041984.0, - "6540": 46064041984.0, - "6545": 46064041984.0, - "6550": 46064041984.0, - "6555": 46064041984.0, - "6560": 46064041984.0, - "6565": 46064041984.0, - "6570": 46064041984.0, - "6575": 46064041984.0, - "6580": 46064041984.0, - "6585": 46064041984.0, - "6590": 46064041984.0, - "6595": 46064041984.0, - "6600": 46064041984.0, - "6605": 46064041984.0, - "6610": 46064041984.0, - "6615": 46064041984.0, - "6620": 46064041984.0, - "6625": 46064041984.0, - "6630": 46064041984.0, - "6635": 46064041984.0, - "6640": 46064041984.0, - "6645": 46064041984.0, - "6650": 46064041984.0, - "6655": 46064041984.0, - "6660": 46064041984.0, - "6665": 46064041984.0, - "6670": 46064041984.0, - "6675": 46064041984.0, 
- "6680": 46064041984.0, - "6685": 46064041984.0, - "6690": 46064041984.0, - "6695": 46064041984.0, - "6700": 46064041984.0, - "6705": 46064041984.0, - "6710": 46064041984.0, - "6715": 46064041984.0, - "6720": 46064041984.0, - "6725": 46064041984.0, - "6730": 46064041984.0, - "6735": 46064041984.0, - "6740": 46064041984.0, - "6745": 46064041984.0, - "6750": 46064041984.0, - "6755": 46064041984.0, - "6760": 46064041984.0, - "6765": 46064041984.0, - "6770": 46064041984.0, - "6775": 46064041984.0, - "6780": 46064041984.0, - "6785": 46064041984.0, - "6790": 46064041984.0, - "6795": 46064041984.0, - "6800": 46064041984.0, - "6805": 46064041984.0, - "6810": 46064041984.0, - "6815": 46064041984.0, - "6820": 46064041984.0, - "6825": 46064041984.0, - "6830": 46064041984.0, - "6835": 46064041984.0, - "6840": 46064041984.0, - "6845": 46064041984.0, - "6850": 46064041984.0, - "6855": 46064041984.0, - "6860": 46064041984.0, - "6865": 46064041984.0, - "6870": 46064041984.0, - "6875": 46064041984.0, - "6880": 46064041984.0, - "6885": 46064041984.0, - "6890": 46064041984.0, - "6895": 46064041984.0, - "6900": 46064041984.0, - "6905": 46064041984.0, - "6910": 46064041984.0, - "6915": 46064041984.0, - "6920": 46064041984.0, - "6925": 46064041984.0, - "6930": 46064041984.0, - "6935": 46064041984.0, - "6940": 46064041984.0, - "6945": 46064041984.0, - "6950": 46064041984.0, - "6955": 46064041984.0, - "6960": 46064041984.0, - "6965": 46064041984.0, - "6970": 46064041984.0, - "6975": 46064041984.0, - "6980": 46064041984.0, - "6985": 46064041984.0, - "6990": 46064041984.0, - "6995": 46064041984.0, - "7000": 46064041984.0, - "7005": 46064041984.0, - "7010": 46064041984.0, - "7015": 46064041984.0, - "7020": 46064041984.0, - "7025": 46064041984.0, - "7030": 46108979200.0, - "7035": 46108979200.0, - "7040": 46108979200.0, - "7045": 46108979200.0, - "7050": 46065532928.0, - "7055": 46065532928.0, - "7060": 46065532928.0, - "7065": 46065532928.0, - "7070": 46065532928.0, - "7075": 46065532928.0, 
- "7080": 46065532928.0, - "7085": 46065532928.0, - "7090": 46065532928.0, - "7095": 46065532928.0, - "7100": 46065532928.0, - "7105": 46065532928.0, - "7110": 46065532928.0, - "7115": 46065532928.0, - "7120": 46065532928.0, - "7125": 46065532928.0, - "7130": 46065532928.0, - "7135": 46065532928.0, - "7140": 46065532928.0, - "7145": 46065532928.0, - "7150": 46065532928.0, - "7155": 46065532928.0, - "7160": 46065532928.0, - "7165": 46065532928.0, - "7170": 46065532928.0, - "7175": 46065532928.0, - "7180": 46065532928.0, - "7185": 46065532928.0, - "7190": 46065532928.0, - "7195": 46065532928.0, - "7200": 46065532928.0, - "7205": 46065532928.0, - "7210": 46065532928.0, - "7215": 46065532928.0, - "7220": 46065532928.0, - "7225": 46065532928.0, - "7230": 46065532928.0, - "7235": 46065532928.0, - "7240": 46065532928.0, - "7245": 46065532928.0, - "7250": 46065532928.0, - "7255": 46065532928.0, - "7260": 46065532928.0, - "7265": 46065532928.0, - "7270": 46065532928.0, - "7275": 46065532928.0, - "7280": 46065532928.0, - "7285": 46065532928.0, - "7290": 46065532928.0, - "7295": 46065532928.0, - "7300": 46065532928.0, - "7305": 46065532928.0, - "7310": 46065532928.0, - "7315": 46065532928.0, - "7320": 46065532928.0, - "7325": 46065532928.0, - "7330": 46065532928.0, - "7335": 46065532928.0, - "7340": 46065532928.0, - "7345": 46065532928.0, - "7350": 46065532928.0, - "7355": 46065532928.0, - "7360": 46065532928.0, - "7365": 46065532928.0, - "7370": 46065532928.0, - "7375": 46065532928.0, - "7380": 46065532928.0, - "7385": 46065532928.0, - "7390": 46065532928.0, - "7395": 46065532928.0, - "7400": 46065532928.0, - "7405": 46065532928.0, - "7410": 46065532928.0, - "7415": 46065532928.0, - "7420": 46065532928.0, - "7425": 46065532928.0, - "7430": 46065532928.0, - "7435": 46065532928.0, - "7440": 46065532928.0, - "7445": 46065532928.0, - "7450": 46065532928.0, - "7455": 46065532928.0, - "7460": 46065532928.0, - "7465": 46065532928.0, - "7470": 46065532928.0, - "7475": 46065532928.0, 
- "7480": 46065532928.0, - "7485": 46065532928.0, - "7490": 46065532928.0, - "7495": 46065532928.0, - "7500": 46065532928.0, - "7505": 46065532928.0, - "7510": 46065532928.0, - "7515": 46065532928.0, - "7520": 45618061312.0, - "7525": 45747933184.0, - "7530": 45825024000.0, - "7535": 45825024000.0, - "7540": 45825024000.0, - "7545": 45910597632.0, - "7550": 45910597632.0, - "7555": 45910597632.0, - "7560": 45910597632.0, - "7565": 45910597632.0, - "7570": 45910597632.0, - "7575": 45910597632.0, - "7580": 45910597632.0, - "7585": 45910597632.0, - "7590": 45910597632.0, - "7595": 45916950528.0, - "7600": 45924253696.0, - "7605": 45924253696.0, - "7610": 45924253696.0, - "7615": 45924253696.0, - "7620": 45924253696.0, - "7625": 45924253696.0, - "7630": 45924253696.0, - "7635": 45924253696.0, - "7640": 45924253696.0, - "7645": 45944950784.0, - "7650": 45944950784.0, - "7655": 45944950784.0, - "7660": 45944950784.0, - "7665": 45944950784.0, - "7670": 45944950784.0, - "7675": 45944950784.0, - "7680": 45944950784.0, - "7685": 45944950784.0, - "7690": 45944950784.0, - "7695": 45944950784.0, - "7700": 45944950784.0, - "7705": 45944950784.0, - "7710": 45944950784.0, - "7715": 45944950784.0, - "7720": 45944950784.0, - "7725": 45944950784.0, - "7730": 45944950784.0, - "7735": 45944950784.0, - "7740": 45944950784.0, - "7745": 45944950784.0, - "7750": 45944950784.0, - "7755": 45944950784.0, - "7760": 45944950784.0, - "7765": 45944950784.0, - "7770": 45944950784.0, - "7775": 45944950784.0, - "7780": 45944950784.0, - "7785": 45944950784.0, - "7790": 45944950784.0, - "7795": 45944950784.0, - "7800": 45944950784.0, - "7805": 45944950784.0, - "7810": 45944950784.0, - "7815": 45944950784.0, - "7820": 45944950784.0, - "7825": 45944950784.0, - "7830": 45944950784.0, - "7835": 45944950784.0, - "7840": 45973135360.0, - "7845": 45973135360.0, - "7850": 46089904128.0, - "7855": 46089904128.0, - "7860": 46089904128.0, - "7865": 46089904128.0, - "7870": 46089904128.0, - "7875": 46089904128.0, 
- "7880": 46089904128.0, - "7885": 46089904128.0, - "7890": 46089904128.0, - "7895": 46089904128.0, - "7900": 46089904128.0, - "7905": 46089904128.0, - "7910": 46089904128.0, - "7915": 46089904128.0, - "7920": 46089904128.0, - "7925": 46089904128.0, - "7930": 46089904128.0, - "7935": 46089904128.0, - "7940": 46089904128.0, - "7945": 46089904128.0, - "7950": 46089904128.0, - "7955": 46089904128.0, - "7960": 46089904128.0, - "7965": 46089904128.0, - "7970": 46089904128.0, - "7975": 46089904128.0, - "7980": 46089904128.0, - "7985": 46089904128.0, - "7990": 46089904128.0, - "7995": 46089904128.0, - "8000": 46089904128.0, - "8005": 46089904128.0, - "8010": 46089904128.0, - "8015": 46089904128.0, - "8020": 46089904128.0, - "8025": 46089904128.0, - "8030": 46089904128.0, - "8035": 46089904128.0, - "8040": 46089904128.0, - "8045": 46089904128.0, - "8050": 46089904128.0, - "8055": 46089904128.0, - "8060": 46089904128.0, - "8065": 46089904128.0, - "8070": 46089904128.0, - "8075": 46089904128.0, - "8080": 46089904128.0, - "8085": 46089904128.0, - "8090": 46089904128.0, - "8095": 46089904128.0, - "8100": 46089904128.0, - "8105": 46089904128.0, - "8110": 46089904128.0, - "8115": 46089904128.0, - "8120": 46089904128.0, - "8125": 46089904128.0, - "8130": 46089904128.0, - "8135": 46089904128.0, - "8140": 46089904128.0, - "8145": 46089904128.0, - "8150": 46089904128.0, - "8155": 46089904128.0, - "8160": 46089904128.0, - "8165": 46089904128.0, - "8170": 46089904128.0, - "8175": 46089904128.0, - "8180": 46089904128.0, - "8185": 46089904128.0, - "8190": 46089904128.0, - "8195": 46089904128.0, - "8200": 46089904128.0, - "8205": 46089904128.0, - "8210": 46089904128.0, - "8215": 46089904128.0, - "8220": 46089904128.0, - "8225": 46089904128.0, - "8230": 46089904128.0, - "8235": 46089904128.0, - "8240": 46089904128.0, - "8245": 46089904128.0, - "8250": 46089904128.0, - "8255": 46089904128.0, - "8260": 46089904128.0, - "8265": 46089904128.0, - "8270": 46089904128.0, - "8275": 46089904128.0, 
- "8280": 46089904128.0, - "8285": 46089904128.0, - "8290": 46089904128.0, - "8295": 46089904128.0, - "8300": 46089904128.0, - "8305": 46089904128.0, - "8310": 46089904128.0, - "8315": 46089904128.0, - "8320": 46089904128.0, - "8325": 46089904128.0, - "8330": 46089904128.0, - "8335": 46089904128.0, - "8340": 46089904128.0, - "8345": 46089904128.0, - "8350": 46089904128.0, - "8355": 46089904128.0, - "8360": 46089904128.0, - "8365": 46089904128.0, - "8370": 46089904128.0, - "8375": 46089904128.0, - "8380": 46089904128.0, - "8385": 46089904128.0, - "8390": 46089904128.0, - "8395": 46089904128.0, - "8400": 46089904128.0, - "8405": 46089904128.0, - "8410": 46089904128.0, - "8415": 46089904128.0, - "8420": 46089904128.0, - "8425": 46089904128.0, - "8430": 46089904128.0, - "8435": 46089904128.0, - "8440": 46089904128.0, - "8445": 46089904128.0, - "8450": 46089904128.0, - "8455": 46089904128.0, - "8460": 46089904128.0, - "8465": 46089904128.0, - "8470": 46089904128.0, - "8475": 46089904128.0, - "8480": 46089904128.0, - "8485": 46089904128.0, - "8490": 46089904128.0, - "8495": 46089904128.0, - "8500": 46089904128.0, - "8505": 46089904128.0, - "8510": 46089904128.0, - "8515": 46089904128.0, - "8520": 46089904128.0, - "8525": 46089904128.0, - "8530": 45938114560.0, - "8535": 45938114560.0, - "8540": 45938114560.0, - "8545": 45938114560.0, - "8550": 45938114560.0, - "8555": 45938114560.0, - "8560": 45938114560.0, - "8565": 45938114560.0, - "8570": 45938114560.0, - "8575": 45938114560.0, - "8580": 45938114560.0, - "8585": 45938114560.0, - "8590": 45950377984.0, - "8595": 45950377984.0, - "8600": 45950377984.0, - "8605": 45950377984.0, - "8610": 45950377984.0, - "8615": 45950377984.0, - "8620": 45950377984.0, - "8625": 45950377984.0, - "8630": 45950377984.0, - "8635": 45950377984.0, - "8640": 45950377984.0, - "8645": 45950377984.0, - "8650": 45950377984.0, - "8655": 45950377984.0, - "8660": 45950377984.0, - "8665": 45950377984.0, - "8670": 45955510272.0, - "8675": 45955510272.0, 
- "8680": 45955510272.0, - "8685": 45955510272.0, - "8690": 45991550976.0, - "8695": 45991550976.0, - "8700": 45991550976.0, - "8705": 45991550976.0, - "8710": 45991550976.0, - "8715": 45991550976.0, - "8720": 45991550976.0, - "8725": 45991550976.0, - "8730": 45991550976.0, - "8735": 45991550976.0, - "8740": 46068584448.0, - "8745": 46068584448.0, - "8750": 46068584448.0, - "8755": 46068584448.0, - "8760": 46068584448.0, - "8765": 46068584448.0, - "8770": 46068584448.0, - "8775": 46068584448.0, - "8780": 46068584448.0, - "8785": 46068584448.0, - "8790": 46068584448.0, - "8795": 46068584448.0, - "8800": 46068584448.0, - "8805": 46068584448.0, - "8810": 46068584448.0, - "8815": 46068584448.0, - "8820": 46068584448.0, - "8825": 46068584448.0, - "8830": 46068584448.0, - "8835": 46068584448.0, - "8840": 46068584448.0, - "8845": 46068584448.0, - "8850": 46068584448.0, - "8855": 46184767488.0, - "8860": 46184767488.0, - "8865": 46184767488.0, - "8870": 46184767488.0, - "8875": 46184767488.0, - "8880": 46184767488.0, - "8885": 46184767488.0, - "8890": 46184767488.0, - "8895": 46184767488.0, - "8900": 46184767488.0, - "8905": 46184767488.0, - "8910": 46184767488.0, - "8915": 46184767488.0, - "8920": 46184767488.0, - "8925": 46184767488.0, - "8930": 46184767488.0, - "8935": 46184767488.0, - "8940": 46184767488.0, - "8945": 46184767488.0, - "8950": 46184767488.0, - "8955": 46184767488.0, - "8960": 46184767488.0, - "8965": 46184767488.0, - "8970": 46184767488.0, - "8975": 46184767488.0, - "8980": 46184767488.0, - "8985": 46184767488.0, - "8990": 46184767488.0, - "8995": 46184767488.0, - "9000": 46184767488.0, - "9005": 46184767488.0, - "9010": 46184767488.0, - "9015": 46184767488.0, - "9020": 46184767488.0, - "9025": 46184767488.0, - "9030": 46184767488.0, - "9035": 46184767488.0, - "9040": 46184767488.0, - "9045": 46184767488.0, - "9050": 46184767488.0, - "9055": 46184767488.0, - "9060": 46184767488.0, - "9065": 46184767488.0, - "9070": 46184767488.0, - "9075": 46184767488.0, 
- "9080": 46184767488.0, - "9085": 46184767488.0, - "9090": 46184767488.0, - "9095": 46184767488.0, - "9100": 46184767488.0, - "9105": 46184767488.0, - "9110": 46184767488.0, - "9115": 46184767488.0, - "9120": 46184767488.0, - "9125": 46184767488.0, - "9130": 46184767488.0, - "9135": 46184767488.0, - "9140": 46184767488.0, - "9145": 46184767488.0, - "9150": 46184767488.0, - "9155": 46184767488.0, - "9160": 46184767488.0, - "9165": 46184767488.0, - "9170": 46184767488.0, - "9175": 46184767488.0, - "9180": 46184767488.0, - "9185": 46184767488.0, - "9190": 46184767488.0, - "9195": 46184767488.0, - "9200": 46184767488.0, - "9205": 46184767488.0, - "9210": 46184767488.0, - "9215": 46184767488.0, - "9220": 46184767488.0, - "9225": 46184767488.0, - "9230": 46184767488.0, - "9235": 46184767488.0, - "9240": 46184767488.0, - "9245": 46184767488.0, - "9250": 46184767488.0, - "9255": 46184767488.0, - "9260": 46184767488.0, - "9265": 46184767488.0, - "9270": 46184767488.0, - "9275": 46184767488.0, - "9280": 46184767488.0, - "9285": 46184767488.0, - "9290": 46184767488.0, - "9295": 46184767488.0, - "9300": 46184767488.0, - "9305": 46184767488.0, - "9310": 46184767488.0, - "9315": 46184767488.0, - "9320": 46184767488.0, - "9325": 46184767488.0, - "9330": 46184767488.0, - "9335": 46184767488.0, - "9340": 46184767488.0, - "9345": 46184767488.0, - "9350": 46184767488.0, - "9355": 46184767488.0, - "9360": 46184767488.0, - "9365": 46184767488.0, - "9370": 46184767488.0, - "9375": 46184767488.0, - "9380": 46184767488.0, - "9385": 46184767488.0, - "9390": 46184767488.0, - "9395": 46184767488.0, - "9400": 46184767488.0, - "9405": 46184767488.0, - "9410": 46184767488.0, - "9415": 46184767488.0, - "9420": 46184767488.0, - "9425": 46184767488.0, - "9430": 46184767488.0, - "9435": 46184767488.0, - "9440": 46184767488.0, - "9445": 46184767488.0, - "9450": 46184767488.0, - "9455": 46184767488.0, - "9460": 46184767488.0, - "9465": 46184767488.0, - "9470": 46184767488.0, - "9475": 46184767488.0, 
- "9480": 46184767488.0, - "9485": 46184767488.0, - "9490": 46184767488.0, - "9495": 46184767488.0, - "9500": 46184767488.0, - "9505": 46184767488.0, - "9510": 46184767488.0, - "9515": 46184767488.0, - "9520": 46184767488.0, - "9525": 46184767488.0, - "9530": 46184767488.0, - "9535": 46184767488.0 - } - }, - "mtp_1 loss": { - "start_step": 1, - "end_step": 9535, - "step_interval": 5, - "values": { - "1": 13.88878, - "5": 13.88979, - "10": 13.88767, - "15": 13.88576, - "20": 13.88068, - "25": 13.87774, - "30": 13.85566, - "35": 13.84855, - "40": 13.84546, - "45": 13.82693, - "50": 13.74828, - "55": 13.7249, - "60": 13.70841, - "65": 13.67571, - "70": 13.63981, - "75": 13.44327, - "80": 13.36054, - "85": 13.2835, - "90": 13.18641, - "95": 13.0505, - "100": 12.90733, - "105": 12.74689, - "110": 12.48525, - "115": 12.26801, - "120": 12.04358, - "125": 11.87011, - "130": 11.74911, - "135": 11.5841, - "140": 11.3494, - "145": 11.26997, - "150": 11.11919, - "155": 11.0211, - "160": 10.88133, - "165": 10.75162, - "170": 10.65694, - "175": 10.59566, - "180": 10.43546, - "185": 10.42441, - "190": 10.27183, - "195": 10.2539, - "200": 10.12718, - "205": 9.97472, - "210": 9.94271, - "215": 9.92122, - "220": 9.78944, - "225": 9.77014, - "230": 9.73, - "235": 9.64372, - "240": 9.57366, - "245": 9.50499, - "250": 9.43776, - "255": 9.37037, - "260": 9.29579, - "265": 9.2411, - "270": 9.15629, - "275": 9.12851, - "280": 9.10516, - "285": 9.09815, - "290": 9.01068, - "295": 8.94828, - "300": 8.83207, - "305": 8.80663, - "310": 8.74389, - "315": 8.71813, - "320": 8.68425, - "325": 8.58706, - "330": 8.56208, - "335": 8.53307, - "340": 8.52937, - "345": 8.41091, - "350": 8.39973, - "355": 8.29759, - "360": 8.38348, - "365": 8.28981, - "370": 8.2833, - "375": 8.22588, - "380": 8.18359, - "385": 8.16998, - "390": 8.1467, - "395": 8.09789, - "400": 8.01583, - "405": 8.01349, - "410": 8.00377, - "415": 7.95012, - "420": 7.93109, - "425": 7.88677, - "430": 7.81895, - "435": 7.82989, - "440": 
7.77278, - "445": 7.7493, - "450": 7.67877, - "455": 7.7063, - "460": 7.6532, - "465": 7.6329, - "470": 7.59885, - "475": 7.61277, - "480": 7.48436, - "485": 7.53153, - "490": 7.48574, - "495": 7.4714, - "500": 7.41282, - "505": 7.41932, - "510": 7.38698, - "515": 7.35645, - "520": 7.35102, - "525": 7.32559, - "530": 7.32588, - "535": 7.30357, - "540": 7.2179, - "545": 7.24022, - "550": 7.27618, - "555": 7.30238, - "560": 7.23984, - "565": 7.16321, - "570": 7.17228, - "575": 7.18898, - "580": 7.11497, - "585": 7.11901, - "590": 7.06121, - "595": 7.04317, - "600": 7.06682, - "605": 7.06137, - "610": 7.01939, - "615": 7.078, - "620": 6.98113, - "625": 6.95612, - "630": 6.96104, - "635": 6.98871, - "640": 6.96819, - "645": 6.95817, - "650": 7.00625, - "655": 7.00242, - "660": 6.89823, - "665": 6.88159, - "670": 6.84888, - "675": 6.93827, - "680": 6.89638, - "685": 6.85679, - "690": 6.83445, - "695": 6.79719, - "700": 6.79183, - "705": 6.78625, - "710": 6.82275, - "715": 6.82665, - "720": 6.71137, - "725": 6.76643, - "730": 6.75579, - "735": 6.75515, - "740": 6.70045, - "745": 6.67565, - "750": 6.73564, - "755": 6.65767, - "760": 6.66496, - "765": 6.65951, - "770": 6.68075, - "775": 6.65453, - "780": 6.62427, - "785": 6.64321, - "790": 6.59399, - "795": 6.59812, - "800": 6.5878, - "805": 6.65391, - "810": 6.51946, - "815": 6.5419, - "820": 6.55134, - "825": 6.55855, - "830": 6.57041, - "835": 6.52603, - "840": 6.49033, - "845": 6.54438, - "850": 6.49874, - "855": 6.49335, - "860": 6.49024, - "865": 6.49642, - "870": 6.46222, - "875": 6.51054, - "880": 6.4748, - "885": 6.43786, - "890": 6.51246, - "895": 6.39629, - "900": 6.41895, - "905": 6.44341, - "910": 6.40617, - "915": 6.38978, - "920": 6.38772, - "925": 6.37391, - "930": 6.40825, - "935": 6.39755, - "940": 6.34172, - "945": 6.36869, - "950": 6.3953, - "955": 6.34893, - "960": 6.35406, - "965": 6.25416, - "970": 6.32381, - "975": 6.31262, - "980": 6.28797, - "985": 6.29222, - "990": 6.34527, - "995": 6.26326, - 
"1000": 6.28434, - "1005": 6.23155, - "1010": 6.26712, - "1015": 6.29352, - "1020": 6.20454, - "1025": 6.21082, - "1030": 6.20913, - "1035": 6.29924, - "1040": 6.22531, - "1045": 6.19943, - "1050": 6.2267, - "1055": 6.21777, - "1060": 6.1673, - "1065": 6.15758, - "1070": 6.19281, - "1075": 6.19093, - "1080": 6.19319, - "1085": 6.19606, - "1090": 6.17796, - "1095": 6.181, - "1100": 6.1397, - "1105": 6.11513, - "1110": 6.17787, - "1115": 6.11231, - "1120": 6.05286, - "1125": 6.08699, - "1130": 6.14167, - "1135": 6.09531, - "1140": 6.08221, - "1145": 6.06731, - "1150": 6.09458, - "1155": 6.06298, - "1160": 6.04607, - "1165": 6.09676, - "1170": 6.07336, - "1175": 6.04568, - "1180": 6.05058, - "1185": 6.04124, - "1190": 6.04961, - "1195": 6.02949, - "1200": 5.97329, - "1205": 6.07601, - "1210": 5.93751, - "1215": 5.98403, - "1220": 6.06306, - "1225": 5.95152, - "1230": 5.99877, - "1235": 5.95912, - "1240": 5.99322, - "1245": 5.97187, - "1250": 5.95299, - "1255": 5.94742, - "1260": 5.95227, - "1265": 5.93352, - "1270": 5.90818, - "1275": 5.96805, - "1280": 5.90416, - "1285": 5.92308, - "1290": 5.90725, - "1295": 5.92, - "1300": 5.9267, - "1305": 5.90057, - "1310": 5.83908, - "1315": 5.8992, - "1320": 5.89614, - "1325": 5.8271, - "1330": 5.88462, - "1335": 5.8531, - "1340": 5.91994, - "1345": 5.86667, - "1350": 5.84738, - "1355": 5.84415, - "1360": 5.85216, - "1365": 5.84478, - "1370": 5.79663, - "1375": 5.80667, - "1380": 5.86219, - "1385": 5.81826, - "1390": 5.81231, - "1395": 5.8299, - "1400": 5.83135, - "1405": 5.82032, - "1410": 5.78518, - "1415": 5.77017, - "1420": 5.8049, - "1425": 5.79565, - "1430": 5.83189, - "1435": 5.74562, - "1440": 5.76408, - "1445": 5.8071, - "1450": 5.78859, - "1455": 5.80534, - "1460": 5.75975, - "1465": 5.76379, - "1470": 5.8044, - "1475": 5.76985, - "1480": 5.77563, - "1485": 5.72396, - "1490": 5.72354, - "1495": 5.74538, - "1500": 5.75109, - "1505": 5.72321, - "1510": 5.74832, - "1515": 5.67052, - "1520": 5.70302, - "1525": 5.67385, - 
"1530": 5.69497, - "1535": 5.68565, - "1540": 5.672, - "1545": 5.7178, - "1550": 5.72274, - "1555": 5.70942, - "1560": 5.65211, - "1565": 5.69926, - "1570": 5.71179, - "1575": 5.6613, - "1580": 5.69275, - "1585": 5.67221, - "1590": 5.66087, - "1595": 5.63673, - "1600": 5.70849, - "1605": 5.64113, - "1610": 5.64353, - "1615": 5.63334, - "1620": 5.65496, - "1625": 5.64982, - "1630": 5.62727, - "1635": 5.67706, - "1640": 5.62761, - "1645": 5.6449, - "1650": 5.63803, - "1655": 5.62499, - "1660": 5.61278, - "1665": 5.60116, - "1670": 5.61214, - "1675": 5.62193, - "1680": 5.56155, - "1685": 5.57098, - "1690": 5.55098, - "1695": 5.55521, - "1700": 5.60178, - "1705": 5.57706, - "1710": 5.58407, - "1715": 5.54721, - "1720": 5.52704, - "1725": 5.56718, - "1730": 5.53148, - "1735": 5.58307, - "1740": 5.52337, - "1745": 5.55772, - "1750": 5.53213, - "1755": 5.5301, - "1760": 5.55304, - "1765": 5.5132, - "1770": 5.522, - "1775": 5.52704, - "1780": 5.53997, - "1785": 5.48896, - "1790": 5.52187, - "1795": 5.52448, - "1800": 5.4698, - "1805": 5.46326, - "1810": 5.47869, - "1815": 5.48464, - "1820": 5.48466, - "1825": 5.48352, - "1830": 5.46909, - "1835": 5.46355, - "1840": 5.46633, - "1845": 5.44723, - "1850": 5.42996, - "1855": 5.4834, - "1860": 5.43502, - "1865": 5.44302, - "1870": 5.43258, - "1875": 5.42823, - "1880": 5.491, - "1885": 5.45039, - "1890": 5.44132, - "1895": 5.38084, - "1900": 5.42123, - "1905": 5.41299, - "1910": 5.43539, - "1915": 5.4013, - "1920": 5.37729, - "1925": 5.4085, - "1930": 5.37579, - "1935": 5.39731, - "1940": 5.3727, - "1945": 5.4174, - "1950": 5.45899, - "1955": 5.39197, - "1960": 5.39342, - "1965": 5.34213, - "1970": 5.34023, - "1975": 5.40413, - "1980": 5.35398, - "1985": 5.37376, - "1990": 5.39658, - "1995": 5.37398, - "2000": 5.38469, - "2005": 5.42838, - "2010": 5.32884, - "2015": 5.32047, - "2020": 5.32991, - "2025": 5.37403, - "2030": 5.31228, - "2035": 5.33119, - "2040": 5.29466, - "2045": 5.38332, - "2050": 5.35716, - "2055": 5.33062, - 
"2060": 5.32903, - "2065": 5.29751, - "2070": 5.29985, - "2075": 5.32708, - "2080": 5.29709, - "2085": 5.32918, - "2090": 5.24905, - "2095": 5.29587, - "2100": 5.25777, - "2105": 5.28625, - "2110": 5.28042, - "2115": 5.28102, - "2120": 5.2839, - "2125": 5.24699, - "2130": 5.25602, - "2135": 5.25599, - "2140": 5.26607, - "2145": 5.22772, - "2150": 5.24774, - "2155": 5.22588, - "2160": 5.24123, - "2165": 5.22937, - "2170": 5.26626, - "2175": 5.2603, - "2180": 5.24294, - "2185": 5.24675, - "2190": 5.22691, - "2195": 5.20127, - "2200": 5.20409, - "2205": 5.2127, - "2210": 5.25738, - "2215": 5.30103, - "2220": 5.24446, - "2225": 5.2194, - "2230": 5.21789, - "2235": 5.25766, - "2240": 5.16329, - "2245": 5.1607, - "2250": 5.18607, - "2255": 5.19635, - "2260": 5.13701, - "2265": 5.21276, - "2270": 5.14278, - "2275": 5.19722, - "2280": 5.17159, - "2285": 5.18798, - "2290": 5.17456, - "2295": 5.18141, - "2300": 5.17912, - "2305": 5.15551, - "2310": 5.1834, - "2315": 5.12144, - "2320": 5.17039, - "2325": 5.14984, - "2330": 5.15156, - "2335": 5.13195, - "2340": 5.13852, - "2345": 5.18732, - "2350": 5.12945, - "2355": 5.11891, - "2360": 5.10445, - "2365": 5.11898, - "2370": 5.10258, - "2375": 5.11122, - "2380": 5.05395, - "2385": 5.09747, - "2390": 5.11702, - "2395": 5.1322, - "2400": 5.07944, - "2405": 5.06236, - "2410": 5.11554, - "2415": 5.09106, - "2420": 5.10878, - "2425": 5.06863, - "2430": 5.09273, - "2435": 5.08666, - "2440": 5.07515, - "2445": 5.08608, - "2450": 5.04943, - "2455": 5.09523, - "2460": 5.04536, - "2465": 5.08334, - "2470": 5.07644, - "2475": 5.11246, - "2480": 5.02872, - "2485": 5.05906, - "2490": 5.05297, - "2495": 5.04377, - "2500": 5.04447, - "2505": 5.05124, - "2510": 5.0909, - "2515": 5.08005, - "2520": 5.02414, - "2525": 5.03617, - "2530": 5.05281, - "2535": 5.04127, - "2540": 5.04342, - "2545": 5.05498, - "2550": 4.99288, - "2555": 5.05988, - "2560": 5.03403, - "2565": 5.00279, - "2570": 5.02524, - "2575": 4.98811, - "2580": 5.00235, - "2585": 
4.98259, - "2590": 5.00195, - "2595": 4.95577, - "2600": 4.99616, - "2605": 5.01565, - "2610": 5.00846, - "2615": 4.9779, - "2620": 4.96, - "2625": 4.99167, - "2630": 4.92069, - "2635": 5.00179, - "2640": 5.00217, - "2645": 4.95857, - "2650": 4.98056, - "2655": 4.97276, - "2660": 4.91658, - "2665": 5.00931, - "2670": 4.95271, - "2675": 4.92627, - "2680": 4.95939, - "2685": 4.9606, - "2690": 4.92299, - "2695": 4.99925, - "2700": 4.90798, - "2705": 4.92161, - "2710": 4.9625, - "2715": 4.94083, - "2720": 4.97062, - "2725": 4.91977, - "2730": 4.9445, - "2735": 4.9369, - "2740": 4.92939, - "2745": 4.89678, - "2750": 4.93832, - "2755": 4.94144, - "2760": 4.94244, - "2765": 4.91315, - "2770": 4.95527, - "2775": 4.90029, - "2780": 4.93753, - "2785": 4.91159, - "2790": 4.93952, - "2795": 4.89812, - "2800": 4.84327, - "2805": 4.89103, - "2810": 4.88284, - "2815": 4.89434, - "2820": 4.93504, - "2825": 4.92479, - "2830": 4.90086, - "2835": 4.90451, - "2840": 4.89553, - "2845": 4.87238, - "2850": 4.90777, - "2855": 4.83628, - "2860": 4.89239, - "2865": 4.90134, - "2870": 4.89048, - "2875": 4.90822, - "2880": 4.82774, - "2885": 4.8758, - "2890": 4.84909, - "2895": 4.88906, - "2900": 4.84436, - "2905": 4.85096, - "2910": 4.84745, - "2915": 4.89554, - "2920": 4.87192, - "2925": 4.84408, - "2930": 4.83304, - "2935": 4.83856, - "2940": 4.8364, - "2945": 4.80087, - "2950": 4.79094, - "2955": 4.79257, - "2960": 4.81394, - "2965": 4.82244, - "2970": 4.83033, - "2975": 4.843, - "2980": 4.78708, - "2985": 4.83546, - "2990": 4.84632, - "2995": 4.79479, - "3000": 4.79957, - "3005": 4.7852, - "3010": 4.81747, - "3015": 4.77707, - "3020": 4.79613, - "3025": 4.80689, - "3030": 4.81521, - "3035": 4.81107, - "3040": 4.83014, - "3045": 4.81253, - "3050": 4.78854, - "3055": 4.79109, - "3060": 4.77291, - "3065": 4.80026, - "3070": 4.82011, - "3075": 4.75177, - "3080": 4.78059, - "3085": 4.7825, - "3090": 4.76596, - "3095": 4.80833, - "3100": 4.79656, - "3105": 4.77177, - "3110": 4.76085, - "3115": 
4.71609, - "3120": 4.78235, - "3125": 4.74714, - "3130": 4.75497, - "3135": 4.75435, - "3140": 4.7318, - "3145": 4.71606, - "3150": 4.74842, - "3155": 4.78313, - "3160": 4.765, - "3165": 4.75911, - "3170": 4.7541, - "3175": 4.746, - "3180": 4.73371, - "3185": 4.70655, - "3190": 4.70906, - "3195": 4.70876, - "3200": 4.67795, - "3205": 4.72527, - "3210": 4.67973, - "3215": 4.71138, - "3220": 4.67941, - "3225": 4.71501, - "3230": 4.698, - "3235": 4.73415, - "3240": 4.68214, - "3245": 4.6954, - "3250": 4.64543, - "3255": 4.69551, - "3260": 4.67926, - "3265": 4.72582, - "3270": 4.70744, - "3275": 4.65457, - "3280": 4.68021, - "3285": 4.69583, - "3290": 4.66845, - "3295": 4.67202, - "3300": 4.66858, - "3305": 4.67172, - "3310": 4.66314, - "3315": 4.70829, - "3320": 4.64885, - "3325": 4.65812, - "3330": 4.64245, - "3335": 4.65293, - "3340": 4.62608, - "3345": 4.64548, - "3350": 4.65071, - "3355": 4.65765, - "3360": 4.64823, - "3365": 4.66194, - "3370": 4.63984, - "3375": 4.67722, - "3380": 4.61449, - "3385": 4.62869, - "3390": 4.60608, - "3395": 4.6967, - "3400": 4.64188, - "3405": 4.6721, - "3410": 4.60581, - "3415": 4.55337, - "3420": 4.61467, - "3425": 4.63228, - "3430": 4.66874, - "3435": 4.63419, - "3440": 4.65338, - "3445": 4.60093, - "3450": 4.59889, - "3455": 4.62429, - "3460": 4.58089, - "3465": 4.57689, - "3470": 4.59454, - "3475": 4.60079, - "3480": 4.59374, - "3485": 4.62356, - "3490": 4.60917, - "3495": 4.63221, - "3500": 4.59027, - "3505": 4.59844, - "3510": 4.59797, - "3515": 4.648, - "3520": 4.62554, - "3525": 4.57245, - "3530": 4.58587, - "3535": 4.58174, - "3540": 4.63653, - "3545": 4.56212, - "3550": 4.62056, - "3555": 4.55332, - "3560": 4.62414, - "3565": 4.55473, - "3570": 4.56696, - "3575": 4.53468, - "3580": 4.59878, - "3585": 4.58068, - "3590": 4.51872, - "3595": 4.58848, - "3600": 4.55395, - "3605": 4.53571, - "3610": 4.54008, - "3615": 4.56874, - "3620": 4.61691, - "3625": 4.55023, - "3630": 4.59867, - "3635": 4.50879, - "3640": 4.52782, - 
"3645": 4.56947, - "3650": 4.53552, - "3655": 4.54665, - "3660": 4.55228, - "3665": 4.58643, - "3670": 4.54047, - "3675": 4.55594, - "3680": 4.57348, - "3685": 4.49418, - "3690": 4.54299, - "3695": 4.49297, - "3700": 4.52866, - "3705": 4.50654, - "3710": 4.51966, - "3715": 4.53, - "3720": 4.50118, - "3725": 4.47886, - "3730": 4.4879, - "3735": 4.50546, - "3740": 4.49399, - "3745": 4.48041, - "3750": 4.51288, - "3755": 4.48915, - "3760": 4.50004, - "3765": 4.47669, - "3770": 4.48984, - "3775": 4.46969, - "3780": 4.45476, - "3785": 4.50898, - "3790": 4.42336, - "3795": 4.4846, - "3800": 4.46028, - "3805": 4.46023, - "3810": 4.42629, - "3815": 4.4806, - "3820": 4.4736, - "3825": 4.4803, - "3830": 4.46747, - "3835": 4.42638, - "3840": 4.52349, - "3845": 4.48225, - "3850": 4.42266, - "3855": 4.46223, - "3860": 4.48001, - "3865": 4.44144, - "3870": 4.50523, - "3875": 4.41439, - "3880": 4.42672, - "3885": 4.44983, - "3890": 4.43819, - "3895": 4.38007, - "3900": 4.43434, - "3905": 4.41283, - "3910": 4.42081, - "3915": 4.42082, - "3920": 4.41329, - "3925": 4.39336, - "3930": 4.41243, - "3935": 4.41903, - "3940": 4.41848, - "3945": 4.39397, - "3950": 4.46098, - "3955": 4.39087, - "3960": 4.43851, - "3965": 4.44901, - "3970": 4.39272, - "3975": 4.40242, - "3980": 4.37236, - "3985": 4.40832, - "3990": 4.40208, - "3995": 4.44335, - "4000": 4.38322, - "4005": 4.37255, - "4010": 4.40982, - "4015": 4.39813, - "4020": 4.43488, - "4025": 4.39111, - "4030": 4.44761, - "4035": 4.40548, - "4040": 4.43553, - "4045": 4.41155, - "4050": 4.40643, - "4055": 4.41393, - "4060": 4.40665, - "4065": 4.41291, - "4070": 4.34904, - "4075": 4.37708, - "4080": 4.35797, - "4085": 4.39736, - "4090": 4.37437, - "4095": 4.35826, - "4100": 4.37323, - "4105": 4.36208, - "4110": 4.32609, - "4115": 4.39421, - "4120": 4.31057, - "4125": 4.31168, - "4130": 4.39302, - "4135": 4.37289, - "4140": 4.31616, - "4145": 4.32788, - "4150": 4.37558, - "4155": 4.29766, - "4160": 4.35633, - "4165": 4.38157, - "4170": 
4.32646, - "4175": 4.33285, - "4180": 4.32735, - "4185": 4.31953, - "4190": 4.31017, - "4195": 4.31525, - "4200": 4.31406, - "4205": 4.37, - "4210": 4.32695, - "4215": 4.3562, - "4220": 4.33701, - "4225": 4.32036, - "4230": 4.30579, - "4235": 4.35051, - "4240": 4.30872, - "4245": 4.31564, - "4250": 4.29999, - "4255": 4.31166, - "4260": 4.29019, - "4265": 4.30554, - "4270": 4.29954, - "4275": 4.36276, - "4280": 4.29798, - "4285": 4.33284, - "4290": 4.27741, - "4295": 4.30368, - "4300": 4.32594, - "4305": 4.29066, - "4310": 4.33408, - "4315": 4.3163, - "4320": 4.30571, - "4325": 4.32764, - "4330": 4.26525, - "4335": 4.30418, - "4340": 4.28838, - "4345": 4.23753, - "4350": 4.25927, - "4355": 4.33009, - "4360": 4.30543, - "4365": 4.30411, - "4370": 4.28149, - "4375": 4.24372, - "4380": 4.25559, - "4385": 4.23331, - "4390": 4.30895, - "4395": 4.27518, - "4400": 4.26254, - "4405": 4.23007, - "4410": 4.28048, - "4415": 4.26816, - "4420": 4.24916, - "4425": 4.29252, - "4430": 4.24244, - "4435": 4.29049, - "4440": 4.28601, - "4445": 4.24232, - "4450": 4.20719, - "4455": 4.26016, - "4460": 4.23459, - "4465": 4.25243, - "4470": 4.23841, - "4475": 4.2641, - "4480": 4.24909, - "4485": 4.23389, - "4490": 4.23593, - "4495": 4.17962, - "4500": 4.25444, - "4505": 4.22942, - "4510": 4.23965, - "4515": 4.19566, - "4520": 4.23113, - "4525": 4.19456, - "4530": 4.24001, - "4535": 4.20166, - "4540": 4.21127, - "4545": 4.23188, - "4550": 4.27088, - "4555": 4.2072, - "4560": 4.22378, - "4565": 4.15426, - "4570": 4.21606, - "4575": 4.1941, - "4580": 4.25747, - "4585": 4.22428, - "4590": 4.21266, - "4595": 4.17399, - "4600": 4.16313, - "4605": 4.2045, - "4610": 4.19939, - "4615": 4.24443, - "4620": 4.16447, - "4625": 4.19099, - "4630": 4.20991, - "4635": 4.18208, - "4640": 4.21078, - "4645": 4.20652, - "4650": 4.22758, - "4655": 4.19246, - "4660": 4.18248, - "4665": 4.193, - "4670": 4.23574, - "4675": 4.17989, - "4680": 4.20859, - "4685": 4.19688, - "4690": 4.1723, - "4695": 4.18485, - 
"4700": 4.16546, - "4705": 4.14067, - "4710": 4.20305, - "4715": 4.19002, - "4720": 4.14737, - "4725": 4.12216, - "4730": 4.17809, - "4735": 4.10178, - "4740": 4.14697, - "4745": 4.18779, - "4750": 4.13615, - "4755": 4.19424, - "4760": 4.1984, - "4765": 4.1461, - "4770": 4.14849, - "4775": 4.14773, - "4780": 4.15523, - "4785": 4.13664, - "4790": 4.19224, - "4795": 4.17628, - "4800": 4.13942, - "4805": 4.17839, - "4810": 4.1375, - "4815": 4.17167, - "4820": 4.12226, - "4825": 4.17474, - "4830": 4.16985, - "4835": 4.14976, - "4840": 4.15298, - "4845": 4.10968, - "4850": 4.17354, - "4855": 4.17639, - "4860": 4.11236, - "4865": 4.13759, - "4870": 4.13215, - "4875": 4.17643, - "4880": 4.1702, - "4885": 4.13029, - "4890": 4.1249, - "4895": 4.12403, - "4900": 4.09958, - "4905": 4.09173, - "4910": 4.09074, - "4915": 4.14665, - "4920": 4.12021, - "4925": 4.08814, - "4930": 4.09778, - "4935": 4.12094, - "4940": 4.04981, - "4945": 4.13369, - "4950": 4.07708, - "4955": 4.15684, - "4960": 4.11652, - "4965": 4.1151, - "4970": 4.09971, - "4975": 4.11736, - "4980": 4.12585, - "4985": 4.12754, - "4990": 4.09005, - "4995": 4.12916, - "5000": 4.05682, - "5005": 4.11701, - "5010": 4.10942, - "5015": 4.07584, - "5020": 4.05201, - "5025": 4.06082, - "5030": 4.10005, - "5035": 4.08177, - "5040": 4.0418, - "5045": 4.11064, - "5050": 4.06425, - "5055": 4.08995, - "5060": 4.03143, - "5065": 4.09666, - "5070": 4.07056, - "5075": 4.12386, - "5080": 4.07795, - "5085": 4.09595, - "5090": 4.07748, - "5095": 4.0424, - "5100": 4.0782, - "5105": 4.0809, - "5110": 4.08612, - "5115": 4.07663, - "5120": 4.09438, - "5125": 4.05976, - "5130": 4.06327, - "5135": 4.0488, - "5140": 4.06922, - "5145": 4.05942, - "5150": 4.07092, - "5155": 4.07553, - "5160": 4.05549, - "5165": 4.09766, - "5170": 3.96642, - "5175": 4.07515, - "5180": 4.03531, - "5185": 4.05861, - "5190": 4.08092, - "5195": 4.04601, - "5200": 4.06577, - "5205": 4.09747, - "5210": 4.01055, - "5215": 4.02373, - "5220": 4.02621, - "5225": 
4.02349, - "5230": 4.06271, - "5235": 4.03585, - "5240": 4.02422, - "5245": 4.04177, - "5250": 4.04544, - "5255": 4.03173, - "5260": 4.04798, - "5265": 4.01495, - "5270": 3.98673, - "5275": 4.00519, - "5280": 4.02024, - "5285": 4.04277, - "5290": 4.00304, - "5295": 4.00093, - "5300": 4.02323, - "5305": 4.01012, - "5310": 4.0478, - "5315": 3.99571, - "5320": 4.03864, - "5325": 4.06497, - "5330": 3.99981, - "5335": 4.02122, - "5340": 3.9739, - "5345": 4.01424, - "5350": 4.0246, - "5355": 4.01714, - "5360": 3.9668, - "5365": 3.98455, - "5370": 4.02892, - "5375": 3.99384, - "5380": 3.98952, - "5385": 4.00787, - "5390": 3.99585, - "5395": 3.932, - "5400": 4.02192, - "5405": 3.94401, - "5410": 4.03103, - "5415": 3.94954, - "5420": 3.98108, - "5425": 3.96619, - "5430": 3.97462, - "5435": 4.00917, - "5440": 3.96082, - "5445": 3.96843, - "5450": 3.98078, - "5455": 3.96312, - "5460": 3.97781, - "5465": 4.03343, - "5470": 3.99301, - "5475": 3.92634, - "5480": 4.0001, - "5485": 3.96789, - "5490": 3.99381, - "5495": 3.99755, - "5500": 3.95394, - "5505": 3.9702, - "5510": 4.00139, - "5515": 3.97886, - "5520": 3.95723, - "5525": 4.01089, - "5530": 3.95723, - "5535": 3.99058, - "5540": 3.95888, - "5545": 3.97704, - "5550": 3.97005, - "5555": 3.93134, - "5560": 3.94203, - "5565": 3.98688, - "5570": 3.94409, - "5575": 3.97691, - "5580": 3.95423, - "5585": 3.89232, - "5590": 3.96662, - "5595": 3.91996, - "5600": 3.97099, - "5605": 3.87423, - "5610": 3.96509, - "5615": 3.9629, - "5620": 3.97882, - "5625": 3.95843, - "5630": 3.94884, - "5635": 3.92989, - "5640": 3.95308, - "5645": 3.91537, - "5650": 3.88759, - "5655": 3.91914, - "5660": 3.9101, - "5665": 3.92739, - "5670": 3.91107, - "5675": 3.94487, - "5680": 3.91238, - "5685": 3.92365, - "5690": 3.92517, - "5695": 3.953, - "5700": 3.88996, - "5705": 3.88995, - "5710": 3.87532, - "5715": 3.99623, - "5720": 3.94505, - "5725": 3.89527, - "5730": 3.94792, - "5735": 3.92817, - "5740": 3.92171, - "5745": 3.89897, - "5750": 3.92176, - 
"5755": 3.94672, - "5760": 3.92632, - "5765": 3.92024, - "5770": 3.95286, - "5775": 3.86965, - "5780": 3.91041, - "5785": 3.91605, - "5790": 3.9236, - "5795": 3.93068, - "5800": 3.86954, - "5805": 3.8764, - "5810": 3.92692, - "5815": 3.89083, - "5820": 3.84021, - "5825": 3.89285, - "5830": 3.85163, - "5835": 3.88292, - "5840": 3.89361, - "5845": 3.91293, - "5850": 3.90508, - "5855": 3.84956, - "5860": 3.87018, - "5865": 3.8979, - "5870": 3.85816, - "5875": 3.89604, - "5880": 3.88075, - "5885": 3.89965, - "5890": 3.90395, - "5895": 3.92339, - "5900": 3.85618, - "5905": 3.92033, - "5910": 3.88782, - "5915": 3.85158, - "5920": 3.88999, - "5925": 3.82174, - "5930": 3.88478, - "5935": 3.86887, - "5940": 3.89924, - "5945": 3.90324, - "5950": 3.88472, - "5955": 3.83758, - "5960": 3.91077, - "5965": 3.85295, - "5970": 3.90592, - "5975": 3.87131, - "5980": 3.94635, - "5985": 3.81828, - "5990": 3.91445, - "5995": 3.82666, - "6000": 3.86389, - "6005": 3.82737, - "6010": 3.84638, - "6015": 3.82528, - "6020": 3.84213, - "6025": 3.8812, - "6030": 3.82864, - "6035": 3.87549, - "6040": 3.85371, - "6045": 3.88892, - "6050": 3.86125, - "6055": 3.84398, - "6060": 3.86538, - "6065": 3.8955, - "6070": 3.844, - "6075": 3.79156, - "6080": 3.86497, - "6085": 3.82767, - "6090": 3.86054, - "6095": 3.85995, - "6100": 3.82399, - "6105": 3.87238, - "6110": 3.80525, - "6115": 3.87931, - "6120": 3.85374, - "6125": 3.85469, - "6130": 3.85122, - "6135": 3.82709, - "6140": 3.8225, - "6145": 3.81264, - "6150": 3.85853, - "6155": 3.83605, - "6160": 3.80232, - "6165": 3.82292, - "6170": 3.81513, - "6175": 3.80691, - "6180": 3.8071, - "6185": 3.84448, - "6190": 3.81178, - "6195": 3.78014, - "6200": 3.80543, - "6205": 3.81219, - "6210": 3.77002, - "6215": 3.82559, - "6220": 3.822, - "6225": 3.82598, - "6230": 3.76955, - "6235": 3.8072, - "6240": 3.73374, - "6245": 3.84624, - "6250": 3.80845, - "6255": 3.8223, - "6260": 3.7948, - "6265": 3.82819, - "6270": 3.75673, - "6275": 3.78492, - "6280": 3.80313, - 
"6285": 3.78154, - "6290": 3.79976, - "6295": 3.80168, - "6300": 3.80756, - "6305": 3.88253, - "6310": 3.7702, - "6315": 3.7633, - "6320": 3.81817, - "6325": 3.75526, - "6330": 3.82862, - "6335": 3.81943, - "6340": 3.76721, - "6345": 3.82391, - "6350": 3.76718, - "6355": 3.77414, - "6360": 3.75111, - "6365": 3.80986, - "6370": 3.81014, - "6375": 3.78548, - "6380": 3.8065, - "6385": 3.82336, - "6390": 3.78289, - "6395": 3.75935, - "6400": 3.76038, - "6405": 3.83749, - "6410": 3.83127, - "6415": 3.7623, - "6420": 3.82306, - "6425": 3.83219, - "6430": 3.81048, - "6435": 3.77764, - "6440": 3.76108, - "6445": 3.80173, - "6450": 3.73884, - "6455": 3.75156, - "6460": 3.77352, - "6465": 3.80905, - "6470": 3.78701, - "6475": 3.78176, - "6480": 3.81548, - "6485": 3.76414, - "6490": 3.71291, - "6495": 3.81407, - "6500": 3.79809, - "6505": 3.72741, - "6510": 3.7976, - "6515": 3.81938, - "6520": 3.73166, - "6525": 3.80464, - "6530": 3.76853, - "6535": 3.76159, - "6540": 3.82675, - "6545": 3.76261, - "6550": 3.76963, - "6555": 3.75505, - "6560": 3.71108, - "6565": 3.70887, - "6570": 3.7465, - "6575": 3.69338, - "6580": 3.81517, - "6585": 3.76239, - "6590": 3.72546, - "6595": 3.74461, - "6600": 3.73687, - "6605": 3.71668, - "6610": 3.72679, - "6615": 3.76079, - "6620": 3.70966, - "6625": 3.72313, - "6630": 3.72114, - "6635": 3.76232, - "6640": 3.73374, - "6645": 3.75061, - "6650": 3.77922, - "6655": 3.70627, - "6660": 3.73531, - "6665": 3.7573, - "6670": 3.71979, - "6675": 3.74124, - "6680": 3.73477, - "6685": 3.76436, - "6690": 3.74256, - "6695": 3.75545, - "6700": 3.74559, - "6705": 3.72882, - "6710": 3.72913, - "6715": 3.69291, - "6720": 3.77736, - "6725": 3.75737, - "6730": 3.73993, - "6735": 3.74082, - "6740": 3.73806, - "6745": 3.72041, - "6750": 3.74412, - "6755": 3.69337, - "6760": 3.68122, - "6765": 3.74232, - "6770": 3.69625, - "6775": 3.74604, - "6780": 3.70485, - "6785": 3.70942, - "6790": 3.73683, - "6795": 3.69846, - "6800": 3.71752, - "6805": 3.72172, - "6810": 
3.73628, - "6815": 3.65876, - "6820": 3.70229, - "6825": 3.72745, - "6830": 3.70872, - "6835": 3.68623, - "6840": 3.67517, - "6845": 3.74818, - "6850": 3.70405, - "6855": 3.73713, - "6860": 3.6695, - "6865": 3.73585, - "6870": 3.6953, - "6875": 3.69781, - "6880": 3.70324, - "6885": 3.67727, - "6890": 3.69236, - "6895": 3.67848, - "6900": 3.68133, - "6905": 3.68771, - "6910": 3.72919, - "6915": 3.73359, - "6920": 3.68934, - "6925": 3.69022, - "6930": 3.68858, - "6935": 3.62056, - "6940": 3.68927, - "6945": 3.67777, - "6950": 3.68038, - "6955": 3.6771, - "6960": 3.68108, - "6965": 3.72225, - "6970": 3.64603, - "6975": 3.72781, - "6980": 3.68459, - "6985": 3.68985, - "6990": 3.7316, - "6995": 3.70495, - "7000": 3.63993, - "7005": 3.71744, - "7010": 3.69223, - "7015": 3.67561, - "7020": 3.72152, - "7025": 3.70969, - "7030": 3.70236, - "7035": 3.65723, - "7040": 3.61488, - "7045": 3.69518, - "7050": 3.71947, - "7055": 3.64991, - "7060": 3.69149, - "7065": 3.74261, - "7070": 3.67108, - "7075": 3.67419, - "7080": 3.71683, - "7085": 3.64191, - "7090": 3.66318, - "7095": 3.63818, - "7100": 3.68341, - "7105": 3.62024, - "7110": 3.68873, - "7115": 3.63797, - "7120": 3.68741, - "7125": 3.63499, - "7130": 3.65311, - "7135": 3.66196, - "7140": 3.66504, - "7145": 3.68183, - "7150": 3.62677, - "7155": 3.69052, - "7160": 3.62415, - "7165": 3.64241, - "7170": 3.68231, - "7175": 3.64603, - "7180": 3.67571, - "7185": 3.70721, - "7190": 3.663, - "7195": 3.66862, - "7200": 3.67265, - "7205": 3.65833, - "7210": 3.68834, - "7215": 3.67282, - "7220": 3.69117, - "7225": 3.66107, - "7230": 3.68593, - "7235": 3.64823, - "7240": 3.64663, - "7245": 3.66574, - "7250": 3.60447, - "7255": 3.62598, - "7260": 3.68023, - "7265": 3.60288, - "7270": 3.63936, - "7275": 3.64805, - "7280": 3.62623, - "7285": 3.65053, - "7290": 3.6735, - "7295": 3.66357, - "7300": 3.62393, - "7305": 3.62784, - "7310": 3.66312, - "7315": 3.67632, - "7320": 3.65015, - "7325": 3.65453, - "7330": 3.62344, - "7335": 3.62574, - 
"7340": 3.64422, - "7345": 3.60533, - "7350": 3.65727, - "7355": 3.64352, - "7360": 3.61779, - "7365": 3.63578, - "7370": 3.6188, - "7375": 3.59366, - "7380": 3.64743, - "7385": 3.67218, - "7390": 3.65876, - "7395": 3.60688, - "7400": 3.65695, - "7405": 3.64945, - "7410": 3.66151, - "7415": 3.64439, - "7420": 3.63591, - "7425": 3.6844, - "7430": 3.63181, - "7435": 3.61154, - "7440": 3.62564, - "7445": 3.60843, - "7450": 3.57301, - "7455": 3.64772, - "7460": 3.63452, - "7465": 3.63169, - "7470": 3.63744, - "7475": 3.64264, - "7480": 3.61171, - "7485": 3.57567, - "7490": 3.57599, - "7495": 3.5863, - "7500": 3.61565, - "7505": 3.59614, - "7510": 3.55707, - "7515": 3.61683, - "7520": 3.60991, - "7525": 3.56658, - "7530": 3.61196, - "7535": 3.62507, - "7540": 3.61046, - "7545": 3.64639, - "7550": 3.65882, - "7555": 3.58595, - "7560": 3.60212, - "7565": 3.59782, - "7570": 3.60603, - "7575": 3.57351, - "7580": 3.62111, - "7585": 3.60137, - "7590": 3.6026, - "7595": 3.66318, - "7600": 3.6076, - "7605": 3.59626, - "7610": 3.58483, - "7615": 3.58478, - "7620": 3.56787, - "7625": 3.62193, - "7630": 3.60469, - "7635": 3.5928, - "7640": 3.59019, - "7645": 3.62279, - "7650": 3.6259, - "7655": 3.66371, - "7660": 3.5305, - "7665": 3.60545, - "7670": 3.59796, - "7675": 3.58201, - "7680": 3.57701, - "7685": 3.64556, - "7690": 3.59102, - "7695": 3.57063, - "7700": 3.63352, - "7705": 3.58816, - "7710": 3.62048, - "7715": 3.5764, - "7720": 3.65561, - "7725": 3.55706, - "7730": 3.57614, - "7735": 3.61006, - "7740": 3.58168, - "7745": 3.58454, - "7750": 3.57422, - "7755": 3.59202, - "7760": 3.56089, - "7765": 3.58551, - "7770": 3.60104, - "7775": 3.57103, - "7780": 3.55457, - "7785": 3.57713, - "7790": 3.57042, - "7795": 3.58792, - "7800": 3.57997, - "7805": 3.58361, - "7810": 3.60683, - "7815": 3.57773, - "7820": 3.57578, - "7825": 3.61835, - "7830": 3.59192, - "7835": 3.52632, - "7840": 3.6194, - "7845": 3.55538, - "7850": 3.51354, - "7855": 3.56599, - "7860": 3.54645, - "7865": 
3.60369, - "7870": 3.54114, - "7875": 3.55695, - "7880": 3.572, - "7885": 3.56229, - "7890": 3.60585, - "7895": 3.59334, - "7900": 3.60641, - "7905": 3.56339, - "7910": 3.58203, - "7915": 3.58298, - "7920": 3.59012, - "7925": 3.5681, - "7930": 3.59927, - "7935": 3.56169, - "7940": 3.60948, - "7945": 3.62723, - "7950": 3.53708, - "7955": 3.54481, - "7960": 3.53124, - "7965": 3.51862, - "7970": 3.52486, - "7975": 3.55975, - "7980": 3.56722, - "7985": 3.54114, - "7990": 3.54399, - "7995": 3.5186, - "8000": 3.57756, - "8005": 3.54643, - "8010": 3.53705, - "8015": 3.53445, - "8020": 3.53111, - "8025": 3.51514, - "8030": 3.54148, - "8035": 3.53478, - "8040": 3.52163, - "8045": 3.57586, - "8050": 3.57789, - "8055": 3.54866, - "8060": 3.5712, - "8065": 3.54757, - "8070": 3.53654, - "8075": 3.52629, - "8080": 3.57467, - "8085": 3.52928, - "8090": 3.53424, - "8095": 3.56313, - "8100": 3.51543, - "8105": 3.54752, - "8110": 3.5453, - "8115": 3.51645, - "8120": 3.52703, - "8125": 3.56437, - "8130": 3.52567, - "8135": 3.53994, - "8140": 3.52104, - "8145": 3.50389, - "8150": 3.52394, - "8155": 3.51178, - "8160": 3.56129, - "8165": 3.54328, - "8170": 3.5116, - "8175": 3.5057, - "8180": 3.57245, - "8185": 3.54733, - "8190": 3.58207, - "8195": 3.55001, - "8200": 3.52156, - "8205": 3.52888, - "8210": 3.53558, - "8215": 3.55713, - "8220": 3.5201, - "8225": 3.51201, - "8230": 3.53756, - "8235": 3.55814, - "8240": 3.54052, - "8245": 3.53652, - "8250": 3.5692, - "8255": 3.51844, - "8260": 3.52912, - "8265": 3.52072, - "8270": 3.52843, - "8275": 3.51526, - "8280": 3.50321, - "8285": 3.52669, - "8290": 3.5272, - "8295": 3.49645, - "8300": 3.51721, - "8305": 3.53958, - "8310": 3.5351, - "8315": 3.50396, - "8320": 3.53046, - "8325": 3.47885, - "8330": 3.44388, - "8335": 3.51457, - "8340": 3.54076, - "8345": 3.49873, - "8350": 3.51134, - "8355": 3.54342, - "8360": 3.51607, - "8365": 3.53716, - "8370": 3.53127, - "8375": 3.48696, - "8380": 3.4848, - "8385": 3.52879, - "8390": 3.49474, - 
"8395": 3.52721, - "8400": 3.49636, - "8405": 3.51685, - "8410": 3.57651, - "8415": 3.48228, - "8420": 3.45216, - "8425": 3.53401, - "8430": 3.53787, - "8435": 3.47534, - "8440": 3.55163, - "8445": 3.53658, - "8450": 3.50995, - "8455": 3.52875, - "8460": 3.53463, - "8465": 3.4708, - "8470": 3.4929, - "8475": 3.55004, - "8480": 3.47555, - "8485": 3.49487, - "8490": 3.48489, - "8495": 3.48023, - "8500": 3.52888, - "8505": 3.46749, - "8510": 3.54064, - "8515": 3.48982, - "8520": 3.49184, - "8525": 3.42254, - "8530": 3.50181, - "8535": 3.52351, - "8540": 3.47484, - "8545": 3.49944, - "8550": 3.46881, - "8555": 3.53517, - "8560": 3.5346, - "8565": 3.48792, - "8570": 3.48883, - "8575": 3.46414, - "8580": 3.50837, - "8585": 3.52994, - "8590": 3.51956, - "8595": 3.52409, - "8600": 3.50319, - "8605": 3.49079, - "8610": 3.49584, - "8615": 3.49483, - "8620": 3.46525, - "8625": 3.4875, - "8630": 3.49269, - "8635": 3.47742, - "8640": 3.46288, - "8645": 3.52844, - "8650": 3.45936, - "8655": 3.50294, - "8660": 3.51093, - "8665": 3.48996, - "8670": 3.50547, - "8675": 3.47414, - "8680": 3.4685, - "8685": 3.48029, - "8690": 3.51264, - "8695": 3.51367, - "8700": 3.48324, - "8705": 3.45351, - "8710": 3.50031, - "8715": 3.45042, - "8720": 3.52876, - "8725": 3.48819, - "8730": 3.47981, - "8735": 3.51018, - "8740": 3.46013, - "8745": 3.50108, - "8750": 3.50543, - "8755": 3.46564, - "8760": 3.48373, - "8765": 3.43955, - "8770": 3.50951, - "8775": 3.47313, - "8780": 3.45782, - "8785": 3.47628, - "8790": 3.4608, - "8795": 3.49675, - "8800": 3.46402, - "8805": 3.43267, - "8810": 3.45044, - "8815": 3.47281, - "8820": 3.43586, - "8825": 3.46906, - "8830": 3.44494, - "8835": 3.42402, - "8840": 3.4361, - "8845": 3.45772, - "8850": 3.48143, - "8855": 3.46505, - "8860": 3.53187, - "8865": 3.46882, - "8870": 3.44869, - "8875": 3.45286, - "8880": 3.45584, - "8885": 3.44986, - "8890": 3.47298, - "8895": 3.45131, - "8900": 3.47879, - "8905": 3.46796, - "8910": 3.45421, - "8915": 3.44293, - "8920": 
3.43345, - "8925": 3.50917, - "8930": 3.49052, - "8935": 3.50073, - "8940": 3.47584, - "8945": 3.47848, - "8950": 3.45717, - "8955": 3.44615, - "8960": 3.43965, - "8965": 3.45818, - "8970": 3.47179, - "8975": 3.42177, - "8980": 3.42266, - "8985": 3.44671, - "8990": 3.50075, - "8995": 3.47255, - "9000": 3.41954, - "9005": 3.46563, - "9010": 3.51573, - "9015": 3.4185, - "9020": 3.43896, - "9025": 3.44768, - "9030": 3.4718, - "9035": 3.37943, - "9040": 3.45501, - "9045": 3.45466, - "9050": 3.49179, - "9055": 3.40312, - "9060": 3.49477, - "9065": 3.51349, - "9070": 3.44713, - "9075": 3.47746, - "9080": 3.47127, - "9085": 3.47459, - "9090": 3.46668, - "9095": 3.42167, - "9100": 3.4227, - "9105": 3.41261, - "9110": 3.45663, - "9115": 3.46481, - "9120": 3.51949, - "9125": 3.44245, - "9130": 3.43654, - "9135": 3.46008, - "9140": 3.47929, - "9145": 3.42408, - "9150": 3.44307, - "9155": 3.45089, - "9160": 3.44998, - "9165": 3.45651, - "9170": 3.47508, - "9175": 3.41133, - "9180": 3.45323, - "9185": 3.41086, - "9190": 3.46875, - "9195": 3.43315, - "9200": 3.44758, - "9205": 3.42373, - "9210": 3.45572, - "9215": 3.39585, - "9220": 3.42327, - "9225": 3.44665, - "9230": 3.37357, - "9235": 3.39456, - "9240": 3.42282, - "9245": 3.40683, - "9250": 3.40791, - "9255": 3.42077, - "9260": 3.39755, - "9265": 3.44216, - "9270": 3.40754, - "9275": 3.42864, - "9280": 3.44334, - "9285": 3.44087, - "9290": 3.45563, - "9295": 3.44456, - "9300": 3.39522, - "9305": 3.42638, - "9310": 3.41593, - "9315": 3.38278, - "9320": 3.3797, - "9325": 3.42046, - "9330": 3.47853, - "9335": 3.38962, - "9340": 3.4706, - "9345": 3.46224, - "9350": 3.42735, - "9355": 3.39326, - "9360": 3.4165, - "9365": 3.41212, - "9370": 3.46155, - "9375": 3.42622, - "9380": 3.36413, - "9385": 3.43469, - "9390": 3.44403, - "9395": 3.45465, - "9400": 3.41582, - "9405": 3.40031, - "9410": 3.43744, - "9415": 3.42574, - "9420": 3.40295, - "9425": 3.42063, - "9430": 3.3935, - "9435": 3.41529, - "9440": 3.40125, - "9445": 3.39961, - 
"9450": 3.39469, - "9455": 3.4008, - "9460": 3.46489, - "9465": 3.46303, - "9470": 3.40478, - "9475": 3.45335, - "9480": 3.40789, - "9485": 3.3998, - "9490": 3.41154, - "9495": 3.44387, - "9500": 3.40535, - "9505": 3.37735, - "9510": 3.41645, - "9515": 3.41113, - "9520": 3.43045, - "9525": 3.40102, - "9530": 3.40027, - "9535": 3.42216 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 9535, - "step_interval": 5, - "values": { - "1": 241.22832, - "5": 11.6467, - "10": 11.59177, - "15": 11.54982, - "20": 11.50554, - "25": 11.48401, - "30": 11.47019, - "35": 11.4638, - "40": 11.44621, - "45": 11.45505, - "50": 11.48551, - "55": 11.47505, - "60": 11.46559, - "65": 11.69276, - "70": 11.51491, - "75": 11.58841, - "80": 11.59402, - "85": 11.55505, - "90": 11.57827, - "95": 11.6084, - "100": 11.72328, - "105": 11.84735, - "110": 11.81445, - "115": 12.01469, - "120": 12.27052, - "125": 12.40894, - "130": 12.32306, - "135": 12.6537, - "140": 12.87941, - "145": 12.87274, - "150": 13.17646, - "155": 13.42132, - "160": 13.29203, - "165": 13.33468, - "170": 13.38365, - "175": 13.29143, - "180": 13.37704, - "185": 13.17491, - "190": 13.2207, - "195": 13.0407, - "200": 13.03378, - "205": 12.93499, - "210": 12.93302, - "215": 12.83429, - "220": 12.77504, - "225": 12.71437, - "230": 12.67462, - "235": 12.7241, - "240": 12.78341, - "245": 12.61372, - "250": 12.60968, - "255": 12.49502, - "260": 12.38655, - "265": 12.35372, - "270": 12.32939, - "275": 12.25213, - "280": 12.23412, - "285": 12.25047, - "290": 12.1386, - "295": 12.11066, - "300": 12.11487, - "305": 12.08746, - "310": 12.06842, - "315": 12.13334, - "320": 12.12044, - "325": 12.01351, - "330": 11.97276, - "335": 11.951, - "340": 11.97582, - "345": 11.94178, - "350": 11.90942, - "355": 11.9474, - "360": 11.94231, - "365": 11.91539, - "370": 11.89051, - "375": 11.87871, - "380": 11.8539, - "385": 11.81422, - "390": 11.82072, - "395": 11.85516, - "400": 11.8322, - "405": 11.81286, - "410": 11.81008, - "415": 
11.76854, - "420": 11.7721, - "425": 11.7287, - "430": 11.80281, - "435": 11.76948, - "440": 11.78237, - "445": 11.81223, - "450": 11.76024, - "455": 11.83905, - "460": 11.86797, - "465": 11.88193, - "470": 11.94544, - "475": 12.03403, - "480": 11.8718, - "485": 11.96463, - "490": 11.9543, - "495": 11.99738, - "500": 12.06608, - "505": 12.04813, - "510": 12.09706, - "515": 12.14335, - "520": 12.36581, - "525": 12.19115, - "530": 12.1887, - "535": 12.25354, - "540": 12.27902, - "545": 12.32347, - "550": 12.44366, - "555": 12.25807, - "560": 12.22369, - "565": 12.28956, - "570": 12.31572, - "575": 12.28835, - "580": 12.33571, - "585": 12.26567, - "590": 12.30079, - "595": 12.29151, - "600": 12.30023, - "605": 12.45501, - "610": 12.27373, - "615": 12.217, - "620": 12.22334, - "625": 12.21274, - "630": 12.21904, - "635": 12.20277, - "640": 12.25538, - "645": 12.19988, - "650": 12.14026, - "655": 12.14302, - "660": 12.14678, - "665": 12.13972, - "670": 12.11485, - "675": 12.0282, - "680": 12.01901, - "685": 11.98462, - "690": 11.98742, - "695": 11.95917, - "700": 11.92521, - "705": 18.38779, - "710": 11.92438, - "715": 11.8274, - "720": 11.90138, - "725": 11.84998, - "730": 11.83009, - "735": 11.89248, - "740": 11.82364, - "745": 11.91839, - "750": 11.9577, - "755": 11.85056, - "760": 11.90523, - "765": 11.9116, - "770": 11.83717, - "775": 12.05864, - "780": 11.84895, - "785": 11.84375, - "790": 11.86493, - "795": 11.85763, - "800": 11.94365, - "805": 11.86899, - "810": 11.86748, - "815": 11.86393, - "820": 11.87992, - "825": 11.85259, - "830": 11.86886, - "835": 11.8517, - "840": 11.86254, - "845": 11.89508, - "850": 11.85613, - "855": 11.87434, - "860": 11.90703, - "865": 11.83224, - "870": 11.88246, - "875": 11.9305, - "880": 11.96022, - "885": 11.81651, - "890": 12.06642, - "895": 11.92653, - "900": 11.86469, - "905": 12.01767, - "910": 11.89635, - "915": 11.8254, - "920": 11.86106, - "925": 11.88434, - "930": 11.97059, - "935": 12.03718, - "940": 11.87698, - "945": 
11.88008, - "950": 12.02071, - "955": 11.84843, - "960": 244.37245, - "965": 12.32084, - "970": 11.86341, - "975": 12.01988, - "980": 11.92166, - "985": 11.85411, - "990": 11.87753, - "995": 11.84786, - "1000": 11.89892, - "1005": 11.99759, - "1010": 11.91045, - "1015": 11.87038, - "1020": 11.85674, - "1025": 11.85567, - "1030": 11.86674, - "1035": 11.92499, - "1040": 11.85969, - "1045": 12.04929, - "1050": 11.82341, - "1055": 11.83111, - "1060": 11.87567, - "1065": 11.84584, - "1070": 11.93603, - "1075": 11.87121, - "1080": 11.85935, - "1085": 11.88667, - "1090": 11.86058, - "1095": 11.86482, - "1100": 11.82375, - "1105": 11.86482, - "1110": 11.89668, - "1115": 11.94941, - "1120": 11.84941, - "1125": 11.94466, - "1130": 11.90846, - "1135": 11.8602, - "1140": 11.86926, - "1145": 11.90365, - "1150": 11.88788, - "1155": 11.81781, - "1160": 11.88464, - "1165": 11.85344, - "1170": 11.8865, - "1175": 11.93361, - "1180": 11.89647, - "1185": 11.9031, - "1190": 11.89287, - "1195": 11.88683, - "1200": 11.85927, - "1205": 11.92471, - "1210": 11.85592, - "1215": 17.4276, - "1220": 11.87359, - "1225": 11.9296, - "1230": 11.95025, - "1235": 11.90738, - "1240": 11.86621, - "1245": 11.98001, - "1250": 12.003, - "1255": 11.91396, - "1260": 11.92279, - "1265": 11.85195, - "1270": 11.87463, - "1275": 11.90307, - "1280": 11.84637, - "1285": 11.95883, - "1290": 11.88039, - "1295": 11.8399, - "1300": 11.81976, - "1305": 11.89766, - "1310": 11.91584, - "1315": 12.12571, - "1320": 12.05556, - "1325": 11.84679, - "1330": 11.94985, - "1335": 11.94039, - "1340": 12.00572, - "1345": 11.98268, - "1350": 12.15927, - "1355": 12.04312, - "1360": 11.98816, - "1365": 11.95737, - "1370": 11.92395, - "1375": 11.89595, - "1380": 11.88635, - "1385": 11.96617, - "1390": 11.87421, - "1395": 12.02833, - "1400": 11.87415, - "1405": 11.85875, - "1410": 11.85419, - "1415": 11.8978, - "1420": 11.86309, - "1425": 11.87505, - "1430": 12.10339, - "1435": 11.88151, - "1440": 12.15068, - "1445": 11.98493, - 
"1450": 11.95438, - "1455": 12.03808, - "1460": 11.85293, - "1465": 11.93176, - "1470": 11.92246, - "1475": 11.90448, - "1480": 11.98959, - "1485": 11.93685, - "1490": 11.92389, - "1495": 11.95047, - "1500": 11.94526, - "1505": 11.9086, - "1510": 11.95225, - "1515": 11.87405, - "1520": 11.87975, - "1525": 11.88264, - "1530": 12.04989, - "1535": 12.02942, - "1540": 11.93089, - "1545": 11.89376, - "1550": 11.88596, - "1555": 11.95001, - "1560": 11.90239, - "1565": 11.89699, - "1570": 11.91441, - "1575": 11.87813, - "1580": 11.86939, - "1585": 11.8566, - "1590": 11.8665, - "1595": 11.90861, - "1600": 11.90425, - "1605": 11.82248, - "1610": 11.86531, - "1615": 11.8796, - "1620": 11.87587, - "1625": 11.88944, - "1630": 11.88839, - "1635": 11.8307, - "1640": 11.87082, - "1645": 11.84687, - "1650": 11.87887, - "1655": 11.85709, - "1660": 11.85167, - "1665": 11.90284, - "1670": 11.85205, - "1675": 12.00742, - "1680": 11.90754, - "1685": 11.97458, - "1690": 11.97016, - "1695": 11.9189, - "1700": 11.89709, - "1705": 11.88042, - "1710": 11.87879, - "1715": 12.06779, - "1720": 11.98631, - "1725": 12.01044, - "1730": 11.9924, - "1735": 11.87648, - "1740": 11.87455, - "1745": 11.93461, - "1750": 11.90235, - "1755": 11.97053, - "1760": 11.89545, - "1765": 11.8564, - "1770": 11.92635, - "1775": 11.91815, - "1780": 11.91235, - "1785": 11.85546, - "1790": 11.93087, - "1795": 11.91138, - "1800": 11.95901, - "1805": 12.0529, - "1810": 11.98858, - "1815": 12.13997, - "1820": 11.94798, - "1825": 11.97682, - "1830": 11.91244, - "1835": 11.94888, - "1840": 11.93666, - "1845": 11.87312, - "1850": 11.86327, - "1855": 11.94769, - "1860": 12.00187, - "1865": 12.06916, - "1870": 11.99528, - "1875": 11.89416, - "1880": 12.02292, - "1885": 12.04249, - "1890": 11.94094, - "1895": 11.93619, - "1900": 11.95301, - "1905": 11.85793, - "1910": 11.96264, - "1915": 11.92826, - "1920": 11.94216, - "1925": 12.01307, - "1930": 11.98891, - "1935": 11.95834, - "1940": 11.92143, - "1945": 11.98459, - "1950": 
16.97099, - "1955": 11.89147, - "1960": 11.94643, - "1965": 11.92486, - "1970": 11.91542, - "1975": 13.09741, - "1980": 12.02148, - "1985": 11.92812, - "1990": 12.01102, - "1995": 11.94891, - "2000": 12.06741, - "2005": 11.94166, - "2010": 11.95871, - "2015": 12.00042, - "2020": 11.99101, - "2025": 11.95463, - "2030": 12.36755, - "2035": 11.96199, - "2040": 11.97863, - "2045": 12.01033, - "2050": 12.0643, - "2055": 11.96928, - "2060": 11.98383, - "2065": 11.92648, - "2070": 11.92379, - "2075": 11.97669, - "2080": 11.95508, - "2085": 11.94472, - "2090": 11.9663, - "2095": 11.93695, - "2100": 11.97178, - "2105": 11.98764, - "2110": 11.9516, - "2115": 11.9215, - "2120": 11.95207, - "2125": 11.95947, - "2130": 11.96722, - "2135": 11.97924, - "2140": 11.88777, - "2145": 11.95546, - "2150": 11.90266, - "2155": 11.97573, - "2160": 11.93275, - "2165": 11.98593, - "2170": 11.9842, - "2175": 12.00145, - "2180": 11.99219, - "2185": 11.96424, - "2190": 11.94313, - "2195": 11.93489, - "2200": 11.94356, - "2205": 12.00157, - "2210": 11.97153, - "2215": 11.9563, - "2220": 12.14117, - "2225": 11.97066, - "2230": 12.00037, - "2235": 11.95279, - "2240": 11.9544, - "2245": 11.97031, - "2250": 11.92229, - "2255": 11.98097, - "2260": 11.96529, - "2265": 11.98619, - "2270": 12.02117, - "2275": 11.94865, - "2280": 12.02569, - "2285": 11.98203, - "2290": 12.10479, - "2295": 11.95346, - "2300": 11.99961, - "2305": 11.96025, - "2310": 11.98746, - "2315": 11.95209, - "2320": 12.02644, - "2325": 11.95369, - "2330": 11.91985, - "2335": 11.93244, - "2340": 11.97061, - "2345": 11.90115, - "2350": 11.99136, - "2355": 12.0541, - "2360": 12.03728, - "2365": 11.95319, - "2370": 11.8917, - "2375": 11.94629, - "2380": 11.9087, - "2385": 11.91696, - "2390": 11.90123, - "2395": 11.87998, - "2400": 12.02954, - "2405": 11.97917, - "2410": 11.98456, - "2415": 11.9575, - "2420": 11.95917, - "2425": 11.95788, - "2430": 11.99944, - "2435": 12.00043, - "2440": 11.91339, - "2445": 11.97889, - "2450": 11.93997, 
- "2455": 11.91834, - "2460": 11.98321, - "2465": 11.94509, - "2470": 11.93387, - "2475": 11.9562, - "2480": 11.93148, - "2485": 11.94432, - "2490": 11.95477, - "2495": 11.94334, - "2500": 11.9284, - "2505": 11.93757, - "2510": 11.92289, - "2515": 11.97869, - "2520": 11.94858, - "2525": 11.96606, - "2530": 11.90894, - "2535": 11.95425, - "2540": 11.89136, - "2545": 11.94553, - "2550": 11.98026, - "2555": 11.93376, - "2560": 11.94866, - "2565": 11.92767, - "2570": 11.93583, - "2575": 11.97284, - "2580": 11.98911, - "2585": 11.95484, - "2590": 11.96399, - "2595": 11.96211, - "2600": 11.93906, - "2605": 11.9733, - "2610": 12.01872, - "2615": 11.99897, - "2620": 11.90926, - "2625": 11.93248, - "2630": 11.92842, - "2635": 11.94338, - "2640": 11.94678, - "2645": 11.95901, - "2650": 11.9296, - "2655": 12.02405, - "2660": 12.0166, - "2665": 12.01166, - "2670": 11.90595, - "2675": 11.98569, - "2680": 12.0118, - "2685": 11.92029, - "2690": 11.93111, - "2695": 12.00369, - "2700": 11.94818, - "2705": 11.99119, - "2710": 11.93978, - "2715": 11.9296, - "2720": 11.93044, - "2725": 11.94343, - "2730": 12.02248, - "2735": 11.95389, - "2740": 11.94611, - "2745": 11.92776, - "2750": 11.91647, - "2755": 11.9522, - "2760": 11.95012, - "2765": 11.96707, - "2770": 11.94892, - "2775": 11.9867, - "2780": 11.96897, - "2785": 11.97268, - "2790": 12.01936, - "2795": 11.97259, - "2800": 12.01028, - "2805": 11.94892, - "2810": 12.04828, - "2815": 11.93469, - "2820": 11.94568, - "2825": 11.92529, - "2830": 11.97458, - "2835": 11.99475, - "2840": 11.94984, - "2845": 11.93356, - "2850": 12.05796, - "2855": 11.99065, - "2860": 11.96077, - "2865": 11.9377, - "2870": 11.97627, - "2875": 11.97986, - "2880": 11.97201, - "2885": 11.91879, - "2890": 11.93586, - "2895": 12.00661, - "2900": 11.94616, - "2905": 11.94376, - "2910": 11.94168, - "2915": 11.94867, - "2920": 11.99355, - "2925": 11.94779, - "2930": 11.97133, - "2935": 11.96256, - "2940": 11.97787, - "2945": 11.93759, - "2950": 11.91863, - "2955": 
11.98973, - "2960": 12.00486, - "2965": 11.91623, - "2970": 11.94846, - "2975": 11.91534, - "2980": 11.97787, - "2985": 12.385, - "2990": 11.88498, - "2995": 11.92173, - "3000": 11.90561, - "3005": 11.86795, - "3010": 11.88075, - "3015": 11.87833, - "3020": 11.98777, - "3025": 11.90078, - "3030": 11.98251, - "3035": 11.92211, - "3040": 11.91067, - "3045": 12.04371, - "3050": 11.91886, - "3055": 11.952, - "3060": 11.90649, - "3065": 11.86917, - "3070": 11.86601, - "3075": 11.92435, - "3080": 11.98092, - "3085": 11.94809, - "3090": 12.20304, - "3095": 11.87329, - "3100": 11.92696, - "3105": 11.85799, - "3110": 11.84125, - "3115": 11.82558, - "3120": 11.87566, - "3125": 11.89426, - "3130": 11.85869, - "3135": 11.92893, - "3140": 11.97022, - "3145": 11.84939, - "3150": 11.9785, - "3155": 11.92499, - "3160": 11.8889, - "3165": 11.87938, - "3170": 11.95555, - "3175": 11.91883, - "3180": 11.85842, - "3185": 11.9325, - "3190": 11.86061, - "3195": 11.90479, - "3200": 11.85963, - "3205": 11.91214, - "3210": 11.9243, - "3215": 11.8472, - "3220": 11.86665, - "3225": 11.89836, - "3230": 11.86299, - "3235": 11.89396, - "3240": 11.87482, - "3245": 11.86774, - "3250": 11.86673, - "3255": 11.88133, - "3260": 11.9014, - "3265": 11.92289, - "3270": 11.98401, - "3275": 11.95198, - "3280": 11.87392, - "3285": 11.89268, - "3290": 11.88963, - "3295": 11.91043, - "3300": 11.89803, - "3305": 11.87011, - "3310": 11.84465, - "3315": 11.84015, - "3320": 11.88334, - "3325": 11.93368, - "3330": 11.83472, - "3335": 11.86862, - "3340": 11.87575, - "3345": 11.94875, - "3350": 11.93528, - "3355": 11.81967, - "3360": 11.95954, - "3365": 11.88024, - "3370": 11.88333, - "3375": 11.85751, - "3380": 11.88742, - "3385": 11.9179, - "3390": 11.83242, - "3395": 11.96084, - "3400": 11.88213, - "3405": 11.86112, - "3410": 11.8407, - "3415": 11.92255, - "3420": 11.91997, - "3425": 11.88372, - "3430": 11.8672, - "3435": 11.85235, - "3440": 11.84935, - "3445": 11.93228, - "3450": 11.85166, - "3455": 11.9026, - 
"3460": 11.99596, - "3465": 11.88838, - "3470": 11.90065, - "3475": 11.92033, - "3480": 11.87265, - "3485": 11.89235, - "3490": 11.89267, - "3495": 11.97544, - "3500": 11.92819, - "3505": 11.82459, - "3510": 11.90756, - "3515": 11.92021, - "3520": 11.88124, - "3525": 11.86983, - "3530": 11.90548, - "3535": 11.94666, - "3540": 11.93322, - "3545": 11.90904, - "3550": 11.85224, - "3555": 11.886, - "3560": 11.93583, - "3565": 11.87294, - "3570": 11.86107, - "3575": 11.83618, - "3580": 11.94649, - "3585": 11.8886, - "3590": 12.01796, - "3595": 11.86065, - "3600": 11.96008, - "3605": 11.94154, - "3610": 11.91928, - "3615": 11.88551, - "3620": 11.8865, - "3625": 11.86807, - "3630": 11.98152, - "3635": 11.87685, - "3640": 11.89995, - "3645": 11.86485, - "3650": 11.94291, - "3655": 11.86472, - "3660": 11.84946, - "3665": 11.90789, - "3670": 11.86396, - "3675": 12.07226, - "3680": 11.8654, - "3685": 11.90154, - "3690": 11.87282, - "3695": 11.84993, - "3700": 11.92847, - "3705": 11.85848, - "3710": 11.86691, - "3715": 11.93176, - "3720": 11.86996, - "3725": 11.92665, - "3730": 11.90876, - "3735": 11.83597, - "3740": 11.8819, - "3745": 11.90119, - "3750": 11.90765, - "3755": 11.89791, - "3760": 11.91124, - "3765": 11.95606, - "3770": 11.93789, - "3775": 11.87152, - "3780": 11.89754, - "3785": 11.8704, - "3790": 11.88079, - "3795": 11.89363, - "3800": 11.88641, - "3805": 11.87724, - "3810": 11.86303, - "3815": 11.96793, - "3820": 11.97071, - "3825": 11.90678, - "3830": 11.84478, - "3835": 11.86339, - "3840": 11.84359, - "3845": 11.85381, - "3850": 11.89843, - "3855": 11.83659, - "3860": 11.8253, - "3865": 11.82796, - "3870": 11.93815, - "3875": 11.87584, - "3880": 11.85716, - "3885": 11.85848, - "3890": 11.84472, - "3895": 11.85001, - "3900": 11.90416, - "3905": 11.87723, - "3910": 11.90409, - "3915": 11.88375, - "3920": 11.9526, - "3925": 11.8796, - "3930": 11.92607, - "3935": 12.02111, - "3940": 11.89989, - "3945": 11.96829, - "3950": 11.92362, - "3955": 11.91298, - "3960": 
11.93391, - "3965": 11.9977, - "3970": 11.91134, - "3975": 11.87698, - "3980": 11.84039, - "3985": 11.8296, - "3990": 11.8824, - "3995": 12.03103, - "4000": 12.53061, - "4005": 11.99032, - "4010": 11.94569, - "4015": 12.02459, - "4020": 12.05098, - "4025": 11.9408, - "4030": 11.9872, - "4035": 11.91882, - "4040": 11.91053, - "4045": 11.94764, - "4050": 11.96252, - "4055": 11.92924, - "4060": 11.95584, - "4065": 11.96477, - "4070": 11.95333, - "4075": 11.95009, - "4080": 11.94196, - "4085": 11.96679, - "4090": 12.09863, - "4095": 12.09521, - "4100": 11.99854, - "4105": 12.05345, - "4110": 11.99127, - "4115": 12.05731, - "4120": 11.95072, - "4125": 12.09249, - "4130": 12.04972, - "4135": 11.892, - "4140": 11.93048, - "4145": 11.92862, - "4150": 12.00088, - "4155": 11.95542, - "4160": 12.01499, - "4165": 11.90691, - "4170": 11.99204, - "4175": 12.02661, - "4180": 12.08762, - "4185": 11.93626, - "4190": 11.96513, - "4195": 11.9247, - "4200": 11.89449, - "4205": 11.95353, - "4210": 11.90984, - "4215": 11.92857, - "4220": 11.99809, - "4225": 12.01358, - "4230": 12.00065, - "4235": 11.95146, - "4240": 12.12674, - "4245": 11.99718, - "4250": 11.98808, - "4255": 11.95388, - "4260": 11.91437, - "4265": 11.97358, - "4270": 11.99013, - "4275": 11.95746, - "4280": 11.9273, - "4285": 11.92873, - "4290": 11.94103, - "4295": 11.93054, - "4300": 11.92986, - "4305": 12.11627, - "4310": 11.95471, - "4315": 11.96985, - "4320": 12.03911, - "4325": 12.01041, - "4330": 11.93084, - "4335": 11.95171, - "4340": 12.03209, - "4345": 11.94503, - "4350": 11.95426, - "4355": 12.08714, - "4360": 12.18212, - "4365": 11.94575, - "4370": 11.96598, - "4375": 12.00939, - "4380": 12.08808, - "4385": 11.9772, - "4390": 12.02704, - "4395": 12.01062, - "4400": 11.94619, - "4405": 11.98609, - "4410": 11.98025, - "4415": 11.99156, - "4420": 11.96913, - "4425": 12.02991, - "4430": 11.98417, - "4435": 12.07654, - "4440": 12.09429, - "4445": 11.9962, - "4450": 11.91032, - "4455": 11.99724, - "4460": 11.94549, 
- "4465": 11.92313, - "4470": 11.98709, - "4475": 11.9946, - "4480": 12.041, - "4485": 11.98684, - "4490": 12.00793, - "4495": 11.96519, - "4500": 11.91768, - "4505": 11.93855, - "4510": 11.96344, - "4515": 11.93266, - "4520": 11.99772, - "4525": 12.00265, - "4530": 12.00144, - "4535": 11.93099, - "4540": 11.9976, - "4545": 12.04415, - "4550": 11.92104, - "4555": 11.97762, - "4560": 12.05513, - "4565": 12.08413, - "4570": 12.00561, - "4575": 12.03402, - "4580": 12.07435, - "4585": 11.91157, - "4590": 11.93266, - "4595": 12.00575, - "4600": 11.98764, - "4605": 12.07608, - "4610": 11.98608, - "4615": 12.23058, - "4620": 11.96992, - "4625": 11.98931, - "4630": 11.92725, - "4635": 11.94909, - "4640": 11.94336, - "4645": 11.95955, - "4650": 11.99978, - "4655": 11.95199, - "4660": 11.97643, - "4665": 12.03686, - "4670": 12.0499, - "4675": 11.98439, - "4680": 12.00394, - "4685": 11.97515, - "4690": 11.95102, - "4695": 12.07552, - "4700": 11.9222, - "4705": 11.97387, - "4710": 11.99203, - "4715": 11.93004, - "4720": 11.97237, - "4725": 12.00277, - "4730": 12.00835, - "4735": 11.97435, - "4740": 11.98233, - "4745": 11.92423, - "4750": 11.95154, - "4755": 12.02084, - "4760": 11.94378, - "4765": 11.95313, - "4770": 11.92338, - "4775": 11.92352, - "4780": 12.00277, - "4785": 11.94768, - "4790": 11.97296, - "4795": 11.98757, - "4800": 12.26361, - "4805": 11.90736, - "4810": 11.9844, - "4815": 12.04212, - "4820": 11.98762, - "4825": 12.89959, - "4830": 11.9442, - "4835": 12.35106, - "4840": 11.93828, - "4845": 11.92418, - "4850": 11.96443, - "4855": 12.03431, - "4860": 12.04422, - "4865": 11.9646, - "4870": 11.91857, - "4875": 11.95672, - "4880": 11.9198, - "4885": 11.96783, - "4890": 11.94953, - "4895": 11.96692, - "4900": 12.04475, - "4905": 12.05877, - "4910": 12.15039, - "4915": 12.15039, - "4920": 11.95008, - "4925": 11.96843, - "4930": 11.958, - "4935": 11.98531, - "4940": 11.90874, - "4945": 11.95752, - "4950": 12.01284, - "4955": 11.97799, - "4960": 11.99989, - "4965": 
11.9277, - "4970": 12.06095, - "4975": 11.95713, - "4980": 12.02719, - "4985": 11.96446, - "4990": 11.92043, - "4995": 11.99522, - "5000": 12.0792, - "5005": 11.95462, - "5010": 18.30939, - "5015": 12.57034, - "5020": 12.13652, - "5025": 11.95064, - "5030": 11.93538, - "5035": 12.01779, - "5040": 11.8639, - "5045": 11.89312, - "5050": 11.93054, - "5055": 11.89904, - "5060": 11.88635, - "5065": 11.89505, - "5070": 11.95957, - "5075": 11.96591, - "5080": 11.85594, - "5085": 11.87343, - "5090": 11.89162, - "5095": 11.9231, - "5100": 11.9213, - "5105": 11.9793, - "5110": 11.92942, - "5115": 11.87025, - "5120": 11.84167, - "5125": 11.92967, - "5130": 11.90523, - "5135": 11.8727, - "5140": 11.95822, - "5145": 11.97795, - "5150": 11.90614, - "5155": 11.88276, - "5160": 11.94188, - "5165": 11.91373, - "5170": 12.01192, - "5175": 11.85511, - "5180": 11.84375, - "5185": 11.88965, - "5190": 11.88542, - "5195": 11.85346, - "5200": 11.94188, - "5205": 11.92082, - "5210": 11.8821, - "5215": 11.92239, - "5220": 11.90608, - "5225": 11.8947, - "5230": 11.88619, - "5235": 11.8948, - "5240": 11.89599, - "5245": 11.88662, - "5250": 11.95415, - "5255": 11.96527, - "5260": 11.89009, - "5265": 11.87997, - "5270": 11.94016, - "5275": 11.89138, - "5280": 11.90447, - "5285": 11.86453, - "5290": 11.90845, - "5295": 11.89373, - "5300": 11.96084, - "5305": 12.00505, - "5310": 11.87874, - "5315": 11.94047, - "5320": 11.90115, - "5325": 11.8657, - "5330": 11.98456, - "5335": 11.89142, - "5340": 11.94056, - "5345": 11.88326, - "5350": 12.02941, - "5355": 11.94937, - "5360": 11.84158, - "5365": 11.85236, - "5370": 11.89414, - "5375": 11.92681, - "5380": 11.89983, - "5385": 11.93247, - "5390": 11.88545, - "5395": 11.85963, - "5400": 11.87187, - "5405": 11.92558, - "5410": 11.94364, - "5415": 11.9087, - "5420": 11.86332, - "5425": 11.92767, - "5430": 11.87425, - "5435": 11.91049, - "5440": 11.87699, - "5445": 11.93171, - "5450": 11.90161, - "5455": 11.921, - "5460": 11.88038, - "5465": 11.91315, - 
"5470": 11.89728, - "5475": 11.95689, - "5480": 11.98965, - "5485": 11.91576, - "5490": 11.89757, - "5495": 11.93064, - "5500": 11.88252, - "5505": 11.96073, - "5510": 11.86654, - "5515": 11.87886, - "5520": 11.90936, - "5525": 12.03373, - "5530": 11.90318, - "5535": 11.92154, - "5540": 11.90086, - "5545": 11.89022, - "5550": 11.90225, - "5555": 11.83513, - "5560": 11.91062, - "5565": 11.87125, - "5570": 11.87145, - "5575": 11.86357, - "5580": 11.91841, - "5585": 11.92436, - "5590": 11.9023, - "5595": 11.86709, - "5600": 11.91375, - "5605": 11.90872, - "5610": 11.8916, - "5615": 11.95578, - "5620": 11.89294, - "5625": 11.90784, - "5630": 11.92391, - "5635": 11.89956, - "5640": 11.89869, - "5645": 11.91776, - "5650": 11.9431, - "5655": 11.89517, - "5660": 11.88968, - "5665": 11.89529, - "5670": 11.91051, - "5675": 11.91888, - "5680": 11.90991, - "5685": 11.93985, - "5690": 11.90708, - "5695": 11.8876, - "5700": 11.95923, - "5705": 11.93355, - "5710": 11.87364, - "5715": 11.9268, - "5720": 11.98226, - "5725": 11.87678, - "5730": 11.83368, - "5735": 11.89468, - "5740": 11.90674, - "5745": 11.88476, - "5750": 11.86646, - "5755": 11.88929, - "5760": 11.85649, - "5765": 11.85565, - "5770": 11.93646, - "5775": 11.90704, - "5780": 12.04897, - "5785": 11.91885, - "5790": 11.90414, - "5795": 11.92795, - "5800": 11.9484, - "5805": 11.9947, - "5810": 11.88562, - "5815": 11.89893, - "5820": 11.86069, - "5825": 11.85602, - "5830": 11.90577, - "5835": 11.90369, - "5840": 11.95291, - "5845": 11.93547, - "5850": 11.89776, - "5855": 11.89365, - "5860": 11.88809, - "5865": 11.89502, - "5870": 11.90093, - "5875": 11.89463, - "5880": 11.85877, - "5885": 11.91775, - "5890": 11.9362, - "5895": 11.90238, - "5900": 11.89416, - "5905": 11.9161, - "5910": 11.91617, - "5915": 11.89704, - "5920": 11.86193, - "5925": 11.94942, - "5930": 11.85147, - "5935": 11.87033, - "5940": 11.9311, - "5945": 11.96348, - "5950": 11.96932, - "5955": 11.90137, - "5960": 11.87563, - "5965": 11.86128, - "5970": 
11.99512, - "5975": 11.92846, - "5980": 11.83738, - "5985": 11.88075, - "5990": 11.89265, - "5995": 11.92537, - "6000": 11.88009, - "6005": 11.9523, - "6010": 11.93509, - "6015": 11.89766, - "6020": 11.88045, - "6025": 11.87641, - "6030": 246.60413, - "6035": 12.33879, - "6040": 11.91607, - "6045": 11.95709, - "6050": 11.93381, - "6055": 11.91355, - "6060": 11.91286, - "6065": 11.97819, - "6070": 11.93373, - "6075": 11.85049, - "6080": 11.96747, - "6085": 11.93318, - "6090": 11.93239, - "6095": 11.8622, - "6100": 11.88525, - "6105": 11.97899, - "6110": 11.91577, - "6115": 11.92755, - "6120": 11.92296, - "6125": 11.99725, - "6130": 11.97753, - "6135": 11.92108, - "6140": 11.91607, - "6145": 11.9071, - "6150": 11.92499, - "6155": 11.91611, - "6160": 12.01604, - "6165": 11.89838, - "6170": 11.90254, - "6175": 11.96493, - "6180": 11.84452, - "6185": 11.91052, - "6190": 11.8712, - "6195": 11.90582, - "6200": 11.90605, - "6205": 11.98397, - "6210": 11.92035, - "6215": 11.96579, - "6220": 11.99275, - "6225": 11.88749, - "6230": 11.89369, - "6235": 11.95748, - "6240": 11.93057, - "6245": 11.94912, - "6250": 11.9372, - "6255": 11.90439, - "6260": 11.92527, - "6265": 11.95201, - "6270": 11.9095, - "6275": 11.97821, - "6280": 11.94458, - "6285": 11.90287, - "6290": 11.89278, - "6295": 11.96073, - "6300": 11.90554, - "6305": 11.88653, - "6310": 11.8962, - "6315": 11.93036, - "6320": 11.95396, - "6325": 11.94894, - "6330": 12.04569, - "6335": 11.88055, - "6340": 11.91066, - "6345": 11.89024, - "6350": 11.89994, - "6355": 11.92221, - "6360": 11.92333, - "6365": 11.91761, - "6370": 11.97313, - "6375": 11.90689, - "6380": 12.08922, - "6385": 11.94942, - "6390": 11.91702, - "6395": 11.90139, - "6400": 11.89012, - "6405": 11.9541, - "6410": 12.00044, - "6415": 11.89967, - "6420": 11.86695, - "6425": 11.87294, - "6430": 11.89524, - "6435": 11.94881, - "6440": 11.91361, - "6445": 11.91243, - "6450": 11.90246, - "6455": 11.88301, - "6460": 11.94133, - "6465": 11.95353, - "6470": 
11.93545, - "6475": 11.91767, - "6480": 11.904, - "6485": 11.97366, - "6490": 11.9268, - "6495": 11.92497, - "6500": 12.05293, - "6505": 11.83715, - "6510": 11.86732, - "6515": 11.90038, - "6520": 11.86776, - "6525": 11.86971, - "6530": 11.85789, - "6535": 11.88616, - "6540": 11.85825, - "6545": 11.82803, - "6550": 11.89596, - "6555": 11.89246, - "6560": 11.87827, - "6565": 11.87369, - "6570": 11.88103, - "6575": 11.86696, - "6580": 11.90165, - "6585": 11.85113, - "6590": 11.85101, - "6595": 11.80896, - "6600": 11.90596, - "6605": 11.87406, - "6610": 11.8658, - "6615": 11.86475, - "6620": 11.88848, - "6625": 11.85675, - "6630": 11.84722, - "6635": 11.83752, - "6640": 11.8855, - "6645": 11.91332, - "6650": 11.86288, - "6655": 11.89588, - "6660": 11.8071, - "6665": 11.84093, - "6670": 11.88653, - "6675": 11.88047, - "6680": 11.87018, - "6685": 11.8411, - "6690": 11.82244, - "6695": 11.86596, - "6700": 11.85423, - "6705": 11.86228, - "6710": 11.86517, - "6715": 11.87189, - "6720": 11.84138, - "6725": 11.88097, - "6730": 11.90906, - "6735": 11.91578, - "6740": 11.88058, - "6745": 11.88169, - "6750": 12.03575, - "6755": 11.84511, - "6760": 11.84038, - "6765": 11.83499, - "6770": 11.87927, - "6775": 11.81349, - "6780": 13.01048, - "6785": 11.81032, - "6790": 11.93614, - "6795": 11.97801, - "6800": 11.86, - "6805": 11.83039, - "6810": 11.8441, - "6815": 11.89187, - "6820": 11.87841, - "6825": 11.86012, - "6830": 11.83442, - "6835": 11.85081, - "6840": 11.83799, - "6845": 11.82691, - "6850": 11.89092, - "6855": 11.82022, - "6860": 11.8279, - "6865": 11.79814, - "6870": 11.83217, - "6875": 11.90136, - "6880": 11.85295, - "6885": 11.84058, - "6890": 11.84482, - "6895": 11.82768, - "6900": 11.88337, - "6905": 11.84656, - "6910": 11.90272, - "6915": 11.8005, - "6920": 11.93804, - "6925": 12.00166, - "6930": 11.88293, - "6935": 11.9479, - "6940": 11.85228, - "6945": 11.86242, - "6950": 11.83582, - "6955": 11.81523, - "6960": 11.75894, - "6965": 11.81699, - "6970": 11.85282, - 
"6975": 11.84727, - "6980": 11.84729, - "6985": 12.01189, - "6990": 11.86887, - "6995": 11.88713, - "7000": 11.85612, - "7005": 11.86648, - "7010": 11.8888, - "7015": 11.84573, - "7020": 11.77395, - "7025": 11.85096, - "7030": 11.86323, - "7035": 11.84315, - "7040": 11.82293, - "7045": 11.81241, - "7050": 11.85808, - "7055": 11.86593, - "7060": 11.87475, - "7065": 11.90707, - "7070": 11.9358, - "7075": 11.84297, - "7080": 11.80853, - "7085": 11.88178, - "7090": 11.87836, - "7095": 11.85532, - "7100": 11.89414, - "7105": 11.85379, - "7110": 11.89642, - "7115": 11.85858, - "7120": 11.90327, - "7125": 11.89711, - "7130": 11.89177, - "7135": 11.88659, - "7140": 11.85757, - "7145": 11.87756, - "7150": 11.88577, - "7155": 11.86153, - "7160": 11.92297, - "7165": 11.88396, - "7170": 11.85778, - "7175": 11.91483, - "7180": 11.86232, - "7185": 11.87476, - "7190": 11.8982, - "7195": 11.88516, - "7200": 11.88158, - "7205": 11.88444, - "7210": 11.89206, - "7215": 11.87279, - "7220": 11.90742, - "7225": 11.85079, - "7230": 11.8483, - "7235": 11.90312, - "7240": 11.87181, - "7245": 11.91535, - "7250": 11.87908, - "7255": 11.92293, - "7260": 11.84549, - "7265": 11.8901, - "7270": 11.84322, - "7275": 11.848, - "7280": 11.8967, - "7285": 11.89986, - "7290": 11.95382, - "7295": 11.90753, - "7300": 11.86218, - "7305": 11.85436, - "7310": 11.85753, - "7315": 11.9134, - "7320": 11.90034, - "7325": 11.83407, - "7330": 11.85974, - "7335": 11.90032, - "7340": 11.88835, - "7345": 11.88443, - "7350": 11.85147, - "7355": 11.86003, - "7360": 11.88911, - "7365": 11.88721, - "7370": 11.94597, - "7375": 11.88507, - "7380": 11.8675, - "7385": 11.88615, - "7390": 11.85493, - "7395": 11.9078, - "7400": 11.89976, - "7405": 11.94755, - "7410": 11.86216, - "7415": 11.81832, - "7420": 11.89699, - "7425": 11.90201, - "7430": 11.88324, - "7435": 11.84242, - "7440": 11.89387, - "7445": 11.85554, - "7450": 11.927, - "7455": 11.89196, - "7460": 11.93241, - "7465": 11.89671, - "7470": 11.8633, - "7475": 
11.85785, - "7480": 11.86619, - "7485": 11.90047, - "7490": 11.93453, - "7495": 11.89595, - "7500": 11.92255, - "7505": 11.86705, - "7510": 11.86492, - "7515": 11.83778, - "7520": 12.43308, - "7525": 11.94046, - "7530": 12.11911, - "7535": 11.95645, - "7540": 12.01144, - "7545": 11.94459, - "7550": 12.00989, - "7555": 11.95308, - "7560": 12.02894, - "7565": 12.00926, - "7570": 11.88032, - "7575": 11.94986, - "7580": 11.94673, - "7585": 11.92777, - "7590": 11.96311, - "7595": 11.90291, - "7600": 11.96776, - "7605": 11.91009, - "7610": 11.98945, - "7615": 11.943, - "7620": 11.97203, - "7625": 11.87696, - "7630": 11.92313, - "7635": 11.9056, - "7640": 11.89922, - "7645": 11.93063, - "7650": 11.89735, - "7655": 11.93078, - "7660": 11.95494, - "7665": 11.91011, - "7670": 11.97093, - "7675": 11.97514, - "7680": 11.93177, - "7685": 11.8992, - "7690": 11.94571, - "7695": 11.92277, - "7700": 11.94906, - "7705": 11.92727, - "7710": 11.93604, - "7715": 11.92305, - "7720": 11.93766, - "7725": 11.95622, - "7730": 11.90603, - "7735": 11.91132, - "7740": 11.97695, - "7745": 11.96601, - "7750": 11.88967, - "7755": 11.93644, - "7760": 11.96688, - "7765": 11.92672, - "7770": 23.39259, - "7775": 23.06567, - "7780": 11.93112, - "7785": 11.93477, - "7790": 11.94106, - "7795": 11.94556, - "7800": 12.0002, - "7805": 11.97342, - "7810": 11.95163, - "7815": 11.96208, - "7820": 11.96513, - "7825": 11.93368, - "7830": 11.91708, - "7835": 11.89017, - "7840": 11.94549, - "7845": 11.96002, - "7850": 11.95829, - "7855": 11.92186, - "7860": 11.93832, - "7865": 11.889, - "7870": 11.96191, - "7875": 12.05703, - "7880": 11.97288, - "7885": 11.91666, - "7890": 11.93728, - "7895": 11.96047, - "7900": 11.9818, - "7905": 11.92242, - "7910": 11.97684, - "7915": 11.91154, - "7920": 11.96828, - "7925": 11.94506, - "7930": 11.93465, - "7935": 11.90216, - "7940": 11.91383, - "7945": 11.91481, - "7950": 11.96693, - "7955": 11.94446, - "7960": 11.92358, - "7965": 11.94155, - "7970": 11.95822, - "7975": 
12.03469, - "7980": 11.94102, - "7985": 11.94681, - "7990": 11.92459, - "7995": 11.92763, - "8000": 11.96299, - "8005": 11.9788, - "8010": 11.96826, - "8015": 12.02982, - "8020": 11.94329, - "8025": 11.98105, - "8030": 12.01501, - "8035": 11.96502, - "8040": 11.97586, - "8045": 11.96948, - "8050": 11.92611, - "8055": 11.93414, - "8060": 11.93961, - "8065": 11.9262, - "8070": 11.9178, - "8075": 11.90325, - "8080": 11.93833, - "8085": 11.97936, - "8090": 11.99724, - "8095": 11.94796, - "8100": 11.9625, - "8105": 11.94798, - "8110": 11.92353, - "8115": 11.96357, - "8120": 11.92451, - "8125": 11.89352, - "8130": 11.97563, - "8135": 11.97236, - "8140": 11.9723, - "8145": 11.92641, - "8150": 11.89834, - "8155": 11.94876, - "8160": 11.95465, - "8165": 11.95874, - "8170": 11.93402, - "8175": 11.96745, - "8180": 11.91172, - "8185": 11.91331, - "8190": 11.95504, - "8195": 11.94346, - "8200": 11.95192, - "8205": 11.9973, - "8210": 11.95023, - "8215": 12.03521, - "8220": 11.96486, - "8225": 11.95464, - "8230": 11.96151, - "8235": 11.95994, - "8240": 11.97909, - "8245": 11.92928, - "8250": 11.92518, - "8255": 11.94881, - "8260": 11.907, - "8265": 11.93185, - "8270": 11.9211, - "8275": 11.86366, - "8280": 12.00914, - "8285": 11.97086, - "8290": 11.98208, - "8295": 11.92309, - "8300": 11.94129, - "8305": 11.99302, - "8310": 11.97601, - "8315": 11.88862, - "8320": 11.96454, - "8325": 11.89961, - "8330": 11.99534, - "8335": 11.91687, - "8340": 11.96466, - "8345": 11.93152, - "8350": 11.94368, - "8355": 11.92235, - "8360": 11.99578, - "8365": 11.90045, - "8370": 11.91744, - "8375": 11.92667, - "8380": 11.90428, - "8385": 11.94828, - "8390": 11.93507, - "8395": 11.9473, - "8400": 11.94267, - "8405": 11.93414, - "8410": 11.90959, - "8415": 11.92941, - "8420": 11.91201, - "8425": 11.91625, - "8430": 11.9332, - "8435": 11.99456, - "8440": 11.8869, - "8445": 11.90729, - "8450": 11.93362, - "8455": 11.96619, - "8460": 12.01359, - "8465": 11.9429, - "8470": 11.99594, - "8475": 11.95465, - 
"8480": 11.92489, - "8485": 11.92415, - "8490": 11.97388, - "8495": 11.89913, - "8500": 11.95945, - "8505": 11.91567, - "8510": 11.91482, - "8515": 11.93548, - "8520": 11.95743, - "8525": 11.94743, - "8530": 12.42097, - "8535": 11.9272, - "8540": 12.09436, - "8545": 12.04967, - "8550": 11.9651, - "8555": 12.03857, - "8560": 11.97265, - "8565": 11.91082, - "8570": 11.95406, - "8575": 11.94802, - "8580": 11.9942, - "8585": 11.96288, - "8590": 11.95701, - "8595": 11.97786, - "8600": 11.89715, - "8605": 11.93644, - "8610": 11.98611, - "8615": 11.91557, - "8620": 11.92076, - "8625": 11.96113, - "8630": 11.99266, - "8635": 11.93916, - "8640": 12.02781, - "8645": 11.99006, - "8650": 11.91164, - "8655": 11.91924, - "8660": 11.95194, - "8665": 12.00021, - "8670": 11.90972, - "8675": 11.96086, - "8680": 11.95175, - "8685": 11.95495, - "8690": 12.00198, - "8695": 12.07659, - "8700": 11.96371, - "8705": 11.91845, - "8710": 11.97745, - "8715": 11.93805, - "8720": 11.9173, - "8725": 11.91035, - "8730": 12.01393, - "8735": 11.98447, - "8740": 11.97475, - "8745": 11.96291, - "8750": 11.9361, - "8755": 11.96838, - "8760": 11.93695, - "8765": 12.00162, - "8770": 11.92599, - "8775": 12.0012, - "8780": 12.03738, - "8785": 11.94909, - "8790": 11.90577, - "8795": 11.97012, - "8800": 11.93035, - "8805": 11.99893, - "8810": 11.94421, - "8815": 11.98191, - "8820": 11.99062, - "8825": 11.92267, - "8830": 11.95194, - "8835": 11.937, - "8840": 11.97075, - "8845": 11.95007, - "8850": 12.02522, - "8855": 11.94712, - "8860": 11.96728, - "8865": 11.89285, - "8870": 11.94189, - "8875": 11.92065, - "8880": 11.98822, - "8885": 11.98285, - "8890": 11.99582, - "8895": 11.96596, - "8900": 11.94354, - "8905": 11.95473, - "8910": 11.99259, - "8915": 11.96618, - "8920": 11.93587, - "8925": 11.99413, - "8930": 12.00638, - "8935": 11.93, - "8940": 11.95031, - "8945": 11.91928, - "8950": 11.9941, - "8955": 11.94031, - "8960": 11.96914, - "8965": 11.95062, - "8970": 11.95268, - "8975": 12.03161, - "8980": 
11.97245, - "8985": 12.01027, - "8990": 11.9446, - "8995": 11.96843, - "9000": 11.9429, - "9005": 11.94091, - "9010": 11.93667, - "9015": 11.95344, - "9020": 11.93207, - "9025": 11.91998, - "9030": 11.92651, - "9035": 11.97131, - "9040": 11.92008, - "9045": 11.9777, - "9050": 11.93287, - "9055": 11.96682, - "9060": 11.982, - "9065": 11.9763, - "9070": 11.92703, - "9075": 11.95149, - "9080": 11.94863, - "9085": 11.92217, - "9090": 11.92326, - "9095": 11.9586, - "9100": 11.93403, - "9105": 11.97708, - "9110": 11.97248, - "9115": 11.91899, - "9120": 11.98175, - "9125": 12.0043, - "9130": 11.98361, - "9135": 11.95811, - "9140": 11.89116, - "9145": 11.92833, - "9150": 11.96999, - "9155": 11.95682, - "9160": 11.93898, - "9165": 11.98676, - "9170": 11.96776, - "9175": 11.91735, - "9180": 11.96488, - "9185": 11.93801, - "9190": 11.93829, - "9195": 11.96444, - "9200": 11.91924, - "9205": 11.99554, - "9210": 11.91977, - "9215": 11.99739, - "9220": 11.92053, - "9225": 11.93702, - "9230": 11.95815, - "9235": 12.05346, - "9240": 11.9596, - "9245": 11.97173, - "9250": 11.94092, - "9255": 11.94632, - "9260": 12.00354, - "9265": 11.96854, - "9270": 11.91621, - "9275": 11.94709, - "9280": 11.93375, - "9285": 11.92465, - "9290": 11.93047, - "9295": 11.93184, - "9300": 11.95538, - "9305": 11.96102, - "9310": 11.93874, - "9315": 11.94123, - "9320": 11.95854, - "9325": 11.98961, - "9330": 11.87394, - "9335": 11.97986, - "9340": 12.02583, - "9345": 11.94202, - "9350": 12.00113, - "9355": 11.97405, - "9360": 11.96746, - "9365": 11.96018, - "9370": 11.9475, - "9375": 11.94327, - "9380": 11.92135, - "9385": 12.01574, - "9390": 11.95494, - "9395": 11.93529, - "9400": 11.96463, - "9405": 11.9807, - "9410": 11.92926, - "9415": 11.95919, - "9420": 11.94796, - "9425": 11.94261, - "9430": 11.94968, - "9435": 11.9655, - "9440": 11.94016, - "9445": 11.98541, - "9450": 11.94602, - "9455": 11.96365, - "9460": 11.9884, - "9465": 11.93962, - "9470": 11.93471, - "9475": 11.91073, - "9480": 11.92557, - 
"9485": 11.93537, - "9490": 11.97267, - "9495": 11.93521, - "9500": 11.92542, - "9505": 12.00627, - "9510": 11.9749, - "9515": 11.97511, - "9520": 11.88493, - "9525": 11.91739, - "9530": 11.92418, - "9535": 11.97024 - } - } -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/model_config.yml b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/model_config.yml deleted file mode 100644 index cc8f2b814c2..00000000000 --- a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_release_sm/model_config.yml +++ /dev/null @@ -1,167 +0,0 @@ -# The proxy model is used for local code quality check. -# The proxy model should contain all the necessary components and settings but fewer parameters. -ENV_VARS: - TORCH_NCCL_AVOID_RECORD_STREAMS: 0 - NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1 - PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True - NCCL_NVLS_ENABLE: 0 - NVTE_FUSED_ATTN: 1 - NVTE_NORM_FWD_USE_CUDNN: 1 - NVTE_NORM_BWD_USE_CUDNN: 1 - PYTHONWARNINGS: ignore - NCCL_DEBUG: VERSION - NON_DETERMINSTIC_RESULTS: 1 - NVSHMEM_IB_ENABLE_IBGDA: 0 - CUDA_DEVICE_MAX_CONNECTIONS: 1 -TEST_TYPE: "release" -MODEL_ARGS: - # Distributed args - --distributed-timeout-minutes: 60 - --tensor-model-parallel-size: 2 - --pipeline-model-parallel-size: 4 - --pipeline-model-parallel-layout: Et*2\\|\\(tt\\|\\)*5t\\|tmL # Et*2|(tt|)*5t|tmL - --expert-model-parallel-size: 16 - --context-parallel-size: 1 - --expert-tensor-parallel-size: 1 - --use-distributed-optimizer: true - --overlap-grad-reduce: true - --overlap-param-gather: true - - # Training args - --use-mcore-models: true - --sequence-parallel: true - --use-flash-attn: true - --disable-bias-linear: true - --micro-batch-size: 1 - --global-batch-size: 512 - --train-samples: 24414062 - --exit-duration-in-mins: 220 - --no-check-for-nan-in-loss-and-grad: true - --cross-entropy-loss-fusion: true - 
--cross-entropy-fusion-impl: te - --manual-gc: true - --manual-gc-interval: 10 - - # Transformer Engine args - --transformer-impl: transformer_engine - - # Data args - --seq-length: 4096 - --data-cache-path: ${DATA_CACHE_PATH} - --tokenizer-type: GPTSentencePieceTokenizer - --tokenizer-model: ${DATA_PATH}/utils/nemotron_2_256k.model - --data-path: $DATA_BLEND - --split: 99,1,0 - --no-mmap-bin-files: true - --no-create-attention-mask-in-dataloader: true - --num-workers: 6 - - # Add network size args - --num-layers: 14 # original 61 layers - --hidden-size: 7168 - --ffn-hidden-size: 18432 - --num-attention-heads: 128 - --kv-channels: 128 - --max-position-embeddings: 4096 - --position-embedding-type: rope - --rotary-base: 10000 - --make-vocab-size-divisible-by: 3232 - --normalization: RMSNorm - --norm-epsilon: 1e-6 - --swiglu: true - --untie-embeddings-and-output-weights: true - --multi-latent-attention: true - --mtp-num-layers: 1 - --mtp-loss-scaling-factor: 0.1 - - # Add regularization args - --attention-dropout: 0.0 - --hidden-dropout: 0.0 - --clip-grad: 1.0 - --weight-decay: 0.1 - --qk-layernorm: true - - # Add learning rate args - --lr-decay-samples: 24413696 - --lr-warmup-samples: 1536000 - --lr-warmup-init: 1e-7 - --lr: 1e-5 - --min-lr: 1e-6 - --lr-decay-style: cosine - --adam-beta1: 0.9 - --adam-beta2: 0.95 - - # Add MoE args - --num-experts: 64 # local 4 + 1 shared, EP16 - --moe-layer-freq: ([0]*3+[1]*11) - --moe-ffn-hidden-size: 2048 - --moe-shared-expert-intermediate-size: 2048 - --moe-router-load-balancing-type: seq_aux_loss - --moe-router-topk: 8 - --moe-token-dispatcher-type: flex - --moe-enable-deepep: true - --moe-router-pre-softmax: true - --moe-grouped-gemm: true - --moe-aux-loss-coeff: 1e-4 - --moe-router-group-topk: 4 - --moe-router-num-groups: 8 - --moe-router-topk-scaling-factor: 2.5 - --moe-router-score-function: sigmoid - --moe-router-enable-expert-bias: true - --moe-router-bias-update-rate: 1e-3 - --moe-router-dtype: fp32 - 
--moe-permute-fusion: true - - # Add MLA args - --q-lora-rank: 1536 - --kv-lora-rank: 512 - --qk-head-dim: 128 - --qk-pos-emb-head-dim: 64 - --v-head-dim: 128 - --rotary-scaling-factor: 40 - --mscale: 1.0 - --mscale-all-dim: 1.0 - - # Add validation args - --eval-iters: 32 - --eval-interval: 200 - - # Add checkpointing args - --auto-detect-ckpt-format: - true - # Add checkpointing args - --save: ${CHECKPOINT_SAVE_PATH} - --load: ${CHECKPOINT_LOAD_PATH} - --save-interval: 500 - --save-retain-interval: 10000 - --dist-ckpt-strictness: log_all - - # Add initialization args - --init-method-std: 0.02 - - # Add logging args - --log-timers-to-tensorboard: true - --log-memory-to-tensorboard: true - --log-num-zeros-in-grad: true - --log-params-norm: true - --log-validation-ppl-to-tensorboard: true - --log-throughput: true - --log-interval: 1 - --logging-level: 40 - --tensorboard-dir: ${TENSORBOARD_PATH} - --wandb-project: megatron-core-release-runs - --wandb-entity: adlr - --wandb-exp-name: ${WANDB_EXPERIMENT} - --wandb-save-dir: ${WANDB_SAVE_PATH} - - # Add mixed precision args - --bf16: true - - # enable experimental - --enable-experimental: true - --exit-interval: 9536 -METRICS: - - "iteration-time" - - "lm loss" - - "mem-allocated-bytes" - - "mem-max-allocated-bytes" diff --git a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_gb_200_release_sm/model_config.yml b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_gb_200_release_sm/model_config.yaml similarity index 99% rename from tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_gb_200_release_sm/model_config.yml rename to tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_gb_200_release_sm/model_config.yaml index 1ad8597d932..a6c65afb712 100644 --- a/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etc1cp1_gb_200_release_sm/model_config.yml +++ 
b/tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_gb_200_release_sm/model_config.yaml @@ -161,7 +161,9 @@ MODEL_ARGS: # enable experimental --enable-experimental: true + --exit-interval: 9536 + METRICS: - "iteration-time" - "lm loss" From 9375be41242dc681601844265faa09a14877a80c Mon Sep 17 00:00:00 2001 From: Youngeun Kwon Date: Sun, 1 Feb 2026 01:43:13 -0800 Subject: [PATCH 261/334] Fix: nccl-ub in ddp path (#3181) Signed-off-by: Youngeun Kwon Co-authored-by: Xin Yao --- megatron/core/distributed/param_and_grad_buffer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/megatron/core/distributed/param_and_grad_buffer.py b/megatron/core/distributed/param_and_grad_buffer.py index db3948562f5..4840658b041 100644 --- a/megatron/core/distributed/param_and_grad_buffer.py +++ b/megatron/core/distributed/param_and_grad_buffer.py @@ -753,6 +753,12 @@ def _does_param_require_new_bucket(param): group=self.data_parallel_group, symmetric=not self.ddp_config.disable_symmetric_registration, ) + # Since nccl communicator group is created lazily, we need to perform a warmup call to + # initialize NCCL comm buffers for this dp_group before doing buffer registration. + torch.distributed.barrier() + tmp_warmup_tensor = torch.zeros([1], device="cuda") + torch.distributed.all_reduce(tmp_warmup_tensor, group=self.data_parallel_group) + torch.distributed.barrier() else: # If nccl_ub is False, mem_alloc_context is nullcontext. 
mem_alloc_context = nullcontext From 0f73a8ae9aad1ecc57b635e05196062c797bfb6a Mon Sep 17 00:00:00 2001 From: Yuzhong Wang Date: Mon, 2 Feb 2026 10:10:26 +0800 Subject: [PATCH 262/334] [dev] perf(moe): Refine gated delta net implementation (#3040) --- megatron/core/ssm/gated_delta_net.py | 86 ++++++++++++++-------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py index 16dc3a79ebb..99cb6cdcfc2 100644 --- a/megatron/core/ssm/gated_delta_net.py +++ b/megatron/core/ssm/gated_delta_net.py @@ -40,20 +40,18 @@ from megatron.core.utils import deprecate_inference_params, nvtx_range_pop, nvtx_range_push try: + from fla.modules.convolution import causal_conv1d from fla.modules.l2norm import l2norm from fla.ops.gated_delta_rule import chunk_gated_delta_rule HAVE_FLA = True except ImportError: + causal_conv1d = None + l2norm = None chunk_gated_delta_rule = None HAVE_FLA = False -try: - from causal_conv1d import causal_conv1d_fn -except ImportError: - causal_conv1d_fn = None - logger = logging.getLogger(__name__) @@ -204,6 +202,11 @@ def __init__( ) setattr(self.A_log, "tensor_model_parallel", True) + if self.config.deterministic_mode: + self.gated_delta_rule = torch_chunk_gated_delta_rule + else: + self.gated_delta_rule = chunk_gated_delta_rule + # Output layernorm before projection self.out_norm = build_module( submodules.out_norm, @@ -337,8 +340,8 @@ def forward( alpha = alpha.reshape(batch, seq_len, -1) # Convolution on qkv - qkv = qkv.transpose(1, 2).contiguous() # b, s, d -> b, d, s nvtx_range_push(suffix="conv1d") + seq_len = qkv.shape[1] qkv_channels_split_sections = [ self.qk_dim_local_tp, self.qk_dim_local_tp, @@ -360,9 +363,10 @@ def forward( if self.conv_bias else None ) - if (causal_conv1d_fn is None) or self.config.deterministic_mode: + if self.config.deterministic_mode: + qkv = qkv.transpose(1, 2).contiguous() # b, s, d -> b, d, s conv_out = F.conv1d( - input=qkv, + 
input=qkv, # Torch-native only accept [b, d, s] format input weight=conv1d_weight, bias=conv1d_bias, stride=self.conv1d.stride, @@ -371,33 +375,39 @@ def forward( groups=self.conv_dim_local_tp // self.cp_size, ) qkv = self.act_fn(conv_out[..., :seq_len]) + qkv = qkv.transpose(1, 2) # b, d, s -> b, s, d else: assert self.activation in ["silu", "swish"] - qkv = causal_conv1d_fn( - x=qkv, + qkv, _ = causal_conv1d( + x=qkv, # FLA conv1d accepts [b, s, d] format input weight=conv1d_weight.squeeze(1), # d, 1, w -> d, w bias=conv1d_bias, activation=self.activation, + initial_state=None, + output_final_state=False, ) nvtx_range_pop(suffix="conv1d") - # Split qkv into query, key, and value - qkv = qkv.transpose(1, 2) # b, d, s -> b, s, d - query, key, value = torch.split( + + # Split qkv into query_key, and value + query_key, value = torch.split( qkv, - [ - self.qk_dim_local_tp // self.cp_size, - self.qk_dim_local_tp // self.cp_size, - self.v_dim_local_tp // self.cp_size, - ], + [2 * self.qk_dim_local_tp // self.cp_size, self.v_dim_local_tp // self.cp_size], dim=-1, ) - query = query.reshape(batch, seq_len, -1, self.key_head_dim) - key = key.reshape(batch, seq_len, -1, self.key_head_dim) + query_key = query_key.reshape(batch, seq_len, -1, self.key_head_dim) value = value.reshape(batch, seq_len, -1, self.value_head_dim) # Apply L2 norm to query and key if self.use_qk_l2norm: - query = l2norm(query.contiguous()) - key = l2norm(key.contiguous()) + query_key = l2norm(query_key.contiguous()) + # Split query and key. 
+ query, key = torch.split( + query_key, + [ + self.qk_dim_local_tp // self.key_head_dim // self.cp_size, + self.qk_dim_local_tp // self.key_head_dim // self.cp_size, + ], + dim=2, + ) if self.num_value_heads // self.num_key_heads > 1: query = query.repeat_interleave(self.num_value_heads // self.num_key_heads, dim=2) key = key.repeat_interleave(self.num_value_heads // self.num_key_heads, dim=2) @@ -421,28 +431,16 @@ def forward( nvtx_range_pop(suffix="g_and_beta") nvtx_range_push(suffix="gated_delta_rule") - if self.config.deterministic_mode: - core_attn_out, last_recurrent_state = torch_chunk_gated_delta_rule( - query, - key, - value, - g=g, - beta=beta, - initial_state=None, - output_final_state=False, - use_qk_l2norm_in_kernel=False, - ) - else: - core_attn_out, last_recurrent_state = chunk_gated_delta_rule( - query, - key, - value, - g=g, - beta=beta, - initial_state=None, - output_final_state=False, - use_qk_l2norm_in_kernel=False, - ) + core_attn_out, last_recurrent_state = self.gated_delta_rule( + query, + key, + value, + g=g, + beta=beta, + initial_state=None, + output_final_state=False, + use_qk_l2norm_in_kernel=False, + ) nvtx_range_pop(suffix="gated_delta_rule") # RMSNorm From 5035cbe4c96c4298f9531bbf37109f0b654d6f14 Mon Sep 17 00:00:00 2001 From: Li Tao Date: Mon, 2 Feb 2026 11:10:56 +0800 Subject: [PATCH 263/334] [Dev] Add the missing part to support 1F1B overlap for Qwen3-Next (#2996) --- megatron/core/ssm/gated_delta_net.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py index 99cb6cdcfc2..601a72a4356 100644 --- a/megatron/core/ssm/gated_delta_net.py +++ b/megatron/core/ssm/gated_delta_net.py @@ -561,6 +561,19 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None, tp_gr return sharded_state_dict + def backward_dw(self): + """Execute weight gradient computation for all linear layers.""" + self._backward_in_proj() + 
self._backward_out_proj() + + def _backward_in_proj(self): + """Computes weight gradients of input projection layer.""" + self.in_proj.backward_dw() + + def _backward_out_proj(self): + """Computes weight gradients of output projection layer.""" + self.out_proj.backward_dw() + #################### # Sharded state dict utilities From 4aac3fed3ec1c5dbc2169d0d905de86d4c593544 Mon Sep 17 00:00:00 2001 From: Tong Liu Date: Mon, 2 Feb 2026 14:26:02 +0800 Subject: [PATCH 264/334] Use the latest hybrid-ep (#3092) --- docker/Dockerfile.ci.dev | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index b43b7286506..bb9ca5fbe9a 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -70,7 +70,7 @@ RUN bash -ex <<"EOF" git clone --branch hybrid-ep https://github.com/deepseek-ai/DeepEP.git pushd DeepEP - git checkout 83e0d156807f31abed4ea55c2fa6eb4b62a11b82 + git checkout eb9cee7de5a24193bf09500668d3a619d3d3f3fb patch -p1 < /workspace/deepep.patch popd TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" uv pip install --no-build-isolation -v DeepEP/. From bfa1d3163804eb8ea65b77d1c0e807a3fcb959e9 Mon Sep 17 00:00:00 2001 From: Tong Liu Date: Mon, 2 Feb 2026 16:31:42 +0800 Subject: [PATCH 265/334] [BUG FIX] Try to enable cuda graph ut (#3192) --- tests/unit_tests/transformer/test_cuda_graphs.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/unit_tests/transformer/test_cuda_graphs.py b/tests/unit_tests/transformer/test_cuda_graphs.py index d602346c370..a1e9dab269f 100644 --- a/tests/unit_tests/transformer/test_cuda_graphs.py +++ b/tests/unit_tests/transformer/test_cuda_graphs.py @@ -1252,10 +1252,6 @@ def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispa extra_kwargs["moe_token_dispatcher_type"] = "flex" extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" elif moe_dispatcher_type == "hybridep": - pytest.skip( - "Currently, the Hybrid EP is broken. 
" - "Temporarily skip the test and wait for the fix." - ) if not is_hybrid_ep_available(): pytest.skip("Hybrid EP is not available") extra_kwargs["moe_token_dispatcher_type"] = "flex" @@ -1265,8 +1261,6 @@ def test_moe_partial_cudagraph(self, ep_size, moe_dropless_dispatcher, moe_dispa if not moe_dropless_dispatcher: if moe_dispatcher_type == "deepep": pytest.skip("Deep EP doesn't support drop&pad MoE") - if moe_dispatcher_type == "hybridep" and ep_size == 1: - pytest.skip("Hybrid EP doesn't support drop&pad MoE with ep_size == 1") extra_kwargs["moe_expert_capacity_factor"] = 1.0 extra_kwargs["moe_pad_expert_input_to_capacity"] = True From 13ad65379034f79687c6bc5d2ac3bd7e31df41b7 Mon Sep 17 00:00:00 2001 From: Jianbin Chang Date: Tue, 3 Feb 2026 09:40:18 +0800 Subject: [PATCH 266/334] [Dev] Fix Linear-Cross-Entropy Convergence Issue (#2739) Co-authored-by: Xin Yao --- .../linear_cross_entropy/blackwell/entry.py | 23 +-- .../common/language_module/language_module.py | 65 +-------- megatron/core/models/gpt/gpt_model.py | 31 ++-- megatron/core/models/mamba/mamba_model.py | 20 ++- .../core/transformer/linear_cross_entropy.py | 134 ++++++++++++++++++ 5 files changed, 169 insertions(+), 104 deletions(-) create mode 100644 megatron/core/transformer/linear_cross_entropy.py diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py b/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py index dc369a7c558..07e018b51ff 100644 --- a/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py +++ b/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py @@ -345,7 +345,8 @@ def backward( and num_valid_tokens.dtype == torch.int64 ) - d_hidden = torch.empty_like(global_hidden) + # Allocate d_hidden in float32 for better numerical stability + d_hidden = torch.empty_like(global_hidden, dtype=torch.float32) d_weight = torch.empty_like(weight) assert d_hidden.is_contiguous() and d_weight.is_contiguous() @@ -435,14 +436,15 @@ def backward( ) 
valid_d_logits = _d_logits[:, :vocab_right_bound] - torch.addmm( - input=d_hidden.view(-1, dim), - mat1=valid_d_logits, - mat2=weight[split_idx * vocab_per_split : (split_idx + 1) * vocab_per_split, :], - beta=(split_idx != 0), - alpha=1.0, - out=d_hidden.view(-1, dim), - ) + _delta_hidden = torch.mm( + valid_d_logits, + weight[split_idx * vocab_per_split : (split_idx + 1) * vocab_per_split, :], + out_dtype=torch.float32, + ).view_as(d_hidden) + if split_idx == 0: + d_hidden.copy_(_delta_hidden) + else: + d_hidden.add_(_delta_hidden) torch.matmul( valid_d_logits.T, hidden_view, @@ -466,6 +468,9 @@ def backward( ] d_hidden = d_hidden.view(partial_hidden_shape).clone() + # convert d_hidden to the original dtype + d_hidden = d_hidden.type_as(global_hidden) + return d_hidden, d_weight except ImportError: diff --git a/megatron/core/models/common/language_module/language_module.py b/megatron/core/models/common/language_module/language_module.py index 13d74aa5271..259bb716a93 100644 --- a/megatron/core/models/common/language_module/language_module.py +++ b/megatron/core/models/common/language_module/language_module.py @@ -1,7 +1,7 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import logging import os -from typing import Any, Dict, Literal, Optional, Tuple +from typing import Optional, Tuple import torch from torch import Tensor @@ -14,7 +14,6 @@ except: te_parallel_cross_entropy = None from megatron.core.fusions.fused_cross_entropy import fused_vocab_parallel_cross_entropy -from megatron.core.fusions.fused_linear_cross_entropy import linear_cross_entropy from megatron.core.pipeline_parallel.utils import ( is_pp_first_stage, is_pp_last_stage, @@ -126,68 +125,6 @@ def check_and_set_env_variable( check_and_set_env_variable("NVTE_FUSED_ATTN", 1, AttnBackend.auto) check_and_set_env_variable("NVTE_UNFUSED_ATTN", 1, AttnBackend.auto) - def compute_output_layer_and_language_model_loss( - self, - hidden: Tensor, - labels: Optional[Tensor], - weight: Tensor = None, - sequence_parallel_enabled: bool = False, - column_parallel_linear: torch.nn.Module = None, - col_linear_kwargs: Dict[str, Any] = {}, - reduction: Literal["none", "sum", "mean"] = "none", - ignore_index: int = -100, - ) -> Tensor: - """Computes the language model logits and loss (Cross entropy across vocabulary) - - Args: - hidden (Tensor): The hidden states from the transformer model - labels (Optional[Tensor]): The labels of dimension [batch size, seq length] - weight (Tensor): The weight tensor of shape [vocab size, hidden size]. - Required if using fused linear cross entropy. - column_parallel_linear (torch.nn.Module): The column parallel linear - layer to use for computing logits when not using fused linear cross entropy. - col_linear_kwargs (Dict[str, Any]): Additional kwargs for column parallel linear layer - reduction (Optional[str]): The reduction method. Defaults to "none", and can be - one of "none", "sum", "mean". - ignore_index (Optional[int]): The index to ignore in the loss calculation. - Defaults to -100. - - Returns: - Tensor: Loss tensor of dimensions [batch size, sequence_length]. 
- """ - if ( - self.config.cross_entropy_loss_fusion - and self.config.cross_entropy_fusion_impl == 'linear' - ): - assert ( - weight is not None - ), "weight cannot be None when using fused linear cross entropy." - assert ( - labels is not None - ), "labels cannot be None when using fused linear cross entropy." - # [b s] => [s b] - labels = labels.transpose(0, 1).contiguous() - loss = linear_cross_entropy( - hidden, - weight, - labels, - tp_group=self.pg_collection.tp, - sequence_parallel=sequence_parallel_enabled, - reduction=reduction, - ignore_index=ignore_index, - ) - - # [s b] => [b, s] - loss = loss.view_as(labels).transpose(0, 1).contiguous() - return loss - else: - assert ( - column_parallel_linear is not None - ), "column_parallel_linear cannot be None when not using fused linear cross entropy." - logits, _ = column_parallel_linear(hidden, **col_linear_kwargs) - - return self.compute_language_model_loss(labels, logits) - def compute_language_model_loss(self, labels: Tensor, logits: Tensor) -> Tensor: """Computes the language model loss (Cross entropy across vocabulary) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 3c65621a060..e89cb705920 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -25,6 +25,7 @@ from megatron.core.quantization.utils import get_quant_config_or_none from megatron.core.tensor_parallel import gather_from_sequence_parallel_region from megatron.core.transformer.enums import CudaGraphScope, ModelType +from megatron.core.transformer.linear_cross_entropy import LinearCrossEntropyModule from megatron.core.transformer.multi_token_prediction import ( MTPLossAutoScaler, MTPLossLoggingHelper, @@ -238,7 +239,7 @@ def __init__( self.embedding_activation_buffer = None self.grad_output_buffer = None - self.output_layer = tensor_parallel.ColumnParallelLinear( + self.output_layer = LinearCrossEntropyModule( config.hidden_size, self.vocab_size, 
config=config, @@ -633,16 +634,12 @@ def _postprocess( ) # Compute mtp loss without storing logits to save memory. - mtp_loss = self.compute_output_layer_and_language_model_loss( - hidden_states_list[mtp_layer_number + 1], + mtp_loss = self.output_layer( + output_cross_entropy_loss=True, + input_=hidden_states_list[mtp_layer_number + 1], + weight=output_weight, labels=mtp_labels, - weight=self.shared_embedding_or_output_weight(), - sequence_parallel_enabled=self.output_layer.sequence_parallel, - column_parallel_linear=self.output_layer, - col_linear_kwargs={ - 'weight': output_weight, - 'runtime_gather_output': runtime_gather_output, - }, + runtime_gather_output=runtime_gather_output, ) mtp_loss = loss_mask * mtp_loss @@ -721,16 +718,12 @@ def _postprocess( # [s b h] => [b s h] return logits.transpose(0, 1).contiguous() - loss = self.compute_output_layer_and_language_model_loss( - hidden_states, + loss = self.output_layer( + output_cross_entropy_loss=True, + input_=hidden_states, labels=labels, - weight=self.shared_embedding_or_output_weight(), - sequence_parallel_enabled=self.output_layer.sequence_parallel, - column_parallel_linear=self.output_layer, - col_linear_kwargs={ - 'weight': output_weight, - 'runtime_gather_output': runtime_gather_output, - }, + weight=output_weight, + runtime_gather_output=runtime_gather_output, ) return loss diff --git a/megatron/core/models/mamba/mamba_model.py b/megatron/core/models/mamba/mamba_model.py index 0acca7e8713..c91b14d9326 100644 --- a/megatron/core/models/mamba/mamba_model.py +++ b/megatron/core/models/mamba/mamba_model.py @@ -4,7 +4,6 @@ from torch import Tensor -from megatron.core import tensor_parallel from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk from megatron.core.inference.contexts import BaseInferenceContext from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding @@ -16,6 +15,7 @@ from megatron.core.tensor_parallel import 
gather_from_sequence_parallel_region from megatron.core.transformer import TransformerConfig from megatron.core.transformer.enums import ModelType +from megatron.core.transformer.linear_cross_entropy import LinearCrossEntropyModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module from megatron.core.utils import ( WrappedTensor, @@ -136,7 +136,7 @@ def __init__( # Output if post_process: - self.output_layer = tensor_parallel.ColumnParallelLinear( + self.output_layer = LinearCrossEntropyModule( config.hidden_size, self.vocab_size, config=config, @@ -304,16 +304,12 @@ def forward( # [s b h] => [b s h] return logits.transpose(0, 1).contiguous() - loss = self.compute_output_layer_and_language_model_loss( - hidden_states, - labels, - weight=self.shared_embedding_or_output_weight(), - sequence_parallel_enabled=self.output_layer.sequence_parallel, - column_parallel_linear=self.output_layer, - col_linear_kwargs={ - "weight": output_weight, - "runtime_gather_output": runtime_gather_output, - }, + loss = self.output_layer( + output_cross_entropy_loss=True, + input_=hidden_states, + labels=labels, + weight=output_weight, + runtime_gather_output=runtime_gather_output, ) return loss diff --git a/megatron/core/transformer/linear_cross_entropy.py b/megatron/core/transformer/linear_cross_entropy.py new file mode 100644 index 00000000000..373f2f20bf5 --- /dev/null +++ b/megatron/core/transformer/linear_cross_entropy.py @@ -0,0 +1,134 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
+ +from typing import Literal, Optional, Tuple, Union + +import torch + +from megatron.core import tensor_parallel +from megatron.core.fusions.fused_cross_entropy import fused_vocab_parallel_cross_entropy +from megatron.core.fusions.fused_linear_cross_entropy import linear_cross_entropy +from megatron.core.transformer.enums import CudaGraphScope +from megatron.core.utils import is_te_min_version + +try: + from megatron.core.extensions.transformer_engine import te_parallel_cross_entropy +except: + te_parallel_cross_entropy = None + + +class LinearCrossEntropyModule(tensor_parallel.ColumnParallelLinear): + """ + A module that combines a ColumnParallelLinear layer with fused + linear + cross-entropy loss computation over a tensor-parallel vocabulary. + """ + + def forward( + self, + input_: torch.Tensor, + weight: Optional[torch.Tensor] = None, + runtime_gather_output: Optional[bool] = None, + output_cross_entropy_loss: bool = False, + labels: Optional[torch.Tensor] = None, + reduction: Literal["none", "sum", "mean"] = "none", + ignore_index: int = -100, + ) -> Union[torch.Tensor, Tuple[torch.Tensor, Optional[torch.Tensor]]]: + """Run either the plain ColumnParallelLinear or fused linear+cross-entropy.""" + if output_cross_entropy_loss: + assert labels is not None, "labels cannot be None when outputting cross-entropy loss." + return self._compute_linear_and_cross_entropy_loss( + hidden=input_, + weight=weight if weight is not None else self.weight, + labels=labels, + reduction=reduction, + ignore_index=ignore_index, + ) + + # Fall back to standard ColumnParallelLinear forward. + # ColumnParallelLinear.forward returns (output, bias) or just output + # depending on configuration, so keep the return type as Tensor. 
+ return super().forward(input_, weight, runtime_gather_output) + + def _compute_linear_and_cross_entropy_loss( + self, + hidden: torch.Tensor, + weight: torch.Tensor, + runtime_gather_output: Optional[bool] = None, + labels: Optional[torch.Tensor] = None, + reduction: Literal["none", "sum", "mean"] = "none", + ignore_index: int = -100, + ) -> torch.Tensor: + """Compute fused linear + cross-entropy over tensor-parallel vocab.""" + if ( + self.config.cross_entropy_loss_fusion + and self.config.cross_entropy_fusion_impl == 'linear' + ): + assert ( + weight is not None + ), "weight cannot be None when using fused linear cross entropy." + assert ( + labels is not None + ), "labels cannot be None when using fused linear cross entropy." + + # [b s] => [s b] + labels = labels.transpose(0, 1).contiguous() + loss = linear_cross_entropy( + hidden, + self.weight, + labels, + sequence_parallel=self.sequence_parallel, + reduction=reduction, + ignore_index=ignore_index, + tp_group=self.tp_group, + ) + # If reduction != "none" this will be a scalar; for "none" it should + # match [s, b] and can be reshaped back to [b, s]. 
+ if reduction == "none": + loss = loss.view_as(labels).transpose(0, 1).contiguous() + else: + logits, _ = super().forward(hidden, weight, runtime_gather_output) + loss = self._compute_cross_entropy_loss(labels, logits) + + return loss + + def _compute_cross_entropy_loss( + self, labels: torch.Tensor, logits: torch.Tensor + ) -> Optional[torch.Tensor]: + """Compute (possibly fused) vocab-parallel cross-entropy loss.""" + loss = None + + # [b s] => [s b] + labels = labels.transpose(0, 1).contiguous() + if self.config.cross_entropy_loss_fusion: + if self.config.cross_entropy_fusion_impl == 'te': + if te_parallel_cross_entropy is not None: + labels = torch.as_strided(labels, labels.size(), (labels.size()[1], 1)) + # Use is_cg_capturable=True for full iteration CUDA graphs + # to avoid torch.equal checks + is_cg_capturable = ( + hasattr(self.config, 'cuda_graph_scope') + and CudaGraphScope.full_iteration in self.config.cuda_graph_scope + ) + if is_cg_capturable and not is_te_min_version("2.7.0"): + from megatron.core.utils import get_te_version + + current_version = get_te_version() + raise AssertionError( + f"CUDA graph compatible cross entropy requires " + f"TransformerEngine >= 2.7.0, but found version {current_version}. " + "Please upgrade TransformerEngine " + f"or set cuda_graph_scope to a value other than 'full_iteration'." 
+ ) + + loss = te_parallel_cross_entropy( + logits, labels, self.tp_group, is_cg_capturable + ) + else: + raise RuntimeError("Trying to use a TE block when it's not present.") + elif self.config.cross_entropy_fusion_impl == 'native': + loss = fused_vocab_parallel_cross_entropy(logits, labels, self.tp_group) + else: + loss = tensor_parallel.vocab_parallel_cross_entropy(logits, labels) + + # [s b] => [b, s] + loss = loss.transpose(0, 1).contiguous() + return loss From b8b8662278c35b6e0c7cc901ce0b8d5f6b94eb10 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Mon, 2 Feb 2026 21:56:03 -0600 Subject: [PATCH 267/334] Revert "[Dev] Fix Linear-Cross-Entropy Convergence Issue (#2739)" (#3218) Signed-off-by: Charlie Truong --- .../linear_cross_entropy/blackwell/entry.py | 23 ++- .../common/language_module/language_module.py | 65 ++++++++- megatron/core/models/gpt/gpt_model.py | 31 ++-- megatron/core/models/mamba/mamba_model.py | 20 +-- .../core/transformer/linear_cross_entropy.py | 134 ------------------ 5 files changed, 104 insertions(+), 169 deletions(-) delete mode 100644 megatron/core/transformer/linear_cross_entropy.py diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py b/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py index 07e018b51ff..dc369a7c558 100644 --- a/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py +++ b/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py @@ -345,8 +345,7 @@ def backward( and num_valid_tokens.dtype == torch.int64 ) - # Allocate d_hidden in float32 for better numerical stability - d_hidden = torch.empty_like(global_hidden, dtype=torch.float32) + d_hidden = torch.empty_like(global_hidden) d_weight = torch.empty_like(weight) assert d_hidden.is_contiguous() and d_weight.is_contiguous() @@ -436,15 +435,14 @@ def backward( ) valid_d_logits = _d_logits[:, :vocab_right_bound] - _delta_hidden = torch.mm( - valid_d_logits, - weight[split_idx * vocab_per_split : (split_idx + 1) * 
vocab_per_split, :], - out_dtype=torch.float32, - ).view_as(d_hidden) - if split_idx == 0: - d_hidden.copy_(_delta_hidden) - else: - d_hidden.add_(_delta_hidden) + torch.addmm( + input=d_hidden.view(-1, dim), + mat1=valid_d_logits, + mat2=weight[split_idx * vocab_per_split : (split_idx + 1) * vocab_per_split, :], + beta=(split_idx != 0), + alpha=1.0, + out=d_hidden.view(-1, dim), + ) torch.matmul( valid_d_logits.T, hidden_view, @@ -468,9 +466,6 @@ def backward( ] d_hidden = d_hidden.view(partial_hidden_shape).clone() - # convert d_hidden to the original dtype - d_hidden = d_hidden.type_as(global_hidden) - return d_hidden, d_weight except ImportError: diff --git a/megatron/core/models/common/language_module/language_module.py b/megatron/core/models/common/language_module/language_module.py index 259bb716a93..13d74aa5271 100644 --- a/megatron/core/models/common/language_module/language_module.py +++ b/megatron/core/models/common/language_module/language_module.py @@ -1,7 +1,7 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import logging import os -from typing import Optional, Tuple +from typing import Any, Dict, Literal, Optional, Tuple import torch from torch import Tensor @@ -14,6 +14,7 @@ except: te_parallel_cross_entropy = None from megatron.core.fusions.fused_cross_entropy import fused_vocab_parallel_cross_entropy +from megatron.core.fusions.fused_linear_cross_entropy import linear_cross_entropy from megatron.core.pipeline_parallel.utils import ( is_pp_first_stage, is_pp_last_stage, @@ -125,6 +126,68 @@ def check_and_set_env_variable( check_and_set_env_variable("NVTE_FUSED_ATTN", 1, AttnBackend.auto) check_and_set_env_variable("NVTE_UNFUSED_ATTN", 1, AttnBackend.auto) + def compute_output_layer_and_language_model_loss( + self, + hidden: Tensor, + labels: Optional[Tensor], + weight: Tensor = None, + sequence_parallel_enabled: bool = False, + column_parallel_linear: torch.nn.Module = None, + col_linear_kwargs: Dict[str, Any] = {}, + reduction: Literal["none", "sum", "mean"] = "none", + ignore_index: int = -100, + ) -> Tensor: + """Computes the language model logits and loss (Cross entropy across vocabulary) + + Args: + hidden (Tensor): The hidden states from the transformer model + labels (Optional[Tensor]): The labels of dimension [batch size, seq length] + weight (Tensor): The weight tensor of shape [vocab size, hidden size]. + Required if using fused linear cross entropy. + column_parallel_linear (torch.nn.Module): The column parallel linear + layer to use for computing logits when not using fused linear cross entropy. + col_linear_kwargs (Dict[str, Any]): Additional kwargs for column parallel linear layer + reduction (Optional[str]): The reduction method. Defaults to "none", and can be + one of "none", "sum", "mean". + ignore_index (Optional[int]): The index to ignore in the loss calculation. + Defaults to -100. + + Returns: + Tensor: Loss tensor of dimensions [batch size, sequence_length]. 
+ """ + if ( + self.config.cross_entropy_loss_fusion + and self.config.cross_entropy_fusion_impl == 'linear' + ): + assert ( + weight is not None + ), "weight cannot be None when using fused linear cross entropy." + assert ( + labels is not None + ), "labels cannot be None when using fused linear cross entropy." + # [b s] => [s b] + labels = labels.transpose(0, 1).contiguous() + loss = linear_cross_entropy( + hidden, + weight, + labels, + tp_group=self.pg_collection.tp, + sequence_parallel=sequence_parallel_enabled, + reduction=reduction, + ignore_index=ignore_index, + ) + + # [s b] => [b, s] + loss = loss.view_as(labels).transpose(0, 1).contiguous() + return loss + else: + assert ( + column_parallel_linear is not None + ), "column_parallel_linear cannot be None when not using fused linear cross entropy." + logits, _ = column_parallel_linear(hidden, **col_linear_kwargs) + + return self.compute_language_model_loss(labels, logits) + def compute_language_model_loss(self, labels: Tensor, logits: Tensor) -> Tensor: """Computes the language model loss (Cross entropy across vocabulary) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index e89cb705920..3c65621a060 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -25,7 +25,6 @@ from megatron.core.quantization.utils import get_quant_config_or_none from megatron.core.tensor_parallel import gather_from_sequence_parallel_region from megatron.core.transformer.enums import CudaGraphScope, ModelType -from megatron.core.transformer.linear_cross_entropy import LinearCrossEntropyModule from megatron.core.transformer.multi_token_prediction import ( MTPLossAutoScaler, MTPLossLoggingHelper, @@ -239,7 +238,7 @@ def __init__( self.embedding_activation_buffer = None self.grad_output_buffer = None - self.output_layer = LinearCrossEntropyModule( + self.output_layer = tensor_parallel.ColumnParallelLinear( config.hidden_size, self.vocab_size, 
config=config, @@ -634,12 +633,16 @@ def _postprocess( ) # Compute mtp loss without storing logits to save memory. - mtp_loss = self.output_layer( - output_cross_entropy_loss=True, - input_=hidden_states_list[mtp_layer_number + 1], - weight=output_weight, + mtp_loss = self.compute_output_layer_and_language_model_loss( + hidden_states_list[mtp_layer_number + 1], labels=mtp_labels, - runtime_gather_output=runtime_gather_output, + weight=self.shared_embedding_or_output_weight(), + sequence_parallel_enabled=self.output_layer.sequence_parallel, + column_parallel_linear=self.output_layer, + col_linear_kwargs={ + 'weight': output_weight, + 'runtime_gather_output': runtime_gather_output, + }, ) mtp_loss = loss_mask * mtp_loss @@ -718,12 +721,16 @@ def _postprocess( # [s b h] => [b s h] return logits.transpose(0, 1).contiguous() - loss = self.output_layer( - output_cross_entropy_loss=True, - input_=hidden_states, + loss = self.compute_output_layer_and_language_model_loss( + hidden_states, labels=labels, - weight=output_weight, - runtime_gather_output=runtime_gather_output, + weight=self.shared_embedding_or_output_weight(), + sequence_parallel_enabled=self.output_layer.sequence_parallel, + column_parallel_linear=self.output_layer, + col_linear_kwargs={ + 'weight': output_weight, + 'runtime_gather_output': runtime_gather_output, + }, ) return loss diff --git a/megatron/core/models/mamba/mamba_model.py b/megatron/core/models/mamba/mamba_model.py index c91b14d9326..0acca7e8713 100644 --- a/megatron/core/models/mamba/mamba_model.py +++ b/megatron/core/models/mamba/mamba_model.py @@ -4,6 +4,7 @@ from torch import Tensor +from megatron.core import tensor_parallel from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk from megatron.core.inference.contexts import BaseInferenceContext from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding @@ -15,7 +16,6 @@ from megatron.core.tensor_parallel import 
gather_from_sequence_parallel_region from megatron.core.transformer import TransformerConfig from megatron.core.transformer.enums import ModelType -from megatron.core.transformer.linear_cross_entropy import LinearCrossEntropyModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module from megatron.core.utils import ( WrappedTensor, @@ -136,7 +136,7 @@ def __init__( # Output if post_process: - self.output_layer = LinearCrossEntropyModule( + self.output_layer = tensor_parallel.ColumnParallelLinear( config.hidden_size, self.vocab_size, config=config, @@ -304,12 +304,16 @@ def forward( # [s b h] => [b s h] return logits.transpose(0, 1).contiguous() - loss = self.output_layer( - output_cross_entropy_loss=True, - input_=hidden_states, - labels=labels, - weight=output_weight, - runtime_gather_output=runtime_gather_output, + loss = self.compute_output_layer_and_language_model_loss( + hidden_states, + labels, + weight=self.shared_embedding_or_output_weight(), + sequence_parallel_enabled=self.output_layer.sequence_parallel, + column_parallel_linear=self.output_layer, + col_linear_kwargs={ + "weight": output_weight, + "runtime_gather_output": runtime_gather_output, + }, ) return loss diff --git a/megatron/core/transformer/linear_cross_entropy.py b/megatron/core/transformer/linear_cross_entropy.py deleted file mode 100644 index 373f2f20bf5..00000000000 --- a/megatron/core/transformer/linear_cross_entropy.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
- -from typing import Literal, Optional, Tuple, Union - -import torch - -from megatron.core import tensor_parallel -from megatron.core.fusions.fused_cross_entropy import fused_vocab_parallel_cross_entropy -from megatron.core.fusions.fused_linear_cross_entropy import linear_cross_entropy -from megatron.core.transformer.enums import CudaGraphScope -from megatron.core.utils import is_te_min_version - -try: - from megatron.core.extensions.transformer_engine import te_parallel_cross_entropy -except: - te_parallel_cross_entropy = None - - -class LinearCrossEntropyModule(tensor_parallel.ColumnParallelLinear): - """ - A module that combines a ColumnParallelLinear layer with fused - linear + cross-entropy loss computation over a tensor-parallel vocabulary. - """ - - def forward( - self, - input_: torch.Tensor, - weight: Optional[torch.Tensor] = None, - runtime_gather_output: Optional[bool] = None, - output_cross_entropy_loss: bool = False, - labels: Optional[torch.Tensor] = None, - reduction: Literal["none", "sum", "mean"] = "none", - ignore_index: int = -100, - ) -> Union[torch.Tensor, Tuple[torch.Tensor, Optional[torch.Tensor]]]: - """Run either the plain ColumnParallelLinear or fused linear+cross-entropy.""" - if output_cross_entropy_loss: - assert labels is not None, "labels cannot be None when outputting cross-entropy loss." - return self._compute_linear_and_cross_entropy_loss( - hidden=input_, - weight=weight if weight is not None else self.weight, - labels=labels, - reduction=reduction, - ignore_index=ignore_index, - ) - - # Fall back to standard ColumnParallelLinear forward. - # ColumnParallelLinear.forward returns (output, bias) or just output - # depending on configuration, so keep the return type as Tensor. 
- return super().forward(input_, weight, runtime_gather_output) - - def _compute_linear_and_cross_entropy_loss( - self, - hidden: torch.Tensor, - weight: torch.Tensor, - runtime_gather_output: Optional[bool] = None, - labels: Optional[torch.Tensor] = None, - reduction: Literal["none", "sum", "mean"] = "none", - ignore_index: int = -100, - ) -> torch.Tensor: - """Compute fused linear + cross-entropy over tensor-parallel vocab.""" - if ( - self.config.cross_entropy_loss_fusion - and self.config.cross_entropy_fusion_impl == 'linear' - ): - assert ( - weight is not None - ), "weight cannot be None when using fused linear cross entropy." - assert ( - labels is not None - ), "labels cannot be None when using fused linear cross entropy." - - # [b s] => [s b] - labels = labels.transpose(0, 1).contiguous() - loss = linear_cross_entropy( - hidden, - self.weight, - labels, - sequence_parallel=self.sequence_parallel, - reduction=reduction, - ignore_index=ignore_index, - tp_group=self.tp_group, - ) - # If reduction != "none" this will be a scalar; for "none" it should - # match [s, b] and can be reshaped back to [b, s]. 
- if reduction == "none": - loss = loss.view_as(labels).transpose(0, 1).contiguous() - else: - logits, _ = super().forward(hidden, weight, runtime_gather_output) - loss = self._compute_cross_entropy_loss(labels, logits) - - return loss - - def _compute_cross_entropy_loss( - self, labels: torch.Tensor, logits: torch.Tensor - ) -> Optional[torch.Tensor]: - """Compute (possibly fused) vocab-parallel cross-entropy loss.""" - loss = None - - # [b s] => [s b] - labels = labels.transpose(0, 1).contiguous() - if self.config.cross_entropy_loss_fusion: - if self.config.cross_entropy_fusion_impl == 'te': - if te_parallel_cross_entropy is not None: - labels = torch.as_strided(labels, labels.size(), (labels.size()[1], 1)) - # Use is_cg_capturable=True for full iteration CUDA graphs - # to avoid torch.equal checks - is_cg_capturable = ( - hasattr(self.config, 'cuda_graph_scope') - and CudaGraphScope.full_iteration in self.config.cuda_graph_scope - ) - if is_cg_capturable and not is_te_min_version("2.7.0"): - from megatron.core.utils import get_te_version - - current_version = get_te_version() - raise AssertionError( - f"CUDA graph compatible cross entropy requires " - f"TransformerEngine >= 2.7.0, but found version {current_version}. " - "Please upgrade TransformerEngine " - f"or set cuda_graph_scope to a value other than 'full_iteration'." 
- ) - - loss = te_parallel_cross_entropy( - logits, labels, self.tp_group, is_cg_capturable - ) - else: - raise RuntimeError("Trying to use a TE block when it's not present.") - elif self.config.cross_entropy_fusion_impl == 'native': - loss = fused_vocab_parallel_cross_entropy(logits, labels, self.tp_group) - else: - loss = tensor_parallel.vocab_parallel_cross_entropy(logits, labels) - - # [s b] => [b, s] - loss = loss.transpose(0, 1).contiguous() - return loss From 2ab74aba18c473ee59bf62251d7be06a31bf0173 Mon Sep 17 00:00:00 2001 From: Parth Mannan <38387286+parthmannan@users.noreply.github.com> Date: Mon, 2 Feb 2026 22:22:16 -0800 Subject: [PATCH 268/334] Fix missing PackedSeqParams import (#3215) --- megatron/core/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/megatron/core/utils.py b/megatron/core/utils.py index f1c8a42913b..fde77a2304a 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -45,6 +45,7 @@ from megatron.core import config from megatron.core.package_info import __version__ as mcore_version +from megatron.core.packed_seq_params import PackedSeqParams try: from torch.distributed._tensor import DTensor From 20e8ac8ff04f51d72c256fd9f247d5bbac71b4b8 Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Fri, 30 Jan 2026 16:42:28 +0800 Subject: [PATCH 269/334] fix merge main issues Signed-off-by: Deyu Fu --- megatron/training/arguments.py | 11 ----------- tests/unit_tests/models/test_mamba_moe_model.py | 1 - 2 files changed, 12 deletions(-) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 97338f1f528..1af066a8207 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1173,15 +1173,6 @@ def validate_args(args, defaults={}): args.no_load_optim = True warn_rank_0('enabling --no-load-optim when skipping training.') - # Experimental attention variant check - if args.linear_attention_type is not None: - print_rank_0( - '--linear-attention-type is deprecated, use 
--experimental-attention-variant instead.', - args.rank, - ) - args.experimental_attention_variant = args.linear_attention_type - del args.linear_attention_type - # Muon optimizer check if 'muon' in args.optimizer: # TODO: remove these checks once we support them @@ -2788,8 +2779,6 @@ def _add_mla_args(parser): def _add_experimental_attention_variant_args(parser): group = parser.add_argument_group(title="experimental_attention_variant") # Linear attention - group.add_argument('--linear-attention-type', default=None, choices=['gated_delta_net'], type=str, - help='(Deprecated, use --experimental-attention-variant instead) Type of linear attention to use. Currently support gated_delta_net.') group.add_argument('--linear-attention-freq', type=la_freq_type, default=None, help='Frequency between LA (linear attention) layers and' ' SDPA (scaled dot-product attention) layers. Accepts either: ' diff --git a/tests/unit_tests/models/test_mamba_moe_model.py b/tests/unit_tests/models/test_mamba_moe_model.py index aeedc96dfc7..2481649bc3f 100644 --- a/tests/unit_tests/models/test_mamba_moe_model.py +++ b/tests/unit_tests/models/test_mamba_moe_model.py @@ -273,7 +273,6 @@ "offload_modules": [], "hybrid_context_parallel": False, "max_seqlen_per_dp_cp_rank": None, - "enable_routing_replay": False, "fallback_to_eager_attn": False, "linear_attention_type": None, "moe_router_force_biased": None, From c5b282b8212195b008e741cd6da35039d5ca4140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 3 Feb 2026 22:26:40 +0100 Subject: [PATCH 270/334] ci(hotfix): Pin uv (#3233) (#3234) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/actions/action.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index f3e42e5843d..895b6863bef 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ 
-117,8 +117,10 @@ runs: export PYTHONPATH=$(pwd) export NEMORUN_HOME=$(pwd) export NCCL_DEBUG=INFO - pip install --no-cache-dir uv - uv sync --only-group test + pip install --no-cache-dir "uv!=0.9.29" + uv venv .venv + uv cache clean + uv sync --no-cache --only-group test uv run python tests/test_utils/python_scripts/launch_nemo_run_workload.py \ --scope unit-tests \ --model unit-tests \ @@ -197,8 +199,10 @@ runs: export PYTHONPATH=$(pwd) export NEMORUN_HOME=$(pwd) - pip install --no-cache-dir uv - uv sync --only-group test + pip install --no-cache-dir "uv!=0.9.29" + uv venv .venv + uv cache clean + uv sync --no-cache --only-group test uv run python tests/test_utils/python_scripts/launch_nemo_run_workload.py \ ${ARGS[@]} \ --model ${{ inputs.model }} \ From 8a29fd575242af7ab202bdf2cd3611f7f7041062 Mon Sep 17 00:00:00 2001 From: Jianbin Chang Date: Wed, 4 Feb 2026 09:47:22 +0800 Subject: [PATCH 271/334] [DEV] Reapply fix Linear CE Fusion (#3226) --- .../linear_cross_entropy/blackwell/entry.py | 23 +++--- .../common/language_module/language_module.py | 65 +--------------- megatron/core/models/gpt/gpt_model.py | 52 ++++++++----- megatron/core/models/mamba/mamba_model.py | 30 +++++--- .../core/transformer/linear_cross_entropy.py | 76 +++++++++++++++++++ 5 files changed, 140 insertions(+), 106 deletions(-) create mode 100644 megatron/core/transformer/linear_cross_entropy.py diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py b/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py index dc369a7c558..07e018b51ff 100644 --- a/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py +++ b/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py @@ -345,7 +345,8 @@ def backward( and num_valid_tokens.dtype == torch.int64 ) - d_hidden = torch.empty_like(global_hidden) + # Allocate d_hidden in float32 for better numerical stability + d_hidden = torch.empty_like(global_hidden, dtype=torch.float32) d_weight = torch.empty_like(weight) 
assert d_hidden.is_contiguous() and d_weight.is_contiguous() @@ -435,14 +436,15 @@ def backward( ) valid_d_logits = _d_logits[:, :vocab_right_bound] - torch.addmm( - input=d_hidden.view(-1, dim), - mat1=valid_d_logits, - mat2=weight[split_idx * vocab_per_split : (split_idx + 1) * vocab_per_split, :], - beta=(split_idx != 0), - alpha=1.0, - out=d_hidden.view(-1, dim), - ) + _delta_hidden = torch.mm( + valid_d_logits, + weight[split_idx * vocab_per_split : (split_idx + 1) * vocab_per_split, :], + out_dtype=torch.float32, + ).view_as(d_hidden) + if split_idx == 0: + d_hidden.copy_(_delta_hidden) + else: + d_hidden.add_(_delta_hidden) torch.matmul( valid_d_logits.T, hidden_view, @@ -466,6 +468,9 @@ def backward( ] d_hidden = d_hidden.view(partial_hidden_shape).clone() + # convert d_hidden to the original dtype + d_hidden = d_hidden.type_as(global_hidden) + return d_hidden, d_weight except ImportError: diff --git a/megatron/core/models/common/language_module/language_module.py b/megatron/core/models/common/language_module/language_module.py index 13d74aa5271..259bb716a93 100644 --- a/megatron/core/models/common/language_module/language_module.py +++ b/megatron/core/models/common/language_module/language_module.py @@ -1,7 +1,7 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import logging import os -from typing import Any, Dict, Literal, Optional, Tuple +from typing import Optional, Tuple import torch from torch import Tensor @@ -14,7 +14,6 @@ except: te_parallel_cross_entropy = None from megatron.core.fusions.fused_cross_entropy import fused_vocab_parallel_cross_entropy -from megatron.core.fusions.fused_linear_cross_entropy import linear_cross_entropy from megatron.core.pipeline_parallel.utils import ( is_pp_first_stage, is_pp_last_stage, @@ -126,68 +125,6 @@ def check_and_set_env_variable( check_and_set_env_variable("NVTE_FUSED_ATTN", 1, AttnBackend.auto) check_and_set_env_variable("NVTE_UNFUSED_ATTN", 1, AttnBackend.auto) - def compute_output_layer_and_language_model_loss( - self, - hidden: Tensor, - labels: Optional[Tensor], - weight: Tensor = None, - sequence_parallel_enabled: bool = False, - column_parallel_linear: torch.nn.Module = None, - col_linear_kwargs: Dict[str, Any] = {}, - reduction: Literal["none", "sum", "mean"] = "none", - ignore_index: int = -100, - ) -> Tensor: - """Computes the language model logits and loss (Cross entropy across vocabulary) - - Args: - hidden (Tensor): The hidden states from the transformer model - labels (Optional[Tensor]): The labels of dimension [batch size, seq length] - weight (Tensor): The weight tensor of shape [vocab size, hidden size]. - Required if using fused linear cross entropy. - column_parallel_linear (torch.nn.Module): The column parallel linear - layer to use for computing logits when not using fused linear cross entropy. - col_linear_kwargs (Dict[str, Any]): Additional kwargs for column parallel linear layer - reduction (Optional[str]): The reduction method. Defaults to "none", and can be - one of "none", "sum", "mean". - ignore_index (Optional[int]): The index to ignore in the loss calculation. - Defaults to -100. - - Returns: - Tensor: Loss tensor of dimensions [batch size, sequence_length]. 
- """ - if ( - self.config.cross_entropy_loss_fusion - and self.config.cross_entropy_fusion_impl == 'linear' - ): - assert ( - weight is not None - ), "weight cannot be None when using fused linear cross entropy." - assert ( - labels is not None - ), "labels cannot be None when using fused linear cross entropy." - # [b s] => [s b] - labels = labels.transpose(0, 1).contiguous() - loss = linear_cross_entropy( - hidden, - weight, - labels, - tp_group=self.pg_collection.tp, - sequence_parallel=sequence_parallel_enabled, - reduction=reduction, - ignore_index=ignore_index, - ) - - # [s b] => [b, s] - loss = loss.view_as(labels).transpose(0, 1).contiguous() - return loss - else: - assert ( - column_parallel_linear is not None - ), "column_parallel_linear cannot be None when not using fused linear cross entropy." - logits, _ = column_parallel_linear(hidden, **col_linear_kwargs) - - return self.compute_language_model_loss(labels, logits) - def compute_language_model_loss(self, labels: Tensor, logits: Tensor) -> Tensor: """Computes the language model loss (Cross entropy across vocabulary) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 3c65621a060..8e2301cd6f1 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -25,6 +25,7 @@ from megatron.core.quantization.utils import get_quant_config_or_none from megatron.core.tensor_parallel import gather_from_sequence_parallel_region from megatron.core.transformer.enums import CudaGraphScope, ModelType +from megatron.core.transformer.linear_cross_entropy import LinearCrossEntropyModule from megatron.core.transformer.multi_token_prediction import ( MTPLossAutoScaler, MTPLossLoggingHelper, @@ -146,6 +147,11 @@ def __init__( self.mtp_block_spec = mtp_block_spec self.mtp_process = mtp_block_spec is not None + self.fuse_linear_cross_entropy = ( + self.config.cross_entropy_loss_fusion + and self.config.cross_entropy_fusion_impl == "linear" + ) + if 
self.pre_process or self.mtp_process: self.embedding = LanguageModelEmbedding( config=self.config, @@ -238,7 +244,7 @@ def __init__( self.embedding_activation_buffer = None self.grad_output_buffer = None - self.output_layer = tensor_parallel.ColumnParallelLinear( + self.output_layer = LinearCrossEntropyModule( config.hidden_size, self.vocab_size, config=config, @@ -633,17 +639,20 @@ def _postprocess( ) # Compute mtp loss without storing logits to save memory. - mtp_loss = self.compute_output_layer_and_language_model_loss( - hidden_states_list[mtp_layer_number + 1], - labels=mtp_labels, - weight=self.shared_embedding_or_output_weight(), - sequence_parallel_enabled=self.output_layer.sequence_parallel, - column_parallel_linear=self.output_layer, - col_linear_kwargs={ - 'weight': output_weight, - 'runtime_gather_output': runtime_gather_output, - }, + output_layer_kwargs = dict( + input_=hidden_states_list[mtp_layer_number + 1], + weight=output_weight, + runtime_gather_output=runtime_gather_output, ) + if self.fuse_linear_cross_entropy: + mtp_loss = self.output_layer( + output_cross_entropy_loss=self.fuse_linear_cross_entropy, + labels=mtp_labels, + **output_layer_kwargs, + ) + else: + mtp_logits, _ = self.output_layer(**output_layer_kwargs) + mtp_loss = self.compute_language_model_loss(mtp_labels, mtp_logits) mtp_loss = loss_mask * mtp_loss if self.training: @@ -721,17 +730,18 @@ def _postprocess( # [s b h] => [b s h] return logits.transpose(0, 1).contiguous() - loss = self.compute_output_layer_and_language_model_loss( - hidden_states, - labels=labels, - weight=self.shared_embedding_or_output_weight(), - sequence_parallel_enabled=self.output_layer.sequence_parallel, - column_parallel_linear=self.output_layer, - col_linear_kwargs={ - 'weight': output_weight, - 'runtime_gather_output': runtime_gather_output, - }, + output_layer_kwargs = dict( + input_=hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output ) + if self.fuse_linear_cross_entropy: + 
loss = self.output_layer( + output_cross_entropy_loss=self.fuse_linear_cross_entropy, + labels=labels, + **output_layer_kwargs, + ) + else: + logits, _ = self.output_layer(**output_layer_kwargs) + loss = self.compute_language_model_loss(labels, logits) return loss diff --git a/megatron/core/models/mamba/mamba_model.py b/megatron/core/models/mamba/mamba_model.py index 115588e6d45..cf1002a5426 100644 --- a/megatron/core/models/mamba/mamba_model.py +++ b/megatron/core/models/mamba/mamba_model.py @@ -4,7 +4,6 @@ from torch import Tensor -from megatron.core import tensor_parallel from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk from megatron.core.inference.contexts import BaseInferenceContext from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding @@ -16,6 +15,7 @@ from megatron.core.tensor_parallel import gather_from_sequence_parallel_region from megatron.core.transformer import TransformerConfig from megatron.core.transformer.enums import ModelType +from megatron.core.transformer.linear_cross_entropy import LinearCrossEntropyModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module from megatron.core.utils import ( WrappedTensor, @@ -102,6 +102,11 @@ def __init__( # TODO: remove this dependency ? 
self.model_type = ModelType.encoder_or_decoder + self.fuse_linear_cross_entropy = ( + self.config.cross_entropy_loss_fusion + and self.config.cross_entropy_fusion_impl == "linear" + ) + if self.pre_process: self.embedding = LanguageModelEmbedding( config=self.config, @@ -136,7 +141,7 @@ def __init__( # Output if post_process: - self.output_layer = tensor_parallel.ColumnParallelLinear( + self.output_layer = LinearCrossEntropyModule( config.hidden_size, self.vocab_size, config=config, @@ -306,16 +311,17 @@ def forward( # [s b h] => [b s h] return logits.transpose(0, 1).contiguous() - loss = self.compute_output_layer_and_language_model_loss( - hidden_states, - labels, - weight=self.shared_embedding_or_output_weight(), - sequence_parallel_enabled=self.output_layer.sequence_parallel, - column_parallel_linear=self.output_layer, - col_linear_kwargs={ - "weight": output_weight, - "runtime_gather_output": runtime_gather_output, - }, + output_layer_kwargs = dict( + input_=hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output ) + if self.fuse_linear_cross_entropy: + loss = self.output_layer( + output_cross_entropy_loss=self.fuse_linear_cross_entropy, + labels=labels, + **output_layer_kwargs, + ) + else: + logits, _ = self.output_layer(**output_layer_kwargs) + loss = self.compute_language_model_loss(labels, logits) return loss diff --git a/megatron/core/transformer/linear_cross_entropy.py b/megatron/core/transformer/linear_cross_entropy.py new file mode 100644 index 00000000000..e7afe326e1c --- /dev/null +++ b/megatron/core/transformer/linear_cross_entropy.py @@ -0,0 +1,76 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
+ +from typing import Literal, Optional, Tuple, Union + +import torch + +from megatron.core import tensor_parallel +from megatron.core.fusions.fused_linear_cross_entropy import linear_cross_entropy + + +class LinearCrossEntropyModule(tensor_parallel.ColumnParallelLinear): + """ + A module that combines a ColumnParallelLinear layer with fused + linear + cross-entropy loss computation over a tensor-parallel vocabulary. + """ + + def forward( + self, + input_: torch.Tensor, + weight: Optional[torch.Tensor] = None, + runtime_gather_output: Optional[bool] = None, + output_cross_entropy_loss: bool = False, + labels: Optional[torch.Tensor] = None, + reduction: Literal["none", "sum", "mean"] = "none", + ignore_index: int = -100, + ) -> Union[torch.Tensor, Tuple[torch.Tensor, Optional[torch.Tensor]]]: + """Run either the plain ColumnParallelLinear or fused linear+cross-entropy.""" + if output_cross_entropy_loss: + assert labels is not None, "labels cannot be None when outputting cross-entropy loss." + return self._compute_linear_and_cross_entropy_loss( + hidden=input_, + weight=weight if weight is not None else self.weight, + labels=labels, + reduction=reduction, + ignore_index=ignore_index, + ) + + # Fall back to standard ColumnParallelLinear forward. + # ColumnParallelLinear.forward returns (output, bias) or just output + # depending on configuration, so keep the return type as Tensor. + return super().forward(input_, weight, runtime_gather_output) + + def _compute_linear_and_cross_entropy_loss( + self, + hidden: torch.Tensor, + weight: torch.Tensor, + labels: Optional[torch.Tensor] = None, + reduction: Literal["none", "sum", "mean"] = "none", + ignore_index: int = -100, + ) -> torch.Tensor: + """Compute fused linear + cross-entropy over tensor-parallel vocab.""" + assert self.config.cross_entropy_loss_fusion, "Cross-entropy loss fusion must be enabled." 
+ assert self.config.cross_entropy_fusion_impl == "linear", ( + "Cross-entropy loss fusion implementation must be 'linear' to use " + "_compute_linear_and_cross_entropy_loss." + ) + assert weight is not None, "weight cannot be None when using fused linear cross entropy." + assert labels is not None, "labels cannot be None when using fused linear cross entropy." + + # [b s] => [s b] + labels = labels.transpose(0, 1).contiguous() + loss = linear_cross_entropy( + hidden, + weight, + labels, + sequence_parallel=self.sequence_parallel, + reduction=reduction, + ignore_index=ignore_index, + tp_group=self.tp_group, + ) + # If reduction != "none" this will be a scalar; for "none" it should + # match [s, b] and can be reshaped back to [b, s]. + if reduction == "none": + loss = loss.view_as(labels).transpose(0, 1).contiguous() + + return loss From dd17acc39702b0fd96065a432af0d5b5d6203e4c Mon Sep 17 00:00:00 2001 From: Parth Mannan <38387286+parthmannan@users.noreply.github.com> Date: Tue, 3 Feb 2026 19:49:57 -0800 Subject: [PATCH 272/334] Missing import fix (#3242) --- megatron/core/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/megatron/core/utils.py b/megatron/core/utils.py index 9ef95285a2b..036589b209a 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -2094,8 +2094,8 @@ def get_thd_batch_on_this_cp_rank( max_seqlen_kv=int(max_seqlen[0].item()), ) - cp_size = get_context_parallel_world_size() if cp_size is None else cp_size - cp_rank = get_context_parallel_rank() if cp_rank is None else cp_rank + cp_size = parallel_state.get_context_parallel_world_size() if cp_size is None else cp_size + cp_rank = parallel_state.get_context_parallel_rank() if cp_rank is None else cp_rank if cp_size > 1: # slice batch along sequence dimension for context parallelism assert tex is not None and is_te_min_version("1.10.0"), ( "Please update Transformer Engine to >= 1.10 to use " From fa5bcf676e79cde3bd51745e78f84a7381fc2228 Mon Sep 17 00:00:00 
2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Wed, 4 Feb 2026 12:55:14 +0800 Subject: [PATCH 273/334] [Dev] Fix EP Overlap Bugs for Full-Iter CG (#3163) --- .../common/model_chunk_schedule_plan.py | 9 +- .../core/models/gpt/fine_grained_callables.py | 38 +++++--- megatron/core/pipeline_parallel/utils.py | 86 ++++++++++--------- .../transformer/test_submodule_callables.py | 4 +- 4 files changed, 80 insertions(+), 57 deletions(-) diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index d11e53d7fc2..f451942ffc2 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -123,14 +123,13 @@ def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args): # get flags for latter use is_mtp = isinstance(self.layer, MultiTokenPredictionLayer) - is_moe = ( - isinstance(self.layer.transformer_layer.mlp, MoELayer) - if is_mtp - else isinstance(self.layer.mlp, MoELayer) - ) + transformer_layer = self.layer.transformer_layer if is_mtp else self.layer + is_moe = isinstance(transformer_layer.mlp, MoELayer) + num_local_experts = transformer_layer.mlp.num_local_experts if is_moe else None extra_args["config"] = self.layer.config extra_args["is_moe"] = is_moe + extra_args["num_local_experts"] = num_local_experts extra_args["delay_wgrad_compute"] = self.layer.config.delay_wgrad_compute extra_args["is_mtp"] = is_mtp diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index 7cee9d2973c..b4fe64ee9bb 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -43,13 +43,14 @@ def wrapped_func(*args, **kwarg): @internal_api -def should_free_input(name, is_moe, config): +def should_free_input(name, is_moe, config, num_local_experts): """Determine if the node should free its 
input memory. Args: name: Node name is_moe: Whether it's a MoE model config: TransformerConfig object + num_local_experts: Number of local experts in MoE module Returns: bool: Whether to free input memory @@ -70,8 +71,19 @@ def should_free_input(name, is_moe, config): # when and how to free the input memory. # The input and output of A2A are not needed anymore after the forward pass, # so we can free the input memory after the forward pass. + + # When low precision fp8/4 is enabled, the casted tensors are saved and the + # original bf16 tensors are safe to be freed. + free_mlp = config.fp8 is not None or config.fp4 is not None + if not free_mlp: + # AlltoAll dispatcher with local_num_experts=1 and HybridEP both use identity + # operation for `dispatch_postprocess`, hence the mlp inputs will be directly + # passed to GroupedGemm and should be saved for backward pass. + free_mlp = num_local_experts > 1 or config.moe_token_dispatcher_type != "alltoall" + free_mlp = free_mlp and not enable_hybridep + free_input_nodes = { - "mlp": not enable_hybridep, + "mlp": free_mlp, "moe_combine": True, # For non-DeepEP and non-HybridEP dispatcher mode, the input is the un-dispatched tokens # and probs before dispatch A2A and it's not needed anymore after the forward pass @@ -256,7 +268,8 @@ def __init__( config = extra_args.get("config", None) assert config is not None, "model config must be passed to TransformerLayerNode." 
is_moe = extra_args.get("is_moe", False) - free_input = should_free_input(name, is_moe, config) + num_local_experts = extra_args.get("num_local_experts", None) + free_input = should_free_input(name, is_moe, config, num_local_experts) self.delay_wgrad_compute = extra_args.get("delay_wgrad_compute", False) super().__init__( @@ -316,7 +329,7 @@ def backward_dw(self): """Computes the weight gradients for the transformer layer node.""" if not self.delay_wgrad_compute: return - with torch.cuda.nvtx.range(f"{self.name} wgrad"): + with self.stream_acquire_context(f"{self.name} wgrad"): for module in self.bwd_dw_callables: module.backward_dw() @@ -514,15 +527,15 @@ def submodule_dispatch_forward( token_dispatcher._comm_manager.token_probs = probs dispatched_tokens, dispatched_probs = layer.mlp.dispatch(local_tokens, probs) - node.layer_state.dispatched_probs = node.detach(dispatched_probs) - return dispatched_tokens + return dispatched_tokens, dispatched_probs - def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): + def submodule_moe_forward( + node: ScheduleNode, dispatched_tokens: torch.Tensor, dispatched_probs: torch.Tensor + ): """ Run forward pass for computations between dispatch and combine: post dispatch->experts->combine preprocess """ - dispatched_probs = node.layer_state.dispatched_probs token_dispatcher = layer.mlp.token_dispatcher if enable_deepep or enable_hybridep: # update dispatched_probs to be detached version, prevents @@ -531,13 +544,16 @@ def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): expert_output, _ = layer.mlp.routed_experts_compute(dispatched_tokens, dispatched_probs) + # For HybridEP, tokens_per_expert is generated on comm stream, as the input to + # `routed_experts_compute`, it needs to be recorded to comp stream. 
+ if enable_hybridep: + tokens_per_expert = token_dispatcher._comm_manager.get_number_of_tokens_per_expert() + tokens_per_expert.record_stream(torch.cuda.current_stream()) + if layer.recompute_pre_mlp_layernorm: # discard the output of the pre-mlp layernorm and register the recompute # as a gradient hook of expert_output layer.pre_mlp_norm_checkpoint.discard_output_and_register_recompute(expert_output) - # release tensor reference after use - node.layer_state.dispatched_probs = None - node.layer_state.pre_mlp_layernorm_output = None return expert_output diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index 03c5f01f443..695968e2443 100644 --- a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -116,16 +116,6 @@ def set_ideal_affinity_for_current_gpu(): ) -@contextmanager -def stream_acquire_context(stream, event): - """Stream acquire context""" - event.wait(stream) - try: - yield - finally: - event.record(stream) - - class NoopScheduleNode: """A placeholder node in the computation graph that simply passes through inputs and outputs. 
@@ -208,26 +198,21 @@ def forward(self, inputs=()): return self._forward(*inputs) def _forward(self, *inputs): - with stream_acquire_context(self.stream, self.event): - torch.cuda.nvtx.range_push(f"{self.name} forward") - with torch.cuda.stream(self.stream): - self.inputs = [make_viewless(e).detach() if e is not None else None for e in inputs] - for i, input in enumerate(self.inputs): - if input is not None: - input.requires_grad = inputs[i].requires_grad + with self.stream_acquire_context(f"{self.name} forward"): + self.inputs = [make_viewless(e).detach() if e is not None else None for e in inputs] + for i, input in enumerate(self.inputs): + if input is not None: + input.requires_grad = inputs[i].requires_grad - data = tuple(self.inputs) - data = self.forward_func(*data) + data = tuple(self.inputs) + data = self.forward_func(*data) - if not isinstance(data, tuple): - data = make_viewless(data) - else: - data = tuple( - [make_viewless(e) if isinstance(e, torch.Tensor) else e for e in data] - ) + if not isinstance(data, tuple): + data = make_viewless(data) + else: + data = tuple([make_viewless(e) if isinstance(e, torch.Tensor) else e for e in data]) - self.output = data - torch.cuda.nvtx.range_pop() + self.output = data # Immediately frees input tensors after they are used for nodes # where inputs are no longer needed after computation. 
@@ -250,18 +235,15 @@ def backward(self, output_grad): return self._backward(*output_grad) def _backward(self, *output_grad): - with stream_acquire_context(self.stream, self.event): - torch.cuda.nvtx.range_push(f"{self.name} backward") - with torch.cuda.stream(self.stream): - outputs = self.output - if not isinstance(outputs, tuple): - outputs = (outputs,) - assert len(outputs) == len(output_grad), ( - f"{len(outputs)} of {type(outputs[0])} is not equal to " - f"{len(output_grad)} of {type(output_grad[0])}" - ) - output_grad = self.backward_func(outputs, output_grad) - torch.cuda.nvtx.range_pop() + with self.stream_acquire_context(f"{self.name} backward"): + outputs = self.output + if not isinstance(outputs, tuple): + outputs = (outputs,) + assert len(outputs) == len(output_grad), ( + f"{len(outputs)} of {type(outputs[0])} is not equal to " + f"{len(output_grad)} of {type(output_grad[0])}" + ) + output_grad = self.backward_func(outputs, output_grad) # output_grad maybe from another stream if output_grad: @@ -288,6 +270,32 @@ def get_grad(self): grad = grad[0] return grad + @contextmanager + def stream_acquire_context(self, name=None): + """Stream acquire context that handles event synchronization, + NVTX profiling, and stream context. + + This context manager consolidates: + 1. Event wait/record for synchronization between streams + 2. NVTX range for profiling (if name is provided) + 3. 
torch.cuda.stream context for execution on the specified stream + + Args: + stream: The CUDA stream to execute on + event: The CUDA event for synchronization + name: Optional name for NVTX range profiling + """ + self.event.wait(self.stream) + if name: + torch.cuda.nvtx.range_push(name) + try: + with torch.cuda.stream(self.stream): + yield + finally: + if name: + torch.cuda.nvtx.range_pop() + self.event.record(self.stream) + def _release_state(self): """Clear the state of the node""" self.inputs = None diff --git a/tests/unit_tests/transformer/test_submodule_callables.py b/tests/unit_tests/transformer/test_submodule_callables.py index 73059495c06..31bd3d18b80 100644 --- a/tests/unit_tests/transformer/test_submodule_callables.py +++ b/tests/unit_tests/transformer/test_submodule_callables.py @@ -79,10 +79,10 @@ def run_model_submodules_with_capture(model, input_tensors, microbatches): local_tokens, probs = attn(node, input_tensors[i]) # dispatch fwd - dispatched_tokens = dispatch(node, local_tokens, probs) + dispatched_tokens, dispatched_probs = dispatch(node, local_tokens, probs) # moe fwd - expert_output = moe(node, dispatched_tokens) + expert_output = moe(node, dispatched_tokens, dispatched_probs) # combine fwd hidden_states = combine(node, expert_output) From a5928198f0e5499a55b43fa47483d312a6da9f0e Mon Sep 17 00:00:00 2001 From: laixin Date: Wed, 4 Feb 2026 22:23:01 +0800 Subject: [PATCH 274/334] [Refactor] Decouple topk and loss from DSA Indexer (#3013) Co-authored-by: kunlunl Co-authored-by: Kunlun Li <94586211+kunlunl@users.noreply.github.com> --- .../experimental_attention_variant/dsa.py | 493 ++++++++++++++---- .../transformer/test_attention_variant_dsa.py | 317 +++++++++++ 2 files changed, 712 insertions(+), 98 deletions(-) diff --git a/megatron/core/transformer/experimental_attention_variant/dsa.py b/megatron/core/transformer/experimental_attention_variant/dsa.py index 88b4713dc60..3734db7043f 100644 --- 
a/megatron/core/transformer/experimental_attention_variant/dsa.py +++ b/megatron/core/transformer/experimental_attention_variant/dsa.py @@ -252,6 +252,330 @@ def compute_dsa_indexer_loss( return indexer_loss +def _compute_index_scores(q: torch.Tensor, weights: torch.Tensor, k: torch.Tensor) -> torch.Tensor: + """ + Perform index score using BF16 precision. + + Reference: + https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/kernel.py#L254-L274 + This is a BF16 implementation of the `fp8_index` logic: + 1. Compute attention scores: q @ k^T; + 2. Apply ReLU activation; + 3. Weight by attention weights; + 4. Sum across attention heads. + + Args: + q: BF16 [seqlen_q, batch, index_n_heads, index_head_dim], the query tensor. + weights: BF16 [seqlen_q, batch, index_n_heads], the attention weights. + k: BF16 [seqlen_k, batch, index_head_dim], the key tensor. + + Returns: + index_scores: FP32 [batch, seqlen_q, seqlen_k], the index scores. + """ + # Compute attention scores: q @ k^T + # [seqlen_q, batch, index_n_heads, index_head_dim] @ [seqlen_k, batch, index_head_dim]^T + # -> [seqlen_q, batch, index_n_heads, seqlen_k] + index_scores = torch.einsum('sbhd,tbd->sbht', q.float(), k.float()) + + # Apply ReLU activation. + index_scores = torch.relu(index_scores) + + # Weight each head by attention weights. + # [seqlen_q, batch, index_n_heads, seqlen_k] * [seqlen_q, batch, index_n_heads, 1] + # -> [seqlen_q, batch, index_n_heads, seqlen_k] + index_scores = index_scores * weights.unsqueeze(-1) + + # Sum across attention heads. + # [seqlen_q, batch, index_n_heads, seqlen_k] -> [seqlen_q, batch, seqlen_k] + index_scores = index_scores.sum(dim=2) + + # Transpose to [batch, seqlen_q, seqlen_k]. 
+ index_scores = index_scores.transpose(0, 1) + + return index_scores + + +def fused_qk_topk_naive( + q: torch.Tensor, + k: torch.Tensor, + weights: torch.Tensor, + index_topk: int, + mask: Optional[torch.Tensor] = None, +): + """Naive implementation of QK Topk.""" + seqlen = q.size(0) + # ========================================= + # Compute index scores + # ========================================= + # [batch, seqlen, seqlen] + index_scores = _compute_index_scores(q, weights, k) + if mask is not None: + assert mask.dtype == index_scores.dtype, "Mask dtype must match index scores dtype" + index_scores = index_scores + mask + + # ========================================= + # Select top-k indices + # ========================================= + topk_k = min(index_topk, seqlen) + # [batch, seqlen, index_topk] + topk_indices = index_scores.topk(topk_k, dim=-1)[1] + + return index_scores, topk_indices + + +def fwd_fused_indexer_loss_naive( + q, weights, k, query, key, topk, softmax_scale, loss_coeff, mask, sparse_loss, pg_collection +): + """Naive implementation of forward pass for indexer loss.""" + index_scores, topk_indices = fused_qk_topk_naive(q, k, weights, topk, mask) + + indexer_loss = compute_dsa_indexer_loss( + index_scores, + topk_indices, + query, + key, + softmax_scale, + loss_coeff, + sparse_loss, + pg_collection, + ) + + return topk_indices, indexer_loss + + +def bwd_fused_indexer_loss_naive( + q, + weights, + k, + query, + key, + topk_indices, + softmax_scale, + loss_coeff, + sparse_loss, + grad_loss, + pg_collection, +): + """Naive implementation of backward pass for indexer loss.""" + index_scores = _compute_index_scores(q, weights, k) # [B, Sq, Sk] + + sq, b, np, hn = query.size() + sk = key.size(0) + + # [sq, b, np, hn] -> [b, np, sq, hn] -> [b * np, sq, hn] + query_reshaped = query.permute(1, 2, 0, 3).reshape(b * np, sq, hn) + # [sk, b, np, hn] -> [b, np, hn, sk] -> [b * np, hn, sk] + key_reshaped = key.permute(1, 2, 3, 0).reshape(b * np, hn, sk) + 
# Compute attention scores [b * np, sq, sk] + attention_scores = torch.bmm(query_reshaped.float(), key_reshaped.float()) * softmax_scale + # Free reshaped tensors - no longer needed after bmm + del query_reshaped, key_reshaped + + # Reshape to [b, np, sq, sk] + attention_scores = attention_scores.reshape(b, np, sq, sk) + + # causal_mask [sq, sk] + causal_mask = torch.triu( + torch.full((sq, sk), float('-inf'), dtype=torch.float32, device=attention_scores.device), + diagonal=1, + ) + # index_mask [b, sq, sk] + index_mask = torch.full( + (b, sq, sk), float("-inf"), dtype=torch.float32, device=causal_mask.device + ).scatter_(-1, topk_indices, 0) + + # Apply causal mask to both attention and index scores + # [b, np, sq, skv] + [1, 1, sq, skv] -> [b, np, sq, skv] + attention_scores = attention_scores + causal_mask.view(1, 1, sq, sk) + # [b, sq, sk] + [1, sq, sk] -> [b, sq, sk] + index_scores = index_scores + causal_mask.unsqueeze(0) + # Free causal_mask - no longer needed + del causal_mask + + if sparse_loss: + # [b, np, sq, sk] + [b, 1, sq, sk] -> [b, np, sq, sk] + attention_scores = attention_scores + index_mask.view(b, 1, sq, sk) + # [b, sq, sk] + [b, sq, sk] -> [b, sq, sk] + index_scores = index_scores + index_mask + + # Compute softmax for both + attention_scores_softmax = torch.nn.functional.softmax( + attention_scores, dim=-1, dtype=torch.float32 + ) + # Free attention_scores immediately + del attention_scores + + index_scores_softmax = torch.nn.functional.softmax(index_scores, dim=-1, dtype=torch.float32) + # Free index_scores - no longer needed after softmax + del index_scores + + # Sum attention scores across heads: [b, np, sq, sk] -> [b, sq, sk] + attention_scores_sum = attention_scores_softmax.sum(dim=1) + # Free attention_scores_softmax + del attention_scores_softmax + + if pg_collection.tp.size() > 1: + # attention scores are scattered to TP ranks in head dimension. 
+ torch.distributed.all_reduce(attention_scores_sum.contiguous(), group=pg_collection.tp) + + # L1 normalize + attention_scores_normalized = attention_scores_sum / attention_scores_sum.sum( + dim=-1, keepdim=True + ) + # Free attention_scores_sum - no longer needed after normalization + del attention_scores_sum + + # Backward through loss = kl_div * loss_coeff + # where kl_div = kl_per_element.sum(dim=-1).mean() + grad_kl_div = grad_loss * loss_coeff # scalar + + # Backward through mean: distribute gradient equally + grad_kl_per_row = grad_kl_div / (b * sq) # scalar value for each row + + # Backward through sum(dim=-1): broadcast back to [b, sq, sk] + # Each element in a row contributes to the sum, so gradient is same for all + grad_kl_per_element = grad_kl_per_row.view(1, 1, 1).expand(b, sq, sk) + + # Backward through kl_per_element = target * (log(target) - log(index)) + # ∂kl/∂index_softmax = -target / index_softmax + grad_index_scores_softmax = ( + -attention_scores_normalized / (index_scores_softmax + 1e-10) * grad_kl_per_element + ) + # Free attention_scores_normalized - no longer needed + del attention_scores_normalized + + # Backward through softmax: ∂L/∂x = softmax * (∂L/∂softmax - sum(∂L/∂softmax * softmax)) + sum_grad = (grad_index_scores_softmax * index_scores_softmax).sum(dim=-1, keepdim=True) + grad_index_scores_logits = index_scores_softmax * (grad_index_scores_softmax - sum_grad) + # Free intermediate tensors + del index_scores_softmax, grad_index_scores_softmax, sum_grad + + # Zero out gradients for masked positions + # Create a mask for valid (non-masked) positions + # Causal mask: position (i, j) is valid if j <= i + causal_valid_mask = torch.tril( + torch.ones((sq, sk), device=q.device, dtype=torch.bool) + ) # [sq, sk] + if sparse_loss: + # Also apply index mask - only topk positions are valid + index_valid_mask = index_mask == 0 # [b, sq, sk] + del index_mask # Free index_mask immediately after use + valid_mask = causal_valid_mask.unsqueeze(0) 
& index_valid_mask # [b, sq, sk] + del index_valid_mask + else: + del index_mask # Free index_mask even if not used for sparse_loss + valid_mask = causal_valid_mask.unsqueeze(0).expand(b, sq, sk) # [b, sq, sk] + del causal_valid_mask + + grad_index_scores_logits = grad_index_scores_logits * valid_mask.float() + del valid_mask + + # Transpose from [b, sq, sk] to [sq, b, sk] + grad_index_scores = grad_index_scores_logits.transpose(0, 1) # [sq, b, sk] + del grad_index_scores_logits + + # Backward through sum over heads: expand gradient + grad_weighted_scores = grad_index_scores.unsqueeze(2) # [sq, b, 1, sk] + del grad_index_scores + + # Compute forward values needed for backward + scores = torch.einsum('sbhd,tbd->sbht', q.float(), k.float()) # [sq, b, h, sk] + # Compute relu_mask before relu (saves memory vs keeping both scores and relu output) + relu_mask = scores > 0 + scores_after_relu = torch.relu(scores) + del scores + + # Backward through multiplication by weights: index_scores_per_head * weights + # ∂L/∂weights = grad * relu_scores (sum over sk) + grad_weights = (grad_weighted_scores * scores_after_relu).sum(dim=-1) # [sq, b, h] + + # ∂L/∂relu_scores = grad * weights + grad_scores_after_relu = grad_weighted_scores * weights.unsqueeze(-1) # [sq, b, h, sk] + del grad_weighted_scores, scores_after_relu + + # Backward through ReLU + grad_scores = grad_scores_after_relu * relu_mask.float() # [sq, b, h, sk] + del grad_scores_after_relu, relu_mask + + # Backward through einsum 'sbhd,tbd->sbht' + # ∂L/∂q = einsum('sbht,tbd->sbhd', grad_scores, k) + grad_q = torch.einsum('sbht,tbd->sbhd', grad_scores, k.float()) # [sq, b, h, d] + # ∂L/∂k = einsum('sbht,sbhd->tbd', grad_scores, q) + grad_k = torch.einsum('sbht,sbhd->tbd', grad_scores, q.float()) # [sk, b, d] + del grad_scores + + return grad_q.to(q.dtype), grad_weights.to(weights.dtype), grad_k.to(k.dtype) + + +class FusedDSAIndexerLoss(torch.autograd.Function): + """Fused implementation of DSA Indexer Loss.""" + + 
@staticmethod + def forward( + ctx, + q, + weights, + k, + query, + key, + softmax_scale, + topk, + loss_coeff, + mask, + sparse_loss, + pg_collection, + ): + """ + Fused forward: index_scores never materialized in full. + """ + topk_indices, loss = fwd_fused_indexer_loss_naive( + q, + weights, + k, + query, + key, + topk, + softmax_scale, + loss_coeff, + mask, + sparse_loss, + pg_collection, + ) + + # Save for backward (recomputation strategy) + ctx.save_for_backward(q, weights, k, query, key, topk_indices) + ctx.softmax_scale = softmax_scale + ctx.loss_coeff = loss_coeff + ctx.sparse_loss = sparse_loss + ctx.pg_collection = pg_collection + + return topk_indices, loss + + @staticmethod + def backward(ctx, grad_topk_indices, grad_loss): + """ + Backward: Recompute what we need. + """ + q, weights, k, query, key, topk_indices = ctx.saved_tensors + + grad_q, grad_weights, grad_k = bwd_fused_indexer_loss_naive( + q, + weights, + k, + query, + key, + topk_indices, + ctx.softmax_scale, + ctx.loss_coeff, + ctx.sparse_loss, + grad_loss, + ctx.pg_collection, + ) + + # query and key are detached in forward, so return None for their gradients + return grad_q, grad_weights, grad_k, None, None, None, None, None, None, None, None + + class DSAIndexerLossAutoScaler(torch.autograd.Function): """An AutoScaler that triggers the backward pass and scales the grad for indexer loss. @@ -471,74 +795,10 @@ def _apply_rope(self, x: torch.Tensor, rotary_pos_emb: torch.Tensor, mscale: flo x = torch.cat([x_nope, x_pe], dim=-1) return x - def _compute_index_scores( - self, q: torch.Tensor, weights: torch.Tensor, k: torch.Tensor - ) -> torch.Tensor: - """ - Perform index score using BF16 precision. - - Reference: - https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/kernel.py#L254-L274 - This is a BF16 implementation of the `fp8_index` logic: - 1. Compute attention scores: q @ k^T; - 2. Apply ReLU activation; - 3. Weight by attention weights; - 4. Sum across attention heads. 
- - Args: - q: BF16 [seqlen_q, batch, index_n_heads, index_head_dim], the query tensor. - weights: BF16 [seqlen_q, batch, index_n_heads], the attention weights. - k: BF16 [seqlen_k, batch, index_head_dim], the key tensor. - - Returns: - index_scores: FP32 [batch, seqlen_q, seqlen_k], the index scores. - """ - # Compute attention scores: q @ k^T - # [seqlen_q, batch, index_n_heads, index_head_dim] @ [seqlen_k, batch, index_head_dim]^T - # -> [seqlen_q, batch, index_n_heads, seqlen_k] - index_scores = torch.einsum('sbhd,tbd->sbht', q.float(), k.float()) - - # Apply ReLU activation. - index_scores = torch.relu(index_scores) - - # Weight each head by attention weights. - # [seqlen_q, batch, index_n_heads, seqlen_k] * [seqlen_q, batch, index_n_heads, 1] - # -> [seqlen_q, batch, index_n_heads, seqlen_k] - index_scores = index_scores * weights.unsqueeze(-1) - - # Sum across attention heads. - # [seqlen_q, batch, index_n_heads, seqlen_k] -> [seqlen_q, batch, seqlen_k] - index_scores = index_scores.sum(dim=2) - - # Transpose to [batch, seqlen_q, seqlen_k]. - index_scores = index_scores.transpose(0, 1) - - return index_scores - - def forward_with_scores( - self, - x: torch.Tensor, - qr: torch.Tensor, - mask: Optional[torch.Tensor] = None, - packed_seq_params: Optional[PackedSeqParams] = None, + def forward_before_topk( + self, x: torch.Tensor, qr: torch.Tensor, packed_seq_params: Optional[PackedSeqParams] = None ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Forward pass for DSA Indexer that returns both index scores and top-k indices. - - This is used when KL loss is enabled to compare indexer scores with true attention scores. - - Args: - x: hidden states [seqlen, batch, hidden_size]. - qr: Low-rank query tensor [seqlen, batch, q_lora_rank]. - mask: Attention mask [batch, seqlen, seqlen]. - packed_seq_params: Packed sequence parameters for variable length sequences. - - Returns: - index_scores: Index scores [batch, seqlen, seqlen]. 
- topk_indices: Top-k indices [batch, seqlen, index_topk]. - """ - assert packed_seq_params is None, "Packed sequence is not supported for DSAttention" - + """All computations before topk.""" # ========================================= # Prepare RoPE params # ========================================= @@ -592,23 +852,45 @@ def forward_with_scores( k = rotate_activation(k) # ========================================= - # Compute index scores + # Prepare weights for index scores # ========================================= # [seqlen, batch, hidden_size] -> [seqlen, batch, index_n_heads] weights, _ = self.linear_weights_proj(x) weights = weights * (self.index_n_heads**-0.5) * self.softmax_scale - # [batch, seqlen, seqlen] - index_scores = self._compute_index_scores(q, weights, k) - if mask is not None: - assert mask.dtype == index_scores.dtype, "Mask dtype must match index scores dtype" - index_scores = index_scores + mask - # ========================================= - # Select top-k indices - # ========================================= - topk_k = min(self.index_topk, seqlen) - # [batch, seqlen, index_topk] - topk_indices = index_scores.topk(topk_k, dim=-1)[1] + return q, k, weights + + def forward_with_scores( + self, + x: torch.Tensor, + qr: torch.Tensor, + mask: Optional[torch.Tensor] = None, + packed_seq_params: Optional[PackedSeqParams] = None, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Forward pass for DSA Indexer that returns both index scores and top-k indices. + + This is used when KL loss is enabled to compare indexer scores with true attention scores. + + Args: + x: hidden states [seqlen, batch, hidden_size]. + qr: Low-rank query tensor [seqlen, batch, q_lora_rank]. + mask: Attention mask [batch, seqlen, seqlen]. + packed_seq_params: Packed sequence parameters for variable length sequences. + + Returns: + index_scores: Index scores [batch, seqlen, seqlen]. + topk_indices: Top-k indices [batch, seqlen, index_topk]. 
+ """ + assert packed_seq_params is None, "Packed sequence is not supported for DSAttention" + + # [seqlen, batch, index_n_heads * index_head_dim] + # [seqlen, batch, index_head_dim] + # [seqlen, batch, index_n_heads] + q, k, weights = self.forward_before_topk(x, qr, packed_seq_params) + + # [batch, seqlen, seqlen], [batch, seqlen, index_topk] + index_scores, topk_indices = fused_qk_topk_naive(q, k, weights, self.index_topk, mask) return index_scores, topk_indices @@ -781,31 +1063,27 @@ def forward( mask, float('-inf') ) - # =================================== - # Get index scores and top-k indices - # =================================== - index_scores, topk_indices = self.indexer.forward_with_scores( - x, qr, mask=float_mask, packed_seq_params=packed_seq_params - ) - - # =================================== - # Run sparse attention kernel - # =================================== - output = unfused_dsa_fn(query, key, value, topk_indices, self.softmax_scale) - - # =================================== - # Attach indexer loss - # =================================== if self.training and torch.is_grad_enabled(): - # Compute KL divergence loss between indexer scores and true attention scores + # =================================== + # Prepare inputs for indexer loss + # =================================== + q, k, weights = self.indexer.forward_before_topk(x, qr, packed_seq_params) indexer_loss_coeff = getattr(self.config, 'dsa_indexer_loss_coeff', 0.0) - indexer_loss = compute_dsa_indexer_loss( - index_scores, - topk_indices, + + # =================================== + # Attach indexer topk and loss + # =================================== + # Compute KL divergence loss between indexer scores and true attention scores + topk_indices, indexer_loss = FusedDSAIndexerLoss.apply( + q, + weights, + k, query.detach(), key.detach(), self.softmax_scale, + self.indexer.index_topk, indexer_loss_coeff, + float_mask, getattr(self.config, "dsa_indexer_use_sparse_loss", False), 
self.indexer.pg_collection, ) @@ -816,7 +1094,26 @@ def forward( layer_number=self.layer_number, num_layers=self.config.num_layers, ) + + # =================================== + # Run sparse attention kernel + # =================================== + output = unfused_dsa_fn(query, key, value, topk_indices, self.softmax_scale) + # Attach loss to output output = DSAIndexerLossAutoScaler.apply(output, indexer_loss) + else: + # =================================== + # Get index scores and top-k indices + # =================================== + _, topk_indices = self.indexer.forward_with_scores( + x, qr, mask=float_mask, packed_seq_params=packed_seq_params + ) + + # =================================== + # Run sparse attention kernel + # =================================== + output = unfused_dsa_fn(query, key, value, topk_indices, self.softmax_scale) + return output diff --git a/tests/unit_tests/transformer/test_attention_variant_dsa.py b/tests/unit_tests/transformer/test_attention_variant_dsa.py index bd106aa6f0e..96253a4ca10 100644 --- a/tests/unit_tests/transformer/test_attention_variant_dsa.py +++ b/tests/unit_tests/transformer/test_attention_variant_dsa.py @@ -17,7 +17,10 @@ DSAIndexerSubmodules, DSAttention, DSAttentionSubmodules, + FusedDSAIndexerLoss, + _compute_index_scores, compute_dsa_indexer_loss, + fused_qk_topk_naive, rotate_activation, ) from megatron.core.transformer.transformer_config import MLATransformerConfig @@ -265,6 +268,320 @@ def test_backward_pass(self): ), f"Gradient should be scaled by loss scale, expected {expected_grad_per_element}, got {dummy_input.grad[0].item()}" +@pytest.mark.parametrize("seqlen_and_topk", [[16, 8], [32, 16], [64, 32]]) +@pytest.mark.parametrize("sparse_loss", [False, True]) +class TestFusedDSAIndexerLossGradient: + """Test that FusedDSAIndexerLoss manual backward matches autograd backward.""" + + @pytest.fixture(scope='function', autouse=True) + def setup_method(self): + Utils.initialize_model_parallel( + 
tensor_model_parallel_size=1, pipeline_model_parallel_size=1 + ) + self.pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp']) + yield + Utils.destroy_model_parallel() + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_fused_indexer_loss_gradient_matches_autograd(self, seqlen_and_topk, sparse_loss): + """ + Test that the manually written backward in FusedDSAIndexerLoss produces + the same gradients as PyTorch autograd on the unfused implementation. + """ + seqlen = seqlen_and_topk[0] + index_topk = seqlen_and_topk[1] + batch_size = 2 + num_heads = 4 + head_dim = 64 + index_n_heads = 8 + index_head_dim = 64 + softmax_scale = head_dim**-0.5 + loss_coeff = 1.0 + + torch.manual_seed(42) + + # Create inputs for indexer + # q: [seqlen, batch, index_n_heads, index_head_dim] + q_ref = ( + torch.randn(seqlen, batch_size, index_n_heads, index_head_dim, dtype=torch.float32) + .cuda() + .requires_grad_(True) + ) + # weights: [seqlen, batch, index_n_heads] + weights_ref = ( + torch.randn(seqlen, batch_size, index_n_heads, dtype=torch.float32) + .cuda() + .requires_grad_(True) + ) + # k: [seqlen, batch, index_head_dim] + k_ref = ( + torch.randn(seqlen, batch_size, index_head_dim, dtype=torch.float32) + .cuda() + .requires_grad_(True) + ) + # query: [seqlen, batch, num_heads, head_dim] - detached, not trained + query = torch.randn(seqlen, batch_size, num_heads, head_dim, dtype=torch.bfloat16).cuda() + # key: [seqlen, batch, num_heads, head_dim] - detached, not trained + key = torch.randn(seqlen, batch_size, num_heads, head_dim, dtype=torch.bfloat16).cuda() + + # Create causal mask + mask = torch.triu( + torch.full((seqlen, seqlen), float('-inf'), dtype=torch.float32).cuda(), diagonal=1 + ) + + # ============================================= + # Method 1: Autograd (reference) + # ============================================= + # Compute index scores and apply mask (matches fused_qk_topk_naive behavior) + 
index_scores_ref = _compute_index_scores(q_ref, weights_ref, k_ref) + # Apply mask + index_scores_masked = index_scores_ref + mask.unsqueeze(0) + # Get topk indices from masked scores + topk_k = min(index_topk, seqlen) + topk_indices = index_scores_masked.topk(topk_k, dim=-1)[1] + + # Compute loss using autograd + loss_ref = compute_dsa_indexer_loss( + index_scores=index_scores_masked, + topk_indices=topk_indices, + query=query, + key=key, + softmax_scale=softmax_scale, + loss_coeff=loss_coeff, + sparse_loss=sparse_loss, + pg_collection=self.pg_collection, + ) + + # Backward with autograd + loss_ref.backward() + + # Save reference gradients + grad_q_ref = q_ref.grad.clone() + grad_weights_ref = weights_ref.grad.clone() + grad_k_ref = k_ref.grad.clone() + + # ============================================= + # Method 2: FusedDSAIndexerLoss (manual backward) + # ============================================= + # Clone tensors from ref (detach and require grad again) + q_fused = q_ref.detach().clone().requires_grad_(True) + weights_fused = weights_ref.detach().clone().requires_grad_(True) + k_fused = k_ref.detach().clone().requires_grad_(True) + + # Use FusedDSAIndexerLoss + topk_indices_fused, loss_fused = FusedDSAIndexerLoss.apply( + q_fused, + weights_fused, + k_fused, + query.detach(), + key.detach(), + softmax_scale, + index_topk, + loss_coeff, + mask, + sparse_loss, + self.pg_collection, + ) + + # Backward with manual implementation + loss_fused.backward() + + # Get fused gradients + grad_q_fused = q_fused.grad + grad_weights_fused = weights_fused.grad + grad_k_fused = k_fused.grad + + # ============================================= + # Compare gradients + # ============================================= + # Check loss values match + assert torch.allclose( + loss_fused, loss_ref, rtol=1e-5, atol=1e-5 + ), f"Loss mismatch: fused={loss_fused.item()}, ref={loss_ref.item()}" + + # Check topk indices match + assert torch.equal( + topk_indices_fused, topk_indices + ), 
"Top-k indices mismatch between fused and reference" + + # Check gradients match + assert torch.allclose( + grad_q_fused, grad_q_ref, rtol=1e-5, atol=1e-5 + ), f"grad_q mismatch: max diff = {(grad_q_fused - grad_q_ref).abs().max().item()}" + + assert torch.allclose( + grad_weights_fused, grad_weights_ref, rtol=1e-5, atol=1e-5 + ), f"grad_weights mismatch: max diff = {(grad_weights_fused - grad_weights_ref).abs().max().item()}" + + assert torch.allclose( + grad_k_fused, grad_k_ref, rtol=1e-5, atol=1e-5 + ), f"grad_k mismatch: max diff = {(grad_k_fused - grad_k_ref).abs().max().item()}" + + +@pytest.mark.parametrize("tensor_model_parallel_size", [2, 4]) +@pytest.mark.parametrize("sparse_loss", [False, True]) +class TestFusedDSAIndexerLossGradientTP: + """Test FusedDSAIndexerLoss gradient consistency across different TP sizes.""" + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_fused_indexer_loss_gradient_tp_consistency( + self, tensor_model_parallel_size, sparse_loss + ): + """ + Test that FusedDSAIndexerLoss produces consistent gradients across TP ranks + and matches TP=1 baseline. 
+ """ + seqlen = 64 + index_topk = 32 + batch_size = 2 + num_heads = 8 + head_dim = 64 + index_n_heads = 8 + index_head_dim = 64 + softmax_scale = head_dim**-0.5 + loss_coeff = 1.0 + + # ============================================= + # First run with TP=1 to get baseline + # ============================================= + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1 + ) + torch.manual_seed(42) + model_parallel_cuda_manual_seed(42) + + pg_collection_tp1 = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp']) + + # Create inputs + q_input = torch.randn( + seqlen, batch_size, index_n_heads, index_head_dim, dtype=torch.float32 + ).cuda() + weights_input = torch.randn(seqlen, batch_size, index_n_heads, dtype=torch.float32).cuda() + k_input = torch.randn(seqlen, batch_size, index_head_dim, dtype=torch.float32).cuda() + query_input = torch.randn( + seqlen, batch_size, num_heads, head_dim, dtype=torch.bfloat16 + ).cuda() + key_input = torch.randn( + seqlen, batch_size, num_heads, head_dim, dtype=torch.bfloat16 + ).cuda() + mask = torch.triu( + torch.full((seqlen, seqlen), float('-inf'), dtype=torch.float32).cuda(), diagonal=1 + ) + + # Clone for TP=1 + q_tp1 = q_input.clone().requires_grad_(True) + weights_tp1 = weights_input.clone().requires_grad_(True) + k_tp1 = k_input.clone().requires_grad_(True) + + # Forward and backward with TP=1 + topk_indices_tp1, loss_tp1 = FusedDSAIndexerLoss.apply( + q_tp1, + weights_tp1, + k_tp1, + query_input.detach(), + key_input.detach(), + softmax_scale, + index_topk, + loss_coeff, + mask, + sparse_loss, + pg_collection_tp1, + ) + loss_tp1.backward() + + # Save TP=1 results + grad_q_tp1 = q_tp1.grad.clone() + grad_weights_tp1 = weights_tp1.grad.clone() + grad_k_tp1 = k_tp1.grad.clone() + loss_tp1_value = loss_tp1.detach().clone() + + Utils.destroy_model_parallel() + + # ============================================= + # Run with target TP size + # 
============================================= + Utils.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, pipeline_model_parallel_size=1 + ) + torch.manual_seed(42) + model_parallel_cuda_manual_seed(42) + + pg_collection_tpn = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp']) + tp_rank = parallel_state.get_tensor_model_parallel_rank() + + # Clone inputs for TP=N (same values as TP=1) + q_tpn = q_input.clone().requires_grad_(True) + weights_tpn = weights_input.clone().requires_grad_(True) + k_tpn = k_input.clone().requires_grad_(True) + + # query and key need to be split along heads for TP + head_per_rank = num_heads // tensor_model_parallel_size + start_head = tp_rank * head_per_rank + end_head = (tp_rank + 1) * head_per_rank + query_tpn = query_input[:, :, start_head:end_head, :].clone() + key_tpn = key_input[:, :, start_head:end_head, :].clone() + + # Forward and backward with TP=N + topk_indices_tpn, loss_tpn = FusedDSAIndexerLoss.apply( + q_tpn, + weights_tpn, + k_tpn, + query_tpn.detach(), + key_tpn.detach(), + softmax_scale, + index_topk, + loss_coeff, + mask, + sparse_loss, + pg_collection_tpn, + ) + loss_tpn.backward() + + # ============================================= + # Compare results + # ============================================= + # Loss should be the same + assert torch.allclose( + loss_tpn, loss_tp1_value, rtol=1e-5, atol=1e-5 + ), f"Loss mismatch: TP={tensor_model_parallel_size} got {loss_tpn.item()}, TP=1 got {loss_tp1_value.item()}" + + # Top-k indices should be the same + assert torch.equal( + topk_indices_tpn, topk_indices_tp1 + ), "Top-k indices mismatch between TP=1 and TP=N" + + # Gradients should match exactly (indexer params are duplicated across TP) + assert torch.allclose( + q_tpn.grad, grad_q_tp1, rtol=1e-5, atol=1e-5 + ), f"grad_q mismatch: max diff = {(q_tpn.grad - grad_q_tp1).abs().max().item()}" + + assert torch.allclose( + weights_tpn.grad, grad_weights_tp1, rtol=1e-5, atol=1e-5 
+ ), f"grad_weights mismatch: max diff = {(weights_tpn.grad - grad_weights_tp1).abs().max().item()}" + + assert torch.allclose( + k_tpn.grad, grad_k_tp1, rtol=1e-5, atol=1e-5 + ), f"grad_k mismatch: max diff = {(k_tpn.grad - grad_k_tp1).abs().max().item()}" + + # Check gradients are identical across all TP ranks + tp_size = parallel_state.get_tensor_model_parallel_world_size() + if tp_size > 1: + for grad_tensor, name in [ + (q_tpn.grad, "grad_q"), + (weights_tpn.grad, "grad_weights"), + (k_tpn.grad, "grad_k"), + ]: + grad_list = [torch.zeros_like(grad_tensor) for _ in range(tp_size)] + torch.distributed.all_gather(grad_list, grad_tensor, group=pg_collection_tpn.tp) + + for i in range(1, tp_size): + assert torch.allclose( + grad_list[0], grad_list[i], rtol=0, atol=0 + ), f"{name} differs between TP rank 0 and rank {i}" + + Utils.destroy_model_parallel() + + @pytest.mark.parametrize("seqlen", [16, 64]) class TestDSAIndexer: """Test DSA Indexer module basic functionality with TP=1.""" From 54f4feb4ea02c40f29b2cfa1a25804a846da4e56 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Wed, 4 Feb 2026 19:40:50 -0600 Subject: [PATCH 275/334] cp: Fix uv install for GH actions (#3259) (#3261) Signed-off-by: Charlie Truong Co-authored-by: Philip Petrakian --- .github/actions/action.yml | 4 ++-- .github/workflows/oncall-rotation.yml | 5 ++++- .github/workflows/sync-team-usergroups.yml | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index 895b6863bef..088877304a7 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -117,7 +117,7 @@ runs: export PYTHONPATH=$(pwd) export NEMORUN_HOME=$(pwd) export NCCL_DEBUG=INFO - pip install --no-cache-dir "uv!=0.9.29" + pip install --no-cache-dir "uv<0.9.29" uv venv .venv uv cache clean uv sync --no-cache --only-group test @@ -199,7 +199,7 @@ runs: export PYTHONPATH=$(pwd) export NEMORUN_HOME=$(pwd) - pip install --no-cache-dir "uv!=0.9.29" 
+ pip install --no-cache-dir "uv<0.9.29" uv venv .venv uv cache clean uv sync --no-cache --only-group test diff --git a/.github/workflows/oncall-rotation.yml b/.github/workflows/oncall-rotation.yml index 46a45810ad1..71ae094e6c8 100644 --- a/.github/workflows/oncall-rotation.yml +++ b/.github/workflows/oncall-rotation.yml @@ -45,7 +45,10 @@ jobs: # Slack token for updating the Slack usergroup SLACK_TOKEN: ${{ secrets.ONCALL_SLACK_TOKEN }} run: | - pip install --no-cache-dir uv + pip install --no-cache-dir "uv<0.9.29" + uv venv .venv + uv cache clean + uv sync --no-cache uv run --with slack-sdk python .github/scripts/oncall_manager.py rotate - name: Commit and Push changes diff --git a/.github/workflows/sync-team-usergroups.yml b/.github/workflows/sync-team-usergroups.yml index 8b08182dceb..1c6cecaeb7a 100644 --- a/.github/workflows/sync-team-usergroups.yml +++ b/.github/workflows/sync-team-usergroups.yml @@ -35,5 +35,8 @@ jobs: GH_TOKEN: ${{ secrets.NVIDIA_MCORE_ONCALL_TOKEN || secrets.PAT || secrets.GITHUB_TOKEN }} SLACK_TOKEN: ${{ secrets.ONCALL_SLACK_TOKEN }} run: | - pip install --no-cache-dir uv + pip install --no-cache-dir "uv<0.9.29" + uv venv .venv + uv cache clean + uv sync --no-cache uv run --with slack-sdk python .github/scripts/sync_team_usergroups.py From ef336cae04246d5860d811a4900c9225c1731868 Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Thu, 5 Feb 2026 22:40:27 +0800 Subject: [PATCH 276/334] [Dev] Fix EP Overlap missing record stream for shared expert (#3244) --- .../core/models/gpt/fine_grained_callables.py | 21 ++++++++++++------- .../transformer/test_submodule_callables.py | 4 ++-- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index b4fe64ee9bb..fcccbdb1837 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ 
-527,15 +527,19 @@ def submodule_dispatch_forward( token_dispatcher._comm_manager.token_probs = probs dispatched_tokens, dispatched_probs = layer.mlp.dispatch(local_tokens, probs) - return dispatched_tokens, dispatched_probs - def submodule_moe_forward( - node: ScheduleNode, dispatched_tokens: torch.Tensor, dispatched_probs: torch.Tensor - ): + # `dispatched_probs` is needed by backward pass of swiglu, therefore it's + # passed to moe_forward within `layer_state` to avoid the free_input process + # of the input tensors. + node.layer_state.dispatched_probs = node.detach(dispatched_probs) + return dispatched_tokens + + def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor): """ Run forward pass for computations between dispatch and combine: post dispatch->experts->combine preprocess """ + dispatched_probs = node.layer_state.dispatched_probs token_dispatcher = layer.mlp.token_dispatcher if enable_deepep or enable_hybridep: # update dispatched_probs to be detached version, prevents @@ -545,10 +549,10 @@ def submodule_moe_forward( expert_output, _ = layer.mlp.routed_experts_compute(dispatched_tokens, dispatched_probs) # For HybridEP, tokens_per_expert is generated on comm stream, as the input to - # `routed_experts_compute`, it needs to be recorded to comp stream. + # `routed_experts_compute`, a ref is needed to prevent it from being freed. 
if enable_hybridep: tokens_per_expert = token_dispatcher._comm_manager.get_number_of_tokens_per_expert() - tokens_per_expert.record_stream(torch.cuda.current_stream()) + node.layer_state.tokens_per_expert = tokens_per_expert if layer.recompute_pre_mlp_layernorm: # discard the output of the pre-mlp layernorm and register the recompute @@ -588,11 +592,14 @@ def submodule_combine_forward(node: ScheduleNode, output: torch.Tensor): inp=hidden_states, requires_grad=hidden_states.requires_grad, keep_graph=True ) - # Need to record residual to comm stream, since it's created on comp stream + # Need to record tensors created on comp stream to comm stream node.layer_state.residual.record_stream(torch.cuda.current_stream()) + if shared_expert_output is not None: + shared_expert_output.record_stream(torch.cuda.current_stream()) # release tensor reference after use node.layer_state.residual = None + node.layer_state.shared_expert_output = None # final layer norm from decoder final_layernorm = node.chunk_state.model.decoder.final_layernorm diff --git a/tests/unit_tests/transformer/test_submodule_callables.py b/tests/unit_tests/transformer/test_submodule_callables.py index 31bd3d18b80..73059495c06 100644 --- a/tests/unit_tests/transformer/test_submodule_callables.py +++ b/tests/unit_tests/transformer/test_submodule_callables.py @@ -79,10 +79,10 @@ def run_model_submodules_with_capture(model, input_tensors, microbatches): local_tokens, probs = attn(node, input_tensors[i]) # dispatch fwd - dispatched_tokens, dispatched_probs = dispatch(node, local_tokens, probs) + dispatched_tokens = dispatch(node, local_tokens, probs) # moe fwd - expert_output = moe(node, dispatched_tokens, dispatched_probs) + expert_output = moe(node, dispatched_tokens) # combine fwd hidden_states = combine(node, expert_output) From ec94d63584cc7a6659435069ab8cf742eec424ea Mon Sep 17 00:00:00 2001 From: Jianbin Chang Date: Fri, 6 Feb 2026 10:11:18 +0800 Subject: [PATCH 277/334] Restore missing 
linear-cross-entropy option accidentally removed from arguments.py (#3266) Co-authored-by: Xin Yao --- megatron/core/model_parallel_config.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index 3d6ffd2f56e..e30cb0e1d1a 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -246,9 +246,14 @@ class ModelParallelConfig: Defaults to False. """ - cross_entropy_fusion_impl: Literal['native', 'te'] = 'native' - """If 'native', MCore based CE loss fusion is used, if 'te', Parallel CE loss - from Transformer Engine library is used. Defaults to 'native'. + cross_entropy_fusion_impl: Literal['native', 'te', 'linear'] = 'native' + """ + Specifies the implementation of cross-entropy loss fusion. + + Options: + - 'native': Uses MCore-based cross-entropy loss fusion (default). + - 'te': Uses the parallel cross-entropy loss implementation from the Transformer Engine library. + - 'linear': Uses a linear-cross-entropy fusion approach. 
""" tp_comm_overlap_disable_qkv: bool = False From 500e080f20f122cae28a60c993f586d9b8414000 Mon Sep 17 00:00:00 2001 From: eternally-z <105485498+eternally-z@users.noreply.github.com> Date: Mon, 9 Feb 2026 19:37:31 +0800 Subject: [PATCH 278/334] Fix reload_model_params failure when loading MoE models with explicit state_dict (#3243) Co-authored-by: Xin Yao --- megatron/core/optimizer/distrib_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index 8a07bef2faa..a4364f5e92d 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -2528,7 +2528,7 @@ def _build_model_param_to_state_dict_param_map(self, state_dict): for name, model_param in model_chunk.named_parameters(): while name.startswith("module."): name = name[len("module.") :] - matched_keys = [k for k in names_in_state_dict if name in k] + matched_keys = [k for k in names_in_state_dict if k.endswith(name)] assert ( len(matched_keys) == 1 ), f"Parameter {name} has {len(matched_keys)} matches in state dict" From 433c169b45124a0dcae5568b098d4d9d41c41fd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 9 Feb 2026 15:53:10 +0100 Subject: [PATCH 279/334] ci: Disable moe20 tests (#3312) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- tests/test_utils/recipes/moe2.0.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_utils/recipes/moe2.0.yaml b/tests/test_utils/recipes/moe2.0.yaml index d16be18642a..39fccd08c40 100644 --- a/tests/test_utils/recipes/moe2.0.yaml +++ b/tests/test_utils/recipes/moe2.0.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: '{test_case}_{environment}_{platforms}' + name: "{test_case}_{environment}_{platforms}" model: moe2.0 build: 
mcore-pyt-{environment} nodes: 1 @@ -72,41 +72,41 @@ products: - model_config: dsv3_proxy runtime_config: tp1pp1ep8 environment: [dev] - scope: [nightly] + scope: [nightly-broken] platforms: [dgx_h100] - test_case: [dsv3_tp2pp2ep4] products: - model_config: dsv3_proxy runtime_config: tp2pp2ep4 environment: [dev] - scope: [nightly] + scope: [nightly-broken] platforms: [dgx_h100] - test_case: [qwen3_tp1pp1ep1] products: - model_config: qwen3_proxy runtime_config: tp1pp1ep1 environment: [dev] - scope: [nightly] + scope: [nightly-broken] platforms: [dgx_h100] - test_case: [qwen3_tp2pp2ep4] products: - model_config: qwen3_proxy runtime_config: tp2pp2ep4 environment: [dev] - scope: [nightly] + scope: [nightly-broken] platforms: [dgx_h100] - test_case: [bert_mcore_tp1_pp2] products: - environment: [dev] - scope: [nightly] + scope: [nightly-broken] platforms: [dgx_h100] - test_case: [bert_mcore_tp1_pp4_vp2] products: - environment: [dev] - scope: [nightly] + scope: [nightly-broken] platforms: [dgx_h100] - test_case: [bert_mcore_tp4_pp1] products: - environment: [dev] - scope: [nightly] + scope: [nightly-broken] platforms: [dgx_h100] From fd4801ee15a31cb722a278c25f9c2767cacabddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 9 Feb 2026 16:11:54 +0100 Subject: [PATCH 280/334] ci: Pin down setuptools to lt 82 (#3316) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- docker/Dockerfile.ci.dev | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index bb9ca5fbe9a..fa214deeea5 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -88,7 +88,7 @@ RUN --mount=type=secret,id=JET_INDEX_URLS bash -ex <<"EOF" JET_INDEX_URLS=$(cat /run/secrets/JET_INDEX_URLS) python -m venv /opt/jet /opt/jet/bin/pip install --no-cache-dir $JET_INDEX_URLS \ - jet-api==$JET_API_VERSION + "jet-api==$JET_API_VERSION" 
"setuptools<82.0.0" EOF RUN --mount=type=secret,id=JET_INDEX_URLS \ From 52eabf01905190007350f8d68ae8316e9803ecfe Mon Sep 17 00:00:00 2001 From: Ian Zhang <4110995+IanBoyanZhang@users.noreply.github.com> Date: Mon, 9 Feb 2026 23:19:27 -0800 Subject: [PATCH 281/334] [None][Fix] Prevent resource leak warnings (#3216) Co-authored-by: Xin Yao --- megatron/core/datasets/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/megatron/core/datasets/utils.py b/megatron/core/datasets/utils.py index e14656df799..26cf5253179 100644 --- a/megatron/core/datasets/utils.py +++ b/megatron/core/datasets/utils.py @@ -12,6 +12,8 @@ class Split(Enum): + """Enum train, valid, and test split.""" + train = 0 valid = 1 test = 2 @@ -26,7 +28,11 @@ def compile_helpers(): if subprocess.run(command).returncode != 0: import sys + import torch.distributed as dist + log_single_rank(logger, logging.ERROR, "Failed to compile the C++ dataset helper functions") + + dist.destroy_process_group() sys.exit(1) From c0030d616593dc606239e76b46a5b8afebc614d3 Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Tue, 10 Feb 2026 16:38:58 +0800 Subject: [PATCH 282/334] [Dev] Fix backward dw dependency (#3338) --- megatron/core/models/gpt/fine_grained_callables.py | 4 +++- megatron/core/pipeline_parallel/utils.py | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index fcccbdb1837..e77cfb71871 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -329,9 +329,11 @@ def backward_dw(self): """Computes the weight gradients for the transformer layer node.""" if not self.delay_wgrad_compute: return - with self.stream_acquire_context(f"{self.name} wgrad"): + with torch.cuda.stream(self.stream): + torch.cuda.nvtx.range_push(f"{self.name} wgrad") for module in self.bwd_dw_callables: 
module.backward_dw() + torch.cuda.nvtx.range_pop() # the output grad memory is last used in wgrad compute, should be safe to release. assert self.delay_grads_release, "output grad memory should be valid before wgrad." diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index 695968e2443..8f6b25eec32 100644 --- a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -281,8 +281,6 @@ def stream_acquire_context(self, name=None): 3. torch.cuda.stream context for execution on the specified stream Args: - stream: The CUDA stream to execute on - event: The CUDA event for synchronization name: Optional name for NVTX range profiling """ self.event.wait(self.stream) From 2c2e749233b6ac251441f92fb852a02066ab55e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 10 Feb 2026 11:52:09 +0100 Subject: [PATCH 283/334] ci: Rely exclusively on GitHub CI (#3341) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig Signed-off-by: Charlie Truong Co-authored-by: Charlie Truong --- .github/actions/action.yml | 11 +++- .github/workflows/cicd-main.yml | 9 ++-- tests/test_utils/recipes/gpt.yaml | 78 ++++++++++++++--------------- tests/test_utils/recipes/mamba.yaml | 6 +-- tests/test_utils/recipes/moe.yaml | 18 +++---- 5 files changed, 63 insertions(+), 59 deletions(-) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index 088877304a7..4a838b24d95 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -48,7 +48,9 @@ inputs: is_ci_workload: description: "Is CI workload" required: true - + is_merge_group: + description: "Is merge group" + required: true runs: using: "composite" steps: @@ -179,7 +181,12 @@ runs: #!/bin/bash set -euxo pipefail - if [ "${{ steps.has-run-tests-label.outputs.main }}" == "true" ]; then + if [ "${{ inputs.is_merge_group }}" == "true" ]; then + ARGS=( + --scope 
mr-github + --n-repeat 1 + ) + elif [ "${{ steps.has-run-tests-label.outputs.main }}" == "true" ]; then ARGS=( --scope mr-github --enable-lightweight-mode diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index b27348f2dce..074f4234a91 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -18,8 +18,6 @@ on: - cron: 0 0 * * * push: branches: - - dev - - main - "pull-request/[0-9]+" - "deploy-release/*" merge_group: @@ -181,7 +179,7 @@ jobs: cicd-wait-in-queue: runs-on: ubuntu-latest needs: [pre-flight, linting] - environment: ${{ needs.pre-flight.outputs.is_merge_group == 'true' && 'merge-gate' || 'test' }} + environment: "test" if: | !(needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' @@ -404,7 +402,6 @@ jobs: success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' - || needs.pre-flight.outputs.is_merge_group == 'true' ) && !cancelled() outputs: @@ -442,7 +439,7 @@ jobs: id: main env: HAS_RUN_TESTS_LABEL: ${{ steps.has-run-tests-label.outputs.main }} - HAS_RUN_FUNCTIONAL_TESTS_LABEL: ${{ steps.has-run-functional-tests-label.outputs.main }} + HAS_RUN_FUNCTIONAL_TESTS_LABEL: ${{ steps.has-run-functional-tests-label.outputs.main == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' }} run: | export PYTHONPATH=$(pwd) @@ -505,7 +502,6 @@ jobs: || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' ) - && needs.pre-flight.outputs.is_merge_group == 'false' && !cancelled() steps: - name: Checkout @@ -521,6 +517,7 @@ jobs: PAT: ${{ secrets.PAT }} container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }} is_ci_workload: ${{ needs.pre-flight.outputs.is_ci_workload }} + is_merge_group: ${{ needs.pre-flight.outputs.is_merge_group }} Nemo_CICD_Test: needs: diff --git a/tests/test_utils/recipes/gpt.yaml 
b/tests/test_utils/recipes/gpt.yaml index 90eddc55c27..a97a4d7bb38 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -110,14 +110,14 @@ products: - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer] products: @@ -129,7 +129,7 @@ products: - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -142,28 +142,28 @@ products: - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # outdated TE: #501 - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # non-determinism: #436 - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -177,28 +177,28 @@ products: - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - 
environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -212,7 +212,7 @@ products: - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -247,83 +247,83 @@ products: - test_case: [gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_cp2] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: 
[dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_nondeterministic] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -331,14 +331,14 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -350,26 +350,26 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_mla] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: 
[gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -383,28 +383,28 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_mcore_tp2_pp2_uninstall_te] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -425,14 +425,14 @@ products: - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - test_case: [gpt3_mcore_te_tp2_pp1_modelopt_distill_resume] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # Outdated: #502 @@ -469,7 +469,7 @@ products: - test_case: [gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather] products: 
- environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] @@ -483,7 +483,7 @@ products: - test_case: [gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] diff --git a/tests/test_utils/recipes/mamba.yaml b/tests/test_utils/recipes/mamba.yaml index 47b731f7e00..456a6cbccf7 100644 --- a/tests/test_utils/recipes/mamba.yaml +++ b/tests/test_utils/recipes/mamba.yaml @@ -3,7 +3,7 @@ format_version: 1 maintainers: [mcore] loggers: [stdout] spec: - name: '{test_case}_{environment}_{platforms}' + name: "{test_case}_{environment}_{platforms}" model: hybrid build: mcore-pyt-{environment} nodes: 1 @@ -58,7 +58,7 @@ products: - test_case: [hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] @@ -74,7 +74,7 @@ products: - test_case: [hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 06039d77440..10c1140ddf5 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -119,7 +119,7 @@ products: - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] # products: @@ -139,17 +139,17 @@ products: - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer] products: - environment: [dev] - scope: [mr] + scope: [mr, 
mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4] products: @@ -161,12 +161,12 @@ products: - test_case: [gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading] products: @@ -181,19 +181,19 @@ products: - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon] products: From 98f6f81686c2a7d8562022102b3778b30e8d2482 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 10 Feb 2026 12:43:44 +0100 Subject: [PATCH 284/334] [dev] ci: skip queue in merge-gate (#3344) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cicd-main.yml | 6 ++++++ 1 file changed, 6 
insertions(+) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 074f4234a91..89895e3de41 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -183,6 +183,7 @@ jobs: if: | !(needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.docs_only == 'true') steps: - name: Running CI tests @@ -197,6 +198,7 @@ jobs: ( success() || needs.pre-flight.outputs.is_ci_workload == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.force_run_all == 'true' ) && needs.pre-flight.outputs.is_merge_group == 'false' @@ -336,6 +338,7 @@ jobs: success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' ) && needs.pre-flight.outputs.is_merge_group == 'false' && !cancelled() @@ -367,6 +370,7 @@ jobs: success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' ) && needs.pre-flight.outputs.is_merge_group == 'false' && !cancelled() @@ -402,6 +406,7 @@ jobs: success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' ) && !cancelled() outputs: @@ -501,6 +506,7 @@ jobs: success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' ) && !cancelled() steps: From 28b130f34e47178c231cc9b32f6faad5647ed583 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 11 Feb 2026 11:45:36 +0100 Subject: [PATCH 285/334] Revert "[None][Fix] Prevent resource leak warnings (#3216)" (#3366) --- megatron/core/datasets/utils.py | 
6 ------ 1 file changed, 6 deletions(-) diff --git a/megatron/core/datasets/utils.py b/megatron/core/datasets/utils.py index 26cf5253179..e14656df799 100644 --- a/megatron/core/datasets/utils.py +++ b/megatron/core/datasets/utils.py @@ -12,8 +12,6 @@ class Split(Enum): - """Enum train, valid, and test split.""" - train = 0 valid = 1 test = 2 @@ -28,11 +26,7 @@ def compile_helpers(): if subprocess.run(command).returncode != 0: import sys - import torch.distributed as dist - log_single_rank(logger, logging.ERROR, "Failed to compile the C++ dataset helper functions") - - dist.destroy_process_group() sys.exit(1) From e868e8f280c142192603cf51490ba545f113e903 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Thu, 12 Feb 2026 20:49:16 -0600 Subject: [PATCH 286/334] ci: Fix dev branch merge queue (#3397) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Charlie Truong Signed-off-by: oliver könig Co-authored-by: oliver könig --- .github/workflows/cicd-main.yml | 72 +++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 12 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 89895e3de41..cd8091f6edf 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -25,7 +25,7 @@ on: workflow_dispatch: concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }} + group: ${{ github.workflow }}-${{ github.head_ref || github.ref || github.event.pull_request.number }} cancel-in-progress: true permissions: @@ -195,13 +195,15 @@ jobs: needs: [is-not-external-contributor, pre-flight, cicd-wait-in-queue] runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }} if: | - ( + needs.is-not-external-contributor.result != 'cancelled' + && needs.pre-flight.result != 'cancelled' + && needs.cicd-wait-in-queue.result != 'cancelled' + && ( 
success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.force_run_all == 'true' ) - && needs.pre-flight.outputs.is_merge_group == 'false' && !cancelled() steps: - name: Get PR info @@ -214,9 +216,12 @@ jobs: id: sha env: IS_PR: ${{ startsWith(github.ref, 'refs/heads/pull-request/') }} + IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }} run: | if [[ "$IS_PR" == "true" ]]; then SHA=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').merge_commit_sha }} + elif [[ "$IS_MERGE_GROUP" == "true" ]]; then + SHA=${{ github.event.merge_group.head_sha }} else SHA=${GITHUB_SHA} fi @@ -334,13 +339,15 @@ jobs: - cicd-wait-in-queue - cicd-container-build if: | - ( + needs.pre-flight.result != 'cancelled' + && needs.cicd-wait-in-queue.result != 'cancelled' + && needs.cicd-container-build.result != 'cancelled' + && ( success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' ) - && needs.pre-flight.outputs.is_merge_group == 'false' && !cancelled() steps: - name: Checkout @@ -366,13 +373,17 @@ jobs: timeout-minutes: 60 name: "${{ matrix.bucket }} - latest" if: | - ( + needs.is-not-external-contributor.result != 'cancelled' + && needs.pre-flight.result != 'cancelled' + && needs.cicd-wait-in-queue.result != 'cancelled' + && needs.cicd-container-build.result != 'cancelled' + && needs.cicd-parse-unit-tests.result != 'cancelled' + && ( success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' ) - && needs.pre-flight.outputs.is_merge_group == 'false' && !cancelled() env: PIP_DISABLE_PIP_VERSION_CHECK: 1 @@ -394,7 +405,6 @@ jobs: cicd-parse-integration-tests: runs-on: ubuntu-latest - timeout-minutes: 60 needs: - pre-flight - cicd-wait-in-queue @@ -402,7 +412,11 @@ jobs: - 
cicd-unit-tests-latest environment: nemo-ci if: | - ( + needs.pre-flight.result != 'cancelled' + && needs.cicd-wait-in-queue.result != 'cancelled' + && needs.cicd-container-build.result != 'cancelled' + && needs.cicd-unit-tests-latest.result != 'cancelled' + && ( success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' @@ -485,6 +499,7 @@ jobs: echo "integration-tests=$(cat integration-tests.json)" | tee -a "$GITHUB_OUTPUT" cicd-integration-tests-latest: + timeout-minutes: 60 strategy: fail-fast: false matrix: @@ -502,7 +517,12 @@ jobs: PIP_NO_PYTHON_VERSION_WARNING: 1 PIP_ROOT_USER_ACTION: ignore if: | - ( + needs.is-not-external-contributor.result != 'cancelled' + && needs.pre-flight.result != 'cancelled' + && needs.cicd-wait-in-queue.result != 'cancelled' + && needs.cicd-parse-integration-tests.result != 'cancelled' + && needs.cicd-unit-tests-latest.result != 'cancelled' + && ( success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' @@ -554,8 +574,8 @@ jobs: GITHUB_RUN_ID: ${{ github.run_id }} SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' }} run: | - FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "failure")] | length') || echo 0 - SKIPPED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "skipped")] | length') || echo 0 + FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "failure" and .name != "merge-queue-notification")] | length') || echo 0 + SKIPPED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion 
== "skipped" and .name != "merge-queue-notification")] | length') || echo 0 if [ "${FAILED_JOBS:-0}" -eq 0 ] && ([ "${SKIPPED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]); then echo "✅ All previous jobs completed successfully" @@ -648,6 +668,34 @@ jobs: .coverage include-hidden-files: true + merge-queue-notification: + runs-on: ubuntu-latest + if: github.event_name == 'merge_group' + permissions: + pull-requests: write + steps: + - name: Extract PR number from merge group + id: get-pr-number + run: | + # Extract PR number from merge group head_ref (format: refs/heads/gh-readonly-queue/main/pr--) + PR_NUMBER=$(echo "${{ github.event.merge_group.head_ref }}" | sed -n 's/.*\/pr-\([0-9]*\)-.*/\1/p') + echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT + + - name: Comment on PR with action run URL + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.PAT }} + script: | + const prNumber = ${{ steps.get-pr-number.outputs.pr_number }}; + const runUrl = `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: `🔄 Merge queue validation started!\n\nYou can track the progress here: ${runUrl}` + }); + cleanup-taint-node: runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }} needs: From c4b910f4ba637bf06a56128f373930d72426221f Mon Sep 17 00:00:00 2001 From: xuwchen Date: Fri, 13 Feb 2026 12:44:53 +0800 Subject: [PATCH 287/334] [Dev] Add Qwen3-VL support with Megatron-FSDP (#2842) Co-authored-by: Li Tao --- .../distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py | 6 ++++-- .../fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 2 ++ megatron/core/models/gpt/gpt_model.py | 2 +- megatron/core/transformer/fsdp_dtensor_checkpoint.py | 6 +++++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py 
b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index 65b86c0b3a0..671487a30eb 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -1000,9 +1000,11 @@ def _register_pre_backward_param_unshard_hook(module): with_kwargs=True, ) ) - grad_acc_param_list = list(module.parameters()) + grad_acc_param_list = [p for p in module.parameters() if p.requires_grad] else: - grad_acc_param_list = list(module.parameters(recurse=False)) + grad_acc_param_list = [ + p for p in module.parameters(recurse=False) if p.requires_grad + ] for param in grad_acc_param_list: self.grad_acc_hooks[f"grad_acc and reduce for {self.param_to_name[param]}"] = ( diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index cce3c2be00d..aabdd010ed9 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -2433,6 +2433,8 @@ def _reset_parameters(self, old_params, new_params): self.param_to_direct_module[new_param] = self.param_to_direct_module[old_param] del self.param_to_direct_module[old_param] + new_param.requires_grad_(old_param.requires_grad) + for tp_attr in ["_mcore_tp", "_tp_partition_dim", "_tp_duplicated"]: if getattr(old_param, tp_attr, None) is not None: setattr(new_param, tp_attr, getattr(old_param, tp_attr)) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 8e2301cd6f1..5b31ddedf13 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -450,7 +450,7 @@ def _preprocess( # return this extra tensor # this is for backwards compatibility with # legacy unit tests, which break if you - # return a 6 tuple instead of 5. + # return a 7 tuple instead of 6. 
preproc_output += (rotary_pos_cos_sin,) return preproc_output diff --git a/megatron/core/transformer/fsdp_dtensor_checkpoint.py b/megatron/core/transformer/fsdp_dtensor_checkpoint.py index 04ec982e6ff..4dbc6623506 100644 --- a/megatron/core/transformer/fsdp_dtensor_checkpoint.py +++ b/megatron/core/transformer/fsdp_dtensor_checkpoint.py @@ -45,6 +45,7 @@ from megatron.core import parallel_state from megatron.core.tensor_parallel.layers import copy_tensor_model_parallel_attributes from megatron.core.transformer.transformer_layer import TransformerLayer +from megatron.core.utils import get_attr_wrapped_model def get_ep_layer_offset(num_experts: int | None = None) -> int: @@ -196,7 +197,10 @@ def handle_swiglu_in_state_dict(model, model_state_dict, optimizer_state_dict): assert HAVE_MEGATRON_FSDP, "This function requires Megatron-FSDP to be installed." # Extract num_experts from model config for expert parameter processing - num_experts = model.config.num_moe_experts if hasattr(model, 'config') else None + model_config = get_attr_wrapped_model(model, "config", allow_none=True) + num_experts = ( + getattr(model_config, 'num_moe_experts', None) if model_config is not None else None + ) def intersection(s1, s2): # Only works for step=1 From 6059f36e4b52ce66b1859a14a0368a418f647574 Mon Sep 17 00:00:00 2001 From: Kunlun Li <94586211+kunlunl@users.noreply.github.com> Date: Fri, 13 Feb 2026 22:36:12 +0800 Subject: [PATCH 288/334] Add absorbed-mla (#3193) --- .../absorbed_mla.py | 961 ++++++++++++++++++ .../test_absorbed_mla.py | 421 ++++++++ .../test_attention_variant_dsa.py | 0 3 files changed, 1382 insertions(+) create mode 100644 megatron/core/transformer/experimental_attention_variant/absorbed_mla.py create mode 100644 tests/unit_tests/transformer/experimental_attention_variant/test_absorbed_mla.py rename tests/unit_tests/transformer/{ => experimental_attention_variant}/test_attention_variant_dsa.py (100%) diff --git 
a/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py b/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py new file mode 100644 index 00000000000..b56add7302e --- /dev/null +++ b/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py @@ -0,0 +1,961 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +""" +Absorbed Multi-Latent Attention implementation. + +This module implements MLA with matrix absorption: +- Absorbs K's up projection into Q: Q' = Q @ K_up_proj^T +- Applies V's up projection after core attention +- Core attention operates in MQA form with KV being single-head. + +The absorption is mathematically equivalent to standard MLA but enables MQA-style attention which +can be more efficient for certain attention variants. +""" + +import math +from dataclasses import dataclass +from typing import NoReturn, Optional, Union + +import torch + +from megatron.core import tensor_parallel +from megatron.core.models.common.embeddings import ( + RotaryEmbedding, + YarnRotaryEmbedding, + _yarn_get_mscale, + apply_rotary_pos_emb, +) +from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.tensor_parallel.layers import ColumnParallelLinear +from megatron.core.tensor_parallel.mappings import ( + gather_from_sequence_parallel_region, + gather_from_tensor_model_parallel_region, + scatter_to_sequence_parallel_region, +) +from megatron.core.transformer.attention import Attention +from megatron.core.transformer.enums import AttnMaskType +from megatron.core.transformer.spec_utils import ModuleSpec, build_module +from megatron.core.transformer.transformer_config import MLATransformerConfig +from megatron.core.utils import deprecate_inference_params, get_pg_size + +try: + from megatron.core.fusions.fused_mla_yarn_rope_apply import ( + fused_apply_mla_rope_for_kv, + fused_apply_mla_rope_for_q, + ) +except ImportError: + fused_apply_mla_rope_for_kv = None + 
fused_apply_mla_rope_for_q = None + +try: + from megatron.core.extensions.transformer_engine import ( + TEColumnParallelLinear, + TELinear, + set_save_original_input, + ) + from megatron.core.post_training.modelopt.layers import Linear + + HAVE_TE = True +except ImportError: + TEColumnParallelLinear, TELinear, Linear, set_save_original_input = None, None, None, None + HAVE_TE = False + + +@dataclass +class AbsorbedMLASelfAttentionSubmodules: + """ + Configuration class for specifying the submodules of absorbed multi-latent self-attention. + """ + + linear_q_proj: Union[ModuleSpec, type] = None + linear_q_down_proj: Union[ModuleSpec, type] = None + linear_q_up_proj: Union[ModuleSpec, type] = None + linear_kv_down_proj: Union[ModuleSpec, type] = None + linear_k_up_proj: Union[ModuleSpec, type] = None + linear_v_up_proj: Union[ModuleSpec, type] = None + core_attention: Union[ModuleSpec, type] = None + linear_proj: Union[ModuleSpec, type] = None + q_layernorm: Union[ModuleSpec, type] = None + kv_layernorm: Union[ModuleSpec, type] = None + + +class AbsorbedMLASelfAttention(Attention): + """Multi-latent self-attention layer with matrix absorption. + + This layer takes input with shape [s, b, h] and returns output of the same shape. + + Compared to standard MLA, this class implements matrix absorption: + - K's up projection is applied to the query before core attention, not to the compressed KV. + - V's up projection is applied to the output of core attention, not to the compressed KV. + - Core attention operates in MQA form with KV being single-head. + + The absorption is mathematically equivalent to standard MLA but enables MQA-style attention + computation which can be more efficient for certain attention variants. 
+ """ + + def __init__( + self, + config: MLATransformerConfig, + submodules: AbsorbedMLASelfAttentionSubmodules, + layer_number: int, + attn_mask_type=AttnMaskType.padding, + cp_comm_type: Optional[str] = None, + pg_collection: ProcessGroupCollection = None, + ): + if pg_collection is None: + pg_collection = ProcessGroupCollection.use_mpu_process_groups() + + super().__init__( + config=config, + submodules=submodules, + layer_number=layer_number, + attn_mask_type=attn_mask_type, + attention_type="self", + pg_collection=pg_collection, + ) + + assert not config.add_bias_linear, "add_bias_linear is not supported for AbsorbedMLA" + assert not ( + config.tensor_model_parallel_size > 1 and not config.sequence_parallel + ), "AbsorbedMLA requires sequence_parallel when tensor_model_parallel_size > 1" + + self.query_projection_size = self.config.v_head_dim * self.config.num_attention_heads + self.q_head_dim = self.config.qk_head_dim + self.config.qk_pos_emb_head_dim + + # Inference is currently not supported. 
+ self.key_hidden_size = None + self.val_hidden_size = None + + self.recompute_up_proj = ( + self.config.recompute_granularity == 'selective' + and "mla_up_proj" in self.config.recompute_modules + ) + self.qkv_up_checkpoint = None + + mscale = _yarn_get_mscale(self.config.rotary_scaling_factor, self.config.mscale_all_dim) + self.softmax_scale = mscale * mscale / math.sqrt(self.q_head_dim) + self.cache_mla_latents = self.config.cache_mla_latents + assert not self.cache_mla_latents, "cache_mla_latents is not supported for AbsorbedMLA" + + if self.config.rope_type == "rope": + self.rotary_pos_emb = RotaryEmbedding( + self.config.qk_pos_emb_head_dim, + rotary_percent=self.config.rotary_percent, + rotary_base=self.config.rotary_base, + cp_group=self.pg_collection.cp, + ) + elif self.config.rope_type == "yarn": + self.rotary_pos_emb = YarnRotaryEmbedding( + self.config.qk_pos_emb_head_dim, + rotary_base=self.config.rotary_base, + scaling_factor=self.config.rotary_scaling_factor, + original_max_position_embeddings=self.config.original_max_position_embeddings, + beta_fast=self.config.beta_fast, + beta_slow=self.config.beta_slow, + mscale=self.config.mscale, + mscale_all_dim=self.config.mscale_all_dim, + cp_group=self.pg_collection.cp, + ) + else: + raise ValueError( + f"Unsupported RoPE type: {self.config.rope_type}, supported types are " + "'rope' and 'yarn'" + ) + + self.core_attention = build_module( + submodules.core_attention, + config=self.config, + layer_number=self.layer_number, + attn_mask_type=self.attn_mask_type, + attention_type="self", + softmax_scale=self.softmax_scale, + k_channels=self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim, + v_channels=self.config.kv_lora_rank, + cp_comm_type=cp_comm_type, + pg_collection=self.pg_collection, + ) + + if self.config.q_lora_rank is None: + # Not projecting query + self.linear_q_proj = build_module( + submodules.linear_q_proj, + self.config.hidden_size, + self.config.num_attention_heads * self.q_head_dim, + 
config=self.config, + init_method=self.config.init_method, + gather_output=False, + bias=False, + skip_bias_add=False, + is_expert=False, + tp_comm_buffer_name='q_proj', + ) + else: + q_down_proj_kwargs = {} + if submodules.linear_q_down_proj in [TELinear]: + q_down_proj_kwargs['parallel_mode'] = 'duplicated' + elif submodules.linear_q_down_proj in [ + Linear, + TEColumnParallelLinear, + ColumnParallelLinear, + ]: + q_down_proj_kwargs['gather_output'] = False + else: + raise ValueError(f"Unsupported linear_q_down_proj: {submodules.linear_q_down_proj}") + + self.linear_q_down_proj = build_module( + submodules.linear_q_down_proj, + self.config.hidden_size, + self.config.q_lora_rank, + config=self.config, + init_method=self.config.init_method, + bias=False, + skip_bias_add=False, + is_expert=False, + tp_comm_buffer_name='q_down_proj', + skip_weight_param_allocation=False, + tp_group=( + pg_collection.tp + if q_down_proj_kwargs.get('parallel_mode') != 'duplicated' + else None + ), + **q_down_proj_kwargs, + ) + + self.linear_q_up_proj = build_module( + submodules.linear_q_up_proj, + self.config.q_lora_rank, + self.config.num_attention_heads * self.q_head_dim, + config=self.config, + init_method=self.config.init_method, + gather_output=False, + bias=False, + skip_bias_add=False, + is_expert=False, + tp_comm_buffer_name='q_up_proj', + tp_group=pg_collection.tp, + ) + + kv_down_proj_kwargs = {} + if submodules.linear_kv_down_proj in [TELinear]: + kv_down_proj_kwargs['parallel_mode'] = 'duplicated' + elif submodules.linear_kv_down_proj in [ + Linear, + TEColumnParallelLinear, + ColumnParallelLinear, + ]: + kv_down_proj_kwargs['gather_output'] = False + else: + raise ValueError(f"Unsupported linear_kv_down_proj: {submodules.linear_kv_down_proj}") + + self.linear_kv_down_proj = build_module( + submodules.linear_kv_down_proj, + self.config.hidden_size, + self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim, + config=self.config, + init_method=self.config.init_method, + 
bias=False, + skip_bias_add=False, + is_expert=False, + tp_comm_buffer_name='kv_down_proj', + skip_weight_param_allocation=False, + tp_group=( + pg_collection.tp + if kv_down_proj_kwargs.get('parallel_mode') != 'duplicated' + else None + ), + **kv_down_proj_kwargs, + ) + + # Build separate K and V up projections + self.linear_k_up_proj = build_module( + submodules.linear_k_up_proj, + self.config.kv_lora_rank, + self.config.num_attention_heads * self.config.qk_head_dim, + config=self.config, + init_method=self.config.init_method, + gather_output=False, + bias=False, + skip_bias_add=False, + is_expert=False, + tp_comm_buffer_name='k_up_proj', + tp_group=pg_collection.tp, + ) + self.linear_v_up_proj = build_module( + submodules.linear_v_up_proj, + self.config.kv_lora_rank, + self.config.num_attention_heads * self.config.v_head_dim, + config=self.config, + init_method=self.config.init_method, + gather_output=False, + bias=False, + skip_bias_add=False, + is_expert=False, + tp_comm_buffer_name='v_up_proj', + tp_group=pg_collection.tp, + ) + + if self.config.q_lora_rank is not None: + self.q_layernorm = build_module( + submodules.q_layernorm, + hidden_size=self.config.q_lora_rank, + config=self.config, + eps=self.config.layernorm_epsilon, + ) + + self.kv_layernorm = build_module( + submodules.kv_layernorm, + hidden_size=self.config.kv_lora_rank, + config=self.config, + eps=self.config.layernorm_epsilon, + ) + + # Output. 
+ self.linear_proj = build_module( + submodules.linear_proj, + self.query_projection_size, + self.config.hidden_size, + config=self.config, + init_method=self.config.output_layer_init_method, + bias=self.config.add_bias_linear, + input_is_parallel=True, + skip_bias_add=True, + is_expert=False, + tp_comm_buffer_name='proj', + tp_group=self.pg_collection.tp, + ) + + if ( + HAVE_TE + and isinstance(self.linear_proj, TELinear) + and ( + ( + self.config.fp8 + and self.config.fp8_recipe != 'delayed' + and is_te_min_version("2.6.0dev0") + ) + or (self.config.fp4 and is_te_min_version("2.7.0.dev0")) + ) + ): + # For fp8/fp4 training, the output of the fused core_attn is saved by itself, and + # linear_proj also saves the quantized tensor of this output. Here we set the + # linear_proj to save the original input tensors to avoid the extra memory usage of + # the quantized tensor. + set_save_original_input(self.linear_proj) + + def get_query_key_value_tensors( + self, + hidden_states, + key_value_states=None, + packed_seq_params=None, + inference_context=None, + *, + inference_params=None, + ): + """ + Derives absorbed q, compressed q, and compressed kv tensors from `hidden_states`. + """ + # s = sequence length, b = batch size, h = hidden size + assert ( + hidden_states.ndim == 3 + ), f"hidden_states should be 3D, [s, b, h], got {hidden_states.ndim}D" + if packed_seq_params is not None: + assert ( + packed_seq_params.local_cp_size is None + ), "dynamic context parallel is not supported with MLA yet and is planned for future. \ + Please disable dynamic context parallel." 
+ + inference_context = deprecate_inference_params(inference_context, inference_params) + + # ========================================= + # Prepare RoPE and seqlen related params + # ========================================= + rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len( + inference_context, None, hidden_states, self.config, packed_seq_params + ) + + mscale = 1.0 + rotary_pos_cos = None + rotary_pos_sin = None + packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd' + if self.config.rope_type == "rope": + rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len, packed_seq=packed_seq) + else: + if self.config.apply_rope_fusion: + rotary_pos_cos, rotary_pos_sin = self.rotary_pos_emb.get_cached_cos_sin( + rotary_seq_len, dtype=hidden_states.dtype, packed_seq=packed_seq + ) + rotary_pos_emb = None + assert inference_context is None, "Inference with MLA RoPE fusion is not supported" + assert ( + fused_apply_mla_rope_for_q is not None + and fused_apply_mla_rope_for_kv is not None + ), "Fused MLA RoPE apply is not imported successfully" + else: + rotary_pos_emb, mscale = self.rotary_pos_emb(rotary_seq_len, packed_seq=packed_seq) + + if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd': + if packed_seq_params.cu_seqlens_q_padded is not None: + cu_seqlens_q = packed_seq_params.cu_seqlens_q_padded + else: + cu_seqlens_q = packed_seq_params.cu_seqlens_q + if packed_seq_params.cu_seqlens_kv_padded is not None: + cu_seqlens_kv = packed_seq_params.cu_seqlens_kv_padded + else: + cu_seqlens_kv = packed_seq_params.cu_seqlens_kv + else: + cu_seqlens_q = cu_seqlens_kv = None + + # ========================================= + # Q down projection + # ========================================= + if self.config.q_lora_rank is not None: + # if linear_q_down_proj is ColumnParallelLinear: + # q_compressed: [s, b, q_lora_rank / TP] + # elif linear_q_down_proj is Linear: + # q_compressed: [s / TP, b, q_lora_rank] + q_compressed, _ = 
self.linear_q_down_proj(hidden_states) + + # When output is sharded (ColumnParallelLinear), two things are needed to be + # identical to a normal Linear. + # 1. Manually gather output to restore output dim q_lora_rank; + # 2. Scatter sequence back to s / TP if sequence-parallel since it was + # gathered by ColumnParallelLinear. + if q_compressed.size(-1) != self.config.q_lora_rank: + q_compressed = gather_from_tensor_model_parallel_region(q_compressed) + if self.config.sequence_parallel: + q_compressed = scatter_to_sequence_parallel_region(q_compressed) + else: + q_compressed = hidden_states + + # ========================================= + # KV down projection + # ========================================= + # if linear_kv_down_proj is ColumnParallelLinear: + # kv_combined: [s, b, (kv_lora_rank + qk_pos_emb_head_dim) / TP] + # elif linear_kv_down_proj is Linear: + # kv_combined: [s / TP, b, (kv_lora_rank + qk_pos_emb_head_dim)] + kv_combined, _ = self.linear_kv_down_proj(hidden_states) + if kv_combined.size(-1) != self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim: + # kv_combined: [s, b, (kv_lora_rank + qk_pos_emb_head_dim)] + kv_combined = gather_from_tensor_model_parallel_region(kv_combined) + # kv_compressed:[s, b, kv_lora_rank], k_pos_emb: [s, b, qk_pos_emb_head_dim] + kv_compressed, k_pos_emb = torch.split( + kv_combined, [self.config.kv_lora_rank, self.config.qk_pos_emb_head_dim], dim=-1 + ) + if self.config.sequence_parallel: + # kv_compressed:[s / TP, b, kv_lora_rank] + kv_compressed = scatter_to_sequence_parallel_region(kv_compressed) + else: + # kv_compressed:[s / TP, b, kv_lora_rank], k_pos_emb: [s / TP, b, qk_pos_emb_head_dim] + kv_compressed, k_pos_emb = torch.split( + kv_combined, [self.config.kv_lora_rank, self.config.qk_pos_emb_head_dim], dim=-1 + ) + if get_pg_size(self.tp_group) > 1 and self.config.sequence_parallel: + # k_pos_emb: [s, b, qk_pos_emb_head_dim] + k_pos_emb = gather_from_sequence_parallel_region(k_pos_emb, 
group=self.tp_group) + + if packed_seq_params is not None: + assert q_compressed.ndim == 3 and q_compressed.size(1) == 1 + assert kv_compressed.ndim == 3 and kv_compressed.size(1) == 1 + assert k_pos_emb.ndim == 3 and k_pos_emb.size(1) == 1 + # If sequence packing, TE expect [t, h, d] shaped qkv input. + # In Megatron-Core, the qkv shape is [t, 1, h, d]. + # So we need to reshape qkv from [t, 1, h, d] to [t, h, d]. + q_compressed = q_compressed.squeeze(1) + kv_compressed = kv_compressed.squeeze(1) + k_pos_emb = k_pos_emb.squeeze(1) + + # ========================================= + # Apply norm + # ========================================= + if self.config.q_lora_rank is not None: + # q_compressed: [num_tokens, q_lora_rank] + q_compressed = self.q_layernorm(q_compressed) + + kv_compressed = self.kv_layernorm(kv_compressed) + # Because we won't apply V up projection to the compressed KV, so we need to gather it + # manually. + if get_pg_size(self.tp_group) > 1 and self.config.sequence_parallel: + kv_compressed = gather_from_sequence_parallel_region(kv_compressed, group=self.tp_group) + + # ========================================= + # QKV up projection and RoPE apply + # ========================================= + + def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb): + """ + Apply the up projection and RoPE to the query and key. + When sequence packing enabled, the input tensors adopt a packed shape of [t, ...]; + otherwise, they maintain the unpacked shape [s, b, ...]. In subsequent code comments, + we uniformly use [num_tokens, ...] to denote [s, b, ...] or [t, ...] for two cases. 
+ """ + if self.config.q_lora_rank is not None: + # q_compressed: [num_tokens, q_lora_rank] + # q: [num_tokens, n * (qk_head_dim + qk_pos_emb_head_dim)] + q, _ = self.linear_q_up_proj(q_compressed) + else: + # q_compressed: [num_tokens, hidden_size] + # q: [num_tokens, n * (qk_head_dim + qk_pos_emb_head_dim)] + q, _ = self.linear_q_proj(q_compressed) + + # q: [num_tokens, n, q_head_dim] + q = q.view(*q.size()[:-1], self.num_attention_heads_per_partition, self.q_head_dim) + + # [num_tokens, kv_lora_rank] -> [num_tokens, 1, kv_lora_rank] + kv_compressed = torch.unsqueeze(kv_compressed, -2) + # [num_tokens, qk_pos_emb_head_dim] -> [num_tokens, 1, qk_pos_emb_head_dim] + k_pos_emb = torch.unsqueeze(k_pos_emb, -2) + + # Prepare k_up_weight for absorption + # k_up_weight: linear_k_up_proj.weight viewed as [n, qk_head_dim, kv_lora_rank] + assert self.linear_k_up_proj.weight.size(0) == ( + self.num_attention_heads_per_partition * self.config.qk_head_dim + ) + assert self.linear_k_up_proj.weight.size(1) == self.config.kv_lora_rank + k_up_weight = self.linear_k_up_proj.weight.view( + self.num_attention_heads_per_partition, + self.config.qk_head_dim, + self.config.kv_lora_rank, + ) + + if self.config.apply_rope_fusion: + # q_no_pe: [num_tokens, n, qk_head_dim] + # q_pos_emb: [num_tokens, n, qk_pos_emb_head_dim] + q_no_pe, q_pos_emb = torch.split( + q, [self.config.qk_head_dim, self.config.qk_pos_emb_head_dim], dim=-1 + ) + + # Absorb k_up_weight into q_no_pe + # q_absorbed: [num_tokens, n, kv_lora_rank] + q_absorbed = torch.einsum("...nd,ndk->...nk", q_no_pe, k_up_weight) + q_absorbed = q_absorbed.contiguous() + assert q_absorbed.ndim == q.ndim + assert q_absorbed.shape[:-1] == q.shape[:-1] + assert q_absorbed.size(-1) == self.config.kv_lora_rank + + # q_absorbed: [num_tokens, n, (kv_lora_rank + qk_pos_emb_head_dim)] + q_absorbed = torch.cat([q_absorbed, q_pos_emb], dim=-1) + # kv_compressed: [num_tokens, 1, (kv_lora_rank + qk_pos_emb_head_dim)] + kv_compressed = 
torch.cat([kv_compressed, k_pos_emb], dim=-1) + + cp_rank = self.pg_collection.cp.rank() + cp_size = self.pg_collection.cp.size() + q_absorbed = fused_apply_mla_rope_for_q( + q_absorbed, + rotary_pos_cos, + rotary_pos_sin, + self.config.kv_lora_rank, + self.config.qk_pos_emb_head_dim, + cu_seqlens_q, + cp_rank, + cp_size, + ) + kv_compressed = fused_apply_mla_rope_for_q( + kv_compressed, + rotary_pos_cos, + rotary_pos_sin, + self.config.kv_lora_rank, + self.config.qk_pos_emb_head_dim, + cu_seqlens_kv, + cp_rank, + cp_size, + ) + else: + q_len = q.size()[0] + if inference_context is not None: + # add offset to the sequence start for inference + sequence_start = inference_context.sequence_len_offset + sequence_end = sequence_start + q_len + rotary_pos_emb = rotary_pos_emb[sequence_start:sequence_end] + elif packed_seq_params is None or self.config.context_parallel_size == 1: + # Shorten rotary_pos_emb to the sequence length when inference_params + # is not provided. This makes sure we can run forward directly with + # any sequence length. During training, the sequence length is always + # the full rotary_pos_emb length, except for sequence packing + CP. + # When sequence packing and context parallel are both enabled, the + # position embedding will not split rotary_pos_emb, so it may exceed + # the sequence length on this CP rank, but we need the full rotary_pos_emb + # to cover the full sequence, so we do not shorten it here. 
+ rotary_pos_emb = rotary_pos_emb[0:q_len] + + # q_no_pe: [num_tokens, n, qk_head_dim] + # q_pos_emb: [num_tokens, n, qk_pos_emb_head_dim] + q_no_pe, q_pos_emb = torch.split( + q, [self.config.qk_head_dim, self.config.qk_pos_emb_head_dim], dim=-1 + ) + + # Absorb k_up_weight into q_no_pe + # q_absorbed: [num_tokens, n, kv_lora_rank] + q_absorbed = torch.einsum("...nd,ndk->...nk", q_no_pe, k_up_weight) + q_absorbed = q_absorbed.contiguous() + assert q_absorbed.ndim == q.ndim + assert q_absorbed.shape[:-1] == q.shape[:-1] + assert q_absorbed.size(-1) == self.config.kv_lora_rank + + # Apply RoPE to q_pos_emb: [num_tokens, n, qk_pos_emb_head_dim] + q_pos_emb = apply_rotary_pos_emb( + q_pos_emb, + rotary_pos_emb, + config=self.config, + cu_seqlens=cu_seqlens_q, + mscale=mscale, + cp_group=self.pg_collection.cp, + ) + # k_pos_emb:[num_tokens, 1, qk_pos_emb_head_dim] + k_pos_emb = apply_rotary_pos_emb( + k_pos_emb, + rotary_pos_emb, + config=self.config, + cu_seqlens=cu_seqlens_kv, + mscale=mscale, + cp_group=self.pg_collection.cp, + ) + + # query: [num_tokens, n, (kv_lora_rank + qk_pos_emb_head_dim)] + q_absorbed = torch.cat([q_absorbed, q_pos_emb], dim=-1) + # key: [num_tokens, 1, (kv_lora_rank + qk_pos_emb_head_dim)] + kv_compressed = torch.cat([kv_compressed, k_pos_emb], dim=-1) + + assert q_absorbed.is_contiguous() + assert kv_compressed.is_contiguous() + + return q_absorbed, kv_compressed + + if self.recompute_up_proj: + quantization = self.config.fp8 or self.config.fp4 + assert not quantization, "FP8/FP4 is not supported for AbsorbedMLA" + self.qkv_up_checkpoint = tensor_parallel.CheckpointWithoutOutput(fp8=quantization) + q_absorbed, kv_compressed = self.qkv_up_checkpoint.checkpoint( + qkv_up_proj_and_rope_apply, q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb + ) + else: + assert not self.cache_mla_latents, "cache_mla_latents is not supported for AbsorbedMLA" + q_absorbed, kv_compressed = qkv_up_proj_and_rope_apply( + q_compressed, kv_compressed, 
k_pos_emb, rotary_pos_emb + ) + + return q_absorbed, kv_compressed, q_compressed + + def _checkpointed_attention_forward( + self, + q_absorbed, + k_compressed, + v_compressed, + hidden_states, + q_compressed, + attention_mask, + rotary_pos_emb=None, + attn_mask_type=None, + attention_bias=None, + packed_seq_params=None, + ): + """Forward method with selective activation checkpointing.""" + + def custom_forward(*inputs): + q_absorbed = inputs[0] + k_compressed = inputs[1] + v_compressed = inputs[2] + hidden_states = inputs[3] + q_compressed = inputs[4] + attention_mask = inputs[5] + attn_mask_type = inputs[7] + attention_bias = inputs[8] + packed_seq_params = inputs[9] + attn_mask_type = AttnMaskType(attn_mask_type.item()) + output_ = self.core_attention( + q_absorbed, + k_compressed, + v_compressed, + hidden_states, + q_compressed, + attention_mask, + attn_mask_type=attn_mask_type, + attention_bias=attention_bias, + packed_seq_params=packed_seq_params, + ) + return output_ + + if attn_mask_type is None: + attn_mask_type = self.attn_mask_type + attn_mask_type = torch.tensor([attn_mask_type.value], dtype=torch.int) + hidden_states = tensor_parallel.checkpoint( + custom_forward, + False, + q_absorbed, + k_compressed, + v_compressed, + hidden_states, + q_compressed, + attention_mask, + rotary_pos_emb, + attn_mask_type, + attention_bias, + packed_seq_params, + ) + + return hidden_states + + def forward( + self, + hidden_states, + attention_mask, + key_value_states=None, + inference_context=None, + rotary_pos_emb=None, + rotary_pos_cos=None, + rotary_pos_sin=None, + rotary_pos_cos_sin=None, + attention_bias=None, + packed_seq_params=None, + sequence_len_offset=None, + *, + inference_params=None, + ): + """Forward pass for multi-latent attention with matrix absorption""" + assert rotary_pos_emb is None, "Rotary position embeddings should not be passed into MLA." + assert attention_bias is None, "Attention bias should not be passed into MLA." 
+ assert ( + rotary_pos_cos is None and rotary_pos_sin is None + ), "MLA does not support Flash Decoding" + assert not rotary_pos_cos_sin, "Flash-infer rope has not been tested with MLA." + assert not ( + self.training and self.cache_mla_latents + ), "cache_mla_latents conflicts with training." + assert ( + inference_context is None and inference_params is None + ), "Inference is not supported for AbsorbedMLA" + + # ===================== + # Query, Key, and Value + # ===================== + q_absorbed, kv_compressed, q_compressed = self.get_query_key_value_tensors( + hidden_states, key_value_states, packed_seq_params, inference_context=inference_context + ) + + assert q_absorbed.is_contiguous() + assert q_compressed.is_contiguous() + assert kv_compressed.is_contiguous() + + # ================================== + # Core attention computation + # ================================== + if self.checkpoint_core_attention and self.training: + core_attn_out = self._checkpointed_attention_forward( + q_absorbed, + kv_compressed, + None, + hidden_states, + q_compressed, + attention_mask, + packed_seq_params=packed_seq_params, + ) + else: + core_attn_out = self.core_attention( + q_absorbed, + kv_compressed, + None, + hidden_states, + q_compressed, + attention_mask, + packed_seq_params=packed_seq_params, + attn_mask_type=self.attn_mask_type, + ) + + # ================================== + # Apply V up projection + # ================================== + assert self.linear_v_up_proj.weight.size(0) == ( + self.num_attention_heads_per_partition * self.config.v_head_dim + ) + assert self.linear_v_up_proj.weight.size(1) == self.config.kv_lora_rank + v_up_weight = self.linear_v_up_proj.weight.view( + self.num_attention_heads_per_partition, self.config.v_head_dim, self.config.kv_lora_rank + ) + core_attn_out = core_attn_out.view( + *core_attn_out.shape[:-1], + self.num_attention_heads_per_partition, + self.config.kv_lora_rank, + ) + core_attn_out = torch.einsum("...nc,ndc->...nd", 
core_attn_out, v_up_weight) + core_attn_out = core_attn_out.contiguous() + core_attn_out = core_attn_out.view(*core_attn_out.shape[:-2], -1) + + if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd': + core_attn_out = core_attn_out.unsqueeze(1) + + assert core_attn_out.ndim == hidden_states.ndim + assert core_attn_out.shape[0] == ( + hidden_states.shape[0] * self.config.tensor_model_parallel_size + ), ( + f"{core_attn_out.shape[0]} != " + f"{hidden_states.shape[0]} * " + f"{self.config.tensor_model_parallel_size}" + ) + assert core_attn_out.shape[1:-1] == hidden_states.shape[1:-1] + assert core_attn_out.size(-1) == ( + self.config.v_head_dim * self.num_attention_heads_per_partition + ) + + if self.recompute_up_proj: + assert self.qkv_up_checkpoint is not None + self.qkv_up_checkpoint.discard_output_and_register_recompute(core_attn_out) + self.qkv_up_checkpoint = None + + # ================= + # Output. [sq, b, h] + # ================= + output, bias = self.linear_proj(core_attn_out) + + return output, bias + + def backward_dw(self) -> NoReturn: + """Execute weight gradient computation.""" + self._backward_kv_proj() + self._backward_q_proj() + self._backward_output_proj() + + def _backward_kv_proj(self): + """Computes weight gradients of KV projection layers.""" + self.linear_k_up_proj.backward_dw() + self.linear_v_up_proj.backward_dw() + self.linear_kv_down_proj.backward_dw() + + def _backward_q_proj(self): + """Computes weight gradients of Q projection layers.""" + if self.config.q_lora_rank is None: + self.linear_q_proj.backward_dw() + else: + self.linear_q_down_proj.backward_dw() + self.linear_q_up_proj.backward_dw() + + def _backward_output_proj(self): + """Computes weight gradients of output projection layer.""" + self.linear_proj.backward_dw() + + def set_for_recompute_input_layernorm(self): + """Set the attention layer for recompute input_layernorm. 
Only needed for fp8/fp4.""" + from megatron.core.extensions.transformer_engine import set_save_original_input + + if self.config.q_lora_rank is not None: + set_save_original_input(self.linear_q_down_proj) + set_save_original_input(self.linear_kv_down_proj) + + def clip_qk(self): + """ + QK Clipping is a technique to clip the query and key attention logits to prevent the + attention logits from exploding. Per MuonClip usage, we update the weight by calling this + function after Muon optimizer step. + """ + raise NotImplementedError("clip_qk is not implemented for AbsorbedMLA") + + def _combine_kv_weights(self, k_weight, v_weight): + """Combine separate K and V weights into MLA's interleaved format. + + MLA's linear_kv_up_proj weight layout (per head interleaved): + [head0_K, head0_V, head1_K, head1_V, ...] + + AbsorbedMLA's separate weights layout: + K: [head0_K, head1_K, ...] + V: [head0_V, head1_V, ...] + + This method interleaves K and V per head to match MLA's format. + + Args: + k_weight: [num_heads_per_partition * qk_head_dim, kv_lora_rank] + v_weight: [num_heads_per_partition * v_head_dim, kv_lora_rank] + + Returns: + combined: [num_heads_per_partition * (qk_head_dim + v_head_dim), kv_lora_rank] + """ + n = self.num_attention_heads_per_partition + qk_dim = self.config.qk_head_dim + v_dim = self.config.v_head_dim + lora_rank = self.config.kv_lora_rank + + # Reshape to per-head format + k_per_head = k_weight.view(n, qk_dim, lora_rank) + v_per_head = v_weight.view(n, v_dim, lora_rank) + + # Concatenate K and V for each head along dim=1 + # Result: [n, qk_dim + v_dim, lora_rank] + combined_per_head = torch.cat([k_per_head, v_per_head], dim=1) + + # Reshape back to linear weight format + combined_weight = combined_per_head.view(n * (qk_dim + v_dim), lora_rank) + + return combined_weight + + def _split_kv_weights(self, combined_weight): + """Split MLA's interleaved KV weight into separate K and V weights. 
+ + MLA's linear_kv_up_proj weight layout (per head interleaved): + [head0_K, head0_V, head1_K, head1_V, ...] + + This method extracts K and V into separate tensors: + K: [head0_K, head1_K, ...] + V: [head0_V, head1_V, ...] + + Args: + combined_weight: [num_heads_per_partition * (qk_head_dim + v_head_dim), kv_lora_rank] + + Returns: + k_weight: [num_heads_per_partition * qk_head_dim, kv_lora_rank] + v_weight: [num_heads_per_partition * v_head_dim, kv_lora_rank] + """ + n = self.num_attention_heads_per_partition + qk_dim = self.config.qk_head_dim + v_dim = self.config.v_head_dim + lora_rank = self.config.kv_lora_rank + + # Reshape to per-head format + combined_per_head = combined_weight.view(n, qk_dim + v_dim, lora_rank) + + # Split K and V for each head (slicing creates non-contiguous views) + k_per_head = combined_per_head[:, :qk_dim, :] # [n, qk_dim, lora_rank] + v_per_head = combined_per_head[:, qk_dim:, :] # [n, v_dim, lora_rank] + + # Make contiguous and reshape back to linear weight format + k_weight = k_per_head.contiguous().view(n * qk_dim, lora_rank) + v_weight = v_per_head.contiguous().view(n * v_dim, lora_rank) + + return k_weight, v_weight + + def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs): + """Handle loading from checkpoints with combined KV up projection weights. + + This method splits the combined 'linear_kv_up_proj.weight' (which has per-head + interleaved K and V) into separate 'linear_k_up_proj.weight' and 'linear_v_up_proj.weight'. 
+ """ + combined_key = f'{prefix}linear_kv_up_proj.weight' + k_up_key = f'{prefix}linear_k_up_proj.weight' + v_up_key = f'{prefix}linear_v_up_proj.weight' + + # Split combined KV weights into separate K and V + if combined_key in state_dict: + combined_weight = state_dict[combined_key] + + # Split with proper per-head de-interleaving + k_weight, v_weight = self._split_kv_weights(combined_weight) + + state_dict[k_up_key] = k_weight + state_dict[v_up_key] = v_weight + + del state_dict[combined_key] + + combined_extra_state_key = f'{prefix}linear_kv_up_proj._extra_state' + k_up_extra_state_key = f'{prefix}linear_k_up_proj._extra_state' + v_up_extra_state_key = f'{prefix}linear_v_up_proj._extra_state' + + if combined_extra_state_key in state_dict: + combined_extra_state = state_dict[combined_extra_state_key] + + assert isinstance(combined_extra_state, torch.Tensor) + # Now we can only handle the case where the extra state is empty. + assert combined_extra_state.numel() == 0 + + state_dict[k_up_extra_state_key] = combined_extra_state.clone() + state_dict[v_up_extra_state_key] = combined_extra_state.clone() + + del state_dict[combined_extra_state_key] + + super()._load_from_state_dict(state_dict, prefix, *args, **kwargs) diff --git a/tests/unit_tests/transformer/experimental_attention_variant/test_absorbed_mla.py b/tests/unit_tests/transformer/experimental_attention_variant/test_absorbed_mla.py new file mode 100644 index 00000000000..4ed9ff8af46 --- /dev/null +++ b/tests/unit_tests/transformer/experimental_attention_variant/test_absorbed_mla.py @@ -0,0 +1,421 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ +import random +from types import SimpleNamespace +from typing import List, Optional, Tuple + +import pytest +import torch +import torch.distributed as dist + +from megatron.core import parallel_state +from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider +from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed +from megatron.core.transformer.enums import AttnMaskType +from megatron.core.transformer.experimental_attention_variant.absorbed_mla import ( + AbsorbedMLASelfAttention, + AbsorbedMLASelfAttentionSubmodules, +) +from megatron.core.transformer.identity_op import IdentityOp +from megatron.core.transformer.multi_latent_attention import ( + MLASelfAttention, + MLASelfAttentionSubmodules, +) +from megatron.core.utils import init_method_normal, scaled_init_method_normal +from tests.unit_tests.test_utilities import Utils + + +class MockCoreAttention(torch.nn.Module): + """Mock core attention for testing MLA computation flow.""" + + def __init__(self, *args, **kwargs): + super().__init__() + self.softmax_scale = kwargs.get("softmax_scale") + self.k_channels = kwargs.get("k_channels") + self.v_channels = kwargs.get("v_channels") + self.pg_collection = kwargs.get("pg_collection") + + def forward( + self, q, k, v, *args, packed_seq_params: Optional[PackedSeqParams] = None, **kwargs + ): + """Mock forward pass.""" + if packed_seq_params is None: + return self._forward_standard(q, k, v) + else: + return self._forward_thd(q, k, v, packed_seq_params) + + def _forward_standard(self, q, k, v): + """Standard forward for [s, b, n, d] format.""" + sq, b, n = q.shape[:3] + dtype = q.dtype + if v is None: + # Absorbed MLA + assert q.shape[-1] == self.k_channels + assert k.shape == (sq, b, 1, self.k_channels) + v = k[..., : self.v_channels] + k = k.expand(-1, -1, n, -1) + v = v.expand(-1, -1, n, -1) + else: + # Standard MLA + assert k.shape == q.shape + assert 
v.shape[:-1] == q.shape[:-1] + + q = q.permute(1, 2, 0, 3).contiguous() + k = k.permute(1, 2, 3, 0).contiguous() + v = v.permute(1, 2, 0, 3).contiguous() + + q = q.view(b * n, q.size(-2), q.size(-1)).float() + k = k.view(b * n, k.size(-2), k.size(-1)).float() + v = v.view(b * n, v.size(-2), v.size(-1)).float() + + score = torch.bmm(q, k) * self.softmax_scale + score = torch.nn.functional.softmax(score, dim=-1, dtype=torch.float32) + out = torch.bmm(score, v) + out = out.to(dtype) + out = out.permute(1, 0, 2) + out = out.reshape(sq, b, -1) + + return out + + def _forward_thd(self, q, k, v, packed_seq_params): + """Forward for THD packed sequence format.""" + cu_seqlens = packed_seq_params.cu_seqlens_q + num_seqs = len(cu_seqlens) - 1 + + sq, n = q.shape[:2] + dtype = q.dtype + if v is None: + # Absorbed MLA + assert q.shape[-1] == self.k_channels + assert k.shape == (sq, 1, self.k_channels) + v = k[..., : self.v_channels] + k = k.expand(-1, n, -1) + v = v.expand(-1, n, -1) + else: + # Standard MLA + assert k.shape == q.shape + assert v.shape[:-1] == q.shape[:-1] + + out_list = [] + for i in range(num_seqs): + start = cu_seqlens[i] // self.pg_collection.cp.size() + end = cu_seqlens[i + 1] // self.pg_collection.cp.size() + q_seq = q[start:end] + k_seq = k[start:end] + v_seq = v[start:end] + + q_seq = q_seq.permute(1, 0, 2).contiguous().float() + k_seq = k_seq.permute(1, 2, 0).contiguous().float() + v_seq = v_seq.permute(1, 0, 2).contiguous().float() + + score = torch.bmm(q_seq, k_seq) * self.softmax_scale + score = torch.nn.functional.softmax(score, dim=-1, dtype=torch.float32) + out = torch.bmm(score, v_seq) + out = out.to(dtype) + out = out.permute(1, 0, 2).contiguous() + out = out.reshape(out.shape[0], -1) + out_list.append(out) + + return torch.cat(out_list, dim=0) + + +def get_mock_mla_config( + tensor_model_parallel_size: int, + context_parallel_size: int, + sequence_parallel: bool, + recompute_mla_up_proj: bool, +) -> SimpleNamespace: + """Create test config 
with all attributes used in MLA.""" + return SimpleNamespace( + multi_latent_attention=True, + hidden_size=7168, + num_attention_heads=128, + q_lora_rank=1536, + kv_lora_rank=512, + qk_head_dim=128, + qk_pos_emb_head_dim=64, + v_head_dim=128, + add_bias_linear=False, + bf16=True, + params_dtype=torch.bfloat16, + layernorm_epsilon=1e-5, + normalization="RMSNorm", + layernorm_zero_centered_gamma=False, + expert_model_parallel_size=1, + tensor_model_parallel_size=tensor_model_parallel_size, + sequence_parallel=tensor_model_parallel_size > 1 and sequence_parallel, + context_parallel_size=context_parallel_size, + apply_rope_fusion=False, + rope_type="yarn", + rotary_scaling_factor=40, + mscale=1.0, + mscale_all_dim=1.0, + rotary_base=10000, + original_max_position_embeddings=4096, + beta_fast=32, + beta_slow=1, + rotary_interleaved=False, + recompute_granularity="selective" if recompute_mla_up_proj else None, + recompute_modules=["mla_up_proj"] if recompute_mla_up_proj else [], + fine_grained_activation_offloading=False, + gradient_accumulation_fusion=False, + fp8=False, + fp4=False, + init_method=init_method_normal(0.02), + output_layer_init_method=scaled_init_method_normal(0.02, 61, multiplier=2.0), + kv_channels=56, + num_query_groups=128, + batch_invariant_mode=False, + cache_mla_latents=False, + use_cpu_initialization=False, + perform_initialization=True, + symmetric_ar_type=None, + disable_parameter_transpose_cache=False, + init_model_with_meta_device=False, + delay_wgrad_compute=False, + tp_comm_overlap=False, + experimental_attention_variant=None, + softmax_scale=None, + ) + + +def get_absorbed_mla_submodules( + down_proj_use_column_parallel: bool, qk_layernorm: bool, rms_norm: bool +) -> AbsorbedMLASelfAttentionSubmodules: + """Get submodules for AbsorbedMLASelfAttention testing.""" + backend = TESpecProvider() + linear_q_down_proj = ( + backend.column_parallel_linear() if down_proj_use_column_parallel else backend.linear() + ) + linear_kv_down_proj = ( + 
backend.column_parallel_linear() if down_proj_use_column_parallel else backend.linear()
+    )
+    qk_norm = backend.layer_norm(rms_norm=rms_norm, for_qk=True) if qk_layernorm else IdentityOp
+    return AbsorbedMLASelfAttentionSubmodules(
+        linear_q_proj=backend.column_parallel_linear(),
+        linear_q_down_proj=linear_q_down_proj,
+        linear_q_up_proj=backend.column_parallel_linear(),
+        linear_kv_down_proj=linear_kv_down_proj,
+        linear_k_up_proj=backend.column_parallel_linear(),
+        linear_v_up_proj=backend.column_parallel_linear(),
+        core_attention=MockCoreAttention,
+        linear_proj=backend.row_parallel_linear(),
+        q_layernorm=qk_norm,
+        kv_layernorm=qk_norm,
+    )
+
+
+def get_mla_submodules(
+    down_proj_use_column_parallel: bool, qk_layernorm: bool, rms_norm: bool
+) -> MLASelfAttentionSubmodules:
+    """Get submodules for standard MLASelfAttention testing."""
+    backend = TESpecProvider()
+    linear_q_down_proj = (
+        backend.column_parallel_linear() if down_proj_use_column_parallel else backend.linear()
+    )
+    linear_kv_down_proj = (
+        backend.column_parallel_linear() if down_proj_use_column_parallel else backend.linear()
+    )
+    qk_norm = backend.layer_norm(rms_norm=rms_norm, for_qk=True) if qk_layernorm else IdentityOp
+    return MLASelfAttentionSubmodules(
+        linear_q_proj=backend.column_parallel_linear(),
+        linear_q_down_proj=linear_q_down_proj,
+        linear_q_up_proj=backend.column_parallel_linear(),
+        linear_kv_down_proj=linear_kv_down_proj,
+        linear_kv_up_proj=backend.column_parallel_linear(),
+        core_attention=MockCoreAttention,
+        linear_proj=backend.row_parallel_linear(),
+        q_layernorm=qk_norm,
+        kv_layernorm=qk_norm,
+    )
+
+
+# TODO: Consider using get_gpt_layer_with_transformer_engine_spec from
+# megatron.core.models.gpt.gpt_layer_specs to simplify submodule setup and cover real specs.
+# TODO: Add test case to cover TP > 1 but SP = False.
+ + +@pytest.mark.parametrize("tp_cp_sp", [[1, 1, False], [2, 1, True], [1, 2, False], [2, 2, True]]) +@pytest.mark.parametrize("qkv_format", ['sbhd', 'thd']) +@pytest.mark.parametrize("down_proj_use_column_parallel", [False, True]) +@pytest.mark.parametrize("recompute_mla_up_proj", [False, True]) +def test_functionality( + tp_cp_sp: List, + qkv_format: str, + down_proj_use_column_parallel: bool, + recompute_mla_up_proj: bool, +): + """Test that AbsorbedMLASelfAttention is equivalent to standard MLA.""" + tp_size, cp_size, sp = tp_cp_sp + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp_size, context_parallel_size=cp_size + ) + model_parallel_cuda_manual_seed(123) + + # Create model + config = get_mock_mla_config( + tensor_model_parallel_size=tp_size, + context_parallel_size=cp_size, + sequence_parallel=sp, + recompute_mla_up_proj=recompute_mla_up_proj, + ) + absorbed_submodules = get_absorbed_mla_submodules( + down_proj_use_column_parallel=down_proj_use_column_parallel, + qk_layernorm=True, + rms_norm=True, + ) + standard_submodules = get_mla_submodules( + down_proj_use_column_parallel=down_proj_use_column_parallel, + qk_layernorm=True, + rms_norm=True, + ) + absorbed_mla = AbsorbedMLASelfAttention( + config=config, + submodules=absorbed_submodules, + layer_number=0, + attn_mask_type=AttnMaskType.causal, + cp_comm_type="all_gather" if cp_size > 1 else None, + pg_collection=None, + ).cuda() + standard_mla = MLASelfAttention( + config=config, + submodules=standard_submodules, + layer_number=0, + attn_mask_type=AttnMaskType.causal, + cp_comm_type="all_gather" if cp_size > 1 else None, + pg_collection=None, + ).cuda() + + state_dict = standard_mla.state_dict() + absorbed_mla.load_state_dict(state_dict) + + # Prepare random data + if qkv_format == 'thd': + # Create random seqlens + num_seqs, min_len, max_len = 3, 128, 1024 + divisor = tp_size * cp_size * 2 + random.seed(42) + seqlens = [random.randint(min_len, max_len) // divisor * divisor for _ in 
range(num_seqs)] + # Create cumulative sequence lengths + cu_seqlens = [0] + for length in seqlens: + cu_seqlens.append(cu_seqlens[-1] + length) + total_tokens = cu_seqlens[-1] + cu_seqlens = torch.IntTensor(cu_seqlens).cuda() + max_seqlen = max(seqlens) + # Create packed sequence parameters + packed_seq_params = PackedSeqParams( + cu_seqlens_q=cu_seqlens, + cu_seqlens_q_padded=cu_seqlens, + cu_seqlens_kv=cu_seqlens, + cu_seqlens_kv_padded=cu_seqlens, + max_seqlen_q=max_seqlen, + max_seqlen_kv=max_seqlen, + qkv_format='thd', + ) + hidden_states = torch.randn( + (total_tokens // cp_size // (tp_size if sp else 1), 1, config.hidden_size), + dtype=torch.bfloat16, + device='cuda', + ) + grads = torch.randn_like(hidden_states) + else: + # When SP is enabled, sequence is sharded across TP ranks + # When SP is disabled, each TP rank has the full sequence + seqlen = 1024 // cp_size // (tp_size if sp else 1) + hidden_states = torch.randn((seqlen, 3, 7168), dtype=torch.bfloat16, device='cuda') + grads = torch.randn_like(hidden_states) + packed_seq_params = None + + # Forward & Backward + for name, param in absorbed_mla.named_parameters(): + if param.grad is not None: + param.grad.zero_() + absorbed_outputs, _ = absorbed_mla( + hidden_states, attention_mask=None, packed_seq_params=packed_seq_params + ) + absorbed_outputs.backward(grads) + + for name, param in standard_mla.named_parameters(): + if param.grad is not None: + param.grad.zero_() + standard_outputs, _ = standard_mla( + hidden_states, attention_mask=None, packed_seq_params=packed_seq_params + ) + standard_outputs.backward(grads) + + def _calculate_tensor_similarity(x, y): + x, y = x.data.double(), y.data.double() + denominator = (x * x + y * y).sum() + if denominator == 0: + return 1 + sim = 2 * (x * y).sum() / denominator + return sim + + # Compute cosine similarity + absorbed_flat = absorbed_outputs.flatten().float() + standard_flat = standard_outputs.flatten().float() + cosine_sim = 
torch.nn.functional.cosine_similarity( + absorbed_flat.unsqueeze(0), standard_flat.unsqueeze(0) + ).item() + assert cosine_sim > 0.9999, f"output cosine similarity = {cosine_sim} < 0.9999" + assert _calculate_tensor_similarity(absorbed_outputs, standard_outputs) > 0.9999 + torch.testing.assert_close(absorbed_outputs, standard_outputs, atol=5e-3, rtol=5e-3) + + for name, param in absorbed_mla.named_parameters(): + assert param.grad is not None + for name, param in standard_mla.named_parameters(): + assert param.grad is not None + + # Compare gradients with cosine similarity + absorbed_grads = dict(absorbed_mla.named_parameters()) + standard_grads = dict(standard_mla.named_parameters()) + + # Map parameter names between absorbed and standard MLA + # Most parameters have the same name, except for K/V up proj + for name, param in standard_grads.items(): + if 'linear_kv_up_proj' in name: + # Special handling: combine k and v up proj grads from absorbed_mla + k_name = name.replace('linear_kv_up_proj', 'linear_k_up_proj') + v_name = name.replace('linear_kv_up_proj', 'linear_v_up_proj') + + k_grad = absorbed_grads[k_name].grad + v_grad = absorbed_grads[v_name].grad + + # Combine k and v grads (interleaved by head) + # k_grad: [n * qk_head_dim, kv_lora_rank] + # v_grad: [n * v_head_dim, kv_lora_rank] + # combined: [n * (qk_head_dim + v_head_dim), kv_lora_rank] + n_heads = absorbed_mla.num_attention_heads_per_partition + qk_head_dim = absorbed_mla.config.qk_head_dim + v_head_dim = absorbed_mla.config.v_head_dim + kv_lora_rank = absorbed_mla.config.kv_lora_rank + + k_grad_3d = k_grad.view(n_heads, qk_head_dim, kv_lora_rank) + v_grad_3d = v_grad.view(n_heads, v_head_dim, kv_lora_rank) + combined_grad_3d = torch.cat([k_grad_3d, v_grad_3d], dim=1) + combined_grad = combined_grad_3d.view(-1, kv_lora_rank) + + absorbed_grad_flat = combined_grad.flatten().float() + standard_grad_flat = param.grad.flatten().float() + + cos_sim = torch.nn.functional.cosine_similarity( + 
absorbed_grad_flat.unsqueeze(0), standard_grad_flat.unsqueeze(0) + ).item() + assert cos_sim > 0.9999, f"name: {name}, cosine similarity = {cos_sim} < 0.9999" + assert _calculate_tensor_similarity(combined_grad, param.grad) > 0.9999 + else: + absorbed_grad = absorbed_grads[name].grad + standard_grad = param.grad + + absorbed_grad_flat = absorbed_grad.flatten().float() + standard_grad_flat = standard_grad.flatten().float() + + cos_sim = torch.nn.functional.cosine_similarity( + absorbed_grad_flat.unsqueeze(0), standard_grad_flat.unsqueeze(0) + ).item() + assert cos_sim > 0.9999, f"name: {name}, cosine similarity = {cos_sim} < 0.9999" + assert _calculate_tensor_similarity(absorbed_grad, standard_grad) > 0.9999 + + Utils.destroy_model_parallel() diff --git a/tests/unit_tests/transformer/test_attention_variant_dsa.py b/tests/unit_tests/transformer/experimental_attention_variant/test_attention_variant_dsa.py similarity index 100% rename from tests/unit_tests/transformer/test_attention_variant_dsa.py rename to tests/unit_tests/transformer/experimental_attention_variant/test_attention_variant_dsa.py From 9f2ca96bd1e80990a79114cda40d6b2473f4fb65 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Fri, 13 Feb 2026 15:25:23 -0600 Subject: [PATCH 289/334] cp: Remove gpu sanity check (#3420) into dev (#3421) Signed-off-by: Charlie Truong --- .github/actions/action.yml | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/.github/actions/action.yml b/.github/actions/action.yml index 4a838b24d95..decaa5ff3f8 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -58,38 +58,6 @@ runs: shell: bash -x -e -u -o pipefail {0} run: echo "node_name=$NODE_NAME" | tee -a "$GITHUB_OUTPUT" - - name: GPU Sanity Check - shell: bash -x -e -u -o pipefail {0} - run: | - echo "Starting GPU Sanity Check..." - - # 1. Check for active Compute Processes - # query-compute-apps returns a list of PIDs using the GPU. If empty, we are good. 
- OPEN_PROCESSES=$(docker run --rm --gpus all ubuntu nvidia-smi --query-compute-apps=pid,process_name --format=csv,noheader) - - if [ -n "$OPEN_PROCESSES" ]; then - echo "::error::❌ GPU is not clean! Found active processes:" - echo "$OPEN_PROCESSES" - else - echo "✅ No active compute processes found." - fi - - # 2. Check VRAM Usage (Optional but recommended) - # We allow a small buffer (e.g., < 300MiB) for driver overhead/Xorg, - # though on headless K8s nodes this should be very close to 0. - - MEMORY_USAGES=$(docker run --rm --gpus all ubuntu nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits) - - # Check each GPU visible to the container - for MEMORY in $MEMORY_USAGES; do - if [ "$MEMORY" -gt 300 ]; then - echo "::error::❌ GPU VRAM usage is suspiciously high: ${MEMORY} MiB" - fi - done - - echo "✅ GPU Memory is clean (all < 300 MiB)." - echo "Ready to start workflow." - - name: Checkout repository uses: actions/checkout@v2 From 1dcf0dafa884ad52ffb243625717a3471643e087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sat, 14 Feb 2026 01:28:50 +0100 Subject: [PATCH 290/334] [dev] ci: Fix merge queue (#3385) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cicd-main.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index cd8091f6edf..3aff7995099 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -38,7 +38,6 @@ env: jobs: is-not-external-contributor: runs-on: ubuntu-latest - environment: nemo-ci if: github.repository == 'NVIDIA/Megatron-LM' outputs: is_external_contributor: ${{ github.event.pull_request.user.type == 'User' }} @@ -73,15 +72,11 @@ jobs: id: check-membership env: IS_MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }} - IS_DEV_BRANCH: ${{ github.ref == 'refs/heads/dev' }} IS_MERGE_GROUP: ${{ github.event_name == 
'merge_group' }} SCHEDULED_JOB: ${{ github.event_name == 'schedule' }} run: | # Skip SSO check for scheduled jobs, main branch, dev branch, or merge groups if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] || [ "${IS_DEV_BRANCH}" == "true" ] || [ "${IS_MERGE_GROUP}" == "true" ]; then - echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT - exit 0 - fi # Use SSO membership check result IS_MEMBER="${{ steps.check-sso.outputs.is_member }}" @@ -410,7 +405,6 @@ jobs: - cicd-wait-in-queue - cicd-container-build - cicd-unit-tests-latest - environment: nemo-ci if: | needs.pre-flight.result != 'cancelled' && needs.cicd-wait-in-queue.result != 'cancelled' @@ -599,7 +593,6 @@ jobs: && needs.pre-flight.outputs.is_ci_workload == 'false' && !cancelled() && github.repository == 'NVIDIA/Megatron-LM' - environment: nemo-ci steps: - name: Generate fake coverage report uses: actions/github-script@v6 From cd1c215b956e09fad153e1034d2ea5ee70345234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 23 Feb 2026 20:31:24 +0100 Subject: [PATCH 291/334] [dev] `cp: Cherrypick CI changes` (#3543) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/copy-pr-bot.yaml | 2 +- .github/oncall_schedule.json | 24 +-- .github/scripts/readme.sh | 65 ++++++ .../workflows/_build_test_publish_wheel.yml | 5 +- .github/workflows/_release_library.yml | 87 ++++++-- .github/workflows/_update_dependencies.yml | 21 -- .github/workflows/auto-assign-milestone.yml | 1 - .github/workflows/auto-reminder-bot.yml | 5 +- .github/workflows/auto-update-copy-pr-bot.yml | 3 +- .../workflows/build-test-publish-wheel.yml | 12 +- .../workflows/cherry-pick-release-commit.yml | 5 +- .github/workflows/cicd-approve-test-queue.yml | 6 +- .github/workflows/cicd-main.yml | 114 ++++++++++- .../workflows/config/changelog-config.json | 24 +++ .github/workflows/copyright-check.yml | 11 +- 
.github/workflows/dependabot.yml | 8 +- .github/workflows/install-test.yml | 12 +- .github/workflows/multi-approval-bot.yml | 74 +++++++ .github/workflows/oncall-rotation.yml | 6 +- .github/workflows/release-docs.yml | 48 ++++- .github/workflows/release-freeze.yml | 4 +- .github/workflows/release.yaml | 20 +- .github/workflows/sync-team-usergroups.yml | 3 +- .github/workflows/trigger-mbridge-tests.yml | 186 ++---------------- 24 files changed, 467 insertions(+), 279 deletions(-) create mode 100644 .github/scripts/readme.sh create mode 100644 .github/workflows/config/changelog-config.json create mode 100644 .github/workflows/multi-approval-bot.yml diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml index f43437d19c0..d9ece17bd35 100644 --- a/.github/copy-pr-bot.yaml +++ b/.github/copy-pr-bot.yaml @@ -1,4 +1,4 @@ enabled: true auto_sync_draft: false auto_sync_ready: true -trustees_override: ["AAnoosheh", "ArEsKay3", "Autumn1998", "BestJuly", "BoxiangW", "ChenhanYu", "FDecaYed", "HaochenYuan", "ISEEKYAN", "JRD971000", "Phlip79", "QiZhangNV", "ShriyaRishab", "Victarry", "Wohox", "ZhiyuLi-Nvidia", "ahmadki", "aklife97", "ananthsub", "asolergi-nv", "buptzyb", "chtruong814", "cspades", "cuichenx", "deepakn94", "dimapihtar", "duncanriach", "erhoo82", "ericharper", "fanshiqing", "frsun-nvda", "gautham-kollu", "gdengk", "guyueh1", "hxbai", "jalbericiola", "janEbert", "jaredcasper", "jenchen13", "jiemingz", "jingqiny-99", "jkamalu", "jon-barker", "jstjohn", "kanz-nv", "kevalmorabia97", "ko3n1g", "kunlunl", "kvareddy", "kwyss-nvidia", "layalir", "lhb8125", "lmcafee-nvidia", "maanug-nv", "mathemakitten", "matthieule", "mehraakash", "mkhona-nvidia", "parthmannan", "prajwal1210", "pthombre", "rogerwaleffe", "sanandaraj5597", "sancha", "santhnm2", "sbak5", "shanmugamr1992", "shifangx", "shjwudp", "sidsingh-nvidia", "skyw", "sudhakarsingh27", "tdene", "theothermike", "thomasdhc", "trintamaki", "tylerpoon", "wdykas", "xiaoyao0115", "xuwchen", "yanring", "yaox12", 
"yaoyu-33", "yashaswikarnati", "yeyu-nvidia", "yobibyte", "youngeunkwon0405", "yuzhongw-nvidia", "zhongbozhu"] +trustees_override: ["AAnoosheh", "ArEsKay3", "Autumn1998", "BestJuly", "BoxiangW", "ChenhanYu", "FDecaYed", "HaochenYuan", "ISEEKYAN", "JRD971000", "Phlip79", "QiZhangNV", "RPrenger", "ShriyaRishab", "Victarry", "Wohox", "ZhiyuLi-Nvidia", "ahmadki", "aklife97", "ananthsub", "asolergi-nv", "buptzyb", "chtruong814", "cspades", "cuichenx", "deepakn94", "dimapihtar", "dingqingy-nv", "duncanriach", "erhoo82", "ericharper", "fanshiqing", "frsun-nvda", "gautham-kollu", "gdengk", "guyueh1", "hxbai", "ilml", "jalbericiola", "janEbert", "jaredcasper", "jenchen13", "jiemingz", "jingqiny-99", "jkamalu", "jon-barker", "jstjohn", "kanz-nv", "kevalmorabia97", "ko3n1g", "kunlunl", "kvareddy", "kwyss-nvidia", "layalir", "lhb8125", "lmcafee-nvidia", "maanug-nv", "mathemakitten", "matthieule", "mehraakash", "mkhona-nvidia", "parthmannan", "prajwal1210", "pthombre", "rogerwaleffe", "sajadn", "sanandaraj5597", "sancha", "santhnm2", "sbak5", "shanmugamr1992", "sharathts", "shengf-nv", "shifangx", "shjwudp", "sidsingh-nvidia", "skyw", "sudhakarsingh27", "tdene", "theothermike", "thomasdhc", "trintamaki", "tylerpoon", "wdykas", "xiaoyao0115", "xuwchen", "yanring", "yaox12", "yaoyu-33", "yashaswikarnati", "yeyu-nvidia", "yobibyte", "youngeunkwon0405", "yueshen2016", "yuzhongw-nvidia", "zhongbozhu"] diff --git a/.github/oncall_schedule.json b/.github/oncall_schedule.json index 5fa49e966bc..58fcf0ddbbc 100644 --- a/.github/oncall_schedule.json +++ b/.github/oncall_schedule.json @@ -1,18 +1,6 @@ [ - { - "user": "dimapihtar", - "date": "2026-01-28" - }, - { - "user": "gautham-kollu", - "date": "2026-02-04" - }, { "user": "janEbert", - "date": "2026-02-11" - }, - { - "user": "Phlip79", "date": "2026-02-18" }, { @@ -46,5 +34,17 @@ { "user": "BoxiangW", "date": "2026-04-15" + }, + { + "user": "Phlip79", + "date": "2026-04-22" + }, + { + "user": "asolergi-nv", + "date": "2026-04-29" + }, 
+ { + "user": "dimapihtar", + "date": "2026-05-06" } ] diff --git a/.github/scripts/readme.sh b/.github/scripts/readme.sh new file mode 100644 index 00000000000..216d5224a28 --- /dev/null +++ b/.github/scripts/readme.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +cat << 'EOF' +╔══════════════════════════════════════════════════════════════════════╗ +║ ║ +║ ███╗ ███╗██████╗ ██████╗ ██╗██████╗ ██████╗ ███████╗ ║ +║ ████╗ ████║██╔══██╗██╔══██╗██║██╔══██╗██╔════╝ ██╔════╝ ║ +║ ██╔████╔██║██████╔╝██████╔╝██║██║ ██║██║ ███╗█████╗ ║ +║ ██║╚██╔╝██║██╔══██╗██╔══██╗██║██║ ██║██║ ██║██╔══╝ ║ +║ ██║ ╚═╝ ██║██████╔╝██║ ██║██║██████╔╝╚██████╔╝███████╗ ║ +║ ╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚═╝╚═════╝ ╚═════╝ ╚══════╝ ║ +║ ║ +║ H O W T O : M B R I D G E T E S T I N G ║ +╚══════════════════════════════════════════════════════════════════════╝ + + MBridge unit tests run automatically on every PR. To also trigger + functional tests, attach the label and re-run the workflow step. + + ┌─────────────────────────────────────────────────────────────────┐ + │ DEFAULT │ Unit tests run on every PR (no action needed) │ + ├─────────────────────────────────────────────────────────────────┤ + │ │ + │ Every PR ──► cicd-mbridge-testing ──► unit tests only │ + │ │ + └─────────────────────────────────────────────────────────────────┘ + + ┌─────────────────────────────────────────────────────────────────┐ + │ STEP 1 │ Attach the label to your PR (for functional tests) │ + ├─────────────────────────────────────────────────────────────────┤ + │ │ + │ PR Labels ──► [ + Add label ] ──► "Run MBridge tests" │ + │ │ + └─────────────────────────────────────────────────────────────────┘ + + ┌─────────────────────────────────────────────────────────────────┐ + │ STEP 2 │ Re-run this workflow step │ + ├─────────────────────────────────────────────────────────────────┤ + │ │ + │ Actions ──► [ Re-run jobs ] ──► Re-run failed jobs │ + │ │ + └─────────────────────────────────────────────────────────────────┘ + + 
┌─────────────────────────────────────────────────────────────────┐ + │ RESULT │ Unit + functional tests run! │ + ├─────────────────────────────────────────────────────────────────┤ + │ │ + │ cicd-mbridge-testing ◄── unit + functional tests │ + │ │ + │ Tests run against MBridge using the merge commit │ + │ SHA of your pull request. │ + │ │ + └─────────────────────────────────────────────────────────────────┘ + + ┌────────────────────────────────────┐ + │ Label present? NO → unit │ + │ Label present? YES → unit + │ + │ functional│ + └────────────────────────────────────┘ + + NOTE: The label must be present BEFORE the re-run is triggered. + The CI checks for "Run MBridge tests" at runtime. + + NOTE: All MBridge test results are optional — failures do not + block merging your PR. +EOF diff --git a/.github/workflows/_build_test_publish_wheel.yml b/.github/workflows/_build_test_publish_wheel.yml index 9e9062827de..0b71577b587 100644 --- a/.github/workflows/_build_test_publish_wheel.yml +++ b/.github/workflows/_build_test_publish_wheel.yml @@ -17,8 +17,6 @@ on: type: boolean default: true secrets: - TWINE_USERNAME: - required: true TWINE_PASSWORD: required: true @@ -147,7 +145,6 @@ jobs: needs: [build-and-test-wheels] runs-on: ubuntu-latest if: inputs.no-publish == false - environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && 'main' || 'public' }} strategy: fail-fast: false matrix: @@ -170,7 +167,7 @@ jobs: - name: Publish wheels env: - TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} + TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} TWINE_REPOSITORY: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && 'pypi' || 'testpypi' }} PLATFORM: ${{ matrix.PLATFORM }} diff --git a/.github/workflows/_release_library.yml b/.github/workflows/_release_library.yml index d39ee505c2a..684dacc27aa 100644 --- a/.github/workflows/_release_library.yml +++ b/.github/workflows/_release_library.yml @@ 
-12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: 'Release' +name: "Release" defaults: run: @@ -38,13 +38,24 @@ on: description: Create a GitHub release type: boolean default: true + gh-release-use-changelog-builder: + required: false + description: Use release-changelog-builder-action to dynamically build changelog + type: boolean + default: true + gh-release-changelog-config: + required: false + description: Path to changelog builder configuration file + type: string + default: ".github/workflows/config/changelog-config.json" + gh-release-from-tag: + required: false + description: Starting tag for changelog builder (leave empty for auto-detect) + type: string + default: "" secrets: - TWINE_USERNAME: - required: true TWINE_PASSWORD: required: true - SLACK_WEBHOOK_ADMIN: - required: true SLACK_WEBHOOK: required: true PAT: @@ -62,12 +73,10 @@ jobs: ref: ${{ inputs.release-ref }} no-publish: true secrets: - TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} bump-next-version: runs-on: ubuntu-latest - environment: main # ${{ inputs.dry-run == true && 'public' || 'main' }} needs: build-test-publish-wheels-dry-run if: | ( @@ -90,8 +99,8 @@ jobs: - name: Bump version MCore id: bump-version-mcore env: - SRC_DIR: '' - PYPROJECT_NAME: 'megatron.core' + SRC_DIR: "" + PYPROJECT_NAME: "megatron.core" run: | set +u cd ${{ github.run_id }} @@ -129,8 +138,8 @@ jobs: - name: Bump version MFSDP id: bump-version-mfsdp env: - SRC_DIR: 'megatron/core/distributed/fsdp/src/' - PYPROJECT_NAME: 'megatron_fsdp' + SRC_DIR: "megatron/core/distributed/fsdp/src/" + PYPROJECT_NAME: "megatron_fsdp" run: | set +u @@ -323,7 +332,6 @@ jobs: create-gh-release: needs: [build-test-publish-wheels, bump-next-version] runs-on: ubuntu-latest - environment: ${{ inputs.dry-run == true && 'public' || 'main' }} if: | ( success() || !failure() @@ -345,12 +353,51 @@ jobs: ref: ${{ 
inputs.release-ref }} token: ${{ secrets.PAT || secrets.GITHUB_TOKEN }} + - name: Determine fromTag for changelog + id: determine-from-tag + if: inputs.gh-release-use-changelog-builder == true + run: | + cd ${{ github.run_id }} + + # If gh-release-from-tag is provided, use it + if [[ -n "${{ inputs.gh-release-from-tag }}" ]]; then + FROM_TAG="${{ inputs.gh-release-from-tag }}" + echo "Using provided fromTag: $FROM_TAG" + else + # Get the most recent tag + FROM_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "") + if [[ -z "$FROM_TAG" ]]; then + echo "No previous tags found, leaving fromTag empty" + else + echo "Auto-detected most recent tag: $FROM_TAG" + fi + fi + + echo "from-tag=$FROM_TAG" >> $GITHUB_OUTPUT + + - name: Build Changelog + id: build-changelog + if: inputs.gh-release-use-changelog-builder == true + uses: mikepenz/release-changelog-builder-action@v6.1.0 + env: + GITHUB_TOKEN: ${{ secrets.PAT || secrets.GITHUB_TOKEN }} + with: + configuration: ${{ github.run_id }}/${{ inputs.gh-release-changelog-config }} + owner: ${{ github.repository_owner }} + repo: ${{ github.event.repository.name }} + ignorePreReleases: "false" + failOnError: "false" + fromTag: ${{ steps.determine-from-tag.outputs.from-tag }} + toTag: ${{ inputs.release-ref }} + mode: ${{ inputs.gh-release-changelog-mode }} + - name: Create release id: version-number env: SHA: ${{ inputs.release-ref }} GH_TOKEN: ${{ secrets.PAT }} IS_DRY_RUN: ${{ inputs.dry-run }} + BUILT_CHANGELOG: ${{ steps.build-changelog.outputs.changelog }} run: | cd ${{ github.run_id }} @@ -359,7 +406,10 @@ jobs: IS_PRERELEASE=$([[ "$IS_RELEASE_CANDIDATE" == "true" || "$IS_ALPHA" == "true" ]] && echo "true" || echo "false") NAME="NVIDIA $PROJECT_NAME ${VERSION}" - if [[ "$IS_RELEASE_CANDIDATE" == "true" ]]; then + # Use built changelog if available, otherwise fall back to CHANGELOG.md + if [[ -n "$BUILT_CHANGELOG" ]]; then + CHANGELOG="$BUILT_CHANGELOG" + elif [[ "$IS_RELEASE_CANDIDATE" == "true" ]]; then 
DATE=$(date +"%Y-%m-%d") CHANGELOG="Prerelease: $NAME ($DATE)" else @@ -402,10 +452,19 @@ jobs: eval "$CMD" fi + publish-docs: + needs: [bump-next-version, create-gh-release] + uses: ./.github/workflows/release-docs.yml + with: + dry-run: ${{ inputs.dry-run }} + publish-as-latest: true + docs-version-override: ${{ needs.bump-next-version.outputs.release-version }} + build-docs-ref: ${{ inputs.release-ref }} + secrets: inherit + notify: needs: [build-test-publish-wheels, create-gh-release] runs-on: ubuntu-latest - environment: ${{ inputs.dry-run == true && 'public' || 'main' }} env: GH_URL: https://github.com/${{ github.repository }}/releases/tag/v${{ needs.build-test-publish-wheels.outputs.version }} PYPI_URL: https://${{ inputs.dry-run == true && 'test.' || '' }}pypi.org/project/${{ needs.build-test-publish-wheels.outputs.pypi-name }}/${{ needs.build-test-publish-wheels.outputs.version }}/ diff --git a/.github/workflows/_update_dependencies.yml b/.github/workflows/_update_dependencies.yml index 063b966b5de..0a5fb47605f 100644 --- a/.github/workflows/_update_dependencies.yml +++ b/.github/workflows/_update_dependencies.yml @@ -9,12 +9,6 @@ on: secrets: PAT: required: true - AZURE_CLIENT_ID: - required: true - AZURE_TENANT_ID: - required: true - AZURE_SUBSCRIPTION_ID: - required: true SSH_KEY: required: true SSH_PWD: @@ -32,26 +26,12 @@ jobs: run: echo "date=$(date +%F)" | tee -a "$GITHUB_OUTPUT" update-lockfile: - environment: nemo-ci runs-on: linux-amd64-cpu16 needs: [pre-flight] env: SOURCE_BRANCH: ${{ needs.pre-flight.outputs.bump-branch }} TARGET_BRANCH: ${{ inputs.target-branch }} steps: - - name: Install Azure CLI - run: curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash - - - name: Azure Login - uses: azure/login@v2 - with: - client-id: ${{ secrets.AZURE_CLIENT_ID }} - tenant-id: ${{ secrets.AZURE_TENANT_ID }} - subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - - - name: Azure ACR Login - run: az acr login --name nemoci - - name: Checkout repo 
uses: actions/checkout@v4 with: @@ -96,7 +76,6 @@ jobs: create-pr: needs: [update-lockfile, pre-flight] runs-on: ubuntu-latest - environment: main env: SOURCE_BRANCH: ${{ needs.pre-flight.outputs.bump-branch }} TARGET_BRANCH: ${{ inputs.target-branch }} diff --git a/.github/workflows/auto-assign-milestone.yml b/.github/workflows/auto-assign-milestone.yml index 8153728f9fd..b972329bac1 100644 --- a/.github/workflows/auto-assign-milestone.yml +++ b/.github/workflows/auto-assign-milestone.yml @@ -13,7 +13,6 @@ permissions: jobs: assign-milestone: runs-on: ubuntu-latest - environment: nemo-ci if: github.repository == 'NVIDIA/Megatron-LM' steps: - name: Get PR info diff --git a/.github/workflows/auto-reminder-bot.yml b/.github/workflows/auto-reminder-bot.yml index c3aa8169b50..37e6e5498e3 100644 --- a/.github/workflows/auto-reminder-bot.yml +++ b/.github/workflows/auto-reminder-bot.yml @@ -9,7 +9,6 @@ on: jobs: run-script: - environment: main name: Run Auto Reminder Bot runs-on: ubuntu-latest if: github.repository == 'NVIDIA/Megatron-LM' @@ -28,7 +27,7 @@ jobs: - name: Run Auto Reminder Bot run: | - export SLACK_TOKEN=${{ secrets.SLACK_TOKEN }} - export SLACK_WEBHOOK_URL=${{ secrets.SLACK_WEBHOOK_URL }} + export SLACK_TOKEN=${{ secrets.SLACK_BOT_TOKEN }} + export SLACK_WEBHOOK_URL=${{ secrets.SLACK_REVIEW_REMINDER_CHANNEL_WEBHOOK }} export GH_TOKEN=${{ secrets.PAT }} python tests/test_utils/python_scripts/auto_reminder_github.py diff --git a/.github/workflows/auto-update-copy-pr-bot.yml b/.github/workflows/auto-update-copy-pr-bot.yml index 5f6f1ade9e8..3358a747f34 100644 --- a/.github/workflows/auto-update-copy-pr-bot.yml +++ b/.github/workflows/auto-update-copy-pr-bot.yml @@ -3,12 +3,11 @@ name: Auto Update Copy PR Bot on: workflow_dispatch: schedule: - - cron: '0 0 * * *' + - cron: "0 0 * * *" jobs: auto-update-copy-pr-bot: runs-on: ubuntu-latest - environment: nemo-ci if: github.repository == 'NVIDIA/Megatron-LM' steps: - name: Checkout code diff --git 
a/.github/workflows/build-test-publish-wheel.yml b/.github/workflows/build-test-publish-wheel.yml index bca859d0e61..00711b50806 100644 --- a/.github/workflows/build-test-publish-wheel.yml +++ b/.github/workflows/build-test-publish-wheel.yml @@ -17,10 +17,9 @@ name: Build, test, and publish a PyPi wheel (to testpypi). on: push: branches: - - dev - main - - 'pull-request/[0-9]+' - - 'deploy-release/*' + - "pull-request/[0-9]+" + - "deploy-release/*" merge_group: types: [checks_requested] @@ -34,7 +33,7 @@ permissions: jobs: pre-flight: - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.5 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2 if: github.repository == 'NVIDIA/Megatron-LM' build-test-publish-wheels: @@ -43,8 +42,7 @@ jobs: with: no-publish: true secrets: - TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} - TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} + TWINE_PASSWORD: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && secrets.SVC_PYPI_TOKEN || secrets.SVC_PYPI_TEST_TOKEN }} build-test-publish-wheel-summary: needs: [pre-flight, build-test-publish-wheels] @@ -66,7 +64,7 @@ jobs: env: GH_TOKEN: ${{ github.token }} GITHUB_RUN_ID: ${{ github.run_id }} - SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' || github.ref != 'refs/heads/main' }} + SKIPPING_IS_ALLOWED: true run: | FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0 diff --git a/.github/workflows/cherry-pick-release-commit.yml b/.github/workflows/cherry-pick-release-commit.yml index 58b447939a7..9da305f07e6 100644 --- a/.github/workflows/cherry-pick-release-commit.yml +++ b/.github/workflows/cherry-pick-release-commit.yml @@ -17,7 
+17,6 @@ on: push: branches: - main - - dev jobs: cherry-pick: @@ -27,5 +26,5 @@ jobs: target-branches-pattern: 'core_(*dev_)?r[0-9]+\.[0-9]+\.[0-9]+' secrets: PAT: ${{ secrets.PAT }} - SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_TEAM_GROUP_ID }} + SLACK_WEBHOOK: ${{ secrets.SLACK_CI_CHANNEL_WEBHOOK }} diff --git a/.github/workflows/cicd-approve-test-queue.yml b/.github/workflows/cicd-approve-test-queue.yml index 1c35031cb35..2cba41eafb8 100644 --- a/.github/workflows/cicd-approve-test-queue.yml +++ b/.github/workflows/cicd-approve-test-queue.yml @@ -155,8 +155,6 @@ jobs: workflow_id = workflow["id"] workflow_name = workflow["display_title"] - pr_info = workflow.get("pull_requests", [{}])[0] - pr_number = pr_info.get("number", "unknown") print(f"Approving workflow {workflow_name} with Run Id: {workflow_id}") deployment_url = f"actions/runs/{workflow_id}/pending_deployments" @@ -183,8 +181,8 @@ jobs: steps: - name: Notify env: - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - SLACK_WEBHOOK_ADMIN: + SLACK_WEBHOOK: ${{ secrets.SLACK_CI_CHANNEL_WEBHOOK }} + SLACK_WEBHOOK_ADMIN: GITHUB_RUN_ID: ${{ github.run_id }} GITHUB_REPOSITORY: ${{ github.repository }} run: | diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 3aff7995099..cc108bc66d0 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -77,6 +77,9 @@ jobs: run: | # Skip SSO check for scheduled jobs, main branch, dev branch, or merge groups if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] || [ "${IS_DEV_BRANCH}" == "true" ] || [ "${IS_MERGE_GROUP}" == "true" ]; then + echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT + exit 0 + fi # Use SSO membership check result IS_MEMBER="${{ steps.check-sso.outputs.is_member }}" @@ -126,7 +129,7 @@ jobs: pre-flight: needs: [is-not-external-contributor] if: github.repository == 
'NVIDIA/Megatron-LM' - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.10 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2 linting: runs-on: ubuntu-latest @@ -186,6 +189,115 @@ jobs: echo "Running CI tests" echo "is_merge_group: ${{ needs.pre-flight.outputs.is_merge_group }}" + cicd-parse-downstream-testing: + runs-on: ubuntu-latest + needs: + - pre-flight + - cicd-wait-in-queue + if: | + needs.pre-flight.result != 'cancelled' + && needs.cicd-wait-in-queue.result != 'cancelled' + && ( + success() + || needs.pre-flight.outputs.is_ci_workload == 'true' + || needs.pre-flight.outputs.force_run_all == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' + ) + && !cancelled() + outputs: + mbridge-test-suite: ${{ steps.select-mbridge-test-suite.outputs.main }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Get PR info + id: get-pr-info + if: startsWith(github.ref, 'refs/heads/pull-request/') + uses: nv-gha-runners/get-pr-info@main + + - name: Select MBridge test suite + id: select-mbridge-test-suite + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} + TEST_SUITE=$(gh pr view $PR_NUMBER --json labels | jq -r 'if [.labels[].name] | any(. 
== "Run MBridge tests") then "all" else "unit-only" end') + echo "main=$TEST_SUITE" | tee -a $GITHUB_OUTPUT + + - name: How-To + run: bash .github/scripts/readme.sh + + cicd-mbridge-testing: + runs-on: ubuntu-latest + needs: + - pre-flight + - cicd-wait-in-queue + - cicd-parse-downstream-testing + if: | + needs.pre-flight.result != 'cancelled' + && needs.cicd-wait-in-queue.result != 'cancelled' + && needs.cicd-parse-downstream-testing.result != 'cancelled' + && ( + success() + || needs.pre-flight.outputs.is_ci_workload == 'true' + || needs.pre-flight.outputs.force_run_all == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' + ) + && !cancelled() + steps: + - name: Get PR info + id: get-pr-info + if: startsWith(github.ref, 'refs/heads/pull-request/') + uses: nv-gha-runners/get-pr-info@main + + - name: Checkout MBridge and create testing branch + uses: actions/checkout@v4 + with: + ref: main + repository: NVIDIA-NeMo/Megatron-Bridge + path: megatron-bridge + token: ${{ secrets.PAT }} + + - name: Create testing branch + run: | + cd megatron-bridge + git fetch origin main + git checkout -b mcore-testing-${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} origin/main + git push origin mcore-testing-${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} --force + + - name: Get merge commit sha + shell: bash -x -e -u -o pipefail {0} + id: sha + env: + IS_PR: ${{ startsWith(github.ref, 'refs/heads/pull-request/') }} + IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }} + run: | + if [[ "$IS_PR" == "true" ]]; then + SHA=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').merge_commit_sha }} + elif [[ "$IS_MERGE_GROUP" == "true" ]]; then + SHA=${{ github.event.merge_group.head_sha }} + else + SHA=${GITHUB_SHA} + fi + echo "main=${SHA}" | tee -a "$GITHUB_OUTPUT" + + - name: Trigger MBridge tests + uses: convictional/trigger-workflow-and-wait@v1.6.5 + with: + owner: NVIDIA-NeMo + repo: Megatron-Bridge + workflow_file_name: 
cicd-main.yml + github_token: ${{ secrets.PAT }} + ref: mcore-testing-${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} + wait_interval: 60 + propagate_failure: true + client_payload: | + { + "mcore_ref": "${{ steps.sha.outputs.main }}", + "test_suite": "${{ needs.cicd-parse-downstream-testing.outputs.mbridge-test-suite }}", + "triggered_by": "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + } + cicd-container-build: needs: [is-not-external-contributor, pre-flight, cicd-wait-in-queue] runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }} diff --git a/.github/workflows/config/changelog-config.json b/.github/workflows/config/changelog-config.json new file mode 100644 index 00000000000..e640b90a0f3 --- /dev/null +++ b/.github/workflows/config/changelog-config.json @@ -0,0 +1,24 @@ +{ + "categories": [], + "ignore_labels": [ + "ignore" + ], + "sort": "ASC", + "template": "\n${{CHANGELOG}}\n\n
    Changelog Details\n\n${{UNCATEGORIZED}}\n
    \n", + "pr_template": "- ${{TITLE}} by @${{AUTHOR}} :: PR: #${{NUMBER}}", + "commit_template": "- ${{TITLE}} by @${{AUTHOR}}", + "empty_template": "${{OWNER}}\n${{REPO}}\n${{FROM_TAG}}\n${{TO_TAG}}", + "duplicate_filter": { + "pattern": ".+", + "on_property": "title", + "method": "match" + }, + "transformers": [], + "max_tags_to_fetch": 100, + "max_pull_requests": 500, + "max_back_track_time_days": 365, + "exclude_merge_branches": [], + "tag_resolver": { + "method": "semver" + } +} diff --git a/.github/workflows/copyright-check.yml b/.github/workflows/copyright-check.yml index ac0d49daf9a..a7f51cd8a0e 100644 --- a/.github/workflows/copyright-check.yml +++ b/.github/workflows/copyright-check.yml @@ -17,14 +17,14 @@ name: Copyright check on: push: branches: - - 'pull-request/[0-9]+' - - 'deploy-release/*' + - "pull-request/[0-9]+" + - "deploy-release/*" merge_group: types: [checks_requested] jobs: pre-flight: - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.10 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2 if: github.repository == 'NVIDIA/Megatron-LM' copyright-check: @@ -48,8 +48,13 @@ jobs: && github.repository == 'NVIDIA/Megatron-LM' runs-on: ubuntu-latest steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Result env: + GH_TOKEN: ${{ github.token }} + GITHUB_RUN_ID: ${{ github.run_id }} SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' }} run: | FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0 diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml index 9dc1e6ac5a9..6aa16ba0a0c 100644 --- a/.github/workflows/dependabot.yml +++ b/.github/workflows/dependabot.yml 
@@ -11,7 +11,6 @@ permissions: jobs: get-release-branch-names: runs-on: ubuntu-latest - environment: nemo-ci outputs: mcore: ${{ steps.get-branch.outputs.mcore_release_branch }} if: github.repository == 'NVIDIA/Megatron-LM' @@ -41,9 +40,6 @@ jobs: target-branch: ${{ matrix.target-branch }} secrets: PAT: ${{ secrets.PAT }} - AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} - AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} SSH_KEY: ${{ secrets.SSH_KEY }} SSH_PWD: ${{ secrets.SSH_PWD }} @@ -54,8 +50,8 @@ jobs: steps: - name: Notify env: - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - SLACK_WEBHOOK_ADMIN: + SLACK_WEBHOOK: ${{ secrets.SLACK_CI_CHANNEL_WEBHOOK }} + SLACK_WEBHOOK_ADMIN: GITHUB_RUN_ID: ${{ github.run_id }} GITHUB_REPOSITORY: ${{ github.repository }} run: | diff --git a/.github/workflows/install-test.yml b/.github/workflows/install-test.yml index ece9184ee94..5a0abb8596d 100644 --- a/.github/workflows/install-test.yml +++ b/.github/workflows/install-test.yml @@ -22,14 +22,14 @@ on: branches: - dev - main - - 'pull-request/[0-9]+' - - 'deploy-release/*' + - "pull-request/[0-9]+" + - "deploy-release/*" merge_group: types: [checks_requested] jobs: pre-flight: - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.5 + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2 if: github.repository == 'NVIDIA/Megatron-LM' pip-test-pytorch: @@ -43,11 +43,10 @@ jobs: name: Pip - Python${{ matrix.python-version }} - AMD64/Linux - NGC PyTorch container: image: nvcr.io/nvidia/pytorch:25.05-py3 - environment: nemo-ci strategy: fail-fast: false matrix: - python-version: ['3.12'] + python-version: ["3.12"] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -89,11 +88,10 @@ jobs: name: UV - Python${{ matrix.python-version }} - AMD64/Linux - NGC PyTorch container: image: nvcr.io/nvidia/pytorch:25.05-py3 - environment: nemo-ci strategy: fail-fast: false 
matrix: - python-version: ['3.12'] + python-version: ["3.12"] steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/.github/workflows/multi-approval-bot.yml b/.github/workflows/multi-approval-bot.yml new file mode 100644 index 00000000000..6a925604213 --- /dev/null +++ b/.github/workflows/multi-approval-bot.yml @@ -0,0 +1,74 @@ +name: "Codeowners Approval Workflow" + +on: + push: + branches: + - "pull-request/[0-9]+" + merge_group: + types: [checks_requested] + +jobs: + pre-flight: + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2 + if: github.repository == 'NVIDIA/Megatron-LM' + + codeowners-approval: + needs: [pre-flight] + runs-on: ubuntu-latest + if: | + !(needs.pre-flight.outputs.docs_only == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' + || needs.pre-flight.outputs.is_deployment_workflow == 'true') + steps: + - name: Get PR info + id: get-pr-info + if: startsWith(github.ref, 'refs/heads/pull-request/') + uses: nv-gha-runners/get-pr-info@main + + - name: Checkout action + uses: actions/checkout@v3 + with: + repository: noamelf/codeowner-multi-approval-action + ref: v0.1 + path: codeowner-multi-approval-action + + - name: Check Codeowners Approval + uses: ./codeowner-multi-approval-action + with: + pr-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} + repo-name: ${{ github.repository }} + github-token: ${{ secrets.PAT }} + + multi-approval-bot-summary: + needs: [pre-flight, codeowners-approval] + if: | + ( + needs.pre-flight.outputs.docs_only == 'true' + || needs.pre-flight.outputs.is_merge_group == 'true' + || needs.pre-flight.outputs.is_deployment_workflow == 'true' + || always() + ) + && github.repository == 'NVIDIA/Megatron-LM' + && !cancelled() + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Result + env: + GH_TOKEN: ${{ github.token }} + GITHUB_RUN_ID: ${{ github.run_id }} + SKIPPING_IS_ALLOWED: ${{ 
needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' }} + run: | + FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0 + + if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then + echo "✅ All previous jobs completed successfully" + exit 0 + else + echo "❌ Found $FAILED_JOBS failed job(s)" + # Show which jobs failed + gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name' + exit 1 + fi diff --git a/.github/workflows/oncall-rotation.yml b/.github/workflows/oncall-rotation.yml index 71ae094e6c8..a621be7f652 100644 --- a/.github/workflows/oncall-rotation.yml +++ b/.github/workflows/oncall-rotation.yml @@ -17,7 +17,7 @@ name: Oncall Rotation on: schedule: # Runs at 09:00 UTC every Wednesday - - cron: '0 9 * * 3' + - cron: "0 9 * * 3" workflow_dispatch: permissions: @@ -25,7 +25,6 @@ permissions: jobs: rotate-schedule: - environment: main runs-on: ubuntu-latest steps: - name: Checkout code @@ -36,7 +35,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: "3.10" - name: Rotate Schedule env: @@ -59,4 +58,3 @@ jobs: git commit -m "chore: rotate oncall schedule" || echo "No changes to commit" git pull --rebase git push origin HEAD:main - diff --git a/.github/workflows/release-docs.yml b/.github/workflows/release-docs.yml index d15ea74f052..b393a69c745 100644 --- a/.github/workflows/release-docs.yml +++ b/.github/workflows/release-docs.yml @@ -20,23 +20,52 @@ on: required: true type: boolean default: true - version-number: - description: Version number to release this as (use `latest` for main branch) - required: true + publish-as-latest: + description: Publish as Latest 
stable version. + required: false + type: boolean + default: true + docs-version-override: + description: Docs version if commit is not tagged + required: false type: string + default: "" notify-emails: description: Email addresses to send the notification to. Format as "me@me.com,you@you.com". + required: false + type: string + workflow_call: + inputs: + dry-run: + description: Whether to run the workflow in dry-run mode required: true + type: boolean + default: true + publish-as-latest: + description: Publish as Latest stable version. + required: false + type: boolean + default: true + docs-version-override: + description: Docs version if commit is not tagged + required: false + type: string + default: "" + notify-emails: + description: Email addresses to send the notification to. Format as "me@me.com,you@you.com". + required: false type: string - aws-region: - description: AWS region + build-docs-ref: + description: Reference to build the docs from required: false type: string - default: us-east-1 + default: ${{ github.sha }} jobs: build-docs: uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_docs.yml@v0.67.0 + with: + ref: ${{ inputs.build-docs-ref }} publish-docs: runs-on: ubuntu-latest @@ -45,7 +74,7 @@ jobs: - uses: actions/checkout@v6 with: repository: NVIDIA-NeMo/FW-CI-templates - ref: v0.67.2 + ref: v0.72.0 path: FW-CI-templates - uses: ./FW-CI-templates/.github/actions/publish-docs @@ -59,10 +88,11 @@ jobs: artifacts-name: docs-html artifacts-path: _build/html emails-csv: ${{ inputs.notify-emails && format('{0},{1}', vars.docs_release_emails, inputs.notify-emails) || vars.docs_release_emails }} - overwrite-latest-on-tag: false + overwrite-latest-on-tag: ${{ inputs.publish-as-latest }} + docs-version-override: ${{ inputs.docs-version-override }} run-on-version-tag-only: ${{ github.ref_name != 'main' }} request-name: megatron-core-publish-docs-${{ github.run_id }} - aws-region: ${{ inputs.aws-region }} + aws-region: ${{ vars.DOCS_AWS_REGION }} 
aws-role-to-assume: ${{ secrets.AWS_ASSUME_ROLE_ARN }} aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/release-freeze.yml b/.github/workflows/release-freeze.yml index 82f26168bd6..dc4bad0a9a7 100644 --- a/.github/workflows/release-freeze.yml +++ b/.github/workflows/release-freeze.yml @@ -42,5 +42,5 @@ jobs: freeze-commit: ${{ inputs.freeze-commit }} dry-run: ${{ inputs.dry-run }} secrets: - SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} - SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} + SLACK_WEBHOOK: ${{ secrets.SLACK_MAIN_CHANNEL_WEBHOOK }} + SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_TEAM_GROUP_ID }} diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index aa04408689b..647e6af2379 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-name: 'Release Megatron-Core' +name: "Release Megatron-Core" on: workflow_dispatch: @@ -30,6 +30,16 @@ on: required: true default: true type: boolean + generate-changelog: + description: Generate changelog + required: false + default: true + type: boolean + publish-docs: + description: Publish docs + required: false + default: true + type: boolean version-bump-branch: description: Branch for version bump required: true @@ -47,9 +57,9 @@ jobs: dry-run: ${{ inputs.dry-run || false }} version-bump-branch: ${{ inputs.version-bump-branch || github.ref_name }} create-gh-release: ${{ inputs.create-gh-release || true }} + gh-release-use-changelog-builder: ${{ inputs.generate-changelog }} + publish-docs: ${{ inputs.publish-docs }} secrets: - TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} - TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} - SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} - SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} + TWINE_PASSWORD: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && secrets.SVC_PYPI_TOKEN || secrets.SVC_PYPI_TEST_TOKEN }} + SLACK_WEBHOOK: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && secrets.SLACK_MAIN_CHANNEL_WEBHOOK || secrets.SLACK_CI_CHANNEL_WEBHOOK }} PAT: ${{ secrets.PAT }} diff --git a/.github/workflows/sync-team-usergroups.yml b/.github/workflows/sync-team-usergroups.yml index 1c6cecaeb7a..6db5127d9a0 100644 --- a/.github/workflows/sync-team-usergroups.yml +++ b/.github/workflows/sync-team-usergroups.yml @@ -19,7 +19,6 @@ on: jobs: sync-usergroups: - environment: main runs-on: ubuntu-latest steps: - name: Checkout code @@ -28,7 +27,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: "3.10" - name: Sync Teams to User Groups env: diff --git a/.github/workflows/trigger-mbridge-tests.yml b/.github/workflows/trigger-mbridge-tests.yml index b1a3aa0089d..282818460bb 100644 --- 
a/.github/workflows/trigger-mbridge-tests.yml +++ b/.github/workflows/trigger-mbridge-tests.yml @@ -2,182 +2,32 @@ # SPDX-License-Identifier: Apache-2.0 name: Trigger MBridge Tests -# Remote testing of MBridge from MCore -# Triggers MBridge CI tests with current MCore commit to verify backward compatibility - on: - # Manual trigger only workflow_dispatch: inputs: mbridge_ref: - description: 'MBridge branch/ref to trigger' + description: "MBridge branch/ref to trigger" required: false type: string - default: 'main' - run_cicd_main: - description: 'Run cicd-main.yml (full CI/CD)' - required: false - type: boolean - default: true - run_install_test: - description: 'Run install-test.yml (quick install check)' - required: false - type: boolean - default: true - test_suite: - description: 'Test suite to run (for cicd-main)' - required: false - type: choice - options: - - 'all' - - 'unit-only' - - 'functional-only' - default: 'all' + default: "main" jobs: - # First job: Get MCore commit info (shared by all matrix jobs) - get-mcore-info: + trigger-mbridge-tests: runs-on: ubuntu-latest - outputs: - sha: ${{ steps.mcore_info.outputs.sha }} - short_sha: ${{ steps.mcore_info.outputs.short_sha }} - branch: ${{ steps.mcore_info.outputs.branch }} - repo_url: ${{ steps.mcore_info.outputs.repo_url }} steps: - - name: Checkout MCore - uses: actions/checkout@v4 + - name: Trigger MBridge tests + uses: convictional/trigger-workflow-and-wait@v1.6.5 with: - fetch-depth: 0 - - - name: Get MCore commit info - id: mcore_info - run: | - echo "sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT - echo "short_sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT - echo "branch=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT - - # Get repo URL from origin remote, fallback to constructing from github context - REPO_URL=$(git remote get-url origin 2>/dev/null || echo "${{ github.server_url }}/${{ github.repository }}.git") - echo "repo_url=${REPO_URL}" >> $GITHUB_OUTPUT - - echo "📦 MCore commit: $(git 
rev-parse --short HEAD)" - echo "🌿 Branch: ${GITHUB_REF#refs/heads/}" - echo "📍 Repo: ${REPO_URL}" - - # Matrix job: Trigger and monitor MBridge workflows in parallel - trigger-and-monitor: - needs: [get-mcore-info] - runs-on: ubuntu-latest - continue-on-error: true # Don't fail workflow if monitoring times out - strategy: - fail-fast: false # Continue other matrix jobs even if one fails - matrix: - include: - - workflow: install-test.yml - name: Install Test - - workflow: cicd-main.yml - name: CI/CD Main - - name: ${{ matrix.name }} - - steps: - - name: Check if workflow should run - id: should_run - run: | - if [[ "${{ matrix.workflow }}" == "install-test.yml" && "${{ inputs.run_install_test }}" == "true" ]]; then - echo "run=true" >> $GITHUB_OUTPUT - elif [[ "${{ matrix.workflow }}" == "cicd-main.yml" && "${{ inputs.run_cicd_main }}" == "true" ]]; then - echo "run=true" >> $GITHUB_OUTPUT - else - echo "run=false" >> $GITHUB_OUTPUT - echo "⏭️ Skipping ${{ matrix.workflow }} (not enabled)" - fi - - - name: Trigger ${{ matrix.workflow }} - if: steps.should_run.outputs.run == 'true' - id: trigger - env: - GH_TOKEN: ${{ secrets.PAT }} - run: | - echo "🚀 Triggering ${{ matrix.workflow }} | MCore: ${{ needs.get-mcore-info.outputs.short_sha }} | MBridge: ${{ inputs.mbridge_ref }}" - - gh workflow run ${{ matrix.workflow }} \ - --repo NVIDIA-NeMo/Megatron-Bridge --ref ${{ inputs.mbridge_ref }} \ - --field mcore_commit=${{ needs.get-mcore-info.outputs.sha }} \ - --field mcore_branch=${{ needs.get-mcore-info.outputs.branch }} \ - --field mcore_repo=${{ needs.get-mcore-info.outputs.repo_url }} \ - --field test_suite=${{ inputs.test_suite }} \ - --field triggered_by=mcore-ci - - - name: Get run ID - if: steps.should_run.outputs.run == 'true' - id: get_run_id - env: - GH_TOKEN: ${{ secrets.PAT }} - run: | - sleep 10 # Wait for run to appear - RUN_ID=$(gh run list \ - --repo NVIDIA-NeMo/Megatron-Bridge \ - --workflow=${{ matrix.workflow }} \ - --limit 5 \ - --json 
databaseId,createdAt \ - --jq "sort_by(.createdAt) | reverse | .[0] | .databaseId") - - echo "run_id=${RUN_ID}" >> $GITHUB_OUTPUT - echo "📋 Run ID: ${RUN_ID}" - - cat >> $GITHUB_STEP_SUMMARY << EOF - ## 🔄 ${{ matrix.name }} Triggered - - **MCore:** \`${{ needs.get-mcore-info.outputs.short_sha }}\` | **MBridge:** \`${{ inputs.mbridge_ref }}\` | **Suite:** \`${{ inputs.test_suite }}\` - - - 🔄 [${{ matrix.workflow }}](https://github.com/NVIDIA-NeMo/Megatron-Bridge/actions/runs/${RUN_ID}) - Running... - - ⏳ Monitoring every 5 minutes until completion - - > **Note:** Tests run without approval when triggered from MCore - EOF - - - name: Monitor workflow - if: steps.should_run.outputs.run == 'true' - id: monitor - continue-on-error: true - env: - GH_TOKEN: ${{ secrets.PAT }} - run: | - RUN_ID="${{ steps.get_run_id.outputs.run_id }}" - echo "📊 Monitoring ${{ matrix.workflow }} (Run ID: ${RUN_ID})" - - gh run watch ${RUN_ID} --repo NVIDIA-NeMo/Megatron-Bridge --exit-status - - CONCLUSION=$(gh run view ${RUN_ID} --repo NVIDIA-NeMo/Megatron-Bridge --json conclusion --jq -r .conclusion) - echo "workflow_status=${CONCLUSION}" >> $GITHUB_ENV - echo "✅ Completed: ${CONCLUSION}" - - - name: Report results - if: always() && steps.should_run.outputs.run == 'true' - run: | - CONCLUSION="${{ env.workflow_status || 'unknown' }}" - RUN_ID="${{ steps.get_run_id.outputs.run_id }}" - - case "$CONCLUSION" in - "success") ICON="✅"; MSG="passed" ;; - "failure") ICON="❌"; MSG="failed"; EXIT_CODE=1 ;; - "cancelled") ICON="🚫"; MSG="cancelled"; EXIT_CODE=0 ;; - *) ICON="⏳"; MSG="still running or timed out"; EXIT_CODE=0 ;; - esac - - cat >> $GITHUB_STEP_SUMMARY << EOF - ## 📊 ${{ matrix.name }} Results - - ### ${ICON} ${{ matrix.workflow }} - **Status:** \`${CONCLUSION}\` - - [View full results →](https://github.com/NVIDIA-NeMo/Megatron-Bridge/actions/runs/${RUN_ID}) - - --- - *Triggered from MCore \`${{ needs.get-mcore-info.outputs.short_sha }}\`* - EOF - - echo "${ICON} ${{ matrix.name }} 
${MSG}" - exit ${EXIT_CODE:-0} - + owner: NVIDIA-NeMo + repo: Megatron-Bridge + workflow_file_name: cicd-main.yml + github_token: ${{ secrets.PAT }} + ref: ${{ inputs.mbridge_ref }} + wait_interval: 60 + propagate_failure: true + client_payload: | + { + "mcore_ref": "${{ github.sha }}", + "test_suite": "all", + "triggered_by": "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + } From aa860180b62cf87ba9c5cba7a1e13003c170ca65 Mon Sep 17 00:00:00 2001 From: "Dennis(Zhenhuan) Liu" Date: Wed, 25 Feb 2026 11:09:11 +0800 Subject: [PATCH 292/334] [Dev] Fix MoE aux loss tracker hang with MTP enabled (#3400) --- megatron/core/transformer/moe/moe_utils.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index 75825cd373b..4250f764948 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -999,13 +999,20 @@ def track_moe_metrics( """ # Aux loss logging tracker = get_moe_layer_wise_logging_tracker() - # Initialize the tracker if force_initialize is True + # Initialize the tracker if force_initialize is True. + # The values tensor size must match what the router creates in save_to_aux_losses_tracker, + # which uses (num_layers + mtp_num_layers). This is important for PP ranks that have no + # MoE layers (so the tracker is empty and force_initialize creates the entry); their tensor + # size must match ranks that do have MoE layers, otherwise all_reduce across PP will hang. 
+ tracker_num_layers = num_layers + if mtp_num_layers is not None: + tracker_num_layers += mtp_num_layers if force_initialize: if track_names is not None: for key in track_names: if key not in tracker: tracker[key] = {} - tracker[key]["values"] = torch.zeros(num_layers, device="cuda") + tracker[key]["values"] = torch.zeros(tracker_num_layers, device="cuda") tracker[key]["reduce_group"] = None tracker[key]["avg_group"] = None tracker[key]["reduce_group_has_dp"] = False From 2b4b9c428cf4e9bffe563ba86635d4f846ca55b3 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Tue, 24 Feb 2026 22:47:58 -0600 Subject: [PATCH 293/334] ci: Remove multi-approval action from dev branch (#3576) Signed-off-by: Charlie Truong --- .github/workflows/multi-approval-bot.yml | 74 ------------------------ 1 file changed, 74 deletions(-) delete mode 100644 .github/workflows/multi-approval-bot.yml diff --git a/.github/workflows/multi-approval-bot.yml b/.github/workflows/multi-approval-bot.yml deleted file mode 100644 index 6a925604213..00000000000 --- a/.github/workflows/multi-approval-bot.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: "Codeowners Approval Workflow" - -on: - push: - branches: - - "pull-request/[0-9]+" - merge_group: - types: [checks_requested] - -jobs: - pre-flight: - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2 - if: github.repository == 'NVIDIA/Megatron-LM' - - codeowners-approval: - needs: [pre-flight] - runs-on: ubuntu-latest - if: | - !(needs.pre-flight.outputs.docs_only == 'true' - || needs.pre-flight.outputs.is_merge_group == 'true' - || needs.pre-flight.outputs.is_deployment_workflow == 'true') - steps: - - name: Get PR info - id: get-pr-info - if: startsWith(github.ref, 'refs/heads/pull-request/') - uses: nv-gha-runners/get-pr-info@main - - - name: Checkout action - uses: actions/checkout@v3 - with: - repository: noamelf/codeowner-multi-approval-action - ref: v0.1 - path: codeowner-multi-approval-action - - - name: Check Codeowners 
Approval - uses: ./codeowner-multi-approval-action - with: - pr-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} - repo-name: ${{ github.repository }} - github-token: ${{ secrets.PAT }} - - multi-approval-bot-summary: - needs: [pre-flight, codeowners-approval] - if: | - ( - needs.pre-flight.outputs.docs_only == 'true' - || needs.pre-flight.outputs.is_merge_group == 'true' - || needs.pre-flight.outputs.is_deployment_workflow == 'true' - || always() - ) - && github.repository == 'NVIDIA/Megatron-LM' - && !cancelled() - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Result - env: - GH_TOKEN: ${{ github.token }} - GITHUB_RUN_ID: ${{ github.run_id }} - SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' }} - run: | - FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0 - - if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then - echo "✅ All previous jobs completed successfully" - exit 0 - else - echo "❌ Found $FAILED_JOBS failed job(s)" - # Show which jobs failed - gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name' - exit 1 - fi From 2e4a5d48c9199a52167322d4c7682f75bdcd173b Mon Sep 17 00:00:00 2001 From: iTao <1416101719@qq.com> Date: Fri, 27 Feb 2026 15:22:59 +0800 Subject: [PATCH 294/334] [dev] fix(moe): fix the bug where gate was not sliced when kv_head < tp_size. 
(#3529) Co-authored-by: xiaotaoliu Co-authored-by: Yuzhong Wang Co-authored-by: Zijie Yan --- megatron/core/transformer/attention.py | 8 +++++ .../unit_tests/transformer/test_attention.py | 33 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index d686dd9efce..b8d9ef69443 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -1490,6 +1490,14 @@ def get_query_key_value_tensors( if output_gate: # Gate [sq, b, ng, np/ng * hn] -> [sq, b, np, hn] gate = gate.reshape(*gate.shape[:2], -1, self.hidden_size_per_attention_head) + if self.config.num_query_groups < self.world_size: + idx = get_tensor_model_parallel_rank() % ( + self.world_size // self.config.num_query_groups + ) + size = self.num_attention_heads_per_partition // ( + self.world_size // self.config.num_query_groups + ) + gate = gate[:, :, idx * size : (idx + 1) * size, :] return query, key, value, gate return query, key, value diff --git a/tests/unit_tests/transformer/test_attention.py b/tests/unit_tests/transformer/test_attention.py index 38588732d6f..cb69a0b7a9e 100644 --- a/tests/unit_tests/transformer/test_attention.py +++ b/tests/unit_tests/transformer/test_attention.py @@ -927,6 +927,39 @@ def test_parallel_attention_correctness( ) +@pytest.mark.parametrize("sp", [True, False]) +@pytest.mark.parametrize("output_gate", [False, True]) +def test_parallel_attention_correctness_num_query_groups_less_than_tp_size( + tmp_path_dist_ckpt, sp, output_gate +): + transformer_config = TransformerConfig( + num_layers=1, + hidden_size=128, + num_attention_heads=8, + num_query_groups=2, + normalization="RMSNorm", + bf16=True, + attention_output_gate=output_gate, + hidden_dropout=0.0, + attention_dropout=0.0, + ) + + transformer_layer_spec = get_gpt_layer_with_transformer_engine_spec() + atol, rtol = 1e-2, 1e-2 + + _test_parallel_attention_correctness( + transformer_config, + 
transformer_layer_spec, + tmp_path_dist_ckpt, + atol=atol, + rtol=rtol, + tp=4, + sp=sp, + seed=123, + sequence_length=256, + ) + + def _torch_native_attention(query, key, value, attention_mask, sinks, scaling: float): """Torch native attention implementation This was not in the original implementation and slightly affect results; From d0e0cf00a1ad468a40c9853a06fd93ee5c82690b Mon Sep 17 00:00:00 2001 From: Kunlun Li <94586211+kunlunl@users.noreply.github.com> Date: Sat, 28 Feb 2026 10:58:22 +0800 Subject: [PATCH 295/334] Add unit test for THD (#3608) --- .../transformer/test_thd_correctness.py | 649 ++++++++++++++++++ 1 file changed, 649 insertions(+) create mode 100644 tests/unit_tests/transformer/test_thd_correctness.py diff --git a/tests/unit_tests/transformer/test_thd_correctness.py b/tests/unit_tests/transformer/test_thd_correctness.py new file mode 100644 index 00000000000..ccf70b8a885 --- /dev/null +++ b/tests/unit_tests/transformer/test_thd_correctness.py @@ -0,0 +1,649 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +""" +Compare THD format against SBHD format. + +Test Strategy +------------- +1. Generate full (unsharded) data with deterministic seed on each rank. +2. Shard inputs for both SBHD and THD formats (zigzag CP, contiguous SP). +3. Forward pass through the same TransformerLayer. +4. Gather outputs back to full size (with gradient support). +5. Backward pass with format-specific grad_output handling. +6. Compare outputs and gradients with bitwise or similarity checks. 
+ +Check Levels +------------ +- bitwise_all: B=1, forward + backward bitwise (MockCoreAttention) +- bitwise_fwd: B>1, forward bitwise, backward similarity (MockCoreAttention, + THD padded to max_len so total tokens match SBHD) +- similarity: All parallelism configs, real TE attention, similarity checks +""" + +import os +from dataclasses import dataclass +from typing import List + +import pytest +import torch +import torch.distributed as dist +import torch.nn as nn + +from megatron.core import parallel_state +from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec +from megatron.core.packed_seq_params import PackedSeqParams +from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed +from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.transformer.transformer_layer import TransformerLayer +from tests.unit_tests.test_utilities import Utils + +# ============================================================================= +# Constants +# ============================================================================= + +SIMILARITY_THRESHOLD = 0.999 + + +# ============================================================================= +# Test Cases +# ============================================================================= + + +@dataclass +class TestCase: + """Test case specification. 
+ + check_level controls comparison strictness and attention implementation: + "bitwise_all" - MockCoreAttention, forward + backward bitwise (B=1) + "bitwise_fwd" - MockCoreAttention, forward bitwise, backward similarity + (B>1, THD padded to max_len to match SBHD total tokens) + "similarity" - Real TE attention, forward + backward similarity + """ + + name: str + hidden_size: int + num_heads: int + num_kv_heads: int + ffn_hidden_size: int + seqlens: List[int] + tp_size: int = 1 + cp_size: int = 1 + sp_enabled: bool = False + check_level: str = "similarity" + + @property + def use_mock_attention(self) -> bool: + return self.check_level in ("bitwise_all", "bitwise_fwd") + + @property + def forward_bitwise(self) -> bool: + return self.check_level in ("bitwise_all", "bitwise_fwd") + + @property + def backward_bitwise(self) -> bool: + return self.check_level == "bitwise_all" + + @property + def pad_thd_to_max(self) -> bool: + """Pad each THD sequence to max_len so total tokens match SBHD.""" + return self.check_level == "bitwise_fwd" + + +# fmt: off +TEST_CASES = [ + # ------------------------------------------------------------------------- + # B=1: forward + backward bitwise (MockCoreAttention) + # ------------------------------------------------------------------------- + # name H heads kv_h ffn seqlens tp cp sp check_level + TestCase("b1_seq3891_gqa", 1024, 16, 4, 4096, [3891], 1, 1, False, "bitwise_all"), + TestCase("b1_seq16k_mha", 256, 4, 4, 1024, [16383], 1, 1, False, "bitwise_all"), + + # ------------------------------------------------------------------------- + # B>1 single GPU: forward bitwise, backward similarity (MockCoreAttention) + # THD is padded to max_len per sequence so TE GEMM sees the same M value + # ------------------------------------------------------------------------- + TestCase("varlen_mixed", 1024, 16, 16, 4096, [1987, 523, 271, 1009], 1, 1, False, "bitwise_fwd"), + TestCase("short_seqs", 1024, 16, 16, 4096, [17, 31, 11], 1, 1, False, 
"bitwise_fwd"), + TestCase("b2_long_8k", 256, 4, 4, 1024, [8191, 8192], 1, 1, False, "bitwise_fwd"), + + # ------------------------------------------------------------------------- + # TP/CP/SP: similarity checks (TE Attention) + # ------------------------------------------------------------------------- + TestCase("tp2_cp4_sp", 4096, 64, 4, 12288, [2039, 1013, 509], 2, 4, True, "similarity"), + TestCase("tp2_cp2_sp_longseq", 4096, 32, 8, 14336, [65536, 8191, 4096], 2, 2, True, "similarity"), + + # ------------------------------------------------------------------------- + # Edge cases + # ------------------------------------------------------------------------- + TestCase("short_seqs_parallel", 1024, 16, 4, 4096, [17, 31, 11], 2, 2, True, "similarity"), + TestCase("extreme_mixed", 4096, 32, 8, 14336, [4093, 127, 257], 2, 2, True, "similarity"), + TestCase("long_short_mix", 4096, 32, 8, 14336, [65535, 512, 1024], 2, 2, True, "similarity"), +] +# fmt: on + + +# ============================================================================= +# Padding Helpers +# ============================================================================= + + +def _round_up(value: int, divisor: int) -> int: + return value if divisor <= 1 else (value + divisor - 1) // divisor * divisor + + +def compute_sbhd_padded_max_len( + seqlens: List[int], cp_size: int, tp_size: int, sp_enabled: bool +) -> int: + """Padded max_len for SBHD. + + Must be divisible by: + - cp_size * 2 for zigzag CP sharding (if cp_size > 1) + - tp_size for SP sharding along sequence dim (if sp_enabled) + """ + divisor = 1 + if cp_size > 1: + divisor *= cp_size * 2 + if sp_enabled: + divisor *= tp_size + return _round_up(max(seqlens), divisor) + + +def compute_thd_padded_seqlens( + seqlens: List[int], cp_size: int, tp_size: int, sp_enabled: bool, pad_to_max: bool = False +) -> List[int]: + """Padded per-sequence lengths for THD. 
+ + When pad_to_max=True, each sequence is padded to max(seqlens) so that + total THD tokens = max_len * B, matching SBHD. This ensures TE GEMM + kernels see identical M dimensions for bitwise comparison. + """ + cp_divisor = 2 * cp_size if cp_size > 1 else 1 + if pad_to_max: + max_len = _round_up(max(seqlens), cp_divisor) + padded = [max_len] * len(seqlens) + else: + padded = [_round_up(sl, cp_divisor) for sl in seqlens] + if sp_enabled: + remainder = sum(padded) % tp_size + if remainder > 0: + padded[-1] += tp_size - remainder + return padded + + +# ============================================================================= +# PackedSeqParams Helper +# ============================================================================= + + +def make_packed_seq_params( + seqlens: List[int], + cp_size: int = 1, + tp_size: int = 1, + sp_enabled: bool = False, + pad_to_max: bool = False, +) -> PackedSeqParams: + """Create PackedSeqParams with cu_seqlens and cu_seqlens_padded.""" + + def to_cu_seqlens(lens): + cu = torch.zeros(len(lens) + 1, dtype=torch.int32) + for i, l in enumerate(lens): + cu[i + 1] = cu[i] + l + return cu.cuda() + + padded = compute_thd_padded_seqlens(seqlens, cp_size, tp_size, sp_enabled, pad_to_max) + return PackedSeqParams( + cu_seqlens_q=to_cu_seqlens(seqlens), + cu_seqlens_kv=to_cu_seqlens(seqlens), + cu_seqlens_q_padded=to_cu_seqlens(padded), + cu_seqlens_kv_padded=to_cu_seqlens(padded), + max_seqlen_q=max(padded), + max_seqlen_kv=max(padded), + qkv_format='thd', + ) + + +# ============================================================================= +# Mock Core Attention (for bitwise tests) +# ============================================================================= + + +class MockCoreAttention(nn.Module): + """Per-sequence unfused causal attention for bitwise comparison.""" + + def __init__( + self, + config, + layer_number, + attn_mask_type, + attention_type, + attention_dropout=None, + softmax_scale=None, + cp_comm_type=None, + 
pg_collection=None, + ): + super().__init__() + self.num_q_heads = config.num_attention_heads + self.num_kv_heads = config.num_query_groups + self.head_dim = config.hidden_size // config.num_attention_heads + self.hidden_size = config.hidden_size + self.scale = 1.0 / (self.head_dim**0.5) + self.num_rep = self.num_q_heads // self.num_kv_heads + + def _repeat_kv(self, x): + """Repeat KV heads for GQA. [S, Hkv, D] -> [S, Hq, D].""" + if self.num_rep == 1: + return x + S, Hkv, D = x.shape + return x.unsqueeze(2).expand(S, Hkv, self.num_rep, D).reshape(S, self.num_q_heads, D) + + def _attention_single_seq(self, q, k, v): + """Causal attention for one sequence.""" + S = q.shape[0] + k, v = self._repeat_kv(k), self._repeat_kv(v) + q, k, v = (x.transpose(0, 1).contiguous() for x in (q, k, v)) + q32, k32, v32 = q.float(), k.float(), v.float() + scores = torch.matmul(q32, k32.transpose(-2, -1)) * self.scale + mask = torch.triu(torch.ones(S, S, dtype=torch.bool, device=q.device), diagonal=1) + scores.masked_fill_(mask, float('-inf')) + attn = torch.softmax(scores, dim=-1) + out = torch.matmul(attn, v32) + return out.transpose(0, 1).to(q.dtype).contiguous() + + def forward( + self, + query, + key, + value, + attention_mask=None, + attn_mask_type=None, + attention_bias=None, + packed_seq_params=None, + ): + if packed_seq_params is not None: + # THD: [T, 1, H, D] -> [T, H, D] + q = query.squeeze(1) if query.dim() == 4 else query + k = key.squeeze(1) if key.dim() == 4 else key + v = value.squeeze(1) if value.dim() == 4 else value + + cu_valid = packed_seq_params.cu_seqlens_q.cpu().tolist() + cu_padded = packed_seq_params.cu_seqlens_q_padded.cpu().tolist() + num_seqs = len(cu_valid) - 1 + + outputs = [] + for i in range(num_seqs): + out_seq = self._attention_single_seq( + q[cu_padded[i] : cu_padded[i + 1]], + k[cu_padded[i] : cu_padded[i + 1]], + v[cu_padded[i] : cu_padded[i + 1]], + ) + outputs.append(out_seq) + + return torch.cat(outputs, dim=0) # [T_padded, Hq, D] + + else: + # 
SBHD: [S, B, H, D] + S, B = query.shape[:2] + outputs = [ + self._attention_single_seq(query[:, b], key[:, b], value[:, b]) for b in range(B) + ] + return torch.stack(outputs, dim=1).reshape(S, B, self.hidden_size) + + +# ============================================================================= +# Layer Builder +# ============================================================================= + + +def build_gpt_layer( + hidden_size: int, + num_heads: int, + num_kv_heads: int, + ffn_hidden_size: int, + tp_size: int = 1, + cp_size: int = 1, + sp_enabled: bool = False, + use_mock_attention: bool = False, + deterministic: bool = False, +) -> TransformerLayer: + """Build GPT TransformerLayer, optionally with MockCoreAttention.""" + config = TransformerConfig( + num_layers=1, + hidden_size=hidden_size, + ffn_hidden_size=ffn_hidden_size, + num_attention_heads=num_heads, + num_query_groups=num_kv_heads, + bf16=True, + params_dtype=torch.bfloat16, + pipeline_dtype=torch.bfloat16, + autocast_dtype=torch.bfloat16, + hidden_dropout=0.0, + attention_dropout=0.0, + tensor_model_parallel_size=tp_size, + context_parallel_size=cp_size, + sequence_parallel=sp_enabled, + cp_comm_type="p2p" if cp_size > 1 else None, + deterministic_mode=deterministic, + ) + spec = get_gpt_layer_with_transformer_engine_spec() + if use_mock_attention: + spec.submodules.self_attention.submodules.core_attention = MockCoreAttention + layer = TransformerLayer(config, spec.submodules) + layer.cuda() + return layer + + +# ============================================================================= +# Sharding: full -> local +# ============================================================================= + + +def _zigzag_split(tensor, cp_rank, cp_size, dim=0): + """Split tensor along dim using zigzag pattern for CP. 
+ + For cp_size=2: rank0 gets chunks [0,3], rank1 gets chunks [1,2] + For cp_size=4: rank0 gets [0,7], rank1 gets [1,6], rank2 gets [2,5], rank3 gets [3,4] + """ + if cp_size <= 1: + return tensor + chunk_size = tensor.shape[dim] // (2 * cp_size) + i0, i1 = cp_rank, 2 * cp_size - cp_rank - 1 + chunk0 = tensor.narrow(dim, i0 * chunk_size, chunk_size) + chunk1 = tensor.narrow(dim, i1 * chunk_size, chunk_size) + return torch.cat([chunk0, chunk1], dim=dim) + + +def shard_sbhd(tensor, cp_rank, cp_size, tp_rank, tp_size, sp_enabled): + """Shard SBHD tensor: zigzag CP, then contiguous SP.""" + out = _zigzag_split(tensor, cp_rank, cp_size) + if sp_enabled: + seg = out.shape[0] // tp_size + out = out.narrow(0, tp_rank * seg, seg) + return out.contiguous() + + +def shard_thd( + seq_data_list, seqlens, cp_rank, cp_size, tp_rank, tp_size, sp_enabled, H, pad_to_max=False +): + """Shard per-sequence data into local THD [local_T, 1, H].""" + padded = compute_thd_padded_seqlens(seqlens, cp_size, tp_size, sp_enabled, pad_to_max) + + chunks = [] + for data, sl, psl in zip(seq_data_list, seqlens, padded): + if psl > sl: + data = torch.cat([data, torch.zeros(psl - sl, H, dtype=data.dtype, device=data.device)]) + chunks.append(_zigzag_split(data, cp_rank, cp_size)) + + packed = torch.cat(chunks, dim=0) + if sp_enabled: + seg = packed.shape[0] // tp_size + packed = packed[tp_rank * seg : (tp_rank + 1) * seg] + return packed.unsqueeze(1).contiguous() + + +# ============================================================================= +# Gathering: local -> full (with backward support) +# ============================================================================= + + +def _zigzag_merge(chunks: List[torch.Tensor], cp_size: int) -> torch.Tensor: + """Reconstruct full sequence from per-rank zigzag chunks.""" + half = chunks[0].shape[0] // 2 + parts = [None] * (2 * cp_size) + for r in range(cp_size): + parts[r] = chunks[r][:half] + parts[2 * cp_size - r - 1] = chunks[r][half:] + return 
torch.cat(parts, dim=0) + + +def _strip_thd_padding(tensor, seqlens, padded_seqlens): + """Remove per-sequence padding from THD tensor, keeping autograd.""" + total_valid = sum(seqlens) + if tensor.shape[0] <= total_valid: + return tensor + offset, seqs = 0, [] + for sl, psl in zip(seqlens, padded_seqlens): + seqs.append(tensor[offset : offset + sl]) + offset += psl + return torch.cat(seqs, dim=0) + + +class _GatherSBHD(torch.autograd.Function): + """Gather SBHD outputs from all ranks with gradient support.""" + + @staticmethod + def forward(ctx, local, cp_size, tp_size, sp_enabled): + ctx.cp_size, ctx.tp_size, ctx.sp_enabled = cp_size, tp_size, sp_enabled + ctx.cp_rank = parallel_state.get_context_parallel_rank() if cp_size > 1 else 0 + ctx.tp_rank = parallel_state.get_tensor_model_parallel_rank() + + out = local + if sp_enabled: + gathered = [torch.empty_like(out) for _ in range(tp_size)] + dist.all_gather( + gathered, out.contiguous(), group=parallel_state.get_tensor_model_parallel_group() + ) + out = torch.cat(gathered, dim=0) + if cp_size > 1: + gathered = [torch.empty_like(out) for _ in range(cp_size)] + dist.all_gather( + gathered, out.contiguous(), group=parallel_state.get_context_parallel_group() + ) + out = _zigzag_merge(gathered, cp_size) + return out + + @staticmethod + def backward(ctx, grad): + out = grad + if ctx.cp_size > 1: + out = _zigzag_split(out, ctx.cp_rank, ctx.cp_size) + if ctx.sp_enabled: + seg = out.shape[0] // ctx.tp_size + out = out[ctx.tp_rank * seg : (ctx.tp_rank + 1) * seg] + return out.contiguous(), None, None, None + + +class _GatherTHD(torch.autograd.Function): + """Gather THD outputs from all ranks with gradient support.""" + + @staticmethod + def forward(ctx, local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max): + ctx.seqlens, ctx.cp_size, ctx.tp_size, ctx.sp_enabled, ctx.H = ( + seqlens, + cp_size, + tp_size, + sp_enabled, + H, + ) + ctx.cp_rank = parallel_state.get_context_parallel_rank() if cp_size > 1 else 0 + 
ctx.tp_rank = parallel_state.get_tensor_model_parallel_rank() + ctx.padded = compute_thd_padded_seqlens(seqlens, cp_size, tp_size, sp_enabled, pad_to_max) + + out = local + if sp_enabled: + gathered = [torch.empty_like(out) for _ in range(tp_size)] + dist.all_gather( + gathered, out.contiguous(), group=parallel_state.get_tensor_model_parallel_group() + ) + out = torch.cat(gathered, dim=0) + + if cp_size > 1: + cp_group = parallel_state.get_context_parallel_group() + local_lens = [p // cp_size for p in ctx.padded] + offset, seqs = 0, [] + for i, ll in enumerate(local_lens): + chunk = out[offset : offset + ll] + gathered = [torch.empty_like(chunk) for _ in range(cp_size)] + dist.all_gather(gathered, chunk.contiguous(), group=cp_group) + seqs.append(_zigzag_merge(gathered, cp_size)[: seqlens[i]]) + offset += ll + out = torch.cat(seqs, dim=0) + else: + out = _strip_thd_padding(out, seqlens, ctx.padded) + return out + + @staticmethod + def backward(ctx, grad): + offset, chunks = 0, [] + for sl, psl in zip(ctx.seqlens, ctx.padded): + g = grad[offset : offset + sl, 0, :] + if psl > sl: + g = torch.cat([g, torch.zeros(psl - sl, ctx.H, dtype=g.dtype, device=g.device)]) + chunks.append(_zigzag_split(g, ctx.cp_rank, ctx.cp_size)) + offset += sl + + packed = torch.cat(chunks, dim=0) + if ctx.sp_enabled: + seg = packed.shape[0] // ctx.tp_size + packed = packed[ctx.tp_rank * seg : (ctx.tp_rank + 1) * seg] + return packed.unsqueeze(1).contiguous(), None, None, None, None, None, None + + +def gather_sbhd(local, cp_size, tp_size, sp_enabled): + if cp_size == 1 and not sp_enabled: + return local + return _GatherSBHD.apply(local, cp_size, tp_size, sp_enabled) + + +def gather_thd(local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max=False): + return _GatherTHD.apply(local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max) + + +# ============================================================================= +# Comparison Helpers +# 
============================================================================= + + +def _cosine_sim(a, b): + return torch.nn.functional.cosine_similarity( + a.flatten().float().unsqueeze(0), b.flatten().float().unsqueeze(0) + ).item() + + +def _tensor_sim(a, b): + a, b = a.double(), b.double() + denom = (a * a + b * b).sum() + return (2.0 * (a * b).sum() / denom).item() if denom else 1.0 + + +def assert_close(name, a, b, bitwise): + """Assert tensors match (bitwise or similarity).""" + if bitwise: + assert torch.equal( + a, b + ), f"{name}: NOT bitwise equal, max diff = {(a-b).abs().max().item()}" + else: + cs, ts = _cosine_sim(a, b), _tensor_sim(a, b) + assert cs > SIMILARITY_THRESHOLD, f"{name}: cosine sim = {cs:.6f} < {SIMILARITY_THRESHOLD}" + assert ts > SIMILARITY_THRESHOLD, f"{name}: tensor sim = {ts:.6f} < {SIMILARITY_THRESHOLD}" + + +# ============================================================================= +# Test Function +# ============================================================================= + + +@pytest.mark.parametrize("tc", TEST_CASES, ids=lambda tc: tc.name) +def test_thd_format(tc: TestCase): + """Compare THD vs SBHD format outputs and gradients.""" + H, seqlens = tc.hidden_size, tc.seqlens + tp_size, cp_size, sp = tc.tp_size, tc.cp_size, tc.sp_enabled + B = len(seqlens) + pad_to_max = tc.pad_thd_to_max + + # Deterministic mode for bitwise tests + if tc.forward_bitwise or tc.backward_bitwise: + os.environ["NVTE_ALLOW_NONDETERMINISTIC_ALGO"] = "0" + torch.use_deterministic_algorithms(True, warn_only=True) + + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp_size, context_parallel_size=cp_size + ) + model_parallel_cuda_manual_seed(42) + + deterministic = tc.forward_bitwise or tc.backward_bitwise + layer = build_gpt_layer( + H, + tc.num_heads, + tc.num_kv_heads, + tc.ffn_hidden_size, + tp_size, + cp_size, + sp, + tc.use_mock_attention, + deterministic, + ) + + cp_rank = parallel_state.get_context_parallel_rank() + tp_rank = 
parallel_state.get_tensor_model_parallel_rank() + dp_rank = parallel_state.get_data_parallel_rank() + + # Generate data + torch.manual_seed(42 + dp_rank) + seq_data = [torch.randn(sl, H, dtype=torch.bfloat16).cuda() for sl in seqlens] + torch.manual_seed(142 + dp_rank) + grad_per_seq = [torch.randn(sl, H, dtype=torch.bfloat16).cuda() for sl in seqlens] + + # Prepare SBHD + max_len = compute_sbhd_padded_max_len(seqlens, cp_size, tp_size, sp) + full_sbhd = torch.zeros(max_len, B, H, dtype=torch.bfloat16, device='cuda') + grad_sbhd = torch.zeros_like(full_sbhd) + for b, sl in enumerate(seqlens): + full_sbhd[:sl, b] = seq_data[b] + grad_sbhd[:sl, b] = grad_per_seq[b] + + # Prepare THD grad (valid tokens only, gather_thd backward handles re-padding) + grad_thd = torch.cat(grad_per_seq, dim=0).unsqueeze(1) + + # --- SBHD forward/backward --- + local_sbhd = shard_sbhd(full_sbhd, cp_rank, cp_size, tp_rank, tp_size, sp) + input_sbhd = local_sbhd.detach().clone().requires_grad_(True) + out_sbhd, _ = layer(hidden_states=input_sbhd) + gathered_sbhd = gather_sbhd(out_sbhd, cp_size, tp_size, sp) + gathered_sbhd.backward(grad_sbhd) + sbhd_grads = {n: p.grad.clone() for n, p in layer.named_parameters()} + layer.zero_grad() + + # --- THD forward/backward --- + local_thd = shard_thd(seq_data, seqlens, cp_rank, cp_size, tp_rank, tp_size, sp, H, pad_to_max) + packed_seq_params = make_packed_seq_params(seqlens, cp_size, tp_size, sp, pad_to_max) + input_thd = local_thd.detach().clone().requires_grad_(True) + out_thd, _ = layer(hidden_states=input_thd, packed_seq_params=packed_seq_params) + gathered_thd = gather_thd(out_thd, seqlens, cp_size, tp_size, sp, H, pad_to_max) + gathered_thd.backward(grad_thd) + thd_grads = {n: p.grad.clone() for n, p in layer.named_parameters()} + + # --- Gradient sync --- + # Reduce across DP*CP group (each DP/CP rank sees different data/tokens) + dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True) + for n in sbhd_grads: + 
dist.all_reduce(sbhd_grads[n], group=dp_cp_group) + dist.all_reduce(thd_grads[n], group=dp_cp_group) + # SP params also need reduction across TP group + if sp: + tp_group = parallel_state.get_tensor_model_parallel_group() + for n, p in layer.named_parameters(): + if getattr(p, "sequence_parallel", False): + dist.all_reduce(sbhd_grads[n], group=tp_group) + dist.all_reduce(thd_grads[n], group=tp_group) + + # --- Forward comparison --- + offset = 0 + for b, sl in enumerate(seqlens): + assert_close( + f"seq[{b}] output", + gathered_sbhd[:sl, b].detach(), + gathered_thd[offset : offset + sl, 0].detach(), + tc.forward_bitwise, + ) + offset += sl + + # --- Backward comparison --- + for n in sbhd_grads: + if n in thd_grads: + assert_close(f"grad[{n}]", sbhd_grads[n], thd_grads[n], tc.backward_bitwise) + + # --- Cleanup --- + Utils.destroy_model_parallel() + if tc.forward_bitwise or tc.backward_bitwise: + torch.use_deterministic_algorithms(False) + os.environ.pop("NVTE_ALLOW_NONDETERMINISTIC_ALGO", None) From bc9298cb5d4a5505d69f5e2b45f510d1b9a3b8f2 Mon Sep 17 00:00:00 2001 From: "Dennis(Zhenhuan) Liu" Date: Mon, 2 Mar 2026 16:33:59 +0800 Subject: [PATCH 296/334] [Dev] feat(checkpoint): zero-copy storage sharing in CheckpointWithoutOutput (#3641) Co-authored-by: Claude Opus 4.6 (1M context) --- megatron/core/tensor_parallel/random.py | 66 +++++++++++++++++-- .../unit_tests/tensor_parallel/test_random.py | 60 ++++++++++++++++- 2 files changed, 119 insertions(+), 7 deletions(-) diff --git a/megatron/core/tensor_parallel/random.py b/megatron/core/tensor_parallel/random.py index dd8be947834..b6932607f2e 100644 --- a/megatron/core/tensor_parallel/random.py +++ b/megatron/core/tensor_parallel/random.py @@ -14,6 +14,7 @@ from torch.cuda import _lazy_call, _lazy_init from torch.cuda import device as device_ctx_manager from torch.utils.checkpoint import detach_variable +from torch.utils.cpp_extension import load_inline from typing_extensions import TypeVarTuple, Unpack from 
megatron.core.parallel_state import ( @@ -23,6 +24,57 @@ ) from megatron.core.utils import is_te_min_version, safely_set_viewless_tensor_data +# --------------------------------------------------------------------------- +# C++ extension: zero-copy storage sharing for CheckpointWithoutOutput +# --------------------------------------------------------------------------- +# Makes dst's UntypedStorage point to src's data WITHOUT copying bytes. +# Holds a refcounted reference to src's StorageImpl so the memory stays alive. +# Operates below the Tensor / autograd layer → no version-counter bump, +# and ALL TensorImpls that reference dst's StorageImpl (including views +# created by reshape / split / etc. inside TE GroupedLinear) see the data. +# --------------------------------------------------------------------------- + +_SHARE_STORAGE_SRC = r""" +#include + +void share_storage(at::Tensor dst, at::Tensor src) { + auto* dst_impl = dst.storage().unsafeGetStorageImpl(); + + // Copy src's c10::Storage (increments StorageImpl refcount). + auto* src_storage_ref = new c10::Storage(src.storage()); + + void* data = src_storage_ref->data_ptr().get(); + size_t nbytes = src_storage_ref->nbytes(); + c10::Device device = src_storage_ref->device(); + + // Build a DataPtr whose deleter releases our StorageImpl reference. 
+ c10::DataPtr shared( + data, + static_cast(src_storage_ref), + [](void* ctx) { delete static_cast(ctx); }, + device); + + dst_impl->set_data_ptr(std::move(shared)); + dst_impl->set_nbytes(nbytes); +} +""" + +_share_storage_ext = None + + +def _get_share_storage(): + """Lazily compile & cache the share_storage extension.""" + global _share_storage_ext + if _share_storage_ext is None: + _share_storage_ext = load_inline( + name="share_storage_ext", + cpp_sources=_SHARE_STORAGE_SRC, + functions=["share_storage"], + verbose=False, + ) + return _share_storage_ext.share_storage + + from .utils import gather_split_1d_tensor, split_tensor_into_1d_equal_chunks try: @@ -728,12 +780,14 @@ def detach(t): if isinstance(outputs, torch.Tensor): outputs = (outputs,) - # restore the recomputed memory without changing the metadata - with torch.no_grad(): - for output, recomputation_output in zip(self.outputs, outputs): - output_size = recomputation_output.untyped_storage().size() - output.untyped_storage().resize_(output_size) - output.untyped_storage().copy_(recomputation_output.untyped_storage()) + # Zero-copy: make output's StorageImpl point to recomputation_output's data. + # This operates at the UntypedStorage level (below TensorImpl), so: + # - ALL views / reshapes that reference output's StorageImpl see the data + # (e.g. 
TE GroupedLinear's inp.reshape() + torch.split() saved for backward) + # - No tensor version-counter bump (no autograd complaint) + share_storage = _get_share_storage() + for output, recomputation_output in zip(self.outputs, outputs): + share_storage(output, recomputation_output) self.ctx.outputs = outputs self.ctx.inputs = inputs diff --git a/tests/unit_tests/tensor_parallel/test_random.py b/tests/unit_tests/tensor_parallel/test_random.py index a15ad83cb90..4fa79733d55 100644 --- a/tests/unit_tests/tensor_parallel/test_random.py +++ b/tests/unit_tests/tensor_parallel/test_random.py @@ -227,4 +227,62 @@ def checkpoint_forward(input): output2.backward(torch.ones((4, 4)), retain_graph=True) assert torch.equal(input1.grad, input2.grad) - Utils.destroy_model_parallel() + +class _ViewSavingLinear(torch.autograd.Function): + """Saves view tensors in forward to mimic TE GroupedLinear-style backward inputs.""" + + @staticmethod + def forward(ctx, inp, weight): + inp_2d = inp.reshape(-1, inp.shape[-1]) + inputmats = torch.tensor_split(inp_2d, 2, dim=0) + ctx.save_for_backward(*inputmats, weight) + ctx.input_shape = inp.shape + out_2d = inp_2d.matmul(weight.t()) + return out_2d.reshape(*inp.shape[:-1], weight.shape[0]) + + @staticmethod + def backward(ctx, grad_output): + *inputmats, weight = ctx.saved_tensors + for inputmat in inputmats: + if inputmat.numel() > 0 and inputmat.untyped_storage().size() == 0: + raise RuntimeError("Saved view tensor points to an empty storage.") + + inp_2d = torch.cat(inputmats, dim=0) + grad_output_2d = grad_output.reshape(-1, grad_output.shape[-1]) + grad_input_2d = grad_output_2d.matmul(weight) + grad_weight = grad_output_2d.t().matmul(inp_2d) + grad_input = grad_input_2d.reshape(ctx.input_shape) + return grad_input, grad_weight + + +def test_checkpoint_without_output_view_sharing_regression(): + def normal_forward(input_, weight): + x = torch.nn.functional.gelu(input_) + return _ViewSavingLinear.apply(x, weight) + + def 
checkpoint_forward(input_, weight): + checkpoint = CheckpointWithoutOutput() + x = checkpoint.checkpoint(torch.nn.functional.gelu, input_) + y = _ViewSavingLinear.apply(x, weight) + checkpoint.discard_output_and_register_recompute(y) + return y + + Utils.initialize_model_parallel() + try: + input1 = torch.randn((3, 2, 8), requires_grad=True) + weight1 = torch.randn((6, 8), requires_grad=True) + + input2 = input1.detach().clone().requires_grad_(True) + weight2 = weight1.detach().clone().requires_grad_(True) + + output1 = normal_forward(input1, weight1) + output2 = checkpoint_forward(input2, weight2) + assert torch.allclose(output1, output2) + + grad = torch.randn_like(output1) + output1.backward(grad, retain_graph=True) + output2.backward(grad, retain_graph=True) + assert torch.allclose(input1.grad, input2.grad) + assert torch.allclose(weight1.grad, weight2.grad) + finally: + Utils.destroy_model_parallel() From 5c613abf4e598b6d6ecf7473a4acc8f575eee4d9 Mon Sep 17 00:00:00 2001 From: Tailai Ma <58548582+xiaoyao0115@users.noreply.github.com> Date: Tue, 3 Mar 2026 10:06:06 +0800 Subject: [PATCH 297/334] [Dev] Add E2E support for THD format (#2924) Signed-off-by: xiaoyao0115 <1804647152@qq.com> Signed-off-by: tailaim Co-authored-by: kunlunl --- megatron/core/datasets/data_schedule.py | 557 +++++++++++++++++- megatron/core/datasets/data_schedule_utils.py | 529 +++++++++++++++++ megatron/core/datasets/gpt_dataset.py | 3 + megatron/core/datasets/readme.md | 62 ++ .../core/extensions/transformer_engine.py | 21 + megatron/core/model_parallel_config.py | 8 +- .../core/transformer/transformer_config.py | 34 ++ megatron/training/arguments.py | 16 +- megatron/training/datasets/sft_dataset.py | 248 +++++++- megatron/training/training.py | 160 +++-- pretrain_gpt.py | 17 +- .../unit_tests/models/test_mamba_moe_model.py | 1 + tests/unit_tests/test_sequence_packing.py | 479 +++++++++++++++ 13 files changed, 2066 insertions(+), 69 deletions(-) create mode 100644 
megatron/core/datasets/data_schedule_utils.py create mode 100644 tests/unit_tests/test_sequence_packing.py diff --git a/megatron/core/datasets/data_schedule.py b/megatron/core/datasets/data_schedule.py index 0f016473b6a..00591e4c24d 100644 --- a/megatron/core/datasets/data_schedule.py +++ b/megatron/core/datasets/data_schedule.py @@ -1,10 +1,21 @@ # Copyright (c) 2025 NVIDIA CORPORATION. All rights reserved. -from typing import Any, List, Optional +from typing import Any, Dict, List, Optional, Type import torch from megatron.core import parallel_state +from megatron.core.datasets.data_schedule_utils import ( + broadcast_scalars, + broadcast_tensor, + broadcast_to_pp_group, + build_packed_microbatches, + create_data_iterator, + get_batch_and_global_seqlens, + get_cp_slice_for_thd, + reroute_samples_to_dcp_ranks, +) +from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.pipeline_parallel.hybrid_cp_schedule import BalancedCPScheduler from megatron.core.process_groups_config import ProcessGroupCollection @@ -299,3 +310,547 @@ def __next__(self) -> Any: batch, global_ids_this_rank, global_id_seqlens, sample_id_groups, offsets ) return samples_this_rank_with_id, sample_id_groups + + +class BasePackingScheduler: + """Base class for sequence packing schedulers.""" + + def __init__( + self, + max_seqlen_per_dp_cp_rank: int, + cp_size: int, + dp_size: int, + microbatch_group_size_per_vp_stage: Optional[int], + ): + """ + Args: + max_seqlen_per_dp_cp_rank: The maximum sequence length per DPxCP rank. + cp_size: The context parallel size. + dp_size: The data parallel size. + microbatch_group_size_per_vp_stage: The microbatch group size per virtual + pipeline stage, only used when enabling VPP, otherwise None. 
+ """ + self.max_seqlen_per_dp_cp_rank = max_seqlen_per_dp_cp_rank + self.cp_size = cp_size + self.dp_size = dp_size + self.microbatch_group_size_per_vp_stage = microbatch_group_size_per_vp_stage + + def get_required_sample_keys(self): + """Return the required key of each batch.""" + raise NotImplementedError + + def get_groups_and_subsamples(self, sample_id_seqlens): + """schedule the samples into groups""" + raise NotImplementedError + + def run( + self, + data_iterator, + num_microbatches, + dp_group, + tp_group, + pp_group, + dp_cp_group, + dev, + config, + ): + """ + Run the scheduler and return the new data_iterator. + + Args: + data_iterator: The data iterator. + num_microbatches: The number of microbatches to fetch. + dp_group: Data parallel process group. + tp_group: Tensor parallel process group. + pp_group: Pipeline parallel process group. + dp_cp_group: Data parallel + context parallel process group. + dev: CUDA device. + config: Model parallel config. + + Returns: + new_data_iterator: The new data iterator (or list for VPP). + num_micro_batches: Number of micro batches after scheduling. + seqlen_sum_this_global_batch: Total tokens for FLOPs calculation. + seqlen_squared_sum_this_global_batch: Sum of squared seqlens for FLOPs. + """ + raise NotImplementedError + + +class DpBalancedScheduler(BasePackingScheduler): + """Packs sequences in their original order until reaching the max limit of sequence length.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.max_seq_len_all_ranks = self.max_seqlen_per_dp_cp_rank * self.cp_size + + def get_required_sample_keys(self): + """Return the required key of each batch.""" + return [ + "tokens", + "labels", + "loss_mask", + "position_ids", + "original_seq_len", # Length of the original sequence length, should be a gpu tensor. + "padded_seq_len", # Length of the padded sequence length, should be a gpu tensor. 
+ ] + + def get_groups_and_subsamples(self, sample_id_seqlens): + """ + Packs sequences in their original order until reaching the max limit of sequence length. + """ + sample_id_groups = [] + packed_id_groups = [] + sum_seqlen = 0 + single_microbatch = [] + + for i in range(len(sample_id_seqlens)): + if sum_seqlen + sample_id_seqlens[i][1] <= self.max_seq_len_all_ranks: + single_microbatch.append(i) + sum_seqlen += sample_id_seqlens[i][1] + else: + packed_id_groups.append(single_microbatch) + single_microbatch = [i] + sum_seqlen = sample_id_seqlens[i][1] + if len(single_microbatch) > 0: + packed_id_groups.append(single_microbatch) + + # we want the number of packed sequences to be multiple of dp_size + # so we move few samples from previous microbatch + # to the end of the microbatches if needed + num_packed_sequence = len(packed_id_groups) + + # when enabling vpp, we want the number of packed sequences to be + # multiple of dp_size * microbatch_group_size_per_vp_stage + multiple = self.dp_size * ( + self.microbatch_group_size_per_vp_stage + if self.microbatch_group_size_per_vp_stage is not None + else 1 + ) + if num_packed_sequence % multiple != 0: + remainder = num_packed_sequence % multiple + num_to_move = multiple - remainder + i = num_packed_sequence - 1 + while num_to_move > 0: + assert i >= 0, "Not enough samples to move" + if len(packed_id_groups[i]) > 1: + seq_id = packed_id_groups[i].pop() + packed_id_groups.append([seq_id]) + num_to_move -= 1 + else: + i -= 1 + + num_micro_batches = int(len(packed_id_groups) / self.dp_size) + for i in range(num_micro_batches): + sample_id_groups.append([]) + for j in range(self.cp_size * self.dp_size): + seq_id = int(i * self.dp_size + j / self.cp_size) + sample_id_groups[i].append(packed_id_groups[seq_id]) + return sample_id_groups + + def run( + self, + data_iterator, + num_microbatches: int, + dp_group, + tp_group, + pp_group, + dp_cp_group, + dev: torch.device, + config, + ): + """ + Run the complete scheduling 
pipeline. + + Steps: + 1. Fetch batches and gather global sequence lengths + 2. Check required sample keys + 3. Schedule samples into groups + 4. Reroute samples to DCP ranks + 5. Build packed microbatches + 6. Calculate FLOPs info + 7. Broadcast to PP group (for middle PP stages) + 8. Broadcast to TP group (for non-TP-0 ranks) + 9. Handle VPP if enabled + + Args: + data_iterator: The data iterator. + num_microbatches: The number of microbatches to fetch. + dp_group: Data parallel process group. + tp_group: Tensor parallel process group. + pp_group: Pipeline parallel process group. + dp_cp_group: Data parallel + context parallel process group. + dev: CUDA device. + config: Model parallel config. + + Returns: + new_data_iterator: The new data iterator (or list for VPP). + num_micro_batches: Number of micro batches after scheduling. + seqlen_sum_this_global_batch: Total tokens for FLOPs calculation. + seqlen_squared_sum_this_global_batch: Sum of squared seqlens for FLOPs. + """ + + total_dcp_gpus = dp_cp_group.size() + + # Handle VPP: extract the correct data_iterator for this PP stage. + # When VPP is enabled, data_iterator is a list with one entry per VPP stage. + # We only need one data_iterator to run the schedule (all VPP stages on the + # same PP rank share the same underlying dataset), so pick the first non-None. + # Record which VPP stages had data so create_data_iterator knows which ones + # need full samples vs metadata only. + vpp_has_data = None + if ( + config.virtual_pipeline_model_parallel_size is not None + and config.virtual_pipeline_model_parallel_size > 1 + ): + assert len(data_iterator) == config.virtual_pipeline_model_parallel_size + vpp_has_data = [di is not None for di in data_iterator] + extracted = None + for di in data_iterator: + if di is not None: + extracted = di + break + data_iterator = extracted + + # data_iterator is not None on TP rank 0 for PP stages that need data + # (first stage, last stage, or any stage with MTP). 
+ if data_iterator is not None: + assert tp_group.rank() == 0, "Only TP rank 0 should have data_iterator" + + # Step 1: Fetch batches and gather global sequence lengths + batch, global_id_seqlens, global_ids_this_rank, offsets, seqlens_gathered = ( + get_batch_and_global_seqlens(data_iterator, num_microbatches, dp_group) + ) + + # Step 2: Check required sample keys + for key in self.get_required_sample_keys(): + assert ( + key in batch[0] + ), f"Batch missing required key {key}, provided keys: {batch[0].keys()}" + + # Step 3: Schedule samples into groups + sample_id_groups = self.get_groups_and_subsamples(global_id_seqlens) + + # Validate scheduling result + set_gbs = set() + for group in sample_id_groups: + for sub in group: + set_gbs.update(sub) + assert len(set_gbs) == len(global_id_seqlens), ( + f"set_gbs length: {len(set_gbs)} != " + f"global_id_seqlens length: {len(global_id_seqlens)}" + ) + + # Step 4: Reroute samples to DCP ranks + samples_this_rank_with_id = reroute_samples_to_dcp_ranks( + batch, + global_ids_this_rank, + global_id_seqlens, + sample_id_groups, + offsets, + dp_group, + tp_group, + dp_cp_group, + total_dcp_gpus, + ) + + dcp_rank = dp_cp_group.rank() + num_micro_batches = len(sample_id_groups) + + grouped_samples = [ + [ + samples_this_rank_with_id[sub_sample_id] + for sub_sample_id in sample_id_groups[i][dcp_rank] + ] + for i in range(num_micro_batches) + ] + + # Step 5: Build packed microbatches + new_samples = build_packed_microbatches(grouped_samples, dev) + + # Step 6: Calculate FLOPs info + seqlen_sum_this_global_batch = float(sum(seqlens_gathered)) + seqlen_squared_sum_this_global_batch = float( + sum(seqlen**2 for seqlen in seqlens_gathered) + ) + else: + ( + new_samples, + num_micro_batches, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + ) = (None, None, None, None) + + # Step 7: Broadcast to PP group (for middle PP stages) + if tp_group.rank() == 0: + ( + new_samples, + num_micro_batches, + 
seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + ) = broadcast_to_pp_group( + new_samples, + num_micro_batches, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + pp_group, + dev, + ) + + # Step 8: Broadcast to TP group (for non-TP-0 ranks) + (num_micro_batches, seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch) = ( + broadcast_scalars( + [ + num_micro_batches, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + ], + tp_group, + dev, + ) + ) + num_micro_batches = int(num_micro_batches) + + # Step 9: create data_iterator and handle VPP if enabled + new_data_iterator = create_data_iterator(new_samples, tp_group, config, vpp_has_data) + + return ( + new_data_iterator, + num_micro_batches, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + ) + + +scheduler_map: Dict[str, Type[BasePackingScheduler]] = {"dp_balanced": DpBalancedScheduler} + + +def wrap_data_iterator( + data_iterator, config, num_microbatches, pg_collection: Optional[ProcessGroupCollection] = None +): + """ + A wrapper function that wraps around an existing data_iterator + and return the num_micro_batches for sequence packing. + + Args: + data_iterator: The original data_iterator to wrap around + config: The config object containing the max_seqlen_per_dp_cp_rank + dp_cp_group: Data parallel context parallel group. + pg_collection: The process group collection. 
+ """ + + if pg_collection is None: + dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True) + dp_group = parallel_state.get_data_parallel_group() + tp_group = parallel_state.get_tensor_model_parallel_group() + pp_group = parallel_state.get_pipeline_model_parallel_group() + else: + dp_cp_group = pg_collection.dp_cp + dp_group = pg_collection.dp + tp_group = pg_collection.tp + pp_group = pg_collection.pp + assert ( + dp_cp_group is not None + and dp_group is not None + and tp_group is not None + and pp_group is not None + ), "dp_cp_group, dp_group, tp_group must not be None when using sequence packing" + + dev = torch.cuda.current_device() + dp_size = dp_group.size() + cp_size = dp_cp_group.size() // dp_size + + # Look up the scheduler class by name + scheduler_type = config.sequence_packing_scheduler + + scheduler = scheduler_map[scheduler_type]( + config.max_seqlen_per_dp_cp_rank, + cp_size, + dp_size, + # When VPP is enabled, align num_micro_batches to this multiple. + ( + None + if config.virtual_pipeline_model_parallel_size is None + else config.microbatch_group_size_per_vp_stage + ), + ) + + ( + new_data_iterator, + num_micro_batches, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + ) = scheduler.run( + data_iterator, num_microbatches, dp_group, tp_group, pp_group, dp_cp_group, dev, config + ) + + return ( + new_data_iterator, + num_micro_batches, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + ) + + +def get_batch_on_this_rank_for_sequence_packing( + data_iterator, + vpp_size: Optional[int] = None, + mtp_on_this_rank: bool = False, + vp_stage: Optional[int] = None, + pg_collection: Optional[ProcessGroupCollection] = None, +): + """ + Get a batch of data for sequence packing. + Args: + data_iterator (Iterator): The data iterator to get the batch from. + mtp_on_this_rank (bool): Whether to use multi-token prediction. + vp_stage (Optional[int]): The stage of the pipeline. 
Returns: + tuple of (tokens, labels, loss_mask, attention_mask, position_ids, packed_seq_params) + """ + + if pg_collection is None: + tp_group = parallel_state.get_tensor_model_parallel_group() + pp_group = parallel_state.get_pipeline_model_parallel_group() + cp_group = parallel_state.get_context_parallel_group() + else: + tp_group = pg_collection.tp + pp_group = pg_collection.pp + cp_group = pg_collection.cp + + tp_src_rank = torch.distributed.get_process_group_ranks(tp_group)[0] + + is_tp_rank_0 = tp_group.rank() == 0 + is_first_stage = pp_group.rank() == 0 and (vp_stage is None or vp_stage == 0) + is_last_stage = pp_group.rank() == pp_group.size() - 1 and ( + vp_stage is None or vp_stage == vpp_size - 1 + ) + + is_first_or_last_stage = is_first_stage or is_last_stage + dev = torch.cuda.current_device() + + # data_iterator should return a batch including the following keys. + batch_keys = ['cu_seqlens', 'cu_seqlens_padded', 'max_seqlen'] + if is_first_stage or mtp_on_this_rank: + batch_keys.append('tokens') + batch_keys.append('position_ids') + if is_last_stage or mtp_on_this_rank: + batch_keys.append('labels') + batch_keys.append('loss_mask') + + # Get a batch from data_iterator or create an empty batch. + if is_tp_rank_0: + assert data_iterator is not None + batch = next(data_iterator) + for key in batch_keys: + assert key in batch, f"{key} is missing in current batch." + else: + assert data_iterator is None, "Non TP 0 rank should not have data_iterator" + batch = {} + + # Partition tokens, position_ids, labels, loss_mask for context parallel. + # Only TP rank 0 on stages that have data (first/last PP stage or MTP stage) needs this. + if is_tp_rank_0 and (is_first_or_last_stage or mtp_on_this_rank): + get_cp_slice_for_thd(batch, cp_group) + + # Broadcast cu_seqlens_size because we need it to create placeholder for cu_seqlens and + # cu_seqlens_padded for non TP 0 ranks. 
+ if is_tp_rank_0: + cu_seqlen_size = torch.tensor(batch['cu_seqlens'].size(0), dtype=torch.int32, device=dev) + else: + cu_seqlen_size = torch.empty(1, dtype=torch.int32, device=dev) + broadcast_tensor(cu_seqlen_size, tp_src_rank, tp_group) + cu_seqlen_size = cu_seqlen_size.item() + + # Broadcast total_tokens because we need it to create placeholder for tokens, position_ids, + # labels, loss_mask for non TP 0 ranks. Only first stage, last stage, + # and stage with mtp need this. + + if is_first_or_last_stage or mtp_on_this_rank: + if is_tp_rank_0: + total_tokens = torch.tensor(batch['tokens'].size(0), dtype=torch.int32, device=dev) + else: + total_tokens = torch.empty(1, dtype=torch.int32, device=dev) + broadcast_tensor(total_tokens, tp_src_rank, tp_group) + total_tokens = total_tokens.item() + + # Step1: Prepare "tokens", "position_ids" for first stage and stage with mtp on all TP ranks. + if is_first_stage or mtp_on_this_rank: + if is_tp_rank_0: + assert batch['tokens'].dtype == torch.int64 + assert batch['position_ids'].dtype == torch.int64 + batch['tokens'] = batch['tokens'].view(1, total_tokens) + batch['position_ids'] = batch['position_ids'].view(1, total_tokens) + else: + batch['tokens'] = torch.empty([1, total_tokens], dtype=torch.int64, device=dev) + batch['position_ids'] = torch.empty([1, total_tokens], dtype=torch.int64, device=dev) + else: + # Non first stage rank doesn't need tokens and position_ids. + batch['tokens'] = None + batch['position_ids'] = None + + # Step2: Prepare "labels", "loss_mask" for last stage and stage with mtp on all TP ranks. 
+ if is_last_stage or mtp_on_this_rank: + if is_tp_rank_0: + assert batch['labels'].dtype == torch.int64 + assert batch['loss_mask'].dtype == torch.float32 + batch['labels'] = batch['labels'].view(1, total_tokens) + batch['loss_mask'] = batch['loss_mask'].view(1, total_tokens) + else: + batch['labels'] = torch.empty([1, total_tokens], dtype=torch.int64, device=dev) + batch['loss_mask'] = torch.empty([1, total_tokens], dtype=torch.float32, device=dev) + else: + # Non last stage rank doesn't need labels and loss_mask. + batch['labels'] = None + batch['loss_mask'] = None + + # Step3: Prepare "cu_seqlens", "cu_seqlens_padded", "max_seqlen" on all ranks. + if is_tp_rank_0: + assert batch['cu_seqlens'].dtype == torch.int32 + assert batch['cu_seqlens_padded'].dtype == torch.int32 + assert batch['cu_seqlens'].dim() == 1 + assert batch['cu_seqlens_padded'].dim() == 1 + if type(batch['max_seqlen']) == int: + batch['max_seqlen'] = torch.tensor(batch['max_seqlen'], dtype=torch.int32, device=dev) + else: + assert batch['max_seqlen'].dtype == torch.int32 + assert batch['max_seqlen'].numel() == 1 + else: + batch['cu_seqlens'] = torch.empty([cu_seqlen_size], dtype=torch.int32, device=dev) + batch['cu_seqlens_padded'] = torch.empty([cu_seqlen_size], dtype=torch.int32, device=dev) + batch['max_seqlen'] = torch.empty(1, dtype=torch.int32, device=dev) + + # Broadcast batch inside TP group. + broadcast_tensor(batch['tokens'], tp_src_rank, tp_group) + broadcast_tensor(batch['position_ids'], tp_src_rank, tp_group) + broadcast_tensor(batch['labels'], tp_src_rank, tp_group) + broadcast_tensor(batch['loss_mask'], tp_src_rank, tp_group) + broadcast_tensor(batch['cu_seqlens'], tp_src_rank, tp_group) + broadcast_tensor(batch['cu_seqlens_padded'], tp_src_rank, tp_group) + broadcast_tensor(batch['max_seqlen'], tp_src_rank, tp_group) + + # Extract the data from batch after broadcasting. 
+ tokens = batch['tokens'] + position_ids = batch['position_ids'] + labels = batch['labels'] + loss_mask = batch['loss_mask'] + cu_seqlens = batch['cu_seqlens'] + cu_seqlens_padded = batch['cu_seqlens_padded'] + max_seqlen = batch['max_seqlen'].item() + + # Transformer Engine has a bug of cu_seqlens, we must treat cu_seqlens_padded as cu_seqlens to + # get the correct result. + # TODO: Revert this workaround once TE fixes the issue. + packed_seq_params = PackedSeqParams( + qkv_format="thd", + cu_seqlens_q=cu_seqlens_padded, + cu_seqlens_kv=cu_seqlens_padded, + cu_seqlens_q_padded=cu_seqlens_padded, + cu_seqlens_kv_padded=cu_seqlens_padded, + max_seqlen_q=max_seqlen, + max_seqlen_kv=max_seqlen, + local_cp_size=None, + cp_group=None, + ) + + # "attention_mask" is not valid for sequence packing, so set it to None. + return tokens, labels, loss_mask, None, position_ids, packed_seq_params diff --git a/megatron/core/datasets/data_schedule_utils.py b/megatron/core/datasets/data_schedule_utils.py new file mode 100644 index 00000000000..f3c637e4c79 --- /dev/null +++ b/megatron/core/datasets/data_schedule_utils.py @@ -0,0 +1,529 @@ +# Copyright (c) 2025 NVIDIA CORPORATION. All rights reserved. + +from typing import Dict, List + +import numpy as np +import torch + +from megatron.core.extensions.transformer_engine import get_thd_partitioned_indices +from megatron.core.rerun_state_machine import RerunDataIterator + + +def get_cp_slice_for_thd(batch, cp_group): + """Partition sequence data for context parallelism in THD format. + + Uses TE's THD partitioned indices to split the packed sequence across CP ranks. + Only keys present in the batch are sliced. + + Args: + batch: Dict with packed sequence data. + cp_group: Context parallel process group. 
+ """ + cp_size = cp_group.size() + if cp_size <= 1: + return + cp_rank = cp_group.rank() + total_tokens = batch['tokens'].size(0) + # Transformer Engine has a bug of cu_seqlens, we must treat cu_seqlens_padded as + # cu_seqlens to get the correct result. + # TODO: Revert this workaround once TE fixes the issue. + cu_seqlens = batch["cu_seqlens_padded"] + index = get_thd_partitioned_indices(cu_seqlens, total_tokens, cp_size, cp_rank) + for key in ['tokens', 'position_ids', 'labels', 'loss_mask']: + if key in batch: + batch[key] = batch[key].index_select(0, index) + + +def _unpack_batch(batch: List[Dict[str, torch.Tensor]]) -> List[Dict[str, torch.Tensor]]: + """ + Unpacks the packed samples into a list of sub-samples. + Since each sub-sample may be routed to different DPxCP ranks, + we unpack the sample here to avoid unnecessarily transferring + the entire packed sample. + """ + batch_unpacked = [] + dev = batch[0]["tokens"].device + original_seq_lens = [] + padded_seq_lens = [] + for sample in batch: + for key in sample.keys(): + if len(sample[key].shape) == 2: + # squeeze the redundant batch dimension added by + # default collate_fn in pytorch dataloader + # we need a custom collate_fn for THD to avoid this + # current THD does not support micro_batch_size > 1 due to sft_dataset.py and + # data_loader in data_samples.py + sample[key] = sample[key].squeeze(0) + for sub_sample in range(sample["cu_seqlens"].shape[0] - 1): + sub_sample_dict = {} + start_idx = sample["cu_seqlens"][sub_sample] + end_idx = sample["cu_seqlens"][sub_sample + 1] + if end_idx - start_idx == 0: + continue + for key in ["tokens", "labels", "loss_mask", "position_ids"]: + sub_sample_dict[key] = sample[key][start_idx:end_idx] + # Since sft_dataset.py does not provide cu_seqlens_original, + # we assume original_seq_len equals padded_seq_len here. + # Ideally the dataset should define the pre-padding seq_len. 
+ seq_len = (end_idx - start_idx).item() + original_seq_lens.append(seq_len) + padded_seq_lens.append(seq_len) + batch_unpacked.append(sub_sample_dict) + + # Single H2D transfer for all seq lens + original_seq_lens_cuda = torch.tensor(original_seq_lens, device=dev) + padded_seq_lens_cuda = torch.tensor(padded_seq_lens, device=dev) + for i, sub_sample_dict in enumerate(batch_unpacked): + sub_sample_dict["original_seq_len"] = original_seq_lens_cuda[i : i + 1] + sub_sample_dict["padded_seq_len"] = padded_seq_lens_cuda[i : i + 1] + + return batch_unpacked + + +def _get_global_seqlens_and_ids(subsample_seqlens: torch.Tensor, dp_group): + """ + Gathers the sequence lengths of all subsamples from all DP ranks and calculates global IDs. + """ + # Collect the number of subsamples from all ranks + num_local_subsamples = subsample_seqlens.shape[0] + local_len = torch.tensor([num_local_subsamples], dtype=torch.int32).cuda() + dp_subsample_count = [torch.zeros_like(local_len) for _ in range(dp_group.size())] + torch.distributed.all_gather(dp_subsample_count, local_len, group=dp_group) + + # Find the max number of subsamples across all ranks and pad subsample_seqlens to max length + dp_subsample_counts = torch.stack(dp_subsample_count, dim=0).cpu().view(-1) + max_sub_samples = int(dp_subsample_counts.max().item()) + + if num_local_subsamples < max_sub_samples: + subsample_seqlens_padded = torch.cat( + [ + subsample_seqlens, + torch.zeros(max_sub_samples - num_local_subsamples, dtype=torch.int32).cuda(), + ], + dim=0, + ) + else: + subsample_seqlens_padded = subsample_seqlens + + # Gather the subsample_seqlens from all ranks + seqlens_gathered = [torch.empty_like(subsample_seqlens_padded) for _ in range(dp_group.size())] + torch.distributed.all_gather(seqlens_gathered, subsample_seqlens_padded, group=dp_group) + + # Trim each seqlens_gathered to the length of the correct sample + for dp_rank, seqlen in enumerate(seqlens_gathered): + seqlens_gathered[dp_rank] = seqlen[: 
dp_subsample_counts[dp_rank]] + + seqlens_gathered = torch.cat(seqlens_gathered, dim=0) + seqlens_gathered = seqlens_gathered.cpu().tolist() + + # Calculate the offsets to assign unique global ID to each subsample. + csum = torch.cumsum(dp_subsample_counts, dim=0, dtype=torch.int32) + offsets = torch.cat([torch.zeros(1, dtype=torch.int32), csum], dim=0) + + # Calculate global ID for each subsample + dp_rank = dp_group.rank() + global_ids = torch.arange(len(seqlens_gathered), dtype=torch.int32).cuda() + + # Create a list of (global_id, seqlen) tuples for scheduling + global_id_seqlens = [(i, seqlens_gathered[i]) for i in range(len(global_ids))] + + # Get the global IDs locally present on this rank + start_idx = offsets[dp_rank] + end_idx = offsets[dp_rank + 1] + + global_ids_this_rank = global_ids[start_idx:end_idx] + + return global_id_seqlens, global_ids_this_rank, offsets, seqlens_gathered + + +def _pack_sequences( + samples: List, padded_lengths: torch.Tensor, original_lengths: torch.Tensor, dev: torch.device +) -> Dict[str, torch.Tensor]: + """Pack multiple samples into a single packed sample.""" + + def _pack_tensors(tensors): + return torch.cat([t.reshape(-1) for t in tensors], dim=0) + + tokens = _pack_tensors([sample["tokens"] for sample in samples]) + labels = _pack_tensors([sample["labels"] for sample in samples]) + loss_mask = _pack_tensors([sample["loss_mask"] for sample in samples]) + position_ids = _pack_tensors([sample["position_ids"] for sample in samples]) + + new_sample = {} + new_sample["tokens"] = tokens + new_sample["labels"] = labels + new_sample["loss_mask"] = loss_mask + new_sample["position_ids"] = position_ids + + padded_lengths = padded_lengths.to(device=dev, dtype=torch.int32, non_blocking=True).reshape(-1) + cu_seqlens_padded = torch.empty(padded_lengths.numel() + 1, device=dev, dtype=torch.int32) + cu_seqlens_padded[0] = 0 + cu_seqlens_padded[1:] = torch.cumsum(padded_lengths, dim=0) + max_seqlen = 
torch.max(padded_lengths).to(dtype=torch.int32) + + new_sample["cu_seqlens_padded"] = cu_seqlens_padded + new_sample["max_seqlen"] = max_seqlen + + original_lengths = original_lengths.to( + device=dev, dtype=torch.int32, non_blocking=True + ).reshape(-1) + cu_seqlens = torch.empty(original_lengths.numel() + 1, device=dev, dtype=torch.int32) + cu_seqlens[0] = 0 + cu_seqlens[1:] = torch.cumsum(original_lengths, dim=0).reshape(-1) + new_sample["cu_seqlens"] = cu_seqlens + + return new_sample + + +def broadcast_tensor(item, src_rank, group) -> None: + """Broadcast a tensor from src_rank to all ranks in the group.""" + if item is not None: + torch.distributed.broadcast(item, src_rank, group=group) + + +def broadcast_to_pp_group( + new_samples, + num_micro_batches, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + pp_group, + dev, +): + """ + Broadcast num_micro_batches, seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch and metadata to middle PP stages. + Before this broadcast, the new_samples on middle PP stages are None, + after this broadcast, the new_samples on middle PP stages contain the metadata but + without tokens, labels, loss_mask, position_ids. 
+ """ + + pp_src_rank = torch.distributed.get_process_group_ranks(pp_group)[0] + + if pp_group.size() > 2: + if pp_group.rank() == 0: + tensor_list = [ + torch.tensor( + [ + num_micro_batches, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + ], + dtype=torch.float32, + ).cuda() + ] + for sample in new_samples: + tensor_list.append(sample["max_seqlen"].unsqueeze(0)) + for sample in new_samples: + tensor_list.append(sample["cu_seqlens"]) + tensor_list.append(sample["cu_seqlens_padded"]) + info_to_broadcast = torch.cat(tensor_list, dim=0).to(device=dev, dtype=torch.float32) + info_length_tensor = torch.tensor(info_to_broadcast.shape[0], dtype=torch.int32).cuda() + broadcast_tensor(info_length_tensor, pp_src_rank, pp_group) + broadcast_tensor(info_to_broadcast, pp_src_rank, pp_group) + else: + info_length_tensor = torch.tensor(0, dtype=torch.int32).cuda() + broadcast_tensor(info_length_tensor, pp_src_rank, pp_group) + info_to_broadcast = torch.empty(info_length_tensor.item(), dtype=torch.float32).cuda() + broadcast_tensor(info_to_broadcast, pp_src_rank, pp_group) + if pp_group.rank() != pp_group.size() - 1: + # middle PP stages receive the broadcasted info and unpack it + info_numpy = info_to_broadcast.cpu().numpy() + num_micro_batches = int(info_numpy[0]) + seqlen_sum_this_global_batch = info_numpy[1] + seqlen_squared_sum_this_global_batch = info_numpy[2] + max_seqlens = info_to_broadcast[3 : 3 + num_micro_batches] + cu_seqlens_list = [] + cu_seqlens_padded_list = [] + # cu_seqlens always starts with 0, and the other metadata values + # (num_micro_batches, seqlen_sum, seqlen_squared_sum, max_seqlens) + # are always positive, so we can use 0 as the delimiter to locate + # the start of each cu_seqlens / cu_seqlens_padded tensor. + # This avoids an extra broadcast for the lengths of cu_seqlens. 
+ indices = np.where(info_numpy == 0)[0] + for i in range(num_micro_batches): + cu_seqlens_list.append(info_to_broadcast[indices[i * 2] : indices[i * 2 + 1]]) + if i == num_micro_batches - 1: + cu_seqlens_padded_list.append(info_to_broadcast[indices[i * 2 + 1] :]) + else: + cu_seqlens_padded_list.append( + info_to_broadcast[indices[i * 2 + 1] : indices[i * 2 + 2]] + ) + + new_samples = [] + for i in range(num_micro_batches): + new_sample = {} + new_sample["max_seqlen"] = max_seqlens[i].to(torch.int32) + new_sample["cu_seqlens"] = cu_seqlens_list[i].to(torch.int32) + new_sample["cu_seqlens_padded"] = cu_seqlens_padded_list[i].to(torch.int32) + new_samples.append(new_sample) + + return ( + new_samples, + num_micro_batches, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + ) + + +def broadcast_scalars(values: List, group, dev, dtype=torch.float32) -> List: + """ + Broadcast scalar values from rank 0 to all ranks in the group. + + Args: + values: List of scalar values to broadcast (only used on rank 0). + group: The process group to broadcast within. + dev: The device to use for the tensor. + dtype: The data type for the tensor. + + Returns: + List of broadcasted values. + """ + if group.size() <= 1: + return values + + src_rank = torch.distributed.get_process_group_ranks(group)[0] + num_values = len(values) + + if group.rank() == 0: + info_to_broadcast = torch.tensor(values, dtype=dtype, device=dev) + else: + info_to_broadcast = torch.zeros(num_values, dtype=dtype, device=dev) + + broadcast_tensor(info_to_broadcast, src_rank, group) + + if group.rank() != 0: + values = info_to_broadcast.cpu().tolist() + + return values + + +def create_data_iterator(new_samples, tp_group, config, vpp_has_data=None): + """Handle virtual pipeline parallelism. + + For VPP, each PP rank needs a list of data iterators (one per VPP stage). 
+ VPP stages that originally had a data_iterator (indicated by vpp_has_data) + get full samples; others get metadata only (cu_seqlens, cu_seqlens_padded, + max_seqlen). + + Args: + new_samples: The packed samples after scheduling. + tp_group: Tensor parallel process group. + config: Model parallel config. + vpp_has_data: A list of booleans (one per VPP stage) indicating which + VPP stages originally had a data_iterator. None if VPP is disabled. + """ + if ( + config.virtual_pipeline_model_parallel_size is not None + and config.virtual_pipeline_model_parallel_size > 1 + ): + vpp_size = config.virtual_pipeline_model_parallel_size + if tp_group.rank() == 0: + metadata = [ + {k: sample[k] for k in ["max_seqlen", "cu_seqlens", "cu_seqlens_padded"]} + for sample in new_samples + ] + new_data_iterator = [] + for i in range(vpp_size): + if vpp_has_data is not None and vpp_has_data[i]: + new_data_iterator.append(RerunDataIterator(iter(new_samples))) + else: + new_data_iterator.append(RerunDataIterator(iter(metadata))) + else: + new_data_iterator = [None for _ in range(vpp_size)] + else: + new_data_iterator = RerunDataIterator(iter(new_samples)) if tp_group.rank() == 0 else None + + return new_data_iterator + + +def reroute_samples_to_dcp_ranks( + batch, + global_ids_this_rank, + global_id_seqlens, + sample_id_groups, + offsets, + dp_group, + tp_group, + dp_cp_group, + total_dcp_gpus, +): + """ + Reroutes the sub-samples to the correct rank after scheduling. + + For each key in the batch dict, we perform an all-to-all communication + to transfer the data to the correct ranks. 
+ """ + + def _gid_to_src_rank(gid: int) -> int: + dp_src_rank = torch.bucketize(gid, offsets[1:] - 1) + dcp_rank = ( + torch.distributed.get_process_group_ranks(dp_group)[dp_src_rank] // tp_group.size() + ) % dp_cp_group.size() + return dcp_rank + + gid2local_id = {int(gid): i for i, gid in enumerate(global_ids_this_rank)} + dcp_rank = dp_cp_group.rank() + dp_ranks = torch.distributed.get_process_group_ranks(dp_group) + dp_ranks = [(r // tp_group.size()) % dp_cp_group.size() for r in dp_ranks] + + data_keys = batch[0].keys() + + # Create the send plan + combined_sample_id_groups: List[List[int]] = [[] for _ in range(total_dcp_gpus)] + for d in range(total_dcp_gpus): + for sample_id_group in sample_id_groups: + combined_sample_id_groups[d].extend(sample_id_group[d]) + for dest_rank in range(total_dcp_gpus): + combined_sample_id_groups[dest_rank].sort() + + send_ids_sorted = [ + gid for d in dp_ranks for gid in combined_sample_id_groups[d] if gid in global_ids_this_rank + ] + + send_num_split = [0] * total_dcp_gpus + send_lens_split = [0] * total_dcp_gpus + for dest_rank in range(total_dcp_gpus): + if dest_rank in dp_ranks: + send_seq_lens = [ + global_id_seqlens[gid][1] + for gid in combined_sample_id_groups[dest_rank] + if gid in global_ids_this_rank + ] + send_num_split[dest_rank] = len(send_seq_lens) + send_lens_split[dest_rank] = sum(send_seq_lens) + else: + send_lens_split[dest_rank] = 0 + + # Create the recv plan + recv_sample_id_groups = [[] for _ in range(total_dcp_gpus)] + for gid in combined_sample_id_groups[dcp_rank]: + src_rank = _gid_to_src_rank(gid) + recv_sample_id_groups[src_rank].append(gid) + + recv_lens_split = [0] * total_dcp_gpus + for src_rank in range(total_dcp_gpus): + recv_lens_split[src_rank] = sum( + [global_id_seqlens[gid][1] for gid in recv_sample_id_groups[src_rank]] + ) + + recv_ids_sorted = [gid for d in range(total_dcp_gpus) for gid in recv_sample_id_groups[d]] + recv_counts = [len(recv_sample_id_groups[d]) for d in 
range(total_dcp_gpus)] + + recv_samples = [{k: None for k in data_keys} for _ in range(sum(recv_counts))] + + def _pack_sample_by_key(key: str) -> torch.Tensor: + flattened_tensors = [] + for gid in send_ids_sorted: + t = batch[gid2local_id[gid]][key].to(torch.cuda.current_device(), non_blocking=True) + flattened_tensors.append(t.reshape(-1)) + return ( + torch.cat(flattened_tensors, dim=0) + if flattened_tensors + else torch.empty(1, device=torch.cuda.current_device(), dtype=batch[0][key].dtype) + ) + + def _unpack_sample_by_key(key: str, recv_tensor: torch.Tensor): + cursor = 0 + for i, gid in enumerate(recv_ids_sorted): + sample_len = ( + 1 if key in ["original_seq_len", "padded_seq_len"] else global_id_seqlens[gid][1] + ) + recv_samples[i][key] = recv_tensor[cursor : cursor + sample_len] + cursor += sample_len + + for key in data_keys: + output_split_sizes, input_split_sizes = ( + (recv_counts, send_num_split) + if key in ["original_seq_len", "padded_seq_len"] + else (recv_lens_split, send_lens_split) + ) + send_tensor = _pack_sample_by_key(key) + recv_tensor_size = sum(output_split_sizes) + recv_tensor = torch.empty( + recv_tensor_size, device=torch.cuda.current_device(), dtype=send_tensor.dtype + ) + torch.distributed.all_to_all_single( + output=recv_tensor, + input=send_tensor, + output_split_sizes=output_split_sizes, + input_split_sizes=input_split_sizes, + group=dp_cp_group, + ) + _unpack_sample_by_key(key, recv_tensor) + + recv_sample_with_id = {recv_id: recv_samples[i] for i, recv_id in enumerate(recv_ids_sorted)} + return recv_sample_with_id + + +def build_packed_microbatches( + grouped_samples: List[List[Dict[str, torch.Tensor]]], dev: torch.device +) -> List[Dict[str, torch.Tensor]]: + """Build packed samples for each microbatch.""" + num_micro_batches = len(grouped_samples) + seg_starts: List[int] = [0] + original_lens_tensors = [] + padded_lens_tensors = [] + + for i in range(num_micro_batches): + samples = grouped_samples[i] + 
seg_starts.append(seg_starts[-1] + len(samples)) + original_lens_tensors.extend([s["original_seq_len"].reshape(-1) for s in samples]) + padded_lens_tensors.extend([s["padded_seq_len"].reshape(-1) for s in samples]) + + padded_lens_all_gpu = torch.cat(padded_lens_tensors, dim=0).to(dtype=torch.int32) + original_lens_all_gpu = torch.cat(original_lens_tensors, dim=0).to(dtype=torch.int32) + + new_samples: List[Dict[str, torch.Tensor]] = [] + for i in range(num_micro_batches): + samples = grouped_samples[i] + lens_padded = padded_lens_all_gpu[seg_starts[i] : seg_starts[i + 1]] + lens_original = original_lens_all_gpu[seg_starts[i] : seg_starts[i + 1]] + new_sample = _pack_sequences(samples, lens_padded, lens_original, dev) + new_samples.append(new_sample) + + return new_samples + + +def get_batch_and_global_seqlens(data_iterator, num_microbatches, dp_group): + """ + Get the batch and global sequence lengths. + Each DP rank loads the same number of sequences, so we need to gather the sequence + lengths from all ranks then we can schedule the sequences into groups. + Args: + data_iterator: The data iterator. + num_microbatches: The number of microbatches. + dp_group: The data parallel group. + + Returns: + batch: The batch. + global_id_seqlens: The global sequence lengths. + global_ids_this_rank: The global IDs locally present on this rank. + """ + + batch_list = [next(data_iterator) for _ in range(num_microbatches)] + + batch = [] + for item in batch_list: + if isinstance(item, dict): + batch.append(item) + elif isinstance(item, list): + batch.extend(item) + else: + raise ValueError(f"Invalid item type: {type(item)}") + + # in sft_dataset.py, sequences are already packed before rescheduling, + # so we need to unpack them here and repack after rescheduling. + # This is only to adapt to the current megatron-lm sft_dataset. + # If you implement your own dataset, just have __getitem__ return List[Dict] + # and this step can be skipped. 
+ batch = _unpack_batch(batch) + + subsample_seqlens = torch.cat([sample["padded_seq_len"] for sample in batch]).to( + dtype=torch.int32, device=torch.cuda.current_device() + ) + + global_id_seqlens, global_ids_this_rank, offsets, seqlens_gathered = ( + _get_global_seqlens_and_ids(subsample_seqlens, dp_group) + ) + + return batch, global_id_seqlens, global_ids_this_rank, offsets, seqlens_gathered diff --git a/megatron/core/datasets/gpt_dataset.py b/megatron/core/datasets/gpt_dataset.py index cbe0652402d..04d2c279818 100644 --- a/megatron/core/datasets/gpt_dataset.py +++ b/megatron/core/datasets/gpt_dataset.py @@ -79,6 +79,9 @@ class GPTDatasetConfig(BlendedMegatronDatasetConfig): context_parallel_size: Optional[int] = None """The size of the context parallel group. Needed for padding in packed sequences.""" + sft_mock_dataset_config_json: Optional[str] = None + """This config provides the necessary information for the mock dataset.""" + def __post_init__(self) -> None: """Do asserts and set fields post init""" super().__post_init__() diff --git a/megatron/core/datasets/readme.md b/megatron/core/datasets/readme.md index 452bf24e4a2..a61c623d960 100644 --- a/megatron/core/datasets/readme.md +++ b/megatron/core/datasets/readme.md @@ -192,6 +192,68 @@ To query the `BlendedDataset` for the _k_-th sample we do the following To save time during initialization, each index is built/cached sequentially on one process rank and subsequently loaded in parallel on other process ranks. The cached indices are unique to a hash generated in the `BlendedDataset.__init__` function. +## Packing Scheduler + +The packing scheduler re-schedules variable-length sequences across DP×CP ranks to improve GPU utilization. It is built around two modules: `data_schedule.py` (high-level logic and entry points) and `data_schedule_utils.py` (utility functions). 
+ +### Call Hierarchy + +The scheduling pipeline has two phases connected by the data iterator: `wrap_data_iterator` consumes the **original** data iterator, performs global-batch scheduling, and produces a **wrapped** (packed) data iterator; `get_batch_on_this_rank_for_sequence_packing` then consumes this **wrapped** data iterator to fetch individual packed microbatches during training. + +``` + original wrapped (packed) + data_iterator data_iterator + │ │ + ▼ ▼ + ┌────────────────────────┐ ┌────────────────────────────────────┐ + │ wrap_data_iterator() │ │ get_batch_on_this_rank_for_ │ +Phase 1 │ (once per global │ ────────► │ sequence_packing() │ Phase 2 +(scheduling) │ batch) │ returns │ (once per microbatch, │ (fetching) + │ │ wrapped │ called by training loop) │ + └───────────┬────────────┘ iterator └──────────────┬─────────────────────┘ + │ │ + ▼ ▼ + DpBalancedScheduler.run() next(wrapped_data_iterator) + │ ├─ get_thd_partitioned_indices() [TE] + ├─ get_batch_and_global_seqlens() [utils] ├─ broadcast_tensor() [utils] + ├─ get_groups_and_subsamples() └─ PackedSeqParams(...) + ├─ reroute_samples_to_dcp_ranks() [utils] + ├─ build_packed_microbatches() [utils] + ├─ broadcast_to_pp_group() [utils] + ├─ broadcast_scalars() [utils] + └─ create_data_iterator() [utils] +``` + +### `data_schedule.py` + +#### Entry Points + +- **`wrap_data_iterator(original_data_iterator) → wrapped_data_iterator`** — Top-level entry point called once per global batch. Takes the **original** data iterator as input, resolves the scheduler class from `scheduler_map`, instantiates it, and delegates to `scheduler.run()` which consumes all microbatches from the original iterator, re-schedules them, and produces a **wrapped** (packed) data iterator along with the updated `num_microbatches` and FLOPs statistics. + +- **`get_batch_on_this_rank_for_sequence_packing(wrapped_data_iterator)`** — Per-microbatch entry point called by the training loop. 
Takes the **wrapped** data iterator returned by `wrap_data_iterator` as input. Fetches one packed microbatch via `next(wrapped_data_iterator)`, broadcasts batch fields across TP ranks, optionally partitions sequences across CP ranks using Transformer Engine's `thd_get_partitioned_indices`, and constructs `PackedSeqParams` (with `cu_seqlens`, `max_seqlen`, `qkv_format=thd`). + +#### Scheduler Classes + +- **`BasePackingScheduler`** — Abstract base class. Defines the interface: + - `get_groups_and_subsamples()` — pure scheduling algorithm (must be overridden). + - `run()` — full pipeline: fetch → schedule → reroute → pack → broadcast → VPP handling. + +- **`DpBalancedScheduler(BasePackingScheduler)`** — Concrete scheduler that packs sequences in their original order until reaching `max_seqlen_per_dp_cp_rank × cp_size`. Aligns the number of microbatches to `dp_size` (and VPP stage multiples when applicable). + +### `data_schedule_utils.py` + +Utility functions consumed by the schedulers above: + +| Function | Role | +|---|---| +| `get_batch_and_global_seqlens()` | Fetch `num_microbatches` batches from the data iterator and all-gather sequence lengths across DP ranks. | +| `reroute_samples_to_dcp_ranks()` | All-to-all communication to transfer sub-samples to their scheduled DP×CP rank. | +| `build_packed_microbatches()` | Concatenate sub-samples within each microbatch group and produce `cu_seqlens`. | +| `broadcast_to_pp_group()` | Broadcast packed samples and metadata from the first/last PP stage to middle stages. | +| `broadcast_scalars()` | Broadcast scalar values (e.g. `num_microbatches`, FLOPs stats) across a process group. | +| `broadcast_tensor()` | Broadcast a single tensor within a process group. | +| `create_data_iterator()` | Wrap packed sample lists into a data iterator; handles VPP stage splitting. 
| + ## Fast DataLoader initialization Especially for large-scale runs, DataLoader initialization can take several minutes, since it involves opening and memory-mapping multiple files and can significantly stress the filesystem. To speed up this process, we have developed the following three optimizations, controlled by configuration flags": diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index bb913d97446..20f0ece635e 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -2559,3 +2559,24 @@ def set_save_original_input(module): from transformer_engine.pytorch.float8_tensor import Float8Tensor except ImportError: Float8Tensor = None + + +def get_thd_partitioned_indices(cu_seqlens, total_tokens, cp_size, cp_rank): + """Get partitioned indices for THD format data in context parallel. + + Args: + cu_seqlens: Cumulative sequence lengths tensor. + total_tokens: Total number of tokens. + cp_size: Context parallel world size. + cp_rank: Context parallel rank. + + Returns: + Partitioned indices tensor. + """ + assert is_te_min_version("1.10.0"), ( + "Please update Transformer Engine to >= 1.10 to use " + "Context Parallel with THD format data" + ) + import transformer_engine_torch as tex + + return tex.thd_get_partitioned_indices(cu_seqlens, total_tokens, cp_size, cp_rank) diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index 5bbeef9b022..970b3b871fe 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -62,7 +62,7 @@ class ModelParallelConfig: can handle without overflowing the memory. Typically, a good starting point is to set this to maximum sequence length / context parallel size. This is used to calculate the number and length of sub-samples assigned to - each rank when using hybrid_context_parallel. + each rank when sequence_packing_scheduler is not None. 
""" hybrid_context_parallel: bool = False @@ -72,6 +72,12 @@ class ModelParallelConfig: Please set max_seqlen_per_dp_cp_rank when using hybrid_context_parallel. """ + sequence_packing_scheduler: Optional[Literal['dp_balanced']] = None + """ + Scheduler for sequence packing and hybrid context parallel. + dp_balanced: DP-balanced scheduler for sequence packing. + """ + expert_model_parallel_size: int = 1 """Distributes Moe Experts across sub data parallel dimension.""" diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 9da9a644a47..d48e29c1e71 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -2076,6 +2076,40 @@ def __post_init__(self): self.attention_backend == AttnBackend.flash ), "Batch invariant mode only supports FlashAttention" + if self.sequence_packing_scheduler is not None: + # Check TE version. + if not HAVE_PACKAGING: + raise ImportError( + "packaging is not installed. Please install it with `pip install packaging`." + ) + # TODO: remove this after we fix the convergence issue with TE < 2.9. + if not ( + is_te_min_version("2.9.0") or get_te_version() == PkgVersion("2.9.0.dev0+5b3092a") + ): + raise ValueError( + "SFT sequence packing requires Transformer Engine >= 2.9.0 " + f"but got {get_te_version()} (TE < 2.9.0 may have convergence issues)." + ) + + # Needed for passing variable sequences between pp stages. + self.variable_seq_lengths = True + + # TODO(tailaim): add support for other dispatcher types + assert self.moe_token_dispatcher_type == "alltoall", ( + f"sequence_packing only supports moe_token_dispatcher_type='alltoall', " + f"got '{self.moe_token_dispatcher_type}'" + ) + + supported_schedulers = ['dp_balanced'] + if ( + self.sequence_packing_scheduler is not None + and self.sequence_packing_scheduler not in supported_schedulers + ): + raise ValueError( + f"Unsupported scheduler: {self.sequence_packing_scheduler}. 
" + f"Available schedulers: {supported_schedulers}" + ) + @dataclass @experimental_api diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 5d5fa34b6c5..25f0d0d06d0 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -884,13 +884,6 @@ def validate_args(args, defaults={}): if args.rl_use_sequence_packing: args.consumed_train_bins = 0 - # Support for variable sequence lengths across batches/microbatches. - # set it if the dataloader supports generation of variable sequence lengths - # across batches/microbatches. Due to additional communication overhead - # during pipeline parallelism, it should not be set if sequence length - # is constant during training. - args.variable_seq_lengths = False - # Iteration-based training. if args.train_iters: # If we use iteration-based training, make sure the @@ -1061,6 +1054,11 @@ def validate_args(args, defaults={}): assert args.dataloader_type == 'single', 'Hybrid context parallelism only supported with single dataloader type' assert args.calculate_per_token_loss, 'Hybrid context parallelism must be used with --calculate-per-token-loss' + if args.sequence_packing_scheduler is not None: + assert args.context_parallel_size * args.max_seqlen_per_dp_cp_rank >= args.seq_length, \ + f'Packed sequence buffer size ({args.context_parallel_size * args.max_seqlen_per_dp_cp_rank}) ' \ + f'must be >= single sequence max length ({args.seq_length})' + # disable async_tensor_model_parallel_allreduce when # model parallel memory optimization is enabled if (args.tensor_model_parallel_size > 1 or args.context_parallel_size > 1) \ @@ -3061,4 +3059,8 @@ def _add_sft_args(parser): group.add_argument('--sft', action="store_true", help='Megatron SFT training') group.add_argument('--sft-tokenizer-prompt-format', type=str, default="nemotron-h-aligned", help='SFT prompt format.') + group.add_argument('--sft-mock-dataset-config-json', type=str, default=None, + help='This config provides the 
necessary information for the mock dataset. You can either specify a CSV file that contains sequence lengths, where each line stores the length of a sequence, for example: {"mode":"file","path":"/path/to/file"}. Alternatively, you can specify a distribution (currently only supporting lognormal distribution) along with the required parameters, for example, {"mode":"distribution","type":"lognormal","min_seq_len":1024,"max_seq_len":2048,"mean_seq_len":1536,"lognormal_sigma":1.1}, where sigma controls the variability of the lognormal distribution. ' + 'If not specified and --mock-data is set, defaults to a lognormal distribution with ' + 'min_seq_len=seq_length//2, max_seq_len=seq_length, mean_seq_len=seq_length*3//4, lognormal_sigma=1.1.') return parser diff --git a/megatron/training/datasets/sft_dataset.py b/megatron/training/datasets/sft_dataset.py index 9de5d2a52fe..3f2e6e7362c 100644 --- a/megatron/training/datasets/sft_dataset.py +++ b/megatron/training/datasets/sft_dataset.py @@ -2,12 +2,16 @@ import atexit, json from collections import Counter -from typing import Any, Dict, Optional +import json +import math +from typing import Any, Dict, Optional, List, Union import numpy as np +import pandas as pd import torch from megatron.core.datasets.gpt_dataset import GPTDatasetConfig +from megatron.core.datasets.indexed_dataset import IndexedDataset from megatron.core.datasets.megatron_dataset import LowLevelDataset, MegatronDataset from megatron.core.datasets.utils import Split @@ -88,6 +92,26 @@ def _split_conversations(self, merged_conversations): split_conversations.append(current) return split_conversations + def _calculate_padding_divisor(self) -> int: + """ + Calculate the divisor used for sequence padding. 
+ tp_pad = tp_size * 2 if tp_size > 1 else 1 + cp_pad = cp_size * 2 if cp_size > 1 else 1 + cp_pad = cp_pad * dp_size if hybrid_cp else cp_pad + divisor = cp_pad * tp_pad + """ + if self.config.hybrid_context_parallel: + # Hybrid CP: consider both CP and DP + cp_pad = self.config.data_parallel_size * self.config.context_parallel_size * 2 + else: + # Standard CP: only consider CP + cp_pad = self.config.context_parallel_size * 2 if self.config.context_parallel_size > 1 else 1 + tp_pad = self.config.sequence_parallel_size if self.config.sequence_parallel_size > 0 else 1 + divisor = cp_pad * tp_pad + # TODO(tailaim): do we need to pad for FP8 execution? + # divisor = ((divisor + 15) // 16) * 16 + return divisor + def __getitem__(self, idx: int) -> Dict[str, Any]: tokenizer = self.config.tokenizer @@ -124,12 +148,11 @@ def extend_with_padding(tokens, targets, positions, pad_len): assert not self.config.reset_position_ids pack_positions.extend(range(len(tokens_list))) - if self.config.context_parallel_size > 1: - pad_granularity = self.config.context_parallel_size * 2 - mod_token_count = len(pack_tokens) % pad_granularity - if mod_token_count != 0: - pad_len = pad_granularity - mod_token_count - extend_with_padding(pack_tokens, pack_targets, pack_positions, pad_len) + pad_granularity = self._calculate_padding_divisor() + mod_token_count = len(pack_tokens) % pad_granularity + if mod_token_count != 0: + pad_len = pad_granularity - mod_token_count + extend_with_padding(pack_tokens, pack_targets, pack_positions, pad_len) # TODO(duncan): Consider also padding to multiple of number of tokens here. This might # be needed for efficiency (and potentially set via command-line argument). @@ -190,3 +213,214 @@ def extend_with_padding(tokens, targets, positions, pad_len): 'cu_seqlens': cu_seqlens, 'max_seqlen': max_seqlen, } + + +class MockSFTLowLevelDataset: + """The low-level mock dataset for SFT + + Args: + mode (str): One of 'file', 'distribution', or 'verification'. 
+        **kwargs: Additional arguments depending on mode.
+            For mode='file': path (str) - path to a CSV file with sequence lengths.
+            For mode='distribution': type (str), min_seq_len (int), max_seq_len (int),
+                mean_seq_len (int), and distribution-specific params (e.g. lognormal_sigma).
+            For mode='verification': data_path (str) - prefix path to an IndexedDataset
+                (.bin/.idx files). Optional lognormal distribution params same as
+                'distribution' mode (defaults: min_seq_len=100, max_seq_len=4096,
+                mean_seq_len=2048, lognormal_sigma=1.1).
+        format (str): Output format for MockSFTDataset. Either 'thd' (default, sequence
+            packing with cu_seqlens) or 'sbhd' (padded to seq_length, no cu_seqlens).
+    """
+
+    seed: int = 0
+    """The hard-coded random seed to use to set the NumPy RNG"""
+
+    size: int = 1000000
+    """The hard-coded number of sequences to generate"""
+
+    def __init__(self, mode: str, **kwargs) -> None:
+        np.random.seed(self.seed)
+        self.format = kwargs.get("format", "thd")
+
+        if mode == "file":
+            self.sequence_lengths = np.array(pd.read_csv(kwargs["path"])).flatten()
+            self.size = len(self.sequence_lengths)
+        elif mode == "distribution":
+            min_seq_len = kwargs["min_seq_len"]
+            max_seq_len = kwargs["max_seq_len"]
+            mean_seq_len = kwargs["mean_seq_len"]
+            if kwargs["type"] == "lognormal":
+                lognormal_sigma = kwargs["lognormal_sigma"]
+                self.sequence_lengths = self.generate_lognormal_samples(
+                    self.size, mean_seq_len, lognormal_sigma, min_seq_len, max_seq_len
+                )
+            else:
+                raise ValueError(f"Unsupported distribution type {kwargs['type']}")
+        elif mode == "verification":
+            # Load real tokens from an IndexedDataset for realistic loss curves.
+            # Sequence lengths are drawn from a lognormal distribution (same as
+            # "distribution" mode) to allow controlled comparison of THD vs SBHD.
+ self.indexed_dataset = IndexedDataset(kwargs["data_path"]) + min_seq_len = kwargs.get("min_seq_len", 100) + max_seq_len = kwargs.get("max_seq_len", 4096) + mean_seq_len = kwargs.get("mean_seq_len", 2048) + lognormal_sigma = kwargs.get("lognormal_sigma", 1.1) + self.sequence_lengths = self.generate_lognormal_samples( + self.size, mean_seq_len, lognormal_sigma, min_seq_len, max_seq_len + ) + else: + raise ValueError(f"Unsupported mode '{mode}', must be 'file', 'distribution', or 'verification'") + + def generate_lognormal_samples(self, size, mean, sigma, min_seq_len, max_seq_len): + mu = np.log(mean) - sigma**2 / 2 + samples = np.random.lognormal(mu, sigma, size) + samples = np.clip(samples, min_seq_len, max_seq_len) + return samples.astype(int) + + def __len__(self) -> int: + return self.size + + def __getitem__(self, idx: int) -> np.ndarray: + # The returned sample has 'length-1' tokens; an EOD token is appended + # later in MockSFTDataset.__getitem__, making the total 'length' tokens. + length = int(self.sequence_lengths[idx % self.size]) + if hasattr(self, 'indexed_dataset'): + target = length - 1 + num_docs = len(self.indexed_dataset) + doc_idx = idx % num_docs + raw = self.indexed_dataset[doc_idx] + if len(raw) >= target: + sample = raw[:target] + else: + # Concatenate documents until we reach the target length. 
# NOTE(review): __getitem__ below belongs to MockSFTLowLevelDataset, whose class
# header lies outside this chunk; it is reproduced here as a bare method because
# its text is entangled with the MockSFTDataset definition in this span.
def __getitem__(self, idx: int) -> np.ndarray:
    """Return a synthetic sample of ``length - 1`` tokens.

    An EOD token is appended later in MockSFTDataset.__getitem__, bringing the
    total to exactly ``length`` tokens.
    """
    length = int(self.sequence_lengths[idx % self.size])
    if not hasattr(self, 'indexed_dataset'):
        # Distribution mode: deterministic ramp of length - 1 tokens.
        return np.arange(1, length, dtype=np.int64)

    want = length - 1
    num_docs = len(self.indexed_dataset)
    doc_idx = idx % num_docs
    first_doc = self.indexed_dataset[doc_idx]
    if len(first_doc) >= want:
        sample = first_doc[:want]
    else:
        # Concatenate subsequent documents (wrapping around) until the target
        # length is reached.
        # NOTE(review): if every document were empty this loop would never make
        # progress — TODO confirm IndexedDataset never yields empty documents.
        pieces = [first_doc]
        collected = len(first_doc)
        cursor = doc_idx + 1
        while collected < want:
            extra = self.indexed_dataset[cursor % num_docs]
            missing = want - collected
            pieces.append(extra[:missing])
            collected += min(len(extra), missing)
            cursor += 1
        sample = np.concatenate(pieces)[:want]
    assert len(sample) == want
    return sample.astype(np.int64)


class MockSFTDataset(SFTDataset):
    """The mock dataset used during SFT"""

    def __init__(
        self,
        dataset: LowLevelDataset,
        dataset_path: Optional[str],
        indices: np.ndarray,
        num_samples: Optional[int],
        index_split: Split,
        config: GPTDatasetConfig,
    ) -> None:
        # Identical to the parent constructor; kept for explicitness.
        super().__init__(dataset, dataset_path, indices, num_samples, index_split, config)

    @staticmethod
    def build_low_level_dataset(dataset_path: str, config: GPTDatasetConfig) -> LowLevelDataset:
        """Build the mock low-level dataset, from a JSON config when provided."""
        if config.sft_mock_dataset_config_json is not None:
            mock_config = json.loads(config.sft_mock_dataset_config_json)
        else:
            # Default: log-normal length distribution bounded by the context size.
            mock_config = {
                "mode": "distribution",
                "type": "lognormal",
                "min_seq_len": config.sequence_length // 2,
                "max_seq_len": config.sequence_length,
                "mean_seq_len": config.sequence_length // 4 * 3,
                "lognormal_sigma": 1.1,
            }
        return MockSFTLowLevelDataset(**mock_config)

    def __len__(self) -> int:
        return self.num_samples

    def __getitem__(self, idx: int) -> Dict[str, Any]:
        """Return one mock sample in SBHD (padded) or THD (packed) layout."""
        tok = self.config.tokenizer
        pack_length = self.config.sequence_length
        eod = tok.eod
        pad = tok.pad

        raw = self.dataset[int(self.indices[idx % len(self.indices)])]

        # The low-level dataset yields length - 1 tokens; appending EOD makes
        # the sequence exactly `length` tokens long.
        seq = raw.tolist()
        seq.append(eod)

        if self.dataset.format == "sbhd":
            # Truncate over-long sequences so the final real token is EOD.
            if len(seq) >= pack_length + 1:
                seq = seq[: pack_length - 1] + [eod]
            # Right-pad to pack_length + 1 (the +1 allows the input/label shift).
            missing = pack_length + 1 - len(seq)
            if missing > 0:
                seq = seq + [pad] * missing
            assert len(seq) == pack_length + 1
            inputs = torch.tensor(seq[:-1], dtype=torch.int64)
            labels = torch.tensor(seq[1:], dtype=torch.int64)
            # Sequential position ids across the whole (padded) sequence,
            # matching standard non-packed GPT training.
            pos = torch.arange(pack_length, dtype=torch.int64)
            mask = torch.ones(pack_length, dtype=torch.float32)
            mask[labels == pad] = 0.0
            return {
                'tokens': inputs,
                'labels': labels,
                'loss_mask': mask,
                'position_ids': pos,
            }

        # THD (sequence packing) layout below.
        packed = list(seq) + [pad]
        positions = list(range(len(packed)))

        # Truncate when the pack exceeds pack_length + 1 (+1 for the shift).
        if len(packed) > pack_length + 1:
            packed = packed[: pack_length - 1] + [eod, pad]
            positions = positions[: pack_length + 1]

        # Align the post-shift length to the padding divisor (tp * cp * 2).
        divisor = self._calculate_padding_divisor()
        remainder = (len(packed) - 1) % divisor
        if remainder:
            extra = divisor - remainder
            start = positions[-1] + 1
            packed = packed + [pad] * extra
            positions = positions + list(range(start, start + extra))

        # Shift for next-token prediction.
        inputs = torch.tensor(packed[:-1], dtype=torch.int64)
        labels = torch.tensor(packed[1:], dtype=torch.int64)
        pos = torch.tensor(positions[:-1], dtype=torch.int64)

        seq_len = len(inputs)
        mask = torch.ones(seq_len, dtype=torch.float32)
        mask[labels == pad] = 0.0

        return {
            'tokens': inputs,
            'labels': labels,
            'loss_mask': mask,
            'position_ids': pos,
            'cu_seqlens': torch.tensor([0, seq_len], dtype=torch.int32),
            'max_seqlen': torch.tensor(seq_len, dtype=torch.int32),
        }
scale_factor = 3.0 / 2.0 if swiglu else 1.0 - return 4 * expansion * scale_factor * batch_size * seq_len * hidden_size**2 + return 4 * expansion * scale_factor * seqlen_sum_this_global_batch * hidden_size**2 - def moe_layer_flops(batch_size, seq_len, hidden_size, moe_ffn_hidden_size, + def moe_layer_flops(seqlen_sum_this_global_batch, hidden_size, moe_ffn_hidden_size, shared_expert_ffn_hidden_size, num_experts_routed_to, moe_latent_size=None, swiglu=False): """Calculate FLOPs for an MoE layer.""" scale_factor = 3.0 / 2.0 if swiglu else 1.0 if moe_latent_size is None: - routed_flops = (4 * batch_size * seq_len * hidden_size * + routed_flops = (4 * seqlen_sum_this_global_batch * hidden_size * moe_ffn_hidden_size * num_experts_routed_to * scale_factor) else: # Routed experts run on moe_latent_size. - routed_flops = (4 * batch_size * seq_len * moe_latent_size * + routed_flops = (4 * seqlen_sum_this_global_batch * moe_latent_size * moe_ffn_hidden_size * num_experts_routed_to * scale_factor) # Up proj and down proj. 
- routed_flops += (4 * batch_size * seq_len * hidden_size * moe_latent_size) - shared_flops = 4 * batch_size * seq_len * hidden_size * shared_expert_ffn_hidden_size * scale_factor + routed_flops += (4 * seqlen_sum_this_global_batch * hidden_size * moe_latent_size) + shared_flops = 4 * seqlen_sum_this_global_batch * hidden_size * shared_expert_ffn_hidden_size * scale_factor return routed_flops + shared_flops def attn_layer_flops( - batch_size, seq_len, hidden_size, num_heads, gqa=True, gqa_groups=8, kv_channels=None + seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch, hidden_size, num_heads, gqa=True, gqa_groups=8, kv_channels=None ): """Calculate FLOPs for an attention layer.""" p = (kv_channels * num_heads / hidden_size) if kv_channels else 1 g = gqa_groups if gqa else num_heads return ( 4 - * batch_size - * seq_len * hidden_size * p - * (hidden_size + (hidden_size * (g / num_heads)) + (seq_len / 2)) + * (hidden_size * seqlen_sum_this_global_batch + (hidden_size * (g / num_heads)) * seqlen_sum_this_global_batch + (seqlen_squared_sum_this_global_batch / 2)) ) - def mamba_layer_flops(batch_size, seq_len, hidden_size, state_dim=16, + def mamba_layer_flops(seqlen_sum_this_global_batch, hidden_size, state_dim=16, head_dim=64, num_groups=1, num_heads=128): """Calculate FLOPs for a Mamba layer.""" # Note (rwaleffe): flops estimate for scan should be updated based on new SSD kernels, @@ -301,16 +300,15 @@ def mamba_layer_flops(batch_size, seq_len, hidden_size, state_dim=16, return ( ( 2 - * batch_size - * seq_len + * seqlen_sum_this_global_batch * hidden_size * (2 * d_in + 2 * num_groups * state_dim + nheads) ) # in_proj - + (7 * batch_size * seq_len * d_in * state_dim) # scan - + (2 * batch_size * seq_len * d_in * hidden_size) # out_proj + + (7 * seqlen_sum_this_global_batch * d_in * state_dim) # scan + + (2 * seqlen_sum_this_global_batch * d_in * hidden_size) # out_proj ) - def hybrid_flops(batch_size, seq_len, hidden_size, + def 
hybrid_flops(seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch, hidden_size, num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers, mamba_state_dim=128, mamba_head_dim=64, mamba_num_groups=8, mamba_num_heads=128, @@ -322,17 +320,17 @@ def hybrid_flops(batch_size, seq_len, hidden_size, vocab_size=256000, mtp_num_layers=0): """Calculate total FLOPs for the hybrid model.""" flops_fwd = ( - num_attn_layers * attn_layer_flops(batch_size, seq_len, hidden_size, + num_attn_layers * attn_layer_flops(seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch, hidden_size, num_attn_heads, gqa, gqa_groups, kv_channels) + - num_mlp_layers * mlp_layer_flops(batch_size, seq_len, hidden_size, + num_mlp_layers * mlp_layer_flops(seqlen_sum_this_global_batch, hidden_size, mlp_expansion, swiglu) + - num_mamba_layers * mamba_layer_flops(batch_size, seq_len, hidden_size, + num_mamba_layers * mamba_layer_flops(seqlen_sum_this_global_batch, hidden_size, mamba_state_dim, mamba_head_dim, mamba_num_groups, mamba_num_heads) + - num_moe_layers * moe_layer_flops(batch_size, seq_len, hidden_size, moe_ffn_hidden_size, + num_moe_layers * moe_layer_flops(seqlen_sum_this_global_batch, hidden_size, moe_ffn_hidden_size, shared_expert_ffn_hidden_size, num_experts_routed_to, moe_latent_size, swiglu) + - (2 * batch_size * seq_len * hidden_size * vocab_size * (1 + mtp_num_layers)) # logits computation + (2 * seqlen_sum_this_global_batch * hidden_size * vocab_size * (1 + mtp_num_layers)) # logits computation ) return flops_fwd * 3 @@ -403,13 +401,18 @@ def transformer_flops(): assert not args.group_query_attention ''' Basic arithmetic - let B is batch size, s is seq_len, h is embedding dim, - for one self_attnetion block (prenorm is not included) - qkv projection: 6Bsh^2 - attn: 2Bs^2h - attn over value: 2Bs^2h - oproj: 2Bsh^2 - + + Let h be the embedding dim. 
+ We use two statistics to unify BSHD and THD cases: + seqlen_sum_this_global_batch: total number of tokens in this global batch + seqlen_squared_sum_this_global_batch: sum of squared sequence lengths in this global batch + + For one self-attention block (prenorm not included): + qkv projection: 6 * seqlen_sum_this_global_batch * h^2 + attn: 2 * seqlen_squared_sum_this_global_batch * h + attn over value: 2 * seqlen_squared_sum_this_global_batch * h + oproj: 2 * seqlen_sum_this_global_batch * h^2 + references https://arxiv.org/abs/2305.10403 https://arxiv.org/abs/2205.05198 @@ -430,7 +433,7 @@ def transformer_flops(): standard_self_attn_term = ( forward_backward_expansion_factor * fma_expansion_factor - * ( + * ( seqlen_sum_this_global_batch * ( ## q lora + rope + q norm q_term ## kv lora + rope + kv norm @@ -442,12 +445,12 @@ def transformer_flops(): ) + args.hidden_size * args.qk_pos_emb_head_dim ## o proj - + (args.num_attention_heads * args.v_head_dim) * args.hidden_size + + (args.num_attention_heads * args.v_head_dim) * args.hidden_size) ## core attn - + args.seq_length + + seqlen_squared_sum_this_global_batch * (args.num_attention_heads * (args.qk_head_dim + args.qk_pos_emb_head_dim)) - / 2 # causal mask (only half of the mask is non-zero) - + args.seq_length * args.num_attention_heads * args.v_head_dim / 2 + / 2 # causal mask (only half of the mask is non-zero) + + seqlen_squared_sum_this_global_batch * args.num_attention_heads * args.v_head_dim / 2 ) ) @@ -460,7 +463,7 @@ def transformer_flops(): standard_self_attn_term = ( forward_backward_expansion_factor * fma_expansion_factor - * ( + * ( seqlen_sum_this_global_batch *( ## qkv proj args.hidden_size * ( @@ -468,14 +471,14 @@ def transformer_flops(): + key_projection_size + value_projection_size + gate_projection_size - ) + )) ## core attention + query_projection_size - * args.seq_length + * seqlen_squared_sum_this_global_batch / 2 # causal mask (only half of the mask is non-zero) * 2 # QK^T and (QK^T)V ## 
out proj - + query_projection_size + + seqlen_sum_this_global_batch * query_projection_size * args.hidden_size ) ) @@ -536,7 +539,7 @@ def transformer_flops(): + args.hidden_size * v_dim ) - ) + ) * seqlen_sum_this_global_batch else: raise ValueError( "Invalid experimental_attention_variant: " @@ -553,8 +556,7 @@ def transformer_flops(): ) total_floating_point_operations = ( - batch_size - * args.seq_length + seqlen_sum_this_global_batch * ( # MLP forward_backward_expansion_factor @@ -584,8 +586,6 @@ def transformer_flops(): + (shared_expert_ffn_hidden_size * ffn_expansion_factor) * num_moe_layers ) - # Self Attention - + self_attn_term # MTP norms and proj + forward_backward_expansion_factor * fma_expansion_factor @@ -603,6 +603,10 @@ def transformer_flops(): * args.padded_vocab_size * (mtp_num_layers + 1) # MTP + final logit ) + + + # Self Attention + self_attn_term + ) return total_floating_point_operations @@ -616,8 +620,8 @@ def transformer_flops(): mtp_num_layers = 0 # Compute hybrid model FLOPs. return hybrid_flops( - batch_size=batch_size, - seq_len=args.seq_length, + seqlen_sum_this_global_batch=seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch=seqlen_squared_sum_this_global_batch, hidden_size=args.hidden_size, num_attn_layers=num_attn_layers, num_mamba_layers=num_mamba_layers, @@ -1728,6 +1732,27 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch if isinstance(optim_instance, DistributedOptimizer): optim_instance.release_offloaded_gpu_states() + if config.sequence_packing_scheduler is not None: + # This wrapper is designed to support DP-balanced THD and dynamic-CP. + # Before wrapping, the data_iterator returns either a single sequence per get_item call, or a list where each element is a sequence. + # The wrapper is responsible for: + # 1. scheduling the sequences across ranks + # 2. packing them into THD format + # 3. 
broadcast flops parametes and num_microbatches to TP ranks to support unfixed num_microbatches + # 4. broadcast metadata(cu_seqlens, cu_seqlens_padded, max_seqlen, etc.) to PP ranks to + # 5. returning the packed data iterator and the FLOPs parameters + ( + data_iterator, + num_microbatches, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, + ) = wrap_data_iterator(data_iterator, config, get_num_microbatches()) + else: + # data_iterator unchanged + num_microbatches = get_num_microbatches() + seqlen_sum_this_global_batch = args.seq_length * args.global_batch_size + seqlen_squared_sum_this_global_batch = args.seq_length ** 2 * args.global_batch_size + # Forward pass. if save_dgrads_in_this_iteration: enable_dgrad_logging(model, args.save) @@ -1735,7 +1760,7 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch forward_step_func=forward_step_func, data_iterator=data_iterator, model=model, - num_microbatches=get_num_microbatches(), + num_microbatches=num_microbatches, seq_length=args.seq_length, micro_batch_size=args.micro_batch_size, decoder_seq_length=args.decoder_seq_length, @@ -1768,7 +1793,7 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch should_checkpoint, should_exit, exit_code = rerun_state_machine.should_checkpoint_and_exit() if should_exit: - return {}, True, should_checkpoint, should_exit, exit_code, None, None, 0 + return {}, True, should_checkpoint, should_exit, exit_code, None, None, 0, seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch # Empty unused memory. 
if args.empty_unused_memory_level >= 1: @@ -1848,8 +1873,10 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch grad_norm, num_zeros_in_grad, log_max_attention_logit, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, ) - return {}, skipped_iter, should_checkpoint, should_exit, exit_code, grad_norm, num_zeros_in_grad, log_max_attention_logit + return {}, skipped_iter, should_checkpoint, should_exit, exit_code, grad_norm, num_zeros_in_grad, log_max_attention_logit, seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch def training_log( @@ -1864,6 +1891,8 @@ def training_log( params_norm, num_zeros_in_grad, max_attention_logit, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, pg_collection=None, is_first_iteration=False, ): @@ -2096,7 +2125,7 @@ def training_log( elapsed_time = timers('interval-time').elapsed(barrier=True, reset=should_reset) elapsed_time_per_iteration = elapsed_time / total_iterations - throughput = num_floating_point_operations(args, batch_size) / ( + throughput = num_floating_point_operations(args,seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch) / ( elapsed_time_per_iteration * 10**12 * args.world_size ) @@ -2864,6 +2893,8 @@ def trace_handler(p): # Completely skip iteration if needed. if iteration in args.iterations_to_skip: + # TODO(tailaim): this need to be modified + assert config.sequence_packing_scheduler is None, "Sequence packing scheduler is not supported in skip iteration mode" # Dummy train_step to fast forward train_data_iterator. 
dummy_train_step(train_data_iterator) if iteration == start_iteration: @@ -2906,6 +2937,8 @@ def trace_handler(p): grad_norm, num_zeros_in_grad, max_attention_logit, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, ) = train_step( forward_step_func, train_data_iterator, model, optimizer, opt_param_scheduler, config, forward_backward_func, iteration=iteration ) @@ -2993,7 +3026,7 @@ def trace_handler(p): else: assert num_skipped_samples_in_batch == 0 args.skipped_train_samples += num_skipped_samples_in_batch - num_floating_point_operations_in_batch = num_floating_point_operations(args, batch_size) + num_floating_point_operations_in_batch = num_floating_point_operations(args, seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch) num_floating_point_operations_so_far += num_floating_point_operations_in_batch num_floating_point_operations_since_last_log_event += num_floating_point_operations_in_batch @@ -3019,6 +3052,8 @@ def trace_handler(p): params_norm, num_zeros_in_grad, max_attention_logit, + seqlen_sum_this_global_batch, + seqlen_squared_sum_this_global_batch, pg_collection=model_pg_collection, is_first_iteration=is_first_iteration, ) @@ -3214,9 +3249,30 @@ def evaluate( # Don't care about timing during evaluation config.timers = None ft_integration.on_eval_step_start() + if config.sequence_packing_scheduler is not None: + # This wrapper is designed to support DP-balanced THD and dynamic-CP. + # Before wrapping, the data_iterator returns either a single sequence per get_item call, or a list where each element is a sequence. + # The wrapper is responsible for: + # 1. scheduling the sequences across ranks + # 2. packing them into THD format + # 3. broadcast flops parametes and num_microbatches to TP ranks to support unfixed num_microbatches + # 4. broadcast metadata(cu_seqlens, cu_seqlens_padded, max_seqlen, etc.) to PP ranks to + # 5. 
returning the packed data iterator and the FLOPs parameters + try: + ( + packed_data_iterator, + eval_num_microbatches, + _, + _, + ) = wrap_data_iterator(data_iterator, config, eval_num_microbatches) + except StopIteration: + # Validation data iterator exhausted, stop evaluation early. + break + else: + packed_data_iterator = data_iterator loss_dicts = forward_backward_func( forward_step_func=forward_step_func, - data_iterator=data_iterator, + data_iterator=packed_data_iterator, model=model, num_microbatches=eval_num_microbatches, seq_length=args.seq_length, diff --git a/pretrain_gpt.py b/pretrain_gpt.py index e6ce7ac2a48..083f97b0a2f 100644 --- a/pretrain_gpt.py +++ b/pretrain_gpt.py @@ -25,6 +25,7 @@ from megatron.core import parallel_state from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder from megatron.core.datasets.gpt_dataset import GPTDataset, GPTDatasetConfig, MockGPTDataset +from megatron.core.datasets.data_schedule import get_batch_on_this_rank_for_sequence_packing from megatron.core.enums import ModelType from megatron.core.models.gpt import GPTModel from megatron.core.rerun_state_machine import get_rerun_state_machine @@ -49,6 +50,7 @@ get_blend_and_blend_per_split, is_first_or_last_pipeline_stage, ) +from megatron.training.datasets.sft_dataset import SFTDataset, MockSFTDataset from model_provider import model_provider try: @@ -66,6 +68,15 @@ def get_batch(data_iterator, vp_stage: Optional[int] = None): """Generate a batch.""" args = get_args() config = core_transformer_config_from_args(args) + + if args.sequence_packing_scheduler is not None: + return get_batch_on_this_rank_for_sequence_packing( + data_iterator, + vpp_size=config.virtual_pipeline_model_parallel_size, + mtp_on_this_rank=mtp_on_this_rank(config, ignore_virtual=False, vp_stage=vp_stage), + vp_stage=vp_stage, + ) + # TODO: this is pretty hacky, find a better way if not is_first_or_last_pipeline_stage(vp_stage) and ( (not 
mtp_on_this_rank(config, ignore_virtual=False, vp_stage=vp_stage))): @@ -250,6 +261,7 @@ def core_gpt_dataset_config_from_args(args): "data_parallel_size": args.data_parallel_size, "sequence_parallel_size": args.tensor_model_parallel_size*args.sequence_parallel, "hybrid_context_parallel": args.hybrid_context_parallel, + "sft_mock_dataset_config_json":args.sft_mock_dataset_config_json, } # add FIM args to the config @@ -287,7 +299,10 @@ def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None config = core_gpt_dataset_config_from_args(args) if args.sft: - dataset_type = SFTDataset + if args.mock_data: + dataset_type = MockSFTDataset + else: + dataset_type = SFTDataset else: if args.mock_data: dataset_type = MockGPTDataset diff --git a/tests/unit_tests/models/test_mamba_moe_model.py b/tests/unit_tests/models/test_mamba_moe_model.py index 39b4a18e243..9797f5c20f7 100644 --- a/tests/unit_tests/models/test_mamba_moe_model.py +++ b/tests/unit_tests/models/test_mamba_moe_model.py @@ -275,6 +275,7 @@ "offload_modules": [], "hybrid_context_parallel": False, "max_seqlen_per_dp_cp_rank": None, + "sequence_packing_scheduler": None, "fallback_to_eager_attn": False, "linear_attention_type": None, "moe_router_force_biased": None, diff --git a/tests/unit_tests/test_sequence_packing.py b/tests/unit_tests/test_sequence_packing.py new file mode 100644 index 00000000000..60316b0236e --- /dev/null +++ b/tests/unit_tests/test_sequence_packing.py @@ -0,0 +1,479 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
class MockVariableLengthSequencePackingDataIterator:
    """Mock data iterator for testing get_batch_on_this_rank_for_sequence_packing.

    Emits variable-length (THD-format) packed sequences with deterministic
    contents so values can be compared across parallel ranks.
    """

    def __init__(
        self,
        total_seq_length: int,
        sequence_lengths: list,
        local_cp_size: int = None,
        device: str = "cuda",
        seed: int = 42,
    ):
        """
        Args:
            total_seq_length: Total length of the packed sequences.
            sequence_lengths: Individual (variable) sequence lengths; they must
                sum to ``total_seq_length``.
            local_cp_size: Optional local context-parallel size to attach to
                every batch.
            device: Device on which tensors are created.
            seed: Random seed for reproducibility.
        """
        self.total_seq_length = total_seq_length
        self.sequence_lengths = sequence_lengths
        self.local_cp_size = local_cp_size
        self.device = device
        self.seed = seed
        assert (
            sum(self.sequence_lengths) == total_seq_length
        ), f"Sequence lengths sum {sum(self.sequence_lengths)} != total {total_seq_length}"

    def __iter__(self):
        """Interface for the data iterator."""
        return self

    def __next__(self):
        """Produce one deterministic mock batch in variable-length THD format."""
        dev = self.device
        torch.manual_seed(self.seed)
        torch.cuda.manual_seed(self.seed)

        tokens = torch.randint(0, 16384, (self.total_seq_length,), dtype=torch.int64, device=dev)

        # Position ids restart at 0 for every packed sequence (THD layout).
        pos_list = [p for seq_len in self.sequence_lengths for p in range(seq_len)]
        position_ids = torch.tensor(pos_list, dtype=torch.int64, device=dev)

        # Labels are tokens + 1 so correctness is trivial to verify.
        labels = tokens + 1

        # No padding here, so every position contributes to the loss.
        loss_mask = torch.ones(self.total_seq_length, dtype=torch.float32, device=dev)

        # Cumulative sequence boundaries for the packed layout.
        boundaries = [0]
        for seq_len in self.sequence_lengths:
            boundaries.append(boundaries[-1] + seq_len)
        cu_seqlens = torch.tensor(boundaries, dtype=torch.int32, device=dev)
        cu_seqlens_padded = cu_seqlens.clone()

        max_seqlen = torch.tensor([max(self.sequence_lengths)], dtype=torch.int32, device=dev)

        batch = {
            "tokens": tokens,
            "position_ids": position_ids,
            "labels": labels,
            "loss_mask": loss_mask,
            "cu_seqlens": cu_seqlens,
            "cu_seqlens_padded": cu_seqlens_padded,
            "max_seqlen": max_seqlen,
        }

        # Middle pipeline stages carry only the packing metadata.
        first_or_last = parallel_state.is_pipeline_first_stage(
            ignore_virtual=True
        ) or parallel_state.is_pipeline_last_stage(ignore_virtual=True)
        if not first_or_last:
            for key in ("tokens", "position_ids", "labels", "loss_mask"):
                batch[key] = None

        if self.local_cp_size is not None:
            batch["local_cp_size"] = torch.tensor(
                [self.local_cp_size], dtype=torch.int32, device=dev
            )

        return batch


def _gather_tensor_from_tp_group(tensor):
    """All-gather ``tensor`` across the tensor-model-parallel group."""
    assert tensor is not None, "Tensor should not be None"
    tp_size = parallel_state.get_tensor_model_parallel_world_size()
    gathered = [torch.zeros_like(tensor) for _ in range(tp_size)]
    torch.distributed.all_gather(
        gathered, tensor, group=parallel_state.get_tensor_model_parallel_group()
    )
    return gathered


def _gather_tensor_from_all_ranks(tensor):
    """All-gather ``tensor`` from every rank in the world group."""
    assert tensor is not None, "Tensor should not be None"
    # Plain ints are promoted to tensors so they can be gathered.
    # (`type(...) is int` is kept deliberately: bools must not be promoted.)
    if type(tensor) is int:
        tensor = torch.tensor(tensor, dtype=torch.int32, device=torch.cuda.current_device())
    gathered = [torch.zeros_like(tensor) for _ in range(torch.distributed.get_world_size())]
    torch.distributed.all_gather(gathered, tensor)
    return gathered
@pytest.mark.parametrize(
    ("tp", "pp", "cp"),
    [
        (1, 1, 1),  # no parallelism
        (2, 1, 1),  # tensor parallel only
        (1, 2, 1),  # pipeline parallel only
        (2, 2, 1),  # TP + PP
        (1, 1, 2),  # CP only
        (2, 1, 2),  # TP + CP
        (1, 2, 2),  # PP + CP
        (1, 4, 1),  # includes a middle PP stage
    ],
)
def test_get_batch_on_this_rank_for_sequence_packing(tp, pp, cp):
    """Exercise get_batch_on_this_rank_for_sequence_packing with THD input.

    Verifies that (1) TP ranks see identical data after broadcast, (2) all
    ranks agree on packed_seq_params, (3) CP partitions the sequence with the
    expected shapes, and (4) variable-length (THD) sequences are handled.
    """
    args = SimpleNamespace()
    args.tensor_model_parallel_size = tp
    args.pipeline_model_parallel_size = pp
    args.context_parallel_size = cp
    args.virtual_pipeline_model_parallel_size = None
    args.data_parallel_size = 8 // (tp * pp * cp)
    args.seq_length = 8192

    # Guard against configurations that do not fit the 8-GPU world.
    if args.data_parallel_size < 1:
        raise ValueError(f"Invalid config: tp={tp}, pp={pp}, cp={cp} exceeds world size 8")

    Utils.initialize_model_parallel(tp, pp, None, context_parallel_size=cp)

    try:
        # Only TP rank 0 owns a data iterator; the rest receive broadcasts.
        tp_rank = parallel_state.get_tensor_model_parallel_rank()
        if tp_rank == 0:
            # Seed per DP rank so data matches within each TP/PP/CP group.
            dp_rank = parallel_state.get_data_parallel_rank()
            sequence_lengths = [1024, 2048, 512, 1536, 3072]
            assert (
                sum(sequence_lengths) == args.seq_length
            ), f"Sequence lengths sum {sum(sequence_lengths)} != total {args.seq_length}"
            data_iterator = iter(
                MockVariableLengthSequencePackingDataIterator(
                    total_seq_length=args.seq_length,
                    sequence_lengths=sequence_lengths,
                    seed=42 + dp_rank,
                )
            )
        else:
            data_iterator = None

        result = get_batch_on_this_rank_for_sequence_packing(
            data_iterator=data_iterator, mtp_on_this_rank=False, vp_stage=None
        )
        tokens, labels, loss_mask, attention_mask, position_ids, packed_seq_params = result

        pp_rank = parallel_state.get_pipeline_model_parallel_rank()
        cp_rank = parallel_state.get_context_parallel_rank()
        is_first_stage = parallel_state.is_pipeline_first_stage(ignore_virtual=True)
        is_last_stage = parallel_state.is_pipeline_last_stage(ignore_virtual=True)
        is_first_or_last = is_first_stage or is_last_stage

        # --- 1. Per-stage data presence and THD shapes -----------------------
        if is_first_stage:
            assert tokens is not None, "First stage should have tokens"
            assert position_ids is not None, "First stage should have position_ids"
            assert tokens.dim() == 2, "Tokens should be 2D (batch, seq)"
            assert position_ids.dim() == 2, "Position IDs should be 2D (batch, seq)"
            assert tokens.size(0) == 1, "batch should be 1 in THD format"
            assert position_ids.size(0) == 1, "batch should be 1 in THD format"
        else:
            assert tokens is None, "Non-first stage should not have tokens"
            assert position_ids is None, "Non-first stage should not have position_ids"

        if is_last_stage:
            assert labels is not None, "Last stage should have labels"
            assert loss_mask is not None, "Last stage should have loss_mask"
            assert labels.dim() == 2, "Labels should be 2D (batch, seq)"
            assert loss_mask.dim() == 2, "Loss mask should be 2D (batch, seq)"
            assert labels.size(0) == 1, "batch should be 1 in THD format"
            assert loss_mask.size(0) == 1, "batch should be 1 in THD format"
        else:
            assert labels is None, "Non-last stage should not have labels"
            assert loss_mask is None, "Non-last stage should not have loss_mask"

        # --- 2. packed_seq_params must agree on every rank -------------------
        assert packed_seq_params is not None
        assert packed_seq_params.qkv_format == "thd"

        test_keys = [
            "cu_seqlens_q",
            "cu_seqlens_q_padded",
            "max_seqlen_q",
            "cu_seqlens_kv",
            "cu_seqlens_kv_padded",
            "max_seqlen_kv",
        ]
        for key in test_keys:
            value = getattr(packed_seq_params, key)
            assert value is not None
            gathered = _gather_tensor_from_all_ranks(value)
            for i in range(1, len(gathered)):
                assert torch.equal(
                    gathered[0], gathered[i]
                ), f"Rank 0 and rank {i} have different {key}"

        # --- 3. TP ranks must receive identical data after broadcast ---------
        if tp > 1:
            to_check = []
            if is_first_stage:
                to_check.extend([tokens, position_ids])
            if is_last_stage:
                to_check.extend([labels, loss_mask])
            for candidate in to_check:
                gathered = _gather_tensor_from_tp_group(candidate)
                for i in range(1, tp):
                    assert torch.equal(
                        gathered[0], gathered[i]
                    ), f"TP rank 0 and rank {i} have different data"

        # --- 4. CP must partition the sequence evenly ------------------------
        if cp > 1:
            expected_seq_len = args.seq_length // cp
            if is_first_stage:
                actual_seq_len = tokens.shape[1]
                assert (
                    actual_seq_len == expected_seq_len
                ), f"CP partitioned tokens have wrong shape: {actual_seq_len} != {expected_seq_len}"
            if is_last_stage:
                actual_seq_len = labels.shape[1]
                assert (
                    actual_seq_len == expected_seq_len
                ), f"CP partitioned labels have wrong shape: {actual_seq_len} != {expected_seq_len}"

    finally:
        Utils.destroy_model_parallel()
        unset_global_variables()
scheduler_type): + ''' + Test wrap_dataloader function with different scheduler types. + ''' + args = SimpleNamespace() + args.tensor_model_parallel_size = tp + args.pipeline_model_parallel_size = pp + args.context_parallel_size = cp + args.virtual_pipeline_model_parallel_size = None + args.data_parallel_size = 8 // (tp * pp * cp) + args.seq_length = 8192 + args.max_seqlen_per_dp_cp_rank = 8192 + + # Skip invalid configurations + if args.data_parallel_size < 1: + raise ValueError(f"Invalid config: tp={tp}, pp={pp}, cp={cp} exceeds world size 8") + + def _create_single_sample(seq_len): + # hard code the padding size to 16 + pad_size = 16 + seq_len_padded = ((seq_len + pad_size - 1) // pad_size) * pad_size + device = torch.device("cuda", torch.cuda.current_device()) + tokens = torch.randint(0, 128, (seq_len_padded,), dtype=torch.int64, device=device) + labels = tokens + 1 + position_ids = torch.arange(seq_len_padded, dtype=torch.int64, device=device) + loss_mask = torch.ones(seq_len_padded, dtype=torch.float32, device=device) + loss_mask[0:seq_len] = 1 + loss_mask[seq_len:] = 0 + cu_seqlens = torch.tensor([0, seq_len_padded], dtype=torch.int32, device=device) + + return { + 'tokens': tokens, + 'labels': labels, + 'loss_mask': loss_mask, + 'position_ids': position_ids, + 'cu_seqlens': cu_seqlens, + } + + # Initialize model parallel + Utils.initialize_model_parallel(tp, pp, vpp, context_parallel_size=cp) + + global_batch_size = 64 + micro_batch_size = 1 + nums = [random.randint(2048, args.seq_length) for _ in range(global_batch_size)] # 64 sequences + + config = SimpleNamespace() + config.max_seqlen_per_dp_cp_rank = args.max_seqlen_per_dp_cp_rank + config.microbatch_group_size_per_vp_stage = pp + config.virtual_pipeline_model_parallel_size = vpp + config.sequence_packing_scheduler = scheduler_type + + dp_rank = parallel_state.get_data_parallel_rank() + dp_size = parallel_state.get_data_parallel_world_size() + + pp_rank = 
parallel_state.get_pipeline_model_parallel_rank() + tp_rank = parallel_state.get_tensor_model_parallel_rank() + + is_pp_first = pp_rank == 0 + is_pp_last = pp_rank == pp - 1 + is_pp_first_or_last = is_pp_first or is_pp_last + is_tp_first = tp_rank == 0 + + num_micro_batches_old = global_batch_size // micro_batch_size // dp_size + + if is_tp_first and (is_pp_first or is_pp_last): + samples = [ + _create_single_sample(num) + for num in nums[dp_rank * num_micro_batches_old : (dp_rank + 1) * num_micro_batches_old] + ] + data_iterator = RerunDataIterator(iter(samples)) + else: + data_iterator = None + + if is_tp_first: + if vpp is not None and vpp > 1: + if is_pp_first: + data_iterator = [data_iterator] + [None for _ in range(vpp - 1)] + elif is_pp_last: + data_iterator = [None for _ in range(vpp - 1)] + [data_iterator] + else: + data_iterator = [None for _ in range(vpp)] + try: + # Call the function under test + ( + new_data_iterator, + num_micro_batches, + num_total_tokens_this_global_batch, + sequence_square_sum_this_global_batch, + ) = wrap_data_iterator(data_iterator, config, num_micro_batches_old) + + # check the result + assert type(num_micro_batches) is int + assert ( + type(num_total_tokens_this_global_batch) is float + or type(num_total_tokens_this_global_batch) is np.float32 + ) + assert ( + type(sequence_square_sum_this_global_batch) is float + or type(sequence_square_sum_this_global_batch) is np.float32 + ) + + def _check_batch(batch_all, batch_keys): + for batch in batch_all: + assert set(batch_keys) <= set( + batch.keys() + ), f"batch keys: {set(batch.keys())} missing {set(batch_keys) - set(batch.keys())}" + for key in batch_keys: + assert batch[key] is not None + + if is_tp_first: + # CHECK KEYS + batch_keys = ["cu_seqlens", "max_seqlen", "cu_seqlens_padded"] + if vpp is not None and vpp > 1: + # check metadata for all stages (save batches to avoid re-consuming iterators) + all_stage_batches = [] + for temp_data_iterator in new_data_iterator: + 
stage_batch = [next(temp_data_iterator) for _ in range(num_micro_batches)] + all_stage_batches.append(stage_batch) + _check_batch(stage_batch, batch_keys) + + # check for first or last stage on first or last pp rank + if is_pp_first_or_last: + batch_all = all_stage_batches[0] if is_pp_first else all_stage_batches[-1] + batch_keys += ["tokens", "position_ids", "labels", "loss_mask"] + _check_batch(batch_all, batch_keys) + else: + # non-VPP: single iterator + batch_all = [next(new_data_iterator) for _ in range(num_micro_batches)] + if is_pp_first_or_last: + batch_keys += ["tokens", "position_ids", "labels", "loss_mask"] + _check_batch(batch_all, batch_keys) + + # CHECK TOKEN SUM ON FIRST OR LAST PP RANK + # Note: data_iterator is consumed by wrap_data_iterator, new_data_iterator is consumed above. + # Use `samples` for before-wrap, reuse `batch_all` from the check above for after-wrap. + if is_pp_first_or_last: + # Compute token sum before wrap + token_sum_before = torch.tensor(0, dtype=torch.int64, device='cuda') + for sample in samples: + token_sum_before += sample['tokens'].long().sum() + + # Compute token sum after wrap (batch_all already collected above with tokens) + token_sum_after = torch.tensor(0, dtype=torch.int64, device='cuda') + for batch in batch_all: + token_sum_after += batch['tokens'].long().sum() + + # Reduce sum across dp_cp group and verify equality + dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=False) + torch.distributed.all_reduce( + token_sum_before, op=torch.distributed.ReduceOp.SUM, group=dp_cp_group + ) + torch.distributed.all_reduce( + token_sum_after, op=torch.distributed.ReduceOp.SUM, group=dp_cp_group + ) + + assert ( + token_sum_before == token_sum_after + ), f"Token sum mismatch: before={token_sum_before.item()}, after={token_sum_after.item()}" + + else: + if vpp is not None and vpp > 1: + assert type(new_data_iterator) is list and len(new_data_iterator) == vpp + for data_iterator in new_data_iterator: + 
assert data_iterator is None + else: + assert new_data_iterator is None + + finally: + Utils.destroy_model_parallel() + unset_global_variables() From 5dadaf1c845f010ae67088d51f6f2a5a03cb35d8 Mon Sep 17 00:00:00 2001 From: "Dennis(Zhenhuan) Liu" Date: Wed, 4 Mar 2026 12:03:23 +0800 Subject: [PATCH 298/334] fix: skip FSDP DTensor boundary validation under fake process group (#3668) Co-authored-by: Claude Opus 4.6 (1M context) --- .../distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py index 5df9c2e95c0..f18a21df6c1 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py @@ -175,6 +175,11 @@ def validate_uneven_dtensor(dtensor: DTensor) -> None: ) # Check that all boundaries (start and end) are touched. + # Skip under fake process group — all_reduce is a no-op so only rank 0's + # boundaries are visible, which makes the end-boundary check always fail. 
+ if torch.distributed.is_initialized() and torch.distributed.get_backend() == 'fake': + return + boundary_checks = torch.tensor( [ [offset == 0, offset + size == dtensor.shape[dim]] From 2176c4a1c176b6e104bd3c29e9476b4a140372f6 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Wed, 4 Mar 2026 22:04:31 -0600 Subject: [PATCH 299/334] ci: Remove cudagraph codeowners entry in dev branch (#3712) Signed-off-by: Charlie Truong --- .github/CODEOWNERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5b2db410381..7613dc59da5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,7 +1,5 @@ * @NVIDIA/core-nemo @NVIDIA/core-devtech -megatron/core/transformer/cuda_graphs.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/cuda-graphs - .gitlab/ @NVIDIA/ci .github/ @NVIDIA/ci .gitlab-ci.yml @NVIDIA/ci From 31f5294fb51f5003b31d132783ee2991e8d31d8e Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Thu, 5 Mar 2026 11:57:02 +0800 Subject: [PATCH 300/334] [dev] refactor to support emerging optimizers beyond muon (#3618) Signed-off-by: Hao Wu Co-authored-by: Hao Wu --- megatron/core/optimizer/__init__.py | 192 +++++++++- .../core/optimizer/emerging_optimizers.py | 260 +++++++++++++ .../core/optimizer/layer_wise_optimizer.py | 14 +- megatron/core/optimizer/muon.py | 350 +----------------- megatron/core/optimizer/optimizer_config.py | 49 +-- megatron/core/optimizer_param_scheduler.py | 5 +- megatron/training/arguments.py | 21 +- megatron/training/checkpointing.py | 4 +- megatron/training/training.py | 55 +-- tests/unit_tests/dist_checkpointing/utils.py | 70 ++-- tests/unit_tests/test_layer_wise_optimizer.py | 15 +- tests/unit_tests/test_muon_optimizer.py | 53 +-- tests/unit_tests/test_optimizer.py | 6 +- 13 files changed, 567 insertions(+), 527 deletions(-) create mode 100644 megatron/core/optimizer/emerging_optimizers.py diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 11aa6c49585..8babff5d4f5 
100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -2,6 +2,7 @@ import copy import logging import warnings +from collections import defaultdict from dataclasses import astuple from typing import Any, Callable, Dict, List, Optional, Tuple, Union @@ -47,7 +48,13 @@ from ..transformer.module import MegatronModule from ..utils import get_model_config, get_pg_rank, get_pg_size, is_te_min_version, log_single_rank from .distrib_optimizer import DistributedOptimizer +from .emerging_optimizers import ( + _EMERGING_OPTIMIZERS, + HAVE_EMERGING_OPTIMIZERS, + _create_emerging_optimizer, +) from .grad_scaler import ConstantGradScaler, DynamicGradScaler +from .layer_wise_optimizer import LayerWiseDistributedOptimizer from .optimizer import ( ChainedOptimizer, Float16OptimizerWithFloat16Params, @@ -55,6 +62,8 @@ MegatronOptimizer, param_group_identifier_keys, ) + +# Subclass aliases kept for backward compatibility; all are OptimizerConfig. from .optimizer_config import ( AdamOptimizerConfig, OptimizerConfig, @@ -134,14 +143,6 @@ def _get_param_groups( # Map (pg_overrides, is_expert_parallel) to params. params_map = {} - if config_overrides is None: - # TODO remove this default behavior eventually. - # This is only needed for backwards compatibility with the old config overrides API where - # the config_overrides argument by default lead to bias parameters and length 1 parameters. - # We assume that users of decoupled LR already provide config overrides so will adapt - # to the new API. 
- config_overrides = get_standard_config_overrides(config=config) - for model_chunk in model_chunks: for name, param in model_chunk.named_parameters(): if not param.requires_grad: @@ -276,7 +277,8 @@ def _get_megatron_optimizer_based_on_param_groups( intra_dist_opt_group: Optional[torch.distributed.ProcessGroup] = None, distributed_optimizer_instance_id: Optional[int] = 0, pg_collection: Optional[ProcessGroupCollection] = None, -) -> MegatronOptimizer: + skip_megatron_wrapping: bool = False, +) -> Union[MegatronOptimizer, Tuple[Optional[torch.optim.Optimizer], Optional[Callable]]]: """Get Megatron optimizer based on parameter groups. Args: @@ -292,12 +294,24 @@ def _get_megatron_optimizer_based_on_param_groups( optimizer. Defaults to None. distributed_optimizer_instance_id (int, optional): Distributed optimizer instance. Defaults 0. + skip_megatron_wrapping (bool): if True, return a + ``(optimizer, init_state_fn)`` tuple of the raw PyTorch optimizer + without any Megatron wrapping. Useful when the caller + (e.g. LayerWiseDistributedOptimizer) performs its own wrapping. Returns: - Instance of MegatronOptimizer. + Instance of MegatronOptimizer, or ``(optimizer, init_state_fn)`` when + *skip_megatron_wrapping=True*. """ - # TODO: Logic needs to be updated to handle different optimizer types (i.e., param_groups - # passed into this function need to correspond to the same optimizer). + # All param_groups passed here must belong to the same optimizer type (adam / sgd). + # Callers are responsible for splitting by optimizer type before calling this function. + + if skip_megatron_wrapping and config.use_precision_aware_optimizer: + raise ValueError( + "skip_megatron_wrapping=True is incompatible with use_precision_aware_optimizer." 
+ ) + if skip_megatron_wrapping and config.optimizer_cpu_offload: + raise ValueError("skip_megatron_wrapping=True is incompatible with optimizer_cpu_offload.") # When freezing sub-models we may have no trainable parameters on a rank and # hence an empty param_groups. However, we still need to create an optimizer @@ -412,6 +426,9 @@ def init_state_fn(opt, config=None): optimizer = None init_state_fn = None + if skip_megatron_wrapping: + return optimizer, init_state_fn + # Mixed precision optimizer. # - Note: both the Float16Optimizer and the DistributedOptimizer inherit # from the MixedPrecisionOptimizer, which manages any optimizer where @@ -502,6 +519,137 @@ def check_config_overrides_consistency( return True +def _get_megatron_emerging_optimizer( + config: OptimizerConfig, + model_chunks: List[MegatronModule], + config_overrides: Optional[Dict[ParamKey, Any]] = None, + pg_collection: Optional[ProcessGroupCollection] = None, +) -> MegatronOptimizer: + """Build an emerging optimizer (e.g. Muon) for the given model chunks. + + Parameter separation (e.g., linear weights -> Muon, rest -> Adam) is expressed as a + config_override, the same mechanism used for weight-decay and learning-rate overrides. + Adam/SGD groups are delegated to _get_megatron_optimizer_based_on_param_groups so they + go through the exact same code path as the standard optimizer factory. + + When ``config.use_layer_wise_distributed_optimizer`` is True, the underlying optimizers + are wrapped with :class:`LayerWiseDistributedOptimizer`. + """ + eopt_name = config.optimizer + use_layer_wise = config.use_layer_wise_distributed_optimizer + + # Handle legacy "dist_*" optimizer names (e.g. "dist_muon" → "muon" + layer-wise). + if eopt_name.startswith('dist_'): + bare_name = eopt_name[len('dist_') :] + warnings.warn( + f"optimizer='{eopt_name}' is deprecated. 
" + f"Use optimizer='{bare_name}' with use_layer_wise_distributed_optimizer=True.", + DeprecationWarning, + stacklevel=3, + ) + eopt_name = bare_name + use_layer_wise = True + + if not HAVE_EMERGING_OPTIMIZERS: + raise ImportError( + f"emerging-optimizers package is required for optimizer='{eopt_name}'. " + "Install it with: pip install emerging-optimizers" + ) + if eopt_name not in _EMERGING_OPTIMIZERS: + raise ValueError(f"Unsupported emerging optimizer: {eopt_name}") + if config.fp16: + raise ValueError('emerging optimizer with fp16 is not supported.') + + if pg_collection is None: + pg_collection = ProcessGroupCollection.use_mpu_process_groups() + + log_single_rank(logger, logging.INFO, f'Setting up emerging optimizer with config {config}') + + # Tag parameters with optimizer-specific attributes (expert_tp, is_qkv). + for model_chunk in model_chunks: + for name, param in model_chunk.named_parameters(): + if not param.requires_grad: + continue + if 'experts' in name and 'shared' not in name: + param.expert_tp = True + # TODO(deyuf): support MLA + if 'linear_qkv.weight' in name and len(param.shape) == 2: + param.is_qkv = True + + # Apply optimizer-specific default param overrides (e.g. muon: non-linear -> adam). + config_overrides.update(_EMERGING_OPTIMIZERS[eopt_name].default_param_overrides) + + # Build param groups and bucket by (optimizer_name, is_expert_parallel). + # Layer-wise distributed optimizer handles expert params internally so we skip that split. + all_param_groups = _get_param_groups(model_chunks, config, config_overrides) + grouped_param_groups = defaultdict(list) + for group in all_param_groups: + opt_name = group.get('optimizer', eopt_name) + is_expert = group['is_expert_parallel'] and not use_layer_wise + grouped_param_groups[(opt_name, is_expert)].append(group) + + # Build an optimizer for each (optimizer_name, is_expert) bucket and combine. 
+ results = [] + for (opt_name, is_expert), groups in grouped_param_groups.items(): + if not groups: + continue + + model_parallel_group = pg_collection.tp_ep_pp if is_expert else pg_collection.mp + + if opt_name in _EMERGING_OPTIMIZERS: + optimizer, init_state_fn = _create_emerging_optimizer( + config, groups, eopt_name, model_chunks, pg_collection + ) + if use_layer_wise: + result = (optimizer, init_state_fn) + else: + if config.bf16: + optimizer = Float16OptimizerWithFloat16Params( + optimizer, config, None, init_state_fn + ) + else: + optimizer = FP32Optimizer(optimizer, config, init_state_fn) + setattr(optimizer, 'grad_stats_parallel_group', model_parallel_group) + if pg_collection is None or not hasattr(pg_collection, 'tp'): + tp_group = parallel_state.get_tensor_model_parallel_group() + else: + tp_group = pg_collection.tp + setattr(optimizer, 'tp_group', tp_group) + result = optimizer + else: + fallback_config = copy.copy(config) + fallback_config.optimizer = opt_name + fallback_config.use_distributed_optimizer = False + result = _get_megatron_optimizer_based_on_param_groups( + config=fallback_config, + model_chunks=model_chunks, + param_groups=groups, + model_parallel_group=model_parallel_group, + pg_collection=pg_collection, + skip_megatron_wrapping=use_layer_wise, + ) + # TODO(deyuf): ChainedOptimizer currently asserts all sub-optimizers + # share the same config. Revisit this design now that emerging + # optimizers mix different optimizer types (e.g. Muon + Adam). + # For now, reset to the top-level config so the assertion holds. 
+ if not use_layer_wise and hasattr(result, 'config'): + result.config = config + results.append(result) + + if use_layer_wise: + base_optimizers, init_fns = (), () + if results: + base_optimizers, init_fns = zip(*results) + log_single_rank( + logger, logging.INFO, f'Using LayerWiseDistributedOptimizer for {eopt_name}' + ) + return LayerWiseDistributedOptimizer( + list(base_optimizers), config, pg_collection, init_state_fn_list=list(init_fns) + ) + + return ChainedOptimizer(results) + + def get_megatron_optimizer( config: OptimizerConfig, model_chunks: List[MegatronModule], @@ -512,7 +660,10 @@ def get_megatron_optimizer( ) -> MegatronOptimizer: """Retrieve the Megatron optimizer for model chunks. + Handles both standard optimizers (Adam, SGD) and emerging optimizers (e.g. Muon). We use separate optimizers for expert parameters and non-expert parameters. + For emerging optimizers with ``config.use_layer_wise_distributed_optimizer=True``, + the optimizer is automatically wrapped with :class:`LayerWiseDistributedOptimizer`. Args: config (OptimizerConfig): optimizer configuration object. @@ -529,10 +680,25 @@ def get_megatron_optimizer( Instance of MegatronOptimizer. """ - log_single_rank(logger, logging.INFO, f'Setting up optimizer with config {config}') + # None → apply standard defaults. To extend defaults with custom overrides, + # start from get_standard_config_overrides(config) and merge yours in. + if config_overrides is None: + config_overrides = get_standard_config_overrides(config) check_config_overrides_consistency(config, config_overrides) + # TODO: the standard and emerging optimizer paths handle pg_collection differently; + # unify them so both use a single pg_collection-based flow. 
+ if config.optimizer not in ('adam', 'sgd'): + return _get_megatron_emerging_optimizer( + config=config, + model_chunks=model_chunks, + config_overrides=config_overrides, + pg_collection=pg_collection, + ) + + log_single_rank(logger, logging.INFO, f'Setting up optimizer with config {config}') + # Separate out first model chunk if overlapping param AG with optimizer step. if config.overlap_param_gather_with_optimizer_step: all_dense_model_chunks = [[model_chunks[0]], model_chunks[1:]] diff --git a/megatron/core/optimizer/emerging_optimizers.py b/megatron/core/optimizer/emerging_optimizers.py new file mode 100644 index 00000000000..3cf36670fd3 --- /dev/null +++ b/megatron/core/optimizer/emerging_optimizers.py @@ -0,0 +1,260 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Emerging optimizer registry. + +To add a new emerging optimizer: + 1. Define its optimizer class (or import it). + 2. Write its ``__init_state_fn`` and ``__config_to_kwargs``. + 3. Add an ``EmergingOptimizerEntry`` to ``_EMERGING_OPTIMIZERS`` at the bottom. 
+""" + +import logging +from dataclasses import dataclass, field +from typing import Any, Callable, Dict, List, Literal, Optional + +import torch +from torch.optim.optimizer import ParamsT + +from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.utils import get_pg_size, log_single_rank + +from .optimizer_config import ParamKey, ParamPredicate + +try: + from emerging_optimizers.orthogonalized_optimizers import ( + OrthogonalizedOptimizer, + get_muon_scale_factor, + ) + from emerging_optimizers.orthogonalized_optimizers.muon_utils import newton_schulz_tp + + HAVE_EMERGING_OPTIMIZERS = True +except ImportError: + HAVE_EMERGING_OPTIMIZERS = False + + +logger = logging.getLogger(__name__) + + +# =========================================================================== +# Registry dataclass and public API +# =========================================================================== + + +@dataclass +class EmergingOptimizerEntry: + """Everything needed to create and configure an emerging optimizer. + + Attributes: + optimizer_cls: The torch optimizer class. + init_state_fn: Lazily initialises optimizer state (needed for checkpoint formats). + config_to_kwargs: ``(config, model_chunks, pg_collection) -> dict`` of constructor kwargs. + default_param_overrides: Per-parameter config overrides applied automatically + (e.g. route non-linear params to Adam). 
+ """ + + optimizer_cls: type + init_state_fn: Callable + config_to_kwargs: Callable + default_param_overrides: Dict[ParamKey, Dict[str, Any]] = field(default_factory=dict) + + +def _create_emerging_optimizer(config, param_groups, eopt_name, model_chunks, pg_collection): + """Instantiate an emerging optimizer and return it with its init_state_fn.""" + entry = _EMERGING_OPTIMIZERS[eopt_name] + eopt_kwargs = entry.config_to_kwargs(config, model_chunks, pg_collection) + optimizer = entry.optimizer_cls(param_groups, **eopt_kwargs) + return optimizer, entry.init_state_fn + + +# =========================================================================== +# Shared helpers +# =========================================================================== + + +def _is_nonlinear_or_embedding(param): + """True for parameters that should NOT use the emerging optimizer.""" + return getattr(param, 'is_embedding_or_output_parameter', False) or len(param.shape) != 2 + + +def _get_qkv_split_shapes(model_cfg) -> List[int]: + """Compute QKV split shapes from model config.""" + return [ + model_cfg.num_attention_heads // model_cfg.num_query_groups * model_cfg.kv_channels, + model_cfg.kv_channels, + model_cfg.kv_channels, + ] + + +# =========================================================================== +# Registry – populated below only when emerging_optimizers is installed. 
+# =========================================================================== + +_EMERGING_OPTIMIZERS: Dict[str, EmergingOptimizerEntry] = {} + + +# =========================================================================== +# Muon +# =========================================================================== + +if HAVE_EMERGING_OPTIMIZERS: + + class TensorParallelMuon(OrthogonalizedOptimizer): + """Tensor Parallel Muon optimizer.""" + + def __init__( + self, + params: ParamsT, + lr: float = 3e-4, + momentum_beta: float = 0.95, + use_nesterov: bool = True, + weight_decay: float = 0.01, + use_decoupled_weight_decay: bool = True, + split_qkv: bool = False, + is_qkv_fn: Callable[[torch.Tensor], bool] | None = None, + qkv_split_shapes: tuple[int, int, int] | None = None, + fp32_matmul_prec: str = "medium", + coefficient_type: str = "quintic", + num_ns_steps: int = 5, + scale_mode: str = "spectral", + extra_scale_factor: float = 1.0, + pg_collection: Optional[ProcessGroupCollection] = None, + mode: Literal["blockwise", "duplicated", "distributed"] = "duplicated", + ) -> None: + if num_ns_steps < 1: + raise ValueError(f"num_ns_steps must be at least 1, got {num_ns_steps}") + + def scaled_orthogonalize_fn( + grad: torch.Tensor, + tp_group: torch.distributed.ProcessGroup, + partition_dim: int | None = None, + ) -> torch.Tensor: + log_single_rank( + logger, + logging.DEBUG, + f'Orthogonalizing grad with {num_ns_steps} steps, ' + f'{coefficient_type} coefficient, ' + f'{scale_mode} scale mode, extra_scale_factor={extra_scale_factor}', + ) + size = [grad.size(-2), grad.size(-1)] + if partition_dim is not None: + size[partition_dim] *= get_pg_size(tp_group) + orth_grad = newton_schulz_tp( + grad, + steps=num_ns_steps, + coefficient_type=coefficient_type, + tp_group=tp_group, + partition_dim=partition_dim, + mode="duplicated" if mode == "blockwise" else mode, + ) + scale_factor = get_muon_scale_factor(size[0], size[1], mode=scale_mode) + return orth_grad * scale_factor * 
extra_scale_factor + + self.pg_collection = pg_collection + self.mode = mode + self.split_qkv = split_qkv + self.is_qkv_fn = is_qkv_fn + self.qkv_split_shapes = qkv_split_shapes + + weight_decay_method = "decoupled" if use_decoupled_weight_decay else "l2" + super().__init__( + params, + lr, + momentum_beta, + use_nesterov=use_nesterov, + weight_decay=weight_decay, + weight_decay_method=weight_decay_method, + fp32_matmul_prec=fp32_matmul_prec, + scaled_orthogonalize_fn=scaled_orthogonalize_fn, + ) + + def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> torch.Tensor: + """Orthogonalize the momentum. + + Args: + p: The parameter tensor. i is necessary to pass param tensor in addition to + momentum because a lot of information is only available in the param tensor, + attributes for example. + grad: The momentum tensor. + + Returns: + The orthogonalized gradient tensor. + """ + # TODO(deyuf): switch to group + if self.pg_collection: + tp_group = ( + self.pg_collection.expt_tp + if getattr(p, 'expert_tp', False) + else self.pg_collection.tp + ) + else: + tp_group = None + partition_dim = None if self.mode == "blockwise" else getattr(p, "partition_dim", None) + if partition_dim == -1: + partition_dim = None + + if self.split_qkv and self.is_qkv_fn(p): # type: ignore[misc] + grad_shape = grad.shape + log_single_rank( + logger, + logging.DEBUG, + f'qkv split grad shape {grad_shape}, ' f'split shapes {self.qkv_split_shapes}', + ) + num_query_groups = grad_shape[0] // sum(self.qkv_split_shapes) + qkv_grads = torch.split( + grad.view(num_query_groups, sum(self.qkv_split_shapes), -1), + self.qkv_split_shapes, + dim=1, + ) + qkv_grads = [g.reshape(-1, grad_shape[-1]) for g in qkv_grads] + + qkv_grads = [ + self.scaled_orthogonalize_fn(g, tp_group, partition_dim).view( + num_query_groups, -1, grad_shape[-1] + ) + for g in qkv_grads + ] + grad = torch.cat(qkv_grads, dim=1).view(grad_shape) + else: + grad = self.scaled_orthogonalize_fn(grad, tp_group, 
partition_dim) + return grad + + def _muon_init_state_fn(opt, config=None): + """Initialize Muon optimizer state for torch_dist checkpoint format.""" + for group in opt.param_groups: + for p in group['params']: + if len(opt.state[p]) == 0: + opt.state[p]['momentum_buffer'] = torch.zeros_like(p.data) + + def _muon_config_to_kwargs(config, model_chunks, pg_collection) -> Dict[str, Any]: + """Convert OptimizerConfig to TensorParallelMuon constructor kwargs.""" + return { + "lr": config.lr, + "weight_decay": config.weight_decay, + "momentum_beta": config.muon_momentum, + "use_nesterov": config.muon_use_nesterov, + "fp32_matmul_prec": config.muon_fp32_matmul_prec, + "num_ns_steps": config.muon_num_ns_steps, + "scale_mode": config.muon_scale_mode, + "extra_scale_factor": config.muon_extra_scale_factor, + "mode": config.muon_tp_mode, + "split_qkv": config.muon_split_qkv, + "is_qkv_fn": lambda p: getattr(p, "is_qkv", False), + "qkv_split_shapes": _get_qkv_split_shapes(model_chunks[0].config), + "pg_collection": pg_collection, + } + + # ----------------------------------------------------------------------- + # Register Muon + # ----------------------------------------------------------------------- + _EMERGING_OPTIMIZERS['muon'] = EmergingOptimizerEntry( + optimizer_cls=TensorParallelMuon, + init_state_fn=_muon_init_state_fn, + config_to_kwargs=_muon_config_to_kwargs, + default_param_overrides={ + ParamKey( + predicate=ParamPredicate( + name="nonlinear_or_embedding", fn=_is_nonlinear_or_embedding + ) + ): {'optimizer': 'adam'} + }, + ) diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py index de4396a5b4f..d5dcef209a9 100644 --- a/megatron/core/optimizer/layer_wise_optimizer.py +++ b/megatron/core/optimizer/layer_wise_optimizer.py @@ -63,19 +63,17 @@ def __init__( optimizers ), "init_state_fn_list must be the same length as optimizers if provided" - # wrap optimizer after sharding to avoid unnecessary master weight 
creation - # for higher precision, optimizers are wrapped with megatron already + # Wrap base torch optimizers with Float16 for bf16 training. + # Callers pass base optimizers; wrapping happens here *after* + # shard_params so master weights are only created for the local shard. if config.bf16: - # unwrap FP32 optimizer, possibly from reusing get_megatron_optimizer for adam for i in range(len(optimizers)): opt = optimizers[i] - if isinstance(opt, Float16OptimizerWithFloat16Params): + if isinstance(opt, (Float16OptimizerWithFloat16Params, FP32Optimizer)): raise TypeError( - 'LayerWiseDistributedOptimizer received Float16 optimizer already.' + 'LayerWiseDistributedOptimizer expects base torch optimizers, ' + f'got {type(opt).__name__}. Do not pre-wrap with Megatron optimizers.' ) - # unwrap FP32 optimizer from reusing get_megatron_optimizer for adam - if isinstance(opt, FP32Optimizer): - opt = opt.optimizer optimizers[i] = Float16OptimizerWithFloat16Params( opt, config, None, init_state_fn_list[i] if init_state_fn_list else None ) diff --git a/megatron/core/optimizer/muon.py b/megatron/core/optimizer/muon.py index 57eb1e94478..a3f7506f941 100644 --- a/megatron/core/optimizer/muon.py +++ b/megatron/core/optimizer/muon.py @@ -1,350 +1,16 @@ # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-"""Megatron muon optimizer wrapper to handle tensor-parallel.""" +"""Backward-compatible shim — all code now lives in ``emerging_optimizers``.""" -import logging -from typing import Any, Callable, Dict, List, Literal, Optional +from typing import Any -import torch -from torch.optim.optimizer import ParamsT -from megatron.core.optimizer_param_scheduler import ParamGroupOverride -from megatron.core.process_groups_config import ProcessGroupCollection -from megatron.core.transformer.module import MegatronModule -from megatron.core.utils import get_pg_size, log_single_rank +def get_megatron_muon_optimizer(*args: Any, **kwargs: Any) -> Any: + """Backward compatible muon optimizer getter. -from . import _get_param_groups, get_megatron_optimizer -from .layer_wise_optimizer import LayerWiseDistributedOptimizer -from .optimizer import ( - ChainedOptimizer, - Float16OptimizerWithFloat16Params, - FP32Optimizer, - MegatronOptimizer, -) -from .optimizer_config import OptimizerConfig, ParamKey - -try: - from emerging_optimizers.orthogonalized_optimizers import ( - OrthogonalizedOptimizer, - get_muon_scale_factor, - ) - from emerging_optimizers.orthogonalized_optimizers.muon_utils import newton_schulz_tp - - HAVE_EMERGING_OPTIMIZERS = True -except ImportError: - HAVE_EMERGING_OPTIMIZERS = False - OrthogonalizedOptimizer = object - - -logger = logging.getLogger(__name__) - - -class TensorParallelMuon(OrthogonalizedOptimizer): - """Tensor Parallel Muon optimizer.""" - - def __init__( - self, - params: ParamsT, - lr: float = 3e-4, - momentum_beta: float = 0.95, - use_nesterov: bool = True, - weight_decay: float = 0.01, - use_decoupled_weight_decay: bool = True, - split_qkv: bool = False, - is_qkv_fn: Callable[[torch.Tensor], bool] | None = None, - qkv_split_shapes: tuple[int, int, int] | None = None, - fp32_matmul_prec: str = "medium", - coefficient_type: str = "quintic", - num_ns_steps: int = 5, - scale_mode: str = "spectral", - extra_scale_factor: float = 1.0, - pg_collection: 
Optional[ProcessGroupCollection] = None, - mode: Literal["blockwise", "duplicated", "distributed"] = "duplicated", - ) -> None: - if num_ns_steps < 1: - raise ValueError(f"num_ns_steps must be at least 1, got {num_ns_steps}") - - def scaled_orthogonalize_fn( - grad: torch.Tensor, - tp_group: torch.distributed.ProcessGroup, - partition_dim: int | None = None, - ) -> torch.Tensor: - log_single_rank( - logger, - logging.DEBUG, - f'Orthogonalizing grad with {num_ns_steps} steps, {coefficient_type} coefficient, ' - f'{scale_mode} scale mode, extra_scale_factor={extra_scale_factor}', - ) - size = [grad.size(-2), grad.size(-1)] - if partition_dim is not None: - size[partition_dim] *= get_pg_size(tp_group) - orth_grad = newton_schulz_tp( - grad, - steps=num_ns_steps, - coefficient_type=coefficient_type, - tp_group=tp_group, - partition_dim=partition_dim, - mode="duplicated" if mode == "blockwise" else mode, - ) - scale_factor = get_muon_scale_factor(size[0], size[1], mode=scale_mode) - return orth_grad * scale_factor * extra_scale_factor - - self.pg_collection = pg_collection - self.mode = mode - self.split_qkv = split_qkv - self.is_qkv_fn = is_qkv_fn - self.qkv_split_shapes = qkv_split_shapes - - weight_decay_method = "decoupled" if use_decoupled_weight_decay else "l2" - super().__init__( - params, - lr, - momentum_beta, - use_nesterov=use_nesterov, - weight_decay=weight_decay, - weight_decay_method=weight_decay_method, - fp32_matmul_prec=fp32_matmul_prec, - scaled_orthogonalize_fn=scaled_orthogonalize_fn, - ) - - def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> torch.Tensor: - """Orthogonalize the momentum. - - Args: - p: The parameter tensor. i is necessary to pass param tensor in addition to momentum - because a lot of information is only available in the param tensor, - attributes for example. - grad: The momentum tensor. - - Returns: - The orthogonalized gradient tensor. 
- """ - # TODO(deyuf): switch to group - if self.pg_collection: - tp_group = ( - self.pg_collection.expt_tp - if getattr(p, 'expert_tp', False) - else self.pg_collection.tp - ) - else: - tp_group = None - partition_dim = None if self.mode == "blockwise" else getattr(p, "partition_dim", None) - if partition_dim == -1: - # emerging-optimizers use None instead of -1 to indicate no tensor parallel - partition_dim = None - - if self.split_qkv and self.is_qkv_fn(p): # type: ignore[misc] - # split grouped attention parameters (e.g., QKV, GQA, etc.) - grad_shape = grad.shape - log_single_rank( - logger, - logging.DEBUG, - f'qkv split grad shape {grad_shape}, split shapes {self.qkv_split_shapes}', - ) - num_query_groups = grad_shape[0] // sum(self.qkv_split_shapes) - qkv_grads = torch.split( - grad.view(num_query_groups, sum(self.qkv_split_shapes), -1), - self.qkv_split_shapes, - dim=1, - ) - qkv_grads = [g.reshape(-1, grad_shape[-1]) for g in qkv_grads] - - # Apply Newton-Schulz and scales to each component, concat back - qkv_grads = [ - self.scaled_orthogonalize_fn(g, tp_group, partition_dim).view( - num_query_groups, -1, grad_shape[-1] - ) - for g in qkv_grads - ] - grad = torch.cat(qkv_grads, dim=1).view(grad_shape) - else: - grad = self.scaled_orthogonalize_fn(grad, tp_group, partition_dim) - return grad - - -def get_megatron_muon_optimizer( - config: OptimizerConfig, - model_chunks: List[MegatronModule], - config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]] = None, - use_gloo_process_groups: bool = True, - layer_wise_distributed_optimizer: bool = False, - pg_collection: Optional[ProcessGroupCollection] = None, -) -> MegatronOptimizer: - """This function is used to get the muon optimizer for the model chunks. - It is used to get the muon optimizer for the model chunks. - - Args: - config (OptimizerConfig): optimizer configuration object. - model_chunks (List[MegatronModule]): model chunks to get optimizer for. 
- use_gloo_process_groups (bool): if false, disable use of Gloo process groups - in underlying Megatron optimizers. - layer_wise_distributed_optimizer (bool): if true, use layer-wise distributed optimizer. - Defaults to False. + .. deprecated:: + Use :func:`megatron.core.optimizer.get_megatron_optimizer` instead. """ - # Muon currently use adam config. setting str here to call regular get for adam creation - # side effect is muon optimizer will have wrong name, i.e. config.optimizer == 'adam' - config.optimizer = 'adam' - - assert HAVE_EMERGING_OPTIMIZERS, "Emerging Optimizers is not installed." - - # Dist-opt is not supported due to strong coupling with how DDP init grad buffer - # In theory we can change DDP to enable use muon and dist-opt-adam together - if config.use_distributed_optimizer: - raise Exception('muon with dist optimizer is not supported.') - # only support bf16 w/o loss scale now - if config.fp16: - raise Exception('muon with fp16 is not supported.') - - # before this function receive properly created collection - if pg_collection is None: - pg_collection = ProcessGroupCollection.use_mpu_process_groups() - - log_single_rank(logger, logging.INFO, f'Setting up emerging optimizer with config {config}') - - # Needed for torch_dist ckpt_format, unlike torch ckpt_format - # For other emerging optimizers, need to implement init_state_fn as well - # TODO(boxiangw): Improve usability after optimizer refactor - # TODO(boxiangw): support precision aware optimizer - def muon_init_state_fn(opt, config=None): - for group in opt.param_groups: - for p in group['params']: - if len(opt.state[p]) == 0: - opt.state[p]['momentum_buffer'] = torch.zeros_like(p.data) - - def adam_init_state_fn(opt, config=None): - for group in opt.param_groups: - for p in group['params']: - if len(opt.state[p]) == 0: - if config is None or not config.use_precision_aware_optimizer: - opt.state[p]['exp_avg'] = torch.zeros_like(p.data) - opt.state[p]['exp_avg_sq'] = torch.zeros_like(p.data) 
- else: - opt.initialize_state(p) - - optimizers = [] - # record list of non/linear params - linear_params = [] - nonlinear_params = [] - for model_chunk in model_chunks: - # use config to determine qkv split shapes. - # no need to check tp since tp splits by head and this is per head(group) dimension - num_attention_heads = model_chunk.config.num_attention_heads - num_query_groups = model_chunk.config.num_query_groups - kv_channels = model_chunk.config.kv_channels - qkv_split_shapes = [ - num_attention_heads // num_query_groups * kv_channels, - kv_channels, - kv_channels, - ] - for name, param in model_chunk.named_parameters(): - if not param.requires_grad: - continue - # add flag for expert weight so optimizer can figure which tp group it uses - # alternatively, create new param group and save tp_group. this require more - # change in optimizer - if 'experts' in name and 'shared' not in name: - param.expert_tp = True - # add flag for qkv parameter - # TODO(deyuf): support MLA - if 'linear_qkv.weight' in name and len(param.shape) == 2: - param.is_qkv = True - # TODO(deyuf): currently only allow 2D non-embedding weight to avoid breaking - if ( - not getattr(param, 'is_embedding_or_output_parameter', False) - and len(param.shape) == 2 - ): - linear_params.append(param) - else: - nonlinear_params.append(param) - - muon_kwargs = { - "lr": config.lr, - "momentum_beta": config.muon_momentum, - "use_nesterov": config.muon_use_nesterov, - "weight_decay": config.weight_decay, - "fp32_matmul_prec": config.muon_fp32_matmul_prec, - "num_ns_steps": config.muon_num_ns_steps, - "scale_mode": config.muon_scale_mode, - "split_qkv": config.muon_split_qkv, - "is_qkv_fn": lambda p: getattr(p, "is_qkv", False), - "qkv_split_shapes": qkv_split_shapes, - "extra_scale_factor": config.muon_extra_scale_factor, - "pg_collection": pg_collection, - "mode": config.muon_tp_mode, - } - - # freezing nonlinear params and get param groups for muon - for param in nonlinear_params: - 
param.requires_grad = False - - linear_param_groups = _get_param_groups(model_chunks, config, config_overrides) - # if layerwise distributed optimizer is not used, need to handle ep params separately - expert_param_groups = [] - if not layer_wise_distributed_optimizer: - for group in linear_param_groups: - if group['is_expert_parallel']: - expert_param_groups.append(group) - linear_param_groups.remove(group) - - optimizer = TensorParallelMuon(linear_param_groups, **muon_kwargs) - - reset_config_bf16 = False - if config.bf16: - if layer_wise_distributed_optimizer: - # creating master weight before layerwise sharding will lead to unnecessary master - # weight so here we delay master weight creation into layer_wise unset config.bf16 - # will also result in all optimizers below(adam) to also not be wrapped - config.bf16 = False - reset_config_bf16 = True - else: - # if not using layer_wise wrapper, just create master weight here is fine - optimizer = Float16OptimizerWithFloat16Params( - optimizer, config, None, muon_init_state_fn - ) - else: - optimizer = FP32Optimizer(optimizer, config, muon_init_state_fn) - - optimizers.append(optimizer) - - # expert optimizer exists meaning layerwise distributed optimizer is not used - if len(expert_param_groups) > 0: - expert_optimizer = TensorParallelMuon(expert_param_groups, **muon_kwargs) - if config.bf16: - expert_optimizer = Float16OptimizerWithFloat16Params( - expert_optimizer, config, None, muon_init_state_fn - ) - else: - expert_optimizer = FP32Optimizer(expert_optimizer, config, muon_init_state_fn) - setattr(expert_optimizer, 'grad_stats_parallel_group', pg_collection.tp_ep_pp) - optimizers.append(expert_optimizer) - - # done with muon, unfreeze nonlinear and freeze linear - for param in nonlinear_params: - param.requires_grad = True - for param in linear_params: - param.requires_grad = False - - # call original get. 
linear params will be skipped since they're freezed - chained_adam = get_megatron_optimizer( - config, - model_chunks, - config_overrides=config_overrides, - use_gloo_process_groups=use_gloo_process_groups, - ) - - # unfreeze everything - for param in linear_params: - param.requires_grad = True - - # chain everything together - init_fns = [muon_init_state_fn] + len(chained_adam.chained_optimizers) * [adam_init_state_fn] - optimizers += chained_adam.chained_optimizers + from . import get_megatron_optimizer - if layer_wise_distributed_optimizer: - log_single_rank(logger, logging.INFO, 'Using LayerWiseDistributedOptimizer for Muon') - if reset_config_bf16: - config.bf16 = True - return LayerWiseDistributedOptimizer( - optimizers, config, pg_collection, init_state_fn_list=init_fns - ) - return ChainedOptimizer(optimizers) + return get_megatron_optimizer(*args, **kwargs) diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index 94163102eb3..4b43e7b5c08 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -206,7 +206,8 @@ class OptimizerConfig: """dtype of exp_avg_sq when enabling precision-aware-optimizer""" optimizer: str = 'adam' - """Optimizer name. NOTE: Deprecated, use individual optimizer classes instead.""" + """Optimizer name (e.g., 'adam', 'sgd', 'muon'). Can be overridden per-parameter group + via config_overrides to use different optimizers for different parameters.""" ############### # Loss scaling @@ -229,7 +230,7 @@ class OptimizerConfig: """Hysteresis for dynamic loss scaling.""" ################################################################################### - # Optimizer (NOTE: Deprecated, use individual optimizer classes instead.). + # Optimizer-specific parameters. ################################################################################### # Adam. 
adam_beta1: float = 0.9 @@ -254,10 +255,9 @@ class OptimizerConfig: sgd_momentum: float = 0.9 """Momentum factor for SGD optimizer.""" - # Muon. - # TODO: move muon configs to it's own `MuonConfig`. + # Muon / emerging optimizers. muon_momentum: float = 0.95 - """The momentum used by the internal SGD.""" + """The momentum used by the internal SGD in Muon optimizer.""" muon_split_qkv: bool = True """Whether to split QKV parameters for Muon optimizer.""" @@ -286,6 +286,12 @@ class OptimizerConfig: use_distributed_optimizer: bool = False """Distribute optimizer state over data-parallel replicas.""" + use_layer_wise_distributed_optimizer: bool = False + """Use :class:`LayerWiseDistributedOptimizer` for emerging optimizers (e.g. Muon). + When set via ``--use-distributed-optimizer`` with an emerging optimizer, the training + arguments layer sets this flag and resets ``use_distributed_optimizer`` to False so + that the standard distributed-optimizer path is not triggered.""" + overlap_param_gather: bool = False """If true, overlap param all-gather with forward compute. This argument is intended to have the same value as the "overlap_param_gather" argument @@ -431,33 +437,6 @@ def __post_init__(self): ), "exp_avg_sq_dtype can only be fp32 when not using precision-aware optimizer" -@dataclass -class AdamOptimizerConfig(OptimizerConfig): - """Adam optimizer configuration object.""" - - optimizer: str = 'adam' - """Optimizer name.""" - - adam_beta1: float = 0.9 - """First coefficient for computing running averages of gradient and its square in Adam - optimizer. - """ - - adam_beta2: float = 0.999 - """Second coefficient for computing running averages of gradient and its square in Adam - optimizer. 
- """ - - adam_eps: float = 1e-08 - """Term added to the denominator to improve numerical stability in Adam optimizer.""" - - -@dataclass -class SGDOptimizerConfig(OptimizerConfig): - """SGD optimizer configuration object.""" - - optimizer: str = 'sgd' - """Optimizer name.""" - - sgd_momentum: float = 0.9 - """Momentum factor for SGD optimizer.""" +# Backward-compatible aliases (deprecated; use OptimizerConfig directly). +AdamOptimizerConfig = OptimizerConfig +SGDOptimizerConfig = OptimizerConfig diff --git a/megatron/core/optimizer_param_scheduler.py b/megatron/core/optimizer_param_scheduler.py index e01a708ce79..91ed362b1b2 100644 --- a/megatron/core/optimizer_param_scheduler.py +++ b/megatron/core/optimizer_param_scheduler.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) -class ParamGroupOverride(TypedDict): +class ParamGroupOverride(TypedDict, total=False): """Override values for a parameter group. These values may be optimizer-state/scheduler related. These are the values you see later in param_group.get(...) 
calls in the @@ -23,7 +23,7 @@ class ParamGroupOverride(TypedDict): Example: >>> param_group_override = ParamGroupOverride(min_lr=1e-4, wd_mult=0.1) - >>> param_group_override == ParamGroupOverride(newvar=3) # this is ok too + >>> param_group_override == ParamGroupOverride(optimizer='muon') # per-param optimizer """ @@ -32,6 +32,7 @@ class ParamGroupOverride(TypedDict): start_wd: float end_wd: float wd_mult: float + optimizer: str def get_canonical_lr_for_logging(param_groups: list[dict]) -> float | None: diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 25f0d0d06d0..dece9b480f5 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1323,12 +1323,23 @@ def validate_args(args, defaults={}): args.no_load_optim = True warn_rank_0('enabling --no-load-optim when skipping training.') - # Muon optimizer check - if 'muon' in args.optimizer: + # Muon / emerging optimizer check + if args.optimizer in ('muon', 'dist_muon'): + if args.optimizer == 'dist_muon': + warn_rank_0( + "optimizer='dist_muon' is deprecated. " + "Use --optimizer muon --use-distributed-optimizer instead." + ) + args.optimizer = 'muon' + args.use_layer_wise_distributed_optimizer = True + + if args.use_distributed_optimizer: + args.use_layer_wise_distributed_optimizer = True + args.use_distributed_optimizer = False + # TODO: remove these checks once we support them assert not args.overlap_grad_reduce, "Muon optimizer does not support overlap grad reduce for now." assert not args.overlap_param_gather, "Muon optimizer does not support overlap param gather for now." - assert not args.use_distributed_optimizer, "Muon optimizer does not support distributed optimizer for now." assert not args.use_torch_fsdp2, "Muon optimizer does not support Torch-FSDP2 for now." assert not args.use_megatron_fsdp, "Muon optimizer does not support Megatron-FSDP for now." 
assert args.ckpt_format in ["torch", "torch_dist"], "Muon optimizer supports torch and torch_dist checkpoint format." @@ -2246,7 +2257,9 @@ def _add_training_args(parser): 'https://arxiv.org/abs/2205.14135') group.add_argument('--optimizer', type=str, default='adam', choices=['adam', 'sgd', 'muon', 'dist_muon'], - help='Optimizer function') + help='Optimizer function. ' + 'Note: dist_muon is deprecated; use --optimizer muon ' + 'with --use-distributed-optimizer instead.') group.add_argument('--optimizer-cpu-offload', action='store_true', help='Offload optimizer state to CPU') group.add_argument('--optimizer-offload-fraction', type=float, default=1.0, diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index a64d0cd318c..d9204f9007d 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -563,7 +563,7 @@ def save_checkpoint(iteration, model, optimizer, opt_param_scheduler, num_floati optimizer.save_parameter_state(optim_checkpoint_name) # LayerWiseDistributedOptimizer save optimizer state to file on different ranks - if getattr(args, "optimizer", "adam").startswith("dist_") and args.ckpt_format == 'torch': + if getattr(args, "use_layer_wise_distributed_optimizer", False) and args.ckpt_format == 'torch': dp_rank = mpu.get_data_parallel_rank() optim_checkpoint_name = os.path.join(os.path.dirname(checkpoint_name), f"layer_wise_optimizer_{dp_rank}.pt") ensure_directory_exists(optim_checkpoint_name) @@ -1809,7 +1809,7 @@ def load_model_state_dict(module, state_dict, strict: bool): if not release and not args.finetune and not args.no_load_optim: try: # Load state dict. 
- if getattr(args, "optimizer", "adam").startswith("dist_") and args.ckpt_format == 'torch': + if getattr(args, "use_layer_wise_distributed_optimizer", False) and args.ckpt_format == 'torch': # LayerWiseDistributedOptimizer load optimizer state from file on different ranks dp_rank = mpu.get_data_parallel_rank() optim_checkpoint_name = os.path.join(os.path.dirname(checkpoint_name), f"layer_wise_optimizer_{dp_rank}.pt") diff --git a/megatron/training/training.py b/megatron/training/training.py index 26769fabe96..2ee06acf795 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -126,8 +126,11 @@ def set_startup_timestamps(program_start=None, main_entry=None): from megatron.core.distributed import finalize_model_grads from megatron.core.enums import ModelType -from megatron.core.optimizer import get_megatron_optimizer, AdamOptimizerConfig, SGDOptimizerConfig, OptimizerConfig, ParamKey -from megatron.core.optimizer.muon import get_megatron_muon_optimizer +from megatron.core.optimizer import ( + get_megatron_optimizer, + OptimizerConfig, + ParamKey, +) from megatron.core.rerun_state_machine import ( get_rerun_state_machine, destroy_rerun_state_machine, @@ -1482,23 +1485,11 @@ def get_optimizer_param_scheduler(optimizer): def get_megatron_optimizer_config(args: Any) -> OptimizerConfig: """Return a Megatron optimizer config object from Megatron's arguments.""" - config = None - if args.optimizer == 'adam' or 'muon' in args.optimizer: - # TODO(deyuf): Muon needs both adam + muon but get() only receive one config - # So for now we keep using adam config that's back compat with old way - kwargs = {} - for f in dataclasses.fields(AdamOptimizerConfig): - if hasattr(args, f.name): - kwargs[f.name] = getattr(args, f.name) - config = AdamOptimizerConfig(**kwargs) - elif args.optimizer == 'sgd': - kwargs = {} - for f in dataclasses.fields(SGDOptimizerConfig): - if hasattr(args, f.name): - kwargs[f.name] = getattr(args, f.name) - config = 
SGDOptimizerConfig(**kwargs) - else: - raise ValueError("Invalid optimizer type!") + kwargs = {} + for f in dataclasses.fields(OptimizerConfig): + if hasattr(args, f.name): + kwargs[f.name] = getattr(args, f.name) + config = OptimizerConfig(**kwargs) # Construct the appropriate config_overrides object. This default handles many cases, but # can be added to as needed by the user, or replaced entirely with a custom override. @@ -1528,25 +1519,13 @@ def setup_model_and_optimizer( config, config_overrides = get_megatron_optimizer_config(args) config.timers = timers - if 'muon' not in config.optimizer: - # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings - # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903 - # default_skip_embedding_weight_decay=args.embedding_init_method_std is not None, - optimizer = get_megatron_optimizer( - config, - model, - config_overrides=config_overrides, - use_gloo_process_groups=args.enable_gloo_process_groups, - dump_param_to_param_group_map=args.dump_param_to_param_group_map, - ) - else: - optimizer = get_megatron_muon_optimizer( - config, - model, - config_overrides=config_overrides, - use_gloo_process_groups=args.enable_gloo_process_groups, - layer_wise_distributed_optimizer='dist' in config.optimizer, - ) + optimizer = get_megatron_optimizer( + config, + model, + config_overrides=config_overrides, + use_gloo_process_groups=args.enable_gloo_process_groups, + dump_param_to_param_group_map=args.dump_param_to_param_group_map, + ) opt_param_scheduler = get_optimizer_param_scheduler(optimizer) one_logger and one_logger.log_metrics({"app_build_optimzer_finish_time": one_logger_utils.get_timestamp_in_ms()}) diff --git a/tests/unit_tests/dist_checkpointing/utils.py b/tests/unit_tests/dist_checkpointing/utils.py index dd12ecd7684..cf6662c72bf 100644 --- a/tests/unit_tests/dist_checkpointing/utils.py +++ b/tests/unit_tests/dist_checkpointing/utils.py @@ -12,7 
+12,7 @@ get_gpt_layer_with_transformer_engine_spec, ) from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer -from megatron.core.optimizer.muon import get_megatron_muon_optimizer +from megatron.core.optimizer.optimizer import ChainedOptimizer from megatron.core.tensor_parallel import model_parallel_cuda_manual_seed from megatron.core.transformer import TransformerConfig from megatron.training.arguments import parse_args @@ -172,11 +172,6 @@ def init_checkpointing_mock_args(args, ckpt_dir, fully_parallel=False): def setup_model_and_optimizer( seed, tp, pp, initialize_fn=initialize_gpt_model, bf16=True, dist_opt=True, optimizer='adam' ): - if 'muon' in optimizer and dist_opt: - raise ValueError( - "Layer-wise distributed optimizer with Muon is not supported with distributed optimizer." - ) - mock_args = parse_args(ignore_unknown_args=True) with mock.patch('megatron.training.training.get_args', new=lambda: mock_args): init_basic_mock_args(mock_args, tp, pp, bf16=bf16) @@ -191,37 +186,39 @@ def setup_model_and_optimizer( ) ) + optimizer_type = optimizer + use_layer_wise = False + if optimizer_type == 'dist_muon': + optimizer = 'muon' + use_layer_wise = True + if optimizer_type in ('muon', 'dist_muon') and dist_opt: + use_layer_wise = True + dist_opt = False + config = OptimizerConfig( bf16=bf16, params_dtype=torch.bfloat16 if bf16 else torch.float, use_distributed_optimizer=dist_opt, + use_layer_wise_distributed_optimizer=use_layer_wise, optimizer=optimizer, ) - if 'muon' in optimizer: - # Use layer-wise distributed optimizer with Muon - optimizer_type = optimizer - # default lr None feels wrong. 
only change muon lr to avoid breaking old tests + if optimizer_type in ('muon', 'dist_muon'): config.lr = 0.0 - optimizer = get_megatron_muon_optimizer( - config, model, layer_wise_distributed_optimizer='dist' in optimizer_type - ) - else: - optimizer_type = optimizer - optimizer = get_megatron_optimizer(config, model) + optimizer = get_megatron_optimizer(config, model) torch.manual_seed(seed + 1) model_parallel_cuda_manual_seed(seed + 1) - if not 'muon' in optimizer_type: + if isinstance(optimizer, ChainedOptimizer): + for opt in optimizer.chained_optimizers: + opt.init_state_fn(opt) + else: for group in optimizer.optimizer.param_groups: for p in group['params']: if len(optimizer.optimizer.state[p]) == 0: optimizer.optimizer.state[p]['exp_avg'] = torch.rand_like(p.data) optimizer.optimizer.state[p]['exp_avg_sq'] = torch.rand_like(p.data) - else: - for opt in optimizer.chained_optimizers: - opt.init_state_fn(opt) optimizer.reload_model_params() @@ -266,10 +263,6 @@ def setup_moe_model_and_optimizer( use_glu=False, optimizer='adam', ): - if 'muon' in optimizer and dist_opt: - raise ValueError( - "Layer-wise distributed optimizer with Muon is not supported with distributed optimizer." - ) mock_args = parse_args(ignore_unknown_args=True) with mock.patch('megatron.training.training.get_args', new=lambda: mock_args): init_basic_mock_args(mock_args, tp, pp, bf16=bf16) @@ -289,37 +282,40 @@ def setup_moe_model_and_optimizer( ) ) + optimizer_type = optimizer + use_layer_wise = False + if optimizer_type == 'dist_muon': + optimizer = 'muon' + use_layer_wise = True + if optimizer_type in ('muon', 'dist_muon') and dist_opt: + use_layer_wise = True + dist_opt = False + config = OptimizerConfig( bf16=bf16, params_dtype=torch.bfloat16 if bf16 else torch.float, use_distributed_optimizer=dist_opt, + use_layer_wise_distributed_optimizer=use_layer_wise, optimizer=optimizer, ) - if 'muon' in optimizer: - optimizer_type = optimizer - # default lr None feels wrong. 
only change muon lr to avoid breaking old tests + if optimizer_type in ('muon', 'dist_muon'): config.lr = 0.0 - optimizer = get_megatron_muon_optimizer( - config, model, layer_wise_distributed_optimizer='dist' in optimizer_type - ) - else: - optimizer_type = optimizer - optimizer = get_megatron_optimizer(config, model) + optimizer = get_megatron_optimizer(config, model) torch.manual_seed(seed + 1) model_parallel_cuda_manual_seed(seed + 1) - if not 'muon' in optimizer_type: + if optimizer_type in ('muon', 'dist_muon'): + for opt in optimizer.chained_optimizers: + opt.init_state_fn(opt) + else: for opt in optimizer.chained_optimizers: for group in opt.param_groups: for p in group['params']: if len(opt.state[p]) == 0: opt.state[p]['exp_avg'] = torch.rand_like(p.data) opt.state[p]['exp_avg_sq'] = torch.rand_like(p.data) - else: - for opt in optimizer.chained_optimizers: - opt.init_state_fn(opt) optimizer.reload_model_params() diff --git a/tests/unit_tests/test_layer_wise_optimizer.py b/tests/unit_tests/test_layer_wise_optimizer.py index 05ce26bcfa0..9b404b388b4 100644 --- a/tests/unit_tests/test_layer_wise_optimizer.py +++ b/tests/unit_tests/test_layer_wise_optimizer.py @@ -124,9 +124,11 @@ def create_model_and_optimizer( optimizer = get_megatron_optimizer(optimizer_config, [model]) if use_layer_wise: + # Extract base torch optimizers from the FP32Optimizer wrappers. 
+ base_optimizers = [opt.optimizer for opt in optimizer.chained_optimizers] optimizer_config.bf16 = True optimizer = LayerWiseDistributedOptimizer( - optimizer.chained_optimizers, optimizer_config, pg_collection + base_optimizers, optimizer_config, pg_collection ) return model, optimizer, pg_collection @@ -281,19 +283,16 @@ def test_multiple_optimizers(self): param_groups_1 = [{'params': params[:mid_point]}] param_groups_2 = [{'params': params[mid_point:]}] - # Create two separate base optimizers + # Create two separate plain base optimizers (LayerWise wraps them itself) base_optimizer_1 = torch.optim.Adam(param_groups_1, lr=optimizer_config.lr) base_optimizer_2 = torch.optim.Adam(param_groups_2, lr=optimizer_config.lr) - wrapped_optimizer_1 = FP32Optimizer(base_optimizer_1, optimizer_config, None) - wrapped_optimizer_2 = FP32Optimizer(base_optimizer_2, optimizer_config, None) - pg_collection = ProcessGroupCollection.use_mpu_process_groups() pg_collection.dp_cp = parallel_state.get_data_parallel_group(with_context_parallel=True) pg_collection.expt_dp = parallel_state.get_expert_data_parallel_group() optimizer = LayerWiseDistributedOptimizer( - [wrapped_optimizer_1, wrapped_optimizer_2], optimizer_config, pg_collection + [base_optimizer_1, base_optimizer_2], optimizer_config, pg_collection ) assert len(optimizer.chained_optimizers) == 2, "Should have two chained optimizers" @@ -347,9 +346,9 @@ def test_bf16_error(self): pg_collection.dp_cp = parallel_state.get_data_parallel_group(with_context_parallel=True) pg_collection.expt_dp = parallel_state.get_expert_data_parallel_group() - # Should raise TypeError when receiving already-wrapped Float16 optimizer + # Should raise TypeError when receiving already-wrapped optimizer with pytest.raises( - TypeError, match='LayerWiseDistributedOptimizer received Float16 optimizer already' + TypeError, match='LayerWiseDistributedOptimizer expects base torch optimizers' ): LayerWiseDistributedOptimizer([wrapped_optimizer], 
optimizer_config, pg_collection) diff --git a/tests/unit_tests/test_muon_optimizer.py b/tests/unit_tests/test_muon_optimizer.py index cc99f7a16e6..86d75ee7a49 100644 --- a/tests/unit_tests/test_muon_optimizer.py +++ b/tests/unit_tests/test_muon_optimizer.py @@ -10,8 +10,8 @@ from megatron.core import parallel_state from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig -from megatron.core.optimizer import OptimizerConfig -from megatron.core.optimizer.muon import TensorParallelMuon, get_megatron_muon_optimizer +from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer +from megatron.core.optimizer.emerging_optimizers import TensorParallelMuon from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer import TransformerConfig from tests.unit_tests.test_utilities import Utils @@ -129,8 +129,8 @@ def create_ddp_model(self, model): TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model ) - def test_get_megatron_muon_optimizer_smoke(self): - """Smoke test for get_megatron_muon_optimizer function.""" + def test_get_megatron_optimizer_smoke(self): + """Smoke test for get_megatron_optimizer function.""" model = Net().bfloat16().cuda() model.requires_grad_(True) model = self.create_ddp_model(model) @@ -155,11 +155,8 @@ def test_get_megatron_muon_optimizer_smoke(self): ) # Test creating the optimizer - optimizer = get_megatron_muon_optimizer( - config=optimizer_config, - model_chunks=[model], - use_gloo_process_groups=True, - layer_wise_distributed_optimizer=False, + optimizer = get_megatron_optimizer( + config=optimizer_config, model_chunks=[model], use_gloo_process_groups=True ) # Test basic properties @@ -204,24 +201,13 @@ def test_get_megatron_muon_optimizer_smoke(self): # Load state dict should not raise error optimizer.load_state_dict(state_dict) - def test_get_megatron_muon_optimizer_validation(self): - """Test validation logic for 
get_megatron_muon_optimizer.""" + def test_get_megatron_optimizer_validation(self): + """Test validation logic for get_megatron_optimizer.""" model = torch.nn.Linear(100, 50, bias=False, dtype=torch.bfloat16, device='cuda') model.requires_grad_(True) model = self.create_ddp_model(model) - # Test 1: Distributed optimizer should raise exception - optimizer_config_dist = OptimizerConfig( - optimizer='muon', - lr=0.01, - bf16=True, - use_distributed_optimizer=True, # This should cause an exception - ) - - with pytest.raises(Exception, match='muon with dist optimizer is not supported'): - get_megatron_muon_optimizer(config=optimizer_config_dist, model_chunks=[model]) - - # Test 2: FP16 should raise exception + # Test 1: FP16 should raise exception optimizer_config_fp16 = OptimizerConfig( optimizer='muon', lr=0.01, @@ -229,8 +215,8 @@ def test_get_megatron_muon_optimizer_validation(self): use_distributed_optimizer=False, ) - with pytest.raises(Exception, match='muon with fp16 is not supported'): - get_megatron_muon_optimizer(config=optimizer_config_fp16, model_chunks=[model]) + with pytest.raises(Exception, match='emerging optimizer with fp16 is not supported'): + get_megatron_optimizer(config=optimizer_config_fp16, model_chunks=[model]) # Test 3: Invalid num_ns_steps should raise exception optimizer_config_invalid_ns = OptimizerConfig( @@ -242,10 +228,10 @@ def test_get_megatron_muon_optimizer_validation(self): ) with pytest.raises(ValueError, match='num_ns_steps must be at least 1'): - get_megatron_muon_optimizer(config=optimizer_config_invalid_ns, model_chunks=[model]) + get_megatron_optimizer(config=optimizer_config_invalid_ns, model_chunks=[model]) - def test_get_megatron_muon_optimizer_layer_wise(self): - """Test get_megatron_muon_optimizer with layer-wise distributed optimizer.""" + def test_get_megatron_optimizer_layer_wise(self): + """Test get_megatron_optimizer with layer-wise distributed optimizer.""" model = Net().bfloat16().cuda() model.requires_grad_(True) 
model = self.create_ddp_model(model) @@ -255,7 +241,7 @@ def test_get_megatron_muon_optimizer_layer_wise(self): lr=0.01, weight_decay=0.01, bf16=True, - use_distributed_optimizer=False, + use_layer_wise_distributed_optimizer=True, muon_momentum=0.95, muon_use_nesterov=True, muon_fp32_matmul_prec="medium", @@ -264,12 +250,9 @@ def test_get_megatron_muon_optimizer_layer_wise(self): muon_tp_mode="duplicated", ) - # Test with layer_wise_distributed_optimizer=True - optimizer = get_megatron_muon_optimizer( - config=optimizer_config, - model_chunks=[model], - use_gloo_process_groups=True, - layer_wise_distributed_optimizer=True, + # use_layer_wise_distributed_optimizer=True triggers LayerWiseDistributedOptimizer + optimizer = get_megatron_optimizer( + config=optimizer_config, model_chunks=[model], use_gloo_process_groups=True ) # Verify it's a LayerWiseDistributedOptimizer diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py index 2488900ba72..56af8545042 100644 --- a/tests/unit_tests/test_optimizer.py +++ b/tests/unit_tests/test_optimizer.py @@ -106,10 +106,10 @@ def test_get_param_groups_no_overrides(mock_get_world_size): def test_get_param_groups_default_overrides(mock_get_world_size): """Test that the default overrides are applied to the parameter groups.""" net = Net() - # NOTE: to get legacy default overrides, supply None. 
opt_config = OptimizerConfig(optimizer='adam', lr=0.01) - check_config_overrides_consistency(opt_config, None) - param_groups = _get_param_groups([net], opt_config, None) + config_overrides = get_standard_config_overrides(opt_config) + check_config_overrides_consistency(opt_config, config_overrides) + param_groups = _get_param_groups([net], opt_config, config_overrides) assert len(param_groups) == 2 pg0, pg1 = param_groups wd_mults = {pg0['wd_mult'], pg1['wd_mult']} From a2682314c6ac333921e5f0a1c4900d2c7d4c5d70 Mon Sep 17 00:00:00 2001 From: Li Tao Date: Thu, 5 Mar 2026 13:55:09 +0800 Subject: [PATCH 301/334] [Dev] Move some processing into a function so can be compiled (#3220) --- megatron/core/ssm/gated_delta_net.py | 89 +++++++++++++------- tests/unit_tests/ssm/test_gated_delta_net.py | 64 ++++++++++++++ 2 files changed, 121 insertions(+), 32 deletions(-) diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py index 601a72a4356..9cb50ba6953 100644 --- a/megatron/core/ssm/gated_delta_net.py +++ b/megatron/core/ssm/gated_delta_net.py @@ -388,37 +388,12 @@ def forward( ) nvtx_range_pop(suffix="conv1d") - # Split qkv into query_key, and value - query_key, value = torch.split( - qkv, - [2 * self.qk_dim_local_tp // self.cp_size, self.v_dim_local_tp // self.cp_size], - dim=-1, - ) - query_key = query_key.reshape(batch, seq_len, -1, self.key_head_dim) - value = value.reshape(batch, seq_len, -1, self.value_head_dim) - # Apply L2 norm to query and key - if self.use_qk_l2norm: - query_key = l2norm(query_key.contiguous()) - # Split query and key. 
- query, key = torch.split( - query_key, - [ - self.qk_dim_local_tp // self.key_head_dim // self.cp_size, - self.qk_dim_local_tp // self.key_head_dim // self.cp_size, - ], - dim=2, + # Prepare QKV tensors (split, reshape, L2 norm, repeat_interleave, contiguous) + nvtx_range_push(suffix="prepare_qkv_for_gated_delta_rule") + query, key, value, gate, beta, alpha = self._prepare_qkv_for_gated_delta_rule( + qkv, gate, beta, alpha, batch, seq_len ) - if self.num_value_heads // self.num_key_heads > 1: - query = query.repeat_interleave(self.num_value_heads // self.num_key_heads, dim=2) - key = key.repeat_interleave(self.num_value_heads // self.num_key_heads, dim=2) - - # Make contiguous - query = query.contiguous() - key = key.contiguous() - value = value.contiguous() - gate = gate.contiguous() - beta = beta.contiguous() - alpha = alpha.contiguous() + nvtx_range_pop(suffix="prepare_qkv_for_gated_delta_rule") # Calculate g and beta nvtx_range_push(suffix="g_and_beta") @@ -426,8 +401,7 @@ def forward( dt_bias_local_cp = get_parameter_local_cp( self.dt_bias, dim=0, cp_group=self.pg_collection.cp ) - g = -A_log_local_cp.exp() * F.softplus(alpha.float() + dt_bias_local_cp) # In fp32 - beta = beta.sigmoid() + g, beta = self._compute_g_and_beta(A_log_local_cp, dt_bias_local_cp, alpha, beta) nvtx_range_pop(suffix="g_and_beta") nvtx_range_push(suffix="gated_delta_rule") @@ -477,6 +451,57 @@ def _apply_gated_norm(self, x, gate): y = y.to(x_dtype) return y + @jit_fuser + def _prepare_qkv_for_gated_delta_rule(self, qkv, gate, beta, alpha, batch, seq_len): + """ + Prepare query, key, value, gate, beta, alpha tensors for gated delta rule. + Fuses split, reshape, L2 norm, repeat_interleave, and contiguous operations. 
+ """ + # Split qkv into query_key and value + query_key, value = torch.split( + qkv, + [2 * self.qk_dim_local_tp // self.cp_size, self.v_dim_local_tp // self.cp_size], + dim=-1, + ) + + # Reshape query_key and value + query_key = query_key.reshape(batch, seq_len, -1, self.key_head_dim) + value = value.reshape(batch, seq_len, -1, self.value_head_dim) + + # Apply L2 norm to query and key + if self.use_qk_l2norm: + query_key = l2norm(query_key.contiguous()) + + # Split query and key + split_size = self.qk_dim_local_tp // self.key_head_dim // self.cp_size + query, key = torch.split(query_key, [split_size, split_size], dim=2) + + # Expand query and key if needed (grouped query attention) + if self.num_value_heads // self.num_key_heads > 1: + repeat_factor = self.num_value_heads // self.num_key_heads + query = query.repeat_interleave(repeat_factor, dim=2) + key = key.repeat_interleave(repeat_factor, dim=2) + + # Make all tensors contiguous + query = query.contiguous() + key = key.contiguous() + value = value.contiguous() + gate = gate.contiguous() + beta = beta.contiguous() + alpha = alpha.contiguous() + + return query, key, value, gate, beta, alpha + + @jit_fuser + def _compute_g_and_beta(self, A_log_local_cp, dt_bias_local_cp, alpha, beta): + """ + Compute g (decay) and beta (sigmoid) for gated delta rule. + Fuses exp, softplus, mul, neg, and sigmoid operations. 
+ """ + g = -A_log_local_cp.exp() * F.softplus(alpha.float() + dt_bias_local_cp) # In fp32 + beta = beta.sigmoid() + return g, beta + def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None, tp_group=None): """Provide a sharded state dictionary for distributed checkpointing.""" # Guard for cases metadata is not provided diff --git a/tests/unit_tests/ssm/test_gated_delta_net.py b/tests/unit_tests/ssm/test_gated_delta_net.py index 81f8eed0574..8f3c59b3d43 100644 --- a/tests/unit_tests/ssm/test_gated_delta_net.py +++ b/tests/unit_tests/ssm/test_gated_delta_net.py @@ -138,6 +138,70 @@ def test_gpu_forward(self): output.dtype == hidden_states.dtype ), f"Output dtype {output.dtype=} mismatch with {hidden_states.dtype=}" + def test_jit_compiled_helpers(self): + import torch._dynamo + + gdn = self.gdn + batch = 2 + seq_len = 16 + + num_v_heads_local = gdn.num_value_heads // gdn.tp_size // gdn.cp_size + + qkv_last_dim = (2 * gdn.qk_dim_local_tp + gdn.v_dim_local_tp) // gdn.cp_size + qkv = torch.randn( + batch, seq_len, qkv_last_dim, device=torch.cuda.current_device(), dtype=torch.bfloat16 + ) + gate = torch.randn( + batch, + seq_len, + num_v_heads_local, + gdn.value_head_dim, + device=torch.cuda.current_device(), + dtype=torch.bfloat16, + ) + beta = torch.randn( + batch, + seq_len, + num_v_heads_local, + device=torch.cuda.current_device(), + dtype=torch.bfloat16, + ) + alpha = torch.randn( + batch, + seq_len, + num_v_heads_local, + device=torch.cuda.current_device(), + dtype=torch.bfloat16, + ) + + # Disable dynamo so coverage.py can trace through the method bodies, + # which are normally wrapped by @jit_fuser (torch.compile). 
+ with torch._dynamo.config.patch(disable=True): + query, key, value, gate_out, beta_out, alpha_out = ( + gdn._prepare_qkv_for_gated_delta_rule(qkv, gate, beta, alpha, batch, seq_len) + ) + + assert query.shape == (batch, seq_len, num_v_heads_local, gdn.key_head_dim) + assert key.shape == (batch, seq_len, num_v_heads_local, gdn.key_head_dim) + assert value.shape == (batch, seq_len, num_v_heads_local, gdn.value_head_dim) + assert query.is_contiguous() + assert key.is_contiguous() + assert value.is_contiguous() + + A_log_mock = torch.randn( + num_v_heads_local, device=torch.cuda.current_device(), dtype=torch.bfloat16 + ) + dt_bias_mock = torch.randn( + num_v_heads_local, device=torch.cuda.current_device(), dtype=torch.bfloat16 + ) + + with torch._dynamo.config.patch(disable=True): + g, beta_sig = gdn._compute_g_and_beta(A_log_mock, dt_bias_mock, alpha, beta) + + assert g.dtype == torch.float32 + assert g.shape == alpha.shape + assert beta_sig.shape == beta.shape + @pytest.mark.parametrize( ("tp", "sp", "cp"), From f983b21cb87f9e601f000cb8d336981bf85f397c Mon Sep 17 00:00:00 2001 From: Zijie Yan Date: Thu, 5 Mar 2026 15:48:40 +0800 Subject: [PATCH 302/334] [Dev] Refactor MoE loss logging (#2569) Co-authored-by: Robin Zhang --- megatron/core/transformer/cuda_graphs.py | 21 +- megatron/core/transformer/moe/moe_logging.py | 379 ++++++++++++++++++ megatron/core/transformer/moe/moe_utils.py | 219 +++------- megatron/core/transformer/moe/router.py | 16 +- megatron/training/training.py | 12 +- .../unit_tests/models/test_mamba_moe_model.py | 2 + 6 files changed, 469 insertions(+), 180 deletions(-) create mode 100644 megatron/core/transformer/moe/moe_logging.py diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 48a023e6ddc..f7b2bc79cab 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -819,13 +819,12 @@ def create_fwd_graph(self, args, kwargs, outputs=None, 
clone_inputs=True): is_moe = isinstance(self.base_module, MoETransformerLayer) if is_moe: - from megatron.core.transformer.moe.moe_utils import get_moe_layer_wise_logging_tracker + from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker - tracker = get_moe_layer_wise_logging_tracker() + moe_metrics_tracker = get_moe_metrics_tracker() cached_aux_losses = {} - for name in tracker: - if "values" in tracker[name]: - cached_aux_losses[name] = torch.clone(tracker[name]["values"]) + for name, entry in moe_metrics_tracker.metrics.items(): + cached_aux_losses[name] = entry.values.clone() self.fwd_graph = torch.cuda.CUDAGraph() @@ -1014,8 +1013,11 @@ def clone_ten(ten): param.main_grad.copy_(main_grad_copy) if is_moe: - for name in tracker: - tracker[name]["values"].copy_(cached_aux_losses[name]) + for name, cached_values in cached_aux_losses.items(): + assert ( + name in moe_metrics_tracker.metrics + ), "cached metrics must be found in the tracker." + moe_metrics_tracker.metrics[name].values.copy_(cached_values) def create_bwd_graph(self): """Create a bwd cudagraph for this runner. 
Should be called inside @@ -2208,14 +2210,15 @@ def _finish_capturing(self, start_time): _set_capture_end() from megatron.core.distributed.finalize_model_grads import reset_model_temporary_tensors - from megatron.core.transformer.moe.moe_utils import clear_aux_losses_tracker torch.distributed.barrier() for model_chunk in self.model: model_chunk.zero_grad_buffer() for optimizer in self.optimizers: optimizer.zero_grad() - clear_aux_losses_tracker() + from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker + + get_moe_metrics_tracker().clear() reset_model_temporary_tensors(self.config, self.model) if FREEZE_GC: diff --git a/megatron/core/transformer/moe/moe_logging.py b/megatron/core/transformer/moe/moe_logging.py new file mode 100644 index 00000000000..b1f2b27000b --- /dev/null +++ b/megatron/core/transformer/moe/moe_logging.py @@ -0,0 +1,379 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""MoE metrics tracking and logging. + +Collects per-layer MoE metrics during forward passes, synchronizes them across +distributed ranks, and writes scalar summaries to TensorBoard / W&B. 
+ +Usage: + tracker = get_moe_metrics_tracker() + + # In router forward pass: + tracker.record("load_balancing_loss", loss, layer_number=1, num_layers=32, + reduce_group=tp_cp_group) + + # At end of training step: + log_str = tracker.report( + loss_scale=1 / num_microbatches, + iteration=step, + writer=tb_writer, + num_layers=32, + ) +""" + +from dataclasses import dataclass +from typing import Dict, List, Optional, Union + +import torch + +from megatron.core import parallel_state +from megatron.core.process_groups_config import ProcessGroupCollection + + +@dataclass +class MetricEntry: + """Per-layer metric with distributed reduction configuration.""" + + values: torch.Tensor + reduce_group: Optional[torch.distributed.ProcessGroup] = None + avg_group: Optional[torch.distributed.ProcessGroup] = None + needs_dp_avg: bool = True + + +# --------------------------------------------------------------------------- +# Module-level global tracker (follows parallel_state / global_vars pattern) +# --------------------------------------------------------------------------- +_MOE_METRICS_TRACKER: Optional['MoEMetricsTracker'] = None + + +def get_moe_metrics_tracker() -> 'MoEMetricsTracker': + """Return the global MoE metrics tracker, creating it lazily if needed.""" + global _MOE_METRICS_TRACKER + if _MOE_METRICS_TRACKER is None: + _MOE_METRICS_TRACKER = MoEMetricsTracker() + return _MOE_METRICS_TRACKER + + +def set_moe_metrics_tracker(tracker: 'MoEMetricsTracker') -> None: + """Set the global MoE metrics tracker.""" + global _MOE_METRICS_TRACKER + _MOE_METRICS_TRACKER = tracker + + +def destroy_moe_metrics_tracker() -> None: + """Reset the global MoE metrics tracker to ``None``.""" + global _MOE_METRICS_TRACKER + _MOE_METRICS_TRACKER = None + + +class MoEMetricsTracker: + """Tracker for MoE layer-wise metrics. + + Lifecycle: ``record()`` per-layer values during forward → ``report()`` at + step end (sync, aggregate, log, clear) → repeat. 
+ + Example: + tracker = get_moe_metrics_tracker() + tracker.record("load_balancing_loss", loss, layer_number=1, num_layers=32) + log_str = tracker.report(loss_scale=1/8, iteration=100, writer=tb_writer, + num_layers=32) + """ + + def __init__(self): + self._metrics: Dict[str, MetricEntry] = {} + + # ========================================================================= + # Public API + # ========================================================================= + + @property + def metrics(self) -> Dict[str, MetricEntry]: + """Read-only access to the underlying metric entries.""" + return self._metrics + + def record( + self, + name: str, + value: torch.Tensor, + layer_number: int, + num_layers: int, + reduce_group: Optional[torch.distributed.ProcessGroup] = None, + avg_group: Optional[torch.distributed.ProcessGroup] = None, + needs_dp_avg: bool = True, + ) -> None: + """Accumulate a metric value for a specific layer. + + Called during the router forward pass. Lazily creates the metric entry + on first call for each metric name. + + Args: + name: Metric name (e.g. ``"load_balancing_loss"``). + value: Scalar tensor to accumulate (will be detached). + layer_number: 1-based layer index. + num_layers: Total number of layers (determines tensor size). + reduce_group: Process group for sum-reduction (e.g. tp_cp_group). + avg_group: Process group for average-reduction. + needs_dp_avg: Whether to average across DP ranks after other reductions. 
+ """ + if layer_number is None: + return + + if name not in self._metrics: + self._metrics[name] = MetricEntry(values=torch.zeros(num_layers, device=value.device)) + + entry = self._metrics[name] + entry.values[layer_number - 1] += value.detach() + entry.reduce_group = reduce_group + entry.avg_group = avg_group + entry.needs_dp_avg = needs_dp_avg + + def report( + self, + loss_scale: float, + iteration: int, + writer=None, + wandb_writer=None, + per_layer_logging: bool = False, + force_initialize: bool = False, + track_names: Optional[Union[str, List[str]]] = None, + num_layers: Optional[int] = None, + moe_layer_freq: Optional[Union[int, List[int]]] = None, + mtp_num_layers: Optional[int] = None, + total_loss_dict: Optional[dict[str, torch.Tensor]] = None, + percentiles: Optional[Dict[str, List[float]]] = None, + pg_collection: Optional[ProcessGroupCollection] = None, + ) -> str: + """Sync metrics across ranks, aggregate, log, and clear. + + This is the main entry point called once per training step. It pairs + with :meth:`record`: you *record* individual data points during forward, + then *report* the summary at step end. + + Args: + loss_scale: Scale factor for averaging across microbatches + (usually ``1 / num_microbatches``). + iteration: Current training iteration. + writer: TensorBoard ``SummaryWriter`` (optional). + wandb_writer: Weights & Biases run object (optional). + per_layer_logging: Whether to also write per-layer values. + force_initialize: If True, pre-create metric entries for *track_names* + that don't exist yet. Required for PP ranks without MoE layers + whose tensor sizes must match ranks that do have MoE layers. + track_names: Metric name(s) to report. ``None`` reports all. + num_layers: Total transformer layers (required when *force_initialize*). + moe_layer_freq: MoE layer frequency or binary pattern list. + mtp_num_layers: Extra layers from Multi-Token Prediction. + total_loss_dict: Megatron training-loop accumulator. 
Metrics + ending with ``"loss"`` are accumulated here and excluded from + the returned console log string. + percentiles: Per-metric percentiles to compute, e.g. + ``{"load_imbalance": [0.5, 0.95]}``. + pg_collection: Custom process-group collection for reduction. + + Returns: + Formatted log string for console output. + """ + metric_names = self._resolve_names(track_names) + + # Pre-create entries on PP ranks that lack MoE layers. + # Tensor size must be (num_layers + mtp_num_layers) to match ranks that + # recorded via record(), otherwise all_reduce across PP will hang. + if force_initialize: + if num_layers is None: + raise ValueError("num_layers must be provided when force_initialize=True.") + init_size = num_layers + (mtp_num_layers or 0) + for name in metric_names: + self.ensure_initialized(name, init_size) + + self._sync_metrics(metric_names, pg_collection) + + num_moe_layers = self._count_moe_layers(num_layers, moe_layer_freq, mtp_num_layers) + scalars = self._aggregate(loss_scale, num_moe_layers, metric_names, percentiles) + + # Megatron integration: accumulate loss metrics into total_loss_dict + console_scalars = dict(scalars) + if total_loss_dict is not None: + for k, v in scalars.items(): + if k.lower().endswith("loss"): + if k in total_loss_dict: + total_loss_dict[k] += v + else: + total_loss_dict[k] = v + console_scalars.pop(k) + + self._log_scalars(scalars, iteration, writer, wandb_writer) + if per_layer_logging: + self._log_per_layer( + loss_scale, metric_names, iteration, writer, wandb_writer, percentiles + ) + + log_string = self._format(console_scalars) + self.clear() + return log_string + + def clear(self) -> None: + """Zero out all metric values (entries are kept for reuse).""" + for entry in self._metrics.values(): + entry.values.zero_() + + def ensure_initialized( + self, name: str, num_layers: int, device: Optional[Union[str, torch.device, int]] = None + ) -> None: + """Pre-create a metric entry if it does not already exist. 
+ + This is needed for PP ranks that have no MoE layers -- their tensor + size must match ranks that do, otherwise ``all_reduce`` across PP hangs. + + Args: + name: Metric name. + num_layers: Tensor size (should include MTP layers). + device: Device for the zero tensor. Defaults to current CUDA device. + """ + if name not in self._metrics: + if device is None: + device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu" + self._metrics[name] = MetricEntry(values=torch.zeros(num_layers, device=device)) + + # ========================================================================= + # Private implementation + # ========================================================================= + + def _resolve_names(self, track_names: Optional[Union[str, List[str]]]) -> List[str]: + """Normalize *track_names* argument to a list of strings.""" + if track_names is None: + return list(self._metrics.keys()) + if isinstance(track_names, str): + return [track_names] + return track_names + + def _sync_metrics( + self, metric_names: List[str], pg_collection: Optional[ProcessGroupCollection] = None + ) -> None: + """All-reduce metrics across distributed ranks. + + Reduction order: PP collect → reduce_group sum → avg_group avg → DP avg. 
+ """ + if pg_collection is None: + pp_group = parallel_state.get_pipeline_model_parallel_group() + dp_group = parallel_state.get_data_parallel_group( + with_context_parallel=False, partial_data_parallel=False + ) + else: + pp_group = pg_collection.pp + dp_group = pg_collection.dp + + for name in metric_names: + if name not in self._metrics: + continue + + entry = self._metrics[name] + v = entry.values + + torch.distributed.all_reduce(v, group=pp_group) + + if entry.reduce_group is not None: + torch.distributed.all_reduce(v, group=entry.reduce_group) + + if entry.avg_group is not None: + torch.distributed.all_reduce( + v, group=entry.avg_group, op=torch.distributed.ReduceOp.AVG + ) + + if entry.needs_dp_avg: + torch.distributed.all_reduce(v, group=dp_group, op=torch.distributed.ReduceOp.AVG) + + @staticmethod + def _count_moe_layers( + num_layers: Optional[int], + moe_layer_freq: Optional[Union[int, List[int]]], + mtp_num_layers: Optional[int], + ) -> int: + """Compute the effective number of MoE layers from configuration.""" + if moe_layer_freq is None: + n = num_layers + elif isinstance(moe_layer_freq, int): + assert isinstance(num_layers, int) + n = sum(1 for i in range(num_layers) if i % moe_layer_freq == 0) + elif isinstance(moe_layer_freq, list): + n = sum(moe_layer_freq) + else: + raise ValueError(f"Invalid moe_layer_freq: {moe_layer_freq}") + + if mtp_num_layers is not None: + n += mtp_num_layers + + return n + + def _aggregate( + self, + loss_scale: float, + num_moe_layers: int, + metric_names: List[str], + percentiles: Optional[Dict[str, List[float]]] = None, + ) -> Dict[str, Union[float, torch.Tensor]]: + """Aggregate per-layer values into scalar summaries. + + Always computes the mean across MoE layers. If *percentiles* specifies + quantiles for a metric, those are computed over non-zero layer values and + added as ``"{name}_p{pct}"`` keys. 
+ """ + result: Dict[str, Union[float, torch.Tensor]] = {} + + for name in metric_names: + if name not in self._metrics: + continue + + values = self._metrics[name].values.float() * loss_scale + + if percentiles and name in percentiles: + nonzero = values[values > 0] + if nonzero.numel() > 0: + pcts = percentiles[name] + pct_vals = torch.quantile( + nonzero, torch.tensor(pcts, device=nonzero.device) + ).tolist() + for pct, pct_val in zip(pcts, pct_vals): + result[f"{name}_p{int(pct * 100)}"] = pct_val + + result[name] = values.sum() / num_moe_layers + + return result + + def _log_scalars( + self, scalars: Dict[str, Union[float, torch.Tensor]], iteration: int, writer, wandb_writer + ) -> None: + """Write scalar metrics to TensorBoard and/or W&B.""" + for name, value in scalars.items(): + if writer is not None: + writer.add_scalar(name, value, iteration) + if wandb_writer is not None: + wandb_writer.log({name: value}, iteration) + + def _log_per_layer( + self, + loss_scale: float, + metric_names: List[str], + iteration: int, + writer, + wandb_writer, + percentiles: Optional[Dict[str, List[float]]] = None, + ) -> None: + """Write per-layer metric values to TensorBoard and/or W&B.""" + for name in metric_names: + if name not in self._metrics: + continue + + values = self._metrics[name].values.float() * loss_scale + is_sparse = percentiles is not None and name in percentiles + for i, val in enumerate(values.tolist()): + if is_sparse and val == 0: + continue + if writer is not None: + writer.add_scalar(f"moe/{name}_layer_{i}", val, iteration) + if wandb_writer is not None: + wandb_writer.log({f"moe/{name}_layer_{i}": val}, iteration) + + @staticmethod + def _format(scalars: Dict[str, Union[float, torch.Tensor]]) -> str: + """Format aggregated metrics as a console log string.""" + return "".join(f" {k}: {v:.2f} |" for k, v in scalars.items()) diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index a55f98201bf..e736bc65142 
100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -18,9 +18,10 @@ from megatron.core.tensor_parallel.mappings import reduce_from_tensor_model_parallel_region from megatron.core.transformer.cuda_graphs import is_graph_capturing from megatron.core.transformer.enums import CudaGraphScope +from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker from megatron.core.transformer.moe.router_replay import RouterReplay from megatron.core.transformer.transformer_config import TransformerConfig -from megatron.core.utils import internal_api, is_te_min_version +from megatron.core.utils import deprecated, internal_api, is_te_min_version try: import transformer_engine as te # pylint: disable=unused-import @@ -43,10 +44,6 @@ HAVE_TE = False -# MOE logging -_MOE_LAYER_WISE_LOGGING_TRACKER: dict = {} - - def switch_load_balancing_loss_func( probs: torch.Tensor, tokens_per_expert: torch.Tensor, @@ -914,6 +911,9 @@ def apply_router_token_dropping( return final_probs, final_map +@deprecated( + version="0.16", removal_version="0.18", alternative="get_moe_metrics_tracker().record()" +) def save_to_aux_losses_tracker( name: str, loss: torch.Tensor, @@ -930,38 +930,36 @@ def save_to_aux_losses_tracker( layer_number (int): Layer index of the loss. num_layers (int): The number of total layers. reduce_group (torch.distributed.ProcessGroup, optional): The group for reducing the loss. - Defaults to None. + Defaults to None. avg_group (torch.distributed.ProcessGroup, optional): The group for averaging the loss. - Defaults to None. - reduce_group_has_dp (bool, optional): Whether the reduce group has data parallel ranks. - Set this to True if the reduce group has data parallel ranks. This flag is used to - ensure the correct reduction in aux loss tracking. Defaults to False. + Defaults to None. + reduce_group_has_dp (bool, optional): Whether the reduce group already includes DP ranks. + If True, DP averaging is skipped. 
Defaults to False. """ - # Skip aux loss logging if layer_number is None. - if layer_number is None: - return - - tracker = get_moe_layer_wise_logging_tracker() - if name not in tracker: - tracker[name] = {} - tracker[name]["values"] = torch.zeros(num_layers, device=loss.device) - tracker[name]["values"][layer_number - 1] += loss.detach() # Aggregate the loss for the layer. - tracker[name]["reduce_group"] = reduce_group - tracker[name]["avg_group"] = avg_group - tracker[name]["reduce_group_has_dp"] = reduce_group_has_dp + get_moe_metrics_tracker().record( + name=name, + value=loss, + layer_number=layer_number, + num_layers=num_layers, + reduce_group=reduce_group, + avg_group=avg_group, + needs_dp_avg=not reduce_group_has_dp, + ) +@deprecated(version="0.16", removal_version="0.18", alternative="get_moe_metrics_tracker().clear()") def clear_aux_losses_tracker() -> None: """Clear the auxiliary losses.""" - tracker = get_moe_layer_wise_logging_tracker() - for name in tracker: - tracker[name]["values"].zero_() + get_moe_metrics_tracker().clear() +@deprecated( + version="0.16", removal_version="0.18", alternative="get_moe_metrics_tracker()._sync_metrics()" +) def reduce_aux_losses_tracker_across_ranks( track_names: Optional[List[str]] = None, pg_collection: Optional[ProcessGroupCollection] = None ) -> None: - """Collect and reduce the auxiliary losses across ranks. + """Reduce the auxiliary losses across ranks. Args: track_names (Optional[List[str]], optional): @@ -969,45 +967,28 @@ def reduce_aux_losses_tracker_across_ranks( pg_collection (Optional[ProcessGroupCollection], optional): The process group collection. Defaults to None. 
""" - tracker = get_moe_layer_wise_logging_tracker() - if track_names is None: - track_names = tracker.keys() - - if pg_collection is None: - # Use parallel_state groups - pp_group = parallel_state.get_pipeline_model_parallel_group() - dp_group = parallel_state.get_data_parallel_group( - with_context_parallel=False, partial_data_parallel=False - ) - else: - pp_group = pg_collection.pp - dp_group = pg_collection.dp - - for name in track_names: - values = tracker[name]["values"] - # TODO(Hepteract): delete the usage of the global parallel_state. - # Collect aux losses across PP. - torch.distributed.all_reduce(values, group=pp_group) - # Reduce aux losses across ranks. - if tracker[name].get('reduce_group') is not None: - torch.distributed.all_reduce(values, group=tracker[name].get('reduce_group')) - # Need to conduct reduction across data parallel ranks. When the reduce_group - # does not have 'dp' attribute, do it manually. - if not tracker[name].get('reduce_group_has_dp', False): - torch.distributed.all_reduce( - values, group=dp_group, op=torch.distributed.ReduceOp.AVG - ) - if tracker[name].get('avg_group') is not None: - torch.distributed.all_reduce( - values, group=tracker[name]['avg_group'], op=torch.distributed.ReduceOp.AVG - ) - # Average aux losses across data parallel ranks. - # The `global_load_balancing_loss` already uses `tp_dp_cp_group` in `reduce_group`, - # so we don't need to reduce it again. Others use `tp_cp_group` in `reduce_group`. 
- if name != "global_load_balancing_loss": - torch.distributed.all_reduce(values, group=dp_group, op=torch.distributed.ReduceOp.AVG) - - + tracker = get_moe_metrics_tracker() + names_list = track_names if track_names is not None else list(tracker.metrics.keys()) + tracker._sync_metrics(names_list, pg_collection) + + +@deprecated(version="0.16", removal_version="0.18", alternative="get_moe_metrics_tracker().metrics") +def get_moe_layer_wise_logging_tracker(): + """Return the moe layer wise tracker in legacy dict format.""" + return { + name: { + "values": entry.values, + "reduce_group": entry.reduce_group, + "avg_group": entry.avg_group, + "needs_dp_avg": entry.needs_dp_avg, + } + for name, entry in get_moe_metrics_tracker().metrics.items() + } + + +@deprecated( + version="0.15", removal_version="0.17", alternative="get_moe_metrics_tracker().report()" +) def track_moe_metrics( loss_scale: float, iteration: int, @@ -1021,95 +1002,25 @@ def track_moe_metrics( moe_layer_freq: Optional[Union[int, List[int]]] = None, mtp_num_layers: Optional[int] = None, pg_collection: Optional[ProcessGroupCollection] = None, -) -> None: +) -> str: """Track the MoE metrics for logging. - Args: - loss_scale (float): The loss scale. - iteration (int): The iteration. - writer (SummaryWriter, optional): The tensorboard writer. Defaults to None. - wandb_writer (wandb.Run, optional): The wandb writer. Defaults to None. - total_loss_dict (dict[str, torch.Tensor], optional): The total loss dictionary. - Defaults to None. - per_layer_logging (bool, optional): Whether to log per layer. Defaults to False. - force_initialize (bool, optional): Whether to force initialize the tracker. - Defaults to False. - track_names (List[str], optional): The names of the losses to track. Defaults to None. - num_layers (int, optional): The number of layers. Defaults to None. - moe_layer_freq (Union[int, List[int]], optional): The frequency of the MoE layers. - Defaults to None. 
- mtp_num_layers (int, optional): The number of layers in the model parallel group. - Defaults to None. - pg_collection (ProcessGroupCollection, optional): The process group collection. - Defaults to None. + Deprecated: Use get_moe_metrics_tracker().report() directly. """ - # Aux loss logging - tracker = get_moe_layer_wise_logging_tracker() - # Initialize the tracker if force_initialize is True. - # The values tensor size must match what the router creates in save_to_aux_losses_tracker, - # which uses (num_layers + mtp_num_layers). This is important for PP ranks that have no - # MoE layers (so the tracker is empty and force_initialize creates the entry); their tensor - # size must match ranks that do have MoE layers, otherwise all_reduce across PP will hang. - tracker_num_layers = num_layers - if mtp_num_layers is not None: - tracker_num_layers += mtp_num_layers - if force_initialize: - if track_names is not None: - for key in track_names: - if key not in tracker: - tracker[key] = {} - tracker[key]["values"] = torch.zeros(tracker_num_layers, device="cuda") - tracker[key]["reduce_group"] = None - tracker[key]["avg_group"] = None - tracker[key]["reduce_group_has_dp"] = False - reduce_aux_losses_tracker_across_ranks(track_names, pg_collection=pg_collection) - - # Get number of MoE layers - if moe_layer_freq is None: - num_moe_layers = num_layers - elif isinstance(moe_layer_freq, int): - assert isinstance(num_layers, int) - moe_layer_pattern = [1 if (i % moe_layer_freq == 0) else 0 for i in range(num_layers)] - num_moe_layers = sum(moe_layer_pattern) - elif isinstance(moe_layer_freq, list): - num_moe_layers = sum(moe_layer_freq) - else: - raise ValueError(f"Invalid moe_layer_freq: {moe_layer_freq}") - - if mtp_num_layers is not None: - num_moe_layers += mtp_num_layers - - aux_losses = {k: v['values'].float() * loss_scale for k, v in tracker.items()} - for name, loss_list in aux_losses.items(): - if total_loss_dict is not None: - if name not in total_loss_dict: - 
total_loss_dict[name] = loss_list.sum() / num_moe_layers - else: - total_loss_dict[name] += loss_list.sum() / num_moe_layers - if writer is not None: - # currently when using add_scalars, - # torch.utils.add_scalars makes each timer its own run, which - # polutes the runs list, so we just add each as a scalar - writer.add_scalar(name, loss_list.sum() / num_moe_layers, iteration) - if per_layer_logging: - for i, loss in enumerate(loss_list.tolist()): - writer.add_scalar(f"moe/{name}_layer_{i}", loss, iteration) - - # W&B logging lacks support for logging multiple scalars simultaneously. - # As a workaround, we log each scalar individually first, then we can create - # a custom panel to manually group them to a single plot. - if wandb_writer: - wandb_writer.log({f"{name}": loss_list.sum() / num_moe_layers}, iteration) - if per_layer_logging: - wandb_writer.log( - { - f"moe/{name}_layer_{i}": loss - for i, loss in enumerate(loss_list.tolist()) - }, - iteration, - ) - - clear_aux_losses_tracker() + return get_moe_metrics_tracker().report( + loss_scale=loss_scale, + iteration=iteration, + writer=writer, + wandb_writer=wandb_writer, + per_layer_logging=per_layer_logging, + force_initialize=force_initialize, + track_names=track_names, + num_layers=num_layers, + moe_layer_freq=moe_layer_freq, + mtp_num_layers=mtp_num_layers, + pg_collection=pg_collection, + total_loss_dict=total_loss_dict, + ) def get_updated_expert_bias( @@ -1163,12 +1074,6 @@ def maybe_move_tensor_to_cpu( return tensor -def get_moe_layer_wise_logging_tracker() -> dict: - """Return the moe layer wise tracker.""" - global _MOE_LAYER_WISE_LOGGING_TRACKER - return _MOE_LAYER_WISE_LOGGING_TRACKER - - @internal_api class RandomSTE(torch.autograd.Function): """ diff --git a/megatron/core/transformer/moe/router.py b/megatron/core/transformer/moe/router.py index b468270f50b..c9a2a469531 100644 --- a/megatron/core/transformer/moe/router.py +++ b/megatron/core/transformer/moe/router.py @@ -7,6 +7,7 @@ from 
megatron.core.jit import jit_fuser from megatron.core.transformer.module import MegatronModule +from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker from megatron.core.transformer.moe.moe_utils import ( MoEAuxLossAutoScaler, ProcessGroupCollection, @@ -16,7 +17,6 @@ compute_routing_scores_for_aux_loss, get_tokens_per_expert_and_token_count, router_gating_linear, - save_to_aux_losses_tracker, sinkhorn, switch_load_balancing_loss_func, topk_routing_with_score_function, @@ -419,7 +419,7 @@ def _apply_global_aux_loss( global_aux_loss, "global_load_balancing_loss", self.tp_dp_cp_group, - reduce_group_has_dp=True, + needs_dp_avg=False, valid_token_count=local_num_tokens, ) return probs @@ -431,7 +431,7 @@ def attach_and_log_load_balancing_loss( aux_loss: torch.Tensor, aux_loss_name: str, reduce_group: torch.distributed.ProcessGroup, - reduce_group_has_dp: bool = False, + needs_dp_avg: bool = True, valid_token_count: Optional[Union[int, torch.Tensor]] = None, ): """Attach aux loss function to activation and add to logging. @@ -442,9 +442,7 @@ def attach_and_log_load_balancing_loss( aux_loss (torch.Tensor): Computed aux loss. aux_loss_name (str): Name of the aux loss for logging. reduce_group (torch.distributed.ProcessGroup): Process group for reduction. - reduce_group_has_dp (bool): Whether the reduce group has data parallel ranks. - Set this to True if the reduce group has data parallel ranks. This flag is used to - ensure the correct reduction in aux loss tracking. + needs_dp_avg (bool): Whether to average this metric across DP ranks after reduce_group. valid_token_count (int or torch.Tensor, optional): Number of valid tokens excluding padding tokens. Can be a Python int or a torch.Tensor (typically 0-d tensor). If None, uses activation.shape[0]. Defaults to None. 
@@ -472,13 +470,13 @@ def attach_and_log_load_balancing_loss( else: layer_number = self.layer_number - save_to_aux_losses_tracker( + get_moe_metrics_tracker().record( aux_loss_name, aux_loss / aux_loss_coeff, layer_number, num_layers, reduce_group=reduce_group, - reduce_group_has_dp=reduce_group_has_dp, + needs_dp_avg=needs_dp_avg, ) if self.calculate_per_token_loss: # Scale the aux_loss by the number of tokens. @@ -545,7 +543,7 @@ def apply_z_loss(self, logits, padding_mask: Optional[torch.Tensor] = None): else: layer_number = self.layer_number - save_to_aux_losses_tracker( + get_moe_metrics_tracker().record( "z_loss", z_loss / moe_z_loss_coeff, layer_number, num_layers ) return logits diff --git a/megatron/training/training.py b/megatron/training/training.py index 2ee06acf795..1b970d61ed3 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -145,7 +145,7 @@ def set_startup_timestamps(program_start=None, main_entry=None): from megatron.core.datasets.data_schedule import HybridCPDataLoaderWrapper from megatron.core.optimizer_param_scheduler import OptimizerParamScheduler from megatron.core.transformer.moe import upcycling_utils -from megatron.core.transformer.moe.moe_utils import track_moe_metrics, clear_aux_losses_tracker +from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker from megatron.core.transformer.experimental_attention_variant.dsa import DSAIndexerLossLoggingHelper from megatron.core.transformer.multi_token_prediction import MTPLossLoggingHelper from megatron.core.parallel_state import ( @@ -2042,8 +2042,8 @@ def training_log( writer.add_scalar('max_attention_logit', max_attention_logit, iteration) if wandb_writer: wandb_writer.log({'max_attention_logit': max_attention_logit}, iteration) - # Log MoE metrics. 
+ moe_log_string = "" if args.num_experts is not None: moe_loss_scale = 1 / get_num_microbatches() track_names = [] @@ -2061,12 +2061,11 @@ def training_log( else: layers = args.num_layers - track_moe_metrics( + moe_log_string = get_moe_metrics_tracker().report( loss_scale=moe_loss_scale, iteration=iteration, writer=writer, wandb_writer=wandb_writer, - total_loss_dict=total_loss_dict, per_layer_logging=args.moe_per_layer_logging, force_initialize=True, track_names=track_names, @@ -2074,6 +2073,7 @@ def training_log( moe_layer_freq=args.moe_layer_freq, mtp_num_layers=args.mtp_num_layers, pg_collection=pg_collection, + total_loss_dict=total_loss_dict, ) # Log MTP metrics. @@ -2158,6 +2158,8 @@ def training_log( log_string += ' {}: {:.6E} |'.format(key, avg) if should_reset: total_loss_dict[key] = torch.tensor([0.0], dtype=torch.float, device='cuda') + if args.num_experts is not None and moe_log_string: + log_string += moe_log_string log_string += f' loss scale: {loss_scale:.1f} |' if grad_norm is not None: log_string += f' grad norm: {grad_norm:.3f} |' @@ -3094,7 +3096,7 @@ def trace_handler(p): if args.log_energy: energy_monitor.resume() if args.num_experts is not None: - clear_aux_losses_tracker() + get_moe_metrics_tracker().clear() # Miscellaneous post-training-step functions (e.g., FT heartbeats, GC). # Some of these only happen at specific iterations. 
Capture updated FLOPs accumulator diff --git a/tests/unit_tests/models/test_mamba_moe_model.py b/tests/unit_tests/models/test_mamba_moe_model.py index 9797f5c20f7..5ecd4e92d80 100644 --- a/tests/unit_tests/models/test_mamba_moe_model.py +++ b/tests/unit_tests/models/test_mamba_moe_model.py @@ -16,6 +16,7 @@ from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed from megatron.core.transformer import TransformerConfig from megatron.core.transformer.enums import AttnBackend +from megatron.core.transformer.moe.moe_logging import destroy_moe_metrics_tracker from megatron.training.arguments import core_transformer_config_from_args, parse_args, validate_args from megatron.training.global_vars import ( destroy_global_vars, @@ -478,6 +479,7 @@ def create_test_args(self): def setup_method(self, method): os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1' + destroy_moe_metrics_tracker() args = self.create_test_args() set_args(args) From 0b0074e14ca0c4d0f11d36a3ca1ed20ccb7d1198 Mon Sep 17 00:00:00 2001 From: jingqiny-99 Date: Fri, 6 Mar 2026 12:12:52 +0800 Subject: [PATCH 303/334] [dev] feat(mHC): Add basic pytorch implementation of manifold hyper connection(mHC). 
(#2943) Co-authored-by: Jingqin Yang Co-authored-by: root Co-authored-by: Dennis Liu --- gpt_builders.py | 2 + megatron/core/fusions/fused_bias_dropout.py | 93 +- ...rimental_attention_variant_module_specs.py | 10 +- megatron/core/models/gpt/gpt_layer_specs.py | 51 +- megatron/core/pipeline_parallel/schedules.py | 48 +- megatron/core/tensor_parallel/random.py | 163 ++- megatron/core/transformer/__init__.py | 8 +- megatron/core/transformer/cuda_graphs.py | 3 +- megatron/core/transformer/hyper_connection.py | 696 ++++++++++ .../core/transformer/transformer_block.py | 85 +- .../core/transformer/transformer_config.py | 84 +- .../core/transformer/transformer_layer.py | 385 +++++- megatron/training/initialize.py | 8 +- .../golden_values_dev_dgx_h100.json | 287 +++++ .../model_config.yaml | 62 + tests/test_utils/recipes/h100/gpt.yaml | 5 + .../unit_tests/models/test_gpt_layer_specs.py | 67 + .../unit_tests/models/test_mamba_moe_model.py | 7 +- .../test_pp_mhc_compatibility.py | 1123 +++++++++++++++++ tests/unit_tests/test_fp8_param.py | 8 +- .../test_hyper_connection_recompute.py | 408 ++++++ .../transformer/test_mhc_block_manager.py | 397 ++++++ .../transformer/test_transformer_layer.py | 786 +++++++++++- 23 files changed, 4733 insertions(+), 53 deletions(-) create mode 100644 megatron/core/transformer/hyper_connection.py create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/model_config.yaml create mode 100644 tests/unit_tests/models/test_gpt_layer_specs.py create mode 100644 tests/unit_tests/pipeline_parallel/test_pp_mhc_compatibility.py create mode 100644 tests/unit_tests/transformer/test_hyper_connection_recompute.py create mode 100644 tests/unit_tests/transformer/test_mhc_block_manager.py diff --git a/gpt_builders.py b/gpt_builders.py index 0be64edaab6..6711cce356f 100644 --- a/gpt_builders.py +++ b/gpt_builders.py @@ -146,6 
+146,7 @@ def _get_transformer_layer_spec(use_te, config): use_kitchen_attention=config.use_kitchen_attention, kitchen_attention_backend=config.kitchen_attention_backend, fallback_to_eager_attn=config.fallback_to_eager_attn, + enable_hyper_connection=config.enable_hyper_connections, ) elif config.transformer_impl == "inference_optimized": return get_gpt_layer_with_inference_spec( @@ -165,4 +166,5 @@ def _get_transformer_layer_spec(use_te, config): use_kitchen=config.use_kitchen, use_kitchen_attention=config.use_kitchen_attention, kitchen_attention_backend=config.kitchen_attention_backend, + enable_hyper_connection=config.enable_hyper_connections, ) diff --git a/megatron/core/fusions/fused_bias_dropout.py b/megatron/core/fusions/fused_bias_dropout.py index 2eb4007f75c..1f2448d86be 100644 --- a/megatron/core/fusions/fused_bias_dropout.py +++ b/megatron/core/fusions/fused_bias_dropout.py @@ -1,10 +1,13 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -from typing import Optional, Tuple +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +from typing import TYPE_CHECKING, Optional, Tuple import torch from megatron.core.jit import jit_fuser +if TYPE_CHECKING: + from megatron.core.tensor_parallel.random import CheckpointManager + # pylint: disable=missing-function-docstring @@ -80,7 +83,26 @@ def bias_dropout_add_fused_inference( return _bias_dropout_add_func(x_with_bias, residual, prob, False) -def get_bias_dropout_add(training, fused): +def get_bias_dropout_add( + training, fused, mhc_recompute_manager: Optional['CheckpointManager'] = None +): + """ + Get the bias-dropout-add function. + + Args: + training: Whether in training mode. + fused: Whether to use fused implementation. + mhc_recompute_manager: Optional CheckpointManager for checkpoint management. + When provided, the returned function will wrap the BDA operation with + CheckpointWithoutOutput for memory-efficient recomputation. 
+ + Returns: + A callable that performs bias-dropout-add operation. + """ + if mhc_recompute_manager is not None: + # Return a checkpointed version that handles tuple unpacking internally + return _get_checkpointed_bda(training, fused, mhc_recompute_manager) + if fused: # jit scripting for a nn.module (with dropout) is not # triggering the fusion kernel. For now, we use two @@ -92,3 +114,68 @@ def get_bias_dropout_add(training, fused): return bias_dropout_add_fused_inference else: return bias_dropout_add_unfused(training) + + +def _get_checkpointed_bda(training, fused, mhc_recompute_manager: 'CheckpointManager'): + """ + Create a checkpointed bias-dropout-add function. + + This function handles: + 1. Tuple unpacking for x_with_bias (required because save_for_backward can't save tuples) + 2. Non-tensor arguments like dropout probability (handled by CheckpointWithoutOutput) + 3. Auto-registration to the CheckpointManager + + Args: + training: Whether in training mode. + fused: Whether to use fused implementation. + mhc_recompute_manager: CheckpointManager for checkpoint management. + + Returns: + A callable that performs checkpointed bias-dropout-add operation. + """ + from megatron.core.tensor_parallel.random import CheckpointWithoutOutput + + # Get the underlying BDA function + if fused: + if training: + bda_func = bias_dropout_add_fused_train + else: + bda_func = bias_dropout_add_fused_inference + else: + bda_func = bias_dropout_add_unfused(training) + + def _checkpointed_bda(x_with_bias, residual, prob): + """ + Checkpointed BDA that handles tuple unpacking internally. + + Args: + x_with_bias: Either a tuple (x, bias) or a single tensor x. + residual: Residual tensor. + prob: Dropout probability. + + Returns: + Output tensor after bias-dropout-add. 
+ """ + # Create checkpoint with manager + ckpt = CheckpointWithoutOutput(ckpt_manager=mhc_recompute_manager) + + # Handle case where x_with_bias might be a single tensor (e.g., from IdentityOp) + if isinstance(x_with_bias, tuple): + x, bias = x_with_bias + else: + x = x_with_bias + bias = None + + # Wrapper function that re-packs the tuple for the actual BDA function + def _bda_wrapper(output, bias, res, dropout): + return bda_func((output, bias), res, dropout) + + # Call checkpoint with unpacked arguments + result = ckpt.checkpoint(_bda_wrapper, x, bias, residual, prob) + + # No-op when manager is set - manager handles all discarding uniformly + ckpt.discard_output_and_register_recompute(result) + + return result + + return _checkpointed_bda diff --git a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py index 3051cf6e960..6222291449e 100644 --- a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py +++ b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py @@ -12,6 +12,7 @@ DSAttention, DSAttentionSubmodules, ) +from megatron.core.transformer.hyper_connection import HyperConnectionModule from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.multi_latent_attention import ( MLASelfAttention, @@ -24,6 +25,7 @@ ) from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.transformer_layer import ( + HyperConnectionTransformerLayer, TransformerLayer, TransformerLayerSubmodules, get_transformer_layer_offset, @@ -221,6 +223,10 @@ def get_transformer_layer_with_experimental_attention_variant_spec( # Get GPT decoder block layer specs rms_norm = config.normalization == "RMSNorm" + enable_hc = config.enable_hyper_connections + hc_module = HyperConnectionModule if enable_hc else IdentityOp + layer_module = HyperConnectionTransformerLayer if enable_hc else 
TransformerLayer + layer_specs = [] for layer_number in range(config.num_layers): attention = ( @@ -242,14 +248,16 @@ def get_transformer_layer_with_experimental_attention_variant_spec( layer_specs.append( ModuleSpec( - module=TransformerLayer, + module=layer_module, submodules=TransformerLayerSubmodules( input_layernorm=input_layernorm, self_attention=attention, self_attn_bda=get_bias_dropout_add, + self_attention_hyper_connection=hc_module, pre_mlp_layernorm=pre_mlp_layernorm, mlp=mlp, mlp_bda=get_bias_dropout_add, + mlp_hyper_connection=hc_module, ), ) ) diff --git a/megatron/core/models/gpt/gpt_layer_specs.py b/megatron/core/models/gpt/gpt_layer_specs.py index c52fc2bd7c5..1238f0a7601 100755 --- a/megatron/core/models/gpt/gpt_layer_specs.py +++ b/megatron/core/models/gpt/gpt_layer_specs.py @@ -1,4 +1,5 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +import copy import warnings from typing import Optional, Union @@ -11,6 +12,7 @@ from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec_for_backend from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules from megatron.core.transformer.enums import AttnMaskType, LayerType +from megatron.core.transformer.hyper_connection import HyperConnectionModule from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.mlp import MLP, MLPSubmodules from megatron.core.transformer.multi_latent_attention import ( @@ -32,6 +34,7 @@ ) from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.transformer_layer import ( + HyperConnectionTransformerLayer, TransformerLayer, TransformerLayerSubmodules, get_transformer_layer_offset, @@ -184,6 +187,7 @@ def get_gpt_layer_with_transformer_engine_submodules( fallback_to_eager_attn: bool = False, use_kitchen_attention: bool = False, kitchen_attention_backend: 
str = "sdpa", + enable_hyper_connection: bool = False, ) -> TransformerLayerSubmodules: """Use these submodules to use lower-level Transformer Engine modules (required for fp8 training). @@ -200,6 +204,8 @@ def get_gpt_layer_with_transformer_engine_submodules( qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False. use_te_op_fuser (bool, optional): Use Transformer Engine's operation-based API, which may enable certain operation fusions. Defaults to False. + enable_hyper_connection (bool): Use HyperConnectionTransformerLayer with + HyperConnectionModule instead of plain TransformerLayer. Defaults to False. Returns: TransformerLayerSubmodules: TE modules to construct a TransformerLayer @@ -234,6 +240,8 @@ def get_gpt_layer_with_transformer_engine_submodules( use_te_activation_func=use_te_activation_func, ) + hc_module = HyperConnectionModule if enable_hyper_connection else IdentityOp + if multi_latent_attention: assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA." 
linear_q_up_proj = ( @@ -264,9 +272,11 @@ def get_gpt_layer_with_transformer_engine_submodules( ), ), self_attn_bda=get_bias_dropout_add, + self_attention_hyper_connection=hc_module, pre_mlp_layernorm=backend.layer_norm() if num_experts else IdentityOp, mlp=mlp, mlp_bda=get_bias_dropout_add, + mlp_hyper_connection=hc_module, ) else: qk_norm = backend.layer_norm(for_qk=True) @@ -287,9 +297,11 @@ def get_gpt_layer_with_transformer_engine_submodules( ), ), self_attn_bda=get_bias_dropout_add, + self_attention_hyper_connection=hc_module, pre_mlp_layernorm=backend.layer_norm() if num_experts else IdentityOp, mlp=mlp, mlp_bda=get_bias_dropout_add, + mlp_hyper_connection=hc_module, sharded_state_dict_keys_map={ "mlp.0.weight": "mlp.linear_fc1.layer_norm_weight", "mlp.0.bias": "mlp.linear_fc1.layer_norm_bias", @@ -304,8 +316,10 @@ def get_gpt_layer_with_transformer_engine_submodules( @copy_signature(get_gpt_layer_with_transformer_engine_submodules) def get_gpt_layer_with_transformer_engine_spec(*args, **kwargs) -> ModuleSpec: """Use this spec to use lower-level Transformer Engine modules (required for fp8 training).""" + enable_hc = kwargs.get('enable_hyper_connection', False) + layer_module = HyperConnectionTransformerLayer if enable_hc else TransformerLayer return ModuleSpec( - module=TransformerLayer, + module=layer_module, submodules=get_gpt_layer_with_transformer_engine_submodules(*args, **kwargs), ) @@ -322,6 +336,7 @@ def get_gpt_layer_local_submodules( use_kitchen: bool = False, use_kitchen_attention: bool = False, kitchen_attention_backend: str = "sdpa", + enable_hyper_connection: bool = False, ) -> TransformerLayerSubmodules: """Use these submodules for an implementation using only modules in Megatron-Core. @@ -335,6 +350,8 @@ def get_gpt_layer_local_submodules( moe_use_legacy_grouped_gemm (bool, optional): Force use the legacy GroupedMLP. Defaults to False. qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False. 
+ enable_hyper_connection (bool): Use HyperConnectionTransformerLayer with + HyperConnectionModule instead of plain TransformerLayer. Defaults to False. Returns: TransformerLayerSubmodules: Megatron-Core modules to construct a TransformerLayer @@ -370,6 +387,8 @@ def get_gpt_layer_local_submodules( moe_use_legacy_grouped_gemm=moe_use_legacy_grouped_gemm, ) + hc_module = HyperConnectionModule if enable_hyper_connection else IdentityOp + if multi_latent_attention: assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA." return TransformerLayerSubmodules( @@ -390,9 +409,11 @@ def get_gpt_layer_local_submodules( ), ), self_attn_bda=get_bias_dropout_add, + self_attention_hyper_connection=hc_module, pre_mlp_layernorm=layer_norm, mlp=mlp, mlp_bda=get_bias_dropout_add, + mlp_hyper_connection=hc_module, ) else: return TransformerLayerSubmodules( @@ -413,9 +434,11 @@ def get_gpt_layer_local_submodules( ), ), self_attn_bda=get_bias_dropout_add, + self_attention_hyper_connection=hc_module, pre_mlp_layernorm=layer_norm, mlp=mlp, mlp_bda=get_bias_dropout_add, + mlp_hyper_connection=hc_module, sharded_state_dict_keys_map={ "input_layernorm.": "self_attention.linear_qkv.layer_norm_", "pre_mlp_layernorm.": "mlp.linear_fc1.layer_norm_", @@ -426,8 +449,10 @@ def get_gpt_layer_local_submodules( @copy_signature(get_gpt_layer_local_submodules) def get_gpt_layer_local_spec(*args, **kwargs) -> ModuleSpec: """Use this spec for an implementation using only modules in Megatron-Core.""" + enable_hc = kwargs.get('enable_hyper_connection', False) + layer_module = HyperConnectionTransformerLayer if enable_hc else TransformerLayer return ModuleSpec( - module=TransformerLayer, submodules=get_gpt_layer_local_submodules(*args, **kwargs) + module=layer_module, submodules=get_gpt_layer_local_submodules(*args, **kwargs) ) @@ -545,6 +570,7 @@ def get_gpt_decoder_layer_specs( qk_l2_norm=qk_l2_norm, use_kitchen=config.use_kitchen, use_te_activation_func=config.use_te_activation_func, + 
enable_hyper_connection=config.enable_hyper_connections, ) moe_layer_spec = get_gpt_layer_with_transformer_engine_spec( num_experts=config.num_moe_experts, @@ -555,6 +581,7 @@ def get_gpt_decoder_layer_specs( qk_l2_norm=qk_l2_norm, use_kitchen=config.use_kitchen, use_te_activation_func=config.use_te_activation_func, + enable_hyper_connection=config.enable_hyper_connections, ) else: dense_layer_spec = get_gpt_layer_local_spec( @@ -566,6 +593,7 @@ def get_gpt_decoder_layer_specs( normalization=normalization, qk_l2_norm=qk_l2_norm, use_kitchen=config.use_kitchen, + enable_hyper_connection=config.enable_hyper_connections, ) moe_layer_spec = get_gpt_layer_local_spec( num_experts=config.num_moe_experts, @@ -576,6 +604,7 @@ def get_gpt_decoder_layer_specs( normalization=normalization, qk_l2_norm=qk_l2_norm, use_kitchen=config.use_kitchen, + enable_hyper_connection=config.enable_hyper_connections, ) # Parse config.moe_layer_freq to determine the pattern of expert/dense layers. @@ -697,12 +726,22 @@ def get_gpt_mtp_block_spec_for_backend( if isinstance(spec, TransformerBlockSubmodules): # get the spec for the last layer of decoder block - transformer_layer_spec = spec.layer_specs[-1] - elif isinstance(spec, ModuleSpec) and spec.module == TransformerLayer: - transformer_layer_spec = spec + transformer_layer_spec = copy.copy(spec.layer_specs[-1]) + elif isinstance(spec, ModuleSpec) and issubclass(spec.module, TransformerLayer): + transformer_layer_spec = copy.copy(spec) else: raise ValueError(f"Invalid spec: {spec}") + transformer_layer_spec.submodules = copy.copy(transformer_layer_spec.submodules) + + # MTP does not support hyper connections yet; strip HC modules and + # downgrade the layer class to plain TransformerLayer. 
+ transformer_layer_spec.submodules.self_attention_hyper_connection = IdentityOp + transformer_layer_spec.submodules.cross_attention_hyper_connection = IdentityOp + transformer_layer_spec.submodules.mlp_hyper_connection = IdentityOp + if transformer_layer_spec.module is HyperConnectionTransformerLayer: + transformer_layer_spec.module = TransformerLayer + mtp_layer_spec = get_mtp_layer_spec_for_backend( mtp_model_layer_spec=transformer_layer_spec, backend=backend ) diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index e903f392bf0..6dd5e7de02a 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import contextlib from functools import partial @@ -1147,7 +1147,15 @@ def enable_grad_sync(): model_type = get_model_type(model[0]) - tensor_shape = [seq_length, micro_batch_size, config.hidden_size] + # Determine hidden dimension for P2P communication + # For hyper connections with multiple PP stages, use n-stream dimension + hidden_dim = config.hidden_size + if getattr(config, 'enable_hyper_connections', False) and pipeline_parallel_size > 1: + # For interleaved PP with hyper connections, all intermediate communications use n-stream + # Note: This is a simplified approach - proper VPP support may need more complex logic + hidden_dim = config.hidden_size * getattr(config, 'num_residual_streams', 1) + + tensor_shape = [seq_length, micro_batch_size, hidden_dim] tensor_shape[0] = tensor_shape[0] // cp_group.size() if config.sequence_parallel: tensor_shape[0] = tensor_shape[0] // tp_group.size() @@ -2082,10 +2090,20 @@ def get_tensor_shapes( config, tp_group: torch.distributed.ProcessGroup, cp_group: torch.distributed.ProcessGroup, + pp_group: torch.distributed.ProcessGroup = None, + is_recv: bool = 
True, ): """ Determine right tensor sizes (based on position of rank with respect to split rank) and model size. + + For hyper connections (mHC), intermediate pipeline stages communicate n-stream tensors + with dimension hidden_size * num_residual_streams. + + Args: + is_recv: If True, compute shape for receiving; if False, for sending. + This matters for hyper connections where first/last stages have different + send/recv dimensions. """ tensor_shapes = [] @@ -2096,7 +2114,27 @@ def get_tensor_shapes( if config.sequence_parallel: effective_seq_length = effective_seq_length // tp_group.size() - tensor_shapes.append((effective_seq_length, micro_batch_size, config.hidden_size)) + # Determine hidden dimension based on hyper connections and pipeline stage + hidden_size = config.hidden_size + # TODO: make this more robust, including flexible VPP layout + if getattr(config, 'enable_hyper_connections', False) and pp_group is not None: + pp_rank = pp_group.rank() + pp_size = pp_group.size() + # For hyper connections: + # - recv: stages with rank > 0 receive n-stream (n*C) from previous stage + # - send: stages with rank < pp_size-1 send n-stream (n*C) to next stage + use_nstream = False + if is_recv and pp_rank > 0: + # Receiving from previous stage (which sends n*C) + use_nstream = True + elif not is_recv and pp_rank < pp_size - 1: + # Sending to next stage (send n*C) + use_nstream = True + + if use_nstream: + hidden_size = hidden_size * getattr(config, 'num_residual_streams', 1) + + tensor_shapes.append((effective_seq_length, micro_batch_size, hidden_size)) return tensor_shapes @@ -2245,6 +2283,8 @@ def enable_grad_sync(): config=config, tp_group=tp_group, cp_group=cp_group, + pp_group=p2p_communicator.pp_group, + is_recv=True, ) send_tensor_shapes = get_tensor_shapes( seq_length=seq_length, @@ -2253,6 +2293,8 @@ def enable_grad_sync(): config=config, tp_group=tp_group, cp_group=cp_group, + pp_group=p2p_communicator.pp_group, + is_recv=False, ) if adjust_tensor_shapes_fn 
is not None: recv_tensor_shapes, send_tensor_shapes = adjust_tensor_shapes_fn( diff --git a/megatron/core/tensor_parallel/random.py b/megatron/core/tensor_parallel/random.py index b6932607f2e..4516fe10d88 100644 --- a/megatron/core/tensor_parallel/random.py +++ b/megatron/core/tensor_parallel/random.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # Parts of the code here are adapted from PyTorch # repo: https://github.com/pytorch/pytorch @@ -598,7 +598,9 @@ def forward( @staticmethod def backward(ctx, *args): """Backward pass.""" - if not torch.autograd._is_checkpoint_valid(): + from megatron.core.transformer.cuda_graphs import is_graph_capturing + + if not torch.autograd._is_checkpoint_valid() and not is_graph_capturing(): raise RuntimeError( "Checkpointing is not compatible with .grad(), " "please use .backward() if possible" @@ -642,10 +644,67 @@ def checkpoint( return CheckpointFunction.apply(function, distribute_saved_activations, *args) +def _save_args_to_ctx(ctx, args): + """Save mixed tensor/non-tensor arguments into autograd ctx. + + Since save_for_backward only supports tensors, this function separates + tensor and non-tensor arguments, saving tensors via save_for_backward + and storing non-tensor metadata (indices and values) as ctx attributes. + + Use _load_args_from_ctx to reconstruct the original args. + """ + tensor_args = [] + non_tensor_entries = [] + + for index, arg in enumerate(args): + if isinstance(arg, torch.Tensor): + tensor_args.append(arg) + continue + non_tensor_entries.append((index, arg)) + + ctx.save_for_backward(*detach_variable(tuple(tensor_args))) + ctx._non_tensor_entries = tuple(non_tensor_entries) + ctx._total_args_count = len(args) + + +def _load_args_from_ctx(ctx): + """Load and reconstruct mixed tensor/non-tensor arguments from autograd ctx. + + This is the inverse of _save_args_to_ctx. 
It retrieves tensors from + ctx.saved_tensors and merges them with stored non-tensor arguments + to reconstruct the original args in their original order. + + Returns: + tuple of reconstructed arguments in their original order. + """ + + def _detach_with_grad(tensor): + detached = tensor.detach() + detached.requires_grad_(tensor.requires_grad) + return detached + + tensor_iter = iter(_detach_with_grad(t) for t in ctx.saved_tensors) + total_args_count = ctx._total_args_count + non_tensor_map = dict(ctx._non_tensor_entries) + + reconstructed_args = [] + for index in range(total_args_count): + if index in non_tensor_map: + reconstructed_args.append(non_tensor_map[index]) + else: + reconstructed_args.append(next(tensor_iter)) + return tuple(reconstructed_args) + + class CheckpointWithoutOutputFunction(torch.autograd.Function): """ Checkpoint Function Helper for CheckpointWithoutOutput. Save context for recompute. + + Handles both tensor and non-tensor arguments: + - Tensor arguments are saved via save_for_backward + - Non-tensor arguments (int, float, bool, None, etc.) 
are stored separately + in ctx attributes and reconstructed during recomputation """ @staticmethod @@ -668,7 +727,10 @@ def forward( with torch.no_grad(), fwd_ctx: outputs = run_function(*args) - ctx.save_for_backward(*detach_variable(args)) + + # Save tensor and non-tensor arguments into ctx for recomputation + _save_args_to_ctx(ctx, args) + # the CheckpointWithoutOutput object is passed in, then it can access the saved input # tensors later for recomputation checkpoint_without_output_obj.ctx = ctx @@ -685,10 +747,56 @@ def backward(ctx, *args): torch.autograd.backward(outputs, args) ctx.outputs = None ctx.inputs = None - grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else inp for inp in inputs) + grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else None for inp in inputs) return (None, None) + grads +class CheckpointManager: + """ + Manages multiple CheckpointWithoutOutput objects within a TransformerBlock + cross layer recomputations, enabling unified recomputation during backward pass. + This is particularly useful for scenarios where multiple checkpoint operations have + sequential dependencies (i.e., the output of one checkpoint is the input of the next). + + Usage: + ckptManager = CheckpointManager() + ckpt_function = CheckpointWithoutOutput(ckpt_manager=ckptManager) + ckpt_function.checkpoint(run_function, *args) + # other checkpointed operations + ckpt_manager.discard_all_outputs_and_register_unified_recompute(final_output) + """ + + def __init__(self): + self.checkpoints = [] + # Set by TransformerBlock before each layer forward. + # When True, the layer should keep block-boundary output uncheckpointed. 
+ self.is_last_layer_in_recompute_block = False + + def add_checkpoint(self, ckpt): + """Add a checkpoint to the manager.""" + if not isinstance(ckpt, CheckpointWithoutOutput): + raise TypeError("Expected CheckpointWithoutOutput object") + if ckpt.outputs is None: + raise ValueError("CheckpointWithoutOutput must call checkpoint() before adding") + self.checkpoints.append(ckpt) + + def discard_all_outputs_and_register_unified_recompute(self, hook_tensor): + """Discard all checkpoint outputs to save memory and register unified recompute hook.""" + for ckpt in self.checkpoints: + for output in ckpt.outputs: + output.untyped_storage().resize_(0) + + # Register unified recompute hook + if hook_tensor.requires_grad: + hook_tensor.register_hook(self._unified_recompute_hook) + + def _unified_recompute_hook(self, grad_output): + for ckpt in self.checkpoints: + # Call _recompute for each checkpoint in forward order + # The _recompute method will restore the output tensor storage + ckpt._recompute(None) + + class CheckpointWithoutOutput(object): """ Checkpoint a model or part of the model and release the output. @@ -703,8 +811,19 @@ class CheckpointWithoutOutput(object): discarded output tensors are directly saved in the following modules for backward computation. """ - def __init__(self, fp8=False): - self.fp8 = fp8 is not None + def __init__(self, fp8=False, ckpt_manager=None): + """ + Initialize CheckpointWithoutOutput. + + Args: + fp8: Whether to use FP8 mode. Defaults to False. + ckpt_manager: Optional CheckpointManager instance. When provided, + checkpoint() will auto-register to the manager, and + discard_output_and_register_recompute() will only discard + output without registering individual hooks. 
+ """ + self.fp8 = bool(fp8) + self.ckpt_manager = ckpt_manager self.run_function = None self.fwd_cpu_rng_state = None self.fwd_cuda_rng_state = None @@ -713,7 +832,12 @@ def __init__(self, fp8=False): self.outputs = None def checkpoint(self, run_function: Callable[[Unpack[_Ts]], _R], *args: Unpack[_Ts]) -> _R: - """Checkpoint function.""" + """ + Checkpoint function. + + If ckpt_manager was provided during initialization, this checkpoint + will be automatically registered to the manager after execution. + """ # If in cuda graph warmup, disable checkpointing, as 'discard_output_and_register_recompute' # may be called in a separate graph warmup. @@ -730,6 +854,11 @@ def checkpoint(self, run_function: Callable[[Unpack[_Ts]], _R], *args: Unpack[_T self.outputs = outputs if isinstance(self.outputs, torch.Tensor): self.outputs = (self.outputs,) + + # Auto-register to manager if provided + if self.ckpt_manager is not None: + self.ckpt_manager.add_checkpoint(self) + return outputs def _recompute(self, _): @@ -738,7 +867,7 @@ def _recompute(self, _): from megatron.core.transformer.cuda_graphs import is_graph_capturing, is_graph_warmup # The recomputation has been triggered already. Just return. 
- # Handle cudagraphs, do nothing if currently in graph warmup + # Handle cudagraphs: do nothing if currently in graph warmup if self.ctx is None or is_graph_warmup(): return @@ -760,17 +889,8 @@ def _recompute(self, _): recompute_ctx = contextlib.nullcontext() fp8_ctx = contextlib.nullcontext() - # Store the inputs for backward pass - inputs = self.ctx.saved_tensors - - def detach(t): - if isinstance(t, torch.Tensor): - requires_grad = t.requires_grad - t = t.detach() - t.requires_grad_(requires_grad) - return t - - inputs = tuple(detach(t) for t in inputs) + # Reconstruct full args list from saved ctx + inputs = _load_args_from_ctx(self.ctx) with torch.enable_grad(), fp8_ctx, recompute_ctx: outputs = self.run_function(*inputs) @@ -803,10 +923,11 @@ def discard_output_and_register_recompute(self, hook_tensor): in the forward pass and the gradient of the hook_tensor is computed before the recomputed tensors are used. """ - + # When ckpt_manager is set, this is a no-op. + # Manager handles all discarding and hook registration uniformly. from megatron.core.transformer.cuda_graphs import is_graph_warmup - if is_graph_warmup(): + if self.ckpt_manager is not None or is_graph_warmup(): return # use resize to release the output tensor memory and still keep the metadata in the tensors. diff --git a/megatron/core/transformer/__init__.py b/megatron/core/transformer/__init__.py index 0e3cdcfa57e..75e3b485c4f 100644 --- a/megatron/core/transformer/__init__.py +++ b/megatron/core/transformer/__init__.py @@ -1,6 +1,10 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from .module import MegatronModule from .spec_utils import ModuleSpec, build_module from .transformer_config import MLATransformerConfig, TransformerConfig -from .transformer_layer import TransformerLayer, TransformerLayerSubmodules +from .transformer_layer import ( + HyperConnectionTransformerLayer, + TransformerLayer, + TransformerLayerSubmodules, +) diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index f7b2bc79cab..0f7341f253e 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import dataclasses import gc @@ -112,6 +112,7 @@ def _set_warmup_start(): def _set_warmup_end(): """Set graph warmup has ended.""" global _IS_GRAPH_WARMUP + _IS_GRAPH_WARMUP = False @dataclass diff --git a/megatron/core/transformer/hyper_connection.py b/megatron/core/transformer/hyper_connection.py new file mode 100644 index 00000000000..5ccbd70c340 --- /dev/null +++ b/megatron/core/transformer/hyper_connection.py @@ -0,0 +1,696 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +import math +from typing import TYPE_CHECKING, Optional, Tuple + +import torch +import torch.nn as nn +from torch import Tensor + +from megatron.core.transformer.module import MegatronModule +from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.utils import nvtx_decorator + +if TYPE_CHECKING: + from megatron.core.tensor_parallel.random import CheckpointManager + + +class SinkhornKnopp(torch.autograd.Function): + """ + Differentiable Sinkhorn-Knopp algorithm for doubly stochastic projection. + + Projects a positive matrix onto the Birkhoff polytope (doubly stochastic matrices) + via iterative row and column normalization. + + Reference: Eq. 
(9) in mHC paper - M^{(t)} = T_c(T_r(M^{(t-1)})) + """ + + eps = 1e-6 + + @staticmethod + def _sinkhorn_normalize(M: Tensor, num_iterations: int) -> Tensor: + """ + Apply Sinkhorn-Knopp normalization iterations. + + Iteratively applies row and column normalization to project M + onto the Birkhoff polytope (doubly stochastic matrices). + + Args: + M: [s, b, n, n] - positive matrix to normalize + num_iterations: Number of Sinkhorn iterations + + Returns: + M: [s, b, n, n] - doubly stochastic matrix + """ + for _ in range(num_iterations): + # T_r: Row normalization + M = M / M.sum(dim=-1, keepdim=True).clamp(min=SinkhornKnopp.eps) + # T_c: Column normalization + M = M / M.sum(dim=-2, keepdim=True).clamp(min=SinkhornKnopp.eps) + return M + + @staticmethod + def forward(ctx, H_res_logits: Tensor, num_iterations: int) -> Tensor: + """ + Project to doubly stochastic matrix via iterative row/col normalization. + + Args: + H_res_logits: [s, b, n, n] - raw logits for residual mixing matrix + num_iterations: Number of Sinkhorn iterations (paper uses 20) + + Returns: + H_res: [s, b, n, n] - doubly stochastic matrix + """ + # Gradients are computed explicitly in backward via recomputation. + # Stabilized exp: subtract row-wise max to prevent overflow (log-sum-exp trick) + # M^{(0)} = exp(H_res_logits - max(H_res_logits)) - numerically equivalent + # after Sinkhorn normalization since row normalization absorbs the scaling. + M_init = torch.exp(H_res_logits - H_res_logits.max(dim=-1, keepdim=True).values) + + M = SinkhornKnopp._sinkhorn_normalize(M_init, num_iterations) + + # Save initial M for backward recomputation + ctx.save_for_backward(M_init) + ctx.num_iterations = num_iterations + return M + + @staticmethod + def backward(ctx, grad_output: Tensor) -> Tuple[Tensor, None]: + """ + Backward through Sinkhorn-Knopp iterations using recomputation. + + Recomputes the forward pass with gradient tracking to obtain accurate gradients. 
+ """ + (M_init,) = ctx.saved_tensors + num_iterations = ctx.num_iterations + + # Recompute forward with autograd enabled + with torch.enable_grad(): + # Leaf for recomputation + M_input = M_init.detach().requires_grad_(True) + + M_current = SinkhornKnopp._sinkhorn_normalize(M_input, num_iterations) + + # Compute dL/dM_input (i.e., dL/dM_init) via autograd + (grad_M_init,) = torch.autograd.grad( + outputs=M_current, + inputs=M_input, + grad_outputs=grad_output, + create_graph=False, + retain_graph=False, + ) + # Apply chain rule: dL/dH = dL/dM_init * dM_init/dH = dL/dM_init * M_init + # Since M_init = exp(H_res_logits), we have d(exp(x))/dx = exp(x) = M_init + grad_input = grad_M_init * M_init + + return grad_input, None + + +# TODO: keep hyper connection in fp32 computation +class HyperConnectionModule(MegatronModule): + """ + Unified mHC (Manifold-Constrained Hyper-Connections) module. + + Implements the complete mHC propagation: + x_{l+1} = H_res @ x_l + H_post^T @ F(H_pre @ x_l) + + This module handles: + 1. Computing learnable mappings: H_pre, H_post, H_res (with Sinkhorn-Knopp projection) + 2. Aggregation: n-stream → 1-stream (H_pre @ x) + 3. Expansion: 1-stream → n-stream (H_post^T @ output) + 4. Residual merge: H_res @ x + expanded_output + 5. 
Block-level expand/contract for TransformerBlock boundaries + + Args: + config: TransformerConfig with hyper-connection fields + layer_number: Current layer index for initialization + """ + + def __init__(self, config: TransformerConfig, layer_number: int): + super().__init__(config) + self.config = config + self.layer_number = layer_number + self.n = config.num_residual_streams + self.hidden_size = config.hidden_size + self.sinkhorn_iterations = config.mhc_sinkhorn_iterations + + # Projection weights for dynamic mappings + # Input: [s, b, n*C] -> Output: n^2 + 2n values per token + # - H_pre: n values + # - H_post: n values + # - H_res: n^2 values (before Sinkhorn projection) + self.mapping_proj = nn.Linear( + self.n * self.hidden_size, self.n * self.n + 2 * self.n, bias=False + ) + + init_alpha = config.mhc_init_gating_factor + # Learnable scaling factors (Eq. 5 in paper) + self.alpha_pre = nn.Parameter(torch.full((1,), init_alpha)) + self.alpha_post = nn.Parameter(torch.full((1,), init_alpha)) + self.alpha_res = nn.Parameter(torch.full((1,), init_alpha)) + + # Static bias terms + self.bias = nn.Parameter(torch.zeros(self.n * self.n + 2 * self.n)) + self.norm_eps = 1e-6 + + self._init_weights() + + def _init_weights(self) -> None: + """Initialize weights for stable training.""" + nn.init.xavier_uniform_(self.mapping_proj.weight) + + # Set sequence_parallel attribute on parameters for gradient synchronization + # across TP ranks when sequence_parallel is enabled. + # This is required because HyperConnectionModule uses non-TP-aware layers + # (nn.Linear, nn.RMSNorm) whose gradients need to be all-reduced. 
+ if self.config.sequence_parallel: + setattr(self.mapping_proj.weight, 'sequence_parallel', True) + setattr(self.alpha_pre, 'sequence_parallel', True) + setattr(self.alpha_post, 'sequence_parallel', True) + setattr(self.alpha_res, 'sequence_parallel', True) + setattr(self.bias, 'sequence_parallel', True) + + @torch.compile + def _projection_and_get_norm(self, x: Tensor) -> Tuple[Tensor, Tensor]: + """ + Project input hidden states to mapping space and apply RMS normalization. + + Args: + x: [s, b, n*C] - n-stream hidden states + """ + nC = x.shape[-1] + r = x.norm(dim=-1, keepdim=True) / math.sqrt(nC) # shape: [s, b, 1] + r = 1.0 / (r + self.norm_eps) # shape: [s, b, 1] + proj = self.mapping_proj(x) # [s, b, n^2 + 2n] + return proj, r + + @torch.compile + def _compute_h(self, proj: Tensor, r: Tensor) -> Tuple[Tensor, Tensor, Tensor]: + """ + Compute h from projected hidden states and scaling factors. + + Args: + proj: [s, b, n^2 + 2n] - projected hidden states + r: [s, b, 1] - scaling factors + + Returns: + h_pre: [s, b, n] - aggregation weights + h_post: [s, b, n] - expansion weights + h_res: [s, b, n^2] - residual mixing logits + """ + alpha_ = torch.cat( + [ + self.alpha_pre.expand(self.n), + self.alpha_post.expand(self.n), + self.alpha_res.expand(self.n * self.n), + ], + dim=-1, + ) + h = r * proj * alpha_ + self.bias + # H_pre = σ(α_pre * (θ_pre @ x̃) + b_pre) + h_pre = h[..., : self.n].sigmoid() # [s, b, n] + + # H_post = 2σ(α_post * (θ_post @ x̃) + b_post) + h_post = h[..., self.n : 2 * self.n].sigmoid() * 2 # [s, b, n] + h_res = h[..., 2 * self.n :] + return h_pre, h_post, h_res + + @nvtx_decorator(message="HyperConnection::compute_mappings") + def compute_mappings(self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor]: + """ + Compute mHC mappings from input hidden states. + + Reference: Eq. 
(5) and (8) in mHC paper + + Args: + x: [s, b, n*C] - n-stream hidden states + + Returns: + h_pre: [s, b, n] - aggregation weights (sigmoid activated) + h_post: [s, b, n] - expansion weights (2*sigmoid activated) + h_res: [s, b, n, n] - residual mixing matrix (doubly stochastic) + """ + s, b, _ = x.shape + with torch.cuda.nvtx.range("HyperConnection::projection_and_get_norm"): + proj, r = self._projection_and_get_norm(x) + with torch.cuda.nvtx.range("HyperConnection::compute_h"): + h_pre, h_post, h_res = self._compute_h(proj, r) + h_res = SinkhornKnopp.apply( + h_res.view(s, b, self.n, self.n), self.sinkhorn_iterations + ) # [s, b, n, n] + + return h_pre, h_post, h_res + + @torch.compile + def _apply_h_post(self, x: Tensor, h_post: Tensor) -> Tensor: + """ + Core implementation of H_post application to a single tensor. + + Computes: H_post^T @ x + + Args: + x: Input tensor, can be either: + - [s, b, C] - standard hidden states + - [C] - bias tensor (will be broadcast) + h_post: [s, b, n] - expansion weights + + Returns: + output: [s, b, n*C] - expanded tensor + """ + n = self.n + s, b, _ = h_post.shape + + if x.dim() == 1: + # x is bias with shape [C], need to broadcast to [s, b, 1, C] + C = x.shape[0] + x_expanded = x.unsqueeze(0).unsqueeze(0).unsqueeze(0).expand(s, b, 1, C) + else: + # x is [s, b, C] + C = x.shape[-1] + x_expanded = x.unsqueeze(2) # [s, b, 1, C] + + # h_post^T @ x : [s, b, n, 1] * [s, b, 1, C] -> [s, b, n, C] + # Using broadcast multiply instead of einsum + result = h_post.unsqueeze(-1) * x_expanded + return result.view(s, b, n * C) + + @nvtx_decorator(message="HyperConnection::apply_h_post") + def apply_h_post( + self, + x_with_bias: Tuple[Tensor, Optional[Tensor]], + h_post: Tensor, + manager: Optional['CheckpointManager'] = None, + ) -> Tuple[Tensor, Optional[Tensor]]: + """ + Apply H_post to x and optionally bias, with optional checkpointing. 
+ + This is the unified entry point that handles both normal execution + and checkpoint-based execution for memory efficiency. + + Args: + x_with_bias: Tuple of (x, bias) where: + - x: [s, b, C] - hidden states + - bias: [C] or None - optional bias tensor + h_post: [s, b, n] - expansion weights + manager: Optional CheckpointManager for checkpoint management. + When provided, wraps _apply_h_post with CheckpointWithoutOutput. + + Returns: + Tuple of (x_out, bias_out) where: + - x_out: [s, b, n*C] - expanded hidden states + - bias_out: [s, b, n*C] or None - expanded bias if input bias was not None + """ + x, bias = x_with_bias + + if manager is not None: + from megatron.core.tensor_parallel.random import CheckpointWithoutOutput + + # Checkpoint _apply_h_post to discard the output + x_out = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint( + self._apply_h_post, x, h_post + ) + + # Checkpoint _apply_h_post for bias if not None + if bias is not None: + bias_out = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint( + self._apply_h_post, bias, h_post + ) + else: + bias_out = None + else: + # Normal execution without checkpoint + x_out = self._apply_h_post(x, h_post) + bias_out = self._apply_h_post(bias, h_post) if bias is not None else None + + return x_out, bias_out + + @torch.compile + def aggregate(self, x: Tensor, h_pre: Tensor) -> Tensor: + """ + Aggregate n-stream to 1-stream using H_pre weights. 
+ + Computes: sum_i(h_pre_i * x_stream_i) + + Args: + x: [s, b, n*C] - n-stream hidden states + h_pre: [s, b, n] - aggregation weights + + Returns: + aggregated: [s, b, C] - single stream hidden states + """ + s, b, _ = x.shape + C = self.hidden_size + + # Reshape to [s, b, n, C] + x_streams = x.view(s, b, self.n, C) + + # Weighted sum: [s, b, n, C] * [s, b, n, 1] -> sum over n -> [s, b, C] + aggregated = (x_streams * h_pre.unsqueeze(-1)).sum(dim=2) + + return aggregated + + @torch.compile + def apply_h_res(self, h_res: Tensor, residual: Tensor) -> Tensor: + """ + Apply H_res to residual using H_res weights. + + Computes: H_res @ residual + + Args: + h_res: [s, b, n, n] - residual mixing matrix + residual: [s, b, n*C] - n-stream hidden states + """ + s, b, _ = residual.shape + n = self.n + C = self.hidden_size + + # Reshape for bmm: [s, b, n, n] -> [s*b, n, n] + h_res_batched = h_res.view(s * b, n, n) + # [s, b, n*C] -> [s, b, n, C] -> [s*b, n, C] + residual_batched = residual.view(s, b, n, C).view(s * b, n, C) + + # Batch matrix multiply: [s*b, n, n] @ [s*b, n, C] -> [s*b, n, C] + mixed = torch.bmm(h_res_batched, residual_batched) + + return mixed.view(s, b, n * C) + + def forward( + self, hidden_states: Tensor, mhc_recompute_manager: Optional['CheckpointManager'] = None + ) -> Tuple[Tensor, Tensor, Tensor]: + """ + Full mHC forward pass. + + Args: + hidden_states: [s, b, n*C] - n-stream hidden states + mhc_recompute_manager: Optional CheckpointManager for checkpoint management. + When provided, uses _forward_with_checkpoint for memory-efficient execution. 
+ + Returns: + aggregated: [s, b, C] - aggregated input for layer computation + h_res: [s, b, n, n] - residual mixing matrix (for fused kernel) + h_post: [s, b, n] - expansion weights + """ + if mhc_recompute_manager is not None: + return self._forward_with_checkpoint(hidden_states, mhc_recompute_manager) + else: + return self._forward_normal(hidden_states) + + def _forward_normal(self, hidden_states: Tensor) -> Tuple[Tensor, Tensor, Tensor]: + """ + Normal forward pass without checkpointing. + + Args: + hidden_states: [s, b, n*C] - n-stream hidden states + + Returns: + aggregated: [s, b, C] - aggregated input for layer computation + h_res: [s, b, n, n] - residual mixing matrix (for fused kernel) + h_post: [s, b, n] - expansion weights + """ + # Compute mappings + h_pre, h_post, h_res = self.compute_mappings(hidden_states) + + # Aggregate for layer input + with torch.cuda.nvtx.range("HyperConnection::aggregate"): + aggregated = self.aggregate(hidden_states, h_pre) + + return aggregated, h_res, h_post + + def _forward_with_checkpoint( + self, hidden_states: Tensor, manager: 'CheckpointManager' + ) -> Tuple[Tensor, Tensor, Tensor]: + """ + Forward pass with checkpointing for memory efficiency. + + compute_mappings is called directly (not checkpointed) since its outputs + (h_pre, h_post, h_res) are needed downstream. Only aggregate is wrapped with + CheckpointWithoutOutput and auto-registered to the manager. + apply_h_res is deferred to fused_h_res_h_post_bda for kernel fusion. 
+ + Args: + hidden_states: [s, b, n*C] - n-stream hidden states + manager: CheckpointManager for unified recomputation + + Returns: + aggregated: [s, b, C] - aggregated input for layer computation + h_res: [s, b, n, n] - residual mixing matrix (for fused kernel) + h_post: [s, b, n] - expansion weights + """ + from megatron.core.tensor_parallel.random import CheckpointWithoutOutput + + h_pre, h_post, h_res = self.compute_mappings(hidden_states) + + # Checkpoint aggregate - auto-registers to manager + aggregated = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint( + self.aggregate, hidden_states, h_pre + ) + + return aggregated, h_res, h_post + + # ==================== Block-level utilities ==================== + + @staticmethod + def input_expand(x: Tensor, n: int) -> Tensor: + """ + Expand 1-stream to n-stream at TransformerBlock entry. + + Simple replication strategy: each stream initialized as a copy of input. + + Args: + x: [s, b, C] - single stream hidden states + n: Number of residual streams + + Returns: + expanded: [s, b, n*C] - n-stream hidden states + """ + s, b, C = x.shape + # Replicate input to n streams + expanded = x.unsqueeze(2).expand(s, b, n, C).contiguous() + return expanded.view(s, b, n * C) + + @staticmethod + def output_contract(x: Tensor, n: int) -> Tensor: + """ + Contract n-stream to 1-stream at TransformerBlock exit. + + Simple averaging strategy: average all streams. 
+ + Args: + x: [s, b, n*C] - n-stream hidden states + n: Number of residual streams + + Returns: + contracted: [s, b, C] - single stream hidden states + """ + s, b, nC = x.shape + C = nC // n + # Average all streams + x_streams = x.view(s, b, n, C) + contracted = x_streams.mean(dim=2) + return contracted + + # ==================== Fused kernel placeholder ==================== + + @nvtx_decorator(message="HyperConnection::fused_h_res_h_post_bda") + def fused_h_res_h_post_bda( + self, + h_res: Tensor, + original_residual: Tensor, + h_post: Tensor, + layer_output_with_bias: Tuple[Tensor, Optional[Tensor]], + dropout_prob: float, + training: bool, + fused: bool, + manager: Optional['CheckpointManager'] = None, + ) -> Tensor: + """ + Fused kernel combining apply_h_res, apply_h_post and bias-dropout-add. + + This is a placeholder for future kernel fusion optimization. + Currently implements the operations sequentially using native PyTorch. + + The computation flow is: + 1. mixed = H_res @ original_residual (apply_h_res) + 2. expanded = H_post^T @ layer_output (apply_h_post) + 3. output = dropout(expanded + bias) + mixed (bias-dropout-add) + + Args: + h_res: [s, b, n, n] - residual mixing matrix + original_residual: [s, b, n*C] - n-stream hidden states (before H_res applied) + h_post: [s, b, n] - expansion weights + layer_output_with_bias: Tuple of (x, bias) where: + - x: [s, b, C] - layer output (attention or MLP output) + - bias: [C] or None - optional bias tensor + dropout_prob: Dropout probability + training: Whether in training mode + fused: Whether to use fused BDA implementation + manager: Optional CheckpointManager for checkpoint management. + When provided, each operation is wrapped with CheckpointWithoutOutput. 
+ + Returns: + output: [s, b, n*C] - final output after all operations + """ + if manager is not None: + return self._fused_h_res_h_post_bda_with_checkpoint( + h_res, + original_residual, + h_post, + layer_output_with_bias, + dropout_prob, + training, + fused, + manager, + ) + else: + return self._fused_h_res_h_post_bda_native( + h_res, + original_residual, + h_post, + layer_output_with_bias, + dropout_prob, + training, + fused, + ) + + def _fused_h_res_h_post_bda_native( + self, + h_res: Tensor, + original_residual: Tensor, + h_post: Tensor, + layer_output_with_bias: Tuple[Tensor, Optional[Tensor]], + dropout_prob: float, + training: bool, + fused: bool, + ) -> Tensor: + """ + Native implementation of fused h_res, h_post and bda operations. + + Args: + h_res: [s, b, n, n] - residual mixing matrix + original_residual: [s, b, n*C] - n-stream hidden states + h_post: [s, b, n] - expansion weights + layer_output_with_bias: Tuple of (x, bias) + dropout_prob: Dropout probability + training: Whether in training mode + fused: Whether to use fused BDA implementation + + Returns: + output: [s, b, n*C] - final output + """ + from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add + + # Step 1: Apply H_res to original residual + with torch.cuda.nvtx.range("HyperConnection::apply_h_res"): + mixed = self.apply_h_res(h_res, original_residual) + + # Step 2: Apply H_post to layer output + x, bias = layer_output_with_bias + with torch.cuda.nvtx.range("HyperConnection::apply_h_post"): + x_expanded = self._apply_h_post(x, h_post) + bias_expanded = self._apply_h_post(bias, h_post) if bias is not None else None + + # Step 3: Bias-dropout-add + bda_func = get_bias_dropout_add(training, fused) + with torch.cuda.nvtx.range("HyperConnection::bda"): + output = bda_func((x_expanded, bias_expanded), mixed, dropout_prob) + + return output + + @nvtx_decorator(message="HyperConnection::fused_h_res_h_post_bda_with_checkpoint") + def _fused_h_res_h_post_bda_with_checkpoint( + 
self, + h_res: Tensor, + original_residual: Tensor, + h_post: Tensor, + layer_output_with_bias: Tuple[Tensor, Optional[Tensor]], + dropout_prob: float, + training: bool, + fused: bool, + manager: 'CheckpointManager', + ) -> Tensor: + """ + Checkpointed implementation of fused h_res, h_post and bda operations. + + Uses a single checkpoint wrapper around all operations for memory efficiency. + + Args: + h_res: [s, b, n, n] - residual mixing matrix + original_residual: [s, b, n*C] - n-stream hidden states + h_post: [s, b, n] - expansion weights + layer_output_with_bias: Tuple of (x, bias) + dropout_prob: Dropout probability + training: Whether in training mode + fused: Whether to use fused BDA implementation + manager: CheckpointManager for checkpoint management + + Returns: + output: [s, b, n*C] - final output + """ + from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add + from megatron.core.tensor_parallel.random import CheckpointWithoutOutput + + # Get BDA function (captured via closure) + bda_func = get_bias_dropout_add(training, fused) + + # Unpack layer_output_with_bias to avoid tuple tensors in checkpoint args + x, bias = layer_output_with_bias + has_bias = bias is not None + + # Native wrapper that combines all operations without internal checkpointing. + # Non-tensor args (dropout_prob, has_bias) are captured via closure. 
+ def _native_wrapper(h_res, original_residual, h_post, x, *optional_bias): + # Step 1: Apply H_res to original residual + with torch.cuda.nvtx.range("HyperConnection::apply_h_res"): + mixed = self.apply_h_res(h_res, original_residual) + + # Step 2: Apply H_post to x and bias + with torch.cuda.nvtx.range("HyperConnection::apply_h_post"): + x_expanded = self._apply_h_post(x, h_post) + if has_bias: + bias_expanded = self._apply_h_post(optional_bias[0], h_post) + else: + bias_expanded = None + + # Step 3: Bias-dropout-add + with torch.cuda.nvtx.range("HyperConnection::bda"): + output = bda_func((x_expanded, bias_expanded), mixed, dropout_prob) + + return output + + # Use a single checkpoint wrapper for all operations + ckpt = CheckpointWithoutOutput(ckpt_manager=manager) + if has_bias: + output = ckpt.checkpoint(_native_wrapper, h_res, original_residual, h_post, x, bias) + else: + output = ckpt.checkpoint(_native_wrapper, h_res, original_residual, h_post, x) + + return output + + +# ==================== Checkpoint utilities for mHC ==================== + + +class HyperConnectionCheckpoint: + """ + Checkpoint utility for mHC intermediate activations. + + Implements the paper's "recomputing strategy" to reduce memory footprint + by discarding intermediate n-stream activations and recomputing on-the-fly. + """ + + @staticmethod + def compute_optimal_block_size(num_layers: int, num_streams: int) -> int: + """ + Compute optimal recomputation block size. + + From paper Eq. 
(20): L_r^* ≈ sqrt(nL/(n+2)) + + Args: + num_layers: Total number of transformer layers + num_streams: Number of residual streams (n) + + Returns: + block_size: Optimal block size for checkpointing + """ + block_size = int(math.sqrt(num_streams * num_layers / (num_streams + 2))) + return max(1, block_size) diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py index 5559b2536a9..e9bd52f34b4 100755 --- a/megatron/core/transformer/transformer_block.py +++ b/megatron/core/transformer/transformer_block.py @@ -1,8 +1,9 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + import logging from contextlib import nullcontext from dataclasses import dataclass -from typing import List, Optional, Set, Union, cast +from typing import List, Optional, Set, Tuple, Union, cast import torch from torch import Tensor @@ -18,7 +19,9 @@ from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.pipeline_parallel.utils import is_vp_first_stage, is_vp_last_stage from megatron.core.process_groups_config import ProcessGroupCollection +from megatron.core.tensor_parallel.random import CheckpointManager from megatron.core.transformer.enums import CudaGraphScope, LayerType +from megatron.core.transformer.hyper_connection import HyperConnectionModule from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module from megatron.core.transformer.torch_norm import LayerNormBuilder @@ -324,6 +327,7 @@ def __init__( self.offload_context, self.group_prefetch_offload_commit_async = nullcontext(), None self.config._cpu_offloading_context = None + self.num_residual_streams = config.num_residual_streams self._build_layers() self.num_layers_per_pipeline_rank = len(self.layers) @@ -648,6 +652,46 @@ def __call__(self, *args, **kwargs): 
return super().__call__(*args, **kwargs)[0] return super().__call__(*args, **kwargs) + def _build_mhc_recompute_layer_plan( + self, use_mhc_recompute: bool + ) -> Tuple[List[Optional[CheckpointManager]], List[bool]]: + """Pre-build per-layer MHC recompute managers and block-end markers.""" + num_layers = len(self.layers) + layer_managers: List[Optional[CheckpointManager]] = [None] * num_layers + is_recompute_block_end: List[bool] = [False] * num_layers + + if not use_mhc_recompute or num_layers == 0: + return layer_managers, is_recompute_block_end + + mhc_recompute_layer_num = self.config.mhc_recompute_layer_num + mhc_manager = CheckpointManager() + + for l_no in range(num_layers): + is_last_in_transformer_block = l_no == num_layers - 1 + is_last_in_recompute_block = is_last_in_transformer_block + if mhc_recompute_layer_num is not None: + is_last_in_recompute_block = is_last_in_transformer_block or ( + (l_no + 1) % mhc_recompute_layer_num == 0 + ) + + layer_managers[l_no] = mhc_manager + is_recompute_block_end[l_no] = is_last_in_recompute_block + + if is_last_in_recompute_block and not is_last_in_transformer_block: + mhc_manager = CheckpointManager() + + return layer_managers, is_recompute_block_end + + @staticmethod + def _finalize_mhc_recompute_layer( + mhc_manager: Optional[CheckpointManager], + hidden_states: Tensor, + is_last_in_recompute_block: bool, + ) -> None: + """Finalize MHC recompute state for the current layer when block ends.""" + if mhc_manager is not None and is_last_in_recompute_block: + mhc_manager.discard_all_outputs_and_register_unified_recompute(hidden_states) + def forward( self, hidden_states: Union[Tensor, WrappedTensor], @@ -757,6 +801,13 @@ def forward( # is called here to be future-proof and corner-case-proof. 
hidden_states = make_viewless_tensor(inp=hidden_states, requires_grad=True, keep_graph=True) + # Expand hidden states for hyper connections at the start of the block + # Only expand at the first PP stage; subsequent stages receive n-stream from previous stage + if self.config.enable_hyper_connections and self.pre_process: + hidden_states = HyperConnectionModule.input_expand( + hidden_states, self.num_residual_streams + ) # [s, b, C] -> [s, b, n*C] + if self.config.sequence_parallel: rng_context = tensor_parallel.get_cuda_rng_tracker().fork() else: @@ -784,6 +835,18 @@ def forward( use_inner_quantization_context = False outer_quantization_context = nullcontext() + # Determine if MHC recompute should be used + # Only enable when: training mode AND hyper connections AND 'mhc' in recompute_modules + use_mhc_recompute = ( + self.training + and self.config.enable_hyper_connections + and self.config.recompute_granularity == 'selective' + and "mhc" in self.config.recompute_modules + ) + mhc_layer_managers, mhc_is_last_in_recompute_block = self._build_mhc_recompute_layer_plan( + use_mhc_recompute + ) + with rng_context, outer_quantization_context: # Forward pass. 
if self.config.recompute_granularity == 'full' and self.training: @@ -824,6 +887,12 @@ def forward( else: inner_quantization_context = nullcontext() + mhc_manager = mhc_layer_managers[l_no] + if mhc_manager is not None: + mhc_manager.is_last_layer_in_recompute_block = ( + mhc_is_last_in_recompute_block[l_no] + ) + with self.offload_context, inner_quantization_context: hidden_states, context = layer( hidden_states=hidden_states, @@ -839,7 +908,13 @@ def forward( packed_seq_params=packed_seq_params, sequence_len_offset=sequence_len_offset, padding_mask=padding_mask, + mhc_recompute_manager=mhc_manager, ) + self._finalize_mhc_recompute_layer( + mhc_manager=mhc_manager, + hidden_states=hidden_states, + is_last_in_recompute_block=mhc_is_last_in_recompute_block[l_no], + ) if ( torch.is_grad_enabled() @@ -852,6 +927,12 @@ def forward( if (l_no + layer_offset) in extract_layer_indices: intermediate_hidden_states.append(hidden_states) + # Only contract if the final layer norm is in this stage + if self.config.enable_hyper_connections and self.has_final_layernorm_in_this_stage(): + hidden_states = HyperConnectionModule.output_contract( + hidden_states, self.num_residual_streams + ) # [s, b, n*C] -> [s, b, C] + # Final layer norm. if self.final_layernorm is not None: hidden_states = apply_module(self.final_layernorm)(cast(Tensor, hidden_states)) diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index d48e29c1e71..d055b7d96cb 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import logging import warnings @@ -428,7 +428,8 @@ class TransformerConfig(ModelParallelConfig): recompute_modules: Optional[List[str]] = None """The submodules to recompute. 
- choices: "core_attn", "moe_act", "layernorm", "mla_up_proj", "mlp", "moe", "shared_experts". + choices: "core_attn", "moe_act", "layernorm", "mla_up_proj", "mlp", "moe", + "shared_experts", "mhc". default: ["core_attn"]. "core_attn": recompute the core attention part of the transformer layer. "moe_act": recompute the MoE MLP activation function. @@ -437,7 +438,10 @@ class TransformerConfig(ModelParallelConfig): "mlp": recompute the dense MLP submodule. "moe": recompute the MoE layer. "shared_experts": recompute the shared experts in the MoE layer. - "moe_act", "layernorm", and "mla_up_proj" use output-discarding checkpointing, + "mhc": recompute HyperConnection intermediate activations via + CheckpointWithoutOutput + CheckpointManager. Requires + enable_hyper_connections=True. Cannot be used with "mlp". + "moe_act", "layernorm", "mla_up_proj", and "mhc" use output-discarding checkpointing, "core_attn", "mlp", "moe", and "shared_experts" use normal checkpointing. """ @@ -821,6 +825,35 @@ class TransformerConfig(ModelParallelConfig): When cuda_graph_impl is set to "local", "full_iteration" can be specified as cuda_graph_scope to enable whole iteration CUDA graph. All other values enable layerwise CUDA graph.""" + #################### + # Hyper-Connection Configuration + #################### + enable_hyper_connections: bool = False + """Enable mHC residual connections.""" + + num_residual_streams: int = 4 + """Number of residual streams (n in paper).""" + + mhc_sinkhorn_iterations: int = 20 + """Number of Sinkhorn-Knopp iterations for doubly stochastic projection.""" + + mhc_init_gating_factor: float = 0.01 + """Initial value of Gating Factor (alpha in paper).""" + + mhc_recompute_layer_num: Optional[int] = None + """Number of layers per MHC recompute block. + + When set, every `mhc_recompute_layer_num` layers form a recompute block. 
The last layer + in each recompute block (i.e., layer_number % mhc_recompute_layer_num == 0 or the final + layer in the transformer block) will: + - NOT checkpoint its final MLP BDA + - Register the unified recompute hook on its MLP BDA output + - A new CheckpointManager is created for subsequent layers + + If None, all layers in the transformer block share a single recompute block. + + Must be a positive integer when set.""" + #################### # miscellaneous #################### @@ -1265,6 +1298,7 @@ def __post_init__(self): "mlp", "moe", "shared_experts", + "mhc", } invalid_modules = set(self.recompute_modules) - allowed_modules assert not invalid_modules, ( @@ -1327,6 +1361,50 @@ def __post_init__(self): if "moe" not in self.recompute_modules: self.recompute_modules.append("moe") + # Validation for "mhc" in recompute_modules + if self.recompute_granularity == "selective" and "mhc" in self.recompute_modules: + if not self.enable_hyper_connections: + raise ValueError( + "'mhc' in recompute_modules requires enable_hyper_connections=True." + ) + if "mlp" in self.recompute_modules: + raise ValueError( + "'mhc' and 'mlp' in recompute_modules cannot be used together. " + "They use different checkpoint mechanisms that may conflict." + ) + if self.mhc_recompute_layer_num is not None and ( + isinstance(self.mhc_recompute_layer_num, bool) + or not isinstance(self.mhc_recompute_layer_num, int) + or self.mhc_recompute_layer_num < 1 + ): + raise ValueError( + "mhc_recompute_layer_num must be a positive integer when " + "'mhc' is in recompute_modules." + ) + if self.fine_grained_activation_offloading: + raise ValueError( + "'mhc' in recompute_modules is incompatible with " + "fine_grained_activation_offloading. The mHC recompute hook fires " + "before the offloading backward chunk is initialized, causing " + "tensor_pop on a None chunk. Disable one of them." 
+ ) + + if self.enable_hyper_connections and not ( + self.recompute_granularity == "selective" and "mhc" in self.recompute_modules + ): + warnings.warn( + "HyperConnections are enabled but 'mhc' is not in " + "recompute_modules with selective recompute. Consider adding 'mhc' to " + "recompute_modules with selective recompute to reduce activation memory." + ) + + # Validation for hyper_connections with MTP + if self.enable_hyper_connections and self.mtp_num_layers is not None: + raise ValueError( + "enable_hyper_connections is not compatible with Multi-Token Prediction (MTP). " + "Please disable MTP (set mtp_num_layers=None) when using hyper connections." + ) + if self.fine_grained_activation_offloading: assert ( not self.cpu_offloading diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index 58fe690c553..aac05312220 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
from __future__ import annotations import functools @@ -8,6 +8,9 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Dict, Optional, Union +if TYPE_CHECKING: + from megatron.core.tensor_parallel.random import CheckpointManager + import torch import torch.distributed from torch import Tensor @@ -228,14 +231,17 @@ class TransformerLayerSubmodules: """ input_layernorm: LayerNormBuilder = IdentityOp + self_attention_hyper_connection: Union[ModuleSpec, type] = IdentityOp self_attention: Union[ModuleSpec, type] = IdentityOp self_attn_bda: Union[ModuleSpec, type] = IdentityFuncOp pre_cross_attn_layernorm: LayerNormBuilder = IdentityOp + cross_attention_hyper_connection: Union[ModuleSpec, type] = IdentityOp cross_attention: Union[ModuleSpec, type] = IdentityOp cross_attn_bda: Union[ModuleSpec, type] = IdentityFuncOp pre_mlp_layernorm: LayerNormBuilder = IdentityOp + mlp_hyper_connection: Union[ModuleSpec, type] = IdentityOp mlp: Union[ModuleSpec, type] = IdentityOp mlp_bda: Union[ModuleSpec, type] = IdentityFuncOp @@ -587,8 +593,6 @@ def _forward_attention( ) if using_fused_tp_inference_kernel: - # Set the residual for fused reduce-scatter + add + layer-norm + all-gather - # operation in attention's out_proj (linear_proj) self._set_proj_residual(residual) # Self attention. @@ -674,6 +678,9 @@ def forward(self, *args, **kwargs): """ # Injected by __call__ for cuda graph keying; not a real forward arg. 
kwargs.pop("dynamic_inference_decode_only", None) + assert ( + not self.config.enable_hyper_connections + ), "Please use HyperConnectionTransformerLayer instead" hidden_states, context = self._forward_attention(*args, **kwargs) output = self._forward_mlp( hidden_states, @@ -1241,6 +1248,11 @@ def backward_dw_cudagraph(self, microbatch_idx): self.cuda_graphs[cg_index].backward_dw() def __call__(self, *args, **kwargs): + # Extract mhc_recompute_manager before CUDA graph manager processes kwargs, + # since CheckpointManager is not a CUDA-graph-supported type. + self._mhc_recompute_manager = kwargs.pop("mhc_recompute_manager", None) + kwargs.pop("is_last_layer_in_recompute_block", None) + if self._should_call_local_cudagraph(*args, **kwargs): # Inference mode. if kwargs.get('inference_context') is not None: @@ -1262,6 +1274,373 @@ def get_layer_norm_weights(self): return +class HyperConnectionTransformerLayer(TransformerLayer): + """A transformer layer with Manifold-Constrained Hyper-Connections (mHC). + + Extends TransformerLayer by adding hyper connection modules around self-attention + and MLP. The n-stream hidden states are aggregated before each sub-layer and + expanded back afterwards using learned mappings (H_pre, H_post, H_res). + + Cross-attention hyper connection is not supported. + """ + + def __init__( + self, + config: TransformerConfig, + submodules: TransformerLayerSubmodules, + layer_number: int = 1, + hidden_dropout: Optional[float] = None, + pg_collection: Optional[ProcessGroupCollection] = None, + vp_stage: Optional[int] = None, + ): + super().__init__( + config=config, + submodules=submodules, + layer_number=layer_number, + hidden_dropout=hidden_dropout, + pg_collection=pg_collection, + vp_stage=vp_stage, + ) + + if submodules.cross_attention_hyper_connection is not IdentityOp: + raise ValueError( + "HyperConnectionTransformerLayer does not support cross-attention " + "hyper connections. Use IdentityOp for cross_attention_hyper_connection." 
+ ) + + assert submodules.self_attention_hyper_connection is not IdentityOp, ( + "HyperConnectionTransformerLayer requires self_attention_hyper_connection. " + "Use TransformerLayer instead if hyper connections are not needed." + ) + assert submodules.mlp_hyper_connection is not IdentityOp, ( + "HyperConnectionTransformerLayer requires mlp_hyper_connection. " + "Use TransformerLayer instead if hyper connections are not needed." + ) + + self.self_attention_hyper_connection = build_module( + submodules.self_attention_hyper_connection, + config=self.config, + layer_number=self.layer_number, + ) + + self.mlp_hyper_connection = build_module( + submodules.mlp_hyper_connection, config=self.config, layer_number=self.layer_number + ) + + # When mHC recompute is active, skip checkpointing if the layernorm + # is IdentityOp (fused into TE linear) — there is nothing to recompute. + self.mhc_checkpoint_input_layernorm = not isinstance(self.input_layernorm, IdentityOp) + self.mhc_checkpoint_pre_mlp_layernorm = not isinstance(self.pre_mlp_layernorm, IdentityOp) + + def get_layer_static_inputs(self, seq_length, micro_batch_size): + """Override to produce n-stream hidden_states of shape [s, b, n*C]. + + CUDA graph capture creates static buffers whose shapes are determined by + this method. The base class returns [s, b, C], but mHC layers operate on + n-stream hidden states of shape [s, b, n*C]. + """ + static_inputs = super().get_layer_static_inputs(seq_length, micro_batch_size) + hs = static_inputs["hidden_states"] + n = self.config.num_residual_streams + static_inputs["hidden_states"] = torch.ones( + (hs.shape[0], hs.shape[1], n * self.config.hidden_size), + dtype=hs.dtype, + requires_grad=hs.requires_grad, + device=hs.device, + ) + return static_inputs + + def _get_submodules_under_cudagraphs(self): + """Override to include hyper connection modules. 
+ + The base TransformerLayer._get_submodules_under_cudagraphs does not include + self_attention_hyper_connection / mlp_hyper_connection. Their learnable + parameters (mapping_proj, alpha_*, bias) need manual pre-forward hooks + during CUDA graph replay so that parameter all-gathers are triggered. + """ + submodules = super()._get_submodules_under_cudagraphs() + + if not self.config.cuda_graph_scope: + return submodules + + if CudaGraphScope.attn in self.config.cuda_graph_scope: + submodules.append(self.self_attention_hyper_connection) + if (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) or ( + self.is_moe_layer and CudaGraphScope.moe in self.config.cuda_graph_scope + ): + submodules.append(self.mlp_hyper_connection) + return submodules + + def forward(self, *args, **kwargs): + """Forward pass with MHC recompute manager support.""" + kwargs.pop("dynamic_inference_decode_only", None) + + mhc_recompute_manager = getattr(self, '_mhc_recompute_manager', None) + + hidden_states, context = self._forward_attention( + *args, mhc_recompute_manager=mhc_recompute_manager, **kwargs + ) + + output = self._forward_mlp( + hidden_states, + kwargs.get("inference_context", None), + padding_mask=kwargs.get("padding_mask", None), + mhc_recompute_manager=mhc_recompute_manager, + ) + return output, context + + def _forward_attention( + self, + hidden_states: Tensor, + attention_mask: Optional[Tensor] = None, + context: Optional[Tensor] = None, + context_mask: Optional[Tensor] = None, + rotary_pos_emb: Optional[Tensor] = None, + rotary_pos_cos: Optional[Tensor] = None, + rotary_pos_sin: Optional[Tensor] = None, + rotary_pos_cos_sin: Optional[Tensor] = None, + attention_bias: Optional[Tensor] = None, + inference_context: Optional[Any] = None, + packed_seq_params: Optional[PackedSeqParams] = None, + sequence_len_offset: Optional[Tensor] = None, + padding_mask: Optional[Tensor] = None, + mhc_recompute_manager: Optional['CheckpointManager'] = None, + *, + 
inference_params: Optional[Any] = None, + ): + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + FineGrainedActivationOffloadingInterface as off_interface, + ) + + """Forward attention with hyper connection pre/post processing on self-attention.""" + inference_context = deprecate_inference_params(inference_context, inference_params) + + residual = hidden_states + + nvtx_range_push(suffix="self_attention_hyper_connection") + hidden_states, self_attn_h_res, self_attn_hc_h_post = self.self_attention_hyper_connection( + hidden_states, mhc_recompute_manager=mhc_recompute_manager + ) + nvtx_range_pop(suffix="self_attention_hyper_connection") + + # Optional Input Layer norm + checkpoint_input_layernorm = self.recompute_input_layernorm or ( + mhc_recompute_manager is not None and self.mhc_checkpoint_input_layernorm + ) + if checkpoint_input_layernorm: + self.input_layernorm_checkpoint = tensor_parallel.CheckpointWithoutOutput( + ckpt_manager=mhc_recompute_manager + ) + with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states: + input_layernorm_output = self.input_layernorm_checkpoint.checkpoint( + self.input_layernorm, hidden_states + ) + else: + with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states: + input_layernorm_output = self.input_layernorm(hidden_states) + + # Self attention. 
+ nvtx_range_push(suffix="self_attention") + attention_output_with_bias = self.self_attention( + input_layernorm_output, + attention_mask=attention_mask, + inference_context=inference_context, + rotary_pos_emb=rotary_pos_emb, + rotary_pos_cos=rotary_pos_cos, + rotary_pos_sin=rotary_pos_sin, + rotary_pos_cos_sin=rotary_pos_cos_sin, + attention_bias=attention_bias, + packed_seq_params=packed_seq_params, + sequence_len_offset=sequence_len_offset, + ) + nvtx_range_pop(suffix="self_attention") + + if checkpoint_input_layernorm: + self.input_layernorm_checkpoint.discard_output_and_register_recompute( + attention_output_with_bias[0] + ) + + nvtx_range_push(suffix="self_attention_fused_h_res_h_post_bda") + with self.bias_dropout_add_exec_handler(): + hidden_states = self.self_attention_hyper_connection.fused_h_res_h_post_bda( + self_attn_h_res, + residual, + self_attn_hc_h_post, + attention_output_with_bias, + self.hidden_dropout, + self.training, + self.config.bias_dropout_fusion, + mhc_recompute_manager, + ) + nvtx_range_pop(suffix="self_attention_fused_h_res_h_post_bda") + + if self.offload_attn_norm: + hidden_states = off_interface.group_commit(hidden_states, name="attn_norm") + + # Cross-attention (no hyper connection support). 
+ residual = hidden_states + pre_cross_attn_layernorm_output = self.pre_cross_attn_layernorm(hidden_states) + + attention_output_with_bias = self.cross_attention( + pre_cross_attn_layernorm_output, + attention_mask=context_mask, + key_value_states=context, + inference_context=inference_context, + ) + + if isinstance(attention_output_with_bias, dict) and "context" in attention_output_with_bias: + context = attention_output_with_bias["context"] + + with self.bias_dropout_add_exec_handler(): + hidden_states = self.cross_attn_bda(self.training, self.config.bias_dropout_fusion)( + attention_output_with_bias, residual, self.hidden_dropout + ) + + return hidden_states, context + + def _forward_mlp( + self, + hidden_states, + inference_context=None, + padding_mask=None, + mhc_recompute_manager: Optional['CheckpointManager'] = None, + ): + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + FineGrainedActivationOffloadingInterface as off_interface, + ) + + """Forward MLP with hyper connection pre/post processing.""" + is_last_in_recompute_block = bool( + mhc_recompute_manager is not None + and getattr(mhc_recompute_manager, "is_last_layer_in_recompute_block", False) + ) + mhc_mlp_bda_manager = None if is_last_in_recompute_block else mhc_recompute_manager + + residual = hidden_states + + nvtx_range_push(suffix="mlp_hyper_connection") + hidden_states, mlp_h_res, mlp_hc_h_post = self.mlp_hyper_connection( + hidden_states, mhc_recompute_manager=mhc_recompute_manager + ) + nvtx_range_pop(suffix="mlp_hyper_connection") + + # Optional Layer norm post the cross-attention. 
+ checkpoint_pre_mlp_layernorm = self.recompute_pre_mlp_layernorm or ( + mhc_recompute_manager is not None and self.mhc_checkpoint_pre_mlp_layernorm + ) + if checkpoint_pre_mlp_layernorm: + self.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput( + ckpt_manager=mhc_recompute_manager + ) + with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states: + pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint( + self.pre_mlp_layernorm, hidden_states + ) + else: + with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states: + pre_mlp_layernorm_output = self.pre_mlp_layernorm(hidden_states) + + nvtx_range_push(suffix="mlp") + should_chunk_mlp_for_prefill = ( + self.config.mlp_chunks_for_prefill > 1 + and inference_context is not None + and not inference_context.is_decode_only() + and not isinstance(self.mlp, IdentityOp) + and not self.config.transformer_impl == "inference_optimized" + ) + + if self.recompute_mlp: + if self.config.fp8 or self.config.fp4: + from megatron.core.extensions.transformer_engine import te_checkpoint + + mlp_output_with_bias = te_checkpoint( + self.mlp, + False, + tensor_parallel.random.get_cuda_rng_tracker, + self.pg_collection.tp, + pre_mlp_layernorm_output, + padding_mask=padding_mask, + ) + else: + mlp_output_with_bias = tensor_parallel.checkpoint( + functools.partial(self.mlp, padding_mask=padding_mask), + False, + pre_mlp_layernorm_output, + ) + elif should_chunk_mlp_for_prefill: + num_chunks = min(self.config.mlp_chunks_for_prefill, pre_mlp_layernorm_output.shape[0]) + chunks = pre_mlp_layernorm_output.chunk(num_chunks, dim=0) + outputs = [self.mlp(chunk) for chunk in chunks] + mlp_output = torch.cat([out for out, _ in outputs], dim=0) + bias_chunks = [bias for _, bias in outputs if bias is not None] + bias_output = torch.stack(bias_chunks, dim=0).sum(dim=0) if bias_chunks else None + mlp_output_with_bias = (mlp_output, bias_output) + else: + mlp_output_with_bias 
= self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask) + + nvtx_range_pop(suffix="mlp") + + return self._forward_post_mlp_with_fused_hyper_connection( + mlp_output_with_bias, mlp_h_res, residual, mlp_hc_h_post, mhc_mlp_bda_manager + ) + + def _forward_post_mlp_with_fused_hyper_connection( + self, + mlp_output_with_bias, + mlp_h_res, + residual, + mlp_hc_h_post, + mhc_mlp_bda_recompute_manager: Optional['CheckpointManager'] = None, + ): + """ + Perform operations after the MLP computation with fused hyper connection kernel. + + This method uses the fused kernel combining apply_h_res, apply_h_post and bias-dropout-add. + + Args: + mlp_output_with_bias (Tensor): Output tensor of the MLP layer with bias. + mlp_h_res (Tensor): [s, b, n, n] - residual mixing matrix from hyper connection. + residual (Tensor): [s, b, n*C] - original residual (n-stream hidden states). + mlp_hc_h_post (Tensor): [s, b, n] - expansion weights from hyper connection. + mhc_recompute_manager: Optional CheckpointManager for checkpoint management. + + Returns: + output (Tensor): Transformed hidden states of shape [s, b, h]. 
+ """ + if self.recompute_pre_mlp_layernorm or ( + mhc_mlp_bda_recompute_manager is not None and self.mhc_checkpoint_pre_mlp_layernorm + ): + self.pre_mlp_norm_checkpoint.discard_output_and_register_recompute( + mlp_output_with_bias[0] + ) + + nvtx_range_push(suffix="mlp_fused_h_res_h_post_bda") + with self.bias_dropout_add_exec_handler(): + hidden_states = self.mlp_hyper_connection.fused_h_res_h_post_bda( + mlp_h_res, + residual, + mlp_hc_h_post, + mlp_output_with_bias, + self.hidden_dropout, + self.training, + self.config.bias_dropout_fusion, + mhc_mlp_bda_recompute_manager, + ) + nvtx_range_pop(suffix="mlp_fused_h_res_h_post_bda") + + if self.offload_mlp_norm: + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + FineGrainedActivationOffloadingInterface as off_interface, + ) + + hidden_states = off_interface.group_commit(hidden_states, name="mlp_norm") + + output = make_viewless_tensor( + inp=hidden_states, requires_grad=hidden_states.requires_grad, keep_graph=True + ) + return output + + class MoETransformerLayer(TransformerLayer): """ A Transformer layer specialized for Mixture-of-Experts (MoE) architectures. 
diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index c150ac3d5ca..80d0764bdf7 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -23,7 +23,7 @@ initialize_rerun_state_machine, ) from megatron.core.transformer.custom_layers.batch_invariant_kernels import enable_batch_invariant_mode -from megatron.core.utils import get_te_version, is_te_min_version, is_torch_min_version +from megatron.core.utils import configure_nvtx_profiling, get_te_version, is_te_min_version, is_torch_min_version from megatron.legacy import fused_kernels from megatron.training import get_adlr_autoresume, get_args, get_tensorboard_writer from megatron.training.utils import print_rank_0, warn_rank_0 @@ -122,6 +122,12 @@ def state_restore_func(state_dict): print_rank_0("Enabling batch invariant mode globally") enable_batch_invariant_mode() + # Enable NVTX range profiling when profiling is active. + # Must be done before model modules with @nvtx_decorator are imported, + # since the decorator captures _nvtx_enabled at decoration (import) time. 
+ if args.profile: + configure_nvtx_profiling(True) + # torch.distributed initialization def finish_mpu_init(): args = get_args() diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..dc905f25c06 --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.86032, + "2": 10.85379, + "3": 10.86576, + "4": 10.84522, + "5": 10.88381, + "6": 10.89591, + "7": 10.87181, + "8": 10.86499, + "9": 10.86909, + "10": 10.83611, + "11": 10.89392, + "12": 10.87885, + "13": 10.87633, + "14": 10.9031, + "15": 10.83062, + "16": 10.83399, + "17": 10.80009, + "18": 10.82035, + "19": 10.81427, + "20": 10.71811, + "21": 10.68666, + "22": 10.5322, + "23": 10.70546, + "24": 10.58584, + "25": 10.51963, + "26": 10.58548, + "27": 10.60203, + "28": 10.53634, + "29": 10.57208, + "30": 10.33312, + "31": 10.05931, + "32": 10.42892, + "33": 10.42115, + "34": 10.17094, + "35": 10.23176, + "36": 10.1883, + "37": 10.31328, + "38": 10.14298, + "39": 10.38218, + "40": 10.04918, + "41": 10.10427, + "42": 10.17245, + "43": 9.78375, + "44": 9.91054, + "45": 9.78577, + "46": 9.7695, + "47": 10.10153, + "48": 9.81025, + "49": 9.48829, + "50": 9.8677 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1667.0, + "2": 1718.0, + "3": 1638.0, + "4": 1881.0, + "5": 1927.0, + "6": 1792.0, + "7": 1783.0, + "8": 1580.0, + "9": 1935.0, + "10": 1407.0, + "11": 1831.0, + "12": 1662.0, + "13": 1870.0, + "14": 1777.0, + "15": 1930.0, + "16": 1794.0, + "17": 1932.0, + "18": 1631.0, + "19": 1806.0, + "20": 1566.0, + "21": 1853.0, + "22": 1622.0, + "23": 2077.0, + "24": 1592.0, + "25": 1628.0, + "26": 1677.0, + 
"27": 1791.0, + "28": 1979.0, + "29": 2020.0, + "30": 1914.0, + "31": 1597.0, + "32": 1886.0, + "33": 2287.0, + "34": 1836.0, + "35": 1981.0, + "36": 1882.0, + "37": 2505.0, + "38": 2114.0, + "39": 2438.0, + "40": 2204.0, + "41": 2287.0, + "42": 2344.0, + "43": 2069.0, + "44": 2148.0, + "45": 2190.0, + "46": 2312.0, + "47": 2545.0, + "48": 2494.0, + "49": 2296.0, + "50": 2395.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 537920000.0, + "2": 537920000.0, + "3": 537920000.0, + "4": 537920000.0, + "5": 537920000.0, + "6": 537920000.0, + "7": 537920000.0, + "8": 537920000.0, + "9": 537920000.0, + "10": 537920000.0, + "11": 537920000.0, + "12": 537920000.0, + "13": 537920000.0, + "14": 537920000.0, + "15": 537920000.0, + "16": 537920000.0, + "17": 537920000.0, + "18": 537920000.0, + "19": 537920000.0, + "20": 537920000.0, + "21": 537920000.0, + "22": 537920000.0, + "23": 537920000.0, + "24": 537920000.0, + "25": 537920000.0, + "26": 537920000.0, + "27": 537920000.0, + "28": 537920000.0, + "29": 537920000.0, + "30": 537920000.0, + "31": 537920000.0, + "32": 537920000.0, + "33": 537920000.0, + "34": 537920000.0, + "35": 537920000.0, + "36": 537920000.0, + "37": 537920000.0, + "38": 537920000.0, + "39": 537920000.0, + "40": 537920000.0, + "41": 537920000.0, + "42": 537920000.0, + "43": 537920000.0, + "44": 537920000.0, + "45": 537920000.0, + "46": 537920000.0, + "47": 537920000.0, + "48": 537920000.0, + "49": 537920000.0, + "50": 537920000.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1225688576.0, + "2": 1408955904.0, + "3": 1408955904.0, + "4": 1408955904.0, + "5": 1408955904.0, + "6": 1408955904.0, + "7": 1408955904.0, + "8": 1408955904.0, + "9": 1408955904.0, + "10": 1408955904.0, + "11": 1408955904.0, + "12": 1408955904.0, + "13": 1408955904.0, + "14": 1408955904.0, + "15": 1408955904.0, + "16": 1408955904.0, + "17": 
1408955904.0, + "18": 1408955904.0, + "19": 1408955904.0, + "20": 1408955904.0, + "21": 1408955904.0, + "22": 1408955904.0, + "23": 1408955904.0, + "24": 1408955904.0, + "25": 1408955904.0, + "26": 1408955904.0, + "27": 1408955904.0, + "28": 1408955904.0, + "29": 1408955904.0, + "30": 1408955904.0, + "31": 1408955904.0, + "32": 1408955904.0, + "33": 1408955904.0, + "34": 1408955904.0, + "35": 1408955904.0, + "36": 1408955904.0, + "37": 1408955904.0, + "38": 1408955904.0, + "39": 1408955904.0, + "40": 1408955904.0, + "41": 1408955904.0, + "42": 1408955904.0, + "43": 1408955904.0, + "44": 1408955904.0, + "45": 1408955904.0, + "46": 1408955904.0, + "47": 1408955904.0, + "48": 1408955904.0, + "49": 1408955904.0, + "50": 1408955904.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": "nan", + "2": 23.32725, + "3": 0.64935, + "4": 0.63773, + "5": 0.63792, + "6": 0.63776, + "7": 0.63937, + "8": 0.64046, + "9": 0.6361, + "10": 0.64423, + "11": 0.64021, + "12": 0.63952, + "13": 0.6451, + "14": 0.63986, + "15": 0.64096, + "16": 0.64001, + "17": 0.63996, + "18": 0.63814, + "19": 0.64219, + "20": 0.64081, + "21": 0.63784, + "22": 0.64101, + "23": 0.64231, + "24": 0.63904, + "25": 0.64041, + "26": 0.64744, + "27": 0.64738, + "28": 0.64182, + "29": 0.64714, + "30": 0.64337, + "31": 0.64627, + "32": 0.64639, + "33": 0.64426, + "34": 0.64469, + "35": 0.64416, + "36": 0.64898, + "37": 0.64103, + "38": 0.64541, + "39": 0.6467, + "40": 0.64896, + "41": 0.64438, + "42": 0.64755, + "43": 0.64706, + "44": 0.64706, + "45": 0.64435, + "46": 0.64608, + "47": 0.64784, + "48": 0.6453, + "49": 0.64942, + "50": 0.644 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/model_config.yaml new file mode 100644 index 00000000000..686c8bdbb59 --- /dev/null +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/model_config.yaml @@ -0,0 +1,62 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 +MODEL_ARGS: + --num-layers: 12 + --hidden-size: 512 + --num-attention-heads: 8 + --log-params-norm: true + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --tensorboard-dir: ${TENSORBOARD_PATH} + --micro-batch-size: 4 + --global-batch-size: 32 + --seq-length: 1024 + --max-position-embeddings: 1024 + --train-iters: 50 + --timing-log-level: 0 + --lr-decay-iters: 320000 + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --split: 949,50,1 + --distributed-backend: nccl + --lr: 0.00015 + --lr-decay-style: cosine + --min-lr: 1.0e-5 + --weight-decay: 1e-2 + --clip-grad: 1.0 + --lr-warmup-fraction: .01 + --log-interval: 1 + --save-interval: 25 + --eval-interval: 50 + --eval-iters: 50 + --transformer-impl: transformer_engine + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --deterministic-mode: true + --no-gradient-accumulation-fusion: true + --attention-softmax-in-fp32: true + --use-mcore-models: true + --ckpt-format: torch_dist + --dist-ckpt-optim-fully-reshardable: true + --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --data-cache-path: ${DATA_CACHE_PATH} + --bf16: true + --attention-backend: unfused + --sequence-parallel: true + --log-memory-to-tensorboard: true + --enable-hyper-connections: true + --num-residual-streams: 4 + --mhc-sinkhorn-iterations: 20 + --mhc-init-gating-factor: 0.01 + --recompute-granularity: selective + --recompute-modules: "[mhc]" + --mhc-recompute-layer-num: 2 + --exit-interval: 
50 +TEST_TYPE: ckpt-resume diff --git a/tests/test_utils/recipes/h100/gpt.yaml b/tests/test_utils/recipes/h100/gpt.yaml index 52e38760f84..9062a3f4471 100644 --- a/tests/test_utils/recipes/h100/gpt.yaml +++ b/tests/test_utils/recipes/h100/gpt.yaml @@ -347,6 +347,11 @@ products: - environment: [dev] scope: [mr, mr-github, mr-github-slim] platforms: [dgx_h100] + - test_case: [gpt3_mcore_te_tp2_pp2_mhc] + products: + - environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_mla] products: - environment: [dev] diff --git a/tests/unit_tests/models/test_gpt_layer_specs.py b/tests/unit_tests/models/test_gpt_layer_specs.py new file mode 100644 index 00000000000..bfa86fd0241 --- /dev/null +++ b/tests/unit_tests/models/test_gpt_layer_specs.py @@ -0,0 +1,67 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +import pytest + +from megatron.core.models.gpt.gpt_layer_specs import ( + get_gpt_layer_local_spec, + get_gpt_layer_with_transformer_engine_spec, +) +from megatron.core.transformer.hyper_connection import HyperConnectionModule +from megatron.core.transformer.identity_op import IdentityOp +from megatron.core.transformer.transformer_layer import ( + HyperConnectionTransformerLayer, + TransformerLayer, +) + +_TE = get_gpt_layer_with_transformer_engine_spec +_LOCAL = get_gpt_layer_local_spec +_HC = HyperConnectionTransformerLayer +_HC_MOD = HyperConnectionModule +_TL = TransformerLayer +_ID = IdentityOp + + +class TestGptLayerSpecsHyperConnection: + """Test that enable_hyper_connection controls module types in layer specs.""" + + @pytest.mark.parametrize( + "factory,kwargs,expected_module,expected_hc", + [ + (_TE, {}, _TL, _ID), + (_TE, {"enable_hyper_connection": True}, _HC, _HC_MOD), + (_TE, {"enable_hyper_connection": False}, _TL, _ID), + (_TE, {"multi_latent_attention": True, "enable_hyper_connection": False}, _TL, _ID), + (_TE, {"multi_latent_attention": True, "enable_hyper_connection": True}, 
_HC, _HC_MOD), + (_LOCAL, {}, _TL, _ID), + (_LOCAL, {"enable_hyper_connection": True}, _HC, _HC_MOD), + (_LOCAL, {"enable_hyper_connection": False}, _TL, _ID), + (_LOCAL, {"multi_latent_attention": True, "enable_hyper_connection": False}, _TL, _ID), + ( + _LOCAL, + {"multi_latent_attention": True, "enable_hyper_connection": True}, + _HC, + _HC_MOD, + ), + (_LOCAL, {"normalization": "RMSNorm", "enable_hyper_connection": False}, _TL, _ID), + (_LOCAL, {"normalization": "RMSNorm", "enable_hyper_connection": True}, _HC, _HC_MOD), + ], + ids=[ + "te_default", + "te_enable", + "te_disable", + "te_mla_disable", + "te_mla_enable", + "local_default", + "local_enable", + "local_disable", + "local_mla_disable", + "local_mla_enable", + "local_rmsnorm_disable", + "local_rmsnorm_enable", + ], + ) + def test_hyper_connection_spec(self, factory, kwargs, expected_module, expected_hc): + spec = factory(**kwargs) + assert spec.module is expected_module + assert spec.submodules.self_attention_hyper_connection is expected_hc + assert spec.submodules.mlp_hyper_connection is expected_hc diff --git a/tests/unit_tests/models/test_mamba_moe_model.py b/tests/unit_tests/models/test_mamba_moe_model.py index 5ecd4e92d80..2524b3ade50 100644 --- a/tests/unit_tests/models/test_mamba_moe_model.py +++ b/tests/unit_tests/models/test_mamba_moe_model.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""
Unit tests for PP / VPP + mHC (Hyper Connections) compatibility.

Tests cover:
1. get_tensor_shapes: shape correctness with mHC for all PP stages
2. get_num_layers_to_build: layer counts with standalone embedding/loss + mHC
3. TransformerBlock expand/contract: correct placement at PP boundaries
4. VPP tensor_shape: single shape used across all chunks with mHC
5. E2E forward pass: PP + mHC + standalone embedding/loss (multi-GPU)
6. Flexible VPP layout (pipeline_model_parallel_layout) + mHC compatibility

Run with:
    uv run --no-sync pytest tests/unit_tests/pipeline_parallel/test_pp_mhc_compatibility.py -s -x
    # Multi-GPU tests (world_size >= 2):
    torchrun --nproc-per-node=2 -m pytest tests/unit_tests/pipeline_parallel/test_pp_mhc_compatibility.py -s -x
"""

from types import SimpleNamespace
from unittest.mock import MagicMock

import pytest
import torch

from megatron.core import parallel_state
from megatron.core.pipeline_parallel.schedules import get_tensor_shapes
from megatron.core.transformer.hyper_connection import HyperConnectionModule
from megatron.core.transformer.transformer_block import get_num_layers_to_build
from megatron.core.transformer.transformer_config import TransformerConfig
from tests.unit_tests.test_utilities import Utils

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _make_pp_group(rank: int, size: int):
    """Return a mock PP process group reporting the given rank and size."""
    group = MagicMock()
    group.rank.return_value = rank
    group.size.return_value = size
    return group


def _make_tp_cp_groups(tp_size: int = 1, cp_size: int = 1):
    """Return mock (tensor-parallel, context-parallel) groups with the given sizes."""
    tp_group = MagicMock()
    tp_group.size.return_value = tp_size
    cp_group = MagicMock()
    cp_group.size.return_value = cp_size
    return tp_group, cp_group


def _get_send_recv_shapes(config, pp_size, seq=32, mbs=2):
    """Return one (send_shape, recv_shape) tuple per PP rank."""
    tp_group, cp_group = _make_tp_cp_groups()

    def shapes_for(rank, is_recv):
        # get_tensor_shapes derives the P2P shape from the config plus the
        # rank's position in the (mocked) pipeline group.
        return get_tensor_shapes(
            seq_length=seq,
            micro_batch_size=mbs,
            decoder_seq_length=None,
            config=config,
            tp_group=tp_group,
            cp_group=cp_group,
            pp_group=_make_pp_group(rank, pp_size),
            is_recv=is_recv,
        )

    return [(shapes_for(rank, False), shapes_for(rank, True)) for rank in range(pp_size)]


def _make_config(
    hidden_size=64,
    num_layers=8,
    pp_size=2,
    vp_size=None,
    enable_hyper_connections=False,
    num_residual_streams=4,
    account_for_embedding=False,
    account_for_loss=False,
    num_layers_first=None,
    num_layers_last=None,
    **extra,
):
    """Build a TransformerConfig for testing without initializing parallel state."""
    kwargs = dict(
        hidden_size=hidden_size,
        num_layers=num_layers,
        num_attention_heads=4,
        pipeline_model_parallel_size=pp_size,
        virtual_pipeline_model_parallel_size=vp_size,
        enable_hyper_connections=enable_hyper_connections,
        num_residual_streams=num_residual_streams,
        account_for_embedding_in_pipeline_split=account_for_embedding,
        account_for_loss_in_pipeline_split=account_for_loss,
        num_layers_in_first_pipeline_stage=num_layers_first,
        num_layers_in_last_pipeline_stage=num_layers_last,
        use_cpu_initialization=True,
    )
    # A pipeline dtype is only needed when there is actual PP communication;
    # callers may still override it via **extra.
    if pp_size > 1:
        kwargs.setdefault('pipeline_dtype', torch.bfloat16)
    kwargs.update(extra)
    return TransformerConfig(**kwargs)
# ===========================================================================
# 1. get_tensor_shapes — shape correctness with mHC
# ===========================================================================


class TestGetTensorShapesWithMHC:
    """Verify get_tensor_shapes returns correct hidden dim for mHC-enabled models.

    With hyper connections enabled, intermediate stages exchange n*C hidden
    states, while the very first stage still receives C and the very last
    stage still emits C.
    """

    SEQ, MBS, H = 32, 2, 64
    N_STREAMS = 4

    def _shapes(self, config, pp_rank, pp_size, is_recv):
        """Call get_tensor_shapes with mocked TP/CP/PP groups for one rank."""
        tp_group, cp_group = _make_tp_cp_groups()
        return get_tensor_shapes(
            seq_length=self.SEQ,
            micro_batch_size=self.MBS,
            decoder_seq_length=None,
            config=config,
            tp_group=tp_group,
            cp_group=cp_group,
            pp_group=_make_pp_group(pp_rank, pp_size),
            is_recv=is_recv,
        )

    def _mhc_cfg(self, pp_size=2, **extra):
        """Config with hyper connections enabled, sized for this test class."""
        return _make_config(
            hidden_size=self.H,
            pp_size=pp_size,
            enable_hyper_connections=True,
            num_residual_streams=self.N_STREAMS,
            **extra,
        )

    # --- Without mHC (baseline) ---

    def test_no_mhc_pp2_all_stages(self):
        cfg = _make_config(hidden_size=self.H, pp_size=2, enable_hyper_connections=False)
        for rank in range(2):
            for is_recv in (True, False):
                assert self._shapes(cfg, rank, 2, is_recv) == [(self.SEQ, self.MBS, self.H)]

    # --- With mHC, PP=2 ---

    def test_mhc_pp2_rank0_send_nstream(self):
        """PP rank 0 sends n*C to rank 1."""
        shapes = self._shapes(self._mhc_cfg(), pp_rank=0, pp_size=2, is_recv=False)
        assert shapes == [(self.SEQ, self.MBS, self.H * self.N_STREAMS)]

    def test_mhc_pp2_rank0_recv_1stream(self):
        """PP rank 0 receives nothing from previous (is first stage), so shape = C."""
        shapes = self._shapes(self._mhc_cfg(), pp_rank=0, pp_size=2, is_recv=True)
        assert shapes == [(self.SEQ, self.MBS, self.H)]

    def test_mhc_pp2_rank1_recv_nstream(self):
        """PP rank 1 receives n*C from rank 0."""
        shapes = self._shapes(self._mhc_cfg(), pp_rank=1, pp_size=2, is_recv=True)
        assert shapes == [(self.SEQ, self.MBS, self.H * self.N_STREAMS)]

    def test_mhc_pp2_rank1_send_1stream(self):
        """PP rank 1 (last stage) sends C (after output_contract)."""
        shapes = self._shapes(self._mhc_cfg(), pp_rank=1, pp_size=2, is_recv=False)
        assert shapes == [(self.SEQ, self.MBS, self.H)]

    # --- With mHC, PP=4 (intermediate ranks) ---

    def test_mhc_pp4_intermediate_ranks(self):
        """Intermediate ranks both send and receive n*C."""
        cfg = self._mhc_cfg(pp_size=4, num_layers=8)
        for rank in (1, 2):
            for is_recv in (True, False):
                shapes = self._shapes(cfg, pp_rank=rank, pp_size=4, is_recv=is_recv)
                assert shapes == [
                    (self.SEQ, self.MBS, self.H * self.N_STREAMS)
                ], f"rank={rank}, is_recv={is_recv}"

    # --- With sequence parallel ---

    def test_mhc_with_sequence_parallel(self):
        """Sequence parallel divides seq_length by TP size."""
        cfg = self._mhc_cfg(sequence_parallel=True, tensor_model_parallel_size=2)
        tp_group, cp_group = _make_tp_cp_groups(tp_size=2)
        shapes = get_tensor_shapes(
            seq_length=self.SEQ,
            micro_batch_size=self.MBS,
            decoder_seq_length=None,
            config=cfg,
            tp_group=tp_group,
            cp_group=cp_group,
            pp_group=_make_pp_group(0, 2),
            is_recv=False,
        )
        assert shapes == [(self.SEQ // 2, self.MBS, self.H * self.N_STREAMS)]
# ===========================================================================
# 2. get_num_layers_to_build — mHC + standalone embedding/loss
# ===========================================================================


class TestGetNumLayersToBuildWithMHC:  # fixed class-name typo ("Buil" -> "Build")
    """
    Verify layer counts are correct when mHC is combined with standalone
    embedding / loss stages (account_for_embedding/loss_in_pipeline_split).
    mHC itself doesn't change layer counts, but we need to ensure the
    combination doesn't break.
    """

    def test_pp2_even_split_mhc(self):
        cfg = _make_config(num_layers=8, pp_size=2, enable_hyper_connections=True)
        assert get_num_layers_to_build(cfg, pp_rank=0) == 4
        assert get_num_layers_to_build(cfg, pp_rank=1) == 4

    def test_pp2_standalone_embedding_mhc(self):
        """With standalone embedding on PP rank 0, rank 0 builds fewer layers."""
        cfg = _make_config(
            num_layers=8,
            pp_size=2,
            enable_hyper_connections=True,
            account_for_embedding=True,
            account_for_loss=True,
        )
        # (8 + 1 + 1) / 2 = 5 per rank
        # rank 0: 5 - 1 (embedding) = 4 transformer layers
        # rank 1: 5 - 1 (loss) = 4 transformer layers
        assert get_num_layers_to_build(cfg, pp_rank=0) == 4
        assert get_num_layers_to_build(cfg, pp_rank=1) == 4

    def test_pp4_standalone_invalid_division_raises(self):
        """PP=4, standalone embedding+loss, 12 layers → (12+2)/4=3.5 → raises."""
        with pytest.raises((ValueError, AssertionError)):
            _make_config(
                num_layers=12,
                pp_size=4,
                enable_hyper_connections=True,
                account_for_embedding=True,
                account_for_loss=True,
            )

    def test_pp4_standalone_both_mhc_valid(self):
        """Valid configuration: (14+2)/4 = 4 per rank."""
        cfg = _make_config(
            num_layers=14,
            pp_size=4,
            enable_hyper_connections=True,
            account_for_embedding=True,
            account_for_loss=True,
        )
        # rank 0: 4 - 1 (embedding) = 3
        # rank 1, 2: 4
        # rank 3: 4 - 1 (loss) = 3
        assert get_num_layers_to_build(cfg, pp_rank=0) == 3
        assert get_num_layers_to_build(cfg, pp_rank=1) == 4
        assert get_num_layers_to_build(cfg, pp_rank=2) == 4
        assert get_num_layers_to_build(cfg, pp_rank=3) == 3

    def test_uneven_pp_with_mhc(self):
        """Uneven PP: first stage has 2 layers, last has 2, middle gets 2 each."""
        cfg = _make_config(
            num_layers=8,
            pp_size=4,
            enable_hyper_connections=True,
            num_layers_first=2,
            num_layers_last=2,
        )
        # All four stages end up with 2 layers: 2 (first) + 2 + 2 + 2 (last) = 8.
        for pp_rank in range(4):
            assert get_num_layers_to_build(cfg, pp_rank=pp_rank) == 2

    def test_vpp_with_mhc(self):
        """VPP=2 with mHC: each VP stage gets half the layers per rank."""
        cfg = _make_config(num_layers=8, pp_size=2, vp_size=2, enable_hyper_connections=True)
        for pp_rank in range(2):
            for vp_stage in range(2):
                n = get_num_layers_to_build(cfg, vp_stage=vp_stage, pp_rank=pp_rank)
                assert n == 2, f"pp_rank={pp_rank}, vp_stage={vp_stage}, got {n}"

    def test_vpp_standalone_embedding_loss_invalid_raises(self):
        """VPP=2, standalone embedding+loss, pp=2, 8 layers → 10/2=5, 5%2!=0 → raises."""
        with pytest.raises((ValueError, AssertionError)):
            _make_config(
                num_layers=8,
                pp_size=2,
                vp_size=2,
                enable_hyper_connections=True,
                account_for_embedding=True,
                account_for_loss=True,
            )

    def test_vpp_standalone_both_valid_mhc(self):
        """VPP=2, standalone embed+loss, pp=4, 14 layers → (14+2)/4=4, 4/2=2 per VP."""
        cfg = _make_config(
            num_layers=14,
            pp_size=4,
            vp_size=2,
            enable_hyper_connections=True,
            account_for_embedding=True,
            account_for_loss=True,
        )
        # rank 0, vp 0: first PP + first VP → 2 - 1(embed) = 1
        assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=0) == 1
        # rank 0, vp 1: first PP + second VP → 2
        assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=0) == 2
        # rank 1-2: 2 per VP stage
        for rank in (1, 2):
            for vp in (0, 1):
                assert get_num_layers_to_build(cfg, vp_stage=vp, pp_rank=rank) == 2
        # rank 3, vp 0: 2
        assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=3) == 2
        # rank 3, vp 1: last PP + last VP → 2 - 1(loss) = 1
        assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=3) == 1


# ===========================================================================
# 3. TransformerBlock expand/contract — boundary logic
# ===========================================================================


class TestTransformerBlockMHCBoundaries:
    """
    Test that TransformerBlock correctly applies input_expand at pre_process
    and output_contract at the final layernorm stage.
    These are pure tensor operation tests — no parallel state needed
    (the skipifs below only gate on CUDA availability).
    """

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
    def test_input_expand(self):
        n = 4
        s, b, C = 8, 2, 64
        x = torch.randn(s, b, C, device='cuda')
        expanded = HyperConnectionModule.input_expand(x, n)
        assert expanded.shape == (s, b, n * C)
        # Each stream should be a copy of the input.
        for i in range(n):
            torch.testing.assert_close(expanded[:, :, i * C : (i + 1) * C], x)

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
    def test_output_contract(self):
        n = 4
        s, b, C = 8, 2, 64
        x = torch.randn(s, b, n * C, device='cuda')
        contracted = HyperConnectionModule.output_contract(x, n)
        assert contracted.shape == (s, b, C)
        # Should be the mean of all n streams.
        expected = x.view(s, b, n, C).mean(dim=2)
        torch.testing.assert_close(contracted, expected)

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
    def test_expand_then_contract_preserves_shape(self):
        n = 4
        s, b, C = 8, 2, 64
        x = torch.randn(s, b, C, device='cuda')
        expanded = HyperConnectionModule.input_expand(x, n)
        contracted = HyperConnectionModule.output_contract(expanded, n)
        assert contracted.shape == x.shape
        # expand copies all streams → mean of identical streams = original
        torch.testing.assert_close(contracted, x)
# ===========================================================================
# 3b. Zero-layer VP stage edge cases with mHC
# ===========================================================================


class TestZeroLayerVPStageWithMHC:
    """
    When standalone embedding/loss makes a VP stage have very few (1) transformer
    layers, verify layer counts stay non-negative.
    """

    def test_vpp_standalone_embed_first_stage_has_1_layer(self):
        """First VP stage at first PP rank should have exactly 1 layer (2-1=1)."""
        cfg = _make_config(
            num_layers=7,
            pp_size=2,
            vp_size=2,
            enable_hyper_connections=True,
            account_for_embedding=True,
        )
        built = get_num_layers_to_build(cfg, vp_stage=0, pp_rank=0)
        assert built == 1
        assert built >= 0

    def test_vpp_standalone_loss_last_stage_has_1_layer(self):
        """Last VP stage at last PP rank should have exactly 1 layer (2-1=1)."""
        cfg = _make_config(
            num_layers=7, pp_size=2, vp_size=2, enable_hyper_connections=True, account_for_loss=True
        )
        built = get_num_layers_to_build(cfg, vp_stage=1, pp_rank=1)
        assert built == 1
        assert built >= 0

    def test_vpp_standalone_both_boundary_layers(self):
        """Both first and last VP stages lose a layer, but all counts remain >= 0."""
        cfg = _make_config(
            num_layers=14,
            pp_size=4,
            vp_size=2,
            enable_hyper_connections=True,
            account_for_embedding=True,
            account_for_loss=True,
        )
        for pp_rank in range(4):
            for vp_stage in range(2):
                built = get_num_layers_to_build(cfg, vp_stage=vp_stage, pp_rank=pp_rank)
                assert built >= 0, f"pp_rank={pp_rank}, vp_stage={vp_stage} has {built} < 0 layers"


# ===========================================================================
# 4. VPP tensor_shape — single shape for all chunks
# ===========================================================================


class TestVPPTensorShapeWithMHC:
    """
    Verify that the interleaved schedule uses n*C for all P2P communication
    when mHC is enabled with PP > 1.
    """

    @staticmethod
    def _hidden_dim(config, pp_size):
        # Mirrors the hidden-dim selection logic of
        # forward_backward_pipelining_with_interleaving.
        hidden_dim = config.hidden_size
        if getattr(config, 'enable_hyper_connections', False) and pp_size > 1:
            hidden_dim = config.hidden_size * getattr(config, 'num_residual_streams', 1)
        return hidden_dim

    def test_interleaved_tensor_shape_uses_nstream(self):
        """With mHC and PP>1, the P2P hidden dim is n * hidden_size."""
        config = SimpleNamespace(
            hidden_size=64,
            enable_hyper_connections=True,
            num_residual_streams=4,
            sequence_parallel=False,
        )
        assert self._hidden_dim(config, pp_size=2) == 64 * 4

    def test_interleaved_tensor_shape_no_mhc(self):
        """Without mHC, hidden_dim = hidden_size."""
        config = SimpleNamespace(
            hidden_size=64, enable_hyper_connections=False, sequence_parallel=False
        )
        assert self._hidden_dim(config, pp_size=2) == 64

    def test_interleaved_tensor_shape_pp1_mhc_no_expand(self):
        """PP=1 with mHC: no P2P communication needed, no shape change."""
        config = SimpleNamespace(
            hidden_size=64,
            enable_hyper_connections=True,
            num_residual_streams=4,
            sequence_parallel=False,
        )
        assert self._hidden_dim(config, pp_size=1) == 64


# ===========================================================================
# 5. Shape consistency across PP stages with VPP + mHC
# ===========================================================================


class TestPPShapeConsistencyWithMHC:
    """
    Verify that send shape from one stage matches recv shape of the next stage.
    This is critical: a mismatch would cause a hang or crash in P2P communication.
    """

    def test_pp2_mhc_send_recv_match(self):
        """Rank 0's send shape must match rank 1's recv shape."""
        cfg = _make_config(hidden_size=64, pp_size=2, enable_hyper_connections=True)
        shapes = _get_send_recv_shapes(cfg, 2)
        assert (
            shapes[0][0] == shapes[1][1]
        ), f"rank 0 send {shapes[0][0]} != rank 1 recv {shapes[1][1]}"

    def test_pp4_mhc_all_consecutive_match(self):
        """For all consecutive stages, send[i] == recv[i+1]."""
        cfg = _make_config(hidden_size=64, num_layers=8, pp_size=4, enable_hyper_connections=True)
        shapes = _get_send_recv_shapes(cfg, 4)
        for i in range(3):
            assert (
                shapes[i][0] == shapes[i + 1][1]
            ), f"rank {i} send {shapes[i][0]} != rank {i+1} recv {shapes[i+1][1]}"

    def test_pp4_no_mhc_all_consecutive_match(self):
        """Baseline: without mHC, all shapes should be plain hidden_size."""
        cfg = _make_config(hidden_size=64, num_layers=8, pp_size=4)
        shapes = _get_send_recv_shapes(cfg, 4)
        for i in range(3):
            assert shapes[i][0] == shapes[i + 1][1]
            assert shapes[i][0] == [(32, 2, 64)]


# ===========================================================================
# 6. Standalone embedding / loss — PP boundary + mHC interaction
# ===========================================================================


class TestStandaloneEmbeddingLossWithMHC:
    """
    Verify that standalone embedding/loss configurations interact correctly
    with mHC tensor shapes and layer counting.
    """
+ """ + + def test_standalone_embedding_first_stage_has_fewer_layers(self): + """With standalone embedding, first PP/VP stage gets 1 fewer layer.""" + # 7 layers, pp=2, vp=2 → (7+1)/2=4, 4/2=2 per VP stage + cfg = _make_config( + num_layers=7, + pp_size=2, + vp_size=2, + enable_hyper_connections=True, + account_for_embedding=True, + ) + # rank 0, vp 0: first stage → 2 - 1(embed) = 1 + assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=0) == 1 + # rank 0, vp 1: 2 + assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=0) == 2 + # rank 1: 2 each VP + assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=1) == 2 + assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=1) == 2 + + def test_standalone_loss_last_stage_has_fewer_layers(self): + """With standalone loss, last PP/VP stage gets 1 fewer layer.""" + cfg = _make_config( + num_layers=7, pp_size=2, vp_size=2, enable_hyper_connections=True, account_for_loss=True + ) + # (7+1)/2 = 4, 4/2 = 2 per VP + # rank 0: 2 each VP + assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=0) == 2 + assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=0) == 2 + # rank 1, vp 0: 2 + assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=1) == 2 + # rank 1, vp 1: last stage → 2 - 1(loss) = 1 + assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=1) == 1 + + def test_standalone_both_mhc_shapes_still_consistent(self): + """With standalone embed+loss, P2P shapes should still match between stages.""" + cfg = _make_config( + hidden_size=64, + num_layers=14, + pp_size=4, + enable_hyper_connections=True, + num_residual_streams=4, + account_for_embedding=True, + account_for_loss=True, + ) + tp, cp = _make_tp_cp_groups() + for i in range(3): + send = get_tensor_shapes( + seq_length=32, + micro_batch_size=2, + decoder_seq_length=None, + config=cfg, + tp_group=tp, + cp_group=cp, + pp_group=_make_pp_group(i, 4), + is_recv=False, + ) + recv = get_tensor_shapes( + seq_length=32, + micro_batch_size=2, + decoder_seq_length=None, + 
config=cfg, + tp_group=tp, + cp_group=cp, + pp_group=_make_pp_group(i + 1, 4), + is_recv=True, + ) + assert send == recv, f"rank {i}→{i+1}: send={send} recv={recv}" + + def test_mhc_shapes_first_stage_send_vs_second_recv(self): + """ + First stage (pre_process) does input_expand: hidden [s,b,C] → [s,b,n*C]. + The send shape from rank 0 should be n*C. + The recv shape at rank 1 should also be n*C. + """ + H, N = 64, 4 + cfg = _make_config( + hidden_size=H, + num_layers=8, + pp_size=2, + enable_hyper_connections=True, + num_residual_streams=N, + ) + tp, cp = _make_tp_cp_groups() + send_0 = get_tensor_shapes( + seq_length=32, + micro_batch_size=2, + decoder_seq_length=None, + config=cfg, + tp_group=tp, + cp_group=cp, + pp_group=_make_pp_group(0, 2), + is_recv=False, + ) + recv_1 = get_tensor_shapes( + seq_length=32, + micro_batch_size=2, + decoder_seq_length=None, + config=cfg, + tp_group=tp, + cp_group=cp, + pp_group=_make_pp_group(1, 2), + is_recv=True, + ) + assert send_0 == [(32, 2, H * N)] + assert recv_1 == [(32, 2, H * N)] + assert send_0 == recv_1 + + def test_mhc_shapes_last_stage_output_is_1stream(self): + """ + Last stage (post_process) does output_contract: [s,b,n*C] → [s,b,C]. + The send shape from last rank should be C (but get_tensor_shapes returns C + because last rank doesn't send forward). + """ + H, N = 64, 4 + cfg = _make_config( + hidden_size=H, + num_layers=8, + pp_size=2, + enable_hyper_connections=True, + num_residual_streams=N, + ) + tp, cp = _make_tp_cp_groups() + send_last = get_tensor_shapes( + seq_length=32, + micro_batch_size=2, + decoder_seq_length=None, + config=cfg, + tp_group=tp, + cp_group=cp, + pp_group=_make_pp_group(1, 2), + is_recv=False, + ) + # Last stage sends C (after contract), not n*C + assert send_last == [(32, 2, H)] + + +# =========================================================================== +# 7. 
# ===========================================================================
# 7. E2E forward pass tests (require multi-GPU)
# ===========================================================================


@pytest.mark.internal
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@pytest.mark.skipif(
    int(__import__('os').environ.get('WORLD_SIZE', '1')) < 2, reason="Requires at least 2 GPUs"
)
class TestPPForwardWithMHC:
    """
    End-to-end forward pass tests with PP + mHC.
    Requires multi-GPU (torchrun --nproc-per-node=2+).
    """

    def _run_forward(
        self, pp_size, vp_size, enable_mhc, account_for_embedding=False, account_for_loss=False
    ):
        # Imports are local so that collecting this module never pulls in
        # the heavyweight training stack on machines that skip these tests.
        from megatron.core import mpu
        from megatron.core.models.gpt.gpt_layer_specs import (
            get_gpt_layer_with_transformer_engine_spec,
        )
        from megatron.core.models.gpt.gpt_model import GPTModel
        from megatron.core.num_microbatches_calculator import (
            init_num_microbatches_calculator,
            unset_num_microbatches_calculator,
        )
        from megatron.core.pipeline_parallel import get_forward_backward_func
        from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
        from megatron.core.transformer.enums import ModelType
        from megatron.training.global_vars import set_args  # NOTE(review): unused, kept as-is
        from tests.unit_tests.test_utilities import Utils

        num_layers = 8
        hidden_size = 64
        num_heads = 4
        seq_length = 16
        micro_batch_size = 2
        vocab_size = 128

        Utils.initialize_model_parallel(1, pp_size, vp_size)
        model_parallel_cuda_manual_seed(42)
        init_num_microbatches_calculator(0, None, 1, 1, 1)

        try:
            config = TransformerConfig(
                num_layers=num_layers,
                hidden_size=hidden_size,
                num_attention_heads=num_heads,
                use_cpu_initialization=True,
                pipeline_dtype=torch.bfloat16,
                bf16=True,
                pipeline_model_parallel_size=pp_size,
                virtual_pipeline_model_parallel_size=vp_size,
                enable_hyper_connections=enable_mhc,
                num_residual_streams=4 if enable_mhc else 1,
                account_for_embedding_in_pipeline_split=account_for_embedding,
                account_for_loss_in_pipeline_split=account_for_loss,
                hidden_dropout=0.0,
                attention_dropout=0.0,
            )

            spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=enable_mhc)

            models = []
            for vp in range(vp_size or 1):
                pre_process = mpu.is_pipeline_first_stage(ignore_virtual=False, vp_stage=vp)
                post_process = mpu.is_pipeline_last_stage(ignore_virtual=False, vp_stage=vp)
                model = (
                    GPTModel(
                        config=config,
                        transformer_layer_spec=spec,
                        vocab_size=vocab_size,
                        max_sequence_length=seq_length,
                        pre_process=pre_process,
                        post_process=post_process,
                        position_embedding_type="rope",
                        vp_stage=vp,
                        share_embeddings_and_output_weights=False,
                    )
                    .bfloat16()
                    .cuda()
                )
                model.model_type = ModelType.encoder_or_decoder
                models.append(model)

            if vp_size is None:
                models = models[0]
                model_list = [models]
            else:
                model_list = models

            def forward_step_func(data_iterator, model):
                tokens = torch.randint(0, vocab_size, (micro_batch_size, seq_length)).cuda()
                position_ids = (
                    torch.arange(seq_length).unsqueeze(0).expand(micro_batch_size, -1).cuda()
                )
                labels = torch.randint(0, vocab_size, (micro_batch_size, seq_length)).cuda()
                output = model(tokens, position_ids, None, labels=labels)

                def loss_func(output_tensor):
                    loss = output_tensor.sum()
                    return output_tensor, loss

                return output, loss_func

            forward_backward_func = get_forward_backward_func()

            def make_iter():
                while True:
                    yield None

            # One shared iterator is fine here: every chunk just pulls None.
            data_iters = [make_iter()] * len(model_list)

            losses = forward_backward_func(
                forward_step_func=forward_step_func,
                data_iterator=data_iters,
                model=model_list,
                num_microbatches=4,
                seq_length=seq_length,
                micro_batch_size=micro_batch_size,
                forward_only=True,
            )
            return losses

        finally:
            unset_num_microbatches_calculator()
            Utils.destroy_model_parallel()

    def test_pp2_mhc_forward(self):
        """PP=2 + mHC forward pass should not hang."""
        self._run_forward(pp_size=2, vp_size=None, enable_mhc=True)

    def test_pp2_vpp2_mhc_forward(self):
        """PP=2 + VPP=2 + mHC forward pass should not hang."""
        self._run_forward(pp_size=2, vp_size=2, enable_mhc=True)

    def test_pp2_mhc_standalone_embedding_forward(self):
        """PP=2 + mHC + standalone embedding."""
        # (8+1)/2 = 4.5 → need (num_layers+1) divisible by pp_size.
        # With the default 8 layers and pp=2 the split is invalid, so the
        # config validation is expected to raise.
        with pytest.raises((ValueError, AssertionError)):
            self._run_forward(pp_size=2, vp_size=None, enable_mhc=True, account_for_embedding=True)

    def test_pp2_mhc_standalone_both_forward(self):
        """PP=2 + mHC + standalone embedding + loss: (8+2)/2=5, works."""
        self._run_forward(
            pp_size=2,
            vp_size=None,
            enable_mhc=True,
            account_for_embedding=True,
            account_for_loss=True,
        )

    def test_pp2_no_mhc_forward_baseline(self):
        """Baseline: PP=2 without mHC should work fine."""
        self._run_forward(pp_size=2, vp_size=None, enable_mhc=False)


# ===========================================================================
# 8. Flexible VPP layout (pipeline_model_parallel_layout) + mHC
# ===========================================================================


def _make_layout_config(
    hidden_size=64,
    num_layers=8,
    pp_size=2,
    layout=None,
    enable_hyper_connections=False,
    num_residual_streams=4,
    **extra,
):
    """Build a TransformerConfig with a flexible VPP layout for testing.

    Unlike _make_config, this uses pipeline_model_parallel_layout instead of
    account_for_embedding/loss flags, since they are mutually exclusive.
    """
    kwargs = dict(
        hidden_size=hidden_size,
        num_layers=num_layers,
        num_attention_heads=4,
        pipeline_model_parallel_size=pp_size,
        pipeline_model_parallel_layout=layout,
        pipeline_dtype=torch.bfloat16,
        enable_hyper_connections=enable_hyper_connections,
        num_residual_streams=num_residual_streams,
        use_cpu_initialization=True,
    )
    kwargs.update(extra)
    return TransformerConfig(**kwargs)


class TestFlexibleVPPLayoutLayerCountsWithMHC:
    """
    Verify get_num_layers_to_build returns correct layer counts when
    flexible VPP layout (pipeline_model_parallel_layout) is combined with mHC.
    mHC itself doesn't change layer counts, so these tests confirm the
    combination doesn't break anything.
    """

    def setup_method(self, method):
        pass

    def teardown_method(self, method):
        # Reset globals mutated by fake_initialize_model_parallel / rank setters.
        parallel_state.set_pipeline_model_parallel_world_size(None)
        parallel_state.set_virtual_pipeline_model_parallel_world_size(None)

    def test_pp2_vpp2_standalone_embed_loss_mhc(self):
        """PP=2, VPP=2: standalone embedding & loss on separate VP stages."""
        # Layout: [["embedding"], ["decoder"]*6, ["decoder"], ["loss"]]
        # PP=2, VPP=2 → 4 stages:
        #   PP0 VP0: ["embedding"]   → 0 decoders
        #   PP1 VP0: ["decoder"]*6   → 6 decoders
        #   PP0 VP1: ["decoder"]     → 1 decoder
        #   PP1 VP1: ["loss"]        → 0 decoders
        layout = [["embedding"], ["decoder"] * 6, ["decoder"], ["loss"]]
        Utils.fake_initialize_model_parallel(
            pipeline_model_parallel_size=2, virtual_pipeline_model_parallel_size=2
        )
        cfg = _make_layout_config(
            num_layers=7,
            pp_size=2,
            layout=layout,
            enable_hyper_connections=True,
            num_residual_streams=4,
        )

        expected = {(0, 0): 0, (0, 1): 1, (1, 0): 6, (1, 1): 0}
        total = 0
        for pp_rank in range(2):
            parallel_state.set_pipeline_model_parallel_rank(pp_rank)
            for vp in range(2):
                n = get_num_layers_to_build(cfg, vp_stage=vp)
                assert (
                    n == expected[(pp_rank, vp)]
                ), f"pp_rank={pp_rank}, vp={vp}: expected {expected[(pp_rank, vp)]}, got {n}"
                total += n
        assert total == 7

    def test_pp2_vpp2_even_split_mhc(self):
        """PP=2, VPP=2: even split with embedding/loss attached to decoder stages."""
        # PP0 VP0: ["embedding","decoder","decoder"] → 2 decoders
        # PP1 VP0: ["decoder"]*4                     → 4 decoders
        # PP0 VP1: ["decoder"]                       → 1 decoder
        # PP1 VP1: ["decoder","loss"]                → 1 decoder
        layout = [
            ["embedding", "decoder", "decoder"],
            ["decoder"] * 4,
            ["decoder"],
            ["decoder", "loss"],
        ]
        Utils.fake_initialize_model_parallel(
            pipeline_model_parallel_size=2, virtual_pipeline_model_parallel_size=2
        )
        cfg = _make_layout_config(
            num_layers=8, pp_size=2, layout=layout, enable_hyper_connections=True
        )

        expected = {(0, 0): 2, (0, 1): 1, (1, 0): 4, (1, 1): 1}
        total = 0
        for pp_rank in range(2):
            parallel_state.set_pipeline_model_parallel_rank(pp_rank)
            for vp in range(2):
                n = get_num_layers_to_build(cfg, vp_stage=vp)
                assert (
                    n == expected[(pp_rank, vp)]
                ), f"pp_rank={pp_rank}, vp={vp}: expected {expected[(pp_rank, vp)]}, got {n}"
                total += n
        assert total == 8

    def test_pp2_vpp2_empty_stage_mhc(self):
        """PP=2, VPP=2: empty VP stage (standalone embedding) with mHC."""
        # PP0 VP0: ["embedding"] → 0 decoders
        # PP1 VP0: ["decoder"]*7 → 7 decoders
        # PP0 VP1: []            → 0 decoders
        # PP1 VP1: ["loss"]      → 0 decoders
        layout = [["embedding"], ["decoder"] * 7, [], ["loss"]]
        Utils.fake_initialize_model_parallel(
            pipeline_model_parallel_size=2, virtual_pipeline_model_parallel_size=2
        )
        cfg = _make_layout_config(
            num_layers=7, pp_size=2, layout=layout, enable_hyper_connections=True
        )

        expected = {(0, 0): 0, (0, 1): 0, (1, 0): 7, (1, 1): 0}
        for pp_rank in range(2):
            parallel_state.set_pipeline_model_parallel_rank(pp_rank)
            for vp in range(2):
                n = get_num_layers_to_build(cfg, vp_stage=vp)
                assert n == expected[(pp_rank, vp)]
                assert n >= 0

    def test_mhc_does_not_alter_layout_layer_counts(self):
        """Same layout gives identical layer counts with and without mHC."""
        layout = [
            ["embedding", "decoder", "decoder"],
            ["decoder"] * 4,
            ["decoder"],
            ["decoder", "loss"],
        ]
        Utils.fake_initialize_model_parallel(
            pipeline_model_parallel_size=2, virtual_pipeline_model_parallel_size=2
        )
        cfg_mhc = _make_layout_config(
            num_layers=8, pp_size=2, layout=layout, enable_hyper_connections=True
        )
        cfg_no_mhc = _make_layout_config(
            num_layers=8, pp_size=2, layout=layout, enable_hyper_connections=False
        )

        for pp_rank in range(2):
            parallel_state.set_pipeline_model_parallel_rank(pp_rank)
            for vp in range(2):
                n_mhc = get_num_layers_to_build(cfg_mhc, vp_stage=vp)
                n_no_mhc = get_num_layers_to_build(cfg_no_mhc, vp_stage=vp)
                assert (
                    n_mhc == n_no_mhc
                ), f"pp_rank={pp_rank}, vp={vp}: mHC={n_mhc} != no-mHC={n_no_mhc}"


class TestFlexibleVPPLayoutShapeConsistencyWithMHC:
    """
    Verify that P2P tensor shapes are consistent (send == recv) between
    consecutive PP stages when using flexible VPP layout + mHC.
    This is critical: a shape mismatch causes hangs or crashes.
    """
+ """ + + def test_pp2_flexible_vpp_mhc_send_recv_match(self): + """PP=2 with flexible VPP layout + mHC: rank 0 send == rank 1 recv.""" + H, N = 64, 4 + cfg = _make_layout_config( + hidden_size=H, + num_layers=7, + pp_size=2, + layout=[["embedding"], ["decoder"] * 6, ["decoder"], ["loss"]], + enable_hyper_connections=True, + num_residual_streams=N, + ) + shapes = _get_send_recv_shapes(cfg, pp_size=2) + assert ( + shapes[0][0] == shapes[1][1] + ), f"rank 0 send {shapes[0][0]} != rank 1 recv {shapes[1][1]}" + # rank 0 (first) sends n*C + assert shapes[0][0] == [(32, 2, H * N)] + # rank 1 (last) sends C + assert shapes[1][0] == [(32, 2, H)] + + def test_pp4_flexible_vpp_mhc_all_consecutive_match(self): + """PP=4 with flexible VPP layout + mHC: send[i] == recv[i+1] for all i.""" + H, N = 64, 4 + layout = [ + ["embedding"], + ["decoder"] * 2, + ["decoder"], + ["decoder"], + ["decoder"], + ["decoder"], + ["decoder"], + ["decoder", "loss"], + ] + cfg = _make_layout_config( + hidden_size=H, + num_layers=8, + pp_size=4, + layout=layout, + enable_hyper_connections=True, + num_residual_streams=N, + ) + shapes = _get_send_recv_shapes(cfg, pp_size=4) + for i in range(3): + assert ( + shapes[i][0] == shapes[i + 1][1] + ), f"rank {i} send {shapes[i][0]} != rank {i+1} recv {shapes[i+1][1]}" + + # First stage sends n*C, intermediate stages send/recv n*C, last stage sends C + assert shapes[0][0] == [(32, 2, H * N)] + for i in (1, 2): + assert shapes[i][0] == [(32, 2, H * N)] + assert shapes[i][1] == [(32, 2, H * N)] + assert shapes[3][0] == [(32, 2, H)] + assert shapes[3][1] == [(32, 2, H * N)] + + def test_pp2_flexible_vpp_no_mhc_baseline(self): + """Baseline: PP=2 with flexible VPP layout, no mHC — all shapes are C.""" + H = 64 + cfg = _make_layout_config( + hidden_size=H, + num_layers=7, + pp_size=2, + layout=[["embedding"], ["decoder"] * 6, ["decoder"], ["loss"]], + enable_hyper_connections=False, + ) + shapes = _get_send_recv_shapes(cfg, pp_size=2) + for i in range(1): + assert 
shapes[i][0] == shapes[i + 1][1] + assert shapes[i][0] == [(32, 2, H)] + + def test_pp4_flexible_vpp_mhc_uneven_layers_shape_consistent(self): + """Highly uneven layout: shapes must still match between stages.""" + H, N = 64, 4 + layout = [["embedding", "decoder"], ["decoder"] * 5, ["decoder"], ["decoder", "loss"]] + cfg = _make_layout_config( + hidden_size=H, + num_layers=8, + pp_size=2, + layout=layout, + enable_hyper_connections=True, + num_residual_streams=N, + ) + shapes = _get_send_recv_shapes(cfg, pp_size=2) + assert ( + shapes[0][0] == shapes[1][1] + ), f"rank 0 send {shapes[0][0]} != rank 1 recv {shapes[1][1]}" diff --git a/tests/unit_tests/test_fp8_param.py b/tests/unit_tests/test_fp8_param.py index 361698f7127..e15d2440d99 100644 --- a/tests/unit_tests/test_fp8_param.py +++ b/tests/unit_tests/test_fp8_param.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import contextlib import gc @@ -72,12 +72,12 @@ def setup_method(self, method): os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1' def teardown_method(self, method): - Utils.destroy_model_parallel() - destroy_global_vars() - destroy_num_microbatches_calculator() if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created(): self.cuda_graph_helper.delete_cuda_graphs() self.cuda_graph_helper = None + Utils.destroy_model_parallel() + destroy_global_vars() + destroy_num_microbatches_calculator() gc.collect() def model_provider( diff --git a/tests/unit_tests/transformer/test_hyper_connection_recompute.py b/tests/unit_tests/transformer/test_hyper_connection_recompute.py new file mode 100644 index 00000000000..cf44f2d7cd0 --- /dev/null +++ b/tests/unit_tests/transformer/test_hyper_connection_recompute.py @@ -0,0 +1,408 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +""" +Unit tests for HyperConnection block-level recomputation. 
+ +Tests the following functionality: +1. HyperConnectionModule._forward_with_checkpoint correctness +2. HyperConnectionModule.apply_h_post with CheckpointManager +3. Multiple HyperConnectionModules chained with a single CheckpointManager +4. Partial checkpoint (last layer not checkpointed) +5. TransformerConfig 'mhc' in recompute_modules option +""" + +import pytest +import torch + +from megatron.core.tensor_parallel.random import CheckpointManager, model_parallel_cuda_manual_seed +from megatron.core.transformer.hyper_connection import HyperConnectionModule +from megatron.core.transformer.transformer_config import TransformerConfig +from tests.unit_tests.test_utilities import Utils + + +class TestHyperConnectionCheckpoint: + """Test HyperConnectionModule checkpoint functionality.""" + + def setup_method(self, method): + Utils.initialize_model_parallel(1, 1) + model_parallel_cuda_manual_seed(123) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + def _create_hyper_connection_module(self, hidden_size=64, num_residual_streams=4): + """Create a HyperConnectionModule for testing.""" + config = TransformerConfig( + num_layers=2, + hidden_size=hidden_size, + num_attention_heads=4, + use_cpu_initialization=True, + enable_hyper_connections=True, + num_residual_streams=num_residual_streams, + mhc_sinkhorn_iterations=5, # Fewer iterations for faster tests + mhc_init_gating_factor=0.01, + ) + module = HyperConnectionModule(config=config, layer_number=1) + module.cuda() + return module + + def test_forward_normal_vs_checkpoint_correctness(self): + """ + Test that _forward_with_checkpoint produces the same outputs as _forward_normal. 
+ """ + hidden_size = 64 + num_streams = 4 + seq_len = 8 + batch_size = 2 + + module = self._create_hyper_connection_module(hidden_size, num_streams) + + # Create input tensors + hidden_states = torch.randn( + seq_len, batch_size, num_streams * hidden_size, device='cuda', requires_grad=True + ) + residual = torch.randn( + seq_len, batch_size, num_streams * hidden_size, device='cuda', requires_grad=True + ) + + # Clone inputs for comparison + hidden_states_ckpt = hidden_states.detach().clone().requires_grad_(True) + residual_ckpt = residual.detach().clone().requires_grad_(True) + + # Forward without checkpoint (reference) + torch.manual_seed(42) + torch.cuda.manual_seed(42) + aggregated_ref, h_res_ref, h_post_ref = module._forward_normal(hidden_states) + mixed_ref = module.apply_h_res(h_res_ref, residual) + loss_ref = aggregated_ref.sum() + mixed_ref.sum() + h_post_ref.sum() + loss_ref.backward() + grad_hidden_ref = hidden_states.grad.clone() + grad_residual_ref = residual.grad.clone() + + # Forward with checkpoint + torch.manual_seed(42) + torch.cuda.manual_seed(42) + manager = CheckpointManager() + aggregated_ckpt, h_res_ckpt, h_post_ckpt = module._forward_with_checkpoint( + hidden_states_ckpt, manager + ) + mixed_ckpt = module.apply_h_res(h_res_ckpt, residual_ckpt) + # Calculate loss before discarding outputs + loss_ckpt = aggregated_ckpt.sum() + mixed_ckpt.sum() + h_post_ckpt.sum() + + # Register unified recompute hook + manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt) + + # Backward pass + loss_ckpt.backward() + grad_hidden_ckpt = hidden_states_ckpt.grad.clone() + grad_residual_ckpt = residual_ckpt.grad.clone() + + # Verify gradients match + assert torch.allclose(grad_hidden_ckpt, grad_hidden_ref, atol=1e-5), ( + f"Hidden states gradients mismatch:\n" + f"Checkpoint: {grad_hidden_ckpt}\n" + f"Reference: {grad_hidden_ref}" + ) + assert torch.allclose(grad_residual_ckpt, grad_residual_ref, atol=1e-5), ( + f"Residual gradients mismatch:\n" + 
f"Checkpoint: {grad_residual_ckpt}\n" + f"Reference: {grad_residual_ref}" + ) + + def test_apply_h_post_with_checkpoint(self): + """ + Test that apply_h_post with manager produces correct gradients. + """ + hidden_size = 64 + num_streams = 4 + seq_len = 8 + batch_size = 2 + + module = self._create_hyper_connection_module(hidden_size, num_streams) + + # Create input tensors + x = torch.randn(seq_len, batch_size, hidden_size, device='cuda', requires_grad=True) + bias = torch.randn(hidden_size, device='cuda') + h_post = torch.randn(seq_len, batch_size, num_streams, device='cuda', requires_grad=True) + + # Clone inputs + x_ckpt = x.detach().clone().requires_grad_(True) + h_post_ckpt = h_post.detach().clone().requires_grad_(True) + + # Reference: without checkpoint (manager=None) + torch.manual_seed(42) + x_out_ref, bias_out_ref = module.apply_h_post((x, bias), h_post, manager=None) + loss_ref = x_out_ref.sum() + if bias_out_ref is not None: + loss_ref = loss_ref + bias_out_ref.sum() + loss_ref.backward() + grad_x_ref = x.grad.clone() + grad_h_post_ref = h_post.grad.clone() + + # With checkpoint (manager provided) + torch.manual_seed(42) + manager = CheckpointManager() + x_out_ckpt, bias_out_ckpt = module.apply_h_post( + (x_ckpt, bias), h_post_ckpt, manager=manager + ) + loss_ckpt = x_out_ckpt.sum() + if bias_out_ckpt is not None: + loss_ckpt = loss_ckpt + bias_out_ckpt.sum() + + manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt) + loss_ckpt.backward() + grad_x_ckpt = x_ckpt.grad.clone() + grad_h_post_ckpt = h_post_ckpt.grad.clone() + + # Verify gradients + assert torch.allclose(grad_x_ckpt, grad_x_ref, atol=1e-5) + assert torch.allclose(grad_h_post_ckpt, grad_h_post_ref, atol=1e-5) + + def test_forward_with_manager_parameter(self): + """ + Test forward() method with mhc_recompute_manager parameter. 
+ """ + hidden_size = 64 + num_streams = 4 + seq_len = 8 + batch_size = 2 + + module = self._create_hyper_connection_module(hidden_size, num_streams) + + # Create input tensors + hidden_states = torch.randn( + seq_len, batch_size, num_streams * hidden_size, device='cuda', requires_grad=True + ) + + # Clone inputs + hidden_states_ckpt = hidden_states.detach().clone().requires_grad_(True) + + # Reference: forward without manager (uses _forward_normal) + torch.manual_seed(42) + torch.cuda.manual_seed(42) + aggregated_ref, h_res_ref, h_post_ref = module.forward( + hidden_states, mhc_recompute_manager=None + ) + loss_ref = aggregated_ref.sum() + h_res_ref.sum() + h_post_ref.sum() + loss_ref.backward() + grad_hidden_ref = hidden_states.grad.clone() + + # With manager (uses _forward_with_checkpoint) + torch.manual_seed(42) + torch.cuda.manual_seed(42) + manager = CheckpointManager() + aggregated_ckpt, h_res_ckpt, h_post_ckpt = module.forward( + hidden_states_ckpt, mhc_recompute_manager=manager + ) + loss_ckpt = aggregated_ckpt.sum() + h_res_ckpt.sum() + h_post_ckpt.sum() + + manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt) + loss_ckpt.backward() + grad_hidden_ckpt = hidden_states_ckpt.grad.clone() + + # Verify gradients match + assert torch.allclose(grad_hidden_ckpt, grad_hidden_ref, atol=1e-5) + + +class TestMHCBlockRecomputeIntegration: + """Test CheckpointManager integration with HyperConnection.""" + + def setup_method(self, method): + Utils.initialize_model_parallel(1, 1) + model_parallel_cuda_manual_seed(123) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + def test_multiple_hyper_connections_in_chain(self): + """ + Test that multiple HyperConnectionModules can be chained together + with a single CheckpointManager. 
+ """ + hidden_size = 64 + num_streams = 4 + seq_len = 8 + batch_size = 2 + n_channels = num_streams * hidden_size + + # Create multiple HyperConnection modules (simulating multiple layers) + config = TransformerConfig( + num_layers=4, + hidden_size=hidden_size, + num_attention_heads=4, + use_cpu_initialization=True, + enable_hyper_connections=True, + num_residual_streams=num_streams, + mhc_sinkhorn_iterations=5, + mhc_init_gating_factor=0.01, + ) + + modules = [ + HyperConnectionModule(config=config, layer_number=i + 1).cuda() for i in range(3) + ] + + # Create input tensors + hidden_states_ref = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + residual_ref = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + + hidden_states_ckpt = hidden_states_ref.detach().clone().requires_grad_(True) + residual_ckpt = residual_ref.detach().clone().requires_grad_(True) + + # Reference: forward without checkpoint + torch.manual_seed(42) + torch.cuda.manual_seed(42) + + h = hidden_states_ref + r = residual_ref + for module in modules: + agg, h_res, h_post = module.forward(h, mhc_recompute_manager=None) + agg, _ = module.apply_h_post((0.1 * agg, None), h_post, manager=None) + mixed = module.apply_h_res(h_res, r) # Apply h_res to get mixed [s, b, n*C] + h = agg + mixed + r = h + + loss_ref = h.sum() + loss_ref.backward() + grad_hidden_ref = hidden_states_ref.grad.clone() + grad_residual_ref = residual_ref.grad.clone() + + # With checkpoint using single manager + torch.manual_seed(42) + torch.cuda.manual_seed(42) + + manager = CheckpointManager() + + h = hidden_states_ckpt + r = residual_ckpt + for module in modules: + agg, h_res, h_post = module.forward(h, mhc_recompute_manager=manager) + agg, _ = module.apply_h_post((0.1 * agg, None), h_post, manager=manager) + mixed = module.apply_h_res(h_res, r) # Apply h_res to get mixed [s, b, n*C] + h = agg + mixed + r = h + + loss_ckpt = h.sum() + 
manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt) + loss_ckpt.backward() + + grad_hidden_ckpt = hidden_states_ckpt.grad.clone() + grad_residual_ckpt = residual_ckpt.grad.clone() + + # Verify gradients + assert torch.allclose( + grad_hidden_ckpt, grad_hidden_ref, atol=1e-4 + ), f"Chained HyperConnection hidden gradients mismatch" + assert torch.allclose( + grad_residual_ckpt, grad_residual_ref, atol=1e-4 + ), f"Chained HyperConnection residual gradients mismatch" + + def test_partial_checkpoint_last_layer_not_checkpointed(self): + """ + Test that when is_last_layer_in_block=True, the final output is NOT checkpointed. + This simulates the TransformerBlock behavior where the last layer's MLP BDA + serves as the hook_tensor for unified recompute. + """ + hidden_size = 64 + num_streams = 4 + seq_len = 8 + batch_size = 2 + + config = TransformerConfig( + num_layers=2, + hidden_size=hidden_size, + num_attention_heads=4, + use_cpu_initialization=True, + enable_hyper_connections=True, + num_residual_streams=num_streams, + mhc_sinkhorn_iterations=5, + mhc_init_gating_factor=0.01, + ) + + module = HyperConnectionModule(config=config, layer_number=1).cuda() + + hidden_states_ref = torch.randn( + seq_len, batch_size, num_streams * hidden_size, device='cuda', requires_grad=True + ) + residual_ref = torch.randn( + seq_len, batch_size, num_streams * hidden_size, device='cuda', requires_grad=True + ) + + hidden_states_ckpt = hidden_states_ref.detach().clone().requires_grad_(True) + residual_ckpt = residual_ref.detach().clone().requires_grad_(True) + + # Reference + torch.manual_seed(42) + torch.cuda.manual_seed(42) + aggregated_ref, h_res_ref, h_post_ref = module.forward( + hidden_states_ref, mhc_recompute_manager=None + ) + aggregated_ref, _ = module.apply_h_post( + (0.1 * aggregated_ref, None), h_post_ref, manager=None + ) + mixed_ref = module.apply_h_res( + h_res_ref, residual_ref + ) # Apply h_res to get mixed [s, b, n*C] + # Simulate BDA that is NOT 
checkpointed (last layer) + output_ref = aggregated_ref + 0.5 * mixed_ref + loss_ref = output_ref.sum() + loss_ref.backward() + grad_hidden_ref = hidden_states_ref.grad.clone() + + # With manager - checkpoint everything except final output + torch.manual_seed(42) + torch.cuda.manual_seed(42) + manager = CheckpointManager() + aggregated_ckpt, h_res_ckpt, h_post_ckpt = module.forward( + hidden_states_ckpt, mhc_recompute_manager=manager + ) + + aggregated_ckpt, _ = module.apply_h_post( + (0.1 * aggregated_ckpt, None), h_post_ckpt, manager=manager + ) + mixed_ckpt = module.apply_h_res( + h_res_ckpt, residual_ckpt + ) # Apply h_res to get mixed [s, b, n*C] + # Simulate BDA that is NOT checkpointed (last layer) - this is the hook_tensor + output_ckpt = aggregated_ckpt + 0.5 * mixed_ckpt + + # Register unified recompute on the output (which is not checkpointed) + manager.discard_all_outputs_and_register_unified_recompute(output_ckpt) + + loss_ckpt = output_ckpt.sum() + loss_ckpt.backward() + grad_hidden_ckpt = hidden_states_ckpt.grad.clone() + + # Verify gradients match + assert torch.allclose(grad_hidden_ckpt, grad_hidden_ref, atol=1e-5) + + +class TestTransformerConfigRecomputeMhc: + """Test 'mhc' in recompute_modules configuration.""" + + def test_config_default_value(self): + """Test that 'mhc' is not in recompute_modules by default.""" + config = TransformerConfig(num_layers=2, hidden_size=64, num_attention_heads=4) + assert "mhc" not in config.recompute_modules + + def test_config_enable_mhc_recompute(self): + """Test enabling 'mhc' in recompute_modules.""" + config = TransformerConfig( + num_layers=2, + hidden_size=64, + num_attention_heads=4, + enable_hyper_connections=True, + num_residual_streams=4, + recompute_modules=["core_attn", "mhc"], + recompute_granularity='selective', + ) + assert "mhc" in config.recompute_modules + assert config.enable_hyper_connections is True + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git 
a/tests/unit_tests/transformer/test_mhc_block_manager.py b/tests/unit_tests/transformer/test_mhc_block_manager.py new file mode 100644 index 00000000000..aab004d6516 --- /dev/null +++ b/tests/unit_tests/transformer/test_mhc_block_manager.py @@ -0,0 +1,397 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +import pytest +import torch + +from megatron.core.tensor_parallel.random import ( + CheckpointManager, + CheckpointWithoutOutput, + initialize_rng_tracker, +) +from tests.unit_tests.test_utilities import Utils + + +class TestCheckpointWithoutOutputManagerAPI: + """Test CheckpointWithoutOutput integration with CheckpointManager.""" + + def setup_method(self, method): + Utils.initialize_model_parallel() + initialize_rng_tracker(force_reset=True) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + def test_auto_register(self): + """CheckpointWithoutOutput auto-registers to manager when ckpt_manager is provided.""" + manager = CheckpointManager() + + def func(x): + return x * 2 + 1 + + input_t = torch.randn(4, 4, device='cuda', requires_grad=True) + + ckpt = CheckpointWithoutOutput(ckpt_manager=manager) + y = ckpt.checkpoint(func, input_t) + + assert len(manager.checkpoints) == 1 + assert manager.checkpoints[0] is ckpt + + ckpt2 = CheckpointWithoutOutput(ckpt_manager=manager) + y2 = ckpt2.checkpoint(torch.nn.functional.gelu, y) + + assert len(manager.checkpoints) == 2 + assert manager.checkpoints[1] is ckpt2 + + loss = y2.sum() + manager.discard_all_outputs_and_register_unified_recompute(loss) + loss.backward() + + assert input_t.grad is not None + + def test_discard_is_noop_with_manager(self): + """discard_output_and_register_recompute is a NO-OP when ckpt_manager is set.""" + manager = CheckpointManager() + + def func1(x): + return x * 2 + + def func2(x): + return torch.nn.functional.gelu(x) + + input_ref = torch.randn(4, 4, device='cuda', requires_grad=True) + y1_ref = func1(input_ref) + y2_ref = func2(y1_ref) 
+ loss_ref = y2_ref.sum() + loss_ref.backward() + grad_ref = input_ref.grad.clone() + + input_ckpt = input_ref.detach().clone().requires_grad_(True) + + ckpt1 = CheckpointWithoutOutput(ckpt_manager=manager) + y1 = ckpt1.checkpoint(func1, input_ckpt) + ckpt1.discard_output_and_register_recompute(y1) + + ckpt2 = CheckpointWithoutOutput(ckpt_manager=manager) + y2 = ckpt2.checkpoint(func2, y1) + ckpt2.discard_output_and_register_recompute(y2) + + assert y1.untyped_storage().size() > 0, "y1 should NOT be discarded yet" + assert y2.untyped_storage().size() > 0, "y2 should NOT be discarded yet" + + loss_ckpt = y2.sum() + manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt) + + assert y1.untyped_storage().size() == 0, "y1 should be discarded after manager call" + assert y2.untyped_storage().size() == 0, "y2 should be discarded after manager call" + + loss_ckpt.backward() + grad_ckpt = input_ckpt.grad.clone() + + assert torch.allclose(grad_ckpt, grad_ref, atol=1e-6) + + def test_backward_compat_without_manager(self): + """CheckpointWithoutOutput without ckpt_manager should work exactly as before.""" + + def func(x): + return torch.nn.functional.gelu(x) + + input_ref = torch.randn(4, 4, device='cuda', requires_grad=True) + y_ref = func(input_ref) + z_ref = y_ref * 2 + loss_ref = z_ref.sum() + loss_ref.backward() + grad_ref = input_ref.grad.clone() + + input_ckpt = input_ref.detach().clone().requires_grad_(True) + + ckpt = CheckpointWithoutOutput() + y = ckpt.checkpoint(func, input_ckpt) + z = y * 2 + ckpt.discard_output_and_register_recompute(z) + + assert y.untyped_storage().size() == 0 + + loss_ckpt = z.sum() + loss_ckpt.backward() + grad_ckpt = input_ckpt.grad.clone() + + assert torch.allclose(grad_ckpt, grad_ref, atol=1e-6) + + def test_error_handling(self): + """CheckpointManager rejects invalid add_checkpoint calls.""" + manager = CheckpointManager() + + with pytest.raises(TypeError): + manager.add_checkpoint("not a checkpoint") + + ckpt = 
CheckpointWithoutOutput() + with pytest.raises(ValueError): + manager.add_checkpoint(ckpt) + + +class TestCheckpointManagerSequentialChain: + """Test CheckpointManager with sequential checkpoint chains.""" + + def setup_method(self, method): + Utils.initialize_model_parallel() + initialize_rng_tracker(force_reset=True) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + def test_basic_sequential_chain(self): + """Three sequential checkpoints: gradients match non-checkpointed version.""" + + def func1(x): + return x * 2 + 1 + + def func2(x): + return torch.nn.functional.gelu(x) + + def func3(x): + return x * x + x + + input_ref = torch.randn(4, 4, device='cuda', requires_grad=True) + input_ckpt = input_ref.detach().clone().requires_grad_(True) + + y1_ref = func1(input_ref) + y2_ref = func2(y1_ref) + y3_ref = func3(y2_ref) + loss_ref = y3_ref.sum() + loss_ref.backward() + grad_ref = input_ref.grad.clone() + + manager = CheckpointManager() + + y1 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func1, input_ckpt) + y2 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func2, y1) + y3 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func3, y2) + + loss_ckpt = y3.sum() + manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt) + + assert y1.untyped_storage().size() == 0, "y1 storage should be released" + assert y2.untyped_storage().size() == 0, "y2 storage should be released" + assert y3.untyped_storage().size() == 0, "y3 storage should be released" + + loss_ckpt.backward() + grad_ckpt = input_ckpt.grad.clone() + + assert torch.allclose( + grad_ckpt, grad_ref, atol=1e-6 + ), f"Gradients mismatch!\nWith manager: {grad_ckpt}\nReference: {grad_ref}" + + def test_sequential_chain_with_dropout(self): + """RNG state is restored during recompute so dropout gradients match.""" + + def func_with_dropout(x): + return torch.nn.functional.dropout(x, p=0.3, training=True) + + def func2(x): + return 
torch.nn.functional.gelu(x) + + input_ref = torch.randn(4, 4, device='cuda', requires_grad=True) + input_ckpt = input_ref.detach().clone().requires_grad_(True) + + torch.manual_seed(42) + torch.cuda.manual_seed(42) + + y1_ref = func_with_dropout(input_ref) + y2_ref = func2(y1_ref) + loss_ref = y2_ref.sum() + loss_ref.backward() + grad_ref = input_ref.grad.clone() + + torch.manual_seed(42) + torch.cuda.manual_seed(42) + + manager = CheckpointManager() + + y1 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func_with_dropout, input_ckpt) + y2 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func2, y1) + + loss_ckpt = y2.sum() + manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt) + + loss_ckpt.backward() + grad_ckpt = input_ckpt.grad.clone() + + assert torch.allclose( + grad_ckpt, grad_ref, atol=1e-6 + ), f"Gradients with dropout mismatch!\nWith manager: {grad_ckpt}\nReference: {grad_ref}" + + def test_multiple_outputs(self): + """CheckpointManager handles functions that return multiple outputs.""" + + def func_multi_output(x): + return x * 2, x + 1 + + def func_combine(a, b): + return a + b + + input_ref = torch.randn(4, 4, device='cuda', requires_grad=True) + input_ckpt = input_ref.detach().clone().requires_grad_(True) + + y1a_ref, y1b_ref = func_multi_output(input_ref) + y2_ref = func_combine(y1a_ref, y1b_ref) + loss_ref = y2_ref.sum() + loss_ref.backward() + grad_ref = input_ref.grad.clone() + + manager = CheckpointManager() + + y1a, y1b = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint( + func_multi_output, input_ckpt + ) + y2 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func_combine, y1a, y1b) + + loss_ckpt = y2.sum() + manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt) + + loss_ckpt.backward() + grad_ckpt = input_ckpt.grad.clone() + + assert torch.allclose(grad_ckpt, grad_ref, atol=1e-6), ( + f"Gradients mismatch with multiple outputs!\n" + f"With manager: {grad_ckpt}\nReference: 
{grad_ref}" + ) + + +class TestCheckpointManagerPartialCheckpoint: + """Test CheckpointManager with partial checkpointing (some ops not checkpointed).""" + + def setup_method(self, method): + Utils.initialize_model_parallel() + initialize_rng_tracker(force_reset=True) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + def test_partial_checkpoint(self): + """ + Only f and h are checkpointed; g is a regular operation. + + Computation chain: + a --[f]--> b --[g]--> c --[h]--> d --[sum]--> loss + """ + + def func_f(x): + return torch.nn.functional.gelu(x * 2 + 1) + + def func_g(x): + return x * 3 - 2 + + def func_h(x): + return torch.sigmoid(x) + x + + input_ref = torch.randn(4, 4, device='cuda', requires_grad=True) + + b_ref = func_f(input_ref) + c_ref = func_g(b_ref) + d_ref = func_h(c_ref) + loss_ref = d_ref.sum() + loss_ref.backward() + grad_ref = input_ref.grad.clone() + + input_ckpt = input_ref.detach().clone().requires_grad_(True) + + manager = CheckpointManager() + + b = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func_f, input_ckpt) + c = func_g(b) + d = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func_h, c) + + loss_ckpt = d.sum() + manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt) + + assert b.untyped_storage().size() == 0, "b storage should be released" + assert d.untyped_storage().size() == 0, "d storage should be released" + assert c.untyped_storage().size() > 0, "c storage should NOT be released (not checkpointed)" + + loss_ckpt.backward() + grad_ckpt = input_ckpt.grad.clone() + + assert torch.allclose(grad_ckpt, grad_ref, atol=1e-6), ( + f"Gradients mismatch with partial checkpoint!\n" + f"With manager: {grad_ckpt}\nReference: {grad_ref}" + ) + + def test_partial_checkpoint_with_tuple_output(self): + """ + Mimics HyperConnection's computation pattern with tuple outputs. 
+ + - compute_mappings: checkpointed, returns tuple (h_pre, h_post, h_res) + - aggregate: NOT checkpointed + - apply_h_res: checkpointed + - apply_h_post: checkpointed + """ + + def compute_mappings(x): + h_pre = torch.sigmoid(x.mean(dim=-1, keepdim=True).expand_as(x)) + h_post = torch.tanh(x.sum(dim=-1, keepdim=True).expand_as(x)) + h_res = torch.relu(x) + return h_pre, h_post, h_res + + def aggregate(x, h_pre): + return x * h_pre + + def apply_h_res(h_res, residual): + return h_res + residual * 0.5 + + def apply_h_post(y, h_post): + return y * h_post + y + + x_ref = torch.randn(4, 4, device='cuda', requires_grad=True) + residual_ref = torch.randn(4, 4, device='cuda', requires_grad=True) + + h_pre_ref, h_post_ref, h_res_ref = compute_mappings(x_ref) + agg_ref = aggregate(x_ref, h_pre_ref) + y_ref = torch.nn.functional.gelu(agg_ref) + mixed_ref = apply_h_res(h_res_ref, residual_ref) + output_ref = apply_h_post(y_ref, h_post_ref) + final_ref = output_ref + mixed_ref + loss_ref = final_ref.sum() + loss_ref.backward() + grad_x_ref = x_ref.grad.clone() + grad_residual_ref = residual_ref.grad.clone() + + x_ckpt = x_ref.detach().clone().requires_grad_(True) + residual_ckpt = residual_ref.detach().clone().requires_grad_(True) + + manager = CheckpointManager() + + h_pre, h_post, h_res = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint( + compute_mappings, x_ckpt + ) + agg = aggregate(x_ckpt, h_pre) + y = torch.nn.functional.gelu(agg) + mixed = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint( + apply_h_res, h_res, residual_ckpt + ) + output = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(apply_h_post, y, h_post) + + final = output + mixed + loss_ckpt = final.sum() + + manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt) + + assert h_pre.untyped_storage().size() == 0, "h_pre storage should be released" + assert h_post.untyped_storage().size() == 0, "h_post storage should be released" + assert h_res.untyped_storage().size() == 0, 
"h_res storage should be released" + assert mixed.untyped_storage().size() == 0, "mixed storage should be released" + assert output.untyped_storage().size() == 0, "output storage should be released" + + assert agg.untyped_storage().size() > 0, "agg storage should NOT be released" + assert y.untyped_storage().size() > 0, "y storage should NOT be released" + + loss_ckpt.backward() + grad_x_ckpt = x_ckpt.grad.clone() + grad_residual_ckpt = residual_ckpt.grad.clone() + + assert torch.allclose( + grad_x_ckpt, grad_x_ref, atol=1e-6 + ), f"Gradients for x mismatch!\nWith manager: {grad_x_ckpt}\nReference: {grad_x_ref}" + assert torch.allclose(grad_residual_ckpt, grad_residual_ref, atol=1e-6), ( + f"Gradients for residual mismatch!\n" + f"With manager: {grad_residual_ckpt}\nReference: {grad_residual_ref}" + ) diff --git a/tests/unit_tests/transformer/test_transformer_layer.py b/tests/unit_tests/transformer/test_transformer_layer.py index da1f9ce5860..995e99d6a24 100644 --- a/tests/unit_tests/transformer/test_transformer_layer.py +++ b/tests/unit_tests/transformer/test_transformer_layer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
import pytest @@ -8,17 +8,41 @@ from megatron.core.dist_checkpointing.mapping import ShardedObject, ShardedTensor from megatron.core.inference.contexts import StaticInferenceContext from megatron.core.models.gpt.gpt_layer_specs import ( + get_gpt_layer_with_transformer_engine_spec, get_gpt_layer_with_transformer_engine_submodules, ) -from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed +from megatron.core.tensor_parallel.random import CheckpointManager, model_parallel_cuda_manual_seed from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.transformer_layer import ( + HyperConnectionTransformerLayer, TransformerLayer, get_transformer_layer_offset, ) from tests.unit_tests.test_utilities import Utils +def _make_mhc_config(hidden_size=64, num_streams=4, **extra): + """Build a TransformerConfig with common MHC defaults. + + Any default can be overridden via **extra + (e.g. ``_make_mhc_config(num_layers=8, recompute_modules=["core_attn", "mhc"])``). 
+ """ + base = dict( + num_layers=2, + hidden_size=hidden_size, + num_attention_heads=4, + use_cpu_initialization=True, + enable_hyper_connections=True, + num_residual_streams=num_streams, + mhc_sinkhorn_iterations=5, + mhc_init_gating_factor=0.01, + hidden_dropout=0.0, + attention_dropout=0.0, + ) + base.update(extra) + return TransformerConfig(**base) + + class TestParallelTransformerLayer: def setup_method(self, method): @@ -313,3 +337,761 @@ def get_tensor_shapes_for_tp(transformer_config, tp_size): 'self_attention.linear_qkv.weight': (hs * 3 // tp_size, hs), 'self_attention.linear_qkv.bias': (hs * 3 // tp_size,), } + + +class TestTransformerLayerWithHyperConnectionRecompute: + """Test TransformerLayer with HyperConnection and MHC block recomputation.""" + + def setup_method(self, method): + Utils.initialize_model_parallel(1, 1) + model_parallel_cuda_manual_seed(123) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + def _create_layer_with_hyper_connection( + self, hidden_size=64, num_streams=4, layer_number=1, **extra + ): + """Create a HyperConnectionTransformerLayer with hyper connection enabled.""" + config = _make_mhc_config( + hidden_size=hidden_size, + num_streams=num_streams, + recompute_modules=["core_attn", "mhc"], + recompute_granularity='selective', + **extra, + ) + layer_spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=True) + layer = HyperConnectionTransformerLayer( + config, layer_spec.submodules, layer_number=layer_number + ) + layer.cuda() + return layer, config + + def test_forward_with_hyper_connection_recompute(self): + """ + Test that TransformerLayer forward works correctly with HyperConnection + and MHC block recomputation enabled. 
+ """ + hidden_size = 64 + num_streams = 4 + seq_len = 8 + batch_size = 2 + + layer, config = self._create_layer_with_hyper_connection(hidden_size, num_streams) + layer.train() # Enable training mode for recomputation + + # Input shape: [seq_len, batch_size, n * hidden_size] for hyper connections + n_channels = num_streams * hidden_size + hidden_states = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + # Create manager for MHC block recomputation + manager = CheckpointManager() + + # Forward pass with recompute manager + manager.is_last_layer_in_recompute_block = True + output, context = layer( + hidden_states=hidden_states, + attention_mask=attention_mask, + mhc_recompute_manager=manager, + ) + + # Verify output shape + assert output.shape == ( + seq_len, + batch_size, + n_channels, + ), f"Expected output shape {(seq_len, batch_size, n_channels)}, got {output.shape}" + + # Register unified recompute hook at block boundary. + manager.discard_all_outputs_and_register_unified_recompute(output) + + # Backward pass should work without error + loss = output.sum() + loss.backward() + + # Verify gradients exist + assert hidden_states.grad is not None, "Gradients should be computed for hidden_states" + assert hidden_states.grad.shape == hidden_states.shape + + def test_intermediate_layer_with_recompute(self): + """ + Test TransformerLayer as an intermediate layer (not last in block). + In this case, MLP BDA should also be checkpointed. 
+ """ + hidden_size = 64 + num_streams = 4 + seq_len = 8 + batch_size = 2 + + layer, config = self._create_layer_with_hyper_connection(hidden_size, num_streams) + layer.train() + + n_channels = num_streams * hidden_size + hidden_states = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + manager = CheckpointManager() + + # Forward pass - NOT the last layer in block + manager.is_last_layer_in_recompute_block = False + output, context = layer( + hidden_states=hidden_states, + attention_mask=attention_mask, + mhc_recompute_manager=manager, + ) + + # Verify output shape + assert output.shape == (seq_len, batch_size, n_channels) + + # Backward pass should work + loss = output.sum() + # For intermediate layers, we need to pass output to next layer + # Here we just register the recompute hook on output for testing + manager.discard_all_outputs_and_register_unified_recompute(loss) + + loss.backward() + + assert hidden_states.grad is not None + assert hidden_states.grad.shape == hidden_states.shape + + def test_multiple_layers_chain_with_recompute(self): + """ + Test multiple TransformerLayers chained together with a single + CheckpointManager, simulating TransformerBlock behavior. 
+ """ + hidden_size = 64 + num_streams = 4 + seq_len = 8 + batch_size = 2 + num_layers = 3 + + layers = [ + self._create_layer_with_hyper_connection( + hidden_size, num_streams, layer_number=i + 1, num_layers=num_layers + )[0] + for i in range(num_layers) + ] + + for layer in layers: + layer.train() + + n_channels = num_streams * hidden_size + hidden_states = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + # Single manager for all layers (like TransformerBlock) + manager = CheckpointManager() + + # Forward through all layers + h = hidden_states + for i, layer in enumerate(layers): + is_last = i == num_layers - 1 + manager.is_last_layer_in_recompute_block = is_last + h, _ = layer( + hidden_states=h, attention_mask=attention_mask, mhc_recompute_manager=manager + ) + if is_last: + manager.discard_all_outputs_and_register_unified_recompute(h) + + # Backward pass + loss = h.sum() + loss.backward() + + # Verify gradients + assert hidden_states.grad is not None + assert hidden_states.grad.shape == hidden_states.shape + # Check that gradient is non-trivial (not all zeros) + assert hidden_states.grad.abs().sum() > 0 + + +class TestMHCRecomputeMemorySaving: + """Verify that 'mhc' in recompute_modules actually reduces peak GPU memory.""" + + def setup_method(self, method): + Utils.initialize_model_parallel(1, 1) + model_parallel_cuda_manual_seed(123) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + @staticmethod + def _run_forward_backward( + num_layers, + hidden_size, + num_streams, + seq_len, + batch_size, + use_recompute, + recompute_block_size=2, + ): + """Run a full forward + backward pass and return (peak memory, output grad). + + When use_recompute=True, a new CheckpointManager is created every + `recompute_block_size` layers, mirroring TransformerBlock's + _build_mhc_recompute_layer_plan logic. 
+ """ + config = _make_mhc_config( + hidden_size=hidden_size, + num_streams=num_streams, + num_layers=num_layers, + recompute_modules=["core_attn", "mhc"] if use_recompute else None, + recompute_granularity='selective' if use_recompute else None, + ) + layer_spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=True) + layers = [ + HyperConnectionTransformerLayer( + config, layer_spec.submodules, layer_number=i + 1 + ).cuda() + for i in range(num_layers) + ] + for layer in layers: + layer.train() + + n_channels = num_streams * hidden_size + hidden_states = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + torch.cuda.reset_peak_memory_stats() + torch.cuda.synchronize() + + manager = CheckpointManager() if use_recompute else None + + h = hidden_states + for i, layer in enumerate(layers): + is_last_in_block = (i == num_layers - 1) or ((i + 1) % recompute_block_size == 0) + kwargs = dict(hidden_states=h, attention_mask=attention_mask) + if manager is not None: + manager.is_last_layer_in_recompute_block = is_last_in_block + kwargs['mhc_recompute_manager'] = manager + h, _ = layer(**kwargs) + if manager is not None and is_last_in_block: + manager.discard_all_outputs_and_register_unified_recompute(h) + if i < num_layers - 1: + manager = CheckpointManager() + + loss = h.sum() + loss.backward() + torch.cuda.synchronize() + + peak_mem = torch.cuda.max_memory_allocated() + grad = hidden_states.grad.clone() + + del layers, hidden_states, h, loss, manager + torch.cuda.empty_cache() + + return peak_mem, grad + + def test_recompute_reduces_peak_memory(self): + """Peak memory with recompute (block_size=2) should be lower than without.""" + num_layers = 8 + hidden_size = 128 + num_streams = 4 + seq_len = 64 + batch_size = 4 + + peak_no_recompute, _ = self._run_forward_backward( + num_layers, hidden_size, num_streams, seq_len, batch_size, 
use_recompute=False + ) + peak_recompute, _ = self._run_forward_backward( + num_layers, + hidden_size, + num_streams, + seq_len, + batch_size, + use_recompute=True, + recompute_block_size=2, + ) + + saving_pct = (peak_no_recompute - peak_recompute) / peak_no_recompute * 100 + + assert peak_recompute < peak_no_recompute, ( + f"Recompute should reduce peak memory, but got " + f"no_recompute={peak_no_recompute / 1e6:.1f}MB vs " + f"recompute={peak_recompute / 1e6:.1f}MB " + f"(saving={saving_pct:.1f}%)" + ) + + +class TestMHCWithCudaGraph: + """Test HyperConnectionTransformerLayer compatibility with CUDA graphs. + + CUDA graph capture requires static computation graphs and fixed tensor shapes. + These tests verify that the mHC layer properly supports the CUDA graph interface + defined in GraphableMegatronModule and TransformerLayer. + """ + + def setup_method(self, method): + Utils.initialize_model_parallel(1, 1) + model_parallel_cuda_manual_seed(123, use_cudagraphable_rng=True, force_reset_rng=True) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + def _create_mhc_layer(self, hidden_size=64, num_streams=4, **extra_config): + config = _make_mhc_config(hidden_size=hidden_size, num_streams=num_streams, **extra_config) + layer_spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=True) + layer = HyperConnectionTransformerLayer(config, layer_spec.submodules) + layer.cuda() + return layer, config + + def test_get_layer_static_inputs_shape_for_mhc(self): + """get_layer_static_inputs must return [s, b, n*C] for mHC layers. + + CUDA graph capture creates static buffers whose shapes are determined by + this method. If the shape is [s, b, C] instead of [s, b, n*C], the graph + capture will produce a shape mismatch at the first hyper connection module. 
+ """ + layer, config = self._create_mhc_layer() + seq_length = 32 + micro_batch_size = 2 + + static_inputs = layer.get_layer_static_inputs(seq_length, micro_batch_size) + hidden_states = static_inputs["hidden_states"] + + expected_hidden_dim = config.num_residual_streams * config.hidden_size + assert hidden_states.shape[-1] == expected_hidden_dim, ( + f"get_layer_static_inputs returns hidden dim {hidden_states.shape[-1]} " + f"but mHC expects {expected_hidden_dim} (n={config.num_residual_streams} * " + f"C={config.hidden_size}). " + f"HyperConnectionTransformerLayer must override get_layer_static_inputs." + ) + + def test_submodules_under_cudagraphs_includes_hyper_connection(self): + """_get_submodules_under_cudagraphs must include hyper connection modules. + + CUDA graph manual hooks are set up for parameters of submodules returned + by this method. Missing hyper connection modules means their parameters + (mapping_proj, alpha_*, bias) will not get proper pre-forward hooks during + graph replay, leading to stale parameter values. + """ + layer, config = self._create_mhc_layer() + + submodules = layer._get_submodules_under_cudagraphs() + + hc_modules_found = any( + hasattr(m, 'mapping_proj') for submod in submodules for m in submod.modules() + ) + assert hc_modules_found, ( + "_get_submodules_under_cudagraphs does not include HyperConnectionModule. " + "Parameters like mapping_proj, alpha_pre/post/res will not be updated " + "during CUDA graph replay." + ) + + def test_forward_through_te_cuda_graph_capture_path(self): + """_te_cuda_graph_capture must produce correct output shapes for mHC. + + TE CUDA graph capture calls _te_cuda_graph_capture() during warmup. + For mHC layers, the input must be n-stream [s, b, n*C] and output must + also be [s, b, n*C]. 
+ """ + layer, config = self._create_mhc_layer() + layer.eval() + + seq_len = 8 + batch_size = 2 + n_channels = config.num_residual_streams * config.hidden_size + + hidden_states = torch.randn(seq_len, batch_size, n_channels, device='cuda') + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + with torch.no_grad(): + outputs = layer._te_cuda_graph_capture( + hidden_states=hidden_states, attention_mask=attention_mask + ) + + if isinstance(outputs, tuple): + output = outputs[0] + else: + output = outputs + + assert output.shape == (seq_len, batch_size, n_channels), ( + f"_te_cuda_graph_capture output shape {output.shape} != " + f"expected {(seq_len, batch_size, n_channels)}" + ) + + def test_cuda_graph_fwd_bwd_with_hyper_connection(self): + """End-to-end CUDA graph capture and replay for forward+backward with mHC. + + Captures both the forward and backward pass of HyperConnectionTransformerLayer + into a torch.cuda.CUDAGraph and replays it with fresh input data, verifying + that the computation graph is fully static (capturable) and produces correct + output shapes and non-trivial gradients. + """ + layer, config = self._create_mhc_layer() + layer.train() + + seq_len = 8 + batch_size = 2 + n_channels = config.num_residual_streams * config.hidden_size + + static_input = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + # Warmup on side stream to trigger lazy allocations + s = torch.cuda.Stream() + s.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(s): + for _ in range(3): + out, _ = layer(hidden_states=static_input, attention_mask=attention_mask) + out.sum().backward() + torch.cuda.current_stream().wait_stream(s) + + # Set .grad to None so backward allocates fresh gradient tensors in the + # graph's private memory pool during capture. 
+ layer.zero_grad(set_to_none=True) + static_input.grad = None + + g = torch.cuda.CUDAGraph() + with torch.cuda.graph(g): + output, _ = layer(hidden_states=static_input, attention_mask=attention_mask) + output.sum().backward() + + # Replay with new input data. + # Use no_grad because backward inside the captured graph already + # bumped the autograd version counter on static_input, making + # in-place copy_ illegal without disabling grad tracking. + with torch.no_grad(): + static_input.copy_(torch.randn_like(static_input)) + g.replay() + + assert output.shape == ( + seq_len, + batch_size, + n_channels, + ), f"Output shape {output.shape} != expected {(seq_len, batch_size, n_channels)}" + assert ( + static_input.grad is not None + ), "Gradients should be computed for static_input after graph replay" + assert static_input.grad.shape == static_input.shape + assert static_input.grad.abs().sum() > 0, "Gradients should be non-trivial" + + # Verify numerical consistency: graph replay should match eager execution + # with the same input and weights. + test_data = torch.randn(seq_len, batch_size, n_channels, device='cuda') + + with torch.no_grad(): + static_input.copy_(test_data) + g.replay() + graph_out = output.detach().clone() + graph_grad = static_input.grad.detach().clone() + + eager_input = test_data.clone().requires_grad_(True) + eager_output, _ = layer(hidden_states=eager_input, attention_mask=attention_mask) + eager_output.sum().backward() + + assert torch.allclose(graph_out, eager_output.detach(), atol=1e-5), ( + f"Graph vs eager output mismatch: " + f"max diff = {(graph_out - eager_output.detach()).abs().max().item()}" + ) + assert torch.allclose(graph_grad, eager_input.grad, atol=1e-5), ( + f"Graph vs eager gradient mismatch: " + f"max diff = {(graph_grad - eager_input.grad).abs().max().item()}" + ) + + def test_cuda_graph_fwd_bwd_with_hyper_connection_and_recompute(self): + """CUDA graph capture+replay for fwd+bwd with mHC and CheckpointManager. 
+ + When a CheckpointManager is used, additional CheckpointWithoutOutput + objects are created for layernorm and hyper-connection operations. The + manager discards intermediate activations during forward (storage.resize_(0)) + and recomputes them during backward via a unified gradient hook. + This test verifies the full capture+replay still works correctly. + """ + layer, config = self._create_mhc_layer() + layer.train() + + seq_len = 8 + batch_size = 2 + n_channels = config.num_residual_streams * config.hidden_size + + static_input = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + # Warmup on side stream; fresh manager per iteration to avoid stale state. + s = torch.cuda.Stream() + s.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(s): + for _ in range(3): + mgr = CheckpointManager() + mgr.is_last_layer_in_recompute_block = True + out, _ = layer( + hidden_states=static_input, + attention_mask=attention_mask, + mhc_recompute_manager=mgr, + ) + mgr.discard_all_outputs_and_register_unified_recompute(out) + out.sum().backward() + torch.cuda.current_stream().wait_stream(s) + + layer.zero_grad(set_to_none=True) + static_input.grad = None + + capture_mgr = CheckpointManager() + capture_mgr.is_last_layer_in_recompute_block = True + + g = torch.cuda.CUDAGraph() + with torch.cuda.graph(g): + output, _ = layer( + hidden_states=static_input, + attention_mask=attention_mask, + mhc_recompute_manager=capture_mgr, + ) + capture_mgr.discard_all_outputs_and_register_unified_recompute(output) + output.sum().backward() + + # Replay with new input data. 
+ with torch.no_grad(): + static_input.copy_(torch.randn_like(static_input)) + g.replay() + + assert output.shape == ( + seq_len, + batch_size, + n_channels, + ), f"Output shape {output.shape} != expected {(seq_len, batch_size, n_channels)}" + assert ( + static_input.grad is not None + ), "Gradients should be computed for static_input after graph replay" + assert static_input.grad.shape == static_input.shape + assert static_input.grad.abs().sum() > 0, "Gradients should be non-trivial" + + # Numerical consistency: graph replay vs eager with the same input. + test_data = torch.randn(seq_len, batch_size, n_channels, device='cuda') + + with torch.no_grad(): + static_input.copy_(test_data) + g.replay() + graph_out = output.detach().clone() + graph_grad = static_input.grad.detach().clone() + + eager_mgr = CheckpointManager() + eager_mgr.is_last_layer_in_recompute_block = True + eager_input = test_data.clone().requires_grad_(True) + eager_output, _ = layer( + hidden_states=eager_input, + attention_mask=attention_mask, + mhc_recompute_manager=eager_mgr, + ) + eager_mgr.discard_all_outputs_and_register_unified_recompute(eager_output) + eager_output.sum().backward() + + assert torch.allclose(graph_out, eager_output.detach(), atol=1e-5), ( + f"Graph vs eager output mismatch: " + f"max diff = {(graph_out - eager_output.detach()).abs().max().item()}" + ) + assert torch.allclose(graph_grad, eager_input.grad, atol=1e-5), ( + f"Graph vs eager gradient mismatch: " + f"max diff = {(graph_grad - eager_input.grad).abs().max().item()}" + ) + + def test_mcore_cudagraph_manager_with_mhc_recompute_manager(self): + """MCore CudaGraphManager must not crash on mhc_recompute_manager kwarg. + + When cuda_graph_impl="local" is set, TransformerLayer.__call__ routes + through MegatronModule.__call__ → CudaGraphManager.__call__, which + iterates over all kwargs to check supported types. CheckpointManager + (used by mhc_recompute_manager) is not a CUDA-graph-supported type. 
+ + This test verifies that mhc_recompute_manager is properly extracted + from kwargs before the CudaGraphManager sees them, preventing the + AssertionError that would otherwise occur. + """ + layer, config = self._create_mhc_layer(cuda_graph_impl="local", cuda_graph_scope="attn") + layer.train() + + assert hasattr( + layer, 'cudagraph_manager' + ), "Layer should have cudagraph_manager with cuda_graph_impl='local'" + + seq_len = 8 + batch_size = 2 + n_channels = config.num_residual_streams * config.hidden_size + + hidden_states = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + mgr = CheckpointManager() + mgr.is_last_layer_in_recompute_block = True + + output, context = layer( + hidden_states=hidden_states, attention_mask=attention_mask, mhc_recompute_manager=mgr + ) + + assert output.shape == (seq_len, batch_size, n_channels) + + def test_mcore_cudagraph_manager_without_mhc_recompute_manager(self): + """MCore CudaGraphManager path works when mhc_recompute_manager is None.""" + layer, config = self._create_mhc_layer(cuda_graph_impl="local", cuda_graph_scope="attn") + layer.train() + + seq_len = 8 + batch_size = 2 + n_channels = config.num_residual_streams * config.hidden_size + + hidden_states = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + output, context = layer(hidden_states=hidden_states, attention_mask=attention_mask) + + assert output.shape == (seq_len, batch_size, n_channels) + + +class TestMHCWithOffloading: + """Test HyperConnectionTransformerLayer with fine-grained activation offloading. + + Fine-grained activation offloading transfers specific activations (e.g., layernorm + inputs) to CPU during forward and reloads them during backward. 
These tests verify + that the mHC layer's multi-stream architecture works correctly with offloading. + """ + + def setup_method(self, method): + Utils.initialize_model_parallel(1, 1) + model_parallel_cuda_manual_seed(123) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + def _create_mhc_layer_with_offloading( + self, hidden_size=64, num_streams=4, offload_modules=None + ): + if offload_modules is None: + offload_modules = ["attn_norm", "mlp_norm"] + + config = _make_mhc_config( + hidden_size=hidden_size, + num_streams=num_streams, + fine_grained_activation_offloading=True, + offload_modules=offload_modules, + ) + layer_spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=True) + layer = HyperConnectionTransformerLayer(config, layer_spec.submodules) + layer.cuda() + return layer, config + + def test_forward_backward_with_offloading(self): + """Forward+backward should work with activation offloading enabled. + + This exercises the off_interface context manager around layernorms in + the mHC forward path, including the group_commit that commits the + offloading group for the aggregated 1-stream layernorm input. 
+ """ + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + PipelineOffloadManager, + ) + + layer, config = self._create_mhc_layer_with_offloading() + layer.train() + + seq_len = 8 + batch_size = 2 + n_channels = config.num_residual_streams * config.hidden_size + + hidden_states = torch.randn( + seq_len, batch_size, n_channels, device='cuda', requires_grad=True + ) + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + mgr = PipelineOffloadManager.get_instance() + mgr.init_model_chunk_offload_handler(vp_size=1, vp_stage=0, min_offloaded_tensor_size=0) + + output, context = layer(hidden_states=hidden_states, attention_mask=attention_mask) + + assert output.shape == ( + seq_len, + batch_size, + n_channels, + ), f"Output shape {output.shape} != expected {(seq_len, batch_size, n_channels)}" + + loss = output.sum() + loss.backward() + + assert hidden_states.grad is not None, "Gradients should flow through offloaded path" + assert hidden_states.grad.shape == hidden_states.shape + assert hidden_states.grad.abs().sum() > 0, "Gradients should be non-trivial" + + PipelineOffloadManager.reset_instance() + + def test_offloading_numerical_equivalence(self): + """Offloaded forward+backward must produce the same result as non-offloaded. + + Compares outputs and gradients between a layer with offloading disabled + vs enabled to ensure the offloading path does not corrupt activations. 
+ """ + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + PipelineOffloadManager, + ) + + PipelineOffloadManager.reset_instance() + + hidden_size = 64 + num_streams = 4 + seq_len = 8 + batch_size = 2 + n_channels = num_streams * hidden_size + + torch.manual_seed(42) + input_data = torch.randn(seq_len, batch_size, n_channels, device='cuda') + attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') + + # Run without offloading + config_no_offload = _make_mhc_config(hidden_size=hidden_size, num_streams=num_streams) + layer_spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=True) + layer_no_offload = HyperConnectionTransformerLayer( + config_no_offload, layer_spec.submodules + ).cuda() + layer_no_offload.train() + + h1 = input_data.clone().detach().requires_grad_(True) + out1, _ = layer_no_offload(hidden_states=h1, attention_mask=attention_mask) + out1.sum().backward() + grad_no_offload = h1.grad.clone() + out1_detached = out1.detach().clone() + + # Run with offloading using the same weights + config_offload = _make_mhc_config( + hidden_size=hidden_size, + num_streams=num_streams, + fine_grained_activation_offloading=True, + offload_modules=["attn_norm", "mlp_norm"], + ) + layer_offload = HyperConnectionTransformerLayer( + config_offload, layer_spec.submodules + ).cuda() + layer_offload.load_state_dict(layer_no_offload.state_dict()) + layer_offload.train() + + mgr = PipelineOffloadManager.get_instance() + mgr.init_model_chunk_offload_handler(vp_size=1, vp_stage=0, min_offloaded_tensor_size=0) + + h2 = input_data.clone().detach().requires_grad_(True) + out2, _ = layer_offload(hidden_states=h2, attention_mask=attention_mask) + out2.sum().backward() + grad_offload = h2.grad.clone() + + PipelineOffloadManager.reset_instance() + + assert torch.allclose(out1_detached, out2.detach(), atol=1e-5), ( + f"Forward outputs differ: max diff = " + f"{(out1_detached - out2.detach()).abs().max().item()}" + ) 
+ assert torch.allclose(grad_no_offload, grad_offload, atol=1e-5), ( + f"Gradients differ: max diff = " + f"{(grad_no_offload - grad_offload).abs().max().item()}" + ) From 597f0d8a3d562dc7df680c28942682126ed21088 Mon Sep 17 00:00:00 2001 From: "Dennis(Zhenhuan) Liu" Date: Fri, 6 Mar 2026 15:27:16 +0800 Subject: [PATCH 304/334] [Dev] Cherry-pick: M-FSDP: Cancel erroneous grad accumulation check (#3629) (#3729) Co-authored-by: Jianbin Chang --- megatron/core/distributed/fsdp/mcore_fsdp_adapter.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py index 8848d93666e..0fa990466b4 100644 --- a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +++ b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py @@ -86,12 +86,6 @@ def __init__( self.megatron_fsdp_dist_index = self._init_dist_index(pg_collection) if config.gradient_accumulation_fusion: - assert ( - self.megatron_fsdp_dist_index.get_dp_group(is_expert_parallel=True).size() == 1 - ), ( - "Megatron-FSDP with gradient_accumulation_fusion does not support " - "data parallelism when expert parallelism is enabled." - ) assert is_te_min_version("2.10"), ( "Megatron-FSDP with gradient_accumulation_fusion requires " "Transformer Engine version 2.10 or higher." From 3d097e5b3cab1fb71cd92749d7a0b949e3057abc Mon Sep 17 00:00:00 2001 From: Yuzhong Wang Date: Fri, 6 Mar 2026 20:37:39 +0800 Subject: [PATCH 305/334] [dev] fix(moe): Fix DSA spec and rope. 
(#3402) --- .../inference/contexts/dynamic_context.py | 15 +- .../models/common/embeddings/rope_utils.py | 41 +- ...rimental_attention_variant_module_specs.py | 30 +- .../absorbed_mla.py | 2 + .../experimental_attention_variant/dsa.py | 13 +- .../transformer/multi_latent_attention.py | 2 + pyproject.toml | 3 + .../model_config.yaml | 66 ++ tests/test_utils/recipes/h100/gpt.yaml | 5 + .../fusions/test_mla_yarn_rope_apply.py | 71 +- ...rimental_attention_variant_module_specs.py | 660 ++++++++++++++++++ uv.lock | 578 +++------------ 12 files changed, 970 insertions(+), 516 deletions(-) create mode 100644 tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_dsa/model_config.yaml create mode 100644 tests/unit_tests/models/test_experimental_attention_variant_module_specs.py diff --git a/megatron/core/inference/contexts/dynamic_context.py b/megatron/core/inference/contexts/dynamic_context.py index 9f7556f1312..23e05bad92c 100644 --- a/megatron/core/inference/contexts/dynamic_context.py +++ b/megatron/core/inference/contexts/dynamic_context.py @@ -1001,6 +1001,7 @@ def apply_rotary_emb_query( cu_seqlens=cu_seqlens_q, cp_group=cp_group, mscale=mscale, + mla_rotary_interleaved=config.multi_latent_attention, ) return query @@ -1035,11 +1036,21 @@ def apply_rotary_emb_key( f"paused_request_count={self.paused_request_count}" ) key = apply_rotary_pos_emb( - t=key[:n], freqs=key_emb[:n], config=config, cp_group=cp_group, mscale=mscale + t=key[:n], + freqs=key_emb[:n], + config=config, + cp_group=cp_group, + mscale=mscale, + mla_rotary_interleaved=config.multi_latent_attention, ) else: key[:n] = apply_rotary_pos_emb( - t=key[:n], freqs=key_emb[:n], config=config, cp_group=cp_group, mscale=mscale + t=key[:n], + freqs=key_emb[:n], + config=config, + cp_group=cp_group, + mscale=mscale, + mla_rotary_interleaved=config.multi_latent_attention, ) return key diff --git a/megatron/core/models/common/embeddings/rope_utils.py b/megatron/core/models/common/embeddings/rope_utils.py 
index e39540eb1d1..0e00c6340ed 100644 --- a/megatron/core/models/common/embeddings/rope_utils.py +++ b/megatron/core/models/common/embeddings/rope_utils.py @@ -93,8 +93,9 @@ def _apply_rotary_pos_emb_bshd( t: Tensor, freqs: Tensor, rotary_interleaved: bool = False, - multi_latent_attention: bool = False, + mla_rotary_interleaved: bool = False, mscale: float = 1.0, + multi_latent_attention: Optional[bool] = None, ) -> Tensor: """Apply rotary positional embedding to input tensor T. @@ -103,16 +104,26 @@ def _apply_rotary_pos_emb_bshd( Args: t (Tensor): Input tensor T is of shape [seq_length, ... , dim] freqs (Tensor): Rotary Positional embedding tensor freq is of shape [seq_length, ..., dim] + rotary_interleaved (bool): Whether to apply interleaving in the rotate half function. + mla_rotary_interleaved (bool): Whether to apply MLA-style interleaving for RoPE. + mscale (float): The scaling factor for the RoPE. Returns: Tensor: The input tensor after applying RoPE """ + if multi_latent_attention is not None: + warnings.warn( + "multi_latent_attention is deprecated. Please use mla_rotary_interleaved instead.", + DeprecationWarning, + ) + mla_rotary_interleaved = multi_latent_attention + rot_dim = freqs.shape[-1] # ideally t_pass is empty so rotary pos embedding is applied to all tensor t t, t_pass = t[..., :rot_dim], t[..., rot_dim:] - if multi_latent_attention: + if mla_rotary_interleaved: x1 = t[..., 0::2] x2 = t[..., 1::2] t = torch.cat((x1, x2), dim=-1) @@ -180,9 +191,10 @@ def _apply_rotary_pos_emb_thd( cu_seqlens: Tensor, freqs: Tensor, rotary_interleaved: bool = False, - multi_latent_attention: bool = False, + mla_rotary_interleaved: bool = False, mscale: float = 1.0, cp_group: torch.distributed.ProcessGroup = None, + multi_latent_attention: Optional[bool] = None, ) -> Tensor: """A baseline implementation of applying RoPE for `thd` format. @@ -196,6 +208,12 @@ def _apply_rotary_pos_emb_thd( Returns: Tensor: Shape [t, h, d]. The input tensor after applying RoPE. 
""" + if multi_latent_attention is not None: + warnings.warn( + "multi_latent_attention is deprecated. Please use mla_rotary_interleaved instead.", + DeprecationWarning, + ) + mla_rotary_interleaved = multi_latent_attention if cp_group is None: raise ValueError("cp_group must be provided for THD format RoPE") @@ -226,7 +244,7 @@ def _apply_rotary_pos_emb_thd( t.unsqueeze(1), freqs_packed, rotary_interleaved=rotary_interleaved, - multi_latent_attention=multi_latent_attention, + mla_rotary_interleaved=mla_rotary_interleaved, mscale=mscale, ).squeeze(1) else: @@ -242,7 +260,7 @@ def _apply_rotary_pos_emb_thd( t.unsqueeze(1), freqs_packed, rotary_interleaved=rotary_interleaved, - multi_latent_attention=multi_latent_attention, + mla_rotary_interleaved=mla_rotary_interleaved, mscale=mscale, ).squeeze(1) @@ -254,6 +272,7 @@ def apply_rotary_pos_emb( cu_seqlens: Optional[Tensor] = None, mscale: float = 1.0, cp_group: torch.distributed.ProcessGroup = None, + mla_rotary_interleaved: bool = False, ): """ Reroute to the appropriate apply_rotary_pos_emb function depending on @@ -282,6 +301,12 @@ def apply_rotary_pos_emb( "Using unfused implementation." ) use_unfused = True + if mla_rotary_interleaved: + warnings.warn( + "apply_rope_fusion does not support MLA-style interleaving in RoPE." + "Using unfused implementation." + ) + use_unfused = True if not use_unfused: assert fused_apply_rotary_pos_emb is not None, "apply_rope_fusion is not available." 
return fused_apply_rotary_pos_emb(t, freqs, interleaved=config.rotary_interleaved) @@ -296,7 +321,7 @@ def apply_rotary_pos_emb( t, freqs, rotary_interleaved=config.rotary_interleaved, - multi_latent_attention=config.multi_latent_attention, + mla_rotary_interleaved=mla_rotary_interleaved, mscale=mscale, ) else: @@ -305,7 +330,7 @@ def apply_rotary_pos_emb( cu_seqlens, freqs, rotary_interleaved=config.rotary_interleaved, - multi_latent_attention=config.multi_latent_attention, + mla_rotary_interleaved=mla_rotary_interleaved, mscale=mscale, cp_group=cp_group, ) @@ -334,7 +359,7 @@ def apply_rotary_pos_emb_with_cos_sin( t, freqs, rotary_interleaved=rotary_interleaved, - multi_latent_attention=False, + mla_rotary_interleaved=False, mscale=1.0, ) else: diff --git a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py index 6222291449e..708722fa7b0 100644 --- a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py +++ b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py @@ -83,17 +83,6 @@ def get_dsa_module_spec_for_backend( assert config.multi_latent_attention, "Currently only MLA supports sparse attention." assert config.qk_l2_norm is False, "qk_l2_norm is not supported with MLA." - linear_q_up_proj = ( - backend.column_parallel_layer_norm_linear() - if config.qk_layernorm - else backend.column_parallel_linear() - ) - linear_kv_up_proj = ( - backend.column_parallel_layer_norm_linear() - if config.qk_layernorm - else backend.column_parallel_linear() - ) - # Because TransformerEngine does not support sparse attention yet, we use local # implementation whether the backend is TransformerEngine or not. core_attention = ModuleSpec( @@ -111,20 +100,29 @@ def get_dsa_module_spec_for_backend( ), ) + # Adjust for RMS norm. 
+ rms_norm = config.normalization == "RMSNorm" + # DSA indexer requires normalized q as input, so here we cannot fuse qk layernorm + # with linear projection and have to use unfused qk layernorm. + qk_norm = ( + backend.layer_norm(rms_norm=rms_norm, for_qk=True) if config.qk_layernorm else IdentityOp + ) + attention = ModuleSpec( module=MLASelfAttention, params={"attn_mask_type": AttnMaskType.causal}, submodules=MLASelfAttentionSubmodules( linear_q_proj=backend.column_parallel_linear(), linear_q_down_proj=backend.linear(), - linear_q_up_proj=linear_q_up_proj, + linear_q_up_proj=backend.column_parallel_linear(), linear_kv_down_proj=backend.linear(), - linear_kv_up_proj=linear_kv_up_proj, + linear_kv_up_proj=backend.column_parallel_linear(), core_attention=core_attention, linear_proj=backend.row_parallel_linear(), - q_layernorm=IdentityOp, - kv_layernorm=IdentityOp, + q_layernorm=qk_norm, + kv_layernorm=qk_norm, ), + metainfo={"fuse_input_layernorm": False}, ) return attention @@ -140,6 +138,8 @@ def get_experimental_attention_variant_module_spec( if config.experimental_attention_variant == "gated_delta_net": return get_gated_delta_net_module_spec(config=config, backend=backend) + elif config.experimental_attention_variant == "dsa": + return get_dsa_module_spec_for_backend(config=config, backend=backend) else: raise ValueError( f"Invalid experimental attention variant: {config.experimental_attention_variant}" diff --git a/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py b/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py index b56add7302e..242b88303f2 100644 --- a/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py +++ b/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py @@ -600,6 +600,7 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po cu_seqlens=cu_seqlens_q, mscale=mscale, cp_group=self.pg_collection.cp, + mla_rotary_interleaved=True, ) # 
k_pos_emb:[num_tokens, 1, qk_pos_emb_head_dim] k_pos_emb = apply_rotary_pos_emb( @@ -609,6 +610,7 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po cu_seqlens=cu_seqlens_kv, mscale=mscale, cp_group=self.pg_collection.cp, + mla_rotary_interleaved=True, ) # query: [num_tokens, n, (kv_lora_rank + qk_pos_emb_head_dim)] diff --git a/megatron/core/transformer/experimental_attention_variant/dsa.py b/megatron/core/transformer/experimental_attention_variant/dsa.py index 3734db7043f..5c5f77363dc 100644 --- a/megatron/core/transformer/experimental_attention_variant/dsa.py +++ b/megatron/core/transformer/experimental_attention_variant/dsa.py @@ -778,10 +778,12 @@ def __init__( def _apply_rope(self, x: torch.Tensor, rotary_pos_emb: torch.Tensor, mscale: float): """Apply RoPE to the input tensor.""" - # x_nope [seqlen, batch, *, index_head_dim - qk_pos_emb_head_dim] # x_pe [seqlen, batch, *, qk_pos_emb_head_dim] - x_nope, x_pe = torch.split( - x, [self.index_head_dim - self.qk_pos_emb_head_dim, self.qk_pos_emb_head_dim], dim=-1 + # x_nope [seqlen, batch, *, index_head_dim - qk_pos_emb_head_dim] + # To align with DeepSeek's implementation, + # x_pe is placed at the front, and x_nope is placed at the back. + x_pe, x_nope = torch.split( + x, [self.qk_pos_emb_head_dim, self.index_head_dim - self.qk_pos_emb_head_dim], dim=-1 ) x_pe = apply_rotary_pos_emb( x_pe, @@ -790,9 +792,12 @@ def _apply_rope(self, x: torch.Tensor, rotary_pos_emb: torch.Tensor, mscale: flo cu_seqlens=None, mscale=mscale, cp_group=self.pg_collection.cp, + # This flag is for the MLA-style interleaving in RoPE. + # Set it to False, as indexer does not apply interleaved RoPE. 
+ mla_rotary_interleaved=False, ) # [seqlen, batch, *, index_head_dim] - x = torch.cat([x_nope, x_pe], dim=-1) + x = torch.cat([x_pe, x_nope], dim=-1) return x def forward_before_topk( diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index a9cdc697cc8..11330262159 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -807,6 +807,7 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po cu_seqlens=cu_seqlens_q, mscale=mscale, cp_group=self.pg_collection.cp, + mla_rotary_interleaved=True, ) # k_pos_emb:[num_tokens, 1, qk_pos_emb_head_dim] k_pos_emb = apply_rotary_pos_emb( @@ -816,6 +817,7 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po cu_seqlens=cu_seqlens_kv, mscale=mscale, cp_group=self.pg_collection.cp, + mla_rotary_interleaved=True, ) # query: [num_tokens, n, (qk_head_dim + v_head_dim)] diff --git a/pyproject.toml b/pyproject.toml index 9b75fcf3596..dc4efdc1523 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,6 +97,7 @@ dev = [ "flask[async]", "hypercorn", "openai", + "fast-hadamard-transform", ] lts = [ @@ -171,6 +172,7 @@ no-build-isolation-package = [ "mamba-ssm", "transformer-engine", "transformer-engine-torch", + "fast-hadamard-transform", ] link-mode = "copy" conflicts = [[{ extra = "lts" }, { extra = "dev" }]] @@ -191,6 +193,7 @@ flash_mla = [ transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "5671fd3675906cda1ade26c24a65d3dedd88eb89" } nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "01a9a8ba360f7b2908728ad0516e0ad9d936966d" } emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "v0.1.0" } +fast-hadamard-transform = { git = "https://github.com/Dao-AILab/fast-hadamard-transform.git", rev = "f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" } [tool.isort] 
profile = "black" # black-compatible diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_dsa/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_dsa/model_config.yaml new file mode 100644 index 00000000000..63a0933313c --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_dsa/model_config.yaml @@ -0,0 +1,66 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: :4096:8 + ENABLE_LIGHTWEIGHT_MODE: true +MODEL_ARGS: + --num-layers: 4 + --hidden-size: 512 + --num-attention-heads: 8 + --multi-latent-attention: true + --q-lora-rank: 192 + --kv-lora-rank: 64 + --qk-head-dim: 16 + --qk-pos-emb-head-dim: 8 + --v-head-dim: 16 + --experimental-attention-variant: dsa + --dsa-indexer-n-heads: 64 + --dsa-indexer-head-dim: 128 + --dsa-indexer-topk: 2048 + --dsa-indexer-loss-coeff: 0.01 + --attention-backend: fused + --log-params-norm: true + --log-num-zeros-in-grad: true + --log-validation-ppl-to-tensorboard: true + --log-timers-to-tensorboard: true + --tensorboard-dir: ${TENSORBOARD_PATH} + --micro-batch-size: 4 + --global-batch-size: 32 + --seq-length: 1024 + --max-position-embeddings: 1024 + --train-iters: 50 + --timing-log-level: 0 + --lr-decay-iters: 320000 + --save: ${CHECKPOINT_SAVE_PATH} + --load: ${CHECKPOINT_LOAD_PATH} + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --split: 949,50,1 + --distributed-backend: nccl + --lr: 0.00015 + --lr-decay-style: cosine + --min-lr: 1.0e-5 + --weight-decay: 1e-2 + --clip-grad: 1.0 + --lr-warmup-fraction: .01 + --log-interval: 1 + --save-interval: 25 + --eval-interval: 1000 + --eval-iters: 10 + --transformer-impl: transformer_engine + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --sequence-parallel: true + 
--untie-embeddings-and-output-weights: true + --deterministic-mode: true + --no-gradient-accumulation-fusion: true + --attention-softmax-in-fp32: true + --use-mcore-models: true + --ckpt-format: torch_dist + --data-cache-path: ${DATA_CACHE_PATH} + --bf16: true + --attention-backend: unfused + --log-memory-to-tensorboard: true +TEST_TYPE: ckpt-resume diff --git a/tests/test_utils/recipes/h100/gpt.yaml b/tests/test_utils/recipes/h100/gpt.yaml index 9062a3f4471..45985f133dd 100644 --- a/tests/test_utils/recipes/h100/gpt.yaml +++ b/tests/test_utils/recipes/h100/gpt.yaml @@ -357,6 +357,11 @@ products: - environment: [dev] scope: [mr, mr-github] platforms: [dgx_h100] + - test_case: [gpt3_mcore_te_tp2_pp2_dsa] + products: + - environment: [dev] + scope: [mr, mr-github, mr-github-slim] + platforms: [dgx_h100] - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective] products: - environment: [dev] diff --git a/tests/unit_tests/fusions/test_mla_yarn_rope_apply.py b/tests/unit_tests/fusions/test_mla_yarn_rope_apply.py index 1c8976bfcb6..1a0c19d5222 100644 --- a/tests/unit_tests/fusions/test_mla_yarn_rope_apply.py +++ b/tests/unit_tests/fusions/test_mla_yarn_rope_apply.py @@ -1,12 +1,18 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+import warnings +from unittest.mock import MagicMock, patch + import pytest import torch from megatron.core.models.common.embeddings import apply_rotary_pos_emb +from megatron.core.models.common.embeddings import rope_utils as rope_utils_module from megatron.core.models.common.embeddings.yarn_rotary_pos_embedding import YarnRotaryEmbedding +from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.utils import is_torch_min_version +from tests.unit_tests.test_utilities import Utils try: from megatron.core.fusions.fused_mla_yarn_rope_apply import ( @@ -91,7 +97,13 @@ def _test_fused_apply_mla_rope_for_q(input_format): no_pe, pe = torch.split(pytorch_fwd_input, [q_dim, emb_dim], dim=-1) pe_output = apply_rotary_pos_emb( - pe, freqs, transformer_config, cu_seqlens=cu_seqlens, mscale=mscale, cp_group=FakeCPGroup() + pe, + freqs, + transformer_config, + cu_seqlens=cu_seqlens, + mscale=mscale, + cp_group=FakeCPGroup(), + mla_rotary_interleaved=True, ) pytorch_output = torch.concat([no_pe, pe_output], dim=-1) pytorch_output.backward(pytorch_bwd_input, retain_graph=True) @@ -190,6 +202,7 @@ def _test_fused_apply_mla_rope_for_kv(input_format): cu_seqlens=cu_seqlens, mscale=mscale, cp_group=FakeCPGroup(), + mla_rotary_interleaved=True, ) if input_format == "sbhd": pe_output = pe_output.expand(-1, -1, num_heads, -1) @@ -253,3 +266,59 @@ def test_forward_backward_for_q(self, input_format): def test_forward_backward_for_kv(self, input_format): _test_fused_apply_mla_rope_for_kv(input_format) + + +class TestApplyRotaryPosEmbMlaFusionConflict: + """Test apply_rotary_pos_emb: mla_rotary_interleaved vs apply_rope_fusion conflict.""" + + def setup_method(self): + Utils.initialize_model_parallel(1, 1) + model_parallel_cuda_manual_seed(123) + self.seq_len = 16 + self.num_heads = 2 + self.kv_channels = 32 + self.rot_dim = self.kv_channels + + def teardown_method(self): + 
Utils.destroy_model_parallel() + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + def test_mla_rotary_interleaved_with_apply_rope_fusion_emits_warning_and_uses_unfused(self): + """When apply_rope_fusion=True and mla_rotary_interleaved=True, expect warning and unfused path.""" + config = TransformerConfig( + num_attention_heads=self.num_heads, + num_layers=1, + apply_rope_fusion=True, + rotary_interleaved=False, + ) + t = torch.randn( + self.seq_len, 1, self.num_heads, self.kv_channels, device="cuda", dtype=torch.float32 + ) + freqs = torch.randn(self.seq_len, 1, 1, self.rot_dim, device="cuda", dtype=torch.float32) + + fused_mock = MagicMock(return_value=t.clone()) + with ( + patch.object(rope_utils_module, "fused_apply_rotary_pos_emb", fused_mock), + patch.object( + rope_utils_module, + "_apply_rotary_pos_emb_bshd", + wraps=rope_utils_module._apply_rotary_pos_emb_bshd, + ) as unfused_spy, + ): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + out = apply_rotary_pos_emb(t, freqs, config, mla_rotary_interleaved=True) + # Should have warned about MLA + fusion conflict + mla_fusion_warnings = [ + x for x in w if "apply_rope_fusion does not support MLA-style" in str(x.message) + ] + assert ( + len(mla_fusion_warnings) >= 1 + ), "Expected warning when mla_rotary_interleaved and apply_rope_fusion both enabled" + # Fused kernel must not be used + fused_mock.assert_not_called() + # Unfused path must have been used + unfused_spy.assert_called_once() + call_kw = unfused_spy.call_args[1] + assert call_kw["mla_rotary_interleaved"] is True + assert out.shape == t.shape diff --git a/tests/unit_tests/models/test_experimental_attention_variant_module_specs.py b/tests/unit_tests/models/test_experimental_attention_variant_module_specs.py new file mode 100644 index 00000000000..e3a589f1b97 --- /dev/null +++ b/tests/unit_tests/models/test_experimental_attention_variant_module_specs.py @@ -0,0 +1,660 @@ +# Copyright 
(c) 2026, NVIDIA CORPORATION. All rights reserved. + +from unittest.mock import MagicMock, patch + +import pytest + +from megatron.core.transformer.enums import AttnMaskType, LayerType +from megatron.core.transformer.identity_op import IdentityOp +from megatron.core.transformer.spec_utils import ModuleSpec +from megatron.core.transformer.transformer_block import TransformerBlockSubmodules +from megatron.core.transformer.transformer_layer import ( + HyperConnectionTransformerLayer, + TransformerLayer, +) + +# --------------------------------------------------------------------------- +# Helpers: fake backend and config builders +# --------------------------------------------------------------------------- + + +class _FakeLinear: + pass + + +class _FakeColumnParallelLinear: + pass + + +class _FakeRowParallelLinear: + pass + + +class _FakeLayerNormColumnParallelLinear: + pass + + +class _FakeLayerNorm: + pass + + +class _FakeQKNorm: + pass + + +class _FakeCoreAttention: + pass + + +def _make_backend(fuse_layernorm=True): + """Return a mock BackendSpecProvider with deterministic return values.""" + backend = MagicMock() + backend.linear.return_value = _FakeLinear + backend.column_parallel_linear.return_value = _FakeColumnParallelLinear + backend.row_parallel_linear.return_value = _FakeRowParallelLinear + backend.column_parallel_layer_norm_linear.return_value = _FakeLayerNormColumnParallelLinear + backend.fuse_layernorm_and_linear.return_value = fuse_layernorm + backend.core_attention.return_value = _FakeCoreAttention + + def _layer_norm(rms_norm=False, for_qk=False): + return _FakeQKNorm if for_qk else _FakeLayerNorm + + backend.layer_norm.side_effect = _layer_norm + return backend + + +def _make_config(**overrides): + """Return a mock TransformerConfig with sane defaults.""" + defaults = dict( + num_layers=4, + normalization="RMSNorm", + qk_layernorm=False, + multi_latent_attention=False, + qk_l2_norm=False, + transformer_impl="transformer_engine", + 
use_kitchen=False, + experimental_attention_variant=None, + linear_attention_freq=None, + moe_layer_freq=1, + num_moe_experts=None, + moe_grouped_gemm=False, + moe_use_legacy_grouped_gemm=False, + use_te_activation_func=False, + pipeline_model_parallel_size=1, + pipeline_model_parallel_layout=None, + use_kitchen_attention=False, + kitchen_attention_backend="sdpa", + fallback_to_eager_attn=False, + enable_hyper_connections=False, + ) + defaults.update(overrides) + cfg = MagicMock() + for k, v in defaults.items(): + setattr(cfg, k, v) + return cfg + + +# =================================================================== +# Tests for is_linear_attention_variant +# =================================================================== + + +class TestIsLinearAttentionVariant: + @staticmethod + def _fn(variant): + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + is_linear_attention_variant, + ) + + return is_linear_attention_variant(variant) + + @pytest.mark.parametrize( + "variant, expected", + [("gated_delta_net", True), ("dsa", False), (None, False), ("some_unknown_variant", False)], + ) + def test_variants(self, variant, expected): + """Validate linear-attention variant classification across supported and unsupported names.""" + assert self._fn(variant) is expected + + +# =================================================================== +# Tests for get_moe_layer_pattern +# =================================================================== + + +class TestGetMoeLayerPattern: + @staticmethod + def _fn(config): + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_moe_layer_pattern, + ) + + return get_moe_layer_pattern(config) + + @pytest.mark.parametrize( + "num_layers, freq, expected", + [(4, 1, [1, 1, 1, 1]), (6, 2, [1, 0, 1, 0, 1, 0]), (6, 3, [1, 0, 0, 1, 0, 0])], + ) + def test_int_freq(self, num_layers, freq, expected): + """Verify integer moe_layer_freq is expanded into the expected 
per-layer MoE pattern.""" + cfg = _make_config(num_layers=num_layers, moe_layer_freq=freq) + assert self._fn(cfg) == expected + + def test_list_freq(self): + """Verify an explicit list pattern is used as-is.""" + pattern = [1, 0, 1, 0] + cfg = _make_config(num_layers=4, moe_layer_freq=pattern) + assert self._fn(cfg) == pattern + + def test_list_freq_wrong_length_raises(self): + """Verify a list with mismatched length fails fast.""" + cfg = _make_config(num_layers=4, moe_layer_freq=[1, 0]) + with pytest.raises(AssertionError, match="Invalid length"): + self._fn(cfg) + + def test_invalid_type_raises(self): + """Verify unsupported moe_layer_freq types raise ValueError.""" + cfg = _make_config(num_layers=4, moe_layer_freq="bad") + with pytest.raises(ValueError, match="Invalid moe_layer_freq"): + self._fn(cfg) + + +# =================================================================== +# Tests for get_linear_attention_pattern +# =================================================================== + + +class TestGetLinearAttentionPattern: + @staticmethod + def _fn(config): + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_linear_attention_pattern, + ) + + return get_linear_attention_pattern(config) + + @pytest.mark.parametrize( + "num_layers, freq, expected", + [ + # Every 4th layer (1-indexed) is SDPA (0), the rest are LA (1) + (8, 4, [1, 1, 1, 0, 1, 1, 1, 0]), + (4, 2, [1, 0, 1, 0]), + (3, 1, [0, 0, 0]), + ], + ) + def test_int_freq(self, num_layers, freq, expected): + """Verify integer linear_attention_freq is expanded into the expected LA/SDPA pattern.""" + cfg = _make_config(num_layers=num_layers, linear_attention_freq=freq) + assert self._fn(cfg) == expected + + def test_list_freq(self): + """Verify an explicit linear-attention pattern list is used directly.""" + pattern = [1, 0, 1, 0] + cfg = _make_config(num_layers=4, linear_attention_freq=pattern) + assert self._fn(cfg) == pattern + + def 
test_list_freq_wrong_length_raises(self): + """Verify list length validation for linear_attention_freq.""" + cfg = _make_config(num_layers=4, linear_attention_freq=[1, 0, 1]) + with pytest.raises(AssertionError, match="Invalid length"): + self._fn(cfg) + + def test_none_for_non_linear_variant(self): + """Verify non-linear variants default to all-standard attention when freq is None.""" + cfg = _make_config( + num_layers=4, linear_attention_freq=None, experimental_attention_variant="dsa" + ) + assert self._fn(cfg) == [0, 0, 0, 0] + + def test_none_for_linear_variant_raises(self): + """Verify linear variants require linear_attention_freq to be explicitly set.""" + cfg = _make_config( + num_layers=4, + linear_attention_freq=None, + experimental_attention_variant="gated_delta_net", + ) + with pytest.raises(ValueError, match="linear_attention_freq is None"): + self._fn(cfg) + + def test_invalid_type_raises(self): + """Verify unsupported linear_attention_freq types raise ValueError.""" + cfg = _make_config(num_layers=4, linear_attention_freq=3.14) + with pytest.raises(ValueError, match="Invalid linear_attention_freq"): + self._fn(cfg) + + +# =================================================================== +# Tests for get_gated_delta_net_module_spec +# =================================================================== + + +class TestGetGatedDeltaNetModuleSpec: + def test_returns_correct_module_spec(self): + """Verify the top-level module spec targets GatedDeltaNet with expected metainfo.""" + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_gated_delta_net_module_spec, + ) + from megatron.core.ssm.gated_delta_net import GatedDeltaNet + + backend = _make_backend() + cfg = _make_config(normalization="RMSNorm") + spec = get_gated_delta_net_module_spec(cfg, backend=backend) + + assert isinstance(spec, ModuleSpec) + assert spec.module is GatedDeltaNet + assert spec.metainfo == {"fuse_input_layernorm": True} + + def 
test_submodules_use_backend_modules(self): + """Verify backend-provided projection/norm modules are wired into submodules.""" + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_gated_delta_net_module_spec, + ) + + backend = _make_backend() + cfg = _make_config(normalization="RMSNorm") + spec = get_gated_delta_net_module_spec(cfg, backend=backend) + + subs = spec.submodules + assert subs.in_proj == _FakeLayerNormColumnParallelLinear + assert subs.out_proj == _FakeRowParallelLinear + backend.layer_norm.assert_any_call(rms_norm=True, for_qk=False) + + def test_layer_norm_normalization(self): + """Verify LayerNorm mode passes rms_norm=False to backend.layer_norm.""" + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_gated_delta_net_module_spec, + ) + + backend = _make_backend() + cfg = _make_config(normalization="LayerNorm") + get_gated_delta_net_module_spec(cfg, backend=backend) + backend.layer_norm.assert_any_call(rms_norm=False, for_qk=False) + + def test_backend_auto_resolved_when_none(self): + """Verify backend is auto-resolved when caller does not pass one.""" + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_gated_delta_net_module_spec, + ) + + cfg = _make_config(normalization="RMSNorm") + with patch( + "megatron.core.models.gpt.experimental_attention_variant_module_specs" + "._get_backend_spec_provider", + return_value=_make_backend(), + ): + spec = get_gated_delta_net_module_spec(cfg, backend=None) + assert isinstance(spec, ModuleSpec) + + +# =================================================================== +# Tests for get_dsa_module_spec_for_backend +# =================================================================== + + +class TestGetDsaModuleSpec: + def _call(self, cfg=None, backend=None): + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_dsa_module_spec_for_backend, + ) + + if cfg is None: + 
cfg = _make_config(multi_latent_attention=True, qk_l2_norm=False, qk_layernorm=True) + if backend is None: + backend = _make_backend() + return get_dsa_module_spec_for_backend(cfg, backend=backend) + + def test_requires_multi_latent_attention(self): + """Verify DSA path rejects configs without MLA enabled.""" + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_dsa_module_spec_for_backend, + ) + + cfg = _make_config(multi_latent_attention=False, qk_l2_norm=False) + with pytest.raises(AssertionError, match="only MLA supports"): + get_dsa_module_spec_for_backend(cfg, backend=_make_backend()) + + def test_rejects_qk_l2_norm(self): + """Verify unsupported qk_l2_norm setting is rejected for DSA+MLA.""" + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_dsa_module_spec_for_backend, + ) + + cfg = _make_config(multi_latent_attention=True, qk_l2_norm=True) + with pytest.raises(AssertionError, match="qk_l2_norm is not supported"): + get_dsa_module_spec_for_backend(cfg, backend=_make_backend()) + + def test_returns_mla_self_attention_spec(self): + """Verify the returned attention module is MLA self-attention with causal mask.""" + from megatron.core.transformer.multi_latent_attention import MLASelfAttention + + spec = self._call() + assert spec.module is MLASelfAttention + assert spec.params == {"attn_mask_type": AttnMaskType.causal} + assert spec.metainfo == {"fuse_input_layernorm": False} + + def test_core_attention_is_dsa(self): + """Verify MLA core_attention is wrapped with DSAttention.""" + from megatron.core.transformer.experimental_attention_variant.dsa import DSAttention + + spec = self._call() + core = spec.submodules.core_attention + assert core.module is DSAttention + + def test_dsa_indexer_structure(self): + """Verify DSA indexer wiring uses expected backend linear/norm modules.""" + from megatron.core.transformer.experimental_attention_variant.dsa import DSAIndexer + + spec = 
self._call() + indexer = spec.submodules.core_attention.submodules.indexer + assert indexer.module is DSAIndexer + subs = indexer.submodules + assert subs.linear_wq_b == _FakeLinear + assert subs.linear_wk == _FakeLinear + assert subs.k_norm == _FakeQKNorm + assert subs.linear_weights_proj == _FakeLinear + + @pytest.mark.parametrize("normalization", ["RMSNorm", "LayerNorm"]) + def test_qk_layernorm_enabled(self, normalization): + """Verify q/kv layernorm uses backend.layer_norm(rms_norm=..., for_qk=True).""" + backend = _make_backend() + cfg = _make_config( + multi_latent_attention=True, + qk_l2_norm=False, + qk_layernorm=True, + normalization=normalization, + ) + spec = self._call(cfg=cfg, backend=backend) + expected_rms = normalization == "RMSNorm" + assert spec.submodules.q_layernorm == _FakeQKNorm + assert spec.submodules.kv_layernorm == _FakeQKNorm + # Both point to the same qk_norm object + assert spec.submodules.q_layernorm is spec.submodules.kv_layernorm + backend.layer_norm.assert_any_call(rms_norm=expected_rms, for_qk=True) + + def test_qk_layernorm_disabled(self): + """Verify q/kv layernorm becomes IdentityOp, skipping backend.layer_norm for qk.""" + backend = _make_backend() + cfg = _make_config(multi_latent_attention=True, qk_l2_norm=False, qk_layernorm=False) + spec = self._call(cfg=cfg, backend=backend) + assert spec.submodules.q_layernorm is IdentityOp + assert spec.submodules.kv_layernorm is IdentityOp + # backend.layer_norm is still called for the indexer k_norm (for_qk=True at line 94), + # but NOT for the outer qk_norm (line 105-107 takes the else branch). + # Exactly one for_qk=True call should exist (from the indexer, not from qk_norm). 
+ qk_calls = [c for c in backend.layer_norm.call_args_list if c.kwargs.get("for_qk")] + assert ( + len(qk_calls) == 1 + ), f"Expected 1 for_qk=True call (indexer only), got {len(qk_calls)}" + + def test_linear_projections(self): + """Verify Q/KV projection slots and backend.column_parallel_linear call count.""" + backend = _make_backend() + cfg = _make_config(multi_latent_attention=True, qk_l2_norm=False, qk_layernorm=True) + spec = self._call(cfg=cfg, backend=backend) + subs = spec.submodules + assert subs.linear_q_proj == _FakeColumnParallelLinear + assert subs.linear_q_down_proj == _FakeLinear + assert subs.linear_q_up_proj == _FakeColumnParallelLinear + assert subs.linear_kv_down_proj == _FakeLinear + assert subs.linear_kv_up_proj == _FakeColumnParallelLinear + assert subs.linear_proj == _FakeRowParallelLinear + # column_parallel_linear() is called exactly 3 times (q_proj, q_up_proj, kv_up_proj) + assert backend.column_parallel_linear.call_count == 3 + assert backend.row_parallel_linear.call_count == 1 + + +# =================================================================== +# Tests for get_experimental_attention_variant_module_spec +# =================================================================== + + +class TestGetExperimentalAttentionVariantModuleSpec: + MODULE = "megatron.core.models.gpt.experimental_attention_variant_module_specs" + + @pytest.mark.parametrize( + "variant, target_fn", + [ + ("gated_delta_net", "get_gated_delta_net_module_spec"), + ("dsa", "get_dsa_module_spec_for_backend"), + ], + ) + def test_dispatches_to_variant_handler(self, variant, target_fn): + """Verify dispatcher routes each variant name to its corresponding builder function.""" + backend = _make_backend() + cfg = _make_config(experimental_attention_variant=variant, normalization="RMSNorm") + with patch(f"{self.MODULE}.{target_fn}") as mock_fn: + mock_fn.return_value = ModuleSpec(module=MagicMock) + from megatron.core.models.gpt.experimental_attention_variant_module_specs 
import ( + get_experimental_attention_variant_module_spec, + ) + + result = get_experimental_attention_variant_module_spec(cfg, backend=backend) + mock_fn.assert_called_once_with(config=cfg, backend=backend) + assert result is mock_fn.return_value + + def test_invalid_variant_raises(self): + """Verify unknown variant names raise a clear ValueError.""" + cfg = _make_config(experimental_attention_variant="unknown") + with pytest.raises(ValueError, match="Invalid experimental attention variant"): + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_experimental_attention_variant_module_spec, + ) + + get_experimental_attention_variant_module_spec(cfg, backend=_make_backend()) + + +# =================================================================== +# Tests for get_transformer_layer_with_experimental_attention_variant_spec +# =================================================================== + + +class TestGetTransformerLayerWithExperimentalAttentionVariantSpec: + MODULE = "megatron.core.models.gpt.experimental_attention_variant_module_specs" + + def _make_attention_spec(self, fuse_input_layernorm=True): + """Construct a mock attention spec with configurable fuse metadata.""" + return ModuleSpec(module=MagicMock, metainfo={"fuse_input_layernorm": fuse_input_layernorm}) + + def _make_mlp_spec(self, fuse_pre_mlp_layernorm=True): + """Construct a mock MLP spec with configurable fuse metadata.""" + return ModuleSpec( + module=MagicMock, metainfo={"fuse_pre_mlp_layernorm": fuse_pre_mlp_layernorm} + ) + + def test_all_experimental_no_moe(self): + """Verify all layers use experimental attention and dense MLP when no MoE is configured.""" + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_transformer_layer_with_experimental_attention_variant_spec, + ) + + cfg = _make_config( + num_layers=4, + experimental_attention_variant="dsa", + num_moe_experts=None, + normalization="RMSNorm", + ) + backend = 
_make_backend() + attn_spec = self._make_attention_spec(fuse_input_layernorm=False) + mlp_spec = self._make_mlp_spec(fuse_pre_mlp_layernorm=True) + + with ( + patch( + f"{self.MODULE}.get_experimental_attention_variant_module_spec", + return_value=attn_spec, + ), + patch(f"{self.MODULE}._get_dense_mlp_module_spec", return_value=mlp_spec), + ): + specs = get_transformer_layer_with_experimental_attention_variant_spec( + cfg, backend=backend + ) + + assert len(specs) == 4 + for s in specs: + # Each layer should share the same selected module specs in this setup. + assert s.module is TransformerLayer + assert s.submodules.self_attention is attn_spec + assert s.submodules.mlp is mlp_spec + + def test_hybrid_attention_pattern(self): + """Verify attention alternates between experimental and standard specs per pattern.""" + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_transformer_layer_with_experimental_attention_variant_spec, + ) + + cfg = _make_config( + num_layers=4, + experimental_attention_variant="gated_delta_net", + linear_attention_freq=2, + num_moe_experts=None, + normalization="RMSNorm", + ) + backend = _make_backend() + exp_attn_spec = self._make_attention_spec(fuse_input_layernorm=True) + std_attn_spec = self._make_attention_spec(fuse_input_layernorm=False) + mlp_spec = self._make_mlp_spec(fuse_pre_mlp_layernorm=True) + + with ( + patch( + f"{self.MODULE}.get_experimental_attention_variant_module_spec", + return_value=exp_attn_spec, + ), + patch(f"{self.MODULE}._get_self_attention_module_spec", return_value=std_attn_spec), + patch(f"{self.MODULE}._get_dense_mlp_module_spec", return_value=mlp_spec), + ): + specs = get_transformer_layer_with_experimental_attention_variant_spec( + cfg, backend=backend + ) + + assert len(specs) == 4 + # Pattern for linear_attention_freq=2: [1, 0, 1, 0] + assert specs[0].submodules.self_attention is exp_attn_spec + assert specs[1].submodules.self_attention is std_attn_spec + assert 
specs[2].submodules.self_attention is exp_attn_spec + assert specs[3].submodules.self_attention is std_attn_spec + + def test_hybrid_moe_pattern_with_mhc(self): + """Verify MLP alternates between MoE and dense specs per moe_layer_freq pattern.""" + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_transformer_layer_with_experimental_attention_variant_spec, + ) + + cfg = _make_config( + num_layers=4, + experimental_attention_variant="dsa", + num_moe_experts=8, + moe_layer_freq=2, + normalization="RMSNorm", + enable_hyper_connections=True, + ) + backend = _make_backend() + attn_spec = self._make_attention_spec(fuse_input_layernorm=False) + moe_spec = self._make_mlp_spec(fuse_pre_mlp_layernorm=False) + dense_spec = self._make_mlp_spec(fuse_pre_mlp_layernorm=True) + + with ( + patch( + f"{self.MODULE}.get_experimental_attention_variant_module_spec", + return_value=attn_spec, + ), + patch(f"{self.MODULE}._get_moe_module_spec", return_value=moe_spec), + patch(f"{self.MODULE}._get_dense_mlp_module_spec", return_value=dense_spec), + ): + specs = get_transformer_layer_with_experimental_attention_variant_spec( + cfg, backend=backend + ) + + # moe_layer_freq=2 -> [1, 0, 1, 0] + assert specs[0].submodules.mlp is moe_spec + assert specs[1].submodules.mlp is dense_spec + assert specs[2].submodules.mlp is moe_spec + assert specs[3].submodules.mlp is dense_spec + for s in specs: + assert s.module is HyperConnectionTransformerLayer + + +# =================================================================== +# Tests for get_transformer_block_with_experimental_attention_variant_spec +# =================================================================== + + +class TestGetTransformerBlockWithExperimentalAttentionVariantSpec: + MODULE = "megatron.core.models.gpt.experimental_attention_variant_module_specs" + + @pytest.mark.parametrize( + "num_layers,pp_size,vp_stage,pp_rank,use_layout,offset,num_layers_to_build,layout_ids,expected_ids", + [ + # no 
pipeline split + (4, 1, None, None, False, 0, 4, None, [0, 1, 2, 3]), + # pp split (rank 1 gets [4,5,6,7]) + (8, 2, None, 1, False, 4, 4, None, [4, 5, 6, 7]), + # vpp + pp split (example stage) + (8, 2, 1, 0, False, 2, 2, None, [2, 3]), + # explicit pipeline layout wins over offset/num_layers + (8, 2, 0, 0, True, None, None, [0, 3, 5], [0, 3, 5]), + ], + ) + def test_get_transformer_block_with_experimental_attention_variant_spec( + self, + num_layers, + pp_size, + vp_stage, + pp_rank, + use_layout, + offset, + num_layers_to_build, + layout_ids, + expected_ids, + ): + """Verify transformer block layer slicing and vp/pp argument forwarding.""" + from megatron.core.models.gpt.experimental_attention_variant_module_specs import ( + get_transformer_block_with_experimental_attention_variant_spec, + ) + + mock_layout = MagicMock() if use_layout else None + if mock_layout is not None: + # When layout is provided, it should fully control local layer selection. + mock_layout.get_layer_id_list.return_value = layout_ids + + cfg = _make_config( + num_layers=num_layers, + pipeline_model_parallel_size=pp_size, + pipeline_model_parallel_layout=mock_layout, + normalization="RMSNorm", + ) + backend = _make_backend() + fake_layer_specs = [ + ModuleSpec(module=TransformerLayer, submodules=MagicMock()) for _ in range(num_layers) + ] + + with ( + patch(f"{self.MODULE}._get_backend_spec_provider", return_value=backend), + patch( + f"{self.MODULE}.get_transformer_layer_with_experimental_attention_variant_spec", + return_value=fake_layer_specs, + ), + ): + if use_layout: + result = get_transformer_block_with_experimental_attention_variant_spec( + cfg, vp_stage=vp_stage, pp_rank=pp_rank + ) + mock_layout.get_layer_id_list.assert_called_once_with( + layer_type=LayerType.decoder, vp_stage=vp_stage, pp_rank=pp_rank + ) + else: + # Without explicit layout, slicing comes from offset + num_layers_to_build. 
+ with ( + patch( + f"{self.MODULE}.get_transformer_layer_offset", return_value=offset + ) as mock_offset, + patch( + f"{self.MODULE}.get_num_layers_to_build", return_value=num_layers_to_build + ) as mock_num_layers, + ): + result = get_transformer_block_with_experimental_attention_variant_spec( + cfg, vp_stage=vp_stage, pp_rank=pp_rank + ) + mock_offset.assert_called_once_with(cfg, vp_stage=vp_stage, pp_rank=pp_rank) + mock_num_layers.assert_called_once_with(cfg, vp_stage=vp_stage, pp_rank=pp_rank) + + assert isinstance(result, TransformerBlockSubmodules) + assert result.layer_specs == [fake_layer_specs[i] for i in expected_ids] diff --git a/uv.lock b/uv.lock index 1009b804630..f7147b8754d 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -77,61 +77,20 @@ wheels = [ name = "aiobotocore" version = "2.26.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.13.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.11.*' and 
sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", -] dependencies = [ - { name = "aiohttp", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "aioitertools", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "botocore", version = "1.41.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "jmespath", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "multidict", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "python-dateutil", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "aiohttp" }, + { name = "aioitertools" }, + { name = "botocore" }, + { name = "jmespath" }, + { name = "multidict" }, + { name = "python-dateutil" }, + { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" } }, ] sdist = { url = "https://files.pythonhosted.org/packages/4d/f8/99fa90d9c25b78292899fd4946fce97b6353838b5ecc139ad8ba1436e70c/aiobotocore-2.26.0.tar.gz", hash = "sha256:50567feaf8dfe2b653570b4491f5bc8c6e7fb9622479d66442462c021db4fadc", size = 122026, upload-time = "2025-11-28T07:54:59.956Z" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/b7/58/3bf0b7d474607dc7fd67dd1365c4e0f392c8177eaf4054e5ddee3ebd53b5/aiobotocore-2.26.0-py3-none-any.whl", hash = "sha256:a793db51c07930513b74ea7a95bd79aaa42f545bdb0f011779646eafa216abec", size = 87333, upload-time = "2025-11-28T07:54:58.457Z" }, ] -[[package]] -name = "aiobotocore" -version = "3.1.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'linux'", -] -dependencies = [ - { name = "aiohttp", marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "aioitertools", marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "botocore", version = "1.42.49", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "jmespath", marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "multidict", marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "python-dateutil", marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "wrapt", version = "2.1.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/18/94/332629387f4a9fc691cac9c0cb078af877bfaba415b1a16411377f6ea310/aiobotocore-3.1.3.tar.gz", hash = "sha256:b1b6a95aa4c17410090f4adf16fd45e45a898140c83d4e9d554602f9310408c0", size = 122675, upload-time = "2026-02-14T12:11:01.745Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/14/29/a3e75834009121ebb695dc24f9fe804566b1bcc9b7d46f6fbe56fe972c6a/aiobotocore-3.1.3-py3-none-any.whl", hash = "sha256:3afc93bf14de304dbd4a2c90f36fb3ce6348b06a5a1ec7f87261be628d7876d9", size = 87717, upload-time = "2026-02-14T12:10:59.898Z" }, -] - [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -148,7 +107,7 @@ source = { registry = "https://pypi.org/simple" } 
dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, - { name = "async-timeout", marker = "python_full_version < '3.11'" }, + { name = "async-timeout", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "attrs" }, { name = "frozenlist" }, { name = "multidict" }, @@ -288,7 +247,7 @@ version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "frozenlist" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } wheels = [ @@ -342,10 +301,10 @@ name = "anyio" version = "4.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "idna" }, { name = "sniffio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949, upload-time = "2025-03-17T00:02:54.77Z" } wheels = [ @@ -728,53 +687,16 @@ wheels = [ name = 
"botocore" version = "1.41.5" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.13.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", -] dependencies = [ - { name = "jmespath", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "python-dateutil", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "urllib3", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/90/22/7fe08c726a2e3b11a0aef8bf177e83891c9cb2dc1809d35c9ed91a9e60e6/botocore-1.41.5.tar.gz", hash = "sha256:0367622b811597d183bfcaab4a350f0d3ede712031ce792ef183cabdee80d3bf", size = 14668152, upload-time = "2025-11-26T20:27:38.026Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/4e/4e/21cd0b8f365449f1576f93de1ec8718ed18a7a3bc086dfbdeb79437bba7a/botocore-1.41.5-py3-none-any.whl", hash = "sha256:3fef7fcda30c82c27202d232cfdbd6782cb27f20f8e7e21b20606483e66ee73a", size = 14337008, upload-time = "2025-11-26T20:27:35.208Z" }, ] -[[package]] -name = "botocore" -version = "1.42.49" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'linux'", -] -dependencies = [ - { name = "jmespath", marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "python-dateutil", marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "urllib3", marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/c5/95/c3a3765ab65073695161e7180d631428cb6e67c18d97e8897871dfe51fcc/botocore-1.42.49.tar.gz", hash = "sha256:333115a64a507697b0c450ade7e2d82bc8b4e21c0051542514532b455712bdcc", size = 14958380, upload-time = "2026-02-13T20:29:47.218Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/cd/7e7ceeff26889d1fd923f069381e3b2b85ff6d46c6fd1409ed8f486cc06f/botocore-1.42.49-py3-none-any.whl", hash = "sha256:1c33544f72101eed4ccf903ebb667a803e14e25b2af4e0836e4b871da1c0af37", size = 14630510, upload-time = "2026-02-13T20:29:43.086Z" }, -] - [[package]] name = "braceexpand" version = "0.1.7" @@ -827,7 +749,7 @@ name = "cffi" version = "2.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pycparser", marker = "implementation_name != 'PyPy'" }, + { name = "pycparser", marker = "implementation_name != 'PyPy' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } wheels = [ @@ -998,7 +920,7 @@ name = "click" version = "8.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } wheels = [ @@ -1288,74 +1210,27 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ff/fa/d3c15189f7c52aaefbaea76fb012119b04b9013f4bf446cb4eb4c26c4e6b/cython-3.2.4-py3-none-any.whl", hash = "sha256:732fc93bc33ae4b14f6afaca663b916c2fdd5dcbfad7114e17fb2434eeaea45c", size = 1257078, upload-time = "2026-01-04T14:14:12.373Z" }, ] -[[package]] -name = "datasets" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'linux'", -] -dependencies = [ - { name = "aiohttp", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, extra = ["http"], marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "huggingface-hub", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "multiprocess", version = "0.70.19", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "packaging", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "pyarrow", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { 
name = "requests", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "responses", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "tqdm", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "xxhash", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/31/64/1e6fb2a0eb6b0d55117233cf33279ba6d680c0f031ebae81281a47c92760/datasets-2.2.1.tar.gz", hash = "sha256:d362717c4394589b516c8f397ff20a6fe720454aed877ab61d06f3bc05df9544", size = 302132, upload-time = "2022-05-11T17:02:29.543Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/2d/41e8aec8d4bad6f07adfcbc89cf743e0d31c876371d453b2936bcfa7fe34/datasets-2.2.1-py3-none-any.whl", hash = "sha256:1938f3e99599422de50b9b54fe802aca854ed130382dab0b3820c821f7ae6d5e", size = 342193, upload-time = "2022-05-11T17:02:27.047Z" }, -] - [[package]] name = "datasets" version = "4.5.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.13.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 
'win32'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", -] dependencies = [ - { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "filelock", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, extra = ["http"], marker = "(python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-lts') or extra == 'extra-13-megatron-core-dev'" }, - { name = "httpx", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "huggingface-hub", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "multiprocess", version = "0.70.18", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" } }, + { name = "filelock" }, + { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, extra = ["http"], marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "multiprocess" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' 
and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "packaging", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "packaging" }, { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= 
'3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "pyarrow", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "pyyaml", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "requests", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "tqdm", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "xxhash", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "pyarrow" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "xxhash" }, ] sdist = { url = "https://files.pythonhosted.org/packages/55/bf/bb927bde63d649296c83e883171ae77074717c1b80fe2868b328bd0dbcbb/datasets-4.5.0.tar.gz", hash = "sha256:00c698ce1c2452e646cc5fad47fef39d3fe78dd650a8a6eb205bb45eb63cd500", size = 588384, upload-time = "2026-01-14T18:27:54.297Z" } wheels = [ @@ -1385,8 +1260,8 @@ name = "deprecated" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or 
sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev'" }, - { name = "wrapt", version = "2.1.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, + { name = "wrapt", version = "2.1.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/49/85/12f0a49a7c4ffb70572b6c2ef13c90c88fd190debda93b23f026b25f9634/deprecated-1.3.1.tar.gz", hash = "sha256:b1b50e0ff0c1fddaa5708a2c6b0a6588bb09b892825ab2b214ac9ea9d92a5223", size = 2932523, upload-time = "2025-10-30T08:19:02.757Z" } wheels = [ @@ -1398,6 +1273,7 @@ name = "dill" version = "0.4.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'linux'", "python_full_version == '3.13.*' and sys_platform == 'linux'", "python_full_version == '3.12.*' and sys_platform == 'linux'", "python_full_version >= '3.14' and sys_platform == 'win32'", @@ -1415,7 +1291,6 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", "python_full_version < '3.11' and sys_platform == 'linux'", "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", ] sdist = { url = 
"https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" } wheels = [ @@ -1539,7 +1414,7 @@ version = "0.1.0" source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0#d5363b4a418128cd8111983b191c4b8869a9766b" } dependencies = [ { name = "absl-py" }, - { name = "torch", marker = "sys_platform == 'never'" }, + { name = "torch", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "typing-extensions" }, ] @@ -1548,7 +1423,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -1570,6 +1445,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d6/1f/e99e23ee01847147fa194e8d41cfcf2535a2dbfcb51414c541cadb15c5d7/fabric-3.2.2-py3-none-any.whl", hash = "sha256:91c47c0be68b14936c88b34da8a1f55e5710fd28397dac5d4ff2e21558113a6f", size = 59417, upload-time = "2023-08-31T01:42:03.917Z" }, ] +[[package]] +name = "fast-hadamard-transform" +version = "1.0.4.post1" +source = { git = "https://github.com/Dao-AILab/fast-hadamard-transform.git?rev=f134af63deb2df17e1171a9ec1ea4a7d8604d5ca#f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" } +dependencies = [ + { name = "ninja" }, + { 
name = "packaging" }, + { name = "torch", marker = "sys_platform == 'never'" }, +] + [[package]] name = "fastapi" version = "0.133.0" @@ -1852,6 +1737,7 @@ name = "fsspec" version = "2025.10.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'linux'", "python_full_version == '3.13.*' and sys_platform == 'linux'", "python_full_version == '3.12.*' and sys_platform == 'linux'", "python_full_version >= '3.14' and sys_platform == 'win32'", @@ -1869,7 +1755,6 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", "python_full_version < '3.11' and sys_platform == 'linux'", "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", ] sdist = { url = "https://files.pythonhosted.org/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" } wheels = [ @@ -1878,7 +1763,7 @@ wheels = [ [package.optional-dependencies] http = [ - { name = "aiohttp", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "aiohttp" }, ] [[package]] @@ -1893,11 +1778,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" }, ] -[package.optional-dependencies] -http = [ - { name = "aiohttp", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, -] - [[package]] name = "gitdb" version = "4.0.12" @@ -2150,9 +2030,9 @@ version = "0.36.2" source = { registry = "https://pypi.org/simple" } dependencies = [ 
{ name = "filelock" }, - { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev'" }, - { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, + { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pyyaml" }, { name = "requests" }, @@ -2587,7 +2467,7 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "mdurl", marker = "python_full_version < '3.11'" }, + { name = "mdurl", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } wheels = [ @@ -2617,7 +2497,7 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ - { name = "mdurl", marker = "python_full_version >= '3.11'" }, + { name = "mdurl", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } wheels = [ @@ -2754,9 +2634,10 @@ dependencies = [ dev = [ { name = "av" }, { name = "causal-conv1d" }, - { name = "datasets", version = "4.5.0", source = { registry = "https://pypi.org/simple" } }, + { name = "datasets" }, { name = "einops" }, { name = "emerging-optimizers" }, + { name = "fast-hadamard-transform" }, { name = "fastapi" }, { name = "flash-linear-attention" }, { name = "flashinfer-python" }, @@ -2781,8 +2662,7 @@ dev = [ lts = [ { name = "av" }, { name = "causal-conv1d" }, - { name = "datasets", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "datasets", version = "4.5.0", source = { registry = 
"https://pypi.org/simple" }, marker = "(python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "datasets" }, { name = "einops" }, { name = "emerging-optimizers" }, { name = "fastapi" }, @@ -2860,8 +2740,8 @@ test = [ { name = "pytest-random-order" }, { name = "pyyaml" }, { name = "tensorboard" }, - { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev'" }, - { name = "wrapt", version = "2.1.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, + { name = "wrapt", version = "2.1.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] [package.metadata] @@ -2877,6 +2757,7 @@ requires-dist = [ { name = "einops", marker = "extra == 'lts'", specifier = "~=0.8" }, { name = "emerging-optimizers", marker = "extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, { name = "emerging-optimizers", marker = "extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, + { name = "fast-hadamard-transform", 
marker = "extra == 'dev'", git = "https://github.com/Dao-AILab/fast-hadamard-transform.git?rev=f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" }, { name = "fastapi", marker = "extra == 'dev'", specifier = "~=0.50" }, { name = "fastapi", marker = "extra == 'lts'", specifier = "~=0.50" }, { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.4.0" }, @@ -2981,8 +2862,7 @@ dependencies = [ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "pillow" }, { name = "pyyaml" }, - { name = "s3fs", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-lts') or extra == 'extra-13-megatron-core-dev'" }, - { name = "s3fs", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "s3fs" }, { name = "torch", marker = "sys_platform == 'never'" }, { name = "tqdm" }, { name = "webdataset" }, @@ -3157,7 +3037,7 @@ name = "multidict" version = "6.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" 
}, ] sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } wheels = [ @@ -3294,28 +3174,8 @@ wheels = [ name = "multiprocess" version = "0.70.18" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.13.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", -] dependencies = [ - { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" } }, ] sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" } wheels = [ @@ -3333,32 +3193,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" }, ] -[[package]] -name = "multiprocess" -version = "0.70.19" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'linux'", -] -dependencies = [ - { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a2/f2/e783ac7f2aeeed14e9e12801f22529cc7e6b7ab80928d6dcce4e9f00922d/multiprocess-0.70.19.tar.gz", hash = "sha256:952021e0e6c55a4a9fe4cd787895b86e239a40e76802a789d6305398d3975897", size = 2079989, upload-time = "2026-01-19T06:47:39.744Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/b6/10832f96b499690854e574360be342a282f5f7dba58eff791299ff6c0637/multiprocess-0.70.19-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:02e5c35d7d6cd2bdc89c1858867f7bde4012837411023a4696c148c1bdd7c80e", size = 135131, upload-time = "2026-01-19T06:47:20.479Z" }, - { url = "https://files.pythonhosted.org/packages/99/50/faef2d8106534b0dc4a0b772668a1a99682696ebf17d3c0f13f2ed6a656a/multiprocess-0.70.19-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:79576c02d1207ec405b00cabf2c643c36070800cca433860e14539df7818b2aa", size = 135131, upload-time = "2026-01-19T06:47:21.879Z" }, - { url = "https://files.pythonhosted.org/packages/94/b1/0b71d18b76bf423c2e8ee00b31db37d17297ab3b4db44e188692afdca628/multiprocess-0.70.19-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c6b6d78d43a03b68014ca1f0b7937d965393a670c5de7c29026beb2258f2f896", size = 135134, upload-time = "2026-01-19T06:47:23.262Z" }, - { url = "https://files.pythonhosted.org/packages/7e/aa/714635c727dbfc251139226fa4eaf1b07f00dc12d9cd2eb25f931adaf873/multiprocess-0.70.19-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1bbf1b69af1cf64cd05f65337d9215b88079ec819cd0ea7bac4dab84e162efe7", size = 144743, upload-time = "2026-01-19T06:47:24.562Z" }, - { url = "https://files.pythonhosted.org/packages/0f/e1/155f6abf5e6b5d9cef29b6d0167c180846157a4aca9b9bee1a217f67c959/multiprocess-0.70.19-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:5be9ec7f0c1c49a4f4a6fd20d5dda4aeabc2d39a50f4ad53720f1cd02b3a7c2e", size = 144738, upload-time = "2026-01-19T06:47:26.636Z" }, - { url = "https://files.pythonhosted.org/packages/af/cb/f421c2869d75750a4f32301cc20c4b63fab6376e9a75c8e5e655bdeb3d9b/multiprocess-0.70.19-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1c3dce098845a0db43b32a0b76a228ca059a668071cfeaa0f40c36c0b1585d45", size = 144741, upload-time = "2026-01-19T06:47:27.985Z" }, - { url = "https://files.pythonhosted.org/packages/e3/45/8004d1e6b9185c1a444d6b55ac5682acf9d98035e54386d967366035a03a/multiprocess-0.70.19-py310-none-any.whl", hash = "sha256:97404393419dcb2a8385910864eedf47a3cadf82c66345b44f036420eb0b5d87", size = 134948, upload-time = "2026-01-19T06:47:32.325Z" }, - { url = "https://files.pythonhosted.org/packages/86/c2/dec9722dc3474c164a0b6bcd9a7ed7da542c98af8cabce05374abab35edd/multiprocess-0.70.19-py311-none-any.whl", hash = "sha256:928851ae7973aea4ce0eaf330bbdafb2e01398a91518d5c8818802845564f45c", size = 144457, 
upload-time = "2026-01-19T06:47:33.711Z" }, - { url = "https://files.pythonhosted.org/packages/71/70/38998b950a97ea279e6bd657575d22d1a2047256caf707d9a10fbce4f065/multiprocess-0.70.19-py312-none-any.whl", hash = "sha256:3a56c0e85dd5025161bac5ce138dcac1e49174c7d8e74596537e729fd5c53c28", size = 150281, upload-time = "2026-01-19T06:47:35.037Z" }, - { url = "https://files.pythonhosted.org/packages/7f/74/d2c27e03cb84251dfe7249b8e82923643c6d48fa4883b9476b025e7dc7eb/multiprocess-0.70.19-py313-none-any.whl", hash = "sha256:8d5eb4ec5017ba2fab4e34a747c6d2c2b6fecfe9e7236e77988db91580ada952", size = 156414, upload-time = "2026-01-19T06:47:35.915Z" }, - { url = "https://files.pythonhosted.org/packages/a0/61/af9115673a5870fd885247e2f1b68c4f1197737da315b520a91c757a861a/multiprocess-0.70.19-py314-none-any.whl", hash = "sha256:e8cc7fbdff15c0613f0a1f1f8744bef961b0a164c0ca29bdff53e9d2d93c5e5f", size = 160318, upload-time = "2026-01-19T06:47:37.497Z" }, - { url = "https://files.pythonhosted.org/packages/7e/82/69e539c4c2027f1e1697e09aaa2449243085a0edf81ae2c6341e84d769b6/multiprocess-0.70.19-py39-none-any.whl", hash = "sha256:0d4b4397ed669d371c81dcd1ef33fd384a44d6c3de1bd0ca7ac06d837720d3c5", size = 133477, upload-time = "2026-01-19T06:47:38.619Z" }, -] - [[package]] name = "mypy-extensions" version = "1.1.0" @@ -3745,59 +3579,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/86/94188e03e5d4dd7b73c390b0cddcde5618b3799c18e327b2bf15763f6137/nvdlfw_inspect-0.2.2-py3-none-any.whl", hash = "sha256:8a4dc2814c5a4cd19ae304170b9bfa514538ef3c3eb243a45a82404ec3cb279d", size = 30964, upload-time = "2025-12-03T10:52:01.933Z" }, ] -[[package]] -name = "nvidia-cublas-cu12" -version = "12.8.4.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = 
"sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, - { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, - { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 567544208, upload-time = "2025-03-07T01:53:30.535Z" }, -] - -[[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, - { url = "https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" }, -] - -[[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = 
"12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" }, -] - -[[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, - { url = 
"https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" }, -] - -[[package]] -name = "nvidia-cudnn-cu12" -version = "9.10.2.21" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, - { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, - { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" }, -] - [[package]] name = "nvidia-cudnn-frontend" version = "1.18.0" @@ -3820,76 +3601,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/53/df2810b56d259ef96fa6beaa1381bd14c29fbe82836b409516e864c5e177/nvidia_cudnn_frontend-1.18.0-cp314-cp314-win_amd64.whl", hash = "sha256:5053b473fa74168b5fbf35934cd6187f88aa03b8447b9f2cd417332d5e5c9569", size = 1592759, upload-time = "2026-02-11T21:32:33.87Z" }, ] -[[package]] -name = "nvidia-cufft-cu12" -version = "11.3.3.83" -source = { registry = "https://pypi.org/simple" } 
-dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, - { url = "https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" }, -] - -[[package]] -name = "nvidia-cufile-cu12" -version = "1.13.1.3" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, - { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, -] - -[[package]] -name = "nvidia-curand-cu12" -version = "10.3.9.90" -source = { registry = 
"https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, - { url = "https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" }, -] - -[[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.3.90" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, - { url = 
"https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" }, -] - -[[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.8.93" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, - { url = "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" }, -] - -[[package]] -name = "nvidia-cusparselt-cu12" -version = "0.7.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, - { url = 
"https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, - { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" }, -] - [[package]] name = "nvidia-cutlass-dsl" version = "4.4.0" @@ -3963,44 +3674,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/09/30147ab0d0409d3492f1d37469fe0586c82aeec6eec9a907f59d24094516/nvidia_modelopt-0.41.0-py3-none-any.whl", hash = "sha256:ffa5f903d22653649318831a470550ae55ee04716c068d5ade61c3176fdc1d7d", size = 934582, upload-time = "2026-01-20T17:21:28.494Z" }, ] -[[package]] -name = "nvidia-nccl-cu12" -version = "2.27.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" }, - { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, -] - -[[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, - { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, - { url = "https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" }, -] - -[[package]] -name = "nvidia-nvshmem-cu12" -version = "3.4.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/6a/03aa43cc9bd3ad91553a88b5f6fb25ed6a3752ae86ce2180221962bc2aa5/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b48363fc6964dede448029434c6abed6c5e37f823cb43c3bcde7ecfc0457e15", size = 138936938, upload-time = "2025-09-06T00:32:05.589Z" }, - { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" }, -] - -[[package]] -name = "nvidia-nvtx-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, - { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" }, -] - [[package]] name = "nvidia-resiliency-ext" version = "0.5.0" @@ -4501,10 +4174,10 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "python-dateutil", marker = "python_full_version < '3.11'" }, - { name = "pytz", marker = "python_full_version < '3.11'" }, - { name = "tzdata", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "python-dateutil", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "pytz", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 
'extra-13-megatron-core-lts')" }, + { name = "tzdata", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" } wheels = [ @@ -4580,9 +4253,9 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "python-dateutil", marker = "python_full_version >= '3.11'" }, - { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32')" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "python-dateutil", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 4641901, upload-time = "2026-02-17T22:20:16.434Z" } wheels = [ @@ -5346,8 +5019,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "astroid" }, { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev'" }, - { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, + { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "isort" }, { name = "mccabe" }, { name = "platformdirs" }, @@ -5688,7 +5361,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 
'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } wheels = [ @@ -5843,19 +5516,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, ] -[[package]] -name = "responses" -version = "0.18.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "requests", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "urllib3", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/03/a5/186653e51cb20fe3ac793403334d4d077fbb7bb18a9c5c2fce8304d5a2e2/responses-0.18.0.tar.gz", hash = "sha256:380cad4c1c1dc942e5e8a8eaae0b4d4edf708f4f010db8b7bcfafad1fcd254ff", size = 45885, upload-time = "2022-02-02T19:59:52.834Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/f3/2b3a6dc5986303b3dd1bbbcf482022acb2583c428cd23f0b6d37b1a1a519/responses-0.18.0-py3-none-any.whl", hash = "sha256:15c63ad16de13ee8e7182d99c9334f64fd81f1ee79f90748d527c28f7ca9dd51", size = 38735, upload-time = "2022-02-02T19:59:52.833Z" }, -] - [[package]] name = "rich" version = "14.3.3" @@ -6042,53 +5702,16 @@ wheels = [ name = "s3fs" version = "2025.10.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.13.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and sys_platform == 'win32'", - "python_full_version 
>= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", -] dependencies = [ - { name = "aiobotocore", version = "2.26.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "aiohttp", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "aiobotocore" }, + { name = "aiohttp" }, + { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" } }, ] sdist = { url = "https://files.pythonhosted.org/packages/bb/ee/7cf7de3b17ef6db10b027cc9f8a1108ceb6333e267943e666a35882b1474/s3fs-2025.10.0.tar.gz", hash = 
"sha256:e8be6cddc77aceea1681ece0f472c3a7f8ef71a0d2acddb1cc92bb6afa3e9e4f", size = 80383, upload-time = "2025-10-30T15:06:04.647Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/2d/fc/56cba14af8ad8fd020c85b6e44328520ac55939bb1f9d01444ad470504cb/s3fs-2025.10.0-py3-none-any.whl", hash = "sha256:da7ef25efc1541f5fca8e1116361e49ea1081f83f4e8001fbd77347c625da28a", size = 30357, upload-time = "2025-10-30T15:06:03.48Z" }, ] -[[package]] -name = "s3fs" -version = "2026.2.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'linux'", -] -dependencies = [ - { name = "aiobotocore", version = "3.1.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "aiohttp", marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/fa/be/392c8c5e0da9bfa139e41084690dd49a5e3e931099f78f52d3f6070105c6/s3fs-2026.2.0.tar.gz", hash = "sha256:91cb2a9f76e35643b76eeac3f47a6165172bb3def671f76b9111c8dd5779a2ac", size = 84152, upload-time = "2026-02-05T21:57:57.968Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/57/e1/64c264db50b68de8a438b60ceeb921b2f22da3ebb7ad6255150225d0beac/s3fs-2026.2.0-py3-none-any.whl", hash = "sha256:65198835b86b1d5771112b0085d1da52a6ede36508b1aaa6cae2aedc765dfe10", size = 31328, upload-time = "2026-02-05T21:57:56.532Z" }, -] - [[package]] name = "safetensors" version = "0.7.0" @@ -6382,8 +6005,8 @@ name = "smart-open" version = "7.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev'" }, - { name = "wrapt", version = "2.1.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, + { name = "wrapt", version = "2.1.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e8/be/a66598b305763861a9ab15ff0f2fbc44e47b1ce7a776797337a4eef37c66/smart_open-7.5.1.tar.gz", hash = 
"sha256:3f08e16827c4733699e6b2cc40328a3568f900cb12ad9a3ad233ba6c872d9fe7", size = 54034, upload-time = "2026-02-23T11:01:28.979Z" } wheels = [ @@ -6708,7 +6331,7 @@ version = "0.52.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } wheels = [ @@ -6729,7 +6352,7 @@ name = "sympy" version = "1.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "mpmath" }, + { name = "mpmath", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } wheels = [ @@ -7047,32 +6670,15 @@ name = "torch" version = 
"2.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cuda-bindings", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "filelock" }, - { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev'" }, - { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "jinja2" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 
'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12'" }, - { name = "sympy" }, - { name = "triton", marker = "sys_platform == 'never'" }, - { name = "typing-extensions" }, + { name = "filelock", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and 
extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 
'linux' and sys_platform != 'win32') or (python_full_version < '3.12' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "sympy", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "triton", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] wheels = [ { url = 
"https://files.pythonhosted.org/packages/5b/30/bfebdd8ec77db9a79775121789992d6b3b75ee5494971294d7b4b7c999bc/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2b980edd8d7c0a68c4e951ee1856334a43193f98730d97408fbd148c1a933313", size = 79411457, upload-time = "2026-02-10T21:44:59.189Z" }, @@ -7117,8 +6723,8 @@ dependencies = [ { name = "docker" }, { name = "docstring-parser" }, { name = "filelock" }, - { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev'" }, - { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, + { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "importlib-metadata" }, { name = "pyre-extensions" }, { name = "pyyaml" }, @@ -7135,7 +6741,7 @@ name = "tqdm" version = "4.67.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } wheels = [ @@ -7573,6 +7179,7 @@ name = "wrapt" version = "1.17.3" source = { registry = "https://pypi.org/simple" } resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'linux'", "python_full_version == '3.13.*' and sys_platform == 'linux'", "python_full_version == '3.12.*' and sys_platform == 'linux'", "python_full_version >= '3.14' and sys_platform == 'win32'", @@ -7590,7 +7197,6 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", "python_full_version < '3.11' and sys_platform == 'linux'", "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", ] sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" } wheels = [ From 1edfbd6782bdff49af547e5b15079283b3ceeaf8 Mon Sep 17 00:00:00 2001 From: eternally-z <105485498+eternally-z@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:14:00 +0800 Subject: [PATCH 306/334] Fix split_state_dict function for MoE models (#3667) Co-authored-by: kunlunl --- megatron/core/optimizer/optimizer.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index df8ec8ef613..f5d66b8db4f 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -1161,20 +1161,26 @@ def _split_state_dict(self, state_dict): state_dicts = 
[None] * len(self.chained_optimizers) if state_dict is not None: if len(self.model_chunks) == 1: - state_dicts[0] = state_dict + # When there is only one global model chunk, all sub-optimizers + # (e.g., dense and MoE parts) use the same model state dict. + state_dicts = [state_dict] * len(self.chained_optimizers) else: - # Split state_dict if needed + # Split state_dict by model chunk object. prefix = "model" if "model0" in state_dict.keys() else "model_" - offset = 0 + chunk_to_global_idx = {chunk: idx for idx, chunk in enumerate(self.model_chunks)} for optimizer_idx, optimizer in enumerate(self.chained_optimizers): if hasattr(optimizer, "model_chunks"): d = {} - for chunk_idx in range(len(optimizer.model_chunks)): + for chunk_idx, model_chunk in enumerate(optimizer.model_chunks): + assert model_chunk in chunk_to_global_idx, ( + "Sub-optimizer model chunk was not found in " + "chained optimizer model chunks" + ) + global_idx = chunk_to_global_idx[model_chunk] assert ( - f"{prefix}{offset}" in state_dict - ), f"Wrong state_dict format, cannot find '{prefix}{offset}'" - d[f"{prefix}{chunk_idx}"] = state_dict[f"{prefix}{offset}"] - offset += 1 + f"{prefix}{global_idx}" in state_dict + ), f"Wrong state_dict format, cannot find '{prefix}{global_idx}'" + d[f"{prefix}{chunk_idx}"] = state_dict[f"{prefix}{global_idx}"] if len(d) > 0: state_dicts[optimizer_idx] = d return state_dicts From 28a0aefdeab41e4b9f53f12f95487b445faac3aa Mon Sep 17 00:00:00 2001 From: Huy Vu <86480512+huvunvidia@users.noreply.github.com> Date: Tue, 10 Mar 2026 03:37:27 -0400 Subject: [PATCH 307/334] Exposing interleave argument for fused_apply_rotary_pos_emb_thd (#3759) --- megatron/core/extensions/transformer_engine.py | 2 ++ megatron/core/models/common/embeddings/rope_utils.py | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 20f0ece635e..f82dabcb618 100644 --- 
a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -2385,6 +2385,7 @@ def fused_apply_rotary_pos_emb_thd( freqs: torch.Tensor, cp_size: int = 1, cp_rank: int = 0, + interleaved: bool = False, ) -> torch.Tensor: """ Apply rotary positional embedding to input tensor T in `thd` format with CP support. @@ -2398,6 +2399,7 @@ def fused_apply_rotary_pos_emb_thd( cu_seqlens=cu_seqlens, cp_size=cp_size, cp_rank=cp_rank, + interleaved=interleaved, ) else: assert cp_size == 1, "Only TE >= 1.12 supports RoPE fusion for THD format with CP." diff --git a/megatron/core/models/common/embeddings/rope_utils.py b/megatron/core/models/common/embeddings/rope_utils.py index 0e00c6340ed..b990615da29 100644 --- a/megatron/core/models/common/embeddings/rope_utils.py +++ b/megatron/core/models/common/embeddings/rope_utils.py @@ -313,7 +313,12 @@ def apply_rotary_pos_emb( else: assert fused_apply_rotary_pos_emb_thd is not None, "apply_rope_fusion is not available." 
return fused_apply_rotary_pos_emb_thd( - t, cu_seqlens, freqs, cp_size=cp_group.size(), cp_rank=cp_group.rank() + t, + cu_seqlens, + freqs, + cp_size=cp_group.size(), + cp_rank=cp_group.rank(), + interleaved=config.rotary_interleaved, ) # use unfused implementation if cu_seqlens is None: From 15fb5577b0de89347f3a44e154d4930eaa5ecd5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 11 Mar 2026 01:33:25 +0100 Subject: [PATCH 308/334] build: Move fast-hadamard-transform (#3786) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- docker/Dockerfile.ci.dev | 9 +++- pyproject.toml | 4 +- uv.lock | 113 ++++++++++++++++++--------------------- 3 files changed, 61 insertions(+), 65 deletions(-) diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index fa214deeea5..7f3a5c0552a 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -16,7 +16,7 @@ ENV UV_LINK_MODE=copy RUN bash -ex <<"EOF" apt-get update - apt-get install -y --no-install-recommends gettext python3-venv psmisc uuid-runtime + apt-get install -y --no-install-recommends gettext python3-venv psmisc uuid-runtime apt-get clean python -m venv /opt/jet ARCH=$(uname -m) @@ -31,6 +31,11 @@ RUN bash -ex <<"EOF" curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh EOF +RUN ln -sf /usr/local/cuda/targets/x86_64-linux/include/cuda \ /usr/local/include/cuda +RUN find /usr/local/cuda -name "utility" 2>/dev/null | head -5 && \ ls /usr/local/cuda/targets/x86_64-linux/include/ | head -20 + COPY README.md pyproject.toml uv.lock /workspace/ COPY megatron/core/__init__.py /workspace/megatron/core/ COPY megatron/core/package_info.py /workspace/megatron/core/ @@ -40,7 +45,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ export NVTE_CUDA_ARCHS="80;90;100" uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages uv sync --only-group build - uv sync --extra ${IMAGE_TYPE} --extra mlm --link-mode copy 
--locked \ + uv sync --extra ${IMAGE_TYPE} --extra mlm --group no_pypi_wheels --link-mode copy --locked \ --no-install-package torch \ --no-install-package torchvision \ --no-install-package triton \ diff --git a/pyproject.toml b/pyproject.toml index dc4efdc1523..d39c9a011fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,7 +97,6 @@ dev = [ "flask[async]", "hypercorn", "openai", - "fast-hadamard-transform", ] lts = [ @@ -161,14 +160,13 @@ linting = [ "pylint==3.2.6", ] ci = ["python-gitlab", "slack-sdk", "pandas"] -no_pypi_wheels = ["flash_mla", "emerging_optimizers"] +no_pypi_wheels = ["emerging_optimizers", "fast-hadamard-transform"] [tool.uv] default-groups = ["linting", "build", "test"] no-build-isolation-package = [ "causal-conv1d", "nv-grouped-gemm", - "flash_mla", "mamba-ssm", "transformer-engine", "transformer-engine-torch", diff --git a/uv.lock b/uv.lock index f7147b8754d..433e8b3ea8e 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -107,7 +107,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, - { name = "async-timeout", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "async-timeout", marker = "python_full_version < '3.11'" }, { name = "attrs" }, { name = "frozenlist" }, { name = "multidict" }, @@ -247,7 +247,7 @@ version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "frozenlist" }, - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } wheels = [ @@ -301,10 +301,10 @@ name = "anyio" version = "4.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, { name = "idna" }, { name = "sniffio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949, upload-time = "2025-03-17T00:02:54.77Z" } wheels = [ @@ -749,7 +749,7 @@ name = "cffi" version = "2.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pycparser", marker = "implementation_name != 'PyPy' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } wheels = [ @@ -920,7 +920,7 @@ name = "click" version = "8.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = 
"sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } wheels = [ @@ -1414,7 +1414,7 @@ version = "0.1.0" source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0#d5363b4a418128cd8111983b191c4b8869a9766b" } dependencies = [ { name = "absl-py" }, - { name = "torch", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "torch", marker = "sys_platform == 'never'" }, { name = "typing-extensions" }, ] @@ -1423,7 +1423,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -1452,7 +1452,7 @@ source = { git = "https://github.com/Dao-AILab/fast-hadamard-transform.git?rev=f dependencies = [ { name = "ninja" }, { name = "packaging" }, - { name = "torch", marker = "sys_platform == 'never'" }, + { name = "torch", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] [[package]] @@ -1544,11 +1544,6 @@ wheels = [ 
{ url = "https://files.pythonhosted.org/packages/63/d5/6327559a9d5b9243b10c3984f1bcef256ed2ad06d105a3bb8f7b2979659c/flash_linear_attention-0.4.1-py3-none-any.whl", hash = "sha256:d18bdfe9d1f4b424676444eac9d50fb8433b70e5d4e0e0878b20bcbcdbea57ce", size = 287415, upload-time = "2025-12-24T18:07:35.815Z" }, ] -[[package]] -name = "flash-mla" -version = "1.0.0+9edee0c" -source = { git = "https://github.com/deepseek-ai/FlashMLA?rev=9edee0c022cd0938148a18e334203b0aab43aa19#9edee0c022cd0938148a18e334203b0aab43aa19" } - [[package]] name = "flashinfer-python" version = "0.5.3" @@ -2032,7 +2027,7 @@ dependencies = [ { name = "filelock" }, { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, { name = "packaging" }, { name = "pyyaml" }, { name = "requests" }, @@ -2467,7 +2462,7 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "mdurl", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "mdurl", marker = 
"python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } wheels = [ @@ -2497,7 +2492,7 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ - { name = "mdurl", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "mdurl", marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } wheels = [ @@ -2637,7 +2632,6 @@ dev = [ { name = "datasets" }, { name = "einops" }, { name = "emerging-optimizers" }, - { name = "fast-hadamard-transform" }, { name = "fastapi" }, { name = "flash-linear-attention" }, { name = "flashinfer-python" }, @@ -2725,7 +2719,7 @@ linting = [ ] no-pypi-wheels = [ { name = "emerging-optimizers" }, - { name = "flash-mla" }, + { name = "fast-hadamard-transform" }, ] test = [ { name = "coverage" }, @@ -2757,7 +2751,6 @@ requires-dist = [ { name = "einops", marker = "extra == 'lts'", specifier = "~=0.8" }, { name = "emerging-optimizers", marker = "extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, { name = "emerging-optimizers", marker = "extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, - { name = "fast-hadamard-transform", marker = "extra == 'dev'", git = 
"https://github.com/Dao-AILab/fast-hadamard-transform.git?rev=f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" }, { name = "fastapi", marker = "extra == 'dev'", specifier = "~=0.50" }, { name = "fastapi", marker = "extra == 'lts'", specifier = "~=0.50" }, { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.4.0" }, @@ -2832,7 +2825,7 @@ linting = [ ] no-pypi-wheels = [ { name = "emerging-optimizers", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, - { name = "flash-mla", git = "https://github.com/deepseek-ai/FlashMLA?rev=9edee0c022cd0938148a18e334203b0aab43aa19" }, + { name = "fast-hadamard-transform", git = "https://github.com/Dao-AILab/fast-hadamard-transform.git?rev=f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" }, ] test = [ { name = "coverage" }, @@ -3037,7 +3030,7 @@ name = "multidict" version = "6.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } wheels = [ @@ -3902,7 +3895,7 @@ wheels = [ [[package]] name = "opentelemetry-api" -version = "1.39.1" +version = "1.40.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -3928,9 +3921,9 @@ dependencies = [ { name = "importlib-metadata", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, { name = "typing-extensions", 
marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/1d/4049a9e8698361cc1a1aa03a6c59e4fa4c71e0c0f94a30f988a6876a2ae6/opentelemetry_api-1.40.0.tar.gz", hash = "sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f", size = 70851, upload-time = "2026-03-04T14:17:21.555Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" }, + { url = "https://files.pythonhosted.org/packages/5f/bf/93795954016c522008da367da292adceed71cca6ee1717e1d64c83089099/opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9", size = 68676, upload-time = "2026-03-04T14:17:01.24Z" }, ] [[package]] @@ -3969,7 +3962,7 @@ wheels = [ [[package]] name = "opentelemetry-exporter-prometheus" -version = "0.60b1" +version = "0.61b0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -3992,13 +3985,13 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "opentelemetry-api", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra 
!= 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, - { name = "opentelemetry-sdk", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-api", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-sdk", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, { name = "prometheus-client", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/14/39/7dafa6fff210737267bed35a8855b6ac7399b9e582b8cf1f25f842517012/opentelemetry_exporter_prometheus-0.60b1.tar.gz", hash = "sha256:a4011b46906323f71724649d301b4dc188aaa068852e814f4df38cc76eac616b", size = 14976, upload-time = "2025-12-11T13:32:42.944Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4a/20/9e818fd364d12e8d0cfdce4a3b2d82e24d98c4ceebb315de6b6770b5f214/opentelemetry_exporter_prometheus-0.61b0.tar.gz", hash = "sha256:7c4919bd8e79abd62b610767e80f42c9c3a06c5183f4dd9141eedeb57aea284b", size = 15136, upload-time = "2026-03-04T14:17:26.275Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/0d/4be6bf5477a3eb3d917d2f17d3c0b6720cd6cb97898444a61d43cc983f5c/opentelemetry_exporter_prometheus-0.60b1-py3-none-any.whl", hash = 
"sha256:49f59178de4f4590e3cef0b8b95cf6e071aae70e1f060566df5546fad773b8fd", size = 13019, upload-time = "2025-12-11T13:32:23.974Z" }, + { url = "https://files.pythonhosted.org/packages/02/4a/b65d40e94d1d930aee73a1a2857211ee6ab10ce3686cbdae5eea78cd9d34/opentelemetry_exporter_prometheus-0.61b0-py3-none-any.whl", hash = "sha256:3013b41f4370143d48d219a2351473761423e5882fa4c213811eaefacba39cb7", size = 13149, upload-time = "2026-03-04T14:17:08.983Z" }, ] [[package]] @@ -4049,7 +4042,7 @@ wheels = [ [[package]] name = "opentelemetry-sdk" -version = "1.39.1" +version = "1.40.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -4072,13 +4065,13 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "opentelemetry-api", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, - { name = "opentelemetry-semantic-conventions", version = "0.60b1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-api", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-semantic-conventions", version = "0.61b0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 
'extra-13-megatron-core-lts')" }, { name = "typing-extensions", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/eb/fb/c76080c9ba07e1e8235d24cdcc4d125ef7aa3edf23eb4e497c2e50889adc/opentelemetry_sdk-1.39.1.tar.gz", hash = "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6", size = 171460, upload-time = "2025-12-11T13:32:49.369Z" } +sdist = { url = "https://files.pythonhosted.org/packages/58/fd/3c3125b20ba18ce2155ba9ea74acb0ae5d25f8cd39cfd37455601b7955cc/opentelemetry_sdk-1.40.0.tar.gz", hash = "sha256:18e9f5ec20d859d268c7cb3c5198c8d105d073714db3de50b593b8c1345a48f2", size = 184252, upload-time = "2026-03-04T14:17:31.87Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/98/e91cf858f203d86f4eccdf763dcf01cf03f1dae80c3750f7e635bfa206b6/opentelemetry_sdk-1.39.1-py3-none-any.whl", hash = "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", size = 132565, upload-time = "2025-12-11T13:32:35.069Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c5/6a852903d8bfac758c6dc6e9a68b015d3c33f2f1be5e9591e0f4b69c7e0a/opentelemetry_sdk-1.40.0-py3-none-any.whl", hash = "sha256:787d2154a71f4b3d81f20524a8ce061b7db667d24e46753f32a7bc48f1c1f3f1", size = 141951, upload-time = "2026-03-04T14:17:17.961Z" }, ] [[package]] @@ -4116,7 +4109,7 @@ wheels = [ [[package]] name = "opentelemetry-semantic-conventions" -version = "0.60b1" +version = "0.61b0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -4139,12 +4132,12 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "opentelemetry-api", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-api", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, { name = "typing-extensions", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/91/df/553f93ed38bf22f4b999d9be9c185adb558982214f33eae539d3b5cd0858/opentelemetry_semantic_conventions-0.60b1.tar.gz", hash = "sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", size = 137935, upload-time = "2025-12-11T13:32:50.487Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/c0/4ae7973f3c2cfd2b6e321f1675626f0dab0a97027cc7a297474c9c8f3d04/opentelemetry_semantic_conventions-0.61b0.tar.gz", hash = "sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a", size = 145755, upload-time = "2026-03-04T14:17:32.664Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/5e/5958555e09635d09b75de3c4f8b9cae7335ca545d77392ffe7331534c402/opentelemetry_semantic_conventions-0.60b1-py3-none-any.whl", hash = "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb", size = 219982, upload-time = "2025-12-11T13:32:36.955Z" }, + { url = "https://files.pythonhosted.org/packages/b2/37/cc6a55e448deaa9b27377d087da8615a3416d8ad523d5960b78dbeadd02a/opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2", size = 231621, upload-time = "2026-03-04T14:17:19.33Z" }, ] [[package]] @@ -4174,10 +4167,10 @@ resolution-markers = 
[ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "python-dateutil", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "pytz", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "tzdata", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "python-dateutil", marker = "python_full_version < '3.11'" }, + { name = "pytz", marker = "python_full_version < '3.11'" }, + { name = "tzdata", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" } wheels = [ @@ -4253,9 +4246,9 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "python-dateutil", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "tzdata", marker = 
"(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "python-dateutil", marker = "python_full_version >= '3.11'" }, + { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 4641901, upload-time = "2026-02-17T22:20:16.434Z" } wheels = [ @@ -5342,10 +5335,10 @@ default = [ { name = "grpcio" }, { name = "opencensus" }, { name = "opentelemetry-exporter-prometheus", version = "0.54b1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, - { name = "opentelemetry-exporter-prometheus", version = "0.60b1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-exporter-prometheus", version = "0.61b0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 
'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, { name = "opentelemetry-proto" }, { name = "opentelemetry-sdk", version = "1.33.1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, - { name = "opentelemetry-sdk", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-sdk", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, { name = "prometheus-client" }, { name = "py-spy" }, { name = "pydantic" }, @@ -5361,7 +5354,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } wheels = [ @@ -6331,7 +6324,7 @@ version = "0.52.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } wheels = [ @@ -6352,7 +6345,7 @@ name = "sympy" version = "1.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "mpmath", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts')" }, + { name = "mpmath" }, ] sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } wheels = [ @@ -6670,15 +6663,15 @@ name = "torch" version = "2.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 
'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "filelock", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = 
"jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (python_full_version < '3.12' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "sympy", marker = "(python_full_version < 
'3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "triton", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'" }, + { name = "sympy", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "triton", marker = "sys_platform == 'never'" }, + { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and 
sys_platform != 'win32')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/5b/30/bfebdd8ec77db9a79775121789992d6b3b75ee5494971294d7b4b7c999bc/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2b980edd8d7c0a68c4e951ee1856334a43193f98730d97408fbd148c1a933313", size = 79411457, upload-time = "2026-02-10T21:44:59.189Z" }, @@ -6741,7 +6734,7 @@ name = "tqdm" version = "4.67.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } wheels = [ From dbf6c4c81da310c825e76e09f17649a864b8dccc Mon Sep 17 00:00:00 2001 From: Li Jinliang Date: Wed, 11 Mar 2026 12:25:02 +0800 Subject: [PATCH 309/334] fix ddp bug when --overlap-grad-reduce and --num-distributed-optimi for dev (#3694) --- megatron/core/distributed/param_and_grad_buffer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/megatron/core/distributed/param_and_grad_buffer.py b/megatron/core/distributed/param_and_grad_buffer.py index 088374fbf13..85b9d98a3be 100644 --- a/megatron/core/distributed/param_and_grad_buffer.py +++ b/megatron/core/distributed/param_and_grad_buffer.py @@ -419,10 +419,10 @@ def start_grad_sync(self, force_all_reduce: Optional[bool] = False): # need to overlap communication. 
stream_context = torch.cuda.stream(self.communication_stream) - # The RS/AR communication stream needs to wait for the default stream + # The RS/AR communication stream needs to wait for the current stream # to complete its gradient computation before launching the next # gradient reduction collective. - self.communication_stream.wait_stream(torch.cuda.default_stream()) + self.communication_stream.wait_stream(torch.cuda.current_stream()) else: stream_context = nullcontext() @@ -529,7 +529,7 @@ def finish_grad_sync(self, force_all_reduce: Optional[bool] = False): # When using multiple DistOpt instances, we don't need to sync here as we launch # communications on a separate communication stream. if self.ddp_config.num_distributed_optimizer_instances > 1: - torch.cuda.default_stream().wait_stream(self.communication_stream) + torch.cuda.current_stream().wait_stream(self.communication_stream) return assert self.grad_reduce_handle is not None, ( f"Communication call has not been issued for this bucket " From cde56a4692bb51edfc21d268123b448fb36d6ab6 Mon Sep 17 00:00:00 2001 From: Tailai Ma <58548582+xiaoyao0115@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:32:56 +0800 Subject: [PATCH 310/334] [Dev] Fix for rope when enabling THD + Dynamic-CP; and use the naming Dynamic-CP. 
(#3405) Signed-off-by: xiaoyao0115 <1804647152@qq.com> --- megatron/core/datasets/data_schedule.py | 6 +- megatron/core/datasets/gpt_dataset.py | 4 +- .../core/extensions/transformer_engine.py | 29 +- megatron/core/model_parallel_config.py | 24 +- megatron/core/parallel_state.py | 55 ++- ..._cp_schedule.py => dynamic_cp_schedule.py} | 8 +- megatron/core/pipeline_parallel/schedules.py | 6 +- megatron/core/transformer/attention.py | 8 + .../transformer/multi_latent_attention.py | 4 +- megatron/core/utils.py | 19 +- megatron/training/arguments.py | 12 +- megatron/training/datasets/data_samplers.py | 12 +- megatron/training/datasets/sft_dataset.py | 6 +- megatron/training/initialize.py | 2 +- megatron/training/training.py | 6 +- megatron/training/utils.py | 10 +- pretrain_gpt.py | 8 +- pretrain_mamba.py | 2 +- .../unit_tests/models/test_mamba_moe_model.py | 1 + tests/unit_tests/test_parallel_state.py | 10 +- .../transformer/test_thd_correctness.py | 434 +++++++++++++++++- 21 files changed, 556 insertions(+), 110 deletions(-) rename megatron/core/pipeline_parallel/{hybrid_cp_schedule.py => dynamic_cp_schedule.py} (99%) diff --git a/megatron/core/datasets/data_schedule.py b/megatron/core/datasets/data_schedule.py index 00591e4c24d..45b78e625c6 100644 --- a/megatron/core/datasets/data_schedule.py +++ b/megatron/core/datasets/data_schedule.py @@ -16,11 +16,11 @@ reroute_samples_to_dcp_ranks, ) from megatron.core.packed_seq_params import PackedSeqParams -from megatron.core.pipeline_parallel.hybrid_cp_schedule import BalancedCPScheduler +from megatron.core.pipeline_parallel.dynamic_cp_schedule import BalancedCPScheduler from megatron.core.process_groups_config import ProcessGroupCollection -class HybridCPDataLoaderWrapper: +class DynamicCPDataLoaderWrapper: """ A wrapper class that wraps around an existing data_iterator. 
For every __next__ call, @@ -51,7 +51,7 @@ def __init__( self.tp_group = pg_collection.tp assert ( self.dp_cp_group is not None and self.dp_group is not None and self.tp_group is not None - ), "dp_cp_group, dp_group, tp_group must not be None when using hybrid context parallel" + ), "dp_cp_group, dp_group, tp_group must not be None when using dynamic context parallel" self.cp_balancing_scheduler = BalancedCPScheduler( max_seq_len_per_rank=self.config.max_seqlen_per_dp_cp_rank, dp_cp_group=self.dp_cp_group diff --git a/megatron/core/datasets/gpt_dataset.py b/megatron/core/datasets/gpt_dataset.py index 04d2c279818..60d5f82b441 100644 --- a/megatron/core/datasets/gpt_dataset.py +++ b/megatron/core/datasets/gpt_dataset.py @@ -61,8 +61,8 @@ class GPTDatasetConfig(BlendedMegatronDatasetConfig): Set to 0 if sequence parallel is not enabled regardless of TP size. """ - hybrid_context_parallel: bool = False - """Option to enable hybrid context parallelism. When setting this to True, + dynamic_context_parallel: bool = False + """Option to enable dynamic context parallelism. When setting this to True, each sample should be divisible by the data parallel size * context parallel size * 2. If sequence parallel is enabled, it should be divisible by the data parallel size * context parallel size * sequence parallel size * 2. 
diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index f82dabcb618..943a72c531f 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1363,21 +1363,20 @@ def forward( """Forward.""" if packed_seq_params is not None: # If Dynamic CP group is provided, update TE DPA CP group - if packed_seq_params.cp_group is not None: - self.cp_group = packed_seq_params.cp_group - super().set_context_parallel_group( - self.cp_group, - torch.distributed.get_process_group_ranks(self.cp_group), - TEDotProductAttention.cp_stream, - self.cp_comm_type, - ) - # If cp_group is None but local_cp_size is provided, - # Indicates to turn off CP dynamically - elif packed_seq_params.local_cp_size is not None: - assert ( - packed_seq_params.local_cp_size == 1 - ), "local_cp_size must be == 1 if provided without cp_group" - super().set_context_parallel_group(None, None, None, self.cp_comm_type) + if packed_seq_params.local_cp_size is not None: + if packed_seq_params.local_cp_size == 1: + super().set_context_parallel_group(None, None, None, self.cp_comm_type) + else: + assert ( + packed_seq_params.cp_group is not None + ), "cp_group is not set in packed_seq_params for dynamic CP" + self.cp_group = packed_seq_params.cp_group + super().set_context_parallel_group( + self.cp_group, + torch.distributed.get_process_group_ranks(self.cp_group), + TEDotProductAttention.cp_stream, + self.cp_comm_type, + ) self.kept_packed_seq_params.discard("cp_group") self.kept_packed_seq_params.discard("local_cp_size") diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index 970b3b871fe..075aa75c76a 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -65,16 +65,19 @@ class ModelParallelConfig: each rank when sequence_packing_scheduler is not None. 
""" - hybrid_context_parallel: bool = False + dynamic_context_parallel: bool = False """ - If true, enables hybrid context parallel. This is used to balance the workload of + If true, enables dynamic context parallel. This is used to balance the workload of each CP rank when we use packed samples with variable sequence lengths. - Please set max_seqlen_per_dp_cp_rank when using hybrid_context_parallel. + Please set max_seqlen_per_dp_cp_rank when using dynamic_context_parallel. """ + hybrid_context_parallel: bool = False + """Deprecated. Use ``dynamic_context_parallel`` instead.""" + sequence_packing_scheduler: Optional[Literal['dp_balanced']] = None """ - Scheduler for sequence packing and hybrid context parallel. + Scheduler for sequence packing and dynamic context parallel. dp_balanced: DP-balanced scheduler for sequence packing. """ @@ -412,6 +415,19 @@ def __post_init__(self): See https://docs.python.org/3/library/dataclasses.html#post-init-processing for more details. """ + if self.hybrid_context_parallel: + warnings.warn( + "hybrid_context_parallel is deprecated and will be removed in a future release. " + "Use dynamic_context_parallel instead.", + DeprecationWarning, + ) + if self.dynamic_context_parallel: + raise ValueError( + "Cannot set both hybrid_context_parallel and dynamic_context_parallel. " + "Please use dynamic_context_parallel only." 
+ ) + self.dynamic_context_parallel = True + if self.sequence_parallel: if self.tensor_model_parallel_size <= 1: raise ValueError("Cannot use sequence parallelism without tensor parallelism") diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 087cbe7e152..a0e1b392b43 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -115,8 +115,8 @@ _CONTEXT_PARALLEL_GLOBAL_RANKS = None # Hierarchical context parallel groups _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS = None -# Hybrid context parallel groups -_HYBRID_DP_CP_GROUPS = {} +# Dynamic context parallel groups +_DYNAMIC_DP_CP_GROUPS = {} # Data parallel group information with context parallel combined. _DATA_PARALLEL_GROUP_WITH_CP = None @@ -421,29 +421,29 @@ def create_hierarchical_groups( return hierarchical_groups, hierarchical_groups_gloo -def create_hybrid_dp_cp_groups(rank, ranks, pg_options): +def create_dynamic_dp_cp_groups(rank, ranks, pg_options): """ - Creates groups required for hybrid DPxCP. + Creates groups required for dynamic DPxCP. Creates a new group for every power of 2 up to the number of DPxCP ranks. Returns a dictionary indexed by group size. """ - hybrid_dp_cp_groups = {} + dynamic_dp_cp_groups = {} # Generate group for every power of 2 up to the number of CP ranks # We limit the allowed group sizes in order to avoid excessive overhead. 
- group_sizes = [2**i for i in range(int(log2(len(ranks))))][1:] + group_sizes = [2**i for i in range(int(log2(len(ranks))))] for group_size in group_sizes: for i in range(0, len(ranks), group_size): group = create_group( ranks[i : i + group_size], pg_options=pg_options, - group_desc=f"HYBRID_DP_CP_GROUP_{group_size}", + group_desc=f"DYNAMIC_DP_CP_GROUP_{group_size}", ) if rank in ranks[i : i + group_size]: assert ( - group_size not in hybrid_dp_cp_groups - ), f"Rank {rank} appears in multiple Hybrid DP CP groups of size {group_size}" - hybrid_dp_cp_groups[group_size] = group - return hybrid_dp_cp_groups + group_size not in dynamic_dp_cp_groups + ), f"Rank {rank} appears in multiple Dynamic DP CP groups of size {group_size}" + dynamic_dp_cp_groups[group_size] = group + return dynamic_dp_cp_groups class RankGenerator(object): @@ -555,7 +555,7 @@ def initialize_model_parallel( use_sharp: bool = False, context_parallel_size: int = 1, hierarchical_context_parallel_sizes: Optional[List[int]] = None, - hybrid_context_parallel: bool = False, + dynamic_context_parallel: bool = False, expert_model_parallel_size: int = 1, num_distributed_optimizer_instances: int = 1, expert_tensor_parallel_size: Optional[int] = None, @@ -937,18 +937,29 @@ def initialize_model_parallel( if "NCCL_COLLNET_ENABLE" in os.environ: del os.environ["NCCL_COLLNET_ENABLE"] - if hybrid_context_parallel: - global _HYBRID_DP_CP_GROUPS + if dynamic_context_parallel: + # TODO: Are gloo groups needed for Dynamic CP? + global _DYNAMIC_DP_CP_GROUPS for ranks_with_cp in decoder_rank_generator.get_ranks('dp-cp'): assert ( len(ranks_with_cp) % 2 == 0 - ), "Hybrid context parallel requires an even number of ranks" - _HYBRID_DP_CP_GROUPS.update( - create_hybrid_dp_cp_groups( + ), "Dynamic context parallel requires an even number of ranks" + _DYNAMIC_DP_CP_GROUPS.update( + create_dynamic_dp_cp_groups( rank, ranks_with_cp, get_nccl_options("dp_cp", nccl_comm_cfgs) ) ) - # TODO: Are gloo groups needed for hybrid cp? 
+ + # PyTorch is performing lazy initialization of the communicator group. + # Therefore, we need to perform a nccl call to ensure that the communicator group is created. + data_parallel_size_with_cp = data_parallel_size * context_parallel_size + group_sizes = [2**i for i in range(0, int(log2(data_parallel_size_with_cp)))] + if group_sizes[-1] * 2 == data_parallel_size_with_cp: + group_sizes.append(data_parallel_size_with_cp) + for group_size in group_sizes: + group = get_dynamic_data_context_parallel_groups(group_size=group_size) + torch.distributed.barrier(group=group, device_ids=[torch.cuda.current_device()]) + torch.cuda.synchronize() for ranks in decoder_rank_generator.get_ranks('dp'): group = create_group( @@ -1472,16 +1483,16 @@ def get_hierarchical_context_parallel_groups(check_initialized=True): return _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS -def get_hybrid_data_context_parallel_groups(check_initialized=True, group_size=None): - """Get the hybrid context parallel groups the caller rank belongs to.""" +def get_dynamic_data_context_parallel_groups(check_initialized=True, group_size=None): + """Get the dynamic context parallel groups the caller rank belongs to.""" # If the group size is the same as the entire DPxCP group, return the original group if get_data_parallel_world_size(with_context_parallel=True) == group_size: if check_initialized: assert _DATA_PARALLEL_GROUP_WITH_CP is not None return _DATA_PARALLEL_GROUP_WITH_CP if check_initialized: - assert _HYBRID_DP_CP_GROUPS is not None - return _HYBRID_DP_CP_GROUPS[group_size] + assert _DYNAMIC_DP_CP_GROUPS is not None + return _DYNAMIC_DP_CP_GROUPS[group_size] def get_embedding_group(check_initialized=True): diff --git a/megatron/core/pipeline_parallel/hybrid_cp_schedule.py b/megatron/core/pipeline_parallel/dynamic_cp_schedule.py similarity index 99% rename from megatron/core/pipeline_parallel/hybrid_cp_schedule.py rename to megatron/core/pipeline_parallel/dynamic_cp_schedule.py index 
27b5fc87945..48dd633aeba 100644 --- a/megatron/core/pipeline_parallel/hybrid_cp_schedule.py +++ b/megatron/core/pipeline_parallel/dynamic_cp_schedule.py @@ -48,7 +48,7 @@ def gpus_needed(self, seq_len: int) -> int: This is used to determine the CP size of a sub-sample. The number is rounded up to the next power of 2 to match the available - hybrid context parallel process group sizes. + dynamic context parallel process group sizes. """ return max(1, 2 ** ceil(log2((seq_len / self.max_seq_len_per_rank)))) @@ -370,7 +370,7 @@ def fill_empty_gpus( "try to increase 'max-seqlen-per-cp-rank'." min_group_size = min(existing_group_sizes) - # We have Hybrid DPxCP groups for every power of 2 of GPUs or the entire DPxCP group. + # We have Dynamic DPxCP groups for every power of 2 of GPUs or the entire DPxCP group. next_power = min(min_group_size * 2, total_gpus) # Find the first group of min_group_size that can be expanded @@ -474,7 +474,7 @@ def get_groups_and_subsamples(self, sample_id_seqlens, config): return groups, sample_id_groups -def hybrid_context_parallel_forward_backward( +def dynamic_context_parallel_forward_backward( forward_step_func, data_iterator, model, @@ -492,7 +492,7 @@ def hybrid_context_parallel_forward_backward( model_type, ): """ - Scheduler for Hybrid Context Parallel. + Scheduler for Dynamic Context Parallel. This function performs the packed sample scheduling and determines 1. 
The number of microbatches to schedule for each CP rank diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index 6dd5e7de02a..ed3794208f0 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -36,7 +36,7 @@ combined_1f1b_schedule_for_interleaved_pipelining, combined_1f1b_schedule_for_no_pipelining, ) -from .hybrid_cp_schedule import hybrid_context_parallel_forward_backward +from .dynamic_cp_schedule import dynamic_context_parallel_forward_backward # Types Shape = Union[List[int], torch.Size] @@ -617,8 +617,8 @@ def forward_backward_no_pipelining( total_num_tokens, partial(check_first_val_step, first_val_step, forward_only), ) - elif config.hybrid_context_parallel: - forward_data_store, total_num_tokens = hybrid_context_parallel_forward_backward( + elif config.dynamic_context_parallel: + forward_data_store, total_num_tokens = dynamic_context_parallel_forward_backward( forward_step_func, data_iterator, model, diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index b8d9ef69443..3b054ccc4b1 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -914,6 +914,13 @@ def forward( (Tuple[Tensor, Tensor]) Attention output and bias. 
""" + + # here we need to set the right cp group for dynamic-cp + _orig_cp_group = self.pg_collection.cp + if packed_seq_params is not None and packed_seq_params.local_cp_size is not None: + assert packed_seq_params.cp_group is not None, "cp_group must be set in dynamic-cp mode" + self.pg_collection.cp = packed_seq_params.cp_group + # Check if we need to skip RoPE # no_rope is 0-indexed array and self.layer_number is 1-indexed no_rope = ( @@ -1218,6 +1225,7 @@ def forward( ) nvtx_range_pop(suffix="linear_proj") + self.pg_collection.cp = _orig_cp_group return output, bias @jit_fuser diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index 11330262159..4b3f876a978 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -530,8 +530,8 @@ def get_query_key_value_tensors( if packed_seq_params is not None: assert ( packed_seq_params.local_cp_size is None - ), "hybrid_context_parallel is not supported with MLA yet and is planned for future. \ - Please disable hybrid_context_parallel." + ), "dynamic_context_parallel is not supported with MLA yet and is planned for future. \ + Please disable dynamic_context_parallel." 
inference_context = deprecate_inference_params(inference_context, inference_params) diff --git a/megatron/core/utils.py b/megatron/core/utils.py index 7c60f2da457..14c783ab0dc 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -2141,11 +2141,11 @@ def get_thd_batch_on_this_cp_rank( ################################ -### hybrid context parallel ### +### dynamic context parallel ### ################################ -def get_batch_on_this_hybrid_cp_rank( +def get_batch_on_this_dynamic_cp_rank( batch: Dict[str, Any], local_cp_size: int, cp_group: Optional[torch.distributed.ProcessGroup] = None, @@ -2155,18 +2155,15 @@ def get_batch_on_this_hybrid_cp_rank( """ assert local_cp_size is not None if cp_group is None: - # Get the local cp group required for as defined by the HybridCPDataLoaderWrapper - if local_cp_size > 1: - cp_group = parallel_state.get_hybrid_data_context_parallel_groups( - group_size=local_cp_size - ) + # Get the local cp group required for as defined by the DynamicCPDataLoaderWrapper + cp_group = parallel_state.get_dynamic_data_context_parallel_groups(group_size=local_cp_size) else: # If cp group is provided, it must match the local cp size - # as defined by the HybridCPDataLoaderWrapper + # as defined by the DynamicCPDataLoaderWrapper assert cp_group.size() == local_cp_size # Convert [seqlen] to [1, seqlen] similar to default collate_fn - # as hybrid_context_parallel dataloader wrapper does not go through default collate_fn + # as dynamic_context_parallel dataloader wrapper does not go through default collate_fn for key, data in batch.items(): if key in ['attention_mask']: continue @@ -2186,8 +2183,8 @@ def get_batch_on_this_hybrid_cp_rank( cp_group=cp_group, ) - if cp_group is not None and cp_group.size() > 1: - # When using hybrid_context_parallel, each sub-sample of a packed sample is + if cp_group.size() > 1: + # When using dynamic_context_parallel, each sub-sample of a packed sample is # required to be divisible by CP*DP*2 or 
CP*DP*TP*2 (if using sequence parallel) batch = get_batch_on_this_cp_rank(batch, cp_group=cp_group) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index dece9b480f5..b4691091be9 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1047,12 +1047,12 @@ def validate_args(args, defaults={}): if args.tp_comm_overlap: assert args.sequence_parallel == True, 'Tensor parallel communication/GEMM overlap can happen only when sequence parallelism is enabled' - if args.hybrid_context_parallel: - assert not args.pipeline_model_parallel_size > 1, 'Hybrid context parallelism not supported with pipeline parallelism' - assert not args.enable_cuda_graph, 'Hybrid context parallelism not supported with CUDA Graph' - assert not args.use_megatron_fsdp, 'Hybrid context parallelism not supported with Megatron FSDP' - assert args.dataloader_type == 'single', 'Hybrid context parallelism only supported with single dataloader type' - assert args.calculate_per_token_loss, 'Hybrid context parallelism must be used with --calculate-per-token-loss' + if args.dynamic_context_parallel: + assert not args.pipeline_model_parallel_size > 1, 'Dynamic context parallelism not supported with pipeline parallelism' + assert not args.enable_cuda_graph, 'Dynamic context parallelism not supported with CUDA Graph' + assert not args.use_megatron_fsdp, 'Dynamic context parallelism not supported with Megatron FSDP' + assert args.dataloader_type == 'single', 'Dynamic context parallelism only supported with single dataloader type' + assert args.calculate_per_token_loss, 'Dynamic context parallelism must be used with --calculate-per-token-loss' if args.sequence_packing_scheduler is not None: assert args.context_parallel_size * args.max_seqlen_per_dp_cp_rank >= args.seq_length, \ diff --git a/megatron/training/datasets/data_samplers.py b/megatron/training/datasets/data_samplers.py index ca4cc1b36a3..166d4597a97 100644 --- 
a/megatron/training/datasets/data_samplers.py +++ b/megatron/training/datasets/data_samplers.py @@ -39,8 +39,8 @@ def build_pretraining_data_loader(dataset, consumed_samples): data_parallel_size=mpu.get_data_parallel_world_size(), ) elif args.dataloader_type == 'single': - if args.hybrid_context_parallel: - batch_sampler = HybridCPMegatronPretrainingSampler( + if args.dynamic_context_parallel: + batch_sampler = DynamicCPMegatronPretrainingSampler( total_samples=len(dataset), consumed_samples=consumed_samples, micro_batch_size=args.micro_batch_size, @@ -79,7 +79,7 @@ def worker_init_fn(_): worker_init_fn if args.exit_signal_handler and args.num_workers > 0 else None ) # Torch dataloader. - if args.hybrid_context_parallel: + if args.dynamic_context_parallel: extra_kwargs = {"collate_fn": lambda x: x,} else: extra_kwargs = {} @@ -162,11 +162,11 @@ def __iter__(self): start_idx, end_idx = self.get_start_end_idx() yield batch[start_idx:end_idx] -class HybridCPMegatronPretrainingSampler(MegatronPretrainingSampler): +class DynamicCPMegatronPretrainingSampler(MegatronPretrainingSampler): """ - Data sampler for hybrid context parallel (Hybrid CP) format. + Data sampler for dynamic context parallel (Dynamic CP) format. This data sampler pulls in the entire global batch at once across all data parallel ranks. - This helps provide the Hybrid CP Dataloader Wrapper to schedule and load balance sub-samples + This helps provide the Dynamic CP Dataloader Wrapper to schedule and load balance sub-samples of the entire global batch. """ diff --git a/megatron/training/datasets/sft_dataset.py b/megatron/training/datasets/sft_dataset.py index 3f2e6e7362c..250a0137568 100644 --- a/megatron/training/datasets/sft_dataset.py +++ b/megatron/training/datasets/sft_dataset.py @@ -97,11 +97,11 @@ def _calculate_padding_divisor(self) -> int: Calculate the divisor used for sequence padding. 
tp_pad = tp_size * 2 if tp_size > 1 else 1 cp_pad = cp_size * 2 if cp_size > 1 else 1 - cp_pad = cp_pad * dp_size if hybrid_cp else cp_pad + cp_pad = cp_pad * dp_size if dynamic_cp else cp_pad divisor = cp_pad * tp_pad """ - if self.config.hybrid_context_parallel: - # Hybrid CP: consider both CP and DP + if self.config.dynamic_context_parallel: + # Dynamic CP: consider both CP and DP cp_pad = self.config.data_parallel_size * self.config.context_parallel_size * 2 else: # Standard CP: only consider CP diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index 80d0764bdf7..a5c757ca41b 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -375,7 +375,7 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s use_sharp=args.use_sharp, context_parallel_size=args.context_parallel_size, hierarchical_context_parallel_sizes=args.hierarchical_context_parallel_sizes, - hybrid_context_parallel=args.hybrid_context_parallel, + dynamic_context_parallel=args.dynamic_context_parallel, expert_model_parallel_size=args.expert_model_parallel_size, num_distributed_optimizer_instances=args.num_distributed_optimizer_instances, expert_tensor_parallel_size=args.expert_tensor_parallel_size, diff --git a/megatron/training/training.py b/megatron/training/training.py index 1b970d61ed3..c5715e96aed 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -142,7 +142,7 @@ def set_startup_timestamps(program_start=None, main_entry=None): from megatron.training.initialize import set_jit_fusion_options from megatron.training.utils import get_batch_on_this_cp_rank, get_batch_on_this_tp_rank from megatron.training.datasets.data_samplers import build_pretraining_data_loader -from megatron.core.datasets.data_schedule import HybridCPDataLoaderWrapper +from megatron.core.datasets.data_schedule import DynamicCPDataLoaderWrapper from megatron.core.optimizer_param_scheduler import 
OptimizerParamScheduler from megatron.core.transformer.moe import upcycling_utils from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker @@ -2589,8 +2589,8 @@ def train( energy_monitor = get_energy_monitor() one_logger = get_one_logger() - if args.hybrid_context_parallel: - train_data_iterator = iter(HybridCPDataLoaderWrapper(train_data_iterator, config)) + if args.dynamic_context_parallel: + train_data_iterator = iter(DynamicCPDataLoaderWrapper(train_data_iterator, config)) if args.run_workload_inspector_server: try: diff --git a/megatron/training/utils.py b/megatron/training/utils.py index 54d69f3b150..7844b450136 100644 --- a/megatron/training/utils.py +++ b/megatron/training/utils.py @@ -574,7 +574,7 @@ def _broadcast_cu_seqlens(cu_seqlens): buf = cu_seqlens.to(device=dev, non_blocking=True).contiguous() _broadcast(buf) - if args.hybrid_context_parallel: + if args.dynamic_context_parallel: seq_len = torch.tensor(batch['tokens'].shape[0], dtype=torch.int32, device=torch.cuda.current_device()) _broadcast(seq_len) @@ -604,7 +604,7 @@ def _broadcast_cu_seqlens(cu_seqlens): _broadcast(batch['attention_mask']) else: - if args.hybrid_context_parallel: + if args.dynamic_context_parallel: seq_len = torch.tensor(0, dtype=torch.int32, device=torch.cuda.current_device()) _broadcast(seq_len) shape = (seq_len.item()) @@ -627,7 +627,7 @@ def _broadcast_cu_seqlens(cu_seqlens): device=torch.cuda.current_device(), ) if args.create_attention_mask_in_dataloader: - shape_attention_mask = (args.micro_batch_size, 1, args.seq_length, args.seq_length) if not args.hybrid_context_parallel else (1, 1, shape[0], shape[0]) + shape_attention_mask = (args.micro_batch_size, 1, args.seq_length, args.seq_length) if not args.dynamic_context_parallel else (1, 1, shape[0], shape[0]) attention_mask = torch.empty( shape_attention_mask, dtype=torch.bool, @@ -641,7 +641,7 @@ def _broadcast_cu_seqlens(cu_seqlens): device=torch.cuda.current_device(), ) cu_seqlens = None - if 
args.hybrid_context_parallel or args.sft: + if args.dynamic_context_parallel or args.sft: max_seqlen = torch.empty( 1, dtype=torch.int32, @@ -654,7 +654,7 @@ def _broadcast_cu_seqlens(cu_seqlens): 1, dtype=torch.int32, device=torch.cuda.current_device(), - ) if args.hybrid_context_parallel else None + ) if args.dynamic_context_parallel else None def _broadcast_cu_seqlens(): dev = torch.cuda.current_device() diff --git a/pretrain_gpt.py b/pretrain_gpt.py index 083f97b0a2f..6ca303386ed 100644 --- a/pretrain_gpt.py +++ b/pretrain_gpt.py @@ -30,7 +30,7 @@ from megatron.core.models.gpt import GPTModel from megatron.core.rerun_state_machine import get_rerun_state_machine from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer -from megatron.core.utils import get_attr_wrapped_model, get_thd_batch_on_this_cp_rank, get_batch_on_this_hybrid_cp_rank, StragglerDetector +from megatron.core.utils import get_attr_wrapped_model, get_thd_batch_on_this_cp_rank, get_batch_on_this_dynamic_cp_rank, StragglerDetector from megatron.training import ( get_args, get_timers, @@ -102,8 +102,8 @@ def get_batch(data_iterator, vp_stage: Optional[int] = None): elif local_cp_size is None: # Packed THD format assert max_seqlen.dim() == 1 batch, packed_seq_params = get_thd_batch_on_this_cp_rank(batch, cu_seqlens, cu_seqlens_padded, max_seqlen) - else: # Hybrid CP format - batch, packed_seq_params = get_batch_on_this_hybrid_cp_rank(batch, local_cp_size) + else: # Dynamic CP format + batch, packed_seq_params = get_batch_on_this_dynamic_cp_rank(batch, local_cp_size) return (*batch.values(), packed_seq_params) @@ -260,7 +260,7 @@ def core_gpt_dataset_config_from_args(args): "context_parallel_size": args.context_parallel_size, "data_parallel_size": args.data_parallel_size, "sequence_parallel_size": args.tensor_model_parallel_size*args.sequence_parallel, - "hybrid_context_parallel": args.hybrid_context_parallel, + "dynamic_context_parallel": args.dynamic_context_parallel, 
"sft_mock_dataset_config_json":args.sft_mock_dataset_config_json, } diff --git a/pretrain_mamba.py b/pretrain_mamba.py index 0fecbef2c71..037f1817d99 100644 --- a/pretrain_mamba.py +++ b/pretrain_mamba.py @@ -94,7 +94,7 @@ def get_batch(data_iterator, vp_stage=None): cu_seqlens = batch['cu_seqlens'] # Unused at the moment cu_seqlens_padded = batch.pop('cu_seqlens_padded', None) - # Support for Hybrid Context Parallel (Unused in this script) + # Support for Dynamic Context Parallel (Unused in this script) local_cp_size = batch.pop('local_cp_size', None) if cu_seqlens is not None: diff --git a/tests/unit_tests/models/test_mamba_moe_model.py b/tests/unit_tests/models/test_mamba_moe_model.py index 2524b3ade50..a55042ee979 100644 --- a/tests/unit_tests/models/test_mamba_moe_model.py +++ b/tests/unit_tests/models/test_mamba_moe_model.py @@ -279,6 +279,7 @@ "fine_grained_activation_offloading": False, "min_offloaded_tensor_size": 1024 * 1024, "offload_modules": [], + "dynamic_context_parallel": False, "hybrid_context_parallel": False, "max_seqlen_per_dp_cp_rank": None, "sequence_packing_scheduler": None, diff --git a/tests/unit_tests/test_parallel_state.py b/tests/unit_tests/test_parallel_state.py index 21dc740cdf4..e7aa2fe4927 100644 --- a/tests/unit_tests/test_parallel_state.py +++ b/tests/unit_tests/test_parallel_state.py @@ -507,9 +507,9 @@ def golden_rank_result_from_past_code( "world_size, tp_size, cp_size, dp_size", [(8, 1, 2, 4), (8, 1, 1, 8)], # 8 GPUs, 1 TP, 2 CP, 4 DP # 8 GPUs, 1 TP, 1 CP, 8 DP ) -def test_hybrid_dp_cp_groups(world_size, tp_size, cp_size, dp_size): +def test_dynamic_dp_cp_groups(world_size, tp_size, cp_size, dp_size): """ - Test that hybrid DPxCP groups are created correctly. + Test that dynamic DPxCP groups are created correctly. 
""" Utils.destroy_model_parallel() @@ -520,13 +520,13 @@ def test_hybrid_dp_cp_groups(world_size, tp_size, cp_size, dp_size): Utils.initialize_model_parallel( tensor_model_parallel_size=tp_size, context_parallel_size=cp_size, - hybrid_context_parallel=True, + dynamic_context_parallel=True, ) dp_cp_size = ps.get_data_parallel_world_size(with_context_parallel=True) - group_sizes = [2**i for i in range(int(log2(dp_cp_size)))][1:] + group_sizes = [2**i for i in range(int(log2(dp_cp_size)))] for group_size in group_sizes: - group = ps.get_hybrid_data_context_parallel_groups(group_size=group_size) + group = ps.get_dynamic_data_context_parallel_groups(group_size=group_size) assert group.size() == group_size Utils.destroy_model_parallel() diff --git a/tests/unit_tests/transformer/test_thd_correctness.py b/tests/unit_tests/transformer/test_thd_correctness.py index ccf70b8a885..533f64081f4 100644 --- a/tests/unit_tests/transformer/test_thd_correctness.py +++ b/tests/unit_tests/transformer/test_thd_correctness.py @@ -30,6 +30,7 @@ import torch.nn as nn from megatron.core import parallel_state +from megatron.core.models.common.embeddings.rotary_pos_embedding import RotaryEmbedding from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed @@ -149,15 +150,29 @@ def compute_sbhd_padded_max_len( def compute_thd_padded_seqlens( - seqlens: List[int], cp_size: int, tp_size: int, sp_enabled: bool, pad_to_max: bool = False + seqlens: List[int], + cp_size: int, + tp_size: int, + sp_enabled: bool, + pad_to_max: bool = False, + dynamic_cp: bool = False, ) -> List[int]: """Padded per-sequence lengths for THD. When pad_to_max=True, each sequence is padded to max(seqlens) so that total THD tokens = max_len * B, matching SBHD. This ensures TE GEMM kernels see identical M dimensions for bitwise comparison. 
+ + When dynamic_cp=True, pad to the global upper-bound CP size so that the + same packed layout works regardless of which dynamic CP sub-group the + sequence lands in. """ - cp_divisor = 2 * cp_size if cp_size > 1 else 1 + if dynamic_cp: + MAX_CP_SIZE = 8 + effective_cp = max(cp_size, MAX_CP_SIZE) + else: + effective_cp = cp_size + cp_divisor = 2 * effective_cp if cp_size > 1 else 1 if pad_to_max: max_len = _round_up(max(seqlens), cp_divisor) padded = [max_len] * len(seqlens) @@ -181,6 +196,7 @@ def make_packed_seq_params( tp_size: int = 1, sp_enabled: bool = False, pad_to_max: bool = False, + dynamic_cp: bool = False, ) -> PackedSeqParams: """Create PackedSeqParams with cu_seqlens and cu_seqlens_padded.""" @@ -190,7 +206,9 @@ def to_cu_seqlens(lens): cu[i + 1] = cu[i] + l return cu.cuda() - padded = compute_thd_padded_seqlens(seqlens, cp_size, tp_size, sp_enabled, pad_to_max) + padded = compute_thd_padded_seqlens( + seqlens, cp_size, tp_size, sp_enabled, pad_to_max, dynamic_cp=dynamic_cp + ) return PackedSeqParams( cu_seqlens_q=to_cu_seqlens(seqlens), cu_seqlens_kv=to_cu_seqlens(seqlens), @@ -362,10 +380,21 @@ def shard_sbhd(tensor, cp_rank, cp_size, tp_rank, tp_size, sp_enabled): def shard_thd( - seq_data_list, seqlens, cp_rank, cp_size, tp_rank, tp_size, sp_enabled, H, pad_to_max=False + seq_data_list, + seqlens, + cp_rank, + cp_size, + tp_rank, + tp_size, + sp_enabled, + H, + pad_to_max=False, + dynamic_cp=False, ): """Shard per-sequence data into local THD [local_T, 1, H].""" - padded = compute_thd_padded_seqlens(seqlens, cp_size, tp_size, sp_enabled, pad_to_max) + padded = compute_thd_padded_seqlens( + seqlens, cp_size, tp_size, sp_enabled, pad_to_max, dynamic_cp=dynamic_cp + ) chunks = [] for data, sl, psl in zip(seq_data_list, seqlens, padded): @@ -446,7 +475,7 @@ class _GatherTHD(torch.autograd.Function): """Gather THD outputs from all ranks with gradient support.""" @staticmethod - def forward(ctx, local, seqlens, cp_size, tp_size, sp_enabled, H, 
pad_to_max): + def forward(ctx, local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max, dynamic_cp): ctx.seqlens, ctx.cp_size, ctx.tp_size, ctx.sp_enabled, ctx.H = ( seqlens, cp_size, @@ -456,7 +485,9 @@ def forward(ctx, local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max): ) ctx.cp_rank = parallel_state.get_context_parallel_rank() if cp_size > 1 else 0 ctx.tp_rank = parallel_state.get_tensor_model_parallel_rank() - ctx.padded = compute_thd_padded_seqlens(seqlens, cp_size, tp_size, sp_enabled, pad_to_max) + ctx.padded = compute_thd_padded_seqlens( + seqlens, cp_size, tp_size, sp_enabled, pad_to_max, dynamic_cp=dynamic_cp + ) out = local if sp_enabled: @@ -495,7 +526,7 @@ def backward(ctx, grad): if ctx.sp_enabled: seg = packed.shape[0] // ctx.tp_size packed = packed[ctx.tp_rank * seg : (ctx.tp_rank + 1) * seg] - return packed.unsqueeze(1).contiguous(), None, None, None, None, None, None + return packed.unsqueeze(1).contiguous(), None, None, None, None, None, None, None def gather_sbhd(local, cp_size, tp_size, sp_enabled): @@ -504,8 +535,8 @@ def gather_sbhd(local, cp_size, tp_size, sp_enabled): return _GatherSBHD.apply(local, cp_size, tp_size, sp_enabled) -def gather_thd(local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max=False): - return _GatherTHD.apply(local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max) +def gather_thd(local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max=False, dynamic_cp=False): + return _GatherTHD.apply(local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max, dynamic_cp) # ============================================================================= @@ -647,3 +678,386 @@ def test_thd_format(tc: TestCase): if tc.forward_bitwise or tc.backward_bitwise: torch.use_deterministic_algorithms(False) os.environ.pop("NVTE_ALLOW_NONDETERMINISTIC_ALGO", None) + + +# ============================================================================= +# Dynamic CP Test Infrastructure +# 
============================================================================= + + +@dataclass +class DynamicCPAssignment: + """Per-rank assignment in the dynamic CP configuration. + + local_cp_size: number of ranks in this rank's CP communicator. + seq_indices: indices into the test case's seqlens list that this rank processes. + + Ranks sharing the same CP sub-group have identical DynamicCPAssignment values. + """ + + local_cp_size: int + seq_indices: List[int] + + +@dataclass +class DynamicCPTestCase: + """Test case for dynamic CP correctness. + + Compares fixed CP (baseline) against dynamic CP where sub-groups of ranks + can process different sequences with different CP sizes. + + dcp_assignments: one entry per DP×CP rank (len == dp_cp_world_size). + Ranks in the same sub-group share the same local_cp_size and seq_indices. + """ + + name: str + hidden_size: int + num_heads: int + num_kv_heads: int + ffn_hidden_size: int + seqlens: List[int] + tp_size: int + cp_size: int + sp_enabled: bool + dcp_assignments: List[DynamicCPAssignment] + + +# Dynamic CP Test Cases +# --------------------- +# Each test runs two paths through the *same* TransformerLayer and compares +# forward outputs + backward gradients (similarity check with TE attention). +# +# Parameters: +# cp_size — the CP size used for the *baseline* (fixed CP) path. It also +# determines dp_size = world_size // (tp_size * cp_size), which controls how +# sequences are split across DP ranks in the baseline. The dynamic CP path +# ignores this cp_size and instead uses the local_cp_size from each +# DynamicCPAssignment. +# +# Baseline (fixed CP): +# Sequences are evenly split across DP ranks (seqs_per_dp = len(seqlens) // +# dp_size). Each DP rank runs standard CP (cp_size) on its subset: +# pad → zigzag shard → forward → gather → backward. +# +# Dynamic CP: +# dcp_assignments has one entry per DP×CP rank. Ranks sharing a CP sub-group +# have identical (local_cp_size, seq_indices). 
Each sub-group forms its own +# CP communicator and independently shards / gathers only the sequences +# assigned to it. +# +# Sequence lengths are intentionally non-powers-of-two (mostly primes) so +# that padding to cp_divisor is always exercised. +# +# fmt: off +_A = DynamicCPAssignment +DYNAMIC_CP_TEST_CASES = [ + # ------------------------------------------------------------------------- + # Uniform: all dp_cp ranks share all seqs with larger local_cp_size. + # All 4 ranks form one sub-group → equivalent to fixed CP but via the + # dynamic CP code path. + # ------------------------------------------------------------------------- + # tp=2, cp=2, world_size=8 → dp_cp_size=4, all ranks get same assignment + DynamicCPTestCase( + "dcp_uniform_tp2_cp2_sp", + 4096, 32, 8, 14336, + [3947, 1999, 1037, 4091, 2111, 503], + tp_size=2, cp_size=2, sp_enabled=True, + dcp_assignments=[ + _A(4, [0, 1, 2, 3, 4, 5]), # dp_cp_rank 0 + _A(4, [0, 1, 2, 3, 4, 5]), # dp_cp_rank 1 + _A(4, [0, 1, 2, 3, 4, 5]), # dp_cp_rank 2 + _A(4, [0, 1, 2, 3, 4, 5]), # dp_cp_rank 3 + ], + ), + # tp=1, cp=2, world_size=8 → dp_cp_size=8, all ranks get same assignment + DynamicCPTestCase( + "dcp_uniform_tp1_cp2", + 1024, 16, 4, 4096, + [4001, 2039, 997, 511, 3967, 2053, 1009, 499], + tp_size=1, cp_size=2, sp_enabled=False, + dcp_assignments=[ + _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 0 + _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 1 + _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 2 + _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 3 + _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 4 + _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 5 + _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 6 + _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 7 + ], + ), + # ------------------------------------------------------------------------- + # Heterogeneous: sub-groups with different local_cp_size. 
+ # Ranks are split into multiple CP sub-groups; some ranks process + # sequences alone (local_cp_size=1) while others cooperate (local_cp_size=2+). + # ------------------------------------------------------------------------- + # tp=2, cp=4, world_size=8 → dp_cp_size=4 + # rank 0: alone (cp=1), rank 1: alone (cp=1), ranks 2-3: pair (cp=2) + DynamicCPTestCase( + "dcp_hetero_tp2_cp4_sp", + 4096, 32, 8, 14336, + [4093, 2017, 3989, 2111, 1013, 509], + tp_size=2, cp_size=4, sp_enabled=True, + dcp_assignments=[ + _A(1, [0]), # dp_cp_rank 0: solo + _A(1, [1]), # dp_cp_rank 1: solo + _A(2, [2, 3, 4, 5]), # dp_cp_rank 2: pair with rank 3 + _A(2, [2, 3, 4, 5]), # dp_cp_rank 3: pair with rank 2 + ], + ), + # tp=1, cp=4, world_size=8 → dp_cp_size=8 + # ranks 0,1: solo; ranks 2-3: pair; ranks 4,5: solo; ranks 6-7: pair + DynamicCPTestCase( + "dcp_hetero_tp1_cp4", + 1024, 16, 4, 4096, + [4007, 2003, 3989, 2053, 4091, 2017, 1013, 503], + tp_size=1, cp_size=4, sp_enabled=False, + dcp_assignments=[ + _A(1, [0]), # dp_cp_rank 0: solo + _A(1, [1]), # dp_cp_rank 1: solo + _A(2, [2, 3]), # dp_cp_rank 2: pair with rank 3 + _A(2, [2, 3]), # dp_cp_rank 3: pair with rank 2 + _A(1, [4]), # dp_cp_rank 4: solo + _A(1, [5]), # dp_cp_rank 5: solo + _A(2, [6, 7]), # dp_cp_rank 6: pair with rank 7 + _A(2, [6, 7]), # dp_cp_rank 7: pair with rank 6 + ], + ), + # ------------------------------------------------------------------------- + # Mixed: cp4 + cp2 + cp1 + cp1, baseline fixed cp=2. 
+ # tp=1, cp=2, world_size=8 → dp_cp_size=8, dp_size=4 + # ranks 0-3: quad (cp=4), ranks 4-5: pair (cp=2), rank 6: solo, rank 7: solo + # ------------------------------------------------------------------------- + DynamicCPTestCase( + "dcp_mixed_tp1_cp2", + 1024, 16, 4, 4096, + [4007, 2003, 3989, 2053, 4091, 2017, 1013, 503], + tp_size=1, cp_size=2, sp_enabled=False, + dcp_assignments=[ + _A(4, [0, 1, 2, 3]), # dp_cp_rank 0: quad with ranks 1,2,3 + _A(4, [0, 1, 2, 3]), # dp_cp_rank 1: quad with ranks 0,2,3 + _A(4, [0, 1, 2, 3]), # dp_cp_rank 2: quad with ranks 0,1,3 + _A(4, [0, 1, 2, 3]), # dp_cp_rank 3: quad with ranks 0,1,2 + _A(2, [4, 5]), # dp_cp_rank 4: pair with rank 5 + _A(2, [4, 5]), # dp_cp_rank 5: pair with rank 4 + _A(1, [6]), # dp_cp_rank 6: solo + _A(1, [7]), # dp_cp_rank 7: solo + ], + ), +] +# fmt: on + + +# ============================================================================= +# Dynamic CP Gather (with explicit cp_group) +# ============================================================================= + + +class _GatherTHDDynamic(torch.autograd.Function): + """Gather THD outputs from an explicit CP group with gradient support.""" + + @staticmethod + def forward(ctx, local, seqlens, cp_size, tp_size, sp_enabled, H, cp_group, cp_rank): + ctx.seqlens, ctx.cp_size, ctx.tp_size, ctx.sp_enabled, ctx.H = ( + seqlens, + cp_size, + tp_size, + sp_enabled, + H, + ) + ctx.cp_rank = cp_rank + ctx.tp_rank = parallel_state.get_tensor_model_parallel_rank() + ctx.padded = compute_thd_padded_seqlens( + seqlens, cp_size, tp_size, sp_enabled, False, dynamic_cp=True + ) + + out = local + if sp_enabled: + gathered = [torch.empty_like(out) for _ in range(tp_size)] + dist.all_gather( + gathered, out.contiguous(), group=parallel_state.get_tensor_model_parallel_group() + ) + out = torch.cat(gathered, dim=0) + + if cp_size > 1: + local_lens = [p // cp_size for p in ctx.padded] + offset, seqs = 0, [] + for i, ll in enumerate(local_lens): + chunk = out[offset : offset + 
ll] + gathered = [torch.empty_like(chunk) for _ in range(cp_size)] + dist.all_gather(gathered, chunk.contiguous(), group=cp_group) + seqs.append(_zigzag_merge(gathered, cp_size)[: seqlens[i]]) + offset += ll + out = torch.cat(seqs, dim=0) + else: + out = _strip_thd_padding(out, seqlens, ctx.padded) + return out + + @staticmethod + def backward(ctx, grad): + offset, chunks = 0, [] + for sl, psl in zip(ctx.seqlens, ctx.padded): + g = grad[offset : offset + sl, 0, :] + if psl > sl: + g = torch.cat([g, torch.zeros(psl - sl, ctx.H, dtype=g.dtype, device=g.device)]) + chunks.append(_zigzag_split(g, ctx.cp_rank, ctx.cp_size)) + offset += sl + + packed = torch.cat(chunks, dim=0) + if ctx.sp_enabled: + seg = packed.shape[0] // ctx.tp_size + packed = packed[ctx.tp_rank * seg : (ctx.tp_rank + 1) * seg] + return packed.unsqueeze(1).contiguous(), None, None, None, None, None, None, None + + +def gather_thd_dynamic(local, seqlens, cp_size, tp_size, sp_enabled, H, cp_group, cp_rank): + return _GatherTHDDynamic.apply( + local, seqlens, cp_size, tp_size, sp_enabled, H, cp_group, cp_rank + ) + + +# ============================================================================= +# Dynamic CP Test Function +# ============================================================================= + + +@pytest.mark.parametrize("tc", DYNAMIC_CP_TEST_CASES, ids=lambda tc: tc.name) +def test_dynamic_cp_format(tc: DynamicCPTestCase): + """Compare fixed CP THD vs dynamic CP THD format outputs and gradients.""" + H, seqlens = tc.hidden_size, tc.seqlens + tp_size, cp_size, sp = tc.tp_size, tc.cp_size, tc.sp_enabled + + Utils.initialize_model_parallel( + tensor_model_parallel_size=tp_size, + context_parallel_size=cp_size, + dynamic_context_parallel=True, + ) + model_parallel_cuda_manual_seed(42) + + layer = build_gpt_layer( + H, + tc.num_heads, + tc.num_kv_heads, + tc.ffn_hidden_size, + tp_size, + cp_size, + sp, + use_mock_attention=False, + deterministic=False, + ) + kv_channels = H // tc.num_heads + rope 
= RotaryEmbedding(kv_channels=kv_channels, rotary_percent=1.0).cuda() + + cp_rank = parallel_state.get_context_parallel_rank() + tp_rank = parallel_state.get_tensor_model_parallel_rank() + dp_rank = parallel_state.get_data_parallel_rank() + dp_size = parallel_state.get_data_parallel_world_size() + + # All ranks generate identical full data (same seed, no dp_rank offset) + torch.manual_seed(42) + all_seq_data = [torch.randn(sl, H, dtype=torch.bfloat16).cuda() for sl in seqlens] + torch.manual_seed(142) + all_grad_data = [torch.randn(sl, H, dtype=torch.bfloat16).cuda() for sl in seqlens] + + # === Baseline: fixed CP, THD format === + assert ( + len(seqlens) % dp_size == 0 + ), f"Need len(seqlens)={len(seqlens)} divisible by dp_size={dp_size}" + seqs_per_dp = len(seqlens) // dp_size + base_indices = list(range(dp_rank * seqs_per_dp, (dp_rank + 1) * seqs_per_dp)) + base_seqlens = [seqlens[i] for i in base_indices] + base_seq_data = [all_seq_data[i] for i in base_indices] + base_grad_data = [all_grad_data[i] for i in base_indices] + + local_thd_base = shard_thd( + base_seq_data, base_seqlens, cp_rank, cp_size, tp_rank, tp_size, sp, H, dynamic_cp=True + ) + packed_base = make_packed_seq_params(base_seqlens, cp_size, tp_size, sp, dynamic_cp=True) + rotary_pos_emb_base = rope(packed_base.max_seqlen_q, packed_seq=True) + input_base = local_thd_base.detach().clone().requires_grad_(True) + out_base, _ = layer( + hidden_states=input_base, packed_seq_params=packed_base, rotary_pos_emb=rotary_pos_emb_base + ) + gathered_base = gather_thd(out_base, base_seqlens, cp_size, tp_size, sp, H, dynamic_cp=True) + grad_base = torch.cat(base_grad_data, dim=0).unsqueeze(1) + gathered_base.backward(grad_base) + baseline_grads = {n: p.grad.clone() for n, p in layer.named_parameters()} + layer.zero_grad() + + # === Dynamic CP === + dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True) + dp_cp_rank = dist.get_rank(group=dp_cp_group) + + assert dp_cp_rank < len( + 
tc.dcp_assignments + ), f"dp_cp_rank={dp_cp_rank} out of range (len={len(tc.dcp_assignments)})" + my_assignment = tc.dcp_assignments[dp_cp_rank] + local_cp_size = my_assignment.local_cp_size + dcp_indices = my_assignment.seq_indices + dcp_seqlens = [seqlens[i] for i in dcp_indices] + dcp_seq_data = [all_seq_data[i] for i in dcp_indices] + dcp_grad_data = [all_grad_data[i] for i in dcp_indices] + + dcp_cp_group = parallel_state.get_dynamic_data_context_parallel_groups(group_size=local_cp_size) + dcp_cp_rank = dist.get_rank(group=dcp_cp_group) + + local_thd_dcp = shard_thd( + dcp_seq_data, + dcp_seqlens, + dcp_cp_rank, + local_cp_size, + tp_rank, + tp_size, + sp, + H, + dynamic_cp=True, + ) + packed_dcp = make_packed_seq_params(dcp_seqlens, local_cp_size, tp_size, sp, dynamic_cp=True) + packed_dcp.local_cp_size = local_cp_size + packed_dcp.cp_group = dcp_cp_group + rotary_pos_emb_dcp = rope(packed_dcp.max_seqlen_q, packed_seq=True) + + input_dcp = local_thd_dcp.detach().clone().requires_grad_(True) + out_dcp, _ = layer( + hidden_states=input_dcp, packed_seq_params=packed_dcp, rotary_pos_emb=rotary_pos_emb_dcp + ) + gathered_dcp = gather_thd_dynamic( + out_dcp, dcp_seqlens, local_cp_size, tp_size, sp, H, dcp_cp_group, dcp_cp_rank + ) + grad_dcp = torch.cat(dcp_grad_data, dim=0).unsqueeze(1) + gathered_dcp.backward(grad_dcp) + dcp_grads = {n: p.grad.clone() for n, p in layer.named_parameters()} + + # === Gradient sync: reduce across all DP×CP ranks === + for n in baseline_grads: + dist.all_reduce(baseline_grads[n], group=dp_cp_group) + dist.all_reduce(dcp_grads[n], group=dp_cp_group) + if sp: + tp_group = parallel_state.get_tensor_model_parallel_group() + for n, p in layer.named_parameters(): + if getattr(p, "sequence_parallel", False): + dist.all_reduce(baseline_grads[n], group=tp_group) + dist.all_reduce(dcp_grads[n], group=tp_group) + + # === Forward comparison (per-sequence, on ranks that have both) === + common_indices = sorted(set(base_indices) & 
set(dcp_indices)) + for seq_idx in common_indices: + sl = seqlens[seq_idx] + base_pos = base_indices.index(seq_idx) + base_offset = sum(base_seqlens[:base_pos]) + dcp_pos = dcp_indices.index(seq_idx) + dcp_offset = sum(dcp_seqlens[:dcp_pos]) + assert_close( + f"seq[{seq_idx}] output", + gathered_base[base_offset : base_offset + sl, 0].detach(), + gathered_dcp[dcp_offset : dcp_offset + sl, 0].detach(), + False, + ) + + # === Backward comparison === + for n in baseline_grads: + if n in dcp_grads: + assert_close(f"grad[{n}]", baseline_grads[n], dcp_grads[n], False) + + # === Cleanup === + Utils.destroy_model_parallel() From 9374a4d328641a2fbbad4fd41cf038b539dcf1e6 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Wed, 11 Mar 2026 17:27:11 -0700 Subject: [PATCH 311/334] Continue emerging optimizer refactoring (#3737) Signed-off-by: Hao Wu --- .../core/optimizer/emerging_optimizers.py | 350 ++++++++++-------- megatron/core/optimizer/optimizer_config.py | 13 +- megatron/training/arguments.py | 8 +- pyproject.toml | 8 +- tests/unit_tests/dist_checkpointing/utils.py | 10 +- ...timizer.py => test_emerging_optimizers.py} | 347 +++++++++++++++-- uv.lock | 306 ++++++++++++--- 7 files changed, 795 insertions(+), 247 deletions(-) rename tests/unit_tests/{test_muon_optimizer.py => test_emerging_optimizers.py} (66%) diff --git a/megatron/core/optimizer/emerging_optimizers.py b/megatron/core/optimizer/emerging_optimizers.py index 3cf36670fd3..25294beabdf 100644 --- a/megatron/core/optimizer/emerging_optimizers.py +++ b/megatron/core/optimizer/emerging_optimizers.py @@ -8,6 +8,7 @@ 3. Add an ``EmergingOptimizerEntry`` to ``_EMERGING_OPTIMIZERS`` at the bottom. 
""" +import inspect import logging from dataclasses import dataclass, field from typing import Any, Callable, Dict, List, Literal, Optional @@ -21,15 +22,20 @@ from .optimizer_config import ParamKey, ParamPredicate try: + from emerging_optimizers import registry from emerging_optimizers.orthogonalized_optimizers import ( OrthogonalizedOptimizer, get_muon_scale_factor, ) from emerging_optimizers.orthogonalized_optimizers.muon_utils import newton_schulz_tp + # It is necessary to import SOAP for the registry to work. + from emerging_optimizers.soap import SOAP # pylint: disable=unused-import + HAVE_EMERGING_OPTIMIZERS = True except ImportError: HAVE_EMERGING_OPTIMIZERS = False + OrthogonalizedOptimizer = object logger = logging.getLogger(__name__) @@ -54,14 +60,19 @@ class EmergingOptimizerEntry: optimizer_cls: type init_state_fn: Callable - config_to_kwargs: Callable + config_to_kwargs: Callable | None default_param_overrides: Dict[ParamKey, Dict[str, Any]] = field(default_factory=dict) def _create_emerging_optimizer(config, param_groups, eopt_name, model_chunks, pg_collection): """Instantiate an emerging optimizer and return it with its init_state_fn.""" entry = _EMERGING_OPTIMIZERS[eopt_name] - eopt_kwargs = entry.config_to_kwargs(config, model_chunks, pg_collection) + if entry.config_to_kwargs is not None: + eopt_kwargs = entry.config_to_kwargs(config, model_chunks, pg_collection) + else: + eopt_kwargs = _default_adam_based_eopt_config_to_kwargs( + eopt_name, config, model_chunks, pg_collection + ) optimizer = entry.optimizer_cls(param_groups, **eopt_kwargs) return optimizer, entry.init_state_fn @@ -96,159 +107,180 @@ def _get_qkv_split_shapes(model_cfg) -> List[int]: # Muon # =========================================================================== -if HAVE_EMERGING_OPTIMIZERS: - class TensorParallelMuon(OrthogonalizedOptimizer): - """Tensor Parallel Muon optimizer.""" - - def __init__( - self, - params: ParamsT, - lr: float = 3e-4, - momentum_beta: float = 
0.95, - use_nesterov: bool = True, - weight_decay: float = 0.01, - use_decoupled_weight_decay: bool = True, - split_qkv: bool = False, - is_qkv_fn: Callable[[torch.Tensor], bool] | None = None, - qkv_split_shapes: tuple[int, int, int] | None = None, - fp32_matmul_prec: str = "medium", - coefficient_type: str = "quintic", - num_ns_steps: int = 5, - scale_mode: str = "spectral", - extra_scale_factor: float = 1.0, - pg_collection: Optional[ProcessGroupCollection] = None, - mode: Literal["blockwise", "duplicated", "distributed"] = "duplicated", - ) -> None: - if num_ns_steps < 1: - raise ValueError(f"num_ns_steps must be at least 1, got {num_ns_steps}") - - def scaled_orthogonalize_fn( - grad: torch.Tensor, - tp_group: torch.distributed.ProcessGroup, - partition_dim: int | None = None, - ) -> torch.Tensor: - log_single_rank( - logger, - logging.DEBUG, - f'Orthogonalizing grad with {num_ns_steps} steps, ' - f'{coefficient_type} coefficient, ' - f'{scale_mode} scale mode, extra_scale_factor={extra_scale_factor}', - ) - size = [grad.size(-2), grad.size(-1)] - if partition_dim is not None: - size[partition_dim] *= get_pg_size(tp_group) - orth_grad = newton_schulz_tp( - grad, - steps=num_ns_steps, - coefficient_type=coefficient_type, - tp_group=tp_group, - partition_dim=partition_dim, - mode="duplicated" if mode == "blockwise" else mode, - ) - scale_factor = get_muon_scale_factor(size[0], size[1], mode=scale_mode) - return orth_grad * scale_factor * extra_scale_factor - - self.pg_collection = pg_collection - self.mode = mode - self.split_qkv = split_qkv - self.is_qkv_fn = is_qkv_fn - self.qkv_split_shapes = qkv_split_shapes - - weight_decay_method = "decoupled" if use_decoupled_weight_decay else "l2" - super().__init__( - params, - lr, - momentum_beta, - use_nesterov=use_nesterov, - weight_decay=weight_decay, - weight_decay_method=weight_decay_method, - fp32_matmul_prec=fp32_matmul_prec, - scaled_orthogonalize_fn=scaled_orthogonalize_fn, +class 
TensorParallelMuon(OrthogonalizedOptimizer): + """Tensor Parallel Muon optimizer.""" + + def __init__( + self, + params: ParamsT, + lr: float = 3e-4, + momentum: float = 0.95, + nesterov: bool = True, + weight_decay: float = 0.01, + use_decoupled_weight_decay: bool = True, + split_qkv: bool = False, + is_qkv_fn: Callable[[torch.Tensor], bool] | None = None, + qkv_split_shapes: tuple[int, int, int] | None = None, + fp32_matmul_prec: str = "medium", + coefficient_type: str = "quintic", + num_ns_steps: int = 5, + scale_mode: str = "spectral", + extra_scale_factor: float = 1.0, + pg_collection: Optional[ProcessGroupCollection] = None, + tp_mode: Literal["blockwise", "duplicated", "distributed"] = "duplicated", + ) -> None: + if num_ns_steps < 1: + raise ValueError(f"num_ns_steps must be at least 1, got {num_ns_steps}") + + def scaled_orthogonalize_fn( + grad: torch.Tensor, + tp_group: torch.distributed.ProcessGroup, + partition_dim: int | None = None, + ) -> torch.Tensor: + log_single_rank( + logger, + logging.DEBUG, + f'Orthogonalizing grad with {num_ns_steps} steps, ' + f'{coefficient_type} coefficient, ' + f'{scale_mode} scale mode, extra_scale_factor={extra_scale_factor}', + ) + size = [grad.size(-2), grad.size(-1)] + if partition_dim is not None: + size[partition_dim] *= get_pg_size(tp_group) + orth_grad = newton_schulz_tp( + grad, + steps=num_ns_steps, + coefficient_type=coefficient_type, + tp_group=tp_group, + partition_dim=partition_dim, + tp_mode="duplicated" if tp_mode == "blockwise" else tp_mode, ) + scale_factor = get_muon_scale_factor(size[0], size[1], mode=scale_mode) + return orth_grad * scale_factor * extra_scale_factor + + self.pg_collection = pg_collection + self.tp_mode = tp_mode + self.split_qkv = split_qkv + self.is_qkv_fn = is_qkv_fn + self.qkv_split_shapes = qkv_split_shapes + + weight_decay_method = "decoupled" if use_decoupled_weight_decay else "l2" + super().__init__( + params, + lr, + momentum, + nesterov=nesterov, + 
weight_decay=weight_decay, + weight_decay_method=weight_decay_method, + fp32_matmul_prec=fp32_matmul_prec, + scaled_orthogonalize_fn=scaled_orthogonalize_fn, + ) + + def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> torch.Tensor: + """Orthogonalize the momentum. + + Args: + p: The parameter tensor. It is necessary to pass param tensor in addition to + momentum because a lot of information is only available in the param tensor, + attributes for example. + grad: The momentum tensor. + + Returns: + The orthogonalized gradient tensor. + """ + # TODO(deyuf): switch to group + if self.pg_collection: + tp_group = ( + self.pg_collection.expt_tp + if getattr(p, 'expert_tp', False) + else self.pg_collection.tp + ) + else: + tp_group = None + partition_dim = None if self.tp_mode == "blockwise" else getattr(p, "partition_dim", None) + if partition_dim == -1: + partition_dim = None + + if self.split_qkv and self.is_qkv_fn(p): # type: ignore[misc] + grad_shape = grad.shape + log_single_rank( + logger, + logging.DEBUG, + f'qkv split grad shape {grad_shape}, ' f'split shapes {self.qkv_split_shapes}', + ) + num_query_groups = grad_shape[0] // sum(self.qkv_split_shapes) + qkv_grads = torch.split( + grad.view(num_query_groups, sum(self.qkv_split_shapes), -1), + self.qkv_split_shapes, + dim=1, + ) + qkv_grads = [g.reshape(-1, grad_shape[-1]) for g in qkv_grads] - def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> torch.Tensor: - """Orthogonalize the momentum. - - Args: - p: The parameter tensor. i is necessary to pass param tensor in addition to - momentum because a lot of information is only available in the param tensor, - attributes for example. - grad: The momentum tensor. - - Returns: - The orthogonalized gradient tensor.
- """ - # TODO(deyuf): switch to group - if self.pg_collection: - tp_group = ( - self.pg_collection.expt_tp - if getattr(p, 'expert_tp', False) - else self.pg_collection.tp - ) - else: - tp_group = None - partition_dim = None if self.mode == "blockwise" else getattr(p, "partition_dim", None) - if partition_dim == -1: - partition_dim = None - - if self.split_qkv and self.is_qkv_fn(p): # type: ignore[misc] - grad_shape = grad.shape - log_single_rank( - logger, - logging.DEBUG, - f'qkv split grad shape {grad_shape}, ' f'split shapes {self.qkv_split_shapes}', - ) - num_query_groups = grad_shape[0] // sum(self.qkv_split_shapes) - qkv_grads = torch.split( - grad.view(num_query_groups, sum(self.qkv_split_shapes), -1), - self.qkv_split_shapes, - dim=1, + qkv_grads = [ + self.scaled_orthogonalize_fn(g, tp_group, partition_dim).view( + num_query_groups, -1, grad_shape[-1] ) - qkv_grads = [g.reshape(-1, grad_shape[-1]) for g in qkv_grads] + for g in qkv_grads + ] + grad = torch.cat(qkv_grads, dim=1).view(grad_shape) + else: + grad = self.scaled_orthogonalize_fn(grad, tp_group, partition_dim) + return grad - qkv_grads = [ - self.scaled_orthogonalize_fn(g, tp_group, partition_dim).view( - num_query_groups, -1, grad_shape[-1] - ) - for g in qkv_grads - ] - grad = torch.cat(qkv_grads, dim=1).view(grad_shape) - else: - grad = self.scaled_orthogonalize_fn(grad, tp_group, partition_dim) - return grad - - def _muon_init_state_fn(opt, config=None): - """Initialize Muon optimizer state for torch_dist checkpoint format.""" - for group in opt.param_groups: - for p in group['params']: - if len(opt.state[p]) == 0: - opt.state[p]['momentum_buffer'] = torch.zeros_like(p.data) - - def _muon_config_to_kwargs(config, model_chunks, pg_collection) -> Dict[str, Any]: - """Convert OptimizerConfig to TensorParallelMuon constructor kwargs.""" - return { - "lr": config.lr, - "weight_decay": config.weight_decay, - "momentum_beta": config.muon_momentum, - "use_nesterov": config.muon_use_nesterov, - 
"fp32_matmul_prec": config.muon_fp32_matmul_prec, - "num_ns_steps": config.muon_num_ns_steps, - "scale_mode": config.muon_scale_mode, - "extra_scale_factor": config.muon_extra_scale_factor, - "mode": config.muon_tp_mode, - "split_qkv": config.muon_split_qkv, - "is_qkv_fn": lambda p: getattr(p, "is_qkv", False), - "qkv_split_shapes": _get_qkv_split_shapes(model_chunks[0].config), - "pg_collection": pg_collection, - } - - # ----------------------------------------------------------------------- - # Register Muon - # ----------------------------------------------------------------------- - _EMERGING_OPTIMIZERS['muon'] = EmergingOptimizerEntry( + +def _eopt_init_state_fn(opt, config=None): + """Initialize emerging optimizer state for torch_dist checkpoint format.""" + for group in opt.param_groups: + opt._init_group(group) + + +def _kwargs_from_config(optimizer_cls: type, prefix: str, config) -> Dict[str, Any]: + """Match ``optimizer_cls.__init__`` parameters to config attributes. + + For each init parameter, looks for ``{prefix}_{name}`` on *config* first, + then falls back to ``{name}`` (unprefixed). ``self`` and ``params`` are + always skipped. 
+ """ + skip_params = {"self", "params"} + sig = inspect.signature(optimizer_cls.__init__) + kwargs: Dict[str, Any] = {} + for name in sig.parameters: + if name in skip_params: + continue + prefixed = f"{prefix}_{name}" + if hasattr(config, prefixed): + kwargs[name] = getattr(config, prefixed) + elif hasattr(config, name): + kwargs[name] = getattr(config, name) + return kwargs + + +def _muon_config_to_kwargs(config, model_chunks, pg_collection) -> Dict[str, Any]: + """Convert OptimizerConfig to TensorParallelMuon constructor kwargs.""" + kwargs = _kwargs_from_config(TensorParallelMuon, "muon", config) + kwargs["is_qkv_fn"] = lambda p: getattr(p, "is_qkv", False) + kwargs["qkv_split_shapes"] = _get_qkv_split_shapes(model_chunks[0].config) + kwargs["pg_collection"] = pg_collection + return kwargs + + +def _default_adam_based_eopt_config_to_kwargs( + eopt_name, config, model_chunks, pg_collection +) -> Dict[str, Any]: + """Convert OptimizerConfig to default emerging optimizer constructor kwargs.""" + kwargs = _kwargs_from_config(registry.get_optimizer_cls(eopt_name), eopt_name, config) + kwargs["betas"] = (config.adam_beta1, config.adam_beta2) + return kwargs + + +# ----------------------------------------------------------------------- +# Register emerging optimizers +# ----------------------------------------------------------------------- +_EMERGING_OPTIMIZERS = { + 'muon': EmergingOptimizerEntry( optimizer_cls=TensorParallelMuon, - init_state_fn=_muon_init_state_fn, + init_state_fn=_eopt_init_state_fn, config_to_kwargs=_muon_config_to_kwargs, default_param_overrides={ ParamKey( @@ -258,3 +290,23 @@ def _muon_config_to_kwargs(config, model_chunks, pg_collection) -> Dict[str, Any ): {'optimizer': 'adam'} }, ) +} + +# Register soap with default config +# TODO(skyw): register all emerging optimizers. 
+if HAVE_EMERGING_OPTIMIZERS: + for eopt_name in ["soap"]: + if eopt_name in _EMERGING_OPTIMIZERS: + continue + _EMERGING_OPTIMIZERS[eopt_name] = EmergingOptimizerEntry( + optimizer_cls=registry.get_optimizer_cls(eopt_name), + init_state_fn=_eopt_init_state_fn, + config_to_kwargs=None, + default_param_overrides={ + ParamKey( + predicate=ParamPredicate( + name="nonlinear_or_embedding", fn=_is_nonlinear_or_embedding + ) + ): {'optimizer': 'adam'} + }, + ) diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index 4b43e7b5c08..e10fd7852c7 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -255,14 +255,14 @@ class OptimizerConfig: sgd_momentum: float = 0.9 """Momentum factor for SGD optimizer.""" - # Muon / emerging optimizers. + # emerging optimizers. muon_momentum: float = 0.95 """The momentum used by the internal SGD in Muon optimizer.""" muon_split_qkv: bool = True """Whether to split QKV parameters for Muon optimizer.""" - muon_use_nesterov: bool = False + muon_nesterov: bool = False """Whether to use Nesterov-style momentum in the internal SGD.""" muon_scale_mode: str = "spectral" @@ -280,6 +280,15 @@ class OptimizerConfig: muon_extra_scale_factor: float = 1.0 """Additional scale factor for the muon update.""" + soap_shampoo_beta: float = 0.95 + """The beta parameter for the Shampoo preconditioner.""" + + soap_precondition_frequency: int = 1 + """The frequency of the Shampoo preconditioner.""" + + soap_use_kl_shampoo: bool = True + """Whether to use the KL-Shampoo preconditioner.""" + ####################### # Distributed optimizer ####################### diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index b4691091be9..eb91fa11cc0 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1323,8 +1323,8 @@ def validate_args(args, defaults={}): args.no_load_optim = True warn_rank_0('enabling 
--no-load-optim when skipping training.') - # Muon / emerging optimizer check - if args.optimizer in ('muon', 'dist_muon'): + # emerging optimizer check + if args.optimizer not in ('sgd', 'adam'): if args.optimizer == 'dist_muon': warn_rank_0( "optimizer='dist_muon' is deprecated. " @@ -2047,7 +2047,7 @@ def _add_regularization_args(parser): group.add_argument('--muon-no-split-qkv', action='store_false', default=True, dest='muon_split_qkv', help='Whether to split QKV parameters for Muon optimizer') - group.add_argument('--muon-use-nesterov', action='store_true', + group.add_argument('--muon-nesterov', action='store_true', help='Whether to use Nesterov-style momentum in the internal SGD') group.add_argument('--muon-scale-mode', type=str, default='spectral', choices=['spectral', 'unit_rms_norm', 'shape_scaling'], @@ -2256,7 +2256,7 @@ def _add_training_args(parser): help='use FlashAttention implementation of attention. ' 'https://arxiv.org/abs/2205.14135') group.add_argument('--optimizer', type=str, default='adam', - choices=['adam', 'sgd', 'muon', 'dist_muon'], + choices=['adam', 'sgd', 'muon', 'dist_muon', 'soap'], help='Optimizer function. 
' 'Note: dist_muon is deprecated; use --optimizer muon ' 'with --use-distributed-optimizer instead.') diff --git a/pyproject.toml b/pyproject.toml index d39c9a011fc..52a168aaa3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,7 +93,7 @@ dev = [ "onnxscript", "fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0 "datasets", - "emerging_optimizers", + "emerging_optimizers; python_version >= '3.12'", "flask[async]", "hypercorn", "openai", @@ -116,7 +116,7 @@ lts = [ "onnxscript", "fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0 "datasets", - "emerging_optimizers", + "emerging_optimizers; python_version >= '3.12'", ] [dependency-groups] @@ -160,7 +160,7 @@ linting = [ "pylint==3.2.6", ] ci = ["python-gitlab", "slack-sdk", "pandas"] -no_pypi_wheels = ["emerging_optimizers", "fast-hadamard-transform"] +no_pypi_wheels = ["emerging_optimizers; python_version >= '3.12'", "fast-hadamard-transform"] [tool.uv] default-groups = ["linting", "build", "test"] @@ -190,7 +190,7 @@ flash_mla = [ ] transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "5671fd3675906cda1ade26c24a65d3dedd88eb89" } nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "01a9a8ba360f7b2908728ad0516e0ad9d936966d" } -emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "v0.1.0" } +emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "a8faf641d5fca6a0515cfc010b6cedbf488cc33a" } fast-hadamard-transform = { git = "https://github.com/Dao-AILab/fast-hadamard-transform.git", rev = "f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" } [tool.isort] diff --git a/tests/unit_tests/dist_checkpointing/utils.py b/tests/unit_tests/dist_checkpointing/utils.py index cf6662c72bf..5d7d42d9152 100644 --- a/tests/unit_tests/dist_checkpointing/utils.py +++ 
b/tests/unit_tests/dist_checkpointing/utils.py @@ -212,7 +212,10 @@ def setup_model_and_optimizer( if isinstance(optimizer, ChainedOptimizer): for opt in optimizer.chained_optimizers: - opt.init_state_fn(opt) + if not hasattr(opt, 'optimizer'): + opt.init_state_fn(opt) + else: + opt.init_state_fn(opt.optimizer) else: for group in optimizer.optimizer.param_groups: for p in group['params']: @@ -308,7 +311,10 @@ def setup_moe_model_and_optimizer( if optimizer_type in ('muon', 'dist_muon'): for opt in optimizer.chained_optimizers: - opt.init_state_fn(opt) + if not hasattr(opt, 'optimizer'): + opt.init_state_fn(opt) + else: + opt.init_state_fn(opt.optimizer) else: for opt in optimizer.chained_optimizers: for group in opt.param_groups: diff --git a/tests/unit_tests/test_muon_optimizer.py b/tests/unit_tests/test_emerging_optimizers.py similarity index 66% rename from tests/unit_tests/test_muon_optimizer.py rename to tests/unit_tests/test_emerging_optimizers.py index 86d75ee7a49..8fbac85c99f 100644 --- a/tests/unit_tests/test_muon_optimizer.py +++ b/tests/unit_tests/test_emerging_optimizers.py @@ -11,15 +11,20 @@ from megatron.core import parallel_state from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer -from megatron.core.optimizer.emerging_optimizers import TensorParallelMuon +from megatron.core.optimizer.emerging_optimizers import HAVE_EMERGING_OPTIMIZERS, TensorParallelMuon from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer import TransformerConfig from tests.unit_tests.test_utilities import Utils +if HAVE_EMERGING_OPTIMIZERS: + from emerging_optimizers.soap import SOAP +else: + SOAP = None + # Skip all tests in this file for LTS versions pytestmark = pytest.mark.skipif( Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"), - reason="Skip muon optimizer for LTS test", + reason="Skip 
emerging optimizer tests for LTS test", ) @@ -41,6 +46,11 @@ def forward(self, x): return x +# =========================================================================== +# Muon optimizer tests +# =========================================================================== + + def test_muon_optimizer_smoke(): """Smoke test for TensorParallelMuon optimizer.""" # Create a simple linear model for testing @@ -52,8 +62,8 @@ def test_muon_optimizer_smoke(): optimizer = TensorParallelMuon( params=[model.weight], lr=0.01, - momentum_beta=0.95, - use_nesterov=True, + momentum=0.95, + nesterov=True, weight_decay=0.01, use_decoupled_weight_decay=True, split_qkv=False, @@ -62,7 +72,7 @@ def test_muon_optimizer_smoke(): scale_mode="spectral", extra_scale_factor=1.0, pg_collection=None, - mode="duplicated", + tp_mode="duplicated", ) # Test basic properties @@ -147,7 +157,7 @@ def test_get_megatron_optimizer_smoke(self): bf16=True, use_distributed_optimizer=False, # Muon doesn't support distributed optimizer muon_momentum=0.95, - muon_use_nesterov=True, + muon_nesterov=True, muon_fp32_matmul_prec="medium", muon_num_ns_steps=5, muon_scale_mode="spectral", @@ -243,7 +253,7 @@ def test_get_megatron_optimizer_layer_wise(self): bf16=True, use_layer_wise_distributed_optimizer=True, muon_momentum=0.95, - muon_use_nesterov=True, + muon_nesterov=True, muon_fp32_matmul_prec="medium", muon_num_ns_steps=5, muon_scale_mode="spectral", @@ -292,11 +302,11 @@ def test_muon_optimizer_different_modes_single_rank(mode): optimizer = TensorParallelMuon( params=[model.weight], lr=0.01, - momentum_beta=0.95, + momentum=0.95, weight_decay=0.0, # Disable weight decay for deterministic comparison num_ns_steps=5, pg_collection=None, - mode=mode, + tp_mode=mode, ) # Use fixed input for deterministic results @@ -352,11 +362,11 @@ def create_tp_model_and_optimizer(self, mode): optimizer = TensorParallelMuon( params=[model.weight], lr=0.01, - momentum_beta=0.95, + momentum=0.95, weight_decay=0.0, 
num_ns_steps=5, pg_collection=pg_collection, - mode=mode, + tp_mode=mode, ) return model, optimizer @@ -418,7 +428,7 @@ def test_muon_optimizer_coefficient_types(coefficient_type_and_steps): coefficient_type=coefficient_type_and_steps[0], num_ns_steps=coefficient_type_and_steps[1], pg_collection=None, - mode="duplicated", + tp_mode="duplicated", ) input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda') @@ -447,7 +457,7 @@ def test_muon_optimizer_scale_modes(scale_mode): scale_mode=scale_mode, num_ns_steps=5, pg_collection=None, - mode="duplicated", + tp_mode="duplicated", ) input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda') @@ -463,8 +473,8 @@ def test_muon_optimizer_scale_modes(scale_mode): ), f"Weight should be updated with scale_mode={scale_mode}" -@pytest.mark.parametrize("use_nesterov", [True, False]) -def test_muon_optimizer_nesterov(use_nesterov): +@pytest.mark.parametrize("nesterov", [True, False]) +def test_muon_optimizer_nesterov(nesterov): """Test TensorParallelMuon optimizer with and without Nesterov momentum.""" model = torch.nn.Linear(50, 25, bias=False, dtype=torch.float32, device='cuda') model.requires_grad_(True) @@ -473,11 +483,11 @@ def test_muon_optimizer_nesterov(use_nesterov): optimizer = TensorParallelMuon( params=[model.weight], lr=0.01, - momentum_beta=0.9, - use_nesterov=use_nesterov, + momentum=0.9, + nesterov=nesterov, num_ns_steps=5, pg_collection=None, - mode="duplicated", + tp_mode="duplicated", ) input_tensor = torch.randn(16, 50, dtype=torch.float32, device='cuda') @@ -490,7 +500,7 @@ def test_muon_optimizer_nesterov(use_nesterov): assert not torch.equal( model.weight.data, original_weight - ), f"Weight should be updated with use_nesterov={use_nesterov}" + ), f"Weight should be updated with nesterov={nesterov}" def test_muon_optimizer_multiple_steps(): @@ -502,11 +512,11 @@ def test_muon_optimizer_multiple_steps(): optimizer = TensorParallelMuon( params=[model.weight], lr=0.01, - 
momentum_beta=0.95, + momentum=0.95, weight_decay=0.01, num_ns_steps=5, pg_collection=None, - mode="duplicated", + tp_mode="duplicated", ) weights_history = [model.weight.data.clone()] @@ -552,7 +562,7 @@ def test_muon_optimizer_qkv_split(): qkv_split_shapes=qkv_split_shapes, num_ns_steps=5, pg_collection=None, - mode="duplicated", + tp_mode="duplicated", ) input_tensor = torch.randn(16, hidden_size, dtype=torch.float32, device='cuda') @@ -576,7 +586,7 @@ def test_muon_optimizer_qkv_split(): split_qkv=False, num_ns_steps=5, pg_collection=None, - mode="duplicated", + tp_mode="duplicated", ) output = model(input_tensor) @@ -608,7 +618,7 @@ def test_muon_optimizer_extra_scale_factor(): extra_scale_factor=2.0, num_ns_steps=5, pg_collection=None, - mode="duplicated", + tp_mode="duplicated", ) input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda') @@ -637,7 +647,7 @@ def test_muon_optimizer_num_ns_steps(num_ns_steps): coefficient_type="quintic", num_ns_steps=num_ns_steps, pg_collection=None, - mode="duplicated", + tp_mode="duplicated", ) input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda') @@ -651,3 +661,290 @@ def test_muon_optimizer_num_ns_steps(num_ns_steps): assert not torch.equal( model.weight.data, original_weight ), f"Weight should be updated with num_ns_steps={num_ns_steps}" + + +# =========================================================================== +# SOAP optimizer tests +# =========================================================================== + +skip_no_soap = pytest.mark.skipif( + not HAVE_EMERGING_OPTIMIZERS, reason="emerging_optimizers package not installed" +) + + +@skip_no_soap +def test_soap_optimizer_smoke(): + """Smoke test for SOAP optimizer.""" + + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = SOAP( + params=[model.weight], + lr=0.01, + betas=(0.9, 0.999), + shampoo_beta=0.95, + 
weight_decay=0.01, + precondition_frequency=1, + ) + + # Test basic properties + assert optimizer is not None, "Optimizer should not be None" + assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups" + assert len(optimizer.param_groups) > 0, "Optimizer should have at least one parameter group" + + # Test forward and backward pass + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + # Store original weight + original_weight = model.weight.data.clone() + + # Test optimizer step + optimizer.step() + + # Verify weight was updated + assert not torch.equal( + model.weight.data, original_weight + ), "Weight should be updated after optimizer step" + + # Test zero_grad + optimizer.zero_grad() + assert model.weight.grad is None or torch.all( + model.weight.grad == 0 + ), "Gradients should be zeroed" + + # Test state_dict and load_state_dict + state_dict = optimizer.state_dict() + assert 'state' in state_dict, "State dict should contain state" + assert 'param_groups' in state_dict, "State dict should contain param_groups" + + # Load state dict should not raise error + optimizer.load_state_dict(state_dict) + + +@skip_no_soap +def test_soap_optimizer_multiple_steps(): + """Test SOAP optimizer across multiple optimization steps.""" + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = SOAP( + params=[model.weight], + lr=0.01, + betas=(0.9, 0.999), + shampoo_beta=0.95, + weight_decay=0.01, + precondition_frequency=1, + ) + + weights_history = [model.weight.data.clone()] + + for i in range(3): + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + optimizer.step() + optimizer.zero_grad() + weights_history.append(model.weight.data.clone()) + + # Verify weights changed 
at each step + for i in range(len(weights_history) - 1): + assert not torch.equal( + weights_history[i], weights_history[i + 1] + ), f"Weight should change at step {i}" + + +@skip_no_soap +@pytest.mark.parametrize("precondition_frequency", [1, 5, 10]) +def test_soap_optimizer_precondition_frequency(precondition_frequency): + """Test SOAP optimizer with different precondition frequencies.""" + + model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = SOAP( + params=[model.weight], + lr=0.01, + betas=(0.9, 0.999), + shampoo_beta=0.95, + precondition_frequency=precondition_frequency, + ) + + input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with precondition_frequency={precondition_frequency}" + + +@skip_no_soap +@pytest.mark.parametrize("use_kl_shampoo", [True, False]) +def test_soap_optimizer_kl_shampoo(use_kl_shampoo): + """Test SOAP optimizer with and without KL-Shampoo preconditioner.""" + + model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = SOAP( + params=[model.weight], + lr=0.01, + betas=(0.9, 0.999), + shampoo_beta=0.95, + use_kl_shampoo=use_kl_shampoo, + precondition_frequency=1, + ) + + input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with use_kl_shampoo={use_kl_shampoo}" + + +@skip_no_soap +@pytest.mark.parametrize("shampoo_beta", [0.5, 0.9, 0.99]) 
+def test_soap_optimizer_shampoo_beta(shampoo_beta): + """Test SOAP optimizer with different shampoo_beta values.""" + + model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = SOAP( + params=[model.weight], + lr=0.01, + betas=(0.9, 0.999), + shampoo_beta=shampoo_beta, + precondition_frequency=1, + ) + + input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with shampoo_beta={shampoo_beta}" + + +@pytest.mark.skipif( + int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1" +) +class TestSoapOptimizerMultiRank: + """Test class for SOAP optimizer with multi-rank setup.""" + + @pytest.fixture(autouse=True) + def setup_and_teardown(self): + """Setup and teardown for each test.""" + Utils.initialize_model_parallel() + yield + Utils.destroy_model_parallel() + + def create_ddp_model(self, model): + """Wrap model in DDP.""" + ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False) + return DistributedDataParallel( + TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + ) + + def test_get_megatron_optimizer_soap_smoke(self): + """Smoke test for get_megatron_optimizer with SOAP.""" + model = Net().bfloat16().cuda() + model.requires_grad_(True) + model = self.create_ddp_model(model) + + for param in model.parameters(): + assert param.requires_grad, "All parameters should require gradients" + + optimizer_config = OptimizerConfig( + optimizer='soap', + lr=0.01, + weight_decay=0.01, + bf16=True, + use_distributed_optimizer=False, + soap_shampoo_beta=0.95, + soap_precondition_frequency=1, + soap_use_kl_shampoo=True, + ) + + optimizer = get_megatron_optimizer( + 
config=optimizer_config, model_chunks=[model], use_gloo_process_groups=True + ) + + assert optimizer is not None, "Optimizer should not be None" + assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups" + assert hasattr(optimizer, 'chained_optimizers'), "Should be a ChainedOptimizer" + assert len(optimizer.chained_optimizers) >= 1, "Should have at least one chained optimizer" + + # Test forward and backward pass + input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + # Store original parameters + original_params = {} + for name, param in model.named_parameters(): + original_params[name] = param.data.clone() + + # Test optimizer step + optimizer.step() + + # Verify at least some parameters were updated + params_updated = 0 + for name, param in model.named_parameters(): + if not torch.equal(param.data, original_params[name]): + params_updated += 1 + + assert params_updated > 0, "At least some parameters should be updated after optimizer step" + + # Test zero_grad + optimizer.zero_grad() + for param in model.parameters(): + assert param.grad is None or torch.all( + param.grad == 0 + ), "Gradients should be zeroed for all parameters" + + # Test state_dict and load_state_dict + state_dict = optimizer.state_dict() + assert isinstance(state_dict, list), "State dict should be a list" + optimizer.load_state_dict(state_dict) + + def test_get_megatron_optimizer_soap_validation(self): + """Test validation logic for get_megatron_optimizer with SOAP.""" + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.bfloat16, device='cuda') + model.requires_grad_(True) + model = self.create_ddp_model(model) + + # FP16 should raise exception + optimizer_config_fp16 = OptimizerConfig( + optimizer='soap', lr=0.01, fp16=True, use_distributed_optimizer=False + ) + + with pytest.raises(Exception, match='emerging optimizer with fp16 is not supported'): + 
get_megatron_optimizer(config=optimizer_config_fp16, model_chunks=[model]) diff --git a/uv.lock b/uv.lock index 433e8b3ea8e..08482b4b7b8 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -107,7 +107,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, - { name = "async-timeout", marker = "python_full_version < '3.11'" }, + { name = "async-timeout", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "attrs" }, { name = "frozenlist" }, { name = "multidict" }, @@ -247,7 +247,7 @@ version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "frozenlist" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } wheels = [ @@ -301,10 +301,10 @@ name = "anyio" version = "4.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "idna" }, { name = "sniffio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949, upload-time = "2025-03-17T00:02:54.77Z" } wheels = [ @@ -749,7 +749,7 @@ name = "cffi" version = "2.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pycparser", marker = "implementation_name != 'PyPy'" }, + { name = "pycparser", marker = "implementation_name != 'PyPy' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } wheels = [ @@ -920,7 +920,7 @@ name = "click" version = "8.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } wheels = [ @@ -1410,12 +1410,11 @@ wheels = [ [[package]] name = "emerging-optimizers" -version = "0.1.0" -source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0#d5363b4a418128cd8111983b191c4b8869a9766b" } +version = "0.2.0" +source = { git = 
"https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=a8faf641d5fca6a0515cfc010b6cedbf488cc33a#a8faf641d5fca6a0515cfc010b6cedbf488cc33a" } dependencies = [ - { name = "absl-py" }, - { name = "torch", marker = "sys_platform == 'never'" }, - { name = "typing-extensions" }, + { name = "absl-py", marker = "python_full_version >= '3.12' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "torch", marker = "(python_full_version >= '3.12' and sys_platform == 'never') or (python_full_version < '3.12' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'never' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] [[package]] @@ -1423,7 +1422,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -2027,7 +2026,7 @@ dependencies = [ { name = "filelock" }, { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or 
(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "packaging" }, { name = "pyyaml" }, { name = "requests" }, @@ -2462,7 +2461,7 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "mdurl", marker = "python_full_version < '3.11'" }, + { name = "mdurl", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } wheels = [ @@ -2492,7 +2491,7 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ - { name = "mdurl", marker = "python_full_version >= '3.11'" }, + { name = "mdurl", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } wheels = [ @@ -2631,7 +2630,7 @@ dev = [ { name = "causal-conv1d" }, { name = "datasets" }, { name = 
"einops" }, - { name = "emerging-optimizers" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12'" }, { name = "fastapi" }, { name = "flash-linear-attention" }, { name = "flashinfer-python" }, @@ -2658,7 +2657,7 @@ lts = [ { name = "causal-conv1d" }, { name = "datasets" }, { name = "einops" }, - { name = "emerging-optimizers" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12'" }, { name = "fastapi" }, { name = "flashinfer-python" }, { name = "mamba-ssm" }, @@ -2718,7 +2717,7 @@ linting = [ { name = "ruff" }, ] no-pypi-wheels = [ - { name = "emerging-optimizers" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "fast-hadamard-transform" }, ] test = [ @@ -2749,8 +2748,8 @@ requires-dist = [ { name = "datasets", marker = "extra == 'lts'" }, { name = "einops", marker = "extra == 'dev'", specifier = "~=0.8" }, { name = "einops", marker = "extra == 'lts'", specifier = "~=0.8" }, - { name = "emerging-optimizers", marker = "extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, - { name = "emerging-optimizers", marker = "extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=a8faf641d5fca6a0515cfc010b6cedbf488cc33a" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=a8faf641d5fca6a0515cfc010b6cedbf488cc33a" }, { name = "fastapi", marker = "extra == 'dev'", specifier = "~=0.50" }, { name = "fastapi", marker = "extra == 'lts'", specifier = "~=0.50" }, { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.4.0" }, @@ -2824,7 +2823,7 @@ 
linting = [ { name = "ruff", specifier = "~=0.9.0" }, ] no-pypi-wheels = [ - { name = "emerging-optimizers", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=a8faf641d5fca6a0515cfc010b6cedbf488cc33a" }, { name = "fast-hadamard-transform", git = "https://github.com/Dao-AILab/fast-hadamard-transform.git?rev=f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" }, ] test = [ @@ -3030,7 +3029,7 @@ name = "multidict" version = "6.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } wheels = [ @@ -3572,6 +3571,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/86/94188e03e5d4dd7b73c390b0cddcde5618b3799c18e327b2bf15763f6137/nvdlfw_inspect-0.2.2-py3-none-any.whl", hash = "sha256:8a4dc2814c5a4cd19ae304170b9bfa514538ef3c3eb243a45a82404ec3cb279d", size = 30964, upload-time = "2025-12-03T10:52:01.933Z" }, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.8.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, + { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 567544208, upload-time = "2025-03-07T01:53:30.535Z" }, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, + { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, + { url = "https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" }, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, + { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" }, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, + { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" }, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.10.2.21" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, + { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, + { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" }, +] + [[package]] name = "nvidia-cudnn-frontend" version = "1.18.0" @@ -3594,6 +3646,76 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/53/df2810b56d259ef96fa6beaa1381bd14c29fbe82836b409516e864c5e177/nvidia_cudnn_frontend-1.18.0-cp314-cp314-win_amd64.whl", hash = "sha256:5053b473fa74168b5fbf35934cd6187f88aa03b8447b9f2cd417332d5e5c9569", size = 1592759, upload-time = "2026-02-11T21:32:33.87Z" }, ] +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.3.83" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, + { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, + { url = "https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" }, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.13.1.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.9.90" +source = { registry = 
"https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, + { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, + { url = "https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" }, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.3.90" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cusparse-cu12" }, + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, + { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" }, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.8.93" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, + { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, + { url = "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" }, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, + { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" }, +] + [[package]] name = "nvidia-cutlass-dsl" version = "4.4.0" @@ -3667,6 +3789,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/09/30147ab0d0409d3492f1d37469fe0586c82aeec6eec9a907f59d24094516/nvidia_modelopt-0.41.0-py3-none-any.whl", hash = "sha256:ffa5f903d22653649318831a470550ae55ee04716c068d5ade61c3176fdc1d7d", size = 934582, upload-time = "2026-01-20T17:21:28.494Z" }, ] +[[package]] +name = "nvidia-nccl-cu12" +version = "2.27.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" }, + { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, + { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, + { url = "https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.4.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/6a/03aa43cc9bd3ad91553a88b5f6fb25ed6a3752ae86ce2180221962bc2aa5/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b48363fc6964dede448029434c6abed6c5e37f823cb43c3bcde7ecfc0457e15", size = 138936938, upload-time = "2025-09-06T00:32:05.589Z" }, + { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" }, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, + { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, + { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" }, +] + [[package]] name = "nvidia-resiliency-ext" version = "0.5.0" @@ -3895,7 +4055,7 @@ wheels = [ [[package]] name = "opentelemetry-api" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -3921,9 +4081,9 @@ dependencies = [ { name = "importlib-metadata", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, { name = "typing-extensions", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2c/1d/4049a9e8698361cc1a1aa03a6c59e4fa4c71e0c0f94a30f988a6876a2ae6/opentelemetry_api-1.40.0.tar.gz", hash = "sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f", 
size = 70851, upload-time = "2026-03-04T14:17:21.555Z" } +sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/bf/93795954016c522008da367da292adceed71cca6ee1717e1d64c83089099/opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9", size = 68676, upload-time = "2026-03-04T14:17:01.24Z" }, + { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" }, ] [[package]] @@ -3962,7 +4122,7 @@ wheels = [ [[package]] name = "opentelemetry-exporter-prometheus" -version = "0.61b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -3985,13 +4145,13 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "opentelemetry-api", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, - { name = "opentelemetry-sdk", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-api", version = "1.39.1", source = 
{ registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-sdk", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, { name = "prometheus-client", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4a/20/9e818fd364d12e8d0cfdce4a3b2d82e24d98c4ceebb315de6b6770b5f214/opentelemetry_exporter_prometheus-0.61b0.tar.gz", hash = "sha256:7c4919bd8e79abd62b610767e80f42c9c3a06c5183f4dd9141eedeb57aea284b", size = 15136, upload-time = "2026-03-04T14:17:26.275Z" } +sdist = { url = "https://files.pythonhosted.org/packages/14/39/7dafa6fff210737267bed35a8855b6ac7399b9e582b8cf1f25f842517012/opentelemetry_exporter_prometheus-0.60b1.tar.gz", hash = "sha256:a4011b46906323f71724649d301b4dc188aaa068852e814f4df38cc76eac616b", size = 14976, upload-time = "2025-12-11T13:32:42.944Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/4a/b65d40e94d1d930aee73a1a2857211ee6ab10ce3686cbdae5eea78cd9d34/opentelemetry_exporter_prometheus-0.61b0-py3-none-any.whl", hash = "sha256:3013b41f4370143d48d219a2351473761423e5882fa4c213811eaefacba39cb7", size = 13149, upload-time = "2026-03-04T14:17:08.983Z" }, + { url = "https://files.pythonhosted.org/packages/9b/0d/4be6bf5477a3eb3d917d2f17d3c0b6720cd6cb97898444a61d43cc983f5c/opentelemetry_exporter_prometheus-0.60b1-py3-none-any.whl", hash = "sha256:49f59178de4f4590e3cef0b8b95cf6e071aae70e1f060566df5546fad773b8fd", size = 13019, upload-time = "2025-12-11T13:32:23.974Z" }, ] 
[[package]] @@ -4042,7 +4202,7 @@ wheels = [ [[package]] name = "opentelemetry-sdk" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -4065,13 +4225,13 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "opentelemetry-api", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, - { name = "opentelemetry-semantic-conventions", version = "0.61b0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-api", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-semantic-conventions", version = "0.60b1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, { name = "typing-extensions", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/58/fd/3c3125b20ba18ce2155ba9ea74acb0ae5d25f8cd39cfd37455601b7955cc/opentelemetry_sdk-1.40.0.tar.gz", hash = 
"sha256:18e9f5ec20d859d268c7cb3c5198c8d105d073714db3de50b593b8c1345a48f2", size = 184252, upload-time = "2026-03-04T14:17:31.87Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/fb/c76080c9ba07e1e8235d24cdcc4d125ef7aa3edf23eb4e497c2e50889adc/opentelemetry_sdk-1.39.1.tar.gz", hash = "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6", size = 171460, upload-time = "2025-12-11T13:32:49.369Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/c5/6a852903d8bfac758c6dc6e9a68b015d3c33f2f1be5e9591e0f4b69c7e0a/opentelemetry_sdk-1.40.0-py3-none-any.whl", hash = "sha256:787d2154a71f4b3d81f20524a8ce061b7db667d24e46753f32a7bc48f1c1f3f1", size = 141951, upload-time = "2026-03-04T14:17:17.961Z" }, + { url = "https://files.pythonhosted.org/packages/7c/98/e91cf858f203d86f4eccdf763dcf01cf03f1dae80c3750f7e635bfa206b6/opentelemetry_sdk-1.39.1-py3-none-any.whl", hash = "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", size = 132565, upload-time = "2025-12-11T13:32:35.069Z" }, ] [[package]] @@ -4109,7 +4269,7 @@ wheels = [ [[package]] name = "opentelemetry-semantic-conventions" -version = "0.61b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -4132,12 +4292,12 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "opentelemetry-api", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-api", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 
'extra-13-megatron-core-lts')" }, { name = "typing-extensions", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6d/c0/4ae7973f3c2cfd2b6e321f1675626f0dab0a97027cc7a297474c9c8f3d04/opentelemetry_semantic_conventions-0.61b0.tar.gz", hash = "sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a", size = 145755, upload-time = "2026-03-04T14:17:32.664Z" } +sdist = { url = "https://files.pythonhosted.org/packages/91/df/553f93ed38bf22f4b999d9be9c185adb558982214f33eae539d3b5cd0858/opentelemetry_semantic_conventions-0.60b1.tar.gz", hash = "sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", size = 137935, upload-time = "2025-12-11T13:32:50.487Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b2/37/cc6a55e448deaa9b27377d087da8615a3416d8ad523d5960b78dbeadd02a/opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2", size = 231621, upload-time = "2026-03-04T14:17:19.33Z" }, + { url = "https://files.pythonhosted.org/packages/7a/5e/5958555e09635d09b75de3c4f8b9cae7335ca545d77392ffe7331534c402/opentelemetry_semantic_conventions-0.60b1-py3-none-any.whl", hash = "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb", size = 219982, upload-time = "2025-12-11T13:32:36.955Z" }, ] [[package]] @@ -4167,10 +4327,10 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "python-dateutil", marker = "python_full_version < '3.11'" }, - { name = "pytz", marker = "python_full_version < '3.11'" }, - { name = "tzdata", marker = "python_full_version < '3.11'" 
}, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "python-dateutil", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "pytz", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "tzdata", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" } wheels = [ @@ -4246,9 +4406,9 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "python-dateutil", marker = "python_full_version >= '3.11'" }, - { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32')" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "python-dateutil", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 
'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 4641901, upload-time = "2026-02-17T22:20:16.434Z" } wheels = [ @@ -5335,10 +5495,10 @@ default = [ { name = "grpcio" }, { name = "opencensus" }, { name = "opentelemetry-exporter-prometheus", version = "0.54b1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, - { name = "opentelemetry-exporter-prometheus", version = "0.61b0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-exporter-prometheus", version = "0.60b1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, { name = "opentelemetry-proto" }, { name = "opentelemetry-sdk", version = "1.33.1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, - { name = "opentelemetry-sdk", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra 
== 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, + { name = "opentelemetry-sdk", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" }, { name = "prometheus-client" }, { name = "py-spy" }, { name = "pydantic" }, @@ -5354,7 +5514,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } wheels = [ @@ -6324,7 +6484,7 @@ version = "0.52.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } wheels = [ @@ -6663,21 +6823,45 @@ name = "torch" version = "2.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = 
"filelock", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, - { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, - { name = "jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 
'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'" }, - { name = "sympy", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, - { name = "triton", marker = "sys_platform == 'never'" }, - { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "cuda-bindings", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "filelock" }, + { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, + { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "jinja2" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cuda-cupti-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cuda-runtime-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cudnn-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cufft-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or 
(sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cufile-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-curand-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cusolver-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cusparse-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-cusparselt-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-nccl-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 
'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-nvshmem-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "nvidia-nvtx-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "setuptools", marker = "python_full_version >= '3.12' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "sympy" }, + { name = "triton", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/5b/30/bfebdd8ec77db9a79775121789992d6b3b75ee5494971294d7b4b7c999bc/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2b980edd8d7c0a68c4e951ee1856334a43193f98730d97408fbd148c1a933313", size = 79411457, upload-time = "2026-02-10T21:44:59.189Z" }, { url = 
"https://files.pythonhosted.org/packages/0f/8b/4b61d6e13f7108f36910df9ab4b58fd389cc2520d54d81b88660804aad99/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:418997cb02d0a0f1497cf6a09f63166f9f5df9f3e16c8a716ab76a72127c714f", size = 79423467, upload-time = "2026-02-10T21:44:48.711Z" }, { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload-time = "2026-02-10T21:44:52.603Z" }, { url = "https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254, upload-time = "2026-02-10T21:44:44.095Z" }, + { url = "https://files.pythonhosted.org/packages/16/ee/efbd56687be60ef9af0c9c0ebe106964c07400eade5b0af8902a1d8cd58c/torch-2.10.0-3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a1ff626b884f8c4e897c4c33782bdacdff842a165fee79817b1dd549fdda1321", size = 915510070, upload-time = "2026-03-11T14:16:39.386Z" }, + { url = "https://files.pythonhosted.org/packages/36/ab/7b562f1808d3f65414cd80a4f7d4bb00979d9355616c034c171249e1a303/torch-2.10.0-3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ac5bdcbb074384c66fa160c15b1ead77839e3fe7ed117d667249afce0acabfac", size = 915518691, upload-time = "2026-03-11T14:15:43.147Z" }, + { url = "https://files.pythonhosted.org/packages/b3/7a/abada41517ce0011775f0f4eacc79659bc9bc6c361e6bfe6f7052a6b9363/torch-2.10.0-3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:98c01b8bb5e3240426dcde1446eed6f40c778091c8544767ef1168fc663a05a6", size = 915622781, upload-time = "2026-03-11T14:17:11.354Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/c6/4dfe238342ffdcec5aef1c96c457548762d33c40b45a1ab7033bb26d2ff2/torch-2.10.0-3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80b1b5bfe38eb0e9f5ff09f206dcac0a87aadd084230d4a36eea5ec5232c115b", size = 915627275, upload-time = "2026-03-11T14:16:11.325Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f0/72bf18847f58f877a6a8acf60614b14935e2f156d942483af1ffc081aea0/torch-2.10.0-3-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:46b3574d93a2a8134b3f5475cfb98e2eb46771794c57015f6ad1fb795ec25e49", size = 915523474, upload-time = "2026-03-11T14:17:44.422Z" }, + { url = "https://files.pythonhosted.org/packages/f4/39/590742415c3030551944edc2ddc273ea1fdfe8ffb2780992e824f1ebee98/torch-2.10.0-3-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b1d5e2aba4eb7f8e87fbe04f86442887f9167a35f092afe4c237dfcaaef6e328", size = 915632474, upload-time = "2026-03-11T14:15:13.666Z" }, + { url = "https://files.pythonhosted.org/packages/b6/8e/34949484f764dde5b222b7fe3fede43e4a6f0da9d7f8c370bb617d629ee2/torch-2.10.0-3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0228d20b06701c05a8f978357f657817a4a63984b0c90745def81c18aedfa591", size = 915523882, upload-time = "2026-03-11T14:14:46.311Z" }, { url = "https://files.pythonhosted.org/packages/0c/1a/c61f36cfd446170ec27b3a4984f072fd06dab6b5d7ce27e11adb35d6c838/torch-2.10.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5276fa790a666ee8becaffff8acb711922252521b28fbce5db7db5cf9cb2026d", size = 145992962, upload-time = "2026-01-21T16:24:14.04Z" }, { url = "https://files.pythonhosted.org/packages/b5/60/6662535354191e2d1555296045b63e4279e5a9dbad49acf55a5d38655a39/torch-2.10.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:aaf663927bcd490ae971469a624c322202a2a1e68936eb952535ca4cd3b90444", size = 915599237, upload-time = "2026-01-21T16:23:25.497Z" }, { url = 
"https://files.pythonhosted.org/packages/40/b8/66bbe96f0d79be2b5c697b2e0b187ed792a15c6c4b8904613454651db848/torch-2.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:a4be6a2a190b32ff5c8002a0977a25ea60e64f7ba46b1be37093c141d9c49aeb", size = 113720931, upload-time = "2026-01-21T16:24:23.743Z" }, @@ -6734,7 +6918,7 @@ name = "tqdm" version = "4.67.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } wheels = [ From f47ad9143e18dcaf518427eeb2bcd6834cb29abb Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Wed, 18 Mar 2026 01:41:20 +0800 Subject: [PATCH 312/334] Fix emerging optimizer init_group for ckpt loading (#3897) Signed-off-by: Deyu Fu --- megatron/core/optimizer/emerging_optimizers.py | 3 ++- pyproject.toml | 2 +- uv.lock | 8 ++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/megatron/core/optimizer/emerging_optimizers.py b/megatron/core/optimizer/emerging_optimizers.py index 25294beabdf..f10c79dc4be 100644 --- a/megatron/core/optimizer/emerging_optimizers.py +++ b/megatron/core/optimizer/emerging_optimizers.py @@ -232,7 +232,8 @@ def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> t def _eopt_init_state_fn(opt, config=None): """Initialize emerging optimizer state for torch_dist checkpoint format.""" for group in opt.param_groups: - opt._init_group(group) + # Checkpoint init needs state for all parameters, including those without grads yet. 
+ opt._init_group(group, skip_non_grad_params=False) def _kwargs_from_config(optimizer_cls: type, prefix: str, config) -> Dict[str, Any]: diff --git a/pyproject.toml b/pyproject.toml index 52a168aaa3a..3a9d27b6a81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -190,7 +190,7 @@ flash_mla = [ ] transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "5671fd3675906cda1ade26c24a65d3dedd88eb89" } nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "01a9a8ba360f7b2908728ad0516e0ad9d936966d" } -emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "a8faf641d5fca6a0515cfc010b6cedbf488cc33a" } +emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "bc634ff8c0cf4fb5dbae0a531081281b499be3a0" } fast-hadamard-transform = { git = "https://github.com/Dao-AILab/fast-hadamard-transform.git", rev = "f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" } [tool.isort] diff --git a/uv.lock b/uv.lock index 08482b4b7b8..d1e17d67196 100644 --- a/uv.lock +++ b/uv.lock @@ -1411,7 +1411,7 @@ wheels = [ [[package]] name = "emerging-optimizers" version = "0.2.0" -source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=a8faf641d5fca6a0515cfc010b6cedbf488cc33a#a8faf641d5fca6a0515cfc010b6cedbf488cc33a" } +source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=bc634ff8c0cf4fb5dbae0a531081281b499be3a0#bc634ff8c0cf4fb5dbae0a531081281b499be3a0" } dependencies = [ { name = "absl-py", marker = "python_full_version >= '3.12' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "torch", marker = "(python_full_version >= '3.12' and sys_platform == 'never') or (python_full_version < '3.12' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'never' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, @@ -2748,8 
+2748,8 @@ requires-dist = [ { name = "datasets", marker = "extra == 'lts'" }, { name = "einops", marker = "extra == 'dev'", specifier = "~=0.8" }, { name = "einops", marker = "extra == 'lts'", specifier = "~=0.8" }, - { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=a8faf641d5fca6a0515cfc010b6cedbf488cc33a" }, - { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=a8faf641d5fca6a0515cfc010b6cedbf488cc33a" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=bc634ff8c0cf4fb5dbae0a531081281b499be3a0" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=bc634ff8c0cf4fb5dbae0a531081281b499be3a0" }, { name = "fastapi", marker = "extra == 'dev'", specifier = "~=0.50" }, { name = "fastapi", marker = "extra == 'lts'", specifier = "~=0.50" }, { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.4.0" }, @@ -2823,7 +2823,7 @@ linting = [ { name = "ruff", specifier = "~=0.9.0" }, ] no-pypi-wheels = [ - { name = "emerging-optimizers", marker = "python_full_version >= '3.12'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=a8faf641d5fca6a0515cfc010b6cedbf488cc33a" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=bc634ff8c0cf4fb5dbae0a531081281b499be3a0" }, { name = "fast-hadamard-transform", git = "https://github.com/Dao-AILab/fast-hadamard-transform.git?rev=f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" }, ] test = [ From 74124ba1b76ad4837f573d53bfa26764505adafb Mon Sep 17 00:00:00 2001 From: Tom Long Date: Tue, 
17 Mar 2026 16:01:56 -0700 Subject: [PATCH 313/334] =?UTF-8?q?fix=20cg=20acess=20issue=20by=20using?= =?UTF-8?q?=20dict=20instead=20of=20list=20to=20iteratively=20acces?= =?UTF-8?q?=E2=80=A6=20(#3867)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Xin Yao --- megatron/core/transformer/moe/moe_utils.py | 7 +-- .../core/transformer/moe/token_dispatcher.py | 17 +++++++ .../core/transformer/transformer_layer.py | 10 ++--- .../transformer/moe/test_token_dispatcher.py | 44 +++++++++++++++++++ 4 files changed, 65 insertions(+), 13 deletions(-) diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index e736bc65142..bf8df7a2482 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -1353,12 +1353,7 @@ def get_early_return_outputs( outputs = [self.kwargs['hidden_states'], self.kwargs['probs']] valid_cudagraph_attrs = [] for attr_name in self.moe_layer.token_dispatcher.cudagraph_attrs: - hier_attr_name = attr_name.split('.') - attr = self.moe_layer.token_dispatcher - for name in hier_attr_name: - attr = getattr(attr, name, None) - if attr is None: - break + attr = self.moe_layer.token_dispatcher.get_cudagraph_attr(attr_name) if isinstance(attr, torch.Tensor): outputs.append(attr) valid_cudagraph_attrs.append(attr_name) diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py index e82882a66f9..62e7ff41b87 100644 --- a/megatron/core/transformer/moe/token_dispatcher.py +++ b/megatron/core/transformer/moe/token_dispatcher.py @@ -84,6 +84,23 @@ def __init__( self.cudagraph_attrs = [] self.valid_cudagraph_attrs = None + def get_cudagraph_attr(self, attr_name: str): + """Resolve a cudagraph attribute path, including nested attributes.""" + attr = self + for name in attr_name.split('.'): + attr = getattr(attr, name, None) + if attr is None: + return None + return 
attr + + def set_cudagraph_attr(self, attr_name: str, value) -> None: + """Assign to a cudagraph attribute path, including nested attributes.""" + hier_attr_name = attr_name.split('.') + attr = self + for name in hier_attr_name[:-1]: + attr = getattr(attr, name) + setattr(attr, hier_attr_name[-1], value) + @abstractmethod def dispatch_preprocess( self, tokens: torch.Tensor, routing_map: torch.Tensor, probs: torch.Tensor diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index aac05312220..61e9bb1535b 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -1098,11 +1098,7 @@ def _te_cuda_graph_replay(self, *args, **kwargs): valid_cudagraph_attrs ), f"attr_outputs: {len(attr_outputs)} != {len(valid_cudagraph_attrs)}" for i, attr_name in enumerate(valid_cudagraph_attrs): - hier_attr_name = attr_name.split('.') - attr = self.mlp.token_dispatcher - for name in hier_attr_name[:-1]: - attr = getattr(attr, name) - setattr(attr, hier_attr_name[-1], attr_outputs[i]) + self.mlp.token_dispatcher.set_cudagraph_attr(attr_name, attr_outputs[i]) else: # CUDA graph output is [hidden_states, probs, routing_map]. 
assert len(cuda_graph_output) == 3, ( @@ -1711,7 +1707,7 @@ def _forward_mlp_router(self, hidden_states, padding_mask=None): ) for attr_name in self.mlp.token_dispatcher.cudagraph_attrs: - attr = getattr(self.mlp.token_dispatcher, attr_name) + attr = self.mlp.token_dispatcher.get_cudagraph_attr(attr_name) if torch.is_tensor(attr): if attr_name in self.token_dispatcher_attrs: self.token_dispatcher_attrs[attr_name].copy_(attr) @@ -1730,7 +1726,7 @@ def _forward_mlp_expert_compute(self, hidden_states, probs): """ for name, attr in self.token_dispatcher_attrs.items(): - setattr(self.mlp.token_dispatcher, name, attr) + self.mlp.token_dispatcher.set_cudagraph_attr(name, attr) self.mlp.fwd_execution_map = "expert_compute" return self.mlp(None, intermediate_tensors=(hidden_states, probs)) diff --git a/tests/unit_tests/transformer/moe/test_token_dispatcher.py b/tests/unit_tests/transformer/moe/test_token_dispatcher.py index 05ebdecfd04..91f32719d07 100644 --- a/tests/unit_tests/transformer/moe/test_token_dispatcher.py +++ b/tests/unit_tests/transformer/moe/test_token_dispatcher.py @@ -2,6 +2,7 @@ import copy import dataclasses +from types import SimpleNamespace import pytest import torch @@ -10,6 +11,7 @@ from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_local_submodules from megatron.core.transformer.moe.moe_layer import MoELayer from megatron.core.transformer.moe.moe_utils import get_capacity +from megatron.core.transformer.moe.token_dispatcher import MoETokenDispatcher from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.typed_torch import apply_module from megatron.core.utils import is_te_min_version @@ -33,6 +35,48 @@ def token_unpermutation(token_dispatcher, hidden_states): return hidden_states, None +class _NestedAttrTestDispatcher(MoETokenDispatcher): + def dispatch_preprocess(self, tokens, routing_map, probs): + raise NotImplementedError + + def token_dispatch(self, hidden_states, probs): + raise 
NotImplementedError + + def dispatch_postprocess(self, hidden_states, probs): + raise NotImplementedError + + def combine_preprocess(self, hidden_states): + raise NotImplementedError + + def token_combine(self, hidden_states): + raise NotImplementedError + + def combine_postprocess(self, hidden_states): + raise NotImplementedError + + +def test_get_cudagraph_attr_supports_nested_paths(): + dispatcher = object.__new__(_NestedAttrTestDispatcher) + token_probs = torch.randn(2, 3) + dispatcher._comm_manager = SimpleNamespace( + token_probs=token_probs, nested=SimpleNamespace(routing_map=torch.randn(2, 4)) + ) + + assert dispatcher.get_cudagraph_attr("_comm_manager.token_probs") is token_probs + assert dispatcher.get_cudagraph_attr("_comm_manager.nested.routing_map") is not None + assert dispatcher.get_cudagraph_attr("_comm_manager.missing_attr") is None + + +def test_set_cudagraph_attr_supports_nested_paths(): + dispatcher = object.__new__(_NestedAttrTestDispatcher) + dispatcher._comm_manager = SimpleNamespace(routing_map=None) + routing_map = torch.randn(4, 5) + + dispatcher.set_cudagraph_attr("_comm_manager.routing_map", routing_map) + + assert dispatcher._comm_manager.routing_map is routing_map + + class MoEModelTestContainer: def __init__( self, From 51299c512e57b65d1d5ad57333e484167145c394 Mon Sep 17 00:00:00 2001 From: Huy Vu <86480512+huvunvidia@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:02:18 -0400 Subject: [PATCH 314/334] Enhance rotary positional embedding version checks (#3887) Co-authored-by: Xin Yao --- megatron/core/extensions/transformer_engine.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 943a72c531f..20ab554382e 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -2389,7 +2389,10 @@ def fused_apply_rotary_pos_emb_thd( """ Apply rotary 
positional embedding to input tensor T in `thd` format with CP support. """ - if is_te_min_version("1.12.0", check_equality=True): + if interleaved: + assert is_te_min_version("2.3.0"), "Only TE >= 2.3.0 supports interleaved fused RoPE." + + if is_te_min_version("2.3.0", check_equality=True): return apply_rotary_pos_emb( t, freqs, @@ -2400,6 +2403,16 @@ def fused_apply_rotary_pos_emb_thd( cp_rank=cp_rank, interleaved=interleaved, ) + elif is_te_min_version("1.12.0", check_equality=True): + return apply_rotary_pos_emb( + t, + freqs, + tensor_format="thd", + fused=True, + cu_seqlens=cu_seqlens, + cp_size=cp_size, + cp_rank=cp_rank, + ) else: assert cp_size == 1, "Only TE >= 1.12 supports RoPE fusion for THD format with CP." return apply_rotary_pos_emb( From 7c3eea666815adc12e028e3b512f9e0dc34d8db0 Mon Sep 17 00:00:00 2001 From: xuwchen Date: Wed, 18 Mar 2026 07:16:28 +0800 Subject: [PATCH 315/334] [DEV] fix(megatron-fsdp): build expt_device_mesh only for MoE models (#3832) Co-authored-by: Xin Yao --- .../distributed/fsdp/mcore_fsdp_adapter.py | 6 ++- .../fsdp/src/megatron_fsdp/utils.py | 4 -- .../test_mcore_fully_sharded_data_parallel.py | 46 +++++++++++++++++++ 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py index 0fa990466b4..d9af69eb41b 100644 --- a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +++ b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py @@ -76,6 +76,8 @@ def __init__( if has_config_logger_enabled(config): log_config_to_disk(config, locals(), prefix=type(self).__name__) + self.num_moe_experts = getattr(config, "num_moe_experts", None) + self.ddp_config = ddp_config log_single_rank( logger, @@ -260,7 +262,7 @@ def _init_dist_index(self, pg_collection): expt_tp_group = single_rank_group if enable_hsdp: - if expt_dp_group is not None: + if self.num_moe_experts is not None: expt_mesh = _get_hsdp_tp_mesh( outer_fsdp_group, 
expt_dp_group, expt_tp_group, ep_size=ep_group.size() ) @@ -289,7 +291,7 @@ def _init_dist_index(self, pg_collection): expt_device_mesh=expt_device_mesh, ) else: - if ep_group is not None: + if self.num_moe_experts is not None: expt_mesh = _get_dp_tp_mesh(expt_dp_group, expt_tp_group, ep_size=ep_group.size()) expt_device_mesh = DeviceMesh.from_group( [expt_dp_group, expt_tp_group], diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py index ad29956e1b0..20aee12e394 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py @@ -486,10 +486,6 @@ def __init__( self.hsdp_outer_dp_shard = hsdp_outer_dp_shard self.expt_device_mesh = expt_device_mesh - # Handling the situation where M-Core MoE EP=1 - if self.expt_device_mesh is None: - self.expt_device_mesh = device_mesh - # Hybrid FSDP Process Groups # Retrieve the FSDP process group from the DeviceMesh. 
self.fsdp_group = ( diff --git a/tests/unit_tests/distributed/megatron_fsdp/test_mcore_fully_sharded_data_parallel.py b/tests/unit_tests/distributed/megatron_fsdp/test_mcore_fully_sharded_data_parallel.py index d4c664cda9c..d88abb20514 100644 --- a/tests/unit_tests/distributed/megatron_fsdp/test_mcore_fully_sharded_data_parallel.py +++ b/tests/unit_tests/distributed/megatron_fsdp/test_mcore_fully_sharded_data_parallel.py @@ -225,6 +225,52 @@ def train_step(model, optimizer, inputs): msg=f"Parameters for {name1} don't match", ) + def test_fsdp_expt_device_mesh(self): + """Test that expt_device_mesh is None for dense models and not None for MoE models.""" + if not is_torch_min_version("2.4.0"): + pytest.skip("Megatron FSDP requires torch >= 2.4.0") + + fsdp_config = DistributedDataParallelConfig( + data_parallel_sharding_strategy="optim_grads_params", + overlap_grad_reduce=True, + overlap_param_gather=True, + bucket_size=10000, + use_megatron_fsdp=True, + ) + input_dim, output_dim = 13, 17 + + # Dense model: expt_device_mesh should not be built without MoE config + dense_config = TransformerConfig( + num_attention_heads=1, num_layers=1, context_parallel_size=1 + ) + dense_model = TestModel(input_dim=input_dim, output_dim=output_dim).cuda() + fsdp_dense = FullyShardedDataParallel( + config=dense_config, + ddp_config=fsdp_config, + module=dense_model, + fsdp_unit_modules=[torch.nn.Linear], + ) + assert ( + fsdp_dense.megatron_fsdp_dist_index.expt_device_mesh is None + ), "Dense model: expt_device_mesh should be None" + fsdp_dense.stop_communication() + + # MoE model: expt_device_mesh should be built when num_moe_experts is set + moe_config = TransformerConfig( + num_attention_heads=1, num_layers=1, context_parallel_size=1, num_moe_experts=4 + ) + moe_model = TestModel(input_dim=input_dim, output_dim=output_dim).cuda() + fsdp_moe = FullyShardedDataParallel( + config=moe_config, + ddp_config=fsdp_config, + module=moe_model, + fsdp_unit_modules=[torch.nn.Linear], + ) + 
assert ( + fsdp_moe.megatron_fsdp_dist_index.expt_device_mesh is not None + ), "MoE model: expt_device_mesh should not be None" + fsdp_moe.stop_communication() + # Testing fsdp_double_buffer with and without nccl_ub @pytest.mark.parametrize( ("dp_size", "nccl_ub", "fsdp_double_buffer", "fsdp_manual_registration"), From a9e5bf9409fd093ca82aa9817cb16030760f0896 Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Wed, 18 Mar 2026 21:54:34 +0800 Subject: [PATCH 316/334] [Fix][Dev] Missing Assertion for moe layer recomptue in A2A Overlap (#3916) --- megatron/core/transformer/transformer_config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index d055b7d96cb..f55de2ae2ff 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -2017,6 +2017,9 @@ def __post_init__(self): assert ( self.recompute_num_layers is None ), 'recompute_num_layers must be None when enabling overlap_moe_expert_parallel_comm' + assert ( + "moe" not in self.recompute_modules + ), 'disable moe in recompute_modules when enabling overlap_moe_expert_parallel_comm' # Check if bf16 or fp16 is used assert ( From ebf1508100d4123293a242b84c76a937317d7894 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Wed, 18 Mar 2026 22:18:50 -0500 Subject: [PATCH 317/334] ci: Fix sso users check (#3937) Signed-off-by: Charlie Truong --- .github/workflows/cicd-main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 2cc025baf99..3968f808ff6 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -67,6 +67,7 @@ jobs: with: username: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }} github_token: ${{ secrets.NVIDIA_MANAGEMENT_ORG_PAT }} + sso_users_filename: ${{ vars.SSO_USERS_FILENAME }} - name: 
Set maintainer status id: check-membership From 8ae70d4fe6e5711cc70f028d003689e359fa1290 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Thu, 19 Mar 2026 05:49:27 -0700 Subject: [PATCH 318/334] Add more emerging optimizers (#3907) Signed-off-by: Hao Wu Signed-off-by: Deyu Fu Co-authored-by: Deyu Fu --- .../core/optimizer/emerging_optimizers.py | 131 +++- megatron/core/optimizer/optimizer_config.py | 9 + megatron/training/arguments.py | 2 +- pyproject.toml | 2 +- tests/unit_tests/test_emerging_optimizers.py | 626 +++++++++++++++++- uv.lock | 204 +----- 6 files changed, 748 insertions(+), 226 deletions(-) diff --git a/megatron/core/optimizer/emerging_optimizers.py b/megatron/core/optimizer/emerging_optimizers.py index f10c79dc4be..b49bf8b9884 100644 --- a/megatron/core/optimizer/emerging_optimizers.py +++ b/megatron/core/optimizer/emerging_optimizers.py @@ -24,18 +24,21 @@ try: from emerging_optimizers import registry from emerging_optimizers.orthogonalized_optimizers import ( + AdaptiveMuon, OrthogonalizedOptimizer, get_muon_scale_factor, ) from emerging_optimizers.orthogonalized_optimizers.muon_utils import newton_schulz_tp + from emerging_optimizers.scalar_optimizers import Lion # pylint: disable=unused-import - # It is necessary to import SOAP for the registry to work. + # It is necessary to import optimizers for the registry to work. from emerging_optimizers.soap import SOAP # pylint: disable=unused-import HAVE_EMERGING_OPTIMIZERS = True except ImportError: HAVE_EMERGING_OPTIMIZERS = False OrthogonalizedOptimizer = object + AdaptiveMuon = object logger = logging.getLogger(__name__) @@ -46,6 +49,22 @@ # =========================================================================== +def _eopt_init_state_fn(opt, config=None): + """Initialize emerging optimizer state for torch_dist checkpoint format.""" + for group in opt.param_groups: + # Checkpoint init needs state for all parameters, including those without grads yet. 
+ opt._init_group(group, skip_non_grad_params=False) + + +def _default_param_overrides_factory() -> Dict[ParamKey, Dict[str, Any]]: + """Default param overrides: route non-linear/embedding params to Adam.""" + return { + ParamKey( + predicate=ParamPredicate(name="nonlinear_or_embedding", fn=_is_nonlinear_or_embedding) + ): {'optimizer': 'adam'} + } + + @dataclass class EmergingOptimizerEntry: """Everything needed to create and configure an emerging optimizer. @@ -59,9 +78,11 @@ class EmergingOptimizerEntry: """ optimizer_cls: type - init_state_fn: Callable - config_to_kwargs: Callable | None - default_param_overrides: Dict[ParamKey, Dict[str, Any]] = field(default_factory=dict) + init_state_fn: Callable = _eopt_init_state_fn + config_to_kwargs: Callable | None = None + default_param_overrides: Dict[ParamKey, Dict[str, Any]] = field( + default_factory=_default_param_overrides_factory + ) def _create_emerging_optimizer(config, param_groups, eopt_name, model_chunks, pg_collection): @@ -166,7 +187,11 @@ def scaled_orthogonalize_fn( self.qkv_split_shapes = qkv_split_shapes weight_decay_method = "decoupled" if use_decoupled_weight_decay else "l2" - super().__init__( + # Use explicit class call instead of super() so that subclasses with + # multiple inheritance (e.g. TensorParallelAdaptiveMuon) don't route + # through an intermediate class that doesn't accept scaled_orthogonalize_fn. + OrthogonalizedOptimizer.__init__( + self, params, lr, momentum, @@ -229,11 +254,60 @@ def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> t return grad -def _eopt_init_state_fn(opt, config=None): - """Initialize emerging optimizer state for torch_dist checkpoint format.""" - for group in opt.param_groups: - # Checkpoint init needs state for all parameters, including those without grads yet. 
- opt._init_group(group, skip_non_grad_params=False) +class TensorParallelAdaptiveMuon(TensorParallelMuon, AdaptiveMuon): + """Tensor Parallel Adaptive Muon optimizer.""" + + def __init__( + self, + params: ParamsT, + lr: float = 3e-4, + momentum: float = 0.95, + nesterov: bool = True, + weight_decay: float = 0.01, + use_decoupled_weight_decay: bool = True, + split_qkv: bool = False, + is_qkv_fn: Callable[[torch.Tensor], bool] | None = None, + qkv_split_shapes: tuple[int, int, int] | None = None, + fp32_matmul_prec: str = "medium", + coefficient_type: str = "quintic", + num_ns_steps: int = 5, + scale_mode: str = "spectral", + extra_scale_factor: float = 1.0, + pg_collection: Optional[ProcessGroupCollection] = None, + tp_mode: Literal["blockwise", "duplicated", "distributed"] = "duplicated", + moment2_method: Literal["adamuon", "normuon"] = "adamuon", + beta2: float = 0.95, + eps: float = 1e-8, + ) -> None: + TensorParallelMuon.__init__( + self, + params, + lr=lr, + momentum=momentum, + nesterov=nesterov, + weight_decay=weight_decay, + use_decoupled_weight_decay=use_decoupled_weight_decay, + split_qkv=split_qkv, + is_qkv_fn=is_qkv_fn, + qkv_split_shapes=qkv_split_shapes, + fp32_matmul_prec=fp32_matmul_prec, + coefficient_type=coefficient_type, + num_ns_steps=num_ns_steps, + scale_mode=scale_mode, + extra_scale_factor=extra_scale_factor, + pg_collection=pg_collection, + tp_mode=tp_mode, + ) + self.moment2_method = moment2_method + + for group in self.param_groups: + group.setdefault("beta2", beta2) + group.setdefault("eps", eps) + + @torch.no_grad() # type: ignore[misc] + def step(self, closure: Optional[Callable] = None) -> Optional[float]: + """Step function""" + return AdaptiveMuon.step(self, closure) def _kwargs_from_config(optimizer_cls: type, prefix: str, config) -> Dict[str, Any]: @@ -266,6 +340,13 @@ def _muon_config_to_kwargs(config, model_chunks, pg_collection) -> Dict[str, Any return kwargs +def _adaptive_muon_config_to_kwargs(config, model_chunks, 
pg_collection) -> Dict[str, Any]: + """Convert OptimizerConfig to TensorParallelAdaptiveMuon constructor kwargs.""" + kwargs = _muon_config_to_kwargs(config, model_chunks, pg_collection) + kwargs.update(_kwargs_from_config(TensorParallelAdaptiveMuon, "adaptive_muon", config)) + return kwargs + + def _default_adam_based_eopt_config_to_kwargs( eopt_name, config, model_chunks, pg_collection ) -> Dict[str, Any]: @@ -280,34 +361,20 @@ def _default_adam_based_eopt_config_to_kwargs( # ----------------------------------------------------------------------- _EMERGING_OPTIMIZERS = { 'muon': EmergingOptimizerEntry( - optimizer_cls=TensorParallelMuon, - init_state_fn=_eopt_init_state_fn, - config_to_kwargs=_muon_config_to_kwargs, - default_param_overrides={ - ParamKey( - predicate=ParamPredicate( - name="nonlinear_or_embedding", fn=_is_nonlinear_or_embedding - ) - ): {'optimizer': 'adam'} - }, - ) + optimizer_cls=TensorParallelMuon, config_to_kwargs=_muon_config_to_kwargs + ), + "adaptive_muon": EmergingOptimizerEntry( + optimizer_cls=TensorParallelAdaptiveMuon, config_to_kwargs=_adaptive_muon_config_to_kwargs + ), } # Register soap with default config # TODO(skyw): register all emerging optimizers. if HAVE_EMERGING_OPTIMIZERS: - for eopt_name in ["soap"]: + for eopt_name in registry.get_optimizer_name_list(): if eopt_name in _EMERGING_OPTIMIZERS: + # skip already registered local versions, e.g. TensorParallel versions. 
continue _EMERGING_OPTIMIZERS[eopt_name] = EmergingOptimizerEntry( - optimizer_cls=registry.get_optimizer_cls(eopt_name), - init_state_fn=_eopt_init_state_fn, - config_to_kwargs=None, - default_param_overrides={ - ParamKey( - predicate=ParamPredicate( - name="nonlinear_or_embedding", fn=_is_nonlinear_or_embedding - ) - ): {'optimizer': 'adam'} - }, + optimizer_cls=registry.get_optimizer_cls(eopt_name) ) diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py index e10fd7852c7..84dcc5d6965 100644 --- a/megatron/core/optimizer/optimizer_config.py +++ b/megatron/core/optimizer/optimizer_config.py @@ -289,6 +289,15 @@ class OptimizerConfig: soap_use_kl_shampoo: bool = True """Whether to use the KL-Shampoo preconditioner.""" + adaptive_muon_moment2_method: str = "adamuon" + """The method to use for the moment2 update in Adaptive Muon optimizer.""" + + adaptive_muon_beta2: float = 0.95 + """The beta2 parameter for the Adaptive Muon optimizer.""" + + adaptive_muon_eps: float = 1e-8 + """The eps parameter for the Adaptive Muon optimizer.""" + ####################### # Distributed optimizer ####################### diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index eb91fa11cc0..c1bb0f8ac0d 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -2256,7 +2256,7 @@ def _add_training_args(parser): help='use FlashAttention implementation of attention. ' 'https://arxiv.org/abs/2205.14135') group.add_argument('--optimizer', type=str, default='adam', - choices=['adam', 'sgd', 'muon', 'dist_muon', 'soap'], + choices=['adam', 'sgd', 'muon', 'dist_muon', 'soap', "adaptive_muon", "lion"], help='Optimizer function. 
' 'Note: dist_muon is deprecated; use --optimizer muon ' 'with --use-distributed-optimizer instead.') diff --git a/pyproject.toml b/pyproject.toml index 3a9d27b6a81..7ce7e3e17c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -190,7 +190,7 @@ flash_mla = [ ] transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "5671fd3675906cda1ade26c24a65d3dedd88eb89" } nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "01a9a8ba360f7b2908728ad0516e0ad9d936966d" } -emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "bc634ff8c0cf4fb5dbae0a531081281b499be3a0" } +emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "v0.2.0" } fast-hadamard-transform = { git = "https://github.com/Dao-AILab/fast-hadamard-transform.git", rev = "f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" } [tool.isort] diff --git a/tests/unit_tests/test_emerging_optimizers.py b/tests/unit_tests/test_emerging_optimizers.py index 8fbac85c99f..53d780fd832 100644 --- a/tests/unit_tests/test_emerging_optimizers.py +++ b/tests/unit_tests/test_emerging_optimizers.py @@ -11,15 +11,21 @@ from megatron.core import parallel_state from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer -from megatron.core.optimizer.emerging_optimizers import HAVE_EMERGING_OPTIMIZERS, TensorParallelMuon +from megatron.core.optimizer.emerging_optimizers import ( + HAVE_EMERGING_OPTIMIZERS, + TensorParallelAdaptiveMuon, + TensorParallelMuon, +) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer import TransformerConfig from tests.unit_tests.test_utilities import Utils if HAVE_EMERGING_OPTIMIZERS: + from emerging_optimizers.scalar_optimizers import Lion from emerging_optimizers.soap import SOAP else: SOAP = None + Lion = None # Skip all tests in this file for 
LTS versions pytestmark = pytest.mark.skipif( @@ -663,6 +669,453 @@ def test_muon_optimizer_num_ns_steps(num_ns_steps): ), f"Weight should be updated with num_ns_steps={num_ns_steps}" +# =========================================================================== +# Adaptive Muon optimizer tests +# =========================================================================== + + +def test_adaptive_muon_optimizer_smoke(): + """Smoke test for TensorParallelAdaptiveMuon optimizer.""" + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = TensorParallelAdaptiveMuon( + params=[model.weight], + lr=0.01, + momentum=0.95, + nesterov=True, + weight_decay=0.01, + use_decoupled_weight_decay=True, + split_qkv=False, + fp32_matmul_prec="medium", + num_ns_steps=5, + scale_mode="spectral", + extra_scale_factor=1.0, + pg_collection=None, + tp_mode="duplicated", + moment2_method="adamuon", + beta2=0.95, + eps=1e-8, + ) + + assert optimizer is not None + assert hasattr(optimizer, 'param_groups') + assert len(optimizer.param_groups) > 0 + + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), "Weight should be updated after optimizer step" + + optimizer.zero_grad() + assert model.weight.grad is None or torch.all( + model.weight.grad == 0 + ), "Gradients should be zeroed" + + state_dict = optimizer.state_dict() + assert 'state' in state_dict + assert 'param_groups' in state_dict + optimizer.load_state_dict(state_dict) + + +@pytest.mark.parametrize("mode", ["duplicated", "blockwise", "distributed"]) +def test_adaptive_muon_optimizer_different_modes_single_rank(mode): + """Test TensorParallelAdaptiveMuon with different modes on single rank.""" + 
torch.manual_seed(42) + torch.cuda.manual_seed(42) + + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.normal_(0, 0.02) + + optimizer = TensorParallelAdaptiveMuon( + params=[model.weight], + lr=0.01, + momentum=0.95, + weight_decay=0.0, + num_ns_steps=5, + pg_collection=None, + tp_mode=mode, + ) + + torch.manual_seed(42) + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with mode={mode}" + + +@pytest.mark.parametrize("moment2_method", ["adamuon", "normuon"]) +def test_adaptive_muon_optimizer_moment2_methods(moment2_method): + """Test TensorParallelAdaptiveMuon with different moment2 methods.""" + model = torch.nn.Linear(80, 40, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = TensorParallelAdaptiveMuon( + params=[model.weight], + lr=0.01, + num_ns_steps=5, + pg_collection=None, + tp_mode="duplicated", + moment2_method=moment2_method, + ) + + input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with moment2_method={moment2_method}" + + +@pytest.mark.parametrize("beta2", [0.5, 0.95, 0.999]) +def test_adaptive_muon_optimizer_beta2(beta2): + """Test TensorParallelAdaptiveMuon with different beta2 values.""" + model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = TensorParallelAdaptiveMuon( + 
params=[model.weight], + lr=0.01, + num_ns_steps=5, + pg_collection=None, + tp_mode="duplicated", + beta2=beta2, + ) + + input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with beta2={beta2}" + + +def test_adaptive_muon_optimizer_multiple_steps(): + """Test TensorParallelAdaptiveMuon across multiple optimization steps.""" + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = TensorParallelAdaptiveMuon( + params=[model.weight], + lr=0.01, + momentum=0.95, + weight_decay=0.01, + num_ns_steps=5, + pg_collection=None, + tp_mode="duplicated", + ) + + weights_history = [model.weight.data.clone()] + + for i in range(3): + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + optimizer.step() + optimizer.zero_grad() + weights_history.append(model.weight.data.clone()) + + for i in range(len(weights_history) - 1): + assert not torch.equal( + weights_history[i], weights_history[i + 1] + ), f"Weight should change at step {i}" + + +@pytest.mark.parametrize("nesterov", [True, False]) +def test_adaptive_muon_optimizer_nesterov(nesterov): + """Test TensorParallelAdaptiveMuon with and without Nesterov momentum.""" + model = torch.nn.Linear(50, 25, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = TensorParallelAdaptiveMuon( + params=[model.weight], + lr=0.01, + momentum=0.9, + nesterov=nesterov, + num_ns_steps=5, + pg_collection=None, + tp_mode="duplicated", + ) + + input_tensor = torch.randn(16, 50, dtype=torch.float32, device='cuda') + output = 
model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with nesterov={nesterov}" + + +def test_adaptive_muon_optimizer_qkv_split(): + """Test TensorParallelAdaptiveMuon with QKV splitting.""" + qkv_size = 3 * 64 * 16 # Combined Q, K, V dimensions + hidden_size = 1024 + model = torch.nn.Linear(hidden_size, qkv_size, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + model.weight.is_qkv = True + qkv_split_shapes = (64, 64, 64) + + optimizer_split = TensorParallelAdaptiveMuon( + params=[model.weight], + lr=0.01, + split_qkv=True, + is_qkv_fn=lambda p: getattr(p, 'is_qkv', False), + qkv_split_shapes=qkv_split_shapes, + num_ns_steps=5, + pg_collection=None, + tp_mode="duplicated", + ) + + input_tensor = torch.randn(16, hidden_size, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer_split.step() + weight_with_split = model.weight.data.clone() + + assert not torch.equal( + weight_with_split, original_weight + ), "QKV weight should be updated with split_qkv=True" + + model.weight.data.fill_(1.0) + optimizer_no_split = TensorParallelAdaptiveMuon( + params=[model.weight], + lr=0.01, + split_qkv=False, + num_ns_steps=5, + pg_collection=None, + tp_mode="duplicated", + ) + + output = model(input_tensor) + loss = output.sum() + loss.backward() + + optimizer_no_split.step() + weight_without_split = model.weight.data.clone() + + assert not torch.equal( + weight_without_split, original_weight + ), "QKV weight should be updated with split_qkv=False" + + assert not torch.equal( + weight_with_split, weight_without_split + ), "Weights should be different between split_qkv=True and split_qkv=False" + + +@pytest.mark.skipif( + 
int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1" +) +class TestAdaptiveMuonOptimizerMultiRank: + """Test class for Adaptive Muon optimizer with multi-rank setup.""" + + @pytest.fixture(autouse=True) + def setup_and_teardown(self): + """Setup and teardown for each test.""" + Utils.initialize_model_parallel() + yield + Utils.destroy_model_parallel() + + def create_ddp_model(self, model): + """Wrap model in DDP.""" + ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False) + return DistributedDataParallel( + TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model + ) + + def test_get_megatron_optimizer_adaptive_muon_smoke(self): + """Smoke test for get_megatron_optimizer with adaptive_muon.""" + model = Net().bfloat16().cuda() + model.requires_grad_(True) + model = self.create_ddp_model(model) + + for param in model.parameters(): + assert param.requires_grad + + optimizer_config = OptimizerConfig( + optimizer='adaptive_muon', + lr=0.01, + weight_decay=0.01, + bf16=True, + use_distributed_optimizer=False, + muon_momentum=0.95, + muon_nesterov=True, + muon_fp32_matmul_prec="medium", + muon_num_ns_steps=5, + muon_scale_mode="spectral", + muon_tp_mode="duplicated", + adaptive_muon_moment2_method="adamuon", + adaptive_muon_beta2=0.95, + adaptive_muon_eps=1e-8, + ) + + optimizer = get_megatron_optimizer( + config=optimizer_config, model_chunks=[model], use_gloo_process_groups=True + ) + + assert optimizer is not None + assert hasattr(optimizer, 'param_groups') + assert hasattr(optimizer, 'chained_optimizers') + assert len(optimizer.chained_optimizers) >= 1 + + input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_params = {} + for name, param in model.named_parameters(): + original_params[name] = param.data.clone() + + optimizer.step() + + params_updated = 0 + for name, param in 
model.named_parameters(): + if not torch.equal(param.data, original_params[name]): + params_updated += 1 + + assert params_updated > 0, "At least some parameters should be updated after optimizer step" + + optimizer.zero_grad() + for param in model.parameters(): + assert param.grad is None or torch.all( + param.grad == 0 + ), "Gradients should be zeroed for all parameters" + + state_dict = optimizer.state_dict() + assert isinstance(state_dict, list) + optimizer.load_state_dict(state_dict) + + def test_get_megatron_optimizer_adaptive_muon_validation(self): + """Test validation logic for get_megatron_optimizer with adaptive_muon.""" + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.bfloat16, device='cuda') + model.requires_grad_(True) + model = self.create_ddp_model(model) + + optimizer_config_fp16 = OptimizerConfig( + optimizer='adaptive_muon', lr=0.01, fp16=True, use_distributed_optimizer=False + ) + + with pytest.raises(Exception, match='emerging optimizer with fp16 is not supported'): + get_megatron_optimizer(config=optimizer_config_fp16, model_chunks=[model]) + + +@pytest.mark.skipif( + int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1" +) +class TestAdaptiveMuonOptimizerMultiRankTP: + """Test class for Adaptive Muon optimizer with multi-rank and tensor parallel setup.""" + + @pytest.fixture(autouse=True) + def setup_and_teardown(self): + """Setup and teardown for each test with tensor parallel.""" + world = int(os.getenv('WORLD_SIZE', '1')) + Utils.initialize_model_parallel(tensor_model_parallel_size=min(world, 2)) + yield + Utils.destroy_model_parallel() + + def create_tp_model_and_optimizer(self, mode): + """Create model with TP and optimizer.""" + rank = int(os.getenv('RANK', '0')) + pg_collection = ProcessGroupCollection.use_mpu_process_groups() + + torch.manual_seed(42 + rank) + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + 
model.weight.data.normal_(0, 0.02) + model.weight.partition_dim = 0 + + optimizer = TensorParallelAdaptiveMuon( + params=[model.weight], + lr=0.01, + momentum=0.95, + weight_decay=0.0, + num_ns_steps=5, + pg_collection=pg_collection, + tp_mode=mode, + ) + + return model, optimizer + + @pytest.mark.parametrize("mode", ["duplicated", "distributed"]) + def test_adaptive_muon_optimizer_modes_multirank_same_result(self, mode): + """Test that duplicated and distributed modes produce same results with TP > 1.""" + model, optimizer = self.create_tp_model_and_optimizer(mode) + + torch.manual_seed(42) + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with mode={mode}" + + def test_adaptive_muon_optimizer_blockwise_mode(self): + """Test that blockwise mode works with TP > 1.""" + model, optimizer = self.create_tp_model_and_optimizer("blockwise") + + torch.manual_seed(42) + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), "Weight should be updated with mode=blockwise" + + # =========================================================================== # SOAP optimizer tests # =========================================================================== @@ -948,3 +1401,174 @@ def test_get_megatron_optimizer_soap_validation(self): with pytest.raises(Exception, match='emerging optimizer with fp16 is not supported'): get_megatron_optimizer(config=optimizer_config_fp16, model_chunks=[model]) + + +# =========================================================================== +# Lion optimizer tests +# 
=========================================================================== + +skip_no_lion = pytest.mark.skipif( + not HAVE_EMERGING_OPTIMIZERS, reason="emerging_optimizers package not installed" +) + + +@skip_no_lion +def test_lion_optimizer_smoke(): + """Smoke test for Lion optimizer.""" + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = Lion(params=[model.weight], lr=1e-4, betas=(0.9, 0.99), weight_decay=0.01) + + assert optimizer is not None + assert hasattr(optimizer, 'param_groups') + assert len(optimizer.param_groups) > 0 + + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), "Weight should be updated after optimizer step" + + optimizer.zero_grad() + assert model.weight.grad is None or torch.all( + model.weight.grad == 0 + ), "Gradients should be zeroed" + + state_dict = optimizer.state_dict() + assert 'state' in state_dict + assert 'param_groups' in state_dict + optimizer.load_state_dict(state_dict) + + +@skip_no_lion +def test_lion_optimizer_multiple_steps(): + """Test Lion optimizer across multiple optimization steps.""" + model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = Lion(params=[model.weight], lr=1e-4, betas=(0.9, 0.99), weight_decay=0.01) + + weights_history = [model.weight.data.clone()] + + for i in range(3): + input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + optimizer.step() + optimizer.zero_grad() + weights_history.append(model.weight.data.clone()) + + for i in range(len(weights_history) - 1): + assert not 
torch.equal( + weights_history[i], weights_history[i + 1] + ), f"Weight should change at step {i}" + + +@skip_no_lion +@pytest.mark.parametrize("betas", [(0.9, 0.99), (0.95, 0.999), (0.5, 0.9)]) +def test_lion_optimizer_betas(betas): + """Test Lion optimizer with different beta values.""" + model = torch.nn.Linear(80, 40, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = Lion(params=[model.weight], lr=1e-4, betas=betas) + + input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with betas={betas}" + + +@skip_no_lion +@pytest.mark.parametrize("weight_decay", [0.0, 0.01, 0.1]) +def test_lion_optimizer_weight_decay(weight_decay): + """Test Lion optimizer with different weight decay values.""" + model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = Lion(params=[model.weight], lr=1e-4, betas=(0.9, 0.99), weight_decay=weight_decay) + + input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with weight_decay={weight_decay}" + + +@skip_no_lion +@pytest.mark.parametrize("weight_decay_method", ["decoupled", "l2"]) +def test_lion_optimizer_weight_decay_method(weight_decay_method): + """Test Lion optimizer with different weight decay methods.""" + model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda') + model.requires_grad_(True) + model.weight.data.fill_(1.0) + + optimizer = Lion( + 
params=[model.weight], + lr=1e-4, + betas=(0.9, 0.99), + weight_decay=0.01, + weight_decay_method=weight_decay_method, + ) + + input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_weight = model.weight.data.clone() + optimizer.step() + + assert not torch.equal( + model.weight.data, original_weight + ), f"Weight should be updated with weight_decay_method={weight_decay_method}" + + +@skip_no_lion +def test_lion_optimizer_multi_layer_net(): + """Test Lion optimizer with the multi-layer Net model.""" + model = Net().cuda() + model.requires_grad_(True) + + optimizer = Lion(params=model.parameters(), lr=1e-4, betas=(0.9, 0.99), weight_decay=0.01) + + input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda') + output = model(input_tensor) + loss = output.sum() + loss.backward() + + original_params = {name: p.data.clone() for name, p in model.named_parameters()} + optimizer.step() + + params_updated = 0 + for name, param in model.named_parameters(): + if not torch.equal(param.data, original_params[name]): + params_updated += 1 + + assert params_updated > 0, "At least some parameters should be updated after optimizer step" diff --git a/uv.lock b/uv.lock index d1e17d67196..129f94b4288 100644 --- a/uv.lock +++ b/uv.lock @@ -1411,7 +1411,7 @@ wheels = [ [[package]] name = "emerging-optimizers" version = "0.2.0" -source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=bc634ff8c0cf4fb5dbae0a531081281b499be3a0#bc634ff8c0cf4fb5dbae0a531081281b499be3a0" } +source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.2.0#1effa026ff096b7fa1063ca2fba19d98be6e6cdf" } dependencies = [ { name = "absl-py", marker = "python_full_version >= '3.12' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "torch", marker = "(python_full_version >= '3.12' and sys_platform == 'never') or 
(python_full_version < '3.12' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'never' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, @@ -2748,8 +2748,8 @@ requires-dist = [ { name = "datasets", marker = "extra == 'lts'" }, { name = "einops", marker = "extra == 'dev'", specifier = "~=0.8" }, { name = "einops", marker = "extra == 'lts'", specifier = "~=0.8" }, - { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=bc634ff8c0cf4fb5dbae0a531081281b499be3a0" }, - { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=bc634ff8c0cf4fb5dbae0a531081281b499be3a0" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.2.0" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.2.0" }, { name = "fastapi", marker = "extra == 'dev'", specifier = "~=0.50" }, { name = "fastapi", marker = "extra == 'lts'", specifier = "~=0.50" }, { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.4.0" }, @@ -2823,7 +2823,7 @@ linting = [ { name = "ruff", specifier = "~=0.9.0" }, ] no-pypi-wheels = [ - { name = "emerging-optimizers", marker = "python_full_version >= '3.12'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=bc634ff8c0cf4fb5dbae0a531081281b499be3a0" }, + { name = "emerging-optimizers", marker = "python_full_version >= '3.12'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.2.0" }, { name = "fast-hadamard-transform", git = 
"https://github.com/Dao-AILab/fast-hadamard-transform.git?rev=f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" }, ] test = [ @@ -3571,59 +3571,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/86/94188e03e5d4dd7b73c390b0cddcde5618b3799c18e327b2bf15763f6137/nvdlfw_inspect-0.2.2-py3-none-any.whl", hash = "sha256:8a4dc2814c5a4cd19ae304170b9bfa514538ef3c3eb243a45a82404ec3cb279d", size = 30964, upload-time = "2025-12-03T10:52:01.933Z" }, ] -[[package]] -name = "nvidia-cublas-cu12" -version = "12.8.4.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, - { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, - { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 567544208, upload-time = "2025-03-07T01:53:30.535Z" }, -] - -[[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, 
upload-time = "2025-03-07T01:40:10.421Z" }, - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, - { url = "https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" }, -] - -[[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" }, -] - -[[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = 
[ - { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, - { url = "https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" }, -] - -[[package]] -name = "nvidia-cudnn-cu12" -version = "9.10.2.21" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, - { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, - { url = 
"https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" }, -] - [[package]] name = "nvidia-cudnn-frontend" version = "1.18.0" @@ -3646,76 +3593,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/53/df2810b56d259ef96fa6beaa1381bd14c29fbe82836b409516e864c5e177/nvidia_cudnn_frontend-1.18.0-cp314-cp314-win_amd64.whl", hash = "sha256:5053b473fa74168b5fbf35934cd6187f88aa03b8447b9f2cd417332d5e5c9569", size = 1592759, upload-time = "2026-02-11T21:32:33.87Z" }, ] -[[package]] -name = "nvidia-cufft-cu12" -version = "11.3.3.83" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, - { url = "https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" }, -] - -[[package]] -name = "nvidia-cufile-cu12" -version = "1.13.1.3" -source = 
{ registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, - { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, -] - -[[package]] -name = "nvidia-curand-cu12" -version = "10.3.9.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, - { url = "https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" }, -] - -[[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.3.90" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = 
"nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, - { url = "https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" }, -] - -[[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.8.93" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, - { url 
= "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" }, -] - -[[package]] -name = "nvidia-cusparselt-cu12" -version = "0.7.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, - { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, - { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" }, -] - [[package]] name = "nvidia-cutlass-dsl" version = "4.4.0" @@ -3789,44 +3666,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/09/30147ab0d0409d3492f1d37469fe0586c82aeec6eec9a907f59d24094516/nvidia_modelopt-0.41.0-py3-none-any.whl", hash = "sha256:ffa5f903d22653649318831a470550ae55ee04716c068d5ade61c3176fdc1d7d", size = 934582, upload-time = "2026-01-20T17:21:28.494Z" }, ] -[[package]] -name = "nvidia-nccl-cu12" -version = "2.27.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" }, - { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, -] - -[[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, - { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, - { url = "https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" }, -] - -[[package]] -name = "nvidia-nvshmem-cu12" -version = "3.4.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/1d/6a/03aa43cc9bd3ad91553a88b5f6fb25ed6a3752ae86ce2180221962bc2aa5/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b48363fc6964dede448029434c6abed6c5e37f823cb43c3bcde7ecfc0457e15", size = 138936938, upload-time = "2025-09-06T00:32:05.589Z" }, - { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" }, -] - -[[package]] -name = "nvidia-nvtx-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, - { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" }, -] - [[package]] name = "nvidia-resiliency-ext" version = "0.5.0" @@ -6505,7 +6344,7 @@ name = "sympy" version = "1.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { 
name = "mpmath" }, + { name = "mpmath", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } wheels = [ @@ -6823,32 +6662,15 @@ name = "torch" version = "2.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cuda-bindings", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "filelock" }, - { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, - { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "jinja2" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cublas-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cuda-cupti-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cuda-runtime-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cudnn-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 
'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cufft-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cufile-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-curand-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cusolver-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cusparse-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cusparselt-cu12", marker = "(platform_machine == 
'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-nccl-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-nvshmem-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-nvtx-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "setuptools", marker = "python_full_version >= '3.12' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "sympy" }, + { name = "filelock", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and 
sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (python_full_version < '3.12' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "sympy", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = 
"triton", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "typing-extensions" }, + { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/5b/30/bfebdd8ec77db9a79775121789992d6b3b75ee5494971294d7b4b7c999bc/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2b980edd8d7c0a68c4e951ee1856334a43193f98730d97408fbd148c1a933313", size = 79411457, upload-time = "2026-02-10T21:44:59.189Z" }, From c72c4599012297cfbd1d57e006b544478b6bbf78 Mon Sep 17 00:00:00 2001 From: Kirthi Shankar Sivamani Date: Thu, 19 Mar 2026 19:00:43 -0700 Subject: [PATCH 319/334] Support GEMM + Swiglu fused MLP (#3890) Signed-off-by: Kirthi Shankar Sivamani Signed-off-by: ksivamani Signed-off-by: Xin Yao Co-authored-by: Xin Yao --- .../core/distributed/param_and_grad_buffer.py | 42 +-- .../core/extensions/transformer_engine.py | 88 +++++- megatron/core/optimizer/distrib_optimizer.py | 112 +++++++- megatron/core/transformer/moe/experts.py | 267 +++++++++++++++++- megatron/core/transformer/moe/moe_utils.py | 5 +- .../core/transformer/transformer_config.py | 13 + .../unit_tests/models/test_mamba_moe_model.py | 2 + 7 files changed, 489 insertions(+), 40 deletions(-) diff --git a/megatron/core/distributed/param_and_grad_buffer.py b/megatron/core/distributed/param_and_grad_buffer.py index 
85b9d98a3be..d47b7690a91 100644 --- a/megatron/core/distributed/param_and_grad_buffer.py +++ b/megatron/core/distributed/param_and_grad_buffer.py @@ -199,6 +199,9 @@ def __init__( # or bucket.grad_data. self.cached_param_buffer_shard_list = [None] * len(self.buckets) self.cached_grad_buffer_shard_list = [None] * len(self.buckets) + # Track grad mode used to create cached param views. Rebuild if mode changes to avoid + # mixing no_grad-created views with in-place updates in grad-enabled mode. + self._cached_param_buffer_shards_grad_enabled = None def reset(self): """ @@ -273,24 +276,29 @@ def start_param_sync(self, force_sync: bool = False): assert self.param_gather_handle is None async_op = self.ddp_config.overlap_param_gather and not force_sync - # Coalesce communication kernels across buckets in the bucket group. - with _coalescing_manager( - self.intra_distributed_optimizer_instance_group, async_ops=async_op - ) as cm: - for idx, bucket in enumerate(self.buckets): - if self.cached_param_buffer_shard_list[idx] is None: - self.cached_param_buffer_shard_list[idx] = shard_buffer( - bucket.param_data, self.intra_distributed_optimizer_instance_size + current_grad_enabled = torch.is_grad_enabled() + if self._cached_param_buffer_shards_grad_enabled != current_grad_enabled: + self.cached_param_buffer_shard_list = [None] * len(self.buckets) + self._cached_param_buffer_shards_grad_enabled = current_grad_enabled + with torch.no_grad(): + # Coalesce communication kernels across buckets in the bucket group. 
+ with _coalescing_manager( + self.intra_distributed_optimizer_instance_group, async_ops=async_op + ) as cm: + for idx, bucket in enumerate(self.buckets): + if self.cached_param_buffer_shard_list[idx] is None: + self.cached_param_buffer_shard_list[idx] = shard_buffer( + bucket.param_data, self.intra_distributed_optimizer_instance_size + ) + local_data_view = self.cached_param_buffer_shard_list[idx][ + self.intra_distributed_optimizer_instance_rank + ] + dist_all_gather_func( + bucket.param_data, + local_data_view, + group=self.intra_distributed_optimizer_instance_group, + async_op=async_op, ) - local_data_view = self.cached_param_buffer_shard_list[idx][ - self.intra_distributed_optimizer_instance_rank - ] - dist_all_gather_func( - bucket.param_data, - local_data_view, - group=self.intra_distributed_optimizer_instance_group, - async_op=async_op, - ) if async_op: self.param_gather_handle = cm else: diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 20ab554382e..0d2e227866b 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1568,6 +1568,50 @@ def __init__( for param in self.parameters(): setattr(param, "allreduce", not (is_expert and self.expert_parallel)) + def normalize_grouped_parameter_keys( + self, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + """Make grouped checkpoint keys compatible across parameter layouts.""" + + def maybe_remap_param(param_name: str) -> None: + grouped_key = f"{prefix}{param_name}" + indexed_keys = [ + f"{prefix}{param_name}{gemm_idx}" for gemm_idx in range(self.num_gemms) + ] + has_grouped_key = grouped_key in state_dict + has_any_indexed_key = any(key in state_dict for key in indexed_keys) + has_all_indexed_keys = all(key in state_dict for key in indexed_keys) + + if getattr(self, "single_grouped_parameter", False): + if has_grouped_key or not 
has_all_indexed_keys: + return + state_dict[grouped_key] = torch.stack( + [state_dict.pop(key) for key in indexed_keys], dim=0 + ) + else: + if has_any_indexed_key or not has_grouped_key: + return + split_tensors = self._split_grouped_checkpoint_tensor( + state_dict.pop(grouped_key), grouped_key + ) + for gemm_idx, tensor in enumerate(split_tensors): + state_dict[f"{prefix}{param_name}{gemm_idx}"] = tensor + + maybe_remap_param("weight") + if self.use_bias: + maybe_remap_param("bias") + + self._register_load_state_dict_pre_hook( + normalize_grouped_parameter_keys, with_module=True + ) + def merge_extra_states( self, state_dict, @@ -1658,6 +1702,31 @@ def merge_extra_states( self._register_load_state_dict_pre_hook(merge_extra_states, with_module=True) + def _split_grouped_checkpoint_tensor( + self, tensor: torch.Tensor, checkpoint_key: str + ) -> list[torch.Tensor]: + """Split grouped checkpoint tensor into one tensor per GEMM.""" + if hasattr(tensor, "split_into_quantized_tensors") and callable( + tensor.split_into_quantized_tensors + ): + grouped_tensors = getattr(tensor, "quantized_tensors", None) + if grouped_tensors is None: + grouped_tensors = tensor.split_into_quantized_tensors() + if len(grouped_tensors) != self.num_gemms: + raise RuntimeError( + f"Grouped checkpoint tensor {checkpoint_key} has {len(grouped_tensors)} " + f"groups, expected {self.num_gemms}." + ) + return list(grouped_tensors) + if tensor.ndim > 0 and tensor.shape[0] == self.num_gemms: + return list(tensor.unbind(dim=0)) + if tensor.ndim > 0 and tensor.shape[0] % self.num_gemms == 0: + return list(torch.chunk(tensor, self.num_gemms, dim=0)) + raise RuntimeError( + f"Cannot split checkpoint tensor {checkpoint_key} with shape {tuple(tensor.shape)} " + f"into {self.num_gemms} GEMM shards." 
+ ) + def finish_init(self, quantization_config: QuantizationConfig): """Post-init of quantization override""" if quantization_config is None: @@ -1762,6 +1831,21 @@ def _sharded_state_dict_grouped( singleton_local_shards = (metadata or {}).get('singleton_local_shards', False) sharded_state_dict = {} full_state_dict = self.state_dict(prefix="", keep_vars=True) + grouped_split_cache = {} + + def get_gemm_tensor(param_name: str, gemm_idx: int) -> torch.Tensor: + indexed_name = f"{param_name}{gemm_idx}" + if indexed_name in full_state_dict: + return full_state_dict[indexed_name] + if param_name not in full_state_dict: + raise KeyError(indexed_name) + if param_name not in grouped_split_cache: + grouped_split_cache[param_name] = self._split_grouped_checkpoint_tensor( + full_state_dict[param_name], param_name + ) + grouped_splits = grouped_split_cache[param_name] + return grouped_splits[gemm_idx] + num_global_experts = get_pg_size(self._pg_collection.ep) * self.num_gemms local_expert_indices_offset = get_pg_rank(self._pg_collection.ep) * self.num_gemms ep_axis = len(sharded_offsets) @@ -1769,11 +1853,11 @@ def _sharded_state_dict_grouped( for gemm_idx in range(self.num_gemms): global_expert_idx = local_expert_indices_offset + gemm_idx state_dict = { - f"{gemm_idx}.weight": full_state_dict[f"weight{gemm_idx}"], + f"{gemm_idx}.weight": get_gemm_tensor("weight", gemm_idx), f"{gemm_idx}._extra_state": extra_states[gemm_idx], } if self.use_bias: - state_dict[f"{gemm_idx}.bias"] = full_state_dict[f"bias{gemm_idx}"] + state_dict[f"{gemm_idx}.bias"] = get_gemm_tensor("bias", gemm_idx) if singleton_local_shards: expert_prefix = f"{global_expert_idx}.{prefix}" new_sharded_offsets = sharded_offsets diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index be6a5638277..beb00391759 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -362,7 +362,10 @@ def 
_build_model_and_main_param_groups( if model_param.type() in ['torch.cuda.HalfTensor', 'torch.cuda.BFloat16Tensor']: # Generate sharded model param. - if is_float8tensor(model_param) and config.fp8_recipe != "delayed": + if ( + cls._is_distopt_quantized_param(model_param) + and config.fp8_recipe != "delayed" + ): # MXFP8Tensor and BlockwiseQTensor don't support view(-1) shard_model_param = None else: @@ -382,7 +385,7 @@ def _build_model_and_main_param_groups( # precision at the beginning of training (this problem will not occur if the # training is long enough or if the main params are loaded from a # checkpoint). - if is_float8tensor(model_param): + if cls._is_distopt_quantized_param(model_param): if hasattr(model_param, 'get_high_precision_init_val'): shard_main_param = ( model_param.get_high_precision_init_val() @@ -919,6 +922,70 @@ def _get_main_param_and_optimizer_states(self, model_param): tensors[k] = v return tensors + @staticmethod + def _is_grouped_quantized_tensor(tensor: torch.Tensor) -> bool: + """Check if tensor is a TE GroupedTensor using quantized storage.""" + return ( + hasattr(tensor, "split_into_quantized_tensors") + and callable(tensor.split_into_quantized_tensors) + and getattr(tensor, "quantizer", None) is not None + ) + + @classmethod + def _is_distopt_quantized_param(cls, tensor: torch.Tensor) -> bool: + """Check if tensor should follow quantized parameter path in dist optimizer.""" + return is_float8tensor(tensor) or cls._is_grouped_quantized_tensor(tensor) + + def _expand_quantized_param_shard_for_cast( + self, + model_param: torch.Tensor, + shard_main_param: Optional[torch.Tensor], + start_offset: Optional[int], + ): + """Expand one quantized model param to cast-ready entries. + + For grouped quantized tensors, split into member quantized tensors and map the sharded + master slice to per-member offset ranges, while preserving deterministic ordering across + DP ranks. 
+ """ + if not self._is_grouped_quantized_tensor(model_param): + return [model_param], [shard_main_param], [start_offset] + + quantized_members = model_param.quantized_tensors + if quantized_members is None: + quantized_members = model_param.split_into_quantized_tensors() + + shard_start = 0 if start_offset is None else start_offset + shard_size = 0 if shard_main_param is None else shard_main_param.numel() + shard_end = shard_start + shard_size + shard_flat = None if shard_main_param is None else shard_main_param.view(-1) + + expanded_model_params = [] + expanded_shard_main_params = [] + expanded_start_offsets = [] + member_offset = 0 + for member in quantized_members: + member_numel = member.numel() + member_start = member_offset + member_end = member_start + member_numel + overlap_start = max(member_start, shard_start) + overlap_end = min(member_end, shard_end) + + member_master = None + member_start_offset = None + if overlap_start < overlap_end: + local_start = overlap_start - shard_start + local_end = overlap_end - shard_start + member_master = shard_flat[local_start:local_end] + member_start_offset = overlap_start - member_start + + expanded_model_params.append(member) + expanded_shard_main_params.append(member_master) + expanded_start_offsets.append(member_start_offset) + member_offset = member_end + + return expanded_model_params, expanded_shard_main_params, expanded_start_offsets + def _set_main_param_and_optimizer_states(self, model_param, tensors): """Set the main param and optimizer states corresponding to the input model_param. 
@@ -2151,7 +2218,7 @@ def split_state_dict_if_needed(self, state_dict): fp8_gbuf_indices = [] for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges): for dtype, _ in gbuf_range_maps.items(): - if is_float8tensor(self.buffers[gbuf_idx].params[0]): + if self._is_distopt_quantized_param(self.buffers[gbuf_idx].params[0]): fp8_gbuf_indices.append(gbuf_idx) if len(fp8_gbuf_indices) == 0: return @@ -2173,7 +2240,7 @@ def split_state_dict_if_needed(self, state_dict): new_state_dict = {'buckets_coalesced': state_dict['buckets_coalesced']} for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges): for dtype, _ in gbuf_range_maps.items(): - if not is_float8tensor(self.buffers[gbuf_idx].params[0]): + if not self._is_distopt_quantized_param(self.buffers[gbuf_idx].params[0]): new_state_dict[gbuf_idx] = state_dict[dtype_to_gbuf_idx[dtype]] for fp8_gbuf_idx in fp8_gbuf_indices: @@ -2373,7 +2440,7 @@ def _get_fp8_params_and_shard_fp32_from_fp8(self): idx = 0 for buffer in buffers: for param in buffer.params: - if is_float8tensor(param): + if self._is_distopt_quantized_param(param): fp8_params.append(param) shard_fp32_from_fp8.append(None) shard_offsets_in_fp8.append(None) @@ -2388,7 +2455,7 @@ def get_shard_fp32_from_fp8(shard_main_groups, model_groups): """ for shard_main_group, model_group in zip(shard_main_groups, model_groups): for shard_main_param, model_param in zip(shard_main_group, model_group): - if is_float8tensor(model_param): + if self._is_distopt_quantized_param(model_param): param_range_map = self._get_model_param_range_map(model_param) param_range = param_range_map["param"] assert param_range.size == shard_main_param.nelement() @@ -2465,8 +2532,29 @@ def _copy_main_params_to_model_params(self): if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8: return + fp8_params, shard_fp32_from_fp8, shard_offsets_in_fp8 = ( + self._get_fp8_params_and_shard_fp32_from_fp8() + ) + expanded_fp8_params = [] + expanded_shard_fp32_from_fp8 = [] + 
expanded_shard_offsets_in_fp8 = [] + for model_param, shard_main_param, start_offset in zip( + fp8_params, shard_fp32_from_fp8, shard_offsets_in_fp8 + ): + sub_model_params, sub_shard_main_params, sub_start_offsets = ( + self._expand_quantized_param_shard_for_cast( + model_param, shard_main_param, start_offset + ) + ) + expanded_fp8_params.extend(sub_model_params) + expanded_shard_fp32_from_fp8.extend(sub_shard_main_params) + expanded_shard_offsets_in_fp8.extend(sub_start_offsets) + quantize_param_shard( - *self._get_fp8_params_and_shard_fp32_from_fp8(), self.data_parallel_group + expanded_fp8_params, + expanded_shard_fp32_from_fp8, + expanded_shard_offsets_in_fp8, + self.data_parallel_group, ) # Utility method for copying group params. @@ -2486,7 +2574,7 @@ def copy_group_params(shard_main_groups, model_groups): world_range.start : world_range.end ] - if is_float8tensor(model_param): + if self._is_distopt_quantized_param(model_param): # FP8 params are quantized in the above "quantize_param_shard" function. 
continue else: @@ -2598,8 +2686,12 @@ def copy_group_params(model_groups, shard_main_groups): # Use param from state_dict to initialize main_param model_param = model_param_to_state_dict_param_map[model_param] - if is_float8tensor(model_param): - shard_model_param = dequantize_fp8_tensor(model_param).view(-1)[ + if self._is_distopt_quantized_param(model_param): + if self._is_grouped_quantized_tensor(model_param): + dequantized_model_param = model_param.float() + else: + dequantized_model_param = dequantize_fp8_tensor(model_param) + shard_model_param = dequantized_model_param.view(-1)[ param_range.start : param_range.end ] else: diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index 8a271ab4fb9..8168c8ab611 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -7,6 +7,7 @@ from copy import deepcopy from dataclasses import dataclass from functools import partial +from itertools import chain from math import ceil from typing import Optional, Protocol, Tuple @@ -712,10 +713,32 @@ def __init__( set_save_original_input(self.linear_fc1) + # Fused implementation with Transformer Engine op fuser API + if self.config.use_transformer_engine_op_fuser: + assert ( + self._is_fused_impl_supported() + ), "Fused GroupedMLP is not supported for this configuration." + self._with_fused_impl: bool = self.config.use_transformer_engine_op_fuser + self._fused_ops: Optional[Tuple[torch.nn.Module]] = None + if ( + self.config.gated_linear_unit + and self.config.moe_mlp_glu_interleave_size is not None + and not self._with_fused_impl + ): + logger.warning( + "`moe_mlp_glu_interleave_size=%s` is enabled, but fused MoE MLP implementation " + "is not supported for this configuration. 
The non-fused path may incur extra " + "tensor reordering/copy overhead each forward pass.", + self.config.moe_mlp_glu_interleave_size, + ) + if self.config.fp8 or self.config.fp4: assert HAVE_TE, "FP8 and FP4 requires TE." - self.quantization_padding = Fp8Padding(self.num_local_experts) - self.quantization_unpadding = Fp8Unpadding(self.num_local_experts) + align_size = 256 if self._with_fused_impl else None + self.quantization_padding = Fp8Padding(self.num_local_experts, align_size=align_size) + self.quantization_unpadding = Fp8Unpadding( + self.num_local_experts, align_size=align_size + ) @staticmethod def _apply_bias(intermediate_parallel, bias_parallel, tokens_per_expert, permuted_probs): @@ -737,6 +760,192 @@ def _apply_bias(intermediate_parallel, bias_parallel, tokens_per_expert, permute .to(intermediate_parallel.dtype) ) + def _is_fused_impl_supported(self) -> bool: + """Check if the TE op fuser supports implementing this module.""" + + # Check Transformer Engine installation + if not HAVE_TE: + return False # Transformer Engine is not available + try: + from transformer_engine.pytorch.ops import GroupedLinear, ScaledSwiGLU + except ImportError: + return False # Transformer Engine version is too old + + # Check for unsupported features + if self.tp_group.size() > 1: + return False # Tensor parallelism is not supported + if self.offload_expert_fc1 or self.offload_moe_act: + return False # Fine-grained activation offloading is not supported + if self.config.moe_apply_probs_on_input: + return False # Pre-multiplying probs is not supported + + # Check grouped linear modules + if not isinstance(self.linear_fc1, te.pytorch.GroupedLinear): + return False + if not isinstance(self.linear_fc2, te.pytorch.GroupedLinear): + return False + if self.linear_fc1.need_backward_dw() or self.linear_fc2.need_backward_dw(): + return False # Delayed weight gradient compuation is not supported + + # Check activation + if self.activation_func != F.silu or not 
self.config.gated_linear_unit: + return False # Expected SwiGLU activation + + return True + + def _make_fused_ops(self) -> torch.nn.Module: + """Construct fused module for FC1, activation, and FC2.""" + + # Container for fusible ops + ops = te.pytorch.ops.Sequential() + + # Check if there are 1 or "num_gemms" params in the GroupedLinear module. + fc1_single_grouped_parameter = self.linear_fc1.single_grouped_parameter + fc1_weight_dtype = ( + self.linear_fc1.weight.dtype + if fc1_single_grouped_parameter + else self.linear_fc1.weight0.dtype + ) + fc2_single_grouped_parameter = self.linear_fc2.single_grouped_parameter + fc2_weight_dtype = ( + self.linear_fc2.weight.dtype + if fc2_single_grouped_parameter + else self.linear_fc2.weight0.dtype + ) + + # TODO:ksivamani: Why meta device? + op = te.pytorch.ops.GroupedLinear( + self.linear_fc1.num_gemms, + self.linear_fc1.in_features, + self.linear_fc1.out_features, + bias=self.linear_fc1.use_bias, + device=torch.cuda.current_device(), + dtype=fc1_weight_dtype, + accumulate_into_main_grad=self.linear_fc1.fuse_wgrad_accumulation, + single_grouped_parameter=fc1_single_grouped_parameter, + ) + + # Copy the weights from GroupedLinear module to GroupedLinear op. 
+ if fc1_single_grouped_parameter: + setattr(op, "weight", getattr(self.linear_fc1, "weight")) + + for idx in range(self.linear_fc1.num_gemms): + if not fc1_single_grouped_parameter: + setattr(op, f"weight{idx}", getattr(self.linear_fc1, f"weight{idx}")) + if self.linear_fc1.use_bias: + setattr(op, f"bias{idx}", getattr(self.linear_fc1, f"bias{idx}")) + ops.append(op) + + # Activation and post-multiply probs + op = te.pytorch.ops.ScaledSwiGLU( + glu_interleave_size=self.config.moe_mlp_glu_interleave_size + ) + ops.append(op) + + # FC2 + has_bias = self.linear_fc2.use_bias + op = te.pytorch.ops.GroupedLinear( + self.linear_fc2.num_gemms, + self.linear_fc2.in_features, + self.linear_fc2.out_features, + bias=self.linear_fc2.use_bias, + device=torch.cuda.current_device(), + dtype=fc2_weight_dtype, + accumulate_into_main_grad=self.linear_fc2.fuse_wgrad_accumulation, + single_grouped_parameter=fc2_single_grouped_parameter, + ) + + # Copy the weights from GroupedLinear module to GroupedLinear op. + if fc2_single_grouped_parameter: + setattr(op, "weight", getattr(self.linear_fc2, "weight")) + + for idx in range(self.linear_fc2.num_gemms): + if not fc2_single_grouped_parameter: + setattr(op, f"weight{idx}", getattr(self.linear_fc2, f"weight{idx}")) + if self.linear_fc2.use_bias: + setattr(op, f"bias{idx}", getattr(self.linear_fc2, f"bias{idx}")) + ops.append(op) + + # Emulate submodule pre-forward hooks + ops.register_forward_pre_hook(self._make_fused_impl_pre_forward_hook()) + + return ops + + def _make_fused_impl_pre_forward_hook(self) -> Callable: + """Make function that calls submodule pre-forward callback hooks. + + This is intended for compatibility with + DistributedDataParallel hooks that trigger parameter + all-gathers. It does not support general pre-forward hooks + since they may manipulate intermediate tensors that are never + instantiated by the fused implementation. 
+ + """ + + def forward_pre_hook(module, *_) -> None: + for submodule in chain(self.linear_fc1.modules(), self.linear_fc2.modules()): + for hook in submodule._forward_pre_hooks.values(): + # Assume that hook does not interact with input + ret = hook(submodule, None) + if ret is not None: + raise RuntimeError( + f"Applying a fused implementation for {self.__class__.__name__}, " + f"but a {submodule.__class__.__name__} submodule " + "has a pre-forward hook that modifies the input tensor." + ) + + return forward_pre_hook + + def _fused_forward( + self, + permuted_local_hidden_states: torch.Tensor, + tokens_per_expert: torch.Tensor, + permuted_probs: torch.Tensor, + ) -> torch.Tensor: + """Forward pass using Transformer Engine operation fuser API.""" + + # Construct fused impl if needed + # Note: We initialize during the first forward pass in case + # the params are modified after the constructor. + # Note: The fused impl is stored in a tuple to avoid + # registering submodules. + if self._fused_ops is None: + self._fused_ops = (self._make_fused_ops(),) + (ops,) = self._fused_ops + + # Apply padding if needed + unpadded_tokens_per_expert = None + if self.config.moe_router_padding_for_quantization: + # Padding has already been applied in router + pass + elif self.config.fp8 or self.config.fp4: + tokens_per_expert = tokens_per_expert.tolist() + unpadded_tokens_per_expert = tokens_per_expert + permuted_local_hidden_states, tokens_per_expert = self.quantization_padding( + permuted_local_hidden_states, tokens_per_expert + ) + permuted_probs, _ = self.quantization_padding( + permuted_probs.unsqueeze(-1), unpadded_tokens_per_expert + ) + permuted_probs = permuted_probs.squeeze(-1) + tokens_per_expert = torch.tensor( + tokens_per_expert, dtype=torch.int, device=permuted_probs.device + ) + + # Call fused impl + output = ops( + permuted_local_hidden_states, + tokens_per_expert, # FC1 + permuted_probs, # Scaled SwiGLU + tokens_per_expert, # FC2 + ) + + # Remove padding if needed 
+ if unpadded_tokens_per_expert is not None: + output = self.quantization_unpadding(output, unpadded_tokens_per_expert) + + return output + def forward( self, permuted_local_hidden_states: torch.Tensor, @@ -754,17 +963,30 @@ def forward( Return: output (torch.Tensor): The output of the local experts. """ + + # Call fused impl if enabled + if self._with_fused_impl: + output = self._fused_forward( + permuted_local_hidden_states, tokens_per_expert, permuted_probs + ) + output_bias = None + return output, output_bias + + # Apply padding if needed + unpadded_tokens_per_expert = None tokens_per_expert: list[int] = tokens_per_expert.tolist() - if self.config.fp8 or self.config.fp4: - actual_tokens_per_expert = tokens_per_expert + permuted_probs = permuted_probs.unsqueeze(-1) + if self.config.moe_router_padding_for_quantization: + # Padding has already been applied in router + pass + elif self.config.fp8 or self.config.fp4: + unpadded_tokens_per_expert = tokens_per_expert permuted_local_hidden_states, tokens_per_expert = self.quantization_padding( permuted_local_hidden_states, tokens_per_expert ) permuted_probs, _ = self.quantization_padding( - permuted_probs.unsqueeze(-1), actual_tokens_per_expert + permuted_probs, unpadded_tokens_per_expert ) - else: - permuted_probs = permuted_probs.unsqueeze(-1) if self.config.moe_apply_probs_on_input: assert ( @@ -790,15 +1012,38 @@ def forward( ) def bias_act_func(intermediate_parallel, bias_parallel, permuted_probs): + + # Whether activation function is interleaved GLU + with_glu_interleaving = ( + self.config.gated_linear_unit + and self.config.moe_mlp_glu_interleave_size is not None + ) + + def remove_glu_interleaving(x: torch.Tensor) -> torch.Tensor: + """Reorder tensor so gate and linear units are contiguous. + + Should only be applied if the activation function is + an interleaved GLU. 
+ + """ + shape = x.size() + interleave_size = self.config.moe_mlp_glu_interleave_size + x = x.reshape(-1, shape[-1] // (2 * interleave_size), 2, interleave_size) + x = x.transpose(1, 2).contiguous() + x = x.view(shape) + return x + if self.config.use_te_activation_func: if bias_parallel is not None: intermediate_parallel = intermediate_parallel + bias_parallel + if with_glu_interleaving: + intermediate_parallel = remove_glu_interleaving(intermediate_parallel) intermediate_parallel = self.activation_func(intermediate_parallel) if permuted_probs is not None: original_dtype = intermediate_parallel.dtype intermediate_parallel = intermediate_parallel * permuted_probs intermediate_parallel = intermediate_parallel.to(original_dtype) - elif self.config.bias_activation_fusion: + elif self.config.bias_activation_fusion and not with_glu_interleaving: if self.activation_func == F.silu and self.config.gated_linear_unit: # dtype is handled inside the fused kernel intermediate_parallel = weighted_bias_swiglu_impl( @@ -831,6 +1076,8 @@ def bias_act_func(intermediate_parallel, bias_parallel, permuted_probs): if self.config.gated_linear_unit: def glu(x): + if with_glu_interleaving: + x = remove_glu_interleaving(x) x_glu, x_linear = torch.chunk(x, 2, dim=-1) if (val := self.config.activation_func_clamp_value) is not None: x_glu = x_glu.clamp(min=None, max=val) @@ -870,8 +1117,8 @@ def glu(x): output = self._apply_bias(output, output_bias, tokens_per_expert, permuted_probs) # upad and concat the output - if self.config.fp8 or self.config.fp4: - output = self.quantization_unpadding(output, actual_tokens_per_expert) + if unpadded_tokens_per_expert is not None: + output = self.quantization_unpadding(output, unpadded_tokens_per_expert) output_bias = None diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index bf8df7a2482..dbcc25a905c 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py 
@@ -1285,9 +1285,12 @@ def get_align_size_for_quantization(config: TransformerConfig) -> int: Returns: int: The alignment size for quantization. """ + # CUTLASS kernel for grouped GEMM assumes 256 alignment. + if config.use_transformer_engine_op_fuser: + return 256 if config.fp8: return get_fp8_align_size(config.fp8_recipe) - elif config.fp4: + if config.fp4: return get_fp4_align_size(config.fp4_recipe) return 16 diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index f55de2ae2ff..7ec5636ab87 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -395,6 +395,10 @@ class TransformerConfig(ModelParallelConfig): fused_single_qkv_rope: bool = False """If set, avoid splitting QKV before ROPE forward and avoid concatenating ROPE dgrads.""" + use_transformer_engine_op_fuser: bool = False + """If True, submodules may use Transformer Engine's operation fuser + API to enable advanced fusions.""" + #################### # activation recomputation #################### @@ -761,6 +765,15 @@ class TransformerConfig(ModelParallelConfig): """Number of SMs to use for HybridEP. In pure NVL scenarios, 16 SMs can generally achieve good bandwidth.""" + moe_mlp_glu_interleave_size: Optional[int] = None + """When set, GLU activations in the MoE grouped MLP layer will use a + block interleaved format. Instead of interpreting the input tensor + as a concatenation of gates and linear units, it will be + interpreted as alternating blocks of gates and linear units. 
+ + This data format is experimental and primarily intended to enable + advanced fused kernels.""" + ################## # Context Parallel ################## diff --git a/tests/unit_tests/models/test_mamba_moe_model.py b/tests/unit_tests/models/test_mamba_moe_model.py index a55042ee979..1052d7781a5 100644 --- a/tests/unit_tests/models/test_mamba_moe_model.py +++ b/tests/unit_tests/models/test_mamba_moe_model.py @@ -286,6 +286,8 @@ "fallback_to_eager_attn": False, "linear_attention_type": None, "moe_router_force_biased": None, + "moe_mlp_glu_interleave_size": None, + "use_transformer_engine_op_fuser": False, } # Fields to ignore entirely (ephemeral, environment-specific, very large). SKIP_FIELDS = set() From 02961010a0a5720b0a61eb2f247fa5bc5bc30759 Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Wed, 25 Mar 2026 09:30:23 +0800 Subject: [PATCH 320/334] [Dev] Support EP Overlap's Dynamic Computation Stream For Full-Iter CUDA Graph (#3818) --- .../common/model_chunk_schedule_plan.py | 19 ++++++----- .../core/models/gpt/fine_grained_callables.py | 4 ++- .../core/pipeline_parallel/combined_1f1b.py | 9 +++-- megatron/core/pipeline_parallel/utils.py | 33 +++++++++---------- .../a2a_overlap/test_schedule_layer_1f1b.py | 15 ++++----- 5 files changed, 43 insertions(+), 37 deletions(-) diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py index 2e26e5fd1d3..2a7476228ae 100644 --- a/megatron/core/models/common/model_chunk_schedule_plan.py +++ b/megatron/core/models/common/model_chunk_schedule_plan.py @@ -63,8 +63,8 @@ def __init__(self, layer, event, chunk_state, comp_stream, comm_stream, extra_ar event (torch.cuda.Event): record CUDA event across multiple nodes on different streams for synchronization. chunk_state (ModelChunkState): model state shared in the model chunk. - comp_stream (torch.cuda.Stream): CUDA stream for computation. 
- comm_stream (torch.cuda.Stream): CUDA stream for communication. + comp_stream (Callable): Func that returns CUDA stream for computation. + comm_stream (Callable): Func that returns CUDA stream for communication. extra_args (dict): extra arguments for the layer. The event and chunk_state are binded to the TransformerModelChunkSchedulePlan @@ -317,9 +317,6 @@ def __init__( self.post_process = None self.vp_stage = model.vp_stage - comp_stream = get_comp_stream() - comm_stream = get_comm_stream() - # save the inputs of model.forward() to ModelChunkState self._model_chunk_state.input_ids = input_ids self._model_chunk_state.position_ids = position_ids @@ -338,18 +335,22 @@ def __init__( self._model_chunk_state.attention_bias = None # build preprocess - self.pre_process = PreProcessNode(model, self._model_chunk_state, self._event, comp_stream) + self.pre_process = PreProcessNode( + model, self._model_chunk_state, self._event, get_comp_stream + ) # build layer schedule plan for each layer. # The methods to obtain layers are different for MTP so we need the other build plan for # MTP. Also, this can help annotate MTP layer so that it can know where MTP is. 
- self._build_layer_schedule_plan(model.decoder, comp_stream, comm_stream) - self._build_layer_schedule_plan(getattr(model, "mtp", None), comp_stream, comm_stream) + self._build_layer_schedule_plan(model.decoder, get_comp_stream, get_comm_stream) + self._build_layer_schedule_plan( + getattr(model, "mtp", None), get_comp_stream, get_comm_stream + ) # build post process if model.post_process: self.post_process = PostProcessNode( - model, self._model_chunk_state, self._event, comp_stream + model, self._model_chunk_state, self._event, get_comp_stream ) def _build_layer_schedule_plan(self, module, comp_stream, comm_stream): diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index 6658b6363ea..8d1036b5bae 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -3,7 +3,7 @@ import weakref from contextlib import nullcontext from functools import partial -from typing import Optional +from typing import Callable, Optional import torch from torch import Tensor @@ -330,6 +330,8 @@ def backward_dw(self): """Computes the weight gradients for the transformer layer node.""" if not self.delay_wgrad_compute: return + if isinstance(self.stream, Callable): + self.stream = self.stream() with torch.cuda.stream(self.stream): torch.cuda.nvtx.range_push(f"{self.name} wgrad") for module in self.bwd_dw_callables: diff --git a/megatron/core/pipeline_parallel/combined_1f1b.py b/megatron/core/pipeline_parallel/combined_1f1b.py index 232d9c8cd70..fdd3b32201f 100644 --- a/megatron/core/pipeline_parallel/combined_1f1b.py +++ b/megatron/core/pipeline_parallel/combined_1f1b.py @@ -8,7 +8,12 @@ from megatron.core.enums import Fp8Recipe from megatron.core.fp8_utils import get_fp8_context -from megatron.core.pipeline_parallel.utils import AbstractSchedulePlan, ScheduleNode, set_streams +from megatron.core.pipeline_parallel.utils import ( + AbstractSchedulePlan, + 
ScheduleNode, + get_comp_stream, + set_streams, +) from megatron.core.utils import get_attr_wrapped_model # Types @@ -405,7 +410,7 @@ def forward_backward_step(): from megatron.core.pipeline_parallel.schedules import forward_step_calc_loss loss_node = ScheduleNode( - loss_func, torch.cuda.current_stream(), f_schedule_plan.event, name="loss_func" + loss_func, get_comp_stream, f_schedule_plan.event, name="loss_func" ) loss_func = loss_node.forward output_tensor, num_tokens = forward_step_calc_loss( diff --git a/megatron/core/pipeline_parallel/utils.py b/megatron/core/pipeline_parallel/utils.py index 8f6b25eec32..8cb80741063 100644 --- a/megatron/core/pipeline_parallel/utils.py +++ b/megatron/core/pipeline_parallel/utils.py @@ -154,7 +154,7 @@ def __init__( Args: forward_func (callable): Function to execute during the forward pass. - stream (torch.cuda.Stream): The CUDA stream for this node's computation. + stream (Callable): Func that returns CUDA stream for computation. This can be either a 'compute' stream or a 'communicate' stream. - 'compute' stream: Used for computational nodes like attention and experts. - 'communicate' stream: Used for nodes that handle token communication, @@ -198,6 +198,9 @@ def forward(self, inputs=()): return self._forward(*inputs) def _forward(self, *inputs): + # Lazy initialization of stream + if isinstance(self.stream, Callable): + self.stream = self.stream() with self.stream_acquire_context(f"{self.name} forward"): self.inputs = [make_viewless(e).detach() if e is not None else None for e in inputs] for i, input in enumerate(self.inputs): @@ -235,6 +238,9 @@ def backward(self, output_grad): return self._backward(*output_grad) def _backward(self, *output_grad): + # Lazy initialization of stream + if isinstance(self.stream, Callable): + self.stream = self.stream() with self.stream_acquire_context(f"{self.name} backward"): outputs = self.output if not isinstance(outputs, tuple): @@ -323,32 +329,25 @@ def run( ... 
+_USE_DYNAMIC_COMP_STREAM = None _COMP_STREAM = None _COMM_STREAM = None -def set_streams(comp_stream=None, comm_stream=None): - """Set the streams for communication and computation""" - global _COMP_STREAM +def set_streams(comm_stream=None): + """Set the stream for communication operations.""" global _COMM_STREAM - if _COMP_STREAM is not None and _COMM_STREAM is not None: - return - if comp_stream is None: - comp_stream = torch.cuda.current_stream() - if comm_stream is None: - comm_stream = torch.cuda.Stream(device="cuda") - - assert _COMP_STREAM is None - assert _COMM_STREAM is None - _COMP_STREAM = comp_stream - _COMM_STREAM = comm_stream + # Set communication stream + if _COMM_STREAM is None: + if comm_stream is None: + comm_stream = torch.cuda.Stream(device="cuda") + _COMM_STREAM = comm_stream def get_comp_stream(): """Get the stream for computation""" - global _COMP_STREAM - return _COMP_STREAM + return torch.cuda.current_stream() def get_comm_stream(): diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py index c6c4a75af99..4bbab6ccb30 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py @@ -12,6 +12,7 @@ get_gpt_mtp_block_spec, ) from megatron.core.models.gpt.gpt_model import GPTModel +from megatron.core.pipeline_parallel.utils import get_comm_stream, get_comp_stream, set_streams from megatron.core.utils import is_te_min_version from tests.unit_tests.a2a_overlap.utils import ( DummyState, @@ -68,9 +69,8 @@ def run_transformer_layer_a2a_overlap_with_capture(model, input_tensors, microba for i in range(len(input_tensors)): input_tensors[i] = input_tensors[i].clone() + set_streams() event = torch.cuda.Event() - comp_stream = torch.cuda.current_stream() - comm_stream = torch.cuda.Stream(device="cuda") state = DummyState() state.is_mtp = False state.model = model @@ -79,8 +79,8 @@ def 
run_transformer_layer_a2a_overlap_with_capture(model, input_tensors, microba transformer_layer, event, state, - comp_stream, - comm_stream, + get_comp_stream, + get_comm_stream, extra_args={"is_moe": True, "enable_deepep": False}, ) for _ in range(microbatches) @@ -183,8 +183,7 @@ def run_mtp_layer_a2a_overlap_with_capture( for i in range(len(hidden_states)): hidden_states[i] = hidden_states[i].clone() - comp_stream = torch.cuda.current_stream() - comm_stream = torch.cuda.Stream(device="cuda") + set_streams() layers = [] for _ in range(microbatches): state = DummyState() @@ -203,8 +202,8 @@ def run_mtp_layer_a2a_overlap_with_capture( model.mtp.layers[0], event, state, - comp_stream, - comm_stream, + get_comp_stream, + get_comm_stream, extra_args={ "is_moe": True, "enable_deepep": False, From 4108d68d46f0653b936dd09653ce3ee7fb0b87d1 Mon Sep 17 00:00:00 2001 From: jingqiny-99 Date: Wed, 25 Mar 2026 14:00:27 +0800 Subject: [PATCH 321/334] [dev] mHC kernel fusion (#3828) --- megatron/core/fusions/fused_mhc_kernels.py | 964 ++++++++++++++++++ megatron/core/transformer/hyper_connection.py | 292 +++--- .../core/transformer/transformer_config.py | 32 + .../golden_values_dev_dgx_h100.json | 498 ++++----- .../fusions/test_fused_mhc_kernels.py | 564 ++++++++++ .../unit_tests/models/test_mamba_moe_model.py | 1 + 6 files changed, 1966 insertions(+), 385 deletions(-) create mode 100644 megatron/core/fusions/fused_mhc_kernels.py create mode 100644 tests/unit_tests/fusions/test_fused_mhc_kernels.py diff --git a/megatron/core/fusions/fused_mhc_kernels.py b/megatron/core/fusions/fused_mhc_kernels.py new file mode 100644 index 00000000000..6a19255196a --- /dev/null +++ b/megatron/core/fusions/fused_mhc_kernels.py @@ -0,0 +1,964 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Fused cuTile kernels for mHC (Manifold-Constrained Hyper-Connections). + +Requires cuda.tile (cuTile) for optimal performance on supported GPUs +(compute capability 10.x+). 
Reference (non-fused) implementations live in +``megatron.core.transformer.hyper_connection`` and are used when cuTile is +unavailable or when the ``use_fused_mhc`` config flag is False. + +Four fused operations: + - sinkhorn: Sinkhorn-Knopp projection to doubly stochastic matrix + - h_aggregate: weighted n-stream -> 1-stream aggregation + - h_post_bda: fused H_res @ residual + H_post * (x + bias) + - proj_rms: fused projection + RMS normalization +""" + +import math +from typing import Optional, Tuple + +import torch +from torch import Tensor + +# --------------------------------------------------------------------------- +# Check cuTile availability +# --------------------------------------------------------------------------- +_CUTILE_AVAILABLE = False +try: + import cuda.tile as ct + + _CUTILE_AVAILABLE = True +except ImportError: + pass + + +def is_cutile_available() -> bool: + """Return True if cuTile fused kernels are available.""" + return _CUTILE_AVAILABLE + + +# ============================================================================ +# CuTile implementations (only defined when cuda.tile is available) +# ============================================================================ + +if _CUTILE_AVAILABLE: + ConstInt = ct.Constant[int] + PAD_ZERO = ct.PaddingMode.ZERO + LOG2E = 1.4426950408889634 + + # -- Sinkhorn kernels ---------------------------------------------------- + + @ct.kernel + def _ct_sinkhorn_fwd_kernel( + inp, out, M_init_out, eps, HC: ConstInt, NUM_ITERS: ConstInt, TILE_SIZE: ConstInt + ): + pid = ct.bid(0) + logits = ct.load(inp, index=(pid, 0, 0), shape=(TILE_SIZE, HC, HC)).astype(ct.float32) + row_max = ct.max(logits, axis=2, keepdims=True) + M = ct.exp2((logits - row_max) * LOG2E) + ct.store( + M_init_out, + index=(pid, 0, 0), + tile=ct.reshape(M.astype(M_init_out.dtype), (TILE_SIZE, HC, HC)), + ) + for _ in range(NUM_ITERS): + row_sum = ct.sum(M, axis=2, keepdims=True) + M = M / (row_sum + eps) + col_sum = ct.sum(M, axis=1, 
keepdims=True) + M = M / (col_sum + eps) + ct.store(out, index=(pid, 0, 0), tile=ct.reshape(M.astype(out.dtype), (TILE_SIZE, HC, HC))) + + @ct.kernel + def _ct_sinkhorn_bwd_kernel( + grad_out, + M_init, + grad_inp, + ws_M, + ws_rs, + ws_cs, + eps, + HC: ConstInt, + NUM_ITERS: ConstInt, + TILE_SIZE: ConstInt, + ): + pid = ct.bid(0) + M_base = pid * (2 * NUM_ITERS) + v_base = pid * NUM_ITERS + + M = ct.load(M_init, index=(pid, 0, 0), shape=(TILE_SIZE, HC, HC)).astype(ct.float32) + for t in range(NUM_ITERS): + ct.store(ws_M, index=(M_base + 2 * t, 0, 0), tile=M) + row_sum = ct.sum(M, axis=2, keepdims=True) + ct.store(ws_rs, index=(v_base + t, 0, 0), tile=row_sum) + M = M / (row_sum + eps) + ct.store(ws_M, index=(M_base + 2 * t + 1, 0, 0), tile=M) + col_sum = ct.sum(M, axis=1, keepdims=True) + ct.store(ws_cs, index=(v_base + t, 0, 0), tile=col_sum) + M = M / (col_sum + eps) + + grad = ct.load(grad_out, index=(pid, 0, 0), shape=(TILE_SIZE, HC, HC)).astype(ct.float32) + for t_rev in range(NUM_ITERS): + t = NUM_ITERS - 1 - t_rev + col_s = ct.load(ws_cs, index=(v_base + t, 0, 0), shape=(TILE_SIZE, 1, HC)) + grad = grad / (col_s + eps) + col_corr = ct.sum(grad * M, axis=1, keepdims=True) + grad = grad - col_corr + M = ct.load(ws_M, index=(M_base + 2 * t + 1, 0, 0), shape=(TILE_SIZE, HC, HC)) + row_s = ct.load(ws_rs, index=(v_base + t, 0, 0), shape=(TILE_SIZE, HC, 1)) + grad = grad / (row_s + eps) + row_corr = ct.sum(grad * M, axis=2, keepdims=True) + grad = grad - row_corr + M = ct.load(ws_M, index=(M_base + 2 * t, 0, 0), shape=(TILE_SIZE, HC, HC)) + grad = grad * M + ct.store(grad_inp, index=(pid, 0, 0), tile=grad.astype(grad_inp.dtype)) + + def _cutile_sinkhorn_fwd( + input_logits: Tensor, num_iterations: int, eps: float = 1e-8 + ) -> Tuple[Tensor, Tensor]: + original_shape = input_logits.shape + hc = original_shape[-1] + N_batch = input_logits.numel() // (hc * hc) + TILE_SIZE = math.gcd(N_batch, 128) + dev = input_logits.device + out = torch.empty(N_batch, hc, hc, 
dtype=input_logits.dtype, device=dev) + M_init = torch.empty(N_batch, hc, hc, dtype=input_logits.dtype, device=dev) + ct.launch( + torch.cuda.current_stream(), + (math.ceil(N_batch / TILE_SIZE), 1, 1), + _ct_sinkhorn_fwd_kernel, + (input_logits.view(N_batch, hc, hc), out, M_init, eps, hc, num_iterations, TILE_SIZE), + ) + return out.view(original_shape), M_init.view(original_shape) + + def _cutile_sinkhorn_bwd( + grad_output: Tensor, M_init: Tensor, num_iterations: int, eps: float = 1e-8 + ) -> Tensor: + original_shape = grad_output.shape + hc = original_shape[-1] + N_batch = grad_output.numel() // (hc * hc) + TILE_SIZE = math.gcd(N_batch, 128) + dev = grad_output.device + ws_M = torch.empty(N_batch * 2 * num_iterations, hc, hc, dtype=torch.float32, device=dev) + ws_rs = torch.empty(N_batch * num_iterations, hc, 1, dtype=torch.float32, device=dev) + ws_cs = torch.empty(N_batch * num_iterations, 1, hc, dtype=torch.float32, device=dev) + grad_input = torch.empty(N_batch, hc, hc, dtype=grad_output.dtype, device=dev) + ct.launch( + torch.cuda.current_stream(), + (math.ceil(N_batch / TILE_SIZE), 1, 1), + _ct_sinkhorn_bwd_kernel, + ( + grad_output.view(N_batch, hc, hc), + M_init.view(N_batch, hc, hc), + grad_input, + ws_M, + ws_rs, + ws_cs, + eps, + hc, + num_iterations, + TILE_SIZE, + ), + ) + return grad_input.view(original_shape) + + # -- H_aggregate kernels ------------------------------------------------- + + @ct.kernel + def _ct_h_agg_fwd_kernel(x, h_pre, out, N: ConstInt, TILE_M: ConstInt, TILE_C: ConstInt): + pid = ct.bid(0) + num_tiles = ct.num_tiles(x, axis=2, shape=(TILE_M, N, TILE_C)) + h_tile = ct.load(h_pre, index=(pid, 0), shape=(TILE_M, N), padding_mode=PAD_ZERO) + h_tile = ct.expand_dims(h_tile, axis=2) + for j in range(num_tiles): + x_tile = ct.load(x, index=(pid, 0, j), shape=(TILE_M, N, TILE_C), padding_mode=PAD_ZERO) + acc = ct.sum(x_tile * h_tile, axis=1).astype(ct.float32) + ct.store(out, index=(pid, j), tile=acc.astype(out.dtype)) + + @ct.kernel + 
def _ct_h_agg_bwd_kernel(go, x, h_pre, gx, gh, N: ConstInt, TILE_M: ConstInt, TILE_C: ConstInt): + pid = ct.bid(0) + num_c_tiles = ct.num_tiles(go, axis=1, shape=(TILE_M, TILE_C)) + h_tile = ct.load(h_pre, index=(pid, 0), shape=(TILE_M, N), padding_mode=PAD_ZERO) + h_expanded = ct.expand_dims(h_tile, axis=2) + gh_acc = ct.full((TILE_M, N), 0, dtype=ct.float32) + for ct_idx in range(num_c_tiles): + go_tile = ct.load( + go, index=(pid, ct_idx), shape=(TILE_M, TILE_C), padding_mode=PAD_ZERO + ) + go_expanded = ct.expand_dims(go_tile, axis=1) + x_tile = ct.load( + x, index=(pid, 0, ct_idx), shape=(TILE_M, N, TILE_C), padding_mode=PAD_ZERO + ) + gx_tile = go_expanded * h_expanded + ct.store(gx, index=(pid, 0, ct_idx), tile=gx_tile.astype(gx.dtype)) + gh_acc += ct.sum(go_expanded * x_tile, axis=2) + ct.store(gh, index=(pid, 0), tile=gh_acc.astype(gh.dtype)) + + def _cutile_h_aggregate_fwd(x: Tensor, h_pre: Tensor) -> Tensor: + s, b, n, C = x.shape + sb = s * b + TILE_SIZE = math.gcd(sb, 4) + TILE_C = math.gcd(C, 1024) + out = torch.empty(sb, C, dtype=x.dtype, device=x.device) + ct.launch( + torch.cuda.current_stream(), + (math.ceil(sb / TILE_SIZE),), + _ct_h_agg_fwd_kernel, + (x.view(sb, n, C), h_pre.view(sb, n), out, n, TILE_SIZE, TILE_C), + ) + return out.view(s, b, C) + + def _cutile_h_aggregate_bwd( + grad_output: Tensor, x: Tensor, h_pre: Tensor + ) -> Tuple[Tensor, Tensor]: + s, b, n, C = x.shape + sb = s * b + TILE_C = math.gcd(C, 1024) + TILE_M = math.gcd(sb, 4) + gx = torch.empty(sb, n, C, dtype=x.dtype, device=x.device) + gh = torch.empty(sb, n, dtype=x.dtype, device=x.device) + ct.launch( + torch.cuda.current_stream(), + (math.ceil(sb / TILE_M),), + _ct_h_agg_bwd_kernel, + ( + grad_output.view(sb, C), + x.view(sb, n, C), + h_pre.view(sb, n), + gx, + gh, + n, + TILE_M, + TILE_C, + ), + ) + return gx.view(s, b, n, C), gh.view(s, b, n) + + # -- H_post BDA kernels -------------------------------------------------- + + @ct.kernel + def _ct_hpb_fwd_kernel( + hr, 
orig, hp, x, out, N: ConstInt, TILE_C: ConstInt, TILE_SIZE: ConstInt + ): + pid = ct.bid(0) + num_c_tiles = ct.num_tiles(x, axis=1, shape=(TILE_SIZE, TILE_C)) + hp_tile = ct.load(hp, index=(pid, 0), shape=(TILE_SIZE, N), padding_mode=PAD_ZERO) + hp_2d = ct.reshape(hp_tile, (N, 1)) + hr_tile = ct.load(hr, index=(pid, 0, 0), shape=(TILE_SIZE, N, N), padding_mode=PAD_ZERO) + hr_2d = ct.reshape(hr_tile, (N, N)) + for ct_idx in range(num_c_tiles): + orig_tile = ct.load( + orig, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO + ) + orig_2d = ct.reshape(orig_tile, (N, TILE_C)) + x_tile = ct.load( + x, index=(pid, ct_idx), shape=(TILE_SIZE, TILE_C), padding_mode=PAD_ZERO + ) + x_2d = ct.reshape(x_tile, (1, TILE_C)) + out_2d = hp_2d * x_2d + for j in range(N): + out_2d += ct.extract(hr_2d, (0, j), shape=(N, 1)) * ct.extract( + orig_2d, (j, 0), shape=(1, TILE_C) + ) + ct.store( + out, + index=(pid, 0, ct_idx), + tile=ct.reshape(out_2d, (TILE_SIZE, N, TILE_C)).astype(out.dtype), + ) + + @ct.kernel + def _ct_hpb_fwd_bias_kernel( + hr, orig, hp, x, bias, out, N: ConstInt, TILE_C: ConstInt, TILE_SIZE: ConstInt + ): + pid = ct.bid(0) + num_c_tiles = ct.num_tiles(x, axis=1, shape=(TILE_SIZE, TILE_C)) + hp_tile = ct.load(hp, index=(pid, 0), shape=(TILE_SIZE, N), padding_mode=PAD_ZERO) + hp_2d = ct.reshape(hp_tile, (N, 1)) + hr_tile = ct.load(hr, index=(pid, 0, 0), shape=(TILE_SIZE, N, N), padding_mode=PAD_ZERO) + hr_2d = ct.reshape(hr_tile, (N, N)) + for ct_idx in range(num_c_tiles): + orig_tile = ct.load( + orig, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO + ) + orig_2d = ct.reshape(orig_tile, (N, TILE_C)) + x_tile = ct.load( + x, index=(pid, ct_idx), shape=(TILE_SIZE, TILE_C), padding_mode=PAD_ZERO + ) + bias_tile = ct.load(bias, index=(ct_idx,), shape=(TILE_C,), padding_mode=PAD_ZERO) + xb_2d = ct.reshape(x_tile, (1, TILE_C)) + ct.reshape(bias_tile, (1, TILE_C)) + out_2d = hp_2d * xb_2d + for j in range(N): + out_2d += 
ct.extract(hr_2d, (0, j), shape=(N, 1)) * ct.extract( + orig_2d, (j, 0), shape=(1, TILE_C) + ) + ct.store( + out, + index=(pid, 0, ct_idx), + tile=ct.reshape(out_2d, (TILE_SIZE, N, TILE_C)).astype(out.dtype), + ) + + @ct.kernel + def _ct_hpb_bwd_kernel( + go, + hr, + orig, + hp, + x, + g_hr, + g_orig, + g_hp, + g_x, + N: ConstInt, + TILE_C: ConstInt, + TILE_SIZE: ConstInt, + ): + pid = ct.bid(0) + num_c_tiles = ct.cdiv(go.shape[2], TILE_C) + hp_tile = ct.load(hp, index=(pid, 0), shape=(TILE_SIZE, N)) + hp_2d = ct.reshape(hp_tile, (1, N)) + hr_tile = ct.load(hr, index=(pid, 0, 0), shape=(TILE_SIZE, N, N), padding_mode=PAD_ZERO) + hr_2d = ct.reshape(hr_tile, (N, N)) + acc_g_hp_2d = ct.full((N, 1), 0, dtype=ct.float32) + acc_g_hr_2d = ct.full((N, N), 0, dtype=ct.float32) + for ct_idx in range(num_c_tiles): + x_tile = ct.load( + x, index=(pid, ct_idx), shape=(TILE_SIZE, TILE_C), padding_mode=PAD_ZERO + ) + x_2d = ct.reshape(x_tile, (1, TILE_C)) + go_tile = ct.load( + go, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO + ) + go_2d = ct.reshape(go_tile, (N, TILE_C)) + orig_tile = ct.load( + orig, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO + ) + orig_2d = ct.reshape(orig_tile, (N, TILE_C)) + g_x_2d = ct.full((1, TILE_C), 0, dtype=hp.dtype) + g_orig_2d = ct.full((N, TILE_C), 0, dtype=hp.dtype) + for j in range(N): + g_x_2d += ct.extract(hp_2d, (0, j), shape=(1, 1)).item() * ct.extract( + go_2d, (j, 0), shape=(1, TILE_C) + ) + g_orig_2d += ct.extract(hr_2d, (j, 0), shape=(1, N)).reshape((N, 1)) * ct.extract( + go_2d, (j, 0), shape=(1, TILE_C) + ) + acc_g_hp_2d += ct.sum(go_2d * x_2d, axis=1, keepdims=True) + acc_g_hr_2d += ct.sum( + ct.expand_dims(go_2d, axis=1) * ct.expand_dims(orig_2d, axis=0), axis=2 + ) + ct.store( + g_x, + index=(pid, ct_idx), + tile=ct.reshape(g_x_2d, (TILE_SIZE, TILE_C)).astype(g_x.dtype), + ) + ct.store( + g_orig, + index=(pid, 0, ct_idx), + tile=ct.reshape(g_orig_2d, (TILE_SIZE, N, 
TILE_C)).astype(g_orig.dtype), + ) + ct.store( + g_hp, index=(pid, 0), tile=ct.reshape(acc_g_hp_2d, (TILE_SIZE, N)).astype(g_hp.dtype) + ) + ct.store( + g_hr, + index=(pid, 0, 0), + tile=ct.reshape(acc_g_hr_2d, (TILE_SIZE, N, N)).astype(g_hr.dtype), + ) + + @ct.kernel + def _ct_hpb_bwd_bias_kernel( + go, + hr, + orig, + hp, + x, + bias, + g_hr, + g_orig, + g_hp, + g_x, + N: ConstInt, + TILE_C: ConstInt, + TILE_SIZE: ConstInt, + ): + pid = ct.bid(0) + num_c_tiles = ct.cdiv(go.shape[2], TILE_C) + hp_tile = ct.load(hp, index=(pid, 0), shape=(TILE_SIZE, N)) + hp_2d = ct.reshape(hp_tile, (1, N)) + hr_tile = ct.load(hr, index=(pid, 0, 0), shape=(TILE_SIZE, N, N), padding_mode=PAD_ZERO) + hr_2d = ct.reshape(hr_tile, (N, N)) + acc_g_hp_2d = ct.full((N, 1), 0, dtype=ct.float32) + acc_g_hr_2d = ct.full((N, N), 0, dtype=ct.float32) + for ct_idx in range(num_c_tiles): + x_tile = ct.load( + x, index=(pid, ct_idx), shape=(TILE_SIZE, TILE_C), padding_mode=PAD_ZERO + ) + bias_tile = ct.load(bias, index=(ct_idx,), shape=(TILE_C,), padding_mode=PAD_ZERO) + xb_2d = ct.reshape(x_tile, (1, TILE_C)) + ct.reshape(bias_tile, (1, TILE_C)) + go_tile = ct.load( + go, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO + ) + go_2d = ct.reshape(go_tile, (N, TILE_C)) + orig_tile = ct.load( + orig, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO + ) + orig_2d = ct.reshape(orig_tile, (N, TILE_C)) + g_x_2d = ct.full((1, TILE_C), 0, dtype=hp.dtype) + g_orig_2d = ct.full((N, TILE_C), 0, dtype=hp.dtype) + for j in range(N): + g_x_2d += ct.extract(hp_2d, (0, j), shape=(1, 1)).item() * ct.extract( + go_2d, (j, 0), shape=(1, TILE_C) + ) + g_orig_2d += ct.extract(hr_2d, (j, 0), shape=(1, N)).reshape((N, 1)) * ct.extract( + go_2d, (j, 0), shape=(1, TILE_C) + ) + acc_g_hp_2d += ct.sum(go_2d * xb_2d, axis=1, keepdims=True) + acc_g_hr_2d += ct.sum( + ct.expand_dims(go_2d, axis=1) * ct.expand_dims(orig_2d, axis=0), axis=2 + ) + ct.store( + g_x, + index=(pid, 
ct_idx), + tile=ct.reshape(g_x_2d, (TILE_SIZE, TILE_C)).astype(g_x.dtype), + ) + ct.store( + g_orig, + index=(pid, 0, ct_idx), + tile=ct.reshape(g_orig_2d, (TILE_SIZE, N, TILE_C)).astype(g_orig.dtype), + ) + ct.store( + g_hp, index=(pid, 0), tile=ct.reshape(acc_g_hp_2d, (TILE_SIZE, N)).astype(g_hp.dtype) + ) + ct.store( + g_hr, + index=(pid, 0, 0), + tile=ct.reshape(acc_g_hr_2d, (TILE_SIZE, N, N)).astype(g_hr.dtype), + ) + + def _cutile_h_post_bda_fwd( + h_res: Tensor, original_residual: Tensor, h_post: Tensor, x: Tensor, bias: Optional[Tensor] + ) -> Tensor: + s, b, n, C = original_residual.shape + sb = s * b + TILE_C = math.gcd(C, 1024) + TILE_SIZE = math.gcd(sb, 1) + out = torch.empty(sb, n, C, dtype=h_res.dtype, device=h_res.device) + grid = (math.ceil(sb / TILE_SIZE),) + if bias is not None: + ct.launch( + torch.cuda.current_stream(), + grid, + _ct_hpb_fwd_bias_kernel, + ( + h_res.view(sb, n, n), + original_residual.view(sb, n, C), + h_post.view(sb, n), + x.view(sb, C), + bias, + out, + n, + TILE_C, + TILE_SIZE, + ), + ) + else: + ct.launch( + torch.cuda.current_stream(), + grid, + _ct_hpb_fwd_kernel, + ( + h_res.view(sb, n, n), + original_residual.view(sb, n, C), + h_post.view(sb, n), + x.view(sb, C), + out, + n, + TILE_C, + TILE_SIZE, + ), + ) + return out.view(s, b, n, C) + + def _cutile_h_post_bda_bwd( + grad_output: Tensor, + h_res: Tensor, + original_residual: Tensor, + h_post: Tensor, + x: Tensor, + bias: Optional[Tensor], + ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Optional[Tensor]]: + s, b, n, C = original_residual.shape + sb = s * b + TILE_C = math.gcd(C, 1024) + TILE_SIZE = math.gcd(sb, 1) + g_hr = torch.empty(sb, n, n, dtype=h_res.dtype, device=h_res.device) + g_res = torch.empty(sb, n, C, dtype=h_res.dtype, device=h_res.device) + g_hp = torch.empty(sb, n, dtype=h_res.dtype, device=h_res.device) + g_x = torch.empty(sb, C, dtype=h_res.dtype, device=h_res.device) + grid = (sb,) + if bias is not None: + ct.launch( + torch.cuda.current_stream(), + 
grid, + _ct_hpb_bwd_bias_kernel, + ( + grad_output.view(sb, n, C), + h_res.view(sb, n, n), + original_residual.view(sb, n, C), + h_post.view(sb, n), + x.view(sb, C), + bias, + g_hr, + g_res, + g_hp, + g_x, + n, + TILE_C, + TILE_SIZE, + ), + ) + else: + ct.launch( + torch.cuda.current_stream(), + grid, + _ct_hpb_bwd_kernel, + ( + grad_output.view(sb, n, C), + h_res.view(sb, n, n), + original_residual.view(sb, n, C), + h_post.view(sb, n), + x.view(sb, C), + g_hr, + g_res, + g_hp, + g_x, + n, + TILE_C, + TILE_SIZE, + ), + ) + g_bias = g_x.sum(dim=0) if bias is not None else None + return ( + g_hr.view(s, b, n, n), + g_res.view(s, b, n, C), + g_hp.view(s, b, n), + g_x.view(s, b, C), + g_bias, + ) + + # -- Proj RMS kernels ---------------------------------------------------- + + @ct.function + def _ct_rms_dnorm(a_tile, norm_tile, dr_tile, K): + inv_norm = ct.where(norm_tile > 0, 1.0 / norm_tile, 0.0) + inv_sqrt_k = 1.0 / ct.sqrt(K) + eps = 1e-8 + u = norm_tile * inv_sqrt_k + eps + coeff = -(1.0 / (u * u)) * inv_sqrt_k + return dr_tile * coeff * a_tile * inv_norm + + @ct.kernel + def _ct_proj_rms_fwd_kernel( + A, + B, + PROJ, + NORM, + R, + M: int, + N: int, + K: int, + eps: float, + TILE_M: ConstInt, + TILE_N: ConstInt, + TILE_K: ConstInt, + ): + tile_m_id = ct.bid(0) + num_k_tiles = ct.cdiv(K, TILE_K) + acc = ct.full((TILE_M, TILE_N), 0.0, dtype=ct.float32) + sum_sq = ct.full((TILE_M, 1), 0.0, dtype=ct.float32) + for tile_k_id in range(num_k_tiles): + a_tile = ct.load( + A, index=(tile_m_id, tile_k_id), shape=(TILE_M, TILE_K), padding_mode=PAD_ZERO + ) + b_tile = ct.load(B, index=(0, tile_k_id), shape=(TILE_N, TILE_K), padding_mode=PAD_ZERO) + acc = ct.mma( + a_tile.astype(ct.tfloat32), b_tile.transpose().astype(ct.tfloat32), acc=acc + ) + sum_sq += ct.sum(a_tile * a_tile, axis=1, keepdims=True) + norm_tile = ct.sqrt(sum_sq) + v = norm_tile / ct.sqrt(K) + eps + r_tile = 1.0 / v + ct.store(PROJ, index=(tile_m_id, 0), tile=acc.astype(PROJ.dtype)) + ct.store(NORM, 
index=(tile_m_id, 0), tile=norm_tile.astype(NORM.dtype)) + ct.store(R, index=(tile_m_id, 0), tile=r_tile.astype(R.dtype)) + + @ct.kernel + def _ct_proj_rms_bwd_kernel( + A, + B, + NORM, + DD, + DR, + DA, + DB, + M: int, + N: int, + K: int, + TILE_SIZE_M: ConstInt, + TILE_SIZE_N: ConstInt, + TILE_SIZE_K: ConstInt, + ): + zero_pad = ct.PaddingMode.ZERO + tile_k_id = ct.bid(0) + NUM_M_TILES = ct.cdiv(M, TILE_SIZE_M) + accumulator_db = ct.full((TILE_SIZE_K, TILE_SIZE_N), 0.0, dtype=ct.float32) + for tile_m_id in range(NUM_M_TILES): + accumulator_da = ct.full((TILE_SIZE_M, TILE_SIZE_K), 0.0, dtype=ct.float32) + a_tile = ct.load( + A, + index=(tile_m_id, tile_k_id), + shape=(TILE_SIZE_M, TILE_SIZE_K), + padding_mode=zero_pad, + ) + norm_tile = ct.load( + NORM, index=(tile_m_id, 0), shape=(TILE_SIZE_M, 1), padding_mode=zero_pad + ) + dr_tile = ct.load( + DR, index=(tile_m_id, 0), shape=(TILE_SIZE_M, 1), padding_mode=zero_pad + ) + accumulator_da = accumulator_da + _ct_rms_dnorm(a_tile, norm_tile, dr_tile, K) + b_tile = ct.load( + B, index=(0, tile_k_id), shape=(TILE_SIZE_N, TILE_SIZE_K), padding_mode=zero_pad + ) + dd_tile = ct.load( + DD, index=(tile_m_id, 0), shape=(TILE_SIZE_M, TILE_SIZE_N), padding_mode=zero_pad + ) + dd_tile = ct.astype(dd_tile, ct.tfloat32) + accumulator_da = ct.mma(dd_tile, b_tile.astype(ct.tfloat32), acc=accumulator_da) + ct.store(DA, index=(tile_m_id, tile_k_id), tile=accumulator_da.astype(DA.dtype)) + accumulator_db = ct.mma( + a_tile.transpose().astype(ct.tfloat32), dd_tile, acc=accumulator_db + ) + ct.store(DB, index=(0, tile_k_id), tile=accumulator_db.transpose().astype(DB.dtype)) + + @ct.kernel + def _ct_proj_rms_bwd_small_k_kernel( + A, B, NORM, DD, DR, DA, DB, M: int, N: int, K: int, TILE_N_SIZE: ConstInt + ): + zero_pad = ct.PaddingMode.ZERO + TILE_DB_SIZE_M = 128 + TILE_DB_SIZE_K = 64 + NUM_M_TILES = ct.cdiv(M, TILE_DB_SIZE_M) + NUM_K_TILES = ct.cdiv(K, TILE_DB_SIZE_K) + if ct.bid(1) == 0: + for tile_id in range(ct.bid(0), NUM_K_TILES, 
ct.num_blocks(0)): + accumulator_db = ct.full((TILE_DB_SIZE_K, TILE_N_SIZE), 0.0, dtype=ct.float32) + for m_tile in range(NUM_M_TILES): + a_tile = ct.load( + A, + index=(m_tile, tile_id), + shape=(TILE_DB_SIZE_M, TILE_DB_SIZE_K), + padding_mode=zero_pad, + ) + dd_tile = ct.load( + DD, + index=(m_tile, 0), + shape=(TILE_DB_SIZE_M, TILE_N_SIZE), + padding_mode=zero_pad, + ) + accumulator_db = ct.mma( + a_tile.transpose().astype(ct.tfloat32), + dd_tile.astype(ct.tfloat32), + acc=accumulator_db, + ) + ct.store( + DB, + index=(0, tile_id), + tile=accumulator_db.transpose().astype(DB.dtype), + allow_tma=False, + ) + TILE_DA_SIZE_M = 128 + TILE_DA_SIZE_K = 256 + NUM_DA_TILES = ct.cdiv(M, TILE_DA_SIZE_M) * ct.cdiv(K, TILE_DA_SIZE_K) + NUM_DA_K_TILES = ct.cdiv(K, TILE_DA_SIZE_K) + if ct.bid(1) == 1: + for tile_id in range(ct.bid(0), NUM_DA_TILES, ct.num_blocks(0)): + b_tile_idx = tile_id % NUM_DA_K_TILES + dd_tile_idx = tile_id // NUM_DA_K_TILES + accumulator_da = ct.full((TILE_DA_SIZE_M, TILE_DA_SIZE_K), 0.0, dtype=ct.float32) + a_tile = ct.load( + A, + index=(dd_tile_idx, b_tile_idx), + shape=(TILE_DA_SIZE_M, TILE_DA_SIZE_K), + padding_mode=zero_pad, + ) + norm_tile = ct.load( + NORM, index=(dd_tile_idx, 0), shape=(TILE_DA_SIZE_M, 1), padding_mode=zero_pad + ) + dr_tile = ct.load( + DR, index=(dd_tile_idx, 0), shape=(TILE_DA_SIZE_M, 1), padding_mode=zero_pad + ) + accumulator_da = accumulator_da + _ct_rms_dnorm( + a_tile.astype(ct.float32), norm_tile, dr_tile, K + ) + b_tile = ct.load( + B, + index=(0, b_tile_idx), + shape=(TILE_N_SIZE, TILE_DA_SIZE_K), + padding_mode=zero_pad, + ) + dd_tile = ct.load( + DD, + index=(dd_tile_idx, 0), + shape=(TILE_DA_SIZE_M, TILE_N_SIZE), + padding_mode=zero_pad, + ) + accumulator_da = ct.mma( + dd_tile.astype(ct.tfloat32), b_tile.astype(ct.tfloat32), acc=accumulator_da + ) + ct.store(DA, index=(dd_tile_idx, b_tile_idx), tile=accumulator_da.astype(DA.dtype)) + + def _next_power_of_2(n: int) -> int: + n -= 1 + n |= n >> 1 + n |= n >> 2 + n 
|= n >> 4 + n |= n >> 8 + n |= n >> 16 + n |= n >> 32 + n += 1 + return n + + def _cutile_proj_rms_fwd( + x: Tensor, weight: Tensor, eps: float = 1e-8 + ) -> Tuple[Tensor, Tensor, Tensor]: + M, K = x.shape + N = weight.shape[0] + TILE_M = 128 + TILE_N = _next_power_of_2(N) + TILE_K = 128 + num_tiles_m = math.ceil(M / TILE_M) + proj = torch.empty(M, N, dtype=x.dtype, device=x.device) + norm = torch.empty(M, 1, dtype=x.dtype, device=x.device) + r = torch.empty(M, 1, dtype=x.dtype, device=x.device) + ct.launch( + torch.cuda.current_stream(), + (num_tiles_m,), + _ct_proj_rms_fwd_kernel, + (x, weight, proj, norm, r, M, N, K, eps, TILE_M, TILE_N, TILE_K), + ) + return proj, norm, r + + def _cutile_proj_rms_bwd( + grad_proj: Tensor, + grad_r: Tensor, + x: Tensor, + weight: Tensor, + norm: Tensor, + eps: float = 1e-8, + ) -> Tuple[Tensor, Tensor]: + M, K = x.shape + N = weight.shape[0] + da = torch.empty_like(x) + db = torch.empty_like(weight) + TILE_SIZE_N = _next_power_of_2(N) + assert TILE_SIZE_N <= 256, f"TILE_SIZE_N too large: {TILE_SIZE_N}" + num_sms = torch.cuda.get_device_properties("cuda").multi_processor_count + if K >= 8192: + TILE_SIZE_M, TILE_SIZE_K = 128, 128 + grid = (math.ceil(K / TILE_SIZE_K), 1) + ct.launch( + torch.cuda.current_stream(), + grid, + _ct_proj_rms_bwd_kernel, + ( + x, + weight, + norm, + grad_proj, + grad_r, + da, + db, + M, + N, + K, + TILE_SIZE_M, + TILE_SIZE_N, + TILE_SIZE_K, + ), + ) + else: + grid = (num_sms, 2, 1) + ct.launch( + torch.cuda.current_stream(), + grid, + _ct_proj_rms_bwd_small_k_kernel, + (x, weight, norm, grad_proj, grad_r, da, db, M, N, K, TILE_SIZE_N), + ) + return da, db + + +# ============================================================================ +# Autograd Functions (cuTile only – guarded by _CUTILE_AVAILABLE) +# ============================================================================ + +if not _CUTILE_AVAILABLE: + + def _no_cutile_error(*_args, **_kwargs): + raise RuntimeError( + "Fused mHC kernels 
require cuda.tile (cuTile) which is not installed. " + "Either install cuTile or set use_fused_mhc=False to use reference " + "implementations." + ) + + fused_sinkhorn = _no_cutile_error + fused_h_aggregate = _no_cutile_error + fused_h_post_bda = _no_cutile_error + fused_proj_rms = _no_cutile_error + +else: + + class FusedSinkhornKnopp(torch.autograd.Function): + """Fused Sinkhorn-Knopp projection to doubly stochastic matrix (cuTile).""" + + @staticmethod + def forward(ctx, input_logits: Tensor, num_iterations: int, eps: float = 1e-6): + """cuTile fused Sinkhorn forward.""" + output, M_init = _cutile_sinkhorn_fwd(input_logits, num_iterations, eps) + ctx.save_for_backward(M_init) + ctx.num_iterations = num_iterations + ctx.eps = eps + return output + + @staticmethod + def backward(ctx, grad_output): + """cuTile fused Sinkhorn backward.""" + (M_init,) = ctx.saved_tensors + grad_input = _cutile_sinkhorn_bwd(grad_output, M_init, ctx.num_iterations, ctx.eps) + return grad_input, None, None + + class FusedHAggregate(torch.autograd.Function): + """Fused n-stream weighted aggregation (cuTile).""" + + @staticmethod + def forward(ctx, x: Tensor, h_pre: Tensor): + """cuTile fused h_aggregate forward.""" + output = _cutile_h_aggregate_fwd(x, h_pre) + ctx.save_for_backward(x, h_pre) + return output + + @staticmethod + def backward(ctx, grad_output): + """cuTile fused h_aggregate backward.""" + x, h_pre = ctx.saved_tensors + return _cutile_h_aggregate_bwd(grad_output, x, h_pre) + + class FusedHPostBDA(torch.autograd.Function): + """Fused: output = H_res @ orig_res + H_post * (x [+ bias]) (cuTile).""" + + @staticmethod + def forward( + ctx, + h_res: Tensor, + original_residual: Tensor, + h_post: Tensor, + x: Tensor, + bias: Optional[Tensor], + ): + """cuTile fused h_post_bda forward.""" + output = _cutile_h_post_bda_fwd(h_res, original_residual, h_post, x, bias) + if bias is not None: + ctx.save_for_backward(h_res, original_residual, h_post, x, bias) + ctx.has_bias = True + else: 
+ ctx.save_for_backward(h_res, original_residual, h_post, x) + ctx.has_bias = False + return output + + @staticmethod + def backward(ctx, grad_output): + """cuTile fused h_post_bda backward.""" + if ctx.has_bias: + h_res, orig_res, h_post, x, bias = ctx.saved_tensors + else: + h_res, orig_res, h_post, x = ctx.saved_tensors + bias = None + return _cutile_h_post_bda_bwd(grad_output, h_res, orig_res, h_post, x, bias) + + class FusedProjRms(torch.autograd.Function): + """Fused projection + RMS normalization (cuTile).""" + + @staticmethod + def forward(ctx, x: Tensor, weight: Tensor, eps: float = 1e-6): + """cuTile fused proj_rms forward.""" + proj, norm, r = _cutile_proj_rms_fwd(x, weight, eps) + ctx.save_for_backward(x, weight, norm) + ctx.eps = eps + return proj, r + + @staticmethod + def backward(ctx, grad_proj, grad_r): + """cuTile fused proj_rms backward.""" + x, weight, norm = ctx.saved_tensors + grad_x, grad_weight = _cutile_proj_rms_bwd(grad_proj, grad_r, x, weight, norm, ctx.eps) + return grad_x, grad_weight, None + + # ======================================================================== + # Public API (only available when cuTile is installed) + # ======================================================================== + + def fused_sinkhorn(input_logits: Tensor, num_iterations: int, eps: float = 1e-6) -> Tensor: + """Project logits to doubly stochastic matrix via Sinkhorn-Knopp. + + Args: + input_logits: [..., n, n] raw logits + num_iterations: Sinkhorn iterations + eps: numerical stability + + Returns: + [..., n, n] doubly stochastic matrix + """ + return FusedSinkhornKnopp.apply(input_logits, num_iterations, eps) + + def fused_h_aggregate(x: Tensor, h_pre: Tensor) -> Tensor: + """Weighted n-stream to 1-stream aggregation. 
+ + Args: + x: [s, b, n, C] n-stream hidden states + h_pre: [s, b, n] aggregation weights + + Returns: + [s, b, C] aggregated hidden states + """ + return FusedHAggregate.apply(x, h_pre) + + def fused_h_post_bda( + h_res: Tensor, original_residual: Tensor, h_post: Tensor, x: Tensor, bias: Optional[Tensor] + ) -> Tensor: + """Fused H_res @ residual + H_post * (x + bias). + + Args: + h_res: [s, b, n, n] residual mixing matrix + original_residual: [s, b, n, C] n-stream residual + h_post: [s, b, n] expansion weights + x: [s, b, C] layer output + bias: [C] or None + + Returns: + [s, b, n, C] fused output + """ + return FusedHPostBDA.apply(h_res, original_residual, h_post, x, bias) + + def fused_proj_rms(x: Tensor, weight: Tensor, eps: float = 1e-6) -> Tuple[Tensor, Tensor]: + """Fused projection + RMS normalization. + + Args: + x: [M, K] input + weight: [N, K] projection weight + eps: stability epsilon + + Returns: + proj: [M, N] = x @ weight^T + r: [M, 1] = 1 / (||x|| / sqrt(K) + eps) + """ + return FusedProjRms.apply(x, weight, eps) diff --git a/megatron/core/transformer/hyper_connection.py b/megatron/core/transformer/hyper_connection.py index 5ccbd70c340..64ec3107213 100644 --- a/megatron/core/transformer/hyper_connection.py +++ b/megatron/core/transformer/hyper_connection.py @@ -15,95 +15,85 @@ from megatron.core.tensor_parallel.random import CheckpointManager -class SinkhornKnopp(torch.autograd.Function): - """ - Differentiable Sinkhorn-Knopp algorithm for doubly stochastic projection. +@torch.compile +def _sinkhorn_iterations(input_logits: Tensor, num_iterations: int, eps: float) -> Tensor: + row_max = input_logits.max(dim=-1, keepdim=True).values + M = torch.exp(input_logits - row_max) + for _ in range(num_iterations): + M = M / M.sum(dim=-1, keepdim=True).clamp(min=eps) + M = M / M.sum(dim=-2, keepdim=True).clamp(min=eps) + return M - Projects a positive matrix onto the Birkhoff polytope (doubly stochastic matrices) - via iterative row and column normalization. 
- Reference: Eq. (9) in mHC paper - M^{(t)} = T_c(T_r(M^{(t-1)})) - """ +class SinkhornKnopp(torch.autograd.Function): + """Sinkhorn-Knopp projection to doubly stochastic matrix. - eps = 1e-6 + This is an autograd.Function because the iterative forward is re-executed + during backward (under torch.enable_grad) so that PyTorch's autograd can + differentiate through it without storing all intermediate iteration states. + """ @staticmethod - def _sinkhorn_normalize(M: Tensor, num_iterations: int) -> Tensor: - """ - Apply Sinkhorn-Knopp normalization iterations. - - Iteratively applies row and column normalization to project M - onto the Birkhoff polytope (doubly stochastic matrices). - - Args: - M: [s, b, n, n] - positive matrix to normalize - num_iterations: Number of Sinkhorn iterations - - Returns: - M: [s, b, n, n] - doubly stochastic matrix - """ - for _ in range(num_iterations): - # T_r: Row normalization - M = M / M.sum(dim=-1, keepdim=True).clamp(min=SinkhornKnopp.eps) - # T_c: Column normalization - M = M / M.sum(dim=-2, keepdim=True).clamp(min=SinkhornKnopp.eps) + def forward(ctx, input_logits: Tensor, num_iterations: int, eps: float = 1e-6) -> Tensor: + """Run Sinkhorn iterations and save inputs for backward recomputation.""" + M = _sinkhorn_iterations(input_logits, num_iterations, eps) + ctx.save_for_backward(input_logits) + ctx.num_iterations = num_iterations + ctx.eps = eps return M @staticmethod - def forward(ctx, H_res_logits: Tensor, num_iterations: int) -> Tensor: - """ - Project to doubly stochastic matrix via iterative row/col normalization. 
+ def backward(ctx, grad_output: Tensor): + """Recompute forward under enable_grad and back-propagate.""" + (input_logits,) = ctx.saved_tensors + with torch.enable_grad(): + logits = input_logits.detach().requires_grad_(True) + M = _sinkhorn_iterations(logits, ctx.num_iterations, ctx.eps) + M.backward(grad_output) + return logits.grad, None, None - Args: - H_res_logits: [s, b, n, n] - raw logits for residual mixing matrix - num_iterations: Number of Sinkhorn iterations (paper uses 20) - Returns: - H_res: [s, b, n, n] - doubly stochastic matrix - """ - # Gradients are computed explicitly in backward via recomputation. - # Stabilized exp: subtract row-wise max to prevent overflow (log-sum-exp trick) - # M^{(0)} = exp(H_res_logits - max(H_res_logits)) - numerically equivalent - # after Sinkhorn normalization since row normalization absorbs the scaling. - M_init = torch.exp(H_res_logits - H_res_logits.max(dim=-1, keepdim=True).values) +def native_sinkhorn(input_logits: Tensor, num_iterations: int, eps: float = 1e-6) -> Tensor: + """Native Sinkhorn-Knopp (autograd.Function wrapper).""" + return SinkhornKnopp.apply(input_logits, num_iterations, eps) - M = SinkhornKnopp._sinkhorn_normalize(M_init, num_iterations) - # Save initial M for backward recomputation - ctx.save_for_backward(M_init) - ctx.num_iterations = num_iterations - return M +@torch.compile +def native_h_aggregate(x: Tensor, h_pre: Tensor) -> Tensor: + """Native n-stream weighted aggregation: out = sum_j(h_pre_j * x_j).""" + return (x * h_pre.unsqueeze(-1)).sum(dim=2) - @staticmethod - def backward(ctx, grad_output: Tensor) -> Tuple[Tensor, None]: - """ - Backward through Sinkhorn-Knopp iterations using recomputation. - Recomputes the forward pass with gradient tracking to obtain accurate gradients. 
- """ - (M_init,) = ctx.saved_tensors - num_iterations = ctx.num_iterations +@torch.compile +def native_h_post_bda( + h_res: Tensor, original_residual: Tensor, h_post: Tensor, x: Tensor, bias: Optional[Tensor] +) -> Tensor: + """Native H_res @ residual + H_post * (x [+ bias]).""" + s, b, n, C = original_residual.shape + h_res_batched = h_res.view(s * b, n, n) + residual_batched = original_residual.view(s * b, n, C) + mixed = torch.bmm(h_res_batched, residual_batched).view(s, b, n, C) + x_expanded = h_post.unsqueeze(-1) * x.unsqueeze(2) + if bias is not None: + bias_expanded = h_post.unsqueeze(-1) * bias.view(1, 1, 1, C) + return x_expanded + bias_expanded + mixed + return x_expanded + mixed - # Recompute forward with autograd enabled - with torch.enable_grad(): - # Leaf for recomputation - M_input = M_init.detach().requires_grad_(True) - - M_current = SinkhornKnopp._sinkhorn_normalize(M_input, num_iterations) - - # Compute dL/dM_input (i.e., dL/dM_init) via autograd - (grad_M_init,) = torch.autograd.grad( - outputs=M_current, - inputs=M_input, - grad_outputs=grad_output, - create_graph=False, - retain_graph=False, - ) - # Apply chain rule: dL/dH = dL/dM_init * dM_init/dH = dL/dM_init * M_init - # Since M_init = exp(H_res_logits), we have d(exp(x))/dx = exp(x) = M_init - grad_input = grad_M_init * M_init - return grad_input, None +@torch.compile +def native_proj_rms(x: Tensor, weight: Tensor, eps: float = 1e-6) -> Tuple[Tensor, Tensor]: + """Native fused projection + RMS normalization.""" + proj = torch.matmul(x, weight.t()) + norm = x.norm(dim=-1, keepdim=True) + K = x.shape[-1] + v = norm / math.sqrt(K) + eps + r = 1.0 / v + return proj, r + + +# ============================================================================ +# HyperConnectionModule +# ============================================================================ # TODO: keep hyper connection in fp32 computation @@ -153,6 +143,27 @@ def __init__(self, config: TransformerConfig, layer_number: int): 
self.bias = nn.Parameter(torch.zeros(self.n * self.n + 2 * self.n)) self.norm_eps = 1e-6 + # Choose implementation: fused cuTile kernels vs reference modules. + # Both paths expose the same call signatures so the rest of the code + # is implementation-agnostic. + if config.use_fused_mhc: + from megatron.core.fusions.fused_mhc_kernels import ( + fused_h_aggregate, + fused_h_post_bda, + fused_proj_rms, + fused_sinkhorn, + ) + + self._sinkhorn_op = fused_sinkhorn + self._h_aggregate_op = fused_h_aggregate + self._h_post_bda_op = fused_h_post_bda + self._proj_rms_op = fused_proj_rms + else: + self._sinkhorn_op = native_sinkhorn + self._h_aggregate_op = native_h_aggregate + self._h_post_bda_op = native_h_post_bda + self._proj_rms_op = native_proj_rms + self._init_weights() def _init_weights(self) -> None: @@ -170,19 +181,17 @@ def _init_weights(self) -> None: setattr(self.alpha_res, 'sequence_parallel', True) setattr(self.bias, 'sequence_parallel', True) - @torch.compile def _projection_and_get_norm(self, x: Tensor) -> Tuple[Tensor, Tensor]: """ - Project input hidden states to mapping space and apply RMS normalization. + Projection + RMS normalization. 
Args: x: [s, b, n*C] - n-stream hidden states """ - nC = x.shape[-1] - r = x.norm(dim=-1, keepdim=True) / math.sqrt(nC) # shape: [s, b, 1] - r = 1.0 / (r + self.norm_eps) # shape: [s, b, 1] - proj = self.mapping_proj(x) # [s, b, n^2 + 2n] - return proj, r + s, b, nC = x.shape + x_2d = x.reshape(s * b, nC) + proj, r = self._proj_rms_op(x_2d, self.mapping_proj.weight, self.norm_eps) + return proj.view(s, b, -1), r.view(s, b, 1) @torch.compile def _compute_h(self, proj: Tensor, r: Tensor) -> Tuple[Tensor, Tensor, Tensor]: @@ -235,8 +244,8 @@ def compute_mappings(self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor]: proj, r = self._projection_and_get_norm(x) with torch.cuda.nvtx.range("HyperConnection::compute_h"): h_pre, h_post, h_res = self._compute_h(proj, r) - h_res = SinkhornKnopp.apply( - h_res.view(s, b, self.n, self.n), self.sinkhorn_iterations + h_res = self._sinkhorn_op( + h_res.view(s, b, self.n, self.n), self.sinkhorn_iterations, self.norm_eps ) # [s, b, n, n] return h_pre, h_post, h_res @@ -324,12 +333,9 @@ def apply_h_post( return x_out, bias_out - @torch.compile def aggregate(self, x: Tensor, h_pre: Tensor) -> Tensor: """ - Aggregate n-stream to 1-stream using H_pre weights. - - Computes: sum_i(h_pre_i * x_stream_i) + Aggregate n-stream to 1-stream. Args: x: [s, b, n*C] - n-stream hidden states @@ -340,14 +346,8 @@ def aggregate(self, x: Tensor, h_pre: Tensor) -> Tensor: """ s, b, _ = x.shape C = self.hidden_size - - # Reshape to [s, b, n, C] x_streams = x.view(s, b, self.n, C) - - # Weighted sum: [s, b, n, C] * [s, b, n, 1] -> sum over n -> [s, b, C] - aggregated = (x_streams * h_pre.unsqueeze(-1)).sum(dim=2) - - return aggregated + return self._h_aggregate_op(x_streams, h_pre) @torch.compile def apply_h_res(self, h_res: Tensor, residual: Tensor) -> Tensor: @@ -563,7 +563,11 @@ def _fused_h_res_h_post_bda_native( fused: bool, ) -> Tensor: """ - Native implementation of fused h_res, h_post and bda operations. + h_res, h_post and bda. 
+ + When dropout is zero (or inference), uses a single fused/reference kernel + for H_res @ residual + H_post * (x + bias). Falls back to unfused + implementation when dropout is needed. Args: h_res: [s, b, n, n] - residual mixing matrix @@ -577,23 +581,26 @@ def _fused_h_res_h_post_bda_native( Returns: output: [s, b, n*C] - final output """ + x, bias = layer_output_with_bias + + if dropout_prob == 0.0 or not training: + s, b, _ = original_residual.shape + n = self.n + C = self.hidden_size + orig_reshaped = original_residual.view(s, b, n, C) + output = self._h_post_bda_op(h_res, orig_reshaped, h_post, x, bias) + return output.view(s, b, n * C) + from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add - # Step 1: Apply H_res to original residual with torch.cuda.nvtx.range("HyperConnection::apply_h_res"): mixed = self.apply_h_res(h_res, original_residual) - - # Step 2: Apply H_post to layer output - x, bias = layer_output_with_bias with torch.cuda.nvtx.range("HyperConnection::apply_h_post"): x_expanded = self._apply_h_post(x, h_post) bias_expanded = self._apply_h_post(bias, h_post) if bias is not None else None - - # Step 3: Bias-dropout-add bda_func = get_bias_dropout_add(training, fused) with torch.cuda.nvtx.range("HyperConnection::bda"): output = bda_func((x_expanded, bias_expanded), mixed, dropout_prob) - return output @nvtx_decorator(message="HyperConnection::fused_h_res_h_post_bda_with_checkpoint") @@ -609,9 +616,12 @@ def _fused_h_res_h_post_bda_with_checkpoint( manager: 'CheckpointManager', ) -> Tensor: """ - Checkpointed implementation of fused h_res, h_post and bda operations. + Checkpointed variant of _fused_h_res_h_post_bda_native. - Uses a single checkpoint wrapper around all operations for memory efficiency. + Wraps compute in CheckpointWithoutOutput for activation memory savings. 
+ Cannot reuse _native directly because checkpoint requires all args to be + positional Tensors; tuple/Optional/scalar args are unpacked or captured + via closure instead. Args: h_res: [s, b, n, n] - residual mixing matrix @@ -626,43 +636,53 @@ def _fused_h_res_h_post_bda_with_checkpoint( Returns: output: [s, b, n*C] - final output """ - from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add from megatron.core.tensor_parallel.random import CheckpointWithoutOutput - # Get BDA function (captured via closure) - bda_func = get_bias_dropout_add(training, fused) - - # Unpack layer_output_with_bias to avoid tuple tensors in checkpoint args x, bias = layer_output_with_bias - has_bias = bias is not None - - # Native wrapper that combines all operations without internal checkpointing. - # Non-tensor args (dropout_prob, has_bias) are captured via closure. - def _native_wrapper(h_res, original_residual, h_post, x, *optional_bias): - # Step 1: Apply H_res to original residual - with torch.cuda.nvtx.range("HyperConnection::apply_h_res"): - mixed = self.apply_h_res(h_res, original_residual) - - # Step 2: Apply H_post to x and bias - with torch.cuda.nvtx.range("HyperConnection::apply_h_post"): - x_expanded = self._apply_h_post(x, h_post) - if has_bias: - bias_expanded = self._apply_h_post(optional_bias[0], h_post) - else: - bias_expanded = None - - # Step 3: Bias-dropout-add - with torch.cuda.nvtx.range("HyperConnection::bda"): - output = bda_func((x_expanded, bias_expanded), mixed, dropout_prob) - - return output - - # Use a single checkpoint wrapper for all operations - ckpt = CheckpointWithoutOutput(ckpt_manager=manager) - if has_bias: - output = ckpt.checkpoint(_native_wrapper, h_res, original_residual, h_post, x, bias) + n = self.n + C = self.hidden_size + + # Fast path: no dropout — use fused/reference h_post_bda kernel (same as _native) + if dropout_prob == 0.0 or not training: + + def _fused_wrapper(h_res, original_residual, h_post, x, *optional_bias): + 
s, b, _ = original_residual.shape + orig_reshaped = original_residual.view(s, b, n, C) + b_arg = optional_bias[0] if optional_bias else None + return self._h_post_bda_op(h_res, orig_reshaped, h_post, x, b_arg).view(s, b, n * C) + + ckpt = CheckpointWithoutOutput(ckpt_manager=manager) + if bias is not None: + output = ckpt.checkpoint(_fused_wrapper, h_res, original_residual, h_post, x, bias) + else: + output = ckpt.checkpoint(_fused_wrapper, h_res, original_residual, h_post, x) + + # Slow path: dropout required — fused kernel does not support dropout, + # fall back to sequential apply_h_res + apply_h_post + bda else: - output = ckpt.checkpoint(_native_wrapper, h_res, original_residual, h_post, x) + from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add + + bda_func = get_bias_dropout_add(training, fused) + has_bias = bias is not None + + def _native_wrapper(h_res, original_residual, h_post, x, *optional_bias): + with torch.cuda.nvtx.range("HyperConnection::apply_h_res"): + mixed = self.apply_h_res(h_res, original_residual) + with torch.cuda.nvtx.range("HyperConnection::apply_h_post"): + x_expanded = self._apply_h_post(x, h_post) + if has_bias: + bias_expanded = self._apply_h_post(optional_bias[0], h_post) + else: + bias_expanded = None + with torch.cuda.nvtx.range("HyperConnection::bda"): + output = bda_func((x_expanded, bias_expanded), mixed, dropout_prob) + return output + + ckpt = CheckpointWithoutOutput(ckpt_manager=manager) + if has_bias: + output = ckpt.checkpoint(_native_wrapper, h_res, original_residual, h_post, x, bias) + else: + output = ckpt.checkpoint(_native_wrapper, h_res, original_residual, h_post, x) return output diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 7ec5636ab87..1874d93e50d 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -853,6 +853,16 @@ class TransformerConfig(ModelParallelConfig): 
mhc_init_gating_factor: float = 0.01 """Initial value of Gating Factor (alpha in paper).""" + use_fused_mhc: bool = False + """Use cuTile fused kernels for mHC operations. + + When True, attempts to replace the reference mHC modules (SinkhornKnopp, + H_aggregate, H_post_bda, ProjRms) with fused cuda.tile (cuTile) autograd + functions for better performance on supported GPUs. Requires cuTile to be + installed; if cuTile is unavailable the flag is silently reset to False and + a warning is emitted. + """ + mhc_recompute_layer_num: Optional[int] = None """Number of layers per MHC recompute block. @@ -1411,6 +1421,28 @@ def __post_init__(self): "recompute_modules with selective recompute to reduce activation memory." ) + # Validation for use_fused_mhc + if self.use_fused_mhc: + if not self.enable_hyper_connections: + raise ValueError("use_fused_mhc requires enable_hyper_connections=True.") + try: + from megatron.core.fusions.fused_mhc_kernels import is_cutile_available + + if not is_cutile_available(): + warnings.warn( + "use_fused_mhc is enabled but cuda.tile (cuTile) is not installed. " + "Falling back to reference mHC implementations.", + UserWarning, + ) + self.use_fused_mhc = False + except ImportError: + warnings.warn( + "use_fused_mhc is enabled but fused_mhc_kernels module could not be " + "imported. 
Falling back to reference mHC implementations.", + UserWarning, + ) + self.use_fused_mhc = False + # Validation for hyper_connections with MTP if self.enable_hyper_connections and self.mtp_num_layers is not None: raise ValueError( diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json index dc905f25c06..fd52044e2b5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.86032, - "2": 10.85379, - "3": 10.86576, - "4": 10.84522, - "5": 10.88381, - "6": 10.89591, - "7": 10.87181, - "8": 10.86499, - "9": 10.86909, - "10": 10.83611, - "11": 10.89392, - "12": 10.87885, - "13": 10.87633, - "14": 10.9031, - "15": 10.83062, - "16": 10.83399, - "17": 10.80009, - "18": 10.82035, - "19": 10.81427, - "20": 10.71811, - "21": 10.68666, - "22": 10.5322, - "23": 10.70546, - "24": 10.58584, - "25": 10.51963, - "26": 10.58548, - "27": 10.60203, - "28": 10.53634, - "29": 10.57208, - "30": 10.33312, - "31": 10.05931, - "32": 10.42892, - "33": 10.42115, - "34": 10.17094, - "35": 10.23176, - "36": 10.1883, - "37": 10.31328, - "38": 10.14298, - "39": 10.38218, - "40": 10.04918, - "41": 10.10427, - "42": 10.17245, - "43": 9.78375, - "44": 9.91054, - "45": 9.78577, - "46": 9.7695, - "47": 10.10153, - "48": 9.81025, - "49": 9.48829, - "50": 9.8677 + "1": 10.86149, + "2": 10.85467, + "3": 10.86695, + "4": 10.84625, + "5": 10.8847, + "6": 10.89676, + "7": 10.87272, + "8": 10.86586, + "9": 10.86993, + "10": 10.83755, + "11": 10.89458, + "12": 10.87956, + "13": 10.8768, + "14": 10.90362, + "15": 10.8311, + "16": 10.8345, + "17": 10.80061, + "18": 10.82066, + "19": 10.81459, + "20": 10.71809, + "21": 10.68631, + "22": 
10.532, + "23": 10.7048, + "24": 10.58548, + "25": 10.51896, + "26": 10.58491, + "27": 10.60108, + "28": 10.53537, + "29": 10.57113, + "30": 10.33244, + "31": 10.0583, + "32": 10.42784, + "33": 10.4202, + "34": 10.16985, + "35": 10.23069, + "36": 10.18752, + "37": 10.31251, + "38": 10.14213, + "39": 10.38135, + "40": 10.04843, + "41": 10.10329, + "42": 10.17154, + "43": 9.78292, + "44": 9.90959, + "45": 9.78499, + "46": 9.76878, + "47": 10.10082, + "48": 9.80965, + "49": 9.48778, + "50": 9.86704 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1667.0, - "2": 1718.0, - "3": 1638.0, - "4": 1881.0, - "5": 1927.0, - "6": 1792.0, - "7": 1783.0, - "8": 1580.0, - "9": 1935.0, - "10": 1407.0, - "11": 1831.0, - "12": 1662.0, - "13": 1870.0, - "14": 1777.0, - "15": 1930.0, - "16": 1794.0, - "17": 1932.0, - "18": 1631.0, - "19": 1806.0, - "20": 1566.0, - "21": 1853.0, - "22": 1622.0, - "23": 2077.0, - "24": 1592.0, - "25": 1628.0, - "26": 1677.0, - "27": 1791.0, - "28": 1979.0, - "29": 2020.0, - "30": 1914.0, - "31": 1597.0, - "32": 1886.0, - "33": 2287.0, - "34": 1836.0, - "35": 1981.0, - "36": 1882.0, - "37": 2505.0, - "38": 2114.0, - "39": 2438.0, - "40": 2204.0, - "41": 2287.0, - "42": 2344.0, - "43": 2069.0, - "44": 2148.0, - "45": 2190.0, - "46": 2312.0, - "47": 2545.0, - "48": 2494.0, - "49": 2296.0, - "50": 2395.0 + "1": 1649.0, + "2": 34620.0, + "3": 34517.0, + "4": 1822.0, + "5": 34641.0, + "6": 1849.0, + "7": 1816.0, + "8": 1587.0, + "9": 34596.0, + "10": 34175.0, + "11": 34644.0, + "12": 34371.0, + "13": 1821.0, + "14": 1785.0, + "15": 1928.0, + "16": 1825.0, + "17": 1820.0, + "18": 34490.0, + "19": 1711.0, + "20": 1628.0, + "21": 1805.0, + "22": 1637.0, + "23": 34927.0, + "24": 1586.0, + "25": 1580.0, + "26": 34510.0, + "27": 34510.0, + "28": 2017.0, + "29": 1992.0, + "30": 1955.0, + "31": 34406.0, + "32": 34643.0, + "33": 34950.0, + "34": 1992.0, + "35": 34671.0, + "36": 34721.0, + "37": 2360.0, + "38": 34999.0, + 
"39": 35102.0, + "40": 2173.0, + "41": 35092.0, + "42": 2405.0, + "43": 34752.0, + "44": 34911.0, + "45": 34908.0, + "46": 35080.0, + "47": 35225.0, + "48": 35262.0, + "49": 35174.0, + "50": 35281.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 537920000.0, - "2": 537920000.0, - "3": 537920000.0, - "4": 537920000.0, - "5": 537920000.0, - "6": 537920000.0, - "7": 537920000.0, - "8": 537920000.0, - "9": 537920000.0, - "10": 537920000.0, - "11": 537920000.0, - "12": 537920000.0, - "13": 537920000.0, - "14": 537920000.0, - "15": 537920000.0, - "16": 537920000.0, - "17": 537920000.0, - "18": 537920000.0, - "19": 537920000.0, - "20": 537920000.0, - "21": 537920000.0, - "22": 537920000.0, - "23": 537920000.0, - "24": 537920000.0, - "25": 537920000.0, - "26": 537920000.0, - "27": 537920000.0, - "28": 537920000.0, - "29": 537920000.0, - "30": 537920000.0, - "31": 537920000.0, - "32": 537920000.0, - "33": 537920000.0, - "34": 537920000.0, - "35": 537920000.0, - "36": 537920000.0, - "37": 537920000.0, - "38": 537920000.0, - "39": 537920000.0, - "40": 537920000.0, - "41": 537920000.0, - "42": 537920000.0, - "43": 537920000.0, - "44": 537920000.0, - "45": 537920000.0, - "46": 537920000.0, - "47": 537920000.0, - "48": 537920000.0, - "49": 537920000.0, - "50": 537920000.0 + "1": 539492864.0, + "2": 539492864.0, + "3": 539492864.0, + "4": 539492864.0, + "5": 539492864.0, + "6": 539492864.0, + "7": 539492864.0, + "8": 539492864.0, + "9": 539492864.0, + "10": 539492864.0, + "11": 539492864.0, + "12": 539492864.0, + "13": 539492864.0, + "14": 539492864.0, + "15": 539492864.0, + "16": 539492864.0, + "17": 539492864.0, + "18": 539492864.0, + "19": 539492864.0, + "20": 539492864.0, + "21": 539492864.0, + "22": 539492864.0, + "23": 539492864.0, + "24": 539492864.0, + "25": 539492864.0, + "26": 539492864.0, + "27": 539492864.0, + "28": 539492864.0, + "29": 539492864.0, + "30": 539492864.0, + "31": 539492864.0, + "32": 
539492864.0, + "33": 539492864.0, + "34": 539492864.0, + "35": 539492864.0, + "36": 539492864.0, + "37": 539492864.0, + "38": 539492864.0, + "39": 539492864.0, + "40": 539492864.0, + "41": 539492864.0, + "42": 539492864.0, + "43": 539492864.0, + "44": 539492864.0, + "45": 539492864.0, + "46": 539492864.0, + "47": 539492864.0, + "48": 539492864.0, + "49": 539492864.0, + "50": 539492864.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1225688576.0, - "2": 1408955904.0, - "3": 1408955904.0, - "4": 1408955904.0, - "5": 1408955904.0, - "6": 1408955904.0, - "7": 1408955904.0, - "8": 1408955904.0, - "9": 1408955904.0, - "10": 1408955904.0, - "11": 1408955904.0, - "12": 1408955904.0, - "13": 1408955904.0, - "14": 1408955904.0, - "15": 1408955904.0, - "16": 1408955904.0, - "17": 1408955904.0, - "18": 1408955904.0, - "19": 1408955904.0, - "20": 1408955904.0, - "21": 1408955904.0, - "22": 1408955904.0, - "23": 1408955904.0, - "24": 1408955904.0, - "25": 1408955904.0, - "26": 1408955904.0, - "27": 1408955904.0, - "28": 1408955904.0, - "29": 1408955904.0, - "30": 1408955904.0, - "31": 1408955904.0, - "32": 1408955904.0, - "33": 1408955904.0, - "34": 1408955904.0, - "35": 1408955904.0, - "36": 1408955904.0, - "37": 1408955904.0, - "38": 1408955904.0, - "39": 1408955904.0, - "40": 1408955904.0, - "41": 1408955904.0, - "42": 1408955904.0, - "43": 1408955904.0, - "44": 1408955904.0, - "45": 1408955904.0, - "46": 1408955904.0, - "47": 1408955904.0, - "48": 1408955904.0, - "49": 1408955904.0, - "50": 1408955904.0 + "1": 1729398272.0, + "2": 1914238464.0, + "3": 1914238464.0, + "4": 1914238464.0, + "5": 1914238464.0, + "6": 1914238464.0, + "7": 1914238464.0, + "8": 1914238464.0, + "9": 1914238464.0, + "10": 1914238464.0, + "11": 1914238464.0, + "12": 1914238464.0, + "13": 1914238464.0, + "14": 1914238464.0, + "15": 1914238464.0, + "16": 1914238464.0, + "17": 1914238464.0, + "18": 1914238464.0, + "19": 1914238464.0, + 
"20": 1914238464.0, + "21": 1914238464.0, + "22": 1914238464.0, + "23": 1914238464.0, + "24": 1914238464.0, + "25": 1914238464.0, + "26": 1914238464.0, + "27": 1914238464.0, + "28": 1914238464.0, + "29": 1914238464.0, + "30": 1914238464.0, + "31": 1914238464.0, + "32": 1914238464.0, + "33": 1914238464.0, + "34": 1914238464.0, + "35": 1914238464.0, + "36": 1914238464.0, + "37": 1914238464.0, + "38": 1914238464.0, + "39": 1914238464.0, + "40": 1914238464.0, + "41": 1914238464.0, + "42": 1914238464.0, + "43": 1914238464.0, + "44": 1914238464.0, + "45": 1914238464.0, + "46": 1914238464.0, + "47": 1914238464.0, + "48": 1914238464.0, + "49": 1914238464.0, + "50": 1914238464.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 23.32725, - "3": 0.64935, - "4": 0.63773, - "5": 0.63792, - "6": 0.63776, - "7": 0.63937, - "8": 0.64046, - "9": 0.6361, - "10": 0.64423, - "11": 0.64021, - "12": 0.63952, - "13": 0.6451, - "14": 0.63986, - "15": 0.64096, - "16": 0.64001, - "17": 0.63996, - "18": 0.63814, - "19": 0.64219, - "20": 0.64081, - "21": 0.63784, - "22": 0.64101, - "23": 0.64231, - "24": 0.63904, - "25": 0.64041, - "26": 0.64744, - "27": 0.64738, - "28": 0.64182, - "29": 0.64714, - "30": 0.64337, - "31": 0.64627, - "32": 0.64639, - "33": 0.64426, - "34": 0.64469, - "35": 0.64416, - "36": 0.64898, - "37": 0.64103, - "38": 0.64541, - "39": 0.6467, - "40": 0.64896, - "41": 0.64438, - "42": 0.64755, - "43": 0.64706, - "44": 0.64706, - "45": 0.64435, - "46": 0.64608, - "47": 0.64784, - "48": 0.6453, - "49": 0.64942, - "50": 0.644 + "2": 33.07638, + "3": 4.62885, + "4": 2.78847, + "5": 3.81661, + "6": 4.56696, + "7": 3.45862, + "8": 2.51384, + "9": 2.4275, + "10": 3.71405, + "11": 3.43435, + "12": 4.09536, + "13": 1.70339, + "14": 4.2772, + "15": 2.37094, + "16": 2.10863, + "17": 1.98699, + "18": 4.2631, + "19": 2.93254, + "20": 4.0228, + "21": 3.09583, + "22": 3.24615, + "23": 4.11215, + "24": 2.40344, + "25": 3.66841, + "26": 
0.5852, + "27": 6.04702, + "28": 2.56074, + "29": 2.3649, + "30": 2.97314, + "31": 2.21341, + "32": 5.02931, + "33": 2.09974, + "34": 1.53163, + "35": 2.17862, + "36": 3.61274, + "37": 2.68687, + "38": 1.85327, + "39": 3.95559, + "40": 3.49999, + "41": 4.68689, + "42": 2.7863, + "43": 3.48504, + "44": 2.4547, + "45": 2.47677, + "46": 2.7805, + "47": 4.16521, + "48": 3.3328, + "49": 2.95889, + "50": 3.68852 } } } \ No newline at end of file diff --git a/tests/unit_tests/fusions/test_fused_mhc_kernels.py b/tests/unit_tests/fusions/test_fused_mhc_kernels.py new file mode 100644 index 00000000000..15468df8264 --- /dev/null +++ b/tests/unit_tests/fusions/test_fused_mhc_kernels.py @@ -0,0 +1,564 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Unit tests for fused mHC kernels (cuTile) and native implementations. + +Each test compares the fused kernel's forward output AND backward gradients +against a pure-PyTorch differentiable reference to catch numerical drift +introduced by kernel fusion. 
+""" + +import math +from typing import Optional + +import pytest +import torch +from torch import Tensor + +from megatron.core.fusions.fused_mhc_kernels import is_cutile_available +from megatron.core.transformer.hyper_connection import ( + native_h_aggregate, + native_h_post_bda, + native_proj_rms, + native_sinkhorn, +) + +_require_cutile = pytest.mark.skipif(not is_cutile_available(), reason="cuTile not installed") + + +@pytest.fixture(autouse=True) +def _skip_without_cuda(): + if not torch.cuda.is_available(): + pytest.skip("CUDA not available") + + +DTYPE = torch.bfloat16 +DEVICE = "cuda" +FWD_ATOL, FWD_RTOL = 2e-2, 2e-2 +BWD_ATOL, BWD_RTOL = 5e-2, 5e-2 +RAND_LO, RAND_HI = -0.1, 0.1 +COSINE_SIM_THRESH = 0.999 + + +def _assert_cosine_similar(a: Tensor, b: Tensor, threshold: float, msg: str = ""): + """Assert that flattened tensors have cosine similarity >= threshold.""" + a_flat = a.flatten().float() + b_flat = b.flatten().float() + sim = torch.nn.functional.cosine_similarity(a_flat.unsqueeze(0), b_flat.unsqueeze(0)).item() + assert sim >= threshold, ( + f"{msg}: cosine similarity {sim:.6f} < {threshold} " + f"(max_abs_diff={torch.max(torch.abs(a_flat - b_flat)):.6e})" + ) + + +def _rand(*shape, **kwargs): + """Uniform in [RAND_LO, RAND_HI] to keep magnitudes small for bf16 stability.""" + return torch.empty(*shape, dtype=DTYPE, device=DEVICE, **kwargs).uniform_(RAND_LO, RAND_HI) + + +def _info(): + backend = "cuTile" if is_cutile_available() else "native" + print(f"\n [backend: {backend}]") + + +# ============================================================================ +# Pure-PyTorch differentiable references (used by both fwd AND bwd tests) +# ============================================================================ + + +def _ref_sinkhorn(logits: Tensor, num_iters: int, eps: float = 1e-6) -> Tensor: + row_max = logits.max(dim=-1, keepdim=True).values + M = torch.exp(logits - row_max) + for _ in range(num_iters): + M = M / M.sum(dim=-1, 
keepdim=True).clamp(min=eps) + M = M / M.sum(dim=-2, keepdim=True).clamp(min=eps) + return M + + +def _ref_h_aggregate(x: Tensor, h_pre: Tensor) -> Tensor: + return (x * h_pre.unsqueeze(-1)).sum(dim=2) + + +def _ref_h_post_bda( + h_res: Tensor, orig_res: Tensor, h_post: Tensor, x: Tensor, bias: Optional[Tensor] +) -> Tensor: + s, b, n, C = orig_res.shape + mixed = torch.bmm(h_res.view(s * b, n, n), orig_res.view(s * b, n, C)).view(s, b, n, C) + x_exp = h_post.unsqueeze(-1) * x.unsqueeze(2) + out = x_exp + mixed + if bias is not None: + out = out + h_post.unsqueeze(-1) * bias.view(1, 1, 1, C) + return out + + +def _ref_proj_rms(x: Tensor, weight: Tensor, eps: float = 1e-6): + proj = torch.matmul(x, weight.t()) + norm = x.norm(dim=-1, keepdim=True) + K = x.shape[-1] + r = 1.0 / (norm / math.sqrt(K) + eps) + return proj, r + + +# ============================================================================ +# Sinkhorn +# ============================================================================ + + +class TestNativeSinkhorn: + """Tests for the native SinkhornKnopp implementation.""" + + @pytest.mark.parametrize("s,b,n,iters", [(2, 4, 4, 5), (1, 1, 2, 10)]) + def test_fwd_bwd_vs_torch_reference(self, s, b, n, iters): + """native_sinkhorn fwd output and bwd grad must match the inline PyTorch reference.""" + _info() + eps = 1e-6 + data = _rand(s, b, n, n) + grad_out = _rand(s, b, n, n) + + # -- native_sinkhorn path (autograd.Function) -- + inp_f = data.clone().requires_grad_(True) + out_f = native_sinkhorn(inp_f, iters, eps) + out_f.backward(grad_out) + grad_f = inp_f.grad.clone() + + # -- inline torch reference (fully differentiable) -- + inp_r = data.clone().requires_grad_(True) + out_r = _ref_sinkhorn(inp_r, iters, eps) + out_r.backward(grad_out) + grad_r = inp_r.grad.clone() + + torch.testing.assert_close(out_f, out_r, atol=FWD_ATOL, rtol=FWD_RTOL) + torch.testing.assert_close(grad_f, grad_r, atol=BWD_ATOL, rtol=BWD_RTOL) + + +class TestFusedSinkhorn: + 
@_require_cutile + @pytest.mark.parametrize("s,b,n,iters", [(2, 4, 4, 5), (1, 1, 2, 10)]) + def test_fwd_bwd_vs_reference(self, s, b, n, iters): + """E2E: fused cuTile fwd output and bwd grad must match the PyTorch reference.""" + from megatron.core.fusions.fused_mhc_kernels import fused_sinkhorn + + _info() + eps = 1e-6 + data = _rand(s, b, n, n) + grad_out = _rand(s, b, n, n) + + # -- fused path -- + inp_f = data.clone().requires_grad_(True) + out_f = fused_sinkhorn(inp_f, iters, eps) + out_f.backward(grad_out) + grad_f = inp_f.grad.clone() + + # -- reference path (fully differentiable) -- + inp_r = data.clone().requires_grad_(True) + out_r = _ref_sinkhorn(inp_r, iters, eps) + out_r.backward(grad_out) + grad_r = inp_r.grad.clone() + + torch.testing.assert_close(out_f, out_r, atol=FWD_ATOL, rtol=FWD_RTOL) + torch.testing.assert_close(grad_f, grad_r, atol=BWD_ATOL, rtol=BWD_RTOL) + + +# ============================================================================ +# H_aggregate +# ============================================================================ + + +class TestNativeHAggregate: + """Tests for native_h_aggregate.""" + + @pytest.mark.parametrize("s,b,n,C", [(2, 4, 4, 1024), (1, 1, 2, 256)]) + def test_fwd_bwd_vs_torch_reference(self, s, b, n, C): + _info() + x_data = _rand(s, b, n, C) + h_data = _rand(s, b, n) + grad_out = _rand(s, b, C) + + xf = x_data.clone().requires_grad_(True) + hf = h_data.clone().requires_grad_(True) + of = native_h_aggregate(xf, hf) + of.backward(grad_out) + + xr = x_data.clone().requires_grad_(True) + hr = h_data.clone().requires_grad_(True) + oref = _ref_h_aggregate(xr, hr) + oref.backward(grad_out) + + torch.testing.assert_close(of, oref, atol=FWD_ATOL, rtol=FWD_RTOL) + torch.testing.assert_close(xf.grad, xr.grad, atol=BWD_ATOL, rtol=BWD_RTOL) + torch.testing.assert_close(hf.grad, hr.grad, atol=BWD_ATOL, rtol=BWD_RTOL) + + +class TestFusedHAggregate: + @_require_cutile + @pytest.mark.parametrize("s,b,n,C", [(2, 4, 4, 1024), (1, 
1, 2, 256)]) + def test_fwd_bwd_vs_reference(self, s, b, n, C): + """E2E: fused cuTile fwd output and bwd grads must match the PyTorch reference.""" + from megatron.core.fusions.fused_mhc_kernels import fused_h_aggregate + + _info() + x_data = _rand(s, b, n, C) + h_data = _rand(s, b, n) + grad_out = _rand(s, b, C) + + # -- fused path -- + xf = x_data.clone().requires_grad_(True) + hf = h_data.clone().requires_grad_(True) + of = fused_h_aggregate(xf, hf) + of.backward(grad_out) + + # -- reference path -- + xr = x_data.clone().requires_grad_(True) + hr = h_data.clone().requires_grad_(True) + oref = _ref_h_aggregate(xr, hr) + oref.backward(grad_out) + + torch.testing.assert_close(of, oref, atol=FWD_ATOL, rtol=FWD_RTOL) + torch.testing.assert_close(xf.grad, xr.grad, atol=BWD_ATOL, rtol=BWD_RTOL) + torch.testing.assert_close(hf.grad, hr.grad, atol=BWD_ATOL, rtol=BWD_RTOL) + + +# ============================================================================ +# H_post BDA +# ============================================================================ + + +class TestNativeHPostBDA: + """Tests for native_h_post_bda.""" + + @pytest.mark.parametrize("with_bias", [True, False]) + @pytest.mark.parametrize("s,b,n,C", [(2, 4, 4, 1024), (1, 2, 2, 256)]) + def test_fwd_bwd_vs_torch_reference(self, s, b, n, C, with_bias): + _info() + hr_data = _rand(s, b, n, n) + orig_data = _rand(s, b, n, C) + hp_data = _rand(s, b, n) + x_data = _rand(s, b, C) + bias_data = _rand(C) if with_bias else None + grad_out = _rand(s, b, n, C) + + def _make_inputs(): + hr = hr_data.clone().requires_grad_(True) + orig = orig_data.clone().requires_grad_(True) + hp = hp_data.clone().requires_grad_(True) + x = x_data.clone().requires_grad_(True) + bi = bias_data.clone().requires_grad_(True) if with_bias else None + return hr, orig, hp, x, bi + + hr_f, orig_f, hp_f, x_f, bi_f = _make_inputs() + out_f = native_h_post_bda(hr_f, orig_f, hp_f, x_f, bi_f) + out_f.backward(grad_out) + + hr_r, orig_r, hp_r, x_r, bi_r = 
_make_inputs() + out_r = _ref_h_post_bda(hr_r, orig_r, hp_r, x_r, bi_r) + out_r.backward(grad_out) + + torch.testing.assert_close(out_f, out_r, atol=FWD_ATOL, rtol=FWD_RTOL) + for name, gf, gr in [ + ("h_res", hr_f.grad, hr_r.grad), + ("orig_res", orig_f.grad, orig_r.grad), + ("h_post", hp_f.grad, hp_r.grad), + ("x", x_f.grad, x_r.grad), + ]: + torch.testing.assert_close( + gf, gr, atol=BWD_ATOL, rtol=BWD_RTOL, msg=f"backward mismatch on {name}" + ) + if with_bias: + torch.testing.assert_close( + bi_f.grad, bi_r.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on bias" + ) + + +class TestFusedHPostBDA: + @_require_cutile + @pytest.mark.parametrize("with_bias", [True, False]) + @pytest.mark.parametrize("s,b,n,C", [(2, 4, 4, 1024), (1, 2, 2, 256)]) + def test_fwd_bwd_vs_reference(self, s, b, n, C, with_bias): + """E2E: fused cuTile fwd output and bwd grads must match the PyTorch reference.""" + from megatron.core.fusions.fused_mhc_kernels import fused_h_post_bda + + _info() + hr_data = _rand(s, b, n, n) + orig_data = _rand(s, b, n, C) + hp_data = _rand(s, b, n) + x_data = _rand(s, b, C) + bias_data = _rand(C) if with_bias else None + grad_out = _rand(s, b, n, C) + + def _make_inputs(): + hr = hr_data.clone().requires_grad_(True) + orig = orig_data.clone().requires_grad_(True) + hp = hp_data.clone().requires_grad_(True) + x = x_data.clone().requires_grad_(True) + bi = bias_data.clone().requires_grad_(True) if with_bias else None + return hr, orig, hp, x, bi + + # -- fused path -- + hr_f, orig_f, hp_f, x_f, bi_f = _make_inputs() + out_f = fused_h_post_bda(hr_f, orig_f, hp_f, x_f, bi_f) + out_f.backward(grad_out) + + # -- reference path -- + hr_r, orig_r, hp_r, x_r, bi_r = _make_inputs() + out_r = _ref_h_post_bda(hr_r, orig_r, hp_r, x_r, bi_r) + out_r.backward(grad_out) + + torch.testing.assert_close(out_f, out_r, atol=FWD_ATOL, rtol=FWD_RTOL) + for name, gf, gr in [ + ("h_res", hr_f.grad, hr_r.grad), + ("orig_res", orig_f.grad, orig_r.grad), + ("h_post", 
hp_f.grad, hp_r.grad), + ("x", x_f.grad, x_r.grad), + ]: + torch.testing.assert_close( + gf, gr, atol=BWD_ATOL, rtol=BWD_RTOL, msg=f"backward mismatch on {name}" + ) + if with_bias: + torch.testing.assert_close( + bi_f.grad, bi_r.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on bias" + ) + + +# ============================================================================ +# Proj RMS +# ============================================================================ + + +class TestNativeProjRms: + """Tests for native_proj_rms.""" + + @pytest.mark.parametrize("M,N,K", [(256, 20, 4096), (64, 8, 512)]) + def test_fwd_bwd_vs_torch_reference(self, M, N, K): + _info() + eps = 1e-6 + x_data = _rand(M, K) + w_data = _rand(N, K) + grad_proj = _rand(M, N) + grad_r = _rand(M, 1) + + xf = x_data.clone().requires_grad_(True) + wf = w_data.clone().requires_grad_(True) + proj_f, r_f = native_proj_rms(xf, wf, eps) + (proj_f * grad_proj + r_f * grad_r).sum().backward() + + xr = x_data.clone().requires_grad_(True) + wr = w_data.clone().requires_grad_(True) + proj_r, r_r = _ref_proj_rms(xr, wr, eps) + (proj_r * grad_proj + r_r * grad_r).sum().backward() + + torch.testing.assert_close(proj_f, proj_r, atol=FWD_ATOL, rtol=FWD_RTOL) + torch.testing.assert_close(r_f, r_r, atol=FWD_ATOL, rtol=FWD_RTOL) + torch.testing.assert_close( + xf.grad, xr.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on x" + ) + torch.testing.assert_close( + wf.grad, wr.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on weight" + ) + + +class TestFusedProjRms: + @_require_cutile + @pytest.mark.parametrize("M,N,K", [(256, 20, 4096), (64, 8, 512)]) + def test_fwd_bwd_vs_reference(self, M, N, K): + """E2E: fused cuTile fwd output and bwd grads must match the PyTorch reference.""" + from megatron.core.fusions.fused_mhc_kernels import fused_proj_rms + + _info() + eps = 1e-6 + x_data = _rand(M, K) + w_data = _rand(N, K) + grad_proj = _rand(M, N) + grad_r = _rand(M, 1) + + # -- fused path -- 
+ xf = x_data.clone().requires_grad_(True) + wf = w_data.clone().requires_grad_(True) + proj_f, r_f = fused_proj_rms(xf, wf, eps) + (proj_f * grad_proj + r_f * grad_r).sum().backward() + + # -- reference path -- + xr = x_data.clone().requires_grad_(True) + wr = w_data.clone().requires_grad_(True) + proj_r, r_r = _ref_proj_rms(xr, wr, eps) + (proj_r * grad_proj + r_r * grad_r).sum().backward() + + torch.testing.assert_close(proj_f, proj_r, atol=FWD_ATOL, rtol=FWD_RTOL) + torch.testing.assert_close(r_f, r_r, atol=FWD_ATOL, rtol=FWD_RTOL) + torch.testing.assert_close( + xf.grad, xr.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on x" + ) + torch.testing.assert_close( + wf.grad, wr.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on weight" + ) + + +# ============================================================================ +# End-to-end pipeline (all four kernels chained) +# ============================================================================ + + +class TestEndToEndNative: + """Full mHC pipeline using native modules. + + proj_rms -> compute_h -> sinkhorn -> aggregate -> h_post_bda. + Compares the native modules against inline PyTorch reference. 
+ """ + + def test_full_pipeline_fwd_bwd(self): + _info() + s, b, n, C = 2, 4, 4, 1024 + eps = 1e-6 + sinkhorn_iters = 5 + + hs_data = _rand(s, b, n * C) + w_data = _rand(n * n + 2 * n, n * C) + layer_out_data = _rand(s, b, C) + layer_bias_data = _rand(C) + + def _run_native_modules(): + hs = hs_data.clone().requires_grad_(True) + w = w_data.clone().requires_grad_(True) + + x_2d = hs.reshape(s * b, n * C) + proj, r = native_proj_rms(x_2d, w, eps) + proj = proj.view(s, b, -1) + r = r.view(s, b, 1) + + h = r * proj + h_pre = h[..., :n].sigmoid() + h_post = h[..., n : 2 * n].sigmoid() * 2 + h_res_logits = h[..., 2 * n :] + h_res = native_sinkhorn(h_res_logits.view(s, b, n, n), sinkhorn_iters, eps) + + aggregated = native_h_aggregate(hs.view(s, b, n, C), h_pre) + + output = native_h_post_bda( + h_res, hs.view(s, b, n, C), h_post, layer_out_data, layer_bias_data + ) + + loss = output.sum() + aggregated.sum() + loss.backward() + return output.detach(), aggregated.detach(), hs.grad.clone() + + def _run_inline_ref(): + hs = hs_data.clone().requires_grad_(True) + w = w_data.clone().requires_grad_(True) + + x_2d = hs.reshape(s * b, n * C) + proj, r = _ref_proj_rms(x_2d, w, eps) + proj = proj.view(s, b, -1) + r = r.view(s, b, 1) + + h = r * proj + h_pre = h[..., :n].sigmoid() + h_post = h[..., n : 2 * n].sigmoid() * 2 + h_res_logits = h[..., 2 * n :] + h_res = _ref_sinkhorn(h_res_logits.view(s, b, n, n), sinkhorn_iters, eps) + + aggregated = _ref_h_aggregate(hs.view(s, b, n, C), h_pre) + + output = _ref_h_post_bda( + h_res, hs.view(s, b, n, C), h_post, layer_out_data, layer_bias_data + ) + + loss = output.sum() + aggregated.sum() + loss.backward() + return output.detach(), aggregated.detach(), hs.grad.clone() + + out_m, agg_m, grad_m = _run_native_modules() + out_r, agg_r, grad_r = _run_inline_ref() + + torch.testing.assert_close( + agg_m, agg_r, atol=FWD_ATOL, rtol=FWD_RTOL, msg="aggregated output mismatch" + ) + torch.testing.assert_close( + out_m, out_r, atol=FWD_ATOL, 
rtol=FWD_RTOL, msg="h_post_bda output mismatch" + ) + _assert_cosine_similar( + grad_m, grad_r, COSINE_SIM_THRESH, msg="hidden_states grad (E2E backward)" + ) + + +class TestEndToEndFused: + """Full mHC pipeline using fused cuTile kernels (requires cuTile).""" + + @_require_cutile + def test_full_pipeline_fwd_bwd(self): + from megatron.core.fusions.fused_mhc_kernels import ( + fused_h_aggregate, + fused_h_post_bda, + fused_proj_rms, + fused_sinkhorn, + ) + + _info() + s, b, n, C = 2, 4, 4, 1024 + eps = 1e-6 + sinkhorn_iters = 5 + + hs_data = _rand(s, b, n * C) + w_data = _rand(n * n + 2 * n, n * C) + layer_out_data = _rand(s, b, C) + layer_bias_data = _rand(C) + + def _run_fused(): + hs = hs_data.clone().requires_grad_(True) + w = w_data.clone().requires_grad_(True) + + x_2d = hs.reshape(s * b, n * C) + proj, r = fused_proj_rms(x_2d, w, eps) + proj = proj.view(s, b, -1) + r = r.view(s, b, 1) + + h = r * proj + h_pre = h[..., :n].sigmoid() + h_post = h[..., n : 2 * n].sigmoid() * 2 + h_res_logits = h[..., 2 * n :] + h_res = fused_sinkhorn(h_res_logits.view(s, b, n, n), sinkhorn_iters, eps) + + aggregated = fused_h_aggregate(hs.view(s, b, n, C), h_pre) + + output = fused_h_post_bda( + h_res, hs.view(s, b, n, C), h_post, layer_out_data, layer_bias_data + ) + + loss = output.sum() + aggregated.sum() + loss.backward() + return output.detach(), aggregated.detach(), hs.grad.clone() + + def _run_ref(): + hs = hs_data.clone().requires_grad_(True) + w = w_data.clone().requires_grad_(True) + + x_2d = hs.reshape(s * b, n * C) + proj, r = _ref_proj_rms(x_2d, w, eps) + proj = proj.view(s, b, -1) + r = r.view(s, b, 1) + + h = r * proj + h_pre = h[..., :n].sigmoid() + h_post = h[..., n : 2 * n].sigmoid() * 2 + h_res_logits = h[..., 2 * n :] + h_res = _ref_sinkhorn(h_res_logits.view(s, b, n, n), sinkhorn_iters, eps) + + aggregated = _ref_h_aggregate(hs.view(s, b, n, C), h_pre) + + output = _ref_h_post_bda( + h_res, hs.view(s, b, n, C), h_post, layer_out_data, layer_bias_data + ) + 
+ loss = output.sum() + aggregated.sum() + loss.backward() + return output.detach(), aggregated.detach(), hs.grad.clone() + + out_f, agg_f, grad_f = _run_fused() + out_r, agg_r, grad_r = _run_ref() + + torch.testing.assert_close( + agg_f, agg_r, atol=FWD_ATOL, rtol=FWD_RTOL, msg="aggregated output mismatch" + ) + torch.testing.assert_close( + out_f, out_r, atol=FWD_ATOL, rtol=FWD_RTOL, msg="h_post_bda output mismatch" + ) + _assert_cosine_similar( + grad_f, grad_r, COSINE_SIM_THRESH, msg="hidden_states grad (E2E backward)" + ) diff --git a/tests/unit_tests/models/test_mamba_moe_model.py b/tests/unit_tests/models/test_mamba_moe_model.py index 1052d7781a5..bf3eb9b198b 100644 --- a/tests/unit_tests/models/test_mamba_moe_model.py +++ b/tests/unit_tests/models/test_mamba_moe_model.py @@ -263,6 +263,7 @@ "tp_only_amax_red": False, "transformer_impl": "transformer_engine", "use_cpu_initialization": None, + "use_fused_mhc": False, "use_fused_weighted_squared_relu": False, "use_inference_optimized_layers": False, "use_kitchen": False, From 0e53b308f8849f938789d3ad8191366dc5c2434b Mon Sep 17 00:00:00 2001 From: ilml Date: Wed, 25 Mar 2026 15:02:22 -0700 Subject: [PATCH 322/334] fix: correct H2->H4 header skips in router_replay.md --- docs/source/api-guide/router_replay.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/api-guide/router_replay.md b/docs/source/api-guide/router_replay.md index 12b5a4cd942..b2e043b3065 100644 --- a/docs/source/api-guide/router_replay.md +++ b/docs/source/api-guide/router_replay.md @@ -55,7 +55,7 @@ The implementation cleanly separates the replay logic from the router's core com * `record_indices()`: A method to save the computed indices. * The `topk_routing_with_score_function` is modified to contain the core logic. 
It checks the `router_replay_action` on the `router_replay` instance and accordingly performs one of the following actions: computes and records indices, replays indices from `target_topk_idx` (for forward), replays indices from `replay_backward_list` (for backward), or falls through to the default dynamic routing. -#### Training recompute usage +### Training recompute usage - During forward replay, `set_target_indices()` prepares `replay_backward_list` so each micro-batch’s indices are available for recomputation. - During recompute/backward, set action to `REPLAY_BACKWARD` so indices are consumed in FIFO order to mirror the forward sequence. @@ -77,7 +77,7 @@ The implementation cleanly separates the replay logic from the router's core com 5. **Cleanup** - Use `RouterReplay.clear_global_indices()`, `RouterReplay.clear_global_router_replay_action()`, and `RouterReplay.clear_global_router_replay_instances()` to restore default behavior and prevent memory leaks. -#### Quick usage with `topk_routing_with_score_function` +### Quick usage with `topk_routing_with_score_function` ```python import torch From 076d20fae766af572f0c401a0482ff1fe258a68d Mon Sep 17 00:00:00 2001 From: ilml Date: Wed, 25 Mar 2026 15:35:13 -0700 Subject: [PATCH 323/334] fix: add missing tensor_parallel import in absorbed_mla.py --- .../transformer/experimental_attention_variant/absorbed_mla.py | 1 + 1 file changed, 1 insertion(+) diff --git a/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py b/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py index 4ac7636d776..8e4e82b01ec 100644 --- a/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py +++ b/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py @@ -19,6 +19,7 @@ import torch from megatron.core.extensions.transformer_engine import HAVE_TE +from megatron.core import tensor_parallel from megatron.core.models.common.embeddings import ( RotaryEmbedding, 
YarnRotaryEmbedding, From 09611963cbb21a6a80405b24d496d74defcb7a40 Mon Sep 17 00:00:00 2001 From: ilml Date: Wed, 25 Mar 2026 15:46:04 -0700 Subject: [PATCH 324/334] fix: correct import ordering for tensor_parallel in absorbed_mla --- .../transformer/experimental_attention_variant/absorbed_mla.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py b/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py index 8e4e82b01ec..6c6d5b07a75 100644 --- a/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py +++ b/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py @@ -18,8 +18,8 @@ import torch -from megatron.core.extensions.transformer_engine import HAVE_TE from megatron.core import tensor_parallel +from megatron.core.extensions.transformer_engine import HAVE_TE from megatron.core.models.common.embeddings import ( RotaryEmbedding, YarnRotaryEmbedding, From 6823637068cd1889a55e30a02301a30a336ff4bc Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Mon, 30 Mar 2026 14:10:44 +0800 Subject: [PATCH 325/334] fix layerwise related merge error due to dev refactor Signed-off-by: Deyu Fu --- megatron/core/optimizer/__init__.py | 6 +++++- megatron/core/optimizer/layer_wise_optimizer.py | 4 +--- megatron/core/optimizer/muon.py | 6 +++++- tests/unit_tests/test_layer_wise_optimizer.py | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 55fd3a128b9..b64c871104d 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -839,7 +839,11 @@ def _get_megatron_emerging_optimizer( logger, logging.INFO, f'Using LayerWiseDistributedOptimizer for {eopt_name}' ) return LayerWiseDistributedOptimizer( - list(base_optimizers), config, pg_collection, init_state_fn_list=list(init_fns) + list(base_optimizers), + config, + pg_collection, 
+ init_state_fn_list=list(init_fns), + model_chunks=model_chunks, ) return ChainedOptimizer(results) diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py index 6e59e03ae42..6e0f32ab357 100644 --- a/megatron/core/optimizer/layer_wise_optimizer.py +++ b/megatron/core/optimizer/layer_wise_optimizer.py @@ -46,7 +46,6 @@ def __init__( pg_collection: Optional[ProcessGroupCollection] = None, init_state_fn_list: Optional[List[Callable]] = None, model_chunks: Optional[List] = None, - async_allgather: bool = False, ) -> None: """ Initialize LayerWiseDistributedOptimizer. @@ -57,14 +56,13 @@ def __init__( pg_collection: ProcessGroupCollection. init_state_fn_list: List of init state functions. model_chunks: DDP-wrapped model chunks (needed for async_allgather). - async_allgather: If True, defer param all-gather to forward pre-hooks. """ self.pg_collection = pg_collection self.shard_params(optimizers) # Set up async all-gather using DDP bucket infrastructure. - self.async_allgather = async_allgather + self.async_allgather = config.overlap_param_gather if self.async_allgather: assert ( model_chunks is not None diff --git a/megatron/core/optimizer/muon.py b/megatron/core/optimizer/muon.py index b2f989e7441..329ce60dd1f 100644 --- a/megatron/core/optimizer/muon.py +++ b/megatron/core/optimizer/muon.py @@ -22,5 +22,9 @@ def get_megatron_muon_optimizer(*args: Any, **kwargs: Any) -> Any: """ from . 
import get_megatron_optimizer - kwargs.pop('layer_wise_distributed_optimizer', None) + if kwargs.pop('layer_wise_distributed_optimizer', False): + config = args[0] if args else kwargs.get('config') + if config is not None: + config.use_layer_wise_distributed_optimizer = True + return get_megatron_optimizer(*args, **kwargs) diff --git a/tests/unit_tests/test_layer_wise_optimizer.py b/tests/unit_tests/test_layer_wise_optimizer.py index c484ca104ee..d8b0e97b524 100644 --- a/tests/unit_tests/test_layer_wise_optimizer.py +++ b/tests/unit_tests/test_layer_wise_optimizer.py @@ -417,7 +417,7 @@ def test_bf16_error(self): optimizer='muon', lr=0.01, bf16=True, use_distributed_optimizer=False ) with pytest.raises( - TypeError, match='LayerWiseDistributedOptimizer received Float16 optimizer already' + TypeError, match='LayerWiseDistributedOptimizer expects base torch optimizers' ): LayerWiseDistributedOptimizer([wrapped_optimizer], lw_config, pg_collection) From 0c306dcd63250f5a7ff70e10e9998315d97f6f64 Mon Sep 17 00:00:00 2001 From: Hongbin Liu Date: Mon, 30 Mar 2026 15:02:28 +0800 Subject: [PATCH 326/334] [Dev][feat] Support CUDA Graph capture offloading modules (#3219) Signed-off-by: Hongbin Liu Signed-off-by: root Co-authored-by: root --- .../fine_grained_activation_offloading.md | 31 -- .../fine_grained_activation_offloading.md | 167 +++++++-- .../core/models/gpt/fine_grained_callables.py | 18 +- megatron/core/models/gpt/gpt_model.py | 9 +- .../fine_grained_activation_offload.py | 283 ++++++++++----- megatron/core/pipeline_parallel/schedules.py | 6 +- megatron/core/transformer/attention.py | 32 +- megatron/core/transformer/cuda_graphs.py | 15 + megatron/core/transformer/module.py | 9 + megatron/core/transformer/moe/experts.py | 32 +- .../transformer/multi_latent_attention.py | 30 +- .../core/transformer/transformer_config.py | 45 +++ .../core/transformer/transformer_layer.py | 198 ++++++++--- .../golden_values_dev_dgx_h100.json | 2 +- .../golden_values_dev_dgx_h100.json | 
2 +- .../unit_tests/models/test_mamba_moe_model.py | 3 + ...test_fine_grained_activation_offloading.py | 336 +++++++++++++++++- .../transformer/test_transformer_layer.py | 8 +- 18 files changed, 967 insertions(+), 259 deletions(-) delete mode 100644 docs/api-guide/fine_grained_activation_offloading.md diff --git a/docs/api-guide/fine_grained_activation_offloading.md b/docs/api-guide/fine_grained_activation_offloading.md deleted file mode 100644 index 53211d1d06c..00000000000 --- a/docs/api-guide/fine_grained_activation_offloading.md +++ /dev/null @@ -1,31 +0,0 @@ -# Fine-grained Activation Offloading (collaborated with rednote) - -Memory capacity is more and more important with the rising of extreme sparse MoE models like DeepSeek-V3 and Qwen3-235B. Fine-grained recomputing reduces the memory footprint at the cost of extra recomputation, while offloading could utilize the host-device bandwidth to achieve nearly zero-overhead. Fine-grained Activation Offloading targets at offloading the activation at the granularity of specific modules, so that we can calibrate the amount of offloading activation to maximize the training throughput. - -Currently, the supported offloading modules are `"attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act"`, which could work with fine-grained recomputation to release almost all activations of a transformer layer. - -**Features** -* Support PP=1/PP/Interleaved PP -* Compatible with fine-grained recomputation -* Support FP8 -* Support MTP -* Support mixed dense & moe layer -* Support A2A Overlap -* Support CUDA Graph - * (Temporary) cuda graph scope cannot contains the offloading modules - -**Usage** -```bash -# Enable fine-grained activation offloading ---fine-grained-activation-offloading - -# Specify which modules are going to offload its input -# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". 
---offload-modules expert_fc1 -``` -**Compatible with Fine-grained Recomputation** -- For modules with minor perf overhead like layernorm or moe_act, use recomputing to reduce memory footprint; -- For other modules, use offloading to reduce memory footprint; -- Make sure the offloading/reloading could be overlapped with computing; - -![Fine-grained Activation Offloading and Fine-grained Recomputation](../../images/fine_grained_activation_offloading/offloading_and_recomputing.png) diff --git a/docs/user-guide/features/fine_grained_activation_offloading.md b/docs/user-guide/features/fine_grained_activation_offloading.md index 494674bd4f0..bb882602f37 100644 --- a/docs/user-guide/features/fine_grained_activation_offloading.md +++ b/docs/user-guide/features/fine_grained_activation_offloading.md @@ -7,34 +7,161 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. --> -# Fine-grained Activation Offloading (collaborated with rednote) +# Fine-Grained Activation Offloading -Memory capacity is more and more important with the rising of extreme sparse MoE models like DeepSeek-V3 and Qwen3-235B. Fine-grained recomputing reduces the memory footprint at the cost of extra recomputation, while offloading could utilize the host-device bandwidth to achieve nearly zero-overhead. Fine-grained Activation Offloading targets at offloading the activation at the granularity of specific modules, so that we can calibrate the amount of offloading activation to maximize the training throughput. +Fine-grained activation offloading reduces GPU memory by asynchronously transferring activations to CPU at the granularity of individual submodules within a transformer layer. Unlike layer-level offloading, it allows precise control over which activations to offload, enabling a tradeoff between memory savings and PCIe bandwidth overhead. 
-Currently, the supported offloading modules are `"attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act"`, which could work with fine-grained recomputation to release almost all activations of a transformer layer. +## User Guide -**Features** -* Support PP=1/PP/Interleaved PP -* Compatible with fine-grained recomputation -* Support FP8 -* Support MTP -* Support mixed dense & moe layer -* Support A2A Overlap -* Support CUDA Graph - * (Temporary) cuda graph scope cannot contains the offloading modules +### Basic Usage -**Usage** ```bash # Enable fine-grained activation offloading --fine-grained-activation-offloading -# Specify which modules are going to offload its input -# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act". ---offload-modules expert_fc1 +# Specify which modules to offload (can combine multiple) +# Choices: attn_norm, qkv_linear, core_attn, attn_proj, mlp_norm, expert_fc1, moe_act +--offload-modules core_attn attn_proj expert_fc1 +``` + +### Offloadable Modules + +Each module offloads its **input** activation to CPU during forward and reloads it before backward: + +| Module | Description | Notes | +|---|---|---| +| `attn_norm` | Input layernorm of attention | Skipped if using `IdentityOp` | +| `qkv_linear` | QKV linear projection | | +| `core_attn` | Core attention (softmax + matmul) | | +| `attn_proj` | Output projection of attention | Must be used together with `core_attn` | +| `mlp_norm` | Pre-MLP layernorm | Skipped if using `IdentityOp` | +| `expert_fc1` | First FC layer in MoE experts | MoE models only | +| `moe_act` | Activation function in MoE experts | MoE models only | + +### Tuning Parameters + +```bash +# Minimum tensor size (in elements) to offload. Smaller tensors are skipped. +# Default: 1048576 (1M elements) +--min-offloaded-tensor-size 1048576 + +# Fraction of activations to offload, range [0, 1]. Default: 1.0 +# Useful for partial offloading when PCIe bandwidth is a bottleneck. 
+--activation-offload-fraction 0.8 + +# Reduce offload amount on higher PP ranks (in bytes). Default: 0 +# Higher PP ranks have fewer microbatches in flight, so offloading less +# reduces overhead without increasing peak memory. +--delta-offload-bytes-across-pp-ranks 1073741824 +``` + +### CUDA Graph Integration + +Fine-grained offloading is compatible with CUDA graphs. When CUDA graph is enabled, the following constraints apply: + +- `attn_norm` and `mlp_norm` **cannot** be offloaded (they cross CUDA graph boundaries). +- `cuda_graph_scope` must include `attn` and `moe_router`. +- `cuda_graph_impl` must be `transformer_engine`. +- Requires `torch >= 2.9.0` and `transformer_engine >= 2.14.0`. + +```bash +# Optional: defer D2H enqueue for offloads *outside* cuda_graph_scope (MoE experts; see below) +--delay-offload-until-cuda-graph +``` + +**`--delay-offload-until-cuda-graph` (`TransformerConfig.delay_offload_until_cuda_graph`)** + +**Inside vs outside `cuda_graph_scope`.** Offload boundaries that lie **inside** the captured `cuda_graph_scope` (for example `qkv_linear`, `core_attn`, and `attn_proj` when `attn` is in scope) are part of CUDA graph **capture and replay**. Their offload-related work is replayed with the graph rather than re-driven from Python each step, so they do **not** incur the same per-step CPU launch overhead as a purely eager path. + +Boundaries that run **outside** the captured region still execute as normal eager PyTorch each forward—for the recommended MoE setup, that includes expert compute after a graphed `moe_router` (e.g. offloading `expert_fc1` / `moe_act`). For those groups, each `group_offload` would otherwise submit D2H work from the host as soon as the forward hits the commit point. + +**What this flag does.** It only affects offload commits that are explicitly wired with **delayed** group commit (currently the MoE expert path: `expert_fc1`, `moe_act`). 
Around each layer’s `TransformerEngine` CUDA graph replay, the offload manager enters **replay mode**; delayed commits **enqueue** `(callback, group name, forced tensors)` instead of launching D2H immediately, then **flush_delayed_groups** runs **after** that graph replay returns and issues the queued D2H copies in forward order, without changing the offload/reload semantics. + +**When this actually buys time (EP A2A after replay).** The benefit assumes a **real CPU/GPU synchronization gap right after graph replay**—in the usual MoE training layout, **expert parallel (EP) all-to-all** and related dispatch follows the graphed `moe_router` region. That A2A path typically needs the host to coordinate collectives and to **sync with the GPU** (e.g. wait for graph work to finish or for communication staging), so the CPU is not fully overlapped with useful launch work during that interval. Scheduling `flush_delayed_groups` **immediately after** `cudaGraphLaunch` returns uses that window to issue D2H copies from the host: the enqueue cost is largely **hidden** in slack that EP A2A would already incur. If there were no such post-replay sync (or expert work were fully captured inside the graph with no host-visible gap), deferring commits would not provide the same “free” host time. + +**Behavioral notes** + +- Does **not** replace or “delay” attention-side offloads inside the graphed `attn` region; those are not on the delayed path in the implementation. +- Warmup and non-replay forwards still commit delayed-eligible groups immediately (no replay-mode deferral). +- Must be used together with **fine-grained activation offloading** and **CUDA graph** under the same rules as this section (TE `cuda_graph_impl`, scope including `attn` and `moe_router`, etc.). +- Stream ordering between the graph compute path and `d2h_stream` still uses the existing events (`forward_record` / `backward_record`); this option only changes **when** eligible D2H work is submitted from the host. 
+ +### Combining with Fine-Grained Recomputation + +Offloading and recomputation are complementary: +- Use **recomputation** for lightweight modules (e.g., layernorm, activation functions) with negligible compute overhead. +- Use **offloading** for heavy modules (e.g., core_attn, expert_fc1) where recomputation would be too costly. + +```bash +--recompute-granularity selective +--recompute-modules layernorm moe_act +--fine-grained-activation-offloading +--offload-modules core_attn attn_proj expert_fc1 ``` -**Compatible with Fine-grained Recomputation** -- For modules with minor perf overhead like layernorm or moe_act, use recomputing to reduce memory footprint; -- For other modules, use offloading to reduce memory footprint; -- Make sure the offloading/reloading could be overlapped with computing; ![Fine-grained Activation Offloading and Fine-grained Recomputation](../../images/fine_grained_activation_offloading/offloading_and_recomputing.png) + + +### Compatibility + +| Feature | Supported | +|---|---| +| PP / Interleaved PP / PP=1 | Yes | +| Fine-grained recomputation | Yes | +| FP8 training | Yes | +| MTP (Multi-Token Prediction) | Yes | +| Mixed dense & MoE layers | Yes | +| A2A overlap (EP) | Yes | +| CUDA Graph (TE impl) | Yes | + +--- + +## How It Works + +### Architecture Overview + +The implementation consists of three layers: + +1. **`PipelineOffloadManager`** (singleton): Global coordinator that manages CUDA streams, CPU tensor pools, and chunk lifecycle across pipeline stages. +2. **`ChunkOffloadHandler`**: Per-microbatch handler that tracks tensor groups, executes D2H/H2D transfers, and decides which groups to actually offload. +3. **`FineGrainedActivationOffloadingInterface`**: Lightweight interface used by transformer modules (attention, MoE, etc.) to mark offload boundaries. 
+
+### Offload/Reload Flow
+
+```
+Forward pass (Layer N):                Backward pass (Layer N):
+┌─────────────────────┐                ┌───────────────────────┐
+│ group_start(input)  │─── register ──►│                       │
+│                     │  tensor group  │ group_commit_backward │
+│ module.forward()    │                │  wait H2D complete    │
+│                     │                │  pop tensors from     │
+│ group_offload(out)  │─── D2H async ──►│  CPU → GPU           │
+│  on d2h_stream      │  to pinned CPU │  on h2d_stream        │
+└─────────────────────┘                └───────────────────────┘
+```
+
+1. **`group_start`**: Registers a new tensor group and hooks into `saved_tensors_hooks` to intercept `save_for_backward`.
+2. **Forward execution**: All tensors saved by autograd within the group are captured.
+3. **`group_offload`**: Triggers asynchronous D2H copy on a dedicated CUDA stream (`d2h_stream`), optionally releases GPU storage of input tensors.
+4. **Backward**: Before the group's backward, tensors are reloaded from CPU to GPU on `h2d_stream`, and the compute stream waits for the transfer to complete.
+
+### Warmup and Adaptive Offloading
+
+The first training iteration serves as a **warmup phase** where the manager records tensor groups, their sizes, and the execution order. After warmup, a `post_warmup_callback` runs to:
+
+1. **Reserve margin**: The last N groups (by deduplication count) are kept on GPU to avoid reload blocking the compute stream.
+2. **Apply PP rank delta**: Higher PP ranks offload fewer bytes (controlled by `delta_offload_bytes_across_pp_ranks`).
+3. **Apply fraction**: Only a fraction of eligible groups are actually offloaded (controlled by `activation_offload_fraction`).
+4. **Print summary table**: An ASCII table of per-rank offload bytes is printed for debugging.
+
+### CPU Tensor Pool
+
+An `OffloadTensorPool` (on CPU with pinned memory) caches allocated tensors by `(shape, dtype)`. This avoids repeated `cudaMallocHost` / `cudaFreeHost` calls and reduces D2H latency after the first iteration.
+ +### CUDA Graph Support + +When offloading interacts with CUDA graphs: + +- A dedicated `cuda_graph_stream` runs the captured computation, while `d2h_stream` overlaps D2H transfers for regions that are **inside** the graph capture. +- During CUDA graph **warmup**, offloading is disabled (`pre_warmup_hook` / `post_warmup_hook`). +- **`delay_offload_until_cuda_graph`** applies to offload boundaries **outside** the captured scope (MoE `expert_fc1` / `moe_act` in the typical `attn` + `moe_router` configuration): D2H enqueue is deferred until **after** that layer’s graph replay returns, as described under CUDA Graph Integration. The intended win is overlapping host-side offload launches with **CPU/GPU synchronization slack before EP A2A** after replay; graphed attention offloads do not use this delayed path. \ No newline at end of file diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py index 8d1036b5bae..1261f384b8b 100644 --- a/megatron/core/models/gpt/fine_grained_callables.py +++ b/megatron/core/models/gpt/fine_grained_callables.py @@ -478,18 +478,16 @@ def forward_func( ) if not isinstance(layer.mlp, MoELayer): return hidden_states, None, None, None + mlp_norm_manager = off_interface(layer.offload_mlp_norm, hidden_states, "mlp_norm") + node.layer_state.mlp_norm_manager = mlp_norm_manager if layer.recompute_pre_mlp_layernorm: layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with off_interface( - layer.offload_mlp_norm, hidden_states, "mlp_norm" - ) as hidden_states: + with mlp_norm_manager as hidden_states: pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint( apply_module(layer.pre_mlp_layernorm), hidden_states ) else: - with off_interface( - layer.offload_mlp_norm, hidden_states, "mlp_norm" - ) as hidden_states: + with mlp_norm_manager as hidden_states: pre_mlp_layernorm_output = apply_module(layer.pre_mlp_layernorm)( hidden_states ) @@ -591,10 +589,12 @@ 
def submodule_combine_forward(node: ScheduleNode, output: torch.Tensor): ) # Delay the offload of the mlp norm until after the mlp_bda has been computed # because the residual is needed in the mlp_bda. - if layer.offload_mlp_norm: - hidden_states = off_interface.group_commit( - hidden_states, name="mlp_norm", forced_released_tensors=[residual] + mlp_norm_manager = getattr(node.layer_state, 'mlp_norm_manager', None) + if mlp_norm_manager is not None: + hidden_states = mlp_norm_manager.group_offload( + hidden_states, forced_released_tensors=[residual] ) + node.layer_state.mlp_norm_manager = None output = make_viewless_tensor( inp=hidden_states, requires_grad=hidden_states.requires_grad, keep_graph=True ) diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py index 27b62f91c34..5cc5a64e1d0 100644 --- a/megatron/core/models/gpt/gpt_model.py +++ b/megatron/core/models/gpt/gpt_model.py @@ -458,19 +458,22 @@ def _preprocess( def preprocess_for_fine_grained_offloading(self): """Preprocess for fine-grained activation offloading.""" off_interface.init_chunk_handler( + pp_rank=self.pg_collection.pp.rank(), vp_size=self.config.virtual_pipeline_model_parallel_size, vp_stage=self.vp_stage, min_offloaded_tensor_size=self.config.min_offloaded_tensor_size, + delta_offload_bytes_across_pp_ranks=self.config.delta_offload_bytes_across_pp_ranks, + activation_offload_fraction=self.config.activation_offload_fraction, ) if self.disable_param_offloading: for param in self.decoder.parameters(): - off_interface.mark_not_offloadable(param) + off_interface.mark_not_offload(param) if self.mtp_process: for param in self.mtp.parameters(): - off_interface.mark_not_offloadable(param) + off_interface.mark_not_offload(param) if self.post_process: for param in self.output_layer.parameters(): - off_interface.mark_not_offloadable(param) + off_interface.mark_not_offload(param) self.disable_param_offloading = False def forward( diff --git 
a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py index 1d2545b682d..99e3e3e4a2a 100644 --- a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py +++ b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py @@ -5,6 +5,7 @@ from typing import Any, Dict, Tuple import torch +from torch.autograd.graph import saved_tensors_hooks # CPU offload implementation for pipeline parallelism DEBUG = False @@ -94,9 +95,9 @@ def print_offload_summary_table(total_offload_bytes: Dict[str, int]): torch.distributed.barrier() -class GPUTensorPool: +class OffloadTensorPool: """ - GPU memory pool for efficient allocation and deallocation of tensors. + Memory pool for efficient allocation and deallocation of tensors. Features: - Supports multiple tensor shapes and dtypes, each with its own pool @@ -105,7 +106,7 @@ class GPUTensorPool: - Uses queue-based management for O(1) allocation and deallocation Example: - pool = GPUTensorPool(device='cuda:0') + pool = OffloadTensorPool(device='cuda:0') tensor = pool.allocate((128, 512), dtype=torch.float32) # ... use tensor ... pool.free(tensor, (128, 512), dtype=torch.float32) @@ -113,10 +114,10 @@ class GPUTensorPool: def __init__(self, device: str = 'cuda', pin_memory: bool = False): """ - Initialize GPU tensor pool. + Initialize offload tensor pool. 
Args: - device: GPU device, default 'cuda' + device: Device, default 'cuda' pin_memory: Whether to use pinned memory (mainly for CPU tensors) """ self.device = torch.device(device) @@ -136,7 +137,7 @@ def __init__(self, device: str = 'cuda', pin_memory: bool = False): 'pool_misses': 0, # Number of times a new tensor was created } - debug_rank("GPUTensorPool: Initialized with dynamic allocation") + debug_rank("OffloadTensorPool: Initialized with dynamic allocation") def _get_pool_key(self, shape: Tuple, dtype: torch.dtype) -> Tuple: """Generate a unique key for the pool based on shape and dtype.""" @@ -181,7 +182,7 @@ def allocate(self, shape: Tuple, dtype: torch.dtype = torch.float32) -> torch.Te tensor = pool['free'].popleft() self._stats['pool_hits'] += 1 debug_rank( - f"GPUTensorPool.allocate: Reused tensor from pool, " + f"OffloadTensorPool.allocate: Reused tensor from pool, " f"shape={shape}, dtype={dtype}, " f"remaining in pool={len(pool['free'])}" ) @@ -194,7 +195,7 @@ def allocate(self, shape: Tuple, dtype: torch.dtype = torch.float32) -> torch.Te memory_mb = self._calculate_memory_size(shape, dtype) / (1024**2) debug_rank( - f"GPUTensorPool.allocate: Created new tensor, " + f"OffloadTensorPool.allocate: Created new tensor, " f"shape={shape}, dtype={dtype}, " f"memory={memory_mb:.2f} MB, " f"total_created={len(pool['all'])}" @@ -244,7 +245,7 @@ def free(self, tensor: torch.Tensor): self._stats['current_in_use'] -= 1 debug_rank( - f"GPUTensorPool.free: shape={shape}, dtype={dtype}, " + f"OffloadTensorPool.free: shape={shape}, dtype={dtype}, " f"available in pool={len(pool['free'])}" ) @@ -293,7 +294,7 @@ def get_pool_status(self, shape: Tuple = None, dtype: torch.dtype = None) -> Dic def reset(self): """Reset the pool, marking all tensors as available.""" - debug_rank("GPUTensorPool: Resetting pool...") + debug_rank("OffloadTensorPool: Resetting pool...") for pool_key, pool in self._pools.items(): # Clear and refill the free queue @@ -303,11 +304,11 @@ def 
reset(self): pool['allocated_count'] = 0 self._stats['current_in_use'] = 0 - debug_rank("GPUTensorPool: Reset complete") + debug_rank("OffloadTensorPool: Reset complete") def clear(self): """Clear the pool and release all GPU memory.""" - debug_rank("GPUTensorPool: Clearing pool...") + debug_rank("OffloadTensorPool: Clearing pool...") for pool_key, pool in self._pools.items(): # Clear all references, allowing PyTorch GC to reclaim memory @@ -321,7 +322,7 @@ def clear(self): if torch.cuda.is_available(): torch.cuda.empty_cache() - debug_rank("GPUTensorPool: Clear complete") + debug_rank("OffloadTensorPool: Clear complete") def __del__(self): """Destructor to ensure resources are released.""" @@ -410,11 +411,16 @@ def __init__(self): # allocate streams and events for synchronization self._d2h_stream = torch.cuda.Stream() self._h2d_stream = torch.cuda.Stream() + # CUDA graph stream and event for offloading modules in cuda graph + self._cuda_graph_stream = torch.cuda.Stream() + self._cuda_graph_event = torch.cuda.Event(external=True) # Shared CPU tensor pool for all chunks to improve reuse efficiency - self._cpu_tensor_pool = GPUTensorPool(device="cpu", pin_memory=True) + self._cpu_tensor_pool = OffloadTensorPool(device="cpu", pin_memory=True) # Whether the manager is in warmup phase. self._is_warmup = True + # Whether the manager is in CUDA graph replay phase. + self._in_replay = False # Cache OffloadChunkHandler objects for each virtual pipeline stage and each forward pass. self._cached_chunks_forward = [] # Cache OffloadChunkHandler objects for each virtual pipeline stage and each backward pass. 
@@ -433,6 +439,10 @@ def __init__(self): self._delayed_offload_groups = [] self.reset() + self._saved_tensors_hooks = saved_tensors_hooks( + self.on_save_for_backward, self.on_get_saved_tensor + ) + @property def d2h_stream(self): """Get the device-to-host (GPU to CPU) transfer stream.""" @@ -443,22 +453,32 @@ def h2d_stream(self): """Get the host-to-device (CPU to GPU) transfer stream.""" return self._h2d_stream + @property + def cuda_graph_stream(self): + """Get the CUDA graph stream.""" + return self._cuda_graph_stream + + @property + def cuda_graph_event(self): + """Get the CUDA graph event.""" + return self._cuda_graph_event + @property def cpu_tensor_pool(self): """Get the shared CPU tensor pool.""" return self._cpu_tensor_pool - def push_offload_groups(self, group_hook, forced_released_tensors): + def push_offload_groups(self, group_hook, name, forced_released_tensors): """Push the offload groups to the delayed queue.""" debug_rank(f"pushing offload groups to the delayed queue") - self._delayed_offload_groups.append((group_hook, forced_released_tensors)) + self._delayed_offload_groups.append((group_hook, name, forced_released_tensors)) def flush_delayed_groups(self): """Flush the delayed groups.""" debug_rank("flushing delayed groups") - # Flush the delayed groups in reverse order to maintain the order of the groups. - for group_hook, forced_released_tensors in reversed(self._delayed_offload_groups): - group_hook(forced_released_tensors) + # Flush the delayed groups in forward order. 
+ for group_hook, name, forced_released_tensors in self._delayed_offload_groups: + group_hook(name, forced_released_tensors) self._delayed_offload_groups = [] def reset(self): @@ -549,13 +569,41 @@ def post_warmup_callback(self): debug_rank(f"setting offload to false for group {name} at chunk index {chunk_idx}") else: break - debug_rank(f"offload margin {self._offload_margin}") assert self._offload_margin == 0, "Offload margin is not 0" + # Disable the groups to meet the delta offload bytes across PP ranks. + keep_on_gpu_bytes = self._pp_rank * self._delta_offload_bytes_across_pp_ranks + for chunk in self._cached_chunks_backward: + for group in chunk.offload_groups: + if group.offload and keep_on_gpu_bytes > 0: + debug_rank( + f"group {group._name} offload {group.offload} \ + keep_on_gpu_bytes {keep_on_gpu_bytes}" + ) + keep_on_gpu_bytes -= group.total_offload_bytes + group.offload = False + # Disable the groups to meet the activation offload fraction. + for chunk in self._cached_chunks_backward: + offloaded_groups_count = 0 + for group in chunk.offload_groups: + if group.offload: + offloaded_groups_count += 1 + disabled_groups_count = int( + offloaded_groups_count * (1 - self._activation_offload_fraction) + ) + debug_rank(f"Disabled {disabled_groups_count}/{offloaded_groups_count} groups") + for group in reversed(chunk.offload_groups): + if group.offload: + if disabled_groups_count > 0: + disabled_groups_count -= 1 + group.offload = False + else: + break # Dump the offload information total_tensor_count = {} total_offload_bytes = {} for chunk in self._cached_chunks_forward: for group in chunk.offload_groups: + debug_rank(f"chunk {chunk} group {group} offload {group.offload}") if group.offload: if group._name not in total_tensor_count: total_tensor_count[group._name] = 0 @@ -567,6 +615,8 @@ def post_warmup_callback(self): # where the memory cost will not increase anymore. 
if chunk is self._cached_chunks_backward[0]: break + debug_rank(f"total_tensor_count {total_tensor_count}") + debug_rank(f"total_offload_bytes {total_offload_bytes}") # Cache summary for downstream consumers (e.g., unit tests). self._offload_summary_bytes = dict(total_offload_bytes) self._offload_summary_total_bytes = int(sum(total_offload_bytes.values())) @@ -607,15 +657,25 @@ def front_backward_chunk(self, name=None): return None def init_model_chunk_offload_handler( - self, vp_size, vp_stage, min_offloaded_tensor_size=1024 * 1024 + self, + pp_rank, + vp_size, + vp_stage, + min_offloaded_tensor_size=1024 * 1024, + delta_offload_bytes_across_pp_ranks=0, + activation_offload_fraction: float = 1.0, ): """ Initialize a chunk offload handler for a model chunk (microbatch). Args: + pp_rank: Pipeline parallel rank vp_size: Virtual pipeline size vp_stage: Virtual pipeline stage index (None means stage 0) min_offloaded_tensor_size: Minimum tensor size (in elements) to offload + delta_offload_bytes_across_pp_ranks: + Difference of offload bytes across PP ranks to balance the offload load. + activation_offload_fraction: Fraction of eligible groups to offload, in range [0, 1]. 
""" if not self._is_warmup: return @@ -625,6 +685,10 @@ def init_model_chunk_offload_handler( self._vpp = vp_size self._stages = [[] for _ in range(vp_size)] + self._delta_offload_bytes_across_pp_ranks = delta_offload_bytes_across_pp_ranks + self._pp_rank = pp_rank + self._activation_offload_fraction = activation_offload_fraction + if vp_stage is None: cur_vpp_rank = 0 else: @@ -670,10 +734,10 @@ def cur_backward_chunk(self): """Get the current backward pass chunk handler.""" return self._cur_backward_chunk - def mark_not_offloadable(self, tensor: torch.Tensor): + def mark_not_offload(self, tensor: torch.Tensor): """Mark the current forward chunk as not offloadable.""" if tensor is not None: - tensor.offloading_activation = False + tensor._do_not_offload = True def __enter__(self): """Enter context manager to enable activation offloading hooks.""" @@ -687,10 +751,7 @@ def __enter__(self): else: raise RuntimeError("TE CPU offload is not available") self.inside_context = True - - torch._C._autograd._push_saved_tensors_default_hooks( - self.on_save_for_backward, self.on_get_saved_tensor - ) + self._saved_tensors_hooks.__enter__() def __exit__(self, *args: Any): """Exit context manager and restore original tensor saving behavior.""" @@ -704,7 +765,7 @@ def __exit__(self, *args: Any): else: raise RuntimeError("TE CPU offload is not available") self.inside_context = False - torch._C._autograd._pop_saved_tensors_default_hooks() + self._saved_tensors_hooks.__exit__() def on_save_for_backward(self, tensor: torch.Tensor) -> Any: """ @@ -794,17 +855,17 @@ def reset(self): self._tensor_count_current_group = 0 self._reloading_group = [] - def find_group_with_name(self, name: str, start_index: int = 0): + def find_group_with_name( + self, groups: list[OffloadTensorGroup], name: str, start_index: int = 0 + ): """Find the group with the given name starting from the given index.""" - return next( - (group for group in self.offload_groups[start_index:] if group._name == name), None 
- ) + return next((group for group in groups[start_index:] if group._name == name), None) def is_empty_chunk(self, name=None): """Check if this chunk has no tensors to manage.""" debug_rank(f"------is_empty_chunk {self._max_group_size}") if name is not None: - return self.find_group_with_name(name) is None + return self.find_group_with_name(self.offload_groups, name) is None return self._max_group_size == 0 def finish_all_groups(self, name=None) -> bool: @@ -821,12 +882,15 @@ def finish_all_groups(self, name=None) -> bool: ): return True assert name is not None, "Name is required" - return self.find_group_with_name(name, self._offloaded_group_index) is None + return ( + self.find_group_with_name(self.offload_groups, name, self._offloaded_group_index) + is None + ) def find_next_group(self, name=None): """Find the next group with the given name.""" assert name is not None, "Name is required" - return self.find_group_with_name(name, self._offloaded_group_index) + return self.find_group_with_name(self.offload_groups, name, self._offloaded_group_index) def tensor_push(self, tensor): """Push tensor to the offload handler.""" @@ -859,20 +923,19 @@ def tensor_pop(self, tensor_tag): def tensor_need_offloading_checker(self, tensor): """Check if the tensor needs to be offloaded.""" - debug_rank( - f"tensor_need_offloading_checker {getattr(tensor, 'offloading_activation', None)}" - ) + debug_rank("tensor_need_offloading_checker") if tensor.numel() < self.min_offloaded_tensor_size: return False # Respect tensor's offload preference if specified - if hasattr(tensor, "offloading_activation") and not tensor.offloading_activation: + if getattr(tensor, "_TE_do_not_offload", False) or getattr( + tensor, "_do_not_offload", False + ): return False return True - def bulk_offload_group(self): + def bulk_offload_group(self, group_to_offload): """offload a group of tensors recorded in tensor_push().""" debug_rank("------bulk_offload_group") - group_to_offload = self._groups_to_offload[-1] 
torch.cuda.nvtx.range_push("activation offloading " + group_to_offload._name) with torch.cuda.stream(self.d2h_stream): for tensor_tag, tensor_on_device in group_to_offload._tensors.items(): @@ -885,7 +948,6 @@ def bulk_offload_group(self): tensor_on_device.record_stream(self.d2h_stream) group_to_offload.push_tensor(tensor_tag, state) group_to_offload.record_offload_event(self.d2h_stream) - self._groups_to_offload.pop() torch.cuda.nvtx.range_pop() def get_max_deduplicated_groups(self): @@ -925,10 +987,11 @@ def pre_reload_last_layer(self): # Reload the last group (last layer) early self.bulk_reload_group() - def should_bulk_offload(self): + def should_bulk_offload(self, name): """Determine if the current group should be offloaded.""" assert len(self._groups_to_offload) > 0, "No groups to offload" - group = self._groups_to_offload[-1] + group = self.find_group_with_name(self._groups_to_offload, name) + assert group is not None, f"Group {name} not found in {self._groups_to_offload}" debug_rank(f"should_bulk_offload {self.is_warmup} {group.offload}") # Don't offload if the chunk is not in warmup stage if self.is_warmup: @@ -949,12 +1012,17 @@ def should_bulk_offload(self): return True - def bulk_offload(self, forced_released_tensors): + def bulk_offload(self, name, forced_released_tensors): """Offload a group of tensors and optionally release their GPU memory.""" debug_rank("----bulk_offload") - if self.should_bulk_offload(): - self._groups_to_reload.append(self._groups_to_offload[-1]) - self.bulk_offload_group() + if self.should_bulk_offload(name): + group_to_offload = self.find_group_with_name(self._groups_to_offload, name) + assert ( + group_to_offload is not None + ), f"Group {name} not found in {self._groups_to_offload}" + self._groups_to_reload.append(group_to_offload) + self.bulk_offload_group(group_to_offload) + self._groups_to_offload.remove(group_to_offload) # Manually release tensors not auto-freed by torch GC if len(forced_released_tensors) > 0: cur_stream 
= torch.cuda.current_stream() @@ -964,14 +1032,14 @@ def bulk_offload(self, forced_released_tensors): release_tensor.record_stream(cur_stream) release_tensor.untyped_storage().resize_(0) - def on_group_commit_forward(self, forced_released_tensors): + def on_group_commit_forward(self, name, forced_released_tensors): """Called at the end of a layer group's forward pass to trigger offloading.""" if not self.do_offload: return - debug_rank("--on_group_commit_forward") + debug_rank(f"--on_group_commit_forward {name}") # Wait for compute to finish before starting offload self.d2h_stream.wait_stream(torch.cuda.current_stream()) - self.bulk_offload(forced_released_tensors) + self.bulk_offload(name, forced_released_tensors) def bulk_reload(self): """Reload the next group of tensors from CPU to GPU.""" @@ -1070,12 +1138,12 @@ def forward(ctx, tensor, cur_forward_chunk, name, forced_released_tensors, delay # pylint: disable=missing-function-docstring debug_rank("FineGrainedOffloadingGroupCommitFunction forward") - if delay_offload: + if delay_offload and PipelineOffloadManager.get_instance()._in_replay: PipelineOffloadManager.get_instance().push_offload_groups( - cur_forward_chunk.on_group_commit_forward, forced_released_tensors + cur_forward_chunk.on_group_commit_forward, name, forced_released_tensors ) else: - cur_forward_chunk.on_group_commit_forward(forced_released_tensors) + cur_forward_chunk.on_group_commit_forward(name, forced_released_tensors) ctx.cpu_offload_handler = cur_forward_chunk ctx.name = name return tensor @@ -1172,13 +1240,6 @@ def fine_grained_offloading_group_start(tensor, name=None): return FineGrainedOffloadingGroupStartFunction.apply(tensor, cur_forward_chunk, name) -def fine_grained_offloading_forward_record(event: torch.cuda.Event) -> None: - """Record the forward event for cuda graph capture.""" - d2h_stream = PipelineOffloadManager.get_instance().d2h_stream - torch.cuda.current_stream().record_event(event) - 
torch.cuda.current_stream().wait_stream(d2h_stream) - - class FineGrainedOffloadingBackwardRecordFunction(torch.autograd.Function): """ Identity operation that marks the end of a layer group for offload synchronization. @@ -1186,23 +1247,19 @@ class FineGrainedOffloadingBackwardRecordFunction(torch.autograd.Function): """ @staticmethod - def forward(ctx, tensor, event: torch.cuda.Event) -> torch.Tensor: + def forward(ctx, tensor) -> torch.Tensor: """Forward pass for cuda graph capture.""" - ctx.event = event + debug_rank("FineGrainedOffloadingBackwardRecordFunction forward") return tensor @staticmethod def backward(ctx, grad_output): """Record the backward event and wait for the h2d stream on cuda graph stream.""" - h2d_stream = PipelineOffloadManager.get_instance().h2d_stream - torch.cuda.current_stream().record_event(ctx.event) - torch.cuda.current_stream().wait_stream(h2d_stream) - return grad_output, None - - -def fine_grained_offloading_backward_record(tensor, event: torch.cuda.Event) -> torch.Tensor: - """Record the backward event for cuda graph capture.""" - return FineGrainedOffloadingBackwardRecordFunction.apply(tensor, event) + debug_rank("FineGrainedOffloadingBackwardRecordFunction backward") + mgr = PipelineOffloadManager.get_instance() + torch.cuda.current_stream().record_event(mgr.cuda_graph_event) + torch.cuda.current_stream().wait_stream(mgr.h2d_stream) + return (grad_output,) class FineGrainedActivationOffloadingInterface: @@ -1226,10 +1283,32 @@ def __exit__(self, *args: Any): PipelineOffloadManager.get_instance().__exit__() @staticmethod - def init_chunk_handler(vp_size, vp_stage, min_offloaded_tensor_size): + def cuda_graph_stream(): + """Get the CUDA graph stream.""" + return PipelineOffloadManager.get_instance().cuda_graph_stream + + @staticmethod + def cuda_graph_event(): + """Get the CUDA graph event.""" + return PipelineOffloadManager.get_instance().cuda_graph_event + + @staticmethod + def init_chunk_handler( + pp_rank, + vp_size, + 
vp_stage, + min_offloaded_tensor_size, + delta_offload_bytes_across_pp_ranks, + activation_offload_fraction, + ): """Initialize the chunk handler, called at the start of a microbatch forward pass.""" PipelineOffloadManager.get_instance().init_model_chunk_offload_handler( - vp_size, vp_stage, min_offloaded_tensor_size + pp_rank, + vp_size, + vp_stage, + min_offloaded_tensor_size, + delta_offload_bytes_across_pp_ranks, + activation_offload_fraction, ) @staticmethod @@ -1237,25 +1316,32 @@ def get_context(flag): """Get the fine-grained offload context""" return PipelineOffloadManager.get_instance() if flag else nullcontext() - @staticmethod - def group_commit(tensor, name, forced_released_tensors=None, delay_offload=False): - """Group commit the tensors.""" - return fine_grained_offloading_group_commit( - tensor, name, forced_released_tensors, delay_offload - ) + def group_offload(self, tensor, forced_released_tensors=None, delay_offload=False): + """Group offload the tensors.""" + if self.offload: + return fine_grained_offloading_group_commit( + tensor, self.name, forced_released_tensors, delay_offload + ) + return tensor @staticmethod - def mark_not_offloadable(tensor: torch.Tensor): + def mark_not_offload(tensor: torch.Tensor): """Mark the tensor as not offloadable.""" - PipelineOffloadManager.get_instance().mark_not_offloadable(tensor) + PipelineOffloadManager.get_instance().mark_not_offload(tensor) @staticmethod - def forward_record(event: torch.cuda.Event) -> None: + def forward_record() -> None: """Record the forward event for cuda graph capture.""" - d2h_stream = PipelineOffloadManager.get_instance().d2h_stream - torch.cuda.current_stream().record_event(event) - torch.cuda.current_stream().wait_stream(d2h_stream) + mgr = PipelineOffloadManager.get_instance() + torch.cuda.current_stream().record_event(mgr.cuda_graph_event) + torch.cuda.current_stream().wait_stream(mgr.d2h_stream) + @staticmethod + def backward_record(tensor) -> torch.Tensor: + """Record the 
backward event for cuda graph capture.""" + return FineGrainedOffloadingBackwardRecordFunction.apply(tensor) + + @staticmethod def reset(): """Reset the chunk handler.""" PipelineOffloadManager.get_instance().reset() @@ -1264,3 +1350,28 @@ def reset(): def reset_instance(): """Reset the singleton instance.""" PipelineOffloadManager.reset_instance() + + @staticmethod + def flush_delayed_groups(): + """Flush the delayed groups.""" + PipelineOffloadManager.get_instance().flush_delayed_groups() + + @staticmethod + def disable_offload(): + """Disable the offload.""" + PipelineOffloadManager.get_instance().disable_offload() + + @staticmethod + def enable_offload(): + """Enable the offload.""" + PipelineOffloadManager.get_instance().enable_offload() + + @staticmethod + def enter_replay(): + """Enter CUDA graph replay mode to enable delayed offloading.""" + PipelineOffloadManager.get_instance()._in_replay = True + + @staticmethod + def exit_replay(): + """Exit CUDA graph replay mode.""" + PipelineOffloadManager.get_instance()._in_replay = False diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py index ed3794208f0..10de8e0b311 100644 --- a/megatron/core/pipeline_parallel/schedules.py +++ b/megatron/core/pipeline_parallel/schedules.py @@ -689,7 +689,7 @@ def forward_backward_no_pipelining( force_all_reduce=force_all_reduce, ) - if not forward_only and config.fine_grained_activation_offloading: + if getattr(config, 'fine_grained_activation_offloading', False): off_interface.reset() if config.timers is not None: @@ -2062,7 +2062,7 @@ def pp_post_backward(input_tensor_grad, vp_stage=None): force_all_reduce=force_all_reduce, ) - if not forward_only and config.fine_grained_activation_offloading: + if getattr(config, 'fine_grained_activation_offloading', False): off_interface.reset() # Restore config.grad_sync_func and config.param_sync_func. 
if forward_only: @@ -2484,7 +2484,7 @@ def enable_grad_sync(): force_all_reduce=force_all_reduce, ) - if not forward_only and config.fine_grained_activation_offloading: + if getattr(config, 'fine_grained_activation_offloading', False): off_interface.reset() if config.timers is not None: diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 3b054ccc4b1..146d7dbda87 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -989,18 +989,16 @@ def forward( if output_gate: assert split_qkv, "output_gate is not supported for unsplit mixed_qkv tensor." - with off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") as hidden_states: + qkv_linear_manager = off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") + with qkv_linear_manager as hidden_states: qkv_output = self.get_query_key_value_tensors( hidden_states, key_value_states, split_qkv=split_qkv, output_gate=self.config.attention_output_gate, ) - if self.offload_qkv_linear: - # `qkv_output` may be a tuple; commit supports tuple/list and will keep structure. - qkv_output = off_interface.group_commit( - qkv_output, name="qkv_linear", forced_released_tensors=[] - ) + # `qkv_output` may be a tuple; commit supports tuple/list and will keep structure. + qkv_output = qkv_linear_manager.group_offload(qkv_output, forced_released_tensors=[]) attn_mask_type = self.attn_mask_type block_table = None gate = None @@ -1143,6 +1141,9 @@ def forward( # ================================== nvtx_range_push(suffix="core_attention") + core_attn_manager = off_interface( + self.offload_core_attention and self.training, query, "core_attn" + ) if self.checkpoint_core_attention and self.training: core_attn_out = self._checkpointed_attention_forward( query, @@ -1156,9 +1157,7 @@ def forward( else: if inference_context is None or inference_context.is_static_batching(): # Static batching attention kernel. 
- with off_interface( - self.offload_core_attention and self.training, query, "core_attn" - ) as query: + with core_attn_manager as query: core_attn_out = apply_module(self.core_attention)( query, key, @@ -1194,10 +1193,9 @@ def forward( if is_using_quantization_scales(self.config): core_attn_out[inference_context.padding_slice] = 0.0 - if self.offload_core_attention and self.training: - core_attn_out = off_interface.group_commit( - core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] - ) + core_attn_out = core_attn_manager.group_offload( + core_attn_out, forced_released_tensors=[query, key, value] + ) if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd': # reshape to same output shape as unpacked case @@ -1217,12 +1215,10 @@ def forward( # Output. [sq, b, h] # ================= nvtx_range_push(suffix="linear_proj") - with off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") as core_attn_out: + attn_proj_manager = off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") + with attn_proj_manager as core_attn_out: output, bias = self.linear_proj(core_attn_out) - if self.offload_attn_proj: - output = off_interface.group_commit( - output, name="attn_proj", forced_released_tensors=[core_attn_out] - ) + output = attn_proj_manager.group_offload(output, forced_released_tensors=[core_attn_out]) nvtx_range_pop(suffix="linear_proj") self.pg_collection.cp = _orig_cp_group diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 0f7341f253e..27301e30466 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -2177,6 +2177,15 @@ def _get_fp8_enabled(): ) else: kwargs['fp8_enabled'] = False + + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + FineGrainedActivationOffloadingInterface as off_interface, + ) + + # Disable and enable offloading before and after the warmup stage of cuda graph. 
+ if self.config.fine_grained_activation_offloading: + kwargs['pre_warmup_hook'] = off_interface.disable_offload + kwargs['post_warmup_hook'] = off_interface.enable_offload return kwargs kwargs = get_make_graphed_callables_kwargs() @@ -2211,6 +2220,12 @@ def _finish_capturing(self, start_time): _set_capture_end() from megatron.core.distributed.finalize_model_grads import reset_model_temporary_tensors + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + FineGrainedActivationOffloadingInterface as off_interface, + ) + + if self.config.fine_grained_activation_offloading: + off_interface.reset() torch.distributed.barrier() for model_chunk in self.model: diff --git a/megatron/core/transformer/module.py b/megatron/core/transformer/module.py index 6539ee36105..2d588262676 100644 --- a/megatron/core/transformer/module.py +++ b/megatron/core/transformer/module.py @@ -322,6 +322,15 @@ def _get_te_cuda_graph_replay_args(self, *args, **kwargs): cudagraph_kwargs = kwargs.copy() cudagraph_kwargs['is_first_microbatch'] = getattr(self, 'current_microbatch', 0) == 0 + if self.config.fine_grained_activation_offloading and getattr( + self, 'offload_module_in_cuda_graph', False + ): + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + FineGrainedActivationOffloadingInterface as off_interface, + ) + + cudagraph_kwargs['cuda_graph_stream'] = off_interface.cuda_graph_stream() + cudagraph_kwargs['cuda_graph_event'] = off_interface.cuda_graph_event() return cudagraph_args, cudagraph_kwargs def _should_call_local_cudagraph(self, *args, **kwargs): diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index 8168c8ab611..96f7c926db0 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -708,7 +708,7 @@ def __init__( set_save_original_input(self.linear_fc2) # This is to avoid the CPU overhead of multiple d2h copies - if self.offload_expert_fc1: + 
if self.offload_expert_fc1 and not self.config.fp8: from megatron.core.extensions.transformer_engine import set_save_original_input set_save_original_input(self.linear_fc1) @@ -998,18 +998,18 @@ def forward( # Probs already applied, so reset to 1. permuted_probs = torch.ones_like(permuted_probs) - with off_interface( + expert_fc1_manager = off_interface( self.offload_expert_fc1, permuted_local_hidden_states, "expert_fc1" - ) as permuted_local_hidden_states: + ) + with expert_fc1_manager as permuted_local_hidden_states: fc1_output, bias_parallel = apply_module(self.linear_fc1)( permuted_local_hidden_states, tokens_per_expert ) - if self.offload_expert_fc1: - fc1_output = off_interface.group_commit( - fc1_output, - name="expert_fc1", - forced_released_tensors=[permuted_local_hidden_states], - ) + fc1_output = expert_fc1_manager.group_offload( + fc1_output, + forced_released_tensors=[permuted_local_hidden_states], + delay_offload=self.config.delay_offload_until_cuda_graph, + ) def bias_act_func(intermediate_parallel, bias_parallel, permuted_probs): @@ -1094,14 +1094,15 @@ def glu(x): intermediate_parallel = intermediate_parallel.to(original_dtype) return intermediate_parallel + moe_act_manager = off_interface(self.offload_moe_act, fc1_output, "moe_act") if self.activation_recompute: self.activation_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with off_interface(self.offload_moe_act, fc1_output, "moe_act") as fc1_output: + with moe_act_manager as fc1_output: bias_act_output = self.activation_checkpoint.checkpoint( bias_act_func, fc1_output, bias_parallel, permuted_probs ) else: - with off_interface(self.offload_moe_act, fc1_output, "moe_act") as fc1_output: + with moe_act_manager as fc1_output: bias_act_output = bias_act_func(fc1_output, bias_parallel, permuted_probs) output, output_bias = apply_module(self.linear_fc2)(bias_act_output, tokens_per_expert) @@ -1110,10 +1111,11 @@ def glu(x): # Delay the offload of the moe act until after the linear_fc2 has 
been computed # to make sure the fc1_output is reloaded to GPU before recomputing moe_act. - if self.offload_moe_act: - output = off_interface.group_commit( - output, name="moe_act", forced_released_tensors=[fc1_output] - ) + output = moe_act_manager.group_offload( + output, + forced_released_tensors=[fc1_output], + delay_offload=self.config.delay_offload_until_cuda_graph, + ) output = self._apply_bias(output, output_bias, tokens_per_expert, permuted_probs) # upad and concat the output diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py index 4b3f876a978..a484d73ebfb 100644 --- a/megatron/core/transformer/multi_latent_attention.py +++ b/megatron/core/transformer/multi_latent_attention.py @@ -245,7 +245,8 @@ def forward( # Get the query, key and value tensors based on the type of attention - # self or cross attn. # query: [96, 1, 16, 128], key:[96, 1, 16, 128], value:[96, 1, 16, 128] - with off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") as hidden_states: + qkv_linear_manager = off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") + with qkv_linear_manager as hidden_states: query, key, value, q_compressed, kv_compressed = self.get_query_key_value_tensors( hidden_states, key_value_states, @@ -253,10 +254,7 @@ def forward( packed_seq_params, inference_context=inference_context, ) - if self.offload_qkv_linear: - query = off_interface.group_commit( - query, name="qkv_linear", forced_released_tensors=[hidden_states] - ) + query = qkv_linear_manager.group_offload(query, forced_released_tensors=[hidden_states]) # =================================================== # Adjust key, value for inference @@ -278,6 +276,9 @@ def forward( # core attention computation # ================================== # Need corresponding TE change + core_attn_manager = off_interface( + self.offload_core_attention and self.training, query, "core_attn" + ) if self.checkpoint_core_attention and 
self.training: core_attn_out = self._checkpointed_attention_forward( query, key, value, attention_mask, packed_seq_params=packed_seq_params @@ -290,9 +291,7 @@ def forward( # query representation. extra_kwargs["x"] = hidden_states extra_kwargs["qr"] = q_compressed - with off_interface( - self.offload_core_attention and self.training, query, "core_attn" - ) as query: + with core_attn_manager as query: core_attn_out = self.core_attention( query, key, @@ -322,10 +321,9 @@ def forward( # Only rearrange if not in absorption mode (Flash MLA handles format correctly) if not inference_context.is_decode_only(): core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)') - if self.offload_core_attention and self.training: - core_attn_out = off_interface.group_commit( - core_attn_out, name="core_attn", forced_released_tensors=[query, key, value] - ) + core_attn_out = core_attn_manager.group_offload( + core_attn_out, forced_released_tensors=[query, key, value] + ) # We are doing absorption with cache mla latents and decode mode. if self.cache_mla_latents and inference_context.is_decode_only(): @@ -351,12 +349,10 @@ def forward( # ================= # Output. 
[sq, b, h] # ================= - with off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") as core_attn_out: + attn_proj_manager = off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") + with attn_proj_manager as core_attn_out: output, bias = self.linear_proj(core_attn_out) - if self.offload_attn_proj: - output = off_interface.group_commit( - output, name="attn_proj", forced_released_tensors=[core_attn_out] - ) + output = attn_proj_manager.group_offload(output, forced_released_tensors=[core_attn_out]) return output, bias diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 1874d93e50d..ed382a29ca0 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -999,6 +999,21 @@ class TransformerConfig(ModelParallelConfig): min_offloaded_tensor_size: int = 1024 * 1024 """The minimum size of the tensor to be offloaded.""" + delay_offload_until_cuda_graph: bool = False + """If True, delay the offload until the CUDA graph is executed for minimal CPU overhead. + For more details, see the documentation: + https://github.com/NVIDIA/Megatron-LM/blob/main/docs/user-guide/features/fine_grained_activation_offloading.md#cuda-graph-integration. + """ + + delta_offload_bytes_across_pp_ranks: int = 0 + """Difference of offload bytes across PP ranks to balance the offload load. + For more details, see the documentation: + https://github.com/NVIDIA/Megatron-LM/blob/main/docs/user-guide/features/fine_grained_activation_offloading.md#tuning-parameters. + """ + + activation_offload_fraction: float = 1.0 + """The fraction of the activation to be offloaded, which should be in range [0, 1].""" + def __post_init__(self): """Python dataclass method that is used to modify attributes after initialization. 
See https://docs.python.org/3/library/dataclasses.html#post-init-processing for more @@ -1475,6 +1490,24 @@ def __post_init__(self): "because the input of attn_proj is the output of core_attn, " "which is needed in core_attn.backward()." ) + if self.recompute_granularity == "selective" and "moe" in self.recompute_modules: + offload_inside_moe = {"moe_act", "expert_fc1"} & set(self.offload_modules) + assert not offload_inside_moe, ( + f"Cannot offload {offload_inside_moe} while recomputing the entire MoE layer. " + f"'moe' in recompute_modules wraps the full MoE forward in a checkpoint, " + f"so offloading activations inside it is redundant and will cause errors. " + f"Either remove 'moe' from --recompute-modules or remove " + f"{offload_inside_moe} from --offload-modules." + ) + assert ( + self.min_offloaded_tensor_size >= 0 + ), "min_offloaded_tensor_size must be non-negative." + assert ( + self.activation_offload_fraction >= 0 and self.activation_offload_fraction <= 1 + ), "activation_offload_fraction must be in range [0, 1]." + assert ( + self.delta_offload_bytes_across_pp_ranks >= 0 + ), "delta_offload_bytes_across_pp_ranks must be non-negative." if ( self.num_layers_in_first_pipeline_stage is not None @@ -2009,6 +2042,18 @@ def __post_init__(self): "moe_input_jitter_eps is not supported with graphed moe recomputation." ) + if self.fine_grained_activation_offloading: + assert ( + self.cuda_graph_impl == "transformer_engine" + ), "fine_grained_activation_offloading must be used with TE impl of cuda_graph." + assert ( + CudaGraphScope.moe not in self.cuda_graph_scope + ), "Token-drop MoE is temporarily not supported with activation offloading." + assert self.cuda_graph_warmup_steps > 0, ( + "cuda_graph_warmup_steps must be greater than 0 when enabling " + "fine-grained activation offloading." 
+ ) + if self.moe_token_dispatcher_type in ["allgather"]: if self.variable_seq_lengths is True: raise ValueError( diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py index 61e9bb1535b..3350acefa18 100644 --- a/megatron/core/transformer/transformer_layer.py +++ b/megatron/core/transformer/transformer_layer.py @@ -33,6 +33,7 @@ deprecate_inference_params, get_pg_rank, is_te_min_version, + is_torch_min_version, log_single_rank, make_viewless_tensor, nvtx_range_pop, @@ -45,6 +46,16 @@ logger = logging.getLogger(__name__) +@functools.lru_cache(maxsize=None) +def _get_offloading_interface(): + """Get the offloading interface for fine-grained activation offloading.""" + from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( + FineGrainedActivationOffloadingInterface, + ) + + return FineGrainedActivationOffloadingInterface + + def get_transformer_layer_offset( config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None ): @@ -467,17 +478,10 @@ def can_recompute_pre_mlp_layernorm_for_cudagraph(): if "mlp" in self.config.recompute_modules: if not self.is_moe_layer: self.recompute_mlp = True - self.offload_attn_norm = ( - self.config.fine_grained_activation_offloading - and "attn_norm" in self.config.offload_modules - and not isinstance(self.input_layernorm, IdentityOp) - ) - self.offload_mlp_norm = ( - self.config.fine_grained_activation_offloading - and "mlp_norm" in self.config.offload_modules - and not isinstance(self.pre_mlp_layernorm, IdentityOp) - ) + self._set_offload_modules() + self.off_interface = _get_offloading_interface() + self.mlp_norm_manager = None # @jcasper how should we handle nvfuser? # Set bias+dropout+add fusion grad_enable execution handler. # TORCH_MAJOR = int(torch.__version__.split('.')[0]) @@ -566,10 +570,6 @@ def _forward_attention( context (Tensor): Updated context tensor if cross-attention is used, otherwise None. 
""" - from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - FineGrainedActivationOffloadingInterface as off_interface, - ) - inference_context = deprecate_inference_params(inference_context, inference_params) # Residual connection. @@ -578,14 +578,15 @@ def _forward_attention( residual = residual.float() # Optional Input Layer norm + attn_norm_manager = self.off_interface(self.offload_attn_norm, hidden_states, "attn_norm") if self.recompute_input_layernorm: self.input_layernorm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states: + with attn_norm_manager as hidden_states: input_layernorm_output = self.input_layernorm_checkpoint.checkpoint( apply_module(self.input_layernorm), hidden_states ) else: - with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states: + with attn_norm_manager as hidden_states: input_layernorm_output = apply_module(self.input_layernorm)(hidden_states) using_fused_tp_inference_kernel = (not self.training) and ( @@ -635,10 +636,9 @@ def _forward_attention( # Delay the offload of the attention norm until after the self_attn_bda has been computed # because the residual is needed in the self_attn_bda. - if self.offload_attn_norm: - hidden_states = off_interface.group_commit( - hidden_states, name="attn_norm", forced_released_tensors=[residual] - ) + hidden_states = attn_norm_manager.group_offload( + hidden_states, forced_released_tensors=[residual] + ) # Residual connection. 
residual = hidden_states @@ -690,18 +690,15 @@ def forward(self, *args, **kwargs): return output, context def _forward_pre_mlp_layernorm(self, hidden_states: Tensor): - from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - FineGrainedActivationOffloadingInterface as off_interface, - ) - + self.mlp_norm_manager = self.off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") if self.recompute_pre_mlp_layernorm: self.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput() - with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states: + with self.mlp_norm_manager as hidden_states: pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint( apply_module(self.pre_mlp_layernorm), hidden_states ) else: - with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states: + with self.mlp_norm_manager as hidden_states: pre_mlp_layernorm_output = apply_module(self.pre_mlp_layernorm)(hidden_states) return pre_mlp_layernorm_output @@ -821,9 +818,6 @@ def _forward_post_mlp( Returns: output (Tensor): Transformed hidden states of shape [s, b, h]. """ - from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - FineGrainedActivationOffloadingInterface as off_interface, - ) using_fused_tp_inference_kernel = (not self.training) and ( self.config.inference_fuse_tp_communication @@ -852,10 +846,11 @@ def _forward_post_mlp( nvtx_range_pop(suffix="mlp_bda") # Delay the offload of the mlp norm until after the mlp_bda has been computed # because the residual is needed in the mlp_bda. - if self.offload_mlp_norm: - hidden_states = off_interface.group_commit( - hidden_states, name="mlp_norm", forced_released_tensors=[residual] + if self.mlp_norm_manager is not None: + hidden_states = self.mlp_norm_manager.group_offload( + hidden_states, forced_released_tensors=[residual] ) + self.mlp_norm_manager = None # Jit compiled function creates 'view' tensor. 
This tensor # potentially gets saved in the MPU checkpoint function context, @@ -1010,6 +1005,18 @@ def _te_cuda_graph_capture(self, *args, **kwargs): attribute can be set to control the scope of the CUDA graph. 2. If context is None, it cannot be returned as output. """ + # Record the backward event on cuda graph stream in backward pass. + # This is to ensure the main stream waits for computing on cuda graph stream to complete, + # and overlaps with the H2D transfer on reload stream. + if self.offload_module_in_cuda_graph: + if len(args) > 0: + hidden_states = args[0] + hidden_states = self.off_interface.backward_record(hidden_states) + args = (hidden_states,) + args[1:] + else: + hidden_states = kwargs.pop("hidden_states") + hidden_states = self.off_interface.backward_record(hidden_states) + kwargs["hidden_states"] = hidden_states context = None if not self.config.cuda_graph_scope or CudaGraphScope.attn in self.config.cuda_graph_scope: hidden_states, context = self._forward_attention(*args, **kwargs) @@ -1037,6 +1044,11 @@ def _te_cuda_graph_capture(self, *args, **kwargs): cuda_graph_outputs = list(hidden_states) if context is not None: cuda_graph_outputs.append(context) + # Record the forward event on cuda graph stream for cuda graph capture. + # This is to ensure the main stream waits for computing on cuda graph stream to complete, + # and overlaps with the D2H transfer on offloading stream. + if self.offload_module_in_cuda_graph: + self.off_interface.forward_record() return tuple(cuda_graph_outputs) def _te_cuda_graph_replay(self, *args, **kwargs): @@ -1060,8 +1072,25 @@ def _te_cuda_graph_replay(self, *args, **kwargs): "For inference cuda graph, please use cuda_graph_impl=local instead." 
) + if self.config.delay_offload_until_cuda_graph: + self.off_interface.enter_replay() + + try: + return self._te_cuda_graph_replay_impl(args, kwargs, context) + finally: + if self.config.delay_offload_until_cuda_graph: + self.off_interface.exit_replay() + + def _te_cuda_graph_replay_impl(self, args, kwargs, context): + """Implementation of _te_cuda_graph_replay, separated for replay mode cleanup.""" cuda_graph_output = list(super()._te_cuda_graph_replay(*args, **kwargs)) + # Flush delayed offload groups from previous layers after graph replay. + # The CPU is idle during the sync between graph replay and a2a comm, + # so we use that time to execute the delayed offload operations. + if self.config.delay_offload_until_cuda_graph: + self.off_interface.flush_delayed_groups() + if kwargs.get('context') is not None: context = cuda_graph_output.pop() @@ -1261,6 +1290,83 @@ def __call__(self, *args, **kwargs): return super().__call__(*args, **kwargs) + def _set_offload_modules(self): + """Set the offload modules for the transformer layer.""" + if self.config.fine_grained_activation_offloading: + self.offload_attn_norm = "attn_norm" in self.config.offload_modules and not isinstance( + self.input_layernorm, IdentityOp + ) + self.offload_qkv_linear = "qkv_linear" in self.config.offload_modules + self.offload_core_attn = "core_attn" in self.config.offload_modules + self.offload_attn_proj = "attn_proj" in self.config.offload_modules + self.offload_mlp_norm = "mlp_norm" in self.config.offload_modules and not isinstance( + self.pre_mlp_layernorm, IdentityOp + ) + self.offload_expert_fc1 = "expert_fc1" in self.config.offload_modules + self.offload_moe_act = "moe_act" in self.config.offload_modules + else: + self.offload_attn_norm = False + self.offload_qkv_linear = False + self.offload_core_attn = False + self.offload_attn_proj = False + self.offload_mlp_norm = False + self.offload_expert_fc1 = False + self.offload_moe_act = False + # Check the compatibility of fine-grained 
activation offloading and cuda graph. + if self.config.fine_grained_activation_offloading: + if CudaGraphScope.attn in self.config.cuda_graph_scope: + self.offload_attn_norm = False + log_single_rank( + logger, + logging.WARNING, + "attn_norm offloading is not supported with attn cudagraph. " + "Disabling attn_norm offloading.", + ) + mark_mlp_norm_offloading_not_supported = False + # For moe layer, mlp_norm offloading isn't supported with attn or moe_router cudagraph. + if self.is_moe_layer: + if ( + CudaGraphScope.attn in self.config.cuda_graph_scope + or CudaGraphScope.moe_router in self.config.cuda_graph_scope + ): + mark_mlp_norm_offloading_not_supported = True + # For non-moe layer, mlp_norm is the boundary of attn or mlp cudagraph. + # The only case where mlp_norm offloading is supported is when whole layer is captured. + elif ( + CudaGraphScope.attn in self.config.cuda_graph_scope + and CudaGraphScope.mlp not in self.config.cuda_graph_scope + ) or ( + CudaGraphScope.attn not in self.config.cuda_graph_scope + and CudaGraphScope.mlp in self.config.cuda_graph_scope + ): + mark_mlp_norm_offloading_not_supported = True + if mark_mlp_norm_offloading_not_supported: + self.offload_mlp_norm = False + log_single_rank( + logger, + logging.WARNING, + "mlp_norm offloading is not supported with the current cudagraph scope. " + "Disabling mlp_norm offloading.", + ) + # Set the offload module in cuda graph flag. + self.offload_module_in_cuda_graph = False + if CudaGraphScope.attn in self.config.cuda_graph_scope: + if self.offload_core_attn or self.offload_attn_proj or self.offload_qkv_linear: + self.offload_module_in_cuda_graph = True + if not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope: + if self.offload_mlp_norm: + self.offload_module_in_cuda_graph = True + if self.offload_module_in_cuda_graph: + assert is_torch_min_version( + "2.9.0a0" + ), "Offloading modules captured in cuda graph requires torch>=2.9.0." 
+ assert is_te_min_version( + "2.14.0" + ), "Offloading modules captured in cuda graph requires TE>=2.14.0." + assert ( + self.config.cuda_graph_warmup_steps > 0 + ), "Fine-grained activation offloading needs cuda_graph_warmup_steps > 0." + def get_layer_norm_weights(self): """ Get the weights of all layernorms (attention and MLP) in the transformer layer. @@ -1404,10 +1510,6 @@ def _forward_attention( *, inference_params: Optional[Any] = None, ): - from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - FineGrainedActivationOffloadingInterface as off_interface, - ) - """Forward attention with hyper connection pre/post processing on self-attention.""" inference_context = deprecate_inference_params(inference_context, inference_params) @@ -1423,16 +1525,17 @@ def _forward_attention( checkpoint_input_layernorm = self.recompute_input_layernorm or ( mhc_recompute_manager is not None and self.mhc_checkpoint_input_layernorm ) + attn_norm_manager = self.off_interface(self.offload_attn_norm, hidden_states, "attn_norm") if checkpoint_input_layernorm: self.input_layernorm_checkpoint = tensor_parallel.CheckpointWithoutOutput( ckpt_manager=mhc_recompute_manager ) - with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states: + with attn_norm_manager as hidden_states: input_layernorm_output = self.input_layernorm_checkpoint.checkpoint( self.input_layernorm, hidden_states ) else: - with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states: + with attn_norm_manager as hidden_states: input_layernorm_output = self.input_layernorm(hidden_states) # Self attention. @@ -1470,8 +1573,7 @@ def _forward_attention( ) nvtx_range_pop(suffix="self_attention_fused_h_res_h_post_bda") - if self.offload_attn_norm: - hidden_states = off_interface.group_commit(hidden_states, name="attn_norm") + hidden_states = attn_norm_manager.group_offload(hidden_states) # Cross-attention (no hyper connection support). 
residual = hidden_states @@ -1501,10 +1603,6 @@ def _forward_mlp( padding_mask=None, mhc_recompute_manager: Optional['CheckpointManager'] = None, ): - from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - FineGrainedActivationOffloadingInterface as off_interface, - ) - """Forward MLP with hyper connection pre/post processing.""" is_last_in_recompute_block = bool( mhc_recompute_manager is not None @@ -1524,16 +1622,17 @@ def _forward_mlp( checkpoint_pre_mlp_layernorm = self.recompute_pre_mlp_layernorm or ( mhc_recompute_manager is not None and self.mhc_checkpoint_pre_mlp_layernorm ) + self.mlp_norm_manager = self.off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") if checkpoint_pre_mlp_layernorm: self.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput( ckpt_manager=mhc_recompute_manager ) - with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states: + with self.mlp_norm_manager as hidden_states: pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint( self.pre_mlp_layernorm, hidden_states ) else: - with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states: + with self.mlp_norm_manager as hidden_states: pre_mlp_layernorm_output = self.pre_mlp_layernorm(hidden_states) nvtx_range_push(suffix="mlp") @@ -1624,12 +1723,7 @@ def _forward_post_mlp_with_fused_hyper_connection( ) nvtx_range_pop(suffix="mlp_fused_h_res_h_post_bda") - if self.offload_mlp_norm: - from megatron.core.pipeline_parallel.fine_grained_activation_offload import ( - FineGrainedActivationOffloadingInterface as off_interface, - ) - - hidden_states = off_interface.group_commit(hidden_states, name="mlp_norm") + hidden_states = self.mlp_norm_manager.group_offload(hidden_states) output = make_viewless_tensor( inp=hidden_states, requires_grad=hidden_states.requires_grad, keep_graph=True diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json index d5ced620365..8fbe219530d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -341,4 +341,4 @@ "50": 1.89832 } } -} \ No newline at end of file +} diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json index 57848f8130e..03c8cb800c9 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -284,4 +284,4 @@ "50": 1.93018 } } -} \ No newline at end of file +} diff --git a/tests/unit_tests/models/test_mamba_moe_model.py b/tests/unit_tests/models/test_mamba_moe_model.py index bf3eb9b198b..6536343f0d5 100644 --- a/tests/unit_tests/models/test_mamba_moe_model.py +++ b/tests/unit_tests/models/test_mamba_moe_model.py @@ -280,6 +280,9 @@ "fine_grained_activation_offloading": False, "min_offloaded_tensor_size": 1024 * 1024, "offload_modules": [], + "delay_offload_until_cuda_graph": False, + "delta_offload_bytes_across_pp_ranks": 0, + "activation_offload_fraction": 1.0, "dynamic_context_parallel": False, "hybrid_context_parallel": False, "max_seqlen_per_dp_cp_rank": None, diff --git 
a/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py index 558c6934a0c..419bad3aef4 100644 --- a/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py +++ b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py @@ -318,7 +318,6 @@ def test_gpt_fine_grained_activation_offloading_correctness_and_memory( ("alltoall", True, ["mlp_norm"]), ("alltoall", False, ["expert_fc1"]), ("alltoall", False, ["moe_act"]), - ("alltoall", False, ["mlp_norm", "expert_fc1", "moe_act"]), ( "alltoall", True, @@ -571,3 +570,338 @@ def _run_schedule_1f1b_two_microbatches( ) finally: Utils.destroy_model_parallel() + + +# ============================================================================= +# CUDA Graph + Fine-grained Activation Offloading Tests +# ============================================================================= + + +def _build_gpt_model_with_cuda_graph( + *, + seed: int, + num_layers: int, + hidden_size: int, + num_attention_heads: int, + vocab_size: int, + seq_length: int, + num_experts: Optional[int], + fine_grained_activation_offloading: bool, + offload_modules: Optional[List[str]], + min_offloaded_tensor_size: int, + is_mla: bool, + cuda_graph_impl: str, + cuda_graph_scope: Optional[List[str]], + cuda_graph_warmup_steps: int, + delay_offload_until_cuda_graph: bool = False, + activation_offload_fraction: float = 1.0, +) -> GPTModel: + """Build a GPTModel with CUDA Graph support and fine-grained activation offloading.""" + model_parallel_cuda_manual_seed(seed) + torch.manual_seed(seed) + ConfigClass = MLATransformerConfig if is_mla else TransformerConfig + transformer_config = ConfigClass( + num_layers=num_layers, + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, + use_cpu_initialization=True, + attention_backend=AttnBackend.unfused, + bf16=True, + # Recompute + recompute_modules=["layernorm", 
"moe_act"] if num_experts is not None else ["layernorm"], + recompute_granularity="selective", + # MoE + num_moe_experts=num_experts, + moe_grouped_gemm=(num_experts is not None), + # Fine-grained activation offloading + fine_grained_activation_offloading=fine_grained_activation_offloading, + offload_modules=offload_modules, + min_offloaded_tensor_size=min_offloaded_tensor_size, + delay_offload_until_cuda_graph=delay_offload_until_cuda_graph, + activation_offload_fraction=activation_offload_fraction, + # CUDA Graph settings + cuda_graph_impl=cuda_graph_impl, + cuda_graph_scope=cuda_graph_scope, + cuda_graph_warmup_steps=cuda_graph_warmup_steps, + use_te_rng_tracker=True, + ) + gpt_model = GPTModel( + config=transformer_config, + transformer_layer_spec=get_gpt_layer_with_transformer_engine_spec( + num_experts=num_experts, + moe_grouped_gemm=num_experts is not None, + moe_use_legacy_grouped_gemm=False, + multi_latent_attention=is_mla, + ), + vocab_size=vocab_size, + max_sequence_length=seq_length, + ).bfloat16() + return gpt_model + + +def _run_iters_with_cuda_graph( + model: GPTModel, + *, + input_ids: torch.Tensor, + position_ids: torch.Tensor, + attention_mask: torch.Tensor, + num_warmup_iters: int, + num_measure_iters: int, + enable_offload_reset: bool, +) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], int]: + """ + Run multiple forward+backward iterations with CUDA graph capture. 
+ + Returns: + - logits from last iteration (CPU float32) + - selected grads from last iteration (CPU float32) + - peak_memory_allocated (bytes) during measurement iterations + """ + from megatron.core.transformer.cuda_graphs import _CudagraphGlobalRecord, delete_cuda_graphs + + if enable_offload_reset: + off_interface.reset() + + # Warmup iterations (before CUDA graph capture) + for _ in range(num_warmup_iters): + if enable_offload_reset: + off_interface.reset() + logits = model( + input_ids=input_ids, position_ids=position_ids, attention_mask=attention_mask + ) + loss = logits.float().sum() + loss.backward() + # Zero grads for next iteration + for p in model.parameters(): + if p.grad is not None: + p.grad.zero_() + + # Trigger post-warmup offload decisions + if enable_offload_reset: + off_interface.reset() + + # Create CUDA graphs after warmup + _CudagraphGlobalRecord.create_cudagraphs() + + # Measurement iterations (with CUDA graph replay) + torch.cuda.reset_peak_memory_stats() + for i in range(num_measure_iters): + if enable_offload_reset: + off_interface.reset() + logits = model( + input_ids=input_ids, position_ids=position_ids, attention_mask=attention_mask + ) + loss = logits.float().sum() + loss.backward() + if i < num_measure_iters - 1: + for p in model.parameters(): + if p.grad is not None: + p.grad.zero_() + + torch.cuda.synchronize() + peak_bytes = int(torch.cuda.max_memory_allocated()) + + # Capture grads from last iteration + grads: Dict[str, torch.Tensor] = {} + for name, p in model.named_parameters(): + grads[name] = p.grad.detach().float().cpu() if p.grad is not None else None + + # Cleanup CUDA graphs + delete_cuda_graphs() + + return logits.detach().float().cpu(), grads, peak_bytes + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is required for offloading tests.") +@pytest.mark.skipif( + not is_te_min_version("2.14.0"), reason="CUDA Graph with TE RNG tracker requires TE >= 2.13.0" +) +@pytest.mark.parametrize( + "is_mla, 
offload_modules, cuda_graph_scope, activation_offload_fraction, delay_offload", + [ + # MoE model with attention CUDA graph + attn offloading + (False, ["core_attn", "attn_proj"], ["attn", "moe_router"], 1.0, True), + (False, ["expert_fc1", "moe_act"], ["attn", "moe_router", "moe_preprocess"], 1.0, True), + (False, ["core_attn", "attn_proj", "expert_fc1"], ["attn", "moe_router"], 1.0, True), + ( + False, + ["core_attn", "attn_proj", "expert_fc1", "moe_act"], + ["attn", "moe_router"], + 1.0, + True, + ), + ( + False, + ["core_attn", "expert_fc1", "moe_act"], + ["attn", "moe_router", "moe_preprocess"], + 1.0, + True, + ), + ( + True, + ["core_attn", "attn_proj", "expert_fc1", "moe_act"], + ["attn", "moe_router", "moe_preprocess"], + 1.0, + True, + ), + # Test activation_offload_fraction parameter + (False, ["core_attn", "attn_proj", "expert_fc1"], ["attn", "moe_router"], 0.0, True), + (False, ["core_attn", "attn_proj", "expert_fc1"], ["attn", "moe_router"], 0.5, True), + # Test delay_offload_until_cuda_graph parameter + (False, ["core_attn", "attn_proj", "expert_fc1"], ["attn", "moe_router"], 1.0, False), + ], +) +def test_fine_grained_activation_offloading_with_cuda_graph( + is_mla: bool, + offload_modules: List[str], + cuda_graph_scope: List[str], + activation_offload_fraction: float, + delay_offload: bool, +): + """ + Test fine-grained activation offloading combined with CUDA graph capture. 
+ + Verifies: + - Forward output correctness with CUDA graph + offloading + - Backward gradient correctness + - Memory savings from offloading are preserved with CUDA graphs + - Different activation_offload_fraction values work correctly + - Both delay_offload_until_cuda_graph=True/False produce correct results + """ + from megatron.core.tensor_parallel.random import initialize_rng_tracker + + os.environ.pop("NVTE_FUSED_ATTN", None) + os.environ.pop("NVTE_FLASH_ATTN", None) + os.environ.pop("NVTE_UNFUSED_ATTN", None) + + initialize_rng_tracker(use_te_rng_tracker=True, force_reset=True) + Utils.initialize_model_parallel(tensor_model_parallel_size=1, pipeline_model_parallel_size=1) + + seed = 123 + num_experts = 4 # Always MoE model + num_layers = 4 # Smaller for faster test with CUDA graphs + hidden_size = 1024 + num_attention_heads = 8 + vocab_size = 512 + seq_length = 512 + micro_batch_size = 2 + device = torch.device("cuda") + cuda_graph_warmup_steps = 3 + + input_ids, position_ids, attention_mask = _make_gpt_inputs( + seq_length=seq_length, micro_batch_size=micro_batch_size, device=device + ) + + off_interface.reset_instance() + + try: + # 1) Baseline: CUDA graph enabled, offloading disabled + _reset_cuda_memory() + base_model = _build_gpt_model_with_cuda_graph( + seed=seed, + num_layers=num_layers, + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, + vocab_size=vocab_size, + seq_length=seq_length, + num_experts=num_experts, + fine_grained_activation_offloading=False, + offload_modules=None, + min_offloaded_tensor_size=1024 * 1024, + is_mla=is_mla, + cuda_graph_impl="transformer_engine", + cuda_graph_scope=cuda_graph_scope, + cuda_graph_warmup_steps=cuda_graph_warmup_steps, + ).cuda() + base_model.train() + + base_logits, base_grads, base_peak = _run_iters_with_cuda_graph( + base_model, + input_ids=input_ids, + position_ids=position_ids, + attention_mask=attention_mask, + num_warmup_iters=cuda_graph_warmup_steps, + num_measure_iters=2, + 
enable_offload_reset=False, + ) + del base_model + _reset_cuda_memory() + + # 2) Test: CUDA graph enabled + offloading enabled + off_interface.reset_instance() + + off_model = _build_gpt_model_with_cuda_graph( + seed=seed, + num_layers=num_layers, + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, + vocab_size=vocab_size, + seq_length=seq_length, + num_experts=num_experts, + fine_grained_activation_offloading=True, + offload_modules=offload_modules, + min_offloaded_tensor_size=1024, # Force offloading for determinism + is_mla=is_mla, + cuda_graph_impl="transformer_engine", + cuda_graph_scope=cuda_graph_scope, + cuda_graph_warmup_steps=cuda_graph_warmup_steps, + delay_offload_until_cuda_graph=delay_offload, + activation_offload_fraction=activation_offload_fraction, + ).cuda() + off_model.train() + + off_logits, off_grads, off_peak = _run_iters_with_cuda_graph( + off_model, + input_ids=input_ids, + position_ids=position_ids, + attention_mask=attention_mask, + num_warmup_iters=cuda_graph_warmup_steps, + num_measure_iters=2, + enable_offload_reset=True, + ) + del off_model + _reset_cuda_memory() + + # 3) Correctness checks + assert torch.allclose( + off_logits, base_logits, rtol=1e-2, atol=1e-2 + ), f"Logits mismatch: max_diff={torch.max(torch.abs(off_logits - base_logits))}" + assert set(off_grads.keys()) == set(base_grads.keys()) + for name, gb in base_grads.items(): + go = off_grads[name] + if gb is None or go is None: + assert gb is None and go is None, f"Grad None mismatch for {name}" + continue + assert torch.allclose( + go, gb, rtol=1e-2, atol=1e-2 + ), f"Grad mismatch for {name}: max_diff={torch.max(torch.abs(go - gb))}" + + # 4) Memory checks - offloading should still reduce memory with CUDA graphs + saved_mib = (base_peak - off_peak) / (1024**2) + print( + f"CUDA Graph + Offload test (fraction={activation_offload_fraction}, delay={delay_offload}): " + f"base_peak={base_peak/(1024**2):.2f}MiB, " + f"off_peak={off_peak/(1024**2):.2f}MiB, " + 
f"saved={saved_mib:.2f}MiB" + ) + + # Basic sanity checks + assert not torch.isnan(off_logits).any(), "NaN detected in logits" + assert not torch.isinf(off_logits).any(), "Inf detected in logits" + + # Check gradients are valid + for name, g in off_grads.items(): + if g is not None: + assert not torch.isnan(g).any(), f"NaN detected in grad for {name}" + assert not torch.isinf(g).any(), f"Inf detected in grad for {name}" + + # Note: With CUDA graphs, memory behavior may differ from eager mode. + # We check that offloading doesn't significantly increase memory. + # In some cases, graph capture overhead may offset offload savings. + assert saved_mib >= -DELTA, ( + f"Offloading with CUDA graph significantly increased memory: " + f"saved={saved_mib:.2f}MiB (negative means increase)" + ) + + finally: + Utils.destroy_model_parallel() diff --git a/tests/unit_tests/transformer/test_transformer_layer.py b/tests/unit_tests/transformer/test_transformer_layer.py index 995e99d6a24..c80b8f14480 100644 --- a/tests/unit_tests/transformer/test_transformer_layer.py +++ b/tests/unit_tests/transformer/test_transformer_layer.py @@ -1009,7 +1009,9 @@ def test_forward_backward_with_offloading(self): attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda') mgr = PipelineOffloadManager.get_instance() - mgr.init_model_chunk_offload_handler(vp_size=1, vp_stage=0, min_offloaded_tensor_size=0) + mgr.init_model_chunk_offload_handler( + pp_rank=0, vp_size=1, vp_stage=0, min_offloaded_tensor_size=0 + ) output, context = layer(hidden_states=hidden_states, attention_mask=attention_mask) @@ -1078,7 +1080,9 @@ def test_offloading_numerical_equivalence(self): layer_offload.train() mgr = PipelineOffloadManager.get_instance() - mgr.init_model_chunk_offload_handler(vp_size=1, vp_stage=0, min_offloaded_tensor_size=0) + mgr.init_model_chunk_offload_handler( + pp_rank=0, vp_size=1, vp_stage=0, min_offloaded_tensor_size=0 + ) h2 = input_data.clone().detach().requires_grad_(True) out2, 
_ = layer_offload(hidden_states=h2, attention_mask=attention_mask) From 9c0b6efa9412e6171047d7171dc45503f0545d52 Mon Sep 17 00:00:00 2001 From: Deyu Fu Date: Mon, 30 Mar 2026 16:07:08 +0800 Subject: [PATCH 327/334] update golden value for gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer, respecting dev Signed-off-by: Deyu Fu --- .../golden_values_dev_dgx_h100.json | 784 +++++++++--------- 1 file changed, 392 insertions(+), 392 deletions(-) diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json index f529a646a7e..9533c3e29a1 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json @@ -8,102 +8,102 @@ "2": 10.91072, "3": 10.91895, "4": 10.91763, - "5": 10.90484, - "6": 10.90203, - "7": 10.89753, - "8": 10.91294, - "9": 10.91701, - "10": 10.91028, - "11": 10.90124, - "12": 10.89698, - "13": 10.88788, - "14": 10.89478, - "15": 10.87488, - "16": 10.87022, - "17": 10.86892, - "18": 10.85196, - "19": 10.87008, - "20": 10.7881, - "21": 10.77222, - "22": 10.7669, - "23": 10.75865, - "24": 10.71955, - "25": 10.71987, - "26": 10.71249, - "27": 10.68554, - "28": 10.61292, - "29": 10.58664, - "30": 10.56554, - "31": 10.55749, - "32": 10.54875, - "33": 10.50948, - "34": 10.48165, - "35": 10.46995, - "36": 10.45309, - "37": 10.42791, - "38": 10.43268, - "39": 10.40324, - "40": 10.3773, - "41": 10.36856, - "42": 10.33125, - "43": 10.31537, - "44": 10.29014, - "45": 10.30253, - "46": 10.26536, - "47": 10.25557, - "48": 10.20689, - "49": 10.21031, - "50": 10.2105, - "51": 10.21191, - "52": 10.16277, 
- "53": 10.16315, - "54": 10.13391, - "55": 10.10867, - "56": 10.13455, + "5": 10.90462, + "6": 10.90222, + "7": 10.89756, + "8": 10.91282, + "9": 10.91678, + "10": 10.9104, + "11": 10.9015, + "12": 10.89781, + "13": 10.8883, + "14": 10.89516, + "15": 10.87477, + "16": 10.87004, + "17": 10.86866, + "18": 10.85186, + "19": 10.87023, + "20": 10.78833, + "21": 10.7724, + "22": 10.76686, + "23": 10.75821, + "24": 10.71892, + "25": 10.72027, + "26": 10.71214, + "27": 10.68529, + "28": 10.61314, + "29": 10.58641, + "30": 10.56586, + "31": 10.5575, + "32": 10.5488, + "33": 10.50937, + "34": 10.48155, + "35": 10.47006, + "36": 10.45297, + "37": 10.42758, + "38": 10.43258, + "39": 10.40282, + "40": 10.37727, + "41": 10.36865, + "42": 10.33123, + "43": 10.31512, + "44": 10.29023, + "45": 10.30268, + "46": 10.26547, + "47": 10.25564, + "48": 10.20686, + "49": 10.21056, + "50": 10.21037, + "51": 10.21194, + "52": 10.16248, + "53": 10.16319, + "54": 10.13395, + "55": 10.10854, + "56": 10.13474, "57": 10.13262, - "58": 10.12407, - "59": 10.06503, - "60": 10.09528, - "61": 10.04743, - "62": 10.01537, - "63": 10.08286, - "64": 10.03273, - "65": 9.99833, - "66": 10.03902, - "67": 10.01293, - "68": 9.97751, - "69": 9.99331, - "70": 9.97079, - "71": 9.99817, - "72": 9.97548, - "73": 9.95979, - "74": 9.95289, - "75": 9.91425, - "76": 9.9499, - "77": 9.94212, - "78": 9.89883, - "79": 9.89693, - "80": 9.91029, - "81": 9.93356, - "82": 9.88352, - "83": 9.83982, - "84": 9.78195, - "85": 9.76266, - "86": 9.87794, - "87": 9.90072, - "88": 9.87398, - "89": 9.82485, - "90": 9.81362, - "91": 9.8199, - "92": 9.81611, - "93": 9.74343, - "94": 9.82156, - "95": 9.8122, - "96": 9.79476, - "97": 9.74624, - "98": 9.76879, - "99": 9.81836, - "100": 9.7074 + "58": 10.124, + "59": 10.06483, + "60": 10.09511, + "61": 10.04736, + "62": 10.01513, + "63": 10.08268, + "64": 10.03239, + "65": 9.99804, + "66": 10.03859, + "67": 10.01247, + "68": 9.97703, + "69": 9.9927, + "70": 9.97031, + "71": 9.99747, + 
"72": 9.97476, + "73": 9.95896, + "74": 9.95212, + "75": 9.9133, + "76": 9.94908, + "77": 9.94119, + "78": 9.89795, + "79": 9.89601, + "80": 9.90926, + "81": 9.93266, + "82": 9.8826, + "83": 9.83875, + "84": 9.78078, + "85": 9.76158, + "86": 9.87689, + "87": 9.89972, + "88": 9.87298, + "89": 9.82372, + "90": 9.81265, + "91": 9.81889, + "92": 9.81491, + "93": 9.74217, + "94": 9.82042, + "95": 9.81103, + "96": 9.79363, + "97": 9.74488, + "98": 9.76721, + "99": 9.81701, + "100": 9.70593 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2589.0, - "2": 2610.0, - "3": 2532.0, - "4": 2530.0, - "5": 2535.0, - "6": 2504.0, - "7": 2664.0, - "8": 2529.0, - "9": 2641.0, - "10": 2550.0, - "11": 2654.0, - "12": 2438.0, - "13": 2617.0, - "14": 2645.0, - "15": 2328.0, - "16": 2493.0, - "17": 2550.0, - "18": 2599.0, - "19": 2441.0, - "20": 2491.0, - "21": 2583.0, - "22": 2562.0, - "23": 2470.0, - "24": 2588.0, - "25": 2439.0, - "26": 2535.0, - "27": 2589.0, - "28": 2534.0, - "29": 2637.0, - "30": 2716.0, - "31": 2705.0, - "32": 2812.0, - "33": 2835.0, - "34": 2727.0, - "35": 2870.0, - "36": 2698.0, - "37": 2921.0, - "38": 2783.0, - "39": 2848.0, - "40": 3037.0, - "41": 3154.0, - "42": 2864.0, - "43": 3103.0, - "44": 3123.0, - "45": 3271.0, - "46": 3208.0, - "47": 3206.0, - "48": 3309.0, - "49": 3457.0, - "50": 3466.0, - "51": 3276.0, - "52": 3448.0, - "53": 3254.0, - "54": 3504.0, - "55": 3230.0, - "56": 3568.0, - "57": 2933.0, - "58": 4052.0, - "59": 3626.0, - "60": 3510.0, - "61": 3371.0, - "62": 3642.0, - "63": 4019.0, - "64": 4041.0, - "65": 3371.0, - "66": 3826.0, - "67": 4156.0, - "68": 3811.0, - "69": 3545.0, - "70": 3831.0, - "71": 3834.0, - "72": 3593.0, - "73": 4098.0, - "74": 3711.0, - "75": 3649.0, - "76": 3907.0, - "77": 4118.0, - "78": 4212.0, - "79": 4428.0, - "80": 33291.0, - "81": 8226.0, - "82": 528724.0, - "83": 3499.0, - "84": 31529.0, - "85": 528713.0, - "86": 529264.0, - "87": 581775.0, - "88": 529230.0, - 
"89": 529270.0, - "90": 529149.0, - "91": 528757.0, - "92": 529091.0, - "93": 549748.0, - "94": 529131.0, - "95": 553058.0, - "96": 560607.0, - "97": 529708.0, - "98": 529488.0, - "99": 529121.0, - "100": 529245.0 + "1": 6427.0, + "2": 6618.0, + "3": 6705.0, + "4": 6626.0, + "5": 6454.0, + "6": 6215.0, + "7": 6854.0, + "8": 6253.0, + "9": 6519.0, + "10": 6579.0, + "11": 6610.0, + "12": 6245.0, + "13": 6667.0, + "14": 6918.0, + "15": 6294.0, + "16": 6413.0, + "17": 6473.0, + "18": 6473.0, + "19": 6481.0, + "20": 6284.0, + "21": 6610.0, + "22": 6553.0, + "23": 6354.0, + "24": 6699.0, + "25": 6464.0, + "26": 6614.0, + "27": 6724.0, + "28": 6671.0, + "29": 7037.0, + "30": 6976.0, + "31": 7135.0, + "32": 7146.0, + "33": 7088.0, + "34": 7123.0, + "35": 7319.0, + "36": 7225.0, + "37": 7638.0, + "38": 7696.0, + "39": 7778.0, + "40": 7985.0, + "41": 8138.0, + "42": 7526.0, + "43": 8067.0, + "44": 7962.0, + "45": 8660.0, + "46": 8468.0, + "47": 8513.0, + "48": 8547.0, + "49": 8878.0, + "50": 8823.0, + "51": 8750.0, + "52": 8942.0, + "53": 8470.0, + "54": 9274.0, + "55": 8387.0, + "56": 9552.0, + "57": 7729.0, + "58": 10444.0, + "59": 9320.0, + "60": 9455.0, + "61": 8934.0, + "62": 9447.0, + "63": 10085.0, + "64": 10049.0, + "65": 8632.0, + "66": 9644.0, + "67": 10241.0, + "68": 9905.0, + "69": 8978.0, + "70": 9730.0, + "71": 9629.0, + "72": 9249.0, + "73": 10081.0, + "74": 14397.0, + "75": 8917.0, + "76": 10143.0, + "77": 10427.0, + "78": 10760.0, + "79": 68696.0, + "80": 132664.0, + "81": 80159.0, + "82": 1117640.0, + "83": 67014.0, + "84": 1112297.0, + "85": 2106479.0, + "86": 2108092.0, + "87": 1279087.0, + "88": 2107686.0, + "89": 2111718.0, + "90": 1059710.0, + "91": 2106808.0, + "92": 2106945.0, + "93": 3155405.0, + "94": 2107876.0, + "95": 2155420.0, + "96": 2170260.0, + "97": 2108441.0, + "98": 2107668.0, + "99": 2107336.0, + "100": 2107900.0 } }, "mem-allocated-bytes": { @@ -327,104 +327,104 @@ "values": { "1": 974333952.0, "2": 1142500864.0, - "3": 1142675968.0, - 
"4": 1147437056.0, - "5": 1147925504.0, - "6": 1147925504.0, - "7": 1148942336.0, - "8": 1148942336.0, - "9": 1148942336.0, - "10": 1148942336.0, - "11": 1148942336.0, - "12": 1148942336.0, - "13": 1148942336.0, - "14": 1148942336.0, - "15": 1148942336.0, - "16": 1148942336.0, - "17": 1148942336.0, - "18": 1148942336.0, - "19": 1148942336.0, - "20": 1148942336.0, - "21": 1148942336.0, - "22": 1148942336.0, - "23": 1148942336.0, - "24": 1148942336.0, - "25": 1148942336.0, - "26": 1149713920.0, - "27": 1149713920.0, - "28": 1149713920.0, - "29": 1149713920.0, - "30": 1149713920.0, - "31": 1149713920.0, - "32": 1149713920.0, - "33": 1149713920.0, - "34": 1149713920.0, - "35": 1149713920.0, - "36": 1149713920.0, - "37": 1149713920.0, - "38": 1149713920.0, - "39": 1149713920.0, - "40": 1149713920.0, - "41": 1149713920.0, - "42": 1149713920.0, - "43": 1149713920.0, - "44": 1149713920.0, - "45": 1149713920.0, - "46": 1149713920.0, - "47": 1149713920.0, - "48": 1149713920.0, - "49": 1149713920.0, - "50": 1149713920.0, - "51": 1149713920.0, - "52": 1149713920.0, - "53": 1149713920.0, - "54": 1149713920.0, - "55": 1149713920.0, - "56": 1149713920.0, - "57": 1149713920.0, - "58": 1149713920.0, - "59": 1149713920.0, - "60": 1149713920.0, - "61": 1149713920.0, - "62": 1149713920.0, - "63": 1149713920.0, - "64": 1149713920.0, - "65": 1149713920.0, - "66": 1149713920.0, - "67": 1149713920.0, - "68": 1149713920.0, - "69": 1149713920.0, - "70": 1149713920.0, - "71": 1149713920.0, - "72": 1149713920.0, - "73": 1149713920.0, - "74": 1149713920.0, - "75": 1149713920.0, - "76": 1149713920.0, - "77": 1149713920.0, - "78": 1149713920.0, - "79": 1149713920.0, - "80": 1149713920.0, - "81": 1149713920.0, - "82": 1149713920.0, - "83": 1149713920.0, - "84": 1149713920.0, - "85": 1149713920.0, - "86": 1149713920.0, - "87": 1149713920.0, - "88": 1149713920.0, - "89": 1149713920.0, - "90": 1149713920.0, - "91": 1149713920.0, - "92": 1149713920.0, - "93": 1149713920.0, - "94": 1149713920.0, - 
"95": 1149713920.0, - "96": 1149713920.0, - "97": 1149713920.0, - "98": 1149713920.0, - "99": 1149713920.0, - "100": 1149713920.0 + "3": 1142671872.0, + "4": 1147373568.0, + "5": 1147845632.0, + "6": 1147845632.0, + "7": 1148584448.0, + "8": 1148584448.0, + "9": 1148584448.0, + "10": 1148584448.0, + "11": 1148584448.0, + "12": 1148584448.0, + "13": 1148584448.0, + "14": 1148584448.0, + "15": 1148584448.0, + "16": 1148584448.0, + "17": 1148584448.0, + "18": 1148584448.0, + "19": 1148584448.0, + "20": 1148584448.0, + "21": 1148584448.0, + "22": 1148584448.0, + "23": 1148584448.0, + "24": 1148584448.0, + "25": 1148584448.0, + "26": 1148584448.0, + "27": 1148584448.0, + "28": 1148584448.0, + "29": 1148584448.0, + "30": 1148584448.0, + "31": 1148584448.0, + "32": 1148584448.0, + "33": 1148584448.0, + "34": 1148584448.0, + "35": 1148595200.0, + "36": 1148595200.0, + "37": 1148595200.0, + "38": 1148595200.0, + "39": 1148595200.0, + "40": 1148595200.0, + "41": 1148595200.0, + "42": 1148595200.0, + "43": 1148595200.0, + "44": 1148595200.0, + "45": 1148595200.0, + "46": 1148595200.0, + "47": 1148595200.0, + "48": 1148595200.0, + "49": 1148595200.0, + "50": 1148595200.0, + "51": 1148595200.0, + "52": 1148595200.0, + "53": 1148595200.0, + "54": 1148595200.0, + "55": 1148595200.0, + "56": 1148595200.0, + "57": 1148595200.0, + "58": 1148595200.0, + "59": 1148595200.0, + "60": 1148595200.0, + "61": 1148595200.0, + "62": 1148595200.0, + "63": 1148595200.0, + "64": 1148595200.0, + "65": 1148595200.0, + "66": 1148595200.0, + "67": 1148595200.0, + "68": 1148595200.0, + "69": 1148595200.0, + "70": 1148595200.0, + "71": 1148595200.0, + "72": 1148595200.0, + "73": 1148595200.0, + "74": 1148595200.0, + "75": 1148595200.0, + "76": 1148595200.0, + "77": 1148595200.0, + "78": 1148595200.0, + "79": 1148595200.0, + "80": 1148595200.0, + "81": 1148595200.0, + "82": 1148595200.0, + "83": 1148595200.0, + "84": 1148595200.0, + "85": 1148595200.0, + "86": 1148595200.0, + "87": 1148595200.0, + 
"88": 1148595200.0, + "89": 1148595200.0, + "90": 1148595200.0, + "91": 1148595200.0, + "92": 1148595200.0, + "93": 1148595200.0, + "94": 1148595200.0, + "95": 1148595200.0, + "96": 1148595200.0, + "97": 1148595200.0, + "98": 1148595200.0, + "99": 1148595200.0, + "100": 1148595200.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 11.7836, - "3": 0.58975, - "4": 0.56544, - "5": 0.5504, - "6": 0.56842, - "7": 0.5491, - "8": 0.54138, - "9": 0.53371, - "10": 0.5342, - "11": 0.53224, - "12": 0.52891, - "13": 0.52976, - "14": 0.53162, - "15": 0.52297, - "16": 0.52336, - "17": 0.52793, - "18": 0.52225, - "19": 0.52121, - "20": 0.52937, - "21": 0.53168, - "22": 0.52349, - "23": 0.52045, - "24": 0.53318, - "25": 0.52745, - "26": 0.51972, - "27": 0.52474, - "28": 0.53885, - "29": 0.54406, - "30": 0.52979, - "31": 0.52273, - "32": 0.52354, - "33": 0.52179, - "34": 0.52809, - "35": 0.52207, - "36": 0.52789, - "37": 0.51996, - "38": 0.53223, - "39": 0.52549, - "40": 0.53308, - "41": 0.53147, - "42": 0.53153, - "43": 0.5292, - "44": 0.52056, - "45": 0.52578, - "46": 0.51549, - "47": 0.51842, - "48": 0.51917, - "49": 0.52488, - "50": 0.52255, - "51": 0.64477, - "52": 0.51979, - "53": 0.52383, - "54": 0.52192, - "55": 0.51931, - "56": 0.51907, - "57": 0.52009, - "58": 0.51807, - "59": 0.51736, - "60": 0.51892, - "61": 0.51809, - "62": 0.52089, - "63": 0.52315, - "64": 0.51504, - "65": 0.51491, - "66": 0.51739, - "67": 0.51455, - "68": 0.51564, - "69": 1.04071, - "70": 0.5162, - "71": 0.51607, - "72": 0.5156, - "73": 0.51835, - "74": 0.51882, - "75": 0.52265, - "76": 0.51863, - "77": 0.51483, - "78": 0.51774, - "79": 0.52634, - "80": 0.52171, - "81": 0.52135, - "82": 0.52168, - "83": 0.53375, - "84": 0.51785, - "85": 0.52358, - "86": 0.51614, - "87": 0.52652, - "88": 0.51691, - "89": 0.51638, - "90": 0.52191, - "91": 0.51655, - "92": 0.51846, - "93": 0.51379, - "94": 0.51835, - "95": 0.91609, - "96": 0.51869, - "97": 0.51813, - 
"98": 0.5255, - "99": 0.52418, - "100": 0.53762 + "2": 8.7306, + "3": 0.82541, + "4": 0.79111, + "5": 0.78772, + "6": 0.78491, + "7": 0.77321, + "8": 0.80845, + "9": 0.76281, + "10": 0.76741, + "11": 0.76405, + "12": 0.7464, + "13": 0.74032, + "14": 0.74249, + "15": 0.7361, + "16": 0.73487, + "17": 0.72656, + "18": 0.73602, + "19": 0.72939, + "20": 0.72896, + "21": 0.7316, + "22": 0.73357, + "23": 0.72972, + "24": 0.73707, + "25": 0.73966, + "26": 0.719, + "27": 0.72924, + "28": 0.74616, + "29": 0.75162, + "30": 0.75031, + "31": 0.74663, + "32": 0.73337, + "33": 0.73723, + "34": 0.73465, + "35": 0.73771, + "36": 0.7385, + "37": 0.73536, + "38": 0.74515, + "39": 0.73575, + "40": 0.74509, + "41": 0.73501, + "42": 0.74091, + "43": 0.74268, + "44": 0.73316, + "45": 0.7359, + "46": 0.72733, + "47": 0.73408, + "48": 0.73042, + "49": 0.73455, + "50": 0.72958, + "51": 0.8591, + "52": 0.81718, + "53": 0.74131, + "54": 0.74839, + "55": 0.74974, + "56": 0.75244, + "57": 0.74244, + "58": 0.73823, + "59": 0.74268, + "60": 0.74576, + "61": 0.74499, + "62": 0.74408, + "63": 0.74442, + "64": 0.74569, + "65": 0.73634, + "66": 0.74134, + "67": 1.30864, + "68": 0.74506, + "69": 0.7469, + "70": 0.73887, + "71": 0.74595, + "72": 0.73832, + "73": 0.73662, + "74": 0.74627, + "75": 0.75627, + "76": 0.74451, + "77": 0.73734, + "78": 0.73831, + "79": 0.74279, + "80": 0.74483, + "81": 0.74523, + "82": 0.7475, + "83": 0.75273, + "84": 0.74267, + "85": 0.73974, + "86": 0.73832, + "87": 0.74642, + "88": 0.73886, + "89": 0.73962, + "90": 0.82905, + "91": 0.73775, + "92": 0.7538, + "93": 0.75623, + "94": 0.74641, + "95": 0.74354, + "96": 0.73224, + "97": 0.73277, + "98": 0.73692, + "99": 0.73794, + "100": 0.73356 } } } \ No newline at end of file From 2bb0d38e631fbf88b07780211f75adbd299b08c5 Mon Sep 17 00:00:00 2001 From: "Dennis(Zhenhuan) Liu" Date: Fri, 3 Apr 2026 19:53:58 +0800 Subject: [PATCH 328/334] [Dev] Fix golden values mismatch and dependency error due to last pull main (#4123) 
Co-authored-by: Claude Sonnet 4.6 --- pyproject.toml | 4 +- .../golden_values_dev_dgx_h100.json | 474 +++---- .../golden_values_dev_dgx_h100.json | 442 +++---- .../golden_values_dev_dgx_h100.json | 442 +++---- .../golden_values_dev_dgx_h100.json | 494 +++---- .../golden_values_dev_dgx_h100.json | 1152 ++++++++--------- uv.lock | 135 +- 7 files changed, 1587 insertions(+), 1556 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7ce7e3e17c6..a9889eb8c00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,9 +94,9 @@ dev = [ "fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0 "datasets", "emerging_optimizers; python_version >= '3.12'", - "flask[async]", + "quart", "hypercorn", - "openai", + "openai[aiohttp]", ] lts = [ diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json index fd52044e2b5..40c4236aaba 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json @@ -7,53 +7,53 @@ "1": 10.86149, "2": 10.85467, "3": 10.86695, - "4": 10.84625, - "5": 10.8847, - "6": 10.89676, - "7": 10.87272, - "8": 10.86586, + "4": 10.84622, + "5": 10.88467, + "6": 10.89675, + "7": 10.87274, + "8": 10.86587, "9": 10.86993, "10": 10.83755, "11": 10.89458, - "12": 10.87956, - "13": 10.8768, - "14": 10.90362, + "12": 10.87951, + "13": 10.87683, + "14": 10.90359, "15": 10.8311, - "16": 10.8345, - "17": 10.80061, - "18": 10.82066, - "19": 10.81459, - "20": 10.71809, - "21": 10.68631, + "16": 10.83451, + "17": 10.8006, + "18": 10.82062, + "19": 10.81464, + "20": 10.71811, + "21": 10.68628, "22": 10.532, - "23": 10.7048, - "24": 10.58548, + "23": 10.70484, + "24": 10.58546, "25": 10.51896, - "26": 10.58491, - "27": 10.60108, - 
"28": 10.53537, + "26": 10.58493, + "27": 10.60104, + "28": 10.53535, "29": 10.57113, - "30": 10.33244, - "31": 10.0583, - "32": 10.42784, - "33": 10.4202, - "34": 10.16985, + "30": 10.33245, + "31": 10.05828, + "32": 10.42782, + "33": 10.42024, + "34": 10.16984, "35": 10.23069, - "36": 10.18752, - "37": 10.31251, - "38": 10.14213, - "39": 10.38135, - "40": 10.04843, - "41": 10.10329, - "42": 10.17154, - "43": 9.78292, - "44": 9.90959, - "45": 9.78499, - "46": 9.76878, - "47": 10.10082, - "48": 9.80965, + "36": 10.18748, + "37": 10.31248, + "38": 10.1421, + "39": 10.38137, + "40": 10.04848, + "41": 10.10328, + "42": 10.17152, + "43": 9.78294, + "44": 9.90964, + "45": 9.785, + "46": 9.7688, + "47": 10.10084, + "48": 9.80968, "49": 9.48778, - "50": 9.86704 + "50": 9.8671 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1649.0, - "2": 34620.0, - "3": 34517.0, - "4": 1822.0, - "5": 34641.0, - "6": 1849.0, - "7": 1816.0, - "8": 1587.0, - "9": 34596.0, - "10": 34175.0, - "11": 34644.0, - "12": 34371.0, - "13": 1821.0, - "14": 1785.0, - "15": 1928.0, - "16": 1825.0, - "17": 1820.0, - "18": 34490.0, - "19": 1711.0, - "20": 1628.0, - "21": 1805.0, - "22": 1637.0, - "23": 34927.0, - "24": 1586.0, - "25": 1580.0, - "26": 34510.0, - "27": 34510.0, - "28": 2017.0, - "29": 1992.0, - "30": 1955.0, - "31": 34406.0, - "32": 34643.0, - "33": 34950.0, - "34": 1992.0, - "35": 34671.0, - "36": 34721.0, - "37": 2360.0, - "38": 34999.0, - "39": 35102.0, - "40": 2173.0, - "41": 35092.0, - "42": 2405.0, - "43": 34752.0, - "44": 34911.0, - "45": 34908.0, - "46": 35080.0, - "47": 35225.0, - "48": 35262.0, - "49": 35174.0, - "50": 35281.0 + "1": 1732.0, + "2": 34586.0, + "3": 1628.0, + "4": 1806.0, + "5": 1834.0, + "6": 1858.0, + "7": 1772.0, + "8": 1665.0, + "9": 34627.0, + "10": 1456.0, + "11": 34535.0, + "12": 34448.0, + "13": 34667.0, + "14": 1796.0, + "15": 1927.0, + "16": 1877.0, + "17": 34649.0, + "18": 34420.0, + "19": 1769.0, + "20": 
1649.0, + "21": 34642.0, + "22": 34433.0, + "23": 34799.0, + "24": 1646.0, + "25": 34511.0, + "26": 34458.0, + "27": 34560.0, + "28": 2009.0, + "29": 34850.0, + "30": 1856.0, + "31": 34387.0, + "32": 34646.0, + "33": 34964.0, + "34": 1977.0, + "35": 34773.0, + "36": 34665.0, + "37": 2428.0, + "38": 35045.0, + "39": 35161.0, + "40": 2201.0, + "41": 35100.0, + "42": 2389.0, + "43": 34872.0, + "44": 34922.0, + "45": 2153.0, + "46": 35027.0, + "47": 35293.0, + "48": 35249.0, + "49": 35127.0, + "50": 35248.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 539492864.0, - "2": 539492864.0, - "3": 539492864.0, - "4": 539492864.0, - "5": 539492864.0, - "6": 539492864.0, - "7": 539492864.0, - "8": 539492864.0, - "9": 539492864.0, - "10": 539492864.0, - "11": 539492864.0, - "12": 539492864.0, - "13": 539492864.0, - "14": 539492864.0, - "15": 539492864.0, - "16": 539492864.0, - "17": 539492864.0, - "18": 539492864.0, - "19": 539492864.0, - "20": 539492864.0, - "21": 539492864.0, - "22": 539492864.0, - "23": 539492864.0, - "24": 539492864.0, - "25": 539492864.0, - "26": 539492864.0, - "27": 539492864.0, - "28": 539492864.0, - "29": 539492864.0, - "30": 539492864.0, - "31": 539492864.0, - "32": 539492864.0, - "33": 539492864.0, - "34": 539492864.0, - "35": 539492864.0, - "36": 539492864.0, - "37": 539492864.0, - "38": 539492864.0, - "39": 539492864.0, - "40": 539492864.0, - "41": 539492864.0, - "42": 539492864.0, - "43": 539492864.0, - "44": 539492864.0, - "45": 539492864.0, - "46": 539492864.0, - "47": 539492864.0, - "48": 539492864.0, - "49": 539492864.0, - "50": 539492864.0 + "1": 555746816.0, + "2": 555746816.0, + "3": 555746816.0, + "4": 555746816.0, + "5": 555746816.0, + "6": 555746816.0, + "7": 555746816.0, + "8": 555746816.0, + "9": 555746816.0, + "10": 555746816.0, + "11": 555746816.0, + "12": 555746816.0, + "13": 555746816.0, + "14": 555746816.0, + "15": 555746816.0, + "16": 555746816.0, + "17": 555746816.0, + 
"18": 555746816.0, + "19": 555746816.0, + "20": 555746816.0, + "21": 555746816.0, + "22": 555746816.0, + "23": 555746816.0, + "24": 555746816.0, + "25": 555746816.0, + "26": 555746816.0, + "27": 555746816.0, + "28": 555746816.0, + "29": 555746816.0, + "30": 555746816.0, + "31": 555746816.0, + "32": 555746816.0, + "33": 555746816.0, + "34": 555746816.0, + "35": 555746816.0, + "36": 555746816.0, + "37": 555746816.0, + "38": 555746816.0, + "39": 555746816.0, + "40": 555746816.0, + "41": 555746816.0, + "42": 555746816.0, + "43": 555746816.0, + "44": 555746816.0, + "45": 555746816.0, + "46": 555746816.0, + "47": 555746816.0, + "48": 555746816.0, + "49": 555746816.0, + "50": 555746816.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1729398272.0, - "2": 1914238464.0, - "3": 1914238464.0, - "4": 1914238464.0, - "5": 1914238464.0, - "6": 1914238464.0, - "7": 1914238464.0, - "8": 1914238464.0, - "9": 1914238464.0, - "10": 1914238464.0, - "11": 1914238464.0, - "12": 1914238464.0, - "13": 1914238464.0, - "14": 1914238464.0, - "15": 1914238464.0, - "16": 1914238464.0, - "17": 1914238464.0, - "18": 1914238464.0, - "19": 1914238464.0, - "20": 1914238464.0, - "21": 1914238464.0, - "22": 1914238464.0, - "23": 1914238464.0, - "24": 1914238464.0, - "25": 1914238464.0, - "26": 1914238464.0, - "27": 1914238464.0, - "28": 1914238464.0, - "29": 1914238464.0, - "30": 1914238464.0, - "31": 1914238464.0, - "32": 1914238464.0, - "33": 1914238464.0, - "34": 1914238464.0, - "35": 1914238464.0, - "36": 1914238464.0, - "37": 1914238464.0, - "38": 1914238464.0, - "39": 1914238464.0, - "40": 1914238464.0, - "41": 1914238464.0, - "42": 1914238464.0, - "43": 1914238464.0, - "44": 1914238464.0, - "45": 1914238464.0, - "46": 1914238464.0, - "47": 1914238464.0, - "48": 1914238464.0, - "49": 1914238464.0, - "50": 1914238464.0 + "1": 1728349696.0, + "2": 1917909504.0, + "3": 1917909504.0, + "4": 1917909504.0, + "5": 1917909504.0, + "6": 
1917909504.0, + "7": 1917909504.0, + "8": 1917909504.0, + "9": 1917909504.0, + "10": 1917909504.0, + "11": 1917909504.0, + "12": 1917909504.0, + "13": 1917909504.0, + "14": 1917909504.0, + "15": 1917909504.0, + "16": 1917909504.0, + "17": 1917909504.0, + "18": 1917909504.0, + "19": 1917909504.0, + "20": 1917909504.0, + "21": 1917909504.0, + "22": 1917909504.0, + "23": 1917909504.0, + "24": 1917909504.0, + "25": 1917909504.0, + "26": 1917909504.0, + "27": 1917909504.0, + "28": 1917909504.0, + "29": 1917909504.0, + "30": 1917909504.0, + "31": 1917909504.0, + "32": 1917909504.0, + "33": 1917909504.0, + "34": 1917909504.0, + "35": 1917909504.0, + "36": 1917909504.0, + "37": 1917909504.0, + "38": 1917909504.0, + "39": 1917909504.0, + "40": 1917909504.0, + "41": 1917909504.0, + "42": 1917909504.0, + "43": 1917909504.0, + "44": 1917909504.0, + "45": 1917909504.0, + "46": 1917909504.0, + "47": 1917909504.0, + "48": 1917909504.0, + "49": 1917909504.0, + "50": 1917909504.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 33.07638, - "3": 4.62885, - "4": 2.78847, - "5": 3.81661, - "6": 4.56696, - "7": 3.45862, - "8": 2.51384, - "9": 2.4275, - "10": 3.71405, - "11": 3.43435, - "12": 4.09536, - "13": 1.70339, - "14": 4.2772, - "15": 2.37094, - "16": 2.10863, - "17": 1.98699, - "18": 4.2631, - "19": 2.93254, - "20": 4.0228, - "21": 3.09583, - "22": 3.24615, - "23": 4.11215, - "24": 2.40344, - "25": 3.66841, - "26": 0.5852, - "27": 6.04702, - "28": 2.56074, - "29": 2.3649, - "30": 2.97314, - "31": 2.21341, - "32": 5.02931, - "33": 2.09974, - "34": 1.53163, - "35": 2.17862, - "36": 3.61274, - "37": 2.68687, - "38": 1.85327, - "39": 3.95559, - "40": 3.49999, - "41": 4.68689, - "42": 2.7863, - "43": 3.48504, - "44": 2.4547, - "45": 2.47677, - "46": 2.7805, - "47": 4.16521, - "48": 3.3328, - "49": 2.95889, - "50": 3.68852 + "2": 30.27287, + "3": 0.63036, + "4": 0.62463, + "5": 0.62389, + "6": 0.62241, + "7": 0.62274, + "8": 0.62116, + 
"9": 0.62223, + "10": 0.62501, + "11": 0.62222, + "12": 0.62201, + "13": 0.6223, + "14": 0.62539, + "15": 0.62434, + "16": 0.62424, + "17": 0.62735, + "18": 0.62325, + "19": 0.62244, + "20": 0.62506, + "21": 0.62317, + "22": 0.62235, + "23": 0.625, + "24": 0.62205, + "25": 0.62519, + "26": 0.64769, + "27": 0.62564, + "28": 0.62374, + "29": 0.62533, + "30": 0.62018, + "31": 0.62779, + "32": 0.62201, + "33": 0.63514, + "34": 0.6314, + "35": 0.63737, + "36": 0.62906, + "37": 0.64653, + "38": 0.63058, + "39": 0.63017, + "40": 0.63041, + "41": 0.6331, + "42": 0.62522, + "43": 0.62568, + "44": 0.62119, + "45": 0.62536, + "46": 0.62217, + "47": 0.62615, + "48": 0.6199, + "49": 0.61769, + "50": 0.62242 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json index 0d556450cec..40700470348 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.92671, "2": 10.91589, - "3": 10.92552, - "4": 10.93168, - "5": 10.93015, - "6": 10.9259, - "7": 10.92646, - "8": 10.92323, - "9": 10.92778, - "10": 10.9168, - "11": 10.9178, - "12": 10.92446, - "13": 10.90961, - "14": 10.90627, - "15": 10.90112, - "16": 10.88691, - "17": 10.88827, - "18": 10.88554, - "19": 10.88654, - "20": 10.8377, - "21": 10.82717, - "22": 10.81535, - "23": 10.80831, - "24": 10.78061, - "25": 10.77774, - "26": 10.76115, - "27": 10.7495, - "28": 10.6922, - "29": 10.66686, - "30": 10.63118, - "31": 10.62182, - "32": 10.61591, - "33": 10.57843, - "34": 10.54531, - "35": 10.54625, 
- "36": 10.53479, - "37": 10.50533, - "38": 10.50383, - "39": 10.47322, - "40": 10.45095, - "41": 10.42606, - "42": 10.41475, - "43": 10.40064, - "44": 10.37006, - "45": 10.38168, - "46": 10.33484, - "47": 10.32444, - "48": 10.28749, - "49": 10.28608, - "50": 10.27697 + "3": 10.92569, + "4": 10.93204, + "5": 10.93027, + "6": 10.9261, + "7": 10.92637, + "8": 10.92388, + "9": 10.92728, + "10": 10.91588, + "11": 10.9183, + "12": 10.92402, + "13": 10.90967, + "14": 10.90628, + "15": 10.90098, + "16": 10.88556, + "17": 10.88818, + "18": 10.88475, + "19": 10.88523, + "20": 10.83769, + "21": 10.82735, + "22": 10.81478, + "23": 10.80877, + "24": 10.78047, + "25": 10.77776, + "26": 10.76048, + "27": 10.74912, + "28": 10.69225, + "29": 10.66725, + "30": 10.63087, + "31": 10.62053, + "32": 10.61533, + "33": 10.57791, + "34": 10.5462, + "35": 10.546, + "36": 10.53423, + "37": 10.50445, + "38": 10.50363, + "39": 10.47181, + "40": 10.44914, + "41": 10.42508, + "42": 10.41306, + "43": 10.39862, + "44": 10.36841, + "45": 10.37966, + "46": 10.33291, + "47": 10.32241, + "48": 10.28472, + "49": 10.28374, + "50": 10.27437 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19058.0, - "2": 19206.0, - "3": 18972.0, - "4": 19416.0, - "5": 19009.0, - "6": 18538.0, - "7": 18981.0, - "8": 18448.0, - "9": 18864.0, - "10": 19655.0, - "11": 19064.0, - "12": 18696.0, - "13": 19292.0, - "14": 19140.0, - "15": 18806.0, - "16": 18590.0, - "17": 18993.0, - "18": 19173.0, - "19": 19321.0, - "20": 19057.0, - "21": 19086.0, - "22": 18997.0, - "23": 18891.0, - "24": 19267.0, - "25": 18711.0, - "26": 19139.0, - "27": 19114.0, - "28": 18818.0, - "29": 18371.0, - "30": 18304.0, - "31": 19016.0, - "32": 19184.0, - "33": 18481.0, - "34": 18592.0, - "35": 18848.0, - "36": 18346.0, - "37": 18564.0, - "38": 18516.0, - "39": 18959.0, - "40": 19194.0, - "41": 18945.0, - "42": 18455.0, - "43": 19053.0, - "44": 18809.0, - "45": 20372.0, - "46": 19563.0, - "47": 19974.0, - 
"48": 20047.0, - "49": 21674.0, - "50": 20259.0 + "1": 36710.0, + "2": 37160.0, + "3": 37309.0, + "4": 36783.0, + "5": 36803.0, + "6": 36142.0, + "7": 36370.0, + "8": 36176.0, + "9": 37100.0, + "10": 37917.0, + "11": 36745.0, + "12": 35709.0, + "13": 37084.0, + "14": 37775.0, + "15": 36119.0, + "16": 36038.0, + "17": 36700.0, + "18": 37055.0, + "19": 36638.0, + "20": 36735.0, + "21": 36231.0, + "22": 36562.0, + "23": 37065.0, + "24": 37204.0, + "25": 35891.0, + "26": 36983.0, + "27": 36795.0, + "28": 36205.0, + "29": 36207.0, + "30": 35575.0, + "31": 36467.0, + "32": 37374.0, + "33": 35909.0, + "34": 35933.0, + "35": 36426.0, + "36": 36062.0, + "37": 36347.0, + "38": 35710.0, + "39": 36968.0, + "40": 37362.0, + "41": 36161.0, + "42": 36032.0, + "43": 37353.0, + "44": 37025.0, + "45": 39544.0, + "46": 38629.0, + "47": 38810.0, + "48": 38612.0, + "49": 42460.0, + "50": 39729.0 } }, "mem-allocated-bytes": { @@ -120,54 +120,54 @@ "values": { "1": 1027089408.0, "2": 1027091968.0, - "3": 1027088384.0, - "4": 1027088384.0, + "3": 1027087360.0, + "4": 1027088896.0, "5": 1027090944.0, - "6": 1027091968.0, - "7": 1027088896.0, - "8": 1027092992.0, - "9": 1027090944.0, - "10": 1027090432.0, - "11": 1027090944.0, - "12": 1027091456.0, + "6": 1027090944.0, + "7": 1027088384.0, + "8": 1027092480.0, + "9": 1027091968.0, + "10": 1027091456.0, + "11": 1027090432.0, + "12": 1027090944.0, "13": 1027091968.0, - "14": 1027090944.0, - "15": 1027091456.0, - "16": 1027088384.0, - "17": 1027088896.0, + "14": 1027094528.0, + "15": 1027090432.0, + "16": 1027088896.0, + "17": 1027089408.0, "18": 1027090944.0, - "19": 1027089920.0, - "20": 1027089920.0, + "19": 1027088896.0, + "20": 1027090944.0, "21": 1027092480.0, - "22": 1027088896.0, - "23": 1027094528.0, + "22": 1027090944.0, + "23": 1027093504.0, "24": 1027091968.0, "25": 1027091456.0, - "26": 1027089408.0, + "26": 1027090944.0, "27": 1027087872.0, - "28": 1027091456.0, + "28": 1027092992.0, "29": 1027090432.0, - "30": 1027089920.0, + 
"30": 1027090432.0, "31": 1027089408.0, "32": 1027094528.0, "33": 1027094016.0, - "34": 1027092480.0, - "35": 1027086848.0, - "36": 1027088384.0, + "34": 1027093504.0, + "35": 1027085824.0, + "36": 1027087872.0, "37": 1027088896.0, - "38": 1027090432.0, - "39": 1027090432.0, + "38": 1027089920.0, + "39": 1027089920.0, "40": 1027091456.0, - "41": 1027088896.0, + "41": 1027089920.0, "42": 1027088384.0, "43": 1027088896.0, - "44": 1027091968.0, - "45": 1027091456.0, - "46": 1027085824.0, + "44": 1027090944.0, + "45": 1027091968.0, + "46": 1027084800.0, "47": 1027089920.0, "48": 1027088384.0, - "49": 1027086848.0, - "50": 1027089920.0 + "49": 1027086336.0, + "50": 1027089408.0 } }, "mem-max-allocated-bytes": { @@ -182,49 +182,49 @@ "5": 3298735616.0, "6": 3298735616.0, "7": 3298735616.0, - "8": 3299022336.0, - "9": 3299022336.0, - "10": 3299022336.0, - "11": 3299138048.0, - "12": 3299138048.0, - "13": 3299225088.0, - "14": 3299394048.0, - "15": 3299394048.0, - "16": 3299394048.0, - "17": 3299394048.0, - "18": 3299394048.0, - "19": 3299809792.0, - "20": 3299809792.0, - "21": 3299809792.0, - "22": 3299809792.0, - "23": 3300119552.0, - "24": 3300119552.0, - "25": 3300119552.0, - "26": 3300119552.0, - "27": 3300119552.0, - "28": 3300119552.0, - "29": 3300119552.0, - "30": 3300119552.0, - "31": 3300119552.0, - "32": 3300119552.0, - "33": 3300119552.0, - "34": 3300416512.0, - "35": 3300416512.0, - "36": 3300416512.0, - "37": 3300416512.0, - "38": 3300416512.0, - "39": 3300416512.0, - "40": 3300416512.0, - "41": 3300416512.0, - "42": 3300416512.0, - "43": 3300416512.0, - "44": 3300416512.0, - "45": 3300416512.0, - "46": 3300416512.0, - "47": 3300416512.0, - "48": 3300416512.0, - "49": 3300416512.0, - "50": 3300416512.0 + "8": 3299167232.0, + "9": 3299167232.0, + "10": 3299167232.0, + "11": 3299167232.0, + "12": 3299167232.0, + "13": 3299167232.0, + "14": 3300327424.0, + "15": 3300327424.0, + "16": 3300327424.0, + "17": 3300327424.0, + "18": 3300327424.0, + "19": 3300327424.0, 
+ "20": 3300327424.0, + "21": 3300327424.0, + "22": 3300327424.0, + "23": 3300327424.0, + "24": 3300327424.0, + "25": 3300327424.0, + "26": 3300327424.0, + "27": 3300327424.0, + "28": 3300327424.0, + "29": 3300327424.0, + "30": 3300327424.0, + "31": 3300327424.0, + "32": 3301105664.0, + "33": 3301105664.0, + "34": 3301105664.0, + "35": 3301105664.0, + "36": 3301105664.0, + "37": 3301105664.0, + "38": 3301105664.0, + "39": 3301105664.0, + "40": 3301105664.0, + "41": 3301105664.0, + "42": 3301105664.0, + "43": 3301105664.0, + "44": 3301105664.0, + "45": 3301105664.0, + "46": 3301105664.0, + "47": 3301105664.0, + "48": 3301105664.0, + "49": 3301105664.0, + "50": 3301105664.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 7.89786, - "3": 0.24594, - "4": 0.22051, - "5": 0.222, - "6": 0.21926, - "7": 0.2078, - "8": 0.21045, - "9": 0.20823, - "10": 0.20524, - "11": 0.21966, - "12": 0.20488, - "13": 0.2063, - "14": 0.21021, - "15": 0.20599, - "16": 0.20609, - "17": 0.2069, - "18": 0.20307, - "19": 0.20538, - "20": 0.20251, - "21": 0.21207, - "22": 0.20145, - "23": 0.21042, - "24": 0.21155, - "25": 0.2081, - "26": 0.20377, - "27": 0.20504, - "28": 0.20365, - "29": 0.20313, - "30": 0.20266, - "31": 0.20257, - "32": 0.20336, - "33": 0.20201, - "34": 0.20295, - "35": 0.20399, - "36": 0.20262, - "37": 0.2058, - "38": 0.20263, - "39": 0.20527, - "40": 0.20348, - "41": 0.20601, - "42": 0.20448, - "43": 0.20532, - "44": 0.20505, - "45": 0.20908, - "46": 0.2037, - "47": 0.21773, - "48": 0.20684, - "49": 0.20825, - "50": 0.20302 + "2": 5.35585, + "3": 0.30738, + "4": 0.3051, + "5": 0.29962, + "6": 0.29439, + "7": 0.28971, + "8": 0.29154, + "9": 0.2896, + "10": 0.285, + "11": 0.28601, + "12": 0.28351, + "13": 0.28073, + "14": 0.28692, + "15": 0.28298, + "16": 0.28931, + "17": 0.28692, + "18": 0.28464, + "19": 0.2809, + "20": 0.2801, + "21": 0.29964, + "22": 0.28577, + "23": 0.29322, + "24": 0.28538, + "25": 0.28139, + "26": 0.28632, 
+ "27": 0.28307, + "28": 0.28328, + "29": 0.2898, + "30": 0.28102, + "31": 0.28581, + "32": 0.29226, + "33": 0.28565, + "34": 0.28151, + "35": 0.28469, + "36": 0.28547, + "37": 0.28361, + "38": 0.28658, + "39": 0.28216, + "40": 0.28637, + "41": 0.28332, + "42": 0.28626, + "43": 0.28098, + "44": 0.28017, + "45": 0.28351, + "46": 0.2833, + "47": 0.27921, + "48": 0.2816, + "49": 0.28999, + "50": 0.28489 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json index 586f94b9d87..f640ef3fd7c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.92671, "2": 10.91589, - "3": 10.92552, - "4": 10.93168, - "5": 10.93015, - "6": 10.9259, - "7": 10.92646, - "8": 10.92323, - "9": 10.92778, - "10": 10.9168, - "11": 10.9178, - "12": 10.92446, - "13": 10.90961, - "14": 10.90627, - "15": 10.90112, - "16": 10.88691, - "17": 10.88827, - "18": 10.88554, - "19": 10.88654, - "20": 10.8377, - "21": 10.82717, - "22": 10.81535, - "23": 10.80831, - "24": 10.78061, - "25": 10.77774, - "26": 10.76115, - "27": 10.7495, - "28": 10.6922, - "29": 10.66686, - "30": 10.63118, - "31": 10.62182, - "32": 10.61591, - "33": 10.57843, - "34": 10.54531, - "35": 10.54625, - "36": 10.53479, - "37": 10.50533, - "38": 10.50383, - "39": 10.47322, - "40": 10.45095, - "41": 10.42606, - "42": 10.41475, - "43": 10.40064, - "44": 10.37006, - "45": 10.38168, - "46": 10.33484, - "47": 10.32444, - "48": 
10.28749, - "49": 10.28608, - "50": 10.27697 + "3": 10.92569, + "4": 10.93204, + "5": 10.93027, + "6": 10.9261, + "7": 10.92637, + "8": 10.92388, + "9": 10.92728, + "10": 10.91588, + "11": 10.9183, + "12": 10.92402, + "13": 10.90967, + "14": 10.90628, + "15": 10.90098, + "16": 10.88556, + "17": 10.88818, + "18": 10.88475, + "19": 10.88523, + "20": 10.83769, + "21": 10.82735, + "22": 10.81478, + "23": 10.80877, + "24": 10.78047, + "25": 10.77776, + "26": 10.76048, + "27": 10.74912, + "28": 10.69225, + "29": 10.66725, + "30": 10.63087, + "31": 10.62053, + "32": 10.61533, + "33": 10.57791, + "34": 10.5462, + "35": 10.546, + "36": 10.53423, + "37": 10.50445, + "38": 10.50363, + "39": 10.47181, + "40": 10.44914, + "41": 10.42508, + "42": 10.41306, + "43": 10.39862, + "44": 10.36841, + "45": 10.37966, + "46": 10.33291, + "47": 10.32241, + "48": 10.28472, + "49": 10.28374, + "50": 10.27437 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 19058.0, - "2": 19206.0, - "3": 18972.0, - "4": 19416.0, - "5": 19009.0, - "6": 18538.0, - "7": 18981.0, - "8": 18448.0, - "9": 18864.0, - "10": 19655.0, - "11": 19064.0, - "12": 18696.0, - "13": 19292.0, - "14": 19140.0, - "15": 18806.0, - "16": 18590.0, - "17": 18993.0, - "18": 19173.0, - "19": 19321.0, - "20": 19057.0, - "21": 19086.0, - "22": 18997.0, - "23": 18891.0, - "24": 19267.0, - "25": 18711.0, - "26": 19139.0, - "27": 19114.0, - "28": 18818.0, - "29": 18371.0, - "30": 18304.0, - "31": 19016.0, - "32": 19184.0, - "33": 18481.0, - "34": 18592.0, - "35": 18848.0, - "36": 18346.0, - "37": 18564.0, - "38": 18516.0, - "39": 18959.0, - "40": 19194.0, - "41": 18945.0, - "42": 18455.0, - "43": 19053.0, - "44": 18809.0, - "45": 20372.0, - "46": 19563.0, - "47": 19974.0, - "48": 20047.0, - "49": 21674.0, - "50": 20259.0 + "1": 36710.0, + "2": 37160.0, + "3": 37309.0, + "4": 36783.0, + "5": 36803.0, + "6": 36142.0, + "7": 36370.0, + "8": 36176.0, + "9": 37100.0, + "10": 37917.0, + "11": 
36745.0, + "12": 35709.0, + "13": 37084.0, + "14": 37775.0, + "15": 36119.0, + "16": 36038.0, + "17": 36700.0, + "18": 37055.0, + "19": 36638.0, + "20": 36735.0, + "21": 36231.0, + "22": 36562.0, + "23": 37065.0, + "24": 37204.0, + "25": 35891.0, + "26": 36983.0, + "27": 36795.0, + "28": 36205.0, + "29": 36207.0, + "30": 35575.0, + "31": 36467.0, + "32": 37374.0, + "33": 35909.0, + "34": 35933.0, + "35": 36426.0, + "36": 36062.0, + "37": 36347.0, + "38": 35710.0, + "39": 36968.0, + "40": 37362.0, + "41": 36161.0, + "42": 36032.0, + "43": 37353.0, + "44": 37025.0, + "45": 39544.0, + "46": 38629.0, + "47": 38810.0, + "48": 38612.0, + "49": 42460.0, + "50": 39729.0 } }, "mem-allocated-bytes": { @@ -120,54 +120,54 @@ "values": { "1": 1027089408.0, "2": 1027091968.0, - "3": 1027088384.0, - "4": 1027088384.0, + "3": 1027087360.0, + "4": 1027088896.0, "5": 1027090944.0, - "6": 1027091968.0, - "7": 1027088896.0, - "8": 1027092992.0, - "9": 1027090944.0, - "10": 1027090432.0, - "11": 1027090944.0, - "12": 1027091456.0, + "6": 1027090944.0, + "7": 1027088384.0, + "8": 1027092480.0, + "9": 1027091968.0, + "10": 1027091456.0, + "11": 1027090432.0, + "12": 1027090944.0, "13": 1027091968.0, - "14": 1027090944.0, - "15": 1027091456.0, - "16": 1027088384.0, - "17": 1027088896.0, + "14": 1027094528.0, + "15": 1027090432.0, + "16": 1027088896.0, + "17": 1027089408.0, "18": 1027090944.0, - "19": 1027089920.0, - "20": 1027089920.0, + "19": 1027088896.0, + "20": 1027090944.0, "21": 1027092480.0, - "22": 1027088896.0, - "23": 1027094528.0, + "22": 1027090944.0, + "23": 1027093504.0, "24": 1027091968.0, "25": 1027091456.0, - "26": 1027089408.0, + "26": 1027090944.0, "27": 1027087872.0, - "28": 1027091456.0, + "28": 1027092992.0, "29": 1027090432.0, - "30": 1027089920.0, + "30": 1027090432.0, "31": 1027089408.0, "32": 1027094528.0, "33": 1027094016.0, - "34": 1027092480.0, - "35": 1027086848.0, - "36": 1027088384.0, + "34": 1027093504.0, + "35": 1027085824.0, + "36": 1027087872.0, "37": 
1027088896.0, - "38": 1027090432.0, - "39": 1027090432.0, + "38": 1027089920.0, + "39": 1027089920.0, "40": 1027091456.0, - "41": 1027088896.0, + "41": 1027089920.0, "42": 1027088384.0, "43": 1027088896.0, - "44": 1027091968.0, - "45": 1027091456.0, - "46": 1027085824.0, + "44": 1027090944.0, + "45": 1027091968.0, + "46": 1027084800.0, "47": 1027089920.0, "48": 1027088384.0, - "49": 1027086848.0, - "50": 1027089920.0 + "49": 1027086336.0, + "50": 1027089408.0 } }, "mem-max-allocated-bytes": { @@ -182,49 +182,49 @@ "5": 3298735616.0, "6": 3298735616.0, "7": 3298735616.0, - "8": 3299022336.0, - "9": 3299022336.0, - "10": 3299022336.0, - "11": 3299138048.0, - "12": 3299138048.0, - "13": 3299225088.0, - "14": 3299394048.0, - "15": 3299394048.0, - "16": 3299394048.0, - "17": 3299394048.0, - "18": 3299394048.0, - "19": 3299809792.0, - "20": 3299809792.0, - "21": 3299809792.0, - "22": 3299809792.0, - "23": 3300119552.0, - "24": 3300119552.0, - "25": 3300119552.0, - "26": 3300119552.0, - "27": 3300119552.0, - "28": 3300119552.0, - "29": 3300119552.0, - "30": 3300119552.0, - "31": 3300119552.0, - "32": 3300119552.0, - "33": 3300119552.0, - "34": 3300416512.0, - "35": 3300416512.0, - "36": 3300416512.0, - "37": 3300416512.0, - "38": 3300416512.0, - "39": 3300416512.0, - "40": 3300416512.0, - "41": 3300416512.0, - "42": 3300416512.0, - "43": 3300416512.0, - "44": 3300416512.0, - "45": 3300416512.0, - "46": 3300416512.0, - "47": 3300416512.0, - "48": 3300416512.0, - "49": 3300416512.0, - "50": 3300416512.0 + "8": 3299167232.0, + "9": 3299167232.0, + "10": 3299167232.0, + "11": 3299167232.0, + "12": 3299167232.0, + "13": 3299167232.0, + "14": 3300327424.0, + "15": 3300327424.0, + "16": 3300327424.0, + "17": 3300327424.0, + "18": 3300327424.0, + "19": 3300327424.0, + "20": 3300327424.0, + "21": 3300327424.0, + "22": 3300327424.0, + "23": 3300327424.0, + "24": 3300327424.0, + "25": 3300327424.0, + "26": 3300327424.0, + "27": 3300327424.0, + "28": 3300327424.0, + "29": 
3300327424.0, + "30": 3300327424.0, + "31": 3300327424.0, + "32": 3301105664.0, + "33": 3301105664.0, + "34": 3301105664.0, + "35": 3301105664.0, + "36": 3301105664.0, + "37": 3301105664.0, + "38": 3301105664.0, + "39": 3301105664.0, + "40": 3301105664.0, + "41": 3301105664.0, + "42": 3301105664.0, + "43": 3301105664.0, + "44": 3301105664.0, + "45": 3301105664.0, + "46": 3301105664.0, + "47": 3301105664.0, + "48": 3301105664.0, + "49": 3301105664.0, + "50": 3301105664.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 7.23651, - "3": 0.24222, - "4": 0.22131, - "5": 0.2271, - "6": 0.22305, - "7": 0.21362, - "8": 0.21345, - "9": 0.21177, - "10": 0.20554, - "11": 0.21683, - "12": 0.21959, - "13": 0.23214, - "14": 0.21046, - "15": 0.2093, - "16": 0.20781, - "17": 0.21094, - "18": 0.20855, - "19": 0.20679, - "20": 0.20604, - "21": 0.21437, - "22": 0.20598, - "23": 0.20879, - "24": 0.20414, - "25": 0.20266, - "26": 0.20454, - "27": 0.20634, - "28": 0.20309, - "29": 0.20238, - "30": 0.20203, - "31": 0.20437, - "32": 0.20127, - "33": 0.20216, - "34": 0.20283, - "35": 0.20336, - "36": 0.20293, - "37": 0.20654, - "38": 0.20237, - "39": 0.20306, - "40": 0.20384, - "41": 0.20686, - "42": 0.20485, - "43": 0.20433, - "44": 0.20288, - "45": 0.20816, - "46": 0.20343, - "47": 0.2071, - "48": 0.20408, - "49": 0.2097, - "50": 0.20466 + "2": 7.58728, + "3": 0.31009, + "4": 0.30574, + "5": 0.30849, + "6": 0.30568, + "7": 0.304, + "8": 0.28938, + "9": 0.29273, + "10": 0.28679, + "11": 0.29223, + "12": 0.29094, + "13": 0.28669, + "14": 0.28785, + "15": 0.28633, + "16": 0.2907, + "17": 0.28571, + "18": 0.28649, + "19": 0.28924, + "20": 0.28411, + "21": 0.29161, + "22": 0.2845, + "23": 0.29217, + "24": 0.28492, + "25": 0.28577, + "26": 0.28786, + "27": 0.2893, + "28": 0.29073, + "29": 0.28506, + "30": 0.28519, + "31": 0.28397, + "32": 0.2904, + "33": 0.29082, + "34": 0.28599, + "35": 0.28963, + "36": 0.28976, + "37": 0.28557, + "38": 
0.29164, + "39": 0.29238, + "40": 0.28427, + "41": 0.28783, + "42": 0.28875, + "43": 0.28478, + "44": 0.28439, + "45": 0.29078, + "46": 0.28385, + "47": 0.28272, + "48": 0.28312, + "49": 0.29468, + "50": 0.28837 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json index 0e601bc661a..4a25865ef60 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.90768, "2": 10.90727, - "3": 10.9168, - "4": 10.90829, - "5": 10.91479, - "6": 10.89485, - "7": 10.90737, - "8": 10.90882, - "9": 10.90915, - "10": 10.91068, - "11": 10.90062, - "12": 10.89878, - "13": 10.88648, - "14": 10.88227, - "15": 10.87325, - "16": 10.85233, - "17": 10.85658, - "18": 10.84766, - "19": 10.85521, - "20": 10.77651, - "21": 10.76089, - "22": 10.75994, - "23": 10.7431, - "24": 10.70783, - "25": 10.70972, - "26": 10.69229, - "27": 10.66881, - "28": 10.60562, - "29": 10.57195, - "30": 10.54188, - "31": 10.53201, - "32": 10.51676, - "33": 10.48119, - "34": 10.44953, - "35": 10.44578, - "36": 10.42078, - "37": 10.40052, - "38": 10.4042, - "39": 10.36985, - "40": 10.35254, - "41": 10.33024, - "42": 10.31102, - "43": 10.29815, - "44": 10.27127, - "45": 10.28382, - "46": 10.24095, - "47": 10.23461, - "48": 10.19191, - "49": 10.19522, - "50": 10.19066 + "3": 10.91677, + "4": 10.90838, + "5": 10.91536, + "6": 10.89522, + "7": 10.90774, + "8": 10.90815, + "9": 10.90916, + "10": 10.91026, + "11": 10.9013, + "12": 10.89896, + "13": 10.88718, + "14": 10.88255, + "15": 10.87321, + "16": 10.85207, + "17": 10.85744, + "18": 10.84755, + "19": 
10.85504, + "20": 10.77576, + "21": 10.76177, + "22": 10.75967, + "23": 10.74285, + "24": 10.70808, + "25": 10.70994, + "26": 10.6922, + "27": 10.66835, + "28": 10.60509, + "29": 10.57149, + "30": 10.54122, + "31": 10.53124, + "32": 10.51525, + "33": 10.48026, + "34": 10.44887, + "35": 10.44515, + "36": 10.41928, + "37": 10.39906, + "38": 10.40257, + "39": 10.3684, + "40": 10.35058, + "41": 10.32832, + "42": 10.30907, + "43": 10.29586, + "44": 10.26915, + "45": 10.28138, + "46": 10.23855, + "47": 10.23169, + "48": 10.18904, + "49": 10.19272, + "50": 10.18773 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 16760.0, - "2": 16448.0, - "3": 16457.0, - "4": 16370.0, - "5": 16128.0, - "6": 15986.0, - "7": 16871.0, - "8": 16013.0, - "9": 16494.0, - "10": 16663.0, - "11": 16350.0, - "12": 15746.0, - "13": 16650.0, - "14": 16501.0, - "15": 16034.0, - "16": 16079.0, - "17": 16562.0, - "18": 16415.0, - "19": 16924.0, - "20": 16413.0, - "21": 16333.0, - "22": 16350.0, - "23": 16127.0, - "24": 16475.0, - "25": 15776.0, - "26": 16841.0, - "27": 16638.0, - "28": 16195.0, - "29": 16566.0, - "30": 16223.0, - "31": 16976.0, - "32": 17083.0, - "33": 17181.0, - "34": 17125.0, - "35": 18157.0, - "36": 17429.0, - "37": 17735.0, - "38": 17898.0, - "39": 18505.0, - "40": 19060.0, - "41": 18163.0, - "42": 18145.0, - "43": 18663.0, - "44": 18601.0, - "45": 20633.0, - "46": 20049.0, - "47": 19866.0, - "48": 20108.0, - "49": 21960.0, - "50": 20138.0 + "1": 32335.0, + "2": 31841.0, + "3": 31775.0, + "4": 32249.0, + "5": 31694.0, + "6": 31106.0, + "7": 32512.0, + "8": 30765.0, + "9": 32296.0, + "10": 32743.0, + "11": 31984.0, + "12": 31014.0, + "13": 32552.0, + "14": 32744.0, + "15": 31135.0, + "16": 30964.0, + "17": 32132.0, + "18": 31974.0, + "19": 32648.0, + "20": 32452.0, + "21": 31857.0, + "22": 31848.0, + "23": 32281.0, + "24": 32970.0, + "25": 31299.0, + "26": 32672.0, + "27": 32851.0, + "28": 32568.0, + "29": 32585.0, + "30": 32739.0, + 
"31": 33858.0, + "32": 34616.0, + "33": 34212.0, + "34": 33982.0, + "35": 35475.0, + "36": 34732.0, + "37": 35645.0, + "38": 34867.0, + "39": 36578.0, + "40": 37563.0, + "41": 36481.0, + "42": 35997.0, + "43": 37571.0, + "44": 37041.0, + "45": 41119.0, + "46": 39345.0, + "47": 38921.0, + "48": 39898.0, + "49": 43851.0, + "50": 39868.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1558401536.0, - "2": 1558716416.0, - "3": 1559734784.0, - "4": 1558522880.0, - "5": 1558433280.0, - "6": 1558899712.0, - "7": 1559085568.0, - "8": 1559302144.0, - "9": 1558675968.0, - "10": 1559221248.0, - "11": 1558368768.0, - "12": 1558981632.0, - "13": 1559165440.0, - "14": 1558203392.0, - "15": 1558620672.0, - "16": 1558203392.0, - "17": 1558254080.0, - "18": 1559229440.0, - "19": 1558510080.0, - "20": 1558384640.0, - "21": 1559245312.0, - "22": 1558439936.0, - "23": 1558500864.0, - "24": 1558304768.0, - "25": 1558666752.0, - "26": 1558304768.0, - "27": 1558846976.0, - "28": 1558304768.0, - "29": 1558355456.0, - "30": 1559102464.0, - "31": 1559019008.0, - "32": 1559699456.0, - "33": 1559100928.0, - "34": 1558921216.0, - "35": 1558616576.0, - "36": 1558406144.0, - "37": 1559692800.0, - "38": 1558406144.0, - "39": 1558585856.0, - "40": 1559957504.0, - "41": 1559263744.0, - "42": 1558507520.0, - "43": 1558776320.0, - "44": 1559255040.0, - "45": 1558456832.0, - "46": 1558831104.0, - "47": 1558648320.0, - "48": 1558507520.0, - "49": 1559791104.0, - "50": 1558966272.0 + "1": 1558936064.0, + "2": 1559142400.0, + "3": 1558418944.0, + "4": 1558473728.0, + "5": 1558619136.0, + "6": 1558418944.0, + "7": 1558368256.0, + "8": 1558418944.0, + "9": 1559001088.0, + "10": 1558676992.0, + "11": 1559100416.0, + "12": 1559344640.0, + "13": 1558738944.0, + "14": 1558446592.0, + "15": 1558907904.0, + "16": 1559096832.0, + "17": 1558469632.0, + "18": 1558520320.0, + "19": 1559102464.0, + "20": 1559440896.0, + "21": 1558653952.0, + "22": 
1558520320.0, + "23": 1558571008.0, + "24": 1559296512.0, + "25": 1559556096.0, + "26": 1558520320.0, + "27": 1558571008.0, + "28": 1558621696.0, + "29": 1558571008.0, + "30": 1560001024.0, + "31": 1558796288.0, + "32": 1558621696.0, + "33": 1558571008.0, + "34": 1559272960.0, + "35": 1559274496.0, + "36": 1558780416.0, + "37": 1558672384.0, + "38": 1558621696.0, + "39": 1559065600.0, + "40": 1559026176.0, + "41": 1559201792.0, + "42": 1558960640.0, + "43": 1558672384.0, + "44": 1559427584.0, + "45": 1558672384.0, + "46": 1558723072.0, + "47": 1558773760.0, + "48": 1558723072.0, + "49": 1559203840.0, + "50": 1558723072.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3480202240.0, - "2": 4039383552.0, - "3": 4045734912.0, - "4": 4052123136.0, - "5": 4052123136.0, - "6": 4053458432.0, - "7": 4054095872.0, - "8": 4058186240.0, - "9": 4059530240.0, - "10": 4061010432.0, - "11": 4061010432.0, - "12": 4061010432.0, - "13": 4061010432.0, - "14": 4061010432.0, - "15": 4061010432.0, - "16": 4061010432.0, - "17": 4061010432.0, - "18": 4061010432.0, - "19": 4061010432.0, - "20": 4061010432.0, - "21": 4061010432.0, - "22": 4061010432.0, - "23": 4061010432.0, - "24": 4061010432.0, - "25": 4061010432.0, - "26": 4061010432.0, - "27": 4061010432.0, - "28": 4061010432.0, - "29": 4061010432.0, - "30": 4061010432.0, - "31": 4061010432.0, - "32": 4061010432.0, - "33": 4061010432.0, - "34": 4061010432.0, - "35": 4061010432.0, - "36": 4061010432.0, - "37": 4061010432.0, - "38": 4061010432.0, - "39": 4061010432.0, - "40": 4061010432.0, - "41": 4061010432.0, - "42": 4061010432.0, - "43": 4061010432.0, - "44": 4061010432.0, - "45": 4061010432.0, - "46": 4061010432.0, - "47": 4061010432.0, - "48": 4061010432.0, - "49": 4061010432.0, - "50": 4061010432.0 + "1": 3480480768.0, + "2": 4041655808.0, + "3": 4048918528.0, + "4": 4048918528.0, + "5": 4048918528.0, + "6": 4053445632.0, + "7": 4053445632.0, + "8": 4053990400.0, + "9": 
4056294912.0, + "10": 4058575872.0, + "11": 4058575872.0, + "12": 4058575872.0, + "13": 4058575872.0, + "14": 4058575872.0, + "15": 4058575872.0, + "16": 4058575872.0, + "17": 4058575872.0, + "18": 4058575872.0, + "19": 4058575872.0, + "20": 4058575872.0, + "21": 4058575872.0, + "22": 4058575872.0, + "23": 4058575872.0, + "24": 4058575872.0, + "25": 4058575872.0, + "26": 4058575872.0, + "27": 4058575872.0, + "28": 4058575872.0, + "29": 4058575872.0, + "30": 4058575872.0, + "31": 4058575872.0, + "32": 4058575872.0, + "33": 4058575872.0, + "34": 4058575872.0, + "35": 4058575872.0, + "36": 4058575872.0, + "37": 4058575872.0, + "38": 4058575872.0, + "39": 4058575872.0, + "40": 4058575872.0, + "41": 4058575872.0, + "42": 4058575872.0, + "43": 4058575872.0, + "44": 4058575872.0, + "45": 4058575872.0, + "46": 4058575872.0, + "47": 4058575872.0, + "48": 4058575872.0, + "49": 4058575872.0, + "50": 4058575872.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 11.67698, - "3": 0.39155, - "4": 0.31689, - "5": 0.294, - "6": 0.31141, - "7": 0.29128, - "8": 0.28675, - "9": 0.28172, - "10": 0.27347, - "11": 0.26642, - "12": 0.27974, - "13": 0.27332, - "14": 0.36787, - "15": 0.26967, - "16": 0.26447, - "17": 0.26033, - "18": 0.2662, - "19": 0.28876, - "20": 0.27381, - "21": 0.26827, - "22": 0.28438, - "23": 0.27253, - "24": 0.27903, - "25": 0.27474, - "26": 0.28579, - "27": 0.28072, - "28": 0.2816, - "29": 0.32547, - "30": 0.27477, - "31": 0.27095, - "32": 0.27719, - "33": 0.26688, - "34": 0.27227, - "35": 0.2837, - "36": 0.27295, - "37": 0.26868, - "38": 0.26936, - "39": 0.27392, - "40": 0.2649, - "41": 0.27268, - "42": 0.26786, - "43": 0.26041, - "44": 0.2684, - "45": 0.26786, - "46": 0.26105, - "47": 0.26729, - "48": 0.26353, - "49": 0.27083, - "50": 0.26181 + "2": 9.11114, + "3": 0.46745, + "4": 0.6067, + "5": 0.59171, + "6": 0.4369, + "7": 0.41515, + "8": 0.45255, + "9": 0.40714, + "10": 0.40441, + "11": 0.39176, + "12": 
0.41251, + "13": 0.38216, + "14": 0.39025, + "15": 0.4058, + "16": 0.38848, + "17": 0.35936, + "18": 0.36512, + "19": 0.38989, + "20": 0.39482, + "21": 0.40064, + "22": 0.40323, + "23": 0.38135, + "24": 0.37421, + "25": 0.36701, + "26": 0.358, + "27": 0.35679, + "28": 0.37525, + "29": 0.37141, + "30": 0.36686, + "31": 0.37147, + "32": 0.3724, + "33": 0.37369, + "34": 0.36853, + "35": 0.37221, + "36": 0.36497, + "37": 0.36365, + "38": 0.36316, + "39": 0.36521, + "40": 0.3622, + "41": 0.36129, + "42": 0.37003, + "43": 0.36053, + "44": 0.36244, + "45": 0.36485, + "46": 0.35324, + "47": 0.36076, + "48": 0.3573, + "49": 0.36368, + "50": 0.35292 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json index 13107c98731..ba5070c3f7d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json @@ -6,104 +6,104 @@ "values": { "1": 10.95659, "2": 10.95273, - "3": 10.97293, - "4": 10.95528, - "5": 10.95882, - "6": 10.96034, - "7": 10.94954, - "8": 10.95612, - "9": 10.96238, - "10": 10.95507, - "11": 10.94969, - "12": 10.94911, - "13": 10.94317, - "14": 10.93962, - "15": 10.913, - "16": 10.89317, - "17": 10.89415, - "18": 10.8839, - "19": 10.88757, - "20": 10.81652, - "21": 10.77937, - "22": 10.77934, - "23": 10.75133, - "24": 10.73697, - "25": 10.70906, - "26": 10.70254, - "27": 10.66656, - "28": 10.58983, - "29": 10.57516, - "30": 10.5394, - "31": 10.54957, - "32": 10.49421, - "33": 10.45586, - "34": 10.45429, - "35": 10.41583, - "36": 10.40492, - "37": 10.37411, - "38": 10.38053, - "39": 10.33652, - "40": 10.33756, - "41": 10.29221, - "42": 
10.24553, - "43": 10.23799, - "44": 10.20506, - "45": 10.23982, - "46": 10.1679, - "47": 10.16495, - "48": 10.11261, - "49": 10.11907, - "50": 10.09877, - "51": 10.11395, - "52": 10.07035, - "53": 10.03508, - "54": 10.01882, - "55": 9.97026, - "56": 10.01777, - "57": 10.00232, - "58": 10.00899, - "59": 9.93742, - "60": 9.97734, - "61": 9.92095, - "62": 9.86099, - "63": 9.97248, - "64": 9.91755, - "65": 9.8803, - "66": 9.90574, - "67": 9.88904, - "68": 9.81432, - "69": 9.83828, - "70": 9.82923, - "71": 9.85446, - "72": 9.84568, - "73": 9.79826, - "74": 9.79391, - "75": 9.74221, - "76": 9.8121, - "77": 9.8087, - "78": 9.76164, - "79": 9.73731, - "80": 9.76093, - "81": 9.80125, - "82": 9.72437, - "83": 9.66548, - "84": 9.62666, - "85": 9.59103, + "3": 10.97332, + "4": 10.95546, + "5": 10.95888, + "6": 10.96097, + "7": 10.94955, + "8": 10.95621, + "9": 10.96206, + "10": 10.95524, + "11": 10.94894, + "12": 10.94925, + "13": 10.9431, + "14": 10.93961, + "15": 10.91324, + "16": 10.89338, + "17": 10.89379, + "18": 10.88434, + "19": 10.88764, + "20": 10.81674, + "21": 10.77913, + "22": 10.77926, + "23": 10.75105, + "24": 10.73706, + "25": 10.70916, + "26": 10.70265, + "27": 10.66696, + "28": 10.59021, + "29": 10.5754, + "30": 10.53952, + "31": 10.54926, + "32": 10.49446, + "33": 10.45604, + "34": 10.45455, + "35": 10.41632, + "36": 10.40498, + "37": 10.37442, + "38": 10.38102, + "39": 10.33659, + "40": 10.3377, + "41": 10.29213, + "42": 10.24631, + "43": 10.23808, + "44": 10.20535, + "45": 10.24008, + "46": 10.16857, + "47": 10.1652, + "48": 10.11327, + "49": 10.11958, + "50": 10.09962, + "51": 10.11422, + "52": 10.07093, + "53": 10.03567, + "54": 10.01915, + "55": 9.97095, + "56": 10.01829, + "57": 10.00271, + "58": 10.0092, + "59": 9.93771, + "60": 9.97758, + "61": 9.92087, + "62": 9.86126, + "63": 9.97292, + "64": 9.91782, + "65": 9.88058, + "66": 9.90562, + "67": 9.8895, + "68": 9.81454, + "69": 9.83864, + "70": 9.82955, + "71": 9.855, + "72": 9.84555, + "73": 9.79876, 
+ "74": 9.79422, + "75": 9.74264, + "76": 9.81239, + "77": 9.80893, + "78": 9.76169, + "79": 9.7373, + "80": 9.76111, + "81": 9.80122, + "82": 9.72383, + "83": 9.66576, + "84": 9.62658, + "85": 9.59095, "86": 9.7377, - "87": 9.72698, - "88": 9.73452, - "89": 9.63568, - "90": 9.62951, - "91": 9.6743, - "92": 9.63781, - "93": 9.53741, - "94": 9.65643, - "95": 9.62904, - "96": 9.63411, - "97": 9.54632, - "98": 9.59572, - "99": 9.64181, - "100": 9.53588 + "87": 9.72693, + "88": 9.7346, + "89": 9.63566, + "90": 9.62949, + "91": 9.67372, + "92": 9.63811, + "93": 9.53749, + "94": 9.65631, + "95": 9.62893, + "96": 9.63437, + "97": 9.5463, + "98": 9.59551, + "99": 9.64184, + "100": 9.53486 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 22985512.0, - "2": 22866856.0, - "3": 22718796.0, - "4": 22793112.0, - "5": 22800332.0, - "6": 22758732.0, - "7": 22889360.0, - "8": 22616952.0, - "9": 22770544.0, - "10": 22482356.0, - "11": 22768012.0, - "12": 22646636.0, - "13": 23376168.0, - "14": 23020836.0, - "15": 22728468.0, - "16": 22844216.0, - "17": 22956308.0, - "18": 23025414.0, - "19": 23121784.0, - "20": 22737782.0, - "21": 22939100.0, - "22": 22975384.0, - "23": 22636422.0, - "24": 22885688.0, - "25": 22646604.0, - "26": 23036340.0, - "27": 22820192.0, - "28": 23031660.0, - "29": 23007710.0, - "30": 22978120.0, - "31": 22931544.0, - "32": 22671892.0, - "33": 22753852.0, - "34": 23115320.0, - "35": 22764186.0, - "36": 22708208.0, - "37": 23140396.0, - "38": 22990976.0, - "39": 23017508.0, - "40": 22766752.0, - "41": 23101556.0, - "42": 22700170.0, - "43": 23019194.0, - "44": 22716630.0, - "45": 22868256.0, - "46": 22743362.0, - "47": 22871888.0, - "48": 22852526.0, - "49": 22908212.0, - "50": 22654480.0, - "51": 22713796.0, - "52": 22833128.0, - "53": 22987656.0, - "54": 22807024.0, - "55": 22950740.0, - "56": 22669944.0, - "57": 23234316.0, - "58": 22699600.0, - "59": 22862426.0, - "60": 23046704.0, - "61": 22688294.0, - 
"62": 22743112.0, - "63": 22643864.0, - "64": 23031820.0, - "65": 23243684.0, - "66": 22705280.0, - "67": 22986366.0, - "68": 22949460.0, - "69": 23193560.0, - "70": 22838360.0, - "71": 22750350.0, - "72": 23155256.0, - "73": 23168624.0, - "74": 22970414.0, - "75": 22903392.0, - "76": 22714040.0, - "77": 23011804.0, - "78": 23010392.0, - "79": 22845544.0, - "80": 22958276.0, - "81": 22850234.0, - "82": 22746280.0, - "83": 22741604.0, - "84": 23135624.0, - "85": 22945892.0, - "86": 23108160.0, - "87": 22369104.0, - "88": 22565104.0, - "89": 22738008.0, - "90": 22782056.0, - "91": 22941128.0, - "92": 22680628.0, - "93": 22647880.0, - "94": 23168946.0, - "95": 22702252.0, - "96": 22867296.0, - "97": 22852594.0, - "98": 22897226.0, - "99": 22645712.0, - "100": 23029136.0 + "1": 22986336.0, + "2": 22867800.0, + "3": 22719788.0, + "4": 22794016.0, + "5": 22801176.0, + "6": 22759778.0, + "7": 22890350.0, + "8": 22617894.0, + "9": 22771488.0, + "10": 22483272.0, + "11": 22768864.0, + "12": 22647492.0, + "13": 23377228.0, + "14": 23021960.0, + "15": 22729568.0, + "16": 22845228.0, + "17": 22957312.0, + "18": 23026388.0, + "19": 23122824.0, + "20": 22738710.0, + "21": 22939894.0, + "22": 22976252.0, + "23": 22637356.0, + "24": 22886552.0, + "25": 22647580.0, + "26": 23037294.0, + "27": 22821206.0, + "28": 23032666.0, + "29": 23008740.0, + "30": 22979150.0, + "31": 22932438.0, + "32": 22672848.0, + "33": 22754864.0, + "34": 23116486.0, + "35": 22766216.0, + "36": 22709636.0, + "37": 23141720.0, + "38": 22992540.0, + "39": 23019594.0, + "40": 22767810.0, + "41": 23106740.0, + "42": 23749792.0, + "43": 24068872.0, + "44": 22717204.0, + "45": 22869462.0, + "46": 23792952.0, + "47": 22873204.0, + "48": 22853536.0, + "49": 23957960.0, + "50": 23704312.0, + "51": 23763734.0, + "52": 23883638.0, + "53": 24037404.0, + "54": 23856864.0, + "55": 24001098.0, + "56": 23720376.0, + "57": 24284706.0, + "58": 23749464.0, + "59": 23913848.0, + "60": 24098614.0, + "61": 23739552.0, + "62": 
22746136.0, + "63": 24743096.0, + "64": 24081656.0, + "65": 24297808.0, + "66": 23760908.0, + "67": 24037080.0, + "68": 25048560.0, + "69": 24243456.0, + "70": 23892164.0, + "71": 24848832.0, + "72": 24205880.0, + "73": 24221284.0, + "74": 25068664.0, + "75": 23957416.0, + "76": 23764968.0, + "77": 25110192.0, + "78": 24061600.0, + "79": 23895220.0, + "80": 24008140.0, + "81": 23905368.0, + "82": 23796512.0, + "83": 22742706.0, + "84": 24186848.0, + "85": 23995992.0, + "86": 24180432.0, + "87": 23419392.0, + "88": 23615546.0, + "89": 23787832.0, + "90": 23832072.0, + "91": 23991824.0, + "92": 23731324.0, + "93": 22649044.0, + "94": 24219140.0, + "95": 22705788.0, + "96": 23918516.0, + "97": 23902516.0, + "98": 22898480.0, + "99": 23695706.0, + "100": 24079564.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 804504064.0, - "2": 766036992.0, - "3": 844984320.0, - "4": 801470464.0, - "5": 808671232.0, - "6": 808818688.0, - "7": 804558848.0, - "8": 801470464.0, - "9": 809105408.0, - "10": 807352320.0, + "1": 815727104.0, + "2": 787672064.0, + "3": 804506624.0, + "4": 807958528.0, + "5": 808808448.0, + "6": 808817664.0, + "7": 808515584.0, + "8": 807352320.0, + "9": 807352320.0, + "10": 804616192.0, "11": 801470464.0, - "12": 809530368.0, - "13": 808635392.0, - "14": 801470464.0, - "15": 808386560.0, - "16": 804108288.0, - "17": 809097216.0, + "12": 801470464.0, + "13": 807958528.0, + "14": 809416704.0, + "15": 808757248.0, + "16": 808307712.0, + "17": 804108288.0, "18": 801470464.0, "19": 801470464.0, "20": 801470464.0, - "21": 808868864.0, - "22": 808409088.0, - "23": 808865792.0, - "24": 801470464.0, + "21": 809096192.0, + "22": 808957952.0, + "23": 804558848.0, + "24": 808546304.0, "25": 801470464.0, - "26": 807958528.0, - "27": 808450048.0, - "28": 808056832.0, - "29": 801470464.0, - "30": 808410112.0, - "31": 808336384.0, - "32": 804922368.0, - "33": 801470464.0, - "34": 808819712.0, - "35": 
809051136.0, - "36": 801470464.0, - "37": 808127488.0, - "38": 808793088.0, - "39": 807352320.0, + "26": 801470464.0, + "27": 808245248.0, + "28": 801470464.0, + "29": 804977664.0, + "30": 801470464.0, + "31": 801470464.0, + "32": 801470464.0, + "33": 809012224.0, + "34": 807958528.0, + "35": 807958528.0, + "36": 808537088.0, + "37": 804976640.0, + "38": 804354048.0, + "39": 804157440.0, "40": 801470464.0, "41": 801470464.0, - "42": 808155136.0, - "43": 808081408.0, - "44": 801470464.0, - "45": 808409088.0, - "46": 808429568.0, - "47": 807985152.0, - "48": 804370432.0, - "49": 801470464.0, - "50": 808466432.0, + "42": 801470464.0, + "43": 801470464.0, + "44": 808138752.0, + "45": 808515584.0, + "46": 808938496.0, + "47": 807837696.0, + "48": 807352320.0, + "49": 807676928.0, + "50": 808089600.0, "51": 801470464.0, "52": 801470464.0, - "53": 808466432.0, - "54": 808409088.0, - "55": 807352320.0, - "56": 804558848.0, + "53": 801470464.0, + "54": 801470464.0, + "55": 801470464.0, + "56": 808879104.0, "57": 801470464.0, - "58": 808627200.0, - "59": 808847360.0, - "60": 808333312.0, - "61": 804354048.0, - "62": 801470464.0, - "63": 808409088.0, - "64": 808681472.0, - "65": 808024064.0, - "66": 808409088.0, - "67": 805165056.0, + "58": 807958528.0, + "59": 807352320.0, + "60": 804780032.0, + "61": 804108288.0, + "62": 804108288.0, + "63": 801470464.0, + "64": 804558848.0, + "65": 804108288.0, + "66": 801470464.0, + "67": 801470464.0, "68": 801470464.0, - "69": 808627200.0, - "70": 808187904.0, - "71": 808409088.0, - "72": 807802880.0, + "69": 801470464.0, + "70": 801470464.0, + "71": 801470464.0, + "72": 801470464.0, "73": 801470464.0, "74": 801470464.0, - "75": 808377344.0, - "76": 809356288.0, - "77": 804843520.0, + "75": 801470464.0, + "76": 801470464.0, + "77": 801470464.0, "78": 801470464.0, - "79": 808267776.0, - "80": 805473280.0, + "79": 801470464.0, + "80": 801470464.0, "81": 801470464.0, - "82": 808417280.0, - "83": 808309760.0, - "84": 801470464.0, - "85": 
808409088.0, - "86": 808409088.0, + "82": 801470464.0, + "83": 808409088.0, + "84": 808409088.0, + "85": 801470464.0, + "86": 801470464.0, "87": 801470464.0, - "88": 807958528.0, - "89": 808089600.0, - "90": 801470464.0, - "91": 808293376.0, - "92": 807958528.0, - "93": 807663616.0, - "94": 801470464.0, - "95": 808237056.0, - "96": 809064448.0, - "97": 801470464.0, - "98": 801470464.0, - "99": 808598528.0, - "100": 808688640.0 + "88": 809389056.0, + "89": 808253440.0, + "90": 807802880.0, + "91": 807418880.0, + "92": 807352320.0, + "93": 807802880.0, + "94": 807802880.0, + "95": 805222400.0, + "96": 804149248.0, + "97": 804108288.0, + "98": 804108288.0, + "99": 804558848.0, + "100": 801470464.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 978379776.0, - "2": 1181844992.0, - "3": 1184461312.0, - "4": 1184461312.0, - "5": 1184461312.0, - "6": 1184461312.0, - "7": 1184461312.0, - "8": 1184461312.0, - "9": 1184461312.0, - "10": 1184461312.0, - "11": 1184461312.0, - "12": 1184461312.0, - "13": 1184461312.0, - "14": 1184461312.0, - "15": 1184461312.0, - "16": 1184461312.0, - "17": 1184461312.0, - "18": 1184461312.0, - "19": 1184461312.0, - "20": 1184461312.0, - "21": 1184461312.0, - "22": 1184461312.0, - "23": 1184461312.0, - "24": 1184461312.0, - "25": 1184461312.0, - "26": 1184461312.0, - "27": 1184461312.0, - "28": 1184461312.0, - "29": 1184461312.0, - "30": 1184461312.0, - "31": 1184461312.0, - "32": 1184461312.0, - "33": 1184461312.0, - "34": 1184461312.0, - "35": 1184461312.0, - "36": 1184461312.0, - "37": 1184461312.0, - "38": 1184461312.0, - "39": 1184461312.0, - "40": 1184461312.0, - "41": 1184461312.0, - "42": 1184461312.0, - "43": 1184461312.0, - "44": 1184461312.0, - "45": 1184461312.0, - "46": 1184461312.0, - "47": 1184461312.0, - "48": 1184461312.0, - "49": 1184461312.0, - "50": 1184461312.0, - "51": 1184461312.0, - "52": 1184461312.0, - "53": 1184461312.0, - "54": 1184461312.0, - "55": 
1184461312.0, - "56": 1184461312.0, - "57": 1184461312.0, - "58": 1184461312.0, - "59": 1184461312.0, - "60": 1184461312.0, - "61": 1184461312.0, - "62": 1184461312.0, - "63": 1184461312.0, - "64": 1184461312.0, - "65": 1184461312.0, - "66": 1184461312.0, - "67": 1184461312.0, - "68": 1184461312.0, - "69": 1184461312.0, - "70": 1184461312.0, - "71": 1184461312.0, - "72": 1184461312.0, - "73": 1184461312.0, - "74": 1184461312.0, - "75": 1184461312.0, - "76": 1184461312.0, - "77": 1184461312.0, - "78": 1184461312.0, - "79": 1184461312.0, - "80": 1184461312.0, - "81": 1184461312.0, - "82": 1184461312.0, - "83": 1184461312.0, - "84": 1184461312.0, - "85": 1184461312.0, - "86": 1184461312.0, - "87": 1184461312.0, - "88": 1184461312.0, - "89": 1184461312.0, - "90": 1184461312.0, - "91": 1184461312.0, - "92": 1184461312.0, - "93": 1184461312.0, - "94": 1184461312.0, - "95": 1184461312.0, - "96": 1184461312.0, - "97": 1184461312.0, - "98": 1184461312.0, - "99": 1184461312.0, - "100": 1184461312.0 + "1": 957495296.0, + "2": 1172456960.0, + "3": 1178258432.0, + "4": 1178258432.0, + "5": 1178258432.0, + "6": 1178258432.0, + "7": 1178258432.0, + "8": 1178258432.0, + "9": 1178258432.0, + "10": 1178258432.0, + "11": 1178258432.0, + "12": 1178258432.0, + "13": 1178258432.0, + "14": 1178258432.0, + "15": 1178258432.0, + "16": 1178258432.0, + "17": 1178258432.0, + "18": 1178258432.0, + "19": 1178258432.0, + "20": 1178258432.0, + "21": 1178258432.0, + "22": 1178258432.0, + "23": 1178258432.0, + "24": 1178258432.0, + "25": 1178258432.0, + "26": 1178258432.0, + "27": 1178258432.0, + "28": 1178258432.0, + "29": 1178258432.0, + "30": 1178258432.0, + "31": 1178258432.0, + "32": 1178258432.0, + "33": 1178258432.0, + "34": 1178258432.0, + "35": 1178258432.0, + "36": 1178258432.0, + "37": 1178258432.0, + "38": 1178258432.0, + "39": 1178258432.0, + "40": 1178258432.0, + "41": 1178258432.0, + "42": 1178258432.0, + "43": 1178258432.0, + "44": 1178258432.0, + "45": 1178258432.0, + "46": 
1178258432.0, + "47": 1178258432.0, + "48": 1178258432.0, + "49": 1178258432.0, + "50": 1178258432.0, + "51": 1178258432.0, + "52": 1178258432.0, + "53": 1178258432.0, + "54": 1178258432.0, + "55": 1178258432.0, + "56": 1178258432.0, + "57": 1178258432.0, + "58": 1178258432.0, + "59": 1178258432.0, + "60": 1178258432.0, + "61": 1178258432.0, + "62": 1178258432.0, + "63": 1178258432.0, + "64": 1178258432.0, + "65": 1178258432.0, + "66": 1178258432.0, + "67": 1178258432.0, + "68": 1178258432.0, + "69": 1178258432.0, + "70": 1178258432.0, + "71": 1178258432.0, + "72": 1178258432.0, + "73": 1178258432.0, + "74": 1178258432.0, + "75": 1178258432.0, + "76": 1178258432.0, + "77": 1178258432.0, + "78": 1178258432.0, + "79": 1178258432.0, + "80": 1178258432.0, + "81": 1178258432.0, + "82": 1178258432.0, + "83": 1178258432.0, + "84": 1178258432.0, + "85": 1178258432.0, + "86": 1178258432.0, + "87": 1178258432.0, + "88": 1178258432.0, + "89": 1178258432.0, + "90": 1178258432.0, + "91": 1178258432.0, + "92": 1178258432.0, + "93": 1178258432.0, + "94": 1178258432.0, + "95": 1178258432.0, + "96": 1178258432.0, + "97": 1178258432.0, + "98": 1178258432.0, + "99": 1178258432.0, + "100": 1178258432.0 } }, "mtp_1 loss": { @@ -434,104 +434,104 @@ "values": { "1": 10.91546, "2": 10.92323, - "3": 10.93384, - "4": 10.92739, - "5": 10.90724, - "6": 10.91817, - "7": 10.92486, - "8": 10.92528, - "9": 10.93457, - "10": 10.9265, - "11": 10.91896, - "12": 10.91863, - "13": 10.92808, - "14": 10.91206, - "15": 10.91984, - "16": 10.92451, - "17": 10.92227, - "18": 10.90737, - "19": 10.91483, - "20": 10.90522, - "21": 10.9114, - "22": 10.89772, - "23": 10.90537, - "24": 10.89029, - "25": 10.89787, - "26": 10.88468, + "3": 10.93397, + "4": 10.92751, + "5": 10.90737, + "6": 10.91848, + "7": 10.92458, + "8": 10.92554, + "9": 10.93463, + "10": 10.92668, + "11": 10.91878, + "12": 10.9187, + "13": 10.92838, + "14": 10.91232, + "15": 10.92007, + "16": 10.92421, + "17": 10.92201, + "18": 10.90719, + "19": 
10.91465, + "20": 10.90473, + "21": 10.91184, + "22": 10.89773, + "23": 10.90467, + "24": 10.88981, + "25": 10.89799, + "26": 10.88458, "27": 10.89842, - "28": 10.8909, - "29": 10.87535, - "30": 10.88065, - "31": 10.87294, - "32": 10.87864, - "33": 10.86983, - "34": 10.86798, - "35": 10.85949, - "36": 10.8618, - "37": 10.85516, - "38": 10.85688, - "39": 10.84912, - "40": 10.86276, - "41": 10.85336, - "42": 10.84776, - "43": 10.8455, - "44": 10.83817, - "45": 10.84937, - "46": 10.83807, - "47": 10.83805, - "48": 10.83108, - "49": 10.82947, - "50": 10.82233, - "51": 10.82166, - "52": 10.82114, - "53": 10.8067, - "54": 10.8107, - "55": 10.79431, - "56": 10.79976, - "57": 10.78946, - "58": 10.79833, - "59": 10.78093, - "60": 10.77476, - "61": 10.77647, - "62": 10.76099, - "63": 10.78365, - "64": 10.75478, - "65": 10.75021, - "66": 10.75701, - "67": 10.73475, - "68": 10.72894, - "69": 10.72604, - "70": 10.72547, - "71": 10.72458, - "72": 10.7195, - "73": 10.71167, - "74": 10.704, - "75": 10.68533, - "76": 10.69498, - "77": 10.69053, + "28": 10.89072, + "29": 10.87529, + "30": 10.88012, + "31": 10.8727, + "32": 10.87838, + "33": 10.86974, + "34": 10.86841, + "35": 10.85917, + "36": 10.86175, + "37": 10.85541, + "38": 10.85717, + "39": 10.849, + "40": 10.86294, + "41": 10.85311, + "42": 10.84765, + "43": 10.84575, + "44": 10.83781, + "45": 10.84929, + "46": 10.83794, + "47": 10.83823, + "48": 10.83113, + "49": 10.82968, + "50": 10.82226, + "51": 10.82165, + "52": 10.82088, + "53": 10.8066, + "54": 10.81086, + "55": 10.79429, + "56": 10.79986, + "57": 10.78953, + "58": 10.7985, + "59": 10.78091, + "60": 10.77519, + "61": 10.77652, + "62": 10.76107, + "63": 10.78407, + "64": 10.75476, + "65": 10.75019, + "66": 10.75694, + "67": 10.73504, + "68": 10.72867, + "69": 10.72583, + "70": 10.72571, + "71": 10.72463, + "72": 10.71967, + "73": 10.71173, + "74": 10.70397, + "75": 10.68536, + "76": 10.6951, + "77": 10.69081, "78": 10.68213, - "79": 10.6697, - "80": 10.67692, - "81": 
10.66916, - "82": 10.65024, - "83": 10.62678, - "84": 10.61021, - "85": 10.6026, - "86": 10.64309, - "87": 10.63639, - "88": 10.63081, - "89": 10.59534, - "90": 10.58433, - "91": 10.60787, - "92": 10.58304, - "93": 10.56199, - "94": 10.59372, - "95": 10.57621, - "96": 10.57236, - "97": 10.55407, - "98": 10.5595, - "99": 10.55809, - "100": 10.5283 + "79": 10.66966, + "80": 10.677, + "81": 10.66877, + "82": 10.6502, + "83": 10.62666, + "84": 10.61061, + "85": 10.60269, + "86": 10.64312, + "87": 10.63649, + "88": 10.63061, + "89": 10.59523, + "90": 10.58422, + "91": 10.60753, + "92": 10.58273, + "93": 10.56197, + "94": 10.59313, + "95": 10.57585, + "96": 10.57208, + "97": 10.55381, + "98": 10.55891, + "99": 10.55782, + "100": 10.52811 } }, "iteration-time": { @@ -540,105 +540,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 27.18208, - "3": 1.39529, - "4": 3.6491, - "5": 0.67179, - "6": 0.67671, - "7": 0.66994, - "8": 0.66973, - "9": 0.67108, - "10": 0.67559, - "11": 0.67217, - "12": 0.67331, - "13": 0.66954, - "14": 0.67002, - "15": 0.67239, - "16": 0.76215, - "17": 0.69839, - "18": 0.68015, - "19": 0.69381, - "20": 0.68775, - "21": 0.69137, - "22": 0.68806, - "23": 0.69976, - "24": 0.77931, - "25": 0.76553, - "26": 0.68909, - "27": 0.68374, - "28": 0.68045, - "29": 0.6771, - "30": 0.67224, - "31": 0.67362, - "32": 0.67682, - "33": 0.672, - "34": 0.67674, - "35": 0.67276, - "36": 0.67257, - "37": 0.67332, - "38": 0.68112, - "39": 0.67781, - "40": 0.67515, - "41": 0.67587, - "42": 0.6733, - "43": 0.67545, - "44": 0.67335, - "45": 0.68357, - "46": 0.68261, - "47": 0.68097, - "48": 0.68067, - "49": 0.68428, - "50": 0.68183, - "51": 0.69468, - "52": 0.68108, - "53": 0.683, - "54": 0.68569, - "55": 0.68183, - "56": 0.68275, - "57": 0.6821, - "58": 0.68182, - "59": 0.68538, - "60": 0.68324, - "61": 0.68519, - "62": 0.68243, - "63": 0.68308, - "64": 0.69526, - "65": 0.68084, - "66": 0.68955, - "67": 0.68442, - "68": 0.68126, - "69": 0.68341, - "70": 0.68587, - 
"71": 0.68166, - "72": 0.68346, - "73": 0.68477, - "74": 0.6857, - "75": 0.68228, - "76": 0.68263, - "77": 0.67013, - "78": 0.66937, - "79": 0.66958, - "80": 0.66944, - "81": 0.67111, - "82": 0.67321, - "83": 0.66983, - "84": 0.67414, - "85": 0.67114, - "86": 0.67054, - "87": 0.66936, - "88": 0.66939, - "89": 0.66786, - "90": 0.66981, - "91": 0.66651, - "92": 0.67627, - "93": 0.68747, - "94": 0.67136, - "95": 0.67193, - "96": 0.67111, - "97": 0.66996, - "98": 0.68055, - "99": 0.6806, - "100": 0.67843 + "2": 27.91418, + "3": 1.8444, + "4": 4.36938, + "5": 0.9997, + "6": 1.00434, + "7": 0.99907, + "8": 1.00275, + "9": 0.99461, + "10": 0.99275, + "11": 0.97843, + "12": 0.98765, + "13": 0.9903, + "14": 1.00077, + "15": 1.0, + "16": 0.98823, + "17": 0.98199, + "18": 0.9877, + "19": 0.98886, + "20": 0.99983, + "21": 0.98962, + "22": 0.99635, + "23": 0.96454, + "24": 0.93898, + "25": 0.96491, + "26": 0.98141, + "27": 0.95293, + "28": 0.95301, + "29": 0.94879, + "30": 0.98802, + "31": 0.98495, + "32": 0.99868, + "33": 0.98867, + "34": 0.99852, + "35": 1.00176, + "36": 0.99104, + "37": 0.99448, + "38": 0.99426, + "39": 0.9992, + "40": 0.99262, + "41": 0.99458, + "42": 0.99928, + "43": 0.99527, + "44": 0.99574, + "45": 0.99947, + "46": 1.00027, + "47": 0.99784, + "48": 0.99625, + "49": 1.0035, + "50": 1.01101, + "51": 1.07145, + "52": 0.99811, + "53": 1.0032, + "54": 0.9989, + "55": 0.99747, + "56": 0.99838, + "57": 1.01334, + "58": 0.99915, + "59": 0.98789, + "60": 0.98956, + "61": 0.99815, + "62": 0.99777, + "63": 0.99925, + "64": 1.01132, + "65": 0.99811, + "66": 0.99272, + "67": 0.99779, + "68": 1.00819, + "69": 1.00743, + "70": 1.0067, + "71": 0.99506, + "72": 1.00152, + "73": 0.99805, + "74": 1.00601, + "75": 1.00578, + "76": 1.0047, + "77": 1.00041, + "78": 1.01124, + "79": 1.00206, + "80": 1.00293, + "81": 1.00212, + "82": 1.00771, + "83": 0.99432, + "84": 0.99804, + "85": 1.03671, + "86": 0.99424, + "87": 0.99658, + "88": 0.99251, + "89": 0.99387, + "90": 0.99986, + 
"91": 0.99704, + "92": 1.00218, + "93": 1.00685, + "94": 1.00914, + "95": 0.99614, + "96": 0.99789, + "97": 1.00812, + "98": 1.00062, + "99": 0.99776, + "100": 0.99241 } } } \ No newline at end of file diff --git a/uv.lock b/uv.lock index 129f94b4288..a7399c6c16a 100644 --- a/uv.lock +++ b/uv.lock @@ -91,6 +91,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/58/3bf0b7d474607dc7fd67dd1365c4e0f392c8177eaf4054e5ddee3ebd53b5/aiobotocore-2.26.0-py3-none-any.whl", hash = "sha256:a793db51c07930513b74ea7a95bd79aaa42f545bdb0f011779646eafa216abec", size = 87333, upload-time = "2025-11-28T07:54:58.457Z" }, ] +[[package]] +name = "aiofiles" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload-time = "2025-10-09T20:51:04.358Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" }, +] + [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -107,7 +116,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, - { name = "async-timeout", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "async-timeout", marker = "python_full_version < '3.11'" }, { name = "attrs" }, { name = "frozenlist" }, { name = "multidict" }, @@ -247,7 +256,7 @@ version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "frozenlist" }, - { name = "typing-extensions", marker = 
"python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } wheels = [ @@ -301,10 +310,10 @@ name = "anyio" version = "4.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, { name = "idna" }, { name = "sniffio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949, upload-time = "2025-03-17T00:02:54.77Z" } wheels = [ @@ -346,18 +355,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/e0/ed152425e51b7c8a4ce81d33683b43d87e770a76a65922dc7524a0106ae8/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-win_amd64.whl", hash = "sha256:ecb0d9f7f410ba3b4d92547c2477f73f8406455448f4ea8c146515671fd20210", size = 1849938, upload-time = "2026-01-13T18:11:06.312Z" }, ] -[[package]] -name = "asgiref" -version = "3.11.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/63/40/f03da1264ae8f7cfdbf9146542e5e7e8100a4c66ab48e791df9a03d3f6c0/asgiref-3.11.1.tar.gz", hash = "sha256:5f184dc43b7e763efe848065441eac62229c9f7b0475f41f80e207a114eda4ce", size = 38550, upload-time = "2026-02-03T13:30:14.33Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/0a/a72d10ed65068e115044937873362e6e32fab1b7dce0046aeb224682c989/asgiref-3.11.1-py3-none-any.whl", hash = "sha256:e8667a091e69529631969fd45dc268fa79b99c92c5fcdda727757e52146ec133", size = 24345, upload-time = "2026-02-03T13:30:13.039Z" }, -] - [[package]] name = "astroid" version = "3.2.4" @@ -749,7 +746,7 @@ name = "cffi" version = "2.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pycparser", marker = "implementation_name != 'PyPy' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } wheels = [ @@ -920,7 +917,7 @@ name = "click" version = "8.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } wheels = [ @@ -1413,8 +1410,8 @@ name = "emerging-optimizers" version = "0.2.0" source = { git = 
"https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.2.0#1effa026ff096b7fa1063ca2fba19d98be6e6cdf" } dependencies = [ - { name = "absl-py", marker = "python_full_version >= '3.12' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "torch", marker = "(python_full_version >= '3.12' and sys_platform == 'never') or (python_full_version < '3.12' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'never' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "absl-py", marker = "python_full_version >= '3.12'" }, + { name = "torch", marker = "python_full_version >= '3.12' and sys_platform == 'never'" }, ] [[package]] @@ -1422,7 +1419,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -1585,11 +1582,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/9c/34f6962f9b9e9c71f6e5ed806e0d0ff03c9d1b0b2340088a0cf4bce09b18/flask-3.1.3-py3-none-any.whl", hash = "sha256:f4bcbefc124291925f1a26446da31a5178f9483862233b23c0c96a20701f670c", size = 103424, upload-time = "2026-02-19T05:00:56.027Z" }, ] -[package.optional-dependencies] -async = [ - { name = "asgiref" }, -] - [[package]] name = "flask-restful" version = "0.3.10" @@ -2018,6 +2010,19 @@ http2 = [ { name = "h2" }, ] +[[package]] +name = "httpx-aiohttp" +version = 
"0.1.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "httpx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/2c/b894861cecf030fb45675ea24aa55b5722e97c602a163d872fca66c5a6d8/httpx_aiohttp-0.1.12.tar.gz", hash = "sha256:81feec51fd82c0ecfa0e9aaf1b1a6c2591260d5e2bcbeb7eb0277a78e610df2c", size = 275945, upload-time = "2025-12-12T10:12:15.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/8d/85c9701e9af72ca132a1783e2a54364a90c6da832304416a30fc11196ab2/httpx_aiohttp-0.1.12-py3-none-any.whl", hash = "sha256:5b0eac39a7f360fa7867a60bcb46bb1024eada9c01cbfecdb54dc1edb3fb7141", size = 6367, upload-time = "2025-12-12T10:12:14.018Z" }, +] + [[package]] name = "huggingface-hub" version = "0.36.2" @@ -2026,7 +2031,7 @@ dependencies = [ { name = "filelock" }, { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, { name = "packaging" }, { name = "pyyaml" }, { name = "requests" }, @@ -2461,7 +2466,7 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 
'linux'", ] dependencies = [ - { name = "mdurl", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "mdurl", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } wheels = [ @@ -2491,7 +2496,7 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ - { name = "mdurl", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "mdurl", marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } wheels = [ @@ -2634,7 +2639,6 @@ dev = [ { name = "fastapi" }, { name = "flash-linear-attention" }, { name = "flashinfer-python" }, - { name = "flask", extra = ["async"], marker = "extra == 'extra-13-megatron-core-dev'" }, { name = "hypercorn" }, { name = "mamba-ssm" }, { name = "megatron-energon", extra = ["av-decode"], marker = "extra == 'extra-13-megatron-core-dev'" }, @@ -2644,8 +2648,9 @@ dev = [ { name = "nvidia-resiliency-ext" }, { name = "nvtx" }, { name = "onnxscript" }, - { name = "openai" }, + { name = "openai", extra = ["aiohttp"], marker = "extra == 'extra-13-megatron-core-dev'" }, { name = "opentelemetry-api", version = "1.33.1", source = { registry = "https://pypi.org/simple" } }, + { name = "quart" }, { name = "tensorstore", 
version = "0.1.78", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "tensorstore", version = "0.1.81", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "tqdm" }, @@ -2755,7 +2760,6 @@ requires-dist = [ { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.4.0" }, { name = "flashinfer-python", marker = "extra == 'dev'", specifier = "~=0.5.0" }, { name = "flashinfer-python", marker = "extra == 'lts'", specifier = "~=0.5.0" }, - { name = "flask", extras = ["async"], marker = "extra == 'dev'" }, { name = "flask-restful", marker = "extra == 'mlm'" }, { name = "hypercorn", marker = "extra == 'dev'" }, { name = "mamba-ssm", marker = "extra == 'dev'", specifier = "~=2.2" }, @@ -2773,10 +2777,11 @@ requires-dist = [ { name = "nvtx", marker = "extra == 'lts'", specifier = "~=0.2" }, { name = "onnxscript", marker = "extra == 'dev'" }, { name = "onnxscript", marker = "extra == 'lts'" }, - { name = "openai", marker = "extra == 'dev'" }, + { name = "openai", extras = ["aiohttp"], marker = "extra == 'dev'" }, { name = "opentelemetry-api", marker = "extra == 'dev'", specifier = "~=1.33.1" }, { name = "opentelemetry-api", marker = "extra == 'lts'", specifier = "~=1.33.1" }, { name = "packaging", specifier = ">=24.2" }, + { name = "quart", marker = "extra == 'dev'" }, { name = "sentencepiece", marker = "extra == 'mlm'" }, { name = "tensorstore", marker = "extra == 'dev'", specifier = "~=0.1,!=0.1.46,!=0.1.72" }, { name = "tensorstore", marker = "extra == 'lts'", specifier = "~=0.1,!=0.1.46,!=0.1.72" }, @@ -3029,7 +3034,7 @@ name = "multidict" version = "6.7.1" source = { registry = 
"https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } wheels = [ @@ -3836,6 +3841,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/5f/bcdf0fb510c24f021e485f920677da363cd59d6e0310171bf2cad6e052b5/openai-2.23.0-py3-none-any.whl", hash = "sha256:1041d40bebf845053fda1946104f8bf9c3e2df957a41c3878c55c72c352630e9", size = 1118971, upload-time = "2026-02-24T03:20:18.708Z" }, ] +[package.optional-dependencies] +aiohttp = [ + { name = "aiohttp" }, + { name = "httpx-aiohttp" }, +] + [[package]] name = "opencensus" version = "0.11.4" @@ -4166,10 +4177,10 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform != 'linux'", ] dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "python-dateutil", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "pytz", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "tzdata", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { 
name = "python-dateutil", marker = "python_full_version < '3.11'" }, + { name = "pytz", marker = "python_full_version < '3.11'" }, + { name = "tzdata", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" } wheels = [ @@ -4245,9 +4256,9 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "python-dateutil", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "python-dateutil", marker = "python_full_version >= '3.11'" }, + { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 4641901, upload-time = "2026-02-17T22:20:16.434Z" } wheels = [ @@ -5294,6 +5305,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c0/28/26534bed77109632a956977f60d8519049f545abc39215d086e33a61f1f2/pyyaml_ft-8.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:de04cfe9439565e32f178106c51dd6ca61afaa2907d143835d501d84703d3793", size = 171579, upload-time = "2025-06-10T15:32:14.34Z" }, ] +[[package]] +name = "quart" +version = "0.20.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiofiles" }, + { name = "blinker" }, + { name = "click" }, + { name = "flask" }, + { name = "hypercorn" }, + { name = "itsdangerous" }, + { name = "jinja2" }, + { name = "markupsafe" }, + { name = "werkzeug" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/9d/12e1143a5bd2ccc05c293a6f5ae1df8fd94a8fc1440ecc6c344b2b30ce13/quart-0.20.0.tar.gz", hash = "sha256:08793c206ff832483586f5ae47018c7e40bdd75d886fee3fabbdaa70c2cf505d", size = 63874, upload-time = "2024-12-23T13:53:05.664Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/e9/cc28f21f52913adf333f653b9e0a3bf9cb223f5083a26422968ba73edd8d/quart-0.20.0-py3-none-any.whl", hash = "sha256:003c08f551746710acb757de49d9b768986fd431517d0eb127380b656b98b8f1", size = 77960, upload-time = "2024-12-23T13:53:02.842Z" }, +] + [[package]] name = "ray" version = "2.54.0" @@ -5353,7 +5384,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } wheels = [ @@ -6323,7 +6354,7 @@ version = "0.52.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } wheels = [ @@ -6344,7 +6375,7 @@ name = "sympy" version = "1.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "mpmath", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts')" }, + { name = "mpmath" }, ] sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = 
"sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } wheels = [ @@ -6662,15 +6693,15 @@ name = "torch" version = "2.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 
'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "filelock", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 
'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (python_full_version < '3.12' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "sympy", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "triton", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and 
extra == 'extra-13-megatron-core-lts')" }, + { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'" }, + { name = "sympy", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "triton", marker = "sys_platform == 'never'" }, + { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/5b/30/bfebdd8ec77db9a79775121789992d6b3b75ee5494971294d7b4b7c999bc/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2b980edd8d7c0a68c4e951ee1856334a43193f98730d97408fbd148c1a933313", size = 79411457, upload-time = "2026-02-10T21:44:59.189Z" }, @@ -6740,7 +6771,7 @@ name = "tqdm" version = "4.67.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } wheels = [ From 8d1fd3c26be533a466b62392c379af1c4f5e0f01 Mon Sep 17 00:00:00 2001 From: Zhongbo Zhu <42691305+zhongbozhu@users.noreply.github.com> Date: Fri, 3 Apr 2026 12:07:49 -0700 Subject: [PATCH 329/334] [Dev] Skip routed expert padding for graph-safe MoE (#4071) Signed-off-by: 
Zhongbo Zhu --- megatron/core/transformer/moe/experts.py | 7 +++---- megatron/core/transformer/moe/moe_utils.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index 24ee09efa32..c19328d9173 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -41,6 +41,7 @@ from megatron.core.transformer.moe.moe_utils import ( ProcessGroupCollection, get_align_size_for_quantization, + skip_routed_expert_padding, ) from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.transformer.utils import ( @@ -915,8 +916,7 @@ def _fused_forward( # Apply padding if needed unpadded_tokens_per_expert = None - if self.config.moe_router_padding_for_quantization: - # Padding has already been applied in router + if skip_routed_expert_padding(self.config): pass elif self.config.fp8 or self.config.fp4: tokens_per_expert = tokens_per_expert.tolist() @@ -1033,8 +1033,7 @@ def forward( unpadded_tokens_per_expert = None tokens_per_expert: list[int] = tokens_per_expert.tolist() permuted_probs = permuted_probs.unsqueeze(-1) - if self.config.moe_router_padding_for_quantization: - # Padding has already been applied in router + if skip_routed_expert_padding(self.config): pass elif self.config.fp8 or self.config.fp4: unpadded_tokens_per_expert = tokens_per_expert diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index ccd503ae331..babba1d904d 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -1321,6 +1321,22 @@ def get_align_size_for_quantization(config: TransformerConfig) -> int: return 16 +def skip_routed_expert_padding(config: TransformerConfig) -> bool: + """Whether the expert module should skip quantization padding. 
+ + Returns True when padding is already applied by the router or the + HybridEP dispatcher. + """ + if config.moe_router_padding_for_quantization: + return True + if ( + config.moe_token_dispatcher_type == "flex" + and config.moe_flex_dispatcher_backend == "hybridep" + ): + return True + return False + + # TODO(Hepteract): delete the usage of the global parallel_state. # Initialize process groups with the global parallel_state. def get_default_pg_collection() -> ProcessGroupCollection: From 74751c970fae60a1b442dd492f58f539331bf0d9 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Mon, 6 Apr 2026 18:28:44 -0700 Subject: [PATCH 330/334] [DEV] Minor update optimizer (#4082) Signed-off-by: Hao Wu --- megatron/core/safe_globals.py | 1 + megatron/training/arguments.py | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/megatron/core/safe_globals.py b/megatron/core/safe_globals.py index 8bcfe788f60..f9f9171cd21 100755 --- a/megatron/core/safe_globals.py +++ b/megatron/core/safe_globals.py @@ -33,6 +33,7 @@ RerunState, BytesIO, Signals, + torch._C.Generator, # Needed for torch format ckpt loading after weights_only default change ] diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 7d6e860f659..b3b10d7e8bb 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1483,10 +1483,6 @@ def validate_args(args, defaults={}): args.use_layer_wise_distributed_optimizer = True args.use_distributed_optimizer = False - if args.optimizer == 'muon': - assert not args.overlap_grad_reduce, "Muon optimizer does not support overlap grad reduce. Use dist_muon instead." - assert not args.overlap_param_gather, "Muon optimizer does not support overlap param gather. Use dist_muon instead." - assert not args.use_distributed_optimizer, "Muon optimizer does not support distributed optimizer for now." assert not args.use_torch_fsdp2, "Muon optimizer does not support Torch-FSDP2 for now." 
assert not args.use_megatron_fsdp, "Muon optimizer does not support Megatron-FSDP for now." From ab6c0ffd87965b47146d6967f75322b6c53aca07 Mon Sep 17 00:00:00 2001 From: Kirthi Shankar Sivamani Date: Mon, 6 Apr 2026 21:29:08 -0400 Subject: [PATCH 331/334] TE fused grouped mlp with grouped bias and delayed wgrad (#4095) Signed-off-by: Kirthi Shankar Sivamani Signed-off-by: Xin Yao Co-authored-by: Xin Yao --- .../core/extensions/transformer_engine.py | 8 +-- megatron/core/transformer/moe/experts.py | 49 +++++++++++++------ 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 26ad5539b19..97137f81465 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1783,7 +1783,7 @@ def normalize_grouped_parameter_keys( ): """Make grouped checkpoint keys compatible across parameter layouts.""" - def maybe_remap_param(param_name: str) -> None: + def maybe_remap_param(param_name: str, single_grouped: bool) -> None: grouped_key = f"{prefix}{param_name}" indexed_keys = [ f"{prefix}{param_name}{gemm_idx}" for gemm_idx in range(self.num_gemms) @@ -1792,7 +1792,7 @@ def maybe_remap_param(param_name: str) -> None: has_any_indexed_key = any(key in state_dict for key in indexed_keys) has_all_indexed_keys = all(key in state_dict for key in indexed_keys) - if getattr(self, "single_grouped_parameter", False): + if single_grouped: if has_grouped_key or not has_all_indexed_keys: return state_dict[grouped_key] = torch.stack( @@ -1807,9 +1807,9 @@ def maybe_remap_param(param_name: str) -> None: for gemm_idx, tensor in enumerate(split_tensors): state_dict[f"{prefix}{param_name}{gemm_idx}"] = tensor - maybe_remap_param("weight") + maybe_remap_param("weight", getattr(self, "single_grouped_weight", False)) if self.use_bias: - maybe_remap_param("bias") + maybe_remap_param("bias", getattr(self, "single_grouped_bias", False)) 
self._register_load_state_dict_pre_hook( normalize_grouped_parameter_keys, with_module=True diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py index c19328d9173..976c9df3cd6 100644 --- a/megatron/core/transformer/moe/experts.py +++ b/megatron/core/transformer/moe/experts.py @@ -49,6 +49,7 @@ sharded_state_dict_default, ) from megatron.core.typed_torch import apply_module, not_none +from megatron.core.utils import is_te_min_version if HAVE_TE: from megatron.core.extensions.transformer_engine import Fp8Padding, Fp8Unpadding @@ -772,6 +773,9 @@ def _is_fused_impl_supported(self) -> bool: except ImportError: return False # Transformer Engine version is too old + if not is_te_min_version("2.14.0"): + return False + # Check for unsupported features if self.tp_group.size() > 1: return False # Tensor parallelism is not supported @@ -785,8 +789,6 @@ def _is_fused_impl_supported(self) -> bool: return False if not isinstance(self.linear_fc2, te.pytorch.GroupedLinear): return False - if self.linear_fc1.need_backward_dw() or self.linear_fc2.need_backward_dw(): - return False # Delayed weight gradient compuation is not supported # Check activation if self.activation_func != F.silu or not self.config.gated_linear_unit: @@ -801,18 +803,20 @@ def _make_fused_ops(self) -> torch.nn.Module: ops = te.pytorch.ops.Sequential() # Check if there are 1 or "num_gemms" params in the GroupedLinear module. 
- fc1_single_grouped_parameter = self.linear_fc1.single_grouped_parameter + fc1_single_grouped_weight = self.linear_fc1.single_grouped_weight fc1_weight_dtype = ( self.linear_fc1.weight.dtype - if fc1_single_grouped_parameter + if fc1_single_grouped_weight else self.linear_fc1.weight0.dtype ) - fc2_single_grouped_parameter = self.linear_fc2.single_grouped_parameter + fc2_single_grouped_weight = self.linear_fc2.single_grouped_weight fc2_weight_dtype = ( self.linear_fc2.weight.dtype - if fc2_single_grouped_parameter + if fc2_single_grouped_weight else self.linear_fc2.weight0.dtype ) + fc1_single_grouped_bias = self.linear_fc1.single_grouped_bias + fc2_single_grouped_bias = self.linear_fc2.single_grouped_bias # TODO:ksivamani: Why meta device? op = te.pytorch.ops.GroupedLinear( @@ -823,18 +827,22 @@ def _make_fused_ops(self) -> torch.nn.Module: device=torch.cuda.current_device(), dtype=fc1_weight_dtype, accumulate_into_main_grad=self.linear_fc1.fuse_wgrad_accumulation, - single_grouped_parameter=fc1_single_grouped_parameter, + single_grouped_weight=fc1_single_grouped_weight, + single_grouped_bias=fc1_single_grouped_bias, + delay_wgrad_compute=self.config.delay_wgrad_compute, ) # Copy the weights from GroupedLinear module to GroupedLinear op. 
- if fc1_single_grouped_parameter: + if fc1_single_grouped_weight: setattr(op, "weight", getattr(self.linear_fc1, "weight")) for idx in range(self.linear_fc1.num_gemms): - if not fc1_single_grouped_parameter: + if not fc1_single_grouped_weight: setattr(op, f"weight{idx}", getattr(self.linear_fc1, f"weight{idx}")) - if self.linear_fc1.use_bias: + if self.linear_fc1.use_bias and not fc1_single_grouped_bias: setattr(op, f"bias{idx}", getattr(self.linear_fc1, f"bias{idx}")) + if self.linear_fc1.use_bias and fc1_single_grouped_bias: + setattr(op, "bias", getattr(self.linear_fc1, "bias")) ops.append(op) # Activation and post-multiply probs @@ -844,7 +852,6 @@ def _make_fused_ops(self) -> torch.nn.Module: ops.append(op) # FC2 - has_bias = self.linear_fc2.use_bias op = te.pytorch.ops.GroupedLinear( self.linear_fc2.num_gemms, self.linear_fc2.in_features, @@ -853,18 +860,22 @@ def _make_fused_ops(self) -> torch.nn.Module: device=torch.cuda.current_device(), dtype=fc2_weight_dtype, accumulate_into_main_grad=self.linear_fc2.fuse_wgrad_accumulation, - single_grouped_parameter=fc2_single_grouped_parameter, + single_grouped_weight=fc2_single_grouped_weight, + single_grouped_bias=fc2_single_grouped_bias, + delay_wgrad_compute=self.config.delay_wgrad_compute, ) # Copy the weights from GroupedLinear module to GroupedLinear op. 
- if fc2_single_grouped_parameter: + if fc2_single_grouped_weight: setattr(op, "weight", getattr(self.linear_fc2, "weight")) for idx in range(self.linear_fc2.num_gemms): - if not fc2_single_grouped_parameter: + if not fc2_single_grouped_weight: setattr(op, f"weight{idx}", getattr(self.linear_fc2, f"weight{idx}")) - if self.linear_fc2.use_bias: + if self.linear_fc2.use_bias and not fc2_single_grouped_bias: setattr(op, f"bias{idx}", getattr(self.linear_fc2, f"bias{idx}")) + if self.linear_fc2.use_bias and fc2_single_grouped_bias: + setattr(op, "bias", getattr(self.linear_fc2, "bias")) ops.append(op) # Emulate submodule pre-forward hooks @@ -1230,6 +1241,14 @@ def backward_dw(self): If an error occurs during execution, it is caught and re-raised with a descriptive message. """ + if self._with_fused_impl and self.config.delay_wgrad_compute: + if self._fused_ops is not None: + (seq,) = self._fused_ops + fused_children = list(seq.children()) + assert len(fused_children) >= 3, "expected FC1, activation, FC2 in fused TE ops" + fused_children[2].backward_dw() + fused_children[0].backward_dw() + return self.linear_fc2.backward_dw() self.linear_fc1.backward_dw() From 37a4cee9adf1d6dcb2c3a8c009636b81d6a2ff5f Mon Sep 17 00:00:00 2001 From: Pingtian Li <158665726+Wohox@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:34:47 +0800 Subject: [PATCH 332/334] [Dev][feat] Support overlapping A2A Combine backprop with wgrad GEMM (#3766) Signed-off-by: Cory Ye Co-authored-by: Cory Ye --- .../fsdp/src/megatron_fsdp/megatron_fsdp.py | 48 ++++ .../megatron_fsdp/param_and_grad_buffer.py | 7 + .../core/extensions/transformer_engine.py | 10 +- megatron/core/model_parallel_config.py | 9 + megatron/core/transformer/moe/moe_layer.py | 90 ++++++- .../core/transformer/transformer_config.py | 13 + .../a2a_overlap/test_delay_wgrad_compute.py | 230 ++++++++++++++++++ .../a2a_overlap/test_schedule_chunk_1f1b.py | 2 - .../a2a_overlap/test_schedule_layer_1f1b.py | 2 - 
tests/unit_tests/a2a_overlap/utils.py | 35 ++- .../unit_tests/models/test_mamba_moe_model.py | 1 + .../transformer/test_submodule_callables.py | 1 - 12 files changed, 429 insertions(+), 19 deletions(-) create mode 100644 tests/unit_tests/a2a_overlap/test_delay_wgrad_compute.py diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py index f8640446814..bdc98bebf3c 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py @@ -73,6 +73,34 @@ class TrainingState(Enum): IDLE = auto() +def setup_delayed_wgrad_acc_hook(module, grad_acc_func): + """Configure delayed wgrad gradient processing for MoE expert parameters. + + When ``overlap_dispatch_backward_with_experts_wgrad`` is enabled on a TransformerLayer, + this function: + 1. Marks expert parameters so the normal post-accumulate-grad hook is skipped. + 2. Registers a callback on each expert parameter that invokes FSDP's gradient + reduce-scatter after the delayed wgrad computation completes. + + Args: + module: The module being processed in the forward pre-hook. Only + ``TransformerLayer`` instances with the delayed wgrad config flag + enabled are affected; all other modules are no-ops. + grad_acc_func: The FSDP gradient processing function + (``_process_post_backward_gradients``) to be called after the delayed + wgrad computation finishes. + """ + from functools import partial + + need_backward_dw = getattr(module, "need_backward_dw", lambda: False) + if not need_backward_dw(): + return + + for param in module.parameters(): + if getattr(param, 'skip_backward_post_hook', False): + param.post_wgrad_grad_acc_hook = partial(grad_acc_func, [param]) + + +class MegatronFSDP(torch.nn.Module): + """Fully Sharded Data Parallel training. 
@@ -662,6 +690,23 @@ def _process_post_backward_gradients(param_list): """ # Filter out shared parameters whose gradients are handled by the root hook. param_list = [p for p in param_list if not getattr(p, "_is_shared", False)] + + # Filter out parameters whose gradient processing is deferred to a delayed + # wgrad accumulation hook (post_wgrad_grad_acc_hook). If skip_backward_post_hook + # is set but the delayed hook was never installed, process the parameter + # immediately as a safety fallback to avoid silently dropping gradients. + param_list = [ + p + for p in param_list + if not ( + getattr(p, 'skip_backward_post_hook', False) + and hasattr(p, 'post_wgrad_grad_acc_hook') + ) + ] + + if not param_list: + return + for param in param_list: _grad_acc(param) @@ -728,6 +773,7 @@ def _pre_forward_param_unshard( prefetch=fsdp_forward_prefetch, prefetch_order=PrefetchOrder.FORWARD_PASS_ORDER, ) + return args, kwargs @torch.compiler.disable @@ -983,6 +1029,8 @@ def _register_pre_backward_param_unshard_hook(module): fsdp_modules = [] for name, module in root_module.named_modules(): + # Set post backward hook for TE grouped gemm if enabled comm overlap + setup_delayed_wgrad_acc_hook(module, _process_post_backward_gradients) if self.enable_fine_grained_param_gather_hook: _register_pre_forward_param_unshard_hook(module) _register_pre_backward_param_unshard_hook(module) diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py index ca593b4c1fe..b4e3d9becfb 100644 --- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py @@ -2632,6 +2632,13 @@ def _reset_parameters(self, old_params, new_params): if getattr(old_param, tp_attr, None) is not None: setattr(new_param, tp_attr, getattr(old_param, tp_attr)) + # For FSDP with delayed_wgrad_compute, `skip_backward_post_hook` 
needs + # to be reset on new param for correct grad accumulation of wgrad computation. + setattr( + new_param, + 'skip_backward_post_hook', + getattr(old_param, 'skip_backward_post_hook', False), + ) for item_id, p in enumerate(self.params): if p in param_map: new_p = param_map[p] diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 97137f81465..4d46f1240f5 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1706,10 +1706,14 @@ def __init__( self.disable_parameter_transpose_cache = self.config.disable_parameter_transpose_cache extra_kwargs = _get_extra_te_kwargs(config) + self.delay_wgrad_compute = ( + self.config.delay_wgrad_compute + or self.config.overlap_dispatch_backward_with_experts_wgrad + ) - if self.config.delay_wgrad_compute: + if self.delay_wgrad_compute: if is_te_min_version("2.3.0"): - extra_kwargs["delay_wgrad_compute"] = self.config.delay_wgrad_compute + extra_kwargs["delay_wgrad_compute"] = True else: raise RuntimeError( "Only TE with version >=2.3.0 supports delay_wgrad_compute now." @@ -2123,7 +2127,7 @@ def backward_dw(self): Compute weight gradients during the backward pass if delay_wgrad_compute is enabled. """ - if self.config.delay_wgrad_compute: + if self.delay_wgrad_compute: super().backward_dw() class TEColumnParallelGroupedLinear(TEGroupedLinear): diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index 34691253b30..681cd526b4e 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -278,6 +278,15 @@ class ModelParallelConfig: delay_wgrad_compute: bool = False """Delay the weight gradient computation to improve batch-level communication overlapping""" + overlap_dispatch_backward_with_experts_wgrad: bool = False + """Delay the weight gradient computation for TE Grouped GEMM MoE experts. 
+ When enabled with FSDP, the expert weight gradients are computed on a separate + CUDA stream after the data gradients finish, allowing overlap of wgrad compute + with EP A2A communication. The FSDP gradient reduce-scatter for + expert parameters is deferred until the delayed wgrad computation completes. + This requires transformer_engine with GroupedLinear support (TE >= 2.3.0). + """ + ep_overlap_early_attn_memory_release: bool = False """Enable early memory release of attention activations during EP overlap. EP overlap can increase peak memory usage when the overlapped forward module allocates diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py index 85f2df2e043..9aa50653630 100644 --- a/megatron/core/transformer/moe/moe_layer.py +++ b/megatron/core/transformer/moe/moe_layer.py @@ -339,6 +339,19 @@ def __init__( self.cudagraph_tensor_store = MoECudaGraphTensorStore() self.fwd_execution_map = ["route", "expert_compute", "postprocess"] + # Setup events and streams for delayed wgrad computation. + self.setup_delayed_wgrad_for_dispatch_backward_overlap() + + def setup_delayed_wgrad_for_dispatch_backward_overlap(self): + """Initializes CUDA events and streams for overlapping expert + weight gradient computation with dispatch backward. + """ + self._delayed_wgrad_event: Optional[torch.cuda.Event] = None + self._delayed_wgrad_stream: Optional[torch.cuda.Stream] = None + if self.config.overlap_dispatch_backward_with_experts_wgrad: + self._delayed_wgrad_event = torch.cuda.Event() + self._delayed_wgrad_stream = torch.cuda.Stream(device="cuda") + def _setup_inference_mode(self, pg_collection): """Set up inference-optimized token dispatcher and state. @@ -429,6 +442,8 @@ def dispatch(self, hidden_states: torch.Tensor, probs: torch.Tensor): tokens and their associated probabilities to the devices hosting their assigned experts. 
""" + if self.config.overlap_dispatch_backward_with_experts_wgrad: + hidden_states = _RegisterDelayedWgradForExperts.apply(self, hidden_states) return self.token_dispatcher.token_dispatch(hidden_states, probs) @maybe_skip_or_early_return_by_cudagraph("shared_experts_compute") @@ -467,6 +482,10 @@ def routed_experts_compute(self, hidden_states: torch.Tensor, probs: torch.Tenso for each expert. It then passes the tokens through the local experts. The output from the experts is preprocessed for the combine step. """ + if self.config.overlap_dispatch_backward_with_experts_wgrad: + hidden_states = _RecordExpertDgradCompletion.apply( + self._delayed_wgrad_event, hidden_states + ) dispatched_input, tokens_per_expert, permuted_probs = ( self.token_dispatcher.dispatch_postprocess(hidden_states, probs) ) @@ -612,24 +631,24 @@ def custom_forward(hidden_states, intermediate_tensors=None, padding_mask=None): def backward_dw(self, routed_experts: bool = True, shared_experts: bool = False): """Compute weight gradients for experts and shared experts.""" + from megatron.core.pipeline_parallel.utils import get_comm_stream + # TODO(Wohox): replace the "routed_experts" and "shared_experts" arguments with better # naming to better explain that they are actually from different fine-grained callables, # or use scanning to decide which backward_dw should be called. if routed_experts: self.experts.backward_dw() - if self.config.moe_latent_size: + if self.config.moe_latent_size and self.config.overlap_moe_expert_parallel_comm: # TODO(Wohox): fc2_latent_proj forward and backward are executed in comm stream, # so we execute its backward_dw in the comm stream too. But this may harm the # EP overlap performance. Better to check if there is a better way to handle this. 
- from megatron.core.pipeline_parallel.utils import get_comm_stream - comm_stream = get_comm_stream() with torch.cuda.stream(comm_stream): self.fc2_latent_proj.backward_dw() if shared_experts: if self.use_shared_expert and not self.shared_expert_overlap: self.shared_experts.backward_dw() - if self.config.moe_latent_size: + if self.config.moe_latent_size and self.config.overlap_moe_expert_parallel_comm: self.fc1_latent_proj.backward_dw() def set_for_recompute_pre_mlp_layernorm(self): @@ -640,3 +659,66 @@ def set_for_recompute_pre_mlp_layernorm(self): from megatron.core.extensions.transformer_engine import set_save_original_input set_save_original_input(self.shared_experts.linear_fc1) + + +class _RecordExpertDgradCompletion(torch.autograd.Function): + """Autograd function that records a CUDA event when expert data gradients finish. + + Placed in the forward graph just before the expert computation so that during + the backward pass, when the expert dgrad completes, we record an event. The + subsequent ``_RegisterDelayedWgradForExperts`` waits on this event before + launching the delayed wgrad computation on a separate CUDA stream. + """ + + @staticmethod + def forward(ctx, event: torch.cuda.Event, *inputs): + """Forward pass that stores the event and passes through inputs unchanged.""" + ctx.event = event + return inputs[0] if len(inputs) == 1 else inputs + + @staticmethod + def backward(ctx, *grad_outputs): + """Backward pass that records the event when expert dgrad completes.""" + ctx.event.record(torch.cuda.current_stream()) + ctx.event = None + return (None,) + grad_outputs + + +class _RegisterDelayedWgradForExperts(torch.autograd.Function): + """Autograd function that orchestrates delayed wgrad computation for MoE experts. + + Placed in the forward graph at the dispatch boundary. During the backward pass, + this function: + 1. Records an event on the current (backward) stream to signal the dgrad is done. + 2. 
Executes the delayed wgrad computation on a dedicated CUDA stream. + 3. Waits for the wgrad computation to complete. + 4. Invokes the registered gradient processing callback (e.g., FSDP reduce-scatter). + """ + + @staticmethod + def forward(ctx, module: MoELayer, *inputs): + """Forward pass that stores the MoE module and passes through inputs unchanged.""" + ctx.module = module + return inputs[0] if len(inputs) == 1 else inputs + + @staticmethod + def backward(ctx, *grad_outputs): + """Backward pass that executes delayed wgrad computation on a separate stream.""" + module = ctx.module + event = module._delayed_wgrad_event + wgrad_stream = module._delayed_wgrad_stream + + wgrad_stream.wait_event(event) + with torch.cuda.stream(wgrad_stream): + with torch.cuda.nvtx.range("delayed_expert_wgrad"): + module.backward_dw(routed_experts=True, shared_experts=False) + event.record(wgrad_stream) + + torch.cuda.current_stream().wait_event(event) + + for param in module.parameters(): + if getattr(param, "post_wgrad_grad_acc_hook", None) is not None: + param.post_wgrad_grad_acc_hook() + + ctx.module = None + return (None,) + grad_outputs diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index a01777bcf0c..e1c71761f1f 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -2327,6 +2327,19 @@ def __post_init__(self): 'partial cuda graph' ) + if self.overlap_dispatch_backward_with_experts_wgrad: + assert not self.overlap_moe_expert_parallel_comm, ( + 'overlap_moe_expert_parallel_comm must be disabled when enabling ' + 'overlap_dispatch_backward_with_experts_wgrad.' 
+ ) + assert is_te_min_version( + "2.3.0" + ), 'TE version >= 2.3.0 is required for overlap_dispatch_backward_with_experts_wgrad' + assert not self.delay_wgrad_compute, ( + 'delay_wgrad_compute and overlap_dispatch_backward_with_experts_wgrad ' + 'are mutually exclusive; use only one' + ) + if self.ep_overlap_early_attn_memory_release: assert self.overlap_moe_expert_parallel_comm, ( 'overlap_moe_expert_parallel_comm must be enabled when enabling ' diff --git a/tests/unit_tests/a2a_overlap/test_delay_wgrad_compute.py b/tests/unit_tests/a2a_overlap/test_delay_wgrad_compute.py new file mode 100644 index 00000000000..cfde6cf51b1 --- /dev/null +++ b/tests/unit_tests/a2a_overlap/test_delay_wgrad_compute.py @@ -0,0 +1,230 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +import gc + +import pytest +import torch + +from megatron.core.models.gpt.gpt_layer_specs import get_gpt_decoder_block_spec +from megatron.core.models.gpt.gpt_model import GPTModel +from megatron.core.transformer import TransformerLayer +from megatron.core.transformer.module import float16_to_fp32 +from megatron.core.utils import is_te_min_version +from tests.unit_tests.a2a_overlap.utils import ( + deterministic_mode, + get_test_config, + get_valid_flex_dispatcher_backend, + get_valid_fp8_flags, + get_valid_token_dispatcher_types, + reset_model, +) +from tests.unit_tests.test_utilities import Utils + +NUM_STEPS = 3 +SEQ_LEN = 128 +VOCAB_SIZE = 512 +LR = 0.01 + + +def _build_gpt_model(config): + """Build and return a GPTModel on CUDA from the given config.""" + layer_spec = get_gpt_decoder_block_spec(config=config, use_transformer_engine=True) + model = GPTModel( + config=config, + transformer_layer_spec=layer_spec, + vocab_size=VOCAB_SIZE, + pre_process=True, + post_process=True, + max_sequence_length=300, + ) + model.cuda() + return model + + +def _build_input_data(): + """Build fixed input data for the model.""" + return { + "input_ids": torch.randint(0, VOCAB_SIZE, (1, SEQ_LEN), 
dtype=torch.int64).cuda(), + "labels": torch.randint(0, VOCAB_SIZE, (1, SEQ_LEN), dtype=torch.int64).cuda(), + "position_ids": torch.arange(SEQ_LEN, dtype=torch.int64).unsqueeze(0).cuda(), + "attention_mask": torch.ones((1, 1, SEQ_LEN, SEQ_LEN), dtype=bool).cuda(), + } + + +def _train_step(model, optimizer, data): + """Run one forward-backward-optimizer step. Return the detached loss.""" + optimizer.zero_grad() + loss = model.forward(**data) + loss = float16_to_fp32(loss) + loss.backward(torch.ones_like(loss)) + optimizer.step() + return loss.detach().clone() + + +def _assert_models_equal(ref_model, test_model): + """Assert that all parameters of two models are bit-identical.""" + rank = torch.distributed.get_rank() + for (name_r, param_r), (_, param_t) in zip( + ref_model.named_parameters(), test_model.named_parameters() + ): + assert torch.equal( + param_r.data, param_t.data + ), f"[rank {rank}] Parameter mismatch after training: {name_r}" + + +class TestDelayWgradCompute: + """Verify that overlap_dispatch_backward_with_experts_wgrad produces identical + training behaviour (per-step loss and final weights) as the non-delayed baseline + across multiple forward-backward-optimizer steps on the full GPTModel. 
+ """ + + def setup_method(self, method): + Utils.initialize_model_parallel( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + expert_model_parallel_size=4, + ) + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + @pytest.mark.skipif(not is_te_min_version("2.3.0"), reason="Requires TE >= 2.3.0") + @pytest.mark.parametrize("shared_expert_intermediate_size", [None, 512]) + @pytest.mark.parametrize("dispatcher_type", get_valid_token_dispatcher_types()) + @pytest.mark.parametrize("fp8_flag", get_valid_fp8_flags()) + def test_overlap_dispatch_backward_with_experts_wgrad( + self, shared_expert_intermediate_size, dispatcher_type, fp8_flag + ): + """Verify that overlap_dispatch_backward_with_experts_wgrad produces identical + per-step loss and final weights as the non-delayed baseline across multiple + forward-backward-optimizer steps on the full GPTModel. + + Covers single/multi-layer, with/without shared experts, dispatcher types, + and FP8 modes. + """ + num_layers = 4 + extra_kwargs = {"moe_token_dispatcher_type": dispatcher_type} + if dispatcher_type == "flex": + extra_kwargs["moe_flex_dispatcher_backend"] = get_valid_flex_dispatcher_backend() + if fp8_flag is not None: + extra_kwargs["fp8"] = fp8_flag[0] + extra_kwargs["fp8_recipe"] = fp8_flag[1] + if shared_expert_intermediate_size is not None: + extra_kwargs["moe_shared_expert_intermediate_size"] = shared_expert_intermediate_size + + with deterministic_mode(): + ref_config = get_test_config(num_layers=num_layers, extra_kwargs=extra_kwargs) + ref_model = _build_gpt_model(ref_config) + init_params = reset_model(ref_model) + + delay_kwargs = {**extra_kwargs, "overlap_dispatch_backward_with_experts_wgrad": True} + test_config = get_test_config(num_layers=num_layers, extra_kwargs=delay_kwargs) + test_model = _build_gpt_model(test_config) + reset_model(test_model, init_params) + + data = _build_input_data() + ref_opt = torch.optim.SGD(ref_model.parameters(), lr=LR) + test_opt = 
torch.optim.SGD(test_model.parameters(), lr=LR) + + rank = torch.distributed.get_rank() + for step in range(NUM_STEPS): + ref_loss = _train_step(ref_model, ref_opt, data) + test_loss = _train_step(test_model, test_opt, data) + assert torch.equal(ref_loss, test_loss), ( + f"[rank {rank}] Loss mismatch at step {step}: " + f"ref={ref_loss.item()}, test={test_loss.item()}" + ) + + _assert_models_equal(ref_model, test_model) + + del ref_model, test_model + gc.collect() + torch.cuda.empty_cache() + + @pytest.mark.skipif(not is_te_min_version("2.3.0"), reason="Requires TE >= 2.3.0") + @pytest.mark.parametrize("shared_expert_intermediate_size", [None, 512]) + @pytest.mark.parametrize("dispatcher_type", get_valid_token_dispatcher_types()) + def test_overlap_dispatch_backward_with_experts_wgrad_with_fsdp( + self, shared_expert_intermediate_size, dispatcher_type + ): + """Verify delayed wgrad with MegatronFSDP wrapping. + + The delayed wgrad path defers the FSDP reduce-scatter for expert + parameters until the wgrad computation completes on a separate stream. + This test checks that the deferred reduce-scatter produces identical + per-step loss and final weights as the non-delayed FSDP baseline. + """ + from torch.distributed import DeviceMesh + + from megatron.core import parallel_state + from megatron.core.distributed.fsdp.src.megatron_fsdp.fully_shard import ( + fully_shard_model, + fully_shard_optimizer, + ) + + # Build expert device mesh required by MegatronFSDP for expert parallelism. + # Non-expert DeviceMesh will be auto-generated by fully_shard_model() with + # the same mesh dimension names (but different mesh shape, DP=WORLD_SIZE). + expt_dp_group = parallel_state.get_expert_data_parallel_group() + expt_dp_ranks = torch.distributed.get_process_group_ranks(expt_dp_group) + expt_tp_group = torch.distributed.new_group( + ranks=[torch.distributed.get_rank()] + ) # Dummy TP=1 group. 
+ expt_device_mesh = DeviceMesh.from_group( + [expt_dp_group, expt_tp_group], + device_type="cuda", + mesh=[[x] for x in expt_dp_ranks], + # These are the default Megatron-FSDP DeviceMesh dimension names. + # Make sure they match the device_mesh=None case. + mesh_dim_names=("fsdp", "tp"), + ) + + num_layers = 4 + extra_kwargs = {"moe_token_dispatcher_type": dispatcher_type} + if dispatcher_type == "flex": + extra_kwargs["moe_flex_dispatcher_backend"] = get_valid_flex_dispatcher_backend() + if shared_expert_intermediate_size is not None: + extra_kwargs["moe_shared_expert_intermediate_size"] = shared_expert_intermediate_size + + with deterministic_mode(): + # Build reference model (no delay) and wrap with FSDP + ref_config = get_test_config(num_layers=num_layers, extra_kwargs=extra_kwargs) + ref_model = _build_gpt_model(ref_config) + init_params = reset_model(ref_model) + + ref_fsdp = fully_shard_model( + module=ref_model, + fsdp_unit_modules=[TransformerLayer], + expt_device_mesh=expt_device_mesh, + ) + ref_opt = torch.optim.SGD(ref_fsdp.parameters(), lr=LR) + ref_opt = fully_shard_optimizer(optimizer=ref_opt) + + # Build test model (with delay) and wrap with FSDP + delay_kwargs = {**extra_kwargs, "overlap_dispatch_backward_with_experts_wgrad": True} + test_config = get_test_config(num_layers=num_layers, extra_kwargs=delay_kwargs) + test_model = _build_gpt_model(test_config) + reset_model(test_model, init_params) + + test_fsdp = fully_shard_model( + module=test_model, + fsdp_unit_modules=[TransformerLayer], + expt_device_mesh=expt_device_mesh, + ) + test_opt = torch.optim.SGD(test_fsdp.parameters(), lr=LR) + test_opt = fully_shard_optimizer(optimizer=test_opt) + + data = _build_input_data() + rank = torch.distributed.get_rank() + for step in range(NUM_STEPS): + ref_loss = _train_step(ref_fsdp, ref_opt, data) + test_loss = _train_step(test_fsdp, test_opt, data) + assert torch.equal(ref_loss, test_loss), ( + f"[rank {rank}] Loss mismatch at step {step}: " + 
f"ref={ref_loss.item()}, test={test_loss.item()}" + ) + + _assert_models_equal(ref_fsdp, test_fsdp) + + del ref_fsdp, test_fsdp, ref_opt, test_opt + gc.collect() + torch.cuda.empty_cache() diff --git a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py index 6c59dd3f9e3..b933015406f 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py @@ -103,7 +103,6 @@ def test_1f1b_schedule_model_chunk(self, mtp_layers, dispatcher_type, fp8_flag, extra_kwargs = {"moe_token_dispatcher_type": dispatcher_type} if dispatcher_type == "flex": extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" - extra_kwargs["moe_router_dtype"] = "fp32" if fp8_flag is not None: extra_kwargs["fp8"] = fp8_flag[0] extra_kwargs["fp8_recipe"] = fp8_flag[1] @@ -215,7 +214,6 @@ def test_1f1b_schedule_model_chunk_with_padding_mask(self, dispatcher_type, laye } if dispatcher_type == "flex": extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" - extra_kwargs["moe_router_dtype"] = "fp32" with deterministic_mode(): for layer_num in layers: output_tensors = [] diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py index 4bbab6ccb30..95e2e1950d9 100644 --- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py +++ b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py @@ -410,7 +410,6 @@ def test_transformer_layer_overlap(self, dispatcher_type, fp8_flag): extra_kwargs = {"moe_token_dispatcher_type": dispatcher_type} if dispatcher_type == "flex": extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" - extra_kwargs["moe_router_dtype"] = "fp32" if fp8_flag is not None: extra_kwargs["fp8"] = fp8_flag[0] extra_kwargs["fp8_recipe"] = fp8_flag[1] @@ -460,7 +459,6 @@ def test_mtp_layer_overlap(self, dispatcher_type, fp8_flag): } if dispatcher_type == "flex": 
extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" - extra_kwargs["moe_router_dtype"] = "fp32" if fp8_flag is not None: extra_kwargs["fp8_recipe"] = fp8_flag[1] extra_kwargs["fp8"] = fp8_flag[0] diff --git a/tests/unit_tests/a2a_overlap/utils.py b/tests/unit_tests/a2a_overlap/utils.py index a52843956df..9a644ee8cc8 100644 --- a/tests/unit_tests/a2a_overlap/utils.py +++ b/tests/unit_tests/a2a_overlap/utils.py @@ -216,33 +216,54 @@ def get_test_config(num_layers=1, num_moe_experts=8, extra_kwargs={}, moe_groupe multi_latent_attention=True, num_moe_experts=num_moe_experts, moe_grouped_gemm=moe_grouped_gemm, + moe_router_dtype="fp32", **extra_kwargs, ) return config def get_valid_token_dispatcher_types(): - try: - from deep_ep import Buffer - from deep_ep.utils import EventHandle, EventOverlap + from megatron.core.transformer.moe.fused_a2a import HAVE_DEEP_EP, HAVE_HYBRIDEP + if HAVE_HYBRIDEP or HAVE_DEEP_EP: return ["alltoall", "flex"] - except ImportError: + else: return ["alltoall"] +def get_valid_flex_dispatcher_backend(): + from megatron.core.transformer.moe.fused_a2a import HAVE_DEEP_EP, HAVE_HYBRIDEP + + if HAVE_HYBRIDEP: + return "hybridep" + elif HAVE_DEEP_EP: + return "deepep" + else: + return None + + def get_valid_fp8_flags(): from megatron.core.enums import Fp8Recipe + from megatron.training.utils import get_device_arch_version fp8_types = ["e4m3", "hybrid"] recipes = [] - valid_flags = [] + arch = get_device_arch_version() + if is_te_min_version("2.3.0.dev0"): - recipes.append(Fp8Recipe.blockwise) - recipes.append(Fp8Recipe.tensorwise) + recipes.append(Fp8Recipe.tensorwise) # Hopper + Blackwell + if is_te_min_version("2.4.0.dev0") and arch == 9: + recipes.append(Fp8Recipe.blockwise) # Hopper only + + if is_te_min_version("2.3.0.dev0") and arch >= 10: + recipes.append(Fp8Recipe.mxfp8) # Blackwell only + + valid_flags = [] for fp8_type in fp8_types: for recipe in recipes: + if fp8_type == "hybrid" and recipe == Fp8Recipe.mxfp8: + continue 
valid_flags.append((fp8_type, recipe)) valid_flags.append(None) diff --git a/tests/unit_tests/models/test_mamba_moe_model.py b/tests/unit_tests/models/test_mamba_moe_model.py index d43f377b140..a32776a9424 100644 --- a/tests/unit_tests/models/test_mamba_moe_model.py +++ b/tests/unit_tests/models/test_mamba_moe_model.py @@ -76,6 +76,7 @@ "deallocate_pipeline_outputs": True, "defer_embedding_wgrad_compute": False, "delay_wgrad_compute": False, + "overlap_dispatch_backward_with_experts_wgrad": False, "deterministic_mode": False, "disable_bf16_reduced_precision_matmul": False, "disable_parameter_transpose_cache": False, diff --git a/tests/unit_tests/transformer/test_submodule_callables.py b/tests/unit_tests/transformer/test_submodule_callables.py index 03e2d751a52..7b41b3ca197 100644 --- a/tests/unit_tests/transformer/test_submodule_callables.py +++ b/tests/unit_tests/transformer/test_submodule_callables.py @@ -138,7 +138,6 @@ def test_1f1b_overlap(self, dispatcher_type, grouped_gemm, permute_fusion): } if dispatcher_type == "flex": extra_kwargs["moe_flex_dispatcher_backend"] = "deepep" - extra_kwargs["moe_router_dtype"] = "fp32" config = get_test_config(extra_kwargs=extra_kwargs, moe_grouped_gemm=grouped_gemm) microbatches = 4 with deterministic_mode(): From b165580acd8ebb3c16e51de2a3ed6f05c4d59514 Mon Sep 17 00:00:00 2001 From: Yuzhong Wang Date: Fri, 12 Dec 2025 05:10:39 -0800 Subject: [PATCH 333/334] support GDN packed sequence --- megatron/core/ssm/gated_delta_net.py | 103 ++++++++++++++---- tests/unit_tests/ssm/test_gated_delta_net.py | 48 +++++++- .../unit_tests/transformer/test_attention.py | 17 ++- 3 files changed, 145 insertions(+), 23 deletions(-) diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py index 03897ed7465..da9f38a63f8 100644 --- a/megatron/core/ssm/gated_delta_net.py +++ b/megatron/core/ssm/gated_delta_net.py @@ -300,8 +300,29 @@ def forward( raise NotImplementedError("GDN does not support inference for now.") 
if packed_seq_params is not None: - # TODO: support packed sequence - raise NotImplementedError("GDN does not support packed sequence for now.") + assert batch == 1, "Packed sequence expects batch dimension to be 1" + assert ( + not self.config.deterministic_mode + ), "Packed sequence does not support deterministic mode." + + # Prefer cu_seqlens_q_padded if available, otherwise use cu_seqlens_q + cu_seqlens_q = packed_seq_params.cu_seqlens_q_padded or packed_seq_params.cu_seqlens_q + # Prefer cu_seqlens_kv_padded if available, otherwise use cu_seqlens_kv + cu_seqlens_kv = ( + packed_seq_params.cu_seqlens_kv_padded or packed_seq_params.cu_seqlens_kv + ) + assert torch.equal(cu_seqlens_q, cu_seqlens_kv), ( + "Currently only support cu_seqlens_q equals to cu_seqlens_kv, " + f"but got {cu_seqlens_q=} and {cu_seqlens_kv=}" + ) + num_packed_seqs = cu_seqlens_q.shape[0] - 1 + assert num_packed_seqs > 0, ( + "Number of packed sequences must be greater than 0, " + f"but got {cu_seqlens_q=} and {cu_seqlens_kv=}" + ) + else: + cu_seqlens_q = None + cu_seqlens_kv = None # Input projection nvtx_range_push(suffix="in_proj") @@ -309,20 +330,41 @@ def forward( nvtx_range_pop(suffix="in_proj") # CP All to All: CP to HP - qkvzba = tensor_a2a_cp2hp( - qkvzba, - seq_dim=0, - head_dim=-1, - cp_group=self.pg_collection.cp, - split_sections=[ - self.qk_dim_local_tp, - self.qk_dim_local_tp, - self.v_dim_local_tp, - self.v_dim_local_tp, - self.num_value_heads // self.tp_size, - self.num_value_heads // self.tp_size, - ], - ) + if packed_seq_params is not None: + unpacked_qkvzba = _unpack_sequence(qkvzba, cu_seqlens_q // self.cp_size, dim=0) + outputs = [] + for qkvzba_i in unpacked_qkvzba: + qkvzba_i = tensor_a2a_cp2hp( + qkvzba_i, + seq_dim=0, + head_dim=-1, + cp_group=self.pg_collection.cp, + split_sections=[ + self.qk_dim_local_tp, + self.qk_dim_local_tp, + self.v_dim_local_tp, + self.v_dim_local_tp, + self.num_value_heads // self.tp_size, + self.num_value_heads // self.tp_size, + ], + ) 
+ outputs.append(qkvzba_i) + qkvzba = torch.cat(outputs, dim=0) + else: + qkvzba = tensor_a2a_cp2hp( + qkvzba, + seq_dim=0, + head_dim=-1, + cp_group=self.pg_collection.cp, + split_sections=[ + self.qk_dim_local_tp, + self.qk_dim_local_tp, + self.v_dim_local_tp, + self.v_dim_local_tp, + self.num_value_heads // self.tp_size, + self.num_value_heads // self.tp_size, + ], + ) # Transpose: s b x --> b s x # From sbhd to bshd format @@ -389,6 +431,7 @@ def forward( activation=self.activation, initial_state=None, output_final_state=False, + cu_seqlens=cu_seqlens_q, ) nvtx_range_pop(suffix="conv1d") @@ -418,6 +461,7 @@ def forward( initial_state=None, output_final_state=False, use_qk_l2norm_in_kernel=False, + cu_seqlens=cu_seqlens_q, ) nvtx_range_pop(suffix="gated_delta_rule") @@ -432,9 +476,19 @@ def forward( norm_out = norm_out.transpose(0, 1).contiguous() # CP all to all: HP to CP - norm_out = tensor_a2a_hp2cp( - norm_out, seq_dim=0, head_dim=-1, cp_group=self.pg_collection.cp - ) + if packed_seq_params is not None: + unpacked_norm_out = _unpack_sequence(norm_out, cu_seqlens_q, dim=0) + outputs = [] + for norm_out_i in unpacked_norm_out: + norm_out_i = tensor_a2a_hp2cp( + norm_out_i, seq_dim=0, head_dim=-1, cp_group=self.pg_collection.cp + ) + outputs.append(norm_out_i) + norm_out = torch.cat(outputs, dim=0) + else: + norm_out = tensor_a2a_hp2cp( + norm_out, seq_dim=0, head_dim=-1, cp_group=self.pg_collection.cp + ) # Output projection nvtx_range_push(suffix="out_proj") @@ -604,6 +658,17 @@ def _backward_out_proj(self): self.out_proj.backward_dw() +def _unpack_sequence(x, cu_seqlens, dim=1): + unpacked_x = [] + num_seqs = cu_seqlens.shape[0] - 1 + for i in range(num_seqs): + idx_start = cu_seqlens[i].item() + idx_end = cu_seqlens[i + 1].item() + chunked_index = [slice(None)] * dim + [slice(idx_start, idx_end)] + unpacked_x.append(x[chunked_index]) + return unpacked_x + + #################### # Sharded state dict utilities #################### diff --git 
a/tests/unit_tests/ssm/test_gated_delta_net.py b/tests/unit_tests/ssm/test_gated_delta_net.py index 8f3c59b3d43..7d65af8a95e 100644 --- a/tests/unit_tests/ssm/test_gated_delta_net.py +++ b/tests/unit_tests/ssm/test_gated_delta_net.py @@ -32,6 +32,7 @@ ) from tests.unit_tests.test_utilities import Utils from tests.unit_tests.transformer.test_attention import _test_parallel_attention_correctness +from tests.unit_tests.transformer.test_multi_latent_attention import make_test_packed_seq_params try: import fla @@ -202,7 +203,51 @@ def test_jit_compiled_helpers(self): assert g.shape == alpha.shape assert beta_sig.shape == beta.shape + def test_gpu_forward_thd_correctness(self): + if self.sp_size > 1: + pytest.skip("Sequence parallel is not supported for this test case.") + atol, rtol = 3e-4, 3e-4 + + # Input shape + sequence_length = 32 + micro_batch_size = 4 + cu_seqlens = [0, 32, 64, 96, 128] + # sbhd input shape: [sequence length, batch size, hidden size] + sub_sequence_length = sequence_length // self.cp_size + hidden_states_sbhd = torch.rand( + (sub_sequence_length, micro_batch_size, self.gdn.config.hidden_size) + ) + attention_mask_sbhd = None + hidden_states_sbhd = hidden_states_sbhd.cuda().bfloat16() + # thd input shape: [sequence length * batch size, 1, hidden size] + hidden_states_thd = hidden_states_sbhd.transpose(0, 1).contiguous() + hidden_states_thd = hidden_states_thd.view(-1, 1, self.gdn.config.hidden_size) + attention_mask_thd = None + packed_seq_params = make_test_packed_seq_params(cu_seqlens=cu_seqlens) + + # THD format + output_thd, _ = self.gdn( + hidden_states_thd, attention_mask_thd, packed_seq_params=packed_seq_params + ) + # SBHD format + output_sbhd, _ = self.gdn(hidden_states_sbhd, attention_mask_sbhd) + output_sbhd_T = output_sbhd.transpose(0, 1).contiguous().view(*output_thd.shape) + + rank = torch.distributed.get_rank() + assert output_thd.shape[0] == sub_sequence_length * micro_batch_size + assert output_thd.shape[1] == 1 + assert 
output_thd.shape[2] == self.gdn.config.hidden_size + torch.testing.assert_close( + output_sbhd_T, + output_thd, + atol=atol, + rtol=rtol, + msg=lambda msg: f"Output mismatch ({rank=}): {msg}", + ) + + +@pytest.mark.parametrize("sequence_packing", [False, True]) @pytest.mark.parametrize( ("tp", "sp", "cp"), [ @@ -214,7 +259,7 @@ def test_jit_compiled_helpers(self): ], ) @pytest.mark.skipif(not HAVE_FLA, reason="FLA is not installed.") -def test_parallel_gated_delta_net_correctness(tmp_path_dist_ckpt, tp, sp, cp): +def test_parallel_gated_delta_net_correctness(tmp_path_dist_ckpt, sequence_packing, tp, sp, cp): transformer_config = TransformerConfig( hidden_size=128, linear_conv_kernel_dim=2, @@ -255,4 +300,5 @@ def test_parallel_gated_delta_net_correctness(tmp_path_dist_ckpt, tp, sp, cp): seed=123, sequence_length=256, micro_batch_size=4, + sequence_packing=sequence_packing, ) diff --git a/tests/unit_tests/transformer/test_attention.py b/tests/unit_tests/transformer/test_attention.py index cb69a0b7a9e..55a4b8a4864 100644 --- a/tests/unit_tests/transformer/test_attention.py +++ b/tests/unit_tests/transformer/test_attention.py @@ -41,6 +41,7 @@ init_checkpointing_mock_args, ) from tests.unit_tests.test_utilities import Utils +from tests.unit_tests.transformer.test_multi_latent_attention import make_test_packed_seq_params try: from transformer_engine.pytorch.attention.rope import apply_fused_qkv_rotary_pos_emb @@ -712,6 +713,7 @@ def _test_parallel_attention_correctness( seed=123, sequence_length=256, micro_batch_size=4, + sequence_packing=False, ): # Model initialization function def initialize_gpt_model( @@ -805,17 +807,24 @@ def initialize_gpt_model( def get_tensor_on_this_rank(tensor): if cp > 1: tensor = get_tensor_on_this_cp_rank(tensor, 0, cp_group) + if sequence_packing: + tensor = tensor.transpose(0, 1).contiguous().view(-1, 1, *tensor.shape[2:]) if tp > 1 and sp: - sp_seg = sequence_length // tp // cp + sp_seg = tensor.shape[0] // tp tensor = tensor[tp_rank * 
sp_seg : (tp_rank + 1) * sp_seg] return tensor # Calculate parallel model output + if sequence_packing: + cu_seqlens = [i * sequence_length for i in range(micro_batch_size + 1)] + packed_seq_params = make_test_packed_seq_params(cu_seqlens=cu_seqlens) + else: + packed_seq_params = None input_hidden_states = get_tensor_on_this_rank(input_hidden_states) input_hidden_states = input_hidden_states.detach().requires_grad_(True) parallel_attention = gpt_model[0].decoder.layers[0].self_attention output_hidden_states_parallel, bias_hidden_states_parallel = parallel_attention( - input_hidden_states, attention_mask=None + input_hidden_states, attention_mask=None, packed_seq_params=packed_seq_params ) output_hidden_states_parallel.sum().backward() input_grad_parallel = input_hidden_states.grad.detach() @@ -881,6 +890,7 @@ def get_tensor_on_this_rank(tensor): # TODO(yuzhongw): Add test case for fallback_to_eager_attn +@pytest.mark.parametrize("sequence_packing", [False, True]) @pytest.mark.parametrize("apply_rope_fusion", [False, True]) @pytest.mark.parametrize( ("tp", "sp", "cp"), @@ -895,7 +905,7 @@ def get_tensor_on_this_rank(tensor): @pytest.mark.parametrize("qk_layernorm", [False, True]) @pytest.mark.parametrize("output_gate", [False, True]) def test_parallel_attention_correctness( - tmp_path_dist_ckpt, apply_rope_fusion, tp, sp, cp, qk_layernorm, output_gate + tmp_path_dist_ckpt, sequence_packing, apply_rope_fusion, tp, sp, cp, qk_layernorm, output_gate ): transformer_config = TransformerConfig( num_layers=1, @@ -924,6 +934,7 @@ def test_parallel_attention_correctness( cp=cp, seed=123, sequence_length=256, + sequence_packing=sequence_packing, ) From cebd475eeb4aa5593a662e4c25b3333ec2047c26 Mon Sep 17 00:00:00 2001 From: Yuzhong Wang Date: Wed, 21 Jan 2026 19:58:08 -0800 Subject: [PATCH 334/334] Fix several bugs Signed-off-by: yuzhongw Co-authored-by: kunlunl --- megatron/core/ssm/gated_delta_net.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) 
diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py index da9f38a63f8..7b308952e1d 100644 --- a/megatron/core/ssm/gated_delta_net.py +++ b/megatron/core/ssm/gated_delta_net.py @@ -306,11 +306,15 @@ def forward( ), "Packed sequence does not support deterministic mode." # Prefer cu_seqlens_q_padded if available, otherwise use cu_seqlens_q - cu_seqlens_q = packed_seq_params.cu_seqlens_q_padded or packed_seq_params.cu_seqlens_q + if packed_seq_params.cu_seqlens_q_padded is not None: + cu_seqlens_q = packed_seq_params.cu_seqlens_q_padded + else: + cu_seqlens_q = packed_seq_params.cu_seqlens_q # Prefer cu_seqlens_kv_padded if available, otherwise use cu_seqlens_kv - cu_seqlens_kv = ( - packed_seq_params.cu_seqlens_kv_padded or packed_seq_params.cu_seqlens_kv - ) + if packed_seq_params.cu_seqlens_kv_padded is not None: + cu_seqlens_kv = packed_seq_params.cu_seqlens_kv_padded + else: + cu_seqlens_kv = packed_seq_params.cu_seqlens_kv assert torch.equal(cu_seqlens_q, cu_seqlens_kv), ( "Currently only support cu_seqlens_q equals to cu_seqlens_kv, " f"but got {cu_seqlens_q=} and {cu_seqlens_kv=}" @@ -665,7 +669,7 @@ def _unpack_sequence(x, cu_seqlens, dim=1): idx_start = cu_seqlens[i].item() idx_end = cu_seqlens[i + 1].item() chunked_index = [slice(None)] * dim + [slice(idx_start, idx_end)] - unpacked_x.append(x[chunked_index]) + unpacked_x.append(x[tuple(chunked_index)]) return unpacked_x @@ -920,6 +924,7 @@ def torch_chunk_gated_delta_rule( initial_state=None, output_final_state=False, use_qk_l2norm_in_kernel=False, + cu_seqlens=None, ): # pylint: disable=line-too-long ''' @@ -929,6 +934,10 @@ def torch_chunk_gated_delta_rule( Reference: https://github.com/huggingface/transformers/blob/144c8ce2809a2e21914017652700e1ecb450501e/src/transformers/models/qwen3_next/modeling_qwen3_next.py#L470-L547 ''' + assert ( + cu_seqlens is None + ), "cu_seqlens is not supported for torch_chunk_gated_delta_rule for now." 
+ initial_dtype = query.dtype if use_qk_l2norm_in_kernel: query = l2norm(query, dim=-1, eps=1e-6)